feat(observability): ✨ add dashboards, scrape configs, and fix victoria-logs to template
Add new Grafana dashboard CRs to grafana-operator/manifests:
- platform-overview, forgejo, argocd-operational, cronjob-monitoring

Fix the victoria-logs dashboard to use the grafana.com marketplace (id: 22698) instead of a raw GitHub URL.

Add hub-side scrape configs and recording rules to victoria-k8s-stack/manifests:
- argocd-scrape, garm-scrape, coredns-scrape, ci-sustainability-rules

Add a client-side forgejo VMServiceScrape to observability-client/vm-client-stack/manifests.

Enable ArgoCD metrics endpoints in core/argocd/values.yaml (required by argocd-scrape).
This commit is contained in:
parent
69e4d1b3dc
commit
3bc8a7444b
11 changed files with 587 additions and 1 deletions
|
|
@ -40,3 +40,19 @@ notifications:
|
|||
|
||||
# Dex is switched off for this Argo CD installation.
dex:
  enabled: false

# Turn on the metrics endpoint of each Argo CD component. Per the commit
# description these endpoints are what the argocd scrape config relies on.
# NOTE(review): keys are assumed to be top-level chart values — confirm
# against the full values.yaml.
controller:
  metrics:
    enabled: true

server:
  metrics:
    enabled: true

repoServer:
  metrics:
    enabled: true

applicationSet:
  metrics:
    enabled: true
|
||||
|
|
|
|||
|
|
@ -0,0 +1,15 @@
|
|||
# VMServiceScrape for Forgejo: selects Services labelled
# app.kubernetes.io/name=forgejo in the "gitea" namespace and scrapes
# /metrics on their "http" port. The scrape object itself is created in
# the "observability" namespace.
apiVersion: operator.victoriametrics.com/v1beta1
kind: VMServiceScrape
metadata:
  name: forgejo
  namespace: observability
spec:
  # Only look for matching Services inside the gitea namespace.
  namespaceSelector:
    matchNames:
      - gitea
  selector:
    matchLabels:
      app.kubernetes.io/name: forgejo
  endpoints:
    - port: http
      path: /metrics
|
||||
|
|
@ -0,0 +1,11 @@
|
|||
# GrafanaDashboard sourced from grafana.com (dashboard id 19993, revision 2).
apiVersion: grafana.integreatly.org/v1beta1
kind: GrafanaDashboard
metadata:
  name: argocd-operational
spec:
  # Targets Grafana instances carrying the label dashboards=grafana.
  instanceSelector:
    matchLabels:
      dashboards: "grafana"
  grafanaCom:
    id: 19993
    revision: 2
|
||||
|
|
@ -0,0 +1,11 @@
|
|||
# GrafanaDashboard sourced from grafana.com (dashboard id 14279, revision 1).
apiVersion: grafana.integreatly.org/v1beta1
kind: GrafanaDashboard
metadata:
  name: cronjob-monitoring
spec:
  # Targets Grafana instances carrying the label dashboards=grafana.
  instanceSelector:
    matchLabels:
      dashboards: "grafana"
  grafanaCom:
    id: 14279
    revision: 1
|
||||
|
|
@ -0,0 +1,184 @@
|
|||
# GrafanaDashboard "Forgejo" (uid edp-forgejo), defined inline as JSON.
#
# Layout: three rows of stat panels —
#   * Forgejo Health : up status, build version, repo/user/org/team counts
#   * Activity       : open/closed issues, webhooks, hook tasks
#   * Content & Auth : stars, watches, releases, mirrors, public keys, OAuth apps
#
# Every query is filtered by the multi-value "cluster_environment" template
# variable, whose options come from label_values(gitea_repositories,
# cluster_environment). Panels reference a datasource of type "prometheus"
# without pinning a uid.
apiVersion: grafana.integreatly.org/v1beta1
kind: GrafanaDashboard
metadata:
  name: forgejo
spec:
  # Targets Grafana instances carrying the label dashboards=grafana.
  instanceSelector:
    matchLabels:
      dashboards: "grafana"
  json: |
    {
      "annotations": {"list": []},
      "editable": true,
      "graphTooltip": 1,
      "panels": [
        {
          "collapsed": false,
          "gridPos": {"h": 1, "w": 24, "x": 0, "y": 0},
          "title": "Forgejo Health",
          "type": "row"
        },
        {
          "datasource": {"type": "prometheus"},
          "fieldConfig": {"defaults": {"mappings": [{"options": {"0": {"text": "DOWN", "color": "red"}, "1": {"text": "UP", "color": "green"}}, "type": "value"}], "thresholds": {"mode": "absolute", "steps": [{"color": "red", "value": null}, {"color": "green", "value": 1}]}}},
          "gridPos": {"h": 4, "w": 4, "x": 0, "y": 1},
          "title": "Status",
          "type": "stat",
          "targets": [{"expr": "up{job=\"forgejo-server-http\", cluster_environment=~\"$cluster_environment\"}", "legendFormat": "{{cluster_environment}}"}]
        },
        {
          "datasource": {"type": "prometheus"},
          "fieldConfig": {"defaults": {"unit": "short"}},
          "gridPos": {"h": 4, "w": 4, "x": 4, "y": 1},
          "title": "Version",
          "type": "stat",
          "targets": [{"expr": "gitea_build_info{cluster_environment=~\"$cluster_environment\"}", "legendFormat": "{{version}}"}],
          "options": {"reduceOptions": {"calcs": ["lastNotNull"]}, "textMode": "name"}
        },
        {
          "datasource": {"type": "prometheus"},
          "fieldConfig": {"defaults": {"unit": "short"}},
          "gridPos": {"h": 4, "w": 4, "x": 8, "y": 1},
          "title": "Repositories",
          "type": "stat",
          "targets": [{"expr": "gitea_repositories{cluster_environment=~\"$cluster_environment\"}", "legendFormat": "{{cluster_environment}}"}]
        },
        {
          "datasource": {"type": "prometheus"},
          "fieldConfig": {"defaults": {"unit": "short"}},
          "gridPos": {"h": 4, "w": 4, "x": 12, "y": 1},
          "title": "Users",
          "type": "stat",
          "targets": [{"expr": "gitea_users{cluster_environment=~\"$cluster_environment\"}", "legendFormat": "{{cluster_environment}}"}]
        },
        {
          "datasource": {"type": "prometheus"},
          "fieldConfig": {"defaults": {"unit": "short"}},
          "gridPos": {"h": 4, "w": 4, "x": 16, "y": 1},
          "title": "Organizations",
          "type": "stat",
          "targets": [{"expr": "gitea_organizations{cluster_environment=~\"$cluster_environment\"}", "legendFormat": "{{cluster_environment}}"}]
        },
        {
          "datasource": {"type": "prometheus"},
          "fieldConfig": {"defaults": {"unit": "short"}},
          "gridPos": {"h": 4, "w": 4, "x": 20, "y": 1},
          "title": "Teams",
          "type": "stat",
          "targets": [{"expr": "gitea_teams{cluster_environment=~\"$cluster_environment\"}", "legendFormat": "{{cluster_environment}}"}]
        },
        {
          "collapsed": false,
          "gridPos": {"h": 1, "w": 24, "x": 0, "y": 5},
          "title": "Activity",
          "type": "row"
        },
        {
          "datasource": {"type": "prometheus"},
          "fieldConfig": {"defaults": {"unit": "short"}},
          "gridPos": {"h": 4, "w": 6, "x": 0, "y": 6},
          "title": "Open Issues",
          "type": "stat",
          "targets": [{"expr": "gitea_issues_open{cluster_environment=~\"$cluster_environment\"}", "legendFormat": "{{cluster_environment}}"}]
        },
        {
          "datasource": {"type": "prometheus"},
          "fieldConfig": {"defaults": {"unit": "short"}},
          "gridPos": {"h": 4, "w": 6, "x": 6, "y": 6},
          "title": "Closed Issues",
          "type": "stat",
          "targets": [{"expr": "gitea_issues_closed{cluster_environment=~\"$cluster_environment\"}", "legendFormat": "{{cluster_environment}}"}]
        },
        {
          "datasource": {"type": "prometheus"},
          "fieldConfig": {"defaults": {"unit": "short"}},
          "gridPos": {"h": 4, "w": 6, "x": 12, "y": 6},
          "title": "Webhooks",
          "type": "stat",
          "targets": [{"expr": "gitea_webhooks{cluster_environment=~\"$cluster_environment\"}", "legendFormat": "{{cluster_environment}}"}]
        },
        {
          "datasource": {"type": "prometheus"},
          "fieldConfig": {"defaults": {"unit": "short"}},
          "gridPos": {"h": 4, "w": 6, "x": 18, "y": 6},
          "title": "Hook Tasks",
          "type": "stat",
          "targets": [{"expr": "gitea_hooktasks{cluster_environment=~\"$cluster_environment\"}", "legendFormat": "{{cluster_environment}}"}]
        },
        {
          "collapsed": false,
          "gridPos": {"h": 1, "w": 24, "x": 0, "y": 10},
          "title": "Content & Auth",
          "type": "row"
        },
        {
          "datasource": {"type": "prometheus"},
          "fieldConfig": {"defaults": {"unit": "short"}},
          "gridPos": {"h": 4, "w": 4, "x": 0, "y": 11},
          "title": "Stars",
          "type": "stat",
          "targets": [{"expr": "gitea_stars{cluster_environment=~\"$cluster_environment\"}", "legendFormat": "{{cluster_environment}}"}]
        },
        {
          "datasource": {"type": "prometheus"},
          "fieldConfig": {"defaults": {"unit": "short"}},
          "gridPos": {"h": 4, "w": 4, "x": 4, "y": 11},
          "title": "Watches",
          "type": "stat",
          "targets": [{"expr": "gitea_watches{cluster_environment=~\"$cluster_environment\"}", "legendFormat": "{{cluster_environment}}"}]
        },
        {
          "datasource": {"type": "prometheus"},
          "fieldConfig": {"defaults": {"unit": "short"}},
          "gridPos": {"h": 4, "w": 4, "x": 8, "y": 11},
          "title": "Releases",
          "type": "stat",
          "targets": [{"expr": "gitea_releases{cluster_environment=~\"$cluster_environment\"}", "legendFormat": "{{cluster_environment}}"}]
        },
        {
          "datasource": {"type": "prometheus"},
          "fieldConfig": {"defaults": {"unit": "short"}},
          "gridPos": {"h": 4, "w": 4, "x": 12, "y": 11},
          "title": "Mirrors",
          "type": "stat",
          "targets": [{"expr": "gitea_mirrors{cluster_environment=~\"$cluster_environment\"}", "legendFormat": "{{cluster_environment}}"}]
        },
        {
          "datasource": {"type": "prometheus"},
          "fieldConfig": {"defaults": {"unit": "short"}},
          "gridPos": {"h": 4, "w": 4, "x": 16, "y": 11},
          "title": "Public Keys",
          "type": "stat",
          "targets": [{"expr": "gitea_publickeys{cluster_environment=~\"$cluster_environment\"}", "legendFormat": "{{cluster_environment}}"}]
        },
        {
          "datasource": {"type": "prometheus"},
          "fieldConfig": {"defaults": {"unit": "short"}},
          "gridPos": {"h": 4, "w": 4, "x": 20, "y": 11},
          "title": "OAuth Apps",
          "type": "stat",
          "targets": [{"expr": "gitea_oauths{cluster_environment=~\"$cluster_environment\"}", "legendFormat": "{{cluster_environment}}"}]
        }
      ],
      "schemaVersion": 39,
      "tags": ["edp", "forgejo", "gitea"],
      "templating": {
        "list": [
          {
            "current": {"selected": true, "text": "All", "value": "$__all"},
            "datasource": {"type": "prometheus"},
            "definition": "label_values(gitea_repositories, cluster_environment)",
            "includeAll": true,
            "multi": true,
            "name": "cluster_environment",
            "label": "Environment",
            "query": "label_values(gitea_repositories, cluster_environment)",
            "refresh": 2,
            "type": "query"
          }
        ]
      },
      "time": {"from": "now-6h", "to": "now"},
      "title": "Forgejo",
      "uid": "edp-forgejo"
    }
|
||||
|
|
@ -0,0 +1,230 @@
|
|||
# GrafanaDashboard "EDP Platform Overview" (uid edp-platform-overview),
# defined inline as JSON.
#
# Rows:
#   * Platform Health : Forgejo up, ingress 5xx rate, failed jobs in the gitea
#                       namespace, cluster CPU / memory, max PVC usage
#   * Forgejo         : headline gitea_* counters
#   * Resources       : per-node CPU and per-claim PVC usage over time
#   * Backups         : schedule age, duration and failures of the
#                       forgejo-s3-backup / secrets-backup jobs
#
# Every query is filtered by the multi-value "cluster_environment" template
# variable (options from label_values(up, cluster_environment)).
#
# Datasource refs deliberately carry only {"type": "prometheus"} and no uid:
# a "${DS_...}" import placeholder is only substituted when spec.datasources
# maps it, and this CR defines no such mapping, so a placeholder uid would be
# sent to Grafana verbatim and fail to resolve.
apiVersion: grafana.integreatly.org/v1beta1
kind: GrafanaDashboard
metadata:
  name: platform-overview
spec:
  # Targets Grafana instances carrying the label dashboards=grafana.
  instanceSelector:
    matchLabels:
      dashboards: "grafana"
  json: |
    {
      "annotations": {"list": []},
      "editable": true,
      "fiscalYearStartMonth": 0,
      "graphTooltip": 1,
      "links": [],
      "panels": [
        {
          "collapsed": false,
          "gridPos": {"h": 1, "w": 24, "x": 0, "y": 0},
          "title": "Platform Health",
          "type": "row"
        },
        {
          "datasource": {"type": "prometheus"},
          "fieldConfig": {
            "defaults": {
              "mappings": [{"options": {"0": {"text": "DOWN", "color": "red"}, "1": {"text": "UP", "color": "green"}}, "type": "value"}],
              "thresholds": {"mode": "absolute", "steps": [{"color": "red", "value": null}, {"color": "green", "value": 1}]}
            }
          },
          "gridPos": {"h": 4, "w": 4, "x": 0, "y": 1},
          "title": "Forgejo",
          "type": "stat",
          "targets": [{"expr": "sum(up{job=\"forgejo-server-http\", cluster_environment=~\"$cluster_environment\"})", "legendFormat": ""}]
        },
        {
          "datasource": {"type": "prometheus"},
          "fieldConfig": {
            "defaults": {
              "thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}, {"color": "yellow", "value": 1}, {"color": "red", "value": 3}]}
            }
          },
          "gridPos": {"h": 4, "w": 4, "x": 4, "y": 1},
          "title": "Ingress 5xx (5m)",
          "type": "stat",
          "targets": [{"expr": "sum(rate(nginx_ingress_controller_requests{status=~\"5..\", cluster_environment=~\"$cluster_environment\"}[5m]))", "legendFormat": ""}]
        },
        {
          "datasource": {"type": "prometheus"},
          "fieldConfig": {
            "defaults": {
              "unit": "short",
              "thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}, {"color": "red", "value": 1}]}
            }
          },
          "gridPos": {"h": 4, "w": 4, "x": 8, "y": 1},
          "title": "Failed Jobs (Active)",
          "type": "stat",
          "targets": [{"expr": "sum(kube_job_status_failed{namespace=\"gitea\", cluster_environment=~\"$cluster_environment\"}) or vector(0)", "legendFormat": ""}]
        },
        {
          "datasource": {"type": "prometheus"},
          "fieldConfig": {
            "defaults": {
              "unit": "percentunit",
              "thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}, {"color": "yellow", "value": 0.7}, {"color": "red", "value": 0.85}]}
            }
          },
          "gridPos": {"h": 4, "w": 4, "x": 12, "y": 1},
          "title": "Cluster CPU Usage",
          "type": "stat",
          "targets": [{"expr": "1 - avg(rate(node_cpu_seconds_total{mode=\"idle\", cluster_environment=~\"$cluster_environment\"}[5m]))", "legendFormat": ""}]
        },
        {
          "datasource": {"type": "prometheus"},
          "fieldConfig": {
            "defaults": {
              "unit": "percentunit",
              "thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}, {"color": "yellow", "value": 0.7}, {"color": "red", "value": 0.85}]}
            }
          },
          "gridPos": {"h": 4, "w": 4, "x": 16, "y": 1},
          "title": "Cluster Memory Usage",
          "type": "stat",
          "targets": [{"expr": "1 - sum(node_memory_MemAvailable_bytes{cluster_environment=~\"$cluster_environment\"}) / sum(node_memory_MemTotal_bytes{cluster_environment=~\"$cluster_environment\"})", "legendFormat": ""}]
        },
        {
          "datasource": {"type": "prometheus"},
          "fieldConfig": {
            "defaults": {
              "unit": "percentunit",
              "thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}, {"color": "yellow", "value": 0.6}, {"color": "red", "value": 0.8}]}
            }
          },
          "gridPos": {"h": 4, "w": 4, "x": 20, "y": 1},
          "title": "Max PVC Usage",
          "type": "stat",
          "targets": [{"expr": "max(1 - kubelet_volume_stats_available_bytes{cluster_environment=~\"$cluster_environment\"} / kubelet_volume_stats_capacity_bytes{cluster_environment=~\"$cluster_environment\"})", "legendFormat": ""}]
        },
        {
          "collapsed": false,
          "gridPos": {"h": 1, "w": 24, "x": 0, "y": 5},
          "title": "Forgejo",
          "type": "row"
        },
        {
          "datasource": {"type": "prometheus"},
          "fieldConfig": {"defaults": {"unit": "short"}},
          "gridPos": {"h": 4, "w": 4, "x": 0, "y": 6},
          "title": "Repositories",
          "type": "stat",
          "targets": [{"expr": "gitea_repositories{cluster_environment=~\"$cluster_environment\"}", "legendFormat": "{{cluster_environment}}"}]
        },
        {
          "datasource": {"type": "prometheus"},
          "fieldConfig": {"defaults": {"unit": "short"}},
          "gridPos": {"h": 4, "w": 4, "x": 4, "y": 6},
          "title": "Users",
          "type": "stat",
          "targets": [{"expr": "gitea_users{cluster_environment=~\"$cluster_environment\"}", "legendFormat": "{{cluster_environment}}"}]
        },
        {
          "datasource": {"type": "prometheus"},
          "fieldConfig": {"defaults": {"unit": "short"}},
          "gridPos": {"h": 4, "w": 4, "x": 8, "y": 6},
          "title": "Organizations",
          "type": "stat",
          "targets": [{"expr": "gitea_organizations{cluster_environment=~\"$cluster_environment\"}", "legendFormat": "{{cluster_environment}}"}]
        },
        {
          "datasource": {"type": "prometheus"},
          "fieldConfig": {"defaults": {"unit": "short"}},
          "gridPos": {"h": 4, "w": 4, "x": 12, "y": 6},
          "title": "Open Issues",
          "type": "stat",
          "targets": [{"expr": "gitea_issues_open{cluster_environment=~\"$cluster_environment\"}", "legendFormat": "{{cluster_environment}}"}]
        },
        {
          "datasource": {"type": "prometheus"},
          "fieldConfig": {"defaults": {"unit": "short"}},
          "gridPos": {"h": 4, "w": 4, "x": 16, "y": 6},
          "title": "Webhooks",
          "type": "stat",
          "targets": [{"expr": "gitea_webhooks{cluster_environment=~\"$cluster_environment\"}", "legendFormat": "{{cluster_environment}}"}]
        },
        {
          "datasource": {"type": "prometheus"},
          "fieldConfig": {"defaults": {"unit": "short"}},
          "gridPos": {"h": 4, "w": 4, "x": 20, "y": 6},
          "title": "Mirrors",
          "type": "stat",
          "targets": [{"expr": "gitea_mirrors{cluster_environment=~\"$cluster_environment\"}", "legendFormat": "{{cluster_environment}}"}]
        },
        {
          "collapsed": false,
          "gridPos": {"h": 1, "w": 24, "x": 0, "y": 10},
          "title": "Resources",
          "type": "row"
        },
        {
          "datasource": {"type": "prometheus"},
          "fieldConfig": {"defaults": {"unit": "percentunit", "min": 0, "max": 1}},
          "gridPos": {"h": 8, "w": 12, "x": 0, "y": 11},
          "title": "Node CPU Usage",
          "type": "timeseries",
          "targets": [{"expr": "1 - avg by(instance) (rate(node_cpu_seconds_total{mode=\"idle\", cluster_environment=~\"$cluster_environment\"}[5m]))", "legendFormat": "{{instance}}"}]
        },
        {
          "datasource": {"type": "prometheus"},
          "fieldConfig": {"defaults": {"unit": "percentunit", "min": 0, "max": 1}},
          "gridPos": {"h": 8, "w": 12, "x": 12, "y": 11},
          "title": "PVC Usage by Claim",
          "type": "timeseries",
          "targets": [{"expr": "1 - (kubelet_volume_stats_available_bytes{cluster_environment=~\"$cluster_environment\"} / kubelet_volume_stats_capacity_bytes{cluster_environment=~\"$cluster_environment\"})", "legendFormat": "{{namespace}}/{{persistentvolumeclaim}}"}]
        },
        {
          "collapsed": false,
          "gridPos": {"h": 1, "w": 24, "x": 0, "y": 19},
          "title": "Backups",
          "type": "row"
        },
        {
          "datasource": {"type": "prometheus"},
          "fieldConfig": {"defaults": {"unit": "s", "thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}, {"color": "yellow", "value": 86400}, {"color": "red", "value": 172800}]}}},
          "gridPos": {"h": 4, "w": 8, "x": 0, "y": 20},
          "title": "Time Since Last Backup Schedule",
          "type": "stat",
          "targets": [{"expr": "time() - kube_cronjob_status_last_schedule_time{cronjob=~\"forgejo-s3-backup|secrets-backup\", cluster_environment=~\"$cluster_environment\"}", "legendFormat": "{{cronjob}} ({{cluster_environment}})"}]
        },
        {
          "datasource": {"type": "prometheus"},
          "fieldConfig": {"defaults": {"unit": "s"}},
          "gridPos": {"h": 4, "w": 8, "x": 8, "y": 20},
          "title": "Backup Job Duration (Last 7d)",
          "type": "timeseries",
          "targets": [{"expr": "kube_job_status_completion_time{job_name=~\"forgejo-s3-backup.*|secrets-backup.*\", cluster_environment=~\"$cluster_environment\"} - kube_job_status_start_time{job_name=~\"forgejo-s3-backup.*|secrets-backup.*\", cluster_environment=~\"$cluster_environment\"}", "legendFormat": "{{job_name}}"}],
          "options": {"legend": {"displayMode": "table"}}
        },
        {
          "datasource": {"type": "prometheus"},
          "fieldConfig": {"defaults": {"unit": "short", "thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}, {"color": "red", "value": 1}]}}},
          "gridPos": {"h": 4, "w": 8, "x": 16, "y": 20},
          "title": "Failed Backup Jobs (Active)",
          "type": "stat",
          "targets": [{"expr": "sum by(cluster_environment, job_name) (kube_job_status_failed{job_name=~\"forgejo-s3-backup.*|secrets-backup.*\", cluster_environment=~\"$cluster_environment\"})", "legendFormat": "{{job_name}} ({{cluster_environment}})"}]
        }
      ],
      "schemaVersion": 39,
      "tags": ["edp", "platform", "overview"],
      "templating": {
        "list": [
          {
            "current": {"selected": true, "text": "All", "value": "$__all"},
            "datasource": {"type": "prometheus"},
            "definition": "label_values(up, cluster_environment)",
            "includeAll": true,
            "multi": true,
            "name": "cluster_environment",
            "label": "Environment",
            "query": "label_values(up, cluster_environment)",
            "refresh": 2,
            "sort": 1,
            "type": "query"
          }
        ]
      },
      "time": {"from": "now-6h", "to": "now"},
      "title": "EDP Platform Overview",
      "uid": "edp-platform-overview"
    }
|
||||
|
|
@ -6,4 +6,6 @@ spec:
|
|||
instanceSelector:
|
||||
matchLabels:
|
||||
dashboards: "grafana"
|
||||
url: "https://raw.githubusercontent.com/VictoriaMetrics/VictoriaMetrics/refs/heads/master/dashboards/vm/victorialogs.json"
|
||||
grafanaCom:
|
||||
id: 22698
|
||||
revision: 1
|
||||
|
|
|
|||
|
|
@ -0,0 +1,13 @@
|
|||
# VMServiceScrape for Argo CD: selects Services labelled
# app.kubernetes.io/part-of=argocd in the "argocd" namespace and scrapes
# their "http-metrics" port (no explicit path is set).
apiVersion: operator.victoriametrics.com/v1beta1
kind: VMServiceScrape
metadata:
  name: argocd
spec:
  # Only look for matching Services inside the argocd namespace.
  namespaceSelector:
    matchNames:
      - argocd
  selector:
    matchLabels:
      app.kubernetes.io/part-of: argocd
  endpoints:
    - port: http-metrics
|
||||
|
|
@ -0,0 +1,61 @@
|
|||
# VMRule with recording rules for CI resource-usage accounting.
#
# Two groups, both evaluated every 5 minutes:
#   * ci.sustainability.daily   - trailing-1d aggregates for pods whose name
#                                 matches forgejo-runner.* or garm-.* in the
#                                 gitea / garm namespaces
#   * ci.sustainability.cluster - cluster-wide CPU and memory baselines
apiVersion: operator.victoriametrics.com/v1beta1
kind: VMRule
metadata:
  name: ci-sustainability
spec:
  groups:
    - name: ci.sustainability.daily
      interval: 5m
      rules:
        # CPU seconds consumed over the last day, per namespace and cluster.
        # container!="" drops the series that carry an empty container label.
        - record: ci:cpu_seconds:increase1d
          expr: |
            sum by(namespace, cluster) (
              increase(container_cpu_usage_seconds_total{
                namespace=~"gitea|garm",
                pod=~"forgejo-runner.*|garm-.*",
                container!=""
              }[1d])
            )
        # 1-day average (5m subquery resolution) of the summed working-set
        # bytes, per namespace and cluster.
        # NOTE(review): the record name says "bytes_seconds" but the
        # expression yields an average in bytes — confirm intended unit.
        - record: ci:memory_bytes_seconds:avg1d
          expr: |
            avg_over_time(
              sum by(namespace, cluster) (
                container_memory_working_set_bytes{
                  namespace=~"gitea|garm",
                  pod=~"forgejo-runner.*|garm-.*",
                  container!=""
                }
              )[1d:5m]
            )
        # 1-day average (5m subquery resolution) of the number of matching
        # kube_pod_info series, per namespace and cluster.
        - record: ci:pod_count:avg1d
          expr: |
            avg_over_time(
              count by(namespace, cluster) (
                kube_pod_info{
                  namespace=~"gitea|garm",
                  pod=~"forgejo-runner.*|garm-.*"
                }
              )[1d:5m]
            )
        # Sum of changes() of kube_pod_start_time over the last day, per
        # namespace and cluster.
        - record: ci:pod_creations:increase1d
          expr: |
            sum by(namespace, cluster) (
              changes(kube_pod_start_time{
                namespace=~"gitea|garm",
                pod=~"forgejo-runner.*|garm-.*"
              }[1d])
            )
    - name: ci.sustainability.cluster
      interval: 5m
      rules:
        # Per-cluster rate of non-idle CPU seconds over 5 minutes.
        - record: cluster:cpu_seconds:rate5m
          expr: |
            sum by(cluster) (
              rate(node_cpu_seconds_total{mode!="idle"}[5m])
            )
        # Per-cluster memory in use: total minus available bytes.
        - record: cluster:memory_used_bytes:sum
          expr: |
            sum by(cluster) (
              node_memory_MemTotal_bytes - node_memory_MemAvailable_bytes
            )
|
||||
|
||||
|
|
@ -0,0 +1,30 @@
|
|||
# Headless Service (clusterIP: None) in kube-system that exposes port 9153
# under the name "metrics" for pods labelled k8s-app=coredns. The Service
# itself is labelled k8s-app=coredns-metrics so the scrape below can find it.
# NOTE(review): some distributions label CoreDNS pods k8s-app=kube-dns —
# confirm the pod selector matches this cluster.
apiVersion: v1
kind: Service
metadata:
  name: coredns-metrics
  namespace: kube-system
  labels:
    k8s-app: coredns-metrics
spec:
  clusterIP: None
  selector:
    k8s-app: coredns
  ports:
    - name: metrics
      port: 9153
      targetPort: 9153
      protocol: TCP
---
# VMServiceScrape that selects the Service above (by its
# k8s-app=coredns-metrics label, in kube-system) and scrapes its
# "metrics" port.
apiVersion: operator.victoriametrics.com/v1beta1
kind: VMServiceScrape
metadata:
  name: coredns
spec:
  namespaceSelector:
    matchNames:
      - kube-system
  selector:
    matchLabels:
      k8s-app: coredns-metrics
  endpoints:
    - port: metrics
|
||||
|
|
@ -0,0 +1,13 @@
|
|||
# VMServiceScrape for GARM: selects Services labelled
# app.kubernetes.io/name=garm in the "garm" namespace and scrapes their
# "http" port (no explicit path is set).
apiVersion: operator.victoriametrics.com/v1beta1
kind: VMServiceScrape
metadata:
  name: garm
spec:
  # Only look for matching Services inside the garm namespace.
  namespaceSelector:
    matchNames:
      - garm
  selector:
    matchLabels:
      app.kubernetes.io/name: garm
  endpoints:
    - port: http
|
||||
Loading…
Add table
Add a link
Reference in a new issue