Assigns the `folder` field to all GrafanaDashboard CRs:
- EDP / Overview: platform-overview
- EDP / Applications: forgejo, argocd-operational, garm, argocd
- EDP / Operations: cronjob-monitoring, ingress-nginx, victoria-logs
103 lines
4.7 KiB
YAML
103 lines
4.7 KiB
YAML
# GrafanaDashboard custom resource for the "CronJob & Backup Monitoring"
# dashboard (uid: edp-cronjobs).
#
# Layout of the embedded dashboard model:
#   - row "Backup Job Status": stat "Time Since Last Schedule" (yellow at
#     86400 s, red at 172800 s) and stat "Failed Jobs (Active)" (red at >= 1).
#   - row "CronJob Overview":  table "All CronJobs" built from kube_cronjob_info.
#   - row "Job History":       timeseries of succeeded / failed job counts.
# Every panel query is filtered by the multi-value template variable
# $cluster_environment, populated from
# label_values(kube_cronjob_info, cluster_environment).
apiVersion: grafana.integreatly.org/v1beta1
kind: GrafanaDashboard
metadata:
  name: cronjob-monitoring
spec:
  # Label selector for the target Grafana instance(s); presumably matches
  # Grafana CRs labelled dashboards=grafana - confirm against the Grafana CR.
  instanceSelector:
    matchLabels:
      dashboards: "grafana"
  # Grafana folder this dashboard is assigned to.
  folder: "EDP / Operations"
  # Dashboard model as strict JSON. No comments may be placed inside this block
  # scalar: everything below is passed through verbatim as the JSON document.
  #
  # NOTE(review): the "All CronJobs" table defines a value-mapping override for
  # a field named "Suspended", but its only query is kube_cronjob_info and the
  # filterFieldsByName transformation keeps just cluster_environment, cronjob,
  # namespace and schedule - so no "Suspended" column exists and the override
  # currently matches nothing. Either add a suspend query/column or drop it.
  json: |
    {
      "annotations": {"list": []},
      "editable": true,
      "graphTooltip": 1,
      "panels": [
        {
          "collapsed": false,
          "gridPos": {"h": 1, "w": 24, "x": 0, "y": 0},
          "title": "Backup Job Status",
          "type": "row"
        },
        {
          "datasource": {"type": "prometheus"},
          "fieldConfig": {"defaults": {"unit": "s", "thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}, {"color": "yellow", "value": 86400}, {"color": "red", "value": 172800}]}}},
          "gridPos": {"h": 5, "w": 12, "x": 0, "y": 1},
          "title": "Time Since Last Schedule",
          "type": "stat",
          "targets": [{"expr": "time() - kube_cronjob_status_last_schedule_time{cluster_environment=~\"$cluster_environment\"}", "legendFormat": "{{cronjob}} ({{cluster_environment}})"}]
        },
        {
          "datasource": {"type": "prometheus"},
          "fieldConfig": {"defaults": {"unit": "short", "thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}, {"color": "red", "value": 1}]}}},
          "gridPos": {"h": 5, "w": 12, "x": 12, "y": 1},
          "title": "Failed Jobs (Active)",
          "type": "stat",
          "targets": [{"expr": "sum by(cluster_environment, job_name) (kube_job_status_failed{cluster_environment=~\"$cluster_environment\"}) > 0", "legendFormat": "{{job_name}} ({{cluster_environment}})"}]
        },
        {
          "collapsed": false,
          "gridPos": {"h": 1, "w": 24, "x": 0, "y": 6},
          "title": "CronJob Overview",
          "type": "row"
        },
        {
          "datasource": {"type": "prometheus"},
          "fieldConfig": {"defaults": {"custom": {"filterable": true}}, "overrides": [{"matcher": {"id": "byName", "options": "Suspended"}, "properties": [{"id": "mappings", "value": [{"options": {"0": {"text": "No", "color": "green"}, "1": {"text": "YES", "color": "red"}}, "type": "value"}]}]}]},
          "gridPos": {"h": 8, "w": 24, "x": 0, "y": 7},
          "title": "All CronJobs",
          "type": "table",
          "targets": [
            {"expr": "kube_cronjob_info{cluster_environment=~\"$cluster_environment\"}", "format": "table", "instant": true, "refId": "A"}
          ],
          "transformations": [
            {"id": "filterFieldsByName", "options": {"include": {"names": ["cluster_environment", "cronjob", "namespace", "schedule"]}}},
            {"id": "organize", "options": {"renameByName": {"cluster_environment": "Environment", "cronjob": "CronJob", "namespace": "Namespace", "schedule": "Schedule"}}}
          ]
        },
        {
          "collapsed": false,
          "gridPos": {"h": 1, "w": 24, "x": 0, "y": 15},
          "title": "Job History",
          "type": "row"
        },
        {
          "datasource": {"type": "prometheus"},
          "fieldConfig": {"defaults": {"unit": "short"}},
          "gridPos": {"h": 8, "w": 12, "x": 0, "y": 16},
          "title": "Job Completions (24h)",
          "type": "timeseries",
          "targets": [{"expr": "sum(kube_job_status_succeeded{cluster_environment=~\"$cluster_environment\"}) by (job_name, cluster_environment)", "legendFormat": "{{job_name}} ({{cluster_environment}})"}]
        },
        {
          "datasource": {"type": "prometheus"},
          "fieldConfig": {"defaults": {"unit": "short", "color": {"mode": "palette-classic"}}},
          "gridPos": {"h": 8, "w": 12, "x": 12, "y": 16},
          "title": "Job Failures (24h)",
          "type": "timeseries",
          "targets": [{"expr": "sum(kube_job_status_failed{cluster_environment=~\"$cluster_environment\"}) by (job_name, cluster_environment)", "legendFormat": "{{job_name}} ({{cluster_environment}})"}]
        }
      ],
      "schemaVersion": 39,
      "tags": ["edp", "backup", "cronjob"],
      "templating": {
        "list": [
          {
            "current": {"selected": true, "text": "All", "value": "$__all"},
            "datasource": {"type": "prometheus"},
            "definition": "label_values(kube_cronjob_info, cluster_environment)",
            "includeAll": true,
            "multi": true,
            "name": "cluster_environment",
            "label": "Environment",
            "query": "label_values(kube_cronjob_info, cluster_environment)",
            "refresh": 2,
            "sort": 1,
            "type": "query"
          }
        ]
      },
      "time": {"from": "now-24h", "to": "now"},
      "title": "CronJob & Backup Monitoring",
      "uid": "edp-cronjobs"
    }