---
# GrafanaDashboard custom resource that defines the
# "CronJob & Backup Monitoring" dashboard (uid: edp-cronjobs).
#
# Layout of the embedded dashboard model:
#   row "Backup Job Status" - stat panels: time since last schedule,
#                             currently failed jobs
#   row "CronJob Overview"  - table of all CronJobs
#   row "Job History"       - time series of succeeded / failed jobs
#
# Every query is filtered by the multi-value template variable
# `cluster_environment`, which is populated from
# label_values(kube_cronjob_info, cluster_environment).
apiVersion: grafana.integreatly.org/v1beta1
kind: GrafanaDashboard
metadata:
  name: cronjob-monitoring
spec:
  # Applied to Grafana instances that carry the label dashboards=grafana.
  instanceSelector:
    matchLabels:
      dashboards: "grafana"
  folder: "EDP / Operations"
  # Dashboard model as JSON. JSON has no comment syntax, so notes live here.
  #
  # "All CronJobs" table: query A (kube_cronjob_info) supplies the
  # descriptive labels; query B (kube_cronjob_spec_suspend) supplies the
  # 0/1 suspend flag. The "merge" transformation combines rows whose shared
  # fields are equal, "Value #B" is kept and renamed to "Suspended", and the
  # field override on "Suspended" maps 0 -> "No" and 1 -> "YES".
  # NOTE(review): assumes kube_cronjob_spec_suspend is scraped with the same
  # target labels as kube_cronjob_info - confirm against the metrics source.
  json: |
    {
      "annotations": {"list": []},
      "editable": true,
      "graphTooltip": 1,
      "panels": [
        {
          "collapsed": false,
          "gridPos": {"h": 1, "w": 24, "x": 0, "y": 0},
          "title": "Backup Job Status",
          "type": "row"
        },
        {
          "datasource": {"type": "prometheus"},
          "fieldConfig": {
            "defaults": {
              "unit": "s",
              "thresholds": {
                "mode": "absolute",
                "steps": [
                  {"color": "green", "value": null},
                  {"color": "yellow", "value": 86400},
                  {"color": "red", "value": 172800}
                ]
              }
            }
          },
          "gridPos": {"h": 5, "w": 12, "x": 0, "y": 1},
          "title": "Time Since Last Schedule",
          "type": "stat",
          "targets": [
            {
              "expr": "time() - kube_cronjob_status_last_schedule_time{cluster_environment=~\"$cluster_environment\"}",
              "legendFormat": "{{cronjob}} ({{cluster_environment}})",
              "refId": "A"
            }
          ]
        },
        {
          "datasource": {"type": "prometheus"},
          "fieldConfig": {
            "defaults": {
              "unit": "short",
              "thresholds": {
                "mode": "absolute",
                "steps": [
                  {"color": "green", "value": null},
                  {"color": "red", "value": 1}
                ]
              }
            }
          },
          "gridPos": {"h": 5, "w": 12, "x": 12, "y": 1},
          "title": "Failed Jobs (Active)",
          "type": "stat",
          "targets": [
            {
              "expr": "sum by(cluster_environment, job_name) (kube_job_status_failed{cluster_environment=~\"$cluster_environment\"}) > 0",
              "legendFormat": "{{job_name}} ({{cluster_environment}})",
              "refId": "A"
            }
          ]
        },
        {
          "collapsed": false,
          "gridPos": {"h": 1, "w": 24, "x": 0, "y": 6},
          "title": "CronJob Overview",
          "type": "row"
        },
        {
          "datasource": {"type": "prometheus"},
          "fieldConfig": {
            "defaults": {"custom": {"filterable": true}},
            "overrides": [
              {
                "matcher": {"id": "byName", "options": "Suspended"},
                "properties": [
                  {
                    "id": "mappings",
                    "value": [
                      {
                        "options": {
                          "0": {"text": "No", "color": "green"},
                          "1": {"text": "YES", "color": "red"}
                        },
                        "type": "value"
                      }
                    ]
                  }
                ]
              }
            ]
          },
          "gridPos": {"h": 8, "w": 24, "x": 0, "y": 7},
          "title": "All CronJobs",
          "type": "table",
          "targets": [
            {
              "expr": "kube_cronjob_info{cluster_environment=~\"$cluster_environment\"}",
              "format": "table",
              "instant": true,
              "refId": "A"
            },
            {
              "expr": "kube_cronjob_spec_suspend{cluster_environment=~\"$cluster_environment\"}",
              "format": "table",
              "instant": true,
              "refId": "B"
            }
          ],
          "transformations": [
            {"id": "merge", "options": {}},
            {
              "id": "filterFieldsByName",
              "options": {
                "include": {
                  "names": ["cluster_environment", "cronjob", "namespace", "schedule", "Value #B"]
                }
              }
            },
            {
              "id": "organize",
              "options": {
                "renameByName": {
                  "cluster_environment": "Environment",
                  "cronjob": "CronJob",
                  "namespace": "Namespace",
                  "schedule": "Schedule",
                  "Value #B": "Suspended"
                }
              }
            }
          ]
        },
        {
          "collapsed": false,
          "gridPos": {"h": 1, "w": 24, "x": 0, "y": 15},
          "title": "Job History",
          "type": "row"
        },
        {
          "datasource": {"type": "prometheus"},
          "fieldConfig": {"defaults": {"unit": "short"}},
          "gridPos": {"h": 8, "w": 12, "x": 0, "y": 16},
          "title": "Job Completions (24h)",
          "type": "timeseries",
          "targets": [
            {
              "expr": "sum(kube_job_status_succeeded{cluster_environment=~\"$cluster_environment\"}) by (job_name, cluster_environment)",
              "legendFormat": "{{job_name}} ({{cluster_environment}})",
              "refId": "A"
            }
          ]
        },
        {
          "datasource": {"type": "prometheus"},
          "fieldConfig": {
            "defaults": {
              "unit": "short",
              "color": {"mode": "palette-classic"}
            }
          },
          "gridPos": {"h": 8, "w": 12, "x": 12, "y": 16},
          "title": "Job Failures (24h)",
          "type": "timeseries",
          "targets": [
            {
              "expr": "sum(kube_job_status_failed{cluster_environment=~\"$cluster_environment\"}) by (job_name, cluster_environment)",
              "legendFormat": "{{job_name}} ({{cluster_environment}})",
              "refId": "A"
            }
          ]
        }
      ],
      "schemaVersion": 39,
      "tags": ["edp", "backup", "cronjob"],
      "templating": {
        "list": [
          {
            "current": {"selected": true, "text": "All", "value": "$__all"},
            "datasource": {"type": "prometheus"},
            "definition": "label_values(kube_cronjob_info, cluster_environment)",
            "includeAll": true,
            "multi": true,
            "name": "cluster_environment",
            "label": "Environment",
            "query": "label_values(kube_cronjob_info, cluster_environment)",
            "refresh": 2,
            "sort": 1,
            "type": "query"
          }
        ]
      },
      "time": {"from": "now-24h", "to": "now"},
      "title": "CronJob & Backup Monitoring",
      "uid": "edp-cronjobs"
    }