stacks/template/stacks/observability/victoria-k8s-stack/manifests/ci-sustainability-rules.yaml
Daniel Sy 3bc8a7444b
feat(observability): add dashboards, scrape configs, and fix victoria-logs to template
Add new Grafana dashboard CRs to grafana-operator/manifests:
- platform-overview, forgejo, argocd-operational, cronjob-monitoring

Fix victoria-logs dashboard to use grafana.com marketplace (id: 22698)
instead of raw GitHub URL

Add hub-side scrape configs to victoria-k8s-stack/manifests:
- argocd-scrape, garm-scrape, coredns-scrape, ci-sustainability-rules

Add client-side forgejo VMServiceScrape to observability-client/vm-client-stack/manifests

Enable ArgoCD metrics endpoints in core/argocd/values.yaml (required by argocd-scrape)
2026-06-19 12:58:26 +02:00

61 lines
1.8 KiB
YAML

# VMRule "ci-sustainability": recording rules that pre-aggregate resource usage
# of CI workloads (pods matching forgejo-runner.* or garm-.* in the gitea and
# garm namespaces) plus cluster-wide CPU and memory totals.
# Every rule aggregates by the `cluster` label; the CI rules also keep `namespace`.
apiVersion: operator.victoriametrics.com/v1beta1
kind: VMRule
metadata:
  name: ci-sustainability
spec:
  groups:
    # Per-namespace CI roll-ups over a trailing 1-day window, re-evaluated every 5m.
    - name: ci.sustainability.daily
      interval: 5m
      rules:
        # CPU seconds consumed by CI containers during the last day.
        # container!="" drops series that carry an empty container label.
        - record: ci:cpu_seconds:increase1d
          expr: |
            sum by(namespace, cluster) (
              increase(container_cpu_usage_seconds_total{
                namespace=~"gitea|garm",
                pod=~"forgejo-runner.*|garm-.*",
                container!=""
              }[1d])
            )

        # 1-day average (sampled at 5m steps via subquery) of the summed
        # working-set memory of CI containers.
        # NOTE(review): the record name says "bytes_seconds", but the expression
        # yields an average in bytes, not a byte-seconds integral. The name is
        # kept as-is because consumers may already query it; confirm the intent.
        - record: ci:memory_bytes_seconds:avg1d
          expr: |
            avg_over_time(
              sum by(namespace, cluster) (
                container_memory_working_set_bytes{
                  namespace=~"gitea|garm",
                  pod=~"forgejo-runner.*|garm-.*",
                  container!=""
                }
              )[1d:5m]
            )

        # 1-day average (sampled at 5m steps) of the number of kube_pod_info
        # series matching the CI pod selectors.
        - record: ci:pod_count:avg1d
          expr: |
            avg_over_time(
              count by(namespace, cluster) (
                kube_pod_info{
                  namespace=~"gitea|garm",
                  pod=~"forgejo-runner.*|garm-.*"
                }
              )[1d:5m]
            )

        # Sum of changes() of kube_pod_start_time across CI pods over the last day.
        # NOTE(review): presumably each pod's start time is constant, so this only
        # counts pod creations if changes() counts the first sample of a series
        # that appears inside the window. Confirm against the MetricsQL changes()
        # documentation for the deployed VictoriaMetrics version.
        - record: ci:pod_creations:increase1d
          expr: |
            sum by(namespace, cluster) (
              changes(kube_pod_start_time{
                namespace=~"gitea|garm",
                pod=~"forgejo-runner.*|garm-.*"
              }[1d])
            )

    # Cluster-wide totals, re-evaluated every 5m.
    - name: ci.sustainability.cluster
      interval: 5m
      rules:
        # Per-cluster sum of the 5m rate of node_cpu_seconds_total over all
        # modes except "idle".
        - record: cluster:cpu_seconds:rate5m
          expr: |
            sum by(cluster) (
              rate(node_cpu_seconds_total{mode!="idle"}[5m])
            )

        # Per-cluster sum of (MemTotal - MemAvailable) across nodes.
        - record: cluster:memory_used_bytes:sum
          expr: |
            sum by(cluster) (
              node_memory_MemTotal_bytes - node_memory_MemAvailable_bytes
            )