---
# VMRule with three alerting groups: forgejo availability, the forgejo S3
# backup job, and PVC disk consumption. Every rule must hold for 30s before
# firing and reports the evaluated value in the `value` annotation.
apiVersion: operator.victoriametrics.com/v1beta1
kind: VMRule
metadata:
  name: forgejo-alerts
  namespace: observability
spec:
  groups:
    - name: forgejo
      rules:
        # Fires when the sum of `up` over pods matching `forgejo-server-.*`
        # is below 1 for a cluster_environment.
        # NOTE(review): if no `up` series match the selector at all, the
        # expression returns nothing and this alert cannot fire; consider an
        # `absent(...)` guard if that case matters.
        - alert: forgejo down
          expr: >-
            sum by(cluster_environment) (up{pod=~"forgejo-server-.*"}) < 1
          for: 30s
          labels:
            severity: critical
            # NOTE(review): the expression keeps only `cluster_environment`,
            # so `$labels.job` is presumably empty here; confirm whether this
            # label is needed.
            job: '{{ $labels.job }}'
          annotations:
            value: '{{ $value }}'
            description: >-
              forgejo is down in cluster environment
              {{ $labels.cluster_environment }}

    - name: forgejo-backup
      rules:
        # Fires when any job matching `forgejo-s3-backup-.*` reports a
        # non-zero `kube_job_status_failed` for a cluster_environment.
        - alert: forgejo s3 backup job failed
          expr: >-
            max by(cluster_environment)
            (kube_job_status_failed{job_name=~"forgejo-s3-backup-.*"}) != 0
          for: 30s
          labels:
            severity: critical
            # NOTE(review): the expression keeps only `cluster_environment`,
            # so `$labels.job` is presumably empty here; confirm whether this
            # label is needed.
            job: '{{ $labels.job }}'
          annotations:
            value: '{{ $value }}'
            description: >-
              forgejo s3 backup job failed in cluster environment
              {{ $labels.cluster_environment }}

    - name: disk-consumption-high
      rules:
        # Fires when the used fraction of a volume (1 - available/capacity)
        # exceeds 0.6. The expression has no label selector, so it is
        # evaluated for every series of these kubelet metrics, not only
        # forgejo volumes.
        - alert: disk consumption high
          expr: >-
            1-(kubelet_volume_stats_available_bytes /
            kubelet_volume_stats_capacity_bytes) > 0.6
          for: 30s
          labels:
            severity: major
            job: '{{ $labels.job }}'
          annotations:
            value: '{{ $value }}'
            description: >-
              disk consumption of pvc
              {{ $labels.namespace }}/{{ $labels.persistentvolumeclaim }}
              is high in cluster environment {{ $labels.cluster_environment }}