feat(observability): add sustainability metrics, Kepler, 6-month retention, GARM scrape
This commit is contained in:
parent
bbdca11f00
commit
b5594a8017
6 changed files with 141 additions and 1 deletions
|
|
@ -0,0 +1,75 @@
|
|||
---
# Recording rules that pre-aggregate CI resource and energy usage.
#
# Three rule groups, each evaluated every 5 minutes:
#   * ci.sustainability.daily   - trailing-1d CPU, memory and pod statistics
#                                 for CI pods in the gitea/garm namespaces
#   * ci.sustainability.cluster - cluster-wide CPU and memory baselines
#   * ci.sustainability.energy  - Kepler energy counters (CI and cluster-wide)
apiVersion: operator.victoriametrics.com/v1beta1
kind: VMRule
metadata:
  name: ci-sustainability
spec:
  groups:
    - name: ci.sustainability.daily
      interval: 5m
      rules:
        # CPU seconds consumed by CI pods over the trailing 24h.
        # container!="" drops series that carry no container label.
        - record: ci:cpu_seconds:increase1d
          expr: |
            sum by(namespace, cluster) (
              increase(container_cpu_usage_seconds_total{
                namespace=~"gitea|garm",
                pod=~"forgejo-runner.*|garm-.*",
                container!=""
              }[1d])
            )

        # Trailing-24h average (sampled every 5m) of the summed working-set
        # memory of CI pods.
        # NOTE(review): despite "bytes_seconds" in the name, this value is an
        # average in bytes, not a byte-seconds integral. The name is kept so
        # existing consumers keep working - confirm whether consumers expect
        # bytes or byte-seconds.
        - record: ci:memory_bytes_seconds:avg1d
          expr: |
            avg_over_time(
              sum by(namespace, cluster) (
                container_memory_working_set_bytes{
                  namespace=~"gitea|garm",
                  pod=~"forgejo-runner.*|garm-.*",
                  container!=""
                }
              )[1d:5m]
            )

        # Trailing-24h average (sampled every 5m) of the number of CI pods.
        - record: ci:pod_count:avg1d
          expr: |
            avg_over_time(
              count by(namespace, cluster) (
                kube_pod_info{
                  namespace=~"gitea|garm",
                  pod=~"forgejo-runner.*|garm-.*"
                }
              )[1d:5m]
            )

        # Number of CI pods started within the trailing 24h.
        # The previous expression, changes(kube_pod_start_time[1d]), counted
        # value changes inside each pod's series. A pod's start timestamp does
        # not change once set, so that expression never counted creations.
        # Instead, take the last start timestamp seen for every pod series in
        # the window and count those that fall inside the last 86400 seconds.
        # When no pod was started in the window the result is empty, not 0.
        - record: ci:pod_creations:increase1d
          expr: |
            count by(namespace, cluster) (
              last_over_time(kube_pod_start_time{
                namespace=~"gitea|garm",
                pod=~"forgejo-runner.*|garm-.*"
              }[1d]) > (time() - 86400)
            )

    - name: ci.sustainability.cluster
      interval: 5m
      rules:
        # Non-idle CPU seconds per second, summed per cluster.
        - record: cluster:cpu_seconds:rate5m
          expr: |
            sum by(cluster) (
              rate(node_cpu_seconds_total{mode!="idle"}[5m])
            )

        # Used memory (MemTotal minus MemAvailable), summed per cluster.
        - record: cluster:memory_used_bytes:sum
          expr: |
            sum by(cluster) (
              node_memory_MemTotal_bytes - node_memory_MemAvailable_bytes
            )

    - name: ci.sustainability.energy
      interval: 5m
      rules:
        # Joules attributed by Kepler to containers in the CI namespaces over
        # the trailing 24h.
        # NOTE(review): unlike the ci.sustainability.daily rules, this selects
        # by namespace only (no pod filter) and groups by container_namespace
        # rather than namespace - confirm that both are intended.
        - record: ci:joules:increase1d
          expr: |
            sum by(container_namespace, cluster) (
              increase(kepler_container_joules_total{
                container_namespace=~"gitea|garm"
              }[1d])
            )

        # Per-second rate of the Kepler node energy counter, summed per
        # cluster.
        - record: cluster:joules:rate5m
          expr: |
            sum by(cluster) (
              rate(kepler_node_joules_total[5m])
            )
|
||||
|
|
@ -0,0 +1,13 @@
|
|||
---
# Scrape configuration for GARM: targets Services in the "garm" namespace
# labelled app.kubernetes.io/name=garm, using the Service port named
# "metrics". No scrape path is set here.
apiVersion: operator.victoriametrics.com/v1beta1
kind: VMServiceScrape
metadata:
  name: garm
spec:
  # Port to scrape, referenced by name.
  endpoints:
    - port: metrics
  # Only look for matching Services inside this namespace.
  namespaceSelector:
    matchNames:
      - garm
  # Label selector identifying the Service(s) to scrape.
  selector:
    matchLabels:
      app.kubernetes.io/name: garm
|
||||
|
|
@ -0,0 +1,13 @@
|
|||
---
# Scrape configuration for Kepler: targets Services in the "observability"
# namespace labelled app.kubernetes.io/name=kepler, using the Service port
# named "http". No scrape path is set here.
apiVersion: operator.victoriametrics.com/v1beta1
kind: VMServiceScrape
metadata:
  name: kepler
spec:
  # Port to scrape, referenced by name.
  endpoints:
    - port: http
  # Only look for matching Services inside this namespace.
  namespaceSelector:
    matchNames:
      - observability
  # Label selector identifying the Service(s) to scrape.
  selector:
    matchLabels:
      app.kubernetes.io/name: kepler
|
||||
|
|
@ -283,7 +283,7 @@ vmsingle:
|
|||
spec:
|
||||
port: "8429"
|
||||
# -- Data retention period. Possible units character: h(ours), d(ays), w(eeks), y(ears), if no unit character specified - month. The minimum retention period is 24h. See these [docs](https://docs.victoriametrics.com/single-server-victoriametrics/#retention)
|
||||
retentionPeriod: "1"
|
||||
retentionPeriod: "6"
|
||||
replicaCount: 1
|
||||
extraArgs: {}
|
||||
storageMetadata:
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue