fix(observability): 🐛 harden vmagent liveness probe failureThreshold 10→3

A silent outage went undetected for 72h because the liveness probe was too lenient.
Add a startupProbe (failureThreshold=30) to allow for slow starts.
This commit is contained in:
Daniel Sy 2026-06-22 10:40:43 +02:00
parent 01c41c9379
commit 3ed3487e97
Signed by untrusted user: danielsy
GPG key ID: 1F39A8BBCD2EE3D3

View file

@ -801,6 +801,20 @@ vmagent:
# Do not store original labels in vmagent's memory by default. This reduces the amount of memory used by vmagent
# but makes vmagent debugging UI less informative. See: https://docs.victoriametrics.com/vmagent/#relabel-debug
promscrape.dropOriginalLabels: "true"
# Harden the liveness probe: the default failureThreshold=10 masked a 72h silent outage.
# With failureThreshold=3 and periodSeconds=5, three consecutive failed /health
# checks (on the order of 15s) are enough to fail the probe.
livenessProbe:
httpGet:
path: /health
port: http
failureThreshold: 3
periodSeconds: 5
# Per-check timeout; equal to periodSeconds here.
timeoutSeconds: 5
# Startup probe tolerates slow starts: up to 30 failed checks at a 5s period
# (about 150s) against the same /health endpoint on the same port.
# NOTE(review): no timeoutSeconds is set here, unlike livenessProbe, so the
# consumer's default per-check timeout applies; confirm that is intended.
startupProbe:
httpGet:
path: /health
port: http
failureThreshold: 30
periodSeconds: 5
# -- (object) VMAgent ingress configuration
ingress:
enabled: false