From 4b11db5668e0fac472a3a55350eb0941c7e52d32 Mon Sep 17 00:00:00 2001 From: Automated pipeline Date: Tue, 17 Mar 2026 13:14:26 +0000 Subject: [PATCH 001/114] Automated upload for dev.t09.de --- otc/dev.t09.de/stacks/forgejo/forgejo-server/values.yaml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/otc/dev.t09.de/stacks/forgejo/forgejo-server/values.yaml b/otc/dev.t09.de/stacks/forgejo/forgejo-server/values.yaml index 4c35c43..ec901a0 100644 --- a/otc/dev.t09.de/stacks/forgejo/forgejo-server/values.yaml +++ b/otc/dev.t09.de/stacks/forgejo/forgejo-server/values.yaml @@ -137,6 +137,9 @@ gitea: ENABLED: true ADAPTER: redis + security: + GLOBAL_TWO_FACTOR_REQUIREMENT: admin + service: DISABLE_REGISTRATION: true ENABLE_NOTIFY_MAIL: true From 2f15b6b3734fe2861f749e94c0342ae7da3d4421 Mon Sep 17 00:00:00 2001 From: Automated pipeline Date: Tue, 17 Mar 2026 13:25:52 +0000 Subject: [PATCH 002/114] Automated upload for edp.buildth.ing --- .../stacks/forgejo/forgejo-server/values.yaml | 5 +++- otc/edp.buildth.ing/stacks/garm/garm.yaml | 2 +- .../stacks/garm/garm/values.yaml | 3 +++ ...iser-receiver.yaml => sizer-receiver.yaml} | 4 +-- .../deployment.yaml | 26 +++++++++---------- .../ingress.yaml | 10 +++---- 6 files changed, 28 insertions(+), 22 deletions(-) rename otc/edp.buildth.ing/stacks/garm/{optimiser-receiver.yaml => sizer-receiver.yaml} (84%) rename otc/edp.buildth.ing/stacks/garm/{optimiser-receiver => sizer-receiver}/deployment.yaml (77%) rename otc/edp.buildth.ing/stacks/garm/{optimiser-receiver => sizer-receiver}/ingress.yaml (69%) diff --git a/otc/edp.buildth.ing/stacks/forgejo/forgejo-server/values.yaml b/otc/edp.buildth.ing/stacks/forgejo/forgejo-server/values.yaml index 2b64cca..c9e7a8a 100644 --- a/otc/edp.buildth.ing/stacks/forgejo/forgejo-server/values.yaml +++ b/otc/edp.buildth.ing/stacks/forgejo/forgejo-server/values.yaml @@ -137,6 +137,9 @@ gitea: ENABLED: true ADAPTER: redis + security: + GLOBAL_TWO_FACTOR_REQUIREMENT: admin + service: 
DISABLE_REGISTRATION: true ENABLE_NOTIFY_MAIL: true @@ -177,4 +180,4 @@ image: # rootless: true fullOverride: observability.buildth.ing/devfw-cicd/edp-forgejo:14.0.2-edp1-rootless -forgejo: {} \ No newline at end of file +forgejo: {} diff --git a/otc/edp.buildth.ing/stacks/garm/garm.yaml b/otc/edp.buildth.ing/stacks/garm/garm.yaml index ab493b2..1a44c7c 100644 --- a/otc/edp.buildth.ing/stacks/garm/garm.yaml +++ b/otc/edp.buildth.ing/stacks/garm/garm.yaml @@ -20,7 +20,7 @@ spec: sources: - repoURL: https://edp.buildth.ing/DevFW-CICD/garm-helm path: charts/garm - targetRevision: v0.0.7 + targetRevision: v0.0.11 helm: valueFiles: - $values/otc/edp.buildth.ing/stacks/garm/garm/values.yaml diff --git a/otc/edp.buildth.ing/stacks/garm/garm/values.yaml b/otc/edp.buildth.ing/stacks/garm/garm/values.yaml index 7ad8f26..d1a9e79 100644 --- a/otc/edp.buildth.ing/stacks/garm/garm/values.yaml +++ b/otc/edp.buildth.ing/stacks/garm/garm/values.yaml @@ -36,6 +36,9 @@ providerConfig: cloudlet: name: Hamburg organization: TelekomOP + edgeConnectK8s: + sizer: + sidecarImage: edp.buildth.ing/devfw-cicd/forgejo-runner-sizer-collector:0.0.4 garm: logging: diff --git a/otc/edp.buildth.ing/stacks/garm/optimiser-receiver.yaml b/otc/edp.buildth.ing/stacks/garm/sizer-receiver.yaml similarity index 84% rename from otc/edp.buildth.ing/stacks/garm/optimiser-receiver.yaml rename to otc/edp.buildth.ing/stacks/garm/sizer-receiver.yaml index 4fd45cf..a382e6a 100644 --- a/otc/edp.buildth.ing/stacks/garm/optimiser-receiver.yaml +++ b/otc/edp.buildth.ing/stacks/garm/sizer-receiver.yaml @@ -1,7 +1,7 @@ apiVersion: argoproj.io/v1alpha1 kind: Application metadata: - name: optimiser-receiver + name: sizer-receiver namespace: argocd labels: env: dev @@ -22,4 +22,4 @@ spec: source: repoURL: https://observability.buildth.ing/DevFW-CICD/stacks-instances targetRevision: HEAD - path: "otc/edp.buildth.ing/stacks/garm/optimiser-receiver" + path: "otc/edp.buildth.ing/stacks/garm/sizer-receiver" diff --git 
a/otc/edp.buildth.ing/stacks/garm/optimiser-receiver/deployment.yaml b/otc/edp.buildth.ing/stacks/garm/sizer-receiver/deployment.yaml similarity index 77% rename from otc/edp.buildth.ing/stacks/garm/optimiser-receiver/deployment.yaml rename to otc/edp.buildth.ing/stacks/garm/sizer-receiver/deployment.yaml index f7e366b..e3c0318 100644 --- a/otc/edp.buildth.ing/stacks/garm/optimiser-receiver/deployment.yaml +++ b/otc/edp.buildth.ing/stacks/garm/sizer-receiver/deployment.yaml @@ -1,22 +1,22 @@ apiVersion: apps/v1 kind: Deployment metadata: - name: optimiser-receiver + name: sizer-receiver labels: - app: optimiser-receiver + app: sizer-receiver spec: replicas: 1 selector: matchLabels: - app: optimiser-receiver + app: sizer-receiver template: metadata: labels: - app: optimiser-receiver + app: sizer-receiver spec: containers: - name: receiver - image: edp.buildth.ing/devfw-cicd/forgejo-runner-optimiser-receiver:0.0.3 + image: edp.buildth.ing/devfw-cicd/forgejo-runner-sizer-receiver:0.0.4 args: - --db=/data/metrics.db ports: @@ -27,12 +27,12 @@ spec: - name: RECEIVER_READ_TOKEN valueFrom: secretKeyRef: - name: optimiser-tokens + name: sizer-tokens key: read-token - name: RECEIVER_HMAC_KEY valueFrom: secretKeyRef: - name: optimiser-tokens + name: sizer-tokens key: hmac-key volumeMounts: - name: data @@ -59,17 +59,17 @@ spec: volumes: - name: data persistentVolumeClaim: - claimName: optimiser-receiver-data + claimName: sizer-receiver-data --- apiVersion: v1 kind: Service metadata: - name: optimiser-receiver + name: sizer-receiver labels: - app: optimiser-receiver + app: sizer-receiver spec: selector: - app: optimiser-receiver + app: sizer-receiver ports: - name: http port: 8080 @@ -79,9 +79,9 @@ spec: apiVersion: v1 kind: PersistentVolumeClaim metadata: - name: optimiser-receiver-data + name: sizer-receiver-data labels: - app: optimiser-receiver + app: sizer-receiver annotations: everest.io/disk-volume-type: GPSSD spec: diff --git 
a/otc/edp.buildth.ing/stacks/garm/optimiser-receiver/ingress.yaml b/otc/edp.buildth.ing/stacks/garm/sizer-receiver/ingress.yaml similarity index 69% rename from otc/edp.buildth.ing/stacks/garm/optimiser-receiver/ingress.yaml rename to otc/edp.buildth.ing/stacks/garm/sizer-receiver/ingress.yaml index aa6ac34..3fcc484 100644 --- a/otc/edp.buildth.ing/stacks/garm/optimiser-receiver/ingress.yaml +++ b/otc/edp.buildth.ing/stacks/garm/sizer-receiver/ingress.yaml @@ -5,22 +5,22 @@ metadata: nginx.ingress.kubernetes.io/force-ssl-redirect: "true" cert-manager.io/cluster-issuer: main - name: optimiser-receiver + name: sizer-receiver namespace: garm spec: ingressClassName: nginx rules: - - host: optimiser.edp.buildth.ing + - host: sizer.edp.buildth.ing http: paths: - backend: service: - name: optimiser-receiver + name: sizer-receiver port: number: 8080 path: / pathType: Prefix tls: - hosts: - - optimiser.edp.buildth.ing - secretName: optimiser-receiver-tls + - sizer.edp.buildth.ing + secretName: sizer-receiver-tls From 46a1c1aa33590c3babd3be5fc10d1ba86692e0a5 Mon Sep 17 00:00:00 2001 From: Daniel Sy Date: Fri, 10 Apr 2026 13:22:45 +0200 Subject: [PATCH 003/114] feat(dex): add forgejo-runner-sizer OIDC static client Register forgejo-runner-sizer as a Dex static client for OIDC authentication on sizer.dev.t09.de. Adds the client secret env var injection and the staticClients entry with secretEnv reference. 
--- otc/dev.t09.de/stacks/core/dex/values.yaml | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/otc/dev.t09.de/stacks/core/dex/values.yaml b/otc/dev.t09.de/stacks/core/dex/values.yaml index 8a2a79d..7937bba 100644 --- a/otc/dev.t09.de/stacks/core/dex/values.yaml +++ b/otc/dev.t09.de/stacks/core/dex/values.yaml @@ -34,6 +34,11 @@ envVars: secretKeyRef: name: dex-argo-client key: clientSecret + - name: FORGEJO_RUNNER_SIZER_CLIENT_SECRET + valueFrom: + secretKeyRef: + name: dex-runner-sizer-client + key: clientSecret - name: LOG_LEVEL value: debug @@ -74,3 +79,8 @@ config: - "https://grafana.dev.t09.de/login/generic_oauth" name: "Grafana" secretEnv: "OIDC_DEX_GRAFANA_CLIENT_SECRET" + - id: forgejo-runner-sizer + name: "Forgejo Runner Sizer" + redirectURIs: + - "https://sizer.dev.t09.de/ui/callback" + secretEnv: "FORGEJO_RUNNER_SIZER_CLIENT_SECRET" From dedebf17477312abf7226eee731cfd5d0fb16e27 Mon Sep 17 00:00:00 2001 From: Daniel Sy Date: Mon, 13 Apr 2026 15:23:48 +0200 Subject: [PATCH 004/114] chore(garm): update image to v0.1.7-forgejo-5 and add pending_timeout config --- otc/dev.t09.de/stacks/garm/garm/values.yaml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/otc/dev.t09.de/stacks/garm/garm/values.yaml b/otc/dev.t09.de/stacks/garm/garm/values.yaml index eebfcf1..95a70ad 100644 --- a/otc/dev.t09.de/stacks/garm/garm/values.yaml +++ b/otc/dev.t09.de/stacks/garm/garm/values.yaml @@ -26,7 +26,7 @@ credentials: image: repository: edp.buildth.ing/devfw-cicd/garm-forgejo - tag: v0.1.7-forgejo-3 + tag: v0.1.7-forgejo-5 providerConfig: edgeConnect: @@ -37,6 +37,7 @@ providerConfig: name: Hamburg organization: TelekomOP edgeConnectK8s: + pendingTimeout: "5m" sizer: sidecarImage: edp.buildth.ing/devfw-cicd/forgejo-runner-sizer-collector:latest sidecarPushEndpoint: https://sizer.dev.t09.de/api/v1/metrics From ee8b2f0e9cb2c2cde231bed2fa02989e0289af7f Mon Sep 17 00:00:00 2001 From: Daniel Sy Date: Mon, 13 Apr 2026 16:35:44 +0200 Subject: [PATCH 
005/114] chore(garm): bump helm chart to v0.0.13 for nodes RBAC --- otc/dev.t09.de/stacks/garm/garm.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/otc/dev.t09.de/stacks/garm/garm.yaml b/otc/dev.t09.de/stacks/garm/garm.yaml index 43c7d4e..36a9213 100644 --- a/otc/dev.t09.de/stacks/garm/garm.yaml +++ b/otc/dev.t09.de/stacks/garm/garm.yaml @@ -20,7 +20,7 @@ spec: sources: - repoURL: https://edp.buildth.ing/DevFW-CICD/garm-helm path: charts/garm - targetRevision: v0.0.12 + targetRevision: v0.0.13 helm: valueFiles: - $values/otc/dev.t09.de/stacks/garm/garm/values.yaml From d116313afebe5f13b025a062ffbe52a51028ab9b Mon Sep 17 00:00:00 2001 From: Daniel Sy Date: Mon, 13 Apr 2026 18:02:37 +0200 Subject: [PATCH 006/114] chore(garm): bump to v0.1.7-forgejo-6 (provider nil map fix) --- otc/dev.t09.de/stacks/garm/garm/values.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/otc/dev.t09.de/stacks/garm/garm/values.yaml b/otc/dev.t09.de/stacks/garm/garm/values.yaml index 95a70ad..fb70c47 100644 --- a/otc/dev.t09.de/stacks/garm/garm/values.yaml +++ b/otc/dev.t09.de/stacks/garm/garm/values.yaml @@ -26,7 +26,7 @@ credentials: image: repository: edp.buildth.ing/devfw-cicd/garm-forgejo - tag: v0.1.7-forgejo-5 + tag: v0.1.7-forgejo-6 providerConfig: edgeConnect: From 6f9a6372f14474ad8976dccc2ecd765436f4f956 Mon Sep 17 00:00:00 2001 From: Daniel Sy Date: Tue, 14 Apr 2026 11:23:53 +0200 Subject: [PATCH 007/114] chore(garm): bump garm image to v0.1.7-forgejo-7 - Includes provider v2.0.24 with pod cleanup fixes: - GetPod returns terminal pods for proper GARM lifecycle - ListInstances prefix mismatch fixed - ProviderID consistency fix - buildkitd SIGTERM graceful shutdown --- otc/dev.t09.de/stacks/garm/garm/values.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/otc/dev.t09.de/stacks/garm/garm/values.yaml b/otc/dev.t09.de/stacks/garm/garm/values.yaml index fb70c47..c2d6f86 100644 --- 
a/otc/dev.t09.de/stacks/garm/garm/values.yaml +++ b/otc/dev.t09.de/stacks/garm/garm/values.yaml @@ -26,7 +26,7 @@ credentials: image: repository: edp.buildth.ing/devfw-cicd/garm-forgejo - tag: v0.1.7-forgejo-6 + tag: v0.1.7-forgejo-7 providerConfig: edgeConnect: From 246be79659197efd2c7c4057f9b5a95a56342790 Mon Sep 17 00:00:00 2001 From: Daniel Sy Date: Tue, 14 Apr 2026 13:01:17 +0200 Subject: [PATCH 008/114] chore(garm): bump to v0.1.7-forgejo-8 (revert buildkitd wrapper) --- otc/dev.t09.de/stacks/garm/garm/values.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/otc/dev.t09.de/stacks/garm/garm/values.yaml b/otc/dev.t09.de/stacks/garm/garm/values.yaml index c2d6f86..7cc14ce 100644 --- a/otc/dev.t09.de/stacks/garm/garm/values.yaml +++ b/otc/dev.t09.de/stacks/garm/garm/values.yaml @@ -26,7 +26,7 @@ credentials: image: repository: edp.buildth.ing/devfw-cicd/garm-forgejo - tag: v0.1.7-forgejo-7 + tag: v0.1.7-forgejo-8 providerConfig: edgeConnect: From d1ab2f6c8582de401e5b4b573f1a35475b15ee0b Mon Sep 17 00:00:00 2001 From: Daniel Sy Date: Tue, 14 Apr 2026 16:58:24 +0200 Subject: [PATCH 009/114] =?UTF-8?q?chore(garm):=20=F0=9F=93=A6=20bump=20im?= =?UTF-8?q?age=20to=20v0.1.7-forgejo-9=20(multi-provider=20support)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit garm-provider-edge-connect v2.0.26 adds GitHub Actions + Forgejo multi-provider support. 
--- otc/dev.t09.de/stacks/garm/garm/values.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/otc/dev.t09.de/stacks/garm/garm/values.yaml b/otc/dev.t09.de/stacks/garm/garm/values.yaml index 7cc14ce..5bbdc04 100644 --- a/otc/dev.t09.de/stacks/garm/garm/values.yaml +++ b/otc/dev.t09.de/stacks/garm/garm/values.yaml @@ -26,7 +26,7 @@ credentials: image: repository: edp.buildth.ing/devfw-cicd/garm-forgejo - tag: v0.1.7-forgejo-8 + tag: v0.1.7-forgejo-9 providerConfig: edgeConnect: From 58c694c9d13261ab45d61501cb6d25b6911a594f Mon Sep 17 00:00:00 2001 From: Daniel Sy Date: Wed, 15 Apr 2026 10:23:57 +0200 Subject: [PATCH 010/114] =?UTF-8?q?chore(garm):=20=F0=9F=93=A6=20bump=20im?= =?UTF-8?q?age=20to=20v0.1.7-forgejo-10=20(GitHub=20Actions=20cgroup=20fix?= =?UTF-8?q?)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Provider v2.0.27 fixes CIProvider-aware CGROUP_PROCESS_MAP for GitHub Actions runner detection, completing multi-provider support. 
Ref: IPCEICIS-8514 --- otc/dev.t09.de/stacks/garm/garm/values.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/otc/dev.t09.de/stacks/garm/garm/values.yaml b/otc/dev.t09.de/stacks/garm/garm/values.yaml index 5bbdc04..8eb0d55 100644 --- a/otc/dev.t09.de/stacks/garm/garm/values.yaml +++ b/otc/dev.t09.de/stacks/garm/garm/values.yaml @@ -26,7 +26,7 @@ credentials: image: repository: edp.buildth.ing/devfw-cicd/garm-forgejo - tag: v0.1.7-forgejo-9 + tag: v0.1.7-forgejo-10 providerConfig: edgeConnect: From e0f74e9ec4da0157cdbd39020f841f102b3f4e60 Mon Sep 17 00:00:00 2001 From: Daniel Sy Date: Wed, 15 Apr 2026 12:25:37 +0200 Subject: [PATCH 011/114] =?UTF-8?q?chore(garm):=20=E2=AC=86=EF=B8=8F=20bum?= =?UTF-8?q?p=20image=20to=20v0.1.7-forgejo-11=20with=20fixed=20provider=20?= =?UTF-8?q?binary?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Ref: IPCEICIS-8514 --- otc/dev.t09.de/stacks/garm/garm/values.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/otc/dev.t09.de/stacks/garm/garm/values.yaml b/otc/dev.t09.de/stacks/garm/garm/values.yaml index 8eb0d55..ef21cc9 100644 --- a/otc/dev.t09.de/stacks/garm/garm/values.yaml +++ b/otc/dev.t09.de/stacks/garm/garm/values.yaml @@ -26,7 +26,7 @@ credentials: image: repository: edp.buildth.ing/devfw-cicd/garm-forgejo - tag: v0.1.7-forgejo-10 + tag: v0.1.7-forgejo-11 providerConfig: edgeConnect: From 9374d90d1f4fbc0bff1314db64b5c08a3568d61c Mon Sep 17 00:00:00 2001 From: Daniel Sy Date: Wed, 15 Apr 2026 13:50:54 +0200 Subject: [PATCH 012/114] =?UTF-8?q?chore(garm):=20=E2=AC=86=EF=B8=8F=20bum?= =?UTF-8?q?p=20image=20to=20v0.1.7-forgejo-12=20(ParseExtraSpecs=20fix)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pick up double-encoding fix from garm-provider-edge-connect v2.0.30. 
Ref: IPCEICIS-8514 --- otc/dev.t09.de/stacks/garm/garm/values.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/otc/dev.t09.de/stacks/garm/garm/values.yaml b/otc/dev.t09.de/stacks/garm/garm/values.yaml index ef21cc9..29b0007 100644 --- a/otc/dev.t09.de/stacks/garm/garm/values.yaml +++ b/otc/dev.t09.de/stacks/garm/garm/values.yaml @@ -26,7 +26,7 @@ credentials: image: repository: edp.buildth.ing/devfw-cicd/garm-forgejo - tag: v0.1.7-forgejo-11 + tag: v0.1.7-forgejo-12 providerConfig: edgeConnect: From a3bae88ce90f58a821bc62b4ce49bfc71e827d47 Mon Sep 17 00:00:00 2001 From: Daniel Sy Date: Wed, 15 Apr 2026 14:45:27 +0200 Subject: [PATCH 013/114] =?UTF-8?q?fix(sizer-receiver):=20=F0=9F=90=9B=20a?= =?UTF-8?q?dd=20fsGroup=20to=20pod=20securityContext=20for=20PVC=20write?= =?UTF-8?q?=20access?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Distroless nonroot container (UID 65534) needs matching fsGroup to write to the PVC used for SQLite migrations. 
Ref: IPCEICIS-8514 --- otc/dev.t09.de/stacks/garm/sizer-receiver/deployment.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/otc/dev.t09.de/stacks/garm/sizer-receiver/deployment.yaml b/otc/dev.t09.de/stacks/garm/sizer-receiver/deployment.yaml index 91a1049..3cbd5f8 100644 --- a/otc/dev.t09.de/stacks/garm/sizer-receiver/deployment.yaml +++ b/otc/dev.t09.de/stacks/garm/sizer-receiver/deployment.yaml @@ -16,6 +16,8 @@ spec: labels: app: sizer-receiver spec: + securityContext: + fsGroup: 65534 containers: - name: receiver image: edp.buildth.ing/devfw-cicd/forgejo-runner-sizer-receiver:latest From 47f99082dbda0829011be7b899b8cbde0ec941f5 Mon Sep 17 00:00:00 2001 From: Daniel Sy Date: Wed, 15 Apr 2026 15:46:55 +0200 Subject: [PATCH 014/114] =?UTF-8?q?feat(sizer-receiver):=20=E2=9C=A8=20add?= =?UTF-8?q?=20GARM=20WebSocket=20event=20enrichment=20env=20vars?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add GARM_URL, GARM_USER, and GARM_PASSWORD environment variables to the sizer-receiver deployment so it can connect to GARM's WebSocket event stream for run-status enrichment. 
Ref: IPCEICIS-8514 --- .../stacks/garm/sizer-receiver/deployment.yaml | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/otc/dev.t09.de/stacks/garm/sizer-receiver/deployment.yaml b/otc/dev.t09.de/stacks/garm/sizer-receiver/deployment.yaml index 3cbd5f8..056aa20 100644 --- a/otc/dev.t09.de/stacks/garm/sizer-receiver/deployment.yaml +++ b/otc/dev.t09.de/stacks/garm/sizer-receiver/deployment.yaml @@ -39,6 +39,15 @@ spec: secretKeyRef: name: sizer-tokens key: hmac-key + - name: GARM_URL + value: "http://garm.garm.svc.cluster.local:80" + - name: GARM_USER + value: "admin" + - name: GARM_PASSWORD + valueFrom: + secretKeyRef: + name: garm-fixed-credentials + key: admin_password volumeMounts: - name: data mountPath: /data From 08740eb1daf8fbb08cfa69cccde8d1431c3092c1 Mon Sep 17 00:00:00 2001 From: Daniel Sy Date: Thu, 16 Apr 2026 13:32:12 +0200 Subject: [PATCH 015/114] chore: bump garm image to v0.1.7-forgejo-13 (RunNumber enrichment via WebSocket) --- otc/dev.t09.de/stacks/garm/garm/values.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/otc/dev.t09.de/stacks/garm/garm/values.yaml b/otc/dev.t09.de/stacks/garm/garm/values.yaml index 29b0007..19ea653 100644 --- a/otc/dev.t09.de/stacks/garm/garm/values.yaml +++ b/otc/dev.t09.de/stacks/garm/garm/values.yaml @@ -26,7 +26,7 @@ credentials: image: repository: edp.buildth.ing/devfw-cicd/garm-forgejo - tag: v0.1.7-forgejo-12 + tag: v0.1.7-forgejo-13 providerConfig: edgeConnect: From f2c885cd84990e37abf87f456d57d23a977930d5 Mon Sep 17 00:00:00 2001 From: Daniel Sy Date: Thu, 16 Apr 2026 15:05:53 +0200 Subject: [PATCH 016/114] =?UTF-8?q?fix(sizer):=20=F0=9F=94=A7=20sync=20git?= =?UTF-8?q?ops=20with=20live=20deployment=20=E2=80=94=20add=20OIDC=20confi?= =?UTF-8?q?g,=20remove=20legacy=20Forgejo=20tokens?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../stacks/garm/sizer-receiver/deployment.yaml | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff 
--git a/otc/dev.t09.de/stacks/garm/sizer-receiver/deployment.yaml b/otc/dev.t09.de/stacks/garm/sizer-receiver/deployment.yaml index 056aa20..be4bd65 100644 --- a/otc/dev.t09.de/stacks/garm/sizer-receiver/deployment.yaml +++ b/otc/dev.t09.de/stacks/garm/sizer-receiver/deployment.yaml @@ -48,6 +48,21 @@ spec: secretKeyRef: name: garm-fixed-credentials key: admin_password + - name: RECEIVER_OIDC_ISSUER + value: "https://dex.dev.t09.de" + - name: RECEIVER_OIDC_CLIENT_ID + value: "forgejo-runner-sizer" + - name: RECEIVER_OIDC_CLIENT_SECRET + valueFrom: + secretKeyRef: + name: sizer-oidc-client + key: client-secret + - name: RECEIVER_OIDC_REDIRECT_URI + value: "https://sizer.dev.t09.de/ui/callback" + - name: RECEIVER_SESSION_TTL + value: "12h" + - name: RECEIVER_ALLOWED_ORG + value: "DevFW" volumeMounts: - name: data mountPath: /data From 2af607e9493b4e9d865c5ada047174f6207e9134 Mon Sep 17 00:00:00 2001 From: Daniel Sy Date: Mon, 20 Apr 2026 16:08:12 +0200 Subject: [PATCH 017/114] =?UTF-8?q?chore(garm):=20=E2=AC=86=EF=B8=8F=20bum?= =?UTF-8?q?p=20garm=20to=20v0.1.7-forgejo-14,=20add=20CPU=20sizing=20mode?= =?UTF-8?q?=20env=20vars?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- otc/dev.t09.de/stacks/garm/garm/values.yaml | 2 +- otc/dev.t09.de/stacks/garm/sizer-receiver/deployment.yaml | 4 ++++ otc/edp.buildth.ing/stacks/garm/garm/values.yaml | 2 +- .../stacks/garm/sizer-receiver/deployment.yaml | 4 ++++ 4 files changed, 10 insertions(+), 2 deletions(-) diff --git a/otc/dev.t09.de/stacks/garm/garm/values.yaml b/otc/dev.t09.de/stacks/garm/garm/values.yaml index 19ea653..59f3f5c 100644 --- a/otc/dev.t09.de/stacks/garm/garm/values.yaml +++ b/otc/dev.t09.de/stacks/garm/garm/values.yaml @@ -26,7 +26,7 @@ credentials: image: repository: edp.buildth.ing/devfw-cicd/garm-forgejo - tag: v0.1.7-forgejo-13 + tag: v0.1.7-forgejo-14 providerConfig: edgeConnect: diff --git a/otc/dev.t09.de/stacks/garm/sizer-receiver/deployment.yaml 
b/otc/dev.t09.de/stacks/garm/sizer-receiver/deployment.yaml index be4bd65..1753782 100644 --- a/otc/dev.t09.de/stacks/garm/sizer-receiver/deployment.yaml +++ b/otc/dev.t09.de/stacks/garm/sizer-receiver/deployment.yaml @@ -63,6 +63,10 @@ spec: value: "12h" - name: RECEIVER_ALLOWED_ORG value: "DevFW" + - name: RECEIVER_CPU_SIZING_MODE + value: "observe" + - name: RECEIVER_MEMORY_QOS + value: "guaranteed" volumeMounts: - name: data mountPath: /data diff --git a/otc/edp.buildth.ing/stacks/garm/garm/values.yaml b/otc/edp.buildth.ing/stacks/garm/garm/values.yaml index d1a9e79..9058c6c 100644 --- a/otc/edp.buildth.ing/stacks/garm/garm/values.yaml +++ b/otc/edp.buildth.ing/stacks/garm/garm/values.yaml @@ -26,7 +26,7 @@ credentials: image: repository: observability.buildth.ing/devfw-cicd/garm-forgejo - tag: v0.1.7-forgejo-1 + tag: v0.1.7-forgejo-14 providerConfig: edgeConnect: diff --git a/otc/edp.buildth.ing/stacks/garm/sizer-receiver/deployment.yaml b/otc/edp.buildth.ing/stacks/garm/sizer-receiver/deployment.yaml index e3c0318..88053e9 100644 --- a/otc/edp.buildth.ing/stacks/garm/sizer-receiver/deployment.yaml +++ b/otc/edp.buildth.ing/stacks/garm/sizer-receiver/deployment.yaml @@ -34,6 +34,10 @@ spec: secretKeyRef: name: sizer-tokens key: hmac-key + - name: RECEIVER_CPU_SIZING_MODE + value: "observe" + - name: RECEIVER_MEMORY_QOS + value: "guaranteed" volumeMounts: - name: data mountPath: /data From 487e1ac15ce74030ebb90cf769181a8a9db2b256 Mon Sep 17 00:00:00 2001 From: Daniel Sy Date: Mon, 20 Apr 2026 17:32:22 +0200 Subject: [PATCH 018/114] =?UTF-8?q?chore(garm):=20=E2=AC=86=EF=B8=8F=20bum?= =?UTF-8?q?p=20garm=20to=20v0.1.7-forgejo-15?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- otc/dev.t09.de/stacks/garm/garm/values.yaml | 2 +- otc/edp.buildth.ing/stacks/garm/garm/values.yaml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/otc/dev.t09.de/stacks/garm/garm/values.yaml 
b/otc/dev.t09.de/stacks/garm/garm/values.yaml index 59f3f5c..0caf8c3 100644 --- a/otc/dev.t09.de/stacks/garm/garm/values.yaml +++ b/otc/dev.t09.de/stacks/garm/garm/values.yaml @@ -26,7 +26,7 @@ credentials: image: repository: edp.buildth.ing/devfw-cicd/garm-forgejo - tag: v0.1.7-forgejo-14 + tag: v0.1.7-forgejo-15 providerConfig: edgeConnect: diff --git a/otc/edp.buildth.ing/stacks/garm/garm/values.yaml b/otc/edp.buildth.ing/stacks/garm/garm/values.yaml index 9058c6c..255636f 100644 --- a/otc/edp.buildth.ing/stacks/garm/garm/values.yaml +++ b/otc/edp.buildth.ing/stacks/garm/garm/values.yaml @@ -26,7 +26,7 @@ credentials: image: repository: observability.buildth.ing/devfw-cicd/garm-forgejo - tag: v0.1.7-forgejo-14 + tag: v0.1.7-forgejo-15 providerConfig: edgeConnect: From 61721097d68bd7a30c03c39b7ac09ab6b4c1c4c3 Mon Sep 17 00:00:00 2001 From: Daniel Sy Date: Tue, 21 Apr 2026 14:16:39 +0200 Subject: [PATCH 019/114] =?UTF-8?q?chore(sizer):=20=F0=9F=94=A7=20rename?= =?UTF-8?q?=20forgejo-runner-sizer=20to=20ci-sizer=20in=20deployment=20con?= =?UTF-8?q?figs?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Update container image names to ci-sizer-{receiver,collector} - Update Dex OIDC client ID and name to ci-sizer - Template allowed-org as SIZER_ALLOWED_ORG variable --- otc/dev.t09.de/stacks/core/dex/values.yaml | 4 ++-- otc/dev.t09.de/stacks/garm/garm/values.yaml | 2 +- otc/dev.t09.de/stacks/garm/sizer-receiver/deployment.yaml | 4 ++-- otc/edp.buildth.ing/stacks/garm/garm/values.yaml | 2 +- .../stacks/garm/sizer-receiver/deployment.yaml | 2 +- 5 files changed, 7 insertions(+), 7 deletions(-) diff --git a/otc/dev.t09.de/stacks/core/dex/values.yaml b/otc/dev.t09.de/stacks/core/dex/values.yaml index 7937bba..c3e842a 100644 --- a/otc/dev.t09.de/stacks/core/dex/values.yaml +++ b/otc/dev.t09.de/stacks/core/dex/values.yaml @@ -79,8 +79,8 @@ config: - "https://grafana.dev.t09.de/login/generic_oauth" name: "Grafana" secretEnv: 
"OIDC_DEX_GRAFANA_CLIENT_SECRET" - - id: forgejo-runner-sizer - name: "Forgejo Runner Sizer" + - id: ci-sizer + name: "CI Sizer" redirectURIs: - "https://sizer.dev.t09.de/ui/callback" secretEnv: "FORGEJO_RUNNER_SIZER_CLIENT_SECRET" diff --git a/otc/dev.t09.de/stacks/garm/garm/values.yaml b/otc/dev.t09.de/stacks/garm/garm/values.yaml index 0caf8c3..1b8e7a8 100644 --- a/otc/dev.t09.de/stacks/garm/garm/values.yaml +++ b/otc/dev.t09.de/stacks/garm/garm/values.yaml @@ -39,7 +39,7 @@ providerConfig: edgeConnectK8s: pendingTimeout: "5m" sizer: - sidecarImage: edp.buildth.ing/devfw-cicd/forgejo-runner-sizer-collector:latest + sidecarImage: edp.buildth.ing/devfw-cicd/ci-sizer-collector:latest sidecarPushEndpoint: https://sizer.dev.t09.de/api/v1/metrics baseUrl: "https://sizer.dev.t09.de" readToken: diff --git a/otc/dev.t09.de/stacks/garm/sizer-receiver/deployment.yaml b/otc/dev.t09.de/stacks/garm/sizer-receiver/deployment.yaml index 1753782..dd918d5 100644 --- a/otc/dev.t09.de/stacks/garm/sizer-receiver/deployment.yaml +++ b/otc/dev.t09.de/stacks/garm/sizer-receiver/deployment.yaml @@ -20,7 +20,7 @@ spec: fsGroup: 65534 containers: - name: receiver - image: edp.buildth.ing/devfw-cicd/forgejo-runner-sizer-receiver:latest + image: edp.buildth.ing/devfw-cicd/ci-sizer-receiver:latest imagePullPolicy: Always args: - --db=/data/metrics.db @@ -51,7 +51,7 @@ spec: - name: RECEIVER_OIDC_ISSUER value: "https://dex.dev.t09.de" - name: RECEIVER_OIDC_CLIENT_ID - value: "forgejo-runner-sizer" + value: "ci-sizer" - name: RECEIVER_OIDC_CLIENT_SECRET valueFrom: secretKeyRef: diff --git a/otc/edp.buildth.ing/stacks/garm/garm/values.yaml b/otc/edp.buildth.ing/stacks/garm/garm/values.yaml index 255636f..9e26edc 100644 --- a/otc/edp.buildth.ing/stacks/garm/garm/values.yaml +++ b/otc/edp.buildth.ing/stacks/garm/garm/values.yaml @@ -38,7 +38,7 @@ providerConfig: organization: TelekomOP edgeConnectK8s: sizer: - sidecarImage: edp.buildth.ing/devfw-cicd/forgejo-runner-sizer-collector:0.0.4 + 
sidecarImage: edp.buildth.ing/devfw-cicd/ci-sizer-collector:0.0.4 garm: logging: diff --git a/otc/edp.buildth.ing/stacks/garm/sizer-receiver/deployment.yaml b/otc/edp.buildth.ing/stacks/garm/sizer-receiver/deployment.yaml index 88053e9..2d3eeaa 100644 --- a/otc/edp.buildth.ing/stacks/garm/sizer-receiver/deployment.yaml +++ b/otc/edp.buildth.ing/stacks/garm/sizer-receiver/deployment.yaml @@ -16,7 +16,7 @@ spec: spec: containers: - name: receiver - image: edp.buildth.ing/devfw-cicd/forgejo-runner-sizer-receiver:0.0.4 + image: edp.buildth.ing/devfw-cicd/ci-sizer-receiver:0.0.4 args: - --db=/data/metrics.db ports: From c682c48be0f9719658255a5165c96ebdf97f9ce8 Mon Sep 17 00:00:00 2001 From: Daniel Sy Date: Tue, 21 Apr 2026 15:53:50 +0200 Subject: [PATCH 020/114] chore: bump garm image to v0.1.7-forgejo-16 --- otc/dev.t09.de/stacks/garm/garm/values.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/otc/dev.t09.de/stacks/garm/garm/values.yaml b/otc/dev.t09.de/stacks/garm/garm/values.yaml index 1b8e7a8..37d1aee 100644 --- a/otc/dev.t09.de/stacks/garm/garm/values.yaml +++ b/otc/dev.t09.de/stacks/garm/garm/values.yaml @@ -26,7 +26,7 @@ credentials: image: repository: edp.buildth.ing/devfw-cicd/garm-forgejo - tag: v0.1.7-forgejo-15 + tag: v0.1.7-forgejo-16 providerConfig: edgeConnect: From 4aa8973c91e2f1658fad12f43ea64b079f23dcd5 Mon Sep 17 00:00:00 2001 From: Daniel Sy Date: Tue, 21 Apr 2026 16:03:06 +0200 Subject: [PATCH 021/114] =?UTF-8?q?chore(garm):=20=E2=AC=86=EF=B8=8F=20bum?= =?UTF-8?q?p=20garm-helm=20chart=20to=20v0.0.14?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- otc/dev.t09.de/stacks/garm/garm.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/otc/dev.t09.de/stacks/garm/garm.yaml b/otc/dev.t09.de/stacks/garm/garm.yaml index 36a9213..703570d 100644 --- a/otc/dev.t09.de/stacks/garm/garm.yaml +++ b/otc/dev.t09.de/stacks/garm/garm.yaml @@ -20,7 +20,7 @@ spec: sources: - repoURL: 
https://edp.buildth.ing/DevFW-CICD/garm-helm path: charts/garm - targetRevision: v0.0.13 + targetRevision: v0.0.14 helm: valueFiles: - $values/otc/dev.t09.de/stacks/garm/garm/values.yaml From 0b13b8964081460c03263dcf790b1a82c2fb41e8 Mon Sep 17 00:00:00 2001 From: Daniel Sy Date: Tue, 21 Apr 2026 16:27:25 +0200 Subject: [PATCH 022/114] =?UTF-8?q?chore(garm):=20=E2=AC=86=EF=B8=8F=20bum?= =?UTF-8?q?p=20garm-helm=20to=20v0.0.15=20(startup=20probe=20fix)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- otc/dev.t09.de/stacks/garm/garm.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/otc/dev.t09.de/stacks/garm/garm.yaml b/otc/dev.t09.de/stacks/garm/garm.yaml index 703570d..3754f9a 100644 --- a/otc/dev.t09.de/stacks/garm/garm.yaml +++ b/otc/dev.t09.de/stacks/garm/garm.yaml @@ -20,7 +20,7 @@ spec: sources: - repoURL: https://edp.buildth.ing/DevFW-CICD/garm-helm path: charts/garm - targetRevision: v0.0.14 + targetRevision: v0.0.15 helm: valueFiles: - $values/otc/dev.t09.de/stacks/garm/garm/values.yaml From 4cea4ffde7bcb11aa90404177b518c7dfc47c17c Mon Sep 17 00:00:00 2001 From: Daniel Sy Date: Tue, 21 Apr 2026 17:15:41 +0200 Subject: [PATCH 023/114] chore: bump garm to v0.1.7-forgejo-17 (activeDeadlineSeconds) --- otc/dev.t09.de/stacks/garm/garm/values.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/otc/dev.t09.de/stacks/garm/garm/values.yaml b/otc/dev.t09.de/stacks/garm/garm/values.yaml index 37d1aee..0145885 100644 --- a/otc/dev.t09.de/stacks/garm/garm/values.yaml +++ b/otc/dev.t09.de/stacks/garm/garm/values.yaml @@ -26,7 +26,7 @@ credentials: image: repository: edp.buildth.ing/devfw-cicd/garm-forgejo - tag: v0.1.7-forgejo-16 + tag: v0.1.7-forgejo-17 providerConfig: edgeConnect: From b72e2049e3ecae41b5773502dd9138197b28213a Mon Sep 17 00:00:00 2001 From: Daniel Sy Date: Wed, 22 Apr 2026 13:19:32 +0200 Subject: [PATCH 024/114] chore: bump garm image to v0.1.7-forgejo-18 for dev.t09.de 
--- otc/dev.t09.de/stacks/garm/garm/values.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/otc/dev.t09.de/stacks/garm/garm/values.yaml b/otc/dev.t09.de/stacks/garm/garm/values.yaml index 0145885..3d01fad 100644 --- a/otc/dev.t09.de/stacks/garm/garm/values.yaml +++ b/otc/dev.t09.de/stacks/garm/garm/values.yaml @@ -26,7 +26,7 @@ credentials: image: repository: edp.buildth.ing/devfw-cicd/garm-forgejo - tag: v0.1.7-forgejo-17 + tag: v0.1.7-forgejo-18 providerConfig: edgeConnect: From a9dcf29f7a1faeea000e7d6b37ae7dcd8acafa30 Mon Sep 17 00:00:00 2001 From: Daniel Sy Date: Fri, 24 Apr 2026 13:41:52 +0200 Subject: [PATCH 025/114] =?UTF-8?q?chore(garm):=20=E2=AC=86=EF=B8=8F=20bum?= =?UTF-8?q?p=20garm-forgejo=20to=20v0.1.7-forgejo-19?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- otc/dev.t09.de/stacks/garm/garm/values.yaml | 2 +- otc/edp.buildth.ing/stacks/garm/garm/values.yaml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/otc/dev.t09.de/stacks/garm/garm/values.yaml b/otc/dev.t09.de/stacks/garm/garm/values.yaml index 3d01fad..8610a77 100644 --- a/otc/dev.t09.de/stacks/garm/garm/values.yaml +++ b/otc/dev.t09.de/stacks/garm/garm/values.yaml @@ -26,7 +26,7 @@ credentials: image: repository: edp.buildth.ing/devfw-cicd/garm-forgejo - tag: v0.1.7-forgejo-18 + tag: v0.1.7-forgejo-19 providerConfig: edgeConnect: diff --git a/otc/edp.buildth.ing/stacks/garm/garm/values.yaml b/otc/edp.buildth.ing/stacks/garm/garm/values.yaml index 9e26edc..10ae75c 100644 --- a/otc/edp.buildth.ing/stacks/garm/garm/values.yaml +++ b/otc/edp.buildth.ing/stacks/garm/garm/values.yaml @@ -26,7 +26,7 @@ credentials: image: repository: observability.buildth.ing/devfw-cicd/garm-forgejo - tag: v0.1.7-forgejo-15 + tag: v0.1.7-forgejo-19 providerConfig: edgeConnect: From e65abf162e371f791cf0bd603b8a58216e2ba4f3 Mon Sep 17 00:00:00 2001 From: Daniel Sy Date: Fri, 24 Apr 2026 14:51:58 +0200 Subject: [PATCH 026/114] 
=?UTF-8?q?chore(garm):=20=E2=AC=86=EF=B8=8F=20bum?= =?UTF-8?q?p=20garm-forgejo=20to=20v0.1.7-forgejo-20?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- otc/dev.t09.de/stacks/garm/garm/values.yaml | 2 +- otc/edp.buildth.ing/stacks/garm/garm/values.yaml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/otc/dev.t09.de/stacks/garm/garm/values.yaml b/otc/dev.t09.de/stacks/garm/garm/values.yaml index 8610a77..0067d4d 100644 --- a/otc/dev.t09.de/stacks/garm/garm/values.yaml +++ b/otc/dev.t09.de/stacks/garm/garm/values.yaml @@ -26,7 +26,7 @@ credentials: image: repository: edp.buildth.ing/devfw-cicd/garm-forgejo - tag: v0.1.7-forgejo-19 + tag: v0.1.7-forgejo-20 providerConfig: edgeConnect: diff --git a/otc/edp.buildth.ing/stacks/garm/garm/values.yaml b/otc/edp.buildth.ing/stacks/garm/garm/values.yaml index 10ae75c..3bcb3f1 100644 --- a/otc/edp.buildth.ing/stacks/garm/garm/values.yaml +++ b/otc/edp.buildth.ing/stacks/garm/garm/values.yaml @@ -26,7 +26,7 @@ credentials: image: repository: observability.buildth.ing/devfw-cicd/garm-forgejo - tag: v0.1.7-forgejo-19 + tag: v0.1.7-forgejo-20 providerConfig: edgeConnect: From bc96d8d7aa4f13df3075cb25691cfda766a72170 Mon Sep 17 00:00:00 2001 From: Daniel Sy Date: Fri, 24 Apr 2026 15:47:23 +0200 Subject: [PATCH 027/114] =?UTF-8?q?chore(garm):=20=E2=AC=86=EF=B8=8F=20bum?= =?UTF-8?q?p=20garm-forgejo=20to=20v0.1.7-forgejo-21?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- otc/dev.t09.de/stacks/garm/garm/values.yaml | 2 +- otc/edp.buildth.ing/stacks/garm/garm/values.yaml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/otc/dev.t09.de/stacks/garm/garm/values.yaml b/otc/dev.t09.de/stacks/garm/garm/values.yaml index 0067d4d..ef70339 100644 --- a/otc/dev.t09.de/stacks/garm/garm/values.yaml +++ b/otc/dev.t09.de/stacks/garm/garm/values.yaml @@ -26,7 +26,7 @@ credentials: image: repository: 
edp.buildth.ing/devfw-cicd/garm-forgejo - tag: v0.1.7-forgejo-20 + tag: v0.1.7-forgejo-21 providerConfig: edgeConnect: diff --git a/otc/edp.buildth.ing/stacks/garm/garm/values.yaml b/otc/edp.buildth.ing/stacks/garm/garm/values.yaml index 3bcb3f1..7c4eccc 100644 --- a/otc/edp.buildth.ing/stacks/garm/garm/values.yaml +++ b/otc/edp.buildth.ing/stacks/garm/garm/values.yaml @@ -26,7 +26,7 @@ credentials: image: repository: observability.buildth.ing/devfw-cicd/garm-forgejo - tag: v0.1.7-forgejo-20 + tag: v0.1.7-forgejo-21 providerConfig: edgeConnect: From 9d042eee1c21b7b9d3283f162cc8c2a9ba92e778 Mon Sep 17 00:00:00 2001 From: Daniel Sy Date: Tue, 28 Apr 2026 10:11:09 +0200 Subject: [PATCH 028/114] =?UTF-8?q?chore:=20=E2=AC=86=EF=B8=8F=20bump=20ga?= =?UTF-8?q?rm=20image=20to=20v0.1.7-forgejo-22=20on=20dev.t09.de?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- otc/dev.t09.de/stacks/garm/garm/values.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/otc/dev.t09.de/stacks/garm/garm/values.yaml b/otc/dev.t09.de/stacks/garm/garm/values.yaml index ef70339..e1ee11d 100644 --- a/otc/dev.t09.de/stacks/garm/garm/values.yaml +++ b/otc/dev.t09.de/stacks/garm/garm/values.yaml @@ -26,7 +26,7 @@ credentials: image: repository: edp.buildth.ing/devfw-cicd/garm-forgejo - tag: v0.1.7-forgejo-21 + tag: v0.1.7-forgejo-22 providerConfig: edgeConnect: From 2e90240c81db4cfbf1c298c1310f5bcc2fbb572f Mon Sep 17 00:00:00 2001 From: Daniel Sy Date: Wed, 29 Apr 2026 10:18:36 +0200 Subject: [PATCH 029/114] =?UTF-8?q?refactor(stacks-instances):=20?= =?UTF-8?q?=F0=9F=9A=9A=20migrate=20sizer-receiver=20to=20ci-sizer=20names?= =?UTF-8?q?pace=20(dev.t09.de)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Move sizer-receiver from stacks/garm/ to stacks/ci-sizer/ for dev.t09.de only. edp.buildth.ing stays in garm (not deployed yet). 
--- otc/dev.t09.de/stacks/{garm => ci-sizer}/sizer-receiver.yaml | 4 ++-- .../stacks/{garm => ci-sizer}/sizer-receiver/deployment.yaml | 0 .../stacks/{garm => ci-sizer}/sizer-receiver/ingress.yaml | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) rename otc/dev.t09.de/stacks/{garm => ci-sizer}/sizer-receiver.yaml (85%) rename otc/dev.t09.de/stacks/{garm => ci-sizer}/sizer-receiver/deployment.yaml (100%) rename otc/dev.t09.de/stacks/{garm => ci-sizer}/sizer-receiver/ingress.yaml (96%) diff --git a/otc/dev.t09.de/stacks/garm/sizer-receiver.yaml b/otc/dev.t09.de/stacks/ci-sizer/sizer-receiver.yaml similarity index 85% rename from otc/dev.t09.de/stacks/garm/sizer-receiver.yaml rename to otc/dev.t09.de/stacks/ci-sizer/sizer-receiver.yaml index 1425cc6..4f1b6bc 100644 --- a/otc/dev.t09.de/stacks/garm/sizer-receiver.yaml +++ b/otc/dev.t09.de/stacks/ci-sizer/sizer-receiver.yaml @@ -18,8 +18,8 @@ spec: limit: -1 destination: name: in-cluster - namespace: garm + namespace: ci-sizer source: repoURL: https://edp.buildth.ing/DevFW-CICD/stacks-instances targetRevision: HEAD - path: "otc/dev.t09.de/stacks/garm/sizer-receiver" + path: "otc/dev.t09.de/stacks/ci-sizer/sizer-receiver" diff --git a/otc/dev.t09.de/stacks/garm/sizer-receiver/deployment.yaml b/otc/dev.t09.de/stacks/ci-sizer/sizer-receiver/deployment.yaml similarity index 100% rename from otc/dev.t09.de/stacks/garm/sizer-receiver/deployment.yaml rename to otc/dev.t09.de/stacks/ci-sizer/sizer-receiver/deployment.yaml diff --git a/otc/dev.t09.de/stacks/garm/sizer-receiver/ingress.yaml b/otc/dev.t09.de/stacks/ci-sizer/sizer-receiver/ingress.yaml similarity index 96% rename from otc/dev.t09.de/stacks/garm/sizer-receiver/ingress.yaml rename to otc/dev.t09.de/stacks/ci-sizer/sizer-receiver/ingress.yaml index bc50d98..9a28977 100644 --- a/otc/dev.t09.de/stacks/garm/sizer-receiver/ingress.yaml +++ b/otc/dev.t09.de/stacks/ci-sizer/sizer-receiver/ingress.yaml @@ -6,7 +6,7 @@ metadata: cert-manager.io/cluster-issuer: main 
name: sizer-receiver - namespace: garm + namespace: ci-sizer spec: ingressClassName: nginx rules: From 556a784beb3f67030bbba31908fe59f98ae98e4e Mon Sep 17 00:00:00 2001 From: Daniel Sy Date: Wed, 29 Apr 2026 10:41:29 +0200 Subject: [PATCH 030/114] =?UTF-8?q?fix(stacks-instances):=20=F0=9F=9A=91?= =?UTF-8?q?=20add=20ci-sizer=20registry=20entry=20for=20dev.t09.de?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Create ci-sizer-reg ArgoCD app-of-apps to manage the sizer-receiver after migration from garm namespace. Restores sizer.dev.t09.de ingress. --- otc/dev.t09.de/registry/ci-sizer.yaml | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) create mode 100644 otc/dev.t09.de/registry/ci-sizer.yaml diff --git a/otc/dev.t09.de/registry/ci-sizer.yaml b/otc/dev.t09.de/registry/ci-sizer.yaml new file mode 100644 index 0000000..58df27e --- /dev/null +++ b/otc/dev.t09.de/registry/ci-sizer.yaml @@ -0,0 +1,24 @@ +apiVersion: argoproj.io/v1alpha1 +kind: Application +metadata: + name: ci-sizer-reg + namespace: argocd + labels: + env: dev + finalizers: + - resources-finalizer.argocd.argoproj.io +spec: + destination: + name: in-cluster + namespace: argocd + source: + path: "otc/dev.t09.de/stacks/ci-sizer" + repoURL: "https://edp.buildth.ing/DevFW-CICD/stacks-instances" + targetRevision: HEAD + project: default + syncPolicy: + automated: + prune: true + selfHeal: true + syncOptions: + - CreateNamespace=true From c5191ea18a7490de1ec84c9cac1b84ac6204c143 Mon Sep 17 00:00:00 2001 From: "manuel.ganter" Date: Tue, 5 May 2026 08:29:25 +0000 Subject: [PATCH 031/114] Update otc/dev.t09.de/stacks/forgejo/forgejo-server/manifests/forgejo-ingress.yaml --- .../forgejo/forgejo-server/manifests/forgejo-ingress.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/otc/dev.t09.de/stacks/forgejo/forgejo-server/manifests/forgejo-ingress.yaml b/otc/dev.t09.de/stacks/forgejo/forgejo-server/manifests/forgejo-ingress.yaml index 
bcefb1d..8e5146a 100644 --- a/otc/dev.t09.de/stacks/forgejo/forgejo-server/manifests/forgejo-ingress.yaml +++ b/otc/dev.t09.de/stacks/forgejo/forgejo-server/manifests/forgejo-ingress.yaml @@ -3,7 +3,7 @@ kind: Ingress metadata: annotations: nginx.ingress.kubernetes.io/force-ssl-redirect: "true" - nginx.ingress.kubernetes.io/proxy-body-size: 512m + nginx.ingress.kubernetes.io/proxy-body-size: 5120m cert-manager.io/cluster-issuer: main name: forgejo-server From 5be2bf1409d0af3438fa0b08c45288c9c0bf0a1e Mon Sep 17 00:00:00 2001 From: Patrick Sy Date: Tue, 5 May 2026 14:05:40 +0200 Subject: [PATCH 032/114] fix: increased body size by 10x for large image layer uploads --- .../forgejo/forgejo-server/manifests/forgejo-ingress.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/otc/edp.buildth.ing/stacks/forgejo/forgejo-server/manifests/forgejo-ingress.yaml b/otc/edp.buildth.ing/stacks/forgejo/forgejo-server/manifests/forgejo-ingress.yaml index 8203a51..e5d71d6 100644 --- a/otc/edp.buildth.ing/stacks/forgejo/forgejo-server/manifests/forgejo-ingress.yaml +++ b/otc/edp.buildth.ing/stacks/forgejo/forgejo-server/manifests/forgejo-ingress.yaml @@ -3,7 +3,7 @@ kind: Ingress metadata: annotations: nginx.ingress.kubernetes.io/force-ssl-redirect: "true" - nginx.ingress.kubernetes.io/proxy-body-size: 512m + nginx.ingress.kubernetes.io/proxy-body-size: 5120m cert-manager.io/cluster-issuer: main name: forgejo-server From bc086d5c318fa1b129f4b8869caf5611feed17ee Mon Sep 17 00:00:00 2001 From: Patrick Sy Date: Thu, 7 May 2026 17:40:36 +0200 Subject: [PATCH 033/114] fix: increase smol backup disk --- .../forgejo-server/manifests/forgejo-s3-backup-cronjob.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/otc/edp.buildth.ing/stacks/forgejo/forgejo-server/manifests/forgejo-s3-backup-cronjob.yaml b/otc/edp.buildth.ing/stacks/forgejo/forgejo-server/manifests/forgejo-s3-backup-cronjob.yaml index cc153d1..71f1649 100644 --- 
a/otc/edp.buildth.ing/stacks/forgejo/forgejo-server/manifests/forgejo-s3-backup-cronjob.yaml +++ b/otc/edp.buildth.ing/stacks/forgejo/forgejo-server/manifests/forgejo-s3-backup-cronjob.yaml @@ -72,7 +72,7 @@ spec: - ReadWriteOnce resources: requests: - storage: 100Gi + storage: 500Gi --- apiVersion: v1 kind: Secret From d4b54c854fd68932cd709881f93fca89b7adcd0f Mon Sep 17 00:00:00 2001 From: Patrick Sy Date: Mon, 11 May 2026 10:56:01 +0200 Subject: [PATCH 034/114] fix: increased pvc size due to out of disk space error --- .../forgejo-server/manifests/forgejo-s3-backup-cronjob.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/otc/observability.buildth.ing/stacks/forgejo/forgejo-server/manifests/forgejo-s3-backup-cronjob.yaml b/otc/observability.buildth.ing/stacks/forgejo/forgejo-server/manifests/forgejo-s3-backup-cronjob.yaml index 581f2d2..842a7cc 100644 --- a/otc/observability.buildth.ing/stacks/forgejo/forgejo-server/manifests/forgejo-s3-backup-cronjob.yaml +++ b/otc/observability.buildth.ing/stacks/forgejo/forgejo-server/manifests/forgejo-s3-backup-cronjob.yaml @@ -72,7 +72,7 @@ spec: - ReadWriteOnce resources: requests: - storage: 100Gi + storage: 500Gi --- apiVersion: v1 kind: Secret From b84476f71ed871332da630ec66b244d24d9b83c4 Mon Sep 17 00:00:00 2001 From: "Daniel.Sy" Date: Wed, 13 May 2026 10:19:17 +0000 Subject: [PATCH 035/114] feat(benchmark): add ci-sizer stacks-instances for benchmark.t09.de [1/4] --- .../stacks/ci-sizer/sizer-receiver.yaml | 25 +++++++++++++++++++ 1 file changed, 25 insertions(+) create mode 100644 otc/benchmark.t09.de/stacks/ci-sizer/sizer-receiver.yaml diff --git a/otc/benchmark.t09.de/stacks/ci-sizer/sizer-receiver.yaml b/otc/benchmark.t09.de/stacks/ci-sizer/sizer-receiver.yaml new file mode 100644 index 0000000..aeb18c9 --- /dev/null +++ b/otc/benchmark.t09.de/stacks/ci-sizer/sizer-receiver.yaml @@ -0,0 +1,25 @@ +apiVersion: argoproj.io/v1alpha1 +kind: Application +metadata: + name: sizer-receiver + namespace: 
argocd + labels: + env: dev + finalizers: + - resources-finalizer.argocd.argoproj.io +spec: + project: default + syncPolicy: + automated: + selfHeal: true + syncOptions: + - CreateNamespace=true + retry: + limit: -1 + destination: + name: in-cluster + namespace: ci-sizer + source: + repoURL: https://edp.buildth.ing/DevFW-CICD/stacks-instances + targetRevision: HEAD + path: "otc/benchmark.t09.de/stacks/ci-sizer/sizer-receiver" From 6977dac98ddd6343d6604156345f8d3ab9ac17a5 Mon Sep 17 00:00:00 2001 From: "Daniel.Sy" Date: Wed, 13 May 2026 10:19:29 +0000 Subject: [PATCH 036/114] feat(benchmark): add ci-sizer deployment for benchmark.t09.de [2/4] --- .../ci-sizer/sizer-receiver/deployment.yaml | 126 ++++++++++++++++++ 1 file changed, 126 insertions(+) create mode 100644 otc/benchmark.t09.de/stacks/ci-sizer/sizer-receiver/deployment.yaml diff --git a/otc/benchmark.t09.de/stacks/ci-sizer/sizer-receiver/deployment.yaml b/otc/benchmark.t09.de/stacks/ci-sizer/sizer-receiver/deployment.yaml new file mode 100644 index 0000000..ce21f14 --- /dev/null +++ b/otc/benchmark.t09.de/stacks/ci-sizer/sizer-receiver/deployment.yaml @@ -0,0 +1,126 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: sizer-receiver + labels: + app: sizer-receiver +spec: + strategy: + type: Recreate + replicas: 1 + selector: + matchLabels: + app: sizer-receiver + template: + metadata: + labels: + app: sizer-receiver + spec: + securityContext: + fsGroup: 65534 + containers: + - name: receiver + image: edp.buildth.ing/devfw-cicd/ci-sizer-receiver:latest + imagePullPolicy: Always + args: + - --db=/data/metrics.db + ports: + - name: http + containerPort: 8080 + protocol: TCP + env: + - name: RECEIVER_READ_TOKEN + valueFrom: + secretKeyRef: + name: sizer-tokens + key: read-token + - name: RECEIVER_HMAC_KEY + valueFrom: + secretKeyRef: + name: sizer-tokens + key: hmac-key + - name: GARM_URL + value: "http://garm.garm.svc.cluster.local:80" + - name: GARM_USER + value: "admin" + - name: GARM_PASSWORD + 
valueFrom: + secretKeyRef: + name: garm-fixed-credentials + key: admin_password + - name: RECEIVER_OIDC_ISSUER + value: "https://dex.benchmark.t09.de" + - name: RECEIVER_OIDC_CLIENT_ID + value: "ci-sizer" + - name: RECEIVER_OIDC_CLIENT_SECRET + valueFrom: + secretKeyRef: + name: sizer-oidc-client + key: client-secret + - name: RECEIVER_OIDC_REDIRECT_URI + value: "https://sizer.benchmark.t09.de/ui/callback" + - name: RECEIVER_SESSION_TTL + value: "12h" + - name: RECEIVER_ALLOWED_ORG + value: "giteaAdmin" + - name: RECEIVER_CPU_SIZING_MODE + value: "observe" + - name: RECEIVER_MEMORY_QOS + value: "guaranteed" + volumeMounts: + - name: data + mountPath: /data + livenessProbe: + httpGet: + path: /health + port: http + initialDelaySeconds: 5 + periodSeconds: 30 + readinessProbe: + httpGet: + path: /health + port: http + initialDelaySeconds: 2 + periodSeconds: 10 + resources: + requests: + cpu: 50m + memory: 64Mi + limits: + cpu: 200m + memory: 128Mi + volumes: + - name: data + persistentVolumeClaim: + claimName: sizer-receiver-data +--- +apiVersion: v1 +kind: Service +metadata: + name: sizer-receiver + labels: + app: sizer-receiver +spec: + selector: + app: sizer-receiver + ports: + - name: http + port: 8080 + targetPort: http + protocol: TCP +--- +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: sizer-receiver-data + labels: + app: sizer-receiver + annotations: + everest.io/disk-volume-type: GPSSD +spec: + storageClassName: csi-disk + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 1Gi From 1a591f1c3724a63fced56a8033200bb0c0ee2c86 Mon Sep 17 00:00:00 2001 From: "Daniel.Sy" Date: Wed, 13 May 2026 10:19:36 +0000 Subject: [PATCH 037/114] feat(benchmark): add ci-sizer ingress for benchmark.t09.de [3/4] --- .../ci-sizer/sizer-receiver/ingress.yaml | 26 +++++++++++++++++++ 1 file changed, 26 insertions(+) create mode 100644 otc/benchmark.t09.de/stacks/ci-sizer/sizer-receiver/ingress.yaml diff --git 
a/otc/benchmark.t09.de/stacks/ci-sizer/sizer-receiver/ingress.yaml b/otc/benchmark.t09.de/stacks/ci-sizer/sizer-receiver/ingress.yaml new file mode 100644 index 0000000..bc2b070 --- /dev/null +++ b/otc/benchmark.t09.de/stacks/ci-sizer/sizer-receiver/ingress.yaml @@ -0,0 +1,26 @@ +apiVersion: networking.k8s.io/v1 +kind: Ingress +metadata: + annotations: + nginx.ingress.kubernetes.io/force-ssl-redirect: "true" + cert-manager.io/cluster-issuer: main + + name: sizer-receiver + namespace: ci-sizer +spec: + ingressClassName: nginx + rules: + - host: sizer.benchmark.t09.de + http: + paths: + - backend: + service: + name: sizer-receiver + port: + number: 8080 + path: / + pathType: Prefix + tls: + - hosts: + - sizer.benchmark.t09.de + secretName: sizer-receiver-tls From 2c14713ae5d67fb5a368d75678456aedbde6b076 Mon Sep 17 00:00:00 2001 From: "Daniel.Sy" Date: Wed, 13 May 2026 10:19:43 +0000 Subject: [PATCH 038/114] feat(benchmark): add ci-sizer registry for benchmark.t09.de [4/4] --- otc/benchmark.t09.de/registry/ci-sizer.yaml | 24 +++++++++++++++++++++ 1 file changed, 24 insertions(+) create mode 100644 otc/benchmark.t09.de/registry/ci-sizer.yaml diff --git a/otc/benchmark.t09.de/registry/ci-sizer.yaml b/otc/benchmark.t09.de/registry/ci-sizer.yaml new file mode 100644 index 0000000..953c8c1 --- /dev/null +++ b/otc/benchmark.t09.de/registry/ci-sizer.yaml @@ -0,0 +1,24 @@ +apiVersion: argoproj.io/v1alpha1 +kind: Application +metadata: + name: ci-sizer-reg + namespace: argocd + labels: + env: dev + finalizers: + - resources-finalizer.argocd.argoproj.io +spec: + destination: + name: in-cluster + namespace: argocd + source: + path: "otc/benchmark.t09.de/stacks/ci-sizer" + repoURL: "https://edp.buildth.ing/DevFW-CICD/stacks-instances" + targetRevision: HEAD + project: default + syncPolicy: + automated: + prune: true + selfHeal: true + syncOptions: + - CreateNamespace=true From 8b9fb6bdd8d417523306ec302ab7e43f75e3cce3 Mon Sep 17 00:00:00 2001 From: Automated pipeline Date: Wed, 13 
May 2026 11:39:29 +0000 Subject: [PATCH 039/114] Automated upload for benchmark.t09.de --- .../ci-sizer.yaml => edfbuilder.yaml} | 4 +- otc/benchmark.t09.de/registry/coder.yaml | 24 + otc/benchmark.t09.de/registry/core.yaml | 24 + otc/benchmark.t09.de/registry/docs.yaml | 24 + otc/benchmark.t09.de/registry/forgejo.yaml | 24 + otc/benchmark.t09.de/registry/garm.yaml | 24 + .../registry/observability-client.yaml | 24 + .../registry/observability.yaml | 24 + otc/benchmark.t09.de/registry/otc.yaml | 24 + otc/benchmark.t09.de/registry/terralist.yaml | 24 + .../ci-sizer/sizer-receiver/deployment.yaml | 2 +- .../ci-sizer/sizer-receiver/ingress.yaml | 10 + otc/benchmark.t09.de/stacks/coder/coder.yaml | 32 + .../coder/coder/manifests/postgres.yaml | 38 + .../stacks/coder/coder/values.yaml | 61 + otc/benchmark.t09.de/stacks/core/argocd.yaml | 35 + .../manifests/argocd-server-ingress.yaml | 27 + .../stacks/core/argocd/values.yaml | 42 + .../stacks/core/cloudnative-pg.yaml | 30 + .../stacks/core/cloudnative-pg/values.yaml | 1 + otc/benchmark.t09.de/stacks/core/dex.yaml | 29 + .../stacks/core/dex/values.yaml | 76 + .../stacks/forgejo/forgejo-runner.yaml | 24 + .../forgejo/forgejo-runner/dind-docker.yaml | 104 ++ .../stacks/forgejo/forgejo-server.yaml | 32 + .../manifests/forgejo-ingress.yaml | 27 + .../manifests/forgejo-s3-backup-cronjob.yaml | 91 ++ .../stacks/forgejo/forgejo-server/values.yaml | 183 +++ otc/benchmark.t09.de/stacks/garm/garm.yaml | 29 + .../stacks/garm/garm/values.yaml | 45 + .../observability-client/metrics-server.yaml | 29 + .../metrics-server/values.yaml | 4 + .../stacks/observability-client/vector.yaml | 29 + .../observability-client/vector/values.yaml | 68 + .../observability-client/vm-client-stack.yaml | 30 + .../vm-client-stack/values.yaml | 1288 +++++++++++++++++ .../observability/grafana-operator.yaml | 25 + .../grafana-operator/manifests/argocd.yaml | 9 + .../grafana-operator/manifests/grafana.yaml | 75 + .../manifests/ingress-nginx.yaml | 9 + 
.../manifests/victoria-logs.yaml | 9 + .../observability/victoria-k8s-stack.yaml | 31 + .../victoria-k8s-stack/manifests/alerts.yaml | 40 + .../victoria-k8s-stack/manifests/vlogs.yaml | 26 + .../victoria-k8s-stack/manifests/vmauth.yaml | 17 + .../victoria-k8s-stack/values.yaml | 1230 ++++++++++++++++ .../cert-manager/manifests/clusterissuer.yaml | 14 + .../stacks/otc/cert-manager/values.yaml | 4 + .../stacks/otc/cert-manger.yaml | 32 + .../stacks/otc/ingress-nginx.yaml | 29 + .../stacks/otc/ingress-nginx/values.yaml | 31 + .../stacks/otc/storageclass.yaml | 25 + .../stacks/otc/storageclass/storageclass.yaml | 18 + .../stacks/terralist/terralist.yaml | 30 + .../stacks/terralist/terralist/values.yaml | 87 ++ 55 files changed, 4324 insertions(+), 3 deletions(-) rename otc/benchmark.t09.de/{registry/ci-sizer.yaml => edfbuilder.yaml} (86%) create mode 100644 otc/benchmark.t09.de/registry/coder.yaml create mode 100644 otc/benchmark.t09.de/registry/core.yaml create mode 100644 otc/benchmark.t09.de/registry/docs.yaml create mode 100644 otc/benchmark.t09.de/registry/forgejo.yaml create mode 100644 otc/benchmark.t09.de/registry/garm.yaml create mode 100644 otc/benchmark.t09.de/registry/observability-client.yaml create mode 100644 otc/benchmark.t09.de/registry/observability.yaml create mode 100644 otc/benchmark.t09.de/registry/otc.yaml create mode 100644 otc/benchmark.t09.de/registry/terralist.yaml create mode 100644 otc/benchmark.t09.de/stacks/coder/coder.yaml create mode 100644 otc/benchmark.t09.de/stacks/coder/coder/manifests/postgres.yaml create mode 100644 otc/benchmark.t09.de/stacks/coder/coder/values.yaml create mode 100644 otc/benchmark.t09.de/stacks/core/argocd.yaml create mode 100644 otc/benchmark.t09.de/stacks/core/argocd/manifests/argocd-server-ingress.yaml create mode 100644 otc/benchmark.t09.de/stacks/core/argocd/values.yaml create mode 100644 otc/benchmark.t09.de/stacks/core/cloudnative-pg.yaml create mode 100644 
otc/benchmark.t09.de/stacks/core/cloudnative-pg/values.yaml create mode 100644 otc/benchmark.t09.de/stacks/core/dex.yaml create mode 100644 otc/benchmark.t09.de/stacks/core/dex/values.yaml create mode 100644 otc/benchmark.t09.de/stacks/forgejo/forgejo-runner.yaml create mode 100644 otc/benchmark.t09.de/stacks/forgejo/forgejo-runner/dind-docker.yaml create mode 100644 otc/benchmark.t09.de/stacks/forgejo/forgejo-server.yaml create mode 100644 otc/benchmark.t09.de/stacks/forgejo/forgejo-server/manifests/forgejo-ingress.yaml create mode 100644 otc/benchmark.t09.de/stacks/forgejo/forgejo-server/manifests/forgejo-s3-backup-cronjob.yaml create mode 100644 otc/benchmark.t09.de/stacks/forgejo/forgejo-server/values.yaml create mode 100644 otc/benchmark.t09.de/stacks/garm/garm.yaml create mode 100644 otc/benchmark.t09.de/stacks/garm/garm/values.yaml create mode 100644 otc/benchmark.t09.de/stacks/observability-client/metrics-server.yaml create mode 100644 otc/benchmark.t09.de/stacks/observability-client/metrics-server/values.yaml create mode 100644 otc/benchmark.t09.de/stacks/observability-client/vector.yaml create mode 100644 otc/benchmark.t09.de/stacks/observability-client/vector/values.yaml create mode 100644 otc/benchmark.t09.de/stacks/observability-client/vm-client-stack.yaml create mode 100644 otc/benchmark.t09.de/stacks/observability-client/vm-client-stack/values.yaml create mode 100644 otc/benchmark.t09.de/stacks/observability/grafana-operator.yaml create mode 100644 otc/benchmark.t09.de/stacks/observability/grafana-operator/manifests/argocd.yaml create mode 100644 otc/benchmark.t09.de/stacks/observability/grafana-operator/manifests/grafana.yaml create mode 100644 otc/benchmark.t09.de/stacks/observability/grafana-operator/manifests/ingress-nginx.yaml create mode 100644 otc/benchmark.t09.de/stacks/observability/grafana-operator/manifests/victoria-logs.yaml create mode 100644 otc/benchmark.t09.de/stacks/observability/victoria-k8s-stack.yaml create mode 100644 
otc/benchmark.t09.de/stacks/observability/victoria-k8s-stack/manifests/alerts.yaml create mode 100644 otc/benchmark.t09.de/stacks/observability/victoria-k8s-stack/manifests/vlogs.yaml create mode 100644 otc/benchmark.t09.de/stacks/observability/victoria-k8s-stack/manifests/vmauth.yaml create mode 100644 otc/benchmark.t09.de/stacks/observability/victoria-k8s-stack/values.yaml create mode 100644 otc/benchmark.t09.de/stacks/otc/cert-manager/manifests/clusterissuer.yaml create mode 100644 otc/benchmark.t09.de/stacks/otc/cert-manager/values.yaml create mode 100644 otc/benchmark.t09.de/stacks/otc/cert-manger.yaml create mode 100644 otc/benchmark.t09.de/stacks/otc/ingress-nginx.yaml create mode 100644 otc/benchmark.t09.de/stacks/otc/ingress-nginx/values.yaml create mode 100644 otc/benchmark.t09.de/stacks/otc/storageclass.yaml create mode 100644 otc/benchmark.t09.de/stacks/otc/storageclass/storageclass.yaml create mode 100644 otc/benchmark.t09.de/stacks/terralist/terralist.yaml create mode 100644 otc/benchmark.t09.de/stacks/terralist/terralist/values.yaml diff --git a/otc/benchmark.t09.de/registry/ci-sizer.yaml b/otc/benchmark.t09.de/edfbuilder.yaml similarity index 86% rename from otc/benchmark.t09.de/registry/ci-sizer.yaml rename to otc/benchmark.t09.de/edfbuilder.yaml index 953c8c1..1d105ce 100644 --- a/otc/benchmark.t09.de/registry/ci-sizer.yaml +++ b/otc/benchmark.t09.de/edfbuilder.yaml @@ -1,7 +1,7 @@ apiVersion: argoproj.io/v1alpha1 kind: Application metadata: - name: ci-sizer-reg + name: edfbuilder namespace: argocd labels: env: dev @@ -12,7 +12,7 @@ spec: name: in-cluster namespace: argocd source: - path: "otc/benchmark.t09.de/stacks/ci-sizer" + path: "otc/benchmark.t09.de/registry" repoURL: "https://edp.buildth.ing/DevFW-CICD/stacks-instances" targetRevision: HEAD project: default diff --git a/otc/benchmark.t09.de/registry/coder.yaml b/otc/benchmark.t09.de/registry/coder.yaml new file mode 100644 index 0000000..2c36d8d --- /dev/null +++ 
b/otc/benchmark.t09.de/registry/coder.yaml @@ -0,0 +1,24 @@ +apiVersion: argoproj.io/v1alpha1 +kind: Application +metadata: + name: coder-reg + namespace: argocd + labels: + env: dev + finalizers: + - resources-finalizer.argocd.argoproj.io +spec: + destination: + name: in-cluster + namespace: argocd + source: + path: "otc/benchmark.t09.de/stacks/coder" + repoURL: "https://edp.buildth.ing/DevFW-CICD/stacks-instances" + targetRevision: HEAD + project: default + syncPolicy: + automated: + prune: true + selfHeal: true + syncOptions: + - CreateNamespace=true diff --git a/otc/benchmark.t09.de/registry/core.yaml b/otc/benchmark.t09.de/registry/core.yaml new file mode 100644 index 0000000..7a9b64c --- /dev/null +++ b/otc/benchmark.t09.de/registry/core.yaml @@ -0,0 +1,24 @@ +apiVersion: argoproj.io/v1alpha1 +kind: Application +metadata: + name: core + namespace: argocd + labels: + env: dev + finalizers: + - resources-finalizer.argocd.argoproj.io +spec: + destination: + name: in-cluster + namespace: argocd + source: + path: "otc/benchmark.t09.de/stacks/core" + repoURL: "https://edp.buildth.ing/DevFW-CICD/stacks-instances" + targetRevision: HEAD + project: default + syncPolicy: + automated: + prune: true + selfHeal: true + syncOptions: + - CreateNamespace=true diff --git a/otc/benchmark.t09.de/registry/docs.yaml b/otc/benchmark.t09.de/registry/docs.yaml new file mode 100644 index 0000000..9d88777 --- /dev/null +++ b/otc/benchmark.t09.de/registry/docs.yaml @@ -0,0 +1,24 @@ +apiVersion: argoproj.io/v1alpha1 +kind: Application +metadata: + name: docs-reg + namespace: argocd + labels: + env: dev + finalizers: + - resources-finalizer.argocd.argoproj.io +spec: + destination: + name: in-cluster + namespace: argocd + source: + path: argocd-stack + repoURL: "https://edp.buildth.ing/DevFW-CICD/website-and-documentation" + targetRevision: HEAD + project: default + syncPolicy: + automated: + prune: true + selfHeal: true + syncOptions: + - CreateNamespace=true diff --git 
a/otc/benchmark.t09.de/registry/forgejo.yaml b/otc/benchmark.t09.de/registry/forgejo.yaml new file mode 100644 index 0000000..2442409 --- /dev/null +++ b/otc/benchmark.t09.de/registry/forgejo.yaml @@ -0,0 +1,24 @@ +apiVersion: argoproj.io/v1alpha1 +kind: Application +metadata: + name: forgejo + namespace: argocd + labels: + env: dev + finalizers: + - resources-finalizer.argocd.argoproj.io +spec: + destination: + name: in-cluster + namespace: argocd + source: + path: "otc/benchmark.t09.de/stacks/forgejo" + repoURL: "https://edp.buildth.ing/DevFW-CICD/stacks-instances" + targetRevision: HEAD + project: default + syncPolicy: + automated: + prune: true + selfHeal: true + syncOptions: + - CreateNamespace=true diff --git a/otc/benchmark.t09.de/registry/garm.yaml b/otc/benchmark.t09.de/registry/garm.yaml new file mode 100644 index 0000000..1e44b8b --- /dev/null +++ b/otc/benchmark.t09.de/registry/garm.yaml @@ -0,0 +1,24 @@ +apiVersion: argoproj.io/v1alpha1 +kind: Application +metadata: + name: garm-reg + namespace: argocd + labels: + env: dev + finalizers: + - resources-finalizer.argocd.argoproj.io +spec: + destination: + name: in-cluster + namespace: argocd + source: + path: "otc/benchmark.t09.de/stacks/garm" + repoURL: "https://edp.buildth.ing/DevFW-CICD/stacks-instances" + targetRevision: HEAD + project: default + syncPolicy: + automated: + prune: true + selfHeal: true + syncOptions: + - CreateNamespace=true diff --git a/otc/benchmark.t09.de/registry/observability-client.yaml b/otc/benchmark.t09.de/registry/observability-client.yaml new file mode 100644 index 0000000..1ca1b3e --- /dev/null +++ b/otc/benchmark.t09.de/registry/observability-client.yaml @@ -0,0 +1,24 @@ +apiVersion: argoproj.io/v1alpha1 +kind: Application +metadata: + name: observability-client + namespace: argocd + labels: + env: dev + finalizers: + - resources-finalizer.argocd.argoproj.io +spec: + destination: + name: in-cluster + namespace: argocd + source: + path: 
"otc/benchmark.t09.de/stacks/observability-client" + repoURL: "https://edp.buildth.ing/DevFW-CICD/stacks-instances" + targetRevision: HEAD + project: default + syncPolicy: + automated: + prune: true + selfHeal: true + syncOptions: + - CreateNamespace=true diff --git a/otc/benchmark.t09.de/registry/observability.yaml b/otc/benchmark.t09.de/registry/observability.yaml new file mode 100644 index 0000000..e5473d3 --- /dev/null +++ b/otc/benchmark.t09.de/registry/observability.yaml @@ -0,0 +1,24 @@ +apiVersion: argoproj.io/v1alpha1 +kind: Application +metadata: + name: observability + namespace: argocd + labels: + env: dev + finalizers: + - resources-finalizer.argocd.argoproj.io +spec: + destination: + name: in-cluster + namespace: argocd + source: + path: "otc/benchmark.t09.de/stacks/observability" + repoURL: "https://edp.buildth.ing/DevFW-CICD/stacks-instances" + targetRevision: HEAD + project: default + syncPolicy: + automated: + prune: true + selfHeal: true + syncOptions: + - CreateNamespace=true diff --git a/otc/benchmark.t09.de/registry/otc.yaml b/otc/benchmark.t09.de/registry/otc.yaml new file mode 100644 index 0000000..dbba541 --- /dev/null +++ b/otc/benchmark.t09.de/registry/otc.yaml @@ -0,0 +1,24 @@ +apiVersion: argoproj.io/v1alpha1 +kind: Application +metadata: + name: otc + namespace: argocd + labels: + env: dev + finalizers: + - resources-finalizer.argocd.argoproj.io +spec: + destination: + name: in-cluster + namespace: argocd + source: + path: "otc/benchmark.t09.de/stacks/otc" + repoURL: "https://edp.buildth.ing/DevFW-CICD/stacks-instances" + targetRevision: HEAD + project: default + syncPolicy: + automated: + prune: true + selfHeal: true + syncOptions: + - CreateNamespace=true diff --git a/otc/benchmark.t09.de/registry/terralist.yaml b/otc/benchmark.t09.de/registry/terralist.yaml new file mode 100644 index 0000000..3ef37d1 --- /dev/null +++ b/otc/benchmark.t09.de/registry/terralist.yaml @@ -0,0 +1,24 @@ +apiVersion: argoproj.io/v1alpha1 +kind: Application 
+metadata: + name: terralist-reg + namespace: argocd + labels: + env: dev + finalizers: + - resources-finalizer.argocd.argoproj.io +spec: + destination: + name: in-cluster + namespace: argocd + source: + path: "otc/benchmark.t09.de/stacks/terralist" + repoURL: "https://edp.buildth.ing/DevFW-CICD/stacks-instances" + targetRevision: HEAD + project: default + syncPolicy: + automated: + prune: true + selfHeal: true + syncOptions: + - CreateNamespace=true diff --git a/otc/benchmark.t09.de/stacks/ci-sizer/sizer-receiver/deployment.yaml b/otc/benchmark.t09.de/stacks/ci-sizer/sizer-receiver/deployment.yaml index ce21f14..7e9261b 100644 --- a/otc/benchmark.t09.de/stacks/ci-sizer/sizer-receiver/deployment.yaml +++ b/otc/benchmark.t09.de/stacks/ci-sizer/sizer-receiver/deployment.yaml @@ -62,7 +62,7 @@ spec: - name: RECEIVER_SESSION_TTL value: "12h" - name: RECEIVER_ALLOWED_ORG - value: "giteaAdmin" + value: "DevFW-CICD" - name: RECEIVER_CPU_SIZING_MODE value: "observe" - name: RECEIVER_MEMORY_QOS diff --git a/otc/benchmark.t09.de/stacks/ci-sizer/sizer-receiver/ingress.yaml b/otc/benchmark.t09.de/stacks/ci-sizer/sizer-receiver/ingress.yaml index bc2b070..79d90f3 100644 --- a/otc/benchmark.t09.de/stacks/ci-sizer/sizer-receiver/ingress.yaml +++ b/otc/benchmark.t09.de/stacks/ci-sizer/sizer-receiver/ingress.yaml @@ -20,6 +20,16 @@ spec: number: 8080 path: / pathType: Prefix + - host: ci-sizer.benchmark.t09.de + http: + paths: + - backend: + service: + name: sizer-receiver + port: + number: 8080 + path: / + pathType: Prefix tls: - hosts: - sizer.benchmark.t09.de diff --git a/otc/benchmark.t09.de/stacks/coder/coder.yaml b/otc/benchmark.t09.de/stacks/coder/coder.yaml new file mode 100644 index 0000000..f40d6a6 --- /dev/null +++ b/otc/benchmark.t09.de/stacks/coder/coder.yaml @@ -0,0 +1,32 @@ +apiVersion: argoproj.io/v1alpha1 +kind: Application +metadata: + name: coder + namespace: argocd + labels: + env: dev +spec: + project: default + syncPolicy: + automated: + selfHeal: true + 
syncOptions: + - CreateNamespace=true + retry: + limit: -1 + destination: + name: in-cluster + namespace: coder + sources: + - repoURL: https://helm.coder.com/v2 + chart: coder + targetRevision: 2.28.3 + helm: + valueFiles: + - $values/otc/benchmark.t09.de/stacks/coder/coder/values.yaml + - repoURL: https://edp.buildth.ing/DevFW-CICD/stacks-instances + targetRevision: HEAD + ref: values + - repoURL: https://edp.buildth.ing/DevFW-CICD/stacks-instances + targetRevision: HEAD + path: "otc/benchmark.t09.de/stacks/coder/coder/manifests" diff --git a/otc/benchmark.t09.de/stacks/coder/coder/manifests/postgres.yaml b/otc/benchmark.t09.de/stacks/coder/coder/manifests/postgres.yaml new file mode 100644 index 0000000..cae4b97 --- /dev/null +++ b/otc/benchmark.t09.de/stacks/coder/coder/manifests/postgres.yaml @@ -0,0 +1,38 @@ +--- +apiVersion: postgresql.cnpg.io/v1 +kind: Cluster +metadata: + name: coder-db + namespace: coder +spec: + instances: 1 + primaryUpdateStrategy: unsupervised + resources: + requests: + memory: "1Gi" + cpu: "1" + limits: + memory: "1Gi" + cpu: "1" + managed: + roles: + - name: coder + createdb: true + login: true + passwordSecret: + name: coder-db-user + storage: + size: 10Gi + storageClass: csi-disk +--- +apiVersion: postgresql.cnpg.io/v1 +kind: Database +metadata: + name: coder + namespace: coder +spec: + cluster: + name: coder-db + name: coder + owner: coder +--- diff --git a/otc/benchmark.t09.de/stacks/coder/coder/values.yaml b/otc/benchmark.t09.de/stacks/coder/coder/values.yaml new file mode 100644 index 0000000..eef7ac4 --- /dev/null +++ b/otc/benchmark.t09.de/stacks/coder/coder/values.yaml @@ -0,0 +1,61 @@ +coder: + # You can specify any environment variables you'd like to pass to Coder + # here. Coder consumes environment variables listed in + # `coder server --help`, and these environment variables are also passed + # to the workspace provisioner (so you can consume them in your Terraform + # templates for auth keys etc.). 
+ # + # Please keep in mind that you should not set `CODER_HTTP_ADDRESS`, + # `CODER_TLS_ENABLE`, `CODER_TLS_CERT_FILE` or `CODER_TLS_KEY_FILE` as + # they are already set by the Helm chart and will cause conflicts. + env: + - name: CODER_ACCESS_URL + value: https://coder.benchmark.t09.de + - name: CODER_PG_CONNECTION_URL + valueFrom: + secretKeyRef: + # The secret coder-db-user must provide a `url` key holding your + # Postgres connection URL like: + # postgres://coder:password@postgres:5432/coder?sslmode=disable + name: coder-db-user + key: url + # For production deployments, we recommend configuring your own GitHub + # OAuth2 provider and disabling the default one. + - name: CODER_OAUTH2_GITHUB_DEFAULT_PROVIDER_ENABLE + value: "false" + - name: EDGE_CONNECT_ENDPOINT + valueFrom: + secretKeyRef: + name: edge-credential + key: endpoint + - name: EDGE_CONNECT_USERNAME + valueFrom: + secretKeyRef: + name: edge-credential + key: username + - name: EDGE_CONNECT_PASSWORD + valueFrom: + secretKeyRef: + name: edge-credential + key: password + + # (Optional) For production deployments the access URL should be set. + # If you're just trying Coder, access the dashboard via the service IP.
+ # - name: CODER_ACCESS_URL + # value: "https://coder.example.com" + + #tls: + # secretNames: + # - my-tls-secret-name + service: + type: ClusterIP + + ingress: + enable: true + className: nginx + host: coder.benchmark.t09.de + annotations: + cert-manager.io/cluster-issuer: main + tls: + enable: true + secretName: coder-tls-secret diff --git a/otc/benchmark.t09.de/stacks/core/argocd.yaml b/otc/benchmark.t09.de/stacks/core/argocd.yaml new file mode 100644 index 0000000..33d9a7d --- /dev/null +++ b/otc/benchmark.t09.de/stacks/core/argocd.yaml @@ -0,0 +1,35 @@ +apiVersion: argoproj.io/v1alpha1 +kind: Application +metadata: + name: argocd + namespace: argocd + labels: + env: dev +spec: + project: default + syncPolicy: + automated: + selfHeal: true + syncOptions: + - CreateNamespace=true + retry: + limit: -1 + destination: + name: in-cluster + namespace: argocd + sources: + - repoURL: https://github.com/argoproj/argo-helm.git + path: charts/argo-cd + # TODO: RIRE Can be updated when https://github.com/argoproj/argo-cd/issues/20790 is fixed and merged + # As logout make problems, it is suggested to switch from path based routing to an own argocd domain, + # similar to the CNOE amazon reference implementation and in our case, Forgejo + targetRevision: argo-cd-9.4.6 + helm: + valueFiles: + - $values/otc/benchmark.t09.de/stacks/core/argocd/values.yaml + - repoURL: https://edp.buildth.ing/DevFW-CICD/stacks-instances + targetRevision: HEAD + ref: values + - repoURL: https://edp.buildth.ing/DevFW-CICD/stacks-instances + targetRevision: HEAD + path: "otc/benchmark.t09.de/stacks/core/argocd/manifests" diff --git a/otc/benchmark.t09.de/stacks/core/argocd/manifests/argocd-server-ingress.yaml b/otc/benchmark.t09.de/stacks/core/argocd/manifests/argocd-server-ingress.yaml new file mode 100644 index 0000000..1c7f405 --- /dev/null +++ b/otc/benchmark.t09.de/stacks/core/argocd/manifests/argocd-server-ingress.yaml @@ -0,0 +1,27 @@ +apiVersion: networking.k8s.io/v1 +kind: Ingress 
+metadata: + annotations: + nginx.ingress.kubernetes.io/backend-protocol: HTTP + nginx.ingress.kubernetes.io/force-ssl-redirect: "true" + cert-manager.io/cluster-issuer: main + + name: argocd-server + namespace: argocd +spec: + ingressClassName: nginx + rules: + - host: argocd.benchmark.t09.de + http: + paths: + - backend: + service: + name: argocd-server + port: + number: 80 + path: / + pathType: Prefix + tls: + - hosts: + - argocd.benchmark.t09.de + secretName: argocd-net-tls diff --git a/otc/benchmark.t09.de/stacks/core/argocd/values.yaml b/otc/benchmark.t09.de/stacks/core/argocd/values.yaml new file mode 100644 index 0000000..a6521b0 --- /dev/null +++ b/otc/benchmark.t09.de/stacks/core/argocd/values.yaml @@ -0,0 +1,42 @@ +global: + domain: argocd.benchmark.t09.de + +configs: + params: + server.insecure: true + cm: + oidc.config: | + name: FORGEJO + issuer: https://dex.benchmark.t09.de + clientID: controller-argocd-dex + clientSecret: $dex-argo-client:clientSecret + requestedScopes: + - openid + - profile + - email + - groups + application.resourceTrackingMethod: annotation + timeout.reconciliation: 60s + resource.exclusions: | + - apiGroups: + - "*" + kinds: + - ProviderConfigUsage + - apiGroups: + - cilium.io + kinds: + - CiliumIdentity + clusters: + - "*" + url: https://argocd.benchmark.t09.de + rbac: + policy.csv: 'g, DevFW, role:admin' + + tls: + certificates: + +notifications: + enabled: false + +dex: + enabled: false diff --git a/otc/benchmark.t09.de/stacks/core/cloudnative-pg.yaml b/otc/benchmark.t09.de/stacks/core/cloudnative-pg.yaml new file mode 100644 index 0000000..aae0345 --- /dev/null +++ b/otc/benchmark.t09.de/stacks/core/cloudnative-pg.yaml @@ -0,0 +1,30 @@ +apiVersion: argoproj.io/v1alpha1 +kind: Application +metadata: + name: cloudnative-pg + namespace: argocd + labels: + env: dev +spec: + project: default + syncPolicy: + automated: + selfHeal: true + syncOptions: + - CreateNamespace=true + - ServerSideApply=true + retry: + limit: -1 + 
destination: + name: in-cluster + namespace: cloudnative-pg + sources: + - repoURL: https://cloudnative-pg.github.io/charts + chart: cloudnative-pg + targetRevision: 0.26.1 + helm: + valueFiles: + - $values/otc/benchmark.t09.de/stacks/core/cloudnative-pg/values.yaml + - repoURL: https://edp.buildth.ing/DevFW-CICD/stacks-instances + targetRevision: HEAD + ref: values diff --git a/otc/benchmark.t09.de/stacks/core/cloudnative-pg/values.yaml b/otc/benchmark.t09.de/stacks/core/cloudnative-pg/values.yaml new file mode 100644 index 0000000..cfebbfc --- /dev/null +++ b/otc/benchmark.t09.de/stacks/core/cloudnative-pg/values.yaml @@ -0,0 +1 @@ +# No need for values here. diff --git a/otc/benchmark.t09.de/stacks/core/dex.yaml b/otc/benchmark.t09.de/stacks/core/dex.yaml new file mode 100644 index 0000000..bb58b24 --- /dev/null +++ b/otc/benchmark.t09.de/stacks/core/dex.yaml @@ -0,0 +1,29 @@ +apiVersion: argoproj.io/v1alpha1 +kind: Application +metadata: + name: dex + namespace: argocd + labels: + env: dev +spec: + project: default + syncPolicy: + automated: + selfHeal: true + syncOptions: + - CreateNamespace=true + retry: + limit: -1 + destination: + name: in-cluster + namespace: dex + sources: + - repoURL: https://charts.dexidp.io + chart: dex + targetRevision: 0.23.0 + helm: + valueFiles: + - $values/otc/benchmark.t09.de/stacks/core/dex/values.yaml + - repoURL: https://edp.buildth.ing/DevFW-CICD/stacks-instances + targetRevision: HEAD + ref: values diff --git a/otc/benchmark.t09.de/stacks/core/dex/values.yaml b/otc/benchmark.t09.de/stacks/core/dex/values.yaml new file mode 100644 index 0000000..6da315e --- /dev/null +++ b/otc/benchmark.t09.de/stacks/core/dex/values.yaml @@ -0,0 +1,76 @@ +ingress: + enabled: true + className: nginx + annotations: + cert-manager.io/cluster-issuer: main + hosts: + - host: dex.benchmark.t09.de + paths: + - path: / + pathType: Prefix + tls: + - hosts: + - dex.benchmark.t09.de + secretName: dex-cert + +envVars: + - name: FORGEJO_CLIENT_SECRET + 
valueFrom: + secretKeyRef: + name: dex-forgejo-client + key: clientSecret + - name: FORGEJO_CLIENT_ID + valueFrom: + secretKeyRef: + name: dex-forgejo-client + key: clientID + - name: OIDC_DEX_GRAFANA_CLIENT_SECRET + valueFrom: + secretKeyRef: + name: dex-grafana-client + key: clientSecret + - name: OIDC_DEX_ARGO_CLIENT_SECRET + valueFrom: + secretKeyRef: + name: dex-argo-client + key: clientSecret + - name: LOG_LEVEL + value: debug + +config: + # Set it to a valid URL + issuer: https://dex.benchmark.t09.de + + # See https://dexidp.io/docs/storage/ for more options + storage: + type: memory + + oauth2: + skipApprovalScreen: true + alwaysShowLoginScreen: false + + connectors: + - type: gitea + id: gitea + name: Forgejo + config: + clientID: "$FORGEJO_CLIENT_ID" + clientSecret: "$FORGEJO_CLIENT_SECRET" + redirectURI: https://dex.benchmark.t09.de/callback + baseURL: https://edp.buildth.ing + # loadAllGroups: true + orgs: + - name: DevFW + enablePasswordDB: false + + staticClients: + - id: controller-argocd-dex + name: ArgoCD Client + redirectURIs: + - "https://argocd.benchmark.t09.de/auth/callback" + secretEnv: "OIDC_DEX_ARGO_CLIENT_SECRET" + - id: grafana + redirectURIs: + - "https://grafana.benchmark.t09.de/login/generic_oauth" + name: "Grafana" + secretEnv: "OIDC_DEX_GRAFANA_CLIENT_SECRET" diff --git a/otc/benchmark.t09.de/stacks/forgejo/forgejo-runner.yaml b/otc/benchmark.t09.de/stacks/forgejo/forgejo-runner.yaml new file mode 100644 index 0000000..5889ae5 --- /dev/null +++ b/otc/benchmark.t09.de/stacks/forgejo/forgejo-runner.yaml @@ -0,0 +1,24 @@ +apiVersion: argoproj.io/v1alpha1 +kind: Application +metadata: + name: forgejo-runner + namespace: argocd + labels: + env: dev + finalizers: + - resources-finalizer.argocd.argoproj.io +spec: + project: default + syncPolicy: + automated: + selfHeal: true + syncOptions: + - CreateNamespace=true + retry: + limit: -1 + destination: + server: "https://kubernetes.default.svc" + source: + repoURL: 
https://edp.buildth.ing/DevFW-CICD/stacks-instances + targetRevision: HEAD + path: "otc/benchmark.t09.de/stacks/forgejo/forgejo-runner" diff --git a/otc/benchmark.t09.de/stacks/forgejo/forgejo-runner/dind-docker.yaml b/otc/benchmark.t09.de/stacks/forgejo/forgejo-runner/dind-docker.yaml new file mode 100644 index 0000000..fa1ab7e --- /dev/null +++ b/otc/benchmark.t09.de/stacks/forgejo/forgejo-runner/dind-docker.yaml @@ -0,0 +1,104 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + labels: + app: forgejo-runner + name: forgejo-runner + namespace: gitea +spec: + # Running several replicas means that if one is busy, another can pick up jobs. + replicas: 3 + selector: + matchLabels: + app: forgejo-runner + strategy: {} + template: + metadata: + creationTimestamp: null + labels: + app: forgejo-runner + spec: + restartPolicy: Always + volumes: + - name: docker-certs + emptyDir: {} + - name: runner-data + emptyDir: {} + # Initialise our configuration file using offline registration + # https://forgejo.org/docs/v1.21/admin/actions/#offline-registration + initContainers: + - name: runner-register + image: code.forgejo.org/forgejo/runner:12.6.4 + command: + - "sh" + - "-c" + - | + forgejo-runner \ + register \ + --no-interactive \ + --token ${RUNNER_SECRET} \ + --name ${RUNNER_NAME} \ + --instance ${FORGEJO_INSTANCE_URL} \ + --labels docker:docker://node:24-bookworm,ubuntu-22.04:docker://ghcr.io/catthehacker/ubuntu:act-22.04,ubuntu-latest:docker://ghcr.io/catthehacker/ubuntu:act-24.04,ubuntu-24.04:docker://ghcr.io/catthehacker/ubuntu:act-24.04 + env: + - name: RUNNER_NAME + valueFrom: + fieldRef: + fieldPath: metadata.name + - name: RUNNER_SECRET + valueFrom: + secretKeyRef: + name: forgejo-runner-token + key: token + - name: FORGEJO_INSTANCE_URL + value: https://benchmark.t09.de + volumeMounts: + - name: runner-data + mountPath: /data + containers: + - name: runner + image: code.forgejo.org/forgejo/runner:12.6.4 + command: + - "sh" + - "-c" + - | + while !
nc -z 127.0.0.1 2376 config.yml ; + sed -i -e "s|privileged: .*|privileged: true|" config.yml + sed -i -e "s|network: .*|network: host|" config.yml ; + sed -i -e "s|^ envs:$$| envs:\n DOCKER_HOST: tcp://127.0.0.1:2376\n DOCKER_TLS_VERIFY: 1\n DOCKER_CERT_PATH: /certs/client|" config.yml ; + sed -i -e "s|^ options:| options: -v /certs/client:/certs/client|" config.yml ; + sed -i -e "s| valid_volumes: \[\]$$| valid_volumes:\n - /certs/client|" config.yml ; + /bin/forgejo-runner --config config.yml daemon + securityContext: + allowPrivilegeEscalation: true + privileged: true + readOnlyRootFilesystem: false + runAsGroup: 0 + runAsNonRoot: false + runAsUser: 0 + env: + - name: DOCKER_HOST + value: tcp://localhost:2376 + - name: DOCKER_CERT_PATH + value: /certs/client + - name: DOCKER_TLS_VERIFY + value: "1" + volumeMounts: + - name: docker-certs + mountPath: /certs + - name: runner-data + mountPath: /data + - name: daemon + image: docker:28.0.4-dind + env: + - name: DOCKER_TLS_CERTDIR + value: /certs + securityContext: + privileged: true + volumeMounts: + - name: docker-certs + mountPath: /certs diff --git a/otc/benchmark.t09.de/stacks/forgejo/forgejo-server.yaml b/otc/benchmark.t09.de/stacks/forgejo/forgejo-server.yaml new file mode 100644 index 0000000..17e91c5 --- /dev/null +++ b/otc/benchmark.t09.de/stacks/forgejo/forgejo-server.yaml @@ -0,0 +1,32 @@ +apiVersion: argoproj.io/v1alpha1 +kind: Application +metadata: + name: forgejo-server + namespace: argocd + labels: + env: dev +spec: + project: default + syncPolicy: + automated: + selfHeal: true + syncOptions: + - CreateNamespace=true + retry: + limit: -1 + destination: + name: in-cluster + namespace: gitea + sources: + - repoURL: https://code.forgejo.org/forgejo-helm/forgejo-helm.git + path: . 
+ targetRevision: v16.2.0 + helm: + valueFiles: + - $values/otc/benchmark.t09.de/stacks/forgejo/forgejo-server/values.yaml + - repoURL: https://edp.buildth.ing/DevFW-CICD/stacks-instances + targetRevision: HEAD + ref: values + - repoURL: https://edp.buildth.ing/DevFW-CICD/stacks-instances + targetRevision: HEAD + path: "otc/benchmark.t09.de/stacks/forgejo/forgejo-server/manifests" \ No newline at end of file diff --git a/otc/benchmark.t09.de/stacks/forgejo/forgejo-server/manifests/forgejo-ingress.yaml b/otc/benchmark.t09.de/stacks/forgejo/forgejo-server/manifests/forgejo-ingress.yaml new file mode 100644 index 0000000..e850f89 --- /dev/null +++ b/otc/benchmark.t09.de/stacks/forgejo/forgejo-server/manifests/forgejo-ingress.yaml @@ -0,0 +1,27 @@ +apiVersion: networking.k8s.io/v1 +kind: Ingress +metadata: + annotations: + nginx.ingress.kubernetes.io/force-ssl-redirect: "true" + nginx.ingress.kubernetes.io/proxy-body-size: 5120m + cert-manager.io/cluster-issuer: main + + name: forgejo-server + namespace: gitea +spec: + ingressClassName: nginx + rules: + - host: benchmark.t09.de + http: + paths: + - backend: + service: + name: forgejo-server-http + port: + number: 3000 + path: / + pathType: Prefix + tls: + - hosts: + - benchmark.t09.de + secretName: forgejo-net-tls diff --git a/otc/benchmark.t09.de/stacks/forgejo/forgejo-server/manifests/forgejo-s3-backup-cronjob.yaml b/otc/benchmark.t09.de/stacks/forgejo/forgejo-server/manifests/forgejo-s3-backup-cronjob.yaml new file mode 100644 index 0000000..18762aa --- /dev/null +++ b/otc/benchmark.t09.de/stacks/forgejo/forgejo-server/manifests/forgejo-s3-backup-cronjob.yaml @@ -0,0 +1,91 @@ +apiVersion: batch/v1 +kind: CronJob +metadata: + name: forgejo-s3-backup + namespace: gitea +spec: + schedule: "0 1 * * *" + concurrencyPolicy: "Forbid" + successfulJobsHistoryLimit: 5 + failedJobsHistoryLimit: 5 + startingDeadlineSeconds: 600 # 10 minutes + jobTemplate: + spec: + # 60 min until backup - 10 min start - (backoffLimit * 
activeDeadlineSeconds) - some time sync buffer + activeDeadlineSeconds: 1350 + backoffLimit: 2 + ttlSecondsAfterFinished: 259200 # + template: + spec: + containers: + - name: rclone + image: rclone/rclone:1.70 + imagePullPolicy: IfNotPresent + env: + - name: SOURCE_BUCKET + valueFrom: + secretKeyRef: + name: forgejo-cloud-credentials + key: bucket-name + - name: AWS_ACCESS_KEY_ID + valueFrom: + secretKeyRef: + name: forgejo-cloud-credentials + key: access-key + - name: AWS_SECRET_ACCESS_KEY + valueFrom: + secretKeyRef: + name: forgejo-cloud-credentials + key: secret-key + volumeMounts: + - name: rclone-config + mountPath: /config/rclone + readOnly: true + - name: backup-dir + mountPath: /backup + readOnly: false + command: + - /bin/sh + - -c + - | + rclone sync source:/${SOURCE_BUCKET} /backup -v --ignore-checksum + restartPolicy: OnFailure + volumes: + - name: rclone-config + secret: + secretName: forgejo-s3-backup + - name: backup-dir + persistentVolumeClaim: + claimName: s3-backup +--- +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: s3-backup + namespace: gitea + annotations: + everest.io/disk-volume-type: GPSSD + everest.io/crypt-key-id: 71ebef9e-5575-4b05-a597-ee1f67c911e3 +spec: + storageClassName: csi-disk + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 500Gi +--- +apiVersion: v1 +kind: Secret +metadata: + name: forgejo-s3-backup + namespace: gitea +type: Opaque +stringData: + rclone.conf: | + [source] + type = s3 + provider = HuaweiOBS + env_auth = true + endpoint = obs.eu-de.otc.t-systems.com + region = eu-de + acl = private diff --git a/otc/benchmark.t09.de/stacks/forgejo/forgejo-server/values.yaml b/otc/benchmark.t09.de/stacks/forgejo/forgejo-server/values.yaml new file mode 100644 index 0000000..8a18a98 --- /dev/null +++ b/otc/benchmark.t09.de/stacks/forgejo/forgejo-server/values.yaml @@ -0,0 +1,183 @@ + +# We use recreate to make sure only one instance with one version is running, because Forgejo might break or data 
gets inconsistent. +strategy: + type: Recreate + +redis-cluster: + enabled: false + +redis: + enabled: false + +postgresql: + enabled: false + +postgresql-ha: + enabled: false + +persistence: + enabled: true + size: 200Gi + storageClass: csi-disk + annotations: + everest.io/crypt-key-id: 71ebef9e-5575-4b05-a597-ee1f67c911e3 + everest.io/disk-volume-type: GPSSD + +test: + enabled: false + +deployment: + env: + - name: SSL_CERT_DIR + value: /etc/ssl/forgejo + +extraVolumeMounts: + - mountPath: /etc/ssl/forgejo + name: custom-database-certs-volume + readOnly: true + +extraVolumes: + - name: custom-database-certs-volume + secret: + secretName: custom-database-certs + +gitea: + metrics: + enabled: true + serviceMonitor: + enabled: true + additionalConfigFromEnvs: + - name: FORGEJO__storage__MINIO_ACCESS_KEY_ID + valueFrom: + secretKeyRef: + name: forgejo-cloud-credentials + key: access-key + - name: FORGEJO__storage__MINIO_SECRET_ACCESS_KEY + valueFrom: + secretKeyRef: + name: forgejo-cloud-credentials + key: secret-key + - name: FORGEJO__queue__CONN_STR + valueFrom: + secretKeyRef: + name: redis-forgejo-cloud-credentials + key: connection-string + - name: FORGEJO__session__PROVIDER_CONFIG + valueFrom: + secretKeyRef: + name: redis-forgejo-cloud-credentials + key: connection-string + - name: FORGEJO__cache__HOST + valueFrom: + secretKeyRef: + name: redis-forgejo-cloud-credentials + key: connection-string + - name: FORGEJO__database__HOST + valueFrom: + secretKeyRef: + name: postgres-forgejo-cloud-credentials + key: host_port + - name: FORGEJO__database__NAME + valueFrom: + secretKeyRef: + name: postgres-forgejo-cloud-credentials + key: database + - name: FORGEJO__database__USER + valueFrom: + secretKeyRef: + name: postgres-forgejo-cloud-credentials + key: username + - name: FORGEJO__database__PASSWD + valueFrom: + secretKeyRef: + name: postgres-forgejo-cloud-credentials + key: password + # Either 'elasticsearch' or 'bleve' (go in memory search engine) + - name:
FORGEJO__indexer__ISSUE_INDEXER_TYPE + valueFrom: + secretKeyRef: + name: elasticsearch-cloud-credentials + key: type + - name: FORGEJO__indexer__ISSUE_INDEXER_CONN_STR + valueFrom: + secretKeyRef: + name: elasticsearch-cloud-credentials + key: connection-string + - name: FORGEJO__indexer__ISSUE_INDEXER_ENABLED + valueFrom: + secretKeyRef: + name: elasticsearch-cloud-credentials + key: enabled + - name: FORGEJO__mailer__PASSWD + valueFrom: + secretKeyRef: + name: email-user-credentials + key: connection-string + + admin: + existingSecret: gitea-credential + + config: + APP_NAME: 'EDP' + APP_SLOGAN: 'Build your thing in minutes' + storage: + MINIO_ENDPOINT: obs.eu-de.otc.t-systems.com:443 + STORAGE_TYPE: minio + MINIO_LOCATION: eu-de + MINIO_BUCKET: "edp-forgejo-non-prod-benchmark" + MINIO_USE_SSL: true + + queue: + TYPE: redis + + session: + PROVIDER: redis + + cache: + ENABLED: true + ADAPTER: redis + + security: + GLOBAL_TWO_FACTOR_REQUIREMENT: admin + + service: + DISABLE_REGISTRATION: true + ENABLE_NOTIFY_MAIL: true + + other: + SHOW_FOOTER_VERSION: false + SHOW_FOOTER_TEMPLATE_LOAD_TIME: false + + database: + DB_TYPE: postgres + SSL_MODE: verify-ca + + server: + DOMAIN: 'benchmark.t09.de' + ROOT_URL: 'https://benchmark.t09.de:443' + + mailer: + ENABLED: true + USER: ipcei-cis-devfw@mms-support.de + PROTOCOL: smtps + FROM: '"IPCEI CIS DevFW" ' + SMTP_ADDR: mail.mms-support.de + SMTP_PORT: 465 + +service: + ssh: + type: LoadBalancer + nodePort: 32222 + externalTrafficPolicy: Cluster + annotations: + kubernetes.io/elb.id: 5ee936a2-6308-4924-9fdf-0256cbdf3baa + +image: + pullPolicy: "IfNotPresent" + # Overrides the image tag whose default is the chart appVersion. 
+ #tag: "8.0.3" + # Adds -rootless suffix to image name + # rootless: true + fullOverride: edp.buildth.ing/devfw-cicd/edp-forgejo:14.0.2-edp1-rootless + +forgejo: {} diff --git a/otc/benchmark.t09.de/stacks/garm/garm.yaml b/otc/benchmark.t09.de/stacks/garm/garm.yaml new file mode 100644 index 0000000..fee3847 --- /dev/null +++ b/otc/benchmark.t09.de/stacks/garm/garm.yaml @@ -0,0 +1,29 @@ +apiVersion: argoproj.io/v1alpha1 +kind: Application +metadata: + name: garm + namespace: argocd + labels: + env: dev +spec: + project: default + syncPolicy: + automated: + selfHeal: true + syncOptions: + - CreateNamespace=true + retry: + limit: -1 + destination: + name: in-cluster + namespace: garm + sources: + - repoURL: https://edp.buildth.ing/DevFW-CICD/garm-helm + path: charts/garm + targetRevision: v0.0.15 + helm: + valueFiles: + - $values/otc/benchmark.t09.de/stacks/garm/garm/values.yaml + - repoURL: https://edp.buildth.ing/DevFW-CICD/stacks-instances + targetRevision: HEAD + ref: values diff --git a/otc/benchmark.t09.de/stacks/garm/garm/values.yaml b/otc/benchmark.t09.de/stacks/garm/garm/values.yaml new file mode 100644 index 0000000..3143f5d --- /dev/null +++ b/otc/benchmark.t09.de/stacks/garm/garm/values.yaml @@ -0,0 +1,45 @@ +ingress: + enabled: true + className: nginx + annotations: + cert-manager.io/cluster-issuer: main + nginx.ingress.kubernetes.io/backend-protocol: HTTP + nginx.ingress.kubernetes.io/force-ssl-redirect: "true" + hosts: + - host: garm.benchmark.t09.de + paths: + - path: / + pathType: Prefix + tls: + - secretName: garm-net-tls + hosts: + - garm.benchmark.t09.de + +# Credentials and Secrets +credentials: + edgeConnect: + existingSecretName: "edge-credential" + gitea: + url: "https://benchmark.t09.de" # Required + db: + existingSecretName: garm-fixed-credentials + +image: + repository: edp.buildth.ing/devfw-cicd/garm-forgejo + tag: v0.1.7-forgejo-22 + +providerConfig: + edgeConnect: + organization: edp2 + region: EU + edgeConnectUrl: 
"https://hub.apps.edge.platform.mg3.mdb.osc.live" + cloudlet: + name: Hamburg + organization: TelekomOP + edgeConnectK8s: + sizer: + sidecarImage: edp.buildth.ing/devfw-cicd/ci-sizer-collector:0.0.4 + +garm: + logging: + logLevel: info diff --git a/otc/benchmark.t09.de/stacks/observability-client/metrics-server.yaml b/otc/benchmark.t09.de/stacks/observability-client/metrics-server.yaml new file mode 100644 index 0000000..454a0b7 --- /dev/null +++ b/otc/benchmark.t09.de/stacks/observability-client/metrics-server.yaml @@ -0,0 +1,29 @@ +apiVersion: argoproj.io/v1alpha1 +kind: Application +metadata: + name: metrics-server + namespace: argocd + labels: + env: dev +spec: + project: default + syncPolicy: + automated: + selfHeal: true + syncOptions: + - CreateNamespace=true + retry: + limit: -1 + destination: + name: in-cluster + namespace: observability + sources: + - chart: metrics-server + repoURL: https://kubernetes-sigs.github.io/metrics-server/ + targetRevision: 3.12.2 + helm: + valueFiles: + - $values/otc/benchmark.t09.de/stacks/observability-client/metrics-server/values.yaml + - repoURL: https://edp.buildth.ing/DevFW-CICD/stacks-instances + targetRevision: HEAD + ref: values diff --git a/otc/benchmark.t09.de/stacks/observability-client/metrics-server/values.yaml b/otc/benchmark.t09.de/stacks/observability-client/metrics-server/values.yaml new file mode 100644 index 0000000..e96ba41 --- /dev/null +++ b/otc/benchmark.t09.de/stacks/observability-client/metrics-server/values.yaml @@ -0,0 +1,4 @@ +metrics: + enabled: true +serviceMonitor: + enabled: true diff --git a/otc/benchmark.t09.de/stacks/observability-client/vector.yaml b/otc/benchmark.t09.de/stacks/observability-client/vector.yaml new file mode 100644 index 0000000..a56dbe8 --- /dev/null +++ b/otc/benchmark.t09.de/stacks/observability-client/vector.yaml @@ -0,0 +1,29 @@ +apiVersion: argoproj.io/v1alpha1 +kind: Application +metadata: + name: vector + namespace: argocd + labels: + env: dev +spec: + project: 
default + syncPolicy: + automated: + selfHeal: true + syncOptions: + - CreateNamespace=true + retry: + limit: -1 + destination: + name: in-cluster + namespace: observability + sources: + - chart: vector + repoURL: https://helm.vector.dev + targetRevision: 0.43.0 + helm: + valueFiles: + - $values/otc/benchmark.t09.de/stacks/observability-client/vector/values.yaml + - repoURL: https://edp.buildth.ing/DevFW-CICD/stacks-instances + targetRevision: HEAD + ref: values diff --git a/otc/benchmark.t09.de/stacks/observability-client/vector/values.yaml b/otc/benchmark.t09.de/stacks/observability-client/vector/values.yaml new file mode 100644 index 0000000..3fb5e53 --- /dev/null +++ b/otc/benchmark.t09.de/stacks/observability-client/vector/values.yaml @@ -0,0 +1,68 @@ +# -- Enable deployment of vector +role: Agent +dataDir: /vector-data-dir +resources: {} +args: + - -w + - --config-dir + - /etc/vector/ +env: + - name: VECTOR_USER + valueFrom: + secretKeyRef: + name: simple-user-secret + key: username + - name: VECTOR_PASSWORD + valueFrom: + secretKeyRef: + name: simple-user-secret + key: password +containerPorts: + - name: prom-exporter + containerPort: 9090 + protocol: TCP +service: + enabled: false +customConfig: + data_dir: /vector-data-dir + api: + enabled: false + address: 0.0.0.0:8686 + playground: true + sources: + k8s: + type: kubernetes_logs + internal_metrics: + type: internal_metrics + transforms: + parser: + type: remap + inputs: [k8s] + source: | + ._msg = parse_json(.message) ?? 
.message + del(.message) + # Add the cluster environment to the log event + .cluster_environment = "benchmark" + sinks: + vlogs: + type: elasticsearch + inputs: [parser] + endpoints: + - https://o12y.observability./insert/elasticsearch/ + auth: + strategy: basic + user: ${VECTOR_USER} + password: ${VECTOR_PASSWORD} + mode: bulk + api_version: v8 + compression: gzip + healthcheck: + enabled: false + request: + headers: + AccountID: "0" + ProjectID: "0" + query: + _msg_field: _msg + _time_field: _time + _stream_fields: cluster_environment,kubernetes.container_name,kubernetes.namespace \ No newline at end of file diff --git a/otc/benchmark.t09.de/stacks/observability-client/vm-client-stack.yaml b/otc/benchmark.t09.de/stacks/observability-client/vm-client-stack.yaml new file mode 100644 index 0000000..bcc2fbc --- /dev/null +++ b/otc/benchmark.t09.de/stacks/observability-client/vm-client-stack.yaml @@ -0,0 +1,30 @@ +apiVersion: argoproj.io/v1alpha1 +kind: Application +metadata: + name: vm-client + namespace: argocd + labels: + env: dev +spec: + project: default + syncPolicy: + automated: + selfHeal: true + syncOptions: + - CreateNamespace=true + destination: + name: in-cluster + namespace: observability + sources: + - chart: victoria-metrics-k8s-stack + repoURL: https://victoriametrics.github.io/helm-charts/ + targetRevision: 0.48.1 + helm: + valueFiles: + - $values/otc/benchmark.t09.de/stacks/observability-client/vm-client-stack/values.yaml + - repoURL: https://edp.buildth.ing/DevFW-CICD/stacks-instances + targetRevision: HEAD + ref: values + - repoURL: https://edp.buildth.ing/DevFW-CICD/stacks-instances + targetRevision: HEAD + path: "otc/benchmark.t09.de/stacks/observability-client/vm-client-stack/manifests" diff --git a/otc/benchmark.t09.de/stacks/observability-client/vm-client-stack/values.yaml b/otc/benchmark.t09.de/stacks/observability-client/vm-client-stack/values.yaml new file mode 100644 index 0000000..dde927b --- /dev/null +++ 
b/otc/benchmark.t09.de/stacks/observability-client/vm-client-stack/values.yaml @@ -0,0 +1,1288 @@ +global: + # -- Cluster label to use for dashboards and rules + clusterLabel: cluster + # -- Global license configuration + license: + key: "" + keyRef: {} + # name: secret-license + # key: license + cluster: + # -- K8s cluster domain suffix, uses for building storage pods' FQDN. Details are [here](https://kubernetes.io/docs/tasks/administer-cluster/dns-custom-nameservers/) + dnsDomain: cluster.local. + +# -- Override chart name +nameOverride: "" +# -- Resource full name override +fullnameOverride: "" +# -- Tenant to use for Grafana datasources and remote write +tenant: "0" +# -- If this chart is used in "Argocd" with "releaseName" field then +# VMServiceScrapes couldn't select the proper services. +# For correct working need set value 'argocdReleaseOverride=$ARGOCD_APP_NAME' +argocdReleaseOverride: "" + +# -- VictoriaMetrics Operator dependency chart configuration. More values can be found [here](https://docs.victoriametrics.com/helm/victoriametrics-operator#parameters). Also checkout [here](https://docs.victoriametrics.com/operator/vars) possible ENV variables to configure operator behaviour +victoria-metrics-operator: + enabled: true + crds: + plain: true + cleanup: + enabled: true + image: + repository: bitnami/kubectl + pullPolicy: IfNotPresent + serviceMonitor: + enabled: true + operator: + # -- By default, operator converts prometheus-operator objects. 
+ disable_prometheus_converter: false + # group pinguin added the admissionWebhooks value according to https://docs.victoriametrics.com/helm/victoriametrics-k8s-stack/#argocd-issues + admissionWebhooks: + certManager: + enabled: true + +defaultDashboards: + # -- Enable custom dashboards installation + enabled: false + defaultTimezone: utc + labels: {} + annotations: {} + grafanaOperator: + # -- Create dashboards as CRDs (requires grafana-operator to be installed) + enabled: false + spec: + instanceSelector: + matchLabels: + dashboards: grafana + allowCrossNamespaceImport: false + # -- Create dashboards as ConfigMap despite dependency it requires is not installed + dashboards: + victoriametrics-vmalert: + enabled: true + victoriametrics-operator: + enabled: true + # -- In ArgoCD using client-side apply this dashboard reaches annotations size limit and causes k8s issues without server side apply + # See [this issue](https://github.com/VictoriaMetrics/helm-charts/tree/master/charts/victoria-metrics-k8s-stack#metadataannotations-too-long-must-have-at-most-262144-bytes-on-dashboards) + node-exporter-full: + enabled: true + +# -- Create default rules for monitoring the cluster +defaultRules: + # -- Labels, which are used for grouping results of the queries. 
Note that these labels are joined with `.Values.global.clusterLabel` + additionalGroupByLabels: [] + create: true + + # -- Common properties for VMRule groups + group: + spec: + # -- Optional HTTP URL parameters added to each rule request + params: {} + + # -- Common properties for all VMRules + rule: + spec: + # -- Additional labels for all VMRules + labels: {} + # -- Additional annotations for all VMRules + annotations: {} + + # -- Common properties for VMRules alerts + alerting: + spec: + # -- Additional labels for VMRule alerts + labels: {} + # -- Additional annotations for VMRule alerts + annotations: {} + + # -- Common properties for VMRules recording rules + recording: + spec: + # -- Additional labels for VMRule recording rules + labels: {} + # -- Additional annotations for VMRule recording rules + annotations: {} + + # -- Per rule properties + rules: {} + # CPUThrottlingHigh: + # create: true + # spec: + # for: 15m + # labels: + # severity: critical + # -- Rule group properties + groups: + etcd: + create: true + # -- Common properties for all rules in a group + rules: {} + # spec: + # annotations: + # dashboard: https://example.com/dashboard/1 + general: + create: true + rules: {} + k8sContainerCpuLimits: + create: true + rules: {} + k8sContainerCpuRequests: + create: true + rules: {} + k8sContainerCpuUsageSecondsTotal: + create: true + rules: {} + k8sContainerMemoryLimits: + create: true + rules: {} + k8sContainerMemoryRequests: + create: true + rules: {} + k8sContainerMemoryRss: + create: true + rules: {} + k8sContainerMemoryCache: + create: true + rules: {} + k8sContainerMemoryWorkingSetBytes: + create: true + rules: {} + k8sContainerMemorySwap: + create: true + rules: {} + k8sPodOwner: + create: true + rules: {} + k8sContainerResource: + create: true + rules: {} + kubeApiserver: + create: true + rules: {} + kubeApiserverAvailability: + create: true + rules: {} + kubeApiserverBurnrate: + create: true + rules: {} + kubeApiserverHistogram: + create: true + 
rules: {} + kubeApiserverSlos: + create: true + rules: {} + kubelet: + create: true + rules: {} + kubePrometheusGeneral: + create: true + rules: {} + kubePrometheusNodeRecording: + create: true + rules: {} + kubernetesApps: + create: true + rules: {} + targetNamespace: ".*" + kubernetesResources: + create: true + rules: {} + kubernetesStorage: + create: true + rules: {} + targetNamespace: ".*" + kubernetesSystem: + create: true + rules: {} + kubernetesSystemKubelet: + create: true + rules: {} + kubernetesSystemApiserver: + create: true + rules: {} + kubernetesSystemControllerManager: + create: true + rules: {} + kubeScheduler: + create: true + rules: {} + kubernetesSystemScheduler: + create: true + rules: {} + kubeStateMetrics: + create: true + rules: {} + nodeNetwork: + create: true + rules: {} + node: + create: true + rules: {} + vmagent: + create: true + rules: {} + vmsingle: + create: true + rules: {} + vmcluster: + create: true + rules: {} + vmHealth: + create: true + rules: {} + vmoperator: + create: true + rules: {} + alertmanager: + create: true + rules: {} + + # -- Runbook url prefix for default rules + runbookUrl: https://runbooks.prometheus-operator.dev/runbooks + + # -- Labels for default rules + labels: {} + # -- Annotations for default rules + annotations: {} + +# -- Provide custom recording or alerting rules to be deployed into the cluster. 
+additionalVictoriaMetricsMap: +# rule-name: +# groups: +# - name: my_group +# rules: +# - record: my_record +# expr: 100 * my_record + +external: + grafana: + # -- External Grafana host + host: "" + # -- External Grafana datasource name + datasource: VictoriaMetrics + # -- External VM read and write URLs + vm: + read: + url: "" + # bearerTokenSecret: + # name: dbaas-read-access-token + # key: bearerToken + write: + url: "" + # bearerTokenSecret: + # name: dbaas-read-access-token + # key: bearerToken + +# Configures vmsingle params +vmsingle: + # -- VMSingle annotations + annotations: {} + # -- Create VMSingle CR + enabled: false + # -- Full spec for VMSingle CRD. Allowed values describe [here](https://docs.victoriametrics.com/operator/api#vmsinglespec) + spec: + port: "8429" + # -- Data retention period. Possible units character: h(ours), d(ays), w(eeks), y(ears), if no unit character specified - month. The minimum retention period is 24h. See these [docs](https://docs.victoriametrics.com/single-server-victoriametrics/#retention) + retentionPeriod: "1" + replicaCount: 1 + extraArgs: {} + storage: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 20Gi + ingress: + # -- Enable deployment of ingress for server component + enabled: false + # -- Ingress annotations + annotations: + {} + # kubernetes.io/ingress.class: nginx + # kubernetes.io/tls-acme: "true" + # -- Ingress extra labels + labels: {} + # -- Ingress default path + path: "" + # -- Ingress path type + pathType: Prefix + # -- Ingress controller class name + ingressClassName: "" + + # -- Array of host objects + hosts: [] + # - vmsingle.domain.com + # -- Extra paths to prepend to every host configuration. This is useful when working with annotation based services. 
+ extraPaths: [] + # - path: /* + # pathType: Prefix + # backend: + # service: + # name: ssl-redirect + # port: + # name: service + + # -- Array of TLS objects + tls: [] + # - secretName: vmsingle-ingress-tls + # hosts: + # - vmsingle.domain.com + +vmcluster: + # -- Create VMCluster CR + enabled: false + # -- VMCluster annotations + annotations: {} + # -- Full spec for VMCluster CRD. Allowed values described [here](https://docs.victoriametrics.com/operator/api#vmclusterspec) + spec: + # -- Data retention period. Possible units character: h(ours), d(ays), w(eeks), y(ears), if no unit character specified - month. The minimum retention period is 24h. See these [docs](https://docs.victoriametrics.com/single-server-victoriametrics/#retention) + retentionPeriod: "1" + replicationFactor: 2 + vmstorage: + replicaCount: 2 + storageDataPath: /vm-data + storage: + volumeClaimTemplate: + spec: + resources: + requests: + storage: 10Gi + resources: + {} + # limits: + # cpu: "1" + # memory: 1500Mi + vmselect: + # -- Set this value to false to disable VMSelect + enabled: true + port: "8481" + replicaCount: 2 + cacheMountPath: /select-cache + extraArgs: {} + storage: + volumeClaimTemplate: + spec: + resources: + requests: + storage: 2Gi + resources: + {} + # limits: + # cpu: "1" + # memory: "1000Mi" + # requests: + # cpu: "0.5" + # memory: "500Mi" + vminsert: + # -- Set this value to false to disable VMInsert + enabled: true + port: "8480" + replicaCount: 2 + extraArgs: {} + resources: + {} + # limits: + # cpu: "1" + # memory: 1000Mi + # requests: + # cpu: "0.5" + # memory: "500Mi" + + ingress: + storage: + # -- Enable deployment of ingress for server component + enabled: false + + # -- Ingress annotations + annotations: {} + # kubernetes.io/ingress.class: nginx + # kubernetes.io/tls-acme: "true" + + # -- Ingress extra labels + labels: {} + + # -- Ingress controller class name + ingressClassName: "" + + # -- Ingress path type + pathType: Prefix + + # -- Ingress default path + path: 
"" + + # -- Array of host objects + hosts: [] + # - vmstorage.domain.com + + # -- Extra paths to prepend to every host configuration. This is useful when working with annotation based services. + extraPaths: [] + # - path: /* + # pathType: Prefix + # backend: + # service: + # name: ssl-redirect + # port: + # name: service + + # -- Array of TLS objects + tls: [] + # - secretName: vmstorage-ingress-tls + # hosts: + # - vmstorage.domain.com + select: + # -- Enable deployment of ingress for server component + enabled: false + + # -- Ingress annotations + annotations: {} + # kubernetes.io/ingress.class: nginx + # kubernetes.io/tls-acme: "true" + + # -- Ingress extra labels + labels: {} + + # -- Ingress controller class name + ingressClassName: "" + + # -- Ingress path type + pathType: Prefix + + # -- Ingress default path + path: '{{ dig "extraArgs" "http.pathPrefix" "/" .Values.vmcluster.spec.vmselect }}' + + # -- Array of host objects + hosts: [] + # - vmselect.domain.com + # -- Extra paths to prepend to every host configuration. This is useful when working with annotation based services. + extraPaths: [] + # - path: /* + # pathType: Prefix + # backend: + # service: + # name: ssl-redirect + # port: + # name: service + + # -- Array of TLS objects + tls: [] + # - secretName: vmselect-ingress-tls + # hosts: + # - vmselect.domain.com + insert: + # -- Enable deployment of ingress for server component + enabled: false + + # -- Ingress annotations + annotations: + {} + # kubernetes.io/ingress.class: nginx + # kubernetes.io/tls-acme: "true" + + # -- Ingress extra labels + labels: {} + + # -- Ingress controller class name + ingressClassName: "" + + # -- Ingress path type + pathType: Prefix + + # -- Ingress default path + path: '{{ dig "extraArgs" "http.pathPrefix" "/" .Values.vmcluster.spec.vminsert }}' + + # -- Array of host objects + hosts: [] + # - vminsert.domain.com + # -- Extra paths to prepend to every host configuration. 
This is useful when working with annotation based services. + extraPaths: [] + # - path: /* + # pathType: Prefix + # backend: + # service: + # name: ssl-redirect + # port: + # name: service + + # -- Array of TLS objects + tls: [] + # - secretName: vminsert-ingress-tls + # hosts: + # - vminsert.domain.com + +alertmanager: + # -- Create VMAlertmanager CR + enabled: false + # -- Alertmanager annotations + annotations: {} + # -- (object) Full spec for VMAlertmanager CRD. Allowed values described [here](https://docs.victoriametrics.com/operator/api#vmalertmanagerspec) + spec: + replicaCount: 1 + port: "9093" + selectAllByDefault: true + image: + tag: v0.28.1 + externalURL: "" + routePrefix: / + + # -- (string) If this one defined, it will be used for alertmanager configuration and config parameter will be ignored + configSecret: "" + # -- + # @raw + # enable storing .Values.alertmanager.config in VMAlertmanagerConfig instead of k8s Secret. + # Note: VMAlertmanagerConfig and plain Alertmanager config structures are not equal. + # If you're migrating existing config, please make sure that `.Values.alertmanager.config`: + # - with `useManagedConfig: false` has structure described [here](https://prometheus.io/docs/alerting/latest/configuration/). + # - with `useManagedConfig: true` has structure described [here](https://docs.victoriametrics.com/operator/api/#vmalertmanagerconfig). 
+ useManagedConfig: false + # -- (object) Alertmanager configuration + config: + route: + receiver: "blackhole" + # group_by: ["alertgroup", "job"] + # group_wait: 30s + # group_interval: 5m + # repeat_interval: 12h + # routes: + # + # # Duplicate code_owner routes to teams + # # These will send alerts to team channels but continue + # # processing through the rest of the tree to handled by on-call + # - matchers: + # - code_owner_channel!="" + # - severity=~"info|warning|critical" + # group_by: ["code_owner_channel", "alertgroup", "job"] + # receiver: slack-code-owners + # + # # Standard on-call routes + # - matchers: + # - severity=~"info|warning|critical" + # receiver: slack-monitoring + # continue: true + # + # inhibit_rules: + # - target_matchers: + # - severity=~"warning|info" + # source_matchers: + # - severity=critical + # equal: + # - cluster + # - namespace + # - alertname + # - target_matchers: + # - severity=info + # source_matchers: + # - severity=warning + # equal: + # - cluster + # - namespace + # - alertname + # - target_matchers: + # - severity=info + # source_matchers: + # - alertname=InfoInhibitor + # equal: + # - cluster + # - namespace + + receivers: + - name: blackhole + # - name: "slack-monitoring" + # slack_configs: + # - channel: "#channel" + # send_resolved: true + # title: '{{ template "slack.monzo.title" . }}' + # icon_emoji: '{{ template "slack.monzo.icon_emoji" . }}' + # color: '{{ template "slack.monzo.color" . }}' + # text: '{{ template "slack.monzo.text" . }}' + # actions: + # - type: button + # text: "Runbook :green_book:" + # url: "{{ (index .Alerts 0).Annotations.runbook_url }}" + # - type: button + # text: "Query :mag:" + # url: "{{ (index .Alerts 0).GeneratorURL }}" + # - type: button + # text: "Dashboard :grafana:" + # url: "{{ (index .Alerts 0).Annotations.dashboard }}" + # - type: button + # text: "Silence :no_bell:" + # url: '{{ template "__alert_silence_link" . 
}}' + # - type: button + # text: '{{ template "slack.monzo.link_button_text" . }}' + # url: "{{ .CommonAnnotations.link_url }}" + # - name: slack-code-owners + # slack_configs: + # - channel: "#{{ .CommonLabels.code_owner_channel }}" + # send_resolved: true + # title: '{{ template "slack.monzo.title" . }}' + # icon_emoji: '{{ template "slack.monzo.icon_emoji" . }}' + # color: '{{ template "slack.monzo.color" . }}' + # text: '{{ template "slack.monzo.text" . }}' + # actions: + # - type: button + # text: "Runbook :green_book:" + # url: "{{ (index .Alerts 0).Annotations.runbook }}" + # - type: button + # text: "Query :mag:" + # url: "{{ (index .Alerts 0).GeneratorURL }}" + # - type: button + # text: "Dashboard :grafana:" + # url: "{{ (index .Alerts 0).Annotations.dashboard }}" + # - type: button + # text: "Silence :no_bell:" + # url: '{{ template "__alert_silence_link" . }}' + # - type: button + # text: '{{ template "slack.monzo.link_button_text" . }}' + # url: "{{ .CommonAnnotations.link_url }}" + # + # -- Better alert templates for [slack source](https://gist.github.com/milesbxf/e2744fc90e9c41b47aa47925f8ff6512) + monzoTemplate: + enabled: true + + # -- (object) Extra alert templates + templateFiles: + {} + # template_1.tmpl: |- + # {{ define "hello" -}} + # hello, Victoria! + # {{- end }} + # template_2.tmpl: "" + + # -- (object) Alertmanager ingress configuration + ingress: + enabled: false + # For Kubernetes >= 1.18 you should specify the ingress-controller via the field ingressClassName + # See https://kubernetes.io/blog/2020/04/02/improvements-to-the-ingress-api-in-kubernetes-1.18/#specifying-the-class-of-an-ingress + # ingressClassName: nginx + # Values can be templated + annotations: + {} + # kubernetes.io/ingress.class: nginx + # kubernetes.io/tls-acme: "true" + labels: {} + path: '{{ .Values.alertmanager.spec.routePrefix | default "/" }}' + pathType: Prefix + + hosts: + - alertmanager.domain.com + # -- Extra paths to prepend to every host configuration. 
This is useful when working with annotation based services. + extraPaths: [] + # - path: /* + # pathType: Prefix + # backend: + # service: + # name: ssl-redirect + # port: + # name: service + tls: [] + # - secretName: alertmanager-ingress-tls + # hosts: + # - alertmanager.domain.com + +vmalert: + # -- VMAlert annotations + annotations: {} + # -- Create VMAlert CR + enabled: false + + # -- Controls whether VMAlert should use VMAgent or VMInsert as a target for remotewrite + remoteWriteVMAgent: false + # -- (object) Full spec for VMAlert CRD. Allowed values described [here](https://docs.victoriametrics.com/operator/api#vmalertspec) + spec: + port: "8080" + selectAllByDefault: true + evaluationInterval: 20s + extraArgs: + http.pathPrefix: "/" + + # External labels to add to all generated recording rules and alerts + externalLabels: {} + + # -- (object) Extra VMAlert annotation templates + templateFiles: + {} + # template_1.tmpl: |- + # {{ define "hello" -}} + # hello, Victoria! + # {{- end }} + # template_2.tmpl: "" + + # -- Allows to configure static notifiers, discover notifiers via Consul and DNS, + # see specification [here](https://docs.victoriametrics.com/vmalert/#notifier-configuration-file). + # This configuration will be created as separate secret and mounted to VMAlert pod. 
+ additionalNotifierConfigs: {} + # dns_sd_configs: + # - names: + # - my.domain.com + # type: 'A' + # port: 9093 + # -- (object) VMAlert ingress config + ingress: + enabled: false + # For Kubernetes >= 1.18 you should specify the ingress-controller via the field ingressClassName + # See https://kubernetes.io/blog/2020/04/02/improvements-to-the-ingress-api-in-kubernetes-1.18/#specifying-the-class-of-an-ingress + # ingressClassName: nginx + # Values can be templated + annotations: + {} + # kubernetes.io/ingress.class: nginx + # kubernetes.io/tls-acme: "true" + labels: {} + path: "" + pathType: Prefix + + hosts: + - vmalert.domain.com + # -- Extra paths to prepend to every host configuration. This is useful when working with annotation based services. + extraPaths: [] + # - path: /* + # pathType: Prefix + # backend: + # service: + # name: ssl-redirect + # port: + # name: service + tls: [] + # - secretName: vmalert-ingress-tls + # hosts: + # - vmalert.domain.com + +vmauth: + # -- Enable VMAuth CR + enabled: false + # -- VMAuth annotations + annotations: {} + # -- (object) Full spec for VMAuth CRD. 
Allowed values described [here](https://docs.victoriametrics.com/operator/api#vmauthspec) + # It's possible to use given below predefined variables in spec: + # * `{{ .vm.read }}` - parsed vmselect, vmsingle or external.vm.read URL + # * `{{ .vm.write }}` - parsed vminsert, vmsingle or external.vm.write URL + spec: + port: "8427" + ingress: + class_name: nginx + annotations: + nginx.ingress.kubernetes.io/force-ssl-redirect: "true" + cert-manager.io/cluster-issuer: main + host: o12y.benchmark.t09.de + tlsHosts: + - o12y.benchmark.t09.de + tlsSecretName: vmauth-tls-secret + unauthorizedUserAccessSpec: {} + selectAllByDefault: true + +vmagent: + # -- Create VMAgent CR + enabled: true + # -- VMAgent annotations + annotations: {} + # -- Remote write configuration of VMAgent, allowed parameters defined in a [spec](https://docs.victoriametrics.com/operator/api#vmagentremotewritespec) + additionalRemoteWrites: + # [] + - url: https://o12y.observability./api/v1/write + basicAuth: + username: + name: simple-user-secret + key: username + password: + name: simple-user-secret + key: password + # -- (object) Full spec for VMAgent CRD. Allowed values described [here](https://docs.victoriametrics.com/operator/api#vmagentspec) + spec: + port: "8429" + selectAllByDefault: true + scrapeInterval: 20s + externalLabels: + cluster_environment: "benchmark" + # For multi-cluster setups it is useful to use "cluster" label to identify the metrics source. + # For example: + # cluster: cluster-name + extraArgs: + promscrape.streamParse: "true" + # Do not store original labels in vmagent's memory by default. This reduces the amount of memory used by vmagent + # but makes vmagent debugging UI less informative. 
See: https://docs.victoriametrics.com/vmagent/#relabel-debug + promscrape.dropOriginalLabels: "true" + # -- (object) VMAgent ingress configuration + ingress: + enabled: false + # For Kubernetes >= 1.18 you should specify the ingress-controller via the field ingressClassName + # See https://kubernetes.io/blog/2020/04/02/improvements-to-the-ingress-api-in-kubernetes-1.18/#specifying-the-class-of-an-ingress + # ingressClassName: nginx + # Values can be templated + annotations: + {} + # kubernetes.io/ingress.class: nginx + # kubernetes.io/tls-acme: "true" + labels: {} + path: "" + pathType: Prefix + + hosts: + - vmagent.domain.com + extraPaths: [] + # - path: /* + # pathType: Prefix + # backend: + # service: + # name: ssl-redirect + # port: + # name: service + tls: [] + # - secretName: vmagent-ingress-tls + # hosts: + # - vmagent.domain.com + +defaultDatasources: + grafanaOperator: + # -- Create datasources as CRDs (requires grafana-operator to be installed) + enabled: false + annotations: {} + spec: + instanceSelector: + matchLabels: + dashboards: grafana + allowCrossNamespaceImport: false + victoriametrics: + # -- Create per replica prometheus compatible datasource + perReplica: false + # -- List of prometheus compatible datasource configurations. + # VM `url` will be added to each of them in templates. + datasources: + - name: VictoriaMetrics + type: prometheus + access: proxy + isDefault: true + - name: VictoriaMetrics (DS) + isDefault: false + access: proxy + type: victoriametrics-metrics-datasource + version: "0.15.1" + # -- List of alertmanager datasources. + # Alertmanager generated `url` will be added to each datasource in template if alertmanager is enabled + alertmanager: + # -- Create per replica alertmanager compatible datasource + perReplica: false + datasources: + - name: Alertmanager + access: proxy + jsonData: + implementation: prometheus + # -- Configure additional grafana datasources (passed through tpl). 
+ # Check [here](http://docs.grafana.org/administration/provisioning/#datasources) for details + extra: + - name: victoria-logs + access: proxy + type: VictoriaLogs + url: http://vlogs-victorialogs:9428 + version: 1 + +# -- Grafana dependency chart configuration. For possible values refer [here](https://github.com/grafana/helm-charts/tree/main/charts/grafana#configuration) +grafana: + enabled: false + # all values for grafana helm chart can be specified here + persistence: + enabled: true + type: pvc + storageClassName: "default" + sidecar: + datasources: + enabled: true + initDatasources: true + label: grafana_datasource + dashboards: + provider: + name: default + orgid: 1 + folder: /var/lib/grafana/dashboards + defaultFolderName: default + enabled: true + multicluster: false + + # -- Create datasource configmap even if grafana deployment has been disabled + forceDeployDatasource: false + + # Uncomment the block below, if you want to enable VictoriaMetrics Datasource in Grafana: + # Note that Grafana will need internet access to install the datasource plugin. + # + # plugins: + # - victoriametrics-metrics-datasource + + ingress: + enabled: false + # For Kubernetes >= 1.18 you should specify the ingress-controller via the field ingressClassName + # See https://kubernetes.io/blog/2020/04/02/improvements-to-the-ingress-api-in-kubernetes-1.18/#specifying-the-class-of-an-ingress + # ingressClassName: nginx + # Values can be templated + annotations: + {} + # kubernetes.io/ingress.class: nginx + # kubernetes.io/tls-acme: "true" + labels: {} + path: / + pathType: Prefix + + hosts: + - grafana.domain.com + # -- Extra paths to prepend to every host configuration. This is useful when working with annotation based services. 
+ extraPaths: [] + # - path: /* + # pathType: Prefix + # backend: + # service: + # name: ssl-redirect + # port: + # name: service + tls: [] + # - secretName: grafana-ingress-tls + # hosts: + # - grafana.domain.com + + # -- Grafana VM scrape config + vmScrape: + # whether we should create a service scrape resource for grafana + enabled: true + + # -- [Scrape configuration](https://docs.victoriametrics.com/operator/api#vmservicescrapespec) for Grafana + spec: + selector: + matchLabels: + app.kubernetes.io/name: '{{ include "grafana.name" .Subcharts.grafana }}' + endpoints: + - port: '{{ .Values.grafana.service.portName }}' + +# -- prometheus-node-exporter dependency chart configuration. For possible values check [here](https://github.com/prometheus-community/helm-charts/blob/main/charts/prometheus-node-exporter/values.yaml) +prometheus-node-exporter: + enabled: true + + # all values for prometheus-node-exporter helm chart can be specified here + service: + # Add the 'node-exporter' label to be used by serviceMonitor to match standard common usage in rules and grafana dashboards + # + labels: + jobLabel: node-exporter + extraArgs: + - --collector.filesystem.ignored-mount-points=^/(dev|proc|sys|var/lib/docker/.+|var/lib/kubelet/.+)($|/) + - --collector.filesystem.ignored-fs-types=^(autofs|binfmt_misc|bpf|cgroup2?|configfs|debugfs|devpts|devtmpfs|fusectl|hugetlbfs|iso9660|mqueue|nsfs|overlay|proc|procfs|pstore|rpc_pipefs|securityfs|selinuxfs|squashfs|erofs|sysfs|tracefs)$ + # -- Node Exporter VM scrape config + vmScrape: + # whether we should create a service scrape resource for node-exporter + enabled: true + + # -- [Scrape configuration](https://docs.victoriametrics.com/operator/api#vmservicescrapespec) for Node Exporter + spec: + jobLabel: jobLabel + selector: + matchLabels: + app.kubernetes.io/name: '{{ include "prometheus-node-exporter.name" (index .Subcharts "prometheus-node-exporter") }}' + endpoints: + - port: metrics + metricRelabelConfigs: + - action: drop + 
source_labels: [mountpoint] + regex: "/var/lib/kubelet/pods.+" +# -- kube-state-metrics dependency chart configuration. For possible values check [here](https://github.com/prometheus-community/helm-charts/blob/main/charts/kube-state-metrics/values.yaml) +kube-state-metrics: + enabled: true + # -- [Scrape configuration](https://docs.victoriametrics.com/operator/api#vmservicescrapespec) for Kube State Metrics + vmScrape: + enabled: true + spec: + selector: + matchLabels: + app.kubernetes.io/name: '{{ include "kube-state-metrics.name" (index .Subcharts "kube-state-metrics") }}' + app.kubernetes.io/instance: '{{ include "vm.release" . }}' + endpoints: + - port: http + honorLabels: true + metricRelabelConfigs: + - action: labeldrop + regex: (uid|container_id|image_id) + jobLabel: app.kubernetes.io/name + +# -- Component scraping the kubelets +kubelet: + enabled: true + vmScrapes: + # -- Enable scraping /metrics/cadvisor from kubelet's service + cadvisor: + enabled: true + spec: + path: /metrics/cadvisor + # -- Enable scraping /metrics/probes from kubelet's service + probes: + enabled: true + spec: + path: /metrics/probes + # -- Enabled scraping /metrics/resource from kubelet's service + resources: + enabled: true + spec: + path: /metrics/resource + kubelet: + spec: {} + # -- Spec for VMNodeScrape CRD is [here](https://docs.victoriametrics.com/operator/api.html#vmnodescrapespec) + vmScrape: + kind: VMNodeScrape + spec: + scheme: "https" + honorLabels: true + interval: "30s" + scrapeTimeout: "5s" + tlsConfig: + insecureSkipVerify: true + caFile: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt + bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token + # drop high cardinality label and useless metrics for cadvisor and kubelet + metricRelabelConfigs: + - action: labeldrop + regex: (uid) + - action: labeldrop + regex: (id|name) + - action: drop + source_labels: [__name__] + regex: 
(rest_client_request_duration_seconds_bucket|rest_client_request_duration_seconds_sum|rest_client_request_duration_seconds_count) + relabelConfigs: + - action: labelmap + regex: __meta_kubernetes_node_label_(.+) + - sourceLabels: [__metrics_path__] + targetLabel: metrics_path + - targetLabel: job + replacement: kubelet + # ignore timestamps of cadvisor's metrics by default + # more info here https://github.com/VictoriaMetrics/VictoriaMetrics/issues/4697#issuecomment-1656540535 + honorTimestamps: false +# Component scraping the kube api server +kubeApiServer: + # -- Enable Kube Api Server metrics scraping + enabled: true + # -- Spec for VMServiceScrape CRD is [here](https://docs.victoriametrics.com/operator/api.html#vmservicescrapespec) + vmScrape: + spec: + endpoints: + - bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token + # bearerTokenSecret: + # key: "" + port: https + scheme: https + tlsConfig: + caFile: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt + serverName: kubernetes + jobLabel: component + namespaceSelector: + matchNames: + - default + selector: + matchLabels: + component: apiserver + provider: kubernetes + +# Component scraping the kube controller manager +kubeControllerManager: + # -- Enable kube controller manager metrics scraping + enabled: true + + # -- If your kube controller manager is not deployed as a pod, specify IPs it can be found on + endpoints: [] + # - 10.141.4.22 + # - 10.141.4.23 + # - 10.141.4.24 + + # If using kubeControllerManager.endpoints only the port and targetPort are used + service: + # -- Create service for kube controller manager metrics scraping + enabled: true + # -- Kube controller manager service port + port: 10257 + # -- Kube controller manager service target port + targetPort: 10257 + # -- Kube controller manager service pod selector + selector: + component: kube-controller-manager + + # -- Spec for VMServiceScrape CRD is 
[here](https://docs.victoriametrics.com/operator/api.html#vmservicescrapespec) + vmScrape: + spec: + jobLabel: jobLabel + namespaceSelector: + matchNames: + - kube-system + endpoints: + - bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token + # bearerTokenSecret: + # key: "" + port: http-metrics + scheme: https + tlsConfig: + caFile: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt + serverName: kubernetes + +# Component scraping kubeDns. Use either this or coreDns +kubeDns: + # -- Enabled KubeDNS metrics scraping + enabled: false + service: + # -- Create Service for KubeDNS metrics + enabled: false + # -- KubeDNS service ports + ports: + dnsmasq: + port: 10054 + targetPort: 10054 + skydns: + port: 10055 + targetPort: 10055 + # -- KubeDNS service pods selector + selector: + k8s-app: kube-dns + # -- Spec for VMServiceScrape CRD is [here](https://docs.victoriametrics.com/operator/api.html#vmservicescrapespec) + vmScrape: + spec: + jobLabel: jobLabel + namespaceSelector: + matchNames: [kube-system] + endpoints: + - port: http-metrics-dnsmasq + bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token + - port: http-metrics-skydns + bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token + +# Component scraping coreDns. 
Use either this or kubeDns +coreDns: + # -- Enabled CoreDNS metrics scraping + enabled: true + service: + # -- Create service for CoreDNS metrics + enabled: true + # -- CoreDNS service port + port: 9153 + # -- CoreDNS service target port + targetPort: 9153 + # -- CoreDNS service pod selector + selector: + k8s-app: kube-dns + + # -- Spec for VMServiceScrape CRD is [here](https://docs.victoriametrics.com/operator/api.html#vmservicescrapespec) + vmScrape: + spec: + jobLabel: jobLabel + namespaceSelector: + matchNames: [kube-system] + endpoints: + - port: http-metrics + bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token + +# Component scraping etcd +kubeEtcd: + # -- Enabled KubeETCD metrics scraping + enabled: true + + # -- If your etcd is not deployed as a pod, specify IPs it can be found on + endpoints: [] + # - 10.141.4.22 + # - 10.141.4.23 + # - 10.141.4.24 + + # Etcd service. If using kubeEtcd.endpoints only the port and targetPort are used + service: + # -- Enable service for ETCD metrics scraping + enabled: true + # -- ETCD service port + port: 2379 + # -- ETCD service target port + targetPort: 2379 + # -- ETCD service pods selector + selector: + component: etcd + + # -- Spec for VMServiceScrape CRD is [here](https://docs.victoriametrics.com/operator/api.html#vmservicescrapespec) + vmScrape: + spec: + jobLabel: jobLabel + namespaceSelector: + matchNames: [kube-system] + endpoints: + - bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token + # bearerTokenSecret: + # key: "" + port: http-metrics + scheme: https + tlsConfig: + caFile: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt + +# Component scraping kube scheduler +kubeScheduler: + # -- Enable KubeScheduler metrics scraping + enabled: true + + # -- If your kube scheduler is not deployed as a pod, specify IPs it can be found on + endpoints: [] + # - 10.141.4.22 + # - 10.141.4.23 + # - 10.141.4.24 + + # If using kubeScheduler.endpoints only the port and targetPort are used + 
service: + # -- Enable service for KubeScheduler metrics scrape + enabled: true + # -- KubeScheduler service port + port: 10259 + # -- KubeScheduler service target port + targetPort: 10259 + # -- KubeScheduler service pod selector + selector: + component: kube-scheduler + + # -- Spec for VMServiceScrape CRD is [here](https://docs.victoriametrics.com/operator/api.html#vmservicescrapespec) + vmScrape: + spec: + jobLabel: jobLabel + namespaceSelector: + matchNames: [kube-system] + endpoints: + - bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token + # bearerTokenSecret: + # key: "" + port: http-metrics + scheme: https + tlsConfig: + caFile: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt + +# Component scraping kube proxy +kubeProxy: + # -- Enable kube proxy metrics scraping + enabled: false + + # -- If your kube proxy is not deployed as a pod, specify IPs it can be found on + endpoints: [] + # - 10.141.4.22 + # - 10.141.4.23 + # - 10.141.4.24 + + service: + # -- Enable service for kube proxy metrics scraping + enabled: true + # -- Kube proxy service port + port: 10249 + # -- Kube proxy service target port + targetPort: 10249 + # -- Kube proxy service pod selector + selector: + k8s-app: kube-proxy + + # -- Spec for VMServiceScrape CRD is [here](https://docs.victoriametrics.com/operator/api.html#vmservicescrapespec) + vmScrape: + spec: + jobLabel: jobLabel + namespaceSelector: + matchNames: [kube-system] + endpoints: + - bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token + # bearerTokenSecret: + # key: "" + port: http-metrics + scheme: https + tlsConfig: + caFile: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt + +# -- Add extra objects dynamically to this chart +extraObjects: [] + diff --git a/otc/benchmark.t09.de/stacks/observability/grafana-operator.yaml b/otc/benchmark.t09.de/stacks/observability/grafana-operator.yaml new file mode 100644 index 0000000..6c208d5 --- /dev/null +++ 
b/otc/benchmark.t09.de/stacks/observability/grafana-operator.yaml @@ -0,0 +1,25 @@ +apiVersion: argoproj.io/v1alpha1 +kind: Application +metadata: + name: grafana-operator + namespace: argocd + labels: + env: dev +spec: + project: default + syncPolicy: + automated: + selfHeal: true + syncOptions: + - CreateNamespace=true + - ServerSideApply=true + destination: + name: in-cluster + namespace: observability + sources: + - chart: grafana-operator + repoURL: ghcr.io/grafana/helm-charts + targetRevision: v5.18.0 + - repoURL: https://edp.buildth.ing/DevFW-CICD/stacks-instances + targetRevision: HEAD + path: "otc/benchmark.t09.de/stacks/observability/grafana-operator/manifests" diff --git a/otc/benchmark.t09.de/stacks/observability/grafana-operator/manifests/argocd.yaml b/otc/benchmark.t09.de/stacks/observability/grafana-operator/manifests/argocd.yaml new file mode 100644 index 0000000..b348ff7 --- /dev/null +++ b/otc/benchmark.t09.de/stacks/observability/grafana-operator/manifests/argocd.yaml @@ -0,0 +1,9 @@ +apiVersion: grafana.integreatly.org/v1beta1 +kind: GrafanaDashboard +metadata: + name: argocd +spec: + instanceSelector: + matchLabels: + dashboards: "grafana" + url: "https://raw.githubusercontent.com/argoproj/argo-cd/refs/heads/master/examples/dashboard.json" diff --git a/otc/benchmark.t09.de/stacks/observability/grafana-operator/manifests/grafana.yaml b/otc/benchmark.t09.de/stacks/observability/grafana-operator/manifests/grafana.yaml new file mode 100644 index 0000000..199a104 --- /dev/null +++ b/otc/benchmark.t09.de/stacks/observability/grafana-operator/manifests/grafana.yaml @@ -0,0 +1,75 @@ +apiVersion: grafana.integreatly.org/v1beta1 +kind: Grafana +metadata: + name: grafana + labels: + dashboards: "grafana" +spec: + persistentVolumeClaim: + metadata: + annotations: + everest.io/disk-volume-type: GPSSD + everest.io/crypt-key-id: 71ebef9e-5575-4b05-a597-ee1f67c911e3 + spec: + storageClassName: csi-disk + accessModes: + - ReadWriteOnce + resources: + requests: 
+ storage: 10Gi + deployment: + spec: + template: + spec: + containers: + - name: grafana + env: + - name: OAUTH_CLIENT_SECRET + valueFrom: + secretKeyRef: + key: clientSecret + name: dex-grafana-client + config: + log.console: + level: debug + server: + root_url: "https://grafana.benchmark.t09.de" + auth: + disable_login: "true" + disable_login_form: "true" + auth.generic_oauth: + enabled: "true" + name: Forgejo + allow_sign_up: "true" + use_refresh_token: "true" + client_id: grafana + client_secret: $__env{OAUTH_CLIENT_SECRET} + scopes: openid email profile offline_access groups + auth_url: https://dex.benchmark.t09.de/auth + token_url: https://dex.benchmark.t09.de/token + api_url: https://dex.benchmark.t09.de/userinfo + redirect_uri: https://grafana.benchmark.t09.de/login/generic_oauth + role_attribute_path: "contains(groups[*], 'DevFW') && 'GrafanaAdmin' || 'None'" + allow_assign_grafana_admin: "true" + ingress: + metadata: + annotations: + cert-manager.io/cluster-issuer: main + nginx.ingress.kubernetes.io/force-ssl-redirect: "true" + spec: + ingressClassName: nginx + rules: + - host: grafana.benchmark.t09.de + http: + paths: + - backend: + service: + name: grafana-service + port: + number: 3000 + path: / + pathType: Prefix + tls: + - hosts: + - grafana.benchmark.t09.de + secretName: grafana-net-tls diff --git a/otc/benchmark.t09.de/stacks/observability/grafana-operator/manifests/ingress-nginx.yaml b/otc/benchmark.t09.de/stacks/observability/grafana-operator/manifests/ingress-nginx.yaml new file mode 100644 index 0000000..c13d6a2 --- /dev/null +++ b/otc/benchmark.t09.de/stacks/observability/grafana-operator/manifests/ingress-nginx.yaml @@ -0,0 +1,9 @@ +apiVersion: grafana.integreatly.org/v1beta1 +kind: GrafanaDashboard +metadata: + name: ingress-nginx +spec: + instanceSelector: + matchLabels: + dashboards: "grafana" + url: "https://raw.githubusercontent.com/adinhodovic/ingress-nginx-mixin/refs/heads/main/dashboards_out/ingress-nginx-overview.json" diff --git 
a/otc/benchmark.t09.de/stacks/observability/grafana-operator/manifests/victoria-logs.yaml b/otc/benchmark.t09.de/stacks/observability/grafana-operator/manifests/victoria-logs.yaml new file mode 100644 index 0000000..4018fbd --- /dev/null +++ b/otc/benchmark.t09.de/stacks/observability/grafana-operator/manifests/victoria-logs.yaml @@ -0,0 +1,9 @@ +apiVersion: grafana.integreatly.org/v1beta1 +kind: GrafanaDashboard +metadata: + name: victoria-logs +spec: + instanceSelector: + matchLabels: + dashboards: "grafana" + url: "https://raw.githubusercontent.com/VictoriaMetrics/VictoriaMetrics/refs/heads/master/dashboards/vm/victorialogs.json" diff --git a/otc/benchmark.t09.de/stacks/observability/victoria-k8s-stack.yaml b/otc/benchmark.t09.de/stacks/observability/victoria-k8s-stack.yaml new file mode 100644 index 0000000..3a6506f --- /dev/null +++ b/otc/benchmark.t09.de/stacks/observability/victoria-k8s-stack.yaml @@ -0,0 +1,31 @@ +apiVersion: argoproj.io/v1alpha1 +kind: Application +metadata: + name: o12y + namespace: argocd + labels: + env: dev +spec: + project: default + syncPolicy: + automated: + selfHeal: true + syncOptions: + - CreateNamespace=true + - ServerSideApply=true + destination: + name: in-cluster + namespace: observability + sources: + - chart: victoria-metrics-k8s-stack + repoURL: https://victoriametrics.github.io/helm-charts/ + targetRevision: 0.48.1 + helm: + valueFiles: + - $values/otc/benchmark.t09.de/stacks/observability/victoria-k8s-stack/values.yaml + - repoURL: https://edp.buildth.ing/DevFW-CICD/stacks-instances + targetRevision: HEAD + ref: values + - repoURL: https://edp.buildth.ing/DevFW-CICD/stacks-instances + targetRevision: HEAD + path: "otc/benchmark.t09.de/stacks/observability/victoria-k8s-stack/manifests" diff --git a/otc/benchmark.t09.de/stacks/observability/victoria-k8s-stack/manifests/alerts.yaml b/otc/benchmark.t09.de/stacks/observability/victoria-k8s-stack/manifests/alerts.yaml new file mode 100644 index 0000000..110ee7e --- /dev/null 
+++ b/otc/benchmark.t09.de/stacks/observability/victoria-k8s-stack/manifests/alerts.yaml @@ -0,0 +1,40 @@ +apiVersion: operator.victoriametrics.com/v1beta1 +kind: VMRule +metadata: + name: forgejo-alerts + namespace: observability +spec: + groups: + - name: forgejo + rules: + - alert: forgejo down + expr: sum by(cluster_environment) (up{pod=~"forgejo-server-.*"}) < 1 + for: 30s + labels: + severity: critical + job: "{{ $labels.job }}" + annotations: + value: "{{ $value }}" + description: 'forgejo is down in cluster environment {{ $labels.cluster_environment }}' + - name: forgejo-backup + rules: + - alert: forgejo s3 backup job failed + expr: max by(cluster_environment) (kube_job_status_failed{job_name=~"forgejo-s3-backup-.*"}) != 0 + for: 30s + labels: + severity: critical + job: "{{ $labels.job }}" + annotations: + value: "{{ $value }}" + description: 'forgejo s3 backup job failed in cluster environment {{ $labels.cluster_environment }}' + - name: disk-consumption-high + rules: + - alert: disk consumption high + expr: 1-(kubelet_volume_stats_available_bytes / kubelet_volume_stats_capacity_bytes) > 0.6 + for: 30s + labels: + severity: major + job: "{{ $labels.job }}" + annotations: + value: "{{ $value }}" + description: 'disk consumption of pvc {{ $labels.namespace }}/{{ $labels.persistentvolumeclaim }} is high in cluster environment {{ $labels.cluster_environment }}' diff --git a/otc/benchmark.t09.de/stacks/observability/victoria-k8s-stack/manifests/vlogs.yaml b/otc/benchmark.t09.de/stacks/observability/victoria-k8s-stack/manifests/vlogs.yaml new file mode 100644 index 0000000..a23bc0c --- /dev/null +++ b/otc/benchmark.t09.de/stacks/observability/victoria-k8s-stack/manifests/vlogs.yaml @@ -0,0 +1,26 @@ +apiVersion: operator.victoriametrics.com/v1beta1 +kind: VLogs +metadata: + name: victorialogs + namespace: observability +spec: + retentionPeriod: "12" + removePvcAfterDelete: true + storageMetadata: + annotations: + everest.io/crypt-key-id: 
71ebef9e-5575-4b05-a597-ee1f67c911e3 + everest.io/disk-volume-type: GPSSD + storage: + storageClassName: csi-disk + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 50Gi + resources: + requests: + memory: 500Mi + cpu: 500m + limits: + memory: 10Gi + cpu: 2 diff --git a/otc/benchmark.t09.de/stacks/observability/victoria-k8s-stack/manifests/vmauth.yaml b/otc/benchmark.t09.de/stacks/observability/victoria-k8s-stack/manifests/vmauth.yaml new file mode 100644 index 0000000..5759093 --- /dev/null +++ b/otc/benchmark.t09.de/stacks/observability/victoria-k8s-stack/manifests/vmauth.yaml @@ -0,0 +1,17 @@ +apiVersion: operator.victoriametrics.com/v1beta1 +kind: VMUser +metadata: + name: simple-user + namespace: observability +spec: + username: simple-user + passwordRef: + key: password + name: simple-user-secret + targetRefs: + - static: + url: http://vmsingle-o12y:8429 + paths: ["/api/v1/write"] + - static: + url: http://vlogs-victorialogs:9428 + paths: ["/insert/elasticsearch/.*"] \ No newline at end of file diff --git a/otc/benchmark.t09.de/stacks/observability/victoria-k8s-stack/values.yaml b/otc/benchmark.t09.de/stacks/observability/victoria-k8s-stack/values.yaml new file mode 100644 index 0000000..b6565f0 --- /dev/null +++ b/otc/benchmark.t09.de/stacks/observability/victoria-k8s-stack/values.yaml @@ -0,0 +1,1230 @@ +global: + # -- Cluster label to use for dashboards and rules + clusterLabel: cluster + # -- Global license configuration + license: + key: "" + keyRef: {} + # name: secret-license + # key: license + cluster: + # -- K8s cluster domain suffix, used for building storage pods' FQDN. Details are [here](https://kubernetes.io/docs/tasks/administer-cluster/dns-custom-nameservers/) + dnsDomain: cluster.local.
+ +# -- Override chart name +nameOverride: "" +# -- Resource full name override +fullnameOverride: "o12y" +# -- Tenant to use for Grafana datasources and remote write +tenant: "0" +# -- If this chart is used in "Argocd" with "releaseName" field then +# VMServiceScrapes couldn't select the proper services. +# For this to work correctly, set 'argocdReleaseOverride=$ARGOCD_APP_NAME' +argocdReleaseOverride: "o12y" + +# -- VictoriaMetrics Operator dependency chart configuration. More values can be found [here](https://docs.victoriametrics.com/helm/victoriametrics-operator#parameters). Also check out [here](https://docs.victoriametrics.com/operator/vars) possible ENV variables to configure operator behaviour +victoria-metrics-operator: + enabled: true + crds: + plain: true + cleanup: + enabled: true + image: + repository: bitnami/kubectl + pullPolicy: IfNotPresent + serviceMonitor: + enabled: true + operator: + # -- By default, operator converts prometheus-operator objects. + disable_prometheus_converter: false + # group pinguin added the admissionWebhooks value according to https://docs.victoriametrics.com/helm/victoriametrics-k8s-stack/#argocd-issues + admissionWebhooks: + certManager: + enabled: true + +defaultDashboards: + # -- Enable custom dashboards installation + enabled: true + defaultTimezone: utc + labels: {} + annotations: {} + grafanaOperator: + # -- Create dashboards as CRDs (requires grafana-operator to be installed) + enabled: true + spec: + instanceSelector: + matchLabels: + dashboards: grafana + allowCrossNamespaceImport: false + # -- Create dashboards as ConfigMap despite dependency it requires is not installed + dashboards: + victoriametrics-vmalert: + enabled: true + victoriametrics-operator: + enabled: true + # -- In ArgoCD using client-side apply this dashboard reaches annotations size limit and causes k8s issues without server side apply + # See [this
issue](https://github.com/VictoriaMetrics/helm-charts/tree/master/charts/victoria-metrics-k8s-stack#metadataannotations-too-long-must-have-at-most-262144-bytes-on-dashboards) + node-exporter-full: + enabled: true + +# -- Create default rules for monitoring the cluster +defaultRules: + # -- Labels, which are used for grouping results of the queries. Note that these labels are joined with `.Values.global.clusterLabel` + additionalGroupByLabels: [] + create: true + + # -- Common properties for VMRule groups + group: + spec: + # -- Optional HTTP URL parameters added to each rule request + params: {} + + # -- Common properties for all VMRules + rule: + spec: + # -- Additional labels for all VMRules + labels: {} + # -- Additional annotations for all VMRules + annotations: {} + + # -- Common properties for VMRules alerts + alerting: + spec: + # -- Additional labels for VMRule alerts + labels: {} + # -- Additional annotations for VMRule alerts + annotations: {} + + # -- Common properties for VMRules recording rules + recording: + spec: + # -- Additional labels for VMRule recording rules + labels: {} + # -- Additional annotations for VMRule recording rules + annotations: {} + + # -- Per rule properties + rules: {} + # CPUThrottlingHigh: + # create: true + # spec: + # for: 15m + # labels: + # severity: critical + # -- Rule group properties + groups: + etcd: + create: true + # -- Common properties for all rules in a group + rules: {} + # spec: + # annotations: + # dashboard: https://example.com/dashboard/1 + general: + create: true + rules: {} + k8sContainerCpuLimits: + create: true + rules: {} + k8sContainerCpuRequests: + create: true + rules: {} + k8sContainerCpuUsageSecondsTotal: + create: true + rules: {} + k8sContainerMemoryLimits: + create: true + rules: {} + k8sContainerMemoryRequests: + create: true + rules: {} + k8sContainerMemoryRss: + create: true + rules: {} + k8sContainerMemoryCache: + create: true + rules: {} + k8sContainerMemoryWorkingSetBytes: + create: true + 
rules: {} + k8sContainerMemorySwap: + create: true + rules: {} + k8sPodOwner: + create: true + rules: {} + k8sContainerResource: + create: true + rules: {} + kubeApiserver: + create: true + rules: {} + kubeApiserverAvailability: + create: true + rules: {} + kubeApiserverBurnrate: + create: true + rules: {} + kubeApiserverHistogram: + create: true + rules: {} + kubeApiserverSlos: + create: true + rules: {} + kubelet: + create: true + rules: {} + kubePrometheusGeneral: + create: true + rules: {} + kubePrometheusNodeRecording: + create: true + rules: {} + kubernetesApps: + create: true + rules: {} + targetNamespace: ".*" + kubernetesResources: + create: true + rules: {} + kubernetesStorage: + create: true + rules: {} + targetNamespace: ".*" + kubernetesSystem: + create: true + rules: {} + kubernetesSystemKubelet: + create: true + rules: {} + kubernetesSystemApiserver: + create: true + rules: {} + kubernetesSystemControllerManager: + create: false + rules: {} + kubeScheduler: + create: false + rules: {} + kubernetesSystemScheduler: + create: false + rules: {} + kubeStateMetrics: + create: true + rules: {} + nodeNetwork: + create: true + rules: {} + node: + create: true + rules: {} + vmagent: + create: true + rules: {} + vmsingle: + create: true + rules: {} + vmcluster: + create: true + rules: {} + vmHealth: + create: true + rules: {} + vmoperator: + create: true + rules: {} + alertmanager: + create: true + rules: {} + + # -- Runbook url prefix for default rules + runbookUrl: https://runbooks.prometheus-operator.dev/runbooks + + # -- Labels for default rules + labels: {} + # -- Annotations for default rules + annotations: {} + +# -- Provide custom recording or alerting rules to be deployed into the cluster. 
+additionalVictoriaMetricsMap: +# rule-name: +# groups: +# - name: my_group +# rules: +# - record: my_record +# expr: 100 * my_record + +external: + grafana: + # -- External Grafana host + host: "" + # -- External Grafana datasource name + datasource: VictoriaMetrics + # -- External VM read and write URLs + vm: + read: + url: "" + # bearerTokenSecret: + # name: dbaas-read-access-token + # key: bearerToken + write: + url: "" + # bearerTokenSecret: + # name: dbaas-read-access-token + # key: bearerToken + +# Configures vmsingle params +vmsingle: + # -- VMSingle annotations + annotations: {} + # -- Create VMSingle CR + enabled: true + # -- Full spec for VMSingle CRD. Allowed values described [here](https://docs.victoriametrics.com/operator/api#vmsinglespec) + spec: + port: "8429" + # -- Data retention period. Possible units character: h(ours), d(ays), w(eeks), y(ears), if no unit character specified - month. The minimum retention period is 24h. See these [docs](https://docs.victoriametrics.com/single-server-victoriametrics/#retention) + retentionPeriod: "1" + replicaCount: 1 + extraArgs: {} + storageMetadata: + annotations: + everest.io/crypt-key-id: 71ebef9e-5575-4b05-a597-ee1f67c911e3 + everest.io/disk-volume-type: GPSSD + storage: + storageClassName: csi-disk + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 20Gi + ingress: + # -- Enable deployment of ingress for server component + enabled: false + # -- Ingress annotations + annotations: + {} + # kubernetes.io/ingress.class: nginx + # kubernetes.io/tls-acme: "true" + # -- Ingress extra labels + labels: {} + # -- Ingress default path + path: "" + # -- Ingress path type + pathType: Prefix + # -- Ingress controller class name + ingressClassName: "" + + # -- Array of host objects + hosts: [] + # - vmsingle.domain.com + # -- Extra paths to prepend to every host configuration. This is useful when working with annotation based services.
+ extraPaths: [] + # - path: /* + # pathType: Prefix + # backend: + # service: + # name: ssl-redirect + # port: + # name: service + + # -- Array of TLS objects + tls: [] + # - secretName: vmsingle-ingress-tls + # hosts: + # - vmsingle.domain.com + +vmcluster: + # -- Create VMCluster CR + enabled: false + # -- VMCluster annotations + annotations: {} + # -- Full spec for VMCluster CRD. Allowed values described [here](https://docs.victoriametrics.com/operator/api#vmclusterspec) + spec: + # -- Data retention period. Possible units character: h(ours), d(ays), w(eeks), y(ears), if no unit character specified - month. The minimum retention period is 24h. See these [docs](https://docs.victoriametrics.com/single-server-victoriametrics/#retention) + retentionPeriod: "1" + replicationFactor: 2 + vmstorage: + replicaCount: 2 + storageDataPath: /vm-data + storage: + volumeClaimTemplate: + spec: + resources: + requests: + storage: 10Gi + resources: + {} + # limits: + # cpu: "1" + # memory: 1500Mi + vmselect: + # -- Set this value to false to disable VMSelect + enabled: true + port: "8481" + replicaCount: 2 + cacheMountPath: /select-cache + extraArgs: {} + storage: + volumeClaimTemplate: + spec: + resources: + requests: + storage: 2Gi + resources: + {} + # limits: + # cpu: "1" + # memory: "1000Mi" + # requests: + # cpu: "0.5" + # memory: "500Mi" + vminsert: + # -- Set this value to false to disable VMInsert + enabled: true + port: "8480" + replicaCount: 2 + extraArgs: {} + resources: + {} + # limits: + # cpu: "1" + # memory: 1000Mi + # requests: + # cpu: "0.5" + # memory: "500Mi" + + ingress: + storage: + # -- Enable deployment of ingress for server component + enabled: false + + # -- Ingress annotations + annotations: {} + # kubernetes.io/ingress.class: nginx + # kubernetes.io/tls-acme: "true" + + # -- Ingress extra labels + labels: {} + + # -- Ingress controller class name + ingressClassName: "" + + # -- Ingress path type + pathType: Prefix + + # -- Ingress default path + path: 
"" + + # -- Array of host objects + hosts: [] + # - vmstorage.domain.com + + # -- Extra paths to prepend to every host configuration. This is useful when working with annotation based services. + extraPaths: [] + # - path: /* + # pathType: Prefix + # backend: + # service: + # name: ssl-redirect + # port: + # name: service + + # -- Array of TLS objects + tls: [] + # - secretName: vmstorage-ingress-tls + # hosts: + # - vmstorage.domain.com + select: + # -- Enable deployment of ingress for server component + enabled: false + + # -- Ingress annotations + annotations: {} + # kubernetes.io/ingress.class: nginx + # kubernetes.io/tls-acme: "true" + + # -- Ingress extra labels + labels: {} + + # -- Ingress controller class name + ingressClassName: "" + + # -- Ingress path type + pathType: Prefix + + # -- Ingress default path + path: '{{ dig "extraArgs" "http.pathPrefix" "/" .Values.vmcluster.spec.vmselect }}' + + # -- Array of host objects + hosts: [] + # - vmselect.domain.com + # -- Extra paths to prepend to every host configuration. This is useful when working with annotation based services. + extraPaths: [] + # - path: /* + # pathType: Prefix + # backend: + # service: + # name: ssl-redirect + # port: + # name: service + + # -- Array of TLS objects + tls: [] + # - secretName: vmselect-ingress-tls + # hosts: + # - vmselect.domain.com + insert: + # -- Enable deployment of ingress for server component + enabled: false + + # -- Ingress annotations + annotations: + {} + # kubernetes.io/ingress.class: nginx + # kubernetes.io/tls-acme: "true" + + # -- Ingress extra labels + labels: {} + + # -- Ingress controller class name + ingressClassName: "" + + # -- Ingress path type + pathType: Prefix + + # -- Ingress default path + path: '{{ dig "extraArgs" "http.pathPrefix" "/" .Values.vmcluster.spec.vminsert }}' + + # -- Array of host objects + hosts: [] + # - vminsert.domain.com + # -- Extra paths to prepend to every host configuration. 
This is useful when working with annotation based services. + extraPaths: [] + # - path: /* + # pathType: Prefix + # backend: + # service: + # name: ssl-redirect + # port: + # name: service + + # -- Array of TLS objects + tls: [] + # - secretName: vminsert-ingress-tls + # hosts: + # - vminsert.domain.com + +alertmanager: + # -- Create VMAlertmanager CR + enabled: true + # -- Alertmanager annotations + annotations: {} + # -- (object) Full spec for VMAlertmanager CRD. Allowed values described [here](https://docs.victoriametrics.com/operator/api#vmalertmanagerspec) + spec: + replicaCount: 1 + port: "9093" + selectAllByDefault: true + image: + tag: v0.28.1 + externalURL: "" + routePrefix: / + + # -- (string) If this one defined, it will be used for alertmanager configuration and config parameter will be ignored + configSecret: "" + # -- + # @raw + # enable storing .Values.alertmanager.config in VMAlertmanagerConfig instead of k8s Secret. + # Note: VMAlertmanagerConfig and plain Alertmanager config structures are not equal. + # If you're migrating existing config, please make sure that `.Values.alertmanager.config`: + # - with `useManagedConfig: false` has structure described [here](https://prometheus.io/docs/alerting/latest/configuration/). + # - with `useManagedConfig: true` has structure described [here](https://docs.victoriametrics.com/operator/api/#vmalertmanagerconfig). 
+ useManagedConfig: true + # -- (object) Alertmanager configuration + config: + route: + receiver: "blackhole" + routes: + - matchers: + - severity=~"critical|major" + receiver: outlook + receivers: + - name: blackhole + - name: outlook + email_configs: + - smarthost: 'mail.mms-support.de:465' + auth_username: 'ipcei-cis-devfw@mms-support.de' + auth_password: + name: email-user-credentials + key: connection-string + from: '"IPCEI CIS DevFW" ' + to: 'f9f9953a.mg.telekom.de@de.teams.ms' + headers: + subject: 'Grafana Mail Alerts' + require_tls: false + + # -- Better alert templates for [slack source](https://gist.github.com/milesbxf/e2744fc90e9c41b47aa47925f8ff6512) + monzoTemplate: + enabled: true + + # -- (object) Extra alert templates + templateFiles: + {} + # template_1.tmpl: |- + # {{ define "hello" -}} + # hello, Victoria! + # {{- end }} + # template_2.tmpl: "" + + # -- (object) Alertmanager ingress configuration + ingress: + enabled: false + # For Kubernetes >= 1.18 you should specify the ingress-controller via the field ingressClassName + # See https://kubernetes.io/blog/2020/04/02/improvements-to-the-ingress-api-in-kubernetes-1.18/#specifying-the-class-of-an-ingress + # ingressClassName: nginx + # Values can be templated + annotations: + {} + # kubernetes.io/ingress.class: nginx + # kubernetes.io/tls-acme: "true" + labels: {} + path: '{{ .Values.alertmanager.spec.routePrefix | default "/" }}' + pathType: Prefix + + hosts: + - alertmanager.domain.com + # -- Extra paths to prepend to every host configuration. This is useful when working with annotation based services. 
+ extraPaths: [] + # - path: /* + # pathType: Prefix + # backend: + # service: + # name: ssl-redirect + # port: + # name: service + tls: [] + # - secretName: alertmanager-ingress-tls + # hosts: + # - alertmanager.domain.com + +vmalert: + # -- VMAlert annotations + annotations: {} + # -- Create VMAlert CR + enabled: true + + # -- Controls whether VMAlert should use VMAgent or VMInsert as a target for remotewrite + remoteWriteVMAgent: false + # -- (object) Full spec for VMAlert CRD. Allowed values described [here](https://docs.victoriametrics.com/operator/api#vmalertspec) + spec: + port: "8080" + selectAllByDefault: true + evaluationInterval: 20s + extraArgs: + http.pathPrefix: "/" + + # External labels to add to all generated recording rules and alerts + externalLabels: {} + + # -- (object) Extra VMAlert annotation templates + templateFiles: + {} + # template_1.tmpl: |- + # {{ define "hello" -}} + # hello, Victoria! + # {{- end }} + # template_2.tmpl: "" + + # -- Allows to configure static notifiers, discover notifiers via Consul and DNS, + # see specification [here](https://docs.victoriametrics.com/vmalert/#notifier-configuration-file). + # This configuration will be created as separate secret and mounted to VMAlert pod. + additionalNotifierConfigs: {} + # dns_sd_configs: + # - names: + # - my.domain.com + # type: 'A' + # port: 9093 + # -- (object) VMAlert ingress config + ingress: + enabled: false + # For Kubernetes >= 1.18 you should specify the ingress-controller via the field ingressClassName + # See https://kubernetes.io/blog/2020/04/02/improvements-to-the-ingress-api-in-kubernetes-1.18/#specifying-the-class-of-an-ingress + # ingressClassName: nginx + # Values can be templated + annotations: + {} + # kubernetes.io/ingress.class: nginx + # kubernetes.io/tls-acme: "true" + labels: {} + path: "" + pathType: Prefix + + hosts: + - vmalert.domain.com + # -- Extra paths to prepend to every host configuration. 
This is useful when working with annotation based services. + extraPaths: [] + # - path: /* + # pathType: Prefix + # backend: + # service: + # name: ssl-redirect + # port: + # name: service + tls: [] + # - secretName: vmalert-ingress-tls + # hosts: + # - vmalert.domain.com + +vmauth: + # -- Enable VMAuth CR + enabled: true + # -- VMAuth annotations + annotations: {} + # -- (object) Full spec for VMAuth CRD. Allowed values described [here](https://docs.victoriametrics.com/operator/api#vmauthspec) + # It's possible to use given below predefined variables in spec: + # * `{{ .vm.read }}` - parsed vmselect, vmsingle or external.vm.read URL + # * `{{ .vm.write }}` - parsed vminsert, vmsingle or external.vm.write URL + spec: + port: "8427" + ingress: + class_name: nginx + annotations: + nginx.ingress.kubernetes.io/force-ssl-redirect: "true" + cert-manager.io/cluster-issuer: main + host: o12y.observability. + tlsHosts: + - o12y.observability. + tlsSecretName: vmauth-tls-secret + unauthorizedUserAccessSpec: {} + selectAllByDefault: true + +vmagent: + # -- Create VMAgent CR + enabled: false + # -- VMAgent annotations + annotations: {} + # -- Remote write configuration of VMAgent, allowed parameters defined in a [spec](https://docs.victoriametrics.com/operator/api#vmagentremotewritespec) + additionalRemoteWrites: + [] + #- url: http://some-remote-write/api/v1/write + # -- (object) Full spec for VMAgent CRD. Allowed values described [here](https://docs.victoriametrics.com/operator/api#vmagentspec) + spec: + port: "8429" + selectAllByDefault: true + scrapeInterval: 20s + externalLabels: {} + # For multi-cluster setups it is useful to use "cluster" label to identify the metrics source. + # For example: + # cluster: cluster-name + extraArgs: + promscrape.streamParse: "true" + # Do not store original labels in vmagent's memory by default. This reduces the amount of memory used by vmagent + # but makes vmagent debugging UI less informative. 
See: https://docs.victoriametrics.com/vmagent/#relabel-debug + promscrape.dropOriginalLabels: "true" + # -- (object) VMAgent ingress configuration + ingress: + enabled: false + # For Kubernetes >= 1.18 you should specify the ingress-controller via the field ingressClassName + # See https://kubernetes.io/blog/2020/04/02/improvements-to-the-ingress-api-in-kubernetes-1.18/#specifying-the-class-of-an-ingress + # ingressClassName: nginx + # Values can be templated + annotations: + {} + # kubernetes.io/ingress.class: nginx + # kubernetes.io/tls-acme: "true" + labels: {} + path: "" + pathType: Prefix + + hosts: + - vmagent.domain.com + extraPaths: [] + # - path: /* + # pathType: Prefix + # backend: + # service: + # name: ssl-redirect + # port: + # name: service + tls: [] + # - secretName: vmagent-ingress-tls + # hosts: + # - vmagent.domain.com + +defaultDatasources: + grafanaOperator: + # -- Create datasources as CRDs (requires grafana-operator to be installed) + enabled: true + annotations: {} + spec: + plugins: + - name: victoriametrics-metrics-datasource + version: 0.16.0 + - name: victoriametrics-logs-datasource + version: 0.17.0 + instanceSelector: + matchLabels: + dashboards: grafana + allowCrossNamespaceImport: false + victoriametrics: + # -- Create per replica prometheus compatible datasource + perReplica: false + # -- List of prometheus compatible datasource configurations. + # VM `url` will be added to each of them in templates. + datasources: + - name: VictoriaMetrics + type: prometheus + access: proxy + isDefault: true + - name: VictoriaMetrics (DS) + isDefault: false + access: proxy + type: victoriametrics-metrics-datasource + version: "0.15.1" + # -- List of alertmanager datasources. 
+ # Alertmanager generated `url` will be added to each datasource in template if alertmanager is enabled + alertmanager: + # -- Create per replica alertmanager compatible datasource + perReplica: false + datasources: + - name: Alertmanager + access: proxy + jsonData: + implementation: prometheus + # -- Configure additional grafana datasources (passed through tpl). + # Check [here](http://docs.grafana.org/administration/provisioning/#datasources) for details + extra: + - name: VictoriaLogs + access: proxy + type: victoriametrics-logs-datasource + url: http://vlogs-victorialogs:9428 + version: 0.18.0 + +# -- Grafana dependency chart configuration. For possible values refer [here](https://github.com/grafana/helm-charts/tree/main/charts/grafana#configuration) +grafana: + enabled: false + # all values for grafana helm chart can be specified here + persistence: + enabled: false + type: pvc + storageClassName: "default" + grafana.ini: + # auth: + # login_maximum_inactive_lifetime_duration: 0 + # login_maximum_lifetime_duration: 0 + security: + disable_brute_force_login_protection: true + sidecar: + datasources: + enabled: true + initDatasources: true + label: grafana_datasource + dashboards: + provider: + name: default + orgid: 1 + folder: /var/lib/grafana/dashboards + defaultFolderName: default + enabled: true + multicluster: false + + # dashboards: + # default: + # victoria-logs: + # url: "https://raw.githubusercontent.com/VictoriaMetrics/VictoriaMetrics/refs/heads/master/dashboards/vm/victorialogs.json" + # victoria-logs-explorer: + # url: "https://grafana.com/api/dashboards/22759/revisions/6/download" + # ingress-nginx: + # url: "https://raw.githubusercontent.com/adinhodovic/ingress-nginx-mixin/refs/heads/main/dashboards_out/ingress-nginx-overview.json" + # argocd: + # url: "https://raw.githubusercontent.com/argoproj/argo-cd/refs/heads/master/examples/dashboard.json" + + # -- Create datasource configmap even if grafana deployment has been disabled + 
forceDeployDatasource: true + + # Uncomment the block below, if you want to enable VictoriaMetrics Datasource in Grafana: + # Note that Grafana will need internet access to install the datasource plugin. + + plugins: + - victoriametrics-metrics-datasource + - victoriametrics-logs-datasource + + ingress: + enabled: false + # For Kubernetes >= 1.18 you should specify the ingress-controller via the field ingressClassName + # See https://kubernetes.io/blog/2020/04/02/improvements-to-the-ingress-api-in-kubernetes-1.18/#specifying-the-class-of-an-ingress + # ingressClassName: nginx + # Values can be templated + annotations: + {} + # kubernetes.io/ingress.class: nginx + # kubernetes.io/tls-acme: "true" + labels: {} + path: / + pathType: Prefix + + hosts: + - grafana.domain.com + # -- Extra paths to prepend to every host configuration. This is useful when working with annotation based services. + extraPaths: [] + # - path: /* + # pathType: Prefix + # backend: + # service: + # name: ssl-redirect + # port: + # name: service + tls: [] + # - secretName: grafana-ingress-tls + # hosts: + # - grafana.domain.com + + # -- Grafana VM scrape config + vmScrape: + # whether we should create a service scrape resource for grafana + enabled: true + + # -- [Scrape configuration](https://docs.victoriametrics.com/operator/api#vmservicescrapespec) for Grafana + spec: + selector: + matchLabels: + app.kubernetes.io/name: '{{ include "grafana.name" .Subcharts.grafana }}' + endpoints: + - port: '{{ .Values.grafana.service.portName }}' + +# -- prometheus-node-exporter dependency chart configuration. 
For possible values check [here](https://github.com/prometheus-community/helm-charts/blob/main/charts/prometheus-node-exporter/values.yaml) +prometheus-node-exporter: + enabled: true + + # all values for prometheus-node-exporter helm chart can be specified here + service: + # Add the 'node-exporter' label to be used by serviceMonitor to match standard common usage in rules and grafana dashboards + # + labels: + jobLabel: node-exporter + extraArgs: + - --collector.filesystem.ignored-mount-points=^/(dev|proc|sys|var/lib/docker/.+|var/lib/kubelet/.+)($|/) + - --collector.filesystem.ignored-fs-types=^(autofs|binfmt_misc|bpf|cgroup2?|configfs|debugfs|devpts|devtmpfs|fusectl|hugetlbfs|iso9660|mqueue|nsfs|overlay|proc|procfs|pstore|rpc_pipefs|securityfs|selinuxfs|squashfs|erofs|sysfs|tracefs)$ + # -- Node Exporter VM scrape config + vmScrape: + # whether we should create a service scrape resource for node-exporter + enabled: true + + # -- [Scrape configuration](https://docs.victoriametrics.com/operator/api#vmservicescrapespec) for Node Exporter + spec: + jobLabel: jobLabel + selector: + matchLabels: + app.kubernetes.io/name: '{{ include "prometheus-node-exporter.name" (index .Subcharts "prometheus-node-exporter") }}' + endpoints: + - port: metrics + metricRelabelConfigs: + - action: drop + source_labels: [mountpoint] + regex: "/var/lib/kubelet/pods.+" +# -- kube-state-metrics dependency chart configuration. For possible values check [here](https://github.com/prometheus-community/helm-charts/blob/main/charts/kube-state-metrics/values.yaml) +kube-state-metrics: + enabled: true + # -- [Scrape configuration](https://docs.victoriametrics.com/operator/api#vmservicescrapespec) for Kube State Metrics + vmScrape: + enabled: true + spec: + selector: + matchLabels: + app.kubernetes.io/name: '{{ include "kube-state-metrics.name" (index .Subcharts "kube-state-metrics") }}' + app.kubernetes.io/instance: '{{ include "vm.release" . 
}}' + endpoints: + - port: http + honorLabels: true + metricRelabelConfigs: + - action: labeldrop + regex: (uid|container_id|image_id) + jobLabel: app.kubernetes.io/name + +# -- Component scraping the kubelets +kubelet: + enabled: true + vmScrapes: + # -- Enable scraping /metrics/cadvisor from kubelet's service + cadvisor: + enabled: true + spec: + path: /metrics/cadvisor + # -- Enable scraping /metrics/probes from kubelet's service + probes: + enabled: true + spec: + path: /metrics/probes + # -- Enabled scraping /metrics/resource from kubelet's service + resources: + enabled: true + spec: + path: /metrics/resource + kubelet: + spec: {} + # -- Spec for VMNodeScrape CRD is [here](https://docs.victoriametrics.com/operator/api.html#vmnodescrapespec) + vmScrape: + kind: VMNodeScrape + spec: + scheme: "https" + honorLabels: true + interval: "30s" + scrapeTimeout: "5s" + tlsConfig: + insecureSkipVerify: true + caFile: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt + bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token + # drop high cardinality label and useless metrics for cadvisor and kubelet + metricRelabelConfigs: + - action: labeldrop + regex: (uid) + - action: labeldrop + regex: (id|name) + - action: drop + source_labels: [__name__] + regex: (rest_client_request_duration_seconds_bucket|rest_client_request_duration_seconds_sum|rest_client_request_duration_seconds_count) + relabelConfigs: + - action: labelmap + regex: __meta_kubernetes_node_label_(.+) + - sourceLabels: [__metrics_path__] + targetLabel: metrics_path + - targetLabel: job + replacement: kubelet + # ignore timestamps of cadvisor's metrics by default + # more info here https://github.com/VictoriaMetrics/VictoriaMetrics/issues/4697#issuecomment-1656540535 + honorTimestamps: false +# Component scraping the kube api server +kubeApiServer: + # -- Enable Kube Api Server metrics scraping + enabled: true + # -- Spec for VMServiceScrape CRD is 
[here](https://docs.victoriametrics.com/operator/api.html#vmservicescrapespec) + vmScrape: + spec: + endpoints: + - bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token + # bearerTokenSecret: + # key: "" + port: https + scheme: https + tlsConfig: + caFile: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt + serverName: kubernetes + jobLabel: component + namespaceSelector: + matchNames: + - default + selector: + matchLabels: + component: apiserver + provider: kubernetes + +# Component scraping the kube controller manager +kubeControllerManager: + # -- Enable kube controller manager metrics scraping + enabled: false + + # -- If your kube controller manager is not deployed as a pod, specify IPs it can be found on + endpoints: [] + # - 10.141.4.22 + # - 10.141.4.23 + # - 10.141.4.24 + + # If using kubeControllerManager.endpoints only the port and targetPort are used + service: + # -- Create service for kube controller manager metrics scraping + enabled: true + # -- Kube controller manager service port + port: 10257 + # -- Kube controller manager service target port + targetPort: 10257 + # -- Kube controller manager service pod selector + selector: + component: kube-controller-manager + + # -- Spec for VMServiceScrape CRD is [here](https://docs.victoriametrics.com/operator/api.html#vmservicescrapespec) + vmScrape: + spec: + jobLabel: jobLabel + namespaceSelector: + matchNames: + - kube-system + endpoints: + - bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token + # bearerTokenSecret: + # key: "" + port: http-metrics + scheme: https + tlsConfig: + caFile: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt + serverName: kubernetes + +# Component scraping kubeDns. 
Use either this or coreDns +kubeDns: + # -- Enabled KubeDNS metrics scraping + enabled: false + service: + # -- Create Service for KubeDNS metrics + enabled: false + # -- KubeDNS service ports + ports: + dnsmasq: + port: 10054 + targetPort: 10054 + skydns: + port: 10055 + targetPort: 10055 + # -- KubeDNS service pods selector + selector: + k8s-app: kube-dns + # -- Spec for VMServiceScrape CRD is [here](https://docs.victoriametrics.com/operator/api.html#vmservicescrapespec) + vmScrape: + spec: + jobLabel: jobLabel + namespaceSelector: + matchNames: [kube-system] + endpoints: + - port: http-metrics-dnsmasq + bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token + - port: http-metrics-skydns + bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token + +# Component scraping coreDns. Use either this or kubeDns +coreDns: + # -- Enabled CoreDNS metrics scraping + enabled: true + service: + # -- Create service for CoreDNS metrics + enabled: true + # -- CoreDNS service port + port: 9153 + # -- CoreDNS service target port + targetPort: 9153 + # -- CoreDNS service pod selector + selector: + k8s-app: kube-dns + + # -- Spec for VMServiceScrape CRD is [here](https://docs.victoriametrics.com/operator/api.html#vmservicescrapespec) + vmScrape: + spec: + jobLabel: jobLabel + namespaceSelector: + matchNames: [kube-system] + endpoints: + - port: http-metrics + bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token + +# Component scraping etcd +kubeEtcd: + # -- Enabled KubeETCD metrics scraping + enabled: true + + # -- If your etcd is not deployed as a pod, specify IPs it can be found on + endpoints: [] + # - 10.141.4.22 + # - 10.141.4.23 + # - 10.141.4.24 + + # Etcd service. 
If using kubeEtcd.endpoints only the port and targetPort are used + service: + # -- Enable service for ETCD metrics scraping + enabled: true + # -- ETCD service port + port: 2379 + # -- ETCD service target port + targetPort: 2379 + # -- ETCD service pods selector + selector: + component: etcd + + # -- Spec for VMServiceScrape CRD is [here](https://docs.victoriametrics.com/operator/api.html#vmservicescrapespec) + vmScrape: + spec: + jobLabel: jobLabel + namespaceSelector: + matchNames: [kube-system] + endpoints: + - bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token + # bearerTokenSecret: + # key: "" + port: http-metrics + scheme: https + tlsConfig: + caFile: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt + +# Component scraping kube scheduler +kubeScheduler: + # -- Enable KubeScheduler metrics scraping + enabled: false + + # -- If your kube scheduler is not deployed as a pod, specify IPs it can be found on + endpoints: [] + # - 10.141.4.22 + # - 10.141.4.23 + # - 10.141.4.24 + + # If using kubeScheduler.endpoints only the port and targetPort are used + service: + # -- Enable service for KubeScheduler metrics scrape + enabled: true + # -- KubeScheduler service port + port: 10259 + # -- KubeScheduler service target port + targetPort: 10259 + # -- KubeScheduler service pod selector + selector: + component: kube-scheduler + + # -- Spec for VMServiceScrape CRD is [here](https://docs.victoriametrics.com/operator/api.html#vmservicescrapespec) + vmScrape: + spec: + jobLabel: jobLabel + namespaceSelector: + matchNames: [kube-system] + endpoints: + - bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token + # bearerTokenSecret: + # key: "" + port: http-metrics + scheme: https + tlsConfig: + caFile: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt + +# Component scraping kube proxy +kubeProxy: + # -- Enable kube proxy metrics scraping + enabled: false + + # -- If your kube proxy is not deployed as a pod, specify IPs it can be found on 
+ endpoints: [] + # - 10.141.4.22 + # - 10.141.4.23 + # - 10.141.4.24 + + service: + # -- Enable service for kube proxy metrics scraping + enabled: true + # -- Kube proxy service port + port: 10249 + # -- Kube proxy service target port + targetPort: 10249 + # -- Kube proxy service pod selector + selector: + k8s-app: kube-proxy + + # -- Spec for VMServiceScrape CRD is [here](https://docs.victoriametrics.com/operator/api.html#vmservicescrapespec) + vmScrape: + spec: + jobLabel: jobLabel + namespaceSelector: + matchNames: [kube-system] + endpoints: + - bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token + # bearerTokenSecret: + # key: "" + port: http-metrics + scheme: https + tlsConfig: + caFile: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt + +# -- Add extra objects dynamically to this chart +extraObjects: [] + diff --git a/otc/benchmark.t09.de/stacks/otc/cert-manager/manifests/clusterissuer.yaml b/otc/benchmark.t09.de/stacks/otc/cert-manager/manifests/clusterissuer.yaml new file mode 100644 index 0000000..73d0b7f --- /dev/null +++ b/otc/benchmark.t09.de/stacks/otc/cert-manager/manifests/clusterissuer.yaml @@ -0,0 +1,14 @@ +apiVersion: cert-manager.io/v1 +kind: ClusterIssuer +metadata: + name: main +spec: + acme: + email: admin@think-ahead.tech + server: https://acme-v02.api.letsencrypt.org/directory + privateKeySecretRef: + name: cluster-issuer-account-key + solvers: + - http01: + ingress: + ingressClassName: nginx diff --git a/otc/benchmark.t09.de/stacks/otc/cert-manager/values.yaml b/otc/benchmark.t09.de/stacks/otc/cert-manager/values.yaml new file mode 100644 index 0000000..a0b2211 --- /dev/null +++ b/otc/benchmark.t09.de/stacks/otc/cert-manager/values.yaml @@ -0,0 +1,4 @@ +crds: + enabled: true + +replicaCount: 1 diff --git a/otc/benchmark.t09.de/stacks/otc/cert-manger.yaml b/otc/benchmark.t09.de/stacks/otc/cert-manger.yaml new file mode 100644 index 0000000..2c93d4c --- /dev/null +++ b/otc/benchmark.t09.de/stacks/otc/cert-manger.yaml @@ 
-0,0 +1,32 @@ +apiVersion: argoproj.io/v1alpha1 +kind: Application +metadata: + name: cert-manager + namespace: argocd + labels: + env: dev +spec: + project: default + syncPolicy: + automated: + selfHeal: true + syncOptions: + - CreateNamespace=true + retry: + limit: -1 + destination: + name: in-cluster + namespace: cert-manager + sources: + - chart: cert-manager + repoURL: https://charts.jetstack.io + targetRevision: v1.17.2 + helm: + valueFiles: + - $values/otc/benchmark.t09.de/stacks/otc/cert-manager/values.yaml + - repoURL: https://edp.buildth.ing/DevFW-CICD/stacks-instances + targetRevision: HEAD + ref: values + - repoURL: https://edp.buildth.ing/DevFW-CICD/stacks-instances + targetRevision: HEAD + path: "otc/benchmark.t09.de/stacks/otc/cert-manager/manifests" diff --git a/otc/benchmark.t09.de/stacks/otc/ingress-nginx.yaml b/otc/benchmark.t09.de/stacks/otc/ingress-nginx.yaml new file mode 100644 index 0000000..33d6d7b --- /dev/null +++ b/otc/benchmark.t09.de/stacks/otc/ingress-nginx.yaml @@ -0,0 +1,29 @@ +apiVersion: argoproj.io/v1alpha1 +kind: Application +metadata: + name: ingress-nginx + namespace: argocd + labels: + env: dev +spec: + project: default + syncPolicy: + automated: + selfHeal: true + syncOptions: + - CreateNamespace=true + retry: + limit: -1 + destination: + name: in-cluster + namespace: ingress-nginx + sources: + - repoURL: https://github.com/kubernetes/ingress-nginx.git + path: charts/ingress-nginx + targetRevision: helm-chart-4.12.1 + helm: + valueFiles: + - $values/otc/benchmark.t09.de/stacks/otc/ingress-nginx/values.yaml + - repoURL: https://edp.buildth.ing/DevFW-CICD/stacks-instances + targetRevision: HEAD + ref: values diff --git a/otc/benchmark.t09.de/stacks/otc/ingress-nginx/values.yaml b/otc/benchmark.t09.de/stacks/otc/ingress-nginx/values.yaml new file mode 100644 index 0000000..038cbc5 --- /dev/null +++ b/otc/benchmark.t09.de/stacks/otc/ingress-nginx/values.yaml @@ -0,0 +1,31 @@ +controller: + updateStrategy: + type: RollingUpdate + 
rollingUpdate: + maxUnavailable: 1 + + service: + annotations: + kubernetes.io/elb.class: union + kubernetes.io/elb.port: '80' + kubernetes.io/elb.id: 5ee936a2-6308-4924-9fdf-0256cbdf3baa + kubernetes.io/elb.ip: 80.158.90.69 + + ingressClassResource: + name: nginx + + # added for idpbuilder + allowSnippetAnnotations: true + + # added for idpbuilder + config: + proxy-buffer-size: 32k + use-forwarded-headers: "true" + + # monitoring nginx + metrics: + enabled: true + serviceMonitor: + additionalLabels: + release: "ingress-nginx" + enabled: true diff --git a/otc/benchmark.t09.de/stacks/otc/storageclass.yaml b/otc/benchmark.t09.de/stacks/otc/storageclass.yaml new file mode 100644 index 0000000..bf46764 --- /dev/null +++ b/otc/benchmark.t09.de/stacks/otc/storageclass.yaml @@ -0,0 +1,25 @@ +apiVersion: argoproj.io/v1alpha1 +kind: Application +metadata: + name: storageclass + namespace: argocd + labels: + example: otc + finalizers: + - resources-finalizer.argocd.argoproj.io +spec: + destination: + namespace: default + server: "https://kubernetes.default.svc" + source: + repoURL: https://edp.buildth.ing/DevFW-CICD/stacks-instances + targetRevision: HEAD + path: "otc/benchmark.t09.de/stacks/otc/storageclass" + project: default + syncPolicy: + automated: + selfHeal: true + syncOptions: + - CreateNamespace=true + retry: + limit: -1 diff --git a/otc/benchmark.t09.de/stacks/otc/storageclass/storageclass.yaml b/otc/benchmark.t09.de/stacks/otc/storageclass/storageclass.yaml new file mode 100644 index 0000000..038bf24 --- /dev/null +++ b/otc/benchmark.t09.de/stacks/otc/storageclass/storageclass.yaml @@ -0,0 +1,18 @@ +apiVersion: storage.k8s.io/v1 +kind: StorageClass +metadata: + annotations: + storageclass.beta.kubernetes.io/is-default-class: "true" + labels: + kubernetes.io/cluster-service: "true" + name: default +parameters: + kubernetes.io/description: "" + kubernetes.io/hw:passthrough: "true" + kubernetes.io/storagetype: BS + kubernetes.io/volumetype: SATA + 
kubernetes.io/zone: eu-de-02 +provisioner: flexvolume-huawei.com/fuxivol +reclaimPolicy: Delete +volumeBindingMode: Immediate +allowVolumeExpansion: true \ No newline at end of file diff --git a/otc/benchmark.t09.de/stacks/terralist/terralist.yaml b/otc/benchmark.t09.de/stacks/terralist/terralist.yaml new file mode 100644 index 0000000..83afc42 --- /dev/null +++ b/otc/benchmark.t09.de/stacks/terralist/terralist.yaml @@ -0,0 +1,30 @@ +# helm upgrade --install --create-namespace --namespace terralist terralist oci://ghcr.io/terralist/helm-charts/terralist -f terralist-values.yaml +apiVersion: argoproj.io/v1alpha1 +kind: Application +metadata: + name: terralist + namespace: argocd + labels: + env: dev +spec: + project: default + syncPolicy: + automated: + selfHeal: true + syncOptions: + - CreateNamespace=true + retry: + limit: -1 + destination: + name: in-cluster + namespace: terralist + sources: + - repoURL: https://github.com/terralist/helm-charts + path: charts/terralist + targetRevision: terralist-0.8.1 + helm: + valueFiles: + - $values/otc/benchmark.t09.de/stacks/terralist/terralist/values.yaml + - repoURL: https://edp.buildth.ing/DevFW-CICD/stacks-instances + targetRevision: HEAD + ref: values diff --git a/otc/benchmark.t09.de/stacks/terralist/terralist/values.yaml b/otc/benchmark.t09.de/stacks/terralist/terralist/values.yaml new file mode 100644 index 0000000..95af42f --- /dev/null +++ b/otc/benchmark.t09.de/stacks/terralist/terralist/values.yaml @@ -0,0 +1,87 @@ +controllers: + main: + strategy: Recreate + containers: + app: + env: + - name: TERRALIST_OAUTH_PROVIDER + value: oidc + - name: TERRALIST_OI_CLIENT_ID + valueFrom: + secretKeyRef: + name: oidc-credentials + key: client-id + - name: TERRALIST_OI_CLIENT_SECRET + valueFrom: + secretKeyRef: + name: oidc-credentials + key: client-secret + - name: TERRALIST_OI_AUTHORIZE_URL + valueFrom: + secretKeyRef: + name: oidc-credentials + key: authorize-url + - name: TERRALIST_OI_TOKEN_URL + valueFrom: + 
secretKeyRef: + name: oidc-credentials + key: token-url + - name: TERRALIST_OI_USERINFO_URL + valueFrom: + secretKeyRef: + name: oidc-credentials + key: userinfo-url + - name: TERRALIST_OI_SCOPE + valueFrom: + secretKeyRef: + name: oidc-credentials + key: scope + - name: TERRALIST_TOKEN_SIGNING_SECRET + valueFrom: + secretKeyRef: + name: terralist-secret + key: token-signing-secret + - name: TERRALIST_COOKIE_SECRET + valueFrom: + secretKeyRef: + name: terralist-secret + key: cookie-secret + - name: TERRALIST_URL + value: https://terralist.benchmark.t09.de + - name: TERRALIST_SQLITE_PATH + value: /data/db.sqlite + - name: TERRALIST_LOCAL_STORE + value: /data/modules + - name: TERRALIST_PROVIDERS_ANONYMOUS_READ + value: "true" + +ingress: + main: + enabled: true + className: nginx + annotations: + cert-manager.io/cluster-issuer: main + hosts: + - host: terralist.benchmark.t09.de + paths: + - path: / + pathType: Prefix + service: + identifier: main + port: http + tls: + - hosts: + - terralist.benchmark.t09.de + secretName: terralist-tls-secret + +persistence: + data: + enabled: true + accessMode: ReadWriteOnce + size: 10Gi + retain: false + storageClass: "csi-disk" + annotations: + everest.io/disk-volume-type: GPSSD + globalMounts: + - path: /data From 75e4a2384b5e2eb414fc50ff34a52a3f1ba63495 Mon Sep 17 00:00:00 2001 From: Daniel Sy Date: Mon, 18 May 2026 10:25:58 +0200 Subject: [PATCH 040/114] =?UTF-8?q?fix(ci-sizer):=20=F0=9F=90=9B=20align?= =?UTF-8?q?=20GARM=5FURL=20with=20template=20output?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use short service DNS (garm.garm.svc:80) instead of FQDN (garm.garm.svc.cluster.local:80) to match what the stack template now generates. 
Ref: IPCEICIS-6886 --- otc/dev.t09.de/stacks/ci-sizer/sizer-receiver/deployment.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/otc/dev.t09.de/stacks/ci-sizer/sizer-receiver/deployment.yaml b/otc/dev.t09.de/stacks/ci-sizer/sizer-receiver/deployment.yaml index dd918d5..ccb6a86 100644 --- a/otc/dev.t09.de/stacks/ci-sizer/sizer-receiver/deployment.yaml +++ b/otc/dev.t09.de/stacks/ci-sizer/sizer-receiver/deployment.yaml @@ -40,7 +40,7 @@ spec: name: sizer-tokens key: hmac-key - name: GARM_URL - value: "http://garm.garm.svc.cluster.local:80" + value: "http://garm.garm.svc:80" - name: GARM_USER value: "admin" - name: GARM_PASSWORD From f2747ece688ce81025a382e30fe9f33fa126db1f Mon Sep 17 00:00:00 2001 From: Automated pipeline Date: Mon, 18 May 2026 10:02:58 +0000 Subject: [PATCH 041/114] Automated upload for benchmark.t09.de --- .../stacks/ci-sizer/gitlab-webhook.yaml | 29 ++++ .../ci-sizer/gitlab-webhook/certificates.yaml | 27 ++++ .../ci-sizer/gitlab-webhook/deployment.yaml | 141 ++++++++++++++++++ .../gitlab-webhook/webhook-config.yaml | 30 ++++ .../stacks/ci-sizer/sizer-receiver.yaml | 4 + .../ci-sizer/sizer-receiver/deployment.yaml | 4 +- .../manifests/forgejo-s3-backup-cronjob.yaml | 2 +- .../stacks/forgejo/forgejo-server/values.yaml | 6 +- otc/benchmark.t09.de/stacks/garm/garm.yaml | 4 + .../grafana-operator/manifests/grafana.yaml | 2 +- .../victoria-k8s-stack/manifests/vlogs.yaml | 2 +- .../victoria-k8s-stack/values.yaml | 2 +- .../stacks/otc/ingress-nginx/values.yaml | 4 +- 13 files changed, 246 insertions(+), 11 deletions(-) create mode 100644 otc/benchmark.t09.de/stacks/ci-sizer/gitlab-webhook.yaml create mode 100644 otc/benchmark.t09.de/stacks/ci-sizer/gitlab-webhook/certificates.yaml create mode 100644 otc/benchmark.t09.de/stacks/ci-sizer/gitlab-webhook/deployment.yaml create mode 100644 otc/benchmark.t09.de/stacks/ci-sizer/gitlab-webhook/webhook-config.yaml diff --git a/otc/benchmark.t09.de/stacks/ci-sizer/gitlab-webhook.yaml 
b/otc/benchmark.t09.de/stacks/ci-sizer/gitlab-webhook.yaml new file mode 100644 index 0000000..f876092 --- /dev/null +++ b/otc/benchmark.t09.de/stacks/ci-sizer/gitlab-webhook.yaml @@ -0,0 +1,29 @@ +# Optional: GitLab CI integration +# Only hydrate this app for clusters that run GitLab Runner. +# For Forgejo/GitHub-only deployments, omit this app from stacks-instances. +# See: ci-sizer/docs/deployment-modes.md +apiVersion: argoproj.io/v1alpha1 +kind: Application +metadata: + name: gitlab-sizer-webhook + namespace: argocd + labels: + env: dev + finalizers: + - resources-finalizer.argocd.argoproj.io +spec: + project: default + syncPolicy: + automated: + selfHeal: true + syncOptions: + - CreateNamespace=true + retry: + limit: -1 + destination: + name: in-cluster + namespace: ci-sizer + source: + repoURL: https://edp.buildth.ing/DevFW-CICD/stacks-instances + targetRevision: HEAD + path: "otc/benchmark.t09.de/stacks/ci-sizer/gitlab-webhook" diff --git a/otc/benchmark.t09.de/stacks/ci-sizer/gitlab-webhook/certificates.yaml b/otc/benchmark.t09.de/stacks/ci-sizer/gitlab-webhook/certificates.yaml new file mode 100644 index 0000000..ee1fece --- /dev/null +++ b/otc/benchmark.t09.de/stacks/ci-sizer/gitlab-webhook/certificates.yaml @@ -0,0 +1,27 @@ +# Self-signed Issuer for webhook TLS. +# For production, replace with a ClusterIssuer backed by a real CA. +apiVersion: cert-manager.io/v1 +kind: Issuer +metadata: + name: selfsigned-issuer +spec: + selfSigned: {} +--- +# cert-manager Certificate for the webhook TLS. +# The resulting Secret (gitlab-sizer-webhook-tls) is mounted into the webhook pod. +# cert-manager also injects the CA into the MutatingWebhookConfiguration via the +# cert-manager.io/inject-ca-from annotation. 
+apiVersion: cert-manager.io/v1 +kind: Certificate +metadata: + name: gitlab-sizer-webhook-cert +spec: + secretName: gitlab-sizer-webhook-tls + issuerRef: + name: selfsigned-issuer + kind: Issuer + dnsNames: + - gitlab-sizer-webhook.ci-sizer.svc + - gitlab-sizer-webhook.ci-sizer.svc.cluster.local + duration: 8760h + renewBefore: 720h diff --git a/otc/benchmark.t09.de/stacks/ci-sizer/gitlab-webhook/deployment.yaml b/otc/benchmark.t09.de/stacks/ci-sizer/gitlab-webhook/deployment.yaml new file mode 100644 index 0000000..0b99859 --- /dev/null +++ b/otc/benchmark.t09.de/stacks/ci-sizer/gitlab-webhook/deployment.yaml @@ -0,0 +1,141 @@ +apiVersion: v1 +kind: ServiceAccount +metadata: + name: gitlab-sizer-webhook +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: gitlab-sizer-webhook +rules: + - apiGroups: [""] + resources: ["pods"] + verbs: ["get", "list", "watch"] +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: gitlab-sizer-webhook +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: gitlab-sizer-webhook +subjects: + - kind: ServiceAccount + name: gitlab-sizer-webhook + namespace: ci-sizer +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: gitlab-sizer-webhook + labels: + app: gitlab-sizer-webhook +spec: + replicas: 2 + selector: + matchLabels: + app: gitlab-sizer-webhook + template: + metadata: + labels: + app: gitlab-sizer-webhook + spec: + serviceAccountName: gitlab-sizer-webhook + securityContext: + runAsNonRoot: true + runAsUser: 65534 + runAsGroup: 65534 + seccompProfile: + type: RuntimeDefault + containers: + - name: webhook + image: edp.buildth.ing/devfw-cicd/gitlab-webhook-edge-connect:latest + imagePullPolicy: Always + securityContext: + allowPrivilegeEscalation: false + readOnlyRootFilesystem: true + capabilities: + drop: + - ALL + ports: + - containerPort: 8443 + protocol: TCP + args: + - --listen-addr=:8443 + - 
--tls-cert-file=/etc/webhook/tls/tls.crt + - --tls-key-file=/etc/webhook/tls/tls.key + - --sizer-url=http://sizer-receiver.ci-sizer.svc:8080 + - --sizer-sidecar-image=edp.buildth.ing/devfw-cicd/ci-sizer-collector:latest + env: + - name: WEBHOOK_SIZER_READ_TOKEN + valueFrom: + secretKeyRef: + name: gitlab-sizer-webhook-tokens + key: sizer-read-token + - name: WEBHOOK_SIZER_PUSH_TOKEN + valueFrom: + secretKeyRef: + name: gitlab-sizer-webhook-tokens + key: sizer-push-token + - name: HTTP_PROXY + valueFrom: + configMapKeyRef: + name: gitlab-sizer-webhook-config + key: HTTP_PROXY + optional: true + - name: HTTPS_PROXY + valueFrom: + configMapKeyRef: + name: gitlab-sizer-webhook-config + key: HTTPS_PROXY + optional: true + - name: NO_PROXY + valueFrom: + configMapKeyRef: + name: gitlab-sizer-webhook-config + key: NO_PROXY + optional: true + volumeMounts: + - name: webhook-tls + mountPath: /etc/webhook/tls + readOnly: true + livenessProbe: + httpGet: + path: /healthz + port: 8443 + scheme: HTTPS + initialDelaySeconds: 5 + periodSeconds: 10 + readinessProbe: + httpGet: + path: /healthz + port: 8443 + scheme: HTTPS + initialDelaySeconds: 5 + periodSeconds: 10 + resources: + requests: + cpu: 100m + memory: 128Mi + limits: + cpu: 200m + memory: 128Mi + volumes: + - name: webhook-tls + secret: + secretName: gitlab-sizer-webhook-tls +--- +apiVersion: v1 +kind: Service +metadata: + name: gitlab-sizer-webhook + labels: + app: gitlab-sizer-webhook +spec: + selector: + app: gitlab-sizer-webhook + ports: + - port: 443 + targetPort: 8443 + protocol: TCP diff --git a/otc/benchmark.t09.de/stacks/ci-sizer/gitlab-webhook/webhook-config.yaml b/otc/benchmark.t09.de/stacks/ci-sizer/gitlab-webhook/webhook-config.yaml new file mode 100644 index 0000000..72aea4a --- /dev/null +++ b/otc/benchmark.t09.de/stacks/ci-sizer/gitlab-webhook/webhook-config.yaml @@ -0,0 +1,30 @@ +apiVersion: admissionregistration.k8s.io/v1 +kind: MutatingWebhookConfiguration +metadata: + name: gitlab-sizer-webhook + 
annotations: + cert-manager.io/inject-ca-from: ci-sizer/gitlab-sizer-webhook-cert +webhooks: + - name: gitlab-sizer-webhook.ci-sizer.svc + admissionReviewVersions: ["v1"] + sideEffects: NoneOnDryRun + failurePolicy: Ignore + timeoutSeconds: 5 + reinvocationPolicy: Never + clientConfig: + service: + name: gitlab-sizer-webhook + namespace: ci-sizer + path: /mutate + rules: + - apiGroups: [""] + apiVersions: ["v1"] + operations: ["CREATE"] + resources: ["pods"] + namespaceSelector: + matchLabels: + ci-sizer.devfw.io/watch: "true" + objectSelector: + matchExpressions: + - key: job.runner.gitlab.com/pod + operator: Exists diff --git a/otc/benchmark.t09.de/stacks/ci-sizer/sizer-receiver.yaml b/otc/benchmark.t09.de/stacks/ci-sizer/sizer-receiver.yaml index aeb18c9..a1623f9 100644 --- a/otc/benchmark.t09.de/stacks/ci-sizer/sizer-receiver.yaml +++ b/otc/benchmark.t09.de/stacks/ci-sizer/sizer-receiver.yaml @@ -1,3 +1,7 @@ +# Required: CI Sizer receiver +# Always deploy this — it stores metrics and computes sizing recommendations. +# Works standalone or with GARM (Forgejo/GitHub) and/or GitLab webhook. 
+# See: ci-sizer/docs/deployment-modes.md apiVersion: argoproj.io/v1alpha1 kind: Application metadata: diff --git a/otc/benchmark.t09.de/stacks/ci-sizer/sizer-receiver/deployment.yaml b/otc/benchmark.t09.de/stacks/ci-sizer/sizer-receiver/deployment.yaml index 7e9261b..fc78147 100644 --- a/otc/benchmark.t09.de/stacks/ci-sizer/sizer-receiver/deployment.yaml +++ b/otc/benchmark.t09.de/stacks/ci-sizer/sizer-receiver/deployment.yaml @@ -40,7 +40,7 @@ spec: name: sizer-tokens key: hmac-key - name: GARM_URL - value: "http://garm.garm.svc.cluster.local:80" + value: "http://garm.garm.svc:80" - name: GARM_USER value: "admin" - name: GARM_PASSWORD @@ -62,7 +62,7 @@ spec: - name: RECEIVER_SESSION_TTL value: "12h" - name: RECEIVER_ALLOWED_ORG - value: "DevFW-CICD" + value: "giteaAdmin" - name: RECEIVER_CPU_SIZING_MODE value: "observe" - name: RECEIVER_MEMORY_QOS diff --git a/otc/benchmark.t09.de/stacks/forgejo/forgejo-server/manifests/forgejo-s3-backup-cronjob.yaml b/otc/benchmark.t09.de/stacks/forgejo/forgejo-server/manifests/forgejo-s3-backup-cronjob.yaml index 18762aa..aa8324a 100644 --- a/otc/benchmark.t09.de/stacks/forgejo/forgejo-server/manifests/forgejo-s3-backup-cronjob.yaml +++ b/otc/benchmark.t09.de/stacks/forgejo/forgejo-server/manifests/forgejo-s3-backup-cronjob.yaml @@ -65,7 +65,7 @@ metadata: namespace: gitea annotations: everest.io/disk-volume-type: GPSSD - everest.io/crypt-key-id: 71ebef9e-5575-4b05-a597-ee1f67c911e3 + everest.io/crypt-key-id: fc9a8e53-1853-4903-b500-7a67dd1a8566 spec: storageClassName: csi-disk accessModes: diff --git a/otc/benchmark.t09.de/stacks/forgejo/forgejo-server/values.yaml b/otc/benchmark.t09.de/stacks/forgejo/forgejo-server/values.yaml index 8a18a98..69a3213 100644 --- a/otc/benchmark.t09.de/stacks/forgejo/forgejo-server/values.yaml +++ b/otc/benchmark.t09.de/stacks/forgejo/forgejo-server/values.yaml @@ -20,7 +20,7 @@ persistence: size: 200Gi storageClass: csi-disk annotations: - everest.io/crypt-key-id: 
71ebef9e-5575-4b05-a597-ee1f67c911e3 + everest.io/crypt-key-id: fc9a8e53-1853-4903-b500-7a67dd1a8566 everest.io/disk-volume-type: GPSSD test: @@ -170,7 +170,7 @@ service: nodePort: 32222 externalTrafficPolicy: Cluster annotations: - kubernetes.io/elb.id: 5ee936a2-6308-4924-9fdf-0256cbdf3baa + kubernetes.io/elb.id: 1fb3ccb7-ae1c-4787-a743-6a620978ec8d image: pullPolicy: "IfNotPresent" @@ -178,6 +178,6 @@ image: #tag: "8.0.3" # Adds -rootless suffix to image name # rootless: true - fullOverride: edp.buildth.ing/devfw-cicd/edp-forgejo:14.0.2-edp1-rootless + fullOverride: edp.buildth.ing/devfw-cicd/edp-forgejo:workflow-webhook-20260305 forgejo: {} diff --git a/otc/benchmark.t09.de/stacks/garm/garm.yaml b/otc/benchmark.t09.de/stacks/garm/garm.yaml index fee3847..e7102d4 100644 --- a/otc/benchmark.t09.de/stacks/garm/garm.yaml +++ b/otc/benchmark.t09.de/stacks/garm/garm.yaml @@ -1,3 +1,7 @@ +# Default: Forgejo/GitHub Actions runner manager +# Deploys GARM with the ci-sizer provider for automatic sizing + collector injection. +# For GitLab-only deployments, omit this and use gitlab-webhook instead. 
+# See: ci-sizer/docs/deployment-modes.md apiVersion: argoproj.io/v1alpha1 kind: Application metadata: diff --git a/otc/benchmark.t09.de/stacks/observability/grafana-operator/manifests/grafana.yaml b/otc/benchmark.t09.de/stacks/observability/grafana-operator/manifests/grafana.yaml index 199a104..2fcd4bf 100644 --- a/otc/benchmark.t09.de/stacks/observability/grafana-operator/manifests/grafana.yaml +++ b/otc/benchmark.t09.de/stacks/observability/grafana-operator/manifests/grafana.yaml @@ -9,7 +9,7 @@ spec: metadata: annotations: everest.io/disk-volume-type: GPSSD - everest.io/crypt-key-id: 71ebef9e-5575-4b05-a597-ee1f67c911e3 + everest.io/crypt-key-id: fc9a8e53-1853-4903-b500-7a67dd1a8566 spec: storageClassName: csi-disk accessModes: diff --git a/otc/benchmark.t09.de/stacks/observability/victoria-k8s-stack/manifests/vlogs.yaml b/otc/benchmark.t09.de/stacks/observability/victoria-k8s-stack/manifests/vlogs.yaml index a23bc0c..c771b52 100644 --- a/otc/benchmark.t09.de/stacks/observability/victoria-k8s-stack/manifests/vlogs.yaml +++ b/otc/benchmark.t09.de/stacks/observability/victoria-k8s-stack/manifests/vlogs.yaml @@ -8,7 +8,7 @@ spec: removePvcAfterDelete: true storageMetadata: annotations: - everest.io/crypt-key-id: 71ebef9e-5575-4b05-a597-ee1f67c911e3 + everest.io/crypt-key-id: fc9a8e53-1853-4903-b500-7a67dd1a8566 everest.io/disk-volume-type: GPSSD storage: storageClassName: csi-disk diff --git a/otc/benchmark.t09.de/stacks/observability/victoria-k8s-stack/values.yaml b/otc/benchmark.t09.de/stacks/observability/victoria-k8s-stack/values.yaml index b6565f0..8c61b03 100644 --- a/otc/benchmark.t09.de/stacks/observability/victoria-k8s-stack/values.yaml +++ b/otc/benchmark.t09.de/stacks/observability/victoria-k8s-stack/values.yaml @@ -288,7 +288,7 @@ vmsingle: extraArgs: {} storageMetadata: annotations: - everest.io/crypt-key-id: 71ebef9e-5575-4b05-a597-ee1f67c911e3 + everest.io/crypt-key-id: fc9a8e53-1853-4903-b500-7a67dd1a8566 everest.io/disk-volume-type: GPSSD storage: 
storageClassName: csi-disk diff --git a/otc/benchmark.t09.de/stacks/otc/ingress-nginx/values.yaml b/otc/benchmark.t09.de/stacks/otc/ingress-nginx/values.yaml index 038cbc5..7c4d780 100644 --- a/otc/benchmark.t09.de/stacks/otc/ingress-nginx/values.yaml +++ b/otc/benchmark.t09.de/stacks/otc/ingress-nginx/values.yaml @@ -8,8 +8,8 @@ controller: annotations: kubernetes.io/elb.class: union kubernetes.io/elb.port: '80' - kubernetes.io/elb.id: 5ee936a2-6308-4924-9fdf-0256cbdf3baa - kubernetes.io/elb.ip: 80.158.90.69 + kubernetes.io/elb.id: 1fb3ccb7-ae1c-4787-a743-6a620978ec8d + kubernetes.io/elb.ip: 164.30.4.5 ingressClassResource: name: nginx From 7e1b0418f6fd43f462e4d95c71f0b3f98ec4f7e7 Mon Sep 17 00:00:00 2001 From: Daniel Sy Date: Mon, 18 May 2026 12:21:27 +0200 Subject: [PATCH 042/114] feat(benchmark): add ci-sizer registry app for benchmark.t09.de --- otc/benchmark.t09.de/registry/ci-sizer.yaml | 24 +++++++++++++++++++++ 1 file changed, 24 insertions(+) create mode 100644 otc/benchmark.t09.de/registry/ci-sizer.yaml diff --git a/otc/benchmark.t09.de/registry/ci-sizer.yaml b/otc/benchmark.t09.de/registry/ci-sizer.yaml new file mode 100644 index 0000000..953c8c1 --- /dev/null +++ b/otc/benchmark.t09.de/registry/ci-sizer.yaml @@ -0,0 +1,24 @@ +apiVersion: argoproj.io/v1alpha1 +kind: Application +metadata: + name: ci-sizer-reg + namespace: argocd + labels: + env: dev + finalizers: + - resources-finalizer.argocd.argoproj.io +spec: + destination: + name: in-cluster + namespace: argocd + source: + path: "otc/benchmark.t09.de/stacks/ci-sizer" + repoURL: "https://edp.buildth.ing/DevFW-CICD/stacks-instances" + targetRevision: HEAD + project: default + syncPolicy: + automated: + prune: true + selfHeal: true + syncOptions: + - CreateNamespace=true From 046679e355600a787d704c1d0235c423aecb9333 Mon Sep 17 00:00:00 2001 From: Automated pipeline Date: Mon, 18 May 2026 10:29:51 +0000 Subject: [PATCH 043/114] Automated upload for benchmark.t09.de --- 
otc/benchmark.t09.de/registry/ci-sizer.yaml | 24 --------------------- 1 file changed, 24 deletions(-) delete mode 100644 otc/benchmark.t09.de/registry/ci-sizer.yaml diff --git a/otc/benchmark.t09.de/registry/ci-sizer.yaml b/otc/benchmark.t09.de/registry/ci-sizer.yaml deleted file mode 100644 index 953c8c1..0000000 --- a/otc/benchmark.t09.de/registry/ci-sizer.yaml +++ /dev/null @@ -1,24 +0,0 @@ -apiVersion: argoproj.io/v1alpha1 -kind: Application -metadata: - name: ci-sizer-reg - namespace: argocd - labels: - env: dev - finalizers: - - resources-finalizer.argocd.argoproj.io -spec: - destination: - name: in-cluster - namespace: argocd - source: - path: "otc/benchmark.t09.de/stacks/ci-sizer" - repoURL: "https://edp.buildth.ing/DevFW-CICD/stacks-instances" - targetRevision: HEAD - project: default - syncPolicy: - automated: - prune: true - selfHeal: true - syncOptions: - - CreateNamespace=true From 27475f9cf30fa39abba3fd236b8cc0a215856110 Mon Sep 17 00:00:00 2001 From: Automated pipeline Date: Mon, 18 May 2026 14:04:23 +0000 Subject: [PATCH 044/114] Automated upload for benchmark.t09.de --- .../forgejo-server/manifests/forgejo-s3-backup-cronjob.yaml | 2 +- .../stacks/forgejo/forgejo-server/values.yaml | 4 ++-- .../observability/grafana-operator/manifests/grafana.yaml | 2 +- .../observability/victoria-k8s-stack/manifests/vlogs.yaml | 2 +- .../stacks/observability/victoria-k8s-stack/values.yaml | 2 +- otc/benchmark.t09.de/stacks/otc/ingress-nginx/values.yaml | 4 ++-- 6 files changed, 8 insertions(+), 8 deletions(-) diff --git a/otc/benchmark.t09.de/stacks/forgejo/forgejo-server/manifests/forgejo-s3-backup-cronjob.yaml b/otc/benchmark.t09.de/stacks/forgejo/forgejo-server/manifests/forgejo-s3-backup-cronjob.yaml index aa8324a..ed54cb0 100644 --- a/otc/benchmark.t09.de/stacks/forgejo/forgejo-server/manifests/forgejo-s3-backup-cronjob.yaml +++ b/otc/benchmark.t09.de/stacks/forgejo/forgejo-server/manifests/forgejo-s3-backup-cronjob.yaml @@ -65,7 +65,7 @@ metadata: namespace: 
gitea annotations: everest.io/disk-volume-type: GPSSD - everest.io/crypt-key-id: fc9a8e53-1853-4903-b500-7a67dd1a8566 + everest.io/crypt-key-id: ac5a45e8-c705-445e-8026-e643e3f2525d spec: storageClassName: csi-disk accessModes: diff --git a/otc/benchmark.t09.de/stacks/forgejo/forgejo-server/values.yaml b/otc/benchmark.t09.de/stacks/forgejo/forgejo-server/values.yaml index 69a3213..fc1836b 100644 --- a/otc/benchmark.t09.de/stacks/forgejo/forgejo-server/values.yaml +++ b/otc/benchmark.t09.de/stacks/forgejo/forgejo-server/values.yaml @@ -20,7 +20,7 @@ persistence: size: 200Gi storageClass: csi-disk annotations: - everest.io/crypt-key-id: fc9a8e53-1853-4903-b500-7a67dd1a8566 + everest.io/crypt-key-id: ac5a45e8-c705-445e-8026-e643e3f2525d everest.io/disk-volume-type: GPSSD test: @@ -170,7 +170,7 @@ service: nodePort: 32222 externalTrafficPolicy: Cluster annotations: - kubernetes.io/elb.id: 1fb3ccb7-ae1c-4787-a743-6a620978ec8d + kubernetes.io/elb.id: db60c1a9-312c-42b7-847b-781d950a0e7a image: pullPolicy: "IfNotPresent" diff --git a/otc/benchmark.t09.de/stacks/observability/grafana-operator/manifests/grafana.yaml b/otc/benchmark.t09.de/stacks/observability/grafana-operator/manifests/grafana.yaml index 2fcd4bf..0989872 100644 --- a/otc/benchmark.t09.de/stacks/observability/grafana-operator/manifests/grafana.yaml +++ b/otc/benchmark.t09.de/stacks/observability/grafana-operator/manifests/grafana.yaml @@ -9,7 +9,7 @@ spec: metadata: annotations: everest.io/disk-volume-type: GPSSD - everest.io/crypt-key-id: fc9a8e53-1853-4903-b500-7a67dd1a8566 + everest.io/crypt-key-id: ac5a45e8-c705-445e-8026-e643e3f2525d spec: storageClassName: csi-disk accessModes: diff --git a/otc/benchmark.t09.de/stacks/observability/victoria-k8s-stack/manifests/vlogs.yaml b/otc/benchmark.t09.de/stacks/observability/victoria-k8s-stack/manifests/vlogs.yaml index c771b52..2247375 100644 --- a/otc/benchmark.t09.de/stacks/observability/victoria-k8s-stack/manifests/vlogs.yaml +++ 
b/otc/benchmark.t09.de/stacks/observability/victoria-k8s-stack/manifests/vlogs.yaml @@ -8,7 +8,7 @@ spec: removePvcAfterDelete: true storageMetadata: annotations: - everest.io/crypt-key-id: fc9a8e53-1853-4903-b500-7a67dd1a8566 + everest.io/crypt-key-id: ac5a45e8-c705-445e-8026-e643e3f2525d everest.io/disk-volume-type: GPSSD storage: storageClassName: csi-disk diff --git a/otc/benchmark.t09.de/stacks/observability/victoria-k8s-stack/values.yaml b/otc/benchmark.t09.de/stacks/observability/victoria-k8s-stack/values.yaml index 8c61b03..999f596 100644 --- a/otc/benchmark.t09.de/stacks/observability/victoria-k8s-stack/values.yaml +++ b/otc/benchmark.t09.de/stacks/observability/victoria-k8s-stack/values.yaml @@ -288,7 +288,7 @@ vmsingle: extraArgs: {} storageMetadata: annotations: - everest.io/crypt-key-id: fc9a8e53-1853-4903-b500-7a67dd1a8566 + everest.io/crypt-key-id: ac5a45e8-c705-445e-8026-e643e3f2525d everest.io/disk-volume-type: GPSSD storage: storageClassName: csi-disk diff --git a/otc/benchmark.t09.de/stacks/otc/ingress-nginx/values.yaml b/otc/benchmark.t09.de/stacks/otc/ingress-nginx/values.yaml index 7c4d780..ec2d3aa 100644 --- a/otc/benchmark.t09.de/stacks/otc/ingress-nginx/values.yaml +++ b/otc/benchmark.t09.de/stacks/otc/ingress-nginx/values.yaml @@ -8,8 +8,8 @@ controller: annotations: kubernetes.io/elb.class: union kubernetes.io/elb.port: '80' - kubernetes.io/elb.id: 1fb3ccb7-ae1c-4787-a743-6a620978ec8d - kubernetes.io/elb.ip: 164.30.4.5 + kubernetes.io/elb.id: db60c1a9-312c-42b7-847b-781d950a0e7a + kubernetes.io/elb.ip: 164.30.20.78 ingressClassResource: name: nginx From f12daac048fb27160013d45fa914de3dfd9ee534 Mon Sep 17 00:00:00 2001 From: Automated pipeline Date: Mon, 18 May 2026 14:32:18 +0000 Subject: [PATCH 045/114] Automated upload for benchmark.t09.de --- otc/benchmark.t09.de/registry/ci-sizer.yaml | 24 +++++++++++++++++++++ 1 file changed, 24 insertions(+) create mode 100644 otc/benchmark.t09.de/registry/ci-sizer.yaml diff --git 
a/otc/benchmark.t09.de/registry/ci-sizer.yaml b/otc/benchmark.t09.de/registry/ci-sizer.yaml new file mode 100644 index 0000000..953c8c1 --- /dev/null +++ b/otc/benchmark.t09.de/registry/ci-sizer.yaml @@ -0,0 +1,24 @@ +apiVersion: argoproj.io/v1alpha1 +kind: Application +metadata: + name: ci-sizer-reg + namespace: argocd + labels: + env: dev + finalizers: + - resources-finalizer.argocd.argoproj.io +spec: + destination: + name: in-cluster + namespace: argocd + source: + path: "otc/benchmark.t09.de/stacks/ci-sizer" + repoURL: "https://edp.buildth.ing/DevFW-CICD/stacks-instances" + targetRevision: HEAD + project: default + syncPolicy: + automated: + prune: true + selfHeal: true + syncOptions: + - CreateNamespace=true From 3c8850d2e20a0ea004601f053c985e541109dd7e Mon Sep 17 00:00:00 2001 From: Automated pipeline Date: Mon, 18 May 2026 15:20:18 +0000 Subject: [PATCH 046/114] Automated upload for benchmark.t09.de --- otc/benchmark.t09.de/stacks/core/dex/values.yaml | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/otc/benchmark.t09.de/stacks/core/dex/values.yaml b/otc/benchmark.t09.de/stacks/core/dex/values.yaml index 6da315e..76b8450 100644 --- a/otc/benchmark.t09.de/stacks/core/dex/values.yaml +++ b/otc/benchmark.t09.de/stacks/core/dex/values.yaml @@ -34,6 +34,11 @@ envVars: secretKeyRef: name: dex-argo-client key: clientSecret + - name: FORGEJO_RUNNER_SIZER_CLIENT_SECRET + valueFrom: + secretKeyRef: + name: dex-sizer-client + key: clientSecret - name: LOG_LEVEL value: debug @@ -74,3 +79,8 @@ config: - "https://grafana.benchmark.t09.de/login/generic_oauth" name: "Grafana" secretEnv: "OIDC_DEX_GRAFANA_CLIENT_SECRET" + - id: ci-sizer + name: "CI Sizer" + redirectURIs: + - "https://sizer.benchmark.t09.de/ui/callback" + secretEnv: "FORGEJO_RUNNER_SIZER_CLIENT_SECRET" From 732a27d5f1db55cad2f266d693b10365a09190c0 Mon Sep 17 00:00:00 2001 From: Martin McCaffery Date: Mon, 18 May 2026 17:22:57 +0200 Subject: [PATCH 047/114] fix(benchmark): disable 2FA requirement 
for benchmark cluster --- otc/benchmark.t09.de/stacks/forgejo/forgejo-server/values.yaml | 3 --- 1 file changed, 3 deletions(-) diff --git a/otc/benchmark.t09.de/stacks/forgejo/forgejo-server/values.yaml b/otc/benchmark.t09.de/stacks/forgejo/forgejo-server/values.yaml index fc1836b..df16dee 100644 --- a/otc/benchmark.t09.de/stacks/forgejo/forgejo-server/values.yaml +++ b/otc/benchmark.t09.de/stacks/forgejo/forgejo-server/values.yaml @@ -137,9 +137,6 @@ gitea: ENABLED: true ADAPTER: redis - security: - GLOBAL_TWO_FACTOR_REQUIREMENT: admin - service: DISABLE_REGISTRATION: true ENABLE_NOTIFY_MAIL: true From c927cbd0dc9c24b0c811e96ea1c4e491a9fe92e4 Mon Sep 17 00:00:00 2001 From: Martin McCaffery Date: Tue, 19 May 2026 09:54:48 +0200 Subject: [PATCH 048/114] bump garm-helm to v0.0.16 for benchmark --- otc/benchmark.t09.de/stacks/garm/garm.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/otc/benchmark.t09.de/stacks/garm/garm.yaml b/otc/benchmark.t09.de/stacks/garm/garm.yaml index e7102d4..820c8b2 100644 --- a/otc/benchmark.t09.de/stacks/garm/garm.yaml +++ b/otc/benchmark.t09.de/stacks/garm/garm.yaml @@ -24,7 +24,7 @@ spec: sources: - repoURL: https://edp.buildth.ing/DevFW-CICD/garm-helm path: charts/garm - targetRevision: v0.0.15 + targetRevision: v0.0.16 helm: valueFiles: - $values/otc/benchmark.t09.de/stacks/garm/garm/values.yaml From a7bc25603c0ce1c25fa7bf3169731f0e393ceed0 Mon Sep 17 00:00:00 2001 From: Patrick Sy Date: Tue, 19 May 2026 14:01:18 +0200 Subject: [PATCH 049/114] Added DevFW-CICD users as admins --- otc/dev.t09.de/stacks/core/argocd/values.yaml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/otc/dev.t09.de/stacks/core/argocd/values.yaml b/otc/dev.t09.de/stacks/core/argocd/values.yaml index dd5b83d..53cac97 100644 --- a/otc/dev.t09.de/stacks/core/argocd/values.yaml +++ b/otc/dev.t09.de/stacks/core/argocd/values.yaml @@ -30,7 +30,9 @@ configs: - "*" url: https://argocd.dev.t09.de rbac: - policy.csv: 'g, DevFW, 
role:admin' + policy.csv: | + g, DevFW, role:admin + g, DevFW-CICD, role:admin tls: certificates: From 1686764b39d8fdf9a42e98e251885d5205bb98d7 Mon Sep 17 00:00:00 2001 From: Martin McCaffery Date: Mon, 1 Jun 2026 12:57:53 +0100 Subject: [PATCH 050/114] Upgrade grafana-operator to v5.23.0 and enable useKubeAuth --- .../stacks/observability/grafana-operator.yaml | 2 +- .../observability/grafana-operator/manifests/grafana.yaml | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/otc/observability.buildth.ing/stacks/observability/grafana-operator.yaml b/otc/observability.buildth.ing/stacks/observability/grafana-operator.yaml index 69b4f6b..85a4837 100644 --- a/otc/observability.buildth.ing/stacks/observability/grafana-operator.yaml +++ b/otc/observability.buildth.ing/stacks/observability/grafana-operator.yaml @@ -19,7 +19,7 @@ spec: sources: - chart: grafana-operator repoURL: ghcr.io/grafana/helm-charts - targetRevision: v5.18.0 + targetRevision: v5.23.0 - repoURL: https://edp.buildth.ing/DevFW-CICD/stacks-instances targetRevision: HEAD path: "otc/observability.buildth.ing/stacks/observability/grafana-operator/manifests" diff --git a/otc/observability.buildth.ing/stacks/observability/grafana-operator/manifests/grafana.yaml b/otc/observability.buildth.ing/stacks/observability/grafana-operator/manifests/grafana.yaml index f51be7b..04191f9 100644 --- a/otc/observability.buildth.ing/stacks/observability/grafana-operator/manifests/grafana.yaml +++ b/otc/observability.buildth.ing/stacks/observability/grafana-operator/manifests/grafana.yaml @@ -5,6 +5,8 @@ metadata: labels: dashboards: "grafana" spec: + client: + useKubeAuth: true persistentVolumeClaim: metadata: annotations: From 3b31475552c13d2e4caf3a6c9afc0b83cf73bbf1 Mon Sep 17 00:00:00 2001 From: Martin McCaffery Date: Mon, 1 Jun 2026 13:02:49 +0100 Subject: [PATCH 051/114] Fix grafana-operator chart version tag (no v prefix) --- .../stacks/observability/grafana-operator.yaml | 2 +- 1 file changed, 1 
insertion(+), 1 deletion(-) diff --git a/otc/observability.buildth.ing/stacks/observability/grafana-operator.yaml b/otc/observability.buildth.ing/stacks/observability/grafana-operator.yaml index 85a4837..4a27771 100644 --- a/otc/observability.buildth.ing/stacks/observability/grafana-operator.yaml +++ b/otc/observability.buildth.ing/stacks/observability/grafana-operator.yaml @@ -19,7 +19,7 @@ spec: sources: - chart: grafana-operator repoURL: ghcr.io/grafana/helm-charts - targetRevision: v5.23.0 + targetRevision: 5.23.0 - repoURL: https://edp.buildth.ing/DevFW-CICD/stacks-instances targetRevision: HEAD path: "otc/observability.buildth.ing/stacks/observability/grafana-operator/manifests" From 32fd6ffd541988752bb6bd174a67c7b8b3ccc2eb Mon Sep 17 00:00:00 2001 From: Martin McCaffery Date: Mon, 1 Jun 2026 13:06:41 +0100 Subject: [PATCH 052/114] Remove useKubeAuth temporarily to unblock operator upgrade From e89d48c2a54c270f82d1a6aae1156e63e7514b49 Mon Sep 17 00:00:00 2001 From: Martin McCaffery Date: Mon, 1 Jun 2026 13:16:37 +0100 Subject: [PATCH 053/114] Upgrade Grafana to 12.4.0 and add auth.jwt config for useKubeAuth --- .../grafana-operator/manifests/grafana.yaml | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/otc/observability.buildth.ing/stacks/observability/grafana-operator/manifests/grafana.yaml b/otc/observability.buildth.ing/stacks/observability/grafana-operator/manifests/grafana.yaml index 04191f9..8e186c2 100644 --- a/otc/observability.buildth.ing/stacks/observability/grafana-operator/manifests/grafana.yaml +++ b/otc/observability.buildth.ing/stacks/observability/grafana-operator/manifests/grafana.yaml @@ -5,6 +5,7 @@ metadata: labels: dashboards: "grafana" spec: + version: "12.4.0" client: useKubeAuth: true persistentVolumeClaim: @@ -39,6 +40,18 @@ spec: auth: disable_login: "true" disable_login_form: "true" + auth.jwt: + enabled: "true" + header_name: Authorization + username_claim: sub + email_claim: sub + auto_sign_up: "true" + 
role_attribute_strict: "true" + role_attribute_path: "contains(sub, 'system:serviceaccount:observability:grafana-operator') && 'GrafanaAdmin' || 'None'" + jwk_set_url: "https://kubernetes.default.svc:443/openid/v1/jwks" + jwk_set_bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token + tls_client_ca: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt + expect_claims: '{"aud": ["operator.grafana.com"]}' auth.generic_oauth: enabled: "true" name: Forgejo From 3212016398112ccfede14c71243086971accb859 Mon Sep 17 00:00:00 2001 From: Martin McCaffery Date: Mon, 1 Jun 2026 16:47:24 +0100 Subject: [PATCH 054/114] fix(vector): use in-cluster endpoint for VictoriaLogs log shipping --- .../stacks/observability-client/vector/values.yaml | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/otc/observability.buildth.ing/stacks/observability-client/vector/values.yaml b/otc/observability.buildth.ing/stacks/observability-client/vector/values.yaml index 4905c71..44e042b 100644 --- a/otc/observability.buildth.ing/stacks/observability-client/vector/values.yaml +++ b/otc/observability.buildth.ing/stacks/observability-client/vector/values.yaml @@ -47,12 +47,8 @@ customConfig: vlogs: type: elasticsearch inputs: [parser] - endpoints: - - https://o12y.observability./insert/elasticsearch/ - auth: - strategy: basic - user: ${VECTOR_USER} - password: ${VECTOR_PASSWORD} + endpoints: + - http://vlogs-victorialogs.observability.svc:9428/insert/elasticsearch/ mode: bulk api_version: v8 compression: gzip From da0ccbd1b54bd847ea2e5a933658dbc566a33dfd Mon Sep 17 00:00:00 2001 From: Martin McCaffery Date: Mon, 1 Jun 2026 16:47:31 +0100 Subject: [PATCH 055/114] fix(observability): enable ArgoCD/CoreDNS scraping, add cluster label, fix node dashboard --- .../stacks/core/argocd/values.yaml | 24 +++++++++++++++++++ .../manifests/argocd-scrape.yaml | 13 ++++++++++ .../manifests/coredns-scrape.yaml | 14 +++++++++++ .../victoria-k8s-stack/values.yaml | 11 +++++---- 4 
files changed, 58 insertions(+), 4 deletions(-) create mode 100644 otc/observability.buildth.ing/stacks/observability/victoria-k8s-stack/manifests/argocd-scrape.yaml create mode 100644 otc/observability.buildth.ing/stacks/observability/victoria-k8s-stack/manifests/coredns-scrape.yaml diff --git a/otc/observability.buildth.ing/stacks/core/argocd/values.yaml b/otc/observability.buildth.ing/stacks/core/argocd/values.yaml index 04b0f2b..182d8ca 100644 --- a/otc/observability.buildth.ing/stacks/core/argocd/values.yaml +++ b/otc/observability.buildth.ing/stacks/core/argocd/values.yaml @@ -35,6 +35,30 @@ configs: tls: certificates: +controller: + metrics: + enabled: true + serviceMonitor: + enabled: false + +server: + metrics: + enabled: true + serviceMonitor: + enabled: false + +repoServer: + metrics: + enabled: true + serviceMonitor: + enabled: false + +applicationSet: + metrics: + enabled: true + serviceMonitor: + enabled: false + notifications: enabled: false diff --git a/otc/observability.buildth.ing/stacks/observability/victoria-k8s-stack/manifests/argocd-scrape.yaml b/otc/observability.buildth.ing/stacks/observability/victoria-k8s-stack/manifests/argocd-scrape.yaml new file mode 100644 index 0000000..d72e88d --- /dev/null +++ b/otc/observability.buildth.ing/stacks/observability/victoria-k8s-stack/manifests/argocd-scrape.yaml @@ -0,0 +1,13 @@ +apiVersion: operator.victoriametrics.com/v1beta1 +kind: VMServiceScrape +metadata: + name: argocd +spec: + namespaceSelector: + matchNames: + - argocd + selector: + matchLabels: + app.kubernetes.io/part-of: argocd + endpoints: + - port: metrics diff --git a/otc/observability.buildth.ing/stacks/observability/victoria-k8s-stack/manifests/coredns-scrape.yaml b/otc/observability.buildth.ing/stacks/observability/victoria-k8s-stack/manifests/coredns-scrape.yaml new file mode 100644 index 0000000..c7e379e --- /dev/null +++ b/otc/observability.buildth.ing/stacks/observability/victoria-k8s-stack/manifests/coredns-scrape.yaml @@ -0,0 
+1,14 @@ +apiVersion: operator.victoriametrics.com/v1beta1 +kind: VMPodScrape +metadata: + name: coredns +spec: + namespaceSelector: + matchNames: + - kube-system + selector: + matchLabels: + k8s-app: coredns + podMetricsEndpoints: + - targetPort: 9153 + path: /metrics diff --git a/otc/observability.buildth.ing/stacks/observability/victoria-k8s-stack/values.yaml b/otc/observability.buildth.ing/stacks/observability/victoria-k8s-stack/values.yaml index cdb96a9..9b6aba9 100644 --- a/otc/observability.buildth.ing/stacks/observability/victoria-k8s-stack/values.yaml +++ b/otc/observability.buildth.ing/stacks/observability/victoria-k8s-stack/values.yaml @@ -711,10 +711,8 @@ vmagent: port: "8429" selectAllByDefault: true scrapeInterval: 20s - externalLabels: {} - # For multi-cluster setups it is useful to use "cluster" label to identify the metrics source. - # For example: - # cluster: cluster-name + externalLabels: + cluster: observability extraArgs: promscrape.streamParse: "true" # Do not store original labels in vmagent's memory by default. This reduces the amount of memory used by vmagent @@ -921,6 +919,11 @@ prometheus-node-exporter: - action: drop source_labels: [mountpoint] regex: "/var/lib/kubelet/pods.+" + - action: replace + source_labels: [__name__, instance] + regex: "node_uname_info;([^:]+):.+" + target_label: nodename + replacement: "$1" # -- kube-state-metrics dependency chart configuration. 
For possible values check [here](https://github.com/prometheus-community/helm-charts/blob/main/charts/kube-state-metrics/values.yaml) kube-state-metrics: enabled: true From 342870fa033e0531f25bb48498690ef840e5e879 Mon Sep 17 00:00:00 2001 From: Martin McCaffery Date: Tue, 2 Jun 2026 09:30:24 +0100 Subject: [PATCH 056/114] fix(vm-client): add cluster external label for dashboard variable resolution --- .../stacks/observability-client/vm-client-stack/values.yaml | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/otc/observability.buildth.ing/stacks/observability-client/vm-client-stack/values.yaml b/otc/observability.buildth.ing/stacks/observability-client/vm-client-stack/values.yaml index 7bd29da..723d36c 100644 --- a/otc/observability.buildth.ing/stacks/observability-client/vm-client-stack/values.yaml +++ b/otc/observability.buildth.ing/stacks/observability-client/vm-client-stack/values.yaml @@ -791,11 +791,9 @@ vmagent: port: "8429" selectAllByDefault: true scrapeInterval: 20s - externalLabels: + externalLabels: cluster_environment: "observability" - # For multi-cluster setups it is useful to use "cluster" label to identify the metrics source. - # For example: - # cluster: cluster-name + cluster: observability extraArgs: promscrape.streamParse: "true" # Do not store original labels in vmagent's memory by default. 
This reduces the amount of memory used by vmagent From 07d08e58399305dfee6843251fa0bdc5859e1554 Mon Sep 17 00:00:00 2001 From: Martin McCaffery Date: Tue, 2 Jun 2026 09:50:04 +0100 Subject: [PATCH 057/114] upgrade chart versions: argocd, dex, cloudnative-pg, cert-manager, ingress-nginx, vector, metrics-server --- otc/observability.buildth.ing/stacks/core/argocd.yaml | 2 +- otc/observability.buildth.ing/stacks/core/cloudnative-pg.yaml | 2 +- otc/observability.buildth.ing/stacks/core/dex.yaml | 2 +- .../stacks/observability-client/metrics-server.yaml | 2 +- .../stacks/observability-client/vector.yaml | 2 +- otc/observability.buildth.ing/stacks/otc/cert-manger.yaml | 2 +- otc/observability.buildth.ing/stacks/otc/ingress-nginx.yaml | 2 +- 7 files changed, 7 insertions(+), 7 deletions(-) diff --git a/otc/observability.buildth.ing/stacks/core/argocd.yaml b/otc/observability.buildth.ing/stacks/core/argocd.yaml index 57fe99b..55729e1 100644 --- a/otc/observability.buildth.ing/stacks/core/argocd.yaml +++ b/otc/observability.buildth.ing/stacks/core/argocd.yaml @@ -23,7 +23,7 @@ spec: # TODO: RIRE Can be updated when https://github.com/argoproj/argo-cd/issues/20790 is fixed and merged # As logout make problems, it is suggested to switch from path based routing to an own argocd domain, # similar to the CNOE amazon reference implementation and in our case, Forgejo - targetRevision: argo-cd-9.4.6 + targetRevision: argo-cd-9.5.17 helm: valueFiles: - $values/otc/observability.buildth.ing/stacks/core/argocd/values.yaml diff --git a/otc/observability.buildth.ing/stacks/core/cloudnative-pg.yaml b/otc/observability.buildth.ing/stacks/core/cloudnative-pg.yaml index 0642dd0..be7ebcc 100644 --- a/otc/observability.buildth.ing/stacks/core/cloudnative-pg.yaml +++ b/otc/observability.buildth.ing/stacks/core/cloudnative-pg.yaml @@ -21,7 +21,7 @@ spec: sources: - repoURL: https://cloudnative-pg.github.io/charts chart: cloudnative-pg - targetRevision: 0.26.1 + targetRevision: 0.28.2 helm: 
valueFiles: - $values/otc/observability.buildth.ing/stacks/core/cloudnative-pg/values.yaml diff --git a/otc/observability.buildth.ing/stacks/core/dex.yaml b/otc/observability.buildth.ing/stacks/core/dex.yaml index e24fe03..4e76f2d 100644 --- a/otc/observability.buildth.ing/stacks/core/dex.yaml +++ b/otc/observability.buildth.ing/stacks/core/dex.yaml @@ -20,7 +20,7 @@ spec: sources: - repoURL: https://charts.dexidp.io chart: dex - targetRevision: 0.23.0 + targetRevision: 0.24.1 helm: valueFiles: - $values/otc/observability.buildth.ing/stacks/core/dex/values.yaml diff --git a/otc/observability.buildth.ing/stacks/observability-client/metrics-server.yaml b/otc/observability.buildth.ing/stacks/observability-client/metrics-server.yaml index 286ba67..80a405b 100644 --- a/otc/observability.buildth.ing/stacks/observability-client/metrics-server.yaml +++ b/otc/observability.buildth.ing/stacks/observability-client/metrics-server.yaml @@ -20,7 +20,7 @@ spec: sources: - chart: metrics-server repoURL: https://kubernetes-sigs.github.io/metrics-server/ - targetRevision: 3.12.2 + targetRevision: 3.13.1 helm: valueFiles: - $values/otc/observability.buildth.ing/stacks/observability-client/metrics-server/values.yaml diff --git a/otc/observability.buildth.ing/stacks/observability-client/vector.yaml b/otc/observability.buildth.ing/stacks/observability-client/vector.yaml index c66556e..ab888de 100644 --- a/otc/observability.buildth.ing/stacks/observability-client/vector.yaml +++ b/otc/observability.buildth.ing/stacks/observability-client/vector.yaml @@ -20,7 +20,7 @@ spec: sources: - chart: vector repoURL: https://helm.vector.dev - targetRevision: 0.43.0 + targetRevision: 0.52.0 helm: valueFiles: - $values/otc/observability.buildth.ing/stacks/observability-client/vector/values.yaml diff --git a/otc/observability.buildth.ing/stacks/otc/cert-manger.yaml b/otc/observability.buildth.ing/stacks/otc/cert-manger.yaml index 3ee7573..ea4125e 100644 --- 
a/otc/observability.buildth.ing/stacks/otc/cert-manger.yaml +++ b/otc/observability.buildth.ing/stacks/otc/cert-manger.yaml @@ -20,7 +20,7 @@ spec: sources: - chart: cert-manager repoURL: https://charts.jetstack.io - targetRevision: v1.17.2 + targetRevision: v1.19.5 helm: valueFiles: - $values/otc/observability.buildth.ing/stacks/otc/cert-manager/values.yaml diff --git a/otc/observability.buildth.ing/stacks/otc/ingress-nginx.yaml b/otc/observability.buildth.ing/stacks/otc/ingress-nginx.yaml index db06173..3ddf6a2 100644 --- a/otc/observability.buildth.ing/stacks/otc/ingress-nginx.yaml +++ b/otc/observability.buildth.ing/stacks/otc/ingress-nginx.yaml @@ -20,7 +20,7 @@ spec: sources: - repoURL: https://github.com/kubernetes/ingress-nginx.git path: charts/ingress-nginx - targetRevision: helm-chart-4.12.1 + targetRevision: helm-chart-4.15.1 helm: valueFiles: - $values/otc/observability.buildth.ing/stacks/otc/ingress-nginx/values.yaml From 07261b081e1a580c8ca1d341e77c692d6a6a8406 Mon Sep 17 00:00:00 2001 From: Martin McCaffery Date: Tue, 2 Jun 2026 09:51:49 +0100 Subject: [PATCH 058/114] upgrade victoria-metrics-k8s-stack 0.48.1 -> 0.81.0 with values migration --- .../observability-client/vm-client-stack.yaml | 3 +- .../vm-client-stack/values.yaml | 84 +++++++++---------- .../observability/victoria-k8s-stack.yaml | 2 +- .../victoria-k8s-stack/values.yaml | 84 +++++++++---------- 4 files changed, 85 insertions(+), 88 deletions(-) diff --git a/otc/observability.buildth.ing/stacks/observability-client/vm-client-stack.yaml b/otc/observability.buildth.ing/stacks/observability-client/vm-client-stack.yaml index 673c087..ffbb931 100644 --- a/otc/observability.buildth.ing/stacks/observability-client/vm-client-stack.yaml +++ b/otc/observability.buildth.ing/stacks/observability-client/vm-client-stack.yaml @@ -12,13 +12,14 @@ spec: selfHeal: true syncOptions: - CreateNamespace=true + - ServerSideApply=true destination: name: in-cluster namespace: observability sources: - chart: 
victoria-metrics-k8s-stack repoURL: https://victoriametrics.github.io/helm-charts/ - targetRevision: 0.48.1 + targetRevision: 0.81.0 helm: valueFiles: - $values/otc/observability.buildth.ing/stacks/observability-client/vm-client-stack/values.yaml diff --git a/otc/observability.buildth.ing/stacks/observability-client/vm-client-stack/values.yaml b/otc/observability.buildth.ing/stacks/observability-client/vm-client-stack/values.yaml index 723d36c..58d6f50 100644 --- a/otc/observability.buildth.ing/stacks/observability-client/vm-client-stack/values.yaml +++ b/otc/observability.buildth.ing/stacks/observability-client/vm-client-stack/values.yaml @@ -70,8 +70,8 @@ defaultDashboards: # -- Create default rules for monitoring the cluster defaultRules: # -- Labels, which are used for grouping results of the queries. Note that these labels are joined with `.Values.global.clusterLabel` - additionalGroupByLabels: [] - create: true + extraGroupByLabels: [] + enabled: true # -- Common properties for VMRule groups group: @@ -114,127 +114,127 @@ defaultRules: # -- Rule group properties groups: etcd: - create: true + enabled: true # -- Common properties for all rules in a group rules: {} # spec: # annotations: # dashboard: https://example.com/dashboard/1 general: - create: true + enabled: true rules: {} k8sContainerCpuLimits: - create: true + enabled: true rules: {} k8sContainerCpuRequests: - create: true + enabled: true rules: {} k8sContainerCpuUsageSecondsTotal: - create: true + enabled: true rules: {} k8sContainerMemoryLimits: - create: true + enabled: true rules: {} k8sContainerMemoryRequests: - create: true + enabled: true rules: {} k8sContainerMemoryRss: - create: true + enabled: true rules: {} k8sContainerMemoryCache: - create: true + enabled: true rules: {} k8sContainerMemoryWorkingSetBytes: - create: true + enabled: true rules: {} k8sContainerMemorySwap: - create: true + enabled: true rules: {} k8sPodOwner: - create: true + enabled: true rules: {} k8sContainerResource: - 
create: true + enabled: true rules: {} kubeApiserver: - create: true + enabled: true rules: {} kubeApiserverAvailability: - create: true + enabled: true rules: {} kubeApiserverBurnrate: - create: true + enabled: true rules: {} kubeApiserverHistogram: - create: true + enabled: true rules: {} kubeApiserverSlos: - create: true + enabled: true rules: {} kubelet: - create: true + enabled: true rules: {} kubePrometheusGeneral: - create: true + enabled: true rules: {} kubePrometheusNodeRecording: - create: true + enabled: true rules: {} kubernetesApps: - create: true + enabled: true rules: {} targetNamespace: ".*" kubernetesResources: - create: true + enabled: true rules: {} kubernetesStorage: - create: true + enabled: true rules: {} targetNamespace: ".*" kubernetesSystem: - create: true + enabled: true rules: {} kubernetesSystemKubelet: - create: true + enabled: true rules: {} kubernetesSystemApiserver: - create: true + enabled: true rules: {} kubernetesSystemControllerManager: - create: true + enabled: true rules: {} kubeScheduler: - create: true + enabled: true rules: {} kubernetesSystemScheduler: - create: true + enabled: true rules: {} kubeStateMetrics: - create: true + enabled: true rules: {} nodeNetwork: - create: true + enabled: true rules: {} node: - create: true + enabled: true rules: {} vmagent: - create: true + enabled: true rules: {} vmsingle: - create: true + enabled: true rules: {} vmcluster: - create: true + enabled: true rules: {} vmHealth: - create: true + enabled: true rules: {} vmoperator: - create: true + enabled: true rules: {} alertmanager: - create: true + enabled: true rules: {} # -- Runbook url prefix for default rules @@ -841,7 +841,6 @@ defaultDatasources: allowCrossNamespaceImport: false victoriametrics: # -- Create per replica prometheus compatible datasource - perReplica: false # -- List of prometheus compatible datasource configurations. # VM `url` will be added to each of them in templates. 
datasources: @@ -858,7 +857,6 @@ defaultDatasources: # Alertmanager generated `url` will be added to each datasource in template if alertmanager is enabled alertmanager: # -- Create per replica alertmanager compatible datasource - perReplica: false datasources: - name: Alertmanager access: proxy diff --git a/otc/observability.buildth.ing/stacks/observability/victoria-k8s-stack.yaml b/otc/observability.buildth.ing/stacks/observability/victoria-k8s-stack.yaml index e38414f..a236b2c 100644 --- a/otc/observability.buildth.ing/stacks/observability/victoria-k8s-stack.yaml +++ b/otc/observability.buildth.ing/stacks/observability/victoria-k8s-stack.yaml @@ -19,7 +19,7 @@ spec: sources: - chart: victoria-metrics-k8s-stack repoURL: https://victoriametrics.github.io/helm-charts/ - targetRevision: 0.48.1 + targetRevision: 0.81.0 helm: valueFiles: - $values/otc/observability.buildth.ing/stacks/observability/victoria-k8s-stack/values.yaml diff --git a/otc/observability.buildth.ing/stacks/observability/victoria-k8s-stack/values.yaml b/otc/observability.buildth.ing/stacks/observability/victoria-k8s-stack/values.yaml index 9b6aba9..bd22879 100644 --- a/otc/observability.buildth.ing/stacks/observability/victoria-k8s-stack/values.yaml +++ b/otc/observability.buildth.ing/stacks/observability/victoria-k8s-stack/values.yaml @@ -70,8 +70,8 @@ defaultDashboards: # -- Create default rules for monitoring the cluster defaultRules: # -- Labels, which are used for grouping results of the queries. 
Note that these labels are joined with `.Values.global.clusterLabel` - additionalGroupByLabels: [] - create: true + extraGroupByLabels: [] + enabled: true # -- Common properties for VMRule groups group: @@ -114,127 +114,127 @@ defaultRules: # -- Rule group properties groups: etcd: - create: true + enabled: true # -- Common properties for all rules in a group rules: {} # spec: # annotations: # dashboard: https://example.com/dashboard/1 general: - create: true + enabled: true rules: {} k8sContainerCpuLimits: - create: true + enabled: true rules: {} k8sContainerCpuRequests: - create: true + enabled: true rules: {} k8sContainerCpuUsageSecondsTotal: - create: true + enabled: true rules: {} k8sContainerMemoryLimits: - create: true + enabled: true rules: {} k8sContainerMemoryRequests: - create: true + enabled: true rules: {} k8sContainerMemoryRss: - create: true + enabled: true rules: {} k8sContainerMemoryCache: - create: true + enabled: true rules: {} k8sContainerMemoryWorkingSetBytes: - create: true + enabled: true rules: {} k8sContainerMemorySwap: - create: true + enabled: true rules: {} k8sPodOwner: - create: true + enabled: true rules: {} k8sContainerResource: - create: true + enabled: true rules: {} kubeApiserver: - create: true + enabled: true rules: {} kubeApiserverAvailability: - create: true + enabled: true rules: {} kubeApiserverBurnrate: - create: true + enabled: true rules: {} kubeApiserverHistogram: - create: true + enabled: true rules: {} kubeApiserverSlos: - create: true + enabled: true rules: {} kubelet: - create: true + enabled: true rules: {} kubePrometheusGeneral: - create: true + enabled: true rules: {} kubePrometheusNodeRecording: - create: true + enabled: true rules: {} kubernetesApps: - create: true + enabled: true rules: {} targetNamespace: ".*" kubernetesResources: - create: true + enabled: true rules: {} kubernetesStorage: - create: true + enabled: true rules: {} targetNamespace: ".*" kubernetesSystem: - create: true + enabled: true rules: {} 
kubernetesSystemKubelet: - create: true + enabled: true rules: {} kubernetesSystemApiserver: - create: true + enabled: true rules: {} kubernetesSystemControllerManager: - create: false + enabled: false rules: {} kubeScheduler: - create: false + enabled: false rules: {} kubernetesSystemScheduler: - create: false + enabled: false rules: {} kubeStateMetrics: - create: true + enabled: true rules: {} nodeNetwork: - create: true + enabled: true rules: {} node: - create: true + enabled: true rules: {} vmagent: - create: true + enabled: true rules: {} vmsingle: - create: true + enabled: true rules: {} vmcluster: - create: true + enabled: true rules: {} vmHealth: - create: true + enabled: true rules: {} vmoperator: - create: true + enabled: true rules: {} alertmanager: - create: true + enabled: true rules: {} # -- Runbook url prefix for default rules @@ -765,7 +765,6 @@ defaultDatasources: allowCrossNamespaceImport: false victoriametrics: # -- Create per replica prometheus compatible datasource - perReplica: false # -- List of prometheus compatible datasource configurations. # VM `url` will be added to each of them in templates. 
datasources: @@ -782,7 +781,6 @@ defaultDatasources: # Alertmanager generated `url` will be added to each datasource in template if alertmanager is enabled alertmanager: # -- Create per replica alertmanager compatible datasource - perReplica: false datasources: - name: Alertmanager access: proxy From d0b0c85cf84c467cd6980185567aef76b8a1b001 Mon Sep 17 00:00:00 2001 From: Martin McCaffery Date: Tue, 2 Jun 2026 09:57:05 +0100 Subject: [PATCH 059/114] fix: add ServerSideApply for argocd CRDs, remove deprecated vector playground field --- otc/observability.buildth.ing/stacks/core/argocd.yaml | 1 + .../stacks/observability-client/vector/values.yaml | 1 - 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/otc/observability.buildth.ing/stacks/core/argocd.yaml b/otc/observability.buildth.ing/stacks/core/argocd.yaml index 55729e1..32f51a9 100644 --- a/otc/observability.buildth.ing/stacks/core/argocd.yaml +++ b/otc/observability.buildth.ing/stacks/core/argocd.yaml @@ -12,6 +12,7 @@ spec: selfHeal: true syncOptions: - CreateNamespace=true + - ServerSideApply=true retry: limit: -1 destination: diff --git a/otc/observability.buildth.ing/stacks/observability-client/vector/values.yaml b/otc/observability.buildth.ing/stacks/observability-client/vector/values.yaml index 44e042b..042df5e 100644 --- a/otc/observability.buildth.ing/stacks/observability-client/vector/values.yaml +++ b/otc/observability.buildth.ing/stacks/observability-client/vector/values.yaml @@ -28,7 +28,6 @@ customConfig: api: enabled: false address: 0.0.0.0:8686 - playground: true sources: k8s: type: kubernetes_logs From e95fa403e935ba92d3188e4ef1503e1772ff504c Mon Sep 17 00:00:00 2001 From: Martin McCaffery Date: Tue, 2 Jun 2026 11:56:11 +0100 Subject: [PATCH 060/114] fix(benchmark.t09.de/garm): wire sizer baseUrl + readToken so edge-connect-k8s provider actually applies sizer recommendations (was silently no-op) --- otc/benchmark.t09.de/stacks/garm/garm/values.yaml | 5 +++++ 1 file changed, 5 
insertions(+) diff --git a/otc/benchmark.t09.de/stacks/garm/garm/values.yaml b/otc/benchmark.t09.de/stacks/garm/garm/values.yaml index 3143f5d..4948cdf 100644 --- a/otc/benchmark.t09.de/stacks/garm/garm/values.yaml +++ b/otc/benchmark.t09.de/stacks/garm/garm/values.yaml @@ -37,8 +37,13 @@ providerConfig: name: Hamburg organization: TelekomOP edgeConnectK8s: + pendingTimeout: "5m" sizer: sidecarImage: edp.buildth.ing/devfw-cicd/ci-sizer-collector:0.0.4 + sidecarPushEndpoint: https://sizer.benchmark.t09.de/api/v1/metrics + baseUrl: "https://sizer.benchmark.t09.de" + readToken: + existingSecretName: sizer-tokens garm: logging: From 71a8fef501c10d87b471815b0f41e362d4169170 Mon Sep 17 00:00:00 2001 From: Martin McCaffery Date: Tue, 2 Jun 2026 11:59:42 +0100 Subject: [PATCH 061/114] fix(vm-client): create missing manifests directory --- .../observability-client/vm-client-stack/manifests/.gitkeep | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 otc/observability.buildth.ing/stacks/observability-client/vm-client-stack/manifests/.gitkeep diff --git a/otc/observability.buildth.ing/stacks/observability-client/vm-client-stack/manifests/.gitkeep b/otc/observability.buildth.ing/stacks/observability-client/vm-client-stack/manifests/.gitkeep new file mode 100644 index 0000000..e69de29 From eca54cb19cac66efe0505105c03dfb6ea950d3a8 Mon Sep 17 00:00:00 2001 From: Martin McCaffery Date: Tue, 2 Jun 2026 12:03:44 +0100 Subject: [PATCH 062/114] fix(vm-client): use in-cluster VMSingle URL for remote write --- .../observability-client/vm-client-stack/values.yaml | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/otc/observability.buildth.ing/stacks/observability-client/vm-client-stack/values.yaml b/otc/observability.buildth.ing/stacks/observability-client/vm-client-stack/values.yaml index 58d6f50..91223a1 100644 --- a/otc/observability.buildth.ing/stacks/observability-client/vm-client-stack/values.yaml +++ 
b/otc/observability.buildth.ing/stacks/observability-client/vm-client-stack/values.yaml @@ -778,14 +778,7 @@ vmagent: # -- Remote write configuration of VMAgent, allowed parameters defined in a [spec](https://docs.victoriametrics.com/operator/api#vmagentremotewritespec) additionalRemoteWrites: # [] - - url: https://o12y.observability./api/v1/write - basicAuth: - username: - name: simple-user-secret - key: username - password: - name: simple-user-secret - key: password + - url: http://vmsingle-o12y.observability.svc:8429/api/v1/write # -- (object) Full spec for VMAgent CRD. Allowed values described [here](https://docs.victoriametrics.com/operator/api#vmagentspec) spec: port: "8429" From b98486f445b22c09d80bf29d579b5d96f30be5ff Mon Sep 17 00:00:00 2001 From: Martin McCaffery Date: Tue, 2 Jun 2026 12:13:38 +0100 Subject: [PATCH 063/114] fix: argocd metrics port name, coredns metrics via headless service --- .../manifests/argocd-scrape.yaml | 2 +- .../manifests/coredns-scrape.yaml | 26 +++++++++++++++---- 2 files changed, 22 insertions(+), 6 deletions(-) diff --git a/otc/observability.buildth.ing/stacks/observability/victoria-k8s-stack/manifests/argocd-scrape.yaml b/otc/observability.buildth.ing/stacks/observability/victoria-k8s-stack/manifests/argocd-scrape.yaml index d72e88d..0517321 100644 --- a/otc/observability.buildth.ing/stacks/observability/victoria-k8s-stack/manifests/argocd-scrape.yaml +++ b/otc/observability.buildth.ing/stacks/observability/victoria-k8s-stack/manifests/argocd-scrape.yaml @@ -10,4 +10,4 @@ spec: matchLabels: app.kubernetes.io/part-of: argocd endpoints: - - port: metrics + - port: http-metrics diff --git a/otc/observability.buildth.ing/stacks/observability/victoria-k8s-stack/manifests/coredns-scrape.yaml b/otc/observability.buildth.ing/stacks/observability/victoria-k8s-stack/manifests/coredns-scrape.yaml index c7e379e..77cef00 100644 --- a/otc/observability.buildth.ing/stacks/observability/victoria-k8s-stack/manifests/coredns-scrape.yaml +++ 
b/otc/observability.buildth.ing/stacks/observability/victoria-k8s-stack/manifests/coredns-scrape.yaml @@ -1,5 +1,22 @@ +apiVersion: v1 +kind: Service +metadata: + name: coredns-metrics + namespace: kube-system + labels: + k8s-app: coredns-metrics +spec: + clusterIP: None + selector: + k8s-app: coredns + ports: + - name: metrics + port: 9153 + targetPort: 9153 + protocol: TCP +--- apiVersion: operator.victoriametrics.com/v1beta1 -kind: VMPodScrape +kind: VMServiceScrape metadata: name: coredns spec: @@ -8,7 +25,6 @@ spec: - kube-system selector: matchLabels: - k8s-app: coredns - podMetricsEndpoints: - - targetPort: 9153 - path: /metrics + k8s-app: coredns-metrics + endpoints: + - port: metrics From e2469e78438b153c415775f621c6651f0b03c54d Mon Sep 17 00:00:00 2001 From: Martin McCaffery Date: Tue, 2 Jun 2026 14:38:41 +0100 Subject: [PATCH 064/114] fix(benchmark.t09.de/garm): explicit sizer readToken mountPath/key/fileName (chart defaults not deep-merging, was rendering broken %!s() path that crashed sizer consultation) --- otc/benchmark.t09.de/stacks/garm/garm/values.yaml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/otc/benchmark.t09.de/stacks/garm/garm/values.yaml b/otc/benchmark.t09.de/stacks/garm/garm/values.yaml index 4948cdf..4220878 100644 --- a/otc/benchmark.t09.de/stacks/garm/garm/values.yaml +++ b/otc/benchmark.t09.de/stacks/garm/garm/values.yaml @@ -44,6 +44,9 @@ providerConfig: baseUrl: "https://sizer.benchmark.t09.de" readToken: existingSecretName: sizer-tokens + key: read-token + mountPath: /etc/garm-secrets/sizer + fileName: read-token garm: logging: From 3be56f5a078536785552d3086ea6d8800257c789 Mon Sep 17 00:00:00 2001 From: Martin McCaffery Date: Tue, 2 Jun 2026 14:58:36 +0100 Subject: [PATCH 065/114] fix(vm-client): add nodename-to-IP metricRelabelConfig for node-exporter Co-Authored-By: Claude Opus 4.6 --- .../stacks/observability-client/vm-client-stack/values.yaml | 5 +++++ 1 file changed, 5 insertions(+) diff --git 
a/otc/observability.buildth.ing/stacks/observability-client/vm-client-stack/values.yaml b/otc/observability.buildth.ing/stacks/observability-client/vm-client-stack/values.yaml index 91223a1..8784dcc 100644 --- a/otc/observability.buildth.ing/stacks/observability-client/vm-client-stack/values.yaml +++ b/otc/observability.buildth.ing/stacks/observability-client/vm-client-stack/values.yaml @@ -968,6 +968,11 @@ prometheus-node-exporter: - action: drop source_labels: [mountpoint] regex: "/var/lib/kubelet/pods.+" + - action: replace + source_labels: [__name__, instance] + regex: "node_uname_info;([^:]+):.+" + target_label: nodename + replacement: "$1" # -- kube-state-metrics dependency chart configuration. For possible values check [here](https://github.com/prometheus-community/helm-charts/blob/main/charts/kube-state-metrics/values.yaml) kube-state-metrics: enabled: true From bbdca11f0010486a4ccb64f7e9bf66418d99b9c4 Mon Sep 17 00:00:00 2001 From: Martin McCaffery Date: Tue, 2 Jun 2026 15:42:10 +0100 Subject: [PATCH 066/114] fix(benchmark.t09.de/garm): bump ci-sizer-collector to :latest (0.0.4 tag doesn't exist in registry, was unreachable until sizer integration was restored) --- otc/benchmark.t09.de/stacks/garm/garm/values.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/otc/benchmark.t09.de/stacks/garm/garm/values.yaml b/otc/benchmark.t09.de/stacks/garm/garm/values.yaml index 4220878..01864ab 100644 --- a/otc/benchmark.t09.de/stacks/garm/garm/values.yaml +++ b/otc/benchmark.t09.de/stacks/garm/garm/values.yaml @@ -39,7 +39,7 @@ providerConfig: edgeConnectK8s: pendingTimeout: "5m" sizer: - sidecarImage: edp.buildth.ing/devfw-cicd/ci-sizer-collector:0.0.4 + sidecarImage: edp.buildth.ing/devfw-cicd/ci-sizer-collector:latest sidecarPushEndpoint: https://sizer.benchmark.t09.de/api/v1/metrics baseUrl: "https://sizer.benchmark.t09.de" readToken: From b5594a8017952e180485fb3a8623a09c7b587ac7 Mon Sep 17 00:00:00 2001 From: Martin McCaffery Date: Tue, 2 Jun 
2026 15:51:26 +0100 Subject: [PATCH 067/114] feat(observability): add sustainability metrics, Kepler, 6-month retention, GARM scrape --- .../stacks/observability-client/kepler.yaml | 29 +++++++ .../observability-client/kepler/values.yaml | 10 +++ .../manifests/ci-sustainability-rules.yaml | 75 +++++++++++++++++++ .../manifests/garm-scrape.yaml | 13 ++++ .../manifests/kepler-scrape.yaml | 13 ++++ .../victoria-k8s-stack/values.yaml | 2 +- 6 files changed, 141 insertions(+), 1 deletion(-) create mode 100644 otc/observability.buildth.ing/stacks/observability-client/kepler.yaml create mode 100644 otc/observability.buildth.ing/stacks/observability-client/kepler/values.yaml create mode 100644 otc/observability.buildth.ing/stacks/observability/victoria-k8s-stack/manifests/ci-sustainability-rules.yaml create mode 100644 otc/observability.buildth.ing/stacks/observability/victoria-k8s-stack/manifests/garm-scrape.yaml create mode 100644 otc/observability.buildth.ing/stacks/observability/victoria-k8s-stack/manifests/kepler-scrape.yaml diff --git a/otc/observability.buildth.ing/stacks/observability-client/kepler.yaml b/otc/observability.buildth.ing/stacks/observability-client/kepler.yaml new file mode 100644 index 0000000..288718e --- /dev/null +++ b/otc/observability.buildth.ing/stacks/observability-client/kepler.yaml @@ -0,0 +1,29 @@ +apiVersion: argoproj.io/v1alpha1 +kind: Application +metadata: + name: kepler + namespace: argocd + labels: + env: dev +spec: + project: default + syncPolicy: + automated: + selfHeal: true + syncOptions: + - CreateNamespace=true + retry: + limit: -1 + destination: + name: in-cluster + namespace: observability + sources: + - chart: kepler + repoURL: https://sustainable-computing-io.github.io/kepler-helm-chart + targetRevision: 0.6.1 + helm: + valueFiles: + - $values/otc/observability.buildth.ing/stacks/observability-client/kepler/values.yaml + - repoURL: https://edp.buildth.ing/DevFW-CICD/stacks-instances + targetRevision: HEAD + ref: values diff 
--git a/otc/observability.buildth.ing/stacks/observability-client/kepler/values.yaml b/otc/observability.buildth.ing/stacks/observability-client/kepler/values.yaml new file mode 100644 index 0000000..90fa6e4 --- /dev/null +++ b/otc/observability.buildth.ing/stacks/observability-client/kepler/values.yaml @@ -0,0 +1,10 @@ +canMount: + usrSrc: false + +serviceMonitor: + enabled: false + +extraEnvVars: + ENABLE_GPU: "false" + ENABLE_EBPF_CGROUPID: "true" + KEPLER_LOG_LEVEL: "1" diff --git a/otc/observability.buildth.ing/stacks/observability/victoria-k8s-stack/manifests/ci-sustainability-rules.yaml b/otc/observability.buildth.ing/stacks/observability/victoria-k8s-stack/manifests/ci-sustainability-rules.yaml new file mode 100644 index 0000000..0108b14 --- /dev/null +++ b/otc/observability.buildth.ing/stacks/observability/victoria-k8s-stack/manifests/ci-sustainability-rules.yaml @@ -0,0 +1,75 @@ +apiVersion: operator.victoriametrics.com/v1beta1 +kind: VMRule +metadata: + name: ci-sustainability +spec: + groups: + - name: ci.sustainability.daily + interval: 5m + rules: + - record: ci:cpu_seconds:increase1d + expr: | + sum by(namespace, cluster) ( + increase(container_cpu_usage_seconds_total{ + namespace=~"gitea|garm", + pod=~"forgejo-runner.*|garm-.*", + container!="" + }[1d]) + ) + - record: ci:memory_bytes_seconds:avg1d + expr: | + avg_over_time( + sum by(namespace, cluster) ( + container_memory_working_set_bytes{ + namespace=~"gitea|garm", + pod=~"forgejo-runner.*|garm-.*", + container!="" + } + )[1d:5m] + ) + - record: ci:pod_count:avg1d + expr: | + avg_over_time( + count by(namespace, cluster) ( + kube_pod_info{ + namespace=~"gitea|garm", + pod=~"forgejo-runner.*|garm-.*" + } + )[1d:5m] + ) + - record: ci:pod_creations:increase1d + expr: | + sum by(namespace, cluster) ( + changes(kube_pod_start_time{ + namespace=~"gitea|garm", + pod=~"forgejo-runner.*|garm-.*" + }[1d]) + ) + - name: ci.sustainability.cluster + interval: 5m + rules: + - record: 
cluster:cpu_seconds:rate5m + expr: | + sum by(cluster) ( + rate(node_cpu_seconds_total{mode!="idle"}[5m]) + ) + - record: cluster:memory_used_bytes:sum + expr: | + sum by(cluster) ( + node_memory_MemTotal_bytes - node_memory_MemAvailable_bytes + ) + - name: ci.sustainability.energy + interval: 5m + rules: + - record: ci:joules:increase1d + expr: | + sum by(container_namespace, cluster) ( + increase(kepler_container_joules_total{ + container_namespace=~"gitea|garm" + }[1d]) + ) + - record: cluster:joules:rate5m + expr: | + sum by(cluster) ( + rate(kepler_node_joules_total[5m]) + ) diff --git a/otc/observability.buildth.ing/stacks/observability/victoria-k8s-stack/manifests/garm-scrape.yaml b/otc/observability.buildth.ing/stacks/observability/victoria-k8s-stack/manifests/garm-scrape.yaml new file mode 100644 index 0000000..a4c6119 --- /dev/null +++ b/otc/observability.buildth.ing/stacks/observability/victoria-k8s-stack/manifests/garm-scrape.yaml @@ -0,0 +1,13 @@ +apiVersion: operator.victoriametrics.com/v1beta1 +kind: VMServiceScrape +metadata: + name: garm +spec: + namespaceSelector: + matchNames: + - garm + selector: + matchLabels: + app.kubernetes.io/name: garm + endpoints: + - port: metrics diff --git a/otc/observability.buildth.ing/stacks/observability/victoria-k8s-stack/manifests/kepler-scrape.yaml b/otc/observability.buildth.ing/stacks/observability/victoria-k8s-stack/manifests/kepler-scrape.yaml new file mode 100644 index 0000000..3cdbc1d --- /dev/null +++ b/otc/observability.buildth.ing/stacks/observability/victoria-k8s-stack/manifests/kepler-scrape.yaml @@ -0,0 +1,13 @@ +apiVersion: operator.victoriametrics.com/v1beta1 +kind: VMServiceScrape +metadata: + name: kepler +spec: + namespaceSelector: + matchNames: + - observability + selector: + matchLabels: + app.kubernetes.io/name: kepler + endpoints: + - port: http diff --git a/otc/observability.buildth.ing/stacks/observability/victoria-k8s-stack/values.yaml 
b/otc/observability.buildth.ing/stacks/observability/victoria-k8s-stack/values.yaml index bd22879..5bb9361 100644 --- a/otc/observability.buildth.ing/stacks/observability/victoria-k8s-stack/values.yaml +++ b/otc/observability.buildth.ing/stacks/observability/victoria-k8s-stack/values.yaml @@ -283,7 +283,7 @@ vmsingle: spec: port: "8429" # -- Data retention period. Possible units character: h(ours), d(ays), w(eeks), y(ears), if no unit character specified - month. The minimum retention period is 24h. See these [docs](https://docs.victoriametrics.com/single-server-victoriametrics/#retention) - retentionPeriod: "1" + retentionPeriod: "6" replicaCount: 1 extraArgs: {} storageMetadata: From 608439697be2ad25266e7f84081f1c5ca4673e38 Mon Sep 17 00:00:00 2001 From: Martin McCaffery Date: Tue, 2 Jun 2026 16:08:35 +0100 Subject: [PATCH 068/114] fix(benchmark.t09.de/garm): pin ci-sizer-collector to 0.8.3 (latest tagged release, avoid :latest drift during long runs) --- otc/benchmark.t09.de/stacks/garm/garm/values.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/otc/benchmark.t09.de/stacks/garm/garm/values.yaml b/otc/benchmark.t09.de/stacks/garm/garm/values.yaml index 01864ab..453aa65 100644 --- a/otc/benchmark.t09.de/stacks/garm/garm/values.yaml +++ b/otc/benchmark.t09.de/stacks/garm/garm/values.yaml @@ -39,7 +39,7 @@ providerConfig: edgeConnectK8s: pendingTimeout: "5m" sizer: - sidecarImage: edp.buildth.ing/devfw-cicd/ci-sizer-collector:latest + sidecarImage: edp.buildth.ing/devfw-cicd/ci-sizer-collector:0.8.3 sidecarPushEndpoint: https://sizer.benchmark.t09.de/api/v1/metrics baseUrl: "https://sizer.benchmark.t09.de" readToken: From f98f53a5a02647c8f170c27e32e185758c2d3c7f Mon Sep 17 00:00:00 2001 From: Martin McCaffery Date: Tue, 2 Jun 2026 16:12:06 +0100 Subject: [PATCH 069/114] revert(kepler): remove Kepler, incompatible with OTC CCE proc mount restrictions --- .../stacks/observability-client/kepler.yaml | 29 ------------------- 
.../observability-client/kepler/values.yaml | 10 ------- .../manifests/kepler-scrape.yaml | 13 --------- 3 files changed, 52 deletions(-) delete mode 100644 otc/observability.buildth.ing/stacks/observability-client/kepler.yaml delete mode 100644 otc/observability.buildth.ing/stacks/observability-client/kepler/values.yaml delete mode 100644 otc/observability.buildth.ing/stacks/observability/victoria-k8s-stack/manifests/kepler-scrape.yaml diff --git a/otc/observability.buildth.ing/stacks/observability-client/kepler.yaml b/otc/observability.buildth.ing/stacks/observability-client/kepler.yaml deleted file mode 100644 index 288718e..0000000 --- a/otc/observability.buildth.ing/stacks/observability-client/kepler.yaml +++ /dev/null @@ -1,29 +0,0 @@ -apiVersion: argoproj.io/v1alpha1 -kind: Application -metadata: - name: kepler - namespace: argocd - labels: - env: dev -spec: - project: default - syncPolicy: - automated: - selfHeal: true - syncOptions: - - CreateNamespace=true - retry: - limit: -1 - destination: - name: in-cluster - namespace: observability - sources: - - chart: kepler - repoURL: https://sustainable-computing-io.github.io/kepler-helm-chart - targetRevision: 0.6.1 - helm: - valueFiles: - - $values/otc/observability.buildth.ing/stacks/observability-client/kepler/values.yaml - - repoURL: https://edp.buildth.ing/DevFW-CICD/stacks-instances - targetRevision: HEAD - ref: values diff --git a/otc/observability.buildth.ing/stacks/observability-client/kepler/values.yaml b/otc/observability.buildth.ing/stacks/observability-client/kepler/values.yaml deleted file mode 100644 index 90fa6e4..0000000 --- a/otc/observability.buildth.ing/stacks/observability-client/kepler/values.yaml +++ /dev/null @@ -1,10 +0,0 @@ -canMount: - usrSrc: false - -serviceMonitor: - enabled: false - -extraEnvVars: - ENABLE_GPU: "false" - ENABLE_EBPF_CGROUPID: "true" - KEPLER_LOG_LEVEL: "1" diff --git 
a/otc/observability.buildth.ing/stacks/observability/victoria-k8s-stack/manifests/kepler-scrape.yaml b/otc/observability.buildth.ing/stacks/observability/victoria-k8s-stack/manifests/kepler-scrape.yaml deleted file mode 100644 index 3cdbc1d..0000000 --- a/otc/observability.buildth.ing/stacks/observability/victoria-k8s-stack/manifests/kepler-scrape.yaml +++ /dev/null @@ -1,13 +0,0 @@ -apiVersion: operator.victoriametrics.com/v1beta1 -kind: VMServiceScrape -metadata: - name: kepler -spec: - namespaceSelector: - matchNames: - - observability - selector: - matchLabels: - app.kubernetes.io/name: kepler - endpoints: - - port: http From 63cdb926b97bd56a5d8fc0b95b8caf629b1dc9fd Mon Sep 17 00:00:00 2001 From: Martin McCaffery Date: Tue, 2 Jun 2026 16:12:22 +0100 Subject: [PATCH 070/114] fix(sustainability-rules): remove Kepler energy rules since Kepler is incompatible --- .../manifests/ci-sustainability-rules.yaml | 16 +--------------- 1 file changed, 1 insertion(+), 15 deletions(-) diff --git a/otc/observability.buildth.ing/stacks/observability/victoria-k8s-stack/manifests/ci-sustainability-rules.yaml b/otc/observability.buildth.ing/stacks/observability/victoria-k8s-stack/manifests/ci-sustainability-rules.yaml index 0108b14..2290b99 100644 --- a/otc/observability.buildth.ing/stacks/observability/victoria-k8s-stack/manifests/ci-sustainability-rules.yaml +++ b/otc/observability.buildth.ing/stacks/observability/victoria-k8s-stack/manifests/ci-sustainability-rules.yaml @@ -58,18 +58,4 @@ spec: sum by(cluster) ( node_memory_MemTotal_bytes - node_memory_MemAvailable_bytes ) - - name: ci.sustainability.energy - interval: 5m - rules: - - record: ci:joules:increase1d - expr: | - sum by(container_namespace, cluster) ( - increase(kepler_container_joules_total{ - container_namespace=~"gitea|garm" - }[1d]) - ) - - record: cluster:joules:rate5m - expr: | - sum by(cluster) ( - rate(kepler_node_joules_total[5m]) - ) + From 14873b7941cd7abab4991319157446327ad53d0c Mon Sep 17 00:00:00 2001 
From: Martin McCaffery Date: Tue, 2 Jun 2026 16:21:51 +0100 Subject: [PATCH 071/114] fix(garm): bump dev+benchmark to garm-helm v0.0.17 (template-robust readToken); drop now-redundant explicit fields on benchmark --- otc/benchmark.t09.de/stacks/garm/garm.yaml | 2 +- otc/benchmark.t09.de/stacks/garm/garm/values.yaml | 4 +--- otc/dev.t09.de/stacks/garm/garm.yaml | 2 +- 3 files changed, 3 insertions(+), 5 deletions(-) diff --git a/otc/benchmark.t09.de/stacks/garm/garm.yaml b/otc/benchmark.t09.de/stacks/garm/garm.yaml index 820c8b2..05bb67c 100644 --- a/otc/benchmark.t09.de/stacks/garm/garm.yaml +++ b/otc/benchmark.t09.de/stacks/garm/garm.yaml @@ -24,7 +24,7 @@ spec: sources: - repoURL: https://edp.buildth.ing/DevFW-CICD/garm-helm path: charts/garm - targetRevision: v0.0.16 + targetRevision: v0.0.17 helm: valueFiles: - $values/otc/benchmark.t09.de/stacks/garm/garm/values.yaml diff --git a/otc/benchmark.t09.de/stacks/garm/garm/values.yaml b/otc/benchmark.t09.de/stacks/garm/garm/values.yaml index 453aa65..0fac909 100644 --- a/otc/benchmark.t09.de/stacks/garm/garm/values.yaml +++ b/otc/benchmark.t09.de/stacks/garm/garm/values.yaml @@ -44,9 +44,7 @@ providerConfig: baseUrl: "https://sizer.benchmark.t09.de" readToken: existingSecretName: sizer-tokens - key: read-token - mountPath: /etc/garm-secrets/sizer - fileName: read-token + # key/mountPath/fileName default sanely in garm-helm ≥v0.0.17 garm: logging: diff --git a/otc/dev.t09.de/stacks/garm/garm.yaml b/otc/dev.t09.de/stacks/garm/garm.yaml index 3754f9a..911f2cf 100644 --- a/otc/dev.t09.de/stacks/garm/garm.yaml +++ b/otc/dev.t09.de/stacks/garm/garm.yaml @@ -20,7 +20,7 @@ spec: sources: - repoURL: https://edp.buildth.ing/DevFW-CICD/garm-helm path: charts/garm - targetRevision: v0.0.15 + targetRevision: v0.0.17 helm: valueFiles: - $values/otc/dev.t09.de/stacks/garm/garm/values.yaml From 011f436fb79ad7a23d7f0611e01f46daded38695 Mon Sep 17 00:00:00 2001 From: Martin McCaffery Date: Wed, 3 Jun 2026 15:01:09 +0100 Subject: 
[PATCH 072/114] =?UTF-8?q?feat(benchmark.t09.de/garm):=20bump=20c?= =?UTF-8?q?i-sizer-collector=200.8.3=20=E2=86=92=200.9.0=20(kernel-peak=20?= =?UTF-8?q?+=20cgroup-v1=20limit=20fallback)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- otc/benchmark.t09.de/stacks/garm/garm/values.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/otc/benchmark.t09.de/stacks/garm/garm/values.yaml b/otc/benchmark.t09.de/stacks/garm/garm/values.yaml index 0fac909..8de0c9e 100644 --- a/otc/benchmark.t09.de/stacks/garm/garm/values.yaml +++ b/otc/benchmark.t09.de/stacks/garm/garm/values.yaml @@ -39,7 +39,7 @@ providerConfig: edgeConnectK8s: pendingTimeout: "5m" sizer: - sidecarImage: edp.buildth.ing/devfw-cicd/ci-sizer-collector:0.8.3 + sidecarImage: edp.buildth.ing/devfw-cicd/ci-sizer-collector:0.9.0 sidecarPushEndpoint: https://sizer.benchmark.t09.de/api/v1/metrics baseUrl: "https://sizer.benchmark.t09.de" readToken: From 422f568c8e1ced3d08b829a1e9f75b6b6e4a0cf8 Mon Sep 17 00:00:00 2001 From: Automated pipeline Date: Mon, 8 Jun 2026 12:15:27 +0000 Subject: [PATCH 073/114] Automated upload for dev.t09.de --- .../stacks/ci-sizer/gitlab-webhook.yaml | 29 ++++ .../ci-sizer/gitlab-webhook/certificates.yaml | 27 ++++ .../ci-sizer/gitlab-webhook/deployment.yaml | 141 ++++++++++++++++++ .../gitlab-webhook/webhook-config.yaml | 30 ++++ .../stacks/ci-sizer/sizer-receiver.yaml | 4 + .../ci-sizer/sizer-receiver/deployment.yaml | 2 +- .../ci-sizer/sizer-receiver/ingress.yaml | 10 ++ otc/dev.t09.de/stacks/core/argocd/values.yaml | 4 +- otc/dev.t09.de/stacks/core/dex/values.yaml | 2 +- .../stacks/core/secrets-backup.yaml | 23 +++ .../manifests/secrets-backup-cronjob.yaml | 129 ++++++++++++++++ .../forgejo/forgejo-runner/dind-docker.yaml | 2 +- .../manifests/forgejo-s3-backup-cronjob.yaml | 2 +- .../stacks/forgejo/forgejo-server/values.yaml | 2 +- otc/dev.t09.de/stacks/garm/garm.yaml | 6 +- otc/dev.t09.de/stacks/garm/garm/values.yaml 
| 11 +- 16 files changed, 407 insertions(+), 17 deletions(-) create mode 100644 otc/dev.t09.de/stacks/ci-sizer/gitlab-webhook.yaml create mode 100644 otc/dev.t09.de/stacks/ci-sizer/gitlab-webhook/certificates.yaml create mode 100644 otc/dev.t09.de/stacks/ci-sizer/gitlab-webhook/deployment.yaml create mode 100644 otc/dev.t09.de/stacks/ci-sizer/gitlab-webhook/webhook-config.yaml create mode 100644 otc/dev.t09.de/stacks/core/secrets-backup.yaml create mode 100644 otc/dev.t09.de/stacks/core/secrets-backup/manifests/secrets-backup-cronjob.yaml diff --git a/otc/dev.t09.de/stacks/ci-sizer/gitlab-webhook.yaml b/otc/dev.t09.de/stacks/ci-sizer/gitlab-webhook.yaml new file mode 100644 index 0000000..c02e1cc --- /dev/null +++ b/otc/dev.t09.de/stacks/ci-sizer/gitlab-webhook.yaml @@ -0,0 +1,29 @@ +# Optional: GitLab CI integration +# Only hydrate this app for clusters that run GitLab Runner. +# For Forgejo/GitHub-only deployments, omit this app from stacks-instances. +# See: ci-sizer/docs/deployment-modes.md +apiVersion: argoproj.io/v1alpha1 +kind: Application +metadata: + name: gitlab-sizer-webhook + namespace: argocd + labels: + env: dev + finalizers: + - resources-finalizer.argocd.argoproj.io +spec: + project: default + syncPolicy: + automated: + selfHeal: true + syncOptions: + - CreateNamespace=true + retry: + limit: -1 + destination: + name: in-cluster + namespace: ci-sizer + source: + repoURL: https://edp.buildth.ing/DevFW-CICD/stacks-instances + targetRevision: HEAD + path: "otc/dev.t09.de/stacks/ci-sizer/gitlab-webhook" diff --git a/otc/dev.t09.de/stacks/ci-sizer/gitlab-webhook/certificates.yaml b/otc/dev.t09.de/stacks/ci-sizer/gitlab-webhook/certificates.yaml new file mode 100644 index 0000000..ee1fece --- /dev/null +++ b/otc/dev.t09.de/stacks/ci-sizer/gitlab-webhook/certificates.yaml @@ -0,0 +1,27 @@ +# Self-signed Issuer for webhook TLS. +# For production, replace with a ClusterIssuer backed by a real CA. 
+apiVersion: cert-manager.io/v1 +kind: Issuer +metadata: + name: selfsigned-issuer +spec: + selfSigned: {} +--- +# cert-manager Certificate for the webhook TLS. +# The resulting Secret (gitlab-sizer-webhook-tls) is mounted into the webhook pod. +# cert-manager also injects the CA into the MutatingWebhookConfiguration via the +# cert-manager.io/inject-ca-from annotation. +apiVersion: cert-manager.io/v1 +kind: Certificate +metadata: + name: gitlab-sizer-webhook-cert +spec: + secretName: gitlab-sizer-webhook-tls + issuerRef: + name: selfsigned-issuer + kind: Issuer + dnsNames: + - gitlab-sizer-webhook.ci-sizer.svc + - gitlab-sizer-webhook.ci-sizer.svc.cluster.local + duration: 8760h + renewBefore: 720h diff --git a/otc/dev.t09.de/stacks/ci-sizer/gitlab-webhook/deployment.yaml b/otc/dev.t09.de/stacks/ci-sizer/gitlab-webhook/deployment.yaml new file mode 100644 index 0000000..0b99859 --- /dev/null +++ b/otc/dev.t09.de/stacks/ci-sizer/gitlab-webhook/deployment.yaml @@ -0,0 +1,141 @@ +apiVersion: v1 +kind: ServiceAccount +metadata: + name: gitlab-sizer-webhook +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: gitlab-sizer-webhook +rules: + - apiGroups: [""] + resources: ["pods"] + verbs: ["get", "list", "watch"] +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: gitlab-sizer-webhook +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: gitlab-sizer-webhook +subjects: + - kind: ServiceAccount + name: gitlab-sizer-webhook + namespace: ci-sizer +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: gitlab-sizer-webhook + labels: + app: gitlab-sizer-webhook +spec: + replicas: 2 + selector: + matchLabels: + app: gitlab-sizer-webhook + template: + metadata: + labels: + app: gitlab-sizer-webhook + spec: + serviceAccountName: gitlab-sizer-webhook + securityContext: + runAsNonRoot: true + runAsUser: 65534 + runAsGroup: 65534 + seccompProfile: + type: RuntimeDefault + 
containers: + - name: webhook + image: edp.buildth.ing/devfw-cicd/gitlab-webhook-edge-connect:latest + imagePullPolicy: Always + securityContext: + allowPrivilegeEscalation: false + readOnlyRootFilesystem: true + capabilities: + drop: + - ALL + ports: + - containerPort: 8443 + protocol: TCP + args: + - --listen-addr=:8443 + - --tls-cert-file=/etc/webhook/tls/tls.crt + - --tls-key-file=/etc/webhook/tls/tls.key + - --sizer-url=http://sizer-receiver.ci-sizer.svc:8080 + - --sizer-sidecar-image=edp.buildth.ing/devfw-cicd/ci-sizer-collector:latest + env: + - name: WEBHOOK_SIZER_READ_TOKEN + valueFrom: + secretKeyRef: + name: gitlab-sizer-webhook-tokens + key: sizer-read-token + - name: WEBHOOK_SIZER_PUSH_TOKEN + valueFrom: + secretKeyRef: + name: gitlab-sizer-webhook-tokens + key: sizer-push-token + - name: HTTP_PROXY + valueFrom: + configMapKeyRef: + name: gitlab-sizer-webhook-config + key: HTTP_PROXY + optional: true + - name: HTTPS_PROXY + valueFrom: + configMapKeyRef: + name: gitlab-sizer-webhook-config + key: HTTPS_PROXY + optional: true + - name: NO_PROXY + valueFrom: + configMapKeyRef: + name: gitlab-sizer-webhook-config + key: NO_PROXY + optional: true + volumeMounts: + - name: webhook-tls + mountPath: /etc/webhook/tls + readOnly: true + livenessProbe: + httpGet: + path: /healthz + port: 8443 + scheme: HTTPS + initialDelaySeconds: 5 + periodSeconds: 10 + readinessProbe: + httpGet: + path: /healthz + port: 8443 + scheme: HTTPS + initialDelaySeconds: 5 + periodSeconds: 10 + resources: + requests: + cpu: 100m + memory: 128Mi + limits: + cpu: 200m + memory: 128Mi + volumes: + - name: webhook-tls + secret: + secretName: gitlab-sizer-webhook-tls +--- +apiVersion: v1 +kind: Service +metadata: + name: gitlab-sizer-webhook + labels: + app: gitlab-sizer-webhook +spec: + selector: + app: gitlab-sizer-webhook + ports: + - port: 443 + targetPort: 8443 + protocol: TCP diff --git a/otc/dev.t09.de/stacks/ci-sizer/gitlab-webhook/webhook-config.yaml 
b/otc/dev.t09.de/stacks/ci-sizer/gitlab-webhook/webhook-config.yaml new file mode 100644 index 0000000..72aea4a --- /dev/null +++ b/otc/dev.t09.de/stacks/ci-sizer/gitlab-webhook/webhook-config.yaml @@ -0,0 +1,30 @@ +apiVersion: admissionregistration.k8s.io/v1 +kind: MutatingWebhookConfiguration +metadata: + name: gitlab-sizer-webhook + annotations: + cert-manager.io/inject-ca-from: ci-sizer/gitlab-sizer-webhook-cert +webhooks: + - name: gitlab-sizer-webhook.ci-sizer.svc + admissionReviewVersions: ["v1"] + sideEffects: NoneOnDryRun + failurePolicy: Ignore + timeoutSeconds: 5 + reinvocationPolicy: Never + clientConfig: + service: + name: gitlab-sizer-webhook + namespace: ci-sizer + path: /mutate + rules: + - apiGroups: [""] + apiVersions: ["v1"] + operations: ["CREATE"] + resources: ["pods"] + namespaceSelector: + matchLabels: + ci-sizer.devfw.io/watch: "true" + objectSelector: + matchExpressions: + - key: job.runner.gitlab.com/pod + operator: Exists diff --git a/otc/dev.t09.de/stacks/ci-sizer/sizer-receiver.yaml b/otc/dev.t09.de/stacks/ci-sizer/sizer-receiver.yaml index 4f1b6bc..1f56541 100644 --- a/otc/dev.t09.de/stacks/ci-sizer/sizer-receiver.yaml +++ b/otc/dev.t09.de/stacks/ci-sizer/sizer-receiver.yaml @@ -1,3 +1,7 @@ +# Required: CI Sizer receiver +# Always deploy this — it stores metrics and computes sizing recommendations. +# Works standalone or with GARM (Forgejo/GitHub) and/or GitLab webhook. 
+# See: ci-sizer/docs/deployment-modes.md apiVersion: argoproj.io/v1alpha1 kind: Application metadata: diff --git a/otc/dev.t09.de/stacks/ci-sizer/sizer-receiver/deployment.yaml b/otc/dev.t09.de/stacks/ci-sizer/sizer-receiver/deployment.yaml index ccb6a86..3cbfb4c 100644 --- a/otc/dev.t09.de/stacks/ci-sizer/sizer-receiver/deployment.yaml +++ b/otc/dev.t09.de/stacks/ci-sizer/sizer-receiver/deployment.yaml @@ -62,7 +62,7 @@ spec: - name: RECEIVER_SESSION_TTL value: "12h" - name: RECEIVER_ALLOWED_ORG - value: "DevFW" + value: "DevFW-CICD" - name: RECEIVER_CPU_SIZING_MODE value: "observe" - name: RECEIVER_MEMORY_QOS diff --git a/otc/dev.t09.de/stacks/ci-sizer/sizer-receiver/ingress.yaml b/otc/dev.t09.de/stacks/ci-sizer/sizer-receiver/ingress.yaml index 9a28977..1bd81a9 100644 --- a/otc/dev.t09.de/stacks/ci-sizer/sizer-receiver/ingress.yaml +++ b/otc/dev.t09.de/stacks/ci-sizer/sizer-receiver/ingress.yaml @@ -20,6 +20,16 @@ spec: number: 8080 path: / pathType: Prefix + - host: ci-sizer.dev.t09.de + http: + paths: + - backend: + service: + name: sizer-receiver + port: + number: 8080 + path: / + pathType: Prefix tls: - hosts: - sizer.dev.t09.de diff --git a/otc/dev.t09.de/stacks/core/argocd/values.yaml b/otc/dev.t09.de/stacks/core/argocd/values.yaml index 53cac97..dd5b83d 100644 --- a/otc/dev.t09.de/stacks/core/argocd/values.yaml +++ b/otc/dev.t09.de/stacks/core/argocd/values.yaml @@ -30,9 +30,7 @@ configs: - "*" url: https://argocd.dev.t09.de rbac: - policy.csv: | - g, DevFW, role:admin - g, DevFW-CICD, role:admin + policy.csv: 'g, DevFW, role:admin' tls: certificates: diff --git a/otc/dev.t09.de/stacks/core/dex/values.yaml b/otc/dev.t09.de/stacks/core/dex/values.yaml index c3e842a..6f4955b 100644 --- a/otc/dev.t09.de/stacks/core/dex/values.yaml +++ b/otc/dev.t09.de/stacks/core/dex/values.yaml @@ -37,7 +37,7 @@ envVars: - name: FORGEJO_RUNNER_SIZER_CLIENT_SECRET valueFrom: secretKeyRef: - name: dex-runner-sizer-client + name: dex-sizer-client key: clientSecret - name: 
LOG_LEVEL value: debug diff --git a/otc/dev.t09.de/stacks/core/secrets-backup.yaml b/otc/dev.t09.de/stacks/core/secrets-backup.yaml new file mode 100644 index 0000000..1f33c8d --- /dev/null +++ b/otc/dev.t09.de/stacks/core/secrets-backup.yaml @@ -0,0 +1,23 @@ +apiVersion: argoproj.io/v1alpha1 +kind: Application +metadata: + name: secrets-backup + namespace: argocd + labels: + env: dev +spec: + project: default + syncPolicy: + automated: + selfHeal: true + syncOptions: + - CreateNamespace=true + retry: + limit: -1 + destination: + name: in-cluster + namespace: gitea + sources: + - repoURL: https://edp.buildth.ing/DevFW-CICD/stacks-instances + targetRevision: HEAD + path: "otc/dev.t09.de/stacks/core/secrets-backup/manifests" diff --git a/otc/dev.t09.de/stacks/core/secrets-backup/manifests/secrets-backup-cronjob.yaml b/otc/dev.t09.de/stacks/core/secrets-backup/manifests/secrets-backup-cronjob.yaml new file mode 100644 index 0000000..bd1f913 --- /dev/null +++ b/otc/dev.t09.de/stacks/core/secrets-backup/manifests/secrets-backup-cronjob.yaml @@ -0,0 +1,129 @@ +apiVersion: v1 +kind: ServiceAccount +metadata: + name: secrets-backup + namespace: gitea +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: secrets-backup-reader +rules: + - apiGroups: [""] + resources: ["secrets"] + verbs: ["get", "list"] + - apiGroups: [""] + resources: ["namespaces"] + verbs: ["get", "list"] +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: secrets-backup-reader +subjects: + - kind: ServiceAccount + name: secrets-backup + namespace: gitea +roleRef: + kind: ClusterRole + name: secrets-backup-reader + apiGroup: rbac.authorization.k8s.io +--- +apiVersion: v1 +kind: Secret +metadata: + name: secrets-backup-config + namespace: gitea +type: Opaque +stringData: + # IMPORTANT: Replace this placeholder with a strong passphrase per environment. 
+ # This secret should be managed via external-secrets or manually set after initial deploy. + encryption-passphrase: "CHANGE-ME-SET-PER-ENVIRONMENT" +--- +apiVersion: batch/v1 +kind: CronJob +metadata: + name: secrets-backup + namespace: gitea +spec: + schedule: "30 3 * * *" + concurrencyPolicy: "Forbid" + successfulJobsHistoryLimit: 5 + failedJobsHistoryLimit: 5 + startingDeadlineSeconds: 600 # 10 minutes + jobTemplate: + spec: + activeDeadlineSeconds: 900 + backoffLimit: 2 + ttlSecondsAfterFinished: 259200 + template: + spec: + serviceAccountName: secrets-backup + containers: + - name: secrets-backup + image: alpine/k8s:1.28.0 + imagePullPolicy: IfNotPresent + env: + - name: AWS_ACCESS_KEY_ID + valueFrom: + secretKeyRef: + name: forgejo-cloud-credentials + key: access-key + - name: AWS_SECRET_ACCESS_KEY + valueFrom: + secretKeyRef: + name: forgejo-cloud-credentials + key: secret-key + - name: ENCRYPTION_PASSPHRASE + valueFrom: + secretKeyRef: + name: secrets-backup-config + key: encryption-passphrase + - name: SOURCE_BUCKET + valueFrom: + secretKeyRef: + name: forgejo-cloud-credentials + key: bucket-name + - name: OBS_ENDPOINT + value: "obs.eu-de.otc.t-systems.com" + command: + - /bin/sh + - -c + - | + set -euo pipefail + + TIMESTAMP=$(date +%Y%m%d-%H%M%S) + BACKUP_DIR="/tmp/secrets-backup-${TIMESTAMP}" + NAMESPACES="argocd cert-manager external-secrets" + + mkdir -p "${BACKUP_DIR}" + + echo "=== Exporting secrets from critical namespaces ===" + for NS in ${NAMESPACES}; do + echo "Exporting namespace: ${NS}" + kubectl get secrets -n "${NS}" \ + -o json \ + --field-selector type!=kubernetes.io/service-account-token \ + > "${BACKUP_DIR}/${NS}-secrets.json" + done + + echo "=== Encrypting backup with AES-256-CBC ===" + ARCHIVE="${BACKUP_DIR}/secrets-backup-${TIMESTAMP}.tar.gz" + tar -czf "${ARCHIVE}" -C "${BACKUP_DIR}" \ + $(ls "${BACKUP_DIR}"/*.json 2>/dev/null | xargs -n1 basename) + + ENCRYPTED="${BACKUP_DIR}/secrets-backup-${TIMESTAMP}.tar.gz.enc" + openssl enc 
-aes-256-cbc -salt -pbkdf2 -iter 100000 \ + -in "${ARCHIVE}" \ + -out "${ENCRYPTED}" \ + -pass env:ENCRYPTION_PASSPHRASE + + echo "=== Uploading to OBS ===" + aws s3 cp "${ENCRYPTED}" \ + "s3://${SOURCE_BUCKET}/cluster-secrets-backup/${TIMESTAMP}/secrets-backup.tar.gz.enc" \ + --endpoint-url "https://${OBS_ENDPOINT}" + + echo "=== Cleanup ===" + rm -rf "${BACKUP_DIR}" + echo "Backup completed: ${TIMESTAMP}" + restartPolicy: OnFailure diff --git a/otc/dev.t09.de/stacks/forgejo/forgejo-runner/dind-docker.yaml b/otc/dev.t09.de/stacks/forgejo/forgejo-runner/dind-docker.yaml index 093a819..bcdb719 100644 --- a/otc/dev.t09.de/stacks/forgejo/forgejo-runner/dind-docker.yaml +++ b/otc/dev.t09.de/stacks/forgejo/forgejo-runner/dind-docker.yaml @@ -7,7 +7,7 @@ metadata: namespace: gitea spec: # Two replicas means that if one is busy, the other can pick up jobs. - replicas: 0 + replicas: 3 selector: matchLabels: app: forgejo-runner diff --git a/otc/dev.t09.de/stacks/forgejo/forgejo-server/manifests/forgejo-s3-backup-cronjob.yaml b/otc/dev.t09.de/stacks/forgejo/forgejo-server/manifests/forgejo-s3-backup-cronjob.yaml index 1251a81..de14801 100644 --- a/otc/dev.t09.de/stacks/forgejo/forgejo-server/manifests/forgejo-s3-backup-cronjob.yaml +++ b/otc/dev.t09.de/stacks/forgejo/forgejo-server/manifests/forgejo-s3-backup-cronjob.yaml @@ -72,7 +72,7 @@ spec: - ReadWriteOnce resources: requests: - storage: 100Gi + storage: 500Gi --- apiVersion: v1 kind: Secret diff --git a/otc/dev.t09.de/stacks/forgejo/forgejo-server/values.yaml b/otc/dev.t09.de/stacks/forgejo/forgejo-server/values.yaml index ec901a0..a8a173e 100644 --- a/otc/dev.t09.de/stacks/forgejo/forgejo-server/values.yaml +++ b/otc/dev.t09.de/stacks/forgejo/forgejo-server/values.yaml @@ -178,6 +178,6 @@ image: #tag: "8.0.3" # Adds -rootless suffix to image name # rootless: true - fullOverride: edp.buildth.ing/devfw-cicd/edp-forgejo:workflow-webhook-20260305 + fullOverride: edp.buildth.ing/devfw-cicd/edp-forgejo:14.0.2-edp1-rootless 
forgejo: {} diff --git a/otc/dev.t09.de/stacks/garm/garm.yaml b/otc/dev.t09.de/stacks/garm/garm.yaml index 911f2cf..a0bbd69 100644 --- a/otc/dev.t09.de/stacks/garm/garm.yaml +++ b/otc/dev.t09.de/stacks/garm/garm.yaml @@ -1,3 +1,7 @@ +# Default: Forgejo/GitHub Actions runner manager +# Deploys GARM with the ci-sizer provider for automatic sizing + collector injection. +# For GitLab-only deployments, omit this and use gitlab-webhook instead. +# See: ci-sizer/docs/deployment-modes.md apiVersion: argoproj.io/v1alpha1 kind: Application metadata: @@ -20,7 +24,7 @@ spec: sources: - repoURL: https://edp.buildth.ing/DevFW-CICD/garm-helm path: charts/garm - targetRevision: v0.0.17 + targetRevision: v0.0.16 helm: valueFiles: - $values/otc/dev.t09.de/stacks/garm/garm/values.yaml diff --git a/otc/dev.t09.de/stacks/garm/garm/values.yaml b/otc/dev.t09.de/stacks/garm/garm/values.yaml index e1ee11d..827e495 100644 --- a/otc/dev.t09.de/stacks/garm/garm/values.yaml +++ b/otc/dev.t09.de/stacks/garm/garm/values.yaml @@ -26,7 +26,7 @@ credentials: image: repository: edp.buildth.ing/devfw-cicd/garm-forgejo - tag: v0.1.7-forgejo-22 + tag: v0.1.7-forgejo-23 providerConfig: edgeConnect: @@ -37,14 +37,9 @@ providerConfig: name: Hamburg organization: TelekomOP edgeConnectK8s: - pendingTimeout: "5m" sizer: - sidecarImage: edp.buildth.ing/devfw-cicd/ci-sizer-collector:latest - sidecarPushEndpoint: https://sizer.dev.t09.de/api/v1/metrics - baseUrl: "https://sizer.dev.t09.de" - readToken: - existingSecretName: sizer-tokens + sidecarImage: edp.buildth.ing/devfw-cicd/ci-sizer-collector:0.0.4 garm: logging: - logLevel: debug + logLevel: info From a047be3aae0c637b39a9bdbe68bc7c31f1637258 Mon Sep 17 00:00:00 2001 From: "Daniel.Sy" Date: Mon, 8 Jun 2026 14:11:05 +0000 Subject: [PATCH 074/114] =?UTF-8?q?fix(garm):=20=E2=AC=87=EF=B8=8F=20rollb?= =?UTF-8?q?ack=20to=20v0.1.7-forgejo-22=20=E2=80=94=20-23=20has=20exec=20f?= =?UTF-8?q?ormat=20error=20(wrong=20arch)?= MIME-Version: 1.0 Content-Type: 
text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- otc/dev.t09.de/stacks/garm/garm/values.yaml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/otc/dev.t09.de/stacks/garm/garm/values.yaml b/otc/dev.t09.de/stacks/garm/garm/values.yaml index 827e495..bce1f62 100644 --- a/otc/dev.t09.de/stacks/garm/garm/values.yaml +++ b/otc/dev.t09.de/stacks/garm/garm/values.yaml @@ -26,7 +26,9 @@ credentials: image: repository: edp.buildth.ing/devfw-cicd/garm-forgejo - tag: v0.1.7-forgejo-23 + # NOTE: v0.1.7-forgejo-23 has exec format error (wrong arch build) + # Rolled back to -22 until -23 is rebuilt for amd64 + tag: v0.1.7-forgejo-22 providerConfig: edgeConnect: From 9a7544418cd4d2c1ea42657cb0314fab9d2fad9e Mon Sep 17 00:00:00 2001 From: "Daniel.Sy" Date: Mon, 8 Jun 2026 14:11:31 +0000 Subject: [PATCH 075/114] =?UTF-8?q?fix(forgejo):=20=F0=9F=90=9B=20use=20wo?= =?UTF-8?q?rkflow-webhook=20image=20matching=20DB=20migration=20level=20(v?= =?UTF-8?q?15a/v15b)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit DB was migrated to v15 schema by this image in March. The 14.0.2-edp1-rootless image cannot start against it. Today's automated pipeline sync triggered pod restart, exposing the mismatch. --- otc/dev.t09.de/stacks/forgejo/forgejo-server/values.yaml | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/otc/dev.t09.de/stacks/forgejo/forgejo-server/values.yaml b/otc/dev.t09.de/stacks/forgejo/forgejo-server/values.yaml index a8a173e..f2e3fb2 100644 --- a/otc/dev.t09.de/stacks/forgejo/forgejo-server/values.yaml +++ b/otc/dev.t09.de/stacks/forgejo/forgejo-server/values.yaml @@ -1,4 +1,3 @@ - # We use recreate to make sure only one instance with one version is running, because Forgejo might break or data gets inconsistant. strategy: type: Recreate @@ -174,10 +173,8 @@ service: image: pullPolicy: "IfNotPresent" - # Overrides the image tag whose default is the chart appVersion. 
- #tag: "8.0.3" - # Adds -rootless suffix to image name - # rootless: true - fullOverride: edp.buildth.ing/devfw-cicd/edp-forgejo:14.0.2-edp1-rootless + # DB has v15a/v15b migrations from workflow-webhook build. + # Using that image until a proper v15+ EDP release is cut. + fullOverride: edp.buildth.ing/devfw-cicd/edp-forgejo:workflow-webhook-20260305 forgejo: {} From 967edf0382b0b7d47342b9418a5d78fbb457277a Mon Sep 17 00:00:00 2001 From: Daniel Sy Date: Mon, 8 Jun 2026 16:59:47 +0200 Subject: [PATCH 076/114] =?UTF-8?q?fix(ci-sizer):=20=F0=9F=94=90=20align?= =?UTF-8?q?=20OIDC=20client=20secret=20with=20dex=20config?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Secret mismatch caused infinite login loop on sizer.dev.t09.de. Added sizer-oidc-client secret manifest to GitOps so ArgoCD manages it. Value now matches dex-runner-sizer-client (dex side). --- .../stacks/ci-sizer/sizer-receiver/secret.yaml | 9 +++++++++ 1 file changed, 9 insertions(+) create mode 100644 otc/dev.t09.de/stacks/ci-sizer/sizer-receiver/secret.yaml diff --git a/otc/dev.t09.de/stacks/ci-sizer/sizer-receiver/secret.yaml b/otc/dev.t09.de/stacks/ci-sizer/sizer-receiver/secret.yaml new file mode 100644 index 0000000..42538da --- /dev/null +++ b/otc/dev.t09.de/stacks/ci-sizer/sizer-receiver/secret.yaml @@ -0,0 +1,9 @@ +apiVersion: v1 +kind: Secret +metadata: + name: sizer-oidc-client + labels: + app: sizer-receiver +type: Opaque +stringData: + client-secret: "di4ul3ce6D796q19lVQsjiGBVzluC/J7WCzjNDRZKKMRr/+C" From bd82384eb19b6c6fecc20943c366afe831caedcb Mon Sep 17 00:00:00 2001 From: Daniel Sy Date: Mon, 8 Jun 2026 17:11:02 +0200 Subject: [PATCH 077/114] =?UTF-8?q?fix(dex):=20=F0=9F=94=90=20correct=20si?= =?UTF-8?q?zer=20client=20secret=20to=20match=20sizer-oidc-client?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The deploy hydration created dex-sizer-client with wrong value. 
Reverting to the original shared secret that sizer expects (73eda906... - active for 81 days before hydration overwrote it). Changes: - sizer-oidc-client: restore correct shared secret - dex-sizer-client: add managed manifest to prevent future drift - dex.yaml: add manifests source for ArgoCD to sync the secret Broken by stacks rehydration pipeline run. --- otc/dev.t09.de/stacks/ci-sizer/sizer-receiver/secret.yaml | 2 +- otc/dev.t09.de/stacks/core/dex.yaml | 3 +++ .../stacks/core/dex/manifests/dex-sizer-client.yaml | 8 ++++++++ 3 files changed, 12 insertions(+), 1 deletion(-) create mode 100644 otc/dev.t09.de/stacks/core/dex/manifests/dex-sizer-client.yaml diff --git a/otc/dev.t09.de/stacks/ci-sizer/sizer-receiver/secret.yaml b/otc/dev.t09.de/stacks/ci-sizer/sizer-receiver/secret.yaml index 42538da..ac8a37c 100644 --- a/otc/dev.t09.de/stacks/ci-sizer/sizer-receiver/secret.yaml +++ b/otc/dev.t09.de/stacks/ci-sizer/sizer-receiver/secret.yaml @@ -6,4 +6,4 @@ metadata: app: sizer-receiver type: Opaque stringData: - client-secret: "di4ul3ce6D796q19lVQsjiGBVzluC/J7WCzjNDRZKKMRr/+C" + client-secret: "73eda9068bd00dfe67d29f087b5540cb1cd82cc1dd2ac0f838558ac8bbcfcb3a" diff --git a/otc/dev.t09.de/stacks/core/dex.yaml b/otc/dev.t09.de/stacks/core/dex.yaml index 5da98f5..b67aa7d 100644 --- a/otc/dev.t09.de/stacks/core/dex.yaml +++ b/otc/dev.t09.de/stacks/core/dex.yaml @@ -27,3 +27,6 @@ spec: - repoURL: https://edp.buildth.ing/DevFW-CICD/stacks-instances targetRevision: HEAD ref: values + - repoURL: https://edp.buildth.ing/DevFW-CICD/stacks-instances + targetRevision: HEAD + path: "otc/dev.t09.de/stacks/core/dex/manifests" diff --git a/otc/dev.t09.de/stacks/core/dex/manifests/dex-sizer-client.yaml b/otc/dev.t09.de/stacks/core/dex/manifests/dex-sizer-client.yaml new file mode 100644 index 0000000..884df64 --- /dev/null +++ b/otc/dev.t09.de/stacks/core/dex/manifests/dex-sizer-client.yaml @@ -0,0 +1,8 @@ +apiVersion: v1 +kind: Secret +metadata: + name: dex-sizer-client + namespace: 
dex +type: Opaque +stringData: + clientSecret: "73eda9068bd00dfe67d29f087b5540cb1cd82cc1dd2ac0f838558ac8bbcfcb3a" From 925c7416b35e65d317b9ce493bc137f0de18adaf Mon Sep 17 00:00:00 2001 From: Daniel Sy Date: Mon, 8 Jun 2026 17:51:13 +0200 Subject: [PATCH 078/114] =?UTF-8?q?fix(ci-sizer):=20=F0=9F=90=9B=20revert?= =?UTF-8?q?=20RECEIVER=5FALLOWED=5FORG=20to=20DevFW=20for=20dev=20env?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Template default is DevFW-CICD (prod), but dev Forgejo uses DevFW org. Hydration overwrote the correct value today. --- otc/dev.t09.de/stacks/ci-sizer/sizer-receiver/deployment.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/otc/dev.t09.de/stacks/ci-sizer/sizer-receiver/deployment.yaml b/otc/dev.t09.de/stacks/ci-sizer/sizer-receiver/deployment.yaml index 3cbfb4c..ccb6a86 100644 --- a/otc/dev.t09.de/stacks/ci-sizer/sizer-receiver/deployment.yaml +++ b/otc/dev.t09.de/stacks/ci-sizer/sizer-receiver/deployment.yaml @@ -62,7 +62,7 @@ spec: - name: RECEIVER_SESSION_TTL value: "12h" - name: RECEIVER_ALLOWED_ORG - value: "DevFW-CICD" + value: "DevFW" - name: RECEIVER_CPU_SIZING_MODE value: "observe" - name: RECEIVER_MEMORY_QOS From 69839f767b1e94842cb98d8fb35a58465dab761e Mon Sep 17 00:00:00 2001 From: Daniel Sy Date: Mon, 8 Jun 2026 18:00:47 +0200 Subject: [PATCH 079/114] =?UTF-8?q?fix(ci-sizer):=20=F0=9F=90=9B=20set=20R?= =?UTF-8?q?ECEIVER=5FALLOWED=5FORG=3DgiteaAdmin=20for=20dev?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Dev Forgejo repos live under giteaAdmin user, not DevFW org. Prod will use DevFW-CICD (template default). Dev needs explicit override. 
--- otc/dev.t09.de/stacks/ci-sizer/sizer-receiver/deployment.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/otc/dev.t09.de/stacks/ci-sizer/sizer-receiver/deployment.yaml b/otc/dev.t09.de/stacks/ci-sizer/sizer-receiver/deployment.yaml index ccb6a86..6870fb3 100644 --- a/otc/dev.t09.de/stacks/ci-sizer/sizer-receiver/deployment.yaml +++ b/otc/dev.t09.de/stacks/ci-sizer/sizer-receiver/deployment.yaml @@ -62,7 +62,7 @@ spec: - name: RECEIVER_SESSION_TTL value: "12h" - name: RECEIVER_ALLOWED_ORG - value: "DevFW" + value: "giteaAdmin" - name: RECEIVER_CPU_SIZING_MODE value: "observe" - name: RECEIVER_MEMORY_QOS From 3fdfda9da70da48ef91ffc291b01f16ac26f737d Mon Sep 17 00:00:00 2001 From: Daniel Sy Date: Mon, 8 Jun 2026 18:06:00 +0200 Subject: [PATCH 080/114] =?UTF-8?q?fix(ci-sizer):=20=F0=9F=93=8C=20pin=20s?= =?UTF-8?q?izer-receiver=20to=20v0.8.2=20for=20dev?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit v0.8.3 introduced RequireOrgMatch middleware that breaks dev env where repos are under giteaAdmin but OIDC org resolves differently. Pin to v0.8.2 until IPCEICIS-9326 fixes multi-env org support. 
--- otc/dev.t09.de/stacks/ci-sizer/sizer-receiver/deployment.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/otc/dev.t09.de/stacks/ci-sizer/sizer-receiver/deployment.yaml b/otc/dev.t09.de/stacks/ci-sizer/sizer-receiver/deployment.yaml index 6870fb3..88379f6 100644 --- a/otc/dev.t09.de/stacks/ci-sizer/sizer-receiver/deployment.yaml +++ b/otc/dev.t09.de/stacks/ci-sizer/sizer-receiver/deployment.yaml @@ -20,7 +20,7 @@ spec: fsGroup: 65534 containers: - name: receiver - image: edp.buildth.ing/devfw-cicd/ci-sizer-receiver:latest + image: edp.buildth.ing/devfw-cicd/ci-sizer-receiver:v0.8.2 imagePullPolicy: Always args: - --db=/data/metrics.db From f4aa47089411e4c07f57afeffcbf34ec3bf235d0 Mon Sep 17 00:00:00 2001 From: Daniel Sy Date: Mon, 8 Jun 2026 18:08:04 +0200 Subject: [PATCH 081/114] =?UTF-8?q?fix(ci-sizer):=20=F0=9F=93=8C=20pin=20s?= =?UTF-8?q?izer-receiver=20to=20v0.8.1=20for=20dev?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit v0.8.2 does not exist — tags go v0.8.1 → v0.8.3. v0.8.3 introduced RequireOrgMatch middleware that breaks dev env where repos are under giteaAdmin but OIDC org resolves differently. Pin to v0.8.1 until IPCEICIS-9326 fixes multi-env org support. 
--- otc/dev.t09.de/stacks/ci-sizer/sizer-receiver/deployment.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/otc/dev.t09.de/stacks/ci-sizer/sizer-receiver/deployment.yaml b/otc/dev.t09.de/stacks/ci-sizer/sizer-receiver/deployment.yaml index 88379f6..d1e31e4 100644 --- a/otc/dev.t09.de/stacks/ci-sizer/sizer-receiver/deployment.yaml +++ b/otc/dev.t09.de/stacks/ci-sizer/sizer-receiver/deployment.yaml @@ -20,7 +20,7 @@ spec: fsGroup: 65534 containers: - name: receiver - image: edp.buildth.ing/devfw-cicd/ci-sizer-receiver:v0.8.2 + image: edp.buildth.ing/devfw-cicd/ci-sizer-receiver:v0.8.1 imagePullPolicy: Always args: - --db=/data/metrics.db From cf8271fd86b745754c241e3905254bf2364782d1 Mon Sep 17 00:00:00 2001 From: Daniel Sy Date: Mon, 8 Jun 2026 18:12:56 +0200 Subject: [PATCH 082/114] =?UTF-8?q?revert(ci-sizer):=20=F0=9F=94=A5=20reve?= =?UTF-8?q?rt=20image=20pin=20=E2=80=94=20no=20versioned=20images=20in=20r?= =?UTF-8?q?egistry?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit GoReleaser config uses 'dockers_v2' (invalid key, should be 'dockers') so versioned container images were never pushed. Only :latest exists. Reverting to :latest until CI pipeline is fixed to publish version tags. 
Refs: IPCEICIS-9326 --- otc/dev.t09.de/stacks/ci-sizer/sizer-receiver/deployment.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/otc/dev.t09.de/stacks/ci-sizer/sizer-receiver/deployment.yaml b/otc/dev.t09.de/stacks/ci-sizer/sizer-receiver/deployment.yaml index d1e31e4..6870fb3 100644 --- a/otc/dev.t09.de/stacks/ci-sizer/sizer-receiver/deployment.yaml +++ b/otc/dev.t09.de/stacks/ci-sizer/sizer-receiver/deployment.yaml @@ -20,7 +20,7 @@ spec: fsGroup: 65534 containers: - name: receiver - image: edp.buildth.ing/devfw-cicd/ci-sizer-receiver:v0.8.1 + image: edp.buildth.ing/devfw-cicd/ci-sizer-receiver:latest imagePullPolicy: Always args: - --db=/data/metrics.db From 9bbcf4efca1a9e24aa4d5690af1e0a5c5a96b7ea Mon Sep 17 00:00:00 2001 From: Daniel Sy Date: Fri, 12 Jun 2026 09:32:35 +0200 Subject: [PATCH 083/114] =?UTF-8?q?fix(secrets-backup):=20=F0=9F=90=9B=20a?= =?UTF-8?q?dd=20openssl=20install=20+=20upgrade=20image=20to=201.32.0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit alpine/k8s:1.28.0 does not ship openssl. Script calls openssl enc on line 116 causing exit 127 on every run since initial deploy. 
Fix: - apk add --no-cache openssl at script start (defensive, idempotent) - upgrade image 1.28.0 -> 1.32.0 (kubectl client was 5 minor versions behind cluster v1.33, outside supported skew of +/-1) --- .../secrets-backup/manifests/secrets-backup-cronjob.yaml | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/otc/dev.t09.de/stacks/core/secrets-backup/manifests/secrets-backup-cronjob.yaml b/otc/dev.t09.de/stacks/core/secrets-backup/manifests/secrets-backup-cronjob.yaml index bd1f913..aafcf84 100644 --- a/otc/dev.t09.de/stacks/core/secrets-backup/manifests/secrets-backup-cronjob.yaml +++ b/otc/dev.t09.de/stacks/core/secrets-backup/manifests/secrets-backup-cronjob.yaml @@ -61,7 +61,7 @@ spec: serviceAccountName: secrets-backup containers: - name: secrets-backup - image: alpine/k8s:1.28.0 + image: alpine/k8s:1.32.0 imagePullPolicy: IfNotPresent env: - name: AWS_ACCESS_KEY_ID @@ -92,6 +92,9 @@ spec: - | set -euo pipefail + # Ensure openssl is available (not bundled in alpine/k8s image) + apk add --no-cache openssl --quiet + TIMESTAMP=$(date +%Y%m%d-%H%M%S) BACKUP_DIR="/tmp/secrets-backup-${TIMESTAMP}" NAMESPACES="argocd cert-manager external-secrets" From 95deeef6a0b933ae8164b5e627832f2b04fd1732 Mon Sep 17 00:00:00 2001 From: Automated pipeline Date: Fri, 12 Jun 2026 07:46:00 +0000 Subject: [PATCH 084/114] Automated upload for dev.t09.de --- .../ci-sizer/sizer-receiver/deployment.yaml | 2 +- .../ci-sizer/sizer-receiver/secret.yaml | 9 --- otc/dev.t09.de/stacks/core/dex.yaml | 3 - .../core/dex/manifests/dex-sizer-client.yaml | 8 -- .../stacks/forgejo/forgejo-server/values.yaml | 9 ++- otc/dev.t09.de/stacks/garm/garm/values.yaml | 4 +- .../manifests/backup-alerts.yaml | 78 +++++++++++++++++++ 7 files changed, 86 insertions(+), 27 deletions(-) delete mode 100644 otc/dev.t09.de/stacks/ci-sizer/sizer-receiver/secret.yaml delete mode 100644 otc/dev.t09.de/stacks/core/dex/manifests/dex-sizer-client.yaml create mode 100644 
otc/dev.t09.de/stacks/observability/victoria-k8s-stack/manifests/backup-alerts.yaml diff --git a/otc/dev.t09.de/stacks/ci-sizer/sizer-receiver/deployment.yaml b/otc/dev.t09.de/stacks/ci-sizer/sizer-receiver/deployment.yaml index 6870fb3..3cbfb4c 100644 --- a/otc/dev.t09.de/stacks/ci-sizer/sizer-receiver/deployment.yaml +++ b/otc/dev.t09.de/stacks/ci-sizer/sizer-receiver/deployment.yaml @@ -62,7 +62,7 @@ spec: - name: RECEIVER_SESSION_TTL value: "12h" - name: RECEIVER_ALLOWED_ORG - value: "giteaAdmin" + value: "DevFW-CICD" - name: RECEIVER_CPU_SIZING_MODE value: "observe" - name: RECEIVER_MEMORY_QOS diff --git a/otc/dev.t09.de/stacks/ci-sizer/sizer-receiver/secret.yaml b/otc/dev.t09.de/stacks/ci-sizer/sizer-receiver/secret.yaml deleted file mode 100644 index ac8a37c..0000000 --- a/otc/dev.t09.de/stacks/ci-sizer/sizer-receiver/secret.yaml +++ /dev/null @@ -1,9 +0,0 @@ -apiVersion: v1 -kind: Secret -metadata: - name: sizer-oidc-client - labels: - app: sizer-receiver -type: Opaque -stringData: - client-secret: "73eda9068bd00dfe67d29f087b5540cb1cd82cc1dd2ac0f838558ac8bbcfcb3a" diff --git a/otc/dev.t09.de/stacks/core/dex.yaml b/otc/dev.t09.de/stacks/core/dex.yaml index b67aa7d..5da98f5 100644 --- a/otc/dev.t09.de/stacks/core/dex.yaml +++ b/otc/dev.t09.de/stacks/core/dex.yaml @@ -27,6 +27,3 @@ spec: - repoURL: https://edp.buildth.ing/DevFW-CICD/stacks-instances targetRevision: HEAD ref: values - - repoURL: https://edp.buildth.ing/DevFW-CICD/stacks-instances - targetRevision: HEAD - path: "otc/dev.t09.de/stacks/core/dex/manifests" diff --git a/otc/dev.t09.de/stacks/core/dex/manifests/dex-sizer-client.yaml b/otc/dev.t09.de/stacks/core/dex/manifests/dex-sizer-client.yaml deleted file mode 100644 index 884df64..0000000 --- a/otc/dev.t09.de/stacks/core/dex/manifests/dex-sizer-client.yaml +++ /dev/null @@ -1,8 +0,0 @@ -apiVersion: v1 -kind: Secret -metadata: - name: dex-sizer-client - namespace: dex -type: Opaque -stringData: - clientSecret: 
"73eda9068bd00dfe67d29f087b5540cb1cd82cc1dd2ac0f838558ac8bbcfcb3a" diff --git a/otc/dev.t09.de/stacks/forgejo/forgejo-server/values.yaml b/otc/dev.t09.de/stacks/forgejo/forgejo-server/values.yaml index f2e3fb2..a8a173e 100644 --- a/otc/dev.t09.de/stacks/forgejo/forgejo-server/values.yaml +++ b/otc/dev.t09.de/stacks/forgejo/forgejo-server/values.yaml @@ -1,3 +1,4 @@ + # We use recreate to make sure only one instance with one version is running, because Forgejo might break or data gets inconsistant. strategy: type: Recreate @@ -173,8 +174,10 @@ service: image: pullPolicy: "IfNotPresent" - # DB has v15a/v15b migrations from workflow-webhook build. - # Using that image until a proper v15+ EDP release is cut. - fullOverride: edp.buildth.ing/devfw-cicd/edp-forgejo:workflow-webhook-20260305 + # Overrides the image tag whose default is the chart appVersion. + #tag: "8.0.3" + # Adds -rootless suffix to image name + # rootless: true + fullOverride: edp.buildth.ing/devfw-cicd/edp-forgejo:14.0.2-edp1-rootless forgejo: {} diff --git a/otc/dev.t09.de/stacks/garm/garm/values.yaml b/otc/dev.t09.de/stacks/garm/garm/values.yaml index bce1f62..827e495 100644 --- a/otc/dev.t09.de/stacks/garm/garm/values.yaml +++ b/otc/dev.t09.de/stacks/garm/garm/values.yaml @@ -26,9 +26,7 @@ credentials: image: repository: edp.buildth.ing/devfw-cicd/garm-forgejo - # NOTE: v0.1.7-forgejo-23 has exec format error (wrong arch build) - # Rolled back to -22 until -23 is rebuilt for amd64 - tag: v0.1.7-forgejo-22 + tag: v0.1.7-forgejo-23 providerConfig: edgeConnect: diff --git a/otc/dev.t09.de/stacks/observability/victoria-k8s-stack/manifests/backup-alerts.yaml b/otc/dev.t09.de/stacks/observability/victoria-k8s-stack/manifests/backup-alerts.yaml new file mode 100644 index 0000000..259a2bf --- /dev/null +++ b/otc/dev.t09.de/stacks/observability/victoria-k8s-stack/manifests/backup-alerts.yaml @@ -0,0 +1,78 @@ +apiVersion: operator.victoriametrics.com/v1beta1 +kind: VMRule +metadata: + name: backup-alerts + 
namespace: observability +spec: + groups: + - name: backup-schedule-staleness + rules: + - alert: BackupCronJobNotScheduled + expr: | + time() - kube_cronjob_status_last_schedule_time{cronjob=~"forgejo-s3-backup|secrets-backup", namespace="gitea"} + > 26 * 3600 + for: 5m + labels: + severity: critical + cronjob: "{{ $labels.cronjob }}" + annotations: + value: "{{ $value | humanizeDuration }}" + description: >- + CronJob {{ $labels.namespace }}/{{ $labels.cronjob }} has not been + scheduled for over 26 hours in cluster {{ $labels.cluster_environment }}. + Last schedule was {{ $value | humanizeDuration }} ago. + summary: "Backup CronJob {{ $labels.cronjob }} is stale" + + - alert: BackupCronJobNeverScheduled + expr: | + kube_cronjob_status_last_schedule_time{cronjob=~"forgejo-s3-backup|secrets-backup", namespace="gitea"} + == 0 + for: 30m + labels: + severity: critical + cronjob: "{{ $labels.cronjob }}" + annotations: + description: >- + CronJob {{ $labels.namespace }}/{{ $labels.cronjob }} has never been + scheduled in cluster {{ $labels.cluster_environment }}. + summary: "Backup CronJob {{ $labels.cronjob }} never ran" + + - name: backup-job-failures + rules: + - alert: BackupJobFailed + expr: | + max by(cluster_environment, namespace, job_name) ( + kube_job_status_failed{job_name=~"forgejo-s3-backup-.*|secrets-backup-.*", namespace="gitea"} + ) > 0 + for: 30s + labels: + severity: critical + job_name: "{{ $labels.job_name }}" + annotations: + value: "{{ $value }}" + description: >- + Backup job {{ $labels.namespace }}/{{ $labels.job_name }} has + {{ $value }} failed pod(s) in cluster {{ $labels.cluster_environment }}. 
+ summary: "Backup job {{ $labels.job_name }} failed" + + - name: backup-job-duration + rules: + - alert: BackupJobTooSlow + expr: | + ( + time() - kube_job_status_start_time{job_name=~"forgejo-s3-backup-.*|secrets-backup-.*", namespace="gitea"} + ) > 300 + and + kube_job_status_active{job_name=~"forgejo-s3-backup-.*|secrets-backup-.*", namespace="gitea"} > 0 + for: 1m + labels: + severity: major + job_name: "{{ $labels.job_name }}" + annotations: + value: "{{ $value | humanizeDuration }}" + description: >- + Backup job {{ $labels.namespace }}/{{ $labels.job_name }} has been + running for {{ $value | humanizeDuration }} (threshold: 5m) + in cluster {{ $labels.cluster_environment }}. This may indicate a + hung process or connectivity issue. + summary: "Backup job {{ $labels.job_name }} running too long" From 900c1f6c806b1177c16c6b3e35e57da639f4baf5 Mon Sep 17 00:00:00 2001 From: Daniel Sy Date: Fri, 12 Jun 2026 10:10:50 +0200 Subject: [PATCH 085/114] =?UTF-8?q?fix(dev):=20=F0=9F=90=9B=20revert=20aut?= =?UTF-8?q?omated-upload=20damage=20=E2=80=94=20restore=20working=20image?= =?UTF-8?q?=20pins=20+=20OIDC=20secrets?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Automated upload (95deeef) overwrote 5 manually-pinned values: - forgejo-server: restore workflow-webhook-20260305 (DB has v15a/v15b migrations; rolling back to 14.0.2-edp1-rootless WILL break the DB) - garm: restore v0.1.7-forgejo-22 (v0.1.7-forgejo-23 has exec format error — wrong arch build, crashes on OTC CCE amd64 nodes) - sizer-receiver/secret.yaml: re-add sizer-oidc-client secret (deleted by upload; causes OIDC auth failure on every sizer-receiver login) - dex/manifests/dex-sizer-client.yaml: re-add (deleted by upload; dex cannot resolve sizer OIDC client without this secret) - dex.yaml: restore manifests source block (removed by upload; without it ArgoCD never deploys the dex/manifests/ directory) backup-alerts.yaml (new VMRule from automated upload) is kept 
as-is. --- .../stacks/ci-sizer/sizer-receiver/secret.yaml | 9 +++++++++ otc/dev.t09.de/stacks/core/dex.yaml | 3 +++ .../stacks/core/dex/manifests/dex-sizer-client.yaml | 8 ++++++++ otc/dev.t09.de/stacks/forgejo/forgejo-server/values.yaml | 9 ++++----- otc/dev.t09.de/stacks/garm/garm/values.yaml | 5 ++++- 5 files changed, 28 insertions(+), 6 deletions(-) create mode 100644 otc/dev.t09.de/stacks/ci-sizer/sizer-receiver/secret.yaml create mode 100644 otc/dev.t09.de/stacks/core/dex/manifests/dex-sizer-client.yaml diff --git a/otc/dev.t09.de/stacks/ci-sizer/sizer-receiver/secret.yaml b/otc/dev.t09.de/stacks/ci-sizer/sizer-receiver/secret.yaml new file mode 100644 index 0000000..ac8a37c --- /dev/null +++ b/otc/dev.t09.de/stacks/ci-sizer/sizer-receiver/secret.yaml @@ -0,0 +1,9 @@ +apiVersion: v1 +kind: Secret +metadata: + name: sizer-oidc-client + labels: + app: sizer-receiver +type: Opaque +stringData: + client-secret: "73eda9068bd00dfe67d29f087b5540cb1cd82cc1dd2ac0f838558ac8bbcfcb3a" diff --git a/otc/dev.t09.de/stacks/core/dex.yaml b/otc/dev.t09.de/stacks/core/dex.yaml index 5da98f5..b67aa7d 100644 --- a/otc/dev.t09.de/stacks/core/dex.yaml +++ b/otc/dev.t09.de/stacks/core/dex.yaml @@ -27,3 +27,6 @@ spec: - repoURL: https://edp.buildth.ing/DevFW-CICD/stacks-instances targetRevision: HEAD ref: values + - repoURL: https://edp.buildth.ing/DevFW-CICD/stacks-instances + targetRevision: HEAD + path: "otc/dev.t09.de/stacks/core/dex/manifests" diff --git a/otc/dev.t09.de/stacks/core/dex/manifests/dex-sizer-client.yaml b/otc/dev.t09.de/stacks/core/dex/manifests/dex-sizer-client.yaml new file mode 100644 index 0000000..884df64 --- /dev/null +++ b/otc/dev.t09.de/stacks/core/dex/manifests/dex-sizer-client.yaml @@ -0,0 +1,8 @@ +apiVersion: v1 +kind: Secret +metadata: + name: dex-sizer-client + namespace: dex +type: Opaque +stringData: + clientSecret: "73eda9068bd00dfe67d29f087b5540cb1cd82cc1dd2ac0f838558ac8bbcfcb3a" diff --git 
a/otc/dev.t09.de/stacks/forgejo/forgejo-server/values.yaml b/otc/dev.t09.de/stacks/forgejo/forgejo-server/values.yaml index a8a173e..bf6a0fd 100644 --- a/otc/dev.t09.de/stacks/forgejo/forgejo-server/values.yaml +++ b/otc/dev.t09.de/stacks/forgejo/forgejo-server/values.yaml @@ -174,10 +174,9 @@ service: image: pullPolicy: "IfNotPresent" - # Overrides the image tag whose default is the chart appVersion. - #tag: "8.0.3" - # Adds -rootless suffix to image name - # rootless: true - fullOverride: edp.buildth.ing/devfw-cicd/edp-forgejo:14.0.2-edp1-rootless + # DB has v15a/v15b migrations from workflow-webhook build. + # Using that image until a proper v15+ EDP release is cut. + # DO NOT revert — automated upload will break the DB schema. + fullOverride: edp.buildth.ing/devfw-cicd/edp-forgejo:workflow-webhook-20260305 forgejo: {} diff --git a/otc/dev.t09.de/stacks/garm/garm/values.yaml b/otc/dev.t09.de/stacks/garm/garm/values.yaml index 827e495..7bce701 100644 --- a/otc/dev.t09.de/stacks/garm/garm/values.yaml +++ b/otc/dev.t09.de/stacks/garm/garm/values.yaml @@ -26,7 +26,10 @@ credentials: image: repository: edp.buildth.ing/devfw-cicd/garm-forgejo - tag: v0.1.7-forgejo-23 + # NOTE: v0.1.7-forgejo-23 has exec format error (wrong arch build). + # Rolled back to -22 until -23 is rebuilt for amd64. + # DO NOT bump — automated upload will restore wrong arch. + tag: v0.1.7-forgejo-22 providerConfig: edgeConnect: From 8939b4f32b127bfb00fc199d51d5b2d963fd13a6 Mon Sep 17 00:00:00 2001 From: Daniel Sy Date: Fri, 12 Jun 2026 13:12:04 +0200 Subject: [PATCH 086/114] =?UTF-8?q?fix(secrets-backup):=20=F0=9F=94=84=20s?= =?UTF-8?q?ync=20simplified=20manifest=20from=20template?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Remove client-side openssl encryption. OBS SSE-KMS handles encryption at rest. Updated: no apk add openssl, no openssl enc step, no secrets-backup-config Secret, upload .tar.gz directly. 
Image tag bumped to 1.0.1 (built without openssl). Ref: IPCEICIS-9317 --- .../manifests/secrets-backup-cronjob.yaml | 35 +++---------------- 1 file changed, 5 insertions(+), 30 deletions(-) diff --git a/otc/dev.t09.de/stacks/core/secrets-backup/manifests/secrets-backup-cronjob.yaml b/otc/dev.t09.de/stacks/core/secrets-backup/manifests/secrets-backup-cronjob.yaml index aafcf84..5ea260d 100644 --- a/otc/dev.t09.de/stacks/core/secrets-backup/manifests/secrets-backup-cronjob.yaml +++ b/otc/dev.t09.de/stacks/core/secrets-backup/manifests/secrets-backup-cronjob.yaml @@ -29,17 +29,6 @@ roleRef: name: secrets-backup-reader apiGroup: rbac.authorization.k8s.io --- -apiVersion: v1 -kind: Secret -metadata: - name: secrets-backup-config - namespace: gitea -type: Opaque -stringData: - # IMPORTANT: Replace this placeholder with a strong passphrase per environment. - # This secret should be managed via external-secrets or manually set after initial deploy. - encryption-passphrase: "CHANGE-ME-SET-PER-ENVIRONMENT" ---- apiVersion: batch/v1 kind: CronJob metadata: @@ -61,7 +50,7 @@ spec: serviceAccountName: secrets-backup containers: - name: secrets-backup - image: alpine/k8s:1.32.0 + image: edp.buildth.ing/devfw-cicd/secrets-backup:1.0.1 imagePullPolicy: IfNotPresent env: - name: AWS_ACCESS_KEY_ID @@ -74,11 +63,6 @@ spec: secretKeyRef: name: forgejo-cloud-credentials key: secret-key - - name: ENCRYPTION_PASSPHRASE - valueFrom: - secretKeyRef: - name: secrets-backup-config - key: encryption-passphrase - name: SOURCE_BUCKET valueFrom: secretKeyRef: @@ -92,9 +76,6 @@ spec: - | set -euo pipefail - # Ensure openssl is available (not bundled in alpine/k8s image) - apk add --no-cache openssl --quiet - TIMESTAMP=$(date +%Y%m%d-%H%M%S) BACKUP_DIR="/tmp/secrets-backup-${TIMESTAMP}" NAMESPACES="argocd cert-manager external-secrets" @@ -110,20 +91,14 @@ spec: > "${BACKUP_DIR}/${NS}-secrets.json" done - echo "=== Encrypting backup with AES-256-CBC ===" + echo "=== Creating compressed archive 
===" ARCHIVE="${BACKUP_DIR}/secrets-backup-${TIMESTAMP}.tar.gz" tar -czf "${ARCHIVE}" -C "${BACKUP_DIR}" \ $(ls "${BACKUP_DIR}"/*.json 2>/dev/null | xargs -n1 basename) - ENCRYPTED="${BACKUP_DIR}/secrets-backup-${TIMESTAMP}.tar.gz.enc" - openssl enc -aes-256-cbc -salt -pbkdf2 -iter 100000 \ - -in "${ARCHIVE}" \ - -out "${ENCRYPTED}" \ - -pass env:ENCRYPTION_PASSPHRASE - - echo "=== Uploading to OBS ===" - aws s3 cp "${ENCRYPTED}" \ - "s3://${SOURCE_BUCKET}/cluster-secrets-backup/${TIMESTAMP}/secrets-backup.tar.gz.enc" \ + echo "=== Uploading to OBS (SSE-KMS encryption at rest) ===" + aws s3 cp "${ARCHIVE}" \ + "s3://${SOURCE_BUCKET}/cluster-secrets-backup/${TIMESTAMP}/secrets-backup.tar.gz" \ --endpoint-url "https://${OBS_ENDPOINT}" echo "=== Cleanup ===" From 7949cabb293a33aedbb0d235d87f43e46ec3afc8 Mon Sep 17 00:00:00 2001 From: Daniel Sy Date: Fri, 12 Jun 2026 13:41:43 +0200 Subject: [PATCH 087/114] =?UTF-8?q?fix(garm):=20=E2=AC=86=EF=B8=8F=20updat?= =?UTF-8?q?e=20to=20v0.1.7-forgejo-24=20(fresh=20multi-arch=20build)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Build completed successfully. Fixes exec format error from -23. Dropped stale NOTE warning — image is clean amd64. --- otc/dev.t09.de/stacks/garm/garm/values.yaml | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/otc/dev.t09.de/stacks/garm/garm/values.yaml b/otc/dev.t09.de/stacks/garm/garm/values.yaml index 7bce701..5baed69 100644 --- a/otc/dev.t09.de/stacks/garm/garm/values.yaml +++ b/otc/dev.t09.de/stacks/garm/garm/values.yaml @@ -26,10 +26,7 @@ credentials: image: repository: edp.buildth.ing/devfw-cicd/garm-forgejo - # NOTE: v0.1.7-forgejo-23 has exec format error (wrong arch build). - # Rolled back to -22 until -23 is rebuilt for amd64. - # DO NOT bump — automated upload will restore wrong arch. 
- tag: v0.1.7-forgejo-22 + tag: v0.1.7-forgejo-24 providerConfig: edgeConnect: From 57ee5afa62d56de3918588c967b70caad9ad3542 Mon Sep 17 00:00:00 2001 From: Daniel Sy Date: Mon, 15 Jun 2026 21:05:11 +0200 Subject: [PATCH 088/114] =?UTF-8?q?feat(observability):=20=E2=9C=A8=20add?= =?UTF-8?q?=20VMServiceScrapes=20+=20migrate=20VLogs=20=E2=86=92=20VLSingl?= =?UTF-8?q?e?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Migrate VLogs CRD to VLSingle (operator.victoriametrics.com/v1beta1) - Add VMServiceScrape for Forgejo (gitea ns, port http, /metrics) - Add VMServiceScrape for ArgoCD (argocd ns, port http-metrics) - Add VMServiceScrape for GARM (garm ns, port metrics) - Add VMServiceScrape for CoreDNS (kube-system ns, k8s-app: kube-dns) Ref: IPCEICIS-4618, IPCEICIS-5066 --- .../manifests/argocd-scrape.yaml | 14 ++++++++++++++ .../manifests/coredns-scrape.yaml | 14 ++++++++++++++ .../manifests/forgejo-scrape.yaml | 15 +++++++++++++++ .../victoria-k8s-stack/manifests/garm-scrape.yaml | 14 ++++++++++++++ .../victoria-k8s-stack/manifests/vlogs.yaml | 2 +- 5 files changed, 58 insertions(+), 1 deletion(-) create mode 100644 otc/dev.t09.de/stacks/observability/victoria-k8s-stack/manifests/argocd-scrape.yaml create mode 100644 otc/dev.t09.de/stacks/observability/victoria-k8s-stack/manifests/coredns-scrape.yaml create mode 100644 otc/dev.t09.de/stacks/observability/victoria-k8s-stack/manifests/forgejo-scrape.yaml create mode 100644 otc/dev.t09.de/stacks/observability/victoria-k8s-stack/manifests/garm-scrape.yaml diff --git a/otc/dev.t09.de/stacks/observability/victoria-k8s-stack/manifests/argocd-scrape.yaml b/otc/dev.t09.de/stacks/observability/victoria-k8s-stack/manifests/argocd-scrape.yaml new file mode 100644 index 0000000..2e9248f --- /dev/null +++ b/otc/dev.t09.de/stacks/observability/victoria-k8s-stack/manifests/argocd-scrape.yaml @@ -0,0 +1,14 @@ +apiVersion: operator.victoriametrics.com/v1beta1 +kind: VMServiceScrape +metadata: 
+ name: argocd + namespace: observability +spec: + namespaceSelector: + matchNames: + - argocd + selector: + matchLabels: + app.kubernetes.io/part-of: argocd + endpoints: + - port: http-metrics diff --git a/otc/dev.t09.de/stacks/observability/victoria-k8s-stack/manifests/coredns-scrape.yaml b/otc/dev.t09.de/stacks/observability/victoria-k8s-stack/manifests/coredns-scrape.yaml new file mode 100644 index 0000000..d4814dc --- /dev/null +++ b/otc/dev.t09.de/stacks/observability/victoria-k8s-stack/manifests/coredns-scrape.yaml @@ -0,0 +1,14 @@ +apiVersion: operator.victoriametrics.com/v1beta1 +kind: VMServiceScrape +metadata: + name: coredns + namespace: observability +spec: + namespaceSelector: + matchNames: + - kube-system + selector: + matchLabels: + k8s-app: kube-dns + endpoints: + - port: metrics diff --git a/otc/dev.t09.de/stacks/observability/victoria-k8s-stack/manifests/forgejo-scrape.yaml b/otc/dev.t09.de/stacks/observability/victoria-k8s-stack/manifests/forgejo-scrape.yaml new file mode 100644 index 0000000..aecf517 --- /dev/null +++ b/otc/dev.t09.de/stacks/observability/victoria-k8s-stack/manifests/forgejo-scrape.yaml @@ -0,0 +1,15 @@ +apiVersion: operator.victoriametrics.com/v1beta1 +kind: VMServiceScrape +metadata: + name: forgejo + namespace: observability +spec: + namespaceSelector: + matchNames: + - gitea + selector: + matchLabels: + app.kubernetes.io/name: forgejo + endpoints: + - port: http + path: /metrics diff --git a/otc/dev.t09.de/stacks/observability/victoria-k8s-stack/manifests/garm-scrape.yaml b/otc/dev.t09.de/stacks/observability/victoria-k8s-stack/manifests/garm-scrape.yaml new file mode 100644 index 0000000..6fc8de6 --- /dev/null +++ b/otc/dev.t09.de/stacks/observability/victoria-k8s-stack/manifests/garm-scrape.yaml @@ -0,0 +1,14 @@ +apiVersion: operator.victoriametrics.com/v1beta1 +kind: VMServiceScrape +metadata: + name: garm + namespace: observability +spec: + namespaceSelector: + matchNames: + - garm + selector: + matchLabels: + 
app.kubernetes.io/name: garm + endpoints: + - port: metrics diff --git a/otc/dev.t09.de/stacks/observability/victoria-k8s-stack/manifests/vlogs.yaml b/otc/dev.t09.de/stacks/observability/victoria-k8s-stack/manifests/vlogs.yaml index 72e13d1..8657ac8 100644 --- a/otc/dev.t09.de/stacks/observability/victoria-k8s-stack/manifests/vlogs.yaml +++ b/otc/dev.t09.de/stacks/observability/victoria-k8s-stack/manifests/vlogs.yaml @@ -1,5 +1,5 @@ apiVersion: operator.victoriametrics.com/v1beta1 -kind: VLogs +kind: VLSingle metadata: name: victorialogs namespace: observability From 9ed3ff50d24e86cd8c945cd2e84306c37b731a7e Mon Sep 17 00:00:00 2001 From: Martin McCaffery Date: Wed, 17 Jun 2026 11:38:48 +0200 Subject: [PATCH 089/114] =?UTF-8?q?bump(benchmark):=20ci-sizer-collector?= =?UTF-8?q?=20sidecar=200.9.0=20=E2=86=92=200.9.7=20to=20pick=20up=20host-?= =?UTF-8?q?resolved=20kernel=5Fpeak=20+=20cgroup=5Fpath=5Fcount=20diagnost?= =?UTF-8?q?ic?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- otc/benchmark.t09.de/stacks/garm/garm/values.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/otc/benchmark.t09.de/stacks/garm/garm/values.yaml b/otc/benchmark.t09.de/stacks/garm/garm/values.yaml index 8de0c9e..347f792 100644 --- a/otc/benchmark.t09.de/stacks/garm/garm/values.yaml +++ b/otc/benchmark.t09.de/stacks/garm/garm/values.yaml @@ -39,7 +39,7 @@ providerConfig: edgeConnectK8s: pendingTimeout: "5m" sizer: - sidecarImage: edp.buildth.ing/devfw-cicd/ci-sizer-collector:0.9.0 + sidecarImage: edp.buildth.ing/devfw-cicd/ci-sizer-collector:0.9.7 sidecarPushEndpoint: https://sizer.benchmark.t09.de/api/v1/metrics baseUrl: "https://sizer.benchmark.t09.de" readToken: From a52a6691a8fb36155d6d01ed72dd0fc9865528ac Mon Sep 17 00:00:00 2001 From: Daniel Sy Date: Fri, 19 Jun 2026 09:51:42 +0200 Subject: [PATCH 090/114] =?UTF-8?q?fix(observability):=20=F0=9F=90=9B=20ad?= 
=?UTF-8?q?d=20prune=20+=20RespectIgnoreDifferences=20to=20o12y=20syncPoli?= =?UTF-8?q?cy?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix CRD bootstrap deadlock on victoria-metrics-k8s-stack ArgoCD app. Adds prune: true and RespectIgnoreDifferences=true to prevent sync failures when CRs are applied before CRDs are established. --- otc/dev.t09.de/stacks/observability/victoria-k8s-stack.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/otc/dev.t09.de/stacks/observability/victoria-k8s-stack.yaml b/otc/dev.t09.de/stacks/observability/victoria-k8s-stack.yaml index 3011a2f..0ff2853 100644 --- a/otc/dev.t09.de/stacks/observability/victoria-k8s-stack.yaml +++ b/otc/dev.t09.de/stacks/observability/victoria-k8s-stack.yaml @@ -9,10 +9,12 @@ spec: project: default syncPolicy: automated: + prune: true selfHeal: true syncOptions: - CreateNamespace=true - ServerSideApply=true + - RespectIgnoreDifferences=true destination: name: in-cluster namespace: observability From 29c0a59734cbba01b85413e9f6772a6eaec929a1 Mon Sep 17 00:00:00 2001 From: Daniel Sy Date: Fri, 19 Jun 2026 09:56:19 +0200 Subject: [PATCH 091/114] =?UTF-8?q?fix(observability):=20=F0=9F=90=9B=20ad?= =?UTF-8?q?d=20SkipDryRunOnMissingResource=20to=20o12y=20syncOptions?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit VLSingle CRD missing at sync time — ArgoCD pre-validates all resources before applying any, causing 'synchronization tasks not valid' on CRs whose CRDs are created by the operator in the same sync wave. SkipDryRunOnMissingResource=true bypasses dry-run for missing CRDs, unblocking the CRD bootstrap deadlock. 
--- otc/dev.t09.de/stacks/observability/victoria-k8s-stack.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/otc/dev.t09.de/stacks/observability/victoria-k8s-stack.yaml b/otc/dev.t09.de/stacks/observability/victoria-k8s-stack.yaml index 0ff2853..d7599b9 100644 --- a/otc/dev.t09.de/stacks/observability/victoria-k8s-stack.yaml +++ b/otc/dev.t09.de/stacks/observability/victoria-k8s-stack.yaml @@ -15,6 +15,7 @@ spec: - CreateNamespace=true - ServerSideApply=true - RespectIgnoreDifferences=true + - SkipDryRunOnMissingResource=true destination: name: in-cluster namespace: observability From ef4a1d7ce23d12d51fbee92697318df30d32b403 Mon Sep 17 00:00:00 2001 From: Daniel Sy Date: Fri, 19 Jun 2026 09:58:50 +0200 Subject: [PATCH 092/114] =?UTF-8?q?fix(observability):=20=F0=9F=90=9B=20di?= =?UTF-8?q?sable=20crds.cleanup=20hook=20in=20victoria-metrics-operator?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pre-upgrade cleanup hook uses bitnami/kubectl and spawns on every ArgoCD sync. Dev cluster nodes are at 99% CPU / pod limit — hook pod cannot be scheduled, blocking the entire sync indefinitely. Disabling cleanup.enabled prevents the hook Job from being created. CRD cleanup is safe to skip on a fresh bootstrap where no old CRDs exist. 
--- .../stacks/observability/victoria-k8s-stack/values.yaml | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/otc/dev.t09.de/stacks/observability/victoria-k8s-stack/values.yaml b/otc/dev.t09.de/stacks/observability/victoria-k8s-stack/values.yaml index d407910..ca0b671 100644 --- a/otc/dev.t09.de/stacks/observability/victoria-k8s-stack/values.yaml +++ b/otc/dev.t09.de/stacks/observability/victoria-k8s-stack/values.yaml @@ -28,10 +28,7 @@ victoria-metrics-operator: crds: plain: true cleanup: - enabled: true - image: - repository: bitnami/kubectl - pullPolicy: IfNotPresent + enabled: false # disabled: cleanup hook can't schedule on resource-constrained nodes (Insufficient cpu / Too many pods) serviceMonitor: enabled: true operator: From d83945413d5eaac583d2aee0ec5f061f0cddf257 Mon Sep 17 00:00:00 2001 From: Daniel Sy Date: Fri, 19 Jun 2026 10:20:13 +0200 Subject: [PATCH 093/114] =?UTF-8?q?fix(observability):=20=F0=9F=90=9B=20ch?= =?UTF-8?q?ange=20VLSingle=20=E2=86=92=20VLogs=20in=20victorialogs=20manif?= =?UTF-8?q?est?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Chart 0.48.1 / operator v0.58.0 uses VLogs CRD for VictoriaLogs, not VLSingle. The VLSingle kind was introduced in a newer operator version and is not registered in this chart release. Changing to VLogs which has identical spec fields (retentionPeriod, removePvcAfterDelete, storage, storageMetadata, resources all supported). 
--- .../observability/victoria-k8s-stack/manifests/vlogs.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/otc/dev.t09.de/stacks/observability/victoria-k8s-stack/manifests/vlogs.yaml b/otc/dev.t09.de/stacks/observability/victoria-k8s-stack/manifests/vlogs.yaml index 8657ac8..72e13d1 100644 --- a/otc/dev.t09.de/stacks/observability/victoria-k8s-stack/manifests/vlogs.yaml +++ b/otc/dev.t09.de/stacks/observability/victoria-k8s-stack/manifests/vlogs.yaml @@ -1,5 +1,5 @@ apiVersion: operator.victoriametrics.com/v1beta1 -kind: VLSingle +kind: VLogs metadata: name: victorialogs namespace: observability From 369961a940b84298a8772fda64e0ba2f6868c76d Mon Sep 17 00:00:00 2001 From: Daniel Sy Date: Fri, 19 Jun 2026 10:44:25 +0200 Subject: [PATCH 094/114] =?UTF-8?q?fix(observability):=20=F0=9F=90=9B=20en?= =?UTF-8?q?able=20vmagent,=20fix=20grafana=20auth,=20disable=20vmauth=20on?= =?UTF-8?q?=20dev?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Enable VMAgent (was disabled → no metrics scraped) - Remove disable_login from Grafana config; add security block so operator can auth via API - Disable VMAuth (invalid trailing-dot hostname o12y.observability.; not needed on dev) --- .../observability/grafana-operator/manifests/grafana.yaml | 4 +++- .../stacks/observability/victoria-k8s-stack/values.yaml | 4 ++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/otc/dev.t09.de/stacks/observability/grafana-operator/manifests/grafana.yaml b/otc/dev.t09.de/stacks/observability/grafana-operator/manifests/grafana.yaml index 1e8b038..17d6046 100644 --- a/otc/dev.t09.de/stacks/observability/grafana-operator/manifests/grafana.yaml +++ b/otc/dev.t09.de/stacks/observability/grafana-operator/manifests/grafana.yaml @@ -35,8 +35,10 @@ spec: server: root_url: "https://grafana.dev.t09.de" auth: - disable_login: "true" disable_login_form: "true" + security: + admin_user: admin + admin_password: admin auth.generic_oauth: 
enabled: "true" name: Forgejo diff --git a/otc/dev.t09.de/stacks/observability/victoria-k8s-stack/values.yaml b/otc/dev.t09.de/stacks/observability/victoria-k8s-stack/values.yaml index ca0b671..9751113 100644 --- a/otc/dev.t09.de/stacks/observability/victoria-k8s-stack/values.yaml +++ b/otc/dev.t09.de/stacks/observability/victoria-k8s-stack/values.yaml @@ -673,7 +673,7 @@ vmalert: vmauth: # -- Enable VMAuth CR - enabled: true + enabled: false # -- VMAuth annotations annotations: {} # -- (object) Full spec for VMAuth CRD. Allowed values described [here](https://docs.victoriametrics.com/operator/api#vmauthspec) @@ -696,7 +696,7 @@ vmauth: vmagent: # -- Create VMAgent CR - enabled: false + enabled: true # -- VMAgent annotations annotations: {} # -- Remote write configuration of VMAgent, allowed parameters defined in a [spec](https://docs.victoriametrics.com/operator/api#vmagentremotewritespec) From 59eed97263992d1c705a3d9e72251199794eba47 Mon Sep 17 00:00:00 2001 From: Daniel Sy Date: Fri, 19 Jun 2026 11:41:20 +0200 Subject: [PATCH 095/114] =?UTF-8?q?fix(observability-client):=20?= =?UTF-8?q?=F0=9F=90=9B=20fix=20remote=20write=20URL=20and=20add=20missing?= =?UTF-8?q?=20manifests=20dir?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Fix broken remote write URL: o12y.observability. 
→ o12y.observability.buildth.ing - Create manifests/ directory with .gitkeep for ArgoCD source path --- .../observability-client/vm-client-stack/manifests/.gitkeep | 0 .../stacks/observability-client/vm-client-stack/values.yaml | 2 +- 2 files changed, 1 insertion(+), 1 deletion(-) create mode 100644 otc/dev.t09.de/stacks/observability-client/vm-client-stack/manifests/.gitkeep diff --git a/otc/dev.t09.de/stacks/observability-client/vm-client-stack/manifests/.gitkeep b/otc/dev.t09.de/stacks/observability-client/vm-client-stack/manifests/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/otc/dev.t09.de/stacks/observability-client/vm-client-stack/values.yaml b/otc/dev.t09.de/stacks/observability-client/vm-client-stack/values.yaml index f85a786..9224a46 100644 --- a/otc/dev.t09.de/stacks/observability-client/vm-client-stack/values.yaml +++ b/otc/dev.t09.de/stacks/observability-client/vm-client-stack/values.yaml @@ -778,7 +778,7 @@ vmagent: # -- Remote write configuration of VMAgent, allowed parameters defined in a [spec](https://docs.victoriametrics.com/operator/api#vmagentremotewritespec) additionalRemoteWrites: # [] - - url: https://o12y.observability./api/v1/write + - url: https://o12y.observability.buildth.ing/api/v1/write basicAuth: username: name: simple-user-secret From 32e998df5b1b4b937dd72c31e985beebaf9bad85 Mon Sep 17 00:00:00 2001 From: Daniel Sy Date: Fri, 19 Jun 2026 12:35:18 +0200 Subject: [PATCH 096/114] =?UTF-8?q?fix(forgejo):=20=E2=8F=B1=EF=B8=8F=20in?= =?UTF-8?q?crease=20s3-backup=20activeDeadlineSeconds=201350=E2=86=927200?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Previous 22.5m deadline caused DeadlineExceeded on 2026-06-19 when rclone sync took >22m (vs 13-16s prior days). Likely triggered by significant new data in OBS bucket. 2h window accommodates large incremental syncs while BackupJobTooSlow alert still fires at 5m. 
--- .../forgejo-server/manifests/forgejo-s3-backup-cronjob.yaml | 4 ++-- .../forgejo-server/manifests/forgejo-s3-backup-cronjob.yaml | 4 ++-- .../forgejo-server/manifests/forgejo-s3-backup-cronjob.yaml | 4 ++-- .../forgejo-server/manifests/forgejo-s3-backup-cronjob.yaml | 4 ++-- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/otc/benchmark.t09.de/stacks/forgejo/forgejo-server/manifests/forgejo-s3-backup-cronjob.yaml b/otc/benchmark.t09.de/stacks/forgejo/forgejo-server/manifests/forgejo-s3-backup-cronjob.yaml index ed54cb0..12883a9 100644 --- a/otc/benchmark.t09.de/stacks/forgejo/forgejo-server/manifests/forgejo-s3-backup-cronjob.yaml +++ b/otc/benchmark.t09.de/stacks/forgejo/forgejo-server/manifests/forgejo-s3-backup-cronjob.yaml @@ -11,8 +11,8 @@ spec: startingDeadlineSeconds: 600 # 10 minutes jobTemplate: spec: - # 60 min until backup - 10 min start - (backoffLimit * activeDeadlineSeconds) - some time sync buffer - activeDeadlineSeconds: 1350 + # 2h window: handles large incremental syncs after repo growth or OBS slowness; BackupJobTooSlow alert fires at 5m + activeDeadlineSeconds: 7200 backoffLimit: 2 ttlSecondsAfterFinished: 259200 # template: diff --git a/otc/dev.t09.de/stacks/forgejo/forgejo-server/manifests/forgejo-s3-backup-cronjob.yaml b/otc/dev.t09.de/stacks/forgejo/forgejo-server/manifests/forgejo-s3-backup-cronjob.yaml index de14801..d313b18 100644 --- a/otc/dev.t09.de/stacks/forgejo/forgejo-server/manifests/forgejo-s3-backup-cronjob.yaml +++ b/otc/dev.t09.de/stacks/forgejo/forgejo-server/manifests/forgejo-s3-backup-cronjob.yaml @@ -11,8 +11,8 @@ spec: startingDeadlineSeconds: 600 # 10 minutes jobTemplate: spec: - # 60 min until backup - 10 min start - (backoffLimit * activeDeadlineSeconds) - some time sync buffer - activeDeadlineSeconds: 1350 + # 2h window: handles large incremental syncs after repo growth or OBS slowness; BackupJobTooSlow alert fires at 5m + activeDeadlineSeconds: 7200 backoffLimit: 2 ttlSecondsAfterFinished: 259200 # 
template: diff --git a/otc/edp.buildth.ing/stacks/forgejo/forgejo-server/manifests/forgejo-s3-backup-cronjob.yaml b/otc/edp.buildth.ing/stacks/forgejo/forgejo-server/manifests/forgejo-s3-backup-cronjob.yaml index 71f1649..7226bd2 100644 --- a/otc/edp.buildth.ing/stacks/forgejo/forgejo-server/manifests/forgejo-s3-backup-cronjob.yaml +++ b/otc/edp.buildth.ing/stacks/forgejo/forgejo-server/manifests/forgejo-s3-backup-cronjob.yaml @@ -11,8 +11,8 @@ spec: startingDeadlineSeconds: 600 # 10 minutes jobTemplate: spec: - # 60 min until backup - 10 min start - (backoffLimit * activeDeadlineSeconds) - some time sync buffer - activeDeadlineSeconds: 1350 + # 2h window: handles large incremental syncs after repo growth or OBS slowness; BackupJobTooSlow alert fires at 5m + activeDeadlineSeconds: 7200 backoffLimit: 2 ttlSecondsAfterFinished: 259200 # template: diff --git a/otc/observability.buildth.ing/stacks/forgejo/forgejo-server/manifests/forgejo-s3-backup-cronjob.yaml b/otc/observability.buildth.ing/stacks/forgejo/forgejo-server/manifests/forgejo-s3-backup-cronjob.yaml index 842a7cc..a1caaae 100644 --- a/otc/observability.buildth.ing/stacks/forgejo/forgejo-server/manifests/forgejo-s3-backup-cronjob.yaml +++ b/otc/observability.buildth.ing/stacks/forgejo/forgejo-server/manifests/forgejo-s3-backup-cronjob.yaml @@ -11,8 +11,8 @@ spec: startingDeadlineSeconds: 600 # 10 minutes jobTemplate: spec: - # 60 min until backup - 10 min start - (backoffLimit * activeDeadlineSeconds) - some time sync buffer - activeDeadlineSeconds: 1350 + # 2h window: handles large incremental syncs after repo growth or OBS slowness; BackupJobTooSlow alert fires at 5m + activeDeadlineSeconds: 7200 backoffLimit: 2 ttlSecondsAfterFinished: 259200 # template: From 0316eefa43c725484f31a0d6248f4a3a0175c737 Mon Sep 17 00:00:00 2001 From: Daniel Sy Date: Fri, 19 Jun 2026 12:42:04 +0200 Subject: [PATCH 097/114] =?UTF-8?q?fix(observability):=20=F0=9F=90=9B=20di?= 
=?UTF-8?q?sable=20false-positive=20control-plane=20alerts=20and=20fix=20e?= =?UTF-8?q?mpty=20cluster=5Fenvironment=20label?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Hub defaultRules groups kubernetesSystemControllerManager, kubeScheduler, and kubernetesSystemScheduler used wrong key 'enabled: false' — chart expects 'create: false'. This caused KubeControllerManagerDown/KubeSchedulerDown to fire as false positives because OTC CCE managed k8s does not expose control plane for scraping. Dev local vmagent had empty externalLabels, so backup-alert rules evaluated by local vmalert had no cluster_environment label on kube_job_status_failed metrics. Added cluster_environment=dev to match what the vm-client-stack vmagent adds for hub shipping. --- .../stacks/observability/victoria-k8s-stack/values.yaml | 3 ++- .../stacks/observability/victoria-k8s-stack/values.yaml | 6 +++--- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/otc/dev.t09.de/stacks/observability/victoria-k8s-stack/values.yaml b/otc/dev.t09.de/stacks/observability/victoria-k8s-stack/values.yaml index 9751113..e7bffbc 100644 --- a/otc/dev.t09.de/stacks/observability/victoria-k8s-stack/values.yaml +++ b/otc/dev.t09.de/stacks/observability/victoria-k8s-stack/values.yaml @@ -708,7 +708,8 @@ vmagent: port: "8429" selectAllByDefault: true scrapeInterval: 20s - externalLabels: {} + externalLabels: + cluster_environment: "dev" # For multi-cluster setups it is useful to use "cluster" label to identify the metrics source. 
# For example: # cluster: cluster-name diff --git a/otc/observability.buildth.ing/stacks/observability/victoria-k8s-stack/values.yaml b/otc/observability.buildth.ing/stacks/observability/victoria-k8s-stack/values.yaml index 5bb9361..c535829 100644 --- a/otc/observability.buildth.ing/stacks/observability/victoria-k8s-stack/values.yaml +++ b/otc/observability.buildth.ing/stacks/observability/victoria-k8s-stack/values.yaml @@ -201,13 +201,13 @@ defaultRules: enabled: true rules: {} kubernetesSystemControllerManager: - enabled: false + create: false rules: {} kubeScheduler: - enabled: false + create: false rules: {} kubernetesSystemScheduler: - enabled: false + create: false rules: {} kubeStateMetrics: enabled: true From c2528f6f693ee3bb38f2a2ae322618f151589910 Mon Sep 17 00:00:00 2001 From: Daniel Sy Date: Fri, 19 Jun 2026 12:47:34 +0200 Subject: [PATCH 098/114] =?UTF-8?q?feat(observability):=20=E2=9C=A8=20add?= =?UTF-8?q?=20platform=20grafana=20dashboard=20CRs?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Add forgejo.yaml: Forgejo app dashboard (grafana.com ID 17802) - Add argocd-operational.yaml: ArgoCD operational dashboard (grafana.com ID 19993) - Add cronjob-monitoring.yaml: CronJob/backup monitoring dashboard (grafana.com ID 14279) - Add platform-overview.yaml: custom EDP Platform Overview inline dashboard (platform health, forgejo stats, resource usage, backup status rows) - Fix victoria-logs.yaml: replace broken URL with grafanaCom ID 22698 --- .../manifests/argocd-operational.yaml | 11 + .../manifests/cronjob-monitoring.yaml | 11 + .../grafana-operator/manifests/forgejo.yaml | 11 + .../manifests/platform-overview.yaml | 227 ++++++++++++++++++ .../manifests/victoria-logs.yaml | 4 +- 5 files changed, 263 insertions(+), 1 deletion(-) create mode 100644 otc/observability.buildth.ing/stacks/observability/grafana-operator/manifests/argocd-operational.yaml create mode 100644 
otc/observability.buildth.ing/stacks/observability/grafana-operator/manifests/cronjob-monitoring.yaml create mode 100644 otc/observability.buildth.ing/stacks/observability/grafana-operator/manifests/forgejo.yaml create mode 100644 otc/observability.buildth.ing/stacks/observability/grafana-operator/manifests/platform-overview.yaml diff --git a/otc/observability.buildth.ing/stacks/observability/grafana-operator/manifests/argocd-operational.yaml b/otc/observability.buildth.ing/stacks/observability/grafana-operator/manifests/argocd-operational.yaml new file mode 100644 index 0000000..b3fa256 --- /dev/null +++ b/otc/observability.buildth.ing/stacks/observability/grafana-operator/manifests/argocd-operational.yaml @@ -0,0 +1,11 @@ +apiVersion: grafana.integreatly.org/v1beta1 +kind: GrafanaDashboard +metadata: + name: argocd-operational +spec: + instanceSelector: + matchLabels: + dashboards: "grafana" + grafanaCom: + id: 19993 + revision: 2 diff --git a/otc/observability.buildth.ing/stacks/observability/grafana-operator/manifests/cronjob-monitoring.yaml b/otc/observability.buildth.ing/stacks/observability/grafana-operator/manifests/cronjob-monitoring.yaml new file mode 100644 index 0000000..e77eb20 --- /dev/null +++ b/otc/observability.buildth.ing/stacks/observability/grafana-operator/manifests/cronjob-monitoring.yaml @@ -0,0 +1,11 @@ +apiVersion: grafana.integreatly.org/v1beta1 +kind: GrafanaDashboard +metadata: + name: cronjob-monitoring +spec: + instanceSelector: + matchLabels: + dashboards: "grafana" + grafanaCom: + id: 14279 + revision: 1 diff --git a/otc/observability.buildth.ing/stacks/observability/grafana-operator/manifests/forgejo.yaml b/otc/observability.buildth.ing/stacks/observability/grafana-operator/manifests/forgejo.yaml new file mode 100644 index 0000000..cf32e5e --- /dev/null +++ b/otc/observability.buildth.ing/stacks/observability/grafana-operator/manifests/forgejo.yaml @@ -0,0 +1,11 @@ +apiVersion: grafana.integreatly.org/v1beta1 +kind: GrafanaDashboard 
+metadata: + name: forgejo +spec: + instanceSelector: + matchLabels: + dashboards: "grafana" + grafanaCom: + id: 17802 + revision: 1 diff --git a/otc/observability.buildth.ing/stacks/observability/grafana-operator/manifests/platform-overview.yaml b/otc/observability.buildth.ing/stacks/observability/grafana-operator/manifests/platform-overview.yaml new file mode 100644 index 0000000..aa8be4c --- /dev/null +++ b/otc/observability.buildth.ing/stacks/observability/grafana-operator/manifests/platform-overview.yaml @@ -0,0 +1,227 @@ +apiVersion: grafana.integreatly.org/v1beta1 +kind: GrafanaDashboard +metadata: + name: platform-overview +spec: + instanceSelector: + matchLabels: + dashboards: "grafana" + json: | + { + "annotations": {"list": []}, + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 1, + "links": [], + "panels": [ + { + "collapsed": false, + "gridPos": {"h": 1, "w": 24, "x": 0, "y": 0}, + "title": "Platform Health", + "type": "row" + }, + { + "datasource": {"type": "prometheus", "uid": "${DS_VICTORIAMETRICS}"}, + "fieldConfig": { + "defaults": { + "mappings": [{"options": {"0": {"text": "DOWN", "color": "red"}, "1": {"text": "UP", "color": "green"}}, "type": "value"}], + "thresholds": {"mode": "absolute", "steps": [{"color": "red", "value": null}, {"color": "green", "value": 1}]} + } + }, + "gridPos": {"h": 4, "w": 4, "x": 0, "y": 1}, + "title": "Forgejo", + "type": "stat", + "targets": [{"expr": "sum(up{job=\"forgejo-server-http\", cluster_environment=~\"$cluster_environment\"})", "legendFormat": ""}] + }, + { + "datasource": {"type": "prometheus", "uid": "${DS_VICTORIAMETRICS}"}, + "fieldConfig": { + "defaults": { + "thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}, {"color": "yellow", "value": 1}, {"color": "red", "value": 3}]} + } + }, + "gridPos": {"h": 4, "w": 4, "x": 4, "y": 1}, + "title": "Ingress 5xx (5m)", + "type": "stat", + "targets": [{"expr": 
"sum(rate(nginx_ingress_controller_requests{status=~\"5..\", cluster_environment=~\"$cluster_environment\"}[5m]))", "legendFormat": ""}] + }, + { + "datasource": {"type": "prometheus", "uid": "${DS_VICTORIAMETRICS}"}, + "fieldConfig": { + "defaults": { + "unit": "short", + "thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}, {"color": "red", "value": 1}]} + } + }, + "gridPos": {"h": 4, "w": 4, "x": 8, "y": 1}, + "title": "Failed Jobs (24h)", + "type": "stat", + "targets": [{"expr": "sum(kube_job_status_failed{namespace=\"gitea\", cluster_environment=~\"$cluster_environment\"}) or vector(0)", "legendFormat": ""}] + }, + { + "datasource": {"type": "prometheus", "uid": "${DS_VICTORIAMETRICS}"}, + "fieldConfig": { + "defaults": { + "unit": "percentunit", + "thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}, {"color": "yellow", "value": 0.7}, {"color": "red", "value": 0.85}]} + } + }, + "gridPos": {"h": 4, "w": 4, "x": 12, "y": 1}, + "title": "Cluster CPU Usage", + "type": "stat", + "targets": [{"expr": "1 - avg(rate(node_cpu_seconds_total{mode=\"idle\", cluster_environment=~\"$cluster_environment\"}[5m]))", "legendFormat": ""}] + }, + { + "datasource": {"type": "prometheus", "uid": "${DS_VICTORIAMETRICS}"}, + "fieldConfig": { + "defaults": { + "unit": "percentunit", + "thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}, {"color": "yellow", "value": 0.7}, {"color": "red", "value": 0.85}]} + } + }, + "gridPos": {"h": 4, "w": 4, "x": 16, "y": 1}, + "title": "Cluster Memory Usage", + "type": "stat", + "targets": [{"expr": "1 - sum(node_memory_MemAvailable_bytes{cluster_environment=~\"$cluster_environment\"}) / sum(node_memory_MemTotal_bytes{cluster_environment=~\"$cluster_environment\"})", "legendFormat": ""}] + }, + { + "datasource": {"type": "prometheus", "uid": "${DS_VICTORIAMETRICS}"}, + "fieldConfig": { + "defaults": { + "unit": "percentunit", + "thresholds": {"mode": "absolute", 
"steps": [{"color": "green", "value": null}, {"color": "yellow", "value": 0.6}, {"color": "red", "value": 0.8}]} + } + }, + "gridPos": {"h": 4, "w": 4, "x": 20, "y": 1}, + "title": "Max PVC Usage", + "type": "stat", + "targets": [{"expr": "max(1 - kubelet_volume_stats_available_bytes{cluster_environment=~\"$cluster_environment\"} / kubelet_volume_stats_capacity_bytes{cluster_environment=~\"$cluster_environment\"})", "legendFormat": ""}] + }, + { + "collapsed": false, + "gridPos": {"h": 1, "w": 24, "x": 0, "y": 5}, + "title": "Forgejo", + "type": "row" + }, + { + "datasource": {"type": "prometheus", "uid": "${DS_VICTORIAMETRICS}"}, + "fieldConfig": {"defaults": {"unit": "short"}}, + "gridPos": {"h": 4, "w": 4, "x": 0, "y": 6}, + "title": "Repositories", + "type": "stat", + "targets": [{"expr": "gitea_repositories{cluster_environment=~\"$cluster_environment\"}", "legendFormat": "{{cluster_environment}}"}] + }, + { + "datasource": {"type": "prometheus", "uid": "${DS_VICTORIAMETRICS}"}, + "fieldConfig": {"defaults": {"unit": "short"}}, + "gridPos": {"h": 4, "w": 4, "x": 4, "y": 6}, + "title": "Users", + "type": "stat", + "targets": [{"expr": "gitea_users{cluster_environment=~\"$cluster_environment\"}", "legendFormat": "{{cluster_environment}}"}] + }, + { + "datasource": {"type": "prometheus", "uid": "${DS_VICTORIAMETRICS}"}, + "fieldConfig": {"defaults": {"unit": "short"}}, + "gridPos": {"h": 4, "w": 4, "x": 8, "y": 6}, + "title": "Organizations", + "type": "stat", + "targets": [{"expr": "gitea_organizations{cluster_environment=~\"$cluster_environment\"}", "legendFormat": "{{cluster_environment}}"}] + }, + { + "datasource": {"type": "prometheus", "uid": "${DS_VICTORIAMETRICS}"}, + "fieldConfig": {"defaults": {"unit": "short"}}, + "gridPos": {"h": 4, "w": 4, "x": 12, "y": 6}, + "title": "Open Issues", + "type": "stat", + "targets": [{"expr": "gitea_issues_open{cluster_environment=~\"$cluster_environment\"}", "legendFormat": "{{cluster_environment}}"}] + }, + { + 
"datasource": {"type": "prometheus", "uid": "${DS_VICTORIAMETRICS}"}, + "fieldConfig": {"defaults": {"unit": "short"}}, + "gridPos": {"h": 4, "w": 4, "x": 16, "y": 6}, + "title": "Webhooks", + "type": "stat", + "targets": [{"expr": "gitea_webhooks{cluster_environment=~\"$cluster_environment\"}", "legendFormat": "{{cluster_environment}}"}] + }, + { + "datasource": {"type": "prometheus", "uid": "${DS_VICTORIAMETRICS}"}, + "fieldConfig": {"defaults": {"unit": "short"}}, + "gridPos": {"h": 4, "w": 4, "x": 20, "y": 6}, + "title": "Mirrors", + "type": "stat", + "targets": [{"expr": "gitea_mirrors{cluster_environment=~\"$cluster_environment\"}", "legendFormat": "{{cluster_environment}}"}] + }, + { + "collapsed": false, + "gridPos": {"h": 1, "w": 24, "x": 0, "y": 10}, + "title": "Resources", + "type": "row" + }, + { + "datasource": {"type": "prometheus", "uid": "${DS_VICTORIAMETRICS}"}, + "fieldConfig": {"defaults": {"unit": "percentunit", "min": 0, "max": 1}}, + "gridPos": {"h": 8, "w": 12, "x": 0, "y": 11}, + "title": "Node CPU Usage", + "type": "timeseries", + "targets": [{"expr": "1 - rate(node_cpu_seconds_total{mode=\"idle\", cluster_environment=~\"$cluster_environment\"}[5m])", "legendFormat": "{{instance}}"}] + }, + { + "datasource": {"type": "prometheus", "uid": "${DS_VICTORIAMETRICS}"}, + "fieldConfig": {"defaults": {"unit": "percentunit", "min": 0, "max": 1}}, + "gridPos": {"h": 8, "w": 12, "x": 12, "y": 11}, + "title": "PVC Usage by Claim", + "type": "timeseries", + "targets": [{"expr": "1 - (kubelet_volume_stats_available_bytes{cluster_environment=~\"$cluster_environment\"} / kubelet_volume_stats_capacity_bytes{cluster_environment=~\"$cluster_environment\"})", "legendFormat": "{{namespace}}/{{persistentvolumeclaim}}"}] + }, + { + "collapsed": false, + "gridPos": {"h": 1, "w": 24, "x": 0, "y": 19}, + "title": "Backups", + "type": "row" + }, + { + "datasource": {"type": "prometheus", "uid": "${DS_VICTORIAMETRICS}"}, + "fieldConfig": {"defaults": {"unit": "s", 
"thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}, {"color": "yellow", "value": 86400}, {"color": "red", "value": 172800}]}}}, + "gridPos": {"h": 4, "w": 8, "x": 0, "y": 20}, + "title": "Time Since Last Backup Schedule", + "type": "stat", + "targets": [{"expr": "time() - kube_cronjob_status_last_schedule_time{cronjob=~\"forgejo-s3-backup|secrets-backup\", cluster_environment=~\"$cluster_environment\"}", "legendFormat": "{{cronjob}} ({{cluster_environment}})"}] + }, + { + "datasource": {"type": "prometheus", "uid": "${DS_VICTORIAMETRICS}"}, + "fieldConfig": {"defaults": {"unit": "s"}}, + "gridPos": {"h": 4, "w": 8, "x": 8, "y": 20}, + "title": "Backup Job Duration (Last 7d)", + "type": "timeseries", + "targets": [{"expr": "kube_job_status_completion_time{job_name=~\"forgejo-s3-backup.*|secrets-backup.*\", cluster_environment=~\"$cluster_environment\"} - kube_job_status_start_time{job_name=~\"forgejo-s3-backup.*|secrets-backup.*\", cluster_environment=~\"$cluster_environment\"}", "legendFormat": "{{job_name}}"}], + "options": {"legend": {"displayMode": "table"}} + }, + { + "datasource": {"type": "prometheus", "uid": "${DS_VICTORIAMETRICS}"}, + "fieldConfig": {"defaults": {"unit": "short", "thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}, {"color": "red", "value": 1}]}}}, + "gridPos": {"h": 4, "w": 8, "x": 16, "y": 20}, + "title": "Failed Backup Jobs (Active)", + "type": "stat", + "targets": [{"expr": "sum by(cluster_environment, job_name) (kube_job_status_failed{job_name=~\"forgejo-s3-backup.*|secrets-backup.*\", cluster_environment=~\"$cluster_environment\"})", "legendFormat": "{{job_name}} ({{cluster_environment}})"}] + } + ], + "schemaVersion": 39, + "tags": ["edp", "platform", "overview"], + "templating": { + "list": [ + { + "current": {"selected": true, "text": "All", "value": "$__all"}, + "datasource": {"type": "prometheus", "uid": "${DS_VICTORIAMETRICS}"}, + "definition": "label_values(up, 
cluster_environment)", + "includeAll": true, + "multi": true, + "name": "cluster_environment", + "query": "label_values(up, cluster_environment)", + "type": "query" + } + ] + }, + "time": {"from": "now-6h", "to": "now"}, + "title": "EDP Platform Overview", + "uid": "edp-platform-overview" + } diff --git a/otc/observability.buildth.ing/stacks/observability/grafana-operator/manifests/victoria-logs.yaml b/otc/observability.buildth.ing/stacks/observability/grafana-operator/manifests/victoria-logs.yaml index 4018fbd..819dec7 100644 --- a/otc/observability.buildth.ing/stacks/observability/grafana-operator/manifests/victoria-logs.yaml +++ b/otc/observability.buildth.ing/stacks/observability/grafana-operator/manifests/victoria-logs.yaml @@ -6,4 +6,6 @@ spec: instanceSelector: matchLabels: dashboards: "grafana" - url: "https://raw.githubusercontent.com/VictoriaMetrics/VictoriaMetrics/refs/heads/master/dashboards/vm/victorialogs.json" + grafanaCom: + id: 22698 + revision: 1 From 949529eb5c3d693306e9916c15724195fc91040a Mon Sep 17 00:00:00 2001 From: Daniel Sy Date: Fri, 19 Jun 2026 12:50:20 +0200 Subject: [PATCH 099/114] =?UTF-8?q?feat(observability):=20=E2=9C=A8=20add?= =?UTF-8?q?=20cluster=5Fenvironment=20dropdown=20to=20Forgejo=20and=20plat?= =?UTF-8?q?form-overview=20dashboards?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Replace grafanaCom import (17802) with custom inline Forgejo dashboard containing cluster_environment query variable (refresh=2, label=Environment) - Add label, refresh=2, sort=1 to platform-overview cluster_environment variable - ArgoCD (19993) and CronJob (14279) remain grafanaCom imports (acceptable) --- .../grafana-operator/manifests/forgejo.yaml | 179 +++++++++++++++++- .../manifests/platform-overview.yaml | 3 + 2 files changed, 179 insertions(+), 3 deletions(-) diff --git a/otc/observability.buildth.ing/stacks/observability/grafana-operator/manifests/forgejo.yaml 
b/otc/observability.buildth.ing/stacks/observability/grafana-operator/manifests/forgejo.yaml index cf32e5e..bf566a5 100644 --- a/otc/observability.buildth.ing/stacks/observability/grafana-operator/manifests/forgejo.yaml +++ b/otc/observability.buildth.ing/stacks/observability/grafana-operator/manifests/forgejo.yaml @@ -6,6 +6,179 @@ spec: instanceSelector: matchLabels: dashboards: "grafana" - grafanaCom: - id: 17802 - revision: 1 + json: | + { + "annotations": {"list": []}, + "editable": true, + "graphTooltip": 1, + "panels": [ + { + "collapsed": false, + "gridPos": {"h": 1, "w": 24, "x": 0, "y": 0}, + "title": "Forgejo Health", + "type": "row" + }, + { + "datasource": {"type": "prometheus"}, + "fieldConfig": {"defaults": {"mappings": [{"options": {"0": {"text": "DOWN", "color": "red"}, "1": {"text": "UP", "color": "green"}}, "type": "value"}], "thresholds": {"mode": "absolute", "steps": [{"color": "red", "value": null}, {"color": "green", "value": 1}]}}}, + "gridPos": {"h": 4, "w": 4, "x": 0, "y": 1}, + "title": "Status", + "type": "stat", + "targets": [{"expr": "up{job=\"forgejo-server-http\", cluster_environment=~\"$cluster_environment\"}", "legendFormat": "{{cluster_environment}}"}] + }, + { + "datasource": {"type": "prometheus"}, + "fieldConfig": {"defaults": {"unit": "short"}}, + "gridPos": {"h": 4, "w": 4, "x": 4, "y": 1}, + "title": "Version", + "type": "stat", + "targets": [{"expr": "gitea_build_info{cluster_environment=~\"$cluster_environment\"}", "legendFormat": "{{version}}"}], + "options": {"reduceOptions": {"calcs": ["lastNotNull"]}, "textMode": "name"} + }, + { + "datasource": {"type": "prometheus"}, + "fieldConfig": {"defaults": {"unit": "short"}}, + "gridPos": {"h": 4, "w": 4, "x": 8, "y": 1}, + "title": "Repositories", + "type": "stat", + "targets": [{"expr": "gitea_repositories{cluster_environment=~\"$cluster_environment\"}", "legendFormat": "{{cluster_environment}}"}] + }, + { + "datasource": {"type": "prometheus"}, + "fieldConfig": {"defaults": 
{"unit": "short"}}, + "gridPos": {"h": 4, "w": 4, "x": 12, "y": 1}, + "title": "Users", + "type": "stat", + "targets": [{"expr": "gitea_users{cluster_environment=~\"$cluster_environment\"}", "legendFormat": "{{cluster_environment}}"}] + }, + { + "datasource": {"type": "prometheus"}, + "fieldConfig": {"defaults": {"unit": "short"}}, + "gridPos": {"h": 4, "w": 4, "x": 16, "y": 1}, + "title": "Organizations", + "type": "stat", + "targets": [{"expr": "gitea_organizations{cluster_environment=~\"$cluster_environment\"}", "legendFormat": "{{cluster_environment}}"}] + }, + { + "datasource": {"type": "prometheus"}, + "fieldConfig": {"defaults": {"unit": "short"}}, + "gridPos": {"h": 4, "w": 4, "x": 20, "y": 1}, + "title": "Teams", + "type": "stat", + "targets": [{"expr": "gitea_teams{cluster_environment=~\"$cluster_environment\"}", "legendFormat": "{{cluster_environment}}"}] + }, + { + "collapsed": false, + "gridPos": {"h": 1, "w": 24, "x": 0, "y": 5}, + "title": "Activity", + "type": "row" + }, + { + "datasource": {"type": "prometheus"}, + "fieldConfig": {"defaults": {"unit": "short"}}, + "gridPos": {"h": 4, "w": 6, "x": 0, "y": 6}, + "title": "Open Issues", + "type": "stat", + "targets": [{"expr": "gitea_issues_open{cluster_environment=~\"$cluster_environment\"}", "legendFormat": "{{cluster_environment}}"}] + }, + { + "datasource": {"type": "prometheus"}, + "fieldConfig": {"defaults": {"unit": "short"}}, + "gridPos": {"h": 4, "w": 6, "x": 6, "y": 6}, + "title": "Closed Issues", + "type": "stat", + "targets": [{"expr": "gitea_issues_closed{cluster_environment=~\"$cluster_environment\"}", "legendFormat": "{{cluster_environment}}"}] + }, + { + "datasource": {"type": "prometheus"}, + "fieldConfig": {"defaults": {"unit": "short"}}, + "gridPos": {"h": 4, "w": 6, "x": 12, "y": 6}, + "title": "Webhooks", + "type": "stat", + "targets": [{"expr": "gitea_webhooks{cluster_environment=~\"$cluster_environment\"}", "legendFormat": "{{cluster_environment}}"}] + }, + { + "datasource": 
{"type": "prometheus"}, + "fieldConfig": {"defaults": {"unit": "short"}}, + "gridPos": {"h": 4, "w": 6, "x": 18, "y": 6}, + "title": "Hook Tasks", + "type": "stat", + "targets": [{"expr": "gitea_hooktasks{cluster_environment=~\"$cluster_environment\"}", "legendFormat": "{{cluster_environment}}"}] + }, + { + "collapsed": false, + "gridPos": {"h": 1, "w": 24, "x": 0, "y": 10}, + "title": "Content & Auth", + "type": "row" + }, + { + "datasource": {"type": "prometheus"}, + "fieldConfig": {"defaults": {"unit": "short"}}, + "gridPos": {"h": 4, "w": 4, "x": 0, "y": 11}, + "title": "Stars", + "type": "stat", + "targets": [{"expr": "gitea_stars{cluster_environment=~\"$cluster_environment\"}", "legendFormat": "{{cluster_environment}}"}] + }, + { + "datasource": {"type": "prometheus"}, + "fieldConfig": {"defaults": {"unit": "short"}}, + "gridPos": {"h": 4, "w": 4, "x": 4, "y": 11}, + "title": "Watches", + "type": "stat", + "targets": [{"expr": "gitea_watches{cluster_environment=~\"$cluster_environment\"}", "legendFormat": "{{cluster_environment}}"}] + }, + { + "datasource": {"type": "prometheus"}, + "fieldConfig": {"defaults": {"unit": "short"}}, + "gridPos": {"h": 4, "w": 4, "x": 8, "y": 11}, + "title": "Releases", + "type": "stat", + "targets": [{"expr": "gitea_releases{cluster_environment=~\"$cluster_environment\"}", "legendFormat": "{{cluster_environment}}"}] + }, + { + "datasource": {"type": "prometheus"}, + "fieldConfig": {"defaults": {"unit": "short"}}, + "gridPos": {"h": 4, "w": 4, "x": 12, "y": 11}, + "title": "Mirrors", + "type": "stat", + "targets": [{"expr": "gitea_mirrors{cluster_environment=~\"$cluster_environment\"}", "legendFormat": "{{cluster_environment}}"}] + }, + { + "datasource": {"type": "prometheus"}, + "fieldConfig": {"defaults": {"unit": "short"}}, + "gridPos": {"h": 4, "w": 4, "x": 16, "y": 11}, + "title": "Public Keys", + "type": "stat", + "targets": [{"expr": "gitea_publickeys{cluster_environment=~\"$cluster_environment\"}", "legendFormat": 
"{{cluster_environment}}"}] + }, + { + "datasource": {"type": "prometheus"}, + "fieldConfig": {"defaults": {"unit": "short"}}, + "gridPos": {"h": 4, "w": 4, "x": 20, "y": 11}, + "title": "OAuth Apps", + "type": "stat", + "targets": [{"expr": "gitea_oauths{cluster_environment=~\"$cluster_environment\"}", "legendFormat": "{{cluster_environment}}"}] + } + ], + "schemaVersion": 39, + "tags": ["edp", "forgejo", "gitea"], + "templating": { + "list": [ + { + "current": {"selected": true, "text": "All", "value": "$__all"}, + "datasource": {"type": "prometheus"}, + "definition": "label_values(gitea_repositories, cluster_environment)", + "includeAll": true, + "multi": true, + "name": "cluster_environment", + "label": "Environment", + "query": "label_values(gitea_repositories, cluster_environment)", + "refresh": 2, + "type": "query" + } + ] + }, + "time": {"from": "now-6h", "to": "now"}, + "title": "Forgejo", + "uid": "edp-forgejo" + } diff --git a/otc/observability.buildth.ing/stacks/observability/grafana-operator/manifests/platform-overview.yaml b/otc/observability.buildth.ing/stacks/observability/grafana-operator/manifests/platform-overview.yaml index aa8be4c..d4102fb 100644 --- a/otc/observability.buildth.ing/stacks/observability/grafana-operator/manifests/platform-overview.yaml +++ b/otc/observability.buildth.ing/stacks/observability/grafana-operator/manifests/platform-overview.yaml @@ -216,7 +216,10 @@ spec: "includeAll": true, "multi": true, "name": "cluster_environment", + "label": "Environment", "query": "label_values(up, cluster_environment)", + "refresh": 2, + "sort": 1, "type": "query" } ] From 91db8038e68e1dbdfb8fab012863777d9b055106 Mon Sep 17 00:00:00 2001 From: Daniel Sy Date: Fri, 19 Jun 2026 13:02:35 +0200 Subject: [PATCH 100/114] =?UTF-8?q?feat(observability):=20=E2=9C=A8=20cust?= =?UTF-8?q?om=20ArgoCD=20dashboard=20with=20cluster=5Fenvironment=20filter?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- 
.../manifests/garm-scrape.yaml | 2 +- .../manifests/argocd-operational.yaml | 133 +++++++++++++++++- .../manifests/garm-scrape.yaml | 2 +- 3 files changed, 132 insertions(+), 5 deletions(-) diff --git a/otc/dev.t09.de/stacks/observability/victoria-k8s-stack/manifests/garm-scrape.yaml b/otc/dev.t09.de/stacks/observability/victoria-k8s-stack/manifests/garm-scrape.yaml index 6fc8de6..4b5807e 100644 --- a/otc/dev.t09.de/stacks/observability/victoria-k8s-stack/manifests/garm-scrape.yaml +++ b/otc/dev.t09.de/stacks/observability/victoria-k8s-stack/manifests/garm-scrape.yaml @@ -11,4 +11,4 @@ spec: matchLabels: app.kubernetes.io/name: garm endpoints: - - port: metrics + - port: http diff --git a/otc/observability.buildth.ing/stacks/observability/grafana-operator/manifests/argocd-operational.yaml b/otc/observability.buildth.ing/stacks/observability/grafana-operator/manifests/argocd-operational.yaml index b3fa256..f37cf03 100644 --- a/otc/observability.buildth.ing/stacks/observability/grafana-operator/manifests/argocd-operational.yaml +++ b/otc/observability.buildth.ing/stacks/observability/grafana-operator/manifests/argocd-operational.yaml @@ -6,6 +6,133 @@ spec: instanceSelector: matchLabels: dashboards: "grafana" - grafanaCom: - id: 19993 - revision: 2 + json: | + { + "annotations": {"list": []}, + "editable": true, + "graphTooltip": 1, + "panels": [ + { + "collapsed": false, + "gridPos": {"h": 1, "w": 24, "x": 0, "y": 0}, + "title": "Application Status", + "type": "row" + }, + { + "datasource": {"type": "prometheus"}, + "fieldConfig": {"defaults": {"unit": "short", "thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}]}}}, + "gridPos": {"h": 4, "w": 4, "x": 0, "y": 1}, + "title": "Total Apps", + "type": "stat", + "targets": [{"expr": "count(argocd_app_info{cluster_environment=~\"$cluster_environment\"})", "legendFormat": ""}] + }, + { + "datasource": {"type": "prometheus"}, + "fieldConfig": {"defaults": {"unit": "short", "thresholds": {"mode": 
"absolute", "steps": [{"color": "green", "value": null}]}}}, + "gridPos": {"h": 4, "w": 4, "x": 4, "y": 1}, + "title": "Healthy", + "type": "stat", + "targets": [{"expr": "count(argocd_app_info{cluster_environment=~\"$cluster_environment\", health_status=\"Healthy\"})", "legendFormat": ""}] + }, + { + "datasource": {"type": "prometheus"}, + "fieldConfig": {"defaults": {"unit": "short", "thresholds": {"mode": "absolute", "steps": [{"color": "red", "value": null}]}}}, + "gridPos": {"h": 4, "w": 4, "x": 8, "y": 1}, + "title": "Degraded", + "type": "stat", + "targets": [{"expr": "count(argocd_app_info{cluster_environment=~\"$cluster_environment\", health_status=\"Degraded\"}) or vector(0)", "legendFormat": ""}] + }, + { + "datasource": {"type": "prometheus"}, + "fieldConfig": {"defaults": {"unit": "short", "thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}]}}}, + "gridPos": {"h": 4, "w": 4, "x": 12, "y": 1}, + "title": "Synced", + "type": "stat", + "targets": [{"expr": "count(argocd_app_info{cluster_environment=~\"$cluster_environment\", sync_status=\"Synced\"})", "legendFormat": ""}] + }, + { + "datasource": {"type": "prometheus"}, + "fieldConfig": {"defaults": {"unit": "short", "thresholds": {"mode": "absolute", "steps": [{"color": "yellow", "value": null}]}}}, + "gridPos": {"h": 4, "w": 4, "x": 16, "y": 1}, + "title": "OutOfSync", + "type": "stat", + "targets": [{"expr": "count(argocd_app_info{cluster_environment=~\"$cluster_environment\", sync_status=\"OutOfSync\"}) or vector(0)", "legendFormat": ""}] + }, + { + "datasource": {"type": "prometheus"}, + "fieldConfig": {"defaults": {"unit": "short", "thresholds": {"mode": "absolute", "steps": [{"color": "orange", "value": null}]}}}, + "gridPos": {"h": 4, "w": 4, "x": 20, "y": 1}, + "title": "Progressing", + "type": "stat", + "targets": [{"expr": "count(argocd_app_info{cluster_environment=~\"$cluster_environment\", health_status=\"Progressing\"}) or vector(0)", "legendFormat": ""}] + }, + { 
+ "collapsed": false, + "gridPos": {"h": 1, "w": 24, "x": 0, "y": 5}, + "title": "Application Details", + "type": "row" + }, + { + "datasource": {"type": "prometheus"}, + "fieldConfig": { + "defaults": {"custom": {"filterable": true}}, + "overrides": [ + {"matcher": {"id": "byName", "options": "Health"}, "properties": [{"id": "custom.cellOptions", "value": {"type": "color-text"}}, {"id": "mappings", "value": [{"options": {"Healthy": {"color": "green", "text": "Healthy"}, "Degraded": {"color": "red", "text": "Degraded"}, "Progressing": {"color": "yellow", "text": "Progressing"}, "Missing": {"color": "purple", "text": "Missing"}}, "type": "value"}]}]}, + {"matcher": {"id": "byName", "options": "Sync"}, "properties": [{"id": "custom.cellOptions", "value": {"type": "color-text"}}, {"id": "mappings", "value": [{"options": {"Synced": {"color": "green", "text": "Synced"}, "OutOfSync": {"color": "orange", "text": "OutOfSync"}}, "type": "value"}]}]} + ] + }, + "gridPos": {"h": 12, "w": 24, "x": 0, "y": 6}, + "title": "All Applications", + "type": "table", + "targets": [{"expr": "argocd_app_info{cluster_environment=~\"$cluster_environment\"}", "format": "table", "instant": true, "legendFormat": ""}], + "transformations": [ + {"id": "filterFieldsByName", "options": {"include": {"names": ["cluster_environment", "name", "dest_namespace", "health_status", "sync_status", "repo"]}}}, + {"id": "organize", "options": {"renameByName": {"cluster_environment": "Environment", "name": "Application", "dest_namespace": "Namespace", "health_status": "Health", "sync_status": "Sync", "repo": "Repository"}}} + ] + }, + { + "collapsed": false, + "gridPos": {"h": 1, "w": 24, "x": 0, "y": 18}, + "title": "Sync Activity", + "type": "row" + }, + { + "datasource": {"type": "prometheus"}, + "fieldConfig": {"defaults": {"unit": "ops"}}, + "gridPos": {"h": 8, "w": 12, "x": 0, "y": 19}, + "title": "Sync Operations (rate)", + "type": "timeseries", + "targets": [{"expr": 
"sum(rate(argocd_app_sync_total{cluster_environment=~\"$cluster_environment\"}[5m])) by (name, phase)", "legendFormat": "{{name}} ({{phase}})"}] + }, + { + "datasource": {"type": "prometheus"}, + "fieldConfig": {"defaults": {"unit": "ops"}}, + "gridPos": {"h": 8, "w": 12, "x": 12, "y": 19}, + "title": "Reconciliation Rate", + "type": "timeseries", + "targets": [{"expr": "sum(rate(argocd_app_reconcile_count{cluster_environment=~\"$cluster_environment\"}[5m])) by (namespace)", "legendFormat": "{{namespace}}"}] + } + ], + "schemaVersion": 39, + "tags": ["edp", "argocd", "gitops"], + "templating": { + "list": [ + { + "current": {"selected": true, "text": "All", "value": "$__all"}, + "datasource": {"type": "prometheus"}, + "definition": "label_values(argocd_app_info, cluster_environment)", + "includeAll": true, + "multi": true, + "name": "cluster_environment", + "label": "Environment", + "query": "label_values(argocd_app_info, cluster_environment)", + "refresh": 2, + "sort": 1, + "type": "query" + } + ] + }, + "time": {"from": "now-6h", "to": "now"}, + "title": "ArgoCD Operations", + "uid": "edp-argocd-ops" + } diff --git a/otc/observability.buildth.ing/stacks/observability/victoria-k8s-stack/manifests/garm-scrape.yaml b/otc/observability.buildth.ing/stacks/observability/victoria-k8s-stack/manifests/garm-scrape.yaml index a4c6119..f73afa8 100644 --- a/otc/observability.buildth.ing/stacks/observability/victoria-k8s-stack/manifests/garm-scrape.yaml +++ b/otc/observability.buildth.ing/stacks/observability/victoria-k8s-stack/manifests/garm-scrape.yaml @@ -10,4 +10,4 @@ spec: matchLabels: app.kubernetes.io/name: garm endpoints: - - port: metrics + - port: http From 6ea1e798d2592c1b627636808928c3628f3ba389 Mon Sep 17 00:00:00 2001 From: Daniel Sy Date: Fri, 19 Jun 2026 13:06:19 +0200 Subject: [PATCH 101/114] =?UTF-8?q?fix(observability):=20=F0=9F=90=9B=20ad?= =?UTF-8?q?d=20missing=20manifests=20to=20instance=20stacks?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 
Content-Transfer-Encoding: 8bit - backup-alerts.yaml → observability.buildth.ing victoria-k8s-stack - forgejo-scrape.yaml → dev.t09.de vm-client-stack --- .../manifests/forgejo-scrape.yaml | 15 ++++ .../manifests/backup-alerts.yaml | 78 +++++++++++++++++++ 2 files changed, 93 insertions(+) create mode 100644 otc/dev.t09.de/stacks/observability-client/vm-client-stack/manifests/forgejo-scrape.yaml create mode 100644 otc/observability.buildth.ing/stacks/observability/victoria-k8s-stack/manifests/backup-alerts.yaml diff --git a/otc/dev.t09.de/stacks/observability-client/vm-client-stack/manifests/forgejo-scrape.yaml b/otc/dev.t09.de/stacks/observability-client/vm-client-stack/manifests/forgejo-scrape.yaml new file mode 100644 index 0000000..aecf517 --- /dev/null +++ b/otc/dev.t09.de/stacks/observability-client/vm-client-stack/manifests/forgejo-scrape.yaml @@ -0,0 +1,15 @@ +apiVersion: operator.victoriametrics.com/v1beta1 +kind: VMServiceScrape +metadata: + name: forgejo + namespace: observability +spec: + namespaceSelector: + matchNames: + - gitea + selector: + matchLabels: + app.kubernetes.io/name: forgejo + endpoints: + - port: http + path: /metrics diff --git a/otc/observability.buildth.ing/stacks/observability/victoria-k8s-stack/manifests/backup-alerts.yaml b/otc/observability.buildth.ing/stacks/observability/victoria-k8s-stack/manifests/backup-alerts.yaml new file mode 100644 index 0000000..259a2bf --- /dev/null +++ b/otc/observability.buildth.ing/stacks/observability/victoria-k8s-stack/manifests/backup-alerts.yaml @@ -0,0 +1,78 @@ +apiVersion: operator.victoriametrics.com/v1beta1 +kind: VMRule +metadata: + name: backup-alerts + namespace: observability +spec: + groups: + - name: backup-schedule-staleness + rules: + - alert: BackupCronJobNotScheduled + expr: | + time() - kube_cronjob_status_last_schedule_time{cronjob=~"forgejo-s3-backup|secrets-backup", namespace="gitea"} + > 26 * 3600 + for: 5m + labels: + severity: critical + cronjob: "{{ $labels.cronjob }}" + 
annotations: + value: "{{ $value | humanizeDuration }}" + description: >- + CronJob {{ $labels.namespace }}/{{ $labels.cronjob }} has not been + scheduled for over 26 hours in cluster {{ $labels.cluster_environment }}. + Last schedule was {{ $value | humanizeDuration }} ago. + summary: "Backup CronJob {{ $labels.cronjob }} is stale" + + - alert: BackupCronJobNeverScheduled + expr: | + (time() - kube_cronjob_created{cronjob=~"forgejo-s3-backup|secrets-backup", namespace="gitea"} > 26 * 3600) + unless on(cluster_environment, namespace, cronjob) kube_cronjob_status_last_schedule_time{namespace="gitea"} + for: 30m + labels: + severity: critical + cronjob: "{{ $labels.cronjob }}" + annotations: + description: >- + CronJob {{ $labels.namespace }}/{{ $labels.cronjob }} has never been + scheduled in cluster {{ $labels.cluster_environment }}. + summary: "Backup CronJob {{ $labels.cronjob }} never ran" + + - name: backup-job-failures + rules: + - alert: BackupJobFailed + expr: | + max by(cluster_environment, namespace, job_name) ( + kube_job_status_failed{job_name=~"forgejo-s3-backup-.*|secrets-backup-.*", namespace="gitea"} + ) > 0 + for: 30s + labels: + severity: critical + job_name: "{{ $labels.job_name }}" + annotations: + value: "{{ $value }}" + description: >- + Backup job {{ $labels.namespace }}/{{ $labels.job_name }} has + {{ $value }} failed pod(s) in cluster {{ $labels.cluster_environment }}. + summary: "Backup job {{ $labels.job_name }} failed" + + - name: backup-job-duration + rules: + - alert: BackupJobTooSlow + expr: | + ( + time() - kube_job_status_start_time{job_name=~"forgejo-s3-backup-.*|secrets-backup-.*", namespace="gitea"} + ) > 300 + and + kube_job_status_active{job_name=~"forgejo-s3-backup-.*|secrets-backup-.*", namespace="gitea"} > 0 + for: 1m + labels: + severity: major + job_name: "{{ $labels.job_name }}" + annotations: + value: "{{ $value | humanizeDuration }}" + description: >- + Backup job {{ $labels.namespace }}/{{ $labels.job_name }} has been + running for {{ $value | humanizeDuration }} (threshold: 5m) + in cluster {{ $labels.cluster_environment }}.
This may indicate a + hung process or connectivity issue. + summary: "Backup job {{ $labels.job_name }} running too long" From 076b2a16c9129f5f53bdb139f9d1abeb78e99143 Mon Sep 17 00:00:00 2001 From: Daniel Sy Date: Fri, 19 Jun 2026 13:11:32 +0200 Subject: [PATCH 102/114] =?UTF-8?q?fix(observability):=20=F0=9F=90=9B=20fi?= =?UTF-8?q?x=20datasource=20UIDs,=20replace=20cronjob=20dashboard,=20add?= =?UTF-8?q?=20GARM?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Remove all ${DS_VICTORIAMETRICS} uid refs from platform-overview; use type-only datasource so grafana-operator resolves default prometheus DS - Replace grafanaCom id:14279 cronjob dashboard with inline custom version supporting cluster_environment variable (dev/edp/observability) - Add new GARM runners dashboard (edp-garm) ready for when GARM metrics are scraped; uses or vector(0) guards so panels show 0 not empty Note: cluster_environment values confirmed as dev/edp/observability (no benchmark). GARM metrics not yet present in VictoriaMetrics (0 series found). 
--- .../manifests/cronjob-monitoring.yaml | 97 ++++++++++++++- .../grafana-operator/manifests/garm.yaml | 116 ++++++++++++++++++ .../manifests/platform-overview.yaml | 36 +++--- 3 files changed, 228 insertions(+), 21 deletions(-) create mode 100644 otc/observability.buildth.ing/stacks/observability/grafana-operator/manifests/garm.yaml diff --git a/otc/observability.buildth.ing/stacks/observability/grafana-operator/manifests/cronjob-monitoring.yaml b/otc/observability.buildth.ing/stacks/observability/grafana-operator/manifests/cronjob-monitoring.yaml index e77eb20..5b5eeac 100644 --- a/otc/observability.buildth.ing/stacks/observability/grafana-operator/manifests/cronjob-monitoring.yaml +++ b/otc/observability.buildth.ing/stacks/observability/grafana-operator/manifests/cronjob-monitoring.yaml @@ -6,6 +6,97 @@ spec: instanceSelector: matchLabels: dashboards: "grafana" - grafanaCom: - id: 14279 - revision: 1 + json: | + { + "annotations": {"list": []}, + "editable": true, + "graphTooltip": 1, + "panels": [ + { + "collapsed": false, + "gridPos": {"h": 1, "w": 24, "x": 0, "y": 0}, + "title": "Backup Job Status", + "type": "row" + }, + { + "datasource": {"type": "prometheus"}, + "fieldConfig": {"defaults": {"unit": "s", "thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}, {"color": "yellow", "value": 86400}, {"color": "red", "value": 172800}]}}}, + "gridPos": {"h": 5, "w": 12, "x": 0, "y": 1}, + "title": "Time Since Last Schedule", + "type": "stat", + "targets": [{"expr": "time() - kube_cronjob_status_last_schedule_time{cluster_environment=~\"$cluster_environment\"}", "legendFormat": "{{cronjob}} ({{cluster_environment}})"}] + }, + { + "datasource": {"type": "prometheus"}, + "fieldConfig": {"defaults": {"unit": "short", "thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}, {"color": "red", "value": 1}]}}}, + "gridPos": {"h": 5, "w": 12, "x": 12, "y": 1}, + "title": "Failed Jobs (Active)", + "type": "stat", + "targets": 
[{"expr": "sum by(cluster_environment, job_name) (kube_job_status_failed{cluster_environment=~\"$cluster_environment\"}) > 0", "legendFormat": "{{job_name}} ({{cluster_environment}})"}] + }, + { + "collapsed": false, + "gridPos": {"h": 1, "w": 24, "x": 0, "y": 6}, + "title": "CronJob Overview", + "type": "row" + }, + { + "datasource": {"type": "prometheus"}, + "fieldConfig": {"defaults": {"custom": {"filterable": true}}, "overrides": [{"matcher": {"id": "byName", "options": "Suspended"}, "properties": [{"id": "mappings", "value": [{"options": {"0": {"text": "No", "color": "green"}, "1": {"text": "YES", "color": "red"}}, "type": "value"}]}]}]}, + "gridPos": {"h": 8, "w": 24, "x": 0, "y": 7}, + "title": "All CronJobs", + "type": "table", + "targets": [ + {"expr": "kube_cronjob_info{cluster_environment=~\"$cluster_environment\"}", "format": "table", "instant": true, "refId": "A"} + ], + "transformations": [ + {"id": "filterFieldsByName", "options": {"include": {"names": ["cluster_environment", "cronjob", "namespace", "schedule"]}}}, + {"id": "organize", "options": {"renameByName": {"cluster_environment": "Environment", "cronjob": "CronJob", "namespace": "Namespace", "schedule": "Schedule"}}} + ] + }, + { + "collapsed": false, + "gridPos": {"h": 1, "w": 24, "x": 0, "y": 15}, + "title": "Job History", + "type": "row" + }, + { + "datasource": {"type": "prometheus"}, + "fieldConfig": {"defaults": {"unit": "short"}}, + "gridPos": {"h": 8, "w": 12, "x": 0, "y": 16}, + "title": "Job Completions (24h)", + "type": "timeseries", + "targets": [{"expr": "sum(kube_job_status_succeeded{cluster_environment=~\"$cluster_environment\"}) by (job_name, cluster_environment)", "legendFormat": "{{job_name}} ({{cluster_environment}})"}] + }, + { + "datasource": {"type": "prometheus"}, + "fieldConfig": {"defaults": {"unit": "short", "color": {"mode": "palette-classic"}}}, + "gridPos": {"h": 8, "w": 12, "x": 12, "y": 16}, + "title": "Job Failures (24h)", + "type": "timeseries", + "targets": 
[{"expr": "sum(kube_job_status_failed{cluster_environment=~\"$cluster_environment\"}) by (job_name, cluster_environment)", "legendFormat": "{{job_name}} ({{cluster_environment}})"}] + } + ], + "schemaVersion": 39, + "tags": ["edp", "backup", "cronjob"], + "templating": { + "list": [ + { + "current": {"selected": true, "text": "All", "value": "$__all"}, + "datasource": {"type": "prometheus"}, + "definition": "label_values(kube_cronjob_info, cluster_environment)", + "includeAll": true, + "multi": true, + "name": "cluster_environment", + "label": "Environment", + "query": "label_values(kube_cronjob_info, cluster_environment)", + "refresh": 2, + "sort": 1, + "type": "query" + } + ] + }, + "time": {"from": "now-24h", "to": "now"}, + "title": "CronJob & Backup Monitoring", + "uid": "edp-cronjobs" + } diff --git a/otc/observability.buildth.ing/stacks/observability/grafana-operator/manifests/garm.yaml b/otc/observability.buildth.ing/stacks/observability/grafana-operator/manifests/garm.yaml new file mode 100644 index 0000000..9e01a51 --- /dev/null +++ b/otc/observability.buildth.ing/stacks/observability/grafana-operator/manifests/garm.yaml @@ -0,0 +1,116 @@ +apiVersion: grafana.integreatly.org/v1beta1 +kind: GrafanaDashboard +metadata: + name: garm +spec: + instanceSelector: + matchLabels: + dashboards: "grafana" + json: | + { + "annotations": {"list": []}, + "editable": true, + "graphTooltip": 1, + "panels": [ + { + "collapsed": false, + "gridPos": {"h": 1, "w": 24, "x": 0, "y": 0}, + "title": "GARM Runner Status", + "type": "row" + }, + { + "datasource": {"type": "prometheus"}, + "fieldConfig": {"defaults": {"unit": "short", "thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}]}}}, + "gridPos": {"h": 5, "w": 6, "x": 0, "y": 1}, + "title": "Total Runners", + "type": "stat", + "targets": [{"expr": "count(garm_runner_status{cluster_environment=~\"$cluster_environment\"}) or vector(0)", "legendFormat": ""}] + }, + { + "datasource": {"type": 
"prometheus"}, + "fieldConfig": {"defaults": {"unit": "short", "thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}]}}}, + "gridPos": {"h": 5, "w": 6, "x": 6, "y": 1}, + "title": "Idle Runners", + "type": "stat", + "targets": [{"expr": "count(garm_runner_status{cluster_environment=~\"$cluster_environment\", status=\"idle\"}) or vector(0)", "legendFormat": ""}] + }, + { + "datasource": {"type": "prometheus"}, + "fieldConfig": {"defaults": {"unit": "short", "thresholds": {"mode": "absolute", "steps": [{"color": "yellow", "value": null}]}}}, + "gridPos": {"h": 5, "w": 6, "x": 12, "y": 1}, + "title": "Creating", + "type": "stat", + "targets": [{"expr": "count(garm_runner_status{cluster_environment=~\"$cluster_environment\", status=\"creating\"}) or vector(0)", "legendFormat": ""}] + }, + { + "datasource": {"type": "prometheus"}, + "fieldConfig": {"defaults": {"unit": "short", "thresholds": {"mode": "absolute", "steps": [{"color": "red", "value": null}]}}}, + "gridPos": {"h": 5, "w": 6, "x": 18, "y": 1}, + "title": "Errors", + "type": "stat", + "targets": [{"expr": "sum(rate(garm_runner_errors_total{cluster_environment=~\"$cluster_environment\"}[5m])) or vector(0)", "legendFormat": ""}] + }, + { + "collapsed": false, + "gridPos": {"h": 1, "w": 24, "x": 0, "y": 6}, + "title": "GitHub API Rate Limits", + "type": "row" + }, + { + "datasource": {"type": "prometheus"}, + "fieldConfig": {"defaults": {"unit": "short", "min": 0}}, + "gridPos": {"h": 8, "w": 12, "x": 0, "y": 7}, + "title": "Rate Limit Remaining", + "type": "timeseries", + "targets": [{"expr": "garm_github_rate_limit_remaining{cluster_environment=~\"$cluster_environment\"}", "legendFormat": "{{cluster_environment}}"}] + }, + { + "datasource": {"type": "prometheus"}, + "fieldConfig": {"defaults": {"unit": "ops"}}, + "gridPos": {"h": 8, "w": 12, "x": 12, "y": 7}, + "title": "Runner Operations Rate", + "type": "timeseries", + "targets": [{"expr": 
"sum(rate(garm_runner_operations_total{cluster_environment=~\"$cluster_environment\"}[5m])) by (cluster_environment)", "legendFormat": "{{cluster_environment}}"}] + }, + { + "collapsed": false, + "gridPos": {"h": 1, "w": 24, "x": 0, "y": 15}, + "title": "Runner Details", + "type": "row" + }, + { + "datasource": {"type": "prometheus"}, + "fieldConfig": {"defaults": {"custom": {"filterable": true}}}, + "gridPos": {"h": 8, "w": 24, "x": 0, "y": 16}, + "title": "Runner Pool Status", + "type": "table", + "targets": [{"expr": "garm_runner_status{cluster_environment=~\"$cluster_environment\"}", "format": "table", "instant": true}], + "transformations": [ + {"id": "filterFieldsByName", "options": {"include": {"names": ["cluster_environment", "name", "status", "pool_owner", "pool_type", "provider"]}}}, + {"id": "organize", "options": {"renameByName": {"cluster_environment": "Environment", "name": "Runner", "status": "Status", "pool_owner": "Pool Owner", "pool_type": "Type", "provider": "Provider"}}} + ] + } + ], + "schemaVersion": 39, + "tags": ["edp", "garm", "ci-cd", "runners"], + "templating": { + "list": [ + { + "current": {"selected": true, "text": "All", "value": "$__all"}, + "datasource": {"type": "prometheus"}, + "definition": "label_values(garm_runner_status, cluster_environment)", + "includeAll": true, + "multi": true, + "name": "cluster_environment", + "label": "Environment", + "query": "label_values(garm_runner_status, cluster_environment)", + "refresh": 2, + "sort": 1, + "type": "query" + } + ] + }, + "time": {"from": "now-6h", "to": "now"}, + "title": "GARM Runners", + "uid": "edp-garm" + } diff --git a/otc/observability.buildth.ing/stacks/observability/grafana-operator/manifests/platform-overview.yaml b/otc/observability.buildth.ing/stacks/observability/grafana-operator/manifests/platform-overview.yaml index d4102fb..ac099d0 100644 --- a/otc/observability.buildth.ing/stacks/observability/grafana-operator/manifests/platform-overview.yaml +++ 
b/otc/observability.buildth.ing/stacks/observability/grafana-operator/manifests/platform-overview.yaml @@ -21,7 +21,7 @@ spec: "type": "row" }, { - "datasource": {"type": "prometheus", "uid": "${DS_VICTORIAMETRICS}"}, + "datasource": {"type": "prometheus"}, "fieldConfig": { "defaults": { "mappings": [{"options": {"0": {"text": "DOWN", "color": "red"}, "1": {"text": "UP", "color": "green"}}, "type": "value"}], @@ -34,7 +34,7 @@ spec: "targets": [{"expr": "sum(up{job=\"forgejo-server-http\", cluster_environment=~\"$cluster_environment\"})", "legendFormat": ""}] }, { - "datasource": {"type": "prometheus", "uid": "${DS_VICTORIAMETRICS}"}, + "datasource": {"type": "prometheus"}, "fieldConfig": { "defaults": { "thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}, {"color": "yellow", "value": 1}, {"color": "red", "value": 3}]} @@ -46,7 +46,7 @@ spec: "targets": [{"expr": "sum(rate(nginx_ingress_controller_requests{status=~\"5..\", cluster_environment=~\"$cluster_environment\"}[5m]))", "legendFormat": ""}] }, { - "datasource": {"type": "prometheus", "uid": "${DS_VICTORIAMETRICS}"}, + "datasource": {"type": "prometheus"}, "fieldConfig": { "defaults": { "unit": "short", @@ -59,7 +59,7 @@ spec: "targets": [{"expr": "sum(kube_job_status_failed{namespace=\"gitea\", cluster_environment=~\"$cluster_environment\"}) or vector(0)", "legendFormat": ""}] }, { - "datasource": {"type": "prometheus", "uid": "${DS_VICTORIAMETRICS}"}, + "datasource": {"type": "prometheus"}, "fieldConfig": { "defaults": { "unit": "percentunit", @@ -72,7 +72,7 @@ spec: "targets": [{"expr": "1 - avg(rate(node_cpu_seconds_total{mode=\"idle\", cluster_environment=~\"$cluster_environment\"}[5m]))", "legendFormat": ""}] }, { - "datasource": {"type": "prometheus", "uid": "${DS_VICTORIAMETRICS}"}, + "datasource": {"type": "prometheus"}, "fieldConfig": { "defaults": { "unit": "percentunit", @@ -85,7 +85,7 @@ spec: "targets": [{"expr": "1 - 
sum(node_memory_MemAvailable_bytes{cluster_environment=~\"$cluster_environment\"}) / sum(node_memory_MemTotal_bytes{cluster_environment=~\"$cluster_environment\"})", "legendFormat": ""}] }, { - "datasource": {"type": "prometheus", "uid": "${DS_VICTORIAMETRICS}"}, + "datasource": {"type": "prometheus"}, "fieldConfig": { "defaults": { "unit": "percentunit", @@ -104,7 +104,7 @@ spec: "type": "row" }, { - "datasource": {"type": "prometheus", "uid": "${DS_VICTORIAMETRICS}"}, + "datasource": {"type": "prometheus"}, "fieldConfig": {"defaults": {"unit": "short"}}, "gridPos": {"h": 4, "w": 4, "x": 0, "y": 6}, "title": "Repositories", @@ -112,7 +112,7 @@ spec: "targets": [{"expr": "gitea_repositories{cluster_environment=~\"$cluster_environment\"}", "legendFormat": "{{cluster_environment}}"}] }, { - "datasource": {"type": "prometheus", "uid": "${DS_VICTORIAMETRICS}"}, + "datasource": {"type": "prometheus"}, "fieldConfig": {"defaults": {"unit": "short"}}, "gridPos": {"h": 4, "w": 4, "x": 4, "y": 6}, "title": "Users", @@ -120,7 +120,7 @@ spec: "targets": [{"expr": "gitea_users{cluster_environment=~\"$cluster_environment\"}", "legendFormat": "{{cluster_environment}}"}] }, { - "datasource": {"type": "prometheus", "uid": "${DS_VICTORIAMETRICS}"}, + "datasource": {"type": "prometheus"}, "fieldConfig": {"defaults": {"unit": "short"}}, "gridPos": {"h": 4, "w": 4, "x": 8, "y": 6}, "title": "Organizations", @@ -128,7 +128,7 @@ spec: "targets": [{"expr": "gitea_organizations{cluster_environment=~\"$cluster_environment\"}", "legendFormat": "{{cluster_environment}}"}] }, { - "datasource": {"type": "prometheus", "uid": "${DS_VICTORIAMETRICS}"}, + "datasource": {"type": "prometheus"}, "fieldConfig": {"defaults": {"unit": "short"}}, "gridPos": {"h": 4, "w": 4, "x": 12, "y": 6}, "title": "Open Issues", @@ -136,7 +136,7 @@ spec: "targets": [{"expr": "gitea_issues_open{cluster_environment=~\"$cluster_environment\"}", "legendFormat": "{{cluster_environment}}"}] }, { - "datasource": {"type": 
"prometheus", "uid": "${DS_VICTORIAMETRICS}"}, + "datasource": {"type": "prometheus"}, "fieldConfig": {"defaults": {"unit": "short"}}, "gridPos": {"h": 4, "w": 4, "x": 16, "y": 6}, "title": "Webhooks", @@ -144,7 +144,7 @@ spec: "targets": [{"expr": "gitea_webhooks{cluster_environment=~\"$cluster_environment\"}", "legendFormat": "{{cluster_environment}}"}] }, { - "datasource": {"type": "prometheus", "uid": "${DS_VICTORIAMETRICS}"}, + "datasource": {"type": "prometheus"}, "fieldConfig": {"defaults": {"unit": "short"}}, "gridPos": {"h": 4, "w": 4, "x": 20, "y": 6}, "title": "Mirrors", @@ -158,7 +158,7 @@ spec: "type": "row" }, { - "datasource": {"type": "prometheus", "uid": "${DS_VICTORIAMETRICS}"}, + "datasource": {"type": "prometheus"}, "fieldConfig": {"defaults": {"unit": "percentunit", "min": 0, "max": 1}}, "gridPos": {"h": 8, "w": 12, "x": 0, "y": 11}, "title": "Node CPU Usage", @@ -166,7 +166,7 @@ spec: "targets": [{"expr": "1 - rate(node_cpu_seconds_total{mode=\"idle\", cluster_environment=~\"$cluster_environment\"}[5m])", "legendFormat": "{{instance}}"}] }, { - "datasource": {"type": "prometheus", "uid": "${DS_VICTORIAMETRICS}"}, + "datasource": {"type": "prometheus"}, "fieldConfig": {"defaults": {"unit": "percentunit", "min": 0, "max": 1}}, "gridPos": {"h": 8, "w": 12, "x": 12, "y": 11}, "title": "PVC Usage by Claim", @@ -180,7 +180,7 @@ spec: "type": "row" }, { - "datasource": {"type": "prometheus", "uid": "${DS_VICTORIAMETRICS}"}, + "datasource": {"type": "prometheus"}, "fieldConfig": {"defaults": {"unit": "s", "thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}, {"color": "yellow", "value": 86400}, {"color": "red", "value": 172800}]}}}, "gridPos": {"h": 4, "w": 8, "x": 0, "y": 20}, "title": "Time Since Last Backup Schedule", @@ -188,7 +188,7 @@ spec: "targets": [{"expr": "time() - kube_cronjob_status_last_schedule_time{cronjob=~\"forgejo-s3-backup|secrets-backup\", cluster_environment=~\"$cluster_environment\"}", "legendFormat": 
"{{cronjob}} ({{cluster_environment}})"}] }, { - "datasource": {"type": "prometheus", "uid": "${DS_VICTORIAMETRICS}"}, + "datasource": {"type": "prometheus"}, "fieldConfig": {"defaults": {"unit": "s"}}, "gridPos": {"h": 4, "w": 8, "x": 8, "y": 20}, "title": "Backup Job Duration (Last 7d)", @@ -197,7 +197,7 @@ spec: "options": {"legend": {"displayMode": "table"}} }, { - "datasource": {"type": "prometheus", "uid": "${DS_VICTORIAMETRICS}"}, + "datasource": {"type": "prometheus"}, "fieldConfig": {"defaults": {"unit": "short", "thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}, {"color": "red", "value": 1}]}}}, "gridPos": {"h": 4, "w": 8, "x": 16, "y": 20}, "title": "Failed Backup Jobs (Active)", @@ -211,7 +211,7 @@ spec: "list": [ { "current": {"selected": true, "text": "All", "value": "$__all"}, - "datasource": {"type": "prometheus", "uid": "${DS_VICTORIAMETRICS}"}, + "datasource": {"type": "prometheus"}, "definition": "label_values(up, cluster_environment)", "includeAll": true, "multi": true, From 238ef71630a9dbde930d119b9d9d9efa2efe6527 Mon Sep 17 00:00:00 2001 From: Daniel Sy Date: Fri, 19 Jun 2026 13:23:37 +0200 Subject: [PATCH 103/114] =?UTF-8?q?fix(observability):=20=F0=9F=90=9B=20fi?= =?UTF-8?q?x=20remote=20write=20URL=20and=20add=20manifests=20for=20benchm?= =?UTF-8?q?ark=20+=20edp=20clients?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Fix broken remote write URL (o12y.observability./ → o12y.observability.buildth.ing/) - Create manifests/ dirs with .gitkeep for benchmark.t09.de and edp.buildth.ing - Copy forgejo-scrape.yaml VMServiceScrape manifest to both instances --- .../vm-client-stack/manifests/.gitkeep | 0 .../vm-client-stack/manifests/forgejo-scrape.yaml | 15 +++++++++++++++ .../vm-client-stack/values.yaml | 2 +- .../vm-client-stack/manifests/.gitkeep | 0 .../vm-client-stack/manifests/forgejo-scrape.yaml | 15 +++++++++++++++ .../vm-client-stack/values.yaml | 2 +- 6 files 
changed, 32 insertions(+), 2 deletions(-) create mode 100644 otc/benchmark.t09.de/stacks/observability-client/vm-client-stack/manifests/.gitkeep create mode 100644 otc/benchmark.t09.de/stacks/observability-client/vm-client-stack/manifests/forgejo-scrape.yaml create mode 100644 otc/edp.buildth.ing/stacks/observability-client/vm-client-stack/manifests/.gitkeep create mode 100644 otc/edp.buildth.ing/stacks/observability-client/vm-client-stack/manifests/forgejo-scrape.yaml diff --git a/otc/benchmark.t09.de/stacks/observability-client/vm-client-stack/manifests/.gitkeep b/otc/benchmark.t09.de/stacks/observability-client/vm-client-stack/manifests/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/otc/benchmark.t09.de/stacks/observability-client/vm-client-stack/manifests/forgejo-scrape.yaml b/otc/benchmark.t09.de/stacks/observability-client/vm-client-stack/manifests/forgejo-scrape.yaml new file mode 100644 index 0000000..aecf517 --- /dev/null +++ b/otc/benchmark.t09.de/stacks/observability-client/vm-client-stack/manifests/forgejo-scrape.yaml @@ -0,0 +1,15 @@ +apiVersion: operator.victoriametrics.com/v1beta1 +kind: VMServiceScrape +metadata: + name: forgejo + namespace: observability +spec: + namespaceSelector: + matchNames: + - gitea + selector: + matchLabels: + app.kubernetes.io/name: forgejo + endpoints: + - port: http + path: /metrics diff --git a/otc/benchmark.t09.de/stacks/observability-client/vm-client-stack/values.yaml b/otc/benchmark.t09.de/stacks/observability-client/vm-client-stack/values.yaml index dde927b..4bc089d 100644 --- a/otc/benchmark.t09.de/stacks/observability-client/vm-client-stack/values.yaml +++ b/otc/benchmark.t09.de/stacks/observability-client/vm-client-stack/values.yaml @@ -778,7 +778,7 @@ vmagent: # -- Remote write configuration of VMAgent, allowed parameters defined in a [spec](https://docs.victoriametrics.com/operator/api#vmagentremotewritespec) additionalRemoteWrites: # [] - - url: https://o12y.observability./api/v1/write + - 
url: https://o12y.observability.buildth.ing/api/v1/write basicAuth: username: name: simple-user-secret diff --git a/otc/edp.buildth.ing/stacks/observability-client/vm-client-stack/manifests/.gitkeep b/otc/edp.buildth.ing/stacks/observability-client/vm-client-stack/manifests/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/otc/edp.buildth.ing/stacks/observability-client/vm-client-stack/manifests/forgejo-scrape.yaml b/otc/edp.buildth.ing/stacks/observability-client/vm-client-stack/manifests/forgejo-scrape.yaml new file mode 100644 index 0000000..aecf517 --- /dev/null +++ b/otc/edp.buildth.ing/stacks/observability-client/vm-client-stack/manifests/forgejo-scrape.yaml @@ -0,0 +1,15 @@ +apiVersion: operator.victoriametrics.com/v1beta1 +kind: VMServiceScrape +metadata: + name: forgejo + namespace: observability +spec: + namespaceSelector: + matchNames: + - gitea + selector: + matchLabels: + app.kubernetes.io/name: forgejo + endpoints: + - port: http + path: /metrics diff --git a/otc/edp.buildth.ing/stacks/observability-client/vm-client-stack/values.yaml b/otc/edp.buildth.ing/stacks/observability-client/vm-client-stack/values.yaml index 4e1c079..255e9e5 100644 --- a/otc/edp.buildth.ing/stacks/observability-client/vm-client-stack/values.yaml +++ b/otc/edp.buildth.ing/stacks/observability-client/vm-client-stack/values.yaml @@ -778,7 +778,7 @@ vmagent: # -- Remote write configuration of VMAgent, allowed parameters defined in a [spec](https://docs.victoriametrics.com/operator/api#vmagentremotewritespec) additionalRemoteWrites: # [] - - url: https://o12y.observability./api/v1/write + - url: https://o12y.observability.buildth.ing/api/v1/write basicAuth: username: name: simple-user-secret From bcf583a0556af1b1c88246d128e2709a2f9c706e Mon Sep 17 00:00:00 2001 From: Daniel Sy Date: Fri, 19 Jun 2026 13:32:13 +0200 Subject: [PATCH 104/114] =?UTF-8?q?fix(observability):=20=F0=9F=90=9B=20fi?= =?UTF-8?q?x=20Vector=20log=20shipping=20URL=20on=20all=20clusters?= 
MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Restores missing '.buildth.ing' domain segment in Vector elasticsearch endpoint for benchmark, dev, and edp instances. Template source uses {{{ .Env.DOMAIN_O12Y }}} (correct) — instances were mis-hydrated, omitting the TLD suffix. --- .../stacks/observability-client/vector/values.yaml | 2 +- otc/dev.t09.de/stacks/observability-client/vector/values.yaml | 2 +- .../stacks/observability-client/vector/values.yaml | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/otc/benchmark.t09.de/stacks/observability-client/vector/values.yaml b/otc/benchmark.t09.de/stacks/observability-client/vector/values.yaml index 3fb5e53..2393b1a 100644 --- a/otc/benchmark.t09.de/stacks/observability-client/vector/values.yaml +++ b/otc/benchmark.t09.de/stacks/observability-client/vector/values.yaml @@ -48,7 +48,7 @@ customConfig: type: elasticsearch inputs: [parser] endpoints: - - https://o12y.observability./insert/elasticsearch/ + - https://o12y.observability.buildth.ing/insert/elasticsearch/ auth: strategy: basic user: ${VECTOR_USER} diff --git a/otc/dev.t09.de/stacks/observability-client/vector/values.yaml b/otc/dev.t09.de/stacks/observability-client/vector/values.yaml index c0644cf..4d7458a 100644 --- a/otc/dev.t09.de/stacks/observability-client/vector/values.yaml +++ b/otc/dev.t09.de/stacks/observability-client/vector/values.yaml @@ -48,7 +48,7 @@ customConfig: type: elasticsearch inputs: [parser] endpoints: - - https://o12y.observability./insert/elasticsearch/ + - https://o12y.observability.buildth.ing/insert/elasticsearch/ auth: strategy: basic user: ${VECTOR_USER} diff --git a/otc/edp.buildth.ing/stacks/observability-client/vector/values.yaml b/otc/edp.buildth.ing/stacks/observability-client/vector/values.yaml index 7b30cdc..2fefacd 100644 --- a/otc/edp.buildth.ing/stacks/observability-client/vector/values.yaml +++ 
b/otc/edp.buildth.ing/stacks/observability-client/vector/values.yaml @@ -48,7 +48,7 @@ customConfig: type: elasticsearch inputs: [parser] endpoints: - - https://o12y.observability./insert/elasticsearch/ + - https://o12y.observability.buildth.ing/insert/elasticsearch/ auth: strategy: basic user: ${VECTOR_USER} From b6fbd3f6eb92cdb394fe86ef0ddf5c7c6cbd2b3f Mon Sep 17 00:00:00 2001 From: Daniel Sy Date: Fri, 19 Jun 2026 13:34:08 +0200 Subject: [PATCH 105/114] =?UTF-8?q?feat(observability):=20=E2=9C=A8=20add?= =?UTF-8?q?=20VictoriaLogs=20log=20panels=20to=20platform,=20forgejo,=20ar?= =?UTF-8?q?gocd=20dashboards?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../manifests/argocd-operational.yaml | 14 ++++++++++++ .../grafana-operator/manifests/forgejo.yaml | 22 +++++++++++++++++++ .../manifests/platform-overview.yaml | 14 ++++++++++++ 3 files changed, 50 insertions(+) diff --git a/otc/observability.buildth.ing/stacks/observability/grafana-operator/manifests/argocd-operational.yaml b/otc/observability.buildth.ing/stacks/observability/grafana-operator/manifests/argocd-operational.yaml index f37cf03..e8e51a2 100644 --- a/otc/observability.buildth.ing/stacks/observability/grafana-operator/manifests/argocd-operational.yaml +++ b/otc/observability.buildth.ing/stacks/observability/grafana-operator/manifests/argocd-operational.yaml @@ -111,6 +111,20 @@ spec: "title": "Reconciliation Rate", "type": "timeseries", "targets": [{"expr": "sum(rate(argocd_app_reconcile_count{cluster_environment=~\"$cluster_environment\"}[5m])) by (namespace)", "legendFormat": "{{namespace}}"}] + }, + { + "collapsed": false, + "gridPos": {"h": 1, "w": 24, "x": 0, "y": 27}, + "title": "ArgoCD Logs", + "type": "row" + }, + { + "datasource": {"type": "victoriametrics-logs-datasource"}, + "gridPos": {"h": 10, "w": 24, "x": 0, "y": 28}, + "title": "ArgoCD Logs", + "type": "logs", + "targets": [{"expr": "{cluster_environment=~\"$cluster_environment\", 
kubernetes.namespace=\"argocd\"}", "refId": "A"}], + "options": {"showTime": true, "showLabels": true, "wrapLogMessage": true, "enableLogDetails": true, "sortOrder": "Descending"} } ], "schemaVersion": 39, diff --git a/otc/observability.buildth.ing/stacks/observability/grafana-operator/manifests/forgejo.yaml b/otc/observability.buildth.ing/stacks/observability/grafana-operator/manifests/forgejo.yaml index bf566a5..606b601 100644 --- a/otc/observability.buildth.ing/stacks/observability/grafana-operator/manifests/forgejo.yaml +++ b/otc/observability.buildth.ing/stacks/observability/grafana-operator/manifests/forgejo.yaml @@ -158,6 +158,28 @@ spec: "title": "OAuth Apps", "type": "stat", "targets": [{"expr": "gitea_oauths{cluster_environment=~\"$cluster_environment\"}", "legendFormat": "{{cluster_environment}}"}] + }, + { + "collapsed": false, + "gridPos": {"h": 1, "w": 24, "x": 0, "y": 15}, + "title": "Forgejo Logs", + "type": "row" + }, + { + "datasource": {"type": "victoriametrics-logs-datasource"}, + "gridPos": {"h": 10, "w": 12, "x": 0, "y": 16}, + "title": "Forgejo Server Logs", + "type": "logs", + "targets": [{"expr": "{cluster_environment=~\"$cluster_environment\", kubernetes.namespace=\"gitea\"}", "refId": "A"}], + "options": {"showTime": true, "showLabels": true, "wrapLogMessage": true, "enableLogDetails": true, "sortOrder": "Descending"} + }, + { + "datasource": {"type": "victoriametrics-logs-datasource"}, + "gridPos": {"h": 10, "w": 12, "x": 12, "y": 16}, + "title": "Forgejo Errors", + "type": "logs", + "targets": [{"expr": "{cluster_environment=~\"$cluster_environment\", kubernetes.namespace=\"gitea\"} error OR Error OR ERROR OR panic", "refId": "A"}], + "options": {"showTime": true, "showLabels": true, "wrapLogMessage": true, "enableLogDetails": true, "sortOrder": "Descending"} } ], "schemaVersion": 39, diff --git a/otc/observability.buildth.ing/stacks/observability/grafana-operator/manifests/platform-overview.yaml 
b/otc/observability.buildth.ing/stacks/observability/grafana-operator/manifests/platform-overview.yaml index ac099d0..eab6c9f 100644 --- a/otc/observability.buildth.ing/stacks/observability/grafana-operator/manifests/platform-overview.yaml +++ b/otc/observability.buildth.ing/stacks/observability/grafana-operator/manifests/platform-overview.yaml @@ -203,6 +203,20 @@ spec: "title": "Failed Backup Jobs (Active)", "type": "stat", "targets": [{"expr": "sum by(cluster_environment, job_name) (kube_job_status_failed{job_name=~\"forgejo-s3-backup.*|secrets-backup.*\", cluster_environment=~\"$cluster_environment\"})", "legendFormat": "{{job_name}} ({{cluster_environment}})"}] + }, + { + "collapsed": false, + "gridPos": {"h": 1, "w": 24, "x": 0, "y": 24}, + "title": "Logs", + "type": "row" + }, + { + "datasource": {"type": "victoriametrics-logs-datasource"}, + "gridPos": {"h": 10, "w": 24, "x": 0, "y": 25}, + "title": "Recent Errors (all namespaces)", + "type": "logs", + "targets": [{"expr": "{cluster_environment=~\"$cluster_environment\"} error OR Error OR ERROR OR panic OR PANIC", "refId": "A"}], + "options": {"showTime": true, "showLabels": true, "showCommonLabels": false, "wrapLogMessage": true, "prettifyLogMessage": false, "enableLogDetails": true, "sortOrder": "Descending", "dedupStrategy": "none"} } ], "schemaVersion": 39, From 7f5c680e19b4e35eec48e703f1eea44f36705824 Mon Sep 17 00:00:00 2001 From: Daniel Sy Date: Fri, 19 Jun 2026 13:36:15 +0200 Subject: [PATCH 106/114] =?UTF-8?q?fix(observability):=20=F0=9F=90=9B=20en?= =?UTF-8?q?able=20GARM=20unauthenticated=20metrics=20+=20ArgoCD=20metrics?= =?UTF-8?q?=20on=20all=20instances?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - GARM dev.t09.de: set garm.metrics.disableAuth=true to unblock Prometheus scraping (was 401) - ArgoCD dev.t09.de: add controller/server/repoServer/applicationSet metrics blocks - ArgoCD edp.buildth.ing: add controller/server/repoServer/applicationSet 
metrics blocks - ArgoCD benchmark.t09.de: add controller/server/repoServer/applicationSet metrics blocks - observability.buildth.ing already had metrics enabled (no change needed) --- .../stacks/core/argocd/values.yaml | 24 +++++++++++++++++++ otc/dev.t09.de/stacks/core/argocd/values.yaml | 24 +++++++++++++++++++ otc/dev.t09.de/stacks/garm/garm/values.yaml | 3 +++ .../stacks/core/argocd/values.yaml | 24 +++++++++++++++++++ 4 files changed, 75 insertions(+) diff --git a/otc/benchmark.t09.de/stacks/core/argocd/values.yaml b/otc/benchmark.t09.de/stacks/core/argocd/values.yaml index a6521b0..1591cc9 100644 --- a/otc/benchmark.t09.de/stacks/core/argocd/values.yaml +++ b/otc/benchmark.t09.de/stacks/core/argocd/values.yaml @@ -35,6 +35,30 @@ configs: tls: certificates: +controller: + metrics: + enabled: true + serviceMonitor: + enabled: false + +server: + metrics: + enabled: true + serviceMonitor: + enabled: false + +repoServer: + metrics: + enabled: true + serviceMonitor: + enabled: false + +applicationSet: + metrics: + enabled: true + serviceMonitor: + enabled: false + notifications: enabled: false diff --git a/otc/dev.t09.de/stacks/core/argocd/values.yaml b/otc/dev.t09.de/stacks/core/argocd/values.yaml index dd5b83d..cb856f0 100644 --- a/otc/dev.t09.de/stacks/core/argocd/values.yaml +++ b/otc/dev.t09.de/stacks/core/argocd/values.yaml @@ -35,6 +35,30 @@ configs: tls: certificates: +controller: + metrics: + enabled: true + serviceMonitor: + enabled: false + +server: + metrics: + enabled: true + serviceMonitor: + enabled: false + +repoServer: + metrics: + enabled: true + serviceMonitor: + enabled: false + +applicationSet: + metrics: + enabled: true + serviceMonitor: + enabled: false + notifications: enabled: false diff --git a/otc/dev.t09.de/stacks/garm/garm/values.yaml b/otc/dev.t09.de/stacks/garm/garm/values.yaml index 5baed69..41fc84c 100644 --- a/otc/dev.t09.de/stacks/garm/garm/values.yaml +++ b/otc/dev.t09.de/stacks/garm/garm/values.yaml @@ -41,5 +41,8 @@ 
providerConfig: sidecarImage: edp.buildth.ing/devfw-cicd/ci-sizer-collector:0.0.4 garm: + metrics: + enable: true + disableAuth: true logging: logLevel: info diff --git a/otc/edp.buildth.ing/stacks/core/argocd/values.yaml b/otc/edp.buildth.ing/stacks/core/argocd/values.yaml index 019dc65..c1bde64 100644 --- a/otc/edp.buildth.ing/stacks/core/argocd/values.yaml +++ b/otc/edp.buildth.ing/stacks/core/argocd/values.yaml @@ -35,6 +35,30 @@ configs: tls: certificates: +controller: + metrics: + enabled: true + serviceMonitor: + enabled: false + +server: + metrics: + enabled: true + serviceMonitor: + enabled: false + +repoServer: + metrics: + enabled: true + serviceMonitor: + enabled: false + +applicationSet: + metrics: + enabled: true + serviceMonitor: + enabled: false + notifications: enabled: false From 4591ee7b144d7bad8c30eda360d8e89200121501 Mon Sep 17 00:00:00 2001 From: Daniel Sy Date: Fri, 19 Jun 2026 14:46:35 +0200 Subject: [PATCH 107/114] =?UTF-8?q?feat(observability):=20=F0=9F=97=82?= =?UTF-8?q?=EF=B8=8F=20organize=20dashboards=20into=20Grafana=20folders?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Assigns folder field to all GrafanaDashboard CRs: - EDP / Overview: platform-overview - EDP / Applications: forgejo, argocd-operational, garm, argocd - EDP / Operations: cronjob-monitoring, ingress-nginx, victoria-logs --- .../grafana-operator/manifests/argocd-operational.yaml | 1 + .../stacks/observability/grafana-operator/manifests/argocd.yaml | 1 + .../grafana-operator/manifests/cronjob-monitoring.yaml | 1 + .../stacks/observability/grafana-operator/manifests/forgejo.yaml | 1 + .../stacks/observability/grafana-operator/manifests/garm.yaml | 1 + .../observability/grafana-operator/manifests/ingress-nginx.yaml | 1 + .../grafana-operator/manifests/platform-overview.yaml | 1 + .../observability/grafana-operator/manifests/victoria-logs.yaml | 1 + 8 files changed, 8 insertions(+) diff --git 
a/otc/observability.buildth.ing/stacks/observability/grafana-operator/manifests/argocd-operational.yaml b/otc/observability.buildth.ing/stacks/observability/grafana-operator/manifests/argocd-operational.yaml index e8e51a2..9130b42 100644 --- a/otc/observability.buildth.ing/stacks/observability/grafana-operator/manifests/argocd-operational.yaml +++ b/otc/observability.buildth.ing/stacks/observability/grafana-operator/manifests/argocd-operational.yaml @@ -6,6 +6,7 @@ spec: instanceSelector: matchLabels: dashboards: "grafana" + folder: "EDP / Applications" json: | { "annotations": {"list": []}, diff --git a/otc/observability.buildth.ing/stacks/observability/grafana-operator/manifests/argocd.yaml b/otc/observability.buildth.ing/stacks/observability/grafana-operator/manifests/argocd.yaml index b348ff7..2b81b2b 100644 --- a/otc/observability.buildth.ing/stacks/observability/grafana-operator/manifests/argocd.yaml +++ b/otc/observability.buildth.ing/stacks/observability/grafana-operator/manifests/argocd.yaml @@ -6,4 +6,5 @@ spec: instanceSelector: matchLabels: dashboards: "grafana" + folder: "EDP / Applications" url: "https://raw.githubusercontent.com/argoproj/argo-cd/refs/heads/master/examples/dashboard.json" diff --git a/otc/observability.buildth.ing/stacks/observability/grafana-operator/manifests/cronjob-monitoring.yaml b/otc/observability.buildth.ing/stacks/observability/grafana-operator/manifests/cronjob-monitoring.yaml index 5b5eeac..ddcc883 100644 --- a/otc/observability.buildth.ing/stacks/observability/grafana-operator/manifests/cronjob-monitoring.yaml +++ b/otc/observability.buildth.ing/stacks/observability/grafana-operator/manifests/cronjob-monitoring.yaml @@ -6,6 +6,7 @@ spec: instanceSelector: matchLabels: dashboards: "grafana" + folder: "EDP / Operations" json: | { "annotations": {"list": []}, diff --git a/otc/observability.buildth.ing/stacks/observability/grafana-operator/manifests/forgejo.yaml 
b/otc/observability.buildth.ing/stacks/observability/grafana-operator/manifests/forgejo.yaml index 606b601..ec40751 100644 --- a/otc/observability.buildth.ing/stacks/observability/grafana-operator/manifests/forgejo.yaml +++ b/otc/observability.buildth.ing/stacks/observability/grafana-operator/manifests/forgejo.yaml @@ -6,6 +6,7 @@ spec: instanceSelector: matchLabels: dashboards: "grafana" + folder: "EDP / Applications" json: | { "annotations": {"list": []}, diff --git a/otc/observability.buildth.ing/stacks/observability/grafana-operator/manifests/garm.yaml b/otc/observability.buildth.ing/stacks/observability/grafana-operator/manifests/garm.yaml index 9e01a51..2a23e20 100644 --- a/otc/observability.buildth.ing/stacks/observability/grafana-operator/manifests/garm.yaml +++ b/otc/observability.buildth.ing/stacks/observability/grafana-operator/manifests/garm.yaml @@ -6,6 +6,7 @@ spec: instanceSelector: matchLabels: dashboards: "grafana" + folder: "EDP / Applications" json: | { "annotations": {"list": []}, diff --git a/otc/observability.buildth.ing/stacks/observability/grafana-operator/manifests/ingress-nginx.yaml b/otc/observability.buildth.ing/stacks/observability/grafana-operator/manifests/ingress-nginx.yaml index c13d6a2..077edd8 100644 --- a/otc/observability.buildth.ing/stacks/observability/grafana-operator/manifests/ingress-nginx.yaml +++ b/otc/observability.buildth.ing/stacks/observability/grafana-operator/manifests/ingress-nginx.yaml @@ -6,4 +6,5 @@ spec: instanceSelector: matchLabels: dashboards: "grafana" + folder: "EDP / Operations" url: "https://raw.githubusercontent.com/adinhodovic/ingress-nginx-mixin/refs/heads/main/dashboards_out/ingress-nginx-overview.json" diff --git a/otc/observability.buildth.ing/stacks/observability/grafana-operator/manifests/platform-overview.yaml b/otc/observability.buildth.ing/stacks/observability/grafana-operator/manifests/platform-overview.yaml index eab6c9f..ffce4e2 100644 --- 
a/otc/observability.buildth.ing/stacks/observability/grafana-operator/manifests/platform-overview.yaml +++ b/otc/observability.buildth.ing/stacks/observability/grafana-operator/manifests/platform-overview.yaml @@ -6,6 +6,7 @@ spec: instanceSelector: matchLabels: dashboards: "grafana" + folder: "EDP / Overview" json: | { "annotations": {"list": []}, diff --git a/otc/observability.buildth.ing/stacks/observability/grafana-operator/manifests/victoria-logs.yaml b/otc/observability.buildth.ing/stacks/observability/grafana-operator/manifests/victoria-logs.yaml index 819dec7..c44c474 100644 --- a/otc/observability.buildth.ing/stacks/observability/grafana-operator/manifests/victoria-logs.yaml +++ b/otc/observability.buildth.ing/stacks/observability/grafana-operator/manifests/victoria-logs.yaml @@ -6,6 +6,7 @@ spec: instanceSelector: matchLabels: dashboards: "grafana" + folder: "EDP / Operations" grafanaCom: id: 22698 revision: 1 From b1a00d0395d298e46ed679b6564a3548516c025f Mon Sep 17 00:00:00 2001 From: Daniel Sy Date: Fri, 19 Jun 2026 15:28:03 +0200 Subject: [PATCH 108/114] =?UTF-8?q?fix(observability):=20=F0=9F=90=9B=20ad?= =?UTF-8?q?d=20missing=20simple-user-secret=20to=20hub=20observability=20s?= =?UTF-8?q?tack?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The hub's VMUser (vmauth.yaml) references simple-user-secret via passwordRef, but the Secret was never added to the hub's manifests. Without this Secret, the VM operator cannot reconcile the VMUser into the vmauth config, causing ALL requests to fall through to the unauthorizedUser catch-all (vmsingle). Result: Vector log shipping to VictoriaLogs was broken — vmauth routed /insert/elasticsearch/_bulk to vmsingle instead of vlogs-victorialogs. 
--- .../victoria-k8s-stack/manifests/simple-user-secret.yaml | 9 +++++++++ 1 file changed, 9 insertions(+) create mode 100644 otc/observability.buildth.ing/stacks/observability/victoria-k8s-stack/manifests/simple-user-secret.yaml diff --git a/otc/observability.buildth.ing/stacks/observability/victoria-k8s-stack/manifests/simple-user-secret.yaml b/otc/observability.buildth.ing/stacks/observability/victoria-k8s-stack/manifests/simple-user-secret.yaml new file mode 100644 index 0000000..7013863 --- /dev/null +++ b/otc/observability.buildth.ing/stacks/observability/victoria-k8s-stack/manifests/simple-user-secret.yaml @@ -0,0 +1,9 @@ +apiVersion: v1 +kind: Secret +metadata: + name: simple-user-secret + namespace: observability +type: Opaque +data: + username: c2ltcGxlLXVzZXI= + password: c3g1Z0M3b29XYVdPT0R3RA== From 8488de0c6f8ec81192b2e9bd7c530f48ddc5bc49 Mon Sep 17 00:00:00 2001 From: Daniel Sy Date: Fri, 19 Jun 2026 15:45:48 +0200 Subject: [PATCH 109/114] =?UTF-8?q?fix(observability):=20=F0=9F=90=9B=20us?= =?UTF-8?q?e=20plaintext=20password=20in=20hub=20VMUser=20to=20unblock=20o?= =?UTF-8?q?perator=20reconciliation?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The hub VMUser was using passwordRef pointing to simple-user-secret, but that Secret was not present in the cluster (it exists only in git, as of the previous commit). The VM operator skips VMUser reconciliation when passwordRef cannot resolve, leaving vmauth with only the unauthorizedUser catch-all (vmsingle). Switching to an inline password ensures immediate operator reconciliation without waiting for Secret deployment. The simple-user-secret.yaml manifest is kept for Vector's credential reference. 
--- .../observability/victoria-k8s-stack/manifests/vmauth.yaml | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/otc/observability.buildth.ing/stacks/observability/victoria-k8s-stack/manifests/vmauth.yaml b/otc/observability.buildth.ing/stacks/observability/victoria-k8s-stack/manifests/vmauth.yaml index 5759093..374511d 100644 --- a/otc/observability.buildth.ing/stacks/observability/victoria-k8s-stack/manifests/vmauth.yaml +++ b/otc/observability.buildth.ing/stacks/observability/victoria-k8s-stack/manifests/vmauth.yaml @@ -5,9 +5,7 @@ metadata: namespace: observability spec: username: simple-user - passwordRef: - key: password - name: simple-user-secret + password: sx5gC7ooWaWOODwD targetRefs: - static: url: http://vmsingle-o12y:8429 From f3931dc5509a2b86a8772e2388cd9a1c83e083a7 Mon Sep 17 00:00:00 2001 From: Daniel Sy Date: Fri, 19 Jun 2026 16:07:06 +0200 Subject: [PATCH 110/114] =?UTF-8?q?fix(observability):=20=F0=9F=90=9B=20ad?= =?UTF-8?q?d=20ArgoCD=20+=20GARM=20VMServiceScrapes=20to=20dev=20client=20?= =?UTF-8?q?stack?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../vm-client-stack/manifests/argocd-scrape.yaml | 14 ++++++++++++++ .../vm-client-stack/manifests/garm-scrape.yaml | 15 +++++++++++++++ 2 files changed, 29 insertions(+) create mode 100644 otc/dev.t09.de/stacks/observability-client/vm-client-stack/manifests/argocd-scrape.yaml create mode 100644 otc/dev.t09.de/stacks/observability-client/vm-client-stack/manifests/garm-scrape.yaml diff --git a/otc/dev.t09.de/stacks/observability-client/vm-client-stack/manifests/argocd-scrape.yaml b/otc/dev.t09.de/stacks/observability-client/vm-client-stack/manifests/argocd-scrape.yaml new file mode 100644 index 0000000..710145a --- /dev/null +++ b/otc/dev.t09.de/stacks/observability-client/vm-client-stack/manifests/argocd-scrape.yaml @@ -0,0 +1,14 @@ +apiVersion: operator.victoriametrics.com/v1beta1 +kind: VMServiceScrape +metadata: + name: argocd + 
namespace: observability +spec: + namespaceSelector: + matchNames: + - argocd + selector: + matchLabels: + app.kubernetes.io/part-of: argocd + endpoints: + - port: metrics diff --git a/otc/dev.t09.de/stacks/observability-client/vm-client-stack/manifests/garm-scrape.yaml b/otc/dev.t09.de/stacks/observability-client/vm-client-stack/manifests/garm-scrape.yaml new file mode 100644 index 0000000..9904e86 --- /dev/null +++ b/otc/dev.t09.de/stacks/observability-client/vm-client-stack/manifests/garm-scrape.yaml @@ -0,0 +1,15 @@ +apiVersion: operator.victoriametrics.com/v1beta1 +kind: VMServiceScrape +metadata: + name: garm + namespace: observability +spec: + namespaceSelector: + matchNames: + - garm + selector: + matchLabels: + app.kubernetes.io/name: garm + endpoints: + - port: http + path: /metrics From 0a249820de5c682267ec7f73ebbd70b41f6a0a49 Mon Sep 17 00:00:00 2001 From: Daniel Sy Date: Fri, 19 Jun 2026 16:11:09 +0200 Subject: [PATCH 111/114] =?UTF-8?q?fix(observability):=20=F0=9F=90=9B=20fi?= =?UTF-8?q?x=20ArgoCD=20scrape=20port=20name=20http-metrics=20not=20metric?= =?UTF-8?q?s?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../vm-client-stack/manifests/argocd-scrape.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/otc/dev.t09.de/stacks/observability-client/vm-client-stack/manifests/argocd-scrape.yaml b/otc/dev.t09.de/stacks/observability-client/vm-client-stack/manifests/argocd-scrape.yaml index 710145a..2e9248f 100644 --- a/otc/dev.t09.de/stacks/observability-client/vm-client-stack/manifests/argocd-scrape.yaml +++ b/otc/dev.t09.de/stacks/observability-client/vm-client-stack/manifests/argocd-scrape.yaml @@ -11,4 +11,4 @@ spec: matchLabels: app.kubernetes.io/part-of: argocd endpoints: - - port: metrics + - port: http-metrics From 23edd5d6b4883999c581a66e9b2040fbeb529696 Mon Sep 17 00:00:00 2001 From: Daniel Sy Date: Fri, 19 Jun 2026 16:33:04 +0200 Subject: [PATCH 112/114] 
=?UTF-8?q?feat(observability):=20=E2=9C=A8=20add?= =?UTF-8?q?=20read=20routes=20to=20vmauth=20for=20metrics=20and=20logs=20q?= =?UTF-8?q?ueries?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../victoria-k8s-stack/manifests/vmauth.yaml | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/otc/observability.buildth.ing/stacks/observability/victoria-k8s-stack/manifests/vmauth.yaml b/otc/observability.buildth.ing/stacks/observability/victoria-k8s-stack/manifests/vmauth.yaml index 374511d..e1de2c6 100644 --- a/otc/observability.buildth.ing/stacks/observability/victoria-k8s-stack/manifests/vmauth.yaml +++ b/otc/observability.buildth.ing/stacks/observability/victoria-k8s-stack/manifests/vmauth.yaml @@ -10,6 +10,12 @@ spec: - static: url: http://vmsingle-o12y:8429 paths: ["/api/v1/write"] + - static: + url: http://vmsingle-o12y:8429 + paths: ["/api/v1/.*"] - static: url: http://vlogs-victorialogs:9428 - paths: ["/insert/elasticsearch/.*"] \ No newline at end of file + paths: ["/insert/elasticsearch/.*"] + - static: + url: http://vlogs-victorialogs:9428 + paths: ["/select/.*"] \ No newline at end of file From 70939149ea29154a2a155c4d7068b43d087f3770 Mon Sep 17 00:00:00 2001 From: Daniel Sy Date: Fri, 19 Jun 2026 16:37:33 +0200 Subject: [PATCH 113/114] =?UTF-8?q?feat(observability):=20=E2=9C=A8=20add?= =?UTF-8?q?=20read=20routes=20to=20vmauth=20for=20dev.t09.de=20instance?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../victoria-k8s-stack/manifests/vmauth.yaml | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/otc/dev.t09.de/stacks/observability/victoria-k8s-stack/manifests/vmauth.yaml b/otc/dev.t09.de/stacks/observability/victoria-k8s-stack/manifests/vmauth.yaml index 5759093..a4f0368 100644 --- a/otc/dev.t09.de/stacks/observability/victoria-k8s-stack/manifests/vmauth.yaml +++ 
b/otc/dev.t09.de/stacks/observability/victoria-k8s-stack/manifests/vmauth.yaml @@ -12,6 +12,12 @@ spec: - static: url: http://vmsingle-o12y:8429 paths: ["/api/v1/write"] + - static: + url: http://vmsingle-o12y:8429 + paths: ["/api/v1/.*"] - static: url: http://vlogs-victorialogs:9428 - paths: ["/insert/elasticsearch/.*"] \ No newline at end of file + paths: ["/insert/elasticsearch/.*"] + - static: + url: http://vlogs-victorialogs:9428 + paths: ["/select/.*"] \ No newline at end of file From 3141b7bd6c97ffe21ed1a3e258f649c74e79109e Mon Sep 17 00:00:00 2001 From: Daniel Sy Date: Fri, 19 Jun 2026 16:43:21 +0200 Subject: [PATCH 114/114] =?UTF-8?q?feat(observability):=20=E2=9C=A8=20comp?= =?UTF-8?q?rehensive=20platform=20alert=20rules?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace ad-hoc forgejo/disk alerts with structured VMRule covering: - platform-health: ForgejoDown, IngressHighErrorRate, NodeNotReady, PodCrashLooping - storage: PVCUsageHigh (>80%), PVCUsageCritical (>90%) - resources: NodeCPUHigh (>85%), NodeMemoryHigh (>90%) --- .../victoria-k8s-stack/manifests/alerts.yaml | 109 +++++++++++++----- 1 file changed, 82 insertions(+), 27 deletions(-) diff --git a/otc/observability.buildth.ing/stacks/observability/victoria-k8s-stack/manifests/alerts.yaml b/otc/observability.buildth.ing/stacks/observability/victoria-k8s-stack/manifests/alerts.yaml index 110ee7e..cb0f1e3 100644 --- a/otc/observability.buildth.ing/stacks/observability/victoria-k8s-stack/manifests/alerts.yaml +++ b/otc/observability.buildth.ing/stacks/observability/victoria-k8s-stack/manifests/alerts.yaml @@ -1,40 +1,95 @@ apiVersion: operator.victoriametrics.com/v1beta1 kind: VMRule metadata: - name: forgejo-alerts + name: edp-platform-alerts namespace: observability spec: groups: - - name: forgejo + - name: platform-health rules: - - alert: forgejo down - expr: sum by(cluster_environment) (up{pod=~"forgejo-server-.*"}) < 1 - for: 30s + - alert: 
ForgejoDown + expr: sum by(cluster_environment) (up{job="forgejo-server-http"}) < 1 + for: 1m labels: severity: critical - job: "{{ $labels.job }}" annotations: - value: "{{ $value }}" - description: 'forgejo is down in cluster environment {{ $labels.cluster_environment }}' - - name: forgejo-backup - rules: - - alert: forgejo s3 backup job failed - expr: max by(cluster_environment) (kube_job_status_failed{job_name=~"forgejo-s3-backup-.*"}) != 0 - for: 30s - labels: - severity: critical - job: "{{ $labels.job }}" - annotations: - value: "{{ $value }}" - description: 'forgejo s3 backup job failed in cluster environment {{ $labels.cluster_environment }}' - - name: disk-consumption-high - rules: - - alert: disk consumption high - expr: 1-(kubelet_volume_stats_available_bytes / kubelet_volume_stats_capacity_bytes) > 0.6 - for: 30s + summary: "Forgejo is down on {{ $labels.cluster_environment }}" + description: "Forgejo server has been unreachable for more than 1 minute in cluster {{ $labels.cluster_environment }}." + + - alert: IngressHighErrorRate + expr: | + sum by(cluster_environment) (rate(nginx_ingress_controller_requests{status=~"5.."}[5m])) + / sum by(cluster_environment) (rate(nginx_ingress_controller_requests[5m])) > 0.05 + for: 5m labels: severity: major - job: "{{ $labels.job }}" annotations: - value: "{{ $value }}" - description: 'disk consumption of pvc {{ $labels.namespace }}/{{ $labels.persistentvolumeclaim }} is high in cluster environment {{ $labels.cluster_environment }}' + summary: "High ingress 5xx rate on {{ $labels.cluster_environment }}" + description: "More than 5% of ingress requests are returning 5xx errors for over 5 minutes." 
+ value: "{{ $value | humanizePercentage }}" + + - alert: NodeNotReady + expr: kube_node_status_condition{condition="Ready", status="true"} == 0 + for: 5m + labels: + severity: critical + annotations: + summary: "Node {{ $labels.node }} not ready on {{ $labels.cluster_environment }}" + description: "Node {{ $labels.node }} has been in NotReady state for more than 5 minutes." + + - alert: PodCrashLooping + expr: rate(kube_pod_container_status_restarts_total[15m]) * 60 * 15 > 3 + for: 5m + labels: + severity: major + annotations: + summary: "Pod {{ $labels.namespace }}/{{ $labels.pod }} crash-looping on {{ $labels.cluster_environment }}" + description: "Pod has restarted more than 3 times in the last 15 minutes." + + - name: storage + rules: + - alert: PVCUsageHigh + expr: | + 1 - (kubelet_volume_stats_available_bytes / kubelet_volume_stats_capacity_bytes) > 0.80 + for: 5m + labels: + severity: major + annotations: + summary: "PVC {{ $labels.namespace }}/{{ $labels.persistentvolumeclaim }} usage >80%" + description: "PVC usage is at {{ $value | humanizePercentage }} on {{ $labels.cluster_environment }}." + value: "{{ $value | humanizePercentage }}" + + - alert: PVCUsageCritical + expr: | + 1 - (kubelet_volume_stats_available_bytes / kubelet_volume_stats_capacity_bytes) > 0.90 + for: 5m + labels: + severity: critical + annotations: + summary: "PVC {{ $labels.namespace }}/{{ $labels.persistentvolumeclaim }} usage >90%" + description: "PVC is almost full at {{ $value | humanizePercentage }} on {{ $labels.cluster_environment }}. Immediate action required." + value: "{{ $value | humanizePercentage }}" + + - name: resources + rules: + - alert: NodeCPUHigh + expr: | + 1 - avg by(instance, cluster_environment) (rate(node_cpu_seconds_total{mode="idle"}[5m])) > 0.85 + for: 15m + labels: + severity: major + annotations: + summary: "Node {{ $labels.instance }} CPU >85% on {{ $labels.cluster_environment }}" + description: "Node CPU utilization has been above 85% for 15 minutes." 
+ value: "{{ $value | humanizePercentage }}" + + - alert: NodeMemoryHigh + expr: | + 1 - (node_memory_MemAvailable_bytes / node_memory_MemTotal_bytes) > 0.90 + for: 10m + labels: + severity: major + annotations: + summary: "Node memory >90% on {{ $labels.cluster_environment }}" + description: "Node memory utilization above 90% for 10 minutes." + value: "{{ $value | humanizePercentage }}"