Automated upload for dev.t09.de
This commit is contained in:
parent
9bbcf4efca
commit
95deeef6a0
7 changed files with 86 additions and 27 deletions
|
|
@ -62,7 +62,7 @@ spec:
|
||||||
- name: RECEIVER_SESSION_TTL
|
- name: RECEIVER_SESSION_TTL
|
||||||
value: "12h"
|
value: "12h"
|
||||||
- name: RECEIVER_ALLOWED_ORG
|
- name: RECEIVER_ALLOWED_ORG
|
||||||
value: "giteaAdmin"
|
value: "DevFW-CICD"
|
||||||
- name: RECEIVER_CPU_SIZING_MODE
|
- name: RECEIVER_CPU_SIZING_MODE
|
||||||
value: "observe"
|
value: "observe"
|
||||||
- name: RECEIVER_MEMORY_QOS
|
- name: RECEIVER_MEMORY_QOS
|
||||||
|
|
|
||||||
|
|
@ -1,9 +0,0 @@
|
||||||
apiVersion: v1
|
|
||||||
kind: Secret
|
|
||||||
metadata:
|
|
||||||
name: sizer-oidc-client
|
|
||||||
labels:
|
|
||||||
app: sizer-receiver
|
|
||||||
type: Opaque
|
|
||||||
stringData:
|
|
||||||
client-secret: "73eda9068bd00dfe67d29f087b5540cb1cd82cc1dd2ac0f838558ac8bbcfcb3a"
|
|
||||||
|
|
@ -27,6 +27,3 @@ spec:
|
||||||
- repoURL: https://edp.buildth.ing/DevFW-CICD/stacks-instances
|
- repoURL: https://edp.buildth.ing/DevFW-CICD/stacks-instances
|
||||||
targetRevision: HEAD
|
targetRevision: HEAD
|
||||||
ref: values
|
ref: values
|
||||||
- repoURL: https://edp.buildth.ing/DevFW-CICD/stacks-instances
|
|
||||||
targetRevision: HEAD
|
|
||||||
path: "otc/dev.t09.de/stacks/core/dex/manifests"
|
|
||||||
|
|
|
||||||
|
|
@ -1,8 +0,0 @@
|
||||||
apiVersion: v1
|
|
||||||
kind: Secret
|
|
||||||
metadata:
|
|
||||||
name: dex-sizer-client
|
|
||||||
namespace: dex
|
|
||||||
type: Opaque
|
|
||||||
stringData:
|
|
||||||
clientSecret: "73eda9068bd00dfe67d29f087b5540cb1cd82cc1dd2ac0f838558ac8bbcfcb3a"
|
|
||||||
|
|
@ -1,3 +1,4 @@
|
||||||
|
|
||||||
# We use the Recreate strategy to make sure only one instance with one version is running at a time; otherwise Forgejo might break or its data might become inconsistent.
|
# We use the Recreate strategy to make sure only one instance with one version is running at a time; otherwise Forgejo might break or its data might become inconsistent.
|
||||||
strategy:
|
strategy:
|
||||||
type: Recreate
|
type: Recreate
|
||||||
|
|
@ -173,8 +174,10 @@ service:
|
||||||
|
|
||||||
image:
|
image:
|
||||||
pullPolicy: "IfNotPresent"
|
pullPolicy: "IfNotPresent"
|
||||||
# DB has v15a/v15b migrations from workflow-webhook build.
|
# Overrides the image tag whose default is the chart appVersion.
|
||||||
# Using that image until a proper v15+ EDP release is cut.
|
#tag: "8.0.3"
|
||||||
fullOverride: edp.buildth.ing/devfw-cicd/edp-forgejo:workflow-webhook-20260305
|
# Adds -rootless suffix to image name
|
||||||
|
# rootless: true
|
||||||
|
fullOverride: edp.buildth.ing/devfw-cicd/edp-forgejo:14.0.2-edp1-rootless
|
||||||
|
|
||||||
forgejo: {}
|
forgejo: {}
|
||||||
|
|
|
||||||
|
|
@ -26,9 +26,7 @@ credentials:
|
||||||
|
|
||||||
image:
|
image:
|
||||||
repository: edp.buildth.ing/devfw-cicd/garm-forgejo
|
repository: edp.buildth.ing/devfw-cicd/garm-forgejo
|
||||||
# NOTE: v0.1.7-forgejo-23 has exec format error (wrong arch build)
|
tag: v0.1.7-forgejo-23
|
||||||
# Rolled back to -22 until -23 is rebuilt for amd64
|
|
||||||
tag: v0.1.7-forgejo-22
|
|
||||||
|
|
||||||
providerConfig:
|
providerConfig:
|
||||||
edgeConnect:
|
edgeConnect:
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,78 @@
|
||||||
|
apiVersion: operator.victoriametrics.com/v1beta1
|
||||||
|
kind: VMRule
|
||||||
|
metadata:
|
||||||
|
name: backup-alerts
|
||||||
|
namespace: observability
|
||||||
|
spec:
|
||||||
|
groups:
|
||||||
|
- name: backup-schedule-staleness
|
||||||
|
rules:
|
||||||
|
- alert: BackupCronJobNotScheduled
|
||||||
|
expr: |
|
||||||
|
time() - kube_cronjob_status_last_schedule_time{cronjob=~"forgejo-s3-backup|secrets-backup", namespace="gitea"}
|
||||||
|
> 26 * 3600
|
||||||
|
for: 5m
|
||||||
|
labels:
|
||||||
|
severity: critical
|
||||||
|
cronjob: "{{ $labels.cronjob }}"
|
||||||
|
annotations:
|
||||||
|
value: "{{ $value | humanizeDuration }}"
|
||||||
|
description: >-
|
||||||
|
CronJob {{ $labels.namespace }}/{{ $labels.cronjob }} has not been
|
||||||
|
scheduled for over 26 hours in cluster {{ $labels.cluster_environment }}.
|
||||||
|
Last schedule was {{ $value | humanizeDuration }} ago.
|
||||||
|
summary: "Backup CronJob {{ $labels.cronjob }} is stale"
|
||||||
|
|
||||||
|
- alert: BackupCronJobNeverScheduled
|
||||||
|
expr: |
|
||||||
|
kube_cronjob_status_last_schedule_time{cronjob=~"forgejo-s3-backup|secrets-backup", namespace="gitea"}
|
||||||
|
== 0
|
||||||
|
for: 30m
|
||||||
|
labels:
|
||||||
|
severity: critical
|
||||||
|
cronjob: "{{ $labels.cronjob }}"
|
||||||
|
annotations:
|
||||||
|
description: >-
|
||||||
|
CronJob {{ $labels.namespace }}/{{ $labels.cronjob }} has never been
|
||||||
|
scheduled in cluster {{ $labels.cluster_environment }}.
|
||||||
|
summary: "Backup CronJob {{ $labels.cronjob }} never ran"
|
||||||
|
|
||||||
|
- name: backup-job-failures
|
||||||
|
rules:
|
||||||
|
- alert: BackupJobFailed
|
||||||
|
expr: |
|
||||||
|
max by(cluster_environment, namespace, job_name) (
|
||||||
|
kube_job_status_failed{job_name=~"forgejo-s3-backup-.*|secrets-backup-.*", namespace="gitea"}
|
||||||
|
) > 0
|
||||||
|
for: 30s
|
||||||
|
labels:
|
||||||
|
severity: critical
|
||||||
|
job_name: "{{ $labels.job_name }}"
|
||||||
|
annotations:
|
||||||
|
value: "{{ $value }}"
|
||||||
|
description: >-
|
||||||
|
Backup job {{ $labels.namespace }}/{{ $labels.job_name }} has
|
||||||
|
{{ $value }} failed pod(s) in cluster {{ $labels.cluster_environment }}.
|
||||||
|
summary: "Backup job {{ $labels.job_name }} failed"
|
||||||
|
|
||||||
|
- name: backup-job-duration
|
||||||
|
rules:
|
||||||
|
- alert: BackupJobTooSlow
|
||||||
|
expr: |
|
||||||
|
(
|
||||||
|
time() - kube_job_status_start_time{job_name=~"forgejo-s3-backup-.*|secrets-backup-.*", namespace="gitea"}
|
||||||
|
) > 300
|
||||||
|
and
|
||||||
|
kube_job_status_active{job_name=~"forgejo-s3-backup-.*|secrets-backup-.*", namespace="gitea"} > 0
|
||||||
|
for: 1m
|
||||||
|
labels:
|
||||||
|
severity: major
|
||||||
|
job_name: "{{ $labels.job_name }}"
|
||||||
|
annotations:
|
||||||
|
value: "{{ $value | humanizeDuration }}"
|
||||||
|
description: >-
|
||||||
|
Backup job {{ $labels.namespace }}/{{ $labels.job_name }} has been
|
||||||
|
running for {{ $value | humanizeDuration }} (threshold: 5m)
|
||||||
|
in cluster {{ $labels.cluster_environment }}. This may indicate a
|
||||||
|
hung process or connectivity issue.
|
||||||
|
summary: "Backup job {{ $labels.job_name }} running too long"
|
||||||
Loading…
Add table
Add a link
Reference in a new issue