From 4e11074323c311cf16a479ffe22be5beb4c7fec9 Mon Sep 17 00:00:00 2001 From: Rafael da Fonseca Date: Tue, 14 May 2024 22:45:25 +0100 Subject: [PATCH] Allow configuring nginx worker reload behaviour, to prevent multiple concurrent worker reloads which can lead to high resource usage and OOMKill (#10884) * feat: allow configuring nginx worker reload behaviour, to prevent multiple concurrent worker reloads Signed-off-by: Rafael da Fonseca * appease linter, remove unnecessary log line Signed-off-by: Rafael da Fonseca * Flip to using a positive behaviour flag instead of negative Signed-off-by: Rafael da Fonseca * Update helm-docs Signed-off-by: Rafael da Fonseca * Avoid calling GetBackendConfiguration() twice, use clearer name for helm chart option Signed-off-by: Rafael da Fonseca * Fix helm-docs ordering Signed-off-by: Rafael da Fonseca --------- Signed-off-by: Rafael da Fonseca --- charts/ingress-nginx/README.md | 1 + .../templates/controller-configmap.yaml | 1 + charts/ingress-nginx/values.yaml | 2 + .../nginx-configuration/configmap.md | 1 + internal/ingress/controller/config/config.go | 8 ++++ internal/ingress/controller/nginx.go | 47 +++++++++++++++++-- .../ingress/controller/template/configmap.go | 12 +++++ 7 files changed, 69 insertions(+), 3 deletions(-) diff --git a/charts/ingress-nginx/README.md b/charts/ingress-nginx/README.md index 1c166e05e..8ea6bae85 100644 --- a/charts/ingress-nginx/README.md +++ b/charts/ingress-nginx/README.md @@ -301,6 +301,7 @@ As of version `1.26.0` of this chart, by simply not providing any clusterIP valu | controller.enableAnnotationValidations | bool | `false` | | | controller.enableMimalloc | bool | `true` | Enable mimalloc as a drop-in replacement for malloc. # ref: https://github.com/microsoft/mimalloc # | | controller.enableTopologyAwareRouting | bool | `false` | This configuration enables Topology Aware Routing feature, used together with service annotation service.kubernetes.io/topology-mode="auto" Defaults to false | +| controller.enableWorkerSerialReloads | bool | `false` | This configuration defines if NGINX workers should reload serially instead of concurrently when multiple changes that require reloads are queued | | controller.existingPsp | string | `""` | Use an existing PSP instead of creating one | | controller.extraArgs | object | `{}` | Additional command line arguments to pass to Ingress-Nginx Controller E.g. to specify the default SSL certificate you can use | | controller.extraContainers | list | `[]` | Additional containers to be added to the controller pod. See https://github.com/lemonldap-ng-controller/lemonldap-ng-controller as example. | diff --git a/charts/ingress-nginx/templates/controller-configmap.yaml b/charts/ingress-nginx/templates/controller-configmap.yaml index 22080d115..50ae824a8 100644 --- a/charts/ingress-nginx/templates/controller-configmap.yaml +++ b/charts/ingress-nginx/templates/controller-configmap.yaml @@ -14,6 +14,7 @@ metadata: namespace: {{ include "ingress-nginx.namespace" . }} data: allow-snippet-annotations: "{{ .Values.controller.allowSnippetAnnotations }}" + enable-serial-reloads: "{{ .Values.controller.enableWorkerSerialReloads }}" {{- if .Values.controller.addHeaders }} add-headers: {{ include "ingress-nginx.namespace" . }}/{{ include "ingress-nginx.fullname" . }}-custom-add-headers {{- end }} diff --git a/charts/ingress-nginx/values.yaml b/charts/ingress-nginx/values.yaml index 85990614b..2a361614c 100644 --- a/charts/ingress-nginx/values.yaml +++ b/charts/ingress-nginx/values.yaml @@ -93,6 +93,8 @@ controller: # when users add those annotations. # Global snippets in ConfigMap are still respected allowSnippetAnnotations: false + # -- This configuration defines if NGINX workers should reload serially instead of concurrently when multiple changes that require reloads are queued + enableWorkerSerialReloads: false # -- Required for use with CNI based kubernetes installations (such as ones set up by kubeadm), # since CNI and hostport don't mix yet. Can be deprecated once https://github.com/kubernetes/kubernetes/issues/23920 # is merged diff --git a/docs/user-guide/nginx-configuration/configmap.md b/docs/user-guide/nginx-configuration/configmap.md index 52c54c4fe..13f82a17d 100644 --- a/docs/user-guide/nginx-configuration/configmap.md +++ b/docs/user-guide/nginx-configuration/configmap.md @@ -114,6 +114,7 @@ The following table shows a configuration option's name, type, and the default v |[worker-processes](#worker-processes)| string | `` || |[worker-cpu-affinity](#worker-cpu-affinity)| string | "" || |[worker-shutdown-timeout](#worker-shutdown-timeout)| string | "240s" || +|[enable-serial-reloads](#enable-serial-reloads)|bool|"false"|| |[load-balance](#load-balance)| string | "round_robin" || |[variables-hash-bucket-size](#variables-hash-bucket-size)| int | 128 || |[variables-hash-max-size](#variables-hash-max-size)| int | 2048 || diff --git a/internal/ingress/controller/config/config.go b/internal/ingress/controller/config/config.go index 47f2120f1..6349d6006 100644 --- a/internal/ingress/controller/config/config.go +++ b/internal/ingress/controller/config/config.go @@ -477,6 +477,13 @@ type Configuration struct { // http://nginx.org/en/docs/ngx_core_module.html#worker_processes WorkerProcesses string `json:"worker-processes,omitempty"` + // Defines whether multiple concurrent reloads of worker processes should occur. + // Set this to false to prevent more than n x 2 workers to exist at any time, to avoid potential OOM situations and high CPU load + // With this setting on false, configuration changes in the queue will be re-queued with an exponential backoff, until the number of worker process is the expected value. + // By default new worker processes are spawned every time there's a change that cannot be applied dynamically with no upper limit to the number of running workers + // http://nginx.org/en/docs/ngx_core_module.html#worker_processes + WorkerSerialReloads bool `json:"enable-serial-reloads,omitempty"` + // Defines a timeout for a graceful shutdown of worker processes // http://nginx.org/en/docs/ngx_core_module.html#worker_shutdown_timeout WorkerShutdownTimeout string `json:"worker-shutdown-timeout,omitempty"` @@ -851,6 +858,7 @@ func NewDefault() Configuration { UseGeoIP2: false, GeoIP2AutoReloadMinutes: 0, WorkerProcesses: strconv.Itoa(runtime.NumCPU()), + WorkerSerialReloads: false, WorkerShutdownTimeout: "240s", VariablesHashBucketSize: 256, VariablesHashMaxSize: 2048, diff --git a/internal/ingress/controller/nginx.go b/internal/ingress/controller/nginx.go index b81734154..f74b3245e 100644 --- a/internal/ingress/controller/nginx.go +++ b/internal/ingress/controller/nginx.go @@ -35,6 +35,7 @@ import ( "syscall" "text/template" "time" + "unicode" proxyproto "github.com/armon/go-proxyproto" "github.com/eapache/channels" @@ -87,9 +88,10 @@ func NewNGINXController(config *Configuration, mc metric.Collector) *NGINXContro n := &NGINXController{ isIPV6Enabled: ing_net.IsIPv6Enabled(), - resolver: h, - cfg: config, - syncRateLimiter: flowcontrol.NewTokenBucketRateLimiter(config.SyncRateLimit, 1), + resolver: h, + cfg: config, + syncRateLimiter: flowcontrol.NewTokenBucketRateLimiter(config.SyncRateLimit, 1), + workersReloading: false, recorder: eventBroadcaster.NewRecorder(scheme.Scheme, apiv1.EventSource{ Component: "nginx-ingress-controller", @@ -229,6 +231,8 @@ type NGINXController struct { syncRateLimiter flowcontrol.RateLimiter + workersReloading bool + // stopLock is used to enforce that only a single call to Stop send at // a given time. We allow stopping through an HTTP endpoint and // allowing concurrent stoppers leads to stack traces. @@ -676,6 +680,11 @@ func (n *NGINXController) OnUpdate(ingressCfg ingress.Configuration) error { cfg := n.store.GetBackendConfiguration() cfg.Resolver = n.resolver + workerSerialReloads := cfg.WorkerSerialReloads + if workerSerialReloads && n.workersReloading { + return errors.New("worker reload already in progress, requeuing reload") + } + content, err := n.generateTemplate(cfg, ingressCfg) if err != nil { return err @@ -738,9 +747,41 @@ func (n *NGINXController) OnUpdate(ingressCfg ingress.Configuration) error { return fmt.Errorf("%v\n%v", err, string(o)) } + // Reload status checking runs in a separate goroutine to avoid blocking the sync queue + if workerSerialReloads { + go n.awaitWorkersReload() + } + return nil } +// awaitWorkersReload checks if the number of workers has returned to the expected count +func (n *NGINXController) awaitWorkersReload() { + n.workersReloading = true + defer func() { n.workersReloading = false }() + + expectedWorkers := n.store.GetBackendConfiguration().WorkerProcesses + var numWorkers string + klog.V(3).Infof("waiting for worker count to be equal to %s", expectedWorkers) + for numWorkers != expectedWorkers { + time.Sleep(time.Second) + o, err := exec.Command("/bin/sh", "-c", "pgrep worker | wc -l").Output() + if err != nil { + klog.ErrorS(err, numWorkers) + return + } + // cleanup any non-printable chars from shell output + numWorkers = strings.Map(func(r rune) rune { + if unicode.IsPrint(r) { + return r + } + return -1 + }, string(o)) + + klog.V(3).Infof("Currently running nginx worker processes: %s, expected %s", numWorkers, expectedWorkers) + } +} + // nginxHashBucketSize computes the correct NGINX hash_bucket_size for a hash // with the given longest key. func nginxHashBucketSize(longestString int) int { diff --git a/internal/ingress/controller/template/configmap.go b/internal/ingress/controller/template/configmap.go index f78dbad03..1a7f15f1c 100644 --- a/internal/ingress/controller/template/configmap.go +++ b/internal/ingress/controller/template/configmap.go @@ -69,6 +69,7 @@ const ( luaSharedDictsKey = "lua-shared-dicts" plugins = "plugins" debugConnections = "debug-connections" + workerSerialReloads = "enable-serial-reloads" ) var ( @@ -404,6 +405,17 @@ func ReadConfig(src map[string]string) config.Configuration { delete(conf, workerProcesses) } + if val, ok := conf[workerSerialReloads]; ok { + boolVal, err := strconv.ParseBool(val) + if err != nil { + to.WorkerSerialReloads = false + klog.Warningf("failed to parse enable-serial-reloads setting, valid values are true or false, found %s", val) + } else { + to.WorkerSerialReloads = boolVal + } + delete(conf, workerSerialReloads) + } + if val, ok := conf[plugins]; ok { to.Plugins = splitAndTrimSpace(val, ",") delete(conf, plugins)