Added new affinity mode for maximum session stickiness. Fixes kubernetes/ingress-nginx#4475

Alexander Maret-Huskinson 2019-08-30 11:40:29 +02:00
parent 8740c1b661
commit 9170591185
16 changed files with 541 additions and 55 deletions


@@ -0,0 +1,57 @@
-- An affinity mode which makes sure connections are rebalanced when a deployment is scaled.
-- The advantage of this mode is that the load on the pods will be redistributed.
-- The drawback of this mode is that, when scaling up a deployment, roughly (n-c)/n users
-- will lose their session, where c is the current number of pods and n is the new number of
-- pods.
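-- For example, scaling from c = 3 to n = 5 pods means roughly (5-3)/5 = 40%
-- of users get remapped to a different pod and lose their session.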
--
-- This class extends/implements the abstract class balancer.sticky.
--
local math = require("math")
local resty_chash = require("resty.chash")
local util = require("util")
local _M = {}
-- Consider the situation of N upstreams one of which is failing.
-- Then the probability to obtain failing upstream after M iterations would be close to (1/N)**M.
-- For the worst case (2 upstreams; 20 iterations) it would be ~10**(-6)
-- which is much better than ~10**(-3) for 10 iterations.
local MAX_UPSTREAM_CHECKS_COUNT = 20
local function get_routing_key(self)
return self:get_cookie(), nil
end
local function set_routing_key(self, key)
self:set_cookie(key)
end
local function pick_new_upstream(self, failed_upstreams)
for i = 1, MAX_UPSTREAM_CHECKS_COUNT do
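-- vary the key on each attempt (iteration counter plus a random number) so
-- the consistent hash can land on a different upstream every time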
local key = string.format("%s.%s.%s", ngx.now() + i, ngx.worker.pid(), math.random(999999))
local new_upstream = self.instance:find(key)
if not failed_upstreams[new_upstream] then
return new_upstream, key
end
end
return nil, nil
end
function _M.new(self, sticky_balancer, backend)
local o = sticky_balancer or {}
local nodes = util.get_nodes(backend.endpoints)
-- override sticky.balancer methods
o.instance = resty_chash:new(nodes)
o.get_routing_key = get_routing_key
o.set_routing_key = set_routing_key
o.pick_new_upstream = pick_new_upstream
return o
end
return _M
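For orientation, a minimal sketch of how this mode gets wired up, assuming an OpenResty context and a hypothetical two-endpoint backend (names invented; normally sticky.lua passes in its own instance instead of a plain table):

local affinity_balanced = require("affinity.balanced")

-- hypothetical backend, mirroring the shape sticky.lua passes in
local backend = {
  name = "example-web-80",
  endpoints = {
    { address = "10.0.0.1", port = "8080", maxFails = 0, failTimeout = 0 },
    { address = "10.0.0.2", port = "8080", maxFails = 0, failTimeout = 0 },
  }
}

-- a plain table is enough here to see the overrides being attached
local balancer = affinity_balanced:new({}, backend)

-- pick an upstream, with no failed endpoints to exclude
local endpoint, key = balancer:pick_new_upstream({})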


@@ -0,0 +1,53 @@
-- An affinity mode which makes sure a session is always routed to the same endpoint.
-- The advantage of this mode is that a user will never lose their session.
-- The drawback of this mode is that when scaling up a deployment, sessions will not
-- be rebalanced.
--
local util = require("util")
local util_nodemap = require("util.nodemap")
local _M = {}
local function get_routing_key(self)
local cookie_value = self:get_cookie()
if cookie_value then
-- format: <timestamp>.<worker-pid>.<routing-key>
local routing_key = string.match(cookie_value, '[^%.]+$')
if routing_key == nil then
local err = string.format("Failed to extract routing key from cookie '%s'!", cookie_value)
return nil, err
end
return routing_key, nil
end
return nil, nil
end
local function set_routing_key(self, key)
local value = string.format("%s.%s.%s", ngx.now(), ngx.worker.pid(), key)
self:set_cookie(value)
end
local function pick_new_upstream(self, failed_upstreams)
return self.instance:random_except(failed_upstreams)
end
function _M.new(self, sticky_balancer, backend)
local o = sticky_balancer or {}
local nodes = util.get_nodes(backend.endpoints)
local hash_salt = backend["name"]
-- override sticky.balancer methods
o.instance = util_nodemap:new(nodes, hash_salt)
o.get_routing_key = get_routing_key
o.set_routing_key = set_routing_key
o.pick_new_upstream = pick_new_upstream
return o
end
return _M
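For illustration, the cookie value written by set_routing_key and parsed back by get_routing_key looks like this (timestamp, pid and routing key invented):

-- "<timestamp>.<worker-pid>.<routing-key>", as produced by set_routing_key
local cookie_value = "1567158029.42.d41d8cd98f00b204e9800998ecf8427e"

-- get_routing_key keeps only the part after the last dot
local routing_key = string.match(cookie_value, '[^%.]+$')
assert(routing_key == "d41d8cd98f00b204e9800998ecf8427e")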


@@ -1,8 +1,8 @@
local affinity_balanced = require("affinity.balanced")
local affinity_persistent = require("affinity.persistent")
local balancer_resty = require("balancer.resty")
local resty_chash = require("resty.chash")
local util = require("util")
local ck = require("resty.cookie")
local math = require("math")
local ngx_balancer = require("ngx.balancer")
local split = require("util.split")
@@ -10,34 +10,60 @@ local string_format = string.format
local ngx_log = ngx.log
local INFO = ngx.INFO
local _M = balancer_resty:new({ factory = resty_chash, name = "sticky" })
local _M = balancer_resty:new({ name = "sticky" })
local DEFAULT_COOKIE_NAME = "route"
-- Consider the situation of N upstreams one of which is failing.
-- Then the probability to obtain failing upstream after M iterations would be close to (1/N)**M.
-- For the worst case (2 upstreams; 20 iterations) it would be ~10**(-6)
-- which is much better than ~10**(-3) for 10 iterations.
local MAX_UPSTREAM_CHECKS_COUNT = 20
function _M.cookie_name(self)
return self.cookie_session_affinity.name or DEFAULT_COOKIE_NAME
end
function _M.new(self, backend)
local nodes = util.get_nodes(backend.endpoints)
local function init_affinity_mode(self, backend)
-- default to 'balanced' for backwards compatibility
local mode = backend["sessionAffinityConfig"]["mode"] or 'balanced'
if mode == '' then
mode = 'balanced'
end
self.affinity_mode = mode
if mode == 'persistent' then
return affinity_persistent:new(self, backend)
end
-- default is 'balanced' for backwards compatibility
if mode ~= 'balanced' then
ngx.log(ngx.WARN, string.format("Invalid affinity mode '%s'! Using 'balanced' as a default.", mode))
end
return affinity_balanced:new(self, backend)
end
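-- init_affinity_mode above: mode = 'persistent' selects affinity_persistent,
-- while nil, '' and 'balanced' all select affinity_balanced; any other value
-- logs a warning and falls back to 'balanced'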
function _M.new(self, backend)
local o = {
instance = self.factory:new(nodes),
instance = nil,
affinity_mode = nil,
traffic_shaping_policy = backend.trafficShapingPolicy,
alternative_backends = backend.alternativeBackends,
cookie_session_affinity = backend["sessionAffinityConfig"]["cookieSessionAffinity"]
}
setmetatable(o, self)
self.__index = self
return o
return init_affinity_mode(o, backend)
end
local function set_cookie(self, value)
function _M.get_cookie(self)
local cookie, err = ck:new()
if not cookie then
ngx.log(ngx.ERR, err)
end
return cookie:get(self:cookie_name())
end
function _M.set_cookie(self, value)
local cookie, err = ck:new()
if not cookie then
ngx.log(ngx.ERR, err)
@@ -86,19 +112,30 @@ local function get_failed_upstreams()
return indexed_upstream_addrs
end
local function pick_new_upstream(self)
local failed_upstreams = get_failed_upstreams()
--- get_routing_key gets the current routing key from the cookie
-- @treturn string, string The routing key and an error message if an error occurred.
function _M.get_routing_key(self)
-- interface method to get the routing key from the cookie
-- has to be overridden by an affinity mode
ngx.log(ngx.ERR, "[BUG] Failed to get routing key as no implementation has been provided!")
return nil, nil
end
for i = 1, MAX_UPSTREAM_CHECKS_COUNT do
local key = string.format("%s.%s.%s", ngx.now() + i, ngx.worker.pid(), math.random(999999))
local new_upstream = self.instance:find(key)
if not failed_upstreams[new_upstream] then
return new_upstream, key
end
end
--- set_routing_key sets the current routing key on the cookie
-- @tparam string key The routing key to set on the cookie.
function _M.set_routing_key(self, key)
-- interface method to set the routing key on the cookie
-- has to be overridden by an affinity mode
ngx.log(ngx.ERR, "[BUG] Failed to set routing key as no implementation has been provided!")
end
--- pick_new_upstream picks a new upstream while ignoring the given failed upstreams.
-- @tparam {[string]=boolean} failed_upstreams A table of upstreams to ignore, where the key is the endpoint and the value is a boolean.
-- @treturn string, string The endpoint and its key.
function _M.pick_new_upstream(self, failed_upstreams)
-- interface method to get a new upstream
-- has to be overridden by an affinity mode
ngx.log(ngx.ERR, "[BUG] Failed to pick new upstream as no implementation has been provided!")
return nil, nil
end
@@ -128,15 +165,9 @@ local function should_set_cookie(self)
end
function _M.balance(self)
local cookie, err = ck:new()
if not cookie then
ngx.log(ngx.ERR, "error while initializing cookie: " .. tostring(err))
return
end
local upstream_from_cookie
local key = cookie:get(self:cookie_name())
local key = self:get_routing_key()
if key then
upstream_from_cookie = self.instance:find(key)
end
@@ -151,24 +182,34 @@ function _M.balance(self)
local new_upstream
new_upstream, key = pick_new_upstream(self)
new_upstream, key = self:pick_new_upstream(get_failed_upstreams())
if not new_upstream then
ngx.log(ngx.WARN, string.format("failed to get new upstream; using upstream %s", new_upstream))
elseif should_set_cookie(self) then
set_cookie(self, key)
self:set_routing_key(key)
end
return new_upstream
end
function _M.sync(self, backend)
local changed = false
-- check and reinit the affinity mode before syncing the balancer, which will reinit the nodes
if self.affinity_mode ~= backend.sessionAffinityConfig.mode then
changed = true
init_affinity_mode(self, backend)
end
-- reload balancer nodes
balancer_resty.sync(self, backend)
-- Reload the balancer if any of the annotations have changed.
local changed = not util.deep_compare(
changed = changed or not util.deep_compare(
self.cookie_session_affinity,
backend.sessionAffinityConfig.cookieSessionAffinity
)
if not changed then
return
end
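Taken together, balance() now delegates all cookie and key handling to the affinity mode. A condensed sketch of the resulting control flow (not the literal function body; get_failed_upstreams and should_set_cookie are the locals shown above):

-- condensed sketch of _M.balance(self); assumes an initialized affinity mode
local function balance_sketch(self)
  local upstream
  local key = self:get_routing_key()      -- mode-specific cookie read
  if key then
    upstream = self.instance:find(key)    -- map the key back to an endpoint
  end
  if not upstream then
    -- mode-specific pick, skipping endpoints that have already failed
    upstream, key = self:pick_new_upstream(get_failed_upstreams())
    if upstream and should_set_cookie(self) then
      self:set_routing_key(key)           -- mode-specific cookie write
    end
  end
  return upstream
end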


@@ -15,11 +15,16 @@ local function reset_ngx()
end
function get_mocked_cookie_new()
local o = { value = nil }
local mock = {
get = function(self, n) return self.value end,
set = function(self, c) self.value = c.value ; return true, nil end
}
setmetatable(o, mock)
mock.__index = mock
return function(self)
return {
get = function(self, n) return nil, "error" end,
set = function(self, n) return true, "" end
}
return o;
end
end
@@ -229,7 +234,7 @@ describe("Sticky", function()
end)
end)
local function get_several_test_backends(change_on_failure)
local function get_several_test_backends(option)
return {
name = "access-router-production-web-80",
endpoints = {
@@ -238,7 +243,13 @@ describe("Sticky", function()
},
sessionAffinityConfig = {
name = "cookie",
cookieSessionAffinity = { name = "test_name", hash = "sha1", change_on_failure = change_on_failure }
mode = option["mode"],
cookieSessionAffinity = {
name = "test_name",
hash = "sha1",
change_on_failure = option["change_on_failure"],
locations = { ['test.com'] = {'/'} }
}
},
}
end
@@ -257,21 +268,20 @@ describe("Sticky", function()
context("when request to upstream fails", function()
it("changes upstream when change_on_failure option is true", function()
-- create sticky cookie
cookie.new = function(self)
local return_obj = {
set = function(v) return false, nil end,
get = function(k) return "" end,
}
return return_obj, false
end
local options = {false, true}
local options = {
{["change_on_failure"] = false, ["mode"] = nil},
{["change_on_failure"] = false, ["mode"] = 'balanced'},
{["change_on_failure"] = false, ["mode"] = 'persistent'},
{["change_on_failure"] = true, ["mode"] = nil},
{["change_on_failure"] = true, ["mode"] = 'balanced'},
{["change_on_failure"] = true, ["mode"] = 'persistent'}
}
for _, option in ipairs(options) do
local sticky_balancer_instance = sticky:new(get_several_test_backends(option))
local old_upstream = sticky_balancer_instance:balance()
assert.is.Not.Nil(old_upstream)
for _ = 1, 100 do
-- make sure upstream doesn't change on subsequent calls of balance()
assert.equal(old_upstream, sticky_balancer_instance:balance())
@@ -281,11 +291,11 @@ describe("Sticky", function()
sticky_balancer_instance.get_last_failure = function()
return "failed"
end
_G.ngx.var = { upstream_addr = old_upstream }
_G.ngx.var.upstream_addr = old_upstream
for _ = 1, 100 do
local new_upstream = sticky_balancer_instance:balance()
if option == false then
if option["change_on_failure"] == false then
-- upstream should stay the same in spite of the error if the change_on_failure option is false
assert.equal(new_upstream, old_upstream)
else


@@ -0,0 +1,167 @@
local util = require("util")
local nodemap = require("util.nodemap")
local function get_test_backend_single()
return {
name = "access-router-production-web-80",
endpoints = {
{ address = "10.184.7.40", port = "8080", maxFails = 0, failTimeout = 0 }
}
}
end
local function get_test_backend_multi()
return {
name = "access-router-production-web-80",
endpoints = {
{ address = "10.184.7.40", port = "8080", maxFails = 0, failTimeout = 0 },
{ address = "10.184.7.41", port = "8080", maxFails = 0, failTimeout = 0 }
}
}
end
local function get_test_nodes_ignore(endpoint)
local ignore = {}
ignore[endpoint] = true
return ignore
end
describe("Node Map", function()
local test_backend_single = get_test_backend_single()
local test_backend_multi = get_test_backend_multi()
local test_salt = test_backend_single.name
local test_nodes_single = util.get_nodes(test_backend_single.endpoints)
local test_nodes_multi = util.get_nodes(test_backend_multi.endpoints)
local test_endpoint1 = test_backend_multi.endpoints[1].address .. ":" .. test_backend_multi.endpoints[1].port
local test_endpoint2 = test_backend_multi.endpoints[2].address .. ":" .. test_backend_multi.endpoints[2].port
local test_nodes_ignore = get_test_nodes_ignore(test_endpoint1)
describe("new()", function()
context("when no salt has been provided", function()
it("random() returns an unsalted key", function()
local nodemap_instance = nodemap:new(test_nodes_single, nil)
local expected_endpoint = test_endpoint1
local expected_hash_key = ngx.md5(expected_endpoint)
local actual_endpoint
local actual_hash_key
actual_endpoint, actual_hash_key = nodemap_instance:random()
assert.equal(actual_endpoint, expected_endpoint)
assert.equal(expected_hash_key, actual_hash_key)
end)
end)
context("when a salt has been provided", function()
it("random() returns a salted key", function()
local nodemap_instance = nodemap:new(test_nodes_single, test_salt)
local expected_endpoint = test_endpoint1
local expected_hash_key = ngx.md5(test_salt .. expected_endpoint)
local actual_endpoint
local actual_hash_key
actual_endpoint, actual_hash_key = nodemap_instance:random()
assert.equal(actual_endpoint, expected_endpoint)
assert.equal(expected_hash_key, actual_hash_key)
end)
end)
context("when no nodes have been provided", function()
it("random() returns nil", function()
local nodemap_instance = nodemap:new({}, test_salt)
local actual_endpoint
local actual_hash_key
actual_endpoint, actual_hash_key = nodemap_instance:random()
assert.equal(actual_endpoint, nil)
assert.equal(actual_hash_key, nil)
end)
end)
end)
describe("find()", function()
before_each(function()
package.loaded["util.nodemap"] = nil
nodemap = require("util.nodemap")
end)
context("when a hash key is valid", function()
it("find() returns the correct endpoint", function()
local nodemap_instance = nodemap:new(test_nodes_single, test_salt)
local test_hash_key
local expected_endpoint
local actual_endpoint
expected_endpoint, test_hash_key = nodemap_instance:random()
assert.not_equal(expected_endpoint, nil)
assert.not_equal(test_hash_key, nil)
actual_endpoint = nodemap_instance:find(test_hash_key)
assert.equal(actual_endpoint, expected_endpoint)
end)
end)
context("when a hash key is invalid", function()
it("find() returns nil", function()
local nodemap_instance = nodemap:new(test_nodes_single, test_salt)
local test_hash_key = "invalid or nonexistent hash key"
local actual_endpoint
actual_endpoint = nodemap_instance:find(test_hash_key)
assert.equal(actual_endpoint, nil)
end)
end)
end)
describe("random_except()", function()
before_each(function()
package.loaded["util.nodemap"] = nil
nodemap = require("util.nodemap")
end)
context("when nothing has been excluded", function()
it("random_except() returns the correct endpoint", function()
local nodemap_instance = nodemap:new(test_nodes_single, test_salt)
local expected_endpoint = test_endpoint1
local test_hash_key
local actual_endpoint
actual_endpoint, test_hash_key = nodemap_instance:random_except({})
assert.equal(expected_endpoint, actual_endpoint)
assert.not_equal(test_hash_key, nil)
end)
end)
context("when everything has been excluded", function()
it("random_except() returns nil", function()
local nodemap_instance = nodemap:new(test_nodes_single, test_salt)
local actual_hash_key
local actual_endpoint
actual_endpoint, actual_hash_key = nodemap_instance:random_except(test_nodes_ignore)
assert.equal(actual_endpoint, nil)
assert.equal(actual_hash_key, nil)
end)
end)
context("when an endpoint has been excluded", function()
it("random_except() does not return it", function()
local nodemap_instance = nodemap:new(test_nodes_multi, test_salt)
local expected_endpoint = test_endpoint2
local actual_endpoint
local test_hash_key
actual_endpoint, test_hash_key = nodemap_instance:random_except(test_nodes_ignore)
assert.equal(actual_endpoint, expected_endpoint)
assert.not_equal(test_hash_key, nil)
end)
end)
end)
end)


@@ -0,0 +1,120 @@
local math = require("math")
local util = require("util")
local _M = {}
--- create_map generates the node hash table
-- @tparam {[string]=number} nodes A table with the node as a key and its weight as a value.
-- @tparam string salt A salt that will be used to generate salted hash keys.
local function create_map(nodes, salt)
local hash_map = {}
for endpoint, _ in pairs(nodes) do
-- obfuscate the endpoint with a shared key to prevent brute force
-- and rainbow table attacks which could reveal internal endpoints
local key = salt .. endpoint
local hash_key = ngx.md5(key)
hash_map[hash_key] = endpoint
end
return hash_map
end
--- get_random_node picks a random node from the given map.
-- @tparam {[string]=string} map A hash key to node table.
-- @treturn string,string The node and its key
local function get_random_node(map)
local size = util.tablelength(map)
if size < 1 then
return nil, nil
end
local index = math.random(1, size)
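-- the iteration order of pairs() is unspecified in Lua, which is fine here:
-- stopping at a uniformly random index still yields a uniform pick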
local count = 1
for key, endpoint in pairs(map) do
if count == index then
return endpoint, key
end
count = count + 1
end
ngx.log(ngx.ERR, string.format("Failed to find node %d of %d! This is a bug, please report!", index, size))
return nil, nil
end
--- new constructs a new instance of the node map
--
-- The map uses MD5 to create hash keys for a given node. For security reasons it supports
-- salted hash keys, to prevent attackers from using rainbow tables or brute forcing
-- the node endpoints, which would reveal cluster internal network information.
--
-- To make sure hash keys are reproducible on different ingress controller instances the salt
-- needs to be shared and therefore is not simply generated randomly.
--
-- @tparam {[string]=number} endpoints A table with the node endpoint as a key and its weight as a value.
-- @tparam[opt] string hash_salt An optional hash salt that will be used to obfuscate the hash key.
function _M.new(self, endpoints, hash_salt)
if hash_salt == nil then
hash_salt = ''
end
-- the endpoints have to be saved as 'nodes' to stay compatible with balancer.resty
local o = {
salt = hash_salt,
nodes = endpoints,
map = create_map(endpoints, hash_salt)
}
setmetatable(o, self)
self.__index = self
return o
end
--- reinit reinitializes the node map reusing the original salt
-- @tparam {[string]=number} nodes A table with the node as a key and its weight as a value.
function _M.reinit(self, nodes)
self.nodes = nodes
self.map = create_map(nodes, self.salt)
end
--- find looks up a node by hash key.
-- @tparam string key The hash key.
-- @treturn string The node.
function _M.find(self, key)
return self.map[key]
end
--- random picks a random node from the hashmap.
-- @treturn string,string A random node and its key or both nil.
function _M.random(self)
return get_random_node(self.map)
end
--- random_except picks a random node from the hashmap, ignoring the nodes in the given table
-- @tparam {[string]=boolean} ignore_nodes A table of nodes to ignore; the node is the key
-- and the value must be set to true.
-- @treturn string,string A random node and its key or both nil.
function _M.random_except(self, ignore_nodes)
local valid_nodes = {}
-- avoid generating the map if no ignores were provided
if ignore_nodes == nil or util.tablelength(ignore_nodes) == 0 then
return get_random_node(self.map)
end
-- generate valid endpoints
for key, endpoint in pairs(self.map) do
if not ignore_nodes[endpoint] then
valid_nodes[key] = endpoint
end
end
return get_random_node(valid_nodes)
end
return _M
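As a usage illustration of the salted map (endpoint and salt invented; ngx.md5 requires an OpenResty context, as in the tests above):

local nodemap = require("util.nodemap")

-- a single endpoint with weight 1, salted with the backend name
local map = nodemap:new({ ["10.0.0.1:8080"] = 1 }, "example-web-80")

local endpoint, key = map:random()
-- key == ngx.md5("example-web-80" .. "10.0.0.1:8080"); the cookie stores this
-- hash, so it never leaks the pod address directly
assert(map:find(key) == endpoint)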