ewma improvements

This commit is contained in:
Elvin Efendi 2019-08-15 11:04:32 -04:00
parent 0b375989f3
commit 30b64df10a
6 changed files with 354 additions and 98 deletions

View file

@ -5,6 +5,7 @@
-- /finagle-core/src/main/scala/com/twitter/finagle/loadbalancer/PeakEwma.scala
local resty_lock = require("resty.lock")
local util = require("util")
local split = require("util.split")
@ -13,10 +14,36 @@ local ngx_log = ngx.log
local INFO = ngx.INFO
local DECAY_TIME = 10 -- this value is in seconds
local LOCK_KEY = ":ewma_key"
local PICK_SET_SIZE = 2
local ewma_lock, ewma_lock_err = resty_lock:new("balancer_ewma_locks", {timeout = 0, exptime = 0.1})
if not ewma_lock then
error(ewma_lock_err)
end
local _M = { name = "ewma" }
local function lock(upstream)
local _, err = ewma_lock:lock(upstream .. LOCK_KEY)
if err then
if err ~= "timeout" then
ngx.log(ngx.ERR, string.format("EWMA Balancer failed to lock: %s", tostring(err)))
end
end
return err
end
local function unlock()
local ok, err = ewma_lock:unlock()
if not ok then
ngx.log(ngx.ERR, string.format("EWMA Balancer failed to unlock: %s", tostring(err)))
end
return err
end
local function decay_ewma(ewma, last_touched_at, rtt, now)
local td = now - last_touched_at
td = (td > 0) and td or 0
@ -26,28 +53,55 @@ local function decay_ewma(ewma, last_touched_at, rtt, now)
return ewma
end
local function get_or_update_ewma(self, upstream, rtt, update)
local ewma = self.ewma[upstream] or 0
local function store_stats(upstream, ewma, now)
local success, err, forcible = ngx.shared.balancer_ewma_last_touched_at:set(upstream, now)
if not success then
ngx.log(ngx.WARN, "balancer_ewma_last_touched_at:set failed " .. err)
end
if forcible then
ngx.log(ngx.WARN, "balancer_ewma_last_touched_at:set valid items forcibly overwritten")
end
success, err, forcible = ngx.shared.balancer_ewma:set(upstream, ewma)
if not success then
ngx.log(ngx.WARN, "balancer_ewma:set failed " .. err)
end
if forcible then
ngx.log(ngx.WARN, "balancer_ewma:set valid items forcibly overwritten")
end
end
local function get_or_update_ewma(upstream, rtt, update)
local lock_err = nil
if update then
lock_err = lock(upstream)
end
local ewma = ngx.shared.balancer_ewma:get(upstream) or 0
if lock_err ~= nil then
return ewma, lock_err
end
local now = ngx.now()
local last_touched_at = self.ewma_last_touched_at[upstream] or 0
local last_touched_at = ngx.shared.balancer_ewma_last_touched_at:get(upstream) or 0
ewma = decay_ewma(ewma, last_touched_at, rtt, now)
if not update then
return ewma, nil
end
self.ewma[upstream] = ewma
self.ewma_last_touched_at[upstream] = now
store_stats(upstream, ewma, now)
unlock()
return ewma, nil
end
local function score(self, upstream)
local function score(upstream)
-- Original implementation used names
-- Endpoints don't have names, so passing in IP:Port as key instead
local upstream_name = upstream.address .. ":" .. upstream.port
return get_or_update_ewma(self, upstream_name, 0, false)
return get_or_update_ewma(upstream_name, 0, false)
end
-- implementation similar to https://en.wikipedia.org/wiki/Fisher%E2%80%93Yates_shuffle
@ -63,12 +117,12 @@ local function shuffle_peers(peers, k)
-- peers[1 .. k] will now contain a randomly selected k from #peers
end
local function pick_and_score(self, peers, k)
local function pick_and_score(peers, k)
shuffle_peers(peers, k)
local lowest_score_index = 1
local lowest_score = score(self, peers[lowest_score_index])
local lowest_score = score(peers[lowest_score_index])
for i = 2, k do
local new_score = score(self, peers[i])
local new_score = score(peers[i])
if new_score < lowest_score then
lowest_score_index, lowest_score = i, new_score
end
@ -76,6 +130,31 @@ local function pick_and_score(self, peers, k)
return peers[lowest_score_index], lowest_score
end
-- slow_start_ewma is something we use to avoid sending too many requests
-- to the newly introduced endpoints. We currently use average ewma values
-- of existing endpoints.
local function calculate_slow_start_ewma(self)
local total_ewma = 0
local endpoints_count = 0
for _, endpoint in pairs(self.peers) do
local endpoint_string = endpoint.address .. ":" .. endpoint.port
local ewma = ngx.shared.balancer_ewma:get(endpoint_string)
if ewma then
endpoints_count = endpoints_count + 1
total_ewma = total_ewma + ewma
end
end
if endpoints_count == 0 then
ngx.log(ngx.INFO, "no ewma value exists for the endpoints")
return nil
end
return total_ewma / endpoints_count
end
function _M.balance(self)
local peers = self.peers
local endpoint, ewma_score = peers[1], -1
@ -83,7 +162,7 @@ function _M.balance(self)
if #peers > 1 then
local k = (#peers < PICK_SET_SIZE) and #peers or PICK_SET_SIZE
local peer_copy = util.deepcopy(peers)
endpoint, ewma_score = pick_and_score(self, peer_copy, k)
endpoint, ewma_score = pick_and_score(peer_copy, k)
end
ngx.var.balancer_ewma_score = ewma_score
@ -92,7 +171,7 @@ function _M.balance(self)
return endpoint.address .. ":" .. endpoint.port
end
function _M.after_balance(self)
function _M.after_balance(_)
local response_time = tonumber(split.get_first_value(ngx.var.upstream_response_time)) or 0
local connect_time = tonumber(split.get_first_value(ngx.var.upstream_connect_time)) or 0
local rtt = connect_time + response_time
@ -101,30 +180,41 @@ function _M.after_balance(self)
if util.is_blank(upstream) then
return
end
get_or_update_ewma(self, upstream, rtt, true)
get_or_update_ewma(upstream, rtt, true)
end
function _M.sync(self, backend)
self.traffic_shaping_policy = backend.trafficShapingPolicy
self.alternative_backends = backend.alternativeBackends
local normalized_endpoints_added, normalized_endpoints_removed = util.diff_endpoints(self.peers, backend.endpoints)
local changed = not util.deep_compare(self.peers, backend.endpoints)
if not changed then
if #normalized_endpoints_added == 0 and #normalized_endpoints_removed == 0 then
ngx.log(ngx.INFO, "endpoints did not change for backend " .. tostring(backend.name))
return
end
ngx_log(INFO, string_format("[%s] peers have changed for backend %s", self.name, backend.name))
self.traffic_shaping_policy = backend.trafficShapingPolicy
self.alternative_backends = backend.alternativeBackends
self.peers = backend.endpoints
self.ewma = {}
self.ewma_last_touched_at = {}
for _, endpoint_string in ipairs(normalized_endpoints_removed) do
ngx.shared.balancer_ewma:delete(endpoint_string)
ngx.shared.balancer_ewma_last_touched_at:delete(endpoint_string)
end
local slow_start_ewma = calculate_slow_start_ewma(self)
if slow_start_ewma ~= nil then
local now = ngx.now()
for _, endpoint_string in ipairs(normalized_endpoints_added) do
store_stats(endpoint_string, slow_start_ewma, now)
end
end
end
function _M.new(self, backend)
local o = {
peers = backend.endpoints,
ewma = {},
ewma_last_touched_at = {},
traffic_shaping_policy = backend.trafficShapingPolicy,
alternative_backends = backend.alternativeBackends,
}