overleaf/services/web/app/coffee/infrastructure/LockManager.coffee
2018-05-04 15:02:36 +01:00

100 lines
4 KiB
CoffeeScript

metrics = require('metrics-sharelatex')
Settings = require('settings-sharelatex')
RedisWrapper = require("./RedisWrapper")
rclient = RedisWrapper.client("lock")
logger = require "logger-sharelatex"
os = require "os"
crypto = require "crypto"
HOST = os.hostname()
PID = process.pid
RND = crypto.randomBytes(4).toString('hex')
COUNT = 0
module.exports = LockManager =
LOCK_TEST_INTERVAL: 50 # 50ms between each test of the lock
MAX_TEST_INTERVAL: 1000 # back off to 1s between each test of the lock
MAX_LOCK_WAIT_TIME: 10000 # 10s maximum time to spend trying to get the lock
REDIS_LOCK_EXPIRY: 30 # seconds. Time until lock auto expires in redis
SLOW_EXECUTION_THRESHOLD: 5000 # 5s, if execution takes longer than this then log
# Use a signed lock value as described in
# http://redis.io/topics/distlock#correct-implementation-with-a-single-instance
# to prevent accidental unlocking by multiple processes
randomLock : () ->
time = Date.now()
return "locked:host=#{HOST}:pid=#{PID}:random=#{RND}:time=#{time}:count=#{COUNT++}"
unlockScript: 'if redis.call("get", KEYS[1]) == ARGV[1] then return redis.call("del", KEYS[1]) else return 0 end'
runWithLock: (namespace, id, runner = ( (releaseLock = (error) ->) -> ), callback = ( (error) -> )) ->
# This error is defined here so we get a useful stacktrace
slowExecutionError = new Error "slow execution during lock"
timer = new metrics.Timer("lock.#{namespace}")
key = "lock:web:#{namespace}:#{id}"
LockManager._getLock key, namespace, (error, lockValue) ->
return callback(error) if error?
# The lock can expire in redis but the process carry on. This setTimout call
# is designed to log if this happens.
countIfExceededLockTimeout = () ->
metrics.inc "lock.#{namespace}.exceeded_lock_timeout"
logger.log "exceeded lock timeout", { namespace, id, slowExecutionError }
exceededLockTimeout = setTimeout countIfExceededLockTimeout, LockManager.REDIS_LOCK_EXPIRY * 1000
runner (error1, values...) ->
LockManager._releaseLock key, lockValue, (error2) ->
clearTimeout exceededLockTimeout
timeTaken = new Date - timer.start
if timeTaken > LockManager.SLOW_EXECUTION_THRESHOLD
logger.log "slow execution during lock", { namespace, id, timeTaken, slowExecutionError }
timer.done()
error = error1 or error2
return callback(error) if error?
callback null, values...
_tryLock : (key, namespace, callback = (err, isFree, lockValue)->)->
lockValue = LockManager.randomLock()
rclient.set key, lockValue, "EX", LockManager.REDIS_LOCK_EXPIRY, "NX", (err, gotLock)->
return callback(err) if err?
if gotLock == "OK"
metrics.inc "lock.#{namespace}.try.success"
callback err, true, lockValue
else
metrics.inc "lock.#{namespace}.try.failed"
logger.log key: key, redis_response: gotLock, "lock is locked"
callback err, false
_getLock: (key, namespace, callback = (error, lockValue) ->) ->
startTime = Date.now()
testInterval = LockManager.LOCK_TEST_INTERVAL
attempts = 0
do attempt = () ->
if Date.now() - startTime > LockManager.MAX_LOCK_WAIT_TIME
metrics.inc "lock.#{namespace}.get.failed"
return callback(new Error("Timeout"))
attempts += 1
LockManager._tryLock key, namespace, (error, gotLock, lockValue) ->
return callback(error) if error?
if gotLock
metrics.gauge "lock.#{namespace}.get.success.tries", attempts
callback(null, lockValue)
else
setTimeout attempt, testInterval
# back off when the lock is taken to avoid overloading
testInterval = Math.min(testInterval * 2, LockManager.MAX_TEST_INTERVAL)
_releaseLock: (key, lockValue, callback)->
rclient.eval LockManager.unlockScript, 1, key, lockValue, (err, result) ->
if err?
return callback(err)
else if result? and result isnt 1 # successful unlock should release exactly one key
logger.error {key:key, lockValue:lockValue, redis_err:err, redis_result:result}, "unlocking error"
metrics.inc "unlock-error"
return callback(new Error("tried to release timed out lock"))
else
callback(null,result)