2017-04-03 11:18:30 -04:00
|
|
|
metrics = require('metrics-sharelatex')
|
2014-11-25 10:26:33 -05:00
|
|
|
Settings = require('settings-sharelatex')
|
2017-05-04 10:22:54 -04:00
|
|
|
RedisWrapper = require("./RedisWrapper")
|
|
|
|
rclient = RedisWrapper.client("lock")
|
2014-11-25 10:26:33 -05:00
|
|
|
logger = require "logger-sharelatex"
|
2018-05-04 08:22:33 -04:00
|
|
|
os = require "os"
|
|
|
|
crypto = require "crypto"
|
|
|
|
|
|
|
|
HOST = os.hostname()
|
|
|
|
PID = process.pid
|
|
|
|
RND = crypto.randomBytes(4).toString('hex')
|
|
|
|
COUNT = 0
|
2014-11-25 10:26:33 -05:00
|
|
|
|
|
|
|
module.exports = LockManager =
|
|
|
|
LOCK_TEST_INTERVAL: 50 # 50ms between each test of the lock
|
2018-05-04 08:43:12 -04:00
|
|
|
MAX_TEST_INTERVAL: 1000 # back off to 1s between each test of the lock
|
2014-11-25 10:26:33 -05:00
|
|
|
MAX_LOCK_WAIT_TIME: 10000 # 10s maximum time to spend trying to get the lock
|
2018-02-19 06:23:43 -05:00
|
|
|
REDIS_LOCK_EXPIRY: 30 # seconds. Time until lock auto expires in redis
|
|
|
|
SLOW_EXECUTION_THRESHOLD: 5000 # 5s, if execution takes longer than this then log
|
2014-11-25 10:26:33 -05:00
|
|
|
|
2018-05-04 08:22:33 -04:00
|
|
|
# Use a signed lock value as described in
|
|
|
|
# http://redis.io/topics/distlock#correct-implementation-with-a-single-instance
|
|
|
|
# to prevent accidental unlocking by multiple processes
|
|
|
|
randomLock : () ->
|
|
|
|
time = Date.now()
|
|
|
|
return "locked:host=#{HOST}:pid=#{PID}:random=#{RND}:time=#{time}:count=#{COUNT++}"
|
|
|
|
|
|
|
|
unlockScript: 'if redis.call("get", KEYS[1]) == ARGV[1] then return redis.call("del", KEYS[1]) else return 0 end'
|
|
|
|
|
2018-02-19 06:23:43 -05:00
|
|
|
runWithLock: (namespace, id, runner = ( (releaseLock = (error) ->) -> ), callback = ( (error) -> )) ->
|
2018-02-20 09:03:43 -05:00
|
|
|
# This error is defined here so we get a useful stacktrace
|
2018-02-19 06:23:43 -05:00
|
|
|
slowExecutionError = new Error "slow execution during lock"
|
|
|
|
|
|
|
|
timer = new metrics.Timer("lock.#{namespace}")
|
|
|
|
key = "lock:web:#{namespace}:#{id}"
|
2018-05-04 08:22:33 -04:00
|
|
|
LockManager._getLock key, namespace, (error, lockValue) ->
|
2018-02-01 10:31:42 -05:00
|
|
|
return callback(error) if error?
|
2018-02-20 09:03:43 -05:00
|
|
|
|
|
|
|
# The lock can expire in redis but the process carry on. This setTimout call
|
|
|
|
# is designed to log if this happens.
|
|
|
|
countIfExceededLockTimeout = () ->
|
|
|
|
metrics.inc "lock.#{namespace}.exceeded_lock_timeout"
|
|
|
|
logger.log "exceeded lock timeout", { namespace, id, slowExecutionError }
|
|
|
|
exceededLockTimeout = setTimeout countIfExceededLockTimeout, LockManager.REDIS_LOCK_EXPIRY * 1000
|
|
|
|
|
2018-02-01 10:31:42 -05:00
|
|
|
runner (error1, values...) ->
|
2018-05-04 08:22:33 -04:00
|
|
|
LockManager._releaseLock key, lockValue, (error2) ->
|
2018-02-20 09:03:43 -05:00
|
|
|
clearTimeout exceededLockTimeout
|
|
|
|
|
2018-02-19 06:23:43 -05:00
|
|
|
timeTaken = new Date - timer.start
|
|
|
|
if timeTaken > LockManager.SLOW_EXECUTION_THRESHOLD
|
|
|
|
logger.log "slow execution during lock", { namespace, id, timeTaken, slowExecutionError }
|
|
|
|
|
|
|
|
timer.done()
|
2018-02-01 10:31:42 -05:00
|
|
|
error = error1 or error2
|
|
|
|
return callback(error) if error?
|
|
|
|
callback null, values...
|
|
|
|
|
2018-05-04 08:22:33 -04:00
|
|
|
_tryLock : (key, namespace, callback = (err, isFree, lockValue)->)->
|
|
|
|
lockValue = LockManager.randomLock()
|
|
|
|
rclient.set key, lockValue, "EX", LockManager.REDIS_LOCK_EXPIRY, "NX", (err, gotLock)->
|
2014-11-25 10:26:33 -05:00
|
|
|
return callback(err) if err?
|
|
|
|
if gotLock == "OK"
|
2018-02-20 08:58:09 -05:00
|
|
|
metrics.inc "lock.#{namespace}.try.success"
|
2018-05-04 08:22:33 -04:00
|
|
|
callback err, true, lockValue
|
2014-11-25 10:26:33 -05:00
|
|
|
else
|
2018-02-20 08:58:09 -05:00
|
|
|
metrics.inc "lock.#{namespace}.try.failed"
|
2017-05-11 10:27:01 -04:00
|
|
|
logger.log key: key, redis_response: gotLock, "lock is locked"
|
2014-11-25 10:26:33 -05:00
|
|
|
callback err, false
|
|
|
|
|
2018-05-04 08:22:33 -04:00
|
|
|
_getLock: (key, namespace, callback = (error, lockValue) ->) ->
|
2014-11-25 10:26:33 -05:00
|
|
|
startTime = Date.now()
|
2018-05-04 08:43:12 -04:00
|
|
|
testInterval = LockManager.LOCK_TEST_INTERVAL
|
2018-02-20 08:58:09 -05:00
|
|
|
attempts = 0
|
2014-11-25 10:26:33 -05:00
|
|
|
do attempt = () ->
|
|
|
|
if Date.now() - startTime > LockManager.MAX_LOCK_WAIT_TIME
|
2018-02-20 08:58:09 -05:00
|
|
|
metrics.inc "lock.#{namespace}.get.failed"
|
2014-11-25 10:26:33 -05:00
|
|
|
return callback(new Error("Timeout"))
|
|
|
|
|
2018-02-20 08:58:09 -05:00
|
|
|
attempts += 1
|
2018-05-04 08:22:33 -04:00
|
|
|
LockManager._tryLock key, namespace, (error, gotLock, lockValue) ->
|
2014-11-25 10:26:33 -05:00
|
|
|
return callback(error) if error?
|
|
|
|
if gotLock
|
2018-02-21 08:17:51 -05:00
|
|
|
metrics.gauge "lock.#{namespace}.get.success.tries", attempts
|
2018-05-04 08:22:33 -04:00
|
|
|
callback(null, lockValue)
|
2014-11-25 10:26:33 -05:00
|
|
|
else
|
2018-05-04 08:43:12 -04:00
|
|
|
setTimeout attempt, testInterval
|
|
|
|
# back off when the lock is taken to avoid overloading
|
|
|
|
testInterval = Math.min(testInterval * 2, LockManager.MAX_TEST_INTERVAL)
|
2014-11-25 10:26:33 -05:00
|
|
|
|
2018-05-04 08:22:33 -04:00
|
|
|
_releaseLock: (key, lockValue, callback)->
|
|
|
|
rclient.eval LockManager.unlockScript, 1, key, lockValue, (err, result) ->
|
|
|
|
if err?
|
|
|
|
return callback(err)
|
|
|
|
else if result? and result isnt 1 # successful unlock should release exactly one key
|
|
|
|
logger.error {key:key, lockValue:lockValue, redis_err:err, redis_result:result}, "unlocking error"
|
|
|
|
metrics.inc "unlock-error"
|
|
|
|
return callback(new Error("tried to release timed out lock"))
|
|
|
|
else
|
|
|
|
callback(null,result)
|