overleaf/services/web/app/coffee/infrastructure/LockManager.coffee

metrics = require('metrics-sharelatex')
Settings = require('settings-sharelatex')
RedisWrapper = require("./RedisWrapper")
rclient = RedisWrapper.client("lock")
logger = require "logger-sharelatex"
os = require "os"
crypto = require "crypto"

HOST = os.hostname()
PID = process.pid
RND = crypto.randomBytes(4).toString('hex')
COUNT = 0

module.exports = LockManager =
	LOCK_TEST_INTERVAL: 50 # 50ms between each test of the lock
	MAX_TEST_INTERVAL: 1000 # back off to 1s between each test of the lock
	MAX_LOCK_WAIT_TIME: 10000 # 10s maximum time to spend trying to get the lock
	REDIS_LOCK_EXPIRY: 30 # seconds. Time until lock auto expires in redis
	SLOW_EXECUTION_THRESHOLD: 5000 # 5s, if execution takes longer than this then log

	# Use a signed lock value as described in
	# http://redis.io/topics/distlock#correct-implementation-with-a-single-instance
	# to prevent accidental unlocking by multiple processes
	randomLock : () ->
		time = Date.now()
		return "locked:host=#{HOST}:pid=#{PID}:random=#{RND}:time=#{time}:count=#{COUNT++}"

	unlockScript: 'if redis.call("get", KEYS[1]) == ARGV[1] then return redis.call("del", KEYS[1]) else return 0 end'

	runWithLock: (namespace, id, runner = ( (releaseLock = (error) ->) -> ), callback = ( (error) -> )) ->
		# This error is defined here so we get a useful stacktrace
		slowExecutionError = new Error "slow execution during lock"

		timer = new metrics.Timer("lock.#{namespace}")
		key = "lock:web:#{namespace}:#{id}"
		LockManager._getLock key, namespace, (error, lockValue) ->
			return callback(error) if error?

			# The lock can expire in redis but the process carry on. This setTimout call
			# is designed to log if this happens.
			countIfExceededLockTimeout = () ->
				metrics.inc "lock.#{namespace}.exceeded_lock_timeout"
				logger.log "exceeded lock timeout", { namespace, id, slowExecutionError }
			exceededLockTimeout = setTimeout countIfExceededLockTimeout, LockManager.REDIS_LOCK_EXPIRY * 1000

			runner (error1, values...) ->
				LockManager._releaseLock key, lockValue, (error2) ->
					clearTimeout exceededLockTimeout

					timeTaken = new Date - timer.start
					if timeTaken > LockManager.SLOW_EXECUTION_THRESHOLD
						logger.log "slow execution during lock", { namespace, id, timeTaken, slowExecutionError }

					timer.done()
					error = error1 or error2
					return callback(error) if error?
					callback null, values...

	_tryLock : (key, namespace, callback = (err, isFree, lockValue)->)->
		lockValue = LockManager.randomLock()
		rclient.set key, lockValue, "EX", LockManager.REDIS_LOCK_EXPIRY, "NX", (err, gotLock)->
			return callback(err) if err?
			if gotLock == "OK"
				metrics.inc "lock.#{namespace}.try.success"
				callback err, true, lockValue
			else
				metrics.inc "lock.#{namespace}.try.failed"
				logger.log key: key, redis_response: gotLock, "lock is locked"
				callback err, false

	_getLock: (key, namespace, callback = (error, lockValue) ->) ->
		startTime = Date.now()
		testInterval = LockManager.LOCK_TEST_INTERVAL
		attempts = 0
		do attempt = () ->
			if Date.now() - startTime > LockManager.MAX_LOCK_WAIT_TIME
				metrics.inc "lock.#{namespace}.get.failed"
				return callback(new Error("Timeout"))

			attempts += 1
			LockManager._tryLock key, namespace, (error, gotLock, lockValue) ->
				return callback(error) if error?
				if gotLock
					metrics.gauge "lock.#{namespace}.get.success.tries", attempts
					callback(null, lockValue)
				else
					setTimeout attempt, testInterval
					# back off when the lock is taken to avoid overloading
					testInterval = Math.min(testInterval * 2, LockManager.MAX_TEST_INTERVAL)

	_releaseLock: (key, lockValue, callback)->
		rclient.eval LockManager.unlockScript, 1, key, lockValue, (err, result) ->
			if err?
				return callback(err)
			else if result? and result isnt 1 # successful unlock should release exactly one key
				logger.error {key:key, lockValue:lockValue, redis_err:err, redis_result:result}, "unlocking error"
				metrics.inc "unlock-error"
				return callback(new Error("tried to release timed out lock"))
			else
				callback(null,result)
Remove the Metrics module, use metrics-sharelatex 2017-04-03 11:18:30 -04:00			`metrics = require('metrics-sharelatex')`
coppied the lock manager over from doc updater 2014-11-25 10:26:33 -05:00			`Settings = require('settings-sharelatex')`
Move all redis end points to be cluster compatible 2017-05-04 10:22:54 -04:00			`RedisWrapper = require("./RedisWrapper")`
			`rclient = RedisWrapper.client("lock")`
coppied the lock manager over from doc updater 2014-11-25 10:26:33 -05:00			`logger = require "logger-sharelatex"`
add support for unique lock values 2018-05-04 08:22:33 -04:00			`os = require "os"`
			`crypto = require "crypto"`

			`HOST = os.hostname()`
			`PID = process.pid`
			`RND = crypto.randomBytes(4).toString('hex')`
			`COUNT = 0`
coppied the lock manager over from doc updater 2014-11-25 10:26:33 -05:00
			`module.exports = LockManager =`
			`LOCK_TEST_INTERVAL: 50 # 50ms between each test of the lock`
retry lock with exponential backoff 2018-05-04 08:43:12 -04:00			`MAX_TEST_INTERVAL: 1000 # back off to 1s between each test of the lock`
coppied the lock manager over from doc updater 2014-11-25 10:26:33 -05:00			`MAX_LOCK_WAIT_TIME: 10000 # 10s maximum time to spend trying to get the lock`
add extra metrics around locking 2018-02-19 06:23:43 -05:00			`REDIS_LOCK_EXPIRY: 30 # seconds. Time until lock auto expires in redis`
			`SLOW_EXECUTION_THRESHOLD: 5000 # 5s, if execution takes longer than this then log`
coppied the lock manager over from doc updater 2014-11-25 10:26:33 -05:00
add support for unique lock values 2018-05-04 08:22:33 -04:00			`# Use a signed lock value as described in`
			`# http://redis.io/topics/distlock#correct-implementation-with-a-single-instance`
			`# to prevent accidental unlocking by multiple processes`
			`randomLock : () ->`
			`time = Date.now()`
			`return "locked:host=#{HOST}:pid=#{PID}:random=#{RND}:time=#{time}:count=#{COUNT++}"`

			`unlockScript: 'if redis.call("get", KEYS[1]) == ARGV[1] then return redis.call("del", KEYS[1]) else return 0 end'`

add extra metrics around locking 2018-02-19 06:23:43 -05:00			`runWithLock: (namespace, id, runner = ( (releaseLock = (error) ->) -> ), callback = ( (error) -> )) ->`
fix exceeded lock timeout logging we were logging this after failing to get the lock, this stops that 2018-02-20 09:03:43 -05:00			`# This error is defined here so we get a useful stacktrace`
add extra metrics around locking 2018-02-19 06:23:43 -05:00			`slowExecutionError = new Error "slow execution during lock"`

			`timer = new metrics.Timer("lock.#{namespace}")`
			`key = "lock:web:#{namespace}:#{id}"`
add support for unique lock values 2018-05-04 08:22:33 -04:00			`LockManager._getLock key, namespace, (error, lockValue) ->`
add mongo transaction lock when getting or updating projects 2018-02-01 10:31:42 -05:00			`return callback(error) if error?`
fix exceeded lock timeout logging we were logging this after failing to get the lock, this stops that 2018-02-20 09:03:43 -05:00
			`# The lock can expire in redis but the process carry on. This setTimout call`
			`# is designed to log if this happens.`
			`countIfExceededLockTimeout = () ->`
			`metrics.inc "lock.#{namespace}.exceeded_lock_timeout"`
			`logger.log "exceeded lock timeout", { namespace, id, slowExecutionError }`
			`exceededLockTimeout = setTimeout countIfExceededLockTimeout, LockManager.REDIS_LOCK_EXPIRY * 1000`

add mongo transaction lock when getting or updating projects 2018-02-01 10:31:42 -05:00			`runner (error1, values...) ->`
add support for unique lock values 2018-05-04 08:22:33 -04:00			`LockManager._releaseLock key, lockValue, (error2) ->`
fix exceeded lock timeout logging we were logging this after failing to get the lock, this stops that 2018-02-20 09:03:43 -05:00			`clearTimeout exceededLockTimeout`

add extra metrics around locking 2018-02-19 06:23:43 -05:00			`timeTaken = new Date - timer.start`
			`if timeTaken > LockManager.SLOW_EXECUTION_THRESHOLD`
			`logger.log "slow execution during lock", { namespace, id, timeTaken, slowExecutionError }`

			`timer.done()`
add mongo transaction lock when getting or updating projects 2018-02-01 10:31:42 -05:00			`error = error1 or error2`
			`return callback(error) if error?`
			`callback null, values...`

add support for unique lock values 2018-05-04 08:22:33 -04:00			`_tryLock : (key, namespace, callback = (err, isFree, lockValue)->)->`
			`lockValue = LockManager.randomLock()`
			`rclient.set key, lockValue, "EX", LockManager.REDIS_LOCK_EXPIRY, "NX", (err, gotLock)->`
coppied the lock manager over from doc updater 2014-11-25 10:26:33 -05:00			`return callback(err) if err?`
			`if gotLock == "OK"`
improve metrics 2018-02-20 08:58:09 -05:00			`metrics.inc "lock.#{namespace}.try.success"`
add support for unique lock values 2018-05-04 08:22:33 -04:00			`callback err, true, lockValue`
coppied the lock manager over from doc updater 2014-11-25 10:26:33 -05:00			`else`
improve metrics 2018-02-20 08:58:09 -05:00			`metrics.inc "lock.#{namespace}.try.failed"`
Rename lock to avoid potential conflict with doc updater 2017-05-11 10:27:01 -04:00			`logger.log key: key, redis_response: gotLock, "lock is locked"`
coppied the lock manager over from doc updater 2014-11-25 10:26:33 -05:00			`callback err, false`

add support for unique lock values 2018-05-04 08:22:33 -04:00			`_getLock: (key, namespace, callback = (error, lockValue) ->) ->`
coppied the lock manager over from doc updater 2014-11-25 10:26:33 -05:00			`startTime = Date.now()`
retry lock with exponential backoff 2018-05-04 08:43:12 -04:00			`testInterval = LockManager.LOCK_TEST_INTERVAL`
improve metrics 2018-02-20 08:58:09 -05:00			`attempts = 0`
coppied the lock manager over from doc updater 2014-11-25 10:26:33 -05:00			`do attempt = () ->`
			`if Date.now() - startTime > LockManager.MAX_LOCK_WAIT_TIME`
improve metrics 2018-02-20 08:58:09 -05:00			`metrics.inc "lock.#{namespace}.get.failed"`
coppied the lock manager over from doc updater 2014-11-25 10:26:33 -05:00			`return callback(new Error("Timeout"))`

improve metrics 2018-02-20 08:58:09 -05:00			`attempts += 1`
add support for unique lock values 2018-05-04 08:22:33 -04:00			`LockManager._tryLock key, namespace, (error, gotLock, lockValue) ->`
coppied the lock manager over from doc updater 2014-11-25 10:26:33 -05:00			`return callback(error) if error?`
			`if gotLock`
use gauge not inc 2018-02-21 08:17:51 -05:00			`metrics.gauge "lock.#{namespace}.get.success.tries", attempts`
add support for unique lock values 2018-05-04 08:22:33 -04:00			`callback(null, lockValue)`
coppied the lock manager over from doc updater 2014-11-25 10:26:33 -05:00			`else`
retry lock with exponential backoff 2018-05-04 08:43:12 -04:00			`setTimeout attempt, testInterval`
			`# back off when the lock is taken to avoid overloading`
			`testInterval = Math.min(testInterval * 2, LockManager.MAX_TEST_INTERVAL)`
coppied the lock manager over from doc updater 2014-11-25 10:26:33 -05:00
add support for unique lock values 2018-05-04 08:22:33 -04:00			`_releaseLock: (key, lockValue, callback)->`
			`rclient.eval LockManager.unlockScript, 1, key, lockValue, (err, result) ->`
			`if err?`
			`return callback(err)`
			`else if result? and result isnt 1 # successful unlock should release exactly one key`
			`logger.error {key:key, lockValue:lockValue, redis_err:err, redis_result:result}, "unlocking error"`
			`metrics.inc "unlock-error"`
			`return callback(new Error("tried to release timed out lock"))`
			`else`
			`callback(null,result)`