Merge pull request #352 from sharelatex/hof-lock-metrics

Improve lock metrics
This commit is contained in:
James Allen 2018-02-21 13:52:08 +00:00 committed by GitHub
commit 0371aaaf05
5 changed files with 41 additions and 97 deletions

View file

@ -11,27 +11,25 @@ module.exports = LockManager =
SLOW_EXECUTION_THRESHOLD: 5000 # 5s, if execution takes longer than this then log
runWithLock: (namespace, id, runner = ( (releaseLock = (error) ->) -> ), callback = ( (error) -> )) ->
# The lock can expire in redis but the process carry on. This setTimout call
# is designed to log if this happens.
#
# error is defined here so we get a useful stacktrace
lockReleased = false
# This error is defined here so we get a useful stacktrace
slowExecutionError = new Error "slow execution during lock"
countIfExceededLockTimeout = () ->
if !lockReleased
metrics.inc "lock.#{namespace}.exceeded_lock_timeout"
logger.log "exceeded lock timeout", { namespace, id, slowExecutionError }
setTimeout countIfExceededLockTimeout, LockManager.REDIS_LOCK_EXPIRY * 1000
timer = new metrics.Timer("lock.#{namespace}")
key = "lock:web:#{namespace}:#{id}"
LockManager._getLock key, (error) ->
LockManager._getLock key, namespace, (error) ->
return callback(error) if error?
# The lock can expire in redis but the process carry on. This setTimout call
# is designed to log if this happens.
countIfExceededLockTimeout = () ->
metrics.inc "lock.#{namespace}.exceeded_lock_timeout"
logger.log "exceeded lock timeout", { namespace, id, slowExecutionError }
exceededLockTimeout = setTimeout countIfExceededLockTimeout, LockManager.REDIS_LOCK_EXPIRY * 1000
runner (error1, values...) ->
LockManager._releaseLock key, (error2) ->
lockReleased = true
clearTimeout exceededLockTimeout
timeTaken = new Date - timer.start
if timeTaken > LockManager.SLOW_EXECUTION_THRESHOLD
logger.log "slow execution during lock", { namespace, id, timeTaken, slowExecutionError }
@ -41,42 +39,33 @@ module.exports = LockManager =
return callback(error) if error?
callback null, values...
_tryLock : (key, callback = (err, isFree)->)->
_tryLock : (key, namespace, callback = (err, isFree)->)->
rclient.set key, "locked", "EX", LockManager.REDIS_LOCK_EXPIRY, "NX", (err, gotLock)->
return callback(err) if err?
if gotLock == "OK"
metrics.inc "lock-not-blocking"
metrics.inc "lock.#{namespace}.try.success"
callback err, true
else
metrics.inc "lock-blocking"
metrics.inc "lock.#{namespace}.try.failed"
logger.log key: key, redis_response: gotLock, "lock is locked"
callback err, false
_getLock: (key, callback = (error) ->) ->
_getLock: (key, namespace, callback = (error) ->) ->
startTime = Date.now()
attempts = 0
do attempt = () ->
if Date.now() - startTime > LockManager.MAX_LOCK_WAIT_TIME
metrics.inc "lock.#{namespace}.get.failed"
return callback(new Error("Timeout"))
LockManager._tryLock key, (error, gotLock) ->
attempts += 1
LockManager._tryLock key, namespace, (error, gotLock) ->
return callback(error) if error?
if gotLock
metrics.gauge "lock.#{namespace}.get.success.tries", attempts
callback(null)
else
setTimeout attempt, LockManager.LOCK_TEST_INTERVAL
_checkLock: (key, callback = (err, isFree)->)->
multi = rclient.multi()
multi.exists key
multi.exec (err, replys)->
return callback(err) if err?
exists = parseInt replys[0]
if exists == 1
metrics.inc "lock-blocking"
callback err, false
else
metrics.inc "lock-not-blocking"
callback err, true
_releaseLock: (key, callback)->
rclient.del key, callback

View file

@ -31,8 +31,9 @@ describe "ProjectStructureMongoLock", ->
ProjectCreationHandler.createBlankProject user._id, 'locked-project', (err, project) =>
throw err if err?
@locked_project = project
@lock_key = "lock:web:#{ProjectEntityMongoUpdateHandler.LOCK_NAMESPACE}:#{project._id}"
LockManager._getLock @lock_key, done
namespace = ProjectEntityMongoUpdateHandler.LOCK_NAMESPACE
@lock_key = "lock:web:#{namespace}:#{project._id}"
LockManager._getLock @lock_key, namespace, done
after (done) ->
LockManager._releaseLock @lock_key, done

View file

@ -1,46 +0,0 @@
sinon = require('sinon')
assert = require('assert')
path = require('path')
modulePath = path.join __dirname, '../../../../../app/js/infrastructure/LockManager.js'
lockKey = "lock:web:{#{5678}}"
SandboxedModule = require('sandboxed-module')
describe 'LockManager - checking the lock', ()->
existsStub = sinon.stub()
setStub = sinon.stub()
exireStub = sinon.stub()
execStub = sinon.stub()
mocks =
"logger-sharelatex": log:->
"./RedisWrapper":
client: ()->
auth:->
multi: ->
exists: existsStub
expire: exireStub
set: setStub
exec: execStub
LockManager = SandboxedModule.require(modulePath, requires: mocks)
it 'should check if lock exists but not set or expire', (done)->
execStub.callsArgWith(0, null, ["1"])
LockManager._checkLock lockKey, (err, docIsLocked)->
existsStub.calledWith(lockKey).should.equal true
setStub.called.should.equal false
exireStub.called.should.equal false
done()
it 'should return true if the key does not exists', (done)->
execStub.callsArgWith(0, null, "0")
LockManager._checkLock lockKey, (err, free)->
free.should.equal true
done()
it 'should return false if the key does exists', (done)->
execStub.callsArgWith(0, null, "1")
LockManager._checkLock lockKey, (err, free)->
free.should.equal false
done()

View file

@ -13,21 +13,24 @@ describe 'LockManager - getting the lock', ->
client: ()->
auth:->
"settings-sharelatex":{redis:{}}
"metrics-sharelatex": inc:->
"metrics-sharelatex":
inc:->
gauge:->
@callback = sinon.stub()
@doc_id = "doc-id-123"
@key = "lock:web:lockName:project-id}"
@namespace = 'lockName'
describe "when the lock is not set", ->
beforeEach (done) ->
@LockManager._tryLock = sinon.stub().callsArgWith(1, null, true)
@LockManager._getLock @doc_id, (args...) =>
@LockManager._tryLock = sinon.stub().yields(null, true)
@LockManager._getLock @key, @namespace, (args...) =>
@callback(args...)
done()
it "should try to get the lock", ->
@LockManager._tryLock
.calledWith(@doc_id)
.calledWith(@key, @namespace)
.should.equal true
it "should only need to try once", ->
@ -41,7 +44,7 @@ describe 'LockManager - getting the lock', ->
startTime = Date.now()
tries = 0
@LockManager.LOCK_TEST_INTERVAL = 5
@LockManager._tryLock = (doc_id, callback = (error, isFree) ->) ->
@LockManager._tryLock = (key, namespace, callback = (error, isFree) ->) ->
if (Date.now() - startTime < 20) or (tries < 2)
tries = tries + 1
callback null, false
@ -49,7 +52,7 @@ describe 'LockManager - getting the lock', ->
callback null, true
sinon.spy @LockManager, "_tryLock"
@LockManager._getLock @doc_id, (args...) =>
@LockManager._getLock @key, @namespace, (args...) =>
@callback(args...)
done()
@ -63,13 +66,10 @@ describe 'LockManager - getting the lock', ->
beforeEach (done) ->
time = Date.now()
@LockManager.MAX_LOCK_WAIT_TIME = 5
@LockManager._tryLock = sinon.stub().callsArgWith(1, null, false)
@LockManager._getLock @doc_id, (args...) =>
@LockManager._tryLock = sinon.stub().yields(null, false)
@LockManager._getLock @key, @namespace, (args...) =>
@callback(args...)
done()
it "should return the callback with an error", ->
@callback.calledWith(new Error("timeout")).should.equal true

View file

@ -16,13 +16,13 @@ describe 'LockManager - trying the lock', ->
"settings-sharelatex":{redis:{}}
"metrics-sharelatex": inc:->
@callback = sinon.stub()
@doc_id = "doc-id-123"
@key = "lock:web:{#{@doc_id}}"
@key = "lock:web:lockName:project-id}"
@namespace = "lockName"
describe "when the lock is not set", ->
beforeEach ->
@set.callsArgWith(5, null, "OK")
@LockManager._tryLock @key, @callback
@LockManager._tryLock @key, @namespace, @callback
it "should set the lock key with an expiry if it is not set", ->
@set.calledWith(@key, "locked", "EX", 30, "NX")
@ -34,7 +34,7 @@ describe 'LockManager - trying the lock', ->
describe "when the lock is already set", ->
beforeEach ->
@set.callsArgWith(5, null, null)
@LockManager._tryLock @key, @callback
@LockManager._tryLock @key, @namespace, @callback
it "should return the callback with false", ->
@callback.calledWith(null, false).should.equal true