From d924dbd1ed883a6f42838abf020c264e63202031 Mon Sep 17 00:00:00 2001 From: Hayden Faulds Date: Tue, 20 Feb 2018 13:58:09 +0000 Subject: [PATCH 1/3] improve metrics --- .../coffee/infrastructure/LockManager.coffee | 29 ++++-------- .../ProjectStructureMongoLockTest.coffee | 5 +- .../LockManager/CheckingTheLock.coffee | 46 ------------------- .../LockManager/getLockTests.coffee | 22 ++++----- .../LockManager/tryLockTests.coffee | 8 ++-- 5 files changed, 27 insertions(+), 83 deletions(-) delete mode 100644 services/web/test/unit/coffee/infrastructure/LockManager/CheckingTheLock.coffee diff --git a/services/web/app/coffee/infrastructure/LockManager.coffee b/services/web/app/coffee/infrastructure/LockManager.coffee index b748bbdb33..4c45d8703b 100644 --- a/services/web/app/coffee/infrastructure/LockManager.coffee +++ b/services/web/app/coffee/infrastructure/LockManager.coffee @@ -27,7 +27,7 @@ module.exports = LockManager = timer = new metrics.Timer("lock.#{namespace}") key = "lock:web:#{namespace}:#{id}" - LockManager._getLock key, (error) -> + LockManager._getLock key, namespace, (error) -> return callback(error) if error? runner (error1, values...) -> LockManager._releaseLock key, (error2) -> @@ -41,42 +41,33 @@ module.exports = LockManager = return callback(error) if error? callback null, values... - _tryLock : (key, callback = (err, isFree)->)-> + _tryLock : (key, namespace, callback = (err, isFree)->)-> rclient.set key, "locked", "EX", LockManager.REDIS_LOCK_EXPIRY, "NX", (err, gotLock)-> return callback(err) if err? if gotLock == "OK" - metrics.inc "lock-not-blocking" + metrics.inc "lock.#{namespace}.try.success" callback err, true else - metrics.inc "lock-blocking" + metrics.inc "lock.#{namespace}.try.failed" logger.log key: key, redis_response: gotLock, "lock is locked" callback err, false - _getLock: (key, callback = (error) ->) -> + _getLock: (key, namespace, callback = (error) ->) -> startTime = Date.now() + attempts = 0 do attempt = () -> if Date.now() - startTime > LockManager.MAX_LOCK_WAIT_TIME + metrics.inc "lock.#{namespace}.get.failed" return callback(new Error("Timeout")) - LockManager._tryLock key, (error, gotLock) -> + attempts += 1 + LockManager._tryLock key, namespace, (error, gotLock) -> return callback(error) if error? if gotLock + metrics.inc "lock.#{namespace}.get.success.tries", attempts callback(null) else setTimeout attempt, LockManager.LOCK_TEST_INTERVAL - _checkLock: (key, callback = (err, isFree)->)-> - multi = rclient.multi() - multi.exists key - multi.exec (err, replys)-> - return callback(err) if err? - exists = parseInt replys[0] - if exists == 1 - metrics.inc "lock-blocking" - callback err, false - else - metrics.inc "lock-not-blocking" - callback err, true - _releaseLock: (key, callback)-> rclient.del key, callback diff --git a/services/web/test/acceptance/coffee/ProjectStructureMongoLockTest.coffee b/services/web/test/acceptance/coffee/ProjectStructureMongoLockTest.coffee index 7788e18fc1..8e39e7e2f2 100644 --- a/services/web/test/acceptance/coffee/ProjectStructureMongoLockTest.coffee +++ b/services/web/test/acceptance/coffee/ProjectStructureMongoLockTest.coffee @@ -31,8 +31,9 @@ describe "ProjectStructureMongoLock", -> ProjectCreationHandler.createBlankProject user._id, 'locked-project', (err, project) => throw err if err? @locked_project = project - @lock_key = "lock:web:#{ProjectEntityMongoUpdateHandler.LOCK_NAMESPACE}:#{project._id}" - LockManager._getLock @lock_key, done + namespace = ProjectEntityMongoUpdateHandler.LOCK_NAMESPACE + @lock_key = "lock:web:#{namespace}:#{project._id}" + LockManager._getLock @lock_key, namespace, done after (done) -> LockManager._releaseLock @lock_key, done diff --git a/services/web/test/unit/coffee/infrastructure/LockManager/CheckingTheLock.coffee b/services/web/test/unit/coffee/infrastructure/LockManager/CheckingTheLock.coffee deleted file mode 100644 index 80fcdd1dc2..0000000000 --- a/services/web/test/unit/coffee/infrastructure/LockManager/CheckingTheLock.coffee +++ /dev/null @@ -1,46 +0,0 @@ -sinon = require('sinon') -assert = require('assert') -path = require('path') -modulePath = path.join __dirname, '../../../../../app/js/infrastructure/LockManager.js' -lockKey = "lock:web:{#{5678}}" -SandboxedModule = require('sandboxed-module') - -describe 'LockManager - checking the lock', ()-> - - existsStub = sinon.stub() - setStub = sinon.stub() - exireStub = sinon.stub() - execStub = sinon.stub() - - mocks = - "logger-sharelatex": log:-> - - "./RedisWrapper": - client: ()-> - auth:-> - multi: -> - exists: existsStub - expire: exireStub - set: setStub - exec: execStub - LockManager = SandboxedModule.require(modulePath, requires: mocks) - - it 'should check if lock exists but not set or expire', (done)-> - execStub.callsArgWith(0, null, ["1"]) - LockManager._checkLock lockKey, (err, docIsLocked)-> - existsStub.calledWith(lockKey).should.equal true - setStub.called.should.equal false - exireStub.called.should.equal false - done() - - it 'should return true if the key does not exists', (done)-> - execStub.callsArgWith(0, null, "0") - LockManager._checkLock lockKey, (err, free)-> - free.should.equal true - done() - - it 'should return false if the key does exists', (done)-> - execStub.callsArgWith(0, null, "1") - LockManager._checkLock lockKey, (err, free)-> - free.should.equal false - done() diff --git a/services/web/test/unit/coffee/infrastructure/LockManager/getLockTests.coffee b/services/web/test/unit/coffee/infrastructure/LockManager/getLockTests.coffee index 754d1f2a85..bb2466c104 100644 --- a/services/web/test/unit/coffee/infrastructure/LockManager/getLockTests.coffee +++ b/services/web/test/unit/coffee/infrastructure/LockManager/getLockTests.coffee @@ -16,18 +16,19 @@ describe 'LockManager - getting the lock', -> "metrics-sharelatex": inc:-> @callback = sinon.stub() - @doc_id = "doc-id-123" - + @key = "lock:web:lockName:project-id}" + @namespace = 'lockName' + describe "when the lock is not set", -> beforeEach (done) -> - @LockManager._tryLock = sinon.stub().callsArgWith(1, null, true) - @LockManager._getLock @doc_id, (args...) => + @LockManager._tryLock = sinon.stub().yields(null, true) + @LockManager._getLock @key, @namespace, (args...) => @callback(args...) done() it "should try to get the lock", -> @LockManager._tryLock - .calledWith(@doc_id) + .calledWith(@key, @namespace) .should.equal true it "should only need to try once", -> @@ -41,7 +42,7 @@ describe 'LockManager - getting the lock', -> startTime = Date.now() tries = 0 @LockManager.LOCK_TEST_INTERVAL = 5 - @LockManager._tryLock = (doc_id, callback = (error, isFree) ->) -> + @LockManager._tryLock = (key, namespace, callback = (error, isFree) ->) -> if (Date.now() - startTime < 20) or (tries < 2) tries = tries + 1 callback null, false @@ -49,7 +50,7 @@ describe 'LockManager - getting the lock', -> callback null, true sinon.spy @LockManager, "_tryLock" - @LockManager._getLock @doc_id, (args...) => + @LockManager._getLock @key, @namespace, (args...) => @callback(args...) done() @@ -63,13 +64,10 @@ describe 'LockManager - getting the lock', -> beforeEach (done) -> time = Date.now() @LockManager.MAX_LOCK_WAIT_TIME = 5 - @LockManager._tryLock = sinon.stub().callsArgWith(1, null, false) - @LockManager._getLock @doc_id, (args...) => + @LockManager._tryLock = sinon.stub().yields(null, false) + @LockManager._getLock @key, @namespace, (args...) => @callback(args...) done() it "should return the callback with an error", -> @callback.calledWith(new Error("timeout")).should.equal true - - - diff --git a/services/web/test/unit/coffee/infrastructure/LockManager/tryLockTests.coffee b/services/web/test/unit/coffee/infrastructure/LockManager/tryLockTests.coffee index 3ea837df92..b3719623bb 100644 --- a/services/web/test/unit/coffee/infrastructure/LockManager/tryLockTests.coffee +++ b/services/web/test/unit/coffee/infrastructure/LockManager/tryLockTests.coffee @@ -16,13 +16,13 @@ describe 'LockManager - trying the lock', -> "settings-sharelatex":{redis:{}} "metrics-sharelatex": inc:-> @callback = sinon.stub() - @doc_id = "doc-id-123" - @key = "lock:web:{#{@doc_id}}" + @key = "lock:web:lockName:project-id}" + @namespace = "lockName" describe "when the lock is not set", -> beforeEach -> @set.callsArgWith(5, null, "OK") - @LockManager._tryLock @key, @callback + @LockManager._tryLock @key, @namespace, @callback it "should set the lock key with an expiry if it is not set", -> @set.calledWith(@key, "locked", "EX", 30, "NX") @@ -34,7 +34,7 @@ describe 'LockManager - trying the lock', -> describe "when the lock is already set", -> beforeEach -> @set.callsArgWith(5, null, null) - @LockManager._tryLock @key, @callback + @LockManager._tryLock @key, @namespace, @callback it "should return the callback with false", -> @callback.calledWith(null, false).should.equal true From 584cd711739d28b46a7b5610251d968ee8fa9267 Mon Sep 17 00:00:00 2001 From: Hayden Faulds Date: Tue, 20 Feb 2018 14:03:43 +0000 Subject: [PATCH 2/3] fix exceeded lock timeout logging we were logging this after failing to get the lock, this stops that --- .../coffee/infrastructure/LockManager.coffee | 24 +++++++++---------- 1 file changed, 11 insertions(+), 13 deletions(-) diff --git a/services/web/app/coffee/infrastructure/LockManager.coffee b/services/web/app/coffee/infrastructure/LockManager.coffee index 4c45d8703b..f3b53beda7 100644 --- a/services/web/app/coffee/infrastructure/LockManager.coffee +++ b/services/web/app/coffee/infrastructure/LockManager.coffee @@ -11,27 +11,25 @@ module.exports = LockManager = SLOW_EXECUTION_THRESHOLD: 5000 # 5s, if execution takes longer than this then log runWithLock: (namespace, id, runner = ( (releaseLock = (error) ->) -> ), callback = ( (error) -> )) -> - - # The lock can expire in redis but the process carry on. This setTimout call - # is designed to log if this happens. - # - # error is defined here so we get a useful stacktrace - lockReleased = false + # This error is defined here so we get a useful stacktrace slowExecutionError = new Error "slow execution during lock" - countIfExceededLockTimeout = () -> - if !lockReleased - metrics.inc "lock.#{namespace}.exceeded_lock_timeout" - logger.log "exceeded lock timeout", { namespace, id, slowExecutionError } - - setTimeout countIfExceededLockTimeout, LockManager.REDIS_LOCK_EXPIRY * 1000 timer = new metrics.Timer("lock.#{namespace}") key = "lock:web:#{namespace}:#{id}" LockManager._getLock key, namespace, (error) -> return callback(error) if error? + + # The lock can expire in redis but the process carry on. This setTimout call + # is designed to log if this happens. + countIfExceededLockTimeout = () -> + metrics.inc "lock.#{namespace}.exceeded_lock_timeout" + logger.log "exceeded lock timeout", { namespace, id, slowExecutionError } + exceededLockTimeout = setTimeout countIfExceededLockTimeout, LockManager.REDIS_LOCK_EXPIRY * 1000 + runner (error1, values...) -> LockManager._releaseLock key, (error2) -> - lockReleased = true + clearTimeout exceededLockTimeout + timeTaken = new Date - timer.start if timeTaken > LockManager.SLOW_EXECUTION_THRESHOLD logger.log "slow execution during lock", { namespace, id, timeTaken, slowExecutionError } From adc90b68e7cc72bc80c87ea3f756ffe5d827bfc7 Mon Sep 17 00:00:00 2001 From: Hayden Faulds Date: Wed, 21 Feb 2018 13:17:51 +0000 Subject: [PATCH 3/3] use gauge not inc --- services/web/app/coffee/infrastructure/LockManager.coffee | 2 +- .../coffee/infrastructure/LockManager/getLockTests.coffee | 4 +++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/services/web/app/coffee/infrastructure/LockManager.coffee b/services/web/app/coffee/infrastructure/LockManager.coffee index f3b53beda7..dd29bd0bde 100644 --- a/services/web/app/coffee/infrastructure/LockManager.coffee +++ b/services/web/app/coffee/infrastructure/LockManager.coffee @@ -62,7 +62,7 @@ module.exports = LockManager = LockManager._tryLock key, namespace, (error, gotLock) -> return callback(error) if error? if gotLock - metrics.inc "lock.#{namespace}.get.success.tries", attempts + metrics.gauge "lock.#{namespace}.get.success.tries", attempts callback(null) else setTimeout attempt, LockManager.LOCK_TEST_INTERVAL diff --git a/services/web/test/unit/coffee/infrastructure/LockManager/getLockTests.coffee b/services/web/test/unit/coffee/infrastructure/LockManager/getLockTests.coffee index bb2466c104..3bdf6373af 100644 --- a/services/web/test/unit/coffee/infrastructure/LockManager/getLockTests.coffee +++ b/services/web/test/unit/coffee/infrastructure/LockManager/getLockTests.coffee @@ -13,7 +13,9 @@ describe 'LockManager - getting the lock', -> client: ()-> auth:-> "settings-sharelatex":{redis:{}} - "metrics-sharelatex": inc:-> + "metrics-sharelatex": + inc:-> + gauge:-> @callback = sinon.stub() @key = "lock:web:lockName:project-id}"