From da89ff717295a97bfa15f4a6057047459ca0826e Mon Sep 17 00:00:00 2001 From: James Allen Date: Thu, 23 Jun 2016 18:00:03 +0100 Subject: [PATCH] Add in external health check rather than internal --- services/document-updater/app.coffee | 7 ++ .../app/coffee/RedisBackend.coffee | 48 +++++++----- .../app/coffee/RedisManager.coffee | 4 +- .../config/settings.defaults.coffee | 51 ++++++------ .../RedisBackend/RedisBackendTests.coffee | 77 ++++++++++++------- .../RedisManager/RedisManagerTests.coffee | 1 - 6 files changed, 113 insertions(+), 75 deletions(-) diff --git a/services/document-updater/app.coffee b/services/document-updater/app.coffee index eafe03e402..cb50471965 100644 --- a/services/document-updater/app.coffee +++ b/services/document-updater/app.coffee @@ -60,6 +60,13 @@ app.get "/health_check/redis", (req, res, next)-> else res.send 500 +app.get "/health_check/redis_cluster", (req, res, next) -> + RedisManager.rclient.healthCheck (error, alive) -> + if error? + logger.err {err: error}, "failed redis cluster health check" + res.send 500 + else + res.send 200 app.use (error, req, res, next) -> if error instanceof Errors.NotFoundError diff --git a/services/document-updater/app/coffee/RedisBackend.coffee b/services/document-updater/app/coffee/RedisBackend.coffee index f72e530471..ca9e3de9e7 100644 --- a/services/document-updater/app/coffee/RedisBackend.coffee +++ b/services/document-updater/app/coffee/RedisBackend.coffee @@ -5,7 +5,6 @@ logger = require "logger-sharelatex" class Client constructor: (@clients) -> - @HEARTBEAT_INTERVAL = 5000 @HEARTBEAT_TIMEOUT = 2000 multi: () -> @@ -18,28 +17,41 @@ class Client } ) - monitorTcpAndReconnect: () -> - for client in @clients - if client.driver == "ioredis" - @_monitorCluster(client.rclient) + healthCheck: (callback) -> + jobs = @clients.map (client) => + (cb) => @_healthCheckClient(client, cb) + async.parallel jobs, callback - _monitorCluster: (rclient) -> - setInterval () => - # Nodes can come and go as the cluster moves/heals, so each heartbeat - # we ask again for the currently known nodes. - for node in rclient.nodes("all") - @_checkNode(node) - , @HEARTBEAT_INTERVAL + _healthCheckClient: (client, callback) -> + if client.driver == "ioredis" + @_healthCheckClusterClient(client, callback) + else + @_healthCheckNodeRedisClient(client, callback) - _checkNode: (node) -> + _healthCheckNodeRedisClient: (client, callback) -> + client.healthCheck ?= require("redis-sharelatex").activeHealthCheckRedis(Settings.redis.web) + if client.healthCheck.isAlive() + return callback() + else + return callback(new Error("node-redis client failed health check")) + + _healthCheckClusterClient: (client, callback) -> + jobs = client.rclient.nodes("all").map (n) => + (cb) => @_checkNode(n, cb) + async.parallel jobs, callback + + _checkNode: (node, _callback) -> + callback = (args...) -> + _callback(args...) + _callback = () -> timer = setTimeout () -> - logger.error {err: new Error("Node timed out, reconnecting"), key: node.options.key} - # Discussion of application layer monitoring recommends this way of reconnecting at https://github.com/luin/ioredis/issues/275 - node.stream.destroy() + error = new Error("ioredis node ping check timed out") + logger.error {err: error, key: node.options.key}, "node timed out" + callback(error) , @HEARTBEAT_TIMEOUT node.ping (err) -> - if !err? - clearTimeout timer + clearTimeout timer + callback(err) class MultiClient constructor: (@clients) -> diff --git a/services/document-updater/app/coffee/RedisManager.coffee b/services/document-updater/app/coffee/RedisManager.coffee index a23725653b..f8b109ca17 100644 --- a/services/document-updater/app/coffee/RedisManager.coffee +++ b/services/document-updater/app/coffee/RedisManager.coffee @@ -10,9 +10,9 @@ Errors = require "./Errors" # Make times easy to read minutes = 60 # seconds for Redis expire -rclient.monitorTcpAndReconnect() - module.exports = RedisManager = + rclient: rclient + putDocInMemory : (project_id, doc_id, docLines, version, _callback)-> timer = new metrics.Timer("redis.put-doc") callback = (error) -> diff --git a/services/document-updater/config/settings.defaults.coffee b/services/document-updater/config/settings.defaults.coffee index 6a2902036e..15456db932 100755 --- a/services/document-updater/config/settings.defaults.coffee +++ b/services/document-updater/config/settings.defaults.coffee @@ -20,32 +20,31 @@ module.exports = port:"6379" host:"localhost" password:"" - documentupdater: - # port:"6379" - # host:"localhost" - # password:"" - # key_schema: - # blockingKey: ({doc_id}) -> "Blocking:#{doc_id}" - # docLines: ({doc_id}) -> "doclines:#{doc_id}" - # docOps: ({doc_id}) -> "DocOps:#{doc_id}" - # docVersion: ({doc_id}) -> "DocVersion:#{doc_id}" - # projectKey: ({doc_id}) -> "ProjectId:#{doc_id}" - # docsInProject: ({project_id}) -> "DocsIn:#{project_id}" - # To use Redis cluster, configure the backend as follows: - [{ - primary: true - cluster: [{ - port: "7000" - host: "localhost" - }] - key_schema: - blockingKey: ({doc_id}) -> "Blocking:{#{doc_id}}" - docLines: ({doc_id}) -> "doclines:{#{doc_id}}" - docOps: ({doc_id}) -> "DocOps:{#{doc_id}}" - docVersion: ({doc_id}) -> "DocVersion:{#{doc_id}}" - projectKey: ({doc_id}) -> "ProjectId:{#{doc_id}}" - docsInProject: ({project_id}) -> "DocsIn:{#{project_id}}" - }] + documentupdater: [{ + primary: true + port:"6379" + host:"localhost" + password:"" + key_schema: + blockingKey: ({doc_id}) -> "Blocking:#{doc_id}" + docLines: ({doc_id}) -> "doclines:#{doc_id}" + docOps: ({doc_id}) -> "DocOps:#{doc_id}" + docVersion: ({doc_id}) -> "DocVersion:#{doc_id}" + projectKey: ({doc_id}) -> "ProjectId:#{doc_id}" + docsInProject: ({project_id}) -> "DocsIn:#{project_id}" + # }, { + # cluster: [{ + # port: "7000" + # host: "localhost" + # }] + # key_schema: + # blockingKey: ({doc_id}) -> "Blocking:{#{doc_id}}" + # docLines: ({doc_id}) -> "doclines:{#{doc_id}}" + # docOps: ({doc_id}) -> "DocOps:{#{doc_id}}" + # docVersion: ({doc_id}) -> "DocVersion:{#{doc_id}}" + # projectKey: ({doc_id}) -> "ProjectId:{#{doc_id}}" + # docsInProject: ({project_id}) -> "DocsIn:{#{project_id}}" + }] max_doc_length: 2 * 1024 * 1024 # 2mb diff --git a/services/document-updater/test/unit/coffee/RedisBackend/RedisBackendTests.coffee b/services/document-updater/test/unit/coffee/RedisBackend/RedisBackendTests.coffee index 10ad599301..648395cd1a 100644 --- a/services/document-updater/test/unit/coffee/RedisBackend/RedisBackendTests.coffee +++ b/services/document-updater/test/unit/coffee/RedisBackend/RedisBackendTests.coffee @@ -50,6 +50,7 @@ describe "RedisBackend", -> "logger-sharelatex": @logger = { error: sinon.stub(), log: sinon.stub(), warn: sinon.stub() } "redis-sharelatex": @redis = createClient: sinon.stub().returns @rclient_redis = {} + activeHealthCheck: sinon.stub() "ioredis": @ioredis = Cluster: Cluster @client = @RedisBackend.createClient() @@ -317,10 +318,40 @@ describe "RedisBackend", -> .calledWith(@rclient_ioredis) .should.equal true - describe "_monitorCluster", -> + describe "_healthCheckNodeRedisClient", -> + beforeEach -> + @redis.activeHealthCheckRedis = sinon.stub().returns @healthCheck = { + isAlive: sinon.stub() + } + + describe "successfully", -> + beforeEach (done) -> + @healthCheck.isAlive.returns true + @redis_client = {} + @client._healthCheckNodeRedisClient(@redis_client, done) + + it "should check the status of the node redis client", -> + @healthCheck.isAlive.called.should.equal true + + it "should only create one health check when called multiple times", (done) -> + @client._healthCheckNodeRedisClient @redis_client, () => + @redis.activeHealthCheckRedis.calledOnce.should.equal true + @healthCheck.isAlive.calledTwice.should.equal true + done() + + describe "when failing", -> + beforeEach -> + @healthCheck.isAlive.returns false + @redis_client = {} + + it "should return an error", (done) -> + @client._healthCheckNodeRedisClient @redis_client, (error) -> + error.message.should.equal "node-redis client failed health check" + done() + + describe "_healthCheckClusterClient", -> beforeEach -> @client.HEARTBEAT_TIMEOUT = 10 - @client.HEARTBEAT_INTERVAL = 100 @nodes = [{ options: key: "node-0" stream: destroy: sinon.stub() @@ -329,38 +360,28 @@ describe "RedisBackend", -> stream: destroy: sinon.stub() }] @rclient_ioredis.nodes = sinon.stub().returns(@nodes) - - describe "successfully", -> - beforeEach -> - @nodes[0].ping = (cb) -> cb() - @nodes[1].ping = (cb) -> cb() - @client._monitorCluster(@rclient_ioredis) + + describe "when both clients are successful", -> + beforeEach (done) -> + @nodes[0].ping = sinon.stub().yields() + @nodes[1].ping = sinon.stub().yields() + @client._healthCheckClusterClient({ rclient: @rclient_ioredis }, done) - it "should get all nodes", -> - setTimeout () => - @rclient_ioredis.nodes - .calledWith("all") - .should.equal true - , 200 + it "should get all cluster nodes", -> + @rclient_ioredis.nodes + .calledWith("all") + .should.equal true - it "should not reset the node connections", (done) -> - setTimeout () => - @nodes[0].stream.destroy.called.should.equal false - @nodes[1].stream.destroy.called.should.equal false - done() - , 200 + it "should ping each cluster node", -> + for node in @nodes + node.ping.called.should.equal true describe "when ping fails to a node", -> beforeEach -> @nodes[0].ping = (cb) -> cb() @nodes[1].ping = (cb) -> # Just hang - @client._monitorCluster(@rclient_ioredis) - it "should reset the failing node connection", (done) -> - setTimeout () => - @nodes[0].stream.destroy.called.should.equal false - @nodes[1].stream.destroy.called.should.equal true + it "should return an error", -> + @client._healthCheckClusterClient { rclient: @rclient_ioredis }, (error) -> + error.message.should.equal "ioredis node ping check timed out" done() - , 200 - - \ No newline at end of file diff --git a/services/document-updater/test/unit/coffee/RedisManager/RedisManagerTests.coffee b/services/document-updater/test/unit/coffee/RedisManager/RedisManagerTests.coffee index 9022163da6..d5b8fbe5ec 100644 --- a/services/document-updater/test/unit/coffee/RedisManager/RedisManagerTests.coffee +++ b/services/document-updater/test/unit/coffee/RedisManager/RedisManagerTests.coffee @@ -10,7 +10,6 @@ describe "RedisManager", -> @rclient = auth: () -> exec: sinon.stub() - monitorTcpAndReconnect: () -> @rclient.multi = () => @rclient @RedisManager = SandboxedModule.require modulePath, requires: "./RedisBackend":