Add in external health check rather than internal
This commit is contained in:
parent 414ab5d6a9
commit da89ff7172

6 changed files with 113 additions and 75 deletions
@@ -60,6 +60,13 @@ app.get "/health_check/redis", (req, res, next)->
 		else
 			res.send 500

+app.get "/health_check/redis_cluster", (req, res, next) ->
+	RedisManager.rclient.healthCheck (error, alive) ->
+		if error?
+			logger.err {err: error}, "failed redis cluster health check"
+			res.send 500
+		else
+			res.send 200

 app.use (error, req, res, next) ->
	if error instanceof Errors.NotFoundError
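For reference, a minimal sketch of how an external monitor might poll the new endpoint. The port (3003) and the use of Node's http module are assumptions for illustration, not part of this commit; point it at wherever the document-updater actually listens.

    http = require "http"

    # Hypothetical external poller: treat anything other than a 200 as unhealthy.
    checkRedisCluster = (callback) ->
      req = http.get "http://localhost:3003/health_check/redis_cluster", (res) ->
        res.resume()   # drain the response so the socket is released
        if res.statusCode == 200
          callback()
        else
          callback(new Error("redis cluster health check returned #{res.statusCode}"))
      req.on "error", callback

    checkRedisCluster (error) ->
      if error?
        console.error "redis cluster unhealthy:", error.message
      else
        console.log "redis cluster healthy"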
@@ -5,7 +5,6 @@ logger = require "logger-sharelatex"

 class Client
 	constructor: (@clients) ->
-		@HEARTBEAT_INTERVAL = 5000
 		@HEARTBEAT_TIMEOUT = 2000

 	multi: () ->
@@ -18,28 +17,41 @@ class Client
 			}
 		)

-	monitorTcpAndReconnect: () ->
-		for client in @clients
-			if client.driver == "ioredis"
-				@_monitorCluster(client.rclient)
+	healthCheck: (callback) ->
+		jobs = @clients.map (client) =>
+			(cb) => @_healthCheckClient(client, cb)
+		async.parallel jobs, callback

-	_monitorCluster: (rclient) ->
-		setInterval () =>
-			# Nodes can come and go as the cluster moves/heals, so each heartbeat
-			# we ask again for the currently known nodes.
-			for node in rclient.nodes("all")
-				@_checkNode(node)
-		, @HEARTBEAT_INTERVAL
+	_healthCheckClient: (client, callback) ->
+		if client.driver == "ioredis"
+			@_healthCheckClusterClient(client, callback)
+		else
+			@_healthCheckNodeRedisClient(client, callback)

-	_checkNode: (node) ->
+	_healthCheckNodeRedisClient: (client, callback) ->
+		client.healthCheck ?= require("redis-sharelatex").activeHealthCheckRedis(Settings.redis.web)
+		if client.healthCheck.isAlive()
+			return callback()
+		else
+			return callback(new Error("node-redis client failed health check"))
+
+	_healthCheckClusterClient: (client, callback) ->
+		jobs = client.rclient.nodes("all").map (n) =>
+			(cb) => @_checkNode(n, cb)
+		async.parallel jobs, callback
+
+	_checkNode: (node, _callback) ->
+		callback = (args...) ->
+			_callback(args...)
+			_callback = () ->
 		timer = setTimeout () ->
-			logger.error {err: new Error("Node timed out, reconnecting"), key: node.options.key}
-			# Discussion of application layer monitoring recommends this way of reconnecting at https://github.com/luin/ioredis/issues/275
-			node.stream.destroy()
+			error = new Error("ioredis node ping check timed out")
+			logger.error {err: error, key: node.options.key}, "node timed out"
+			callback(error)
 		, @HEARTBEAT_TIMEOUT
 		node.ping (err) ->
-			if !err?
-				clearTimeout timer
+			clearTimeout timer
+			callback(err)

 class MultiClient
 	constructor: (@clients) ->
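The rewritten _checkNode above races a ping reply against a timeout and makes sure the caller's callback fires at most once: reassigning _callback turns any later call into a no-op. A standalone sketch of that pattern, using illustrative names that are not from the codebase:

    # Illustrative only: the same ping-vs-timeout race used by _checkNode above.
    pingWithTimeout = (node, timeoutMs, _callback) ->
      callback = (args...) ->
        _callback(args...)
        _callback = () ->   # whichever path loses the race becomes a no-op
      timer = setTimeout () ->
        callback(new Error("ping timed out"))
      , timeoutMs
      node.ping (err) ->
        clearTimeout timer
        callback(err)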
@@ -10,9 +10,9 @@ Errors = require "./Errors"
 # Make times easy to read
 minutes = 60 # seconds for Redis expire

-rclient.monitorTcpAndReconnect()
-
 module.exports = RedisManager =
+	rclient: rclient
+
 	putDocInMemory : (project_id, doc_id, docLines, version, _callback)->
 		timer = new metrics.Timer("redis.put-doc")
 		callback = (error) ->
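With rclient now exported from RedisManager, the health check can be driven from outside the module, which is what the new /health_check/redis_cluster route does. A hypothetical ad-hoc check from a one-off script; the require path here is an assumption about the service layout:

    # Assumed path; adjust to where RedisManager lives in your checkout.
    RedisManager = require "./app/js/RedisManager"

    RedisManager.rclient.healthCheck (error) ->
      if error?
        console.error "redis health check failed", error
        process.exit 1
      else
        console.log "redis backends healthy"
        process.exit 0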
@@ -20,31 +20,30 @@ module.exports =
 			port:"6379"
 			host:"localhost"
 			password:""
-		documentupdater:
-			# port:"6379"
-			# host:"localhost"
-			# password:""
-			# key_schema:
-			# 	blockingKey: ({doc_id}) -> "Blocking:#{doc_id}"
-			# 	docLines: ({doc_id}) -> "doclines:#{doc_id}"
-			# 	docOps: ({doc_id}) -> "DocOps:#{doc_id}"
-			# 	docVersion: ({doc_id}) -> "DocVersion:#{doc_id}"
-			# 	projectKey: ({doc_id}) -> "ProjectId:#{doc_id}"
-			# 	docsInProject: ({project_id}) -> "DocsIn:#{project_id}"
-			# To use Redis cluster, configure the backend as follows:
-			[{
+		documentupdater: [{
 			primary: true
-			cluster: [{
-				port: "7000"
-				host: "localhost"
-			}]
+			port:"6379"
+			host:"localhost"
+			password:""
 			key_schema:
-				blockingKey: ({doc_id}) -> "Blocking:{#{doc_id}}"
-				docLines: ({doc_id}) -> "doclines:{#{doc_id}}"
-				docOps: ({doc_id}) -> "DocOps:{#{doc_id}}"
-				docVersion: ({doc_id}) -> "DocVersion:{#{doc_id}}"
-				projectKey: ({doc_id}) -> "ProjectId:{#{doc_id}}"
-				docsInProject: ({project_id}) -> "DocsIn:{#{project_id}}"
+				blockingKey: ({doc_id}) -> "Blocking:#{doc_id}"
+				docLines: ({doc_id}) -> "doclines:#{doc_id}"
+				docOps: ({doc_id}) -> "DocOps:#{doc_id}"
+				docVersion: ({doc_id}) -> "DocVersion:#{doc_id}"
+				projectKey: ({doc_id}) -> "ProjectId:#{doc_id}"
+				docsInProject: ({project_id}) -> "DocsIn:#{project_id}"
+		# }, {
+		# 	cluster: [{
+		# 		port: "7000"
+		# 		host: "localhost"
+		# 	}]
+		# 	key_schema:
+		# 		blockingKey: ({doc_id}) -> "Blocking:{#{doc_id}}"
+		# 		docLines: ({doc_id}) -> "doclines:{#{doc_id}}"
+		# 		docOps: ({doc_id}) -> "DocOps:{#{doc_id}}"
+		# 		docVersion: ({doc_id}) -> "DocVersion:{#{doc_id}}"
+		# 		projectKey: ({doc_id}) -> "ProjectId:{#{doc_id}}"
+		# 		docsInProject: ({project_id}) -> "DocsIn:{#{project_id}}"
 		}]

 	max_doc_length: 2 * 1024 * 1024 # 2mb
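Note the difference between the two key schemas above: the commented-out cluster variant wraps the id in {...}. That is the Redis Cluster hash-tag convention; only the text inside the braces is hashed, so every key for a given doc lands in the same slot and multi-key operations on one doc stay on one node. A small illustration (the doc_id value is made up):

    key_schema =
      docLines: ({doc_id}) -> "doclines:{#{doc_id}}"
      docVersion: ({doc_id}) -> "DocVersion:{#{doc_id}}"

    # Both keys share the hash tag {some-doc-id}, so they hash to the same cluster slot.
    console.log key_schema.docLines(doc_id: "some-doc-id")      # doclines:{some-doc-id}
    console.log key_schema.docVersion(doc_id: "some-doc-id")    # DocVersion:{some-doc-id}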
@@ -50,6 +50,7 @@ describe "RedisBackend", ->
 			"logger-sharelatex": @logger = { error: sinon.stub(), log: sinon.stub(), warn: sinon.stub() }
 			"redis-sharelatex": @redis =
 				createClient: sinon.stub().returns @rclient_redis = {}
+				activeHealthCheck: sinon.stub()
 			"ioredis": @ioredis =
 				Cluster: Cluster
 		@client = @RedisBackend.createClient()
@@ -317,10 +318,40 @@ describe "RedisBackend", ->
 				.calledWith(@rclient_ioredis)
 				.should.equal true

-	describe "_monitorCluster", ->
+	describe "_healthCheckNodeRedisClient", ->
+		beforeEach ->
+			@redis.activeHealthCheckRedis = sinon.stub().returns @healthCheck = {
+				isAlive: sinon.stub()
+			}
+
+		describe "successfully", ->
+			beforeEach (done) ->
+				@healthCheck.isAlive.returns true
+				@redis_client = {}
+				@client._healthCheckNodeRedisClient(@redis_client, done)
+
+			it "should check the status of the node redis client", ->
+				@healthCheck.isAlive.called.should.equal true
+
+			it "should only create one health check when called multiple times", (done) ->
+				@client._healthCheckNodeRedisClient @redis_client, () =>
+					@redis.activeHealthCheckRedis.calledOnce.should.equal true
+					@healthCheck.isAlive.calledTwice.should.equal true
+					done()
+
+		describe "when failing", ->
+			beforeEach ->
+				@healthCheck.isAlive.returns false
+				@redis_client = {}
+
+			it "should return an error", (done) ->
+				@client._healthCheckNodeRedisClient @redis_client, (error) ->
+					error.message.should.equal "node-redis client failed health check"
+					done()
+
+	describe "_healthCheckClusterClient", ->
 		beforeEach ->
 			@client.HEARTBEAT_TIMEOUT = 10
-			@client.HEARTBEAT_INTERVAL = 100
 			@nodes = [{
 				options: key: "node-0"
 				stream: destroy: sinon.stub()
@@ -330,37 +361,27 @@ describe "RedisBackend", ->
 			}]
 			@rclient_ioredis.nodes = sinon.stub().returns(@nodes)

-		describe "successfully", ->
-			beforeEach ->
-				@nodes[0].ping = (cb) -> cb()
-				@nodes[1].ping = (cb) -> cb()
-				@client._monitorCluster(@rclient_ioredis)
+		describe "when both clients are successful", ->
+			beforeEach (done) ->
+				@nodes[0].ping = sinon.stub().yields()
+				@nodes[1].ping = sinon.stub().yields()
+				@client._healthCheckClusterClient({ rclient: @rclient_ioredis }, done)

-			it "should get all nodes", ->
-				setTimeout () =>
-					@rclient_ioredis.nodes
-						.calledWith("all")
-						.should.equal true
-				, 200
+			it "should get all cluster nodes", ->
+				@rclient_ioredis.nodes
+					.calledWith("all")
+					.should.equal true

-			it "should not reset the node connections", (done) ->
-				setTimeout () =>
-					@nodes[0].stream.destroy.called.should.equal false
-					@nodes[1].stream.destroy.called.should.equal false
-					done()
-				, 200
+			it "should ping each cluster node", ->
+				for node in @nodes
+					node.ping.called.should.equal true

 		describe "when ping fails to a node", ->
 			beforeEach ->
 				@nodes[0].ping = (cb) -> cb()
 				@nodes[1].ping = (cb) -> # Just hang
-				@client._monitorCluster(@rclient_ioredis)

-			it "should reset the failing node connection", (done) ->
-				setTimeout () =>
-					@nodes[0].stream.destroy.called.should.equal false
-					@nodes[1].stream.destroy.called.should.equal true
-					done()
-				, 200
+			it "should return an error", (done) ->
+				@client._healthCheckClusterClient { rclient: @rclient_ioredis }, (error) ->
+					error.message.should.equal "ioredis node ping check timed out"
+					done()
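The error asserted in the failing-node test above is surfaced by async.parallel, which healthCheck and _healthCheckClusterClient use to fan out the per-client and per-node checks: the first job to call back with an error decides the overall result. A minimal sketch of that behaviour:

    async = require "async"

    jobs = [
      (cb) -> cb()                                                 # healthy check
      (cb) -> cb(new Error("ioredis node ping check timed out"))   # failing check
    ]

    # async.parallel reports the first error it sees; a null error means all checks passed.
    async.parallel jobs, (error) ->
      console.log error?.message ? "all healthy"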
@@ -10,7 +10,6 @@ describe "RedisManager", ->
 		@rclient =
 			auth: () ->
 			exec: sinon.stub()
-			monitorTcpAndReconnect: () ->
 		@rclient.multi = () => @rclient
 		@RedisManager = SandboxedModule.require modulePath, requires:
 			"./RedisBackend":