mirror of
https://github.com/overleaf/overleaf.git
synced 2024-11-21 20:47:08 -05:00
Add application-layer monitoring of the health of each cluster node
This commit is contained in:
parent
27a74d6b71
commit
8ef03c3d2f
4 changed files with 89 additions and 1 deletions
|
@ -5,6 +5,8 @@ logger = require "logger-sharelatex"
|
||||||
|
|
||||||
class Client
|
class Client
|
||||||
constructor: (@clients) ->
|
constructor: (@clients) ->
|
||||||
|
@HEARTBEAT_INTERVAL = 5000
|
||||||
|
@HEARTBEAT_TIMEOUT = 2000
|
||||||
|
|
||||||
multi: () ->
|
multi: () ->
|
||||||
return new MultiClient(
|
return new MultiClient(
|
||||||
|
@ -16,6 +18,29 @@ class Client
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# Start heartbeat monitoring for every configured backend whose driver is
# "ioredis"; backends using any other driver are left alone.
#
# The loop is the last expression, so (as before) its per-backend results
# are what the method implicitly returns.
monitorAndReconnect: () ->
  for backend in @clients
    @_monitorCluster(backend.rclient) if backend.driver == "ioredis"
|
||||||
|
|
||||||
|
# Periodically ping every node of a cluster client and tear down the
# connection of any node that does not answer in time.
#
# Every @HEARTBEAT_INTERVAL ms each node reported by rclient.nodes("all") is
# pinged. If a ping has not completed without error within
# @HEARTBEAT_TIMEOUT ms, an error is logged and the node's stream is
# destroyed.
# NOTE(review): presumably destroying the stream makes the driver reconnect
# that node - confirm against the ioredis documentation.
#
# rclient - cluster client exposing nodes("all"); each node must provide
#           ping(callback), options.key and (once connected) stream.
#
# Returns the handle from setInterval so the caller can stop the heartbeat
# with clearInterval.
_monitorCluster: (rclient) ->
  # Ping one node, arming a timeout that resets its connection.
  checkNode = (node) =>
    # Declared up front so both callbacks below share the same variable.
    timer = null
    onTimeout = () ->
      logger.error {err: new Error("Node timed out, reconnecting"), key: node.options.key}
      # A node that has not finished connecting may not have a stream yet;
      # throwing from inside a timer callback would be an uncaught exception.
      node.stream?.destroy()
      timer = null
      return
    timer = setTimeout onTimeout, @HEARTBEAT_TIMEOUT
    node.ping (err) ->
      # Only a successful ping cancels the pending reset; on error the
      # timeout is left armed so the connection still gets destroyed.
      if !err? and timer?
        clearTimeout timer
        timer = null
      return
    return

  heartbeat = () ->
    # Nodes can come and go as the cluster moves/heals, so each heartbeat
    # we ask again for the currently known nodes.
    checkNode(node) for node in rclient.nodes("all")
    # Explicit return so the comprehension above is not collected into a
    # throwaway array on every tick.
    return

  return setInterval heartbeat, @HEARTBEAT_INTERVAL
|
||||||
|
|
||||||
|
|
||||||
class MultiClient
|
class MultiClient
|
||||||
constructor: (@clients) ->
|
constructor: (@clients) ->
|
||||||
|
|
||||||
|
|
|
@ -10,6 +10,8 @@ Errors = require "./Errors"
|
||||||
# Make times easy to read
|
# Make times easy to read
|
||||||
minutes = 60 # seconds for Redis expire
|
minutes = 60 # seconds for Redis expire
|
||||||
|
|
||||||
|
rclient.monitorAndReconnect()
|
||||||
|
|
||||||
module.exports = RedisManager =
|
module.exports = RedisManager =
|
||||||
putDocInMemory : (project_id, doc_id, docLines, version, _callback)->
|
putDocInMemory : (project_id, doc_id, docLines, version, _callback)->
|
||||||
timer = new metrics.Timer("redis.put-doc")
|
timer = new metrics.Timer("redis.put-doc")
|
||||||
|
|
|
@ -42,6 +42,8 @@ describe "RedisBackend", ->
|
||||||
class Cluster
|
class Cluster
|
||||||
constructor: (@config) ->
|
constructor: (@config) ->
|
||||||
test_context.rclient_ioredis = @
|
test_context.rclient_ioredis = @
|
||||||
|
|
||||||
|
nodes: sinon.stub()
|
||||||
|
|
||||||
@RedisBackend = SandboxedModule.require modulePath, requires:
|
@RedisBackend = SandboxedModule.require modulePath, requires:
|
||||||
"settings-sharelatex": @Settings
|
"settings-sharelatex": @Settings
|
||||||
|
@ -305,3 +307,60 @@ describe "RedisBackend", ->
|
||||||
}, "error in redis backend")
|
}, "error in redis backend")
|
||||||
.should.equal true
|
.should.equal true
|
||||||
|
|
||||||
|
# monitorAndReconnect should hand the ioredis cluster client over to
# _monitorCluster; the real heartbeat is stubbed out here.
describe "monitorAndReconnect", ->
  beforeEach ->
    @client._monitorCluster = sinon.stub()
    @client.monitorAndReconnect()

  it "should monitor the cluster client", ->
    monitoredCluster = @client._monitorCluster.calledWith(@rclient_ioredis)
    monitoredCluster.should.equal true
|
||||||
|
|
||||||
|
# Exercises the heartbeat loop with shortened timings: nodes are pinged every
# 100ms and a ping is considered lost after 10ms, so each test waits 200ms
# to observe at least one full heartbeat.
describe "_monitorCluster", ->
  # Interval handle returned by _monitorCluster. Kept in a closure variable
  # rather than on `this`, so it is shared by the nested beforeEach hooks
  # and the afterEach below.
  heartbeat = null

  beforeEach ->
    @client.HEARTBEAT_TIMEOUT = 10
    @client.HEARTBEAT_INTERVAL = 100
    @nodes = [{
      options: key: "node-0"
      stream: destroy: sinon.stub()
    }, {
      options: key: "node-1"
      stream: destroy: sinon.stub()
    }]
    @rclient_ioredis.nodes = sinon.stub().returns(@nodes)

  afterEach ->
    # Stop the heartbeat so it does not keep firing during later tests.
    clearInterval heartbeat if heartbeat?
    heartbeat = null

  describe "successfully", ->
    beforeEach ->
      @nodes[0].ping = (cb) -> cb()
      @nodes[1].ping = (cb) -> cb()
      heartbeat = @client._monitorCluster(@rclient_ioredis)

    # Takes `done` so mocha waits for the delayed assertion; without it the
    # test would pass before the assertion ever ran.
    it "should get all nodes", (done) ->
      setTimeout () =>
        @rclient_ioredis.nodes
          .calledWith("all")
          .should.equal true
        done()
      , 200

    it "should not reset the node connections", (done) ->
      setTimeout () =>
        @nodes[0].stream.destroy.called.should.equal false
        @nodes[1].stream.destroy.called.should.equal false
        done()
      , 200

  describe "when ping fails to a node", ->
    beforeEach ->
      @nodes[0].ping = (cb) -> cb()
      @nodes[1].ping = (cb) -> # Just hang
      heartbeat = @client._monitorCluster(@rclient_ioredis)

    it "should reset the failing node connection", (done) ->
      setTimeout () =>
        @nodes[0].stream.destroy.called.should.equal false
        @nodes[1].stream.destroy.called.should.equal true
        done()
      , 200
|
||||||
|
|
||||||
|
|
|
@ -10,9 +10,11 @@ describe "RedisManager", ->
|
||||||
@rclient =
|
@rclient =
|
||||||
auth: () ->
|
auth: () ->
|
||||||
exec: sinon.stub()
|
exec: sinon.stub()
|
||||||
|
monitorAndReconnect: () ->
|
||||||
@rclient.multi = () => @rclient
|
@rclient.multi = () => @rclient
|
||||||
@RedisManager = SandboxedModule.require modulePath, requires:
|
@RedisManager = SandboxedModule.require modulePath, requires:
|
||||||
"./RedisBackend": createClient: () => @rclient
|
"./RedisBackend":
|
||||||
|
createClient: () => @rclient
|
||||||
"./RedisKeyBuilder":
|
"./RedisKeyBuilder":
|
||||||
blockingKey: ({doc_id}) -> "Blocking:#{doc_id}"
|
blockingKey: ({doc_id}) -> "Blocking:#{doc_id}"
|
||||||
docLines: ({doc_id}) -> "doclines:#{doc_id}"
|
docLines: ({doc_id}) -> "doclines:#{doc_id}"
|
||||||
|
|
Loading…
Reference in a new issue