Merge pull request #11 from overleaf/bg-make-health-check-more-robust

make health check more robust
This commit is contained in:
Brian Gough 2019-09-19 15:45:54 +01:00 committed by GitHub
commit 69ba8e391d
2 changed files with 44 additions and 5 deletions

View file

@ -0,0 +1,13 @@
# Manual test for the redis health check.
# Run this script while a redis container is running; starting and stopping
# redis while the script keeps running is a good way to exercise the check.
redisWrapper = require "./index.coffee"
client = redisWrapper.createClient({host:"localhost",port:"6379"})

# Print the outcome of a single health check.
reportHealth = (err) ->
  if err?
    console.log "HEALTH CHECK FAILED", err
  else
    console.log "HEALTH CHECK OK"

# Trigger one health check against the client created above.
pollRedis = () ->
  client.healthCheck reportHealth

# Repeat the check once per second (1000 ms).
setInterval pollRedis, 1000

View file

@ -1,5 +1,13 @@
_ = require("underscore") _ = require("underscore")
async = require "async" async = require "async"
os = require('os')
crypto = require('crypto')
# generate unique values for health check
# These module-level values are interpolated into the token that _checkClient
# builds for every health check, together with the current timestamp.
# Host name reported by the operating system.
HOST = os.hostname()
# Id of the current process.
PID = process.pid
# Four random bytes, hex-encoded; computed once when this module is loaded.
RND = crypto.randomBytes(4).toString('hex')
# Counter that _checkClient post-increments each time it builds a token.
COUNT = 0
module.exports = RedisSharelatex = module.exports = RedisSharelatex =
createClient: (opts = {port: 6379, host: "localhost"})-> createClient: (opts = {port: 6379, host: "localhost"})->
@ -44,19 +52,37 @@ module.exports = RedisSharelatex =
# Check the redis connection by storing a unique key/value pair and then
# reading it back and deleting it.
#
# client   - redis client; must provide `multi()` and may carry `options`.
# callback - invoked at most once: with an Error when a redis command fails,
#            the response is not the expected one, or the check does not
#            finish within RedisSharelatex.HEARTBEAT_TIMEOUT; with no
#            arguments when the round trip succeeds.
_checkClient: (client, callback) ->
  # The timeout and the redis replies race each other; only the first
  # outcome may reach the caller.
  done = _.once(callback)
  token = "host=#{HOST}:pid=#{PID}:random=#{RND}:time=#{Date.now()}:count=#{COUNT++}"

  # Report a failure if redis has not answered in time.
  onTimeout = () ->
    timeoutError = new Error("redis client health check timed out #{client?.options?.host}")
    console.error {
      err: timeoutError,
      key: client.options?.key # only present for cluster
      clientOptions: client.options
      uniqueToken: token
    }, "client timed out"
    done(timeoutError)
  timeoutHandle = setTimeout onTimeout, RedisSharelatex.HEARTBEAT_TIMEOUT

  healthCheckKey = "_redis-wrapper:healthCheckKey:{#{token}}"
  healthCheckValue = "_redis-wrapper:healthCheckValue:{#{token}}"

  # Step 1: write the pair; the "EX", 60 arguments give the key a 60 second
  # expiry, so it does not linger if the delete in step 2 never runs.
  writeTxn = client.multi()
  writeTxn.set healthCheckKey, healthCheckValue, "EX", 60
  writeTxn.exec (writeErr, writeReply) ->
    if writeErr?
      clearTimeout timeoutHandle
      return done(writeErr)

    # Step 2: read the pair back and delete it in one transaction.
    readTxn = client.multi()
    readTxn.get healthCheckKey
    readTxn.del healthCheckKey
    readTxn.exec (readErr, reply) ->
      clearTimeout timeoutHandle
      if readErr?
        return done(readErr)
      # Expect the stored value from GET and a count of 1 from DEL.
      roundTripOk = reply?[0] is healthCheckValue and reply?[1] is 1
      unless roundTripOk
        return done(new Error("bad response from redis health check"))
      return done()
_monkeyPatchIoredisExec: (client) -> _monkeyPatchIoredisExec: (client) ->
_multi = client.multi _multi = client.multi
client.multi = (args...) -> client.multi = (args...) ->