Merge pull request #16 from sharelatex/cluster-restart

Graceful cluster restart on domain exception
This commit is contained in:
Brian Gough 2015-01-07 11:50:24 +00:00
commit 2c731586a8
4 changed files with 61 additions and 32 deletions

View file

@ -19,7 +19,7 @@ module.exports = (grunt) ->
app_server: app_server:
expand: true, expand: true,
flatten: false, flatten: false,
src: ['app.coffee'], src: ['app.coffee', 'cluster.coffee'],
dest: './', dest: './',
ext: '.js' ext: '.js'
@ -41,7 +41,7 @@ module.exports = (grunt) ->
watch: watch:
server_coffee: server_coffee:
files: ['app/*.coffee','app/**/*.coffee', 'test/unit/coffee/**/*.coffee', 'test/unit/coffee/*.coffee', "app.coffee"] files: ['app/*.coffee','app/**/*.coffee', 'test/unit/coffee/**/*.coffee', 'test/unit/coffee/*.coffee', "app.coffee", "cluster.coffee"]
tasks: ["clean", 'coffee', 'mochaTest'] tasks: ["clean", 'coffee', 'mochaTest']
clean: ["app/js", "test/unit/js", "app.js"] clean: ["app/js", "test/unit/js", "app.js"]

View file

@ -37,28 +37,36 @@ app.use (req, res, next) ->
requestDomain.add req requestDomain.add req
requestDomain.add res requestDomain.add res
requestDomain.on "error", (err)-> requestDomain.on "error", (err)->
# request a shutdown to prevent memory leaks try
appIsOk = false appIsOk = false
setTimeout(-> # request a shutdown to prevent memory leaks
beginShutdown()
if !res.headerSent if !res.headerSent
res.send(500) res.send(500, "uncaught exception")
, 3000) logger = require('logger-sharelatex')
logger = require('logger-sharelatex') req =
req = body:req.body
body:req.body headers:req.headers
headers:req.headers url:req.url
url:req.url key: req.key
key: req.key statusCode: req.statusCode
statusCode: req.statusCode err =
err = message: err.message
message: err.message stack: err.stack
stack: err.stack name: err.name
name: err.name type: err.type
type: err.type arguments: err.arguments
arguments: err.arguments logger.err err:err, req:req, res:res, "uncaught exception thrown on request"
logger.err err:err, req:req, res:res, "uncaught exception thrown on request" catch exception
logger.err err: exception, "exception in request domain handler"
requestDomain.run next requestDomain.run next
app.use (req, res, next) ->
if not appIsOk
# when shutting down, close any HTTP keep-alive connections
res.set 'Connection', 'close'
next()
app.get "/project/:project_id/file/:file_id", keyBuilder.userFileKey, fileController.getFile app.get "/project/:project_id/file/:file_id", keyBuilder.userFileKey, fileController.getFile
app.post "/project/:project_id/file/:file_id", keyBuilder.userFileKey, fileController.insertFile app.post "/project/:project_id/file/:file_id", keyBuilder.userFileKey, fileController.insertFile
@ -95,19 +103,30 @@ app.get "/health_check", (req, res)->
else else
res.send(503) res.send(503)
app.get '*', (req, res)-> app.get '*', (req, res)->
res.send 404 res.send 404
serverDomain = domain.create() server = require('http').createServer(app)
serverDomain.run -> port = settings.internal.filestore.port or 3009
server = require('http').createServer(app) host = settings.internal.filestore.host or "localhost"
port = settings.internal.filestore.port or 3009
host = settings.internal.filestore.host or "localhost"
server.listen port, host, ->
logger.log("filestore store listening on #{host}:#{port}")
serverDomain.on "error", (err)-> beginShutdown = () ->
logger.log err:err, "top level uncaught exception" if appIsOk
appIsOk = false
# hard-terminate this process if graceful shutdown fails
killTimer = setTimeout () ->
process.exit 1
, 120*1000
killTimer.unref?() # prevent timer from keeping process alive
server.close () ->
logger.log "closed all connections"
Metrics.close()
process.disconnect?()
logger.log "server will stop accepting connections"
server.listen port, host, ->
logger.log("filestore listening on #{host}:#{port}")
process.on 'SIGTERM', () ->
logger.log("filestore got SIGTERM, shutting down gracefully")
beginShutdown()

View file

@ -0,0 +1,9 @@
# Cluster entry point: hands the compiled app.js that sits next to this file
# to recluster (https://github.com/doxout/recluster) and starts the cluster.
recluster = require "recluster"
path = require "path"

# Script passed to recluster, resolved relative to this file's directory.
workerScript = path.join __dirname, 'app.js'

# Options passed through to recluster unchanged.
# NOTE(review): presumably `workers` is the worker count, `backoff` bounds the
# respawn delay and `readyWhen` selects the event that marks a worker as ready;
# confirm against the recluster README.
clusterOptions =
  workers: 2
  backoff: 0
  readyWhen: "listening"

cluster = recluster workerScript, clusterOptions
cluster.run()

View file

@ -20,6 +20,7 @@
"node-transloadit": "0.0.4", "node-transloadit": "0.0.4",
"node-uuid": "~1.4.1", "node-uuid": "~1.4.1",
"pngcrush": "0.0.3", "pngcrush": "0.0.3",
"recluster": "^0.3.7",
"request": "2.14.0", "request": "2.14.0",
"response": "0.14.0", "response": "0.14.0",
"rimraf": "2.2.8", "rimraf": "2.2.8",