Monitor event loop by looking for skew

If we monitor with setImmediate, we miss big blocking loops. For example,
suppose we have 1000 1ms loops then a single bad 1000ms loop. setImmediate
will only be called at the right time 1/1000 of the time (it has to be the
loop just before the bad one). So this monitoring method gives a good average
if the std dev is low, but doesn't pick up spikes.

Instead, we can monitor the skew from the expected time between setInterval callbacks.
In the case above, with a setInterval for 1000ms, we will pick up a skew
proportional to the amount of time that it overlaps the bad loop. So there is a
50% chance of picking up a skew > 500ms, and thus getting a good sense of any
spikes.
This commit is contained in:
James Allen 2015-12-03 16:32:20 +00:00
parent 738363a6de
commit 8db30020ae

View file

@ -1,18 +1,16 @@
seconds = 1000
module.exports = EventLoopMonitor =
monitor: (logger) ->
interval = setInterval () ->
EventLoopMonitor.Delay()
, 1 * seconds
monitor: (logger, interval = 1000, log_threshold = 100) ->
Metrics = require "./metrics"
previous = Date.now()
intervalId = setInterval () ->
now = Date.now()
offset = now - previous - interval
if offset > log_threshold
logger.warn {offset: offset}, "slow event loop"
previous = now
Metrics.timing("event-loop-millsec", offset)
, interval
Metrics.registerDestructor () ->
clearInterval(interval)
Delay: () ->
Metrics = require "./metrics"
t1 = process.hrtime()
setImmediate () ->
delta = process.hrtime(t1)
responseTime = delta[0]*1e6 + delta[1]*1e-3
Metrics.timing("event-loop-microsec", responseTime)
clearInterval(intervalId)