overleaf/services/real-time/app/js/HealthCheckManager.js

78 lines
2.1 KiB
JavaScript
Raw Normal View History

const metrics = require('@overleaf/metrics')
const logger = require('logger-sharelatex')
2019-04-15 09:05:26 -04:00
const os = require('os')
const HOST = os.hostname()
const PID = process.pid
let COUNT = 0
2019-04-15 09:05:26 -04:00
const CHANNEL_MANAGER = {} // hash of event checkers by channel name
const CHANNEL_ERROR = {} // error status by channel name
2019-04-15 09:05:26 -04:00
[misc] reland decaff cleanup (#166) * [misc] decaff cleanup: RoomManager * [misc] decaff cleanup: RedisClientManager * [misc] decaff cleanup: SafeJsonParse * [misc] decaff cleanup: WebApiManager * [misc] decaff cleanup: WebsocketController * [misc] decaff cleanup: WebsocketLoadBalancer * [misc] decaff cleanup: SessionSockets * [misc] decaff cleanup: HttpController * [misc] decaff cleanup: HttpApiController * [misc] decaff cleanup: HealthCheckManager * [misc] decaff cleanup: EventLogger * [misc] decaff cleanup: Errors o-error will eliminate most of it -- when we migrate over. * [misc] decaff cleanup: DrainManager * [misc] decaff cleanup: DocumentUpdaterManager * [misc] decaff cleanup: DocumentUpdaterController: no-unused-vars * [misc] decaff cleanup: DocumentUpdaterController: Array.from * [misc] decaff cleanup: DocumentUpdaterController: implicit return * [misc] decaff cleanup: DocumentUpdaterController: IIFE * [misc] decaff cleanup: DocumentUpdaterController: null checks * [misc] decaff cleanup: DocumentUpdaterController: simpler loops * [misc] decaff cleanup: DocumentUpdaterController: move module name def * [misc] decaff cleanup: ConnectedUsersManager: handle-callback-err * [misc] decaff cleanup: ConnectedUsersManager: implicit returns * [misc] decaff cleanup: ConnectedUsersManager: null checks * [misc] decaff cleanup: ChannelManager: no-unused-vars * [misc] decaff cleanup: ChannelManager: implicit returns * [misc] decaff cleanup: ChannelManager: other cleanup - var -> const - drop variable assignment before return * [misc] decaff cleanup: AuthorizationManager: handle-callback-err Note: This requires a change in WebsocketController to provide a dummy callback. * [misc] decaff cleanup: AuthorizationManager: Array.from * [misc] decaff cleanup: AuthorizationManager: implicit returns * [misc] decaff cleanup: AuthorizationManager: null checks * [misc] decaff cleanup: Router: handle-callback-err * [misc] decaff cleanup: Router: standard/no-callback-literal * [misc] decaff cleanup: Router: Array.from * [misc] decaff cleanup: Router: implicit returns * [misc] decaff cleanup: Router: refactor __guard__ wrapper * [misc] decaff cleanup: Router: null checks And a minor bug fix: user.id -> user._id * [misc] decaff cleanup: Router: move variable declarations to assignments * [misc] decaff cleanup: app: implicit returns * [misc] decaff cleanup: app: __guard__ * [misc] decaff cleanup: app: null checks * [misc] decaff cleanup: app: function definitions * [misc] decaff cleanup: app: drop unused next argument * [misc] decaff cleanup: app: var -> const
2020-07-07 06:06:02 -04:00
module.exports = class HealthCheckManager {
// create an instance of this class which checks that an event with a unique
// id is received only once within a timeout
constructor(channel, timeout) {
// unique event string
this.channel = channel
this.id = `host=${HOST}:pid=${PID}:count=${COUNT++}`
// count of number of times the event is received
this.count = 0
// after a timeout check the status of the count
this.handler = setTimeout(() => {
[misc] reland decaff cleanup (#166) * [misc] decaff cleanup: RoomManager * [misc] decaff cleanup: RedisClientManager * [misc] decaff cleanup: SafeJsonParse * [misc] decaff cleanup: WebApiManager * [misc] decaff cleanup: WebsocketController * [misc] decaff cleanup: WebsocketLoadBalancer * [misc] decaff cleanup: SessionSockets * [misc] decaff cleanup: HttpController * [misc] decaff cleanup: HttpApiController * [misc] decaff cleanup: HealthCheckManager * [misc] decaff cleanup: EventLogger * [misc] decaff cleanup: Errors o-error will eliminate most of it -- when we migrate over. * [misc] decaff cleanup: DrainManager * [misc] decaff cleanup: DocumentUpdaterManager * [misc] decaff cleanup: DocumentUpdaterController: no-unused-vars * [misc] decaff cleanup: DocumentUpdaterController: Array.from * [misc] decaff cleanup: DocumentUpdaterController: implicit return * [misc] decaff cleanup: DocumentUpdaterController: IIFE * [misc] decaff cleanup: DocumentUpdaterController: null checks * [misc] decaff cleanup: DocumentUpdaterController: simpler loops * [misc] decaff cleanup: DocumentUpdaterController: move module name def * [misc] decaff cleanup: ConnectedUsersManager: handle-callback-err * [misc] decaff cleanup: ConnectedUsersManager: implicit returns * [misc] decaff cleanup: ConnectedUsersManager: null checks * [misc] decaff cleanup: ChannelManager: no-unused-vars * [misc] decaff cleanup: ChannelManager: implicit returns * [misc] decaff cleanup: ChannelManager: other cleanup - var -> const - drop variable assignment before return * [misc] decaff cleanup: AuthorizationManager: handle-callback-err Note: This requires a change in WebsocketController to provide a dummy callback. * [misc] decaff cleanup: AuthorizationManager: Array.from * [misc] decaff cleanup: AuthorizationManager: implicit returns * [misc] decaff cleanup: AuthorizationManager: null checks * [misc] decaff cleanup: Router: handle-callback-err * [misc] decaff cleanup: Router: standard/no-callback-literal * [misc] decaff cleanup: Router: Array.from * [misc] decaff cleanup: Router: implicit returns * [misc] decaff cleanup: Router: refactor __guard__ wrapper * [misc] decaff cleanup: Router: null checks And a minor bug fix: user.id -> user._id * [misc] decaff cleanup: Router: move variable declarations to assignments * [misc] decaff cleanup: app: implicit returns * [misc] decaff cleanup: app: __guard__ * [misc] decaff cleanup: app: null checks * [misc] decaff cleanup: app: function definitions * [misc] decaff cleanup: app: drop unused next argument * [misc] decaff cleanup: app: var -> const
2020-07-07 06:06:02 -04:00
this.setStatus()
}, timeout || 1000)
// use a timer to record the latency of the channel
this.timer = new metrics.Timer(`event.${this.channel}.latency`)
// keep a record of these objects to dispatch on
CHANNEL_MANAGER[this.channel] = this
}
processEvent(id) {
// if this is our event record it
if (id === this.id) {
this.count++
[misc] reland decaff cleanup (#166) * [misc] decaff cleanup: RoomManager * [misc] decaff cleanup: RedisClientManager * [misc] decaff cleanup: SafeJsonParse * [misc] decaff cleanup: WebApiManager * [misc] decaff cleanup: WebsocketController * [misc] decaff cleanup: WebsocketLoadBalancer * [misc] decaff cleanup: SessionSockets * [misc] decaff cleanup: HttpController * [misc] decaff cleanup: HttpApiController * [misc] decaff cleanup: HealthCheckManager * [misc] decaff cleanup: EventLogger * [misc] decaff cleanup: Errors o-error will eliminate most of it -- when we migrate over. * [misc] decaff cleanup: DrainManager * [misc] decaff cleanup: DocumentUpdaterManager * [misc] decaff cleanup: DocumentUpdaterController: no-unused-vars * [misc] decaff cleanup: DocumentUpdaterController: Array.from * [misc] decaff cleanup: DocumentUpdaterController: implicit return * [misc] decaff cleanup: DocumentUpdaterController: IIFE * [misc] decaff cleanup: DocumentUpdaterController: null checks * [misc] decaff cleanup: DocumentUpdaterController: simpler loops * [misc] decaff cleanup: DocumentUpdaterController: move module name def * [misc] decaff cleanup: ConnectedUsersManager: handle-callback-err * [misc] decaff cleanup: ConnectedUsersManager: implicit returns * [misc] decaff cleanup: ConnectedUsersManager: null checks * [misc] decaff cleanup: ChannelManager: no-unused-vars * [misc] decaff cleanup: ChannelManager: implicit returns * [misc] decaff cleanup: ChannelManager: other cleanup - var -> const - drop variable assignment before return * [misc] decaff cleanup: AuthorizationManager: handle-callback-err Note: This requires a change in WebsocketController to provide a dummy callback. * [misc] decaff cleanup: AuthorizationManager: Array.from * [misc] decaff cleanup: AuthorizationManager: implicit returns * [misc] decaff cleanup: AuthorizationManager: null checks * [misc] decaff cleanup: Router: handle-callback-err * [misc] decaff cleanup: Router: standard/no-callback-literal * [misc] decaff cleanup: Router: Array.from * [misc] decaff cleanup: Router: implicit returns * [misc] decaff cleanup: Router: refactor __guard__ wrapper * [misc] decaff cleanup: Router: null checks And a minor bug fix: user.id -> user._id * [misc] decaff cleanup: Router: move variable declarations to assignments * [misc] decaff cleanup: app: implicit returns * [misc] decaff cleanup: app: __guard__ * [misc] decaff cleanup: app: null checks * [misc] decaff cleanup: app: function definitions * [misc] decaff cleanup: app: drop unused next argument * [misc] decaff cleanup: app: var -> const
2020-07-07 06:06:02 -04:00
if (this.timer) {
this.timer.done()
}
[misc] reland decaff cleanup (#166) * [misc] decaff cleanup: RoomManager * [misc] decaff cleanup: RedisClientManager * [misc] decaff cleanup: SafeJsonParse * [misc] decaff cleanup: WebApiManager * [misc] decaff cleanup: WebsocketController * [misc] decaff cleanup: WebsocketLoadBalancer * [misc] decaff cleanup: SessionSockets * [misc] decaff cleanup: HttpController * [misc] decaff cleanup: HttpApiController * [misc] decaff cleanup: HealthCheckManager * [misc] decaff cleanup: EventLogger * [misc] decaff cleanup: Errors o-error will eliminate most of it -- when we migrate over. * [misc] decaff cleanup: DrainManager * [misc] decaff cleanup: DocumentUpdaterManager * [misc] decaff cleanup: DocumentUpdaterController: no-unused-vars * [misc] decaff cleanup: DocumentUpdaterController: Array.from * [misc] decaff cleanup: DocumentUpdaterController: implicit return * [misc] decaff cleanup: DocumentUpdaterController: IIFE * [misc] decaff cleanup: DocumentUpdaterController: null checks * [misc] decaff cleanup: DocumentUpdaterController: simpler loops * [misc] decaff cleanup: DocumentUpdaterController: move module name def * [misc] decaff cleanup: ConnectedUsersManager: handle-callback-err * [misc] decaff cleanup: ConnectedUsersManager: implicit returns * [misc] decaff cleanup: ConnectedUsersManager: null checks * [misc] decaff cleanup: ChannelManager: no-unused-vars * [misc] decaff cleanup: ChannelManager: implicit returns * [misc] decaff cleanup: ChannelManager: other cleanup - var -> const - drop variable assignment before return * [misc] decaff cleanup: AuthorizationManager: handle-callback-err Note: This requires a change in WebsocketController to provide a dummy callback. * [misc] decaff cleanup: AuthorizationManager: Array.from * [misc] decaff cleanup: AuthorizationManager: implicit returns * [misc] decaff cleanup: AuthorizationManager: null checks * [misc] decaff cleanup: Router: handle-callback-err * [misc] decaff cleanup: Router: standard/no-callback-literal * [misc] decaff cleanup: Router: Array.from * [misc] decaff cleanup: Router: implicit returns * [misc] decaff cleanup: Router: refactor __guard__ wrapper * [misc] decaff cleanup: Router: null checks And a minor bug fix: user.id -> user._id * [misc] decaff cleanup: Router: move variable declarations to assignments * [misc] decaff cleanup: app: implicit returns * [misc] decaff cleanup: app: __guard__ * [misc] decaff cleanup: app: null checks * [misc] decaff cleanup: app: function definitions * [misc] decaff cleanup: app: drop unused next argument * [misc] decaff cleanup: app: var -> const
2020-07-07 06:06:02 -04:00
this.timer = undefined // only time the latency of the first event
}
}
setStatus() {
// if we saw the event anything other than a single time that is an error
[misc] reland decaff cleanup (#166) * [misc] decaff cleanup: RoomManager * [misc] decaff cleanup: RedisClientManager * [misc] decaff cleanup: SafeJsonParse * [misc] decaff cleanup: WebApiManager * [misc] decaff cleanup: WebsocketController * [misc] decaff cleanup: WebsocketLoadBalancer * [misc] decaff cleanup: SessionSockets * [misc] decaff cleanup: HttpController * [misc] decaff cleanup: HttpApiController * [misc] decaff cleanup: HealthCheckManager * [misc] decaff cleanup: EventLogger * [misc] decaff cleanup: Errors o-error will eliminate most of it -- when we migrate over. * [misc] decaff cleanup: DrainManager * [misc] decaff cleanup: DocumentUpdaterManager * [misc] decaff cleanup: DocumentUpdaterController: no-unused-vars * [misc] decaff cleanup: DocumentUpdaterController: Array.from * [misc] decaff cleanup: DocumentUpdaterController: implicit return * [misc] decaff cleanup: DocumentUpdaterController: IIFE * [misc] decaff cleanup: DocumentUpdaterController: null checks * [misc] decaff cleanup: DocumentUpdaterController: simpler loops * [misc] decaff cleanup: DocumentUpdaterController: move module name def * [misc] decaff cleanup: ConnectedUsersManager: handle-callback-err * [misc] decaff cleanup: ConnectedUsersManager: implicit returns * [misc] decaff cleanup: ConnectedUsersManager: null checks * [misc] decaff cleanup: ChannelManager: no-unused-vars * [misc] decaff cleanup: ChannelManager: implicit returns * [misc] decaff cleanup: ChannelManager: other cleanup - var -> const - drop variable assignment before return * [misc] decaff cleanup: AuthorizationManager: handle-callback-err Note: This requires a change in WebsocketController to provide a dummy callback. * [misc] decaff cleanup: AuthorizationManager: Array.from * [misc] decaff cleanup: AuthorizationManager: implicit returns * [misc] decaff cleanup: AuthorizationManager: null checks * [misc] decaff cleanup: Router: handle-callback-err * [misc] decaff cleanup: Router: standard/no-callback-literal * [misc] decaff cleanup: Router: Array.from * [misc] decaff cleanup: Router: implicit returns * [misc] decaff cleanup: Router: refactor __guard__ wrapper * [misc] decaff cleanup: Router: null checks And a minor bug fix: user.id -> user._id * [misc] decaff cleanup: Router: move variable declarations to assignments * [misc] decaff cleanup: app: implicit returns * [misc] decaff cleanup: app: __guard__ * [misc] decaff cleanup: app: null checks * [misc] decaff cleanup: app: function definitions * [misc] decaff cleanup: app: drop unused next argument * [misc] decaff cleanup: app: var -> const
2020-07-07 06:06:02 -04:00
const isFailing = this.count !== 1
if (isFailing) {
logger.err(
{ channel: this.channel, count: this.count, id: this.id },
'redis channel health check error'
)
}
[misc] reland decaff cleanup (#166) * [misc] decaff cleanup: RoomManager * [misc] decaff cleanup: RedisClientManager * [misc] decaff cleanup: SafeJsonParse * [misc] decaff cleanup: WebApiManager * [misc] decaff cleanup: WebsocketController * [misc] decaff cleanup: WebsocketLoadBalancer * [misc] decaff cleanup: SessionSockets * [misc] decaff cleanup: HttpController * [misc] decaff cleanup: HttpApiController * [misc] decaff cleanup: HealthCheckManager * [misc] decaff cleanup: EventLogger * [misc] decaff cleanup: Errors o-error will eliminate most of it -- when we migrate over. * [misc] decaff cleanup: DrainManager * [misc] decaff cleanup: DocumentUpdaterManager * [misc] decaff cleanup: DocumentUpdaterController: no-unused-vars * [misc] decaff cleanup: DocumentUpdaterController: Array.from * [misc] decaff cleanup: DocumentUpdaterController: implicit return * [misc] decaff cleanup: DocumentUpdaterController: IIFE * [misc] decaff cleanup: DocumentUpdaterController: null checks * [misc] decaff cleanup: DocumentUpdaterController: simpler loops * [misc] decaff cleanup: DocumentUpdaterController: move module name def * [misc] decaff cleanup: ConnectedUsersManager: handle-callback-err * [misc] decaff cleanup: ConnectedUsersManager: implicit returns * [misc] decaff cleanup: ConnectedUsersManager: null checks * [misc] decaff cleanup: ChannelManager: no-unused-vars * [misc] decaff cleanup: ChannelManager: implicit returns * [misc] decaff cleanup: ChannelManager: other cleanup - var -> const - drop variable assignment before return * [misc] decaff cleanup: AuthorizationManager: handle-callback-err Note: This requires a change in WebsocketController to provide a dummy callback. * [misc] decaff cleanup: AuthorizationManager: Array.from * [misc] decaff cleanup: AuthorizationManager: implicit returns * [misc] decaff cleanup: AuthorizationManager: null checks * [misc] decaff cleanup: Router: handle-callback-err * [misc] decaff cleanup: Router: standard/no-callback-literal * [misc] decaff cleanup: Router: Array.from * [misc] decaff cleanup: Router: implicit returns * [misc] decaff cleanup: Router: refactor __guard__ wrapper * [misc] decaff cleanup: Router: null checks And a minor bug fix: user.id -> user._id * [misc] decaff cleanup: Router: move variable declarations to assignments * [misc] decaff cleanup: app: implicit returns * [misc] decaff cleanup: app: __guard__ * [misc] decaff cleanup: app: null checks * [misc] decaff cleanup: app: function definitions * [misc] decaff cleanup: app: drop unused next argument * [misc] decaff cleanup: app: var -> const
2020-07-07 06:06:02 -04:00
CHANNEL_ERROR[this.channel] = isFailing
}
2019-04-15 09:05:26 -04:00
// class methods
static check(channel, id) {
// dispatch event to manager for channel
[misc] reland decaff cleanup (#166) * [misc] decaff cleanup: RoomManager * [misc] decaff cleanup: RedisClientManager * [misc] decaff cleanup: SafeJsonParse * [misc] decaff cleanup: WebApiManager * [misc] decaff cleanup: WebsocketController * [misc] decaff cleanup: WebsocketLoadBalancer * [misc] decaff cleanup: SessionSockets * [misc] decaff cleanup: HttpController * [misc] decaff cleanup: HttpApiController * [misc] decaff cleanup: HealthCheckManager * [misc] decaff cleanup: EventLogger * [misc] decaff cleanup: Errors o-error will eliminate most of it -- when we migrate over. * [misc] decaff cleanup: DrainManager * [misc] decaff cleanup: DocumentUpdaterManager * [misc] decaff cleanup: DocumentUpdaterController: no-unused-vars * [misc] decaff cleanup: DocumentUpdaterController: Array.from * [misc] decaff cleanup: DocumentUpdaterController: implicit return * [misc] decaff cleanup: DocumentUpdaterController: IIFE * [misc] decaff cleanup: DocumentUpdaterController: null checks * [misc] decaff cleanup: DocumentUpdaterController: simpler loops * [misc] decaff cleanup: DocumentUpdaterController: move module name def * [misc] decaff cleanup: ConnectedUsersManager: handle-callback-err * [misc] decaff cleanup: ConnectedUsersManager: implicit returns * [misc] decaff cleanup: ConnectedUsersManager: null checks * [misc] decaff cleanup: ChannelManager: no-unused-vars * [misc] decaff cleanup: ChannelManager: implicit returns * [misc] decaff cleanup: ChannelManager: other cleanup - var -> const - drop variable assignment before return * [misc] decaff cleanup: AuthorizationManager: handle-callback-err Note: This requires a change in WebsocketController to provide a dummy callback. * [misc] decaff cleanup: AuthorizationManager: Array.from * [misc] decaff cleanup: AuthorizationManager: implicit returns * [misc] decaff cleanup: AuthorizationManager: null checks * [misc] decaff cleanup: Router: handle-callback-err * [misc] decaff cleanup: Router: standard/no-callback-literal * [misc] decaff cleanup: Router: Array.from * [misc] decaff cleanup: Router: implicit returns * [misc] decaff cleanup: Router: refactor __guard__ wrapper * [misc] decaff cleanup: Router: null checks And a minor bug fix: user.id -> user._id * [misc] decaff cleanup: Router: move variable declarations to assignments * [misc] decaff cleanup: app: implicit returns * [misc] decaff cleanup: app: __guard__ * [misc] decaff cleanup: app: null checks * [misc] decaff cleanup: app: function definitions * [misc] decaff cleanup: app: drop unused next argument * [misc] decaff cleanup: app: var -> const
2020-07-07 06:06:02 -04:00
if (CHANNEL_MANAGER[channel]) {
CHANNEL_MANAGER[channel].processEvent(id)
}
}
static status() {
// return status of all channels for logging
return CHANNEL_ERROR
}
static isFailing() {
// check if any channel status is bad
for (const channel in CHANNEL_ERROR) {
const error = CHANNEL_ERROR[channel]
if (error === true) {
return true
}
}
return false
}
}