Merge pull request #20 from overleaf/spd-metrics-ttl

Add mechanism to expire old prometheus metrics
This commit is contained in:
Simon Detheridge 2019-10-28 17:21:57 +00:00 committed by GitHub
commit 53aa2490f5
2 changed files with 127 additions and 50 deletions

View file

@ -3,30 +3,25 @@ if process.env["USE_PROM_METRICS"] != "true"
else else
console.log("using prometheus") console.log("using prometheus")
prom = require('./prom_wrapper')
prom = require('prom-client')
Register = require('prom-client').register
collectDefaultMetrics = prom.collectDefaultMetrics collectDefaultMetrics = prom.collectDefaultMetrics
appname = "unknown" appname = "unknown"
hostname = require('os').hostname() hostname = require('os').hostname()
buildKey = (key)-> "#{name}.#{hostname}.#{key}"
buildGlobalKey = (key)-> "#{name}.global.#{key}"
promMetrics = {}
destructors = [] destructors = []
require "./uv_threadpool_size" require "./uv_threadpool_size"
module.exports = Metrics = module.exports = Metrics =
register:Register register: prom.registry
initialize: (_name) ->
initialize: (_name, opts = {}) ->
appname = _name appname = _name
collectDefaultMetrics({ timeout: 5000, prefix: Metrics.buildPromKey()}) collectDefaultMetrics({ timeout: 5000, prefix: Metrics.buildPromKey()})
if opts.ttlInMinutes
prom.ttlInMinutes = opts.ttlInMinutes
console.log("ENABLE_TRACE_AGENT set to #{process.env['ENABLE_TRACE_AGENT']}") console.log("ENABLE_TRACE_AGENT set to #{process.env['ENABLE_TRACE_AGENT']}")
if process.env['ENABLE_TRACE_AGENT'] == "true" if process.env['ENABLE_TRACE_AGENT'] == "true"
@ -67,8 +62,8 @@ module.exports = Metrics =
injectMetricsRoute: (app) -> injectMetricsRoute: (app) ->
app.get('/metrics', (req, res) -> app.get('/metrics', (req, res) ->
res.set('Content-Type', Register.contentType) res.set('Content-Type', prom.registry.contentType)
res.end(Register.metrics()) res.end(prom.registry.metrics())
) )
buildPromKey: (key = "")-> buildPromKey: (key = "")->
@ -82,43 +77,23 @@ module.exports = Metrics =
inc : (key, sampleRate = 1, opts = {})-> inc : (key, sampleRate = 1, opts = {})->
key = Metrics.buildPromKey(key) key = Metrics.buildPromKey(key)
if !promMetrics[key]?
promMetrics[key] = new prom.Counter({
name: key,
help: key,
labelNames: ['app','host','status','method', 'path']
})
opts.app = appname opts.app = appname
opts.host = hostname opts.host = hostname
promMetrics[key].inc(opts) prom.metric('counter', key).inc(opts)
if process.env['DEBUG_METRICS'] if process.env['DEBUG_METRICS']
console.log("doing inc", key, opts) console.log("doing inc", key, opts)
count : (key, count, sampleRate = 1)-> count : (key, count, sampleRate = 1)->
key = Metrics.buildPromKey(key) key = Metrics.buildPromKey(key)
if !promMetrics[key]? prom.metric('counter', key).inc({app: appname, host: hostname}, count)
promMetrics[key] = new prom.Counter({
name: key,
help: key,
labelNames: ['app','host']
})
promMetrics[key].inc({app: appname, host: hostname}, count)
if process.env['DEBUG_METRICS'] if process.env['DEBUG_METRICS']
console.log("doing count/inc", key, opts) console.log("doing count/inc", key, opts)
timing: (key, timeSpan, sampleRate, opts = {})-> timing: (key, timeSpan, sampleRate, opts = {})->
key = Metrics.buildPromKey("timer_" + key) key = Metrics.buildPromKey("timer_" + key)
if !promMetrics[key]?
promMetrics[key] = new prom.Summary({
name: key,
help: key,
maxAgeSeconds: 600,
ageBuckets: 10,
labelNames: ['app', 'host', 'path', 'status_code', 'method', 'collection', 'query']
})
opts.app = appname opts.app = appname
opts.host = hostname opts.host = hostname
promMetrics[key].observe(opts, timeSpan) prom.metric('summary', key).observe(opts, timeSpan)
if process.env['DEBUG_METRICS'] if process.env['DEBUG_METRICS']
console.log("doing timing", key, opts) console.log("doing timing", key, opts)
@ -137,25 +112,13 @@ module.exports = Metrics =
gauge : (key, value, sampleRate = 1, opts)-> gauge : (key, value, sampleRate = 1, opts)->
key = Metrics.buildPromKey(key) key = Metrics.buildPromKey(key)
if !promMetrics[key]? prom.metric('gauge', key).set({app: appname, host: hostname, status: opts?.status}, this.sanitizeValue(value))
promMetrics[key] = new prom.Gauge({
name: key,
help: key,
labelNames: ['app','host', 'status']
})
promMetrics[key].set({app: appname, host: hostname, status: opts?.status}, this.sanitizeValue(value))
if process.env['DEBUG_METRICS'] if process.env['DEBUG_METRICS']
console.log("doing gauge", key, opts) console.log("doing gauge", key, opts)
globalGauge: (key, value, sampleRate = 1, opts)-> globalGauge: (key, value, sampleRate = 1, opts)->
key = Metrics.buildPromKey(key) key = Metrics.buildPromKey(key)
if !promMetrics[key]? prom.metric('gauge', key).set({app: appname, status: opts?.status},this.sanitizeValue(value))
promMetrics[key] = new prom.Gauge({
name: key,
help: key,
labelNames: ['app','host', 'status']
})
promMetrics[key].set({app: appname, status: opts?.status},this.sanitizeValue(value))
mongodb: require "./mongodb" mongodb: require "./mongodb"
http: require "./http" http: require "./http"

View file

@ -0,0 +1,114 @@
prom = require('prom-client')
registry = require('prom-client').register
# Cache of MetricWrapper instances keyed by metric name; wrappers add
# themselves on construction and remove themselves when swept as stale.
metrics = new Map()
# Build a deterministic string key for a label set: the labels are sorted by
# name and rendered as "name:value" pairs joined with commas. An object with
# no keys yields the empty string.
optsKey = (opts) ->
  sortedLabels = Object.keys(opts).sort()
  ("#{label}:#{opts[label]}" for label in sortedLabels).join(',')
# Ensure every label in labelNames is present on opts, filling any missing or
# falsy value with the empty string. Mutates opts in place and returns it.
extendOpts = (opts, labelNames) ->
  for labelName in labelNames
    opts[labelName] = '' unless opts[labelName]
  opts
# Convert a label object into a positional list of label values, ordered as in
# labelNames. Missing or falsy values are represented by the empty string.
optsAsArgs = (opts, labelNames) ->
  (opts[labelName] || '' for labelName in labelNames)
# Public facade of this module: exposes the prom-client registry and default
# metrics collector, plus a factory that hands out cached metric wrappers.
PromWrapper =
  # Age in minutes after which unused metrics are swept; the periodic sweep
  # does nothing while this is 0.
  ttlInMinutes: 0
  registry: registry
  # Return the cached wrapper registered under `name`, or create a new one of
  # the given `type` when none is cached.
  metric: (type, name) ->
    cached = metrics.get(name)
    return cached if cached
    new MetricWrapper(type, name)
  collectDefaultMetrics: prom.collectDefaultMetrics
# Wraps a single prom-client metric and records when each label combination
# was last written, so that `sweep` can expire label sets (and eventually the
# whole metric) that have not been touched within PromWrapper.ttlInMinutes.
class MetricWrapper
  # type: "counter", "summary" or "gauge"; any other value throws an Error.
  # name: metric name, also used as the help text and as the cache key.
  constructor: (type, name) ->
    @name = name
    # label-set key (see optsKey) -> { time, opts } of the most recent write
    @instances = new Map()
    @lastAccess = new Date()
    @metric = switch type
      when "counter"
        new prom.Counter({
          name: name,
          help: name,
          labelNames: ['app','host','status','method', 'path']
        })
      when "summary"
        new prom.Summary({
          name: name,
          help: name,
          maxAgeSeconds: 600,
          ageBuckets: 10,
          labelNames: ['app', 'host', 'path', 'status_code', 'method', 'collection', 'query']
        })
      when "gauge"
        # Must be constructed with `new`, like the other metric types.
        new prom.Gauge({
          name: name,
          help: name,
          labelNames: ['app','host', 'status']
        })
      else
        throw new Error("unknown metric type: #{type}")
    # Cache the wrapper only once the underlying metric exists, so a failed
    # construction never leaves an unusable wrapper behind in `metrics`.
    metrics.set(name, this)

  # Increment the wrapped metric for the given labels.
  inc: (opts, value) ->
    @_execMethod 'inc', opts, value

  # Record an observation on the wrapped metric for the given labels.
  observe: (opts, value) ->
    @_execMethod 'observe', opts, value

  # Set the wrapped metric to `value` for the given labels.
  set: (opts, value) ->
    @_execMethod 'set', opts, value

  # Drop label sets not written since the TTL threshold, and unregister the
  # whole metric when nothing has been written to it since the threshold.
  sweep: () ->
    thresh = new Date(Date.now() - 1000 * 60 * PromWrapper.ttlInMinutes)
    @instances.forEach (instance, key) =>
      if thresh > instance.time
        if process.env['DEBUG_METRICS']
          console.log("Sweeping stale metric instance", @name, { opts: instance.opts }, key)
        @metric.remove(optsAsArgs(instance.opts, @metric.labelNames)...)
        # Forget the swept label set so it is not retained and re-removed on
        # every later sweep (deleting during Map#forEach is safe).
        @instances.delete(key)

    if thresh > @lastAccess
      if process.env['DEBUG_METRICS']
        console.log("Sweeping stale metric", @name, thresh, @lastAccess)
      metrics.delete(@name)
      registry.removeSingleMetric(@name)

  # Shared write path: pads `opts` with every declared label (mutating the
  # caller's object), records the access time for this label set and for the
  # metric as a whole, then forwards the call to the prom-client metric.
  _execMethod: (method, opts, value) ->
    opts = extendOpts(opts, @metric.labelNames)
    key = optsKey(opts)
    @instances.set(key, { time: new Date(), opts }) unless key == ''
    @lastAccess = new Date()
    @metric[method](opts, value)
# Install the periodic sweep, guarded by PromWrapper.sweepRegistered so that
# it is set up at most once. Every 60 seconds, if a TTL has been configured,
# each cached metric wrapper is asked to expire its stale data.
unless PromWrapper.sweepRegistered
  if process.env['DEBUG_METRICS']
    console.log("Registering sweep method")
  PromWrapper.sweepRegistered = true
  sweepTimer = setInterval(
    () ->
      if PromWrapper.ttlInMinutes
        if process.env['DEBUG_METRICS']
          console.log("Sweeping metrics")
        metrics.forEach (metric, key) =>
          metric.sweep()
    60000)
  # Housekeeping only: do not let this timer alone keep the process alive
  # (otherwise merely requiring the module prevents a clean exit).
  sweepTimer.unref()
module.exports = PromWrapper