From 07e4eb4dfb9b044a58aff277ca97cd5c5d65c201 Mon Sep 17 00:00:00 2001
From: Simon Detheridge
Date: Wed, 23 Oct 2019 17:07:23 +0100
Subject: [PATCH] Add mechanism to expire old prometheus metrics

Adds a wrapper around the prometheus client, which keeps track of the
last time a metric was accessed, and removes old ones once they have
not been accessed for a period of time.
---
Review notes (this text sits after the "---" marker, so it is not part of
the commit message):
 - gauge() and globalGauge() now ask the wrapper for type 'gauge'; the
   misspelled 'gague' matched no case in MetricWrapper, leaving @metric
   undefined.
 - prom.Gauge is constructed with `new`, like Counter and Summary.
 - PromWrapper.metric() returns the existing MetricWrapper instead of the
   raw registry entry, so repeat writes refresh the access times.
 - sweep() forgets a label set once it has removed it.
 - The blob ids on the "index" lines are those of the original submission.

 libraries/metrics/metrics.coffee      |  63 +++---------
 libraries/metrics/package.json        |   2 +-
 libraries/metrics/prom_wrapper.coffee | 141 ++++++++++++++++++++++++++
 3 files changed, 155 insertions(+), 51 deletions(-)
 create mode 100644 libraries/metrics/prom_wrapper.coffee

diff --git a/libraries/metrics/metrics.coffee b/libraries/metrics/metrics.coffee
index 0f65c7b1ea..03e492be8e 100644
--- a/libraries/metrics/metrics.coffee
+++ b/libraries/metrics/metrics.coffee
@@ -3,30 +3,25 @@ if process.env["USE_PROM_METRICS"] != "true"
 else
 	console.log("using prometheus")
 
-
-prom = require('prom-client')
-Register = require('prom-client').register
+prom = require('./prom_wrapper')
 
 collectDefaultMetrics = prom.collectDefaultMetrics
 
 appname = "unknown"
 hostname = require('os').hostname()
 
-buildKey = (key)-> "#{name}.#{hostname}.#{key}"
-buildGlobalKey = (key)-> "#{name}.global.#{key}"
-
-promMetrics = {}
-
 destructors = []
 
 require "./uv_threadpool_size"
 
 module.exports = Metrics =
-	register:Register
-	initialize: (_name) ->
+	register: prom.registry
+
+	initialize: (_name, opts = {}) ->
 		appname = _name
 		collectDefaultMetrics({ timeout: 5000, prefix: Metrics.buildPromKey()})
-
+		if opts.ttlInMinutes
+			prom.ttlInMinutes = opts.ttlInMinutes
 
 		console.log("ENABLE_TRACE_AGENT set to #{process.env['ENABLE_TRACE_AGENT']}")
 		if process.env['ENABLE_TRACE_AGENT'] == "true"
@@ -67,8 +62,8 @@ module.exports = Metrics =
 
 	injectMetricsRoute: (app) ->
 		app.get('/metrics', (req, res) ->
-			res.set('Content-Type', Register.contentType)
-			res.end(Register.metrics())
+			res.set('Content-Type', prom.registry.contentType)
+			res.end(prom.registry.metrics())
 		)
 
 	buildPromKey: (key = "")->
@@ -82,43 +77,23 @@ module.exports = Metrics =
 
 	inc : (key, sampleRate = 1, opts = {})->
 		key = Metrics.buildPromKey(key)
-		if !promMetrics[key]?
-			promMetrics[key] = new prom.Counter({
-				name: key,
-				help: key,
-				labelNames: ['app','host','status','method', 'path']
-			})
 		opts.app = appname
 		opts.host = hostname
-		promMetrics[key].inc(opts)
+		prom.metric('counter', key).inc(opts)
 		if process.env['DEBUG_METRICS']
 			console.log("doing inc", key, opts)
 
 	count : (key, count, sampleRate = 1)->
 		key = Metrics.buildPromKey(key)
-		if !promMetrics[key]?
-			promMetrics[key] = new prom.Counter({
-				name: key,
-				help: key,
-				labelNames: ['app','host']
-			})
-		promMetrics[key].inc({app: appname, host: hostname}, count)
+		prom.metric('counter', key).inc({app: appname, host: hostname}, count)
 		if process.env['DEBUG_METRICS']
 			console.log("doing count/inc", key, opts)
 
 	timing: (key, timeSpan, sampleRate, opts = {})->
 		key = Metrics.buildPromKey("timer_" + key)
-		if !promMetrics[key]?
-			promMetrics[key] = new prom.Summary({
-				name: key,
-				help: key,
-				maxAgeSeconds: 600,
-				ageBuckets: 10,
-				labelNames: ['app', 'host', 'path', 'status_code', 'method', 'collection', 'query']
-			})
 		opts.app = appname
 		opts.host = hostname
-		promMetrics[key].observe(opts, timeSpan)
+		prom.metric('summary', key).observe(opts, timeSpan)
 		if process.env['DEBUG_METRICS']
 			console.log("doing timing", key, opts)
 
@@ -137,25 +112,13 @@ module.exports = Metrics =
 
 	gauge : (key, value, sampleRate = 1, opts)->
 		key = Metrics.buildPromKey(key)
-		if !promMetrics[key]?
-			promMetrics[key] = new prom.Gauge({
-				name: key,
-				help: key,
-				labelNames: ['app','host', 'status']
-			})
-		promMetrics[key].set({app: appname, host: hostname, status: opts?.status}, this.sanitizeValue(value))
+		prom.metric('gauge', key).set({app: appname, host: hostname, status: opts?.status}, this.sanitizeValue(value))
 		if process.env['DEBUG_METRICS']
 			console.log("doing gauge", key, opts)
 
 	globalGauge: (key, value, sampleRate = 1, opts)->
 		key = Metrics.buildPromKey(key)
-		if !promMetrics[key]?
-			promMetrics[key] = new prom.Gauge({
-				name: key,
-				help: key,
-				labelNames: ['app','host', 'status']
-			})
-		promMetrics[key].set({app: appname, status: opts?.status},this.sanitizeValue(value))
+		prom.metric('gauge', key).set({app: appname, status: opts?.status},this.sanitizeValue(value))
 
 	mongodb: require "./mongodb"
 	http: require "./http"
diff --git a/libraries/metrics/package.json b/libraries/metrics/package.json
index 24daa5da46..86f2e39a9e 100644
--- a/libraries/metrics/package.json
+++ b/libraries/metrics/package.json
@@ -1,6 +1,6 @@
 {
   "name": "metrics-sharelatex",
-  "version": "2.2.0",
+  "version": "2.3.0",
   "description": "A drop-in metrics and monitoring module for node.js apps",
   "repository": {
     "type": "git",
diff --git a/libraries/metrics/prom_wrapper.coffee b/libraries/metrics/prom_wrapper.coffee
new file mode 100644
index 0000000000..eb2c132314
--- /dev/null
+++ b/libraries/metrics/prom_wrapper.coffee
@@ -0,0 +1,141 @@
+# Wrapper around prom-client that records when each metric, and each label
+# combination of a metric, was last written, so that entries which have not
+# been written for `ttlInMinutes` can be removed again.
+prom = require('prom-client')
+registry = require('prom-client').register
+# MetricWrapper instances created by this module, keyed by metric name
+metrics = {}
+
+
+# Builds a string key from a label object. Keys are sorted so the result does
+# not depend on property order; returns '' when the object has no keys.
+optsKey = (opts) ->
+	keys = Object.keys(opts)
+	return '' if keys.length == 0
+
+	keys = keys.sort()
+
+	hash = '';
+	for key in keys
+		hash += "," if hash.length
+		hash += "#{key}:#{opts[key]}"
+
+	return hash
+
+# Fills in '' for every label in labelNames that is unset (or falsy) on opts.
+# Note: mutates and returns the opts object it is given.
+extendOpts = (opts, labelNames) ->
+	for label in labelNames
+		opts[label] ||= ''
+	return opts
+
+# Returns the label values of opts in labelNames order, with '' for missing
+# (or falsy) values, i.e. the positional form passed to the metric's remove().
+optsAsArgs = (opts, labelNames) ->
+	args = []
+	for label in labelNames
+		args.push(opts[label] || '')
+	return args
+
+
+PromWrapper =
+	# Minutes without a write before an entry is swept; 0 disables sweeping
+	ttlInMinutes: 0
+	registry: registry
+
+	# Returns the wrapper for `name`, creating it on first use. Looking the
+	# wrapper up in `metrics` (rather than handing back the raw registry entry)
+	# keeps every write flowing through _execMethod, which is what records the
+	# access times that sweep() relies on.
+	metric: (type, name) ->
+		metrics[name] || new MetricWrapper(type, name)
+
+	collectDefaultMetrics: prom.collectDefaultMetrics
+
+
+# One prom-client metric plus the bookkeeping needed to expire it.
+class MetricWrapper
+	# type is one of "counter", "summary" or "gauge"; any other value leaves
+	# @metric undefined.
+	constructor: (type, name) ->
+		metrics[name] = this
+		@name = name
+		# label-set key (see optsKey) -> { time, opts } of the last write
+		@instances = {}
+		@lastAccess = new Date()
+		@metric = switch type
+			when "counter"
+				new prom.Counter({
+					name: name,
+					help: name,
+					labelNames: ['app','host','status','method', 'path']
+				})
+			when "summary"
+				new prom.Summary({
+					name: name,
+					help: name,
+					maxAgeSeconds: 600,
+					ageBuckets: 10,
+					labelNames: ['app', 'host', 'path', 'status_code', 'method', 'collection', 'query']
+				})
+			when "gauge"
+				new prom.Gauge({
+					name: name,
+					help: name,
+					labelNames: ['app','host', 'status']
+				})
+
+	inc: (opts, value) ->
+		@_execMethod 'inc', opts, value
+
+	observe: (opts, value) ->
+		@_execMethod 'observe', opts, value
+
+	set: (opts, value) ->
+		@_execMethod 'set', opts, value
+
+	# Removes label sets not written since the TTL threshold, then removes the
+	# whole metric if nothing has been written to it since the threshold.
+	sweep: () ->
+		thresh = new Date(Date.now() - 1000 * 60 * PromWrapper.ttlInMinutes)
+		for key in Object.keys(@instances)
+			if thresh > @instances[key].time
+				if process.env['DEBUG_METRICS']
+					console.log("Sweeping stale metric instance", @name, opts: @instances[key].opts, key)
+				@metric.remove(optsAsArgs(@instances[key].opts, @metric.labelNames)...)
+				# forget the label set, otherwise it is re-removed on every sweep
+				delete @instances[key]
+
+		if thresh > @lastAccess
+			if process.env['DEBUG_METRICS']
+				console.log("Sweeping stale metric", @name)
+			delete metrics[@name]
+			registry.removeSingleMetric(@name)
+
+	# Records the access time for this label set and for the metric as a whole,
+	# then forwards the call to the underlying prom-client metric.
+	_execMethod: (method, opts, value) ->
+		opts = extendOpts(opts, @metric.labelNames)
+		key = optsKey(opts)
+		@instances[key] = { time: new Date(), opts } unless key == ''
+		@lastAccess = new Date()
+		@metric[method](opts, value)
+
+
+# Start a single once-a-minute sweep timer; it does nothing while
+# ttlInMinutes is 0.
+unless PromWrapper.sweepRegistered
+	if process.env['DEBUG_METRICS']
+		console.log("Registering sweep method")
+	PromWrapper.sweepRegistered = true
+	setInterval(
+		() ->
+			if PromWrapper.ttlInMinutes
+				if process.env['DEBUG_METRICS']
+					console.log("Sweeping metrics")
+				for key in Object.keys(metrics)
+					metrics[key].sweep()
+		60000)
+
+
+module.exports = PromWrapper