From e01af0e9c69d2680e44b212f2aa272bb92c28d00 Mon Sep 17 00:00:00 2001 From: Brian Gough Date: Wed, 17 Jan 2024 09:22:46 +0000 Subject: [PATCH] Merge pull request #16519 from overleaf/bg-clsi-timeouts-add-metrics add metrics for clsi cache operations GitOrigin-RevId: c5ec221afd235434c8b81bafa2f482f11422ac4e --- libraries/metrics/index.js | 7 ++-- libraries/metrics/package.json | 2 +- .../metrics/test/acceptance/metrics_tests.js | 15 ++++++-- package-lock.json | 2 +- .../clsi/app/js/ProjectPersistenceManager.js | 6 ++- services/clsi/app/js/UrlCache.js | 37 +++++++++++++++++-- services/clsi/app/js/UrlFetcher.js | 10 ++++- .../unit/js/ProjectPersistenceManagerTests.js | 4 +- services/clsi/test/unit/js/UrlCacheTests.js | 3 ++ 9 files changed, 70 insertions(+), 16 deletions(-) diff --git a/libraries/metrics/index.js b/libraries/metrics/index.js index eeb5a94978..b711ea4a49 100644 --- a/libraries/metrics/index.js +++ b/libraries/metrics/index.js @@ -121,12 +121,13 @@ class Timer { this.buckets = buckets } - done() { + // any labels passed into the done method override labels from constructor + done(labels = {}) { const timeSpan = new Date() - this.start if (this.buckets) { - histogram(this.key, timeSpan, this.buckets, this.labels) + histogram(this.key, timeSpan, this.buckets, { ...this.labels, ...labels }) } else { - timing(this.key, timeSpan, this.sampleRate, this.labels) + timing(this.key, timeSpan, this.sampleRate, { ...this.labels, ...labels }) } return timeSpan } diff --git a/libraries/metrics/package.json b/libraries/metrics/package.json index cc1a927d17..e88fdd9367 100644 --- a/libraries/metrics/package.json +++ b/libraries/metrics/package.json @@ -1,6 +1,6 @@ { "name": "@overleaf/metrics", - "version": "4.1.0", + "version": "4.2.0", "description": "A drop-in metrics and monitoring module for node.js apps", "repository": { "type": "git", diff --git a/libraries/metrics/test/acceptance/metrics_tests.js b/libraries/metrics/test/acceptance/metrics_tests.js index 1a5b78103e..307cabc56c 100644 --- a/libraries/metrics/test/acceptance/metrics_tests.js +++ b/libraries/metrics/test/acceptance/metrics_tests.js @@ -94,11 +94,16 @@ describe('Metrics module', function () { beforeEach('collect timings', async function () { const buckets = [10, 100, 1000] for (const duration of [1, 1, 1, 15, 15, 15, 105, 105, 105]) { - const withBuckets = new Metrics.Timer('height', 1, {}, buckets) - const withOutBuckets = new Metrics.Timer('depth', 1, {}) + const withBuckets = new Metrics.Timer( + 'height', + 1, + { label_1: 'a' }, + buckets + ) + const withOutBuckets = new Metrics.Timer('depth', 1, { label_2: 'b' }) await sleep(duration) withBuckets.done() - withOutBuckets.done() + withOutBuckets.done({ label_3: 'c' }) } }) @@ -109,6 +114,8 @@ describe('Metrics module', function () { 1000: 9, '+Inf': 9, }) + const labelNames = await getMetric('histogram_height').labelNames + expect(labelNames).to.deep.equal(['label_1']) }) it('without buckets', async function () { @@ -121,6 +128,8 @@ describe('Metrics module', function () { 0.99: 105, 0.999: 105, }) + const labelNames = await getMetric('timer_depth').labelNames + expect(labelNames).to.deep.equal(['label_2', 'label_3']) }) }) diff --git a/package-lock.json b/package-lock.json index 3627e57436..73319ee546 100644 --- a/package-lock.json +++ b/package-lock.json @@ -295,7 +295,7 @@ }, "libraries/metrics": { "name": "@overleaf/metrics", - "version": "4.1.0", + "version": "4.2.0", "dependencies": { "@google-cloud/opentelemetry-cloud-trace-exporter": "^2.1.0", "@google-cloud/profiler": "^6.0.0", diff --git a/services/clsi/app/js/ProjectPersistenceManager.js b/services/clsi/app/js/ProjectPersistenceManager.js index 21348e1d7e..52f61840c7 100644 --- a/services/clsi/app/js/ProjectPersistenceManager.js +++ b/services/clsi/app/js/ProjectPersistenceManager.js @@ -124,6 +124,7 @@ module.exports = ProjectPersistenceManager = { projectId => callback => ProjectPersistenceManager.clearProjectFromCache( projectId, + { reason: 'expired' }, function (err) { if (err != null) { logger.error({ err, projectId }, 'error clearing project') @@ -156,6 +157,7 @@ module.exports = ProjectPersistenceManager = { } return ProjectPersistenceManager.clearProjectFromCache( projectId, + { reason: 'cleared' }, function (error) { if (error != null) { return callback(error) @@ -166,12 +168,12 @@ module.exports = ProjectPersistenceManager = { }) }, - clearProjectFromCache(projectId, callback) { + clearProjectFromCache(projectId, options, callback) { if (callback == null) { callback = function () {} } logger.debug({ projectId }, 'clearing project from cache') - return UrlCache.clearProject(projectId, function (error) { + return UrlCache.clearProject(projectId, options, function (error) { if (error != null) { logger.err({ error, projectId }, 'error clearing project from cache') return callback(error) diff --git a/services/clsi/app/js/UrlCache.js b/services/clsi/app/js/UrlCache.js index 919479b096..ae8eae6b2a 100644 --- a/services/clsi/app/js/UrlCache.js +++ b/services/clsi/app/js/UrlCache.js @@ -15,6 +15,7 @@ const Settings = require('@overleaf/settings') const fs = require('fs') const Path = require('path') const { callbackify } = require('util') +const Metrics = require('./Metrics') const PENDING_DOWNLOADS = new Map() @@ -30,11 +31,16 @@ function getCachePath(projectId, url, lastModified) { return Path.join(getProjectDir(projectId), key) } -async function clearProject(projectId) { +async function clearProject(projectId, options) { + const timer = new Metrics.Timer('url_cache', { + status: options?.reason || 'unknown', + path: 'delete', + }) await fs.promises.rm(getProjectDir(projectId), { force: true, recursive: true, }) + timer.done() } async function createProjectDir(projectId) { @@ -44,15 +50,40 @@ async function createProjectDir(projectId) { async function downloadUrlToFile(projectId, url, destPath, lastModified) { const cachePath = getCachePath(projectId, url, lastModified) try { + const timer = new Metrics.Timer('url_cache', { + status: 'cache-hit', + path: 'copy', + }) await fs.promises.copyFile(cachePath, destPath) + // the metric is only updated if the file is present in the cache + timer.done() return } catch (e) { if (e.code !== 'ENOENT') { throw e } } - await download(url, cachePath) - await fs.promises.copyFile(cachePath, destPath) + // time the download + { + const timer = new Metrics.Timer('url_cache', { + status: 'cache-miss', + path: 'download', + }) + try { + await download(url, cachePath) + } finally { + timer.done() + } + } + // time the file copy + { + const timer = new Metrics.Timer('url_cache', { + status: 'cache-miss', + path: 'copy', + }) + await fs.promises.copyFile(cachePath, destPath) + timer.done() + } } async function download(url, cachePath) { diff --git a/services/clsi/app/js/UrlFetcher.js b/services/clsi/app/js/UrlFetcher.js index a580da44fe..efae8b6777 100644 --- a/services/clsi/app/js/UrlFetcher.js +++ b/services/clsi/app/js/UrlFetcher.js @@ -4,15 +4,21 @@ const Settings = require('@overleaf/settings') const { fetchStream } = require('@overleaf/fetch-utils') const { URL } = require('url') const { pipeline } = require('stream/promises') +const Metrics = require('./Metrics') async function pipeUrlToFileWithRetry(url, filePath) { let remainingAttempts = 3 let lastErr while (remainingAttempts-- > 0) { + const timer = new Metrics.Timer('url_fetcher', { + path: lastErr ? ' retry' : 'fetch', + }) try { await pipeUrlToFile(url, filePath) + timer.done({ status: 'success' }) return } catch (err) { + timer.done({ status: 'error' }) logger.warn( { err, url, filePath, remainingAttempts }, 'error downloading url' @@ -38,8 +44,10 @@ async function pipeUrlToFile(url, filePath) { const atomicWrite = filePath + '~' try { - await pipeline(stream, fs.createWriteStream(atomicWrite)) + const output = fs.createWriteStream(atomicWrite) + await pipeline(stream, output) await fs.promises.rename(atomicWrite, filePath) + Metrics.count('UrlFetcher.downloaded_bytes', output.bytesWritten) } catch (err) { try { await fs.promises.unlink(atomicWrite) diff --git a/services/clsi/test/unit/js/ProjectPersistenceManagerTests.js b/services/clsi/test/unit/js/ProjectPersistenceManagerTests.js index 10fe389dbf..c5f30b1ea9 100644 --- a/services/clsi/test/unit/js/ProjectPersistenceManagerTests.js +++ b/services/clsi/test/unit/js/ProjectPersistenceManagerTests.js @@ -97,7 +97,7 @@ describe('ProjectPersistenceManager', function () { .callsArgWith(0, null, this.project_ids) this.ProjectPersistenceManager.clearProjectFromCache = sinon .stub() - .callsArg(1) + .callsArg(2) this.CompileManager.clearExpiredProjects = sinon.stub().callsArg(1) return this.ProjectPersistenceManager.clearExpiredProjects(this.callback) }) @@ -120,7 +120,7 @@ describe('ProjectPersistenceManager', function () { this.ProjectPersistenceManager._clearProjectFromDatabase = sinon .stub() .callsArg(1) - this.UrlCache.clearProject = sinon.stub().callsArg(1) + this.UrlCache.clearProject = sinon.stub().callsArg(2) this.CompileManager.clearProject = sinon.stub().callsArg(2) return this.ProjectPersistenceManager.clearProject( this.project_id, diff --git a/services/clsi/test/unit/js/UrlCacheTests.js b/services/clsi/test/unit/js/UrlCacheTests.js index 37dc3bec80..a5c1e60c0a 100644 --- a/services/clsi/test/unit/js/UrlCacheTests.js +++ b/services/clsi/test/unit/js/UrlCacheTests.js @@ -29,6 +29,9 @@ describe('UrlCache', function () { '@overleaf/settings': (this.Settings = { path: { clsiCacheDir: '/cache/dir' }, }), + '@overleaf/metrics': { + Timer: sinon.stub().returns({ done: sinon.stub() }), + }, fs: (this.fs = { promises: { rm: sinon.stub().resolves(),