From 5d472e9b383dcf35ef478c61d2e9f29bd30fdce3 Mon Sep 17 00:00:00 2001 From: Liangjun Song <146005915+adai26@users.noreply.github.com> Date: Mon, 5 Aug 2024 13:14:56 +0200 Subject: [PATCH] limit the number of concurrent compile requests in clsi (#19717) GitOrigin-RevId: 17909a4dd0717ea4a75288f734ddef19c7d6592e --- services/clsi/app/js/CompileController.js | 5 +- services/clsi/app/js/Errors.js | 2 + services/clsi/app/js/LockManager.js | 21 ++++- services/clsi/config/settings.defaults.js | 4 + .../test/unit/js/CompileControllerTests.js | 31 +++++++ .../clsi/test/unit/js/LockManagerTests.js | 84 +++++++++++++++++-- 6 files changed, 137 insertions(+), 10 deletions(-) diff --git a/services/clsi/app/js/CompileController.js b/services/clsi/app/js/CompileController.js index 9e12cf28b7..3da884eed7 100644 --- a/services/clsi/app/js/CompileController.js +++ b/services/clsi/app/js/CompileController.js @@ -48,7 +48,10 @@ function compile(req, res, next) { }, 'files out of sync, please retry' ) - } else if (error?.code === 'EPIPE') { + } else if ( + error?.code === 'EPIPE' || + error instanceof Errors.TooManyCompileRequestsError + ) { // docker returns EPIPE when shutting down code = 503 // send 503 Unavailable response status = 'unavailable' diff --git a/services/clsi/app/js/Errors.js b/services/clsi/app/js/Errors.js index 4e23a833d3..5c5fd3745a 100644 --- a/services/clsi/app/js/Errors.js +++ b/services/clsi/app/js/Errors.js @@ -34,6 +34,7 @@ AlreadyCompilingError.prototype.__proto__ = Error.prototype class QueueLimitReachedError extends OError {} class TimedOutError extends OError {} class NoXrefTableError extends OError {} +class TooManyCompileRequestsError extends OError {} module.exports = Errors = { QueueLimitReachedError, @@ -42,4 +43,5 @@ module.exports = Errors = { FilesOutOfSyncError, AlreadyCompilingError, NoXrefTableError, + TooManyCompileRequestsError, } diff --git a/services/clsi/app/js/LockManager.js b/services/clsi/app/js/LockManager.js index 21abb43a2a..653691f7e8 100644 --- a/services/clsi/app/js/LockManager.js +++ b/services/clsi/app/js/LockManager.js @@ -2,6 +2,7 @@ const logger = require('@overleaf/logger') const Errors = require('./Errors') const RequestParser = require('./RequestParser') const Metrics = require('@overleaf/metrics') +const Settings = require('@overleaf/settings') // The lock timeout should be higher than the maximum end-to-end compile time. // Here, we use the maximum compile timeout plus 2 minutes. @@ -9,7 +10,7 @@ const LOCK_TIMEOUT_MS = RequestParser.MAX_TIMEOUT * 1000 + 120000 const LOCKS = new Map() -function acquire(key) { +function acquire(key, concurrencyLimitDryRun = true) { const currentLock = LOCKS.get(key) if (currentLock != null) { if (currentLock.isExpired()) { @@ -20,13 +21,29 @@ function acquire(key) { } } - Metrics.gauge('concurrent_compile_requests', LOCKS.size) + checkConcurrencyLimit(concurrencyLimitDryRun) const lock = new Lock(key) LOCKS.set(key, lock) return lock } +function checkConcurrencyLimit(dryRun) { + Metrics.gauge('concurrent_compile_requests', LOCKS.size) + + if (LOCKS.size <= Settings.compileConcurrencyLimit) { + return + } + + Metrics.inc('exceeded-compilier-concurrency-limit') + + if (!dryRun) { + throw new Errors.TooManyCompileRequestsError( + 'too many concurrent compile requests' + ) + } +} + class Lock { constructor(key) { this.key = key diff --git a/services/clsi/config/settings.defaults.js b/services/clsi/config/settings.defaults.js index eaf069867d..934ef6d8c8 100644 --- a/services/clsi/config/settings.defaults.js +++ b/services/clsi/config/settings.defaults.js @@ -1,4 +1,7 @@ const Path = require('path') +const os = require('os') + +const isPreEmptible = os.hostname().includes('pre-emp') module.exports = { compileSizeLimit: process.env.COMPILE_SIZE_LIMIT || '7mb', @@ -69,6 +72,7 @@ module.exports = { parseInt(process.env.PDF_CACHING_WORKER_POOL_SIZE, 10) || 4, pdfCachingWorkerPoolBackLogLimit: parseInt(process.env.PDF_CACHING_WORKER_POOL_BACK_LOG_LIMIT, 10) || 40, + compileConcurrencyLimit: isPreEmptible ? 32 : 64, } if (process.env.ALLOWED_COMPILE_GROUPS) { diff --git a/services/clsi/test/unit/js/CompileControllerTests.js b/services/clsi/test/unit/js/CompileControllerTests.js index 6a8793d1cf..ac807d212e 100644 --- a/services/clsi/test/unit/js/CompileControllerTests.js +++ b/services/clsi/test/unit/js/CompileControllerTests.js @@ -5,6 +5,7 @@ const modulePath = require('path').join( __dirname, '../../../app/js/CompileController' ) +const Errors = require('../../../app/js/Errors') function tryImageNameValidation(method, imageNameField) { describe('when allowedImages is set', function () { @@ -67,6 +68,7 @@ describe('CompileController', function () { Timer: sinon.stub().returns({ done: sinon.stub() }), }, './ProjectPersistenceManager': (this.ProjectPersistenceManager = {}), + './Errors': (this.Erros = Errors), }, }) this.Settings.externalUrl = 'http://www.example.com' @@ -312,6 +314,35 @@ describe('CompileController', function () { }) }) + describe('with too many compile requests error', function () { + beforeEach(function () { + const error = new Errors.TooManyCompileRequestsError( + 'too many concurrent compile requests' + ) + this.CompileManager.doCompileWithLock = sinon + .stub() + .callsArgWith(1, error, null) + this.CompileController.compile(this.req, this.res) + }) + + it('should return the JSON response with the error', function () { + this.res.status.calledWith(503).should.equal(true) + this.res.send + .calledWith({ + compile: { + status: 'unavailable', + error: 'too many concurrent compile requests', + outputUrlPrefix: '/zone/b', + outputFiles: [], + buildId: undefined, + stats: undefined, + timings: undefined, + }, + }) + .should.equal(true) + }) + }) + describe('when the request times out', function () { beforeEach(function () { this.error = new Error((this.message = 'container timed out')) diff --git a/services/clsi/test/unit/js/LockManagerTests.js b/services/clsi/test/unit/js/LockManagerTests.js index 772849ddbc..6048044912 100644 --- a/services/clsi/test/unit/js/LockManagerTests.js +++ b/services/clsi/test/unit/js/LockManagerTests.js @@ -1,12 +1,28 @@ const { expect } = require('chai') const sinon = require('sinon') -const LockManager = require('../../../app/js/LockManager') +const SandboxedModule = require('sandboxed-module') +const modulePath = require('path').join( + __dirname, + '../../../app/js/LockManager' +) const Errors = require('../../../app/js/Errors') describe('LockManager', function () { beforeEach(function () { this.key = '/local/compile/directory' this.clock = sinon.useFakeTimers() + this.LockManager = SandboxedModule.require(modulePath, { + requires: { + '@overleaf/metrics': (this.Metrics = { + inc: sinon.stub(), + gauge: sinon.stub(), + }), + '@overleaf/settings': (this.Settings = { + compileConcurrencyLimit: 5, + }), + './Errors': (this.Erros = Errors), + }, + }) }) afterEach(function () { @@ -15,7 +31,7 @@ describe('LockManager', function () { describe('when the lock is available', function () { it('the lock can be acquired', function () { - const lock = LockManager.acquire(this.key) + const lock = this.LockManager.acquire(this.key) expect(lock).to.exist lock.release() }) @@ -23,7 +39,7 @@ describe('LockManager', function () { describe('after the lock is acquired', function () { beforeEach(function () { - this.lock = LockManager.acquire(this.key) + this.lock = this.LockManager.acquire(this.key) }) afterEach(function () { @@ -33,13 +49,13 @@ describe('LockManager', function () { }) it("the lock can't be acquired again", function () { - expect(() => LockManager.acquire(this.key)).to.throw( + expect(() => this.LockManager.acquire(this.key)).to.throw( Errors.AlreadyCompilingError ) }) it('another lock can be acquired', function () { - const lock = LockManager.acquire('another key') + const lock = this.LockManager.acquire('another key') expect(lock).to.exist lock.release() }) @@ -47,14 +63,68 @@ describe('LockManager', function () { it('the lock can be acquired again after an expiry period', function () { // The expiry time is a little bit over 10 minutes. Let's wait 15 minutes. this.clock.tick(15 * 60 * 1000) - this.lock = LockManager.acquire(this.key) + this.lock = this.LockManager.acquire(this.key) expect(this.lock).to.exist }) it('the lock can be acquired again after it was released', function () { this.lock.release() - this.lock = LockManager.acquire(this.key) + this.lock = this.LockManager.acquire(this.key) expect(this.lock).to.exist }) }) + + describe('concurrency limit', function () { + it('dry run', function () { + for (let i = 0; i <= this.Settings.compileConcurrencyLimit; i++) { + this.LockManager.acquire('test_key' + i) + } + this.Metrics.inc + .calledWith('exceeded-compilier-concurrency-limit') + .should.equal(false) + this.LockManager.acquire( + 'test_key_' + (this.Settings.compileConcurrencyLimit + 1) + ) + this.Metrics.inc + .calledWith('exceeded-compilier-concurrency-limit') + .should.equal(true) + }) + + it('exceeding the limit', function () { + for (let i = 0; i <= this.Settings.compileConcurrencyLimit; i++) { + this.LockManager.acquire('test_key' + i, false) + } + this.Metrics.inc + .calledWith('exceeded-compilier-concurrency-limit') + .should.equal(false) + expect(() => + this.LockManager.acquire( + 'test_key_' + (this.Settings.compileConcurrencyLimit + 1), + false + ) + ).to.throw(Errors.TooManyCompileRequestsError) + + this.Metrics.inc + .calledWith('exceeded-compilier-concurrency-limit') + .should.equal(true) + }) + + it('within the limit', function () { + for (let i = 0; i <= this.Settings.compileConcurrencyLimit - 1; i++) { + this.LockManager.acquire('test_key' + i, false) + } + this.Metrics.inc + .calledWith('exceeded-compilier-concurrency-limit') + .should.equal(false) + + const lock = this.LockManager.acquire( + 'test_key_' + this.Settings.compileConcurrencyLimit, + false + ) + + expect(lock.key).to.equal( + 'test_key_' + this.Settings.compileConcurrencyLimit + ) + }) + }) })