From e8e31dbdb5dfdc760e5c3302df7ea70e035e231b Mon Sep 17 00:00:00 2001 From: Eric Mc Sween <5454374+emcsween@users.noreply.github.com> Date: Tue, 2 Jul 2024 09:55:35 -0400 Subject: [PATCH] Merge pull request #19041 from overleaf/em-docupdater-block-project Endpoint for blocking projects from being loaded in docupdater GitOrigin-RevId: 87d79a0b8ccfa0ed46fbf0c198e8a405c1c1151f --- services/document-updater/app.js | 3 + .../document-updater/app/js/HttpController.js | 31 +++ .../document-updater/app/js/RedisManager.js | 179 ++++++++++++------ .../config/settings.defaults.js | 3 + .../unit/js/RedisManager/RedisManagerTests.js | 37 +++- 5 files changed, 191 insertions(+), 62 deletions(-) diff --git a/services/document-updater/app.js b/services/document-updater/app.js index 03cc12ec82..420daddafc 100644 --- a/services/document-updater/app.js +++ b/services/document-updater/app.js @@ -184,6 +184,9 @@ app.delete( HttpController.deleteComment ) +app.post('/project/:project_id/block', HttpController.blockProject) +app.post('/project/:project_id/unblock', HttpController.unblockProject) + app.get('/flush_all_projects', HttpController.flushAllProjects) app.get('/flush_queued_projects', HttpController.flushQueuedProjects) diff --git a/services/document-updater/app/js/HttpController.js b/services/document-updater/app/js/HttpController.js index b847a95e67..92f5ab6daa 100644 --- a/services/document-updater/app/js/HttpController.js +++ b/services/document-updater/app/js/HttpController.js @@ -443,6 +443,35 @@ function flushQueuedProjects(req, res, next) { }) } +/** + * Block a project from getting loaded in docupdater + * + * The project is blocked only if it's not already loaded in docupdater. The + * response indicates whether the project has been blocked or not. + */ +function blockProject(req, res, next) { + const projectId = req.params.project_id + RedisManager.blockProject(projectId, (err, blocked) => { + if (err) { + return next(err) + } + res.json({ blocked }) + }) +} + +/** + * Unblock a project + */ +function unblockProject(req, res, next) { + const projectId = req.params.project_id + RedisManager.unblockProject(projectId, (err, wasBlocked) => { + if (err) { + return next(err) + } + res.json({ wasBlocked }) + }) +} + module.exports = { getDoc, peekDoc, @@ -462,4 +491,6 @@ module.exports = { resyncProjectHistory, flushAllProjects, flushQueuedProjects, + blockProject, + unblockProject, } diff --git a/services/document-updater/app/js/RedisManager.js b/services/document-updater/app/js/RedisManager.js index fa08a63f36..40ce658b9c 100644 --- a/services/document-updater/app/js/RedisManager.js +++ b/services/document-updater/app/js/RedisManager.js @@ -16,6 +16,7 @@ const { docIsTooLarge } = require('./Limits') // after 30 seconds. We can't let any errors in the rest of the stack // hold us up, and need to bail out quickly if there is a problem. const MAX_REDIS_REQUEST_LENGTH = 5000 // 5 seconds +const PROJECT_BLOCK_TTL_SECS = 30 // Make times easy to read const minutes = 60 // seconds for Redis expire @@ -77,70 +78,84 @@ const RedisManager = { logger.error({ err: error, docId, projectId }, error.message) return callback(error) } - RedisManager.setHistoryRangesSupportFlag( - docId, - historyRangesSupport, - err => { - if (err) { - return callback(err) - } - // update docsInProject set before writing doc contents - rclient.sadd( - keys.docsInProject({ project_id: projectId }), - docId, - error => { - if (error) return callback(error) - if (!pathname) { - metrics.inc('pathname', 1, { - path: 'RedisManager.setDoc', - status: pathname === '' ? 'zero-length' : 'undefined', - }) - } - - // Make sure that this MULTI operation only operates on doc - // specific keys, i.e. keys that have the doc id in curly braces. - // The curly braces identify a hash key for Redis and ensures that - // the MULTI's operations are all done on the same node in a - // cluster environment. - const multi = rclient.multi() - multi.mset({ - [keys.docLines({ doc_id: docId })]: docLines, - [keys.projectKey({ doc_id: docId })]: projectId, - [keys.docVersion({ doc_id: docId })]: version, - [keys.docHash({ doc_id: docId })]: docHash, - [keys.ranges({ doc_id: docId })]: ranges, - [keys.pathname({ doc_id: docId })]: pathname, - [keys.projectHistoryId({ doc_id: docId })]: projectHistoryId, - }) - if (historyRangesSupport) { - multi.del(keys.resolvedCommentIds({ doc_id: docId })) - if (resolvedCommentIds.length > 0) { - multi.sadd( - keys.resolvedCommentIds({ doc_id: docId }), - ...resolvedCommentIds - ) - } - } - multi.exec(err => { - if (err) { - callback( - OError.tag(err, 'failed to write doc to Redis in MULTI', { - previousErrors: err.previousErrors.map(e => ({ - name: e.name, - message: e.message, - command: e.command, - })), - }) - ) - } else { - callback() - } - }) - } + // update docsInProject set before writing doc contents + const multi = rclient.multi() + multi.exists(keys.projectBlock({ project_id: projectId })) + multi.sadd(keys.docsInProject({ project_id: projectId }), docId) + multi.exec((err, reply) => { + if (err) { + return callback(err) + } + const projectBlocked = reply[0] === 1 + if (projectBlocked) { + // We don't clean up the spurious docId added in the docsInProject + // set. There is a risk that the docId was successfully added by a + // concurrent process. This set is used when unloading projects. An + // extra docId will not prevent the project from being uploaded, but + // a missing docId means that the doc might stay in Redis forever. + return callback( + new OError('Project blocked from loading docs', { projectId }) ) } - ) + + RedisManager.setHistoryRangesSupportFlag( + docId, + historyRangesSupport, + err => { + if (err) { + return callback(err) + } + + if (!pathname) { + metrics.inc('pathname', 1, { + path: 'RedisManager.setDoc', + status: pathname === '' ? 'zero-length' : 'undefined', + }) + } + + // Make sure that this MULTI operation only operates on doc + // specific keys, i.e. keys that have the doc id in curly braces. + // The curly braces identify a hash key for Redis and ensures that + // the MULTI's operations are all done on the same node in a + // cluster environment. + const multi = rclient.multi() + multi.mset({ + [keys.docLines({ doc_id: docId })]: docLines, + [keys.projectKey({ doc_id: docId })]: projectId, + [keys.docVersion({ doc_id: docId })]: version, + [keys.docHash({ doc_id: docId })]: docHash, + [keys.ranges({ doc_id: docId })]: ranges, + [keys.pathname({ doc_id: docId })]: pathname, + [keys.projectHistoryId({ doc_id: docId })]: projectHistoryId, + }) + if (historyRangesSupport) { + multi.del(keys.resolvedCommentIds({ doc_id: docId })) + if (resolvedCommentIds.length > 0) { + multi.sadd( + keys.resolvedCommentIds({ doc_id: docId }), + ...resolvedCommentIds + ) + } + } + multi.exec(err => { + if (err) { + callback( + OError.tag(err, 'failed to write doc to Redis in MULTI', { + previousErrors: err.previousErrors.map(e => ({ + name: e.name, + message: e.message, + command: e.command, + })), + }) + ) + } else { + callback() + } + }) + } + ) + }) }) }, @@ -684,6 +699,48 @@ const RedisManager = { } }, + blockProject(projectId, callback) { + // Make sure that this MULTI operation only operates on project + // specific keys, i.e. keys that have the project id in curly braces. + // The curly braces identify a hash key for Redis and ensures that + // the MULTI's operations are all done on the same node in a + // cluster environment. + const multi = rclient.multi() + multi.setex( + keys.projectBlock({ project_id: projectId }), + PROJECT_BLOCK_TTL_SECS, + '1' + ) + multi.scard(keys.docsInProject({ project_id: projectId })) + multi.exec((err, reply) => { + if (err) { + return callback(err) + } + const docsInProject = reply[1] + if (docsInProject > 0) { + // Too late to lock the project + rclient.del(keys.projectBlock({ project_id: projectId }), err => { + if (err) { + return callback(err) + } + callback(null, false) + }) + } else { + callback(null, true) + } + }) + }, + + unblockProject(projectId, callback) { + rclient.del(keys.projectBlock({ project_id: projectId }), (err, reply) => { + if (err) { + return callback(err) + } + const wasBlocked = reply === 1 + callback(null, wasBlocked) + }) + }, + _serializeRanges(ranges, callback) { let jsonRanges = JSON.stringify(ranges) if (jsonRanges && jsonRanges.length > MAX_RANGES_SIZE) { diff --git a/services/document-updater/config/settings.defaults.js b/services/document-updater/config/settings.defaults.js index c3e7526a98..d8c2ada6d3 100755 --- a/services/document-updater/config/settings.defaults.js +++ b/services/document-updater/config/settings.defaults.js @@ -128,6 +128,9 @@ module.exports = { projectState({ project_id: projectId }) { return `ProjectState:{${projectId}}` }, + projectBlock({ project_id: projectId }) { + return `ProjectBlock:{${projectId}}` + }, pendingUpdates({ doc_id: docId }) { return `PendingUpdates:{${docId}}` }, diff --git a/services/document-updater/test/unit/js/RedisManager/RedisManagerTests.js b/services/document-updater/test/unit/js/RedisManager/RedisManagerTests.js index 7e0e2fcea8..37591d6cfe 100644 --- a/services/document-updater/test/unit/js/RedisManager/RedisManagerTests.js +++ b/services/document-updater/test/unit/js/RedisManager/RedisManagerTests.js @@ -1,4 +1,5 @@ const sinon = require('sinon') +const { expect } = require('chai') const SandboxedModule = require('sandboxed-module') const Errors = require('../../../../app/js/Errors') const crypto = require('crypto') @@ -54,6 +55,9 @@ describe('RedisManager', function () { projectState({ project_id: projectId }) { return `ProjectState:${projectId}` }, + projectBlock({ project_id: projectId }) { + return `ProjectBlock:${projectId}` + }, unflushedTime({ doc_id: docId }) { return `UnflushedTime:${docId}` }, @@ -782,6 +786,8 @@ describe('RedisManager', function () { this.multi.mset = sinon.stub() this.multi.sadd = sinon.stub() this.multi.del = sinon.stub() + this.multi.exists = sinon.stub() + this.multi.exec.onCall(0).yields(null, [0]) this.rclient.sadd = sinon.stub().yields() this.lines = ['one', 'two', 'three', 'これは'] this.version = 42 @@ -825,7 +831,7 @@ describe('RedisManager', function () { }) it('should add the docId to the project set', function () { - this.rclient.sadd + this.multi.sadd .calledWith(`DocsIn:${this.project_id}`, this.docId) .should.equal(true) }) @@ -975,6 +981,35 @@ describe('RedisManager', function () { ) }) }) + + describe('when the project is blocked', function () { + beforeEach(function (done) { + this.multi.exec.onCall(0).yields(null, [1]) + this.RedisManager.putDocInMemory( + this.project_id, + this.docId, + this.lines, + this.version, + this.ranges, + this.resolvedCommentIds, + this.pathname, + this.projectHistoryId, + this.historyRangesSupport, + err => { + this.error = err + done() + } + ) + }) + + it('should throw an error', function () { + expect(this.error.message).to.equal('Project blocked from loading docs') + }) + + it('should not store the doc', function () { + expect(this.multi.mset).to.not.have.been.called + }) + }) }) describe('removeDocFromMemory', function () {