From b257625f0764ef38861e01deae7e58e830bd0586 Mon Sep 17 00:00:00 2001 From: Jakob Ackermann Date: Fri, 8 Nov 2024 10:26:55 +0100 Subject: [PATCH] Merge pull request #21748 from overleaf/jpa-publish-mongo-utils [copybara] publish mongo-utils library GitOrigin-RevId: 0ef269f186ed645f7bcb9d948a4d1e591c687b31 --- libraries/mongo-utils/.dockerignore | 1 + libraries/mongo-utils/.gitignore | 3 + libraries/mongo-utils/.nvmrc | 1 + libraries/mongo-utils/batchedUpdate.js | 282 +++++++++++++++++++++++++ libraries/mongo-utils/buildscript.txt | 10 + libraries/mongo-utils/index.js | 0 libraries/mongo-utils/package.json | 30 +++ libraries/mongo-utils/test/setup.js | 11 + libraries/mongo-utils/tsconfig.json | 7 + 9 files changed, 345 insertions(+) create mode 100644 libraries/mongo-utils/.dockerignore create mode 100644 libraries/mongo-utils/.gitignore create mode 100644 libraries/mongo-utils/.nvmrc create mode 100644 libraries/mongo-utils/batchedUpdate.js create mode 100644 libraries/mongo-utils/buildscript.txt create mode 100644 libraries/mongo-utils/index.js create mode 100644 libraries/mongo-utils/package.json create mode 100644 libraries/mongo-utils/test/setup.js create mode 100644 libraries/mongo-utils/tsconfig.json diff --git a/libraries/mongo-utils/.dockerignore b/libraries/mongo-utils/.dockerignore new file mode 100644 index 0000000000..c2658d7d1b --- /dev/null +++ b/libraries/mongo-utils/.dockerignore @@ -0,0 +1 @@ +node_modules/ diff --git a/libraries/mongo-utils/.gitignore b/libraries/mongo-utils/.gitignore new file mode 100644 index 0000000000..edb0f85350 --- /dev/null +++ b/libraries/mongo-utils/.gitignore @@ -0,0 +1,3 @@ + +# managed by monorepo$ bin/update_build_scripts +.npmrc diff --git a/libraries/mongo-utils/.nvmrc b/libraries/mongo-utils/.nvmrc new file mode 100644 index 0000000000..17719ce25a --- /dev/null +++ b/libraries/mongo-utils/.nvmrc @@ -0,0 +1 @@ +18.20.4 diff --git a/libraries/mongo-utils/batchedUpdate.js b/libraries/mongo-utils/batchedUpdate.js new 
file mode 100644 index 0000000000..89965e4aa9 --- /dev/null +++ b/libraries/mongo-utils/batchedUpdate.js @@ -0,0 +1,282 @@ +// @ts-check +/* eslint-disable no-console */ +const { ObjectId, ReadPreference } = require('mongodb') + +const READ_PREFERENCE_SECONDARY = + process.env.MONGO_HAS_SECONDARIES === 'true' + ? ReadPreference.secondary.mode + : ReadPreference.secondaryPreferred.mode + +const ONE_MONTH_IN_MS = 1000 * 60 * 60 * 24 * 31 +let ID_EDGE_PAST +const ID_EDGE_FUTURE = objectIdFromMs(Date.now() + 1000) +let BATCH_DESCENDING +let BATCH_SIZE +let VERBOSE_LOGGING +let BATCH_RANGE_START +let BATCH_RANGE_END +let BATCH_MAX_TIME_SPAN_IN_MS + +/** + * @typedef {import("mongodb").Collection} Collection + * @typedef {import("mongodb-legacy").Collection} LegacyCollection + * @typedef {import("mongodb").Document} Document + * @typedef {import("mongodb").FindOptions} FindOptions + * @typedef {import("mongodb").UpdateFilter} UpdateDocument + */ + +/** + * @typedef {Object} BatchedUpdateOptions + * @property {string} [BATCH_DESCENDING] + * @property {string} [BATCH_LAST_ID] + * @property {string} [BATCH_MAX_TIME_SPAN_IN_MS] + * @property {string} [BATCH_RANGE_END] + * @property {string} [BATCH_RANGE_START] + * @property {string} [BATCH_SIZE] + * @property {string} [VERBOSE_LOGGING] + */ + +/** Recompute the module-level BATCH_* and VERBOSE_LOGGING settings from options merged with process.env. + * @param {BatchedUpdateOptions} options - values here are overridden by process.env entries of the same name + */ +function refreshGlobalOptionsForBatchedUpdate(options = {}) { + options = Object.assign({}, options, process.env) + + BATCH_DESCENDING = options.BATCH_DESCENDING === 'true' + BATCH_SIZE = parseInt(options.BATCH_SIZE || '1000', 10) || 1000 + VERBOSE_LOGGING = options.VERBOSE_LOGGING === 'true' + if (options.BATCH_LAST_ID) { + BATCH_RANGE_START = new ObjectId(options.BATCH_LAST_ID) + } else if (options.BATCH_RANGE_START) { + BATCH_RANGE_START = new ObjectId(options.BATCH_RANGE_START) + } else { + if (BATCH_DESCENDING) { + BATCH_RANGE_START = ID_EDGE_FUTURE + } else { + BATCH_RANGE_START = ID_EDGE_PAST + } + } + BATCH_MAX_TIME_SPAN_IN_MS
= parseInt( + options.BATCH_MAX_TIME_SPAN_IN_MS || ONE_MONTH_IN_MS.toString(), + 10 + ) + if (options.BATCH_RANGE_END) { + BATCH_RANGE_END = new ObjectId(options.BATCH_RANGE_END) + } else { + if (BATCH_DESCENDING) { + BATCH_RANGE_END = ID_EDGE_PAST + } else { + BATCH_RANGE_END = ID_EDGE_FUTURE + } + } +} + +/** Fetch up to BATCH_SIZE documents matching query, sorted by _id, with _id in (start, end], or in (end, start] when BATCH_DESCENDING. + * @param {Collection | LegacyCollection} collection + * @param {Document} query - filter; its _id key is overwritten in place with the window bounds + * @param {ObjectId} start + * @param {ObjectId} end + * @param {Document} projection + * @param {FindOptions} findOptions + * @return {Promise<Array<Document>>} + */ +async function getNextBatch( + collection, + query, + start, + end, + projection, + findOptions +) { + if (BATCH_DESCENDING) { + query._id = { + $gt: end, + $lte: start, + } + } else { + query._id = { + $gt: start, + $lte: end, + } + } + return await collection + .find(query, findOptions) + .project(projection) + .sort({ _id: BATCH_DESCENDING ? -1 : 1 }) + .limit(BATCH_SIZE) + .toArray() +} + +/** Apply update through a single updateMany call to every document whose _id appears in nextBatch. + * @param {Collection | LegacyCollection} collection + * @param {Array<Document>} nextBatch + * @param {UpdateDocument} update + * @return {Promise<void>} + */ +async function performUpdate(collection, nextBatch, update) { + await collection.updateMany( + { _id: { $in: nextBatch.map(entry => entry._id) } }, + update + ) +} + +/** Build an ObjectId with ObjectId.createFromTime from a millisecond value (passed on as ms / 1000). + * @param {number} ms + * @return {ObjectId} + */ +function objectIdFromMs(ms) { + return ObjectId.createFromTime(ms / 1000) +} + +/** Read the timestamp of an ObjectId in milliseconds via getTimestamp().getTime(). + * @param {ObjectId} id + * @return {number} + */ +function getMsFromObjectId(id) { + return id.getTimestamp().getTime() +} + +/** Compute the far edge of the next window: start moved by BATCH_MAX_TIME_SPAN_IN_MS in the batch direction, replaced by BATCH_RANGE_END once it reaches or passes it. + * @param {ObjectId} start + * @return {ObjectId} + */ +function getNextEnd(start) { + let end + if (BATCH_DESCENDING) { + end = objectIdFromMs(getMsFromObjectId(start) - BATCH_MAX_TIME_SPAN_IN_MS) + if (getMsFromObjectId(end) <= getMsFromObjectId(BATCH_RANGE_END)) { + end = BATCH_RANGE_END + } + } else { + end = objectIdFromMs(getMsFromObjectId(start) + BATCH_MAX_TIME_SPAN_IN_MS) + if (getMsFromObjectId(end) >= getMsFromObjectId(BATCH_RANGE_END)) { + end =
BATCH_RANGE_END + } + } + return end +} + +/** Derive the lower id edge from the document with the smallest _id, moved one second into the past (never below 0 ms). + * @param {Collection | LegacyCollection} collection + * @return {Promise<ObjectId|null>} null when the collection yields no document + */ +async function getIdEdgePast(collection) { + const [first] = await collection + .find({}) + .project({ _id: 1 }) + .sort({ _id: 1 }) + .limit(1) + .toArray() + if (!first) return null + // Go one second further into the past in order to include the first entry via + // first._id > ID_EDGE_PAST + return objectIdFromMs(Math.max(0, getMsFromObjectId(first._id) - 1000)) +} + +/** Walk the collection in _id windows from BATCH_RANGE_START to BATCH_RANGE_END and apply update to each non-empty batch; resolves to the number of documents batched (0 when the collection is empty). + * @param {Collection | LegacyCollection} collection + * @param {Document} query + * @param {UpdateDocument | ((batch: Array<Document>) => Promise<void>)} update - update document for updateMany, or an async callback awaited with each batch + * @param {Document} [projection] - defaults to { _id: 1 } + * @param {FindOptions} [findOptions] - its readPreference is overwritten with READ_PREFERENCE_SECONDARY + * @param {BatchedUpdateOptions} [batchedUpdateOptions] + */ +async function batchedUpdate( + collection, + query, + update, + projection, + findOptions, + batchedUpdateOptions +) { + ID_EDGE_PAST = await getIdEdgePast(collection) + if (!ID_EDGE_PAST) { + console.warn( + `The collection ${collection.collectionName} appears to be empty.` + ) + return 0 + } + refreshGlobalOptionsForBatchedUpdate(batchedUpdateOptions) + + findOptions = findOptions || {} + findOptions.readPreference = READ_PREFERENCE_SECONDARY + + projection = projection || { _id: 1 } + let nextBatch + let updated = 0 + let start = BATCH_RANGE_START + + while (start !== BATCH_RANGE_END) { + let end = getNextEnd(start) + nextBatch = await getNextBatch( + collection, + query, + start, + end, + projection, + findOptions + ) + if (nextBatch.length > 0) { + end = nextBatch[nextBatch.length - 1]._id + updated += nextBatch.length + + if (VERBOSE_LOGGING) { + console.log( + `Running update on batch with ids ${JSON.stringify( + nextBatch.map(entry => entry._id) + )}` + ) + } else { + console.error(`Running update on batch ending ${end}`) + } + + if (typeof update === 'function') { + await update(nextBatch) + } else { + await performUpdate(collection, nextBatch, update) + } + } +
console.error(`Completed batch ending ${end}`) + start = end + } + return updated +} + +/** Run batchedUpdate, log the outcome with console.error, then exit the process: code 0 on success, code 1 on failure. + * @param {Collection | LegacyCollection} collection + * @param {Document} query + * @param {UpdateDocument | ((batch: Array<Document>) => Promise<void>)} update + * @param {Document} [projection] + * @param {FindOptions} [findOptions] + * @param {BatchedUpdateOptions} [batchedUpdateOptions] + */ +function batchedUpdateWithResultHandling( + collection, + query, + update, + projection, + findOptions, + batchedUpdateOptions +) { + batchedUpdate( + collection, + query, + update, + projection, + findOptions, + batchedUpdateOptions + ) + .then(processed => { + console.error({ processed }) + process.exit(0) + }) + .catch(error => { + console.error({ error }) + process.exit(1) + }) +} + +module.exports = { + batchedUpdate, + batchedUpdateWithResultHandling, +} diff --git a/libraries/mongo-utils/buildscript.txt b/libraries/mongo-utils/buildscript.txt new file mode 100644 index 0000000000..9cebfa8dfc --- /dev/null +++ b/libraries/mongo-utils/buildscript.txt @@ -0,0 +1,10 @@ +mongo-utils +--dependencies=None +--docker-repos=gcr.io/overleaf-ops +--env-add= +--env-pass-through= +--esmock-loader=False +--is-library=True +--node-version=18.20.4 +--public-repo=False +--script-version=4.5.0 diff --git a/libraries/mongo-utils/index.js b/libraries/mongo-utils/index.js new file mode 100644 index 0000000000..e69de29bb2 diff --git a/libraries/mongo-utils/package.json b/libraries/mongo-utils/package.json new file mode 100644 index 0000000000..8bbdad49ba --- /dev/null +++ b/libraries/mongo-utils/package.json @@ -0,0 +1,30 @@ +{ + "name": "@overleaf/mongo-utils", + "version": "0.0.1", + "description": "utilities to help working with mongo", + "main": "index.js", + "scripts": { + "test": "npm run lint && npm run format && npm run types:check && npm run test:unit", + "test:unit": "mocha --exit test/**/*.{js,cjs}", + "lint": "eslint --ext .js --ext .cjs --ext .ts --max-warnings 0 --format unix .", + "lint:fix":
"eslint --fix --ext .js --ext .cjs --ext .ts .", + "format": "prettier --list-different $PWD/'**/*.{js,cjs,ts}'", + "format:fix": "prettier --write $PWD/'**/*.{js,cjs,ts}'", + "test:ci": "npm run test:unit", + "types:check": "tsc --noEmit" + }, + "author": "Overleaf (https://www.overleaf.com)", + "license": "AGPL-3.0-only", + "dependencies": { + "mongodb": "6.7.0", + "mongodb-legacy": "6.1.0" + }, + "devDependencies": { + "chai": "^4.3.6", + "mocha": "^10.2.0", + "sandboxed-module": "^2.0.4", + "sinon": "^9.2.4", + "sinon-chai": "^3.7.0", + "typescript": "^5.0.4" + } +} diff --git a/libraries/mongo-utils/test/setup.js b/libraries/mongo-utils/test/setup.js new file mode 100644 index 0000000000..78e563f853 --- /dev/null +++ b/libraries/mongo-utils/test/setup.js @@ -0,0 +1,11 @@ +const chai = require('chai') +const sinonChai = require('sinon-chai') +const SandboxedModule = require('sandboxed-module') + +// Chai configuration +chai.should() +chai.use(sinonChai) + +SandboxedModule.configure({ + globals: { Buffer, JSON, console, process }, +}) diff --git a/libraries/mongo-utils/tsconfig.json b/libraries/mongo-utils/tsconfig.json new file mode 100644 index 0000000000..d43bb2470a --- /dev/null +++ b/libraries/mongo-utils/tsconfig.json @@ -0,0 +1,7 @@ +{ + "extends": "../../tsconfig.backend.json", + "include": [ + "**/*.js", + "**/*.cjs" + ] +}