Merge pull request #21748 from overleaf/jpa-publish-mongo-utils

[copybara] publish mongo-utils library

GitOrigin-RevId: 0ef269f186ed645f7bcb9d948a4d1e591c687b31
This commit is contained in:
Jakob Ackermann 2024-11-08 10:26:55 +01:00 committed by Copybot
parent 252533b2fd
commit b257625f07
9 changed files with 345 additions and 0 deletions

View file

@ -0,0 +1 @@
node_modules/

3
libraries/mongo-utils/.gitignore vendored Normal file
View file

@ -0,0 +1,3 @@
# managed by monorepo$ bin/update_build_scripts
.npmrc

View file

@ -0,0 +1 @@
18.20.4

View file

@ -0,0 +1,282 @@
// @ts-check
/* eslint-disable no-console */
const { ObjectId, ReadPreference } = require('mongodb')
// Read preference that batchedUpdate() sets on the find options of its batch
// queries: the `secondary` mode when the MONGO_HAS_SECONDARIES environment
// variable is exactly 'true', the `secondaryPreferred` mode otherwise.
const READ_PREFERENCE_SECONDARY =
process.env.MONGO_HAS_SECONDARIES === 'true'
? ReadPreference.secondary.mode
: ReadPreference.secondaryPreferred.mode
// Default for BATCH_MAX_TIME_SPAN_IN_MS: 31 days expressed in milliseconds.
const ONE_MONTH_IN_MS = 1000 * 60 * 60 * 24 * 31
// Lower id edge; assigned by batchedUpdate() from getIdEdgePast() on every run.
let ID_EDGE_PAST
// Upper id edge: an ObjectId for one second after the time this module was
// evaluated (objectIdFromMs is a hoisted function declaration).
const ID_EDGE_FUTURE = objectIdFromMs(Date.now() + 1000)
// The settings below are module-level state that is (re)assigned by
// refreshGlobalOptionsForBatchedUpdate() and read by the batching helpers.
let BATCH_DESCENDING
let BATCH_SIZE
let VERBOSE_LOGGING
let BATCH_RANGE_START
let BATCH_RANGE_END
let BATCH_MAX_TIME_SPAN_IN_MS
/**
* @typedef {import("mongodb").Collection} Collection
* @typedef {import("mongodb-legacy").Collection} LegacyCollection
* @typedef {import("mongodb").Document} Document
* @typedef {import("mongodb").FindOptions} FindOptions
* @typedef {import("mongodb").UpdateFilter<Document>} UpdateDocument
*/
/**
* @typedef {Object} BatchedUpdateOptions
* @property {string} [BATCH_DESCENDING]
* @property {string} [BATCH_LAST_ID]
* @property {string} [BATCH_MAX_TIME_SPAN_IN_MS]
* @property {string} [BATCH_RANGE_END]
* @property {string} [BATCH_RANGE_START]
* @property {string} [BATCH_SIZE]
* @property {string} [VERBOSE_LOGGING]
*/
/**
 * Recompute the module-level batching settings (BATCH_DESCENDING, BATCH_SIZE,
 * VERBOSE_LOGGING, BATCH_RANGE_START, BATCH_RANGE_END and
 * BATCH_MAX_TIME_SPAN_IN_MS) from `options` merged with `process.env`.
 *
 * Environment variables take precedence over the values in `options`.
 *
 * Defaults:
 * - BATCH_SIZE falls back to 1000 when missing, unparsable or 0.
 * - BATCH_MAX_TIME_SPAN_IN_MS falls back to ONE_MONTH_IN_MS when missing,
 *   unparsable or not a positive number.
 * - The range start is BATCH_LAST_ID, else BATCH_RANGE_START, else the edge
 *   matching the direction (ID_EDGE_PAST ascending, ID_EDGE_FUTURE descending).
 * - The range end is BATCH_RANGE_END, else the opposite edge.
 *
 * @param {BatchedUpdateOptions} options
 */
function refreshGlobalOptionsForBatchedUpdate(options = {}) {
  // process.env is merged last, so it overrides values passed by the caller.
  const settings = Object.assign({}, options, process.env)

  BATCH_DESCENDING = settings.BATCH_DESCENDING === 'true'
  BATCH_SIZE = parseInt(settings.BATCH_SIZE || '1000', 10) || 1000
  VERBOSE_LOGGING = settings.VERBOSE_LOGGING === 'true'

  if (settings.BATCH_LAST_ID) {
    BATCH_RANGE_START = new ObjectId(settings.BATCH_LAST_ID)
  } else if (settings.BATCH_RANGE_START) {
    BATCH_RANGE_START = new ObjectId(settings.BATCH_RANGE_START)
  } else {
    BATCH_RANGE_START = BATCH_DESCENDING ? ID_EDGE_FUTURE : ID_EDGE_PAST
  }

  // A NaN, zero or negative time span would keep the batch window from ever
  // advancing towards BATCH_RANGE_END, so fall back to the default instead.
  const requestedTimeSpan = parseInt(
    settings.BATCH_MAX_TIME_SPAN_IN_MS || ONE_MONTH_IN_MS.toString(),
    10
  )
  BATCH_MAX_TIME_SPAN_IN_MS =
    requestedTimeSpan > 0 ? requestedTimeSpan : ONE_MONTH_IN_MS

  if (settings.BATCH_RANGE_END) {
    BATCH_RANGE_END = new ObjectId(settings.BATCH_RANGE_END)
  } else {
    BATCH_RANGE_END = BATCH_DESCENDING ? ID_EDGE_PAST : ID_EDGE_FUTURE
  }
}
/**
 * Fetch the next batch of at most BATCH_SIZE documents whose `_id` lies in the
 * window between `start` and `end`, sorted in the processing direction.
 *
 * `start` is exclusive and `end` is inclusive in both directions. The caller
 * passes the last processed `_id` as the next `start`, so an inclusive start
 * would fetch that document again (and, with BATCH_SIZE=1 in descending mode,
 * would never advance).
 *
 * Note: the `_id` key of the passed `query` object is overwritten in place.
 *
 * @param {Collection | LegacyCollection} collection
 * @param {Document} query
 * @param {ObjectId} start
 * @param {ObjectId} end
 * @param {Document} projection
 * @param {FindOptions} findOptions
 * @return {Promise<Array<Document>>}
 */
async function getNextBatch(
  collection,
  query,
  start,
  end,
  projection,
  findOptions
) {
  if (BATCH_DESCENDING) {
    // Walking from newer to older ids: `start` is the upper (exclusive) bound.
    query._id = {
      $gte: end,
      $lt: start,
    }
  } else {
    // Walking from older to newer ids: `start` is the lower (exclusive) bound.
    query._id = {
      $gt: start,
      $lte: end,
    }
  }
  return await collection
    .find(query, findOptions)
    .project(projection)
    .sort({ _id: BATCH_DESCENDING ? -1 : 1 })
    .limit(BATCH_SIZE)
    .toArray()
}
/**
 * Apply `update` to every document of the batch with a single `updateMany`
 * call that targets the batch entries by their `_id`.
 *
 * @param {Collection | LegacyCollection} collection
 * @param {Array<Document>} nextBatch
 * @param {UpdateDocument} update
 * @return {Promise<void>}
 */
async function performUpdate(collection, nextBatch, update) {
  const batchIds = nextBatch.map(({ _id }) => _id)
  const filter = { _id: { $in: batchIds } }
  await collection.updateMany(filter, update)
}
/**
 * Build an ObjectId from a timestamp given in milliseconds, by converting it
 * to seconds and handing it to `ObjectId.createFromTime`.
 *
 * @param {number} ms
 * @return {ObjectId}
 */
function objectIdFromMs(ms) {
  const seconds = ms / 1000
  return ObjectId.createFromTime(seconds)
}
/**
 * Read the creation time embedded in an ObjectId as epoch milliseconds.
 *
 * @param {ObjectId} id
 * @return {number}
 */
function getMsFromObjectId(id) {
  const createdAt = id.getTimestamp()
  return createdAt.getTime()
}
/**
 * Compute the far edge of the next batch window: `start` shifted by
 * BATCH_MAX_TIME_SPAN_IN_MS in the processing direction. When the shifted
 * edge reaches or passes BATCH_RANGE_END, the BATCH_RANGE_END object itself
 * is returned.
 *
 * @param {ObjectId} start
 * @return {ObjectId}
 */
function getNextEnd(start) {
  const direction = BATCH_DESCENDING ? -1 : 1
  const candidate = objectIdFromMs(
    getMsFromObjectId(start) + direction * BATCH_MAX_TIME_SPAN_IN_MS
  )
  const candidateMs = getMsFromObjectId(candidate)
  const rangeEndMs = getMsFromObjectId(BATCH_RANGE_END)
  const reachedRangeEnd = BATCH_DESCENDING
    ? candidateMs <= rangeEndMs
    : candidateMs >= rangeEndMs
  return reachedRangeEnd ? BATCH_RANGE_END : candidate
}
/**
 * Determine the lower id edge for a collection: an ObjectId one second older
 * than the entry with the smallest `_id`, or `null` when the query returns no
 * entry.
 *
 * @param {Collection | LegacyCollection} collection
 * @return {Promise<ObjectId|null>}
 */
async function getIdEdgePast(collection) {
  const oldestEntryCursor = collection
    .find({})
    .project({ _id: 1 })
    .sort({ _id: 1 })
    .limit(1)
  const [oldestEntry] = await oldestEntryCursor.toArray()
  if (!oldestEntry) {
    return null
  }
  // Step back one second so that the oldest entry itself still satisfies
  // oldestEntry._id > ID_EDGE_PAST; never go below the epoch.
  const edgeMs = Math.max(0, getMsFromObjectId(oldestEntry._id) - 1000)
  return objectIdFromMs(edgeMs)
}
/**
 * Walk a collection in `_id` order, one batch at a time, and apply `update`
 * to every batch of documents matching `query`.
 *
 * `update` is either an update document, applied via performUpdate(), or an
 * async callback that receives each batch and handles it itself.
 *
 * The batching settings are refreshed from `batchedUpdateOptions` and
 * `process.env` on every call. Batches are always read with
 * READ_PREFERENCE_SECONDARY; the caller's `findOptions` object is copied, not
 * modified.
 *
 * @param {Collection | LegacyCollection} collection
 * @param {Document} query
 * @param {UpdateDocument | ((batch: Array<Document>) => Promise<void>)} update
 * @param {Document} [projection] defaults to `{ _id: 1 }`
 * @param {FindOptions} [findOptions]
 * @param {BatchedUpdateOptions} [batchedUpdateOptions]
 * @return {Promise<number>} number of documents handed to `update`; 0 when
 *   the collection is empty
 */
async function batchedUpdate(
  collection,
  query,
  update,
  projection,
  findOptions,
  batchedUpdateOptions
) {
  ID_EDGE_PAST = await getIdEdgePast(collection)
  if (!ID_EDGE_PAST) {
    console.warn(
      `The collection ${collection.collectionName} appears to be empty.`
    )
    return 0
  }
  refreshGlobalOptionsForBatchedUpdate(batchedUpdateOptions)

  // Build a fresh options object instead of writing `readPreference` onto the
  // object owned by the caller.
  const batchFindOptions = {
    ...findOptions,
    readPreference: READ_PREFERENCE_SECONDARY,
  }
  const batchProjection = projection || { _id: 1 }

  let nextBatch
  let updated = 0
  let start = BATCH_RANGE_START

  // Identity comparison: the loop ends once an empty window ends exactly at
  // the BATCH_RANGE_END object returned by getNextEnd().
  while (start !== BATCH_RANGE_END) {
    let end = getNextEnd(start)
    nextBatch = await getNextBatch(
      collection,
      query,
      start,
      end,
      batchProjection,
      batchFindOptions
    )
    if (nextBatch.length > 0) {
      // Continue from the last document actually seen: the window may hold
      // more than BATCH_SIZE documents.
      end = nextBatch[nextBatch.length - 1]._id
      updated += nextBatch.length

      if (VERBOSE_LOGGING) {
        console.log(
          `Running update on batch with ids ${JSON.stringify(
            nextBatch.map(entry => entry._id)
          )}`
        )
      } else {
        console.error(`Running update on batch ending ${end}`)
      }

      if (typeof update === 'function') {
        await update(nextBatch)
      } else {
        await performUpdate(collection, nextBatch, update)
      }
    }
    console.error(`Completed batch ending ${end}`)
    start = end
  }
  return updated
}
/**
 * Run batchedUpdate() with the given arguments, then log the outcome and
 * terminate the process: exit code 0 with `{ processed }` on success, exit
 * code 1 with `{ error }` on failure. Nothing is returned to the caller.
 *
 * @param {Collection | LegacyCollection} collection
 * @param {Document} query
 * @param {UpdateDocument | ((batch: Array<Object>) => Promise<void>)} update
 * @param {Document} [projection]
 * @param {FindOptions} [findOptions]
 * @param {BatchedUpdateOptions} [batchedUpdateOptions]
 */
function batchedUpdateWithResultHandling(
  collection,
  query,
  update,
  projection,
  findOptions,
  batchedUpdateOptions
) {
  const reportSuccessAndExit = processed => {
    console.error({ processed })
    process.exit(0)
  }
  const reportFailureAndExit = error => {
    console.error({ error })
    process.exit(1)
  }

  const pendingRun = batchedUpdate(
    collection,
    query,
    update,
    projection,
    findOptions,
    batchedUpdateOptions
  )
  pendingRun.then(reportSuccessAndExit).catch(reportFailureAndExit)
}
// Public API of this module: the promise-returning batchedUpdate() and the
// batchedUpdateWithResultHandling() wrapper that logs the result and exits
// the process.
module.exports = {
batchedUpdate,
batchedUpdateWithResultHandling,
}

View file

@ -0,0 +1,10 @@
mongo-utils
--dependencies=None
--docker-repos=gcr.io/overleaf-ops
--env-add=
--env-pass-through=
--esmock-loader=False
--is-library=True
--node-version=18.20.4
--public-repo=False
--script-version=4.5.0

View file

View file

@ -0,0 +1,30 @@
{
"name": "@overleaf/mongo-utils",
"version": "0.0.1",
"description": "utilities to help working with mongo",
"main": "index.js",
"scripts": {
"test": "npm run lint && npm run format && npm run types:check && npm run test:unit",
"test:unit": "mocha --exit test/**/*.{js,cjs}",
"lint": "eslint --ext .js --ext .cjs --ext .ts --max-warnings 0 --format unix .",
"lint:fix": "eslint --fix --ext .js --ext .cjs --ext .ts .",
"format": "prettier --list-different $PWD/'**/*.{js,cjs,ts}'",
"format:fix": "prettier --write $PWD/'**/*.{js,cjs,ts}'",
"test:ci": "npm run test:unit",
"types:check": "tsc --noEmit"
},
"author": "Overleaf (https://www.overleaf.com)",
"license": "AGPL-3.0-only",
"dependencies": {
"mongodb": "6.7.0",
"mongodb-legacy": "6.1.0"
},
"devDependencies": {
"chai": "^4.3.6",
"mocha": "^10.2.0",
"sandboxed-module": "^2.0.4",
"sinon": "^9.2.4",
"sinon-chai": "^3.7.0",
"typescript": "^5.0.4"
}
}

View file

@ -0,0 +1,11 @@
const chai = require('chai')
const sinonChai = require('sinon-chai')
const SandboxedModule = require('sandboxed-module')
// Chai configuration
// Calls chai.should() and registers the sinon-chai plugin with chai.
chai.should()
chai.use(sinonChai)
// Configure SandboxedModule with the globals Buffer, JSON, console and process
// (presumably so that sandboxed modules share them with the test process —
// confirm against the sandboxed-module documentation).
SandboxedModule.configure({
globals: { Buffer, JSON, console, process },
})

View file

@ -0,0 +1,7 @@
{
"extends": "../../tsconfig.backend.json",
"include": [
"**/*.js",
"**/*.cjs"
]
}