From 4f25b0de91025e503d4134a9eac5a9e55fba75a1 Mon Sep 17 00:00:00 2001 From: Jakob Ackermann Date: Thu, 21 Nov 2024 09:29:39 +0100 Subject: [PATCH] Merge pull request #22028 from overleaf/jpa-logging [misc] logging tweaks GitOrigin-RevId: 7757b81b6c684955374038376035dc2bd272c4e8 --- libraries/mongo-utils/batchedUpdate.js | 34 ++++++++-- .../storage/scripts/back_fill_file_hash.mjs | 42 ++++++++---- .../js/storage/back_fill_file_hash.test.mjs | 8 ++- .../BackFillDocNameForDeletedDocsTests.mjs | 5 +- .../acceptance/src/BatchedUpdateTests.mjs | 68 +++++++++++++++++-- .../RegenerateDuplicateReferralIdsTests.mjs | 9 +-- 6 files changed, 134 insertions(+), 32 deletions(-) diff --git a/libraries/mongo-utils/batchedUpdate.js b/libraries/mongo-utils/batchedUpdate.js index be0370956a..5e97f45aca 100644 --- a/libraries/mongo-utils/batchedUpdate.js +++ b/libraries/mongo-utils/batchedUpdate.js @@ -46,9 +46,9 @@ function refreshGlobalOptionsForBatchedUpdate(options = {}) { BATCH_SIZE = parseInt(options.BATCH_SIZE || '1000', 10) || 1000 VERBOSE_LOGGING = options.VERBOSE_LOGGING === 'true' if (options.BATCH_LAST_ID) { - BATCH_RANGE_START = new ObjectId(options.BATCH_LAST_ID) + BATCH_RANGE_START = objectIdFromInput(options.BATCH_LAST_ID) } else if (options.BATCH_RANGE_START) { - BATCH_RANGE_START = new ObjectId(options.BATCH_RANGE_START) + BATCH_RANGE_START = objectIdFromInput(options.BATCH_RANGE_START) } else { if (BATCH_DESCENDING) { BATCH_RANGE_START = ID_EDGE_FUTURE @@ -61,7 +61,7 @@ function refreshGlobalOptionsForBatchedUpdate(options = {}) { 10 ) if (options.BATCH_RANGE_END) { - BATCH_RANGE_END = new ObjectId(options.BATCH_RANGE_END) + BATCH_RANGE_END = objectIdFromInput(options.BATCH_RANGE_END) } else { if (BATCH_DESCENDING) { BATCH_RANGE_END = ID_EDGE_PAST @@ -120,6 +120,28 @@ async function performUpdate(collection, nextBatch, update) { ) } +/** + * @param {string} input + * @return {ObjectId} + */ +function objectIdFromInput(input) { + if (input.includes('T')) { + const t = new Date(input).getTime() + if (Number.isNaN(t)) throw new Error(`${input} is not a valid date`) + return objectIdFromMs(t) + } else { + return new ObjectId(input) + } +} + +/** + * @param {ObjectId} objectId + * @return {string} + */ +function renderObjectId(objectId) { + return `${objectId} (${objectId.getTimestamp().toISOString()})` +} + /** * @param {number} ms * @return {ObjectId} @@ -227,7 +249,7 @@ async function batchedUpdate( )}` ) } else { - console.error(`Running update on batch ending ${end}`) + console.error(`Running update on batch ending ${renderObjectId(end)}`) } if (typeof update === 'function') { @@ -236,7 +258,7 @@ async function batchedUpdate( await performUpdate(collection, nextBatch, update) } } - console.error(`Completed batch ending ${end}`) + console.error(`Completed batch ending ${renderObjectId(end)}`) start = end } return updated @@ -278,6 +300,8 @@ function batchedUpdateWithResultHandling( module.exports = { READ_PREFERENCE_SECONDARY, + objectIdFromInput, + renderObjectId, batchedUpdate, batchedUpdateWithResultHandling, } diff --git a/services/history-v1/storage/scripts/back_fill_file_hash.mjs b/services/history-v1/storage/scripts/back_fill_file_hash.mjs index 742e857ac4..101e19134f 100644 --- a/services/history-v1/storage/scripts/back_fill_file_hash.mjs +++ b/services/history-v1/storage/scripts/back_fill_file_hash.mjs @@ -11,6 +11,7 @@ import { Binary, ObjectId } from 'mongodb' import logger from '@overleaf/logger' import { batchedUpdate, + objectIdFromInput, READ_PREFERENCE_SECONDARY, } from '@overleaf/mongo-utils/batchedUpdate.js' import OError from '@overleaf/o-error' @@ -85,14 +86,18 @@ ObjectId.cacheHexString = true const COLLECT_BLOBS = process.argv.includes('blobs') const PUBLIC_LAUNCH_DATE = new Date('2012-01-01T00:00:00Z') -const BATCH_RANGE_START = - process.env.BATCH_RANGE_START || - ObjectId.createFromTime(PUBLIC_LAUNCH_DATE.getTime() / 1000).toString() -const BATCH_RANGE_END = process.env.BATCH_RANGE_END || new ObjectId().toString() +const BATCH_RANGE_START = objectIdFromInput( + process.env.BATCH_RANGE_START || PUBLIC_LAUNCH_DATE.toISOString() +).toString() +const BATCH_RANGE_END = objectIdFromInput( + process.env.BATCH_RANGE_END || new Date().toISOString() +).toString() // We need to control the start and end as ids of deleted projects are created at time of deletion. delete process.env.BATCH_RANGE_START delete process.env.BATCH_RANGE_END +const LOGGING_IDENTIFIER = process.env.LOGGING_IDENTIFIER || BATCH_RANGE_START + // Concurrency for downloading from GCS and updating hashes in mongo const CONCURRENCY = parseInt(process.env.CONCURRENCY || '100', 10) // Retries for processing a given file @@ -197,18 +202,25 @@ function computeDiff(nextEventLoopStats, now) { return Object.assign(diff, bandwidthStats(diff, ms)) } -function printStats() { +/** + * @param {boolean} isLast + */ +function printStats(isLast = false) { const now = performance.now() const nextEventLoopStats = performance.eventLoopUtilization() - console.log( - JSON.stringify({ - time: new Date(), - ...STATS, - ...bandwidthStats(STATS, now - processStart), - eventLoop: nextEventLoopStats, - diff: computeDiff(nextEventLoopStats, now), - }) - ) + const logLine = JSON.stringify({ + time: new Date(), + LOGGING_IDENTIFIER, + ...STATS, + ...bandwidthStats(STATS, now - processStart), + eventLoop: nextEventLoopStats, + diff: computeDiff(nextEventLoopStats, now), + }) + if (isLast) { + console.warn(logLine) + } else { + console.log(logLine) + } lastEventLoopStats = nextEventLoopStats lastLog = Object.assign({}, STATS) } @@ -1100,7 +1112,7 @@ try { try { await main() } finally { - printStats() + printStats(true) try { // Perform non-recursive removal of the BUFFER_DIR. Individual files // should get removed in parallel as part of batch processing. diff --git a/services/history-v1/test/acceptance/js/storage/back_fill_file_hash.test.mjs b/services/history-v1/test/acceptance/js/storage/back_fill_file_hash.test.mjs index af75d3a871..c42cfd1108 100644 --- a/services/history-v1/test/acceptance/js/storage/back_fill_file_hash.test.mjs +++ b/services/history-v1/test/acceptance/js/storage/back_fill_file_hash.test.mjs @@ -520,7 +520,12 @@ describe('back_fill_file_hash script', function () { 'readFromGCSThroughputMiBPerSecond', 'writeToAWSThroughputMiBPerSecond', ] - const stats = JSON.parse(result.stdout.trimEnd().split('\n').pop()) + const stats = JSON.parse( + result.stderr + .split('\n') + .filter(l => l.includes('LOGGING_IDENTIFIER')) + .pop() + ) expect(Object.keys(stats.diff).sort()).to.deep.equal( [...extraStatsKeys, ...Object.keys(STATS_ALL)].sort() ) @@ -534,6 +539,7 @@ describe('back_fill_file_hash script', function () { for (const key of extraStatsKeys) { delete stats[key] } + delete stats.LOGGING_IDENTIFIER return { stats, result } } diff --git a/services/web/test/acceptance/src/BackFillDocNameForDeletedDocsTests.mjs b/services/web/test/acceptance/src/BackFillDocNameForDeletedDocsTests.mjs index 5311a74fd5..9fd4073eaa 100644 --- a/services/web/test/acceptance/src/BackFillDocNameForDeletedDocsTests.mjs +++ b/services/web/test/acceptance/src/BackFillDocNameForDeletedDocsTests.mjs @@ -4,6 +4,7 @@ import { expect } from 'chai' import logger from '@overleaf/logger' import { db, ObjectId } from '../../../app/src/infrastructure/mongodb.js' import UserHelper from './helpers/User.js' +import { renderObjectId } from '@overleaf/mongo-utils/batchedUpdate.js' const User = UserHelper.promises @@ -78,7 +79,9 @@ describe('BackFillDocNameForDeletedDocs', function () { } const { stderr: stdErr } = result - expect(stdErr).to.include(`Completed batch ending ${projectId2}`) + expect(stdErr).to.include( + `Completed batch ending ${renderObjectId(projectId2)}` + ) } function checkDocsBackFilled() { diff --git a/services/web/test/acceptance/src/BatchedUpdateTests.mjs b/services/web/test/acceptance/src/BatchedUpdateTests.mjs index 221553331f..c73b91c5d2 100644 --- a/services/web/test/acceptance/src/BatchedUpdateTests.mjs +++ b/services/web/test/acceptance/src/BatchedUpdateTests.mjs @@ -63,9 +63,15 @@ describe('BatchedUpdateTests', function () { expect( await db.systemmessages.find({}).project({ content: 1, _id: 0 }).toArray() ).to.deep.equal([{ content: '42' }, { content: '42' }, { content: '42' }]) - expect(stderr).to.include('Completed batch ending 300000000000000000000000') - expect(stderr).to.include(`Completed batch ending ${edge}`) // hit the edge - expect(stderr).to.include('Completed batch ending 400000000000000000000000') + expect(stderr).to.include( + 'Completed batch ending 300000000000000000000000 (1995-07-09T16:12:48.000Z)' + ) + expect(stderr).to.include( + `Completed batch ending ${edge} (1995-08-09T16:12:48.000Z)` + ) // hit the edge + expect(stderr).to.include( + 'Completed batch ending 400000000000000000000000 (2004-01-10T13:37:04.000Z)' + ) }) it('can handle ids sitting on the edge descending', async function () { @@ -101,8 +107,58 @@ describe('BatchedUpdateTests', function () { expect( await db.systemmessages.find({}).project({ content: 1, _id: 0 }).toArray() ).to.deep.equal([{ content: '42' }, { content: '42' }, { content: '42' }]) - expect(stderr).to.include('Completed batch ending 400000000000000000000000') - expect(stderr).to.include(`Completed batch ending ${edge}`) // hit the edge - expect(stderr).to.include('Completed batch ending 300000000000000000000000') + expect(stderr).to.include( + 'Completed batch ending 400000000000000000000000 (2004-01-10T13:37:04.000Z)' + ) + expect(stderr).to.include( + `Completed batch ending ${edge} (2003-12-10T13:37:04.000Z)` + ) // hit the edge + expect(stderr).to.include( + 'Completed batch ending 300000000000000000000000 (1995-07-09T16:12:48.000Z)' + ) + }) + + it('can handle dates as input', async function () { + await db.systemmessages.insertOne({ + content: '1', + _id: new ObjectId('500000000000000000000000'), + }) + await db.systemmessages.insertOne({ + content: '2', + _id: new ObjectId('400000000000000000000000'), + }) + await db.systemmessages.insertOne({ + content: '3', + _id: new ObjectId('600000000000000000000000'), + }) + await db.systemmessages.insertOne({ + content: '4', + _id: new ObjectId('300000000000000000000000'), + }) + + spawnSync( + process.argv0, + [ + '--input-type=module', + '-e', + 'import { batchedUpdateWithResultHandling } from "@overleaf/mongo-utils/batchedUpdate.js"; import { db } from "./app/src/infrastructure/mongodb.js"; batchedUpdateWithResultHandling(db.systemmessages, { content: { $ne: "42" }}, { $set: { content: "42" } })', + ], + { + env: { + ...process.env, + BATCH_RANGE_START: '2004-01-10T13:37:03.000Z', + BATCH_RANGE_END: '2012-07-13T11:01:20.000Z', + }, + } + ) + + await expect( + db.systemmessages.find({}).project({ content: 1, _id: 0 }).toArray() + ).to.eventually.deep.equal([ + { content: '42' }, + { content: '42' }, + { content: '3' }, + { content: '4' }, + ]) }) }) diff --git a/services/web/test/acceptance/src/RegenerateDuplicateReferralIdsTests.mjs b/services/web/test/acceptance/src/RegenerateDuplicateReferralIdsTests.mjs index 673a907a91..ebee8a3ae6 100644 --- a/services/web/test/acceptance/src/RegenerateDuplicateReferralIdsTests.mjs +++ b/services/web/test/acceptance/src/RegenerateDuplicateReferralIdsTests.mjs @@ -4,6 +4,7 @@ import { expect } from 'chai' import logger from '@overleaf/logger' import { filterOutput } from './helpers/settings.mjs' import { db } from '../../../app/src/infrastructure/mongodb.js' +import { renderObjectId } from '@overleaf/mongo-utils/batchedUpdate.js' const BATCH_SIZE = 100 let n = 0 @@ -111,10 +112,10 @@ describe('RegenerateDuplicateReferralIds', function () { stdErr = stdErr.split('\n').filter(filterOutput) stdOut = stdOut.split('\n').filter(filterOutput) expect(stdErr).to.include.members([ - `Completed batch ending ${firstBatch[BATCH_SIZE - 1]}`, - `Completed batch ending ${secondBatch[BATCH_SIZE - 1]}`, - `Completed batch ending ${thirdBatch[BATCH_SIZE - 1]}`, - `Completed batch ending ${forthBatch[BATCH_SIZE - 1]}`, + `Completed batch ending ${renderObjectId(firstBatch[BATCH_SIZE - 1])}`, + `Completed batch ending ${renderObjectId(secondBatch[BATCH_SIZE - 1])}`, + `Completed batch ending ${renderObjectId(thirdBatch[BATCH_SIZE - 1])}`, + `Completed batch ending ${renderObjectId(forthBatch[BATCH_SIZE - 1])}`, 'Done.', '', ])