Merge pull request #22028 from overleaf/jpa-logging

[misc] logging tweaks

GitOrigin-RevId: 7757b81b6c684955374038376035dc2bd272c4e8
Jakob Ackermann 2024-11-21 09:29:39 +01:00 committed by Copybot
parent a92a1f5a24
commit 4f25b0de91
6 changed files with 134 additions and 32 deletions

View file

@@ -46,9 +46,9 @@ function refreshGlobalOptionsForBatchedUpdate(options = {}) {
   BATCH_SIZE = parseInt(options.BATCH_SIZE || '1000', 10) || 1000
   VERBOSE_LOGGING = options.VERBOSE_LOGGING === 'true'
   if (options.BATCH_LAST_ID) {
-    BATCH_RANGE_START = new ObjectId(options.BATCH_LAST_ID)
+    BATCH_RANGE_START = objectIdFromInput(options.BATCH_LAST_ID)
   } else if (options.BATCH_RANGE_START) {
-    BATCH_RANGE_START = new ObjectId(options.BATCH_RANGE_START)
+    BATCH_RANGE_START = objectIdFromInput(options.BATCH_RANGE_START)
   } else {
     if (BATCH_DESCENDING) {
       BATCH_RANGE_START = ID_EDGE_FUTURE
@@ -61,7 +61,7 @@ function refreshGlobalOptionsForBatchedUpdate(options = {}) {
     10
   )
   if (options.BATCH_RANGE_END) {
-    BATCH_RANGE_END = new ObjectId(options.BATCH_RANGE_END)
+    BATCH_RANGE_END = objectIdFromInput(options.BATCH_RANGE_END)
   } else {
     if (BATCH_DESCENDING) {
       BATCH_RANGE_END = ID_EDGE_PAST
@@ -120,6 +120,28 @@ async function performUpdate(collection, nextBatch, update) {
   )
 }
 
+/**
+ * @param {string} input
+ * @return {ObjectId}
+ */
+function objectIdFromInput(input) {
+  if (input.includes('T')) {
+    const t = new Date(input).getTime()
+    if (Number.isNaN(t)) throw new Error(`${input} is not a valid date`)
+    return objectIdFromMs(t)
+  } else {
+    return new ObjectId(input)
+  }
+}
+
+/**
+ * @param {ObjectId} objectId
+ * @return {string}
+ */
+function renderObjectId(objectId) {
+  return `${objectId} (${objectId.getTimestamp().toISOString()})`
+}
+
 /**
  * @param {number} ms
  * @return {ObjectId}
@@ -227,7 +249,7 @@ async function batchedUpdate(
         )}`
       )
     } else {
-      console.error(`Running update on batch ending ${end}`)
+      console.error(`Running update on batch ending ${renderObjectId(end)}`)
     }
 
     if (typeof update === 'function') {
@@ -236,7 +258,7 @@
         await performUpdate(collection, nextBatch, update)
       }
     }
-    console.error(`Completed batch ending ${end}`)
+    console.error(`Completed batch ending ${renderObjectId(end)}`)
     start = end
   }
   return updated
@@ -278,6 +300,8 @@ function batchedUpdateWithResultHandling(
 
 module.exports = {
   READ_PREFERENCE_SECONDARY,
+  objectIdFromInput,
+  renderObjectId,
   batchedUpdate,
   batchedUpdateWithResultHandling,
 }
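
Usage note: objectIdFromInput accepts either a 24-character hex id or an ISO-8601 date string (detected via the 'T' separator), and renderObjectId prints an id together with its embedded timestamp. A minimal sketch, assuming objectIdFromMs zero-fills the non-timestamp bytes of the ObjectId (as ObjectId.createFromTime does; the BatchedUpdateTests further down are consistent with that):

    import { ObjectId } from 'mongodb'
    import {
      objectIdFromInput,
      renderObjectId,
    } from '@overleaf/mongo-utils/batchedUpdate.js'

    // Date input: 2004-01-10T13:37:04Z is 0x40000000 seconds since the epoch.
    objectIdFromInput('2004-01-10T13:37:04.000Z') // ObjectId('400000000000000000000000')
    // Hex input is passed through as a plain ObjectId.
    objectIdFromInput('400000000000000000000000') // ObjectId('400000000000000000000000')
    // renderObjectId makes batch boundaries readable in logs.
    renderObjectId(new ObjectId('400000000000000000000000'))
    // => '400000000000000000000000 (2004-01-10T13:37:04.000Z)'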

View file

@@ -11,6 +11,7 @@ import { Binary, ObjectId } from 'mongodb'
 import logger from '@overleaf/logger'
 import {
   batchedUpdate,
+  objectIdFromInput,
   READ_PREFERENCE_SECONDARY,
 } from '@overleaf/mongo-utils/batchedUpdate.js'
 import OError from '@overleaf/o-error'
@@ -85,14 +86,18 @@ ObjectId.cacheHexString = true
 
 const COLLECT_BLOBS = process.argv.includes('blobs')
 
 const PUBLIC_LAUNCH_DATE = new Date('2012-01-01T00:00:00Z')
-const BATCH_RANGE_START =
-  process.env.BATCH_RANGE_START ||
-  ObjectId.createFromTime(PUBLIC_LAUNCH_DATE.getTime() / 1000).toString()
-const BATCH_RANGE_END = process.env.BATCH_RANGE_END || new ObjectId().toString()
+const BATCH_RANGE_START = objectIdFromInput(
+  process.env.BATCH_RANGE_START || PUBLIC_LAUNCH_DATE.toISOString()
+).toString()
+const BATCH_RANGE_END = objectIdFromInput(
+  process.env.BATCH_RANGE_END || new Date().toISOString()
+).toString()
+// We need to control the start and end as ids of deleted projects are created at time of deletion.
 delete process.env.BATCH_RANGE_START
 delete process.env.BATCH_RANGE_END
+const LOGGING_IDENTIFIER = process.env.LOGGING_IDENTIFIER || BATCH_RANGE_START
 
 // Concurrency for downloading from GCS and updating hashes in mongo
 const CONCURRENCY = parseInt(process.env.CONCURRENCY || '100', 10)
 // Retries for processing a given file
@@ -197,18 +202,25 @@ function computeDiff(nextEventLoopStats, now) {
   return Object.assign(diff, bandwidthStats(diff, ms))
 }
 
-function printStats() {
+/**
+ * @param {boolean} isLast
+ */
+function printStats(isLast = false) {
   const now = performance.now()
   const nextEventLoopStats = performance.eventLoopUtilization()
-  console.log(
-    JSON.stringify({
-      time: new Date(),
-      ...STATS,
-      ...bandwidthStats(STATS, now - processStart),
-      eventLoop: nextEventLoopStats,
-      diff: computeDiff(nextEventLoopStats, now),
-    })
-  )
+  const logLine = JSON.stringify({
+    time: new Date(),
+    LOGGING_IDENTIFIER,
+    ...STATS,
+    ...bandwidthStats(STATS, now - processStart),
+    eventLoop: nextEventLoopStats,
+    diff: computeDiff(nextEventLoopStats, now),
+  })
+  if (isLast) {
+    console.warn(logLine)
+  } else {
+    console.log(logLine)
+  }
   lastEventLoopStats = nextEventLoopStats
   lastLog = Object.assign({}, STATS)
 }
@@ -1100,7 +1112,7 @@ try {
   try {
     await main()
   } finally {
-    printStats()
+    printStats(true)
     try {
       // Perform non-recursive removal of the BUFFER_DIR. Individual files
       // should get removed in parallel as part of batch processing.
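
Taken together, the script now accepts dates for its range variables and tags every stats line with an identifier. A hedged driver sketch mirroring the test setup further down (the script path storage/scripts/back_fill_file_hash.mjs is an assumption for illustration; the env variable names and defaults come from the diff above):

    import { spawnSync } from 'node:child_process'

    // BATCH_RANGE_START/END may be ISO dates; the script deletes them from
    // process.env after parsing so batchedUpdate cannot re-read them.
    const result = spawnSync(process.argv0, ['storage/scripts/back_fill_file_hash.mjs'], {
      env: {
        ...process.env,
        BATCH_RANGE_START: '2012-01-01T00:00:00Z', // defaults to PUBLIC_LAUNCH_DATE
        BATCH_RANGE_END: new Date().toISOString(), // defaults to "now"
        LOGGING_IDENTIFIER: 'backfill-2012', // defaults to BATCH_RANGE_START
      },
    })
    // printStats(true) emits the final stats line via console.warn, i.e. on
    // stderr, alongside the batch progress logs.
    const finalStats = result.stderr
      .toString()
      .split('\n')
      .filter(l => l.includes('LOGGING_IDENTIFIER'))
      .pop()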

View file

@@ -520,7 +520,12 @@ describe('back_fill_file_hash script', function () {
       'readFromGCSThroughputMiBPerSecond',
       'writeToAWSThroughputMiBPerSecond',
     ]
-    const stats = JSON.parse(result.stdout.trimEnd().split('\n').pop())
+    const stats = JSON.parse(
+      result.stderr
+        .split('\n')
+        .filter(l => l.includes('LOGGING_IDENTIFIER'))
+        .pop()
+    )
     expect(Object.keys(stats.diff).sort()).to.deep.equal(
       [...extraStatsKeys, ...Object.keys(STATS_ALL)].sort()
     )
@@ -534,6 +539,7 @@ describe('back_fill_file_hash script', function () {
     for (const key of extraStatsKeys) {
       delete stats[key]
     }
+    delete stats.LOGGING_IDENTIFIER
     return { stats, result }
   }
 

View file

@@ -4,6 +4,7 @@ import { expect } from 'chai'
 import logger from '@overleaf/logger'
 import { db, ObjectId } from '../../../app/src/infrastructure/mongodb.js'
 import UserHelper from './helpers/User.js'
+import { renderObjectId } from '@overleaf/mongo-utils/batchedUpdate.js'
 
 const User = UserHelper.promises
 
@@ -78,7 +79,9 @@ describe('BackFillDocNameForDeletedDocs', function () {
     }
 
     const { stderr: stdErr } = result
-    expect(stdErr).to.include(`Completed batch ending ${projectId2}`)
+    expect(stdErr).to.include(
+      `Completed batch ending ${renderObjectId(projectId2)}`
+    )
   }
 
   function checkDocsBackFilled() {

View file

@@ -63,9 +63,15 @@ describe('BatchedUpdateTests', function () {
     expect(
       await db.systemmessages.find({}).project({ content: 1, _id: 0 }).toArray()
     ).to.deep.equal([{ content: '42' }, { content: '42' }, { content: '42' }])
-    expect(stderr).to.include('Completed batch ending 300000000000000000000000')
-    expect(stderr).to.include(`Completed batch ending ${edge}`) // hit the edge
-    expect(stderr).to.include('Completed batch ending 400000000000000000000000')
+    expect(stderr).to.include(
+      'Completed batch ending 300000000000000000000000 (1995-07-09T16:12:48.000Z)'
+    )
+    expect(stderr).to.include(
+      `Completed batch ending ${edge} (1995-08-09T16:12:48.000Z)`
+    ) // hit the edge
+    expect(stderr).to.include(
+      'Completed batch ending 400000000000000000000000 (2004-01-10T13:37:04.000Z)'
+    )
   })
 
   it('can handle ids sitting on the edge descending', async function () {
@@ -101,8 +107,58 @@ describe('BatchedUpdateTests', function () {
     expect(
       await db.systemmessages.find({}).project({ content: 1, _id: 0 }).toArray()
     ).to.deep.equal([{ content: '42' }, { content: '42' }, { content: '42' }])
-    expect(stderr).to.include('Completed batch ending 400000000000000000000000')
-    expect(stderr).to.include(`Completed batch ending ${edge}`) // hit the edge
-    expect(stderr).to.include('Completed batch ending 300000000000000000000000')
+    expect(stderr).to.include(
+      'Completed batch ending 400000000000000000000000 (2004-01-10T13:37:04.000Z)'
+    )
+    expect(stderr).to.include(
+      `Completed batch ending ${edge} (2003-12-10T13:37:04.000Z)`
+    ) // hit the edge
+    expect(stderr).to.include(
+      'Completed batch ending 300000000000000000000000 (1995-07-09T16:12:48.000Z)'
+    )
   })
+
+  it('can handle dates as input', async function () {
+    await db.systemmessages.insertOne({
+      content: '1',
+      _id: new ObjectId('500000000000000000000000'),
+    })
+    await db.systemmessages.insertOne({
+      content: '2',
+      _id: new ObjectId('400000000000000000000000'),
+    })
+    await db.systemmessages.insertOne({
+      content: '3',
+      _id: new ObjectId('600000000000000000000000'),
+    })
+    await db.systemmessages.insertOne({
+      content: '4',
+      _id: new ObjectId('300000000000000000000000'),
+    })
+
+    spawnSync(
+      process.argv0,
+      [
+        '--input-type=module',
+        '-e',
+        'import { batchedUpdateWithResultHandling } from "@overleaf/mongo-utils/batchedUpdate.js"; import { db } from "./app/src/infrastructure/mongodb.js"; batchedUpdateWithResultHandling(db.systemmessages, { content: { $ne: "42" }}, { $set: { content: "42" } })',
+      ],
+      {
+        env: {
+          ...process.env,
+          BATCH_RANGE_START: '2004-01-10T13:37:03.000Z',
+          BATCH_RANGE_END: '2012-07-13T11:01:20.000Z',
+        },
+      }
+    )
+
+    await expect(
+      db.systemmessages.find({}).project({ content: 1, _id: 0 }).toArray()
+    ).to.eventually.deep.equal([
+      { content: '42' },
+      { content: '42' },
+      { content: '3' },
+      { content: '4' },
+    ])
+  })
 })
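
The timestamp suffixes asserted above follow from the ObjectId layout: the first four bytes encode seconds since the Unix epoch, so a zero-filled hex id maps to a fixed date. A quick check with the stock mongodb driver API (values taken from the test literals):

    import { ObjectId } from 'mongodb'

    // 0x40000000 = 1073741824 seconds -> 2004-01-10T13:37:04.000Z
    new ObjectId('400000000000000000000000').getTimestamp().toISOString()
    // 0x30000000 = 805306368 seconds -> 1995-07-09T16:12:48.000Z
    new ObjectId('300000000000000000000000').getTimestamp().toISOString()
    // Hence the 'can handle dates as input' test: a range from
    // 2004-01-10T13:37:03Z (one second before 0x40000000) to
    // 2012-07-13T11:01:20Z (0x50000000) covers ids 400000000000000000000000
    // and 500000000000000000000000, while 300000000000000000000000 and
    // 600000000000000000000000 fall outside the window and keep their
    // original content.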

View file

@@ -4,6 +4,7 @@ import { expect } from 'chai'
 import logger from '@overleaf/logger'
 import { filterOutput } from './helpers/settings.mjs'
 import { db } from '../../../app/src/infrastructure/mongodb.js'
+import { renderObjectId } from '@overleaf/mongo-utils/batchedUpdate.js'
 
 const BATCH_SIZE = 100
 let n = 0
@@ -111,10 +112,10 @@ describe('RegenerateDuplicateReferralIds', function () {
     stdErr = stdErr.split('\n').filter(filterOutput)
     stdOut = stdOut.split('\n').filter(filterOutput)
     expect(stdErr).to.include.members([
-      `Completed batch ending ${firstBatch[BATCH_SIZE - 1]}`,
-      `Completed batch ending ${secondBatch[BATCH_SIZE - 1]}`,
-      `Completed batch ending ${thirdBatch[BATCH_SIZE - 1]}`,
-      `Completed batch ending ${forthBatch[BATCH_SIZE - 1]}`,
+      `Completed batch ending ${renderObjectId(firstBatch[BATCH_SIZE - 1])}`,
+      `Completed batch ending ${renderObjectId(secondBatch[BATCH_SIZE - 1])}`,
+      `Completed batch ending ${renderObjectId(thirdBatch[BATCH_SIZE - 1])}`,
+      `Completed batch ending ${renderObjectId(forthBatch[BATCH_SIZE - 1])}`,
       'Done.',
       '',
     ])