mirror of
https://github.com/overleaf/overleaf.git
synced 2025-01-07 05:11:18 +00:00
Merge pull request #22028 from overleaf/jpa-logging
[misc] logging tweaks GitOrigin-RevId: 7757b81b6c684955374038376035dc2bd272c4e8
This commit is contained in:
parent
a92a1f5a24
commit
4f25b0de91
6 changed files with 134 additions and 32 deletions
|
@ -46,9 +46,9 @@ function refreshGlobalOptionsForBatchedUpdate(options = {}) {
|
|||
BATCH_SIZE = parseInt(options.BATCH_SIZE || '1000', 10) || 1000
|
||||
VERBOSE_LOGGING = options.VERBOSE_LOGGING === 'true'
|
||||
if (options.BATCH_LAST_ID) {
|
||||
BATCH_RANGE_START = new ObjectId(options.BATCH_LAST_ID)
|
||||
BATCH_RANGE_START = objectIdFromInput(options.BATCH_LAST_ID)
|
||||
} else if (options.BATCH_RANGE_START) {
|
||||
BATCH_RANGE_START = new ObjectId(options.BATCH_RANGE_START)
|
||||
BATCH_RANGE_START = objectIdFromInput(options.BATCH_RANGE_START)
|
||||
} else {
|
||||
if (BATCH_DESCENDING) {
|
||||
BATCH_RANGE_START = ID_EDGE_FUTURE
|
||||
|
@ -61,7 +61,7 @@ function refreshGlobalOptionsForBatchedUpdate(options = {}) {
|
|||
10
|
||||
)
|
||||
if (options.BATCH_RANGE_END) {
|
||||
BATCH_RANGE_END = new ObjectId(options.BATCH_RANGE_END)
|
||||
BATCH_RANGE_END = objectIdFromInput(options.BATCH_RANGE_END)
|
||||
} else {
|
||||
if (BATCH_DESCENDING) {
|
||||
BATCH_RANGE_END = ID_EDGE_PAST
|
||||
|
@ -120,6 +120,28 @@ async function performUpdate(collection, nextBatch, update) {
|
|||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* @param {string} input
|
||||
* @return {ObjectId}
|
||||
*/
|
||||
function objectIdFromInput(input) {
|
||||
if (input.includes('T')) {
|
||||
const t = new Date(input).getTime()
|
||||
if (Number.isNaN(t)) throw new Error(`${input} is not a valid date`)
|
||||
return objectIdFromMs(t)
|
||||
} else {
|
||||
return new ObjectId(input)
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @param {ObjectId} objectId
|
||||
* @return {string}
|
||||
*/
|
||||
function renderObjectId(objectId) {
|
||||
return `${objectId} (${objectId.getTimestamp().toISOString()})`
|
||||
}
|
||||
|
||||
/**
|
||||
* @param {number} ms
|
||||
* @return {ObjectId}
|
||||
|
@ -227,7 +249,7 @@ async function batchedUpdate(
|
|||
)}`
|
||||
)
|
||||
} else {
|
||||
console.error(`Running update on batch ending ${end}`)
|
||||
console.error(`Running update on batch ending ${renderObjectId(end)}`)
|
||||
}
|
||||
|
||||
if (typeof update === 'function') {
|
||||
|
@ -236,7 +258,7 @@ async function batchedUpdate(
|
|||
await performUpdate(collection, nextBatch, update)
|
||||
}
|
||||
}
|
||||
console.error(`Completed batch ending ${end}`)
|
||||
console.error(`Completed batch ending ${renderObjectId(end)}`)
|
||||
start = end
|
||||
}
|
||||
return updated
|
||||
|
@ -278,6 +300,8 @@ function batchedUpdateWithResultHandling(
|
|||
|
||||
module.exports = {
|
||||
READ_PREFERENCE_SECONDARY,
|
||||
objectIdFromInput,
|
||||
renderObjectId,
|
||||
batchedUpdate,
|
||||
batchedUpdateWithResultHandling,
|
||||
}
|
||||
|
|
|
@ -11,6 +11,7 @@ import { Binary, ObjectId } from 'mongodb'
|
|||
import logger from '@overleaf/logger'
|
||||
import {
|
||||
batchedUpdate,
|
||||
objectIdFromInput,
|
||||
READ_PREFERENCE_SECONDARY,
|
||||
} from '@overleaf/mongo-utils/batchedUpdate.js'
|
||||
import OError from '@overleaf/o-error'
|
||||
|
@ -85,14 +86,18 @@ ObjectId.cacheHexString = true
|
|||
const COLLECT_BLOBS = process.argv.includes('blobs')
|
||||
|
||||
const PUBLIC_LAUNCH_DATE = new Date('2012-01-01T00:00:00Z')
|
||||
const BATCH_RANGE_START =
|
||||
process.env.BATCH_RANGE_START ||
|
||||
ObjectId.createFromTime(PUBLIC_LAUNCH_DATE.getTime() / 1000).toString()
|
||||
const BATCH_RANGE_END = process.env.BATCH_RANGE_END || new ObjectId().toString()
|
||||
const BATCH_RANGE_START = objectIdFromInput(
|
||||
process.env.BATCH_RANGE_START || PUBLIC_LAUNCH_DATE.toISOString()
|
||||
).toString()
|
||||
const BATCH_RANGE_END = objectIdFromInput(
|
||||
process.env.BATCH_RANGE_END || new Date().toISOString()
|
||||
).toString()
|
||||
// We need to control the start and end as ids of deleted projects are created at time of deletion.
|
||||
delete process.env.BATCH_RANGE_START
|
||||
delete process.env.BATCH_RANGE_END
|
||||
|
||||
const LOGGING_IDENTIFIER = process.env.LOGGING_IDENTIFIER || BATCH_RANGE_START
|
||||
|
||||
// Concurrency for downloading from GCS and updating hashes in mongo
|
||||
const CONCURRENCY = parseInt(process.env.CONCURRENCY || '100', 10)
|
||||
// Retries for processing a given file
|
||||
|
@ -197,18 +202,25 @@ function computeDiff(nextEventLoopStats, now) {
|
|||
return Object.assign(diff, bandwidthStats(diff, ms))
|
||||
}
|
||||
|
||||
function printStats() {
|
||||
/**
|
||||
* @param {boolean} isLast
|
||||
*/
|
||||
function printStats(isLast = false) {
|
||||
const now = performance.now()
|
||||
const nextEventLoopStats = performance.eventLoopUtilization()
|
||||
console.log(
|
||||
JSON.stringify({
|
||||
time: new Date(),
|
||||
...STATS,
|
||||
...bandwidthStats(STATS, now - processStart),
|
||||
eventLoop: nextEventLoopStats,
|
||||
diff: computeDiff(nextEventLoopStats, now),
|
||||
})
|
||||
)
|
||||
const logLine = JSON.stringify({
|
||||
time: new Date(),
|
||||
LOGGING_IDENTIFIER,
|
||||
...STATS,
|
||||
...bandwidthStats(STATS, now - processStart),
|
||||
eventLoop: nextEventLoopStats,
|
||||
diff: computeDiff(nextEventLoopStats, now),
|
||||
})
|
||||
if (isLast) {
|
||||
console.warn(logLine)
|
||||
} else {
|
||||
console.log(logLine)
|
||||
}
|
||||
lastEventLoopStats = nextEventLoopStats
|
||||
lastLog = Object.assign({}, STATS)
|
||||
}
|
||||
|
@ -1100,7 +1112,7 @@ try {
|
|||
try {
|
||||
await main()
|
||||
} finally {
|
||||
printStats()
|
||||
printStats(true)
|
||||
try {
|
||||
// Perform non-recursive removal of the BUFFER_DIR. Individual files
|
||||
// should get removed in parallel as part of batch processing.
|
||||
|
|
|
@ -520,7 +520,12 @@ describe('back_fill_file_hash script', function () {
|
|||
'readFromGCSThroughputMiBPerSecond',
|
||||
'writeToAWSThroughputMiBPerSecond',
|
||||
]
|
||||
const stats = JSON.parse(result.stdout.trimEnd().split('\n').pop())
|
||||
const stats = JSON.parse(
|
||||
result.stderr
|
||||
.split('\n')
|
||||
.filter(l => l.includes('LOGGING_IDENTIFIER'))
|
||||
.pop()
|
||||
)
|
||||
expect(Object.keys(stats.diff).sort()).to.deep.equal(
|
||||
[...extraStatsKeys, ...Object.keys(STATS_ALL)].sort()
|
||||
)
|
||||
|
@ -534,6 +539,7 @@ describe('back_fill_file_hash script', function () {
|
|||
for (const key of extraStatsKeys) {
|
||||
delete stats[key]
|
||||
}
|
||||
delete stats.LOGGING_IDENTIFIER
|
||||
return { stats, result }
|
||||
}
|
||||
|
||||
|
|
|
@ -4,6 +4,7 @@ import { expect } from 'chai'
|
|||
import logger from '@overleaf/logger'
|
||||
import { db, ObjectId } from '../../../app/src/infrastructure/mongodb.js'
|
||||
import UserHelper from './helpers/User.js'
|
||||
import { renderObjectId } from '@overleaf/mongo-utils/batchedUpdate.js'
|
||||
|
||||
const User = UserHelper.promises
|
||||
|
||||
|
@ -78,7 +79,9 @@ describe('BackFillDocNameForDeletedDocs', function () {
|
|||
}
|
||||
const { stderr: stdErr } = result
|
||||
|
||||
expect(stdErr).to.include(`Completed batch ending ${projectId2}`)
|
||||
expect(stdErr).to.include(
|
||||
`Completed batch ending ${renderObjectId(projectId2)}`
|
||||
)
|
||||
}
|
||||
|
||||
function checkDocsBackFilled() {
|
||||
|
|
|
@ -63,9 +63,15 @@ describe('BatchedUpdateTests', function () {
|
|||
expect(
|
||||
await db.systemmessages.find({}).project({ content: 1, _id: 0 }).toArray()
|
||||
).to.deep.equal([{ content: '42' }, { content: '42' }, { content: '42' }])
|
||||
expect(stderr).to.include('Completed batch ending 300000000000000000000000')
|
||||
expect(stderr).to.include(`Completed batch ending ${edge}`) // hit the edge
|
||||
expect(stderr).to.include('Completed batch ending 400000000000000000000000')
|
||||
expect(stderr).to.include(
|
||||
'Completed batch ending 300000000000000000000000 (1995-07-09T16:12:48.000Z)'
|
||||
)
|
||||
expect(stderr).to.include(
|
||||
`Completed batch ending ${edge} (1995-08-09T16:12:48.000Z)`
|
||||
) // hit the edge
|
||||
expect(stderr).to.include(
|
||||
'Completed batch ending 400000000000000000000000 (2004-01-10T13:37:04.000Z)'
|
||||
)
|
||||
})
|
||||
|
||||
it('can handle ids sitting on the edge descending', async function () {
|
||||
|
@ -101,8 +107,58 @@ describe('BatchedUpdateTests', function () {
|
|||
expect(
|
||||
await db.systemmessages.find({}).project({ content: 1, _id: 0 }).toArray()
|
||||
).to.deep.equal([{ content: '42' }, { content: '42' }, { content: '42' }])
|
||||
expect(stderr).to.include('Completed batch ending 400000000000000000000000')
|
||||
expect(stderr).to.include(`Completed batch ending ${edge}`) // hit the edge
|
||||
expect(stderr).to.include('Completed batch ending 300000000000000000000000')
|
||||
expect(stderr).to.include(
|
||||
'Completed batch ending 400000000000000000000000 (2004-01-10T13:37:04.000Z)'
|
||||
)
|
||||
expect(stderr).to.include(
|
||||
`Completed batch ending ${edge} (2003-12-10T13:37:04.000Z)`
|
||||
) // hit the edge
|
||||
expect(stderr).to.include(
|
||||
'Completed batch ending 300000000000000000000000 (1995-07-09T16:12:48.000Z)'
|
||||
)
|
||||
})
|
||||
|
||||
it('can handle dates as input', async function () {
|
||||
await db.systemmessages.insertOne({
|
||||
content: '1',
|
||||
_id: new ObjectId('500000000000000000000000'),
|
||||
})
|
||||
await db.systemmessages.insertOne({
|
||||
content: '2',
|
||||
_id: new ObjectId('400000000000000000000000'),
|
||||
})
|
||||
await db.systemmessages.insertOne({
|
||||
content: '3',
|
||||
_id: new ObjectId('600000000000000000000000'),
|
||||
})
|
||||
await db.systemmessages.insertOne({
|
||||
content: '4',
|
||||
_id: new ObjectId('300000000000000000000000'),
|
||||
})
|
||||
|
||||
spawnSync(
|
||||
process.argv0,
|
||||
[
|
||||
'--input-type=module',
|
||||
'-e',
|
||||
'import { batchedUpdateWithResultHandling } from "@overleaf/mongo-utils/batchedUpdate.js"; import { db } from "./app/src/infrastructure/mongodb.js"; batchedUpdateWithResultHandling(db.systemmessages, { content: { $ne: "42" }}, { $set: { content: "42" } })',
|
||||
],
|
||||
{
|
||||
env: {
|
||||
...process.env,
|
||||
BATCH_RANGE_START: '2004-01-10T13:37:03.000Z',
|
||||
BATCH_RANGE_END: '2012-07-13T11:01:20.000Z',
|
||||
},
|
||||
}
|
||||
)
|
||||
|
||||
await expect(
|
||||
db.systemmessages.find({}).project({ content: 1, _id: 0 }).toArray()
|
||||
).to.eventually.deep.equal([
|
||||
{ content: '42' },
|
||||
{ content: '42' },
|
||||
{ content: '3' },
|
||||
{ content: '4' },
|
||||
])
|
||||
})
|
||||
})
|
||||
|
|
|
@ -4,6 +4,7 @@ import { expect } from 'chai'
|
|||
import logger from '@overleaf/logger'
|
||||
import { filterOutput } from './helpers/settings.mjs'
|
||||
import { db } from '../../../app/src/infrastructure/mongodb.js'
|
||||
import { renderObjectId } from '@overleaf/mongo-utils/batchedUpdate.js'
|
||||
|
||||
const BATCH_SIZE = 100
|
||||
let n = 0
|
||||
|
@ -111,10 +112,10 @@ describe('RegenerateDuplicateReferralIds', function () {
|
|||
stdErr = stdErr.split('\n').filter(filterOutput)
|
||||
stdOut = stdOut.split('\n').filter(filterOutput)
|
||||
expect(stdErr).to.include.members([
|
||||
`Completed batch ending ${firstBatch[BATCH_SIZE - 1]}`,
|
||||
`Completed batch ending ${secondBatch[BATCH_SIZE - 1]}`,
|
||||
`Completed batch ending ${thirdBatch[BATCH_SIZE - 1]}`,
|
||||
`Completed batch ending ${forthBatch[BATCH_SIZE - 1]}`,
|
||||
`Completed batch ending ${renderObjectId(firstBatch[BATCH_SIZE - 1])}`,
|
||||
`Completed batch ending ${renderObjectId(secondBatch[BATCH_SIZE - 1])}`,
|
||||
`Completed batch ending ${renderObjectId(thirdBatch[BATCH_SIZE - 1])}`,
|
||||
`Completed batch ending ${renderObjectId(forthBatch[BATCH_SIZE - 1])}`,
|
||||
'Done.',
|
||||
'',
|
||||
])
|
||||
|
|
Loading…
Reference in a new issue