Merge pull request #24019 from overleaf/bg-backup-add-error-logging

add logging of backup errors in batch mode

GitOrigin-RevId: 96ae7a2354f82451b3bbe8c5459c4d4cf47e5459
This commit is contained in:
Brian Gough 2025-03-03 13:22:17 +00:00 committed by Copybot
parent 408f6dfee3
commit 1dbf5dca10
2 changed files with 34 additions and 4 deletions

View file

@ -653,15 +653,39 @@ function convertToISODate(dateStr) {
export async function initializeProjects(options) {
await ensureGlobalBlobsLoaded()
const limiter = pLimit(BATCH_CONCURRENCY)
let totalErrors = 0
let totalProjects = 0
/**
 * Back up a single project, logging any failure before propagating it.
 *
 * Calls backupProject with the `options` received by the enclosing
 * initializeProjects call. A failure is logged together with the project id
 * and then re-thrown unchanged, so the caller still sees the rejection.
 *
 * @param {string} projectId - id of the project to back up
 * @returns {Promise<void>} settles once the backup attempt has finished
 * @throws whatever backupProject throws (the original error object)
 */
async function backupProjectWithErrorLogging(projectId) {
  try {
    await backupProject(projectId, options)
  } catch (backupError) {
    // Log per-project context here; the rethrow keeps the failure visible
    // to whoever is awaiting this operation.
    logger.error(
      { projectId, err: backupError },
      'error backing up project'
    )
    throw backupError
  }
}
/**
 * Back up every project in one batch and record how many of them failed.
 *
 * Each project is scheduled through `limiter` (created with
 * pLimit(BATCH_CONCURRENCY) in the enclosing function) and run via
 * backupProjectWithErrorLogging. All operations are awaited with
 * Promise.allSettled, so one failing project does not prevent the others in
 * the batch from being attempted. The number of rejected operations is logged
 * (when non-zero) and added to the enclosing `totalErrors` counter; the batch
 * size is added to `totalProjects`.
 *
 * @param {Array<{_id: {toHexString(): string}}>} batch - projects to back up
 * @returns {Promise<void>}
 * @throws {Error} 'graceful shutdown' when gracefulShutdownInitiated is set,
 *   before any project of the batch is scheduled
 */
async function processBatch(batch) {
  if (gracefulShutdownInitiated) {
    throw new Error('graceful shutdown')
  }
  // Schedule exactly one operation per project, through the logging wrapper.
  const batchOperations = batch.map(project =>
    limiter(backupProjectWithErrorLogging, project._id.toHexString())
  )
  // allSettled never rejects, so failures are counted instead of thrown.
  const results = await Promise.allSettled(batchOperations)
  const errors = results.filter(result => result.status === 'rejected').length
  if (errors > 0) {
    // errors > 0 implies the batch is non-empty, so batch[0] exists here.
    logger.error(
      {
        errors,
        batchSize: batch.length,
        batchStart: batch[0]._id.toHexString(),
        batchEnd: batch[batch.length - 1]._id.toHexString(),
      },
      'errors in batch'
    )
  }
  totalErrors += errors
  totalProjects += batch.length
}
const query = {
@ -683,6 +707,8 @@ export async function initializeProjects(options) {
BATCH_RANGE_END: convertToISODate(options['end-date']),
}
)
return { errors: totalErrors, projects: totalProjects }
}
async function backupPendingProjects(options) {

View file

@ -65,9 +65,13 @@ async function runBackup(projectId) {
)
try {
logger.info({ projectId }, 'processing backup for project')
await backupProject(projectId, {})
const { errors, completed } = await backupProject(projectId, {})
metrics.inc('backup_worker_project', completed - errors, {
status: 'success',
})
metrics.inc('backup_worker_project', errors, { status: 'failed' })
timer.done()
return `backup completed ${projectId}`
return `backup completed ${projectId} (${errors} failed in ${completed} projects)`
} catch (err) {
logger.error({ projectId, err }, 'backup failed')
throw err // Re-throw to mark job as failed