From 1dbf5dca105a385bbd18a518241e182b5ee137f7 Mon Sep 17 00:00:00 2001 From: Brian Gough Date: Mon, 3 Mar 2025 13:22:17 +0000 Subject: [PATCH] Merge pull request #24019 from overleaf/bg-backup-add-error-logging add logging of backup errors in batch mode GitOrigin-RevId: 96ae7a2354f82451b3bbe8c5459c4d4cf47e5459 --- .../history-v1/storage/scripts/backup.mjs | 30 +++++++++++++++++-- .../storage/scripts/backup_worker.mjs | 8 +++-- 2 files changed, 34 insertions(+), 4 deletions(-) diff --git a/services/history-v1/storage/scripts/backup.mjs b/services/history-v1/storage/scripts/backup.mjs index 821491ba9b..9835096695 100644 --- a/services/history-v1/storage/scripts/backup.mjs +++ b/services/history-v1/storage/scripts/backup.mjs @@ -653,15 +653,39 @@ function convertToISODate(dateStr) { export async function initializeProjects(options) { await ensureGlobalBlobsLoaded() const limiter = pLimit(BATCH_CONCURRENCY) + let totalErrors = 0 + let totalProjects = 0 + async function backupProjectWithErrorLogging(projectId) { + try { + await backupProject(projectId, options) + } catch (err) { + logger.error({ projectId, err }, 'error backing up project') + throw err + } + } async function processBatch(batch) { if (gracefulShutdownInitiated) { throw new Error('graceful shutdown') } const batchOperations = batch.map(project => - limiter(backupProject, project._id.toHexString(), options) + limiter(backupProjectWithErrorLogging, project._id.toHexString()) ) - await Promise.allSettled(batchOperations) + const results = await Promise.allSettled(batchOperations) + const errors = results.filter(result => result.status === 'rejected').length + if (errors > 0) { + logger.error( + { + errors, + batchSize: batch.length, + batchStart: batch[0]._id.toHexString(), + batchEnd: batch[batch.length - 1]._id.toHexString(), + }, + 'errors in batch' + ) + } + totalErrors += errors + totalProjects += batch.length } const query = { @@ -683,6 +707,8 @@ export async function initializeProjects(options) { BATCH_RANGE_END: convertToISODate(options['end-date']), } ) + + return { errors: totalErrors, projects: totalProjects } } async function backupPendingProjects(options) { diff --git a/services/history-v1/storage/scripts/backup_worker.mjs b/services/history-v1/storage/scripts/backup_worker.mjs index 449a260c6a..3e592e5ffb 100644 --- a/services/history-v1/storage/scripts/backup_worker.mjs +++ b/services/history-v1/storage/scripts/backup_worker.mjs @@ -65,9 +65,13 @@ async function runBackup(projectId) { ) try { logger.info({ projectId }, 'processing backup for project') - await backupProject(projectId, {}) + const { errors, completed } = await backupProject(projectId, {}) + metrics.inc('backup_worker_project', completed - errors, { + status: 'success', + }) + metrics.inc('backup_worker_project', errors, { status: 'failed' }) timer.done() - return `backup completed ${projectId}` + return `backup completed ${projectId} (${errors} failed in ${completed} projects)` } catch (err) { logger.error({ projectId, err }, 'backup failed') throw err // Re-throw to mark job as failed