From a2d5e030c614d043e5f0a136b9f9dd01cbeb1382 Mon Sep 17 00:00:00 2001
From: Eric Mc Sween <5454374+emcsween@users.noreply.github.com>
Date: Mon, 22 Apr 2024 08:53:43 -0400
Subject: [PATCH] Merge pull request #18041 from overleaf/em-jpa-recovery-script

[history-v1] add doc version recovery script

GitOrigin-RevId: 3f240f313465ce5fa9c53f72a992807f9396ebb4
---
 .../910_initiate_doc_version_recovery        |  50 ++++
 .../storage/scripts/recover_doc_versions.js  | 243 ++++++++++++++++++
 2 files changed, 293 insertions(+)
 create mode 100755 server-ce/init_scripts/910_initiate_doc_version_recovery
 create mode 100644 services/history-v1/storage/scripts/recover_doc_versions.js

diff --git a/server-ce/init_scripts/910_initiate_doc_version_recovery b/server-ce/init_scripts/910_initiate_doc_version_recovery
new file mode 100755
index 0000000000..6a4819ce62
--- /dev/null
+++ b/server-ce/init_scripts/910_initiate_doc_version_recovery
@@ -0,0 +1,50 @@
+#!/bin/bash
+
+set -euo pipefail
+
+source /etc/container_environment.sh
+source /etc/overleaf/env.sh
+
+LOG_FILE=/var/lib/overleaf/data/history/doc-version-recovery.log
+export RESYNCS_NEEDED_FILE=/var/lib/overleaf/data/history/doc-version-recovery-resyncs.log
+
+echo "Checking for doc version recovery. This can take a while if needed. Logs are in $LOG_FILE"
+cd /overleaf/services/history-v1
+LOG_LEVEL=info node storage/scripts/recover_doc_versions.js 2>&1 | tee -a "$LOG_FILE"
+
+function resyncAllProjectsInBackground() {
+  waitForService docstore 3016
+  waitForService document-updater 3003
+  waitForService filestore 3009
+  waitForService history-v1 3100
+  waitForService project-history 3054
+  waitForService web-api 4000
+
+  # Resync files that had their versions updated
+  while read -r project_id; do
+    echo "Resyncing project $project_id..."
+    curl -X POST --silent "http://127.0.0.1:3054/project/$project_id/resync?force=true"
+  done < "$RESYNCS_NEEDED_FILE"
+
+  # Resync files that have broken histories
+  /overleaf/bin/force-history-resyncs
+
+  echo "Finished resyncing history for all projects. Adding .done suffix to log file"
+  mv "$RESYNCS_NEEDED_FILE" "$RESYNCS_NEEDED_FILE.done"
+}
+
+function waitForService() {
+  local name=$1
+  local port=$2
+  while ! curl --fail --silent "http://127.0.0.1:$port/status"; do
+    echo "Waiting for $name service to start up"
+    sleep 10
+  done
+}
+
+if [ -f "$RESYNCS_NEEDED_FILE" ]; then
+  echo "Finished recovery of doc versions. Resyncing history for all projects in the background."
+  resyncAllProjectsInBackground &
+else
+  echo "No recovery of doc versions needed."
+fi
diff --git a/services/history-v1/storage/scripts/recover_doc_versions.js b/services/history-v1/storage/scripts/recover_doc_versions.js
new file mode 100644
index 0000000000..32e1ddee4f
--- /dev/null
+++ b/services/history-v1/storage/scripts/recover_doc_versions.js
@@ -0,0 +1,243 @@
+const fsPromises = require('fs/promises')
+const { ObjectId } = require('mongodb')
+const BPromise = require('bluebird')
+const logger = require('@overleaf/logger')
+const mongodb = require('../lib/mongodb')
+const { chunkStore } = require('..')
+const Events = require('events')
+
+// Silence warning.
+Events.setMaxListeners(20)
+
+const BATCH_SIZE = 1000
+const OPTIONS = {
+  concurrency: parseInt(process.env.DOC_VERSION_RECOVERY_CONCURRENCY, 10) || 20,
+  force: process.env.DOC_VERSION_RECOVERY_FORCE === 'true',
+  'skip-history-failures':
+    process.env.DOC_VERSION_RECOVERY_SKIP_HISTORY_FAILURES === 'true',
+  'resyncs-needed-file': process.env.DOC_VERSION_RECOVERY_RESYNCS_NEEDED_FILE,
+}
+
+const db = {
+  deletedProjects: mongodb.db.collection('deletedProjects'),
+  docs: mongodb.db.collection('docs'),
+  migrations: mongodb.db.collection('migrations'),
+  projects: mongodb.db.collection('projects'),
+}
+
+const BAD_MIGRATION_NAME =
+  '20231219081700_move_doc_versions_from_docops_to_docs'
+
+let loggingChain = Promise.resolve()
+const projectIdsThatNeedResyncing = []
+
+async function flushLogQueue() {
+  const logPath = OPTIONS['resyncs-needed-file']
+  loggingChain = loggingChain.then(async () => {
+    const batch = projectIdsThatNeedResyncing.splice(0)
+    if (batch.length === 0) return
+    try {
+      await fsPromises.appendFile(logPath, batch.join('\n') + '\n')
+    } catch (err) {
+      projectIdsThatNeedResyncing.push(...batch)
+      logger.err({ err, logPath, batch }, 'Failed to write to log file')
+    }
+  })
+  await loggingChain
+}
+
+async function recordProjectNeedsResync(projectId) {
+  if (OPTIONS['resyncs-needed-file']) {
+    projectIdsThatNeedResyncing.push(projectId)
+    await flushLogQueue()
+  } else {
+    console.log(`Project ${projectId} needs a hard resync.`)
+  }
+}
+
+async function main() {
+  const badMigration = await db.migrations.findOne({ name: BAD_MIGRATION_NAME })
+  if (OPTIONS.force || badMigration != null) {
+    console.warn('Need to recover doc versions. This will take a while.')
+    await runRecovery()
+  }
+  await db.migrations.deleteOne({ name: BAD_MIGRATION_NAME })
+  console.log('Done.')
+}
+
+async function runRecovery() {
+  let batch = []
+  const summary = {
+    updated: 0,
+    ignored: 0,
+    skipped: 0,
+    deletedUpdated: 0,
+    deletedIgnored: 0,
+  }
+  const processBatchAndLogProgress = async () => {
+    try {
+      await BPromise.map(batch, project => processProject(project, summary), {
+        concurrency: OPTIONS.concurrency,
+      })
+    } finally {
+      console.log(`${summary.updated} projects updated`)
+      console.log(`${summary.ignored} projects had good versions`)
+      console.log(`${summary.deletedUpdated} deleted projects updated`)
+      console.log(
+        `${summary.deletedIgnored} deleted projects had good versions`
+      )
+      console.log(`${summary.skipped} projects skipped`)
+    }
+    batch = []
+  }
+
+  await printDBStats()
+  await touchResyncsNeededFile()
+  for await (const project of getProjects()) {
+    batch.push(project)
+    if (batch.length >= BATCH_SIZE) {
+      await processBatchAndLogProgress()
+    }
+  }
+
+  for await (const deletedProject of getDeletedProjects()) {
+    const project = deletedProject.project
+    project.isDeleted = true
+    batch.push(project)
+    if (batch.length >= BATCH_SIZE) {
+      await processBatchAndLogProgress()
+    }
+  }
+
+  if (batch.length > 0) {
+    await processBatchAndLogProgress()
+  }
+
+  await backfillMissingVersions()
+}
+
+async function printDBStats() {
+  const projects = await db.projects.estimatedDocumentCount()
+  const docs = await db.docs.estimatedDocumentCount()
+  console.log(
+    `Need to check ${projects} projects with a total of ${docs} docs.`
+  )
+}
+
+async function touchResyncsNeededFile() {
+  if (OPTIONS['resyncs-needed-file']) {
+    await fsPromises.appendFile(OPTIONS['resyncs-needed-file'], '')
+  }
+}
+
+function getProjects() {
+  return db.projects.find({}, { projection: { _id: 1, overleaf: 1 } })
+}
+
+function getDeletedProjects() {
+  return db.deletedProjects.find(
+    { project: { $ne: null } },
+    { projection: { 'project._id': 1, 'project.overleaf': 1 } }
+  )
+}
+
+async function processProject(project, summary) {
+  const projectId = project._id.toString()
+  let updated = false
+  try {
+    const historyDocVersions = await getHistoryDocVersions(project)
+
+    for (const { docId, version } of historyDocVersions) {
+      const update = await fixMongoDocVersion(docId, version)
+      if (update != null) {
+        updated = true
+      }
+    }
+
+    if (project.isDeleted) {
+      if (updated) {
+        summary.deletedUpdated += 1
+      } else {
+        summary.deletedIgnored += 1
+      }
+    } else {
+      await recordProjectNeedsResync(projectId)
+      if (updated) {
+        summary.updated += 1
+      } else {
+        summary.ignored += 1
+      }
+    }
+  } catch (err) {
+    logger.error({ err, projectId }, 'Failed to process project')
+    if (OPTIONS['skip-history-failures']) {
+      summary.skipped += 1
+    } else {
+      throw err
+    }
+  }
+}
+
+async function getHistoryDocVersions(project) {
+  const historyId = project.overleaf.history.id
+  const chunk = await chunkStore.loadLatest(historyId)
+  if (chunk == null) {
+    return []
+  }
+
+  const snapshot = chunk.getSnapshot()
+  const changes = chunk.getChanges()
+  snapshot.applyAll(changes)
+  const v2DocVersions = snapshot.getV2DocVersions()
+  if (v2DocVersions == null) {
+    return []
+  }
+  return Object.entries(v2DocVersions.data).map(([docId, versionInfo]) => ({
+    docId,
+    version: versionInfo.v,
+  }))
+}
+
+async function fixMongoDocVersion(docId, historyVersion) {
+  const docBeforeUpdate = await db.docs.findOneAndUpdate(
+    {
+      _id: new ObjectId(docId),
+      $or: [
+        { version: { $lte: historyVersion } },
+        { version: { $exists: false } },
+      ],
+    },
+    { $set: { version: historyVersion + 1 } }
+  )
+  if (docBeforeUpdate != null) {
+    return {
+      previousVersion: docBeforeUpdate.version,
+      newVersion: historyVersion + 1,
+    }
+  } else {
+    return null
+  }
+}
+
+/**
+ * Set all remaining versions to 0
+ */
+async function backfillMissingVersions() {
+  console.log('Defaulting version to 0 for remaining docs.')
+  await db.docs.updateMany(
+    { version: { $exists: false } },
+    { $set: { version: 0 } }
+  )
+}
+
+main()
+  .finally(async () => {
+    console.log('Flushing log queue.')
+    await flushLogQueue()
+  })
+  .then(() => {
+    process.exit(0)
+  })
+  .catch(err => {
+    console.error(err)
+    process.exit(1)
+  })