diff --git a/server-ce/hotfix/5.0.3/910_initiate_doc_version_recovery b/server-ce/hotfix/5.0.3/910_initiate_doc_version_recovery
new file mode 100755
index 0000000000..b5f9ce2a09
--- /dev/null
+++ b/server-ce/hotfix/5.0.3/910_initiate_doc_version_recovery
@@ -0,0 +1,51 @@
+#!/bin/bash
+
+set -euo pipefail
+
+source /etc/container_environment.sh
+source /etc/overleaf/env.sh
+
+LOG_FILE=/var/lib/overleaf/data/history/doc-version-recovery.log
+RESYNCS_NEEDED_FILE=/var/lib/overleaf/data/history/doc-version-recovery-resyncs-5.0.3.log
+
+echo "Checking for doc version recovery. This can take a while if needed. Logs are in $LOG_FILE"
+cd /overleaf/services/history-v1
+LOG_LEVEL=info DOC_VERSION_RECOVERY_RESYNCS_NEEDED_FILE="$RESYNCS_NEEDED_FILE" node storage/scripts/recover_doc_versions.js 2>&1 | tee -a "$LOG_FILE"
+
+function resyncAllProjectsInBackground() {
+  waitForService docstore 3016
+  waitForService document-updater 3003
+  waitForService filestore 3009
+  waitForService history-v1 3100
+  waitForService project-history 3054
+  waitForService web-api 4000
+
+  # Resync files that had their versions updated
+  while read -r project_id; do
+    echo "Resyncing project $project_id..."
+    curl -X POST --silent "http://127.0.0.1:3054/project/$project_id/resync?force=true"
+  done < "$RESYNCS_NEEDED_FILE"
+
+  # Resync files that have broken histories
+  /overleaf/bin/force-history-resyncs
+
+  echo "Finished resyncing history for all projects. Adding .done suffix to log file"
+  mv "$RESYNCS_NEEDED_FILE" "$RESYNCS_NEEDED_FILE.done"
+}
+
+function waitForService() {
+  local name=$1
+  local port=$2
+  while ! curl --fail --silent "http://127.0.0.1:$port/status"; do
+    echo "Waiting for $name service to start up"
+    sleep 10
+  done
+}
+
+if [ -f "$RESYNCS_NEEDED_FILE" ]; then
+  echo "Finished recovery of doc versions. Resyncing history for all projects in the background."
+  resyncAllProjectsInBackground &
+else
+  echo "No recovery of doc versions needed."
+fi
+
diff --git a/server-ce/hotfix/5.0.3/Dockerfile b/server-ce/hotfix/5.0.3/Dockerfile
new file mode 100644
index 0000000000..79be043963
--- /dev/null
+++ b/server-ce/hotfix/5.0.3/Dockerfile
@@ -0,0 +1,7 @@
+FROM sharelatex/sharelatex:5.0.2-RC6
+
+# Patch: https://github.com/overleaf/internal/pull/18065
+RUN npm install @overleaf/redis-wrapper @overleaf/settings -w services/history-v1
+ADD 910_initiate_doc_version_recovery /etc/my_init.d/910_initiate_doc_version_recovery
+COPY pr_18065.patch .
+RUN patch -p0 < pr_18065.patch && rm pr_18065.patch
diff --git a/server-ce/hotfix/5.0.3/pr_18065.patch b/server-ce/hotfix/5.0.3/pr_18065.patch
new file mode 100644
index 0000000000..4fe440f952
--- /dev/null
+++ b/server-ce/hotfix/5.0.3/pr_18065.patch
@@ -0,0 +1,307 @@
+--- services/history-v1/storage/scripts/recover_doc_versions.js
++++ services/history-v1/storage/scripts/recover_doc_versions.js
+@@ -2,6 +2,10 @@ const fsPromises = require('fs/promises')
+ const { ObjectId } = require('mongodb')
+ const BPromise = require('bluebird')
+ const logger = require('@overleaf/logger')
++const Settings = require('@overleaf/settings')
++const rclient = require('@overleaf/redis-wrapper').createClient(
++  Settings.redis.documentupdater
++)
+ const mongodb = require('../lib/mongodb')
+ const { chunkStore } = require('..')
+ const Events = require('events')
+@@ -28,8 +32,14 @@ const db = {
+ const BAD_MIGRATION_NAME =
+   '20231219081700_move_doc_versions_from_docops_to_docs'
+ 
++const RECOVERY_FILES_502 = [
++  '/var/lib/overleaf/data/history/doc-version-recovery-resyncs.log',
++  '/var/lib/overleaf/data/history/doc-version-recovery-resyncs.log.done',
++]
++
+ let loggingChain = Promise.resolve()
+ const projectIdsThatNeedResyncing = []
++const unflushedDocIds = new Set()
+ 
+ async function flushLogQueue() {
+   const logPath = OPTIONS['resyncs-needed-file']
+@@ -55,23 +65,67 @@ async function recordProjectNeedsResync(projectId) {
+ }
+ 
+ async function main() {
++  const recovery502Ran = await did502RecoveryRun()
++  await getUnflushedDocIds()
+   const badMigration = await db.migrations.findOne({ name: BAD_MIGRATION_NAME })
+-  if (OPTIONS.force || badMigration != null) {
++
++  if (unflushedDocIds.size > 0 && !recovery502Ran && badMigration != null) {
++    // Tell customers that they need to flush
++    console.log(`
++--------------------------------------------------------------------
++Detected unflushed changes while recovering doc versions.
++Please go back to version 5.0.1 and follow the recovery procedure
++for flushing document updates:
++
++https://github.com/overleaf/overleaf/wiki/Doc-version-recovery
++--------------------------------------------------------------------`)
++    process.exit(1)
++  }
++
++  if (OPTIONS.force || recovery502Ran || badMigration != null) {
+     console.warn('Need to recover doc versions. This will take a while.')
+     await runRecovery()
++    await db.migrations.deleteOne({ name: BAD_MIGRATION_NAME })
++    await delete502RecoveryFiles()
+   }
+-  await db.migrations.deleteOne({ name: BAD_MIGRATION_NAME })
++
+   console.log('Done.')
+ }
+ 
++async function did502RecoveryRun() {
++  for (const file of RECOVERY_FILES_502) {
++    try {
++      await fsPromises.stat(file)
++      return true
++    } catch (err) {
++      // file doesn't exist. continue
++    }
++  }
++  return false
++}
++
++async function delete502RecoveryFiles() {
++  for (const file of RECOVERY_FILES_502) {
++    try {
++      await fsPromises.rename(file, file.replace('.log', '-5.0.2.log'))
++    } catch (err) {
++      // file doesn't exist. continue
++    }
++  }
++}
++
+ async function runRecovery() {
+   let batch = []
+   const summary = {
+-    updated: 0,
+     ignored: 0,
+     skipped: 0,
+-    deletedUpdated: 0,
++    deletedUpdatedMongo: 0,
++    deletedUpdatedRedis: 0,
++    deletedUpdatedBoth: 0,
+     deletedIgnored: 0,
++    updatedMongo: 0,
++    updatedRedis: 0,
++    updatedBoth: 0,
+   }
+   const processBatchAndLogProgress = async () => {
+     try {
+@@ -79,9 +133,21 @@ async function runRecovery() {
+         concurrency: OPTIONS.concurrency,
+       })
+     } finally {
+-      console.log(`${summary.updated} projects updated`)
++      console.log(`${summary.updatedRedis} projects updated in Redis`)
++      console.log(`${summary.updatedMongo} projects updated in Mongo`)
++      console.log(
++        `${summary.updatedBoth} projects updated in both Mongo and Redis`
++      )
+       console.log(`${summary.ignored} projects had good versions`)
+-      console.log(`${summary.deletedUpdated} deleted projects updated`)
++      console.log(
++        `${summary.deletedUpdatedMongo} deleted projects updated in Mongo`
++      )
++      console.log(
++        `${summary.deletedUpdatedRedis} deleted projects updated in Redis`
++      )
++      console.log(
++        `${summary.deletedUpdatedBoth} deleted projects updated in both Mongo and Redis`
++      )
+       console.log(
+         `${summary.deletedIgnored} deleted projects had good versions`
+       )
+@@ -91,7 +157,7 @@
+   }
+ 
+   await printDBStats()
+-  await touchResyncsNeededFile()
++  await initResyncsNeededFile()
+   for await (const project of getProjects()) {
+     batch.push(project)
+     if (batch.length >= BATCH_SIZE) {
+@@ -115,17 +181,38 @@
+   await backfillMissingVersions()
+ }
+ 
++async function getUnflushedDocIds() {
++  const batchSize = 1000
++  let cursor = '0'
++  do {
++    const [newCursor, keys] = await rclient.scan(
++      cursor,
++      'MATCH',
++      Settings.redis.documentupdater.key_schema.docVersion({ doc_id: '*' }),
++      'COUNT',
++      batchSize
++    )
++    for (const key of keys) {
++      unflushedDocIds.add(key.slice('DocVersion:'.length))
++    }
++    cursor = newCursor
++  } while (cursor !== '0')
++}
++
+ async function printDBStats() {
+   const projects = await db.projects.estimatedDocumentCount()
++  const deletedProjects = await db.deletedProjects.countDocuments()
+   const docs = await db.docs.estimatedDocumentCount()
+   console.log(
+-    `Need to check ${projects} projects with a total of ${docs} docs.`
++    `Need to check ${projects} projects and up-to ${deletedProjects} deleted projects with a total of ${docs} docs.`
+   )
+ }
+ 
+-async function touchResyncsNeededFile() {
+-  if (OPTIONS['resyncs-needed-file']) {
+-    await fsPromises.appendFile(OPTIONS['resyncs-needed-file'], '')
++async function initResyncsNeededFile() {
++  const logPath = OPTIONS['resyncs-needed-file']
++  if (logPath) {
++    await fsPromises.writeFile(logPath, '')
++    await fsPromises.rm(`${logPath}.done`, { force: true })
+   }
+ }
+ 
+@@ -135,34 +222,47 @@ function getProjects() {
+ 
+ function getDeletedProjects() {
+   return db.deletedProjects.find(
+-    { project: { $ne: null } },
++    { 'project.overleaf.history.id': { $exists: true } },
+     { projection: { 'project._id': 1, 'project.overleaf': 1 } }
+   )
+ }
+ 
+ async function processProject(project, summary) {
+   const projectId = project._id.toString()
+-  let updated = false
++  let updatedMongo = false
++  let updatedRedis = false
+   try {
+     const historyDocVersions = await getHistoryDocVersions(project)
+ 
+     for (const { docId, version } of historyDocVersions) {
+-      const update = await fixMongoDocVersion(docId, version)
++      const update = await fixDocVersion(docId, version)
+       if (update != null) {
+-        updated = true
++        if (update.in === 'mongo') {
++          updatedMongo = true
++        } else if (update.in === 'redis') {
++          updatedRedis = true
++        }
+       }
+     }
+ 
+     if (project.isDeleted) {
+-      if (updated) {
+-        summary.deletedUpdated += 1
++      if (updatedMongo && updatedRedis) {
++        summary.deletedUpdatedBoth += 1
++      } else if (updatedMongo) {
++        summary.deletedUpdatedMongo += 1
++      } else if (updatedRedis) {
++        summary.deletedUpdatedRedis += 1
+       } else {
+         summary.deletedIgnored += 1
+       }
+     } else {
+       await recordProjectNeedsResync(projectId)
+-      if (updated) {
+-        summary.updated += 1
++      if (updatedMongo && updatedRedis) {
++        summary.updatedBoth += 1
++      } else if (updatedMongo) {
++        summary.updatedMongo += 1
++      } else if (updatedRedis) {
++        summary.updatedRedis += 1
+       } else {
+         summary.ignored += 1
+       }
+@@ -197,25 +297,61 @@ async function getHistoryDocVersions(project) {
+   }))
+ }
+ 
+-async function fixMongoDocVersion(docId, historyVersion) {
+-  const docBeforeUpdate = await db.docs.findOneAndUpdate(
+-    {
+-      _id: new ObjectId(docId),
+-      $or: [
+-        { version: { $lte: historyVersion } },
+-        { version: { $exists: false } },
+-      ],
+-    },
+-    { $set: { version: historyVersion + 1 } }
+-  )
+-  if (docBeforeUpdate != null) {
++async function fixDocVersion(docId, historyVersion) {
++  const redisVersion = await getRedisDocVersion(docId)
++  if (redisVersion != null && historyVersion >= redisVersion) {
++    await setRedisDocVersion(docId, historyVersion + 1)
+     return {
+-      previousVersion: docBeforeUpdate.version,
++      in: 'redis',
++      previousVersion: redisVersion,
+       newVersion: historyVersion + 1,
+     }
+   } else {
++    const docBeforeUpdate = await db.docs.findOneAndUpdate(
++      {
++        _id: new ObjectId(docId),
++        $or: [
++          { version: { $lte: historyVersion } },
++          { version: { $exists: false } },
++        ],
++      },
++      { $set: { version: historyVersion + 1 } },
++      { projection: { _id: 1, version: 1 } }
++    )
++
++    if (docBeforeUpdate != null) {
++      return {
++        in: 'mongo',
++        previousVersion: docBeforeUpdate.version,
++        newVersion: historyVersion + 1,
++      }
++    } else {
++      return null
++    }
++  }
++}
++
++async function getRedisDocVersion(docId) {
++  if (!unflushedDocIds.has(docId)) {
+     return null
+   }
++  const result = await rclient.get(
++    Settings.redis.documentupdater.key_schema.docVersion({ doc_id: docId })
++  )
++  if (result == null) {
++    return null
++  }
++  return parseInt(result, 10)
++}
++
++async function setRedisDocVersion(docId, version) {
++  const multi = rclient.multi()
++  multi.set(
++    Settings.redis.documentupdater.key_schema.docVersion({ doc_id: docId }),
++    version
++  )
++  multi.set(`UnflushedTime:{${docId}}`, Date.now(), 'NX')
++  await multi.exec()
+ }
+ 
+ /**
diff --git a/server-ce/init_scripts/910_initiate_doc_version_recovery b/server-ce/init_scripts/910_initiate_doc_version_recovery
index 6a4819ce62..1daecd3c2f 100755
--- a/server-ce/init_scripts/910_initiate_doc_version_recovery
+++ b/server-ce/init_scripts/910_initiate_doc_version_recovery
@@ -6,11 +6,11 @@ source /etc/container_environment.sh
 source /etc/overleaf/env.sh
 
 LOG_FILE=/var/lib/overleaf/data/history/doc-version-recovery.log
-export RESYNCS_NEEDED_FILE=/var/lib/overleaf/data/history/doc-version-recovery-resyncs.log
+RESYNCS_NEEDED_FILE=/var/lib/overleaf/data/history/doc-version-recovery-resyncs-5.0.3.log
 
 echo "Checking for doc version recovery. This can take a while if needed. Logs are in $LOG_FILE"
 cd /overleaf/services/history-v1
-LOG_LEVEL=info node storage/scripts/recover_doc_versions.js 2>&1 | tee -a "$LOG_FILE"
+LOG_LEVEL=info DOC_VERSION_RECOVERY_RESYNCS_NEEDED_FILE="$RESYNCS_NEEDED_FILE" node storage/scripts/recover_doc_versions.js 2>&1 | tee -a "$LOG_FILE"
 
 function resyncAllProjectsInBackground() {
   waitForService docstore 3016