Merge pull request #18065 from overleaf/em-doc-versions-recovery-fix

Doc versions recovery for Server Pro 5.0.3

GitOrigin-RevId: 226b09383f9499c7eb1c20bf9e8e8e549bdbccb0
This commit is contained in:
Eric Mc Sween 2024-04-24 10:18:33 -04:00 committed by Copybot
parent 56a0a33b70
commit b8195f537d
3 changed files with 174 additions and 32 deletions

4
package-lock.json generated
View file

@ -41272,6 +41272,8 @@
"@overleaf/metrics": "*", "@overleaf/metrics": "*",
"@overleaf/o-error": "*", "@overleaf/o-error": "*",
"@overleaf/object-persistor": "*", "@overleaf/object-persistor": "*",
"@overleaf/redis-wrapper": "*",
"@overleaf/settings": "*",
"@overleaf/stream-utils": "^0.1.0", "@overleaf/stream-utils": "^0.1.0",
"archiver": "^5.3.0", "archiver": "^5.3.0",
"basic-auth": "^2.0.1", "basic-auth": "^2.0.1",
@ -70828,6 +70830,8 @@
"@overleaf/metrics": "*", "@overleaf/metrics": "*",
"@overleaf/o-error": "*", "@overleaf/o-error": "*",
"@overleaf/object-persistor": "*", "@overleaf/object-persistor": "*",
"@overleaf/redis-wrapper": "*",
"@overleaf/settings": "*",
"@overleaf/stream-utils": "^0.1.0", "@overleaf/stream-utils": "^0.1.0",
"archiver": "^5.3.0", "archiver": "^5.3.0",
"basic-auth": "^2.0.1", "basic-auth": "^2.0.1",

View file

@ -10,6 +10,8 @@
"@overleaf/metrics": "*", "@overleaf/metrics": "*",
"@overleaf/o-error": "*", "@overleaf/o-error": "*",
"@overleaf/object-persistor": "*", "@overleaf/object-persistor": "*",
"@overleaf/redis-wrapper": "*",
"@overleaf/settings": "*",
"@overleaf/stream-utils": "^0.1.0", "@overleaf/stream-utils": "^0.1.0",
"archiver": "^5.3.0", "archiver": "^5.3.0",
"basic-auth": "^2.0.1", "basic-auth": "^2.0.1",

View file

@ -2,6 +2,10 @@ const fsPromises = require('fs/promises')
const { ObjectId } = require('mongodb') const { ObjectId } = require('mongodb')
const BPromise = require('bluebird') const BPromise = require('bluebird')
const logger = require('@overleaf/logger') const logger = require('@overleaf/logger')
const Settings = require('@overleaf/settings')
const rclient = require('@overleaf/redis-wrapper').createClient(
Settings.redis.documentupdater
)
const mongodb = require('../lib/mongodb') const mongodb = require('../lib/mongodb')
const { chunkStore } = require('..') const { chunkStore } = require('..')
const Events = require('events') const Events = require('events')
@ -28,8 +32,14 @@ const db = {
const BAD_MIGRATION_NAME = const BAD_MIGRATION_NAME =
'20231219081700_move_doc_versions_from_docops_to_docs' '20231219081700_move_doc_versions_from_docops_to_docs'
const RECOVERY_FILES_502 = [
'/var/lib/overleaf/data/history/doc-version-recovery-resyncs.log',
'/var/lib/overleaf/data/history/doc-version-recovery-resyncs.log.done',
]
let loggingChain = Promise.resolve() let loggingChain = Promise.resolve()
const projectIdsThatNeedResyncing = [] const projectIdsThatNeedResyncing = []
const unflushedDocIds = new Set()
async function flushLogQueue() { async function flushLogQueue() {
const logPath = OPTIONS['resyncs-needed-file'] const logPath = OPTIONS['resyncs-needed-file']
@ -55,23 +65,67 @@ async function recordProjectNeedsResync(projectId) {
} }
async function main() { async function main() {
const recovery502Ran = await did502RecoveryRun()
await getUnflushedDocIds()
const badMigration = await db.migrations.findOne({ name: BAD_MIGRATION_NAME }) const badMigration = await db.migrations.findOne({ name: BAD_MIGRATION_NAME })
if (OPTIONS.force || badMigration != null) {
if (unflushedDocIds.size > 0 && !recovery502Ran && badMigration != null) {
// Tell customers that they need to flush
console.log(`
--------------------------------------------------------------------
Detected unflushed changes while recovering doc versions.
Please go back to version 5.0.1 and follow the recovery procedure
for flushing document updates:
https://github.com/overleaf/overleaf/wiki/Doc-version-recovery
--------------------------------------------------------------------`)
process.exit(1)
}
if (OPTIONS.force || recovery502Ran || badMigration != null) {
console.warn('Need to recover doc versions. This will take a while.') console.warn('Need to recover doc versions. This will take a while.')
await runRecovery() await runRecovery()
}
await db.migrations.deleteOne({ name: BAD_MIGRATION_NAME }) await db.migrations.deleteOne({ name: BAD_MIGRATION_NAME })
await delete502RecoveryFiles()
}
console.log('Done.') console.log('Done.')
} }
/**
 * Report whether any of the files listed in RECOVERY_FILES_502 is present.
 *
 * The files are probed one at a time, in list order, and probing stops at the
 * first one that can be stat'ed.
 *
 * @returns {Promise<boolean>} true as soon as one file can be stat'ed, false
 *   when stat fails for every file in the list
 */
async function did502RecoveryRun() {
  // Any stat failure is treated as "file is not there"; the error itself is
  // intentionally discarded.
  const canStat = async candidate => {
    try {
      await fsPromises.stat(candidate)
    } catch (err) {
      return false
    }
    return true
  }

  for (let index = 0; index < RECOVERY_FILES_502.length; index += 1) {
    if (await canStat(RECOVERY_FILES_502[index])) {
      return true
    }
  }
  return false
}
/**
 * Move every file listed in RECOVERY_FILES_502 out of the way.
 *
 * Despite the name, nothing is deleted: each file is renamed by replacing the
 * first '.log' in its path with '-5.0.2.log'. Files are handled one at a time,
 * in list order.
 *
 * @returns {Promise<void>}
 */
async function delete502RecoveryFiles() {
  for (let index = 0; index < RECOVERY_FILES_502.length; index += 1) {
    const source = RECOVERY_FILES_502[index]
    try {
      const target = source.replace('.log', '-5.0.2.log')
      await fsPromises.rename(source, target)
    } catch (err) {
      // A failed rename is taken to mean the file is not there; move on to
      // the next one.
    }
  }
}
async function runRecovery() { async function runRecovery() {
let batch = [] let batch = []
const summary = { const summary = {
updated: 0,
ignored: 0, ignored: 0,
skipped: 0, skipped: 0,
deletedUpdated: 0, deletedUpdatedMongo: 0,
deletedUpdatedRedis: 0,
deletedUpdatedBoth: 0,
deletedIgnored: 0, deletedIgnored: 0,
updatedMongo: 0,
updatedRedis: 0,
updatedBoth: 0,
} }
const processBatchAndLogProgress = async () => { const processBatchAndLogProgress = async () => {
try { try {
@ -79,9 +133,21 @@ async function runRecovery() {
concurrency: OPTIONS.concurrency, concurrency: OPTIONS.concurrency,
}) })
} finally { } finally {
console.log(`${summary.updated} projects updated`) console.log(`${summary.updatedRedis} projects updated in Redis`)
console.log(`${summary.updatedMongo} projects updated in Mongo`)
console.log(
`${summary.updatedBoth} projects updated in both Mongo and Redis`
)
console.log(`${summary.ignored} projects had good versions`) console.log(`${summary.ignored} projects had good versions`)
console.log(`${summary.deletedUpdated} deleted projects updated`) console.log(
`${summary.deletedUpdatedMongo} deleted projects updated in Mongo`
)
console.log(
`${summary.deletedUpdatedRedis} deleted projects updated in Redis`
)
console.log(
`${summary.deletedUpdatedBoth} deleted projects updated in both Mongo and Redis`
)
console.log( console.log(
`${summary.deletedIgnored} deleted projects had good versions` `${summary.deletedIgnored} deleted projects had good versions`
) )
@ -91,7 +157,7 @@ async function runRecovery() {
} }
await printDBStats() await printDBStats()
await touchResyncsNeededFile() await initResyncsNeededFile()
for await (const project of getProjects()) { for await (const project of getProjects()) {
batch.push(project) batch.push(project)
if (batch.length >= BATCH_SIZE) { if (batch.length >= BATCH_SIZE) {
@ -115,17 +181,38 @@ async function runRecovery() {
await backfillMissingVersions() await backfillMissingVersions()
} }
/**
 * Collect the ids of all docs that currently have a doc version key in Redis
 * and add them to the module-level `unflushedDocIds` set.
 *
 * The keyspace is walked with SCAN, using the pattern produced by the
 * configured `docVersion` key schema with '*' as the doc id. The doc id is
 * recovered from each key by stripping whatever the schema puts before and
 * after the id, rather than assuming a literal 'DocVersion:' prefix, so that
 * the ids stored here are the same strings later passed to
 * `unflushedDocIds.has(docId)` even if the schema wraps the id (for instance
 * in hash-tag braces).
 *
 * @returns {Promise<void>}
 */
async function getUnflushedDocIds() {
  const batchSize = 1000
  const pattern = Settings.redis.documentupdater.key_schema.docVersion({
    doc_id: '*',
  })

  // Split the pattern around the wildcard to learn the fixed parts of a key.
  // If the schema somehow yields no wildcard, fall back to the historical
  // literal prefix.
  const wildcardIndex = pattern.indexOf('*')
  const prefix =
    wildcardIndex === -1 ? 'DocVersion:' : pattern.slice(0, wildcardIndex)
  const suffix = wildcardIndex === -1 ? '' : pattern.slice(wildcardIndex + 1)

  let cursor = '0'
  do {
    const [newCursor, keys] = await rclient.scan(
      cursor,
      'MATCH',
      pattern,
      'COUNT',
      batchSize
    )
    for (const key of keys) {
      unflushedDocIds.add(key.slice(prefix.length, key.length - suffix.length))
    }
    cursor = newCursor
    // SCAN signals the end of the iteration by returning cursor '0'.
  } while (cursor !== '0')
}
async function printDBStats() { async function printDBStats() {
const projects = await db.projects.estimatedDocumentCount() const projects = await db.projects.estimatedDocumentCount()
const deletedProjects = await db.deletedProjects.countDocuments()
const docs = await db.docs.estimatedDocumentCount() const docs = await db.docs.estimatedDocumentCount()
console.log( console.log(
`Need to check ${projects} projects with a total of ${docs} docs.` `Need to check ${projects} projects and up-to ${deletedProjects} deleted projects with a total of ${docs} docs.`
) )
} }
async function touchResyncsNeededFile() { async function initResyncsNeededFile() {
if (OPTIONS['resyncs-needed-file']) { const logPath = OPTIONS['resyncs-needed-file']
await fsPromises.appendFile(OPTIONS['resyncs-needed-file'], '') if (logPath) {
await fsPromises.writeFile(logPath, '')
await fsPromises.rm(`${logPath}.done`, { force: true })
} }
} }
@ -135,34 +222,47 @@ function getProjects() {
function getDeletedProjects() { function getDeletedProjects() {
return db.deletedProjects.find( return db.deletedProjects.find(
{ project: { $ne: null } }, { 'project.overleaf.history.id': { $exists: true } },
{ projection: { 'project._id': 1, 'project.overleaf': 1 } } { projection: { 'project._id': 1, 'project.overleaf': 1 } }
) )
} }
async function processProject(project, summary) { async function processProject(project, summary) {
const projectId = project._id.toString() const projectId = project._id.toString()
let updated = false let updatedMongo = false
let updatedRedis = false
try { try {
const historyDocVersions = await getHistoryDocVersions(project) const historyDocVersions = await getHistoryDocVersions(project)
for (const { docId, version } of historyDocVersions) { for (const { docId, version } of historyDocVersions) {
const update = await fixMongoDocVersion(docId, version) const update = await fixDocVersion(docId, version)
if (update != null) { if (update != null) {
updated = true if (update.in === 'mongo') {
updatedMongo = true
} else if (update.in === 'redis') {
updatedRedis = true
}
} }
} }
if (project.isDeleted) { if (project.isDeleted) {
if (updated) { if (updatedMongo && updatedRedis) {
summary.deletedUpdated += 1 summary.deletedUpdatedBoth += 1
} else if (updatedMongo) {
summary.deletedUpdatedMongo += 1
} else if (updatedRedis) {
summary.deletedUpdatedRedis += 1
} else { } else {
summary.deletedIgnored += 1 summary.deletedIgnored += 1
} }
} else { } else {
await recordProjectNeedsResync(projectId) await recordProjectNeedsResync(projectId)
if (updated) { if (updatedMongo && updatedRedis) {
summary.updated += 1 summary.updatedBoth += 1
} else if (updatedMongo) {
summary.updatedMongo += 1
} else if (updatedRedis) {
summary.updatedRedis += 1
} else { } else {
summary.ignored += 1 summary.ignored += 1
} }
@ -197,7 +297,16 @@ async function getHistoryDocVersions(project) {
})) }))
} }
async function fixMongoDocVersion(docId, historyVersion) { async function fixDocVersion(docId, historyVersion) {
const redisVersion = await getRedisDocVersion(docId)
if (redisVersion != null && historyVersion >= redisVersion) {
await setRedisDocVersion(docId, historyVersion + 1)
return {
in: 'redis',
previousVersion: redisVersion,
newVersion: historyVersion + 1,
}
} else {
const docBeforeUpdate = await db.docs.findOneAndUpdate( const docBeforeUpdate = await db.docs.findOneAndUpdate(
{ {
_id: new ObjectId(docId), _id: new ObjectId(docId),
@ -206,16 +315,43 @@ async function fixMongoDocVersion(docId, historyVersion) {
{ version: { $exists: false } }, { version: { $exists: false } },
], ],
}, },
{ $set: { version: historyVersion + 1 } } { $set: { version: historyVersion + 1 } },
{ projection: { _id: 1, version: 1 } }
) )
if (docBeforeUpdate != null) { if (docBeforeUpdate != null) {
return { return {
in: 'mongo',
previousVersion: docBeforeUpdate.version, previousVersion: docBeforeUpdate.version,
newVersion: historyVersion + 1, newVersion: historyVersion + 1,
} }
} else { } else {
return null return null
} }
}
}
/**
 * Look up the version Redis holds for a doc.
 *
 * Redis is only queried for ids present in the module-level `unflushedDocIds`
 * set; for any other id the answer is null without a round trip.
 *
 * @param {string} docId - doc id, as stored in `unflushedDocIds`
 * @returns {Promise<number|null>} the stored value parsed as a base-10
 *   integer, or null when the doc is not in the set or Redis has no value
 */
async function getRedisDocVersion(docId) {
  if (unflushedDocIds.has(docId)) {
    const versionKey = Settings.redis.documentupdater.key_schema.docVersion({
      doc_id: docId,
    })
    const rawVersion = await rclient.get(versionKey)
    return rawVersion == null ? null : parseInt(rawVersion, 10)
  }
  return null
}
/**
 * Write a doc version to Redis and record an unflushed timestamp for the doc.
 *
 * Both SET commands are queued on one MULTI and sent together by exec().
 *
 * @param {string} docId - doc id used to build both Redis keys
 * @param {number} version - value stored under the doc version key
 * @returns {Promise<void>}
 */
async function setRedisDocVersion(docId, version) {
const multi = rclient.multi()
// The doc version key comes from the configured key schema.
multi.set(
Settings.redis.documentupdater.key_schema.docVersion({ doc_id: docId }),
version
)
// 'NX' makes this SET a no-op when the key already exists, so an existing
// timestamp is kept.
// NOTE(review): unlike the doc version key, this key is a hard-coded literal
// with hash-tag braces instead of being built from key_schema. Confirm it
// matches the key the document updater actually reads in this deployment.
multi.set(`UnflushedTime:{${docId}}`, Date.now(), 'NX')
// NOTE(review): the value returned by exec() is not inspected here; confirm
// whether per-command errors need to be checked.
await multi.exec()
} }
/** /**