/**
 * Generate a new name based on the original, with a timestamp and an
 * optional label inserted between the base name and the extension.
 *
 * The timestamp is the UTC ISO-8601 representation of `now` with the 'T'
 * separator replaced by '-' and every other character that is not a digit or
 * a hyphen removed, i.e. `YYYY-MM-DD-HHMMSSmmm` (milliseconds included).
 *
 * e.g. origname-2021-01-01-122345678.tex (default)
 *      origname-restored-2021-01-01-122345678.tex (label="restored")
 *
 * Only the final path component of `docName` is kept (Path.basename), and a
 * name without an extension simply gets the suffix appended.
 *
 * @param {string} docName - original doc name, e.g. "main.tex"
 * @param {string} [label] - optional marker placed before the timestamp;
 *   omitted from the result when falsy
 * @param {Date} [now] - time used for the timestamp; defaults to the current
 *   time (exposed so callers and tests can get deterministic names)
 * @returns {string} the generated name
 */
function generateRestoredName(docName, label, now = new Date()) {
  const formattedTimestamp = now
    .toISOString()
    .replace('T', '-')
    .replace(/[^0-9-]/g, '')
  const extension = Path.extname(docName)
  const labelSuffix = label ? `-${label}` : ''
  const basename = Path.basename(docName, extension) + labelSuffix
  return `${basename}-${formattedTimestamp}${extension}`
}
// recover docs from redis where there is no doc in mongo but the project exists
//
// Usage: node scripts/recover_docs_from_redis.js [--commit]
// Without --commit the script only logs what it would do (dry run).

const minimist = require('minimist')
const { db, waitForDb, ObjectId } = require('../app/src/infrastructure/mongodb')
const ProjectEntityUpdateHandler = require('../app/src/Features/Project/ProjectEntityUpdateHandler')
const ProjectEntityRestoreHandler = require('../app/src/Features/Project/ProjectEntityRestoreHandler')
const RedisWrapper = require('@overleaf/redis-wrapper')
const Settings = require('@overleaf/settings')
const logger = require('@overleaf/logger')
const opts = parseArgs()
const redis = RedisWrapper.createClient(Settings.redis.web)

/**
 * Parse the command line.
 *
 * @returns {{commit: boolean, maxDocSize: number}} `commit` is the --commit
 *   flag; `maxDocSize` is the fixed size limit (2 MiB) above which a doc is
 *   not restored.
 */
function parseArgs() {
  const args = minimist(process.argv.slice(2), {
    boolean: ['commit'],
  })
  const commit = args.commit
  return { commit, maxDocSize: 2 * 1024 * 1024 }
}

/**
 * Extract the 24-hex-digit id that follows the ':' in a redis key such as
 * `doclines:{<id>}` (the braces are optional).
 *
 * @param {string} s - redis key
 * @returns {string|null} the id, or null when the key does not contain one
 */
function extractObjectId(s) {
  const m = s.match(/:\{?([0-9a-f]{24})\}?/)
  return m ? m[1] : null
}

/**
 * Scan redis for `doclines:{*}` keys and try to recover each doc found.
 * Docs are processed one at a time.
 */
async function main() {
  await waitForDb()
  logger.info({ opts }, 'recovering docs from redis')
  let cursor = 0
  do {
    const [newCursor, doclinesKeys] = await redis.scan(
      cursor,
      'MATCH',
      'doclines:{*}',
      'COUNT',
      1000
    )
    for (const key of doclinesKeys) {
      const docId = extractObjectId(key)
      if (!docId) {
        // a key matching the glob but not holding an object id must not
        // abort the whole run
        logger.warn({ key }, 'unexpected doclines key format - skipping')
        continue
      }
      await processDoc(docId)
    }
    cursor = newCursor
    // the scan is complete when the returned cursor is the string '0'
  } while (cursor !== '0')
  if (!opts.commit) {
    console.log('This was a dry run. Re-run with --commit to apply changes')
  }
}

/**
 * Recover a single doc from redis into mongo if it is missing from mongo
 * and its project still exists. The doc is added to the project's root
 * folder under a generated "recovered" name and, on success, its keys are
 * removed from redis. Nothing is written unless --commit was given.
 *
 * @param {string} docId
 */
async function processDoc(docId) {
  // check if the doc is in mongo.. if so ignore it
  const docCount = await db.docs.find({ _id: ObjectId(docId) }).count()
  if (docCount > 0) {
    logger.debug({ docId }, 'doc is present in mongo - no recovery needed')
    return
  }
  // get the doc from redis and check if it has a project id
  const doc = await getDoc(docId)
  const projectId = doc.projectId
  if (!projectId) {
    logger.warn(
      { docId, doc },
      'projectId not available in redis, cannot restore - skipping'
    )
    // we could delete the document in redis here since we have no way to recover it
    return
  }
  // check that the project is in mongo, if not skip the doc
  const project = await db.projects.findOne({ _id: ObjectId(projectId) })
  if (!project) {
    logger.warn(
      { docId },
      'project not present in mongo - could remove doc from redis'
    )
    return
  }
  // the doclines key may have gone away between the scan and the read, in
  // which case there is no content to restore
  if (!Array.isArray(doc.lines)) {
    logger.warn(
      { docId, projectId },
      'doc lines not available in redis, cannot restore - skipping'
    )
    return
  }
  // if the doc is too big we will need to convert it to a file, skip it for now
  // (size counts each line's length plus one for the newline)
  const size = doc.lines.reduce((sum, line) => sum + line.length + 1, 0)
  if (size > opts.maxDocSize) {
    logger.warn(
      { docId, projectId, size },
      'doc that exceeds max size, cannot restore'
    )
    return
  }
  // now we have a doc content from redis, in a project where the doc has been deleted
  // (getDoc always supplies a name, falling back to 'unnamed')
  const restoredName = ProjectEntityRestoreHandler.generateRestoredName(
    doc.name,
    'recovered'
  )
  logger.info(
    { docId, projectId, restoredName, commit: opts.commit },
    'recovering doc from redis to mongo'
  )

  if (opts.commit) {
    const folderId = project.rootFolder[0]._id
    try {
      await ProjectEntityUpdateHandler.promises.addDocWithRanges(
        projectId,
        folderId,
        restoredName,
        doc.lines,
        doc.ranges,
        null
      )
      // only clear redis once the doc has been written successfully
      await deleteDocFromRedis(projectId, docId)
    } catch (err) {
      // log and carry on with the remaining docs
      logger.error(
        { docId, projectId, restoreErr: err },
        'error restoring doc from redis to mongo'
      )
    }
  }
}

/**
 * Read a doc's project id, lines, ranges and pathname from redis.
 *
 * @param {string} docId
 * @returns {Promise<{projectId: ?string, id: string, lines: ?Array, ranges: Object, name: string}>}
 *   `lines` is null when the doclines key is missing; `ranges` defaults to
 *   `{}`; `name` is the last segment of the pathname, or 'unnamed'.
 */
async function getDoc(docId) {
  const [projectId, lines, ranges, pathname] = await redis.mget(
    `ProjectId:{${docId}}`,
    `doclines:{${docId}}`,
    `Ranges:{${docId}}`,
    `Pathname:{${docId}}`
  )
  const name = pathname?.split('/').pop()
  return {
    projectId,
    id: docId,
    // JSON.parse(null) yields null, so a missing key gives lines === null
    lines: JSON.parse(lines),
    ranges: ranges ? JSON.parse(ranges) : {},
    name: name || 'unnamed',
  }
}

/**
 * Delete all per-doc keys for `docId` from redis and remove the doc from the
 * project's `DocsIn` set.
 *
 * @param {string} projectId
 * @param {string} docId
 */
async function deleteDocFromRedis(projectId, docId) {
  await redis.del(
    `Blocking:{${docId}}`,
    `doclines:{${docId}}`,
    `DocOps:{${docId}}`,
    `DocVersion:{${docId}}`,
    `DocHash:{${docId}}`,
    `ProjectId:{${docId}}`,
    `Ranges:{${docId}}`,
    `UnflushedTime:{${docId}}`,
    `Pathname:{${docId}}`,
    `ProjectHistoryId:{${docId}}`,
    `ProjectHistoryType:{${docId}}`,
    `PendingUpdates:{${docId}}`,
    `lastUpdatedAt:{${docId}}`,
    `lastUpdatedBy:{${docId}}`
  )
  // the member to remove is the doc, not the project
  // NOTE(review): presumably this set holds the ids of the project's docs
  // currently loaded in redis - confirm against document-updater
  await redis.srem(`DocsIn:{${projectId}}`, docId)
}

main()
  .then(() => {
    process.exit(0)
  })
  .catch(err => {
    console.error(err)
    process.exit(1)
  })