/*
 * Provenance (from the original patch header):
 *   From 18ecbf37e62cf24d21205eefcffb79ead50472b0 Mon Sep 17 00:00:00 2001
 *   From: Brian Gough
 *   Date: Thu, 25 Nov 2021 14:08:06 +0000
 *   Subject: [PATCH] Merge pull request #5916 from
 *            overleaf/bg-fix-missing-project-ids
 *
 *   add script to fix missing project ids
 *
 *   GitOrigin-RevId: 5b98705859fa041816d5fb1ad431e868b0e81457
 *   File: services/document-updater/scripts/fix_docs_with_missing_project.js
 *
 * Maintenance script: scans redis for doc-lines keys, finds docs whose
 * project-id key is missing, looks the project id up in the mongo `docs`
 * collection, writes it back to redis and then flushes and deletes the doc.
 *
 * Runs in dry-run mode unless the environment variable DRY_RUN is exactly
 * the string 'false'.
 */

const Settings = require('@overleaf/settings')
const logger = require('@overleaf/logger')
const rclient = require('@overleaf/redis-wrapper').createClient(
  Settings.redis.documentupdater
)
const keys = Settings.redis.documentupdater.key_schema
// NOTE(review): these two paths are bare specifiers, so they presumably only
// resolve when NODE_PATH (or equivalent) points at the service root -- confirm
// how the script is meant to be launched.
const ProjectFlusher = require('app/js/ProjectFlusher')
const DocumentManager = require('app/js/DocumentManager')
const util = require('util')
const flushAndDeleteDocWithLock = util.promisify(
  DocumentManager.flushAndDeleteDocWithLock
)

const { MongoClient, ObjectId } = require('mongodb')

const clientPromise = MongoClient.connect(
  Settings.mongo.url,
  Settings.mongo.options
)

// Holds the mongo collection handles; `db.docs` is assigned in main() once
// the connection has been established, before any doc is processed.
const db = {}

/**
 * Check every doc referenced by a batch of redis keys and repair those that
 * have no project id stored in redis.
 *
 * A failure while repairing one doc is logged and does not stop the batch.
 * A failure of the redis lookup itself is not caught here and propagates.
 *
 * @param {string[]} dockeys - redis keys, as returned by SCAN, from which the
 *   doc ids are extracted with ProjectFlusher._extractIds
 * @param {{dryRun: boolean}} options - passed through to
 *   insertMissingProjectId
 * @returns {Promise<void>}
 */
async function fixDocsWithMissingProjectIds(dockeys, options) {
  const docIds = ProjectFlusher._extractIds(dockeys)
  for (const docId of docIds) {
    const projectId = await rclient.get(keys.projectKey({ doc_id: docId }))
    logger.debug({ docId, projectId }, 'checking doc')
    if (projectId) {
      continue
    }
    try {
      await insertMissingProjectId(docId, options)
    } catch (err) {
      logger.error({ docId, err }, 'error fixing doc without project id')
    }
  }
}

/**
 * Look up a doc's project id in mongo and store it in redis, then flush and
 * delete the doc.
 *
 * Nothing is written when the doc is missing from mongo, when the mongo doc
 * has no project id, or when `options.dryRun` is set.
 *
 * @param {string} docId - id of the doc whose redis project-id key is missing
 * @param {{dryRun: boolean}} options
 * @returns {Promise<string|undefined>} the project id written to redis, or
 *   undefined when nothing was written
 */
async function insertMissingProjectId(docId, options) {
  // `new` keeps this working on driver versions where ObjectId is an ES class
  // and cannot be called as a plain function.
  const doc = await db.docs.findOne({ _id: new ObjectId(docId) })
  if (!doc) {
    logger.warn({ docId }, 'doc not found in mongo')
    return
  }
  if (!doc.project_id) {
    logger.error({ docId }, 'doc does not have project id in mongo')
    return
  }
  logger.debug({ docId, doc }, 'found doc')
  const projectIdFromMongo = doc.project_id.toString()
  if (options.dryRun) {
    logger.info(
      { projectIdFromMongo, docId },
      'dry run mode - would insert project id in redis'
    )
    return
  }
  // set the project id for this doc
  await rclient.set(keys.projectKey({ doc_id: docId }), projectIdFromMongo)
  logger.debug({ docId, projectIdFromMongo }, 'inserted project id in redis')
  // project_id was verified above, so the flush can run unconditionally
  await flushAndDeleteDocWithLock(projectIdFromMongo, docId, {})
  logger.info({ docId, projectIdFromMongo }, 'fixed doc with empty project id')
  return projectIdFromMongo
}

/**
 * Iterate over all doc-lines keys in redis with SCAN and repair each batch.
 *
 * @param {{limit: number, dryRun: boolean}} options - `limit` is passed to
 *   SCAN as its COUNT argument; `dryRun` is forwarded to the repair step
 * @returns {Promise<void>}
 */
async function findAndProcessDocs(options) {
  logger.info({ options }, 'fixing docs with missing projcct id')
  let cursor = 0
  do {
    const [newCursor, doclinesKeys] = await rclient.scan(
      cursor,
      'MATCH',
      keys.docLines({ doc_id: '*' }),
      'COUNT',
      options.limit
    )
    await fixDocsWithMissingProjectIds(doclinesKeys, options)
    cursor = newCursor
    // the loop ends when SCAN hands back the string '0' as the next cursor
  } while (cursor !== '0')
}

/**
 * Close the redis and mongo connections opened by this script.
 *
 * Each close is attempted independently so that a failure of one does not
 * prevent the other; failures are logged and not rethrown.
 *
 * @param {object} client - the connected mongo client
 * @returns {Promise<void>}
 */
async function closeConnections(client) {
  try {
    await rclient.quit()
  } catch (err) {
    logger.warn({ err }, 'error closing redis connection')
  }
  try {
    await client.close()
  } catch (err) {
    logger.warn({ err }, 'error closing mongo connection')
  }
}

/**
 * Script entry point: connect to mongo, process all docs, and always release
 * the connections afterwards, whether processing succeeded or failed.
 *
 * @returns {Promise<void>}
 */
async function main() {
  const client = await clientPromise
  db.docs = client.db().collection('docs')
  try {
    await findAndProcessDocs({
      limit: 1000,
      dryRun: process.env.DRY_RUN !== 'false',
    })
  } finally {
    await closeConnections(client)
  }
  console.log('DONE')
}

main().catch(error => {
  // Report the failure and exit non-zero instead of rethrowing inside a
  // promise handler, which would only surface as an unhandled rejection.
  console.error(error)
  process.exit(1)
})