const { batchedUpdate } = require('./helpers/batchedUpdate') const { promiseMapWithLimit, promisify } = require('@overleaf/promise-utils') const { db } = require('../app/src/infrastructure/mongodb') const sleep = promisify(setTimeout) const _ = require('lodash') async function main(options) { if (!options) { options = {} } _.defaults(options, { writeConcurrency: parseInt(process.env.WRITE_CONCURRENCY, 10) || 10, performCleanup: process.argv.includes('--perform-cleanup'), fixPartialInserts: process.argv.includes('--fix-partial-inserts'), letUserDoubleCheckInputsFor: parseInt( process.env.LET_USER_DOUBLE_CHECK_INPUTS_FOR || 10 * 1000, 10 ), }) await letUserDoubleCheckInputs(options) await batchedUpdate( 'projects', // array is not empty ~ array has one item { 'deletedFiles.0': { $exists: true } }, async projects => { await processBatch(projects, options) }, { _id: 1, deletedFiles: 1 } ) } async function processBatch(projects, options) { await promiseMapWithLimit( options.writeConcurrency, projects, async project => { await processProject(project, options) } ) } async function processProject(project, options) { await backFillFiles(project, options) if (options.performCleanup) { await cleanupProject(project) } } async function backFillFiles(project, options) { const projectId = project._id filterDuplicatesInPlace(project) project.deletedFiles.forEach(file => { file.projectId = projectId }) if (options.fixPartialInserts) { await fixPartialInserts(project) } else { await db.deletedFiles.insertMany(project.deletedFiles) } } function filterDuplicatesInPlace(project) { const fileIds = new Set() project.deletedFiles = project.deletedFiles.filter(file => { const id = file._id.toString() if (fileIds.has(id)) return false fileIds.add(id) return true }) } async function fixPartialInserts(project) { const seenFileIds = new Set( ( await db.deletedFiles .find( { _id: { $in: project.deletedFiles.map(file => file._id) } }, { projection: { _id: 1 } } ) .toArray() ).map(file => file._id.toString()) ) project.deletedFiles = project.deletedFiles.filter(file => { const id = file._id.toString() if (seenFileIds.has(id)) return false seenFileIds.add(id) return true }) if (project.deletedFiles.length > 0) { await db.deletedFiles.insertMany(project.deletedFiles) } } async function cleanupProject(project) { await db.projects.updateOne( { _id: project._id }, { $set: { deletedFiles: [] } } ) } async function letUserDoubleCheckInputs(options) { if (options.performCleanup) { console.error('BACK FILLING AND PERFORMING CLEANUP') } else { console.error( 'BACK FILLING ONLY - You will need to rerun with --perform-cleanup' ) } console.error( 'Waiting for you to double check inputs for', options.letUserDoubleCheckInputsFor, 'ms' ) await sleep(options.letUserDoubleCheckInputsFor) } module.exports = main if (require.main === module) { main() .then(() => { process.exit(0) }) .catch(error => { console.error({ error }) process.exit(1) }) }