overleaf/services/web/scripts/back_fill_deleted_files.mjs

135 lines
3.3 KiB
JavaScript
Raw Normal View History

import BatchedUpdateModule from './helpers/batchedUpdate.mjs'
import { promiseMapWithLimit, promisify } from '@overleaf/promise-utils'
import { db } from '../app/src/infrastructure/mongodb.js'
import _ from 'lodash'
import { fileURLToPath } from 'url'
const { batchedUpdate } = BatchedUpdateModule
const sleep = promisify(setTimeout)
async function main(options) {
if (!options) {
options = {}
}
_.defaults(options, {
writeConcurrency: parseInt(process.env.WRITE_CONCURRENCY, 10) || 10,
performCleanup: process.argv.includes('--perform-cleanup'),
fixPartialInserts: process.argv.includes('--fix-partial-inserts'),
letUserDoubleCheckInputsFor: parseInt(
process.env.LET_USER_DOUBLE_CHECK_INPUTS_FOR || 10 * 1000,
10
),
})
await letUserDoubleCheckInputs(options)
await batchedUpdate(
'projects',
// array is not empty ~ array has one item
{ 'deletedFiles.0': { $exists: true } },
async projects => {
await processBatch(projects, options)
},
{ _id: 1, deletedFiles: 1 }
)
}
async function processBatch(projects, options) {
await promiseMapWithLimit(
options.writeConcurrency,
projects,
async project => {
await processProject(project, options)
}
)
}
async function processProject(project, options) {
await backFillFiles(project, options)
if (options.performCleanup) {
await cleanupProject(project)
}
}
async function backFillFiles(project, options) {
const projectId = project._id
filterDuplicatesInPlace(project)
project.deletedFiles.forEach(file => {
file.projectId = projectId
})
if (options.fixPartialInserts) {
await fixPartialInserts(project)
} else {
await db.deletedFiles.insertMany(project.deletedFiles)
}
}
function filterDuplicatesInPlace(project) {
const fileIds = new Set()
project.deletedFiles = project.deletedFiles.filter(file => {
const id = file._id.toString()
if (fileIds.has(id)) return false
fileIds.add(id)
return true
})
}
async function fixPartialInserts(project) {
const seenFileIds = new Set(
(
await db.deletedFiles
.find(
{ _id: { $in: project.deletedFiles.map(file => file._id) } },
{ projection: { _id: 1 } }
)
.toArray()
).map(file => file._id.toString())
)
project.deletedFiles = project.deletedFiles.filter(file => {
const id = file._id.toString()
if (seenFileIds.has(id)) return false
seenFileIds.add(id)
return true
})
if (project.deletedFiles.length > 0) {
await db.deletedFiles.insertMany(project.deletedFiles)
}
}
async function cleanupProject(project) {
await db.projects.updateOne(
{ _id: project._id },
{ $set: { deletedFiles: [] } }
)
}
async function letUserDoubleCheckInputs(options) {
if (options.performCleanup) {
console.error('BACK FILLING AND PERFORMING CLEANUP')
} else {
console.error(
'BACK FILLING ONLY - You will need to rerun with --perform-cleanup'
)
}
console.error(
'Waiting for you to double check inputs for',
options.letUserDoubleCheckInputsFor,
'ms'
)
await sleep(options.letUserDoubleCheckInputsFor)
}
export default main
if (fileURLToPath(import.meta.url) === process.argv[1]) {
try {
await main()
process.exit(0)
} catch (error) {
console.error({ error })
process.exit(1)
}
}