mirror of
https://github.com/overleaf/overleaf.git
synced 2024-11-21 20:47:08 -05:00
1988d0afb5
Co-authored-by: Jakob Ackermann <jakob.ackermann@overleaf.com>
131 lines
3.8 KiB
JavaScript
131 lines
3.8 KiB
JavaScript
const DocArchiveManager = require('../app/js/DocArchiveManager').promises
|
|
const MongoManager = require('../app/js/MongoManager').promises
|
|
const { getCollection, ObjectId } = require('../app/js/mongodb')
|
|
const minimist = require('minimist')
|
|
|
|
/**
 * Process a single project: unarchive its docs if any were archived, load the
 * doc lines, re-archive in the background (only if the project was archived
 * to begin with) and validate the loaded content while that runs.
 *
 * All results are reported on stderr: one 'WARN:' line per problematic doc,
 * 'OK:' when no warning was raised, or 'ERROR:' when anything threw. Errors
 * are caught and logged here, so the returned promise does not reject.
 *
 * @param {*} projectId - project id, passed through unchanged to MongoManager
 *   and DocArchiveManager
 * @returns {Promise<void>} settles only after any re-archive started here has
 *   finished
 */
async function worker(projectId) {
  try {
    // see if the project needs to be unarchived, and unarchive it
    const archivedDocs = await MongoManager.getArchivedProjectDocs(projectId)
    const wasArchived = archivedDocs.length > 0
    if (wasArchived) {
      await DocArchiveManager.unArchiveAllDocs(projectId)
    }

    // get the doc content so we can validate it
    const docs = await MongoManager.getProjectsDocs(
      projectId,
      { include_deleted: false },
      { lines: 1 }
    )

    // start archiving in the background while we check the content, if it was
    // archived to begin with
    let archivePromise
    if (wasArchived) {
      archivePromise = DocArchiveManager.archiveAllDocs(projectId)
    }

    let warning = false
    try {
      // validate the doc contents and log any warnings to investigate later
      for (const doc of docs) {
        if (!doc.lines) {
          warning = true
          console.error('WARN:', projectId, doc._id, 'has no content')
          continue
        }
        // eslint-disable-next-line no-control-regex
        if (doc.lines.some((line) => line.match(/(\r|\u0000)/))) {
          warning = true
          console.error('WARN:', projectId, doc._id, 'has invalid characters')
        }
      }
    } finally {
      // Always wait for the archive process, even when validation threw.
      // Otherwise the job would be reported as finished while archiving is
      // still running, and a later rejection would go unhandled. If both
      // fail, the archive error is the one that gets logged below.
      if (archivePromise) {
        await archivePromise
      }
    }

    if (!warning) {
      // log to stderr along with the other output
      console.error('OK:', projectId)
    }
  } catch (err) {
    console.error('ERROR:', projectId, err)
  }
}
|
|
|
|
/**
 * Run worker() for every project in the `projects` collection, in ascending
 * _id order, keeping a bounded number of workers in flight.
 *
 * Command line arguments (read from process.argv):
 *   [startId]  first positional argument; only projects with _id >= startId
 *   -e endId   only projects with _id <= endId
 *   -w n       maximum number of concurrent workers (defaults to 1)
 *
 * @returns {Promise<void>} resolves once every started worker has settled
 * @throws {Error} when the start or end id is not a 24 character hex string
 */
async function rearchiveAllDocs() {
  // Parse ids as strings: by default minimist turns number-like arguments
  // (for example an object id made up of digits only) into Numbers, which
  // would corrupt the id and make the validation below reject it.
  const params = minimist(process.argv.slice(2), { string: ['_', 'e'] })
  const maxWorkers = params.w || 1
  console.log(`Starting with ${maxWorkers} workers`)

  const objectIdPattern = /^[0-9a-fA-F]{24}$/

  // start from an objectId and run in ascending order, so we can resume later
  const query = {}
  const startId = params._[0]
  const endId = params.e
  if (startId) {
    if (!objectIdPattern.test(startId)) {
      throw new Error('Invalid start object id')
    }
    query._id = { $gte: new ObjectId(startId) }
    console.log(`Starting from object ID ${startId}`)
  } else {
    console.log('No object id specified. Starting from the beginning.')
  }

  // `-e` given without a value parses to '' and is rejected here rather than
  // being silently treated as "no upper bound"
  if (endId !== undefined) {
    if (!objectIdPattern.test(endId)) {
      throw new Error('Invalid end object id')
    }
    query._id = query._id || {}
    query._id.$lte = new ObjectId(endId)
    console.log(`Stopping at object ID ${endId}`)
  }

  // Only the _id is needed. The projection goes under the `projection` key of
  // the options argument; NOTE(review): assumes getCollection() returns a
  // native MongoDB driver collection - confirm against ../app/js/mongodb.
  const collection = await getCollection('projects')
  const cursor = collection
    .find(query, { projection: { _id: 1 } })
    .sort({ _id: 1 })

  // promises of the jobs that are currently running
  const running = new Set()

  // keep going until we run out of projects
  while (true) {
    // get a new project to run a job with
    const project = await cursor.next()
    if (!project) {
      break
    }

    // wait until there are fewer than maxWorkers jobs running
    while (running.size >= maxWorkers) {
      await Promise.race(running)
    }

    // start a new job in the background and then continue the loop; the job
    // promise never rejects and removes itself from the set once settled
    const job = worker(project._id)
      .catch(() => {
        console.error('ERROR:', project._id)
      })
      .finally(() => {
        running.delete(job)
      })
    running.add(job)
  }

  // there are no more projects: wait until all the jobs have finished
  await Promise.all(running)
}
|
|
|
|
// Run the script only when this file is executed directly. `require.main ===
// module` is used instead of the deprecated `module.parent`, which is also
// undefined when the file is loaded from an ES module and would then start
// the script on import.
if (require.main === module) {
  rearchiveAllDocs()
    .then(() => {
      console.log('Finished!')
      process.exit(0)
    })
    .catch((err) => {
      // exit non-zero so callers can detect the failure
      console.error('Something went wrong:', err)
      process.exit(1)
    })
}
|