overleaf/services/web/scripts/history/migrate_history.js
Brian Gough 9276ba87b2 Merge pull request #12121 from overleaf/bg-fix-bad-offsets-in-history-migration
fix out of range offsets to avoid invalid history exports

GitOrigin-RevId: d00e3e9e08feb6c3be671b5805102a66782b3f92
2023-03-08 09:04:00 +00:00

291 lines
8.3 KiB
JavaScript

// Default the mongo socket timeout to one hour (3600000 ms) unless the
// environment already provides a usable (non-zero, numeric) value.
const configuredMongoSocketTimeout = parseInt(
  process.env.MONGO_SOCKET_TIMEOUT,
  10
)
process.env.MONGO_SOCKET_TIMEOUT = configuredMongoSocketTimeout || 3600000
const fs = require('fs')
// When the container environment file exists, copy every entry it holds into
// process.env before the rest of the script (and its dependencies) load.
const CONTAINER_ENVIRONMENT_FILE = '/etc/container_environment.json'
if (fs.existsSync(CONTAINER_ENVIRONMENT_FILE)) {
  try {
    const rawEnvironment = fs.readFileSync(CONTAINER_ENVIRONMENT_FILE, 'utf8')
    const envData = JSON.parse(rawEnvironment)
    Object.keys(envData).forEach(key => {
      process.env[key] = envData[key]
    })
  } catch (err) {
    // Any read or parse failure is fatal: report it and stop.
    console.error(
      'cannot read /etc/container_environment.json, the script needs to be run as root',
      err
    )
    process.exit(1)
  }
}
// Version tag of this CLI; it is passed to upgradeProject() as the `reason`
// option for every migration started by this script.
const VERSION = '0.9.0-cli'
// Helpers from the history-migration module: counting projects and
// docHistory records, listing projects, and upgrading a single project.
const {
countProjects,
countDocHistory,
upgradeProject,
findProjects,
} = require('../../modules/history-migration/app/src/HistoryUpgradeHelper')
const { waitForDb } = require('../../app/src/infrastructure/mongodb')
const minimist = require('minimist')
const util = require('util')
const pLimit = require('p-limit')
const logger = require('@overleaf/logger')
logger.initialize('history-migration')
// disable logging to stdout from internal modules
logger.logger.streams = []
// Default log file name: "history-migration-<ISO timestamp>.log", with the
// ':' and '.' characters of the timestamp replaced by '_'.
const DEFAULT_OUTPUT_FILE = `history-migration-${new Date()
.toISOString()
.replace(/[:.]/g, '_')}.log`
// Command-line interface definition, parsed with minimist.
// NOTE(review): several of the declared options (verbose, use-query-hint,
// retry-failed, archive-on-failure, batch-size, max-upgrades-to-attempt,
// max-failures) are not read anywhere in the visible part of this script;
// confirm whether they are still needed.
const booleanFlags = [
  'verbose',
  'fix-invalid-characters',
  'convert-large-docs-to-file',
  'import-broken-history-as-zip',
  'force-upgrade-on-failure',
  'dry-run',
  'use-query-hint',
  'retry-failed',
  'archive-on-failure',
  'force-clean',
]
const stringOptions = ['output', 'user-id']
// long option name -> single-letter short form
const optionAliases = {
  verbose: 'v',
  output: 'o',
  'dry-run': 'd',
  concurrency: 'j',
  'use-query-hint': 'q',
  'retry-failed': 'r',
  'archive-on-failure': 'a',
}
// values used when an option is not given on the command line
const optionDefaults = {
  output: DEFAULT_OUTPUT_FILE,
  concurrency: 1,
  'batch-size': 100,
  'max-upgrades-to-attempt': false,
  'max-failures': 50,
}
const argv = minimist(process.argv.slice(2), {
  boolean: booleanFlags,
  string: stringOptions,
  alias: optionAliases,
  default: optionDefaults,
})
// Set to true by the SIGINT handler. While it is true no new project
// migrations are started and the progress bar stops updating; migrations
// already in flight are left to finish.
let INTERRUPT = false
/**
 * Print migration statistics and collect the projects that still need to be
 * migrated.
 *
 * Exits the process with status 0 when the migrated and total project counts
 * are equal, and with status 1 when the docHistory collection holds records
 * that have no project_id.
 *
 * @returns {Promise<*>} whatever findProjects() resolves to for the projects
 *   whose `overleaf.history.display` is not true (projected to `_id` and
 *   `overleaf`)
 */
async function findProjectsToMigrate() {
  const migratedQuery = { 'overleaf.history.display': true }
  const unmigratedQuery = { 'overleaf.history.display': { $ne: true } }
  const projection = { _id: 1, overleaf: 1 }
  const orphanedHistoryQuery = { project_id: { $exists: false } }

  console.log('History Migration Statistics')

  // Overall progress: how many projects are already on the new history
  const alreadyMigrated = await countProjects(migratedQuery)
  const allProjects = await countProjects()
  console.log('Migrated Projects : ', alreadyMigrated)
  console.log('Total Projects : ', allProjects)
  console.log('Remaining Projects : ', allProjects - alreadyMigrated)
  if (alreadyMigrated === allProjects) {
    console.log('All projects have been migrated')
    process.exit(0)
  }

  // The work list for this run
  const pendingProjects = await findProjects(unmigratedQuery, projection)

  // Refuse to continue if any history record is not attached to a project
  const orphanedRecords = await countDocHistory(orphanedHistoryQuery)
  if (orphanedRecords > 0) {
    console.log(
      `WARNING: docHistory collection contains ${orphanedRecords} records without project_id`
    )
    process.exit(1)
  }

  // Add up the history records of every pending project, one query at a time
  let historyRecordTotal = 0
  for await (const pending of pendingProjects) {
    const recordsForProject = await countDocHistory({
      project_id: pending._id,
    })
    historyRecordTotal = historyRecordTotal + recordsForProject
  }
  console.log('Total history records to migrate:', historyRecordTotal)

  return pendingProjects
}
/**
 * Create a progress-bar renderer. The elapsed time shown by the renderer is
 * measured from the moment this factory is called.
 *
 * @returns {function(number, number, string): void} progressBar(current,
 *   total, msg): rewrites the current terminal line (leading '\r', no
 *   trailing newline) on process.stdout with the elapsed time as hh:mm:ss, a
 *   20-character bar, the percentage, the raw `current/total` counts and
 *   `msg`.
 */
function createProgressBar() {
  const startTime = new Date()
  const barLength = 20
  // zero-padded, two-digit rendering of a non-negative number
  const twoDigits = value => Math.floor(value).toString().padStart(2, '0')
  return function progressBar(current, total, msg) {
    // Clamp the completed fraction to [0, 1]. Without this a zero total
    // renders "NaN%", and current > total makes the padding length negative,
    // which causes String.prototype.repeat to throw a RangeError.
    const fraction =
      total > 0 ? Math.min(Math.max(current / total, 0), 1) || 0 : 0
    const percentage = Math.floor(fraction * 100)
    const filled = Math.floor((percentage * barLength) / 100)
    const bar = '='.repeat(filled)
    const empty = ' '.repeat(barLength - filled)
    const elapsed = new Date() - startTime
    // convert elapsed time to hours, minutes, seconds
    const ss = twoDigits((elapsed / 1000) % 60)
    const mm = twoDigits((elapsed / (1000 * 60)) % 60)
    const hh = twoDigits(elapsed / (1000 * 60 * 60))
    process.stdout.write(
      `\r${hh}:${mm}:${ss} |${bar}${empty}| ${percentage}% (${current}/${total}) ${msg}`
    )
  }
}
/**
 * Upgrade every project in `projectsToMigrate` by calling upgradeProject(),
 * running at most `argv.concurrency` upgrades at a time.
 *
 * One JSON line per processed project is appended to the file named by
 * `argv.output`, and a progress bar is redrawn on stdout twice per second.
 * Once INTERRUPT is set, projects that have not started yet are skipped.
 *
 * The log stream is ended and flushed before this function resolves, so a
 * caller may safely call process.exit() afterwards.
 *
 * @param {Array<{_id: *}>} projectsToMigrate projects to upgrade; only `_id`
 *   is read here
 * @returns {Promise<{projectsMigrated: number, projectsFailed: number}>}
 */
async function migrateProjects(projectsToMigrate) {
  let projectsMigrated = 0
  let projectsFailed = 0
  console.log('Starting migration...')
  if (argv.concurrency > 1) {
    console.log(`Using ${argv.concurrency} concurrent migrations`)
  }
  // send log output for each migration to a file
  const output = fs.createWriteStream(argv.output, { flags: 'a' })
  console.log(`Writing log output to ${argv.output}`)
  const fileLogger = new console.Console({ stdout: output })
  function logJson(obj) {
    fileLogger.log(JSON.stringify(obj))
  }
  // limit the number of concurrent migrations
  const limit = pLimit(argv.concurrency)
  // throttle progress reporting to 2x per second
  const progressBar = createProgressBar()
  let i = 0 // number of projects processed so far (succeeded or failed)
  const N = projectsToMigrate.length
  const summary = () =>
    `Migrated: ${projectsMigrated}, Failed: ${projectsFailed}`
  const progressBarTimer = setInterval(() => {
    if (INTERRUPT) {
      return // don't update the progress bar if we're shutting down
    }
    progressBar(i, N, summary())
  }, 500)
  // NOTE(review): archiveOnFailure is taken from --import-broken-history-as-zip
  // while the separately declared --archive-on-failure flag is never read;
  // confirm this mapping is intended.
  const options = {
    migrationOptions: {
      archiveOnFailure: argv['import-broken-history-as-zip'],
      fixInvalidCharacters: argv['fix-invalid-characters'],
      forceNewHistoryOnFailure: argv['force-upgrade-on-failure'],
    },
    convertLargeDocsToFile: argv['convert-large-docs-to-file'],
    userId: argv['user-id'],
    reason: VERSION,
    forceClean: argv['force-clean'],
  }
  // Upgrade a single project and record the outcome. Never rejects: every
  // failure is counted and written to the log instead.
  async function _migrateProject(project) {
    if (INTERRUPT) {
      return // don't start any new jobs if we're shutting down
    }
    const startTime = new Date()
    try {
      let result
      try {
        result = await upgradeProject(project._id, options)
      } finally {
        // count the project as processed whether the upgrade returned or
        // threw, so the progress bar can reach N after exceptions too
        i++
      }
      if (INTERRUPT && limit.activeCount > 1) {
        // an interrupt was requested while this job was running
        // report that we're waiting for the remaining jobs to finish
        console.log(
          `Waiting for remaining ${
            limit.activeCount - 1
          } active jobs to finish\r`
        )
      }
      if (result.error) {
        // failed to migrate this project
        logJson({
          project_id: project._id,
          result,
          stack: result.error.stack,
          startTime,
          endTime: new Date(),
        })
        projectsFailed++
      } else {
        // successfully migrated this project
        logJson({
          project_id: project._id,
          result,
          startTime,
          endTime: new Date(),
        })
        projectsMigrated++
      }
    } catch (err) {
      // unexpected error from the migration
      projectsFailed++
      logJson({
        project_id: project._id,
        exception: util.inspect(err),
        startTime,
        endTime: new Date(),
      })
    }
  }
  try {
    const jobs = projectsToMigrate.map(project =>
      limit(_migrateProject, project)
    )
    // wait for all the queued jobs to complete
    await Promise.all(jobs)
  } finally {
    // always stop the timer, otherwise it would keep redrawing the bar
    clearInterval(progressBarTimer)
  }
  progressBar(i, N, summary())
  process.stdout.write('\n')
  // End the log stream and wait for it to finish: the caller exits the
  // process right after this function returns, and process.exit() does not
  // wait for pending asynchronous file writes.
  await new Promise(resolve => output.end(resolve))
  return { projectsMigrated, projectsFailed }
}
/**
 * Script entry point: gather the projects to migrate, stop early on a dry
 * run, otherwise migrate them and print a summary.
 *
 * Always ends by calling process.exit(): status 1 when at least one project
 * failed, status 0 otherwise (including dry runs).
 */
async function main() {
  const pendingProjects = await findProjectsToMigrate()
  if (argv['dry-run']) {
    console.log('Dry run, exiting')
    process.exit(0)
  }

  const outcome = await migrateProjects(pendingProjects)
  const hasFailures = outcome.projectsFailed > 0

  console.log('Projects migrated: ', outcome.projectsMigrated)
  console.log('Projects failed: ', outcome.projectsFailed)
  if (hasFailures) {
    // point the operator at the per-project log for the failure details
    console.log(`Log output written to ${argv.output}`)
    console.log('Please check the log for errors.')
  }

  if (INTERRUPT) {
    console.log('Migration interrupted, please run again to continue.')
  } else if (outcome.projectsFailed === 0) {
    console.log(`All projects migrated successfully.`)
  }

  console.log('Done.')
  process.exit(hasFailures ? 1 : 0)
}
// A history upgrade is not atomic: stopping in the middle of one could leave
// that project's history in a broken state. So on SIGINT we only raise the
// INTERRUPT flag; projects that have not started are then skipped and the
// script exits once the upgrades already running have completed.
const onSigint = () => {
  console.log(
    '\nCaught SIGINT, waiting for all in-progess upgrades to complete'
  )
  INTERRUPT = true
}
process.on('SIGINT', onSigint)
// Start the script: wait for the database, then run main(). Any rejection
// from either step is printed to stderr and the process exits with status 1.
const databaseReady = waitForDb()
databaseReady.then(main).catch(function onFatalError(err) {
  console.error(err)
  process.exit(1)
})