From 99fe2eca51c9a8e8351ede8a86bf10f90fdf061f Mon Sep 17 00:00:00 2001
From: June Kelly
Date: Wed, 4 Aug 2021 10:34:41 +0100
Subject: [PATCH] Merge pull request #4352 from overleaf/sk-add-ce-sp-migration-deleted-files

Add migrations for CE/SP, run scripts for deleted files and docs

GitOrigin-RevId: e425a8d929a4aefdc6f61cff636fb5a1b4c67dce
---
 ...0727123346_ce_sp_backfill_deleted_files.js |  14 +++
 ...10727150530_ce_sp_backfill_deleted_docs.js |  13 +++
 ...728115327_ce_sp_backfill_dummy_doc_meta.js |  23 ++++
 services/web/migrations/lib/adapter.js        |   6 +-
 .../web/scripts/back_fill_deleted_files.js    |  79 ++++++++-----
 .../back_fill_doc_name_for_deleted_docs.js    |  71 +++++++-----
 .../web/scripts/back_fill_dummy_doc_meta.js   | 106 +++++++++---------
 services/web/scripts/mark_migration.js        |  67 +++++++++++
 .../src/BackFillDummyDocMetaTests.js          |  10 +-
 9 files changed, 275 insertions(+), 114 deletions(-)
 create mode 100644 services/web/migrations/20210727123346_ce_sp_backfill_deleted_files.js
 create mode 100644 services/web/migrations/20210727150530_ce_sp_backfill_deleted_docs.js
 create mode 100644 services/web/migrations/20210728115327_ce_sp_backfill_dummy_doc_meta.js
 create mode 100644 services/web/scripts/mark_migration.js

diff --git a/services/web/migrations/20210727123346_ce_sp_backfill_deleted_files.js b/services/web/migrations/20210727123346_ce_sp_backfill_deleted_files.js
new file mode 100644
index 0000000000..fd2250be88
--- /dev/null
+++ b/services/web/migrations/20210727123346_ce_sp_backfill_deleted_files.js
@@ -0,0 +1,14 @@
+const runScript = require('../scripts/back_fill_deleted_files.js')
+
+exports.tags = ['server-ce', 'server-pro', 'saas']
+
+exports.migrate = async client => {
+  const options = {
+    performCleanup: true,
+    letUserDoubleCheckInputsFor: 10,
+    fixPartialInserts: true,
+  }
+  await runScript(options)
+}
+
+exports.rollback = async client => {}
diff --git a/services/web/migrations/20210727150530_ce_sp_backfill_deleted_docs.js b/services/web/migrations/20210727150530_ce_sp_backfill_deleted_docs.js
new file mode 100644
index 0000000000..79c007cf0d
--- /dev/null
+++ b/services/web/migrations/20210727150530_ce_sp_backfill_deleted_docs.js
@@ -0,0 +1,13 @@
+const runScript = require('../scripts/back_fill_doc_name_for_deleted_docs.js')
+
+exports.tags = ['server-ce', 'server-pro', 'saas']
+
+exports.migrate = async client => {
+  const options = {
+    performCleanup: true,
+    letUserDoubleCheckInputsFor: 10,
+  }
+  await runScript(options)
+}
+
+exports.rollback = async client => {}
diff --git a/services/web/migrations/20210728115327_ce_sp_backfill_dummy_doc_meta.js b/services/web/migrations/20210728115327_ce_sp_backfill_dummy_doc_meta.js
new file mode 100644
index 0000000000..37e9664c4d
--- /dev/null
+++ b/services/web/migrations/20210728115327_ce_sp_backfill_dummy_doc_meta.js
@@ -0,0 +1,23 @@
+const runScript = require('../scripts/back_fill_dummy_doc_meta.js')
+
+exports.tags = ['server-ce', 'server-pro', 'saas']
+
+exports.migrate = async client => {
+  const { db } = client
+  const [firstProject] = await db.projects
+    .find()
+    .sort({ _id: 1 })
+    .limit(1)
+    .toArray()
+  if (!firstProject) {
+    return
+  }
+  const options = {
+    firstProjectId: firstProject._id,
+    performCleanup: true,
+    letUserDoubleCheckInputsFor: 10,
+  }
+  await runScript(options)
+}
+
+exports.rollback = async client => {}
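
The three migrations above are deliberately thin wrappers: each tags itself for the editions it applies to and delegates to the corresponding back-fill script, passing letUserDoubleCheckInputsFor: 10 so the interactive ten-second safety pause shrinks to 10 ms under the migration runner. A minimal sketch of how a runner could drive such a module; the runIfTagged helper and the edition argument are illustrative assumptions, not the real runner API:

    // Hypothetical driver; the real runner lives in services/web/migrations/lib.
    const migration = require('./migrations/20210727123346_ce_sp_backfill_deleted_files')

    async function runIfTagged(client, edition) {
      // Each module declares the editions it applies to via exports.tags,
      // e.g. 'server-ce' for Community Edition.
      if (migration.tags.includes(edition)) {
        await migration.migrate(client) // delegates to the back-fill script
      }
    }
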
diff --git a/services/web/migrations/lib/adapter.js b/services/web/migrations/lib/adapter.js
index fa3b11ccb6..a254eac572
--- a/services/web/migrations/lib/adapter.js
+++ b/services/web/migrations/lib/adapter.js
@@ -7,7 +7,11 @@ const {
 class Adapter {
   constructor(params) {
-    if (!(process.argv.includes('-t') || process.argv.includes('--tags'))) {
+    if (
+      !process.env.SKIP_TAG_CHECK &&
+      !process.argv.includes('create') &&
+      !(process.argv.includes('-t') || process.argv.includes('--tags'))
+    ) {
       console.error("ERROR: must pass tags using '-t' or '--tags', exiting")
       process.exit(1)
     }
   }
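
Previously the Adapter refused to start unless -t/--tags was passed. The reworked guard adds two escape hatches: the SKIP_TAG_CHECK environment variable (set by scripts/mark_migration.js, added below) and the create subcommand, which presumably scaffolds a new migration and therefore needs no tags. A condensed restatement of the new condition; the function and variable names are illustrative:

    // Sketch of the guard's logic; only the three checks come from the patch.
    function shouldRejectStartup(env, argv) {
      const skippedByEnv = Boolean(env.SKIP_TAG_CHECK) // mark_migration.js sets this
      const creating = argv.includes('create') // creating a migration needs no tags
      const tagsGiven = argv.includes('-t') || argv.includes('--tags')
      return !skippedByEnv && !creating && !tagsGiven
    }
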
diff --git a/services/web/scripts/back_fill_deleted_files.js b/services/web/scripts/back_fill_deleted_files.js
index ea7d0e9466..9ae394af4d 100644
--- a/services/web/scripts/back_fill_deleted_files.js
+++ b/services/web/scripts/back_fill_deleted_files.js
@@ -1,58 +1,62 @@
-const WRITE_CONCURRENCY = parseInt(process.env.WRITE_CONCURRENCY, 10) || 10
-
 const { batchedUpdate } = require('./helpers/batchedUpdate')
 const { promiseMapWithLimit, promisify } = require('../app/src/util/promises')
 const { db } = require('../app/src/infrastructure/mongodb')
 
 const sleep = promisify(setTimeout)
+const _ = require('lodash')
 
-const PERFORM_CLEANUP = process.argv.includes('--perform-cleanup')
-const FIX_PARTIAL_INSERTS = process.argv.includes('--fix-partial-inserts')
-const LET_USER_DOUBLE_CHECK_INPUTS_FOR = parseInt(
-  process.env.LET_USER_DOUBLE_CHECK_INPUTS_FOR || 10 * 1000,
-  10
-)
+async function main(options) {
+  if (!options) {
+    options = {}
+  }
+  _.defaults(options, {
+    writeConcurrency: parseInt(process.env.WRITE_CONCURRENCY, 10) || 10,
+    performCleanup: process.argv.includes('--perform-cleanup'),
+    fixPartialInserts: process.argv.includes('--fix-partial-inserts'),
+    letUserDoubleCheckInputsFor: parseInt(
+      process.env.LET_USER_DOUBLE_CHECK_INPUTS_FOR || 10 * 1000,
+      10
+    ),
+  })
 
-async function main() {
-  await letUserDoubleCheckInputs()
+  await letUserDoubleCheckInputs(options)
   await batchedUpdate(
     'projects',
     // array is not empty ~ array has one item
     { 'deletedFiles.0': { $exists: true } },
-    processBatch,
+    async (x, projects) => {
+      await processBatch(x, projects, options)
+    },
     { _id: 1, deletedFiles: 1 }
   )
 }
 
-main()
-  .then(() => {
-    process.exit(0)
-  })
-  .catch(error => {
-    console.error({ error })
-    process.exit(1)
-  })
-
-async function processBatch(_, projects) {
-  await promiseMapWithLimit(WRITE_CONCURRENCY, projects, processProject)
+async function processBatch(_, projects, options) {
+  await promiseMapWithLimit(
+    options.writeConcurrency,
+    projects,
+    async project => {
+      await processProject(project, options)
+    }
+  )
 }
 
-async function processProject(project) {
-  await backFillFiles(project)
+async function processProject(project, options) {
+  await backFillFiles(project, options)
 
-  if (PERFORM_CLEANUP) {
+  if (options.performCleanup) {
     await cleanupProject(project)
   }
 }
 
-async function backFillFiles(project) {
+async function backFillFiles(project, options) {
   const projectId = project._id
   filterDuplicatesInPlace(project)
   project.deletedFiles.forEach(file => {
     file.projectId = projectId
   })
 
-  if (FIX_PARTIAL_INSERTS) {
+  if (options.fixPartialInserts) {
     await fixPartialInserts(project)
   } else {
     await db.deletedFiles.insertMany(project.deletedFiles)
@@ -98,8 +102,8 @@ async function cleanupProject(project) {
   )
 }
 
-async function letUserDoubleCheckInputs() {
-  if (PERFORM_CLEANUP) {
+async function letUserDoubleCheckInputs(options) {
+  if (options.performCleanup) {
     console.error('BACK FILLING AND PERFORMING CLEANUP')
   } else {
     console.error(
@@ -108,8 +112,21 @@
   }
   console.error(
     'Waiting for you to double check inputs for',
-    LET_USER_DOUBLE_CHECK_INPUTS_FOR,
+    options.letUserDoubleCheckInputsFor,
     'ms'
   )
-  await sleep(LET_USER_DOUBLE_CHECK_INPUTS_FOR)
+  await sleep(options.letUserDoubleCheckInputsFor)
+}
+
+module.exports = main
+
+if (require.main === module) {
+  main()
+    .then(() => {
+      process.exit(0)
+    })
+    .catch(error => {
+      console.error({ error })
+      process.exit(1)
+    })
 }
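
This file shows the pattern repeated across all three back-fill scripts: module-level constants read from process.env and process.argv become defaults on an options object, and the top-level main() call moves behind a require.main === module check, so the same file works as a CLI and as a require()-able library for the migrations. Both entry points, sketched with the flag and option names the script itself defines (the wrapper function is illustrative):

    // As a CLI, unchanged behaviour, defaults taken from env and argv:
    //   WRITE_CONCURRENCY=20 node scripts/back_fill_deleted_files.js --perform-cleanup
    // As a library, e.g. from the migration above:
    const runScript = require('./scripts/back_fill_deleted_files.js')

    async function backFillDeletedFiles() {
      // Explicit options win; _.defaults only fills keys that are undefined.
      await runScript({ performCleanup: true, fixPartialInserts: true })
    }
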
diff --git a/services/web/scripts/back_fill_doc_name_for_deleted_docs.js b/services/web/scripts/back_fill_doc_name_for_deleted_docs.js
index 805699f05b..20e778db29 100644
--- a/services/web/scripts/back_fill_doc_name_for_deleted_docs.js
+++ b/services/web/scripts/back_fill_doc_name_for_deleted_docs.js
@@ -1,46 +1,50 @@
-const WRITE_CONCURRENCY = parseInt(process.env.WRITE_CONCURRENCY, 10) || 10
-
 const { batchedUpdate } = require('./helpers/batchedUpdate')
 const { promiseMapWithLimit, promisify } = require('../app/src/util/promises')
 const { db } = require('../app/src/infrastructure/mongodb')
 
 const sleep = promisify(setTimeout)
+const _ = require('lodash')
 
-const PERFORM_CLEANUP = process.argv.pop() === '--perform-cleanup'
-const LET_USER_DOUBLE_CHECK_INPUTS_FOR = parseInt(
-  process.env.LET_USER_DOUBLE_CHECK_INPUTS_FOR || 10 * 1000,
-  10
-)
+async function main(options) {
+  if (!options) {
+    options = {}
+  }
+  _.defaults(options, {
+    writeConcurrency: parseInt(process.env.WRITE_CONCURRENCY, 10) || 10,
+    performCleanup: process.argv.pop() === '--perform-cleanup',
+    letUserDoubleCheckInputsFor: parseInt(
+      process.env.LET_USER_DOUBLE_CHECK_INPUTS_FOR || 10 * 1000,
+      10
+    ),
+  })
 
-async function main() {
-  await letUserDoubleCheckInputs()
+  await letUserDoubleCheckInputs(options)
   await batchedUpdate(
     'projects',
     // array is not empty ~ array has one item
     { 'deletedDocs.0': { $exists: true } },
-    processBatch,
+    async (_collection, projects) => {
+      await processBatch(_collection, projects, options)
+    },
    { _id: 1, deletedDocs: 1 }
   )
 }
 
-main()
-  .then(() => {
-    process.exit(0)
-  })
-  .catch(error => {
-    console.error({ error })
-    process.exit(1)
-  })
-
-async function processBatch(_, projects) {
-  await promiseMapWithLimit(WRITE_CONCURRENCY, projects, processProject)
+async function processBatch(_, projects, options) {
+  await promiseMapWithLimit(
+    options.writeConcurrency,
+    projects,
+    async project => {
+      await processProject(project, options)
+    }
+  )
 }
 
-async function processProject(project) {
+async function processProject(project, options) {
   for (const doc of project.deletedDocs) {
     await backFillDoc(doc)
   }
-  if (PERFORM_CLEANUP) {
+  if (options.performCleanup) {
     await cleanupProject(project)
   }
 }
@@ -57,8 +61,8 @@ async function cleanupProject(project) {
   )
 }
 
-async function letUserDoubleCheckInputs() {
-  if (PERFORM_CLEANUP) {
+async function letUserDoubleCheckInputs(options) {
+  if (options.performCleanup) {
    console.error('BACK FILLING AND PERFORMING CLEANUP')
   } else {
     console.error(
@@ -67,8 +71,21 @@
   }
   console.error(
     'Waiting for you to double check inputs for',
-    LET_USER_DOUBLE_CHECK_INPUTS_FOR,
+    options.letUserDoubleCheckInputsFor,
     'ms'
   )
-  await sleep(LET_USER_DOUBLE_CHECK_INPUTS_FOR)
+  await sleep(options.letUserDoubleCheckInputsFor)
+}
+
+module.exports = main
+
+if (require.main === module) {
+  main()
+    .then(() => {
+      process.exit(0)
+    })
+    .catch(error => {
+      console.error({ error })
+      process.exit(1)
+    })
 }
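
The same conversion is applied here. One detail both scripts now lean on is the _.defaults contract: defaults are copied only into keys the caller left undefined, which is what lets a migration force performCleanup: true while plain CLI runs still derive it from argv. A minimal illustration of that standard lodash behaviour:

    const _ = require('lodash')

    const options = { performCleanup: true }
    _.defaults(options, {
      performCleanup: false, // ignored: the caller already set this key
      writeConcurrency: 10, // applied: this key was undefined
    })
    // options is now { performCleanup: true, writeConcurrency: 10 }
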
diff --git a/services/web/scripts/back_fill_dummy_doc_meta.js b/services/web/scripts/back_fill_dummy_doc_meta.js
index 3b389390eb..270cf9d2b3 100644
--- a/services/web/scripts/back_fill_dummy_doc_meta.js
+++ b/services/web/scripts/back_fill_dummy_doc_meta.js
@@ -2,45 +2,54 @@ const { promisify } = require('util')
 const { ObjectId, ReadPreference } = require('mongodb')
 const { db, waitForDb } = require('../app/src/infrastructure/mongodb')
 const sleep = promisify(setTimeout)
+const _ = require('lodash')
 
 const NOW_IN_S = Date.now() / 1000
 const ONE_WEEK_IN_S = 60 * 60 * 24 * 7
 const TEN_SECONDS = 10 * 1000
 
-const CACHE_SIZE = parseInt(process.env.CACHE_SIZE, 10) || 100
-const DRY_RUN = process.env.DRY_RUN === 'true'
-if (!process.env.FIRST_PROJECT_ID) {
-  console.error('Set FIRST_PROJECT_ID and re-run.')
-  process.exit(1)
-}
-const FIRST_PROJECT_ID = ObjectId(process.env.FIRST_PROJECT_ID)
-const INCREMENT_BY_S = parseInt(process.env.INCREMENT_BY_S, 10) || ONE_WEEK_IN_S
-const BATCH_SIZE = parseInt(process.env.BATCH_SIZE, 10) || 1000
-const STOP_AT_S = parseInt(process.env.STOP_AT_S, 10) || NOW_IN_S
-const LET_USER_DOUBLE_CHECK_INPUTS_FOR =
-  parseInt(process.env.LET_USER_DOUBLE_CHECK_INPUTS_FOR, 10) || TEN_SECONDS
-
 const DUMMY_NAME = 'unknown.tex'
 const DUMMY_TIME = new Date('2021-04-12T00:00:00.000Z')
 
 const LRUCache = require('lru-cache')
-const deletedProjectsCache = new LRUCache({
-  max: CACHE_SIZE,
-})
+let deletedProjectsCache = null
 
 function getSecondsFromObjectId(id) {
   return id.getTimestamp().getTime() / 1000
 }
 
-async function main() {
-  await letUserDoubleCheckInputs()
+async function main(options) {
+  if (!options) {
+    options = {}
+  }
+  _.defaults(options, {
+    dryRun: process.env.DRY_RUN === 'true',
+    cacheSize: parseInt(process.env.CACHE_SIZE, 10) || 100,
+    firstProjectId: ObjectId(process.env.FIRST_PROJECT_ID),
+    incrementByS: parseInt(process.env.INCREMENT_BY_S, 10) || ONE_WEEK_IN_S,
+    batchSize: parseInt(process.env.BATCH_SIZE, 10) || 1000,
+    stopAtS: parseInt(process.env.STOP_AT_S, 10) || NOW_IN_S,
+    letUserDoubleCheckInputsFor:
+      parseInt(process.env.LET_USER_DOUBLE_CHECK_INPUTS_FOR, 10) || TEN_SECONDS,
+  })
+
+  if (!options.firstProjectId) {
+    console.error('Set FIRST_PROJECT_ID and re-run.')
+    process.exit(1)
+  }
+
+  deletedProjectsCache = new LRUCache({
+    max: options.cacheSize,
+  })
+
+  await letUserDoubleCheckInputs(options)
   await waitForDb()
 
-  let startId = FIRST_PROJECT_ID
+  let startId = options.firstProjectId
 
   let nProcessed = 0
-  while (getSecondsFromObjectId(startId) <= STOP_AT_S) {
-    const end = getSecondsFromObjectId(startId) + INCREMENT_BY_S
+  while (getSecondsFromObjectId(startId) <= options.stopAtS) {
+    const end = getSecondsFromObjectId(startId) + options.incrementByS
     let endId = ObjectId.createFromTime(end)
     const query = {
       project_id: {
@@ -57,16 +66,16 @@
     const docs = await db.docs
       .find(query, { readPreference: ReadPreference.SECONDARY })
       .project({ _id: 1, project_id: 1 })
-      .limit(BATCH_SIZE)
+      .limit(options.batchSize)
       .toArray()
 
     if (docs.length) {
       const docIds = docs.map(doc => doc._id)
       console.log('Back filling dummy meta data for', JSON.stringify(docIds))
-      await processBatch(docs)
+      await processBatch(docs, options)
       nProcessed += docIds.length
 
-      if (docs.length === BATCH_SIZE) {
+      if (docs.length === options.batchSize) {
        endId = docs[docs.length - 1].project_id
       }
     }
@@ -94,7 +103,7 @@ async function getDeletedProject(projectId) {
   return deletedProject
 }
 
-async function processBatch(docs) {
+async function processBatch(docs, options) {
   for (const doc of docs) {
     const { _id: docId, project_id: projectId } = doc
     const deletedProject = await getDeletedProject(projectId)
@@ -119,42 +128,31 @@
     } else {
       console.log('Orphaned deleted doc %s (no deletedProjects entry)', docId)
     }
-    if (DRY_RUN) continue
+    if (options.dryRun) continue
     await db.docs.updateOne({ _id: docId }, { $set: { name, deletedAt } })
   }
 }
 
-async function letUserDoubleCheckInputs() {
-  console.error(
-    'Options:',
-    JSON.stringify(
-      {
-        BATCH_SIZE,
-        CACHE_SIZE,
-        DRY_RUN,
-        FIRST_PROJECT_ID,
-        INCREMENT_BY_S,
-        STOP_AT_S,
-        LET_USER_DOUBLE_CHECK_INPUTS_FOR,
-      },
-      null,
-      2
-    )
-  )
+async function letUserDoubleCheckInputs(options) {
+  console.error('Options:', JSON.stringify(options, null, 2))
   console.error(
     'Waiting for you to double check inputs for',
-    LET_USER_DOUBLE_CHECK_INPUTS_FOR,
+    options.letUserDoubleCheckInputsFor,
     'ms'
   )
-  await sleep(LET_USER_DOUBLE_CHECK_INPUTS_FOR)
+  await sleep(options.letUserDoubleCheckInputsFor)
 }
 
-main()
-  .then(() => {
-    console.error('Done.')
-    process.exit(0)
-  })
-  .catch(error => {
-    console.error({ error })
-    process.exit(1)
-  })
+module.exports = main
+
+if (require.main === module) {
+  main()
+    .then(() => {
+      console.error('Done.')
+      process.exit(0)
+    })
+    .catch(error => {
+      console.error({ error })
+      process.exit(1)
+    })
+}
diff --git a/services/web/scripts/mark_migration.js b/services/web/scripts/mark_migration.js
new file mode 100644
index 0000000000..dec6d71b4f
--- /dev/null
+++ b/services/web/scripts/mark_migration.js
@@ -0,0 +1,67 @@
+const Adapter = require('../migrations/lib/adapter')
+const fs = require('fs').promises
+const path = require('path')
+
+async function main(args) {
+  if (
+    !args ||
+    args.length === 0 ||
+    args.includes('help') ||
+    args.includes('--help') ||
+    args.includes('-h')
+  ) {
+    console.log('')
+    console.log('usage: node ./scripts/mark_migration.js migration state')
+    console.log('')
+    console.log('  migration: name of migration file')
+    console.log('  state: executed | unexecuted')
+    console.log('')
+    return
+  }
+
+  const migration = args[0]
+  if (!migration) {
+    throw new Error('Error: migration must be supplied')
+  }
+  const state = args[1]
+  if (!state) {
+    throw new Error('Error: migration state must be supplied')
+  }
+
+  try {
+    await fs.access(path.join(__dirname, '../migrations', `${migration}.js`))
+  } catch (err) {
+    throw new Error(
+      `Error: migration ${migration} does not exist on disk: ${err}`
+    )
+  }
+
+  console.log(`Marking ${migration} as ${state}`)
+
+  process.env.SKIP_TAG_CHECK = 'true'
+  const adapter = new Adapter()
+  await adapter.connect()
+  switch (state) {
+    case 'executed':
+      await adapter.markExecuted(migration)
+      break
+    case 'unexecuted':
+      await adapter.unmarkExecuted(migration)
+      break
+    default:
+      throw new Error(`invalid state "${state}"`)
+  }
+  console.log('Done')
+}
+
+if (require.main === module) {
+  const args = process.argv.slice(2)
+  main(args)
+    .then(() => {
+      process.exit(0)
+    })
+    .catch(err => {
+      console.error(err)
+      process.exit(1)
+    })
+}
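
mark_migration.js touches only the migration bookkeeping; it never runs a migration's migrate or rollback. Setting SKIP_TAG_CHECK before constructing the Adapter is what lets it past the tag guard patched above. Following the script's own usage text, recording one of the new migrations as already applied (presumably for instances where the back-fill scripts were already run by hand) would look like:

    node ./scripts/mark_migration.js 20210727123346_ce_sp_backfill_deleted_files executed
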
"firstProjectId": "${options.FIRST_PROJECT_ID}",`, + ` "incrementByS": ${options.INCREMENT_BY_S},`, + ` "batchSize": ${options.BATCH_SIZE},`, + ` "stopAtS": ${options.STOP_AT_S},`, + ` "letUserDoubleCheckInputsFor": ${options.LET_USER_DOUBLE_CHECK_INPUTS_FOR}`, + '}', 'Waiting for you to double check inputs for 1 ms', `Processed 1 until ${getObjectIdFromDate('2021-04-02T00:00:00.000Z')}`, `Processed 2 until ${getObjectIdFromDate('2021-04-03T00:00:00.000Z')}`,