Merge pull request #4352 from overleaf/sk-add-ce-sp-migration-deleted-files

Add migrations for CE/SP, run scripts for deleted files and docs

GitOrigin-RevId: e425a8d929a4aefdc6f61cff636fb5a1b4c67dce
This commit is contained in:
June Kelly 2021-08-04 10:34:41 +01:00 committed by Copybot
parent 397a3d97df
commit 99fe2eca51
9 changed files with 275 additions and 114 deletions

View file

@ -0,0 +1,14 @@
const runScript = require('../scripts/back_fill_deleted_files.js')
exports.tags = ['server-ce', 'server-pro', 'saas']
exports.migrate = async client => {
const options = {
performCleanup: true,
letUserDoubleCheckInputsFor: 10,
fixPartialInserts: true,
}
await runScript(options)
}
exports.rollback = async client => {}

View file

@ -0,0 +1,13 @@
const runScript = require('../scripts/back_fill_doc_name_for_deleted_docs.js')
exports.tags = ['server-ce', 'server-pro', 'saas']
exports.migrate = async client => {
const options = {
performCleanup: true,
letUserDoubleCheckInputsFor: 10,
}
await runScript(options)
}
exports.rollback = async client => {}

View file

@ -0,0 +1,23 @@
const runScript = require('../scripts/back_fill_dummy_doc_meta.js')
exports.tags = ['server-ce', 'server-pro', 'saas']
exports.migrate = async client => {
const { db } = client
const [firstProject] = await db.projects
.find()
.sort({ _id: 1 })
.limit(1)
.toArray()
if (!firstProject) {
return
}
const options = {
firstProjectId: firstProject._id,
performCleanup: true,
letUserDoubleCheckInputsFor: 10,
}
await runScript(options)
}
exports.rollback = async client => {}

View file

@ -7,7 +7,11 @@ const {
class Adapter {
constructor(params) {
if (!(process.argv.includes('-t') || process.argv.includes('--tags'))) {
if (
!process.env.SKIP_TAG_CHECK &&
!process.argv.includes('create') &&
!(process.argv.includes('-t') || process.argv.includes('--tags'))
) {
console.error("ERROR: must pass tags using '-t' or '--tags', exiting")
process.exit(1)
}

View file

@ -1,58 +1,62 @@
const WRITE_CONCURRENCY = parseInt(process.env.WRITE_CONCURRENCY, 10) || 10
const { batchedUpdate } = require('./helpers/batchedUpdate')
const { promiseMapWithLimit, promisify } = require('../app/src/util/promises')
const { db } = require('../app/src/infrastructure/mongodb')
const sleep = promisify(setTimeout)
const _ = require('lodash')
const PERFORM_CLEANUP = process.argv.includes('--perform-cleanup')
const FIX_PARTIAL_INSERTS = process.argv.includes('--fix-partial-inserts')
const LET_USER_DOUBLE_CHECK_INPUTS_FOR = parseInt(
async function main(options) {
if (!options) {
options = {}
}
_.defaults(options, {
writeConcurrency: parseInt(process.env.WRITE_CONCURRENCY, 10) || 10,
performCleanup: process.argv.includes('--perform-cleanup'),
fixPartialInserts: process.argv.includes('--fix-partial-inserts'),
letUserDoubleCheckInputsFor: parseInt(
process.env.LET_USER_DOUBLE_CHECK_INPUTS_FOR || 10 * 1000,
10
)
),
})
async function main() {
await letUserDoubleCheckInputs()
await letUserDoubleCheckInputs(options)
await batchedUpdate(
'projects',
// array is not empty ~ array has one item
{ 'deletedFiles.0': { $exists: true } },
processBatch,
async (x, projects) => {
await processBatch(x, projects, options)
},
{ _id: 1, deletedFiles: 1 }
)
}
main()
.then(() => {
process.exit(0)
})
.catch(error => {
console.error({ error })
process.exit(1)
})
async function processBatch(_, projects) {
await promiseMapWithLimit(WRITE_CONCURRENCY, projects, processProject)
async function processBatch(_, projects, options) {
await promiseMapWithLimit(
options.writeConcurrency,
projects,
async project => {
await processProject(project, options)
}
)
}
async function processProject(project) {
await backFillFiles(project)
async function processProject(project, options) {
await backFillFiles(project, options)
if (PERFORM_CLEANUP) {
if (options.performCleanup) {
await cleanupProject(project)
}
}
async function backFillFiles(project) {
async function backFillFiles(project, options) {
const projectId = project._id
filterDuplicatesInPlace(project)
project.deletedFiles.forEach(file => {
file.projectId = projectId
})
if (FIX_PARTIAL_INSERTS) {
if (options.fixPartialInserts) {
await fixPartialInserts(project)
} else {
await db.deletedFiles.insertMany(project.deletedFiles)
@ -98,8 +102,8 @@ async function cleanupProject(project) {
)
}
async function letUserDoubleCheckInputs() {
if (PERFORM_CLEANUP) {
async function letUserDoubleCheckInputs(options) {
if (options.performCleanup) {
console.error('BACK FILLING AND PERFORMING CLEANUP')
} else {
console.error(
@ -108,8 +112,21 @@ async function letUserDoubleCheckInputs() {
}
console.error(
'Waiting for you to double check inputs for',
LET_USER_DOUBLE_CHECK_INPUTS_FOR,
options.letUserDoubleCheckInputsFor,
'ms'
)
await sleep(LET_USER_DOUBLE_CHECK_INPUTS_FOR)
await sleep(options.letUserDoubleCheckInputsFor)
}
module.exports = main
if (require.main === module) {
main()
.then(() => {
process.exit(0)
})
.catch(error => {
console.error({ error })
process.exit(1)
})
}

View file

@ -1,46 +1,50 @@
const WRITE_CONCURRENCY = parseInt(process.env.WRITE_CONCURRENCY, 10) || 10
const { batchedUpdate } = require('./helpers/batchedUpdate')
const { promiseMapWithLimit, promisify } = require('../app/src/util/promises')
const { db } = require('../app/src/infrastructure/mongodb')
const sleep = promisify(setTimeout)
const _ = require('lodash')
const PERFORM_CLEANUP = process.argv.pop() === '--perform-cleanup'
const LET_USER_DOUBLE_CHECK_INPUTS_FOR = parseInt(
async function main(options) {
if (!options) {
options = {}
}
_.defaults(options, {
writeConcurrency: parseInt(process.env.WRITE_CONCURRENCY, 10) || 10,
performCleanup: process.argv.pop() === '--perform-cleanup',
letUserDoubleCheckInputsFor: parseInt(
process.env.LET_USER_DOUBLE_CHECK_INPUTS_FOR || 10 * 1000,
10
)
),
})
async function main() {
await letUserDoubleCheckInputs()
await letUserDoubleCheckInputs(options)
await batchedUpdate(
'projects',
// array is not empty ~ array has one item
{ 'deletedDocs.0': { $exists: true } },
processBatch,
async (_collection, projects) => {
await processBatch(_collection, projects, options)
},
{ _id: 1, deletedDocs: 1 }
)
}
main()
.then(() => {
process.exit(0)
})
.catch(error => {
console.error({ error })
process.exit(1)
})
async function processBatch(_, projects) {
await promiseMapWithLimit(WRITE_CONCURRENCY, projects, processProject)
async function processBatch(_, projects, options) {
await promiseMapWithLimit(
options.writeConcurrency,
projects,
async project => {
await processProject(project, options)
}
)
}
async function processProject(project) {
async function processProject(project, options) {
for (const doc of project.deletedDocs) {
await backFillDoc(doc)
}
if (PERFORM_CLEANUP) {
if (options.performCleanup) {
await cleanupProject(project)
}
}
@ -57,8 +61,8 @@ async function cleanupProject(project) {
)
}
async function letUserDoubleCheckInputs() {
if (PERFORM_CLEANUP) {
async function letUserDoubleCheckInputs(options) {
if (options.performCleanup) {
console.error('BACK FILLING AND PERFORMING CLEANUP')
} else {
console.error(
@ -67,8 +71,21 @@ async function letUserDoubleCheckInputs() {
}
console.error(
'Waiting for you to double check inputs for',
LET_USER_DOUBLE_CHECK_INPUTS_FOR,
options.letUserDoubleCheckInputsFor,
'ms'
)
await sleep(LET_USER_DOUBLE_CHECK_INPUTS_FOR)
await sleep(options.letUserDoubleCheckInputsFor)
}
module.exports = main
if (require.main === module) {
main()
.then(() => {
process.exit(0)
})
.catch(error => {
console.error({ error })
process.exit(1)
})
}

View file

@ -2,45 +2,54 @@ const { promisify } = require('util')
const { ObjectId, ReadPreference } = require('mongodb')
const { db, waitForDb } = require('../app/src/infrastructure/mongodb')
const sleep = promisify(setTimeout)
const _ = require('lodash')
const NOW_IN_S = Date.now() / 1000
const ONE_WEEK_IN_S = 60 * 60 * 24 * 7
const TEN_SECONDS = 10 * 1000
const CACHE_SIZE = parseInt(process.env.CACHE_SIZE, 10) || 100
const DRY_RUN = process.env.DRY_RUN === 'true'
if (!process.env.FIRST_PROJECT_ID) {
console.error('Set FIRST_PROJECT_ID and re-run.')
process.exit(1)
}
const FIRST_PROJECT_ID = ObjectId(process.env.FIRST_PROJECT_ID)
const INCREMENT_BY_S = parseInt(process.env.INCREMENT_BY_S, 10) || ONE_WEEK_IN_S
const BATCH_SIZE = parseInt(process.env.BATCH_SIZE, 10) || 1000
const STOP_AT_S = parseInt(process.env.STOP_AT_S, 10) || NOW_IN_S
const LET_USER_DOUBLE_CHECK_INPUTS_FOR =
parseInt(process.env.LET_USER_DOUBLE_CHECK_INPUTS_FOR, 10) || TEN_SECONDS
const DUMMY_NAME = 'unknown.tex'
const DUMMY_TIME = new Date('2021-04-12T00:00:00.000Z')
const LRUCache = require('lru-cache')
const deletedProjectsCache = new LRUCache({
max: CACHE_SIZE,
})
let deletedProjectsCache = null
function getSecondsFromObjectId(id) {
return id.getTimestamp().getTime() / 1000
}
async function main() {
await letUserDoubleCheckInputs()
async function main(options) {
if (!options) {
options = {}
}
_.defaults(options, {
dryRun: process.env.DRY_RUN === 'true',
cacheSize: parseInt(process.env.CACHE_SIZE, 10) || 100,
firstProjectId: ObjectId(process.env.FIRST_PROJECT_ID),
incrementByS: parseInt(process.env.INCREMENT_BY_S, 10) || ONE_WEEK_IN_S,
batchSize: parseInt(process.env.BATCH_SIZE, 10) || 1000,
stopAtS: parseInt(process.env.STOP_AT_S, 10) || NOW_IN_S,
letUserDoubleCheckInputsFor:
parseInt(process.env.LET_USER_DOUBLE_CHECK_INPUTS_FOR, 10) || TEN_SECONDS,
})
if (!options.firstProjectId) {
console.error('Set FIRST_PROJECT_ID and re-run.')
process.exit(1)
}
deletedProjectsCache = new LRUCache({
max: options.cacheSize,
})
await letUserDoubleCheckInputs(options)
await waitForDb()
let startId = FIRST_PROJECT_ID
let startId = options.firstProjectId
let nProcessed = 0
while (getSecondsFromObjectId(startId) <= STOP_AT_S) {
const end = getSecondsFromObjectId(startId) + INCREMENT_BY_S
while (getSecondsFromObjectId(startId) <= options.stopAtS) {
const end = getSecondsFromObjectId(startId) + options.incrementByS
let endId = ObjectId.createFromTime(end)
const query = {
project_id: {
@ -57,16 +66,16 @@ async function main() {
const docs = await db.docs
.find(query, { readPreference: ReadPreference.SECONDARY })
.project({ _id: 1, project_id: 1 })
.limit(BATCH_SIZE)
.limit(options.batchSize)
.toArray()
if (docs.length) {
const docIds = docs.map(doc => doc._id)
console.log('Back filling dummy meta data for', JSON.stringify(docIds))
await processBatch(docs)
await processBatch(docs, options)
nProcessed += docIds.length
if (docs.length === BATCH_SIZE) {
if (docs.length === options.batchSize) {
endId = docs[docs.length - 1].project_id
}
}
@ -94,7 +103,7 @@ async function getDeletedProject(projectId) {
return deletedProject
}
async function processBatch(docs) {
async function processBatch(docs, options) {
for (const doc of docs) {
const { _id: docId, project_id: projectId } = doc
const deletedProject = await getDeletedProject(projectId)
@ -119,37 +128,25 @@ async function processBatch(docs) {
} else {
console.log('Orphaned deleted doc %s (no deletedProjects entry)', docId)
}
if (DRY_RUN) continue
if (options.dryRun) continue
await db.docs.updateOne({ _id: docId }, { $set: { name, deletedAt } })
}
}
async function letUserDoubleCheckInputs() {
console.error(
'Options:',
JSON.stringify(
{
BATCH_SIZE,
CACHE_SIZE,
DRY_RUN,
FIRST_PROJECT_ID,
INCREMENT_BY_S,
STOP_AT_S,
LET_USER_DOUBLE_CHECK_INPUTS_FOR,
},
null,
2
)
)
async function letUserDoubleCheckInputs(options) {
console.error('Options:', JSON.stringify(options, null, 2))
console.error(
'Waiting for you to double check inputs for',
LET_USER_DOUBLE_CHECK_INPUTS_FOR,
options.letUserDoubleCheckInputsFor,
'ms'
)
await sleep(LET_USER_DOUBLE_CHECK_INPUTS_FOR)
await sleep(options.letUserDoubleCheckInputsFor)
}
main()
module.exports = main
if (require.main === module) {
main()
.then(() => {
console.error('Done.')
process.exit(0)
@ -158,3 +155,4 @@ main()
console.error({ error })
process.exit(1)
})
}

View file

@ -0,0 +1,67 @@
const Adapter = require('../migrations/lib/adapter')
const fs = require('fs').promises
const path = require('path')

// The only states the migrations adapter understands.
const VALID_STATES = ['executed', 'unexecuted']

/**
 * Mark a migration as executed or unexecuted without running it.
 *
 * @param {string[]} args - CLI arguments: [migrationName, state]
 * @throws {Error} when the migration name/state is missing, the migration
 *   file does not exist on disk, or the state is not a valid value.
 */
async function main(args) {
  if (
    !args ||
    args.length === 0 ||
    args.includes('help') ||
    args.includes('--help') ||
    args.includes('-h')
  ) {
    console.log('')
    console.log('usage: node ./scripts/mark_migration.js migration state')
    console.log('')
    console.log(' migration: name of migration file')
    console.log(' state: executed | unexecuted')
    console.log('')
    return
  }
  const migration = args[0]
  if (!migration) {
    throw new Error('Error: migration must be supplied')
  }
  const state = args[1]
  if (!state) {
    throw new Error('Error: migration state must be supplied')
  }
  // Fail fast on a bad state BEFORE mutating the environment or opening a
  // database connection (the original checked this only after connecting).
  if (!VALID_STATES.includes(state)) {
    throw new Error(`invalid state "${state}"`)
  }
  try {
    await fs.access(path.join(__dirname, '../migrations', `${migration}.js`))
  } catch (err) {
    throw new Error(
      `Error: migration ${migration} does not exist on disk: ${err}`
    )
  }
  console.log(`Marking ${migration} as ${state}`)
  // Bypass the adapter's '-t/--tags' CLI guard; we are not running migrations.
  process.env.SKIP_TAG_CHECK = 'true'
  const adapter = new Adapter()
  await adapter.connect()
  if (state === 'executed') {
    await adapter.markExecuted(migration)
  } else {
    await adapter.unmarkExecuted(migration)
  }
  console.log('Done')
}

if (require.main === module) {
  const args = process.argv.slice(2)
  main(args)
    .then(() => {
      process.exit(0)
    })
    .catch(err => {
      console.error(err)
      process.exit(1)
    })
}

View file

@ -188,7 +188,15 @@ describe('BackFillDummyDocMeta', function () {
'',
])
expect(stdErr.filter(filterOutput)).to.deep.equal([
...`Options: ${JSON.stringify(options, null, 2)}`.split('\n'),
`Options: {`,
` "dryRun": ${options.DRY_RUN},`,
` "cacheSize": ${options.CACHE_SIZE},`,
` "firstProjectId": "${options.FIRST_PROJECT_ID}",`,
` "incrementByS": ${options.INCREMENT_BY_S},`,
` "batchSize": ${options.BATCH_SIZE},`,
` "stopAtS": ${options.STOP_AT_S},`,
` "letUserDoubleCheckInputsFor": ${options.LET_USER_DOUBLE_CHECK_INPUTS_FOR}`,
'}',
'Waiting for you to double check inputs for 1 ms',
`Processed 1 until ${getObjectIdFromDate('2021-04-02T00:00:00.000Z')}`,
`Processed 2 until ${getObjectIdFromDate('2021-04-03T00:00:00.000Z')}`,