Merge pull request #12254 from overleaf/jpa-back-fill-doc-rev

[web] add script for back filling rev=1 for old (deleted) docs

GitOrigin-RevId: 62f5b773fb83ddbf9bb202a592ce2e8f33b5e6b2
This commit is contained in:
Jakob Ackermann 2023-03-16 10:33:09 +00:00 committed by Copybot
parent 2d5e76acbc
commit fa5804a3cf
3 changed files with 133 additions and 0 deletions

View file

@ -0,0 +1,9 @@
const runScript = require('../scripts/back_fill_doc_rev')
// Migration wrapper around the back-fill script: it is tagged for the
// 'server-ce' and 'server-pro' environments.

/**
 * Run the back-fill script with its dry-run flag set to `false`, i.e. the
 * script is asked to actually write `rev` to the docs.
 */
async function migrate() {
  await runScript(false)
}

/**
 * Intentionally a no-op: this migration does not undo the back fill.
 */
async function rollback() {}

exports.tags = ['server-ce', 'server-pro']
exports.migrate = migrate
exports.rollback = rollback

View file

@ -0,0 +1,58 @@
const { batchedUpdate } = require('./helpers/batchedUpdate')
// Dry run is the default: writes are only enabled when the exact argument
// `--dry-run=false` is present on the command line.
const DRY_RUN = process.argv.indexOf('--dry-run=false') === -1
// Minimum number of seconds between two progress log lines. Falls back to 5
// when the environment variable is unset, not a number, or 0.
const LOG_EVERY_IN_S = Number.parseInt(process.env.LOG_EVERY_IN_S, 10) || 5
/**
 * Back fill `rev: 1` on all entries of the `docs` collection that have no
 * `rev` field, going through the collection in batches via `batchedUpdate`.
 *
 * Progress is printed to stdout at most once every `LOG_EVERY_IN_S` seconds
 * and once more after the last batch.
 *
 * @param {boolean} DRY_RUN - when truthy, docs are only counted and nothing
 *   is written to the collection.
 * @returns {Promise<void>}
 */
async function main(DRY_RUN) {
  // Running totals across all batches.
  const totals = { processed: 0, deleted: 0 }
  // Timestamp (ms) of the last progress line; 0 makes the first batch log.
  let lastLog = 0

  const logProgress = () => {
    console.log(
      `rev missing ${totals.processed} | deleted=true ${totals.deleted}`
    )
  }

  // Only these fields are requested for each doc of a batch.
  const projection = { _id: 1, deleted: true }

  const processBatch = async (docsCollection, docs) => {
    if (!DRY_RUN) {
      const batchIds = docs.map(({ _id }) => _id)
      await docsCollection.updateMany(
        // Repeat the "rev is missing" check so that a doc that gained a rev
        // since the batch was read is not overwritten.
        { _id: { $in: batchIds }, rev: { $exists: false } },
        { $set: { rev: 1 } }
      )
    }

    totals.processed += docs.length
    for (const doc of docs) {
      if (doc.deleted) {
        totals.deleted += 1
      }
    }

    const msSinceLastLog = Date.now() - lastLog
    if (msSinceLastLog >= LOG_EVERY_IN_S * 1000) {
      logProgress()
      lastLog = Date.now()
    }
  }

  await batchedUpdate(
    'docs',
    { rev: { $exists: false } },
    processBatch,
    projection
  )

  // Always emit the final totals, regardless of the log interval.
  logProgress()
}
// Expose `main` so that other modules can run the back fill programmatically.
module.exports = main

// When executed directly (not required), run with the command line derived
// DRY_RUN flag and turn the outcome into an exit code: 0 on success, 1 on
// any error.
if (require.main === module) {
  ;(async () => {
    try {
      await main(DRY_RUN)
      console.log('Done.')
      process.exit(0)
    } catch (error) {
      console.error({ error })
      process.exit(1)
    }
  })()
}

View file

@ -0,0 +1,66 @@
const { db, ObjectId } = require('../../../app/src/infrastructure/mongodb')
const { promisify } = require('util')
const { exec } = require('child_process')
const logger = require('@overleaf/logger/logging-manager')
const { expect } = require('chai')
// Acceptance tests for scripts/back_fill_doc_rev: the script is executed as a
// child process against three docs and the docs collection is inspected
// afterwards.
describe('BackFillDocRevTests', function () {
  // Ids with increasing timestamps, so sorting by _id yields 1, 2, 3.
  const docId1 = ObjectId.createFromTime(1)
  const docId2 = ObjectId.createFromTime(2)
  const docId3 = ObjectId.createFromTime(3)

  beforeEach('insert docs', async function () {
    await db.docs.insertMany([
      // no rev, flagged as deleted
      { _id: docId1, deleted: true },
      // no rev, not flagged as deleted
      { _id: docId2 },
      // already has a rev, must never be touched
      { _id: docId3, rev: 42 },
    ])
  })

  /**
   * Run the back-fill script in a child process and check its output.
   *
   * @param {string} dryRun - command line flag appended to the command,
   *   e.g. `--dry-run=true`
   * @throws when the command fails; the error is logged before rethrowing
   */
  async function runScript(dryRun) {
    let result
    try {
      result = await promisify(exec)(
        ['node', 'scripts/back_fill_doc_rev', dryRun].join(' ')
      )
    } catch (error) {
      // dump details like exit code, stdErr and stdOut
      logger.error({ error }, 'script failed')
      throw error
    }
    const { stdout: stdOut, stderr: stdErr } = result
    // Two of the three docs lack a rev, one of those two is deleted. The
    // counts are the same for dry and real runs.
    expect(stdOut).to.include('rev missing 2 | deleted=true 1')
    // NOTE(review): this line is not printed by the script itself;
    // presumably it comes from the batchedUpdate helper -- confirm.
    expect(stdErr).to.include(`Completed batch ending ${docId2}`)
  }

  /**
   * Fetch all docs ordered by _id.
   *
   * The option name is `sort`: the original `$sort` key is not a find
   * option of the driver, which left the result order unspecified.
   */
  async function getDocsSortedById() {
    return await db.docs.find({}, { sort: { _id: 1 } }).toArray()
  }

  describe('dry-run=true', function () {
    beforeEach('run script', async function () {
      await runScript('--dry-run=true')
    })

    it('should not back fill the rev', async function () {
      const docs = await getDocsSortedById()
      expect(docs).to.deep.equal([
        { _id: docId1, deleted: true },
        { _id: docId2 },
        { _id: docId3, rev: 42 },
      ])
    })
  })

  describe('dry-run=false', function () {
    beforeEach('run script', async function () {
      await runScript('--dry-run=false')
    })

    it('should back fill the rev', async function () {
      const docs = await getDocsSortedById()
      expect(docs).to.deep.equal([
        { _id: docId1, rev: 1, deleted: true },
        { _id: docId2, rev: 1 },
        { _id: docId3, rev: 42 },
      ])
    })
  })
})