mirror of
https://github.com/overleaf/overleaf.git
synced 2025-04-06 21:40:35 +00:00
Merge pull request #3907 from overleaf/jpa-script-back-fill-dummy-doc-meta
[scripts] add a new script for back filling dummy doc meta data GitOrigin-RevId: d3cb0900f08d4d3264cb1e8a0b3f7e8acdd6eb6c
This commit is contained in:
parent
83adfff7a6
commit
70841f4b3b
2 changed files with 353 additions and 0 deletions
139
services/web/scripts/back_fill_dummy_doc_meta.js
Normal file
139
services/web/scripts/back_fill_dummy_doc_meta.js
Normal file
|
@ -0,0 +1,139 @@
|
|||
const { promisify } = require('util')
const { ObjectId, ReadPreference } = require('mongodb')
const { db, waitForDb } = require('../app/src/infrastructure/mongodb')
const sleep = promisify(setTimeout)

const NOW_IN_S = Date.now() / 1000
// FIX: was `1000 * 60 * 60 * 24 * 7`, which is one week in *milliseconds*.
// This constant is the default for INCREMENT_BY_S, which is consumed in
// seconds (it is added to UNIX-second timestamps and fed into
// ObjectId.createFromTime in main()), so the spurious 1000 factor made the
// default window ~19 years instead of one week.
const ONE_WEEK_IN_S = 60 * 60 * 24 * 7
const TEN_SECONDS = 10 * 1000

// When 'true', log what would happen but skip the db.docs.updateOne writes.
const DRY_RUN = process.env.DRY_RUN === 'true'
if (!process.env.FIRST_PROJECT_ID) {
  console.error('Set FIRST_PROJECT_ID and re-run.')
  process.exit(1)
}
// Project id whose creation time marks the start of the back fill
// (included in the first batch, see main()).
const FIRST_PROJECT_ID = ObjectId(process.env.FIRST_PROJECT_ID)
// Width of each query window, in seconds.
const INCREMENT_BY_S = parseInt(process.env.INCREMENT_BY_S, 10) || ONE_WEEK_IN_S
// Stop once the window start reaches this UNIX timestamp (seconds).
const STOP_AT_S = parseInt(process.env.STOP_AT_S, 10) || NOW_IN_S
// Grace period (ms) giving the operator a chance to abort before work starts.
const LET_USER_DOUBLE_CHECK_INPUTS_FOR =
  parseInt(process.env.LET_USER_DOUBLE_CHECK_INPUTS_FOR, 10) || TEN_SECONDS

// Placeholder metadata for deleted docs whose real name/deletion time is lost.
const DUMMY_NAME = 'unknown.tex'
const DUMMY_TIME = new Date('2021-04-12T00:00:00.000Z')
|
||||
|
||||
// Convert a Mongo ObjectId's embedded creation timestamp to UNIX seconds.
function getSecondsFromObjectId(id) {
  const createdAt = id.getTimestamp()
  return createdAt.getTime() / 1000
}
|
||||
|
||||
// Walk time windows of INCREMENT_BY_S seconds from FIRST_PROJECT_ID's
// creation time up to STOP_AT_S, back filling dummy meta data for any
// deleted doc in each window that is missing a `name`.
async function main() {
  await letUserDoubleCheckInputs()
  await waitForDb()

  // Start one second early so FIRST_PROJECT_ID itself lands inside the
  // first window (the query below excludes the lower edge).
  let windowStartS = getSecondsFromObjectId(FIRST_PROJECT_ID) - 1

  let nProcessed = 0
  while (windowStartS < STOP_AT_S) {
    const windowEndS = windowStartS + INCREMENT_BY_S
    const lowerBoundId = ObjectId.createFromTime(windowStartS)
    const upperBoundId = ObjectId.createFromTime(windowEndS)

    // Deleted docs in this window that never had a name back filled.
    // Read from a secondary to keep load off the primary.
    const batch = await db.docs
      .find(
        {
          project_id: {
            // do not include edge
            $gt: lowerBoundId,
            // include edge
            $lte: upperBoundId
          },
          deleted: true,
          name: { $exists: false }
        },
        { readPreference: ReadPreference.SECONDARY }
      )
      .project({ _id: 1, project_id: 1 })
      .toArray()

    if (batch.length > 0) {
      const batchIds = batch.map(doc => doc._id)
      console.log('Back filling dummy meta data for', JSON.stringify(batchIds))
      await processBatch(batch)
      nProcessed += batchIds.length
    }
    // Progress goes to stderr so stdout stays a clean record of changes.
    console.error('Processed %d until %s', nProcessed, upperBoundId)

    windowStartS = windowEndS
  }
}
|
||||
|
||||
// For each doc stub ({ _id, project_id }), try to recover the real name and
// deletedAt from the owning deletedProjects entry; fall back to
// DUMMY_NAME/DUMMY_TIME when the context is gone. Writes are skipped when
// DRY_RUN is set.
async function processBatch(docs) {
  for (const doc of docs) {
    const { _id: docId, project_id: projectId } = doc
    const deletedProject = await db.deletedProjects.findOne(
      { 'deleterData.deletedProjectId': projectId },
      {
        projection: {
          _id: 1,
          'project.deletedDocs': 1
        }
      }
    )
    let name = DUMMY_NAME
    let deletedAt = DUMMY_TIME
    if (deletedProject) {
      const project = deletedProject.project
      if (project) {
        const deletedDoc =
          project.deletedDocs &&
          project.deletedDocs.find(deletedDoc => docId.equals(deletedDoc._id))
        if (deletedDoc) {
          // Real metadata survived soft deletion -- use it.
          console.log('Found deletedDoc for %s', docId)
          name = deletedDoc.name
          deletedAt = deletedDoc.deletedAt
        } else {
          console.log('Missing deletedDoc for %s', docId)
        }
      } else {
        // deletedProjects entry exists but the project payload was purged.
        console.log('Orphaned deleted doc %s (failed hard deletion)', docId)
      }
    } else {
      console.log('Orphaned deleted doc %s (no deletedProjects entry)', docId)
    }
    // FIX: was `return`, which aborted the whole batch after the first doc
    // when DRY_RUN was set, so a dry run only previewed one doc per batch.
    // `continue` previews every doc while still skipping the write.
    if (DRY_RUN) continue
    await db.docs.updateOne({ _id: docId }, { $set: { name, deletedAt } })
  }
}
|
||||
|
||||
// Print the effective configuration to stderr, then pause so the operator
// can abort (Ctrl-C) before any database work begins.
async function letUserDoubleCheckInputs() {
  const effectiveOptions = {
    DRY_RUN,
    FIRST_PROJECT_ID,
    INCREMENT_BY_S,
    STOP_AT_S,
    LET_USER_DOUBLE_CHECK_INPUTS_FOR
  }
  console.error('Options:', JSON.stringify(effectiveOptions, null, 2))
  console.error(
    'Waiting for you to double check inputs for',
    LET_USER_DOUBLE_CHECK_INPUTS_FOR,
    'ms'
  )
  await sleep(LET_USER_DOUBLE_CHECK_INPUTS_FOR)
}
|
||||
|
||||
// Entry point: run the back fill, then exit explicitly so any open mongo
// connections do not keep the process alive.
main()
  .then(() => {
    console.error('Done.')
    process.exit(0)
  })
  .catch(err => {
    console.error({ error: err })
    process.exit(1)
  })
|
214
services/web/test/acceptance/src/BackFillDummyDocMetaTests.js
Normal file
214
services/web/test/acceptance/src/BackFillDummyDocMetaTests.js
Normal file
|
@ -0,0 +1,214 @@
|
|||
// Acceptance tests for scripts/back_fill_dummy_doc_meta.js: seed the docs
// and deletedProjects collections, run the script as a child process, and
// verify its output and database writes.
const { exec } = require('child_process')
const { promisify } = require('util')
const { expect } = require('chai')
const logger = require('logger-sharelatex')
const { db, ObjectId } = require('../../../app/src/infrastructure/mongodb')

// Must mirror the placeholder values hard-coded in the script under test.
const DUMMY_NAME = 'unknown.tex'
const DUMMY_TIME = new Date('2021-04-12T00:00:00.000Z')
// Window size passed to the script so each fixture day is its own batch.
const ONE_DAY_IN_S = 60 * 60 * 24
|
||||
|
||||
// Build an ObjectId whose embedded creation timestamp matches the given
// date string, so fixtures land in predictable script batching windows.
function getObjectIdFromDate(date) {
  return ObjectId.createFromTime(new Date(date).getTime() / 1000)
}
|
||||
|
||||
describe('BackFillDummyDocMeta', function () {
  let docId1, docId2, docId3, docId4, docId5, docId6
  let projectId1, projectId2, projectId3, projectId4, projectId5, projectId6
  let stopAtSeconds
  // Fixture ids are derived from dates so that, with INCREMENT_BY_S of one
  // day, each doc falls into its own batch window of the script.
  beforeEach('create docs', async function () {
    docId1 = getObjectIdFromDate('2021-04-01T00:00:00.000Z')
    docId2 = getObjectIdFromDate('2021-04-11T00:00:00.000Z')
    docId3 = getObjectIdFromDate('2021-04-12T00:00:00.000Z')
    docId4 = getObjectIdFromDate('2021-04-13T00:00:00.000Z')
    docId5 = getObjectIdFromDate('2021-04-14T00:00:00.000Z')
    docId6 = getObjectIdFromDate('2021-04-15T00:00:00.000Z')

    projectId1 = getObjectIdFromDate('2021-04-01T00:00:00.000Z')
    projectId2 = getObjectIdFromDate('2021-04-11T00:00:00.000Z')
    projectId3 = getObjectIdFromDate('2021-04-12T00:00:00.000Z')
    projectId4 = getObjectIdFromDate('2021-04-13T00:00:00.000Z')
    projectId5 = getObjectIdFromDate('2021-04-14T00:00:00.000Z')
    projectId6 = getObjectIdFromDate('2021-04-15T00:00:00.000Z')

    // One day past the last fixture, so every doc is inside a window.
    stopAtSeconds = new Date('2021-04-16T00:00:00.000Z').getTime() / 1000
  })
  // Shared timestamp for "already complete" fixtures; evaluated once so the
  // expectations below compare equal against what was inserted.
  const now = new Date()
  beforeEach('insert doc stubs into docs collection', async function () {
    await db.docs.insertMany([
      // incomplete, without deletedDocs context
      { _id: docId1, project_id: projectId1, deleted: true },
      { _id: docId2, project_id: projectId2, deleted: true },
      { _id: docId3, project_id: projectId3, deleted: true },
      // incomplete, with deletedDocs context
      { _id: docId4, project_id: projectId4, deleted: true },
      // complete
      {
        _id: docId5,
        project_id: projectId5,
        deleted: true,
        name: 'foo.tex',
        deletedAt: now
      },
      // not deleted
      { _id: docId6, project_id: projectId6 }
    ])
  })
  beforeEach('insert deleted project context', async function () {
    await db.deletedProjects.insertMany([
      // projectId1 has no entry

      // hard-deleted
      { deleterData: { deletedProjectId: projectId2 } },
      // soft-deleted, no entry for doc
      {
        deleterData: { deletedProjectId: projectId3 },
        project: { deletedDocs: [] }
      },
      // soft-deleted, has entry for doc
      {
        deleterData: { deletedProjectId: projectId4 },
        project: {
          deletedDocs: [{ _id: docId4, name: 'main.tex', deletedAt: now }]
        }
      }
    ])
  })

  // Captured so the stderr expectation can reproduce the script's
  // "Options:" banner from the exact same object.
  let options
  // Spawn the script with its configuration passed as environment variable
  // assignments on the command line, then assert on its full output.
  async function runScript(dryRun) {
    options = {
      DRY_RUN: dryRun,
      FIRST_PROJECT_ID: projectId1.toString(),
      INCREMENT_BY_S: ONE_DAY_IN_S,
      STOP_AT_S: stopAtSeconds,
      // start right away
      LET_USER_DOUBLE_CHECK_INPUTS_FOR: 1
    }
    let result
    try {
      result = await promisify(exec)(
        Object.entries(options)
          .map(([key, value]) => `${key}=${value}`)
          .concat(['node', 'scripts/back_fill_dummy_doc_meta.js'])
          .join(' ')
      )
    } catch (error) {
      // dump details like exit code, stdErr and stdOut
      logger.error({ error }, 'script failed')
      throw error
    }
    let { stderr: stdErr, stdout: stdOut } = result
    stdErr = stdErr.split('\n')
    stdOut = stdOut
      .split('\n')
      .filter(line => !line.includes('Using settings from'))

    // stdout: one "Back filling" line per non-empty batch, followed by the
    // per-doc resolution outcome. docId5 (complete) and docId6 (not
    // deleted) never match the script's query, so they do not appear.
    expect(stdOut).to.deep.equal([
      `Back filling dummy meta data for ["${docId1}"]`,
      `Orphaned deleted doc ${docId1} (no deletedProjects entry)`,
      `Back filling dummy meta data for ["${docId2}"]`,
      `Orphaned deleted doc ${docId2} (failed hard deletion)`,
      `Back filling dummy meta data for ["${docId3}"]`,
      `Missing deletedDoc for ${docId3}`,
      `Back filling dummy meta data for ["${docId4}"]`,
      `Found deletedDoc for ${docId4}`,
      ''
    ])
    // stderr: the options banner, the double-check notice, then one
    // cumulative "Processed" progress line per one-day window. The window
    // upper bound is start-1s + ONE_DAY_IN_S, hence the 23:59:59 stamps.
    expect(stdErr).to.deep.equal([
      ...`Options: ${JSON.stringify(options, null, 2)}`.split('\n'),
      'Waiting for you to double check inputs for 1 ms',
      `Processed 1 until ${getObjectIdFromDate('2021-04-01T23:59:59.000Z')}`,
      `Processed 1 until ${getObjectIdFromDate('2021-04-02T23:59:59.000Z')}`,
      `Processed 1 until ${getObjectIdFromDate('2021-04-03T23:59:59.000Z')}`,
      `Processed 1 until ${getObjectIdFromDate('2021-04-04T23:59:59.000Z')}`,
      `Processed 1 until ${getObjectIdFromDate('2021-04-05T23:59:59.000Z')}`,
      `Processed 1 until ${getObjectIdFromDate('2021-04-06T23:59:59.000Z')}`,
      `Processed 1 until ${getObjectIdFromDate('2021-04-07T23:59:59.000Z')}`,
      `Processed 1 until ${getObjectIdFromDate('2021-04-08T23:59:59.000Z')}`,
      `Processed 1 until ${getObjectIdFromDate('2021-04-09T23:59:59.000Z')}`,
      `Processed 1 until ${getObjectIdFromDate('2021-04-10T23:59:59.000Z')}`,
      `Processed 2 until ${getObjectIdFromDate('2021-04-11T23:59:59.000Z')}`,
      `Processed 3 until ${getObjectIdFromDate('2021-04-12T23:59:59.000Z')}`,
      `Processed 4 until ${getObjectIdFromDate('2021-04-13T23:59:59.000Z')}`,
      `Processed 4 until ${getObjectIdFromDate('2021-04-14T23:59:59.000Z')}`,
      `Processed 4 until ${getObjectIdFromDate('2021-04-15T23:59:59.000Z')}`,
      `Processed 4 until ${getObjectIdFromDate('2021-04-16T23:59:59.000Z')}`,
      'Done.',
      ''
    ])
  }

  describe('DRY_RUN=true', function () {
    beforeEach('run script', async function () {
      await runScript(true)
    })

    // In dry-run mode the script must only report; no doc may be modified.
    it('should leave docs as is', async function () {
      const docs = await db.docs.find({}).toArray()
      expect(docs).to.deep.equal([
        { _id: docId1, project_id: projectId1, deleted: true },
        { _id: docId2, project_id: projectId2, deleted: true },
        { _id: docId3, project_id: projectId3, deleted: true },
        { _id: docId4, project_id: projectId4, deleted: true },
        {
          _id: docId5,
          project_id: projectId5,
          deleted: true,
          name: 'foo.tex',
          deletedAt: now
        },
        { _id: docId6, project_id: projectId6 }
      ])
    })
  })

  describe('DRY_RUN=false', function () {
    beforeEach('run script', async function () {
      await runScript(false)
    })

    // docId1-3 get dummy values, docId4 gets its recovered metadata,
    // docId5 (already complete) and docId6 (not deleted) are untouched.
    it('should back fill name and deletedAt dates into broken docs', async function () {
      const docs = await db.docs.find({}).toArray()
      expect(docs).to.deep.equal([
        {
          _id: docId1,
          project_id: projectId1,
          deleted: true,
          name: DUMMY_NAME,
          deletedAt: DUMMY_TIME
        },
        {
          _id: docId2,
          project_id: projectId2,
          deleted: true,
          name: DUMMY_NAME,
          deletedAt: DUMMY_TIME
        },
        {
          _id: docId3,
          project_id: projectId3,
          deleted: true,
          name: DUMMY_NAME,
          deletedAt: DUMMY_TIME
        },
        {
          _id: docId4,
          project_id: projectId4,
          deleted: true,
          name: 'main.tex',
          deletedAt: now
        },
        {
          _id: docId5,
          project_id: projectId5,
          deleted: true,
          name: 'foo.tex',
          deletedAt: now
        },
        { _id: docId6, project_id: projectId6 }
      ])
    })
  })
})
|
Loading…
Add table
Reference in a new issue