2024-10-18 07:04:57 -04:00
|
|
|
import DocstoreManager from '../app/src/Features/Docstore/DocstoreManager.js'
|
|
|
|
import { promisify } from 'util'
|
|
|
|
import mongodb from 'mongodb-legacy'
|
|
|
|
import {
|
2023-04-18 07:15:34 -04:00
|
|
|
db,
|
|
|
|
READ_PREFERENCE_PRIMARY,
|
|
|
|
READ_PREFERENCE_SECONDARY,
|
2024-10-18 07:04:57 -04:00
|
|
|
} from '../app/src/infrastructure/mongodb.js'
|
|
|
|
import { promiseMapWithLimit } from '@overleaf/promise-utils'
|
|
|
|
import DeleteOrphanedDataHelper from './delete_orphaned_data_helper.mjs'
|
|
|
|
|
|
|
|
const { ObjectId } = mongodb
const { getHardDeletedProjectIds } = DeleteOrphanedDataHelper
// Promise-returning timer: `await sleep(ms)`.
const sleep = promisify(setTimeout)

const NOW_IN_S = Date.now() / 1000
const ONE_WEEK_IN_S = 60 * 60 * 24 * 7
const TEN_SECONDS = 10 * 1000

/**
 * Read an integer option from the environment.
 *
 * Falls back to `fallback` whenever the parsed value is falsy, i.e. when the
 * variable is unset, is not a number, or parses to 0.
 *
 * @param {string} name - environment variable name
 * @param {number} fallback - value used when parsing yields a falsy result
 * @returns {number}
 */
function readIntEnv(name, fallback) {
  return parseInt(process.env[name], 10) || fallback
}

// Only the exact string 'true' enables dry-run mode.
const DRY_RUN = process.env.DRY_RUN === 'true'

// BATCH_LAST_ID is mandatory: bail out before touching anything else.
if (!process.env.BATCH_LAST_ID) {
  console.error('Set BATCH_LAST_ID and re-run.')
  process.exit(1)
}
const BATCH_LAST_ID = new ObjectId(process.env.BATCH_LAST_ID)

const INCREMENT_BY_S = readIntEnv('INCREMENT_BY_S', ONE_WEEK_IN_S)
const BATCH_SIZE = readIntEnv('BATCH_SIZE', 1000)
const READ_CONCURRENCY_SECONDARY = readIntEnv('READ_CONCURRENCY_SECONDARY', 1000)
const READ_CONCURRENCY_PRIMARY = readIntEnv('READ_CONCURRENCY_PRIMARY', 500)
const STOP_AT_S = readIntEnv('STOP_AT_S', NOW_IN_S)
const WRITE_CONCURRENCY = readIntEnv('WRITE_CONCURRENCY', 10)
// Pause (in ms) before the run starts, see letUserDoubleCheckInputs().
const LET_USER_DOUBLE_CHECK_INPUTS_FOR = readIntEnv(
  'LET_USER_DOUBLE_CHECK_INPUTS_FOR',
  TEN_SECONDS
)
|
|
|
|
|
|
|
|
/**
 * Convert the timestamp reported by `id.getTimestamp()` into seconds.
 *
 * @param {{ getTimestamp: () => Date }} id - object exposing `getTimestamp()`
 * @returns {number} the timestamp in seconds (milliseconds divided by 1000)
 */
function getSecondsFromObjectId(id) {
  const timestamp = id.getTimestamp()
  const millis = timestamp.getTime()
  return millis / 1000
}
|
|
|
|
|
|
|
|
/**
 * Walk the docs collection in project_id order, one time window at a time,
 * and hand every batch of distinct project ids to processBatch().
 *
 * Starts after BATCH_LAST_ID and keeps going while the timestamp of the
 * current lower bound is not past STOP_AT_S. Progress is reported on stderr
 * after every window.
 */
async function main() {
  await letUserDoubleCheckInputs()

  const progressFormat =
    'Processed %d projects (%d projects with orphaned docs/%d docs deleted) until %s'
  const totals = { processed: 0, withOrphanedDocs: 0, deletedDocs: 0 }
  let windowStartId = BATCH_LAST_ID

  while (getSecondsFromObjectId(windowStartId) <= STOP_AT_S) {
    const windowEndInS = getSecondsFromObjectId(windowStartId) + INCREMENT_BY_S
    let windowEndId = ObjectId.createFromTime(windowEndInS)

    // Lower edge is exclusive, upper edge is inclusive.
    const projectIdRange = { $gt: windowStartId, $lte: windowEndId }
    const docs = await db.docs
      .find(
        { project_id: projectIdRange },
        { readPreference: READ_PREFERENCE_SECONDARY }
      )
      .project({ project_id: 1 })
      .sort({ project_id: 1 })
      .limit(BATCH_SIZE)
      .toArray()

    if (docs.length > 0) {
      // De-duplicate via the string form, keeping first-seen order.
      const seenIds = new Set()
      for (const doc of docs) {
        seenIds.add(doc.project_id.toString())
      }
      const projectIds = [...seenIds].map(id => new ObjectId(id))
      console.log('Checking projects', JSON.stringify(projectIds))

      const batchResult = await processBatch(projectIds)
      totals.processed += projectIds.length
      totals.withOrphanedDocs += batchResult.nProjectsWithOrphanedDocs
      totals.deletedDocs += batchResult.nDeletedDocs

      if (docs.length === BATCH_SIZE) {
        // The limit was hit, so the window may hold more docs than we saw.
        // Shrink the window to the last project seen and resume after it.
        windowEndId = docs[docs.length - 1].project_id
      }
    }

    console.error(
      progressFormat,
      totals.processed,
      totals.withOrphanedDocs,
      totals.deletedDocs,
      windowEndId
    )

    windowStartId = windowEndId
  }
}
|
|
|
|
|
|
|
|
/**
 * Fetch the `_id` of every doc that belongs to the given project, reading
 * with READ_PREFERENCE_PRIMARY.
 *
 * @param {ObjectId} projectId
 * @returns {Promise<Array<{ _id: any }>>} docs projected down to `_id`
 */
async function getProjectDocs(projectId) {
  const filter = { project_id: projectId }
  const findOptions = {
    projection: { _id: 1 },
    readPreference: READ_PREFERENCE_PRIMARY,
  }
  const cursor = db.docs.find(filter, findOptions)
  return await cursor.toArray()
}
|
|
|
|
|
|
|
|
/**
 * Process one batch of project ids: find the ones reported by
 * getHardDeletedProjectIds(), log their remaining docs and, unless DRY_RUN is
 * set, call DocstoreManager.promises.destroyProject for each of them.
 *
 * NOTE(review): presumably getHardDeletedProjectIds() resolves to the subset
 * of `projectIds` whose project no longer exists - confirm in the helper.
 *
 * @param {Array<ObjectId>} projectIds
 * @returns {Promise<{ nProjectsWithOrphanedDocs: number, nDeletedDocs: number }>}
 *   `nDeletedDocs` counts the docs found for the flagged projects; it is
 *   computed in dry-run mode as well.
 */
async function processBatch(projectIds) {
  const orphanedProjectIds = await getHardDeletedProjectIds({
    projectIds,
    READ_CONCURRENCY_PRIMARY,
    READ_CONCURRENCY_SECONDARY,
  })

  let nDeletedDocs = 0

  // Logs the docs still present for one project and adds them to the tally.
  const tallyOrphanedDocs = async projectId => {
    const orphanedDocs = await getProjectDocs(projectId)
    nDeletedDocs += orphanedDocs.length

    const docIds = orphanedDocs.map(doc => doc._id)
    console.log(
      'Deleted project %s has %s orphaned docs: %s',
      projectId,
      orphanedDocs.length,
      JSON.stringify(docIds)
    )
  }

  await promiseMapWithLimit(
    READ_CONCURRENCY_PRIMARY,
    orphanedProjectIds,
    tallyOrphanedDocs
  )

  const shouldDelete = !DRY_RUN
  if (shouldDelete) {
    await promiseMapWithLimit(
      WRITE_CONCURRENCY,
      orphanedProjectIds,
      DocstoreManager.promises.destroyProject
    )
  }

  return {
    nProjectsWithOrphanedDocs: orphanedProjectIds.length,
    nDeletedDocs,
  }
}
|
|
|
|
|
|
|
|
/**
 * Print the effective options to stderr, then pause for
 * LET_USER_DOUBLE_CHECK_INPUTS_FOR milliseconds so the operator can abort
 * before any work starts.
 *
 * @returns {Promise<void>}
 */
async function letUserDoubleCheckInputs() {
  const effectiveOptions = {
    BATCH_LAST_ID,
    BATCH_SIZE,
    DRY_RUN,
    INCREMENT_BY_S,
    STOP_AT_S,
    READ_CONCURRENCY_SECONDARY,
    READ_CONCURRENCY_PRIMARY,
    WRITE_CONCURRENCY,
    LET_USER_DOUBLE_CHECK_INPUTS_FOR,
  }
  const prettyOptions = JSON.stringify(effectiveOptions, null, 2)
  console.error('Options:', prettyOptions)

  const waitInMs = LET_USER_DOUBLE_CHECK_INPUTS_FOR
  console.error('Waiting for you to double check inputs for', waitInMs, 'ms')
  await sleep(waitInMs)
}
|
|
|
|
|
2024-10-18 07:04:57 -04:00
|
|
|
// Script entry point: run main() and exit explicitly with 0 on success or
// 1 after logging the error on failure.
let exitCode = 0
try {
  await main()
  console.error('Done.')
} catch (error) {
  console.error({ error })
  exitCode = 1
}
process.exit(exitCode)
|