// Purge project-archiver data that belongs to hard-deleted projects.
// The script pages through the project archiver's /project/list endpoint,
// checks each batch of project ids against the database, and destroys the
// archive data of projects that have been hard deleted. It runs as a dry
// run unless DRY_RUN=false is set explicitly.
const Settings = require('@overleaf/settings')
const OError = require('@overleaf/o-error')
const { waitForDb } = require('../app/src/infrastructure/mongodb')
const { promiseMapWithLimit } = require('../app/src/util/promises')
const { getHardDeletedProjectIds } = require('./delete_orphaned_data_helper')
const TpdsUpdateSender = require('../app/src/Features/ThirdPartyDataStore/TpdsUpdateSender')
const { promisify } = require('util')
const { ObjectId } = require('mongodb')
const fetch = require('node-fetch')

const sleep = promisify(setTimeout)

// Tuning knobs, all overridable via the environment.
const START_OFFSET = process.env.START_OFFSET
const BATCH_SIZE = parseInt(process.env.BATCH_SIZE, 10) || 1000
const DRY_RUN = process.env.DRY_RUN !== 'false'
const READ_CONCURRENCY_SECONDARY =
  parseInt(process.env.READ_CONCURRENCY_SECONDARY, 10) || 1000
const READ_CONCURRENCY_PRIMARY =
  parseInt(process.env.READ_CONCURRENCY_PRIMARY, 10) || 500
const WRITE_CONCURRENCY = parseInt(process.env.WRITE_CONCURRENCY, 10) || 10
const LET_USER_DOUBLE_CHECK_INPUTS_FOR =
  parseInt(process.env.LET_USER_DOUBLE_CHECK_INPUTS_FOR, 10) || 10 * 1000

async function main() {
  await letUserDoubleCheckInputs()
  await waitForDb()

  let processed = 0
  let hardDeleted = 0
  let pageToken = ''
  let startOffset = START_OFFSET

  // The archiver signals the end of the listing with an undefined nextPageToken.
  while (pageToken !== undefined) {
    const url = new URL(`${Settings.apis.project_archiver.url}/project/list`)
    url.searchParams.append('pageToken', pageToken)
    // Only send startOffset when it is set; URLSearchParams would otherwise
    // serialize it as the literal string "undefined".
    if (startOffset !== undefined) {
      url.searchParams.append('startOffset', startOffset)
    }

    const response = await fetch(url, {
      headers: { Accept: 'application/json' },
    })
    if (!response.ok) {
      throw new OError('Failed to get list of projects from project archiver', {
        status: response.status,
      })
    }
    const { nextPageToken, entries } = await response.json()
    pageToken = nextPageToken
    // The offset is only needed for the first page; paging takes over from here.
    startOffset = undefined

    hardDeleted += await processBatch(entries)
    processed += entries.length

    // Log the prefix of the last entry in the batch: pass it as START_OFFSET
    // to resume an interrupted run from this point.
    console.log(
      'processed:',
      processed.toString().padStart(10, '0'),
      'hard deleted:',
      hardDeleted.toString().padStart(10, '0'),
      'nextPageToken:',
      nextPageToken,
      'START_OFFSET:',
      entries.pop()?.prefix
    )
  }
}

async function processBatch(entries) {
  // A project can have multiple archive prefixes; group them by project id.
  const projectIdToPrefix = new Map()
  for (const { prefix, projectId } of entries) {
    const prefixes = projectIdToPrefix.get(projectId) || []
    prefixes.push(prefix)
    projectIdToPrefix.set(projectId, prefixes)
  }
  const projectIds = Array.from(projectIdToPrefix.keys()).map(
    id => new ObjectId(id)
  )
  const projectsWithOrphanedArchive = await getHardDeletedProjectIds({
    projectIds,
    READ_CONCURRENCY_PRIMARY,
    READ_CONCURRENCY_SECONDARY,
  })

  // Destroy all prefixes of every orphaned project, WRITE_CONCURRENCY at a time.
  await promiseMapWithLimit(
    WRITE_CONCURRENCY,
    projectsWithOrphanedArchive.flatMap(id =>
      projectIdToPrefix.get(id.toString())
    ),
    hardDeleteProjectArchiverData
  )

  return projectsWithOrphanedArchive.length
}

async function hardDeleteProjectArchiverData(prefix) {
  console.log(`Destroying hard deleted project archive at '${prefix}/'`)
  if (DRY_RUN) return

  // Retry up to 10 times with a linearly increasing delay between attempts.
  for (let i = 0; i < 10; i++) {
    await sleep(1000 * i)
    try {
      const ok = await TpdsUpdateSender.promises.deleteProject({
        projectId: encodeURIComponent(prefix),
      })
      if (ok) {
        return
      }
    } catch (e) {
      console.error(`deletion failed for '${prefix}/'`, e)
    }
  }
  throw new Error(`deletion failed for '${prefix}/', check logs`)
}

async function letUserDoubleCheckInputs() {
  console.error(
    'Options:',
    JSON.stringify(
      {
        BATCH_SIZE,
        DRY_RUN,
        LET_USER_DOUBLE_CHECK_INPUTS_FOR,
        READ_CONCURRENCY_SECONDARY,
        READ_CONCURRENCY_PRIMARY,
        START_OFFSET,
        WRITE_CONCURRENCY,
      },
      null,
      2
    )
  )
  console.error(
    'Waiting for you to double check inputs for',
    LET_USER_DOUBLE_CHECK_INPUTS_FOR,
    'ms'
  )
  await sleep(LET_USER_DOUBLE_CHECK_INPUTS_FOR)
}

main()
  .then(() => {
    console.log('Done.')
    process.exit(0)
  })
  .catch(error => {
    console.error({ error })
    process.exit(1)
  })
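// Example invocation (a sketch: the script filename and working directory are
// assumptions, not defined by this file; run it from wherever the relative
// requires above resolve):
//
//   WRITE_CONCURRENCY=20 node scripts/delete_orphaned_project_archive_data.js
//
// The run stays a dry run until DRY_RUN=false is set explicitly; any other
// value, including leaving it unset, keeps the script from deleting anything.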