[DocArchiveManager] destroy in small batches

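Also add settings options for PARALLEL_JOBS and DESTROY_BATCH_SIZE: `parallelArchiveJobs` (env PARALLEL_ARCHIVE_JOBS, default 5) and `destroyBatchSize` (env DESTROY_BATCH_SIZE, default 2000).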
This commit is contained in:
Jakob Ackermann 2021-04-16 09:27:09 +01:00
parent ccf1f502e2
commit 3f94dd4dbf
4 changed files with 27 additions and 13 deletions

View file

@ -9,7 +9,8 @@ const RangeManager = require('./RangeManager')
const PersistorManager = require('./PersistorManager')
const pMap = require('p-map')
const PARALLEL_JOBS = 5
const PARALLEL_JOBS = settings.parallelArchiveJobs
const DESTROY_BATCH_SIZE = settings.destroyBatchSize
module.exports = {
archiveAllDocs: callbackify(archiveAllDocs),
@ -175,12 +176,15 @@ async function unarchiveDoc(projectId, docId) {
}
async function destroyAllDocs(projectId) {
const docs = await MongoManager.getProjectsDocs(
projectId,
{ include_deleted: true },
{ _id: 1 }
)
if (docs && docs.length) {
while (true) {
const docs = await MongoManager.getProjectsDocs(
projectId,
{ include_deleted: true, limit: DESTROY_BATCH_SIZE },
{ _id: 1 }
)
if (!docs || docs.length === 0) {
break
}
await pMap(docs, (doc) => destroyDoc(projectId, doc._id), {
concurrency: PARALLEL_JOBS
})

View file

@ -57,11 +57,13 @@ module.exports = MongoManager = {
if (!options.include_deleted) {
query.deleted = { $ne: true }
}
db.docs
.find(query, {
projection: filter
})
.toArray(callback)
const queryOptions = {
projection: filter
}
if (options.limit) {
queryOptions.limit = options.limit
}
db.docs.find(query, queryOptions).toArray(callback)
},
getArchivedProjectDocs(project_id, callback) {

View file

@ -40,7 +40,10 @@ const Settings = {
max_deleted_docs: parseInt(process.env.MAX_DELETED_DOCS, 10) || 2000,
max_doc_length: parseInt(process.env.MAX_DOC_LENGTH) || 2 * 1024 * 1024 // 2mb
max_doc_length: parseInt(process.env.MAX_DOC_LENGTH) || 2 * 1024 * 1024, // 2mb
destroyBatchSize: parseInt(process.env.DESTROY_BATCH_SIZE, 10) || 2000,
parallelArchiveJobs: parseInt(process.env.PARALLEL_ARCHIVE_JOBS, 10) || 5
}
if (process.env.MONGO_CONNECTION_STRING != null) {

View file

@ -518,6 +518,11 @@ describe('DocArchiveManager', function () {
})
describe('destroyAllDocs', function () {
// destroyAllDocs fetches docs in batches and stops once a fetch returns
// nothing, so the first stubbed call yields docs and the second yields an
// empty list to let the loop terminate.
beforeEach(function () {
MongoManager.promises.getProjectsDocs.onCall(0).resolves(mongoDocs)
MongoManager.promises.getProjectsDocs.onCall(1).resolves([])
})
it('should resolve with valid arguments', async function () {
await expect(DocArchiveManager.promises.destroyAllDocs(projectId)).to
.eventually.be.fulfilled