mirror of
https://github.com/overleaf/overleaf.git
synced 2024-11-07 20:31:06 -05:00
Merge pull request #109 from overleaf/jpa-batched-archiving-and-un-archiving
[DocArchiveManager] (un-)archive docs in batches and let db filter docs
This commit is contained in:
commit
3c95327b67
4 changed files with 63 additions and 44 deletions
|
@ -10,6 +10,8 @@ const PersistorManager = require('./PersistorManager')
|
|||
const pMap = require('p-map')
|
||||
|
||||
const PARALLEL_JOBS = settings.parallelArchiveJobs
|
||||
const ARCHIVE_BATCH_SIZE = settings.archiveBatchSize
|
||||
const UN_ARCHIVE_BATCH_SIZE = settings.unArchiveBatchSize
|
||||
const DESTROY_BATCH_SIZE = settings.destroyBatchSize
|
||||
const DESTROY_RETRY_COUNT = settings.destroyRetryCount
|
||||
|
||||
|
@ -33,20 +35,19 @@ module.exports = {
|
|||
}
|
||||
|
||||
async function archiveAllDocs(projectId) {
|
||||
const docs = await MongoManager.getProjectsDocs(
|
||||
projectId,
|
||||
{ include_deleted: true },
|
||||
{ lines: true, ranges: true, rev: true, inS3: true }
|
||||
)
|
||||
while (true) {
|
||||
const docs = await MongoManager.getNonArchivedProjectDocs(
|
||||
projectId,
|
||||
ARCHIVE_BATCH_SIZE
|
||||
)
|
||||
if (!docs || docs.length === 0) {
|
||||
break
|
||||
}
|
||||
|
||||
if (!docs) {
|
||||
throw new Errors.NotFoundError(`No docs for project ${projectId}`)
|
||||
await pMap(docs, (doc) => archiveDoc(projectId, doc), {
|
||||
concurrency: PARALLEL_JOBS
|
||||
})
|
||||
}
|
||||
|
||||
const docsToArchive = docs.filter((doc) => !doc.inS3)
|
||||
await pMap(docsToArchive, (doc) => archiveDoc(projectId, doc), {
|
||||
concurrency: PARALLEL_JOBS
|
||||
})
|
||||
}
|
||||
|
||||
async function archiveDocById(projectId, docId) {
|
||||
|
@ -102,18 +103,26 @@ async function archiveDoc(projectId, doc) {
|
|||
}
|
||||
|
||||
async function unArchiveAllDocs(projectId) {
|
||||
let docs
|
||||
if (settings.docstore.keepSoftDeletedDocsArchived) {
|
||||
docs = await MongoManager.getNonDeletedArchivedProjectDocs(projectId)
|
||||
} else {
|
||||
docs = await MongoManager.getArchivedProjectDocs(projectId)
|
||||
while (true) {
|
||||
let docs
|
||||
if (settings.docstore.keepSoftDeletedDocsArchived) {
|
||||
docs = await MongoManager.getNonDeletedArchivedProjectDocs(
|
||||
projectId,
|
||||
UN_ARCHIVE_BATCH_SIZE
|
||||
)
|
||||
} else {
|
||||
docs = await MongoManager.getArchivedProjectDocs(
|
||||
projectId,
|
||||
UN_ARCHIVE_BATCH_SIZE
|
||||
)
|
||||
}
|
||||
if (!docs || docs.length === 0) {
|
||||
break
|
||||
}
|
||||
await pMap(docs, (doc) => unarchiveDoc(projectId, doc._id), {
|
||||
concurrency: PARALLEL_JOBS
|
||||
})
|
||||
}
|
||||
if (!docs) {
|
||||
throw new Errors.NotFoundError(`No docs for project ${projectId}`)
|
||||
}
|
||||
await pMap(docs, (doc) => unarchiveDoc(projectId, doc._id), {
|
||||
concurrency: PARALLEL_JOBS
|
||||
})
|
||||
}
|
||||
|
||||
async function unarchiveDoc(projectId, docId) {
|
||||
|
|
|
@ -64,21 +64,33 @@ module.exports = MongoManager = {
|
|||
db.docs.find(query, queryOptions).toArray(callback)
|
||||
},
|
||||
|
||||
getArchivedProjectDocs(project_id, callback) {
|
||||
getArchivedProjectDocs(project_id, maxResults, callback) {
|
||||
const query = {
|
||||
project_id: ObjectId(project_id.toString()),
|
||||
inS3: true
|
||||
}
|
||||
db.docs.find(query).toArray(callback)
|
||||
db.docs
|
||||
.find(query, { projection: { _id: 1 }, limit: maxResults })
|
||||
.toArray(callback)
|
||||
},
|
||||
|
||||
getNonDeletedArchivedProjectDocs(project_id, callback) {
|
||||
getNonArchivedProjectDocs(project_id, maxResults, callback) {
|
||||
const query = {
|
||||
project_id: ObjectId(project_id.toString()),
|
||||
inS3: { $ne: true }
|
||||
}
|
||||
db.docs.find(query, { limit: maxResults }).toArray(callback)
|
||||
},
|
||||
|
||||
getNonDeletedArchivedProjectDocs(project_id, maxResults, callback) {
|
||||
const query = {
|
||||
project_id: ObjectId(project_id.toString()),
|
||||
deleted: { $ne: true },
|
||||
inS3: true
|
||||
}
|
||||
db.docs.find(query).toArray(callback)
|
||||
db.docs
|
||||
.find(query, { projection: { _id: 1 }, limit: maxResults })
|
||||
.toArray(callback)
|
||||
},
|
||||
|
||||
upsertIntoDocCollection(project_id, doc_id, updates, callback) {
|
||||
|
|
|
@ -37,6 +37,8 @@ const Settings = {
|
|||
|
||||
max_doc_length: parseInt(process.env.MAX_DOC_LENGTH) || 2 * 1024 * 1024, // 2mb
|
||||
|
||||
archiveBatchSize: parseInt(process.env.ARCHIVE_BATCH_SIZE, 10) || 50,
|
||||
unArchiveBatchSize: parseInt(process.env.UN_ARCHIVE_BATCH_SIZE, 10) || 50,
|
||||
destroyBatchSize: parseInt(process.env.DESTROY_BATCH_SIZE, 10) || 2000,
|
||||
destroyRetryCount: parseInt(process.env.DESTROY_RETRY_COUNT || '3', 10),
|
||||
parallelArchiveJobs: parseInt(process.env.PARALLEL_ARCHIVE_JOBS, 10) || 5
|
||||
|
|
|
@ -116,12 +116,24 @@ describe('DocArchiveManager', function () {
|
|||
deleteObject: sinon.stub().resolves()
|
||||
}
|
||||
|
||||
const getNonArchivedProjectDocs = sinon.stub()
|
||||
getNonArchivedProjectDocs
|
||||
.onCall(0)
|
||||
.resolves(mongoDocs.filter((doc) => !doc.inS3))
|
||||
getNonArchivedProjectDocs.onCall(1).resolves([])
|
||||
|
||||
const getArchivedProjectDocs = sinon.stub()
|
||||
getArchivedProjectDocs.onCall(0).resolves(archivedDocs)
|
||||
getArchivedProjectDocs.onCall(1).resolves([])
|
||||
|
||||
MongoManager = {
|
||||
promises: {
|
||||
markDocAsArchived: sinon.stub().resolves(),
|
||||
upsertIntoDocCollection: sinon.stub().resolves(),
|
||||
getProjectsDocs: sinon.stub().resolves(mongoDocs),
|
||||
getArchivedProjectDocs: sinon.stub().resolves(archivedDocs),
|
||||
getNonDeletedArchivedProjectDocs: getArchivedProjectDocs,
|
||||
getNonArchivedProjectDocs,
|
||||
getArchivedProjectDocs,
|
||||
findDoc: sinon.stub().rejects(new Errors.NotFoundError()),
|
||||
destroyDoc: sinon.stub().resolves()
|
||||
}
|
||||
|
@ -519,14 +531,6 @@ describe('DocArchiveManager', function () {
|
|||
MongoManager.promises.markDocAsArchived
|
||||
).not.to.have.been.calledWith(mongoDocs[3]._id)
|
||||
})
|
||||
|
||||
it('should return error if the project has no docs', async function () {
|
||||
MongoManager.promises.getProjectsDocs.resolves(null)
|
||||
|
||||
await expect(
|
||||
DocArchiveManager.promises.archiveAllDocs(projectId)
|
||||
).to.eventually.be.rejected.and.be.instanceof(Errors.NotFoundError)
|
||||
})
|
||||
})
|
||||
|
||||
describe('unArchiveAllDocs', function () {
|
||||
|
@ -545,14 +549,6 @@ describe('DocArchiveManager', function () {
|
|||
)
|
||||
}
|
||||
})
|
||||
|
||||
it('should return error if the project has no docs', async function () {
|
||||
MongoManager.promises.getArchivedProjectDocs.resolves(null)
|
||||
|
||||
await expect(
|
||||
DocArchiveManager.promises.unArchiveAllDocs(projectId)
|
||||
).to.eventually.be.rejected.and.be.instanceof(Errors.NotFoundError)
|
||||
})
|
||||
})
|
||||
|
||||
describe('destroyAllDocs', function () {
|
||||
|
|
Loading…
Reference in a new issue