Merge pull request #11890 from overleaf/em-fix-deleted-chunks-indexes

Add a partial index for pending chunks

GitOrigin-RevId: ec0705d1de7ffacb2cb88a8e5e1ff9f05c5acf88
This commit is contained in:
Eric Mc Sween 2023-02-22 10:09:18 -05:00 committed by Copybot
parent e35b6e49d1
commit e41d3cf5db
3 changed files with 112 additions and 16 deletions

View file

@ -197,29 +197,49 @@ async function deleteProjectChunks(projectId) {
*/
async function getOldChunksBatch(count, minAgeSecs) {
const maxUpdatedAt = new Date(Date.now() - minAgeSecs * 1000)
const cursor = mongodb.chunks.find(
{
state: { $in: ['deleted', 'pending'] },
updatedAt: { $lt: maxUpdatedAt },
},
{
limit: count,
projection: { _id: 1, projectId: 1 },
const batch = []
// We need to fetch one state at a time to take advantage of the partial
// indexes on the chunks collection.
//
// Mongo 6.0 allows partial indexes that use the $in operator. When we reach
// that Mongo version, we can create a partial index on both the deleted and
// pending states and simplify this logic a bit.
for (const state of ['deleted', 'pending']) {
if (count === 0) {
// There's no more space in the batch
break
}
)
return await cursor
.map(record => ({
chunkId: record._id,
projectId: record.projectId,
}))
.toArray()
const cursor = mongodb.chunks
.find(
{ state, updatedAt: { $lt: maxUpdatedAt } },
{
limit: count,
projection: { _id: 1, projectId: 1 },
}
)
.map(record => ({
chunkId: record._id.toString(),
projectId: record.projectId.toString(),
}))
for await (const record of cursor) {
batch.push(record)
count -= 1
}
}
return batch
}
/**
* Delete a batch of old chunks from the database
*/
async function deleteOldChunks(chunkIds) {
await mongodb.chunks.deleteMany({ _id: { $in: chunkIds }, state: 'deleted' })
await mongodb.chunks.deleteMany({
_id: { $in: chunkIds.map(ObjectId) },
state: { $in: ['deleted', 'pending'] },
})
}
/**

View file

@ -0,0 +1,52 @@
const { expect } = require('chai')
const { ObjectId } = require('mongodb')
const { Chunk, Snapshot, History } = require('overleaf-editor-core')
const cleanup = require('./support/cleanup')
const backend = require('../../../../storage/lib/chunk_store/mongo')
describe('chunk store Mongo backend', function () {
beforeEach(cleanup.everything)
describe('garbage collection', function () {
it('deletes pending and deleted chunks', async function () {
const projectId = ObjectId().toString()
// Create a pending chunk
const pendingChunk = makeChunk([], 0)
const pendingChunkId = await backend.insertPendingChunk(
projectId,
pendingChunk
)
// Create a deleted chunk
const deletedChunk = makeChunk([], 0)
const deletedChunkId = await backend.insertPendingChunk(
projectId,
deletedChunk
)
await backend.confirmCreate(projectId, deletedChunk, deletedChunkId)
await backend.deleteChunk(projectId, deletedChunkId)
// Check that both chunks are ready to be deleted
let oldChunks = await backend.getOldChunksBatch(100, 0)
expect(oldChunks).to.have.deep.members([
{ projectId, chunkId: pendingChunkId },
{ projectId, chunkId: deletedChunkId },
])
// Delete old chunks
await backend.deleteOldChunks(oldChunks.map(chunk => chunk.chunkId))
// Check that there are no more chunks to be deleted
oldChunks = await backend.getOldChunksBatch(100, 0)
expect(oldChunks).to.deep.equal([])
})
})
})
function makeChunk(changes, versionNumber) {
const snapshot = Snapshot.fromRaw({ files: {} })
const history = new History(snapshot, [])
const chunk = new Chunk(history, versionNumber)
return chunk
}

View file

@ -0,0 +1,24 @@
const Helpers = require('./lib/helpers')
exports.tags = ['server-ce', 'server-pro', 'saas']
const indexes = [
{
// The { state: -1 } sort order works around a restriction of Mongo 4.4
// where it doesn't allow multiple indexes with the same keys and different
// options. The restriction has been lifted in Mongo 5.0
//
// See https://www.mongodb.com/docs/manual/core/index-partial/#restrictions
key: { state: -1 },
name: 'state_pending',
partialFilterExpression: { state: 'pending' },
},
]
exports.migrate = async ({ db }) => {
await Helpers.addIndexesToCollection(db.projectHistoryChunks, indexes)
}
exports.rollback = async ({ db }) => {
await Helpers.dropIndexesFromCollection(db.projectHistoryChunks, indexes)
}