mirror of
https://github.com/overleaf/overleaf.git
synced 2025-01-13 10:03:30 +00:00
Merge pull request #11890 from overleaf/em-fix-deleted-chunks-indexes
Add a partial index for pending chunks
GitOrigin-RevId: ec0705d1de7ffacb2cb88a8e5e1ff9f05c5acf88
This commit is contained in:
parent
e35b6e49d1
commit
e41d3cf5db
3 changed files with 112 additions and 16 deletions
|
@ -197,29 +197,49 @@ async function deleteProjectChunks(projectId) {
|
|||
*/
|
||||
async function getOldChunksBatch(count, minAgeSecs) {
  // Collects up to `count` chunks that are in the 'deleted' or 'pending' state
  // and whose updatedAt is more than `minAgeSecs` seconds in the past.
  //
  // Resolves to an array of { chunkId, projectId } records, with both ids
  // converted to strings.
  const maxUpdatedAt = new Date(Date.now() - minAgeSecs * 1000)
  const batch = []

  // Track the free space in the batch in a local variable rather than
  // mutating the `count` parameter.
  let remaining = count

  // We need to fetch one state at a time to take advantage of the partial
  // indexes on the chunks collection.
  //
  // Mongo 6.0 allows partial indexes that use the $in operator. When we reach
  // that Mongo version, we can create a partial index on both the deleted and
  // pending states and simplify this logic a bit.
  for (const state of ['deleted', 'pending']) {
    if (remaining <= 0) {
      // There's no more space in the batch. Stopping here also matters for
      // correctness: MongoDB treats a limit of 0 as "no limit", so a
      // non-positive value must never be used as the query limit.
      break
    }

    const cursor = mongodb.chunks
      .find(
        { state, updatedAt: { $lt: maxUpdatedAt } },
        {
          limit: remaining,
          projection: { _id: 1, projectId: 1 },
        }
      )
      .map(record => ({
        chunkId: record._id.toString(),
        projectId: record.projectId.toString(),
      }))

    for await (const record of cursor) {
      batch.push(record)
      remaining -= 1
    }
  }

  return batch
}
|
||||
|
||||
/**
 * Delete a batch of old chunks from the database
 *
 * Only chunks that are still in the 'deleted' or 'pending' state are removed;
 * ids that point at chunks in any other state are left untouched.
 *
 * @param {Array<string>} chunkIds - chunk ids, each of which is converted to
 *   an ObjectId before querying
 * @return {Promise<void>}
 */
async function deleteOldChunks(chunkIds) {
  // Use an explicit callback: Array.prototype.map also passes the index and
  // the array itself, which must not leak into the ObjectId constructor, and
  // `new` keeps this working when ObjectId is a class that can't be called as
  // a plain function.
  const objectIds = chunkIds.map(id => new ObjectId(id))

  await mongodb.chunks.deleteMany({
    _id: { $in: objectIds },
    state: { $in: ['deleted', 'pending'] },
  })
}
|
||||
|
||||
/**
|
||||
|
|
|
@ -0,0 +1,52 @@
|
|||
const { expect } = require('chai')
|
||||
const { ObjectId } = require('mongodb')
|
||||
const { Chunk, Snapshot, History } = require('overleaf-editor-core')
|
||||
const cleanup = require('./support/cleanup')
|
||||
const backend = require('../../../../storage/lib/chunk_store/mongo')
|
||||
|
||||
// Acceptance test for the Mongo chunk store backend: checks that garbage
// collection picks up chunks in both the 'pending' and 'deleted' states.
describe('chunk store Mongo backend', function () {
  beforeEach(cleanup.everything)

  describe('garbage collection', function () {
    it('deletes pending and deleted chunks', async function () {
      // Construct the id with `new` so this works whether or not ObjectId can
      // be called as a plain function.
      const projectId = new ObjectId().toString()

      // Create a pending chunk: inserted but never confirmed
      const pendingChunk = makeChunk([], 0)
      const pendingChunkId = await backend.insertPendingChunk(
        projectId,
        pendingChunk
      )

      // Create a deleted chunk: inserted, confirmed, then deleted
      const deletedChunk = makeChunk([], 0)
      const deletedChunkId = await backend.insertPendingChunk(
        projectId,
        deletedChunk
      )
      await backend.confirmCreate(projectId, deletedChunk, deletedChunkId)
      await backend.deleteChunk(projectId, deletedChunkId)

      // Check that both chunks are ready to be deleted. A minimum age of 0
      // seconds makes the freshly created chunks eligible right away.
      let oldChunks = await backend.getOldChunksBatch(100, 0)
      expect(oldChunks).to.have.deep.members([
        { projectId, chunkId: pendingChunkId },
        { projectId, chunkId: deletedChunkId },
      ])

      // Delete old chunks
      await backend.deleteOldChunks(oldChunks.map(chunk => chunk.chunkId))

      // Check that there are no more chunks to be deleted
      oldChunks = await backend.getOldChunksBatch(100, 0)
      expect(oldChunks).to.deep.equal([])
    })
  })
})
|
||||
|
||||
/**
 * Build a chunk on top of an empty snapshot (no files).
 *
 * @param {Array} changes - changes handed to the History constructor
 * @param {number} versionNumber - passed as the second argument to the Chunk
 *   constructor
 * @return {Chunk}
 */
function makeChunk(changes, versionNumber) {
  const snapshot = Snapshot.fromRaw({ files: {} })
  // Forward the caller's changes instead of always building an empty history;
  // the parameter used to be silently ignored.
  const history = new History(snapshot, changes)
  return new Chunk(history, versionNumber)
}
|
|
@ -0,0 +1,24 @@
|
|||
const Helpers = require('./lib/helpers')
|
||||
|
||||
exports.tags = ['server-ce', 'server-pro', 'saas']
|
||||
|
||||
// Index definitions managed by this migration.
const indexes = [
  {
    name: 'state_pending',
    // Mongo 4.4 refuses to create several indexes that share the same keys
    // but differ in their options; using the { state: -1 } sort order
    // sidesteps that restriction, which no longer exists in Mongo 5.0.
    //
    // See https://www.mongodb.com/docs/manual/core/index-partial/#restrictions
    key: { state: -1 },
    partialFilterExpression: { state: 'pending' },
  },
]
|
||||
|
||||
exports.migrate = async ({ db }) => {
|
||||
await Helpers.addIndexesToCollection(db.projectHistoryChunks, indexes)
|
||||
}
|
||||
|
||||
exports.rollback = async ({ db }) => {
|
||||
await Helpers.dropIndexesFromCollection(db.projectHistoryChunks, indexes)
|
||||
}
|
Loading…
Reference in a new issue