Merge pull request #21931 from overleaf/bg-get-all-blobs-for-project

add getProjectBlobs method to retrieve metadata for all blobs in a project

GitOrigin-RevId: 38f504a4fb56cd8ef8beaff1d8917ead26e85f5a
This commit is contained in:
Jakob Ackermann 2024-11-19 10:48:22 +01:00 committed by Copybot
parent 8e74d3c58c
commit fb36fff63d
5 changed files with 97 additions and 0 deletions

View file

@ -317,6 +317,16 @@ class BlobStore {
return blobs
}
/**
* Retrieve all blobs associated with the project.
* @returns {Promise<core.Blob[]>} A promise that resolves to an array of blobs.
*/
async getProjectBlobs() {
const projectBlobs = await this.backend.getProjectBlobs(this.projectId)
return projectBlobs
}
/**
* Delete all blobs that belong to the project.
*/

View file

@ -197,6 +197,55 @@ async function findBlobsSharded(projectId, hashSet) {
return blobs
}
/**
 * Return metadata for all blobs in the given project.
 *
 * Reads the project's record from the `blobs` collection first, then every
 * record of the `shardedBlobs` collection whose _id lies between the sharded
 * ids built for shards '0' and 'f' (inclusive).
 *
 * @param {string} projectId - must pass assert.mongoId
 * @return {Promise<Array>} one entry per stored record, converted with
 *   recordToBlob; an empty array when the project has no record in the
 *   `blobs` collection
 */
async function getProjectBlobs(projectId) {
  assert.mongoId(projectId, 'bad projectId')

  const result = await mongodb.blobs.findOne(
    { _id: new ObjectId(projectId) },
    { projection: { _id: 0 } }
  )

  // Without a main record the sharded collection is not consulted at all.
  if (!result) {
    return []
  }

  // Build blobs from the query results
  const blobs = []
  appendBlobsFromRecord(blobs, result)

  // Look for all possible sharded blobs
  const minShardedId = makeShardedId(projectId, '0')
  const maxShardedId = makeShardedId(projectId, 'f')
  // @ts-ignore We are using a custom _id here.
  const shardedRecords = mongodb.shardedBlobs.find(
    {
      _id: { $gte: minShardedId, $lte: maxShardedId },
    },
    { projection: { _id: 0 } }
  )

  for await (const shardedRecord of shardedRecords) {
    appendBlobsFromRecord(blobs, shardedRecord)
  }

  return blobs
}

/**
 * Convert every record found in the buckets of a project document (main or
 * sharded) and append the results to `blobs`.
 *
 * A document without a `blobs` field contributes nothing, so main and sharded
 * documents are treated the same way instead of only the sharded ones being
 * guarded.
 *
 * @param {Array} blobs - accumulator, mutated in place
 * @param {Object} projectRecord - document whose `blobs` field maps bucket
 *   keys to arrays of records
 */
function appendBlobsFromRecord(blobs, projectRecord) {
  if (projectRecord.blobs == null) {
    return
  }
  for (const bucket of Object.values(projectRecord.blobs)) {
    for (const record of bucket) {
      blobs.push(recordToBlob(record))
    }
  }
}
/**
* Add a blob's metadata to the blobs collection after it has been uploaded.
* @param {string} projectId
@ -323,6 +372,7 @@ module.exports = {
initialize,
findBlob,
findBlobs,
getProjectBlobs,
insertBlob,
deleteBlobs,
}

View file

@ -53,6 +53,23 @@ async function findBlobs(projectId, hashes) {
return blobs
}
/**
 * Load the metadata of every blob stored for a project.
 *
 * The project id is parsed as a base-10 integer and validated before the
 * `project_blobs` table is queried; each row is converted with recordToBlob.
 *
 * @param {string|number} projectId
 * @return {Promise<Array>} the converted rows
 */
async function getProjectBlobs(projectId) {
  const numericProjectId = parseInt(projectId, 10)
  assert.integer(numericProjectId, 'bad projectId')

  const rows = await knex('project_blobs')
    .select('hash_bytes', 'byte_length', 'string_length')
    .where({ project_id: numericProjectId })

  return rows.map(recordToBlob)
}
/**
* Add a blob's metadata to the blobs table after it has been uploaded.
*/
@ -108,6 +125,7 @@ module.exports = {
initialize,
findBlob,
findBlobs,
getProjectBlobs,
insertBlob,
deleteBlobs,
}

View file

@ -259,6 +259,16 @@ describe('BlobStore', function () {
testFiles.GRAPH_PNG_HASH,
])
})
it('getProjectBlobs() returns all blobs in the project', async function () {
  // Every blob of the project must be listed, in any order.
  const projectBlobs = await blobStore.getProjectBlobs()
  const actualHashes = projectBlobs.map(projectBlob => projectBlob.getHash())
  const expectedHashes = [
    testFiles.HELLO_TXT_HASH,
    testFiles.GRAPH_PNG_HASH,
    helloWorldHash,
  ]
  expect(actualHashes).to.have.members(expectedHashes)
})
})
describe('two blob stores on different projects', function () {

View file

@ -113,6 +113,15 @@ describe('BlobStore Mongo backend', function () {
})
})
describe('getProjectBlobs', function () {
  it('returns all blobs for a given project', async function () {
    // The expected set combines the hashes listed under both fixture keys.
    const projectBlobs = await mongoBackend.getProjectBlobs(projectId)
    const actual = projectBlobs.map(projectBlob => projectBlob.getHash())
    const expected = hashes.abcd.concat(hashes[1234])
    expect(actual).to.have.members(expected)
  })
})
describe('deleteBlobs', function () {
it('deletes all blobs for a given project', async function () {
await mongoBackend.deleteBlobs(projectId)