[history-v1] block deletion of bucket prefix that is not a valid project prefix (#22889)

Co-authored-by: Brian Gough <brian.gough@overleaf.com>
GitOrigin-RevId: a3aff76a2e299b2e2ed030e34da0631fce51ad4b
This commit is contained in:
Jakob Ackermann 2025-02-06 11:03:13 +00:00 committed by Copybot
parent 185c53bd70
commit 26321c5dba
10 changed files with 123 additions and 33 deletions

View file

@ -343,9 +343,10 @@ class PerProjectEncryptedS3Persistor extends S3Persistor {
}
async deleteDirectory(bucketName, path, continuationToken) {
// Let [Settings.pathToProjectFolder] validate the project path before deleting things.
const { projectFolder, dekPath } = this.#buildProjectPaths(bucketName, path)
// Note: Listing/Deleting a prefix does not require SSE-C credentials.
await super.deleteDirectory(bucketName, path, continuationToken)
const { projectFolder, dekPath } = this.#buildProjectPaths(bucketName, path)
if (projectFolder === path) {
await super.deleteObject(
this.#settings.dataEncryptionKeyBucketName,

View file

@ -1305,11 +1305,39 @@ describe('Filestore', function () {
})
describe('deleteDirectory', function () {
// GET-check callbacks returned by createRandomContent for each test file;
// tests use them to verify the objects are still readable after a delete.
let checkGET1, checkGET2
beforeEach('create files', async function () {
  checkGET1 = await createRandomContent(fileUrl1, '1')
  checkGET2 = await createRandomContent(fileUrl2, '2')
})
// Deleting a short (3-char) top-level prefix must be rejected by the
// pathToProjectFolder validator and must leave every object untouched.
it('should refuse to delete top-level prefix', async function () {
await expect(
app.persistor.deleteDirectory(
Settings.filestore.stores.user_files,
projectId.slice(0, 3)
)
).to.be.rejectedWith('not a project-folder')
// File 1 still exists and is still readable.
expect(
await app.persistor.checkIfObjectExists(
Settings.filestore.stores.user_files,
fileKey1
)
).to.equal(true)
await checkGET1()
// File 2 still exists, its DEK is intact (32 bytes), and it is readable.
expect(
await app.persistor.checkIfObjectExists(
Settings.filestore.stores.user_files,
fileKey2
)
).to.equal(true)
expect(
await app.persistor.getDataEncryptionKeySize(
Settings.filestore.stores.user_files,
fileKey2
)
).to.equal(32)
await checkGET2()
})
it('should delete sub-folder and keep DEK', async function () {
await app.persistor.deleteDirectory(
Settings.filestore.stores.user_files,

View file

@ -40,7 +40,9 @@ function s3SSECConfig() {
automaticallyRotateDEKEncryption: true,
dataEncryptionKeyBucketName: process.env.AWS_S3_USER_FILES_DEK_BUCKET_NAME,
pathToProjectFolder(_bucketName, path) {
const [projectFolder] = path.match(/^[a-f0-9]+\//)
const match = path.match(/^[a-f0-9]{24}\//)
if (!match) throw new Error('not a project-folder')
const [projectFolder] = match
return projectFolder
},
async getRootKeyEncryptionKeys() {

View file

@ -4,6 +4,7 @@ import Path from 'node:path'
import _ from 'lodash'
import config from 'config'
import { SecretManagerServiceClient } from '@google-cloud/secret-manager'
import OError from '@overleaf/o-error'
import {
PerProjectEncryptedS3Persistor,
RootKeyEncryptionKey,
@ -55,17 +56,24 @@ if (DELETION_ONLY) {
getRawRootKeyEncryptionKeys = () => new Promise(_resolve => {})
}
// A project folder is either a postgres-style numeric id ("123/456/789/",
// where the last segment may be longer) or a Mongo ObjectId split into
// 3/3/18 lowercase-hex characters. Both alternatives are grouped and fully
// anchored: without the group, `^` would bind only to the first alternative
// and `$` only to the second, so e.g. "zabc/def/<18 hex>/" would slip past
// validation via an unanchored match starting mid-string.
const PROJECT_FOLDER_REGEX =
  /^(\d{3}\/\d{3}\/\d{3,}|[0-9a-f]{3}\/[0-9a-f]{3}\/[0-9a-f]{18})\/$/
/**
* @param {string} bucketName
* @param {string} path
* @return {string}
*/
function pathToProjectFolder(bucketName, path) {
export function pathToProjectFolder(bucketName, path) {
switch (bucketName) {
case deksBucket:
case chunksBucket:
case projectBlobsBucket:
return Path.join(...path.split('/').slice(0, 3)) + '/'
const projectFolder = Path.join(...path.split('/').slice(0, 3)) + '/'
if (!PROJECT_FOLDER_REGEX.test(projectFolder)) {
throw new OError('invalid project folder', { bucketName, path })
}
return projectFolder
default:
throw new Error(`${bucketName} does not store per-project files`)
}

View file

@ -98,12 +98,6 @@ describe('backupDeletion', function () {
const projectIdWithChunks = new ObjectId('000000000000000000000005')
const projectIdNoHistoryId = new ObjectId('000000000000000000000006')
// Wipe every backup bucket so each test starts from empty storage.
beforeEach('cleanup s3 buckets', async function () {
await backupPersistor.deleteDirectory(deksBucket, '')
await backupPersistor.deleteDirectory(chunksBucket, '')
await backupPersistor.deleteDirectory(projectBlobsBucket, '')
})
beforeEach('populate mongo', async function () {
await deletedProjectsCollection.insertMany([
{

View file

@ -261,12 +261,6 @@ describe('back_fill_file_hash script', function () {
}
beforeEach(cleanup.everything)
// Empty the backup buckets and verify they really are empty before seeding.
beforeEach('cleanup s3 buckets', async function () {
await backupPersistor.deleteDirectory(deksBucket, '')
await backupPersistor.deleteDirectory(projectBlobsBucket, '')
expect(await listS3Bucket(deksBucket)).to.have.length(0)
expect(await listS3Bucket(projectBlobsBucket)).to.have.length(0)
})
beforeEach('populate mongo', async function () {
await globalBlobs.insertMany([

View file

@ -325,12 +325,6 @@ describe('back_fill_file_hash_fix_up script', function () {
}
before(cleanup.everything)
// One-time (before, not beforeEach) wipe of the backup buckets, with a
// check that they are empty before the suite populates them.
before('cleanup s3 buckets', async function () {
await backupPersistor.deleteDirectory(deksBucket, '')
await backupPersistor.deleteDirectory(projectBlobsBucket, '')
expect(await listS3Bucket(deksBucket)).to.have.length(0)
expect(await listS3Bucket(projectBlobsBucket)).to.have.length(0)
})
before('populate blobs/GCS', async function () {
await FILESTORE_PERSISTOR.sendStream(

View file

@ -18,6 +18,7 @@ import {
projectBlobsBucket,
} from '../../../../storage/lib/backupPersistor.mjs'
import { WritableBuffer } from '@overleaf/stream-utils'
import cleanup from './support/cleanup.js'
async function listS3BucketRaw(bucket) {
const client = backupPersistor._getClientForBucket(bucket)
@ -55,14 +56,7 @@ describe('backupBlob', function () {
}
})
// Remove any blobs left in the backup bucket and confirm it is empty.
beforeEach(async function () {
await backupPersistor.deleteDirectory(projectBlobsBucket, '')
expect(await listS3Bucket(projectBlobsBucket)).to.have.length(0)
})
// Reset the mongo collection tracking which blobs were backed up.
beforeEach('cleanup mongo', async function () {
await backedUpBlobs.deleteMany({})
})
// Full cleanup (postgres, mongo, persistor, backup buckets) per test.
beforeEach(cleanup.everything)
describe('when the blob is already backed up', function () {
let blob

View file

@ -0,0 +1,51 @@
import {
pathToProjectFolder,
projectBlobsBucket,
} from '../../../../storage/lib/backupPersistor.mjs'
import { expect } from 'chai'
describe('backupPersistor', () => {
  describe('pathToProjectFolder', () => {
    it('handles postgres and mongo-ids', function () {
      // [input, expected] pairs: the resolved project folder always carries
      // a trailing slash, whether the input had one, lacked one, or had
      // extra path components after the project id.
      const validCases = [
        ['100/000/000', '100/000/000/'],
        ['100/000/000/', '100/000/000/'],
        ['100/000/000/foo', '100/000/000/'],
        ['210/000/000', '210/000/000/'],
        ['987/654/321', '987/654/321/'],
        ['987/654/3219', '987/654/3219/'],
        ['fed/cba/987654321000000000', 'fed/cba/987654321000000000/'],
        ['fed/cba/987654321000000000/', 'fed/cba/987654321000000000/'],
        ['fed/cba/987654321000000000/foo', 'fed/cba/987654321000000000/'],
      ]
      for (const [input, expected] of validCases) {
        expect(pathToProjectFolder(projectBlobsBucket, input), input).to.equal(
          expected
        )
      }
    })
    it('rejects invalid input', function () {
      // Empty, too-short, or non-hex prefixes must throw instead of being
      // silently mapped to some (wrong) folder prefix.
      const invalidCases = [
        '',
        '//',
        '1/2/3',
        '123/456/78',
        'abc/d/e',
        'abc/def/012',
      ]
      for (const key of invalidCases) {
        const call = () => pathToProjectFolder(projectBlobsBucket, key)
        expect(call, key).to.throw('invalid project folder')
      }
    })
  })
})

View file

@ -1,6 +1,7 @@
const config = require('config')
const { knex, persistor, mongodb } = require('../../../../../storage')
const { S3Persistor } = require('@overleaf/object-persistor/src/S3Persistor')
const POSTGRES_TABLES = [
'chunks',
@ -55,16 +56,39 @@ async function clearBucket(name) {
await persistor.deleteDirectory(name, '')
}
// Lazily-built low-level persistor reused across cleanup calls.
let s3PersistorForBackupCleanup

// Empty every backup bucket. The backupPersistor refuses to delete short
// prefixes, so cleanup goes through a plain S3 persistor built from the
// backupPersistor's own settings.
async function cleanupBackup() {
  // The backupPersistor refuses to delete short prefixes. Use a low-level S3 persistor.
  if (!s3PersistorForBackupCleanup) {
    const { backupPersistor } = await import(
      '../../../../../storage/lib/backupPersistor.mjs'
    )
    s3PersistorForBackupCleanup = new S3Persistor(backupPersistor.settings)
  }
  const bucketNames = Object.values(config.get('backupStore'))
  const deletions = bucketNames.map(bucketName =>
    s3PersistorForBackupCleanup.deleteDirectory(bucketName, '')
  )
  await Promise.all(deletions)
}
// Run every cleanup phase (postgres, mongo, persistor, backup buckets)
// in parallel; they touch independent stores.
async function cleanupEverything() {
  // Set the timeout when called in a Mocha test. This function is also called
  // in benchmarks where it is not passed a Mocha context.
  this.timeout?.(5000)
  await Promise.all([
    cleanupPostgres(),
    cleanupMongo(),
    cleanupPersistor(),
    cleanupBackup(),
  ])
}
// Individual cleanup helpers plus the convenience "everything" entry point
// used by test suites and benchmarks.
module.exports = {
postgres: cleanupPostgres,
mongo: cleanupMongo,
persistor: cleanupPersistor,
backup: cleanupBackup,
everything: cleanupEverything,
}