Merge pull request #21829 from overleaf/jpa-backup-deletion

[history-v1] backup-deletion-app: initial revision

GitOrigin-RevId: 9e05c0e16595cb8d180861ef503c7b4522110d39
This commit is contained in:
Jakob Ackermann 2024-11-15 15:53:15 +01:00 committed by Copybot
parent 97b39ef98f
commit 9cc6f2a9d5
19 changed files with 620 additions and 3 deletions

View file

@ -105,6 +105,8 @@ function handleJWTAuth(req, authOrSecDef, scopesOrApiKey, next) {
next()
}
exports.hasValidBasicAuthCredentials = hasValidBasicAuthCredentials
/**
* Verify and decode the given JSON Web Token
*/

View file

@ -0,0 +1,81 @@
// @ts-check
// Metrics must be initialized before importing anything else
import '@overleaf/metrics/initialize.js'
import http from 'node:http'
import { fileURLToPath } from 'node:url'
import { promisify } from 'node:util'
import express from 'express'
import logger from '@overleaf/logger'
import Metrics from '@overleaf/metrics'
import { hasValidBasicAuthCredentials } from './api/app/security.js'
import {
deleteProjectBackupCb,
healthCheck,
healthCheckCb,
NotReadyToDelete,
} from './storage/lib/backupDeletion.mjs'
import { mongodb } from './storage/index.js'
// Express application that serves the backup-deletion endpoints registered
// further down in this file.
const app = express()
logger.initialize('history-v1-backup-deletion')
// Wire up the @overleaf/metrics monitors. The HTTP monitor is installed as
// middleware before any route is registered.
// NOTE(review): `req.logger`, used by the error handler below, is presumably
// attached by Metrics.http.monitor — confirm against @overleaf/metrics.
Metrics.open_sockets.monitor()
Metrics.injectMetricsRoute(app)
app.use(Metrics.http.monitor(logger))
Metrics.leaked_sockets.monitor(logger)
Metrics.event_loop.monitor(logger)
Metrics.memory.monitor(logger)
/**
 * Express middleware guarding a route with HTTP basic auth.
 *
 * Requests with valid credentials are passed on to the next handler; all
 * others receive a 401 together with a `WWW-Authenticate` challenge.
 */
function basicAuth(req, res, next) {
  const isAuthorized = hasValidBasicAuthCredentials(req)
  if (!isAuthorized) {
    res.setHeader('WWW-Authenticate', 'Basic realm="Application"')
    res.sendStatus(401)
    return
  }
  return next()
}
// Delete the backup of a project; requires basic auth. Responds with 204 on
// success, failures are forwarded to the error-handling middleware.
app.delete('/project/:projectId/backup', basicAuth, (req, res, next) => {
  const { projectId } = req.params
  deleteProjectBackupCb(projectId, err => {
    if (err) {
      next(err)
      return
    }
    res.sendStatus(204)
  })
})

// Liveness probe: always answers with a static message.
app.get('/status', (req, res) => {
  res.send('history-v1-backup-deletion is up')
})

// Health probe: runs the health check and answers 200 when it passes.
app.get('/health_check', (req, res, next) => {
  healthCheckCb(err => {
    if (err) {
      next(err)
      return
    }
    res.sendStatus(200)
  })
})
// Error-handling middleware: attach the error to the request logger, answer
// refusals (NotReadyToDelete) with a 422 and the refusal message, and hand
// every other error on to the next error handler.
app.use((err, req, res, next) => {
  req.logger.addFields({ err })
  const isRefusal = err instanceof NotReadyToDelete
  req.logger.setLevel(isRefusal ? 'warn' : 'error')
  if (!isRefusal) {
    next(err)
    return
  }
  res.status(422).send(err.message)
})
/**
 * Connect to MongoDB, run the health check and start the HTTP server.
 *
 * The returned promise rejects when the server fails to start listening
 * (e.g. the port is already in use), so callers can catch and retry.
 *
 * @param {number} port - port to listen on; 0 picks a random free port
 * @return {Promise<http.Server>} the listening server
 */
export async function startApp(port) {
  await mongodb.client.connect()
  await healthCheck()
  const server = http.createServer(app)
  await new Promise((resolve, reject) => {
    // Listen failures are reported through the 'error' event and never
    // through the listen callback, so they need an explicit listener.
    server.once('error', reject)
    server.listen(port, () => {
      // From here on, errors are no longer related to startup.
      server.off('error', reject)
      resolve(undefined)
    })
  })
  return server
}
// Start the server only when this module is the entry point of the process
// (as opposed to being imported, e.g. by the tests).
const isMainModule = process.argv[1] === fileURLToPath(import.meta.url)
if (isMainModule) {
  const port = Number.parseInt(process.env.PORT || '3101', 10)
  await startApp(port)
}

View file

@ -7,4 +7,4 @@ history-v1
--node-version=20.18.0
--public-repo=False
--script-version=4.5.0
--tsconfig-extra-includes=api/**/*,migrations/**/*,storage/**/*
--tsconfig-extra-includes=backup-deletion-app.mjs,api/**/*,migrations/**/*,storage/**/*

View file

@ -63,6 +63,8 @@
"globalBlobsBucket":"BACKUP_OVERLEAF_EDITOR_GLOBAL_BLOBS_BUCKET",
"projectBlobsBucket":"BACKUP_OVERLEAF_EDITOR_PROJECT_BLOBS_BUCKET"
},
"healthCheckProjects": "HEALTH_CHECK_PROJECTS",
"minSoftDeletionPeriodDays": "MIN_SOFT_DELETION_PERIOD_DAYS",
"mongo": {
"uri": "MONGO_CONNECTION_STRING"
},

View file

@ -29,6 +29,7 @@
"zipStore": {
"zipTimeoutMs": "360000"
},
"minSoftDeletionPeriodDays": "90",
"maxDeleteKeys": "1000",
"useDeleteObjects": "true",
"clusterWorkers": "1",

View file

@ -33,6 +33,7 @@
},
"tieringStorageClass": "REDUCED_REDUNDANCY"
},
"healthCheckProjects": "[\"42\",\"000000000000000000000042\"]",
"maxDeleteKeys": "3",
"useDeleteObjects": "false",
"mongo": {

View file

@ -0,0 +1,86 @@
// @ts-check
import { callbackify } from 'util'
import { ObjectId } from 'mongodb'
import config from 'config'
import OError from '@overleaf/o-error'
import { db } from './mongodb.js'
import projectKey from './project_key.js'
import chunkStore from '../lib/chunk_store/index.js'
import {
backupPersistor,
chunksBucket,
projectBlobsBucket,
} from './backupPersistor.mjs'
const MS_PER_DAY = 1000 * 60 * 60 * 24
// Minimum time a project must have been soft-deleted before its backup may be
// removed, taken from the `minSoftDeletionPeriodDays` config value.
const MIN_SOFT_DELETION_PERIOD_DAYS = Number.parseInt(
  config.get('minSoftDeletionPeriodDays'),
  10
)
const EXPIRE_PROJECTS_AFTER_MS = MIN_SOFT_DELETION_PERIOD_DAYS * MS_PER_DAY
const deletedProjectsCollection = db.collection('deletedProjects')
/**
 * Check whether the chunk store still holds a latest chunk for a history id.
 *
 * @param {string} historyId
 * @return {Promise<boolean>} true when a latest chunk exists
 */
async function projectHasLatestChunk(historyId) {
  const backend = chunkStore.getBackend(historyId)
  const latestChunk = await backend.getLatestChunk(historyId)
  return latestChunk !== null && latestChunk !== undefined
}
/**
 * Error thrown when the deletion of a project backup is refused, e.g. because
 * the project is not deleted, not yet expired, or still has chunks.
 */
export class NotReadyToDelete extends OError {}
/**
 * Delete the backed up chunks and blobs of a soft-deleted project.
 *
 * The deletion is refused (NotReadyToDelete) unless all of these hold:
 *  - the project has an entry in the deletedProjects collection,
 *  - the minimum soft deletion period has passed since it was deleted,
 *  - the entry still carries the history id of the project,
 *  - the chunk store has no latest chunk left for that history id.
 *
 * @param {string} projectId
 * @return {Promise<void>}
 * @throws {NotReadyToDelete} when any of the preconditions is not met
 */
async function deleteProjectBackup(projectId) {
  const deletedProject = await deletedProjectsCollection.findOne(
    { 'deleterData.deletedProjectId': new ObjectId(projectId) },
    {
      projection: {
        'project.overleaf.history.id': 1,
        'deleterData.deletedAt': 1,
      },
    }
  )
  if (!deletedProject) {
    throw new NotReadyToDelete('refusing to delete non-deleted project')
  }
  const expiresAt =
    deletedProject.deleterData.deletedAt.getTime() + EXPIRE_PROJECTS_AFTER_MS
  // Fail closed: a NaN expiry (invalid date or misconfigured deletion period)
  // compares false against everything and must not unlock the deletion.
  if (!(expiresAt <= Date.now())) {
    throw new NotReadyToDelete('refusing to delete non-expired project')
  }
  const historyId = deletedProject.project?.overleaf?.history?.id
  // Never derive a key prefix from a missing history id.
  if (historyId == null || historyId === '') {
    throw new NotReadyToDelete(
      'refusing to delete project with unknown historyId'
    )
  }
  if (await projectHasLatestChunk(historyId)) {
    throw new NotReadyToDelete(
      'refusing to delete project with remaining chunks'
    )
  }
  const prefix = projectKey.format(historyId) + '/'
  await backupPersistor.deleteDirectory(chunksBucket, prefix)
  await backupPersistor.deleteDirectory(projectBlobsBucket, prefix)
}
/**
 * Verify the configured health check projects.
 *
 * The `healthCheckProjects` config value is a JSON array that must hold
 * exactly two ids: one with 24 characters (mongo id) and one shorter
 * (postgres id). Each of them must have a latest chunk.
 *
 * @return {Promise<void>}
 * @throws {Error} when the config is malformed or a project has no history
 */
export async function healthCheck() {
  const projectIds = JSON.parse(config.get('healthCheckProjects'))
  if (projectIds.length !== 2) {
    throw new Error('expected 2 healthCheckProjects')
  }
  const lacksMongoId = projectIds.every(id => id.length !== 24)
  if (lacksMongoId) {
    throw new Error('expected mongo id in healthCheckProjects')
  }
  const lacksPostgresId = projectIds.every(id => !(id.length < 24))
  if (lacksPostgresId) {
    throw new Error('expected postgres id in healthCheckProjects')
  }
  for (let i = 0; i < projectIds.length; i++) {
    const historyId = projectIds[i]
    const hasHistory = await projectHasLatestChunk(historyId)
    if (!hasHistory) {
      throw new Error(`project has no history: ${historyId}`)
    }
  }
}
// Callback-style variants of the async functions above, built with
// util.callbackify.
export const healthCheckCb = callbackify(healthCheck)
export const deleteProjectBackupCb = callbackify(deleteProjectBackup)

View file

@ -81,7 +81,7 @@ async function lazyLoadHistoryFiles(history, batchBlobStore) {
/**
* Load the latest Chunk stored for a project, including blob metadata.
*
* @param {number} projectId
* @param {number|string} projectId
* @return {Promise.<Chunk>}
*/
async function loadLatest(projectId) {
@ -315,6 +315,7 @@ class AlreadyInitialized extends OError {
}
module.exports = {
getBackend,
initializeProject,
loadLatest,
loadAtVersion,

View file

@ -0,0 +1,242 @@
// @ts-check
import cleanup from '../storage/support/cleanup.js'
import fetch from 'node-fetch'
import testServer from './support/test_backup_server.mjs'
import { expect } from 'chai'
import testProjects from './support/test_projects.js'
import { db } from '../../../../storage/lib/mongodb.js'
import { ObjectId } from 'mongodb'
import {
backupPersistor,
projectBlobsBucket,
chunksBucket,
} from '../../../../storage/lib/backupPersistor.mjs'
import { makeProjectKey } from '../../../../storage/lib/blob_store/index.js'
import config from 'config'
import Stream from 'stream'
import projectKey from '../../../../storage/lib/project_key.js'
/**
* @typedef {import("node-fetch").Response} Response
*/
// Bucket name read from the `backupStore` config section.
// NOTE(review): presumably the bucket holding the per-project encryption
// keys — confirm against the backup persistor.
const { deksBucket } = config.get('backupStore')
// Collection the tests populate with soft-deleted project entries.
const deletedProjectsCollection = db.collection('deletedProjects')
/**
 * List the keys stored in a bucket below the given prefix.
 *
 * @param {string} bucket
 * @param {string} prefix
 * @return {Promise<Array<string>>} keys of the matching objects; objects
 *   without a key are reported as an empty string
 */
async function listS3Bucket(bucket, prefix) {
  // @ts-ignore access to internal library helper
  const s3 = backupPersistor._getClientForBucket(bucket)
  const request = s3.listObjectsV2({ Bucket: bucket, Prefix: prefix })
  const listing = await request.promise()
  const objects = listing.Contents || []
  return objects.map(({ Key }) => Key || '')
}
/**
 * Send an authenticated DELETE request for the backup of a project to the
 * test server.
 *
 * @param {ObjectId} projectId
 * @return {Promise<Response>}
 */
async function deleteProject(projectId) {
  const url = testServer.testUrl(`/project/${projectId}/backup`)
  const options = {
    method: 'DELETE',
    headers: { Authorization: testServer.basicAuthHeader },
  }
  return await fetch(url, options)
}
/**
 * Assert that the fixture objects of a history id are present: one object in
 * the deks bucket, two chunks and two project blobs.
 *
 * @param {string|ObjectId} historyId
 * @return {Promise<void>}
 */
async function expectToHaveBackup(historyId) {
  const prefix = projectKey.format(historyId.toString()) + '/'
  const expectedCounts = [
    [deksBucket, 1],
    [chunksBucket, 2],
    [projectBlobsBucket, 2],
  ]
  for (const [bucket, count] of expectedCounts) {
    const keys = await listS3Bucket(bucket, prefix)
    expect(keys).to.have.length(count)
  }
}
/**
 * Assert that nothing is left for a history id in the deks, chunks and
 * project blobs buckets.
 *
 * @param {string|ObjectId} historyId
 * @return {Promise<void>}
 */
async function expectToHaveNoBackup(historyId) {
  const prefix = projectKey.format(historyId.toString()) + '/'
  const buckets = [deksBucket, chunksBucket, projectBlobsBucket]
  for (const bucket of buckets) {
    const keys = await listS3Bucket(bucket, prefix)
    expect(keys).to.have.length(0)
  }
}
// Acceptance tests for the backup-deletion app: the /status and /health_check
// probes and the DELETE /project/:projectId/backup endpoint.
describe('backupDeletion', function () {
beforeEach(cleanup.everything)
// The health check fails for projects without a latest chunk, so the health
// check projects are created before the server is started.
// NOTE(review): these ids presumably match `healthCheckProjects` in the test
// config — confirm.
beforeEach('create health check projects', async function () {
await testProjects.createEmptyProject('42')
await testProjects.createEmptyProject('000000000000000000000042')
})
beforeEach(testServer.listenOnRandomPort)
it('renders 200 on /status', async function () {
const response = await fetch(testServer.testUrl('/status'))
expect(response.status).to.equal(200)
})
it('renders 200 on /health_check', async function () {
const response = await fetch(testServer.testUrl('/health_check'))
expect(response.status).to.equal(200)
})
describe('DELETE /project/:projectId', function () {
// One project per scenario. projectIdNonDeleted is never inserted into
// deletedProjects by the hooks below.
const postgresHistoryId = '1'
const projectIdPostgres = new ObjectId('000000000000000000000001')
const projectIdMongoDB = new ObjectId('000000000000000000000002')
const projectIdNonDeleted = new ObjectId('000000000000000000000003')
const projectIdNonExpired = new ObjectId('000000000000000000000004')
const projectIdWithChunks = new ObjectId('000000000000000000000005')
// Start every test from empty buckets.
beforeEach('cleanup s3 buckets', async function () {
await backupPersistor.deleteDirectory(deksBucket, '')
await backupPersistor.deleteDirectory(chunksBucket, '')
await backupPersistor.deleteDirectory(projectBlobsBucket, '')
})
beforeEach('populate mongo', async function () {
await deletedProjectsCollection.insertMany([
// Deleted on 2024-01-01, with the short history id '1'.
{
_id: new ObjectId(),
project: {
_id: projectIdPostgres,
overleaf: { history: { id: postgresHistoryId } },
},
deleterData: {
deletedProjectId: projectIdPostgres,
deletedAt: new Date('2024-01-01T00:00:00Z'),
},
},
// Deleted just now.
{
_id: new ObjectId(),
project: {
_id: projectIdNonExpired,
overleaf: { history: { id: projectIdNonExpired.toString() } },
},
deleterData: {
deletedProjectId: projectIdNonExpired,
deletedAt: new Date(),
},
},
// Deleted on 2024-01-01, using the project id as history id.
...[projectIdMongoDB, projectIdWithChunks].map(projectId => {
return {
_id: new ObjectId(),
project: {
_id: projectId,
overleaf: { history: { id: projectId.toString() } },
},
deleterData: {
deletedProjectId: projectId,
deletedAt: new Date('2024-01-01T00:00:00Z'),
},
}
}),
])
})
// NOTE(review): presumably gives projectIdWithChunks a latest chunk in the
// chunk store, which the 'remaining chunks' test relies on — confirm.
beforeEach('initialize history', async function () {
await testProjects.createEmptyProject(projectIdWithChunks.toString())
})
// Upload two blobs and two chunks per history id; expectToHaveBackup asserts
// exactly these counts.
beforeEach('create a file in s3', async function () {
const historyIds = [
postgresHistoryId,
projectIdMongoDB,
projectIdNonDeleted,
projectIdNonExpired,
projectIdWithChunks,
]
const jobs = []
for (const historyId of historyIds) {
jobs.push(
backupPersistor.sendStream(
projectBlobsBucket,
makeProjectKey(historyId, 'a'.repeat(40)),
Stream.Readable.from(['blob a']),
{ contentLength: 6 }
)
)
jobs.push(
backupPersistor.sendStream(
projectBlobsBucket,
makeProjectKey(historyId, 'b'.repeat(40)),
Stream.Readable.from(['blob b']),
{ contentLength: 6 }
)
)
jobs.push(
backupPersistor.sendStream(
chunksBucket,
projectKey.format(historyId) + '/111',
Stream.Readable.from(['chunk 1']),
{ contentLength: 7 }
)
)
jobs.push(
backupPersistor.sendStream(
chunksBucket,
projectKey.format(historyId) + '/222',
Stream.Readable.from(['chunk 2']),
{ contentLength: 7 }
)
)
}
await Promise.all(jobs)
})
it('renders 401 without auth', async function () {
const response = await fetch(
testServer.testUrl('/project/000000000000000000000042/backup'),
{ method: 'DELETE' }
)
expect(response.status).to.equal(401)
expect(response.headers.get('www-authenticate')).to.match(/^Basic/)
})
// Every refusal must leave the uploaded objects untouched.
it('returns 422 when not deleted', async function () {
const response = await deleteProject(projectIdNonDeleted)
expect(response.status).to.equal(422)
expect(await response.text()).to.equal(
'refusing to delete non-deleted project'
)
await expectToHaveBackup(projectIdNonDeleted)
})
it('returns 422 when not expired', async function () {
const response = await deleteProject(projectIdNonExpired)
expect(response.status).to.equal(422)
expect(await response.text()).to.equal(
'refusing to delete non-expired project'
)
await expectToHaveBackup(projectIdNonExpired)
})
it('returns 422 when live-history not deleted', async function () {
const response = await deleteProject(projectIdWithChunks)
expect(response.status).to.equal(422)
expect(await response.text()).to.equal(
'refusing to delete project with remaining chunks'
)
await expectToHaveBackup(projectIdWithChunks)
})
// The request carries the project id; the objects are looked up by history id.
it('should successfully delete postgres id', async function () {
await expectToHaveBackup(postgresHistoryId)
const response = await deleteProject(projectIdPostgres)
expect(response.status).to.equal(204)
await expectToHaveNoBackup(postgresHistoryId)
})
it('should successfully delete mongo id', async function () {
await expectToHaveBackup(projectIdMongoDB)
const response = await deleteProject(projectIdMongoDB)
expect(response.status).to.equal(204)
await expectToHaveNoBackup(projectIdMongoDB)
})
})
})

View file

@ -0,0 +1,51 @@
// @ts-check
import config from 'config'
import { startApp } from '../../../../../backup-deletion-app.mjs'
/** @type {import("http").Server} */
let server
/**
 * Build an absolute URL for a path on the test server.
 *
 * The port is taken from the address of the running server when that address
 * is an object; otherwise the URL carries no explicit port.
 *
 * @param {string} pathname
 * @return {string}
 */
function testUrl(pathname) {
  const target = new URL('http://127.0.0.1')
  const address = server.address()
  const hasPort = Boolean(address) && typeof address === 'object'
  if (hasPort) {
    target.port = address.port.toString()
  }
  target.pathname = pathname
  return target.toString()
}
// Authorization header value for the `staging` user with the password from
// the `basicHttpAuth.password` config value.
const basicAuthCredentials = `staging:${config.get('basicHttpAuth.password')}`
const basicAuthHeader = `Basic ${Buffer.from(basicAuthCredentials).toString(
  'base64'
)}`
/**
 * Start the app on a random port unless a server is already running.
 *
 * Up to ten failed attempts are ignored and retried; the error of one final
 * attempt is propagated to the caller.
 */
async function listenOnRandomPort() {
  if (server) return // already running
  let remainingSilentAttempts = 10
  while (remainingSilentAttempts > 0) {
    remainingSilentAttempts -= 1
    try {
      server = await startApp(0)
      return
    } catch {
      // ignored on purpose: try again
    }
  }
  server = await startApp(0)
}
// Shut the shared server down once all tests have finished; nothing to do
// when it was never started.
after('close server', function (done) {
  if (!server) {
    done()
    return
  }
  server.close(done)
})
export default {
testUrl,
basicAuthHeader,
listenOnRandomPort,
}

View file

@ -4,6 +4,7 @@
"api/**/*",
"app.js",
"app/js/**/*",
"backup-deletion-app.mjs",
"benchmarks/**/*",
"config/**/*",
"migrations/**/*",

View file

@ -0,0 +1,20 @@
const { fetchNothing } = require('@overleaf/fetch-utils')
const Settings = require('@overleaf/settings')
/**
 * Ask the history-backup-deletion service to delete the backup of a project.
 *
 * Does nothing when the service is not enabled in the settings.
 *
 * @param {string|ObjectId} projectId
 * @return {Promise<void>}
 */
async function deleteProject(projectId) {
  const {
    enabled,
    url: baseUrl,
    user,
    pass,
  } = Settings.apis.historyBackupDeletion
  if (!enabled) return
  const url = new URL(baseUrl)
  // A base URL with a path but no trailing slash would otherwise get glued to
  // the first path segment ("/prefixproject/...").
  if (!url.pathname.endsWith('/')) {
    url.pathname += '/'
  }
  url.pathname += `project/${projectId}/backup`
  await fetchNothing(url, {
    method: 'DELETE',
    basicAuth: {
      user,
      password: pass,
    },
  })
}
module.exports = {
deleteProject,
}

View file

@ -4,6 +4,7 @@ const settings = require('@overleaf/settings')
const OError = require('@overleaf/o-error')
const UserGetter = require('../User/UserGetter')
const ProjectGetter = require('../Project/ProjectGetter')
const HistoryBackupDeletionHandler = require('./HistoryBackupDeletionHandler')
async function initializeProject(projectId) {
const body = await fetchJson(`${settings.apis.project_history.url}/project`, {
@ -77,6 +78,7 @@ async function deleteProject(projectId, historyId) {
tasks.push(_deleteProjectInFullProjectHistory(historyId))
}
await Promise.all(tasks)
await HistoryBackupDeletionHandler.deleteProject(projectId)
}
async function _deleteProjectInProjectHistory(projectId) {

View file

@ -246,6 +246,12 @@ module.exports = {
sendProjectStructureOps: true,
url: `http://${process.env.PROJECT_HISTORY_HOST || '127.0.0.1'}:3054`,
},
historyBackupDeletion: {
enabled: false,
url: `http://${process.env.HISTORY_BACKUP_DELETION_HOST || '127.0.0.1'}:3101`,
user: process.env.HISTORY_BACKUP_DELETION_USER || 'staging',
pass: process.env.HISTORY_BACKUP_DELETION_PASS,
},
realTime: {
url: `http://${process.env.REALTIME_HOST || '127.0.0.1'}:3026`,
},

View file

@ -86,6 +86,11 @@ module.exports = {
user: 'overleaf',
pass: 'password',
},
historyBackupDeletion: {
url: `http://127.0.0.1:23101`,
user: 'overleaf',
pass: 'password',
},
webpack: {
url: 'http://127.0.0.1:23808',
},

View file

@ -10,14 +10,20 @@ import MockDocstoreApiClass from './mocks/MockDocstoreApi.js'
import MockFilestoreApiClass from './mocks/MockFilestoreApi.js'
import MockChatApiClass from './mocks/MockChatApi.mjs'
import MockGitBridgeApiClass from './mocks/MockGitBridgeApi.mjs'
import MockHistoryBackupDeletionApiClass from './mocks/MockHistoryBackupDeletionApi.mjs'
let MockDocstoreApi, MockFilestoreApi, MockChatApi, MockGitBridgeApi
let MockDocstoreApi,
MockFilestoreApi,
MockChatApi,
MockGitBridgeApi,
MockHistoryBackupDeletionApi
before(function () {
MockDocstoreApi = MockDocstoreApiClass.instance()
MockFilestoreApi = MockFilestoreApiClass.instance()
MockChatApi = MockChatApiClass.instance()
MockGitBridgeApi = MockGitBridgeApiClass.instance()
MockHistoryBackupDeletionApi = MockHistoryBackupDeletionApiClass.instance()
})
describe('Deleting a user', function () {
@ -474,6 +480,66 @@ describe('Deleting a project', function () {
}
)
})
if (Features.hasFeature('saas')) {
it('Should destroy the history backup', function (done) {
MockHistoryBackupDeletionApi.prepareProject(this.projectId, 204)
request.post(
`/internal/project/${this.projectId}/expire-deleted-project`,
{
auth: {
user: settings.apis.web.user,
pass: settings.apis.web.pass,
sendImmediately: true,
},
},
(error, res) => {
expect(error).not.to.exist
expect(res.statusCode).to.equal(200)
expect(
MockHistoryBackupDeletionApi.projects[this.projectId.toString()]
).not.to.exist
done()
}
)
})
it('Should abort when the history backup cannot be deleted', function (done) {
MockHistoryBackupDeletionApi.prepareProject(this.projectId, 422)
request.post(
`/internal/project/${this.projectId}/expire-deleted-project`,
{
auth: {
user: settings.apis.web.user,
pass: settings.apis.web.pass,
sendImmediately: true,
},
},
(error, res) => {
expect(error).not.to.exist
expect(res.statusCode).to.equal(500)
expect(
MockHistoryBackupDeletionApi.projects[this.projectId.toString()]
).to.exist
db.deletedProjects.findOne(
{
'deleterData.deletedProjectId': new ObjectId(this.projectId),
},
(error, deletedProject) => {
expect(error).not.to.exist
expect(deletedProject).to.exist
expect(deletedProject.project).to.exist
done()
}
)
}
)
})
}
})
})

View file

@ -15,6 +15,7 @@ import MockV1Api from './mocks/MockV1Api.js'
import MockV1HistoryApi from './mocks/MockV1HistoryApi.js'
import MockHaveIBeenPwnedApi from './mocks/MockHaveIBeenPwnedApi.mjs'
import MockThirdPartyDataStoreApi from './mocks/MockThirdPartyDataStoreApi.mjs'
import MockHistoryBackupDeletionApi from './mocks/MockHistoryBackupDeletionApi.mjs'
const mockOpts = {
debug: ['1', 'true', 'TRUE'].includes(process.env.DEBUG_MOCKS),
@ -30,6 +31,7 @@ MockSpellingApi.initialize(23005, mockOpts)
MockHaveIBeenPwnedApi.initialize(1337, mockOpts)
MockProjectHistoryApi.initialize(23054, mockOpts)
MockV1HistoryApi.initialize(23100, mockOpts)
MockHistoryBackupDeletionApi.initialize(23101, mockOpts)
if (Features.hasFeature('saas')) {
MockAnalyticsApi.initialize(23050, mockOpts)

View file

@ -0,0 +1,37 @@
import AbstractMockApi from './AbstractMockApi.js'
/**
 * Mock of the history-backup-deletion service.
 *
 * Tests register the status a project deletion should yield; a registered 422
 * is served as-is and keeps the project, anything else removes the project
 * and answers 204.
 */
class MockHistoryBackupDeletionApi extends AbstractMockApi {
  /** Forget all prepared projects. */
  reset() {
    this.projects = {}
  }

  /** Register the status to use for the deletion of a project. */
  prepareProject(projectId, status) {
    const key = projectId.toString()
    this.projects[key] = status
  }

  /** Handler for DELETE /project/:project_id/backup. */
  deleteProject(req, res) {
    const { project_id: projectId } = req.params
    const isRefused = this.projects[projectId] === 422
    if (isRefused) {
      return res.sendStatus(422)
    }
    delete this.projects[projectId]
    res.sendStatus(204)
  }

  applyRoutes() {
    this.app.delete('/project/:project_id/backup', (req, res) =>
      this.deleteProject(req, res)
    )
  }
}
export default MockHistoryBackupDeletionApi
// type hint for the inherited `instance` method
/**
* @function instance
* @memberOf MockHistoryBackupDeletionApi
* @static
* @returns {MockHistoryBackupDeletionApi}
*/

View file

@ -54,12 +54,17 @@ describe('HistoryManager', function () {
},
}
this.HistoryBackupDeletionHandler = {
deleteProject: sinon.stub().resolves(),
}
this.HistoryManager = SandboxedModule.require(MODULE_PATH, {
requires: {
'@overleaf/fetch-utils': this.FetchUtils,
'@overleaf/settings': this.settings,
'../User/UserGetter': this.UserGetter,
'../Project/ProjectGetter': this.ProjectGetter,
'./HistoryBackupDeletionHandler': this.HistoryBackupDeletionHandler,
},
})
})
@ -287,5 +292,11 @@ describe('HistoryManager', function () {
}
)
})
it('should call the history-backup-deletion service', async function () {
expect(
this.HistoryBackupDeletionHandler.deleteProject
).to.have.been.calledWith(projectId)
})
})
})