Merge pull request #20129 from overleaf/jpa-handle-partial-deletion

[misc] improve handling of document deletion

GitOrigin-RevId: bd6b225b91ab38365e9ff272c50ece995e767bf2
Jakob Ackermann 2024-08-26 15:09:25 +02:00 committed by Copybot
parent 2dcf87e3f6
commit a98fefd24b
5 changed files with 346 additions and 4 deletions
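Context for the change: a doc ends up "partially deleted" when its file-tree entry has already been removed from the project but the docstore record was never patched with the deletion metadata (name, deleted, deletedAt), for example because a delete request was interrupted. The consistency-check script below now falls back to peeking the docstore when the regular lookup reports the doc as missing, and either flags the doc or, when AUTO_FIX_PARTIALLY_DELETED_DOC_METADATA=true is set, patches the metadata itself. As a minimal sketch (the variable names and values here are illustrative, not part of the diff), the fallback produces a doc object of roughly this shape when the metadata is missing:

const docId = 'example-doc-id' // illustrative
const recoveredDoc = {
  lines: ['mongo', 'lines'],
  version: 1,
  pathname: `/partially-deleted-doc-with-unknown-name-and-id-${docId}.txt`,
  partiallyDeleted: true, // processDoc uses this flag to warn or auto-fix
}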

View file

@@ -3,15 +3,22 @@ const Path = require('path')
const _ = require('lodash')
const logger = require('@overleaf/logger')
const OError = require('@overleaf/o-error')
const Errors = require('../app/js/Errors')
const LockManager = require('../app/js/LockManager')
const PersistenceManager = require('../app/js/PersistenceManager')
const ProjectFlusher = require('../app/js/ProjectFlusher')
const ProjectManager = require('../app/js/ProjectManager')
const RedisManager = require('../app/js/RedisManager')
const Settings = require('@overleaf/settings')
const request = require('requestretry').defaults({
maxAttempts: 2,
retryDelay: 10,
})
const AUTO_FIX_VERSION_MISMATCH =
process.env.AUTO_FIX_VERSION_MISMATCH === 'true'
const AUTO_FIX_PARTIALLY_DELETED_DOC_METADATA =
process.env.AUTO_FIX_PARTIALLY_DELETED_DOC_METADATA === 'true'
const SCRIPT_LOG_LEVEL = process.env.SCRIPT_LOG_LEVEL || 'warn'
const FLUSH_IN_SYNC_PROJECTS = process.env.FLUSH_IN_SYNC_PROJECTS === 'true'
const FOLDER =
@@ -32,6 +39,7 @@ const COMPARE_AND_SET =
* @property {Array<string>} lines
* @property {string} pathname
* @property {Object} ranges
* @property {boolean} [partiallyDeleted]
*/
class TryAgainError extends Error {}
@@ -66,6 +74,101 @@ async function updateDocVersionInRedis(docId, redisDoc, mongoDoc) {
}
}
async function fixPartiallyDeletedDocMetadata(projectId, docId, pathname) {
await new Promise((resolve, reject) => {
request(
{
method: 'PATCH',
url: `http://${process.env.DOCSTORE_HOST || '127.0.0.1'}:3016/project/${projectId}/doc/${docId}`,
timeout: 60 * 1000,
json: {
name: Path.basename(pathname),
deleted: true,
deletedAt: new Date(),
},
},
(err, res, body) => {
if (err) return reject(err)
const { statusCode } = res
if (statusCode !== 204) {
return reject(
new OError('patch request to docstore failed', {
statusCode,
body,
})
)
}
resolve()
}
)
})
}
async function getDocFromMongo(projectId, docId) {
try {
return await PersistenceManager.promises.getDoc(projectId, docId)
} catch (err) {
if (!(err instanceof Errors.NotFoundError)) {
throw err
}
}
const docstoreDoc = await new Promise((resolve, reject) => {
request(
{
url: `http://${process.env.DOCSTORE_HOST || '127.0.0.1'}:3016/project/${projectId}/doc/${docId}/peek`,
timeout: 60 * 1000,
json: true,
},
(err, res, body) => {
if (err) return reject(err)
const { statusCode } = res
if (statusCode !== 200) {
return reject(
new OError('fallback request to docstore failed', {
statusCode,
body,
})
)
}
resolve(body)
}
)
})
const deletedDocName = await new Promise((resolve, reject) => {
request(
{
url: `http://${process.env.DOCSTORE_HOST || '127.0.0.1'}:3016/project/${projectId}/doc-deleted`,
timeout: 60 * 1000,
json: true,
},
(err, res, body) => {
if (err) return reject(err)
const { statusCode } = res
if (statusCode !== 200) {
return reject(
new OError('list deleted docs request to docstore failed', {
statusCode,
body,
})
)
}
resolve(body.find(doc => doc._id === docId)?.name)
}
)
})
if (docstoreDoc.deleted && deletedDocName) {
return {
...docstoreDoc,
pathname: deletedDocName,
}
}
return {
...docstoreDoc,
pathname: `/partially-deleted-doc-with-unknown-name-and-id-${docId}.txt`,
partiallyDeleted: true,
}
}
/**
* @param {string} projectId
* @param {string} docId
@@ -76,10 +179,20 @@ async function processDoc(projectId, docId) {
projectId,
docId
)
const mongoDoc = /** @type Doc */ await PersistenceManager.promises.getDoc(
projectId,
docId
)
const mongoDoc = /** @type Doc */ await getDocFromMongo(projectId, docId)
if (mongoDoc.partiallyDeleted) {
if (AUTO_FIX_PARTIALLY_DELETED_DOC_METADATA) {
console.log(
`Found partially deleted doc ${docId} in project ${projectId}: fixing metadata`
)
await fixPartiallyDeletedDocMetadata(projectId, docId, redisDoc.pathname)
} else {
console.log(
`Found partially deleted doc ${docId} in project ${projectId}: use AUTO_FIX_PARTIALLY_DELETED_DOC_METADATA=true to fix metadata`
)
}
}
if (mongoDoc.version < redisDoc.version) {
// mongo is behind, we can flush to mongo when all docs are processed.
@@ -121,6 +234,9 @@ async function processDoc(projectId, docId) {
if (!WRITE_CONTENT) return true
console.log(`pathname: ${mongoDoc.pathname}`)
if (mongoDoc.pathname !== redisDoc.pathname) {
console.log(`pathname redis: ${redisDoc.pathname}`)
}
console.log(`mongo version: ${mongoDoc.version}`)
console.log(`redis version: ${redisDoc.version}`)

View file

@@ -7,6 +7,8 @@ const { expect } = require('chai')
const Settings = require('@overleaf/settings')
const fs = require('fs')
const Path = require('path')
const MockDocstoreApi = require('./helpers/MockDocstoreApi')
const sinon = require('sinon')
const rclient = require('@overleaf/redis-wrapper').createClient(
Settings.redis.documentupdater
@@ -20,6 +22,14 @@ describe('CheckRedisMongoSyncState', function () {
await rclient.flushall()
})
let peekDocumentInDocstore
beforeEach(function () {
peekDocumentInDocstore = sinon.spy(MockDocstoreApi, 'peekDocument')
})
afterEach(function () {
peekDocumentInDocstore.restore()
})
async function runScript(options) {
let result
try {
@@ -67,6 +77,8 @@ describe('CheckRedisMongoSyncState', function () {
expect(result.stdout).to.include(
'Found 0 projects with 0 out of sync docs'
)
expect(peekDocumentInDocstore).to.not.have.been.called
})
describe('with out of sync lines', function () {
@@ -263,4 +275,97 @@ describe('CheckRedisMongoSyncState', function () {
expect(result.stdout).to.include('Processed 20 projects')
})
})
describe('with partially deleted doc', function () {
let projectId, docId
beforeEach(function (done) {
projectId = DocUpdaterClient.randomId()
docId = DocUpdaterClient.randomId()
MockWebApi.insertDoc(projectId, docId, {
lines: ['mongo', 'lines'],
version: 1,
})
MockDocstoreApi.insertDoc(projectId, docId, {
lines: ['mongo', 'lines'],
version: 1,
})
DocUpdaterClient.getDoc(projectId, docId, err => {
MockWebApi.clearDocs()
done(err)
})
})
describe('with only the file-tree entry deleted', function () {
it('should flag the partial deletion', async function () {
const result = await runScript({})
expect(result.code).to.equal(0)
expect(result.stdout).to.include('Processed 1 projects')
expect(result.stdout).to.include(
`Found partially deleted doc ${docId} in project ${projectId}: use AUTO_FIX_PARTIALLY_DELETED_DOC_METADATA=true to fix metadata`
)
expect(result.stdout).to.include(
'Found 0 projects with 0 out of sync docs'
)
expect(MockDocstoreApi.getDoc(projectId, docId)).to.not.include({
deleted: true,
name: 'c.tex',
})
expect(peekDocumentInDocstore).to.have.been.called
})
it('should autofix the partial deletion', async function () {
const result = await runScript({
AUTO_FIX_PARTIALLY_DELETED_DOC_METADATA: 'true',
})
expect(result.code).to.equal(0)
expect(result.stdout).to.include('Processed 1 projects')
expect(result.stdout).to.include(
`Found partially deleted doc ${docId} in project ${projectId}: fixing metadata`
)
expect(result.stdout).to.include(
'Found 0 projects with 0 out of sync docs'
)
expect(MockDocstoreApi.getDoc(projectId, docId)).to.include({
deleted: true,
name: 'c.tex',
})
const result2 = await runScript({})
expect(result2.code).to.equal(0)
expect(result2.stdout).to.include('Processed 1 projects')
expect(result2.stdout).to.not.include(
`Found partially deleted doc ${docId} in project ${projectId}`
)
expect(result2.stdout).to.include(
'Found 0 projects with 0 out of sync docs'
)
})
})
describe('with docstore metadata updated', function () {
beforeEach(function (done) {
MockDocstoreApi.patchDocument(
projectId,
docId,
{
deleted: true,
deletedAt: new Date(),
name: 'c.tex',
},
done
)
})
it('should work when in sync', async function () {
const result = await runScript({})
expect(result.code).to.equal(0)
expect(result.stdout).to.include('Processed 1 projects')
expect(result.stdout).to.not.include(
`Found partially deleted doc ${docId} in project ${projectId}`
)
expect(result.stdout).to.include(
'Found 0 projects with 0 out of sync docs'
)
expect(peekDocumentInDocstore).to.have.been.called
})
})
})
})

View file

@@ -0,0 +1,111 @@
const express = require('express')
const bodyParser = require('body-parser')
const app = express()
const MAX_REQUEST_SIZE = 2 * (2 * 1024 * 1024 + 64 * 1024)
const MockDocstoreApi = {
docs: {},
clearDocs() {
this.docs = {}
},
getDoc(projectId, docId) {
return this.docs[`${projectId}:${docId}`]
},
insertDoc(projectId, docId, doc) {
if (doc.version == null) {
doc.version = 0
}
if (doc.lines == null) {
doc.lines = []
}
this.docs[`${projectId}:${docId}`] = doc
},
patchDocument(projectId, docId, meta, callback) {
Object.assign(this.docs[`${projectId}:${docId}`], meta)
callback(null)
},
peekDocument(projectId, docId, callback) {
callback(null, this.docs[`${projectId}:${docId}`])
},
getAllDeletedDocs(projectId, callback) {
callback(
null,
Object.entries(this.docs)
.filter(([key, doc]) => key.startsWith(projectId) && doc.deleted)
.map(([key, doc]) => {
return {
_id: key.split(':')[1],
name: doc.name,
deletedAt: doc.deletedAt,
}
})
)
},
run() {
app.get('/project/:project_id/doc-deleted', (req, res, next) => {
this.getAllDeletedDocs(req.params.project_id, (error, docs) => {
if (error) {
res.sendStatus(500)
} else {
res.json(docs)
}
})
})
app.get('/project/:project_id/doc/:doc_id/peek', (req, res, next) => {
this.peekDocument(
req.params.project_id,
req.params.doc_id,
(error, doc) => {
if (error) {
res.sendStatus(500)
} else if (doc) {
res.json(doc)
} else {
res.sendStatus(404)
}
}
)
})
app.patch(
'/project/:project_id/doc/:doc_id',
bodyParser.json({ limit: MAX_REQUEST_SIZE }),
(req, res, next) => {
MockDocstoreApi.patchDocument(
req.params.project_id,
req.params.doc_id,
req.body,
error => {
if (error) {
res.sendStatus(500)
} else {
res.sendStatus(204)
}
}
)
}
)
app
.listen(3016, error => {
if (error) {
throw error
}
})
.on('error', error => {
console.error('error starting MockDocstoreApi:', error.message)
process.exit(1)
})
},
}
MockDocstoreApi.run()
module.exports = MockDocstoreApi
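For reference, a minimal sketch of driving the new mock directly (the project/doc ids and the require path are illustrative; the acceptance test above exercises it indirectly, via the HTTP requests issued by the script):

const MockDocstoreApi = require('./helpers/MockDocstoreApi') // requiring it starts the mock on port 3016

// Store a doc, mark it deleted via the patch handler, then list deleted docs.
MockDocstoreApi.insertDoc('project-1', 'doc-1', { lines: ['a'], version: 1 })
MockDocstoreApi.patchDocument(
  'project-1',
  'doc-1',
  { deleted: true, deletedAt: new Date(), name: 'c.tex' },
  () => {
    MockDocstoreApi.getAllDeletedDocs('project-1', (err, docs) => {
      // docs: [{ _id: 'doc-1', name: 'c.tex', deletedAt: <Date> }]
      console.log(err, docs)
    })
  }
)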

View file

@@ -774,6 +774,10 @@ const deleteEntity = wrapWithLock(
throw new Error('No entityType set')
}
entityType = entityType.toLowerCase()
// Flush the entire project to avoid leaving partially deleted docs in redis.
await DocumentUpdaterHandler.promises.flushProjectToMongo(projectId)
const { entity, path, projectBeforeDeletion, newProject } =
await ProjectEntityMongoUpdateHandler.promises.deleteEntity(
projectId,

View file

@@ -1731,6 +1731,12 @@ describe('ProjectEntityUpdateHandler', function () {
)
})
it('flushes the project to mongo', function () {
this.DocumentUpdaterHandler.promises.flushProjectToMongo.should.have.been.calledWith(
projectId
)
})
it('deletes the entity in mongo', function () {
this.ProjectEntityMongoUpdateHandler.promises.deleteEntity
.calledWith(projectId, docId, 'doc')