mirror of
https://github.com/overleaf/overleaf.git
synced 2025-04-11 21:35:32 +00:00
Merge pull request #20129 from overleaf/jpa-handle-partial-deletion
[misc] improve handling of document deletion GitOrigin-RevId: bd6b225b91ab38365e9ff272c50ece995e767bf2
This commit is contained in:
parent
2dcf87e3f6
commit
a98fefd24b
5 changed files with 346 additions and 4 deletions
|
@ -3,15 +3,22 @@ const Path = require('path')
|
|||
const _ = require('lodash')
|
||||
const logger = require('@overleaf/logger')
|
||||
const OError = require('@overleaf/o-error')
|
||||
const Errors = require('../app/js/Errors')
|
||||
const LockManager = require('../app/js/LockManager')
|
||||
const PersistenceManager = require('../app/js/PersistenceManager')
|
||||
const ProjectFlusher = require('../app/js/ProjectFlusher')
|
||||
const ProjectManager = require('../app/js/ProjectManager')
|
||||
const RedisManager = require('../app/js/RedisManager')
|
||||
const Settings = require('@overleaf/settings')
|
||||
const request = require('requestretry').defaults({
|
||||
maxAttempts: 2,
|
||||
retryDelay: 10,
|
||||
})
|
||||
|
||||
const AUTO_FIX_VERSION_MISMATCH =
|
||||
process.env.AUTO_FIX_VERSION_MISMATCH === 'true'
|
||||
const AUTO_FIX_PARTIALLY_DELETED_DOC_METADATA =
|
||||
process.env.AUTO_FIX_PARTIALLY_DELETED_DOC_METADATA === 'true'
|
||||
const SCRIPT_LOG_LEVEL = process.env.SCRIPT_LOG_LEVEL || 'warn'
|
||||
const FLUSH_IN_SYNC_PROJECTS = process.env.FLUSH_IN_SYNC_PROJECTS === 'true'
|
||||
const FOLDER =
|
||||
|
@ -32,6 +39,7 @@ const COMPARE_AND_SET =
|
|||
* @property {Array<string>} lines
|
||||
* @property {string} pathname
|
||||
* @property {Object} ranges
|
||||
* @property {boolean} [partiallyDeleted]
|
||||
*/
|
||||
|
||||
// Marker error: thrown to signal that processing this doc/project should be retried.
class TryAgainError extends Error {}
|
||||
|
@ -66,6 +74,101 @@ async function updateDocVersionInRedis(docId, redisDoc, mongoDoc) {
|
|||
}
|
||||
}
|
||||
|
||||
/**
 * Repair the docstore metadata of a partially deleted doc by marking it as
 * deleted with a name derived from its redis pathname and a fresh deletion
 * timestamp.
 *
 * @param {string} projectId
 * @param {string} docId
 * @param {string} pathname - redis pathname; its basename becomes the doc name
 * @returns {Promise<void>} resolves on a 204 from docstore
 * @throws {OError} when docstore responds with any other status code
 */
async function fixPartiallyDeletedDocMetadata(projectId, docId, pathname) {
  const host = process.env.DOCSTORE_HOST || '127.0.0.1'
  const patch = {
    name: Path.basename(pathname),
    deleted: true,
    deletedAt: new Date(),
  }
  return new Promise((resolve, reject) => {
    // requestretry exposes a callback API; adapt it to a Promise here.
    const handleResponse = (err, res, body) => {
      if (err) return reject(err)
      const { statusCode } = res
      if (statusCode === 204) return resolve()
      reject(
        new OError('patch request to docstore failed', {
          statusCode,
          body,
        })
      )
    }
    request(
      {
        method: 'PATCH',
        url: `http://${host}:3016/project/${projectId}/doc/${docId}`,
        timeout: 60 * 1000,
        json: patch,
      },
      handleResponse
    )
  })
}
|
||||
|
||||
/**
 * Load a doc from mongo, falling back to the docstore "peek" endpoint when
 * the doc has been deleted from the file-tree (NotFoundError).
 *
 * When the docstore copy is flagged deleted AND its name can be recovered
 * from the project's deleted-docs listing, the doc is returned with that
 * name as its pathname. Otherwise the doc is returned with a placeholder
 * pathname and `partiallyDeleted: true`, so callers can repair its metadata.
 *
 * @param {string} projectId
 * @param {string} docId
 * @returns {Promise<Doc>}
 * @throws {Error} non-NotFound mongo errors, or OError on docstore failures
 */
async function getDocFromMongo(projectId, docId) {
  try {
    return await PersistenceManager.promises.getDoc(projectId, docId)
  } catch (err) {
    // Only a NotFoundError triggers the docstore fallback below.
    if (!(err instanceof Errors.NotFoundError)) throw err
  }

  const host = process.env.DOCSTORE_HOST || '127.0.0.1'

  // Helper: GET a docstore JSON endpoint for this project, rejecting with
  // the given message on any non-200 response.
  function getJSON(path, errorMessage) {
    return new Promise((resolve, reject) => {
      request(
        {
          url: `http://${host}:3016/project/${projectId}/${path}`,
          timeout: 60 * 1000,
          json: true,
        },
        (err, res, body) => {
          if (err) return reject(err)
          const { statusCode } = res
          if (statusCode !== 200) {
            return reject(
              new OError(errorMessage, {
                statusCode,
                body,
              })
            )
          }
          resolve(body)
        }
      )
    })
  }

  const docstoreDoc = await getJSON(
    `doc/${docId}/peek`,
    'fallback request to docstore failed'
  )
  const deletedDocs = await getJSON(
    'doc-deleted',
    'list deleted docs request to docstore failed'
  )
  const deletedDocName = deletedDocs.find(doc => doc._id === docId)?.name

  if (docstoreDoc.deleted && deletedDocName) {
    return {
      ...docstoreDoc,
      pathname: deletedDocName,
    }
  }
  // Deletion never completed (or the name is gone): flag for repair.
  return {
    ...docstoreDoc,
    pathname: `/partially-deleted-doc-with-unknown-name-and-id-${docId}.txt`,
    partiallyDeleted: true,
  }
}
|
||||
|
||||
/**
|
||||
* @param {string} projectId
|
||||
* @param {string} docId
|
||||
|
@ -76,10 +179,20 @@ async function processDoc(projectId, docId) {
|
|||
projectId,
|
||||
docId
|
||||
)
|
||||
const mongoDoc = /** @type Doc */ await PersistenceManager.promises.getDoc(
|
||||
projectId,
|
||||
docId
|
||||
)
|
||||
const mongoDoc = /** @type Doc */ await getDocFromMongo(projectId, docId)
|
||||
|
||||
if (mongoDoc.partiallyDeleted) {
|
||||
if (AUTO_FIX_PARTIALLY_DELETED_DOC_METADATA) {
|
||||
console.log(
|
||||
`Found partially deleted doc ${docId} in project ${projectId}: fixing metadata`
|
||||
)
|
||||
await fixPartiallyDeletedDocMetadata(projectId, docId, redisDoc.pathname)
|
||||
} else {
|
||||
console.log(
|
||||
`Found partially deleted doc ${docId} in project ${projectId}: use AUTO_FIX_PARTIALLY_DELETED_DOC_METADATA=true to fix metadata`
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
if (mongoDoc.version < redisDoc.version) {
|
||||
// mongo is behind, we can flush to mongo when all docs are processed.
|
||||
|
@ -121,6 +234,9 @@ async function processDoc(projectId, docId) {
|
|||
if (!WRITE_CONTENT) return true
|
||||
|
||||
console.log(`pathname: ${mongoDoc.pathname}`)
|
||||
if (mongoDoc.pathname !== redisDoc.pathname) {
|
||||
console.log(`pathname redis: ${redisDoc.pathname}`)
|
||||
}
|
||||
console.log(`mongo version: ${mongoDoc.version}`)
|
||||
console.log(`redis version: ${redisDoc.version}`)
|
||||
|
||||
|
|
|
@ -7,6 +7,8 @@ const { expect } = require('chai')
|
|||
const Settings = require('@overleaf/settings')
|
||||
const fs = require('fs')
|
||||
const Path = require('path')
|
||||
const MockDocstoreApi = require('./helpers/MockDocstoreApi')
|
||||
const sinon = require('sinon')
|
||||
|
||||
const rclient = require('@overleaf/redis-wrapper').createClient(
|
||||
Settings.redis.documentupdater
|
||||
|
@ -20,6 +22,14 @@ describe('CheckRedisMongoSyncState', function () {
|
|||
await rclient.flushall()
|
||||
})
|
||||
|
||||
let peekDocumentInDocstore
|
||||
beforeEach(function () {
|
||||
peekDocumentInDocstore = sinon.spy(MockDocstoreApi, 'peekDocument')
|
||||
})
|
||||
afterEach(function () {
|
||||
peekDocumentInDocstore.restore()
|
||||
})
|
||||
|
||||
async function runScript(options) {
|
||||
let result
|
||||
try {
|
||||
|
@ -67,6 +77,8 @@ describe('CheckRedisMongoSyncState', function () {
|
|||
expect(result.stdout).to.include(
|
||||
'Found 0 projects with 0 out of sync docs'
|
||||
)
|
||||
|
||||
expect(peekDocumentInDocstore).to.not.have.been.called
|
||||
})
|
||||
|
||||
describe('with out of sync lines', function () {
|
||||
|
@ -263,4 +275,97 @@ describe('CheckRedisMongoSyncState', function () {
|
|||
expect(result.stdout).to.include('Processed 20 projects')
|
||||
})
|
||||
})
|
||||
|
||||
// A doc is "partially deleted" when it was removed from the project
// file-tree (web) but its docstore metadata was never patched to
// deleted=true. These tests seed that state by inserting the doc in both
// mocks, loading it into redis, then clearing the web doc.
describe('with partially deleted doc', function () {
  let projectId, docId
  beforeEach(function (done) {
    projectId = DocUpdaterClient.randomId()
    docId = DocUpdaterClient.randomId()
    MockWebApi.insertDoc(projectId, docId, {
      lines: ['mongo', 'lines'],
      version: 1,
    })
    MockDocstoreApi.insertDoc(projectId, docId, {
      lines: ['mongo', 'lines'],
      version: 1,
    })
    DocUpdaterClient.getDoc(projectId, docId, err => {
      // Remove the file-tree entry only; docstore still has the doc
      // without deletion metadata -> partially deleted.
      MockWebApi.clearDocs()
      done(err)
    })
  })
  describe('with only the file-tree entry deleted', function () {
    it('should flag the partial deletion', async function () {
      // Without the env flag the script only reports the problem.
      const result = await runScript({})
      expect(result.code).to.equal(0)
      expect(result.stdout).to.include('Processed 1 projects')
      expect(result.stdout).to.include(
        `Found partially deleted doc ${docId} in project ${projectId}: use AUTO_FIX_PARTIALLY_DELETED_DOC_METADATA=true to fix metadata`
      )
      expect(result.stdout).to.include(
        'Found 0 projects with 0 out of sync docs'
      )
      // Docstore metadata must be untouched in report-only mode.
      expect(MockDocstoreApi.getDoc(projectId, docId)).to.not.include({
        deleted: true,
        name: 'c.tex',
      })
      expect(peekDocumentInDocstore).to.have.been.called
    })
    it('should autofix the partial deletion', async function () {
      const result = await runScript({
        AUTO_FIX_PARTIALLY_DELETED_DOC_METADATA: 'true',
      })
      expect(result.code).to.equal(0)
      expect(result.stdout).to.include('Processed 1 projects')
      expect(result.stdout).to.include(
        `Found partially deleted doc ${docId} in project ${projectId}: fixing metadata`
      )
      expect(result.stdout).to.include(
        'Found 0 projects with 0 out of sync docs'
      )

      // The fix patches the docstore doc with deletion metadata.
      expect(MockDocstoreApi.getDoc(projectId, docId)).to.include({
        deleted: true,
        name: 'c.tex',
      })

      // A second run finds nothing left to repair.
      const result2 = await runScript({})
      expect(result2.code).to.equal(0)
      expect(result2.stdout).to.include('Processed 1 projects')
      expect(result2.stdout).to.not.include(
        `Found partially deleted doc ${docId} in project ${projectId}`
      )
      expect(result2.stdout).to.include(
        'Found 0 projects with 0 out of sync docs'
      )
    })
  })
  describe('with docstore metadata updated', function () {
    beforeEach(function (done) {
      // Complete the deletion by hand: with docstore metadata in place the
      // doc is no longer "partially" deleted.
      MockDocstoreApi.patchDocument(
        projectId,
        docId,
        {
          deleted: true,
          deletedAt: new Date(),
          name: 'c.tex',
        },
        done
      )
    })

    it('should work when in sync', async function () {
      const result = await runScript({})
      expect(result.code).to.equal(0)
      expect(result.stdout).to.include('Processed 1 projects')
      expect(result.stdout).to.not.include(
        `Found partially deleted doc ${docId} in project ${projectId}`
      )
      expect(result.stdout).to.include(
        'Found 0 projects with 0 out of sync docs'
      )
      expect(peekDocumentInDocstore).to.have.been.called
    })
  })
})
|
||||
})
|
||||
|
|
|
@ -0,0 +1,111 @@
|
|||
const express = require('express')
const bodyParser = require('body-parser')
const app = express()
// Room for two full docs per request: 2MB of lines + 64KB of ranges each.
const MAX_REQUEST_SIZE = 2 * (2 * 1024 * 1024 + 64 * 1024)

/**
 * In-memory stand-in for the docstore service used by acceptance tests.
 * Docs are keyed by `${projectId}:${docId}`. Exposes the subset of the
 * docstore HTTP API the sync-check script talks to: peek, list deleted
 * docs, and patch metadata.
 */
const MockDocstoreApi = {
  docs: {},

  // Drop all stored docs (used between tests).
  clearDocs() {
    this.docs = {}
  },

  getDoc(projectId, docId) {
    return this.docs[`${projectId}:${docId}`]
  },

  // Store a doc, defaulting version to 0 and lines to [].
  insertDoc(projectId, docId, doc) {
    if (doc.version == null) {
      doc.version = 0
    }
    if (doc.lines == null) {
      doc.lines = []
    }
    this.docs[`${projectId}:${docId}`] = doc
  },

  // Merge metadata (name/deleted/deletedAt/...) into an existing doc.
  patchDocument(projectId, docId, meta, callback) {
    Object.assign(this.docs[`${projectId}:${docId}`], meta)
    callback(null)
  },

  // Return the doc without side effects (may be undefined).
  peekDocument(projectId, docId, callback) {
    callback(null, this.docs[`${projectId}:${docId}`])
  },

  getAllDeletedDocs(projectId, callback) {
    callback(
      null,
      Object.entries(this.docs)
        // Match the full "projectId:" key prefix. A bare
        // startsWith(projectId) would also match any other project whose
        // id starts with the same characters.
        .filter(([key, doc]) => key.startsWith(`${projectId}:`) && doc.deleted)
        .map(([key, doc]) => {
          return {
            _id: key.split(':')[1],
            name: doc.name,
            deletedAt: doc.deletedAt,
          }
        })
    )
  },

  run() {
    app.get('/project/:project_id/doc-deleted', (req, res, next) => {
      this.getAllDeletedDocs(req.params.project_id, (error, docs) => {
        if (error) {
          res.sendStatus(500)
        } else {
          res.json(docs)
        }
      })
    })

    app.get('/project/:project_id/doc/:doc_id/peek', (req, res, next) => {
      this.peekDocument(
        req.params.project_id,
        req.params.doc_id,
        (error, doc) => {
          if (error) {
            res.sendStatus(500)
          } else if (doc) {
            res.json(doc)
          } else {
            res.sendStatus(404)
          }
        }
      )
    })

    app.patch(
      '/project/:project_id/doc/:doc_id',
      bodyParser.json({ limit: MAX_REQUEST_SIZE }),
      (req, res, next) => {
        // Use `this` like the sibling handlers (run() is invoked as
        // MockDocstoreApi.run(), so `this` is MockDocstoreApi).
        this.patchDocument(
          req.params.project_id,
          req.params.doc_id,
          req.body,
          error => {
            if (error) {
              res.sendStatus(500)
            } else {
              res.sendStatus(204)
            }
          }
        )
      }
    )

    app
      .listen(3016, error => {
        if (error) {
          throw error
        }
      })
      .on('error', error => {
        console.error('error starting MockDocstoreApi:', error.message)
        process.exit(1)
      })
  },
}

MockDocstoreApi.run()
module.exports = MockDocstoreApi
|
|
@ -774,6 +774,10 @@ const deleteEntity = wrapWithLock(
|
|||
throw new Error('No entityType set')
|
||||
}
|
||||
entityType = entityType.toLowerCase()
|
||||
|
||||
// Flush the entire project to avoid leaving partially deleted docs in redis.
|
||||
await DocumentUpdaterHandler.promises.flushProjectToMongo(projectId)
|
||||
|
||||
const { entity, path, projectBeforeDeletion, newProject } =
|
||||
await ProjectEntityMongoUpdateHandler.promises.deleteEntity(
|
||||
projectId,
|
||||
|
|
|
@ -1731,6 +1731,12 @@ describe('ProjectEntityUpdateHandler', function () {
|
|||
)
|
||||
})
|
||||
|
||||
it('flushes the project to mongo', function () {
|
||||
this.DocumentUpdaterHandler.promises.flushProjectToMongo.should.have.been.calledWith(
|
||||
projectId
|
||||
)
|
||||
})
|
||||
|
||||
it('deletes the entity in mongo', function () {
|
||||
this.ProjectEntityMongoUpdateHandler.promises.deleteEntity
|
||||
.calledWith(projectId, docId, 'doc')
|
||||
|
|
Loading…
Add table
Reference in a new issue