Merge pull request #7627 from overleaf/em-docstore-archiving

Safer doc unarchiving

GitOrigin-RevId: 60f7aa39401d2f09c13570097c4f376cc401931f
Eric Mc Sween 2022-04-26 07:16:36 -04:00 committed by Copybot
parent 40bbaf168b
commit ddb94d159a
9 changed files with 500 additions and 447 deletions
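At a glance: archived payloads now include the doc's Mongo `rev`, unarchiving restores a doc only while that rev still matches (instead of upserting unconditionally), and the per-doc destroy-with-retry loop is replaced by a single project-level `destroyProject`. A sketch of the payload `archiveDoc` now writes to the bucket, with made-up values:

```js
// Illustrative only; the field names come from the archiveDoc hunk below.
const json = JSON.stringify({
  lines: ['Hello', 'world'], // doc content
  ranges: {},                // comment/track-changes ranges
  rev: 42,                   // new: the Mongo rev captured at archive time
  schema_v: 1,               // archive schema version
})
```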

View file

@ -78,7 +78,7 @@ app.delete('/project/:project_id/doc/:doc_id', (req, res) => {
app.post('/project/:project_id/archive', HttpController.archiveAllDocs)
app.post('/project/:project_id/doc/:doc_id/archive', HttpController.archiveDoc)
app.post('/project/:project_id/unarchive', HttpController.unArchiveAllDocs)
app.post('/project/:project_id/destroy', HttpController.destroyAllDocs)
app.post('/project/:project_id/destroy', HttpController.destroyProject)
app.get('/health_check', HttpController.healthCheck)
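The `/destroy` route keeps its path; only the handler changes. A hypothetical client call (`DOCSTORE_URL` is an assumption, not part of the commit):

```js
// Removes every doc for the project in one call, in Mongo and the persistor.
await fetch(`${DOCSTORE_URL}/project/${projectId}/destroy`, { method: 'POST' })
```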

View file

@ -2,18 +2,16 @@ const { callbackify } = require('util')
const MongoManager = require('./MongoManager').promises
const Errors = require('./Errors')
const logger = require('@overleaf/logger')
const settings = require('@overleaf/settings')
const Settings = require('@overleaf/settings')
const crypto = require('crypto')
const Streamifier = require('streamifier')
const RangeManager = require('./RangeManager')
const PersistorManager = require('./PersistorManager')
const pMap = require('p-map')
const PARALLEL_JOBS = settings.parallelArchiveJobs
const ARCHIVE_BATCH_SIZE = settings.archiveBatchSize
const UN_ARCHIVE_BATCH_SIZE = settings.unArchiveBatchSize
const DESTROY_BATCH_SIZE = settings.destroyBatchSize
const DESTROY_RETRY_COUNT = settings.destroyRetryCount
const PARALLEL_JOBS = Settings.parallelArchiveJobs
const ARCHIVE_BATCH_SIZE = Settings.archiveBatchSize
const UN_ARCHIVE_BATCH_SIZE = Settings.unArchiveBatchSize
module.exports = {
archiveAllDocs: callbackify(archiveAllDocs),
@ -21,8 +19,7 @@ module.exports = {
archiveDoc: callbackify(archiveDoc),
unArchiveAllDocs: callbackify(unArchiveAllDocs),
unarchiveDoc: callbackify(unarchiveDoc),
destroyAllDocs: callbackify(destroyAllDocs),
destroyDoc: callbackify(destroyDoc),
destroyProject: callbackify(destroyProject),
getDoc: callbackify(getDoc),
promises: {
archiveAllDocs,
@ -30,8 +27,7 @@ module.exports = {
archiveDoc,
unArchiveAllDocs,
unarchiveDoc,
destroyAllDocs,
destroyDoc,
destroyProject,
getDoc,
},
}
@ -66,9 +62,11 @@ async function archiveDocById(projectId, docId) {
)
}
// TODO(das7pad): consider refactoring MongoManager.findDoc to take a query
if (doc.inS3) return
return archiveDoc(projectId, doc)
if (doc.inS3) {
// No need to throw an error if the doc is already archived
return
}
await archiveDoc(projectId, doc)
}
async function archiveDoc(projectId, doc) {
@ -85,6 +83,7 @@ async function archiveDoc(projectId, doc) {
const json = JSON.stringify({
lines: doc.lines,
ranges: doc.ranges,
rev: doc.rev,
schema_v: 1,
})
@ -98,7 +97,7 @@ async function archiveDoc(projectId, doc) {
const md5 = crypto.createHash('md5').update(json).digest('hex')
const stream = Streamifier.createReadStream(json)
await PersistorManager.sendStream(settings.docstore.bucket, key, stream, {
await PersistorManager.sendStream(Settings.docstore.bucket, key, stream, {
sourceMd5: md5,
})
await MongoManager.markDocAsArchived(doc._id, doc.rev)
@ -107,7 +106,7 @@ async function archiveDoc(projectId, doc) {
async function unArchiveAllDocs(projectId) {
while (true) {
let docs
if (settings.docstore.keepSoftDeletedDocsArchived) {
if (Settings.docstore.keepSoftDeletedDocsArchived) {
docs = await MongoManager.getNonDeletedArchivedProjectDocs(
projectId,
UN_ARCHIVE_BATCH_SIZE
@ -131,11 +130,11 @@ async function unArchiveAllDocs(projectId) {
async function getDoc(projectId, docId) {
const key = `${projectId}/${docId}`
const sourceMd5 = await PersistorManager.getObjectMd5Hash(
settings.docstore.bucket,
Settings.docstore.bucket,
key
)
const stream = await PersistorManager.getObjectStream(
settings.docstore.bucket,
Settings.docstore.bucket,
key
)
stream.resume()
@ -150,106 +149,38 @@ async function getDoc(projectId, docId) {
}
const json = buffer.toString()
const doc = JSON.parse(json)
const mongoDoc = {}
if (doc.schema_v === 1 && doc.lines != null) {
mongoDoc.lines = doc.lines
if (doc.ranges != null) {
mongoDoc.ranges = RangeManager.jsonRangesToMongo(doc.ranges)
}
} else if (Array.isArray(doc)) {
mongoDoc.lines = doc
} else {
throw new Error("I don't understand the doc format in s3")
}
return mongoDoc
return _deserializeArchivedDoc(json)
}
// get the doc and unarchive it to mongo
async function unarchiveDoc(projectId, docId) {
logger.log(
{ project_id: projectId, doc_id: docId },
'getting doc from persistor'
)
const key = `${projectId}/${docId}`
const originalDoc = await MongoManager.findDoc(projectId, docId, { inS3: 1 })
if (!originalDoc.inS3) {
// return if it's not actually in S3 as there's nothing to do
logger.log({ projectId, docId }, 'getting doc from persistor')
const mongoDoc = await MongoManager.findDoc(projectId, docId, {
inS3: 1,
rev: 1,
})
if (!mongoDoc.inS3) {
// The doc is already unarchived
return
}
let mongoDoc
try {
mongoDoc = await getDoc(projectId, docId)
} catch (err) {
// if we get a 404, we could be in a race and something else has unarchived the doc already
if (err instanceof Errors.NotFoundError) {
const doc = await MongoManager.findDoc(projectId, docId, { inS3: 1 })
if (!doc.inS3) {
// the doc has been unarchived while we were looking for it, so no error
return
}
}
throw err
const archivedDoc = await getDoc(projectId, docId)
if (archivedDoc.rev == null) {
// Older archived docs didn't have a rev. Assume that the rev of the
// archived doc is the rev that was stored in Mongo when we retrieved it
// earlier.
archivedDoc.rev = mongoDoc.rev
}
await MongoManager.upsertIntoDocCollection(projectId, docId, mongoDoc)
await PersistorManager.deleteObject(settings.docstore.bucket, key)
await MongoManager.restoreArchivedDoc(projectId, docId, archivedDoc)
}
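This is the core of the "safer" behaviour: the old path swallowed races by re-checking `inS3` and then upserted unconditionally; the new path lets `restoreArchivedDoc` match only `{ inS3: true, rev }`, so a concurrent change surfaces as a `DocRevValueError` instead of silently clobbering the doc. A hypothetical caller could treat that error as retryable (sketch, not part of the commit):

```js
// Hypothetical wrapper, assuming the promisified API and Errors from this service.
async function unarchiveDocWithRetry(projectId, docId, attempts = 3) {
  for (let attempt = 1; attempt <= attempts; attempt++) {
    try {
      return await DocArchiveManager.promises.unarchiveDoc(projectId, docId)
    } catch (err) {
      // The rev moved between the S3 read and the Mongo restore; re-read and retry.
      if (!(err instanceof Errors.DocRevValueError) || attempt === attempts) {
        throw err
      }
    }
  }
}
```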
async function destroyAllDocs(projectId) {
while (true) {
const docs = await MongoManager.getProjectsDocs(
projectId,
{ include_deleted: true, limit: DESTROY_BATCH_SIZE },
{ _id: 1 }
async function destroyProject(projectId) {
const tasks = [MongoManager.destroyProject(projectId)]
if (_isArchivingEnabled()) {
tasks.push(
PersistorManager.deleteDirectory(Settings.docstore.bucket, projectId)
)
if (!docs || docs.length === 0) {
break
}
await pMap(docs, doc => destroyDoc(projectId, doc._id), {
concurrency: PARALLEL_JOBS,
})
}
}
async function destroyDoc(projectId, docId) {
logger.log(
{ project_id: projectId, doc_id: docId },
'removing doc from mongo and persistor'
)
const doc = await MongoManager.findDoc(projectId, docId, {
inS3: 1,
})
if (!doc) {
throw new Errors.NotFoundError('Doc not found in Mongo')
}
if (doc.inS3) {
await destroyArchiveWithRetry(projectId, docId)
}
await MongoManager.destroyDoc(docId)
}
async function destroyArchiveWithRetry(projectId, docId) {
let attempt = 0
let lastError
while (attempt++ <= DESTROY_RETRY_COUNT) {
try {
await PersistorManager.deleteObject(
settings.docstore.bucket,
`${projectId}/${docId}`
)
return
} catch (err) {
lastError = err
logger.warn(
{ projectId, docId, err, attempt },
'destroying archive failed'
)
}
}
throw lastError
await Promise.all(tasks)
}
async function _streamToBuffer(stream) {
@ -260,3 +191,41 @@ async function _streamToBuffer(stream) {
stream.on('end', () => resolve(Buffer.concat(chunks)))
})
}
function _deserializeArchivedDoc(json) {
const doc = JSON.parse(json)
const result = {}
if (doc.schema_v === 1 && doc.lines != null) {
result.lines = doc.lines
if (doc.ranges != null) {
result.ranges = RangeManager.jsonRangesToMongo(doc.ranges)
}
} else if (Array.isArray(doc)) {
result.lines = doc
} else {
throw new Error("I don't understand the doc format in s3")
}
if (doc.rev != null) {
result.rev = doc.rev
}
return result
}
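For illustration, the three shapes the deserializer handles (calling the private helper directly; values are made up):

```js
// Legacy format: a bare array of lines, no rev.
_deserializeArchivedDoc(JSON.stringify(['a', 'b']))
// => { lines: ['a', 'b'] }

// Versioned format: lines plus optional ranges and rev.
_deserializeArchivedDoc(JSON.stringify({ schema_v: 1, lines: ['a'], rev: 7 }))
// => { lines: ['a'], rev: 7 }

// Anything else is rejected rather than guessed at.
_deserializeArchivedDoc(JSON.stringify({ schema_v: 2, lines: ['a'] }))
// => throws "I don't understand the doc format in s3"
```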
function _isArchivingEnabled() {
const backend = Settings.docstore.backend
if (!backend) {
return false
}
// The default backend is S3. If another backend is configured or the S3
// backend itself is correctly configured, then archiving is enabled.
if (backend === 's3' && Settings.docstore.s3 == null) {
return false
}
return true
}
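The effect of this check, under assumed `Settings.docstore` shapes:

```js
// Illustrative settings only; the unit tests below use backend: 'gcs'.
Settings.docstore = {}                                   // no backend → archiving off
Settings.docstore = { backend: 's3' }                    // s3 chosen, unconfigured → off
Settings.docstore = { backend: 's3', s3: { key: 'k' } }  // configured s3 → on
Settings.docstore = { backend: 'gcs', bucket: 'wombat' } // non-s3 backend → on
```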

View file

@ -2,6 +2,7 @@ const DocManager = require('./DocManager')
const logger = require('@overleaf/logger')
const DocArchive = require('./DocArchiveManager')
const HealthChecker = require('./HealthChecker')
const Errors = require('./Errors')
const Settings = require('@overleaf/settings')
function getDoc(req, res, next) {
@ -250,16 +251,19 @@ function unArchiveAllDocs(req, res, next) {
logger.log({ projectId }, 'unarchiving all docs')
DocArchive.unArchiveAllDocs(projectId, function (error) {
if (error) {
if (error instanceof Errors.DocRevValueError) {
return res.sendStatus(409)
}
return next(error)
}
res.sendStatus(200)
})
}
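So a rev mismatch during bulk unarchive maps to HTTP 409 rather than a 500; a hypothetical caller (URL assumed) could then re-read and retry:

```js
// Hypothetical client; the 409 comes from the DocRevValueError branch above.
const res = await fetch(`${DOCSTORE_URL}/project/${projectId}/unarchive`, {
  method: 'POST',
})
if (res.status === 409) {
  // A doc's rev changed mid-unarchive; safe to retry the request.
}
```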
function destroyAllDocs(req, res, next) {
function destroyProject(req, res, next) {
const { project_id: projectId } = req.params
logger.log({ projectId }, 'destroying all docs')
DocArchive.destroyAllDocs(projectId, function (error) {
DocArchive.destroyProject(projectId, function (error) {
if (error) {
return next(error)
}
@ -291,6 +295,6 @@ module.exports = {
archiveAllDocs,
archiveDoc,
unArchiveAllDocs,
destroyAllDocs,
destroyProject,
healthCheck,
}

View file

@ -2,6 +2,7 @@ const { db, ObjectId } = require('./mongodb')
const logger = require('@overleaf/logger')
const metrics = require('@overleaf/metrics')
const Settings = require('@overleaf/settings')
const OError = require('@overleaf/o-error')
const Errors = require('./Errors')
const { promisify } = require('util')
@ -122,6 +123,48 @@ function markDocAsArchived(docId, rev, callback) {
db.docs.updateOne(query, update, callback)
}
/**
* Restore an archived doc
*
* This checks that inS3 is true and that the archived doc's rev matches. The
* rev was not always stored with docs, so this check is optional.
*/
function restoreArchivedDoc(projectId, docId, archivedDoc, callback) {
const query = {
_id: ObjectId(docId),
project_id: ObjectId(projectId),
inS3: true,
rev: archivedDoc.rev,
}
const update = {
$set: {
lines: archivedDoc.lines,
ranges: archivedDoc.ranges || {},
},
$unset: {
inS3: true,
},
}
db.docs.updateOne(query, update, (err, result) => {
if (err) {
OError.tag(err, 'failed to unarchive doc', {
docId,
rev: archivedDoc.rev,
})
return callback(err)
}
if (result.modifiedCount === 0) {
return callback(
new Errors.DocRevValueError('failed to unarchive doc', {
docId,
rev: archivedDoc.rev,
})
)
}
callback()
})
}
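Concretely, the guard relies on `updateOne` reporting what it changed; a concurrent rev bump makes the query match nothing (values assumed):

```js
// Doc currently in Mongo: { _id, project_id, inS3: true, rev: 5 }
// restoreArchivedDoc(..., { rev: 5, ... }) → matches, modifiedCount: 1, unarchived
// another writer bumps rev to 6 first     → query matches nothing,
//                                            modifiedCount: 0 → DocRevValueError
```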
function getDocVersion(docId, callback) {
db.docOps.findOne(
{
@ -204,23 +247,21 @@ function withRevCheck(doc, method, callback) {
})
}
function destroyDoc(docId, callback) {
db.docs.deleteOne(
{
_id: ObjectId(docId),
},
function (err) {
function destroyProject(projectId, callback) {
db.docs
.find({ project_id: ObjectId(projectId) }, { projection: { _id: 1 } })
.toArray((err, records) => {
const docIds = records.map(r => r._id)
if (err) {
return callback(err)
}
db.docOps.deleteOne(
{
doc_id: ObjectId(docId),
},
callback
)
}
)
db.docOps.deleteMany({ doc_id: { $in: docIds } }, err => {
if (err) {
return callback(err)
}
db.docs.deleteMany({ project_id: ObjectId(projectId) }, callback)
})
})
}
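The same flow with the promise-based driver, as a readability aid (assumed imports; the commit itself stays callback-style). Doc ids are fetched first so docOps can be removed by `$in`, then the docs themselves; both deletes are idempotent, so a crash between them leaves a destroy that can simply be re-run:

```js
// Sketch only: promise-style equivalent of destroyProject above.
async function destroyProjectPromise(projectId) {
  const docs = await db.docs
    .find({ project_id: ObjectId(projectId) }, { projection: { _id: 1 } })
    .toArray()
  const docIds = docs.map(doc => doc._id)
  await db.docOps.deleteMany({ doc_id: { $in: docIds } })
  await db.docs.deleteMany({ project_id: ObjectId(projectId) })
}
```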
module.exports = {
@ -231,12 +272,13 @@ module.exports = {
getNonArchivedProjectDocs,
getNonDeletedArchivedProjectDocs,
upsertIntoDocCollection,
restoreArchivedDoc,
patchDoc,
markDocAsArchived,
getDocVersion,
setDocVersion,
withRevCheck,
destroyDoc,
destroyProject,
}
const methods = Object.getOwnPropertyNames(module.exports)

View file

@ -42,8 +42,6 @@ const Settings = {
archiveBatchSize: parseInt(process.env.ARCHIVE_BATCH_SIZE, 10) || 50,
unArchiveBatchSize: parseInt(process.env.UN_ARCHIVE_BATCH_SIZE, 10) || 50,
destroyBatchSize: parseInt(process.env.DESTROY_BATCH_SIZE, 10) || 2000,
destroyRetryCount: parseInt(process.env.DESTROY_RETRY_COUNT || '3', 10),
parallelArchiveJobs: parseInt(process.env.PARALLEL_ARCHIVE_JOBS, 10) || 5,
}

View file

@ -1,15 +1,3 @@
/* eslint-disable
camelcase,
no-unused-vars,
*/
// TODO: This file was created by bulk-decaffeinate.
// Fix any style issues and re-enable lint.
/*
* decaffeinate suggestions:
* DS102: Remove unnecessary code created because of implicit returns
* DS207: Consider shorter variations of null checks
* Full docs: https://github.com/decaffeinate/decaffeinate/blob/master/docs/suggestions.md
*/
const { db, ObjectId } = require('../../../app/js/mongodb')
const { expect } = require('chai')
const DocstoreApp = require('./helpers/DocstoreApp')
@ -25,18 +13,18 @@ function deleteTestSuite(deleteDoc) {
this.lines = ['original', 'lines']
this.version = 42
this.ranges = []
return DocstoreApp.ensureRunning(() => {
return DocstoreClient.createDoc(
DocstoreApp.ensureRunning(() => {
DocstoreClient.createDoc(
this.project_id,
this.doc_id,
this.lines,
this.version,
this.ranges,
error => {
if (error != null) {
if (error) {
throw error
}
return done()
done()
}
)
})
@ -60,12 +48,12 @@ function deleteTestSuite(deleteDoc) {
deleteDoc(this.project_id, this.doc_id, (error, res, doc) => {
if (error) return done(error)
this.res = res
return done()
done()
})
})
afterEach(function (done) {
return db.docs.remove({ _id: this.doc_id }, done)
db.docs.remove({ _id: this.doc_id }, done)
})
it('should mark the doc as deleted on /deleted', function (done) {
@ -82,12 +70,12 @@ function deleteTestSuite(deleteDoc) {
})
it('should insert a deleted doc into the docs collection', function (done) {
return db.docs.find({ _id: this.doc_id }).toArray((error, docs) => {
db.docs.find({ _id: this.doc_id }).toArray((error, docs) => {
if (error) return done(error)
docs[0]._id.should.deep.equal(this.doc_id)
docs[0].lines.should.deep.equal(this.lines)
docs[0].deleted.should.equal(true)
return done()
done()
})
})
@ -150,12 +138,12 @@ function deleteTestSuite(deleteDoc) {
})
it('should set the doc in s3 correctly', function (done) {
DocstoreClient.getS3Doc(this.project_id, this.doc_id, (error, s3_doc) => {
DocstoreClient.getS3Doc(this.project_id, this.doc_id, (error, s3doc) => {
if (error) {
return done(error)
}
expect(s3_doc.lines).to.deep.equal(this.lines)
expect(s3_doc.ranges).to.deep.equal(this.ranges)
expect(s3doc.lines).to.deep.equal(this.lines)
expect(s3doc.ranges).to.deep.equal(this.ranges)
done()
})
})
@ -181,12 +169,12 @@ function deleteTestSuite(deleteDoc) {
})
})
return describe('when the doc does not exist', function () {
describe('when the doc does not exist', function () {
it('should show as not existing on /deleted', function (done) {
const missing_doc_id = ObjectId()
const missingDocId = ObjectId()
DocstoreClient.isDocDeleted(
this.project_id,
missing_doc_id,
missingDocId,
(error, res) => {
if (error) return done(error)
expect(res.statusCode).to.equal(404)
@ -195,12 +183,12 @@ function deleteTestSuite(deleteDoc) {
)
})
return it('should return a 404', function (done) {
const missing_doc_id = ObjectId()
deleteDoc(this.project_id, missing_doc_id, (error, res, doc) => {
it('should return a 404', function (done) {
const missingDocId = ObjectId()
deleteDoc(this.project_id, missingDocId, (error, res, doc) => {
if (error) return done(error)
res.statusCode.should.equal(404)
return done()
done()
})
})
})
@ -441,64 +429,84 @@ describe('Delete via PATCH', function () {
})
describe("Destroying a project's documents", function () {
describe('when the doc exists', function () {
beforeEach(function (done) {
return db.docOps.insert(
{ doc_id: ObjectId(this.doc_id), version: 1 },
function (err) {
if (err != null) {
return done(err)
beforeEach(function (done) {
this.project_id = ObjectId()
this.doc_id = ObjectId()
this.lines = ['original', 'lines']
this.version = 42
this.ranges = []
DocstoreApp.ensureRunning(() => {
DocstoreClient.createDoc(
this.project_id,
this.doc_id,
this.lines,
this.version,
this.ranges,
error => {
if (error) {
throw error
}
return DocstoreClient.destroyAllDoc(this.project_id, done)
done()
}
)
})
it('should remove the doc from the docs collection', function (done) {
return db.docs.find({ _id: this.doc_id }).toArray((err, docs) => {
expect(err).not.to.exist
expect(docs).to.deep.equal([])
return done()
})
})
return it('should remove the docOps from the docOps collection', function (done) {
return db.docOps.find({ doc_id: this.doc_id }).toArray((err, docOps) => {
expect(err).not.to.exist
expect(docOps).to.deep.equal([])
return done()
})
})
})
return describe('when the doc is archived', function () {
describe('when the doc exists', function () {
beforeEach(function (done) {
return DocstoreClient.archiveAllDoc(this.project_id, err => {
if (err != null) {
db.docOps.insert({ doc_id: ObjectId(this.doc_id), version: 1 }, err => {
if (err) {
return done(err)
}
return DocstoreClient.destroyAllDoc(this.project_id, done)
DocstoreClient.destroyAllDoc(this.project_id, done)
})
})
it('should remove the doc from the docs collection', function (done) {
return db.docs.find({ _id: this.doc_id }).toArray((err, docs) => {
db.docs.find({ _id: this.doc_id }).toArray((err, docs) => {
expect(err).not.to.exist
expect(docs).to.deep.equal([])
return done()
done()
})
})
it('should remove the docOps from the docOps collection', function (done) {
return db.docOps.find({ doc_id: this.doc_id }).toArray((err, docOps) => {
db.docOps.find({ doc_id: this.doc_id }).toArray((err, docOps) => {
expect(err).not.to.exist
expect(docOps).to.deep.equal([])
return done()
done()
})
})
})
describe('when the doc is archived', function () {
beforeEach(function (done) {
DocstoreClient.archiveAllDoc(this.project_id, err => {
if (err) {
return done(err)
}
DocstoreClient.destroyAllDoc(this.project_id, done)
})
})
return it('should remove the doc contents from s3', function (done) {
return DocstoreClient.getS3Doc(this.project_id, this.doc_id, error => {
it('should remove the doc from the docs collection', function (done) {
db.docs.find({ _id: this.doc_id }).toArray((err, docs) => {
expect(err).not.to.exist
expect(docs).to.deep.equal([])
done()
})
})
it('should remove the docOps from the docOps collection', function (done) {
db.docOps.find({ doc_id: this.doc_id }).toArray((err, docOps) => {
expect(err).not.to.exist
expect(docOps).to.deep.equal([])
done()
})
})
it('should remove the doc contents from s3', function (done) {
DocstoreClient.getS3Doc(this.project_id, this.doc_id, error => {
expect(error).to.be.instanceOf(Errors.NotFoundError)
done()
})

View file

@ -17,7 +17,8 @@ describe('DocArchiveManager', function () {
HashUpdate,
archivedDocs,
mongoDocs,
docJson,
archivedDoc,
archivedDocJson,
md5Sum,
projectId,
readStream,
@ -31,11 +32,10 @@ describe('DocArchiveManager', function () {
}
Settings = {
docstore: {
backend: 'gcs',
bucket: 'wombat',
},
parallelArchiveJobs: 3,
destroyBatchSize: 10,
destroyRetryCount: 3,
}
HashDigest = sinon.stub().returns(md5Sum)
HashUpdate = sinon.stub().returns({ digest: HashDigest })
@ -92,17 +92,18 @@ describe('DocArchiveManager', function () {
},
]
docJson = JSON.stringify({
archivedDoc = {
lines: mongoDocs[0].lines,
ranges: mongoDocs[0].ranges,
schema_v: 1,
})
rev: mongoDocs[0].rev,
}
archivedDocJson = JSON.stringify({ ...archivedDoc, schema_v: 1 })
stream = {
on: sinon.stub(),
resume: sinon.stub(),
}
stream.on.withArgs('data').yields(Buffer.from(docJson, 'utf8'))
stream.on.withArgs('data').yields(Buffer.from(archivedDocJson, 'utf8'))
stream.on.withArgs('end').yields()
readStream = {
@ -114,6 +115,7 @@ describe('DocArchiveManager', function () {
sendStream: sinon.stub().resolves(),
getObjectMd5Hash: sinon.stub().resolves(md5Sum),
deleteObject: sinon.stub().resolves(),
deleteDirectory: sinon.stub().resolves(),
}
const getNonArchivedProjectDocs = sinon.stub()
@ -129,13 +131,14 @@ describe('DocArchiveManager', function () {
MongoManager = {
promises: {
markDocAsArchived: sinon.stub().resolves(),
restoreArchivedDoc: sinon.stub().resolves(),
upsertIntoDocCollection: sinon.stub().resolves(),
getProjectsDocs: sinon.stub().resolves(mongoDocs),
getNonDeletedArchivedProjectDocs: getArchivedProjectDocs,
getNonArchivedProjectDocs,
getArchivedProjectDocs,
findDoc: sinon.stub().rejects(new Errors.NotFoundError()),
destroyDoc: sinon.stub().resolves(),
destroyProject: sinon.stub().resolves(),
},
}
for (const mongoDoc of mongoDocs.concat(archivedDocs)) {
@ -181,16 +184,9 @@ describe('DocArchiveManager', function () {
})
it('should calculate the hex md5 sum of the content', async function () {
const json = JSON.stringify({
lines: mongoDocs[0].lines,
ranges: mongoDocs[0].ranges,
schema_v: 1,
})
await DocArchiveManager.promises.archiveDoc(projectId, mongoDocs[0])
expect(Crypto.createHash).to.have.been.calledWith('md5')
expect(HashUpdate).to.have.been.calledWith(json)
expect(HashUpdate).to.have.been.calledWith(archivedDocJson)
expect(HashDigest).to.have.been.calledWith('hex')
})
@ -216,7 +212,9 @@ describe('DocArchiveManager', function () {
it('should create a stream from the encoded json and send it', async function () {
await DocArchiveManager.promises.archiveDoc(projectId, mongoDocs[0])
expect(Streamifier.createReadStream).to.have.been.calledWith(docJson)
expect(Streamifier.createReadStream).to.have.been.calledWith(
archivedDocJson
)
expect(PersistorManager.sendStream).to.have.been.calledWith(
sinon.match.any,
sinon.match.any,
@ -252,12 +250,16 @@ describe('DocArchiveManager', function () {
})
describe('unarchiveDoc', function () {
let docId
let docId, lines, rev
describe('when the doc is in S3', function () {
beforeEach(function () {
MongoManager.promises.findDoc = sinon.stub().resolves({ inS3: true })
MongoManager.promises.findDoc = sinon
.stub()
.resolves({ inS3: true, rev })
docId = mongoDocs[0]._id
lines = ['doc', 'lines']
rev = 123
})
it('should resolve when passed a valid document', async function () {
@ -267,7 +269,7 @@ describe('DocArchiveManager', function () {
it('should test md5 validity with the raw buffer', async function () {
await DocArchiveManager.promises.unarchiveDoc(projectId, docId)
expect(HashUpdate).to.have.been.calledWithMatch(
expect(HashUpdate).to.have.been.calledWith(
sinon.match.instanceOf(Buffer)
)
})
@ -279,96 +281,100 @@ describe('DocArchiveManager', function () {
).to.eventually.be.rejected.and.be.instanceof(Errors.Md5MismatchError)
})
it('should update the doc lines in mongo', async function () {
it('should restore the doc in Mongo', async function () {
await DocArchiveManager.promises.unarchiveDoc(projectId, docId)
expect(
MongoManager.promises.upsertIntoDocCollection
).to.have.been.calledWith(projectId, docId, {
lines: mongoDocs[0].lines,
})
})
it('should delete the doc in s3', async function () {
await DocArchiveManager.promises.unarchiveDoc(projectId, docId)
expect(PersistorManager.deleteObject).to.have.been.calledWith(
Settings.docstore.bucket,
`${projectId}/${docId}`
)
MongoManager.promises.restoreArchivedDoc
).to.have.been.calledWith(projectId, docId, archivedDoc)
})
describe('doc contents', function () {
let mongoDoc, s3Doc
let archivedDoc
describe('when the doc has the old schema', function () {
beforeEach(function () {
mongoDoc = {
lines: ['doc', 'lines'],
}
s3Doc = ['doc', 'lines']
docJson = JSON.stringify(s3Doc)
stream.on.withArgs('data').yields(Buffer.from(docJson, 'utf8'))
archivedDoc = lines
archivedDocJson = JSON.stringify(archivedDoc)
stream.on
.withArgs('data')
.yields(Buffer.from(archivedDocJson, 'utf8'))
})
it('should return the docs lines', async function () {
await DocArchiveManager.promises.unarchiveDoc(projectId, docId)
expect(
MongoManager.promises.upsertIntoDocCollection
).to.have.been.calledWith(projectId, docId, mongoDoc)
MongoManager.promises.restoreArchivedDoc
).to.have.been.calledWith(projectId, docId, { lines, rev })
})
})
describe('with the new schema and ranges', function () {
beforeEach(function () {
s3Doc = {
lines: ['doc', 'lines'],
archivedDoc = {
lines,
ranges: { json: 'ranges' },
rev: 456,
schema_v: 1,
}
mongoDoc = {
lines: ['doc', 'lines'],
ranges: { mongo: 'ranges' },
}
docJson = JSON.stringify(s3Doc)
stream.on.withArgs('data').yields(Buffer.from(docJson, 'utf8'))
archivedDocJson = JSON.stringify(archivedDoc)
stream.on
.withArgs('data')
.yields(Buffer.from(archivedDocJson, 'utf8'))
})
it('should return the doc lines and ranges', async function () {
await DocArchiveManager.promises.unarchiveDoc(projectId, docId)
expect(
MongoManager.promises.upsertIntoDocCollection
).to.have.been.calledWith(projectId, docId, mongoDoc)
MongoManager.promises.restoreArchivedDoc
).to.have.been.calledWith(projectId, docId, {
lines,
ranges: { mongo: 'ranges' },
rev: 456,
})
})
})
describe('with the new schema and no ranges', function () {
beforeEach(function () {
s3Doc = {
lines: ['doc', 'lines'],
schema_v: 1,
}
mongoDoc = {
lines: ['doc', 'lines'],
}
docJson = JSON.stringify(s3Doc)
stream.on.withArgs('data').yields(Buffer.from(docJson, 'utf8'))
archivedDoc = { lines, rev: 456, schema_v: 1 }
archivedDocJson = JSON.stringify(archivedDoc)
stream.on
.withArgs('data')
.yields(Buffer.from(archivedDocJson, 'utf8'))
})
it('should return only the doc lines', async function () {
await DocArchiveManager.promises.unarchiveDoc(projectId, docId)
expect(
MongoManager.promises.upsertIntoDocCollection
).to.have.been.calledWith(projectId, docId, mongoDoc)
MongoManager.promises.restoreArchivedDoc
).to.have.been.calledWith(projectId, docId, { lines, rev: 456 })
})
})
describe('with the new schema and no rev', function () {
beforeEach(function () {
archivedDoc = { lines, schema_v: 1 }
archivedDocJson = JSON.stringify(archivedDoc)
stream.on
.withArgs('data')
.yields(Buffer.from(archivedDocJson, 'utf8'))
})
it('should use the rev obtained from Mongo', async function () {
await DocArchiveManager.promises.unarchiveDoc(projectId, docId)
expect(
MongoManager.promises.restoreArchivedDoc
).to.have.been.calledWith(projectId, docId, { lines, rev })
})
})
describe('with an unrecognised schema', function () {
beforeEach(function () {
s3Doc = {
lines: ['doc', 'lines'],
schema_v: 2,
}
docJson = JSON.stringify(s3Doc)
stream.on.withArgs('data').yields(Buffer.from(docJson, 'utf8'))
archivedDoc = { lines, schema_v: 2 }
archivedDocJson = JSON.stringify(archivedDoc)
stream.on
.withArgs('data')
.yields(Buffer.from(archivedDocJson, 'utf8'))
})
it('should throw an error', async function () {
@ -388,33 +394,7 @@ describe('DocArchiveManager', function () {
expect(PersistorManager.getObjectStream).not.to.have.been.called
})
describe('when the file is removed while we are processing it', function () {
beforeEach(function () {
MongoManager.promises.findDoc = sinon.stub().resolves({ inS3: true })
MongoManager.promises.findDoc.onSecondCall().resolves({ inS3: false })
})
it('should not throw an error if the file is unarchived before we ask for its hash', async function () {
PersistorManager.getObjectMd5Hash = sinon
.stub()
.rejects(new Errors.NotFoundError())
await expect(DocArchiveManager.promises.unarchiveDoc(projectId, docId))
.to.eventually.be.fulfilled
expect(PersistorManager.getObjectStream).not.to.have.been.called
})
it('should not throw an error if the file is unarchived before we download it', async function () {
PersistorManager.getObjectStream = sinon
.stub()
.rejects(new Errors.NotFoundError())
await expect(DocArchiveManager.promises.unarchiveDoc(projectId, docId))
.to.eventually.be.fulfilled
expect(MongoManager.promises.upsertIntoDocCollection).not.to.have.been
.called
})
})
it('should throw an error if the file is not found but is still listed as archived', async function () {
it('should throw an error if the file is not found', async function () {
PersistorManager.getObjectStream = sinon
.stub()
.rejects(new Errors.NotFoundError())
@ -424,89 +404,40 @@ describe('DocArchiveManager', function () {
})
})
describe('destroyDoc', function () {
let docId
beforeEach(function () {
docId = mongoDocs[0]._id
})
it('should resolve when passed a valid document', async function () {
await expect(DocArchiveManager.promises.destroyDoc(projectId, docId)).to
.eventually.be.fulfilled
})
it('should throw a not found error when there is no document', async function () {
await expect(
DocArchiveManager.promises.destroyDoc(projectId, 'wombat')
).to.eventually.be.rejected.and.be.instanceof(Errors.NotFoundError)
})
describe('when the doc is in s3', function () {
beforeEach(function () {
mongoDocs[0].inS3 = true
describe('destroyProject', function () {
describe('when archiving is enabled', function () {
beforeEach(async function () {
await DocArchiveManager.promises.destroyProject(projectId)
})
it('should delete the document from s3, if it is in s3', async function () {
await DocArchiveManager.promises.destroyDoc(projectId, docId)
expect(PersistorManager.deleteObject).to.have.been.calledWith(
Settings.docstore.bucket,
`${projectId}/${docId}`
it('should delete the project in Mongo', function () {
expect(MongoManager.promises.destroyProject).to.have.been.calledWith(
projectId
)
})
it('should delete the doc in mongo', async function () {
await DocArchiveManager.promises.destroyDoc(projectId, docId)
})
describe('when the destroy request errors', function () {
beforeEach(function () {
mongoDocs[0].inS3 = true
PersistorManager.deleteObject.onFirstCall().rejects(new Error('1'))
PersistorManager.deleteObject.onSecondCall().rejects(new Error('2'))
PersistorManager.deleteObject.onThirdCall().resolves()
})
it('should retry', async function () {
await DocArchiveManager.promises.destroyDoc(projectId, docId)
expect(PersistorManager.deleteObject).to.have.been.calledWith(
Settings.docstore.bucket,
`${projectId}/${docId}`
)
expect(PersistorManager.deleteObject.callCount).to.equal(3)
})
})
describe('when the destroy request errors permanent', function () {
beforeEach(function () {
mongoDocs[0].inS3 = true
PersistorManager.deleteObject.rejects(new Error('permanent'))
})
it('should retry and fail eventually', async function () {
await expect(DocArchiveManager.promises.destroyDoc(projectId, docId))
.to.eventually.be.rejected
expect(PersistorManager.deleteObject).to.have.been.calledWith(
Settings.docstore.bucket,
`${projectId}/${docId}`
)
expect(PersistorManager.deleteObject.callCount).to.equal(4)
})
it('should delete the project in the persistor', function () {
expect(PersistorManager.deleteDirectory).to.have.been.calledWith(
Settings.docstore.bucket,
projectId
)
})
})
describe('when the doc is not in s3', function () {
beforeEach(function () {
mongoDocs[0].inS3 = false
describe('when archiving is disabled', function () {
beforeEach(async function () {
Settings.docstore.backend = ''
await DocArchiveManager.promises.destroyProject(projectId)
})
it('should not delete the document from s3, if it is not in s3', async function () {
await DocArchiveManager.promises.destroyDoc(projectId, docId)
expect(PersistorManager.deleteObject).not.to.have.been.called
it('should delete the project in Mongo', function () {
expect(MongoManager.promises.destroyProject).to.have.been.calledWith(
projectId
)
})
it('should delete the doc in mongo', async function () {
await DocArchiveManager.promises.destroyDoc(projectId, docId)
it('should not delete the project in the persistor', function () {
expect(PersistorManager.deleteDirectory).not.to.have.been.called
})
})
})
@ -557,46 +488,4 @@ describe('DocArchiveManager', function () {
}
})
})
describe('destroyAllDocs', function () {
beforeEach(function () {
MongoManager.promises.getProjectsDocs.onCall(0).resolves(mongoDocs)
MongoManager.promises.getProjectsDocs.onCall(1).resolves([])
})
it('should resolve with valid arguments', async function () {
await expect(DocArchiveManager.promises.destroyAllDocs(projectId)).to
.eventually.be.fulfilled
})
it('should delete all docs that are in s3 from s3', async function () {
await DocArchiveManager.promises.destroyAllDocs(projectId)
// not inS3
for (const index of [0, 1, 4]) {
expect(PersistorManager.deleteObject).not.to.have.been.calledWith(
Settings.docstore.bucket,
`${projectId}/${mongoDocs[index]._id}`
)
}
// inS3
for (const index of [2, 3]) {
expect(PersistorManager.deleteObject).to.have.been.calledWith(
Settings.docstore.bucket,
`${projectId}/${mongoDocs[index]._id}`
)
}
})
it('should destroy all docs in mongo', async function () {
await DocArchiveManager.promises.destroyAllDocs(projectId)
for (const mongoDoc of mongoDocs) {
expect(MongoManager.promises.destroyDoc).to.have.been.calledWith(
mongoDoc._id
)
}
})
})
})

View file

@ -6,18 +6,24 @@ const modulePath = require('path').join(
'../../../app/js/HttpController'
)
const { ObjectId } = require('mongodb')
const Errors = require('../../../app/js/Errors')
describe('HttpController', function () {
beforeEach(function () {
const settings = {
max_doc_length: 2 * 1024 * 1024,
}
this.DocArchiveManager = {
unArchiveAllDocs: sinon.stub().yields(),
}
this.DocManager = {}
this.HttpController = SandboxedModule.require(modulePath, {
requires: {
'./DocManager': (this.DocManager = {}),
'./DocArchiveManager': (this.DocArchiveManager = {}),
'./DocManager': this.DocManager,
'./DocArchiveManager': this.DocArchiveManager,
'@overleaf/settings': settings,
'./HealthChecker': {},
'./Errors': Errors,
},
})
this.res = {
@ -481,16 +487,47 @@ describe('HttpController', function () {
})
})
describe('destroyAllDocs', function () {
describe('unArchiveAllDocs', function () {
beforeEach(function () {
this.req.params = { project_id: this.projectId }
this.DocArchiveManager.destroyAllDocs = sinon.stub().callsArg(1)
this.HttpController.destroyAllDocs(this.req, this.res, this.next)
})
describe('on success', function () {
beforeEach(function (done) {
this.res.sendStatus.callsFake(() => done())
this.HttpController.unArchiveAllDocs(this.req, this.res, this.next)
})
it('returns a 200', function () {
expect(this.res.sendStatus).to.have.been.calledWith(200)
})
})
describe("when the archived rev doesn't match", function () {
beforeEach(function (done) {
this.res.sendStatus.callsFake(() => done())
this.DocArchiveManager.unArchiveAllDocs.yields(
new Errors.DocRevValueError('bad rev')
)
this.HttpController.unArchiveAllDocs(this.req, this.res, this.next)
})
it('returns a 409', function () {
expect(this.res.sendStatus).to.have.been.calledWith(409)
})
})
})
describe('destroyProject', function () {
beforeEach(function () {
this.req.params = { project_id: this.projectId }
this.DocArchiveManager.destroyProject = sinon.stub().callsArg(1)
this.HttpController.destroyProject(this.req, this.res, this.next)
})
it('should destroy the docs', function () {
sinon.assert.calledWith(
this.DocArchiveManager.destroyAllDocs,
this.DocArchiveManager.destroyProject,
this.projectId
)
})

View file

@ -5,15 +5,21 @@ const modulePath = require('path').join(
'../../../app/js/MongoManager'
)
const { ObjectId } = require('mongodb')
const { assert } = require('chai')
const { assert, expect } = require('chai')
const Errors = require('../../../app/js/Errors')
describe('MongoManager', function () {
beforeEach(function () {
this.db = {
docs: {
updateOne: sinon.stub().yields(null, { modifiedCount: 1 }),
},
docOps: {},
}
this.MongoManager = SandboxedModule.require(modulePath, {
requires: {
'./mongodb': {
db: (this.db = { docs: {}, docOps: {} }),
db: this.db,
ObjectId,
},
'@overleaf/metrics': { timeAsyncMethod: sinon.stub() },
@ -21,8 +27,8 @@ describe('MongoManager', function () {
'./Errors': Errors,
},
})
this.project_id = ObjectId().toString()
this.doc_id = ObjectId().toString()
this.projectId = ObjectId().toString()
this.docId = ObjectId().toString()
this.callback = sinon.stub()
this.stubbedErr = new Error('hello world')
})
@ -33,8 +39,8 @@ describe('MongoManager', function () {
this.db.docs.findOne = sinon.stub().callsArgWith(2, null, this.doc)
this.filter = { lines: true }
this.MongoManager.findDoc(
this.project_id,
this.doc_id,
this.projectId,
this.docId,
this.filter,
this.callback
)
@ -44,8 +50,8 @@ describe('MongoManager', function () {
this.db.docs.findOne
.calledWith(
{
_id: ObjectId(this.doc_id),
project_id: ObjectId(this.project_id),
_id: ObjectId(this.docId),
project_id: ObjectId(this.projectId),
},
{
projection: this.filter,
@ -61,12 +67,11 @@ describe('MongoManager', function () {
describe('patchDoc', function () {
beforeEach(function (done) {
this.db.docs.updateOne = sinon.stub().yields(null)
this.meta = { name: 'foo.tex' }
this.callback.callsFake(done)
this.MongoManager.patchDoc(
this.project_id,
this.doc_id,
this.projectId,
this.docId,
this.meta,
this.callback
)
@ -75,8 +80,8 @@ describe('MongoManager', function () {
it('should pass the parameter along', function () {
this.db.docs.updateOne.should.have.been.calledWith(
{
_id: ObjectId(this.doc_id),
project_id: ObjectId(this.project_id),
_id: ObjectId(this.docId),
project_id: ObjectId(this.projectId),
},
{
$set: this.meta,
@ -103,7 +108,7 @@ describe('MongoManager', function () {
describe('with included_deleted = false', function () {
beforeEach(function () {
this.MongoManager.getProjectsDocs(
this.project_id,
this.projectId,
{ include_deleted: false },
this.filter,
this.callback
@ -114,7 +119,7 @@ describe('MongoManager', function () {
this.db.docs.find
.calledWith(
{
project_id: ObjectId(this.project_id),
project_id: ObjectId(this.projectId),
deleted: { $ne: true },
},
{
@ -134,7 +139,7 @@ describe('MongoManager', function () {
describe('with included_deleted = true', function () {
beforeEach(function () {
this.MongoManager.getProjectsDocs(
this.project_id,
this.projectId,
{ include_deleted: true },
this.filter,
this.callback
@ -145,7 +150,7 @@ describe('MongoManager', function () {
this.db.docs.find
.calledWith(
{
project_id: ObjectId(this.project_id),
project_id: ObjectId(this.projectId),
},
{
projection: this.filter,
@ -173,7 +178,7 @@ describe('MongoManager', function () {
})
this.callback.callsFake(done)
this.MongoManager.getProjectsDeletedDocs(
this.project_id,
this.projectId,
this.filter,
this.callback
)
@ -182,7 +187,7 @@ describe('MongoManager', function () {
it('should find the deleted docs via the project_id', function () {
this.db.docs.find
.calledWith({
project_id: ObjectId(this.project_id),
project_id: ObjectId(this.projectId),
deleted: true,
})
.should.equal(true)
@ -207,22 +212,22 @@ describe('MongoManager', function () {
describe('upsertIntoDocCollection', function () {
beforeEach(function () {
this.db.docs.updateOne = sinon.stub().callsArgWith(3, this.stubbedErr)
this.db.docs.updateOne.yields(this.stubbedErr)
this.oldRev = 77
})
it('should upsert the document', function (done) {
this.MongoManager.upsertIntoDocCollection(
this.project_id,
this.doc_id,
this.projectId,
this.docId,
{ lines: this.lines },
err => {
assert.equal(err, this.stubbedErr)
const args = this.db.docs.updateOne.args[0]
assert.deepEqual(args[0], { _id: ObjectId(this.doc_id) })
assert.deepEqual(args[0], { _id: ObjectId(this.docId) })
assert.equal(args[1].$set.lines, this.lines)
assert.equal(args[1].$inc.rev, 1)
assert.deepEqual(args[1].$set.project_id, ObjectId(this.project_id))
assert.deepEqual(args[1].$set.project_id, ObjectId(this.projectId))
done()
}
)
@ -230,8 +235,8 @@ describe('MongoManager', function () {
it('should return the error', function (done) {
this.MongoManager.upsertIntoDocCollection(
this.project_id,
this.doc_id,
this.projectId,
this.docId,
{ lines: this.lines },
err => {
err.should.equal(this.stubbedErr)
@ -241,22 +246,35 @@ describe('MongoManager', function () {
})
})
describe('destroyDoc', function () {
describe('destroyProject', function () {
beforeEach(function (done) {
this.db.docs.deleteOne = sinon.stub().yields()
this.db.docOps.deleteOne = sinon.stub().yields()
this.MongoManager.destroyDoc('123456789012', done)
this.projectId = ObjectId()
this.docIds = [ObjectId(), ObjectId()]
this.db.docs.deleteMany = sinon.stub().yields()
this.db.docOps.deleteMany = sinon.stub().yields()
this.db.docs.find = sinon
.stub()
.withArgs({ project_id: this.projectId })
.returns({
toArray: sinon.stub().yields(
null,
this.docIds.map(id => ({
_id: id,
}))
),
})
this.MongoManager.destroyProject(this.projectId, done)
})
it('should destroy the doc', function () {
sinon.assert.calledWith(this.db.docs.deleteOne, {
_id: ObjectId('123456789012'),
it('should destroy all docs', function () {
sinon.assert.calledWith(this.db.docs.deleteMany, {
project_id: this.projectId,
})
})
it('should destroy the docOps', function () {
sinon.assert.calledWith(this.db.docOps.deleteOne, {
doc_id: ObjectId('123456789012'),
sinon.assert.calledWith(this.db.docOps.deleteMany, {
doc_id: { $in: this.docIds },
})
})
})
@ -266,13 +284,13 @@ describe('MongoManager', function () {
beforeEach(function () {
this.doc = { version: (this.version = 42) }
this.db.docOps.findOne = sinon.stub().callsArgWith(2, null, this.doc)
this.MongoManager.getDocVersion(this.doc_id, this.callback)
this.MongoManager.getDocVersion(this.docId, this.callback)
})
it('should look for the doc in the database', function () {
this.db.docOps.findOne
.calledWith(
{ doc_id: ObjectId(this.doc_id) },
{ doc_id: ObjectId(this.docId) },
{
projection: { version: 1 },
}
@ -288,7 +306,7 @@ describe('MongoManager', function () {
describe("when the doc doesn't exist", function () {
beforeEach(function () {
this.db.docOps.findOne = sinon.stub().callsArgWith(2, null, null)
this.MongoManager.getDocVersion(this.doc_id, this.callback)
this.MongoManager.getDocVersion(this.docId, this.callback)
})
it('should call the callback with 0', function () {
@ -301,14 +319,14 @@ describe('MongoManager', function () {
beforeEach(function () {
this.version = 42
this.db.docOps.updateOne = sinon.stub().callsArg(3)
this.MongoManager.setDocVersion(this.doc_id, this.version, this.callback)
this.MongoManager.setDocVersion(this.docId, this.version, this.callback)
})
it('should update the doc version', function () {
this.db.docOps.updateOne
.calledWith(
{
doc_id: ObjectId(this.doc_id),
doc_id: ObjectId(this.docId),
},
{
$set: {
@ -383,4 +401,92 @@ describe('MongoManager', function () {
)
})
})
describe('restoreArchivedDoc', function () {
beforeEach(function () {
this.archivedDoc = {
lines: ['a', 'b', 'c'],
ranges: { some: 'ranges' },
rev: 2,
}
})
describe('complete doc', function () {
beforeEach(function (done) {
this.MongoManager.restoreArchivedDoc(
this.projectId,
this.docId,
this.archivedDoc,
done
)
})
it('updates Mongo', function () {
expect(this.db.docs.updateOne).to.have.been.calledWith(
{
_id: ObjectId(this.docId),
project_id: ObjectId(this.projectId),
inS3: true,
rev: this.archivedDoc.rev,
},
{
$set: {
lines: this.archivedDoc.lines,
ranges: this.archivedDoc.ranges,
},
$unset: {
inS3: true,
},
}
)
})
})
describe('without ranges', function () {
beforeEach(function (done) {
delete this.archivedDoc.ranges
this.MongoManager.restoreArchivedDoc(
this.projectId,
this.docId,
this.archivedDoc,
done
)
})
it('sets ranges to an empty object', function () {
expect(this.db.docs.updateOne).to.have.been.calledWith(
{
_id: ObjectId(this.docId),
project_id: ObjectId(this.projectId),
inS3: true,
rev: this.archivedDoc.rev,
},
{
$set: {
lines: this.archivedDoc.lines,
ranges: {},
},
$unset: {
inS3: true,
},
}
)
})
})
describe("when the update doesn't succeed", function () {
it('throws a DocRevValueError', function (done) {
this.db.docs.updateOne.yields(null, { modifiedCount: 0 })
this.MongoManager.restoreArchivedDoc(
this.projectId,
this.docId,
this.archivedDoc,
err => {
expect(err).to.be.instanceof(Errors.DocRevValueError)
done()
}
)
})
})
})
})