// overleaf/services/docstore/test/unit/js/DocArchiveManagerTests.js
// (558 lines, 17 KiB, JavaScript)

const sinon = require('sinon')
const { expect } = require('chai')
const modulePath = '../../../app/js/DocArchiveManager.js'
const SandboxedModule = require('sandboxed-module')
const { ObjectId } = require('mongodb')
const Errors = require('../../../app/js/Errors')
2020-07-23 14:43:51 -04:00
2020-05-28 09:20:54 -04:00
describe('DocArchiveManager', function () {
2020-07-23 14:43:51 -04:00
// Shared test state. Everything here is (re)assigned by the top-level
// beforeEach, so each test starts from fresh stubs and fixtures.
let DocArchiveManager,
  // Stubbed collaborators injected into the module under test
  PersistorManager,
  MongoManager,
  RangeManager,
  Settings,
  Crypto,
  StreamUtils,
  // Stubs returned (indirectly) by Crypto.createHash()
  HashDigest,
  HashUpdate,
  // Document fixtures
  archivedDocs,
  mongoDocs,
  archivedDoc,
  archivedDocJson,
  md5Sum,
  projectId,
  // Stream fixtures
  readStream,
  stream
2020-05-28 09:20:54 -04:00
// Rebuild every fixture and stub before each test, then load
// DocArchiveManager through SandboxedModule with the stubs injected in place
// of its real dependencies.
beforeEach(function () {
  md5Sum = 'decafbad'

  RangeManager = {
    jsonRangesToMongo: sinon.stub().returns({ mongo: 'ranges' }),
  }
  Settings = {
    docstore: {
      backend: 'gcs',
      bucket: 'wombat',
    },
    parallelArchiveJobs: 3,
  }

  // Fake hashing chain: createHash().update().digest() always yields md5Sum.
  HashDigest = sinon.stub().returns(md5Sum)
  HashUpdate = sinon.stub().returns({ digest: HashDigest })
  Crypto = {
    createHash: sinon.stub().returns({ update: HashUpdate }),
  }
  StreamUtils = {
    ReadableString: sinon.stub().returns({ stream: 'readStream' }),
  }

  projectId = new ObjectId()

  // Docs that Mongo reports as already archived.
  archivedDocs = [
    {
      _id: new ObjectId(),
      inS3: true,
      rev: 2,
    },
    {
      _id: new ObjectId(),
      inS3: true,
      rev: 4,
    },
    {
      _id: new ObjectId(),
      inS3: true,
      rev: 6,
    },
  ]

  // A mix of docs that still have their lines (indexes 0, 1, 4) and docs
  // flagged as inS3 (indexes 2, 3).
  mongoDocs = [
    {
      _id: new ObjectId(),
      lines: ['one', 'two', 'three'],
      rev: 2,
    },
    {
      _id: new ObjectId(),
      lines: ['aaa', 'bbb', 'ccc'],
      rev: 4,
    },
    {
      _id: new ObjectId(),
      inS3: true,
      rev: 6,
    },
    {
      _id: new ObjectId(),
      inS3: true,
      rev: 6,
    },
    {
      _id: new ObjectId(),
      lines: ['111', '222', '333'],
      rev: 6,
    },
  ]

  // Expected archived payload for mongoDocs[0]; the JSON form also carries
  // the schema version.
  archivedDoc = {
    lines: mongoDocs[0].lines,
    rev: mongoDocs[0].rev,
  }
  archivedDocJson = JSON.stringify({ ...archivedDoc, schema_v: 1 })

  // Fake readable stream: registering a 'data' listener immediately yields
  // the archived JSON as a Buffer, and an 'end' listener is invoked at once.
  stream = {
    on: sinon.stub(),
    resume: sinon.stub(),
  }
  stream.on.withArgs('data').yields(Buffer.from(archivedDocJson, 'utf8'))
  stream.on.withArgs('end').yields()

  readStream = {
    stream: 'readStream',
  }

  PersistorManager = {
    getObjectStream: sinon.stub().resolves(stream),
    sendStream: sinon.stub().resolves(),
    getObjectMd5Hash: sinon.stub().resolves(md5Sum),
    deleteObject: sinon.stub().resolves(),
    deleteDirectory: sinon.stub().resolves(),
  }

  // First call returns the ids of the docs not yet in S3; the second call
  // returns an empty list.
  const getNonArchivedProjectDocIds = sinon.stub()
  getNonArchivedProjectDocIds
    .onCall(0)
    .resolves(mongoDocs.filter(doc => !doc.inS3).map(doc => doc._id))
  getNonArchivedProjectDocIds.onCall(1).resolves([])

  // Same two-call pattern for the archived docs.
  const getArchivedProjectDocs = sinon.stub()
  getArchivedProjectDocs.onCall(0).resolves(archivedDocs)
  getArchivedProjectDocs.onCall(1).resolves([])

  // Looks a doc up by id across both fixture lists; rejects with
  // NotFoundError for an unknown project or doc id.
  const fakeGetDoc = async (_projectId, _docId) => {
    if (_projectId.equals(projectId)) {
      for (const mongoDoc of mongoDocs.concat(archivedDocs)) {
        if (mongoDoc._id.equals(_docId)) {
          return mongoDoc
        }
      }
    }
    throw new Errors.NotFoundError()
  }

  MongoManager = {
    promises: {
      markDocAsArchived: sinon.stub().resolves(),
      restoreArchivedDoc: sinon.stub().resolves(),
      upsertIntoDocCollection: sinon.stub().resolves(),
      getProjectsDocs: sinon.stub().resolves(mongoDocs),
      // Both names share the same stub (and therefore the same call counter).
      getNonDeletedArchivedProjectDocs: getArchivedProjectDocs,
      getNonArchivedProjectDocIds,
      getArchivedProjectDocs,
      findDoc: sinon.stub().callsFake(fakeGetDoc),
      getDocForArchiving: sinon.stub().callsFake(fakeGetDoc),
      destroyProject: sinon.stub().resolves(),
    },
  }

  DocArchiveManager = SandboxedModule.require(modulePath, {
    requires: {
      '@overleaf/settings': Settings,
      crypto: Crypto,
      '@overleaf/stream-utils': StreamUtils,
      './MongoManager': MongoManager,
      './RangeManager': RangeManager,
      './PersistorManager': PersistorManager,
      './Errors': Errors,
    },
  })
})
2020-05-28 09:20:54 -04:00
// Tests for DocArchiveManager.promises.archiveDoc(projectId, docId).
describe('archiveDoc', function () {
  it('should resolve when passed a valid document', async function () {
    await expect(
      DocArchiveManager.promises.archiveDoc(projectId, mongoDocs[0]._id)
    ).to.eventually.be.fulfilled
  })

  it('should throw an error if the doc has no lines', async function () {
    // Safe to mutate: mongoDocs is rebuilt by the top-level beforeEach.
    const doc = mongoDocs[0]
    doc.lines = null

    await expect(
      DocArchiveManager.promises.archiveDoc(projectId, doc._id)
    ).to.eventually.be.rejectedWith('doc has no lines')
  })

  it('should add the schema version', async function () {
    await DocArchiveManager.promises.archiveDoc(projectId, mongoDocs[1]._id)
    expect(StreamUtils.ReadableString).to.have.been.calledWith(
      sinon.match(/"schema_v":1/)
    )
  })

  it('should calculate the hex md5 sum of the content', async function () {
    await DocArchiveManager.promises.archiveDoc(projectId, mongoDocs[0]._id)
    expect(Crypto.createHash).to.have.been.calledWith('md5')
    expect(HashUpdate).to.have.been.calledWith(archivedDocJson)
    expect(HashDigest).to.have.been.calledWith('hex')
  })

  it('should pass the md5 hash to the object persistor for verification', async function () {
    await DocArchiveManager.promises.archiveDoc(projectId, mongoDocs[0]._id)

    expect(PersistorManager.sendStream).to.have.been.calledWith(
      sinon.match.any,
      sinon.match.any,
      sinon.match.any,
      { sourceMd5: md5Sum }
    )
  })

  it('should pass the correct bucket and key to the persistor', async function () {
    await DocArchiveManager.promises.archiveDoc(projectId, mongoDocs[0]._id)

    expect(PersistorManager.sendStream).to.have.been.calledWith(
      Settings.docstore.bucket,
      `${projectId}/${mongoDocs[0]._id}`
    )
  })

  it('should create a stream from the encoded json and send it', async function () {
    await DocArchiveManager.promises.archiveDoc(projectId, mongoDocs[0]._id)
    expect(StreamUtils.ReadableString).to.have.been.calledWith(
      archivedDocJson
    )
    expect(PersistorManager.sendStream).to.have.been.calledWith(
      sinon.match.any,
      sinon.match.any,
      readStream
    )
  })

  it('should mark the doc as archived', async function () {
    await DocArchiveManager.promises.archiveDoc(projectId, mongoDocs[0]._id)
    expect(MongoManager.promises.markDocAsArchived).to.have.been.calledWith(
      projectId,
      mongoDocs[0]._id,
      mongoDocs[0].rev
    )
  })

  describe('when archiving is not configured', function () {
    beforeEach(function () {
      Settings.docstore.backend = undefined
    })

    it('should bail out early', async function () {
      await DocArchiveManager.promises.archiveDoc(projectId, mongoDocs[0]._id)
      expect(MongoManager.promises.getDocForArchiving).to.not.have.been.called
    })
  })

  describe('with null bytes in the result', function () {
    // JSON.stringify is replaced globally for these tests and restored in
    // afterEach so later tests see the real implementation again.
    const _stringify = JSON.stringify

    beforeEach(function () {
      JSON.stringify = sinon.stub().returns('{"bad": "\u0000"}')
    })

    afterEach(function () {
      JSON.stringify = _stringify
    })

    it('should return an error', async function () {
      await expect(
        DocArchiveManager.promises.archiveDoc(projectId, mongoDocs[0]._id)
      ).to.eventually.be.rejectedWith('null bytes detected')
    })
  })
})
2020-07-23 14:43:51 -04:00
// Tests for DocArchiveManager.promises.unarchiveDoc(projectId, docId).
describe('unarchiveDoc', function () {
  let docId, lines, rev

  // Set the shared fixtures for EVERY test in this suite. They used to be
  // assigned only inside 'when the doc is in S3', which left the tests at the
  // bottom of this suite running with a stale (or undefined) docId.
  beforeEach(function () {
    docId = mongoDocs[0]._id
    lines = ['doc', 'lines']
    rev = 123
  })

  describe('when the doc is in S3', function () {
    beforeEach(function () {
      // rev is already assigned by the outer beforeEach, so the stub never
      // captures `undefined`.
      MongoManager.promises.findDoc = sinon
        .stub()
        .resolves({ inS3: true, rev })
    })

    it('should resolve when passed a valid document', async function () {
      await expect(DocArchiveManager.promises.unarchiveDoc(projectId, docId))
        .to.eventually.be.fulfilled
    })

    it('should test md5 validity with the raw buffer', async function () {
      await DocArchiveManager.promises.unarchiveDoc(projectId, docId)
      expect(HashUpdate).to.have.been.calledWith(
        sinon.match.instanceOf(Buffer)
      )
    })

    it('should throw an error if the md5 does not match', async function () {
      PersistorManager.getObjectMd5Hash.resolves('badf00d')
      await expect(
        DocArchiveManager.promises.unarchiveDoc(projectId, docId)
      ).to.eventually.be.rejected.and.be.instanceof(Errors.Md5MismatchError)
    })

    it('should restore the doc in Mongo', async function () {
      await DocArchiveManager.promises.unarchiveDoc(projectId, docId)
      expect(
        MongoManager.promises.restoreArchivedDoc
      ).to.have.been.calledWith(projectId, docId, archivedDoc)
    })

    describe('when archiving is not configured', function () {
      beforeEach(function () {
        Settings.docstore.backend = undefined
      })

      it('should error out on archived doc', async function () {
        await expect(
          DocArchiveManager.promises.unarchiveDoc(projectId, docId)
        ).to.eventually.be.rejected.and.match(
          /found archived doc, but archiving backend is not configured/
        )
      })

      it('should return early on non-archived doc', async function () {
        MongoManager.promises.findDoc = sinon.stub().resolves({ rev })
        await DocArchiveManager.promises.unarchiveDoc(projectId, docId)
        expect(PersistorManager.getObjectMd5Hash).to.not.have.been.called
      })
    })

    describe('doc contents', function () {
      // Shadows the suite-wide archivedDoc on purpose: each case below
      // defines its own payload and re-primes the fake stream with it.
      let archivedDoc

      describe('when the doc has the old schema', function () {
        beforeEach(function () {
          archivedDoc = lines
          archivedDocJson = JSON.stringify(archivedDoc)
          stream.on
            .withArgs('data')
            .yields(Buffer.from(archivedDocJson, 'utf8'))
        })

        it('should return the docs lines', async function () {
          await DocArchiveManager.promises.unarchiveDoc(projectId, docId)
          expect(
            MongoManager.promises.restoreArchivedDoc
          ).to.have.been.calledWith(projectId, docId, { lines, rev })
        })
      })

      describe('with the new schema and ranges', function () {
        beforeEach(function () {
          archivedDoc = {
            lines,
            ranges: { json: 'ranges' },
            rev: 456,
            schema_v: 1,
          }
          archivedDocJson = JSON.stringify(archivedDoc)
          stream.on
            .withArgs('data')
            .yields(Buffer.from(archivedDocJson, 'utf8'))
        })

        it('should return the doc lines and ranges', async function () {
          await DocArchiveManager.promises.unarchiveDoc(projectId, docId)
          expect(
            MongoManager.promises.restoreArchivedDoc
          ).to.have.been.calledWith(projectId, docId, {
            lines,
            ranges: { mongo: 'ranges' },
            rev: 456,
          })
        })
      })

      describe('with the new schema and no ranges', function () {
        beforeEach(function () {
          archivedDoc = { lines, rev: 456, schema_v: 1 }
          archivedDocJson = JSON.stringify(archivedDoc)
          stream.on
            .withArgs('data')
            .yields(Buffer.from(archivedDocJson, 'utf8'))
        })

        it('should return only the doc lines', async function () {
          await DocArchiveManager.promises.unarchiveDoc(projectId, docId)
          expect(
            MongoManager.promises.restoreArchivedDoc
          ).to.have.been.calledWith(projectId, docId, { lines, rev: 456 })
        })
      })

      describe('with the new schema and no rev', function () {
        beforeEach(function () {
          archivedDoc = { lines, schema_v: 1 }
          archivedDocJson = JSON.stringify(archivedDoc)
          stream.on
            .withArgs('data')
            .yields(Buffer.from(archivedDocJson, 'utf8'))
        })

        it('should use the rev obtained from Mongo', async function () {
          await DocArchiveManager.promises.unarchiveDoc(projectId, docId)
          expect(
            MongoManager.promises.restoreArchivedDoc
          ).to.have.been.calledWith(projectId, docId, { lines, rev })
        })
      })

      describe('with an unrecognised schema', function () {
        beforeEach(function () {
          archivedDoc = { lines, schema_v: 2 }
          archivedDocJson = JSON.stringify(archivedDoc)
          stream.on
            .withArgs('data')
            .yields(Buffer.from(archivedDocJson, 'utf8'))
        })

        it('should throw an error', async function () {
          await expect(
            DocArchiveManager.promises.unarchiveDoc(projectId, docId)
          ).to.eventually.be.rejectedWith(
            "I don't understand the doc format in s3"
          )
        })
      })
    })
  })

  it('should not do anything if the file is already unarchived', async function () {
    MongoManager.promises.findDoc.resolves({ inS3: false })
    await DocArchiveManager.promises.unarchiveDoc(projectId, docId)
    expect(PersistorManager.getObjectStream).not.to.have.been.called
  })

  it('should throw an error if the file is not found', async function () {
    // Mongo must report the doc as archived: as the test above shows, a doc
    // that is not inS3 never reaches the persistor, so the rejection stubbed
    // below would otherwise go unexercised.
    MongoManager.promises.findDoc = sinon.stub().resolves({ inS3: true, rev })
    PersistorManager.getObjectStream = sinon
      .stub()
      .rejects(new Errors.NotFoundError())
    await expect(
      DocArchiveManager.promises.unarchiveDoc(projectId, docId)
    ).to.eventually.be.rejected.and.be.instanceof(Errors.NotFoundError)
  })
})
// Tests for DocArchiveManager.promises.destroyProject(projectId).
describe('destroyProject', function () {
  describe('when archiving is enabled', function () {
    beforeEach(async function () {
      await DocArchiveManager.promises.destroyProject(projectId)
    })

    it('should delete the project in Mongo', function () {
      expect(MongoManager.promises.destroyProject).to.have.been.calledWith(
        projectId
      )
    })

    it('should delete the project in the persistor', function () {
      expect(PersistorManager.deleteDirectory).to.have.been.calledWith(
        Settings.docstore.bucket,
        projectId
      )
    })
  })

  describe('when archiving is disabled', function () {
    beforeEach(async function () {
      // An empty backend name is how this suite switches archiving off.
      Settings.docstore.backend = ''
      await DocArchiveManager.promises.destroyProject(projectId)
    })

    it('should delete the project in Mongo', function () {
      expect(MongoManager.promises.destroyProject).to.have.been.calledWith(
        projectId
      )
    })

    it('should not delete the project in the persistor', function () {
      expect(PersistorManager.deleteDirectory).not.to.have.been.called
    })
  })
})
2020-07-23 14:43:51 -04:00
// Tests for DocArchiveManager.promises.archiveAllDocs(projectId).
describe('archiveAllDocs', function () {
  it('should resolve with valid arguments', async function () {
    await expect(DocArchiveManager.promises.archiveAllDocs(projectId)).to
      .eventually.be.fulfilled
  })

  it('should archive all project docs which are not in s3', async function () {
    await DocArchiveManager.promises.archiveAllDocs(projectId)

    // not inS3
    expect(MongoManager.promises.markDocAsArchived).to.have.been.calledWith(
      projectId,
      mongoDocs[0]._id
    )
    expect(MongoManager.promises.markDocAsArchived).to.have.been.calledWith(
      projectId,
      mongoDocs[1]._id
    )
    expect(MongoManager.promises.markDocAsArchived).to.have.been.calledWith(
      projectId,
      mongoDocs[4]._id
    )

    // inS3
    expect(
      MongoManager.promises.markDocAsArchived
    ).not.to.have.been.calledWith(projectId, mongoDocs[2]._id)
    expect(
      MongoManager.promises.markDocAsArchived
    ).not.to.have.been.calledWith(projectId, mongoDocs[3]._id)
  })

  describe('when archiving is not configured', function () {
    beforeEach(function () {
      Settings.docstore.backend = undefined
    })

    it('should bail out early', async function () {
      // Exercise archiveAllDocs itself. Calling archiveDoc here would make
      // the assertion below pass trivially, because the stub under test
      // would never be on the code path.
      await DocArchiveManager.promises.archiveAllDocs(projectId)
      expect(MongoManager.promises.getNonArchivedProjectDocIds).to.not.have
        .been.called
    })
  })
})
2020-07-23 14:43:51 -04:00
// Tests for DocArchiveManager.promises.unArchiveAllDocs(projectId).
describe('unArchiveAllDocs', function () {
  it('should resolve with valid arguments', async function () {
    await expect(DocArchiveManager.promises.unArchiveAllDocs(projectId)).to
      .eventually.be.fulfilled
  })

  it('should unarchive all inS3 docs', async function () {
    await DocArchiveManager.promises.unArchiveAllDocs(projectId)

    for (const doc of archivedDocs) {
      expect(PersistorManager.getObjectStream).to.have.been.calledWith(
        Settings.docstore.bucket,
        `${projectId}/${doc._id}`
      )
    }
  })

  describe('when archiving is not configured', function () {
    beforeEach(function () {
      Settings.docstore.backend = undefined
    })

    it('should bail out early', async function () {
      // Exercise unArchiveAllDocs itself. Calling archiveDoc here would make
      // the assertion below pass trivially, because the stub under test
      // would never be on the code path.
      await DocArchiveManager.promises.unArchiveAllDocs(projectId)
      expect(MongoManager.promises.getNonDeletedArchivedProjectDocs).to.not
        .have.been.called
    })
  })
})
})