From 31c757cce27eaeed0d42c654fd93e910fcf797ff Mon Sep 17 00:00:00 2001 From: Simon Detheridge Date: Wed, 23 Sep 2020 14:12:22 +0100 Subject: [PATCH] Don't throw 404 errors when unarchiving, if the doc is already unarchived --- services/docstore/app/js/DocArchiveManager.js | 34 ++- .../test/unit/js/DocArchiveManagerTests.js | 268 ++++++++++-------- 2 files changed, 183 insertions(+), 119 deletions(-) diff --git a/services/docstore/app/js/DocArchiveManager.js b/services/docstore/app/js/DocArchiveManager.js index 126fbd6da0..9bf9284fd0 100644 --- a/services/docstore/app/js/DocArchiveManager.js +++ b/services/docstore/app/js/DocArchiveManager.js @@ -93,15 +93,33 @@ async function unarchiveDoc(projectId, docId) { { project_id: projectId, doc_id: docId }, 'getting doc from persistor' ) + const originalDoc = await MongoManager.findDoc(projectId, docId, { inS3: 1 }) + if (!originalDoc.inS3) { + // return if it's not actually in S3 as there's nothing to do + return + } const key = `${projectId}/${docId}` - const sourceMd5 = await PersistorManager.getObjectMd5Hash( - settings.docstore.bucket, - key - ) - const stream = await PersistorManager.getObjectStream( - settings.docstore.bucket, - key - ) + let stream, sourceMd5 + try { + sourceMd5 = await PersistorManager.getObjectMd5Hash( + settings.docstore.bucket, + key + ) + stream = await PersistorManager.getObjectStream( + settings.docstore.bucket, + key + ) + } catch (err) { + // if we get a 404, we could be in a race and something else has unarchived the doc already + if (err instanceof Errors.NotFoundError) { + const doc = await MongoManager.findDoc(projectId, docId, { inS3: 1 }) + if (!doc.inS3) { + // the doc has been archived while we were looking for it, so no error + return + } + } + throw err + } stream.resume() const json = await _streamToString(stream) const md5 = crypto.createHash('md5').update(json).digest('hex') diff --git a/services/docstore/test/unit/js/DocArchiveManagerTests.js b/services/docstore/test/unit/js/DocArchiveManagerTests.js index dbe6069572..000afb1660 100644 --- a/services/docstore/test/unit/js/DocArchiveManagerTests.js +++ b/services/docstore/test/unit/js/DocArchiveManagerTests.js @@ -128,13 +128,13 @@ describe('DocArchiveManager', function () { upsertIntoDocCollection: sinon.stub().resolves(), getProjectsDocs: sinon.stub().resolves(mongoDocs), getArchivedProjectDocs: sinon.stub().resolves(archivedDocs), - findDoc: sinon.stub().resolves(), + findDoc: sinon.stub().rejects(new Errors.NotFoundError()), destroyDoc: sinon.stub().resolves() } } - for (const mongoDoc of mongoDocs) { + for (const mongoDoc of mongoDocs.concat(archivedDocs)) { MongoManager.promises.findDoc - .withArgs(projectId, mongoDoc._id) + .withArgs(projectId, mongoDoc._id, sinon.match.any) .resolves(mongoDoc) } @@ -253,120 +253,166 @@ describe('DocArchiveManager', function () { describe('unarchiveDoc', function () { let docId - beforeEach(function () { - docId = mongoDocs[0]._id + describe('when the doc is in S3', function () { + beforeEach(function () { + MongoManager.promises.findDoc = sinon.stub().resolves({ inS3: true }) + docId = mongoDocs[0]._id + }) + + it('should resolve when passed a valid document', async function () { + await expect(DocArchiveManager.promises.unarchiveDoc(projectId, docId)) + .to.eventually.be.fulfilled + }) + + it('should throw an error if the md5 does not match', async function () { + PersistorManager.getObjectMd5Hash.resolves('badf00d') + await expect( + DocArchiveManager.promises.unarchiveDoc(projectId, docId) + ).to.eventually.be.rejected.and.be.instanceof(Errors.Md5MismatchError) + }) + + it('should update the doc lines in mongo', async function () { + await DocArchiveManager.promises.unarchiveDoc(projectId, docId) + expect( + MongoManager.promises.upsertIntoDocCollection + ).to.have.been.calledWith(projectId, docId, { + lines: mongoDocs[0].lines + }) + }) + + it('should delete the doc in s3', async function () { + await DocArchiveManager.promises.unarchiveDoc(projectId, docId) + expect(PersistorManager.deleteObject).to.have.been.calledWith( + Settings.docstore.bucket, + `${projectId}/${docId}` + ) + }) + + describe('doc contents', function () { + let mongoDoc, s3Doc + + describe('when the doc has the old schema', function () { + beforeEach(function () { + mongoDoc = { + lines: ['doc', 'lines'] + } + s3Doc = ['doc', 'lines'] + docJson = JSON.stringify(s3Doc) + stream.on.withArgs('data').yields(Buffer.from(docJson, 'utf8')) + }) + + it('should return the docs lines', async function () { + await DocArchiveManager.promises.unarchiveDoc(projectId, docId) + expect( + MongoManager.promises.upsertIntoDocCollection + ).to.have.been.calledWith(projectId, docId, mongoDoc) + }) + }) + + describe('with the new schema and ranges', function () { + beforeEach(function () { + s3Doc = { + lines: ['doc', 'lines'], + ranges: { json: 'ranges' }, + schema_v: 1 + } + mongoDoc = { + lines: ['doc', 'lines'], + ranges: { mongo: 'ranges' } + } + docJson = JSON.stringify(s3Doc) + stream.on.withArgs('data').yields(Buffer.from(docJson, 'utf8')) + }) + + it('should return the doc lines and ranges', async function () { + await DocArchiveManager.promises.unarchiveDoc(projectId, docId) + expect( + MongoManager.promises.upsertIntoDocCollection + ).to.have.been.calledWith(projectId, docId, mongoDoc) + }) + }) + + describe('with the new schema and no ranges', function () { + beforeEach(function () { + s3Doc = { + lines: ['doc', 'lines'], + schema_v: 1 + } + mongoDoc = { + lines: ['doc', 'lines'] + } + docJson = JSON.stringify(s3Doc) + stream.on.withArgs('data').yields(Buffer.from(docJson, 'utf8')) + }) + + it('should return only the doc lines', async function () { + await DocArchiveManager.promises.unarchiveDoc(projectId, docId) + expect( + MongoManager.promises.upsertIntoDocCollection + ).to.have.been.calledWith(projectId, docId, mongoDoc) + }) + }) + + describe('with an unrecognised schema', function () { + beforeEach(function () { + s3Doc = { + lines: ['doc', 'lines'], + schema_v: 2 + } + docJson = JSON.stringify(s3Doc) + stream.on.withArgs('data').yields(Buffer.from(docJson, 'utf8')) + }) + + it('should throw an error', async function () { + await expect( + DocArchiveManager.promises.unarchiveDoc(projectId, docId) + ).to.eventually.be.rejectedWith( + "I don't understand the doc format in s3" + ) + }) + }) + }) }) - it('should resolve when passed a valid document', async function () { - await expect(DocArchiveManager.promises.unarchiveDoc(projectId, docId)).to - .eventually.be.fulfilled + it('should not do anything if the file is already unarchived', async function () { + MongoManager.promises.findDoc.resolves({ inS3: false }) + await DocArchiveManager.promises.unarchiveDoc(projectId, docId) + expect(PersistorManager.getObjectStream).not.to.have.been.called }) - it('should throw an error if the md5 does not match', async function () { - PersistorManager.getObjectMd5Hash.resolves('badf00d') + describe('when the file is removed while we are processing it', function () { + beforeEach(function () { + MongoManager.promises.findDoc = sinon.stub().resolves({ inS3: true }) + MongoManager.promises.findDoc.onSecondCall().resolves({ inS3: false }) + }) + + it('should not throw an error if the file is unarchived before we get for its hash', async function () { + PersistorManager.getObjectMd5Hash = sinon + .stub() + .rejects(new Errors.NotFoundError()) + await expect(DocArchiveManager.promises.unarchiveDoc(projectId, docId)) + .to.eventually.be.fulfilled + expect(PersistorManager.getObjectStream).not.to.have.been.called + }) + + it('should not throw an error if the file is unarchived before we download it', async function () { + PersistorManager.getObjectStream = sinon + .stub() + .rejects(new Errors.NotFoundError()) + await expect(DocArchiveManager.promises.unarchiveDoc(projectId, docId)) + .to.eventually.be.fulfilled + expect(MongoManager.promises.upsertIntoDocCollection).not.to.have.been + .called + }) + }) + + it('should throw an error if the file is not found but is still listed as archived', async function () { + PersistorManager.getObjectStream = sinon + .stub() + .rejects(new Errors.NotFoundError()) await expect( DocArchiveManager.promises.unarchiveDoc(projectId, docId) - ).to.eventually.be.rejected.and.be.instanceof(Errors.Md5MismatchError) - }) - - it('should update the doc lines in mongo', async function () { - await DocArchiveManager.promises.unarchiveDoc(projectId, docId) - expect( - MongoManager.promises.upsertIntoDocCollection - ).to.have.been.calledWith(projectId, docId, { lines: mongoDocs[0].lines }) - }) - - it('should delete the doc in s3', async function () { - await DocArchiveManager.promises.unarchiveDoc(projectId, docId) - expect(PersistorManager.deleteObject).to.have.been.calledWith( - Settings.docstore.bucket, - `${projectId}/${docId}` - ) - }) - - describe('doc contents', function () { - let mongoDoc, s3Doc - - describe('when the doc has the old schema', function () { - beforeEach(function () { - mongoDoc = { - lines: ['doc', 'lines'] - } - s3Doc = ['doc', 'lines'] - docJson = JSON.stringify(s3Doc) - stream.on.withArgs('data').yields(Buffer.from(docJson, 'utf8')) - }) - - it('should return the docs lines', async function () { - await DocArchiveManager.promises.unarchiveDoc(projectId, docId) - expect( - MongoManager.promises.upsertIntoDocCollection - ).to.have.been.calledWith(projectId, docId, mongoDoc) - }) - }) - - describe('with the new schema and ranges', function () { - beforeEach(function () { - s3Doc = { - lines: ['doc', 'lines'], - ranges: { json: 'ranges' }, - schema_v: 1 - } - mongoDoc = { - lines: ['doc', 'lines'], - ranges: { mongo: 'ranges' } - } - docJson = JSON.stringify(s3Doc) - stream.on.withArgs('data').yields(Buffer.from(docJson, 'utf8')) - }) - - it('should return the doc lines and ranges', async function () { - await DocArchiveManager.promises.unarchiveDoc(projectId, docId) - expect( - MongoManager.promises.upsertIntoDocCollection - ).to.have.been.calledWith(projectId, docId, mongoDoc) - }) - }) - - describe('with the new schema and no ranges', function () { - beforeEach(function () { - s3Doc = { - lines: ['doc', 'lines'], - schema_v: 1 - } - mongoDoc = { - lines: ['doc', 'lines'] - } - docJson = JSON.stringify(s3Doc) - stream.on.withArgs('data').yields(Buffer.from(docJson, 'utf8')) - }) - - it('should return only the doc lines', async function () { - await DocArchiveManager.promises.unarchiveDoc(projectId, docId) - expect( - MongoManager.promises.upsertIntoDocCollection - ).to.have.been.calledWith(projectId, docId, mongoDoc) - }) - }) - - describe('with an unrecognised schema', function () { - beforeEach(function () { - s3Doc = { - lines: ['doc', 'lines'], - schema_v: 2 - } - docJson = JSON.stringify(s3Doc) - stream.on.withArgs('data').yields(Buffer.from(docJson, 'utf8')) - }) - - it('should throw an error', async function () { - await expect( - DocArchiveManager.promises.unarchiveDoc(projectId, docId) - ).to.eventually.be.rejectedWith( - "I don't understand the doc format in s3" - ) - }) - }) + ).to.eventually.be.rejected.and.be.instanceof(Errors.NotFoundError) }) })