Merge pull request #112 from overleaf/bg-fetch-doc-directly

retrieve doc without unarchiving
This commit is contained in:
Brian Gough 2021-08-04 11:13:54 +01:00 committed by GitHub
commit 76b5836b68
11 changed files with 345 additions and 33 deletions

View file

@ -54,6 +54,7 @@ app.get('/project/:project_id/ranges', HttpController.getAllRanges)
app.get('/project/:project_id/doc/:doc_id', HttpController.getDoc)
app.get('/project/:project_id/doc/:doc_id/deleted', HttpController.isDocDeleted)
app.get('/project/:project_id/doc/:doc_id/raw', HttpController.getRawDoc)
app.get('/project/:project_id/doc/:doc_id/peek', HttpController.peekDoc)
// Add 64kb overhead for the JSON encoding, and double the size to allow for ranges in the json payload
app.post(
'/project/:project_id/doc/:doc_id',
@ -90,6 +91,8 @@ app.use(function (error, req, res, next) {
logger.error({ err: error, req }, 'request errored')
if (error instanceof Errors.NotFoundError) {
return res.sendStatus(404)
} else if (error instanceof Errors.DocModifiedError) {
return res.sendStatus(409)
} else {
return res.status(500).send('Oops, something went wrong')
}

View file

@ -23,6 +23,7 @@ module.exports = {
unarchiveDoc: callbackify(unarchiveDoc),
destroyAllDocs: callbackify(destroyAllDocs),
destroyDoc: callbackify(destroyDoc),
getDoc: callbackify(getDoc),
promises: {
archiveAllDocs,
archiveDocById,
@ -31,6 +32,7 @@ module.exports = {
unarchiveDoc,
destroyAllDocs,
destroyDoc,
getDoc,
},
}
@ -125,38 +127,17 @@ async function unArchiveAllDocs(projectId) {
}
}
async function unarchiveDoc(projectId, docId) {
logger.log(
{ project_id: projectId, doc_id: docId },
'getting doc from persistor'
)
const originalDoc = await MongoManager.findDoc(projectId, docId, { inS3: 1 })
if (!originalDoc.inS3) {
// return if it's not actually in S3 as there's nothing to do
return
}
// get the doc from the PersistorManager without storing it in mongo
async function getDoc(projectId, docId) {
const key = `${projectId}/${docId}`
let stream, sourceMd5
try {
sourceMd5 = await PersistorManager.getObjectMd5Hash(
const sourceMd5 = await PersistorManager.getObjectMd5Hash(
settings.docstore.bucket,
key
)
stream = await PersistorManager.getObjectStream(
const stream = await PersistorManager.getObjectStream(
settings.docstore.bucket,
key
)
} catch (err) {
// if we get a 404, we could be in a race and something else has unarchived the doc already
if (err instanceof Errors.NotFoundError) {
const doc = await MongoManager.findDoc(projectId, docId, { inS3: 1 })
if (!doc.inS3) {
// the doc has been archived while we were looking for it, so no error
return
}
}
throw err
}
stream.resume()
const json = await _streamToString(stream)
const md5 = crypto.createHash('md5').update(json).digest('hex')
@ -181,6 +162,36 @@ async function unarchiveDoc(projectId, docId) {
} else {
throw new Error("I don't understand the doc format in s3")
}
return mongoDoc
}
// get the doc and unarchive it to mongo
async function unarchiveDoc(projectId, docId) {
logger.log(
{ project_id: projectId, doc_id: docId },
'getting doc from persistor'
)
const key = `${projectId}/${docId}`
const originalDoc = await MongoManager.findDoc(projectId, docId, { inS3: 1 })
if (!originalDoc.inS3) {
// return if it's not actually in S3 as there's nothing to do
return
}
let mongoDoc
try {
mongoDoc = await getDoc(projectId, docId)
} catch (err) {
// if we get a 404, we could be in a race and something else has unarchived the doc already
if (err instanceof Errors.NotFoundError) {
const doc = await MongoManager.findDoc(projectId, docId, { inS3: 1 })
if (!doc.inS3) {
// the doc has been archived while we were looking for it, so no error
return
}
}
throw err
}
await MongoManager.upsertIntoDocCollection(projectId, docId, mongoDoc)
await PersistorManager.deleteObject(settings.docstore.bucket, key)
}

View file

@ -124,6 +124,70 @@ module.exports = DocManager = {
)
},
// returns the doc without any version information
_peekRawDoc(project_id, doc_id, callback) {
MongoManager.findDoc(
project_id,
doc_id,
{
lines: true,
rev: true,
deleted: true,
version: true,
ranges: true,
inS3: true,
},
(err, doc) => {
if (err) return callback(err)
if (doc == null) {
return callback(
new Errors.NotFoundError(
`No such doc: ${doc_id} in project ${project_id}`
)
)
}
if (doc && !doc.inS3) {
return callback(null, doc)
}
// skip the unarchiving to mongo when getting a doc
DocArchive.getDoc(project_id, doc_id, function (err, archivedDoc) {
if (err != null) {
logger.err(
{ err, project_id, doc_id },
'error getting doc from archive'
)
return callback(err)
}
doc = _.extend(doc, archivedDoc)
callback(null, doc)
})
}
)
},
// get the doc from mongo if possible, or from the persistent store otherwise,
// without unarchiving it (avoids unnecessary writes to mongo)
peekDoc(project_id, doc_id, callback) {
DocManager._peekRawDoc(project_id, doc_id, (err, doc) => {
if (err) {
return callback(err)
}
MongoManager.withRevCheck(
doc,
MongoManager.getDocVersion,
function (error, version) {
// If the doc has been modified while we were retrieving it, we
// will get a DocModified error
if (error != null) {
return callback(error)
}
doc.version = version
return callback(err, doc)
}
)
})
},
getDocLines(project_id, doc_id, callback) {
if (callback == null) {
callback = function (err, doc) {}

View file

@ -4,7 +4,10 @@ const { Errors } = require('@overleaf/object-persistor')
class Md5MismatchError extends OError {}
class DocModifiedError extends OError {}
module.exports = {
Md5MismatchError,
DocModifiedError,
...Errors,
}

View file

@ -44,6 +44,23 @@ module.exports = HttpController = {
})
},
peekDoc(req, res, next) {
const { project_id } = req.params
const { doc_id } = req.params
logger.log({ project_id, doc_id }, 'peeking doc')
DocManager.peekDoc(project_id, doc_id, function (error, doc) {
if (error) {
return next(error)
}
if (doc == null) {
return res.sendStatus(404)
} else {
res.setHeader('x-doc-status', doc.inS3 ? 'archived' : 'active')
return res.json(HttpController._buildDocView(doc))
}
})
},
isDocDeleted(req, res, next) {
const { doc_id: docId, project_id: projectId } = req.params
DocManager.isDocDeleted(projectId, docId, function (error, deleted) {

View file

@ -15,6 +15,7 @@ const { db, ObjectId } = require('./mongodb')
const logger = require('logger-sharelatex')
const metrics = require('@overleaf/metrics')
const Settings = require('@overleaf/settings')
const Errors = require('./Errors')
const { promisify } = require('util')
module.exports = MongoManager = {
@ -178,6 +179,45 @@ module.exports = MongoManager = {
)
},
getDocRev(doc_id, callback) {
db.docs.findOne(
{
_id: ObjectId(doc_id.toString()),
},
{
projection: { rev: 1 },
},
function (err, doc) {
if (err != null) {
return callback(err)
}
callback(null, doc && doc.rev)
}
)
},
// Helper method to support optimistic locking. Call the provided method for
// an existing doc and return the result if the rev in mongo is unchanged when
// checked afterwards. If the rev has changed, return a DocModifiedError.
withRevCheck(doc, method, callback) {
method(doc._id, function (err, result) {
if (err) return callback(err)
MongoManager.getDocRev(doc._id, function (err, currentRev) {
if (err) return callback(err)
if (doc.rev !== currentRev) {
return callback(
new Errors.DocModifiedError('doc rev has changed', {
doc_id: doc._id,
rev: doc.rev,
currentRev,
})
)
}
return callback(null, result)
})
})
},
destroyDoc(doc_id, callback) {
db.docs.deleteOne(
{

View file

@ -12,7 +12,7 @@
* DS207: Consider shorter variations of null checks
* Full docs: https://github.com/decaffeinate/decaffeinate/blob/master/docs/suggestions.md
*/
process.env.BACKEND = 'gcs'
const Settings = require('@overleaf/settings')
const { expect } = require('chai')
const { db, ObjectId } = require('../../../app/js/mongodb')

View file

@ -0,0 +1,127 @@
const Settings = require('@overleaf/settings')
const { ObjectId } = require('../../../app/js/mongodb')
const DocstoreApp = require('./helpers/DocstoreApp')
const DocstoreClient = require('./helpers/DocstoreClient')
const { Storage } = require('@google-cloud/storage')
describe('Getting A Doc from Archive', function () {
before(function (done) {
return DocstoreApp.ensureRunning(done)
})
before(async function () {
const storage = new Storage(Settings.docstore.gcs.endpoint)
await storage.createBucket(Settings.docstore.bucket)
await storage.createBucket(`${Settings.docstore.bucket}-deleted`)
})
describe('for an archived doc', function () {
before(function (done) {
this.project_id = ObjectId()
this.timeout(1000 * 30)
this.doc = {
_id: ObjectId(),
lines: ['foo', 'bar'],
ranges: {},
version: 2,
}
DocstoreClient.createDoc(
this.project_id,
this.doc._id,
this.doc.lines,
this.doc.version,
this.doc.ranges,
error => {
if (error) {
return done(error)
}
DocstoreClient.archiveDocById(
this.project_id,
this.doc._id,
(error, res) => {
this.res = res
if (error) {
return done(error)
}
done()
}
)
}
)
})
it('should successully archive the doc', function (done) {
this.res.statusCode.should.equal(204)
done()
})
it('should return the doc lines and version from persistent storage', function (done) {
return DocstoreClient.peekDoc(
this.project_id,
this.doc._id,
{},
(error, res, doc) => {
res.statusCode.should.equal(200)
res.headers['x-doc-status'].should.equal('archived')
doc.lines.should.deep.equal(this.doc.lines)
doc.version.should.equal(this.doc.version)
doc.ranges.should.deep.equal(this.doc.ranges)
return done()
}
)
})
it('should return the doc lines and version from persistent storage on subsequent requests', function (done) {
return DocstoreClient.peekDoc(
this.project_id,
this.doc._id,
{},
(error, res, doc) => {
res.statusCode.should.equal(200)
res.headers['x-doc-status'].should.equal('archived')
doc.lines.should.deep.equal(this.doc.lines)
doc.version.should.equal(this.doc.version)
doc.ranges.should.deep.equal(this.doc.ranges)
return done()
}
)
})
describe('for an non-archived doc', function () {
before(function (done) {
this.project_id = ObjectId()
this.timeout(1000 * 30)
this.doc = {
_id: ObjectId(),
lines: ['foo', 'bar'],
ranges: {},
version: 2,
}
DocstoreClient.createDoc(
this.project_id,
this.doc._id,
this.doc.lines,
this.doc.version,
this.doc.ranges,
done
)
})
it('should return the doc lines and version from mongo', function (done) {
return DocstoreClient.peekDoc(
this.project_id,
this.doc._id,
{},
(error, res, doc) => {
res.statusCode.should.equal(200)
res.headers['x-doc-status'].should.equal('active')
doc.lines.should.deep.equal(this.doc.lines)
doc.version.should.equal(this.doc.version)
doc.ranges.should.deep.equal(this.doc.ranges)
return done()
}
)
})
})
})
})

View file

@ -60,6 +60,17 @@ module.exports = DocstoreClient = {
)
},
peekDoc(project_id, doc_id, qs, callback) {
request.get(
{
url: `http://localhost:${settings.internal.docstore.port}/project/${project_id}/doc/${doc_id}/peek`,
json: true,
qs,
},
callback
)
},
isDocDeleted(project_id, doc_id, callback) {
request.get(
{

View file

@ -4,6 +4,8 @@ const sinonChai = require('sinon-chai')
const chaiAsPromised = require('chai-as-promised')
const SandboxedModule = require('sandboxed-module')
process.env.BACKEND = 'gcs'
// Chai configuration
chai.should()
chai.use(sinonChai)

View file

@ -17,6 +17,7 @@ const modulePath = require('path').join(
)
const { ObjectId } = require('mongodb')
const { assert } = require('chai')
const Errors = require('../../../app/js/Errors')
describe('MongoManager', function () {
beforeEach(function () {
@ -28,6 +29,7 @@ describe('MongoManager', function () {
},
'@overleaf/metrics': { timeAsyncMethod: sinon.stub() },
'@overleaf/settings': { max_deleted_docs: 42 },
'./Errors': Errors,
},
})
this.project_id = ObjectId().toString()
@ -305,7 +307,7 @@ describe('MongoManager', function () {
})
})
return describe('setDocVersion', function () {
describe('setDocVersion', function () {
beforeEach(function () {
this.version = 42
this.db.docOps.updateOne = sinon.stub().callsArg(3)
@ -338,4 +340,36 @@ describe('MongoManager', function () {
return this.callback.called.should.equal(true)
})
})
describe('withRevCheck', function () {
this.beforeEach(function () {
this.doc = { _id: ObjectId(), name: 'mock-doc', rev: 1 }
this.testFunction = sinon.stub().yields(null, 'foo')
})
it('should call the callback when the rev has not changed', function (done) {
this.db.docs.findOne = sinon.stub().callsArgWith(2, null, { rev: 1 })
this.MongoManager.withRevCheck(
this.doc,
this.testFunction,
(err, result) => {
result.should.equal('foo')
assert.isNull(err)
done()
}
)
})
it('should return an error when the rev has changed', function (done) {
this.db.docs.findOne = sinon.stub().callsArgWith(2, null, { rev: 2 })
this.MongoManager.withRevCheck(
this.doc,
this.testFunction,
(err, result) => {
err.should.be.instanceof(Errors.DocModifiedError)
done()
}
)
})
})
})