mirror of
https://github.com/overleaf/overleaf.git
synced 2024-11-21 20:47:08 -05:00
Merge pull request #112 from overleaf/bg-fetch-doc-directly
retrieve doc without unarchiving
This commit is contained in:
commit
76b5836b68
11 changed files with 345 additions and 33 deletions
|
@ -54,6 +54,7 @@ app.get('/project/:project_id/ranges', HttpController.getAllRanges)
|
|||
app.get('/project/:project_id/doc/:doc_id', HttpController.getDoc)
|
||||
app.get('/project/:project_id/doc/:doc_id/deleted', HttpController.isDocDeleted)
|
||||
app.get('/project/:project_id/doc/:doc_id/raw', HttpController.getRawDoc)
|
||||
app.get('/project/:project_id/doc/:doc_id/peek', HttpController.peekDoc)
|
||||
// Add 64kb overhead for the JSON encoding, and double the size to allow for ranges in the json payload
|
||||
app.post(
|
||||
'/project/:project_id/doc/:doc_id',
|
||||
|
@ -90,6 +91,8 @@ app.use(function (error, req, res, next) {
|
|||
logger.error({ err: error, req }, 'request errored')
|
||||
if (error instanceof Errors.NotFoundError) {
|
||||
return res.sendStatus(404)
|
||||
} else if (error instanceof Errors.DocModifiedError) {
|
||||
return res.sendStatus(409)
|
||||
} else {
|
||||
return res.status(500).send('Oops, something went wrong')
|
||||
}
|
||||
|
|
|
@ -23,6 +23,7 @@ module.exports = {
|
|||
unarchiveDoc: callbackify(unarchiveDoc),
|
||||
destroyAllDocs: callbackify(destroyAllDocs),
|
||||
destroyDoc: callbackify(destroyDoc),
|
||||
getDoc: callbackify(getDoc),
|
||||
promises: {
|
||||
archiveAllDocs,
|
||||
archiveDocById,
|
||||
|
@ -31,6 +32,7 @@ module.exports = {
|
|||
unarchiveDoc,
|
||||
destroyAllDocs,
|
||||
destroyDoc,
|
||||
getDoc,
|
||||
},
|
||||
}
|
||||
|
||||
|
@ -125,38 +127,17 @@ async function unArchiveAllDocs(projectId) {
|
|||
}
|
||||
}
|
||||
|
||||
async function unarchiveDoc(projectId, docId) {
|
||||
logger.log(
|
||||
{ project_id: projectId, doc_id: docId },
|
||||
'getting doc from persistor'
|
||||
)
|
||||
const originalDoc = await MongoManager.findDoc(projectId, docId, { inS3: 1 })
|
||||
if (!originalDoc.inS3) {
|
||||
// return if it's not actually in S3 as there's nothing to do
|
||||
return
|
||||
}
|
||||
// get the doc from the PersistorManager without storing it in mongo
|
||||
async function getDoc(projectId, docId) {
|
||||
const key = `${projectId}/${docId}`
|
||||
let stream, sourceMd5
|
||||
try {
|
||||
sourceMd5 = await PersistorManager.getObjectMd5Hash(
|
||||
const sourceMd5 = await PersistorManager.getObjectMd5Hash(
|
||||
settings.docstore.bucket,
|
||||
key
|
||||
)
|
||||
stream = await PersistorManager.getObjectStream(
|
||||
const stream = await PersistorManager.getObjectStream(
|
||||
settings.docstore.bucket,
|
||||
key
|
||||
)
|
||||
} catch (err) {
|
||||
// if we get a 404, we could be in a race and something else has unarchived the doc already
|
||||
if (err instanceof Errors.NotFoundError) {
|
||||
const doc = await MongoManager.findDoc(projectId, docId, { inS3: 1 })
|
||||
if (!doc.inS3) {
|
||||
// the doc has been archived while we were looking for it, so no error
|
||||
return
|
||||
}
|
||||
}
|
||||
throw err
|
||||
}
|
||||
stream.resume()
|
||||
const json = await _streamToString(stream)
|
||||
const md5 = crypto.createHash('md5').update(json).digest('hex')
|
||||
|
@ -181,6 +162,36 @@ async function unarchiveDoc(projectId, docId) {
|
|||
} else {
|
||||
throw new Error("I don't understand the doc format in s3")
|
||||
}
|
||||
|
||||
return mongoDoc
|
||||
}
|
||||
|
||||
// get the doc and unarchive it to mongo
|
||||
async function unarchiveDoc(projectId, docId) {
|
||||
logger.log(
|
||||
{ project_id: projectId, doc_id: docId },
|
||||
'getting doc from persistor'
|
||||
)
|
||||
const key = `${projectId}/${docId}`
|
||||
const originalDoc = await MongoManager.findDoc(projectId, docId, { inS3: 1 })
|
||||
if (!originalDoc.inS3) {
|
||||
// return if it's not actually in S3 as there's nothing to do
|
||||
return
|
||||
}
|
||||
let mongoDoc
|
||||
try {
|
||||
mongoDoc = await getDoc(projectId, docId)
|
||||
} catch (err) {
|
||||
// if we get a 404, we could be in a race and something else has unarchived the doc already
|
||||
if (err instanceof Errors.NotFoundError) {
|
||||
const doc = await MongoManager.findDoc(projectId, docId, { inS3: 1 })
|
||||
if (!doc.inS3) {
|
||||
// the doc has been archived while we were looking for it, so no error
|
||||
return
|
||||
}
|
||||
}
|
||||
throw err
|
||||
}
|
||||
await MongoManager.upsertIntoDocCollection(projectId, docId, mongoDoc)
|
||||
await PersistorManager.deleteObject(settings.docstore.bucket, key)
|
||||
}
|
||||
|
|
|
@ -124,6 +124,70 @@ module.exports = DocManager = {
|
|||
)
|
||||
},
|
||||
|
||||
// returns the doc without any version information
|
||||
_peekRawDoc(project_id, doc_id, callback) {
|
||||
MongoManager.findDoc(
|
||||
project_id,
|
||||
doc_id,
|
||||
{
|
||||
lines: true,
|
||||
rev: true,
|
||||
deleted: true,
|
||||
version: true,
|
||||
ranges: true,
|
||||
inS3: true,
|
||||
},
|
||||
(err, doc) => {
|
||||
if (err) return callback(err)
|
||||
if (doc == null) {
|
||||
return callback(
|
||||
new Errors.NotFoundError(
|
||||
`No such doc: ${doc_id} in project ${project_id}`
|
||||
)
|
||||
)
|
||||
}
|
||||
if (doc && !doc.inS3) {
|
||||
return callback(null, doc)
|
||||
}
|
||||
// skip the unarchiving to mongo when getting a doc
|
||||
DocArchive.getDoc(project_id, doc_id, function (err, archivedDoc) {
|
||||
if (err != null) {
|
||||
logger.err(
|
||||
{ err, project_id, doc_id },
|
||||
'error getting doc from archive'
|
||||
)
|
||||
return callback(err)
|
||||
}
|
||||
doc = _.extend(doc, archivedDoc)
|
||||
callback(null, doc)
|
||||
})
|
||||
}
|
||||
)
|
||||
},
|
||||
|
||||
// get the doc from mongo if possible, or from the persistent store otherwise,
|
||||
// without unarchiving it (avoids unnecessary writes to mongo)
|
||||
peekDoc(project_id, doc_id, callback) {
|
||||
DocManager._peekRawDoc(project_id, doc_id, (err, doc) => {
|
||||
if (err) {
|
||||
return callback(err)
|
||||
}
|
||||
MongoManager.withRevCheck(
|
||||
doc,
|
||||
MongoManager.getDocVersion,
|
||||
function (error, version) {
|
||||
// If the doc has been modified while we were retrieving it, we
|
||||
// will get a DocModified error
|
||||
if (error != null) {
|
||||
return callback(error)
|
||||
}
|
||||
doc.version = version
|
||||
return callback(err, doc)
|
||||
}
|
||||
)
|
||||
})
|
||||
},
|
||||
|
||||
getDocLines(project_id, doc_id, callback) {
|
||||
if (callback == null) {
|
||||
callback = function (err, doc) {}
|
||||
|
|
|
@ -4,7 +4,10 @@ const { Errors } = require('@overleaf/object-persistor')
|
|||
|
||||
class Md5MismatchError extends OError {}
|
||||
|
||||
class DocModifiedError extends OError {}
|
||||
|
||||
module.exports = {
|
||||
Md5MismatchError,
|
||||
DocModifiedError,
|
||||
...Errors,
|
||||
}
|
||||
|
|
|
@ -44,6 +44,23 @@ module.exports = HttpController = {
|
|||
})
|
||||
},
|
||||
|
||||
peekDoc(req, res, next) {
|
||||
const { project_id } = req.params
|
||||
const { doc_id } = req.params
|
||||
logger.log({ project_id, doc_id }, 'peeking doc')
|
||||
DocManager.peekDoc(project_id, doc_id, function (error, doc) {
|
||||
if (error) {
|
||||
return next(error)
|
||||
}
|
||||
if (doc == null) {
|
||||
return res.sendStatus(404)
|
||||
} else {
|
||||
res.setHeader('x-doc-status', doc.inS3 ? 'archived' : 'active')
|
||||
return res.json(HttpController._buildDocView(doc))
|
||||
}
|
||||
})
|
||||
},
|
||||
|
||||
isDocDeleted(req, res, next) {
|
||||
const { doc_id: docId, project_id: projectId } = req.params
|
||||
DocManager.isDocDeleted(projectId, docId, function (error, deleted) {
|
||||
|
|
|
@ -15,6 +15,7 @@ const { db, ObjectId } = require('./mongodb')
|
|||
const logger = require('logger-sharelatex')
|
||||
const metrics = require('@overleaf/metrics')
|
||||
const Settings = require('@overleaf/settings')
|
||||
const Errors = require('./Errors')
|
||||
const { promisify } = require('util')
|
||||
|
||||
module.exports = MongoManager = {
|
||||
|
@ -178,6 +179,45 @@ module.exports = MongoManager = {
|
|||
)
|
||||
},
|
||||
|
||||
getDocRev(doc_id, callback) {
|
||||
db.docs.findOne(
|
||||
{
|
||||
_id: ObjectId(doc_id.toString()),
|
||||
},
|
||||
{
|
||||
projection: { rev: 1 },
|
||||
},
|
||||
function (err, doc) {
|
||||
if (err != null) {
|
||||
return callback(err)
|
||||
}
|
||||
callback(null, doc && doc.rev)
|
||||
}
|
||||
)
|
||||
},
|
||||
|
||||
// Helper method to support optimistic locking. Call the provided method for
|
||||
// an existing doc and return the result if the rev in mongo is unchanged when
|
||||
// checked afterwards. If the rev has changed, return a DocModifiedError.
|
||||
withRevCheck(doc, method, callback) {
|
||||
method(doc._id, function (err, result) {
|
||||
if (err) return callback(err)
|
||||
MongoManager.getDocRev(doc._id, function (err, currentRev) {
|
||||
if (err) return callback(err)
|
||||
if (doc.rev !== currentRev) {
|
||||
return callback(
|
||||
new Errors.DocModifiedError('doc rev has changed', {
|
||||
doc_id: doc._id,
|
||||
rev: doc.rev,
|
||||
currentRev,
|
||||
})
|
||||
)
|
||||
}
|
||||
return callback(null, result)
|
||||
})
|
||||
})
|
||||
},
|
||||
|
||||
destroyDoc(doc_id, callback) {
|
||||
db.docs.deleteOne(
|
||||
{
|
||||
|
|
|
@ -12,7 +12,7 @@
|
|||
* DS207: Consider shorter variations of null checks
|
||||
* Full docs: https://github.com/decaffeinate/decaffeinate/blob/master/docs/suggestions.md
|
||||
*/
|
||||
process.env.BACKEND = 'gcs'
|
||||
|
||||
const Settings = require('@overleaf/settings')
|
||||
const { expect } = require('chai')
|
||||
const { db, ObjectId } = require('../../../app/js/mongodb')
|
||||
|
|
|
@ -0,0 +1,127 @@
|
|||
const Settings = require('@overleaf/settings')
|
||||
const { ObjectId } = require('../../../app/js/mongodb')
|
||||
const DocstoreApp = require('./helpers/DocstoreApp')
|
||||
const DocstoreClient = require('./helpers/DocstoreClient')
|
||||
const { Storage } = require('@google-cloud/storage')
|
||||
|
||||
describe('Getting A Doc from Archive', function () {
|
||||
before(function (done) {
|
||||
return DocstoreApp.ensureRunning(done)
|
||||
})
|
||||
|
||||
before(async function () {
|
||||
const storage = new Storage(Settings.docstore.gcs.endpoint)
|
||||
await storage.createBucket(Settings.docstore.bucket)
|
||||
await storage.createBucket(`${Settings.docstore.bucket}-deleted`)
|
||||
})
|
||||
|
||||
describe('for an archived doc', function () {
|
||||
before(function (done) {
|
||||
this.project_id = ObjectId()
|
||||
this.timeout(1000 * 30)
|
||||
this.doc = {
|
||||
_id: ObjectId(),
|
||||
lines: ['foo', 'bar'],
|
||||
ranges: {},
|
||||
version: 2,
|
||||
}
|
||||
DocstoreClient.createDoc(
|
||||
this.project_id,
|
||||
this.doc._id,
|
||||
this.doc.lines,
|
||||
this.doc.version,
|
||||
this.doc.ranges,
|
||||
error => {
|
||||
if (error) {
|
||||
return done(error)
|
||||
}
|
||||
DocstoreClient.archiveDocById(
|
||||
this.project_id,
|
||||
this.doc._id,
|
||||
(error, res) => {
|
||||
this.res = res
|
||||
if (error) {
|
||||
return done(error)
|
||||
}
|
||||
done()
|
||||
}
|
||||
)
|
||||
}
|
||||
)
|
||||
})
|
||||
|
||||
it('should successully archive the doc', function (done) {
|
||||
this.res.statusCode.should.equal(204)
|
||||
done()
|
||||
})
|
||||
|
||||
it('should return the doc lines and version from persistent storage', function (done) {
|
||||
return DocstoreClient.peekDoc(
|
||||
this.project_id,
|
||||
this.doc._id,
|
||||
{},
|
||||
(error, res, doc) => {
|
||||
res.statusCode.should.equal(200)
|
||||
res.headers['x-doc-status'].should.equal('archived')
|
||||
doc.lines.should.deep.equal(this.doc.lines)
|
||||
doc.version.should.equal(this.doc.version)
|
||||
doc.ranges.should.deep.equal(this.doc.ranges)
|
||||
return done()
|
||||
}
|
||||
)
|
||||
})
|
||||
|
||||
it('should return the doc lines and version from persistent storage on subsequent requests', function (done) {
|
||||
return DocstoreClient.peekDoc(
|
||||
this.project_id,
|
||||
this.doc._id,
|
||||
{},
|
||||
(error, res, doc) => {
|
||||
res.statusCode.should.equal(200)
|
||||
res.headers['x-doc-status'].should.equal('archived')
|
||||
doc.lines.should.deep.equal(this.doc.lines)
|
||||
doc.version.should.equal(this.doc.version)
|
||||
doc.ranges.should.deep.equal(this.doc.ranges)
|
||||
return done()
|
||||
}
|
||||
)
|
||||
})
|
||||
|
||||
describe('for an non-archived doc', function () {
|
||||
before(function (done) {
|
||||
this.project_id = ObjectId()
|
||||
this.timeout(1000 * 30)
|
||||
this.doc = {
|
||||
_id: ObjectId(),
|
||||
lines: ['foo', 'bar'],
|
||||
ranges: {},
|
||||
version: 2,
|
||||
}
|
||||
DocstoreClient.createDoc(
|
||||
this.project_id,
|
||||
this.doc._id,
|
||||
this.doc.lines,
|
||||
this.doc.version,
|
||||
this.doc.ranges,
|
||||
done
|
||||
)
|
||||
})
|
||||
|
||||
it('should return the doc lines and version from mongo', function (done) {
|
||||
return DocstoreClient.peekDoc(
|
||||
this.project_id,
|
||||
this.doc._id,
|
||||
{},
|
||||
(error, res, doc) => {
|
||||
res.statusCode.should.equal(200)
|
||||
res.headers['x-doc-status'].should.equal('active')
|
||||
doc.lines.should.deep.equal(this.doc.lines)
|
||||
doc.version.should.equal(this.doc.version)
|
||||
doc.ranges.should.deep.equal(this.doc.ranges)
|
||||
return done()
|
||||
}
|
||||
)
|
||||
})
|
||||
})
|
||||
})
|
||||
})
|
|
@ -60,6 +60,17 @@ module.exports = DocstoreClient = {
|
|||
)
|
||||
},
|
||||
|
||||
peekDoc(project_id, doc_id, qs, callback) {
|
||||
request.get(
|
||||
{
|
||||
url: `http://localhost:${settings.internal.docstore.port}/project/${project_id}/doc/${doc_id}/peek`,
|
||||
json: true,
|
||||
qs,
|
||||
},
|
||||
callback
|
||||
)
|
||||
},
|
||||
|
||||
isDocDeleted(project_id, doc_id, callback) {
|
||||
request.get(
|
||||
{
|
||||
|
|
|
@ -4,6 +4,8 @@ const sinonChai = require('sinon-chai')
|
|||
const chaiAsPromised = require('chai-as-promised')
|
||||
const SandboxedModule = require('sandboxed-module')
|
||||
|
||||
process.env.BACKEND = 'gcs'
|
||||
|
||||
// Chai configuration
|
||||
chai.should()
|
||||
chai.use(sinonChai)
|
||||
|
|
|
@ -17,6 +17,7 @@ const modulePath = require('path').join(
|
|||
)
|
||||
const { ObjectId } = require('mongodb')
|
||||
const { assert } = require('chai')
|
||||
const Errors = require('../../../app/js/Errors')
|
||||
|
||||
describe('MongoManager', function () {
|
||||
beforeEach(function () {
|
||||
|
@ -28,6 +29,7 @@ describe('MongoManager', function () {
|
|||
},
|
||||
'@overleaf/metrics': { timeAsyncMethod: sinon.stub() },
|
||||
'@overleaf/settings': { max_deleted_docs: 42 },
|
||||
'./Errors': Errors,
|
||||
},
|
||||
})
|
||||
this.project_id = ObjectId().toString()
|
||||
|
@ -305,7 +307,7 @@ describe('MongoManager', function () {
|
|||
})
|
||||
})
|
||||
|
||||
return describe('setDocVersion', function () {
|
||||
describe('setDocVersion', function () {
|
||||
beforeEach(function () {
|
||||
this.version = 42
|
||||
this.db.docOps.updateOne = sinon.stub().callsArg(3)
|
||||
|
@ -338,4 +340,36 @@ describe('MongoManager', function () {
|
|||
return this.callback.called.should.equal(true)
|
||||
})
|
||||
})
|
||||
|
||||
describe('withRevCheck', function () {
|
||||
this.beforeEach(function () {
|
||||
this.doc = { _id: ObjectId(), name: 'mock-doc', rev: 1 }
|
||||
this.testFunction = sinon.stub().yields(null, 'foo')
|
||||
})
|
||||
|
||||
it('should call the callback when the rev has not changed', function (done) {
|
||||
this.db.docs.findOne = sinon.stub().callsArgWith(2, null, { rev: 1 })
|
||||
this.MongoManager.withRevCheck(
|
||||
this.doc,
|
||||
this.testFunction,
|
||||
(err, result) => {
|
||||
result.should.equal('foo')
|
||||
assert.isNull(err)
|
||||
done()
|
||||
}
|
||||
)
|
||||
})
|
||||
|
||||
it('should return an error when the rev has changed', function (done) {
|
||||
this.db.docs.findOne = sinon.stub().callsArgWith(2, null, { rev: 2 })
|
||||
this.MongoManager.withRevCheck(
|
||||
this.doc,
|
||||
this.testFunction,
|
||||
(err, result) => {
|
||||
err.should.be.instanceof(Errors.DocModifiedError)
|
||||
done()
|
||||
}
|
||||
)
|
||||
})
|
||||
})
|
||||
})
|
||||
|
|
Loading…
Reference in a new issue