Merge pull request #22 from sharelatex/ja-archive-ranges
Archive ranges as well as doc lines
Commit e10d09d040
6 changed files with 515 additions and 165 deletions
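In outline, the change replaces the bare lines array stored in S3 with a versioned JSON envelope that carries the tracked-changes ranges alongside the doc lines. A minimal sketch of the new payload, not part of the diff, with illustrative values (the field names come from the hunks below):

# Sketch of the schema_v 1 envelope this PR introduces.
# Previously the S3 object body was just the lines array, e.g. ["line one", "line two"].
s3_doc = JSON.stringify
	lines: ["line one", "line two"]
	ranges: {changes: [], comments: []}   # example shape; may also be undefined
	schema_v: 1
console.log s3_doc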
@@ -6,12 +6,13 @@ async = require "async"
 settings = require("settings-sharelatex")
 request = require("request")
 crypto = require("crypto")
+RangeManager = require("./RangeManager")
 thirtySeconds = 30 * 1000
 
 module.exports = DocArchive =
 
 	archiveAllDocs: (project_id, callback = (err, docs) ->) ->
-		MongoManager.getProjectsDocs project_id, {include_deleted: true}, {lines: true, rev: true, inS3: true}, (err, docs) ->
+		MongoManager.getProjectsDocs project_id, {include_deleted: true}, {lines: true, ranges: true, rev: true, inS3: true}, (err, docs) ->
 			if err?
 				return callback(err)
 			else if !docs?
@@ -26,21 +27,26 @@ module.exports = DocArchive =
 	archiveDoc: (project_id, doc, callback)->
 		logger.log project_id: project_id, doc_id: doc._id, "sending doc to s3"
 		try
-			options = DocArchive.buildS3Options(doc.lines, project_id+"/"+doc._id)
+			options = DocArchive.buildS3Options(project_id+"/"+doc._id)
 		catch e
 			return callback e
-		request.put options, (err, res)->
-			if err? || res.statusCode != 200
-				logger.err err:err, res:res, project_id:project_id, doc_id: doc._id, statusCode: res?.statusCode, "something went wrong archiving doc in aws"
-				return callback new Error("Error in S3 request")
-			md5lines = crypto.createHash("md5").update(JSON.stringify(doc.lines), "utf8").digest("hex")
-			md5response = res.headers.etag.toString().replace(/\"/g, '')
-			if md5lines != md5response
-				logger.err responseMD5:md5response, linesMD5:md5lines, project_id:project_id, doc_id: doc?._id, "err in response md5 from s3"
-				return callback new Error("Error in S3 md5 response")
-			MongoManager.markDocAsArchived doc._id, doc.rev, (err) ->
-				return callback(err) if err?
-				callback()
+		DocArchive._mongoDocToS3Doc doc, (error, json_doc) ->
+			return callback(error) if error?
+			options.body = json_doc
+			options.headers =
+				'Content-Type': "application/json"
+			request.put options, (err, res) ->
+				if err? || res.statusCode != 200
+					logger.err err:err, res:res, project_id:project_id, doc_id: doc._id, statusCode: res?.statusCode, "something went wrong archiving doc in aws"
+					return callback new Error("Error in S3 request")
+				md5lines = crypto.createHash("md5").update(json_doc, "utf8").digest("hex")
+				md5response = res.headers.etag.toString().replace(/\"/g, '')
+				if md5lines != md5response
+					logger.err responseMD5:md5response, linesMD5:md5lines, project_id:project_id, doc_id: doc?._id, "err in response md5 from s3"
+					return callback new Error("Error in S3 md5 response")
+				MongoManager.markDocAsArchived doc._id, doc.rev, (err) ->
+					return callback(err) if err?
+					callback()
 
 	unArchiveAllDocs: (project_id, callback = (err) ->) ->
 		MongoManager.getArchivedProjectDocs project_id, (err, docs) ->
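archiveDoc keeps its end-to-end integrity check, now computed over the JSON envelope rather than the raw lines: the MD5 of the uploaded body must match the ETag S3 echoes back. A self-contained sketch of that comparison, not part of the diff (res stands in for the S3 PUT response; only headers.etag is read):

crypto = require "crypto"

# Recompute the body's MD5 and compare it to the quoted ETag header,
# as archiveDoc does in the hunk above.
verifyEtag = (json_doc, res) ->
	md5lines = crypto.createHash("md5").update(json_doc, "utf8").digest("hex")
	md5response = res.headers.etag.toString().replace(/\"/g, '')
	md5lines == md5response

body = JSON.stringify(lines: ["a"], ranges: null, schema_v: 1)
etag = '"' + crypto.createHash("md5").update(body, "utf8").digest("hex") + '"'
console.log verifyEtag(body, headers: {etag: etag})   # true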
@@ -60,26 +66,52 @@ module.exports = DocArchive =
 	unarchiveDoc: (project_id, doc_id, callback)->
 		logger.log project_id: project_id, doc_id: doc_id, "getting doc from s3"
 		try
-			options = DocArchive.buildS3Options(true, project_id+"/"+doc_id)
+			options = DocArchive.buildS3Options(project_id+"/"+doc_id)
 		catch e
 			return callback e
-		request.get options, (err, res, lines)->
+		options.json = true
+		request.get options, (err, res, doc)->
 			if err? || res.statusCode != 200
 				logger.err err:err, res:res, project_id:project_id, doc_id:doc_id, "something went wrong unarchiving doc from aws"
 				return callback new Errors.NotFoundError("Error in S3 request")
-			if !(lines instanceof Array)
-				logger.err err:err, res:res, project_id:project_id, doc_id:doc_id, lines:lines, "doc lines from aws are not in array format, likely not JSON parsable"
-				return callback(new Error("Error unpacking doc"))
-			MongoManager.upsertIntoDocCollection project_id, doc_id.toString(), {lines}, (err) ->
-				return callback(err) if err?
-				logger.log project_id: project_id, doc_id: doc_id, "deleting doc from s3"
-				request.del options, (err, res, body)->
-					if err? || res.statusCode != 204
-						logger.err err:err, res:res, project_id:project_id, doc_id:doc_id, "something went wrong deleting doc from aws"
-						return callback new Errors.NotFoundError("Error in S3 request")
-					callback()
+			DocArchive._s3DocToMongoDoc doc, (error, mongo_doc) ->
+				return callback(error) if error?
+				MongoManager.upsertIntoDocCollection project_id, doc_id.toString(), mongo_doc, (err) ->
+					return callback(err) if err?
+					logger.log project_id: project_id, doc_id: doc_id, "deleting doc from s3"
+					request.del options, (err, res, body)->
+						if err? || res.statusCode != 204
+							logger.err err:err, res:res, project_id:project_id, doc_id:doc_id, "something went wrong deleting doc from aws"
+							return callback new Errors.NotFoundError("Error in S3 request")
+						callback()
 
-	buildS3Options: (content, key)->
+	_s3DocToMongoDoc: (doc, callback = (error, mongo_doc) ->) ->
+		mongo_doc = {}
+		if doc.schema_v == 1 and doc.lines?
+			mongo_doc.lines = doc.lines
+			if doc.ranges?
+				mongo_doc.ranges = RangeManager.jsonRangesToMongo(doc.ranges)
+		else if doc instanceof Array
+			mongo_doc.lines = doc
+		else
+			return callback(new Error("I don't understand the doc format in s3"))
+		return callback null, mongo_doc
+
+	_mongoDocToS3Doc: (doc, callback = (error, s3_doc) ->) ->
+		if !doc.lines?
+			return callback(new Error("doc has no lines"))
+		json = JSON.stringify({
+			lines: doc.lines
+			ranges: doc.ranges
+			schema_v: 1
+		})
+		if json.indexOf("\u0000") != -1
+			error = new Error("null bytes detected")
+			logger.err {err: error, doc, json}, error.message
+			return callback(error)
+		return callback null, json
+
+	buildS3Options: (key)->
 		if !settings.docstore.s3?
 			throw new Error("S3 settings are not configured")
 		return {
@@ -88,6 +120,5 @@ module.exports = DocArchive =
 			secret: settings.docstore.s3.secret
 			bucket: settings.docstore.s3.bucket
 			timeout: thirtySeconds
-			json: content
 			uri:"https://#{settings.docstore.s3.bucket}.s3.amazonaws.com/#{key}"
 		}
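_mongoDocToS3Doc and _s3DocToMongoDoc are inverses, and the instanceof Array branch keeps pre-envelope archives (bare line arrays) readable. A standalone sketch of the unpacking side, with RangeManager stubbed as identity (the real jsonRangesToMongo rehydrates ObjectIds and Dates; see the RangeManager hunk below):

# Stub: the real RangeManager.jsonRangesToMongo converts ids and timestamps.
RangeManager = jsonRangesToMongo: (ranges) -> ranges

s3DocToMongoDoc = (doc, callback) ->
	mongo_doc = {}
	if doc.schema_v == 1 and doc.lines?
		mongo_doc.lines = doc.lines
		if doc.ranges?
			mongo_doc.ranges = RangeManager.jsonRangesToMongo(doc.ranges)
	else if doc instanceof Array
		mongo_doc.lines = doc   # legacy format: the whole doc was a lines array
	else
		return callback(new Error("I don't understand the doc format in s3"))
	callback null, mongo_doc

# New-schema docs round-trip; a legacy bare array still yields usable lines.
s3DocToMongoDoc {lines: ["a"], ranges: {comments: []}, schema_v: 1}, (e, d) -> console.log d
s3DocToMongoDoc ["a", "b"], (e, d) -> console.log d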
@@ -44,6 +44,7 @@ module.exports = MongoManager =
 			$unset: {}
 		update.$set["inS3"] = true
 		update.$unset["lines"] = true
+		update.$unset["ranges"] = true
 		query =
 			_id: doc_id
 			rev: rev
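With ranges now archived in the S3 envelope, markDocAsArchived can unset them in Mongo alongside the lines. The update it issues looks roughly like this (a sketch; the doc_id and rev values are placeholders, the real code passes doc._id and doc.rev):

# Archive-time update after this change: once the doc is safely in S3,
# both lines and ranges are removed from Mongo.
update =
	$set: {inS3: true}
	$unset: {lines: true, ranges: true}
query =
	_id: "placeholder-doc-id"
	rev: 42
console.log {query, update}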
@@ -16,16 +16,21 @@ module.exports = RangeManager =
 
 	jsonRangesToMongo: (ranges) ->
 		return null if !ranges?
+
+		updateMetadata = (metadata) ->
+			if metadata?.ts?
+				metadata.ts = new Date(metadata.ts)
+			if metadata?.user_id?
+				metadata.user_id = RangeManager._safeObjectId(metadata.user_id)
+
 		for change in ranges.changes or []
-			change.id = @_safeObjectId(change.id)
-			if change.metadata?.ts?
-				change.metadata.ts = new Date(change.metadata.ts)
-			if change.metadata?.user_id?
-				change.metadata.user_id = @_safeObjectId(change.metadata.user_id)
+			change.id = RangeManager._safeObjectId(change.id)
+			updateMetadata(change.metadata)
 		for comment in ranges.comments or []
-			comment.id = @_safeObjectId(comment.id)
+			comment.id = RangeManager._safeObjectId(comment.id)
 			if comment.op?.t?
-				comment.op.t = @_safeObjectId(comment.op.t)
+				comment.op.t = RangeManager._safeObjectId(comment.op.t)
+			updateMetadata(comment.metadata)
 		return ranges
 
 	_safeObjectId: (data) ->
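jsonRangesToMongo rehydrates JSON-parsed ranges: ISO timestamp strings back to Date objects and hex id strings back to ObjectIds, now via a shared updateMetadata helper that also covers comment metadata. What that means for a single change entry (the ObjectId import is an assumption for illustration; any BSON ObjectId implementation would do):

{ObjectId} = require "mongodb"   # assumed import, for illustration only

# One change entry as it arrives from JSON.parse...
change =
	id: "5a0b2d0f1c9d440000a1b2c3"
	metadata: {ts: "2017-11-14T12:00:00.000Z", user_id: "5a0b2d0f1c9d440000a1b2c4"}

# ...and the conversions jsonRangesToMongo applies to it.
change.id = new ObjectId(change.id)
change.metadata.ts = new Date(change.metadata.ts)
change.metadata.user_id = new ObjectId(change.metadata.user_id)
console.log change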
File diff suppressed because one or more lines are too long
@@ -48,5 +48,6 @@ module.exports = DocstoreClient =
 
 
 	getS3Doc: (project_id, doc_id, callback = (error, res, body) ->) ->
-		options = DocArchiveManager.buildS3Options(true, project_id+"/"+doc_id)
-		request.get options, callback
+		options = DocArchiveManager.buildS3Options(project_id+"/"+doc_id)
+		options.json = true
+		request.get options, callback
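The test helper mirrors the production change: rather than asking buildS3Options for a JSON body, it sets json: true so request parses the envelope on the way back. In isolation (bucket and key here are placeholders; the URI shape matches buildS3Options above):

request = require "request"

# Fetch an archived doc and let `request` JSON-parse the envelope.
options =
	uri: "https://BUCKET.s3.amazonaws.com/PROJECT_ID/DOC_ID"
	json: true
	timeout: 30 * 1000
request.get options, (err, res, doc) ->
	console.log doc?.schema_v, doc?.lines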
@@ -71,14 +71,17 @@ describe "DocArchiveManager", ->
 			"settings-sharelatex": @settings
 			"./MongoManager": @MongoManager
 			"request": @request
+			"./RangeManager": @RangeManager = {}
 			"logger-sharelatex":
 				log:->
 				err:->
+		@globals =
+			JSON: JSON
 
 		@error = "my errror"
 		@project_id = ObjectId().toString()
 		@stubbedError = new Errors.NotFoundError("Error in S3 request")
-		@DocArchiveManager = SandboxedModule.require modulePath, requires: @requires
+		@DocArchiveManager = SandboxedModule.require modulePath, requires: @requires, globals: @globals
 
 	describe "archiveDoc", ->
 
@@ -87,13 +90,22 @@ describe "DocArchiveManager", ->
 			@DocArchiveManager.archiveDoc @project_id, @mongoDocs[0], (err)=>
 				opts = @request.put.args[0][0]
 				assert.deepEqual(opts.aws, {key:@settings.docstore.s3.key, secret:@settings.docstore.s3.secret, bucket:@settings.docstore.s3.bucket})
-				opts.json.should.equal @mongoDocs[0].lines
+				opts.body.should.equal JSON.stringify(
+					lines: @mongoDocs[0].lines
+					ranges: @mongoDocs[0].ranges
+					schema_v: 1
+				)
 				opts.timeout.should.equal (30*1000)
 				opts.uri.should.equal "https://#{@settings.docstore.s3.bucket}.s3.amazonaws.com/#{@project_id}/#{@mongoDocs[0]._id}"
 				done()
 
 		it "should return no md5 error", (done)->
-			@md5 = crypto.createHash("md5").update(JSON.stringify(@mongoDocs[0].lines)).digest("hex")
+			data = JSON.stringify(
+				lines: @mongoDocs[0].lines
+				ranges: @mongoDocs[0].ranges
+				schema_v: 1
+			)
+			@md5 = crypto.createHash("md5").update(data).digest("hex")
 			@request.put = sinon.stub().callsArgWith(1, null, {statusCode:200,headers:{etag:@md5}})
 			@DocArchiveManager.archiveDoc @project_id, @mongoDocs[0], (err)=>
 				should.not.exist err
@@ -202,3 +214,82 @@ describe "DocArchiveManager", ->
 			@DocArchiveManager.unArchiveAllDocs @project_id, (err)=>
 				err.should.equal @error
 				done()
+
+	describe "_s3DocToMongoDoc", ->
+		describe "with the old schema", ->
+			it "should return the docs lines", (done) ->
+				@DocArchiveManager._s3DocToMongoDoc ["doc", "lines"], (error, doc) ->
+					expect(doc).to.deep.equal {
+						lines: ["doc", "lines"]
+					}
+					done()
+
+		describe "with the new schema", ->
+			it "should return the doc lines and ranges", (done) ->
+				@RangeManager.jsonRangesToMongo = sinon.stub().returns {"mongo": "ranges"}
+				@DocArchiveManager._s3DocToMongoDoc {
+					lines: ["doc", "lines"]
+					ranges: {"json": "ranges"}
+					schema_v: 1
+				}, (error, doc) ->
+					expect(doc).to.deep.equal {
+						lines: ["doc", "lines"]
+						ranges: {"mongo": "ranges"}
+					}
+					done()
+
+			it "should return just the doc lines when there are no ranges", (done) ->
+				@DocArchiveManager._s3DocToMongoDoc {
+					lines: ["doc", "lines"]
+					schema_v: 1
+				}, (error, doc) ->
+					expect(doc).to.deep.equal {
+						lines: ["doc", "lines"]
+					}
+					done()
+
+		describe "with an unrecognised schema", ->
+			it "should return an error", (done) ->
+				@DocArchiveManager._s3DocToMongoDoc {
+					schema_v: 2
+				}, (error, doc) ->
+					expect(error).to.exist
+					done()
+
+	describe "_mongoDocToS3Doc", ->
+		describe "with a valid doc", ->
+			it "should return the json version", (done) ->
+				@DocArchiveManager._mongoDocToS3Doc doc = {
+					lines: ["doc", "lines"]
+					ranges: { "mock": "ranges" }
+				}, (err, s3_doc) ->
+					expect(s3_doc).to.equal JSON.stringify({
+						lines: ["doc", "lines"]
+						ranges: { "mock": "ranges" }
+						schema_v: 1
+					})
+					done()
+
+		describe "with null bytes in the result", ->
+			beforeEach ->
+				@_stringify = JSON.stringify
+				JSON.stringify = sinon.stub().returns '{"bad": "\u0000"}'
+
+			afterEach ->
+				JSON.stringify = @_stringify
+
+			it "should return an error", (done) ->
+				@DocArchiveManager._mongoDocToS3Doc {
+					lines: ["doc", "lines"]
+					ranges: { "mock": "ranges" }
+				}, (err, s3_doc) ->
+					expect(err).to.exist
+					done()
+
+		describe "without doc lines", ->
+			it "should return an error", (done) ->
+				@DocArchiveManager._mongoDocToS3Doc {}, (err, s3_doc) ->
+					expect(err).to.exist
+					done()
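The new globals: @globals wiring is presumably what makes the null-byte test work: SandboxedModule gives the module under test its own globals, so sharing the test's JSON object with the sandbox lets the JSON.stringify stub be visible inside DocArchiveManager. The stub-and-restore pattern itself, outside any sandbox:

# Swap JSON.stringify for a stub that injects a null byte, then restore it,
# mirroring the beforeEach/afterEach pair in the test above.
_stringify = JSON.stringify
JSON.stringify = -> '{"bad": "\u0000"}'
console.log JSON.stringify(lines: ["a"])   # -> '{"bad": "\u0000"}'
JSON.stringify = _stringify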