overleaf/services/docstore/app/coffee/DocArchiveManager.coffee

125 lines
4.7 KiB
CoffeeScript
Raw Normal View History

2015-06-02 14:55:22 -04:00
MongoManager = require "./MongoManager"
Errors = require "./Errors"
logger = require "logger-sharelatex"
_ = require "underscore"
async = require "async"
settings = require("settings-sharelatex")
request = require("request")
crypto = require("crypto")
2017-03-30 12:13:43 -04:00
RangeManager = require("./RangeManager")
2015-06-02 14:55:22 -04:00
# Timeout handed to every S3 request built by buildS3Options.
# The name and the 30 * 1000 arithmetic imply the unit is milliseconds.
thirtySeconds = 30 * 1000
module.exports = DocArchive =
# Archive every doc of a project that is not already flagged as stored in S3.
#
# Loads the project's docs (deleted ones included) with their lines, ranges,
# rev and inS3 fields, drops those whose inS3 flag is exactly true, and runs
# DocArchive.archiveDoc on the rest, at most 5 at a time.
#
# project_id - id of the project whose docs are archived
# callback   - receives the MongoManager error, an Errors.NotFoundError when
#              no doc list comes back, or whatever async.parallelLimit reports
#              once the archive jobs have run
archiveAllDocs: (project_id, callback = (err, docs) ->) ->
  filter = {include_deleted: true}
  projection = {lines: true, ranges: true, rev: true, inS3: true}
  MongoManager.getProjectsDocs project_id, filter, projection, (err, docs) ->
    return callback(err) if err?
    unless docs?
      return callback new Errors.NotFoundError("No docs for project #{project_id}")
    # Only docs that are not yet marked as being in S3 need uploading.
    pending = _.reject docs, (doc) -> doc.inS3 == true
    archiveJob = (doc) -> (cb) ->
      DocArchive.archiveDoc project_id, doc, cb
    async.parallelLimit _.map(pending, archiveJob), 5, callback
2015-06-02 14:55:22 -04:00
# Upload one doc to S3 as JSON, verify the upload, then flag the doc as archived.
#
# The ETag header of the PUT response (quotes stripped) is compared with the
# hex MD5 of the uploaded body; MongoManager.markDocAsArchived is only called
# when both are equal.
#
# project_id - id of the owning project; first part of the S3 key
# doc        - mongo doc; its _id and rev are used here, the rest is
#              serialised by _mongoDocToS3Doc
# callback   - called with an Error when buildS3Options throws, the doc cannot
#              be serialised, the PUT fails or does not answer 200, or the
#              checksum is missing or different; called with no arguments on
#              success
archiveDoc: (project_id, doc, callback)->
  logger.log project_id: project_id, doc_id: doc._id, "sending doc to s3"
  try
    options = DocArchive.buildS3Options(project_id+"/"+doc._id)
  catch e
    return callback e
  DocArchive._mongoDocToS3Doc doc, (error, json_doc) ->
    return callback(error) if error?
    options.body = json_doc
    options.headers =
      'Content-Type': "application/json"
    request.put options, (err, res) ->
      if err? || res?.statusCode != 200
        logger.err err:err, res:res, project_id:project_id, doc_id: doc._id, statusCode: res?.statusCode, "something went wrong archiving doc in aws"
        return callback new Error("Error in S3 request")
      md5lines = crypto.createHash("md5").update(json_doc, "utf8").digest("hex")
      # A response without an ETag must not throw inside this async callback
      # (the caller's callback would never run); treat it as a checksum
      # mismatch instead.
      etag = res.headers?.etag
      md5response = if etag? then etag.toString().replace(/\"/g, '') else null
      if md5lines != md5response
        logger.err responseMD5:md5response, linesMD5:md5lines, project_id:project_id, doc_id: doc?._id, "err in response md5 from s3"
        return callback new Error("Error in S3 md5 response")
      MongoManager.markDocAsArchived doc._id, doc.rev, (err) ->
        return callback(err) if err?
        callback()
2015-06-02 14:55:22 -04:00
# Bring every archived doc of a project back from S3.
#
# Fetches the docs via MongoManager.getArchivedProjectDocs and runs
# DocArchive.unarchiveDoc for each one that has an inS3 field, at most 5 at a
# time. Docs without an inS3 field are skipped (their job completes at once).
#
# project_id - id of the project to restore
# callback   - receives the MongoManager error (also logged), an
#              Errors.NotFoundError when no doc list comes back, or whatever
#              async.parallelLimit reports once the jobs have run
unArchiveAllDocs: (project_id, callback = (err) ->) ->
  MongoManager.getArchivedProjectDocs project_id, (err, docs) ->
    if err?
      logger.err {err, project_id}, "error unarchiving all docs"
      return callback(err)
    unless docs?
      return callback new Errors.NotFoundError("No docs for project #{project_id}")
    restoreJob = (doc) -> (cb) ->
      return cb() unless doc.inS3?
      DocArchive.unarchiveDoc project_id, doc._id, cb
    async.parallelLimit _.map(docs, restoreJob), 5, callback
2015-06-02 14:55:22 -04:00
# Restore a single doc from S3 into mongo and then remove the S3 copy.
#
# Steps, each starting only after the previous one succeeded:
#   1. GET the object stored under "<project_id>/<doc_id>" (parsed as JSON),
#   2. convert it with _s3DocToMongoDoc,
#   3. write it with MongoManager.upsertIntoDocCollection,
#   4. DELETE the S3 object (a 204 answer is expected).
#
# project_id - id of the owning project
# doc_id     - id of the doc; passed to mongo as a string
# callback   - called with the error thrown by buildS3Options, an
#              Errors.NotFoundError when the GET or DELETE fails or answers
#              with an unexpected status, the conversion error, or the mongo
#              error; called with no arguments on success
unarchiveDoc: (project_id, doc_id, callback)->
  logger.log {project_id, doc_id}, "getting doc from s3"
  try
    options = DocArchive.buildS3Options(project_id+"/"+doc_id)
  catch e
    return callback e
  options.json = true

  # Step 4: the same options object (json flag included) is reused for the DELETE.
  removeFromS3 = ->
    logger.log {project_id, doc_id}, "deleting doc from s3"
    request.del options, (delErr, delRes, body) ->
      if delErr? or delRes.statusCode isnt 204
        logger.err err:delErr, res:delRes, project_id:project_id, doc_id:doc_id, "something went wrong deleting doc from aws"
        return callback new Errors.NotFoundError("Error in S3 request")
      callback()

  # Steps 2 and 3: convert the S3 payload and store it in mongo.
  storeInMongo = (s3Doc) ->
    DocArchive._s3DocToMongoDoc s3Doc, (error, mongo_doc) ->
      return callback(error) if error?
      MongoManager.upsertIntoDocCollection project_id, doc_id.toString(), mongo_doc, (upsertErr) ->
        return callback(upsertErr) if upsertErr?
        removeFromS3()

  # Step 1: fetch the archived doc.
  request.get options, (getErr, getRes, s3Doc) ->
    if getErr? or getRes.statusCode isnt 200
      logger.err err:getErr, res:getRes, project_id:project_id, doc_id:doc_id, "something went wrong unarchiving doc from aws"
      return callback new Errors.NotFoundError("Error in S3 request")
    storeInMongo s3Doc
2017-03-30 12:13:43 -04:00
# Convert a doc as stored in S3 into the fields written back to mongo.
#
# Two stored formats are accepted:
#   - an object with schema_v == 1 and lines (ranges optional; when present
#     they are converted with RangeManager.jsonRangesToMongo),
#   - a bare array, which is taken to be the lines themselves.
#
# doc      - parsed S3 payload; may be null/undefined or of an unexpected type
# callback - called with (null, mongo_doc) on success, or with an Error when
#            the payload matches neither format
_s3DocToMongoDoc: (doc, callback = (error, mongo_doc) ->) ->
  mongo_doc = {}
  # `doc?.` keeps a missing payload from throwing a TypeError here; it falls
  # through to the "don't understand" error below and reaches the callback.
  if doc?.schema_v == 1 and doc.lines?
    mongo_doc.lines = doc.lines
    if doc.ranges?
      mongo_doc.ranges = RangeManager.jsonRangesToMongo(doc.ranges)
  else if doc instanceof Array
    mongo_doc.lines = doc
  else
    return callback(new Error("I don't understand the doc format in s3"))
  return callback null, mongo_doc
# Serialise a mongo doc into the JSON string that is uploaded to S3.
#
# The payload holds the doc's lines and ranges plus schema_v: 1.
#
# doc      - mongo doc; must have lines
# callback - called with (null, json) on success, or with an Error when the
#            doc has no lines or the serialised string contains a NUL character
_mongoDocToS3Doc: (doc, callback = (error, s3_doc) ->) ->
  unless doc.lines?
    return callback(new Error("doc has no lines"))
  payload =
    lines: doc.lines
    ranges: doc.ranges
    schema_v: 1
  json = JSON.stringify(payload)
  # NOTE(review): JSON.stringify escapes U+0000 inside strings as the
  # six-character sequence \u0000, so this raw-NUL check presumably never
  # matches for standard serialisation - confirm whether the escaped form
  # should be rejected as well.
  hasNullByte = json.indexOf("\u0000") != -1
  return callback(null, json) unless hasNullByte
  error = new Error("null bytes detected")
  logger.err {err: error, doc, json}, error.message
  return callback(error)
2015-06-02 14:55:22 -04:00
2017-03-30 12:13:43 -04:00
# Build the base options object for an S3 request on the given object key.
#
# key - object key appended to the bucket URL (callers in this file pass
#       "<project_id>/<doc_id>")
#
# Returns an object with the aws credentials (key, secret, bucket), the
# thirtySeconds timeout and the https URI of the object.
# Throws an Error when settings.docstore.s3 is not set.
buildS3Options: (key)->
  s3 = settings.docstore.s3
  unless s3?
    throw new Error("S3 settings are not configured")
  credentials =
    key: s3.key
    secret: s3.secret
    bucket: s3.bucket
  return {
    aws: credentials
    timeout: thirtySeconds
    uri: "https://#{s3.bucket}.s3.amazonaws.com/#{key}"
  }