MongoManager = require "./MongoManager"
Errors = require "./Errors"
logger = require "logger-sharelatex"
_ = require "underscore"
async = require "async"
settings = require("settings-sharelatex")
request = require("request")
crypto = require("crypto")
RangeManager = require("./RangeManager")

# Timeout (in ms) applied to every S3 request built by buildS3Options.
thirtySeconds = 30 * 1000

# DocArchive moves docs between mongo and S3.
# All methods are callback-style: callback(error, result...).
module.exports = DocArchive =

  # Archive every doc of the project that is not already flagged inS3.
  # Calls back with NotFoundError when mongo returns no doc list at all.
  # At most 5 docs are archived concurrently.
  archiveAllDocs: (project_id, callback = (err, docs) ->) ->
    MongoManager.getProjectsDocs project_id, {include_deleted: true}, {lines: true, ranges: true, rev: true, inS3: true}, (err, docs) ->
      if err?
        return callback(err)
      else if !docs?
        return callback new Errors.NotFoundError("No docs for project #{project_id}")
      docs = _.filter docs, (doc)-> doc.inS3 != true
      jobs = _.map docs, (doc) ->
        (cb)->
          DocArchive.archiveDoc project_id, doc, cb
      async.parallelLimit jobs, 5, callback

  # Serialise one doc, PUT it to S3 under "<project_id>/<doc_id>", verify the
  # returned ETag against the md5 of the body we sent, then mark the doc as
  # archived in mongo.
  archiveDoc: (project_id, doc, callback)->
    logger.log project_id: project_id, doc_id: doc._id, "sending doc to s3"
    try
      options = DocArchive.buildS3Options(project_id+"/"+doc._id)
    catch e
      return callback e
    DocArchive._mongoDocToS3Doc doc, (error, json_doc) ->
      return callback(error) if error?
      options.body = json_doc
      options.headers =
        'Content-Type': "application/json"
      request.put options, (err, res) ->
        if err? || res.statusCode != 200
          logger.err err:err, res:res, project_id:project_id, doc_id: doc._id, statusCode: res?.statusCode, "something went wrong archiving doc in aws"
          return callback new Error("Error in S3 request")
        md5lines = crypto.createHash("md5").update(json_doc, "utf8").digest("hex")
        # Soak the header lookup: a response without an ETag must be reported
        # through the md5-mismatch path below rather than throwing a TypeError
        # inside this callback.
        md5response = res.headers?.etag?.toString().replace(/\"/g, '')
        if md5lines != md5response
          logger.err responseMD5:md5response, linesMD5:md5lines, project_id:project_id, doc_id: doc?._id, "err in response md5 from s3"
          return callback new Error("Error in S3 md5 response")
        MongoManager.markDocAsArchived doc._id, doc.rev, (err) ->
          return callback(err) if err?
          callback()

  # Restore every archived doc of the project from S3 back into mongo.
  # Docs without an inS3 field are skipped. At most 5 run concurrently.
  unArchiveAllDocs: (project_id, callback = (err) ->) ->
    MongoManager.getArchivedProjectDocs project_id, (err, docs) ->
      if err?
        logger.err err:err, project_id:project_id, "error unarchiving all docs"
        return callback(err)
      else if !docs?
        return callback new Errors.NotFoundError("No docs for project #{project_id}")
      jobs = _.map docs, (doc) ->
        (cb)->
          if !doc.inS3?
            return cb()
          else
            DocArchive.unarchiveDoc project_id, doc._id, cb
      async.parallelLimit jobs, 5, callback

  # Fetch one doc from S3, upsert it into the mongo doc collection and, once
  # that succeeded, delete the S3 copy.
  # Any S3 failure (request error or non-200) is reported as NotFoundError.
  unarchiveDoc: (project_id, doc_id, callback)->
    logger.log project_id: project_id, doc_id: doc_id, "getting doc from s3"
    try
      options = DocArchive.buildS3Options(project_id+"/"+doc_id)
    catch e
      return callback e
    options.json = true
    request.get options, (err, res, doc)->
      if err? || res.statusCode != 200
        logger.err err:err, res:res, project_id:project_id, doc_id:doc_id, "something went wrong unarchiving doc from aws"
        return callback new Errors.NotFoundError("Error in S3 request")
      DocArchive._s3DocToMongoDoc doc, (error, mongo_doc) ->
        return callback(error) if error?
        MongoManager.upsertIntoDocCollection project_id, doc_id.toString(), mongo_doc, (err) ->
          return callback(err) if err?
          logger.log project_id: project_id, doc_id: doc_id, "deleting doc from s3"
          DocArchive._deleteDocFromS3 project_id, doc_id, callback

  # Destroy every doc of the project (deleted ones included) in mongo and S3.
  # A missing doc list is treated as "nothing to do".
  destroyAllDocs: (project_id, callback = (err) ->) ->
    MongoManager.getProjectsDocs project_id, {include_deleted: true}, {_id: 1}, (err, docs) ->
      if err?
        logger.err err:err, project_id:project_id, "error getting project's docs"
        return callback(err)
      else if !docs?
        return callback()
      jobs = _.map docs, (doc) ->
        (cb)->
          DocArchive.destroyDoc(project_id, doc._id, cb)
      async.parallelLimit jobs, 5, callback

  # Remove a single doc from mongo, deleting its S3 copy first when the doc is
  # flagged inS3. Calls back with NotFoundError when mongo has no such doc.
  destroyDoc: (project_id, doc_id, callback)->
    logger.log project_id: project_id, doc_id: doc_id, "removing doc from mongo and s3"
    MongoManager.findDoc project_id, doc_id, {inS3: 1}, (error, doc) ->
      return callback error if error?
      return callback new Errors.NotFoundError("Doc not found in Mongo") unless doc?
      if doc.inS3 == true
        DocArchive._deleteDocFromS3 project_id, doc_id, (err) ->
          # The error must be handed to the callback: merely returning it
          # would leave the caller (and the parallelLimit job in
          # destroyAllDocs) waiting forever.
          return callback(err) if err?
          MongoManager.destroyDoc doc_id, callback
      else
        MongoManager.destroyDoc doc_id, callback

  # DELETE "<project_id>/<doc_id>" from S3; only a 204 response is a success.
  _deleteDocFromS3: (project_id, doc_id, callback) ->
    try
      options = DocArchive.buildS3Options(project_id+"/"+doc_id)
    catch e
      return callback e
    options.json = true
    request.del options, (err, res, body)->
      if err? || res.statusCode != 204
        logger.err err:err, res:res, project_id:project_id, doc_id:doc_id, "something went wrong deleting doc from aws"
        return callback new Error("Error in S3 request")
      callback()

  # Convert the parsed S3 payload into the fields stored in mongo.
  # Two formats are accepted:
  #   * an object with schema_v == 1 and lines (plus optional ranges)
  #   * a bare array, which is taken to be the lines themselves
  # Anything else, including a null/undefined payload, yields an error.
  _s3DocToMongoDoc: (doc, callback = (error, mongo_doc) ->) ->
    mongo_doc = {}
    # `doc?.` guards against an empty body, which would otherwise throw here.
    if doc?.schema_v == 1 and doc.lines?
      mongo_doc.lines = doc.lines
      if doc.ranges?
        mongo_doc.ranges = RangeManager.jsonRangesToMongo(doc.ranges)
    else if doc instanceof Array
      mongo_doc.lines = doc
    else
      return callback(new Error("I don't understand the doc format in s3"))
    return callback null, mongo_doc

  # Serialise a mongo doc into the JSON string stored in S3 (schema_v 1).
  # Errors when the doc has no lines or when the JSON contains a null byte.
  _mongoDocToS3Doc: (doc, callback = (error, s3_doc) ->) ->
    if !doc.lines?
      return callback(new Error("doc has no lines"))
    json = JSON.stringify({
      lines: doc.lines
      ranges: doc.ranges
      schema_v: 1
    })
    if json.indexOf("\u0000") != -1
      error = new Error("null bytes detected")
      logger.err {err: error, doc, json}, error.message
      return callback(error)
    return callback null, json

  # Build the base `request` options for the given S3 object key.
  # Throws (synchronously) when settings.docstore.s3 is not configured.
  buildS3Options: (key)->
    if !settings.docstore.s3?
      throw new Error("S3 settings are not configured")
    return {
      aws:
        key: settings.docstore.s3.key
        secret: settings.docstore.s3.secret
        bucket: settings.docstore.s3.bucket
      timeout: thirtySeconds
      uri:"https://#{settings.docstore.s3.bucket}.s3.amazonaws.com/#{key}"
    }