2020-02-17 12:34:21 -05:00
|
|
|
/* eslint-disable
|
|
|
|
camelcase,
|
|
|
|
no-return-assign,
|
|
|
|
no-unused-vars,
|
|
|
|
*/
|
|
|
|
// TODO: This file was created by bulk-decaffeinate.
|
|
|
|
// Fix any style issues and re-enable lint.
|
2020-02-17 12:34:04 -05:00
|
|
|
/*
|
|
|
|
* decaffeinate suggestions:
|
|
|
|
* DS101: Remove unnecessary use of Array.from
|
|
|
|
* DS102: Remove unnecessary code created because of implicit returns
|
|
|
|
* DS207: Consider shorter variations of null checks
|
|
|
|
* Full docs: https://github.com/decaffeinate/decaffeinate/blob/master/docs/suggestions.md
|
|
|
|
*/
|
2020-02-17 12:34:28 -05:00
|
|
|
let MongoAWS
|
2021-07-12 12:47:16 -04:00
|
|
|
const settings = require('@overleaf/settings')
|
2021-12-14 08:00:35 -05:00
|
|
|
const logger = require('@overleaf/logger')
|
2020-02-17 12:34:28 -05:00
|
|
|
const AWS = require('aws-sdk')
|
|
|
|
const S3S = require('s3-streams')
|
2020-09-10 07:58:06 -04:00
|
|
|
const { db, ObjectId } = require('./mongodb')
|
2020-02-17 12:34:28 -05:00
|
|
|
const JSONStream = require('JSONStream')
|
|
|
|
const ReadlineStream = require('byline')
|
|
|
|
const zlib = require('zlib')
|
2020-11-25 06:57:20 -05:00
|
|
|
const Metrics = require('@overleaf/metrics')
|
2020-02-17 12:34:28 -05:00
|
|
|
|
|
|
|
const DAYS = 24 * 3600 * 1000 // number of milliseconds in one day; multiply by a day count to get a duration
|
2020-02-17 12:34:04 -05:00
|
|
|
|
2020-06-04 04:24:21 -04:00
|
|
|
/**
 * Build an S3 stream for a single archived history pack.
 *
 * A new S3 client is created from `settings.trackchanges.s3` on every call and
 * handed to `streamConstructor` together with the bucket and key of the pack.
 *
 * @param {Function} streamConstructor - invoked (without `new`) as
 *   `streamConstructor(s3Client, { Bucket, Key })`
 * @param {*} project_id - first segment of the object key
 * @param {*} doc_id - used in the `changes-<doc_id>` key segment
 * @param {*} pack_id - used in the `pack-<pack_id>` key segment
 * @returns {*} whatever `streamConstructor` returns
 */
const createStream = function (streamConstructor, project_id, doc_id, pack_id) {
  const { trackchanges } = settings
  const s3Settings = trackchanges.s3

  const s3Client = new AWS.S3({
    accessKeyId: s3Settings.key,
    secretAccessKey: s3Settings.secret,
    endpoint: s3Settings.endpoint,
    s3ForcePathStyle: s3Settings.pathStyle,
  })

  // Object layout: <project_id>/changes-<doc_id>/pack-<pack_id>
  const packLocation = {
    Bucket: trackchanges.stores.doc_history,
    Key: project_id + '/changes-' + doc_id + '/pack-' + pack_id,
  }

  return streamConstructor(s3Client, packLocation)
}
|
|
|
|
|
|
|
|
module.exports = MongoAWS = {
|
|
|
|
archivePack(project_id, doc_id, pack_id, _callback) {
|
|
|
|
if (_callback == null) {
|
2021-10-27 05:49:18 -04:00
|
|
|
_callback = function () {}
|
2020-02-17 12:34:28 -05:00
|
|
|
}
|
2020-06-04 04:24:21 -04:00
|
|
|
const callback = function (...args) {
|
2020-02-17 12:34:28 -05:00
|
|
|
_callback(...Array.from(args || []))
|
2020-06-04 04:24:21 -04:00
|
|
|
return (_callback = function () {})
|
2020-02-17 12:34:28 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
const query = {
|
|
|
|
_id: ObjectId(pack_id),
|
2021-07-13 07:04:43 -04:00
|
|
|
doc_id: ObjectId(doc_id),
|
2020-02-17 12:34:28 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
if (project_id == null) {
|
|
|
|
return callback(new Error('invalid project id'))
|
|
|
|
}
|
|
|
|
if (doc_id == null) {
|
|
|
|
return callback(new Error('invalid doc id'))
|
|
|
|
}
|
|
|
|
if (pack_id == null) {
|
|
|
|
return callback(new Error('invalid pack id'))
|
|
|
|
}
|
|
|
|
|
2022-05-16 08:38:18 -04:00
|
|
|
logger.debug({ project_id, doc_id, pack_id }, 'uploading data to s3')
|
2020-02-17 12:34:28 -05:00
|
|
|
|
|
|
|
const upload = createStream(S3S.WriteStream, project_id, doc_id, pack_id)
|
|
|
|
|
2020-06-04 04:24:21 -04:00
|
|
|
return db.docHistory.findOne(query, function (err, result) {
|
2020-02-17 12:34:28 -05:00
|
|
|
if (err != null) {
|
|
|
|
return callback(err)
|
|
|
|
}
|
|
|
|
if (result == null) {
|
|
|
|
return callback(new Error('cannot find pack to send to s3'))
|
|
|
|
}
|
|
|
|
if (result.expiresAt != null) {
|
|
|
|
return callback(new Error('refusing to send pack with TTL to s3'))
|
|
|
|
}
|
|
|
|
const uncompressedData = JSON.stringify(result)
|
|
|
|
if (uncompressedData.indexOf('\u0000') !== -1) {
|
|
|
|
const error = new Error('null bytes found in upload')
|
|
|
|
logger.error({ err: error, project_id, doc_id, pack_id }, error.message)
|
|
|
|
return callback(error)
|
|
|
|
}
|
2020-06-04 04:24:21 -04:00
|
|
|
return zlib.gzip(uncompressedData, function (err, buf) {
|
2022-05-16 08:38:18 -04:00
|
|
|
logger.debug(
|
2020-02-17 12:34:28 -05:00
|
|
|
{
|
|
|
|
project_id,
|
|
|
|
doc_id,
|
|
|
|
pack_id,
|
|
|
|
origSize: uncompressedData.length,
|
2021-07-13 07:04:43 -04:00
|
|
|
newSize: buf.length,
|
2020-02-17 12:34:28 -05:00
|
|
|
},
|
|
|
|
'compressed pack'
|
|
|
|
)
|
|
|
|
if (err != null) {
|
|
|
|
return callback(err)
|
|
|
|
}
|
2021-07-13 07:04:43 -04:00
|
|
|
upload.on('error', err => callback(err))
|
2020-06-04 04:24:21 -04:00
|
|
|
upload.on('finish', function () {
|
2020-02-17 12:34:28 -05:00
|
|
|
Metrics.inc('archive-pack')
|
2022-05-16 08:38:18 -04:00
|
|
|
logger.debug(
|
|
|
|
{ project_id, doc_id, pack_id },
|
|
|
|
'upload to s3 completed'
|
|
|
|
)
|
2020-02-17 12:34:28 -05:00
|
|
|
return callback(null)
|
|
|
|
})
|
|
|
|
upload.write(buf)
|
|
|
|
return upload.end()
|
|
|
|
})
|
|
|
|
})
|
|
|
|
},
|
|
|
|
|
|
|
|
readArchivedPack(project_id, doc_id, pack_id, _callback) {
|
|
|
|
if (_callback == null) {
|
2021-10-27 05:49:18 -04:00
|
|
|
_callback = function () {}
|
2020-02-17 12:34:28 -05:00
|
|
|
}
|
2020-06-04 04:24:21 -04:00
|
|
|
const callback = function (...args) {
|
2020-02-17 12:34:28 -05:00
|
|
|
_callback(...Array.from(args || []))
|
2020-06-04 04:24:21 -04:00
|
|
|
return (_callback = function () {})
|
2020-02-17 12:34:28 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
if (project_id == null) {
|
|
|
|
return callback(new Error('invalid project id'))
|
|
|
|
}
|
|
|
|
if (doc_id == null) {
|
|
|
|
return callback(new Error('invalid doc id'))
|
|
|
|
}
|
|
|
|
if (pack_id == null) {
|
|
|
|
return callback(new Error('invalid pack id'))
|
|
|
|
}
|
|
|
|
|
2022-05-16 08:38:18 -04:00
|
|
|
logger.debug({ project_id, doc_id, pack_id }, 'downloading data from s3')
|
2020-02-17 12:34:28 -05:00
|
|
|
|
|
|
|
const download = createStream(S3S.ReadStream, project_id, doc_id, pack_id)
|
|
|
|
|
|
|
|
const inputStream = download
|
2021-07-13 07:04:43 -04:00
|
|
|
.on('open', obj => 1)
|
|
|
|
.on('error', err => callback(err))
|
2020-02-17 12:34:28 -05:00
|
|
|
|
|
|
|
const gunzip = zlib.createGunzip()
|
|
|
|
gunzip.setEncoding('utf8')
|
2020-06-04 04:24:21 -04:00
|
|
|
gunzip.on('error', function (err) {
|
2022-05-16 08:38:18 -04:00
|
|
|
logger.debug(
|
2020-02-17 12:34:28 -05:00
|
|
|
{ project_id, doc_id, pack_id, err },
|
|
|
|
'error uncompressing gzip stream'
|
|
|
|
)
|
|
|
|
return callback(err)
|
|
|
|
})
|
|
|
|
|
|
|
|
const outputStream = inputStream.pipe(gunzip)
|
|
|
|
const parts = []
|
2021-07-13 07:04:43 -04:00
|
|
|
outputStream.on('error', err => callback(err))
|
2020-06-04 04:24:21 -04:00
|
|
|
outputStream.on('end', function () {
|
2020-02-17 12:34:28 -05:00
|
|
|
let object
|
2022-05-16 08:38:18 -04:00
|
|
|
logger.debug(
|
|
|
|
{ project_id, doc_id, pack_id },
|
|
|
|
'download from s3 completed'
|
|
|
|
)
|
2020-02-17 12:34:28 -05:00
|
|
|
try {
|
|
|
|
object = JSON.parse(parts.join(''))
|
|
|
|
} catch (e) {
|
|
|
|
return callback(e)
|
|
|
|
}
|
|
|
|
object._id = ObjectId(object._id)
|
|
|
|
object.doc_id = ObjectId(object.doc_id)
|
|
|
|
object.project_id = ObjectId(object.project_id)
|
|
|
|
for (const op of Array.from(object.pack)) {
|
|
|
|
if (op._id != null) {
|
|
|
|
op._id = ObjectId(op._id)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return callback(null, object)
|
|
|
|
})
|
2021-07-13 07:04:43 -04:00
|
|
|
return outputStream.on('data', data => parts.push(data))
|
2020-02-17 12:34:28 -05:00
|
|
|
},
|
|
|
|
|
|
|
|
unArchivePack(project_id, doc_id, pack_id, callback) {
|
|
|
|
if (callback == null) {
|
2021-10-27 05:49:18 -04:00
|
|
|
callback = function () {}
|
2020-02-17 12:34:28 -05:00
|
|
|
}
|
2021-07-13 07:04:43 -04:00
|
|
|
return MongoAWS.readArchivedPack(
|
|
|
|
project_id,
|
|
|
|
doc_id,
|
|
|
|
pack_id,
|
|
|
|
function (err, object) {
|
|
|
|
if (err != null) {
|
|
|
|
return callback(err)
|
|
|
|
}
|
|
|
|
Metrics.inc('unarchive-pack')
|
|
|
|
// allow the object to expire, we can always retrieve it again
|
|
|
|
object.expiresAt = new Date(Date.now() + 7 * DAYS)
|
2022-05-16 08:38:18 -04:00
|
|
|
logger.debug(
|
|
|
|
{ project_id, doc_id, pack_id },
|
|
|
|
'inserting object from s3'
|
|
|
|
)
|
2021-07-13 07:04:43 -04:00
|
|
|
return db.docHistory.insertOne(object, (err, confirmation) => {
|
|
|
|
if (err) return callback(err)
|
|
|
|
object._id = confirmation.insertedId
|
|
|
|
callback(null, object)
|
|
|
|
})
|
2020-02-17 12:34:28 -05:00
|
|
|
}
|
2021-07-13 07:04:43 -04:00
|
|
|
)
|
|
|
|
},
|
2020-02-17 12:34:28 -05:00
|
|
|
}
|