const http = require('http')
const https = require('https')
http.globalAgent.maxSockets = 300
https.globalAgent.maxSockets = 300

const settings = require('settings-sharelatex')
const PersistorHelper = require('./PersistorHelper')

const fs = require('fs')
const S3 = require('aws-sdk/clients/s3')
const { URL } = require('url')
const Stream = require('stream')
const { promisify, callbackify } = require('util')
const {
  WriteError,
  ReadError,
  NotFoundError,
  SettingsError
} = require('./Errors')

const pipeline = promisify(Stream.pipeline)

const S3Persistor = {
  sendFile: callbackify(sendFile),
  sendStream: callbackify(sendStream),
  getFileStream: callbackify(getFileStream),
  getFileMd5Hash: callbackify(getFileMd5Hash),
  deleteDirectory: callbackify(deleteDirectory),
  getFileSize: callbackify(getFileSize),
  deleteFile: callbackify(deleteFile),
  copyFile: callbackify(copyFile),
  checkIfFileExists: callbackify(checkIfFileExists),
  directorySize: callbackify(directorySize),
  promises: {
    sendFile,
    sendStream,
    getFileStream,
    getFileMd5Hash,
    deleteDirectory,
    getFileSize,
    deleteFile,
    copyFile,
    checkIfFileExists,
    directorySize
  }
}

module.exports = S3Persistor
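
// Example usage (an illustrative sketch only; the bucket, key and path are
// hypothetical, and `settings.filestore.s3` must be configured):
//
//   const S3Persistor = require('./S3Persistor')
//
//   // callback style
//   S3Persistor.sendFile('my-bucket', 'some/key', '/tmp/file.txt', err => {})
//
//   // promise style
//   await S3Persistor.promises.sendFile('my-bucket', 'some/key', '/tmp/file.txt')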

async function sendFile(bucketName, key, fsPath) {
  return sendStream(bucketName, key, fs.createReadStream(fsPath))
}

async function sendStream(bucketName, key, readStream, sourceMd5) {
  try {
    // egress from us to S3
    const observeOptions = { metric: 's3.egress' }
    let b64Hash

    if (sourceMd5) {
      b64Hash = PersistorHelper.hexToBase64(sourceMd5)
    } else {
      // if there is no supplied md5 hash, we calculate the hash as the data passes through
      observeOptions.hash = 'md5'
    }

    const observer = new PersistorHelper.ObserverStream(observeOptions)
    // not awaited: errors in the source stream propagate through the observer
    // and fail the upload below
    pipeline(readStream, observer)

    // if we have an md5 hash, pass this to S3 to verify the upload
    const uploadOptions = {
      Bucket: bucketName,
      Key: key,
      Body: observer
    }
    if (b64Hash) {
      uploadOptions.ContentMD5 = b64Hash
    }

    const response = await _getClientForBucket(bucketName)
      .upload(uploadOptions, { partSize: settings.filestore.s3.partSize })
      .promise()
    let destMd5 = _md5FromResponse(response)
    if (!destMd5) {
      // the ETag isn't in md5 format so we need to calculate the hash ourselves
      const verifyStream = await getFileStream(
        response.Bucket,
        response.Key,
        {}
      )
      destMd5 = await PersistorHelper.calculateStreamMd5(verifyStream)
    }

    // if we didn't have an md5 hash, we should compare our computed one with S3's
    // as we couldn't tell S3 about it beforehand
    if (!sourceMd5) {
      sourceMd5 = observer.getHash()
      // throws on mismatch
      await PersistorHelper.verifyMd5(
        S3Persistor,
        bucketName,
        key,
        sourceMd5,
        destMd5
      )
    }
  } catch (err) {
    throw PersistorHelper.wrapError(
      err,
      'upload to S3 failed',
      { bucketName, key },
      WriteError
    )
  }
}
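
// A caller that already knows the file's MD5 can pass it (hex-encoded) as
// `sourceMd5`, letting S3 verify the upload server-side via ContentMD5.
// An illustrative sketch (bucket, key and path are hypothetical):
//
//   const crypto = require('crypto')
//   const data = await fs.promises.readFile('/tmp/file.txt')
//   const sourceMd5 = crypto.createHash('md5').update(data).digest('hex')
//   await S3Persistor.promises.sendStream(
//     'my-bucket',
//     'some/key',
//     fs.createReadStream('/tmp/file.txt'),
//     sourceMd5
//   )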

async function getFileStream(bucketName, key, opts) {
  opts = opts || {}

  const params = {
    Bucket: bucketName,
    Key: key
  }
  if (opts.start != null && opts.end != null) {
    params.Range = `bytes=${opts.start}-${opts.end}`
  }

  const stream = _getClientForBucket(bucketName)
    .getObject(params)
    .createReadStream()

  // ingress from S3 to us
  const observer = new PersistorHelper.ObserverStream({ metric: 's3.ingress' })

  try {
    // wait for the pipeline to be ready, to catch non-200s
    await PersistorHelper.getReadyPipeline(stream, observer)
    return observer
  } catch (err) {
    throw PersistorHelper.wrapError(
      err,
      'error reading file from S3',
      { bucketName, key, opts },
      ReadError
    )
  }
}
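
// `start`/`end` map onto an HTTP Range header, whose end offset is inclusive.
// For example, fetching only the first kilobyte of an object (names are
// illustrative):
//
//   const stream = await S3Persistor.promises.getFileStream('my-bucket', 'some/key', {
//     start: 0,
//     end: 1023
//   })
//   stream.pipe(process.stdout)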

async function deleteDirectory(bucketName, key) {
  let response

  try {
    // note: listObjects returns at most 1000 keys per call, so prefixes
    // containing more objects than that would need several passes
    response = await _getClientForBucket(bucketName)
      .listObjects({ Bucket: bucketName, Prefix: key })
      .promise()
  } catch (err) {
    throw PersistorHelper.wrapError(
      err,
      'failed to list objects in S3',
      { bucketName, key },
      ReadError
    )
  }

  const objects = response.Contents.map(item => ({ Key: item.Key }))
  if (objects.length) {
    try {
      await _getClientForBucket(bucketName)
        .deleteObjects({
          Bucket: bucketName,
          Delete: {
            Objects: objects,
            Quiet: true
          }
        })
        .promise()
    } catch (err) {
      throw PersistorHelper.wrapError(
        err,
        'failed to delete objects in S3',
        { bucketName, key },
        WriteError
      )
    }
  }
}

async function getFileSize(bucketName, key) {
  try {
    const response = await _getClientForBucket(bucketName)
      .headObject({ Bucket: bucketName, Key: key })
      .promise()
    return response.ContentLength
  } catch (err) {
    throw PersistorHelper.wrapError(
      err,
      'error getting size of s3 object',
      { bucketName, key },
      ReadError
    )
  }
}

async function getFileMd5Hash(bucketName, key) {
  try {
    const response = await _getClientForBucket(bucketName)
      .headObject({ Bucket: bucketName, Key: key })
      .promise()
    return _md5FromResponse(response)
  } catch (err) {
    throw PersistorHelper.wrapError(
      err,
      'error getting hash of s3 object',
      { bucketName, key },
      ReadError
    )
  }
}

async function deleteFile(bucketName, key) {
  try {
    await _getClientForBucket(bucketName)
      .deleteObject({ Bucket: bucketName, Key: key })
      .promise()
  } catch (err) {
    // s3 does not give us a NotFoundError here
    throw PersistorHelper.wrapError(
      err,
      'failed to delete file in S3',
      { bucketName, key },
      WriteError
    )
  }
}

async function copyFile(bucketName, sourceKey, destKey) {
  const params = {
    Bucket: bucketName,
    Key: destKey,
    CopySource: `${bucketName}/${sourceKey}`
  }
  try {
    await _getClientForBucket(bucketName)
      .copyObject(params)
      .promise()
  } catch (err) {
    throw PersistorHelper.wrapError(
      err,
      'failed to copy file in S3',
      params,
      WriteError
    )
  }
}

async function checkIfFileExists(bucketName, key) {
  try {
    await getFileSize(bucketName, key)
    return true
  } catch (err) {
    if (err instanceof NotFoundError) {
      return false
    }
    throw PersistorHelper.wrapError(
      err,
      'error checking whether S3 object exists',
      { bucketName, key },
      ReadError
    )
  }
}

async function directorySize(bucketName, key) {
  try {
    const response = await _getClientForBucket(bucketName)
      .listObjects({ Bucket: bucketName, Prefix: key })
      .promise()

    return response.Contents.reduce((acc, item) => item.Size + acc, 0)
  } catch (err) {
    throw PersistorHelper.wrapError(
      err,
      'error getting directory size in S3',
      { bucketName, key },
      ReadError
    )
  }
}

const _clients = new Map()
let _defaultClient

function _getClientForBucket(bucket) {
  if (_clients.has(bucket)) {
    return _clients.get(bucket)
  }

  if (
    settings.filestore.s3BucketCreds &&
    settings.filestore.s3BucketCreds[bucket]
  ) {
    const client = new S3(
      _buildClientOptions(settings.filestore.s3BucketCreds[bucket])
    )
    _clients.set(bucket, client)
    return client
  }

  // no specific credentials for the bucket
  if (_defaultClient) {
    return _defaultClient
  }

  if (settings.filestore.s3.key) {
    _defaultClient = new S3(_buildClientOptions())
    return _defaultClient
  }

  throw new SettingsError({
    message: 'no bucket-specific or default credentials provided',
    info: { bucket }
  })
}

function _buildClientOptions(bucketCredentials) {
  const options = {}

  if (bucketCredentials) {
    options.credentials = {
      accessKeyId: bucketCredentials.auth_key,
      secretAccessKey: bucketCredentials.auth_secret
    }
  } else {
    options.credentials = {
      accessKeyId: settings.filestore.s3.key,
      secretAccessKey: settings.filestore.s3.secret
    }
  }

  if (settings.filestore.s3.endpoint) {
    const endpoint = new URL(settings.filestore.s3.endpoint)
    options.endpoint = settings.filestore.s3.endpoint
    // URL#protocol includes the trailing colon, e.g. 'https:'
    options.sslEnabled = endpoint.protocol === 'https:'
  }

  // path-style access is only used for acceptance tests
  if (settings.filestore.s3.pathStyle) {
    options.s3ForcePathStyle = true
  }

  return options
}
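
// Illustrative shape of the settings this module reads (all values below are
// hypothetical placeholders):
//
//   settings.filestore.s3 = {
//     key: 'AKIA...',                      // default credentials
//     secret: '...',
//     partSize: 100 * 1024 * 1024,         // multipart upload part size
//     endpoint: 'https://s3.example.com',  // optional, for S3-compatible stores
//     pathStyle: true                      // optional, acceptance tests only
//   }
//   settings.filestore.s3BucketCreds = {
//     'my-bucket': { auth_key: 'AKIA...', auth_secret: '...' }
//   }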

function _md5FromResponse(response) {
  const md5 = (response.ETag || '').replace(/[ "]/g, '')
  // the ETag is only the object's hex md5 for simple uploads; multipart
  // uploads, for instance, get an '<md5-of-part-md5s>-<partCount>' ETag,
  // which fails this check
  if (!md5.match(/^[a-f0-9]{32}$/)) {
    return null
  }

  return md5
}