2019-12-16 10:42:31 +00:00
|
|
|
const http = require('http')
|
|
|
|
const https = require('https')
|
2020-01-03 18:22:08 +00:00
|
|
|
// Give the shared HTTP and HTTPS agents the same maxSockets limit.
for (const { globalAgent } of [http, https]) {
  globalAgent.maxSockets = 300
}
|
2020-01-03 18:22:08 +00:00
|
|
|
|
2019-12-16 10:42:31 +00:00
|
|
|
const settings = require('settings-sharelatex')
|
2020-01-29 12:23:31 +00:00
|
|
|
|
2020-02-17 14:04:42 +00:00
|
|
|
const PersistorHelper = require('./PersistorHelper')
|
|
|
|
|
2020-01-03 18:22:08 +00:00
|
|
|
const fs = require('fs')
|
|
|
|
const S3 = require('aws-sdk/clients/s3')
|
|
|
|
const { URL } = require('url')
|
2020-02-12 10:32:26 +00:00
|
|
|
const { callbackify } = require('util')
|
2020-01-07 10:24:46 +00:00
|
|
|
const {
|
|
|
|
WriteError,
|
|
|
|
ReadError,
|
|
|
|
NotFoundError,
|
|
|
|
SettingsError
|
|
|
|
} = require('./Errors')
|
2019-12-16 10:24:35 +00:00
|
|
|
|
2020-02-17 14:04:42 +00:00
|
|
|
// Promise-returning implementations; exposed as `S3Persistor.promises`.
const promiseMethods = {
  sendFile,
  sendStream,
  getFileStream,
  getFileMd5Hash,
  deleteDirectory,
  getFileSize,
  deleteFile,
  copyFile,
  checkIfFileExists,
  directorySize
}

// The top-level methods are the callback-style (util.callbackify) twins of
// the promise methods, under the same names.
const S3Persistor = {}
for (const [methodName, method] of Object.entries(promiseMethods)) {
  S3Persistor[methodName] = callbackify(method)
}
S3Persistor.promises = promiseMethods

module.exports = S3Persistor
|
|
|
|
|
2020-01-03 18:22:08 +00:00
|
|
|
/**
 * Upload a file from the local filesystem by streaming it through sendStream.
 *
 * @param {string} bucketName - destination bucket
 * @param {string} key - destination object key
 * @param {string} fsPath - path of the local file to read
 * @returns {Promise<void>} settles with whatever sendStream settles with
 */
async function sendFile(bucketName, key, fsPath) {
  const fileStream = fs.createReadStream(fsPath)
  return sendStream(bucketName, key, fileStream)
}
|
2019-12-05 14:25:25 +00:00
|
|
|
|
2020-02-17 14:04:42 +00:00
|
|
|
/**
 * Upload the contents of a readable stream to S3.
 *
 * When `sourceMd5` is supplied it is sent to S3 as ContentMD5 so that S3
 * verifies the upload. When it is not supplied, the md5 is computed while the
 * data passes through and compared afterwards with the md5 of the stored
 * object (taken from the ETag, or computed by reading the object back when the
 * ETag is not in md5 format).
 *
 * @param {string} bucketName - destination bucket
 * @param {string} key - destination object key
 * @param {stream.Readable} readStream - data to upload
 * @param {string} [sourceMd5] - md5 of the data (converted with hexToBase64)
 * @returns {Promise<void>}
 * @throws the error produced by PersistorHelper.wrapError (WriteError type)
 *   for any failure during upload or verification
 */
async function sendStream(bucketName, key, readStream, sourceMd5) {
  try {
    // if there is no supplied md5 hash, we calculate the hash as the data passes through
    let hashPromise
    let b64Hash

    if (sourceMd5) {
      b64Hash = PersistorHelper.hexToBase64(sourceMd5)
    } else {
      hashPromise = PersistorHelper.calculateStreamMd5(readStream)
    }

    const meteredStream = PersistorHelper.getMeteredStream(
      readStream,
      's3.egress' // egress from us to s3
    )

    // if we have an md5 hash, pass this to S3 to verify the upload
    const uploadOptions = {
      Bucket: bucketName,
      Key: key,
      Body: meteredStream
    }
    if (b64Hash) {
      uploadOptions.ContentMD5 = b64Hash
    }

    const response = await _getClientForBucket(bucketName)
      .upload(uploadOptions, { partSize: settings.filestore.s3.partSize })
      .promise()

    if (!hashPromise) {
      // The caller's md5 was handed to S3 as ContentMD5, so there is nothing
      // left to compare here. In particular, do not read the object back just
      // to compute a hash that would never be used.
      return
    }

    // if we didn't have an md5 hash, we should compare our computed one with S3's
    // as we couldn't tell S3 about it beforehand
    let destMd5 = _md5FromResponse(response)
    if (!destMd5) {
      // the eTag isn't in md5 format so we need to calculate it ourselves
      const verifyStream = await getFileStream(
        response.Bucket,
        response.Key,
        {}
      )
      destMd5 = await PersistorHelper.calculateStreamMd5(verifyStream)
    }

    const computedMd5 = await hashPromise
    // throws on mismatch
    await PersistorHelper.verifyMd5(
      S3Persistor,
      bucketName,
      key,
      computedMd5,
      destMd5
    )
  } catch (err) {
    throw PersistorHelper.wrapError(
      err,
      'upload to S3 failed',
      { bucketName, key },
      WriteError
    )
  }
}
|
2019-12-16 10:24:35 +00:00
|
|
|
|
2020-01-03 18:22:08 +00:00
|
|
|
/**
 * Open a metered read stream for an S3 object.
 *
 * @param {string} bucketName - bucket holding the object
 * @param {string} key - object key
 * @param {Object} [opts] - when both `start` and `end` are set (not null or
 *   undefined) they are sent as a `bytes=start-end` Range
 * @returns {Promise<stream.Readable>} the metered stream, once
 *   PersistorHelper.waitForStreamReady has settled for the underlying stream
 * @throws the error produced by PersistorHelper.wrapError (ReadError type)
 *   when waiting for the stream fails
 */
async function getFileStream(bucketName, key, opts) {
  const options = opts || {}

  const request = { Bucket: bucketName, Key: key }
  const hasRange = options.start != null && options.end != null
  if (hasRange) {
    request.Range = `bytes=${options.start}-${options.end}`
  }

  const client = _getClientForBucket(bucketName)
  const s3Stream = client.getObject(request).createReadStream()

  const meteredStream = PersistorHelper.getMeteredStream(
    s3Stream,
    's3.ingress' // ingress to us from s3
  )

  try {
    await PersistorHelper.waitForStreamReady(s3Stream)
  } catch (error) {
    const context = { bucketName, key, opts: options }
    throw PersistorHelper.wrapError(
      error,
      'error reading file from S3',
      context,
      ReadError
    )
  }

  return meteredStream
}
|
2019-12-16 10:24:35 +00:00
|
|
|
|
2020-01-03 18:22:08 +00:00
|
|
|
/**
 * Delete every object whose key starts with `key`.
 *
 * listObjects results are paged: when a response is flagged `IsTruncated`
 * the listing continues from `NextMarker`, or from the last key returned when
 * `NextMarker` is absent, until the whole prefix has been processed. Each page
 * is deleted with one deleteObjects call.
 *
 * @param {string} bucketName - bucket to delete from
 * @param {string} key - key prefix identifying the "directory"
 * @returns {Promise<void>}
 * @throws the error produced by PersistorHelper.wrapError: ReadError type when
 *   listing fails, WriteError type when deleting fails
 */
async function deleteDirectory(bucketName, key) {
  let marker
  let hasMore

  do {
    const listParams = { Bucket: bucketName, Prefix: key }
    if (marker) {
      listParams.Marker = marker
    }

    let response
    try {
      response = await _getClientForBucket(bucketName)
        .listObjects(listParams)
        .promise()
    } catch (err) {
      throw PersistorHelper.wrapError(
        err,
        'failed to list objects in S3',
        { bucketName, key },
        ReadError
      )
    }

    const objects = response.Contents.map(item => ({ Key: item.Key }))
    if (objects.length) {
      try {
        await _getClientForBucket(bucketName)
          .deleteObjects({
            Bucket: bucketName,
            Delete: {
              Objects: objects,
              Quiet: true
            }
          })
          .promise()
      } catch (err) {
        throw PersistorHelper.wrapError(
          err,
          'failed to delete objects in S3',
          { bucketName, key },
          WriteError
        )
      }
    }

    const lastKey = objects.length ? objects[objects.length - 1].Key : undefined
    const nextMarker = response.NextMarker || lastKey
    // stop if the listing is complete, or if it cannot make progress
    hasMore = Boolean(
      response.IsTruncated && nextMarker && nextMarker !== marker
    )
    marker = nextMarker
  } while (hasMore)
}
|
2019-12-16 10:24:35 +00:00
|
|
|
|
2020-01-03 18:22:08 +00:00
|
|
|
/**
 * Look up the size of an S3 object with a headObject request.
 *
 * @param {string} bucketName - bucket holding the object
 * @param {string} key - object key
 * @returns {Promise<number>} the `ContentLength` reported by headObject
 * @throws the error produced by PersistorHelper.wrapError (ReadError type)
 */
async function getFileSize(bucketName, key) {
  const request = { Bucket: bucketName, Key: key }
  try {
    const client = _getClientForBucket(bucketName)
    const head = await client.headObject(request).promise()
    return head.ContentLength
  } catch (error) {
    const context = { bucketName, key }
    throw PersistorHelper.wrapError(
      error,
      'error getting size of s3 object',
      context,
      ReadError
    )
  }
}
|
2019-12-16 10:24:35 +00:00
|
|
|
|
2020-02-17 14:04:42 +00:00
|
|
|
/**
 * Fetch an object's md5 from the ETag returned by headObject.
 *
 * @param {string} bucketName - bucket holding the object
 * @param {string} key - object key
 * @returns {Promise<string|null>} whatever _md5FromResponse extracts from the
 *   headObject response
 * @throws the error produced by PersistorHelper.wrapError (ReadError type)
 */
async function getFileMd5Hash(bucketName, key) {
  const request = { Bucket: bucketName, Key: key }
  try {
    const client = _getClientForBucket(bucketName)
    const head = await client.headObject(request).promise()
    return _md5FromResponse(head)
  } catch (error) {
    const context = { bucketName, key }
    throw PersistorHelper.wrapError(
      error,
      'error getting hash of s3 object',
      context,
      ReadError
    )
  }
}
|
|
|
|
|
2020-01-03 18:22:08 +00:00
|
|
|
/**
 * Delete a single object from S3.
 *
 * @param {string} bucketName - bucket holding the object
 * @param {string} key - object key
 * @returns {Promise<void>}
 * @throws the error produced by PersistorHelper.wrapError (WriteError type)
 */
async function deleteFile(bucketName, key) {
  const target = { Bucket: bucketName, Key: key }
  try {
    const client = _getClientForBucket(bucketName)
    await client.deleteObject(target).promise()
  } catch (error) {
    // s3 does not give us a NotFoundError here
    const context = { bucketName, key }
    throw PersistorHelper.wrapError(
      error,
      'failed to delete file in S3',
      context,
      WriteError
    )
  }
}
|
2019-12-16 10:24:35 +00:00
|
|
|
|
2020-01-03 18:22:08 +00:00
|
|
|
/**
 * Copy an object to a new key within the same bucket.
 *
 * @param {string} bucketName - bucket holding both source and destination
 * @param {string} sourceKey - key of the object to copy
 * @param {string} destKey - key to copy it to
 * @returns {Promise<void>}
 * @throws the error produced by PersistorHelper.wrapError (WriteError type);
 *   the copyObject parameters are passed along as the error info
 */
async function copyFile(bucketName, sourceKey, destKey) {
  const copyRequest = {
    Bucket: bucketName,
    Key: destKey,
    CopySource: `${bucketName}/${sourceKey}`
  }

  try {
    const client = _getClientForBucket(bucketName)
    await client.copyObject(copyRequest).promise()
  } catch (error) {
    throw PersistorHelper.wrapError(
      error,
      'failed to copy file in S3',
      copyRequest,
      WriteError
    )
  }
}
|
2019-12-16 10:24:35 +00:00
|
|
|
|
2020-01-03 18:22:08 +00:00
|
|
|
/**
 * Report whether an object exists, by attempting to read its size.
 *
 * @param {string} bucketName - bucket to look in
 * @param {string} key - object key
 * @returns {Promise<boolean>} true when getFileSize succeeds, false when it
 *   fails with a NotFoundError
 * @throws the error produced by PersistorHelper.wrapError (ReadError type)
 *   for any other failure
 */
async function checkIfFileExists(bucketName, key) {
  try {
    await getFileSize(bucketName, key)
  } catch (error) {
    const isMissing = error instanceof NotFoundError
    if (!isMissing) {
      throw PersistorHelper.wrapError(
        error,
        'error checking whether S3 object exists',
        { bucketName, key },
        ReadError
      )
    }
    return false
  }
  return true
}
|
2019-12-16 10:24:35 +00:00
|
|
|
|
2020-01-06 15:35:40 +00:00
|
|
|
/**
 * Sum the sizes of every object whose key starts with `key`.
 *
 * listObjects results are paged: when a response is flagged `IsTruncated`
 * the listing continues from `NextMarker`, or from the last key returned when
 * `NextMarker` is absent, so the total covers the whole prefix rather than
 * only the first page.
 *
 * @param {string} bucketName - bucket to inspect
 * @param {string} key - key prefix identifying the "directory"
 * @returns {Promise<number>} sum of `Size` over all listed objects (0 if none)
 * @throws the error produced by PersistorHelper.wrapError (ReadError type)
 */
async function directorySize(bucketName, key) {
  try {
    const client = _getClientForBucket(bucketName)
    let totalSize = 0
    let marker
    let hasMore

    do {
      const listParams = { Bucket: bucketName, Prefix: key }
      if (marker) {
        listParams.Marker = marker
      }

      const response = await client.listObjects(listParams).promise()
      const contents = response.Contents
      totalSize = contents.reduce((acc, item) => item.Size + acc, totalSize)

      const lastKey = contents.length
        ? contents[contents.length - 1].Key
        : undefined
      const nextMarker = response.NextMarker || lastKey
      // stop if the listing is complete, or if it cannot make progress
      hasMore = Boolean(
        response.IsTruncated && nextMarker && nextMarker !== marker
      )
      marker = nextMarker
    } while (hasMore)

    return totalSize
  } catch (err) {
    throw PersistorHelper.wrapError(
      err,
      'error getting directory size in S3',
      { bucketName, key },
      ReadError
    )
  }
}
|
2019-12-16 10:24:35 +00:00
|
|
|
|
2020-01-08 09:17:30 +00:00
|
|
|
// Clients created with bucket-specific credentials, keyed by bucket name.
const _clients = new Map()
// Client created with the default credentials, shared by all other buckets.
let _defaultClient

/**
 * Return the S3 client to use for a bucket, creating and caching it on first
 * use.
 *
 * Buckets listed in `settings.filestore.s3BucketCreds` get their own client;
 * every other bucket shares one client built from `settings.filestore.s3`.
 *
 * The cache is accessed through the Map API and the credentials lookup only
 * considers own properties, so a bucket whose name matches an inherited
 * property (`get`, `set`, `constructor`, ...) is treated like any other.
 *
 * @param {string} bucket - bucket name
 * @returns {S3} client for the bucket
 * @throws {SettingsError} when there are neither bucket-specific nor default
 *   credentials
 */
function _getClientForBucket(bucket) {
  const cachedClient = _clients.get(bucket)
  if (cachedClient) {
    return cachedClient
  }

  const bucketCreds = settings.filestore.s3BucketCreds
  if (
    bucketCreds &&
    Object.prototype.hasOwnProperty.call(bucketCreds, bucket) &&
    bucketCreds[bucket]
  ) {
    const client = new S3(_buildClientOptions(bucketCreds[bucket]))
    _clients.set(bucket, client)
    return client
  }

  // no specific credentials for the bucket
  if (_defaultClient) {
    return _defaultClient
  }

  if (settings.filestore.s3.key) {
    _defaultClient = new S3(_buildClientOptions())
    return _defaultClient
  }

  throw new SettingsError({
    message: 'no bucket-specific or default credentials provided',
    info: { bucket }
  })
}
|
|
|
|
|
2020-01-08 09:17:30 +00:00
|
|
|
/**
 * Build the options object passed to the S3 client constructor.
 *
 * @param {Object} [bucketCredentials] - bucket-specific credentials with
 *   `auth_key` and `auth_secret`; when omitted, `settings.filestore.s3.key`
 *   and `settings.filestore.s3.secret` are used
 * @returns {Object} options with `credentials`, plus `endpoint`/`sslEnabled`
 *   when a custom endpoint is configured and `s3ForcePathStyle` when
 *   path-style access is enabled
 */
function _buildClientOptions(bucketCredentials) {
  const options = {}

  if (bucketCredentials) {
    options.credentials = {
      accessKeyId: bucketCredentials.auth_key,
      secretAccessKey: bucketCredentials.auth_secret
    }
  } else {
    options.credentials = {
      accessKeyId: settings.filestore.s3.key,
      secretAccessKey: settings.filestore.s3.secret
    }
  }

  if (settings.filestore.s3.endpoint) {
    const endpoint = new URL(settings.filestore.s3.endpoint)
    options.endpoint = settings.filestore.s3.endpoint
    // URL#protocol includes the trailing colon ('https:'), so comparing it
    // with 'https' would never match.
    options.sslEnabled = endpoint.protocol === 'https:'
  }

  // path-style access is only used for acceptance tests
  if (settings.filestore.s3.pathStyle) {
    options.s3ForcePathStyle = true
  }

  return options
}
|
2020-02-17 14:04:42 +00:00
|
|
|
|
2020-02-17 12:32:50 +00:00
|
|
|
/**
 * Extract an md5 hash from the ETag of an S3 response.
 *
 * Spaces and double quotes are stripped from the ETag; the remainder is
 * returned only if it is exactly 32 lowercase hex characters.
 *
 * @param {Object} response - S3 response that may carry an `ETag`
 * @returns {string|null} the md5, or null when the ETag is missing or is not
 *   in md5 format
 */
function _md5FromResponse(response) {
  const md5 = (response.ETag || '').replace(/[ "]/g, '')
  return /^[a-f0-9]{32}$/.test(md5) ? md5 : null
}
|