2019-12-16 05:42:31 -05:00
|
|
|
const http = require('http')
|
|
|
|
const https = require('https')
|
2020-01-03 13:22:08 -05:00
|
|
|
// Raise the socket limit on both of Node's global agents (plain and TLS) to 300.
for (const agent of [http.globalAgent, https.globalAgent]) {
  agent.maxSockets = 300
}
|
2020-01-03 13:22:08 -05:00
|
|
|
|
2019-12-16 05:42:31 -05:00
|
|
|
const settings = require('settings-sharelatex')
|
|
|
|
const metrics = require('metrics-sharelatex')
|
2020-01-27 06:26:37 -05:00
|
|
|
const logger = require('logger-sharelatex')
|
2019-12-16 05:24:35 -05:00
|
|
|
|
2020-01-27 06:26:37 -05:00
|
|
|
const Minipass = require('minipass')
|
2020-01-03 13:22:08 -05:00
|
|
|
const meter = require('stream-meter')
|
2020-01-27 06:26:37 -05:00
|
|
|
const crypto = require('crypto')
|
2020-01-03 13:22:08 -05:00
|
|
|
const fs = require('fs')
|
|
|
|
const S3 = require('aws-sdk/clients/s3')
|
|
|
|
const { URL } = require('url')
|
|
|
|
const { callbackify } = require('util')
|
2020-01-07 05:24:46 -05:00
|
|
|
const {
|
|
|
|
WriteError,
|
|
|
|
ReadError,
|
|
|
|
NotFoundError,
|
|
|
|
SettingsError
|
|
|
|
} = require('./Errors')
|
2019-12-16 05:24:35 -05:00
|
|
|
|
2020-01-03 13:22:08 -05:00
|
|
|
module.exports = {
|
|
|
|
sendFile: callbackify(sendFile),
|
|
|
|
sendStream: callbackify(sendStream),
|
|
|
|
getFileStream: callbackify(getFileStream),
|
2020-01-27 06:26:37 -05:00
|
|
|
getFileMd5Hash: callbackify(getFileMd5Hash),
|
2020-01-03 13:22:08 -05:00
|
|
|
deleteDirectory: callbackify(deleteDirectory),
|
|
|
|
getFileSize: callbackify(getFileSize),
|
|
|
|
deleteFile: callbackify(deleteFile),
|
|
|
|
copyFile: callbackify(copyFile),
|
|
|
|
checkIfFileExists: callbackify(checkIfFileExists),
|
2020-01-06 10:35:40 -05:00
|
|
|
directorySize: callbackify(directorySize),
|
2020-01-03 13:22:08 -05:00
|
|
|
promises: {
|
|
|
|
sendFile,
|
|
|
|
sendStream,
|
|
|
|
getFileStream,
|
2020-01-27 06:26:37 -05:00
|
|
|
getFileMd5Hash,
|
2020-01-03 13:22:08 -05:00
|
|
|
deleteDirectory,
|
|
|
|
getFileSize,
|
|
|
|
deleteFile,
|
|
|
|
copyFile,
|
|
|
|
checkIfFileExists,
|
2020-01-06 10:35:40 -05:00
|
|
|
directorySize
|
2019-12-16 05:42:31 -05:00
|
|
|
}
|
|
|
|
}
|
2014-02-14 11:39:05 -05:00
|
|
|
|
2020-01-27 06:26:37 -05:00
|
|
|
/**
 * Re-encode a hex string as base64.
 *
 * @param {string} hex - hex-encoded bytes
 * @returns {string} the same bytes, base64-encoded
 */
function hexToBase64(hex) {
  const bytes = Buffer.from(hex, 'hex')
  return bytes.toString('base64')
}
|
|
|
|
|
2020-01-03 13:22:08 -05:00
|
|
|
/**
 * Upload a file from the local filesystem to S3.
 *
 * @param {string} bucketName - destination bucket
 * @param {string} key - destination object key
 * @param {string} fsPath - path of the local file to upload
 * @returns {Promise<void>} settles with the result of sendStream
 * @throws whatever _wrapError builds (requested type: ReadError) when the
 *   file cannot be opened; otherwise any error thrown by sendStream
 */
async function sendFile(bucketName, key, fsPath) {
  let readStream
  try {
    readStream = fs.createReadStream(fsPath)
    // createReadStream does not throw for a missing or unreadable file: the
    // failure arrives later as an 'error' event. Wait until the file is
    // actually open so that such failures are caught and wrapped here rather
    // than surfacing as an unhandled stream error.
    await new Promise((resolve, reject) => {
      const onOpen = () => {
        readStream.removeListener('error', onError)
        resolve()
      }
      const onError = err => {
        readStream.removeListener('open', onOpen)
        reject(err)
      }
      readStream.once('open', onOpen)
      readStream.once('error', onError)
    })
  } catch (err) {
    throw _wrapError(
      err,
      'error reading file from disk',
      { bucketName, key, fsPath },
      ReadError
    )
  }
  return sendStream(bucketName, key, readStream)
}
|
2019-12-05 09:25:25 -05:00
|
|
|
|
2020-01-27 06:26:37 -05:00
|
|
|
/**
 * Upload the contents of a readable stream to S3 and verify its MD5.
 *
 * When `sourceMd5` is supplied it is sent to S3 as ContentMD5. When it is not,
 * the hash is computed while the data passes through and is compared with the
 * hash derived from the upload response's ETag; on mismatch the uploaded
 * object is deleted (best effort) and the call fails.
 *
 * @param {string} bucketName - destination bucket
 * @param {string} key - destination object key
 * @param {stream.Readable} readStream - source of the data to upload
 * @param {string} [sourceMd5] - hex-encoded MD5 of the data, if already known
 * @returns {Promise<void>}
 * @throws whatever _wrapError builds (requested type: WriteError) for any
 *   failure, including a hash mismatch
 */
async function sendStream(bucketName, key, readStream, sourceMd5) {
  try {
    // if there is no supplied md5 hash, we calculate the hash as the data passes through
    const passthroughStream = new Minipass()
    let hashPromise
    let b64Hash

    if (sourceMd5) {
      b64Hash = hexToBase64(sourceMd5)
    } else {
      const hash = crypto.createHash('md5')
      hash.setEncoding('hex')
      passthroughStream.pipe(hash)
      hashPromise = new Promise((resolve, reject) => {
        passthroughStream.on('end', () => {
          hash.end()
          resolve(hash.read())
        })
        passthroughStream.on('error', err => {
          reject(err)
        })
      })
    }

    const meteredStream = meter()
    passthroughStream.pipe(meteredStream)
    meteredStream.on('finish', () => {
      metrics.count('s3.egress', meteredStream.bytes)
    })

    // pipe() does not propagate errors from the source. Without a listener a
    // failing source would raise an unhandled 'error' event and the upload
    // would never settle, so surface the error on the stream handed to S3.
    readStream.on('error', err => {
      meteredStream.emit('error', err)
    })

    // pipe the readstream through minipass, which can write to both the metered
    // stream (which goes on to S3) and the md5 generator if necessary
    // - we do this last so that a listener streams does not consume data meant
    //   for both destinations
    readStream.pipe(passthroughStream)

    // if we have an md5 hash, pass this to S3 to verify the upload
    const uploadOptions = {
      Bucket: bucketName,
      Key: key,
      Body: meteredStream
    }
    if (b64Hash) {
      uploadOptions.ContentMD5 = b64Hash
    }

    const response = await _getClientForBucket(bucketName)
      .upload(uploadOptions)
      .promise()
    const destMd5 = _md5FromResponse(response)

    // if we didn't have an md5 hash, compare our computed one with S3's
    if (hashPromise) {
      sourceMd5 = await hashPromise

      if (sourceMd5 !== destMd5) {
        try {
          await deleteFile(bucketName, key)
        } catch (err) {
          // best effort only: the hash mismatch below is the error to report
          logger.warn(err, 'error deleting file for invalid upload')
        }

        throw new WriteError({
          message: 'source and destination hashes do not match',
          info: {
            sourceMd5,
            destMd5,
            bucketName,
            key
          }
        })
      }
    }
  } catch (err) {
    throw _wrapError(
      err,
      'upload to S3 failed',
      { bucketName, key },
      WriteError
    )
  }
}
|
2019-12-16 05:24:35 -05:00
|
|
|
|
2020-01-03 13:22:08 -05:00
|
|
|
/**
 * Open a read stream for an S3 object.
 *
 * The returned promise resolves, on the source stream's first 'readable'
 * event, with a metered stream carrying the object data. The byte count is
 * reported to metrics as 's3.ingress' when the metered stream finishes. An
 * 'error' on the source stream rejects the promise with the error produced
 * by _wrapError.
 *
 * @param {string} bucketName - bucket to read from
 * @param {string} key - object key
 * @param {{start?: number, end?: number}} [opts] - when both `start` and
 *   `end` are set, they are sent as a `bytes=start-end` Range
 * @returns {Promise<stream.Readable>}
 */
async function getFileStream(bucketName, key, opts) {
  const range = opts || {}

  const params = { Bucket: bucketName, Key: key }
  if (range.start != null && range.end != null) {
    params.Range = `bytes=${range.start}-${range.end}`
  }

  return new Promise((resolve, reject) => {
    const request = _getClientForBucket(bucketName).getObject(params)
    const sourceStream = request.createReadStream()

    const meteredStream = meter()
    meteredStream.on('finish', () => {
      metrics.count('s3.ingress', meteredStream.bytes)
    })

    // only the first 'readable' matters: hand the piped stream to the caller
    sourceStream.once('readable', () => {
      resolve(sourceStream.pipe(meteredStream))
    })
    sourceStream.on('error', err => {
      reject(_wrapError(err, 'error reading from S3', params, ReadError))
    })
  })
}
|
2019-12-16 05:24:35 -05:00
|
|
|
|
2020-01-03 13:22:08 -05:00
|
|
|
/**
 * Delete every object whose key starts with the given prefix.
 *
 * S3 returns listings in pages, so the listing is repeated, continuing after
 * the last key seen, until S3 reports that the listing is no longer
 * truncated. Each page is deleted before the next one is requested.
 *
 * @param {string} bucketName - bucket to delete from
 * @param {string} key - key prefix identifying the "directory"
 * @returns {Promise<void>}
 * @throws whatever _wrapError builds: requested type ReadError when listing
 *   fails, WriteError when deleting fails
 */
async function deleteDirectory(bucketName, key) {
  let marker
  let truncated = false

  do {
    let response

    try {
      const listParams = { Bucket: bucketName, Prefix: key }
      if (marker) {
        listParams.Marker = marker
      }
      response = await _getClientForBucket(bucketName)
        .listObjects(listParams)
        .promise()
    } catch (err) {
      throw _wrapError(
        err,
        'failed to list objects in S3',
        { bucketName, key },
        ReadError
      )
    }

    const objects = (response.Contents || []).map(item => ({ Key: item.Key }))
    if (objects.length) {
      try {
        await _getClientForBucket(bucketName)
          .deleteObjects({
            Bucket: bucketName,
            Delete: {
              Objects: objects,
              Quiet: true
            }
          })
          .promise()
      } catch (err) {
        throw _wrapError(
          err,
          'failed to delete objects in S3',
          { bucketName, key },
          WriteError
        )
      }
    }

    // Without a Delimiter, S3 does not return NextMarker; the last key of the
    // page is the marker for the next request. Stop on an empty page so a
    // misbehaving endpoint cannot make us loop forever.
    truncated = Boolean(response.IsTruncated) && objects.length > 0
    if (truncated) {
      marker = objects[objects.length - 1].Key
    }
  } while (truncated)
}
|
2019-12-16 05:24:35 -05:00
|
|
|
|
2020-01-03 13:22:08 -05:00
|
|
|
/**
 * Look up the size of an S3 object with a HEAD request.
 *
 * @param {string} bucketName - bucket containing the object
 * @param {string} key - object key
 * @returns {Promise<number>} the ContentLength reported by S3
 * @throws whatever _wrapError builds (requested type: ReadError) on failure
 */
async function getFileSize(bucketName, key) {
  const headParams = { Bucket: bucketName, Key: key }
  try {
    const client = _getClientForBucket(bucketName)
    const { ContentLength } = await client.headObject(headParams).promise()
    return ContentLength
  } catch (err) {
    const info = { bucketName, key }
    throw _wrapError(err, 'error getting size of s3 object', info, ReadError)
  }
}
|
2019-12-16 05:24:35 -05:00
|
|
|
|
2020-01-27 06:26:37 -05:00
|
|
|
/**
 * Fetch the MD5 hash of an S3 object, as derived from its HEAD response by
 * _md5FromResponse.
 *
 * @param {string} bucketName - bucket containing the object
 * @param {string} key - object key
 * @returns {Promise<string>} the value returned by _md5FromResponse
 * @throws whatever _wrapError builds (requested type: ReadError) on failure
 */
async function getFileMd5Hash(bucketName, key) {
  const headParams = { Bucket: bucketName, Key: key }
  try {
    const client = _getClientForBucket(bucketName)
    const headResponse = await client.headObject(headParams).promise()
    return _md5FromResponse(headResponse)
  } catch (err) {
    const info = { bucketName, key }
    throw _wrapError(err, 'error getting hash of s3 object', info, ReadError)
  }
}
|
|
|
|
|
2020-01-03 13:22:08 -05:00
|
|
|
/**
 * Delete a single object from S3.
 *
 * @param {string} bucketName - bucket containing the object
 * @param {string} key - object key
 * @returns {Promise<void>}
 * @throws whatever _wrapError builds (requested type: WriteError) on failure
 */
async function deleteFile(bucketName, key) {
  const deleteParams = { Bucket: bucketName, Key: key }
  try {
    const client = _getClientForBucket(bucketName)
    await client.deleteObject(deleteParams).promise()
  } catch (err) {
    // s3 does not give us a NotFoundError here
    const info = { bucketName, key }
    throw _wrapError(err, 'failed to delete file in S3', info, WriteError)
  }
}
|
2019-12-16 05:24:35 -05:00
|
|
|
|
2020-01-03 13:22:08 -05:00
|
|
|
/**
 * Copy an object to a new key within the same bucket.
 *
 * @param {string} bucketName - bucket holding both source and destination
 * @param {string} sourceKey - key of the object to copy
 * @param {string} destKey - key to copy it to
 * @returns {Promise<void>}
 * @throws whatever _wrapError builds (requested type: WriteError) on failure;
 *   the error info is the copyObject parameter object
 */
async function copyFile(bucketName, sourceKey, destKey) {
  const copyParams = {
    Bucket: bucketName,
    Key: destKey,
    CopySource: `${bucketName}/${sourceKey}`
  }

  try {
    const client = _getClientForBucket(bucketName)
    await client.copyObject(copyParams).promise()
  } catch (err) {
    throw _wrapError(err, 'failed to copy file in S3', copyParams, WriteError)
  }
}
|
2019-12-16 05:24:35 -05:00
|
|
|
|
2020-01-03 13:22:08 -05:00
|
|
|
/**
 * Report whether an object exists, by attempting to read its size.
 *
 * @param {string} bucketName - bucket to look in
 * @param {string} key - object key
 * @returns {Promise<boolean>} true when getFileSize succeeds, false when it
 *   fails with a NotFoundError
 * @throws whatever _wrapError builds (requested type: ReadError) for any
 *   other failure
 */
async function checkIfFileExists(bucketName, key) {
  try {
    await getFileSize(bucketName, key)
  } catch (err) {
    // a missing object is an answer, not a failure
    if (err instanceof NotFoundError) {
      return false
    }
    const info = { bucketName, key }
    throw _wrapError(
      err,
      'error checking whether S3 object exists',
      info,
      ReadError
    )
  }
  return true
}
|
2019-12-16 05:24:35 -05:00
|
|
|
|
2020-01-06 10:35:40 -05:00
|
|
|
/**
 * Sum the sizes of every object whose key starts with the given prefix.
 *
 * S3 returns listings in pages, so the listing is repeated, continuing after
 * the last key seen, until S3 reports that it is no longer truncated.
 *
 * @param {string} bucketName - bucket to inspect
 * @param {string} key - key prefix identifying the "directory"
 * @returns {Promise<number>} total of the `Size` of all listed objects
 * @throws whatever _wrapError builds (requested type: ReadError) on failure
 */
async function directorySize(bucketName, key) {
  try {
    const client = _getClientForBucket(bucketName)
    let total = 0
    let marker
    let truncated = false

    do {
      const listParams = { Bucket: bucketName, Prefix: key }
      if (marker) {
        listParams.Marker = marker
      }
      const response = await client.listObjects(listParams).promise()
      const contents = response.Contents || []

      total = contents.reduce((acc, item) => item.Size + acc, total)

      // Without a Delimiter, S3 does not return NextMarker; the last key of
      // the page is the marker for the next request. Stop on an empty page so
      // a misbehaving endpoint cannot make us loop forever.
      truncated = Boolean(response.IsTruncated) && contents.length > 0
      if (truncated) {
        marker = contents[contents.length - 1].Key
      }
    } while (truncated)

    return total
  } catch (err) {
    throw _wrapError(
      err,
      'error getting directory size in S3',
      { bucketName, key },
      ReadError
    )
  }
}
|
2019-12-16 05:24:35 -05:00
|
|
|
|
2020-01-03 13:22:08 -05:00
|
|
|
/**
 * Build (but do not throw) an error that wraps `error` as its cause.
 *
 * @param {Error} error - the underlying error; its `code` selects the result
 * @param {string} message - message used when the error is not a not-found
 * @param {Object} params - attached to the new error as `info`
 * @param {Function} ErrorType - constructor used when the error is not a
 *   not-found
 * @returns a NotFoundError with message 'no such file' when `error.code` is
 *   one of the recognised codes, otherwise an `ErrorType` with `message`
 */
function _wrapError(error, message, params, ErrorType) {
  // the AWS client can return one of 'NoSuchKey', 'NotFound' or 404 (integer)
  // when something is not found, depending on the endpoint
  const notFoundCodes = ['NoSuchKey', 'NotFound', 404, 'AccessDenied', 'ENOENT']
  const isNotFound = notFoundCodes.includes(error.code)

  const Wrapper = isNotFound ? NotFoundError : ErrorType
  const text = isNotFound ? 'no such file' : message

  return new Wrapper({ message: text, info: params }).withCause(error)
}
|
2019-12-16 05:24:35 -05:00
|
|
|
|
2020-01-08 04:17:30 -05:00
|
|
|
// S3 clients built from bucket-specific credentials, keyed by bucket name
const _clients = new Map()
// S3 client built from the default credentials, created on first use
let _defaultClient

/**
 * Return the S3 client to use for a bucket, creating and caching it on first
 * use.
 *
 * A bucket with its own entry in `settings.filestore.s3BucketCreds` gets a
 * dedicated client; every other bucket shares one client built from the
 * default credentials.
 *
 * @param {string} bucket - bucket name
 * @returns {S3} the client for that bucket
 * @throws {SettingsError} when there are neither bucket-specific nor default
 *   credentials
 */
function _getClientForBucket(bucket) {
  // Use the Map API rather than property access: `_clients[bucket]` would
  // resolve names such as 'get', 'has' or 'constructor' to Map's own members.
  const cachedClient = _clients.get(bucket)
  if (cachedClient) {
    return cachedClient
  }

  const bucketCreds = settings.filestore.s3BucketCreds
  if (
    bucketCreds &&
    // own properties only, so inherited names are never taken for credentials
    Object.prototype.hasOwnProperty.call(bucketCreds, bucket) &&
    bucketCreds[bucket]
  ) {
    const bucketClient = new S3(_buildClientOptions(bucketCreds[bucket]))
    _clients.set(bucket, bucketClient)
    return bucketClient
  }

  // no specific credentials for the bucket
  if (_defaultClient) {
    return _defaultClient
  }

  if (settings.filestore.s3.key) {
    _defaultClient = new S3(_buildClientOptions())
    return _defaultClient
  }

  throw new SettingsError({
    message: 'no bucket-specific or default credentials provided',
    info: { bucket }
  })
}
|
|
|
|
|
2020-01-08 04:17:30 -05:00
|
|
|
/**
 * Build the constructor options for an S3 client.
 *
 * @param {{auth_key: string, auth_secret: string}} [bucketCredentials] -
 *   bucket-specific credentials; when omitted, `settings.filestore.s3.key`
 *   and `settings.filestore.s3.secret` are used
 * @returns {Object} options holding `credentials`, plus `endpoint` and
 *   `sslEnabled` when an endpoint is configured, and `s3ForcePathStyle` when
 *   path-style access is configured
 */
function _buildClientOptions(bucketCredentials) {
  const options = {}

  if (bucketCredentials) {
    options.credentials = {
      accessKeyId: bucketCredentials.auth_key,
      secretAccessKey: bucketCredentials.auth_secret
    }
  } else {
    options.credentials = {
      accessKeyId: settings.filestore.s3.key,
      secretAccessKey: settings.filestore.s3.secret
    }
  }

  if (settings.filestore.s3.endpoint) {
    const endpoint = new URL(settings.filestore.s3.endpoint)
    options.endpoint = settings.filestore.s3.endpoint
    // URL#protocol includes the trailing colon ('https:'), so comparing it
    // with 'https' could never match
    options.sslEnabled = endpoint.protocol === 'https:'
  }

  // path-style access is only used for acceptance tests
  if (settings.filestore.s3.pathStyle) {
    options.s3ForcePathStyle = true
  }

  return options
}
|
2020-01-27 06:26:37 -05:00
|
|
|
|
|
|
|
/**
 * Extract an MD5 hash from the ETag of an S3 response.
 *
 * Spaces and double quotes are stripped from the ETag; the remainder must be
 * exactly 32 lowercase hex characters.
 *
 * @param {{ETag?: string}} response - an S3 response carrying an ETag
 * @returns {string} the 32-character hex hash
 * @throws {ReadError} when the stripped ETag is not in that format
 */
function _md5FromResponse(response) {
  const rawETag = response.ETag || ''
  const md5 = rawETag.replace(/[ "]/g, '')

  if (/^[a-f0-9]{32}$/.test(md5)) {
    return md5
  }

  throw new ReadError({
    message: 's3 etag not in md5-hash format',
    info: {
      md5,
      eTag: response.ETag
    }
  })
}
|