diff --git a/services/filestore/.eslintrc b/services/filestore/.eslintrc index 42a4b5cace..73103de7f6 100644 --- a/services/filestore/.eslintrc +++ b/services/filestore/.eslintrc @@ -23,7 +23,8 @@ "rules": { // Swap the no-unused-expressions rule with a more chai-friendly one "no-unused-expressions": 0, - "chai-friendly/no-unused-expressions": "error" + "chai-friendly/no-unused-expressions": "error", + "no-console": "error" }, "overrides": [ { diff --git a/services/filestore/app/js/FSPersistorManager.js b/services/filestore/app/js/FSPersistor.js similarity index 75% rename from services/filestore/app/js/FSPersistorManager.js rename to services/filestore/app/js/FSPersistor.js index 862acb9bcb..973c670efd 100644 --- a/services/filestore/app/js/FSPersistorManager.js +++ b/services/filestore/app/js/FSPersistor.js @@ -7,6 +7,7 @@ const { promisify, callbackify } = require('util') const LocalFileWriter = require('./LocalFileWriter').promises const { NotFoundError, ReadError, WriteError } = require('./Errors') +const PersistorHelper = require('./PersistorHelper') const pipeline = promisify(Stream.pipeline) const fsUnlink = promisify(fs.unlink) @@ -27,7 +28,7 @@ async function sendFile(location, target, source) { const targetStream = fs.createWriteStream(`${location}/${filteredTarget}`) await pipeline(sourceStream, targetStream) } catch (err) { - throw _wrapError( + throw PersistorHelper.wrapError( err, 'failed to copy the specified file', { location, target, source }, @@ -36,11 +37,22 @@ async function sendFile(location, target, source) { } } -async function sendStream(location, target, sourceStream) { +async function sendStream(location, target, sourceStream, sourceMd5) { const fsPath = await LocalFileWriter.writeStream(sourceStream) + if (!sourceMd5) { + sourceMd5 = await _getFileMd5HashForPath(fsPath) + } try { await sendFile(location, target, fsPath) + const destMd5 = await getFileMd5Hash(location, target) + if (sourceMd5 !== destMd5) { + await LocalFileWriter.deleteFile(`${location}/${filterName(target)}`) + throw new WriteError({ + message: 'md5 hash mismatch', + info: { sourceMd5, destMd5, location, target } + }) + } } finally { await LocalFileWriter.deleteFile(fsPath) } @@ -53,7 +65,7 @@ async function getFileStream(location, name, opts) { try { opts.fd = await fsOpen(`${location}/${filteredName}`, 'r') } catch (err) { - throw _wrapError( + throw PersistorHelper.wrapError( err, 'failed to open file for streaming', { location, filteredName, opts }, @@ -71,7 +83,7 @@ async function getFileSize(location, filename) { const stat = await fsStat(fullPath) return stat.size } catch (err) { - throw _wrapError( + throw PersistorHelper.wrapError( err, 'failed to stat file', { location, filename }, @@ -80,6 +92,18 @@ async function getFileSize(location, filename) { } } +async function getFileMd5Hash(location, filename) { + const fullPath = path.join(location, filterName(filename)) + try { + return await _getFileMd5HashForPath(fullPath) + } catch (err) { + throw new ReadError({ + message: 'unable to get md5 hash from file', + info: { location, filename } + }).withCause(err) + } +} + async function copyFile(location, fromName, toName) { const filteredFromName = filterName(fromName) const filteredToName = filterName(toName) @@ -89,7 +113,7 @@ async function copyFile(location, fromName, toName) { const targetStream = fs.createWriteStream(`${location}/${filteredToName}`) await pipeline(sourceStream, targetStream) } catch (err) { - throw _wrapError( + throw PersistorHelper.wrapError( err, 'failed to copy 
file', { location, filteredFromName, filteredToName }, @@ -103,12 +127,17 @@ async function deleteFile(location, name) { try { await fsUnlink(`${location}/${filteredName}`) } catch (err) { - throw _wrapError( + const wrappedError = PersistorHelper.wrapError( err, 'failed to delete file', { location, filteredName }, WriteError ) + if (!(wrappedError instanceof NotFoundError)) { + // S3 doesn't give us a 404 when a file wasn't there to be deleted, so we + // should be consistent here as well + throw wrappedError + } } } @@ -119,7 +148,7 @@ async function deleteDirectory(location, name) { try { await rmrf(`${location}/${filteredName}`) } catch (err) { - throw _wrapError( + throw PersistorHelper.wrapError( err, 'failed to delete directory', { location, filteredName }, @@ -137,7 +166,7 @@ async function checkIfFileExists(location, name) { if (err.code === 'ENOENT') { return false } - throw _wrapError( + throw PersistorHelper.wrapError( err, 'failed to stat file', { location, filteredName }, @@ -167,7 +196,7 @@ async function directorySize(location, name) { } } } catch (err) { - throw _wrapError( + throw PersistorHelper.wrapError( err, 'failed to get directory size', { location, name }, @@ -178,25 +207,12 @@ async function directorySize(location, name) { return size } -function _wrapError(error, message, params, ErrorType) { - if (error.code === 'ENOENT') { - return new NotFoundError({ - message: 'no such file or directory', - info: params - }).withCause(error) - } else { - return new ErrorType({ - message: message, - info: params - }).withCause(error) - } -} - module.exports = { sendFile: callbackify(sendFile), sendStream: callbackify(sendStream), getFileStream: callbackify(getFileStream), getFileSize: callbackify(getFileSize), + getFileMd5Hash: callbackify(getFileMd5Hash), copyFile: callbackify(copyFile), deleteFile: callbackify(deleteFile), deleteDirectory: callbackify(deleteDirectory), @@ -207,6 +223,7 @@ module.exports = { sendStream, getFileStream, getFileSize, + getFileMd5Hash, copyFile, deleteFile, deleteDirectory, @@ -214,3 +231,8 @@ module.exports = { directorySize } } + +async function _getFileMd5HashForPath(fullPath) { + const stream = fs.createReadStream(fullPath) + return PersistorHelper.calculateStreamMd5(stream) +} diff --git a/services/filestore/app/js/MigrationPersistor.js b/services/filestore/app/js/MigrationPersistor.js new file mode 100644 index 0000000000..3ddc762922 --- /dev/null +++ b/services/filestore/app/js/MigrationPersistor.js @@ -0,0 +1,228 @@ +const metrics = require('metrics-sharelatex') +const Settings = require('settings-sharelatex') +const logger = require('logger-sharelatex') +const Stream = require('stream') +const { callbackify, promisify } = require('util') +const { NotFoundError, WriteError } = require('./Errors') + +const pipeline = promisify(Stream.pipeline) + +// Persistor that wraps two other persistors. Talks to the 'primary' by default, +// but will fall back to an older persistor in the case of a not-found error. +// If `Settings.filestore.fallback.copyOnMiss` is set, this will copy files from the fallback +// to the primary, in the event that they are missing. +// +// It is unlikely that the bucket/location name will be the same on the fallback +// as the primary. The bucket names should be overridden in `Settings.filestore.fallback.buckets` +// e.g. 
+// Settings.filestore.fallback.buckets = {
+//   myBucketOnS3: 'myBucketOnGCS'
+// }
+
+module.exports = function(primary, fallback) {
+  function _wrapMethodOnBothPersistors(method) {
+    return async function(bucket, key, ...moreArgs) {
+      const fallbackBucket = _getFallbackBucket(bucket)
+
+      await Promise.all([
+        primary.promises[method](bucket, key, ...moreArgs),
+        fallback.promises[method](fallbackBucket, key, ...moreArgs)
+      ])
+    }
+  }
+
+  async function getFileStreamWithFallback(bucket, key, opts) {
+    const shouldCopy =
+      Settings.filestore.fallback.copyOnMiss && !opts.start && !opts.end
+
+    try {
+      return await primary.promises.getFileStream(bucket, key, opts)
+    } catch (err) {
+      if (err instanceof NotFoundError) {
+        const fallbackBucket = _getFallbackBucket(bucket)
+        const fallbackStream = await fallback.promises.getFileStream(
+          fallbackBucket,
+          key,
+          opts
+        )
+        // tee the stream to the client, and as a copy to the primary (if necessary)
+        // start listening on both straight away so that we don't consume bytes
+        // in one place before the other
+        const returnStream = new Stream.PassThrough()
+        pipeline(fallbackStream, returnStream)
+
+        if (shouldCopy) {
+          const copyStream = new Stream.PassThrough()
+          pipeline(fallbackStream, copyStream)
+
+          _copyStreamFromFallbackAndVerify(
+            copyStream,
+            fallbackBucket,
+            bucket,
+            key,
+            key
+          ).catch(() => {
+            // swallow errors, as this runs in the background and will log a warning
+          })
+        }
+        return returnStream
+      }
+      throw err
+    }
+  }
+
+  async function copyFileWithFallback(bucket, sourceKey, destKey) {
+    try {
+      return await primary.promises.copyFile(bucket, sourceKey, destKey)
+    } catch (err) {
+      if (err instanceof NotFoundError) {
+        const fallbackBucket = _getFallbackBucket(bucket)
+        const fallbackStream = await fallback.promises.getFileStream(
+          fallbackBucket,
+          sourceKey,
+          {}
+        )
+
+        const copyStream = new Stream.PassThrough()
+        pipeline(fallbackStream, copyStream)
+
+        if (Settings.filestore.fallback.copyOnMiss) {
+          const missStream = new Stream.PassThrough()
+          pipeline(fallbackStream, missStream)
+
+          // copy from sourceKey -> sourceKey
+          _copyStreamFromFallbackAndVerify(
+            missStream,
+            fallbackBucket,
+            bucket,
+            sourceKey,
+            sourceKey
+          ).catch(() => {
+            // swallow errors, as this runs in the background and will log a warning
+          })
+        }
+        // copy from sourceKey -> destKey
+        return _copyStreamFromFallbackAndVerify(
+          copyStream,
+          fallbackBucket,
+          bucket,
+          sourceKey,
+          destKey
+        )
+      }
+      throw err
+    }
+  }
+
+  function _getFallbackBucket(bucket) {
+    return Settings.filestore.fallback.buckets[bucket]
+  }
+
+  function _wrapFallbackMethod(method) {
+    return async function(bucket, key, ...moreArgs) {
+      try {
+        return await primary.promises[method](bucket, key, ...moreArgs)
+      } catch (err) {
+        if (err instanceof NotFoundError) {
+          const fallbackBucket = _getFallbackBucket(bucket)
+          if (Settings.filestore.fallback.copyOnMiss) {
+            const fallbackStream = await fallback.promises.getFileStream(
+              fallbackBucket,
+              key,
+              {}
+            )
+            // run in background
+            _copyStreamFromFallbackAndVerify(
+              fallbackStream,
+              fallbackBucket,
+              bucket,
+              key,
+              key
+            ).catch(err => {
+              logger.warn({ err }, 'failed to copy file from fallback')
+            })
+          }
+          return fallback.promises[method](fallbackBucket, key, ...moreArgs)
+        }
+        throw err
+      }
+    }
+  }
+
+  async function _copyStreamFromFallbackAndVerify(
+    stream,
+    sourceBucket,
+    destBucket,
+    sourceKey,
+    destKey
+  ) {
+    try {
+      let sourceMd5
+      try {
+        sourceMd5 = await fallback.promises.getFileMd5Hash(
+          sourceBucket,
+          sourceKey
+        )
+      } catch (err) {
+        logger.warn({ err }, 'error getting md5 hash from fallback persistor')
+      }
+
+      await primary.promises.sendStream(destBucket, destKey, stream, sourceMd5)
+    } catch (err) {
+      const error = new WriteError({
+        message: 'unable to copy file to destination persistor',
+        info: {
+          sourceBucket,
+          destBucket,
+          sourceKey,
+          destKey
+        }
+      }).withCause(err)
+      metrics.inc('fallback.copy.failure')
+
+      try {
+        await primary.promises.deleteFile(destBucket, destKey)
+      } catch (err) {
+        error.info.cleanupError = new WriteError({
+          message: 'unable to clean up destination copy artifact',
+          info: {
+            destBucket,
+            destKey
+          }
+        }).withCause(err)
+      }
+
+      logger.warn({ error }, 'failed to copy file from fallback')
+      throw error
+    }
+  }
+
+  return {
+    primaryPersistor: primary,
+    fallbackPersistor: fallback,
+    sendFile: primary.sendFile,
+    sendStream: primary.sendStream,
+    getFileStream: callbackify(getFileStreamWithFallback),
+    getFileMd5Hash: callbackify(_wrapFallbackMethod('getFileMd5Hash')),
+    deleteDirectory: callbackify(
+      _wrapMethodOnBothPersistors('deleteDirectory')
+    ),
+    getFileSize: callbackify(_wrapFallbackMethod('getFileSize')),
+    deleteFile: callbackify(_wrapMethodOnBothPersistors('deleteFile')),
+    copyFile: callbackify(copyFileWithFallback),
+    checkIfFileExists: callbackify(_wrapFallbackMethod('checkIfFileExists')),
+    directorySize: callbackify(_wrapFallbackMethod('directorySize')),
+    promises: {
+      sendFile: primary.promises.sendFile,
+      sendStream: primary.promises.sendStream,
+      getFileStream: getFileStreamWithFallback,
+      getFileMd5Hash: _wrapFallbackMethod('getFileMd5Hash'),
+      deleteDirectory: _wrapMethodOnBothPersistors('deleteDirectory'),
+      getFileSize: _wrapFallbackMethod('getFileSize'),
+      deleteFile: _wrapMethodOnBothPersistors('deleteFile'),
+      copyFile: copyFileWithFallback,
+      checkIfFileExists: _wrapFallbackMethod('checkIfFileExists'),
+      directorySize: _wrapFallbackMethod('directorySize')
+    }
+  }
+}
diff --git a/services/filestore/app/js/PersistorHelper.js b/services/filestore/app/js/PersistorHelper.js
new file mode 100644
index 0000000000..ea8132a9c9
--- /dev/null
+++ b/services/filestore/app/js/PersistorHelper.js
@@ -0,0 +1,105 @@
+const crypto = require('crypto')
+const meter = require('stream-meter')
+const Stream = require('stream')
+const logger = require('logger-sharelatex')
+const { WriteError, ReadError, NotFoundError } = require('./Errors')
+const { promisify } = require('util')
+
+const pipeline = promisify(Stream.pipeline)
+
+module.exports = {
+  calculateStreamMd5,
+  verifyMd5,
+  getMeteredStream,
+  waitForStreamReady,
+  wrapError
+}
+
+// returns a promise which resolves with the md5 hash of the stream
+function calculateStreamMd5(stream) {
+  const hash = crypto.createHash('md5')
+  hash.setEncoding('hex')
+
+  return pipeline(stream, hash).then(() => hash.read())
+}
+
+// verifies the md5 hash of a file against the supplied md5 or the one stored in
+// storage if not supplied - deletes the new file if the md5 does not match and
+// throws an error
+async function verifyMd5(persistor, bucket, key, sourceMd5, destMd5 = null) {
+  if (!destMd5) {
+    destMd5 = await persistor.promises.getFileMd5Hash(bucket, key)
+  }
+
+  if (sourceMd5 !== destMd5) {
+    try {
+      await persistor.promises.deleteFile(bucket, key)
+    } catch (err) {
+      logger.warn({ err }, 'error deleting file for invalid upload')
+    }
+
+    throw new WriteError({
+      message: 'source and destination hashes do not match',
+      info: {
+        sourceMd5,
+        destMd5,
+        bucket,
+        key
+      }
+    })
+  }
+}
+
+// returns the next stream in the pipeline, and calls the callback with the byte count
+// when the stream finishes or receives an error
+function getMeteredStream(stream, callback) {
+  const meteredStream = meter()
+
+  pipeline(stream, meteredStream)
+    .then(() => {
+      callback(null, meteredStream.bytes)
+    })
+    .catch(err => {
+      // on error, just send how many bytes we received before the stream stopped
+      callback(err, meteredStream.bytes)
+    })
+
+  return meteredStream
+}
+
+// resolves when a stream is 'readable', or rejects if the stream throws an error
+// before that happens - this lets us handle protocol-level errors before trying
+// to read them
+function waitForStreamReady(stream) {
+  return new Promise((resolve, reject) => {
+    const onError = function(err) {
+      reject(wrapError(err, 'error before stream became ready', {}, ReadError))
+    }
+    const onStreamReady = function() {
+      stream.removeListener('readable', onStreamReady)
+      stream.removeListener('error', onError)
+      resolve(stream)
+    }
+    stream.on('readable', onStreamReady)
+    stream.on('error', onError)
+  })
+}
+
+function wrapError(error, message, params, ErrorType) {
+  if (
+    error instanceof NotFoundError ||
+    ['NoSuchKey', 'NotFound', 404, 'AccessDenied', 'ENOENT'].includes(
+      error.code
+    )
+  ) {
+    return new NotFoundError({
+      message: 'no such file',
+      info: params
+    }).withCause(error)
+  } else {
+    return new ErrorType({
+      message: message,
+      info: params
+    }).withCause(error)
+  }
+}
diff --git a/services/filestore/app/js/PersistorManager.js b/services/filestore/app/js/PersistorManager.js
index cca0cf0f36..32f6cd41f8 100644
--- a/services/filestore/app/js/PersistorManager.js
+++ b/services/filestore/app/js/PersistorManager.js
@@ -3,7 +3,8 @@ const logger = require('logger-sharelatex')
 
 logger.log(
   {
-    backend: settings.filestore.backend
+    backend: settings.filestore.backend,
+    fallback: settings.filestore.fallback && settings.filestore.fallback.backend
   },
   'Loading backend'
 )
@@ -11,14 +12,26 @@
 if (!settings.filestore.backend) {
   throw new Error('no backend specified - config incomplete')
 }
 
-switch (settings.filestore.backend) {
-  case 'aws-sdk':
-  case 's3':
-    module.exports = require('./S3PersistorManager')
-    break
-  case 'fs':
-    module.exports = require('./FSPersistorManager')
-    break
-  default:
-    throw new Error(`unknown filestore backend: ${settings.filestore.backend}`)
+function getPersistor(backend) {
+  switch (backend) {
+    case 'aws-sdk':
+    case 's3':
+      return require('./S3Persistor')
+    case 'fs':
+      return require('./FSPersistor')
+    default:
+      throw new Error(`unknown filestore backend: ${backend}`)
+  }
 }
+
+let persistor = getPersistor(settings.filestore.backend)
+
+if (settings.filestore.fallback && settings.filestore.fallback.backend) {
+  const migrationPersistor = require('./MigrationPersistor')
+  persistor = migrationPersistor(
+    persistor,
+    getPersistor(settings.filestore.fallback.backend)
+  )
+}
+
+module.exports = persistor
diff --git a/services/filestore/app/js/S3PersistorManager.js b/services/filestore/app/js/S3Persistor.js
similarity index 63%
rename from services/filestore/app/js/S3PersistorManager.js
rename to services/filestore/app/js/S3Persistor.js
index 52cadfbfbd..891d7be68e 100644
--- a/services/filestore/app/js/S3PersistorManager.js
+++ b/services/filestore/app/js/S3Persistor.js
@@ -6,7 +6,8 @@ https.globalAgent.maxSockets = 300
 
 const settings = require('settings-sharelatex')
 const metrics = require('metrics-sharelatex')
-const meter = require('stream-meter')
+const PersistorHelper =
require('./PersistorHelper') + const fs = require('fs') const S3 = require('aws-sdk/clients/s3') const { URL } = require('url') @@ -18,10 +19,11 @@ const { SettingsError } = require('./Errors') -module.exports = { +const S3Persistor = { sendFile: callbackify(sendFile), sendStream: callbackify(sendStream), getFileStream: callbackify(getFileStream), + getFileMd5Hash: callbackify(getFileMd5Hash), deleteDirectory: callbackify(deleteDirectory), getFileSize: callbackify(getFileSize), deleteFile: callbackify(deleteFile), @@ -32,6 +34,7 @@ module.exports = { sendFile, sendStream, getFileStream, + getFileMd5Hash, deleteDirectory, getFileSize, deleteFile, @@ -41,12 +44,18 @@ module.exports = { } } +module.exports = S3Persistor + +function hexToBase64(hex) { + return Buffer.from(hex, 'hex').toString('base64') +} + async function sendFile(bucketName, key, fsPath) { let readStream try { readStream = fs.createReadStream(fsPath) } catch (err) { - throw _wrapError( + throw PersistorHelper.wrapError( err, 'error reading file from disk', { bucketName, key, fsPath }, @@ -56,22 +65,56 @@ async function sendFile(bucketName, key, fsPath) { return sendStream(bucketName, key, readStream) } -async function sendStream(bucketName, key, readStream) { +async function sendStream(bucketName, key, readStream, sourceMd5) { try { - const meteredStream = meter() - meteredStream.on('finish', () => { - metrics.count('s3.egress', meteredStream.bytes) - }) + // if there is no supplied md5 hash, we calculate the hash as the data passes through + let hashPromise + let b64Hash - await _getClientForBucket(bucketName) - .upload({ - Bucket: bucketName, - Key: key, - Body: readStream.pipe(meteredStream) - }) + if (sourceMd5) { + b64Hash = hexToBase64(sourceMd5) + } else { + hashPromise = PersistorHelper.calculateStreamMd5(readStream) + } + + const meteredStream = PersistorHelper.getMeteredStream( + readStream, + (_, byteCount) => { + // ignore the error parameter and just log the byte count + metrics.count('s3.egress', byteCount) + } + ) + + // if we have an md5 hash, pass this to S3 to verify the upload + const uploadOptions = { + Bucket: bucketName, + Key: key, + Body: meteredStream + } + if (b64Hash) { + uploadOptions.ContentMD5 = b64Hash + } + + const response = await _getClientForBucket(bucketName) + .upload(uploadOptions) .promise() + const destMd5 = _md5FromResponse(response) + + // if we didn't have an md5 hash, we should compare our computed one with S3's + // as we couldn't tell S3 about it beforehand + if (hashPromise) { + sourceMd5 = await hashPromise + // throws on mismatch + await PersistorHelper.verifyMd5( + S3Persistor, + bucketName, + key, + sourceMd5, + destMd5 + ) + } } catch (err) { - throw _wrapError( + throw PersistorHelper.wrapError( err, 'upload to S3 failed', { bucketName, key }, @@ -91,25 +134,29 @@ async function getFileStream(bucketName, key, opts) { params.Range = `bytes=${opts.start}-${opts.end}` } - return new Promise((resolve, reject) => { - const stream = _getClientForBucket(bucketName) - .getObject(params) - .createReadStream() + const stream = _getClientForBucket(bucketName) + .getObject(params) + .createReadStream() - const meteredStream = meter() - meteredStream.on('finish', () => { - metrics.count('s3.ingress', meteredStream.bytes) - }) - - const onStreamReady = function() { - stream.removeListener('readable', onStreamReady) - resolve(stream.pipe(meteredStream)) + const meteredStream = PersistorHelper.getMeteredStream( + stream, + (_, byteCount) => { + // ignore the error parameter and just log 
the byte count + metrics.count('s3.ingress', byteCount) } - stream.on('readable', onStreamReady) - stream.on('error', err => { - reject(_wrapError(err, 'error reading from S3', params, ReadError)) - }) - }) + ) + + try { + await PersistorHelper.waitForStreamReady(stream) + return meteredStream + } catch (err) { + throw PersistorHelper.wrapError( + err, + 'error reading file from S3', + { bucketName, key, opts }, + ReadError + ) + } } async function deleteDirectory(bucketName, key) { @@ -120,7 +167,7 @@ async function deleteDirectory(bucketName, key) { .listObjects({ Bucket: bucketName, Prefix: key }) .promise() } catch (err) { - throw _wrapError( + throw PersistorHelper.wrapError( err, 'failed to list objects in S3', { bucketName, key }, @@ -141,7 +188,7 @@ async function deleteDirectory(bucketName, key) { }) .promise() } catch (err) { - throw _wrapError( + throw PersistorHelper.wrapError( err, 'failed to delete objects in S3', { bucketName, key }, @@ -158,7 +205,7 @@ async function getFileSize(bucketName, key) { .promise() return response.ContentLength } catch (err) { - throw _wrapError( + throw PersistorHelper.wrapError( err, 'error getting size of s3 object', { bucketName, key }, @@ -167,13 +214,31 @@ async function getFileSize(bucketName, key) { } } +async function getFileMd5Hash(bucketName, key) { + try { + const response = await _getClientForBucket(bucketName) + .headObject({ Bucket: bucketName, Key: key }) + .promise() + const md5 = _md5FromResponse(response) + return md5 + } catch (err) { + throw PersistorHelper.wrapError( + err, + 'error getting hash of s3 object', + { bucketName, key }, + ReadError + ) + } +} + async function deleteFile(bucketName, key) { try { await _getClientForBucket(bucketName) .deleteObject({ Bucket: bucketName, Key: key }) .promise() } catch (err) { - throw _wrapError( + // s3 does not give us a NotFoundError here + throw PersistorHelper.wrapError( err, 'failed to delete file in S3', { bucketName, key }, @@ -193,7 +258,12 @@ async function copyFile(bucketName, sourceKey, destKey) { .copyObject(params) .promise() } catch (err) { - throw _wrapError(err, 'failed to copy file in S3', params, WriteError) + throw PersistorHelper.wrapError( + err, + 'failed to copy file in S3', + params, + WriteError + ) } } @@ -205,7 +275,7 @@ async function checkIfFileExists(bucketName, key) { if (err instanceof NotFoundError) { return false } - throw _wrapError( + throw PersistorHelper.wrapError( err, 'error checking whether S3 object exists', { bucketName, key }, @@ -222,7 +292,7 @@ async function directorySize(bucketName, key) { return response.Contents.reduce((acc, item) => item.Size + acc, 0) } catch (err) { - throw _wrapError( + throw PersistorHelper.wrapError( err, 'error getting directory size in S3', { bucketName, key }, @@ -231,22 +301,6 @@ async function directorySize(bucketName, key) { } } -function _wrapError(error, message, params, ErrorType) { - if ( - ['NoSuchKey', 'NotFound', 'AccessDenied', 'ENOENT'].includes(error.code) - ) { - return new NotFoundError({ - message: 'no such file', - info: params - }).withCause(error) - } else { - return new ErrorType({ - message: message, - info: params - }).withCause(error) - } -} - const _clients = new Map() let _defaultClient @@ -309,3 +363,18 @@ function _buildClientOptions(bucketCredentials) { return options } + +function _md5FromResponse(response) { + const md5 = (response.ETag || '').replace(/[ "]/g, '') + if (!md5.match(/^[a-f0-9]{32}$/)) { + throw new ReadError({ + message: 's3 etag not in md5-hash format', + info: { 
+ md5, + eTag: response.ETag + } + }) + } + + return md5 +} diff --git a/services/filestore/config/settings.defaults.coffee b/services/filestore/config/settings.defaults.coffee index 206f932a76..bb124ae8e0 100644 --- a/services/filestore/config/settings.defaults.coffee +++ b/services/filestore/config/settings.defaults.coffee @@ -7,6 +7,19 @@ if process.env['AWS_KEY'] && !process.env['AWS_ACCESS_KEY_ID'] if process.env['AWS_SECRET'] && !process.env['AWS_SECRET_ACCESS_KEY'] process.env['AWS_SECRET_ACCESS_KEY'] = process.env['AWS_SECRET'] +# pre-backend setting, fall back to old behaviour +unless process.env['BACKEND']? + if process.env['AWS_ACCESS_KEY_ID']? or process.env['S3_BUCKET_CREDENTIALS']? + process.env['BACKEND'] = "s3" + process.env['USER_FILES_BUCKET_NAME'] = process.env['AWS_S3_USER_FILES_BUCKET_NAME'] + process.env['TEMPLATE_FILES_BUCKET_NAME'] = process.env['AWS_S3_TEMPLATE_FILES_BUCKET_NAME'] + process.env['PUBLIC_FILES_BUCKET_NAME'] = process.env['AWS_S3_PUBLIC_FILES_BUCKET_NAME'] + else + process.env['BACKEND'] = "fs" + process.env['USER_FILES_BUCKET_NAME'] = Path.resolve(__dirname + "/../user_files") + process.env['TEMPLATE_FILES_BUCKET_NAME'] = Path.resolve(__dirname + "/../template_files") + process.env['PUBLIC_FILES_BUCKET_NAME'] = Path.resolve(__dirname + "/../public_files") + settings = internal: filestore: @@ -18,38 +31,28 @@ settings = # Choices are # s3 - Amazon S3 # fs - local filesystem - if process.env['AWS_ACCESS_KEY_ID']? or process.env['S3_BUCKET_CREDENTIALS']? - backend: "s3" - s3: + backend: process.env['BACKEND'] + + s3: + if process.env['AWS_ACCESS_KEY_ID']? or process.env['S3_BUCKET_CREDENTIALS']? key: process.env['AWS_ACCESS_KEY_ID'] secret: process.env['AWS_SECRET_ACCESS_KEY'] endpoint: process.env['AWS_S3_ENDPOINT'] - stores: - user_files: process.env['AWS_S3_USER_FILES_BUCKET_NAME'] - template_files: process.env['AWS_S3_TEMPLATE_FILES_BUCKET_NAME'] - public_files: process.env['AWS_S3_PUBLIC_FILES_BUCKET_NAME'] - # if you are using S3, then fill in your S3 details below, - # or use env var with the same structure. - # s3: - # key: "" # default - # secret: "" # default - # - # s3BucketCreds: - # bucketname1: # secrets for bucketname1 - # auth_key: "" - # auth_secret: "" - # bucketname2: # secrets for bucketname2... - s3BucketCreds: JSON.parse process.env['S3_BUCKET_CREDENTIALS'] if process.env['S3_BUCKET_CREDENTIALS']? - else - backend: "fs" - stores: - # - # For local filesystem this is the directory to store the files in. - # Must contain full path, e.g. "/var/lib/sharelatex/data". - # This path must exist, not be tmpfs and be writable to by the user sharelatex is run as. - user_files: Path.resolve(__dirname + "/../user_files") - public_files: Path.resolve(__dirname + "/../public_files") - template_files: Path.resolve(__dirname + "/../template_files") + + stores: + user_files: process.env['USER_FILES_BUCKET_NAME'] + template_files: process.env['TEMPLATE_FILES_BUCKET_NAME'] + public_files: process.env['PUBLIC_FILES_BUCKET_NAME'] + + s3BucketCreds: JSON.parse process.env['S3_BUCKET_CREDENTIALS'] if process.env['S3_BUCKET_CREDENTIALS']? + + fallback: + if process.env['FALLBACK_BACKEND']? + backend: process.env['FALLBACK_BACKEND'] + # mapping of bucket names on the fallback, to bucket names on the primary. + # e.g. 
{ myS3UserFilesBucketName: 'myGoogleUserFilesBucketName' } + buckets: JSON.parse(process.env['FALLBACK_BUCKET_MAPPING'] || '{}') + copyOnMiss: process.env['COPY_ON_MISS'] == 'true' path: uploadFolder: Path.resolve(__dirname + "/../uploads") diff --git a/services/filestore/npm-shrinkwrap.json b/services/filestore/npm-shrinkwrap.json index 8d78271caa..bdc836e237 100644 --- a/services/filestore/npm-shrinkwrap.json +++ b/services/filestore/npm-shrinkwrap.json @@ -606,11 +606,6 @@ "event-target-shim": "^5.0.0" } }, - "accept-encoding": { - "version": "0.1.0", - "resolved": "https://registry.npmjs.org/accept-encoding/-/accept-encoding-0.1.0.tgz", - "integrity": "sha1-XdiLjfcfHcLlzGuVZezOHjmaMz4=" - }, "accepts": { "version": "1.3.5", "resolved": "https://registry.npmjs.org/accepts/-/accepts-1.3.5.tgz", @@ -790,11 +785,6 @@ } } }, - "aws-sign": { - "version": "0.2.1", - "resolved": "https://registry.npmjs.org/aws-sign/-/aws-sign-0.2.1.tgz", - "integrity": "sha1-uWGyLwuqTxXsJBFA83dtbBQoVtA=" - }, "aws-sign2": { "version": "0.7.0", "resolved": "https://registry.npmjs.org/aws-sign2/-/aws-sign2-0.7.0.tgz", @@ -857,14 +847,6 @@ "tweetnacl": "^0.14.3" } }, - "best-encoding": { - "version": "0.1.1", - "resolved": "https://registry.npmjs.org/best-encoding/-/best-encoding-0.1.1.tgz", - "integrity": "sha1-GVIT2rysBFgYuAe3ox+Dn63cl04=", - "requires": { - "accept-encoding": "~0.1.0" - } - }, "bignumber.js": { "version": "7.2.1", "resolved": "https://registry.npmjs.org/bignumber.js/-/bignumber.js-7.2.1.tgz", @@ -883,14 +865,6 @@ "resolved": "https://registry.npmjs.org/bintrees/-/bintrees-1.0.1.tgz", "integrity": "sha1-DmVcm5wkNeqraL9AJyJtK1WjRSQ=" }, - "bl": { - "version": "0.7.0", - "resolved": "https://registry.npmjs.org/bl/-/bl-0.7.0.tgz", - "integrity": "sha1-P7BnBgKsKHjrdw3CA58YNr5irls=", - "requires": { - "readable-stream": "~1.0.2" - } - }, "body-parser": { "version": "1.18.3", "resolved": "https://registry.npmjs.org/body-parser/-/body-parser-1.18.3.tgz", @@ -914,14 +888,6 @@ "integrity": "sha1-tcCeF8rNET0Rt7s+04TMASmU2Gs=", "dev": true }, - "boom": { - "version": "0.3.8", - "resolved": "https://registry.npmjs.org/boom/-/boom-0.3.8.tgz", - "integrity": "sha1-yM2wQUNZEnQWKMBE7Mcy0dF8Ceo=", - "requires": { - "hoek": "0.7.x" - } - }, "brace-expansion": { "version": "1.1.11", "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-1.1.11.tgz", @@ -995,11 +961,6 @@ "quick-lru": "^4.0.1" } }, - "caseless": { - "version": "0.3.0", - "resolved": "https://registry.npmjs.org/caseless/-/caseless-0.3.0.tgz", - "integrity": "sha1-U06XkWOH07cGtk/eu6xGQ4RQk08=" - }, "chai": { "version": "4.2.0", "resolved": "https://registry.npmjs.org/chai/-/chai-4.2.0.tgz", @@ -1117,14 +1078,6 @@ "integrity": "sha1-p9BVi9icQveV3UIyj3QIMcpTvCU=", "dev": true }, - "combined-stream": { - "version": "0.0.7", - "resolved": "https://registry.npmjs.org/combined-stream/-/combined-stream-0.0.7.tgz", - "integrity": "sha1-ATfmV7qlp1QcV6w3rF/AfXO03B8=", - "requires": { - "delayed-stream": "0.0.5" - } - }, "common-tags": { "version": "1.8.0", "resolved": "https://registry.npmjs.org/common-tags/-/common-tags-1.8.0.tgz", @@ -1171,11 +1124,6 @@ "resolved": "https://registry.npmjs.org/cookie/-/cookie-0.3.1.tgz", "integrity": "sha1-5+Ch+e9DtMi6klxcWpboBtFoc7s=" }, - "cookie-jar": { - "version": "0.2.0", - "resolved": "https://registry.npmjs.org/cookie-jar/-/cookie-jar-0.2.0.tgz", - "integrity": "sha1-ZOzAasl423leS1KQy+SLo3gUAPo=" - }, "cookie-signature": { "version": "1.0.6", "resolved": 
"https://registry.npmjs.org/cookie-signature/-/cookie-signature-1.0.6.tgz", @@ -1213,14 +1161,6 @@ } } }, - "cryptiles": { - "version": "0.1.3", - "resolved": "https://registry.npmjs.org/cryptiles/-/cryptiles-0.1.3.tgz", - "integrity": "sha1-GlVnNPBtJLo0hirpy55wmjr7/xw=", - "requires": { - "boom": "0.3.x" - } - }, "dashdash": { "version": "1.14.1", "resolved": "https://registry.npmjs.org/dashdash/-/dashdash-1.14.1.tgz", @@ -1272,11 +1212,6 @@ "resolved": "https://registry.npmjs.org/delay/-/delay-4.3.0.tgz", "integrity": "sha1-7+6/uPVFV5yzlrOnIkQ+yW0UxQ4=" }, - "delayed-stream": { - "version": "0.0.5", - "resolved": "https://registry.npmjs.org/delayed-stream/-/delayed-stream-0.0.5.tgz", - "integrity": "sha1-1LH0OpPoKW3+AmlPRoC8N6MTxz8=" - }, "depd": { "version": "1.1.2", "resolved": "https://registry.npmjs.org/depd/-/depd-1.1.2.tgz", @@ -2078,28 +2013,6 @@ } } }, - "forever-agent": { - "version": "0.2.0", - "resolved": "https://registry.npmjs.org/forever-agent/-/forever-agent-0.2.0.tgz", - "integrity": "sha1-4cJcetROCcOPIzh2x2/MJP+EOx8=" - }, - "form-data": { - "version": "0.0.10", - "resolved": "https://registry.npmjs.org/form-data/-/form-data-0.0.10.tgz", - "integrity": "sha1-2zRaU3jYau6x7V1VO4aawZLS9e0=", - "requires": { - "async": "~0.2.7", - "combined-stream": "~0.0.4", - "mime": "~1.2.2" - }, - "dependencies": { - "mime": { - "version": "1.2.11", - "resolved": "https://registry.npmjs.org/mime/-/mime-1.2.11.tgz", - "integrity": "sha1-WCA+7Ybjpe8XrtK32evUfwpg3RA=" - } - } - }, "forwarded": { "version": "0.1.2", "resolved": "https://registry.npmjs.org/forwarded/-/forwarded-0.1.2.tgz", @@ -2323,17 +2236,6 @@ "integrity": "sha512-PLcsoqu++dmEIZB+6totNFKq/7Do+Z0u4oT0zKOJNl3lYK6vGwwu2hjHs+68OEZbTjiUE9bgOABXbP/GvrS0Kg==", "dev": true }, - "hawk": { - "version": "0.10.2", - "resolved": "https://registry.npmjs.org/hawk/-/hawk-0.10.2.tgz", - "integrity": "sha1-mzYd7pWpMWQObVBOBWCaj8OsRdI=", - "requires": { - "boom": "0.3.x", - "cryptiles": "0.1.x", - "hoek": "0.7.x", - "sntp": "0.1.x" - } - }, "he": { "version": "1.1.1", "resolved": "https://registry.npmjs.org/he/-/he-1.1.1.tgz", @@ -2349,11 +2251,6 @@ "resolved": "https://registry.npmjs.org/hex2dec/-/hex2dec-1.1.2.tgz", "integrity": "sha1-jhzkvvNqdPfVcjw/swkMKGAHczg=" }, - "hoek": { - "version": "0.7.6", - "resolved": "https://registry.npmjs.org/hoek/-/hoek-0.7.6.tgz", - "integrity": "sha1-YPvZBFV1Qc0rh5Wr8wihs3cOFVo=" - }, "hosted-git-info": { "version": "2.8.5", "resolved": "https://registry.npmjs.org/hosted-git-info/-/hosted-git-info-2.8.5.tgz", @@ -2701,28 +2598,6 @@ "graceful-fs": "^4.1.9" } }, - "knox": { - "version": "0.9.2", - "resolved": "https://registry.npmjs.org/knox/-/knox-0.9.2.tgz", - "integrity": "sha1-NzZZNmniTwJP2vcjtqHcSv2DmnE=", - "requires": { - "debug": "^1.0.2", - "mime": "*", - "once": "^1.3.0", - "stream-counter": "^1.0.0", - "xml2js": "^0.4.4" - }, - "dependencies": { - "debug": { - "version": "1.0.5", - "resolved": "https://registry.npmjs.org/debug/-/debug-1.0.5.tgz", - "integrity": "sha1-9yQSF0MPmd7EwrRz6rkiKOh0wqw=", - "requires": { - "ms": "2.0.0" - } - } - } - }, "levn": { "version": "0.3.0", "resolved": "https://registry.npmjs.org/levn/-/levn-0.3.0.tgz", @@ -3353,55 +3228,6 @@ "resolved": "https://registry.npmjs.org/node-forge/-/node-forge-0.8.4.tgz", "integrity": "sha1-1nOGYrZhvhnicR7wGqOxghLxMDA=" }, - "node-transloadit": { - "version": "0.0.4", - "resolved": "https://registry.npmjs.org/node-transloadit/-/node-transloadit-0.0.4.tgz", - "integrity": "sha1-4ZoHheON94NblO2AANHjXmg7zsE=", - "requires": { - 
"request": "~2.16.6", - "underscore": "1.2.1" - }, - "dependencies": { - "json-stringify-safe": { - "version": "3.0.0", - "resolved": "https://registry.npmjs.org/json-stringify-safe/-/json-stringify-safe-3.0.0.tgz", - "integrity": "sha1-nbew5TDH8onF6MhDKvGRwv91pbM=" - }, - "mime": { - "version": "1.2.11", - "resolved": "https://registry.npmjs.org/mime/-/mime-1.2.11.tgz", - "integrity": "sha1-WCA+7Ybjpe8XrtK32evUfwpg3RA=" - }, - "qs": { - "version": "0.5.6", - "resolved": "https://registry.npmjs.org/qs/-/qs-0.5.6.tgz", - "integrity": "sha1-MbGtBYVnZRxSaSFQa5qHk5EaA4Q=" - }, - "request": { - "version": "2.16.6", - "resolved": "https://registry.npmjs.org/request/-/request-2.16.6.tgz", - "integrity": "sha1-hy/kRa5y3iZrN4edatfclI+gHK0=", - "requires": { - "aws-sign": "~0.2.0", - "cookie-jar": "~0.2.0", - "forever-agent": "~0.2.0", - "form-data": "~0.0.3", - "hawk": "~0.10.2", - "json-stringify-safe": "~3.0.0", - "mime": "~1.2.7", - "node-uuid": "~1.4.0", - "oauth-sign": "~0.2.0", - "qs": "~0.5.4", - "tunnel-agent": "~0.2.0" - } - }, - "underscore": { - "version": "1.2.1", - "resolved": "https://registry.npmjs.org/underscore/-/underscore-1.2.1.tgz", - "integrity": "sha1-/FxrB2VnPZKi1KyLTcCqiHAuK9Q=" - } - } - }, "node-uuid": { "version": "1.4.8", "resolved": "https://registry.npmjs.org/node-uuid/-/node-uuid-1.4.8.tgz", @@ -3427,11 +3253,6 @@ } } }, - "oauth-sign": { - "version": "0.2.0", - "resolved": "https://registry.npmjs.org/oauth-sign/-/oauth-sign-0.2.0.tgz", - "integrity": "sha1-oOahcV2u0GLzIrYit/5a/RA1tuI=" - }, "object-inspect": { "version": "1.7.0", "resolved": "https://registry.npmjs.org/object-inspect/-/object-inspect-1.7.0.tgz", @@ -4446,29 +4267,6 @@ "read-pkg": "^2.0.0" } }, - "readable-stream": { - "version": "1.0.34", - "resolved": "https://registry.npmjs.org/readable-stream/-/readable-stream-1.0.34.tgz", - "integrity": "sha1-Elgg40vIQtLyqq+v5MKRbuMsFXw=", - "requires": { - "core-util-is": "~1.0.0", - "inherits": "~2.0.1", - "isarray": "0.0.1", - "string_decoder": "~0.10.x" - }, - "dependencies": { - "isarray": { - "version": "0.0.1", - "resolved": "https://registry.npmjs.org/isarray/-/isarray-0.0.1.tgz", - "integrity": "sha1-ihis/Kmo9Bd+Cav8YDiTmwXR7t8=" - } - } - }, - "recluster": { - "version": "0.3.7", - "resolved": "https://registry.npmjs.org/recluster/-/recluster-0.3.7.tgz", - "integrity": "sha1-aKRx3ZC2obl3ZjTPdpZAWutWeJU=" - }, "regexpp": { "version": "2.0.1", "resolved": "https://registry.npmjs.org/regexpp/-/regexpp-2.0.1.tgz", @@ -4648,24 +4446,6 @@ "integrity": "sha512-pb/MYmXstAkysRFx8piNI1tGFNQIFA3vkE3Gq4EuA1dF6gHp/+vgZqsCGJapvy8N3Q+4o7FwvquPJcnZ7RYy4g==", "dev": true }, - "response": { - "version": "0.14.0", - "resolved": "https://registry.npmjs.org/response/-/response-0.14.0.tgz", - "integrity": "sha1-BmNS/z5rAm0EdYCUB2Y7Rob9JpY=", - "requires": { - "best-encoding": "^0.1.1", - "bl": "~0.7.0", - "caseless": "^0.3.0", - "mime": "~1.2.11" - }, - "dependencies": { - "mime": { - "version": "1.2.11", - "resolved": "https://registry.npmjs.org/mime/-/mime-1.2.11.tgz", - "integrity": "sha1-WCA+7Ybjpe8XrtK32evUfwpg3RA=" - } - } - }, "restore-cursor": { "version": "2.0.0", "resolved": "https://registry.npmjs.org/restore-cursor/-/restore-cursor-2.0.0.tgz", @@ -4880,14 +4660,6 @@ } } }, - "sntp": { - "version": "0.1.4", - "resolved": "https://registry.npmjs.org/sntp/-/sntp-0.1.4.tgz", - "integrity": "sha1-XvSBuVGnspr/30r9fyaDj8ESD4Q=", - "requires": { - "hoek": "0.7.x" - } - }, "source-map": { "version": "0.6.1", "resolved": 
"https://registry.npmjs.org/source-map/-/source-map-0.6.1.tgz", @@ -4975,49 +4747,11 @@ "resolved": "https://registry.npmjs.org/stealthy-require/-/stealthy-require-1.1.1.tgz", "integrity": "sha1-NbCYdbT/SfJqd35QmzCQoyJr8ks=" }, - "stream-browserify": { - "version": "2.0.1", - "resolved": "https://registry.npmjs.org/stream-browserify/-/stream-browserify-2.0.1.tgz", - "integrity": "sha1-ZiZu5fm9uZQKTkUUyvtDu3Hlyds=", - "requires": { - "inherits": "~2.0.1", - "readable-stream": "^2.0.2" - }, - "dependencies": { - "readable-stream": { - "version": "2.3.6", - "resolved": "http://registry.npmjs.org/readable-stream/-/readable-stream-2.3.6.tgz", - "integrity": "sha1-sRwn2IuP8fvgcGQ8+UsMea4bCq8=", - "requires": { - "core-util-is": "~1.0.0", - "inherits": "~2.0.3", - "isarray": "~1.0.0", - "process-nextick-args": "~2.0.0", - "safe-buffer": "~5.1.1", - "string_decoder": "~1.1.1", - "util-deprecate": "~1.0.1" - } - }, - "string_decoder": { - "version": "1.1.1", - "resolved": "https://registry.npmjs.org/string_decoder/-/string_decoder-1.1.1.tgz", - "integrity": "sha1-nPFhG6YmhdcDCunkujQUnDrwP8g=", - "requires": { - "safe-buffer": "~5.1.0" - } - } - } - }, "stream-buffers": { "version": "0.2.6", "resolved": "https://registry.npmjs.org/stream-buffers/-/stream-buffers-0.2.6.tgz", "integrity": "sha1-GBwI1bs2kARfaUAbmuanoM8zE/w=" }, - "stream-counter": { - "version": "1.0.0", - "resolved": "https://registry.npmjs.org/stream-counter/-/stream-counter-1.0.0.tgz", - "integrity": "sha1-kc8lac5NxQYf6816yyY5SloRR1E=" - }, "stream-meter": { "version": "1.0.4", "resolved": "https://registry.npmjs.org/stream-meter/-/stream-meter-1.0.4.tgz", @@ -5055,6 +4789,12 @@ "resolved": "https://registry.npmjs.org/stream-shift/-/stream-shift-1.0.0.tgz", "integrity": "sha1-1cdSgl5TZ+eG944Y5EXqIjoVWVI=" }, + "streamifier": { + "version": "0.1.1", + "resolved": "https://registry.npmjs.org/streamifier/-/streamifier-0.1.1.tgz", + "integrity": "sha1-l+mNj6TRBdYqJpHR3AfoINuN/E8=", + "dev": true + }, "string-width": { "version": "2.1.1", "resolved": "https://registry.npmjs.org/string-width/-/string-width-2.1.1.tgz", @@ -5096,11 +4836,6 @@ "function-bind": "^1.1.1" } }, - "string_decoder": { - "version": "0.10.31", - "resolved": "https://registry.npmjs.org/string_decoder/-/string_decoder-0.10.31.tgz", - "integrity": "sha1-YuIDvEF2bGwoyfyEMB2rHFMQ+pQ=" - }, "strip-ansi": { "version": "5.2.0", "resolved": "https://registry.npmjs.org/strip-ansi/-/strip-ansi-5.2.0.tgz", @@ -5300,11 +5035,6 @@ "integrity": "sha512-qOebF53frne81cf0S9B41ByenJ3/IuH8yJKngAX35CmiZySA0khhkovshKK+jGCaMnVomla7gVlIcc3EvKPbTQ==", "dev": true }, - "tunnel-agent": { - "version": "0.2.0", - "resolved": "https://registry.npmjs.org/tunnel-agent/-/tunnel-agent-0.2.0.tgz", - "integrity": "sha1-aFPCr7GyEJ5FYp5JK9419Fnqaeg=" - }, "tweetnacl": { "version": "0.14.5", "resolved": "https://registry.npmjs.org/tweetnacl/-/tweetnacl-0.14.5.tgz", diff --git a/services/filestore/package.json b/services/filestore/package.json index 14e35cd8a2..6f1dde0e8a 100644 --- a/services/filestore/package.json +++ b/services/filestore/package.json @@ -28,21 +28,16 @@ "fs-extra": "^1.0.0", "glob": "^7.1.6", "heapdump": "^0.3.2", - "knox": "~0.9.1", "logger-sharelatex": "^1.7.0", "metrics-sharelatex": "^2.2.0", "mocha": "5.2.0", - "node-transloadit": "0.0.4", "node-uuid": "~1.4.1", "pngcrush": "0.0.3", "range-parser": "^1.0.2", - "recluster": "^0.3.7", "request": "^2.88.0", "request-promise-native": "^1.0.8", - "response": "0.14.0", "rimraf": "2.2.8", "settings-sharelatex": "^1.1.0", - 
"stream-browserify": "^2.0.1", "stream-buffers": "~0.2.5", "stream-meter": "^1.0.4", "underscore": "~1.5.2" @@ -68,6 +63,7 @@ "prettier-eslint-cli": "^5.0.0", "sandboxed-module": "2.0.3", "sinon": "7.1.1", - "sinon-chai": "^3.3.0" + "sinon-chai": "^3.3.0", + "streamifier": "^0.1.1" } } diff --git a/services/filestore/test/acceptance/js/FilestoreApp.js b/services/filestore/test/acceptance/js/FilestoreApp.js index 718d53bcf8..20564e2d40 100644 --- a/services/filestore/test/acceptance/js/FilestoreApp.js +++ b/services/filestore/test/acceptance/js/FilestoreApp.js @@ -56,6 +56,7 @@ class FilestoreApp { } this.initing = false + this.persistor = require('../../../app/js/PersistorManager') } async waitForInit() { diff --git a/services/filestore/test/acceptance/js/FilestoreTests.js b/services/filestore/test/acceptance/js/FilestoreTests.js index d7dfbce57c..fd1baed474 100644 --- a/services/filestore/test/acceptance/js/FilestoreTests.js +++ b/services/filestore/test/acceptance/js/FilestoreTests.js @@ -11,6 +11,7 @@ const S3 = require('aws-sdk/clients/s3') const Stream = require('stream') const request = require('request') const { promisify } = require('util') +const streamifier = require('streamifier') chai.use(require('chai-as-promised')) const fsWriteFile = promisify(fs.writeFile) @@ -25,6 +26,20 @@ async function getMetric(filestoreUrl, metric) { return parseInt(found ? found[1] : 0) || 0 } +if (!process.env.AWS_ACCESS_KEY_ID) { + throw new Error('please provide credentials for the AWS S3 test server') +} + +function streamToString(stream) { + const chunks = [] + return new Promise((resolve, reject) => { + stream.on('data', chunk => chunks.push(chunk)) + stream.on('error', reject) + stream.on('end', () => resolve(Buffer.concat(chunks).toString('utf8'))) + stream.resume() + }) +} + // store settings for multiple backends, so that we can test each one. 
// fs will always be available - add others if they are configured const BackendSettings = { @@ -35,11 +50,8 @@ const BackendSettings = { public_files: Path.resolve(__dirname, '../../../public_files'), template_files: Path.resolve(__dirname, '../../../template_files') } - } -} - -if (process.env.AWS_ACCESS_KEY_ID) { - BackendSettings.S3Persistor = { + }, + S3Persistor: { backend: 's3', s3: { key: process.env.AWS_ACCESS_KEY_ID, @@ -52,6 +64,62 @@ if (process.env.AWS_ACCESS_KEY_ID) { template_files: process.env.AWS_S3_TEMPLATE_FILES_BUCKET_NAME, public_files: process.env.AWS_S3_PUBLIC_FILES_BUCKET_NAME } + }, + FallbackS3ToFSPersistor: { + backend: 's3', + s3: { + key: process.env.AWS_ACCESS_KEY_ID, + secret: process.env.AWS_SECRET_ACCESS_KEY, + endpoint: process.env.AWS_S3_ENDPOINT, + pathStyle: true + }, + stores: { + user_files: process.env.AWS_S3_USER_FILES_BUCKET_NAME, + template_files: process.env.AWS_S3_TEMPLATE_FILES_BUCKET_NAME, + public_files: process.env.AWS_S3_PUBLIC_FILES_BUCKET_NAME + }, + fallback: { + backend: 'fs', + buckets: { + [process.env.AWS_S3_USER_FILES_BUCKET_NAME]: Path.resolve( + __dirname, + '../../../user_files' + ), + [process.env.AWS_S3_PUBLIC_FILES_BUCKET_NAME]: Path.resolve( + __dirname, + '../../../public_files' + ), + [process.env.AWS_S3_TEMPLATE_FILES_BUCKET_NAME]: Path.resolve( + __dirname, + '../../../template_files' + ) + } + } + }, + FallbackFSToS3Persistor: { + backend: 'fs', + s3: { + key: process.env.AWS_ACCESS_KEY_ID, + secret: process.env.AWS_SECRET_ACCESS_KEY, + endpoint: process.env.AWS_S3_ENDPOINT, + pathStyle: true + }, + stores: { + user_files: Path.resolve(__dirname, '../../../user_files'), + public_files: Path.resolve(__dirname, '../../../public_files'), + template_files: Path.resolve(__dirname, '../../../template_files') + }, + fallback: { + backend: 's3', + buckets: { + [Path.resolve(__dirname, '../../../user_files')]: process.env + .AWS_S3_USER_FILES_BUCKET_NAME, + [Path.resolve(__dirname, '../../../public_files')]: process.env + .AWS_S3_PUBLIC_FILES_BUCKET_NAME, + [Path.resolve(__dirname, '../../../template_files')]: process.env + .AWS_S3_TEMPLATE_FILES_BUCKET_NAME + } + } } } @@ -63,7 +131,7 @@ describe('Filestore', function() { // redefine the test suite for every available backend Object.keys(BackendSettings).forEach(backend => { describe(backend, function() { - let app, previousEgress, previousIngress + let app, previousEgress, previousIngress, projectId before(async function() { // create the app with the relevant filestore settings @@ -84,6 +152,7 @@ describe('Filestore', function() { getMetric(filestoreUrl, 's3_ingress') ]) } + projectId = `acceptance_tests_${Math.random()}` }) it('should send a 200 for the status endpoint', async function() { @@ -100,23 +169,21 @@ describe('Filestore', function() { }) describe('with a file on the server', function() { - let fileId, fileUrl + let fileId, fileUrl, constantFileContent const localFileReadPath = '/tmp/filestore_acceptance_tests_file_read.txt' - const constantFileContent = [ - 'hello world', - `line 2 goes here ${Math.random()}`, - 'there are 3 lines in all' - ].join('\n') - - before(async function() { - await fsWriteFile(localFileReadPath, constantFileContent) - }) beforeEach(async function() { fileId = Math.random() - fileUrl = `${filestoreUrl}/project/acceptance_tests/file/${directoryName}%2F${fileId}` + fileUrl = `${filestoreUrl}/project/${projectId}/file/${directoryName}%2F${fileId}` + constantFileContent = [ + 'hello world', + `line 2 goes here ${Math.random()}`, + 'there are 3 
lines in all' + ].join('\n') + + await fsWriteFile(localFileReadPath, constantFileContent) const writeStream = request.post(fileUrl) const readStream = fs.createReadStream(localFileReadPath) @@ -177,7 +244,7 @@ describe('Filestore', function() { }) it('should be able to copy files', async function() { - const newProjectID = 'acceptance_tests_copyied_project' + const newProjectID = `acceptance_tests_copied_project_${Math.random()}` const newFileId = Math.random() const newFileUrl = `${filestoreUrl}/project/${newProjectID}/file/${directoryName}%2F${newFileId}` const opts = { @@ -185,7 +252,7 @@ describe('Filestore', function() { uri: newFileUrl, json: { source: { - project_id: 'acceptance_tests', + project_id: projectId, file_id: `${directoryName}/${fileId}` } } @@ -198,6 +265,18 @@ describe('Filestore', function() { expect(response.body).to.equal(constantFileContent) }) + it('should be able to overwrite the file', async function() { + const newContent = `here is some different content, ${Math.random()}` + const writeStream = request.post(fileUrl) + const readStream = streamifier.createReadStream(newContent) + // hack to consume the result to ensure the http request has been fully processed + const resultStream = fs.createWriteStream('/dev/null') + await pipeline(readStream, writeStream, resultStream) + + const response = await rp.get(fileUrl) + expect(response.body).to.equal(newContent) + }) + if (backend === 'S3Persistor') { it('should record an egress metric for the upload', async function() { const metric = await getMetric(filestoreUrl, 's3_egress') @@ -227,7 +306,7 @@ describe('Filestore', function() { }) describe('with multiple files', function() { - let fileIds, fileUrls, project + let fileIds, fileUrls const directoryName = 'directory' const localFileReadPaths = [ '/tmp/filestore_acceptance_tests_file_read_1.txt', @@ -254,11 +333,10 @@ describe('Filestore', function() { }) beforeEach(async function() { - project = `acceptance_tests_${Math.random()}` fileIds = [Math.random(), Math.random()] fileUrls = [ - `${filestoreUrl}/project/${project}/file/${directoryName}%2F${fileIds[0]}`, - `${filestoreUrl}/project/${project}/file/${directoryName}%2F${fileIds[1]}` + `${filestoreUrl}/project/${projectId}/file/${directoryName}%2F${fileIds[0]}`, + `${filestoreUrl}/project/${projectId}/file/${directoryName}%2F${fileIds[1]}` ] const writeStreams = [ @@ -282,7 +360,7 @@ describe('Filestore', function() { it('should get the directory size', async function() { const response = await rp.get( - `${filestoreUrl}/project/${project}/size` + `${filestoreUrl}/project/${projectId}/size` ) expect(parseInt(JSON.parse(response.body)['total bytes'])).to.equal( constantFileContents[0].length + constantFileContents[1].length @@ -292,10 +370,10 @@ describe('Filestore', function() { if (backend === 'S3Persistor') { describe('with a file in a specific bucket', function() { - let constantFileContents, fileId, fileUrl, bucketName + let constantFileContent, fileId, fileUrl, bucketName beforeEach(async function() { - constantFileContents = `This is a file in a different S3 bucket ${Math.random()}` + constantFileContent = `This is a file in a different S3 bucket ${Math.random()}` fileId = Math.random().toString() bucketName = Math.random().toString() fileUrl = `${filestoreUrl}/bucket/${bucketName}/key/${fileId}` @@ -320,14 +398,368 @@ describe('Filestore', function() { .upload({ Bucket: bucketName, Key: fileId, - Body: constantFileContents + Body: constantFileContent }) .promise() }) it('should get the file from the 
specified bucket', async function() { const response = await rp.get(fileUrl) - expect(response.body).to.equal(constantFileContents) + expect(response.body).to.equal(constantFileContent) + }) + }) + } + + if (BackendSettings[backend].fallback) { + describe('with a fallback', function() { + async function uploadStringToPersistor( + persistor, + bucket, + key, + content + ) { + const fileStream = streamifier.createReadStream(content) + await persistor.promises.sendStream(bucket, key, fileStream) + } + + async function getStringFromPersistor(persistor, bucket, key) { + const stream = await persistor.promises.getFileStream( + bucket, + key, + {} + ) + return streamToString(stream) + } + + async function expectPersistorToHaveFile( + persistor, + bucket, + key, + content + ) { + const foundContent = await getStringFromPersistor( + persistor, + bucket, + key + ) + expect(foundContent).to.equal(content) + } + + async function expectPersistorNotToHaveFile(persistor, bucket, key) { + await expect( + getStringFromPersistor(persistor, bucket, key) + ).to.eventually.have.been.rejected.with.property( + 'name', + 'NotFoundError' + ) + } + + let constantFileContent, + fileId, + fileKey, + fileUrl, + bucket, + fallbackBucket + + beforeEach(function() { + constantFileContent = `This is yet more file content ${Math.random()}` + fileId = Math.random().toString() + fileKey = `${projectId}/${directoryName}/${fileId}` + fileUrl = `${filestoreUrl}/project/${projectId}/file/${directoryName}%2F${fileId}` + + bucket = Settings.filestore.stores.user_files + fallbackBucket = Settings.filestore.fallback.buckets[bucket] + }) + + describe('with a file in the fallback bucket', function() { + beforeEach(async function() { + await uploadStringToPersistor( + app.persistor.fallbackPersistor, + fallbackBucket, + fileKey, + constantFileContent + ) + }) + + it('should not find file in the primary', async function() { + await expectPersistorNotToHaveFile( + app.persistor.primaryPersistor, + bucket, + fileKey + ) + }) + + it('should find the file in the fallback', async function() { + await expectPersistorToHaveFile( + app.persistor.fallbackPersistor, + fallbackBucket, + fileKey, + constantFileContent + ) + }) + + describe('when copyOnMiss is disabled', function() { + beforeEach(function() { + Settings.filestore.fallback.copyOnMiss = false + }) + + it('should fetch the file', async function() { + const res = await rp.get(fileUrl) + expect(res.body).to.equal(constantFileContent) + }) + + it('should not copy the file to the primary', async function() { + await rp.get(fileUrl) + + await expectPersistorNotToHaveFile( + app.persistor.primaryPersistor, + bucket, + fileKey + ) + }) + }) + + describe('when copyOnMiss is enabled', function() { + beforeEach(function() { + Settings.filestore.fallback.copyOnMiss = true + }) + + it('should fetch the file', async function() { + const res = await rp.get(fileUrl) + expect(res.body).to.equal(constantFileContent) + }) + + it('copies the file to the primary', async function() { + await rp.get(fileUrl) + // wait for the file to copy in the background + await promisify(setTimeout)(1000) + + await expectPersistorToHaveFile( + app.persistor.primaryPersistor, + bucket, + fileKey, + constantFileContent + ) + }) + }) + + describe('when copying a file', function() { + let newFileId, newFileUrl, newFileKey, opts + + beforeEach(function() { + const newProjectID = `acceptance_tests_copied_project_${Math.random()}` + newFileId = Math.random() + newFileUrl = 
`${filestoreUrl}/project/${newProjectID}/file/${directoryName}%2F${newFileId}` + newFileKey = `${newProjectID}/${directoryName}/${newFileId}` + + opts = { + method: 'put', + uri: newFileUrl, + json: { + source: { + project_id: projectId, + file_id: `${directoryName}/${fileId}` + } + } + } + }) + + describe('when copyOnMiss is false', function() { + beforeEach(async function() { + Settings.filestore.fallback.copyOnMiss = false + + const response = await rp(opts) + expect(response.statusCode).to.equal(200) + }) + + it('should leave the old file in the old bucket', async function() { + await expectPersistorToHaveFile( + app.persistor.fallbackPersistor, + fallbackBucket, + fileKey, + constantFileContent + ) + }) + + it('should not create a new file in the old bucket', async function() { + await expectPersistorNotToHaveFile( + app.persistor.fallbackPersistor, + fallbackBucket, + newFileKey + ) + }) + + it('should create a new file in the new bucket', async function() { + await expectPersistorToHaveFile( + app.persistor.primaryPersistor, + bucket, + newFileKey, + constantFileContent + ) + }) + + it('should not copy the old file to the primary with the old key', async function() { + // wait for the file to copy in the background + await promisify(setTimeout)(1000) + + await expectPersistorNotToHaveFile( + app.persistor.primaryPersistor, + bucket, + fileKey + ) + }) + }) + + describe('when copyOnMiss is true', function() { + beforeEach(async function() { + Settings.filestore.fallback.copyOnMiss = true + + const response = await rp(opts) + expect(response.statusCode).to.equal(200) + }) + + it('should leave the old file in the old bucket', async function() { + await expectPersistorToHaveFile( + app.persistor.fallbackPersistor, + fallbackBucket, + fileKey, + constantFileContent + ) + }) + + it('should not create a new file in the old bucket', async function() { + await expectPersistorNotToHaveFile( + app.persistor.fallbackPersistor, + fallbackBucket, + newFileKey + ) + }) + + it('should create a new file in the new bucket', async function() { + await expectPersistorToHaveFile( + app.persistor.primaryPersistor, + bucket, + newFileKey, + constantFileContent + ) + }) + + it('should copy the old file to the primary with the old key', async function() { + // wait for the file to copy in the background + await promisify(setTimeout)(1000) + + await expectPersistorToHaveFile( + app.persistor.primaryPersistor, + bucket, + fileKey, + constantFileContent + ) + }) + }) + }) + }) + + describe('when sending a file', function() { + beforeEach(async function() { + const writeStream = request.post(fileUrl) + const readStream = streamifier.createReadStream( + constantFileContent + ) + // hack to consume the result to ensure the http request has been fully processed + const resultStream = fs.createWriteStream('/dev/null') + await pipeline(readStream, writeStream, resultStream) + }) + + it('should store the file on the primary', async function() { + await expectPersistorToHaveFile( + app.persistor.primaryPersistor, + bucket, + fileKey, + constantFileContent + ) + }) + + it('should not store the file on the fallback', async function() { + await expectPersistorNotToHaveFile( + app.persistor.fallbackPersistor, + fallbackBucket, + `${projectId}/${directoryName}/${fileId}` + ) + }) + }) + + describe('when deleting a file', function() { + describe('when the file exists on the primary', function() { + beforeEach(async function() { + await uploadStringToPersistor( + app.persistor.primaryPersistor, + bucket, + fileKey, + 
constantFileContent + ) + }) + + it('should delete the file', async function() { + const response = await rp.del(fileUrl) + expect(response.statusCode).to.equal(204) + await expect( + rp.get(fileUrl) + ).to.eventually.be.rejected.and.have.property('statusCode', 404) + }) + }) + + describe('when the file exists on the fallback', function() { + beforeEach(async function() { + await uploadStringToPersistor( + app.persistor.fallbackPersistor, + fallbackBucket, + fileKey, + constantFileContent + ) + }) + + it('should delete the file', async function() { + const response = await rp.del(fileUrl) + expect(response.statusCode).to.equal(204) + await expect( + rp.get(fileUrl) + ).to.eventually.be.rejected.and.have.property('statusCode', 404) + }) + }) + + describe('when the file exists on both the primary and the fallback', function() { + beforeEach(async function() { + await uploadStringToPersistor( + app.persistor.primaryPersistor, + bucket, + fileKey, + constantFileContent + ) + await uploadStringToPersistor( + app.persistor.fallbackPersistor, + fallbackBucket, + fileKey, + constantFileContent + ) + }) + + it('should delete the files', async function() { + const response = await rp.del(fileUrl) + expect(response.statusCode).to.equal(204) + await expect( + rp.get(fileUrl) + ).to.eventually.be.rejected.and.have.property('statusCode', 404) + }) + }) + + describe('when the file does not exist', function() { + it('should return 204', async function() { + // S3 doesn't give us a 404 when the object doesn't exist, so to stay + // consistent we merrily return 204 ourselves here as well + const response = await rp.del(fileUrl) + expect(response.statusCode).to.equal(204) + }) + }) }) }) } @@ -341,7 +773,7 @@ describe('Filestore', function() { beforeEach(async function() { fileId = Math.random() - fileUrl = `${filestoreUrl}/project/acceptance_tests/file/${directoryName}%2F${fileId}` + fileUrl = `${filestoreUrl}/project/${projectId}/file/${directoryName}%2F${fileId}` const stat = await fsStat(localFileReadPath) localFileSize = stat.size const writeStream = request.post(fileUrl) diff --git a/services/filestore/test/unit/js/FSPersistorManagerTests.js b/services/filestore/test/unit/js/FSPersistorTests.js similarity index 69% rename from services/filestore/test/unit/js/FSPersistorManagerTests.js rename to services/filestore/test/unit/js/FSPersistorTests.js index 3b3b4bf417..0a09869bc0 100644 --- a/services/filestore/test/unit/js/FSPersistorManagerTests.js +++ b/services/filestore/test/unit/js/FSPersistorTests.js @@ -7,24 +7,37 @@ const Errors = require('../../../app/js/Errors') chai.use(require('sinon-chai')) chai.use(require('chai-as-promised')) -const modulePath = '../../../app/js/FSPersistorManager.js' +const modulePath = '../../../app/js/FSPersistor.js' -describe('FSPersistorManagerTests', function() { +describe('FSPersistorTests', function() { const stat = { size: 4, isFile: sinon.stub().returns(true) } const fd = 1234 - const readStream = 'readStream' const writeStream = 'writeStream' const remoteStream = 'remoteStream' const tempFile = '/tmp/potato.txt' const location = '/foo' const error = new Error('guru meditation error') + const md5 = 'ffffffff' const files = ['animals/wombat.tex', 'vegetables/potato.tex'] const globs = [`${location}/${files[0]}`, `${location}/${files[1]}`] const filteredFilenames = ['animals_wombat.tex', 'vegetables_potato.tex'] - let fs, rimraf, stream, LocalFileWriter, FSPersistorManager, glob + let fs, + rimraf, + stream, + LocalFileWriter, + FSPersistor, + glob, + 
readStream, + crypto, + Hash beforeEach(function() { + readStream = { + name: 'readStream', + on: sinon.stub().yields(), + pipe: sinon.stub() + } fs = { createReadStream: sinon.stub().returns(readStream), createWriteStream: sinon.stub().returns(writeStream), @@ -41,14 +54,26 @@ describe('FSPersistorManagerTests', function() { deleteFile: sinon.stub().resolves() } } - FSPersistorManager = SandboxedModule.require(modulePath, { + Hash = { + end: sinon.stub(), + read: sinon.stub().returns(md5), + setEncoding: sinon.stub() + } + crypto = { + createHash: sinon.stub().returns(Hash) + } + FSPersistor = SandboxedModule.require(modulePath, { requires: { './LocalFileWriter': LocalFileWriter, './Errors': Errors, fs, glob, rimraf, - stream + stream, + crypto, + // imported by PersistorHelper but otherwise unused here + 'stream-meter': {}, + 'logger-sharelatex': {} }, globals: { console } }) @@ -57,7 +82,7 @@ describe('FSPersistorManagerTests', function() { describe('sendFile', function() { const localFilesystemPath = '/path/to/local/file' it('should copy the file', async function() { - await FSPersistorManager.promises.sendFile( + await FSPersistor.promises.sendFile( location, files[0], localFilesystemPath @@ -72,33 +97,21 @@ describe('FSPersistorManagerTests', function() { it('should return an error if the file cannot be stored', async function() { stream.pipeline.yields(error) await expect( - FSPersistorManager.promises.sendFile( - location, - files[0], - localFilesystemPath - ) + FSPersistor.promises.sendFile(location, files[0], localFilesystemPath) ).to.eventually.be.rejected.and.have.property('cause', error) }) }) describe('sendStream', function() { it('should send the stream to LocalFileWriter', async function() { - await FSPersistorManager.promises.sendStream( - location, - files[0], - remoteStream - ) + await FSPersistor.promises.sendStream(location, files[0], remoteStream) expect(LocalFileWriter.promises.writeStream).to.have.been.calledWith( remoteStream ) }) it('should delete the temporary file', async function() { - await FSPersistorManager.promises.sendStream( - location, - files[0], - remoteStream - ) + await FSPersistor.promises.sendStream(location, files[0], remoteStream) expect(LocalFileWriter.promises.deleteFile).to.have.been.calledWith( tempFile ) @@ -107,30 +120,55 @@ describe('FSPersistorManagerTests', function() { it('should return the error from LocalFileWriter', async function() { LocalFileWriter.promises.writeStream.rejects(error) await expect( - FSPersistorManager.promises.sendStream(location, files[0], remoteStream) + FSPersistor.promises.sendStream(location, files[0], remoteStream) ).to.eventually.be.rejectedWith(error) }) it('should send the temporary file to the filestore', async function() { - await FSPersistorManager.promises.sendStream( - location, - files[0], - remoteStream - ) + await FSPersistor.promises.sendStream(location, files[0], remoteStream) expect(fs.createReadStream).to.have.been.calledWith(tempFile) }) + + describe('when the md5 hash does not match', function() { + it('should return a write error', async function() { + await expect( + FSPersistor.promises.sendStream( + location, + files[0], + remoteStream, + '00000000' + ) + ) + .to.eventually.be.rejected.and.be.an.instanceOf(Errors.WriteError) + .and.have.property('message', 'md5 hash mismatch') + }) + + it('deletes the copied file', async function() { + try { + await FSPersistor.promises.sendStream( + location, + files[0], + remoteStream, + '00000000' + ) + } catch (_) {} + 
expect(LocalFileWriter.promises.deleteFile).to.have.been.calledWith( + `${location}/${filteredFilenames[0]}` + ) + }) + }) }) describe('getFileStream', function() { it('should use correct file location', async function() { - await FSPersistorManager.promises.getFileStream(location, files[0], {}) + await FSPersistor.promises.getFileStream(location, files[0], {}) expect(fs.open).to.have.been.calledWith( `${location}/${filteredFilenames[0]}` ) }) it('should pass the options to createReadStream', async function() { - await FSPersistorManager.promises.getFileStream(location, files[0], { + await FSPersistor.promises.getFileStream(location, files[0], { start: 0, end: 8 }) @@ -146,18 +184,14 @@ describe('FSPersistorManagerTests', function() { err.code = 'ENOENT' fs.open.yields(err) - await expect( - FSPersistorManager.promises.getFileStream(location, files[0], {}) - ) + await expect(FSPersistor.promises.getFileStream(location, files[0], {})) .to.eventually.be.rejected.and.be.an.instanceOf(Errors.NotFoundError) .and.have.property('cause', err) }) it('should wrap any other error', async function() { fs.open.yields(error) - await expect( - FSPersistorManager.promises.getFileStream(location, files[0], {}) - ) + await expect(FSPersistor.promises.getFileStream(location, files[0], {})) .to.eventually.be.rejectedWith('failed to open file for streaming') .and.be.an.instanceOf(Errors.ReadError) .and.have.property('cause', error) @@ -181,18 +215,18 @@ describe('FSPersistorManagerTests', function() { it('should return the file size', async function() { expect( - await FSPersistorManager.promises.getFileSize(location, files[0]) + await FSPersistor.promises.getFileSize(location, files[0]) ).to.equal(size) }) it('should throw a NotFoundError if the file does not exist', async function() { await expect( - FSPersistorManager.promises.getFileSize(location, badFilename) + FSPersistor.promises.getFileSize(location, badFilename) ).to.eventually.be.rejected.and.be.an.instanceOf(Errors.NotFoundError) }) it('should wrap any other error', async function() { - await expect(FSPersistorManager.promises.getFileSize(location, 'raccoon')) + await expect(FSPersistor.promises.getFileSize(location, 'raccoon')) .to.eventually.be.rejected.and.be.an.instanceOf(Errors.ReadError) .and.have.property('cause', error) }) @@ -200,28 +234,28 @@ describe('FSPersistorManagerTests', function() { describe('copyFile', function() { it('Should open the source for reading', async function() { - await FSPersistorManager.promises.copyFile(location, files[0], files[1]) + await FSPersistor.promises.copyFile(location, files[0], files[1]) expect(fs.createReadStream).to.have.been.calledWith( `${location}/${filteredFilenames[0]}` ) }) it('Should open the target for writing', async function() { - await FSPersistorManager.promises.copyFile(location, files[0], files[1]) + await FSPersistor.promises.copyFile(location, files[0], files[1]) expect(fs.createWriteStream).to.have.been.calledWith( `${location}/${filteredFilenames[1]}` ) }) it('Should pipe the source to the target', async function() { - await FSPersistorManager.promises.copyFile(location, files[0], files[1]) + await FSPersistor.promises.copyFile(location, files[0], files[1]) expect(stream.pipeline).to.have.been.calledWith(readStream, writeStream) }) }) describe('deleteFile', function() { it('Should call unlink with correct options', async function() { - await FSPersistorManager.promises.deleteFile(location, files[0]) + await FSPersistor.promises.deleteFile(location, files[0]) 
expect(fs.unlink).to.have.been.calledWith( `${location}/${filteredFilenames[0]}` ) @@ -230,14 +264,14 @@ describe('FSPersistorManagerTests', function() { it('Should propagate the error', async function() { fs.unlink.yields(error) await expect( - FSPersistorManager.promises.deleteFile(location, files[0]) + FSPersistor.promises.deleteFile(location, files[0]) ).to.eventually.be.rejected.and.have.property('cause', error) }) }) describe('deleteDirectory', function() { it('Should call rmdir(rimraf) with correct options', async function() { - await FSPersistorManager.promises.deleteDirectory(location, files[0]) + await FSPersistor.promises.deleteDirectory(location, files[0]) expect(rimraf).to.have.been.calledWith( `${location}/${filteredFilenames[0]}` ) @@ -246,7 +280,7 @@ describe('FSPersistorManagerTests', function() { it('Should propagate the error', async function() { rimraf.yields(error) await expect( - FSPersistorManager.promises.deleteDirectory(location, files[0]) + FSPersistor.promises.deleteDirectory(location, files[0]) ).to.eventually.be.rejected.and.have.property('cause', error) }) }) @@ -266,7 +300,7 @@ describe('FSPersistorManagerTests', function() { }) it('Should call stat with correct options', async function() { - await FSPersistorManager.promises.checkIfFileExists(location, files[0]) + await FSPersistor.promises.checkIfFileExists(location, files[0]) expect(fs.stat).to.have.been.calledWith( `${location}/${filteredFilenames[0]}` ) @@ -274,23 +308,18 @@ describe('FSPersistorManagerTests', function() { it('Should return true for existing files', async function() { expect( - await FSPersistorManager.promises.checkIfFileExists(location, files[0]) + await FSPersistor.promises.checkIfFileExists(location, files[0]) ).to.equal(true) }) it('Should return false for non-existing files', async function() { expect( - await FSPersistorManager.promises.checkIfFileExists( - location, - badFilename - ) + await FSPersistor.promises.checkIfFileExists(location, badFilename) ).to.equal(false) }) it('should wrap the error if there is a problem', async function() { - await expect( - FSPersistorManager.promises.checkIfFileExists(location, 'llama') - ) + await expect(FSPersistor.promises.checkIfFileExists(location, 'llama')) .to.eventually.be.rejected.and.be.an.instanceOf(Errors.ReadError) .and.have.property('cause', error) }) @@ -299,9 +328,7 @@ describe('FSPersistorManagerTests', function() { describe('directorySize', function() { it('should wrap the error', async function() { glob.yields(error) - await expect( - FSPersistorManager.promises.directorySize(location, files[0]) - ) + await expect(FSPersistor.promises.directorySize(location, files[0])) .to.eventually.be.rejected.and.be.an.instanceOf(Errors.ReadError) .and.include({ cause: error }) .and.have.property('info') @@ -309,7 +336,7 @@ describe('FSPersistorManagerTests', function() { }) it('should filter the directory name', async function() { - await FSPersistorManager.promises.directorySize(location, files[0]) + await FSPersistor.promises.directorySize(location, files[0]) expect(glob).to.have.been.calledWith( `${location}/${filteredFilenames[0]}_*` ) @@ -317,7 +344,7 @@ describe('FSPersistorManagerTests', function() { it('should sum directory files size', async function() { expect( - await FSPersistorManager.promises.directorySize(location, files[0]) + await FSPersistor.promises.directorySize(location, files[0]) ).to.equal(stat.size * files.length) }) }) diff --git a/services/filestore/test/unit/js/MigrationPersistorTests.js 
b/services/filestore/test/unit/js/MigrationPersistorTests.js new file mode 100644 index 0000000000..db8401c78c --- /dev/null +++ b/services/filestore/test/unit/js/MigrationPersistorTests.js @@ -0,0 +1,519 @@ +const sinon = require('sinon') +const chai = require('chai') +const { expect } = chai +const modulePath = '../../../app/js/MigrationPersistor.js' +const SandboxedModule = require('sandboxed-module') + +const Errors = require('../../../app/js/Errors') + +// Not all methods are tested here, but a method with each type of wrapping has +// tests. Specifically, the following wrapping methods are tested here: +// getFileStream: _wrapFallbackMethod +// sendStream: forward-to-primary +// deleteFile: _wrapMethodOnBothPersistors +// copyFile: copyFileWithFallback + +describe('MigrationPersistorTests', function() { + const bucket = 'womBucket' + const fallbackBucket = 'bucKangaroo' + const key = 'monKey' + const destKey = 'donKey' + const genericError = new Error('guru meditation error') + const notFoundError = new Errors.NotFoundError('not found') + const size = 33 + const md5 = 'ffffffff' + + let Metrics, + Settings, + Logger, + Stream, + MigrationPersistor, + fileStream, + newPersistor + + beforeEach(function() { + fileStream = { + name: 'fileStream', + on: sinon + .stub() + .withArgs('end') + .yields(), + pipe: sinon.stub() + } + + newPersistor = function(hasFile) { + return { + promises: { + sendFile: sinon.stub().resolves(), + sendStream: sinon.stub().resolves(), + getFileStream: hasFile + ? sinon.stub().resolves(fileStream) + : sinon.stub().rejects(notFoundError), + deleteDirectory: sinon.stub().resolves(), + getFileSize: hasFile + ? sinon.stub().resolves(size) + : sinon.stub().rejects(notFoundError), + deleteFile: sinon.stub().resolves(), + copyFile: hasFile + ? sinon.stub().resolves() + : sinon.stub().rejects(notFoundError), + checkIfFileExists: sinon.stub().resolves(hasFile), + directorySize: hasFile + ? sinon.stub().resolves(size) + : sinon.stub().rejects(notFoundError), + getFileMd5Hash: hasFile + ? 
sinon.stub().resolves(md5) + : sinon.stub().rejects(notFoundError) + } + } + } + + Settings = { + filestore: { + fallback: { + buckets: { + [bucket]: fallbackBucket + } + } + } + } + + Metrics = { + inc: sinon.stub() + } + + Stream = { + pipeline: sinon.stub().yields(), + PassThrough: sinon.stub() + } + + Logger = { + warn: sinon.stub() + } + + MigrationPersistor = SandboxedModule.require(modulePath, { + requires: { + 'settings-sharelatex': Settings, + stream: Stream, + './Errors': Errors, + 'metrics-sharelatex': Metrics, + 'logger-sharelatex': Logger + }, + globals: { console } + }) + }) + + describe('getFileStream', function() { + const options = { wombat: 'potato' } + describe('when the primary persistor has the file', function() { + let primaryPersistor, fallbackPersistor, migrationPersistor, response + beforeEach(async function() { + primaryPersistor = newPersistor(true) + fallbackPersistor = newPersistor(false) + migrationPersistor = MigrationPersistor( + primaryPersistor, + fallbackPersistor + ) + response = await migrationPersistor.promises.getFileStream( + bucket, + key, + options + ) + }) + + it('should return the file stream', function() { + expect(response).to.equal(fileStream) + }) + + it('should fetch the file from the primary persistor, with the correct options', function() { + expect( + primaryPersistor.promises.getFileStream + ).to.have.been.calledWithExactly(bucket, key, options) + }) + + it('should not query the fallback persistor', function() { + expect(fallbackPersistor.promises.getFileStream).not.to.have.been.called + }) + }) + + describe('when the fallback persistor has the file', function() { + let primaryPersistor, fallbackPersistor, migrationPersistor, response + beforeEach(async function() { + primaryPersistor = newPersistor(false) + fallbackPersistor = newPersistor(true) + migrationPersistor = MigrationPersistor( + primaryPersistor, + fallbackPersistor + ) + response = await migrationPersistor.promises.getFileStream( + bucket, + key, + options + ) + }) + + it('should return the file stream', function() { + expect(response).to.be.an.instanceOf(Stream.PassThrough) + }) + + it('should fetch the file from the primary persistor with the correct options', function() { + expect( + primaryPersistor.promises.getFileStream + ).to.have.been.calledWithExactly(bucket, key, options) + }) + + it('should fetch the file from the fallback persistor with the fallback bucket with the correct options', function() { + expect( + fallbackPersistor.promises.getFileStream + ).to.have.been.calledWithExactly(fallbackBucket, key, options) + }) + + it('should create one read stream', function() { + expect(fallbackPersistor.promises.getFileStream).to.have.been.calledOnce + }) + + it('should not send the file to the primary', function() { + expect(primaryPersistor.promises.sendStream).not.to.have.been.called + }) + }) + + describe('when the file should be copied to the primary', function() { + let primaryPersistor, + fallbackPersistor, + migrationPersistor, + returnedStream + beforeEach(async function() { + primaryPersistor = newPersistor(false) + fallbackPersistor = newPersistor(true) + migrationPersistor = MigrationPersistor( + primaryPersistor, + fallbackPersistor + ) + Settings.filestore.fallback.copyOnMiss = true + returnedStream = await migrationPersistor.promises.getFileStream( + bucket, + key, + options + ) + }) + + it('should create one read stream', function() { + expect(fallbackPersistor.promises.getFileStream).to.have.been.calledOnce + }) + + it('should get the md5 hash from the 
source', function() { + expect( + fallbackPersistor.promises.getFileMd5Hash + ).to.have.been.calledWith(fallbackBucket, key) + }) + + it('should send a stream to the primary', function() { + expect( + primaryPersistor.promises.sendStream + ).to.have.been.calledWithExactly( + bucket, + key, + sinon.match.instanceOf(Stream.PassThrough), + md5 + ) + }) + + it('should send a stream to the client', function() { + expect(returnedStream).to.be.an.instanceOf(Stream.PassThrough) + }) + }) + + describe('when neither persistor has the file', function() { + it('rejects with a NotFoundError', async function() { + const migrationPersistor = MigrationPersistor( + newPersistor(false), + newPersistor(false) + ) + return expect( + migrationPersistor.promises.getFileStream(bucket, key) + ).to.eventually.be.rejected.and.be.an.instanceOf(Errors.NotFoundError) + }) + }) + + describe('when the primary persistor throws an unexpected error', function() { + let primaryPersistor, fallbackPersistor, migrationPersistor, error + beforeEach(async function() { + primaryPersistor = newPersistor(false) + fallbackPersistor = newPersistor(true) + primaryPersistor.promises.getFileStream = sinon + .stub() + .rejects(genericError) + migrationPersistor = MigrationPersistor( + primaryPersistor, + fallbackPersistor + ) + try { + await migrationPersistor.promises.getFileStream(bucket, key, options) + } catch (err) { + error = err + } + }) + + it('rejects with the error', function() { + expect(error).to.equal(genericError) + }) + + it('does not call the fallback', function() { + expect(fallbackPersistor.promises.getFileStream).not.to.have.been.called + }) + }) + + describe('when the fallback persistor throws an unexpected error', function() { + let primaryPersistor, fallbackPersistor, migrationPersistor, error + beforeEach(async function() { + primaryPersistor = newPersistor(false) + fallbackPersistor = newPersistor(false) + fallbackPersistor.promises.getFileStream = sinon + .stub() + .rejects(genericError) + migrationPersistor = MigrationPersistor( + primaryPersistor, + fallbackPersistor + ) + try { + await migrationPersistor.promises.getFileStream(bucket, key, options) + } catch (err) { + error = err + } + }) + + it('rejects with the error', function() { + expect(error).to.equal(genericError) + }) + + it('should have called the fallback', function() { + expect( + fallbackPersistor.promises.getFileStream + ).to.have.been.calledWith(fallbackBucket, key) + }) + }) + }) + + describe('sendStream', function() { + let primaryPersistor, fallbackPersistor, migrationPersistor + beforeEach(function() { + primaryPersistor = newPersistor(false) + fallbackPersistor = newPersistor(false) + migrationPersistor = MigrationPersistor( + primaryPersistor, + fallbackPersistor + ) + }) + + describe('when it works', function() { + beforeEach(async function() { + return migrationPersistor.promises.sendStream(bucket, key, fileStream) + }) + + it('should send the file to the primary persistor', function() { + expect( + primaryPersistor.promises.sendStream + ).to.have.been.calledWithExactly(bucket, key, fileStream) + }) + + it('should not send the file to the fallback persistor', function() { + expect(fallbackPersistor.promises.sendStream).not.to.have.been.called + }) + }) + + describe('when the primary persistor throws an error', function() { + it('returns the error', async function() { + primaryPersistor.promises.sendStream.rejects(notFoundError) + return expect( + migrationPersistor.promises.sendStream(bucket, key, fileStream) + 
).to.eventually.be.rejected.and.be.an.instanceOf(Errors.NotFoundError) + }) + }) + }) + + describe('deleteFile', function() { + let primaryPersistor, fallbackPersistor, migrationPersistor + beforeEach(function() { + primaryPersistor = newPersistor(false) + fallbackPersistor = newPersistor(false) + migrationPersistor = MigrationPersistor( + primaryPersistor, + fallbackPersistor + ) + }) + + describe('when it works', function() { + beforeEach(async function() { + return migrationPersistor.promises.deleteFile(bucket, key) + }) + + it('should delete the file from the primary', function() { + expect( + primaryPersistor.promises.deleteFile + ).to.have.been.calledWithExactly(bucket, key) + }) + + it('should delete the file from the fallback', function() { + expect( + fallbackPersistor.promises.deleteFile + ).to.have.been.calledWithExactly(fallbackBucket, key) + }) + }) + + describe('when the primary persistor throws an error', function() { + let error + beforeEach(async function() { + primaryPersistor.promises.deleteFile.rejects(genericError) + try { + await migrationPersistor.promises.deleteFile(bucket, key) + } catch (err) { + error = err + } + }) + + it('should return the error', function() { + expect(error).to.equal(genericError) + }) + + it('should delete the file from the primary', function() { + expect( + primaryPersistor.promises.deleteFile + ).to.have.been.calledWithExactly(bucket, key) + }) + + it('should delete the file from the fallback', function() { + expect( + fallbackPersistor.promises.deleteFile + ).to.have.been.calledWithExactly(fallbackBucket, key) + }) + }) + + describe('when the fallback persistor throws an error', function() { + let error + beforeEach(async function() { + fallbackPersistor.promises.deleteFile.rejects(genericError) + try { + await migrationPersistor.promises.deleteFile(bucket, key) + } catch (err) { + error = err + } + }) + + it('should return the error', function() { + expect(error).to.equal(genericError) + }) + + it('should delete the file from the primary', function() { + expect( + primaryPersistor.promises.deleteFile + ).to.have.been.calledWithExactly(bucket, key) + }) + + it('should delete the file from the fallback', function() { + expect( + fallbackPersistor.promises.deleteFile + ).to.have.been.calledWithExactly(fallbackBucket, key) + }) + }) + }) + + describe('copyFile', function() { + describe('when the file exists on the primary', function() { + let primaryPersistor, fallbackPersistor, migrationPersistor + beforeEach(async function() { + primaryPersistor = newPersistor(true) + fallbackPersistor = newPersistor(false) + migrationPersistor = MigrationPersistor( + primaryPersistor, + fallbackPersistor + ) + return migrationPersistor.promises.copyFile(bucket, key, destKey) + }) + + it('should call copyFile to copy the file', function() { + expect( + primaryPersistor.promises.copyFile + ).to.have.been.calledWithExactly(bucket, key, destKey) + }) + + it('should not try to read from the fallback', function() { + expect(fallbackPersistor.promises.getFileStream).not.to.have.been.called + }) + }) + + describe('when the file does not exist on the primary', function() { + let primaryPersistor, fallbackPersistor, migrationPersistor + beforeEach(async function() { + primaryPersistor = newPersistor(false) + fallbackPersistor = newPersistor(true) + migrationPersistor = MigrationPersistor( + primaryPersistor, + fallbackPersistor + ) + return migrationPersistor.promises.copyFile(bucket, key, destKey) + }) + + it('should call copyFile to copy the file', function() { + 
expect( + primaryPersistor.promises.copyFile + ).to.have.been.calledWithExactly(bucket, key, destKey) + }) + + it('should fetch the file from the fallback', function() { + expect( + fallbackPersistor.promises.getFileStream + ).to.have.been.calledWith(fallbackBucket, key) + }) + + it('should get the md5 hash from the source', function() { + expect( + fallbackPersistor.promises.getFileMd5Hash + ).to.have.been.calledWith(fallbackBucket, key) + }) + + it('should send the file to the primary', function() { + expect( + primaryPersistor.promises.sendStream + ).to.have.been.calledWithExactly( + bucket, + destKey, + sinon.match.instanceOf(Stream.PassThrough), + md5 + ) + }) + }) + + describe('when the file does not exist on the fallback', function() { + let primaryPersistor, fallbackPersistor, migrationPersistor, error + beforeEach(async function() { + primaryPersistor = newPersistor(false) + fallbackPersistor = newPersistor(false) + migrationPersistor = MigrationPersistor( + primaryPersistor, + fallbackPersistor + ) + try { + await migrationPersistor.promises.copyFile(bucket, key, destKey) + } catch (err) { + error = err + } + }) + + it('should call copyFile to copy the file', function() { + expect( + primaryPersistor.promises.copyFile + ).to.have.been.calledWithExactly(bucket, key, destKey) + }) + + it('should fetch the file from the fallback', function() { + expect( + fallbackPersistor.promises.getFileStream + ).to.have.been.calledWith(fallbackBucket, key) + }) + + it('should return a not-found error', function() { + expect(error).to.be.an.instanceOf(Errors.NotFoundError) + }) + }) + }) +}) diff --git a/services/filestore/test/unit/js/PersistorManagerTests.js b/services/filestore/test/unit/js/PersistorManagerTests.js index 0ecbb22078..cdc9de0f92 100644 --- a/services/filestore/test/unit/js/PersistorManagerTests.js +++ b/services/filestore/test/unit/js/PersistorManagerTests.js @@ -6,18 +6,14 @@ const SandboxedModule = require('sandboxed-module') const modulePath = '../../../app/js/PersistorManager.js' describe('PersistorManager', function() { - let PersistorManager, - FSPersistorManager, - S3PersistorManager, - settings, - requires + let PersistorManager, FSPersistor, S3Persistor, settings, requires beforeEach(function() { - FSPersistorManager = { - wrappedMethod: sinon.stub().returns('FSPersistorManager') + FSPersistor = { + wrappedMethod: sinon.stub().returns('FSPersistor') } - S3PersistorManager = { - wrappedMethod: sinon.stub().returns('S3PersistorManager') + S3Persistor = { + wrappedMethod: sinon.stub().returns('S3Persistor') } settings = { @@ -25,8 +21,8 @@ describe('PersistorManager', function() { } requires = { - './S3PersistorManager': S3PersistorManager, - './FSPersistorManager': FSPersistorManager, + './S3Persistor': S3Persistor, + './FSPersistor': FSPersistor, 'settings-sharelatex': settings, 'logger-sharelatex': { log() {}, @@ -40,7 +36,7 @@ describe('PersistorManager', function() { PersistorManager = SandboxedModule.require(modulePath, { requires }) expect(PersistorManager).to.respondTo('wrappedMethod') - expect(PersistorManager.wrappedMethod()).to.equal('S3PersistorManager') + expect(PersistorManager.wrappedMethod()).to.equal('S3Persistor') }) it("should implement the S3 wrapped method when 'aws-sdk' is configured", function() { @@ -48,7 +44,7 @@ PersistorManager = SandboxedModule.require(modulePath, { requires }) expect(PersistorManager).to.respondTo('wrappedMethod') - 
expect(PersistorManager.wrappedMethod()).to.equal('S3PersistorManager') + expect(PersistorManager.wrappedMethod()).to.equal('S3Persistor') }) it('should implement the FS wrapped method when FS is configured', function() { @@ -56,7 +52,7 @@ describe('PersistorManager', function() { PersistorManager = SandboxedModule.require(modulePath, { requires }) expect(PersistorManager).to.respondTo('wrappedMethod') - expect(PersistorManager.wrappedMethod()).to.equal('FSPersistorManager') + expect(PersistorManager.wrappedMethod()).to.equal('FSPersistor') }) it('should throw an error when the backend is not configured', function() { diff --git a/services/filestore/test/unit/js/S3PersistorManagerTests.js b/services/filestore/test/unit/js/S3PersistorTests.js similarity index 78% rename from services/filestore/test/unit/js/S3PersistorManagerTests.js rename to services/filestore/test/unit/js/S3PersistorTests.js index daeac66d3f..9686deed5f 100644 --- a/services/filestore/test/unit/js/S3PersistorManagerTests.js +++ b/services/filestore/test/unit/js/S3PersistorTests.js @@ -1,12 +1,12 @@ const sinon = require('sinon') const chai = require('chai') const { expect } = chai -const modulePath = '../../../app/js/S3PersistorManager.js' +const modulePath = '../../../app/js/S3Persistor.js' const SandboxedModule = require('sandboxed-module') const Errors = require('../../../app/js/Errors') -describe('S3PersistorManagerTests', function() { +describe('S3PersistorTests', function() { const defaultS3Key = 'frog' const defaultS3Secret = 'prince' const defaultS3Credentials = { @@ -26,21 +26,26 @@ describe('S3PersistorManagerTests', function() { { Key: 'hippo', Size: 22 } ] const filesSize = 33 + const md5 = 'ffffffff00000000ffffffff00000000' let Metrics, + Logger, S3, Fs, Meter, MeteredStream, ReadStream, - S3PersistorManager, + Stream, + S3Persistor, S3Client, S3ReadStream, S3NotFoundError, S3AccessDeniedError, FileNotFoundError, EmptyPromise, - settings + settings, + Hash, + crypto beforeEach(function() { settings = { @@ -56,6 +61,10 @@ describe('S3PersistorManagerTests', function() { } } + Stream = { + pipeline: sinon.stub().yields() + } + EmptyPromise = { promise: sinon.stub().resolves() } @@ -65,7 +74,11 @@ describe('S3PersistorManagerTests', function() { } ReadStream = { - pipe: sinon.stub().returns('readStream') + pipe: sinon.stub().returns('readStream'), + on: sinon + .stub() + .withArgs('end') + .yields() } FileNotFoundError = new Error('File not found') @@ -76,6 +89,7 @@ describe('S3PersistorManagerTests', function() { } MeteredStream = { + type: 'metered', on: sinon.stub(), bytes: objectSize } @@ -90,7 +104,7 @@ describe('S3PersistorManagerTests', function() { S3ReadStream = { on: sinon.stub(), - pipe: sinon.stub().returns('s3Stream'), + pipe: sinon.stub(), removeListener: sinon.stub() } S3ReadStream.on.withArgs('readable').yields() @@ -100,7 +114,8 @@ describe('S3PersistorManagerTests', function() { }), headObject: sinon.stub().returns({ promise: sinon.stub().resolves({ - ContentLength: objectSize + ContentLength: objectSize, + ETag: md5 }) }), listObjects: sinon.stub().returns({ @@ -108,21 +123,39 @@ describe('S3PersistorManagerTests', function() { Contents: files }) }), - upload: sinon.stub().returns(EmptyPromise), + upload: sinon + .stub() + .returns({ promise: sinon.stub().resolves({ ETag: `"${md5}"` }) }), copyObject: sinon.stub().returns(EmptyPromise), deleteObject: sinon.stub().returns(EmptyPromise), deleteObjects: sinon.stub().returns(EmptyPromise) } S3 = sinon.stub().returns(S3Client) - S3PersistorManager 
= SandboxedModule.require(modulePath, { + Hash = { + end: sinon.stub(), + read: sinon.stub().returns(md5), + setEncoding: sinon.stub() + } + crypto = { + createHash: sinon.stub().returns(Hash) + } + + Logger = { + warn: sinon.stub() + } + + S3Persistor = SandboxedModule.require(modulePath, { requires: { 'aws-sdk/clients/s3': S3, 'settings-sharelatex': settings, + 'logger-sharelatex': Logger, './Errors': Errors, fs: Fs, 'stream-meter': Meter, - 'metrics-sharelatex': Metrics + stream: Stream, + 'metrics-sharelatex': Metrics, + crypto }, globals: { console } }) @@ -133,11 +166,11 @@ describe('S3PersistorManagerTests', function() { let stream beforeEach(async function() { - stream = await S3PersistorManager.promises.getFileStream(bucket, key) + stream = await S3Persistor.promises.getFileStream(bucket, key) }) - it('returns a stream', function() { - expect(stream).to.equal('s3Stream') + it('returns a metered stream', function() { + expect(stream).to.equal(MeteredStream) }) it('sets the AWS client up with credentials from settings', function() { @@ -152,7 +185,10 @@ describe('S3PersistorManagerTests', function() { }) it('pipes the stream through the meter', function() { - expect(S3ReadStream.pipe).to.have.been.calledWith(MeteredStream) + expect(Stream.pipeline).to.have.been.calledWith( + S3ReadStream, + MeteredStream + ) }) it('records an ingress metric', function() { @@ -164,14 +200,14 @@ describe('S3PersistorManagerTests', function() { let stream beforeEach(async function() { - stream = await S3PersistorManager.promises.getFileStream(bucket, key, { + stream = await S3Persistor.promises.getFileStream(bucket, key, { start: 5, end: 10 }) }) - it('returns a stream', function() { - expect(stream).to.equal('s3Stream') + it('returns a metered stream', function() { + expect(stream).to.equal(MeteredStream) }) it('passes the byte range on to S3', function() { @@ -201,11 +237,11 @@ describe('S3PersistorManagerTests', function() { auth_secret: alternativeSecret } - stream = await S3PersistorManager.promises.getFileStream(bucket, key) + stream = await S3Persistor.promises.getFileStream(bucket, key) }) - it('returns a stream', function() { - expect(stream).to.equal('s3Stream') + it('returns a metered stream', function() { + expect(stream).to.equal(MeteredStream) }) it('sets the AWS client up with the alternative credentials', function() { @@ -220,16 +256,13 @@ describe('S3PersistorManagerTests', function() { }) it('caches the credentials', async function() { - stream = await S3PersistorManager.promises.getFileStream(bucket, key) + stream = await S3Persistor.promises.getFileStream(bucket, key) expect(S3).to.have.been.calledOnceWith(alternativeS3Credentials) }) it('uses the default credentials for an unknown bucket', async function() { - stream = await S3PersistorManager.promises.getFileStream( - 'anotherBucket', - key - ) + stream = await S3Persistor.promises.getFileStream('anotherBucket', key) expect(S3).to.have.been.calledTwice expect(S3.firstCall).to.have.been.calledWith(alternativeS3Credentials) @@ -237,14 +270,8 @@ describe('S3PersistorManagerTests', function() { }) it('caches the default credentials', async function() { - stream = await S3PersistorManager.promises.getFileStream( - 'anotherBucket', - key - ) - stream = await S3PersistorManager.promises.getFileStream( - 'anotherBucket', - key - ) + stream = await S3Persistor.promises.getFileStream('anotherBucket', key) + stream = await S3Persistor.promises.getFileStream('anotherBucket', key) expect(S3).to.have.been.calledTwice 
expect(S3.firstCall).to.have.been.calledWith(alternativeS3Credentials) @@ -256,7 +283,7 @@ describe('S3PersistorManagerTests', function() { delete settings.filestore.s3.secret await expect( - S3PersistorManager.promises.getFileStream('anotherBucket', key) + S3Persistor.promises.getFileStream('anotherBucket', key) ).to.eventually.be.rejected.and.be.an.instanceOf(Errors.SettingsError) }) }) @@ -268,7 +295,7 @@ describe('S3PersistorManagerTests', function() { S3ReadStream.on = sinon.stub() S3ReadStream.on.withArgs('error').yields(S3NotFoundError) try { - stream = await S3PersistorManager.promises.getFileStream(bucket, key) + stream = await S3Persistor.promises.getFileStream(bucket, key) } catch (err) { error = err } @@ -282,12 +309,12 @@ describe('S3PersistorManagerTests', function() { expect(error).to.be.an.instanceOf(Errors.NotFoundError) }) - it('wraps the error from S3', function() { - expect(error.cause).to.equal(S3NotFoundError) + it('wraps the error', function() { + expect(error.cause).to.exist }) it('stores the bucket and key in the error', function() { - expect(error.info).to.deep.equal({ Bucket: bucket, Key: key }) + expect(error.info).to.include({ bucketName: bucket, key: key }) }) }) @@ -298,7 +325,7 @@ describe('S3PersistorManagerTests', function() { S3ReadStream.on = sinon.stub() S3ReadStream.on.withArgs('error').yields(S3AccessDeniedError) try { - stream = await S3PersistorManager.promises.getFileStream(bucket, key) + stream = await S3Persistor.promises.getFileStream(bucket, key) } catch (err) { error = err } @@ -312,12 +339,12 @@ describe('S3PersistorManagerTests', function() { expect(error).to.be.an.instanceOf(Errors.NotFoundError) }) - it('wraps the error from S3', function() { - expect(error.cause).to.equal(S3AccessDeniedError) + it('wraps the error', function() { + expect(error.cause).to.exist }) it('stores the bucket and key in the error', function() { - expect(error.info).to.deep.equal({ Bucket: bucket, Key: key }) + expect(error.info).to.include({ bucketName: bucket, key: key }) }) }) @@ -328,7 +355,7 @@ describe('S3PersistorManagerTests', function() { S3ReadStream.on = sinon.stub() S3ReadStream.on.withArgs('error').yields(genericError) try { - stream = await S3PersistorManager.promises.getFileStream(bucket, key) + stream = await S3Persistor.promises.getFileStream(bucket, key) } catch (err) { error = err } @@ -342,12 +369,12 @@ describe('S3PersistorManagerTests', function() { expect(error).to.be.an.instanceOf(Errors.ReadError) }) - it('wraps the error from S3', function() { - expect(error.cause).to.equal(genericError) + it('wraps the error', function() { + expect(error.cause).to.exist }) it('stores the bucket and key in the error', function() { - expect(error.info).to.deep.equal({ Bucket: bucket, Key: key }) + expect(error.info).to.include({ bucketName: bucket, key: key }) }) }) }) @@ -357,7 +384,7 @@ describe('S3PersistorManagerTests', function() { let size beforeEach(async function() { - size = await S3PersistorManager.promises.getFileSize(bucket, key) + size = await S3Persistor.promises.getFileSize(bucket, key) }) it('should return the object size', function() { @@ -380,7 +407,7 @@ describe('S3PersistorManagerTests', function() { promise: sinon.stub().rejects(S3NotFoundError) }) try { - await S3PersistorManager.promises.getFileSize(bucket, key) + await S3Persistor.promises.getFileSize(bucket, key) } catch (err) { error = err } @@ -403,7 +430,7 @@ describe('S3PersistorManagerTests', function() { promise: sinon.stub().rejects(genericError) }) try { - await 
S3PersistorManager.promises.getFileSize(bucket, key) + await S3Persistor.promises.getFileSize(bucket, key) } catch (err) { error = err } @@ -422,24 +449,62 @@ describe('S3PersistorManagerTests', function() { describe('sendStream', function() { describe('with valid parameters', function() { beforeEach(async function() { - return S3PersistorManager.promises.sendStream(bucket, key, ReadStream) + return S3Persistor.promises.sendStream(bucket, key, ReadStream) }) it('should upload the stream', function() { expect(S3Client.upload).to.have.been.calledWith({ Bucket: bucket, Key: key, - Body: 'readStream' + Body: MeteredStream }) }) it('should meter the stream', function() { - expect(ReadStream.pipe).to.have.been.calledWith(MeteredStream) + expect(Stream.pipeline).to.have.been.calledWith( + ReadStream, + MeteredStream + ) }) it('should record an egress metric', function() { expect(Metrics.count).to.have.been.calledWith('s3.egress', objectSize) }) + + it('calculates the md5 hash of the file', function() { + expect(Stream.pipeline).to.have.been.calledWith(ReadStream, Hash) + }) + }) + + describe('when a hash is supplied', function() { + beforeEach(async function() { + return S3Persistor.promises.sendStream( + bucket, + key, + ReadStream, + 'aaaaaaaabbbbbbbbaaaaaaaabbbbbbbb' + ) + }) + + it('should not calculate the md5 hash of the file', function() { + expect(Stream.pipeline).not.to.have.been.calledWith( + sinon.match.any, + Hash + ) + }) + + it('sends the hash in base64', function() { + expect(S3Client.upload).to.have.been.calledWith({ + Bucket: bucket, + Key: key, + Body: MeteredStream, + ContentMD5: 'qqqqqru7u7uqqqqqu7u7uw==' + }) + }) + + it('does not fetch the md5 hash of the uploaded file', function() { + expect(S3Client.headObject).not.to.have.been.called + }) }) describe('when the upload fails', function() { @@ -449,7 +514,7 @@ promise: sinon.stub().rejects(genericError) }) try { - await S3PersistorManager.promises.sendStream(bucket, key, ReadStream) + await S3Persistor.promises.sendStream(bucket, key, ReadStream) } catch (err) { error = err } @@ -464,7 +529,7 @@ describe('sendFile', function() { describe('with valid parameters', function() { beforeEach(async function() { - return S3PersistorManager.promises.sendFile(bucket, key, filename) + return S3Persistor.promises.sendFile(bucket, key, filename) }) it('should create a read stream for the file', function() { @@ -475,7 +540,7 @@ expect(S3Client.upload).to.have.been.calledWith({ Bucket: bucket, Key: key, - Body: 'readStream' + Body: MeteredStream }) }) }) @@ -486,7 +551,7 @@ beforeEach(async function() { Fs.createReadStream = sinon.stub().throws(FileNotFoundError) try { - await S3PersistorManager.promises.sendFile(bucket, key, filename) + await S3Persistor.promises.sendFile(bucket, key, filename) } catch (err) { error = err } @@ -507,7 +572,7 @@ beforeEach(async function() { Fs.createReadStream = sinon.stub().throws(genericError) try { - await S3PersistorManager.promises.sendFile(bucket, key, filename) + await S3Persistor.promises.sendFile(bucket, key, filename) } catch (err) { error = err } @@ -526,7 +591,7 @@ describe('copyFile', function() { describe('with valid parameters', function() { beforeEach(async function() { - return 
S3PersistorManager.promises.copyFile(bucket, key, destKey) + return S3Persistor.promises.copyFile(bucket, key, destKey) }) it('should copy the object', function() { @@ -546,7 +611,7 @@ describe('S3PersistorManagerTests', function() { promise: sinon.stub().rejects(S3NotFoundError) }) try { - await S3PersistorManager.promises.copyFile(bucket, key, destKey) + await S3Persistor.promises.copyFile(bucket, key, destKey) } catch (err) { error = err } @@ -561,7 +626,7 @@ describe('S3PersistorManagerTests', function() { describe('deleteFile', function() { describe('with valid parameters', function() { beforeEach(async function() { - return S3PersistorManager.promises.deleteFile(bucket, key) + return S3Persistor.promises.deleteFile(bucket, key) }) it('should delete the object', function() { @@ -580,7 +645,7 @@ describe('S3PersistorManagerTests', function() { promise: sinon.stub().rejects(S3NotFoundError) }) try { - await S3PersistorManager.promises.deleteFile(bucket, key) + await S3Persistor.promises.deleteFile(bucket, key) } catch (err) { error = err } @@ -595,7 +660,7 @@ describe('S3PersistorManagerTests', function() { describe('deleteDirectory', function() { describe('with valid parameters', function() { beforeEach(async function() { - return S3PersistorManager.promises.deleteDirectory(bucket, key) + return S3Persistor.promises.deleteDirectory(bucket, key) }) it('should list the objects in the directory', function() { @@ -621,7 +686,7 @@ describe('S3PersistorManagerTests', function() { S3Client.listObjects = sinon .stub() .returns({ promise: sinon.stub().resolves({ Contents: [] }) }) - return S3PersistorManager.promises.deleteDirectory(bucket, key) + return S3Persistor.promises.deleteDirectory(bucket, key) }) it('should list the objects in the directory', function() { @@ -644,7 +709,7 @@ describe('S3PersistorManagerTests', function() { .stub() .returns({ promise: sinon.stub().rejects(genericError) }) try { - await S3PersistorManager.promises.deleteDirectory(bucket, key) + await S3Persistor.promises.deleteDirectory(bucket, key) } catch (err) { error = err } @@ -671,7 +736,7 @@ describe('S3PersistorManagerTests', function() { .stub() .returns({ promise: sinon.stub().rejects(genericError) }) try { - await S3PersistorManager.promises.deleteDirectory(bucket, key) + await S3Persistor.promises.deleteDirectory(bucket, key) } catch (err) { error = err } @@ -692,7 +757,7 @@ describe('S3PersistorManagerTests', function() { let size beforeEach(async function() { - size = await S3PersistorManager.promises.directorySize(bucket, key) + size = await S3Persistor.promises.directorySize(bucket, key) }) it('should list the objects in the directory', function() { @@ -714,7 +779,7 @@ describe('S3PersistorManagerTests', function() { S3Client.listObjects = sinon .stub() .returns({ promise: sinon.stub().resolves({ Contents: [] }) }) - size = await S3PersistorManager.promises.directorySize(bucket, key) + size = await S3Persistor.promises.directorySize(bucket, key) }) it('should list the objects in the directory', function() { @@ -737,7 +802,7 @@ describe('S3PersistorManagerTests', function() { .stub() .returns({ promise: sinon.stub().rejects(genericError) }) try { - await S3PersistorManager.promises.directorySize(bucket, key) + await S3Persistor.promises.directorySize(bucket, key) } catch (err) { error = err } @@ -758,10 +823,7 @@ describe('S3PersistorManagerTests', function() { let exists beforeEach(async function() { - exists = await S3PersistorManager.promises.checkIfFileExists( - bucket, - key - ) + exists = await 
S3Persistor.promises.checkIfFileExists(bucket, key) }) it('should get the object header', function() { @@ -783,10 +845,7 @@ describe('S3PersistorManagerTests', function() { S3Client.headObject = sinon .stub() .returns({ promise: sinon.stub().rejects(S3NotFoundError) }) - exists = await S3PersistorManager.promises.checkIfFileExists( - bucket, - key - ) + exists = await S3Persistor.promises.checkIfFileExists(bucket, key) }) it('should get the object header', function() { @@ -809,7 +868,7 @@ describe('S3PersistorManagerTests', function() { .stub() .returns({ promise: sinon.stub().rejects(genericError) }) try { - await S3PersistorManager.promises.checkIfFileExists(bucket, key) + await S3Persistor.promises.checkIfFileExists(bucket, key) } catch (err) { error = err }
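// ----------------------------------------------------------------------------
// Editor's note: the MigrationPersistorTests above exercise a fallback read
// path with an optional copy-on-miss. Below is a minimal sketch of that
// behaviour, NOT the actual MigrationPersistor implementation. It assumes
// promise-based persistors exposing the getFileStream, getFileMd5Hash and
// sendStream signatures stubbed in those tests; the function name and the
// `err.name === 'NotFoundError'` check are illustrative assumptions.
const Stream = require('stream')

async function getFileStreamWithFallback(
  primary,
  fallback,
  settings,
  bucket,
  key,
  opts
) {
  try {
    // serve from the primary when it has the file
    return await primary.promises.getFileStream(bucket, key, opts)
  } catch (err) {
    // only a missing file triggers the fallback; other errors propagate
    if (err.name !== 'NotFoundError') {
      throw err
    }
    // the fallback usually has a different bucket name, looked up in settings
    const fallbackBucket = settings.filestore.fallback.buckets[bucket]
    const sourceStream = await fallback.promises.getFileStream(
      fallbackBucket,
      key,
      opts
    )
    if (!settings.filestore.fallback.copyOnMiss) {
      return sourceStream
    }
    // tee the fallback stream: one branch goes back to the caller, the other
    // is copied to the primary in the background, tagged with the source md5
    // so the receiving persistor can verify the write
    const returnedStream = new Stream.PassThrough()
    const copyStream = new Stream.PassThrough()
    sourceStream.pipe(returnedStream)
    sourceStream.pipe(copyStream)
    const sourceMd5 = await fallback.promises.getFileMd5Hash(fallbackBucket, key)
    primary.promises.sendStream(bucket, key, copyStream, sourceMd5).catch(() => {
      // a failed background copy must not break the read the client asked for
    })
    return returnedStream
  }
}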
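// ----------------------------------------------------------------------------
// Editor's note: the 'sends the hash in base64' expectation in the sendStream
// tests follows from the standard hex-to-base64 conversion that S3 requires
// for the ContentMD5 header. A quick illustration using only Node's built-in
// Buffer, with the same hex digest the test supplies:
const hexMd5 = 'aaaaaaaabbbbbbbbaaaaaaaabbbbbbbb'
const contentMd5 = Buffer.from(hexMd5, 'hex').toString('base64')
// contentMd5 === 'qqqqqru7u7uqqqqqu7u7uw==' — the value asserted in the test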