Add mechanisms to transfer files with md5-based integrity checks

Fix error in settings and tidy up tests

Remove unused variable declaration

Remove .only from tests and update eslint rules to catch it in future

Use pipeline to catch errors more safely when getting md5 hash

Avoid unnecessary call to S3 to get md5 response
Simon Detheridge 2020-01-27 11:26:37 +00:00
parent 2625e03a31
commit b4b7fd226e
11 changed files with 535 additions and 97 deletions
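
In outline, the change computes an md5 hash of the source file, passes it along with the upload, and compares it against the destination's hash, deleting the uploaded copy on a mismatch. A minimal sketch of that pattern, assuming a hypothetical backend object with uploadFile, getRemoteMd5 and deleteFile methods (not the persistor API in the diffs below):

const crypto = require('crypto')
const fs = require('fs')

// Hash a file on disk: a simplified stand-in for the stream-based
// _getFileMd5HashForPath helper added below.
async function md5OfFile(path) {
  const hash = crypto.createHash('md5')
  for await (const chunk of fs.createReadStream(path)) {
    hash.update(chunk)
  }
  return hash.digest('hex')
}

// Verify-after-upload: compare source and destination hashes, and clean up
// the destination copy rather than leave a possibly corrupt object behind.
async function uploadWithVerification(backend, localPath, key) {
  const sourceMd5 = await md5OfFile(localPath)
  await backend.uploadFile(localPath, key)
  const destMd5 = await backend.getRemoteMd5(key)
  if (sourceMd5 !== destMd5) {
    await backend.deleteFile(key)
    throw new Error(`md5 mismatch: ${sourceMd5} !== ${destMd5}`)
  }
}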

View file

@@ -23,7 +23,8 @@
"rules": {
// Swap the no-unused-expressions rule with a more chai-friendly one
"no-unused-expressions": 0,
"chai-friendly/no-unused-expressions": "error"
"chai-friendly/no-unused-expressions": "error",
"no-console": "error"
},
"overrides": [
{

View file

@@ -1,6 +1,7 @@
const fs = require('fs')
const glob = require('glob')
const path = require('path')
const crypto = require('crypto')
const rimraf = require('rimraf')
const Stream = require('stream')
const { promisify, callbackify } = require('util')
@@ -36,11 +37,22 @@ async function sendFile(location, target, source) {
}
}
async function sendStream(location, target, sourceStream) {
async function sendStream(location, target, sourceStream, sourceMd5) {
const fsPath = await LocalFileWriter.writeStream(sourceStream)
if (!sourceMd5) {
sourceMd5 = await _getFileMd5HashForPath(fsPath)
}
try {
await sendFile(location, target, fsPath)
const destMd5 = await getFileMd5Hash(location, target)
if (sourceMd5 !== destMd5) {
await LocalFileWriter.deleteFile(`${location}/${filterName(target)}`)
throw new WriteError({
message: 'md5 hash mismatch',
info: { sourceMd5, destMd5, location, target }
})
}
} finally {
await LocalFileWriter.deleteFile(fsPath)
}
@@ -80,6 +92,31 @@ async function getFileSize(location, filename) {
}
}
async function getFileMd5Hash(location, filename) {
const fullPath = path.join(location, filterName(filename))
try {
return await _getFileMd5HashForPath(fullPath)
} catch (err) {
throw new ReadError({
message: 'unable to get md5 hash from file',
info: { location, filename }
}).withCause(err)
}
}
async function _getFileMd5HashForPath(fullPath) {
return new Promise((resolve, reject) => {
const readStream = fs.createReadStream(fullPath)
const hash = crypto.createHash('md5')
hash.setEncoding('hex')
readStream.on('end', () => {
hash.end()
resolve(hash.read())
})
pipeline(readStream, hash).catch(reject)
})
}
async function copyFile(location, fromName, toName) {
const filteredFromName = filterName(fromName)
const filteredToName = filterName(toName)
@@ -202,6 +239,7 @@ module.exports = {
sendStream: callbackify(sendStream),
getFileStream: callbackify(getFileStream),
getFileSize: callbackify(getFileSize),
getFileMd5Hash: callbackify(getFileMd5Hash),
copyFile: callbackify(copyFile),
deleteFile: callbackify(deleteFile),
deleteDirectory: callbackify(deleteDirectory),
@@ -212,6 +250,7 @@ module.exports = {
sendStream,
getFileStream,
getFileSize,
getFileMd5Hash,
copyFile,
deleteFile,
deleteDirectory,
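
A hedged usage sketch of the promise API exported above; the location, target and source path are invented, and the require path assumes the repository root:

const fs = require('fs')
const FSPersistor = require('./app/js/FSPersistor')

async function demo() {
  const location = '/tmp/filestore'
  const target = 'project-id/file-id'
  // With no sourceMd5 argument, sendStream hashes the buffered file itself
  // before comparing it against the md5 of the stored copy.
  await FSPersistor.promises.sendStream(
    location,
    target,
    fs.createReadStream('/tmp/source.txt')
  )
  return FSPersistor.promises.getFileMd5Hash(location, target)
}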

View file

@@ -1,8 +1,9 @@
const metrics = require('metrics-sharelatex')
const Settings = require('settings-sharelatex')
const logger = require('logger-sharelatex')
const Minipass = require('minipass')
const { callbackify } = require('util')
const { NotFoundError } = require('./Errors')
const { NotFoundError, WriteError } = require('./Errors')
// Persistor that wraps two other persistors. Talks to the 'primary' by default,
// but will fall back to an older persistor in the case of a not-found error.
@@ -14,7 +15,7 @@ const { NotFoundError } = require('./Errors')
// e.g.
// Settings.filestore.fallback.buckets = {
// myBucketOnS3: 'myBucketOnGCS'
// }s
// }
module.exports = function(primary, fallback) {
function _wrapMethodOnBothPersistors(method) {
@@ -40,10 +41,7 @@ module.exports = function(primary, fallback) {
}
function _getFallbackBucket(bucket) {
return (
Settings.filestore.fallback.buckets &&
Settings.filestore.fallback.buckets[bucket]
)
return Settings.filestore.fallback.buckets[bucket]
}
function _wrapFallbackMethod(method, enableCopy = true) {
@@ -68,20 +66,130 @@ module.exports = function(primary, fallback) {
}
}
async function _copyFileFromFallback(
async function _getFileStreamAndCopyIfRequired(bucketName, key, opts) {
const shouldCopy =
Settings.filestore.fallback.copyOnMiss && !opts.start && !opts.end
try {
return await primary.promises.getFileStream(bucketName, key, opts)
} catch (err) {
if (err instanceof NotFoundError) {
const fallbackBucket = _getFallbackBucket(bucketName)
if (shouldCopy) {
return _copyFileFromFallback(
fallbackBucket,
bucketName,
key,
key,
true
)
} else {
return fallback.promises.getFileStream(fallbackBucket, key, opts)
}
}
throw err
}
}
async function _copyFromFallbackStreamAndVerify(
stream,
sourceBucket,
destBucket,
sourceKey,
destKey
) {
try {
let sourceMd5
try {
sourceMd5 = await fallback.promises.getFileMd5Hash(
sourceBucket,
sourceKey
)
} catch (err) {
logger.warn(err, 'error getting md5 hash from fallback persistor')
}
await primary.promises.sendStream(destBucket, destKey, stream, sourceMd5)
} catch (err) {
let error = err
metrics.inc('fallback.copy.failure')
try {
await primary.promises.deleteFile(destBucket, destKey)
} catch (err) {
error = new WriteError({
message: 'unable to clean up destination copy artifact',
info: {
destBucket,
destKey
}
}).withCause(err)
}
error = new WriteError({
message: 'unable to copy file to destination persistor',
info: {
sourceBucket,
destBucket,
sourceKey,
destKey
}
}).withCause(error)
logger.warn({ error }, 'failed to copy file from fallback')
throw error
}
}
async function _copyFileFromFallback(
sourceBucket,
destBucket,
sourceKey,
destKey,
returnStream = false
) {
metrics.inc('fallback.copy')
const sourceStream = await fallback.promises.getFileStream(
sourceBucket,
sourceKey,
{}
)
await primary.promises.sendStream(destBucket, destKey, sourceStream)
metrics.inc('fallback.copy')
if (!returnStream) {
return _copyFromFallbackStreamAndVerify(
sourceStream,
sourceBucket,
destBucket,
sourceKey,
destKey
)
}
const tee = new Minipass()
const clientStream = new Minipass()
const copyStream = new Minipass()
tee.pipe(clientStream)
tee.pipe(copyStream)
// copy the file in the background
_copyFromFallbackStreamAndVerify(
copyStream,
sourceBucket,
destBucket,
sourceKey,
destKey
).catch(
// the error handler in this method will log a metric and a warning, so
// we don't need to do anything extra here, but catching it will prevent
// unhandled promise rejection warnings
() => {}
)
// start piping the source stream into the tee after everything is set up,
// otherwise one stream may consume bytes that don't arrive at the other
sourceStream.pipe(tee)
return clientStream
}
return {
@@ -89,7 +197,8 @@ module.exports = function(primary, fallback) {
fallbackPersistor: fallback,
sendFile: primary.sendFile,
sendStream: primary.sendStream,
getFileStream: callbackify(_wrapFallbackMethod('getFileStream')),
getFileStream: callbackify(_getFileStreamAndCopyIfRequired),
getFileMd5Hash: callbackify(_wrapFallbackMethod('getFileMd5Hash')),
deleteDirectory: callbackify(
_wrapMethodOnBothPersistors('deleteDirectory')
),
@@ -97,17 +206,18 @@ module.exports = function(primary, fallback) {
deleteFile: callbackify(_wrapMethodOnBothPersistors('deleteFile')),
copyFile: callbackify(copyFileWithFallback),
checkIfFileExists: callbackify(_wrapFallbackMethod('checkIfFileExists')),
directorySize: callbackify(_wrapFallbackMethod('directorySize', false)),
directorySize: callbackify(_wrapFallbackMethod('directorySize')),
promises: {
sendFile: primary.promises.sendFile,
sendStream: primary.promises.sendStream,
getFileStream: _wrapFallbackMethod('getFileStream'),
getFileStream: _getFileStreamAndCopyIfRequired,
getFileMd5Hash: _wrapFallbackMethod('getFileMd5Hash'),
deleteDirectory: _wrapMethodOnBothPersistors('deleteDirectory'),
getFileSize: _wrapFallbackMethod('getFileSize'),
deleteFile: _wrapMethodOnBothPersistors('deleteFile'),
copyFile: copyFileWithFallback,
checkIfFileExists: _wrapFallbackMethod('checkIfFileExists'),
directorySize: _wrapFallbackMethod('directorySize', false)
directorySize: _wrapFallbackMethod('directorySize')
}
}
}
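
The tee above fans one fallback read stream out to the client and to the background copy. A minimal standalone sketch of the same pattern, assuming minipass v3 as pinned in package-lock.json (stream contents illustrative):

const Minipass = require('minipass')

const tee = new Minipass()
const clientStream = new Minipass()
const copyStream = new Minipass()

// attach both consumers before any data flows
tee.pipe(clientStream)
tee.pipe(copyStream)

clientStream.on('data', chunk => process.stdout.write(`client: ${chunk}\n`))
copyStream.on('data', chunk => process.stdout.write(`copy: ${chunk}\n`))

// write only after both pipes are set up, so neither consumer misses bytes
tee.write('hello ')
tee.end('world')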

View file

@@ -5,8 +5,11 @@ https.globalAgent.maxSockets = 300
const settings = require('settings-sharelatex')
const metrics = require('metrics-sharelatex')
const logger = require('logger-sharelatex')
const Minipass = require('minipass')
const meter = require('stream-meter')
const crypto = require('crypto')
const fs = require('fs')
const S3 = require('aws-sdk/clients/s3')
const { URL } = require('url')
@@ -22,6 +25,7 @@ module.exports = {
sendFile: callbackify(sendFile),
sendStream: callbackify(sendStream),
getFileStream: callbackify(getFileStream),
getFileMd5Hash: callbackify(getFileMd5Hash),
deleteDirectory: callbackify(deleteDirectory),
getFileSize: callbackify(getFileSize),
deleteFile: callbackify(deleteFile),
@@ -32,6 +36,7 @@ module.exports = {
sendFile,
sendStream,
getFileStream,
getFileMd5Hash,
deleteDirectory,
getFileSize,
deleteFile,
@@ -41,6 +46,10 @@ module.exports = {
}
}
function hexToBase64(hex) {
return Buffer.from(hex, 'hex').toString('base64')
}
async function sendFile(bucketName, key, fsPath) {
let readStream
try {
@@ -56,20 +65,79 @@ async function sendFile(bucketName, key, fsPath) {
return sendStream(bucketName, key, readStream)
}
async function sendStream(bucketName, key, readStream) {
async function sendStream(bucketName, key, readStream, sourceMd5) {
try {
// if there is no supplied md5 hash, we calculate the hash as the data passes through
const passthroughStream = new Minipass()
let hashPromise
let b64Hash
if (sourceMd5) {
b64Hash = hexToBase64(sourceMd5)
} else {
const hash = crypto.createHash('md5')
hash.setEncoding('hex')
passthroughStream.pipe(hash)
hashPromise = new Promise((resolve, reject) => {
passthroughStream.on('end', () => {
hash.end()
resolve(hash.read())
})
passthroughStream.on('error', err => {
reject(err)
})
})
}
const meteredStream = meter()
passthroughStream.pipe(meteredStream)
meteredStream.on('finish', () => {
metrics.count('s3.egress', meteredStream.bytes)
})
await _getClientForBucket(bucketName)
.upload({
Bucket: bucketName,
Key: key,
Body: readStream.pipe(meteredStream)
})
// pipe the readstream through minipass, which can write to both the metered
// stream (which goes on to S3) and the md5 generator if necessary
// - we do this last so that a listener stream does not consume data meant
// for both destinations
readStream.pipe(passthroughStream)
// if we have an md5 hash, pass this to S3 to verify the upload
const uploadOptions = {
Bucket: bucketName,
Key: key,
Body: meteredStream
}
if (b64Hash) {
uploadOptions.ContentMD5 = b64Hash
}
const response = await _getClientForBucket(bucketName)
.upload(uploadOptions)
.promise()
const destMd5 = _md5FromResponse(response)
// if we didn't have an md5 hash, compare our computed one with S3's
if (hashPromise) {
sourceMd5 = await hashPromise
if (sourceMd5 !== destMd5) {
try {
await deleteFile(bucketName, key)
} catch (err) {
logger.warn(err, 'error deleting file for invalid upload')
}
throw new WriteError({
message: 'source and destination hashes do not match',
info: {
sourceMd5,
destMd5,
bucketName,
key
}
})
}
}
} catch (err) {
throw _wrapError(
err,
@@ -167,6 +235,23 @@ async function getFileSize(bucketName, key) {
}
}
async function getFileMd5Hash(bucketName, key) {
try {
const response = await _getClientForBucket(bucketName)
.headObject({ Bucket: bucketName, Key: key })
.promise()
const md5 = _md5FromResponse(response)
return md5
} catch (err) {
throw _wrapError(
err,
'error getting hash of s3 object',
{ bucketName, key },
ReadError
)
}
}
async function deleteFile(bucketName, key) {
try {
await _getClientForBucket(bucketName)
@@ -314,3 +399,18 @@ function _buildClientOptions(bucketCredentials) {
return options
}
function _md5FromResponse(response) {
const md5 = (response.ETag || '').replace(/[ "]/g, '')
if (!md5.match(/^[a-f0-9]{32}$/)) {
throw new ReadError({
message: 's3 etag not in md5-hash format',
info: {
md5,
eTag: response.ETag
}
})
}
return md5
}
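
S3 expects the ContentMD5 header as a base64-encoded digest, while ETags come back as quoted hex, hence hexToBase64 above. A quick check of the conversion with a well-known digest:

const crypto = require('crypto')

function hexToBase64(hex) {
  return Buffer.from(hex, 'hex').toString('base64')
}

const hex = crypto
  .createHash('md5')
  .update('hello world')
  .digest('hex')
console.log(hex) // 5eb63bbbe01eeed093cb22bb8f5acdc3
console.log(hexToBase64(hex)) // XrY7u+Ae7tCTyyK7j1rNww==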

View file

@@ -17,8 +17,8 @@ unless process.env['BACKEND']?
else
process.env['BACKEND'] = "fs"
process.env['USER_FILES_BUCKET_NAME'] = Path.resolve(__dirname + "/../user_files")
process.env['TEMPLATE_FILES_BUCKET_NAME'] = Path.resolve(__dirname + "/../public_files")
process.env['PUBLIC_FILES_BUCKET_NAME'] = Path.resolve(__dirname + "/../template_files")
process.env['TEMPLATE_FILES_BUCKET_NAME'] = Path.resolve(__dirname + "/../template_files")
process.env['PUBLIC_FILES_BUCKET_NAME'] = Path.resolve(__dirname + "/../public_files")
settings =
internal:
@@ -51,8 +51,8 @@ settings =
backend: process.env['FALLBACK_BACKEND']
# mapping of bucket names on the fallback, to bucket names on the primary.
# e.g. { myS3UserFilesBucketName: 'myGoogleUserFilesBucketName' }
buckets: JSON.parse process.env['FALLBACK_BUCKET_MAPPING'] if process.env['FALLBACK_BUCKET_MAPPING']?
copyOnMiss: if process.env['COPY_ON_MISS'] == 'true' then true else false
buckets: JSON.parse(process.env['FALLBACK_BUCKET_MAPPING'] || '{}')
copyOnMiss: process.env['COPY_ON_MISS'] == 'true'
path:
uploadFolder: Path.resolve(__dirname + "/../uploads")
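
For reference, a hedged example of the environment this fallback block reads; the bucket names are taken from the comment above and the values are illustrative:

// Hypothetical environment for the fallback settings above.
process.env.FALLBACK_BACKEND = 's3'
process.env.FALLBACK_BUCKET_MAPPING = JSON.stringify({
  myS3UserFilesBucketName: 'myGoogleUserFilesBucketName'
})
process.env.COPY_ON_MISS = 'true' // anything else now cleanly parses to false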

View file

@@ -3129,6 +3129,21 @@
"resolved": "https://registry.npmjs.org/minimist/-/minimist-0.0.8.tgz",
"integrity": "sha1-hX/Kv8M5fSYluCKCYuhqp6ARsF0="
},
"minipass": {
"version": "3.1.1",
"resolved": "https://registry.npmjs.org/minipass/-/minipass-3.1.1.tgz",
"integrity": "sha512-UFqVihv6PQgwj8/yTGvl9kPz7xIAY+R5z6XYjRInD3Gk3qx6QGSD6zEcpeG4Dy/lQnv1J6zv8ejV90hyYIKf3w==",
"requires": {
"yallist": "^4.0.0"
},
"dependencies": {
"yallist": {
"version": "4.0.0",
"resolved": "https://registry.npmjs.org/yallist/-/yallist-4.0.0.tgz",
"integrity": "sha512-3wdGidZyq5PB084XLES5TpOSRA3wjXAlIWMhum2kRcv/41Sn2emQ0dycQW4uZXLejwKvg6EsvbdlVL+FYEct7A=="
}
}
},
"mkdirp": {
"version": "0.5.1",
"resolved": "https://registry.npmjs.org/mkdirp/-/mkdirp-0.5.1.tgz",

View file

@@ -31,6 +31,7 @@
"knox": "~0.9.1",
"logger-sharelatex": "^1.7.0",
"metrics-sharelatex": "^2.2.0",
"minipass": "^3.1.1",
"mocha": "5.2.0",
"node-transloadit": "0.0.4",
"node-uuid": "~1.4.1",

View file

@@ -84,11 +84,11 @@ const BackendSettings = {
__dirname,
'../../../user_files'
),
[process.env.AWS_S3_TEMPLATE_FILES_BUCKET_NAME]: Path.resolve(
[process.env.AWS_S3_PUBLIC_FILES_BUCKET_NAME]: Path.resolve(
__dirname,
'../../../public_files'
),
[process.env.AWS_S3_PUBLIC_FILES_BUCKET_NAME]: Path.resolve(
[process.env.AWS_S3_TEMPLATE_FILES_BUCKET_NAME]: Path.resolve(
__dirname,
'../../../template_files'
)
@@ -114,9 +114,9 @@ const BackendSettings = {
[Path.resolve(__dirname, '../../../user_files')]: process.env
.AWS_S3_USER_FILES_BUCKET_NAME,
[Path.resolve(__dirname, '../../../public_files')]: process.env
.AWS_S3_TEMPLATE_FILES_BUCKET_NAME,
.AWS_S3_PUBLIC_FILES_BUCKET_NAME,
[Path.resolve(__dirname, '../../../template_files')]: process.env
.AWS_S3_PUBLIC_FILES_BUCKET_NAME
.AWS_S3_TEMPLATE_FILES_BUCKET_NAME
}
}
}
@@ -130,7 +130,7 @@ describe('Filestore', function() {
// redefine the test suite for every available backend
Object.keys(BackendSettings).forEach(backend => {
describe(backend, function() {
let app, previousEgress, previousIngress
let app, previousEgress, previousIngress, projectId
before(async function() {
// create the app with the relevant filestore settings
@@ -151,6 +151,7 @@ describe('Filestore', function() {
getMetric(filestoreUrl, 's3_ingress')
])
}
projectId = `acceptance_tests_${Math.random()}`
})
it('should send a 200 for the status endpoint', async function() {
@@ -174,7 +175,7 @@
beforeEach(async function() {
fileId = Math.random()
fileUrl = `${filestoreUrl}/project/acceptance_tests/file/${directoryName}%2F${fileId}`
fileUrl = `${filestoreUrl}/project/${projectId}/file/${directoryName}%2F${fileId}`
constantFileContent = [
'hello world',
`line 2 goes here ${Math.random()}`,
@@ -242,7 +243,7 @@ describe('Filestore', function() {
})
it('should be able to copy files', async function() {
const newProjectID = 'acceptance_tests_copied_project'
const newProjectID = `acceptance_tests_copied_project_${Math.random()}`
const newFileId = Math.random()
const newFileUrl = `${filestoreUrl}/project/${newProjectID}/file/${directoryName}%2F${newFileId}`
const opts = {
@@ -250,7 +251,7 @@
uri: newFileUrl,
json: {
source: {
project_id: 'acceptance_tests',
project_id: projectId,
file_id: `${directoryName}/${fileId}`
}
}
@@ -304,7 +305,7 @@ describe('Filestore', function() {
})
describe('with multiple files', function() {
let fileIds, fileUrls, project
let fileIds, fileUrls
const directoryName = 'directory'
const localFileReadPaths = [
'/tmp/filestore_acceptance_tests_file_read_1.txt',
@@ -331,11 +332,10 @@
})
beforeEach(async function() {
project = `acceptance_tests_${Math.random()}`
fileIds = [Math.random(), Math.random()]
fileUrls = [
`${filestoreUrl}/project/${project}/file/${directoryName}%2F${fileIds[0]}`,
`${filestoreUrl}/project/${project}/file/${directoryName}%2F${fileIds[1]}`
`${filestoreUrl}/project/${projectId}/file/${directoryName}%2F${fileIds[0]}`,
`${filestoreUrl}/project/${projectId}/file/${directoryName}%2F${fileIds[1]}`
]
const writeStreams = [
@@ -359,7 +359,7 @@
it('should get the directory size', async function() {
const response = await rp.get(
`${filestoreUrl}/project/${project}/size`
`${filestoreUrl}/project/${projectId}/size`
)
expect(parseInt(JSON.parse(response.body)['total bytes'])).to.equal(
constantFileContents[0].length + constantFileContents[1].length
@@ -459,7 +459,6 @@ describe('Filestore', function() {
fileUrl,
bucket,
fallbackBucket
const projectId = 'acceptance_tests'
beforeEach(function() {
constantFileContent = `This is yet more file content ${Math.random()}`
@@ -503,14 +502,20 @@ describe('Filestore', function() {
expect(res.body).to.equal(constantFileContent)
})
it('should not copy the file to the primary', async function() {
await rp.get(fileUrl)
describe('when copyOnMiss is disabled', function() {
beforeEach(function() {
Settings.filestore.fallback.copyOnMiss = false
})
await expectPersistorNotToHaveFile(
app.persistor.primaryPersistor,
bucket,
fileKey
)
it('should not copy the file to the primary', async function() {
await rp.get(fileUrl)
await expectPersistorNotToHaveFile(
app.persistor.primaryPersistor,
bucket,
fileKey
)
})
})
describe('when copyOnMiss is enabled', function() {
@@ -534,9 +539,9 @@
describe('when copying a file', function() {
let newFileId, newFileUrl, newFileKey
const newProjectID = 'acceptance_tests_copied_project'
beforeEach(async function() {
const newProjectID = `acceptance_tests_copied_project_${Math.random()}`
newFileId = Math.random()
newFileUrl = `${filestoreUrl}/project/${newProjectID}/file/${directoryName}%2F${newFileId}`
newFileKey = `${newProjectID}/${directoryName}/${newFileId}`
@@ -546,7 +551,7 @@
uri: newFileUrl,
json: {
source: {
project_id: 'acceptance_tests',
project_id: projectId,
file_id: `${directoryName}/${fileId}`
}
}
@@ -616,7 +621,7 @@ describe('Filestore', function() {
await expectPersistorNotToHaveFile(
app.persistor.fallbackPersistor,
fallbackBucket,
`acceptance_tests/${directoryName}/${fileId}`
`${projectId}/${directoryName}/${fileId}`
)
})
})
@@ -706,7 +711,7 @@ describe('Filestore', function() {
beforeEach(async function() {
fileId = Math.random()
fileUrl = `${filestoreUrl}/project/acceptance_tests/file/${directoryName}%2F${fileId}`
fileUrl = `${filestoreUrl}/project/${projectId}/file/${directoryName}%2F${fileId}`
const stat = await fsStat(localFileReadPath)
localFileSize = stat.size
const writeStream = request.post(fileUrl)

View file

@@ -12,19 +12,32 @@ const modulePath = '../../../app/js/FSPersistor.js'
describe('FSPersistorTests', function() {
const stat = { size: 4, isFile: sinon.stub().returns(true) }
const fd = 1234
const readStream = 'readStream'
const writeStream = 'writeStream'
const remoteStream = 'remoteStream'
const tempFile = '/tmp/potato.txt'
const location = '/foo'
const error = new Error('guru meditation error')
const md5 = 'ffffffff'
const files = ['animals/wombat.tex', 'vegetables/potato.tex']
const globs = [`${location}/${files[0]}`, `${location}/${files[1]}`]
const filteredFilenames = ['animals_wombat.tex', 'vegetables_potato.tex']
let fs, rimraf, stream, LocalFileWriter, FSPersistor, glob
let fs,
rimraf,
stream,
LocalFileWriter,
FSPersistor,
glob,
readStream,
crypto,
Hash
beforeEach(function() {
readStream = {
name: 'readStream',
on: sinon.stub().yields(),
pipe: sinon.stub()
}
fs = {
createReadStream: sinon.stub().returns(readStream),
createWriteStream: sinon.stub().returns(writeStream),
@@ -41,6 +54,14 @@ describe('FSPersistorTests', function() {
deleteFile: sinon.stub().resolves()
}
}
Hash = {
end: sinon.stub(),
read: sinon.stub().returns(md5),
setEncoding: sinon.stub()
}
crypto = {
createHash: sinon.stub().returns(Hash)
}
FSPersistor = SandboxedModule.require(modulePath, {
requires: {
'./LocalFileWriter': LocalFileWriter,
@@ -48,7 +69,8 @@ describe('FSPersistorTests', function() {
fs,
glob,
rimraf,
stream
stream,
crypto
},
globals: { console }
})
@@ -103,6 +125,35 @@ describe('FSPersistorTests', function() {
await FSPersistor.promises.sendStream(location, files[0], remoteStream)
expect(fs.createReadStream).to.have.been.calledWith(tempFile)
})
describe('when the md5 hash does not match', function() {
it('should return a write error', async function() {
await expect(
FSPersistor.promises.sendStream(
location,
files[0],
remoteStream,
'00000000'
)
)
.to.eventually.be.rejected.and.be.an.instanceOf(Errors.WriteError)
.and.have.property('message', 'md5 hash mismatch')
})
it('deletes the copied file', async function() {
try {
await FSPersistor.promises.sendStream(
location,
files[0],
remoteStream,
'00000000'
)
} catch (_) {}
expect(LocalFileWriter.promises.deleteFile).to.have.been.calledWith(
`${location}/${filteredFilenames[0]}`
)
})
})
})
describe('getFileStream', function() {

View file

@@ -21,35 +21,53 @@ describe('MigrationPersistorTests', function() {
const genericError = new Error('guru meditation error')
const notFoundError = new Errors.NotFoundError('not found')
const size = 33
const fileStream = 'fileStream'
const md5 = 'ffffffff'
function newPersistor(hasFile) {
return {
promises: {
sendFile: sinon.stub().resolves(),
sendStream: sinon.stub().resolves(),
getFileStream: hasFile
? sinon.stub().resolves(fileStream)
: sinon.stub().rejects(notFoundError),
deleteDirectory: sinon.stub().resolves(),
getFileSize: hasFile
? sinon.stub().resolves(size)
: sinon.stub().rejects(notFoundError),
deleteFile: sinon.stub().resolves(),
copyFile: hasFile
? sinon.stub().resolves()
: sinon.stub().rejects(notFoundError),
checkIfFileExists: sinon.stub().resolves(hasFile),
directorySize: hasFile
? sinon.stub().resolves(size)
: sinon.stub().rejects(notFoundError)
}
}
}
let Metrics, Settings, Logger, MigrationPersistor
let Metrics,
Settings,
Logger,
MigrationPersistor,
Minipass,
fileStream,
newPersistor
beforeEach(function() {
fileStream = {
name: 'fileStream',
on: sinon
.stub()
.withArgs('end')
.yields(),
pipe: sinon.stub()
}
newPersistor = function(hasFile) {
return {
promises: {
sendFile: sinon.stub().resolves(),
sendStream: sinon.stub().resolves(),
getFileStream: hasFile
? sinon.stub().resolves(fileStream)
: sinon.stub().rejects(notFoundError),
deleteDirectory: sinon.stub().resolves(),
getFileSize: hasFile
? sinon.stub().resolves(size)
: sinon.stub().rejects(notFoundError),
deleteFile: sinon.stub().resolves(),
copyFile: hasFile
? sinon.stub().resolves()
: sinon.stub().rejects(notFoundError),
checkIfFileExists: sinon.stub().resolves(hasFile),
directorySize: hasFile
? sinon.stub().resolves(size)
: sinon.stub().rejects(notFoundError),
getFileMd5Hash: hasFile
? sinon.stub().resolves(md5)
: sinon.stub().rejects(notFoundError)
}
}
}
Settings = {
filestore: {
fallback: {
@@ -68,12 +86,20 @@ describe('MigrationPersistorTests', function() {
warn: sinon.stub()
}
Minipass = sinon.stub()
Minipass.prototype.on = sinon
.stub()
.withArgs('end')
.yields()
Minipass.prototype.pipe = sinon.stub()
MigrationPersistor = SandboxedModule.require(modulePath, {
requires: {
'settings-sharelatex': Settings,
'./Errors': Errors,
'metrics-sharelatex': Metrics,
'logger-sharelatex': Logger
'logger-sharelatex': Logger,
minipass: Minipass
},
globals: { console }
})
@@ -144,7 +170,7 @@ describe('MigrationPersistorTests', function() {
).to.have.been.calledWithExactly(fallbackBucket, key, options)
})
it('should only create one stream', function() {
it('should create one read stream', function() {
expect(fallbackPersistor.promises.getFileStream).to.have.been.calledOnce
})
@@ -154,7 +180,10 @@
})
describe('when the file should be copied to the primary', function() {
let primaryPersistor, fallbackPersistor, migrationPersistor
let primaryPersistor,
fallbackPersistor,
migrationPersistor,
returnedStream
beforeEach(async function() {
primaryPersistor = newPersistor(false)
fallbackPersistor = newPersistor(true)
@@ -163,18 +192,36 @@
fallbackPersistor
)
Settings.filestore.fallback.copyOnMiss = true
return migrationPersistor.promises.getFileStream(bucket, key, options)
returnedStream = await migrationPersistor.promises.getFileStream(
bucket,
key,
options
)
})
it('should create two streams', function() {
expect(fallbackPersistor.promises.getFileStream).to.have.been
.calledTwice
it('should create one read stream', function() {
expect(fallbackPersistor.promises.getFileStream).to.have.been.calledOnce
})
it('should send one of the streams to the primary', function() {
it('should get the md5 hash from the source', function() {
expect(
fallbackPersistor.promises.getFileMd5Hash
).to.have.been.calledWith(fallbackBucket, key)
})
it('should send a stream to the primary', function() {
expect(
primaryPersistor.promises.sendStream
).to.have.been.calledWithExactly(bucket, key, fileStream)
).to.have.been.calledWithExactly(
bucket,
key,
sinon.match.instanceOf(Minipass),
md5
)
})
it('should send a stream to the client', function() {
expect(returnedStream).to.be.an.instanceOf(Minipass)
})
})
@@ -420,10 +467,16 @@ describe('MigrationPersistorTests', function() {
).not.to.have.been.calledWithExactly(fallbackBucket, key)
})
it('should get the md5 hash from the source', function() {
expect(
fallbackPersistor.promises.getFileMd5Hash
).to.have.been.calledWith(fallbackBucket, key)
})
it('should send the file to the primary', function() {
expect(
primaryPersistor.promises.sendStream
).to.have.been.calledWithExactly(bucket, destKey, fileStream)
).to.have.been.calledWithExactly(bucket, destKey, fileStream, md5)
})
})

View file

@@ -26,8 +26,10 @@ describe('S3PersistorTests', function() {
{ Key: 'hippo', Size: 22 }
]
const filesSize = 33
const md5 = 'ffffffff00000000ffffffff00000000'
let Metrics,
Logger,
S3,
Fs,
Meter,
@@ -40,7 +42,10 @@
S3AccessDeniedError,
FileNotFoundError,
EmptyPromise,
settings
settings,
Minipass,
Hash,
crypto
beforeEach(function() {
settings = {
@@ -100,7 +105,8 @@
}),
headObject: sinon.stub().returns({
promise: sinon.stub().resolves({
ContentLength: objectSize
ContentLength: objectSize,
ETag: md5
})
}),
listObjects: sinon.stub().returns({
@@ -108,21 +114,46 @@
Contents: files
})
}),
upload: sinon.stub().returns(EmptyPromise),
upload: sinon
.stub()
.returns({ promise: sinon.stub().resolves({ ETag: `"${md5}"` }) }),
copyObject: sinon.stub().returns(EmptyPromise),
deleteObject: sinon.stub().returns(EmptyPromise),
deleteObjects: sinon.stub().returns(EmptyPromise)
}
S3 = sinon.stub().returns(S3Client)
Hash = {
end: sinon.stub(),
read: sinon.stub().returns(md5),
setEncoding: sinon.stub()
}
crypto = {
createHash: sinon.stub().returns(Hash)
}
Minipass = sinon.stub()
Minipass.prototype.on = sinon
.stub()
.withArgs('end')
.yields()
Minipass.prototype.pipe = sinon.stub()
Logger = {
warn: sinon.stub()
}
S3Persistor = SandboxedModule.require(modulePath, {
requires: {
'aws-sdk/clients/s3': S3,
'settings-sharelatex': settings,
'logger-sharelatex': Logger,
'./Errors': Errors,
fs: Fs,
'stream-meter': Meter,
'metrics-sharelatex': Metrics
'metrics-sharelatex': Metrics,
minipass: Minipass,
crypto
},
globals: { console }
})
@@ -420,17 +451,49 @@
expect(S3Client.upload).to.have.been.calledWith({
Bucket: bucket,
Key: key,
Body: 'readStream'
Body: MeteredStream
})
})
it('should meter the stream', function() {
expect(ReadStream.pipe).to.have.been.calledWith(MeteredStream)
expect(Minipass.prototype.pipe).to.have.been.calledWith(MeteredStream)
})
it('should record an egress metric', function() {
expect(Metrics.count).to.have.been.calledWith('s3.egress', objectSize)
})
it('calculates the md5 hash of the file', function() {
expect(Minipass.prototype.pipe).to.have.been.calledWith(Hash)
})
})
describe('when a hash is supplied', function() {
beforeEach(async function() {
return S3Persistor.promises.sendStream(
bucket,
key,
ReadStream,
'aaaaaaaabbbbbbbbaaaaaaaabbbbbbbb'
)
})
it('should not calculate the md5 hash of the file', function() {
expect(Minipass.prototype.pipe).not.to.have.been.calledWith(Hash)
})
it('sends the hash in base64', function() {
expect(S3Client.upload).to.have.been.calledWith({
Bucket: bucket,
Key: key,
Body: MeteredStream,
ContentMD5: 'qqqqqru7u7uqqqqqu7u7uw=='
})
})
it('does not fetch the md5 hash of the uploaded file', function() {
expect(S3Client.headObject).not.to.have.been.called
})
})
describe('when the upload fails', function() {
@@ -466,7 +529,7 @@
expect(S3Client.upload).to.have.been.calledWith({
Bucket: bucket,
Key: key,
Body: 'readStream'
Body: MeteredStream
})
})
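
The unit tests above inject deterministic stand-ins for minipass and crypto via SandboxedModule. A minimal hedged sketch of that injection style, with the stub values copied from the tests:

const SandboxedModule = require('sandboxed-module')
const sinon = require('sinon')

// Stub crypto so hashing is deterministic in tests.
const Hash = {
  end: sinon.stub(),
  read: sinon.stub().returns('ffffffff00000000ffffffff00000000'),
  setEncoding: sinon.stub()
}
const crypto = { createHash: sinon.stub().returns(Hash) }

// Hypothetical: load the module under test with the stub wired in.
const S3Persistor = SandboxedModule.require('./app/js/S3Persistor.js', {
  requires: { crypto },
  globals: { console }
})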
})