2020-01-16 11:25:12 -05:00
|
|
|
const metrics = require('metrics-sharelatex')
|
|
|
|
const Settings = require('settings-sharelatex')
|
|
|
|
const logger = require('logger-sharelatex')
|
2020-02-03 10:55:17 -05:00
|
|
|
const Stream = require('stream')
|
|
|
|
const { callbackify, promisify } = require('util')
|
2020-01-27 06:26:37 -05:00
|
|
|
const { NotFoundError, WriteError } = require('./Errors')
|
2020-01-16 11:25:12 -05:00
|
|
|
|
2020-02-03 10:55:17 -05:00
|
|
|
const pipeline = promisify(Stream.pipeline)
|
|
|
|
|
2020-01-16 11:25:12 -05:00
|
|
|
// Persistor that wraps two other persistors. Talks to the 'primary' by default,
|
|
|
|
// but will fall back to an older persistor in the case of a not-found error.
|
|
|
|
// If `Settings.filestore.fallback.copyOnMiss` is set, this will copy files from the fallback
|
|
|
|
// to the primary, in the event that they are missing.
|
|
|
|
//
|
|
|
|
// It is unlikely that the bucket/location name will be the same on the fallback
|
|
|
|
// as the primary. The bucket names should be overridden in `Settings.filestore.fallback.buckets`
|
|
|
|
// e.g.
|
|
|
|
// Settings.filestore.fallback.buckets = {
|
|
|
|
// myBucketOnS3: 'myBucketOnGCS'
|
2020-01-27 06:26:37 -05:00
|
|
|
// }
|
2020-01-16 11:25:12 -05:00
|
|
|
|
|
|
|
module.exports = function(primary, fallback) {
|
|
|
|
// Builds an async function `(bucket, key, ...moreArgs)` that invokes `method`
// on the primary persistor (with `bucket`) and on the fallback persistor (with
// the fallback bucket for `bucket`), running both calls concurrently.
//
// The returned function resolves with no value once both calls have completed,
// and rejects if either call rejects.
function _wrapMethodOnBothPersistors(method) {
  return async (bucket, key, ...moreArgs) => {
    // the fallback bucket is resolved before either persistor is called
    const targets = [
      [primary, bucket],
      [fallback, _getFallbackBucket(bucket)]
    ]

    await Promise.all(
      targets.map(([persistor, targetBucket]) =>
        persistor.promises[method](targetBucket, key, ...moreArgs)
      )
    )
  }
}
|
|
|
|
|
2020-02-03 10:55:17 -05:00
|
|
|
// Gets a file stream from the primary persistor, falling back to the fallback
// persistor when the primary throws a NotFoundError.
//
// When the fallback is used, its stream is teed: one branch is returned to the
// caller and, if `Settings.filestore.fallback.copyOnMiss` is set and neither
// `opts.start` nor `opts.end` was given, a second branch is copied to the
// primary in the background.
//
// bucket - bucket name on the primary persistor
// key    - key of the file to fetch
// opts   - options handed to the persistors' getFileStream. Defaults to {} so
//          that omitting it does not throw when `start`/`end` are read.
//
// Resolves with the primary's stream, or with a PassThrough fed from the
// fallback stream. Rejects with any error from the primary that is not a
// NotFoundError, and with any error from the fallback's getFileStream.
async function getFileStreamWithFallback(bucket, key, opts = {}) {
  // only copy to the primary when no start/end was requested
  const shouldCopy =
    Settings.filestore.fallback.copyOnMiss && !opts.start && !opts.end

  try {
    return await primary.promises.getFileStream(bucket, key, opts)
  } catch (err) {
    if (!(err instanceof NotFoundError)) {
      throw err
    }

    const fallbackBucket = _getFallbackBucket(bucket)
    const fallbackStream = await fallback.promises.getFileStream(
      fallbackBucket,
      key,
      opts
    )

    // tee the stream to the client, and as a copy to the primary (if necessary)
    // start listening on both straight away so that we don't consume bytes
    // in one place before the other
    const returnStream = new Stream.PassThrough()
    // the pipeline promise is not awaited, so it needs its own rejection
    // handler; otherwise a stream error becomes an unhandled rejection
    pipeline(fallbackStream, returnStream).catch(pipelineErr => {
      logger.warn(
        { err: pipelineErr },
        'failed to stream file from fallback'
      )
    })

    if (shouldCopy) {
      const copyStream = new Stream.PassThrough()
      pipeline(fallbackStream, copyStream).catch(pipelineErr => {
        logger.warn({ err: pipelineErr }, 'failed to copy file from fallback')
      })

      _copyStreamFromFallbackAndVerify(
        copyStream,
        fallbackBucket,
        bucket,
        key,
        key
      ).catch(() => {
        // swallow errors, as this runs in the background and will log a warning
      })
    }

    return returnStream
  }
}
|
|
|
|
|
2020-01-16 11:25:12 -05:00
|
|
|
// Copies `sourceKey` to `destKey` within `bucket` on the primary persistor.
//
// If the primary throws a NotFoundError, the source file is streamed from the
// fallback persistor and written to `destKey` on the primary instead. When
// `Settings.filestore.fallback.copyOnMiss` is set, the same fallback stream is
// also teed into a background copy to `sourceKey` on the primary.
//
// bucket    - bucket name on the primary persistor
// sourceKey - key of the file to copy
// destKey   - key to copy the file to
//
// Resolves with the result of the primary's copyFile, or of the fallback copy
// to `destKey`. Rejects with any error from the primary that is not a
// NotFoundError, with errors from the fallback's getFileStream, and with
// errors from the copy to `destKey`. Failures of the background copy to
// `sourceKey` do not reject.
async function copyFileWithFallback(bucket, sourceKey, destKey) {
  try {
    return await primary.promises.copyFile(bucket, sourceKey, destKey)
  } catch (err) {
    if (!(err instanceof NotFoundError)) {
      throw err
    }

    const fallbackBucket = _getFallbackBucket(bucket)
    const fallbackStream = await fallback.promises.getFileStream(
      fallbackBucket,
      sourceKey,
      {}
    )

    const copyStream = new Stream.PassThrough()
    // the pipeline promises are not awaited, so each needs its own rejection
    // handler; otherwise a stream error becomes an unhandled rejection
    pipeline(fallbackStream, copyStream).catch(pipelineErr => {
      logger.warn({ err: pipelineErr }, 'failed to copy file from fallback')
    })

    if (Settings.filestore.fallback.copyOnMiss) {
      const missStream = new Stream.PassThrough()
      pipeline(fallbackStream, missStream).catch(pipelineErr => {
        logger.warn({ err: pipelineErr }, 'failed to copy file from fallback')
      })

      // copy from sourceKey -> sourceKey
      _copyStreamFromFallbackAndVerify(
        missStream,
        fallbackBucket,
        bucket,
        sourceKey,
        sourceKey
      ).catch(() => {
        // swallow errors, as this runs in the background and will log a warning
      })
    }

    // copy from sourceKey -> destKey
    return _copyStreamFromFallbackAndVerify(
      copyStream,
      fallbackBucket,
      bucket,
      sourceKey,
      destKey
    )
  }
}
|
|
|
|
|
|
|
|
// Translates a primary bucket name into the bucket name to use on the fallback
// persistor, using the overrides in `Settings.filestore.fallback.buckets`.
//
// If no override is configured for `bucket` (or no overrides are configured at
// all), the primary bucket name is returned unchanged, rather than `undefined`.
function _getFallbackBucket(bucket) {
  const overrides = Settings.filestore.fallback.buckets
  return (overrides && overrides[bucket]) || bucket
}
|
|
|
|
|
2020-02-03 10:55:17 -05:00
|
|
|
// Builds an async function `(bucket, key, ...moreArgs)` that calls `method` on
// the primary persistor and, if that throws a NotFoundError, retries the same
// method on the fallback persistor using the fallback bucket.
//
// When `Settings.filestore.fallback.copyOnMiss` is set, a miss on the primary
// also opens a stream for `key` on the fallback and starts a copy of it to the
// primary; that copy is not awaited and its failure is only logged.
//
// Any other error from the primary is rethrown as-is.
function _wrapFallbackMethod(method) {
  return async (bucket, key, ...moreArgs) => {
    try {
      return await primary.promises[method](bucket, key, ...moreArgs)
    } catch (primaryError) {
      if (!(primaryError instanceof NotFoundError)) {
        throw primaryError
      }

      const fallbackBucket = _getFallbackBucket(bucket)
      const { copyOnMiss } = Settings.filestore.fallback

      if (copyOnMiss) {
        const fallbackStream = await fallback.promises.getFileStream(
          fallbackBucket,
          key,
          {}
        )
        // run in background
        const backgroundCopy = _copyStreamFromFallbackAndVerify(
          fallbackStream,
          fallbackBucket,
          bucket,
          key,
          key
        )
        backgroundCopy.catch(err => {
          logger.warn({ err }, 'failed to copy file from fallback')
        })
      }

      return fallback.promises[method](fallbackBucket, key, ...moreArgs)
    }
  }
}
|
|
|
|
|
2020-02-03 10:55:17 -05:00
|
|
|
// Writes `stream` to `destBucket`/`destKey` on the primary persistor, passing
// along the md5 hash that the fallback persistor reports for
// `sourceBucket`/`sourceKey`.
//
// If the md5 lookup fails, a warning is logged and the stream is sent with an
// undefined hash. If sending fails, the `fallback.copy.failure` metric is
// incremented, deletion of the destination file is attempted, a warning is
// logged, and a WriteError wrapping the original error is thrown. A failure
// of that deletion is attached to the thrown error as `info.cleanupError`.
async function _copyStreamFromFallbackAndVerify(
  stream,
  sourceBucket,
  destBucket,
  sourceKey,
  destKey
) {
  // resolves with the fallback's md5 for the source file, or with undefined
  // (after logging) when it cannot be fetched
  const lookupSourceMd5 = async () => {
    try {
      return await fallback.promises.getFileMd5Hash(sourceBucket, sourceKey)
    } catch (md5Error) {
      logger.warn(md5Error, 'error getting md5 hash from fallback persistor')
      return undefined
    }
  }

  try {
    const sourceMd5 = await lookupSourceMd5()
    await primary.promises.sendStream(destBucket, destKey, stream, sourceMd5)
  } catch (copyError) {
    const failure = new WriteError({
      message: 'unable to copy file to destination persistor',
      info: { sourceBucket, destBucket, sourceKey, destKey }
    }).withCause(copyError)

    metrics.inc('fallback.copy.failure')

    // attempt to delete whatever was written to the destination
    try {
      await primary.promises.deleteFile(destBucket, destKey)
    } catch (cleanupCause) {
      failure.info.cleanupError = new WriteError({
        message: 'unable to clean up destination copy artifact',
        info: { destBucket, destKey }
      }).withCause(cleanupCause)
    }

    logger.warn({ error: failure }, 'failed to copy file from fallback')
    throw failure
  }
}
|
|
|
|
|
2020-01-16 11:25:12 -05:00
|
|
|
return {
|
|
|
|
primaryPersistor: primary,
|
|
|
|
fallbackPersistor: fallback,
|
|
|
|
sendFile: primary.sendFile,
|
|
|
|
sendStream: primary.sendStream,
|
2020-02-03 10:55:17 -05:00
|
|
|
getFileStream: callbackify(getFileStreamWithFallback),
|
2020-01-27 06:26:37 -05:00
|
|
|
getFileMd5Hash: callbackify(_wrapFallbackMethod('getFileMd5Hash')),
|
2020-01-16 11:25:12 -05:00
|
|
|
deleteDirectory: callbackify(
|
|
|
|
_wrapMethodOnBothPersistors('deleteDirectory')
|
|
|
|
),
|
|
|
|
getFileSize: callbackify(_wrapFallbackMethod('getFileSize')),
|
|
|
|
deleteFile: callbackify(_wrapMethodOnBothPersistors('deleteFile')),
|
|
|
|
copyFile: callbackify(copyFileWithFallback),
|
|
|
|
checkIfFileExists: callbackify(_wrapFallbackMethod('checkIfFileExists')),
|
2020-01-27 06:26:37 -05:00
|
|
|
directorySize: callbackify(_wrapFallbackMethod('directorySize')),
|
2020-01-16 11:25:12 -05:00
|
|
|
promises: {
|
|
|
|
sendFile: primary.promises.sendFile,
|
|
|
|
sendStream: primary.promises.sendStream,
|
2020-02-03 10:55:17 -05:00
|
|
|
getFileStream: getFileStreamWithFallback,
|
2020-01-27 06:26:37 -05:00
|
|
|
getFileMd5Hash: _wrapFallbackMethod('getFileMd5Hash'),
|
2020-01-16 11:25:12 -05:00
|
|
|
deleteDirectory: _wrapMethodOnBothPersistors('deleteDirectory'),
|
|
|
|
getFileSize: _wrapFallbackMethod('getFileSize'),
|
|
|
|
deleteFile: _wrapMethodOnBothPersistors('deleteFile'),
|
|
|
|
copyFile: copyFileWithFallback,
|
|
|
|
checkIfFileExists: _wrapFallbackMethod('checkIfFileExists'),
|
2020-01-27 06:26:37 -05:00
|
|
|
directorySize: _wrapFallbackMethod('directorySize')
|
2020-01-16 11:25:12 -05:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|