const settings = require('settings-sharelatex')
const fs = require('fs')
const { promisify, callbackify } = require('util')
const Stream = require('stream')
const { Storage } = require('@google-cloud/storage')
const { WriteError, ReadError, NotFoundError } = require('./Errors')
const asyncPool = require('tiny-async-pool')
const PersistorHelper = require('./PersistorHelper')

const pipeline = promisify(Stream.pipeline)

// endpoint settings will be null by default except for tests
// that's OK - GCS uses the locally-configured service account by default
const storage = new Storage(settings.filestore.gcs.endpoint)
// workaround for broken uploads with custom endpoints:
// https://github.com/googleapis/nodejs-storage/issues/898
if (
  settings.filestore.gcs.endpoint &&
  settings.filestore.gcs.endpoint.apiEndpoint
) {
  storage.interceptors.push({
    request: function(reqOpts) {
      const url = new URL(reqOpts.uri)
      url.host = settings.filestore.gcs.endpoint.apiEndpoint
      if (settings.filestore.gcs.endpoint.apiScheme) {
        url.protocol = settings.filestore.gcs.endpoint.apiScheme
      }
      reqOpts.uri = url.toString()
      return reqOpts
    }
  })
}

const GcsPersistor = {
  sendFile: callbackify(sendFile),
  sendStream: callbackify(sendStream),
  getFileStream: callbackify(getFileStream),
  getFileMd5Hash: callbackify(getFileMd5Hash),
  deleteDirectory: callbackify(deleteDirectory),
  getFileSize: callbackify(getFileSize),
  deleteFile: callbackify(deleteFile),
  copyFile: callbackify(copyFile),
  checkIfFileExists: callbackify(checkIfFileExists),
  directorySize: callbackify(directorySize),
  promises: {
    sendFile,
    sendStream,
    getFileStream,
    getFileMd5Hash,
    deleteDirectory,
    getFileSize,
    deleteFile,
    copyFile,
    checkIfFileExists,
    directorySize
  }
}

module.exports = GcsPersistor
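
// Example usage (bucket and key names below are hypothetical):
//
//   // promise-based API
//   await GcsPersistor.promises.sendFile('my-bucket', 'project/file.tex', '/tmp/file.tex')
//
//   // callback-based API (wrapped with callbackify)
//   GcsPersistor.getFileSize('my-bucket', 'project/file.tex', (err, size) => {
//     // handle err / size
//   })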
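
// Upload the file at fsPath to bucketName/key by streaming it through sendStream.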
async function sendFile(bucketName, key, fsPath) {
  return sendStream(bucketName, key, fs.createReadStream(fsPath))
}
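
// Upload a stream to bucketName/key. If sourceMd5 is supplied, GCS validates the
// upload against it; otherwise the MD5 is computed as the data passes through and
// compared with GCS's stored hash after the upload (verifyMd5 throws on mismatch).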
async function sendStream(bucketName, key, readStream, sourceMd5) {
  try {
    // egress from us to gcs
    const observeOptions = { metric: 'gcs.egress' }

    if (!sourceMd5) {
      // if there is no supplied md5 hash, we calculate the hash as the data passes through
      observeOptions.hash = 'md5'
    }

    const observer = new PersistorHelper.ObserverStream(observeOptions)

    const writeOptions = {
      // disabling of resumable uploads is recommended by Google:
      resumable: false
    }

    if (sourceMd5) {
      writeOptions.validation = 'md5'
      writeOptions.metadata = {
        md5Hash: PersistorHelper.hexToBase64(sourceMd5)
      }
    }

    const uploadStream = storage
      .bucket(bucketName)
      .file(key)
      .createWriteStream(writeOptions)

    await pipeline(readStream, observer, uploadStream)

    // if we didn't have an md5 hash, we should compare our computed one with Google's
    // as we couldn't tell GCS about it beforehand
    if (!sourceMd5) {
      sourceMd5 = observer.getHash()
      // throws on mismatch
      await PersistorHelper.verifyMd5(GcsPersistor, bucketName, key, sourceMd5)
    }
  } catch (err) {
    throw PersistorHelper.wrapError(
      err,
      'upload to GCS failed',
      { bucketName, key },
      WriteError
    )
  }
}
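
// Return a readable stream of the object's contents. opts may contain `start`
// and `end` byte offsets; `end` is treated as inclusive (S3/HTTP range
// convention) and adjusted before the GCS read stream is created.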
async function getFileStream(bucketName, key, _opts = {}) {
  const opts = Object.assign({}, _opts)
  if (opts.end) {
    // S3 (and http range headers) treat 'end' as inclusive, so increase this by 1
    opts.end++
  }
  const stream = storage
    .bucket(bucketName)
    .file(key)
    .createReadStream(opts)

  // ingress to us from gcs
  const observer = new PersistorHelper.ObserverStream({
    metric: 'gcs.ingress'
  })

  try {
    // wait for the pipeline to be ready, to catch non-200s
    await PersistorHelper.getReadyPipeline(stream, observer)
    return observer
  } catch (err) {
    throw PersistorHelper.wrapError(
      err,
      'error reading file from GCS',
      { bucketName, key, opts },
      ReadError
    )
  }
}
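
// Return the object's size in bytes, as reported in its GCS metadata.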
async function getFileSize(bucketName, key) {
  try {
    const [metadata] = await storage
      .bucket(bucketName)
      .file(key)
      .getMetadata()
    return metadata.size
  } catch (err) {
    throw PersistorHelper.wrapError(
      err,
      'error getting size of GCS object',
      { bucketName, key },
      ReadError
    )
  }
}
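
// Return the object's MD5 hash as a hex string (GCS stores it base64-encoded).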
async function getFileMd5Hash(bucketName, key) {
  try {
    const [metadata] = await storage
      .bucket(bucketName)
      .file(key)
      .getMetadata()
    return PersistorHelper.base64ToHex(metadata.md5Hash)
  } catch (err) {
    throw PersistorHelper.wrapError(
      err,
      'error getting hash of GCS object',
      { bucketName, key },
      ReadError
    )
  }
}
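
// Delete an object. If deletedBucketSuffix is configured, the object is first
// copied to the corresponding "deleted" bucket under a timestamped key (a
// soft-delete trail); if unlockBeforeDelete is set, any event-based hold is
// released first. NotFoundError is swallowed, so deletes are idempotent.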
async function deleteFile(bucketName, key) {
  try {
    const file = storage.bucket(bucketName).file(key)

    if (settings.filestore.gcs.deletedBucketSuffix) {
      await file.copy(
        storage
          .bucket(`${bucketName}${settings.filestore.gcs.deletedBucketSuffix}`)
          .file(`${key}-${new Date().toISOString()}`)
      )
    }
    if (settings.filestore.gcs.unlockBeforeDelete) {
      await file.setMetadata({ eventBasedHold: false })
    }
    await file.delete()
  } catch (err) {
    const error = PersistorHelper.wrapError(
      err,
      'error deleting GCS object',
      { bucketName, key },
      WriteError
    )
    if (!(error instanceof NotFoundError)) {
      throw error
    }
  }
}
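
// Delete every object under the given key prefix, bounding the number of
// concurrent deletions with asyncPool.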
async function deleteDirectory(bucketName, key) {
  try {
    const [files] = await storage
      .bucket(bucketName)
      .getFiles({ directory: key })

    await asyncPool(
      settings.filestore.gcs.deleteConcurrency,
      files,
      async file => {
        await deleteFile(bucketName, file.name)
      }
    )
  } catch (err) {
    const error = PersistorHelper.wrapError(
      err,
      'failed to delete directory in GCS',
      { bucketName, key },
      WriteError
    )
    if (error instanceof NotFoundError) {
      return
    }
    throw error
  }
}
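
// Return the total size in bytes of all objects under the given key prefix.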
async function directorySize(bucketName, key) {
|
2020-02-12 06:00:45 -05:00
|
|
|
let files
|
|
|
|
|
|
|
|
try {
|
2020-03-05 08:45:46 -05:00
|
|
|
const [response] = await storage
|
2020-03-04 11:04:26 -05:00
|
|
|
.bucket(bucketName)
|
|
|
|
.getFiles({ directory: key })
|
2020-03-05 08:45:46 -05:00
|
|
|
files = response
|
2020-02-12 06:00:45 -05:00
|
|
|
} catch (err) {
|
|
|
|
throw PersistorHelper.wrapError(
|
|
|
|
err,
|
|
|
|
'failed to list objects in GCS',
|
2020-03-04 11:04:26 -05:00
|
|
|
{ bucketName, key },
|
2020-02-12 06:00:45 -05:00
|
|
|
ReadError
|
|
|
|
)
|
|
|
|
}
|
|
|
|
|
|
|
|
return files.reduce((acc, file) => Number(file.metadata.size) + acc, 0)
|
|
|
|
}
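
// Return true if the object exists, false otherwise.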
async function checkIfFileExists(bucketName, key) {
  try {
    const [response] = await storage
      .bucket(bucketName)
      .file(key)
      .exists()
    return response
  } catch (err) {
    throw PersistorHelper.wrapError(
      err,
      'error checking if file exists in GCS',
      { bucketName, key },
      ReadError
    )
  }
}
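
// Copy an object to a new key within the same bucket (server-side copy).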
async function copyFile(bucketName, sourceKey, destKey) {
  try {
    const src = storage.bucket(bucketName).file(sourceKey)
    const dest = storage.bucket(bucketName).file(destKey)
    await src.copy(dest)
  } catch (err) {
    // fake-gcs-server has a bug that returns an invalid response when the file does not exist
    if (err.message === 'Cannot parse response as JSON: not found\n') {
      err.code = 404
    }
    throw PersistorHelper.wrapError(
      err,
      'failed to copy file in GCS',
      { bucketName, sourceKey, destKey },
      WriteError
    )
  }
}