overleaf/services/filestore/app/js/GcsPersistor.js

const settings = require('settings-sharelatex')
const fs = require('fs')
const { promisify, callbackify } = require('util')
const Stream = require('stream')
const { Storage } = require('@google-cloud/storage')
const { WriteError, ReadError, NotFoundError } = require('./Errors')
const asyncPool = require('tiny-async-pool')
const PersistorHelper = require('./PersistorHelper')

const pipeline = promisify(Stream.pipeline)

// endpoint settings will be null by default except for tests
// that's OK - GCS uses the locally-configured service account by default
const storage = new Storage(settings.filestore.gcs.endpoint)

// workaround for broken uploads with custom endpoints:
// https://github.com/googleapis/nodejs-storage/issues/898
if (
  settings.filestore.gcs.endpoint &&
  settings.filestore.gcs.endpoint.apiEndpoint
) {
  storage.interceptors.push({
    request: function(reqOpts) {
      const url = new URL(reqOpts.uri)
      url.host = settings.filestore.gcs.endpoint.apiEndpoint
      if (settings.filestore.gcs.endpoint.apiScheme) {
        url.protocol = settings.filestore.gcs.endpoint.apiScheme
      }
      reqOpts.uri = url.toString()
      return reqOpts
    }
  })
}

const GcsPersistor = {
  sendFile: callbackify(sendFile),
  sendStream: callbackify(sendStream),
  getFileStream: callbackify(getFileStream),
  getFileMd5Hash: callbackify(getFileMd5Hash),
  deleteDirectory: callbackify(deleteDirectory),
  getFileSize: callbackify(getFileSize),
  deleteFile: callbackify(deleteFile),
  copyFile: callbackify(copyFile),
  checkIfFileExists: callbackify(checkIfFileExists),
  directorySize: callbackify(directorySize),
  promises: {
    sendFile,
    sendStream,
    getFileStream,
    getFileMd5Hash,
    deleteDirectory,
    getFileSize,
    deleteFile,
    copyFile,
    checkIfFileExists,
    directorySize
  }
}

module.exports = GcsPersistor
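
// Example usage (illustrative sketch only; the bucket and key names below are
// placeholders, not values from this codebase):
//
//   await GcsPersistor.promises.sendFile('a-bucket', 'some/key', '/tmp/some-file')
//   const size = await GcsPersistor.promises.getFileSize('a-bucket', 'some/key')
//
// The top-level methods expose the same operations in callback style via callbackify.

// Stream a file from the local filesystem into GCS.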
async function sendFile(bucketName, key, fsPath) {
  return sendStream(bucketName, key, fs.createReadStream(fsPath))
}
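
// Upload a stream to GCS. If sourceMd5 is supplied it is passed to GCS for
// validation; otherwise the md5 is computed as the data passes through and
// verified against the stored object afterwards.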
async function sendStream(bucketName, key, readStream, sourceMd5) {
  try {
    // egress from us to gcs
    const observeOptions = { metric: 'gcs.egress' }
    if (!sourceMd5) {
      // if there is no supplied md5 hash, we calculate the hash as the data passes through
      observeOptions.hash = 'md5'
    }
    const observer = new PersistorHelper.ObserverStream(observeOptions)

    const writeOptions = {
      // disabling of resumable uploads is recommended by Google:
      resumable: false
    }
    if (sourceMd5) {
      writeOptions.validation = 'md5'
      writeOptions.metadata = {
        md5Hash: PersistorHelper.hexToBase64(sourceMd5)
      }
    }

    const uploadStream = storage
      .bucket(bucketName)
      .file(key)
      .createWriteStream(writeOptions)
    await pipeline(readStream, observer, uploadStream)

    // if we didn't have an md5 hash, we should compare our computed one with Google's
    // as we couldn't tell GCS about it beforehand
    if (!sourceMd5) {
      sourceMd5 = observer.getHash()
      // throws on mismatch
      await PersistorHelper.verifyMd5(GcsPersistor, bucketName, key, sourceMd5)
    }
  } catch (err) {
    throw PersistorHelper.wrapError(
      err,
      'upload to GCS failed',
      { bucketName, key },
      WriteError
    )
  }
}
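
// Stream an object's contents from GCS. opts are passed to createReadStream;
// an 'end' offset follows the inclusive S3/http-range convention and is
// adjusted below before being handed to GCS.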
async function getFileStream(bucketName, key, _opts = {}) {
  const opts = Object.assign({}, _opts)
  if (opts.end) {
    // S3 (and http range headers) treat 'end' as inclusive, so increase this by 1
    opts.end++
  }
  const stream = storage
    .bucket(bucketName)
    .file(key)
    .createReadStream(opts)

  // ingress to us from gcs
  const observer = new PersistorHelper.ObserverStream({
    metric: 'gcs.ingress'
  })

  try {
    // wait for the pipeline to be ready, to catch non-200s
    await PersistorHelper.getReadyPipeline(stream, observer)
    return observer
  } catch (err) {
    throw PersistorHelper.wrapError(
      err,
      'error reading file from GCS',
      { bucketName, key, opts },
      ReadError
    )
  }
}
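
// Fetch the object's metadata and return its size.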
async function getFileSize(bucketName, key) {
  try {
    const [metadata] = await storage
      .bucket(bucketName)
      .file(key)
      .getMetadata()
    return metadata.size
  } catch (err) {
    throw PersistorHelper.wrapError(
      err,
      'error getting size of GCS object',
      { bucketName, key },
      ReadError
    )
  }
}
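
// Return the object's md5 hash as a hex string (GCS stores it base64-encoded).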
async function getFileMd5Hash(bucketName, key) {
  try {
    const [metadata] = await storage
      .bucket(bucketName)
      .file(key)
      .getMetadata()
    return PersistorHelper.base64ToHex(metadata.md5Hash)
  } catch (err) {
    throw PersistorHelper.wrapError(
      err,
      'error getting hash of GCS object',
      { bucketName, key },
      ReadError
    )
  }
}
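
// Delete an object. If a deleted-bucket suffix is configured, copy the object
// into the corresponding "deleted" bucket first (under a timestamped key) so
// the deletion is recoverable, and optionally clear an event-based hold before
// deleting. NotFoundError is swallowed so deletes are idempotent.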
async function deleteFile(bucketName, key) {
  try {
    const file = storage.bucket(bucketName).file(key)

    if (settings.filestore.gcs.deletedBucketSuffix) {
      await file.copy(
        storage
          .bucket(`${bucketName}${settings.filestore.gcs.deletedBucketSuffix}`)
          .file(`${key}-${new Date().toISOString()}`)
      )
    }
    if (settings.filestore.gcs.unlockBeforeDelete) {
      await file.setMetadata({ eventBasedHold: false })
    }
    await file.delete()
  } catch (err) {
    const error = PersistorHelper.wrapError(
      err,
      'error deleting GCS object',
      { bucketName, key },
      WriteError
    )
    if (!(error instanceof NotFoundError)) {
      throw error
    }
  }
}
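
// Delete every object under the given prefix, bounding the number of
// concurrent deletions with asyncPool.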
async function deleteDirectory(bucketName, key) {
  try {
    const [files] = await storage
      .bucket(bucketName)
      .getFiles({ directory: key })

    await asyncPool(
      settings.filestore.gcs.deleteConcurrency,
      files,
      async file => {
        await deleteFile(bucketName, file.name)
      }
    )
  } catch (err) {
    const error = PersistorHelper.wrapError(
      err,
      'failed to delete directory in GCS',
      { bucketName, key },
      WriteError
    )
    if (error instanceof NotFoundError) {
      return
    }
    throw error
  }
}
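
// Sum the sizes of all objects under the given prefix.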
async function directorySize(bucketName, key) {
  let files

  try {
    const [response] = await storage
      .bucket(bucketName)
      .getFiles({ directory: key })
    files = response
  } catch (err) {
    throw PersistorHelper.wrapError(
      err,
      'failed to list objects in GCS',
      { bucketName, key },
      ReadError
    )
  }

  return files.reduce((acc, file) => Number(file.metadata.size) + acc, 0)
}
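
// Return true if the object exists, false otherwise.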
async function checkIfFileExists(bucketName, key) {
  try {
    const [response] = await storage
      .bucket(bucketName)
      .file(key)
      .exists()
    return response
  } catch (err) {
    throw PersistorHelper.wrapError(
      err,
      'error checking if file exists in GCS',
      { bucketName, key },
      ReadError
    )
  }
}
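
// Server-side copy of an object to a new key within the same bucket.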
async function copyFile(bucketName, sourceKey, destKey) {
  try {
    const src = storage.bucket(bucketName).file(sourceKey)
    const dest = storage.bucket(bucketName).file(destKey)
    await src.copy(dest)
  } catch (err) {
    // fake-gcs-server has a bug that returns an invalid response when the file does not exist
    if (err.message === 'Cannot parse response as JSON: not found\n') {
      err.code = 404
    }
    throw PersistorHelper.wrapError(
      err,
      'failed to copy file in GCS',
      { bucketName, sourceKey, destKey },
      WriteError
    )
  }
}