diff --git a/libraries/object-persistor/package.json b/libraries/object-persistor/package.json index 8b3fcb6480..cd9233dc47 100644 --- a/libraries/object-persistor/package.json +++ b/libraries/object-persistor/package.json @@ -24,6 +24,7 @@ "@overleaf/logger": "*", "@overleaf/metrics": "*", "@overleaf/o-error": "*", + "@overleaf/stream-utils": "*", "aws-sdk": "^2.1691.0", "fast-crc32c": "overleaf/node-fast-crc32c#aae6b2a4c7a7a159395df9cc6c38dfde702d6f51", "glob": "^7.1.6", diff --git a/libraries/object-persistor/src/AbstractPersistor.js b/libraries/object-persistor/src/AbstractPersistor.js index b17d3e7325..a1a432d9fc 100644 --- a/libraries/object-persistor/src/AbstractPersistor.js +++ b/libraries/object-persistor/src/AbstractPersistor.js @@ -1,6 +1,12 @@ const { NotImplementedError } = require('./Errors') module.exports = class AbstractPersistor { + /** + * @param {string} location + * @param {string} target + * @param {string} source + * @return {Promise<void>} + */ async sendFile(location, target, source) { throw new NotImplementedError('method not implemented in persistor', { method: 'sendFile', @@ -10,6 +16,13 @@ module.exports = class AbstractPersistor { }) } + /** + * @param {string} location + * @param {string} target + * @param {NodeJS.ReadableStream} sourceStream + * @param {Object} opts + * @return {Promise<void>} + */ async sendStream(location, target, sourceStream, opts = {}) { throw new NotImplementedError('method not implemented in persistor', { method: 'sendStream', @@ -25,7 +38,7 @@ * @param {Object} opts * @param {Number} opts.start * @param {Number} opts.end - * @return {Promise} + * @return {Promise<NodeJS.ReadableStream>} */ async getObjectStream(location, name, opts) { throw new NotImplementedError('method not implemented in persistor', { @@ -36,6 +49,11 @@ }) } + /** + * @param {string} location + * @param {string} name + * @return {Promise<string>} + */ async getRedirectUrl(location, name) { throw new NotImplementedError('method not implemented in persistor', { method: 'getRedirectUrl', @@ -44,7 +62,13 @@ }) } - async getObjectSize(location, name) { + /** + * @param {string} location + * @param {string} name + * @param {Object} opts + * @return {Promise<number>} + */ + async getObjectSize(location, name, opts) { throw new NotImplementedError('method not implemented in persistor', { method: 'getObjectSize', location, @@ -52,7 +76,13 @@ }) } - async getObjectMd5Hash(location, name) { + /** + * @param {string} location + * @param {string} name + * @param {Object} opts + * @return {Promise<string>} + */ + async getObjectMd5Hash(location, name, opts) { throw new NotImplementedError('method not implemented in persistor', { method: 'getObjectMd5Hash', location, @@ -60,7 +90,14 @@ }) } - async copyObject(location, fromName, toName) { + /** + * @param {string} location + * @param {string} fromName + * @param {string} toName + * @param {Object} opts + * @return {Promise<void>} + */ + async copyObject(location, fromName, toName, opts) { throw new NotImplementedError('method not implemented in persistor', { method: 'copyObject', location, @@ -69,6 +106,11 @@ }) } + /** + * @param {string} location + * @param {string} name + * @return {Promise<void>} + */ async deleteObject(location, name) { throw new NotImplementedError('method not implemented in persistor', { method: 'deleteObject', @@ -77,7 +119,13 @@ module.exports = class 
AbstractPersistor { }) } - async deleteDirectory(location, name) { + /** + * @param {string} location + * @param {string} name + * @param {string} [continuationToken] + * @return {Promise<void>} + */ + async deleteDirectory(location, name, continuationToken) { throw new NotImplementedError('method not implemented in persistor', { method: 'deleteDirectory', location, @@ -85,7 +133,13 @@ module.exports = class AbstractPersistor { }) } - async checkIfObjectExists(location, name) { + /** + * @param {string} location + * @param {string} name + * @param {Object} opts + * @return {Promise<boolean>} + */ + async checkIfObjectExists(location, name, opts) { throw new NotImplementedError('method not implemented in persistor', { method: 'checkIfObjectExists', location, @@ -93,7 +147,13 @@ module.exports = class AbstractPersistor { }) } - async directorySize(location, name) { + /** + * @param {string} location + * @param {string} name + * @param {string} [continuationToken] + * @return {Promise<number>} + */ + async directorySize(location, name, continuationToken) { throw new NotImplementedError('method not implemented in persistor', { method: 'directorySize', location, diff --git a/libraries/object-persistor/src/PerProjectEncryptedS3Persistor.js b/libraries/object-persistor/src/PerProjectEncryptedS3Persistor.js new file mode 100644 index 0000000000..ba9550839f --- /dev/null +++ b/libraries/object-persistor/src/PerProjectEncryptedS3Persistor.js @@ -0,0 +1,282 @@ +// @ts-check +const Stream = require('stream') +const { promisify } = require('util') +const Crypto = require('crypto') +const { WritableBuffer } = require('@overleaf/stream-utils') +const { S3Persistor, SSECOptions } = require('./S3Persistor.js') + +const generateKey = promisify(Crypto.generateKey) + +/** + * @typedef {Object} Settings + * @property {(bucketName: string, path: string) => {bucketName: string, path: string}} pathToDataEncryptionKeyPath + * @property {(bucketName: string, path: string) => boolean} pathIsProjectFolder + * @property {() => Promise<Buffer>} getKeyEncryptionKey + */ + +const { + NotFoundError, + NotImplementedError, + AlreadyWrittenError, +} = require('./Errors') +const fs = require('fs') + +class PerProjectEncryptedS3Persistor extends S3Persistor { + /** @type Settings */ + #settings + /** @type Promise<SSECOptions> */ + #keyEncryptionKeyOptions + + /** + * @param {Settings} settings + */ + constructor(settings) { + super(settings) + this.#settings = settings + this.#keyEncryptionKeyOptions = this.#settings + .getKeyEncryptionKey() + .then(keyAsBuffer => new SSECOptions(keyAsBuffer)) + } + + async ensureKeyEncryptionKeyLoaded() { + await this.#keyEncryptionKeyOptions + } + + /** + * @param {string} bucketName + * @param {string} path + */ + async getDataEncryptionKeySize(bucketName, path) { + const dekPath = this.#settings.pathToDataEncryptionKeyPath(bucketName, path) + return await super.getObjectSize(dekPath.bucketName, dekPath.path, { + ssecOptions: await this.#keyEncryptionKeyOptions, + }) + } + + /** + * @param {string} bucketName + * @param {string} path + * @return {Promise<CachedPerProjectEncryptedS3Persistor>} + */ + async forProject(bucketName, path) { + return new CachedPerProjectEncryptedS3Persistor( + this, + await this.#getDataEncryptionKeyOptions(bucketName, path) + ) + } + + /** + * @param {string} bucketName + * @param {string} path + * @return {Promise<void>} + */ + async generateDataEncryptionKey(bucketName, path) { + await this.#generateDataEncryptionKeyOptions(bucketName, path) + } + + /** + * @param {string} bucketName + * @param {string} path + * @return {Promise<SSECOptions>} + */ + async 
#generateDataEncryptionKeyOptions(bucketName, path) { + const dataEncryptionKey = ( + await generateKey('aes', { length: 256 }) + ).export() + const dekPath = this.#settings.pathToDataEncryptionKeyPath(bucketName, path) + await super.sendStream( + dekPath.bucketName, + dekPath.path, + Stream.Readable.from([dataEncryptionKey]), + { + // Never overwrite an existing data encryption key + ifNoneMatch: '*', + ssecOptions: await this.#keyEncryptionKeyOptions, + } + ) + return new SSECOptions(dataEncryptionKey) + } + + /** + * @param {string} bucketName + * @param {string} path + * @return {Promise<SSECOptions>} + */ + async #getExistingDataEncryptionKeyOptions(bucketName, path) { + const dekPath = this.#settings.pathToDataEncryptionKeyPath(bucketName, path) + const res = await super.getObjectStream(dekPath.bucketName, dekPath.path, { + ssecOptions: await this.#keyEncryptionKeyOptions, + }) + const buf = new WritableBuffer() + await Stream.promises.pipeline(res, buf) + return new SSECOptions(buf.getContents()) + } + + /** + * @param {string} bucketName + * @param {string} path + * @return {Promise<SSECOptions>} + */ + async #getDataEncryptionKeyOptions(bucketName, path) { + try { + return await this.#getExistingDataEncryptionKeyOptions(bucketName, path) + } catch (err) { + if (err instanceof NotFoundError) { + try { + return await this.#generateDataEncryptionKeyOptions(bucketName, path) + } catch (err2) { + if (err2 instanceof AlreadyWrittenError) { + // Concurrent initial write + return await this.#getExistingDataEncryptionKeyOptions( + bucketName, + path + ) + } + throw err2 + } + } + throw err + } + } + + async sendStream(bucketName, path, sourceStream, opts = {}) { + const ssecOptions = + opts.ssecOptions || + (await this.#getDataEncryptionKeyOptions(bucketName, path)) + return await super.sendStream(bucketName, path, sourceStream, { + ...opts, + ssecOptions, + }) + } + + async getObjectStream(bucketName, path, opts = {}) { + const ssecOptions = + opts.ssecOptions || + (await this.#getExistingDataEncryptionKeyOptions(bucketName, path)) + return await super.getObjectStream(bucketName, path, { + ...opts, + ssecOptions, + }) + } + + async getObjectSize(bucketName, path, opts = {}) { + const ssecOptions = + opts.ssecOptions || + (await this.#getExistingDataEncryptionKeyOptions(bucketName, path)) + return await super.getObjectSize(bucketName, path, { ...opts, ssecOptions }) + } + + async directorySize(bucketName, path, continuationToken) { + // Note: Listing a bucket does not require SSE-C credentials. + return await super.directorySize(bucketName, path, continuationToken) + } + + async deleteDirectory(bucketName, path, continuationToken) { + // Note: Listing/Deleting a prefix does not require SSE-C credentials. + await super.deleteDirectory(bucketName, path, continuationToken) + if (this.#settings.pathIsProjectFolder(bucketName, path)) { + const dekPath = this.#settings.pathToDataEncryptionKeyPath( + bucketName, + path + ) + await super.deleteObject(dekPath.bucketName, dekPath.path) + } + } + + async getObjectMd5Hash(bucketName, path, opts = {}) { + // The ETag in object metadata is not the MD5 content hash, so skip the HEAD request. 
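+ // (With SSE-C the ETag can still look like a valid MD5 sum even though it + // is not the hash of the content, so the format check in the base class + // cannot catch it; force the download-and-hash fallback instead.)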
+ opts = { ...opts, etagIsNotMD5: true } + return await super.getObjectMd5Hash(bucketName, path, opts) + } + + async copyObject(bucketName, sourcePath, destinationPath, opts = {}) { + const ssecOptions = + opts.ssecOptions || + (await this.#getDataEncryptionKeyOptions(bucketName, destinationPath)) + const ssecSrcOptions = + opts.ssecSrcOptions || + (await this.#getExistingDataEncryptionKeyOptions(bucketName, sourcePath)) + return await super.copyObject(bucketName, sourcePath, destinationPath, { + ...opts, + ssecOptions, + ssecSrcOptions, + }) + } + + /** + * @param {string} bucketName + * @param {string} path + * @return {Promise<string>} + */ + async getRedirectUrl(bucketName, path) { + throw new NotImplementedError('signed links are not supported with SSE-C') + } +} + +/** + * Helper class for batch updates that avoids re-fetching the project's data encryption key on every operation. + * + * A general "cache" for project keys is another alternative. For now, use a helper class. + */ +class CachedPerProjectEncryptedS3Persistor { + /** @type SSECOptions */ + #projectKeyOptions + /** @type PerProjectEncryptedS3Persistor */ + #parent + + /** + * @param {PerProjectEncryptedS3Persistor} parent + * @param {SSECOptions} projectKeyOptions + */ + constructor(parent, projectKeyOptions) { + this.#parent = parent + this.#projectKeyOptions = projectKeyOptions + } + + /** + * @param {string} bucketName + * @param {string} path + * @param {string} fsPath + */ + async sendFile(bucketName, path, fsPath) { + return await this.sendStream(bucketName, path, fs.createReadStream(fsPath)) + } + + /** + * @param {string} bucketName + * @param {string} path + * @param {NodeJS.ReadableStream} sourceStream + * @param {Object} opts + * @param {string} [opts.contentType] + * @param {string} [opts.contentEncoding] + * @param {'*'} [opts.ifNoneMatch] + * @param {SSECOptions} [opts.ssecOptions] + * @param {string} [opts.sourceMd5] + * @return {Promise<void>} + */ + async sendStream(bucketName, path, sourceStream, opts = {}) { + return await this.#parent.sendStream(bucketName, path, sourceStream, { + ...opts, + ssecOptions: this.#projectKeyOptions, + }) + } + + /** + * @param {string} bucketName + * @param {string} path + * @param {Object} opts + * @param {number} [opts.start] + * @param {number} [opts.end] + * @param {string} [opts.contentEncoding] + * @param {SSECOptions} [opts.ssecOptions] + * @return {Promise<NodeJS.ReadableStream>} + */ + async getObjectStream(bucketName, path, opts = {}) { + return await this.#parent.getObjectStream(bucketName, path, { + ...opts, + ssecOptions: this.#projectKeyOptions, + }) + } +} + +module.exports = PerProjectEncryptedS3Persistor diff --git a/libraries/object-persistor/src/PersistorFactory.js b/libraries/object-persistor/src/PersistorFactory.js index 7923f9c06c..d71605ca66 100644 --- a/libraries/object-persistor/src/PersistorFactory.js +++ b/libraries/object-persistor/src/PersistorFactory.js @@ -1,15 +1,18 @@ const Logger = require('@overleaf/logger') const { SettingsError } = require('./Errors') const GcsPersistor = require('./GcsPersistor') -const S3Persistor = require('./S3Persistor') +const { S3Persistor } = require('./S3Persistor') const FSPersistor = require('./FSPersistor') const MigrationPersistor = require('./MigrationPersistor') +const PerProjectEncryptedS3Persistor = require('./PerProjectEncryptedS3Persistor') function getPersistor(backend, settings) { switch (backend) { case 'aws-sdk': case 's3': return new S3Persistor(settings.s3) + case 's3SSEC': + return new PerProjectEncryptedS3Persistor(settings.s3SSEC) case 'fs': return new 
FSPersistor({ useSubdirectories: settings.useSubdirectories, diff --git a/libraries/object-persistor/src/PersistorHelper.js b/libraries/object-persistor/src/PersistorHelper.js index 087846e476..d54d6985a0 100644 --- a/libraries/object-persistor/src/PersistorHelper.js +++ b/libraries/object-persistor/src/PersistorHelper.js @@ -26,12 +26,14 @@ const SIZE_BUCKETS = [ */ class ObserverStream extends Stream.Transform { /** - * @param {string} metric prefix for metrics - * @param {string} bucket name of source/target bucket - * @param {string} hash optional hash algorithm, e.g. 'md5' + * @param {Object} opts + * @param {string} opts.metric prefix for metrics + * @param {string} opts.bucket name of source/target bucket + * @param {string} [opts.hash] optional hash algorithm, e.g. 'md5' */ - constructor({ metric, bucket, hash = '' }) { + constructor(opts) { super({ autoDestroy: true }) + const { metric, bucket, hash = '' } = opts this.bytes = 0 this.start = performance.now() diff --git a/libraries/object-persistor/src/S3Persistor.js b/libraries/object-persistor/src/S3Persistor.js index f0c834b185..54ff33c940 100644 --- a/libraries/object-persistor/src/S3Persistor.js +++ b/libraries/object-persistor/src/S3Persistor.js @@ -1,3 +1,4 @@ +// @ts-check const http = require('http') const https = require('https') if (http.globalAgent.maxSockets < 300) { @@ -7,6 +8,7 @@ if (https.globalAgent.maxSockets < 300) { https.globalAgent.maxSockets = 300 } +const Crypto = require('crypto') const Metrics = require('@overleaf/metrics') const AbstractPersistor = require('./AbstractPersistor') const PersistorHelper = require('./PersistorHelper') @@ -17,17 +19,75 @@ const S3 = require('aws-sdk/clients/s3') const { URL } = require('url') const { WriteError, ReadError, NotFoundError } = require('./Errors') -module.exports = class S3Persistor extends AbstractPersistor { +/** + * Wrapper with private fields to avoid revealing them on console, JSON.stringify or similar. 
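+ * + * A minimal illustrative sketch (this usage example is an assumption, not part of the API surface): + * @example + * const opts = new SSECOptions(Crypto.randomBytes(32)) // 256-bit key + * console.log(opts) // prints "SSECOptions {}", the key stays hidden + * JSON.stringify(opts) // '{}'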
+ */ +class SSECOptions { + #keyAsBuffer + #keyMD5 + + /** + * @param {Buffer} keyAsBuffer + */ + constructor(keyAsBuffer) { + this.#keyAsBuffer = keyAsBuffer + this.#keyMD5 = Crypto.createHash('md5').update(keyAsBuffer).digest('base64') + } + + getPutOptions() { + return { + SSECustomerKey: this.#keyAsBuffer, + SSECustomerKeyMD5: this.#keyMD5, + SSECustomerAlgorithm: 'AES256', + } + } + + getGetOptions() { + return { + SSECustomerKey: this.#keyAsBuffer, + SSECustomerKeyMD5: this.#keyMD5, + SSECustomerAlgorithm: 'AES256', + } + } + + getCopyOptions() { + return { + CopySourceSSECustomerKey: this.#keyAsBuffer, + CopySourceSSECustomerKeyMD5: this.#keyMD5, + CopySourceSSECustomerAlgorithm: 'AES256', + } + } +} + +class S3Persistor extends AbstractPersistor { constructor(settings = {}) { super() this.settings = settings } + /** + * @param {string} bucketName + * @param {string} key + * @param {string} fsPath + * @return {Promise<void>} + */ async sendFile(bucketName, key, fsPath) { - return await this.sendStream(bucketName, key, fs.createReadStream(fsPath)) + await this.sendStream(bucketName, key, fs.createReadStream(fsPath)) } + /** + * @param {string} bucketName + * @param {string} key + * @param {NodeJS.ReadableStream} readStream + * @param {Object} opts + * @param {string} [opts.contentType] + * @param {string} [opts.contentEncoding] + * @param {'*'} [opts.ifNoneMatch] + * @param {SSECOptions} [opts.ssecOptions] + * @param {string} [opts.sourceMd5] + * @return {Promise<void>} + */ async sendStream(bucketName, key, readStream, opts = {}) { try { const observeOptions = { @@ -55,6 +115,9 @@ module.exports = class S3Persistor extends AbstractPersistor { if (opts.ifNoneMatch === '*') { uploadOptions.IfNoneMatch = '*' } + if (opts.ssecOptions) { + Object.assign(uploadOptions, opts.ssecOptions.getPutOptions()) + } // if we have an md5 hash, pass this to S3 to verify the upload - otherwise // we rely on the S3 client's checksum calculation to validate the upload @@ -78,6 +141,16 @@ } } + /** + * @param {string} bucketName + * @param {string} key + * @param {Object} opts + * @param {number} [opts.start] + * @param {number} [opts.end] + * @param {string} [opts.contentEncoding] + * @param {SSECOptions} [opts.ssecOptions] + * @return {Promise<NodeJS.ReadableStream>} + */ async getObjectStream(bucketName, key, opts) { opts = opts || {} @@ -88,6 +161,9 @@ if (opts.start != null && opts.end != null) { params.Range = `bytes=${opts.start}-${opts.end}` } + if (opts.ssecOptions) { + Object.assign(params, opts.ssecOptions.getGetOptions()) + } const observer = new PersistorHelper.ObserverStream({ metric: 's3.ingress', // ingress from S3 to us bucket: bucketName, @@ -102,10 +178,10 @@ switch (statusCode) { case 200: // full response case 206: // partial response - return resolve() + return resolve(undefined) case 403: // AccessDenied is handled the same as NoSuchKey case 404: // NoSuchKey - return reject(new NotFoundError()) + return reject(new NotFoundError('not found')) default: return reject(new Error('non success status: ' + statusCode)) } @@ -131,17 +207,22 @@ return pass } + /** + * @param {string} bucketName + * @param {string} key + * @return {Promise<string>} + */ async getRedirectUrl(bucketName, key) { const expiresSeconds = Math.round(this.settings.signedUrlExpiryInMs / 1000) try { - const url = 
await this._getClientForBucket( - bucketName - ).getSignedUrlPromise('getObject', { - Bucket: bucketName, - Key: key, - Expires: expiresSeconds, - }) - return url + return await this._getClientForBucket(bucketName).getSignedUrlPromise( + 'getObject', + { + Bucket: bucketName, + Key: key, + Expires: expiresSeconds, + } + ) } catch (err) { throw PersistorHelper.wrapError( err, @@ -152,6 +233,12 @@ } } + /** + * @param {string} bucketName + * @param {string} key + * @param {string} [continuationToken] + * @return {Promise<void>} + */ async deleteDirectory(bucketName, key, continuationToken) { let response const options = { Bucket: bucketName, Prefix: key } @@ -172,8 +259,8 @@ ) } - const objects = response.Contents.map(item => ({ Key: item.Key })) - if (objects.length) { + const objects = response.Contents?.map(item => ({ Key: item.Key || '' })) + if (objects?.length) { try { await this._getClientForBucket(bucketName) .deleteObjects({ @@ -203,12 +290,22 @@ } } - async getObjectSize(bucketName, key) { + /** + * @param {string} bucketName + * @param {string} key + * @param {Object} opts + * @param {SSECOptions} [opts.ssecOptions] + * @return {Promise<S3.HeadObjectOutput>} + */ + async #headObject(bucketName, key, opts = {}) { + const params = { Bucket: bucketName, Key: key } + if (opts.ssecOptions) { + Object.assign(params, opts.ssecOptions.getGetOptions()) + } try { - const response = await this._getClientForBucket(bucketName) - .headObject({ Bucket: bucketName, Key: key }) + return await this._getClientForBucket(bucketName) + .headObject(params) .promise() - return response.ContentLength } catch (err) { throw PersistorHelper.wrapError( err, @@ -219,19 +316,39 @@ } } - async getObjectMd5Hash(bucketName, key) { + /** + * @param {string} bucketName + * @param {string} key + * @param {Object} opts + * @param {SSECOptions} [opts.ssecOptions] + * @return {Promise<number>} + */ + async getObjectSize(bucketName, key, opts = {}) { + const response = await this.#headObject(bucketName, key, opts) + return response.ContentLength || 0 + } + + /** + * @param {string} bucketName + * @param {string} key + * @param {Object} opts + * @param {SSECOptions} [opts.ssecOptions] + * @param {boolean} [opts.etagIsNotMD5] + * @return {Promise<string>} + */ + async getObjectMd5Hash(bucketName, key, opts = {}) { try { - const response = await this._getClientForBucket(bucketName) - .headObject({ Bucket: bucketName, Key: key }) - .promise() - const md5 = S3Persistor._md5FromResponse(response) - if (md5) { - return md5 + if (!opts.etagIsNotMD5) { + const response = await this.#headObject(bucketName, key, opts) + const md5 = S3Persistor._md5FromResponse(response) + if (md5) { + return md5 + } } // etag is not in md5 format Metrics.inc('s3.md5Download') return await PersistorHelper.calculateStreamMd5( - await this.getObjectStream(bucketName, key) + await this.getObjectStream(bucketName, key, opts) ) } catch (err) { throw PersistorHelper.wrapError( @@ -243,6 +360,11 @@ } } + /** + * @param {string} bucketName + * @param {string} key + * @return {Promise<void>} + */ async deleteObject(bucketName, key) { try { await this._getClientForBucket(bucketName) .deleteObject({ Bucket: bucketName, Key: key }) .promise() } catch (err) { throw PersistorHelper.wrapError( err, @@ -259,12 +381,27 @@ } } - async 
copyObject(bucketName, sourceKey, destKey) { + /** + * @param {string} bucketName + * @param {string} sourceKey + * @param {string} destKey + * @param {Object} opts + * @param {SSECOptions} [opts.ssecSrcOptions] + * @param {SSECOptions} [opts.ssecOptions] + * @return {Promise<void>} + */ + async copyObject(bucketName, sourceKey, destKey, opts = {}) { const params = { Bucket: bucketName, Key: destKey, CopySource: `${bucketName}/${sourceKey}`, } + if (opts.ssecSrcOptions) { + Object.assign(params, opts.ssecSrcOptions.getCopyOptions()) + } + if (opts.ssecOptions) { + Object.assign(params, opts.ssecOptions.getPutOptions()) + } try { await this._getClientForBucket(bucketName).copyObject(params).promise() } catch (err) { @@ -277,9 +414,16 @@ } } - async checkIfObjectExists(bucketName, key) { + /** + * @param {string} bucketName + * @param {string} key + * @param {Object} opts + * @param {SSECOptions} [opts.ssecOptions] + * @return {Promise<boolean>} + */ + async checkIfObjectExists(bucketName, key, opts) { try { - await this.getObjectSize(bucketName, key) + await this.getObjectSize(bucketName, key, opts) return true } catch (err) { if (err instanceof NotFoundError) { @@ -294,6 +438,12 @@ } } + /** + * @param {string} bucketName + * @param {string} key + * @param {string} [continuationToken] + * @return {Promise<number>} + */ async directorySize(bucketName, key, continuationToken) { try { const options = { @@ -307,7 +457,8 @@ .listObjectsV2(options) .promise() - const size = response.Contents.reduce((acc, item) => item.Size + acc, 0) + const size = + response.Contents?.reduce((acc, item) => (item.Size || 0) + acc, 0) || 0 if (response.IsTruncated) { return ( size + @@ -329,6 +480,12 @@ } } + /** + * @param {string} bucket + * @param {Object} [clientOptions] + * @return {S3} + * @private + */ _getClientForBucket(bucket, clientOptions) { return new S3( this._buildClientOptions( @@ -338,6 +495,12 @@ ) } + /** + * @param {Object} bucketCredentials + * @param {Object} clientOptions + * @return {Object} + * @private + */ _buildClientOptions(bucketCredentials, clientOptions) { const options = clientOptions || {} @@ -376,6 +539,11 @@ return options } + /** + * @param {S3.HeadObjectOutput} response + * @return {string|null} + * @private + */ static _md5FromResponse(response) { const md5 = (response.ETag || '').replace(/[ "]/g, '') if (!md5.match(/^[a-f0-9]{32}$/)) { @@ -385,3 +553,8 @@ return md5 } } + +module.exports = { + S3Persistor, + SSECOptions, +} diff --git a/libraries/object-persistor/test/unit/PersistorFactoryTests.js b/libraries/object-persistor/test/unit/PersistorFactoryTests.js index 0bbc6e67ce..0a41b92cd0 100644 --- a/libraries/object-persistor/test/unit/PersistorFactoryTests.js +++ b/libraries/object-persistor/test/unit/PersistorFactoryTests.js @@ -32,7 +32,7 @@ describe('PersistorManager', function () { Settings = {} const requires = { './GcsPersistor': GcsPersistor, - './S3Persistor': S3Persistor, + './S3Persistor': { S3Persistor }, './FSPersistor': FSPersistor, '@overleaf/logger': { info() {}, diff --git a/libraries/object-persistor/test/unit/S3PersistorTests.js 
b/libraries/object-persistor/test/unit/S3PersistorTests.js index 7866247f23..60d639fce5 100644 --- a/libraries/object-persistor/test/unit/S3PersistorTests.js +++ b/libraries/object-persistor/test/unit/S3PersistorTests.js @@ -159,7 +159,7 @@ describe('S3PersistorTests', function () { crypto, }, globals: { console, Buffer }, - }))(settings) + }).S3Persistor)(settings) }) describe('getObjectStream', function () { diff --git a/libraries/stream-utils/index.js b/libraries/stream-utils/index.js index ac3e6238a1..cdd8dcb5cf 100644 --- a/libraries/stream-utils/index.js +++ b/libraries/stream-utils/index.js @@ -2,7 +2,7 @@ const { Writable, Readable, PassThrough, Transform } = require('stream') /** * A writable stream that stores all data written to it in a node Buffer. - * @extends stream.Writable + * @extends Writable * @example * const { WritableBuffer } = require('@overleaf/stream-utils') * const bufferStream = new WritableBuffer() @@ -43,7 +43,7 @@ class WritableBuffer extends Writable { /** * A readable stream created from a string. - * @extends stream.Readable + * @extends Readable * @example * const { ReadableString } = require('@overleaf/stream-utils') * const stringStream = new ReadableString('hello world') @@ -66,7 +66,7 @@ class SizeExceededError extends Error {} /** * Limited size stream which will emit a SizeExceededError if the size is exceeded - * @extends stream.Transform + * @extends Transform */ class LimitedStream extends Transform { constructor(maxSize) { @@ -93,7 +93,7 @@ class AbortError extends Error {} /** * TimeoutStream which will emit an AbortError if it exceeds a user specified timeout - * @extends stream.PassThrough + * @extends PassThrough */ class TimeoutStream extends PassThrough { constructor(timeout) { @@ -111,7 +111,7 @@ class TimeoutStream extends PassThrough { /** * LoggerStream which will call the provided logger function when the stream exceeds a user specified limit. It will call the provided function again when flushing the stream and it exceeded the user specified limit before. 
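* @example * // a sketch, assuming the (maxSize, loggingFunction) constructor implied by the description above: * const logged = new LoggerStream(1024 * 1024, (bytes, isFlush) => { * console.warn({ bytes, isFlush }, 'stream exceeded 1MB') * })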
- * @extends stream.Transform + * @extends Transform */ class LoggerStream extends Transform { /** diff --git a/package-lock.json b/package-lock.json index d26252efaf..613e000184 100644 --- a/package-lock.json +++ b/package-lock.json @@ -258,6 +258,7 @@ "@overleaf/logger": "*", "@overleaf/metrics": "*", "@overleaf/o-error": "*", + "@overleaf/stream-utils": "*", "aws-sdk": "^2.1691.0", "fast-crc32c": "overleaf/node-fast-crc32c#aae6b2a4c7a7a159395df9cc6c38dfde702d6f51", "glob": "^7.1.6", @@ -50298,6 +50299,7 @@ "@overleaf/logger": "*", "@overleaf/metrics": "*", "@overleaf/o-error": "*", + "@overleaf/stream-utils": "*", "aws-sdk": "^2.1691.0", "chai": "^4.3.6", "chai-as-promised": "^7.1.1", diff --git a/services/filestore/app/js/FileHandler.js b/services/filestore/app/js/FileHandler.js index 2c95003d62..f3e4f86590 100644 --- a/services/filestore/app/js/FileHandler.js +++ b/services/filestore/app/js/FileHandler.js @@ -130,7 +130,7 @@ async function getFileSize(bucket, key) { } async function getDirectorySize(bucket, projectId) { - return PersistorManager.directorySize(bucket, projectId) + return await PersistorManager.directorySize(bucket, projectId) } async function _getConvertedFile(bucket, key, opts) { diff --git a/services/filestore/buildscript.txt b/services/filestore/buildscript.txt index ccb440316e..09b3443d17 100644 --- a/services/filestore/buildscript.txt +++ b/services/filestore/buildscript.txt @@ -2,7 +2,7 @@ filestore --data-dirs=uploads,user_files,template_files --dependencies=s3,gcs --docker-repos=us-east1-docker.pkg.dev/overleaf-ops/ol-docker ---env-add=ENABLE_CONVERSIONS="true",USE_PROM_METRICS="true",AWS_S3_USER_FILES_BUCKET_NAME=fake-user-files,AWS_S3_TEMPLATE_FILES_BUCKET_NAME=fake-template-files,GCS_USER_FILES_BUCKET_NAME=fake-gcs-user-files,GCS_TEMPLATE_FILES_BUCKET_NAME=fake-gcs-template-files +--env-add=ENABLE_CONVERSIONS="true",USE_PROM_METRICS="true",AWS_S3_USER_FILES_BUCKET_NAME=fake-user-files,AWS_S3_USER_FILES_DEK_BUCKET_NAME=fake-user-files-dek,AWS_S3_TEMPLATE_FILES_BUCKET_NAME=fake-template-files,GCS_USER_FILES_BUCKET_NAME=fake-gcs-user-files,GCS_TEMPLATE_FILES_BUCKET_NAME=fake-gcs-template-files --env-pass-through= --esmock-loader=False --node-version=18.20.4 diff --git a/services/filestore/docker-compose.ci.yml b/services/filestore/docker-compose.ci.yml index e19a99098c..408ec32828 100644 --- a/services/filestore/docker-compose.ci.yml +++ b/services/filestore/docker-compose.ci.yml @@ -36,6 +36,7 @@ services: ENABLE_CONVERSIONS: "true" USE_PROM_METRICS: "true" AWS_S3_USER_FILES_BUCKET_NAME: fake-user-files + AWS_S3_USER_FILES_DEK_BUCKET_NAME: fake-user-files-dek AWS_S3_TEMPLATE_FILES_BUCKET_NAME: fake-template-files GCS_USER_FILES_BUCKET_NAME: fake-gcs-user-files GCS_TEMPLATE_FILES_BUCKET_NAME: fake-gcs-template-files @@ -113,6 +114,7 @@ services: mc alias set s3 https://minio:9000 MINIO_ROOT_USER MINIO_ROOT_PASSWORD mc mb --ignore-existing s3/fake-user-files + mc mb --ignore-existing s3/fake-user-files-dek mc mb --ignore-existing s3/fake-template-files mc admin user add s3 \ OVERLEAF_FILESTORE_S3_ACCESS_KEY_ID \ @@ -138,6 +140,22 @@ services: ], "Resource": "arn:aws:s3:::fake-user-files/*" }, + { + "Effect": "Allow", + "Action": [ + "s3:ListBucket" + ], + "Resource": "arn:aws:s3:::fake-user-files-dek" + }, + { + "Effect": "Allow", + "Action": [ + "s3:PutObject", + "s3:GetObject", + "s3:DeleteObject" + ], + "Resource": "arn:aws:s3:::fake-user-files-dek/*" + }, { "Effect": "Allow", "Action": [ diff --git a/services/filestore/docker-compose.yml 
b/services/filestore/docker-compose.yml index e5ce7dcc8a..0cbd391ff3 100644 --- a/services/filestore/docker-compose.yml +++ b/services/filestore/docker-compose.yml @@ -53,6 +53,7 @@ services: ENABLE_CONVERSIONS: "true" USE_PROM_METRICS: "true" AWS_S3_USER_FILES_BUCKET_NAME: fake-user-files + AWS_S3_USER_FILES_DEK_BUCKET_NAME: fake-user-files-dek AWS_S3_TEMPLATE_FILES_BUCKET_NAME: fake-template-files GCS_USER_FILES_BUCKET_NAME: fake-gcs-user-files GCS_TEMPLATE_FILES_BUCKET_NAME: fake-gcs-template-files @@ -120,6 +121,7 @@ services: mc alias set s3 https://minio:9000 MINIO_ROOT_USER MINIO_ROOT_PASSWORD mc mb --ignore-existing s3/fake-user-files + mc mb --ignore-existing s3/fake-user-files-dek mc mb --ignore-existing s3/fake-template-files mc admin user add s3 \ OVERLEAF_FILESTORE_S3_ACCESS_KEY_ID \ @@ -145,6 +147,22 @@ services: ], "Resource": "arn:aws:s3:::fake-user-files/*" }, + { + "Effect": "Allow", + "Action": [ + "s3:ListBucket" + ], + "Resource": "arn:aws:s3:::fake-user-files-dek" + }, + { + "Effect": "Allow", + "Action": [ + "s3:PutObject", + "s3:GetObject", + "s3:DeleteObject" + ], + "Resource": "arn:aws:s3:::fake-user-files-dek/*" + }, { "Effect": "Allow", "Action": [ diff --git a/services/filestore/test/acceptance/js/FilestoreTests.js b/services/filestore/test/acceptance/js/FilestoreTests.js index eb6cf17376..5254daf9c5 100644 --- a/services/filestore/test/acceptance/js/FilestoreTests.js +++ b/services/filestore/test/acceptance/js/FilestoreTests.js @@ -1,6 +1,7 @@ const chai = require('chai') const { expect } = chai const fs = require('fs') +const Stream = require('stream') const Settings = require('@overleaf/settings') const Path = require('path') const FilestoreApp = require('./FilestoreApp') @@ -34,6 +35,7 @@ process.on('unhandledRejection', e => { const { BackendSettings, s3Config } = require('./TestConfig') const { AlreadyWrittenError, + NotFoundError, NotImplementedError, } = require('@overleaf/object-persistor/src/Errors') @@ -91,6 +93,9 @@ describe('Filestore', function () { projectId, otherProjectId + const dataEncryptionKeySize = + backendSettings.backend === 's3SSEC' ? 
32 : 0 + const BUCKET_NAMES = [ process.env.GCS_USER_FILES_BUCKET_NAME, process.env.GCS_TEMPLATE_FILES_BUCKET_NAME, @@ -98,7 +103,7 @@ describe('Filestore', function () { `${process.env.GCS_TEMPLATE_FILES_BUCKET_NAME}-deleted`, ] - before(async function () { + before('start filestore with new settings', async function () { // create the app with the relevant filestore settings Settings.filestore = backendSettings app = new FilestoreApp() @@ -106,7 +111,7 @@ describe('Filestore', function () { }) if (backendSettings.gcs) { - before(async function () { + before('create gcs buckets', async function () { // create test buckets for gcs const storage = new Storage(Settings.filestore.gcs.endpoint) for (const bucketName of BUCKET_NAMES) { @@ -114,7 +119,7 @@ describe('Filestore', function () { } }) - after(async function () { + after('delete gcs buckets', async function () { // tear down all the gcs buckets const storage = new Storage(Settings.filestore.gcs.endpoint) for (const bucketName of BUCKET_NAMES) { @@ -125,15 +130,14 @@ describe('Filestore', function () { }) } - after(async function () { - await msleep(3000) + after('stop filestore app', async function () { await app.stop() }) - beforeEach(async function () { + beforeEach('fetch previous egress metric', async function () { // retrieve previous metrics from the app - if (['s3', 'gcs'].includes(Settings.filestore.backend)) { - metricPrefix = Settings.filestore.backend + if (['s3', 's3SSEC', 'gcs'].includes(Settings.filestore.backend)) { + metricPrefix = Settings.filestore.backend.replace('SSEC', '') previousEgress = await TestHelper.getMetric( filestoreUrl, `${metricPrefix}_egress` @@ -157,7 +161,7 @@ describe('Filestore', function () { const localFileReadPath = '/tmp/filestore_acceptance_tests_file_read.txt' - beforeEach(async function () { + beforeEach('upload file', async function () { fileId = new ObjectId().toString() fileUrl = `${filestoreUrl}/project/${projectId}/file/${fileId}` constantFileContent = [ @@ -173,11 +177,11 @@ describe('Filestore', function () { if (!res.ok) throw new Error(res.statusText) }) - beforeEach(async function retrievePreviousIngressMetrics() { + beforeEach('retrieve previous ingress metric', async function () { // The upload request can bump the ingress metric. // The content hash validation might require a full download // in case the ETag field of the upload response is not a md5 sum. 
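// With the s3SSEC backend a write also uploads the project data encryption key // and a read also fetches it (32 bytes each way), so the assertions below widen // the expected metric deltas by dataEncryptionKeySize.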
- if (['s3', 'gcs'].includes(Settings.filestore.backend)) { + if (['s3', 's3SSEC', 'gcs'].includes(Settings.filestore.backend)) { previousIngress = await TestHelper.getMetric( filestoreUrl, `${metricPrefix}_ingress` @@ -317,7 +321,9 @@ describe('Filestore', function () { filestoreUrl, `${metricPrefix}_egress` ) - expect(metric - previousEgress).to.equal(constantFileContent.length) + expect(metric - previousEgress).to.equal( + constantFileContent.length + dataEncryptionKeySize + ) }) it('should record an ingress metric when downloading the file', async function () { @@ -329,7 +335,7 @@ describe('Filestore', function () { `${metricPrefix}_ingress` ) expect(metric - previousIngress).to.equal( - constantFileContent.length + constantFileContent.length + dataEncryptionKeySize ) }) @@ -343,7 +349,7 @@ describe('Filestore', function () { filestoreUrl, `${metricPrefix}_ingress` ) - expect(metric - previousIngress).to.equal(9) + expect(metric - previousIngress).to.equal(9 + dataEncryptionKeySize) }) } }) @@ -373,7 +379,7 @@ describe('Filestore', function () { ].join('\n'), ] - before(async function () { + before('create local files', async function () { return await Promise.all([ fsWriteFile(localFileReadPaths[0], constantFileContents[0]), fsWriteFile(localFileReadPaths[1], constantFileContents[1]), @@ -381,7 +387,7 @@ describe('Filestore', function () { ]) }) - beforeEach(async function () { + beforeEach('upload two files', async function () { projectUrl = `${filestoreUrl}/project/${projectId}` otherProjectUrl = `${filestoreUrl}/project/${otherProjectId}` fileIds = [ @@ -457,7 +463,7 @@ describe('Filestore', function () { describe('with a large file', function () { let fileId, fileUrl, largeFileContent, error - beforeEach(async function () { + beforeEach('upload large file', async function () { fileId = new ObjectId().toString() fileUrl = `${filestoreUrl}/project/${projectId}/file/${fileId}` @@ -502,7 +508,7 @@ describe('Filestore', function () { describe('with a file in a specific bucket', function () { let constantFileContent, fileId, fileUrl, bucketName - beforeEach(async function () { + beforeEach('upload file into random bucket', async function () { constantFileContent = `This is a file in a different S3 bucket ${Math.random()}` fileId = new ObjectId().toString() bucketName = `random-bucket-${new ObjectId().toString()}` @@ -546,7 +552,7 @@ describe('Filestore', function () { describe('when deleting a file in GCS', function () { let fileId, fileUrl, content, error, dateBefore, dateAfter - beforeEach(async function () { + beforeEach('upload and delete file', async function () { fileId = new ObjectId() fileUrl = `${filestoreUrl}/project/${projectId}/file/${fileId}` @@ -599,7 +605,7 @@ describe('Filestore', function () { bucket, fallbackBucket - beforeEach(function () { + beforeEach('prepare fallback', function () { constantFileContent = `This is yet more file content ${Math.random()}` fileId = new ObjectId().toString() fileKey = `${projectId}/${fileId}` @@ -610,7 +616,7 @@ describe('Filestore', function () { }) describe('with a file in the fallback bucket', function () { - beforeEach(async function () { + beforeEach('upload into fallback', async function () { await TestHelper.uploadStringToPersistor( app.persistor.fallbackPersistor, fallbackBucket, @@ -637,7 +643,7 @@ describe('Filestore', function () { }) describe('when copyOnMiss is disabled', function () { - beforeEach(function () { + beforeEach('swap copyOnMiss=false', function () { app.persistor.settings.copyOnMiss = false }) @@ -661,7 
+667,7 @@ describe('Filestore', function () { }) describe('when copyOnMiss is enabled', function () { - beforeEach(function () { + beforeEach('swap copyOnMiss=true', function () { app.persistor.settings.copyOnMiss = true }) @@ -690,7 +696,7 @@ describe('Filestore', function () { describe('when copying a file', function () { let newFileId, newFileUrl, newFileKey, opts - beforeEach(function () { + beforeEach('prepare to copy file', function () { const newProjectID = new ObjectId().toString() newFileId = new ObjectId().toString() newFileUrl = `${filestoreUrl}/project/${newProjectID}/file/${newFileId}` @@ -711,7 +717,7 @@ }) describe('when copyOnMiss is false', function () { - beforeEach(async function () { + beforeEach('copy with copyOnMiss=false', async function () { app.persistor.settings.copyOnMiss = false const response = await fetch(newFileUrl, opts) @@ -757,7 +763,7 @@ }) describe('when copyOnMiss is true', function () { - beforeEach(async function () { + beforeEach('copy with copyOnMiss=true', async function () { app.persistor.settings.copyOnMiss = true const response = await fetch(newFileUrl, opts) @@ -806,7 +812,7 @@ }) describe('when sending a file', function () { - beforeEach(async function () { + beforeEach('upload file', async function () { const readStream = streamifier.createReadStream(constantFileContent) const res = await fetch(fileUrl, { @@ -836,7 +842,7 @@ describe('when deleting a file', function () { describe('when the file exists on the primary', function () { - beforeEach(async function () { + beforeEach('upload into primary', async function () { await TestHelper.uploadStringToPersistor( app.persistor.primaryPersistor, bucket, @@ -854,7 +860,7 @@ }) describe('when the file exists on the fallback', function () { - beforeEach(async function () { + beforeEach('upload into fallback', async function () { await TestHelper.uploadStringToPersistor( app.persistor.fallbackPersistor, fallbackBucket, @@ -872,20 +878,23 @@ }) describe('when the file exists on both the primary and the fallback', function () { - beforeEach(async function () { - await TestHelper.uploadStringToPersistor( - app.persistor.primaryPersistor, - bucket, - fileKey, - constantFileContent - ) - await TestHelper.uploadStringToPersistor( - app.persistor.fallbackPersistor, - fallbackBucket, - fileKey, - constantFileContent - ) - }) + beforeEach( + 'upload into both primary and fallback', + async function () { + await TestHelper.uploadStringToPersistor( + app.persistor.primaryPersistor, + bucket, + fileKey, + constantFileContent + ) + await TestHelper.uploadStringToPersistor( + app.persistor.fallbackPersistor, + fallbackBucket, + fileKey, + constantFileContent + ) + } + ) it('should delete the files', async function () { const response1 = await fetch(fileUrl, { method: 'DELETE' }) @@ -914,7 +923,7 @@ '../../fixtures/test.pdf' ) - beforeEach(async function () { + beforeEach('upload test.pdf', async function () { fileId = new ObjectId().toString() fileUrl = `${filestoreUrl}/project/${projectId}/file/${fileId}` const stat = await fsStat(localFileReadPath) @@ -936,7 +945,9 @@ filestoreUrl, `${metricPrefix}_egress` ) - expect(metric - previousEgress).to.equal(localFileSize) + expect(metric - previousEgress).to.equal( + localFileSize + 
dataEncryptionKeySize + ) }) } @@ -944,7 +955,7 @@ describe('Filestore', function () { this.timeout(1000 * 20) let previewFileUrl - beforeEach(function () { + beforeEach('prepare previewFileUrl for preview', function () { previewFileUrl = `${fileUrl}?style=preview` }) @@ -957,6 +968,7 @@ describe('Filestore', function () { it('should respond with image data', async function () { // note: this test relies of the imagemagick conversion working const response = await fetch(previewFileUrl) + expect(response.status).to.equal(200) const body = await response.text() expect(body.length).to.be.greaterThan(400) expect(body.substr(1, 3)).to.equal('PNG') @@ -967,7 +979,7 @@ describe('Filestore', function () { this.timeout(1000 * 20) let previewFileUrl - beforeEach(function () { + beforeEach('prepare previewFileUrl for cacheWarn', function () { previewFileUrl = `${fileUrl}?style=preview&cacheWarm=true` }) @@ -995,6 +1007,238 @@ describe('Filestore', function () { }) }) }) + + describe('with server side encryption', function () { + if (backendSettings.backend !== 's3SSEC') return + + before('sanity check top-level variable', function () { + expect(dataEncryptionKeySize).to.equal(32) + }) + + let fileId1, fileId2, fileKey1, fileKey2, fileUrl1, fileUrl2 + beforeEach('prepare ids', function () { + fileId1 = new ObjectId().toString() + fileId2 = new ObjectId().toString() + fileKey1 = `${projectId}/${fileId1}` + fileKey2 = `${projectId}/${fileId2}` + fileUrl1 = `${filestoreUrl}/project/${projectId}/file/${fileId1}` + fileUrl2 = `${filestoreUrl}/project/${projectId}/file/${fileId2}` + }) + + beforeEach('ensure DEK is missing', async function () { + // Cannot use test helper expectPersistorNotToHaveFile here, we need to use the KEK. + await expect( + app.persistor.getDataEncryptionKeySize( + backendSettings.stores.user_files, + fileKey1 + ) + ).to.rejectedWith(NotFoundError) + }) + + async function createRandomContent(url, suffix = '') { + const content = Math.random().toString() + suffix + const res = await fetch(url, { + method: 'POST', + body: Stream.Readable.from([content]), + }) + if (!res.ok) throw new Error(res.statusText) + return async () => { + const res = await fetch(url, { method: 'GET' }) + if (!res.ok) throw new Error(res.statusText) + expect(await res.text()).to.equal(content) + } + } + + it('should create a DEK when asked explicitly', async function () { + await app.persistor.generateDataEncryptionKey( + backendSettings.stores.user_files, + fileKey1 + ) + expect( + await app.persistor.getDataEncryptionKeySize( + backendSettings.stores.user_files, + fileKey1 + ) + ).to.equal(32) + }) + + it('should create a DEK from writes', async function () { + await createRandomContent(fileUrl1) + expect( + await app.persistor.getDataEncryptionKeySize( + backendSettings.stores.user_files, + fileKey1 + ) + ).to.equal(32) + }) + + it('should not create a DEK from reads', async function () { + const res = await fetch(fileUrl1, { + method: 'GET', + }) + if (res.status !== 404) throw new Error(`${res.status} should be 404`) + + // Cannot use test helper expectPersistorNotToHaveFile here, we need to use the KEK. 
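+ // (getDataEncryptionKeySize issues a HEAD request for the DEK object using + // the key encryption key, so a NotFoundError proves that the failed read + // above did not create a DEK as a side effect.)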
+ await expect( + app.persistor.getDataEncryptionKeySize( + backendSettings.stores.user_files, + fileKey1 + ) + ).to.rejectedWith(NotFoundError) + }) + + it('should never overwrite a data encryption key', async function () { + const checkGET = await createRandomContent(fileUrl1) + + await expect( + app.persistor.generateDataEncryptionKey( + backendSettings.stores.user_files, + fileKey1 + ) + ).to.rejectedWith(AlreadyWrittenError) + + await checkGET() + }) + + it('should re-use the data encryption key after a write', async function () { + const checkGET1 = await createRandomContent(fileUrl1, '1') + const checkGET2 = await createRandomContent(fileUrl2, '2') + await checkGET1() + await checkGET2() + }) + + let s3Client + before('create s3Client', function () { + const cfg = s3Config() + const s3ClientSettings = { + accessKeyId: cfg.key, + secretAccessKey: cfg.secret, + endpoint: cfg.endpoint, + httpOptions: cfg.httpOptions, + s3ForcePathStyle: cfg.pathStyle, + } + s3Client = new S3(s3ClientSettings) + }) + + async function checkDEKStorage({ + dekBucketKeys = [], + userFilesBucketKeys = [], + }) { + await createRandomContent(fileUrl1) + + const { Contents: dekEntries } = await s3Client + .listObjectsV2({ + Bucket: process.env.AWS_S3_USER_FILES_DEK_BUCKET_NAME, + Prefix: `${projectId}/`, + }) + .promise() + expect(dekEntries).to.have.length(dekBucketKeys.length) + // Order is not predictable, use members + expect(dekEntries.map(o => o.Key)).to.have.members(dekBucketKeys) + + const { Contents: userFilesEntries } = await s3Client + .listObjectsV2({ + Bucket: backendSettings.stores.user_files, + Prefix: `${projectId}/`, + }) + .promise() + expect(userFilesEntries).to.have.length(userFilesBucketKeys.length) + // Order is not predictable, use members + expect(userFilesEntries.map(o => o.Key)).to.have.members( + userFilesBucketKeys + ) + } + + it('should use a custom bucket for DEKs', async function () { + await checkDEKStorage({ + dekBucketKeys: [`${projectId}/dek`], + userFilesBucketKeys: [fileKey1], + }) + }) + + describe('deleteDirectory', function () { + let checkGET2 + beforeEach('create files', async function () { + await createRandomContent(fileUrl1, '1') + checkGET2 = await createRandomContent(fileUrl2, '2') + }) + it('should delete sub-folder and keep DEK', async function () { + await app.persistor.deleteDirectory( + Settings.filestore.stores.user_files, + fileKey1 // not really a sub-folder, but it will do for this test. 
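+ // (pathIsProjectFolder only matches a bare project folder such as + // "abc123def/", so deleting this file-level prefix must leave the + // project DEK in place.)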
+ ) + expect( + await app.persistor.checkIfObjectExists( + Settings.filestore.stores.user_files, + fileKey1 + ) + ).to.equal(false) + expect( + await app.persistor.checkIfObjectExists( + Settings.filestore.stores.user_files, + fileKey2 + ) + ).to.equal(true) + expect( + await app.persistor.getDataEncryptionKeySize( + Settings.filestore.stores.user_files, + fileKey2 + ) + ).to.equal(32) + await checkGET2() + }) + it('should delete project folder and DEK', async function () { + await app.persistor.deleteDirectory( + Settings.filestore.stores.user_files, + `${projectId}/` + ) + expect( + await app.persistor.checkIfObjectExists( + Settings.filestore.stores.user_files, + fileKey1 + ) + ).to.equal(false) + expect( + await app.persistor.checkIfObjectExists( + Settings.filestore.stores.user_files, + fileKey2 + ) + ).to.equal(false) + await expect( + app.persistor.getDataEncryptionKeySize( + Settings.filestore.stores.user_files, + fileKey2 + ) + ).to.rejectedWith(NotFoundError) + }) + }) + }) + + describe('checkIfObjectExists', function () { + it('should return false when the object does not exist', async function () { + expect( + await app.persistor.checkIfObjectExists( + Settings.filestore.stores.user_files, + `${projectId}/${new ObjectId().toString()}` + ) + ).to.equal(false) + }) + it('should return true when the object exists', async function () { + const fileId = new ObjectId().toString() + const fileUrl = `${filestoreUrl}/project/${projectId}/file/${fileId}` + const res = await fetch(fileUrl, { + method: 'POST', + body: Stream.Readable.from(['hello']), + }) + if (!res.ok) throw new Error(res.statusText) + expect( + await app.persistor.checkIfObjectExists( + Settings.filestore.stores.user_files, + `${projectId}/${fileId}` + ) + ).to.equal(true) + }) + }) }) } }) diff --git a/services/filestore/test/acceptance/js/TestConfig.js b/services/filestore/test/acceptance/js/TestConfig.js index 4bd97ee0e0..ffc94601d3 100644 --- a/services/filestore/test/acceptance/js/TestConfig.js +++ b/services/filestore/test/acceptance/js/TestConfig.js @@ -1,5 +1,6 @@ const fs = require('fs') const Path = require('path') +const crypto = require('crypto') const https = require('https') // use functions to get a fresh copy, not a reference, each time @@ -25,6 +26,25 @@ function s3Config() { } } +function s3SSECConfig() { + return { + ...s3Config(), + pathIsProjectFolder(_bucketName, path) { + return !!path.match(/^[a-f0-9]+\/$/) + }, + pathToDataEncryptionKeyPath(_bucketName, path) { + const [projectFolder] = path.match(/^[a-f0-9]+\//) + return { + bucketName: process.env.AWS_S3_USER_FILES_DEK_BUCKET_NAME, + path: Path.join(projectFolder, 'dek'), + } + }, + async getKeyEncryptionKey() { + return crypto.generateKeySync('aes', { length: 256 }).export() + }, + } +} + function s3ConfigDefaultProviderCredentials() { return { ...s3BaseConfig(), @@ -91,6 +111,11 @@ const BackendSettings = { gcs: gcsConfig(), stores: gcsStores(), }, + SHARD_01_PerProjectEncryptedS3Persistor: { + backend: 's3SSEC', + s3SSEC: s3SSECConfig(), + stores: s3Stores(), + }, SHARD_02_FallbackS3ToFSPersistor: { backend: 's3', s3: s3Config(),