Merge pull request #21908 from overleaf/jpa-storage-class

[object-persistor] s3: add support for setting storage class on upload

GitOrigin-RevId: dc5480a95ec7f1551f08848f7fa45f760d97ab22
Jakob Ackermann 2024-11-15 11:56:53 +01:00 committed by Copybot
parent 36acad4d1b
commit d6e94d2586
9 changed files with 98 additions and 5 deletions
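For context, a minimal sketch of how a consumer might enable the new setting. Only the S3 backend accepts it (FS and GCS reject it, see below). The bucket name and credentials here are hypothetical, and the import path and export style are assumed from the library's other modules:

const { S3Persistor } = require('@overleaf/object-persistor/src/S3Persistor')

const persistor = new S3Persistor({
  key: process.env.AWS_ACCESS_KEY_ID,
  secret: process.env.AWS_SECRET_ACCESS_KEY,
  // Per-bucket map: uploads to 'my-user-files' get REDUCED_REDUNDANCY;
  // buckets without an entry keep the S3 default (STANDARD).
  storageClass: {
    'my-user-files': 'REDUCED_REDUNDANCY',
  },
})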

View file

@@ -15,6 +15,12 @@ const glob = promisify(globCallbacks)
module.exports = class FSPersistor extends AbstractPersistor {
constructor(settings = {}) {
if (settings.storageClass) {
throw new NotImplementedError(
'FS backend does not support storage classes'
)
}
super()
this.useSubdirectories = Boolean(settings.useSubdirectories)
}

View file

@@ -2,7 +2,12 @@ const fs = require('node:fs')
const { pipeline } = require('node:stream/promises')
const { PassThrough } = require('node:stream')
const { Storage, IdempotencyStrategy } = require('@google-cloud/storage')
const {
WriteError,
ReadError,
NotFoundError,
NotImplementedError,
} = require('./Errors')
const asyncPool = require('tiny-async-pool')
const AbstractPersistor = require('./AbstractPersistor')
const PersistorHelper = require('./PersistorHelper')
@@ -11,8 +16,13 @@ const zlib = require('node:zlib')
module.exports = class GcsPersistor extends AbstractPersistor {
constructor(settings) {
if (settings.storageClass) {
throw new NotImplementedError(
'Use default bucket class for GCS instead of settings.storageClass'
)
}
super()
this.settings = settings
// endpoint settings will be null by default except for tests
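Usage note: both non-S3 backends now fail fast rather than silently ignoring the option. A small sketch (module path assumed; both classes are assigned directly to module.exports, as shown above):

const FSPersistor = require('@overleaf/object-persistor/src/FSPersistor')

try {
  new FSPersistor({ storageClass: { 'my-bucket': 'REDUCED_REDUNDANCY' } })
} catch (err) {
  // NotImplementedError: 'FS backend does not support storage classes'
}

// GcsPersistor throws the same way; per its error message, configure a
// default storage class on the GCS bucket itself instead.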

View file

@@ -311,6 +311,16 @@ class PerProjectEncryptedS3Persistor extends S3Persistor {
return await super.getObjectSize(bucketName, path, { ...opts, ssecOptions })
}
async getObjectStorageClass(bucketName, path, opts = {}) {
const ssecOptions =
opts.ssecOptions ||
(await this.#getExistingDataEncryptionKeyOptions(bucketName, path))
return await super.getObjectStorageClass(bucketName, path, {
...opts,
ssecOptions,
})
}
async directorySize(bucketName, path, continuationToken) {
// Note: Listing a bucket does not require SSE-C credentials.
return await super.directorySize(bucketName, path, continuationToken)
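Usage note: a HeadObject request for an SSE-C object must carry the same encryption headers as the original upload, so this override resolves the object's data encryption key before delegating. A sketch, assuming persistor, bucketName, projectId and fileId come from an existing PerProjectEncryptedS3Persistor setup:

// No ssecOptions needed at the call site; the override fetches the DEK.
const storageClass = await persistor.getObjectStorageClass(
  bucketName,
  `${projectId}/${fileId}`
)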

View file

@@ -64,6 +64,7 @@ class S3Persistor extends AbstractPersistor {
constructor(settings = {}) {
super()
settings.storageClass = settings.storageClass || {}
this.settings = settings
}
@@ -100,13 +101,17 @@ class S3Persistor extends AbstractPersistor {
// observer will catch errors, clean up and log a warning
pipeline(readStream, observer, () => {})
// if we have an md5 hash, pass this to S3 to verify the upload
/** @type {S3.PutObjectRequest} */
const uploadOptions = {
Bucket: bucketName,
Key: key,
Body: observer,
}
if (this.settings.storageClass[bucketName]) {
uploadOptions.StorageClass = this.settings.storageClass[bucketName]
}
if (opts.contentType) {
uploadOptions.ContentType = opts.contentType
}
@@ -336,6 +341,18 @@ class S3Persistor extends AbstractPersistor {
return response.ContentLength || 0
}
/**
* @param {string} bucketName
* @param {string} key
* @param {Object} opts
* @param {SSECOptions} [opts.ssecOptions]
* @return {Promise<string | undefined>}
*/
async getObjectStorageClass(bucketName, key, opts = {}) {
const response = await this.#headObject(bucketName, key, opts)
return response.StorageClass
}
/**
* @param {string} bucketName
* @param {string} key
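Behaviour note: S3 only includes the StorageClass field in HeadObject responses for objects stored in a non-STANDARD class, so undefined here means the bucket default. A sketch reusing the persistor configured earlier (key and stream contents are placeholders):

await persistor.sendStream(
  'my-user-files',
  key,
  Stream.Readable.from(['hello'])
)
// 'REDUCED_REDUNDANCY' for the bucket configured above; a bucket with no
// entry in settings.storageClass yields undefined (STANDARD).
const sc = await persistor.getObjectStorageClass('my-user-files', key)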

View file

@@ -2,7 +2,7 @@ filestore
--data-dirs=uploads,user_files,template_files
--dependencies=s3,gcs
--docker-repos=us-east1-docker.pkg.dev/overleaf-ops/ol-docker
--env-add=ENABLE_CONVERSIONS="true",USE_PROM_METRICS="true",AWS_S3_USER_FILES_BUCKET_NAME=fake-user-files,AWS_S3_USER_FILES_DEK_BUCKET_NAME=fake-user-files-dek,AWS_S3_TEMPLATE_FILES_BUCKET_NAME=fake-template-files,GCS_USER_FILES_BUCKET_NAME=fake-gcs-user-files,GCS_TEMPLATE_FILES_BUCKET_NAME=fake-gcs-template-files
--env-add=ENABLE_CONVERSIONS="true",USE_PROM_METRICS="true",AWS_S3_USER_FILES_STORAGE_CLASS=REDUCED_REDUNDANCY,AWS_S3_USER_FILES_BUCKET_NAME=fake-user-files,AWS_S3_USER_FILES_DEK_BUCKET_NAME=fake-user-files-dek,AWS_S3_TEMPLATE_FILES_BUCKET_NAME=fake-template-files,GCS_USER_FILES_BUCKET_NAME=fake-gcs-user-files,GCS_TEMPLATE_FILES_BUCKET_NAME=fake-gcs-template-files
--env-pass-through=
--esmock-loader=False
--node-version=20.18.0

View file

@@ -35,6 +35,7 @@ services:
NODE_OPTIONS: "--unhandled-rejections=strict"
ENABLE_CONVERSIONS: "true"
USE_PROM_METRICS: "true"
AWS_S3_USER_FILES_STORAGE_CLASS: REDUCED_REDUNDANCY
AWS_S3_USER_FILES_BUCKET_NAME: fake-user-files
AWS_S3_USER_FILES_DEK_BUCKET_NAME: fake-user-files-dek
AWS_S3_TEMPLATE_FILES_BUCKET_NAME: fake-template-files

View file

@@ -52,6 +52,7 @@ services:
NODE_OPTIONS: "--unhandled-rejections=strict"
ENABLE_CONVERSIONS: "true"
USE_PROM_METRICS: "true"
AWS_S3_USER_FILES_STORAGE_CLASS: REDUCED_REDUNDANCY
AWS_S3_USER_FILES_BUCKET_NAME: fake-user-files
AWS_S3_USER_FILES_DEK_BUCKET_NAME: fake-user-files-dek
AWS_S3_TEMPLATE_FILES_BUCKET_NAME: fake-template-files

View file

@@ -31,7 +31,12 @@ process.on('unhandledRejection', e => {
// store settings for multiple backends, so that we can test each one.
// fs will always be available - add others if they are configured
const {
BackendSettings,
s3Config,
s3SSECConfig,
AWS_S3_USER_FILES_STORAGE_CLASS,
} = require('./TestConfig')
const {
AlreadyWrittenError,
NotFoundError,
@@ -1257,6 +1262,7 @@ describe('Filestore', function () {
})
})
/** @type {import('aws-sdk/clients/s3')} */
let s3Client
before('create s3 client', function () {
s3Client = new S3Persistor(s3Config())._getClientForBucket('')
@@ -1401,6 +1407,40 @@ describe('Filestore', function () {
})
})
if (backendSettings.backend === 's3SSEC') {
describe('storageClass', function () {
it('should use the default storage class for dek', async function () {
const key = `${projectId}/${new ObjectId()}`
const dekBucket = process.env.AWS_S3_USER_FILES_DEK_BUCKET_NAME
await app.persistor.sendStream(
dekBucket,
key,
Stream.Readable.from(['hello'])
)
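// S3 omits StorageClass from HeadObject responses for STANDARD objects,
// so the DEK bucket's default class surfaces as undefined here.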
expect(
await app.persistor.getObjectStorageClass(dekBucket, key)
).to.equal(undefined)
})
it('should use the custom storage class for user files', async function () {
const key = `${projectId}/${new ObjectId()}`
await app.persistor.sendStream(
Settings.filestore.stores.user_files,
key,
Stream.Readable.from(['hello'])
)
const sc = AWS_S3_USER_FILES_STORAGE_CLASS
expect(sc).to.exist
expect(
await app.persistor.getObjectStorageClass(
Settings.filestore.stores.user_files,
key
)
).to.equal(sc)
})
})
}
describe('autoGunzip', function () {
let key
beforeEach('new key', function () {

View file

@@ -5,6 +5,9 @@ const {
RootKeyEncryptionKey,
} = require('@overleaf/object-persistor/src/PerProjectEncryptedS3Persistor')
const AWS_S3_USER_FILES_STORAGE_CLASS =
process.env.AWS_S3_USER_FILES_STORAGE_CLASS
// use functions to get a fresh copy, not a reference, each time
function s3BaseConfig() {
return {
@@ -43,6 +46,10 @@ function s3SSECConfig() {
async getRootKeyEncryptionKeys() {
return S3SSECKeys
},
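// Per-bucket map (bucket name -> storage class); when the env var is
// unset the value is undefined, which the persistor treats as no override.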
storageClass: {
[process.env.AWS_S3_USER_FILES_BUCKET_NAME]:
AWS_S3_USER_FILES_STORAGE_CLASS,
},
}
}
@@ -176,6 +183,7 @@ function checkForUnexpectedTestFile()
checkForUnexpectedTestFile()
module.exports = {
AWS_S3_USER_FILES_STORAGE_CLASS,
BackendSettings,
s3Config,
s3SSECConfig,