overleaf/libraries/object-persistor/test/unit/S3PersistorTests.js
Jakob Ackermann 2760ad35fc Merge pull request #12899 from overleaf/jpa-object-persistor-pipe
[object-persistor] move away from manual .pipe()

GitOrigin-RevId: 5407d48fbbb026ba81f43c891499bd3a5ff59518
2023-05-23 08:05:12 +00:00

986 lines
27 KiB
JavaScript

const sinon = require('sinon')
const chai = require('chai')
const { expect } = chai
const SandboxedModule = require('sandboxed-module')
const Errors = require('../../src/Errors')
const { EventEmitter } = require('events')
const MODULE_PATH = '../../src/S3Persistor.js'
describe('S3PersistorTests', function () {
// Default credentials placed in `settings`, and the client options expected
// to be built from them.
const defaultS3Key = 'frog'
const defaultS3Secret = 'prince'
const defaultS3Credentials = {
  credentials: { accessKeyId: defaultS3Key, secretAccessKey: defaultS3Secret },
}

// Paths, bucket and object keys passed to the persistor under test.
const filename = '/wombat/potato.tex'
const bucket = 'womBucket'
const key = 'monKey'
const destKey = 'donKey'

// Canned values returned by the stubbed S3 client.
const objectSize = 5555
const genericError = new Error('guru meditation error')
const files = [
  { Key: 'llama', Size: 11 },
  { Key: 'hippo', Size: 22 },
]
const filesSize = 33 // 11 + 22, the combined Size of `files`
const md5 = 'ffffffff00000000ffffffff00000000'
const redirectUrl = 'https://wombat.potato/giraffe'

// Test doubles and the persistor instance; all are (re)assigned in beforeEach.
let Logger
let Transform
let PassThrough
let S3
let Fs
let ReadStream
let Stream
let StreamPromises
let S3GetObjectRequest
let S3Persistor
let S3Client
let S3NotFoundError
let S3AccessDeniedError
let FileNotFoundError
let EmptyPromise
let settings
let Hash
let crypto
// Rebuilds every test double and a fresh S3Persistor before each test so that
// stub call history and per-test overrides never carry over.
beforeEach(function () {
  // Wraps a value in a request-like object whose promise() resolves to it.
  const requestResolving = value => ({
    promise: sinon.stub().resolves(value),
  })

  settings = {
    secret: defaultS3Secret,
    key: defaultS3Key,
    partSize: 100 * 1024 * 1024,
  }

  // Stand-ins for the `stream` and `stream/promises` modules.
  Transform = class {
    once() {}
  }
  PassThrough = class {}
  Stream = {
    Transform,
    PassThrough,
    pipeline: sinon.stub().yields(),
  }
  StreamPromises = {
    pipeline: sinon.stub().resolves(),
  }

  // Single shared request object returned by the copy/delete stubs below.
  EmptyPromise = {
    promise: sinon.stub().resolves(),
  }

  ReadStream = new EventEmitter()

  // Fake for the object returned by S3Client.getObject(). Tests tweak
  // `statusCode` / `err` before calling the persistor to steer the outcome.
  class FakeS3GetObjectRequest extends EventEmitter {
    constructor() {
      super()
      this.statusCode = 200
      this.err = null
      this.aborted = false
    }

    abort() {
      this.aborted = true
    }

    createReadStream() {
      // Deferred so the caller can attach its listeners first: either the
      // read stream errors, or the request reports its HTTP status.
      const announce = () => {
        if (this.err) {
          ReadStream.emit('error', this.err)
        } else {
          this.emit('httpHeaders', this.statusCode)
        }
      }
      setTimeout(announce)
      return ReadStream
    }
  }
  S3GetObjectRequest = new FakeS3GetObjectRequest()

  FileNotFoundError = Object.assign(new Error('File not found'), {
    code: 'ENOENT',
  })
  Fs = {
    createReadStream: sinon.stub().returns(ReadStream),
  }

  S3NotFoundError = Object.assign(new Error('not found'), {
    code: 'NoSuchKey',
  })
  S3AccessDeniedError = Object.assign(new Error('access denied'), {
    code: 'AccessDenied',
  })

  S3Client = {
    getObject: sinon.stub().returns(S3GetObjectRequest),
    headObject: sinon
      .stub()
      .returns(requestResolving({ ContentLength: objectSize, ETag: md5 })),
    listObjectsV2: sinon.stub().returns(requestResolving({ Contents: files })),
    upload: sinon.stub().returns(requestResolving({ ETag: `"${md5}"` })),
    copyObject: sinon.stub().returns(EmptyPromise),
    deleteObject: sinon.stub().returns(EmptyPromise),
    deleteObjects: sinon.stub().returns(EmptyPromise),
    getSignedUrlPromise: sinon.stub().resolves(redirectUrl),
  }
  // Stub handed to the module as 'aws-sdk/clients/s3'; always yields S3Client.
  S3 = sinon.stub().returns(S3Client)

  Hash = {
    end: sinon.stub(),
    read: sinon.stub().returns(md5),
    setEncoding: sinon.stub(),
  }
  crypto = {
    createHash: sinon.stub().returns(Hash),
  }

  Logger = {
    warn: sinon.stub(),
  }

  // Load the module under test with every dependency replaced by a double.
  const S3PersistorClass = SandboxedModule.require(MODULE_PATH, {
    requires: {
      'aws-sdk/clients/s3': S3,
      '@overleaf/logger': Logger,
      './Errors': Errors,
      fs: Fs,
      stream: Stream,
      'stream/promises': StreamPromises,
      crypto,
    },
    globals: { console, Buffer },
  })
  S3Persistor = new S3PersistorClass(settings)
})
// getObjectStream: happy path, byte ranges, per-bucket credentials, client
// options, and the mapping of S3 failures onto persistor error types.
describe('getObjectStream', function () {
  // Parameters getObject is expected to receive for an un-ranged read.
  const plainGetParams = { Bucket: bucket, Key: key }

  describe('when called with valid parameters', function () {
    let returnedStream

    beforeEach(async function () {
      returnedStream = await S3Persistor.getObjectStream(bucket, key)
    })

    it('returns a PassThrough stream', function () {
      expect(returnedStream).to.be.instanceOf(PassThrough)
    })

    it('sets the AWS client up with credentials from settings', function () {
      expect(S3).to.have.been.calledWith(defaultS3Credentials)
    })

    it('fetches the right key from the right bucket', function () {
      expect(S3Client.getObject).to.have.been.calledWith(plainGetParams)
    })

    it('pipes the stream through the meter', async function () {
      const isTransform = sinon.match.instanceOf(Transform)
      const isPassThrough = sinon.match.instanceOf(PassThrough)
      expect(Stream.pipeline).to.have.been.calledWith(
        ReadStream,
        isTransform,
        isPassThrough
      )
    })

    it('does not abort the request', function () {
      expect(S3GetObjectRequest.aborted).to.equal(false)
    })
  })

  describe('when called with a byte range', function () {
    let returnedStream

    beforeEach(async function () {
      const range = { start: 5, end: 10 }
      returnedStream = await S3Persistor.getObjectStream(bucket, key, range)
    })

    it('returns a PassThrough stream', function () {
      expect(returnedStream).to.be.instanceOf(Stream.PassThrough)
    })

    it('passes the byte range on to S3', function () {
      expect(S3Client.getObject).to.have.been.calledWith({
        Bucket: bucket,
        Key: key,
        Range: 'bytes=5-10',
      })
    })
  })

  describe('when streaming fails', function () {
    let returnedStream

    beforeEach(async function () {
      // Make the stubbed pipeline invoke its callback with an error.
      Stream.pipeline.yields(new Error())
      returnedStream = await S3Persistor.getObjectStream(bucket, key)
    })

    it('returns a PassThrough stream', function () {
      expect(returnedStream).to.be.instanceOf(Stream.PassThrough)
    })

    it('aborts the request', function () {
      expect(S3GetObjectRequest.aborted).to.equal(true)
    })
  })

  describe('when there are alternative credentials', function () {
    let returnedStream
    const alternativeSecret = 'giraffe'
    const alternativeKey = 'hippo'
    const alternativeS3Credentials = {
      credentials: {
        accessKeyId: alternativeKey,
        secretAccessKey: alternativeSecret,
      },
    }

    beforeEach(async function () {
      // Register bucket-specific credentials for `bucket` only.
      settings.bucketCreds = {
        [bucket]: {
          auth_key: alternativeKey,
          auth_secret: alternativeSecret,
        },
      }
      returnedStream = await S3Persistor.getObjectStream(bucket, key)
    })

    it('returns a PassThrough stream', function () {
      expect(returnedStream).to.be.instanceOf(Stream.PassThrough)
    })

    it('sets the AWS client up with the alternative credentials', function () {
      expect(S3).to.have.been.calledWith(alternativeS3Credentials)
    })

    it('fetches the right key from the right bucket', function () {
      expect(S3Client.getObject).to.have.been.calledWith(plainGetParams)
    })

    it('uses the default credentials for an unknown bucket', async function () {
      returnedStream = await S3Persistor.getObjectStream('anotherBucket', key)
      expect(S3).to.have.been.calledTwice
      expect(S3.firstCall).to.have.been.calledWith(alternativeS3Credentials)
      expect(S3.secondCall).to.have.been.calledWith(defaultS3Credentials)
    })

    it('throws an error if there are no credentials for the bucket', async function () {
      // Remove the default credentials so only `bucket` has any.
      delete settings.key
      delete settings.secret
      const attempt = S3Persistor.getObjectStream('anotherBucket', key)
      await expect(attempt).to.eventually.be.rejected.and.be.an.instanceOf(
        Errors.SettingsError
      )
    })
  })

  describe('when given S3 options', function () {
    const httpOptions = { timeout: 2000 }
    const maxRetries = 2

    beforeEach(async function () {
      settings.httpOptions = httpOptions
      settings.maxRetries = maxRetries
      await S3Persistor.getObjectStream(bucket, key)
    })

    it('configures the S3 client appropriately', function () {
      expect(S3).to.have.been.calledWithMatch({ httpOptions, maxRetries })
    })
  })

  describe("when the file doesn't exist", function () {
    let error
    let returnedStream

    beforeEach(async function () {
      S3GetObjectRequest.statusCode = 404
      try {
        returnedStream = await S3Persistor.getObjectStream(bucket, key)
      } catch (caught) {
        error = caught
      }
    })

    it('does not return a stream', function () {
      expect(returnedStream).not.to.exist
    })

    it('throws a NotFoundError', function () {
      expect(error).to.be.an.instanceOf(Errors.NotFoundError)
    })

    it('wraps the error', function () {
      expect(error.cause).to.exist
    })

    it('stores the bucket and key in the error', function () {
      expect(error.info).to.include({ bucketName: bucket, key })
    })
  })

  describe('when access to the file is denied', function () {
    let error
    let returnedStream

    beforeEach(async function () {
      S3GetObjectRequest.statusCode = 403
      try {
        returnedStream = await S3Persistor.getObjectStream(bucket, key)
      } catch (caught) {
        error = caught
      }
    })

    it('does not return a stream', function () {
      expect(returnedStream).not.to.exist
    })

    // A 403 is expected to surface as NotFoundError, same as a 404.
    it('throws a NotFoundError', function () {
      expect(error).to.be.an.instanceOf(Errors.NotFoundError)
    })

    it('wraps the error', function () {
      expect(error.cause).to.exist
    })

    it('stores the bucket and key in the error', function () {
      expect(error.info).to.include({ bucketName: bucket, key })
    })
  })

  describe('when S3 encounters an unknown error', function () {
    let error
    let returnedStream

    beforeEach(async function () {
      // The fake request emits this on the read stream instead of headers.
      S3GetObjectRequest.err = genericError
      try {
        returnedStream = await S3Persistor.getObjectStream(bucket, key)
      } catch (caught) {
        error = caught
      }
    })

    it('does not return a stream', function () {
      expect(returnedStream).not.to.exist
    })

    it('throws a ReadError', function () {
      expect(error).to.be.an.instanceOf(Errors.ReadError)
    })

    it('wraps the error', function () {
      expect(error.cause).to.exist
    })

    it('stores the bucket and key in the error', function () {
      expect(error.info).to.include({ bucketName: bucket, key })
    })
  })
})
// getRedirectUrl: the persistor should hand back whatever signed URL the
// stubbed client resolves with.
describe('getRedirectUrl', function () {
  let returnedUrl

  beforeEach(async function () {
    returnedUrl = await S3Persistor.getRedirectUrl(bucket, key)
  })

  it('should request a signed URL', function () {
    expect(S3Client.getSignedUrlPromise).to.have.been.called
  })

  it('should return the url', function () {
    expect(returnedUrl).to.equal(redirectUrl)
  })
})
// getObjectSize: reads the size via headObject and translates S3 failures
// into NotFoundError / ReadError.
describe('getObjectSize', function () {
  describe('when called with valid parameters', function () {
    let reportedSize

    beforeEach(async function () {
      reportedSize = await S3Persistor.getObjectSize(bucket, key)
    })

    it('should return the object size', function () {
      expect(reportedSize).to.equal(objectSize)
    })

    it('should pass the bucket and key to S3', function () {
      const expectedParams = { Bucket: bucket, Key: key }
      expect(S3Client.headObject).to.have.been.calledWith(expectedParams)
    })
  })

  describe('when the object is not found', function () {
    let error

    beforeEach(async function () {
      const failingRequest = { promise: sinon.stub().rejects(S3NotFoundError) }
      S3Client.headObject = sinon.stub().returns(failingRequest)
      try {
        await S3Persistor.getObjectSize(bucket, key)
      } catch (caught) {
        error = caught
      }
    })

    it('should return a NotFoundError', function () {
      expect(error).to.be.an.instanceOf(Errors.NotFoundError)
    })

    it('should wrap the error', function () {
      expect(error.cause).to.equal(S3NotFoundError)
    })
  })

  describe('when S3 returns an error', function () {
    let error

    beforeEach(async function () {
      const failingRequest = { promise: sinon.stub().rejects(genericError) }
      S3Client.headObject = sinon.stub().returns(failingRequest)
      try {
        await S3Persistor.getObjectSize(bucket, key)
      } catch (caught) {
        error = caught
      }
    })

    it('should return a ReadError', function () {
      expect(error).to.be.an.instanceOf(Errors.ReadError)
    })

    it('should wrap the error', function () {
      expect(error.cause).to.equal(genericError)
    })
  })
})
// sendStream: checks the parameters handed to S3Client.upload for plain
// uploads, uploads with an md5, uploads with metadata, and upload failures.
describe('sendStream', function () {
  describe('with valid parameters', function () {
    beforeEach(async function () {
      await S3Persistor.sendStream(bucket, key, ReadStream)
    })

    it('should upload the stream', function () {
      const isTransform = sinon.match.instanceOf(Stream.Transform)
      expect(S3Client.upload).to.have.been.calledWith({
        Bucket: bucket,
        Key: key,
        Body: isTransform,
      })
    })

    it('should upload files in a single part', function () {
      // Same value as settings.partSize configured in the top-level beforeEach.
      const expectedOptions = { partSize: 100 * 1024 * 1024 }
      expect(S3Client.upload).to.have.been.calledWith(
        sinon.match.any,
        expectedOptions
      )
    })

    it('should meter the stream', function () {
      const isTransform = sinon.match.instanceOf(Stream.Transform)
      expect(Stream.pipeline).to.have.been.calledWith(ReadStream, isTransform)
    })
  })

  describe('when a hash is supplied', function () {
    beforeEach(async function () {
      const opts = { sourceMd5: 'aaaaaaaabbbbbbbbaaaaaaaabbbbbbbb' }
      await S3Persistor.sendStream(bucket, key, ReadStream, opts)
    })

    it('sends the hash in base64', function () {
      expect(S3Client.upload).to.have.been.calledWith({
        Bucket: bucket,
        Key: key,
        Body: sinon.match.instanceOf(Transform),
        ContentMD5: 'qqqqqru7u7uqqqqqu7u7uw==',
      })
    })
  })

  describe('when metadata is supplied', function () {
    const contentType = 'text/csv'
    const contentEncoding = 'gzip'

    beforeEach(async function () {
      const opts = { contentType, contentEncoding }
      await S3Persistor.sendStream(bucket, key, ReadStream, opts)
    })

    it('sends the metadata to S3', function () {
      expect(S3Client.upload).to.have.been.calledWith({
        Bucket: bucket,
        Key: key,
        Body: sinon.match.instanceOf(Transform),
        ContentType: contentType,
        ContentEncoding: contentEncoding,
      })
    })
  })

  describe('when the upload fails', function () {
    let error

    beforeEach(async function () {
      const failingRequest = { promise: sinon.stub().rejects(genericError) }
      S3Client.upload = sinon.stub().returns(failingRequest)
      try {
        await S3Persistor.sendStream(bucket, key, ReadStream)
      } catch (caught) {
        error = caught
      }
    })

    it('throws a WriteError', function () {
      expect(error).to.be.an.instanceOf(Errors.WriteError)
    })
  })
})
// sendFile: the file should be opened through fs.createReadStream and then
// uploaded like any other stream.
describe('sendFile', function () {
  describe('with valid parameters', function () {
    beforeEach(async function () {
      await S3Persistor.sendFile(bucket, key, filename)
    })

    it('should create a read stream for the file', function () {
      expect(Fs.createReadStream).to.have.been.calledWith(filename)
    })

    it('should upload the stream', function () {
      const isTransform = sinon.match.instanceOf(Transform)
      expect(S3Client.upload).to.have.been.calledWith({
        Bucket: bucket,
        Key: key,
        Body: isTransform,
      })
    })
  })
})
// getObjectMd5Hash: uses the ETag from headObject when it looks like an md5,
// otherwise downloads the object and hashes it.
describe('getObjectMd5Hash', function () {
  // Parameters both headObject and getObject are expected to receive.
  const objectParams = { Bucket: bucket, Key: key }

  describe('when the etag is a valid md5 hash', function () {
    let returnedHash

    beforeEach(async function () {
      returnedHash = await S3Persistor.getObjectMd5Hash(bucket, key)
    })

    it('should return the object hash', function () {
      expect(returnedHash).to.equal(md5)
    })

    it('should get the hash from the object metadata', function () {
      expect(S3Client.headObject).to.have.been.calledWith(objectParams)
    })

    it('should not download the object', function () {
      expect(S3Client.getObject).not.to.have.been.called
    })
  })

  describe("when the etag isn't a valid md5 hash", function () {
    let returnedHash

    beforeEach(async function () {
      const headResponse = {
        ETag: 'somethingthatisntanmd5',
        Bucket: bucket,
        Key: key,
      }
      S3Client.headObject = sinon.stub().returns({
        promise: sinon.stub().resolves(headResponse),
      })
      returnedHash = await S3Persistor.getObjectMd5Hash(bucket, key)
    })

    it('should re-fetch the file to verify it', function () {
      expect(S3Client.getObject).to.have.been.calledWith(objectParams)
    })

    it('should calculate the md5 hash from the file', function () {
      expect(Hash.read).to.have.been.called
    })

    it('should return the md5 hash', function () {
      // The stubbed Hash.read() yields `md5`.
      expect(returnedHash).to.equal(md5)
    })
  })
})
// copyObject: verifies the copy request and the NotFoundError mapping.
describe('copyObject', function () {
  describe('with valid parameters', function () {
    beforeEach(async function () {
      await S3Persistor.copyObject(bucket, key, destKey)
    })

    it('should copy the object', function () {
      const expectedParams = {
        Bucket: bucket,
        Key: destKey,
        CopySource: `${bucket}/${key}`,
      }
      expect(S3Client.copyObject).to.have.been.calledWith(expectedParams)
    })
  })

  describe('when the file does not exist', function () {
    let error

    beforeEach(async function () {
      const failingRequest = { promise: sinon.stub().rejects(S3NotFoundError) }
      S3Client.copyObject = sinon.stub().returns(failingRequest)
      try {
        await S3Persistor.copyObject(bucket, key, destKey)
      } catch (caught) {
        error = caught
      }
    })

    it('should throw a NotFoundError', function () {
      expect(error).to.be.an.instanceOf(Errors.NotFoundError)
    })
  })
})
// deleteObject: the bucket and key should be forwarded to S3 unchanged.
describe('deleteObject', function () {
  describe('with valid parameters', function () {
    beforeEach(async function () {
      await S3Persistor.deleteObject(bucket, key)
    })

    it('should delete the object', function () {
      const expectedParams = { Bucket: bucket, Key: key }
      expect(S3Client.deleteObject).to.have.been.calledWith(expectedParams)
    })
  })
})
// deleteDirectory: lists objects under a prefix and deletes them, following
// continuation tokens and mapping list/delete failures to Read/WriteError.
describe('deleteDirectory', function () {
  // Parameters expected on the first (token-less) listObjectsV2 call.
  const firstListParams = { Bucket: bucket, Prefix: key }

  describe('with valid parameters', function () {
    beforeEach(async function () {
      await S3Persistor.deleteDirectory(bucket, key)
    })

    it('should list the objects in the directory', function () {
      expect(S3Client.listObjectsV2).to.have.been.calledWith(firstListParams)
    })

    it('should delete the objects using their keys', function () {
      const expectedParams = {
        Bucket: bucket,
        Delete: {
          Objects: [{ Key: 'llama' }, { Key: 'hippo' }],
          Quiet: true,
        },
      }
      expect(S3Client.deleteObjects).to.have.been.calledWith(expectedParams)
    })
  })

  describe('when there are no files', function () {
    beforeEach(async function () {
      const emptyListing = { promise: sinon.stub().resolves({ Contents: [] }) }
      S3Client.listObjectsV2 = sinon.stub().returns(emptyListing)
      await S3Persistor.deleteDirectory(bucket, key)
    })

    it('should list the objects in the directory', function () {
      expect(S3Client.listObjectsV2).to.have.been.calledWith(firstListParams)
    })

    it('should not try to delete any objects', function () {
      expect(S3Client.deleteObjects).not.to.have.been.called
    })
  })

  describe('when there are more files available', function () {
    const continuationToken = 'wombat'

    beforeEach(async function () {
      // Only the first listing is truncated; later calls fall back to the
      // default stub, which returns `files` without a continuation token.
      const truncatedPage = {
        Contents: files,
        IsTruncated: true,
        NextContinuationToken: continuationToken,
      }
      S3Client.listObjectsV2.onCall(0).returns({
        promise: sinon.stub().resolves(truncatedPage),
      })
      await S3Persistor.deleteDirectory(bucket, key)
    })

    it('should list the objects a second time, with a continuation token', function () {
      expect(S3Client.listObjectsV2).to.be.calledTwice
      expect(S3Client.listObjectsV2).to.be.calledWith(firstListParams)
      expect(S3Client.listObjectsV2).to.be.calledWith({
        Bucket: bucket,
        Prefix: key,
        ContinuationToken: continuationToken,
      })
    })

    it('should delete both sets of files', function () {
      expect(S3Client.deleteObjects).to.have.been.calledTwice
    })
  })

  describe('when there is an error listing the objects', function () {
    let error

    beforeEach(async function () {
      const failingRequest = { promise: sinon.stub().rejects(genericError) }
      S3Client.listObjectsV2 = sinon.stub().returns(failingRequest)
      try {
        await S3Persistor.deleteDirectory(bucket, key)
      } catch (caught) {
        error = caught
      }
    })

    it('should generate a ReadError', function () {
      expect(error).to.be.an.instanceOf(Errors.ReadError)
    })

    it('should wrap the error', function () {
      expect(error.cause).to.equal(genericError)
    })

    it('should not try to delete any objects', function () {
      expect(S3Client.deleteObjects).not.to.have.been.called
    })
  })

  describe('when there is an error deleting the objects', function () {
    let error

    beforeEach(async function () {
      const failingRequest = { promise: sinon.stub().rejects(genericError) }
      S3Client.deleteObjects = sinon.stub().returns(failingRequest)
      try {
        await S3Persistor.deleteDirectory(bucket, key)
      } catch (caught) {
        error = caught
      }
    })

    it('should generate a WriteError', function () {
      expect(error).to.be.an.instanceOf(Errors.WriteError)
    })

    it('should wrap the error', function () {
      expect(error.cause).to.equal(genericError)
    })
  })
})
// directorySize: sums the Size of every listed object, following
// continuation tokens, and maps listing failures to ReadError.
describe('directorySize', function () {
  // Parameters expected on the first (token-less) listObjectsV2 call.
  const firstListParams = { Bucket: bucket, Prefix: key }

  describe('with valid parameters', function () {
    let totalSize

    beforeEach(async function () {
      totalSize = await S3Persistor.directorySize(bucket, key)
    })

    it('should list the objects in the directory', function () {
      expect(S3Client.listObjectsV2).to.have.been.calledWith(firstListParams)
    })

    it('should return the directory size', function () {
      expect(totalSize).to.equal(filesSize)
    })
  })

  describe('when there are no files', function () {
    let totalSize

    beforeEach(async function () {
      const emptyListing = { promise: sinon.stub().resolves({ Contents: [] }) }
      S3Client.listObjectsV2 = sinon.stub().returns(emptyListing)
      totalSize = await S3Persistor.directorySize(bucket, key)
    })

    it('should list the objects in the directory', function () {
      expect(S3Client.listObjectsV2).to.have.been.calledWith(firstListParams)
    })

    it('should return zero', function () {
      expect(totalSize).to.equal(0)
    })
  })

  describe('when there are more files available', function () {
    const continuationToken = 'wombat'
    let totalSize

    beforeEach(async function () {
      // Only the first listing is truncated; the second call falls back to
      // the default stub, which returns `files` again without a token.
      const truncatedPage = {
        Contents: files,
        IsTruncated: true,
        NextContinuationToken: continuationToken,
      }
      S3Client.listObjectsV2.onCall(0).returns({
        promise: sinon.stub().resolves(truncatedPage),
      })
      totalSize = await S3Persistor.directorySize(bucket, key)
    })

    it('should list the objects a second time, with a continuation token', function () {
      expect(S3Client.listObjectsV2).to.be.calledTwice
      expect(S3Client.listObjectsV2).to.be.calledWith(firstListParams)
      expect(S3Client.listObjectsV2).to.be.calledWith({
        Bucket: bucket,
        Prefix: key,
        ContinuationToken: continuationToken,
      })
    })

    it('should return the size of both sets of files', function () {
      expect(totalSize).to.equal(filesSize * 2)
    })
  })

  describe('when there is an error listing the objects', function () {
    let error

    beforeEach(async function () {
      const failingRequest = { promise: sinon.stub().rejects(genericError) }
      S3Client.listObjectsV2 = sinon.stub().returns(failingRequest)
      try {
        await S3Persistor.directorySize(bucket, key)
      } catch (caught) {
        error = caught
      }
    })

    it('should generate a ReadError', function () {
      expect(error).to.be.an.instanceOf(Errors.ReadError)
    })

    it('should wrap the error', function () {
      expect(error.cause).to.equal(genericError)
    })
  })
})
// checkIfObjectExists: true when headObject succeeds, false when S3 reports
// the key missing, and a (doubly wrapped) ReadError for anything else.
describe('checkIfObjectExists', function () {
  // Parameters headObject is expected to receive.
  const headParams = { Bucket: bucket, Key: key }

  describe('when the file exists', function () {
    let found

    beforeEach(async function () {
      found = await S3Persistor.checkIfObjectExists(bucket, key)
    })

    it('should get the object header', function () {
      expect(S3Client.headObject).to.have.been.calledWith(headParams)
    })

    it('should return that the file exists', function () {
      expect(found).to.equal(true)
    })
  })

  describe('when the file does not exist', function () {
    let found

    beforeEach(async function () {
      const failingRequest = { promise: sinon.stub().rejects(S3NotFoundError) }
      S3Client.headObject = sinon.stub().returns(failingRequest)
      found = await S3Persistor.checkIfObjectExists(bucket, key)
    })

    it('should get the object header', function () {
      expect(S3Client.headObject).to.have.been.calledWith(headParams)
    })

    it('should return that the file does not exist', function () {
      expect(found).to.equal(false)
    })
  })

  describe('when there is an error', function () {
    let error

    beforeEach(async function () {
      const failingRequest = { promise: sinon.stub().rejects(genericError) }
      S3Client.headObject = sinon.stub().returns(failingRequest)
      try {
        await S3Persistor.checkIfObjectExists(bucket, key)
      } catch (caught) {
        error = caught
      }
    })

    it('should generate a ReadError', function () {
      expect(error).to.be.an.instanceOf(Errors.ReadError)
    })

    it('should wrap the upstream ReadError', function () {
      expect(error.cause).to.be.an.instanceOf(Errors.ReadError)
    })

    it('should eventually wrap the error', function () {
      // Two layers of wrapping: ReadError -> ReadError -> original error.
      expect(error.cause.cause).to.equal(genericError)
    })
  })
})
})