overleaf/libraries/object-persistor/test/unit/S3PersistorTests.js
Simon Detheridge e92b75a2f8 Create new module from overleaf/filestore persistors (#1)
* Create new module from overleaf/filestore persistors

* Convert persistors to ES6 classes with local settings

* Update README.md

Co-authored-by: Eric Mc Sween <eric.mcsween@overleaf.com>

* Update README.md

Co-authored-by: Eric Mc Sween <eric.mcsween@overleaf.com>

* Update .gitignore

Co-authored-by: Eric Mc Sween <eric.mcsween@overleaf.com>

* Switch to AGPL license

* Paginate S3 list-object results

* Remove S3 client caching

* Clean up S3 md5-verification mechanism

* Update README for recent changes

* Update README.md

Co-authored-by: Eric Mc Sween <eric.mcsween@overleaf.com>

* Remove package-lock

* Remove comment about FileHandler

* Add directory marker to FSPersistor.deleteDirectory

* Don't copy opts in GcsPersistor.getObjectStream

* Use Date.now instead of getTime

* Catch errors in migration persistor

* Check that settings.buckets exists

* Don't mutate options in ObserverStream constructor

* Update src/PersistorHelper.js

Co-authored-by: Eric Mc Sween <eric.mcsween@overleaf.com>

* Lint and format fixes

Co-authored-by: Eric Mc Sween <eric.mcsween@overleaf.com>
2020-07-02 14:19:45 +01:00

921 lines
25 KiB
JavaScript

const sinon = require('sinon')
const chai = require('chai')
const { expect } = chai
const modulePath = '../../src/S3Persistor.js'
const SandboxedModule = require('sandboxed-module')
const Errors = require('../../src/Errors')
describe('S3PersistorTests', function () {
// Default credentials, and the option object the S3 client constructor is
// expected to receive when they are used.
const defaultS3Key = 'frog'
const defaultS3Secret = 'prince'
const defaultS3Credentials = {
  credentials: { accessKeyId: defaultS3Key, secretAccessKey: defaultS3Secret }
}

// Bucket, key and object fixtures shared by the tests.
const filename = '/wombat/potato.tex'
const bucket = 'womBucket'
const key = 'monKey'
const destKey = 'donKey'
const objectSize = 5555
const md5 = 'ffffffff00000000ffffffff00000000'
const genericError = new Error('guru meditation error')

// Directory listing fixture; filesSize is the sum of the Size fields.
const files = [
  { Key: 'llama', Size: 11 },
  { Key: 'hippo', Size: 22 }
]
const filesSize = 33

// Stubs, fake errors and the instance under test. All are assigned in the
// top-level beforeEach.
let Metrics
let Logger
let Transform
let S3
let Fs
let ReadStream
let Stream
let S3Persistor
let S3Client
let S3ReadStream
let S3NotFoundError
let S3AccessDeniedError
let FileNotFoundError
let EmptyPromise
let settings
let Hash
let crypto
// Rebuilds every stub and a fresh S3Persistor before each test, so that
// changes a test makes to the stubs or to `settings` do not carry over.
beforeEach(function () {
  // `on` stub shared by both fake read streams: an 'end' listener is invoked
  // as soon as it is registered, and a 'pipe' listener is invoked with a fake
  // source stream.
  function stubStreamEvents() {
    const on = sinon.stub()
    on.withArgs('end').yields()
    on.withArgs('pipe').yields({
      unpipe: sinon.stub(),
      resume: sinon.stub()
    })
    return on
  }

  // Wraps `value` in the `{ promise }` shape returned by the stubbed S3
  // client methods.
  function resolvingRequest(value) {
    return { promise: sinon.stub().resolves(value) }
  }

  settings = {
    key: defaultS3Key,
    secret: defaultS3Secret,
    partSize: 100 * 1024 * 1024
  }

  // Minimal stand-in for stream.Transform: 'readable' listeners are called
  // immediately on registration, every other listener is ignored.
  Transform = class {
    on(event, callback) {
      if (event !== 'readable') {
        return
      }
      callback()
    }

    once() {}

    removeListener() {}
  }
  Stream = { pipeline: sinon.stub().yields(), Transform }

  Metrics = { count: sinon.stub() }
  Logger = { warn: sinon.stub() }

  // Fake local file stream, handed out by the stubbed fs module.
  ReadStream = {
    pipe: sinon.stub().returns('readStream'),
    on: stubStreamEvents(),
    removeListener: sinon.stub()
  }
  Fs = { createReadStream: sinon.stub().returns(ReadStream) }

  FileNotFoundError = Object.assign(new Error('File not found'), {
    code: 'ENOENT'
  })
  S3NotFoundError = Object.assign(new Error('not found'), {
    code: 'NoSuchKey'
  })
  S3AccessDeniedError = Object.assign(new Error('access denied'), {
    code: 'AccessDenied'
  })

  // Fake stream returned by getObject().createReadStream().
  S3ReadStream = {
    on: stubStreamEvents(),
    pipe: sinon.stub(),
    removeListener: sinon.stub()
  }

  // A single request object is shared by copyObject, deleteObject and
  // deleteObjects.
  EmptyPromise = { promise: sinon.stub().resolves() }
  S3Client = {
    getObject: sinon.stub().returns({
      createReadStream: sinon.stub().returns(S3ReadStream)
    }),
    headObject: sinon
      .stub()
      .returns(resolvingRequest({ ContentLength: objectSize, ETag: md5 })),
    listObjectsV2: sinon.stub().returns(resolvingRequest({ Contents: files })),
    upload: sinon.stub().returns(resolvingRequest({ ETag: `"${md5}"` })),
    copyObject: sinon.stub().returns(EmptyPromise),
    deleteObject: sinon.stub().returns(EmptyPromise),
    deleteObjects: sinon.stub().returns(EmptyPromise)
  }
  S3 = sinon.stub().returns(S3Client)

  Hash = {
    end: sinon.stub(),
    read: sinon.stub().returns(md5),
    setEncoding: sinon.stub()
  }
  crypto = { createHash: sinon.stub().returns(Hash) }

  // Load the module with its dependencies replaced by the stubs above, then
  // build the instance under test from `settings`.
  const S3PersistorClass = SandboxedModule.require(modulePath, {
    requires: {
      'aws-sdk/clients/s3': S3,
      'logger-sharelatex': Logger,
      './Errors': Errors,
      fs: Fs,
      stream: Stream,
      'metrics-sharelatex': Metrics,
      crypto
    },
    globals: { console, Buffer }
  })
  S3Persistor = new S3PersistorClass(settings)
})
// Expectations for S3Persistor.getObjectStream: the happy path, byte ranges,
// per-bucket credentials, and how errors emitted by the S3 read stream are
// translated into the persistor's own error classes.
describe('getObjectStream', function () {
  describe('when called with valid parameters', function () {
    let stream

    beforeEach(async function () {
      stream = await S3Persistor.getObjectStream(bucket, key)
    })

    it('returns a metered stream', function () {
      expect(stream).to.be.instanceOf(Transform)
    })

    it('sets the AWS client up with credentials from settings', function () {
      expect(S3).to.have.been.calledWith(defaultS3Credentials)
    })

    it('fetches the right key from the right bucket', function () {
      expect(S3Client.getObject).to.have.been.calledWith({
        Bucket: bucket,
        Key: key
      })
    })

    // Synchronous: the stream was already fetched in beforeEach and nothing
    // is awaited here.
    it('pipes the stream through the meter', function () {
      expect(S3ReadStream.pipe).to.have.been.calledWith(
        sinon.match.instanceOf(Transform)
      )
    })
  })

  describe('when called with a byte range', function () {
    let stream

    beforeEach(async function () {
      stream = await S3Persistor.getObjectStream(bucket, key, {
        start: 5,
        end: 10
      })
    })

    it('returns a metered stream', function () {
      expect(stream).to.be.instanceOf(Transform)
    })

    it('passes the byte range on to S3', function () {
      expect(S3Client.getObject).to.have.been.calledWith({
        Bucket: bucket,
        Key: key,
        Range: 'bytes=5-10'
      })
    })
  })

  describe('when there are alternative credentials', function () {
    let stream
    const alternativeSecret = 'giraffe'
    const alternativeKey = 'hippo'
    const alternativeS3Credentials = {
      credentials: {
        accessKeyId: alternativeKey,
        secretAccessKey: alternativeSecret
      }
    }

    beforeEach(async function () {
      // `settings` is the object that was passed to the constructor in the
      // top-level beforeEach; the tests below rely on the persistor seeing
      // this later mutation.
      settings.bucketCreds = {
        [bucket]: {
          auth_key: alternativeKey,
          auth_secret: alternativeSecret
        }
      }
      stream = await S3Persistor.getObjectStream(bucket, key)
    })

    it('returns a metered stream', function () {
      expect(stream).to.be.instanceOf(Transform)
    })

    it('sets the AWS client up with the alternative credentials', function () {
      expect(S3).to.have.been.calledWith(alternativeS3Credentials)
    })

    it('fetches the right key from the right bucket', function () {
      expect(S3Client.getObject).to.have.been.calledWith({
        Bucket: bucket,
        Key: key
      })
    })

    it('uses the default credentials for an unknown bucket', async function () {
      stream = await S3Persistor.getObjectStream('anotherBucket', key)
      // First client: created in beforeEach for `bucket`. Second client:
      // created by the call above for a bucket without its own credentials.
      expect(S3).to.have.been.calledTwice
      expect(S3.firstCall).to.have.been.calledWith(alternativeS3Credentials)
      expect(S3.secondCall).to.have.been.calledWith(defaultS3Credentials)
    })

    it('throws an error if there are no credentials for the bucket', async function () {
      delete settings.key
      delete settings.secret
      await expect(
        S3Persistor.getObjectStream('anotherBucket', key)
      ).to.eventually.be.rejected.and.be.an.instanceOf(Errors.SettingsError)
    })
  })

  // In the three error cases below, the fake Transform's `on` (which calls
  // 'readable' listeners immediately) is replaced with an inert stub, and the
  // S3 read stream is set to hand the error to any 'error' listener as soon
  // as it is registered.
  describe("when the file doesn't exist", function () {
    let error, stream

    beforeEach(async function () {
      Transform.prototype.on = sinon.stub()
      S3ReadStream.on.withArgs('error').yields(S3NotFoundError)
      try {
        stream = await S3Persistor.getObjectStream(bucket, key)
      } catch (err) {
        error = err
      }
    })

    it('does not return a stream', function () {
      expect(stream).not.to.exist
    })

    it('throws a NotFoundError', function () {
      expect(error).to.be.an.instanceOf(Errors.NotFoundError)
    })

    it('wraps the error', function () {
      expect(error.cause).to.exist
    })

    it('stores the bucket and key in the error', function () {
      expect(error.info).to.include({ bucketName: bucket, key: key })
    })
  })

  describe('when access to the file is denied', function () {
    let error, stream

    beforeEach(async function () {
      Transform.prototype.on = sinon.stub()
      S3ReadStream.on.withArgs('error').yields(S3AccessDeniedError)
      try {
        stream = await S3Persistor.getObjectStream(bucket, key)
      } catch (err) {
        error = err
      }
    })

    it('does not return a stream', function () {
      expect(stream).not.to.exist
    })

    // An AccessDenied error from S3 is expected to surface as NotFoundError.
    it('throws a NotFoundError', function () {
      expect(error).to.be.an.instanceOf(Errors.NotFoundError)
    })

    it('wraps the error', function () {
      expect(error.cause).to.exist
    })

    it('stores the bucket and key in the error', function () {
      expect(error.info).to.include({ bucketName: bucket, key: key })
    })
  })

  describe('when S3 encounters an unknown error', function () {
    let error, stream

    beforeEach(async function () {
      Transform.prototype.on = sinon.stub()
      S3ReadStream.on.withArgs('error').yields(genericError)
      try {
        stream = await S3Persistor.getObjectStream(bucket, key)
      } catch (err) {
        error = err
      }
    })

    it('does not return a stream', function () {
      expect(stream).not.to.exist
    })

    it('throws a ReadError', function () {
      expect(error).to.be.an.instanceOf(Errors.ReadError)
    })

    it('wraps the error', function () {
      expect(error.cause).to.exist
    })

    it('stores the bucket and key in the error', function () {
      expect(error.info).to.include({ bucketName: bucket, key: key })
    })
  })
})
// Expectations for S3Persistor.getObjectSize: the size comes from a
// headObject request, and S3 failures are wrapped in persistor errors.
describe('getObjectSize', function () {
  describe('when called with valid parameters', function () {
    let reportedSize

    beforeEach(async function () {
      reportedSize = await S3Persistor.getObjectSize(bucket, key)
    })

    it('should return the object size', function () {
      expect(reportedSize).to.equal(objectSize)
    })

    it('should pass the bucket and key to S3', function () {
      const expectedParams = { Bucket: bucket, Key: key }
      expect(S3Client.headObject).to.have.been.calledWith(expectedParams)
    })
  })

  describe('when the object is not found', function () {
    let caught

    beforeEach(async function () {
      const failingRequest = {
        promise: sinon.stub().rejects(S3NotFoundError)
      }
      S3Client.headObject = sinon.stub().returns(failingRequest)
      try {
        await S3Persistor.getObjectSize(bucket, key)
      } catch (err) {
        caught = err
      }
    })

    it('should return a NotFoundError', function () {
      expect(caught).to.be.an.instanceOf(Errors.NotFoundError)
    })

    it('should wrap the error', function () {
      expect(caught.cause).to.equal(S3NotFoundError)
    })
  })

  describe('when S3 returns an error', function () {
    let caught

    beforeEach(async function () {
      const failingRequest = { promise: sinon.stub().rejects(genericError) }
      S3Client.headObject = sinon.stub().returns(failingRequest)
      try {
        await S3Persistor.getObjectSize(bucket, key)
      } catch (err) {
        caught = err
      }
    })

    it('should return a ReadError', function () {
      expect(caught).to.be.an.instanceOf(Errors.ReadError)
    })

    it('should wrap the error', function () {
      expect(caught.cause).to.equal(genericError)
    })
  })
})
// Expectations for S3Persistor.sendStream: the source stream is metered and
// uploaded, an optional hex md5 is forwarded as a base64 ContentMD5, and
// upload failures surface as WriteError.
describe('sendStream', function () {
  describe('with valid parameters', function () {
    beforeEach(async function () {
      return S3Persistor.sendStream(bucket, key, ReadStream)
    })

    it('should upload the stream', function () {
      expect(S3Client.upload).to.have.been.calledWith({
        Bucket: bucket,
        Key: key,
        Body: sinon.match.instanceOf(Stream.Transform)
      })
    })

    // The expected partSize equals settings.partSize from the top-level
    // beforeEach.
    it('should upload files in a single part', function () {
      expect(S3Client.upload).to.have.been.calledWith(sinon.match.any, {
        partSize: 100 * 1024 * 1024
      })
    })

    it('should meter the stream', function () {
      expect(ReadStream.pipe).to.have.been.calledWith(
        sinon.match.instanceOf(Stream.Transform)
      )
    })
  })

  describe('when a hash is supplied', function () {
    // The same 16 bytes written two ways: hex as passed to sendStream, and
    // base64 as expected in the upload parameters.
    const hexHash = 'aaaaaaaabbbbbbbbaaaaaaaabbbbbbbb'
    const base64Hash = 'qqqqqru7u7uqqqqqu7u7uw=='

    beforeEach(async function () {
      return S3Persistor.sendStream(bucket, key, ReadStream, hexHash)
    })

    it('sends the hash in base64', function () {
      expect(S3Client.upload).to.have.been.calledWith({
        Bucket: bucket,
        Key: key,
        Body: sinon.match.instanceOf(Transform),
        ContentMD5: base64Hash
      })
    })
  })

  describe('when the upload fails', function () {
    let error

    beforeEach(async function () {
      S3Client.upload = sinon.stub().returns({
        promise: sinon.stub().rejects(genericError)
      })
      try {
        await S3Persistor.sendStream(bucket, key, ReadStream)
      } catch (err) {
        error = err
      }
    })

    it('throws a WriteError', function () {
      expect(error).to.be.an.instanceOf(Errors.WriteError)
    })
  })
})
// Expectations for S3Persistor.sendFile: the local file is opened through the
// stubbed fs module and its stream is uploaded.
describe('sendFile', function () {
  describe('with valid parameters', function () {
    beforeEach(async function () {
      return S3Persistor.sendFile(bucket, key, filename)
    })

    it('should create a read stream for the file', function () {
      expect(Fs.createReadStream).to.have.been.calledWith(filename)
    })

    it('should upload the stream', function () {
      const expectedParams = {
        Bucket: bucket,
        Key: key,
        Body: sinon.match.instanceOf(Transform)
      }
      expect(S3Client.upload).to.have.been.calledWith(expectedParams)
    })
  })
})
// Expectations for S3Persistor.getObjectMd5Hash: the ETag is used when it
// looks like an md5, otherwise the object is downloaded and hashed.
describe('getObjectMd5Hash', function () {
  describe('when the etag is a valid md5 hash', function () {
    let returnedHash

    beforeEach(async function () {
      returnedHash = await S3Persistor.getObjectMd5Hash(bucket, key)
    })

    it('should return the object hash', function () {
      expect(returnedHash).to.equal(md5)
    })

    it('should get the hash from the object metadata', function () {
      const expectedParams = { Bucket: bucket, Key: key }
      expect(S3Client.headObject).to.have.been.calledWith(expectedParams)
    })

    it('should not download the object', function () {
      expect(S3Client.getObject).not.to.have.been.called
    })
  })

  describe("when the etag isn't a valid md5 hash", function () {
    let returnedHash

    beforeEach(async function () {
      // Head response whose ETag cannot be an md5.
      const headResponse = {
        ETag: 'somethingthatisntanmd5',
        Bucket: bucket,
        Key: key
      }
      S3Client.headObject = sinon.stub().returns({
        promise: sinon.stub().resolves(headResponse)
      })
      returnedHash = await S3Persistor.getObjectMd5Hash(bucket, key)
    })

    it('should re-fetch the file to verify it', function () {
      const expectedParams = { Bucket: bucket, Key: key }
      expect(S3Client.getObject).to.have.been.calledWith(expectedParams)
    })

    it('should meter the download', function () {
      const anyTransform = sinon.match.instanceOf(Stream.Transform)
      expect(S3ReadStream.pipe).to.have.been.calledWith(anyTransform)
    })

    it('should calculate the md5 hash from the file', function () {
      expect(Hash.read).to.have.been.called
    })

    // The stubbed Hash.read() returns `md5`.
    it('should return the md5 hash', function () {
      expect(returnedHash).to.equal(md5)
    })
  })
})
// Expectations for S3Persistor.copyObject: a copy within one bucket, and the
// error raised when the source key is missing.
describe('copyObject', function () {
  describe('with valid parameters', function () {
    beforeEach(async function () {
      return S3Persistor.copyObject(bucket, key, destKey)
    })

    it('should copy the object', function () {
      const expectedParams = {
        Bucket: bucket,
        Key: destKey,
        CopySource: `${bucket}/${key}`
      }
      expect(S3Client.copyObject).to.have.been.calledWith(expectedParams)
    })
  })

  describe('when the file does not exist', function () {
    let caught

    beforeEach(async function () {
      const failingRequest = {
        promise: sinon.stub().rejects(S3NotFoundError)
      }
      S3Client.copyObject = sinon.stub().returns(failingRequest)
      try {
        await S3Persistor.copyObject(bucket, key, destKey)
      } catch (err) {
        caught = err
      }
    })

    it('should throw a NotFoundError', function () {
      expect(caught).to.be.an.instanceOf(Errors.NotFoundError)
    })
  })
})
// Expectations for S3Persistor.deleteObject: the bucket and key are forwarded
// to the S3 client's deleteObject.
describe('deleteObject', function () {
  describe('with valid parameters', function () {
    beforeEach(async function () {
      return S3Persistor.deleteObject(bucket, key)
    })

    it('should delete the object', function () {
      const expectedParams = { Bucket: bucket, Key: key }
      expect(S3Client.deleteObject).to.have.been.calledWith(expectedParams)
    })
  })
})
// Expectations for S3Persistor.deleteDirectory: objects under the prefix are
// listed (following continuation tokens) and deleted in bulk, with list and
// delete failures mapped to ReadError and WriteError respectively.
describe('deleteDirectory', function () {
  describe('with valid parameters', function () {
    beforeEach(async function () {
      return S3Persistor.deleteDirectory(bucket, key)
    })

    it('should list the objects in the directory', function () {
      const expectedParams = { Bucket: bucket, Prefix: key }
      expect(S3Client.listObjectsV2).to.have.been.calledWith(expectedParams)
    })

    it('should delete the objects using their keys', function () {
      const expectedParams = {
        Bucket: bucket,
        Delete: {
          Objects: [{ Key: 'llama' }, { Key: 'hippo' }],
          Quiet: true
        }
      }
      expect(S3Client.deleteObjects).to.have.been.calledWith(expectedParams)
    })
  })

  describe('when there are no files', function () {
    beforeEach(async function () {
      const emptyListing = { promise: sinon.stub().resolves({ Contents: [] }) }
      S3Client.listObjectsV2 = sinon.stub().returns(emptyListing)
      return S3Persistor.deleteDirectory(bucket, key)
    })

    it('should list the objects in the directory', function () {
      const expectedParams = { Bucket: bucket, Prefix: key }
      expect(S3Client.listObjectsV2).to.have.been.calledWith(expectedParams)
    })

    it('should not try to delete any objects', function () {
      expect(S3Client.deleteObjects).not.to.have.been.called
    })
  })

  describe('when there are more files available', function () {
    const continuationToken = 'wombat'

    beforeEach(async function () {
      // Only the first listing is truncated; later calls fall back to the
      // default stub, which returns `files` without IsTruncated.
      const truncatedListing = {
        Contents: files,
        IsTruncated: true,
        NextContinuationToken: continuationToken
      }
      S3Client.listObjectsV2.onCall(0).returns({
        promise: sinon.stub().resolves(truncatedListing)
      })
      return S3Persistor.deleteDirectory(bucket, key)
    })

    it('should list the objects a second time, with a continuation token', function () {
      const firstParams = { Bucket: bucket, Prefix: key }
      const secondParams = {
        Bucket: bucket,
        Prefix: key,
        ContinuationToken: continuationToken
      }
      expect(S3Client.listObjectsV2).to.be.calledTwice
      expect(S3Client.listObjectsV2).to.be.calledWith(firstParams)
      expect(S3Client.listObjectsV2).to.be.calledWith(secondParams)
    })

    it('should delete both sets of files', function () {
      expect(S3Client.deleteObjects).to.have.been.calledTwice
    })
  })

  describe('when there is an error listing the objects', function () {
    let caught

    beforeEach(async function () {
      const failingRequest = { promise: sinon.stub().rejects(genericError) }
      S3Client.listObjectsV2 = sinon.stub().returns(failingRequest)
      try {
        await S3Persistor.deleteDirectory(bucket, key)
      } catch (err) {
        caught = err
      }
    })

    it('should generate a ReadError', function () {
      expect(caught).to.be.an.instanceOf(Errors.ReadError)
    })

    it('should wrap the error', function () {
      expect(caught.cause).to.equal(genericError)
    })

    it('should not try to delete any objects', function () {
      expect(S3Client.deleteObjects).not.to.have.been.called
    })
  })

  describe('when there is an error deleting the objects', function () {
    let caught

    beforeEach(async function () {
      const failingRequest = { promise: sinon.stub().rejects(genericError) }
      S3Client.deleteObjects = sinon.stub().returns(failingRequest)
      try {
        await S3Persistor.deleteDirectory(bucket, key)
      } catch (err) {
        caught = err
      }
    })

    it('should generate a WriteError', function () {
      expect(caught).to.be.an.instanceOf(Errors.WriteError)
    })

    it('should wrap the error', function () {
      expect(caught.cause).to.equal(genericError)
    })
  })
})
// Expectations for S3Persistor.directorySize: the sizes of all objects under
// the prefix are summed across listing pages, and listing failures surface
// as ReadError.
describe('directorySize', function () {
  describe('with valid parameters', function () {
    let totalSize

    beforeEach(async function () {
      totalSize = await S3Persistor.directorySize(bucket, key)
    })

    it('should list the objects in the directory', function () {
      const expectedParams = { Bucket: bucket, Prefix: key }
      expect(S3Client.listObjectsV2).to.have.been.calledWith(expectedParams)
    })

    it('should return the directory size', function () {
      expect(totalSize).to.equal(filesSize)
    })
  })

  describe('when there are no files', function () {
    let totalSize

    beforeEach(async function () {
      const emptyListing = { promise: sinon.stub().resolves({ Contents: [] }) }
      S3Client.listObjectsV2 = sinon.stub().returns(emptyListing)
      totalSize = await S3Persistor.directorySize(bucket, key)
    })

    it('should list the objects in the directory', function () {
      const expectedParams = { Bucket: bucket, Prefix: key }
      expect(S3Client.listObjectsV2).to.have.been.calledWith(expectedParams)
    })

    it('should return zero', function () {
      expect(totalSize).to.equal(0)
    })
  })

  describe('when there are more files available', function () {
    const continuationToken = 'wombat'
    let totalSize

    beforeEach(async function () {
      // Only the first listing is truncated; later calls fall back to the
      // default stub, which returns `files` without IsTruncated.
      const truncatedListing = {
        Contents: files,
        IsTruncated: true,
        NextContinuationToken: continuationToken
      }
      S3Client.listObjectsV2.onCall(0).returns({
        promise: sinon.stub().resolves(truncatedListing)
      })
      totalSize = await S3Persistor.directorySize(bucket, key)
    })

    it('should list the objects a second time, with a continuation token', function () {
      const firstParams = { Bucket: bucket, Prefix: key }
      const secondParams = {
        Bucket: bucket,
        Prefix: key,
        ContinuationToken: continuationToken
      }
      expect(S3Client.listObjectsV2).to.be.calledTwice
      expect(S3Client.listObjectsV2).to.be.calledWith(firstParams)
      expect(S3Client.listObjectsV2).to.be.calledWith(secondParams)
    })

    // Both pages contain `files`, so the expected total is twice filesSize.
    it('should return the size of both sets of files', function () {
      expect(totalSize).to.equal(filesSize * 2)
    })
  })

  describe('when there is an error listing the objects', function () {
    let caught

    beforeEach(async function () {
      const failingRequest = { promise: sinon.stub().rejects(genericError) }
      S3Client.listObjectsV2 = sinon.stub().returns(failingRequest)
      try {
        await S3Persistor.directorySize(bucket, key)
      } catch (err) {
        caught = err
      }
    })

    it('should generate a ReadError', function () {
      expect(caught).to.be.an.instanceOf(Errors.ReadError)
    })

    it('should wrap the error', function () {
      expect(caught.cause).to.equal(genericError)
    })
  })
})
// Expectations for S3Persistor.checkIfObjectExists: existence is decided by a
// headObject request; a missing key gives false, and any other failure gives
// a ReadError that wraps an inner ReadError.
describe('checkIfObjectExists', function () {
  describe('when the file exists', function () {
    let found

    beforeEach(async function () {
      found = await S3Persistor.checkIfObjectExists(bucket, key)
    })

    it('should get the object header', function () {
      const expectedParams = { Bucket: bucket, Key: key }
      expect(S3Client.headObject).to.have.been.calledWith(expectedParams)
    })

    it('should return that the file exists', function () {
      expect(found).to.equal(true)
    })
  })

  describe('when the file does not exist', function () {
    let found

    beforeEach(async function () {
      const failingRequest = {
        promise: sinon.stub().rejects(S3NotFoundError)
      }
      S3Client.headObject = sinon.stub().returns(failingRequest)
      found = await S3Persistor.checkIfObjectExists(bucket, key)
    })

    it('should get the object header', function () {
      const expectedParams = { Bucket: bucket, Key: key }
      expect(S3Client.headObject).to.have.been.calledWith(expectedParams)
    })

    it('should return that the file does not exist', function () {
      expect(found).to.equal(false)
    })
  })

  describe('when there is an error', function () {
    let caught

    beforeEach(async function () {
      const failingRequest = { promise: sinon.stub().rejects(genericError) }
      S3Client.headObject = sinon.stub().returns(failingRequest)
      try {
        await S3Persistor.checkIfObjectExists(bucket, key)
      } catch (err) {
        caught = err
      }
    })

    it('should generate a ReadError', function () {
      expect(caught).to.be.an.instanceOf(Errors.ReadError)
    })

    // Two layers of wrapping are expected: an outer ReadError, an inner
    // ReadError, then the original S3 error.
    it('should wrap the upstream ReadError', function () {
      expect(caught.cause).to.be.an.instanceOf(Errors.ReadError)
    })

    it('should eventually wrap the error', function () {
      expect(caught.cause.cause).to.equal(genericError)
    })
  })
})
})