// overleaf/libraries/object-persistor/test/unit/GcsPersistorTests.js
// Brian Gough d4563c8786 Merge pull request #4866 from overleaf/bg-gcs-delete-directory-batch
// make object-persistor delete gcs files in batches
//
// GitOrigin-RevId: 8ebc892c5f6eb30507ec41d5d3a108e650af5cac
// 2021-08-27 08:03:25 +00:00
//
// (725 lines, 20 KiB, JavaScript — repository-viewer metadata, kept as a comment
// so this file remains valid JavaScript)
const sinon = require('sinon')
const chai = require('chai')
const { expect } = chai
const modulePath = '../../src/GcsPersistor.js'
const SandboxedModule = require('sandboxed-module')
const { ObjectId } = require('mongodb')
const asyncPool = require('tiny-async-pool')
const Errors = require('../../src/Errors')
describe('GcsPersistorTests', function () {
const filename = '/wombat/potato.tex'
const bucket = 'womBucket'
const key = 'monKey'
const destKey = 'donKey'
const genericError = new Error('guru meditation error')
const filesSize = 33
const md5 = 'ffffffff00000000ffffffff00000000'
const WriteStream = 'writeStream'
const redirectUrl = 'https://wombat.potato/giraffe'
let Logger,
Transform,
Storage,
Fs,
GcsNotFoundError,
ReadStream,
Stream,
GcsBucket,
GcsFile,
GcsPersistor,
FileNotFoundError,
Hash,
Settings,
crypto,
files
beforeEach(function () {
Settings = {
directoryKeyRegex: /^[0-9a-fA-F]{24}\/[0-9a-fA-F]{24}/,
Metrics: {
count: sinon.stub()
}
}
files = [
{
metadata: { size: 11, md5Hash: '/////wAAAAD/////AAAAAA==' },
delete: sinon.stub()
},
{
metadata: { size: 22, md5Hash: '/////wAAAAD/////AAAAAA==' },
delete: sinon.stub()
}
]
ReadStream = {
pipe: sinon.stub().returns('readStream'),
on: sinon.stub(),
removeListener: sinon.stub()
}
ReadStream.on.withArgs('end').yields()
ReadStream.on.withArgs('pipe').yields({
unpipe: sinon.stub(),
resume: sinon.stub(),
on: sinon.stub()
})
Transform = class {
on(event, callback) {
if (event === 'readable') {
callback()
}
}
once() {}
removeListener() {}
}
Stream = {
pipeline: sinon.stub().yields(),
Transform: Transform
}
GcsFile = {
delete: sinon.stub().resolves(),
createReadStream: sinon.stub().returns(ReadStream),
getMetadata: sinon.stub().resolves([files[0].metadata]),
createWriteStream: sinon.stub().returns(WriteStream),
copy: sinon.stub().resolves(),
exists: sinon.stub().resolves([true]),
getSignedUrl: sinon.stub().resolves([redirectUrl])
}
GcsBucket = {
file: sinon.stub().returns(GcsFile),
getFiles: sinon.stub().resolves([files])
}
Storage = class {
constructor() {
this.interceptors = []
}
}
Storage.prototype.bucket = sinon.stub().returns(GcsBucket)
GcsNotFoundError = new Error('File not found')
GcsNotFoundError.code = 404
Fs = {
createReadStream: sinon.stub().returns(ReadStream)
}
FileNotFoundError = new Error('File not found')
FileNotFoundError.code = 'ENOENT'
Hash = {
end: sinon.stub(),
read: sinon.stub().returns(md5),
digest: sinon.stub().returns(md5),
setEncoding: sinon.stub()
}
crypto = {
createHash: sinon.stub().returns(Hash)
}
Logger = {
warn: sinon.stub()
}
GcsPersistor = new (SandboxedModule.require(modulePath, {
requires: {
'@google-cloud/storage': { Storage },
'logger-sharelatex': Logger,
'tiny-async-pool': asyncPool,
'./Errors': Errors,
fs: Fs,
stream: Stream,
crypto
},
globals: { console, Buffer }
}))(Settings)
})
describe('getObjectStream', function () {
describe('when called with valid parameters', function () {
let stream
beforeEach(async function () {
stream = await GcsPersistor.getObjectStream(bucket, key)
})
it('returns a metered stream', function () {
expect(stream).to.be.instanceOf(Transform)
})
it('fetches the right key from the right bucket', function () {
expect(Storage.prototype.bucket).to.have.been.calledWith(bucket)
expect(GcsBucket.file).to.have.been.calledWith(key)
expect(GcsFile.createReadStream).to.have.been.called
})
it('disables automatic decompression', function () {
expect(GcsFile.createReadStream).to.have.been.calledWith({
decompress: false
})
})
it('pipes the stream through the meter', function () {
expect(ReadStream.pipe).to.have.been.calledWith(
sinon.match.instanceOf(Transform)
)
})
})
describe('when called with a byte range', function () {
let stream
beforeEach(async function () {
stream = await GcsPersistor.getObjectStream(bucket, key, {
start: 5,
end: 10
})
})
it('returns a metered stream', function () {
expect(stream).to.be.instanceOf(Transform)
})
it('passes the byte range on to GCS', function () {
expect(GcsFile.createReadStream).to.have.been.calledWith({
decompress: false,
start: 5,
end: 10
})
})
})
describe("when the file doesn't exist", function () {
let error, stream
beforeEach(async function () {
Transform.prototype.on = sinon.stub()
ReadStream.on.withArgs('error').yields(GcsNotFoundError)
try {
stream = await GcsPersistor.getObjectStream(bucket, key)
} catch (e) {
error = e
}
})
it('does not return a stream', function () {
expect(stream).not.to.exist
})
it('throws a NotFoundError', function () {
expect(error).to.be.an.instanceOf(Errors.NotFoundError)
})
it('wraps the error', function () {
expect(error.cause).to.exist
})
it('stores the bucket and key in the error', function () {
expect(error.info).to.include({ bucketName: bucket, key: key })
})
})
describe('when Gcs encounters an unkown error', function () {
let error, stream
beforeEach(async function () {
Transform.prototype.on = sinon.stub()
ReadStream.on.withArgs('error').yields(genericError)
try {
stream = await GcsPersistor.getObjectStream(bucket, key)
} catch (err) {
error = err
}
})
it('does not return a stream', function () {
expect(stream).not.to.exist
})
it('throws a ReadError', function () {
expect(error).to.be.an.instanceOf(Errors.ReadError)
})
it('wraps the error', function () {
expect(error.cause).to.exist
})
it('stores the bucket and key in the error', function () {
expect(error.info).to.include({ bucketName: bucket, key: key })
})
})
})
describe('getRedirectUrl', function () {
let signedUrl
describe('with signed URLs', function () {
beforeEach(async function () {
signedUrl = await GcsPersistor.getRedirectUrl(bucket, key)
})
it('should request a signed URL', function () {
expect(GcsFile.getSignedUrl).to.have.been.called
})
it('should return the url', function () {
expect(signedUrl).to.equal(redirectUrl)
})
})
describe('with unsigned URLs', function () {
beforeEach(async function () {
GcsPersistor.settings.unsignedUrls = true
GcsPersistor.settings.endpoint = {
apiScheme: 'http',
apiEndpoint: 'custom.endpoint'
}
signedUrl = await GcsPersistor.getRedirectUrl(bucket, key)
})
it('should return a plain URL', function () {
expect(signedUrl).to.equal(
`http://custom.endpoint/download/storage/v1/b/${bucket}/o/${key}?alt=media`
)
})
})
})
describe('getObjectSize', function () {
describe('when called with valid parameters', function () {
let size
beforeEach(async function () {
size = await GcsPersistor.getObjectSize(bucket, key)
})
it('should return the object size', function () {
expect(size).to.equal(files[0].metadata.size)
})
it('should pass the bucket and key to GCS', function () {
expect(Storage.prototype.bucket).to.have.been.calledWith(bucket)
expect(GcsBucket.file).to.have.been.calledWith(key)
expect(GcsFile.getMetadata).to.have.been.called
})
})
describe('when the object is not found', function () {
let error
beforeEach(async function () {
GcsFile.getMetadata = sinon.stub().rejects(GcsNotFoundError)
try {
await GcsPersistor.getObjectSize(bucket, key)
} catch (err) {
error = err
}
})
it('should return a NotFoundError', function () {
expect(error).to.be.an.instanceOf(Errors.NotFoundError)
})
it('should wrap the error', function () {
expect(error.cause).to.equal(GcsNotFoundError)
})
})
describe('when GCS returns an error', function () {
let error
beforeEach(async function () {
GcsFile.getMetadata = sinon.stub().rejects(genericError)
try {
await GcsPersistor.getObjectSize(bucket, key)
} catch (err) {
error = err
}
})
it('should return a ReadError', function () {
expect(error).to.be.an.instanceOf(Errors.ReadError)
})
it('should wrap the error', function () {
expect(error.cause).to.equal(genericError)
})
})
})
describe('sendStream', function () {
describe('with valid parameters', function () {
beforeEach(async function () {
return GcsPersistor.sendStream(bucket, key, ReadStream)
})
it('should upload the stream', function () {
expect(Storage.prototype.bucket).to.have.been.calledWith(bucket)
expect(GcsBucket.file).to.have.been.calledWith(key)
expect(GcsFile.createWriteStream).to.have.been.called
})
it('should not try to create a resumable upload', function () {
expect(GcsFile.createWriteStream).to.have.been.calledWith({
resumable: false
})
})
it('should meter the stream and pass it to GCS', function () {
expect(Stream.pipeline).to.have.been.calledWith(
ReadStream,
sinon.match.instanceOf(Transform),
WriteStream
)
})
it('calculates the md5 hash of the file', function () {
expect(Hash.digest).to.have.been.called
})
})
describe('when a hash is supplied', function () {
beforeEach(async function () {
return GcsPersistor.sendStream(bucket, key, ReadStream, {
sourceMd5: 'aaaaaaaabbbbbbbbaaaaaaaabbbbbbbb'
})
})
it('should not calculate the md5 hash of the file', function () {
expect(Hash.digest).not.to.have.been.called
})
it('sends the hash in base64', function () {
expect(GcsFile.createWriteStream).to.have.been.calledWith({
validation: 'md5',
metadata: {
md5Hash: 'qqqqqru7u7uqqqqqu7u7uw=='
},
resumable: false
})
})
it('does not fetch the md5 hash of the uploaded file', function () {
expect(GcsFile.getMetadata).not.to.have.been.called
})
})
describe('when metadata is supplied', function () {
const contentType = 'text/csv'
const contentEncoding = 'gzip'
beforeEach(async function () {
return GcsPersistor.sendStream(bucket, key, ReadStream, {
contentType,
contentEncoding
})
})
it('should send the metadata to GCS', function () {
expect(GcsFile.createWriteStream).to.have.been.calledWith({
metadata: { contentType, contentEncoding },
resumable: false
})
})
})
describe('when the upload fails', function () {
let error
beforeEach(async function () {
Stream.pipeline
.withArgs(
ReadStream,
sinon.match.instanceOf(Transform),
WriteStream,
sinon.match.any
)
.yields(genericError)
try {
await GcsPersistor.sendStream(bucket, key, ReadStream)
} catch (err) {
error = err
}
})
it('throws a WriteError', function () {
expect(error).to.be.an.instanceOf(Errors.WriteError)
})
it('wraps the error', function () {
expect(error.cause).to.equal(genericError)
})
})
})
describe('sendFile', function () {
describe('with valid parameters', function () {
beforeEach(async function () {
return GcsPersistor.sendFile(bucket, key, filename)
})
it('should create a read stream for the file', function () {
expect(Fs.createReadStream).to.have.been.calledWith(filename)
})
it('should create a write stream', function () {
expect(Storage.prototype.bucket).to.have.been.calledWith(bucket)
expect(GcsBucket.file).to.have.been.calledWith(key)
expect(GcsFile.createWriteStream).to.have.been.called
})
it('should upload the stream via the meter', function () {
expect(Stream.pipeline).to.have.been.calledWith(
ReadStream,
sinon.match.instanceOf(Transform),
WriteStream
)
})
})
})
describe('copyObject', function () {
const destinationFile = 'destFile'
beforeEach(function () {
GcsBucket.file.withArgs(destKey).returns(destinationFile)
})
describe('with valid parameters', function () {
beforeEach(async function () {
return GcsPersistor.copyObject(bucket, key, destKey)
})
it('should copy the object', function () {
expect(Storage.prototype.bucket).to.have.been.calledWith(bucket)
expect(GcsBucket.file).to.have.been.calledWith(key)
expect(GcsFile.copy).to.have.been.calledWith(destinationFile)
})
})
describe('when the file does not exist', function () {
let error
beforeEach(async function () {
GcsFile.copy = sinon.stub().rejects(GcsNotFoundError)
try {
await GcsPersistor.copyObject(bucket, key, destKey)
} catch (err) {
error = err
}
})
it('should throw a NotFoundError', function () {
expect(error).to.be.an.instanceOf(Errors.NotFoundError)
})
})
})
describe('deleteObject', function () {
describe('with valid parameters', function () {
beforeEach(async function () {
return GcsPersistor.deleteObject(bucket, key)
})
it('should delete the object', function () {
expect(Storage.prototype.bucket).to.have.been.calledWith(bucket)
expect(GcsBucket.file).to.have.been.calledWith(key)
expect(GcsFile.delete).to.have.been.called
})
})
describe('when the file does not exist', function () {
let error
beforeEach(async function () {
GcsFile.delete = sinon.stub().rejects(GcsNotFoundError)
try {
await GcsPersistor.deleteObject(bucket, key)
} catch (err) {
error = err
}
})
it('should not throw an error', function () {
expect(error).not.to.exist
})
})
})
describe('deleteDirectory', function () {
const directoryName = `${ObjectId()}/${ObjectId()}`
describe('with valid parameters', function () {
beforeEach(async function () {
GcsBucket.getFiles = sinon.stub()
// set up multiple paginated calls to getFiles
GcsBucket.getFiles
.withArgs({ directory: directoryName, autoPaginate: false })
.resolves([['aaa', 'bbb'], 'call-1'])
GcsBucket.getFiles
.withArgs('call-1')
.resolves([['ccc', 'ddd', 'eee'], 'call-2'])
GcsBucket.getFiles.withArgs('call-2').resolves([['fff', 'ggg']])
return GcsPersistor.deleteDirectory(bucket, directoryName)
})
it('should list the objects in the directory', function () {
expect(Storage.prototype.bucket).to.have.been.calledWith(bucket)
expect(GcsBucket.getFiles).to.have.been.calledWith({
directory: directoryName,
autoPaginate: false
})
expect(GcsBucket.getFiles).to.have.been.calledWith('call-1')
expect(GcsBucket.getFiles).to.have.been.calledWith('call-2')
})
it('should delete the files', function () {
expect(GcsFile.delete.callCount).to.equal(7)
})
})
describe('when there is an error listing the objects', function () {
let error
beforeEach(async function () {
GcsBucket.getFiles = sinon.stub().rejects(genericError)
try {
await GcsPersistor.deleteDirectory(bucket, directoryName)
} catch (err) {
error = err
}
})
it('should generate a WriteError', function () {
expect(error).to.be.an.instanceOf(Errors.WriteError)
})
it('should wrap the error', function () {
expect(error.cause).to.equal(genericError)
})
})
})
describe('directorySize', function () {
describe('with valid parameters', function () {
let size
beforeEach(async function () {
size = await GcsPersistor.directorySize(bucket, key)
})
it('should list the objects in the directory', function () {
expect(Storage.prototype.bucket).to.have.been.calledWith(bucket)
expect(GcsBucket.getFiles).to.have.been.calledWith({ directory: key })
})
it('should return the directory size', function () {
expect(size).to.equal(filesSize)
})
})
describe('when there are no files', function () {
let size
beforeEach(async function () {
GcsBucket.getFiles.resolves([[]])
size = await GcsPersistor.directorySize(bucket, key)
})
it('should list the objects in the directory', function () {
expect(Storage.prototype.bucket).to.have.been.calledWith(bucket)
expect(GcsBucket.getFiles).to.have.been.calledWith({ directory: key })
})
it('should return zero', function () {
expect(size).to.equal(0)
})
})
describe('when there is an error listing the objects', function () {
let error
beforeEach(async function () {
GcsBucket.getFiles.rejects(genericError)
try {
await GcsPersistor.directorySize(bucket, key)
} catch (err) {
error = err
}
})
it('should generate a ReadError', function () {
expect(error).to.be.an.instanceOf(Errors.ReadError)
})
it('should wrap the error', function () {
expect(error.cause).to.equal(genericError)
})
})
})
describe('checkIfObjectExists', function () {
describe('when the file exists', function () {
let exists
beforeEach(async function () {
exists = await GcsPersistor.checkIfObjectExists(bucket, key)
})
it('should ask the file if it exists', function () {
expect(Storage.prototype.bucket).to.have.been.calledWith(bucket)
expect(GcsBucket.file).to.have.been.calledWith(key)
expect(GcsFile.exists).to.have.been.called
})
it('should return that the file exists', function () {
expect(exists).to.equal(true)
})
})
describe('when the file does not exist', function () {
let exists
beforeEach(async function () {
GcsFile.exists = sinon.stub().resolves([false])
exists = await GcsPersistor.checkIfObjectExists(bucket, key)
})
it('should get the object header', function () {
expect(Storage.prototype.bucket).to.have.been.calledWith(bucket)
expect(GcsBucket.file).to.have.been.calledWith(key)
expect(GcsFile.exists).to.have.been.called
})
it('should return that the file does not exist', function () {
expect(exists).to.equal(false)
})
})
describe('when there is an error', function () {
let error
beforeEach(async function () {
GcsFile.exists = sinon.stub().rejects(genericError)
try {
await GcsPersistor.checkIfObjectExists(bucket, key)
} catch (err) {
error = err
}
})
it('should generate a ReadError', function () {
expect(error).to.be.an.instanceOf(Errors.ReadError)
})
it('should wrap the error', function () {
expect(error.cause).to.equal(genericError)
})
})
})
})