From d4563c8786757bcfe5dd1b3e5ce3250f77789604 Mon Sep 17 00:00:00 2001 From: Brian Gough Date: Thu, 26 Aug 2021 10:16:11 +0100 Subject: [PATCH] Merge pull request #4866 from overleaf/bg-gcs-delete-directory-batch make object-persistor delete gcs files in batches GitOrigin-RevId: 8ebc892c5f6eb30507ec41d5d3a108e650af5cac --- libraries/object-persistor/package-lock.json | 2 +- libraries/object-persistor/package.json | 2 +- .../object-persistor/src/GcsPersistor.js | 52 ++++++++++--------- .../test/unit/GcsPersistorTests.js | 16 +++++- 4 files changed, 44 insertions(+), 28 deletions(-) diff --git a/libraries/object-persistor/package-lock.json b/libraries/object-persistor/package-lock.json index a2cfc9c742..96ce722883 100644 --- a/libraries/object-persistor/package-lock.json +++ b/libraries/object-persistor/package-lock.json @@ -1,6 +1,6 @@ { "name": "@overleaf/object-persistor", - "version": "1.0.1", + "version": "1.0.2", "lockfileVersion": 1, "requires": true, "dependencies": { diff --git a/libraries/object-persistor/package.json b/libraries/object-persistor/package.json index bfa8323e56..8fa0ba6aa4 100644 --- a/libraries/object-persistor/package.json +++ b/libraries/object-persistor/package.json @@ -1,6 +1,6 @@ { "name": "@overleaf/object-persistor", - "version": "1.0.1", + "version": "1.0.2", "description": "Module for storing objects in multiple backends, with fallback on 404 to assist migration between them", "main": "index.js", "scripts": { diff --git a/libraries/object-persistor/src/GcsPersistor.js b/libraries/object-persistor/src/GcsPersistor.js index bc9ed1772e..f71a42d0a7 100644 --- a/libraries/object-persistor/src/GcsPersistor.js +++ b/libraries/object-persistor/src/GcsPersistor.js @@ -216,32 +216,36 @@ module.exports = class GcsPersistor extends AbstractPersistor { } async deleteDirectory(bucketName, key) { - try { - const [files] = await this.storage - .bucket(bucketName) - .getFiles({ directory: key }) - - if (Array.isArray(files) && files.length > 0) { - await asyncPool( - this.settings.deleteConcurrency, - files, - async (file) => { - await this.deleteObject(bucketName, file.name) - } + let query = { directory: key, autoPaginate: false } + do { + try { + const [files, nextQuery] = await this.storage + .bucket(bucketName) + .getFiles(query) + // iterate over paginated results using the nextQuery returned by getFiles + query = nextQuery + if (Array.isArray(files) && files.length > 0) { + await asyncPool( + this.settings.deleteConcurrency, + files, + async (file) => { + await this.deleteObject(bucketName, file.name) + } + ) + } + } catch (err) { + const error = PersistorHelper.wrapError( + err, + 'failed to delete directory in GCS', + { bucketName, key }, + WriteError ) + if (error instanceof NotFoundError) { + return + } + throw error } - } catch (err) { - const error = PersistorHelper.wrapError( - err, - 'failed to delete directory in GCS', - { bucketName, key }, - WriteError - ) - if (error instanceof NotFoundError) { - return - } - throw error - } + } while (query) } async directorySize(bucketName, key) { diff --git a/libraries/object-persistor/test/unit/GcsPersistorTests.js b/libraries/object-persistor/test/unit/GcsPersistorTests.js index 5558d9709b..030cc4342e 100644 --- a/libraries/object-persistor/test/unit/GcsPersistorTests.js +++ b/libraries/object-persistor/test/unit/GcsPersistorTests.js @@ -556,18 +556,30 @@ describe('GcsPersistorTests', function () { const directoryName = `${ObjectId()}/${ObjectId()}` describe('with valid parameters', function () { beforeEach(async function () { + GcsBucket.getFiles = sinon.stub() + // set up multiple paginated calls to getFiles + GcsBucket.getFiles + .withArgs({ directory: directoryName, autoPaginate: false }) + .resolves([['aaa', 'bbb'], 'call-1']) + GcsBucket.getFiles + .withArgs('call-1') + .resolves([['ccc', 'ddd', 'eee'], 'call-2']) + GcsBucket.getFiles.withArgs('call-2').resolves([['fff', 'ggg']]) return GcsPersistor.deleteDirectory(bucket, directoryName) }) it('should list the objects in the directory', function () { expect(Storage.prototype.bucket).to.have.been.calledWith(bucket) expect(GcsBucket.getFiles).to.have.been.calledWith({ - directory: directoryName + directory: directoryName, + autoPaginate: false }) + expect(GcsBucket.getFiles).to.have.been.calledWith('call-1') + expect(GcsBucket.getFiles).to.have.been.calledWith('call-2') }) it('should delete the files', function () { - expect(GcsFile.delete).to.have.been.calledTwice + expect(GcsFile.delete.callCount).to.equal(7) }) })