From b4b369fea52e193b869d747475b07e7364aace75 Mon Sep 17 00:00:00 2001 From: Jakob Ackermann Date: Mon, 8 Jan 2024 10:43:41 +0000 Subject: [PATCH] Merge pull request #16428 from overleaf/jpa-clsi-replace-fs-extra [clsi] replace fs-extra to avoid excess syscalls GitOrigin-RevId: cbc8ec01b8ac9fd973e86a6d763c8f599323db94 --- package-lock.json | 4 +- services/clsi/app/js/CompileManager.js | 7 +- services/clsi/app/js/OutputCacheManager.js | 102 ++++++++++++------ services/clsi/app/js/UrlCache.js | 6 +- services/clsi/package.json | 1 - .../acceptance/js/ExampleDocumentTests.js | 5 +- .../clsi/test/unit/js/CompileManagerTests.js | 13 +-- services/clsi/test/unit/js/UrlCacheTests.js | 7 +- 8 files changed, 90 insertions(+), 55 deletions(-) diff --git a/package-lock.json b/package-lock.json index 7fcdb2d627..6474919f4c 100644 --- a/package-lock.json +++ b/package-lock.json @@ -25690,6 +25690,7 @@ "version": "10.0.0", "resolved": "https://registry.npmjs.org/fs-extra/-/fs-extra-10.0.0.tgz", "integrity": "sha512-C5owb14u9eJwizKGdchcDUQeFtlSHHthBk8pbX9Vc1PFZrLombudjDnNns88aYslCyF6IY5SUw3Roz6xShcEIQ==", + "dev": true, "dependencies": { "graceful-fs": "^4.2.0", "jsonfile": "^6.0.1", @@ -43509,7 +43510,6 @@ "diskusage": "^1.1.3", "dockerode": "^3.1.0", "express": "^4.18.2", - "fs-extra": "^10.0.0", "lockfile": "^1.0.4", "lodash": "^4.17.21", "p-limit": "^3.1.0", @@ -53819,7 +53819,6 @@ "diskusage": "^1.1.3", "dockerode": "^3.1.0", "express": "^4.18.2", - "fs-extra": "^10.0.0", "lockfile": "^1.0.4", "lodash": "^4.17.21", "mocha": "^10.2.0", @@ -69135,6 +69134,7 @@ "version": "10.0.0", "resolved": "https://registry.npmjs.org/fs-extra/-/fs-extra-10.0.0.tgz", "integrity": "sha512-C5owb14u9eJwizKGdchcDUQeFtlSHHthBk8pbX9Vc1PFZrLombudjDnNns88aYslCyF6IY5SUw3Roz6xShcEIQ==", + "dev": true, "requires": { "graceful-fs": "^4.2.0", "jsonfile": "^6.0.1", diff --git a/services/clsi/app/js/CompileManager.js b/services/clsi/app/js/CompileManager.js index 784d191583..c4cd88aed6 100644 --- a/services/clsi/app/js/CompileManager.js +++ b/services/clsi/app/js/CompileManager.js @@ -1,6 +1,5 @@ const childProcess = require('child_process') const fsPromises = require('fs/promises') -const fse = require('fs-extra') const os = require('os') const Path = require('path') const { callbackify, promisify } = require('util') @@ -51,7 +50,7 @@ async function doCompileWithLock(request) { // use a .project-lock file in the compile directory to prevent // simultaneous compiles const lockFile = Path.join(compileDir, '.project-lock') - await fse.ensureDir(compileDir) + await fsPromises.mkdir(compileDir, { recursive: true }) const lock = await LockManager.acquire(lockFile) try { return await doCompile(request) @@ -378,7 +377,7 @@ async function clearExpiredProjects(maxCacheAgeMs) { const age = now - stats.mtime const hasExpired = age > maxCacheAgeMs if (hasExpired) { - await fse.remove(dir) + await fsPromises.rm(dir, { force: true, recursive: true }) } } } @@ -519,7 +518,7 @@ async function wordcount(projectId, userId, filename, image) { const compileName = getCompileName(projectId, userId) const compileGroup = 'wordcount' try { - await fse.ensureDir(compileDir) + await fsPromises.mkdir(compileDir, { recursive: true }) } catch (err) { throw OError.tag(err, 'error ensuring dir for wordcount', { projectId, diff --git a/services/clsi/app/js/OutputCacheManager.js b/services/clsi/app/js/OutputCacheManager.js index 55c856efc0..b92b34706d 100644 --- a/services/clsi/app/js/OutputCacheManager.js +++ b/services/clsi/app/js/OutputCacheManager.js @@ -2,7 +2,6 @@ let OutputCacheManager const { callbackify, promisify } = require('util') const async = require('async') const fs = require('fs') -const fse = require('fs-extra') const Path = require('path') const logger = require('@overleaf/logger') const _ = require('lodash') @@ -240,7 +239,7 @@ module.exports = OutputCacheManager = { } // make the new cache directory - fse.ensureDir(cacheDir, function (err) { + fs.mkdir(cacheDir, { recursive: true }, function (err) { if (err) { logger.error( { err, directory: cacheDir }, @@ -250,6 +249,8 @@ module.exports = OutputCacheManager = { } else { // copy all the output files into the new cache directory const results = [] + const dirCache = new Set() + dirCache.add(cacheDir) async.mapSeries( outputFiles, function (file, cb) { @@ -281,7 +282,7 @@ module.exports = OutputCacheManager = { if (!shouldCopy) { return cb() } - OutputCacheManager._copyFile(src, dst, function (err) { + OutputCacheManager._copyFile(src, dst, dirCache, err => { if (err) { return cb(err) } @@ -298,7 +299,7 @@ module.exports = OutputCacheManager = { // pass back the original files if we encountered *any* error callback(err, outputFiles) // clean up the directory we just created - fse.remove(cacheDir, function (err) { + fs.rm(cacheDir, { force: true, recursive: true }, function (err) { if (err) { return logger.error( { err, dir: cacheDir }, @@ -447,7 +448,7 @@ module.exports = OutputCacheManager = { }, ensureContentDir(contentRoot, callback) { - fse.ensureDir(contentRoot, function (err) { + fs.mkdir(contentRoot, { recursive: true }, function (err) { if (err) { return callback(err) } @@ -466,7 +467,7 @@ module.exports = OutputCacheManager = { return callback(err) } const contentDir = Path.join(contentRoot, contentId) - fse.ensureDir(contentDir, function (err) { + fs.mkdir(contentDir, { recursive: true }, function (err) { if (err) { return callback(err) } @@ -485,10 +486,12 @@ module.exports = OutputCacheManager = { buildId ) logger.debug({ dir: archiveDir }, 'archiving log files for project') - fse.ensureDir(archiveDir, function (err) { + fs.mkdir(archiveDir, { recursive: true }, function (err) { if (err) { return callback(err) } + const dirCache = new Set() + dirCache.add(archiveDir) async.mapSeries( outputFiles, function (file, cb) { @@ -510,7 +513,7 @@ module.exports = OutputCacheManager = { if (!shouldArchive) { return cb() } - OutputCacheManager._copyFile(src, dst, cb) + OutputCacheManager._copyFile(src, dst, dirCache, cb) } ) }) @@ -523,7 +526,7 @@ module.exports = OutputCacheManager = { expireOutputFiles(outputDir, options, callback) { // look in compileDir for build dirs and delete if > N or age of mod time > T const cleanupAll = cb => { - fse.remove(outputDir, err => { + fs.rm(outputDir, { force: true, recursive: true }, err => { if (err) { return cb(err) } @@ -582,13 +585,17 @@ module.exports = OutputCacheManager = { } const removeDir = (dir, cb) => - fse.remove(Path.join(cacheRoot, dir), function (err, result) { - logger.debug({ cache: cacheRoot, dir }, 'removed expired cache dir') - if (err) { - logger.error({ err, dir }, 'cache remove error') + fs.rm( + Path.join(cacheRoot, dir), + { force: true, recursive: true }, + function (err, result) { + logger.debug({ cache: cacheRoot, dir }, 'removed expired cache dir') + if (err) { + logger.error({ err, dir }, 'cache remove error') + } + cb(err, result) } - cb(err, result) - }) + ) async.eachSeries( toRemove, (dir, cb) => removeDir(dir, cb), @@ -637,28 +644,53 @@ module.exports = OutputCacheManager = { }) }, - _copyFile(src, dst, callback) { - // copy output file into the cache - fse.copy(src, dst, function (err) { - if (err?.code === 'ENOENT') { - logger.warn( - { err, file: src }, - 'file has disappeared when copying to build cache' - ) - callback(err, false) - } else if (err) { - logger.error({ err, src, dst }, 'copy error for file in cache') - callback(err) - } else { - if (Settings.clsi?.optimiseInDocker) { - // don't run any optimisations on the pdf when they are done - // in the docker container - callback() - } else { - // call the optimiser for the file too - OutputFileOptimiser.optimiseFile(src, dst, callback) + _ensureParentExists(dst, dirCache, callback) { + let parent = Path.dirname(dst) + if (dirCache.has(parent)) { + callback() + } else { + fs.mkdir(parent, { recursive: true }, err => { + if (err) return callback(err) + while (!dirCache.has(parent)) { + dirCache.add(parent) + parent = Path.dirname(parent) } + callback() + }) + } + }, + + _copyFile(src, dst, dirCache, callback) { + OutputCacheManager._ensureParentExists(dst, dirCache, err => { + if (err) { + logger.warn( + { err, dst }, + 'creating parent directory in output cache failed' + ) + return callback(err, false) } + // copy output file into the cache + fs.copyFile(src, dst, function (err) { + if (err?.code === 'ENOENT') { + logger.warn( + { err, file: src }, + 'file has disappeared when copying to build cache' + ) + callback(err, false) + } else if (err) { + logger.error({ err, src, dst }, 'copy error for file in cache') + callback(err) + } else { + if (Settings.clsi?.optimiseInDocker) { + // don't run any optimisations on the pdf when they are done + // in the docker container + callback() + } else { + // call the optimiser for the file too + OutputFileOptimiser.optimiseFile(src, dst, callback) + } + } + }) }) }, diff --git a/services/clsi/app/js/UrlCache.js b/services/clsi/app/js/UrlCache.js index 9fbd168436..919479b096 100644 --- a/services/clsi/app/js/UrlCache.js +++ b/services/clsi/app/js/UrlCache.js @@ -13,7 +13,6 @@ const UrlFetcher = require('./UrlFetcher') const Settings = require('@overleaf/settings') const fs = require('fs') -const fse = require('fs-extra') const Path = require('path') const { callbackify } = require('util') @@ -32,7 +31,10 @@ function getCachePath(projectId, url, lastModified) { } async function clearProject(projectId) { - await fse.remove(getProjectDir(projectId)) + await fs.promises.rm(getProjectDir(projectId), { + force: true, + recursive: true, + }) } async function createProjectDir(projectId) { diff --git a/services/clsi/package.json b/services/clsi/package.json index 448229cc48..11c978b1e0 100644 --- a/services/clsi/package.json +++ b/services/clsi/package.json @@ -27,7 +27,6 @@ "diskusage": "^1.1.3", "dockerode": "^3.1.0", "express": "^4.18.2", - "fs-extra": "^10.0.0", "lockfile": "^1.0.4", "lodash": "^4.17.21", "p-limit": "^3.1.0", diff --git a/services/clsi/test/acceptance/js/ExampleDocumentTests.js b/services/clsi/test/acceptance/js/ExampleDocumentTests.js index 33dca09ce7..404f8c4e90 100644 --- a/services/clsi/test/acceptance/js/ExampleDocumentTests.js +++ b/services/clsi/test/acceptance/js/ExampleDocumentTests.js @@ -16,7 +16,6 @@ const Client = require('./helpers/Client') const fetch = require('node-fetch') const { pipeline } = require('stream') const fs = require('fs') -const fsExtra = require('fs-extra') const ChildProcess = require('child_process') const ClsiApp = require('./helpers/ClsiApp') const logger = require('@overleaf/logger') @@ -204,13 +203,13 @@ describe('Example Documents', function () { ClsiApp.ensureRunning(done) }) before(function (done) { - fsExtra.remove(fixturePath('tmp'), done) + fs.rm(fixturePath('tmp'), { force: true, recursive: true }, done) }) before(function (done) { fs.mkdir(fixturePath('tmp'), done) }) after(function (done) { - fsExtra.remove(fixturePath('tmp'), done) + fs.rm(fixturePath('tmp'), { force: true, recursive: true }, done) }) return Array.from(fs.readdirSync(fixturePath('examples'))).map(exampleDir => diff --git a/services/clsi/test/unit/js/CompileManagerTests.js b/services/clsi/test/unit/js/CompileManagerTests.js index c014efcfa1..be8c01c51f 100644 --- a/services/clsi/test/unit/js/CompileManagerTests.js +++ b/services/clsi/test/unit/js/CompileManagerTests.js @@ -116,14 +116,12 @@ describe('CompileManager', function () { lstat: sinon.stub(), stat: sinon.stub(), readFile: sinon.stub(), + mkdir: sinon.stub().resolves(), } this.fsPromises.lstat.withArgs(this.compileDir).resolves(this.dirStats) this.fsPromises.stat .withArgs(Path.join(this.compileDir, 'output.synctex.gz')) .resolves(this.fileStats) - this.fse = { - ensureDir: sinon.stub().resolves(), - } this.CompileManager = SandboxedModule.require(MODULE_PATH, { requires: { @@ -145,7 +143,6 @@ describe('CompileManager', function () { './LockManager': this.LockManager, './SynctexOutputParser': this.SynctexOutputParser, 'fs/promises': this.fsPromises, - 'fs-extra': this.fse, }, }) }) @@ -177,7 +174,9 @@ describe('CompileManager', function () { }) it('should ensure that the compile directory exists', function () { - expect(this.fse.ensureDir).to.have.been.calledWith(this.compileDir) + expect(this.fsPromises.mkdir).to.have.been.calledWith(this.compileDir, { + recursive: true, + }) }) it('should not run LaTeX', function () { @@ -193,7 +192,9 @@ describe('CompileManager', function () { }) it('should ensure that the compile directory exists', function () { - expect(this.fse.ensureDir).to.have.been.calledWith(this.compileDir) + expect(this.fsPromises.mkdir).to.have.been.calledWith(this.compileDir, { + recursive: true, + }) }) it('should write the resources to disk', function () { diff --git a/services/clsi/test/unit/js/UrlCacheTests.js b/services/clsi/test/unit/js/UrlCacheTests.js index 1b05c4050b..37dc3bec80 100644 --- a/services/clsi/test/unit/js/UrlCacheTests.js +++ b/services/clsi/test/unit/js/UrlCacheTests.js @@ -29,9 +29,9 @@ describe('UrlCache', function () { '@overleaf/settings': (this.Settings = { path: { clsiCacheDir: '/cache/dir' }, }), - 'fs-extra': (this.fse = { remove: sinon.stub().resolves() }), fs: (this.fs = { promises: { + rm: sinon.stub().resolves(), copyFile: sinon.stub().resolves(), }, }), @@ -105,7 +105,10 @@ describe('UrlCache', function () { it('should clear the cache in bulk', function () { expect( - this.fse.remove.calledWith('/cache/dir/' + this.project_id) + this.fs.promises.rm.calledWith('/cache/dir/' + this.project_id, { + force: true, + recursive: true, + }) ).to.equal(true) }) })