Merge pull request #16438 from overleaf/jpa-em-replace-find-subprocess

[clsi] replace find subprocess for listing compile dir contents

GitOrigin-RevId: 36c8230ea6d787b1d948407d6473c14af8d6b5f6
This commit is contained in:
Jakob Ackermann 2024-01-10 12:02:06 +00:00 committed by Copybot
parent 1e86897556
commit 5aeb1f1459
6 changed files with 148 additions and 227 deletions

2
package-lock.json generated
View file

@ -43503,6 +43503,7 @@
"@overleaf/logger": "*",
"@overleaf/metrics": "*",
"@overleaf/o-error": "*",
"@overleaf/promise-utils": "*",
"@overleaf/settings": "*",
"async": "3.2.2",
"body-parser": "^1.19.0",
@ -53802,6 +53803,7 @@
"@overleaf/logger": "*",
"@overleaf/metrics": "*",
"@overleaf/o-error": "*",
"@overleaf/promise-utils": "*",
"@overleaf/settings": "*",
"async": "3.2.2",
"body-parser": "^1.19.0",

View file

@ -1,8 +1,7 @@
const childProcess = require('child_process')
const fsPromises = require('fs/promises')
const os = require('os')
const Path = require('path')
const { callbackify, promisify } = require('util')
const { callbackify } = require('util')
const Settings = require('@overleaf/settings')
const logger = require('@overleaf/logger')
@ -21,8 +20,6 @@ const CommandRunner = require('./CommandRunner')
const { emitPdfStats } = require('./ContentCacheMetrics')
const SynctexOutputParser = require('./SynctexOutputParser')
const execFile = promisify(childProcess.execFile)
const COMPILE_TIME_BUCKETS = [
// NOTE: These buckets are locked in per metric name.
// If you want to change them, you will need to rename metrics.
@ -211,7 +208,7 @@ async function doCompile(request) {
Metrics.inc('compiles-timeout', 1, request.metricsOpts)
}
const outputFiles = await _saveOutputFiles({
const { outputFiles, allEntries } = await _saveOutputFiles({
request,
compileDir,
resourceList,
@ -222,7 +219,11 @@ async function doCompile(request) {
// Clear project if this compile was abruptly terminated
if (error.terminated || error.timedout) {
await clearProject(request.project_id, request.user_id)
await clearProjectWithListing(
request.project_id,
request.user_id,
allEntries
)
}
throw error
@ -279,7 +280,7 @@ async function doCompile(request) {
// Emit compile time.
timings.compile = ts
const outputFiles = await _saveOutputFiles({
const { outputFiles } = await _saveOutputFiles({
request,
compileDir,
resourceList,
@ -312,10 +313,8 @@ async function _saveOutputFiles({
)
const outputDir = getOutputDir(request.project_id, request.user_id)
let { outputFiles } = await OutputFileFinder.promises.findOutputFiles(
resourceList,
compileDir
)
let { outputFiles, allEntries } =
await OutputFileFinder.promises.findOutputFiles(resourceList, compileDir)
try {
outputFiles = await OutputCacheManager.promises.saveOutputFiles(
@ -330,7 +329,7 @@ async function _saveOutputFiles({
}
timings.output = timer.done()
return outputFiles
return { outputFiles, allEntries }
}
async function stopCompile(projectId, userId) {
@ -340,6 +339,11 @@ async function stopCompile(projectId, userId) {
async function clearProject(projectId, userId) {
const compileDir = getCompileDir(projectId, userId)
await fsPromises.rm(compileDir, { force: true, recursive: true })
}
async function clearProjectWithListing(projectId, userId, allEntries) {
const compileDir = getCompileDir(projectId, userId)
const exists = await _checkDirectory(compileDir)
if (!exists) {
@ -347,12 +351,15 @@ async function clearProject(projectId, userId) {
return
}
try {
await execFile('rm', ['-r', '-f', '--', compileDir])
} catch (err) {
OError.tag(err, `rm -r failed`, { compileDir, stderr: err.stderr })
throw err
for (const pathInProject of allEntries) {
const path = Path.join(compileDir, pathInProject)
if (path.endsWith('/')) {
await fsPromises.rmdir(path)
} else {
await fsPromises.unlink(path)
}
}
await fsPromises.rmdir(compileDir)
}
async function _findAllDirs() {

View file

@ -1,95 +1,54 @@
let OutputFileFinder
const Path = require('path')
const _ = require('lodash')
const { spawn } = require('child_process')
const logger = require('@overleaf/logger')
const fs = require('fs')
const { callbackifyMultiResult } = require('@overleaf/promise-utils')
module.exports = OutputFileFinder = {
findOutputFiles(resources, directory, callback) {
const incomingResources = new Set(resources.map(resource => resource.path))
OutputFileFinder._getAllFiles(directory, function (error, allFiles) {
if (allFiles == null) {
allFiles = []
}
if (error) {
logger.err({ err: error }, 'error finding all output files')
return callback(error)
}
const outputFiles = []
for (const file of allFiles) {
if (!incomingResources.has(file)) {
outputFiles.push({
path: file,
type: Path.extname(file).replace(/^\./, '') || undefined,
})
}
}
callback(null, outputFiles, allFiles)
})
},
_getAllFiles(directory, callback) {
callback = _.once(callback)
// don't include clsi-specific files/directories in the output list
const EXCLUDE_DIRS = [
'-name',
'.cache',
'-o',
'-name',
'.archive',
'-o',
'-name',
'.project-*',
]
const args = [
directory,
'(',
...EXCLUDE_DIRS,
')',
'-prune',
'-o',
'-type',
'f',
'-print',
]
logger.debug({ args }, 'running find command')
const proc = spawn('find', args)
let stdout = ''
proc.stdout.setEncoding('utf8').on('data', chunk => (stdout += chunk))
proc.on('error', callback)
proc.on('close', function (code) {
if (code !== 0) {
logger.warn(
{ directory, code },
"find returned error, directory likely doesn't exist"
)
return callback(null, [])
}
let fileList = stdout.trim().split('\n')
fileList = fileList.map(function (file) {
// Strip leading directory
return Path.relative(directory, file)
})
callback(null, fileList)
})
},
async function walkFolder(compileDir, d, files, allEntries) {
const dirents = await fs.promises.readdir(Path.join(compileDir, d), {
withFileTypes: true,
})
for (const dirent of dirents) {
const p = Path.join(d, dirent.name)
if (dirent.isDirectory()) {
await walkFolder(compileDir, p, files, allEntries)
allEntries.push(p + '/')
} else if (dirent.isFile()) {
files.push(p)
allEntries.push(p)
} else {
allEntries.push(p)
}
}
}
module.exports.promises = {
findOutputFiles: (resources, directory) =>
new Promise((resolve, reject) => {
OutputFileFinder.findOutputFiles(
resources,
directory,
(err, outputFiles, allFiles) => {
if (err) {
reject(err)
} else {
resolve({ outputFiles, allFiles })
}
}
)
}),
async function findOutputFiles(resources, directory) {
const files = []
const allEntries = []
await walkFolder(directory, '', files, allEntries)
const incomingResources = new Set(resources.map(resource => resource.path))
const outputFiles = []
for (const path of files) {
if (incomingResources.has(path)) continue
if (path === '.project-sync-state') continue
if (path === '.project-lock') continue
outputFiles.push({
path,
type: Path.extname(path).replace(/^\./, '') || undefined,
})
}
return {
outputFiles,
allEntries,
}
}
module.exports = {
findOutputFiles: callbackifyMultiResult(findOutputFiles, [
'outputFiles',
'allEntries',
]),
promises: {
findOutputFiles,
},
}

View file

@ -20,6 +20,7 @@
"@overleaf/logger": "*",
"@overleaf/metrics": "*",
"@overleaf/o-error": "*",
"@overleaf/promise-utils": "*",
"@overleaf/settings": "*",
"async": "3.2.2",
"body-parser": "^1.19.0",

View file

@ -53,7 +53,10 @@ describe('CompileManager', function () {
}
this.OutputFileFinder = {
promises: {
findOutputFiles: sinon.stub().resolves(this.outputFiles),
findOutputFiles: sinon.stub().resolves({
outputFiles: this.outputFiles,
allEntries: this.outputFiles.map(f => f.path).concat(['main.tex']),
}),
},
}
this.OutputCacheManager = {
@ -117,6 +120,9 @@ describe('CompileManager', function () {
stat: sinon.stub(),
readFile: sinon.stub(),
mkdir: sinon.stub().resolves(),
rm: sinon.stub().resolves(),
unlink: sinon.stub().resolves(),
rmdir: sinon.stub().resolves(),
}
this.fsPromises.lstat.withArgs(this.compileDir).resolves(this.dirStats)
this.fsPromises.stat
@ -319,12 +325,15 @@ describe('CompileManager', function () {
})
it('should clear the compile directory', function () {
expect(this.child_process.execFile).to.have.been.calledWith('rm', [
'-r',
'-f',
'--',
this.compileDir,
])
for (const { path } of this.buildFiles) {
expect(this.fsPromises.unlink).to.have.been.calledWith(
this.compileDir + '/' + path
)
}
expect(this.fsPromises.unlink).to.have.been.calledWith(
this.compileDir + '/main.tex'
)
expect(this.fsPromises.rmdir).to.have.been.calledWith(this.compileDir)
})
})
@ -339,50 +348,29 @@ describe('CompileManager', function () {
})
it('should clear the compile directory', function () {
expect(this.child_process.execFile).to.have.been.calledWith('rm', [
'-r',
'-f',
'--',
this.compileDir,
])
for (const { path } of this.buildFiles) {
expect(this.fsPromises.unlink).to.have.been.calledWith(
this.compileDir + '/' + path
)
}
expect(this.fsPromises.unlink).to.have.been.calledWith(
this.compileDir + '/main.tex'
)
expect(this.fsPromises.rmdir).to.have.been.calledWith(this.compileDir)
})
})
})
describe('clearProject', function () {
describe('successfully', function () {
beforeEach(async function () {
await this.CompileManager.promises.clearProject(
this.projectId,
this.userId
)
})
it('should clear the compile directory', async function () {
await this.CompileManager.promises.clearProject(
this.projectId,
this.userId
)
it('should remove the project directory', function () {
expect(this.child_process.execFile).to.have.been.calledWith('rm', [
'-r',
'-f',
'--',
this.compileDir,
])
})
})
describe('with a non-success status code', function () {
beforeEach(async function () {
this.child_process.execFile.yields(new Error('oops'))
await expect(
this.CompileManager.promises.clearProject(this.projectId, this.userId)
).to.be.rejected
})
it('should remove the project directory', function () {
expect(this.child_process.execFile).to.have.been.calledWith('rm', [
'-r',
'-f',
'--',
this.compileDir,
])
expect(this.fsPromises.rm).to.have.been.calledWith(this.compileDir, {
force: true,
recursive: true,
})
})
})

View file

@ -1,61 +1,55 @@
/* eslint-disable
no-return-assign,
no-unused-vars,
*/
// TODO: This file was created by bulk-decaffeinate.
// Fix any style issues and re-enable lint.
/*
* decaffeinate suggestions:
* DS102: Remove unnecessary code created because of implicit returns
* Full docs: https://github.com/decaffeinate/decaffeinate/blob/master/docs/suggestions.md
*/
const SandboxedModule = require('sandboxed-module')
const sinon = require('sinon')
const modulePath = require('path').join(
__dirname,
'../../../app/js/OutputFileFinder'
)
const path = require('path')
const { expect } = require('chai')
const { EventEmitter } = require('events')
const mockFs = require('mock-fs')
describe('OutputFileFinder', function () {
beforeEach(function () {
this.OutputFileFinder = SandboxedModule.require(modulePath, {
requires: {
fs: (this.fs = {}),
child_process: { spawn: (this.spawn = sinon.stub()) },
},
globals: {
Math, // used by lodash
this.OutputFileFinder = SandboxedModule.require(modulePath, {})
this.directory = '/test/dir'
this.callback = sinon.stub()
mockFs({
[this.directory]: {
resource: {
'path.tex': 'a source file',
},
'output.pdf': 'a generated pdf file',
extra: {
'file.tex': 'a generated tex file',
},
'sneaky-file': mockFs.symlink({
path: '../foo',
}),
},
})
this.directory = '/test/dir'
return (this.callback = sinon.stub())
})
afterEach(function () {
mockFs.restore()
})
describe('findOutputFiles', function () {
beforeEach(function (done) {
beforeEach(async function () {
this.resource_path = 'resource/path.tex'
this.output_paths = ['output.pdf', 'extra/file.tex']
this.all_paths = this.output_paths.concat([this.resource_path])
this.resources = [{ path: (this.resource_path = 'resource/path.tex') }]
this.OutputFileFinder._getAllFiles = sinon
.stub()
.callsArgWith(1, null, this.all_paths)
return this.OutputFileFinder.findOutputFiles(
this.resources,
this.directory,
(error, outputFiles) => {
if (error) return done(error)
this.outputFiles = outputFiles
done()
}
)
const { outputFiles, allEntries } =
await this.OutputFileFinder.promises.findOutputFiles(
this.resources,
this.directory
)
this.outputFiles = outputFiles
this.allEntries = allEntries
})
return it('should only return the output files, not directories or resource paths', function () {
return expect(this.outputFiles).to.deep.equal([
it('should only return the output files, not directories or resource paths', function () {
expect(this.outputFiles).to.have.deep.members([
{
path: 'output.pdf',
type: 'pdf',
@ -65,44 +59,14 @@ describe('OutputFileFinder', function () {
type: 'tex',
},
])
})
})
return describe('_getAllFiles', function () {
beforeEach(function () {
this.proc = new EventEmitter()
this.proc.stdout = new EventEmitter()
this.proc.stdout.setEncoding = sinon.stub().returns(this.proc.stdout)
this.spawn.returns(this.proc)
this.directory = '/base/dir'
return this.OutputFileFinder._getAllFiles(this.directory, this.callback)
})
describe('successfully', function () {
beforeEach(function () {
this.proc.stdout.emit(
'data',
['/base/dir/main.tex', '/base/dir/chapters/chapter1.tex'].join('\n') +
'\n'
)
return this.proc.emit('close', 0)
})
return it('should call the callback with the relative file paths', function () {
return this.callback
.calledWith(null, ['main.tex', 'chapters/chapter1.tex'])
.should.equal(true)
})
})
return describe("when the directory doesn't exist", function () {
beforeEach(function () {
return this.proc.emit('close', 1)
})
return it('should call the callback with a blank array', function () {
return this.callback.calledWith(null, []).should.equal(true)
})
expect(this.allEntries).to.deep.equal([
'extra/file.tex',
'extra/',
'output.pdf',
'resource/path.tex',
'resource/',
'sneaky-file',
])
})
})
})