Merge pull request #18148 from overleaf/ar-add-output-zip-endpoint-to-clsi

[clsi] Add endpoints to get zip of output files

GitOrigin-RevId: a1a935e8170ab5a8d40baa6d96f8e42fe22c2e8c
This commit is contained in:
Andrew Rumble 2024-05-01 14:52:12 +01:00 committed by Copybot
parent d02f175afa
commit 92f62f91c1
7 changed files with 556 additions and 18 deletions

87
package-lock.json generated
View file

@ -14340,15 +14340,15 @@
]
},
"node_modules/archiver": {
"version": "5.3.0",
"resolved": "https://registry.npmjs.org/archiver/-/archiver-5.3.0.tgz",
"integrity": "sha512-iUw+oDwK0fgNpvveEsdQ0Ase6IIKztBJU2U0E9MzszMfmVVUyv1QJhS2ITW9ZCqx8dktAxVAjWWkKehuZE8OPg==",
"version": "5.3.2",
"resolved": "https://registry.npmjs.org/archiver/-/archiver-5.3.2.tgz",
"integrity": "sha512-+25nxyyznAXF7Nef3y0EbBeqmGZgeN/BxHX29Rs39djAfaFalmQ89SE6CWyDCHzGL0yt/ycBtNOmGTW0FyGWNw==",
"dependencies": {
"archiver-utils": "^2.1.0",
"async": "^3.2.0",
"async": "^3.2.4",
"buffer-crc32": "^0.2.1",
"readable-stream": "^3.6.0",
"readdir-glob": "^1.0.0",
"readdir-glob": "^1.1.2",
"tar-stream": "^2.2.0",
"zip-stream": "^4.1.0"
},
@ -14403,6 +14403,11 @@
"safe-buffer": "~5.1.0"
}
},
"node_modules/archiver/node_modules/async": {
"version": "3.2.5",
"resolved": "https://registry.npmjs.org/async/-/async-3.2.5.tgz",
"integrity": "sha512-baNZyqaaLhyLVKm/DlvdW051MSgO6b8eVfIezl9E5PqWxFgzLm/wQntEW4zOytVburDEr0JlALEpdOFwvErLsg=="
},
"node_modules/are-we-there-yet": {
"version": "2.0.0",
"resolved": "https://registry.npmjs.org/are-we-there-yet/-/are-we-there-yet-2.0.0.tgz",
@ -34479,11 +34484,30 @@
}
},
"node_modules/readdir-glob": {
"version": "1.1.1",
"resolved": "https://registry.npmjs.org/readdir-glob/-/readdir-glob-1.1.1.tgz",
"integrity": "sha512-91/k1EzZwDx6HbERR+zucygRFfiPl2zkIYZtv3Jjr6Mn7SkKcVct8aVO+sSRiGMc6fLf72du3d92/uY63YPdEA==",
"version": "1.1.3",
"resolved": "https://registry.npmjs.org/readdir-glob/-/readdir-glob-1.1.3.tgz",
"integrity": "sha512-v05I2k7xN8zXvPD9N+z/uhXPaj0sUFCe2rcWZIpBsqxfP7xXFQ0tipAd/wjj1YxWyWtUS5IDJpOG82JKt2EAVA==",
"dependencies": {
"minimatch": "^3.0.4"
"minimatch": "^5.1.0"
}
},
"node_modules/readdir-glob/node_modules/brace-expansion": {
"version": "2.0.1",
"resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-2.0.1.tgz",
"integrity": "sha512-XnAIvQ8eM+kC6aULx6wuQiwVsnzsi9d3WxzV3FpWTGA19F621kwdbsAcFKXgKUHZWsy+mY6iL1sHTxWEFCytDA==",
"dependencies": {
"balanced-match": "^1.0.0"
}
},
"node_modules/readdir-glob/node_modules/minimatch": {
"version": "5.1.6",
"resolved": "https://registry.npmjs.org/minimatch/-/minimatch-5.1.6.tgz",
"integrity": "sha512-lKwV/1brpG6mBUFHtb7NUmtABCb2WZZmm2wNiOA5hAb8VdCS4B3dtMWyvcoViccwAW/COERjXLt0zP1zXUN26g==",
"dependencies": {
"brace-expansion": "^2.0.1"
},
"engines": {
"node": ">=10"
}
},
"node_modules/readdirp": {
@ -41120,6 +41144,7 @@
"@overleaf/o-error": "*",
"@overleaf/promise-utils": "*",
"@overleaf/settings": "*",
"archiver": "5.3.2",
"async": "3.2.2",
"body-parser": "^1.19.0",
"bunyan": "^1.8.15",
@ -51163,6 +51188,7 @@
"@overleaf/o-error": "*",
"@overleaf/promise-utils": "*",
"@overleaf/settings": "*",
"archiver": "5.3.2",
"async": "3.2.2",
"body-parser": "^1.19.0",
"bunyan": "^1.8.15",
@ -58349,17 +58375,24 @@
"dev": true
},
"archiver": {
"version": "5.3.0",
"resolved": "https://registry.npmjs.org/archiver/-/archiver-5.3.0.tgz",
"integrity": "sha512-iUw+oDwK0fgNpvveEsdQ0Ase6IIKztBJU2U0E9MzszMfmVVUyv1QJhS2ITW9ZCqx8dktAxVAjWWkKehuZE8OPg==",
"version": "5.3.2",
"resolved": "https://registry.npmjs.org/archiver/-/archiver-5.3.2.tgz",
"integrity": "sha512-+25nxyyznAXF7Nef3y0EbBeqmGZgeN/BxHX29Rs39djAfaFalmQ89SE6CWyDCHzGL0yt/ycBtNOmGTW0FyGWNw==",
"requires": {
"archiver-utils": "^2.1.0",
"async": "^3.2.0",
"async": "^3.2.4",
"buffer-crc32": "^0.2.1",
"readable-stream": "^3.6.0",
"readdir-glob": "^1.0.0",
"readdir-glob": "^1.1.2",
"tar-stream": "^2.2.0",
"zip-stream": "^4.1.0"
},
"dependencies": {
"async": {
"version": "3.2.5",
"resolved": "https://registry.npmjs.org/async/-/async-3.2.5.tgz",
"integrity": "sha512-baNZyqaaLhyLVKm/DlvdW051MSgO6b8eVfIezl9E5PqWxFgzLm/wQntEW4zOytVburDEr0JlALEpdOFwvErLsg=="
}
}
},
"archiver-utils": {
@ -74479,11 +74512,29 @@
}
},
"readdir-glob": {
"version": "1.1.1",
"resolved": "https://registry.npmjs.org/readdir-glob/-/readdir-glob-1.1.1.tgz",
"integrity": "sha512-91/k1EzZwDx6HbERR+zucygRFfiPl2zkIYZtv3Jjr6Mn7SkKcVct8aVO+sSRiGMc6fLf72du3d92/uY63YPdEA==",
"version": "1.1.3",
"resolved": "https://registry.npmjs.org/readdir-glob/-/readdir-glob-1.1.3.tgz",
"integrity": "sha512-v05I2k7xN8zXvPD9N+z/uhXPaj0sUFCe2rcWZIpBsqxfP7xXFQ0tipAd/wjj1YxWyWtUS5IDJpOG82JKt2EAVA==",
"requires": {
"minimatch": "^3.0.4"
"minimatch": "^5.1.0"
},
"dependencies": {
"brace-expansion": {
"version": "2.0.1",
"resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-2.0.1.tgz",
"integrity": "sha512-XnAIvQ8eM+kC6aULx6wuQiwVsnzsi9d3WxzV3FpWTGA19F621kwdbsAcFKXgKUHZWsy+mY6iL1sHTxWEFCytDA==",
"requires": {
"balanced-match": "^1.0.0"
}
},
"minimatch": {
"version": "5.1.6",
"resolved": "https://registry.npmjs.org/minimatch/-/minimatch-5.1.6.tgz",
"integrity": "sha512-lKwV/1brpG6mBUFHtb7NUmtABCb2WZZmm2wNiOA5hAb8VdCS4B3dtMWyvcoViccwAW/COERjXLt0zP1zXUN26g==",
"requires": {
"brace-expansion": "^2.0.1"
}
}
}
},
"readdirp": {

View file

@ -14,6 +14,7 @@ const Metrics = require('@overleaf/metrics')
const smokeTest = require('./test/smoke/js/SmokeTests')
const ContentTypeMapper = require('./app/js/ContentTypeMapper')
const Errors = require('./app/js/Errors')
const { createOutputZip } = require('./app/js/OutputController')
const Path = require('path')
@ -170,6 +171,20 @@ const staticOutputServer = ForbidSymlinks(
}
)
// This needs to be before GET /project/:project_id/build/:build_id/output/*
app.get(
'/project/:project_id/build/:build_id/output/output.zip',
bodyParser.json(),
createOutputZip
)
// This needs to be before GET /project/:project_id/user/:user_id/build/:build_id/output/*
app.get(
'/project/:project_id/user/:user_id/build/:build_id/output/output.zip',
bodyParser.json(),
createOutputZip
)
app.get(
'/project/:project_id/user/:user_id/build/:build_id/output/*',
function (req, res, next) {

View file

@ -0,0 +1,37 @@
const logger = require('@overleaf/logger')
const OutputFileArchiveManager = require('./OutputFileArchiveManager')
const { expressify } = require('@overleaf/promise-utils')
function cleanFiles(files) {
if (!Array.isArray(files)) {
return []
}
return files.filter(file => /^output\./g.test(file))
}
async function createOutputZip(req, res) {
const {
project_id: projectId,
user_id: userId,
build_id: buildId,
} = req.params
const files = cleanFiles(req.query.files)
logger.debug({ projectId, userId, buildId, files }, 'Will create zip file')
const archive = await OutputFileArchiveManager.archiveFilesForBuild(
projectId,
userId,
buildId,
files
)
archive.on('error', err => {
logger.warn({ err }, 'error emitted when creating output files archive')
})
res.attachment('output.zip')
res.setHeader('X-Content-Type-Options', 'nosniff')
archive.pipe(res)
}
module.exports = { createOutputZip: expressify(createOutputZip) }

View file

@ -0,0 +1,90 @@
let OutputFileArchiveManager
const archiver = require('archiver')
const OutputCacheManager = require('./OutputCacheManager')
const OutputFileFinder = require('./OutputFileFinder')
const Settings = require('@overleaf/settings')
const { open } = require('node:fs/promises')
const path = require('node:path')
const { NotFoundError } = require('./Errors')
function getContentDir(projectId, userId) {
let subDir
if (userId != null) {
subDir = `${projectId}-${userId}`
} else {
subDir = projectId
}
return `${Settings.path.outputDir}/${subDir}/`
}
module.exports = OutputFileArchiveManager = {
async archiveFilesForBuild(projectId, userId, build, files = []) {
const contentDir = getContentDir(projectId, userId)
const validFiles = await (files.length > 0
? this._getRequestedOutputFiles(projectId, userId, build, files)
: this._getAllOutputFiles(projectId, userId, build))
const archive = archiver('zip')
const missingFiles = files.filter(
file => !validFiles.includes(path.basename(file))
)
for (const file of validFiles) {
try {
const fileHandle = await open(
`${contentDir}${OutputCacheManager.path(build, file)}`
)
const fileStream = fileHandle.createReadStream()
archive.append(fileStream, { name: file })
} catch (error) {
missingFiles.push(file)
}
}
if (missingFiles.length > 0) {
archive.append(missingFiles.join('\n'), {
name: 'missing_files.txt',
})
}
await archive.finalize()
return archive
},
async _getAllOutputFiles(projectId, userId, build) {
const contentDir = getContentDir(projectId, userId)
try {
const { outputFiles } = await OutputFileFinder.promises.findOutputFiles(
[],
`${contentDir}${OutputCacheManager.path(build, '.')}`
)
return outputFiles.map(({ path }) => path)
} catch (error) {
if (
error.code === 'ENOENT' ||
error.code === 'ENOTDIR' ||
error.code === 'EACCES'
) {
throw new NotFoundError('Output files not found')
}
throw error
}
},
async _getRequestedOutputFiles(projectId, userId, build, files) {
const outputFiles = new Set(
await OutputFileArchiveManager._getAllOutputFiles(
projectId,
userId,
build
)
)
return files.filter(file => outputFiles.has(file))
},
}

View file

@ -23,6 +23,7 @@
"@overleaf/o-error": "*",
"@overleaf/promise-utils": "*",
"@overleaf/settings": "*",
"archiver": "5.3.2",
"async": "3.2.2",
"body-parser": "^1.19.0",
"bunyan": "^1.8.15",

View file

@ -0,0 +1,110 @@
const SandboxedModule = require('sandboxed-module')
const sinon = require('sinon')
const MODULE_PATH = require('path').join(
__dirname,
'../../../app/js/OutputController'
)
describe('OutputController', function () {
describe('createOutputZip', function () {
beforeEach(function () {
this.archive = {
on: sinon.stub(),
pipe: sinon.stub(),
}
this.archiveFilesForBuild = sinon.stub().resolves(this.archive)
this.OutputController = SandboxedModule.require(MODULE_PATH, {
requires: {
'./OutputFileArchiveManager': {
archiveFilesForBuild: this.archiveFilesForBuild,
},
},
})
})
describe('when OutputFileArchiveManager creates an archive', function () {
beforeEach(function (done) {
this.res = {
attachment: sinon.stub(),
setHeader: sinon.stub(),
}
this.req = {
params: {
project_id: 'project-id-123',
user_id: 'user-id-123',
build_id: 'build-id-123',
},
query: {
files: ['output.tex', 'not-output.tex'],
},
}
this.archive.pipe.callsFake(() => done())
this.OutputController.createOutputZip(this.req, this.res)
})
it('does not pass files that do not start with "output" to OutputFileArchiveManager', function () {
sinon.assert.calledWith(
this.archiveFilesForBuild,
'project-id-123',
'user-id-123',
'build-id-123',
['output.tex']
)
})
it('pipes the archive to the response', function () {
sinon.assert.calledWith(this.archive.pipe, this.res)
})
it('calls the express convenience method to set attachment headers', function () {
sinon.assert.calledWith(this.res.attachment, 'output.zip')
})
it('sets the X-Content-Type-Options header to nosniff', function () {
sinon.assert.calledWith(
this.res.setHeader,
'X-Content-Type-Options',
'nosniff'
)
})
})
describe('when OutputFileArchiveManager throws an error', function () {
let error
beforeEach(function (done) {
error = new Error('error message')
this.archiveFilesForBuild.rejects(error)
this.res = {
status: sinon.stub().returnsThis(),
send: sinon.stub(),
}
this.req = {
params: {
project_id: 'project-id-123',
user_id: 'user-id-123',
build_id: 'build-id-123',
},
query: {
files: ['output.tex'],
},
}
this.OutputController.createOutputZip(
this.req,
this.res,
(this.next = sinon.stub().callsFake(() => {
done()
}))
)
})
it('calls next with the error', function () {
sinon.assert.calledWith(this.next, error)
})
})
})
})

View file

@ -0,0 +1,234 @@
const SandboxedModule = require('sandboxed-module')
const sinon = require('sinon')
const { assert, expect } = require('chai')
const MODULE_PATH = require('path').join(
__dirname,
'../../../app/js/OutputFileArchiveManager'
)
describe('OutputFileArchiveManager', function () {
const userId = 'user-id-123'
const projectId = 'project-id-123'
const buildId = 'build-id-123'
afterEach(function () {
sinon.restore()
})
beforeEach(function () {
this.OutputFileFinder = {
promises: {
findOutputFiles: sinon.stub().resolves({ outputFiles: [] }),
},
}
this.OutputCacheManger = {
path: sinon.stub().callsFake((build, path) => {
return `${build}/${path}`
}),
}
this.archive = {
append: sinon.stub(),
finalize: sinon.stub(),
}
this.archiver = sinon.stub().returns(this.archive)
this.outputDir = '/output/dir'
this.fs = {
open: sinon.stub().callsFake(file => ({
createReadStream: sinon.stub().returns(`handle: ${file}`),
})),
}
this.OutputFileArchiveManager = SandboxedModule.require(MODULE_PATH, {
requires: {
'./OutputFileFinder': this.OutputFileFinder,
'./OutputCacheManager': this.OutputCacheManger,
archiver: this.archiver,
'node:fs/promises': this.fs,
'node:path': {
basename: sinon.stub().callsFake(path => path.split('/').pop()),
},
'@overleaf/settings': {
path: {
outputDir: this.outputDir,
},
},
},
})
})
describe('when called with no files', function () {
beforeEach(async function () {
this.OutputFileFinder.promises.findOutputFiles.resolves({
outputFiles: [
{ path: 'file_1' },
{ path: 'file_2' },
{ path: 'file_3' },
{ path: 'file_4' },
],
})
await this.OutputFileArchiveManager.archiveFilesForBuild(
projectId,
userId,
buildId
)
})
it('creates a zip archive', function () {
sinon.assert.calledWith(this.archiver, 'zip')
})
it('adds all the output files to the archive', function () {
expect(this.archive.append.callCount).to.equal(4)
sinon.assert.calledWith(
this.archive.append,
`handle: ${this.outputDir}/${projectId}-${userId}/${buildId}/file_1`,
sinon.match({ name: 'file_1' })
)
sinon.assert.calledWith(
this.archive.append,
`handle: ${this.outputDir}/${projectId}-${userId}/${buildId}/file_2`,
sinon.match({ name: 'file_2' })
)
sinon.assert.calledWith(
this.archive.append,
`handle: ${this.outputDir}/${projectId}-${userId}/${buildId}/file_3`,
sinon.match({ name: 'file_3' })
)
sinon.assert.calledWith(
this.archive.append,
`handle: ${this.outputDir}/${projectId}-${userId}/${buildId}/file_4`,
sinon.match({ name: 'file_4' })
)
})
it('finalizes the archive after all files are appended', function () {
sinon.assert.called(this.archive.finalize)
expect(this.archive.finalize.calledBefore(this.archive.append)).to.be
.false
})
})
describe('when called with a list of files that all are in the output directory', function () {
beforeEach(async function () {
this.OutputFileFinder.promises.findOutputFiles.resolves({
outputFiles: [
{ path: 'file_1' },
{ path: 'file_2' },
{ path: 'file_3' },
{ path: 'file_4' },
],
})
await this.OutputFileArchiveManager.archiveFilesForBuild(
projectId,
userId,
buildId,
['file_1', 'file_4']
)
})
it('creates a zip archive', function () {
sinon.assert.calledWith(this.archiver, 'zip')
})
it('adds only output files from the list of files to the archive', function () {
expect(this.archive.append.callCount).to.equal(2)
sinon.assert.calledWith(
this.archive.append,
`handle: ${this.outputDir}/${projectId}-${userId}/${buildId}/file_1`,
sinon.match({
name: 'file_1',
})
)
sinon.assert.calledWith(
this.archive.append,
`handle: ${this.outputDir}/${projectId}-${userId}/${buildId}/file_4`,
sinon.match({
name: 'file_4',
})
)
})
it('finalizes the archive after all files are appended', function () {
sinon.assert.called(this.archive.finalize)
expect(this.archive.finalize.calledBefore(this.archive.append)).to.be
.false
})
})
describe('when called with a list of files and one of the files is missing from the output directory', function () {
beforeEach(async function () {
this.OutputFileFinder.promises.findOutputFiles.resolves({
outputFiles: [
{ path: 'file_1' },
{ path: 'file_2' },
{ path: 'file_3' },
],
})
await this.OutputFileArchiveManager.archiveFilesForBuild(
projectId,
userId,
buildId,
['file_1', 'file_4']
)
})
it('creates a zip archive', function () {
sinon.assert.calledWith(this.archiver, 'zip')
})
it('adds the files that were found to the archive', function () {
sinon.assert.calledWith(
this.archive.append,
`handle: ${this.outputDir}/${projectId}-${userId}/${buildId}/file_1`,
sinon.match({ name: 'file_1' })
)
})
it('adds a file listing any missing files', function () {
sinon.assert.calledWith(
this.archive.append,
'file_4',
sinon.match({
name: 'missing_files.txt',
})
)
})
it('finalizes the archive after all files are appended', function () {
sinon.assert.called(this.archive.finalize)
expect(this.archive.finalize.calledBefore(this.archive.append)).to.be
.false
})
})
describe('when the output directory cannot be accessed', function () {
beforeEach(async function () {
this.OutputFileFinder.promises.findOutputFiles.rejects({
code: 'ENOENT',
})
})
it('rejects with a NotFoundError', async function () {
try {
await this.OutputFileArchiveManager.archiveFilesForBuild(
projectId,
userId,
buildId
)
assert.fail('should have thrown a NotFoundError')
} catch (err) {
expect(err).to.haveOwnProperty('name', 'NotFoundError')
}
})
it('does not create an archive', function () {
expect(this.archiver.called).to.be.false
})
})
})