overleaf/services/clsi/app/js/XrefParser.js
Jakob Ackermann 956cacaef7 Merge pull request #10139 from overleaf/jpa-split-test-min-chunk-size
[misc] add split test for a per request pdfCachingMinChunkSize

GitOrigin-RevId: 6a8a3c6267501789f2047a67b03db6ac6df427c3
2022-10-26 08:03:39 +00:00

67 lines
2.2 KiB
JavaScript

const { NoXrefTableError } = require('./Errors')
const fs = require('fs')
const { O_RDONLY, O_NOFOLLOW } = fs.constants
// Upper bound in bytes (1 MiB) on the size of an xref file that
// parseXrefTable is willing to read; larger files are rejected.
const MAX_XREF_FILE_SIZE = 1024 * 1024
/**
 * Read the (untrusted) xref file after validating its type and size.
 *
 * The file is opened once and both the checks and the read go through that
 * same open handle, so the path cannot be swapped for a different file
 * between the validation and the read.
 *
 * @param {string} xRefFilePath - path of the xref dump
 * @returns {Promise<string>} the file content decoded as ASCII
 * @throws {NoXrefTableError} if the file is not a regular file, is empty, or
 *   is larger than MAX_XREF_FILE_SIZE; file system errors are passed through
 */
async function readXrefFile(xRefFilePath) {
  const handle = await fs.promises.open(
    xRefFilePath,
    // O_NOFOLLOW: refuse to open a symlink.
    // O_NONBLOCK: do not hang in open() on a FIFO; it gets rejected by the
    // isFile() check below. The flag has no effect on regular files.
    O_RDONLY | O_NOFOLLOW | fs.constants.O_NONBLOCK
  )
  try {
    const stats = await handle.stat()
    if (!stats.isFile()) {
      throw new NoXrefTableError('xref file invalid type')
    }
    if (stats.size === 0) {
      throw new NoXrefTableError('xref file empty')
    }
    if (stats.size > MAX_XREF_FILE_SIZE) {
      throw new NoXrefTableError('xref file too large')
    }
    return await handle.readFile({ encoding: 'ascii' })
  } finally {
    await handle.close()
  }
}

/**
 * Parse qpdf --show-xref output to get a table of xref entries.
 *
 * The xref dump is read from `filePath + 'xref'` (e.g. output.pdfxref for
 * output.pdf). Only lines describing uncompressed objects are used.
 *
 * @param {string} filePath - path of the PDF; 'xref' is appended to it to
 *   locate the xref dump
 * @param {number} pdfFileSize - size of the PDF (not used by this function)
 * @returns {Promise<{xRefEntries: Array<{offset: number, uncompressed?: boolean}>}>}
 *   a leading `{ offset: 0 }` placeholder followed by one
 *   `{ offset, uncompressed: true }` entry per uncompressed object, in file
 *   order
 * @throws {NoXrefTableError} if the xref file is missing, unreadable, of the
 *   wrong type, empty, too large, or contains no uncompressed objects
 */
async function parseXrefTable(filePath, pdfFileSize) {
  try {
    // the xref table will be written to output.pdfxref when available
    const xRefFilePath = filePath + 'xref'
    const content = await readXrefFile(xRefFilePath)
    // the qpdf xref table output looks like this:
    //
    //   3/0: uncompressed; offset = 194159
    //
    // we only need the uncompressed objects
    const matches = content.matchAll(
      // for safety, every matched number is limited to 9 digits (< 10^9)
      // ignore the generation id in "id/gen"
      // in a linearized pdf all objects must have generation number 0
      /^\d{1,9}\/\d{1,9}: uncompressed; offset = (\d{1,9})$/gm
    )
    // include a zero-index object for backwards compatibility with
    // our existing xref table parsing code
    const xRefEntries = [{ offset: 0 }]
    // extract all the xref table entries
    for (const match of matches) {
      const offset = Number.parseInt(match[1], 10)
      xRefEntries.push({ offset, uncompressed: true })
    }
    if (xRefEntries.length === 1) {
      throw new NoXrefTableError('xref file has no objects')
    }
    return { xRefEntries }
  } catch (err) {
    // callers only ever see NoXrefTableError: our own errors pass through,
    // file system errors are reported by their code, anything else is
    // reported as a generic parse error
    if (err instanceof NoXrefTableError) {
      throw err
    } else if (err.code) {
      throw new NoXrefTableError(`xref file error ${err.code}`)
    } else {
      throw new NoXrefTableError('xref file parse error')
    }
  }
}
// Public API of this module: only parseXrefTable is exported.
module.exports = {
parseXrefTable,
}