overleaf/services/clsi/app/lib/pdfjs/parseXrefTable.js
Jakob Ackermann 294088fb27 [ContentCacheManager] use PDF.js Xref table instead of stream detection (#242)
* make the content cache manager tests configurable

* extend stream content in unit tests

* [ContentCacheManagerTests] prepare for full object caching

* filesystem stream for pdfjs

* working??

* cleaning up

* handle overflow

* [misc] install pdfjs-dist

* [misc] move pdfjs code into app/lib/ and scripts/, also use CamelCase

* [misc] abstract the file loading and parsing of xRef tables into helper

* [misc] pdfjsTests: add snapshot based tests for the Xref table parser

* [misc] FSStream: throw proper error and drop commented code

* [misc] FSStream: integrate throwing of MissingDataException into getter

* [misc] pdfjs: fix eslint errors

* [misc] pdfjs: run format_fix

* [misc] pdfjs: allocate very small non empty dummy buffers explicitly

* [misc] install @overleaf/o-error

* [ContentCacheManager] use PDF.js Xref table instead of stream detection

Co-Authored-By: Brian Gough <brian.gough@overleaf.com>

* [pdfjs] parseXrefTable: handle empty PDF files gracefully

Co-authored-by: Brian Gough <brian.gough@overleaf.com>
2021-05-31 09:20:25 +01:00

24 lines
527 B
JavaScript

const fs = require('fs')
const { FSPdfManager } = require('./FSPdfManager')
/**
 * Parse the cross-reference table of a PDF file on disk.
 *
 * The file is opened, handed to an FSPdfManager together with its size, and
 * the manager is driven through the 'checkHeader', 'parseStartXRef' and
 * 'parse' document steps before the xref entries are read.
 *
 * @param {string} path - location of the PDF file to open
 * @param {number} size - size of the file; passed through to FSPdfManager.
 *   A size of 0 short-circuits and yields an empty list without opening
 *   the file.
 * @returns {Promise<Array>} the `entries` of the parsed document's xref
 *   table, or `[]` for an empty file
 * @throws rejects with whatever `fs.promises.open`, the manager steps, or
 *   closing the file handle raise
 */
async function parseXrefTable(path, size) {
  // An empty file has no xref table to parse.
  if (size === 0) {
    return []
  }

  const file = await fs.promises.open(path)
  try {
    const manager = new FSPdfManager(0, { fh: file, size })
    await manager.ensureDoc('checkHeader')
    await manager.ensureDoc('parseStartXRef')
    await manager.ensureDoc('parse')
    return manager.pdfDocument.catalog.xref.entries
  } finally {
    // Await the close so the handle is released before we settle and so a
    // close failure reaches the caller instead of becoming an unhandled
    // rejection.
    await file.close()
  }
}
// Public API of this module: only the xref table parser is exported.
module.exports = {
parseXrefTable
}