Merge pull request #8847 from overleaf/jpa-emit-start-of-xref-table

[clsi] try to emit the start of the xref table

GitOrigin-RevId: 6d8348a349572cc997ac5924664428228c00fed1
This commit is contained in:
Jakob Ackermann 2022-07-15 09:03:40 +01:00 committed by Copybot
parent 2173821121
commit 97624d0c6c
30 changed files with 3278 additions and 3189 deletions

View file

@ -119,22 +119,26 @@ async function doUpdateInternal(contentDir, filePath, size, checkDeadline) {
checkDeadline('after init HashFileTracker')
const rawTable = await parseXrefTable(filePath, size, checkDeadline)
rawTable.sort((a, b) => {
const { xRefEntries, startXRefTable } = await parseXrefTable(
filePath,
size,
checkDeadline
)
xRefEntries.sort((a, b) => {
return a.offset - b.offset
})
rawTable.forEach((obj, idx) => {
xRefEntries.forEach((obj, idx) => {
obj.idx = idx
})
checkDeadline('after parsing')
const uncompressedObjects = []
for (const object of rawTable) {
for (const object of xRefEntries) {
if (!object.uncompressed) {
continue
}
const nextObject = rawTable[object.idx + 1]
const nextObject = xRefEntries[object.idx + 1]
if (!nextObject) {
// Ignore this possible edge case.
// The last object should be part of the xRef table.
@ -204,7 +208,7 @@ async function doUpdateInternal(contentDir, filePath, size, checkDeadline) {
// Let the next compile use the already written ranges.
const reclaimedSpace = await tracker.deleteStaleHashes(5)
await tracker.flush()
return [ranges, newRanges, reclaimedSpace]
return [ranges, newRanges, reclaimedSpace, startXRefTable]
}
function getStatePath(contentDir) {

View file

@ -392,7 +392,12 @@ module.exports = OutputCacheManager = {
return callback(null, outputFiles)
}
if (err) return callback(err, outputFiles)
const [contentRanges, newContentRanges, reclaimedSpace] = result
const [
contentRanges,
newContentRanges,
reclaimedSpace,
startXRefTable,
] = result
if (Settings.enablePdfCachingDark) {
// In dark mode we are doing the computation only and do not emit
@ -400,6 +405,7 @@ module.exports = OutputCacheManager = {
} else {
outputFile.contentId = Path.basename(contentDir)
outputFile.ranges = contentRanges
outputFile.startXRefTable = startXRefTable
}
timings['compute-pdf-caching'] = timer.done()

View file

@ -16,7 +16,9 @@ async function parseXrefTable(path, size, checkDeadline) {
checkDeadline('pdfjs: after parseStartXRef')
await manager.ensureDoc('parse')
checkDeadline('pdfjs: after parse')
return manager.pdfDocument.catalog.xref.entries
const xRefEntries = manager.pdfDocument.xref.entries
const startXRefTable = manager.pdfDocument.xref.topDict?.get('Prev')
return { xRefEntries, startXRefTable }
} finally {
file.close()
}

View file

@ -5,7 +5,7 @@ const pdfPath = process.argv[2]
/**
 * Print the xref entries of the PDF located at `pdfPath`.
 *
 * Looks up the size of the file on disk, hands the path and size to
 * `parseXrefTable`, and logs the `xRefEntries` field of the result.
 *
 * @returns {Promise<void>}
 */
async function main() {
  const { size } = await fs.promises.stat(pdfPath)
  // parseXrefTable resolves to an object that carries the entries under
  // `xRefEntries`, so destructure that field. Declaring `xRefEntries` only
  // once also avoids a duplicate `const` binding, which is a SyntaxError.
  const { xRefEntries } = await parseXrefTable(pdfPath, size)
  console.log('Xref entries', xRefEntries)
}

View file

@ -1,4 +1,5 @@
[
{
"xRefEntries": [
{
"offset": 0,
"gen": 0,
@ -353,4 +354,6 @@
"offset": 6,
"gen": 55
}
]
],
"startXRefTable": 123422
}

View file

@ -1,4 +1,5 @@
[
{
"xRefEntries": [
{
"offset": 0,
"gen": 0,
@ -142,4 +143,6 @@
"offset": 14,
"gen": 16
}
]
],
"startXRefTable": 59561
}

View file

@ -1,4 +1,5 @@
[
{
"xRefEntries": [
{
"offset": 0,
"gen": 0,
@ -139,4 +140,6 @@
"offset": 15,
"gen": 14
}
]
],
"startXRefTable": 31614
}

View file

@ -1,4 +1,5 @@
[
{
"xRefEntries": [
{
"offset": 0,
"gen": 0,
@ -129,4 +130,6 @@
"offset": 13,
"gen": 14
}
]
],
"startXRefTable": 27312
}

View file

@ -1,4 +1,5 @@
[
{
"xRefEntries": [
{
"offset": 0,
"gen": 65535,
@ -104,4 +105,6 @@
"gen": 0,
"uncompressed": true
}
]
],
"startXRefTable": 6682
}

View file

@ -1,4 +1,5 @@
[
{
"xRefEntries": [
{
"offset": 0,
"gen": 0,
@ -107,4 +108,6 @@
"offset": 11,
"gen": 11
}
]
],
"startXRefTable": 31307
}

View file

@ -1,4 +1,5 @@
[
{
"xRefEntries": [
{
"offset": 0,
"gen": 0,
@ -123,4 +124,6 @@
"offset": 15,
"gen": 10
}
]
],
"startXRefTable": 6344
}

View file

@ -1,4 +1,5 @@
[
{
"xRefEntries": [
{
"offset": 0,
"gen": 0,
@ -108,4 +109,6 @@
"offset": 12,
"gen": 10
}
]
],
"startXRefTable": 35015
}

View file

@ -1,4 +1,5 @@
[
{
"xRefEntries": [
{
"offset": 0,
"gen": 0,
@ -103,4 +104,6 @@
"offset": 11,
"gen": 10
}
]
],
"startXRefTable": 23543
}

View file

@ -1,4 +1,5 @@
[
{
"xRefEntries": [
{
"offset": 0,
"gen": 0,
@ -95,4 +96,6 @@
"offset": 11,
"gen": 8
}
]
],
"startXRefTable": 24739
}

View file

@ -1,4 +1,5 @@
[
{
"xRefEntries": [
{
"offset": 0,
"gen": 0,
@ -142,4 +143,6 @@
"offset": 14,
"gen": 16
}
]
],
"startXRefTable": 43799
}

View file

@ -1,4 +1,5 @@
[
{
"xRefEntries": [
{
"offset": 0,
"gen": 0,
@ -176,4 +177,6 @@
"offset": 16,
"gen": 22
}
]
],
"startXRefTable": 75548
}

View file

@ -1,4 +1,5 @@
[
{
"xRefEntries": [
{
"offset": 0,
"gen": 65535,
@ -134,4 +135,6 @@
"gen": 0,
"uncompressed": true
}
]
],
"startXRefTable": 26815
}

View file

@ -1,4 +1,5 @@
[
{
"xRefEntries": [
{
"offset": 0,
"gen": 0,
@ -88,4 +89,6 @@
"offset": 12,
"gen": 5
}
]
],
"startXRefTable": 3777
}

View file

@ -1,4 +1,5 @@
[
{
"xRefEntries": [
{
"offset": 0,
"gen": 0,
@ -122,4 +123,6 @@
"offset": 13,
"gen": 11
}
]
],
"startXRefTable": 32412
}

View file

@ -1,4 +1,5 @@
[
{
"xRefEntries": [
{
"offset": 0,
"gen": 0,
@ -105,4 +106,6 @@
"offset": 12,
"gen": 8
}
]
],
"startXRefTable": 25186
}

View file

@ -1,4 +1,5 @@
[
{
"xRefEntries": [
{
"offset": 0,
"gen": 0,
@ -91,4 +92,6 @@
"offset": 11,
"gen": 7
}
]
],
"startXRefTable": 20927
}

View file

@ -1,4 +1,5 @@
[
{
"xRefEntries": [
{
"offset": 0,
"gen": 0,
@ -150,4 +151,6 @@
"offset": 16,
"gen": 13
}
]
],
"startXRefTable": 43050
}

View file

@ -1,4 +1,5 @@
[
{
"xRefEntries": [
{
"offset": 0,
"gen": 0,
@ -108,4 +109,6 @@
"offset": 12,
"gen": 10
}
]
],
"startXRefTable": 34350
}

View file

@ -1,4 +1,5 @@
[
{
"xRefEntries": [
{
"offset": 0,
"gen": 0,
@ -105,4 +106,6 @@
"offset": 12,
"gen": 8
}
]
],
"startXRefTable": 17541
}

View file

@ -1,4 +1,5 @@
[
{
"xRefEntries": [
{
"offset": 0,
"gen": 0,
@ -108,4 +109,6 @@
"offset": 12,
"gen": 10
}
]
],
"startXRefTable": 37530
}

View file

@ -1,4 +1,5 @@
[
{
"xRefEntries": [
{
"offset": 0,
"gen": 0,
@ -122,4 +123,6 @@
"offset": 14,
"gen": 11
}
]
],
"startXRefTable": 48442
}

View file

@ -1,4 +1,5 @@
[
{
"xRefEntries": [
{
"offset": 0,
"gen": 0,
@ -162,4 +163,6 @@
"offset": 18,
"gen": 16
}
]
],
"startXRefTable": 35699
}

View file

@ -1,4 +1,5 @@
[
{
"xRefEntries": [
{
"offset": 0,
"gen": 0,
@ -92,4 +93,6 @@
"offset": 12,
"gen": 6
}
]
],
"startXRefTable": 8578
}

View file

@ -579,6 +579,7 @@ const ClsiManager = {
f.contentId = file.contentId
f.ranges = file.ranges || []
f.size = file.size
f.startXRefTable = file.startXRefTable
f.createdAt = new Date()
}
outputFiles.push(f)

View file

@ -147,6 +147,7 @@ describe('ClsiManager', function () {
// gets dropped by JSON.stringify
contentId: undefined,
size: undefined,
startXRefTable: undefined,
},
{
url: `/project/${this.project_id}/user/${this.user_id}/build/1234/output/output.log`,
@ -178,6 +179,7 @@ describe('ClsiManager', function () {
build: 1234,
contentId: '123-321',
ranges: [{ start: 1, end: 42, hash: 'foo' }],
startXRefTable: 42,
size: 42,
},
{
@ -209,6 +211,7 @@ describe('ClsiManager', function () {
build: 1234,
contentId: '123-321',
ranges: [{ start: 1, end: 42, hash: 'foo' }],
startXRefTable: 42,
size: 42,
createdAt: new Date(),
},
@ -422,6 +425,7 @@ describe('ClsiManager', function () {
// gets dropped by JSON.stringify
contentId: undefined,
size: undefined,
startXRefTable: undefined,
},
{
url: `/project/${this.submission_id}/build/1234/output/output.log`,