Merge pull request #8847 from overleaf/jpa-emit-start-of-xref-table

[clsi] try to emit the start of the xref table

GitOrigin-RevId: 6d8348a349572cc997ac5924664428228c00fed1
This commit is contained in:
Jakob Ackermann 2022-07-15 09:03:40 +01:00 committed by Copybot
parent 2173821121
commit 97624d0c6c
30 changed files with 3278 additions and 3189 deletions

View file

@ -119,22 +119,26 @@ async function doUpdateInternal(contentDir, filePath, size, checkDeadline) {
checkDeadline('after init HashFileTracker') checkDeadline('after init HashFileTracker')
const rawTable = await parseXrefTable(filePath, size, checkDeadline) const { xRefEntries, startXRefTable } = await parseXrefTable(
rawTable.sort((a, b) => { filePath,
size,
checkDeadline
)
xRefEntries.sort((a, b) => {
return a.offset - b.offset return a.offset - b.offset
}) })
rawTable.forEach((obj, idx) => { xRefEntries.forEach((obj, idx) => {
obj.idx = idx obj.idx = idx
}) })
checkDeadline('after parsing') checkDeadline('after parsing')
const uncompressedObjects = [] const uncompressedObjects = []
for (const object of rawTable) { for (const object of xRefEntries) {
if (!object.uncompressed) { if (!object.uncompressed) {
continue continue
} }
const nextObject = rawTable[object.idx + 1] const nextObject = xRefEntries[object.idx + 1]
if (!nextObject) { if (!nextObject) {
// Ignore this possible edge case. // Ignore this possible edge case.
// The last object should be part of the xRef table. // The last object should be part of the xRef table.
@ -204,7 +208,7 @@ async function doUpdateInternal(contentDir, filePath, size, checkDeadline) {
// Let the next compile use the already written ranges. // Let the next compile use the already written ranges.
const reclaimedSpace = await tracker.deleteStaleHashes(5) const reclaimedSpace = await tracker.deleteStaleHashes(5)
await tracker.flush() await tracker.flush()
return [ranges, newRanges, reclaimedSpace] return [ranges, newRanges, reclaimedSpace, startXRefTable]
} }
function getStatePath(contentDir) { function getStatePath(contentDir) {

View file

@ -392,7 +392,12 @@ module.exports = OutputCacheManager = {
return callback(null, outputFiles) return callback(null, outputFiles)
} }
if (err) return callback(err, outputFiles) if (err) return callback(err, outputFiles)
const [contentRanges, newContentRanges, reclaimedSpace] = result const [
contentRanges,
newContentRanges,
reclaimedSpace,
startXRefTable,
] = result
if (Settings.enablePdfCachingDark) { if (Settings.enablePdfCachingDark) {
// In dark mode we are doing the computation only and do not emit // In dark mode we are doing the computation only and do not emit
@ -400,6 +405,7 @@ module.exports = OutputCacheManager = {
} else { } else {
outputFile.contentId = Path.basename(contentDir) outputFile.contentId = Path.basename(contentDir)
outputFile.ranges = contentRanges outputFile.ranges = contentRanges
outputFile.startXRefTable = startXRefTable
} }
timings['compute-pdf-caching'] = timer.done() timings['compute-pdf-caching'] = timer.done()

View file

@ -16,7 +16,9 @@ async function parseXrefTable(path, size, checkDeadline) {
checkDeadline('pdfjs: after parseStartXRef') checkDeadline('pdfjs: after parseStartXRef')
await manager.ensureDoc('parse') await manager.ensureDoc('parse')
checkDeadline('pdfjs: after parse') checkDeadline('pdfjs: after parse')
return manager.pdfDocument.catalog.xref.entries const xRefEntries = manager.pdfDocument.xref.entries
const startXRefTable = manager.pdfDocument.xref.topDict?.get('Prev')
return { xRefEntries, startXRefTable }
} finally { } finally {
file.close() file.close()
} }

View file

@ -5,7 +5,7 @@ const pdfPath = process.argv[2]
async function main() { async function main() {
const size = (await fs.promises.stat(pdfPath)).size const size = (await fs.promises.stat(pdfPath)).size
const xRefEntries = await parseXrefTable(pdfPath, size) const { xRefEntries } = await parseXrefTable(pdfPath, size)
console.log('Xref entries', xRefEntries) console.log('Xref entries', xRefEntries)
} }

View file

@ -1,4 +1,5 @@
[ {
"xRefEntries": [
{ {
"offset": 0, "offset": 0,
"gen": 0, "gen": 0,
@ -353,4 +354,6 @@
"offset": 6, "offset": 6,
"gen": 55 "gen": 55
} }
] ],
"startXRefTable": 123422
}

View file

@ -1,4 +1,5 @@
[ {
"xRefEntries": [
{ {
"offset": 0, "offset": 0,
"gen": 0, "gen": 0,
@ -142,4 +143,6 @@
"offset": 14, "offset": 14,
"gen": 16 "gen": 16
} }
] ],
"startXRefTable": 59561
}

View file

@ -1,4 +1,5 @@
[ {
"xRefEntries": [
{ {
"offset": 0, "offset": 0,
"gen": 0, "gen": 0,
@ -139,4 +140,6 @@
"offset": 15, "offset": 15,
"gen": 14 "gen": 14
} }
] ],
"startXRefTable": 31614
}

View file

@ -1,4 +1,5 @@
[ {
"xRefEntries": [
{ {
"offset": 0, "offset": 0,
"gen": 0, "gen": 0,
@ -129,4 +130,6 @@
"offset": 13, "offset": 13,
"gen": 14 "gen": 14
} }
] ],
"startXRefTable": 27312
}

View file

@ -1,4 +1,5 @@
[ {
"xRefEntries": [
{ {
"offset": 0, "offset": 0,
"gen": 65535, "gen": 65535,
@ -104,4 +105,6 @@
"gen": 0, "gen": 0,
"uncompressed": true "uncompressed": true
} }
] ],
"startXRefTable": 6682
}

View file

@ -1,4 +1,5 @@
[ {
"xRefEntries": [
{ {
"offset": 0, "offset": 0,
"gen": 0, "gen": 0,
@ -107,4 +108,6 @@
"offset": 11, "offset": 11,
"gen": 11 "gen": 11
} }
] ],
"startXRefTable": 31307
}

View file

@ -1,4 +1,5 @@
[ {
"xRefEntries": [
{ {
"offset": 0, "offset": 0,
"gen": 0, "gen": 0,
@ -123,4 +124,6 @@
"offset": 15, "offset": 15,
"gen": 10 "gen": 10
} }
] ],
"startXRefTable": 6344
}

View file

@ -1,4 +1,5 @@
[ {
"xRefEntries": [
{ {
"offset": 0, "offset": 0,
"gen": 0, "gen": 0,
@ -108,4 +109,6 @@
"offset": 12, "offset": 12,
"gen": 10 "gen": 10
} }
] ],
"startXRefTable": 35015
}

View file

@ -1,4 +1,5 @@
[ {
"xRefEntries": [
{ {
"offset": 0, "offset": 0,
"gen": 0, "gen": 0,
@ -103,4 +104,6 @@
"offset": 11, "offset": 11,
"gen": 10 "gen": 10
} }
] ],
"startXRefTable": 23543
}

View file

@ -1,4 +1,5 @@
[ {
"xRefEntries": [
{ {
"offset": 0, "offset": 0,
"gen": 0, "gen": 0,
@ -95,4 +96,6 @@
"offset": 11, "offset": 11,
"gen": 8 "gen": 8
} }
] ],
"startXRefTable": 24739
}

View file

@ -1,4 +1,5 @@
[ {
"xRefEntries": [
{ {
"offset": 0, "offset": 0,
"gen": 0, "gen": 0,
@ -142,4 +143,6 @@
"offset": 14, "offset": 14,
"gen": 16 "gen": 16
} }
] ],
"startXRefTable": 43799
}

View file

@ -1,4 +1,5 @@
[ {
"xRefEntries": [
{ {
"offset": 0, "offset": 0,
"gen": 0, "gen": 0,
@ -176,4 +177,6 @@
"offset": 16, "offset": 16,
"gen": 22 "gen": 22
} }
] ],
"startXRefTable": 75548
}

View file

@ -1,4 +1,5 @@
[ {
"xRefEntries": [
{ {
"offset": 0, "offset": 0,
"gen": 65535, "gen": 65535,
@ -134,4 +135,6 @@
"gen": 0, "gen": 0,
"uncompressed": true "uncompressed": true
} }
] ],
"startXRefTable": 26815
}

View file

@ -1,4 +1,5 @@
[ {
"xRefEntries": [
{ {
"offset": 0, "offset": 0,
"gen": 0, "gen": 0,
@ -88,4 +89,6 @@
"offset": 12, "offset": 12,
"gen": 5 "gen": 5
} }
] ],
"startXRefTable": 3777
}

View file

@ -1,4 +1,5 @@
[ {
"xRefEntries": [
{ {
"offset": 0, "offset": 0,
"gen": 0, "gen": 0,
@ -122,4 +123,6 @@
"offset": 13, "offset": 13,
"gen": 11 "gen": 11
} }
] ],
"startXRefTable": 32412
}

View file

@ -1,4 +1,5 @@
[ {
"xRefEntries": [
{ {
"offset": 0, "offset": 0,
"gen": 0, "gen": 0,
@ -105,4 +106,6 @@
"offset": 12, "offset": 12,
"gen": 8 "gen": 8
} }
] ],
"startXRefTable": 25186
}

View file

@ -1,4 +1,5 @@
[ {
"xRefEntries": [
{ {
"offset": 0, "offset": 0,
"gen": 0, "gen": 0,
@ -91,4 +92,6 @@
"offset": 11, "offset": 11,
"gen": 7 "gen": 7
} }
] ],
"startXRefTable": 20927
}

View file

@ -1,4 +1,5 @@
[ {
"xRefEntries": [
{ {
"offset": 0, "offset": 0,
"gen": 0, "gen": 0,
@ -150,4 +151,6 @@
"offset": 16, "offset": 16,
"gen": 13 "gen": 13
} }
] ],
"startXRefTable": 43050
}

View file

@ -1,4 +1,5 @@
[ {
"xRefEntries": [
{ {
"offset": 0, "offset": 0,
"gen": 0, "gen": 0,
@ -108,4 +109,6 @@
"offset": 12, "offset": 12,
"gen": 10 "gen": 10
} }
] ],
"startXRefTable": 34350
}

View file

@ -1,4 +1,5 @@
[ {
"xRefEntries": [
{ {
"offset": 0, "offset": 0,
"gen": 0, "gen": 0,
@ -105,4 +106,6 @@
"offset": 12, "offset": 12,
"gen": 8 "gen": 8
} }
] ],
"startXRefTable": 17541
}

View file

@ -1,4 +1,5 @@
[ {
"xRefEntries": [
{ {
"offset": 0, "offset": 0,
"gen": 0, "gen": 0,
@ -108,4 +109,6 @@
"offset": 12, "offset": 12,
"gen": 10 "gen": 10
} }
] ],
"startXRefTable": 37530
}

View file

@ -1,4 +1,5 @@
[ {
"xRefEntries": [
{ {
"offset": 0, "offset": 0,
"gen": 0, "gen": 0,
@ -122,4 +123,6 @@
"offset": 14, "offset": 14,
"gen": 11 "gen": 11
} }
] ],
"startXRefTable": 48442
}

View file

@ -1,4 +1,5 @@
[ {
"xRefEntries": [
{ {
"offset": 0, "offset": 0,
"gen": 0, "gen": 0,
@ -162,4 +163,6 @@
"offset": 18, "offset": 18,
"gen": 16 "gen": 16
} }
] ],
"startXRefTable": 35699
}

View file

@ -1,4 +1,5 @@
[ {
"xRefEntries": [
{ {
"offset": 0, "offset": 0,
"gen": 0, "gen": 0,
@ -92,4 +93,6 @@
"offset": 12, "offset": 12,
"gen": 6 "gen": 6
} }
] ],
"startXRefTable": 8578
}

View file

@ -579,6 +579,7 @@ const ClsiManager = {
f.contentId = file.contentId f.contentId = file.contentId
f.ranges = file.ranges || [] f.ranges = file.ranges || []
f.size = file.size f.size = file.size
f.startXRefTable = file.startXRefTable
f.createdAt = new Date() f.createdAt = new Date()
} }
outputFiles.push(f) outputFiles.push(f)

View file

@ -147,6 +147,7 @@ describe('ClsiManager', function () {
// gets dropped by JSON.stringify // gets dropped by JSON.stringify
contentId: undefined, contentId: undefined,
size: undefined, size: undefined,
startXRefTable: undefined,
}, },
{ {
url: `/project/${this.project_id}/user/${this.user_id}/build/1234/output/output.log`, url: `/project/${this.project_id}/user/${this.user_id}/build/1234/output/output.log`,
@ -178,6 +179,7 @@ describe('ClsiManager', function () {
build: 1234, build: 1234,
contentId: '123-321', contentId: '123-321',
ranges: [{ start: 1, end: 42, hash: 'foo' }], ranges: [{ start: 1, end: 42, hash: 'foo' }],
startXRefTable: 42,
size: 42, size: 42,
}, },
{ {
@ -209,6 +211,7 @@ describe('ClsiManager', function () {
build: 1234, build: 1234,
contentId: '123-321', contentId: '123-321',
ranges: [{ start: 1, end: 42, hash: 'foo' }], ranges: [{ start: 1, end: 42, hash: 'foo' }],
startXRefTable: 42,
size: 42, size: 42,
createdAt: new Date(), createdAt: new Date(),
}, },
@ -422,6 +425,7 @@ describe('ClsiManager', function () {
// gets dropped by JSON.stringify // gets dropped by JSON.stringify
contentId: undefined, contentId: undefined,
size: undefined, size: undefined,
startXRefTable: undefined,
}, },
{ {
url: `/project/${this.submission_id}/build/1234/output/output.log`, url: `/project/${this.submission_id}/build/1234/output/output.log`,