Mirror of https://github.com/overleaf/overleaf.git, synced 2024-11-07 20:31:06 -05:00

Merge pull request #10239 from overleaf/jpa-pdf-caching-backend-tweaks

[clsi] pdf-caching: emit partial set of processed ranges on timeout

GitOrigin-RevId: 0038b5a30ac33fcdcab523d7ddc72fea9f2f5be9

This commit is contained in: parent 24dc307d7c, commit 1a434558d7

3 changed files with 77 additions and 20 deletions
Changed file 1 of 3:

@@ -102,7 +102,7 @@ async function updateOtherEventLoop({
   pdfCachingMinChunkSize,
   compileTime,
 }) {
-  const workerLatencyInMs = 20
+  const workerLatencyInMs = 100
   // Prefer getting the timeout error from the worker vs timing out the worker.
   const timeout = getMaxOverhead(compileTime) + workerLatencyInMs
   try {
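The 20ms → 100ms bump gives the worker enough headroom to hit its own deadline and surface a TimedOutError (together with whatever ranges it already processed) before the caller-side timeout fires. A minimal sketch of that arrangement, not the actual clsi code — getMaxOverhead's budget formula and the runInWorker stand-in are assumptions:

class TimedOutError extends Error {}

// Assumed budget helper: some fraction of compile time, capped. Illustrative only.
function getMaxOverhead(compileTime) {
  return Math.min(10 * 1000, compileTime / 10)
}

async function updateOtherEventLoopSketch(runInWorker, compileTime) {
  const workerLatencyInMs = 100
  // Prefer getting the timeout error from the worker vs timing out the worker.
  const timeout = getMaxOverhead(compileTime) + workerLatencyInMs
  let timer
  const killSwitch = new Promise((resolve, reject) => {
    timer = setTimeout(
      () => reject(new TimedOutError('worker did not respond in time')),
      timeout
    )
  })
  try {
    // Whichever settles first wins; with the 100ms head start the worker's
    // own TimedOutError (carrying the partial ranges) normally arrives first.
    return await Promise.race([runInWorker(), killSwitch])
  } finally {
    clearTimeout(timer)
  }
}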
@@ -145,14 +145,15 @@ async function updateSameEventLoop({
   compileTime,
 }) {
   const checkDeadline = getDeadlineChecker(compileTime)
-  const contentRanges = []
-  const newContentRanges = []
   // keep track of hashes expire old ones when they reach a generation > N.
   const tracker = await HashFileTracker.from(contentDir)
   tracker.updateAge()
 
   checkDeadline('after init HashFileTracker')
+  const [reclaimedSpace, overheadDeleteStaleHashes] =
+    await tracker.deleteStaleHashes(5)
+  checkDeadline('after delete stale hashes')
 
   const { xRefEntries, startXRefTable } = await parseXrefTable(
     filePath,
     pdfSize
@@ -190,6 +191,9 @@ async function updateSameEventLoop({
 
   checkDeadline('after finding uncompressed')
 
+  let timedOutErr = null
+  const contentRanges = []
+  const newContentRanges = []
   const handle = await fs.promises.open(filePath)
   try {
     for (const { object, idx } of uncompressedObjects) {
@@ -225,24 +229,42 @@ async function updateSameEventLoop({
         end: object.endOffset,
         hash,
       }
-      contentRanges.push(range)
 
-      // Optimization: Skip writing of duplicate streams.
-      if (tracker.track(range)) continue
+      if (tracker.has(range.hash)) {
+        // Optimization: Skip writing of already seen hashes.
+        tracker.track(range)
+        contentRanges.push(range)
+        continue
+      }
 
       await writePdfStream(contentDir, hash, buffer)
-      checkDeadline('after write ' + idx)
+      tracker.track(range)
+      contentRanges.push(range)
       newContentRanges.push(range)
+      checkDeadline('after write ' + idx)
     }
+  } catch (err) {
+    if (err instanceof TimedOutError) {
+      // Let the frontend use ranges that were processed so far.
+      timedOutErr = err
+    } else {
+      throw err
+    }
   } finally {
     await handle.close()
-  }
 
-  // NOTE: Bailing out below does not make sense.
-  // Let the next compile use the already written ranges.
-  const reclaimedSpace = await tracker.deleteStaleHashes(5)
-  await tracker.flush()
-  return { contentRanges, newContentRanges, reclaimedSpace, startXRefTable }
+    // Flush from both success and failure code path. This allows the next
+    // cycle to complete faster as it can use the already written ranges.
+    await tracker.flush()
+  }
+  return {
+    contentRanges,
+    newContentRanges,
+    reclaimedSpace,
+    startXRefTable,
+    overheadDeleteStaleHashes,
+    timedOutErr,
+  }
 }
 
 function getStatePath(contentDir) {
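These hunks carry the commit's headline change: timedOutErr and the range arrays are declared before the try block (see the previous hunk), the per-iteration checkDeadline can throw TimedOutError mid-loop, and the catch clause swallows only that error, so updateSameEventLoop still returns every range written before the deadline. A condensed, self-contained sketch of that control flow — only TimedOutError and checkDeadline mirror names from the diff; the rest is illustrative:

class TimedOutError extends Error {}

function getDeadlineChecker(budgetMs) {
  const deadline = Date.now() + budgetMs
  return function checkDeadline(stage) {
    if (Date.now() > deadline) {
      throw new TimedOutError('deadline exceeded: ' + stage)
    }
  }
}

async function processAll(objects, writeOne, budgetMs) {
  const checkDeadline = getDeadlineChecker(budgetMs)
  let timedOutErr = null
  const processed = [] // survives the try block, like contentRanges above
  try {
    for (const [idx, object] of objects.entries()) {
      await writeOne(object)
      processed.push(object)
      checkDeadline('after write ' + idx)
    }
  } catch (err) {
    if (err instanceof TimedOutError) {
      // Let the caller use the objects that were processed so far.
      timedOutErr = err
    } else {
      throw err
    }
  }
  return { processed, timedOutErr }
}

Note the ordering in the diff: each range is pushed before checkDeadline runs, so a timeout never discards a range whose bytes were already written.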
@@ -266,13 +288,15 @@ class HashFileTracker {
     return new HashFileTracker(contentDir, state)
   }
 
+  has(hash) {
+    return this.hashAge.has(hash)
+  }
+
   track(range) {
-    const exists = this.hashAge.has(range.hash)
-    if (!exists) {
+    if (!this.hashSize.has(range.hash)) {
       this.hashSize.set(range.hash, range.end - range.start)
     }
     this.hashAge.set(range.hash, 0)
-    return exists
   }
 
   updateAge() {
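A minimal in-memory model of the bookkeeping these methods operate on — hashAge counts generations since a hash was last seen, hashSize remembers byte sizes for the reclaimed-space accounting. Persistence and file deletion are omitted, and the exact staleness comparison is an assumption:

class HashTrackerModel {
  constructor() {
    this.hashAge = new Map() // hash -> generations since last seen
    this.hashSize = new Map() // hash -> byte size of the cached stream
  }

  has(hash) {
    return this.hashAge.has(hash)
  }

  track(range) {
    if (!this.hashSize.has(range.hash)) {
      this.hashSize.set(range.hash, range.end - range.start)
    }
    this.hashAge.set(range.hash, 0) // seeing a hash resets its age
  }

  updateAge() {
    for (const [hash, age] of this.hashAge) {
      this.hashAge.set(hash, age + 1)
    }
  }

  findStale(n) {
    // "older than N generations", per the comment in the next hunk
    return [...this.hashAge.entries()]
      .filter(([, age]) => age > n)
      .map(([hash]) => hash)
  }
}

Splitting the old boolean-returning track into has plus track lets the compile loop decide up front whether a stream write can be skipped.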
@@ -318,12 +342,13 @@ class HashFileTracker {
   }
 
   async deleteStaleHashes(n) {
+    const t0 = Date.now()
     // delete any hash file older than N generations
     const hashes = this.findStale(n)
 
     let reclaimedSpace = 0
     if (hashes.length === 0) {
-      return reclaimedSpace
+      return [reclaimedSpace, Date.now() - t0]
     }
 
     await promiseMapWithLimit(10, hashes, async hash => {
@@ -332,7 +357,7 @@ class HashFileTracker {
       reclaimedSpace += this.hashSize.get(hash)
       this.hashSize.delete(hash)
     })
-    return reclaimedSpace
+    return [reclaimedSpace, Date.now() - t0]
   }
 }
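Both return paths of deleteStaleHashes now yield a [reclaimedSpace, elapsedMs] pair instead of a bare number; this is the tuple updateSameEventLoop destructures as [reclaimedSpace, overheadDeleteStaleHashes] earlier in the diff. The shape, generalized (timed is an illustrative helper, not part of the change):

// Wrap any async operation so callers get its result and its duration.
async function timed(fn) {
  const t0 = Date.now()
  const value = await fn()
  return [value, Date.now() - t0]
}

// Hypothetical usage mirroring the destructuring seen in the diff:
// const [reclaimedSpace, overheadDeleteStaleHashes] = await timed(() =>
//   tracker.deleteStaleHashes(5)
// )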
Changed file 2 of 3:

@@ -30,6 +30,17 @@ function emitPdfStats(stats, timings, request) {
 function emitPdfCachingStats(stats, timings, request) {
   if (!stats['pdf-size']) return // double check
 
+  if (stats['pdf-caching-timed-out']) {
+    Metrics.inc('pdf-caching-timed-out', 1, request.metricsOpts)
+  }
+  if (timings['pdf-caching-overhead-delete-stale-hashes'] !== undefined) {
+    Metrics.summary(
+      'pdf-caching-overhead-delete-stale-hashes',
+      timings['pdf-caching-overhead-delete-stale-hashes'],
+      request.metricsOpts
+    )
+  }
+
   // How much extra time did we spent in PDF.js?
   Metrics.timing(
     'compute-pdf-caching',
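The explicit !== undefined guard matters because a perfectly valid overhead of 0ms is falsy in JavaScript, so a plain truthiness check would silently drop those samples from the summary. A short illustration:

const timings = { 'pdf-caching-overhead-delete-stale-hashes': 0 }

if (timings['pdf-caching-overhead-delete-stale-hashes']) {
  // not reached: the 0ms sample would be lost with a truthiness check
}
if (timings['pdf-caching-overhead-delete-stale-hashes'] !== undefined) {
  // reached: the 0ms sample is still emitted
}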
Changed file 3 of 3:

@@ -425,9 +425,28 @@ module.exports = OutputCacheManager = {
             contentRanges,
             newContentRanges,
             reclaimedSpace,
+            overheadDeleteStaleHashes,
+            timedOutErr,
             startXRefTable,
           } = result
 
+          let status = 'success'
+          if (timedOutErr) {
+            // Soft failure: let the frontend use partial set of ranges.
+            logger.warn(
+              {
+                err: timedOutErr,
+                overheadDeleteStaleHashes,
+                outputDir,
+                stats,
+                timings,
+              },
+              'pdf caching timed out - soft failure'
+            )
+            stats['pdf-caching-timed-out'] = 1
+            status = 'timed-out-soft-failure'
+          }
+
           if (enablePdfCachingDark) {
             // In dark mode we are doing the computation only and do not emit
             // any ranges to the frontend.
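This is where the backend downgrades a timeout from a hard error to a soft failure: the partial ranges still flow to the frontend, stats['pdf-caching-timed-out'] = 1 is what later triggers the Metrics.inc counter in emitPdfCachingStats (second file), and the status string reaches the compile callback in the final hunk below. A sketch of how a caller of that callback might branch on it — the handler itself is illustrative:

function onPdfCachingDone(err, status) {
  if (err) throw err // hard failures still propagate as errors
  switch (status) {
    case 'success':
      // full set of ranges was written and emitted
      break
    case 'timed-out-soft-failure':
      // partial set of ranges was emitted; the frontend can still use them
      break
  }
}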
@@ -449,7 +468,9 @@ module.exports = OutputCacheManager = {
               0
             )
             stats['pdf-caching-reclaimed-space'] = reclaimedSpace
-            callback(null, 'success')
+            timings['pdf-caching-overhead-delete-stale-hashes'] =
+              overheadDeleteStaleHashes
+            callback(null, status)
           }
         )
       } else {