Mirror of https://github.com/overleaf/overleaf.git (synced 2024-11-07 20:31:06 -05:00)
Merge pull request #8952 from overleaf/jpa-pdf-caching-tweaks

[clsi] server side pdf caching tweaks

GitOrigin-RevId: 758cbcc45b5a7ca0fe3dbf31bc43d9b0ef36e599
Parent: 335ac6a67b
Commit: dd5128c1d7
4 changed files with 32 additions and 36 deletions
@@ -49,6 +49,14 @@ if (Settings.pdfCachingEnableWorkerPool && workerpool.isMainThread) {
  * @param {number} compileTime
  */
 async function update(contentDir, filePath, size, compileTime) {
+  if (size < Settings.pdfCachingMinChunkSize) {
+    return {
+      contentRanges: [],
+      newContentRanges: [],
+      reclaimedSpace: 0,
+      startXRefTable: undefined,
+    }
+  }
   if (Settings.pdfCachingEnableWorkerPool) {
     return await updateOtherEventLoop(contentDir, filePath, size, compileTime)
   } else {
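The early return keeps the result shape identical whether or not the PDF is worth caching, so callers never have to special-case small files. A minimal sketch of that contract, with an assumed threshold value standing in for Settings.pdfCachingMinChunkSize:

    // Sketch only: the real threshold comes from Settings.pdfCachingMinChunkSize.
    const pdfCachingMinChunkSize = 1024

    async function updateSketch(contentDir, filePath, size, compileTime) {
      if (size < pdfCachingMinChunkSize) {
        // Too small to bother caching: return the same shape as a full run.
        return {
          contentRanges: [],
          newContentRanges: [],
          reclaimedSpace: 0,
          startXRefTable: undefined,
        }
      }
      // ...scan the PDF and populate the ranges here...
    }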
@@ -64,18 +72,21 @@ async function update(contentDir, filePath, size, compileTime) {
  * @param {number} compileTime
  */
 async function updateOtherEventLoop(contentDir, filePath, size, compileTime) {
-  const timeout = getMaxOverhead(compileTime)
+  const workerLatencyInMs = 20
+  // Prefer getting the timeout error from the worker vs timing out the worker.
+  const timeout = getMaxOverhead(compileTime) + workerLatencyInMs
   try {
-    return await WORKER_POOL.exec('doUpdateInternalNoDeadline', [
+    return await WORKER_POOL.exec('updateSameEventLoop', [
       contentDir,
       filePath,
       size,
+      compileTime,
     ]).timeout(timeout)
   } catch (e) {
     if (e instanceof workerpool.Promise.TimeoutError) {
-      throw new TimedOutError('context-lost-in-worker')
+      throw new TimedOutError('context-lost-in-worker', { timeout })
     }
-    if (e.message.includes('Max queue size of ')) {
+    if (e.message?.includes?.('Max queue size of ')) {
       throw new QueueLimitReachedError()
     }
     throw e
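The extra workerLatencyInMs gives the in-worker deadline a head start over the pool-level timeout, so the richer TimedOutError thrown inside the worker usually wins the race. A standalone sketch of the workerpool pattern, with an inline worker function instead of the real CLSI worker script:

    const workerpool = require('workerpool')

    // Illustrative pool; the real code creates it from a dedicated worker file.
    const pool = workerpool.pool({ maxWorkers: 2 })

    async function execWithTimeout(timeoutMs) {
      try {
        // Pool-level timeout is slightly longer than the internal deadline.
        return await pool.exec(x => x * 2, [21]).timeout(timeoutMs + 20)
      } catch (e) {
        if (e instanceof workerpool.Promise.TimeoutError) {
          // The worker was terminated before it could report its own timeout.
          throw new Error('context-lost-in-worker')
        }
        throw e
      }
    }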
@@ -91,28 +102,8 @@ async function updateOtherEventLoop(contentDir, filePath, size, compileTime) {
  */
 async function updateSameEventLoop(contentDir, filePath, size, compileTime) {
   const checkDeadline = getDeadlineChecker(compileTime)
-  return doUpdateInternal(contentDir, filePath, size, checkDeadline)
-}
-
-/**
- *
- * @param {String} contentDir path to directory where content hash files are cached
- * @param {String} filePath the pdf file to scan for streams
- * @param {number} size the pdf size
- */
-async function doUpdateInternalNoDeadline(contentDir, filePath, size) {
-  return doUpdateInternal(contentDir, filePath, size, () => {})
-}
-/**
- *
- * @param {String} contentDir path to directory where content hash files are cached
- * @param {String} filePath the pdf file to scan for streams
- * @param {number} size the pdf size
- * @param {function} checkDeadline
- */
-async function doUpdateInternal(contentDir, filePath, size, checkDeadline) {
-  const ranges = []
-  const newRanges = []
+  const contentRanges = []
+  const newContentRanges = []
   // keep track of hashes expire old ones when they reach a generation > N.
   const tracker = await HashFileTracker.from(contentDir)
   tracker.updateAge()
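With doUpdateInternalNoDeadline and doUpdateInternal folded into updateSameEventLoop, both execution paths now run the same deadline-checked code; previously the worker-pool path ran without any deadline. A simplified, runnable sketch of the dispatch, with stubs standing in for the real implementations (the non-worker branch is not shown in this hunk and is assumed here):

    // Names mirror the diff; the stubs below replace the real implementations.
    const Settings = { pdfCachingEnableWorkerPool: false }
    const updateSameEventLoop = async () => ({ contentRanges: [], newContentRanges: [] })
    const updateOtherEventLoop = async (...args) => updateSameEventLoop(...args)

    async function update(contentDir, filePath, size, compileTime) {
      if (Settings.pdfCachingEnableWorkerPool) {
        // Worker path: updateSameEventLoop runs inside a worker, plus a pool timeout.
        return await updateOtherEventLoop(contentDir, filePath, size, compileTime)
      } else {
        // Same-event-loop path: the compile-time based deadline still applies.
        return await updateSameEventLoop(contentDir, filePath, size, compileTime)
      }
    }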
@@ -191,14 +182,14 @@ async function doUpdateInternal(contentDir, filePath, size, checkDeadline) {
           end: object.endOffset,
           hash,
         }
-        ranges.push(range)
+        contentRanges.push(range)

         // Optimization: Skip writing of duplicate streams.
         if (tracker.track(range)) continue

         await writePdfStream(contentDir, hash, buffer)
         checkDeadline('after write ' + idx)
-        newRanges.push(range)
+        newContentRanges.push(range)
       }
   } finally {
     await handle.close()
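The tracker.track(range) check is what keeps repeated compiles cheap: a stream whose hash was already written is recorded but not rewritten. A minimal Set-based sketch of that idea, assuming a much simpler tracker than the real HashFileTracker (which also ages and reclaims hashes):

    class SimpleTracker {
      constructor() {
        this.seen = new Set()
      }
      // Returns true when this hash has been written before.
      track(range) {
        if (this.seen.has(range.hash)) return true
        this.seen.add(range.hash)
        return false
      }
    }

    const tracker = new SimpleTracker()
    for (const range of [{ hash: 'a' }, { hash: 'a' }, { hash: 'b' }]) {
      if (tracker.track(range)) continue // skip writing duplicate streams
      // writePdfStream(contentDir, range.hash, buffer) would run here
    }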
@@ -208,7 +199,7 @@ async function doUpdateInternal(contentDir, filePath, size, checkDeadline) {
   // Let the next compile use the already written ranges.
   const reclaimedSpace = await tracker.deleteStaleHashes(5)
   await tracker.flush()
-  return [ranges, newRanges, reclaimedSpace, startXRefTable]
+  return { contentRanges, newContentRanges, reclaimedSpace, startXRefTable }
 }

 function getStatePath(contentDir) {
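Returning a named object instead of a positional array lets the call sites further down in this diff (OutputCacheManager and the tests) pick fields by name rather than by index, which survives reordering and extra fields. For example:

    const result = {
      contentRanges: [],
      newContentRanges: [],
      reclaimedSpace: 0,
      startXRefTable: undefined,
    }
    // Order-independent and self-documenting at the call site:
    const { reclaimedSpace, startXRefTable } = result
    console.log(reclaimedSpace, startXRefTable) // 0 undefined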
@@ -334,15 +325,16 @@ function getMaxOverhead(compileTime) {
 }

 function getDeadlineChecker(compileTime) {
-  const maxOverhead = getMaxOverhead(compileTime)
+  const timeout = getMaxOverhead(compileTime)

-  const deadline = Date.now() + maxOverhead
+  const deadline = Date.now() + timeout
   let lastStage = { stage: 'start', now: Date.now() }
   let completedStages = 0
   return function (stage) {
     const now = Date.now()
     if (now > deadline) {
       throw new TimedOutError(stage, {
+        timeout,
         completedStages,
         lastStage: lastStage.stage,
         diffToLastStage: now - lastStage.now,
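Renaming maxOverhead to timeout also lets the thrown error report the budget that was exceeded. A standalone sketch of the deadline-checker pattern, using a plain Error subclass in place of the project's TimedOutError and guessing at the bookkeeping past the cut-off point of this hunk:

    class TimedOutError extends Error {
      constructor(stage, info) {
        super('timed out: ' + stage)
        this.info = info
      }
    }

    function makeDeadlineChecker(timeoutMs) {
      const deadline = Date.now() + timeoutMs
      let lastStage = { stage: 'start', now: Date.now() }
      let completedStages = 0
      return function checkDeadline(stage) {
        const now = Date.now()
        if (now > deadline) {
          throw new TimedOutError(stage, {
            timeout: timeoutMs,
            completedStages,
            lastStage: lastStage.stage,
            diffToLastStage: now - lastStage.now,
          })
        }
        completedStages++
        lastStage = { stage, now }
      }
    }

    const checkDeadline = makeDeadlineChecker(50)
    checkDeadline('after loadXref') // throws once 50ms have elapsed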
@@ -363,6 +355,6 @@ module.exports = {
   update: callbackify(update),
   promises: {
     update,
-    doUpdateInternalNoDeadline,
+    updateSameEventLoop,
   },
 }
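The module keeps a callback-style update for existing callers while exposing the async functions under promises; Node's util.callbackify provides the bridge. A small illustration (paths and values are made up):

    const { callbackify } = require('util')

    async function update(contentDir, filePath, size, compileTime) {
      return { contentRanges: [], newContentRanges: [], reclaimedSpace: 0 }
    }

    const updateCb = callbackify(update)
    updateCb('/cache/dir', '/output/output.pdf', 0, 0, (err, result) => {
      if (err) return console.error(err)
      console.log(result.newContentRanges.length) // 0
    })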
@@ -411,12 +411,12 @@ module.exports = OutputCacheManager = {
           return callback(null, 'timed-out')
         }
         if (err) return callback(err, 'failed')
-        const [
+        const {
           contentRanges,
           newContentRanges,
           reclaimedSpace,
           startXRefTable,
-        ] = result
+        } = result

         if (enablePdfCachingDark) {
           // In dark mode we are doing the computation only and do not emit
@@ -16,7 +16,7 @@ async function parseXrefTable(path, size, checkDeadline) {
     checkDeadline('pdfjs: after parseStartXRef')
     await manager.ensureDoc('parse')
     checkDeadline('pdfjs: after parse')
-    const xRefEntries = manager.pdfDocument.xref.entries
+    const xRefEntries = manager.pdfDocument.xref.entries || []
     const startXRefTable = manager.pdfDocument.xref.topDict?.get('Prev')
     return { xRefEntries, startXRefTable }
   } finally {
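The || [] fallback guards against PDFs where pdf.js yields an xref object without an entries array, so downstream code iterates an empty list instead of throwing. A minimal illustration of the failure mode being defended against (the xref shape is assumed):

    const xref = { topDict: undefined } // assumed shape for a sparse xref
    const xRefEntries = xref.entries || [] // would be undefined without the fallback
    for (const entry of xRefEntries) {
      // loop body is simply skipped for the empty default
    }
    console.log(xRefEntries.length) // 0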
@@ -19,7 +19,11 @@ describe('ContentCacheManager', function () {
       size
     )
     let newlyReclaimed
-    ;[contentRanges, newContentRanges, newlyReclaimed] = result
+    ;({
+      contentRanges,
+      newContentRanges,
+      reclaimedSpace: newlyReclaimed,
+    } = result)
     reclaimed += newlyReclaimed

     const fileNames = await fs.promises.readdir(contentDir)
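The test now destructures the object form into variables declared earlier, which is why the assignment needs wrapping parentheses (otherwise the opening brace would start a block) and a leading semicolon to protect against automatic semicolon insertion. For example:

    let contentRanges, newContentRanges, newlyReclaimed
    const result = { contentRanges: [1, 2], newContentRanges: [], reclaimedSpace: 7 }
    ;({ contentRanges, newContentRanges, reclaimedSpace: newlyReclaimed } = result)
    console.log(contentRanges.length, newlyReclaimed) // 2 7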