Merge pull request #8931 from overleaf/jpa-pdf-caching-fixes

[web] fixes/improvements for pdf caching

GitOrigin-RevId: 2ba0ed4798fd4dfb57f2dfca8be92ae1a769934f
Jakob Ackermann 2022-07-20 09:15:58 +01:00 committed by Copybot
parent 0409676c41
commit 1a9487d5fd
3 changed files with 59 additions and 32 deletions


@ -7,6 +7,7 @@ export function generatePdfCachingTransportFactory(PDFJS) {
if (getMeta('ol-pdfCachingMode') !== 'enabled') {
return () => null
}
+ let failedOnce = false
const cached = new Set()
const metrics = Object.assign(getPdfCachingMetrics(), {
failedCount: 0,
@ -28,9 +29,15 @@ export function generatePdfCachingTransportFactory(PDFJS) {
this.url = url
this.pdfFile = pdfFile
this.reject = reject
+ this.abortController = new AbortController()
}
+ abort() {
+ this.abortController.abort()
+ }
requestDataRange(start, end) {
+ const abortSignal = this.abortController.signal
fetchRange({
url: this.url,
start,
@ -39,12 +46,14 @@ export function generatePdfCachingTransportFactory(PDFJS) {
metrics,
cached,
verifyChunks,
+ abortSignal,
})
.catch(err => {
metrics.failedCount++
+ failedOnce = true
console.error('optimized pdf download error', err)
captureException(err)
- return fallbackRequest({ url: this.url, start, end })
+ return fallbackRequest({ url: this.url, start, end, abortSignal })
})
.then(blob => {
this.onDataRange(start, blob)
@ -58,6 +67,11 @@ export function generatePdfCachingTransportFactory(PDFJS) {
}
return function (url, pdfFile, reject) {
+ if (failedOnce) {
+ // Disable pdf caching once any fetch request failed.
+ // Be trigger-happy here until we reach a stable state of the feature.
+ return null
+ }
return new PDFDataRangeTransport(url, pdfFile, reject)
}
}
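Note on the cancellation pattern above: the new abort() hook wires a single AbortController signal through both the optimized fetchRange() call and the fallbackRequest() fallback, so one abort() cancels whichever request is still in flight. A self-contained sketch of that pattern (fetchWithFallback and the URLs are illustrative, not part of this commit):

    const controller = new AbortController()

    async function fetchWithFallback(optimizedUrl, url, start, end) {
      const abortSignal = controller.signal
      try {
        const response = await fetch(optimizedUrl, { signal: abortSignal })
        return await response.arrayBuffer()
      } catch (err) {
        // The fallback reuses the same signal: once abort() has fired,
        // this fetch rejects immediately instead of re-downloading the range.
        const response = await fetch(url, {
          headers: { Range: `bytes=${start}-${end - 1}` },
          signal: abortSignal,
        })
        return response.arrayBuffer()
      }
    }

Sharing the signal means an aborted transport cannot trigger a stray full-range fallback download during teardown.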


@ -212,15 +212,13 @@ function getMultipartBoundary(response, chunk) {
}
/**
- * @param {Object} response
* @param {Object} file
+ * @param {Array} chunks
+ * @param {Uint8Array} data
+ * @param {string} boundary
* @param {Object} metrics
*/
- function resolveMultiPartResponses(response, file, metrics) {
- const { chunk: chunks, data, boundary } = response
- if (!boundary) {
- return [response]
- }
+ function resolveMultiPartResponses({ file, chunks, data, boundary, metrics }) {
const responses = []
let offsetStart = 0
for (const chunk of chunks) {
@ -272,14 +270,15 @@ function checkChunkResponse(response) {
* @param {string} url
* @param {number} start
* @param {number} end
+ * @param {AbortSignal} abortSignal
*/
- export function fallbackRequest({ url, start, end }) {
- return fetch(url, { headers: { Range: `bytes=${start}-${end - 1}` } }).then(
- response => {
- checkChunkResponse(response)
- return response.arrayBuffer()
- }
- )
+ export async function fallbackRequest({ url, start, end, abortSignal }) {
+ const response = await fetch(url, {
+ headers: { Range: `bytes=${start}-${end - 1}` },
+ signal: abortSignal,
+ })
+ checkChunkResponse(response)
+ return response.arrayBuffer()
}
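Hypothetical caller of the reworked fallbackRequest, showing the effect of the new abortSignal parameter (the URL and range are made up):

    const controller = new AbortController()
    const ranged = fallbackRequest({
      url: '/build/output.pdf',
      start: 0,
      end: 128 * 1024,
      abortSignal: controller.signal,
    })
    controller.abort()
    ranged.catch(err => console.warn(err.name)) // logs "AbortError"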
/**
@ -289,11 +288,12 @@ export function fallbackRequest({ url, start, end }) {
* @param {number} end
* @param {Object} metrics
* @param {Uint8Array} actual
+ * @param {AbortSignal} abortSignal
*/
- async function verifyRange({ url, start, end, metrics, actual }) {
+ async function verifyRange({ url, start, end, metrics, actual, abortSignal }) {
let expectedRaw
try {
- expectedRaw = await fallbackRequest({ url, start, end })
+ expectedRaw = await fallbackRequest({ url, start, end, abortSignal })
} catch (error) {
throw OError.tag(error, 'cannot verify range', { url, start, end })
}
@ -319,6 +319,7 @@ async function verifyRange({ url, start, end, metrics, actual }) {
* @param {Object} metrics
* @param {Set} cached
* @param {boolean} verifyChunks
+ * @param {AbortSignal} abortSignal
*/
export async function fetchRange({
url,
@ -328,6 +329,7 @@ export async function fetchRange({
metrics,
cached,
verifyChunks,
+ abortSignal,
}) {
file.createdAt = new Date(file.createdAt)
backfillEdgeBounds(file)
@ -352,7 +354,7 @@ export async function fetchRange({
fetchedCount: 1,
fetchedBytes: size,
})
- return fallbackRequest({ url, start, end })
+ return fallbackRequest({ url, start, end, abortSignal })
}
if (
chunksSize > MAX_SUB_REQUEST_BYTES &&
@ -369,7 +371,7 @@ export async function fetchRange({
fetchedCount: 1,
fetchedBytes: size,
})
- return fallbackRequest({ url, start, end })
+ return fallbackRequest({ url, start, end, abortSignal })
}
const byteRanges = dynamicChunks
@ -408,6 +410,7 @@ export async function fetchRange({
.map(chunk => ({
chunk,
url: `${perUserPrefix}/content/${file.contentId}/${chunk.hash}?${query}`,
+ init: {},
}))
.concat(coalescedDynamicChunks)
let cachedCount = 0
@ -419,7 +422,7 @@ export async function fetchRange({
const rawResponses = await Promise.all(
requests.map(async ({ chunk, url, init }) => {
try {
- const response = await fetch(url, init)
+ const response = await fetch(url, { ...init, signal: abortSignal })
checkChunkResponse(response)
const boundary = getMultipartBoundary(response, chunk)
const blobFetchDate = getServerTime(response)
@ -443,23 +446,25 @@ export async function fetchRange({
fetchedBytes += blobSize
}
}
- return {
- boundary,
- chunk,
- data: backFillObjectContext(
- chunk,
- // response.arrayBuffer() yields the first multipart section only.
- await (await response.blob()).arrayBuffer()
- ),
+ const data = backFillObjectContext(chunk, await response.arrayBuffer())
+ if (!Array.isArray(chunk)) {
+ return [{ chunk, data }]
}
- } catch (error) {
- throw OError.tag(error, 'cannot fetch chunk', { url })
+ return resolveMultiPartResponses({
+ file,
+ chunks: chunk,
+ data,
+ boundary,
+ metrics,
+ })
+ } catch (err) {
+ throw OError.tag(err, 'cannot fetch chunk', { chunk, url, init })
}
})
)
rawResponses
- .flatMap(r => resolveMultiPartResponses(r, file, metrics))
+ .flat() // flatten after splitting multipart responses
.forEach(({ chunk, data }) => {
// overlap:
// | REQUESTED_RANGE |
@ -493,6 +498,7 @@ export async function fetchRange({
end,
metrics,
actual: reassembledBlob,
+ abortSignal,
})
}
return reassembledBlob
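With these changes every entry in rawResponses is already an array of { chunk, data } pairs: a plain chunk resolves to a one-element array, and a multipart response is split by resolveMultiPartResponses inside the try block, so failures get tagged with the offending chunk, url, and init. A sketch of the resulting shape (the chunks and byte lengths are made up):

    const rawResponses = [
      // plain chunk: one-element array
      [{ chunk: { start: 0, end: 4 }, data: new Uint8Array(4) }],
      // multipart response: already split into its sections
      [
        { chunk: { start: 4, end: 8 }, data: new Uint8Array(4) },
        { chunk: { start: 8, end: 12 }, data: new Uint8Array(4) },
      ],
    ]
    rawResponses.flat().forEach(({ chunk, data }) => {
      console.log(chunk.start, chunk.end, data.byteLength)
    })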


@ -5,7 +5,7 @@ const params = new URLSearchParams(window.location.search)
const disableFontFace = params.get('disable-font-face') === 'true'
const disableStream = process.env.NODE_ENV !== 'test'
- const rangeChunkSize = 128 * 1024 // 128K chunks
+ const DEFAULT_RANGE_CHUNK_SIZE = 128 * 1024 // 128K chunks
export default class PDFJSWrapper {
constructor(container) {
@ -67,6 +67,13 @@ export default class PDFJSWrapper {
}
return new Promise((resolve, reject) => {
+ const rangeTransport = this.genPdfCachingTransport(url, pdfFile, reject)
+ let rangeChunkSize = DEFAULT_RANGE_CHUNK_SIZE
+ if (rangeTransport && pdfFile.size < 2 * DEFAULT_RANGE_CHUNK_SIZE) {
+ // pdf.js disables the "bulk" download optimization when providing a
+ // custom range transport. Restore it by bumping the chunk size.
+ rangeChunkSize = pdfFile.size
+ }
this.loadDocumentTask = this.PDFJS.getDocument({
url,
cMapUrl: this.cMapUrl,
@ -76,7 +83,7 @@ export default class PDFJSWrapper {
disableAutoFetch: true,
disableStream,
textLayerMode: 2, // PDFJSViewer.TextLayerMode.ENABLE,
- range: this.genPdfCachingTransport(url, pdfFile, reject),
+ range: rangeTransport,
})
this.loadDocumentTask.promise
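The last hunk compensates for pdf.js behavior: with a custom range transport, pdf.js skips its bulk-download shortcut, so even a file smaller than two chunks would presumably be fetched via multiple range requests. Bumping rangeChunkSize to the file size restores a single request. A worked example with assumed numbers:

    const DEFAULT_RANGE_CHUNK_SIZE = 128 * 1024
    const pdfFile = { size: 200 * 1024 } // 200 KiB, i.e. < 2 * 128 KiB
    const rangeTransport = {} // stands in for genPdfCachingTransport(...)
    let rangeChunkSize = DEFAULT_RANGE_CHUNK_SIZE
    if (rangeTransport && pdfFile.size < 2 * DEFAULT_RANGE_CHUNK_SIZE) {
      // One range request now covers the whole file.
      rangeChunkSize = pdfFile.size // 204800 bytes instead of two 128 KiB ranges
    }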