mirror of
https://github.com/overleaf/overleaf.git
synced 2024-11-21 20:47:08 -05:00
64184a7f56
[web] refactor pdf caching GitOrigin-RevId: af6b871fad652f757d8c465085b33a79b955cd6b
330 lines
8.9 KiB
JavaScript
330 lines
8.9 KiB
JavaScript
import { v4 as uuid } from 'uuid'
|
|
import { fetchRange } from './features/pdf-preview/util/pdf-caching'
|
|
const OError = require('@overleaf/o-error')
|
|
|
|
// VERSION should get incremented when making changes to caching behavior or
|
|
// adjusting metrics collection.
|
|
// Keep in sync with PdfJsMetrics.
|
|
const VERSION = 3
|
|
|
|
const CLEAR_CACHE_REQUEST_MATCHER = /^\/project\/[0-9a-f]{24}\/output$/
|
|
const COMPILE_REQUEST_MATCHER = /^\/project\/[0-9a-f]{24}\/compile$/
|
|
const PDF_REQUEST_MATCHER =
|
|
/^(\/zone\/.)?(\/project\/[0-9a-f]{24}\/.*\/output.pdf)$/
|
|
const PDF_JS_CHUNK_SIZE = 128 * 1024
|
|
|
|
// Each compile request defines a context (essentially the specific pdf file for
|
|
// that compile), requests for that pdf file can use the hashes in the compile
|
|
// response, which are stored in the context.
|
|
|
|
const CLIENT_CONTEXT = new Map()
|
|
|
|
/**
|
|
* @param {string} clientId
|
|
*/
|
|
function getClientContext(clientId) {
|
|
let clientContext = CLIENT_CONTEXT.get(clientId)
|
|
if (!clientContext) {
|
|
const cached = new Set()
|
|
const pdfs = new Map()
|
|
const metrics = {
|
|
version: VERSION,
|
|
id: uuid(),
|
|
epoch: Date.now(),
|
|
failedCount: 0,
|
|
tooLargeOverheadCount: 0,
|
|
tooManyRequestsCount: 0,
|
|
cachedCount: 0,
|
|
cachedBytes: 0,
|
|
fetchedCount: 0,
|
|
fetchedBytes: 0,
|
|
requestedCount: 0,
|
|
requestedBytes: 0,
|
|
compileCount: 0,
|
|
}
|
|
clientContext = { pdfs, metrics, cached }
|
|
CLIENT_CONTEXT.set(clientId, clientContext)
|
|
// clean up old client maps
|
|
expirePdfContexts()
|
|
}
|
|
return clientContext
|
|
}
|
|
|
|
/**
|
|
* @param {string} clientId
|
|
* @param {string} path
|
|
* @param {Object} pdfContext
|
|
*/
|
|
function registerPdfContext(clientId, path, pdfContext) {
|
|
const clientContext = getClientContext(clientId)
|
|
const { pdfs, metrics, cached, clsiServerId } = clientContext
|
|
pdfContext.metrics = metrics
|
|
pdfContext.cached = cached
|
|
if (pdfContext.clsiServerId !== clsiServerId) {
|
|
// VM changed, this invalidates all browser caches.
|
|
clientContext.clsiServerId = pdfContext.clsiServerId
|
|
cached.clear()
|
|
}
|
|
// we only need to keep the last 3 contexts
|
|
for (const key of pdfs.keys()) {
|
|
if (pdfs.size < 3) {
|
|
break
|
|
}
|
|
pdfs.delete(key) // the map keys are returned in insertion order, so we are deleting the oldest entry here
|
|
}
|
|
pdfs.set(path, pdfContext)
|
|
}
|
|
|
|
/**
|
|
* @param {string} clientId
|
|
* @param {string} path
|
|
*/
|
|
function getPdfContext(clientId, path) {
|
|
const { pdfs } = getClientContext(clientId)
|
|
return pdfs.get(path)
|
|
}
|
|
|
|
function expirePdfContexts() {
|
|
// discard client maps for clients that are no longer connected
|
|
const currentClientSet = new Set()
|
|
self.clients.matchAll().then(function (clientList) {
|
|
clientList.forEach(client => {
|
|
currentClientSet.add(client.id)
|
|
})
|
|
CLIENT_CONTEXT.forEach((map, clientId) => {
|
|
if (!currentClientSet.has(clientId)) {
|
|
CLIENT_CONTEXT.delete(clientId)
|
|
}
|
|
})
|
|
})
|
|
}
|
|
|
|
/**
|
|
* @param {FetchEvent} event
|
|
*/
|
|
function onFetch(event) {
|
|
const url = new URL(event.request.url)
|
|
const path = url.pathname
|
|
|
|
if (path.match(COMPILE_REQUEST_MATCHER)) {
|
|
return processCompileRequest(event)
|
|
}
|
|
|
|
const match = path.match(PDF_REQUEST_MATCHER)
|
|
if (match) {
|
|
const ctx = getPdfContext(event.clientId, match[2])
|
|
if (ctx) {
|
|
return processPdfRequest(event, ctx)
|
|
}
|
|
}
|
|
|
|
if (
|
|
event.request.method === 'DELETE' &&
|
|
path.match(CLEAR_CACHE_REQUEST_MATCHER)
|
|
) {
|
|
return processClearCacheRequest(event)
|
|
}
|
|
|
|
// other request, ignore
|
|
}
|
|
|
|
/**
|
|
* @param {FetchEvent} event
|
|
*/
|
|
function processClearCacheRequest(event) {
|
|
CLIENT_CONTEXT.delete(event.clientId)
|
|
// use default request proxy.
|
|
}
|
|
|
|
/**
|
|
* @param {FetchEvent} event
|
|
*/
|
|
function processCompileRequest(event) {
|
|
event.respondWith(
|
|
fetch(event.request).then(response => {
|
|
if (response.status !== 200) return response
|
|
|
|
return response.json().then(body => {
|
|
handleCompileResponse(event, response, body)
|
|
|
|
// Send the service workers metrics to the frontend.
|
|
const { metrics } = getClientContext(event.clientId)
|
|
metrics.compileCount++
|
|
body.serviceWorkerMetrics = metrics
|
|
|
|
return new Response(JSON.stringify(body), response)
|
|
})
|
|
})
|
|
)
|
|
}
|
|
|
|
/**
|
|
* @param {Request} request
|
|
* @param {Object} file
|
|
* @return {Response}
|
|
*/
|
|
function handleProbeRequest(request, file) {
|
|
// PDF.js starts the pdf download with a probe request that has no
|
|
// range headers on it.
|
|
// Upon seeing the response headers, it decides whether to upgrade the
|
|
// transport to chunked requests or keep reading the response body.
|
|
// For small PDFs (2*chunkSize = 2*128kB) it just sends one request.
|
|
// We will fetch all the ranges in bulk and emit them.
|
|
// For large PDFs it sends this probe request, aborts that request before
|
|
// reading any data and then sends multiple range requests.
|
|
// It would be wasteful to action this probe request with all the ranges
|
|
// that are available in the PDF and serve the full PDF content to
|
|
// PDF.js for the probe request.
|
|
// We are emitting a dummy response to the probe request instead.
|
|
// It triggers the chunked transfer and subsequent fewer ranges need to be
|
|
// requested -- only those of visible pages in the pdf viewer.
|
|
// https://github.com/mozilla/pdf.js/blob/6fd899dc443425747098935207096328e7b55eb2/src/display/network_utils.js#L43-L47
|
|
const pdfJSWillUseChunkedTransfer = file.size > 2 * PDF_JS_CHUNK_SIZE
|
|
const isRangeRequest = request.headers.has('Range')
|
|
if (!isRangeRequest && pdfJSWillUseChunkedTransfer) {
|
|
const headers = new Headers()
|
|
headers.set('Accept-Ranges', 'bytes')
|
|
headers.set('Content-Length', file.size)
|
|
headers.set('Content-Type', 'application/pdf')
|
|
return new Response('', {
|
|
headers,
|
|
status: 200,
|
|
statusText: 'OK',
|
|
})
|
|
}
|
|
}
|
|
|
|
/**
|
|
*
|
|
* @param {FetchEvent} event
|
|
* @param {Object} file
|
|
* @param {string} clsiServerId
|
|
* @param {string} compileGroup
|
|
* @param {Date} pdfCreatedAt
|
|
* @param {Object} metrics
|
|
* @param {Set} cached
|
|
*/
|
|
function processPdfRequest(
|
|
event,
|
|
{ file, clsiServerId, compileGroup, pdfCreatedAt, metrics, cached }
|
|
) {
|
|
const response = handleProbeRequest(event.request, file)
|
|
if (response) {
|
|
return event.respondWith(response)
|
|
}
|
|
|
|
const rangeHeader =
|
|
event.request.headers.get('Range') || `bytes=0-${file.size - 1}`
|
|
const [start, last] = rangeHeader
|
|
.slice('bytes='.length)
|
|
.split('-')
|
|
.map(i => parseInt(i, 10))
|
|
const end = last + 1
|
|
|
|
return event.respondWith(
|
|
fetchRange({
|
|
url: event.request.url,
|
|
start,
|
|
end,
|
|
file,
|
|
pdfCreatedAt,
|
|
metrics,
|
|
cached,
|
|
})
|
|
.then(blob => {
|
|
return new Response(blob, {
|
|
status: 206,
|
|
headers: {
|
|
'Accept-Ranges': 'bytes',
|
|
'Content-Length': end - start,
|
|
'Content-Range': `bytes ${start}-${last}/${file.size}`,
|
|
'Content-Type': 'application/pdf',
|
|
},
|
|
})
|
|
})
|
|
.catch(error => {
|
|
metrics.failedCount++
|
|
reportError(event, OError.tag(error, 'failed to compose pdf response'))
|
|
return fetch(event.request)
|
|
})
|
|
)
|
|
}
|
|
|
|
/**
|
|
* @param {FetchEvent} event
|
|
* @param {Response} response
|
|
* @param {Object} body
|
|
*/
|
|
function handleCompileResponse(event, response, body) {
|
|
if (!body || body.status !== 'success') return
|
|
|
|
for (const file of body.outputFiles) {
|
|
if (file.path !== 'output.pdf') continue // not the pdf used for rendering
|
|
if (file.ranges?.length) {
|
|
const { clsiServerId, compileGroup } = body
|
|
registerPdfContext(event.clientId, file.url, {
|
|
file,
|
|
clsiServerId,
|
|
compileGroup,
|
|
})
|
|
}
|
|
break
|
|
}
|
|
}
|
|
|
|
/**
|
|
* @param {FetchEvent} event
|
|
*/
|
|
function onFetchWithErrorHandling(event) {
|
|
try {
|
|
onFetch(event)
|
|
} catch (error) {
|
|
reportError(event, OError.tag(error, 'low level error in onFetch'))
|
|
}
|
|
}
|
|
// allow fetch event listener to be removed if necessary
|
|
const controller = new AbortController()
|
|
// listen to all network requests
|
|
self.addEventListener('fetch', onFetchWithErrorHandling, {
|
|
signal: controller.signal,
|
|
})
|
|
|
|
// complete setup ASAP
|
|
self.addEventListener('install', event => {
|
|
event.waitUntil(self.skipWaiting())
|
|
})
|
|
self.addEventListener('activate', event => {
|
|
event.waitUntil(self.clients.claim())
|
|
})
|
|
self.addEventListener('message', event => {
|
|
if (event.data && event.data.type === 'disable') {
|
|
controller.abort() // removes the fetch event listener
|
|
}
|
|
})
|
|
|
|
/**
|
|
*
|
|
* @param {FetchEvent} event
|
|
* @param {Error} error
|
|
*/
|
|
function reportError(event, error) {
|
|
self.clients
|
|
.get(event.clientId)
|
|
.then(client => {
|
|
if (!client) {
|
|
// The client disconnected.
|
|
return
|
|
}
|
|
client.postMessage(
|
|
JSON.stringify({
|
|
extra: { url: event.request.url, info: OError.getFullInfo(error) },
|
|
error: {
|
|
name: error.name,
|
|
message: error.message,
|
|
stack: OError.getFullStack(error),
|
|
},
|
|
})
|
|
)
|
|
})
|
|
.catch(() => {})
|
|
}
|