import { v4 as uuid } from 'uuid'

const COMPILE_REQUEST_MATCHER = /^\/project\/[0-9a-f]{24}\/compile$/
const MIN_CHUNK_SIZE = 128 * 1024

const PDF_FILES = new Map()

const METRICS = {
  id: uuid(),
  epoch: Date.now(),
  cachedBytes: 0,
  fetchedBytes: 0,
  requestedBytes: 0,
}

/**
 * @param {number} size
 * @param {number} cachedBytes
 * @param {number} fetchedBytes
 */
function trackStats({ size, cachedBytes, fetchedBytes }) {
  METRICS.cachedBytes += cachedBytes
  METRICS.fetchedBytes += fetchedBytes
  METRICS.requestedBytes += size
}

/**
 * @param {boolean} sizeDiffers
 * @param {boolean} mismatch
 * @param {boolean} success
 */
function trackChunkVerify({ sizeDiffers, mismatch, success }) {
  // `METRICS.foo |= 0` lazily initializes the counter: `undefined | 0` is 0,
  // while an existing count is left unchanged.
  if (sizeDiffers) {
    METRICS.chunkVerifySizeDiffers |= 0
    METRICS.chunkVerifySizeDiffers += 1
  }
  if (mismatch) {
    METRICS.chunkVerifyMismatch |= 0
    METRICS.chunkVerifyMismatch += 1
  }
  if (success) {
    METRICS.chunkVerifySuccess |= 0
    METRICS.chunkVerifySuccess += 1
  }
}

/**
 * @param {FetchEvent} event
 */
function onFetch(event) {
  const url = new URL(event.request.url)
  const path = url.pathname

  if (path.match(COMPILE_REQUEST_MATCHER)) {
    return processCompileRequest(event)
  }

  const ctx = PDF_FILES.get(path)
  if (ctx) {
    return processPdfRequest(event, ctx)
  }

  // other request, ignore
}

function processCompileRequest(event) {
  event.respondWith(
    fetch(event.request).then(response => {
      if (response.status !== 200) return response

      return response.json().then(body => {
        handleCompileResponse(response, body)
        // Send the service worker's metrics to the frontend.
        body.serviceWorkerMetrics = METRICS
        return new Response(JSON.stringify(body), response)
      })
    })
  )
}

/**
 * @param {FetchEvent} event
 * @param {Object} file
 * @param {string} clsiServerId
 * @param {string} compileGroup
 * @param {Date} pdfCreatedAt
 */
function processPdfRequest(
  event,
  { file, clsiServerId, compileGroup, pdfCreatedAt }
) {
  if (!event.request.headers.has('Range') && file.size > MIN_CHUNK_SIZE) {
    // Skip the probe request: PDF.js first issues a request without a Range
    // header to learn the file size. Answer it from the compile metadata
    // without transferring any bytes.
    const headers = new Headers()
    headers.set('Accept-Ranges', 'bytes')
    headers.set('Content-Length', file.size)
    headers.set('Content-Type', 'application/pdf')
    return event.respondWith(
      new Response('', {
        headers,
        status: 200,
        statusText: 'OK',
      })
    )
  }

  const verifyChunks = event.request.url.includes('verify_chunks=true')
  // Fall back to the full file; the last byte in a Range header is inclusive.
  const rangeHeader =
    event.request.headers.get('Range') || `bytes=0-${file.size - 1}`
  const [start, last] = rangeHeader
    .slice('bytes='.length)
    .split('-')
    .map(i => parseInt(i, 10))
  const end = last + 1
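  // Example: "Range: bytes=0-65535" parses to start=0, last=65535 and
  // end=65536, i.e. the first 64 KiB of the file.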

  const chunks = getMatchingChunks(file.ranges, start, end)

  const dynamicChunks = getInterleavingDynamicChunks(chunks, start, end)

  // URL prefix is /project/:id/user/:id/build/... or /project/:id/build/...
  // for authenticated and unauthenticated users respectively.
  const perUserPrefix = file.url.slice(0, file.url.indexOf('/build/'))
  const requests = chunks
    .map(chunk => {
      const path = `${perUserPrefix}/content/${file.contentId}/${chunk.hash}`
      const url = new URL(path, event.request.url)
      if (clsiServerId) {
        url.searchParams.set('clsiserverid', clsiServerId)
      }
      if (compileGroup) {
        url.searchParams.set('compileGroup', compileGroup)
      }
      return { chunk, url: url.toString() }
    })
    .concat(
      dynamicChunks.map(chunk => {
        const { start, end } = chunk
        return {
          chunk,
          url: event.request.url,
          init: { headers: { Range: `bytes=${start}-${end - 1}` } },
        }
      })
    )
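  // Illustration (hypothetical values): a cached chunk is fetched from
  //   {perUserPrefix}/content/{file.contentId}/{chunk.hash}?clsiserverid=...
  // while each gap between cached chunks is re-fetched from the original
  // PDF URL with "Range: bytes={gap.start}-{gap.end - 1}".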
  const size = end - start
  let cachedBytes = 0
  let fetchedBytes = 0
  const reAssembledBlob = new Uint8Array(size)
  event.respondWith(
    Promise.all(
      requests.map(({ chunk, url, init }) =>
        fetch(url, init)
          .then(response => {
            if (!(response.status === 206 || response.status === 200)) {
              throw new Error(
                `could not fetch ${url} ${JSON.stringify(init)}: ${
                  response.status
                }`
              )
            }
            const blobFetchDate = getServerTime(response)
            const blobSize = getResponseSize(response)
            if (blobFetchDate && blobSize) {
              const chunkSize =
                Math.min(end, chunk.end) - Math.max(start, chunk.start)
              // Example: 2MB PDF, 1MB image, 128KB PDF.js chunk.
              //       | pdf.js chunk |
              //    | A BIG IMAGE BLOB |
              // |       THE FULL PDF       |
              if (blobFetchDate < pdfCreatedAt) {
                cachedBytes += chunkSize
              } else {
                // Blobs are fetched in bulk.
                fetchedBytes += blobSize
              }
            }
            return response.arrayBuffer()
          })
          .then(arrayBuffer => {
            return { chunk, arrayBuffer }
          })
      )
    )
      .then(responses => {
        responses.forEach(({ chunk, arrayBuffer }) => {
          // overlap:
          //      | REQUESTED_RANGE |
          // | CHUNK |
          const offsetStart = Math.max(start - chunk.start, 0)
          // overlap:
          // | REQUESTED_RANGE |
          //      | CHUNK |
          const offsetEnd = Math.max(chunk.end - end, 0)
          if (offsetStart > 0 || offsetEnd > 0) {
            // compute index positions for slice to handle case where offsetEnd=0
            const chunkSize = chunk.end - chunk.start
            arrayBuffer = arrayBuffer.slice(offsetStart, chunkSize - offsetEnd)
          }
          const insertPosition = Math.max(chunk.start - start, 0)
          reAssembledBlob.set(new Uint8Array(arrayBuffer), insertPosition)
        })
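
        // Worked example (hypothetical numbers): for start=100, end=300 and
        // a chunk covering bytes [0, 150), offsetStart=100 and offsetEnd=0,
        // so bytes 100-149 of that chunk land at position 0 of the blob.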

        let verifyProcess = Promise.resolve(reAssembledBlob)
        if (verifyChunks) {
          verifyProcess = fetch(event.request)
            .then(response => response.arrayBuffer())
            .then(arrayBuffer => {
              const fullBlob = new Uint8Array(arrayBuffer)
              const metrics = {}
              if (reAssembledBlob.byteLength !== fullBlob.byteLength) {
                metrics.sizeDiffers = true
              } else if (
                !reAssembledBlob.every((v, idx) => v === fullBlob[idx])
              ) {
                metrics.mismatch = true
              } else {
                metrics.success = true
              }
              trackChunkVerify(metrics)
              if (metrics.success === true) {
                return reAssembledBlob
              } else {
                return fullBlob
              }
            })
        }

        return verifyProcess.then(blob => {
          trackStats({ size, cachedBytes, fetchedBytes })
          return new Response(blob, {
            status: 206,
            headers: {
              'Accept-Ranges': 'bytes',
              'Content-Length': size,
              'Content-Range': `bytes ${start}-${last}/${file.size}`,
              'Content-Type': 'application/pdf',
            },
          })
        })
      })
      .catch(error => {
        console.error('Could not fetch partial pdf chunks', error)
        return fetch(event.request)
      })
  )
}

/**
 * @param {Response} response
 */
function getServerTime(response) {
  const raw = response.headers.get('Date')
  if (!raw) return undefined
  return new Date(raw)
}

/**
 * @param {Response} response
 */
function getResponseSize(response) {
  const raw = response.headers.get('Content-Length')
  if (!raw) return 0
  return parseInt(raw, 10)
}

/**
 * @param {Response} response
 * @param {Object} body
 */
function handleCompileResponse(response, body) {
  if (!body || body.status !== 'success') return

  const pdfCreatedAt = getServerTime(response)

  for (const file of body.outputFiles) {
    if (file.path !== 'output.pdf') continue // not the pdf used for rendering
    if (file.ranges) {
      const { clsiServerId, compileGroup } = body
      PDF_FILES.set(file.url, {
        pdfCreatedAt,
        file,
        clsiServerId,
        compileGroup,
      })
    }
    break
  }
}
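
// Illustrative (not exhaustive) shape of the compile response body consumed
// above, inferred from the fields this file accesses:
//   {
//     status: 'success',
//     clsiServerId: '...',
//     compileGroup: '...',
//     outputFiles: [
//       { path: 'output.pdf', url: '/project/.../build/.../output.pdf',
//         size: 123456, contentId: '...', ranges: [...] },
//     ],
//   }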

/**
 * @param {Array} chunks
 * @param {number} start
 * @param {number} end
 * @returns {Array}
 */
function getMatchingChunks(chunks, start, end) {
  const matchingChunks = []
  for (const chunk of chunks) {
    if (chunk.end <= start) {
      // no overlap:
      //         | REQUESTED_RANGE |
      // | CHUNK |
      continue
    }
    if (chunk.start >= end) {
      // no overlap:
      // | REQUESTED_RANGE |
      //                     | CHUNK |
      // chunks are ordered by start offset; nothing further can overlap
      break
    }
    matchingChunks.push(chunk)
  }
  return matchingChunks
}
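
// Example (hypothetical offsets): for cached chunks covering [0, 100),
// [100, 300) and [500, 1000), a request with start=50, end=600 matches all
// three chunks, since each one overlaps the requested range.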

/**
 * @param {Array} chunks
 * @param {number} start
 * @param {number} end
 * @returns {Array}
 */
function getInterleavingDynamicChunks(chunks, start, end) {
  const dynamicChunks = []
  for (const chunk of chunks) {
    if (start < chunk.start) {
      dynamicChunks.push({ start, end: chunk.start })
    }
    start = chunk.end
  }

  if (start < end) {
    dynamicChunks.push({ start, end })
  }
  return dynamicChunks
}
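
// Continuing the example above: with matching chunks [0, 100), [100, 300)
// and [500, 1000) and start=50, end=600, the only uncovered gap is
// [300, 500), so a single dynamic chunk { start: 300, end: 500 } is emitted.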

// listen to all network requests
self.addEventListener('fetch', onFetch)

// complete setup ASAP
self.addEventListener('install', event => {
  // skipWaiting() activates this service worker version immediately,
  // without waiting for previously open tabs to be closed.
  event.waitUntil(self.skipWaiting())
})
self.addEventListener('activate', event => {
  // clients.claim() takes control of pages that are already open.
  event.waitUntil(self.clients.claim())
})