Merge pull request #8730 from overleaf/jpa-refactor-pdf-caching

[web] refactor pdf caching

GitOrigin-RevId: af6b871fad652f757d8c465085b33a79b955cd6b

parent 0c63515de6
commit 64184a7f56

18 changed files with 683 additions and 534 deletions
@@ -569,15 +569,19 @@ const ClsiManager = {
_parseOutputFiles(projectId, rawOutputFiles = []) {
const outputFiles = []
for (const file of rawOutputFiles) {
outputFiles.push({
const f = {
path: file.path, // the clsi is now sending this to web
url: new URL(file.url).pathname, // the location of the file on the clsi, excluding the host part
type: file.type,
build: file.build,
contentId: file.contentId,
ranges: file.ranges,
size: file.size,
})
}
if (file.path === 'output.pdf') {
f.contentId = file.contentId
f.ranges = file.ranges || []
f.size = file.size
f.createdAt = new Date()
}
outputFiles.push(f)
}
return outputFiles
},

@@ -1037,7 +1037,7 @@ const ProjectController = {
}
}

function partOfPdfCachingRollout(flag) {
function getTrackPdfDownload() {
if (!Settings.enablePdfCaching) {
// The feature is disabled globally.
return false

@@ -1045,7 +1045,20 @@ const ProjectController = {
// Let the user opt-in only.
// The flag will opt-out of both caching and metrics collection,
// as if this editing session never happened.
return shouldDisplayFeature('enable_pdf_caching', false)
return shouldDisplayFeature('track-pdf-download', false)
}

function getPdfCachingMode() {
if (!Settings.enablePdfCaching) {
// The feature is disabled globally.
return ''
}
// Let the user opt-in only.
const v = req.query['pdf-caching-mode']
if (['service-worker', 'no-service-worker'].includes(v)) {
return v
}
return ''
}

const showPdfDetach = shouldDisplayFeature(

@@ -1152,11 +1165,8 @@ const ProjectController = {
showNewSourceEditorOption,
showSymbolPalette,
showStopOnFirstError,
trackPdfDownload: partOfPdfCachingRollout('collect-metrics'),
enablePdfCaching: partOfPdfCachingRollout('enable-caching'),
resetServiceWorker:
Boolean(Settings.resetServiceWorker) &&
!shouldDisplayFeature('enable_pdf_caching', false),
trackPdfDownload: getTrackPdfDownload(),
pdfCachingMode: getPdfCachingMode(),
detachRole,
metadata: { viewport: false },
showHeaderUpgradePrompt,

@@ -26,9 +26,8 @@ meta(name="ol-showPdfDetach" data-type="boolean" content=showPdfDetach)
meta(name="ol-debugPdfDetach" data-type="boolean" content=debugPdfDetach)
meta(name="ol-showNewSourceEditorOption" data-type="boolean" content=showNewSourceEditorOption)
meta(name="ol-showSymbolPalette" data-type="boolean" content=showSymbolPalette)
meta(name="ol-enablePdfCaching" data-type="boolean" content=enablePdfCaching)
meta(name="ol-pdfCachingMode" content=pdfCachingMode)
meta(name="ol-trackPdfDownload" data-type="boolean" content=trackPdfDownload)
meta(name="ol-resetServiceWorker" data-type="boolean" content=resetServiceWorker)
meta(name="ol-detachRole" data-type="string" content=detachRole)
meta(name="ol-showHeaderUpgradePrompt" data-type="boolean" content=showHeaderUpgradePrompt)
meta(name="ol-showStopOnFirstError" data-type="boolean" content=showStopOnFirstError)

@@ -10,7 +10,7 @@ import { useProjectContext } from '../../../shared/context/project-context'
const SAY_THANKS_TIMEOUT = 10 * 1000

function FasterCompilesFeedbackContent() {
const { clsiServerId, deliveryLatencies, pdfSize, pdfUrl } =
const { clsiServerId, deliveryLatencies, pdfFile, pdfUrl } =
useCompileContext()
const { _id: projectId } = useProjectContext()

@@ -52,7 +52,7 @@ function FasterCompilesFeedbackContent() {
projectId,
server: clsiServerId?.includes('-c2d-') ? 'faster' : 'normal',
feedback,
pdfSize,
pdfSize: pdfFile.size,
...deliveryLatencies,
})
setHasRatedProject(true)

@@ -11,7 +11,7 @@ import ErrorBoundaryFallback from './error-boundary-fallback'
import { useDetachCompileContext as useCompileContext } from '../../../shared/context/detach-compile-context'
import { captureException } from '../../../infrastructure/error-reporter'

function PdfJsViewer({ url }) {
function PdfJsViewer({ url, pdfFile }) {
const { _id: projectId } = useProjectContext()

const {

@@ -90,13 +90,13 @@ function PdfJsViewer({ url }) {
setInitialised(false)
setError(undefined)

pdfJsWrapper.loadDocument(url).catch(error => {
pdfJsWrapper.loadDocument(url, pdfFile).catch(error => {
console.error(error)
setError('rendering-error')
})
return () => pdfJsWrapper.abortDocumentLoading()
}
}, [pdfJsWrapper, url, setError])
}, [pdfJsWrapper, url, pdfFile, setError])

// listen for scroll events
useEffect(() => {

@@ -344,6 +344,7 @@ function PdfJsViewer({ url }) {

PdfJsViewer.propTypes = {
url: PropTypes.string.isRequired,
pdfFile: PropTypes.object,
}

export default withErrorBoundary(memo(PdfJsViewer), () => (

@@ -6,7 +6,7 @@ const PdfJsViewer = lazy(() =>
)

function PdfViewer() {
const { pdfUrl, pdfViewer } = useCompileContext()
const { pdfUrl, pdfFile, pdfViewer } = useCompileContext()

if (!pdfUrl) {
return null

@@ -18,7 +18,7 @@ function PdfViewer() {

case 'pdfjs':
default:
return <PdfJsViewer url={pdfUrl} />
return <PdfJsViewer url={pdfUrl} pdfFile={pdfFile} />
}
}

@@ -167,7 +167,7 @@ export default class DocumentCompiler {
}

// use the feature flag to enable PDF caching in a ServiceWorker
if (getMeta('ol-enablePdfCaching')) {
if (getMeta('ol-pdfCachingMode')) {
params.set('enable_pdf_caching', 'true')
}

@@ -5,7 +5,7 @@ import getMeta from '../../../utils/meta'
// VERSION should get incremented when making changes to caching behavior or
// adjusting metrics collection.
// Keep in sync with the service worker.
const VERSION = 2
const VERSION = 3

const pdfJsMetrics = {
version: VERSION,

@@ -14,6 +14,12 @@ const pdfJsMetrics = {
totalBandwidth: 0,
}

let pdfCachingMetrics

export function setCachingMetrics(metrics) {
pdfCachingMetrics = metrics
}

const SAMPLING_RATE = 0.01

export function trackPdfDownload(response, compileTimeClientE2E) {

@@ -60,8 +66,10 @@ export function trackPdfDownload(response, compileTimeClientE2E) {
timings,
})
})
// Submit bandwidth counter separate from compile context.
submitPDFBandwidth({ pdfJsMetrics, serviceWorkerMetrics })
if (getMeta('ol-pdfCachingMode') === 'service-worker') {
// Submit (serviceWorker) bandwidth counter separate from compile context.
submitPDFBandwidth({ pdfJsMetrics, serviceWorkerMetrics })
}
}

return {

@@ -78,9 +86,11 @@ function submitCompileMetrics(metrics) {
latencyFetch,
latencyRender,
compileTimeClientE2E,
id: pdfJsMetrics.id,
...(pdfCachingMetrics || {}),
}
sl_console.log('/event/compile-metrics', JSON.stringify(metrics))
sendMB('compile-metrics-v5', leanMetrics, SAMPLING_RATE)
sendMB('compile-metrics-v6', leanMetrics, SAMPLING_RATE)
}

function submitPDFBandwidth(metrics) {

@@ -110,5 +120,5 @@ function submitPDFBandwidth(metrics) {
return
}
sl_console.log('/event/pdf-bandwidth', JSON.stringify(metrics))
sendMB('pdf-bandwidth-v5', leanMetrics, SAMPLING_RATE)
sendMB('pdf-bandwidth-v6', leanMetrics, SAMPLING_RATE)
}

@@ -11,7 +11,7 @@ export function handleOutputFiles(outputFiles, projectId, data) {
const result = {}

const outputFile = outputFiles.get('output.pdf')
result.pdfSize = outputFile?.size
result.pdfFile = outputFile

if (outputFile) {
// build the URL for viewing the PDF in the preview UI

@@ -28,9 +28,9 @@ export function handleOutputFiles(outputFiles, projectId, data) {
params.set('verify_chunks', 'true')
}

if (getMeta('ol-enablePdfCaching')) {
if (getMeta('ol-pdfCachingMode')) {
// Tag traffic that uses the pdf caching logic.
params.set('enable_pdf_caching', 'true')
params.set('enable_pdf_caching', getMeta('ol-pdfCachingMode'))
}

result.pdfUrl = `${buildURL(outputFile, data.pdfDownloadDomain)}?${params}`

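The controller, template, compiler and output-file hunks above thread the new pdf-caching-mode value from the server down to the output.pdf request URL. A condensed, illustrative sketch of that last step follows; buildOutputPdfParams is a hypothetical stand-in for the logic in handleOutputFiles, while the meta-tag name and query parameter are the ones used in the diff.

// Illustrative sketch only: how the "ol-pdfCachingMode" meta tag value ends up
// on the output.pdf request URL. buildOutputPdfParams is a hypothetical helper
// standing in for handleOutputFiles; the parameter names match the diff.
function buildOutputPdfParams(pdfCachingMode) {
  // pdfCachingMode is '', 'service-worker' or 'no-service-worker',
  // i.e. the content of <meta name="ol-pdfCachingMode">.
  const params = new URLSearchParams({ compileGroup: 'standard' }) // example extra param
  if (pdfCachingMode) {
    // Tag traffic that uses the pdf caching logic (previously always the string 'true').
    params.set('enable_pdf_caching', pdfCachingMode)
  }
  return params.toString()
}

// buildOutputPdfParams('no-service-worker')
//   -> 'compileGroup=standard&enable_pdf_caching=no-service-worker'
// buildOutputPdfParams('')
//   -> 'compileGroup=standard'  (caching disabled, nothing tagged)
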
@@ -0,0 +1,61 @@
import { fallbackRequest, fetchRange } from './pdf-caching'
import getMeta from '../../../utils/meta'
import { captureException } from '../../../infrastructure/error-reporter'
import { setCachingMetrics } from './metrics'

export function generatePdfCachingTransportFactory(PDFJS) {
if (getMeta('ol-pdfCachingMode') !== 'no-service-worker') {
return () => null
}
const cached = new Set()
const metrics = {
failedCount: 0,
tooLargeOverheadCount: 0,
tooManyRequestsCount: 0,
cachedCount: 0,
cachedBytes: 0,
fetchedCount: 0,
fetchedBytes: 0,
requestedCount: 0,
requestedBytes: 0,
}
setCachingMetrics(metrics)

class PDFDataRangeTransport extends PDFJS.PDFDataRangeTransport {
constructor(url, pdfFile, reject) {
super(pdfFile.size, new Uint8Array())
this.url = url
this.pdfFile = pdfFile
this.reject = reject
}

requestDataRange(start, end) {
fetchRange({
url: this.url,
start,
end,
file: this.pdfFile,
metrics,
cached,
})
.catch(err => {
metrics.failedCount++
console.error('optimized pdf download error', err)
captureException(err)
return fallbackRequest({ url: this.url, start, end })
})
.then(blob => {
this.onDataRange(start, blob)
})
.catch(err => {
console.error('fatal pdf download error', err)
captureException(err)
this.reject(err)
})
}
}

return function (url, pdfFile, reject) {
return new PDFDataRangeTransport(url, pdfFile, reject)
}
}

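The transport class above implements PDF.js's custom range-transport contract: PDF.js calls requestDataRange(start, end) whenever the viewer needs a byte range, and the transport hands the bytes back through onDataRange. A minimal standalone sketch of that contract, assuming the documented pdfjs-dist API; the class name, URL and size here are illustrative only.

// Minimal sketch of the PDF.js range-transport contract used above.
// Assumes the documented pdfjs-dist API; everything else is an example.
import * as PDFJS from 'pdfjs-dist'

class DemoRangeTransport extends PDFJS.PDFDataRangeTransport {
  constructor(url, size) {
    super(size, new Uint8Array()) // total file length, no initial data
    this.url = url
  }

  // Called by PDF.js whenever the viewer needs bytes [start, end).
  requestDataRange(start, end) {
    fetch(this.url, { headers: { Range: `bytes=${start}-${end - 1}` } })
      .then(response => response.arrayBuffer())
      .then(buffer => this.onDataRange(start, new Uint8Array(buffer))) // hand data back to PDF.js
  }
}

// getDocument accepts the transport via the `range` option, which is how
// pdf-js-wrapper.js consumes genPdfCachingTransport(url, pdfFile, reject).
const loadingTask = PDFJS.getDocument({
  range: new DemoRangeTransport('/project/example/build/1234/output/output.pdf', 1234567),
  disableAutoFetch: true,
})
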
@@ -0,0 +1,490 @@
import OError from '@overleaf/o-error'

const PDF_JS_CHUNK_SIZE = 128 * 1024
const MAX_SUB_REQUEST_COUNT = 4
const MAX_SUB_REQUEST_BYTES = 4 * PDF_JS_CHUNK_SIZE
const INCREMENTAL_CACHE_SIZE = 1000

const ENCODER = new TextEncoder()
function backfillEdgeBounds(file) {
if (!file.backfilledEdgeBoundsOnce) {
for (const range of file.ranges) {
if (range.objectId) {
range.objectId = ENCODER.encode(range.objectId)
range.start -= range.objectId.byteLength
}
}
}
file.backfilledEdgeBoundsOnce = true
}

/**
* @param {Array} chunks
*/
function countBytes(chunks) {
return chunks.reduce((totalBytes, chunk) => {
return totalBytes + (chunk.end - chunk.start)
}, 0)
}

/**
*
* @param {Object} metrics
* @param {number} size
* @param {number} cachedCount
* @param {number} cachedBytes
* @param {number} fetchedCount
* @param {number} fetchedBytes
*/
function trackDownloadStats(
metrics,
{ size, cachedCount, cachedBytes, fetchedCount, fetchedBytes }
) {
metrics.cachedCount += cachedCount
metrics.cachedBytes += cachedBytes
metrics.fetchedCount += fetchedCount
metrics.fetchedBytes += fetchedBytes
metrics.requestedCount++
metrics.requestedBytes += size
}

/**
* @param {Object} metrics
* @param {boolean} sizeDiffers
* @param {boolean} mismatch
* @param {boolean} success
*/
function trackChunkVerify(metrics, { sizeDiffers, mismatch, success }) {
if (sizeDiffers) {
metrics.chunkVerifySizeDiffers |= 0
metrics.chunkVerifySizeDiffers += 1
}
if (mismatch) {
metrics.chunkVerifyMismatch |= 0
metrics.chunkVerifyMismatch += 1
}
if (success) {
metrics.chunkVerifySuccess |= 0
metrics.chunkVerifySuccess += 1
}
}

/**
* @param chunk
* @param {ArrayBuffer} arrayBuffer
* @return {Uint8Array}
*/
function backFillObjectContext(chunk, arrayBuffer) {
if (!chunk.objectId) {
// This is a dynamic chunk
return new Uint8Array(arrayBuffer)
}
const { start, end, objectId } = chunk
const header = Uint8Array.from(objectId)
const fullBuffer = new Uint8Array(end - start)
fullBuffer.set(header, 0)
fullBuffer.set(new Uint8Array(arrayBuffer), objectId.length)
return fullBuffer
}

/**
* @param {Array} chunks
* @param {number} start
* @param {number} end
* @returns {Array}
*/
function getMatchingChunks(chunks, start, end) {
const matchingChunks = []
for (const chunk of chunks) {
if (chunk.end <= start) {
// no overlap:
//             | REQUESTED_RANGE |
//  | CHUNK |
continue
}
if (chunk.start >= end) {
// no overlap:
//  | REQUESTED_RANGE |
//                      | CHUNK |
break
}
matchingChunks.push(chunk)
}
return matchingChunks
}

/**
* @param {Array} potentialChunks
* @param {Set} cached
* @param {Object} metrics
*/
function cutRequestAmplification(potentialChunks, cached, metrics) {
const chunks = []
const newChunks = []
let tooManyRequests = false
for (const chunk of potentialChunks) {
if (cached.has(chunk.hash)) {
chunks.push(chunk)
continue
}
if (newChunks.length < MAX_SUB_REQUEST_COUNT) {
chunks.push(chunk)
newChunks.push(chunk)
} else {
tooManyRequests = true
}
}
if (tooManyRequests) {
metrics.tooManyRequestsCount++
}
if (cached.size > INCREMENTAL_CACHE_SIZE) {
for (const key of cached) {
if (cached.size < INCREMENTAL_CACHE_SIZE) {
break
}
// Map keys are stored in insertion order.
// We re-insert keys on cache hit, 'cached' is a cheap LRU.
cached.delete(key)
}
}
return { chunks, newChunks }
}

/**
* @param {Array} chunks
* @param {number} start
* @param {number} end
* @returns {Array}
*/
function getInterleavingDynamicChunks(chunks, start, end) {
const dynamicChunks = []
for (const chunk of chunks) {
if (start < chunk.start) {
dynamicChunks.push({ start, end: chunk.start })
}
start = chunk.end
}

if (start < end) {
dynamicChunks.push({ start, end })
}
return dynamicChunks
}

/**
*
* @param {Response} response
*/
function getServerTime(response) {
const raw = response.headers.get('Date')
if (!raw) return new Date()
return new Date(raw)
}

/**
*
* @param {Response} response
*/
function getResponseSize(response) {
const raw = response.headers.get('Content-Length')
if (!raw) return 0
return parseInt(raw, 10)
}

/**
*
* @param {Response} response
* @param chunk
*/
function getMultipartBoundary(response, chunk) {
if (!Array.isArray(chunk)) return ''

const raw = response.headers.get('Content-Type')
if (raw.includes('multipart/byteranges')) {
const idx = raw.indexOf('boundary=')
if (idx !== -1) return raw.slice(idx + 'boundary='.length)
}

throw new OError('missing boundary on multipart request', {
headers: Object.fromEntries(response.headers.entries()),
chunk,
})
}

/**
* @param {Object} response
* @param {Object} file
* @param {Object} metrics
*/
function resolveMultiPartResponses(response, file, metrics) {
const { chunk: chunks, data, boundary } = response
if (!boundary) {
return [response]
}
const responses = []
let offsetStart = 0
for (const chunk of chunks) {
const header = `\r\n--${boundary}\r\nContent-Type: application/pdf\r\nContent-Range: bytes ${
chunk.start
}-${chunk.end - 1}/${file.size}\r\n\r\n`
const headerSize = header.length

// Verify header content. A proxy might have tampered with it.
const headerRaw = ENCODER.encode(header)
if (
!data
.subarray(offsetStart, offsetStart + headerSize)
.every((v, idx) => v === headerRaw[idx])
) {
metrics.headerVerifyFailure |= 0
metrics.headerVerifyFailure++
throw new OError('multipart response header does not match', {
actual: new TextDecoder().decode(
data.subarray(offsetStart, offsetStart + headerSize)
),
expected: header,
})
}

offsetStart += headerSize
const chunkSize = chunk.end - chunk.start
responses.push({
chunk,
data: data.subarray(offsetStart, offsetStart + chunkSize),
})
offsetStart += chunkSize
}
return responses
}

/**
*
* @param {Response} response
*/
function checkChunkResponse(response) {
if (!(response.status === 206 || response.status === 200)) {
throw new OError('non successful response status: ' + response.status)
}
}

/**
*
* @param {string} url
* @param {number} start
* @param {number} end
*/
export function fallbackRequest({ url, start, end }) {
return fetch(url, { headers: { Range: `bytes=${start}-${end - 1}` } }).then(
response => {
checkChunkResponse(response)
return response.arrayBuffer()
}
)
}

/**
*
* @param {string} url
* @param {number} start
* @param {number} end
* @param {Object} metrics
* @param {Uint8Array} actual
*/
async function verifyRange({ url, start, end, metrics, actual }) {
let expectedRaw
try {
expectedRaw = await fallbackRequest({ url, start, end })
} catch (error) {
throw OError.tag(error, 'cannot verify range', { url, start, end })
}
const expected = new Uint8Array(expectedRaw)
const stats = {}
if (actual.byteLength !== expected.byteLength) {
stats.sizeDiffers = true
} else if (!expected.every((v, idx) => v === actual[idx])) {
stats.mismatch = true
} else {
stats.success = true
}
trackChunkVerify(metrics, stats)
return expected
}

/**
*
* @param {string} url
* @param {number} start
* @param {number} end
* @param {Object} file
* @param {Object} metrics
* @param {Set} cached
*/
export async function fetchRange({ url, start, end, file, metrics, cached }) {
file.createdAt = new Date(file.createdAt)
backfillEdgeBounds(file)

// Check that handling the range request won't trigger excessive sub-requests,
// (to avoid unwanted latency compared to the original request).
const { chunks, newChunks } = cutRequestAmplification(
getMatchingChunks(file.ranges, start, end),
cached,
metrics
)
const dynamicChunks = getInterleavingDynamicChunks(chunks, start, end)
const chunksSize = countBytes(newChunks)
const size = end - start

if (chunks.length === 0 && dynamicChunks.length === 1) {
// fall back to the original range request when no chunks are cached.
trackDownloadStats(metrics, {
size,
cachedCount: 0,
cachedBytes: 0,
fetchedCount: 1,
fetchedBytes: size,
})
return fallbackRequest({ url, start, end })
}
if (
chunksSize > MAX_SUB_REQUEST_BYTES &&
!(dynamicChunks.length === 0 && newChunks.length <= 1)
) {
// fall back to the original range request when a very large amount of
// object data would be requested, unless it is the only object in the
// request or everything is already cached.
metrics.tooLargeOverheadCount++
trackDownloadStats(metrics, {
size,
cachedCount: 0,
cachedBytes: 0,
fetchedCount: 1,
fetchedBytes: size,
})
return fallbackRequest({ url, start, end })
}

const byteRanges = dynamicChunks
.map(chunk => `${chunk.start}-${chunk.end - 1}`)
.join(',')
const coalescedDynamicChunks = []
switch (dynamicChunks.length) {
case 0:
break
case 1:
coalescedDynamicChunks.push({
chunk: dynamicChunks[0],
url,
init: { headers: { Range: `bytes=${byteRanges}` } },
})
break
default:
coalescedDynamicChunks.push({
chunk: dynamicChunks,
url,
init: { headers: { Range: `bytes=${byteRanges}` } },
})
}

const params = new URL(url).searchParams
// drop no needed params
params.delete('enable_pdf_caching')
params.delete('verify_chunks')
const query = params.toString()
// The schema of `url` is https://domain/project/:id/user/:id/build/... for
// authenticated and https://domain/project/:id/build/... for
// unauthenticated users. Cut it before /build/.
// The path may have an optional /zone/b prefix too.
const perUserPrefix = url.slice(0, url.indexOf('/build/'))
const requests = chunks
.map(chunk => ({
chunk,
url: `${perUserPrefix}/content/${file.contentId}/${chunk.hash}?${query}`,
}))
.concat(coalescedDynamicChunks)
let cachedCount = 0
let cachedBytes = 0
let fetchedCount = 0
let fetchedBytes = 0
const reassembledBlob = new Uint8Array(size)

const rawResponses = await Promise.all(
requests.map(async ({ chunk, url, init }) => {
try {
const response = await fetch(url, init)
checkChunkResponse(response)
const boundary = getMultipartBoundary(response, chunk)
const blobFetchDate = getServerTime(response)
const blobSize = getResponseSize(response)
if (blobFetchDate && blobSize) {
// Example: 2MB PDF, 1MB image, 128KB PDF.js chunk.
//     | pdf.js chunk |
//   | A BIG IMAGE BLOB |
// | THE FULL PDF |
if (chunk.hash && blobFetchDate < file.createdAt) {
const usedChunkSection =
Math.min(end, chunk.end) - Math.max(start, chunk.start)
cachedCount++
cachedBytes += usedChunkSection
// Roll the position of the hash in the Map.
cached.delete(chunk.hash)
cached.add(chunk.hash)
} else {
// Blobs are fetched in bulk, record the full size.
fetchedCount++
fetchedBytes += blobSize
}
}
return {
boundary,
chunk,
data: backFillObjectContext(
chunk,
// response.arrayBuffer() yields the first multipart section only.
await (await response.blob()).arrayBuffer()
),
}
} catch (error) {
throw OError.tag(error, 'cannot fetch chunk', { url })
}
})
)

rawResponses
.flatMap(r => resolveMultiPartResponses(r, file, metrics))
.forEach(({ chunk, data }) => {
// overlap:
//     | REQUESTED_RANGE |
//  | CHUNK |
const offsetStart = Math.max(start - chunk.start, 0)
// overlap:
//  | REQUESTED_RANGE |
//     | CHUNK |
const offsetEnd = Math.max(chunk.end - end, 0)
if (offsetStart > 0 || offsetEnd > 0) {
// compute index positions for slice to handle case where offsetEnd=0
const chunkSize = chunk.end - chunk.start
data = data.subarray(offsetStart, chunkSize - offsetEnd)
}
const insertPosition = Math.max(chunk.start - start, 0)
reassembledBlob.set(data, insertPosition)
})

trackDownloadStats(metrics, {
size,
cachedCount,
cachedBytes,
fetchedCount,
fetchedBytes,
})

if (url.includes('verify_chunks=true')) {
return await verifyRange({
url,
start,
end,
metrics,
actual: reassembledBlob,
})
}
return reassembledBlob
}

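Both this module and the service worker below rely on the "cheap LRU" noted in cutRequestAmplification: a plain Set doubles as an LRU cache because JavaScript Sets iterate in insertion order, so re-inserting a hash on a cache hit moves it to the most-recently-used end, and eviction simply walks the front of the set. A standalone sketch of the idiom; the cache size and hash values are illustrative only.

// Illustrative sketch of the Set-as-cheap-LRU idiom from cutRequestAmplification.
const INCREMENTAL_CACHE_SIZE = 3 // tiny value for demonstration only
const cached = new Set()

function markUsed(hash) {
  // Deleting and re-adding moves the hash to the most-recently-used end,
  // because Sets preserve insertion order.
  cached.delete(hash)
  cached.add(hash)
}

function evictOldest() {
  if (cached.size <= INCREMENTAL_CACHE_SIZE) return
  for (const key of cached) {
    if (cached.size <= INCREMENTAL_CACHE_SIZE) break
    cached.delete(key) // the oldest entries come first in iteration order
  }
}

for (const hash of ['a', 'b', 'c']) cached.add(hash)
markUsed('a') // 'a' is now the most recent entry
cached.add('d') // size exceeds the limit
evictOldest() // evicts 'b' (the oldest); the cache is now {'c', 'a', 'd'}
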
@@ -1,4 +1,5 @@
import { captureMessage } from '../../../infrastructure/error-reporter'
import { generatePdfCachingTransportFactory } from './pdf-caching-transport'

const params = new URLSearchParams(window.location.search)
const disableFontFace = params.get('disable-font-face') === 'true'

@@ -19,6 +20,7 @@ export default class PDFJSWrapper {
})

this.PDFJS = PDFJS
this.genPdfCachingTransport = generatePdfCachingTransportFactory(PDFJS)
this.PDFJSViewer = PDFJSViewer
this.cMapUrl = cMapUrl
this.imageResourcesPath = imageResourcesPath

@@ -57,7 +59,7 @@ export default class PDFJSWrapper {
}

// load a document from a URL
loadDocument(url) {
loadDocument(url, pdfFile) {
// cancel any previous loading task
if (this.loadDocumentTask) {
this.loadDocumentTask.destroy()

@@ -74,6 +76,7 @@ export default class PDFJSWrapper {
disableAutoFetch: true,
disableStream,
textLayerMode: 2, // PDFJSViewer.TextLayerMode.ENABLE,
range: this.genPdfCachingTransport(url, pdfFile, reject),
})

this.loadDocumentTask.promise

@@ -435,10 +435,10 @@ If the project has been renamed please look in your project list for a new proje
}
)

if (getMeta('ol-resetServiceWorker')) {
unregisterServiceWorker()
} else if (getMeta('ol-enablePdfCaching')) {
} else if (getMeta('ol-pdfCachingMode') === 'service-worker') {
loadServiceWorker()
} else {
unregisterServiceWorker()
}

angular.module('SharelatexApp').config(function ($provide) {

@@ -1,19 +1,17 @@
import { v4 as uuid } from 'uuid'
import { fetchRange } from './features/pdf-preview/util/pdf-caching'
const OError = require('@overleaf/o-error')

// VERSION should get incremented when making changes to caching behavior or
// adjusting metrics collection.
// Keep in sync with PdfJsMetrics.
const VERSION = 2
const VERSION = 3

const CLEAR_CACHE_REQUEST_MATCHER = /^\/project\/[0-9a-f]{24}\/output$/
const COMPILE_REQUEST_MATCHER = /^\/project\/[0-9a-f]{24}\/compile$/
const PDF_REQUEST_MATCHER =
/^(\/zone\/.)?(\/project\/[0-9a-f]{24}\/.*\/output.pdf)$/
const PDF_JS_CHUNK_SIZE = 128 * 1024
const MAX_SUBREQUEST_COUNT = 4
const MAX_SUBREQUEST_BYTES = 4 * PDF_JS_CHUNK_SIZE
const INCREMENTAL_CACHE_SIZE = 1000

// Each compile request defines a context (essentially the specific pdf file for
// that compile), requests for that pdf file can use the hashes in the compile

@@ -101,57 +99,6 @@ function expirePdfContexts() {
})
}

/**
*
* @param {Object} metrics
* @param {number} size
* @param {number} cachedCount
* @param {number} cachedBytes
* @param {number} fetchedCount
* @param {number} fetchedBytes
*/
function trackDownloadStats(
metrics,
{ size, cachedCount, cachedBytes, fetchedCount, fetchedBytes }
) {
metrics.cachedCount += cachedCount
metrics.cachedBytes += cachedBytes
metrics.fetchedCount += fetchedCount
metrics.fetchedBytes += fetchedBytes
metrics.requestedCount++
metrics.requestedBytes += size
}

/**
* @param {Object} metrics
* @param {boolean} sizeDiffers
* @param {boolean} mismatch
* @param {boolean} success
*/
function trackChunkVerify(metrics, { sizeDiffers, mismatch, success }) {
if (sizeDiffers) {
metrics.chunkVerifySizeDiffers |= 0
metrics.chunkVerifySizeDiffers += 1
}
if (mismatch) {
metrics.chunkVerifyMismatch |= 0
metrics.chunkVerifyMismatch += 1
}
if (success) {
metrics.chunkVerifySuccess |= 0
metrics.chunkVerifySuccess += 1
}
}

/**
* @param {Array} chunks
*/
function countBytes(chunks) {
return chunks.reduce((totalBytes, chunk) => {
return totalBytes + (chunk.end - chunk.start)
}, 0)
}

/**
* @param {FetchEvent} event
*/

@@ -266,7 +213,6 @@ function processPdfRequest(
return event.respondWith(response)
}

const verifyChunks = event.request.url.includes('verify_chunks=true')
const rangeHeader =
event.request.headers.get('Range') || `bytes=0-${file.size - 1}`
const [start, last] = rangeHeader

@@ -275,307 +221,35 @@ function processPdfRequest(
.map(i => parseInt(i, 10))
const end = last + 1

// Check that handling the range request won't trigger excessive subrequests,
// (to avoid unwanted latency compared to the original request).
const { chunks, newChunks } = cutRequestAmplification(
getMatchingChunks(file.ranges, start, end),
cached,
metrics
)
const dynamicChunks = getInterleavingDynamicChunks(chunks, start, end)
const chunksSize = countBytes(newChunks)
const size = end - start

if (chunks.length === 0 && dynamicChunks.length === 1) {
// fall back to the original range request when no chunks are cached.
trackDownloadStats(metrics, {
size,
cachedCount: 0,
cachedBytes: 0,
fetchedCount: 1,
fetchedBytes: size,
return event.respondWith(
fetchRange({
url: event.request.url,
start,
end,
file,
pdfCreatedAt,
metrics,
cached,
})
return
}
if (
chunksSize > MAX_SUBREQUEST_BYTES &&
!(dynamicChunks.length === 0 && newChunks.length <= 1)
) {
// fall back to the original range request when a very large amount of
// object data would be requested, unless it is the only object in the
// request or everything is already cached.
metrics.tooLargeOverheadCount++
trackDownloadStats(metrics, {
size,
cachedCount: 0,
cachedBytes: 0,
fetchedCount: 1,
fetchedBytes: size,
})
return
}

// URL prefix is /project/:id/user/:id/build/... or /project/:id/build/...
// for authenticated and unauthenticated users respectively.
const perUserPrefix = file.url.slice(0, file.url.indexOf('/build/'))
const byteRanges = dynamicChunks
.map(chunk => `${chunk.start}-${chunk.end - 1}`)
.join(',')
const coalescedDynamicChunks = []
switch (dynamicChunks.length) {
case 0:
break
case 1:
coalescedDynamicChunks.push({
chunk: dynamicChunks[0],
url: event.request.url,
init: { headers: { Range: `bytes=${byteRanges}` } },
})
break
default:
coalescedDynamicChunks.push({
chunk: dynamicChunks,
url: event.request.url,
init: { headers: { Range: `bytes=${byteRanges}` } },
})
}
const requests = chunks
.map(chunk => {
const path = `${perUserPrefix}/content/${file.contentId}/${chunk.hash}`
const url = new URL(path, event.request.url)
if (clsiServerId) {
url.searchParams.set('clsiserverid', clsiServerId)
}
if (compileGroup) {
url.searchParams.set('compileGroup', compileGroup)
}
return { chunk, url: url.toString() }
})
.concat(coalescedDynamicChunks)
let cachedCount = 0
let cachedBytes = 0
let fetchedCount = 0
let fetchedBytes = 0
const reAssembledBlob = new Uint8Array(size)
event.respondWith(
Promise.all(
requests.map(({ chunk, url, init }) =>
fetch(url, init)
.then(response => {
if (!(response.status === 206 || response.status === 200)) {
throw new OError(
'non successful response status: ' + response.status
)
}
const boundary = getMultipartBoundary(response)
if (Array.isArray(chunk) && !boundary) {
throw new OError('missing boundary on multipart request', {
headers: Object.fromEntries(response.headers.entries()),
chunk,
})
}
const blobFetchDate = getServerTime(response)
const blobSize = getResponseSize(response)
if (blobFetchDate && blobSize) {
const chunkSize =
Math.min(end, chunk.end) - Math.max(start, chunk.start)
// Example: 2MB PDF, 1MB image, 128KB PDF.js chunk.
//     | pdf.js chunk |
//   | A BIG IMAGE BLOB |
// | THE FULL PDF |
if (blobFetchDate < pdfCreatedAt) {
cachedCount++
cachedBytes += chunkSize
// Roll the position of the hash in the Map.
cached.delete(chunk.hash)
cached.add(chunk.hash)
} else {
// Blobs are fetched in bulk.
fetchedCount++
fetchedBytes += blobSize
}
}
return response
.blob()
.then(blob => blob.arrayBuffer())
.then(arraybuffer => {
return {
boundary,
chunk,
data: backFillObjectContext(chunk, arraybuffer),
}
})
})
.catch(error => {
throw OError.tag(error, 'cannot fetch chunk', { url })
})
)
)
.then(rawResponses => {
const responses = []
for (const response of rawResponses) {
if (response.boundary) {
responses.push(
...getMultiPartResponses(response, file, metrics, verifyChunks)
)
} else {
responses.push(response)
}
}
responses.forEach(({ chunk, data }) => {
// overlap:
//     | REQUESTED_RANGE |
//  | CHUNK |
const offsetStart = Math.max(start - chunk.start, 0)
// overlap:
//  | REQUESTED_RANGE |
//     | CHUNK |
const offsetEnd = Math.max(chunk.end - end, 0)
if (offsetStart > 0 || offsetEnd > 0) {
// compute index positions for slice to handle case where offsetEnd=0
const chunkSize = chunk.end - chunk.start
data = data.subarray(offsetStart, chunkSize - offsetEnd)
}
const insertPosition = Math.max(chunk.start - start, 0)
reAssembledBlob.set(data, insertPosition)
})

let verifyProcess = Promise.resolve(reAssembledBlob)
if (verifyChunks) {
verifyProcess = fetch(event.request)
.then(response => response.arrayBuffer())
.then(arrayBuffer => {
const fullBlob = new Uint8Array(arrayBuffer)
const stats = {}
if (reAssembledBlob.byteLength !== fullBlob.byteLength) {
stats.sizeDiffers = true
} else if (
!reAssembledBlob.every((v, idx) => v === fullBlob[idx])
) {
stats.mismatch = true
} else {
stats.success = true
}
trackChunkVerify(metrics, stats)
if (stats.success === true) {
return reAssembledBlob
} else {
return fullBlob
}
})
}

return verifyProcess.then(blob => {
trackDownloadStats(metrics, {
size,
cachedCount,
cachedBytes,
fetchedCount,
fetchedBytes,
})
return new Response(blob, {
status: 206,
headers: {
'Accept-Ranges': 'bytes',
'Content-Length': size,
'Content-Range': `bytes ${start}-${last}/${file.size}`,
'Content-Type': 'application/pdf',
},
})
.then(blob => {
return new Response(blob, {
status: 206,
headers: {
'Accept-Ranges': 'bytes',
'Content-Length': end - start,
'Content-Range': `bytes ${start}-${last}/${file.size}`,
'Content-Type': 'application/pdf',
},
})
})
.catch(error => {
fetchedBytes += size
metrics.failedCount++
trackDownloadStats(metrics, {
size,
cachedCount: 0,
cachedBytes: 0,
fetchedCount,
fetchedBytes,
})
reportError(event, OError.tag(error, 'failed to compose pdf response'))
return fetch(event.request)
})
)
}

/**
*
* @param {Response} response
*/
function getServerTime(response) {
const raw = response.headers.get('Date')
if (!raw) return new Date()
return new Date(raw)
}

/**
*
* @param {Response} response
*/
function getResponseSize(response) {
const raw = response.headers.get('Content-Length')
if (!raw) return 0
return parseInt(raw, 10)
}

/**
*
* @param {Response} response
*/
function getMultipartBoundary(response) {
const raw = response.headers.get('Content-Type')
if (!raw.includes('multipart/byteranges')) return ''
const idx = raw.indexOf('boundary=')
if (idx === -1) return ''
return raw.slice(idx + 'boundary='.length)
}

/**
* @param {Object} response
* @param {Object} file
* @param {Object} metrics
* @param {boolean} verifyChunks
*/
function getMultiPartResponses(response, file, metrics, verifyChunks) {
const { chunk: chunks, data, boundary } = response
const responses = []
let offsetStart = 0
for (const chunk of chunks) {
const header = `\r\n--${boundary}\r\nContent-Type: application/pdf\r\nContent-Range: bytes ${
chunk.start
}-${chunk.end - 1}/${file.size}\r\n\r\n`
const headerSize = header.length

// Verify header content. A proxy might have tampered with it.
const headerRaw = ENCODER.encode(header)
if (
!data
.subarray(offsetStart, offsetStart + headerSize)
.every((v, idx) => v === headerRaw[idx])
) {
metrics.headerVerifyFailure |= 0
metrics.headerVerifyFailure++
throw new OError('multipart response header does not match', {
actual: new TextDecoder().decode(
data.subarray(offsetStart, offsetStart + headerSize)
),
expected: header,
})
}

offsetStart += headerSize
const chunkSize = chunk.end - chunk.start
responses.push({
chunk,
data: data.subarray(offsetStart, offsetStart + chunkSize),
})
offsetStart += chunkSize
}
return responses
}

/**
* @param {FetchEvent} event
* @param {Response} response

@@ -584,15 +258,11 @@ function getMultiPartResponses(response, file, metrics, verifyChunks) {
function handleCompileResponse(event, response, body) {
if (!body || body.status !== 'success') return

const pdfCreatedAt = getServerTime(response)

for (const file of body.outputFiles) {
if (file.path !== 'output.pdf') continue // not the pdf used for rendering
if (file.ranges) {
file.ranges.forEach(backFillEdgeBounds)
if (file.ranges?.length) {
const { clsiServerId, compileGroup } = body
registerPdfContext(event.clientId, file.url, {
pdfCreatedAt,
file,
clsiServerId,
compileGroup,

@@ -602,117 +272,6 @@ function handleCompileResponse(event, response, body) {
}
}

const ENCODER = new TextEncoder()
function backFillEdgeBounds(chunk) {
if (chunk.objectId) {
chunk.objectId = ENCODER.encode(chunk.objectId)
chunk.start -= chunk.objectId.byteLength
}
return chunk
}

/**
* @param chunk
* @param {ArrayBuffer} arrayBuffer
* @return {Uint8Array}
*/
function backFillObjectContext(chunk, arrayBuffer) {
if (!chunk.objectId) {
// This is a dynamic chunk
return new Uint8Array(arrayBuffer)
}
const { start, end, objectId } = chunk
const header = Uint8Array.from(objectId)
const fullBuffer = new Uint8Array(end - start)
fullBuffer.set(header, 0)
fullBuffer.set(new Uint8Array(arrayBuffer), objectId.length)
return fullBuffer
}

/**
* @param {Array} chunks
* @param {number} start
* @param {number} end
* @returns {Array}
*/
function getMatchingChunks(chunks, start, end) {
const matchingChunks = []
for (const chunk of chunks) {
if (chunk.end <= start) {
// no overlap:
//             | REQUESTED_RANGE |
//  | CHUNK |
continue
}
if (chunk.start >= end) {
// no overlap:
//  | REQUESTED_RANGE |
//                      | CHUNK |
break
}
matchingChunks.push(chunk)
}
return matchingChunks
}

/**
* @param {Array} potentialChunks
* @param {Set} cached
* @param {Object} metrics
*/
function cutRequestAmplification(potentialChunks, cached, metrics) {
const chunks = []
const newChunks = []
let tooManyRequests = false
for (const chunk of potentialChunks) {
if (cached.has(chunk.hash)) {
chunks.push(chunk)
continue
}
if (newChunks.length < MAX_SUBREQUEST_COUNT) {
chunks.push(chunk)
newChunks.push(chunk)
} else {
tooManyRequests = true
}
}
if (tooManyRequests) {
metrics.tooManyRequestsCount++
}
if (cached.size > INCREMENTAL_CACHE_SIZE) {
for (const key of cached) {
if (cached.size < INCREMENTAL_CACHE_SIZE) {
break
}
// Map keys are stored in insertion order.
// We re-insert keys on cache hit, 'cached' is a cheap LRU.
cached.delete(key)
}
}
return { chunks, newChunks }
}

/**
* @param {Array} chunks
* @param {number} start
* @param {number} end
* @returns {Array}
*/
function getInterleavingDynamicChunks(chunks, start, end) {
const dynamicChunks = []
for (const chunk of chunks) {
if (start < chunk.start) {
dynamicChunks.push({ start, end: chunk.start })
}
start = chunk.end
}

if (start < end) {
dynamicChunks.push({ start, end })
}
return dynamicChunks
}

/**
* @param {FetchEvent} event
*/

@@ -36,7 +36,7 @@ export function DetachCompileProvider({ children }) {
logEntries: _logEntries,
logEntryAnnotations: _logEntryAnnotations,
pdfDownloadUrl: _pdfDownloadUrl,
pdfSize: _pdfSize,
pdfFile: _pdfFile,
pdfUrl: _pdfUrl,
pdfViewer: _pdfViewer,
position: _position,

@@ -155,9 +155,9 @@ export function DetachCompileProvider({ children }) {
'detacher',
'detached'
)
const [pdfSize] = useDetachStateWatcher(
'pdfSize',
_pdfSize,
const [pdfFile] = useDetachStateWatcher(
'pdfFile',
_pdfFile,
'detacher',
'detached'
)

@@ -362,7 +362,7 @@ export function DetachCompileProvider({ children }) {
logEntryAnnotations,
logEntries,
pdfDownloadUrl,
pdfSize,
pdfFile,
pdfUrl,
pdfViewer,
position,

@@ -411,7 +411,7 @@ export function DetachCompileProvider({ children }) {
logEntryAnnotations,
logEntries,
pdfDownloadUrl,
pdfSize,
pdfFile,
pdfUrl,
pdfViewer,
position,

@@ -46,7 +46,7 @@ export const CompileContextPropTypes = {
logEntries: PropTypes.object,
logEntryAnnotations: PropTypes.object,
pdfDownloadUrl: PropTypes.string,
pdfSize: PropTypes.number,
pdfFile: PropTypes.object,
pdfUrl: PropTypes.string,
pdfViewer: PropTypes.string,
position: PropTypes.object,

@@ -103,7 +103,8 @@ export function LocalCompileProvider({ children }) {
// the URL for loading the PDF in the preview pane
const [pdfUrl, setPdfUrl] = useScopeValueSetterOnly('pdf.url')

const [pdfSize, setPdfSize] = useState(0)
// low level details for metrics
const [pdfFile, setPdfFile] = useState()

// the project is considered to be "uncompiled" if a doc has changed since the last compile started
const [uncompiled, setUncompiled] = useScopeValue('pdf.uncompiled')

@@ -288,7 +289,7 @@ export function LocalCompileProvider({ children }) {
const result = handleOutputFiles(outputFiles, projectId, data)
if (data.status === 'success') {
setPdfDownloadUrl(result.pdfDownloadUrl)
setPdfSize(result.pdfSize)
setPdfFile(result.pdfFile)
setPdfUrl(result.pdfUrl)
}

@@ -406,7 +407,7 @@ export function LocalCompileProvider({ children }) {
setLogEntries,
setLogEntryAnnotations,
setPdfDownloadUrl,
setPdfSize,
setPdfFile,
setPdfUrl,
])

@@ -507,7 +508,7 @@ export function LocalCompileProvider({ children }) {
logEntryAnnotations,
logEntries,
pdfDownloadUrl,
pdfSize,
pdfFile,
pdfUrl,
pdfViewer,
position,

@@ -557,7 +558,7 @@ export function LocalCompileProvider({ children }) {
logEntryAnnotations,
position,
pdfDownloadUrl,
pdfSize,
pdfFile,
pdfUrl,
pdfViewer,
rawLog,

@@ -2,6 +2,7 @@ const sinon = require('sinon')
const { expect } = require('chai')
const modulePath = '../../../../app/src/Features/Compile/ClsiManager.js'
const SandboxedModule = require('sandboxed-module')
const tk = require('timekeeper')

describe('ClsiManager', function () {
beforeEach(function () {

@@ -72,6 +73,11 @@ describe('ClsiManager', function () {
this.project_id = 'project-id'
this.user_id = 'user-id'
this.callback = sinon.stub()
tk.freeze(Date.now())
})

after(function () {
tk.reset()
})

describe('sendRequest', function () {

@@ -136,9 +142,10 @@ describe('ClsiManager', function () {
path: 'output.pdf',
type: 'pdf',
build: 1234,
ranges: [],
createdAt: new Date(),
// gets dropped by JSON.stringify
contentId: undefined,
ranges: undefined,
size: undefined,
},
{

@@ -146,10 +153,6 @@ describe('ClsiManager', function () {
path: 'output.log',
type: 'log',
build: 1234,
// gets dropped by JSON.stringify
contentId: undefined,
ranges: undefined,
size: undefined,
},
]
this.callback

@@ -207,16 +210,13 @@ describe('ClsiManager', function () {
contentId: '123-321',
ranges: [{ start: 1, end: 42, hash: 'foo' }],
size: 42,
createdAt: new Date(),
},
{
url: `/project/${this.project_id}/user/${this.user_id}/build/1234/output/output.log`,
path: 'output.log',
type: 'log',
build: 1234,
// gets dropped by JSON.stringify
contentId: undefined,
ranges: undefined,
size: undefined,
},
]
const validationError = undefined

@@ -417,9 +417,10 @@ describe('ClsiManager', function () {
path: 'output.pdf',
type: 'pdf',
build: 1234,
ranges: [],
createdAt: new Date(),
// gets dropped by JSON.stringify
contentId: undefined,
ranges: undefined,
size: undefined,
},
{

@@ -427,15 +428,13 @@ describe('ClsiManager', function () {
path: 'output.log',
type: 'log',
build: 1234,
// gets dropped by JSON.stringify
contentId: undefined,
ranges: undefined,
size: undefined,
},
]
this.callback
.calledWith(null, this.status, outputFiles)
.should.equal(true)
this.callback.should.have.been.calledWith(
null,
this.status,
outputFiles
)
})
})

@@ -1136,10 +1136,10 @@ describe('ProjectController', function () {
this.ProjectController.loadEditor(this.req, this.res)
})
}
function expectPDFCachingEnabled() {
function expectPDFCachingEnabled(mode) {
it('should enable pdf caching', function (done) {
this.res.render = (pageName, opts) => {
expect(opts.enablePdfCaching).to.equal(true)
expect(opts.pdfCachingMode).to.equal(mode)
done()
}
this.ProjectController.loadEditor(this.req, this.res)

@@ -1157,7 +1157,7 @@ describe('ProjectController', function () {
function expectPDFCachingDisabled() {
it('should disable pdf caching', function (done) {
this.res.render = (pageName, opts) => {
expect(opts.enablePdfCaching).to.equal(false)
expect(opts.pdfCachingMode).to.equal('')
done()
}
this.ProjectController.loadEditor(this.req, this.res)

@@ -1174,20 +1174,32 @@ describe('ProjectController', function () {
expectPDFCachingDisabled()
})

describe('with enable_pdf_caching=false', function () {
describe('with track-pdf-download=false', function () {
beforeEach(function () {
this.req.query.enable_pdf_caching = 'false'
this.req.query['track-pdf-download'] = 'false'
})
expectBandwidthTrackingDisabled()
expectPDFCachingDisabled()
})

describe('with enable_pdf_caching=true', function () {
describe('with track-pdf-download=true', function () {
beforeEach(function () {
this.req.query.enable_pdf_caching = 'true'
this.req.query['track-pdf-download'] = 'true'
})
expectBandwidthTrackingEnabled()
expectPDFCachingEnabled()
})

describe('with pdf-caching-mode=no-service-worker', function () {
beforeEach(function () {
this.req.query['pdf-caching-mode'] = 'no-service-worker'
})
expectPDFCachingEnabled('no-service-worker')
})

describe('with pdf-caching-mode=service-worker', function () {
beforeEach(function () {
this.req.query['pdf-caching-mode'] = 'service-worker'
})
expectPDFCachingEnabled('service-worker')
})
})
})