[misc] merge pdf caching into main (#4033)

* [frontend] WIP: pdf caching using service worker -- squashed

Ref: 920fbaa00b31530f7c457a2d93bad5e553798057
Co-Authored-By: Brian Gough <brian.gough@overleaf.com>
Co-Authored-By: Eric Mc Sween <eric.mcsween@overleaf.com>

* [misc] add contentId into the URL for protecting PDF stream contents

* [misc] gracefully handle missing ranges in serviceWorker

* [misc] support PDF stream caching for anonymous users

* [misc] polish header names and add URL to error message when fetch fails

* [misc] polish event handler registration

* [misc] limit serviceWorker scope to /project/ -- trailing slash

This will block the service worker from intercepting requests on the
 project dashboard.

* [misc] add per-request feature flag for enabling PDF stream caching

* [misc] expose compile stats and timings to the frontend

* [misc] serviceWorker: support clsiServerId and compileGroup url params

* [misc] serviceWorker: polish header maps

* [misc] serviceWorker: drop TODO for p-limit -- the browser has a queue

* [misc] serviceWorker: drop verbose log message on every fetch

* [misc] cut down size of diff in backend code

* [misc] add test case for forwarding of pdf caching and metrics details

* [misc] serviceWorker: drop all the log lines

* [misc] serviceWorker: add boundary guards to the compile request regex

Co-authored-by: Brian Gough <brian.gough@overleaf.com>
Co-authored-by: Eric Mc Sween <eric.mcsween@overleaf.com>
GitOrigin-RevId: 4b291b4a4f2866cf07bccf8ec9068f33bbfdc916
This commit is contained in:
Jakob Ackermann 2021-05-17 11:38:18 +02:00 committed by Copybot
parent 2901de7830
commit 7db7cd4a49
13 changed files with 420 additions and 19 deletions

View file

@ -256,11 +256,19 @@ const ClsiManager = {
projectId,
response && response.compile && response.compile.outputFiles
)
const compile = (response && response.compile) || {}
const status = compile.status
const stats = compile.stats
const timings = compile.timings
const validationProblems = undefined
callback(
null,
response && response.compile && response.compile.status,
status,
outputFiles,
clsiServerId
clsiServerId,
validationProblems,
stats,
timings
)
})
}
@ -496,6 +504,9 @@ const ClsiManager = {
url: Url.parse(file.url).path, // the location of the file on the clsi, excluding the host part
type: file.type,
build: file.build,
contentId: file.contentId,
ranges: file.ranges,
size: file.size,
})
}
return outputFiles
@ -813,6 +824,8 @@ const ClsiManager = {
syncType: options.syncType,
syncState: options.syncState,
compileGroup: options.compileGroup,
enablePdfCaching:
(Settings.enablePdfCaching && options.enablePdfCaching) || false,
},
rootResourcePath,
resources,

View file

@ -46,9 +46,11 @@ module.exports = CompileController = {
res.setTimeout(COMPILE_TIMEOUT_MS)
const project_id = req.params.Project_id
const isAutoCompile = !!req.query.auto_compile
const enablePdfCaching = !!req.query.enable_pdf_caching
const user_id = AuthenticationController.getLoggedInUserId(req)
const options = {
isAutoCompile,
enablePdfCaching,
}
if (req.body.rootDoc_id) {
@ -83,7 +85,9 @@ module.exports = CompileController = {
outputFiles,
clsiServerId,
limits,
validationProblems
validationProblems,
stats,
timings
) => {
if (error) {
Metrics.inc('compile-error')
@ -96,6 +100,8 @@ module.exports = CompileController = {
compileGroup: limits != null ? limits.compileGroup : undefined,
clsiServerId,
validationProblems,
stats,
timings,
pdfDownloadDomain: Settings.pdfDownloadDomain,
})
}

View file

@ -94,7 +94,9 @@ module.exports = CompileManager = {
status,
outputFiles,
clsiServerId,
validationProblems
validationProblems,
stats,
timings
) {
if (error != null) {
return callback(error)
@ -105,7 +107,9 @@ module.exports = CompileManager = {
outputFiles,
clsiServerId,
limits,
validationProblems
validationProblems,
stats,
timings
)
}
)

View file

@ -874,6 +874,9 @@ const ProjectController = {
),
showNewBinaryFileUI: shouldDisplayFeature('new_binary_file'),
showSymbolPalette: shouldDisplayFeature('symbol_palette'),
enablePdfCaching:
Settings.enablePdfCaching &&
shouldDisplayFeature('enable_pdf_caching', user.alphaProgram),
})
timer.done()
}

View file

@ -187,6 +187,7 @@ block append meta
meta(name="ol-showNewLogsUI" data-type="boolean" content=showNewLogsUI)
meta(name="ol-logsUISubvariant" content=logsUISubvariant)
meta(name="ol-showSymbolPalette" data-type="boolean" content=showSymbolPalette)
meta(name="ol-enablePdfCaching" data-type="boolean" content=enablePdfCaching)
- var fileActionI18n = ['edited', 'renamed', 'created', 'deleted'].reduce((acc, i) => {acc[i] = translate('file_action_' + i); return acc}, {})
meta(name="ol-fileActionI18n" data-type="json" content=fileActionI18n)

View file

@ -527,6 +527,9 @@ module.exports = settings =
# Domain the client (pdfjs) should download the compiled pdf from
pdfDownloadDomain: process.env["PDF_DOWNLOAD_DOMAIN"] #"http://clsi-lb:3014"
# By default turn on feature flag, can be overridden per request.
enablePdfCaching: process.env["ENABLE_PDF_CACHING"] == "true"
# Maximum size of text documents in the real-time editing system.
max_doc_length: 2 * 1024 * 1024 # 2mb

View file

@ -32,6 +32,7 @@ import MetadataManager from './ide/metadata/MetadataManager'
import ReviewPanelManager from './ide/review-panel/ReviewPanelManager'
import OutlineManager from './features/outline/outline-manager'
import SafariScrollPatcher from './ide/SafariScrollPatcher'
import { loadServiceWorker } from './ide/pdfng/directives/serviceWorkerManager'
import './ide/cobranding/CobrandingDataService'
import './ide/settings/index'
import './ide/share/index'
@ -64,6 +65,7 @@ import './main/system-messages'
import '../../modules/modules-ide.js'
import './shared/context/controllers/root-context-controller'
import './features/editor-navigation-toolbar/controllers/editor-navigation-toolbar-controller'
import getMeta from './utils/meta'
App.controller(
'IdeController',
@ -356,6 +358,10 @@ If the project has been renamed please look in your project list for a new proje
}
)
if (getMeta('ol-enablePdfCaching')) {
loadServiceWorker()
}
export default angular.bootstrap(document.body, ['SharelatexApp'])
function __guard__(value, transform) {

View file

@ -5,6 +5,8 @@ import PreviewPane from '../../../features/preview/components/preview-pane'
import { react2angular } from 'react2angular'
import { rootContext } from '../../../shared/context/root-context'
import 'ace/ace'
import getMeta from '../../../utils/meta'
import { waitForServiceWorker } from '../../pdfng/directives/serviceWorkerManager'
const AUTO_COMPILE_MAX_WAIT = 5000
// We add a 1 second debounce to sending user changes to server if they aren't
@ -274,10 +276,15 @@ App.controller(
options = {}
}
const url = `/project/${$scope.project_id}/compile`
let setup = Promise.resolve()
const params = {}
if (options.isAutoCompileOnLoad || options.isAutoCompileOnChange) {
params.auto_compile = true
}
if (getMeta('ol-enablePdfCaching')) {
setup = waitForServiceWorker()
params.enable_pdf_caching = true
}
// if the previous run was a check, clear the error logs
if ($scope.check) {
$scope.pdf.logEntries = {}
@ -306,18 +313,20 @@ App.controller(
checkType = 'silent'
}
return $http.post(
url,
{
rootDoc_id: options.rootDocOverride_id || null,
draft: $scope.draft,
check: checkType,
// use incremental compile for all users but revert to a full
// compile if there is a server error
incrementalCompilesEnabled: !$scope.pdf.error,
_csrf: window.csrfToken,
},
{ params }
return setup.then(() =>
$http.post(
url,
{
rootDoc_id: options.rootDocOverride_id || null,
draft: $scope.draft,
check: checkType,
// use incremental compile for all users but revert to a full
// compile if there is a server error
incrementalCompilesEnabled: !$scope.pdf.error,
_csrf: window.csrfToken,
},
{ params }
)
)
}

View file

@ -0,0 +1,19 @@
// Tracks the in-flight registration attempt; starts out already resolved so
// callers never block when no registration was kicked off.
let registrationSetup = Promise.resolve()

// Feature-detect the Service Worker API.
function supportsServiceWorker() {
  return 'serviceWorker' in navigator
}

/**
 * Resolves once the service worker registration attempt has settled.
 * Resolves immediately when loadServiceWorker was never called or the
 * browser has no Service Worker support.
 */
export function waitForServiceWorker() {
  return registrationSetup
}

/**
 * Register the pdf caching service worker. The scope is limited to
 * /project/ (trailing slash) so it never intercepts requests on the
 * project dashboard. Registration failures are logged, not thrown.
 */
export function loadServiceWorker() {
  if (!supportsServiceWorker()) return
  registrationSetup = navigator.serviceWorker
    .register('/serviceWorker.js', { scope: '/project/' })
    .catch(error => console.warn('Cannot register serviceWorker', error))
}

View file

@ -0,0 +1,230 @@
// Matches the compile endpoint: /project/<24-hex-char id>/compile
const COMPILE_REQUEST_MATCHER = /^\/project\/[0-9a-f]{24}\/compile$/
// Pdf files larger than this get an empty "probe" response when fetched
// without a Range header (see processPdfRequest); smaller files are served
// whole.
const MIN_CHUNK_SIZE = 128 * 1024
// pdf pathname -> { file, clsiServerId, compileGroup } recorded from the
// most recent successful compile response (see handleCompileResponse).
const PDF_FILES = new Map()
/**
 * Route an intercepted request: compile requests get their response body
 * recorded, known pdf paths are served via chunk reassembly, everything
 * else falls through to the network untouched.
 * @param {FetchEvent} event
 */
function onFetch(event) {
  const { pathname } = new URL(event.request.url)
  if (COMPILE_REQUEST_MATCHER.test(pathname)) {
    processCompileRequest(event)
    return
  }
  const pdfContext = PDF_FILES.get(pathname)
  if (pdfContext) {
    processPdfRequest(event, pdfContext)
    return
  }
  // other request, ignore
}
/**
 * Forward a compile request to the network and, on success, record the
 * pdf range metadata from the JSON body before handing the response back
 * to the page.
 * @param {FetchEvent} event
 */
function processCompileRequest(event) {
  event.respondWith(
    fetch(event.request).then(async response => {
      // Only successful compiles carry output-file metadata.
      if (response.status !== 200) return response
      const body = await response.json()
      handleCompileResponse(body)
      // The response body is consumed, serialize it again.
      return new Response(JSON.stringify(body), response)
    })
  )
}
/**
 * Serve a pdf request by reassembling it from cached content chunks,
 * fetching only the byte ranges not covered by known chunk hashes directly
 * from the original pdf URL. Falls back to a plain network fetch if any
 * chunk fetch fails.
 *
 * @param {FetchEvent} event
 * @param {Object} file - output.pdf metadata (url, size, contentId, ranges)
 * @param {string} clsiServerId
 * @param {string} compileGroup
 */
function processPdfRequest(event, { file, clsiServerId, compileGroup }) {
  if (!event.request.headers.has('Range') && file.size > MIN_CHUNK_SIZE) {
    // skip probe request: answer the viewer's initial range-less request
    // with an empty 200 that advertises byte-range support and the full
    // size, so it follows up with real Range requests.
    const headers = new Headers()
    headers.set('Accept-Ranges', 'bytes')
    headers.set('Content-Length', file.size)
    headers.set('Content-Type', 'application/pdf')
    return event.respondWith(
      new Response('', {
        headers,
        status: 200,
        statusText: 'OK',
      })
    )
  }
  // Fall back to a whole-file range when no Range header is present.
  // NOTE(review): `bytes=0-${file.size}` makes `end` below equal size + 1;
  // the inclusive last index of a full file is usually size - 1 — confirm
  // this over-read is intended/harmless.
  const rangeHeader =
    event.request.headers.get('Range') || `bytes=0-${file.size}`
  // Parse "bytes=<start>-<last>"; HTTP ranges are inclusive on both ends.
  const [start, last] = rangeHeader
    .slice('bytes='.length)
    .split('-')
    .map(i => parseInt(i, 10))
  // Internally we work with an exclusive end offset.
  const end = last + 1
  // Cached chunks overlapping the window, plus the gaps between them that
  // must be fetched dynamically from the original pdf URL.
  const chunks = getMatchingChunks(file.ranges, start, end)
  const dynamicChunks = getInterleavingDynamicChunks(chunks, start, end)
  // URL prefix is /project/:id/user/:id/build/... or /project/:id/build/...
  // for authenticated and unauthenticated users respectively.
  const perUserPrefix = file.url.slice(0, file.url.indexOf('/build/'))
  const requests = chunks
    .map(chunk => {
      // Cached chunks are addressed by content id + chunk hash.
      const path = `${perUserPrefix}/content/${file.contentId}/${chunk.hash}`
      const url = new URL(path, event.request.url)
      if (clsiServerId) {
        url.searchParams.set('clsiServerId', clsiServerId)
      }
      if (compileGroup) {
        url.searchParams.set('compileGroup', compileGroup)
      }
      return { chunk, url: url.toString() }
    })
    .concat(
      dynamicChunks.map(chunk => {
        // Uncached gaps are fetched from the pdf itself via Range requests
        // (Range header is inclusive, hence end - 1).
        const { start, end } = chunk
        return {
          chunk,
          url: event.request.url,
          init: { headers: { Range: `bytes=${start}-${end - 1}` } },
        }
      })
    )
  const size = end - start
  // Buffer the reassembled window is written into, chunk by chunk.
  const reAssembledBlob = new Uint8Array(size)
  event.respondWith(
    Promise.all(
      requests.map(({ chunk, url, init }) =>
        fetch(url, init)
          .then(response => {
            // Accept 206 (range served) or 200 (full body); anything else
            // aborts reassembly and triggers the network fallback below.
            if (!(response.status === 206 || response.status === 200)) {
              throw new Error(
                `could not fetch ${url} ${JSON.stringify(init)}: ${
                  response.status
                }`
              )
            }
            return response.arrayBuffer()
          })
          .then(arrayBuffer => {
            return { chunk, arrayBuffer }
          })
      )
    )
      .then(responses => {
        responses.map(({ chunk, arrayBuffer }) => {
          // Trim the chunk where it sticks out of the requested window.
          // overlap:
          //       | REQUESTED_RANGE |
          //   | CHUNK |
          const offsetStart = Math.max(start - chunk.start, 0)
          // overlap:
          //   | REQUESTED_RANGE |
          //                 | CHUNK |
          const offsetEnd = Math.max(chunk.end - end, 0)
          if (offsetStart > 0 || offsetEnd > 0) {
            // compute index positions for slice to handle case where offsetEnd=0
            const chunkSize = chunk.end - chunk.start
            arrayBuffer = arrayBuffer.slice(offsetStart, chunkSize - offsetEnd)
          }
          // Position of this chunk relative to the window start.
          const insertPosition = Math.max(chunk.start - start, 0)
          reAssembledBlob.set(new Uint8Array(arrayBuffer), insertPosition)
        })
        return new Response(reAssembledBlob, {
          status: 206,
          headers: {
            'Accept-Ranges': 'bytes',
            'Content-Length': size,
            'Content-Range': `bytes ${start}-${last}/${file.size}`,
            'Content-Type': 'application/pdf',
          },
        })
      })
      .catch(error => {
        // Best effort: on any chunk failure, fall back to fetching the pdf
        // straight from the network.
        console.error('Could not fetch partial pdf chunks', error)
        return fetch(event.request)
      })
  )
}
/**
 * Record the compiled pdf's chunk metadata so subsequent pdf fetches can be
 * assembled from cached chunks. Only the first output.pdf entry is
 * considered, and only when it carries range information.
 * @param {Object} body - parsed JSON body of a /compile response
 */
function handleCompileResponse(body) {
  if (!body || body.status !== 'success') return
  // not interested in anything but the pdf used for rendering
  const pdfFile = body.outputFiles.find(file => file.path === 'output.pdf')
  if (pdfFile && pdfFile.ranges) {
    PDF_FILES.set(pdfFile.url, {
      file: pdfFile,
      clsiServerId: body.clsiServerId,
      compileGroup: body.compileGroup,
    })
  }
}
/**
 * Select the cached chunks that overlap the requested byte window
 * [start, end). Chunks are scanned in order; scanning stops at the first
 * chunk that begins at or beyond the window (chunks are ordered by start).
 *
 * @param {Array} chunks - cached ranges with numeric start/end offsets
 * @param {number} start - inclusive window start
 * @param {number} end - exclusive window end
 * @returns {Array} the overlapping chunks, in input order
 */
function getMatchingChunks(chunks, start, end) {
  const overlapping = []
  for (const candidate of chunks) {
    if (candidate.end <= start) {
      // no overlap: candidate lies entirely before the window, keep scanning
      continue
    }
    if (candidate.start >= end) {
      // no overlap: candidate (and everything after it) lies past the window
      break
    }
    overlapping.push(candidate)
  }
  return overlapping
}
/**
 * Compute the gaps within [start, end) that are NOT covered by the given
 * (ordered, overlapping-the-window) chunks. These gaps must be fetched
 * dynamically from the original pdf.
 *
 * @param {Array} chunks - ordered chunks overlapping the window
 * @param {number} start - inclusive window start
 * @param {number} end - exclusive window end
 * @returns {Array} gap descriptors with start/end offsets
 */
function getInterleavingDynamicChunks(chunks, start, end) {
  const gaps = []
  let cursor = start
  for (const { start: chunkStart, end: chunkEnd } of chunks) {
    // anything between the cursor and the next chunk is uncovered
    if (cursor < chunkStart) {
      gaps.push({ start: cursor, end: chunkStart })
    }
    cursor = chunkEnd
  }
  // trailing gap after the last chunk
  if (cursor < end) {
    gaps.push({ start: cursor, end })
  }
  return gaps
}
// listen to all network requests within this worker's scope
self.addEventListener('fetch', onFetch)
// complete setup ASAP: activate the new worker version without waiting for
// existing tabs to close ...
self.addEventListener('install', event => {
  event.waitUntil(self.skipWaiting())
})
// ... and start controlling already-open clients immediately on activation.
self.addEventListener('activate', event => {
  event.waitUntil(self.clients.claim())
})

View file

@ -124,12 +124,20 @@ describe('ClsiManager', function () {
path: 'output.pdf',
type: 'pdf',
build: 1234,
// gets dropped by JSON.stringify
contentId: undefined,
ranges: undefined,
size: undefined,
},
{
url: `/project/${this.project_id}/user/${this.user_id}/build/1234/output/output.log`,
path: 'output.log',
type: 'log',
build: 1234,
// gets dropped by JSON.stringify
contentId: undefined,
ranges: undefined,
size: undefined,
},
]
this.callback
@ -138,6 +146,76 @@ describe('ClsiManager', function () {
})
})
describe('with ranges on the pdf and stats/timings details', function () {
beforeEach(function () {
this.ClsiManager._postToClsi = sinon.stub().yields(null, {
compile: {
status: 'success',
stats: { fooStat: 1 },
timings: { barTiming: 2 },
outputFiles: [
{
url: `${this.settings.apis.clsi.url}/project/${this.project_id}/user/${this.user_id}/build/1234/output/output.pdf`,
path: 'output.pdf',
type: 'pdf',
build: 1234,
contentId: '123-321',
ranges: [{ start: 1, end: 42, hash: 'foo' }],
size: 42,
},
{
url: `${this.settings.apis.clsi.url}/project/${this.project_id}/user/${this.user_id}/build/1234/output/output.log`,
path: 'output.log',
type: 'log',
build: 1234,
},
],
},
})
this.ClsiCookieManager._getServerId.yields(null, 'clsi-server-id-42')
this.ClsiManager.sendRequest(
this.project_id,
this.user_id,
{ compileGroup: 'standard' },
this.callback
)
})
it('should emit the caching details and stats/timings', function () {
const outputFiles = [
{
url: `/project/${this.project_id}/user/${this.user_id}/build/1234/output/output.pdf`,
path: 'output.pdf',
type: 'pdf',
build: 1234,
contentId: '123-321',
ranges: [{ start: 1, end: 42, hash: 'foo' }],
size: 42,
},
{
url: `/project/${this.project_id}/user/${this.user_id}/build/1234/output/output.log`,
path: 'output.log',
type: 'log',
build: 1234,
// gets dropped by JSON.stringify
contentId: undefined,
ranges: undefined,
size: undefined,
},
]
const validationError = undefined
expect(this.callback).to.have.been.calledWith(
null,
'success',
outputFiles,
'clsi-server-id-42',
validationError,
{ fooStat: 1 },
{ barTiming: 2 }
)
})
})
describe('with a failed compile', function () {
beforeEach(function () {
this.ClsiManager._postToClsi = sinon.stub().callsArgWith(4, null, {
@ -317,12 +395,20 @@ describe('ClsiManager', function () {
path: 'output.pdf',
type: 'pdf',
build: 1234,
// gets dropped by JSON.stringify
contentId: undefined,
ranges: undefined,
size: undefined,
},
{
url: `/project/${this.submission_id}/build/1234/output/output.log`,
path: 'output.log',
type: 'log',
build: 1234,
// gets dropped by JSON.stringify
contentId: undefined,
ranges: undefined,
size: undefined,
},
]
this.callback
@ -528,6 +614,7 @@ describe('ClsiManager', function () {
syncType: undefined, // "full"
syncState: undefined,
compileGroup: 'standard',
enablePdfCaching: false,
}, // "01234567890abcdef"
rootResourcePath: 'main.tex',
resources: [
@ -623,6 +710,7 @@ describe('ClsiManager', function () {
syncType: 'incremental',
syncState: '01234567890abcdef',
compileGroup: 'priority',
enablePdfCaching: false,
},
rootResourcePath: 'main.tex',
resources: [

View file

@ -105,7 +105,10 @@ describe('CompileController', function () {
it('should do the compile without the auto compile flag', function () {
return this.CompileManager.compile
.calledWith(this.project_id, this.user_id, { isAutoCompile: false })
.calledWith(this.project_id, this.user_id, {
isAutoCompile: false,
enablePdfCaching: false,
})
.should.equal(true)
})
@ -132,7 +135,10 @@ describe('CompileController', function () {
it('should do the compile with the auto compile flag', function () {
return this.CompileManager.compile
.calledWith(this.project_id, this.user_id, { isAutoCompile: true })
.calledWith(this.project_id, this.user_id, {
isAutoCompile: true,
enablePdfCaching: false,
})
.should.equal(true)
})
})
@ -147,6 +153,7 @@ describe('CompileController', function () {
return this.CompileManager.compile
.calledWith(this.project_id, this.user_id, {
isAutoCompile: false,
enablePdfCaching: false,
draft: true,
})
.should.equal(true)

View file

@ -11,6 +11,7 @@ const MODULES_PATH = path.join(__dirname, '/modules')
// Generate a hash of entry points, including modules
const entryPoints = {
serviceWorker: './frontend/js/serviceWorker.js',
main: './frontend/js/main.js',
ide: './frontend/js/ide.js',
style: './frontend/stylesheets/style.less',
@ -92,6 +93,17 @@ module.exports = {
},
],
},
{
test: /serviceWorker.js$/,
use: [
{
loader: 'worker-loader',
options: {
name: 'serviceWorker.js',
},
},
],
},
{
// Pass Less files through less-loader/css-loader/mini-css-extract-
// plugin (note: run in reverse order)