Merge pull request #10139 from overleaf/jpa-split-test-min-chunk-size

[misc] add split test for a per request pdfCachingMinChunkSize

GitOrigin-RevId: 6a8a3c6267501789f2047a67b03db6ac6df427c3
This commit is contained in:
Jakob Ackermann 2022-10-25 10:24:11 +01:00 committed by Copybot
parent abaecac268
commit 956cacaef7
10 changed files with 148 additions and 66 deletions

View file

@ -49,11 +49,18 @@ if (Settings.pdfCachingEnableWorkerPool && workerpool.isMainThread) {
*
* @param {String} contentDir path to directory where content hash files are cached
* @param {String} filePath the pdf file to scan for streams
* @param {number} size the pdf size
* @param {number} pdfSize the pdf size
* @param {number} pdfCachingMinChunkSize per request threshold
* @param {number} compileTime
*/
async function update(contentDir, filePath, size, compileTime) {
if (size < Settings.pdfCachingMinChunkSize) {
async function update({
contentDir,
filePath,
pdfSize,
pdfCachingMinChunkSize,
compileTime,
}) {
if (pdfSize < pdfCachingMinChunkSize) {
return {
contentRanges: [],
newContentRanges: [],
@ -62,9 +69,21 @@ async function update(contentDir, filePath, size, compileTime) {
}
}
if (Settings.pdfCachingEnableWorkerPool) {
return await updateOtherEventLoop(contentDir, filePath, size, compileTime)
return await updateOtherEventLoop({
contentDir,
filePath,
pdfSize,
pdfCachingMinChunkSize,
compileTime,
})
} else {
return await updateSameEventLoop(contentDir, filePath, size, compileTime)
return await updateSameEventLoop({
contentDir,
filePath,
pdfSize,
pdfCachingMinChunkSize,
compileTime,
})
}
}
@ -72,19 +91,29 @@ async function update(contentDir, filePath, size, compileTime) {
*
* @param {String} contentDir path to directory where content hash files are cached
* @param {String} filePath the pdf file to scan for streams
* @param {number} size the pdf size
* @param {number} pdfSize the pdf size
* @param {number} pdfCachingMinChunkSize per request threshold
* @param {number} compileTime
*/
async function updateOtherEventLoop(contentDir, filePath, size, compileTime) {
async function updateOtherEventLoop({
contentDir,
filePath,
pdfSize,
pdfCachingMinChunkSize,
compileTime,
}) {
const workerLatencyInMs = 20
// Prefer getting the timeout error from the worker vs timing out the worker.
const timeout = getMaxOverhead(compileTime) + workerLatencyInMs
try {
return await WORKER_POOL.exec('updateSameEventLoop', [
{
contentDir,
filePath,
size,
pdfSize,
pdfCachingMinChunkSize,
compileTime,
},
]).timeout(timeout)
} catch (e) {
if (e instanceof workerpool.Promise.TimeoutError) {
@ -104,10 +133,17 @@ async function updateOtherEventLoop(contentDir, filePath, size, compileTime) {
*
* @param {String} contentDir path to directory where content hash files are cached
* @param {String} filePath the pdf file to scan for streams
* @param {number} size the pdf size
* @param {number} pdfSize the pdf size
* @param {number} pdfCachingMinChunkSize per request threshold
* @param {number} compileTime
*/
async function updateSameEventLoop(contentDir, filePath, size, compileTime) {
async function updateSameEventLoop({
contentDir,
filePath,
pdfSize,
pdfCachingMinChunkSize,
compileTime,
}) {
const checkDeadline = getDeadlineChecker(compileTime)
const contentRanges = []
const newContentRanges = []
@ -119,8 +155,7 @@ async function updateSameEventLoop(contentDir, filePath, size, compileTime) {
const { xRefEntries, startXRefTable } = await parseXrefTable(
filePath,
size,
checkDeadline
pdfSize
)
xRefEntries.sort((a, b) => {
@ -147,7 +182,7 @@ async function updateSameEventLoop(contentDir, filePath, size, compileTime) {
}
const size = object.endOffset - object.offset
object.size = size
if (size < Settings.pdfCachingMinChunkSize) {
if (size < pdfCachingMinChunkSize) {
continue
}
uncompressedObjects.push({ object, idx: uncompressedObjects.length })

View file

@ -396,10 +396,13 @@ module.exports = OutputCacheManager = {
request.metricsOpts
)
ContentCacheManager.update(
{
contentDir,
outputFilePath,
filePath: outputFilePath,
pdfSize,
timings.compile,
pdfCachingMinChunkSize: request.pdfCachingMinChunkSize,
compileTime: timings.compile,
},
function (err, result) {
if (err && err instanceof NoXrefTableError) {
return callback(null, err.message)

View file

@ -41,6 +41,14 @@ function parse(body, callback) {
type: 'boolean',
}
)
response.pdfCachingMinChunkSize = _parseAttribute(
'pdfCachingMinChunkSize',
compile.options.pdfCachingMinChunkSize,
{
default: settings.pdfCachingMinChunkSize,
type: 'number',
}
)
response.timeout = _parseAttribute('timeout', compile.options.timeout, {
default: MAX_TIMEOUT,
type: 'number',

View file

@ -6,7 +6,7 @@ const MAX_XREF_FILE_SIZE = 1024 * 1024
/** Parse qpdf --show-xref output to get a table of xref entries
*
* @param {string} filePath
* @param {integer} pdfFileSize
* @param {number} pdfFileSize
* @returns
*/
async function parseXrefTable(filePath, pdfFileSize) {

View file

@ -12,12 +12,14 @@ describe('ContentCacheManager', function () {
ContentCacheManager = require(MODULE_PATH)
})
let contentRanges, newContentRanges, reclaimed
async function run(filePath, size) {
const result = await ContentCacheManager.promises.update(
async function run(filePath, pdfSize, pdfCachingMinChunkSize) {
const result = await ContentCacheManager.promises.update({
contentDir,
filePath,
size
)
pdfSize,
pdfCachingMinChunkSize,
compileTime: 1337,
})
let newlyReclaimed
;({
contentRanges,
@ -78,16 +80,13 @@ describe('ContentCacheManager', function () {
START_2 = MINIMAL.indexOf(RANGE_2)
END_2 = START_2 + RANGE_2.byteLength
})
async function runWithMinimal() {
await run(pdfPath, MINIMAL_SIZE)
async function runWithMinimal(pdfCachingMinChunkSize) {
await run(pdfPath, MINIMAL_SIZE, pdfCachingMinChunkSize)
}
describe('with two ranges qualifying', function () {
before(function () {
Settings.pdfCachingMinChunkSize = 500
})
before(async function () {
await runWithMinimal()
await runWithMinimal(500)
})
it('should produce two ranges', function () {
expect(contentRanges).to.have.length(2)
@ -134,12 +133,8 @@ describe('ContentCacheManager', function () {
})
describe('when re-running with one range too small', function () {
before(function () {
Settings.pdfCachingMinChunkSize = 1024
})
before(async function () {
await runWithMinimal()
await runWithMinimal(1024)
})
it('should produce one range', function () {
@ -183,7 +178,7 @@ describe('ContentCacheManager', function () {
describe('when re-running 5 more times', function () {
for (let i = 0; i < 5; i++) {
before(async function () {
await runWithMinimal()
await runWithMinimal(1024)
})
}

View file

@ -896,6 +896,7 @@ const ClsiManager = {
compileGroup: options.compileGroup,
enablePdfCaching:
(Settings.enablePdfCaching && options.enablePdfCaching) || false,
pdfCachingMinChunkSize: options.pdfCachingMinChunkSize,
flags,
metricsMethod: options.compileGroup,
},

View file

@ -16,6 +16,7 @@ const ClsiCookieManager = require('./ClsiCookieManager')(
const Path = require('path')
const AnalyticsManager = require('../Analytics/AnalyticsManager')
const SplitTestHandler = require('../SplitTests/SplitTestHandler')
const { callbackify } = require('../../util/promises')
const COMPILE_TIMEOUT_MS = 10 * 60 * 1000
@ -27,10 +28,20 @@ function getImageNameForProject(projectId, callback) {
})
}
function getEnablePdfCaching(req, res, cb) {
async function getPdfCachingMinChunkSize(req, res) {
const { variant } = await SplitTestHandler.promises.getAssignment(
req,
res,
'pdf-caching-min-chunk-size'
)
if (variant === 'default') return 1_000_000
return parseInt(variant, 10)
}
const getPdfCachingOptions = callbackify(async function (req, res) {
if (!req.query.enable_pdf_caching) {
// The frontend does not want to do pdf caching.
return cb(null, false)
return { enablePdfCaching: false }
}
// Use the query flags from the editor request for overriding the split test.
@ -44,16 +55,22 @@ function getEnablePdfCaching(req, res, cb) {
// Double check with the latest split test assignment.
// We may need to turn off the feature on a short notice, without requiring
// all users to reload their editor page to disable the feature.
SplitTestHandler.getAssignment(
const { variant } = await SplitTestHandler.promises.getAssignment(
editorReq,
res,
'pdf-caching-mode',
(err, assignment) => {
if (err) return cb(null, false)
cb(null, assignment?.variant === 'enabled')
}
'pdf-caching-mode'
)
const enablePdfCaching = variant === 'enabled'
if (!enablePdfCaching) {
// Skip the lookup of the chunk size when caching is not enabled.
return { enablePdfCaching: false }
}
const pdfCachingMinChunkSize = await getPdfCachingMinChunkSize(editorReq, res)
return {
enablePdfCaching,
pdfCachingMinChunkSize,
}
})
module.exports = CompileController = {
compile(req, res, next) {
@ -91,9 +108,13 @@ module.exports = CompileController = {
options.incrementalCompilesEnabled = true
}
getEnablePdfCaching(req, res, (err, enablePdfCaching) => {
getPdfCachingOptions(req, res, (err, pdfCachingOptions) => {
if (err) return next(err)
const { enablePdfCaching, pdfCachingMinChunkSize } = pdfCachingOptions
options.enablePdfCaching = enablePdfCaching
if (enablePdfCaching) {
options.pdfCachingMinChunkSize = pdfCachingMinChunkSize
}
CompileManager.compile(
projectId,
@ -149,6 +170,7 @@ module.exports = CompileController = {
stats,
timings,
pdfDownloadDomain,
pdfCachingMinChunkSize,
})
}
)

View file

@ -18,7 +18,7 @@ export function getPdfCachingMetrics() {
}
export function trackPdfDownload(response, compileTimeClientE2E, t0) {
const { timings } = response
const { timings, pdfCachingMinChunkSize } = response
const deliveryLatencies = {
compileTimeClientE2E,
@ -41,6 +41,7 @@ export function trackPdfDownload(response, compileTimeClientE2E, t0) {
if (trackPdfDownloadEnabled) {
// Submit latency along with compile context.
submitCompileMetrics({
pdfCachingMinChunkSize,
...deliveryLatencies,
...pdfCachingMetrics,
})

View file

@ -50,6 +50,7 @@ describe('ClsiManager', function () {
url: 'https://clsipremium.example.com',
},
},
enablePdfCaching: true,
}),
'../../models/Project': {
Project: this.Project,
@ -642,6 +643,7 @@ describe('ClsiManager', function () {
syncState: undefined,
compileGroup: 'standard',
enablePdfCaching: false,
pdfCachingMinChunkSize: undefined,
flags: undefined,
metricsMethod: 'standard',
stopOnFirstError: false,
@ -687,6 +689,8 @@ describe('ClsiManager', function () {
timeout: 100,
incrementalCompilesEnabled: true,
compileGroup: 'priority',
enablePdfCaching: true,
pdfCachingMinChunkSize: 1337,
},
(err, request) => {
if (err != null) {
@ -737,7 +741,8 @@ describe('ClsiManager', function () {
syncType: 'incremental',
syncState: '01234567890abcdef',
compileGroup: 'priority',
enablePdfCaching: false,
enablePdfCaching: true,
pdfCachingMinChunkSize: 1337,
flags: undefined,
metricsMethod: 'priority',
stopOnFirstError: false,

View file

@ -1,3 +1,4 @@
/* eslint-disable mocha/handle-done-callback */
const sinon = require('sinon')
const { expect } = require('chai')
const modulePath = '../../../../app/src/Features/Compile/CompileController.js'
@ -97,7 +98,8 @@ describe('CompileController', function () {
})
describe('when clsi does not emit zone prefix', function () {
beforeEach(function () {
beforeEach(function (done) {
this.res.callback = done
this.CompileController.compile(this.req, this.res, this.next)
})
@ -120,7 +122,8 @@ describe('CompileController', function () {
})
describe('when clsi emits a zone prefix', function () {
beforeEach(function () {
beforeEach(function (done) {
this.res.callback = done
this.CompileManager.compile = sinon.stub().callsArgWith(
3,
null,
@ -162,7 +165,8 @@ describe('CompileController', function () {
})
describe('when not an auto compile', function () {
beforeEach(function () {
beforeEach(function (done) {
this.res.callback = done
this.CompileController.compile(this.req, this.res, this.next)
})
@ -173,14 +177,16 @@ describe('CompileController', function () {
})
it('should do the compile without the auto compile flag', function () {
this.CompileManager.compile
.calledWith(this.projectId, this.user_id, {
this.CompileManager.compile.should.have.been.calledWith(
this.projectId,
this.user_id,
{
isAutoCompile: false,
enablePdfCaching: false,
fileLineErrors: false,
stopOnFirstError: false,
})
.should.equal(true)
}
)
})
it('should set the content-type of the response to application/json', function () {
@ -199,39 +205,45 @@ describe('CompileController', function () {
})
describe('when an auto compile', function () {
beforeEach(function () {
beforeEach(function (done) {
this.res.callback = done
this.req.query = { auto_compile: 'true' }
this.CompileController.compile(this.req, this.res, this.next)
})
it('should do the compile with the auto compile flag', function () {
this.CompileManager.compile
.calledWith(this.projectId, this.user_id, {
this.CompileManager.compile.should.have.been.calledWith(
this.projectId,
this.user_id,
{
isAutoCompile: true,
enablePdfCaching: false,
fileLineErrors: false,
stopOnFirstError: false,
})
.should.equal(true)
}
)
})
})
describe('with the draft attribute', function () {
beforeEach(function () {
beforeEach(function (done) {
this.res.callback = done
this.req.body = { draft: true }
this.CompileController.compile(this.req, this.res, this.next)
})
it('should do the compile without the draft compile flag', function () {
this.CompileManager.compile
.calledWith(this.projectId, this.user_id, {
this.CompileManager.compile.should.have.been.calledWith(
this.projectId,
this.user_id,
{
isAutoCompile: false,
enablePdfCaching: false,
draft: true,
fileLineErrors: false,
stopOnFirstError: false,
})
.should.equal(true)
}
)
})
})
})