// Mirror of https://github.com/overleaf/overleaf.git
// Synced 2025-01-06 14:31:33 +00:00, commit 56a3b0dcde
// [clsi] pdf-caching: move cpu intensive work onto a new event loop
// GitOrigin-RevId: 4cb5cd4528fa1c5df6a8e91f9caa38cb64d94463
// 118 lines, 3.4 KiB, JavaScript
const logger = require('logger-sharelatex')
|
|
const Metrics = require('./Metrics')
|
|
const os = require('os')
|
|
|
|
// How long a sampled load average stays valid before it is read again.
const LOAD_CACHE_TTL_MS = 10 * 1000

// Most recent load average sample. `expires` is compared against Date.now();
// the initial -1 forces a refresh on the first call to getSystemLoad().
let CACHED_LOAD = {
  expires: -1,
  load: [0, 0, 0],
}

/**
 * Get the system load average, reading it from the OS at most once per
 * LOAD_CACHE_TTL_MS and serving the cached sample in between.
 *
 * @returns {number[]} the array produced by `os.loadavg()`
 */
function getSystemLoad() {
  const now = Date.now()
  if (CACHED_LOAD.expires < now) {
    CACHED_LOAD = {
      expires: now + LOAD_CACHE_TTL_MS,
      load: os.loadavg(),
    }
  }
  return CACHED_LOAD.load
}
|
|
|
|
// Number of bytes in one MB (1024 * 1024); used to express byte sizes in MB.
const ONE_MB = 1 << 20
|
|
|
|
/**
 * Emit metrics for a compiled PDF.
 *
 * Counts timed-out and queue-limited pdf-caching runs. When a
 * 'compute-pdf-caching' timing is present, the detailed caching metrics are
 * emitted via emitPdfCachingStats(); otherwise only the full download size
 * of the PDF is recorded.
 *
 * @param {Object} stats - values keyed by stat name (e.g. 'pdf-size')
 * @param {Object} timings - durations keyed by name (e.g. 'compute-pdf-caching')
 */
function emitPdfStats(stats, timings) {
  if (stats['pdf-caching-timed-out']) {
    Metrics.inc('pdf-caching-timed-out')
  }
  if (stats['pdf-caching-queue-limit-reached']) {
    Metrics.inc('pdf-caching-queue-limit-reached')
  }
  if (timings['compute-pdf-caching']) {
    emitPdfCachingStats(stats, timings)
  } else if (stats['pdf-size']) {
    // How much bandwidth will the pdf incur when downloaded in full?
    // Skipped when there is no pdf size, mirroring the guard in
    // emitPdfCachingStats(), so that no `undefined` value gets recorded.
    Metrics.summary('pdf-bandwidth', stats['pdf-size'])
  }
}
|
|
|
|
/**
 * Emit the detailed pdf-caching metrics: time spent computing the ranges,
 * its overhead relative to the rest of the compile, how the hashing scales
 * with pdf/ranges size and count, and the resulting bandwidth and disk usage.
 *
 * Nothing is emitted when `stats['pdf-size']` is missing or zero. Range
 * stats that are absent are treated as 0, so the size-based summaries never
 * record NaN.
 *
 * @param {Object} stats - values keyed by stat name ('pdf-size',
 *   'pdf-caching-total-ranges-size', 'pdf-caching-new-ranges-size',
 *   'pdf-caching-n-ranges', 'pdf-caching-n-new-ranges',
 *   'pdf-caching-reclaimed-space')
 * @param {Object} timings - durations keyed by name ('compileE2E',
 *   'compute-pdf-caching')
 */
function emitPdfCachingStats(stats, timings) {
  const pdfSize = stats['pdf-size']
  if (!pdfSize) return // double check

  const computeTime = timings['compute-pdf-caching']
  // Missing range stats count as "no ranges" rather than poisoning the
  // arithmetic below with undefined/NaN.
  const totalRangesSize = stats['pdf-caching-total-ranges-size'] || 0
  const newRangesSize = stats['pdf-caching-new-ranges-size'] || 0
  const reclaimedSpace = stats['pdf-caching-reclaimed-space'] || 0

  // How much extra time did we spend in PDF.js?
  Metrics.timing('compute-pdf-caching', computeTime)

  // How large is the overhead of hashing up-front?
  // Ratio of the end-to-end compile time to the compile time without the
  // caching computation; 1 when the remainder is zero (avoids dividing by 0).
  const timeWithoutCaching = timings.compileE2E - computeTime
  const fraction =
    timeWithoutCaching !== 0 ? timings.compileE2E / timeWithoutCaching : 1
  if (fraction > 1.5 && timings.compileE2E > 10 * 1000) {
    logger.warn(
      {
        stats,
        timings,
        load: getSystemLoad(),
      },
      'slow pdf caching'
    )
  }
  Metrics.summary('overhead-compute-pdf-ranges', fraction * 100 - 100)

  // How does the hashing scale to pdf size in MB?
  Metrics.timing(
    'compute-pdf-caching-relative-to-pdf-size',
    computeTime / (pdfSize / ONE_MB)
  )

  if (totalRangesSize) {
    const rangesCount = stats['pdf-caching-n-ranges']

    // How does the hashing scale to total ranges size in MB?
    Metrics.timing(
      'compute-pdf-caching-relative-to-total-ranges-size',
      computeTime / (totalRangesSize / ONE_MB)
    )

    // How fast is the hashing per range on average?
    Metrics.timing(
      'compute-pdf-caching-relative-to-ranges-count',
      computeTime / rangesCount
    )

    // How many ranges are new?
    Metrics.summary(
      'new-pdf-ranges-relative-to-total-ranges',
      (stats['pdf-caching-n-new-ranges'] / rangesCount) * 100
    )
  }

  // How much content is cacheable?
  Metrics.summary(
    'cacheable-ranges-to-pdf-size',
    (totalRangesSize / pdfSize) * 100
  )

  // All of the pdf, minus the ranges that are potentially cached, plus the
  // new ranges, which are not cached.
  const sizeWhenDownloadedInFull = pdfSize - totalRangesSize + newRangesSize

  // How much bandwidth can we save when downloading the pdf in full?
  Metrics.summary(
    'pdf-bandwidth-savings',
    100 - (sizeWhenDownloadedInFull / pdfSize) * 100
  )

  // How much bandwidth will the pdf incur when downloaded in full?
  Metrics.summary('pdf-bandwidth', sizeWhenDownloadedInFull)

  // How much space do the ranges use?
  // This will accumulate the ranges size over time, skipping already written ranges.
  Metrics.summary('pdf-ranges-disk-size', newRangesSize - reclaimedSpace)
}
|
|
|
|
module.exports = {
|
|
emitPdfStats,
|
|
}
|