overleaf/services/clsi/app/js/ContentCacheMetrics.js
Jakob Ackermann 56a3b0dcde Merge pull request #4819 from overleaf/jpa-pdf-caching-new-event-loop
[clsi] pdf-caching: move cpu intensive work onto a new event loop

GitOrigin-RevId: 4cb5cd4528fa1c5df6a8e91f9caa38cb64d94463
2021-08-25 08:03:38 +00:00

118 lines
3.4 KiB
JavaScript

const logger = require('logger-sharelatex')
const Metrics = require('./Metrics')
const os = require('os')
// Latest load-average sample plus the timestamp (ms) after which it is stale.
// The initial expiry of -1 guarantees that the first call takes a real sample.
let CACHED_LOAD = { expires: -1, load: [0, 0, 0] }

/**
 * Get the system load averages as reported by `os.loadavg()`.
 *
 * A sample is kept for ten seconds; calls made before it expires get the
 * very same array back instead of triggering a new `os.loadavg()` call.
 *
 * @returns {number[]} the (possibly cached) result of `os.loadavg()`
 */
function getSystemLoad() {
  const isStillFresh = CACHED_LOAD.expires >= Date.now()
  if (!isStillFresh) {
    const expires = Date.now() + 10 * 1000
    CACHED_LOAD = { expires, load: os.loadavg() }
  }
  return CACHED_LOAD.load
}
// 1024 * 1024; used as the divisor when metrics are reported relative to a size in MB.
const ONE_MB = 1024 * 1024
/**
 * Emit the metrics for one compile.
 *
 * Increments a counter for each pdf-caching failure flag that is set on
 * `stats`. When a 'compute-pdf-caching' timing is present the detailed
 * caching metrics are emitted via emitPdfCachingStats; otherwise only the
 * 'pdf-bandwidth' summary is recorded, using the full pdf size.
 *
 * @param {Object} stats - per-compile stats, keyed by metric name
 * @param {Object} timings - per-compile timings, keyed by metric name
 */
function emitPdfStats(stats, timings) {
  // Each flag doubles as the name of the counter to increment.
  const failureFlags = [
    'pdf-caching-timed-out',
    'pdf-caching-queue-limit-reached',
  ]
  for (const flag of failureFlags) {
    if (stats[flag]) Metrics.inc(flag)
  }

  const hasCachingTiming = Boolean(timings['compute-pdf-caching'])
  if (!hasCachingTiming) {
    // How much bandwidth will the pdf incur when downloaded in full?
    Metrics.summary('pdf-bandwidth', stats['pdf-size'])
    return
  }
  emitPdfCachingStats(stats, timings)
}
/**
 * Emit the detailed pdf-caching metrics for one compile.
 *
 * Does nothing when `stats['pdf-size']` is missing or zero. Otherwise it
 * records timing metrics (absolute, and relative to pdf size, ranges size
 * and ranges count) and summary metrics (overhead, cacheable share,
 * bandwidth, bandwidth savings, disk usage). A warning including the
 * system load is logged when the caching overhead is high on a long compile.
 *
 * @param {Object} stats - per-compile stats, keyed by metric name
 * @param {Object} timings - per-compile timings; reads `compileE2E` and
 *   'compute-pdf-caching'
 */
function emitPdfCachingStats(stats, timings) {
  const pdfSize = stats['pdf-size']
  if (!pdfSize) return // double check

  const cachingTime = timings['compute-pdf-caching']
  const totalRangesSize = stats['pdf-caching-total-ranges-size']
  const newRangesSize = stats['pdf-caching-new-ranges-size']
  const rangesCount = stats['pdf-caching-n-ranges']

  // How much extra time did we spend in PDF.js?
  Metrics.timing('compute-pdf-caching', cachingTime)

  // How large is the overhead of hashing up-front?
  // Expressed as total time over the time without the caching step; falls
  // back to 1 (no overhead) when that denominator would be zero.
  const timeWithoutCaching = timings.compileE2E - cachingTime
  let fraction = 1
  if (timeWithoutCaching !== 0) {
    fraction = timings.compileE2E / timeWithoutCaching
  }

  const isSlow = fraction > 1.5 && timings.compileE2E > 10 * 1000
  if (isSlow) {
    const context = { stats, timings, load: getSystemLoad() }
    logger.warn(context, 'slow pdf caching')
  }
  Metrics.summary('overhead-compute-pdf-ranges', fraction * 100 - 100)

  // How does the hashing scale to pdf size in MB?
  const pdfSizeInMB = pdfSize / ONE_MB
  Metrics.timing(
    'compute-pdf-caching-relative-to-pdf-size',
    cachingTime / pdfSizeInMB
  )

  if (totalRangesSize) {
    // How does the hashing scale to total ranges size in MB?
    const totalRangesSizeInMB = totalRangesSize / ONE_MB
    Metrics.timing(
      'compute-pdf-caching-relative-to-total-ranges-size',
      cachingTime / totalRangesSizeInMB
    )
    // How fast is the hashing per range on average?
    Metrics.timing(
      'compute-pdf-caching-relative-to-ranges-count',
      cachingTime / rangesCount
    )
    // How many ranges are new?
    const newRangesShare = stats['pdf-caching-n-new-ranges'] / rangesCount
    Metrics.summary(
      'new-pdf-ranges-relative-to-total-ranges',
      newRangesShare * 100
    )
  }

  // How much content is cacheable?
  Metrics.summary(
    'cacheable-ranges-to-pdf-size',
    (totalRangesSize / pdfSize) * 100
  )

  // All of the pdf, minus the ranges that are potentially cached, plus the
  // ranges that are not cached yet.
  const sizeWhenDownloadedInFull = pdfSize - totalRangesSize + newRangesSize

  // How much bandwidth can we save when downloading the pdf in full?
  Metrics.summary(
    'pdf-bandwidth-savings',
    100 - (sizeWhenDownloadedInFull / pdfSize) * 100
  )
  // How much bandwidth will the pdf incur when downloaded in full?
  Metrics.summary('pdf-bandwidth', sizeWhenDownloadedInFull)

  // How much space do the ranges use?
  // This will accumulate the ranges size over time, skipping already written ranges.
  Metrics.summary(
    'pdf-ranges-disk-size',
    newRangesSize - stats['pdf-caching-reclaimed-space']
  )
}
// Public API of this module: only emitPdfStats is exported.
module.exports = {
emitPdfStats,
}