2020-02-19 11:14:28 +00:00
|
|
|
// TODO: This file was created by bulk-decaffeinate.
|
|
|
|
// Fix any style issues and re-enable lint.
|
2020-02-19 11:14:14 +00:00
|
|
|
/*
|
|
|
|
* decaffeinate suggestions:
|
|
|
|
* DS101: Remove unnecessary use of Array.from
|
|
|
|
* DS102: Remove unnecessary code created because of implicit returns
|
|
|
|
* DS103: Rewrite code to no longer use __guard__
|
|
|
|
* DS104: Avoid inline assignments
|
|
|
|
* DS204: Change includes calls to have a more natural evaluation order
|
|
|
|
* DS207: Consider shorter variations of null checks
|
|
|
|
* Full docs: https://github.com/decaffeinate/decaffeinate/blob/master/docs/suggestions.md
|
|
|
|
*/
|
2020-02-19 11:14:37 +00:00
|
|
|
let OutputCacheManager
|
2021-11-15 12:07:17 +00:00
|
|
|
const { callbackify, promisify } = require('util')
|
2020-02-19 11:14:37 +00:00
|
|
|
const async = require('async')
|
|
|
|
const fs = require('fs')
|
|
|
|
const fse = require('fs-extra')
|
|
|
|
const Path = require('path')
|
2022-03-01 15:09:36 +00:00
|
|
|
const logger = require('@overleaf/logger')
|
2020-06-15 08:52:21 +00:00
|
|
|
const _ = require('lodash')
|
2021-07-12 16:47:21 +00:00
|
|
|
const Settings = require('@overleaf/settings')
|
2020-02-19 11:14:37 +00:00
|
|
|
const crypto = require('crypto')
|
2021-05-13 13:07:54 +00:00
|
|
|
const Metrics = require('./Metrics')
|
2020-02-19 11:14:14 +00:00
|
|
|
|
2020-02-19 11:14:37 +00:00
|
|
|
const OutputFileOptimiser = require('./OutputFileOptimiser')
|
2021-05-13 13:07:54 +00:00
|
|
|
const ContentCacheManager = require('./ContentCacheManager')
|
2022-08-02 09:09:22 +00:00
|
|
|
const {
|
|
|
|
QueueLimitReachedError,
|
|
|
|
TimedOutError,
|
|
|
|
NoXrefTableError,
|
|
|
|
} = require('./Errors')
|
2020-02-19 11:14:14 +00:00
|
|
|
|
2021-11-15 12:07:17 +00:00
|
|
|
const OLDEST_BUILD_DIR = new Map()
|
|
|
|
const PENDING_PROJECT_ACTIONS = new Map()
|
|
|
|
|
|
|
|
/**
 * Start the background cleanup of the output directory.
 *
 * The setup runs asynchronously and is not awaited here: a rejection from
 * doInit() is logged at fatal level and otherwise ignored.
 */
function init() {
  const reportSetupFailure = err => {
    logger.fatal({ err }, 'low level error setting up cleanup of output dir')
    // consider shutting down?
  }
  doInit().catch(reportSetupFailure)
}
|
|
|
|
|
|
|
|
/**
 * Startup sequence for the cleanup machinery: populate the in-memory registry
 * via fillCache(), run a first bulk cleanup, then schedule the next run from
 * the timestamp that cleanup returned.
 *
 * @returns {Promise<void>} rejects if fillCache() or runBulkCleanup() rejects
 */
async function doInit() {
  await fillCache()
  scheduleBulkCleanup(await runBulkCleanup())
}
|
|
|
|
|
|
|
|
/**
 * Arm a timer for the next bulk cleanup run.
 *
 * The timer fires one minute after the entry with the given timestamp reaches
 * CACHE_AGE (or one minute from now if it already has). When it fires, a bulk
 * cleanup runs and the next timer is armed from its result.
 *
 * @param {number} oldestTimestamp - epoch milliseconds of the oldest entry
 *   that has not expired yet, as returned by runBulkCleanup()
 */
function scheduleBulkCleanup(oldestTimestamp) {
  const gracePeriod = 60 * 1000
  const timeUntilExpiry = Math.max(
    OutputCacheManager.CACHE_AGE + oldestTimestamp - Date.now(),
    0
  )
  const cleanupAndReschedule = async () => {
    const nextOldestTimestamp = await runBulkCleanup()
    scheduleBulkCleanup(nextOldestTimestamp)
  }
  setTimeout(cleanupAndReschedule, timeUntilExpiry + gracePeriod)
}
|
|
|
|
|
|
|
|
/**
 * Register every entry found in Settings.path.outputDir in OLDEST_BUILD_DIR.
 *
 * The real age of the entries is not inspected. Each one gets a random
 * timestamp between now and CACHE_AGE ago, which spreads their first cleanup
 * over the coming CACHE_AGE window.
 *
 * @returns {Promise<void>} rejects if the output directory cannot be opened
 *   or read
 */
async function fillCache() {
  const outputRoot = Settings.path.outputDir
  const dirHandle = await fs.promises.opendir(outputRoot)
  try {
    for await (const entry of dirHandle) {
      // Queue them for cleanup in the next hour.
      const pretendAge = Math.random() * OutputCacheManager.CACHE_AGE
      OLDEST_BUILD_DIR.set(
        Path.join(outputRoot, entry.name),
        Date.now() - pretendAge
      )
    }
  } finally {
    try {
      await dirHandle.close()
    } catch (closeErr) {
      // Best effort: errors from closing the handle are deliberately ignored
      // (presumably the iteration above has already closed it).
    }
  }
}
|
|
|
|
|
|
|
|
/**
 * Walk all registered output directories and clean up, one at a time, those
 * whose recorded timestamp is older than CACHE_AGE.
 *
 * @returns {Promise<number>} the smallest timestamp among the entries that
 *   were not due for cleanup, or roughly "now" when there is none
 */
async function runBulkCleanup() {
  const expiryCutoff = Date.now() - OutputCacheManager.CACHE_AGE
  let oldestSurvivor = Date.now()
  for (const [buildDir, recordedAt] of OLDEST_BUILD_DIR) {
    if (recordedAt < expiryCutoff) {
      await cleanupDirectory(buildDir, {
        limit: OutputCacheManager.CACHE_LIMIT,
      })
      continue
    }
    if (recordedAt < oldestSurvivor) {
      oldestSurvivor = recordedAt
    }
  }
  return oldestSurvivor
}
|
|
|
|
|
|
|
|
/**
 * Expire old builds in one output directory, serialised with any other
 * operation queued for the same directory.
 *
 * Failures of the expiry are logged and swallowed, so the queued operation
 * itself always completes successfully.
 *
 * @param {string} dir - output directory to clean up
 * @param {Object} options - passed through to expireOutputFiles
 * @returns {Promise<void>}
 */
async function cleanupDirectory(dir, options) {
  const expireAndLogFailure = async () => {
    try {
      await OutputCacheManager.promises.expireOutputFiles(dir, options)
    } catch (err) {
      logger.err({ dir, err }, 'cleanup of output directory failed')
    }
  }
  return queueDirOperation(dir, expireAndLogFailure)
}
|
|
|
|
|
|
|
|
/**
 * Run `fn` after every operation previously queued for `dir` has settled.
 *
 * Operations for the same directory run strictly one after another; `fn` is
 * started regardless of whether its predecessor succeeded or failed. The
 * bookkeeping entry for `dir` is dropped once the last queued operation has
 * settled.
 *
 * @param {string} dir - key identifying the queue
 * @param {Function} fn - operation to run; may return a promise
 * @returns {Promise<*>} settles with the outcome of `fn`
 */
async function queueDirOperation(dir, fn) {
  let tail = PENDING_PROJECT_ACTIONS.get(dir)
  if (!tail) {
    tail = Promise.resolve()
  }
  const forgetIfStillLast = () => {
    // Only the most recently queued operation may remove the entry.
    if (PENDING_PROJECT_ACTIONS.get(dir) === operation) {
      PENDING_PROJECT_ACTIONS.delete(dir)
    }
  }
  const operation = tail.then(fn, fn).finally(forgetIfStillLast)
  PENDING_PROJECT_ACTIONS.set(dir, operation)
  return operation
}
|
|
|
|
|
2020-02-19 11:14:37 +00:00
|
|
|
module.exports = OutputCacheManager = {
|
2021-05-13 13:07:54 +00:00
|
|
|
CONTENT_SUBDIR: 'content',
|
2020-12-15 14:59:05 +00:00
|
|
|
CACHE_SUBDIR: 'generated-files',
|
|
|
|
ARCHIVE_SUBDIR: 'archived-logs',
|
2020-02-19 11:14:37 +00:00
|
|
|
// build id is HEXDATE-HEXRANDOM from Date.now() and crypto.randomBytes
|
|
|
|
// for backwards compatibility, make the randombytes part optional
|
|
|
|
BUILD_REGEX: /^[0-9a-f]+(-[0-9a-f]+)?$/,
|
2021-05-13 13:56:15 +00:00
|
|
|
CONTENT_REGEX: /^[0-9a-f]+(-[0-9a-f]+)?$/,
|
2020-02-19 11:14:37 +00:00
|
|
|
CACHE_LIMIT: 2, // maximum number of cache directories
|
|
|
|
CACHE_AGE: 60 * 60 * 1000, // up to one hour old
|
2020-02-19 11:14:14 +00:00
|
|
|
|
2021-11-15 12:07:17 +00:00
|
|
|
init,
|
|
|
|
queueDirOperation: callbackify(queueDirOperation),
|
|
|
|
|
2020-02-19 11:14:37 +00:00
|
|
|
path(buildId, file) {
|
|
|
|
// used by static server, given build id return '.cache/clsi/buildId'
|
|
|
|
if (buildId.match(OutputCacheManager.BUILD_REGEX)) {
|
|
|
|
return Path.join(OutputCacheManager.CACHE_SUBDIR, buildId, file)
|
|
|
|
} else {
|
|
|
|
// for invalid build id, return top level
|
|
|
|
return file
|
|
|
|
}
|
|
|
|
},
|
2020-02-19 11:14:14 +00:00
|
|
|
|
2020-02-19 11:14:37 +00:00
|
|
|
generateBuildId(callback) {
|
|
|
|
// generate a secure build id from Date.now() and 8 random bytes in hex
|
|
|
|
if (callback == null) {
|
2021-10-27 09:49:18 +00:00
|
|
|
callback = function () {}
|
2020-02-19 11:14:37 +00:00
|
|
|
}
|
2020-08-10 16:01:11 +00:00
|
|
|
return crypto.randomBytes(8, function (err, buf) {
|
2020-02-19 11:14:37 +00:00
|
|
|
if (err != null) {
|
|
|
|
return callback(err)
|
|
|
|
}
|
|
|
|
const random = buf.toString('hex')
|
|
|
|
const date = Date.now().toString(16)
|
|
|
|
return callback(err, `${date}-${random}`)
|
|
|
|
})
|
|
|
|
},
|
2020-02-19 11:14:14 +00:00
|
|
|
|
2021-05-13 13:07:54 +00:00
|
|
|
saveOutputFiles(
|
|
|
|
{ request, stats, timings },
|
|
|
|
outputFiles,
|
|
|
|
compileDir,
|
|
|
|
outputDir,
|
|
|
|
callback
|
|
|
|
) {
|
2020-02-19 11:14:37 +00:00
|
|
|
if (callback == null) {
|
2021-10-27 09:49:18 +00:00
|
|
|
callback = function () {}
|
2020-02-19 11:14:37 +00:00
|
|
|
}
|
2020-08-10 16:01:11 +00:00
|
|
|
return OutputCacheManager.generateBuildId(function (err, buildId) {
|
2020-02-19 11:14:37 +00:00
|
|
|
if (err != null) {
|
|
|
|
return callback(err)
|
|
|
|
}
|
2021-11-15 12:07:17 +00:00
|
|
|
if (!OLDEST_BUILD_DIR.has(outputDir)) {
|
|
|
|
// Register for cleanup
|
|
|
|
OLDEST_BUILD_DIR.set(outputDir, Date.now())
|
|
|
|
}
|
|
|
|
|
|
|
|
OutputCacheManager.queueDirOperation(
|
2020-12-15 14:59:05 +00:00
|
|
|
outputDir,
|
2021-11-15 12:07:17 +00:00
|
|
|
() =>
|
|
|
|
OutputCacheManager.promises.saveOutputFilesInBuildDir(
|
|
|
|
outputFiles,
|
|
|
|
compileDir,
|
|
|
|
outputDir,
|
|
|
|
buildId
|
|
|
|
),
|
2021-05-13 13:07:54 +00:00
|
|
|
function (err, result) {
|
|
|
|
if (err != null) {
|
|
|
|
return callback(err)
|
|
|
|
}
|
|
|
|
OutputCacheManager.collectOutputPdfSize(
|
|
|
|
result,
|
|
|
|
outputDir,
|
|
|
|
stats,
|
2022-07-20 08:33:34 +00:00
|
|
|
(err, outputFiles) => {
|
|
|
|
if (err) return callback(err, outputFiles)
|
2021-05-13 13:07:54 +00:00
|
|
|
|
2022-07-20 08:33:34 +00:00
|
|
|
const enablePdfCaching = request.enablePdfCaching
|
|
|
|
const enablePdfCachingDark =
|
|
|
|
Settings.enablePdfCachingDark && !request.enablePdfCaching
|
|
|
|
if (
|
|
|
|
!Settings.enablePdfCaching ||
|
|
|
|
(!enablePdfCaching && !enablePdfCachingDark)
|
|
|
|
) {
|
|
|
|
return callback(null, outputFiles)
|
2021-05-13 13:07:54 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
OutputCacheManager.saveStreamsInContentDir(
|
2022-07-20 08:33:34 +00:00
|
|
|
{ request, stats, timings, enablePdfCachingDark },
|
|
|
|
outputFiles,
|
2021-05-13 13:07:54 +00:00
|
|
|
compileDir,
|
|
|
|
outputDir,
|
2022-07-20 08:33:44 +00:00
|
|
|
(err, status) => {
|
|
|
|
Metrics.inc('pdf-caching-status', 1, {
|
|
|
|
status,
|
|
|
|
...request.metricsOpts,
|
|
|
|
})
|
2022-07-20 08:33:34 +00:00
|
|
|
if (err) {
|
|
|
|
logger.warn(
|
|
|
|
{ err, outputDir, stats, timings },
|
|
|
|
'pdf caching failed'
|
|
|
|
)
|
|
|
|
return callback(null, outputFiles)
|
|
|
|
}
|
|
|
|
callback(err, outputFiles)
|
|
|
|
}
|
2021-05-13 13:07:54 +00:00
|
|
|
)
|
|
|
|
}
|
|
|
|
)
|
|
|
|
}
|
2020-02-19 11:14:37 +00:00
|
|
|
)
|
|
|
|
})
|
|
|
|
},
|
2020-02-19 11:14:14 +00:00
|
|
|
|
2020-12-15 14:59:05 +00:00
|
|
|
saveOutputFilesInBuildDir(
|
|
|
|
outputFiles,
|
|
|
|
compileDir,
|
|
|
|
outputDir,
|
|
|
|
buildId,
|
|
|
|
callback
|
|
|
|
) {
|
2020-02-19 11:14:37 +00:00
|
|
|
// make a compileDir/CACHE_SUBDIR/build_id directory and
|
|
|
|
// copy all the output files into it
|
|
|
|
if (callback == null) {
|
2021-10-27 09:49:18 +00:00
|
|
|
callback = function () {}
|
2020-02-19 11:14:37 +00:00
|
|
|
}
|
|
|
|
// Put the files into a new cache subdirectory
|
|
|
|
const cacheDir = Path.join(
|
2020-12-15 14:59:05 +00:00
|
|
|
outputDir,
|
2020-02-19 11:14:37 +00:00
|
|
|
OutputCacheManager.CACHE_SUBDIR,
|
|
|
|
buildId
|
|
|
|
)
|
|
|
|
// Is it a per-user compile? check if compile directory is PROJECTID-USERID
|
|
|
|
const perUser = Path.basename(compileDir).match(
|
|
|
|
/^[0-9a-f]{24}-[0-9a-f]{24}$/
|
|
|
|
)
|
2020-02-19 11:14:14 +00:00
|
|
|
|
2020-02-19 11:14:37 +00:00
|
|
|
// Archive logs in background
|
|
|
|
if (
|
|
|
|
(Settings.clsi != null ? Settings.clsi.archive_logs : undefined) ||
|
|
|
|
(Settings.clsi != null ? Settings.clsi.strace : undefined)
|
|
|
|
) {
|
2020-08-10 16:01:11 +00:00
|
|
|
OutputCacheManager.archiveLogs(
|
|
|
|
outputFiles,
|
|
|
|
compileDir,
|
2020-12-15 14:59:05 +00:00
|
|
|
outputDir,
|
2020-08-10 16:01:11 +00:00
|
|
|
buildId,
|
|
|
|
function (err) {
|
|
|
|
if (err != null) {
|
|
|
|
return logger.warn({ err }, 'erroring archiving log files')
|
|
|
|
}
|
2020-02-19 11:14:37 +00:00
|
|
|
}
|
2020-08-10 16:01:11 +00:00
|
|
|
)
|
2020-02-19 11:14:37 +00:00
|
|
|
}
|
2020-02-19 11:14:14 +00:00
|
|
|
|
2020-02-19 11:14:37 +00:00
|
|
|
// make the new cache directory
|
2020-08-10 16:01:11 +00:00
|
|
|
return fse.ensureDir(cacheDir, function (err) {
|
2020-02-19 11:14:37 +00:00
|
|
|
if (err != null) {
|
|
|
|
logger.error(
|
|
|
|
{ err, directory: cacheDir },
|
|
|
|
'error creating cache directory'
|
|
|
|
)
|
|
|
|
return callback(err, outputFiles)
|
|
|
|
} else {
|
|
|
|
// copy all the output files into the new cache directory
|
|
|
|
const results = []
|
|
|
|
return async.mapSeries(
|
|
|
|
outputFiles,
|
2020-08-10 16:01:11 +00:00
|
|
|
function (file, cb) {
|
2020-02-19 11:14:37 +00:00
|
|
|
// don't send dot files as output, express doesn't serve them
|
|
|
|
if (OutputCacheManager._fileIsHidden(file.path)) {
|
|
|
|
logger.debug(
|
|
|
|
{ compileDir, path: file.path },
|
|
|
|
'ignoring dotfile in output'
|
|
|
|
)
|
|
|
|
return cb()
|
|
|
|
}
|
|
|
|
// copy other files into cache directory if valid
|
|
|
|
const newFile = _.clone(file)
|
|
|
|
const [src, dst] = Array.from([
|
|
|
|
Path.join(compileDir, file.path),
|
2021-07-13 11:04:48 +00:00
|
|
|
Path.join(cacheDir, file.path),
|
2020-02-19 11:14:37 +00:00
|
|
|
])
|
2021-07-13 11:04:48 +00:00
|
|
|
return OutputCacheManager._checkFileIsSafe(
|
|
|
|
src,
|
|
|
|
function (err, isSafe) {
|
2020-02-19 11:14:37 +00:00
|
|
|
if (err != null) {
|
|
|
|
return cb(err)
|
|
|
|
}
|
2021-07-13 11:04:48 +00:00
|
|
|
if (!isSafe) {
|
2020-02-19 11:14:37 +00:00
|
|
|
return cb()
|
|
|
|
}
|
2021-07-13 11:04:48 +00:00
|
|
|
return OutputCacheManager._checkIfShouldCopy(
|
|
|
|
src,
|
|
|
|
function (err, shouldCopy) {
|
|
|
|
if (err != null) {
|
|
|
|
return cb(err)
|
|
|
|
}
|
|
|
|
if (!shouldCopy) {
|
|
|
|
return cb()
|
|
|
|
}
|
|
|
|
return OutputCacheManager._copyFile(
|
|
|
|
src,
|
|
|
|
dst,
|
|
|
|
function (err) {
|
|
|
|
if (err != null) {
|
|
|
|
return cb(err)
|
|
|
|
}
|
|
|
|
newFile.build = buildId // attach a build id if we cached the file
|
|
|
|
results.push(newFile)
|
|
|
|
return cb()
|
|
|
|
}
|
|
|
|
)
|
2020-02-19 11:14:37 +00:00
|
|
|
}
|
2021-07-13 11:04:48 +00:00
|
|
|
)
|
|
|
|
}
|
|
|
|
)
|
2020-02-19 11:14:37 +00:00
|
|
|
},
|
2020-08-10 16:01:11 +00:00
|
|
|
function (err) {
|
2020-02-19 11:14:37 +00:00
|
|
|
if (err != null) {
|
|
|
|
// pass back the original files if we encountered *any* error
|
|
|
|
callback(err, outputFiles)
|
|
|
|
// clean up the directory we just created
|
2020-08-10 16:01:11 +00:00
|
|
|
return fse.remove(cacheDir, function (err) {
|
2020-02-19 11:14:37 +00:00
|
|
|
if (err != null) {
|
|
|
|
return logger.error(
|
|
|
|
{ err, dir: cacheDir },
|
|
|
|
'error removing cache dir after failure'
|
|
|
|
)
|
|
|
|
}
|
|
|
|
})
|
|
|
|
} else {
|
|
|
|
// pass back the list of new files in the cache
|
|
|
|
callback(err, results)
|
|
|
|
// let file expiry run in the background, expire all previous files if per-user
|
2021-11-15 12:07:17 +00:00
|
|
|
cleanupDirectory(outputDir, {
|
2020-02-19 11:14:37 +00:00
|
|
|
keep: buildId,
|
2021-07-13 11:04:48 +00:00
|
|
|
limit: perUser ? 1 : null,
|
2021-11-15 12:07:17 +00:00
|
|
|
}).catch(() => {})
|
2020-02-19 11:14:37 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
)
|
|
|
|
}
|
|
|
|
})
|
|
|
|
},
|
2020-02-19 11:14:14 +00:00
|
|
|
|
2021-05-13 13:07:54 +00:00
|
|
|
collectOutputPdfSize(outputFiles, outputDir, stats, callback) {
|
2021-07-13 11:04:48 +00:00
|
|
|
const outputFile = outputFiles.find(x => x.path === 'output.pdf')
|
2021-05-13 13:07:54 +00:00
|
|
|
if (!outputFile) return callback(null, outputFiles)
|
|
|
|
const outputFilePath = Path.join(
|
|
|
|
outputDir,
|
|
|
|
OutputCacheManager.path(outputFile.build, outputFile.path)
|
|
|
|
)
|
|
|
|
fs.stat(outputFilePath, (err, stat) => {
|
|
|
|
if (err) return callback(err, outputFiles)
|
|
|
|
|
|
|
|
outputFile.size = stat.size
|
|
|
|
stats['pdf-size'] = outputFile.size
|
|
|
|
callback(null, outputFiles)
|
|
|
|
})
|
|
|
|
},
|
|
|
|
|
|
|
|
saveStreamsInContentDir(
|
2022-07-20 08:33:34 +00:00
|
|
|
{ request, stats, timings, enablePdfCachingDark },
|
2021-05-13 13:07:54 +00:00
|
|
|
outputFiles,
|
|
|
|
compileDir,
|
|
|
|
outputDir,
|
|
|
|
callback
|
|
|
|
) {
|
|
|
|
const cacheRoot = Path.join(outputDir, OutputCacheManager.CONTENT_SUBDIR)
|
|
|
|
// check if content dir exists
|
|
|
|
OutputCacheManager.ensureContentDir(cacheRoot, function (err, contentDir) {
|
2022-07-20 08:33:44 +00:00
|
|
|
if (err) return callback(err, 'content-dir-unavailable')
|
2021-05-13 13:07:54 +00:00
|
|
|
|
2021-07-13 11:04:48 +00:00
|
|
|
const outputFile = outputFiles.find(x => x.path === 'output.pdf')
|
2021-05-13 13:07:54 +00:00
|
|
|
if (outputFile) {
|
|
|
|
// possibly we should copy the file from the build dir here
|
|
|
|
const outputFilePath = Path.join(
|
|
|
|
outputDir,
|
|
|
|
OutputCacheManager.path(outputFile.build, outputFile.path)
|
|
|
|
)
|
2021-05-31 08:20:25 +00:00
|
|
|
const pdfSize = outputFile.size
|
2022-03-01 15:09:55 +00:00
|
|
|
const timer = new Metrics.Timer(
|
|
|
|
'compute-pdf-ranges',
|
|
|
|
1,
|
|
|
|
request.metricsOpts
|
|
|
|
)
|
2021-05-31 08:20:25 +00:00
|
|
|
ContentCacheManager.update(
|
|
|
|
contentDir,
|
|
|
|
outputFilePath,
|
|
|
|
pdfSize,
|
2021-06-23 13:14:28 +00:00
|
|
|
timings.compile,
|
2021-05-31 08:20:25 +00:00
|
|
|
function (err, result) {
|
2022-08-02 09:09:22 +00:00
|
|
|
if (err && err instanceof NoXrefTableError) {
|
|
|
|
return callback(null, err.message)
|
|
|
|
}
|
2021-08-24 09:22:30 +00:00
|
|
|
if (err && err instanceof QueueLimitReachedError) {
|
|
|
|
logger.warn({ err, outputDir }, 'pdf caching queue limit reached')
|
|
|
|
stats['pdf-caching-queue-limit-reached'] = 1
|
2022-07-20 08:33:44 +00:00
|
|
|
return callback(null, 'queue-limit')
|
2021-08-24 09:22:30 +00:00
|
|
|
}
|
2021-06-23 13:14:28 +00:00
|
|
|
if (err && err instanceof TimedOutError) {
|
|
|
|
logger.warn(
|
|
|
|
{ err, outputDir, stats, timings },
|
|
|
|
'pdf caching timed out'
|
|
|
|
)
|
|
|
|
stats['pdf-caching-timed-out'] = 1
|
2022-07-20 08:33:44 +00:00
|
|
|
return callback(null, 'timed-out')
|
2021-06-23 13:14:28 +00:00
|
|
|
}
|
2022-07-20 08:33:44 +00:00
|
|
|
if (err) return callback(err, 'failed')
|
2022-07-20 14:17:41 +00:00
|
|
|
const {
|
2022-07-15 08:03:40 +00:00
|
|
|
contentRanges,
|
|
|
|
newContentRanges,
|
|
|
|
reclaimedSpace,
|
|
|
|
startXRefTable,
|
2022-07-20 14:17:41 +00:00
|
|
|
} = result
|
2021-05-13 13:07:54 +00:00
|
|
|
|
2022-07-20 08:33:34 +00:00
|
|
|
if (enablePdfCachingDark) {
|
2021-05-31 08:20:25 +00:00
|
|
|
// In dark mode we are doing the computation only and do not emit
|
|
|
|
// any ranges to the frontend.
|
|
|
|
} else {
|
|
|
|
outputFile.contentId = Path.basename(contentDir)
|
|
|
|
outputFile.ranges = contentRanges
|
2022-07-15 08:03:40 +00:00
|
|
|
outputFile.startXRefTable = startXRefTable
|
2021-05-31 08:20:25 +00:00
|
|
|
}
|
2021-05-13 13:07:54 +00:00
|
|
|
|
2021-05-31 08:20:25 +00:00
|
|
|
timings['compute-pdf-caching'] = timer.done()
|
|
|
|
stats['pdf-caching-n-ranges'] = contentRanges.length
|
|
|
|
stats['pdf-caching-total-ranges-size'] = contentRanges.reduce(
|
|
|
|
(sum, next) => sum + (next.end - next.start),
|
|
|
|
0
|
|
|
|
)
|
|
|
|
stats['pdf-caching-n-new-ranges'] = newContentRanges.length
|
|
|
|
stats['pdf-caching-new-ranges-size'] = newContentRanges.reduce(
|
|
|
|
(sum, next) => sum + (next.end - next.start),
|
|
|
|
0
|
|
|
|
)
|
|
|
|
stats['pdf-caching-reclaimed-space'] = reclaimedSpace
|
2022-07-20 08:33:44 +00:00
|
|
|
callback(null, 'success')
|
2021-05-31 08:20:25 +00:00
|
|
|
}
|
|
|
|
)
|
2021-05-13 13:07:54 +00:00
|
|
|
} else {
|
2022-07-20 08:33:44 +00:00
|
|
|
callback(null, 'missing-pdf')
|
2021-05-13 13:07:54 +00:00
|
|
|
}
|
|
|
|
})
|
|
|
|
},
|
|
|
|
|
|
|
|
ensureContentDir(contentRoot, callback) {
|
|
|
|
fse.ensureDir(contentRoot, function (err) {
|
|
|
|
if (err != null) {
|
|
|
|
return callback(err)
|
|
|
|
}
|
|
|
|
fs.readdir(contentRoot, function (err, results) {
|
2021-10-27 09:49:18 +00:00
|
|
|
if (err) return callback(err)
|
2021-05-13 13:07:54 +00:00
|
|
|
const dirs = results.sort()
|
2021-07-13 11:04:48 +00:00
|
|
|
const contentId = dirs.find(dir =>
|
2021-05-13 13:07:54 +00:00
|
|
|
OutputCacheManager.BUILD_REGEX.test(dir)
|
|
|
|
)
|
|
|
|
if (contentId) {
|
|
|
|
callback(null, Path.join(contentRoot, contentId))
|
|
|
|
} else {
|
|
|
|
// make a content directory
|
|
|
|
OutputCacheManager.generateBuildId(function (err, contentId) {
|
|
|
|
if (err) {
|
|
|
|
return callback(err)
|
|
|
|
}
|
|
|
|
const contentDir = Path.join(contentRoot, contentId)
|
|
|
|
fse.ensureDir(contentDir, function (err) {
|
|
|
|
if (err) {
|
|
|
|
return callback(err)
|
|
|
|
}
|
|
|
|
return callback(null, contentDir)
|
|
|
|
})
|
|
|
|
})
|
|
|
|
}
|
|
|
|
})
|
|
|
|
})
|
|
|
|
},
|
|
|
|
|
2020-12-15 14:59:05 +00:00
|
|
|
archiveLogs(outputFiles, compileDir, outputDir, buildId, callback) {
|
2020-02-19 11:14:37 +00:00
|
|
|
if (callback == null) {
|
2021-10-27 09:49:18 +00:00
|
|
|
callback = function () {}
|
2020-02-19 11:14:37 +00:00
|
|
|
}
|
|
|
|
const archiveDir = Path.join(
|
2020-12-15 14:59:05 +00:00
|
|
|
outputDir,
|
2020-02-19 11:14:37 +00:00
|
|
|
OutputCacheManager.ARCHIVE_SUBDIR,
|
|
|
|
buildId
|
|
|
|
)
|
2022-05-16 12:38:18 +00:00
|
|
|
logger.debug({ dir: archiveDir }, 'archiving log files for project')
|
2020-08-10 16:01:11 +00:00
|
|
|
return fse.ensureDir(archiveDir, function (err) {
|
2020-02-19 11:14:37 +00:00
|
|
|
if (err != null) {
|
|
|
|
return callback(err)
|
|
|
|
}
|
|
|
|
return async.mapSeries(
|
|
|
|
outputFiles,
|
2020-08-10 16:01:11 +00:00
|
|
|
function (file, cb) {
|
2020-02-19 11:14:37 +00:00
|
|
|
const [src, dst] = Array.from([
|
|
|
|
Path.join(compileDir, file.path),
|
2021-07-13 11:04:48 +00:00
|
|
|
Path.join(archiveDir, file.path),
|
2020-02-19 11:14:37 +00:00
|
|
|
])
|
2021-07-13 11:04:48 +00:00
|
|
|
return OutputCacheManager._checkFileIsSafe(
|
|
|
|
src,
|
|
|
|
function (err, isSafe) {
|
2020-02-19 11:14:37 +00:00
|
|
|
if (err != null) {
|
|
|
|
return cb(err)
|
|
|
|
}
|
2021-07-13 11:04:48 +00:00
|
|
|
if (!isSafe) {
|
2020-02-19 11:14:37 +00:00
|
|
|
return cb()
|
|
|
|
}
|
2021-07-13 11:04:48 +00:00
|
|
|
return OutputCacheManager._checkIfShouldArchive(
|
|
|
|
src,
|
|
|
|
function (err, shouldArchive) {
|
|
|
|
if (err != null) {
|
|
|
|
return cb(err)
|
|
|
|
}
|
|
|
|
if (!shouldArchive) {
|
|
|
|
return cb()
|
|
|
|
}
|
|
|
|
return OutputCacheManager._copyFile(src, dst, cb)
|
|
|
|
}
|
|
|
|
)
|
|
|
|
}
|
|
|
|
)
|
2020-02-19 11:14:37 +00:00
|
|
|
},
|
|
|
|
callback
|
|
|
|
)
|
|
|
|
})
|
|
|
|
},
|
2020-02-19 11:14:14 +00:00
|
|
|
|
2021-11-15 12:07:17 +00:00
|
|
|
expireOutputFiles(outputDir, options, callback) {
|
2020-02-19 11:14:37 +00:00
|
|
|
// look in compileDir for build dirs and delete if > N or age of mod time > T
|
|
|
|
if (callback == null) {
|
2021-10-27 09:49:18 +00:00
|
|
|
callback = function () {}
|
2020-02-19 11:14:37 +00:00
|
|
|
}
|
2021-11-15 12:07:17 +00:00
|
|
|
const cleanupAll = cb => {
|
|
|
|
fse.remove(outputDir, err => {
|
|
|
|
if (err) {
|
|
|
|
return cb(err)
|
|
|
|
}
|
|
|
|
// Drop reference after successful cleanup of the output dir.
|
|
|
|
OLDEST_BUILD_DIR.delete(outputDir)
|
|
|
|
cb(null)
|
|
|
|
})
|
|
|
|
}
|
|
|
|
|
|
|
|
const cacheRoot = Path.join(outputDir, OutputCacheManager.CACHE_SUBDIR)
|
2020-08-10 16:01:11 +00:00
|
|
|
return fs.readdir(cacheRoot, function (err, results) {
|
2020-02-19 11:14:37 +00:00
|
|
|
if (err != null) {
|
|
|
|
if (err.code === 'ENOENT') {
|
2021-11-15 12:07:17 +00:00
|
|
|
// cache directory is empty
|
|
|
|
return cleanupAll(callback)
|
|
|
|
}
|
2020-02-19 11:14:37 +00:00
|
|
|
logger.error({ err, project_id: cacheRoot }, 'error clearing cache')
|
|
|
|
return callback(err)
|
|
|
|
}
|
2020-02-19 11:14:14 +00:00
|
|
|
|
2020-02-19 11:14:37 +00:00
|
|
|
const dirs = results.sort().reverse()
|
|
|
|
const currentTime = Date.now()
|
2020-02-19 11:14:14 +00:00
|
|
|
|
2021-11-15 12:07:17 +00:00
|
|
|
let oldestDirTimeToKeep = 0
|
|
|
|
|
2020-08-10 16:01:11 +00:00
|
|
|
const isExpired = function (dir, index) {
|
2020-02-19 11:14:37 +00:00
|
|
|
if ((options != null ? options.keep : undefined) === dir) {
|
2021-11-15 12:07:17 +00:00
|
|
|
// This is the directory we just created for the compile request.
|
|
|
|
oldestDirTimeToKeep = currentTime
|
2020-02-19 11:14:37 +00:00
|
|
|
return false
|
|
|
|
}
|
|
|
|
// remove any directories over the requested (non-null) limit
|
|
|
|
if (
|
|
|
|
(options != null ? options.limit : undefined) != null &&
|
|
|
|
index > options.limit
|
|
|
|
) {
|
|
|
|
return true
|
|
|
|
}
|
|
|
|
// remove any directories over the hard limit
|
|
|
|
if (index > OutputCacheManager.CACHE_LIMIT) {
|
|
|
|
return true
|
|
|
|
}
|
|
|
|
// we can get the build time from the first part of the directory name DDDD-RRRR
|
|
|
|
// DDDD is date and RRRR is random bytes
|
|
|
|
const dirTime = parseInt(
|
2021-07-13 11:04:48 +00:00
|
|
|
__guard__(dir.split('-'), x => x[0]),
|
2020-02-19 11:14:37 +00:00
|
|
|
16
|
|
|
|
)
|
|
|
|
const age = currentTime - dirTime
|
2021-11-15 12:07:17 +00:00
|
|
|
const expired = age > OutputCacheManager.CACHE_AGE
|
|
|
|
if (expired) {
|
|
|
|
return true
|
|
|
|
}
|
|
|
|
oldestDirTimeToKeep = dirTime
|
|
|
|
return false
|
2020-02-19 11:14:37 +00:00
|
|
|
}
|
2020-02-19 11:14:14 +00:00
|
|
|
|
2020-02-19 11:14:37 +00:00
|
|
|
const toRemove = _.filter(dirs, isExpired)
|
2021-11-15 12:07:17 +00:00
|
|
|
if (toRemove.length === dirs.length) {
|
|
|
|
// No builds left after cleanup.
|
|
|
|
return cleanupAll(callback)
|
|
|
|
}
|
2020-02-19 11:14:14 +00:00
|
|
|
|
2020-02-19 11:14:37 +00:00
|
|
|
const removeDir = (dir, cb) =>
|
2020-08-10 16:01:11 +00:00
|
|
|
fse.remove(Path.join(cacheRoot, dir), function (err, result) {
|
2022-05-16 12:38:18 +00:00
|
|
|
logger.debug({ cache: cacheRoot, dir }, 'removed expired cache dir')
|
2020-02-19 11:14:37 +00:00
|
|
|
if (err != null) {
|
|
|
|
logger.error({ err, dir }, 'cache remove error')
|
|
|
|
}
|
|
|
|
return cb(err, result)
|
|
|
|
})
|
|
|
|
return async.eachSeries(
|
|
|
|
toRemove,
|
|
|
|
(dir, cb) => removeDir(dir, cb),
|
2021-11-15 12:07:17 +00:00
|
|
|
err => {
|
|
|
|
if (err) {
|
|
|
|
// On error: keep the timestamp in the past.
|
|
|
|
// The next iteration of the cleanup loop will retry the deletion.
|
|
|
|
return callback(err)
|
|
|
|
}
|
|
|
|
// On success: push the timestamp into the future.
|
|
|
|
OLDEST_BUILD_DIR.set(outputDir, oldestDirTimeToKeep)
|
|
|
|
callback(null)
|
|
|
|
}
|
2020-02-19 11:14:37 +00:00
|
|
|
)
|
|
|
|
})
|
|
|
|
},
|
2020-02-19 11:14:14 +00:00
|
|
|
|
2020-02-19 11:14:37 +00:00
|
|
|
_fileIsHidden(path) {
|
|
|
|
return (path != null ? path.match(/^\.|\/\./) : undefined) != null
|
|
|
|
},
|
2020-02-19 11:14:14 +00:00
|
|
|
|
2020-02-19 11:14:37 +00:00
|
|
|
_checkFileIsSafe(src, callback) {
|
|
|
|
// check if we have a valid file to copy into the cache
|
|
|
|
if (callback == null) {
|
2021-10-27 09:49:18 +00:00
|
|
|
callback = function () {}
|
2020-02-19 11:14:37 +00:00
|
|
|
}
|
2020-08-10 16:01:11 +00:00
|
|
|
return fs.stat(src, function (err, stats) {
|
2020-02-19 11:14:37 +00:00
|
|
|
if ((err != null ? err.code : undefined) === 'ENOENT') {
|
|
|
|
logger.warn(
|
|
|
|
{ err, file: src },
|
|
|
|
'file has disappeared before copying to build cache'
|
|
|
|
)
|
|
|
|
return callback(err, false)
|
|
|
|
} else if (err != null) {
|
|
|
|
// some other problem reading the file
|
|
|
|
logger.error({ err, file: src }, 'stat error for file in cache')
|
|
|
|
return callback(err, false)
|
|
|
|
} else if (!stats.isFile()) {
|
|
|
|
// other filetype - reject it
|
|
|
|
logger.warn(
|
|
|
|
{ src, stat: stats },
|
|
|
|
'nonfile output - refusing to copy to cache'
|
|
|
|
)
|
|
|
|
return callback(null, false)
|
|
|
|
} else {
|
|
|
|
// it's a plain file, ok to copy
|
|
|
|
return callback(null, true)
|
|
|
|
}
|
|
|
|
})
|
|
|
|
},
|
2020-02-19 11:14:14 +00:00
|
|
|
|
2020-02-19 11:14:37 +00:00
|
|
|
_copyFile(src, dst, callback) {
|
|
|
|
// copy output file into the cache
|
2020-08-10 16:01:11 +00:00
|
|
|
return fse.copy(src, dst, function (err) {
|
2020-02-19 11:14:37 +00:00
|
|
|
if ((err != null ? err.code : undefined) === 'ENOENT') {
|
|
|
|
logger.warn(
|
|
|
|
{ err, file: src },
|
|
|
|
'file has disappeared when copying to build cache'
|
|
|
|
)
|
|
|
|
return callback(err, false)
|
|
|
|
} else if (err != null) {
|
|
|
|
logger.error({ err, src, dst }, 'copy error for file in cache')
|
|
|
|
return callback(err)
|
|
|
|
} else {
|
|
|
|
if (
|
|
|
|
Settings.clsi != null ? Settings.clsi.optimiseInDocker : undefined
|
|
|
|
) {
|
|
|
|
// don't run any optimisations on the pdf when they are done
|
|
|
|
// in the docker container
|
|
|
|
return callback()
|
|
|
|
} else {
|
|
|
|
// call the optimiser for the file too
|
|
|
|
return OutputFileOptimiser.optimiseFile(src, dst, callback)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
})
|
|
|
|
},
|
2020-02-19 11:14:14 +00:00
|
|
|
|
2020-02-19 11:14:37 +00:00
|
|
|
_checkIfShouldCopy(src, callback) {
|
|
|
|
if (callback == null) {
|
2021-10-27 09:49:18 +00:00
|
|
|
callback = function () {}
|
2020-02-19 11:14:37 +00:00
|
|
|
}
|
|
|
|
return callback(null, !Path.basename(src).match(/^strace/))
|
|
|
|
},
|
2020-02-19 11:14:14 +00:00
|
|
|
|
2020-02-19 11:14:37 +00:00
|
|
|
_checkIfShouldArchive(src, callback) {
|
|
|
|
let needle
|
|
|
|
if (callback == null) {
|
2021-10-27 09:49:18 +00:00
|
|
|
callback = function () {}
|
2020-02-19 11:14:37 +00:00
|
|
|
}
|
|
|
|
if (Path.basename(src).match(/^strace/)) {
|
|
|
|
return callback(null, true)
|
|
|
|
}
|
|
|
|
if (
|
|
|
|
(Settings.clsi != null ? Settings.clsi.archive_logs : undefined) &&
|
|
|
|
((needle = Path.basename(src)),
|
|
|
|
['output.log', 'output.blg'].includes(needle))
|
|
|
|
) {
|
|
|
|
return callback(null, true)
|
|
|
|
}
|
|
|
|
return callback(null, false)
|
2021-07-13 11:04:48 +00:00
|
|
|
},
|
2020-02-19 11:14:37 +00:00
|
|
|
}
|
2020-02-19 11:14:14 +00:00
|
|
|
|
|
|
|
/**
 * Apply `transform` to `value` unless the value is null or undefined.
 * Helper left over from the CoffeeScript conversion (soaked access).
 *
 * @param {*} value - value to guard
 * @param {Function} transform - called with `value` when it is not nullish
 * @returns {*} the result of `transform(value)`, or undefined for a nullish
 *   value
 */
function __guard__(value, transform) {
  return value != null ? transform(value) : undefined
}
|
2021-11-15 12:07:17 +00:00
|
|
|
|
|
|
|
// Promise-returning variants of the callback-style methods listed below.
OutputCacheManager.promises = {}
for (const methodName of [
  'expireOutputFiles',
  'saveOutputFiles',
  'saveOutputFilesInBuildDir',
]) {
  OutputCacheManager.promises[methodName] = promisify(
    OutputCacheManager[methodName]
  )
}
|