Merge pull request #20297 from overleaf/mj-stream-bib-log

[web] Stream .blg files before parsing

GitOrigin-RevId: 2e7a16a19e62fc32bbcb7fe35bed745b938e6963
Mathias Jakobsen 2024-09-17 11:47:46 +01:00 committed by Copybot
parent 3b839dc885
commit a807a810e6

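In summary: this change extracts the inline streaming download in handleLogFiles into a shared helper, fetchFileWithSizeLimit, and applies it to .blg (BibTeX log) files too, so they are streamed and truncated at MAX_BIB_LOG_SIZE_PER_FILE (1MB) rather than buffered whole with response.text() before parsing. The helper itself appears in the last hunk below, followed by a short usage sketch.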

@@ -9,6 +9,7 @@ import '@/utils/readable-stream-async-iterator-polyfill'
 
 const TRANSIENT_WARNING_REGEX = /^(Reference|Citation).+undefined on input line/
 const MAX_LOG_SIZE = 1024 * 1024 // 1MB
+const MAX_BIB_LOG_SIZE_PER_FILE = MAX_LOG_SIZE
 
 export function handleOutputFiles(outputFiles, projectId, data) {
   const outputFile = outputFiles.get('output.pdf')
@@ -77,31 +78,11 @@ export const handleLogFiles = async (outputFiles, data, signal) => {
   const logFile = outputFiles.get('output.log')
 
   if (logFile) {
-    try {
-      const logFileAbortController = new AbortController()
-
-      // abort fetching the log file if the main signal is aborted
-      signal.addEventListener('abort', () => {
-        logFileAbortController.abort()
-      })
-
-      const response = await fetch(buildURL(logFile, data.pdfDownloadDomain), {
-        signal: logFileAbortController.signal,
-      })
-
-      result.log = ''
-
-      const reader = response.body.pipeThrough(new TextDecoderStream())
-      for await (const chunk of reader) {
-        result.log += chunk
-        if (result.log.length > MAX_LOG_SIZE) {
-          logFileAbortController.abort()
-        }
-      }
-    } catch (e) {
-      debugConsole.warn(e) // ignore failure to fetch the log file, but log a warning
-    }
-
+    result.log = await fetchFileWithSizeLimit(
+      buildURL(logFile, data.pdfDownloadDomain),
+      signal,
+      MAX_LOG_SIZE
+    )
     try {
       let { errors, warnings, typesetting } = HumanReadableLogs.parse(
         result.log,
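As before, the download is tied to the caller's signal through a dedicated AbortController, so cancelling the compile cancels the in-flight download, while the helper can still abort independently when the size cap is reached. A minimal standalone sketch of that linkage pattern (fetchLinked is a hypothetical name, for illustration only):

    // Forward an outer AbortSignal to a dedicated controller: aborting
    // `outerSignal` aborts this fetch, and `inner.abort()` can also stop
    // it on its own without affecting other requests on the same signal.
    async function fetchLinked(url, outerSignal) {
      const inner = new AbortController()
      outerSignal.addEventListener('abort', () => inner.abort())
      return fetch(url, { signal: inner.signal })
    }
    // e.g. const response = await fetchLinked(logUrl, compileSignal)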
@@ -129,23 +110,18 @@ export const handleLogFiles = async (outputFiles, data, signal) => {
     }
   }
 
   for (const blgFile of blgFiles) {
+    const log = await fetchFileWithSizeLimit(
+      buildURL(blgFile, data.pdfDownloadDomain),
+      signal,
+      MAX_BIB_LOG_SIZE_PER_FILE
+    )
     try {
-      const response = await fetch(buildURL(blgFile, data.pdfDownloadDomain), {
-        signal,
-      })
-
-      const log = await response.text()
-
-      try {
-        const { errors, warnings } = new BibLogParser(log, {
-          maxErrors: 100,
-        }).parse()
-        accumulateResults({ errors, warnings }, 'BibTeX:')
-      } catch (e) {
-        // BibLog parsing errors are ignored
-      }
+      const { errors, warnings } = new BibLogParser(log, {
+        maxErrors: 100,
+      }).parse()
+      accumulateResults({ errors, warnings }, 'BibTeX:')
     } catch (e) {
-      debugConsole.warn(e) // ignore failure to fetch/parse the log file, but log a warning
+      // BibLog parsing errors are ignored
     }
   }
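One behavioural consequence of the refactor: a failed download no longer skips parsing. fetchFileWithSizeLimit swallows its own errors and returns whatever prefix it accumulated (possibly an empty string), so the parse step always runs and leans on its own try/catch, which also covers truncated files that end mid-entry. In short, the contract callers now rely on (inferred from the helper's code in the next hunk):

    // The helper never throws: on network failure or abort it logs a
    // warning and returns the partial text collected so far ('' if none),
    // so `log` must be treated as possibly empty or truncated.
    const log = await fetchFileWithSizeLimit(url, signal, maxSize)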
@@ -260,3 +236,33 @@ function normalizeFilePath(path, rootDocDirname) {
 function isTransientWarning(warning) {
   return TRANSIENT_WARNING_REGEX.test(warning.message)
 }
+
+async function fetchFileWithSizeLimit(url, signal, maxSize) {
+  let result = ''
+  try {
+    const abortController = new AbortController()
+    // abort fetching the log file if the main signal is aborted
+    signal.addEventListener('abort', () => {
+      abortController.abort()
+    })
+
+    const response = await fetch(url, {
+      signal: abortController.signal,
+    })
+
+    if (!response.ok) {
+      throw new Error('Failed to fetch log file')
+    }
+
+    const reader = response.body.pipeThrough(new TextDecoderStream())
+    for await (const chunk of reader) {
+      result += chunk
+      if (result.length > maxSize) {
+        abortController.abort()
+      }
+    }
+  } catch (e) {
+    debugConsole.warn(e) // ignore failure to fetch the log file, but log a warning
+  }
+  return result
+}
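A usage sketch of the new helper (the URL and controller here are hypothetical; the cap mirrors MAX_LOG_SIZE):

    // Hypothetical caller: stream at most ~1MB of a compile log.
    const compileController = new AbortController()
    const text = await fetchFileWithSizeLimit(
      'https://compiles.example.com/output.blg', // hypothetical URL
      compileController.signal,
      1024 * 1024
    )
    // `text` may exceed the cap by up to one decoded chunk: the chunk
    // that crosses the limit is appended before abort() is called; the
    // resulting AbortError is caught and the accumulated prefix returned.

Note that the streaming loop (`for await` over the decoded body) depends on the readable-stream async-iterator polyfill imported at the top of this file for browsers where ReadableStream is not async-iterable.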