Merge pull request #12367 from overleaf/jlm-jpa-log-large-unarchive-job

[docstore] log a warning when a potentially large doc is unarchived

GitOrigin-RevId: 665b6cf700e76e7ea433788c09413823f95e7829
This commit is contained in:
Jakob Ackermann 2023-03-23 14:22:55 +00:00 committed by Copybot
parent d1d47b2767
commit ff492d572c

View file

@@ -126,7 +126,7 @@ async function getDoc(projectId, docId) {
key
)
stream.resume()
const buffer = await _streamToBuffer(stream)
const buffer = await _streamToBuffer(projectId, docId, stream)
const md5 = crypto.createHash('md5').update(buffer).digest('hex')
if (sourceMd5 !== md5) {
throw new Errors.Md5MismatchError('md5 mismatch when downloading doc', {
@@ -171,12 +171,31 @@ async function destroyProject(projectId) {
await Promise.all(tasks)
}
/**
 * Collect a readable stream into a single Buffer.
 *
 * While reading, emit a warning (once mid-stream and once again at the end)
 * when the accumulated byte count exceeds `Settings.max_doc_length`, so that
 * potentially oversized docs pulled from object storage are visible in logs.
 *
 * @param {string} projectId - included in the warning log context
 * @param {string} docId - included in the warning log context
 * @param {stream.Readable} stream - source stream of Buffer chunks
 * @returns {Promise<Buffer>} the concatenated stream contents
 */
async function _streamToBuffer(projectId, docId, stream) {
  const chunks = []
  // BUG FIX: `size` must start at 0. With no initializer, `size += n` yields
  // NaN, and `NaN <= Settings.max_doc_length` is false, so the size guard
  // below never short-circuits and the warning fires for every doc.
  let size = 0
  let logged = false
  const logIfTooLarge = finishedReading => {
    if (size <= Settings.max_doc_length) return
    // Log progress once and then again at the end.
    if (logged && !finishedReading) return
    logger.warn(
      { projectId, docId, size, finishedReading },
      'potentially large doc pulled down from gcs'
    )
    logged = true
  }
  return new Promise((resolve, reject) => {
    stream.on('data', chunk => {
      size += chunk.byteLength
      logIfTooLarge(false)
      chunks.push(chunk)
    })
    stream.on('error', reject)
    stream.on('end', () => {
      logIfTooLarge(true)
      resolve(Buffer.concat(chunks))
    })
  })
}