[ContentCacheManager] skip writing of duplicate streams

This commit is contained in:
Jakob Ackermann 2021-05-18 09:50:13 +01:00
parent b456ea726d
commit bc1ed82c6c

View file

@ -20,13 +20,20 @@ async function update(contentDir, filePath) {
const extractor = new PdfStreamsExtractor()
const ranges = []
const newRanges = []
const seenHashes = new Set()
for await (const chunk of stream) {
const pdfStreams = extractor.consume(chunk)
for (const pdfStream of pdfStreams) {
if (pdfStream.end - pdfStream.start < MIN_CHUNK_SIZE) continue
const hash = pdfStreamHash(pdfStream.buffers)
const range = { start: pdfStream.start, end: pdfStream.end, hash }
ranges.push(range)
// Optimization: Skip writing of duplicate streams.
if (seenHashes.has(hash)) continue
seenHashes.add(hash)
if (await writePdfStream(contentDir, hash, pdfStream.buffers)) {
newRanges.push(range)
}