mirror of
https://github.com/overleaf/overleaf.git
synced 2024-11-21 20:47:08 -05:00
Merge pull request #236 from overleaf/jpa-skip-duplicate-writes
[ContentCacheManager] skip writing of duplicate streams
This commit is contained in:
commit
23dd93ae50
1 changed file with 7 additions and 0 deletions
|
@@ -25,13 +25,20 @@ async function update(contentDir, filePath) {
|
|||
const extractor = new PdfStreamsExtractor()
|
||||
const ranges = []
|
||||
const newRanges = []
|
||||
const seenHashes = new Set()
|
||||
for await (const chunk of stream) {
|
||||
const pdfStreams = extractor.consume(chunk)
|
||||
for (const pdfStream of pdfStreams) {
|
||||
if (pdfStream.end - pdfStream.start < MIN_CHUNK_SIZE) continue
|
||||
const hash = pdfStreamHash(pdfStream.buffers)
|
||||
|
||||
const range = { start: pdfStream.start, end: pdfStream.end, hash }
|
||||
ranges.push(range)
|
||||
|
||||
// Optimization: Skip writing of duplicate streams.
|
||||
if (seenHashes.has(hash)) continue
|
||||
seenHashes.add(hash)
|
||||
|
||||
if (await writePdfStream(contentDir, hash, pdfStream.buffers)) {
|
||||
newRanges.push(range)
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue