overleaf/services/clsi/test/unit/js/ContentCacheManagerTests.js
Jakob Ackermann 294088fb27 [ContentCacheManager] use PDF.js Xref table instead of stream detection (#242)
* make the content cache manager tests configurable

* extend stream content in unit tests

* [ContentCacheManagerTests] prepare for full object caching

* filesystem stream for pdfjs

* working??

* cleaning up

* handle overflow

* [misc] install pdfjs-dist

* [misc] move pdfjs code into app/lib/ and scripts/, also use CamelCase

* [misc] abstract the file loading and parsing of xRef tables into helper

* [misc] pdfjsTests: add snapshot based tests for the Xref table parser

* [misc] FSStream: throw proper error and drop commented code

* [misc] FSStream: integrate throwing of MissingDataException into getter

* [misc] pdfjs: fix eslint errors

* [misc] pdfjs: run format_fix

* [misc] pdfjs: allocate very small non empty dummy buffers explicitly

* [misc] install @overleaf/o-error

* [ContentCacheManager] use PDF.js Xref table instead of stream detection

Co-Authored-By: Brian Gough <brian.gough@overleaf.com>

* [pdfjs] parseXrefTable: handle empty PDF files gracefully

Co-authored-by: Brian Gough <brian.gough@overleaf.com>
2021-05-31 09:20:25 +01:00

220 lines
6.6 KiB
JavaScript

const fs = require('fs')
const Path = require('path')
const { expect } = require('chai')
const MODULE_PATH = '../../../app/js/ContentCacheManager'
describe('ContentCacheManager', function () {
let contentDir, pdfPath
let ContentCacheManager, files, Settings
before(function () {
Settings = require('settings-sharelatex')
ContentCacheManager = require(MODULE_PATH)
})
// Results of the most recent run(): every range, the ranges reported as new,
// and the reclaimed space accumulated over all runs.
let contentRanges
let newContentRanges
let reclaimed

/**
 * Run the content cache update for one PDF and capture the outcome.
 *
 * Stores the returned ranges in `contentRanges` / `newContentRanges`, adds the
 * reclaimed amount to `reclaimed`, and then reads every entry of `contentDir`
 * into `files`, keyed by full path.
 *
 * @param {string} filePath path of the PDF passed to the cache manager
 * @param {number} size size passed to the cache manager alongside the path
 */
async function run(filePath, size) {
  const [
    ranges,
    freshRanges,
    reclaimedNow
  ] = await ContentCacheManager.promises.update(contentDir, filePath, size)
  contentRanges = ranges
  newContentRanges = freshRanges
  reclaimed += reclaimedNow

  const entries = await fs.promises.readdir(contentDir)
  const snapshot = {}
  files = snapshot
  for (const entry of entries) {
    const entryPath = Path.join(contentDir, entry)
    snapshot[entryPath] = await fs.promises.readFile(entryPath)
  }
}
// Value of Settings.pdfCachingMinChunkSize before this suite changed it.
let originalMinChunkSize
// Point the suite at fixed output locations, reset the reclaimed counter and
// pick the initial minimum chunk size.
before(function () {
  contentDir =
    '/app/output/602cee6f6460fca0ba7921e6/content/1797a7f48f9-5abc1998509dea1f'
  pdfPath =
    '/app/output/602cee6f6460fca0ba7921e6/generated-files/1797a7f48ea-8ac6805139f43351/output.pdf'
  reclaimed = 0
  // Settings is a shared object: remember the previous value so that the
  // overrides made by this suite do not leak into other test files.
  originalMinChunkSize = Settings.pdfCachingMinChunkSize
  Settings.pdfCachingMinChunkSize = 1024
})
// Undo every pdfCachingMinChunkSize override once the whole suite is done.
after(function () {
  Settings.pdfCachingMinChunkSize = originalMinChunkSize
})
// Start from an empty content directory and make sure the directory that
// receives the copied PDF exists.
before(async function () {
  // rmdir with { recursive: true } is deprecated and, on Node.js >= 16,
  // rejects with ENOENT when the directory does not exist yet (e.g. on the
  // first run). fs.promises.rm with force: true tolerates a missing path;
  // older runtimes without fs.promises.rm keep using the previous call.
  if (typeof fs.promises.rm === 'function') {
    await fs.promises.rm(contentDir, { recursive: true, force: true })
  } else {
    await fs.promises.rmdir(contentDir, { recursive: true })
  }
  await fs.promises.mkdir(contentDir, { recursive: true })
  await fs.promises.mkdir(Path.dirname(pdfPath), { recursive: true })
})
describe('minimal', function () {
// PDF fixture that is copied into place and fed to the cache manager.
const PATH_MINIMAL = 'test/acceptance/fixtures/minimal.pdf'
// Object id and hash expected for the first (larger) range.
const OBJECT_ID_1 = '9 0 '
const HASH_LARGE =
  'd7cfc73ad2fba4578a437517923e3714927bbf35e63ea88bd93c7a8076cf1fcd'
// Object id and hash expected for the second (smaller) range.
const OBJECT_ID_2 = '10 0 '
const HASH_SMALL =
  '896749b8343851b0dc385f71616916a7ba0434fcfb56d1fc7e27cd139eaa2f71'

/**
 * Locate the snapshot file of a chunk.
 *
 * @param {string} hash file name of the chunk inside the snapshot directory
 * @returns {string} path of the snapshot file
 */
function getChunkPath(hash) {
  const snapshotDir = 'test/unit/js/snapshots/minimalCompile/chunks'
  return Path.join(snapshotDir, hash)
}
// Size of the fixture PDF.
let MINIMAL_SIZE
// Snapshot bytes, hash and [start, end) offsets of the first range.
let RANGE_1
let h1
let START_1
let END_1
// Snapshot bytes, hash and [start, end) offsets of the second range.
let RANGE_2
let h2
let START_2
let END_2
// Copy the fixture to the PDF path and derive the expected offsets by
// searching the fixture for the bytes of each snapshot chunk.
before(async function () {
  await fs.promises.copyFile(PATH_MINIMAL, pdfPath)
  const pdfBytes = await fs.promises.readFile(PATH_MINIMAL)
  const { size } = await fs.promises.stat(PATH_MINIMAL)
  MINIMAL_SIZE = size
  RANGE_1 = await fs.promises.readFile(getChunkPath(HASH_LARGE))
  RANGE_2 = await fs.promises.readFile(getChunkPath(HASH_SMALL))
  ;[h1, h2] = [HASH_LARGE, HASH_SMALL]
  START_1 = pdfBytes.indexOf(RANGE_1)
  END_1 = START_1 + RANGE_1.byteLength
  START_2 = pdfBytes.indexOf(RANGE_2)
  END_2 = START_2 + RANGE_2.byteLength
})
/**
 * Run the cache update on the copied fixture PDF, passing the fixture's size.
 */
async function runWithMinimal() {
  const pending = run(pdfPath, MINIMAL_SIZE)
  await pending
}
describe('with two ranges qualifying', function () {
// Lower the minimum chunk size to 500 for this group, then do the first run.
before(function () {
  Settings.pdfCachingMinChunkSize = 500
})
before(async function () {
  await runWithMinimal()
})

it('should produce two ranges', function () {
  expect(contentRanges).to.have.length(2)
})

it('should find the correct offsets', function () {
  const firstRange = {
    objectId: OBJECT_ID_1,
    start: START_1,
    end: END_1,
    hash: h1
  }
  const secondRange = {
    objectId: OBJECT_ID_2,
    start: START_2,
    end: END_2,
    hash: h2
  }
  expect(contentRanges).to.deep.equal([firstRange, secondRange])
})

it('should store the contents', function () {
  // Both chunks are expected on disk next to a state file with age 0 each.
  const state = {
    hashAge: [
      [h1, 0],
      [h2, 0]
    ],
    hashSize: [
      [h1, RANGE_1.byteLength],
      [h2, RANGE_2.byteLength]
    ]
  }
  const expectedFiles = {
    [Path.join(contentDir, h1)]: RANGE_1,
    [Path.join(contentDir, h2)]: RANGE_2,
    [Path.join(contentDir, '.state.v0.json')]: Buffer.from(
      JSON.stringify(state)
    )
  }
  expect(files).to.deep.equal(expectedFiles)
})

it('should mark all ranges as new', function () {
  expect(contentRanges).to.deep.equal(newContentRanges)
})
describe('when re-running with one range too small', function () {
// Raise the minimum chunk size to 1024 for this group, then run once more.
before(function () {
  Settings.pdfCachingMinChunkSize = 1024
})
before(async function () {
  await runWithMinimal()
})

it('should produce one range', function () {
  expect(contentRanges).to.have.length(1)
})

it('should find the correct offsets', function () {
  const onlyRange = {
    objectId: OBJECT_ID_1,
    start: START_1,
    end: END_1,
    hash: h1
  }
  expect(contentRanges).to.deep.equal([onlyRange])
})

it('should update the age of the 2nd range', function () {
  // The second chunk is still expected on disk, now with age 1 in the state.
  const state = {
    hashAge: [
      [h1, 0],
      [h2, 1]
    ],
    hashSize: [
      [h1, RANGE_1.byteLength],
      [h2, RANGE_2.byteLength]
    ]
  }
  const expectedFiles = {
    [Path.join(contentDir, h1)]: RANGE_1,
    [Path.join(contentDir, h2)]: RANGE_2,
    [Path.join(contentDir, '.state.v0.json')]: Buffer.from(
      JSON.stringify(state)
    )
  }
  expect(files).to.deep.equal(expectedFiles)
})

it('should find no new ranges', function () {
  expect(newContentRanges).to.deep.equal([])
})
describe('when re-running 5 more times', function () {
  // Register one hook per additional run.
  const EXTRA_RUNS = 5
  Array.from({ length: EXTRA_RUNS }).forEach(() => {
    before(async function () {
      await runWithMinimal()
    })
  })

  it('should still produce one range', function () {
    expect(contentRanges).to.have.length(1)
  })

  it('should still find the correct offsets', function () {
    const onlyRange = {
      objectId: OBJECT_ID_1,
      start: START_1,
      end: END_1,
      hash: h1
    }
    expect(contentRanges).to.deep.equal([onlyRange])
  })

  it('should delete the 2nd range', function () {
    // Only the first chunk and a state file mentioning just h1 should remain.
    const state = {
      hashAge: [[h1, 0]],
      hashSize: [[h1, RANGE_1.byteLength]]
    }
    const expectedFiles = {
      [Path.join(contentDir, h1)]: RANGE_1,
      [Path.join(contentDir, '.state.v0.json')]: Buffer.from(
        JSON.stringify(state)
      )
    }
    expect(files).to.deep.equal(expectedFiles)
  })

  it('should find no new ranges', function () {
    expect(newContentRanges).to.deep.equal([])
  })

  it('should yield the reclaimed space', function () {
    expect(reclaimed).to.equal(RANGE_2.byteLength)
  })
})
})
})
})
})