diff --git a/services/web/app/src/Features/Uploads/FileTypeManager.js b/services/web/app/src/Features/Uploads/FileTypeManager.js index 76506690e1..5dc7ebbb20 100644 --- a/services/web/app/src/Features/Uploads/FileTypeManager.js +++ b/services/web/app/src/Features/Uploads/FileTypeManager.js @@ -15,7 +15,7 @@ const FileTypeManager = { TEXT_EXTENSIONS: new Set(Settings.textExtensions.map(ext => `.${ext}`)), EDITABLE_FILENAMES: Settings.editableFilenames, - MAX_TEXT_FILE_SIZE: 1 * 1024 * 1024, // 1 MB + MAX_TEXT_FILE_SIZE: 3 * Settings.max_doc_length, // allow 3 bytes for every character isDirectory(path, callback) { fs.stat(path, (error, stats) => { @@ -61,6 +61,9 @@ const FileTypeManager = { } const encoding = _detectEncoding(bytes) const text = bytes.toString(encoding) + if (text.length >= Settings.max_doc_length) { + return callback(null, { binary: true }) // Treat large text file as binary + } // For compatibility with the history service, only accept valid utf8 with no // nulls or non-BMP characters as text, eveything else is binary. if (text.includes('\x00')) { @@ -75,28 +78,6 @@ const FileTypeManager = { }) }, - getStrictTypeFromContent(name, contents) { - const basename = Path.basename(name) - const isText = _isTextFilename(basename) - - if (!isText) { - return false - } - if ( - Buffer.byteLength(contents, 'utf8') > FileTypeManager.MAX_TEXT_FILE_SIZE - ) { - return false - } - if (contents.indexOf('\x00') !== -1) { - return false - } - if (/[\uD800-\uDFFF]/.test(contents)) { - // non-BMP characters (high and low surrogate characters) - return false - } - return true - }, - // FIXME: we can convert this to a synchronous function if we want to shouldIgnore(path, callback) { // use minimatch file matching to check if the path should be ignored diff --git a/services/web/test/unit/src/Uploads/FileTypeManagerTests.js b/services/web/test/unit/src/Uploads/FileTypeManagerTests.js index 16c4304060..ae8645b034 100644 --- a/services/web/test/unit/src/Uploads/FileTypeManagerTests.js +++ b/services/web/test/unit/src/Uploads/FileTypeManagerTests.js @@ -38,6 +38,18 @@ describe('FileTypeManager', function () { this.fs.readFile .withArgs('utf8-control-chars.tex') .yields(null, Buffer.from(`${fileContents}\x0c${fileContents}`)) + this.fs.readFile + .withArgs('text-short.tex') + .yields(null, Buffer.from('a'.repeat(0.5 * 1024 * 1024), 'utf-8')) + this.fs.readFile + .withArgs('text-smaller.tex') + .yields(null, Buffer.from('a'.repeat(2 * 1024 * 1024 - 1), 'utf-8')) + this.fs.readFile + .withArgs('text-exact.tex') + .yields(null, Buffer.from('a'.repeat(2 * 1024 * 1024), 'utf-8')) + this.fs.readFile + .withArgs('text-long.tex') + .yields(null, Buffer.from('a'.repeat(3 * 1024 * 1024), 'utf-8')) this.callback = sinon.stub() this.DocumentHelper = { getEncodingFromTexContent: sinon.stub() } this.FileTypeManager = SandboxedModule.require(modulePath, { @@ -117,8 +129,72 @@ describe('FileTypeManager', function () { }) }) + it('should not classify short text files as binary', function (done) { + this.stats.size = 2 * 1024 * 1024 // 2MB + this.FileTypeManager.getType( + '/file.tex', + 'text-short.tex', + null, + (err, { binary }) => { + if (err) { + return done(err) + } + binary.should.equal(false) + done() + } + ) + }) + + it('should not classify text files just under the size limit as binary', function (done) { + this.stats.size = 2 * 1024 * 1024 // 2MB + this.FileTypeManager.getType( + '/file.tex', + 'text-smaller.tex', + null, + (err, { binary }) => { + if (err) { + return done(err) + } + binary.should.equal(false) + done() + } + ) + }) + + it('should classify text files at the size limit as binary', function (done) { + this.stats.size = 2 * 1024 * 1024 // 2MB + this.FileTypeManager.getType( + '/file.tex', + 'text-exact.tex', + null, + (err, { binary }) => { + if (err) { + return done(err) + } + binary.should.equal(true) + done() + } + ) + }) + + it('should classify long text files as binary', function (done) { + this.stats.size = 2 * 1024 * 1024 // 2MB + this.FileTypeManager.getType( + '/file.tex', + 'text-long.tex', + null, + (err, { binary }) => { + if (err) { + return done(err) + } + binary.should.equal(true) + done() + } + ) + }) + it('should classify large text files as binary', function (done) { - this.stats.size = 2 * 1024 * 1024 // 2Mb + this.stats.size = 8 * 1024 * 1024 // 8MB this.FileTypeManager.getType( '/file.tex', 'utf8.tex', @@ -134,7 +210,7 @@ describe('FileTypeManager', function () { }) it('should not try to determine the encoding of large files', function (done) { - this.stats.size = 2 * 1024 * 1024 // 2Mb + this.stats.size = 8 * 1024 * 1024 // 8MB this.FileTypeManager.getType('/file.tex', 'utf8.tex', null, err => { if (err) { return done(err)