mirror of
https://github.com/overleaf/overleaf.git
synced 2025-04-23 06:07:56 +00:00
Restrict editable files by number of characters (#20462)
GitOrigin-RevId: fad0c14444a9d382ad05c14921118ef3883ede79
This commit is contained in:
parent
a82952f9df
commit
aae03bdf6e
2 changed files with 82 additions and 25 deletions
|
@ -15,7 +15,7 @@ const FileTypeManager = {
|
|||
TEXT_EXTENSIONS: new Set(Settings.textExtensions.map(ext => `.${ext}`)),
|
||||
EDITABLE_FILENAMES: Settings.editableFilenames,
|
||||
|
||||
MAX_TEXT_FILE_SIZE: 1 * 1024 * 1024, // 1 MB
|
||||
MAX_TEXT_FILE_SIZE: 3 * Settings.max_doc_length, // allow 3 bytes for every character
|
||||
|
||||
isDirectory(path, callback) {
|
||||
fs.stat(path, (error, stats) => {
|
||||
|
@ -61,6 +61,9 @@ const FileTypeManager = {
|
|||
}
|
||||
const encoding = _detectEncoding(bytes)
|
||||
const text = bytes.toString(encoding)
|
||||
if (text.length >= Settings.max_doc_length) {
|
||||
return callback(null, { binary: true }) // Treat large text file as binary
|
||||
}
|
||||
// For compatibility with the history service, only accept valid utf8 with no
|
||||
// nulls or non-BMP characters as text; everything else is binary.
|
||||
if (text.includes('\x00')) {
|
||||
|
@ -75,28 +78,6 @@ const FileTypeManager = {
|
|||
})
|
||||
},
|
||||
|
||||
getStrictTypeFromContent(name, contents) {
|
||||
const basename = Path.basename(name)
|
||||
const isText = _isTextFilename(basename)
|
||||
|
||||
if (!isText) {
|
||||
return false
|
||||
}
|
||||
if (
|
||||
Buffer.byteLength(contents, 'utf8') > FileTypeManager.MAX_TEXT_FILE_SIZE
|
||||
) {
|
||||
return false
|
||||
}
|
||||
if (contents.indexOf('\x00') !== -1) {
|
||||
return false
|
||||
}
|
||||
if (/[\uD800-\uDFFF]/.test(contents)) {
|
||||
// non-BMP characters (high and low surrogate characters)
|
||||
return false
|
||||
}
|
||||
return true
|
||||
},
|
||||
|
||||
// FIXME: we can convert this to a synchronous function if we want to
|
||||
shouldIgnore(path, callback) {
|
||||
// use minimatch file matching to check if the path should be ignored
|
||||
|
|
|
@ -38,6 +38,18 @@ describe('FileTypeManager', function () {
|
|||
this.fs.readFile
|
||||
.withArgs('utf8-control-chars.tex')
|
||||
.yields(null, Buffer.from(`${fileContents}\x0c${fileContents}`))
|
||||
this.fs.readFile
|
||||
.withArgs('text-short.tex')
|
||||
.yields(null, Buffer.from('a'.repeat(0.5 * 1024 * 1024), 'utf-8'))
|
||||
this.fs.readFile
|
||||
.withArgs('text-smaller.tex')
|
||||
.yields(null, Buffer.from('a'.repeat(2 * 1024 * 1024 - 1), 'utf-8'))
|
||||
this.fs.readFile
|
||||
.withArgs('text-exact.tex')
|
||||
.yields(null, Buffer.from('a'.repeat(2 * 1024 * 1024), 'utf-8'))
|
||||
this.fs.readFile
|
||||
.withArgs('text-long.tex')
|
||||
.yields(null, Buffer.from('a'.repeat(3 * 1024 * 1024), 'utf-8'))
|
||||
this.callback = sinon.stub()
|
||||
this.DocumentHelper = { getEncodingFromTexContent: sinon.stub() }
|
||||
this.FileTypeManager = SandboxedModule.require(modulePath, {
|
||||
|
@ -117,8 +129,72 @@ describe('FileTypeManager', function () {
|
|||
})
|
||||
})
|
||||
|
||||
// The 'text-short.tex' fixture must be reported as text (binary === false).
it('should not classify short text files as binary', function (done) {
  this.stats.size = 2 * 1024 * 1024 // 2MB
  this.FileTypeManager.getType(
    '/file.tex',
    'text-short.tex',
    null,
    // Default the result to {} so that an error reported without a result
    // object reaches done(err) instead of throwing while destructuring.
    (err, { binary } = {}) => {
      if (err) {
        return done(err)
      }
      binary.should.equal(false)
      done()
    }
  )
})
|
||||
|
||||
// The 'text-smaller.tex' fixture (one character short of the limit) must
// still be reported as text (binary === false).
it('should not classify text files just under the size limit as binary', function (done) {
  this.stats.size = 2 * 1024 * 1024 // 2MB
  this.FileTypeManager.getType(
    '/file.tex',
    'text-smaller.tex',
    null,
    // Default the result to {} so that an error reported without a result
    // object reaches done(err) instead of throwing while destructuring.
    (err, { binary } = {}) => {
      if (err) {
        return done(err)
      }
      binary.should.equal(false)
      done()
    }
  )
})
|
||||
|
||||
// The 'text-exact.tex' fixture sits exactly at the limit and must be
// reported as binary (binary === true).
it('should classify text files at the size limit as binary', function (done) {
  this.stats.size = 2 * 1024 * 1024 // 2MB
  this.FileTypeManager.getType(
    '/file.tex',
    'text-exact.tex',
    null,
    // Default the result to {} so that an error reported without a result
    // object reaches done(err) instead of throwing while destructuring.
    (err, { binary } = {}) => {
      if (err) {
        return done(err)
      }
      binary.should.equal(true)
      done()
    }
  )
})
|
||||
|
||||
// The 'text-long.tex' fixture exceeds the limit and must be reported as
// binary (binary === true).
it('should classify long text files as binary', function (done) {
  this.stats.size = 2 * 1024 * 1024 // 2MB
  this.FileTypeManager.getType(
    '/file.tex',
    'text-long.tex',
    null,
    // Default the result to {} so that an error reported without a result
    // object reaches done(err) instead of throwing while destructuring.
    (err, { binary } = {}) => {
      if (err) {
        return done(err)
      }
      binary.should.equal(true)
      done()
    }
  )
})
|
||||
|
||||
it('should classify large text files as binary', function (done) {
|
||||
this.stats.size = 2 * 1024 * 1024 // 2Mb
|
||||
this.stats.size = 8 * 1024 * 1024 // 8MB
|
||||
this.FileTypeManager.getType(
|
||||
'/file.tex',
|
||||
'utf8.tex',
|
||||
|
@ -134,7 +210,7 @@ describe('FileTypeManager', function () {
|
|||
})
|
||||
|
||||
it('should not try to determine the encoding of large files', function (done) {
|
||||
this.stats.size = 2 * 1024 * 1024 // 2Mb
|
||||
this.stats.size = 8 * 1024 * 1024 // 8MB
|
||||
this.FileTypeManager.getType('/file.tex', 'utf8.tex', null, err => {
|
||||
if (err) {
|
||||
return done(err)
|
||||
|
|
Loading…
Add table
Reference in a new issue