Restrict editable files by number of characters (#20462)

GitOrigin-RevId: fad0c14444a9d382ad05c14921118ef3883ede79
This commit is contained in:
Alf Eaton 2024-11-29 09:10:12 +00:00 committed by Copybot
parent a82952f9df
commit aae03bdf6e
2 changed files with 82 additions and 25 deletions

View file

@ -15,7 +15,7 @@ const FileTypeManager = {
TEXT_EXTENSIONS: new Set(Settings.textExtensions.map(ext => `.${ext}`)),
EDITABLE_FILENAMES: Settings.editableFilenames,
MAX_TEXT_FILE_SIZE: 1 * 1024 * 1024, // 1 MB
MAX_TEXT_FILE_SIZE: 3 * Settings.max_doc_length, // allow 3 bytes for every character
isDirectory(path, callback) {
fs.stat(path, (error, stats) => {
@ -61,6 +61,9 @@ const FileTypeManager = {
}
const encoding = _detectEncoding(bytes)
const text = bytes.toString(encoding)
if (text.length >= Settings.max_doc_length) {
return callback(null, { binary: true }) // Treat large text file as binary
}
// For compatibility with the history service, only accept valid utf8 with no
// nulls or non-BMP characters as text, everything else is binary.
if (text.includes('\x00')) {
@ -75,28 +78,6 @@ const FileTypeManager = {
})
},
getStrictTypeFromContent(name, contents) {
const basename = Path.basename(name)
const isText = _isTextFilename(basename)
if (!isText) {
return false
}
if (
Buffer.byteLength(contents, 'utf8') > FileTypeManager.MAX_TEXT_FILE_SIZE
) {
return false
}
if (contents.indexOf('\x00') !== -1) {
return false
}
if (/[\uD800-\uDFFF]/.test(contents)) {
// non-BMP characters (high and low surrogate characters)
return false
}
return true
},
// FIXME: we can convert this to a synchronous function if we want to
shouldIgnore(path, callback) {
// use minimatch file matching to check if the path should be ignored

View file

@ -38,6 +38,18 @@ describe('FileTypeManager', function () {
this.fs.readFile
.withArgs('utf8-control-chars.tex')
.yields(null, Buffer.from(`${fileContents}\x0c${fileContents}`))
this.fs.readFile
.withArgs('text-short.tex')
.yields(null, Buffer.from('a'.repeat(0.5 * 1024 * 1024), 'utf-8'))
this.fs.readFile
.withArgs('text-smaller.tex')
.yields(null, Buffer.from('a'.repeat(2 * 1024 * 1024 - 1), 'utf-8'))
this.fs.readFile
.withArgs('text-exact.tex')
.yields(null, Buffer.from('a'.repeat(2 * 1024 * 1024), 'utf-8'))
this.fs.readFile
.withArgs('text-long.tex')
.yields(null, Buffer.from('a'.repeat(3 * 1024 * 1024), 'utf-8'))
this.callback = sinon.stub()
this.DocumentHelper = { getEncodingFromTexContent: sinon.stub() }
this.FileTypeManager = SandboxedModule.require(modulePath, {
@ -117,8 +129,72 @@ describe('FileTypeManager', function () {
})
})
// A text file well below the character limit must be reported as text.
// The 'text-short.tex' fixture is 0.5 * 1024 * 1024 'a' characters.
it('should not classify short text files as binary', function (done) {
  // Size set on the stats stub (presumably what the stubbed fs.stat
  // reports — confirm in the test setup).
  this.stats.size = 2 * 1024 * 1024 // 2MB
  this.FileTypeManager.getType(
    '/file.tex',
    'text-short.tex',
    null,
    (err, result) => {
      // Check the error first: destructuring the result in the parameter
      // list would throw a TypeError when the callback is invoked with an
      // error and no result, hiding the real failure from `done`.
      if (err) {
        return done(err)
      }
      result.binary.should.equal(false)
      done()
    }
  )
})
// A text file one character short of the limit must still be reported as
// text. The 'text-smaller.tex' fixture is 2 * 1024 * 1024 - 1 'a' characters
// (presumably Settings.max_doc_length is 2 * 1024 * 1024 here — confirm in
// the test settings).
it('should not classify text files just under the size limit as binary', function (done) {
  // Size set on the stats stub (presumably what the stubbed fs.stat
  // reports — confirm in the test setup).
  this.stats.size = 2 * 1024 * 1024 // 2MB
  this.FileTypeManager.getType(
    '/file.tex',
    'text-smaller.tex',
    null,
    (err, result) => {
      // Check the error first: destructuring the result in the parameter
      // list would throw a TypeError when the callback is invoked with an
      // error and no result, hiding the real failure from `done`.
      if (err) {
        return done(err)
      }
      result.binary.should.equal(false)
      done()
    }
  )
})
// A text file whose length reaches the limit must be reported as binary.
// The 'text-exact.tex' fixture is 2 * 1024 * 1024 'a' characters
// (presumably equal to Settings.max_doc_length here — confirm in the test
// settings).
it('should classify text files at the size limit as binary', function (done) {
  // Size set on the stats stub (presumably what the stubbed fs.stat
  // reports — confirm in the test setup).
  this.stats.size = 2 * 1024 * 1024 // 2MB
  this.FileTypeManager.getType(
    '/file.tex',
    'text-exact.tex',
    null,
    (err, result) => {
      // Check the error first: destructuring the result in the parameter
      // list would throw a TypeError when the callback is invoked with an
      // error and no result, hiding the real failure from `done`.
      if (err) {
        return done(err)
      }
      result.binary.should.equal(true)
      done()
    }
  )
})
// A text file longer than the character limit must be reported as binary.
// The 'text-long.tex' fixture is 3 * 1024 * 1024 'a' characters.
it('should classify long text files as binary', function (done) {
  // Size set on the stats stub (presumably what the stubbed fs.stat
  // reports — confirm in the test setup).
  this.stats.size = 2 * 1024 * 1024 // 2MB
  this.FileTypeManager.getType(
    '/file.tex',
    'text-long.tex',
    null,
    (err, result) => {
      // Check the error first: destructuring the result in the parameter
      // list would throw a TypeError when the callback is invoked with an
      // error and no result, hiding the real failure from `done`.
      if (err) {
        return done(err)
      }
      result.binary.should.equal(true)
      done()
    }
  )
})
it('should classify large text files as binary', function (done) {
this.stats.size = 2 * 1024 * 1024 // 2Mb
this.stats.size = 8 * 1024 * 1024 // 8MB
this.FileTypeManager.getType(
'/file.tex',
'utf8.tex',
@ -134,7 +210,7 @@ describe('FileTypeManager', function () {
})
it('should not try to determine the encoding of large files', function (done) {
this.stats.size = 2 * 1024 * 1024 // 2Mb
this.stats.size = 8 * 1024 * 1024 // 8MB
this.FileTypeManager.getType('/file.tex', 'utf8.tex', null, err => {
if (err) {
return done(err)