Merge pull request #1476 from sharelatex/spd-character-sets

Detect encoding of non-utf8 files and convert during load

GitOrigin-RevId: 1856a42457c52b125793dd001b92a014f358da69
This commit is contained in:
Simon Detheridge 2019-03-04 11:59:47 +00:00 committed by James Allen
parent 68c7cc6fa3
commit 870f183595
15 changed files with 300 additions and 91 deletions

View file

@ -24,10 +24,10 @@ module.exports = UpdateMerger =
return callback(null, "existing-file")
if _.some(docs, (d) -> d.path is path)
return callback(null, "existing-doc")
# existing file not found in project, fall back to extension check
FileTypeManager.isBinary path, fsPath, (err, isFile)->
# existing file not found in project, so check the file type to determine if doc
FileTypeManager.getType path, fsPath, (err, isBinary)->
return callback(err) if err?
if isFile
if isBinary
callback(null, "new-file") # extension was not text
else
callback(null, "new-doc")

View file

@ -6,14 +6,14 @@ EditorController = require "../Editor/EditorController"
logger = require("logger-sharelatex")
module.exports = FileSystemImportManager =
addDoc: (user_id, project_id, folder_id, name, path, replace, callback = (error, doc)-> )->
addDoc: (user_id, project_id, folder_id, name, path, charset, replace, callback = (error, doc)-> )->
FileSystemImportManager._isSafeOnFileSystem path, (err, isSafe)->
if !isSafe
logger.log user_id:user_id, project_id:project_id, folder_id:folder_id, name:name, path:path, "add doc is from symlink, stopping process"
return callback("path is symlink")
fs.readFile path, "utf8", (error, content = "") ->
fs.readFile path, charset, (error, content) ->
return callback(error) if error?
content = content.replace(/\r/g, "")
content = content.replace(/\r\n?/g, "\n") # convert Windows line endings to unix. very old macs also created \r-separated lines
lines = content.split("\n")
if replace
EditorController.upsertDoc project_id, folder_id, name, lines, "upload", user_id, callback
@ -72,14 +72,14 @@ module.exports = FileSystemImportManager =
if isDirectory
FileSystemImportManager.addFolder user_id, project_id, folder_id, name, path, replace, callback
else
FileTypeManager.isBinary name, path, (error, isBinary) =>
FileTypeManager.getType name, path, (error, isBinary, charset) =>
return callback(error) if error?
if isBinary
FileSystemImportManager.addFile user_id, project_id, folder_id, name, path, replace, (err, entity) ->
entity?.type = 'file'
callback(err, entity)
else
FileSystemImportManager.addDoc user_id, project_id, folder_id, name, path, replace, (err, entity) ->
FileSystemImportManager.addDoc user_id, project_id, folder_id, name, path, charset, replace, (err, entity) ->
entity?.type = 'doc'
callback(err, entity)

View file

@ -1,9 +1,11 @@
fs = require "fs"
Path = require("path")
isUtf8 = require('is-utf8');
module.exports = FileTypeManager =
TEXT_EXTENSIONS : [
"tex", "latex", "sty", "cls", "bst", "bib", "bibtex", "txt", "tikz", "rtex", "md", "asy", "latexmkrc"
"tex", "latex", "sty", "cls", "bst", "bib", "bibtex", "txt", "tikz", "rtex", "md", "asy", "latexmkrc", "lbx", "bbx", "cbx", "m"
]
IGNORE_EXTENSIONS : [
@ -29,22 +31,29 @@ module.exports = FileTypeManager =
return callback(error) if error?
callback(null, stats?.isDirectory())
isBinary: (name, fsPath, callback = (error, result) ->) ->
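# Classifies a file as binary or text. For text files the callback also receives
# a charset name as understood by fs.readFile: "utf-8", "utf-16le" (when a
# little-endian BOM is present) or "latin1" as the fallback. No charset is
# passed when the file is reported as binary.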
getType: (name, fsPath, callback = (error, isBinary, charset) ->) ->
parts = name.split(".")
extension = parts.slice(-1)[0]
if extension?
extension = extension.toLowerCase()
binaryFile = (@TEXT_EXTENSIONS.indexOf(extension) == -1 or parts.length <= 1) and parts[0] != 'latexmkrc'
extension = parts.slice(-1)[0].toLowerCase()
isText = (FileTypeManager.TEXT_EXTENSIONS.indexOf(extension) > -1 and parts.length > 1) or parts[0] == 'latexmkrc'
if binaryFile
return callback null, true
return callback null, true unless isText
fs.stat fsPath, (error, stat) ->
return callback(error) if error?
if stat.size > FileTypeManager.MAX_TEXT_FILE_SIZE
return callback null, true # Treat large text file as binary
else
return callback null, false
fs.readFile fsPath, (err, bytes) ->
return callback(err) if err?
if isUtf8(bytes)
return callback null, false, "utf-8"
# check for little-endian unicode bom (nodejs does not support big-endian)
if bytes[0] == 0xFF and bytes[1] == 0xFE
return callback null, false, "utf-16le"
callback null, false, "latin1"
shouldIgnore: (path, callback = (error, result) ->) ->
name = Path.basename(path)

View file

@ -5622,9 +5622,8 @@
},
"is-utf8": {
"version": "0.2.1",
"from": "is-utf8@>=0.2.0 <0.3.0",
"resolved": "https://registry.npmjs.org/is-utf8/-/is-utf8-0.2.1.tgz",
"dev": true
"from": "is-utf8@>=0.2.1 <0.3.0",
"resolved": "https://registry.npmjs.org/is-utf8/-/is-utf8-0.2.1.tgz"
},
"is-windows": {
"version": "1.0.2",

View file

@ -54,6 +54,7 @@
"heapdump": "^0.3.7",
"helmet": "^3.8.1",
"http-proxy": "^1.8.1",
"is-utf8": "^0.2.1",
"jade": "~1.3.1",
"jquery": "^1.11.1",
"json2csv": "^4.3.3",

View file

@ -252,6 +252,47 @@ describe "ProjectStructureChanges", ->
expect(project.rootFolder[0].folders[0].docs[0].name).to.equal('ao.sty')
done()
describe "uploading a project with files in different encodings", ->
  # Sentences the charset fixtures are expected to decode to.
  GERMAN_LINE = "Der schnelle braune Fuchs sprang träge über den Hund."
  GREEK_LINE = "Η γρήγορη καστανή αλεπού πήδηξε χαλαρά πάνω από το σκυλί."

  # Finds the doc update recorded for `pathname` in the uploaded project and
  # asserts that its lines contain `line`. Asserting that the update exists
  # first gives a readable failure instead of a TypeError on `undefined`.
  expectDocLine = (project_id, pathname, line) ->
    {docUpdates: updates} = MockDocUpdaterApi.getProjectStructureUpdates(project_id)
    update = _.find updates, (candidate) ->
      candidate.pathname == pathname
    expect(update, "no doc update recorded for #{pathname}").to.exist
    expect(update.docLines).to.contain(line)

  # Upload the zip once; every test below inspects the resulting doc updates.
  before (done) ->
    MockDocUpdaterApi.clearProjectStructureUpdates()
    zip_file = fs.createReadStream(Path.resolve(__dirname + '/../files/charsets/charsets.zip'))
    @owner.request.post {
      uri: "project/new/upload",
      formData:
        qqfile: zip_file
    }, (error, res, body) =>
      # Report failures through `done` so they are attributed to this hook.
      return done(error) if error?
      if res.statusCode < 200 || res.statusCode >= 300
        return done(new Error("failed to upload project #{res.statusCode}"))
      @uploaded_project_id = JSON.parse(body).project_id
      done()

  it "should correctly parse windows-1252", ->
    expectDocLine @uploaded_project_id, '/test-german-windows-1252.tex', GERMAN_LINE

  it "should correctly parse German utf8", ->
    expectDocLine @uploaded_project_id, '/test-german-utf8x.tex', GERMAN_LINE

  it "should correctly parse little-endian utf16", ->
    expectDocLine @uploaded_project_id, '/test-greek-utf16-le-bom.tex', GREEK_LINE

  it "should correctly parse Greek utf8", ->
    expectDocLine @uploaded_project_id, '/test-greek-utf8x.tex', GREEK_LINE
describe "uploading a file", ->
beforeEach (done) ->
MockDocUpdaterApi.clearProjectStructureUpdates()
@ -740,3 +781,65 @@ describe "ProjectStructureChanges", ->
expect(@project_1.version).to.equal(@project_0.version + 1)
done()
describe "uploading a document", ->
  # Uploads files/charsets/<filename> into `folder_id` of the example project
  # and yields the first doc update recorded by the mock doc updater.
  # Request and HTTP-status failures are passed to `callback` as errors.
  uploadCharsetFixture = (owner, folder_id, filename, callback) ->
    document_file = fs.createReadStream(Path.resolve(__dirname + '/../files/charsets/' + filename))
    owner.request.post {
      uri: "project/#{example_project_id}/upload",
      qs:
        folder_id: folder_id
      formData:
        qqfile:
          value: document_file
          options:
            filename: filename,
            contentType: 'text/x-tex'
    }, (error, res, body) ->
      return callback(error) if error?
      if res.statusCode < 200 || res.statusCode >= 300
        return callback(new Error("failed to upload file #{res.statusCode}"))
      # NOTE(review): kept from the original tests; presumably updates a
      # file-level variable declared outside this hunk — confirm.
      example_file_id = JSON.parse(body).entity_id
      {docUpdates:updates} = MockDocUpdaterApi.getProjectStructureUpdates(example_project_id)
      callback(null, updates[0])

  # Asserts that the recorded update targets `pathname` and contains `line`.
  expectUpdate = (update, pathname, line) ->
    expect(update, "no doc update recorded for #{pathname}").to.exist
    expect(update.pathname).to.equal(pathname)
    expect(update.docLines).to.contain(line)

  beforeEach (done) ->
    MockDocUpdaterApi.clearProjectStructureUpdates()
    ProjectGetter.getProject example_project_id, (error, project) =>
      return done(error) if error?
      @root_folder_id = project.rootFolder[0]._id.toString()
      @project_0 = project
      done()

  describe "with an unusual character set", ->
    it "should correctly handle utf16-le data", (done) ->
      uploadCharsetFixture @owner, @root_folder_id, 'test-greek-utf16-le-bom.tex', (error, update) ->
        return done(error) if error?
        expectUpdate update, '/test-greek-utf16-le-bom.tex', "Η γρήγορη καστανή αλεπού πήδηξε χαλαρά πάνω από το σκυλί."
        done()

    it "should correctly handle windows1252/iso-8859-1/latin1 data", (done) ->
      uploadCharsetFixture @owner, @root_folder_id, 'test-german-windows-1252.tex', (error, update) ->
        return done(error) if error?
        expectUpdate update, '/test-german-windows-1252.tex', "Der schnelle braune Fuchs sprang träge über den Hund."
        done()

View file

@ -1,6 +1,8 @@
async = require "async"
expect = require("chai").expect
_ = require 'underscore'
fs = require 'fs'
Path = require 'path'
ProjectGetter = require "../../../app/js/Features/Project/ProjectGetter.js"
@ -83,7 +85,8 @@ describe "RestoringFiles", ->
describe "restoring a binary file", ->
beforeEach (done) ->
MockProjectHistoryApi.addOldFile(@project_id, 42, "image.png", "Mock image.png content")
@pngData = fs.readFileSync(Path.resolve(__dirname, '../files/1pixel.png'), 'binary')
MockProjectHistoryApi.addOldFile(@project_id, 42, "image.png", @pngData)
@owner.request {
method: "POST",
url: "/project/#{@project_id}/restore_file",
@ -101,7 +104,7 @@ describe "RestoringFiles", ->
file = _.find project.rootFolder[0].fileRefs, (file) ->
file.name == 'image.png'
file = MockFileStoreApi.files[@project_id][file._id]
expect(file.content).to.equal "Mock image.png content"
expect(file.content).to.equal @pngData
done()
describe "restoring to a directory that exists", ->

View file

@ -0,0 +1,8 @@
\documentclass[12pt]{article}
\usepackage[german,english]{babel}
\usepackage[utf8x]{inputenc}
\begin{document}
\title{Untitled}
\selectlanguage{german}
Der schnelle braune Fuchs sprang träge über den Hund.
\end{document}

View file

@ -0,0 +1,8 @@
\documentclass[12pt]{article}
\usepackage[german,english]{babel}
\usepackage[cp1252]{inputenc}
\begin{document}
\title{Untitled}
\selectlanguage{german}
Der schnelle braune Fuchs sprang träge über den Hund.
\end{document}

View file

@ -0,0 +1,8 @@
\documentclass[12pt]{article}
\usepackage[greek,english]{babel}
\usepackage[utf8x]{inputenc}
\begin{document}
\title{Untitled}
\selectlanguage{greek}
Η γρήγορη καστανή αλεπού πήδηξε χαλαρά πάνω από το σκυλί.
\end{document}

View file

@ -50,12 +50,12 @@ describe 'UpdateMerger :', ->
describe "doc updates for a new doc", ->
beforeEach ->
@FileTypeManager.isBinary = sinon.stub().yields(null, false)
@FileTypeManager.getType = sinon.stub().yields(null, false)
@updateMerger.p.processDoc = sinon.stub().yields()
@updateMerger.mergeUpdate @user_id, @project_id, @docPath, @updateRequest, @source, @callback
it 'should look at the file contents', ->
@FileTypeManager.isBinary.called.should.equal true
@FileTypeManager.getType.called.should.equal true
it 'should process update as doc', ->
@updateMerger.p.processDoc
@ -67,12 +67,12 @@ describe 'UpdateMerger :', ->
describe "file updates for a new file ", ->
beforeEach ->
@FileTypeManager.isBinary = sinon.stub().yields(null, true)
@FileTypeManager.getType = sinon.stub().yields(null, true)
@updateMerger.p.processFile = sinon.stub().yields()
@updateMerger.mergeUpdate @user_id, @project_id, @filePath, @updateRequest, @source, @callback
it 'should look at the file contents', ->
@FileTypeManager.isBinary.called.should.equal true
@FileTypeManager.getType.called.should.equal true
it 'should process update as file', ->
@updateMerger.p.processFile
@ -84,12 +84,12 @@ describe 'UpdateMerger :', ->
describe "doc updates for an existing doc", ->
beforeEach ->
@FileTypeManager.isBinary = sinon.stub()
@FileTypeManager.getType = sinon.stub()
@updateMerger.p.processDoc = sinon.stub().yields()
@updateMerger.mergeUpdate @user_id, @project_id, @existingDocPath, @updateRequest, @source, @callback
it 'should not look at the file contents', ->
@FileTypeManager.isBinary.called.should.equal false
@FileTypeManager.getType.called.should.equal false
it 'should process update as doc', ->
@updateMerger.p.processDoc
@ -101,12 +101,12 @@ describe 'UpdateMerger :', ->
describe "file updates for an existing file", ->
beforeEach ->
@FileTypeManager.isBinary = sinon.stub()
@FileTypeManager.getType = sinon.stub()
@updateMerger.p.processFile = sinon.stub().yields()
@updateMerger.mergeUpdate @user_id, @project_id, @existingFilePath, @updateRequest, @source, @callback
it 'should not look at the file contents', ->
@FileTypeManager.isBinary.called.should.equal false
@FileTypeManager.getType.called.should.equal false
it 'should process update as file', ->
@updateMerger.p.processFile

View file

@ -13,11 +13,15 @@ describe "FileSystemImportManager", ->
@replace = "replace-boolean-flag-mock"
@user_id = "mock-user-123"
@callback = sinon.stub()
@encoding = "latin1"
@DocumentHelper =
convertTexEncodingsToUtf8: sinon.stub().returnsArg(0)
@FileSystemImportManager = SandboxedModule.require modulePath, requires:
"fs" : @fs = {}
"../Editor/EditorController": @EditorController = {}
"./FileTypeManager": @FileTypeManager = {}
"../Project/ProjectLocator": @ProjectLocator = {}
"../Documents/DocumentHelper": @DocumentHelper
"logger-sharelatex":
log:->
err:->
@ -34,7 +38,7 @@ describe "FileSystemImportManager", ->
beforeEach ->
@FileSystemImportManager._isSafeOnFileSystem = sinon.stub().callsArgWith(1, null, false)
@EditorController.addDoc = sinon.stub()
@FileSystemImportManager.addDoc @user_id, @project_id, @folder_id, @name, @path_on_disk, false, @callback
@FileSystemImportManager.addDoc @user_id, @project_id, @folder_id, @name, @path_on_disk, @encoding, false, @callback
it "should not read the file from disk", ->
@fs.readFile.called.should.equal false
@ -45,10 +49,10 @@ describe "FileSystemImportManager", ->
describe "with replace set to false", ->
beforeEach ->
@EditorController.addDoc = sinon.stub().callsArg(6)
@FileSystemImportManager.addDoc @user_id, @project_id, @folder_id, @name, @path_on_disk, false, @callback
@FileSystemImportManager.addDoc @user_id, @project_id, @folder_id, @name, @path_on_disk, @encoding, false, @callback
it "should read the file from disk", ->
@fs.readFile.calledWith(@path_on_disk, "utf8").should.equal true
@fs.readFile.calledWith(@path_on_disk).should.equal true
it "should insert the doc", ->
@EditorController.addDoc.calledWith(@project_id, @folder_id, @name, @docLines, "upload", @user_id)
@ -60,22 +64,37 @@ describe "FileSystemImportManager", ->
@docLines = ["one", "two", "three"]
@fs.readFile = sinon.stub().callsArgWith(2, null, @docContent)
@EditorController.addDoc = sinon.stub().callsArg(6)
@FileSystemImportManager.addDoc @user_id, @project_id, @folder_id, @name, @path_on_disk, false, @callback
@FileSystemImportManager.addDoc @user_id, @project_id, @folder_id, @name, @path_on_disk, @encoding, false, @callback
it "should strip the \\r characters before adding", ->
@EditorController.addDoc.calledWith(@project_id, @folder_id, @name, @docLines, "upload", @user_id)
.should.equal true
describe "with \r line endings", ->
beforeEach ->
@docContent = "one\rtwo\rthree"
@docLines = ["one", "two", "three"]
@fs.readFile = sinon.stub().callsArgWith(2, null, @docContent)
@EditorController.addDoc = sinon.stub().callsArg(6)
@FileSystemImportManager.addDoc @user_id, @project_id, @folder_id, @name, @path_on_disk, @encoding, false, @callback
it "should treat the \\r characters as newlines", ->
@EditorController.addDoc.calledWith(@project_id, @folder_id, @name, @docLines, "upload", @user_id)
.should.equal true
describe "with replace set to true", ->
beforeEach ->
@EditorController.upsertDoc = sinon.stub().yields()
@FileSystemImportManager.addDoc @user_id, @project_id, @folder_id, @name, @path_on_disk, true, @callback
@FileSystemImportManager.addDoc @user_id, @project_id, @folder_id, @name, @path_on_disk, @encoding, true, @callback
it "should upsert the doc", ->
@EditorController.upsertDoc
.calledWith(@project_id, @folder_id, @name, @docLines, "upload", @user_id)
.should.equal true
it "should read the file with the correct encoding", ->
sinon.assert.calledWith(@fs.readFile, @path_on_disk, @encoding)
describe "addFile with replace set to false", ->
beforeEach ->
@EditorController.addFile = sinon.stub().yields()
@ -176,7 +195,7 @@ describe "FileSystemImportManager", ->
describe "with binary file", ->
beforeEach ->
@FileTypeManager.isDirectory = sinon.stub().callsArgWith(1, null, false)
@FileTypeManager.isBinary = sinon.stub().callsArgWith(2, null, true)
@FileTypeManager.getType = sinon.stub().callsArgWith(2, null, true)
@FileSystemImportManager._isSafeOnFileSystem = sinon.stub().callsArgWith(1, null, true)
@FileSystemImportManager.addFile = sinon.stub().callsArg(6)
@FileSystemImportManager.addEntity @user_id, @project_id, @folder_id, @name, @path_on_disk, @replace, @callback
@ -188,13 +207,10 @@ describe "FileSystemImportManager", ->
describe "with text file", ->
beforeEach ->
@FileTypeManager.isDirectory = sinon.stub().callsArgWith(1, null, false)
@FileTypeManager.isBinary = sinon.stub().callsArgWith(2, null, false)
@FileSystemImportManager.addDoc = sinon.stub().callsArg(6)
@FileTypeManager.getType = sinon.stub().callsArgWith(2, null, false, 'latin1')
@FileSystemImportManager.addDoc = sinon.stub().callsArg(7)
@FileSystemImportManager._isSafeOnFileSystem = sinon.stub().callsArgWith(1, null, true)
@FileSystemImportManager.addEntity @user_id, @project_id, @folder_id, @name, @path_on_disk, @replace, @callback
it "should call addFile", ->
@FileSystemImportManager.addDoc.calledWith(@user_id, @project_id, @folder_id, @name, @path_on_disk, @replace)
.should.equal true
sinon.assert.calledWith(@FileSystemImportManager.addDoc, @user_id, @project_id, @folder_id, @name, @path_on_disk, "latin1", @replace)

View file

@ -3,16 +3,20 @@ chai = require('chai')
should = chai.should()
modulePath = "../../../../app/js/Features/Uploads/FileTypeManager.js"
SandboxedModule = require('sandboxed-module')
isUtf8 = require('is-utf8')
describe "FileTypeManager", ->
beforeEach ->
@isUtf8 = sinon.spy(isUtf8)
@fs = {}
@Magic = {}
@path = "/path/to/test"
@callback = sinon.stub()
@ced = sinon.stub()
@DocumentHelper =
getEncodingFromTexContent: sinon.stub()
@FileTypeManager = SandboxedModule.require modulePath, requires:
"fs": @fs
"mmmagic" : Magic: (options) => @Magic
"is-utf8": @isUtf8
describe "isDirectory", ->
beforeEach ->
@ -35,68 +39,118 @@ describe "FileTypeManager", ->
it "should return false", ->
@callback.calledWith(null, false).should.equal true
describe "isBinary", ->
describe "getType", ->
beforeEach ->
@stat = { size: 100 }
@contents = "Ich bin eine kleine Teekanne, kurz und kräftig."
@fs.stat = sinon.stub().callsArgWith(1, null, @stat)
@fs.readFile = sinon.stub().callsArgWith(1, null, Buffer.from(@contents, "utf-8"))
@fs.readFile.withArgs("/path/on/disk/utf16.tex").callsArgWith(1, null, Buffer.from("\uFEFF" + @contents, "utf-16le"))
@fs.readFile.withArgs("/path/on/disk/latin1.tex").callsArgWith(1, null, Buffer.from(@contents, "latin1"))
@encoding = "ASCII"
describe "when the file extension is text", ->
it "should return .tex files as not binary", ->
@FileTypeManager.isBinary "file.tex", "/path/on/disk", (error, binary) ->
@FileTypeManager.getType "file.tex", "/path/on/disk", (error, binary) ->
binary.should.equal false
it "should return .bib files as not binary", ->
@FileTypeManager.isBinary "file.bib", "/path/on/disk", (error, binary) ->
@FileTypeManager.getType "file.bib", "/path/on/disk", (error, binary) ->
binary.should.equal false
it "should return .bibtex files as not binary", ->
@FileTypeManager.isBinary "file.bibtex", "/path/on/disk", (error, binary) ->
@FileTypeManager.getType "file.bibtex", "/path/on/disk", (error, binary) ->
binary.should.equal false
it "should return .cls files as not binary", ->
@FileTypeManager.isBinary "file.cls", "/path/on/disk", (error, binary) ->
@FileTypeManager.getType "file.cls", "/path/on/disk", (error, binary) ->
binary.should.equal false
it "should return .sty files as not binary", ->
@FileTypeManager.isBinary "file.sty", "/path/on/disk", (error, binary) ->
@FileTypeManager.getType "file.sty", "/path/on/disk", (error, binary) ->
binary.should.equal false
it "should return .bst files as not binary", ->
@FileTypeManager.isBinary "file.bst", "/path/on/disk", (error, binary) ->
@FileTypeManager.getType "file.bst", "/path/on/disk", (error, binary) ->
binary.should.equal false
it "should return .eps files as binary", ->
@FileTypeManager.isBinary "file.eps", "/path/on/disk", (error, binary) ->
binary.should.equal true
it "should return .dvi files as binary", ->
@FileTypeManager.isBinary "file.dvi", "/path/on/disk", (error, binary) ->
binary.should.equal true
it "should return .png files as binary", ->
@FileTypeManager.isBinary "file.png", "/path/on/disk", (error, binary) ->
binary.should.equal true
it "should return files without extensions as binary", ->
@FileTypeManager.isBinary "tex", "/path/on/disk", (error, binary) ->
binary.should.equal true
it "should return .latexmkrc file as not binary", ->
@FileTypeManager.isBinary ".latexmkrc", "/path/on/disk", (error, binary) ->
@FileTypeManager.getType ".latexmkrc", "/path/on/disk", (error, binary) ->
binary.should.equal false
it "should return latexmkrc file as not binary", ->
@FileTypeManager.isBinary "latexmkrc", "/path/on/disk", (error, binary) ->
@FileTypeManager.getType "latexmkrc", "/path/on/disk", (error, binary) ->
binary.should.equal false
it "should return lbx file as not binary", ->
@FileTypeManager.getType "file.lbx", "/path/on/disk", (error, binary) ->
binary.should.equal false
it "should return bbx file as not binary", ->
@FileTypeManager.getType "file.bbx", "/path/on/disk", (error, binary) ->
binary.should.equal false
it "should return cbx file as not binary", ->
@FileTypeManager.getType "file.cbx", "/path/on/disk", (error, binary) ->
binary.should.equal false
it "should return m file as not binary", ->
@FileTypeManager.getType "file.m", "/path/on/disk", (error, binary) ->
binary.should.equal false
it "should ignore the case of an extension", ->
@FileTypeManager.isBinary "file.TEX", "/path/on/disk", (error, binary) ->
@FileTypeManager.getType "file.TEX", "/path/on/disk", (error, binary) ->
binary.should.equal false
it "should return large text files as binary", ->
@stat.size = 2 * 1024 * 1024 # 2Mb
@FileTypeManager.isBinary "file.tex", "/path/on/disk", (error, binary) ->
@FileTypeManager.getType "file.tex", "/path/on/disk", (error, binary) ->
binary.should.equal true
# Files above the size limit are reported as binary before any content is
# read, so the utf8 detector must never be invoked for them.
it "should not try to determine the encoding of large files", ->
  @stat.size = 2 * 1024 * 1024 # 2Mb
  @FileTypeManager.getType "file.tex", "/path/on/disk", =>
    sinon.assert.notCalled(@isUtf8)
it "should detect the file as utf8", ->
@FileTypeManager.getType "file.tex", "/path/on/disk", (error, binary, encoding) =>
sinon.assert.calledOnce(@isUtf8)
@isUtf8.returned(true).should.equal true
encoding.should.equal "utf-8"
it "should return 'latin1' for non-unicode encodings", ->
@FileTypeManager.getType "file.tex", "/path/on/disk/latin1.tex", (error, binary, encoding) =>
sinon.assert.calledOnce(@isUtf8)
@isUtf8.returned(false).should.equal true
encoding.should.equal "latin1"
it "should detect utf16 with BOM as utf-16", ->
@FileTypeManager.getType "file.tex", "/path/on/disk/utf16.tex", (error, binary, encoding) =>
sinon.assert.calledOnce(@isUtf8)
@isUtf8.returned(false).should.equal true
encoding.should.equal "utf-16le"
describe "when the file extension is non-text", ->
it "should return .eps files as binary", ->
@FileTypeManager.getType "file.eps", "/path/on/disk", (error, binary) ->
binary.should.equal true
it "should return .dvi files as binary", ->
@FileTypeManager.getType "file.dvi", "/path/on/disk", (error, binary) ->
binary.should.equal true
it "should return .png files as binary", ->
@FileTypeManager.getType "file.png", "/path/on/disk", (error, binary) ->
binary.should.equal true
it "should return files without extensions as binary", ->
@FileTypeManager.getType "tex", "/path/on/disk", (error, binary) ->
binary.should.equal true
it "should not try to get the character encoding", ->
@FileTypeManager.getType "file.png", "/path/on/disk", =>
sinon.assert.notCalled(@isUtf8)
describe "shouldIgnore", ->
it "should ignore tex auxiliary files", ->
@FileTypeManager.shouldIgnore "file.aux", (error, ignore) ->