diff --git a/services/web/app/coffee/Features/ThirdPartyDataStore/UpdateMerger.coffee b/services/web/app/coffee/Features/ThirdPartyDataStore/UpdateMerger.coffee index 451d724f4c..8356a7cb91 100644 --- a/services/web/app/coffee/Features/ThirdPartyDataStore/UpdateMerger.coffee +++ b/services/web/app/coffee/Features/ThirdPartyDataStore/UpdateMerger.coffee @@ -24,10 +24,10 @@ module.exports = UpdateMerger = return callback(null, "existing-file") if _.some(docs, (d) -> d.path is path) return callback(null, "existing-doc") - # existing file not found in project, fall back to extension check - FileTypeManager.isBinary path, fsPath, (err, isFile)-> + # existing file not found in project, so check the file type to determine if doc + FileTypeManager.getType path, fsPath, (err, isBinary)-> return callback(err) if err? - if isFile + if isBinary callback(null, "new-file") # extension was not text else callback(null, "new-doc") diff --git a/services/web/app/coffee/Features/Uploads/FileSystemImportManager.coffee b/services/web/app/coffee/Features/Uploads/FileSystemImportManager.coffee index 856408cc64..92352bf744 100644 --- a/services/web/app/coffee/Features/Uploads/FileSystemImportManager.coffee +++ b/services/web/app/coffee/Features/Uploads/FileSystemImportManager.coffee @@ -6,14 +6,14 @@ EditorController = require "../Editor/EditorController" logger = require("logger-sharelatex") module.exports = FileSystemImportManager = - addDoc: (user_id, project_id, folder_id, name, path, replace, callback = (error, doc)-> )-> + addDoc: (user_id, project_id, folder_id, name, path, charset, replace, callback = (error, doc)-> )-> FileSystemImportManager._isSafeOnFileSystem path, (err, isSafe)-> if !isSafe logger.log user_id:user_id, project_id:project_id, folder_id:folder_id, name:name, path:path, "add doc is from symlink, stopping process" return callback("path is symlink") - fs.readFile path, "utf8", (error, content = "") -> + fs.readFile path, charset, (error, content) -> return callback(error) if error? - content = content.replace(/\r/g, "") + content = content.replace(/\r\n?/g, "\n") # convert Windows line endings to unix. very old macs also created \r-separated lines lines = content.split("\n") if replace EditorController.upsertDoc project_id, folder_id, name, lines, "upload", user_id, callback @@ -72,14 +72,14 @@ module.exports = FileSystemImportManager = if isDirectory FileSystemImportManager.addFolder user_id, project_id, folder_id, name, path, replace, callback else - FileTypeManager.isBinary name, path, (error, isBinary) => + FileTypeManager.getType name, path, (error, isBinary, charset) => return callback(error) if error? if isBinary FileSystemImportManager.addFile user_id, project_id, folder_id, name, path, replace, (err, entity) -> entity?.type = 'file' callback(err, entity) else - FileSystemImportManager.addDoc user_id, project_id, folder_id, name, path, replace, (err, entity) -> + FileSystemImportManager.addDoc user_id, project_id, folder_id, name, path, charset, replace, (err, entity) -> entity?.type = 'doc' callback(err, entity) diff --git a/services/web/app/coffee/Features/Uploads/FileTypeManager.coffee b/services/web/app/coffee/Features/Uploads/FileTypeManager.coffee index 94a9775a80..46cb86cfc3 100644 --- a/services/web/app/coffee/Features/Uploads/FileTypeManager.coffee +++ b/services/web/app/coffee/Features/Uploads/FileTypeManager.coffee @@ -1,9 +1,11 @@ fs = require "fs" Path = require("path") +isUtf8 = require('is-utf8'); + module.exports = FileTypeManager = TEXT_EXTENSIONS : [ - "tex", "latex", "sty", "cls", "bst", "bib", "bibtex", "txt", "tikz", "rtex", "md", "asy", "latexmkrc" + "tex", "latex", "sty", "cls", "bst", "bib", "bibtex", "txt", "tikz", "rtex", "md", "asy", "latexmkrc", "lbx", "bbx", "cbx", "m" ] IGNORE_EXTENSIONS : [ @@ -29,22 +31,29 @@ module.exports = FileTypeManager = return callback(error) if error? callback(null, stats?.isDirectory()) - isBinary: (name, fsPath, callback = (error, result) ->) -> + # returns charset as understood by fs.readFile, + getType: (name, fsPath, callback = (error, isBinary, charset) ->) -> parts = name.split(".") - extension = parts.slice(-1)[0] - if extension? - extension = extension.toLowerCase() - binaryFile = (@TEXT_EXTENSIONS.indexOf(extension) == -1 or parts.length <= 1) and parts[0] != 'latexmkrc' - - if binaryFile - return callback null, true + extension = parts.slice(-1)[0].toLowerCase() + isText = (FileTypeManager.TEXT_EXTENSIONS.indexOf(extension) > -1 and parts.length > 1) or parts[0] == 'latexmkrc' + + return callback null, true unless isText fs.stat fsPath, (error, stat) -> return callback(error) if error? if stat.size > FileTypeManager.MAX_TEXT_FILE_SIZE return callback null, true # Treat large text file as binary - else - return callback null, false + + fs.readFile fsPath, (err, bytes) -> + return callback(err) if err? + + if isUtf8(bytes) + return callback null, false, "utf-8" + # check for little-endian unicode bom (nodejs does not support big-endian) + if bytes[0] == 0xFF and bytes[1] == 0xFE + return callback null, false, "utf-16le" + + callback null, false, "latin1" shouldIgnore: (path, callback = (error, result) ->) -> name = Path.basename(path) diff --git a/services/web/npm-shrinkwrap.json b/services/web/npm-shrinkwrap.json index 923423a0b5..7216d4a7f6 100644 --- a/services/web/npm-shrinkwrap.json +++ b/services/web/npm-shrinkwrap.json @@ -5622,9 +5622,8 @@ }, "is-utf8": { "version": "0.2.1", - "from": "is-utf8@>=0.2.0 <0.3.0", - "resolved": "https://registry.npmjs.org/is-utf8/-/is-utf8-0.2.1.tgz", - "dev": true + "from": "is-utf8@>=0.2.1 <0.3.0", + "resolved": "https://registry.npmjs.org/is-utf8/-/is-utf8-0.2.1.tgz" }, "is-windows": { "version": "1.0.2", diff --git a/services/web/package.json b/services/web/package.json index fe6ef7dcf0..04509698e0 100644 --- a/services/web/package.json +++ b/services/web/package.json @@ -54,6 +54,7 @@ "heapdump": "^0.3.7", "helmet": "^3.8.1", "http-proxy": "^1.8.1", + "is-utf8": "^0.2.1", "jade": "~1.3.1", "jquery": "^1.11.1", "json2csv": "^4.3.3", diff --git a/services/web/test/acceptance/coffee/ProjectStructureTests.coffee b/services/web/test/acceptance/coffee/ProjectStructureTests.coffee index 84f5730d81..cbd6825a93 100644 --- a/services/web/test/acceptance/coffee/ProjectStructureTests.coffee +++ b/services/web/test/acceptance/coffee/ProjectStructureTests.coffee @@ -252,6 +252,47 @@ describe "ProjectStructureChanges", -> expect(project.rootFolder[0].folders[0].docs[0].name).to.equal('ao.sty') done() + describe "uploading a project with files in different encodings", -> + before (done) -> + MockDocUpdaterApi.clearProjectStructureUpdates() + + zip_file = fs.createReadStream(Path.resolve(__dirname + '/../files/charsets/charsets.zip')) + + @owner.request.post { + uri: "project/new/upload", + formData: + qqfile: zip_file + }, (error, res, body) => + throw error if error? + if res.statusCode < 200 || res.statusCode >= 300 + throw new Error("failed to upload project #{res.statusCode}") + @uploaded_project_id = JSON.parse(body).project_id + done() + + it "should correctly parse windows-1252", -> + {docUpdates: updates} = MockDocUpdaterApi.getProjectStructureUpdates(@uploaded_project_id) + update = _.find updates, (update) -> + update.pathname == '/test-german-windows-1252.tex' + expect(update.docLines).to.contain("Der schnelle braune Fuchs sprang träge über den Hund.") + + it "should correctly parse German utf8", -> + {docUpdates: updates} = MockDocUpdaterApi.getProjectStructureUpdates(@uploaded_project_id) + update = _.find updates, (update) -> + update.pathname == '/test-german-utf8x.tex' + expect(update.docLines).to.contain("Der schnelle braune Fuchs sprang träge über den Hund.") + + it "should correctly parse little-endian utf16", -> + {docUpdates: updates} = MockDocUpdaterApi.getProjectStructureUpdates(@uploaded_project_id) + update = _.find updates, (update) -> + update.pathname == '/test-greek-utf16-le-bom.tex' + expect(update.docLines).to.contain("Η γρήγορη καστανή αλεπού πήδηξε χαλαρά πάνω από το σκυλί.") + + it "should correctly parse Greek utf8", -> + {docUpdates: updates} = MockDocUpdaterApi.getProjectStructureUpdates(@uploaded_project_id) + update = _.find updates, (update) -> + update.pathname == '/test-greek-utf8x.tex' + expect(update.docLines).to.contain("Η γρήγορη καστανή αλεπού πήδηξε χαλαρά πάνω από το σκυλί.") + describe "uploading a file", -> beforeEach (done) -> MockDocUpdaterApi.clearProjectStructureUpdates() @@ -260,7 +301,7 @@ describe "ProjectStructureChanges", -> @root_folder_id = project.rootFolder[0]._id.toString() @project_0 = project done() - + it "should version a newly uploaded file", (done) -> image_file = fs.createReadStream(Path.resolve(__dirname + '/../files/1pixel.png')) @@ -740,3 +781,65 @@ describe "ProjectStructureChanges", -> expect(@project_1.version).to.equal(@project_0.version + 1) done() + + describe "uploading a document", -> + beforeEach (done) -> + MockDocUpdaterApi.clearProjectStructureUpdates() + ProjectGetter.getProject example_project_id, (error, project) => + throw error if error? + @root_folder_id = project.rootFolder[0]._id.toString() + @project_0 = project + done() + + describe "with an unusual character set", -> + it "should correctly handle utf16-le data", (done) -> + document_file = fs.createReadStream(Path.resolve(__dirname + '/../files/charsets/test-greek-utf16-le-bom.tex')) + + req = @owner.request.post { + uri: "project/#{example_project_id}/upload", + qs: + folder_id: @root_folder_id + formData: + qqfile: + value: document_file + options: + filename: 'test-greek-utf16-le-bom.tex', + contentType: 'text/x-tex' + }, (error, res, body) => + throw error if error? + if res.statusCode < 200 || res.statusCode >= 300 + throw new Error("failed to upload file #{res.statusCode}") + + example_file_id = JSON.parse(body).entity_id + + {docUpdates:updates} = MockDocUpdaterApi.getProjectStructureUpdates(example_project_id) + update = updates[0] + expect(update.pathname).to.equal('/test-greek-utf16-le-bom.tex') + expect(update.docLines).to.contain("Η γρήγορη καστανή αλεπού πήδηξε χαλαρά πάνω από το σκυλί.") + done() + + it "should correctly handle windows1252/iso-8859-1/latin1 data", (done) -> + document_file = fs.createReadStream(Path.resolve(__dirname + '/../files/charsets/test-german-windows-1252.tex')) + + req = @owner.request.post { + uri: "project/#{example_project_id}/upload", + qs: + folder_id: @root_folder_id + formData: + qqfile: + value: document_file + options: + filename: 'test-german-windows-1252.tex', + contentType: 'text/x-tex' + }, (error, res, body) => + throw error if error? + if res.statusCode < 200 || res.statusCode >= 300 + throw new Error("failed to upload file #{res.statusCode}") + + example_file_id = JSON.parse(body).entity_id + + {docUpdates:updates} = MockDocUpdaterApi.getProjectStructureUpdates(example_project_id) + update = updates[0] + expect(update.pathname).to.equal('/test-german-windows-1252.tex') + expect(update.docLines).to.contain("Der schnelle braune Fuchs sprang träge über den Hund.") + done() diff --git a/services/web/test/acceptance/coffee/RestoringFilesTest.coffee b/services/web/test/acceptance/coffee/RestoringFilesTest.coffee index 57d711e9a9..14990be480 100644 --- a/services/web/test/acceptance/coffee/RestoringFilesTest.coffee +++ b/services/web/test/acceptance/coffee/RestoringFilesTest.coffee @@ -1,6 +1,8 @@ async = require "async" expect = require("chai").expect _ = require 'underscore' +fs = require 'fs' +Path = require 'path' ProjectGetter = require "../../../app/js/Features/Project/ProjectGetter.js" @@ -83,7 +85,8 @@ describe "RestoringFiles", -> describe "restoring a binary file", -> beforeEach (done) -> - MockProjectHistoryApi.addOldFile(@project_id, 42, "image.png", "Mock image.png content") + @pngData = fs.readFileSync(Path.resolve(__dirname, '../files/1pixel.png'), 'binary') + MockProjectHistoryApi.addOldFile(@project_id, 42, "image.png", @pngData) @owner.request { method: "POST", url: "/project/#{@project_id}/restore_file", @@ -101,7 +104,7 @@ describe "RestoringFiles", -> file = _.find project.rootFolder[0].fileRefs, (file) -> file.name == 'image.png' file = MockFileStoreApi.files[@project_id][file._id] - expect(file.content).to.equal "Mock image.png content" + expect(file.content).to.equal @pngData done() describe "restoring to a directory that exists", -> diff --git a/services/web/test/acceptance/files/charsets/charsets.zip b/services/web/test/acceptance/files/charsets/charsets.zip new file mode 100644 index 0000000000..4c50723b70 Binary files /dev/null and b/services/web/test/acceptance/files/charsets/charsets.zip differ diff --git a/services/web/test/acceptance/files/charsets/test-german-utf8x.tex b/services/web/test/acceptance/files/charsets/test-german-utf8x.tex new file mode 100644 index 0000000000..02f648b5f4 --- /dev/null +++ b/services/web/test/acceptance/files/charsets/test-german-utf8x.tex @@ -0,0 +1,8 @@ +\documentclass[12pt]{article} +\usepackage[german,english]{babel} +\usepackage[utf8x]{inputenc} +\begin{document} + \title{Untitled} + \selectlanguage{german} + Der schnelle braune Fuchs sprang träge über den Hund. +\end{document} diff --git a/services/web/test/acceptance/files/charsets/test-german-windows-1252.tex b/services/web/test/acceptance/files/charsets/test-german-windows-1252.tex new file mode 100644 index 0000000000..fa8ba1eb80 --- /dev/null +++ b/services/web/test/acceptance/files/charsets/test-german-windows-1252.tex @@ -0,0 +1,8 @@ +\documentclass[12pt]{article} +\usepackage[german,english]{babel} +\usepackage[cp1252]{inputenc} +\begin{document} + \title{Untitled} + \selectlanguage{german} + Der schnelle braune Fuchs sprang trge ber den Hund. +\end{document} diff --git a/services/web/test/acceptance/files/charsets/test-greek-utf16-le-bom.tex b/services/web/test/acceptance/files/charsets/test-greek-utf16-le-bom.tex new file mode 100644 index 0000000000..0405c63272 Binary files /dev/null and b/services/web/test/acceptance/files/charsets/test-greek-utf16-le-bom.tex differ diff --git a/services/web/test/acceptance/files/charsets/test-greek-utf8x.tex b/services/web/test/acceptance/files/charsets/test-greek-utf8x.tex new file mode 100644 index 0000000000..08ddbe8b0c --- /dev/null +++ b/services/web/test/acceptance/files/charsets/test-greek-utf8x.tex @@ -0,0 +1,8 @@ +\documentclass[12pt]{article} +\usepackage[greek,english]{babel} +\usepackage[utf8x]{inputenc} +\begin{document} + \title{Untitled} + \selectlanguage{greek} + Η γρήγορη καστανή αλεπού πήδηξε χαλαρά πάνω από το σκυλί. +\end{document} diff --git a/services/web/test/unit/coffee/ThirdPartyDataStore/UpdateMergerTests.coffee b/services/web/test/unit/coffee/ThirdPartyDataStore/UpdateMergerTests.coffee index 1ecf3295aa..e65c8c1c50 100644 --- a/services/web/test/unit/coffee/ThirdPartyDataStore/UpdateMergerTests.coffee +++ b/services/web/test/unit/coffee/ThirdPartyDataStore/UpdateMergerTests.coffee @@ -50,12 +50,12 @@ describe 'UpdateMerger :', -> describe "doc updates for a new doc", -> beforeEach -> - @FileTypeManager.isBinary = sinon.stub().yields(null, false) + @FileTypeManager.getType = sinon.stub().yields(null, false) @updateMerger.p.processDoc = sinon.stub().yields() @updateMerger.mergeUpdate @user_id, @project_id, @docPath, @updateRequest, @source, @callback it 'should look at the file contents', -> - @FileTypeManager.isBinary.called.should.equal true + @FileTypeManager.getType.called.should.equal true it 'should process update as doc', -> @updateMerger.p.processDoc @@ -67,12 +67,12 @@ describe 'UpdateMerger :', -> describe "file updates for a new file ", -> beforeEach -> - @FileTypeManager.isBinary = sinon.stub().yields(null, true) + @FileTypeManager.getType = sinon.stub().yields(null, true) @updateMerger.p.processFile = sinon.stub().yields() @updateMerger.mergeUpdate @user_id, @project_id, @filePath, @updateRequest, @source, @callback it 'should look at the file contents', -> - @FileTypeManager.isBinary.called.should.equal true + @FileTypeManager.getType.called.should.equal true it 'should process update as file', -> @updateMerger.p.processFile @@ -84,12 +84,12 @@ describe 'UpdateMerger :', -> describe "doc updates for an existing doc", -> beforeEach -> - @FileTypeManager.isBinary = sinon.stub() + @FileTypeManager.getType = sinon.stub() @updateMerger.p.processDoc = sinon.stub().yields() @updateMerger.mergeUpdate @user_id, @project_id, @existingDocPath, @updateRequest, @source, @callback it 'should not look at the file contents', -> - @FileTypeManager.isBinary.called.should.equal false + @FileTypeManager.getType.called.should.equal false it 'should process update as doc', -> @updateMerger.p.processDoc @@ -101,12 +101,12 @@ describe 'UpdateMerger :', -> describe "file updates for an existing file", -> beforeEach -> - @FileTypeManager.isBinary = sinon.stub() + @FileTypeManager.getType = sinon.stub() @updateMerger.p.processFile = sinon.stub().yields() @updateMerger.mergeUpdate @user_id, @project_id, @existingFilePath, @updateRequest, @source, @callback it 'should not look at the file contents', -> - @FileTypeManager.isBinary.called.should.equal false + @FileTypeManager.getType.called.should.equal false it 'should process update as file', -> @updateMerger.p.processFile diff --git a/services/web/test/unit/coffee/Uploads/FileSystemImportManagerTests.coffee b/services/web/test/unit/coffee/Uploads/FileSystemImportManagerTests.coffee index ee22a8f7d8..80bf69e13d 100644 --- a/services/web/test/unit/coffee/Uploads/FileSystemImportManagerTests.coffee +++ b/services/web/test/unit/coffee/Uploads/FileSystemImportManagerTests.coffee @@ -13,11 +13,15 @@ describe "FileSystemImportManager", -> @replace = "replace-boolean-flag-mock" @user_id = "mock-user-123" @callback = sinon.stub() + @encoding = "latin1" + @DocumentHelper = + convertTexEncodingsToUtf8: sinon.stub().returnsArg(0) @FileSystemImportManager = SandboxedModule.require modulePath, requires: "fs" : @fs = {} "../Editor/EditorController": @EditorController = {} "./FileTypeManager": @FileTypeManager = {} "../Project/ProjectLocator": @ProjectLocator = {} + "../Documents/DocumentHelper": @DocumentHelper "logger-sharelatex": log:-> err:-> @@ -34,7 +38,7 @@ describe "FileSystemImportManager", -> beforeEach -> @FileSystemImportManager._isSafeOnFileSystem = sinon.stub().callsArgWith(1, null, false) @EditorController.addDoc = sinon.stub() - @FileSystemImportManager.addDoc @user_id, @project_id, @folder_id, @name, @path_on_disk, false, @callback + @FileSystemImportManager.addDoc @user_id, @project_id, @folder_id, @name, @path_on_disk, @encoding, false, @callback it "should not read the file from disk", -> @fs.readFile.called.should.equal false @@ -45,10 +49,10 @@ describe "FileSystemImportManager", -> describe "with replace set to false", -> beforeEach -> @EditorController.addDoc = sinon.stub().callsArg(6) - @FileSystemImportManager.addDoc @user_id, @project_id, @folder_id, @name, @path_on_disk, false, @callback + @FileSystemImportManager.addDoc @user_id, @project_id, @folder_id, @name, @path_on_disk, @encoding, false, @callback it "should read the file from disk", -> - @fs.readFile.calledWith(@path_on_disk, "utf8").should.equal true + @fs.readFile.calledWith(@path_on_disk).should.equal true it "should insert the doc", -> @EditorController.addDoc.calledWith(@project_id, @folder_id, @name, @docLines, "upload", @user_id) @@ -60,22 +64,37 @@ describe "FileSystemImportManager", -> @docLines = ["one", "two", "three"] @fs.readFile = sinon.stub().callsArgWith(2, null, @docContent) @EditorController.addDoc = sinon.stub().callsArg(6) - @FileSystemImportManager.addDoc @user_id, @project_id, @folder_id, @name, @path_on_disk, false, @callback + @FileSystemImportManager.addDoc @user_id, @project_id, @folder_id, @name, @path_on_disk, @encoding, false, @callback it "should strip the \\r characters before adding", -> @EditorController.addDoc.calledWith(@project_id, @folder_id, @name, @docLines, "upload", @user_id) .should.equal true + + describe "with \r line endings", -> + beforeEach -> + @docContent = "one\rtwo\rthree" + @docLines = ["one", "two", "three"] + @fs.readFile = sinon.stub().callsArgWith(2, null, @docContent) + @EditorController.addDoc = sinon.stub().callsArg(6) + @FileSystemImportManager.addDoc @user_id, @project_id, @folder_id, @name, @path_on_disk, @encoding, false, @callback + + it "should treat the \\r characters as newlines", -> + @EditorController.addDoc.calledWith(@project_id, @folder_id, @name, @docLines, "upload", @user_id) + .should.equal true describe "with replace set to true", -> beforeEach -> @EditorController.upsertDoc = sinon.stub().yields() - @FileSystemImportManager.addDoc @user_id, @project_id, @folder_id, @name, @path_on_disk, true, @callback + @FileSystemImportManager.addDoc @user_id, @project_id, @folder_id, @name, @path_on_disk, @encoding, true, @callback it "should upsert the doc", -> @EditorController.upsertDoc .calledWith(@project_id, @folder_id, @name, @docLines, "upload", @user_id) .should.equal true + it "should read the file with the correct encoding", -> + sinon.assert.calledWith(@fs.readFile, @path_on_disk, @encoding) + describe "addFile with replace set to false", -> beforeEach -> @EditorController.addFile = sinon.stub().yields() @@ -176,7 +195,7 @@ describe "FileSystemImportManager", -> describe "with binary file", -> beforeEach -> @FileTypeManager.isDirectory = sinon.stub().callsArgWith(1, null, false) - @FileTypeManager.isBinary = sinon.stub().callsArgWith(2, null, true) + @FileTypeManager.getType = sinon.stub().callsArgWith(2, null, true) @FileSystemImportManager._isSafeOnFileSystem = sinon.stub().callsArgWith(1, null, true) @FileSystemImportManager.addFile = sinon.stub().callsArg(6) @FileSystemImportManager.addEntity @user_id, @project_id, @folder_id, @name, @path_on_disk, @replace, @callback @@ -188,13 +207,10 @@ describe "FileSystemImportManager", -> describe "with text file", -> beforeEach -> @FileTypeManager.isDirectory = sinon.stub().callsArgWith(1, null, false) - @FileTypeManager.isBinary = sinon.stub().callsArgWith(2, null, false) - @FileSystemImportManager.addDoc = sinon.stub().callsArg(6) + @FileTypeManager.getType = sinon.stub().callsArgWith(2, null, false, 'latin1') + @FileSystemImportManager.addDoc = sinon.stub().callsArg(7) @FileSystemImportManager._isSafeOnFileSystem = sinon.stub().callsArgWith(1, null, true) @FileSystemImportManager.addEntity @user_id, @project_id, @folder_id, @name, @path_on_disk, @replace, @callback it "should call addFile", -> - @FileSystemImportManager.addDoc.calledWith(@user_id, @project_id, @folder_id, @name, @path_on_disk, @replace) - .should.equal true - - + sinon.assert.calledWith(@FileSystemImportManager.addDoc, @user_id, @project_id, @folder_id, @name, @path_on_disk, "latin1", @replace) diff --git a/services/web/test/unit/coffee/Uploads/FileTypeManagerTests.coffee b/services/web/test/unit/coffee/Uploads/FileTypeManagerTests.coffee index be456c74a5..232b29c9dd 100644 --- a/services/web/test/unit/coffee/Uploads/FileTypeManagerTests.coffee +++ b/services/web/test/unit/coffee/Uploads/FileTypeManagerTests.coffee @@ -3,16 +3,20 @@ chai = require('chai') should = chai.should() modulePath = "../../../../app/js/Features/Uploads/FileTypeManager.js" SandboxedModule = require('sandboxed-module') +isUtf8 = require('is-utf8') describe "FileTypeManager", -> beforeEach -> + @isUtf8 = sinon.spy(isUtf8) @fs = {} - @Magic = {} @path = "/path/to/test" @callback = sinon.stub() + @ced = sinon.stub() + @DocumentHelper = + getEncodingFromTexContent: sinon.stub() @FileTypeManager = SandboxedModule.require modulePath, requires: "fs": @fs - "mmmagic" : Magic: (options) => @Magic + "is-utf8": @isUtf8 describe "isDirectory", -> beforeEach -> @@ -35,67 +39,117 @@ describe "FileTypeManager", -> it "should return false", -> @callback.calledWith(null, false).should.equal true - describe "isBinary", -> + describe "getType", -> beforeEach -> @stat = { size: 100 } + @contents = "Ich bin eine kleine Teekanne, kurz und kräftig." @fs.stat = sinon.stub().callsArgWith(1, null, @stat) + @fs.readFile = sinon.stub().callsArgWith(1, null, Buffer.from(@contents, "utf-8")) + @fs.readFile.withArgs("/path/on/disk/utf16.tex").callsArgWith(1, null, Buffer.from("\uFEFF" + @contents, "utf-16le")) + @fs.readFile.withArgs("/path/on/disk/latin1.tex").callsArgWith(1, null, Buffer.from(@contents, "latin1")) + @encoding = "ASCII" - it "should return .tex files as not binary", -> - @FileTypeManager.isBinary "file.tex", "/path/on/disk", (error, binary) -> - binary.should.equal false + describe "when the file extension is text", -> + it "should return .tex files as not binary", -> + @FileTypeManager.getType "file.tex", "/path/on/disk", (error, binary) -> + binary.should.equal false - it "should return .bib files as not binary", -> - @FileTypeManager.isBinary "file.bib", "/path/on/disk", (error, binary) -> - binary.should.equal false + it "should return .bib files as not binary", -> + @FileTypeManager.getType "file.bib", "/path/on/disk", (error, binary) -> + binary.should.equal false - it "should return .bibtex files as not binary", -> - @FileTypeManager.isBinary "file.bibtex", "/path/on/disk", (error, binary) -> - binary.should.equal false + it "should return .bibtex files as not binary", -> + @FileTypeManager.getType "file.bibtex", "/path/on/disk", (error, binary) -> + binary.should.equal false - it "should return .cls files as not binary", -> - @FileTypeManager.isBinary "file.cls", "/path/on/disk", (error, binary) -> - binary.should.equal false + it "should return .cls files as not binary", -> + @FileTypeManager.getType "file.cls", "/path/on/disk", (error, binary) -> + binary.should.equal false - it "should return .sty files as not binary", -> - @FileTypeManager.isBinary "file.sty", "/path/on/disk", (error, binary) -> - binary.should.equal false + it "should return .sty files as not binary", -> + @FileTypeManager.getType "file.sty", "/path/on/disk", (error, binary) -> + binary.should.equal false - it "should return .bst files as not binary", -> - @FileTypeManager.isBinary "file.bst", "/path/on/disk", (error, binary) -> - binary.should.equal false + it "should return .bst files as not binary", -> + @FileTypeManager.getType "file.bst", "/path/on/disk", (error, binary) -> + binary.should.equal false - it "should return .eps files as binary", -> - @FileTypeManager.isBinary "file.eps", "/path/on/disk", (error, binary) -> - binary.should.equal true + it "should return .latexmkrc file as not binary", -> + @FileTypeManager.getType ".latexmkrc", "/path/on/disk", (error, binary) -> + binary.should.equal false - it "should return .dvi files as binary", -> - @FileTypeManager.isBinary "file.dvi", "/path/on/disk", (error, binary) -> - binary.should.equal true + it "should return latexmkrc file as not binary", -> + @FileTypeManager.getType "latexmkrc", "/path/on/disk", (error, binary) -> + binary.should.equal false - it "should return .png files as binary", -> - @FileTypeManager.isBinary "file.png", "/path/on/disk", (error, binary) -> - binary.should.equal true + it "should return lbx file as not binary", -> + @FileTypeManager.getType "file.lbx", "/path/on/disk", (error, binary) -> + binary.should.equal false - it "should return files without extensions as binary", -> - @FileTypeManager.isBinary "tex", "/path/on/disk", (error, binary) -> - binary.should.equal true + it "should return bbx file as not binary", -> + @FileTypeManager.getType "file.bbx", "/path/on/disk", (error, binary) -> + binary.should.equal false - it "should return .latexmkrc file as not binary", -> - @FileTypeManager.isBinary ".latexmkrc", "/path/on/disk", (error, binary) -> - binary.should.equal false + it "should return cbx file as not binary", -> + @FileTypeManager.getType "file.cbx", "/path/on/disk", (error, binary) -> + binary.should.equal false - it "should return latexmkrc file as not binary", -> - @FileTypeManager.isBinary "latexmkrc", "/path/on/disk", (error, binary) -> - binary.should.equal false + it "should return m file as not binary", -> + @FileTypeManager.getType "file.m", "/path/on/disk", (error, binary) -> + binary.should.equal false - it "should ignore the case of an extension", -> - @FileTypeManager.isBinary "file.TEX", "/path/on/disk", (error, binary) -> - binary.should.equal false + it "should ignore the case of an extension", -> + @FileTypeManager.getType "file.TEX", "/path/on/disk", (error, binary) -> + binary.should.equal false - it "should return large text files as binary", -> - @stat.size = 2 * 1024 * 1024 # 2Mb - @FileTypeManager.isBinary "file.tex", "/path/on/disk", (error, binary) -> - binary.should.equal true + it "should return large text files as binary", -> + @stat.size = 2 * 1024 * 1024 # 2Mb + @FileTypeManager.getType "file.tex", "/path/on/disk", (error, binary) -> + binary.should.equal true + + it "should return try to determine the encoding of large files", -> + @stat.size = 2 * 1024 * 1024 # 2Mb + @FileTypeManager.getType "file.tex", "/path/on/disk", => + sinon.assert.notCalled(@isUtf8) + + it "should detect the file as utf8", -> + @FileTypeManager.getType "file.tex", "/path/on/disk", (error, binary, encoding) => + sinon.assert.calledOnce(@isUtf8) + @isUtf8.returned(true).should.equal true + encoding.should.equal "utf-8" + + it "should return 'latin1' for non-unicode encodings", -> + @FileTypeManager.getType "file.tex", "/path/on/disk/latin1.tex", (error, binary, encoding) => + sinon.assert.calledOnce(@isUtf8) + @isUtf8.returned(false).should.equal true + encoding.should.equal "latin1" + + it "should detect utf16 with BOM as utf-16", -> + @FileTypeManager.getType "file.tex", "/path/on/disk/utf16.tex", (error, binary, encoding) => + sinon.assert.calledOnce(@isUtf8) + @isUtf8.returned(false).should.equal true + encoding.should.equal "utf-16le" + + describe "when the file extension is non-text", -> + it "should return .eps files as binary", -> + @FileTypeManager.getType "file.eps", "/path/on/disk", (error, binary) -> + binary.should.equal true + + it "should return .dvi files as binary", -> + @FileTypeManager.getType "file.dvi", "/path/on/disk", (error, binary) -> + binary.should.equal true + + it "should return .png files as binary", -> + @FileTypeManager.getType "file.png", "/path/on/disk", (error, binary) -> + binary.should.equal true + + it "should return files without extensions as binary", -> + @FileTypeManager.getType "tex", "/path/on/disk", (error, binary) -> + binary.should.equal true + + it "should not try to get the character encoding", -> + @FileTypeManager.getType "file.png", "/path/on/disk", => + sinon.assert.notCalled(@isUtf8) describe "shouldIgnore", -> it "should ignore tex auxiliary files", ->