diff --git a/services/clsi/app.coffee b/services/clsi/app.coffee index 0c1248a5df..8b4e2a9dcd 100644 --- a/services/clsi/app.coffee +++ b/services/clsi/app.coffee @@ -37,6 +37,7 @@ app.delete "/project/:project_id", CompileController.clearCache app.get "/project/:project_id/sync/code", CompileController.syncFromCode app.get "/project/:project_id/sync/pdf", CompileController.syncFromPdf +app.get "/project/:project_id/wordcount", CompileController.wordcount ForbidSymlinks = require "./app/js/StaticServerForbidSymlinks" diff --git a/services/clsi/app/coffee/CompileController.coffee b/services/clsi/app/coffee/CompileController.coffee index 71368be737..f7d46b1305 100644 --- a/services/clsi/app/coffee/CompileController.coffee +++ b/services/clsi/app/coffee/CompileController.coffee @@ -66,3 +66,13 @@ module.exports = CompileController = res.send JSON.stringify { code: codePositions } + + wordcount: (req, res, next = (error) ->) -> + file = req.query.file || "main.tex" + project_id = req.params.project_id + + CompileManager.wordcount project_id, file, (error, result) -> + return next(error) if error? + res.send JSON.stringify { + texcount: result + } diff --git a/services/clsi/app/coffee/CompileManager.coffee b/services/clsi/app/coffee/CompileManager.coffee index ac0c241149..296522f6b4 100644 --- a/services/clsi/app/coffee/CompileManager.coffee +++ b/services/clsi/app/coffee/CompileManager.coffee @@ -7,6 +7,8 @@ Path = require "path" logger = require "logger-sharelatex" Metrics = require "./Metrics" child_process = require "child_process" +CommandRunner = require(Settings.clsi?.commandRunner or "./CommandRunner") +fs = require("fs") module.exports = CompileManager = doCompile: (request, callback = (error, outputFiles) ->) -> @@ -107,4 +109,47 @@ module.exports = CompileManager = line: parseInt(line, 10) column: parseInt(column, 10) } - return results \ No newline at end of file + return results + + wordcount: (project_id, file_name, callback = (error, pdfPositions) ->) -> + logger.log project_id:project_id, file_name:file_name, "running wordcount" + file_path = "$COMPILE_DIR/" + file_name + command = [ "texcount", '-inc', file_path, "-out=" + file_path + ".wc"] + directory = Path.join(Settings.path.compilesDir, project_id) + timeout = 10 * 1000 + + CommandRunner.run project_id, command, directory, timeout, (error) -> + return callback(error) if error? + stdout = fs.readFileSync(directory + "/" + file_name + ".wc", "utf-8") + callback null, CompileManager._parseWordcountFromOutput(stdout) + + _parseWordcountFromOutput: (output) -> + results = { + encode: "" + textWords: 0 + headWords: 0 + outside: 0 + headers: 0 + elements: 0 + mathInline: 0 + mathDisplay: 0 + } + for line in output.split("\n") + [data, info] = line.split(":") + if data.indexOf("Encoding") > -1 + results['encode'] = info.trim() + if data.indexOf("in text") > -1 + results['textWords'] = parseInt(info, 10) + if data.indexOf("in head") > -1 + results['headWords'] = parseInt(info, 10) + if data.indexOf("outside") > -1 + results['outside'] = parseInt(info, 10) + if data.indexOf("of head") > -1 + results['headers'] = parseInt(info, 10) + if data.indexOf("Number of floats/tables/figures") > -1 + results['elements'] = parseInt(info, 10) + if data.indexOf("Number of math inlines") > -1 + results['mathInline'] = parseInt(info, 10) + if data.indexOf("Number of math displayed") > -1 + results['mathDisplay'] = parseInt(info, 10) + return results diff --git a/services/clsi/test/acceptance/coffee/SynctexTests.coffee b/services/clsi/test/acceptance/coffee/SynctexTests.coffee index 330eaff22e..02b2397897 100644 --- a/services/clsi/test/acceptance/coffee/SynctexTests.coffee +++ b/services/clsi/test/acceptance/coffee/SynctexTests.coffee @@ -35,4 +35,3 @@ describe "Syncing", -> code: [ { file: 'main.tex', line: 3, column: -1 } ] ) done() - diff --git a/services/clsi/test/acceptance/coffee/WordcountTests.coffee b/services/clsi/test/acceptance/coffee/WordcountTests.coffee new file mode 100644 index 0000000000..1789cfcdd0 --- /dev/null +++ b/services/clsi/test/acceptance/coffee/WordcountTests.coffee @@ -0,0 +1,34 @@ +Client = require "./helpers/Client" +request = require "request" +require("chai").should() +expect = require("chai").expect +path = require("path") +fs = require("fs") + +describe "Syncing", -> + before (done) -> + @request = + resources: [ + path: "main.tex" + content: fs.readFileSync(path.join(__dirname,"../fixtures/naugty_strings.txt"),"utf-8") + ] + @project_id = Client.randomId() + Client.compile @project_id, @request, (@error, @res, @body) => done() + + describe "wordcount file", -> + it "should return wordcount info", (done) -> + Client.wordcount @project_id, "main.tex", (error, result) -> + throw error if error? + expect(result).to.deep.equal( + texcount: { + encode: "utf8" + textWords: 2281 + headWords: 2 + outside: 0 + headers: 2 + elements: 0 + mathInline: 6 + mathDisplay: 0 + } + ) + done() diff --git a/services/clsi/test/acceptance/coffee/helpers/Client.coffee b/services/clsi/test/acceptance/coffee/helpers/Client.coffee index 7025b79cce..3e1a5b86d0 100644 --- a/services/clsi/test/acceptance/coffee/helpers/Client.coffee +++ b/services/clsi/test/acceptance/coffee/helpers/Client.coffee @@ -90,3 +90,12 @@ module.exports = Client = @compile project_id, req, callback + wordcount: (project_id, file, callback = (error, pdfPositions) ->) -> + request.get { + url: "#{@host}/project/#{project_id}/wordcount" + qs: { + file: file + } + }, (error, response, body) -> + return callback(error) if error? + callback null, JSON.parse(body) diff --git a/services/clsi/test/acceptance/fixtures/naugty_strings.txt b/services/clsi/test/acceptance/fixtures/naugty_strings.txt new file mode 100644 index 0000000000..92eb1ddce6 --- /dev/null +++ b/services/clsi/test/acceptance/fixtures/naugty_strings.txt @@ -0,0 +1,626 @@ +\documentclass{article} +\usepackage[utf8]{inputenc} + +\title{eee} +\author{henry.oswald } +\date{September 2015} + +\usepackage{natbib} +\usepackage{graphicx} + +\begin{document} + +\maketitle + +\section{Introduction} + +Encoding: utf8 + +# Reserved Strings +# +# Strings which may be used elsewhere in code + +undefined +undef +null +NULL +(null) +nil +NIL +true +false +True +False +None +\ +\\ + +# Numeric Strings +# +# Strings which can be interpreted as numeric + +0 +1 +1.00 +$1.00 +1/2 +1E2 +1E02 +1E+02 +-1 +-1.00 +-$1.00 +-1/2 +-1E2 +-1E02 +-1E+02 +1/0 +0/0 +-2147483648/-1 +-9223372036854775808/-1 +0.00 +0..0 +. +0.0.0 +0,00 +0,,0 +, +0,0,0 +0.0/0 +1.0/0.0 +0.0/0.0 +1,0/0,0 +0,0/0,0 +--1 +- +-. +-, +999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999 +NaN +Infinity +-Infinity +0x0 +0xffffffff +0xffffffffffffffff +0xabad1dea +123456789012345678901234567890123456789 +1,000.00 +1 000.00 +1'000.00 +1,000,000.00 +1 000 000.00 +1'000'000.00 +1.000,00 +1 000,00 +1'000,00 +1.000.000,00 +1 000 000,00 +1'000'000,00 +01000 +08 +09 +2.2250738585072011e-308 + +# Special Characters +# +# Strings which contain common special ASCII characters (may need to be escaped) + +,./;'[]\-= +<>?:"{}|_+ +!@#$%^&*()`~ + +# Unicode Symbols +# +# Strings which contain common unicode symbols (e.g. smart quotes) + +Ω≈ç√∫˜µ≤≥÷ +åß∂ƒ©˙∆˚¬…æ +œ∑´®†¥¨ˆøπ“‘ +¡™£¢∞§¶•ªº–≠ +¸˛Ç◊ı˜Â¯˘¿ +ÅÍÎÏ˝ÓÔÒÚÆ☃ +Œ„´‰ˇÁ¨ˆØ∏”’ +`⁄€‹›fifl‡°·‚—± +⅛⅜⅝⅞ +ЁЂЃЄЅІЇЈЉЊЋЌЍЎЏАБВГДЕЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯабвгдежзийклмнопрстуфхцчшщъыьэюя +٠١٢٣٤٥٦٧٨٩ + +# Unicode Subscript/Superscript +# +# Strings which contain unicode subscripts/superscripts; can cause rendering issues + +⁰⁴⁵ +₀₁₂ +⁰⁴⁵₀₁₂ + +# Quotation Marks +# +# Strings which contain misplaced quotation marks; can cause encoding errors + +' +" +'' +"" +'"' +"''''"'" +"'"'"''''" + +# Two-Byte Characters +# +# Strings which contain two-byte characters: can cause rendering issues or character-length issues + +田中さんにあげて下さい +パーティーへ行かないか +和製漢語 +部落格 +사회과학원 어학연구소 +찦차를 타고 온 펲시맨과 쑛다리 똠방각하 +社會科學院語學研究所 +울란바토르 +𠜎𠜱𠝹𠱓𠱸𠲖𠳏 + +# Japanese Emoticons +# +# Strings which consists of Japanese-style emoticons which are popular on the web + +ヽ༼ຈل͜ຈ༽ノ ヽ༼ຈل͜ຈ༽ノ +(。◕ ∀ ◕。) +`ィ(´∀`∩ +__ロ(,_,*) +・( ̄∀ ̄)・:*: +゚・✿ヾ╲(。◕‿◕。)╱✿・゚ +,。・:*:・゜’( ☻ ω ☻ )。・:*:・゜’ +(╯°□°)╯︵ ┻━┻) +(ノಥ益ಥ)ノ ┻━┻ +( ͡° ͜ʖ ͡°) + +# Emoji +# +# Strings which contain Emoji; should be the same behavior as two-byte characters, but not always + +😍 +👩🏽 +👾 🙇 💁 🙅 🙆 🙋 🙎 🙍 +🐵 🙈 🙉 🙊 +❤️ 💔 💌 💕 💞 💓 💗 💖 💘 💝 💟 💜 💛 💚 💙 +✋🏿 💪🏿 👐🏿 🙌🏿 👏🏿 🙏🏿 +🚾 🆒 🆓 🆕 🆖 🆗 🆙 🏧 +0️⃣ 1️⃣ 2️⃣ 3️⃣ 4️⃣ 5️⃣ 6️⃣ 7️⃣ 8️⃣ 9️⃣ 🔟 + +# Unicode Numbers +# +# Strings which contain unicode numbers; if the code is localized, it should see the input as numeric + +123 +١٢٣ + +# Right-To-Left Strings +# +# Strings which contain text that should be rendered RTL if possible (e.g. Arabic, Hebrew) + +ثم نفس سقطت وبالتحديد،, جزيرتي باستخدام أن دنو. إذ هنا؟ الستار وتنصيب كان. أهّل ايطاليا، بريطانيا-فرنسا قد أخذ. سليمان، إتفاقية بين ما, يذكر الحدود أي بعد, معاملة بولندا، الإطلاق عل إيو. +בְּרֵאשִׁית, בָּרָא אֱלֹהִים, אֵת הַשָּׁמַיִם, וְאֵת הָאָרֶץ +הָיְתָהtestالصفحات التّحول +﷽ +ﷺ + +# Unicode Spaces +# +# Strings which contain unicode space characters with special properties (c.f. https://www.cs.tut.fi/~jkorpela/chars/spaces.html) + + + + + + +␣ +␢ +␡ + +# Trick Unicode +# +# Strings which contain unicode with unusual properties (e.g. Right-to-left override) (c.f. http://www.unicode.org/charts/PDF/U2000.pdf) + +test +test + test +testtest +test + +# Zalgo Text +# +# Strings which contain "corrupted" text. The corruption will not appear in non-HTML text, however. (via http://www.eeemo.net) + +Ṱ̺̺̕o͞ ̷i̲̬͇̪͙n̝̗͕v̟̜̘̦͟o̶̙̰̠kè͚̮̺̪̹̱̤ ̖t̝͕̳̣̻̪͞h̼͓̲̦̳̘̲e͇̣̰̦̬͎ ̢̼̻̱̘h͚͎͙̜̣̲ͅi̦̲̣̰̤v̻͍e̺̭̳̪̰-m̢iͅn̖̺̞̲̯̰d̵̼̟͙̩̼̘̳ ̞̥̱̳̭r̛̗̘e͙p͠r̼̞̻̭̗e̺̠̣͟s̘͇̳͍̝͉e͉̥̯̞̲͚̬͜ǹ̬͎͎̟̖͇̤t͍̬̤͓̼̭͘ͅi̪̱n͠g̴͉ ͏͉ͅc̬̟h͡a̫̻̯͘o̫̟̖͍̙̝͉s̗̦̲.̨̹͈̣ +̡͓̞ͅI̗̘̦͝n͇͇͙v̮̫ok̲̫̙͈i̖͙̭̹̠̞n̡̻̮̣̺g̲͈͙̭͙̬͎ ̰t͔̦h̞̲e̢̤ ͍̬̲͖f̴̘͕̣è͖ẹ̥̩l͖͔͚i͓͚̦͠n͖͍̗͓̳̮g͍ ̨o͚̪͡f̘̣̬ ̖̘͖̟͙̮c҉͔̫͖͓͇͖ͅh̵̤̣͚͔á̗̼͕ͅo̼̣̥s̱͈̺̖̦̻͢.̛̖̞̠̫̰ +̗̺͖̹̯͓Ṯ̤͍̥͇͈h̲́e͏͓̼̗̙̼̣͔ ͇̜̱̠͓͍ͅN͕͠e̗̱z̘̝̜̺͙p̤̺̹͍̯͚e̠̻̠͜r̨̤͍̺̖͔̖̖d̠̟̭̬̝͟i̦͖̩͓͔̤a̠̗̬͉̙n͚͜ ̻̞̰͚ͅh̵͉i̳̞v̢͇ḙ͎͟-҉̭̩̼͔m̤̭̫i͕͇̝̦n̗͙ḍ̟ ̯̲͕͞ǫ̟̯̰̲͙̻̝f ̪̰̰̗̖̭̘͘c̦͍̲̞͍̩̙ḥ͚a̮͎̟̙͜ơ̩̹͎s̤.̝̝ ҉Z̡̖̜͖̰̣͉̜a͖̰͙̬͡l̲̫̳͍̩g̡̟̼̱͚̞̬ͅo̗͜.̟ +̦H̬̤̗̤͝e͜ ̜̥̝̻͍̟́w̕h̖̯͓o̝͙̖͎̱̮ ҉̺̙̞̟͈W̷̼̭a̺̪͍į͈͕̭͙̯̜t̶̼̮s̘͙͖̕ ̠̫̠B̻͍͙͉̳ͅe̵h̵̬͇̫͙i̹͓̳̳̮͎̫̕n͟d̴̪̜̖ ̰͉̩͇͙̲͞ͅT͖̼͓̪͢h͏͓̮̻e̬̝̟ͅ ̤̹̝W͙̞̝͔͇͝ͅa͏͓͔̹̼̣l̴͔̰̤̟͔ḽ̫.͕ +Z̮̞̠͙͔ͅḀ̗̞͈̻̗Ḷ͙͎̯̹̞͓G̻O̭̗̮ + +# Unicode Upsidedown +# +# Strings which contain unicode with an "upsidedown" effect (via http://www.upsidedowntext.com) + +˙ɐnbᴉlɐ ɐuƃɐɯ ǝɹolop ʇǝ ǝɹoqɐl ʇn ʇunpᴉpᴉɔuᴉ ɹodɯǝʇ poɯsnᴉǝ op pǝs 'ʇᴉlǝ ƃuᴉɔsᴉdᴉpɐ ɹnʇǝʇɔǝsuoɔ 'ʇǝɯɐ ʇᴉs ɹolop ɯnsdᴉ ɯǝɹo˥ +00˙Ɩ$- + +# Unicode font +# +# Strings which contain bold/italic/etc. versions of normal characters + +The quick brown fox jumps over the lazy dog +𝐓𝐡𝐞 𝐪𝐮𝐢𝐜𝐤 𝐛𝐫𝐨𝐰𝐧 𝐟𝐨𝐱 𝐣𝐮𝐦𝐩𝐬 𝐨𝐯𝐞𝐫 𝐭𝐡𝐞 𝐥𝐚𝐳𝐲 𝐝𝐨𝐠 +𝕿𝖍𝖊 𝖖𝖚𝖎𝖈𝖐 𝖇𝖗𝖔𝖜𝖓 𝖋𝖔𝖝 𝖏𝖚𝖒𝖕𝖘 𝖔𝖛𝖊𝖗 𝖙𝖍𝖊 𝖑𝖆𝖟𝖞 𝖉𝖔𝖌 +𝑻𝒉𝒆 𝒒𝒖𝒊𝒄𝒌 𝒃𝒓𝒐𝒘𝒏 𝒇𝒐𝒙 𝒋𝒖𝒎𝒑𝒔 𝒐𝒗𝒆𝒓 𝒕𝒉𝒆 𝒍𝒂𝒛𝒚 𝒅𝒐𝒈 +𝓣𝓱𝓮 𝓺𝓾𝓲𝓬𝓴 𝓫𝓻𝓸𝔀𝓷 𝓯𝓸𝔁 𝓳𝓾𝓶𝓹𝓼 𝓸𝓿𝓮𝓻 𝓽𝓱𝓮 𝓵𝓪𝔃𝔂 𝓭𝓸𝓰 +𝕋𝕙𝕖 𝕢𝕦𝕚𝕔𝕜 𝕓𝕣𝕠𝕨𝕟 𝕗𝕠𝕩 𝕛𝕦𝕞𝕡𝕤 𝕠𝕧𝕖𝕣 𝕥𝕙𝕖 𝕝𝕒𝕫𝕪 𝕕𝕠𝕘 +𝚃𝚑𝚎 𝚚𝚞𝚒𝚌𝚔 𝚋𝚛𝚘𝚠𝚗 𝚏𝚘𝚡 𝚓𝚞𝚖𝚙𝚜 𝚘𝚟𝚎𝚛 𝚝𝚑𝚎 𝚕𝚊𝚣𝚢 𝚍𝚘𝚐 +⒯⒣⒠ ⒬⒰⒤⒞⒦ ⒝⒭⒪⒲⒩ ⒡⒪⒳ ⒥⒰⒨⒫⒮ ⒪⒱⒠⒭ ⒯⒣⒠ ⒧⒜⒵⒴ ⒟⒪⒢ + +# Script Injection +# +# Strings which attempt to invoke a benign script injection; shows vulnerability to XSS + + +<script>alert('123');</script> + + +"> +'> +> + +< / script >< script >alert(123)< / script > + onfocus=JaVaSCript:alert(123) autofocus +" onfocus=JaVaSCript:alert(123) autofocus +' onfocus=JaVaSCript:alert(123) autofocus +<script>alert(123)</script> +ript>alert(123)ript> +--> +";alert(123);t=" +';alert(123);t=' +JavaSCript:alert(123) +;alert(123); +src=JaVaSCript:prompt(132) +">javascript:alert(1); + + + + + + +'`"><\x3Cscript>javascript:alert(1) +'`"><\x00script>javascript:alert(1) +ABCDEF +ABCDEF +ABCDEF +ABCDEF +ABCDEF +ABCDEF +ABCDEF +ABCDEF +ABCDEF +ABCDEF +ABCDEF +ABCDEF +ABCDEF +ABCDEF +ABCDEF +ABCDEF +ABCDEF +ABCDEF +ABCDEF +ABCDEF +ABCDEF +ABCDEF +ABCDEF +ABCDEF +ABCDEF +ABCDEF +ABCDEF +test +test +test +test +test +test +test +test +test +test +test +test +test +test +test +test +test +test +test +test +test +test +test +test +test +test +test +test +test +test +test +test +test +test +test +test +test +test +test +test +test +test +test +test +test +test +test +test +test +test +test +test +test +test +test +test +test +`"'> +`"'> +`"'> +`"'> +`"'> +`"'> +`"'> +`"'> +`"'> +`"'> +"`'> +"`'> +"`'> +"`'> +"`'> +"`'> +"`'> +"`'> +"`'> +"`'> +"`'> +"`'> +"`'> +"`'> +"`'> +"`'> +"`'> +"`'> +"`'> +"`'> +"`'> +"`'> +"`'> +"`'> +"`'> +"`'> +"`'> +"`'> +"`'> +"`'> +"`'> +"`'> +"`'> +"`'> +"`'> +"`'> +"`'> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +XXX + + + + + + + + +"> + + + + + + + + + + + +perl -e 'print "";' > out + + + + +< +