Initial open source commit

This commit is contained in:
James Allen 2014-08-15 12:13:35 +01:00
commit 421647ff63
16 changed files with 663 additions and 0 deletions

6
services/spelling/.gitignore vendored Normal file
View file

@ -0,0 +1,6 @@
**.swp
**.swo
app/js/*
app.js
test/UnitTests/js/*
node_modules/*

View file

@ -0,0 +1,26 @@
# Entry point for the spelling API: sets up logging and metrics, builds the
# restify server with its routes, and starts listening.
Settings = require 'settings-sharelatex'
logger = require 'logger-sharelatex'
logger.initialize("spelling-sharelatex")
SpellingAPIController = require './app/js/SpellingAPIController'
restify = require 'restify'
Path = require("path")
metrics = require("metrics-sharelatex")
# Initialise metrics under this service's own name; the previous value
# ("tpds") did not match the service started here.
metrics.initialize("spelling")
metrics.mongodb.monitor(Path.resolve(__dirname + "/node_modules/mongojs/node_modules/mongodb"), logger)

server = restify.createServer
  name: "spelling-sharelatex",
  version: "0.0.1"

# The controller reads the parsed JSON payload from req.body.
server.use restify.bodyParser(mapParams: false)
server.use metrics.http.monitor(logger)

server.post "/user/:user_id/check", SpellingAPIController.check
server.post "/user/:user_id/learn", SpellingAPIController.learn

# Lightweight liveness endpoint.
server.get "/status", (req, res)->
  res.send(status:'spelling api is up')

# Fall back to localhost:3005 when the settings leave host/port unset.
host = Settings.host || "localhost"
port = Settings.port || 3005
server.listen port, host, () ->
  console.log "#{server.name} listening at #{host}:#{port}"

View file

@ -0,0 +1,100 @@
child_process = require("child_process")
async = require "async"
_ = require "underscore"
# Runs one `aspell pipe` child process for a single batch of words and turns
# its output into per-word suggestion lists.
class ASpellRunner
  # Check `words` against the aspell dictionary named by `language`.
  # Calls back with (error) on failure, otherwise (null, results) where each
  # result is {index, suggestions}: `index` is the position in `words` of a
  # word aspell flagged and `suggestions` its (possibly empty) replacements.
  checkWords: (language, words, callback = (error, result) ->) ->
    @runAspellOnWords language, words, (error, output) =>
      return callback(error) if error?
      output = @removeAspellHeader(output)
      suggestions = @getSuggestions(output)
      results = []
      for word, i in words
        if suggestions[word]?
          results.push index: i, suggestions: suggestions[word]
      callback null, results

  # Parse aspell pipe output into a map of word -> list of suggestions.
  # Lines starting with "&" carry the suggestions after the first ": ";
  # lines starting with "#" mark a flagged word that has no suggestions.
  getSuggestions: (output) ->
    lines = output.split("\n")
    # Prototype-less map: with a plain {} a lookup for a correctly spelled
    # word such as "constructor" or "toString" would find the member inherited
    # from Object.prototype and report the word as misspelled.
    suggestions = Object.create(null)
    for line in lines
      if line[0] == "&" # Suggestions found
        parts = line.split(" ")
        if parts.length > 1
          word = parts[1]
          suggestionsString = line.slice(line.indexOf(":") + 2)
          suggestions[word] = suggestionsString.split(", ")
      else if line[0] == "#" # No suggestions
        parts = line.split(" ")
        if parts.length > 1
          word = parts[1]
          suggestions[word] = []
    return suggestions

  # Drop the first line of the output (the aspell version banner) and return
  # the remainder; returns "" when there is nothing after the first line.
  removeAspellHeader: (output) ->
    headerEnd = output.indexOf("\n")
    return "" if headerEnd == -1
    output.slice(headerEnd + 1)

  # Spawn aspell, feed it `words` one per tick, then close its stdin.
  # `callback` receives (error, output) once aspell's stdout ends.
  runAspellOnWords: (language, words, callback = (error, output) ->) ->
    @open(language)
    @captureOutput(callback)
    @setTerseMode()
    start = new Date()
    i = 0
    do tick = () =>
      # Give up and kill the process once the batch has taken too long.
      if new Date() - start > ASpell.ASPELL_TIMEOUT
        @close(true)
      else if i < words.length
        word = words[i]
        @sendWord(word)
        i++
        process.nextTick tick
      else
        @close()

  # Buffer stdout and stderr of the child; when stdout ends, call back with
  # the collected output, plus an Error if anything was written to stderr.
  captureOutput: (callback = (error, output) ->) ->
    output = ""
    error = ""
    @aspell.stdout.on "data", (chunk) ->
      output = output + chunk
    @aspell.stderr.on "data", (chunk) =>
      error = error + chunk
    @aspell.stdout.on "end", () ->
      if error == ""
        callback null, output
      else
        callback new Error(error), output

  # Start the aspell child process in pipe mode for `language`.
  open: (language) ->
    @finished = false
    @aspell = child_process.spawn("aspell", ["pipe", "-t", "--encoding=utf-8", "-d", language])

  # End aspell's stdin; with `force`, also SIGKILL it if it has not exited.
  close: (force) ->
    @finished = true
    @aspell.stdin.end()
    if force && !@aspell.exitCode?
      @aspell.kill("SIGKILL")

  setTerseMode: () ->
    @sendCommand("!")

  # Send one word to be checked ("^" prefixes the text to check).
  sendWord: (word) ->
    # Commands are newline-terminated, so a line break inside a word would let
    # the caller smuggle extra aspell commands in; flatten it to a space.
    @sendCommand("^" + String(word).replace(/[\r\n]+/g, " "))

  sendCommand: (command) ->
    @aspell.stdin.write(command + "\n")
module.exports = ASpell =
  # The description of how to call aspell from another program can be found here:
  # http://aspell.net/man-html/Through-A-Pipe.html
  #
  # Spell-check `words` in `language` with a fresh ASpellRunner.
  # `callback` is invoked at most once: with the runner's (error, result), or
  # with "process killed" if the runner has not answered within
  # ASPELL_TIMEOUT milliseconds, in which case the runner is force-closed.
  checkWords: (language, words, callback = (error, result) ->) ->
    runner = new ASpellRunner()
    timer = null
    finish = _.once (args...) ->
      # Cancel the watchdog so a completed request does not leave a timer
      # behind that later force-closes an already finished runner.
      clearTimeout timer
      callback args...
    forceClose = ->
      runner.close(true)
      finish("process killed")
    timer = setTimeout forceClose, @ASPELL_TIMEOUT
    runner.checkWords language, words, finish

  # Maximum time in milliseconds a single aspell run may take.
  ASPELL_TIMEOUT : 4000

View file

@ -0,0 +1,35 @@
# Module-level redis client used by the cache helpers in this file.
# Host, port and password are read from the service settings.
redis = require('redis')
settings = require('settings-sharelatex')
rclient = redis.createClient(settings.redis.port, settings.redis.host)
# Runs once at load time with whatever password is configured.
rclient.auth(settings.redis.password)
logger = require('logger-sharelatex')
# Lifetime of a cache entry in seconds, as handed to redis `expire`.
# (Previously 60 * 60 * 30, which is thirty hours rather than thirty minutes.)
thirtyMinutes = 30 * 60
# Small JSON cache on top of redis. Keys are namespaced through buildKey.
module.exports =
  # Delete the cached value for `key`.
  break: (key, callback)->
    rclient.del buildKey(key), callback

  # Store `value` as JSON under `key` and give it a time-to-live; the set and
  # the expire are queued on one multi and executed together.
  set :(key, value, callback)->
    redisKey = buildKey(key)
    serialised = JSON.stringify value
    transaction = rclient.multi()
    transaction.set redisKey, serialised
    transaction.expire redisKey, thirtyMinutes
    transaction.exec callback

  # Look up `key`. Calls back with (err) on a redis error, with no arguments
  # on a miss, and with (null, parsedValue) on a hit.
  get :(key, callback)->
    rclient.get buildKey(key), (err, raw)->
      if err?
        return callback(err)
      unless raw?
        logger.log key:key, "cache miss"
        return callback()
      parsed = JSON.parse raw
      logger.log key:key, foundId:parsed._id, "cache hit"
      callback null, parsed
# Prefix a cache key with the learned-words namespace.
buildKey = (key)-> "user-learned-words:" + key

View file

@ -0,0 +1,4 @@
# Database handle for the service: opens the MongoDB given by
# Settings.mongo.url and exposes the `spellingPreferences` collection.
MongoJS = require "mongojs"
Settings = require "settings-sharelatex"
module.exports = MongoJS(Settings.mongo.url, ["spellingPreferences"])

View file

@ -0,0 +1,18 @@
db = require("./DB")
# Persistence of per-user learned words in the `spellingPreferences`
# collection; documents are keyed by the user's token.
module.exports = LearnedWordsManager =
  # Append `word` to the user's learned words, creating the preferences
  # document when it does not exist yet. `callback` is passed to the update.
  learnWord: (user_token, word, callback = (error)->) ->
    selector = {token: user_token}
    change = {$push: {learnedWords: word}}
    options = {upsert: true}
    db.spellingPreferences.update selector, change, options, callback

  # Fetch the user's learned words. Calls back with (error) on failure,
  # otherwise (null, words); `words` is [] when nothing has been stored.
  getLearnedWords: (user_token, callback = (error, words)->) ->
    db.spellingPreferences.findOne {token: user_token}, (error, preferences) ->
      if error?
        return callback error
      learned = preferences?.learnedWords || []
      callback null, learned

View file

@ -0,0 +1,23 @@
StatsD = require('node-statsd').StatsD
# One StatsD client for the whole module; host and port are hard-coded.
statsd = new StatsD('localhost',8125)
# Namespace a metric name by service and NODE_ENV.
buildKey = (key)->
  environment = process.env.NODE_ENV
  "spelling." + environment + "." + key
# Thin helpers around the module's StatsD client. Every metric name goes
# through buildKey before being sent.
module.exports =
  # Increment the counter `key`, optionally sampled at `sampleRate`.
  inc : (key, sampleRate)->
    # sampleRate belongs to statsd.increment; it used to be passed to
    # buildKey, which ignores it, so the sample rate was silently dropped.
    statsd.increment buildKey(key), sampleRate

  # Measures elapsed time from construction until done() is called.
  Timer : class
    constructor :(key)->
      this.start = new Date()
      this.key = buildKey(key)

    # Report the elapsed milliseconds as "<key>-time" and bump "<key>-count".
    done:->
      timeSpan = new Date - this.start
      statsd.timing("#{this.key}-time", timeSpan)
      statsd.increment "#{this.key}-count"

  # Send `value` as a gauge for `key`, optionally sampled at `sampleRate`.
  gauge : (key, value, sampleRate)->
    stats = {}
    stat = buildKey(key)
    stats[stat] = value+"|g"
    statsd.send(stats, sampleRate)

View file

@ -0,0 +1,31 @@
SpellingAPIManager = require './SpellingAPIManager'
restify = require 'restify'
logger = require 'logger-sharelatex'
metrics = require('./Metrics')
# HTTP handlers for the spelling API. Both handlers only accept JSON
# requests and answer anything else with a NotAcceptableError.
module.exports = SpellingAPIController =
  # POST /user/:user_id/check
  # Runs the spell check for the request body and sends the result, or a
  # bare 500 when the check fails.
  check: (req, res, next) ->
    metrics.inc "spelling-check", 0.1
    if req.is("json")
      logger.log token: req?.params?.user_id, word_count: req?.body?.words?.length, "running check"
      SpellingAPIManager.runRequest req.params.user_id, req.body, (error, result) ->
        # The callback parameter is `error`; the old code tested an undefined
        # `err`, so failures were never logged or turned into a 500.
        if error?
          logger.err err:error, user_id:req?.params?.user_id, word_count: req?.body?.words?.length, "error processing spelling request"
          return res.send(500)
        res.send(result)
    else
      next(new restify.NotAcceptableError("Please provide a JSON request"))

  # POST /user/:user_id/learn
  # Stores the word from the request body for the user and replies 200;
  # errors are handed to `next`.
  learn: (req, res, next) ->
    metrics.inc "spelling-learn", 0.1
    if req.is("json")
      logger.log token: req?.params?.user_id, word: req?.body?.word, "learning word"
      SpellingAPIManager.learnWord req.params.user_id, req.body, (error, result) ->
        return next(error) if error?
        res.send(200)
        next()
    else
      next(new restify.NotAcceptableError("Please provide a JSON request"))

View file

@ -0,0 +1,37 @@
ASpell = require './ASpell'
LearnedWordsManager = require './LearnedWordsManager'
async = require 'async'
# Request-level logic of the spelling API: validates the decoded request
# body, runs aspell and applies the user's learned words.
module.exports = SpellingAPIManager =
  # Spell-check `request.words` (language `request.language`, default "en").
  # With a `token`, misspellings whose word the user has learned are removed.
  # Calls back with (error) or (null, {misspellings: [...]}).
  runRequest: (token, request, callback = (error, result) ->) ->
    # `request?` also covers a missing body, which used to throw here.
    if !request?.words?
      return callback(new Error("malformed JSON"))
    lang = request.language || "en"
    # Cap the batch for every caller; previously only requests with a token
    # were truncated, so anonymous requests could submit unbounded input.
    words = request.words.slice(0, 10000)

    check = (wordsToCheck, done) ->
      ASpell.checkWords lang, wordsToCheck, (error, misspellings) ->
        done error, misspellings: misspellings

    if token?
      LearnedWordsManager.getLearnedWords token, (error, learnedWords) ->
        return callback(error) if error?
        check words, (error, result) ->
          return callback error if error?
          # Keep only misspellings the user has not taught us.
          result.misspellings = result.misspellings.filter (m) ->
            word = words[m.index]
            learnedWords.indexOf(word) == -1
          callback null, result
    else
      check(words, callback)

  # Remember `request.word` for the user identified by `token`.
  # Calls back with an Error when the word or the token is missing.
  learnWord: (token, request, callback = (error) ->) ->
    if !request?.word?
      return callback(new Error("malformed JSON"))
    if !token?
      return callback(new Error("no token provided"))
    LearnedWordsManager.learnWord token, request.word, callback

View file

@ -0,0 +1,10 @@
# Default settings for the spelling service: where the HTTP API listens and
# how to reach redis and MongoDB.
Settings = {
  port: 3005,
  host: "localhost",
  redis: {
    port: 6379,
    host: "127.0.0.1",
    password: ""
  },
  mongo: {
    url: 'mongodb://127.0.0.1/sharelatexTesting'
  }
}

module.exports = Settings

View file

@ -0,0 +1,23 @@
{
"name": "spelling-sharelatex",
"author": "ShareLaTeX <team@sharelatex.com>",
"description": "A JSON API wrapper around aspell",
"version": "0.0.1",
"dependencies": {
"express": "3.1.0",
"async": "0.1.22",
"restify": "2.5.1",
"settings": "git+ssh://git@bitbucket.org:sharelatex/settings-sharelatex.git#master",
"logger": "git+ssh://git@bitbucket.org:sharelatex/logger-sharelatex.git#bunyan",
"metrics-sharelatex": "git+https://github.com/sharelatex/metrics-sharelatex.git#master",
"node-statsd": "0.0.3",
"underscore": "1.4.4",
"mongojs": "0.9.11",
"redis": "~0.8.4"
},
"devDependencies": {
"sinon": "",
"chai": "",
"sandboxed-module": ""
}
}

View file

@ -0,0 +1,49 @@
namespace 'run' do
  # Compile first, then start the server with its log output piped to bunyan.
  desc "compiles and runs the spelling-sharelatex server"
  task :app => ["compile:app"] do
    sh "node app.js | bunyan"
  end
end
namespace 'compile' do
  # Rebuilds app/js from app/coffee and compiles the root app.coffee.
  desc "compiles application files"
  task :app do
    FileUtils.rm_rf "app/js"

    sh "coffee -c -o app/js/ app/coffee/" do |ok, res|
      raise "error compiling app folder tests : #{res}" unless ok
      puts 'finished app/coffee compile'
    end

    sh "coffee -c app.coffee" do |ok, res|
      raise "error compiling root app file: #{res}" unless ok
      puts 'finished app.coffee compile'
    end
  end

  # Rebuilds test/UnitTests/js after the application itself has compiled.
  desc "compiles unit tests"
  task :unit_tests => ["compile:app"] do
    FileUtils.rm_rf "test/UnitTests/js"
    puts "Compiling Unit Tests to JS"

    sh "coffee -c -o test/UnitTests/js/ test/UnitTests/coffee/" do |ok, res|
      raise "error compiling tests : #{res}" unless ok
      puts 'finished unit tests compile'
    end
  end
end
namespace 'test' do
  # Runs the compiled unit tests with mocha's spec reporter.
  desc "Run Unit Tests"
  task :unit => ["compile:unit_tests"] do
    puts "Running Unit Tests"
    sh "mocha -R spec test/UnitTests/js/*" do |ok, res|
      raise "error running unit tests : #{res}" unless ok
    end
  end
end

View file

@ -0,0 +1,58 @@
sinon = require 'sinon'
chai = require 'chai'
should = chai.should()
# Tests that run the compiled ASpell module against real dictionaries
# ("en" is requested throughout).
describe "ASpell", ->
  beforeEach ->
    @ASpell = require("../../../app/js/ASpell")

  describe "a correctly spelled word", ->
    beforeEach (done) ->
      @ASpell.checkWords "en", ["word"], (error, @result) => done()

    it "should not correct the word", ->
      @result.length.should.equal 0

  describe "a misspelled word", ->
    beforeEach (done) ->
      @ASpell.checkWords "en", ["bussines"], (error, @result) => done()

    it "should correct the word", ->
      @result.length.should.equal 1
      @result[0].suggestions.indexOf("business").should.not.equal -1

  describe "multiple words", ->
    beforeEach (done) ->
      @ASpell.checkWords "en", ["bussines", "word", "neccesary"], (error, @result) => done()

    it "should correct the incorrect words", ->
      @result[0].index.should.equal 0
      @result[0].suggestions.indexOf("business").should.not.equal -1
      @result[1].index.should.equal 2
      @result[1].suggestions.indexOf("necessary").should.not.equal -1

  describe "without a valid language", ->
    beforeEach (done) ->
      @ASpell.checkWords "notALang", ["banana"], (@error, @result) => done()

    it "should return an error", ->
      should.exist @error

  describe "when there are no suggestions", ->
    beforeEach (done) ->
      @ASpell.checkWords "en", ["asdkfjalkdjfadhfkajsdhfashdfjhadflkjadhflajsd"], (@error, @result) => done()

    it "should return a blank array", ->
      @result.length.should.equal 1
      @result[0].suggestions.should.deep.equal []

  describe "when the request times out", ->
    beforeEach (done) ->
      words = ("abcdefg" for i in [0..1000000])
      # require() hands back the same module object every time, so remember
      # the real timeout and put it back after the test.
      @originalTimeout = @ASpell.ASPELL_TIMEOUT
      @ASpell.ASPELL_TIMEOUT = 100
      @start = new Date()
      @ASpell.checkWords "en", words, (error, @result) =>
        @elapsed = new Date() - @start
        done()

    afterEach ->
      @ASpell.ASPELL_TIMEOUT = @originalTimeout

    it "should return in reasonable time", ->
      # With the 100ms limit in force the call must finish well before the
      # default limit would have expired.
      @elapsed.should.be.below @originalTimeout

View file

@ -0,0 +1,56 @@
modulePath = "../../../app/js/Cache.js"
should = require('chai').should()
SandboxedModule = require('sandboxed-module')
assert = require('chai').assert
path = require 'path'
user_token = "23ionisou90iilkn"
spellings = ["bob", "smith", "words"]
# Unit tests for Cache, loaded through SandboxedModule with a fake redis
# client. Each fake client carries a no-op `auth` because Cache calls
# rclient.auth(...) as soon as the module is loaded; without it the require
# throws before any assertion runs.
describe 'Cache', ->

  it 'should save the user into redis', (done)->
    @redis =
      expire: (key, value)->
        key.should.equal "user-learned-words:#{user_token}"
        (value > 200).should.equal true
      set: (key, value)->
        key.should.equal "user-learned-words:#{user_token}"
        value.should.equal JSON.stringify(spellings)
      exec:->
        done()
    @cache = SandboxedModule.require modulePath, requires:
      'redis': createClient :=> {auth: (->), multi:=> @redis}
    @cache.set user_token, spellings, ->

  it 'should get the user from redis', (done)->
    @redis =
      auth: ->
      get: (key, cb)->
        key.should.equal "user-learned-words:#{user_token}"
        cb(null, JSON.stringify(spellings))
    @cache = SandboxedModule.require modulePath, requires:
      'redis': createClient :=> return @redis
    @cache.get user_token, (err, returnedSpellings)->
      assert.deepEqual returnedSpellings, spellings
      assert.equal err, null
      done()

  it 'should return nothing if the key doesnt exist', (done)->
    @redis =
      auth: ->
      get: (key, cb)->
        cb(null, null)
    @cache = SandboxedModule.require modulePath, requires:
      'redis': createClient :=> return @redis
    @cache.get user_token, (err, founduser)->
      assert.equal founduser, undefined
      done()

  it 'should be able to delete from redis to break cache', (done)->
    @redis =
      auth: ->
      del: (key, cb)->
        key.should.equal "user-learned-words:#{user_token}"
        cb(null)
    @cache = SandboxedModule.require modulePath, requires:
      'redis': createClient :=> return @redis
    @cache.break user_token, done

View file

@ -0,0 +1,83 @@
sinon = require('sinon')
chai = require 'chai'
expect = chai.expect
SandboxedModule = require('sandboxed-module')
modulePath = require('path').join __dirname, '../../../app/js/LearnedWordsManager'
# Unit tests for LearnedWordsManager with the database (and an unused cache)
# replaced by sinon stubs.
describe "LearnedWordsManager", ->
  beforeEach ->
    @token = "a6b3cd919ge"
    @callback = sinon.stub()
    spellingPreferences = {update: sinon.stub().callsArg(3)}
    @db = {spellingPreferences: spellingPreferences}
    @cache = {get: sinon.stub(), set: sinon.stub(), break: sinon.stub()}
    stubs = {}
    stubs["./DB"] = @db
    stubs["./Cache"] = @cache
    @LearnedWordsManager = SandboxedModule.require modulePath, requires: stubs

  describe "learnWord", ->
    beforeEach ->
      @word = "instanton"
      @LearnedWordsManager.learnWord @token, @word, @callback

    it "should insert the word in the word list in the database", ->
      selector = {token: @token}
      change = {$push: {learnedWords: @word}}
      options = {upsert: true}
      wasCalled = @db.spellingPreferences.update.calledWith(selector, change, options)
      expect(wasCalled).to.equal true

    it "should call the callback", ->
      expect(@callback.called).to.equal true

  describe "getLearnedWords", ->
    beforeEach ->
      @cache.get.callsArgWith(1)
      @wordList = ["apples", "bananas", "pears"]
      @db.spellingPreferences.findOne = (conditions, callback) =>
        callback null, {learnedWords: @wordList}
      sinon.spy @db.spellingPreferences, "findOne"
      @LearnedWordsManager.getLearnedWords @token, @callback

    it "should get the word list for the given user", ->
      queried = @db.spellingPreferences.findOne.calledWith({token: @token})
      expect(queried).to.equal true

    it "should return the word list in the callback", ->
      answered = @callback.calledWith(null, @wordList)
      expect(answered).to.equal true

  # The caching tests below are disabled.
  ###
  describe "caching the result", ->
    it 'should use the cache first if it is primed', (done)->
      @wordList = ["apples", "bananas", "pears"]
      @cache.get.callsArgWith(1, null, learnedWords: @wordList)
      @db.spellingPreferences.findOne = sinon.stub()
      @LearnedWordsManager.getLearnedWords @token, (err, spellings)=>
        @db.spellingPreferences.find.called.should.equal false
        @wordList.should.equal spellings
        done()
    it 'should set the cache after hitting the db', (done)->
      @wordList = ["apples", "bananas", "pears"]
      @cache.get.callsArgWith(1)
      @db.spellingPreferences.findOne = sinon.stub().callsArgWith(1, null, learnedWords: @wordList)
      @LearnedWordsManager.getLearnedWords @token, (err, spellings)=>
        @cache.set.calledWith(@token, learnedWords:@wordList).should.equal true
        done()
    it 'should break cache when update is called', (done)->
      @word = "instanton"
      @LearnedWordsManager.learnWord @token, @word, =>
        @cache.break.calledWith(@token).should.equal true
        done()
  ###

View file

@ -0,0 +1,104 @@
sinon = require('sinon')
chai = require 'chai'
expect = chai.expect
chai.should()
SandboxedModule = require('sandboxed-module')
modulePath = require('path').join __dirname, '../../../app/js/SpellingAPIManager'
# Unit tests for SpellingAPIManager with ASpell and LearnedWordsManager
# replaced by stubs.
describe "SpellingAPIManager", ->
  beforeEach ->
    @token = "user-id-123"
    @ASpell = {}
    @learnedWords = ["lerned"]
    @LearnedWordsManager =
      getLearnedWords: sinon.stub().callsArgWith(1, null, @learnedWords)
      learnWord: sinon.stub().callsArg(2)
    @SpellingAPIManager = SandboxedModule.require modulePath, requires:
      "./ASpell" : @ASpell
      "./LearnedWordsManager" : @LearnedWordsManager

  describe "runRequest", ->
    beforeEach ->
      @nonLearnedWords = ["some", "words", "htat", "are", "speled", "rong", "lerned"]
      @allWords = @nonLearnedWords.concat(@learnedWords)
      @misspellings = [
        { index: 2, suggestions: ["that"] }
        { index: 4, suggestions: ["spelled"] }
        { index: 5, suggestions: ["wrong", "ring"] }
        { index: 6, suggestions: ["learned"] }
      ]
      @misspellingsWithoutLearnedWords = @misspellings.slice(0,3)
      @ASpell.checkWords = (lang, word, callback) =>
        callback null, @misspellings
      sinon.spy @ASpell, "checkWords"

    describe "with sensible JSON", ->
      beforeEach (done) ->
        @SpellingAPIManager.runRequest @token, words: @allWords, (error, @result) => done()

      it "should return the words that are spelled incorrectly and not learned", ->
        expect(@result.misspellings).to.deep.equal @misspellingsWithoutLearnedWords

    describe "with a missing words array", ->
      beforeEach (done) ->
        @SpellingAPIManager.runRequest @token, {}, (@error, @result) => done()

      it "should return an error", ->
        # Assert on the message explicitly so a wrong error cannot pass.
        expect(@error).to.be.an.instanceof Error
        expect(@error.message).to.equal "malformed JSON"

    describe "with a missing token", ->
      beforeEach (done) ->
        @SpellingAPIManager.runRequest null, words: @allWords, (@error, @result) => done()

      it "should spell check without using any learned words", ->
        @LearnedWordsManager.getLearnedWords.called.should.equal false

    describe "without a language", ->
      beforeEach (done) ->
        @SpellingAPIManager.runRequest @token, words: @allWords, (error, @result) => done()

      it "should use en as the default", ->
        @ASpell.checkWords.calledWith("en").should.equal true

    describe "with a language", ->
      beforeEach (done) ->
        @SpellingAPIManager.runRequest @token, {
          words: @allWords
          language: @language = "fr"
        }, (error, @result) => done()

      it "should use the language", ->
        @ASpell.checkWords.calledWith(@language).should.equal true

    describe "with a very large collection of words", ->
      beforeEach (done) ->
        @manyWords = ("word" for i in [1..100000])
        @SpellingAPIManager.runRequest @token, words: @manyWords, (error, @result) => done()

      it "should truncate to 10,000 words", ->
        @ASpell.checkWords.calledWith(sinon.match.any, @manyWords.slice(0, 10000)).should.equal true

  describe "learnWord", ->
    # The expected messages of the next two cases used to be swapped.
    describe "without a token", ->
      beforeEach (done) -> @SpellingAPIManager.learnWord null, word: "banana", (@error) => done()

      it "should return an error", ->
        expect(@error).to.be.an.instanceof Error
        expect(@error.message).to.equal "no token provided"

    describe "without a word", ->
      beforeEach (done) -> @SpellingAPIManager.learnWord @token, {}, (@error) => done()

      it "should return an error", ->
        expect(@error).to.be.an.instanceof Error
        expect(@error.message).to.equal "malformed JSON"

    describe "with a word and a token", ->
      beforeEach (done) ->
        @word = "banana"
        @SpellingAPIManager.learnWord @token, word: @word, (@error) => done()

      it "should call LearnedWordsManager.learnWord", ->
        @LearnedWordsManager.learnWord.calledWith(@token, @word).should.equal true