overleaf/services/spelling/app/coffee/ASpell.coffee

119 lines
3.6 KiB
CoffeeScript
Raw Normal View History

2014-08-15 07:13:35 -04:00
async = require "async"
2015-03-04 11:43:59 -05:00
ASpellWorkerPool = require "./ASpellWorkerPool"
LRU = require "lru-cache"
logger = require 'logger-sharelatex'
fs = require 'fs'
2016-12-13 04:14:09 -05:00
settings = require("settings-sharelatex")
Path = require("path")
2014-08-15 07:13:35 -04:00
2015-03-04 11:43:59 -05:00
# In-memory LRU cache of spelling results, keyed by "<language>:<word>".
# A value of `true` marks a correctly spelled word; any other value is the
# list of suggestions stored for a misspelled word.
cache = LRU(10000)
OneMinute = 60 * 1000

# The cache is persisted to disk by writing a temporary file first and then
# renaming it over the real cache file.
cacheFsPath = Path.resolve(settings.cacheDir, "spell.cache")
cacheFsPathTmp = cacheFsPath + ".tmp"

# load any existing cache
try
  oldCache = fs.readFileSync cacheFsPath
  cache.load JSON.parse(oldCache)
catch err
  # best effort: a missing or unreadable cache file just means starting empty
  logger.log err:err, cacheFsPath:cacheFsPath, "could not load the cache file"

# write the cache every 30 minutes
setInterval () ->
  dump = JSON.stringify cache.dump()
  fs.writeFile cacheFsPathTmp, dump, (err) ->
    if err?
      logger.log {err}, "error writing cache file"
      # Async fs calls need a callback: without one the error is lost and
      # newer Node versions throw a TypeError.
      fs.unlink cacheFsPathTmp, (unlinkErr) ->
        # the temporary file may never have been created, so ENOENT is expected
        if unlinkErr? and unlinkErr.code != "ENOENT"
          logger.log {err: unlinkErr, cacheFsPathTmp: cacheFsPathTmp}, "error removing temporary cache file"
    else
      fs.rename cacheFsPathTmp, cacheFsPath, (renameErr) ->
        if renameErr?
          logger.log {err: renameErr, cacheFsPath: cacheFsPath}, "error renaming cache file"
        else
          # only report success once the new file is actually in place
          logger.log {len: dump.length, cacheFsPath:cacheFsPath}, "wrote cache file"
, 30 * OneMinute
2014-08-15 07:13:35 -04:00
# Spell-checks a batch of words for one language. Results are answered from
# the module-level `cache` where possible; only uncached words are sent to the
# aspell worker pool.
class ASpellRunner
  # Check `words` for `language`.
  #
  # language - language identifier; used as the cache key prefix and passed
  #            on to the worker pool
  # words    - array of words to check
  # callback - called with (error) on failure, otherwise (null, results),
  #            where results holds one {index, suggestions} entry per
  #            misspelled word and `index` is the word's position in `words`
  checkWords: (language, words, callback = (error, result) ->) ->
    @runAspellOnWords language, words, (error, output) =>
      return callback(error) if error?
      #output = @removeAspellHeader(output)
      suggestions = @getSuggestions(language, output)
      results = []
      hits = 0
      addToCache = {}
      for word, i in words
        key = language + ':' + word
        cached = cache.get(key)
        if cached?
          hits++
          if cached == true
            # valid word, no need to do anything
            continue
          else
            results.push index: i, suggestions: cached
        else
          if suggestions[key]?
            addToCache[key] = suggestions[key]
            results.push index: i, suggestions: suggestions[key]
          else
            # a valid word, but uncached
            addToCache[key] = true
      # update the cache after processing all words, to avoid cache
      # changing while we use it
      for k, v of addToCache
        cache.set(k, v)
      # avoid logging "NaN" when called with an empty word list
      hitrate = if words.length then (hits/words.length).toFixed(2) else "0.00"
      logger.log hits: hits, total: words.length, hitrate: hitrate, "cache hit rate"
      callback null, results

  # Parse aspell pipe output into a map of "<language>:<word>" -> suggestions.
  #
  # Lines starting with "&" carry suggestions: the word is the second
  # space-separated field and the suggestions follow the first ": ",
  # separated by ", ". Lines starting with "#" are misspelled words without
  # suggestions and map to an empty array. All other lines are ignored.
  getSuggestions: (language, output) ->
    lines = output.split("\n")
    suggestions = {}
    for line in lines
      if line[0] == "&" # Suggestions found
        parts = line.split(" ")
        if parts.length > 1
          word = parts[1]
          suggestionsString = line.slice(line.indexOf(":") + 2)
          suggestions[language + ":" + word] = suggestionsString.split(", ")
      else if line[0] == "#" # No suggestions
        parts = line.split(" ")
        if parts.length > 1
          word = parts[1]
          suggestions[language + ":" + word] = []
    return suggestions

  #removeAspellHeader: (output) -> output.slice(1)

  # Send the not-yet-cached, de-duplicated subset of `words` to the worker
  # pool and hand its string output to `callback`. If every word is already
  # cached the callback is invoked immediately with an empty string.
  runAspellOnWords: (language, words, callback = (error, output) ->) ->
    # send words to aspell, get back string output for those words
    # find a free pipe for the language (or start one)
    # send the words down the pipe
    # send an END marker that will generate a "*" line in the output
    # when the output pipe receives the "*" return the data sofar and reset the pipe to be available
    #
    # @open(language)
    # @captureOutput(callback)
    # @setTerseMode()
    # start = new Date()

    # Use a prototype-less object as the "seen" set: with a plain {} words
    # such as "constructor" or "toString" look already seen (they resolve to
    # inherited Object.prototype members) and would never be sent to aspell.
    newWord = Object.create(null)
    for word in words
      newWord[word] = true if !newWord[word] && !cache.has(language + ':' + word)
    words = Object.keys(newWord)

    if words.length
      WorkerPool.check(language, words, ASpell.ASPELL_TIMEOUT, callback)
    else
      callback null, ""
2014-08-15 07:13:35 -04:00
# Public interface of this module.
# The description of how to call aspell from another program can be found here:
# http://aspell.net/man-html/Through-A-Pipe.html
ASpell =
  # Spell-check `words` for `language`; each call is delegated to a fresh
  # ASpellRunner instance.
  checkWords: (language, words, callback = (error, result) ->) ->
    (new ASpellRunner()).checkWords language, words, callback

  # Handed to WorkerPool.check as its timeout argument
  # (presumably milliseconds -- confirm against ASpellWorkerPool).
  ASPELL_TIMEOUT: 4000

module.exports = ASpell

# Single worker pool instance used by every ASpellRunner in this module.
WorkerPool = new ASpellWorkerPool()