support Aspell worker pool

This commit is contained in:
Brian Gough 2015-03-04 16:43:59 +00:00
parent 726afb6b90
commit a49f80cb21
3 changed files with 144 additions and 63 deletions

View file

@ -1,17 +1,20 @@
child_process = require("child_process")
async = require "async"
_ = require "underscore"
ASpellWorkerPool = require "./ASpellWorkerPool"
LRU = require "lru-cache"
cache = LRU(10000)
class ASpellRunner
# Run aspell over `words` for `language` and report the words that have an
# entry in the parsed aspell output.
#
# callback(error) is invoked if running aspell fails; otherwise
# callback(null, results), where each result is {index, suggestions} and
# `index` is the position of the word inside `words`.
checkWords: (language, words, callback = (error, result) ->) ->
  @runAspellOnWords language, words, (error, output) =>
    return callback(error) if error?
    # NOTE(review): the active call and its commented-out copy both appear
    # here; this looks like the "before" and "after" of one line — confirm
    # which of the two is meant to remain.
    output = @removeAspellHeader(output)
    #output = @removeAspellHeader(output)
    suggestions = @getSuggestions(output)
    results = []
    for word, i in words
      if suggestions[word]?
        # remember the suggestions under a "language:word" key in the LRU cache
        cache.set(language + ':' + word, suggestions[word])
        results.push index: i, suggestions: suggestions[word]
    callback null, results
@ -31,65 +34,30 @@ class ASpellRunner
word = parts[1]
suggestions[word] = []
return suggestions
removeAspellHeader: (output) -> output.slice(1)
#removeAspellHeader: (output) -> output.slice(1)
# Start an aspell process for `language`, feed it `words` in batches and
# deliver the collected output through `callback` (wired up by captureOutput).
runAspellOnWords: (language, words, callback = (error, output) ->) ->
  @open(language)
  @captureOutput(callback)
  @setTerseMode()
  start = new Date()
  i = 0
  # `do` invokes tick immediately; it then re-schedules itself with
  # setTimeout 0 after each batch until every word has been sent.
  do tick = () =>
    if new Date() - start > ASpell.ASPELL_TIMEOUT
      # took longer than ASPELL_TIMEOUT ms: close with force = true
      @close(true)
    else if i < words.length
      # batch up the words to check for efficiency
      batch = words.slice(i, i + ASpell.ASPELL_BATCH_SIZE)
      @sendWords(batch)
      i += ASpell.ASPELL_BATCH_SIZE
      setTimeout tick, 0
    else
      # all words sent: close aspell's stdin
      @close()
  # send words to aspell, get back string output for those words
  # find a free pipe for the language (or start one)
  # send the words down the pipe
  # send an END marker that will generate a "*" line in the output
  # when the output pipe receives the "*" return the data sofar and reset the pipe to be available
  #
  # @open(language)
  # @captureOutput(callback)
  # @setTerseMode()
  # start = new Date()
# Collect everything the aspell process writes and hand it to `callback`
# once its stdout stream ends.
#
# callback(null, output) when nothing was written to stderr;
# callback(new Error(stderrText), output) otherwise.
captureOutput: (callback = (error, output) ->) ->
  output = ""
  error = ""
  @aspell.stdout.on "data", (chunk) ->
    output = output + chunk
  @aspell.stderr.on "data", (chunk) =>
    error = error + chunk
  @aspell.stdout.on "end", () ->
    # any stderr text at all is reported as an error
    if error == ""
      callback null, output
    else
      callback new Error(error), output
newWord = {}
for word in words
newWord[word] = true if !newWord[word] && !cache.get(language + ':' + word)?
words = Object.keys(newWord)
# Spawn an `aspell pipe` child process using the `language` dictionary and
# UTF-8 encoding, store it on @aspell, and reset the @finished flag.
open: (language) ->
  @finished = false
  @aspell = child_process.spawn("aspell", ["pipe", "-t", "--encoding=utf-8", "-d", language])
# Mark this run as finished and end aspell's stdin.
# When `force` is truthy and the process has no exit code yet, it is also
# sent SIGKILL.
close: (force) ->
  @finished = true
  @aspell.stdin.end()
  if force && !@aspell.exitCode?
    @aspell.kill("SIGKILL")
# Send the "!" command, which puts the aspell pipe into terse mode.
setTerseMode: () ->
  @sendCommand("!")
# Send a single word to aspell as a "^"-prefixed line.
sendWord: (word) ->
  @sendCommand("^" + word)
# Send several words to aspell on one "^"-prefixed line, separated by spaces.
sendWords: (words) ->
  # Aspell accepts multiple words to check on the same line
  # ^word1 word2 word3 ...
  # See aspell.info, writing programs to use Aspell Through A Pipe
  @sendCommand("^" + words.join(" "))
# Write `command` followed by a newline to the aspell process's stdin.
sendCommand: (command) ->
  @aspell.stdin.write(command + "\n")
if words.length
WorkerPool.check(language, words, ASpell.ASPELL_TIMEOUT, callback)
else
callback null, []
module.exports = ASpell =
# The description of how to call aspell from another program can be found here:
@ -98,10 +66,7 @@ module.exports = ASpell =
runner = new ASpellRunner()
callback = _.once callback
runner.checkWords language, words, callback
forceClose = ->
runner.close(true)
callback("process killed")
setTimeout forceClose, @ASPELL_TIMEOUT
ASPELL_TIMEOUT : 4000
ASPELL_BATCH_SIZE : 100
WorkerPool = new ASpellWorkerPool()

View file

@ -0,0 +1,79 @@
child_process = require("child_process")
# Number of words sent to aspell on a single "^" line.
BATCH_SIZE = 100

# A long-lived `aspell pipe` child process for one language.
#
# Lifecycle of @state:
#   'ready'  - idle, may be handed a new check() request
#   'busy'   - a check() is in flight; @callback is waiting for the result
#   'killed' - the child process has exited and must not be reused
class ASpellWorker
  # language: aspell dictionary name passed to `-d`.
  constructor: (language) ->
    @language = language
    @callback = null
    # a fresh worker has nothing in flight, so it can take a request
    @state = 'ready'
    @pipe = child_process.spawn("aspell", ["pipe", "-t", "--encoding=utf-8", "-d", language])
    @pipe.on 'exit', () =>
      @state = 'killed'
    @pipe.on 'error', (err) =>
      @finish err, []
    @pipe.stdin.on 'error', (err) =>
      @finish err, []

    output = ""
    error = ""
    @pipe.stdout.on "data", (chunk) =>
      # TODO: strip aspell header
      output = output + chunk
      # a line holding only "*" is the end-of-data marker produced by flush()
      if chunk.toString().match(/^\*$/m)
        result = output
        output = ""
        error = ""
        # become available again before notifying the caller, so a request
        # issued from inside the callback can reuse this worker
        @state = 'ready'
        @finish null, result

    @pipe.stderr.on "data", (chunk) =>
      error = error + chunk

    @pipe.stdout.on "end", () =>
      # process has ended; deliver whatever was collected to a caller that is
      # still waiting (e.g. after the process was killed on timeout)
      if error == ""
        @finish null, output
      else
        @finish new Error(error), output

  # Deliver (err, output) to the pending callback, at most once per check().
  # Does nothing when no check() is waiting, which avoids calling an undefined
  # callback and avoids reporting the same request twice.
  finish: (err, output) ->
    callback = @callback
    @callback = null
    callback?(err, output)

  # True when the worker is idle and can accept a check() call.
  isReady: () ->
    return @state == 'ready'

  # Send `words` to aspell; `callback(err, output)` receives the raw aspell
  # output once the end-of-data marker arrives (or the process fails/ends).
  check: (words, callback) ->
    # mark the worker as in use until the end-of-data marker is seen, so it is
    # not handed to another request while this one is still in flight
    @state = 'busy'
    @callback = callback
    @setTerseMode()
    @write(words)
    @flush()

  # Send all words, BATCH_SIZE at a time.
  write: (words) ->
    i = 0
    while i < words.length
      # batch up the words to check for efficiency
      batch = words.slice(i, i + BATCH_SIZE)
      @sendWords batch
      i += BATCH_SIZE

  flush: () ->
    # send an end of data marker
    @sendCommand("%")		# take the aspell pipe out of terse mode so we can look for a '*'
    @sendCommand("end")	# this is a valid word so it will generate a '*'
    @sendCommand("!")		# go back into terse mode

  # Close aspell's stdin so the process can exit.
  shutdown: () ->
    @pipe.stdin.end()

  # Send the "!" command, which puts the aspell pipe into terse mode.
  setTerseMode: () ->
    @sendCommand("!")

  # Send a single word as a "^"-prefixed line.
  sendWord: (word) ->
    @sendCommand("^" + word)

  sendWords: (words) ->
    # Aspell accepts multiple words to check on the same line
    # ^word1 word2 word3 ...
    # See aspell.info, writing programs to use Aspell Through A Pipe
    @sendCommand("^" + words.join(" "))

  # Write `command` plus a newline to the aspell process's stdin.
  sendCommand: (command) ->
    @pipe.stdin.write(command + "\n")

module.exports = ASpellWorker

View file

@ -0,0 +1,37 @@
ASpellWorker = require "./ASpellWorker"
_ = require "underscore"
# Keeps a pool of ASpellWorker processes and reuses an idle worker of the
# requested language when one is available.
class ASpellWorkerPool
  # options: stored on the pool and forwarded to each ASpellWorker constructor.
  constructor: (@options) ->
    @PROCESS_POOL = []
    # default per-request timeout in ms, used when check() is given none
    @timeout = 1000

  # Start a new worker for `language`, register it in the pool and return it.
  create: (language) ->
    worker = new ASpellWorker(language, @options)
    worker.pipe.on 'exit', () =>
      # actually invoke cleanup: a bare `@cleanup` only references the method,
      # leaving dead workers in the pool forever
      @cleanup()
    @PROCESS_POOL.push(worker)
    return worker

  # Drop every worker whose process has exited (state 'killed').
  cleanup: () ->
    active = @PROCESS_POOL.filter (worker) ->
      worker.state != 'killed'
    @PROCESS_POOL = active

  # Check `words` with a worker for `language`.
  # timeout:  ms before the worker process is sent SIGKILL; falls back to
  #           @timeout when falsy.
  # callback: receives (err, output) from the worker.
  check: (language, words, timeout, callback) ->
    # look for an existing process in the pool
    availableWorker = _.find @PROCESS_POOL, (cached) ->
      cached.language == language && cached.isReady()
    worker = availableWorker ? @create(language)

    timer = setTimeout () ->
      worker.pipe.kill('SIGKILL')
    , timeout || @timeout
    worker.check words, (err, output) ->
      clearTimeout timer
      callback(err, output)

module.exports = ASpellWorkerPool