mirror of
https://github.com/overleaf/overleaf.git
synced 2024-11-07 20:31:06 -05:00
support Aspell worker pool
This commit is contained in:
parent
726afb6b90
commit
a49f80cb21
3 changed files with 144 additions and 63 deletions
|
@ -1,17 +1,20 @@
|
|||
child_process = require("child_process")
|
||||
async = require "async"
|
||||
_ = require "underscore"
|
||||
ASpellWorkerPool = require "./ASpellWorkerPool"
|
||||
LRU = require "lru-cache"
|
||||
|
||||
cache = LRU(10000)
|
||||
|
||||
class ASpellRunner
|
||||
checkWords: (language, words, callback = (error, result) ->) ->
|
||||
@runAspellOnWords language, words, (error, output) =>
|
||||
return callback(error) if error?
|
||||
output = @removeAspellHeader(output)
|
||||
#output = @removeAspellHeader(output)
|
||||
suggestions = @getSuggestions(output)
|
||||
results = []
|
||||
for word, i in words
|
||||
if suggestions[word]?
|
||||
cache.set(language + ':' + word, suggestions[word])
|
||||
results.push index: i, suggestions: suggestions[word]
|
||||
callback null, results
|
||||
|
||||
|
@ -31,65 +34,30 @@ class ASpellRunner
|
|||
word = parts[1]
|
||||
suggestions[word] = []
|
||||
return suggestions
|
||||
|
||||
removeAspellHeader: (output) -> output.slice(1)
|
||||
|
||||
#removeAspellHeader: (output) -> output.slice(1)
|
||||
|
||||
runAspellOnWords: (language, words, callback = (error, output) ->) ->
|
||||
@open(language)
|
||||
@captureOutput(callback)
|
||||
@setTerseMode()
|
||||
start = new Date()
|
||||
i = 0
|
||||
do tick = () =>
|
||||
if new Date() - start > ASpell.ASPELL_TIMEOUT
|
||||
@close(true)
|
||||
else if i < words.length
|
||||
# batch up the words to check for efficiency
|
||||
batch = words.slice(i, i + ASpell.ASPELL_BATCH_SIZE)
|
||||
@sendWords(batch)
|
||||
i += ASpell.ASPELL_BATCH_SIZE
|
||||
setTimeout tick, 0
|
||||
else
|
||||
@close()
|
||||
# send words to aspell, get back string output for those words
|
||||
# find a free pipe for the language (or start one)
|
||||
# send the words down the pipe
|
||||
# send an END marker that will generate a "*" line in the output
|
||||
# when the output pipe receives the "*" return the data sofar and reset the pipe to be available
|
||||
#
|
||||
# @open(language)
|
||||
# @captureOutput(callback)
|
||||
# @setTerseMode()
|
||||
# start = new Date()
|
||||
|
||||
captureOutput: (callback = (error, output) ->) ->
|
||||
output = ""
|
||||
error = ""
|
||||
@aspell.stdout.on "data", (chunk) ->
|
||||
output = output + chunk
|
||||
@aspell.stderr.on "data", (chunk) =>
|
||||
error = error + chunk
|
||||
@aspell.stdout.on "end", () ->
|
||||
if error == ""
|
||||
callback null, output
|
||||
else
|
||||
callback new Error(error), output
|
||||
newWord = {}
|
||||
for word in words
|
||||
newWord[word] = true if !newWord[word] && !cache.get(language + ':' + word)?
|
||||
words = Object.keys(newWord)
|
||||
|
||||
open: (language) ->
|
||||
@finished = false
|
||||
@aspell = child_process.spawn("aspell", ["pipe", "-t", "--encoding=utf-8", "-d", language])
|
||||
|
||||
close: (force) ->
|
||||
@finished = true
|
||||
@aspell.stdin.end()
|
||||
if force && !@aspell.exitCode?
|
||||
@aspell.kill("SIGKILL")
|
||||
|
||||
setTerseMode: () ->
|
||||
@sendCommand("!")
|
||||
|
||||
sendWord: (word) ->
|
||||
@sendCommand("^" + word)
|
||||
|
||||
sendWords: (words) ->
|
||||
# Aspell accepts multiple words to check on the same line
|
||||
# ^word1 word2 word3 ...
|
||||
# See aspell.info, writing programs to use Aspell Through A Pipe
|
||||
@sendCommand("^" + words.join(" "))
|
||||
|
||||
|
||||
sendCommand: (command) ->
|
||||
@aspell.stdin.write(command + "\n")
|
||||
if words.length
|
||||
WorkerPool.check(language, words, ASpell.ASPELL_TIMEOUT, callback)
|
||||
else
|
||||
callback null, []
|
||||
|
||||
module.exports = ASpell =
|
||||
# The description of how to call aspell from another program can be found here:
|
||||
|
@ -98,10 +66,7 @@ module.exports = ASpell =
|
|||
runner = new ASpellRunner()
|
||||
callback = _.once callback
|
||||
runner.checkWords language, words, callback
|
||||
|
||||
forceClose = ->
|
||||
runner.close(true)
|
||||
callback("process killed")
|
||||
setTimeout forceClose, @ASPELL_TIMEOUT
|
||||
ASPELL_TIMEOUT : 4000
|
||||
ASPELL_BATCH_SIZE : 100
|
||||
|
||||
WorkerPool = new ASpellWorkerPool()
|
||||
|
||||
|
|
79
services/spelling/app/coffee/ASpellWorker.coffee
Normal file
79
services/spelling/app/coffee/ASpellWorker.coffee
Normal file
|
@ -0,0 +1,79 @@
|
|||
child_process = require("child_process")
|
||||
|
||||
BATCH_SIZE = 100
|
||||
|
||||
# Wraps one long-lived `aspell pipe` child process for a single language.
#
# @state values (read by callers through isReady()):
#   'ready'  - idle, may be given a new list of words
#   'busy'   - a check is in flight and @callback is pending
#   'killed' - the child process has exited and must not be reused
class ASpellWorker
	# Spawn the aspell process for `language` and wire up its streams.
	constructor: (language) ->
		@language = language
		@state = 'ready'
		@callback = null

		# Declared before any handler is created so that every handler below
		# closes over the same two buffers.
		output = ""
		error = ""

		@pipe = child_process.spawn("aspell", ["pipe", "-t", "--encoding=utf-8", "-d", language])
		@pipe.on 'exit', () =>
			@state = 'killed'
		@pipe.on 'error', (err) =>
			@finish err, []
		@pipe.stdin.on 'error', (err) =>
			@finish err, []

		@pipe.stdout.on "data", (chunk) =>
			# TODO: strip aspell header
			output = output + chunk
			# flush() makes aspell print a lone "*" line after the real results;
			# seeing it means the current request is complete.
			if chunk.toString().match(/^\*$/m)
				result = output
				output = ""
				error = ""
				# Never resurrect a worker whose process has already exited.
				@state = 'ready' unless @state == 'killed'
				@finish null, result

		@pipe.stderr.on "data", (chunk) =>
			error = error + chunk

		@pipe.stdout.on "end", () =>
			# The process has gone away (shutdown or kill). Report whatever was
			# collected to a request that is still waiting; finish() is a no-op
			# when nothing is pending.
			if error == ""
				@finish null, output
			else
				@finish new Error(error), output

	# Deliver the result to the pending callback at most once.
	# The callback is cleared before it is invoked so that a callback which
	# synchronously starts another check on this worker is not clobbered.
	finish: (err, output) ->
		callback = @callback
		@callback = null
		callback(err, output) if callback?

	# True when the worker is idle and can accept a new check.
	isReady: () ->
		return @state == 'ready'

	# Send `words` to aspell; `callback(err, output)` receives the raw aspell
	# output collected up to and including the end marker.
	check: (words, callback) ->
		# Mark the worker busy so it is not handed to a second request while
		# this one is still waiting for its output.
		@state = 'busy' unless @state == 'killed'
		@callback = callback
		@setTerseMode()
		@write(words)
		@flush()

	# Write all words to the pipe, BATCH_SIZE words per command line.
	write: (words) ->
		i = 0
		while i < words.length
			# batch up the words to check for efficiency
			batch = words.slice(i, i + BATCH_SIZE)
			@sendWords batch
			i += BATCH_SIZE

	# Send an end-of-data marker so the stdout handler can tell when the
	# request has been fully answered.
	# NOTE(review): relies on "end" being accepted by the selected dictionary;
	# confirm for non-English languages, otherwise no "*" is produced.
	flush: () ->
		@sendCommand("%") # take the aspell pipe out of terse mode so we can look for a '*'
		@sendCommand("end") # this is a valid word so it will generate a '*'
		@sendCommand("!") # go back into terse mode

	# Close stdin of the aspell process.
	shutdown: () ->
		@pipe.stdin.end()

	# Send "!" (terse mode, see flush).
	setTerseMode: () ->
		@sendCommand("!")

	# Check a single word.
	sendWord: (word) ->
		@sendCommand("^" + word)

	# Check several words with one command.
	sendWords: (words) ->
		# Aspell accepts multiple words to check on the same line
		# ^word1 word2 word3 ...
		# See aspell.info, writing programs to use Aspell Through A Pipe
		@sendCommand("^" + words.join(" "))

	# Write one newline-terminated command to the aspell pipe.
	sendCommand: (command) ->
		@pipe.stdin.write(command + "\n")
|
||||
|
||||
module.exports = ASpellWorker
|
37
services/spelling/app/coffee/ASpellWorkerPool.coffee
Normal file
37
services/spelling/app/coffee/ASpellWorkerPool.coffee
Normal file
|
@ -0,0 +1,37 @@
|
|||
ASpellWorker = require "./ASpellWorker"
|
||||
_ = require "underscore"
|
||||
|
||||
# Keeps a pool of ASpellWorker processes so that an idle aspell pipe for a
# language can be reused instead of spawning a new process per request.
class ASpellWorkerPool
	# @options is passed through to every worker that is created.
	constructor: (@options) ->
		@PROCESS_POOL = []
		# Default time (ms) a single check may take before its process is killed.
		@timeout = 1000

	# Start a worker for `language`, add it to the pool and return it.
	create: (language) ->
		worker = new ASpellWorker(language, @options)
		worker.pipe.on 'exit', () =>
			# Must be an explicit call: a bare `@cleanup` only references the
			# method, which left exited workers in the pool forever.
			@cleanup()
		@PROCESS_POOL.push(worker)
		return worker

	# Drop every worker whose process has exited.
	cleanup: () ->
		@PROCESS_POOL = @PROCESS_POOL.filter (worker) ->
			worker.state != 'killed'

	# Check `words` against the `language` dictionary.
	# `timeout` is in milliseconds and falls back to @timeout when falsy.
	# `callback(err, output)` is invoked at most once.
	check: (language, words, timeout, callback) ->
		# Guard against a worker reporting twice (e.g. a result followed by the
		# stream-end notification when its process exits).
		callback = _.once callback

		# look for an existing process in the pool, otherwise start one
		worker = _.find @PROCESS_POOL, (cached) ->
			cached.language == language && cached.isReady()
		worker ?= @create(language)

		# Kill the process if it does not answer in time.
		timer = setTimeout () ->
			worker.pipe.kill('SIGKILL')
		, timeout || @timeout

		worker.check words, (err, output) ->
			clearTimeout timer
			callback(err, output)
|
||||
|
||||
module.exports = ASpellWorkerPool
|
Loading…
Reference in a new issue