overleaf/services/spelling/app/coffee/ASpellWorker.coffee

118 lines
3.8 KiB
CoffeeScript
Raw Normal View History

2015-03-04 11:43:59 -05:00
child_process = require("child_process")
2015-03-10 09:32:42 -04:00
logger = require 'logger-sharelatex'
2015-03-12 05:54:57 -04:00
metrics = require('metrics-sharelatex')
_ = require "underscore"
2015-03-04 11:43:59 -05:00
# Maximum number of words sent to aspell on a single `^word1 word2 ...` line
# (see ASpellWorker::write).
BATCH_SIZE = 100
# Wraps one long-running `aspell pipe` child process for a single language.
#
# A worker serves one request at a time: `check` writes the words to aspell's
# stdin followed by a `$$l` command, and the stdout handler treats the
# language code that aspell prints in response as the end-of-data marker.
#
# @state takes the values assigned below: 'busy', 'ready', 'closing',
# 'killed', 'closed', 'error' and 'end'. It is undefined until the first
# check or process event.
class ASpellWorker
  # language - dictionary name handed to aspell's `-d` option.
  constructor: (language) ->
    @language = language
    @count = 0 # number of word batches sent so far (see sendWords)

    # Both buffers are declared BEFORE any handler is defined. CoffeeScript
    # resolves an assignment to the nearest variable already declared at that
    # point in the source; assigning `error` inside a handler before the
    # outer declaration would create a handler-local variable and the shared
    # stderr buffer would never be reset.
    output = ""
    error = ""
    endMarker = new RegExp("^[a-z][a-z]", "m")

    @pipe = child_process.spawn("aspell", ["pipe", "-t", "--encoding=utf-8", "-d", language])
    logger.info process: @pipe.pid, lang: @language, "starting new aspell worker"
    metrics.inc "aspellWorker", 1, {status: "start", method: @language}

    @pipe.on 'exit', () =>
      @state = 'killed'
      logger.info process: @pipe.pid, lang: @language, "aspell worker has exited"
      metrics.inc "aspellWorker" , 1, {status: "exit", method: @language}

    @pipe.on 'close', () =>
      @state = 'closed' unless @state == 'killed'
      # A request was still waiting for its end marker: fail it.
      if @callback?
        logger.err process: @pipe.pid, lang: @language, "aspell worker closed output streams with uncalled callback"
        @callback new Error("aspell worker closed output streams with uncalled callback"), []
        @callback = null

    @pipe.on 'error', (err) =>
      @state = 'error' unless @state == 'killed'
      logger.log process: @pipe.pid, error: err, stdout: output.slice(-1024), stderr: error.slice(-1024), lang: @language, "aspell worker error"
      if @callback?
        @callback err, []
        @callback = null

    @pipe.stdin.on 'error', (err) =>
      @state = 'error' unless @state == 'killed'
      logger.info process: @pipe.pid, error: err, stdout: output.slice(-1024), stderr: error.slice(-1024), lang: @language, "aspell worker error on stdin"
      if @callback?
        @callback err, []
        @callback = null

    # Decode as UTF-8 at the stream level so a multi-byte character that is
    # split across two chunks is reassembled instead of being corrupted by
    # per-chunk string conversion.
    @pipe.stdout.setEncoding "utf8"
    @pipe.stderr.setEncoding "utf8"

    @pipe.stdout.on "data", (chunk) =>
      output = output + chunk
      # We receive the language code from Aspell as the end of data marker.
      # Test the accumulated output rather than the single chunk: a marker
      # split across chunks is still found, and a chunk that happens to start
      # in the middle of a line cannot be mistaken for the marker.
      if output.match(endMarker)
        if @callback?
          @callback(null, output.slice())
          @callback = null # only allow one callback in use
        else
          logger.err process: @pipe.pid, lang: @language, "end of data marker received when callback already used"
        @state = 'ready'
        output = ""
        error = ""

    @pipe.stderr.on "data", (chunk) =>
      error = error + chunk

    @pipe.stdout.on "end", () =>
      # process has ended
      @state = "end"

  # True once an end-of-data marker has been received and no new check has
  # been started since.
  isReady: () ->
    return @state == 'ready'

  # Send `words` to aspell for checking.
  #
  # words    - array of words.
  # callback - called as (err, output); on success `output` is the raw text
  #            aspell wrote to stdout for this request.
  #
  # If another request is still outstanding, `callback` receives an error
  # immediately and the outstanding request is left untouched.
  check: (words, callback) ->
    # we will now send data to aspell, and be ready again when we
    # receive the end of data marker
    @state = 'busy'
    if @callback? # only allow one callback in use
      logger.err process: @pipe.pid, lang: @language, "CALLBACK ALREADY IN USE"
      # Fail the caller that was rejected, not the request already in flight;
      # otherwise the new caller would never hear back.
      return callback new Error("Aspell callback already in use - SHOULD NOT HAPPEN")
    @callback = _.once callback # extra defence against double callback
    @setTerseMode()
    @write(words)
    @flush()

  # Write `words` to aspell in batches of BATCH_SIZE words per line.
  write: (words) ->
    i = 0
    while i < words.length
      # batch up the words to check for efficiency
      batch = words.slice(i, i + BATCH_SIZE)
      @sendWords batch
      i += BATCH_SIZE

  # Ask aspell to print its language code, which the stdout handler uses as
  # the end-of-data marker for the current request.
  flush: () ->
    @sendCommand("$$l")

  # Close aspell's stdin so the process can finish on its own.
  shutdown: (reason) ->
    logger.info process: @pipe.pid, reason: reason, 'shutting down'
    @state = "closing"
    @pipe.stdin.end()

  # Forcefully terminate the aspell process with SIGKILL, unless it has
  # already exited.
  kill: (reason) ->
    logger.info process: @pipe.pid, reason: reason, 'killing'
    return if @state == 'killed'
    @pipe.kill('SIGKILL')

  # Send the `!` command (terse mode) to aspell.
  setTerseMode: () ->
    @sendCommand("!")

  # Check a single word.
  sendWord: (word) ->
    @sendCommand("^" + word)

  # Check several words with one command line.
  sendWords: (words) ->
    # Aspell accepts multiple words to check on the same line
    # ^word1 word2 word3 ...
    # See aspell.info, writing programs to use Aspell Through A Pipe
    @sendCommand("^" + words.join(" "))
    @count++

  # Write one newline-terminated command line to aspell's stdin.
  sendCommand: (command) ->
    @pipe.stdin.write(command + "\n")
# The class itself is the module's export.
module.exports = ASpellWorker