// Source: overleaf/services/spelling/app/js/ASpellWorker.js (JavaScript, 243 lines, 6.8 KiB)

2019-07-03 08:41:01 -04:00
// TODO: This file was created by bulk-decaffeinate.
// Sanity-check the conversion and remove this comment.
/*
* decaffeinate suggestions:
* DS102: Remove unnecessary code created because of implicit returns
* DS205: Consider reworking code to avoid use of IIFEs
* DS207: Consider shorter variations of null checks
* Full docs: https://github.com/decaffeinate/decaffeinate/blob/master/docs/suggestions.md
*/
const childProcess = require('child_process')
const logger = require('@overleaf/logger')
const metrics = require('@overleaf/metrics')
2019-07-03 08:41:01 -04:00
const _ = require('underscore')
const OError = require('@overleaf/o-error')
2019-07-03 08:41:01 -04:00
// Maximum number of words ASpellWorker#write puts on a single `^word1 word2 ...`
// command line sent to aspell.
const BATCH_SIZE = 100
/**
 * Drives one `aspell pipe` child process for a single language.
 *
 * A worker handles at most one check at a time. The callback of the check in
 * flight is kept in `this.callback` and is invoked either with aspell's raw
 * output (once the end-of-data marker is seen on stdout) or with an error and
 * an empty array if the process errors or closes first.
 *
 * `this.state` is not set by the constructor; it takes the values 'busy',
 * 'ready', 'closing', 'closed', 'killed', 'error' and 'end' as the worker is
 * used and as the child process emits events.
 */
class ASpellWorker {
  /**
   * Spawn the aspell child process and attach all stream/process handlers.
   *
   * @param {string} language - dictionary name passed to aspell via `-d`
   */
  constructor(language) {
    this.language = language
    this.count = 0
    this.closeReason = ''
    this.pipe = childProcess.spawn('aspell', [
      'pipe',
      '-t',
      '--encoding=utf-8',
      '-d',
      language,
    ])
    logger.debug(
      { process: this.pipe.pid, lang: this.language },
      'starting new aspell worker'
    )
    metrics.inc('aspellWorker', 1, { status: 'start', method: this.language })

    // Accumulated stdout since the last end-of-data marker, and all stderr
    // seen so far. Declared before the handlers below that close over them.
    let output = ''
    let error = ''

    // Diagnostic fields attached to every failure reported by this worker.
    // Only the last 1024 characters of each stream are included.
    const describeFailure = previousWorkerState => ({
      process: this.pipe.pid,
      lang: this.language,
      stdout: output.slice(-1024),
      stderr: error.slice(-1024),
      workerState: this.state,
      previousWorkerState,
      closeReason: this.closeReason,
    })

    // Deliver `err` to the pending check, if there is one.
    // Returns true when a callback was waiting, false otherwise.
    const failPendingCheck = err => {
      if (this.callback == null) {
        return false
      }
      this.callback(err, [])
      this.callback = null
      return true
    }

    // Shared handling for 'error' events on the process and on its stdin:
    // mark the worker as errored (unless already killed), tag the error with
    // diagnostics, then fail the pending check or log if nobody is waiting.
    const handleStreamError = (err, message) => {
      const previousWorkerState = this.state
      if (this.state !== 'killed') {
        this.state = 'error'
      }
      OError.tag(err, message, describeFailure(previousWorkerState))
      if (!failPendingCheck(err)) {
        logger.warn(err)
      }
    }

    this.pipe.on('exit', () => {
      this.state = 'killed'
      logger.debug(
        { process: this.pipe.pid, lang: this.language },
        'aspell worker has exited'
      )
      metrics.inc('aspellWorker', 1, {
        status: 'exit',
        method: this.language,
      })
    })

    this.pipe.on('close', () => {
      const previousWorkerState = this.state
      if (this.state !== 'killed') {
        this.state = 'closed'
      }
      // A check that is still waiting will never get its marker now.
      if (this.callback != null) {
        failPendingCheck(
          new OError(
            'aspell worker closed output streams with uncalled callback',
            describeFailure(previousWorkerState)
          )
        )
      }
    })

    this.pipe.on('error', err => {
      handleStreamError(err, 'aspell worker error')
    })

    this.pipe.stdin.on('error', err => {
      handleStreamError(err, 'aspell worker error on stdin')
    })

    this.pipe.stdout.setEncoding('utf8') // ensure utf8 output is handled correctly
    const endMarkerRegex = /^[a-z]{2}/gm
    this.pipe.stdout.on('data', data => {
      // We receive the language code from Aspell as the end of data marker in
      // the data. The input is a utf8 encoded string.
      const oldPos = output.length
      output = output + data
      // The end marker may cross the end of a chunk, so we only rescan from
      // two characters before the new data, using the regex lastIndex
      // property (the regex is global, so `test` starts at lastIndex).
      endMarkerRegex.lastIndex = oldPos > 2 ? oldPos - 2 : 0
      if (!endMarkerRegex.test(output)) {
        return
      }
      if (this.callback != null) {
        this.callback(null, output.slice())
        this.callback = null // only allow one callback in use
      } else {
        logger.err(
          new OError('end of data marker received when callback already used', {
            process: this.pipe.pid,
            lang: this.language,
            workerState: this.state,
          })
        )
      }
      this.state = 'ready'
      output = ''
    })

    this.pipe.stderr.on('data', chunk => {
      error = error + chunk
    })

    this.pipe.stdout.on('end', () => {
      // process has ended
      this.state = 'end'
    })
  }

  /**
   * @returns {boolean} true only when the worker state is 'ready', which is
   *   set after an end-of-data marker has been received.
   */
  isReady() {
    return this.state === 'ready'
  }

  /**
   * Send `words` to aspell and report the raw output through `callback`.
   *
   * If another check is still in flight, the new request is rejected through
   * its own `callback` and nothing is written to aspell; the in-flight
   * request is left untouched so it can still receive its result.
   *
   * @param {string[]} words - words to check
   * @param {Function} callback - called as `(null, output)` with aspell's raw
   *   output string, or as `(err, [])` / `(err)` on failure
   * @returns {*} the return value of the final write to aspell's stdin, or of
   *   `callback` when the request is rejected
   */
  check(words, callback) {
    // we will now send data to aspell, and be ready again when we
    // receive the end of data marker
    this.state = 'busy'
    if (this.callback != null) {
      // only allow one callback in use: fail the NEW request, not the one
      // that is already waiting for aspell's answer
      return callback(
        new OError('Aspell callback already in use - SHOULD NOT HAPPEN', {
          process: this.pipe.pid,
          lang: this.language,
          workerState: this.state,
        })
      )
    }
    this.callback = _.once(callback) // extra defence against double callback
    this.setTerseMode()
    this.write(words)
    return this.flush()
  }

  /**
   * Send `words` to aspell in batches of BATCH_SIZE for efficiency.
   *
   * @param {string[]} words - words to send
   * @returns {number[]} the end offset of each batch sent (multiples of
   *   BATCH_SIZE); empty when `words` is empty
   */
  write(words) {
    const offsets = []
    for (let start = 0; start < words.length; start += BATCH_SIZE) {
      this.sendWords(words.slice(start, start + BATCH_SIZE))
      offsets.push(start + BATCH_SIZE)
    }
    return offsets
  }

  /**
   * Send the `$$l` command. The stdout handler treats the language code that
   * aspell sends back as the end-of-data marker for the current check.
   *
   * @returns {*} the return value of the write to aspell's stdin
   */
  flush() {
    return this.sendCommand('$$l')
  }

  /**
   * Ask the worker to stop by ending aspell's stdin.
   *
   * @param {string} reason - recorded in `closeReason` for later diagnostics
   * @returns {*} the return value of `stdin.end()`
   */
  shutdown(reason) {
    logger.debug({ process: this.pipe.pid, reason }, 'shutting down')
    this.state = 'closing'
    this.closeReason = reason
    return this.pipe.stdin.end()
  }

  /**
   * Send SIGKILL to the aspell process unless it has already exited.
   *
   * @param {string} reason - recorded in `closeReason` for later diagnostics
   * @returns {*} the return value of `pipe.kill`, or undefined if the worker
   *   was already in the 'killed' state
   */
  kill(reason) {
    logger.debug({ process: this.pipe.pid, reason }, 'killing')
    this.closeReason = reason
    if (this.state === 'killed') {
      return
    }
    return this.pipe.kill('SIGKILL')
  }

  /** Send the `!` command, which puts the aspell pipe into terse mode. */
  setTerseMode() {
    return this.sendCommand('!')
  }

  /**
   * Send a single word to be checked.
   *
   * @param {string} word
   */
  sendWord(word) {
    return this.sendCommand(`^${word}`)
  }

  /**
   * Send several words on one line and count the batch in `this.count`.
   *
   * @param {string[]} words
   * @returns {number} the value of `this.count` before it was incremented
   */
  sendWords(words) {
    // Aspell accepts multiple words to check on the same line
    // ^word1 word2 word3 ...
    // See aspell.info, writing programs to use Aspell Through A Pipe
    this.sendCommand(`^${words.join(' ')}`)
    return this.count++
  }

  /**
   * Write one command line to aspell's stdin.
   *
   * @param {string} command - command text; any CR/LF characters are removed
   *   so that user input cannot inject extra command lines
   * @returns {*} the return value of `stdin.write`
   */
  sendCommand(command) {
    // Sanitize user input. Reject line feed characters.
    command = command.replace(/[\r\n]/g, '')
    return this.pipe.stdin.write(command + '\n')
  }
}
module.exports = ASpellWorker