2019-07-03 08:41:01 -04:00
|
|
|
// TODO: This file was created by bulk-decaffeinate.
|
|
|
|
// Sanity-check the conversion and remove this comment.
|
|
|
|
/*
|
|
|
|
* decaffeinate suggestions:
|
|
|
|
* DS102: Remove unnecessary code created because of implicit returns
|
|
|
|
* DS205: Consider reworking code to avoid use of IIFEs
|
|
|
|
* DS207: Consider shorter variations of null checks
|
|
|
|
* Full docs: https://github.com/decaffeinate/decaffeinate/blob/master/docs/suggestions.md
|
|
|
|
*/
|
|
|
|
const childProcess = require('child_process')
|
2021-12-14 08:00:35 -05:00
|
|
|
const logger = require('@overleaf/logger')
|
2020-11-25 06:57:24 -05:00
|
|
|
const metrics = require('@overleaf/metrics')
|
2019-07-03 08:41:01 -04:00
|
|
|
const _ = require('underscore')
|
2019-07-11 06:29:00 -04:00
|
|
|
const OError = require('@overleaf/o-error')
|
2019-07-03 08:41:01 -04:00
|
|
|
|
|
|
|
// Maximum number of words sent to aspell on a single `^word1 word2 ...` line;
// `ASpellWorker#write` slices the word list into chunks of this size.
const BATCH_SIZE = 100
|
|
|
|
|
|
|
|
// Number of trailing characters of aspell's stdout/stderr that are attached to
// error reports. The stderr buffer is also trimmed to this length so that a
// long-lived worker cannot accumulate diagnostics without bound.
const DIAGNOSTIC_TAIL_LENGTH = 1024

/**
 * Wraps one `aspell pipe` child process for a single dictionary.
 *
 * At most one request may be in flight at a time: `check()` stores the
 * caller's callback in `this.callback`, writes the words to aspell's stdin and
 * the callback is invoked once the end-of-data marker is seen on stdout (or
 * the process fails / closes first).
 *
 * `this.state` is left unset by the constructor and then takes the values
 * 'busy', 'ready', 'closing', 'killed', 'closed', 'error' and 'end' as the
 * handlers and methods below run.
 */
class ASpellWorker {
  /**
   * Spawn the aspell process and wire up all of its event handlers.
   *
   * @param {string} language - dictionary name passed to aspell via `-d`
   */
  constructor(language) {
    this.language = language
    this.count = 0
    this.closeReason = ''
    // Pending request callback; null whenever no request is in flight.
    this.callback = null
    this.pipe = childProcess.spawn('aspell', [
      'pipe',
      '-t',
      '--encoding=utf-8',
      '-d',
      language,
    ])
    logger.debug(
      { process: this.pipe.pid, lang: this.language },
      'starting new aspell worker'
    )
    metrics.inc('aspellWorker', 1, { status: 'start', method: this.language })

    // Buffers shared by the handlers below. `output` holds the stdout of the
    // request in flight, `error` holds the most recent stderr text.
    let output = ''
    let error = ''

    // Snapshot of the worker attached to every error we report.
    const diagnostics = previousWorkerState => ({
      process: this.pipe.pid,
      lang: this.language,
      stdout: output.slice(-DIAGNOSTIC_TAIL_LENGTH),
      stderr: error.slice(-DIAGNOSTIC_TAIL_LENGTH),
      workerState: this.state,
      previousWorkerState,
      closeReason: this.closeReason,
    })

    // Fail the request in flight, if any. Returns whether there was one.
    const failPendingRequest = err => {
      if (this.callback == null) {
        return false
      }
      this.callback(err, [])
      this.callback = null
      return true
    }

    // Shared handler for 'error' events on the process and on its stdin.
    const failureHandler = message => err => {
      const previousWorkerState = this.state
      if (this.state !== 'killed') {
        this.state = 'error'
      }
      OError.tag(err, message, diagnostics(previousWorkerState))
      if (!failPendingRequest(err)) {
        logger.warn(err)
      }
    }

    this.pipe.on('exit', () => {
      this.state = 'killed'
      logger.debug(
        { process: this.pipe.pid, lang: this.language },
        'aspell worker has exited'
      )
      metrics.inc('aspellWorker', 1, {
        status: 'exit',
        method: this.language,
      })
    })

    this.pipe.on('close', () => {
      const previousWorkerState = this.state
      if (this.state !== 'killed') {
        this.state = 'closed'
      }
      if (this.callback != null) {
        failPendingRequest(
          new OError(
            'aspell worker closed output streams with uncalled callback',
            diagnostics(previousWorkerState)
          )
        )
      }
    })

    this.pipe.on('error', failureHandler('aspell worker error'))
    this.pipe.stdin.on('error', failureHandler('aspell worker error on stdin'))

    this.pipe.stdout.setEncoding('utf8') // ensure utf8 output is handled correctly
    const endMarkerRegex = /^[a-z]{2}/gm
    this.pipe.stdout.on('data', data => {
      // We receive the language code from Aspell as the end of data marker in
      // the data. The input is a utf8 encoded string.
      const oldPos = output.length
      output = output + data
      // The end marker may cross the end of a chunk, so only re-scan from just
      // before the newly appended data using the regex lastIndex property.
      endMarkerRegex.lastIndex = oldPos > 2 ? oldPos - 2 : 0
      if (!endMarkerRegex.test(output)) {
        return
      }
      if (this.callback != null) {
        this.callback(null, output)
        this.callback = null // only allow one callback in use
      } else {
        logger.err(
          new OError('end of data marker received when callback already used', {
            process: this.pipe.pid,
            lang: this.language,
            workerState: this.state,
          })
        )
      }
      this.state = 'ready'
      output = ''
    })

    this.pipe.stderr.on('data', chunk => {
      // Only the tail is ever reported, so do not keep more than that.
      error = (error + chunk).slice(-DIAGNOSTIC_TAIL_LENGTH)
    })

    this.pipe.stdout.on('end', () => {
      // process has ended
      this.state = 'end'
    })
  }

  /**
   * @returns {boolean} true once a response has completed and no new request
   *   has been started since.
   */
  isReady() {
    return this.state === 'ready'
  }

  /**
   * Send `words` to aspell. `callback(err, output)` is called at most once,
   * with the raw stdout text of the response (including the end marker) or
   * with an error.
   *
   * If a request is already in flight, that request's callback is failed (as
   * before) and `callback` is failed with the same error, so that the new
   * caller is not left waiting for a response that will never be sent.
   *
   * @param {string[]} words
   * @param {Function} callback
   * @returns {*} result of the final write to aspell's stdin, or of the
   *   in-flight callback when the worker was already in use.
   */
  check(words, callback) {
    // we will now send data to aspell, and be ready again when we
    // receive the end of data marker
    this.state = 'busy'
    if (this.callback != null) {
      // only allow one callback in use
      const err = new OError(
        'Aspell callback already in use - SHOULD NOT HAPPEN',
        {
          process: this.pipe.pid,
          lang: this.language,
          workerState: this.state,
        }
      )
      const result = this.callback(err)
      if (typeof callback === 'function') {
        callback(err)
      }
      return result
    }
    this.callback = _.once(callback) // extra defence against double callback
    this.setTerseMode()
    this.write(words)
    return this.flush()
  }

  /**
   * Send `words` to aspell in batches of BATCH_SIZE for efficiency.
   *
   * @param {string[]} words
   * @returns {number[]} the end offset of each batch sent. This mirrors the
   *   value the method has always returned and is kept for compatibility.
   */
  write(words) {
    const offsets = []
    for (let start = 0; start < words.length; start += BATCH_SIZE) {
      this.sendWords(words.slice(start, start + BATCH_SIZE))
      offsets.push(start + BATCH_SIZE)
    }
    return offsets
  }

  /**
   * Send the `$$l` command. Its reply is what the stdout handler treats as the
   * end-of-data marker for the current request.
   *
   * @returns {*} result of the write to aspell's stdin
   */
  flush() {
    return this.sendCommand('$$l')
  }

  /**
   * Close aspell's stdin, recording `reason` for later error reports.
   *
   * @param {string} reason
   */
  shutdown(reason) {
    logger.debug({ process: this.pipe.pid, reason }, 'shutting down')
    this.state = 'closing'
    this.closeReason = reason
    return this.pipe.stdin.end()
  }

  /**
   * Send SIGKILL to the aspell process unless it has already exited,
   * recording `reason` for later error reports.
   *
   * @param {string} reason
   */
  kill(reason) {
    logger.debug({ process: this.pipe.pid, reason }, 'killing')
    this.closeReason = reason
    if (this.state === 'killed') {
      return
    }
    return this.pipe.kill('SIGKILL')
  }

  /** Send the `!` command (terse mode) to aspell. */
  setTerseMode() {
    return this.sendCommand('!')
  }

  /**
   * Send a single word to be checked.
   *
   * @param {string} word
   */
  sendWord(word) {
    return this.sendCommand(`^${word}`)
  }

  /**
   * Send several words to be checked on one line.
   *
   * @param {string[]} words
   * @returns {number} the value of `this.count` before it was incremented
   */
  sendWords(words) {
    // Aspell accepts multiple words to check on the same line
    // ^word1 word2 word3 ...
    // See aspell.info, writing programs to use Aspell Through A Pipe
    this.sendCommand(`^${words.join(' ')}`)
    return this.count++
  }

  /**
   * Write one command line to aspell's stdin.
   *
   * @param {string} command
   * @returns {*} result of the write to aspell's stdin
   */
  sendCommand(command) {
    // Sanitize user input. Strip line feed characters so that a word cannot
    // smuggle in an additional command line.
    const singleLine = command.replace(/[\r\n]/g, '')
    return this.pipe.stdin.write(singleLine + '\n')
  }
}
|
|
|
|
|
|
|
|
// The worker class is this module's only export.
module.exports = ASpellWorker
|