Merge pull request #36 from overleaf/bg-fix-aspell-pipe-encoding

fix aspell pipe encoding
This commit is contained in:
Brian Gough 2019-10-03 09:33:01 +01:00 committed by GitHub
commit bb442866ba
3 changed files with 168 additions and 7 deletions

View file

@ -128,12 +128,18 @@ class ASpellWorker {
}
})
this.pipe.stdout.setEncoding('utf8') // ensure utf8 output is handled correctly
var output = ''
const endMarker = new RegExp('^[a-z][a-z]', 'm')
this.pipe.stdout.on('data', chunk => {
output = output + chunk
// We receive the language code from Aspell as the end of data marker
if (chunk.toString().match(endMarker)) {
const endMarkerRegex = new RegExp('^[a-z][a-z]', 'gm')
this.pipe.stdout.on('data', data => {
// We receive the language code from Aspell as the end of data marker in
// the data. The input is a utf8 encoded string.
let oldPos = output.length
output = output + data
// The end marker may cross the end of a chunk, so we optimise the search
// using the regex lastIndex property.
endMarkerRegex.lastIndex = oldPos > 2 ? oldPos - 2 : 0
if (endMarkerRegex.test(output)) {
if (this.callback != null) {
this.callback(null, output.slice())
this.callback = null // only allow one callback in use

View file

@ -3,11 +3,12 @@ const request = require('./helpers/request')
const USER_ID = 101
const checkWord = words =>
const checkWord = (words, language) =>
request.post({
url: `/user/${USER_ID}/check`,
body: JSON.stringify({
words
words,
language
})
})
@ -78,4 +79,53 @@ describe('checking words', () => {
}
})
})
describe('when a very long list of words with utf8 responses', () => {
  // Submit 20001 copies of the same accented word in a single request; the
  // assertions below expect only the first 10000 misspellings to come back.
  beforeEach(async () => {
    const words = new Array(20001).fill('anéther')
    response = await checkWord(words, 'bg') // use Bulgarian to generate utf8 response
  })

  it('should return misspellings for the first 10K results only', async () => {
    const { misspellings } = JSON.parse(response.body)
    expect(misspellings.length).to.equal(10000)
  })

  it('should have misspelling suggestions with consecutive indexes', async () => {
    const { misspellings } = JSON.parse(response.body)
    const indexList = misspellings.map(({ index }) => index)
    expect(indexList.length).to.equal(10000) // avoid testing over an incorrect array
    // Compare every index with its predecessor: each must be exactly one higher.
    indexList.slice(1).forEach((next, position) => {
      expect(indexList[position] + 1).to.equal(next)
    })
  })
})
describe('when multiple words with utf8 are submitted', () => {
  // Four accented words are sent; the first and the last are the same word so
  // that their suggestions can be compared with each other.
  beforeEach(async () => {
    const words = ['mneá', 'meniésn', 'meônoi', 'mneá']
    response = await checkWord(words, 'pt_BR')
  })

  it('should return the misspellings for all the words', async () => {
    const { misspellings } = JSON.parse(response.body)
    expect(misspellings.length).to.equal(4)
  })

  it('should have misspelling suggestions with consecutive indexes', async () => {
    const { misspellings } = JSON.parse(response.body)
    const indexes = misspellings.map(({ index }) => index)
    expect(indexes).to.deep.equal([0, 1, 2, 3])
  })

  it('should return identical suggestions for the same entry', async () => {
    const { misspellings } = JSON.parse(response.body)
    const firstSuggestions = misspellings[0].suggestions
    const lastSuggestions = misspellings[3].suggestions
    expect(firstSuggestions).to.deep.equal(lastSuggestions)
  })
})
})

View file

@ -0,0 +1,105 @@
/* eslint-disable
handle-callback-err,
no-undef
*/
const sinon = require('sinon')
const chai = require('chai')
const { expect } = chai
const SandboxedModule = require('sandboxed-module')
const EventEmitter = require('events')
// Unit tests for the worker's handling of aspell stdout: the worker buffers
// 'data' chunks and fires its callback once the language-code end marker
// ('en' at the start of a line) has been seen, even when the marker is split
// across chunk boundaries.
describe('ASpellWorker', function () {
  beforeEach(function () {
    // Stand-in for the real child_process module; spawn is stubbed per test.
    this.child_process = {}
    this.ASpellWorker = SandboxedModule.require('../../../app/js/ASpellWorker', {
      requires: {
        'logger-sharelatex': {
          log() {},
          info() {},
          err() {}
        },
        'metrics-sharelatex': {
          gauge() {},
          inc() {}
        },
        child_process: this.child_process
      }
    })
  })

  describe('creating a worker', function () {
    beforeEach(function () {
      // Fake child process: stdout is a real EventEmitter so the tests can
      // push 'data' chunks through it; everything else is stubbed out.
      this.pipe = {
        stdout: new EventEmitter(),
        stderr: { on: sinon.stub() },
        stdin: { on: sinon.stub() },
        on: sinon.stub(),
        pid: 12345
      }
      this.child_process.spawn = sinon.stub().returns(this.pipe)
      this.pipe.stdout.setEncoding = sinon.stub()
      // Keep the worker on the mocha context rather than assigning to an
      // undeclared variable, which would create a global shared by all tests.
      this.worker = new this.ASpellWorker('en')
    })

    describe('with normal aspell output', function () {
      beforeEach(function () {
        this.callback = this.worker.callback = sinon.stub()
        this.pipe.stdout.emit('data', '& hello\n')
        this.pipe.stdout.emit('data', '& world\n')
        this.pipe.stdout.emit('data', 'en\n')
        // Output after the end marker must not be part of the callback result.
        this.pipe.stdout.emit('data', '& goodbye')
      })

      it('should call the callback', function () {
        expect(this.callback.called).to.equal(true)
        expect(
          this.callback.calledWith(null, '& hello\n& world\nen\n')
        ).to.equal(true)
      })
    })

    describe('with the aspell end marker split across chunks', function () {
      beforeEach(function () {
        this.callback = this.worker.callback = sinon.stub()
        this.pipe.stdout.emit('data', '& hello\n')
        // The two-letter marker is split: 'e' ends one chunk, 'n' starts the next.
        this.pipe.stdout.emit('data', '& world\ne')
        this.pipe.stdout.emit('data', 'n\n')
        this.pipe.stdout.emit('data', '& goodbye')
      })

      it('should call the callback', function () {
        expect(this.callback.called).to.equal(true)
        expect(
          this.callback.calledWith(null, '& hello\n& world\nen\n')
        ).to.equal(true)
      })
    })

    describe('with the aspell end marker newline split across chunks', function () {
      beforeEach(function () {
        this.callback = this.worker.callback = sinon.stub()
        this.pipe.stdout.emit('data', '& hello\n')
        this.pipe.stdout.emit('data', '& world\n')
        // The marker arrives without its trailing newline, so the callback is
        // expected to fire with the output accumulated up to 'en'.
        this.pipe.stdout.emit('data', 'en')
        this.pipe.stdout.emit('data', '\n& goodbye')
      })

      it('should call the callback', function () {
        expect(this.callback.called).to.equal(true)
        expect(
          this.callback.calledWith(null, '& hello\n& world\nen')
        ).to.equal(true)
      })
    })

    describe('with everything split across chunks', function () {
      beforeEach(function () {
        this.callback = this.worker.callback = sinon.stub()
        // Worst case: every character is delivered as its own chunk.
        '& hello\n& world\nen\n& goodbye'.split('').forEach(character => {
          this.pipe.stdout.emit('data', character)
        })
      })

      it('should call the callback', function () {
        expect(this.callback.called).to.equal(true)
        expect(
          this.callback.calledWith(null, '& hello\n& world\nen')
        ).to.equal(true)
      })
    })
  })
})