mirror of
https://github.com/overleaf/overleaf.git
synced 2024-11-21 20:47:08 -05:00
Merge pull request #36 from overleaf/bg-fix-aspell-pipe-encoding
fix aspell pipe encoding
This commit is contained in:
commit
bb442866ba
3 changed files with 168 additions and 7 deletions
|
@ -128,12 +128,18 @@ class ASpellWorker {
|
|||
}
|
||||
})
|
||||
|
||||
this.pipe.stdout.setEncoding('utf8') // ensure utf8 output is handled correctly
|
||||
var output = ''
|
||||
const endMarker = new RegExp('^[a-z][a-z]', 'm')
|
||||
this.pipe.stdout.on('data', chunk => {
|
||||
output = output + chunk
|
||||
// We receive the language code from Aspell as the end of data marker
|
||||
if (chunk.toString().match(endMarker)) {
|
||||
const endMarkerRegex = new RegExp('^[a-z][a-z]', 'gm')
|
||||
this.pipe.stdout.on('data', data => {
|
||||
// We receive the language code from Aspell as the end of data marker in
|
||||
// the data. The input is a utf8 encoded string.
|
||||
let oldPos = output.length
|
||||
output = output + data
|
||||
// The end marker may cross the end of a chunk, so we optimise the search
|
||||
// using the regex lastIndex property.
|
||||
endMarkerRegex.lastIndex = oldPos > 2 ? oldPos - 2 : 0
|
||||
if (endMarkerRegex.test(output)) {
|
||||
if (this.callback != null) {
|
||||
this.callback(null, output.slice())
|
||||
this.callback = null // only allow one callback in use
|
||||
|
|
|
@ -3,11 +3,12 @@ const request = require('./helpers/request')
|
|||
|
||||
const USER_ID = 101
|
||||
|
||||
/**
 * POST a list of words to the spell-check endpoint of the test user.
 *
 * @param {string[]} words - the words to be checked
 * @param {string} [language] - language code placed in the request body;
 *   when it is undefined, JSON.stringify leaves the key out of the payload
 * @returns the value returned by `request.post` (callers in this file await it)
 */
const checkWord = (words, language) =>
  request.post({
    url: `/user/${USER_ID}/check`,
    body: JSON.stringify({
      words,
      language
    })
  })
|
||||
|
||||
|
@ -78,4 +79,53 @@ describe('checking words', () => {
|
|||
}
|
||||
})
|
||||
})
|
||||
|
||||
describe('when a very long list of words with utf8 responses', () => {
  beforeEach(async () => {
    // 20001 copies of the same accented word (the original loop ran
    // inclusively from 0 to 20000), so the request is far larger than the
    // 10000 misspellings the assertions below expect back.
    const words = Array.from({ length: 20001 }, () => 'anéther')
    response = await checkWord(words, 'bg') // use Bulgarian to generate utf8 response
  })

  it('should return misspellings for the first 10K results only', () => {
    const { misspellings } = JSON.parse(response.body)
    expect(misspellings.length).to.equal(10000)
  })

  it('should have misspelling suggestions with consecutive indexes', () => {
    const { misspellings } = JSON.parse(response.body)
    const indexList = misspellings.map(mspl => mspl.index)
    expect(indexList.length).to.equal(10000) // avoid testing over an incorrect array
    // Every index must be exactly one more than its predecessor; a gap would
    // mean a result was dropped or mis-numbered while parsing the response.
    for (let i = 0; i < indexList.length - 1; i++) {
      expect(indexList[i] + 1).to.equal(indexList[i + 1])
    }
  })
})
|
||||
|
||||
|
||||
describe('when multiple words with utf8 are submitted', () => {
  beforeEach(async () => {
    // Four accented words checked with the 'pt_BR' language; the first and
    // last entries are the same word so their suggestions can be compared.
    response = await checkWord(['mneá', 'meniésn', 'meônoi', 'mneá'], 'pt_BR')
  })

  it('should return the misspellings for all the words', () => {
    const body = JSON.parse(response.body)
    expect(body.misspellings.length).to.equal(4)
  })

  it('should have misspelling suggestions with consecutive indexes', () => {
    const body = JSON.parse(response.body)
    const indexes = body.misspellings.map(mspl => mspl.index)
    expect(indexes).to.deep.equal([0, 1, 2, 3])
  })

  it('should return identical suggestions for the same entry', () => {
    const body = JSON.parse(response.body)
    // Entries 0 and 3 are the same submitted word, so their suggestion lists
    // must match.
    expect(body.misspellings[0].suggestions).to.deep.equal(
      body.misspellings[3].suggestions
    )
  })
})
|
||||
})
|
||||
|
|
105
services/spelling/test/unit/js/ASpellWorkerTests.js
Normal file
105
services/spelling/test/unit/js/ASpellWorkerTests.js
Normal file
|
@ -0,0 +1,105 @@
|
|||
/* eslint-disable
|
||||
handle-callback-err,
|
||||
no-undef
|
||||
*/
|
||||
const sinon = require('sinon')
|
||||
const chai = require('chai')
|
||||
const { expect } = chai
|
||||
const SandboxedModule = require('sandboxed-module')
|
||||
const EventEmitter = require('events')
|
||||
|
||||
describe('ASpellWorker', function () {
  // Load the module under test in a sandbox with the logger, the metrics
  // client and child_process replaced, so constructing a worker goes through
  // the stubbed child_process object instead of the real one.
  beforeEach(function () {
    this.child_process = {}
    this.ASpellWorker = SandboxedModule.require('../../../app/js/ASpellWorker', {
      requires: {
        'logger-sharelatex': {
          log() {},
          info() {},
          err() {}
        },
        'metrics-sharelatex': {
          gauge() {},
          inc() {}
        },
        child_process: this.child_process
      }
    })
  })

  describe('creating a worker', function () {
    beforeEach(function () {
      // Fake child process: stdout is a real EventEmitter so each test can
      // push 'data' chunks by hand; everything else is a stub.
      this.pipe = {
        stdout: new EventEmitter(),
        stderr: { on: sinon.stub() },
        stdin: { on: sinon.stub() },
        on: sinon.stub(),
        pid: 12345
      }
      this.child_process.spawn = sinon.stub().returns(this.pipe)
      this.pipe.stdout.setEncoding = sinon.stub()
      // Keep the worker on the mocha context rather than assigning to an
      // undeclared variable, which leaks a global between tests and throws
      // a ReferenceError under strict mode.
      this.worker = new this.ASpellWorker('en')
    })

    describe('with normal aspell output', function () {
      beforeEach(function () {
        this.callback = this.worker.callback = sinon.stub()
        this.pipe.stdout.emit('data', '& hello\n')
        this.pipe.stdout.emit('data', '& world\n')
        this.pipe.stdout.emit('data', 'en\n')
        // Emitted after the end marker: must not appear in the callback output.
        this.pipe.stdout.emit('data', '& goodbye')
      })

      it('should call the callback', function () {
        expect(this.callback.called).to.equal(true)
        expect(this.callback.calledWith(null, '& hello\n& world\nen\n')).to.equal(true)
      })
    })

    describe('with the aspell end marker split across chunks', function () {
      beforeEach(function () {
        this.callback = this.worker.callback = sinon.stub()
        this.pipe.stdout.emit('data', '& hello\n')
        // The two-letter marker "en" is cut between this chunk and the next.
        this.pipe.stdout.emit('data', '& world\ne')
        this.pipe.stdout.emit('data', 'n\n')
        this.pipe.stdout.emit('data', '& goodbye')
      })

      it('should call the callback', function () {
        expect(this.callback.called).to.equal(true)
        expect(this.callback.calledWith(null, '& hello\n& world\nen\n')).to.equal(true)
      })
    })

    describe('with the aspell end marker newline split across chunks', function () {
      beforeEach(function () {
        this.callback = this.worker.callback = sinon.stub()
        this.pipe.stdout.emit('data', '& hello\n')
        this.pipe.stdout.emit('data', '& world\n')
        // The marker arrives without its trailing newline, so the expected
        // callback output ends at "en".
        this.pipe.stdout.emit('data', 'en')
        this.pipe.stdout.emit('data', '\n& goodbye')
      })

      it('should call the callback', function () {
        expect(this.callback.called).to.equal(true)
        expect(this.callback.calledWith(null, '& hello\n& world\nen')).to.equal(true)
      })
    })

    describe('with everything split across chunks', function () {
      beforeEach(function () {
        this.callback = this.worker.callback = sinon.stub()
        // Feed the whole output one character per 'data' event.
        '& hello\n& world\nen\n& goodbye'.split('').forEach(x => {
          this.pipe.stdout.emit('data', x)
        })
      })

      it('should call the callback', function () {
        expect(this.callback.called).to.equal(true)
        expect(this.callback.calledWith(null, '& hello\n& world\nen')).to.equal(true)
      })
    })
  })
})
|
Loading…
Reference in a new issue