overleaf/services/spelling/app/js/ASpell.js

192 lines
5.5 KiB
JavaScript
Raw Normal View History

2019-07-03 12:41:01 +00:00
// TODO: This file was created by bulk-decaffeinate.
// Sanity-check the conversion and remove this comment.
/*
* decaffeinate suggestions:
* DS101: Remove unnecessary use of Array.from
* DS102: Remove unnecessary code created because of implicit returns
* DS207: Consider shorter variations of null checks
* Full docs: https://github.com/decaffeinate/decaffeinate/blob/master/docs/suggestions.md
*/
const ASpellWorkerPool = require('./ASpellWorkerPool')
const LRU = require('lru-cache')
const logger = require('logger-sharelatex')
const fs = require('fs')
const settings = require('@overleaf/settings')
2019-07-03 12:41:01 +00:00
const Path = require('path')
const { promisify } = require('util')
2020-06-03 09:14:44 +00:00
const OError = require('@overleaf/o-error')
2019-07-03 12:41:01 +00:00
// In-memory LRU cache of spell-check results, keyed by "<language>:<word>".
// Entries expire after ten hours and the cache holds at most 10000 entries.
const OneMinute = 60 * 1000
const opts = { max: 10000, maxAge: OneMinute * 60 * 10 }
const cache = new LRU(opts)

// The cache is persisted to this file; dumps are staged in the ".tmp"
// sibling before being moved into place.
const cacheFsPath = Path.resolve(settings.cacheDir, 'spell.cache')
const cacheFsPathTmp = cacheFsPath + '.tmp'

// Warm the cache from a previous dump, if one can be read and parsed.
// Any failure here is only logged, leaving the cache as it was.
try {
  const serialized = fs.readFileSync(cacheFsPath)
  const entries = JSON.parse(serialized)
  cache.load(entries)
} catch (error) {
  const tagged = OError.tag(error, 'could not load the cache file', {
    cacheFsPath,
  })
  logger.log(tagged)
}
// Persist the cache to disk every 30 minutes. The dump is written to the
// temporary path first and only renamed over the real cache file once the
// write has succeeded.
const cacheDump = setInterval(() => {
  const serialized = JSON.stringify(cache.dump())

  // Final step: report the outcome of moving the temp file into place.
  const onRenamed = renameError => {
    if (renameError) {
      logger.error(OError.tag(renameError, 'error renaming cache file'))
      return
    }
    logger.log({ len: serialized.length, cacheFsPath }, 'wrote cache file')
  }

  // First step: on a failed write, log and remove the temp file
  // (ignoring any unlink error); otherwise move it into place.
  const onWritten = writeError => {
    if (writeError != null) {
      logger.log(OError.tag(writeError, 'error writing cache file'))
      fs.unlink(cacheFsPathTmp, () => {})
      return
    }
    fs.rename(cacheFsPathTmp, cacheFsPath, onRenamed)
  }

  fs.writeFile(cacheFsPathTmp, serialized, onWritten)
}, 30 * OneMinute)
/**
 * Performs one spell-check request: asks the aspell worker pool about the
 * words that are not in the module-level cache, then combines cached and
 * fresh results.
 */
class ASpellRunner {
  /**
   * Check a list of words and report the misspelled ones.
   *
   * @param {string} language - language identifier; used as the cache-key
   *   prefix and passed through to the worker pool.
   * @param {string[]} words - words to check.
   * @param {Function} [callback] - called with `(error)` on failure, or
   *   `(null, results)` where each result is `{ index, suggestions }` and
   *   `index` is the position of the misspelled word in `words`.
   */
  checkWords(language, words, callback) {
    if (callback == null) {
      callback = () => {}
    }
    return this.runAspellOnWords(language, words, (error, output) => {
      if (error != null) {
        return callback(OError.tag(error))
      }
      const suggestions = this.getSuggestions(language, output)
      const results = []
      const addToCache = new Map()
      let hits = 0
      for (let i = 0; i < words.length; i++) {
        const key = language + ':' + words[i]
        const cached = cache.get(key)
        if (cached != null) {
          hits++
          // `true` marks a known-valid word; anything else is the cached
          // suggestion list of a misspelled word
          if (cached !== true) {
            results.push({ index: i, suggestions: cached })
          }
          continue
        }
        const fresh = suggestions[key]
        if (fresh != null) {
          addToCache.set(key, fresh)
          results.push({ index: i, suggestions: fresh })
        } else {
          // a valid word, but uncached
          addToCache.set(key, true)
        }
      }
      // update the cache after processing all words, to avoid cache
      // changing while we use it
      for (const [key, value] of addToCache) {
        cache.set(key, value)
      }
      logger.info(
        {
          hits,
          total: words.length,
          // avoid logging "NaN" when no words were submitted
          hitrate: words.length > 0 ? (hits / words.length).toFixed(2) : '0.00',
        },
        'cache hit rate'
      )
      return callback(null, results)
    })
  }

  /**
   * Parse aspell pipe-mode output into a suggestions lookup.
   *
   * Lines starting with "&" carry suggestions after the first ": ";
   * lines starting with "#" denote a misspelling without suggestions.
   * All other lines are ignored.
   *
   * @param {string} language - prefix for the generated keys.
   * @param {string} output - raw output, one result per line.
   * @returns {Object<string, string[]>} map from "<language>:<word>" to the
   *   list of suggestions (empty when aspell offered none).
   */
  getSuggestions(language, output) {
    const suggestions = {}
    for (const line of output.split('\n')) {
      const marker = line[0]
      if (marker !== '&' && marker !== '#') {
        continue
      }
      const parts = line.split(' ')
      if (parts.length <= 1) {
        continue
      }
      const key = language + ':' + parts[1]
      if (marker === '&') {
        // Suggestions found
        const suggestionsString = line.slice(line.indexOf(':') + 2)
        suggestions[key] = suggestionsString.split(', ')
      } else {
        // No suggestions
        suggestions[key] = []
      }
    }
    return suggestions
  }

  /**
   * Send the distinct, not-yet-cached words to the aspell worker pool.
   *
   * @param {string} language - language identifier.
   * @param {string[]} words - words to check; duplicates and words already
   *   in the cache are skipped.
   * @param {Function} [callback] - called with `(error, output)`; `output`
   *   is the empty string when there was nothing left to check.
   */
  runAspellOnWords(language, words, callback) {
    if (callback == null) {
      callback = () => {}
    }
    // A Set (rather than a plain object used as a dictionary) so that words
    // such as "constructor" or "toString" are not mistaken for already-seen
    // entries via Object.prototype and silently skipped.
    const uncachedWords = new Set()
    // Array.from keeps accepting array-like inputs, as before.
    for (const word of Array.from(words)) {
      if (!uncachedWords.has(word) && !cache.has(language + ':' + word)) {
        uncachedWords.add(word)
      }
    }
    if (uncachedWords.size === 0) {
      return callback(null, '')
    }
    return WorkerPool.check(
      language,
      Array.from(uncachedWords),
      ASpell.ASPELL_TIMEOUT,
      callback
    )
  }
}
// Public callback-style API of this module.
const ASpell = {
  // Timeout value handed to the worker pool for each check.
  ASPELL_TIMEOUT: 10000,

  // The description of how to call aspell from another program can be found here:
  // http://aspell.net/man-html/Through-A-Pipe.html
  //
  // Spell-checks `words` for `language` using a fresh ASpellRunner; the
  // callback is optional and receives (error, results).
  checkWords(language, words, callback) {
    const done = callback == null ? () => {} : callback
    return new ASpellRunner().checkWords(language, words, done)
  },
}
// Promise-returning counterpart of the callback API.
ASpell.promises = {
  checkWords: promisify(ASpell.checkWords),
}

module.exports = ASpell

// Shared worker pool used by ASpellRunner; it is only dereferenced when a
// check actually runs, which is after this module has finished loading.
const WorkerPool = new ASpellWorkerPool()

// Expose the handle of the periodic cache-dump timer alongside the API.
module.exports.cacheDump = cacheDump