From f75c5a1e2bb3dbff2925ea1794da0ff095aed7d3 Mon Sep 17 00:00:00 2001
From: koirand
Date: Mon, 25 Mar 2019 02:02:50 +0900
Subject: [PATCH] Implement Bigram search #5

---
 static/js/search.js | 71 ++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 70 insertions(+), 1 deletion(-)

diff --git a/static/js/search.js b/static/js/search.js
index 3c52cd2..f12ce29 100644
--- a/static/js/search.js
+++ b/static/js/search.js
@@ -2,6 +2,73 @@ var lunrIndex
 var lunrResult
 var pagesIndex
+
+/**
+ * Tokeniser for lunr that splits field text into overlapping bigrams
+ * (2-character slices) instead of splitting on word separators.
+ *
+ * @param {*} obj Field value to tokenise
+ * @param {Object} metadata Metadata cloned onto every produced token
+ * @return {lunr.Token[]} One token per array element, or one per bigram
+ */
+var bigramTokeniser = function (obj, metadata) {
+  // `== null` is true for both null and undefined
+  if (obj == null) {
+    return []
+  }
+
+  if (Array.isArray(obj)) {
+    return obj.map(function (t) {
+      return new lunr.Token(
+        lunr.utils.asString(t).toLowerCase(),
+        lunr.utils.clone(metadata)
+      )
+    })
+  }
+
+  var str = obj.toString().trim().toLowerCase()
+  var tokens = []
+
+  for (var i = 0; i <= str.length - 2; i++) {
+    var tokenMetadata = lunr.utils.clone(metadata) || {}
+    // lunr's built-in tokenizer stores [start, length] here, not [start, end]
+    tokenMetadata["position"] = [i, 2]
+    tokenMetadata["index"] = tokens.length
+    tokens.push(
+      new lunr.Token(
+        str.slice(i, i + 2),
+        tokenMetadata
+      )
+    )
+  }
+
+  return tokens
+}
+
+/**
+ * Convert a raw search string into a space separated list of bigrams
+ * suitable for lunrIndex.search().
+ *
+ * @param {String} query Raw search string
+ * @return {String} Bigrams of the query joined with single spaces
+ */
+var queryNgramSeparator = function (query) {
+  var str = query.toString().trim().toLowerCase()
+  var tokens = []
+  // Whitespace, "-", ":", "~", "^", "*" and a leading "+" are separators or
+  // operators in lunr's query syntax; a bigram containing one cannot match
+  // as a literal term and may raise lunr.QueryParseError, so it is skipped.
+  var unsafe = /[\s\-:~^*]|^\+/
+
+  for (var i = 0; i <= str.length - 2; i++) {
+    var bigram = str.slice(i, i + 2)
+    if (!unsafe.test(bigram)) {
+      tokens.push(bigram)
+    }
+  }
+
+  return tokens.join(' ')
+}
 
 /**
  * Preparation for using lunr.js
  */
@@ -9,6 +76,8 @@ function initLunr () {
   $.getJSON('index.json').done(function (index) {
     pagesIndex = index
     lunrIndex = lunr(function () {
+      this.tokenizer = bigramTokeniser
+      this.pipeline.reset()
       this.ref('ref')
       this.field('title', { boost: 10 })
       this.field('body')
@@ -29,7 +98,7 @@ function initLunr () {
  * @return {Object[]} Array of search results
  */
 function search (query) {
-  lunrResult = lunrIndex.search(query)
+  lunrResult = lunrIndex.search(queryNgramSeparator(query))
   return lunrResult.map(function (result) {
     return pagesIndex.filter(function (page) {
       return page.ref === result.ref