2019-01-02 06:33:26 -05:00
|
|
|
// Copyright 2019 The Hugo Authors. All rights reserved.
|
2017-08-19 07:16:00 -04:00
|
|
|
//
|
|
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
|
|
// you may not use this file except in compliance with the License.
|
|
|
|
// You may obtain a copy of the License at
|
|
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
//
|
|
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
// See the License for the specific language governing permissions and
|
|
|
|
// limitations under the License.
|
|
|
|
|
|
|
|
package related
|
|
|
|
|
|
|
|
import (
	"context"
	"fmt"
	"math/rand"
	"sort"
	"strings"
	"testing"
	"time"

	qt "github.com/frankban/quicktest"
)
|
|
|
|
|
|
|
|
type testDoc struct {
|
|
|
|
keywords map[string][]Keyword
|
|
|
|
date time.Time
|
2019-01-02 06:33:26 -05:00
|
|
|
name string
|
2017-08-19 07:16:00 -04:00
|
|
|
}
|
|
|
|
|
2017-09-25 22:25:33 -04:00
|
|
|
func (d *testDoc) String() string {
|
2017-08-19 07:16:00 -04:00
|
|
|
s := "\n"
|
2017-09-25 22:25:33 -04:00
|
|
|
for k, v := range d.keywords {
|
2017-08-19 07:16:00 -04:00
|
|
|
s += k + ":\t\t"
|
|
|
|
for _, vv := range v {
|
|
|
|
s += " " + vv.String()
|
|
|
|
}
|
|
|
|
s += "\n"
|
|
|
|
}
|
|
|
|
return s
|
|
|
|
}
|
|
|
|
|
2019-01-02 06:33:26 -05:00
|
|
|
func (d *testDoc) Name() string {
|
|
|
|
return d.name
|
|
|
|
}
|
|
|
|
|
2017-08-19 07:16:00 -04:00
|
|
|
func newTestDoc(name string, keywords ...string) *testDoc {
|
2019-01-02 06:33:26 -05:00
|
|
|
time.Sleep(1 * time.Millisecond)
|
|
|
|
return newTestDocWithDate(name, time.Now(), keywords...)
|
|
|
|
}
|
|
|
|
|
|
|
|
func newTestDocWithDate(name string, date time.Time, keywords ...string) *testDoc {
|
2017-08-19 07:16:00 -04:00
|
|
|
km := make(map[string][]Keyword)
|
|
|
|
|
2019-01-02 06:33:26 -05:00
|
|
|
kw := &testDoc{keywords: km, date: date}
|
2017-08-19 07:16:00 -04:00
|
|
|
|
|
|
|
kw.addKeywords(name, keywords...)
|
|
|
|
return kw
|
|
|
|
}
|
|
|
|
|
2017-09-25 22:25:33 -04:00
|
|
|
func (d *testDoc) addKeywords(name string, keywords ...string) *testDoc {
|
2017-08-19 07:16:00 -04:00
|
|
|
keywordm := createTestKeywords(name, keywords...)
|
|
|
|
|
|
|
|
for k, v := range keywordm {
|
|
|
|
keywords := make([]Keyword, len(v))
|
|
|
|
for i := 0; i < len(v); i++ {
|
|
|
|
keywords[i] = StringKeyword(v[i])
|
|
|
|
}
|
2017-09-25 22:25:33 -04:00
|
|
|
d.keywords[k] = keywords
|
2017-08-19 07:16:00 -04:00
|
|
|
}
|
2017-09-25 22:25:33 -04:00
|
|
|
return d
|
2017-08-19 07:16:00 -04:00
|
|
|
}
|
|
|
|
|
|
|
|
// createTestKeywords returns a single-entry map from the index name to the
// given keywords. The keywords slice is stored as passed, without copying.
func createTestKeywords(name string, keywords ...string) map[string][]string {
	byIndex := make(map[string][]string, 1)
	byIndex[name] = keywords
	return byIndex
}
|
|
|
|
|
2019-01-02 06:33:26 -05:00
|
|
|
func (d *testDoc) RelatedKeywords(cfg IndexConfig) ([]Keyword, error) {
|
2017-09-25 22:25:33 -04:00
|
|
|
return d.keywords[cfg.Name], nil
|
2017-08-19 07:16:00 -04:00
|
|
|
}
|
|
|
|
|
2019-01-02 06:33:26 -05:00
|
|
|
func (d *testDoc) PublishDate() time.Time {
|
2017-09-25 22:25:33 -04:00
|
|
|
return d.date
|
2017-08-19 07:16:00 -04:00
|
|
|
}
|
|
|
|
|
2023-02-23 09:20:31 -05:00
|
|
|
func TestCardinalityThreshold(t *testing.T) {
|
|
|
|
c := qt.New(t)
|
|
|
|
config := Config{
|
|
|
|
Threshold: 90,
|
|
|
|
IncludeNewer: false,
|
2023-01-04 12:24:36 -05:00
|
|
|
Indices: IndicesConfig{
|
2023-02-23 09:20:31 -05:00
|
|
|
IndexConfig{Name: "tags", Weight: 50, CardinalityThreshold: 79},
|
|
|
|
IndexConfig{Name: "keywords", Weight: 65, CardinalityThreshold: 90},
|
|
|
|
},
|
|
|
|
}
|
|
|
|
|
|
|
|
idx := NewInvertedIndex(config)
|
|
|
|
hasKeyword := func(index, keyword string) bool {
|
|
|
|
_, found := idx.index[index][StringKeyword(keyword)]
|
|
|
|
return found
|
|
|
|
}
|
|
|
|
|
|
|
|
docs := []Document{
|
|
|
|
newTestDoc("tags", "a", "b", "c", "d"),
|
|
|
|
newTestDoc("tags", "b", "d", "g"),
|
|
|
|
newTestDoc("tags", "b", "d", "g"),
|
|
|
|
newTestDoc("tags", "b", "h").addKeywords("keywords", "a"),
|
|
|
|
newTestDoc("tags", "g", "h").addKeywords("keywords", "a", "b", "z"),
|
|
|
|
}
|
|
|
|
|
|
|
|
idx.Add(context.Background(), docs...)
|
|
|
|
c.Assert(idx.Finalize(context.Background()), qt.IsNil)
|
|
|
|
// Only tags=b should be removed.
|
|
|
|
c.Assert(hasKeyword("tags", "a"), qt.Equals, true)
|
|
|
|
c.Assert(hasKeyword("tags", "b"), qt.Equals, false)
|
|
|
|
c.Assert(hasKeyword("tags", "d"), qt.Equals, true)
|
|
|
|
c.Assert(hasKeyword("keywords", "b"), qt.Equals, true)
|
|
|
|
|
|
|
|
}
|
|
|
|
|
2017-08-19 07:16:00 -04:00
|
|
|
func TestSearch(t *testing.T) {
|
|
|
|
config := Config{
|
|
|
|
Threshold: 90,
|
|
|
|
IncludeNewer: false,
|
2023-01-04 12:24:36 -05:00
|
|
|
Indices: IndicesConfig{
|
2017-08-19 07:16:00 -04:00
|
|
|
IndexConfig{Name: "tags", Weight: 50},
|
|
|
|
IndexConfig{Name: "keywords", Weight: 65},
|
|
|
|
},
|
|
|
|
}
|
|
|
|
|
|
|
|
idx := NewInvertedIndex(config)
|
2020-12-02 07:23:25 -05:00
|
|
|
// idx.debug = true
|
2017-08-19 07:16:00 -04:00
|
|
|
|
|
|
|
docs := []Document{
|
|
|
|
newTestDoc("tags", "a", "b", "c", "d"),
|
|
|
|
newTestDoc("tags", "b", "d", "g"),
|
|
|
|
newTestDoc("tags", "b", "h").addKeywords("keywords", "a"),
|
|
|
|
newTestDoc("tags", "g", "h").addKeywords("keywords", "a", "b"),
|
|
|
|
}
|
|
|
|
|
2023-02-11 10:20:24 -05:00
|
|
|
idx.Add(context.Background(), docs...)
|
2017-08-19 07:16:00 -04:00
|
|
|
|
|
|
|
t.Run("count", func(t *testing.T) {
|
2019-08-10 15:05:17 -04:00
|
|
|
c := qt.New(t)
|
|
|
|
c.Assert(len(idx.index), qt.Equals, 2)
|
2017-08-19 07:16:00 -04:00
|
|
|
set1, found := idx.index["tags"]
|
2019-08-10 15:05:17 -04:00
|
|
|
c.Assert(found, qt.Equals, true)
|
2017-08-19 07:16:00 -04:00
|
|
|
// 6 tags
|
2019-08-10 15:05:17 -04:00
|
|
|
c.Assert(len(set1), qt.Equals, 6)
|
2017-08-19 07:16:00 -04:00
|
|
|
|
|
|
|
set2, found := idx.index["keywords"]
|
2019-08-10 15:05:17 -04:00
|
|
|
c.Assert(found, qt.Equals, true)
|
|
|
|
c.Assert(len(set2), qt.Equals, 2)
|
2017-08-19 07:16:00 -04:00
|
|
|
})
|
|
|
|
|
|
|
|
t.Run("search-tags", func(t *testing.T) {
|
2019-08-10 15:05:17 -04:00
|
|
|
c := qt.New(t)
|
2023-02-11 10:20:24 -05:00
|
|
|
var cfg IndexConfig
|
|
|
|
m, err := idx.search(context.Background(), newQueryElement("tags", cfg.StringsToKeywords("a", "b", "d", "z")...))
|
2019-08-10 15:05:17 -04:00
|
|
|
c.Assert(err, qt.IsNil)
|
|
|
|
c.Assert(len(m), qt.Equals, 2)
|
|
|
|
c.Assert(m[0], qt.Equals, docs[0])
|
|
|
|
c.Assert(m[1], qt.Equals, docs[1])
|
2017-08-19 07:16:00 -04:00
|
|
|
})
|
|
|
|
|
|
|
|
t.Run("search-tags-and-keywords", func(t *testing.T) {
|
2019-08-10 15:05:17 -04:00
|
|
|
c := qt.New(t)
|
2023-02-11 10:20:24 -05:00
|
|
|
var cfg IndexConfig
|
|
|
|
m, err := idx.search(context.Background(),
|
|
|
|
newQueryElement("tags", cfg.StringsToKeywords("a", "b", "z")...),
|
|
|
|
newQueryElement("keywords", cfg.StringsToKeywords("a", "b")...))
|
2019-08-10 15:05:17 -04:00
|
|
|
c.Assert(err, qt.IsNil)
|
|
|
|
c.Assert(len(m), qt.Equals, 3)
|
|
|
|
c.Assert(m[0], qt.Equals, docs[3])
|
|
|
|
c.Assert(m[1], qt.Equals, docs[2])
|
|
|
|
c.Assert(m[2], qt.Equals, docs[0])
|
2017-08-19 07:16:00 -04:00
|
|
|
})
|
|
|
|
|
|
|
|
t.Run("searchdoc-all", func(t *testing.T) {
|
2019-08-10 15:05:17 -04:00
|
|
|
c := qt.New(t)
|
2017-08-19 07:16:00 -04:00
|
|
|
doc := newTestDoc("tags", "a").addKeywords("keywords", "a")
|
2023-02-11 10:20:24 -05:00
|
|
|
m, err := idx.Search(context.Background(), SearchOpts{Document: doc})
|
2019-08-10 15:05:17 -04:00
|
|
|
c.Assert(err, qt.IsNil)
|
|
|
|
c.Assert(len(m), qt.Equals, 2)
|
|
|
|
c.Assert(m[0], qt.Equals, docs[3])
|
|
|
|
c.Assert(m[1], qt.Equals, docs[2])
|
2017-08-19 07:16:00 -04:00
|
|
|
})
|
|
|
|
|
|
|
|
t.Run("searchdoc-tags", func(t *testing.T) {
|
2019-08-10 15:05:17 -04:00
|
|
|
c := qt.New(t)
|
2017-08-19 07:16:00 -04:00
|
|
|
doc := newTestDoc("tags", "a", "b", "d", "z").addKeywords("keywords", "a", "b")
|
2023-02-11 10:20:24 -05:00
|
|
|
m, err := idx.Search(context.Background(), SearchOpts{Document: doc, Indices: []string{"tags"}})
|
2019-08-10 15:05:17 -04:00
|
|
|
c.Assert(err, qt.IsNil)
|
|
|
|
c.Assert(len(m), qt.Equals, 2)
|
|
|
|
c.Assert(m[0], qt.Equals, docs[0])
|
|
|
|
c.Assert(m[1], qt.Equals, docs[1])
|
2017-08-19 07:16:00 -04:00
|
|
|
})
|
|
|
|
|
|
|
|
t.Run("searchdoc-keywords-date", func(t *testing.T) {
|
2019-08-10 15:05:17 -04:00
|
|
|
c := qt.New(t)
|
2017-08-19 07:16:00 -04:00
|
|
|
doc := newTestDoc("tags", "a", "b", "d", "z").addKeywords("keywords", "a", "b")
|
|
|
|
// This will get a date newer than the others.
|
|
|
|
newDoc := newTestDoc("keywords", "a", "b")
|
2023-02-11 10:20:24 -05:00
|
|
|
idx.Add(context.Background(), newDoc)
|
2017-08-19 07:16:00 -04:00
|
|
|
|
2023-02-11 10:20:24 -05:00
|
|
|
m, err := idx.Search(context.Background(), SearchOpts{Document: doc, Indices: []string{"keywords"}})
|
2019-08-10 15:05:17 -04:00
|
|
|
c.Assert(err, qt.IsNil)
|
|
|
|
c.Assert(len(m), qt.Equals, 2)
|
|
|
|
c.Assert(m[0], qt.Equals, docs[3])
|
2017-08-19 07:16:00 -04:00
|
|
|
})
|
|
|
|
|
2019-01-02 06:33:26 -05:00
|
|
|
t.Run("searchdoc-keywords-same-date", func(t *testing.T) {
|
2019-08-10 15:05:17 -04:00
|
|
|
c := qt.New(t)
|
2019-01-02 06:33:26 -05:00
|
|
|
idx := NewInvertedIndex(config)
|
|
|
|
|
|
|
|
date := time.Now()
|
|
|
|
|
|
|
|
doc := newTestDocWithDate("keywords", date, "a", "b")
|
|
|
|
doc.name = "thedoc"
|
|
|
|
|
|
|
|
for i := 0; i < 10; i++ {
|
|
|
|
docc := *doc
|
|
|
|
docc.name = fmt.Sprintf("doc%d", i)
|
2023-02-11 10:20:24 -05:00
|
|
|
idx.Add(context.Background(), &docc)
|
2019-01-02 06:33:26 -05:00
|
|
|
}
|
|
|
|
|
2023-02-11 10:20:24 -05:00
|
|
|
m, err := idx.Search(context.Background(), SearchOpts{Document: doc, Indices: []string{"keywords"}})
|
2019-08-10 15:05:17 -04:00
|
|
|
c.Assert(err, qt.IsNil)
|
|
|
|
c.Assert(len(m), qt.Equals, 10)
|
2019-01-02 06:33:26 -05:00
|
|
|
for i := 0; i < 10; i++ {
|
2019-08-10 15:05:17 -04:00
|
|
|
c.Assert(m[i].Name(), qt.Equals, fmt.Sprintf("doc%d", i))
|
2019-01-02 06:33:26 -05:00
|
|
|
}
|
|
|
|
})
|
2017-08-19 07:16:00 -04:00
|
|
|
}
|
|
|
|
|
2020-04-21 11:44:48 -04:00
|
|
|
func TestToKeywordsToLower(t *testing.T) {
|
|
|
|
c := qt.New(t)
|
|
|
|
slice := []string{"A", "B", "C"}
|
|
|
|
config := IndexConfig{ToLower: true}
|
|
|
|
keywords, err := config.ToKeywords(slice)
|
|
|
|
c.Assert(err, qt.IsNil)
|
|
|
|
c.Assert(slice, qt.DeepEquals, []string{"A", "B", "C"})
|
|
|
|
c.Assert(keywords, qt.DeepEquals, []Keyword{
|
|
|
|
StringKeyword("a"),
|
|
|
|
StringKeyword("b"),
|
|
|
|
StringKeyword("c"),
|
|
|
|
})
|
|
|
|
}
|
|
|
|
|
2023-01-17 06:36:34 -05:00
|
|
|
func TestToKeywordsAnySlice(t *testing.T) {
|
|
|
|
c := qt.New(t)
|
|
|
|
var config IndexConfig
|
|
|
|
slice := []any{"A", 32, "C"}
|
|
|
|
keywords, err := config.ToKeywords(slice)
|
|
|
|
c.Assert(err, qt.IsNil)
|
|
|
|
c.Assert(keywords, qt.DeepEquals, []Keyword{
|
|
|
|
StringKeyword("A"),
|
|
|
|
StringKeyword("32"),
|
|
|
|
StringKeyword("C"),
|
|
|
|
})
|
|
|
|
}
|
|
|
|
|
2017-08-19 07:16:00 -04:00
|
|
|
func BenchmarkRelatedNewIndex(b *testing.B) {
|
|
|
|
pages := make([]*testDoc, 100)
|
|
|
|
numkeywords := 30
|
|
|
|
allKeywords := make([]string, numkeywords)
|
|
|
|
for i := 0; i < numkeywords; i++ {
|
|
|
|
allKeywords[i] = fmt.Sprintf("keyword%d", i+1)
|
|
|
|
}
|
|
|
|
|
|
|
|
for i := 0; i < len(pages); i++ {
|
|
|
|
start := rand.Intn(len(allKeywords))
|
|
|
|
end := start + 3
|
|
|
|
if end >= len(allKeywords) {
|
|
|
|
end = start + 1
|
|
|
|
}
|
|
|
|
|
|
|
|
kw := newTestDoc("tags", allKeywords[start:end]...)
|
|
|
|
if i%5 == 0 {
|
|
|
|
start := rand.Intn(len(allKeywords))
|
|
|
|
end := start + 3
|
|
|
|
if end >= len(allKeywords) {
|
|
|
|
end = start + 1
|
|
|
|
}
|
|
|
|
kw.addKeywords("keywords", allKeywords[start:end]...)
|
|
|
|
}
|
|
|
|
|
|
|
|
pages[i] = kw
|
|
|
|
}
|
|
|
|
|
|
|
|
cfg := Config{
|
|
|
|
Threshold: 50,
|
2023-01-04 12:24:36 -05:00
|
|
|
Indices: IndicesConfig{
|
2017-08-19 07:16:00 -04:00
|
|
|
IndexConfig{Name: "tags", Weight: 100},
|
|
|
|
IndexConfig{Name: "keywords", Weight: 200},
|
|
|
|
},
|
|
|
|
}
|
|
|
|
|
|
|
|
b.Run("singles", func(b *testing.B) {
|
|
|
|
for i := 0; i < b.N; i++ {
|
|
|
|
idx := NewInvertedIndex(cfg)
|
|
|
|
for _, doc := range pages {
|
2023-02-11 10:20:24 -05:00
|
|
|
idx.Add(context.Background(), doc)
|
2017-08-19 07:16:00 -04:00
|
|
|
}
|
|
|
|
}
|
|
|
|
})
|
|
|
|
|
|
|
|
b.Run("all", func(b *testing.B) {
|
|
|
|
for i := 0; i < b.N; i++ {
|
|
|
|
idx := NewInvertedIndex(cfg)
|
|
|
|
docs := make([]Document, len(pages))
|
|
|
|
for i := 0; i < len(pages); i++ {
|
|
|
|
docs[i] = pages[i]
|
|
|
|
}
|
2023-02-11 10:20:24 -05:00
|
|
|
idx.Add(context.Background(), docs...)
|
2017-08-19 07:16:00 -04:00
|
|
|
}
|
|
|
|
})
|
|
|
|
}
|
|
|
|
|
|
|
|
func BenchmarkRelatedMatchesIn(b *testing.B) {
|
2023-02-11 10:20:24 -05:00
|
|
|
var icfg IndexConfig
|
|
|
|
q1 := newQueryElement("tags", icfg.StringsToKeywords("keyword2", "keyword5", "keyword32", "asdf")...)
|
|
|
|
q2 := newQueryElement("keywords", icfg.StringsToKeywords("keyword3", "keyword4")...)
|
2017-08-19 07:16:00 -04:00
|
|
|
|
|
|
|
docs := make([]*testDoc, 1000)
|
|
|
|
numkeywords := 20
|
|
|
|
allKeywords := make([]string, numkeywords)
|
|
|
|
for i := 0; i < numkeywords; i++ {
|
|
|
|
allKeywords[i] = fmt.Sprintf("keyword%d", i+1)
|
|
|
|
}
|
|
|
|
|
|
|
|
cfg := Config{
|
|
|
|
Threshold: 20,
|
2023-01-04 12:24:36 -05:00
|
|
|
Indices: IndicesConfig{
|
2017-08-19 07:16:00 -04:00
|
|
|
IndexConfig{Name: "tags", Weight: 100},
|
|
|
|
IndexConfig{Name: "keywords", Weight: 200},
|
|
|
|
},
|
|
|
|
}
|
|
|
|
|
|
|
|
idx := NewInvertedIndex(cfg)
|
|
|
|
|
|
|
|
for i := 0; i < len(docs); i++ {
|
|
|
|
start := rand.Intn(len(allKeywords))
|
|
|
|
end := start + 3
|
|
|
|
if end >= len(allKeywords) {
|
|
|
|
end = start + 1
|
|
|
|
}
|
|
|
|
|
|
|
|
index := "tags"
|
|
|
|
if i%5 == 0 {
|
|
|
|
index = "keywords"
|
|
|
|
}
|
|
|
|
|
2023-02-11 10:20:24 -05:00
|
|
|
idx.Add(context.Background(), newTestDoc(index, allKeywords[start:end]...))
|
2017-08-19 07:16:00 -04:00
|
|
|
}
|
|
|
|
|
|
|
|
b.ResetTimer()
|
2023-02-11 10:20:24 -05:00
|
|
|
ctx := context.Background()
|
2017-08-19 07:16:00 -04:00
|
|
|
for i := 0; i < b.N; i++ {
|
|
|
|
if i%10 == 0 {
|
2023-02-11 10:20:24 -05:00
|
|
|
idx.search(ctx, q2)
|
2017-08-19 07:16:00 -04:00
|
|
|
} else {
|
2023-02-11 10:20:24 -05:00
|
|
|
idx.search(ctx, q1)
|
2017-08-19 07:16:00 -04:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|