mirror of
https://github.com/gohugoio/hugo.git
synced 2024-11-29 05:52:10 -05:00
hugolib: Implement "related content"
This closes #98, even though this commit does not implement full content text search. We may revisit that in the future, but it deserves its own issue. Fixes #98
This commit is contained in:
parent
16c9127663
commit
3b4f17bbc9
14 changed files with 1151 additions and 10 deletions
44
common/types/types.go
Normal file
44
common/types/types.go
Normal file
|
@ -0,0 +1,44 @@
|
||||||
|
// Copyright 2017-present The Hugo Authors. All rights reserved.
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
// Package types contains types shared between packages in Hugo.
|
||||||
|
package types
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
|
||||||
|
"github.com/spf13/cast"
|
||||||
|
)
|
||||||
|
|
||||||
|
// KeyValues holds an key and a slice of values.
|
||||||
|
type KeyValues struct {
|
||||||
|
Key interface{}
|
||||||
|
Values []interface{}
|
||||||
|
}
|
||||||
|
|
||||||
|
// KeyString returns the key as a string, an empty string if conversion fails.
|
||||||
|
func (k KeyValues) KeyString() string {
|
||||||
|
return cast.ToString(k.Key)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (k KeyValues) String() string {
|
||||||
|
return fmt.Sprintf("%v: %v", k.Key, k.Values)
|
||||||
|
}
|
||||||
|
|
||||||
|
func NewKeyValuesStrings(key string, values ...string) KeyValues {
|
||||||
|
iv := make([]interface{}, len(values))
|
||||||
|
for i := 0; i < len(values); i++ {
|
||||||
|
iv[i] = values[i]
|
||||||
|
}
|
||||||
|
return KeyValues{Key: key, Values: iv}
|
||||||
|
}
|
29
common/types/types_test.go
Normal file
29
common/types/types_test.go
Normal file
|
@ -0,0 +1,29 @@
|
||||||
|
// Copyright 2017-present The Hugo Authors. All rights reserved.
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
package types
|
||||||
|
|
||||||
|
import (
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
"github.com/stretchr/testify/require"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestKeyValues(t *testing.T) {
|
||||||
|
assert := require.New(t)
|
||||||
|
|
||||||
|
kv := NewKeyValuesStrings("key", "a1", "a2")
|
||||||
|
|
||||||
|
assert.Equal("key", kv.KeyString())
|
||||||
|
assert.Equal([]interface{}{"a1", "a2"}, kv.Values)
|
||||||
|
}
|
|
@ -20,6 +20,8 @@ import (
|
||||||
"reflect"
|
"reflect"
|
||||||
"unicode"
|
"unicode"
|
||||||
|
|
||||||
|
"github.com/gohugoio/hugo/related"
|
||||||
|
|
||||||
"github.com/bep/gitmap"
|
"github.com/bep/gitmap"
|
||||||
|
|
||||||
"github.com/gohugoio/hugo/helpers"
|
"github.com/gohugoio/hugo/helpers"
|
||||||
|
@ -54,6 +56,9 @@ var (
|
||||||
// Assert that it implements the Eqer interface.
|
// Assert that it implements the Eqer interface.
|
||||||
_ compare.Eqer = (*Page)(nil)
|
_ compare.Eqer = (*Page)(nil)
|
||||||
_ compare.Eqer = (*PageOutput)(nil)
|
_ compare.Eqer = (*PageOutput)(nil)
|
||||||
|
|
||||||
|
// Assert that it implements the interface needed for related searches.
|
||||||
|
_ related.Document = (*Page)(nil)
|
||||||
)
|
)
|
||||||
|
|
||||||
const (
|
const (
|
||||||
|
@ -231,6 +236,28 @@ type Page struct {
|
||||||
targetPathDescriptorPrototype *targetPathDescriptor
|
targetPathDescriptorPrototype *targetPathDescriptor
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// SearchKeywords implements the related.Document interface needed for fast page searches.
|
||||||
|
func (p *Page) SearchKeywords(cfg related.IndexConfig) ([]related.Keyword, error) {
|
||||||
|
|
||||||
|
v, err := p.Param(cfg.Name)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
return cfg.ToKeywords(v)
|
||||||
|
}
|
||||||
|
|
||||||
|
// PubDate is when this page was or will be published.
|
||||||
|
// NOTE: This is currently used for search only and is not meant to be used
|
||||||
|
// directly in templates. We need to consolidate the dates in this struct.
|
||||||
|
// TODO(bep) see https://github.com/gohugoio/hugo/issues/3854
|
||||||
|
func (p *Page) PubDate() time.Time {
|
||||||
|
if !p.PublishDate.IsZero() {
|
||||||
|
return p.PublishDate
|
||||||
|
}
|
||||||
|
return p.Date
|
||||||
|
}
|
||||||
|
|
||||||
func (p *Page) RSSLink() template.URL {
|
func (p *Page) RSSLink() template.URL {
|
||||||
f, found := p.outputFormats.GetByName(output.RSSFormat.Name)
|
f, found := p.outputFormats.GetByName(output.RSSFormat.Name)
|
||||||
if !found {
|
if !found {
|
||||||
|
@ -329,6 +356,21 @@ func (ps Pages) findPagePosByFilePath(inPath string) int {
|
||||||
return -1
|
return -1
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (ps Pages) removeFirstIfFound(p *Page) Pages {
|
||||||
|
ii := -1
|
||||||
|
for i, pp := range ps {
|
||||||
|
if pp == p {
|
||||||
|
ii = i
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if ii != -1 {
|
||||||
|
ps = append(ps[:ii], ps[ii+1:]...)
|
||||||
|
}
|
||||||
|
return ps
|
||||||
|
}
|
||||||
|
|
||||||
func (ps Pages) findFirstPagePosByFilePathPrefix(prefix string) int {
|
func (ps Pages) findFirstPagePosByFilePathPrefix(prefix string) int {
|
||||||
if prefix == "" {
|
if prefix == "" {
|
||||||
return -1
|
return -1
|
||||||
|
|
|
@ -36,7 +36,7 @@ func (c *pageCache) get(key string, p Pages, apply func(p Pages)) (Pages, bool)
|
||||||
c.RLock()
|
c.RLock()
|
||||||
if cached, ok := c.m[key]; ok {
|
if cached, ok := c.m[key]; ok {
|
||||||
for _, ps := range cached {
|
for _, ps := range cached {
|
||||||
if probablyEqualPages(p, ps[0]) {
|
if fastEqualPages(p, ps[0]) {
|
||||||
c.RUnlock()
|
c.RUnlock()
|
||||||
return ps[1], true
|
return ps[1], true
|
||||||
}
|
}
|
||||||
|
@ -51,7 +51,7 @@ func (c *pageCache) get(key string, p Pages, apply func(p Pages)) (Pages, bool)
|
||||||
// double-check
|
// double-check
|
||||||
if cached, ok := c.m[key]; ok {
|
if cached, ok := c.m[key]; ok {
|
||||||
for _, ps := range cached {
|
for _, ps := range cached {
|
||||||
if probablyEqualPages(p, ps[0]) {
|
if fastEqualPages(p, ps[0]) {
|
||||||
return ps[1], true
|
return ps[1], true
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -73,10 +73,10 @@ func (c *pageCache) get(key string, p Pages, apply func(p Pages)) (Pages, bool)
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// "probably" as in: we do not compare every element for big slices, but that is
|
// "fast" as in: we do not compare every element for big slices, but that is
|
||||||
// good enough for our use case.
|
// good enough for our use cases.
|
||||||
// TODO(bep) there is a similar method in pagination.go. DRY.
|
// TODO(bep) there is a similar method in pagination.go. DRY.
|
||||||
func probablyEqualPages(p1, p2 Pages) bool {
|
func fastEqualPages(p1, p2 Pages) bool {
|
||||||
if p1 == nil && p2 == nil {
|
if p1 == nil && p2 == nil {
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
|
|
|
@ -56,8 +56,8 @@ func TestPageCache(t *testing.T) {
|
||||||
l1.Unlock()
|
l1.Unlock()
|
||||||
p2, c2 := c1.get("k1", p, nil)
|
p2, c2 := c1.get("k1", p, nil)
|
||||||
assert.True(t, c2)
|
assert.True(t, c2)
|
||||||
assert.True(t, probablyEqualPages(p, p2))
|
assert.True(t, fastEqualPages(p, p2))
|
||||||
assert.True(t, probablyEqualPages(p, pages))
|
assert.True(t, fastEqualPages(p, pages))
|
||||||
assert.NotNil(t, p)
|
assert.NotNil(t, p)
|
||||||
|
|
||||||
l2.Lock()
|
l2.Lock()
|
||||||
|
|
|
@ -25,7 +25,7 @@ import (
|
||||||
// The key is typically a year or similar.
|
// The key is typically a year or similar.
|
||||||
type PageGroup struct {
|
type PageGroup struct {
|
||||||
Key interface{}
|
Key interface{}
|
||||||
Pages Pages
|
Pages
|
||||||
}
|
}
|
||||||
|
|
||||||
type mapKeyValues []reflect.Value
|
type mapKeyValues []reflect.Value
|
||||||
|
|
|
@ -115,7 +115,7 @@ func TestPageSortReverse(t *testing.T) {
|
||||||
assert.Equal(t, 9, p2[0].fuzzyWordCount)
|
assert.Equal(t, 9, p2[0].fuzzyWordCount)
|
||||||
assert.Equal(t, 0, p2[9].fuzzyWordCount)
|
assert.Equal(t, 0, p2[9].fuzzyWordCount)
|
||||||
// cached
|
// cached
|
||||||
assert.True(t, probablyEqualPages(p2, p1.Reverse()))
|
assert.True(t, fastEqualPages(p2, p1.Reverse()))
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestPageSortByParam(t *testing.T) {
|
func TestPageSortByParam(t *testing.T) {
|
||||||
|
|
191
hugolib/pages_related.go
Normal file
191
hugolib/pages_related.go
Normal file
|
@ -0,0 +1,191 @@
|
||||||
|
// Copyright 2017-present The Hugo Authors. All rights reserved.
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
package hugolib
|
||||||
|
|
||||||
|
import (
|
||||||
|
"sync"
|
||||||
|
|
||||||
|
"github.com/gohugoio/hugo/common/types"
|
||||||
|
"github.com/gohugoio/hugo/related"
|
||||||
|
"github.com/spf13/cast"
|
||||||
|
)
|
||||||
|
|
||||||
|
var (
|
||||||
|
// Assert that Pages and PageGroup implements the PageGenealogist interface.
|
||||||
|
_ PageGenealogist = (Pages)(nil)
|
||||||
|
_ PageGenealogist = PageGroup{}
|
||||||
|
)
|
||||||
|
|
||||||
|
// A PageGenealogist finds related pages in a page collection. This interface is implemented
|
||||||
|
// by Pages and PageGroup, which makes it available as `{{ .RegularPages.Related . }}` etc.
|
||||||
|
type PageGenealogist interface {
|
||||||
|
|
||||||
|
// Template example:
|
||||||
|
// {{ $related := .RegularPages.Related . }}
|
||||||
|
Related(doc related.Document) (Pages, error)
|
||||||
|
|
||||||
|
// Template example:
|
||||||
|
// {{ $related := .RegularPages.RelatedIndices . "tags" "date" }}
|
||||||
|
RelatedIndices(doc related.Document, indices ...interface{}) (Pages, error)
|
||||||
|
|
||||||
|
// Template example:
|
||||||
|
// {{ $related := .RegularPages.RelatedTo ( keyVals "tags" "hugo", "rocks") ( keyVals "date" .Date ) }}
|
||||||
|
RelatedTo(args ...types.KeyValues) (Pages, error)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Related searches all the configured indices with the search keywords from the
|
||||||
|
// supplied document.
|
||||||
|
func (p Pages) Related(doc related.Document) (Pages, error) {
|
||||||
|
page, err := unwrapPage(doc)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
result, err := p.searchDoc(page)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
return result.removeFirstIfFound(page), nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// RelatedIndices searches the given indices with the search keywords from the
|
||||||
|
// supplied document.
|
||||||
|
func (p Pages) RelatedIndices(doc related.Document, indices ...interface{}) (Pages, error) {
|
||||||
|
page, err := unwrapPage(doc)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
indicesStr, err := cast.ToStringSliceE(indices)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
result, err := p.searchDoc(page, indicesStr...)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
return result.removeFirstIfFound(page), nil
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
// RelatedTo searches the given indices with the corresponding values.
|
||||||
|
func (p Pages) RelatedTo(args ...types.KeyValues) (Pages, error) {
|
||||||
|
if len(p) == 0 {
|
||||||
|
return nil, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
return p.search(args...)
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
func (p Pages) search(args ...types.KeyValues) (Pages, error) {
|
||||||
|
return p.withInvertedIndex(func(idx *related.InvertedIndex) ([]related.Document, error) {
|
||||||
|
return idx.SearchKeyValues(args...)
|
||||||
|
})
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
func (p Pages) searchDoc(doc related.Document, indices ...string) (Pages, error) {
|
||||||
|
return p.withInvertedIndex(func(idx *related.InvertedIndex) ([]related.Document, error) {
|
||||||
|
return idx.SearchDoc(doc, indices...)
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
func (p Pages) withInvertedIndex(search func(idx *related.InvertedIndex) ([]related.Document, error)) (Pages, error) {
|
||||||
|
if len(p) == 0 {
|
||||||
|
return nil, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
cache := p[0].s.relatedDocsHandler
|
||||||
|
|
||||||
|
searchIndex, err := cache.getOrCreateIndex(p)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
result, err := search(searchIndex)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
if len(result) > 0 {
|
||||||
|
mp := make(Pages, len(result))
|
||||||
|
for i, match := range result {
|
||||||
|
mp[i] = match.(*Page)
|
||||||
|
}
|
||||||
|
return mp, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
return nil, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
type cachedPostingList struct {
|
||||||
|
p Pages
|
||||||
|
|
||||||
|
postingList *related.InvertedIndex
|
||||||
|
}
|
||||||
|
|
||||||
|
type relatedDocsHandler struct {
|
||||||
|
// This is configured in site or langugage config.
|
||||||
|
cfg related.Config
|
||||||
|
|
||||||
|
postingLists []*cachedPostingList
|
||||||
|
mu sync.RWMutex
|
||||||
|
}
|
||||||
|
|
||||||
|
func newSearchIndexHandler(cfg related.Config) *relatedDocsHandler {
|
||||||
|
return &relatedDocsHandler{cfg: cfg}
|
||||||
|
}
|
||||||
|
|
||||||
|
// This assumes that a lock has been aquired.
|
||||||
|
func (s *relatedDocsHandler) getIndex(p Pages) *related.InvertedIndex {
|
||||||
|
for _, ci := range s.postingLists {
|
||||||
|
if fastEqualPages(p, ci.p) {
|
||||||
|
return ci.postingList
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *relatedDocsHandler) getOrCreateIndex(p Pages) (*related.InvertedIndex, error) {
|
||||||
|
s.mu.RLock()
|
||||||
|
cachedIndex := s.getIndex(p)
|
||||||
|
if cachedIndex != nil {
|
||||||
|
s.mu.RUnlock()
|
||||||
|
return cachedIndex, nil
|
||||||
|
}
|
||||||
|
s.mu.RUnlock()
|
||||||
|
|
||||||
|
s.mu.Lock()
|
||||||
|
defer s.mu.Unlock()
|
||||||
|
|
||||||
|
if cachedIndex := s.getIndex(p); cachedIndex != nil {
|
||||||
|
return cachedIndex, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
searchIndex := related.NewInvertedIndex(s.cfg)
|
||||||
|
|
||||||
|
for _, page := range p {
|
||||||
|
if err := searchIndex.Add(page); err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
s.postingLists = append(s.postingLists, &cachedPostingList{p: p, postingList: searchIndex})
|
||||||
|
|
||||||
|
return searchIndex, nil
|
||||||
|
}
|
75
hugolib/pages_related_test.go
Normal file
75
hugolib/pages_related_test.go
Normal file
|
@ -0,0 +1,75 @@
|
||||||
|
// Copyright 2017-present The Hugo Authors. All rights reserved.
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
package hugolib
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"path/filepath"
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
"github.com/gohugoio/hugo/common/types"
|
||||||
|
"github.com/gohugoio/hugo/deps"
|
||||||
|
|
||||||
|
"github.com/stretchr/testify/require"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestRelated(t *testing.T) {
|
||||||
|
assert := require.New(t)
|
||||||
|
|
||||||
|
t.Parallel()
|
||||||
|
|
||||||
|
var (
|
||||||
|
cfg, fs = newTestCfg()
|
||||||
|
//th = testHelper{cfg, fs, t}
|
||||||
|
)
|
||||||
|
|
||||||
|
pageTmpl := `---
|
||||||
|
title: Page %d
|
||||||
|
keywords: [%s]
|
||||||
|
date: %s
|
||||||
|
---
|
||||||
|
|
||||||
|
Content
|
||||||
|
`
|
||||||
|
|
||||||
|
writeSource(t, fs, filepath.Join("content", "page1.md"), fmt.Sprintf(pageTmpl, 1, "hugo, says", "2017-01-03"))
|
||||||
|
writeSource(t, fs, filepath.Join("content", "page2.md"), fmt.Sprintf(pageTmpl, 2, "hugo, rocks", "2017-01-02"))
|
||||||
|
writeSource(t, fs, filepath.Join("content", "page3.md"), fmt.Sprintf(pageTmpl, 3, "bep, says", "2017-01-01"))
|
||||||
|
|
||||||
|
s := buildSingleSite(t, deps.DepsCfg{Fs: fs, Cfg: cfg}, BuildCfg{SkipRender: true})
|
||||||
|
assert.Len(s.RegularPages, 3)
|
||||||
|
|
||||||
|
result, err := s.RegularPages.RelatedTo(types.NewKeyValuesStrings("keywords", "hugo", "rocks"))
|
||||||
|
|
||||||
|
assert.NoError(err)
|
||||||
|
assert.Len(result, 2)
|
||||||
|
assert.Equal("Page 2", result[0].Title)
|
||||||
|
assert.Equal("Page 1", result[1].Title)
|
||||||
|
|
||||||
|
result, err = s.RegularPages.Related(s.RegularPages[0])
|
||||||
|
assert.Len(result, 2)
|
||||||
|
assert.Equal("Page 2", result[0].Title)
|
||||||
|
assert.Equal("Page 3", result[1].Title)
|
||||||
|
|
||||||
|
result, err = s.RegularPages.RelatedIndices(s.RegularPages[0], "keywords")
|
||||||
|
assert.Len(result, 2)
|
||||||
|
assert.Equal("Page 2", result[0].Title)
|
||||||
|
assert.Equal("Page 3", result[1].Title)
|
||||||
|
|
||||||
|
result, err = s.RegularPages.RelatedTo(types.NewKeyValuesStrings("keywords", "bep", "rocks"))
|
||||||
|
assert.Len(result, 2)
|
||||||
|
assert.Equal("Page 2", result[0].Title)
|
||||||
|
assert.Equal("Page 3", result[1].Title)
|
||||||
|
|
||||||
|
}
|
|
@ -42,6 +42,7 @@ import (
|
||||||
"github.com/gohugoio/hugo/helpers"
|
"github.com/gohugoio/hugo/helpers"
|
||||||
"github.com/gohugoio/hugo/output"
|
"github.com/gohugoio/hugo/output"
|
||||||
"github.com/gohugoio/hugo/parser"
|
"github.com/gohugoio/hugo/parser"
|
||||||
|
"github.com/gohugoio/hugo/related"
|
||||||
"github.com/gohugoio/hugo/source"
|
"github.com/gohugoio/hugo/source"
|
||||||
"github.com/gohugoio/hugo/tpl"
|
"github.com/gohugoio/hugo/tpl"
|
||||||
"github.com/gohugoio/hugo/transform"
|
"github.com/gohugoio/hugo/transform"
|
||||||
|
@ -135,6 +136,8 @@ type Site struct {
|
||||||
// The func used to title case titles.
|
// The func used to title case titles.
|
||||||
titleFunc func(s string) string
|
titleFunc func(s string) string
|
||||||
|
|
||||||
|
relatedDocsHandler *relatedDocsHandler
|
||||||
|
|
||||||
siteStats *siteStats
|
siteStats *siteStats
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -176,6 +179,7 @@ func (s *Site) reset() *Site {
|
||||||
layoutHandler: output.NewLayoutHandler(s.PathSpec.ThemeSet()),
|
layoutHandler: output.NewLayoutHandler(s.PathSpec.ThemeSet()),
|
||||||
disabledKinds: s.disabledKinds,
|
disabledKinds: s.disabledKinds,
|
||||||
titleFunc: s.titleFunc,
|
titleFunc: s.titleFunc,
|
||||||
|
relatedDocsHandler: newSearchIndexHandler(s.relatedDocsHandler.cfg),
|
||||||
outputFormats: s.outputFormats,
|
outputFormats: s.outputFormats,
|
||||||
outputFormatsConfig: s.outputFormatsConfig,
|
outputFormatsConfig: s.outputFormatsConfig,
|
||||||
mediaTypesConfig: s.mediaTypesConfig,
|
mediaTypesConfig: s.mediaTypesConfig,
|
||||||
|
@ -231,6 +235,21 @@ func newSite(cfg deps.DepsCfg) (*Site, error) {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
|
var relatedContentConfig related.Config
|
||||||
|
|
||||||
|
if cfg.Language.IsSet("related") {
|
||||||
|
relatedContentConfig, err = related.DecodeConfig(cfg.Language.Get("related"))
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
relatedContentConfig = related.DefaultConfig
|
||||||
|
taxonomies := cfg.Language.GetStringMapString("taxonomies")
|
||||||
|
if _, found := taxonomies["tag"]; found {
|
||||||
|
relatedContentConfig.Add(related.IndexConfig{Name: "tags", Weight: 80})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
titleFunc := helpers.GetTitleFunc(cfg.Language.GetString("titleCaseStyle"))
|
titleFunc := helpers.GetTitleFunc(cfg.Language.GetString("titleCaseStyle"))
|
||||||
|
|
||||||
s := &Site{
|
s := &Site{
|
||||||
|
@ -239,6 +258,7 @@ func newSite(cfg deps.DepsCfg) (*Site, error) {
|
||||||
Language: cfg.Language,
|
Language: cfg.Language,
|
||||||
disabledKinds: disabledKinds,
|
disabledKinds: disabledKinds,
|
||||||
titleFunc: titleFunc,
|
titleFunc: titleFunc,
|
||||||
|
relatedDocsHandler: newSearchIndexHandler(relatedContentConfig),
|
||||||
outputFormats: outputFormats,
|
outputFormats: outputFormats,
|
||||||
outputFormatsConfig: siteOutputFormatsConfig,
|
outputFormatsConfig: siteOutputFormatsConfig,
|
||||||
mediaTypesConfig: siteMediaTypesConfig,
|
mediaTypesConfig: siteMediaTypesConfig,
|
||||||
|
@ -1607,6 +1627,7 @@ func (s *Site) assembleTaxonomies() {
|
||||||
// Prepare site for a new full build.
|
// Prepare site for a new full build.
|
||||||
func (s *Site) resetBuildState() {
|
func (s *Site) resetBuildState() {
|
||||||
|
|
||||||
|
s.relatedDocsHandler = newSearchIndexHandler(s.relatedDocsHandler.cfg)
|
||||||
s.PageCollections = newPageCollectionsFromPages(s.rawAllPages)
|
s.PageCollections = newPageCollectionsFromPages(s.rawAllPages)
|
||||||
// TODO(bep) get rid of this double
|
// TODO(bep) get rid of this double
|
||||||
s.Info.PageCollections = s.PageCollections
|
s.Info.PageCollections = s.PageCollections
|
||||||
|
|
450
related/inverted_index.go
Normal file
450
related/inverted_index.go
Normal file
|
@ -0,0 +1,450 @@
|
||||||
|
// Copyright 2017-present The Hugo Authors. All rights reserved.
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
// Package related holds code to help finding related content.
|
||||||
|
package related
|
||||||
|
|
||||||
|
import (
|
||||||
|
"errors"
|
||||||
|
"fmt"
|
||||||
|
"math"
|
||||||
|
"sort"
|
||||||
|
"strings"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"github.com/gohugoio/hugo/common/types"
|
||||||
|
"github.com/mitchellh/mapstructure"
|
||||||
|
)
|
||||||
|
|
||||||
|
var (
|
||||||
|
_ Keyword = (*StringKeyword)(nil)
|
||||||
|
zeroDate = time.Time{}
|
||||||
|
|
||||||
|
// DefaultConfig is the default related config.
|
||||||
|
DefaultConfig = Config{
|
||||||
|
Threshold: 80,
|
||||||
|
Indices: IndexConfigs{
|
||||||
|
IndexConfig{Name: "keywords", Weight: 100},
|
||||||
|
IndexConfig{Name: "date", Weight: 10},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
|
/*
|
||||||
|
Config is the top level configuration element used to configure how to retrieve
|
||||||
|
related content in Hugo.
|
||||||
|
|
||||||
|
An example site config.toml:
|
||||||
|
|
||||||
|
[related]
|
||||||
|
threshold = 1
|
||||||
|
[[related.indices]]
|
||||||
|
name = "keywords"
|
||||||
|
weight = 200
|
||||||
|
[[related.indices]]
|
||||||
|
name = "tags"
|
||||||
|
weight = 100
|
||||||
|
[[related.indices]]
|
||||||
|
name = "date"
|
||||||
|
weight = 1
|
||||||
|
pattern = "2006"
|
||||||
|
*/
|
||||||
|
type Config struct {
|
||||||
|
// Only include matches >= threshold, a normalized rank between 0 and 100.
|
||||||
|
Threshold int
|
||||||
|
|
||||||
|
// To get stable "See also" sections we, by default, exclude newer related pages.
|
||||||
|
IncludeNewer bool
|
||||||
|
|
||||||
|
// Will lower case all string values and queries to the indices.
|
||||||
|
// May get better results, but at a slight performance cost.
|
||||||
|
ToLower bool
|
||||||
|
|
||||||
|
Indices IndexConfigs
|
||||||
|
}
|
||||||
|
|
||||||
|
func (c *Config) Add(index IndexConfig) {
|
||||||
|
if c.ToLower {
|
||||||
|
index.ToLower = true
|
||||||
|
}
|
||||||
|
c.Indices = append(c.Indices, index)
|
||||||
|
}
|
||||||
|
|
||||||
|
// IndexConfigs holds a set of index configurations.
type IndexConfigs []IndexConfig

// IndexConfig configures an index.
type IndexConfig struct {
	// The index name. This directly maps to a field or Param name.
	Name string

	// Contextual pattern used to convert the Param value into a string.
	// Currently only used for dates. Can be used to, say, bump posts in the same
	// time frame when searching for related documents.
	// For dates it follows Go's time.Format patterns, i.e.
	// "2006" for YYYY and "200601" for YYYYMM.
	Pattern string

	// This field's weight when doing multi-index searches. Higher is "better".
	Weight int

	// Will lower case all string values in and queries to this index.
	// May get more accurate results, but at a slight performance cost.
	ToLower bool
}
|
||||||
|
|
||||||
|
// Document is the interface an indexable document in Hugo must fulfill.
|
||||||
|
type Document interface {
|
||||||
|
// SearchKeywords returns a list of keywords for the given index config.
|
||||||
|
SearchKeywords(cfg IndexConfig) ([]Keyword, error)
|
||||||
|
|
||||||
|
// When this document was or will be published.
|
||||||
|
PubDate() time.Time
|
||||||
|
}
|
||||||
|
|
||||||
|
// InvertedIndex holds an inverted index, also sometimes named posting list, which
|
||||||
|
// lists, for every possible search term, the documents that contain that term.
|
||||||
|
type InvertedIndex struct {
|
||||||
|
cfg Config
|
||||||
|
index map[string]map[Keyword][]Document
|
||||||
|
|
||||||
|
minWeight int
|
||||||
|
maxWeight int
|
||||||
|
}
|
||||||
|
|
||||||
|
func (idx *InvertedIndex) getIndexCfg(name string) (IndexConfig, bool) {
|
||||||
|
for _, conf := range idx.cfg.Indices {
|
||||||
|
if conf.Name == name {
|
||||||
|
return conf, true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return IndexConfig{}, false
|
||||||
|
}
|
||||||
|
|
||||||
|
// NewInvertedIndex creates a new InvertedIndex.
|
||||||
|
// Documents to index must be added in Add.
|
||||||
|
func NewInvertedIndex(cfg Config) *InvertedIndex {
|
||||||
|
idx := &InvertedIndex{index: make(map[string]map[Keyword][]Document), cfg: cfg}
|
||||||
|
for _, conf := range cfg.Indices {
|
||||||
|
idx.index[conf.Name] = make(map[Keyword][]Document)
|
||||||
|
if conf.Weight < idx.minWeight {
|
||||||
|
// By default, the weight scale starts at 0, but we allow
|
||||||
|
// negative weights.
|
||||||
|
idx.minWeight = conf.Weight
|
||||||
|
}
|
||||||
|
if conf.Weight > idx.maxWeight {
|
||||||
|
idx.maxWeight = conf.Weight
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return idx
|
||||||
|
}
|
||||||
|
|
||||||
|
// Add documents to the inverted index.
|
||||||
|
// The value must support == and !=.
|
||||||
|
func (idx *InvertedIndex) Add(docs ...Document) error {
|
||||||
|
var err error
|
||||||
|
for _, config := range idx.cfg.Indices {
|
||||||
|
if config.Weight == 0 {
|
||||||
|
// Disabled
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
setm := idx.index[config.Name]
|
||||||
|
|
||||||
|
for _, doc := range docs {
|
||||||
|
var words []Keyword
|
||||||
|
words, err = doc.SearchKeywords(config)
|
||||||
|
if err != nil {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, keyword := range words {
|
||||||
|
setm[keyword] = append(setm[keyword], doc)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return err
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
// queryElement holds the index name and keywords that can be used to compose a
|
||||||
|
// search for related content.
|
||||||
|
type queryElement struct {
|
||||||
|
Index string
|
||||||
|
Keywords []Keyword
|
||||||
|
}
|
||||||
|
|
||||||
|
func newQueryElement(index string, keywords ...Keyword) queryElement {
|
||||||
|
return queryElement{Index: index, Keywords: keywords}
|
||||||
|
}
|
||||||
|
|
||||||
|
type ranks []*rank
|
||||||
|
|
||||||
|
type rank struct {
|
||||||
|
Doc Document
|
||||||
|
Weight int
|
||||||
|
Matches int
|
||||||
|
}
|
||||||
|
|
||||||
|
func (r *rank) addWeight(w int) {
|
||||||
|
r.Weight += w
|
||||||
|
r.Matches++
|
||||||
|
}
|
||||||
|
|
||||||
|
func newRank(doc Document, weight int) *rank {
|
||||||
|
return &rank{Doc: doc, Weight: weight, Matches: 1}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (r ranks) Len() int { return len(r) }
|
||||||
|
func (r ranks) Swap(i, j int) { r[i], r[j] = r[j], r[i] }
|
||||||
|
func (r ranks) Less(i, j int) bool {
|
||||||
|
if r[i].Weight == r[j].Weight {
|
||||||
|
return r[i].Doc.PubDate().After(r[j].Doc.PubDate())
|
||||||
|
}
|
||||||
|
return r[i].Weight > r[j].Weight
|
||||||
|
}
|
||||||
|
|
||||||
|
// SearchDoc finds the documents matching any of the keywords in the given indices
|
||||||
|
// against the given document.
|
||||||
|
// The resulting document set will be sorted according to number of matches
|
||||||
|
// and the index weights, and any matches with a rank below the configured
|
||||||
|
// threshold (normalize to 0..100) will be removed.
|
||||||
|
// If an index name is provided, only that index will be queried.
|
||||||
|
func (idx *InvertedIndex) SearchDoc(doc Document, indices ...string) ([]Document, error) {
|
||||||
|
var q []queryElement
|
||||||
|
|
||||||
|
var configs IndexConfigs
|
||||||
|
|
||||||
|
if len(indices) == 0 {
|
||||||
|
configs = idx.cfg.Indices
|
||||||
|
} else {
|
||||||
|
configs = make(IndexConfigs, len(indices))
|
||||||
|
for i, indexName := range indices {
|
||||||
|
cfg, found := idx.getIndexCfg(indexName)
|
||||||
|
if !found {
|
||||||
|
return nil, fmt.Errorf("index %q not found", indexName)
|
||||||
|
}
|
||||||
|
configs[i] = cfg
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, cfg := range configs {
|
||||||
|
keywords, err := doc.SearchKeywords(cfg)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
q = append(q, newQueryElement(cfg.Name, keywords...))
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
return idx.searchDate(doc.PubDate(), q...)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (cfg IndexConfig) ToKeywords(v interface{}) ([]Keyword, error) {
|
||||||
|
var (
|
||||||
|
keywords []Keyword
|
||||||
|
toLower = cfg.ToLower
|
||||||
|
)
|
||||||
|
switch vv := v.(type) {
|
||||||
|
case string:
|
||||||
|
if toLower {
|
||||||
|
vv = strings.ToLower(vv)
|
||||||
|
}
|
||||||
|
keywords = append(keywords, StringKeyword(vv))
|
||||||
|
case []string:
|
||||||
|
if toLower {
|
||||||
|
for i := 0; i < len(vv); i++ {
|
||||||
|
vv[i] = strings.ToLower(vv[i])
|
||||||
|
}
|
||||||
|
}
|
||||||
|
keywords = append(keywords, StringsToKeywords(vv...)...)
|
||||||
|
case time.Time:
|
||||||
|
layout := "2006"
|
||||||
|
if cfg.Pattern != "" {
|
||||||
|
layout = cfg.Pattern
|
||||||
|
}
|
||||||
|
keywords = append(keywords, StringKeyword(vv.Format(layout)))
|
||||||
|
case nil:
|
||||||
|
return keywords, nil
|
||||||
|
default:
|
||||||
|
return keywords, fmt.Errorf("indexing currently not supported for for index %q and type %T", cfg.Name, vv)
|
||||||
|
}
|
||||||
|
|
||||||
|
return keywords, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// SearchKeyValues finds the documents matching any of the keywords in the given indices.
// The resulting document set will be sorted according to number of matches
// and the index weights, and any matches with a rank below the configured
// threshold (normalized to 0..100) will be removed.
func (idx *InvertedIndex) SearchKeyValues(args ...types.KeyValues) ([]Document, error) {
	q := make([]queryElement, len(args))

	for i, arg := range args {
		var keywords []Keyword
		// The key names the index to query; it must be a non-empty string.
		key := arg.KeyString()
		if key == "" {
			return nil, fmt.Errorf("index %q not valid", arg.Key)
		}
		conf, found := idx.getIndexCfg(key)
		if !found {
			return nil, fmt.Errorf("index %q not found", key)
		}

		// Convert every raw value to keywords using the index's config
		// (lowercasing, date formatting etc.).
		for _, val := range arg.Values {
			k, err := conf.ToKeywords(val)
			if err != nil {
				return nil, err
			}
			keywords = append(keywords, k...)
		}

		q[i] = newQueryElement(conf.Name, keywords...)

	}

	return idx.search(q...)
}
|
||||||
|
|
||||||
|
// search runs the given query elements with no upper date limit.
func (idx *InvertedIndex) search(query ...queryElement) ([]Document, error) {
	return idx.searchDate(zeroDate, query...)
}
|
||||||
|
|
||||||
|
// searchDate runs the given query elements against the index. If upperDate
// is non-zero and cfg.IncludeNewer is not set, documents published after
// upperDate are excluded. Matches are ranked by accumulated index weight,
// filtered against the configured threshold, and returned best-first.
func (idx *InvertedIndex) searchDate(upperDate time.Time, query ...queryElement) ([]Document, error) {
	matchm := make(map[Document]*rank, 200)
	applyDateFilter := !idx.cfg.IncludeNewer && !upperDate.IsZero()

	for _, el := range query {
		setm, found := idx.index[el.Index]
		if !found {
			return []Document{}, fmt.Errorf("index for %q not found", el.Index)
		}

		config, found := idx.getIndexCfg(el.Index)
		if !found {
			return []Document{}, fmt.Errorf("index config for %q not found", el.Index)
		}

		for _, kw := range el.Keywords {
			if docs, found := setm[kw]; found {
				for _, doc := range docs {
					if applyDateFilter {
						// Exclude newer than the limit given
						if doc.PubDate().After(upperDate) {
							continue
						}
					}
					// First match creates the rank; subsequent matches
					// add this index's weight and bump the match count.
					r, found := matchm[doc]
					if !found {
						matchm[doc] = newRank(doc, config.Weight)
					} else {
						r.addWeight(config.Weight)
					}
				}
			}
		}
	}

	if len(matchm) == 0 {
		return []Document{}, nil
	}

	matches := make(ranks, 0, 100)

	for _, v := range matchm {
		// Normalize the average weight to 0..100 and drop matches below
		// the threshold; the threshold is relaxed as match count grows.
		avgWeight := v.Weight / v.Matches
		weight := norm(avgWeight, idx.minWeight, idx.maxWeight)
		threshold := idx.cfg.Threshold / v.Matches

		if weight >= threshold {
			matches = append(matches, v)
		}
	}

	// Stable sort: best-ranked documents first (see ranks.Less).
	sort.Stable(matches)

	result := make([]Document, len(matches))

	for i, m := range matches {
		result[i] = m.Doc
	}

	return result, nil
}
|
||||||
|
|
||||||
|
// norm normalizes num to a number between 0 and 100 relative to the
// [min, max] range, rounding half up. It panics if min > max. If the
// range is degenerate (min == max) it returns 100; the original
// expression would divide by zero and convert NaN to int.
func norm(num, min, max int) int {
	if min > max {
		panic("min > max")
	}
	if min == max {
		return 100
	}
	return int(math.Floor((float64(num-min) / float64(max-min) * 100) + 0.5))
}
|
||||||
|
|
||||||
|
// DecodeConfig decodes a slice of map into Config.
|
||||||
|
func DecodeConfig(in interface{}) (Config, error) {
|
||||||
|
if in == nil {
|
||||||
|
return Config{}, errors.New("no related config provided")
|
||||||
|
}
|
||||||
|
|
||||||
|
m, ok := in.(map[string]interface{})
|
||||||
|
if !ok {
|
||||||
|
return Config{}, fmt.Errorf("expected map[string]interface {} got %T", in)
|
||||||
|
}
|
||||||
|
|
||||||
|
if len(m) == 0 {
|
||||||
|
return Config{}, errors.New("empty related config provided")
|
||||||
|
}
|
||||||
|
|
||||||
|
var c Config
|
||||||
|
|
||||||
|
if err := mapstructure.WeakDecode(m, &c); err != nil {
|
||||||
|
return c, err
|
||||||
|
}
|
||||||
|
|
||||||
|
if c.Threshold < 0 || c.Threshold > 100 {
|
||||||
|
return Config{}, errors.New("related threshold must be between 0 and 100")
|
||||||
|
}
|
||||||
|
|
||||||
|
if c.ToLower {
|
||||||
|
for i, _ := range c.Indices {
|
||||||
|
c.Indices[i].ToLower = true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return c, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// StringKeyword is a string search keyword.
type StringKeyword string

// String returns the keyword as a plain string, satisfying Keyword.
func (s StringKeyword) String() string {
	return string(s)
}
|
||||||
|
|
||||||
|
// Keyword is the interface a keyword in the search index must implement.
type Keyword interface {
	// String returns the string representation used as the index key.
	String() string
}
|
||||||
|
|
||||||
|
// StringsToKeywords converts the given slice of strings to a slice of Keyword.
|
||||||
|
func StringsToKeywords(s ...string) []Keyword {
|
||||||
|
kw := make([]Keyword, len(s))
|
||||||
|
|
||||||
|
for i := 0; i < len(s); i++ {
|
||||||
|
kw[i] = StringKeyword(s[i])
|
||||||
|
}
|
||||||
|
|
||||||
|
return kw
|
||||||
|
}
|
276
related/inverted_index_test.go
Normal file
276
related/inverted_index_test.go
Normal file
|
@ -0,0 +1,276 @@
|
||||||
|
// Copyright 2017-present The Hugo Authors. All rights reserved.
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
package related
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"math/rand"
|
||||||
|
"testing"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"github.com/stretchr/testify/require"
|
||||||
|
)
|
||||||
|
|
||||||
|
// testDoc is a Document implementation for tests: keywords grouped per
// index name plus a fixed publication date.
type testDoc struct {
	// keywords maps index name to this document's keywords for that index.
	keywords map[string][]Keyword
	// date is returned from PubDate.
	date time.Time
}
|
||||||
|
|
||||||
|
func (k *testDoc) String() string {
|
||||||
|
s := "\n"
|
||||||
|
for k, v := range k.keywords {
|
||||||
|
s += k + ":\t\t"
|
||||||
|
for _, vv := range v {
|
||||||
|
s += " " + vv.String()
|
||||||
|
}
|
||||||
|
s += "\n"
|
||||||
|
}
|
||||||
|
return s
|
||||||
|
}
|
||||||
|
|
||||||
|
// newTestDoc creates a testDoc with the given keywords under the index
// name, stamped with the current time.
func newTestDoc(name string, keywords ...string) *testDoc {
	km := make(map[string][]Keyword)

	// Sleep so successive docs get strictly increasing PubDates; the
	// date-ordering assertions in the tests rely on this.
	time.Sleep(1 * time.Millisecond)
	kw := &testDoc{keywords: km, date: time.Now()}

	kw.addKeywords(name, keywords...)
	return kw
}
|
||||||
|
|
||||||
|
func (t *testDoc) addKeywords(name string, keywords ...string) *testDoc {
|
||||||
|
keywordm := createTestKeywords(name, keywords...)
|
||||||
|
|
||||||
|
for k, v := range keywordm {
|
||||||
|
keywords := make([]Keyword, len(v))
|
||||||
|
for i := 0; i < len(v); i++ {
|
||||||
|
keywords[i] = StringKeyword(v[i])
|
||||||
|
}
|
||||||
|
t.keywords[k] = keywords
|
||||||
|
}
|
||||||
|
return t
|
||||||
|
}
|
||||||
|
|
||||||
|
// createTestKeywords wraps the keywords in a single-entry map keyed by
// the index name.
func createTestKeywords(name string, keywords ...string) map[string][]string {
	m := make(map[string][]string, 1)
	m[name] = keywords
	return m
}
|
||||||
|
|
||||||
|
// SearchKeywords implements Document; it returns the stored keywords for
// the index named in cfg (nil if the index is unknown to this doc).
func (k *testDoc) SearchKeywords(cfg IndexConfig) ([]Keyword, error) {
	return k.keywords[cfg.Name], nil
}
|
||||||
|
|
||||||
|
// PubDate implements Document, returning the fixed creation timestamp.
func (k *testDoc) PubDate() time.Time {
	return k.date
}
|
||||||
|
|
||||||
|
// TestSearch exercises the inverted index end to end: index construction,
// keyword search per index, combined-index search, document-based search,
// and date filtering of newer documents.
func TestSearch(t *testing.T) {

	config := Config{
		Threshold:    90,
		IncludeNewer: false,
		Indices: IndexConfigs{
			IndexConfig{Name: "tags", Weight: 50},
			IndexConfig{Name: "keywords", Weight: 65},
		},
	}

	idx := NewInvertedIndex(config)
	//idx.debug = true

	docs := []Document{
		newTestDoc("tags", "a", "b", "c", "d"),
		newTestDoc("tags", "b", "d", "g"),
		newTestDoc("tags", "b", "h").addKeywords("keywords", "a"),
		newTestDoc("tags", "g", "h").addKeywords("keywords", "a", "b"),
	}

	idx.Add(docs...)

	t.Run("count", func(t *testing.T) {
		assert := require.New(t)
		assert.Len(idx.index, 2)
		set1, found := idx.index["tags"]
		assert.True(found)
		// 6 tags
		assert.Len(set1, 6)

		set2, found := idx.index["keywords"]
		assert.True(found)
		assert.Len(set2, 2)

	})

	t.Run("search-tags", func(t *testing.T) {
		assert := require.New(t)
		m, err := idx.search(newQueryElement("tags", StringsToKeywords("a", "b", "d", "z")...))
		assert.NoError(err)
		assert.Len(m, 2)
		assert.Equal(docs[0], m[0])
		assert.Equal(docs[1], m[1])
	})

	t.Run("search-tags-and-keywords", func(t *testing.T) {
		assert := require.New(t)
		m, err := idx.search(
			newQueryElement("tags", StringsToKeywords("a", "b", "z")...),
			newQueryElement("keywords", StringsToKeywords("a", "b")...))
		assert.NoError(err)
		assert.Len(m, 3)
		assert.Equal(docs[3], m[0])
		assert.Equal(docs[2], m[1])
		assert.Equal(docs[0], m[2])
	})

	t.Run("searchdoc-all", func(t *testing.T) {
		assert := require.New(t)
		doc := newTestDoc("tags", "a").addKeywords("keywords", "a")
		m, err := idx.SearchDoc(doc)
		assert.NoError(err)
		assert.Len(m, 2)
		assert.Equal(docs[3], m[0])
		assert.Equal(docs[2], m[1])
	})

	t.Run("searchdoc-tags", func(t *testing.T) {
		assert := require.New(t)
		doc := newTestDoc("tags", "a", "b", "d", "z").addKeywords("keywords", "a", "b")
		m, err := idx.SearchDoc(doc, "tags")
		assert.NoError(err)
		assert.Len(m, 2)
		assert.Equal(docs[0], m[0])
		assert.Equal(docs[1], m[1])
	})

	t.Run("searchdoc-keywords-date", func(t *testing.T) {
		assert := require.New(t)
		doc := newTestDoc("tags", "a", "b", "d", "z").addKeywords("keywords", "a", "b")
		// This will get a date newer than the others.
		newDoc := newTestDoc("keywords", "a", "b")
		idx.Add(newDoc)

		m, err := idx.SearchDoc(doc, "keywords")
		assert.NoError(err)
		assert.Len(m, 2)
		assert.Equal(docs[3], m[0])
	})

}
|
||||||
|
|
||||||
|
// BenchmarkRelatedNewIndex measures the cost of building an index from
// 100 docs with random keywords, adding them one by one vs. all at once.
func BenchmarkRelatedNewIndex(b *testing.B) {

	pages := make([]*testDoc, 100)
	numkeywords := 30
	allKeywords := make([]string, numkeywords)
	for i := 0; i < numkeywords; i++ {
		allKeywords[i] = fmt.Sprintf("keyword%d", i+1)
	}

	for i := 0; i < len(pages); i++ {
		// Pick a random window of up to 3 keywords (shrunk to 1 near the end).
		start := rand.Intn(len(allKeywords))
		end := start + 3
		if end >= len(allKeywords) {
			end = start + 1
		}

		kw := newTestDoc("tags", allKeywords[start:end]...)
		if i%5 == 0 {
			// Every fifth page also gets keywords in a second index.
			start := rand.Intn(len(allKeywords))
			end := start + 3
			if end >= len(allKeywords) {
				end = start + 1
			}
			kw.addKeywords("keywords", allKeywords[start:end]...)
		}

		pages[i] = kw
	}

	cfg := Config{
		Threshold: 50,
		Indices: IndexConfigs{
			IndexConfig{Name: "tags", Weight: 100},
			IndexConfig{Name: "keywords", Weight: 200},
		},
	}

	b.Run("singles", func(b *testing.B) {
		for i := 0; i < b.N; i++ {
			idx := NewInvertedIndex(cfg)
			for _, doc := range pages {
				idx.Add(doc)
			}
		}
	})

	b.Run("all", func(b *testing.B) {
		for i := 0; i < b.N; i++ {
			idx := NewInvertedIndex(cfg)
			docs := make([]Document, len(pages))
			for i := 0; i < len(pages); i++ {
				docs[i] = pages[i]
			}
			idx.Add(docs...)
		}
	})

}
|
||||||
|
|
||||||
|
// BenchmarkRelatedMatchesIn measures search throughput against an index
// of 1000 docs; roughly one in ten queries hits the "keywords" index.
func BenchmarkRelatedMatchesIn(b *testing.B) {

	q1 := newQueryElement("tags", StringsToKeywords("keyword2", "keyword5", "keyword32", "asdf")...)
	q2 := newQueryElement("keywords", StringsToKeywords("keyword3", "keyword4")...)

	// NOTE(review): docs is only used for its length below; its elements
	// are never assigned.
	docs := make([]*testDoc, 1000)
	numkeywords := 20
	allKeywords := make([]string, numkeywords)
	for i := 0; i < numkeywords; i++ {
		allKeywords[i] = fmt.Sprintf("keyword%d", i+1)
	}

	cfg := Config{
		Threshold: 20,
		Indices: IndexConfigs{
			IndexConfig{Name: "tags", Weight: 100},
			IndexConfig{Name: "keywords", Weight: 200},
		},
	}

	idx := NewInvertedIndex(cfg)

	for i := 0; i < len(docs); i++ {
		// Pick a random window of up to 3 keywords.
		start := rand.Intn(len(allKeywords))
		end := start + 3
		if end >= len(allKeywords) {
			end = start + 1
		}

		index := "tags"
		if i%5 == 0 {
			index = "keywords"
		}

		idx.Add(newTestDoc(index, allKeywords[start:end]...))
	}

	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		if i%10 == 0 {
			idx.search(q2)
		} else {
			idx.search(q1)
		}
	}
}
|
|
@ -23,6 +23,7 @@ import (
|
||||||
"strings"
|
"strings"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
|
"github.com/gohugoio/hugo/common/types"
|
||||||
"github.com/gohugoio/hugo/deps"
|
"github.com/gohugoio/hugo/deps"
|
||||||
"github.com/gohugoio/hugo/helpers"
|
"github.com/gohugoio/hugo/helpers"
|
||||||
"github.com/spf13/cast"
|
"github.com/spf13/cast"
|
||||||
|
@ -641,3 +642,8 @@ func (ns *Namespace) Uniq(l interface{}) (interface{}, error) {
|
||||||
}
|
}
|
||||||
return ret.Interface(), nil
|
return ret.Interface(), nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// KeyVals creates a key and values wrapper.
// It never fails; the error return is always nil — presumably kept to
// satisfy the template-function signature convention (verify against the
// method-mapping registration).
func (ns *Namespace) KeyVals(key interface{}, vals ...interface{}) (types.KeyValues, error) {
	return types.KeyValues{Key: key, Values: vals}, nil
}
|
||||||
|
|
|
@ -63,6 +63,13 @@ func init() {
|
||||||
[][2]string{},
|
[][2]string{},
|
||||||
)
|
)
|
||||||
|
|
||||||
|
ns.AddMethodMapping(ctx.KeyVals,
|
||||||
|
[]string{"keyVals"},
|
||||||
|
[][2]string{
|
||||||
|
{`{{ keyVals "key" "a" "b" }}`, `key: [a b]`},
|
||||||
|
},
|
||||||
|
)
|
||||||
|
|
||||||
ns.AddMethodMapping(ctx.In,
|
ns.AddMethodMapping(ctx.In,
|
||||||
[]string{"in"},
|
[]string{"in"},
|
||||||
[][2]string{
|
[][2]string{
|
||||||
|
|
Loading…
Reference in a new issue