mirror of
https://github.com/gohugoio/hugo.git
synced 2024-11-07 20:30:36 -05:00
resources/page: Use binary search in Pages.Prev/Next if possible
This is obviously much faster for lager data sets: ```bash name old time/op new time/op delta SearchPage/ByWeight-100-4 267ns ± 4% 272ns ± 5% ~ (p=0.457 n=4+4) SearchPage/ByWeight-5000-4 10.8µs ± 3% 1.2µs ± 2% -88.99% (p=0.029 n=4+4) SearchPage/ByWeight-10000-4 21.1µs ± 1% 1.4µs ±11% -93.28% (p=0.029 n=4+4) ``` See #4500
This commit is contained in:
parent
f4f566edf4
commit
653e6856ea
8 changed files with 335 additions and 83 deletions
|
@ -63,7 +63,7 @@ func BenchmarkPagesPrevNext(b *testing.B) {
|
|||
Variant{"Pages.Shuffled.Prev", shufflePages, func(p page.Page, pages page.Pages) { pages.Prev(p) }},
|
||||
Variant{"Pages.ByTitle.Next", func(pages page.Pages) page.Pages { return pages.ByTitle() }, func(p page.Page, pages page.Pages) { pages.Next(p) }},
|
||||
} {
|
||||
for _, numPages := range []int{100, 300, 900, 5000} {
|
||||
for _, numPages := range []int{300, 5000} {
|
||||
b.Run(fmt.Sprintf("%s-pages-%d", variant.name, numPages), func(b *testing.B) {
|
||||
b.StopTimer()
|
||||
builder := newPagesPrevNextTestSite(b, numPages)
|
||||
|
|
|
@ -15,26 +15,21 @@ package page
|
|||
|
||||
// Next returns the next page reletive to the given
|
||||
func (p Pages) Next(cur Page) Page {
|
||||
for x, c := range p {
|
||||
if c.Eq(cur) {
|
||||
if x == 0 {
|
||||
return nil
|
||||
}
|
||||
return p[x-1]
|
||||
}
|
||||
x := searchPage(cur, p)
|
||||
if x <= 0 {
|
||||
return nil
|
||||
}
|
||||
return nil
|
||||
return p[x-1]
|
||||
}
|
||||
|
||||
// Prev returns the previous page reletive to the given
|
||||
func (p Pages) Prev(cur Page) Page {
|
||||
for x, c := range p {
|
||||
if c.Eq(cur) {
|
||||
if x < len(p)-1 {
|
||||
return p[x+1]
|
||||
}
|
||||
return nil
|
||||
}
|
||||
x := searchPage(cur, p)
|
||||
|
||||
if x == -1 || len(p)-x < 2 {
|
||||
return nil
|
||||
}
|
||||
return nil
|
||||
|
||||
return p[x+1]
|
||||
|
||||
}
|
||||
|
|
|
@ -46,59 +46,78 @@ func (by pageBy) Sort(pages Pages) {
|
|||
sort.Stable(ps)
|
||||
}
|
||||
|
||||
// DefaultPageSort is the default sort func for pages in Hugo:
|
||||
// Order by Weight, Date, LinkTitle and then full file path.
|
||||
var DefaultPageSort = func(p1, p2 Page) bool {
|
||||
if p1.Weight() == p2.Weight() {
|
||||
if p1.Date().Unix() == p2.Date().Unix() {
|
||||
c := compare.Strings(p1.LinkTitle(), p2.LinkTitle())
|
||||
if c == 0 {
|
||||
if p1.File().IsZero() || p2.File().IsZero() {
|
||||
return p1.File().IsZero()
|
||||
}
|
||||
return compare.LessStrings(p1.File().Filename(), p2.File().Filename())
|
||||
}
|
||||
return c < 0
|
||||
}
|
||||
return p1.Date().Unix() > p2.Date().Unix()
|
||||
}
|
||||
var (
|
||||
|
||||
if p2.Weight() == 0 {
|
||||
return true
|
||||
}
|
||||
|
||||
if p1.Weight() == 0 {
|
||||
return false
|
||||
}
|
||||
|
||||
return p1.Weight() < p2.Weight()
|
||||
}
|
||||
|
||||
var languagePageSort = func(p1, p2 Page) bool {
|
||||
|
||||
if p1.Language().Weight == p2.Language().Weight {
|
||||
if p1.Date().Unix() == p2.Date().Unix() {
|
||||
c := compare.Strings(p1.LinkTitle(), p2.LinkTitle())
|
||||
if c == 0 {
|
||||
if !p1.File().IsZero() && !p2.File().IsZero() {
|
||||
// DefaultPageSort is the default sort func for pages in Hugo:
|
||||
// Order by Weight, Date, LinkTitle and then full file path.
|
||||
DefaultPageSort = func(p1, p2 Page) bool {
|
||||
if p1.Weight() == p2.Weight() {
|
||||
if p1.Date().Unix() == p2.Date().Unix() {
|
||||
c := compare.Strings(p1.LinkTitle(), p2.LinkTitle())
|
||||
if c == 0 {
|
||||
if p1.File().IsZero() || p2.File().IsZero() {
|
||||
return p1.File().IsZero()
|
||||
}
|
||||
return compare.LessStrings(p1.File().Filename(), p2.File().Filename())
|
||||
}
|
||||
return c < 0
|
||||
}
|
||||
return c < 0
|
||||
return p1.Date().Unix() > p2.Date().Unix()
|
||||
}
|
||||
return p1.Date().Unix() > p2.Date().Unix()
|
||||
|
||||
if p2.Weight() == 0 {
|
||||
return true
|
||||
}
|
||||
|
||||
if p1.Weight() == 0 {
|
||||
return false
|
||||
}
|
||||
|
||||
return p1.Weight() < p2.Weight()
|
||||
}
|
||||
|
||||
if p2.Language().Weight == 0 {
|
||||
return true
|
||||
lessPageLanguage = func(p1, p2 Page) bool {
|
||||
|
||||
if p1.Language().Weight == p2.Language().Weight {
|
||||
if p1.Date().Unix() == p2.Date().Unix() {
|
||||
c := compare.Strings(p1.LinkTitle(), p2.LinkTitle())
|
||||
if c == 0 {
|
||||
if !p1.File().IsZero() && !p2.File().IsZero() {
|
||||
return compare.LessStrings(p1.File().Filename(), p2.File().Filename())
|
||||
}
|
||||
}
|
||||
return c < 0
|
||||
}
|
||||
return p1.Date().Unix() > p2.Date().Unix()
|
||||
}
|
||||
|
||||
if p2.Language().Weight == 0 {
|
||||
return true
|
||||
}
|
||||
|
||||
if p1.Language().Weight == 0 {
|
||||
return false
|
||||
}
|
||||
|
||||
return p1.Language().Weight < p2.Language().Weight
|
||||
}
|
||||
|
||||
if p1.Language().Weight == 0 {
|
||||
return false
|
||||
lessPageTitle = func(p1, p2 Page) bool {
|
||||
return compare.LessStrings(p1.Title(), p2.Title())
|
||||
}
|
||||
|
||||
return p1.Language().Weight < p2.Language().Weight
|
||||
}
|
||||
lessPageLinkTitle = func(p1, p2 Page) bool {
|
||||
return compare.LessStrings(p1.LinkTitle(), p2.LinkTitle())
|
||||
}
|
||||
|
||||
lessPageDate = func(p1, p2 Page) bool {
|
||||
return p1.Date().Unix() < p2.Date().Unix()
|
||||
}
|
||||
|
||||
lessPagePubDate = func(p1, p2 Page) bool {
|
||||
return p1.PublishDate().Unix() < p2.PublishDate().Unix()
|
||||
}
|
||||
)
|
||||
|
||||
func (ps *pageSorter) Len() int { return len(ps.pages) }
|
||||
func (ps *pageSorter) Swap(i, j int) { ps.pages[i], ps.pages[j] = ps.pages[j], ps.pages[i] }
|
||||
|
@ -139,11 +158,7 @@ func (p Pages) ByTitle() Pages {
|
|||
|
||||
const key = "pageSort.ByTitle"
|
||||
|
||||
title := func(p1, p2 Page) bool {
|
||||
return compare.LessStrings(p1.Title(), p2.Title())
|
||||
}
|
||||
|
||||
pages, _ := spc.get(key, pageBy(title).Sort, p)
|
||||
pages, _ := spc.get(key, pageBy(lessPageTitle).Sort, p)
|
||||
return pages
|
||||
}
|
||||
|
||||
|
@ -156,11 +171,7 @@ func (p Pages) ByLinkTitle() Pages {
|
|||
|
||||
const key = "pageSort.ByLinkTitle"
|
||||
|
||||
linkTitle := func(p1, p2 Page) bool {
|
||||
return compare.LessStrings(p1.LinkTitle(), p2.LinkTitle())
|
||||
}
|
||||
|
||||
pages, _ := spc.get(key, pageBy(linkTitle).Sort, p)
|
||||
pages, _ := spc.get(key, pageBy(lessPageLinkTitle).Sort, p)
|
||||
|
||||
return pages
|
||||
}
|
||||
|
@ -174,11 +185,7 @@ func (p Pages) ByDate() Pages {
|
|||
|
||||
const key = "pageSort.ByDate"
|
||||
|
||||
date := func(p1, p2 Page) bool {
|
||||
return p1.Date().Unix() < p2.Date().Unix()
|
||||
}
|
||||
|
||||
pages, _ := spc.get(key, pageBy(date).Sort, p)
|
||||
pages, _ := spc.get(key, pageBy(lessPageDate).Sort, p)
|
||||
|
||||
return pages
|
||||
}
|
||||
|
@ -192,11 +199,7 @@ func (p Pages) ByPublishDate() Pages {
|
|||
|
||||
const key = "pageSort.ByPublishDate"
|
||||
|
||||
pubDate := func(p1, p2 Page) bool {
|
||||
return p1.PublishDate().Unix() < p2.PublishDate().Unix()
|
||||
}
|
||||
|
||||
pages, _ := spc.get(key, pageBy(pubDate).Sort, p)
|
||||
pages, _ := spc.get(key, pageBy(lessPagePubDate).Sort, p)
|
||||
|
||||
return pages
|
||||
}
|
||||
|
@ -276,14 +279,14 @@ func (p Pages) ByLanguage() Pages {
|
|||
|
||||
const key = "pageSort.ByLanguage"
|
||||
|
||||
pages, _ := spc.get(key, pageBy(languagePageSort).Sort, p)
|
||||
pages, _ := spc.get(key, pageBy(lessPageLanguage).Sort, p)
|
||||
|
||||
return pages
|
||||
}
|
||||
|
||||
// SortByLanguage sorts the pages by language.
|
||||
func SortByLanguage(pages Pages) {
|
||||
pageBy(languagePageSort).Sort(pages)
|
||||
pageBy(lessPageLanguage).Sort(pages)
|
||||
}
|
||||
|
||||
// Reverse reverses the order in Pages and returns a copy.
|
||||
|
|
126
resources/page/pages_sort_search.go
Normal file
126
resources/page/pages_sort_search.go
Normal file
|
@ -0,0 +1,126 @@
|
|||
// Copyright 2019 The Hugo Authors. All rights reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package page
|
||||
|
||||
import "sort"
|
||||
|
||||
// Used in page binary search, the most common in front.
|
||||
var pageLessFunctions = []func(p1, p2 Page) bool{
|
||||
DefaultPageSort,
|
||||
lessPageDate,
|
||||
lessPagePubDate,
|
||||
lessPageTitle,
|
||||
lessPageLinkTitle,
|
||||
}
|
||||
|
||||
func searchPage(p Page, pages Pages) int {
|
||||
if len(pages) < 1000 {
|
||||
// For smaller data sets, doing a linear search is faster.
|
||||
return searchPageLinear(p, pages, 0)
|
||||
}
|
||||
|
||||
less := isPagesProbablySorted(pages, pageLessFunctions...)
|
||||
if less == nil {
|
||||
return searchPageLinear(p, pages, 0)
|
||||
}
|
||||
|
||||
i := searchPageBinary(p, pages, less)
|
||||
if i != -1 {
|
||||
return i
|
||||
}
|
||||
|
||||
return searchPageLinear(p, pages, 0)
|
||||
}
|
||||
|
||||
func searchPageLinear(p Page, pages Pages, start int) int {
|
||||
for i := start; i < len(pages); i++ {
|
||||
c := pages[i]
|
||||
if c.Eq(p) {
|
||||
return i
|
||||
}
|
||||
}
|
||||
return -1
|
||||
}
|
||||
|
||||
func searchPageBinary(p Page, pages Pages, less func(p1, p2 Page) bool) int {
|
||||
n := len(pages)
|
||||
|
||||
f := func(i int) bool {
|
||||
c := pages[i]
|
||||
isLess := less(c, p)
|
||||
return !isLess || c.Eq(p)
|
||||
}
|
||||
|
||||
i := sort.Search(n, f)
|
||||
|
||||
if i == n {
|
||||
return -1
|
||||
}
|
||||
|
||||
return searchPageLinear(p, pages, i)
|
||||
|
||||
}
|
||||
|
||||
// isProbablySorted tests if the pages slice is probably sorted.
|
||||
func isPagesProbablySorted(pages Pages, lessFuncs ...func(p1, p2 Page) bool) func(p1, p2 Page) bool {
|
||||
n := len(pages)
|
||||
step := 1
|
||||
if n > 500 {
|
||||
step = 50
|
||||
}
|
||||
|
||||
is := func(less func(p1, p2 Page) bool) bool {
|
||||
samples := 0
|
||||
|
||||
for i := n - 1; i > 0; i = i - step {
|
||||
if less(pages[i], pages[i-1]) {
|
||||
return false
|
||||
}
|
||||
samples++
|
||||
if samples >= 15 {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return samples > 0
|
||||
}
|
||||
|
||||
isReverse := func(less func(p1, p2 Page) bool) bool {
|
||||
samples := 0
|
||||
|
||||
for i := 0; i < n-1; i = i + step {
|
||||
if less(pages[i], pages[i+1]) {
|
||||
return false
|
||||
}
|
||||
samples++
|
||||
|
||||
if samples > 15 {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return samples > 0
|
||||
}
|
||||
|
||||
for _, less := range lessFuncs {
|
||||
if is(less) {
|
||||
return less
|
||||
}
|
||||
if isReverse(less) {
|
||||
return func(p1, p2 Page) bool {
|
||||
return less(p2, p1)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
124
resources/page/pages_sort_search_test.go
Normal file
124
resources/page/pages_sort_search_test.go
Normal file
|
@ -0,0 +1,124 @@
|
|||
// Copyright 2019 The Hugo Authors. All rights reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package page
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"math/rand"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
qt "github.com/frankban/quicktest"
|
||||
)
|
||||
|
||||
func TestSearchPage(t *testing.T) {
|
||||
t.Parallel()
|
||||
c := qt.New(t)
|
||||
pages := createSortTestPages(10)
|
||||
for i, p := range pages {
|
||||
p.(*testPage).title = fmt.Sprintf("Title %d", i%2)
|
||||
}
|
||||
|
||||
for _, pages := range []Pages{pages.ByTitle(), pages.ByTitle().Reverse()} {
|
||||
less := isPagesProbablySorted(pages, lessPageTitle)
|
||||
c.Assert(less, qt.Not(qt.IsNil))
|
||||
for i, p := range pages {
|
||||
idx := searchPageBinary(p, pages, less)
|
||||
c.Assert(idx, qt.Equals, i)
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
func BenchmarkSearchPage(b *testing.B) {
|
||||
type Variant struct {
|
||||
name string
|
||||
preparePages func(pages Pages) Pages
|
||||
search func(p Page, pages Pages) int
|
||||
}
|
||||
|
||||
shufflePages := func(pages Pages) Pages {
|
||||
rand.Shuffle(len(pages), func(i, j int) { pages[i], pages[j] = pages[j], pages[i] })
|
||||
return pages
|
||||
}
|
||||
|
||||
linearSearch := func(p Page, pages Pages) int {
|
||||
return searchPageLinear(p, pages, 0)
|
||||
}
|
||||
|
||||
createPages := func(num int) Pages {
|
||||
pages := createSortTestPages(num)
|
||||
for _, p := range pages {
|
||||
tp := p.(*testPage)
|
||||
tp.weight = rand.Intn(len(pages))
|
||||
tp.title = fmt.Sprintf("Title %d", rand.Intn(len(pages)))
|
||||
|
||||
tp.pubDate = time.Now().Add(time.Duration(rand.Intn(len(pages)/5)) * time.Hour)
|
||||
tp.date = time.Now().Add(time.Duration(rand.Intn(len(pages)/5)) * time.Hour)
|
||||
}
|
||||
|
||||
return pages
|
||||
}
|
||||
|
||||
for _, variant := range []Variant{
|
||||
Variant{"Shuffled", shufflePages, searchPage},
|
||||
Variant{"ByWeight", func(pages Pages) Pages {
|
||||
return pages.ByWeight()
|
||||
}, searchPage},
|
||||
Variant{"ByWeight.Reverse", func(pages Pages) Pages {
|
||||
return pages.ByWeight().Reverse()
|
||||
}, searchPage},
|
||||
Variant{"ByDate", func(pages Pages) Pages {
|
||||
return pages.ByDate()
|
||||
}, searchPage},
|
||||
Variant{"ByPublishDate", func(pages Pages) Pages {
|
||||
return pages.ByPublishDate()
|
||||
}, searchPage},
|
||||
Variant{"ByTitle", func(pages Pages) Pages {
|
||||
return pages.ByTitle()
|
||||
}, searchPage},
|
||||
Variant{"ByTitle Linear", func(pages Pages) Pages {
|
||||
return pages.ByTitle()
|
||||
}, linearSearch},
|
||||
} {
|
||||
for _, numPages := range []int{100, 500, 1000, 5000} {
|
||||
b.Run(fmt.Sprintf("%s-%d", variant.name, numPages), func(b *testing.B) {
|
||||
b.StopTimer()
|
||||
pages := createPages(numPages)
|
||||
if variant.preparePages != nil {
|
||||
pages = variant.preparePages(pages)
|
||||
}
|
||||
b.StartTimer()
|
||||
for i := 0; i < b.N; i++ {
|
||||
j := rand.Intn(numPages)
|
||||
k := variant.search(pages[j], pages)
|
||||
if k != j {
|
||||
b.Fatalf("%d != %d", k, j)
|
||||
}
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestIsPagesProbablySorted(t *testing.T) {
|
||||
t.Parallel()
|
||||
c := qt.New(t)
|
||||
|
||||
c.Assert(isPagesProbablySorted(createSortTestPages(6).ByWeight(), DefaultPageSort), qt.Not(qt.IsNil))
|
||||
c.Assert(isPagesProbablySorted(createSortTestPages(300).ByWeight(), DefaultPageSort), qt.Not(qt.IsNil))
|
||||
c.Assert(isPagesProbablySorted(createSortTestPages(6), DefaultPageSort), qt.IsNil)
|
||||
c.Assert(isPagesProbablySorted(createSortTestPages(300).ByTitle(), pageLessFunctions...), qt.Not(qt.IsNil))
|
||||
|
||||
}
|
|
@ -269,6 +269,7 @@ func createSortTestPages(num int) Pages {
|
|||
for i := 0; i < num; i++ {
|
||||
p := newTestPage()
|
||||
p.path = fmt.Sprintf("/x/y/p%d.md", i)
|
||||
p.title = fmt.Sprintf("Title %d", i%(num+1/2))
|
||||
p.params = map[string]interface{}{
|
||||
"arbitrarily": map[string]interface{}{
|
||||
"nested": ("xyz" + fmt.Sprintf("%v", 100-i)),
|
||||
|
|
|
@ -310,6 +310,9 @@ func (p *testPage) Len() int {
|
|||
|
||||
func (p *testPage) LinkTitle() string {
|
||||
if p.linkTitle == "" {
|
||||
if p.title == "" {
|
||||
return p.path
|
||||
}
|
||||
return p.title
|
||||
}
|
||||
return p.linkTitle
|
||||
|
|
|
@ -99,7 +99,7 @@ func (wp WeightedPages) Pages() Pages {
|
|||
// this weighted page set.
|
||||
func (wp WeightedPages) Next(cur Page) Page {
|
||||
for x, c := range wp {
|
||||
if c.Page == cur {
|
||||
if c.Page.Eq(cur) {
|
||||
if x == 0 {
|
||||
return nil
|
||||
}
|
||||
|
@ -113,7 +113,7 @@ func (wp WeightedPages) Next(cur Page) Page {
|
|||
// this weighted page set.
|
||||
func (wp WeightedPages) Prev(cur Page) Page {
|
||||
for x, c := range wp {
|
||||
if c.Page == cur {
|
||||
if c.Page.Eq(cur) {
|
||||
if x < len(wp)-1 {
|
||||
return wp[x+1].Page
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue