resources/page: Use binary search in Pages.Prev/Next if possible

This is obviously much faster for lager data sets:

```bash
name                         old time/op    new time/op    delta
SearchPage/ByWeight-100-4       267ns ± 4%     272ns ± 5%     ~     (p=0.457 n=4+4)
SearchPage/ByWeight-5000-4     10.8µs ± 3%     1.2µs ± 2%  -88.99%  (p=0.029 n=4+4)
SearchPage/ByWeight-10000-4    21.1µs ± 1%     1.4µs ±11%  -93.28%  (p=0.029 n=4+4)
```

See #4500
This commit is contained in:
Bjørn Erik Pedersen 2019-10-11 13:55:46 +02:00
parent f4f566edf4
commit 653e6856ea
8 changed files with 335 additions and 83 deletions

View file

@ -63,7 +63,7 @@ func BenchmarkPagesPrevNext(b *testing.B) {
Variant{"Pages.Shuffled.Prev", shufflePages, func(p page.Page, pages page.Pages) { pages.Prev(p) }},
Variant{"Pages.ByTitle.Next", func(pages page.Pages) page.Pages { return pages.ByTitle() }, func(p page.Page, pages page.Pages) { pages.Next(p) }},
} {
for _, numPages := range []int{100, 300, 900, 5000} {
for _, numPages := range []int{300, 5000} {
b.Run(fmt.Sprintf("%s-pages-%d", variant.name, numPages), func(b *testing.B) {
b.StopTimer()
builder := newPagesPrevNextTestSite(b, numPages)

View file

@ -15,26 +15,21 @@ package page
// Next returns the next page reletive to the given
func (p Pages) Next(cur Page) Page {
for x, c := range p {
if c.Eq(cur) {
if x == 0 {
return nil
}
return p[x-1]
}
x := searchPage(cur, p)
if x <= 0 {
return nil
}
return nil
return p[x-1]
}
// Prev returns the previous page reletive to the given
func (p Pages) Prev(cur Page) Page {
for x, c := range p {
if c.Eq(cur) {
if x < len(p)-1 {
return p[x+1]
}
return nil
}
x := searchPage(cur, p)
if x == -1 || len(p)-x < 2 {
return nil
}
return nil
return p[x+1]
}

View file

@ -46,59 +46,78 @@ func (by pageBy) Sort(pages Pages) {
sort.Stable(ps)
}
// DefaultPageSort is the default sort func for pages in Hugo:
// Order by Weight, Date, LinkTitle and then full file path.
var DefaultPageSort = func(p1, p2 Page) bool {
if p1.Weight() == p2.Weight() {
if p1.Date().Unix() == p2.Date().Unix() {
c := compare.Strings(p1.LinkTitle(), p2.LinkTitle())
if c == 0 {
if p1.File().IsZero() || p2.File().IsZero() {
return p1.File().IsZero()
}
return compare.LessStrings(p1.File().Filename(), p2.File().Filename())
}
return c < 0
}
return p1.Date().Unix() > p2.Date().Unix()
}
var (
if p2.Weight() == 0 {
return true
}
if p1.Weight() == 0 {
return false
}
return p1.Weight() < p2.Weight()
}
var languagePageSort = func(p1, p2 Page) bool {
if p1.Language().Weight == p2.Language().Weight {
if p1.Date().Unix() == p2.Date().Unix() {
c := compare.Strings(p1.LinkTitle(), p2.LinkTitle())
if c == 0 {
if !p1.File().IsZero() && !p2.File().IsZero() {
// DefaultPageSort is the default sort func for pages in Hugo:
// Order by Weight, Date, LinkTitle and then full file path.
DefaultPageSort = func(p1, p2 Page) bool {
if p1.Weight() == p2.Weight() {
if p1.Date().Unix() == p2.Date().Unix() {
c := compare.Strings(p1.LinkTitle(), p2.LinkTitle())
if c == 0 {
if p1.File().IsZero() || p2.File().IsZero() {
return p1.File().IsZero()
}
return compare.LessStrings(p1.File().Filename(), p2.File().Filename())
}
return c < 0
}
return c < 0
return p1.Date().Unix() > p2.Date().Unix()
}
return p1.Date().Unix() > p2.Date().Unix()
if p2.Weight() == 0 {
return true
}
if p1.Weight() == 0 {
return false
}
return p1.Weight() < p2.Weight()
}
if p2.Language().Weight == 0 {
return true
lessPageLanguage = func(p1, p2 Page) bool {
if p1.Language().Weight == p2.Language().Weight {
if p1.Date().Unix() == p2.Date().Unix() {
c := compare.Strings(p1.LinkTitle(), p2.LinkTitle())
if c == 0 {
if !p1.File().IsZero() && !p2.File().IsZero() {
return compare.LessStrings(p1.File().Filename(), p2.File().Filename())
}
}
return c < 0
}
return p1.Date().Unix() > p2.Date().Unix()
}
if p2.Language().Weight == 0 {
return true
}
if p1.Language().Weight == 0 {
return false
}
return p1.Language().Weight < p2.Language().Weight
}
if p1.Language().Weight == 0 {
return false
lessPageTitle = func(p1, p2 Page) bool {
return compare.LessStrings(p1.Title(), p2.Title())
}
return p1.Language().Weight < p2.Language().Weight
}
lessPageLinkTitle = func(p1, p2 Page) bool {
return compare.LessStrings(p1.LinkTitle(), p2.LinkTitle())
}
lessPageDate = func(p1, p2 Page) bool {
return p1.Date().Unix() < p2.Date().Unix()
}
lessPagePubDate = func(p1, p2 Page) bool {
return p1.PublishDate().Unix() < p2.PublishDate().Unix()
}
)
func (ps *pageSorter) Len() int { return len(ps.pages) }
func (ps *pageSorter) Swap(i, j int) { ps.pages[i], ps.pages[j] = ps.pages[j], ps.pages[i] }
@ -139,11 +158,7 @@ func (p Pages) ByTitle() Pages {
const key = "pageSort.ByTitle"
title := func(p1, p2 Page) bool {
return compare.LessStrings(p1.Title(), p2.Title())
}
pages, _ := spc.get(key, pageBy(title).Sort, p)
pages, _ := spc.get(key, pageBy(lessPageTitle).Sort, p)
return pages
}
@ -156,11 +171,7 @@ func (p Pages) ByLinkTitle() Pages {
const key = "pageSort.ByLinkTitle"
linkTitle := func(p1, p2 Page) bool {
return compare.LessStrings(p1.LinkTitle(), p2.LinkTitle())
}
pages, _ := spc.get(key, pageBy(linkTitle).Sort, p)
pages, _ := spc.get(key, pageBy(lessPageLinkTitle).Sort, p)
return pages
}
@ -174,11 +185,7 @@ func (p Pages) ByDate() Pages {
const key = "pageSort.ByDate"
date := func(p1, p2 Page) bool {
return p1.Date().Unix() < p2.Date().Unix()
}
pages, _ := spc.get(key, pageBy(date).Sort, p)
pages, _ := spc.get(key, pageBy(lessPageDate).Sort, p)
return pages
}
@ -192,11 +199,7 @@ func (p Pages) ByPublishDate() Pages {
const key = "pageSort.ByPublishDate"
pubDate := func(p1, p2 Page) bool {
return p1.PublishDate().Unix() < p2.PublishDate().Unix()
}
pages, _ := spc.get(key, pageBy(pubDate).Sort, p)
pages, _ := spc.get(key, pageBy(lessPagePubDate).Sort, p)
return pages
}
@ -276,14 +279,14 @@ func (p Pages) ByLanguage() Pages {
const key = "pageSort.ByLanguage"
pages, _ := spc.get(key, pageBy(languagePageSort).Sort, p)
pages, _ := spc.get(key, pageBy(lessPageLanguage).Sort, p)
return pages
}
// SortByLanguage sorts the pages by language.
func SortByLanguage(pages Pages) {
pageBy(languagePageSort).Sort(pages)
pageBy(lessPageLanguage).Sort(pages)
}
// Reverse reverses the order in Pages and returns a copy.

View file

@ -0,0 +1,126 @@
// Copyright 2019 The Hugo Authors. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package page
import "sort"
// Used in page binary search, the most common in front.
var pageLessFunctions = []func(p1, p2 Page) bool{
DefaultPageSort,
lessPageDate,
lessPagePubDate,
lessPageTitle,
lessPageLinkTitle,
}
func searchPage(p Page, pages Pages) int {
if len(pages) < 1000 {
// For smaller data sets, doing a linear search is faster.
return searchPageLinear(p, pages, 0)
}
less := isPagesProbablySorted(pages, pageLessFunctions...)
if less == nil {
return searchPageLinear(p, pages, 0)
}
i := searchPageBinary(p, pages, less)
if i != -1 {
return i
}
return searchPageLinear(p, pages, 0)
}
func searchPageLinear(p Page, pages Pages, start int) int {
for i := start; i < len(pages); i++ {
c := pages[i]
if c.Eq(p) {
return i
}
}
return -1
}
func searchPageBinary(p Page, pages Pages, less func(p1, p2 Page) bool) int {
n := len(pages)
f := func(i int) bool {
c := pages[i]
isLess := less(c, p)
return !isLess || c.Eq(p)
}
i := sort.Search(n, f)
if i == n {
return -1
}
return searchPageLinear(p, pages, i)
}
// isProbablySorted tests if the pages slice is probably sorted.
func isPagesProbablySorted(pages Pages, lessFuncs ...func(p1, p2 Page) bool) func(p1, p2 Page) bool {
n := len(pages)
step := 1
if n > 500 {
step = 50
}
is := func(less func(p1, p2 Page) bool) bool {
samples := 0
for i := n - 1; i > 0; i = i - step {
if less(pages[i], pages[i-1]) {
return false
}
samples++
if samples >= 15 {
return true
}
}
return samples > 0
}
isReverse := func(less func(p1, p2 Page) bool) bool {
samples := 0
for i := 0; i < n-1; i = i + step {
if less(pages[i], pages[i+1]) {
return false
}
samples++
if samples > 15 {
return true
}
}
return samples > 0
}
for _, less := range lessFuncs {
if is(less) {
return less
}
if isReverse(less) {
return func(p1, p2 Page) bool {
return less(p2, p1)
}
}
}
return nil
}

View file

@ -0,0 +1,124 @@
// Copyright 2019 The Hugo Authors. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package page
import (
"fmt"
"math/rand"
"testing"
"time"
qt "github.com/frankban/quicktest"
)
func TestSearchPage(t *testing.T) {
t.Parallel()
c := qt.New(t)
pages := createSortTestPages(10)
for i, p := range pages {
p.(*testPage).title = fmt.Sprintf("Title %d", i%2)
}
for _, pages := range []Pages{pages.ByTitle(), pages.ByTitle().Reverse()} {
less := isPagesProbablySorted(pages, lessPageTitle)
c.Assert(less, qt.Not(qt.IsNil))
for i, p := range pages {
idx := searchPageBinary(p, pages, less)
c.Assert(idx, qt.Equals, i)
}
}
}
func BenchmarkSearchPage(b *testing.B) {
type Variant struct {
name string
preparePages func(pages Pages) Pages
search func(p Page, pages Pages) int
}
shufflePages := func(pages Pages) Pages {
rand.Shuffle(len(pages), func(i, j int) { pages[i], pages[j] = pages[j], pages[i] })
return pages
}
linearSearch := func(p Page, pages Pages) int {
return searchPageLinear(p, pages, 0)
}
createPages := func(num int) Pages {
pages := createSortTestPages(num)
for _, p := range pages {
tp := p.(*testPage)
tp.weight = rand.Intn(len(pages))
tp.title = fmt.Sprintf("Title %d", rand.Intn(len(pages)))
tp.pubDate = time.Now().Add(time.Duration(rand.Intn(len(pages)/5)) * time.Hour)
tp.date = time.Now().Add(time.Duration(rand.Intn(len(pages)/5)) * time.Hour)
}
return pages
}
for _, variant := range []Variant{
Variant{"Shuffled", shufflePages, searchPage},
Variant{"ByWeight", func(pages Pages) Pages {
return pages.ByWeight()
}, searchPage},
Variant{"ByWeight.Reverse", func(pages Pages) Pages {
return pages.ByWeight().Reverse()
}, searchPage},
Variant{"ByDate", func(pages Pages) Pages {
return pages.ByDate()
}, searchPage},
Variant{"ByPublishDate", func(pages Pages) Pages {
return pages.ByPublishDate()
}, searchPage},
Variant{"ByTitle", func(pages Pages) Pages {
return pages.ByTitle()
}, searchPage},
Variant{"ByTitle Linear", func(pages Pages) Pages {
return pages.ByTitle()
}, linearSearch},
} {
for _, numPages := range []int{100, 500, 1000, 5000} {
b.Run(fmt.Sprintf("%s-%d", variant.name, numPages), func(b *testing.B) {
b.StopTimer()
pages := createPages(numPages)
if variant.preparePages != nil {
pages = variant.preparePages(pages)
}
b.StartTimer()
for i := 0; i < b.N; i++ {
j := rand.Intn(numPages)
k := variant.search(pages[j], pages)
if k != j {
b.Fatalf("%d != %d", k, j)
}
}
})
}
}
}
func TestIsPagesProbablySorted(t *testing.T) {
t.Parallel()
c := qt.New(t)
c.Assert(isPagesProbablySorted(createSortTestPages(6).ByWeight(), DefaultPageSort), qt.Not(qt.IsNil))
c.Assert(isPagesProbablySorted(createSortTestPages(300).ByWeight(), DefaultPageSort), qt.Not(qt.IsNil))
c.Assert(isPagesProbablySorted(createSortTestPages(6), DefaultPageSort), qt.IsNil)
c.Assert(isPagesProbablySorted(createSortTestPages(300).ByTitle(), pageLessFunctions...), qt.Not(qt.IsNil))
}

View file

@ -269,6 +269,7 @@ func createSortTestPages(num int) Pages {
for i := 0; i < num; i++ {
p := newTestPage()
p.path = fmt.Sprintf("/x/y/p%d.md", i)
p.title = fmt.Sprintf("Title %d", i%(num+1/2))
p.params = map[string]interface{}{
"arbitrarily": map[string]interface{}{
"nested": ("xyz" + fmt.Sprintf("%v", 100-i)),

View file

@ -310,6 +310,9 @@ func (p *testPage) Len() int {
func (p *testPage) LinkTitle() string {
if p.linkTitle == "" {
if p.title == "" {
return p.path
}
return p.title
}
return p.linkTitle

View file

@ -99,7 +99,7 @@ func (wp WeightedPages) Pages() Pages {
// this weighted page set.
func (wp WeightedPages) Next(cur Page) Page {
for x, c := range wp {
if c.Page == cur {
if c.Page.Eq(cur) {
if x == 0 {
return nil
}
@ -113,7 +113,7 @@ func (wp WeightedPages) Next(cur Page) Page {
// this weighted page set.
func (wp WeightedPages) Prev(cur Page) Page {
for x, c := range wp {
if c.Page == cur {
if c.Page.Eq(cur) {
if x < len(wp)-1 {
return wp[x+1].Page
}