From 653e6856ea1cfc60cc16733807d23b302dbe4bd5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B8rn=20Erik=20Pedersen?= Date: Fri, 11 Oct 2019 13:55:46 +0200 Subject: [PATCH] resources/page: Use binary search in Pages.Prev/Next if possible MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is obviously much faster for lager data sets: ```bash name old time/op new time/op delta SearchPage/ByWeight-100-4 267ns ± 4% 272ns ± 5% ~ (p=0.457 n=4+4) SearchPage/ByWeight-5000-4 10.8µs ± 3% 1.2µs ± 2% -88.99% (p=0.029 n=4+4) SearchPage/ByWeight-10000-4 21.1µs ± 1% 1.4µs ±11% -93.28% (p=0.029 n=4+4) ``` See #4500 --- hugolib/pages_test.go | 2 +- resources/page/pages_prev_next.go | 27 ++--- resources/page/pages_sort.go | 131 ++++++++++++----------- resources/page/pages_sort_search.go | 126 ++++++++++++++++++++++ resources/page/pages_sort_search_test.go | 124 +++++++++++++++++++++ resources/page/pages_sort_test.go | 1 + resources/page/testhelpers_test.go | 3 + resources/page/weighted.go | 4 +- 8 files changed, 335 insertions(+), 83 deletions(-) create mode 100644 resources/page/pages_sort_search.go create mode 100644 resources/page/pages_sort_search_test.go diff --git a/hugolib/pages_test.go b/hugolib/pages_test.go index 371ffe8f2..6a371b421 100644 --- a/hugolib/pages_test.go +++ b/hugolib/pages_test.go @@ -63,7 +63,7 @@ func BenchmarkPagesPrevNext(b *testing.B) { Variant{"Pages.Shuffled.Prev", shufflePages, func(p page.Page, pages page.Pages) { pages.Prev(p) }}, Variant{"Pages.ByTitle.Next", func(pages page.Pages) page.Pages { return pages.ByTitle() }, func(p page.Page, pages page.Pages) { pages.Next(p) }}, } { - for _, numPages := range []int{100, 300, 900, 5000} { + for _, numPages := range []int{300, 5000} { b.Run(fmt.Sprintf("%s-pages-%d", variant.name, numPages), func(b *testing.B) { b.StopTimer() builder := newPagesPrevNextTestSite(b, numPages) diff --git a/resources/page/pages_prev_next.go b/resources/page/pages_prev_next.go index dd87aa4ce..dd767c667 100644 --- a/resources/page/pages_prev_next.go +++ b/resources/page/pages_prev_next.go @@ -15,26 +15,21 @@ package page // Next returns the next page reletive to the given func (p Pages) Next(cur Page) Page { - for x, c := range p { - if c.Eq(cur) { - if x == 0 { - return nil - } - return p[x-1] - } + x := searchPage(cur, p) + if x <= 0 { + return nil } - return nil + return p[x-1] } // Prev returns the previous page reletive to the given func (p Pages) Prev(cur Page) Page { - for x, c := range p { - if c.Eq(cur) { - if x < len(p)-1 { - return p[x+1] - } - return nil - } + x := searchPage(cur, p) + + if x == -1 || len(p)-x < 2 { + return nil } - return nil + + return p[x+1] + } diff --git a/resources/page/pages_sort.go b/resources/page/pages_sort.go index b32d56ed8..b0e7f8e32 100644 --- a/resources/page/pages_sort.go +++ b/resources/page/pages_sort.go @@ -46,59 +46,78 @@ func (by pageBy) Sort(pages Pages) { sort.Stable(ps) } -// DefaultPageSort is the default sort func for pages in Hugo: -// Order by Weight, Date, LinkTitle and then full file path. -var DefaultPageSort = func(p1, p2 Page) bool { - if p1.Weight() == p2.Weight() { - if p1.Date().Unix() == p2.Date().Unix() { - c := compare.Strings(p1.LinkTitle(), p2.LinkTitle()) - if c == 0 { - if p1.File().IsZero() || p2.File().IsZero() { - return p1.File().IsZero() - } - return compare.LessStrings(p1.File().Filename(), p2.File().Filename()) - } - return c < 0 - } - return p1.Date().Unix() > p2.Date().Unix() - } +var ( - if p2.Weight() == 0 { - return true - } - - if p1.Weight() == 0 { - return false - } - - return p1.Weight() < p2.Weight() -} - -var languagePageSort = func(p1, p2 Page) bool { - - if p1.Language().Weight == p2.Language().Weight { - if p1.Date().Unix() == p2.Date().Unix() { - c := compare.Strings(p1.LinkTitle(), p2.LinkTitle()) - if c == 0 { - if !p1.File().IsZero() && !p2.File().IsZero() { + // DefaultPageSort is the default sort func for pages in Hugo: + // Order by Weight, Date, LinkTitle and then full file path. + DefaultPageSort = func(p1, p2 Page) bool { + if p1.Weight() == p2.Weight() { + if p1.Date().Unix() == p2.Date().Unix() { + c := compare.Strings(p1.LinkTitle(), p2.LinkTitle()) + if c == 0 { + if p1.File().IsZero() || p2.File().IsZero() { + return p1.File().IsZero() + } return compare.LessStrings(p1.File().Filename(), p2.File().Filename()) } + return c < 0 } - return c < 0 + return p1.Date().Unix() > p2.Date().Unix() } - return p1.Date().Unix() > p2.Date().Unix() + + if p2.Weight() == 0 { + return true + } + + if p1.Weight() == 0 { + return false + } + + return p1.Weight() < p2.Weight() } - if p2.Language().Weight == 0 { - return true + lessPageLanguage = func(p1, p2 Page) bool { + + if p1.Language().Weight == p2.Language().Weight { + if p1.Date().Unix() == p2.Date().Unix() { + c := compare.Strings(p1.LinkTitle(), p2.LinkTitle()) + if c == 0 { + if !p1.File().IsZero() && !p2.File().IsZero() { + return compare.LessStrings(p1.File().Filename(), p2.File().Filename()) + } + } + return c < 0 + } + return p1.Date().Unix() > p2.Date().Unix() + } + + if p2.Language().Weight == 0 { + return true + } + + if p1.Language().Weight == 0 { + return false + } + + return p1.Language().Weight < p2.Language().Weight } - if p1.Language().Weight == 0 { - return false + lessPageTitle = func(p1, p2 Page) bool { + return compare.LessStrings(p1.Title(), p2.Title()) } - return p1.Language().Weight < p2.Language().Weight -} + lessPageLinkTitle = func(p1, p2 Page) bool { + return compare.LessStrings(p1.LinkTitle(), p2.LinkTitle()) + } + + lessPageDate = func(p1, p2 Page) bool { + return p1.Date().Unix() < p2.Date().Unix() + } + + lessPagePubDate = func(p1, p2 Page) bool { + return p1.PublishDate().Unix() < p2.PublishDate().Unix() + } +) func (ps *pageSorter) Len() int { return len(ps.pages) } func (ps *pageSorter) Swap(i, j int) { ps.pages[i], ps.pages[j] = ps.pages[j], ps.pages[i] } @@ -139,11 +158,7 @@ func (p Pages) ByTitle() Pages { const key = "pageSort.ByTitle" - title := func(p1, p2 Page) bool { - return compare.LessStrings(p1.Title(), p2.Title()) - } - - pages, _ := spc.get(key, pageBy(title).Sort, p) + pages, _ := spc.get(key, pageBy(lessPageTitle).Sort, p) return pages } @@ -156,11 +171,7 @@ func (p Pages) ByLinkTitle() Pages { const key = "pageSort.ByLinkTitle" - linkTitle := func(p1, p2 Page) bool { - return compare.LessStrings(p1.LinkTitle(), p2.LinkTitle()) - } - - pages, _ := spc.get(key, pageBy(linkTitle).Sort, p) + pages, _ := spc.get(key, pageBy(lessPageLinkTitle).Sort, p) return pages } @@ -174,11 +185,7 @@ func (p Pages) ByDate() Pages { const key = "pageSort.ByDate" - date := func(p1, p2 Page) bool { - return p1.Date().Unix() < p2.Date().Unix() - } - - pages, _ := spc.get(key, pageBy(date).Sort, p) + pages, _ := spc.get(key, pageBy(lessPageDate).Sort, p) return pages } @@ -192,11 +199,7 @@ func (p Pages) ByPublishDate() Pages { const key = "pageSort.ByPublishDate" - pubDate := func(p1, p2 Page) bool { - return p1.PublishDate().Unix() < p2.PublishDate().Unix() - } - - pages, _ := spc.get(key, pageBy(pubDate).Sort, p) + pages, _ := spc.get(key, pageBy(lessPagePubDate).Sort, p) return pages } @@ -276,14 +279,14 @@ func (p Pages) ByLanguage() Pages { const key = "pageSort.ByLanguage" - pages, _ := spc.get(key, pageBy(languagePageSort).Sort, p) + pages, _ := spc.get(key, pageBy(lessPageLanguage).Sort, p) return pages } // SortByLanguage sorts the pages by language. func SortByLanguage(pages Pages) { - pageBy(languagePageSort).Sort(pages) + pageBy(lessPageLanguage).Sort(pages) } // Reverse reverses the order in Pages and returns a copy. diff --git a/resources/page/pages_sort_search.go b/resources/page/pages_sort_search.go new file mode 100644 index 000000000..ff44e42d5 --- /dev/null +++ b/resources/page/pages_sort_search.go @@ -0,0 +1,126 @@ +// Copyright 2019 The Hugo Authors. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package page + +import "sort" + +// Used in page binary search, the most common in front. +var pageLessFunctions = []func(p1, p2 Page) bool{ + DefaultPageSort, + lessPageDate, + lessPagePubDate, + lessPageTitle, + lessPageLinkTitle, +} + +func searchPage(p Page, pages Pages) int { + if len(pages) < 1000 { + // For smaller data sets, doing a linear search is faster. + return searchPageLinear(p, pages, 0) + } + + less := isPagesProbablySorted(pages, pageLessFunctions...) + if less == nil { + return searchPageLinear(p, pages, 0) + } + + i := searchPageBinary(p, pages, less) + if i != -1 { + return i + } + + return searchPageLinear(p, pages, 0) +} + +func searchPageLinear(p Page, pages Pages, start int) int { + for i := start; i < len(pages); i++ { + c := pages[i] + if c.Eq(p) { + return i + } + } + return -1 +} + +func searchPageBinary(p Page, pages Pages, less func(p1, p2 Page) bool) int { + n := len(pages) + + f := func(i int) bool { + c := pages[i] + isLess := less(c, p) + return !isLess || c.Eq(p) + } + + i := sort.Search(n, f) + + if i == n { + return -1 + } + + return searchPageLinear(p, pages, i) + +} + +// isProbablySorted tests if the pages slice is probably sorted. +func isPagesProbablySorted(pages Pages, lessFuncs ...func(p1, p2 Page) bool) func(p1, p2 Page) bool { + n := len(pages) + step := 1 + if n > 500 { + step = 50 + } + + is := func(less func(p1, p2 Page) bool) bool { + samples := 0 + + for i := n - 1; i > 0; i = i - step { + if less(pages[i], pages[i-1]) { + return false + } + samples++ + if samples >= 15 { + return true + } + } + return samples > 0 + } + + isReverse := func(less func(p1, p2 Page) bool) bool { + samples := 0 + + for i := 0; i < n-1; i = i + step { + if less(pages[i], pages[i+1]) { + return false + } + samples++ + + if samples > 15 { + return true + } + } + return samples > 0 + } + + for _, less := range lessFuncs { + if is(less) { + return less + } + if isReverse(less) { + return func(p1, p2 Page) bool { + return less(p2, p1) + } + } + } + + return nil +} diff --git a/resources/page/pages_sort_search_test.go b/resources/page/pages_sort_search_test.go new file mode 100644 index 000000000..6cc4ed5ea --- /dev/null +++ b/resources/page/pages_sort_search_test.go @@ -0,0 +1,124 @@ +// Copyright 2019 The Hugo Authors. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package page + +import ( + "fmt" + "math/rand" + "testing" + "time" + + qt "github.com/frankban/quicktest" +) + +func TestSearchPage(t *testing.T) { + t.Parallel() + c := qt.New(t) + pages := createSortTestPages(10) + for i, p := range pages { + p.(*testPage).title = fmt.Sprintf("Title %d", i%2) + } + + for _, pages := range []Pages{pages.ByTitle(), pages.ByTitle().Reverse()} { + less := isPagesProbablySorted(pages, lessPageTitle) + c.Assert(less, qt.Not(qt.IsNil)) + for i, p := range pages { + idx := searchPageBinary(p, pages, less) + c.Assert(idx, qt.Equals, i) + } + } + +} + +func BenchmarkSearchPage(b *testing.B) { + type Variant struct { + name string + preparePages func(pages Pages) Pages + search func(p Page, pages Pages) int + } + + shufflePages := func(pages Pages) Pages { + rand.Shuffle(len(pages), func(i, j int) { pages[i], pages[j] = pages[j], pages[i] }) + return pages + } + + linearSearch := func(p Page, pages Pages) int { + return searchPageLinear(p, pages, 0) + } + + createPages := func(num int) Pages { + pages := createSortTestPages(num) + for _, p := range pages { + tp := p.(*testPage) + tp.weight = rand.Intn(len(pages)) + tp.title = fmt.Sprintf("Title %d", rand.Intn(len(pages))) + + tp.pubDate = time.Now().Add(time.Duration(rand.Intn(len(pages)/5)) * time.Hour) + tp.date = time.Now().Add(time.Duration(rand.Intn(len(pages)/5)) * time.Hour) + } + + return pages + } + + for _, variant := range []Variant{ + Variant{"Shuffled", shufflePages, searchPage}, + Variant{"ByWeight", func(pages Pages) Pages { + return pages.ByWeight() + }, searchPage}, + Variant{"ByWeight.Reverse", func(pages Pages) Pages { + return pages.ByWeight().Reverse() + }, searchPage}, + Variant{"ByDate", func(pages Pages) Pages { + return pages.ByDate() + }, searchPage}, + Variant{"ByPublishDate", func(pages Pages) Pages { + return pages.ByPublishDate() + }, searchPage}, + Variant{"ByTitle", func(pages Pages) Pages { + return pages.ByTitle() + }, searchPage}, + Variant{"ByTitle Linear", func(pages Pages) Pages { + return pages.ByTitle() + }, linearSearch}, + } { + for _, numPages := range []int{100, 500, 1000, 5000} { + b.Run(fmt.Sprintf("%s-%d", variant.name, numPages), func(b *testing.B) { + b.StopTimer() + pages := createPages(numPages) + if variant.preparePages != nil { + pages = variant.preparePages(pages) + } + b.StartTimer() + for i := 0; i < b.N; i++ { + j := rand.Intn(numPages) + k := variant.search(pages[j], pages) + if k != j { + b.Fatalf("%d != %d", k, j) + } + } + }) + } + } +} + +func TestIsPagesProbablySorted(t *testing.T) { + t.Parallel() + c := qt.New(t) + + c.Assert(isPagesProbablySorted(createSortTestPages(6).ByWeight(), DefaultPageSort), qt.Not(qt.IsNil)) + c.Assert(isPagesProbablySorted(createSortTestPages(300).ByWeight(), DefaultPageSort), qt.Not(qt.IsNil)) + c.Assert(isPagesProbablySorted(createSortTestPages(6), DefaultPageSort), qt.IsNil) + c.Assert(isPagesProbablySorted(createSortTestPages(300).ByTitle(), pageLessFunctions...), qt.Not(qt.IsNil)) + +} diff --git a/resources/page/pages_sort_test.go b/resources/page/pages_sort_test.go index c9eb37c06..670abb90a 100644 --- a/resources/page/pages_sort_test.go +++ b/resources/page/pages_sort_test.go @@ -269,6 +269,7 @@ func createSortTestPages(num int) Pages { for i := 0; i < num; i++ { p := newTestPage() p.path = fmt.Sprintf("/x/y/p%d.md", i) + p.title = fmt.Sprintf("Title %d", i%(num+1/2)) p.params = map[string]interface{}{ "arbitrarily": map[string]interface{}{ "nested": ("xyz" + fmt.Sprintf("%v", 100-i)), diff --git a/resources/page/testhelpers_test.go b/resources/page/testhelpers_test.go index e861c1375..560166b0b 100644 --- a/resources/page/testhelpers_test.go +++ b/resources/page/testhelpers_test.go @@ -310,6 +310,9 @@ func (p *testPage) Len() int { func (p *testPage) LinkTitle() string { if p.linkTitle == "" { + if p.title == "" { + return p.path + } return p.title } return p.linkTitle diff --git a/resources/page/weighted.go b/resources/page/weighted.go index b2fc8b67b..7e5e25451 100644 --- a/resources/page/weighted.go +++ b/resources/page/weighted.go @@ -99,7 +99,7 @@ func (wp WeightedPages) Pages() Pages { // this weighted page set. func (wp WeightedPages) Next(cur Page) Page { for x, c := range wp { - if c.Page == cur { + if c.Page.Eq(cur) { if x == 0 { return nil } @@ -113,7 +113,7 @@ func (wp WeightedPages) Next(cur Page) Page { // this weighted page set. func (wp WeightedPages) Prev(cur Page) Page { for x, c := range wp { - if c.Page == cur { + if c.Page.Eq(cur) { if x < len(wp)-1 { return wp[x+1].Page }