Make string sorting (e.g. ByTitle, ByLinkTitle and ByParam) language aware

Fixes #2180
This commit is contained in:
Bjørn Erik Pedersen 2022-04-10 20:30:52 +02:00
parent 82ba634ed9
commit 627eed1d62
No known key found for this signature in database
GPG key ID: 330E6E2BD4859D8F
11 changed files with 297 additions and 31 deletions

View file

@ -739,7 +739,12 @@ func (s *SiteInfo) Sites() page.Sites {
}
// Current returns the Site that is currently being rendered.
// Before rendering has started no such Site is set yet, and in that
// situation it will return the Site itself.
func (s *SiteInfo) Current() page.Site {
	if cur := s.s.h.currentSite; cur != nil {
		return cur.Info
	}
	return s
}

View file

@ -18,6 +18,7 @@ import (
"sort"
"github.com/gohugoio/hugo/compare"
"github.com/gohugoio/hugo/langs"
"github.com/gohugoio/hugo/resources/page"
)
@ -40,6 +41,15 @@ type Taxonomy map[string]page.WeightedPages
// Important because you can't order a map.
type OrderedTaxonomy []OrderedTaxonomyEntry
// getOneOPage returns one page in the taxonomy,
// nil if there is none.
// Entries that hold no pages are skipped, so an empty first entry
// yields a page from a later entry (or nil) instead of an index panic.
func (t OrderedTaxonomy) getOneOPage() page.Page {
	for _, entry := range t {
		if pages := entry.Pages(); len(pages) > 0 {
			return pages[0]
		}
	}
	return nil
}
// OrderedTaxonomyEntry is similar to an element of a Taxonomy, but with the key embedded (as name)
// e.g: {Name: Technology, page.WeightedPages: TaxonomyPages}
type OrderedTaxonomyEntry struct {
@ -72,11 +82,18 @@ func (i Taxonomy) TaxonomyArray() OrderedTaxonomy {
// Alphabetical returns an ordered taxonomy sorted by key name.
func (i Taxonomy) Alphabetical() OrderedTaxonomy {
name := func(i1, i2 *OrderedTaxonomyEntry) bool {
return compare.LessStrings(i1.Name, i2.Name)
}
ia := i.TaxonomyArray()
p := ia.getOneOPage()
if p == nil {
return ia
}
currentSite := p.Site().Current()
coll := langs.GetCollator(currentSite.Language())
coll.Lock()
defer coll.Unlock()
name := func(i1, i2 *OrderedTaxonomyEntry) bool {
return coll.CompareStrings(i1.Name, i2.Name) < 0
}
oiBy(name).Sort(ia)
return ia
}

View file

@ -19,6 +19,9 @@ import (
"sync"
"time"
"golang.org/x/text/collate"
"golang.org/x/text/language"
"github.com/pkg/errors"
"github.com/gohugoio/hugo/common/htime"
@ -80,8 +83,9 @@ type Language struct {
// TODO(bep) do the same for some of the others.
translator locales.Translator
timeFormatter htime.TimeFormatter
location *time.Location
tag language.Tag
collator *Collator
location *time.Location
// Error during initialization. Will fail the build.
initErr error
@ -111,6 +115,18 @@ func NewLanguage(lang string, cfg config.Provider) *Language {
}
}
var coll *Collator
tag, err := language.Parse(lang)
if err == nil {
coll = &Collator{
c: collate.New(tag),
}
} else {
coll = &Collator{
c: collate.New(language.English),
}
}
l := &Language{
Lang: lang,
ContentDir: cfg.GetString("contentDir"),
@ -119,6 +135,8 @@ func NewLanguage(lang string, cfg config.Provider) *Language {
params: params,
translator: translator,
timeFormatter: htime.NewTimeFormatter(translator),
tag: tag,
collator: coll,
}
if err := l.loadLocation(cfg.GetString("timeZone")); err != nil {
@ -275,6 +293,10 @@ func GetLocation(l *Language) *time.Location {
return l.location
}
// GetCollator returns the Collator stored on l.
// CompareStrings on the returned Collator is documented as not thread safe,
// so callers are expected to Lock it around their comparisons.
func GetCollator(l *Language) *Collator {
return l.collator
}
func (l *Language) loadLocation(tzStr string) error {
location, err := time.LoadLocation(tzStr)
if err != nil {
@ -284,3 +306,16 @@ func (l *Language) loadLocation(tzStr string) error {
return nil
}
// Collator wraps a collate.Collator and embeds a sync.Mutex that callers
// can hold while comparing strings.
type Collator struct {
sync.Mutex
c *collate.Collator
}
// CompareStrings compares a and b.
// It returns -1 if a < b, 1 if a > b and 0 if a == b.
// Note that the Collator is not thread safe, so you may want
// to acquire a lock on it before calling this method.
func (c *Collator) CompareStrings(a, b string) int {
return c.c.CompareString(a, b)
}

View file

@ -14,10 +14,13 @@
package langs
import (
"sync"
"testing"
qt "github.com/frankban/quicktest"
"github.com/gohugoio/hugo/config"
"golang.org/x/text/collate"
"golang.org/x/text/language"
)
func TestGetGlobalOnlySetting(t *testing.T) {
@ -47,3 +50,59 @@ func TestLanguageParams(t *testing.T) {
c.Assert(lang.Params()["p1"], qt.Equals, "p1p")
c.Assert(lang.Get("p1"), qt.Equals, "p1cfg")
}
// TestCollator exercises one Collator from several goroutines, each of
// which holds the Collator's lock while it compares strings.
func TestCollator(t *testing.T) {
	c := qt.New(t)

	var wg sync.WaitGroup

	coll := &Collator{c: collate.New(language.English, collate.Loose)}

	for i := 0; i < 10; i++ {
		wg.Add(1)
		go func() {
			coll.Lock()
			defer coll.Unlock()
			defer wg.Done()
			for j := 0; j < 10; j++ {
				k := coll.CompareStrings("abc", "def")
				// Check rather than Assert: Assert stops the test via Fatal,
				// which must only be called from the goroutine running the test.
				c.Check(k, qt.Equals, -1)
			}
		}()
	}
	wg.Wait()
}
// BenchmarkCollator measures pairwise string comparisons through a Collator,
// once from a single goroutine ("Single") and once from parallel goroutines
// that each lock their own Collator around the work ("Para").
func BenchmarkCollator(b *testing.B) {
	words := []string{"foo", "bar", "éntre", "baz", "qux", "quux", "corge", "grault", "garply", "waldo", "fred", "plugh", "xyzzy", "thud"}

	// comparePairs compares every word with each word that follows it.
	comparePairs := func(coll *Collator) {
		for i, left := range words {
			for _, right := range words[i+1:] {
				_ = coll.CompareStrings(left, right)
			}
		}
	}

	newCollator := func() *Collator {
		return &Collator{c: collate.New(language.English, collate.Loose)}
	}

	b.Run("Single", func(b *testing.B) {
		coll := newCollator()
		for n := 0; n < b.N; n++ {
			comparePairs(coll)
		}
	})

	b.Run("Para", func(b *testing.B) {
		b.RunParallel(func(pb *testing.PB) {
			coll := newCollator()
			for pb.Next() {
				coll.Lock()
				comparePairs(coll)
				coll.Unlock()
			}
		})
	})
}

View file

@ -70,3 +70,69 @@ date: "2020-02-01"
b.AssertFileContent("public/en/index.html", "0|February, 2020|Pages(1)1|January, 2020|Pages(1)")
b.AssertFileContent("public/fr/index.html", "0|février, 2020|Pages(1)1|janvier, 2020|Pages(1)")
}
// TestPagesSortCollation builds a site configured with the languages en, fr
// and nn from the txtar fixture below and checks the rendered English index
// page. The expected output places the accented strings ("émotion", "éclair")
// among their unaccented neighbours for ByTitle, ByLinkTitle, ByParam,
// taxonomy Alphabetical, GroupBy, sort and ByWeight.
func TestPagesSortCollation(t *testing.T) {
files := `
-- config.toml --
defaultContentLanguage = 'en'
defaultContentLanguageInSubdir = true
[languages]
[languages.en]
title = 'My blog'
weight = 1
[languages.fr]
title = 'Mon blogue'
weight = 2
[languages.nn]
title = 'Bloggen min'
weight = 3
-- content/p1.md --
---
title: "zulu"
date: "2020-01-01"
param1: "xylophone"
tags: ["xylophone", "éclair", "zulu", "emma"]
---
-- content/p2.md --
---
title: "émotion"
date: "2020-01-01"
param1: "violin"
---
-- content/p3.md --
---
title: "alpha"
date: "2020-01-01"
param1: "éclair"
---
-- layouts/index.html --
ByTitle: {{ range site.RegularPages.ByTitle }}{{ .Title }}|{{ end }}
ByLinkTitle: {{ range site.RegularPages.ByLinkTitle }}{{ .Title }}|{{ end }}
ByParam: {{ range site.RegularPages.ByParam "param1" }}{{ .Params.param1 }}|{{ end }}
Tags Alphabetical: {{ range site.Taxonomies.tags.Alphabetical }}{{ .Term }}|{{ end }}
GroupBy: {{ range site.RegularPages.GroupBy "Title" }}{{ .Key }}|{{ end }}
{{ with (site.GetPage "p1").Params.tags }}
Sort: {{ sort . }}
ByWeight: {{ range site.RegularPages.ByWeight }}{{ .Title }}|{{ end }}
{{ end }}
`
// NeedsOsFS is set, so the builder is asked for an OS-backed file system.
b := hugolib.NewIntegrationTestBuilder(
hugolib.IntegrationTestConfig{
T: t,
TxtarString: files,
NeedsOsFS: true,
}).Build()
// Only the English output is asserted here.
b.AssertFileContent("public/en/index.html", `
ByTitle: alpha|émotion|zulu|
ByLinkTitle: alpha|émotion|zulu|
ByParam: éclair|violin|xylophone
Tags Alphabetical: éclair|emma|xylophone|zulu|
GroupBy: alpha|émotion|zulu|
Sort: [éclair emma xylophone zulu]
ByWeight: alpha|émotion|zulu|
`)
}

View file

@ -53,13 +53,16 @@ type mapKeyByInt struct{ mapKeyValues }
func (s mapKeyByInt) Less(i, j int) bool { return s.mapKeyValues[i].Int() < s.mapKeyValues[j].Int() }
type mapKeyByStr struct{ mapKeyValues }
func (s mapKeyByStr) Less(i, j int) bool {
return compare.LessStrings(s.mapKeyValues[i].String(), s.mapKeyValues[j].String())
type mapKeyByStr struct {
less func(a, b string) bool
mapKeyValues
}
func sortKeys(v []reflect.Value, order string) []reflect.Value {
func (s mapKeyByStr) Less(i, j int) bool {
return s.less(s.mapKeyValues[i].String(), s.mapKeyValues[j].String())
}
func sortKeys(examplePage Page, v []reflect.Value, order string) []reflect.Value {
if len(v) <= 1 {
return v
}
@ -72,10 +75,12 @@ func sortKeys(v []reflect.Value, order string) []reflect.Value {
sort.Sort(mapKeyByInt{v})
}
case reflect.String:
stringLess, close := collatorStringLess(examplePage)
defer close()
if order == "desc" {
sort.Sort(sort.Reverse(mapKeyByStr{v}))
sort.Sort(sort.Reverse(mapKeyByStr{stringLess, v}))
} else {
sort.Sort(mapKeyByStr{v})
sort.Sort(mapKeyByStr{stringLess, v})
}
}
return v
@ -161,7 +166,7 @@ func (p Pages) GroupBy(key string, order ...string) (PagesGroup, error) {
tmp.SetMapIndex(fv, reflect.Append(tmp.MapIndex(fv), ppv))
}
sortedKeys := sortKeys(tmp.MapKeys(), direction)
sortedKeys := sortKeys(p[0], tmp.MapKeys(), direction)
r := make([]PageGroup, len(sortedKeys))
for i, k := range sortedKeys {
r[i] = PageGroup{Key: k.Interface(), Pages: tmp.MapIndex(k).Interface().(Pages)}
@ -213,7 +218,7 @@ func (p Pages) GroupByParam(key string, order ...string) (PagesGroup, error) {
}
var r []PageGroup
for _, k := range sortKeys(tmp.MapKeys(), direction) {
for _, k := range sortKeys(p[0], tmp.MapKeys(), direction) {
r = append(r, PageGroup{Key: k.Interface(), Pages: tmp.MapIndex(k).Interface().(Pages)})
}

View file

@ -17,6 +17,7 @@ import (
"sort"
"github.com/gohugoio/hugo/common/collections"
"github.com/gohugoio/hugo/langs"
"github.com/gohugoio/hugo/resources/resource"
@ -72,7 +73,7 @@ var (
}
if p1.Weight() == p2.Weight() {
if p1.Date().Unix() == p2.Date().Unix() {
c := compare.Strings(p1.LinkTitle(), p2.LinkTitle())
c := collatorStringCompare(func(p Page) string { return p.LinkTitle() }, p1, p2)
if c == 0 {
if p1.File().IsZero() || p2.File().IsZero() {
return p1.File().IsZero()
@ -121,11 +122,11 @@ var (
}
lessPageTitle = func(p1, p2 Page) bool {
return compare.LessStrings(p1.Title(), p2.Title())
return collatorStringCompare(func(p Page) string { return p.Title() }, p1, p2) < 0
}
lessPageLinkTitle = func(p1, p2 Page) bool {
return compare.LessStrings(p1.LinkTitle(), p2.LinkTitle())
return collatorStringCompare(func(p Page) string { return p.LinkTitle() }, p1, p2) < 0
}
lessPageDate = func(p1, p2 Page) bool {
@ -151,6 +152,46 @@ func (p Pages) Limit(n int) Pages {
return p
}
// collatorStringSort returns a function that stable-sorts Pages in place by
// the string getString extracts from each page. Strings are compared with
// the collator of the currently rendered Site's language, which is locked
// for the duration of the sort. An empty Pages is left untouched.
var collatorStringSort = func(getString func(Page) string) func(p Pages) {
	return func(p Pages) {
		if len(p) == 0 {
			return
		}

		// The slice may mix pages from several languages, so the collator is
		// taken from the Site currently being rendered (reached via the first page).
		coll := langs.GetCollator(p[0].Site().Current().Language())

		byCollatedString := func(i, j int) bool {
			return coll.CompareStrings(getString(p[i]), getString(p[j])) < 0
		}

		coll.Lock()
		defer coll.Unlock()
		sort.SliceStable(p, byCollatedString)
	}
}
// collatorStringCompare compares the strings getString extracts from p1 and
// p2 using the collator for the language of the currently rendered Site
// (reached via p1). The collator is locked only for this single comparison.
// The result is whatever the collator's CompareStrings returns.
var collatorStringCompare = func(getString func(Page) string, p1, p2 Page) int {
	coll := langs.GetCollator(p1.Site().Current().Language())

	coll.Lock()
	result := coll.CompareStrings(getString(p1), getString(p2))
	coll.Unlock()

	return result
}
// collatorStringLess locks the collator for the language of the currently
// rendered Site (reached via p) and returns:
//
//   - less: reports whether s1 sorts strictly before s2 according to that collator.
//   - close: releases the collator lock; it must be called once the caller
//     is done with less.
var collatorStringLess = func(p Page) (less func(s1, s2 string) bool, close func()) {
	currentSite := p.Site().Current()
	coll := langs.GetCollator(currentSite.Language())
	coll.Lock()
	return func(s1, s2 string) bool {
			// Strictly less (< 0): a Less function must report false for equal
			// elements, otherwise stable sorts may reorder equal items.
			return coll.CompareStrings(s1, s2) < 0
		},
		func() {
			coll.Unlock()
		}
}
// ByWeight sorts the Pages by weight and returns a copy.
//
// Adjacent invocations on the same receiver will return a cached result.
@ -175,7 +216,8 @@ func SortByDefault(pages Pages) {
func (p Pages) ByTitle() Pages {
const key = "pageSort.ByTitle"
pages, _ := spc.get(key, pageBy(lessPageTitle).Sort, p)
pages, _ := spc.get(key, collatorStringSort(func(p Page) string { return p.Title() }), p)
return pages
}
@ -187,7 +229,7 @@ func (p Pages) ByTitle() Pages {
func (p Pages) ByLinkTitle() Pages {
const key = "pageSort.ByLinkTitle"
pages, _ := spc.get(key, pageBy(lessPageLinkTitle).Sort, p)
pages, _ := spc.get(key, collatorStringSort(func(p Page) string { return p.LinkTitle() }), p)
return pages
}
@ -323,9 +365,15 @@ func (p Pages) Reverse() Pages {
//
// This may safely be executed in parallel.
func (p Pages) ByParam(paramsKey any) Pages {
if len(p) < 2 {
return p
}
paramsKeyStr := cast.ToString(paramsKey)
key := "pageSort.ByParam." + paramsKeyStr
stringLess, close := collatorStringLess(p[0])
defer close()
paramsKeyComparator := func(p1, p2 Page) bool {
v1, _ := p1.Param(paramsKeyStr)
v2, _ := p2.Param(paramsKeyStr)
@ -354,7 +402,8 @@ func (p Pages) ByParam(paramsKey any) Pages {
s1 := cast.ToString(v1)
s2 := cast.ToString(v2)
return compare.LessStrings(s1, s2)
return stringLess(s1, s2)
}
pages, _ := spc.get(key, pageBy(paramsKeyComparator).Sort, p)

View file

@ -133,7 +133,9 @@ func TestDelimit(t *testing.T) {
t.Parallel()
c := qt.New(t)
ns := New(&deps.Deps{})
ns := New(&deps.Deps{
Language: langs.NewDefaultLanguage(config.New()),
})
for i, test := range []struct {
seq any

View file

@ -20,6 +20,7 @@ import (
"strings"
"github.com/gohugoio/hugo/common/maps"
"github.com/gohugoio/hugo/langs"
"github.com/gohugoio/hugo/tpl/compare"
"github.com/spf13/cast"
)
@ -47,8 +48,10 @@ func (ns *Namespace) Sort(seq any, args ...any) (any, error) {
return nil, errors.New("can't sort " + reflect.ValueOf(seq).Type().String())
}
collator := langs.GetCollator(ns.deps.Language)
// Create a list of pairs that will be used to do the sort
p := pairList{SortAsc: true, SliceType: sliceType}
p := pairList{Collator: collator, SortAsc: true, SliceType: sliceType}
p.Pairs = make([]pair, seqv.Len())
var sortByField string
@ -124,6 +127,10 @@ func (ns *Namespace) Sort(seq any, args ...any) (any, error) {
}
}
}
collator.Lock()
defer collator.Unlock()
return p.sort(), nil
}
@ -137,6 +144,7 @@ type pair struct {
// A slice of pairs that implements sort.Interface to sort by Value.
type pairList struct {
Collator *langs.Collator
Pairs []pair
SortAsc bool
SliceType reflect.Type
@ -151,16 +159,16 @@ func (p pairList) Less(i, j int) bool {
if iv.IsValid() {
if jv.IsValid() {
// can only call Interface() on valid reflect Values
return sortComp.Lt(iv.Interface(), jv.Interface())
return sortComp.LtCollate(p.Collator, iv.Interface(), jv.Interface())
}
// if j is invalid, test i against i's zero value
return sortComp.Lt(iv.Interface(), reflect.Zero(iv.Type()))
return sortComp.LtCollate(p.Collator, iv.Interface(), reflect.Zero(iv.Type()))
}
if jv.IsValid() {
// if i is invalid, test j against j's zero value
return sortComp.Lt(reflect.Zero(jv.Type()), jv.Interface())
return sortComp.LtCollate(p.Collator, reflect.Zero(jv.Type()), jv.Interface())
}
return false

View file

@ -19,6 +19,8 @@ import (
"testing"
"github.com/gohugoio/hugo/common/maps"
"github.com/gohugoio/hugo/config"
"github.com/gohugoio/hugo/langs"
"github.com/gohugoio/hugo/deps"
)
@ -28,7 +30,9 @@ type stringsSlice []string
func TestSort(t *testing.T) {
t.Parallel()
ns := New(&deps.Deps{})
ns := New(&deps.Deps{
Language: langs.NewDefaultLanguage(config.New()),
})
type ts struct {
MyInt int

View file

@ -21,6 +21,7 @@ import (
"time"
"github.com/gohugoio/hugo/compare"
"github.com/gohugoio/hugo/langs"
"github.com/gohugoio/hugo/common/types"
)
@ -188,10 +189,11 @@ func (n *Namespace) Le(first any, others ...any) bool {
}
// Lt returns the boolean truth of arg1 < arg2 && arg1 < arg3 && arg1 < arg4.
func (n *Namespace) Lt(first any, others ...any) bool {
// The provided collator will be used for string comparisons.
func (n *Namespace) LtCollate(collator *langs.Collator, first any, others ...any) bool {
n.checkComparisonArgCount(1, others...)
for _, other := range others {
left, right := n.compareGet(first, other)
left, right := n.compareGetWithCollator(collator, first, other)
if !(left < right) {
return false
}
@ -199,6 +201,11 @@ func (n *Namespace) Lt(first any, others ...any) bool {
return true
}
// Lt returns the boolean truth of arg1 < arg2 && arg1 < arg3 && arg1 < arg4.
// It delegates to LtCollate with a nil collator.
func (n *Namespace) Lt(first any, others ...any) bool {
return n.LtCollate(nil, first, others...)
}
func (n *Namespace) checkComparisonArgCount(min int, others ...any) bool {
if len(others) < min {
panic("missing arguments for comparison")
@ -216,6 +223,10 @@ func (n *Namespace) Conditional(condition bool, a, b any) any {
}
// compareGet returns the result of compareGetWithCollator for a and b
// with a nil collator.
func (ns *Namespace) compareGet(a any, b any) (float64, float64) {
return ns.compareGetWithCollator(nil, a, b)
}
func (ns *Namespace) compareGetWithCollator(collator *langs.Collator, a any, b any) (float64, float64) {
if ac, ok := a.(compare.Comparer); ok {
c := ac.Compare(b)
if c < 0 {
@ -296,8 +307,13 @@ func (ns *Namespace) compareGet(a any, b any) (float64, float64) {
}
}
if ns.caseInsensitive && leftStr != nil && rightStr != nil {
c := compare.Strings(*leftStr, *rightStr)
if (ns.caseInsensitive || collator != nil) && leftStr != nil && rightStr != nil {
var c int
if collator != nil {
c = collator.CompareStrings(*leftStr, *rightStr)
} else {
c = compare.Strings(*leftStr, *rightStr)
}
if c < 0 {
return 0, 1
} else if c > 0 {