mirror of
https://github.com/gohugoio/hugo.git
synced 2024-11-07 20:30:36 -05:00
Improve TotalWords counter func
It is obviously more efficient when we do not care about the actual words.

```
BenchmarkTotalWords-4      100000    18795 ns/op       0 B/op    0 allocs/op
BenchmarkTotalWordsOld-4    30000    46751 ns/op    6400 B/op    1 allocs/op
```
This commit is contained in:
parent
bcd434794a
commit
4abaec5c04
3 changed files with 56 additions and 10 deletions
|
@ -384,8 +384,25 @@ func RenderBytes(ctx *RenderingContext) []byte {
|
|||
}
|
||||
}
|
||||
|
||||
// TotalWords returns the number of words in s, where a word is a maximal run
// of consecutive characters that are not white space as defined by
// unicode.IsSpace.
//
// This is a cheaper way of word counting than the obvious
// len(strings.Fields(s)), because the words themselves are never collected.
func TotalWords(s string) int {
	n := 0
	inWord := false
	for _, r := range s {
		wasInWord := inWord
		inWord = !unicode.IsSpace(r)
		// Count a word only on the transition from white space (or the
		// start of s) into a non-space character.
		if inWord && !wasInWord {
			n++
		}
	}
	return n
}
|
||||
|
||||
// totalWordsOld is the old word-counting implementation, kept only for
// benchmark comparison. It splits s with strings.Fields and returns the number
// of resulting fields.
// TODO(bep) remove
func totalWordsOld(s string) int {
	return len(strings.Fields(s))
}
|
||||
|
||||
|
|
|
@ -408,12 +408,45 @@ func TestExtractNoTOC(t *testing.T) {
|
|||
}
|
||||
}
|
||||
|
||||
func TestTotalWords(t *testing.T) {
|
||||
testString := "Two, Words!"
|
||||
actualWordCount := TotalWords(testString)
|
||||
var totalWordsBenchmarkString = strings.Repeat("Hugo Rocks ", 200)
|
||||
|
||||
if actualWordCount != 2 {
|
||||
t.Errorf("Actual word count (%d) for test string (%s) did not match 2.", actualWordCount, testString)
|
||||
func TestTotalWords(t *testing.T) {
|
||||
|
||||
for i, this := range []struct {
|
||||
s string
|
||||
words int
|
||||
}{
|
||||
{"Two, Words!", 2},
|
||||
{"Word", 1},
|
||||
{"", 0},
|
||||
{"One, Two, Three", 3},
|
||||
{totalWordsBenchmarkString, 400},
|
||||
} {
|
||||
actualWordCount := TotalWords(this.s)
|
||||
|
||||
if actualWordCount != this.words {
|
||||
t.Errorf("[%d] Actual word count (%d) for test string (%s) did not match %d", i, actualWordCount, this.s, this.words)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkTotalWords(b *testing.B) {
|
||||
b.ResetTimer()
|
||||
for i := 0; i < b.N; i++ {
|
||||
wordCount := TotalWords(totalWordsBenchmarkString)
|
||||
if wordCount != 400 {
|
||||
b.Fatal("Wordcount error")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkTotalWordsOld(b *testing.B) {
|
||||
b.ResetTimer()
|
||||
for i := 0; i < b.N; i++ {
|
||||
wordCount := totalWordsOld(totalWordsBenchmarkString)
|
||||
if wordCount != 400 {
|
||||
b.Fatal("Wordcount error")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -486,10 +486,6 @@ func (p *Page) ReadFrom(buf io.Reader) (int64, error) {
|
|||
}
|
||||
|
||||
func (p *Page) analyzePage() {
|
||||
// TODO(bep)
|
||||
if true {
|
||||
return
|
||||
}
|
||||
if p.isCJKLanguage {
|
||||
p.WordCount = 0
|
||||
for _, word := range p.PlainWords() {
|
||||
|
|
Loading…
Reference in a new issue