mirror of
https://github.com/gohugoio/hugo.git
synced 2024-11-21 20:46:30 -05:00
WordCount Summary support UTF-8 string
This commit is contained in:
parent
c7521b3d67
commit
0e1fd78fb2
3 changed files with 63 additions and 14 deletions
|
@ -19,6 +19,7 @@ package helpers
|
|||
|
||||
import (
|
||||
"bytes"
|
||||
"unicode/utf8"
|
||||
"html/template"
|
||||
"os/exec"
|
||||
|
||||
|
@ -386,21 +387,57 @@ func TruncateWords(s string, max int) string {
|
|||
// and returns entire sentences from content, delimited by the int
|
||||
// and whether it's truncated or not.
|
||||
//
// Counting is UTF-8 aware. A word made up only of single-byte characters
// counts as one unit towards max. A word that contains any multi-byte
// character contributes one unit per rune, so text written without spaces
// between words is still limited sensibly; such a word is cut on a rune
// boundary when it would exceed max.
//
// Once max is reached on a word boundary, following single-byte words are
// appended up to and including the first one that ends a sentence (".", "?",
// "!" or `."`). If no such word is found, the result stops at the word that
// reached max. When a word has to be cut, the result ends at the cut.
//
// The words slice is never modified. The boolean result reports whether any
// input was left out of the returned string.
func TruncateWordsToWholeSentence(words []string, max int) (string, bool) {
	count := 0

	for i, word := range words {
		runeCount := utf8.RuneCountInString(word)

		switch {
		case runeCount == len(word):
			// Only single-byte characters: one whole word.
			count++
		case count+runeCount <= max:
			// Multi-byte word that still fits: every rune counts.
			count += runeCount
		default:
			// Multi-byte word that overshoots max: keep only the runes that
			// still fit. The kept part goes into a fresh slice so the
			// caller's words are left untouched.
			offset := 0
			for ; count < max; count++ {
				_, width := utf8.DecodeRuneInString(word[offset:])
				offset += width
			}
			kept := make([]string, 0, i+1)
			kept = append(kept, words[:i]...)
			if offset > 0 {
				kept = append(kept, word[:offset])
			}
			// Stop at the cut: appending later words would hide the gap.
			return strings.Join(kept, " "), true
		}

		if count < max {
			continue
		}

		// The limit was reached on a word boundary; try to finish the
		// sentence using the following single-byte words.
		next := i + 1
		for j := next; j < len(words); j++ {
			following := words[j]
			if len(following) != utf8.RuneCountInString(following) {
				break
			}
			if strings.HasSuffix(following, ".") ||
				strings.HasSuffix(following, "?") ||
				strings.HasSuffix(following, ".\"") ||
				strings.HasSuffix(following, "!") {
				upper := j + 1
				return strings.Join(words[:upper], " "), upper < len(words)
			}
		}

		return strings.Join(words[:next], " "), next < len(words)
	}

	// max was never reached: everything fits.
	return strings.Join(words, " "), false
}
|
||||
|
||||
// GetAsciidocContent calls asciidoctor or asciidoc as an external helper
|
||||
|
|
|
@ -54,6 +54,8 @@ func TestTruncateWordsToWholeSentence(t *testing.T) {
|
|||
{"a b c", "a b c", 12, false},
|
||||
{"a b c", "a b c", 3, false},
|
||||
{"a", "a", 1, false},
|
||||
{"Hello 中国", "Hello 中", 2, true},
|
||||
{"Hello 中国", "Hello 中国", 3, false},
|
||||
{"This is a sentence.", "This is a sentence.", 5, false},
|
||||
{"This is also a sentence!", "This is also a sentence!", 1, false},
|
||||
{"To be. Or not to be. That's the question.", "To be.", 1, true},
|
||||
|
|
|
@ -31,6 +31,7 @@ import (
|
|||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
"unicode/utf8"
|
||||
|
||||
"github.com/spf13/cast"
|
||||
bp "github.com/spf13/hugo/bufferpool"
|
||||
|
@ -362,7 +363,16 @@ func (p *Page) ReadFrom(buf io.Reader) (int64, error) {
|
|||
}
|
||||
|
||||
func (p *Page) analyzePage() {
|
||||
p.WordCount = len(p.PlainWords())
|
||||
p.WordCount = 0
|
||||
for _, word := range p.PlainWords() {
|
||||
runeCount := utf8.RuneCountInString(word)
|
||||
if len(word) == runeCount {
|
||||
p.WordCount++
|
||||
} else {
|
||||
p.WordCount += runeCount
|
||||
}
|
||||
}
|
||||
|
||||
p.FuzzyWordCount = int((p.WordCount+100)/100) * 100
|
||||
p.ReadingTime = int((p.WordCount + 212) / 213)
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue