mirror of
https://github.com/gohugoio/hugo.git
synced 2024-11-07 20:30:36 -05:00
Fix slow HTML elements collector for the pre case
```
name                           old time/op    new time/op    delta
ElementsCollectorWriterPre-10  25.2µs ± 1%    3.4µs ± 0%     -86.54%  (p=0.029 n=4+4)

name                           old alloc/op   new alloc/op   delta
ElementsCollectorWriterPre-10  624B ± 0%      142B ± 0%      -77.18%  (p=0.029 n=4+4)

name                           old allocs/op  new allocs/op  delta
ElementsCollectorWriterPre-10  16.0 ± 0%      6.0 ± 0%       -62.50%  (p=0.029 n=4+4)
```

Fixes #10698
This commit is contained in:
parent
4f4a1c00bf
commit
f9fc0e045b
2 changed files with 93 additions and 8 deletions
|
@ -36,7 +36,6 @@ var (
|
||||||
|
|
||||||
skipInnerElementRe = regexp.MustCompile(`(?i)^(pre|textarea|script|style)`)
|
skipInnerElementRe = regexp.MustCompile(`(?i)^(pre|textarea|script|style)`)
|
||||||
skipAllElementRe = regexp.MustCompile(`(?i)^!DOCTYPE`)
|
skipAllElementRe = regexp.MustCompile(`(?i)^!DOCTYPE`)
|
||||||
endTagRe = regexp.MustCompile(`(?i)<\/\s*([a-zA-Z]+)\s*>$`)
|
|
||||||
|
|
||||||
exceptionList = map[string]bool{
|
exceptionList = map[string]bool{
|
||||||
"thead": true,
|
"thead": true,
|
||||||
|
@ -312,11 +311,7 @@ func htmlLexElementStart(w *htmlElementsCollectorWriter) htmlCollectorStateFunc
|
||||||
if w.r != '>' {
|
if w.r != '>' {
|
||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
m := endTagRe.FindSubmatch(w.buff.Bytes())
|
return isClosedByTag(w.buff.Bytes(), tagNameCopy)
|
||||||
if m == nil {
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
return bytes.EqualFold(m[1], tagNameCopy)
|
|
||||||
},
|
},
|
||||||
htmlLexStart,
|
htmlLexStart,
|
||||||
))
|
))
|
||||||
|
@ -428,8 +423,9 @@ func parseHTMLElement(elStr string) (el htmlElement, err error) {
|
||||||
}
|
}
|
||||||
|
|
||||||
// Variants of s
|
// Variants of s
|
||||||
// <body class="b a">
|
//
|
||||||
// <div>
|
// <body class="b a">
|
||||||
|
// <div>
|
||||||
func parseStartTag(s string) string {
|
func parseStartTag(s string) string {
|
||||||
spaceIndex := strings.IndexFunc(s, func(r rune) bool {
|
spaceIndex := strings.IndexFunc(s, func(r rune) bool {
|
||||||
return unicode.IsSpace(r)
|
return unicode.IsSpace(r)
|
||||||
|
@ -441,3 +437,64 @@ func parseStartTag(s string) string {
|
||||||
|
|
||||||
return s[1:spaceIndex]
|
return s[1:spaceIndex]
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// isClosedByTag reports whether b ends with a closing tag for tagName, such
// as "</pre>". The tag name is compared case-insensitively, and ASCII
// whitespace is tolerated around the slash and the tag name, so
// "< / pre \n>" also counts as closing "pre".
//
// b is scanned backwards from its final '>'. Anything other than whitespace
// between '<' and '/' (for example "<x/pre>") is not a closing tag and
// yields false.
func isClosedByTag(b, tagName []byte) bool {
	if len(b) == 0 || b[len(b)-1] != '>' {
		return false
	}

	var (
		// b[lo:hi] is the word closest to the slash, i.e. the candidate
		// tag name.
		lo, hi int

		slashSeen bool // the '/' of the closing tag has been passed
		inWord    bool // the scan is currently inside a run of non-space bytes
	)

	for i := len(b) - 2; i >= 0; i-- {
		switch c := b[i]; {
		case c == '<':
			// A closing tag needs exactly one '/' between '<' and the name.
			return slashSeen && bytes.EqualFold(tagName, b[lo:hi])
		case c == '/':
			if slashSeen {
				return false
			}
			slashSeen = true
			if inWord {
				lo = i + 1
				inWord = false
			}
		case isSpace(c):
			if inWord {
				lo = i + 1
				inWord = false
			}
		case slashSeen:
			// Only whitespace may sit between '<' and '/'. Rejecting here
			// also keeps hi from moving below lo, which would make the
			// slice expression above panic.
			return false
		default:
			if !inWord {
				hi = i + 1
				inWord = true
			}
		}
	}

	// Ran out of input without finding the opening '<'.
	return false
}

// isSpace reports whether b is an ASCII whitespace byte as used in HTML
// markup: space, tab, line feed, form feed or carriage return.
func isSpace(b byte) bool {
	switch b {
	case ' ', '\t', '\n', '\f', '\r':
		return true
	}
	return false
}
|
||||||
|
|
|
@ -155,6 +155,34 @@ func TestClassCollector(t *testing.T) {
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestEndsWithTag(t *testing.T) {
|
||||||
|
c := qt.New((t))
|
||||||
|
|
||||||
|
for _, test := range []struct {
|
||||||
|
name string
|
||||||
|
s string
|
||||||
|
tagName string
|
||||||
|
expect bool
|
||||||
|
}{
|
||||||
|
{"empty", "", "div", false},
|
||||||
|
{"no match", "foo", "div", false},
|
||||||
|
{"no close", "foo<div>", "div", false},
|
||||||
|
{"no close 2", "foo/div>", "div", false},
|
||||||
|
{"no close 2", "foo//div>", "div", false},
|
||||||
|
{"no tag", "foo</>", "div", false},
|
||||||
|
{"match", "foo</div>", "div", true},
|
||||||
|
{"match space", "foo< / div>", "div", true},
|
||||||
|
{"match space 2", "foo< / div \n>", "div", true},
|
||||||
|
{"match case", "foo</DIV>", "div", true},
|
||||||
|
} {
|
||||||
|
c.Run(test.name, func(c *qt.C) {
|
||||||
|
got := isClosedByTag([]byte(test.s), []byte(test.tagName))
|
||||||
|
c.Assert(got, qt.Equals, test.expect)
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
func BenchmarkElementsCollectorWriter(b *testing.B) {
|
func BenchmarkElementsCollectorWriter(b *testing.B) {
|
||||||
const benchHTML = `
|
const benchHTML = `
|
||||||
<!DOCTYPE html>
|
<!DOCTYPE html>
|
||||||
|
|
Loading…
Reference in a new issue