mirror of
https://github.com/gohugoio/hugo.git
synced 2024-11-21 20:46:30 -05:00
Fix slow HTML elements collector for the pre case
```
name                           old time/op    new time/op    delta
ElementsCollectorWriterPre-10  25.2µs ± 1%    3.4µs ± 0%     -86.54%  (p=0.029 n=4+4)

name                           old alloc/op   new alloc/op   delta
ElementsCollectorWriterPre-10  624B ± 0%      142B ± 0%      -77.18%  (p=0.029 n=4+4)

name                           old allocs/op  new allocs/op  delta
ElementsCollectorWriterPre-10  16.0 ± 0%      6.0 ± 0%       -62.50%  (p=0.029 n=4+4)
```

Fixes #10698
This commit is contained in:
parent
4f4a1c00bf
commit
f9fc0e045b
2 changed files with 93 additions and 8 deletions
|
@ -36,7 +36,6 @@ var (
|
|||
|
||||
skipInnerElementRe = regexp.MustCompile(`(?i)^(pre|textarea|script|style)`)
|
||||
skipAllElementRe = regexp.MustCompile(`(?i)^!DOCTYPE`)
|
||||
endTagRe = regexp.MustCompile(`(?i)<\/\s*([a-zA-Z]+)\s*>$`)
|
||||
|
||||
exceptionList = map[string]bool{
|
||||
"thead": true,
|
||||
|
@ -312,11 +311,7 @@ func htmlLexElementStart(w *htmlElementsCollectorWriter) htmlCollectorStateFunc
|
|||
if w.r != '>' {
|
||||
return false
|
||||
}
|
||||
m := endTagRe.FindSubmatch(w.buff.Bytes())
|
||||
if m == nil {
|
||||
return false
|
||||
}
|
||||
return bytes.EqualFold(m[1], tagNameCopy)
|
||||
return isClosedByTag(w.buff.Bytes(), tagNameCopy)
|
||||
},
|
||||
htmlLexStart,
|
||||
))
|
||||
|
@ -428,6 +423,7 @@ func parseHTMLElement(elStr string) (el htmlElement, err error) {
|
|||
}
|
||||
|
||||
// Variants of s
|
||||
//
|
||||
// <body class="b a">
|
||||
// <div>
|
||||
func parseStartTag(s string) string {
|
||||
|
@ -441,3 +437,64 @@ func parseStartTag(s string) string {
|
|||
|
||||
return s[1:spaceIndex]
|
||||
}
|
||||
|
||||
// isClosedByTag reports whether b ends with a closing tag for tagName,
// for example "</pre>", "< / pre >" or "</PRE>". The tag name comparison is
// case-insensitive and whitespace around the slash and the name is ignored.
//
// The buffer is scanned backwards from the trailing '>' so that only the
// tail of b is inspected, regardless of how much content precedes it.
func isClosedByTag(b, tagName []byte) bool {
	if len(b) == 0 {
		return false
	}

	if b[len(b)-1] != '>' {
		return false
	}

	var (
		// b[lo:hi] is the candidate tag name once the scan has finished.
		lo int
		hi int

		// state tracks what has been seen so far while walking backwards:
		// 0 = nothing, 1 = the '/', 2 = the opening '<'.
		state  int
		inWord bool
	)

LOOP:
	for i := len(b) - 2; i >= 0; i-- {
		switch {
		case b[i] == '<':
			// A '<' that is not preceded (in scan order) by exactly one
			// '/' belongs to a start tag, not a closing tag.
			if state != 1 {
				return false
			}
			state = 2
			break LOOP
		case b[i] == '/':
			if state != 0 {
				return false
			}
			state++
			if inWord {
				lo = i + 1
				inWord = false
			}
		case isSpace(b[i]):
			if inWord {
				lo = i + 1
				inWord = false
			}
		default:
			if !inWord {
				hi = i + 1
				inWord = true
			}
		}
	}

	// lo >= hi means that no tag name was found, or that a word sits between
	// '<' and '/' (as in "<a/div>"), which moves hi below lo. Slicing b[lo:hi]
	// in that case would panic, and it is not a closing tag anyway.
	if state != 2 || lo >= hi {
		return false
	}

	return bytes.EqualFold(tagName, b[lo:hi])
}

// isSpace reports whether b is an ASCII whitespace character as defined by
// the HTML standard: space, tab, line feed, form feed or carriage return.
func isSpace(b byte) bool {
	return b == ' ' || b == '\t' || b == '\n' || b == '\r' || b == '\f'
}
|
||||
|
|
|
@ -155,6 +155,34 @@ func TestClassCollector(t *testing.T) {
|
|||
|
||||
}
|
||||
|
||||
func TestEndsWithTag(t *testing.T) {
|
||||
c := qt.New((t))
|
||||
|
||||
for _, test := range []struct {
|
||||
name string
|
||||
s string
|
||||
tagName string
|
||||
expect bool
|
||||
}{
|
||||
{"empty", "", "div", false},
|
||||
{"no match", "foo", "div", false},
|
||||
{"no close", "foo<div>", "div", false},
|
||||
{"no close 2", "foo/div>", "div", false},
|
||||
{"no close 2", "foo//div>", "div", false},
|
||||
{"no tag", "foo</>", "div", false},
|
||||
{"match", "foo</div>", "div", true},
|
||||
{"match space", "foo< / div>", "div", true},
|
||||
{"match space 2", "foo< / div \n>", "div", true},
|
||||
{"match case", "foo</DIV>", "div", true},
|
||||
} {
|
||||
c.Run(test.name, func(c *qt.C) {
|
||||
got := isClosedByTag([]byte(test.s), []byte(test.tagName))
|
||||
c.Assert(got, qt.Equals, test.expect)
|
||||
})
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
func BenchmarkElementsCollectorWriter(b *testing.B) {
|
||||
const benchHTML = `
|
||||
<!DOCTYPE html>
|
||||
|
|
Loading…
Reference in a new issue