mirror of
https://github.com/gohugoio/hugo.git
synced 2024-11-21 20:46:30 -05:00
Make the HTML collector parsing more robust
Most notably, better handling of self-closing elements. Closes #10698
This commit is contained in:
parent
2a364cca64
commit
d33a7ebcc1
2 changed files with 17 additions and 4 deletions
|
@ -294,9 +294,10 @@ func htmlLexElementStart(w *htmlElementsCollectorWriter) htmlCollectorStateFunc
|
||||||
}
|
}
|
||||||
|
|
||||||
tagName := w.buff.Bytes()[1:]
|
tagName := w.buff.Bytes()[1:]
|
||||||
|
isSelfClosing := tagName[len(tagName)-1] == '/'
|
||||||
|
|
||||||
switch {
|
switch {
|
||||||
case skipInnerElementRe.Match(tagName):
|
case !isSelfClosing && skipInnerElementRe.Match(tagName):
|
||||||
// pre, script etc. We collect classes etc. on the surrounding
|
// pre, script etc. We collect classes etc. on the surrounding
|
||||||
// element, but skip the inner content.
|
// element, but skip the inner content.
|
||||||
w.backup()
|
w.backup()
|
||||||
|
@ -432,10 +433,18 @@ func parseStartTag(s string) string {
|
||||||
})
|
})
|
||||||
|
|
||||||
if spaceIndex == -1 {
|
if spaceIndex == -1 {
|
||||||
return s[1 : len(s)-1]
|
s = s[1 : len(s)-1]
|
||||||
|
} else {
|
||||||
|
s = s[1:spaceIndex]
|
||||||
}
|
}
|
||||||
|
|
||||||
return s[1:spaceIndex]
|
if s[len(s)-1] == '/' {
|
||||||
|
// Self closing.
|
||||||
|
s = s[:len(s)-1]
|
||||||
|
}
|
||||||
|
|
||||||
|
return s
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// isClosedByTag reports whether b ends with a closing tag for tagName.
|
// isClosedByTag reports whether b ends with a closing tag for tagName.
|
||||||
|
@ -487,7 +496,7 @@ LOOP:
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if state != 2 {
|
if state != 2 || lo >= hi {
|
||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -110,6 +110,9 @@ func TestClassCollector(t *testing.T) {
|
||||||
{"DOCTYPE should beskipped", `<!DOCTYPE html>`, f("", "", "")},
|
{"DOCTYPE should beskipped", `<!DOCTYPE html>`, f("", "", "")},
|
||||||
{"Comments should be skipped", `<!-- example comment -->`, f("", "", "")},
|
{"Comments should be skipped", `<!-- example comment -->`, f("", "", "")},
|
||||||
{"Comments with elements before and after", `<div></div><!-- example comment --><span><span>`, f("div span", "", "")},
|
{"Comments with elements before and after", `<div></div><!-- example comment --><span><span>`, f("div span", "", "")},
|
||||||
|
{"Self closing tag", `<div><hr/></div>`, f("div hr", "", "")},
|
||||||
|
// svg with self closing style tag.
|
||||||
|
{"SVG with self closing style tag", `<svg><style/><g><path class="foo"/></g></svg>`, f("g path style svg", "foo", "")},
|
||||||
// Issue #8530
|
// Issue #8530
|
||||||
{"Comment with single quote", `<!-- Hero Area Image d'accueil --><i class="foo">`, f("i", "foo", "")},
|
{"Comment with single quote", `<!-- Hero Area Image d'accueil --><i class="foo">`, f("i", "foo", "")},
|
||||||
{"Uppercase tags", `<DIV></DIV>`, f("div", "", "")},
|
{"Uppercase tags", `<DIV></DIV>`, f("div", "", "")},
|
||||||
|
@ -174,6 +177,7 @@ func TestEndsWithTag(t *testing.T) {
|
||||||
{"match space", "foo< / div>", "div", true},
|
{"match space", "foo< / div>", "div", true},
|
||||||
{"match space 2", "foo< / div \n>", "div", true},
|
{"match space 2", "foo< / div \n>", "div", true},
|
||||||
{"match case", "foo</DIV>", "div", true},
|
{"match case", "foo</DIV>", "div", true},
|
||||||
|
{"self closing", `</defs><g><g><path fill="#010101" d=asdf"/>`, "div", false},
|
||||||
} {
|
} {
|
||||||
c.Run(test.name, func(c *qt.C) {
|
c.Run(test.name, func(c *qt.C) {
|
||||||
got := isClosedByTag([]byte(test.s), []byte(test.tagName))
|
got := isClosedByTag([]byte(test.s), []byte(test.tagName))
|
||||||
|
|
Loading…
Reference in a new issue