mirror of
https://github.com/gohugoio/hugo.git
synced 2024-11-21 20:46:30 -05:00
publisher: Skip script, pre and textarea content when looking for HTML elements
Updates #7567
This commit is contained in:
parent
7b4ade56dd
commit
8a308944e4
2 changed files with 66 additions and 39 deletions
|
@ -64,7 +64,7 @@ type cssClassCollectorWriter struct {
|
||||||
buff bytes.Buffer
|
buff bytes.Buffer
|
||||||
|
|
||||||
isCollecting bool
|
isCollecting bool
|
||||||
dropValue bool
|
inPreTag string
|
||||||
|
|
||||||
inQuote bool
|
inQuote bool
|
||||||
quoteValue byte
|
quoteValue byte
|
||||||
|
@ -90,49 +90,58 @@ func (w *cssClassCollectorWriter) Write(p []byte) (n int, err error) {
|
||||||
b := p[i]
|
b := p[i]
|
||||||
w.toggleIfQuote(b)
|
w.toggleIfQuote(b)
|
||||||
if !w.inQuote && b == '>' {
|
if !w.inQuote && b == '>' {
|
||||||
w.endCollecting(false)
|
w.endCollecting()
|
||||||
break
|
break
|
||||||
}
|
}
|
||||||
w.buff.WriteByte(b)
|
w.buff.WriteByte(b)
|
||||||
}
|
}
|
||||||
|
|
||||||
if !w.isCollecting {
|
if !w.isCollecting {
|
||||||
if w.dropValue {
|
if w.inPreTag != "" {
|
||||||
w.buff.Reset()
|
|
||||||
} else {
|
|
||||||
// First check if we have processed this element before.
|
|
||||||
w.collector.mu.RLock()
|
|
||||||
|
|
||||||
// See https://github.com/dominikh/go-tools/issues/723
|
|
||||||
//lint:ignore S1030 This construct avoids memory allocation for the string.
|
|
||||||
seen := w.collector.elementSet[string(w.buff.Bytes())]
|
|
||||||
w.collector.mu.RUnlock()
|
|
||||||
if seen {
|
|
||||||
w.buff.Reset()
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
|
|
||||||
s := w.buff.String()
|
s := w.buff.String()
|
||||||
|
if tagName, isEnd := w.parseEndTag(s); isEnd && w.inPreTag == tagName {
|
||||||
|
w.inPreTag = ""
|
||||||
|
}
|
||||||
w.buff.Reset()
|
w.buff.Reset()
|
||||||
|
continue
|
||||||
if strings.HasPrefix(s, "</") {
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
|
|
||||||
key := s
|
|
||||||
|
|
||||||
s, tagName := w.insertStandinHTMLElement(s)
|
|
||||||
el := parseHTMLElement(s)
|
|
||||||
el.Tag = tagName
|
|
||||||
|
|
||||||
w.collector.mu.Lock()
|
|
||||||
w.collector.elementSet[key] = true
|
|
||||||
if el.Tag != "" {
|
|
||||||
w.collector.elements = append(w.collector.elements, el)
|
|
||||||
}
|
|
||||||
w.collector.mu.Unlock()
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// First check if we have processed this element before.
|
||||||
|
w.collector.mu.RLock()
|
||||||
|
|
||||||
|
// See https://github.com/dominikh/go-tools/issues/723
|
||||||
|
//lint:ignore S1030 This construct avoids memory allocation for the string.
|
||||||
|
seen := w.collector.elementSet[string(w.buff.Bytes())]
|
||||||
|
w.collector.mu.RUnlock()
|
||||||
|
if seen {
|
||||||
|
w.buff.Reset()
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
s := w.buff.String()
|
||||||
|
|
||||||
|
w.buff.Reset()
|
||||||
|
|
||||||
|
if strings.HasPrefix(s, "</") {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
key := s
|
||||||
|
|
||||||
|
s, tagName := w.insertStandinHTMLElement(s)
|
||||||
|
el := parseHTMLElement(s)
|
||||||
|
el.Tag = tagName
|
||||||
|
if w.isPreFormatted(tagName) {
|
||||||
|
w.inPreTag = tagName
|
||||||
|
}
|
||||||
|
|
||||||
|
w.collector.mu.Lock()
|
||||||
|
w.collector.elementSet[key] = true
|
||||||
|
if el.Tag != "" {
|
||||||
|
w.collector.elements = append(w.collector.elements, el)
|
||||||
|
}
|
||||||
|
w.collector.mu.Unlock()
|
||||||
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -140,6 +149,11 @@ func (w *cssClassCollectorWriter) Write(p []byte) (n int, err error) {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// isPreFormatted reports whether s is a tag (pre, textarea or script) whose content is not searched for HTML elements.
|
||||||
|
func (c *cssClassCollectorWriter) isPreFormatted(s string) bool {
|
||||||
|
return s == "pre" || s == "textarea" || s == "script"
|
||||||
|
}
|
||||||
|
|
||||||
// The net/html parser does not handle single table elements as input, e.g. tbody.
|
// The net/html parser does not handle single table elements as input, e.g. tbody.
|
||||||
// We only care about the element/class/ids, so just store away the original tag name
|
// We only care about the element/class/ids, so just store away the original tag name
|
||||||
// and pretend it's a <div>.
|
// and pretend it's a <div>.
|
||||||
|
@ -154,15 +168,24 @@ func (c *cssClassCollectorWriter) insertStandinHTMLElement(el string) (string, s
|
||||||
return newv, strings.ToLower(tag)
|
return newv, strings.ToLower(tag)
|
||||||
}
|
}
|
||||||
|
|
||||||
func (c *cssClassCollectorWriter) endCollecting(drop bool) {
|
func (c *cssClassCollectorWriter) parseEndTag(s string) (string, bool) {
|
||||||
|
if !strings.HasPrefix(s, "</") {
|
||||||
|
return "", false
|
||||||
|
}
|
||||||
|
s = strings.TrimPrefix(s, "</")
|
||||||
|
s = strings.TrimSuffix(s, ">")
|
||||||
|
return strings.ToLower(strings.TrimSpace(s)), true
|
||||||
|
}
|
||||||
|
|
||||||
|
func (c *cssClassCollectorWriter) endCollecting() {
|
||||||
c.isCollecting = false
|
c.isCollecting = false
|
||||||
c.inQuote = false
|
c.inQuote = false
|
||||||
c.dropValue = drop
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func (c *cssClassCollectorWriter) startCollecting() {
|
func (c *cssClassCollectorWriter) startCollecting() {
|
||||||
c.isCollecting = true
|
c.isCollecting = true
|
||||||
c.dropValue = false
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func (c *cssClassCollectorWriter) toggleIfQuote(b byte) {
|
func (c *cssClassCollectorWriter) toggleIfQuote(b byte) {
|
||||||
|
|
|
@ -89,8 +89,12 @@ func TestClassCollector(t *testing.T) {
|
||||||
|
|
||||||
{"Alpine transition 1", `<div x-transition:enter-start="opacity-0 transform mobile:-translate-x-8 sm:-translate-y-8">`, f("div", "mobile:-translate-x-8 opacity-0 sm:-translate-y-8 transform", "")},
|
{"Alpine transition 1", `<div x-transition:enter-start="opacity-0 transform mobile:-translate-x-8 sm:-translate-y-8">`, f("div", "mobile:-translate-x-8 opacity-0 sm:-translate-y-8 transform", "")},
|
||||||
{"Vue bind", `<div v-bind:class="{ active: isActive }"></div>`, f("div", "active", "")},
|
{"Vue bind", `<div v-bind:class="{ active: isActive }"></div>`, f("div", "active", "")},
|
||||||
// https://github.com/gohugoio/hugo/issues/7746
|
// Issue #7746
|
||||||
{"Apostrophe inside attribute value", `<a class="missingclass" title="Plus d'information">my text</a><div></div>`, f("a div", "missingclass", "")},
|
{"Apostrophe inside attribute value", `<a class="missingclass" title="Plus d'information">my text</a><div></div>`, f("a div", "missingclass", "")},
|
||||||
|
// Issue #7567
|
||||||
|
{"Script tags content should be skipped", `<script><span>foo</span><span>bar</span></script><div class="foo"></div>`, f("div script", "foo", "")},
|
||||||
|
{"Pre tags content should be skipped", `<pre class="preclass"><span>foo</span><span>bar</span></pre><div class="foo"></div>`, f("div pre", "foo preclass", "")},
|
||||||
|
{"Textare tags content should be skipped", `<textarea class="textareaclass"><span>foo</span><span>bar</span></textarea><div class="foo"></div>`, f("div textarea", "foo textareaclass", "")},
|
||||||
} {
|
} {
|
||||||
c.Run(test.name, func(c *qt.C) {
|
c.Run(test.name, func(c *qt.C) {
|
||||||
w := newHTMLElementsCollectorWriter(newHTMLElementsCollector())
|
w := newHTMLElementsCollectorWriter(newHTMLElementsCollector())
|
||||||
|
|
Loading…
Reference in a new issue