publisher: Skip script, pre and textarea content when looking for HTML elements

Updates #7567
This commit is contained in:
Bjørn Erik Pedersen 2021-04-06 18:19:25 +02:00
parent 7b4ade56dd
commit 8a308944e4
2 changed files with 66 additions and 39 deletions

View file

@ -64,7 +64,7 @@ type cssClassCollectorWriter struct {
buff bytes.Buffer buff bytes.Buffer
isCollecting bool isCollecting bool
dropValue bool inPreTag string
inQuote bool inQuote bool
quoteValue byte quoteValue byte
@ -90,49 +90,58 @@ func (w *cssClassCollectorWriter) Write(p []byte) (n int, err error) {
b := p[i] b := p[i]
w.toggleIfQuote(b) w.toggleIfQuote(b)
if !w.inQuote && b == '>' { if !w.inQuote && b == '>' {
w.endCollecting(false) w.endCollecting()
break break
} }
w.buff.WriteByte(b) w.buff.WriteByte(b)
} }
if !w.isCollecting { if !w.isCollecting {
if w.dropValue { if w.inPreTag != "" {
w.buff.Reset()
} else {
// First check if we have processed this element before.
w.collector.mu.RLock()
// See https://github.com/dominikh/go-tools/issues/723
//lint:ignore S1030 This construct avoids memory allocation for the string.
seen := w.collector.elementSet[string(w.buff.Bytes())]
w.collector.mu.RUnlock()
if seen {
w.buff.Reset()
continue
}
s := w.buff.String() s := w.buff.String()
if tagName, isEnd := w.parseEndTag(s); isEnd && w.inPreTag == tagName {
w.inPreTag = ""
}
w.buff.Reset() w.buff.Reset()
continue
if strings.HasPrefix(s, "</") {
continue
}
key := s
s, tagName := w.insertStandinHTMLElement(s)
el := parseHTMLElement(s)
el.Tag = tagName
w.collector.mu.Lock()
w.collector.elementSet[key] = true
if el.Tag != "" {
w.collector.elements = append(w.collector.elements, el)
}
w.collector.mu.Unlock()
} }
// First check if we have processed this element before.
w.collector.mu.RLock()
// See https://github.com/dominikh/go-tools/issues/723
//lint:ignore S1030 This construct avoids memory allocation for the string.
seen := w.collector.elementSet[string(w.buff.Bytes())]
w.collector.mu.RUnlock()
if seen {
w.buff.Reset()
continue
}
s := w.buff.String()
w.buff.Reset()
if strings.HasPrefix(s, "</") {
continue
}
key := s
s, tagName := w.insertStandinHTMLElement(s)
el := parseHTMLElement(s)
el.Tag = tagName
if w.isPreFormatted(tagName) {
w.inPreTag = tagName
}
w.collector.mu.Lock()
w.collector.elementSet[key] = true
if el.Tag != "" {
w.collector.elements = append(w.collector.elements, el)
}
w.collector.mu.Unlock()
} }
} }
} }
@ -140,6 +149,11 @@ func (w *cssClassCollectorWriter) Write(p []byte) (n int, err error) {
return return
} }
// isPreFormatted reports whether s is the name of an element (pre, textarea
// or script) whose content does not need to be searched for HTML elements.
// The comparison is exact, so s must match one of those names byte for byte.
func (c *cssClassCollectorWriter) isPreFormatted(s string) bool {
	switch s {
	case "pre", "textarea", "script":
		return true
	default:
		return false
	}
}
// The net/html parser does not handle single table elements as input, e.g. tbody. // The net/html parser does not handle single table elements as input, e.g. tbody.
// We only care about the element/class/ids, so just store away the original tag name // We only care about the element/class/ids, so just store away the original tag name
// and pretend it's a <div>. // and pretend it's a <div>.
@ -154,15 +168,24 @@ func (c *cssClassCollectorWriter) insertStandinHTMLElement(el string) (string, s
return newv, strings.ToLower(tag) return newv, strings.ToLower(tag)
} }
func (c *cssClassCollectorWriter) endCollecting(drop bool) { func (c *cssClassCollectorWriter) parseEndTag(s string) (string, bool) {
if !strings.HasPrefix(s, "</") {
return "", false
}
s = strings.TrimPrefix(s, "</")
s = strings.TrimSuffix(s, ">")
return strings.ToLower(strings.TrimSpace(s)), true
}
func (c *cssClassCollectorWriter) endCollecting() {
c.isCollecting = false c.isCollecting = false
c.inQuote = false c.inQuote = false
c.dropValue = drop
} }
func (c *cssClassCollectorWriter) startCollecting() { func (c *cssClassCollectorWriter) startCollecting() {
c.isCollecting = true c.isCollecting = true
c.dropValue = false
} }
func (c *cssClassCollectorWriter) toggleIfQuote(b byte) { func (c *cssClassCollectorWriter) toggleIfQuote(b byte) {

View file

@ -89,8 +89,12 @@ func TestClassCollector(t *testing.T) {
{"Alpine transition 1", `<div x-transition:enter-start="opacity-0 transform mobile:-translate-x-8 sm:-translate-y-8">`, f("div", "mobile:-translate-x-8 opacity-0 sm:-translate-y-8 transform", "")}, {"Alpine transition 1", `<div x-transition:enter-start="opacity-0 transform mobile:-translate-x-8 sm:-translate-y-8">`, f("div", "mobile:-translate-x-8 opacity-0 sm:-translate-y-8 transform", "")},
{"Vue bind", `<div v-bind:class="{ active: isActive }"></div>`, f("div", "active", "")}, {"Vue bind", `<div v-bind:class="{ active: isActive }"></div>`, f("div", "active", "")},
// https://github.com/gohugoio/hugo/issues/7746 // Issue #7746
{"Apostrophe inside attribute value", `<a class="missingclass" title="Plus d'information">my text</a><div></div>`, f("a div", "missingclass", "")}, {"Apostrophe inside attribute value", `<a class="missingclass" title="Plus d'information">my text</a><div></div>`, f("a div", "missingclass", "")},
// Issue #7567
{"Script tags content should be skipped", `<script><span>foo</span><span>bar</span></script><div class="foo"></div>`, f("div script", "foo", "")},
{"Pre tags content should be skipped", `<pre class="preclass"><span>foo</span><span>bar</span></pre><div class="foo"></div>`, f("div pre", "foo preclass", "")},
{"Textare tags content should be skipped", `<textarea class="textareaclass"><span>foo</span><span>bar</span></textarea><div class="foo"></div>`, f("div textarea", "foo textareaclass", "")},
} { } {
c.Run(test.name, func(c *qt.C) { c.Run(test.name, func(c *qt.C) {
w := newHTMLElementsCollectorWriter(newHTMLElementsCollector()) w := newHTMLElementsCollectorWriter(newHTMLElementsCollector())