diff --git a/hugolib/site_test.go b/hugolib/site_test.go index cd7ce51f8..365679a32 100644 --- a/hugolib/site_test.go +++ b/hugolib/site_test.go @@ -1113,7 +1113,7 @@ ABC. els := stats.HTMLElements b.Assert(els.Classes, qt.HasLen, 3606) // (4 * 900) + 4 +2 - b.Assert(els.Tags, qt.HasLen, 9) + b.Assert(els.Tags, qt.HasLen, 8) b.Assert(els.IDs, qt.HasLen, 1) } } diff --git a/publisher/htmlElementsCollector.go b/publisher/htmlElementsCollector.go index d9479aafa..9f4be1ff5 100644 --- a/publisher/htmlElementsCollector.go +++ b/publisher/htmlElementsCollector.go @@ -20,22 +20,11 @@ import ( "strings" "sync" - "github.com/gohugoio/hugo/helpers" "golang.org/x/net/html" + + "github.com/gohugoio/hugo/helpers" ) -func newHTMLElementsCollector() *htmlElementsCollector { - return &htmlElementsCollector{ - elementSet: make(map[string]bool), - } -} - -func newHTMLElementsCollectorWriter(collector *htmlElementsCollector) *cssClassCollectorWriter { - return &cssClassCollectorWriter{ - collector: collector, - } -} - // HTMLElements holds lists of tags and attribute values for classes and id. type HTMLElements struct { Tags []string `json:"tags"` @@ -59,152 +48,6 @@ func (h *HTMLElements) Sort() { sort.Strings(h.IDs) } -type cssClassCollectorWriter struct { - collector *htmlElementsCollector - buff bytes.Buffer - - isCollecting bool - inPreTag string - - inQuote bool - quoteValue byte -} - -func (w *cssClassCollectorWriter) Write(p []byte) (n int, err error) { - n = len(p) - i := 0 - - for i < len(p) { - if !w.isCollecting { - for ; i < len(p); i++ { - b := p[i] - if b == '<' { - w.startCollecting() - break - } - } - } - - if w.isCollecting { - for ; i < len(p); i++ { - b := p[i] - w.toggleIfQuote(b) - if !w.inQuote && b == '>' { - w.endCollecting() - break - } - w.buff.WriteByte(b) - } - - if !w.isCollecting { - if w.inPreTag != "" { - s := w.buff.String() - if tagName, isEnd := w.parseEndTag(s); isEnd && w.inPreTag == tagName { - w.inPreTag = "" - } - w.buff.Reset() - continue - } - - // First check if we have processed this element before. - w.collector.mu.RLock() - - // See https://github.com/dominikh/go-tools/issues/723 - //lint:ignore S1030 This construct avoids memory allocation for the string. - seen := w.collector.elementSet[string(w.buff.Bytes())] - w.collector.mu.RUnlock() - if seen { - w.buff.Reset() - continue - } - - s := w.buff.String() - - w.buff.Reset() - - if strings.HasPrefix(s, "") { - continue - } - - key := s - - s, tagName := w.insertStandinHTMLElement(s) - el := parseHTMLElement(s) - el.Tag = tagName - if w.isPreFormatted(tagName) { - w.inPreTag = tagName - } - - w.collector.mu.Lock() - w.collector.elementSet[key] = true - if el.Tag != "" { - w.collector.elements = append(w.collector.elements, el) - } - w.collector.mu.Unlock() - - } - } - } - - return -} - -// No need to look inside these for HTML elements. -func (c *cssClassCollectorWriter) isPreFormatted(s string) bool { - return s == "pre" || s == "textarea" || s == "script" -} - -// The net/html parser does not handle single table elements as input, e.g. tbody. -// We only care about the element/class/ids, so just store away the original tag name -// and pretend it's a
,