mirror of
https://github.com/gohugoio/hugo.git
synced 2024-11-21 20:46:30 -05:00
Revert "publisher: Make the HTML element collector more robust"
This reverts commit ef0f1a7269
.
This commit is contained in:
parent
3f515f0e33
commit
dc6b7a75ff
3 changed files with 169 additions and 302 deletions
|
@ -45,25 +45,3 @@ func RemoveAccentsString(s string) string {
|
||||||
accentTransformerPool.Put(t)
|
accentTransformerPool.Put(t)
|
||||||
return s
|
return s
|
||||||
}
|
}
|
||||||
|
|
||||||
// Chunk splits s into strings of size.
|
|
||||||
func Chunk(s string, size int) []string {
|
|
||||||
if size >= len(s) {
|
|
||||||
return []string{s}
|
|
||||||
}
|
|
||||||
var chunks []string
|
|
||||||
chunk := make([]rune, size)
|
|
||||||
l := 0
|
|
||||||
for _, r := range s {
|
|
||||||
chunk[l] = r
|
|
||||||
l++
|
|
||||||
if l == size {
|
|
||||||
chunks = append(chunks, string(chunk))
|
|
||||||
l = 0
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if l > 0 {
|
|
||||||
chunks = append(chunks, string(chunk[:l]))
|
|
||||||
}
|
|
||||||
return chunks
|
|
||||||
}
|
|
||||||
|
|
|
@ -19,51 +19,12 @@ import (
|
||||||
"sort"
|
"sort"
|
||||||
"strings"
|
"strings"
|
||||||
"sync"
|
"sync"
|
||||||
"unicode"
|
|
||||||
"unicode/utf8"
|
|
||||||
|
|
||||||
"golang.org/x/net/html"
|
"golang.org/x/net/html"
|
||||||
|
|
||||||
"github.com/gohugoio/hugo/helpers"
|
"github.com/gohugoio/hugo/helpers"
|
||||||
)
|
)
|
||||||
|
|
||||||
const eof = -1
|
|
||||||
|
|
||||||
var (
|
|
||||||
htmlJsonFixer = strings.NewReplacer(", ", "\n")
|
|
||||||
jsonAttrRe = regexp.MustCompile(`'?(.*?)'?:.*`)
|
|
||||||
classAttrRe = regexp.MustCompile(`(?i)^class$|transition`)
|
|
||||||
|
|
||||||
skipInnerElementRe = regexp.MustCompile(`(?i)^(pre|textarea|script|style)`)
|
|
||||||
skipAllElementRe = regexp.MustCompile(`(?i)^!DOCTYPE`)
|
|
||||||
endTagRe = regexp.MustCompile(`(?i)<\/\s*([a-zA-Z]+)\s*>$`)
|
|
||||||
|
|
||||||
exceptionList = map[string]bool{
|
|
||||||
"thead": true,
|
|
||||||
"tbody": true,
|
|
||||||
"tfoot": true,
|
|
||||||
"td": true,
|
|
||||||
"tr": true,
|
|
||||||
}
|
|
||||||
)
|
|
||||||
|
|
||||||
func newHTMLElementsCollector() *htmlElementsCollector {
|
|
||||||
return &htmlElementsCollector{
|
|
||||||
elementSet: make(map[string]bool),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func newHTMLElementsCollectorWriter(collector *htmlElementsCollector) *htmlElementsCollectorWriter {
|
|
||||||
w := &htmlElementsCollectorWriter{
|
|
||||||
collector: collector,
|
|
||||||
state: htmlLexStart,
|
|
||||||
}
|
|
||||||
|
|
||||||
w.defaultLexElementInside = w.lexElementInside(htmlLexStart)
|
|
||||||
|
|
||||||
return w
|
|
||||||
}
|
|
||||||
|
|
||||||
// HTMLElements holds lists of tags and attribute values for classes and id.
|
// HTMLElements holds lists of tags and attribute values for classes and id.
|
||||||
type HTMLElements struct {
|
type HTMLElements struct {
|
||||||
Tags []string `json:"tags"`
|
Tags []string `json:"tags"`
|
||||||
|
@ -87,12 +48,6 @@ func (h *HTMLElements) Sort() {
|
||||||
sort.Strings(h.IDs)
|
sort.Strings(h.IDs)
|
||||||
}
|
}
|
||||||
|
|
||||||
type htmlElement struct {
|
|
||||||
Tag string
|
|
||||||
Classes []string
|
|
||||||
IDs []string
|
|
||||||
}
|
|
||||||
|
|
||||||
type htmlElementsCollector struct {
|
type htmlElementsCollector struct {
|
||||||
// Contains the raw HTML string. We will get the same element
|
// Contains the raw HTML string. We will get the same element
|
||||||
// several times, and want to avoid costly reparsing when this
|
// several times, and want to avoid costly reparsing when this
|
||||||
|
@ -104,6 +59,12 @@ type htmlElementsCollector struct {
|
||||||
mu sync.RWMutex
|
mu sync.RWMutex
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func newHTMLElementsCollector() *htmlElementsCollector {
|
||||||
|
return &htmlElementsCollector{
|
||||||
|
elementSet: make(map[string]bool),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func (c *htmlElementsCollector) getHTMLElements() HTMLElements {
|
func (c *htmlElementsCollector) getHTMLElements() HTMLElements {
|
||||||
var (
|
var (
|
||||||
classes []string
|
classes []string
|
||||||
|
@ -132,118 +93,114 @@ func (c *htmlElementsCollector) getHTMLElements() HTMLElements {
|
||||||
|
|
||||||
type htmlElementsCollectorWriter struct {
|
type htmlElementsCollectorWriter struct {
|
||||||
collector *htmlElementsCollector
|
collector *htmlElementsCollector
|
||||||
|
buff bytes.Buffer
|
||||||
|
|
||||||
r rune // Current rune
|
isCollecting bool
|
||||||
width int // The width in bytes of r
|
inPreTag string
|
||||||
input []byte // The current slice written to Write
|
|
||||||
pos int // The current position in input
|
|
||||||
|
|
||||||
err error
|
inQuote bool
|
||||||
|
quoteValue byte
|
||||||
inQuote rune
|
|
||||||
|
|
||||||
buff bytes.Buffer
|
|
||||||
|
|
||||||
// Current state
|
|
||||||
state htmlCollectorStateFunc
|
|
||||||
|
|
||||||
// Precompiled state funcs
|
|
||||||
defaultLexElementInside htmlCollectorStateFunc
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Write collects HTML elements from p.
|
func newHTMLElementsCollectorWriter(collector *htmlElementsCollector) *htmlElementsCollectorWriter {
|
||||||
|
return &htmlElementsCollectorWriter{
|
||||||
|
collector: collector,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Write splits the incoming stream into single html element.
|
||||||
func (w *htmlElementsCollectorWriter) Write(p []byte) (n int, err error) {
|
func (w *htmlElementsCollectorWriter) Write(p []byte) (n int, err error) {
|
||||||
n = len(p)
|
n = len(p)
|
||||||
w.input = p
|
i := 0
|
||||||
w.pos = 0
|
|
||||||
|
|
||||||
for {
|
for i < len(p) {
|
||||||
w.r = w.next()
|
// If we are not collecting, cycle through byte stream until start bracket "<" is found.
|
||||||
if w.r == eof {
|
if !w.isCollecting {
|
||||||
return
|
for ; i < len(p); i++ {
|
||||||
}
|
b := p[i]
|
||||||
w.state = w.state(w)
|
if b == '<' {
|
||||||
}
|
w.startCollecting()
|
||||||
}
|
break
|
||||||
|
}
|
||||||
func (l *htmlElementsCollectorWriter) backup() {
|
|
||||||
l.pos -= l.width
|
|
||||||
l.r, _ = utf8.DecodeRune(l.input[l.pos:])
|
|
||||||
}
|
|
||||||
|
|
||||||
func (w *htmlElementsCollectorWriter) consumeBuffUntil(condition func() bool, resolve htmlCollectorStateFunc) htmlCollectorStateFunc {
|
|
||||||
var s htmlCollectorStateFunc
|
|
||||||
s = func(*htmlElementsCollectorWriter) htmlCollectorStateFunc {
|
|
||||||
w.buff.WriteRune(w.r)
|
|
||||||
if condition() {
|
|
||||||
w.buff.Reset()
|
|
||||||
return resolve
|
|
||||||
}
|
|
||||||
return s
|
|
||||||
}
|
|
||||||
return s
|
|
||||||
}
|
|
||||||
|
|
||||||
func (w *htmlElementsCollectorWriter) consumeRuneUntil(condition func(r rune) bool, resolve htmlCollectorStateFunc) htmlCollectorStateFunc {
|
|
||||||
var s htmlCollectorStateFunc
|
|
||||||
s = func(*htmlElementsCollectorWriter) htmlCollectorStateFunc {
|
|
||||||
if condition(w.r) {
|
|
||||||
return resolve
|
|
||||||
}
|
|
||||||
return s
|
|
||||||
}
|
|
||||||
return s
|
|
||||||
}
|
|
||||||
|
|
||||||
// Starts with e.g. "<body " or "<div"
|
|
||||||
func (w *htmlElementsCollectorWriter) lexElementInside(resolve htmlCollectorStateFunc) htmlCollectorStateFunc {
|
|
||||||
var s htmlCollectorStateFunc
|
|
||||||
s = func(w *htmlElementsCollectorWriter) htmlCollectorStateFunc {
|
|
||||||
w.buff.WriteRune(w.r)
|
|
||||||
|
|
||||||
// Skip any text inside a quote.
|
|
||||||
if w.r == '\'' || w.r == '"' {
|
|
||||||
if w.inQuote == w.r {
|
|
||||||
w.inQuote = 0
|
|
||||||
} else if w.inQuote == 0 {
|
|
||||||
w.inQuote = w.r
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if w.inQuote != 0 {
|
if w.isCollecting {
|
||||||
return s
|
// If we are collecting, cycle through byte stream until end bracket ">" is found,
|
||||||
|
// disregard any ">" if within a quote,
|
||||||
|
// write bytes until found to buffer.
|
||||||
|
for ; i < len(p); i++ {
|
||||||
|
b := p[i]
|
||||||
|
w.toggleIfQuote(b)
|
||||||
|
w.buff.WriteByte(b)
|
||||||
|
|
||||||
|
if !w.inQuote && b == '>' {
|
||||||
|
w.endCollecting()
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if w.r == '>' {
|
// If no end bracket ">" is found while collecting, but the stream ended
|
||||||
|
// this could mean we received chunks of a stream from e.g. the minify functionality
|
||||||
|
// next if loop will be skipped.
|
||||||
|
|
||||||
|
// At this point we have collected an element line between angle brackets "<" and ">".
|
||||||
|
if !w.isCollecting {
|
||||||
|
if w.buff.Len() == 0 {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
if w.inPreTag != "" { // within preformatted code block
|
||||||
|
s := w.buff.String()
|
||||||
|
w.buff.Reset()
|
||||||
|
if tagName, isEnd := parseEndTag(s); isEnd && w.inPreTag == tagName {
|
||||||
|
w.inPreTag = ""
|
||||||
|
}
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
// First check if we have processed this element before.
|
||||||
|
w.collector.mu.RLock()
|
||||||
|
|
||||||
// Work with the bytes slice as long as it's practical,
|
// Work with the bytes slice as long as it's practical,
|
||||||
// to save memory allocations.
|
// to save memory allocations.
|
||||||
b := w.buff.Bytes()
|
b := w.buff.Bytes()
|
||||||
|
|
||||||
defer func() {
|
// See https://github.com/dominikh/go-tools/issues/723
|
||||||
w.buff.Reset()
|
//lint:ignore S1030 This construct avoids memory allocation for the string.
|
||||||
}()
|
|
||||||
|
|
||||||
// First check if we have processed this element before.
|
|
||||||
w.collector.mu.RLock()
|
|
||||||
|
|
||||||
seen := w.collector.elementSet[string(b)]
|
seen := w.collector.elementSet[string(b)]
|
||||||
w.collector.mu.RUnlock()
|
w.collector.mu.RUnlock()
|
||||||
if seen {
|
if seen {
|
||||||
return resolve
|
w.buff.Reset()
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
// Filter out unwanted tags
|
||||||
|
// if within preformatted code blocks <pre>, <textarea>, <script>, <style>
|
||||||
|
// comments and doctype tags
|
||||||
|
// end tags.
|
||||||
|
switch {
|
||||||
|
case bytes.HasPrefix(b, []byte("<!")): // comment or doctype tag
|
||||||
|
w.buff.Reset()
|
||||||
|
continue
|
||||||
|
case bytes.HasPrefix(b, []byte("</")): // end tag
|
||||||
|
w.buff.Reset()
|
||||||
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
s := w.buff.String()
|
s := w.buff.String()
|
||||||
|
w.buff.Reset()
|
||||||
|
|
||||||
if s == "" {
|
// Check if a preformatted code block started.
|
||||||
return resolve
|
if tagName, isStart := parseStartTag(s); isStart && isPreFormatted(tagName) {
|
||||||
|
w.inPreTag = tagName
|
||||||
}
|
}
|
||||||
|
|
||||||
// Parse each collected element.
|
// Parse each collected element.
|
||||||
el, err := parseHTMLElement(s)
|
el, err := parseHTMLElement(s)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
w.err = err
|
return n, err
|
||||||
return resolve
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Write this tag to the element set.
|
// Write this tag to the element set.
|
||||||
|
@ -251,137 +208,109 @@ func (w *htmlElementsCollectorWriter) lexElementInside(resolve htmlCollectorStat
|
||||||
w.collector.elementSet[s] = true
|
w.collector.elementSet[s] = true
|
||||||
w.collector.elements = append(w.collector.elements, el)
|
w.collector.elements = append(w.collector.elements, el)
|
||||||
w.collector.mu.Unlock()
|
w.collector.mu.Unlock()
|
||||||
|
|
||||||
return resolve
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
return s
|
|
||||||
}
|
|
||||||
|
|
||||||
return s
|
|
||||||
}
|
|
||||||
|
|
||||||
func (l *htmlElementsCollectorWriter) next() rune {
|
|
||||||
if l.pos >= len(l.input) {
|
|
||||||
l.width = 0
|
|
||||||
return eof
|
|
||||||
}
|
|
||||||
|
|
||||||
runeValue, runeWidth := utf8.DecodeRune(l.input[l.pos:])
|
|
||||||
l.width = runeWidth
|
|
||||||
l.pos += l.width
|
|
||||||
return runeValue
|
|
||||||
}
|
|
||||||
|
|
||||||
// returns the next state in HTML element scanner.
|
|
||||||
type htmlCollectorStateFunc func(*htmlElementsCollectorWriter) htmlCollectorStateFunc
|
|
||||||
|
|
||||||
// At "<", buffer empty.
|
|
||||||
// Potentially starting a HTML element.
|
|
||||||
func htmlLexElementStart(w *htmlElementsCollectorWriter) htmlCollectorStateFunc {
|
|
||||||
if w.r == '>' || unicode.IsSpace(w.r) {
|
|
||||||
if w.buff.Len() < 2 || bytes.HasPrefix(w.buff.Bytes(), []byte("</")) {
|
|
||||||
w.buff.Reset()
|
|
||||||
return htmlLexStart
|
|
||||||
}
|
|
||||||
|
|
||||||
tagName := w.buff.Bytes()[1:]
|
|
||||||
|
|
||||||
switch {
|
|
||||||
case skipInnerElementRe.Match(tagName):
|
|
||||||
// pre, script etc. We collect classes etc. on the surrounding
|
|
||||||
// element, but skip the inner content.
|
|
||||||
w.backup()
|
|
||||||
|
|
||||||
// tagName will be overwritten, so make a copy.
|
|
||||||
tagNameCopy := make([]byte, len(tagName))
|
|
||||||
copy(tagNameCopy, tagName)
|
|
||||||
|
|
||||||
return w.lexElementInside(
|
|
||||||
w.consumeBuffUntil(
|
|
||||||
func() bool {
|
|
||||||
if w.r != '>' {
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
m := endTagRe.FindSubmatch(w.buff.Bytes())
|
|
||||||
if m == nil {
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
return bytes.EqualFold(m[1], tagNameCopy)
|
|
||||||
},
|
|
||||||
htmlLexStart,
|
|
||||||
))
|
|
||||||
case skipAllElementRe.Match(tagName):
|
|
||||||
// E.g. "<!DOCTYPE ..."
|
|
||||||
w.buff.Reset()
|
|
||||||
return w.consumeRuneUntil(func(r rune) bool {
|
|
||||||
return r == '>'
|
|
||||||
}, htmlLexStart)
|
|
||||||
default:
|
|
||||||
w.backup()
|
|
||||||
return w.defaultLexElementInside
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
w.buff.WriteRune(w.r)
|
return
|
||||||
|
|
||||||
// If it's a comment, skip to its end.
|
|
||||||
if w.r == '-' && bytes.Equal(w.buff.Bytes(), []byte("<!--")) {
|
|
||||||
w.buff.Reset()
|
|
||||||
return htmlLexToEndOfComment
|
|
||||||
}
|
|
||||||
|
|
||||||
return htmlLexElementStart
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Entry state func.
|
func (c *htmlElementsCollectorWriter) startCollecting() {
|
||||||
// Looks for a opening bracket, '<'.
|
c.isCollecting = true
|
||||||
func htmlLexStart(w *htmlElementsCollectorWriter) htmlCollectorStateFunc {
|
|
||||||
if w.r == '<' {
|
|
||||||
w.backup()
|
|
||||||
w.buff.Reset()
|
|
||||||
return htmlLexElementStart
|
|
||||||
}
|
|
||||||
|
|
||||||
return htmlLexStart
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// After "<!--", buff empty.
|
func (c *htmlElementsCollectorWriter) endCollecting() {
|
||||||
func htmlLexToEndOfComment(w *htmlElementsCollectorWriter) htmlCollectorStateFunc {
|
c.isCollecting = false
|
||||||
w.buff.WriteRune(w.r)
|
c.inQuote = false
|
||||||
|
}
|
||||||
|
|
||||||
if w.r == '>' && bytes.HasSuffix(w.buff.Bytes(), []byte("-->")) {
|
func (c *htmlElementsCollectorWriter) toggleIfQuote(b byte) {
|
||||||
// Done, start looking for HTML elements again.
|
if isQuote(b) {
|
||||||
return htmlLexStart
|
if c.inQuote && b == c.quoteValue {
|
||||||
|
c.inQuote = false
|
||||||
|
} else if !c.inQuote {
|
||||||
|
c.inQuote = true
|
||||||
|
c.quoteValue = b
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func isQuote(b byte) bool {
|
||||||
|
return b == '"' || b == '\''
|
||||||
|
}
|
||||||
|
|
||||||
|
func parseStartTag(s string) (string, bool) {
|
||||||
|
s = strings.TrimPrefix(s, "<")
|
||||||
|
s = strings.TrimSuffix(s, ">")
|
||||||
|
|
||||||
|
spaceIndex := strings.Index(s, " ")
|
||||||
|
if spaceIndex != -1 {
|
||||||
|
s = s[:spaceIndex]
|
||||||
}
|
}
|
||||||
|
|
||||||
return htmlLexToEndOfComment
|
return strings.ToLower(strings.TrimSpace(s)), true
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func parseEndTag(s string) (string, bool) {
|
||||||
|
if !strings.HasPrefix(s, "</") {
|
||||||
|
return "", false
|
||||||
|
}
|
||||||
|
|
||||||
|
s = strings.TrimPrefix(s, "</")
|
||||||
|
s = strings.TrimSuffix(s, ">")
|
||||||
|
|
||||||
|
return strings.ToLower(strings.TrimSpace(s)), true
|
||||||
|
}
|
||||||
|
|
||||||
|
// No need to look inside these for HTML elements.
|
||||||
|
func isPreFormatted(s string) bool {
|
||||||
|
return s == "pre" || s == "textarea" || s == "script" || s == "style"
|
||||||
|
}
|
||||||
|
|
||||||
|
type htmlElement struct {
|
||||||
|
Tag string
|
||||||
|
Classes []string
|
||||||
|
IDs []string
|
||||||
|
}
|
||||||
|
|
||||||
|
var (
|
||||||
|
htmlJsonFixer = strings.NewReplacer(", ", "\n")
|
||||||
|
jsonAttrRe = regexp.MustCompile(`'?(.*?)'?:.*`)
|
||||||
|
classAttrRe = regexp.MustCompile(`(?i)^class$|transition`)
|
||||||
|
|
||||||
|
exceptionList = map[string]bool{
|
||||||
|
"thead": true,
|
||||||
|
"tbody": true,
|
||||||
|
"tfoot": true,
|
||||||
|
"td": true,
|
||||||
|
"tr": true,
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
func parseHTMLElement(elStr string) (el htmlElement, err error) {
|
func parseHTMLElement(elStr string) (el htmlElement, err error) {
|
||||||
|
var tagBuffer string = ""
|
||||||
|
|
||||||
tagName := parseStartTag(elStr)
|
tagName, ok := parseStartTag(elStr)
|
||||||
|
if !ok {
|
||||||
el.Tag = strings.ToLower(tagName)
|
return
|
||||||
tagNameToParse := el.Tag
|
}
|
||||||
|
|
||||||
// The net/html parser does not handle single table elements as input, e.g. tbody.
|
// The net/html parser does not handle single table elements as input, e.g. tbody.
|
||||||
// We only care about the element/class/ids, so just store away the original tag name
|
// We only care about the element/class/ids, so just store away the original tag name
|
||||||
// and pretend it's a <div>.
|
// and pretend it's a <div>.
|
||||||
if exceptionList[el.Tag] {
|
if exceptionList[tagName] {
|
||||||
|
tagBuffer = tagName
|
||||||
elStr = strings.Replace(elStr, tagName, "div", 1)
|
elStr = strings.Replace(elStr, tagName, "div", 1)
|
||||||
tagNameToParse = "div"
|
|
||||||
}
|
}
|
||||||
|
|
||||||
n, err := html.Parse(strings.NewReader(elStr))
|
n, err := html.Parse(strings.NewReader(elStr))
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
var walk func(*html.Node)
|
var walk func(*html.Node)
|
||||||
walk = func(n *html.Node) {
|
walk = func(n *html.Node) {
|
||||||
if n.Type == html.ElementNode && n.Data == tagNameToParse {
|
if n.Type == html.ElementNode && strings.Contains(elStr, n.Data) {
|
||||||
|
el.Tag = n.Data
|
||||||
|
|
||||||
for _, a := range n.Attr {
|
for _, a := range n.Attr {
|
||||||
switch {
|
switch {
|
||||||
case strings.EqualFold(a.Key, "id"):
|
case strings.EqualFold(a.Key, "id"):
|
||||||
|
@ -416,20 +345,10 @@ func parseHTMLElement(elStr string) (el htmlElement, err error) {
|
||||||
|
|
||||||
walk(n)
|
walk(n)
|
||||||
|
|
||||||
return
|
// did we replaced the start tag?
|
||||||
}
|
if tagBuffer != "" {
|
||||||
|
el.Tag = tagBuffer
|
||||||
// Variants of s
|
|
||||||
// <body class="b a">
|
|
||||||
// <div>
|
|
||||||
func parseStartTag(s string) string {
|
|
||||||
spaceIndex := strings.IndexFunc(s, func(r rune) bool {
|
|
||||||
return unicode.IsSpace(r)
|
|
||||||
})
|
|
||||||
|
|
||||||
if spaceIndex == -1 {
|
|
||||||
return s[1 : len(s)-1]
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return s[1:spaceIndex]
|
return
|
||||||
}
|
}
|
||||||
|
|
|
@ -15,12 +15,8 @@ package publisher
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"fmt"
|
"fmt"
|
||||||
"math/rand"
|
|
||||||
"strings"
|
"strings"
|
||||||
"testing"
|
"testing"
|
||||||
"time"
|
|
||||||
|
|
||||||
"github.com/gohugoio/hugo/common/text"
|
|
||||||
|
|
||||||
"github.com/gohugoio/hugo/media"
|
"github.com/gohugoio/hugo/media"
|
||||||
"github.com/gohugoio/hugo/minifiers"
|
"github.com/gohugoio/hugo/minifiers"
|
||||||
|
@ -32,7 +28,6 @@ import (
|
||||||
|
|
||||||
func TestClassCollector(t *testing.T) {
|
func TestClassCollector(t *testing.T) {
|
||||||
c := qt.New((t))
|
c := qt.New((t))
|
||||||
rnd := rand.New(rand.NewSource(time.Now().Unix()))
|
|
||||||
|
|
||||||
f := func(tags, classes, ids string) HTMLElements {
|
f := func(tags, classes, ids string) HTMLElements {
|
||||||
var tagss, classess, idss []string
|
var tagss, classess, idss []string
|
||||||
|
@ -62,20 +57,14 @@ func TestClassCollector(t *testing.T) {
|
||||||
expect HTMLElements
|
expect HTMLElements
|
||||||
}{
|
}{
|
||||||
{"basic", `<body class="b a"></body>`, f("body", "a b", "")},
|
{"basic", `<body class="b a"></body>`, f("body", "a b", "")},
|
||||||
{"duplicates", `<div class="b a b"></div><div class="b a b"></div>x'`, f("div", "a b", "")},
|
{"duplicates", `<div class="b a b"></div>`, f("div", "a b", "")},
|
||||||
{"single quote", `<body class='b a'></body>`, f("body", "a b", "")},
|
{"single quote", `<body class='b a'></body>`, f("body", "a b", "")},
|
||||||
{"no quote", `<body class=b id=myelement></body>`, f("body", "b", "myelement")},
|
{"no quote", `<body class=b id=myelement></body>`, f("body", "b", "myelement")},
|
||||||
{"short", `<i>`, f("i", "", "")},
|
|
||||||
{"invalid", `< body class="b a"></body><div></div>`, f("div", "", "")},
|
|
||||||
// https://github.com/gohugoio/hugo/issues/7318
|
// https://github.com/gohugoio/hugo/issues/7318
|
||||||
{"thead", `<table class="cl1">
|
{"thead", `<table class="cl1">
|
||||||
<thead class="cl2"><tr class="cl3"><td class="cl4"></td></tr></thead>
|
<thead class="cl2"><tr class="cl3"><td class="cl4"></td></tr></thead>
|
||||||
<tbody class="cl5"><tr class="cl6"><td class="cl7"></td></tr></tbody>
|
<tbody class="cl5"><tr class="cl6"><td class="cl7"></td></tr></tbody>
|
||||||
</table>`, f("table tbody td thead tr", "cl1 cl2 cl3 cl4 cl5 cl6 cl7", "")},
|
</table>`, f("table tbody td thead tr", "cl1 cl2 cl3 cl4 cl5 cl6 cl7", "")},
|
||||||
{"thead uppercase", `<TABLE class="CL1">
|
|
||||||
<THEAD class="CL2"><TR class="CL3"><TD class="CL4"></TD></TR></THEAD>
|
|
||||||
<TBODY class="CL5"><TR class="CL6"><TD class="CL7"></TD></TR></TBODY>
|
|
||||||
</TABLE>`, f("table tbody td thead tr", "CL1 CL2 CL3 CL4 CL5 CL6 CL7", "")},
|
|
||||||
// https://github.com/gohugoio/hugo/issues/7161
|
// https://github.com/gohugoio/hugo/issues/7161
|
||||||
{"minified a href", `<a class="b a" href=/></a>`, f("a", "a b", "")},
|
{"minified a href", `<a class="b a" href=/></a>`, f("a", "a b", "")},
|
||||||
{"AlpineJS bind 1", `<body>
|
{"AlpineJS bind 1", `<body>
|
||||||
|
@ -109,11 +98,6 @@ func TestClassCollector(t *testing.T) {
|
||||||
{"Textarea tags content should be skipped", `<textarea class="textareaclass"><span>foo</span><span>bar</span></textarea><div class="foo"></div>`, f("div textarea", "foo textareaclass", "")},
|
{"Textarea tags content should be skipped", `<textarea class="textareaclass"><span>foo</span><span>bar</span></textarea><div class="foo"></div>`, f("div textarea", "foo textareaclass", "")},
|
||||||
{"DOCTYPE should beskipped", `<!DOCTYPE html>`, f("", "", "")},
|
{"DOCTYPE should beskipped", `<!DOCTYPE html>`, f("", "", "")},
|
||||||
{"Comments should be skipped", `<!-- example comment -->`, f("", "", "")},
|
{"Comments should be skipped", `<!-- example comment -->`, f("", "", "")},
|
||||||
{"Comments with elements before and after", `<div></div><!-- example comment --><span><span>`, f("div span", "", "")},
|
|
||||||
// Issue #8530
|
|
||||||
{"Comment with single quote", `<!-- Hero Area Image d'accueil --><i class="foo">`, f("i", "foo", "")},
|
|
||||||
{"Uppercase tags", `<DIV></DIV>`, f("div", "", "")},
|
|
||||||
{"Predefined tags with distinct casing", `<script>if (a < b) { nothing(); }</SCRIPT><div></div>`, f("div script", "", "")},
|
|
||||||
// Issue #8417
|
// Issue #8417
|
||||||
{"Tabs inline", `<hr id="a" class="foo"><div class="bar">d</div>`, f("div hr", "bar foo", "a")},
|
{"Tabs inline", `<hr id="a" class="foo"><div class="bar">d</div>`, f("div hr", "bar foo", "a")},
|
||||||
{"Tabs on multiple rows", `<form
|
{"Tabs on multiple rows", `<form
|
||||||
|
@ -124,29 +108,16 @@ func TestClassCollector(t *testing.T) {
|
||||||
<div id="b" class="foo">d</div>`, f("div form", "foo", "a b")},
|
<div id="b" class="foo">d</div>`, f("div form", "foo", "a b")},
|
||||||
} {
|
} {
|
||||||
|
|
||||||
for _, variant := range []struct {
|
for _, minify := range []bool{false, true} {
|
||||||
minify bool
|
c.Run(fmt.Sprintf("%s--minify-%t", test.name, minify), func(c *qt.C) {
|
||||||
stream bool
|
|
||||||
}{
|
|
||||||
{minify: false, stream: false},
|
|
||||||
{minify: true, stream: false},
|
|
||||||
{minify: false, stream: true},
|
|
||||||
} {
|
|
||||||
|
|
||||||
c.Run(fmt.Sprintf("%s--minify-%t--stream-%t", test.name, variant.minify, variant.stream), func(c *qt.C) {
|
|
||||||
w := newHTMLElementsCollectorWriter(newHTMLElementsCollector())
|
w := newHTMLElementsCollectorWriter(newHTMLElementsCollector())
|
||||||
if variant.minify {
|
if minify {
|
||||||
if skipMinifyTest[test.name] {
|
if skipMinifyTest[test.name] {
|
||||||
c.Skip("skip minify test")
|
c.Skip("skip minify test")
|
||||||
}
|
}
|
||||||
v := viper.New()
|
v := viper.New()
|
||||||
m, _ := minifiers.New(media.DefaultTypes, output.DefaultFormats, v)
|
m, _ := minifiers.New(media.DefaultTypes, output.DefaultFormats, v)
|
||||||
m.Minify(media.HTMLType, w, strings.NewReader(test.html))
|
m.Minify(media.HTMLType, w, strings.NewReader(test.html))
|
||||||
} else if variant.stream {
|
|
||||||
chunks := text.Chunk(test.html, rnd.Intn(41)+1)
|
|
||||||
for _, chunk := range chunks {
|
|
||||||
fmt.Fprint(w, chunk)
|
|
||||||
}
|
|
||||||
} else {
|
} else {
|
||||||
fmt.Fprint(w, test.html)
|
fmt.Fprint(w, test.html)
|
||||||
}
|
}
|
||||||
|
@ -155,7 +126,6 @@ func TestClassCollector(t *testing.T) {
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func BenchmarkElementsCollectorWriter(b *testing.B) {
|
func BenchmarkElementsCollectorWriter(b *testing.B) {
|
||||||
|
|
Loading…
Reference in a new issue