mirror of
https://github.com/gohugoio/hugo.git
synced 2024-11-21 20:46:30 -05:00
parent
dc6b7a75ff
commit
f518b4f71e
2 changed files with 283 additions and 167 deletions
|
@ -19,12 +19,51 @@ import (
|
||||||
"sort"
|
"sort"
|
||||||
"strings"
|
"strings"
|
||||||
"sync"
|
"sync"
|
||||||
|
"unicode"
|
||||||
|
"unicode/utf8"
|
||||||
|
|
||||||
"golang.org/x/net/html"
|
"golang.org/x/net/html"
|
||||||
|
|
||||||
"github.com/gohugoio/hugo/helpers"
|
"github.com/gohugoio/hugo/helpers"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
// eof is the sentinel rune the scanner returns once the current input
// slice has been fully consumed.
const eof = -1

var (
	// htmlJsonFixer rewrites every ", " into a newline.
	// NOTE(review): presumably used to put each entry of a JSON-like
	// attribute value on its own line before matching jsonAttrRe; the
	// call site is not visible here, confirm.
	htmlJsonFixer = strings.NewReplacer(", ", "\n")

	// jsonAttrRe captures the (optionally single-quoted) text that
	// precedes the first ':' on a line.
	jsonAttrRe = regexp.MustCompile(`'?(.*?)'?:.*`)

	// classAttrRe matches, case-insensitively, a string that is exactly
	// "class" or that contains "transition".
	classAttrRe = regexp.MustCompile(`(?i)^class$|transition`)

	// skipInnerElementRe matches the tag names whose inner content must
	// not be scanned for elements. It is anchored at both ends so that
	// only these exact names match; a mere prefix match would also catch
	// names such as "preview" or custom elements like "pre-x".
	skipInnerElementRe = regexp.MustCompile(`(?i)^(pre|textarea|script|style)$`)

	// skipAllElementRe matches tag names that are skipped entirely,
	// e.g. "!DOCTYPE".
	skipAllElementRe = regexp.MustCompile(`(?i)^!DOCTYPE`)

	// endTagRe matches an end tag located at the very end of the input
	// and captures its (letters only) tag name.
	endTagRe = regexp.MustCompile(`(?i)<\/\s*([a-zA-Z]+)\s*>$`)

	// exceptionList holds the table elements that the net/html parser
	// does not handle as stand-alone input; parseHTMLElement parses them
	// as a <div> instead and keeps the original tag name.
	exceptionList = map[string]bool{
		"thead": true,
		"tbody": true,
		"tfoot": true,
		"td":    true,
		"tr":    true,
	}
)
|
||||||
|
|
||||||
|
func newHTMLElementsCollector() *htmlElementsCollector {
|
||||||
|
return &htmlElementsCollector{
|
||||||
|
elementSet: make(map[string]bool),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func newHTMLElementsCollectorWriter(collector *htmlElementsCollector) *htmlElementsCollectorWriter {
|
||||||
|
w := &htmlElementsCollectorWriter{
|
||||||
|
collector: collector,
|
||||||
|
state: htmlLexStart,
|
||||||
|
}
|
||||||
|
|
||||||
|
w.defaultLexElementInside = w.lexElementInside(htmlLexStart)
|
||||||
|
|
||||||
|
return w
|
||||||
|
}
|
||||||
|
|
||||||
// HTMLElements holds lists of tags and attribute values for classes and id.
|
// HTMLElements holds lists of tags and attribute values for classes and id.
|
||||||
type HTMLElements struct {
|
type HTMLElements struct {
|
||||||
Tags []string `json:"tags"`
|
Tags []string `json:"tags"`
|
||||||
|
@ -48,6 +87,12 @@ func (h *HTMLElements) Sort() {
|
||||||
sort.Strings(h.IDs)
|
sort.Strings(h.IDs)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// htmlElement is the value parseHTMLElement returns for a single element.
type htmlElement struct {
|
||||||
|
// Tag is the element's tag name; parseHTMLElement stores it lowercased.
Tag string
|
||||||
|
// Classes presumably holds the class names found on the element; the
// code that fills it is not visible here, confirm.
Classes []string
|
||||||
|
// IDs presumably holds the element's id attribute values; the code that
// fills it is not visible here, confirm.
IDs []string
|
||||||
|
}
|
||||||
|
|
||||||
type htmlElementsCollector struct {
|
type htmlElementsCollector struct {
|
||||||
// Contains the raw HTML string. We will get the same element
|
// Contains the raw HTML string. We will get the same element
|
||||||
// several times, and want to avoid costly reparsing when this
|
// several times, and want to avoid costly reparsing when this
|
||||||
|
@ -59,12 +104,6 @@ type htmlElementsCollector struct {
|
||||||
mu sync.RWMutex
|
mu sync.RWMutex
|
||||||
}
|
}
|
||||||
|
|
||||||
func newHTMLElementsCollector() *htmlElementsCollector {
|
|
||||||
return &htmlElementsCollector{
|
|
||||||
elementSet: make(map[string]bool),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func (c *htmlElementsCollector) getHTMLElements() HTMLElements {
|
func (c *htmlElementsCollector) getHTMLElements() HTMLElements {
|
||||||
var (
|
var (
|
||||||
classes []string
|
classes []string
|
||||||
|
@ -93,114 +132,125 @@ func (c *htmlElementsCollector) getHTMLElements() HTMLElements {
|
||||||
|
|
||||||
type htmlElementsCollectorWriter struct {
|
type htmlElementsCollectorWriter struct {
|
||||||
collector *htmlElementsCollector
|
collector *htmlElementsCollector
|
||||||
buff bytes.Buffer
|
|
||||||
|
|
||||||
isCollecting bool
|
r rune // Current rune
|
||||||
inPreTag string
|
width int // The width in bytes of r
|
||||||
|
input []byte // The current slice written to Write
|
||||||
|
pos int // The current position in input
|
||||||
|
|
||||||
inQuote bool
|
err error
|
||||||
quoteValue byte
|
|
||||||
|
inQuote rune
|
||||||
|
|
||||||
|
buff bytes.Buffer
|
||||||
|
|
||||||
|
// Current state
|
||||||
|
state htmlCollectorStateFunc
|
||||||
|
|
||||||
|
// Precompiled state funcs
|
||||||
|
defaultLexElementInside htmlCollectorStateFunc
|
||||||
}
|
}
|
||||||
|
|
||||||
func newHTMLElementsCollectorWriter(collector *htmlElementsCollector) *htmlElementsCollectorWriter {
|
// Write collects HTML elements from p, which must contain complete runes.
|
||||||
return &htmlElementsCollectorWriter{
|
func (w *htmlElementsCollectorWriter) Write(p []byte) (int, error) {
|
||||||
collector: collector,
|
if p == nil {
|
||||||
|
return 0, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
w.input = p
|
||||||
|
|
||||||
|
for {
|
||||||
|
w.r = w.next()
|
||||||
|
if w.r == eof || w.r == utf8.RuneError {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
w.state = w.state(w)
|
||||||
|
}
|
||||||
|
|
||||||
|
w.pos = 0
|
||||||
|
w.input = nil
|
||||||
|
|
||||||
|
return len(p), nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// Write splits the incoming stream into single html element.
|
func (l *htmlElementsCollectorWriter) backup() {
|
||||||
func (w *htmlElementsCollectorWriter) Write(p []byte) (n int, err error) {
|
l.pos -= l.width
|
||||||
n = len(p)
|
l.r, _ = utf8.DecodeRune(l.input[l.pos:])
|
||||||
i := 0
|
}
|
||||||
|
|
||||||
for i < len(p) {
|
func (w *htmlElementsCollectorWriter) consumeBuffUntil(condition func() bool, resolve htmlCollectorStateFunc) htmlCollectorStateFunc {
|
||||||
// If we are not collecting, cycle through byte stream until start bracket "<" is found.
|
var s htmlCollectorStateFunc
|
||||||
if !w.isCollecting {
|
s = func(*htmlElementsCollectorWriter) htmlCollectorStateFunc {
|
||||||
for ; i < len(p); i++ {
|
w.buff.WriteRune(w.r)
|
||||||
b := p[i]
|
if condition() {
|
||||||
if b == '<' {
|
w.buff.Reset()
|
||||||
w.startCollecting()
|
return resolve
|
||||||
break
|
}
|
||||||
}
|
return s
|
||||||
|
}
|
||||||
|
return s
|
||||||
|
}
|
||||||
|
|
||||||
|
func (w *htmlElementsCollectorWriter) consumeRuneUntil(condition func(r rune) bool, resolve htmlCollectorStateFunc) htmlCollectorStateFunc {
|
||||||
|
var s htmlCollectorStateFunc
|
||||||
|
s = func(*htmlElementsCollectorWriter) htmlCollectorStateFunc {
|
||||||
|
if condition(w.r) {
|
||||||
|
return resolve
|
||||||
|
}
|
||||||
|
return s
|
||||||
|
}
|
||||||
|
return s
|
||||||
|
}
|
||||||
|
|
||||||
|
// Starts with e.g. "<body " or "<div"
|
||||||
|
func (w *htmlElementsCollectorWriter) lexElementInside(resolve htmlCollectorStateFunc) htmlCollectorStateFunc {
|
||||||
|
var s htmlCollectorStateFunc
|
||||||
|
s = func(w *htmlElementsCollectorWriter) htmlCollectorStateFunc {
|
||||||
|
w.buff.WriteRune(w.r)
|
||||||
|
|
||||||
|
// Skip any text inside a quote.
|
||||||
|
if w.r == '\'' || w.r == '"' {
|
||||||
|
if w.inQuote == w.r {
|
||||||
|
w.inQuote = 0
|
||||||
|
} else if w.inQuote == 0 {
|
||||||
|
w.inQuote = w.r
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if w.isCollecting {
|
if w.inQuote != 0 {
|
||||||
// If we are collecting, cycle through byte stream until end bracket ">" is found,
|
return s
|
||||||
// disregard any ">" if within a quote,
|
|
||||||
// write bytes until found to buffer.
|
|
||||||
for ; i < len(p); i++ {
|
|
||||||
b := p[i]
|
|
||||||
w.toggleIfQuote(b)
|
|
||||||
w.buff.WriteByte(b)
|
|
||||||
|
|
||||||
if !w.inQuote && b == '>' {
|
|
||||||
w.endCollecting()
|
|
||||||
break
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// If no end bracket ">" is found while collecting, but the stream ended
|
if w.r == '>' {
|
||||||
// this could mean we received chunks of a stream from e.g. the minify functionality
|
|
||||||
// next if loop will be skipped.
|
|
||||||
|
|
||||||
// At this point we have collected an element line between angle brackets "<" and ">".
|
|
||||||
if !w.isCollecting {
|
|
||||||
if w.buff.Len() == 0 {
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
|
|
||||||
if w.inPreTag != "" { // within preformatted code block
|
|
||||||
s := w.buff.String()
|
|
||||||
w.buff.Reset()
|
|
||||||
if tagName, isEnd := parseEndTag(s); isEnd && w.inPreTag == tagName {
|
|
||||||
w.inPreTag = ""
|
|
||||||
}
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
|
|
||||||
// First check if we have processed this element before.
|
|
||||||
w.collector.mu.RLock()
|
|
||||||
|
|
||||||
// Work with the bytes slice as long as it's practical,
|
// Work with the bytes slice as long as it's practical,
|
||||||
// to save memory allocations.
|
// to save memory allocations.
|
||||||
b := w.buff.Bytes()
|
b := w.buff.Bytes()
|
||||||
|
|
||||||
// See https://github.com/dominikh/go-tools/issues/723
|
defer func() {
|
||||||
//lint:ignore S1030 This construct avoids memory allocation for the string.
|
w.buff.Reset()
|
||||||
|
}()
|
||||||
|
|
||||||
|
// First check if we have processed this element before.
|
||||||
|
w.collector.mu.RLock()
|
||||||
|
|
||||||
seen := w.collector.elementSet[string(b)]
|
seen := w.collector.elementSet[string(b)]
|
||||||
w.collector.mu.RUnlock()
|
w.collector.mu.RUnlock()
|
||||||
if seen {
|
if seen {
|
||||||
w.buff.Reset()
|
return resolve
|
||||||
continue
|
|
||||||
}
|
|
||||||
|
|
||||||
// Filter out unwanted tags
|
|
||||||
// if within preformatted code blocks <pre>, <textarea>, <script>, <style>
|
|
||||||
// comments and doctype tags
|
|
||||||
// end tags.
|
|
||||||
switch {
|
|
||||||
case bytes.HasPrefix(b, []byte("<!")): // comment or doctype tag
|
|
||||||
w.buff.Reset()
|
|
||||||
continue
|
|
||||||
case bytes.HasPrefix(b, []byte("</")): // end tag
|
|
||||||
w.buff.Reset()
|
|
||||||
continue
|
|
||||||
}
|
}
|
||||||
|
|
||||||
s := w.buff.String()
|
s := w.buff.String()
|
||||||
w.buff.Reset()
|
|
||||||
|
|
||||||
// Check if a preformatted code block started.
|
if s == "" {
|
||||||
if tagName, isStart := parseStartTag(s); isStart && isPreFormatted(tagName) {
|
return resolve
|
||||||
w.inPreTag = tagName
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Parse each collected element.
|
// Parse each collected element.
|
||||||
el, err := parseHTMLElement(s)
|
el, err := parseHTMLElement(s)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return n, err
|
w.err = err
|
||||||
|
return resolve
|
||||||
}
|
}
|
||||||
|
|
||||||
// Write this tag to the element set.
|
// Write this tag to the element set.
|
||||||
|
@ -208,109 +258,138 @@ func (w *htmlElementsCollectorWriter) Write(p []byte) (n int, err error) {
|
||||||
w.collector.elementSet[s] = true
|
w.collector.elementSet[s] = true
|
||||||
w.collector.elements = append(w.collector.elements, el)
|
w.collector.elements = append(w.collector.elements, el)
|
||||||
w.collector.mu.Unlock()
|
w.collector.mu.Unlock()
|
||||||
|
|
||||||
|
return resolve
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
return s
|
||||||
|
}
|
||||||
|
|
||||||
|
return s
|
||||||
|
}
|
||||||
|
|
||||||
|
func (l *htmlElementsCollectorWriter) next() rune {
|
||||||
|
if l.pos >= len(l.input) {
|
||||||
|
l.width = 0
|
||||||
|
return eof
|
||||||
|
}
|
||||||
|
|
||||||
|
runeValue, runeWidth := utf8.DecodeRune(l.input[l.pos:])
|
||||||
|
|
||||||
|
l.width = runeWidth
|
||||||
|
l.pos += l.width
|
||||||
|
return runeValue
|
||||||
|
}
|
||||||
|
|
||||||
|
// htmlCollectorStateFunc handles the current rune and returns the next state in the HTML element scanner.
|
||||||
|
type htmlCollectorStateFunc func(*htmlElementsCollectorWriter) htmlCollectorStateFunc
|
||||||
|
|
||||||
|
// At "<", buffer empty.
|
||||||
|
// Potentially starting an HTML element.
|
||||||
|
func htmlLexElementStart(w *htmlElementsCollectorWriter) htmlCollectorStateFunc {
|
||||||
|
if w.r == '>' || unicode.IsSpace(w.r) {
|
||||||
|
if w.buff.Len() < 2 || bytes.HasPrefix(w.buff.Bytes(), []byte("</")) {
|
||||||
|
w.buff.Reset()
|
||||||
|
return htmlLexStart
|
||||||
|
}
|
||||||
|
|
||||||
|
tagName := w.buff.Bytes()[1:]
|
||||||
|
|
||||||
|
switch {
|
||||||
|
case skipInnerElementRe.Match(tagName):
|
||||||
|
// pre, script etc. We collect classes etc. on the surrounding
|
||||||
|
// element, but skip the inner content.
|
||||||
|
w.backup()
|
||||||
|
|
||||||
|
// tagName will be overwritten, so make a copy.
|
||||||
|
tagNameCopy := make([]byte, len(tagName))
|
||||||
|
copy(tagNameCopy, tagName)
|
||||||
|
|
||||||
|
return w.lexElementInside(
|
||||||
|
w.consumeBuffUntil(
|
||||||
|
func() bool {
|
||||||
|
if w.r != '>' {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
m := endTagRe.FindSubmatch(w.buff.Bytes())
|
||||||
|
if m == nil {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
return bytes.EqualFold(m[1], tagNameCopy)
|
||||||
|
},
|
||||||
|
htmlLexStart,
|
||||||
|
))
|
||||||
|
case skipAllElementRe.Match(tagName):
|
||||||
|
// E.g. "<!DOCTYPE ..."
|
||||||
|
w.buff.Reset()
|
||||||
|
return w.consumeRuneUntil(func(r rune) bool {
|
||||||
|
return r == '>'
|
||||||
|
}, htmlLexStart)
|
||||||
|
default:
|
||||||
|
w.backup()
|
||||||
|
return w.defaultLexElementInside
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return
|
w.buff.WriteRune(w.r)
|
||||||
}
|
|
||||||
|
|
||||||
func (c *htmlElementsCollectorWriter) startCollecting() {
|
// If it's a comment, skip to its end.
|
||||||
c.isCollecting = true
|
if w.r == '-' && bytes.Equal(w.buff.Bytes(), []byte("<!--")) {
|
||||||
}
|
w.buff.Reset()
|
||||||
|
return htmlLexToEndOfComment
|
||||||
func (c *htmlElementsCollectorWriter) endCollecting() {
|
|
||||||
c.isCollecting = false
|
|
||||||
c.inQuote = false
|
|
||||||
}
|
|
||||||
|
|
||||||
func (c *htmlElementsCollectorWriter) toggleIfQuote(b byte) {
|
|
||||||
if isQuote(b) {
|
|
||||||
if c.inQuote && b == c.quoteValue {
|
|
||||||
c.inQuote = false
|
|
||||||
} else if !c.inQuote {
|
|
||||||
c.inQuote = true
|
|
||||||
c.quoteValue = b
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func isQuote(b byte) bool {
|
|
||||||
return b == '"' || b == '\''
|
|
||||||
}
|
|
||||||
|
|
||||||
func parseStartTag(s string) (string, bool) {
|
|
||||||
s = strings.TrimPrefix(s, "<")
|
|
||||||
s = strings.TrimSuffix(s, ">")
|
|
||||||
|
|
||||||
spaceIndex := strings.Index(s, " ")
|
|
||||||
if spaceIndex != -1 {
|
|
||||||
s = s[:spaceIndex]
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return strings.ToLower(strings.TrimSpace(s)), true
|
return htmlLexElementStart
|
||||||
}
|
}
|
||||||
|
|
||||||
func parseEndTag(s string) (string, bool) {
|
// Entry state func.
|
||||||
if !strings.HasPrefix(s, "</") {
|
// Looks for an opening bracket, '<'.
|
||||||
return "", false
|
func htmlLexStart(w *htmlElementsCollectorWriter) htmlCollectorStateFunc {
|
||||||
|
if w.r == '<' {
|
||||||
|
w.backup()
|
||||||
|
w.buff.Reset()
|
||||||
|
return htmlLexElementStart
|
||||||
}
|
}
|
||||||
|
|
||||||
s = strings.TrimPrefix(s, "</")
|
return htmlLexStart
|
||||||
s = strings.TrimSuffix(s, ">")
|
|
||||||
|
|
||||||
return strings.ToLower(strings.TrimSpace(s)), true
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// No need to look inside these for HTML elements.
|
// After "<!--", buff empty.
|
||||||
func isPreFormatted(s string) bool {
|
func htmlLexToEndOfComment(w *htmlElementsCollectorWriter) htmlCollectorStateFunc {
|
||||||
return s == "pre" || s == "textarea" || s == "script" || s == "style"
|
w.buff.WriteRune(w.r)
|
||||||
}
|
|
||||||
|
|
||||||
type htmlElement struct {
|
if w.r == '>' && bytes.HasSuffix(w.buff.Bytes(), []byte("-->")) {
|
||||||
Tag string
|
// Done, start looking for HTML elements again.
|
||||||
Classes []string
|
return htmlLexStart
|
||||||
IDs []string
|
|
||||||
}
|
|
||||||
|
|
||||||
var (
|
|
||||||
htmlJsonFixer = strings.NewReplacer(", ", "\n")
|
|
||||||
jsonAttrRe = regexp.MustCompile(`'?(.*?)'?:.*`)
|
|
||||||
classAttrRe = regexp.MustCompile(`(?i)^class$|transition`)
|
|
||||||
|
|
||||||
exceptionList = map[string]bool{
|
|
||||||
"thead": true,
|
|
||||||
"tbody": true,
|
|
||||||
"tfoot": true,
|
|
||||||
"td": true,
|
|
||||||
"tr": true,
|
|
||||||
}
|
}
|
||||||
)
|
|
||||||
|
return htmlLexToEndOfComment
|
||||||
|
}
|
||||||
|
|
||||||
func parseHTMLElement(elStr string) (el htmlElement, err error) {
|
func parseHTMLElement(elStr string) (el htmlElement, err error) {
|
||||||
var tagBuffer string = ""
|
|
||||||
|
|
||||||
tagName, ok := parseStartTag(elStr)
|
tagName := parseStartTag(elStr)
|
||||||
if !ok {
|
|
||||||
return
|
el.Tag = strings.ToLower(tagName)
|
||||||
}
|
tagNameToParse := el.Tag
|
||||||
|
|
||||||
// The net/html parser does not handle single table elements as input, e.g. tbody.
|
// The net/html parser does not handle single table elements as input, e.g. tbody.
|
||||||
// We only care about the element/class/ids, so just store away the original tag name
|
// We only care about the element/class/ids, so just store away the original tag name
|
||||||
// and pretend it's a <div>.
|
// and pretend it's a <div>.
|
||||||
if exceptionList[tagName] {
|
if exceptionList[el.Tag] {
|
||||||
tagBuffer = tagName
|
|
||||||
elStr = strings.Replace(elStr, tagName, "div", 1)
|
elStr = strings.Replace(elStr, tagName, "div", 1)
|
||||||
|
tagNameToParse = "div"
|
||||||
}
|
}
|
||||||
|
|
||||||
n, err := html.Parse(strings.NewReader(elStr))
|
n, err := html.Parse(strings.NewReader(elStr))
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
var walk func(*html.Node)
|
var walk func(*html.Node)
|
||||||
walk = func(n *html.Node) {
|
walk = func(n *html.Node) {
|
||||||
if n.Type == html.ElementNode && strings.Contains(elStr, n.Data) {
|
if n.Type == html.ElementNode && n.Data == tagNameToParse {
|
||||||
el.Tag = n.Data
|
|
||||||
|
|
||||||
for _, a := range n.Attr {
|
for _, a := range n.Attr {
|
||||||
switch {
|
switch {
|
||||||
case strings.EqualFold(a.Key, "id"):
|
case strings.EqualFold(a.Key, "id"):
|
||||||
|
@ -345,10 +424,20 @@ func parseHTMLElement(elStr string) (el htmlElement, err error) {
|
||||||
|
|
||||||
walk(n)
|
walk(n)
|
||||||
|
|
||||||
// did we replaced the start tag?
|
|
||||||
if tagBuffer != "" {
|
|
||||||
el.Tag = tagBuffer
|
|
||||||
}
|
|
||||||
|
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// parseStartTag returns the tag name of the start tag s, with its original
// casing.
//
// Variants of s
//
//	<body class="b a">
//	<div>
//	<br/>
//
// The name ends at the first whitespace rune or, if there is none, at the
// closing '>'. A trailing '/' from a self-closing tag written without a space
// is not part of the name and is removed. Input that is too short to hold a
// name (e.g. "" or "<") yields "" instead of panicking.
func parseStartTag(s string) string {
	s = strings.TrimPrefix(s, "<")

	if i := strings.IndexFunc(s, unicode.IsSpace); i != -1 {
		s = s[:i]
	} else {
		s = strings.TrimSuffix(s, ">")
	}

	return strings.TrimSuffix(s, "/")
}
|
||||||
|
|
|
@ -14,9 +14,13 @@
|
||||||
package publisher
|
package publisher
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"bytes"
|
||||||
"fmt"
|
"fmt"
|
||||||
|
"io"
|
||||||
|
"math/rand"
|
||||||
"strings"
|
"strings"
|
||||||
"testing"
|
"testing"
|
||||||
|
"time"
|
||||||
|
|
||||||
"github.com/gohugoio/hugo/media"
|
"github.com/gohugoio/hugo/media"
|
||||||
"github.com/gohugoio/hugo/minifiers"
|
"github.com/gohugoio/hugo/minifiers"
|
||||||
|
@ -28,6 +32,7 @@ import (
|
||||||
|
|
||||||
func TestClassCollector(t *testing.T) {
|
func TestClassCollector(t *testing.T) {
|
||||||
c := qt.New((t))
|
c := qt.New((t))
|
||||||
|
rnd := rand.New(rand.NewSource(time.Now().Unix()))
|
||||||
|
|
||||||
f := func(tags, classes, ids string) HTMLElements {
|
f := func(tags, classes, ids string) HTMLElements {
|
||||||
var tagss, classess, idss []string
|
var tagss, classess, idss []string
|
||||||
|
@ -57,14 +62,20 @@ func TestClassCollector(t *testing.T) {
|
||||||
expect HTMLElements
|
expect HTMLElements
|
||||||
}{
|
}{
|
||||||
{"basic", `<body class="b a"></body>`, f("body", "a b", "")},
|
{"basic", `<body class="b a"></body>`, f("body", "a b", "")},
|
||||||
{"duplicates", `<div class="b a b"></div>`, f("div", "a b", "")},
|
{"duplicates", `<div class="b a b"></div><div class="b a b"></div>x'`, f("div", "a b", "")},
|
||||||
{"single quote", `<body class='b a'></body>`, f("body", "a b", "")},
|
{"single quote", `<body class='b a'></body>`, f("body", "a b", "")},
|
||||||
{"no quote", `<body class=b id=myelement></body>`, f("body", "b", "myelement")},
|
{"no quote", `<body class=b id=myelement></body>`, f("body", "b", "myelement")},
|
||||||
|
{"short", `<i>`, f("i", "", "")},
|
||||||
|
{"invalid", `< body class="b a"></body><div></div>`, f("div", "", "")},
|
||||||
// https://github.com/gohugoio/hugo/issues/7318
|
// https://github.com/gohugoio/hugo/issues/7318
|
||||||
{"thead", `<table class="cl1">
|
{"thead", `<table class="cl1">
|
||||||
<thead class="cl2"><tr class="cl3"><td class="cl4"></td></tr></thead>
|
<thead class="cl2"><tr class="cl3"><td class="cl4"></td></tr></thead>
|
||||||
<tbody class="cl5"><tr class="cl6"><td class="cl7"></td></tr></tbody>
|
<tbody class="cl5"><tr class="cl6"><td class="cl7"></td></tr></tbody>
|
||||||
</table>`, f("table tbody td thead tr", "cl1 cl2 cl3 cl4 cl5 cl6 cl7", "")},
|
</table>`, f("table tbody td thead tr", "cl1 cl2 cl3 cl4 cl5 cl6 cl7", "")},
|
||||||
|
{"thead uppercase", `<TABLE class="CL1">
|
||||||
|
<THEAD class="CL2"><TR class="CL3"><TD class="CL4"></TD></TR></THEAD>
|
||||||
|
<TBODY class="CL5"><TR class="CL6"><TD class="CL7"></TD></TR></TBODY>
|
||||||
|
</TABLE>`, f("table tbody td thead tr", "CL1 CL2 CL3 CL4 CL5 CL6 CL7", "")},
|
||||||
// https://github.com/gohugoio/hugo/issues/7161
|
// https://github.com/gohugoio/hugo/issues/7161
|
||||||
{"minified a href", `<a class="b a" href=/></a>`, f("a", "a b", "")},
|
{"minified a href", `<a class="b a" href=/></a>`, f("a", "a b", "")},
|
||||||
{"AlpineJS bind 1", `<body>
|
{"AlpineJS bind 1", `<body>
|
||||||
|
@ -98,6 +109,11 @@ func TestClassCollector(t *testing.T) {
|
||||||
{"Textarea tags content should be skipped", `<textarea class="textareaclass"><span>foo</span><span>bar</span></textarea><div class="foo"></div>`, f("div textarea", "foo textareaclass", "")},
|
{"Textarea tags content should be skipped", `<textarea class="textareaclass"><span>foo</span><span>bar</span></textarea><div class="foo"></div>`, f("div textarea", "foo textareaclass", "")},
|
||||||
{"DOCTYPE should beskipped", `<!DOCTYPE html>`, f("", "", "")},
|
{"DOCTYPE should beskipped", `<!DOCTYPE html>`, f("", "", "")},
|
||||||
{"Comments should be skipped", `<!-- example comment -->`, f("", "", "")},
|
{"Comments should be skipped", `<!-- example comment -->`, f("", "", "")},
|
||||||
|
{"Comments with elements before and after", `<div></div><!-- example comment --><span><span>`, f("div span", "", "")},
|
||||||
|
// Issue #8530
|
||||||
|
{"Comment with single quote", `<!-- Hero Area Image d'accueil --><i class="foo">`, f("i", "foo", "")},
|
||||||
|
{"Uppercase tags", `<DIV></DIV>`, f("div", "", "")},
|
||||||
|
{"Predefined tags with distinct casing", `<script>if (a < b) { nothing(); }</SCRIPT><div></div>`, f("div script", "", "")},
|
||||||
// Issue #8417
|
// Issue #8417
|
||||||
{"Tabs inline", `<hr id="a" class="foo"><div class="bar">d</div>`, f("div hr", "bar foo", "a")},
|
{"Tabs inline", `<hr id="a" class="foo"><div class="bar">d</div>`, f("div hr", "bar foo", "a")},
|
||||||
{"Tabs on multiple rows", `<form
|
{"Tabs on multiple rows", `<form
|
||||||
|
@ -106,26 +122,37 @@ func TestClassCollector(t *testing.T) {
|
||||||
method="post"
|
method="post"
|
||||||
></form>
|
></form>
|
||||||
<div id="b" class="foo">d</div>`, f("div form", "foo", "a b")},
|
<div id="b" class="foo">d</div>`, f("div form", "foo", "a b")},
|
||||||
|
{"Big input, multibyte runes", strings.Repeat(`神真美好 `, rnd.Intn(500)+1) + "<div id=\"神真美好\" class=\"foo\">" + strings.Repeat(`神真美好 `, rnd.Intn(100)+1) + " <span>神真美好</span>", f("div span", "foo", "神真美好")},
|
||||||
} {
|
} {
|
||||||
|
|
||||||
for _, minify := range []bool{false, true} {
|
for _, variant := range []struct {
|
||||||
c.Run(fmt.Sprintf("%s--minify-%t", test.name, minify), func(c *qt.C) {
|
minify bool
|
||||||
|
}{
|
||||||
|
{minify: false},
|
||||||
|
{minify: true},
|
||||||
|
} {
|
||||||
|
|
||||||
|
c.Run(fmt.Sprintf("%s--minify-%t", test.name, variant.minify), func(c *qt.C) {
|
||||||
w := newHTMLElementsCollectorWriter(newHTMLElementsCollector())
|
w := newHTMLElementsCollectorWriter(newHTMLElementsCollector())
|
||||||
if minify {
|
if variant.minify {
|
||||||
if skipMinifyTest[test.name] {
|
if skipMinifyTest[test.name] {
|
||||||
c.Skip("skip minify test")
|
c.Skip("skip minify test")
|
||||||
}
|
}
|
||||||
v := viper.New()
|
v := viper.New()
|
||||||
m, _ := minifiers.New(media.DefaultTypes, output.DefaultFormats, v)
|
m, _ := minifiers.New(media.DefaultTypes, output.DefaultFormats, v)
|
||||||
m.Minify(media.HTMLType, w, strings.NewReader(test.html))
|
m.Minify(media.HTMLType, w, strings.NewReader(test.html))
|
||||||
|
|
||||||
} else {
|
} else {
|
||||||
fmt.Fprint(w, test.html)
|
var buff bytes.Buffer
|
||||||
|
buff.WriteString(test.html)
|
||||||
|
io.Copy(w, &buff)
|
||||||
}
|
}
|
||||||
got := w.collector.getHTMLElements()
|
got := w.collector.getHTMLElements()
|
||||||
c.Assert(got, qt.DeepEquals, test.expect)
|
c.Assert(got, qt.DeepEquals, test.expect)
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func BenchmarkElementsCollectorWriter(b *testing.B) {
|
func BenchmarkElementsCollectorWriter(b *testing.B) {
|
||||||
|
|
Loading…
Reference in a new issue