mirror of
https://github.com/gohugoio/hugo.git
synced 2024-11-07 20:30:36 -05:00
publisher: Some performance tweaks for the HTML elements collector
This commit is contained in:
parent
bc80022e03
commit
ef34dd8f0e
2 changed files with 50 additions and 102 deletions
|
@ -108,13 +108,13 @@ func newHTMLElementsCollectorWriter(collector *htmlElementsCollector) *htmlEleme
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Write splits the incoming stream into single html element and writes these into elementSet
|
// Write splits the incoming stream into single html element.
|
||||||
func (w *htmlElementsCollectorWriter) Write(p []byte) (n int, err error) {
|
func (w *htmlElementsCollectorWriter) Write(p []byte) (n int, err error) {
|
||||||
n = len(p)
|
n = len(p)
|
||||||
i := 0
|
i := 0
|
||||||
|
|
||||||
for i < len(p) {
|
for i < len(p) {
|
||||||
// if is not collecting, cycle through byte stream until start bracket "<" is found
|
// If we are not collecting, cycle through byte stream until start bracket "<" is found.
|
||||||
if !w.isCollecting {
|
if !w.isCollecting {
|
||||||
for ; i < len(p); i++ {
|
for ; i < len(p); i++ {
|
||||||
b := p[i]
|
b := p[i]
|
||||||
|
@ -126,9 +126,9 @@ func (w *htmlElementsCollectorWriter) Write(p []byte) (n int, err error) {
|
||||||
}
|
}
|
||||||
|
|
||||||
if w.isCollecting {
|
if w.isCollecting {
|
||||||
// if is collecting, cycle through byte stream until end bracket ">" is found
|
// If we are collecting, cycle through byte stream until end bracket ">" is found,
|
||||||
// disregard any ">" if within a quote
|
// disregard any ">" if within a quote,
|
||||||
// write bytes until found to buffer
|
// write bytes until found to buffer.
|
||||||
for ; i < len(p); i++ {
|
for ; i < len(p); i++ {
|
||||||
b := p[i]
|
b := p[i]
|
||||||
w.toggleIfQuote(b)
|
w.toggleIfQuote(b)
|
||||||
|
@ -141,54 +141,69 @@ func (w *htmlElementsCollectorWriter) Write(p []byte) (n int, err error) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// if no end bracket ">" is found while collecting, but the stream ended
|
// If no end bracket ">" is found while collecting, but the stream ended
|
||||||
// this could mean we received chunks of a stream from e.g. the minify functionality
|
// this could mean we received chunks of a stream from e.g. the minify functionality
|
||||||
// next if loop will be skipped
|
// next if loop will be skipped.
|
||||||
|
|
||||||
// at this point we have collected an element line between angle brackets "<" and ">"
|
// At this point we have collected an element line between angle brackets "<" and ">".
|
||||||
if !w.isCollecting {
|
if !w.isCollecting {
|
||||||
s := w.buff.String()
|
if w.buff.Len() == 0 {
|
||||||
w.buff.Reset()
|
|
||||||
|
|
||||||
// filter out unwanted tags
|
|
||||||
// empty string, just in case
|
|
||||||
// if within preformatted code blocks <pre>, <textarea>, <script>, <style>
|
|
||||||
// comments and doctype tags
|
|
||||||
// end tags
|
|
||||||
switch {
|
|
||||||
case s == "": // empty string
|
|
||||||
continue
|
continue
|
||||||
case w.inPreTag != "": // within preformatted code block
|
}
|
||||||
|
|
||||||
|
if w.inPreTag != "" { // within preformatted code block
|
||||||
|
s := w.buff.String()
|
||||||
|
w.buff.Reset()
|
||||||
if tagName, isEnd := parseEndTag(s); isEnd && w.inPreTag == tagName {
|
if tagName, isEnd := parseEndTag(s); isEnd && w.inPreTag == tagName {
|
||||||
w.inPreTag = ""
|
w.inPreTag = ""
|
||||||
}
|
}
|
||||||
continue
|
continue
|
||||||
case strings.HasPrefix(s, "<!"): // comment or doctype tag
|
|
||||||
continue
|
|
||||||
case strings.HasPrefix(s, "</"): // end tag
|
|
||||||
continue
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// check if we have processed this element before.
|
// First check if we have processed this element before.
|
||||||
w.collector.mu.RLock()
|
w.collector.mu.RLock()
|
||||||
seen := w.collector.elementSet[s]
|
|
||||||
|
// Work with the bytes slice as long as it's practical,
|
||||||
|
// to save memory allocations.
|
||||||
|
b := w.buff.Bytes()
|
||||||
|
|
||||||
|
// See https://github.com/dominikh/go-tools/issues/723
|
||||||
|
//lint:ignore S1030 This construct avoids memory allocation for the string.
|
||||||
|
seen := w.collector.elementSet[string(b)]
|
||||||
w.collector.mu.RUnlock()
|
w.collector.mu.RUnlock()
|
||||||
if seen {
|
if seen {
|
||||||
|
w.buff.Reset()
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
// check if a preformatted code block started
|
// Filter out unwanted tags
|
||||||
|
// if within preformatted code blocks <pre>, <textarea>, <script>, <style>
|
||||||
|
// comments and doctype tags
|
||||||
|
// end tags.
|
||||||
|
switch {
|
||||||
|
case bytes.HasPrefix(b, []byte("<!")): // comment or doctype tag
|
||||||
|
w.buff.Reset()
|
||||||
|
continue
|
||||||
|
case bytes.HasPrefix(b, []byte("</")): // end tag
|
||||||
|
w.buff.Reset()
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
s := w.buff.String()
|
||||||
|
w.buff.Reset()
|
||||||
|
|
||||||
|
// Check if a preformatted code block started.
|
||||||
if tagName, isStart := parseStartTag(s); isStart && isPreFormatted(tagName) {
|
if tagName, isStart := parseStartTag(s); isStart && isPreFormatted(tagName) {
|
||||||
w.inPreTag = tagName
|
w.inPreTag = tagName
|
||||||
}
|
}
|
||||||
|
|
||||||
// parse each collected element
|
// Parse each collected element.
|
||||||
el, err := parseHTMLElement(s)
|
el, err := parseHTMLElement(s)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return n, err
|
return n, err
|
||||||
}
|
}
|
||||||
|
|
||||||
// write this tag to the element set
|
// Write this tag to the element set.
|
||||||
w.collector.mu.Lock()
|
w.collector.mu.Lock()
|
||||||
w.collector.elementSet[s] = true
|
w.collector.elementSet[s] = true
|
||||||
w.collector.elements = append(w.collector.elements, el)
|
w.collector.elements = append(w.collector.elements, el)
|
||||||
|
@ -265,17 +280,18 @@ var (
|
||||||
htmlJsonFixer = strings.NewReplacer(", ", "\n")
|
htmlJsonFixer = strings.NewReplacer(", ", "\n")
|
||||||
jsonAttrRe = regexp.MustCompile(`'?(.*?)'?:.*`)
|
jsonAttrRe = regexp.MustCompile(`'?(.*?)'?:.*`)
|
||||||
classAttrRe = regexp.MustCompile(`(?i)^class$|transition`)
|
classAttrRe = regexp.MustCompile(`(?i)^class$|transition`)
|
||||||
)
|
|
||||||
|
|
||||||
func parseHTMLElement(elStr string) (el htmlElement, err error) {
|
exceptionList = map[string]bool{
|
||||||
var tagBuffer string = ""
|
|
||||||
exceptionList := map[string]bool{
|
|
||||||
"thead": true,
|
"thead": true,
|
||||||
"tbody": true,
|
"tbody": true,
|
||||||
"tfoot": true,
|
"tfoot": true,
|
||||||
"td": true,
|
"td": true,
|
||||||
"tr": true,
|
"tr": true,
|
||||||
}
|
}
|
||||||
|
)
|
||||||
|
|
||||||
|
func parseHTMLElement(elStr string) (el htmlElement, err error) {
|
||||||
|
var tagBuffer string = ""
|
||||||
|
|
||||||
tagName, ok := parseStartTag(elStr)
|
tagName, ok := parseStartTag(elStr)
|
||||||
if !ok {
|
if !ok {
|
||||||
|
|
|
@ -14,7 +14,6 @@
|
||||||
package publisher
|
package publisher
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"bytes"
|
|
||||||
"fmt"
|
"fmt"
|
||||||
"strings"
|
"strings"
|
||||||
"testing"
|
"testing"
|
||||||
|
@ -129,33 +128,8 @@ func TestClassCollector(t *testing.T) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func BenchmarkClassCollectorWriter(b *testing.B) {
|
func BenchmarkElementsCollectorWriter(b *testing.B) {
|
||||||
const benchHTML = `
|
const benchHTML = `
|
||||||
<html>
|
|
||||||
<body id="i1" class="a b c d">
|
|
||||||
<a class="c d e"></a>
|
|
||||||
<br>
|
|
||||||
<a class="c d e"></a>
|
|
||||||
<a class="c d e"></a>
|
|
||||||
<br>
|
|
||||||
<a id="i2" class="c d e f"></a>
|
|
||||||
<a id="i3" class="c d e"></a>
|
|
||||||
<a class="c d e"></a>
|
|
||||||
<br>
|
|
||||||
<a class="c d e"></a>
|
|
||||||
<a class="c d e"></a>
|
|
||||||
<a class="c d e"></a>
|
|
||||||
<a class="c d e"></a>
|
|
||||||
</body>
|
|
||||||
</html>
|
|
||||||
`
|
|
||||||
for i := 0; i < b.N; i++ {
|
|
||||||
w := newHTMLElementsCollectorWriter(newHTMLElementsCollector())
|
|
||||||
fmt.Fprint(w, benchHTML)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
const benchHTML = `
|
|
||||||
<!DOCTYPE html>
|
<!DOCTYPE html>
|
||||||
<html>
|
<html>
|
||||||
<head>
|
<head>
|
||||||
|
@ -207,51 +181,9 @@ const benchHTML = `
|
||||||
</body>
|
</body>
|
||||||
</html>
|
</html>
|
||||||
`
|
`
|
||||||
|
|
||||||
func BenchmarkElementsCollectorWriter(b *testing.B) {
|
|
||||||
b.ReportAllocs()
|
|
||||||
for i := 0; i < b.N; i++ {
|
for i := 0; i < b.N; i++ {
|
||||||
w := newHTMLElementsCollectorWriter(newHTMLElementsCollector())
|
w := newHTMLElementsCollectorWriter(newHTMLElementsCollector())
|
||||||
fmt.Fprint(w, benchHTML)
|
fmt.Fprint(w, benchHTML)
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func BenchmarkElementsCollectorWriterMinified(b *testing.B) {
|
|
||||||
b.ReportAllocs()
|
|
||||||
v := viper.New()
|
|
||||||
m, _ := minifiers.New(media.DefaultTypes, output.DefaultFormats, v)
|
|
||||||
var buf bytes.Buffer
|
|
||||||
m.Minify(media.HTMLType, &buf, strings.NewReader(benchHTML))
|
|
||||||
b.ResetTimer()
|
|
||||||
|
|
||||||
for i := 0; i < b.N; i++ {
|
|
||||||
w := newHTMLElementsCollectorWriter(newHTMLElementsCollector())
|
|
||||||
fmt.Fprint(w, buf.String())
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func BenchmarkElementsCollectorWriterWithMinifyStream(b *testing.B) {
|
|
||||||
b.ReportAllocs()
|
|
||||||
v := viper.New()
|
|
||||||
m, _ := minifiers.New(media.DefaultTypes, output.DefaultFormats, v)
|
|
||||||
b.ResetTimer()
|
|
||||||
|
|
||||||
for i := 0; i < b.N; i++ {
|
|
||||||
w := newHTMLElementsCollectorWriter(newHTMLElementsCollector())
|
|
||||||
m.Minify(media.HTMLType, w, strings.NewReader(benchHTML))
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func BenchmarkElementsCollectorWriterWithMinifyString(b *testing.B) {
|
|
||||||
b.ReportAllocs()
|
|
||||||
v := viper.New()
|
|
||||||
m, _ := minifiers.New(media.DefaultTypes, output.DefaultFormats, v)
|
|
||||||
b.ResetTimer()
|
|
||||||
|
|
||||||
for i := 0; i < b.N; i++ {
|
|
||||||
var buf bytes.Buffer
|
|
||||||
m.Minify(media.HTMLType, &buf, strings.NewReader(benchHTML))
|
|
||||||
w := newHTMLElementsCollectorWriter(newHTMLElementsCollector())
|
|
||||||
fmt.Fprint(w, buf.String())
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue