mirror of https://github.com/gohugoio/hugo.git (synced 2025-02-18 09:22:05 +00:00)
Speed is about the same as before, and it uses slightly less memory:

```
benchmark            old ns/op    new ns/op    delta
BenchmarkAbsURL      17302        17713        +2.38%
BenchmarkXMLAbsURL   9463         9470         +0.07%

benchmark            old allocs   new allocs   delta
BenchmarkAbsURL      28           24           -14.29%
BenchmarkXMLAbsURL   14           12           -14.29%

benchmark            old bytes    new bytes    delta
BenchmarkAbsURL      3422         3144         -8.12%
BenchmarkXMLAbsURL   1985         1864         -6.10%
```

Fixes #1059
330 lines · 5.9 KiB · Go

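// absurlreplacer.go rewrites relative URLs in rendered HTML and XML
// into absolute URLs. A small hand-rolled lexer scans the content for
// src=, srcset= and href= attributes whose quoted values start with
// "/" and prefixes them with the site's base URL. For example, with
// base URL http://example.org/, href="/docs/" becomes
// href="http://example.org/docs/", while schemaless values such as
// href="//cdn.example.org/x" are left untouched.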
package transform

import (
	"bytes"
	"io"
	"net/url"
	"strings"
	"unicode/utf8"
)

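// matchState tracks how far the lexer has come in matching one of the
// attribute prefixes: no candidate, just-seen whitespace (a new
// attribute may begin), a partial match, or a full match ending in '='.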
type matchState int

const (
	matchStateNone matchState = iota
	matchStateWhitespace
	matchStatePartial
	matchStateFull
)

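// The attribute kinds an absURLMatcher can be registered for.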
const (
	matchPrefixSrc int = iota
	matchPrefixHref
)

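// contentlexer holds the scanner state: the content being scanned, the
// byte positions delimiting the current item, the matchers to apply,
// and the writer that receives the (possibly rewritten) output.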
type contentlexer struct {
	content []byte

	pos   int // input position
	start int // item start position
	width int // width of last rune read

	matchers []absURLMatcher
	state    stateFunc

	ms      matchState
	matches [3]bool // track matches of the 3 prefixes
	i       int     // current index into the prefixes' rune slices

	w io.Writer
}

type stateFunc func(*contentlexer) stateFunc

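// prefix couples an attribute name, spelled out rune by rune, with the
// handler to invoke once the name and its trailing '=' have been fully
// matched.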
type prefix struct {
	r []rune
	f func(l *contentlexer)
}

var prefixes = []*prefix{
	{r: []rune{'s', 'r', 'c', '='}, f: checkCandidateSrc},
	{r: []rune{'s', 'r', 'c', 's', 'e', 't', '='}, f: checkCandidateSrcset},
	{r: []rune{'h', 'r', 'e', 'f', '='}, f: checkCandidateHref},
}

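// absURLMatcher describes a single rewrite: which attribute kind it
// applies to, the byte pattern to look for (opening quote followed by
// '/'), the quote itself, and the absolute base URL that replaces the
// leading '/'.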
type absURLMatcher struct {
	prefix         int
	match          []byte
	quote          []byte
	replacementURL []byte
}

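// match advances prefix matching by one rune. Right after whitespace it
// seeds all prefix candidates; otherwise it narrows down the candidates
// that still match, handing off to checkMatchState on each hit.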
func (l *contentlexer) match(r rune) {
	var found bool

	// Note that the prefixes can start off identically, e.g.
	// src and srcset.
	if l.ms == matchStateWhitespace {
		l.i = 0
		for j, p := range prefixes {
			if r == p.r[l.i] {
				l.matches[j] = true
				found = true
				if l.checkMatchState(r, j) {
					return
				}
			} else {
				l.matches[j] = false
			}
		}

		if !found {
			l.ms = matchStateNone
		}

		return
	}

	l.i++
	for j, m := range l.matches {
		// still a match?
		if m {
			if prefixes[j].r[l.i] == r {
				found = true
				if l.checkMatchState(r, j) {
					return
				}
			} else {
				l.matches[j] = false
			}
		}
	}

	if found {
		return
	}

	l.ms = matchStateNone
}

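// checkMatchState reports whether the candidate at idx has been matched
// completely: on '=' the state becomes full and all other candidates
// are cleared, otherwise the match is recorded as partial.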
func (l *contentlexer) checkMatchState(r rune, idx int) bool {
	if r == '=' {
		l.ms = matchStateFull
		for k := range l.matches {
			if k != idx {
				l.matches[k] = false
			}
		}
		return true
	}

	l.ms = matchStatePartial

	return false
}

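// emit writes the pending, untouched content between start and pos to
// the output and moves start forward.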
func (l *contentlexer) emit() {
	l.w.Write(l.content[l.start:l.pos])
	l.start = l.pos
}

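// isSourceType reports whether this matcher is registered for the
// source attribute kind rather than href.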
func (a absURLMatcher) isSourceType() bool {
	return a.prefix == matchPrefixSrc
}

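// checkCandidateSrc and checkCandidateHref run the simple single-URL
// replacement against the matchers of their respective attribute kind.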
func checkCandidateSrc(l *contentlexer) {
	for _, m := range l.matchers {
		if !m.isSourceType() {
			continue
		}
		l.replaceSimple(m)
	}
}

func checkCandidateHref(l *contentlexer) {
	for _, m := range l.matchers {
		if m.isSourceType() {
			continue
		}
		l.replaceSimple(m)
	}
}

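// checkCandidateSrcset handles srcset= values, which may contain
// several whitespace-separated fields; every field that starts with '/'
// is made absolute individually.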
func checkCandidateSrcset(l *contentlexer) {
	// special case, and probably not very frequent
	for _, m := range l.matchers {
		if m.isSourceType() {
			continue
		}

		if !bytes.HasPrefix(l.content[l.pos:], m.match) {
			continue
		}

		// check for schemaless URLs
		posAfter := l.pos + len(m.match)
		if posAfter >= len(l.content) {
			return
		}
		r, _ := utf8.DecodeRune(l.content[posAfter:])
		if r == '/' {
			// schemaless: skip
			continue
		}

		posLastQuote := bytes.Index(l.content[l.pos+1:], m.quote)

		// safeguard: no closing quote, or an implausibly long value
		if posLastQuote < 0 || posLastQuote > 2000 {
			return
		}

		if l.pos > l.start {
			l.emit()
		}

		section := l.content[l.pos+len(m.quote) : l.pos+posLastQuote+1]

		fields := bytes.Fields(section)
		l.w.Write(m.quote)
		for i, f := range fields {
			if f[0] == '/' {
				l.w.Write(m.replacementURL)
				l.w.Write(f[1:])
			} else {
				l.w.Write(f)
			}

			if i < len(fields)-1 {
				l.w.Write([]byte(" "))
			}
		}

		l.w.Write(m.quote)
		l.pos += len(section) + (len(m.quote) * 2)
		l.start = l.pos
	}
}

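// replaceSimple rewrites one quoted root-relative URL: when the content
// at the current position starts with the matcher's quote-plus-slash
// pattern (and is not a schemaless //-URL), it emits the pending
// content, then writes the opening quote followed by the absolute base
// URL; the rest of the value is passed through on the next emit.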
func (l *contentlexer) replaceSimple(m absURLMatcher) {
	if !bytes.HasPrefix(l.content[l.pos:], m.match) {
		return
	}
	// check for schemaless URLs
	posAfter := l.pos + len(m.match)
	if posAfter >= len(l.content) {
		return
	}
	r, _ := utf8.DecodeRune(l.content[posAfter:])
	if r == '/' {
		// schemaless: skip
		return
	}
	if l.pos > l.start {
		l.emit()
	}
	l.pos += len(m.match)
	l.w.Write(m.quote)
	l.w.Write(m.replacementURL)
	l.start = l.pos
}

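// replace is the main scanning loop. It walks the content rune by rune,
// treats a space as the cue that a new attribute may begin, feeds runes
// into the prefix matcher, and dispatches the matched prefix's handler
// on a full match. Anything left unmatched is emitted verbatim.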
func (l *contentlexer) replace() {
	contentLength := len(l.content)
	var r rune

	for {
		if l.pos >= contentLength {
			l.width = 0
			break
		}

		var width = 1
		r = rune(l.content[l.pos])
		if r >= utf8.RuneSelf {
			r, width = utf8.DecodeRune(l.content[l.pos:])
		}
		l.width = width
		l.pos += l.width
		if r == ' ' {
			l.ms = matchStateWhitespace
		} else if l.ms != matchStateNone {
			l.match(r)
			if l.ms == matchStateFull {
				var p *prefix
				for i, m := range l.matches {
					if m {
						p = prefixes[i]
					}
					l.matches[i] = false
				}
				if p == nil {
					panic("illegal state: matched prefix is nil when state is full")
				}
				l.ms = matchStateNone
				p.f(l)
			}
		}
	}

	// Done!
	if l.pos > l.start {
		l.emit()
	}
}

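// doReplace runs one lexer pass over the transformer's content and
// writes the rewritten result back to it.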
func doReplace(ct contentTransformer, matchers []absURLMatcher) {
	lexer := &contentlexer{
		content:  ct.Content(),
		w:        ct,
		matchers: matchers}

	lexer.replace()
}

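// absURLReplacer bundles the matcher sets for HTML and XML output; the
// two differ only in how the surrounding quotes are encoded.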
type absURLReplacer struct {
	htmlMatchers []absURLMatcher
	xmlMatchers  []absURLMatcher
}

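// newAbsURLReplacer builds matchers for both double- and single-quoted
// attribute values. For XML output the quotes are matched in the
// escaped entity forms emitted by Go's encoding/xml, "&#34;" and
// "&#39;".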
func newAbsURLReplacer(baseURL string) *absURLReplacer {
	u, _ := url.Parse(baseURL)
	base := []byte(strings.TrimRight(u.String(), "/") + "/")

	// HTML
	dqHTMLMatch := []byte("\"/")
	sqHTMLMatch := []byte("'/")

	// XML
	dqXMLMatch := []byte("&#34;/")
	sqXMLMatch := []byte("&#39;/")

	dqHTML := []byte("\"")
	sqHTML := []byte("'")

	dqXML := []byte("&#34;")
	sqXML := []byte("&#39;")

	return &absURLReplacer{
		htmlMatchers: []absURLMatcher{
			{matchPrefixSrc, dqHTMLMatch, dqHTML, base},
			{matchPrefixSrc, sqHTMLMatch, sqHTML, base},
			{matchPrefixHref, dqHTMLMatch, dqHTML, base},
			{matchPrefixHref, sqHTMLMatch, sqHTML, base}},
		xmlMatchers: []absURLMatcher{
			{matchPrefixSrc, dqXMLMatch, dqXML, base},
			{matchPrefixSrc, sqXMLMatch, sqXML, base},
			{matchPrefixHref, dqXMLMatch, dqXML, base},
			{matchPrefixHref, sqXMLMatch, sqXML, base},
		}}
}

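// replaceInHTML and replaceInXML run the replacement with the HTML and
// XML matcher sets respectively.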
func (au *absURLReplacer) replaceInHTML(ct contentTransformer) {
	doReplace(ct, au.htmlMatchers)
}

func (au *absURLReplacer) replaceInXML(ct contentTransformer) {
	doReplace(ct, au.xmlMatchers)
}
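
// A minimal sketch of how this replacer might be exercised, assuming a
// hypothetical contentTransformer implementation (bufferTransformer is
// not part of this file) that exposes its content and collects the
// rewritten output:
//
//	var buf bytes.Buffer
//	ct := &bufferTransformer{
//		content: []byte(`<a href="/docs/">Docs</a>`),
//		Buffer:  &buf,
//	}
//	newAbsURLReplacer("http://example.org/").replaceInHTML(ct)
//	// buf.String() == `<a href="http://example.org/docs/">Docs</a>`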