transform/urlreplacers: Simplify implementation

This commit is contained in:
Bjørn Erik Pedersen 2018-12-17 17:42:46 +01:00
parent efe0b4e5c0
commit f7691fe965

View file

@ -22,15 +22,6 @@ import (
"github.com/gohugoio/hugo/transform" "github.com/gohugoio/hugo/transform"
) )
// matchState describes how far the lexer has come in recognising one of the
// attribute prefixes inside the current word.
type matchState int

const (
	// matchStateNone means no prefix is being tracked; runes are skipped
	// until the next space is seen.
	matchStateNone = matchState(iota)
	// matchStateWhitespace means the previous rune was a space, so the next
	// rune may start a prefix.
	matchStateWhitespace
	// matchStatePartial means at least one prefix still agrees with the
	// runes consumed so far, but no '=' has been reached yet.
	matchStatePartial
	// matchStateFull means a prefix has been matched up to and including
	// its '='.
	matchStateFull
)
type absurllexer struct { type absurllexer struct {
// the source to absurlify // the source to absurlify
content []byte content []byte
@ -42,31 +33,24 @@ type absurllexer struct {
pos int // input position pos int // input position
start int // item start position start int // item start position
width int // width of last element
quotes [][]byte quotes [][]byte
ms matchState
matches [3]bool // track matches of the 3 prefixes
idx int // last index in matches checked
} }
type stateFunc func(*absurllexer) stateFunc type stateFunc func(*absurllexer) stateFunc
// prefix describes how to identify a match and which func handles the replacement.
type prefix struct { type prefix struct {
r []rune disabled bool
b []byte
f func(l *absurllexer) f func(l *absurllexer)
} }
// new prefixes can be added below, but note: func newPrefixState() []*prefix {
// - the matches array above must be expanded. return []*prefix{
// - the prefix must with the current logic end with '=' {b: []byte("src="), f: checkCandidateBase},
var prefixes = []*prefix{ {b: []byte("href="), f: checkCandidateBase},
{r: []rune{'s', 'r', 'c', '='}, f: checkCandidateBase}, {b: []byte("srcset="), f: checkCandidateSrcset},
{r: []rune{'h', 'r', 'e', 'f', '='}, f: checkCandidateBase}, }
{r: []rune{'s', 'r', 'c', 's', 'e', 't', '='}, f: checkCandidateSrcset},
} }
type absURLMatcher struct { type absURLMatcher struct {
@ -74,68 +58,6 @@ type absURLMatcher struct {
quote []byte quote []byte
} }
// match feeds one rune of the current word into the prefix matcher and
// updates l.ms, l.idx and l.matches accordingly. r is never ' '; the caller
// handles spaces itself by switching to matchStateWhitespace.
//
// When called in matchStateWhitespace, r is the first rune of a new word and
// matching restarts from index 0 for every prefix. Otherwise r is compared
// against the next rune of each prefix that is still a candidate. If no
// prefix agrees with r the state drops to matchStateNone.
func (l *absurllexer) match(r rune) {
	var found bool

	// note, the prefixes can start off on the same foot, i.e.
	// src and srcset.
	if l.ms == matchStateWhitespace {
		l.idx = 0
		for j, p := range prefixes {
			if len(p.r) > 0 && r == p.r[0] {
				l.matches[j] = true
				found = true
				// checkMatchState will only return true when r=='=', so
				// we can safely ignore the return value here.
				l.checkMatchState(r, j)
			} else {
				// Clear candidates left over from a partial match of the
				// previous word. Without this, " s hrc=" is reported as a
				// full match of "src=" because the stale flag for "src="
				// survives the space and only indexes 1.. are compared.
				l.matches[j] = false
			}
		}

		if !found {
			l.ms = matchStateNone
		}

		return
	}

	l.idx++
	for j, m := range l.matches {
		// still a match?
		if !m {
			continue
		}

		want := prefixes[j].r
		// The length guard keeps a candidate that has run past its prefix
		// from indexing out of range; it is simply dropped.
		if l.idx >= len(want) || want[l.idx] != r {
			l.matches[j] = false
			continue
		}

		found = true
		if l.checkMatchState(r, j) {
			return
		}
	}

	if !found {
		l.ms = matchStateNone
	}
}
// checkMatchState updates the lexer state after rune r has been accepted for
// the prefix at index idx. It reports whether the prefix is now complete.
//
// A '=' completes the prefix: the state becomes matchStateFull and every
// other entry in l.matches is cleared. Any other rune leaves l.matches alone
// and sets the state to matchStatePartial.
func (l *absurllexer) checkMatchState(r rune, idx int) bool {
	if r != '=' {
		l.ms = matchStatePartial
		return false
	}

	l.ms = matchStateFull
	for i := range l.matches {
		// Only the entry at idx keeps whatever value it already had.
		l.matches[i] = l.matches[i] && i == idx
	}
	return true
}
func (l *absurllexer) emit() { func (l *absurllexer) emit() {
l.w.Write(l.content[l.start:l.pos]) l.w.Write(l.content[l.start:l.pos])
l.start = l.pos l.start = l.pos
@ -255,36 +177,40 @@ func checkCandidateSrcset(l *absurllexer) {
// main loop // main loop
func (l *absurllexer) replace() { func (l *absurllexer) replace() {
contentLength := len(l.content) contentLength := len(l.content)
var r rune
prefixes := newPrefixState()
for { for {
if l.pos >= contentLength { if l.pos >= contentLength {
l.width = 0
break break
} }
var width = 1 nextPos := -1
r = rune(l.content[l.pos])
if r >= utf8.RuneSelf { var match *prefix
r, width = utf8.DecodeRune(l.content[l.pos:])
for _, p := range prefixes {
if p.disabled {
continue
} }
l.width = width idx := bytes.Index(l.content[l.pos:], p.b)
l.pos += l.width
if r == ' ' { if idx == -1 {
l.ms = matchStateWhitespace p.disabled = true
} else if l.ms != matchStateNone { // Find the closest match
l.match(r) } else if nextPos == -1 || idx < nextPos {
if l.ms == matchStateFull { nextPos = idx
var p *prefix match = p
for i, m := range l.matches {
if m {
p = prefixes[i]
l.matches[i] = false
} }
} }
l.ms = matchStateNone
p.f(l) if nextPos == -1 {
} // Done!
l.pos = contentLength
break
} else {
l.pos += nextPos + len(match.b)
match.f(l)
} }
} }