hugo/transform/absurlreplacer.go
2015-03-19 02:07:57 +01:00

235 lines
4.5 KiB
Go

package transform
import (
"bytes"
"io"
"net/url"
"strings"
"unicode/utf8"
)
// matchState describes how far the lexer has progressed towards recognising
// one of the attribute prefixes.
type matchState int
const (
// matchStateNone: no match is in progress; runes are skipped until the next space.
matchStateNone matchState = iota
// matchStateWhitespace: a space was just read, so a prefix may start at the next rune.
matchStateWhitespace
// matchStatePartial: the runes read so far match the start of a prefix.
matchStatePartial
// matchStateFull: every rune of a prefix has been matched.
matchStateFull
)
// Values for absURLMatcher.prefix, selecting the attribute a matcher applies to.
const (
// matchPrefixSrc marks a matcher as a source-type matcher (see isSourceType);
// checkCandidate uses those when the matched prefix starts with 's'.
matchPrefixSrc int = iota
// matchPrefixHref marks a matcher used for any other matched prefix
// (href= in mainPrefixRunes).
matchPrefixHref
)
// contentlexer scans content for attribute prefixes and writes the content,
// with matched URL openings replaced, to w.
type contentlexer struct {
content []byte // input being scanned
pos int // current read position in content (byte offset)
start int // offset of the first byte not yet written to w
width int // byte width of the last rune read
matchers []absURLMatcher // match/replacement pairs tried after a full prefix match
state stateFunc // NOTE(review): not referenced in the visible code
prefixLookup *prefixes // state of the attribute-prefix matching
w io.Writer // destination for emitted content
}
// stateFunc is a lexer step that takes the lexer and returns the next step.
// NOTE(review): no stateFunc is created or called in the visible code.
type stateFunc func(*contentlexer) stateFunc
// prefixRunes is one attribute prefix spelled out rune by rune, e.g. 's', 'r', 'c', '='.
type prefixRunes []rune
// prefixes holds the table of prefixes to look for together with the state
// of the match currently in progress.
type prefixes struct {
pr []prefixRunes // all prefixes that can be matched
curr prefixRunes // prefix currently being matched; nil when none is in play
i int // index into curr of the rune compared last
// first rune of the prefix matched most recently; checkCandidate uses it
// to tell a src match ('s') from any other
first rune
// match-state:
// none, whitespace, partial, full
ms matchState
}
// match advances the prefix lookup by one rune and records the outcome in
// l.prefixLookup.ms:
//   - matchStateFull when r completes a prefix,
//   - matchStatePartial when r starts or extends a prefix that still has
//     runes left to match,
//   - matchStateNone when r does not fit; any prefix in play is dropped.
//
// When no prefix is in play the candidate is picked by its first rune alone,
// which assumes that all prefixes start with a different rune. That holds
// for the two prefixes in use: href and src.
func (l *contentlexer) match(r rune) {
	p := l.prefixLookup

	if p.curr == nil {
		p.i = 0
		for _, pr := range p.pr {
			// Skip empty prefixes instead of indexing past their end.
			if len(pr) == 0 || pr[0] != r {
				continue
			}
			p.first = r
			// The match is complete only if the matched prefix itself is a
			// single rune; the number of prefixes in the table is irrelevant.
			if len(pr) == 1 {
				p.ms = matchStateFull
			} else {
				p.curr = pr
				p.ms = matchStatePartial
			}
			return
		}
		p.ms = matchStateNone
		return
	}

	// A prefix is in play: compare r with its next rune. curr is only ever
	// set while runes remain, so p.i stays within bounds.
	p.i++
	if p.curr[p.i] == r {
		if p.i+1 == len(p.curr) {
			p.curr = nil
			p.ms = matchStateFull
		} else {
			p.ms = matchStatePartial
		}
		return
	}

	p.curr = nil
	p.ms = matchStateNone
}
// emit writes the pending content, l.content[l.start:l.pos], to the writer
// and marks it as consumed by moving start up to pos. The result of the
// Write call is not checked.
func (l *contentlexer) emit() {
	pending := l.content[l.start:l.pos]
	l.w.Write(pending)
	l.start = l.pos
}
// mainPrefixRunes lists the attribute prefixes the lexer looks for: "src=" and "href=".
// doReplace passes it to every lexer as its prefix table.
var mainPrefixRunes = []prefixRunes{{'s', 'r', 'c', '='}, {'h', 'r', 'e', 'f', '='}}
// absURLMatcher pairs a byte sequence to look for right after an attribute
// prefix with the bytes that are written in its place.
type absURLMatcher struct {
prefix int // matchPrefixSrc or matchPrefixHref
match []byte // bytes expected directly after the prefix
replacement []byte // bytes written instead of match
}
// isSourceType reports whether the matcher's prefix is matchPrefixSrc.
func (a absURLMatcher) isSourceType() bool {
	switch a.prefix {
	case matchPrefixSrc:
		return true
	default:
		return false
	}
}
// checkCandidate is called once a full attribute prefix has been read. It
// inspects the content at the current position and, if it starts with the
// match of a matcher registered for that kind of prefix, writes the
// matcher's replacement in place of the matched bytes.
//
// Nothing is replaced when the match is the very last thing in the content,
// or when it is followed by another '/', which indicates a schemaless URL.
// Only the first matcher whose match fits is considered.
func checkCandidate(l *contentlexer) {
	wantSource := l.prefixLookup.first == 's'
	rest := l.content[l.pos:]

	for _, m := range l.matchers {
		// Only matchers registered for the prefix kind that was matched apply.
		if m.isSourceType() != wantSource {
			continue
		}
		if !bytes.HasPrefix(rest, m.match) {
			continue
		}

		next := l.pos + len(m.match)
		if next >= len(l.content) {
			return
		}
		if r, _ := utf8.DecodeRune(l.content[next:]); r == '/' {
			// schemaless URL: leave it untouched
			return
		}

		// Flush what has been read so far, then write the replacement and
		// continue scanning after the matched bytes.
		if l.pos > l.start {
			l.emit()
		}
		l.pos = next
		l.w.Write(m.replacement)
		l.start = l.pos
		return
	}
}
// replace walks the content rune by rune. A space arms the prefix matcher;
// while a match is possible every following rune is fed to match, and a
// completed prefix hands over to checkCandidate, which may write a
// replacement and move the read position forward. Content that is still
// pending when the end is reached is emitted unchanged.
func (l *contentlexer) replace() {
	end := len(l.content)

	for l.pos < end {
		// Single bytes below utf8.RuneSelf are complete runes; only decode
		// when the byte starts a multi-byte sequence.
		r, size := rune(l.content[l.pos]), 1
		if r >= utf8.RuneSelf {
			r, size = utf8.DecodeRune(l.content[l.pos:])
		}
		l.width = size
		l.pos += size

		switch {
		case r == ' ':
			l.prefixLookup.ms = matchStateWhitespace
		case l.prefixLookup.ms != matchStateNone:
			l.match(r)
			if l.prefixLookup.ms == matchStateFull {
				checkCandidate(l)
			}
		}
	}
	// End of input: there is no last element any more.
	l.width = 0

	// Done! Flush whatever follows the last replacement.
	if l.pos > l.start {
		l.emit()
	}
}
// doReplace runs a lexer over ct.Content() using the given matchers and the
// src/href prefix table, with ct itself as the destination of the output.
func doReplace(ct contentTransformer, matchers []absURLMatcher) {
	lookup := &prefixes{pr: mainPrefixRunes}
	l := &contentlexer{
		content:      ct.Content(),
		w:            ct,
		prefixLookup: lookup,
		matchers:     matchers,
	}
	l.replace()
}
// absURLReplacer holds the matcher sets built for one base URL, one set for
// HTML content and one for XML content.
type absURLReplacer struct {
htmlMatchers []absURLMatcher // used by replaceInHTML
xmlMatchers []absURLMatcher // used by replaceInXML
}
func newAbsURLReplacer(baseURL string) *absURLReplacer {
u, _ := url.Parse(baseURL)
base := strings.TrimRight(u.String(), "/")
// HTML
dqHTMLMatch := []byte("\"/")
sqHTMLMatch := []byte("'/")
// XML
dqXMLMatch := []byte(""/")
sqXMLMatch := []byte("'/")
dqHTML := []byte("\"" + base + "/")
sqHTML := []byte("'" + base + "/")
dqXML := []byte(""" + base + "/")
sqXML := []byte("'" + base + "/")
return &absURLReplacer{
htmlMatchers: []absURLMatcher{
{matchPrefixSrc, dqHTMLMatch, dqHTML},
{matchPrefixSrc, sqHTMLMatch, sqHTML},
{matchPrefixHref, dqHTMLMatch, dqHTML},
{matchPrefixHref, sqHTMLMatch, sqHTML}},
xmlMatchers: []absURLMatcher{
{matchPrefixSrc, dqXMLMatch, dqXML},
{matchPrefixSrc, sqXMLMatch, sqXML},
{matchPrefixHref, dqXMLMatch, dqXML},
{matchPrefixHref, sqXMLMatch, sqXML},
}}
}
// replaceInHTML runs the replacement over ct using the HTML matchers.
func (au *absURLReplacer) replaceInHTML(ct contentTransformer) {
	matchers := au.htmlMatchers
	doReplace(ct, matchers)
}
// replaceInXML runs the replacement over ct using the XML matchers.
func (au *absURLReplacer) replaceInXML(ct contentTransformer) {
	matchers := au.xmlMatchers
	doReplace(ct, matchers)
}