2015-02-16 09:48:15 +00:00
|
|
|
package transform
|
|
|
|
|
|
|
|
import (
|
|
|
|
"bytes"
|
2015-03-17 23:36:48 +00:00
|
|
|
"io"
|
2015-02-16 09:48:15 +00:00
|
|
|
"net/url"
|
|
|
|
"strings"
|
|
|
|
"unicode/utf8"
|
|
|
|
)
|
|
|
|
|
|
|
|
// matchState describes how far the prefix matcher has got; its values are the
// matchState* constants and it is stored in prefixes.ms.
type matchState int
|
|
|
|
|
|
|
|
const (
|
|
|
|
matchStateNone matchState = iota
|
|
|
|
matchStateWhitespace
|
|
|
|
matchStatePartial
|
|
|
|
matchStateFull
|
|
|
|
)
|
|
|
|
|
|
|
|
// Prefix kinds stored in absURLMatcher.prefix.
const (
	// matchPrefixSrc marks a matcher as a "source type" matcher
	// (see absURLMatcher.isSourceType).
	matchPrefixSrc int = iota
	// matchPrefixHref marks every matcher that is not a source type matcher.
	matchPrefixHref
)
|
|
|
|
|
|
|
|
type contentlexer struct {
|
|
|
|
content []byte
|
|
|
|
|
2015-03-10 17:44:32 +00:00
|
|
|
pos int // input position
|
|
|
|
start int // item start position
|
|
|
|
width int // width of last element
|
2015-02-16 09:48:15 +00:00
|
|
|
|
2015-03-11 17:34:57 +00:00
|
|
|
matchers []absURLMatcher
|
2015-02-16 09:48:15 +00:00
|
|
|
state stateFunc
|
|
|
|
prefixLookup *prefixes
|
|
|
|
|
2015-03-17 23:36:48 +00:00
|
|
|
w io.Writer
|
2015-02-16 09:48:15 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// stateFunc is a function that receives the lexer and returns another
// stateFunc. NOTE(review): presumably a lexer state-function; nothing visible
// here calls it — confirm whether it is still needed.
type stateFunc func(*contentlexer) stateFunc
|
|
|
|
|
|
|
|
// prefixRunes is one prefix to look for, stored as its sequence of runes.
type prefixRunes []rune
|
|
|
|
|
|
|
|
type prefixes struct {
|
|
|
|
pr []prefixRunes
|
|
|
|
curr prefixRunes // current prefix lookup table
|
|
|
|
i int // current index
|
|
|
|
|
|
|
|
// first rune in potential match
|
|
|
|
first rune
|
|
|
|
|
|
|
|
// match-state:
|
|
|
|
// none, whitespace, partial, full
|
|
|
|
ms matchState
|
|
|
|
}
|
|
|
|
|
|
|
|
// match returns partial and full match for the prefix in play
|
|
|
|
// - it's a full match if all prefix runes has checked out in row
|
|
|
|
// - it's a partial match if it's on its way towards a full match
|
|
|
|
func (l *contentlexer) match(r rune) {
|
|
|
|
p := l.prefixLookup
|
|
|
|
if p.curr == nil {
|
|
|
|
// assumes prefixes all start off on a different rune
|
|
|
|
// works in this special case: href, src
|
|
|
|
p.i = 0
|
|
|
|
for _, pr := range p.pr {
|
|
|
|
if pr[p.i] == r {
|
|
|
|
fullMatch := len(p.pr) == 1
|
|
|
|
p.first = r
|
|
|
|
if !fullMatch {
|
|
|
|
p.curr = pr
|
|
|
|
l.prefixLookup.ms = matchStatePartial
|
|
|
|
} else {
|
|
|
|
l.prefixLookup.ms = matchStateFull
|
|
|
|
}
|
|
|
|
return
|
|
|
|
}
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
p.i++
|
|
|
|
if p.curr[p.i] == r {
|
|
|
|
fullMatch := len(p.curr) == p.i+1
|
|
|
|
if fullMatch {
|
|
|
|
p.curr = nil
|
|
|
|
l.prefixLookup.ms = matchStateFull
|
|
|
|
} else {
|
|
|
|
l.prefixLookup.ms = matchStatePartial
|
|
|
|
}
|
|
|
|
return
|
|
|
|
}
|
|
|
|
|
|
|
|
p.curr = nil
|
|
|
|
}
|
|
|
|
|
|
|
|
l.prefixLookup.ms = matchStateNone
|
|
|
|
}
|
|
|
|
|
2015-03-10 17:44:32 +00:00
|
|
|
func (l *contentlexer) emit() {
|
2015-03-17 23:36:48 +00:00
|
|
|
l.w.Write(l.content[l.start:l.pos])
|
2015-02-16 09:48:15 +00:00
|
|
|
l.start = l.pos
|
|
|
|
}
|
|
|
|
|
|
|
|
// mainPrefixRunes lists the prefixes the lexer looks for: `src=` and `href=`.
// doReplace passes it to the lexer as prefixes.pr.
var mainPrefixRunes = []prefixRunes{{'s', 'r', 'c', '='}, {'h', 'r', 'e', 'f', '='}}
|
|
|
|
|
2015-03-11 17:34:57 +00:00
|
|
|
// absURLMatcher describes one replacement: after a prefix of kind prefix,
// content starting with match has that part replaced by replacement.
//
// The field order matters: values are built with positional literals.
type absURLMatcher struct {
	// prefix is matchPrefixSrc or matchPrefixHref.
	prefix int
	// match is the byte sequence that must follow the prefix.
	match []byte
	// replacement is written in place of match.
	replacement []byte
}
|
|
|
|
|
2015-03-11 17:34:57 +00:00
|
|
|
func (a absURLMatcher) isSourceType() bool {
|
2015-03-10 17:44:32 +00:00
|
|
|
return a.prefix == matchPrefixSrc
|
2015-02-16 09:48:15 +00:00
|
|
|
}
|
|
|
|
|
2015-03-10 17:44:32 +00:00
|
|
|
func checkCandidate(l *contentlexer) {
|
|
|
|
isSource := l.prefixLookup.first == 's'
|
|
|
|
for _, m := range l.matchers {
|
|
|
|
|
|
|
|
if isSource && !m.isSourceType() || !isSource && m.isSourceType() {
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
|
|
|
|
if bytes.HasPrefix(l.content[l.pos:], m.match) {
|
2015-03-18 05:16:54 +00:00
|
|
|
// check for schemaless URLs
|
2015-03-10 17:44:32 +00:00
|
|
|
posAfter := l.pos + len(m.match)
|
2015-03-18 19:18:18 +00:00
|
|
|
if posAfter >= len(l.content) {
|
2015-03-10 17:44:32 +00:00
|
|
|
return
|
|
|
|
}
|
|
|
|
r, _ := utf8.DecodeRune(l.content[posAfter:])
|
|
|
|
if r == '/' {
|
|
|
|
// schemaless: skip
|
|
|
|
return
|
|
|
|
}
|
|
|
|
if l.pos > l.start {
|
|
|
|
l.emit()
|
|
|
|
}
|
|
|
|
l.pos += len(m.match)
|
2015-03-17 23:36:48 +00:00
|
|
|
l.w.Write(m.replacement)
|
2015-03-10 17:44:32 +00:00
|
|
|
l.start = l.pos
|
|
|
|
return
|
|
|
|
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
func (l *contentlexer) replace() {
|
2015-02-16 09:48:15 +00:00
|
|
|
contentLength := len(l.content)
|
|
|
|
var r rune
|
|
|
|
|
|
|
|
for {
|
2015-03-18 19:18:18 +00:00
|
|
|
if l.pos >= contentLength {
|
2015-02-16 09:48:15 +00:00
|
|
|
l.width = 0
|
|
|
|
break
|
|
|
|
}
|
|
|
|
|
|
|
|
var width int = 1
|
|
|
|
r = rune(l.content[l.pos])
|
|
|
|
if r >= utf8.RuneSelf {
|
|
|
|
r, width = utf8.DecodeRune(l.content[l.pos:])
|
|
|
|
}
|
2015-03-10 17:44:32 +00:00
|
|
|
l.width = width
|
2015-02-16 09:48:15 +00:00
|
|
|
l.pos += l.width
|
|
|
|
if r == ' ' {
|
|
|
|
l.prefixLookup.ms = matchStateWhitespace
|
|
|
|
} else if l.prefixLookup.ms != matchStateNone {
|
|
|
|
l.match(r)
|
|
|
|
if l.prefixLookup.ms == matchStateFull {
|
|
|
|
checkCandidate(l)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
// Done!
|
|
|
|
if l.pos > l.start {
|
2015-03-10 17:44:32 +00:00
|
|
|
l.emit()
|
2015-02-16 09:48:15 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2015-03-19 00:55:49 +00:00
|
|
|
func doReplace(ct contentTransformer, matchers []absURLMatcher) {
|
2015-02-16 09:48:15 +00:00
|
|
|
|
2015-03-17 23:36:48 +00:00
|
|
|
lexer := &contentlexer{
|
2015-03-19 00:55:49 +00:00
|
|
|
content: ct.Content(),
|
|
|
|
w: ct,
|
2015-02-17 03:33:44 +00:00
|
|
|
prefixLookup: &prefixes{pr: mainPrefixRunes},
|
|
|
|
matchers: matchers}
|
|
|
|
|
2015-03-10 17:44:32 +00:00
|
|
|
lexer.replace()
|
2015-02-16 09:48:15 +00:00
|
|
|
|
|
|
|
}
|
|
|
|
|
2015-03-11 17:34:57 +00:00
|
|
|
type absURLReplacer struct {
|
|
|
|
htmlMatchers []absURLMatcher
|
|
|
|
xmlMatchers []absURLMatcher
|
2015-02-16 09:48:15 +00:00
|
|
|
}
|
|
|
|
|
2015-03-18 05:16:54 +00:00
|
|
|
func newAbsURLReplacer(baseURL string) *absURLReplacer {
|
2015-03-11 17:34:57 +00:00
|
|
|
u, _ := url.Parse(baseURL)
|
2015-02-16 09:48:15 +00:00
|
|
|
base := strings.TrimRight(u.String(), "/")
|
|
|
|
|
|
|
|
// HTML
|
2015-03-11 17:34:57 +00:00
|
|
|
dqHTMLMatch := []byte("\"/")
|
|
|
|
sqHTMLMatch := []byte("'/")
|
2015-02-16 09:48:15 +00:00
|
|
|
|
|
|
|
// XML
|
2015-03-11 17:34:57 +00:00
|
|
|
dqXMLMatch := []byte(""/")
|
|
|
|
sqXMLMatch := []byte("'/")
|
|
|
|
|
|
|
|
dqHTML := []byte("\"" + base + "/")
|
|
|
|
sqHTML := []byte("'" + base + "/")
|
|
|
|
|
|
|
|
dqXML := []byte(""" + base + "/")
|
|
|
|
sqXML := []byte("'" + base + "/")
|
|
|
|
|
|
|
|
return &absURLReplacer{
|
|
|
|
htmlMatchers: []absURLMatcher{
|
|
|
|
{matchPrefixSrc, dqHTMLMatch, dqHTML},
|
|
|
|
{matchPrefixSrc, sqHTMLMatch, sqHTML},
|
|
|
|
{matchPrefixHref, dqHTMLMatch, dqHTML},
|
|
|
|
{matchPrefixHref, sqHTMLMatch, sqHTML}},
|
|
|
|
xmlMatchers: []absURLMatcher{
|
|
|
|
{matchPrefixSrc, dqXMLMatch, dqXML},
|
|
|
|
{matchPrefixSrc, sqXMLMatch, sqXML},
|
|
|
|
{matchPrefixHref, dqXMLMatch, dqXML},
|
|
|
|
{matchPrefixHref, sqXMLMatch, sqXML},
|
2015-02-16 09:48:15 +00:00
|
|
|
}}
|
|
|
|
|
|
|
|
}
|
|
|
|
|
2015-03-19 00:55:49 +00:00
|
|
|
func (au *absURLReplacer) replaceInHTML(ct contentTransformer) {
|
|
|
|
doReplace(ct, au.htmlMatchers)
|
2015-02-16 09:48:15 +00:00
|
|
|
}
|
|
|
|
|
2015-03-19 00:55:49 +00:00
|
|
|
func (au *absURLReplacer) replaceInXML(ct contentTransformer) {
|
|
|
|
doReplace(ct, au.xmlMatchers)
|
2015-02-16 09:48:15 +00:00
|
|
|
}
|