2018-08-05 09:13:49 +00:00
|
|
|
// Copyright 2018 The Hugo Authors. All rights reserved.
|
2015-12-10 22:19:38 +00:00
|
|
|
//
|
|
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
|
|
// you may not use this file except in compliance with the License.
|
|
|
|
// You may obtain a copy of the License at
|
|
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
//
|
|
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
// See the License for the specific language governing permissions and
|
|
|
|
// limitations under the License.
|
|
|
|
|
2018-08-05 09:13:49 +00:00
|
|
|
package urlreplacers
|
2015-02-16 09:48:15 +00:00
|
|
|
|
|
|
|
import (
|
|
|
|
"bytes"
|
2015-03-17 23:36:48 +00:00
|
|
|
"io"
|
2018-12-17 13:25:00 +00:00
|
|
|
"unicode"
|
2015-02-16 09:48:15 +00:00
|
|
|
"unicode/utf8"
|
2018-08-05 09:13:49 +00:00
|
|
|
|
|
|
|
"github.com/gohugoio/hugo/transform"
|
2015-02-16 09:48:15 +00:00
|
|
|
)
|
|
|
|
|
2015-05-04 10:51:48 +00:00
|
|
|
// absurllexer holds the state of one pass over a piece of content in which
// the leading "/" of matching URLs is replaced by path.
type absurllexer struct {
	// content is the source being scanned.
	content []byte

	// quotes lists the byte sequences accepted as an opening quote of an
	// attribute value.
	quotes [][]byte

	// path is written in place of the leading "/" of a matching URL. It may
	// be set to a "." relative path.
	path []byte

	// w is the target for the rewritten content.
	w io.Writer

	// start is the offset of the first byte of content not yet written to w.
	start int

	// pos is the current input position.
	pos int
}
|
|
|
|
|
2015-05-03 17:54:17 +00:00
|
|
|
type prefix struct {
|
2018-12-17 16:42:46 +00:00
|
|
|
disabled bool
|
|
|
|
b []byte
|
|
|
|
f func(l *absurllexer)
|
2019-08-06 15:46:20 +00:00
|
|
|
|
|
|
|
nextPos int
|
|
|
|
}
|
|
|
|
|
|
|
|
func (p *prefix) find(bs []byte, start int) bool {
|
|
|
|
if p.disabled {
|
|
|
|
return false
|
|
|
|
}
|
|
|
|
|
|
|
|
if p.nextPos == -1 {
|
|
|
|
idx := bytes.Index(bs[start:], p.b)
|
|
|
|
|
|
|
|
if idx == -1 {
|
|
|
|
p.disabled = true
|
|
|
|
// Find the closest match
|
|
|
|
return false
|
|
|
|
}
|
|
|
|
|
|
|
|
p.nextPos = start + idx + len(p.b)
|
|
|
|
}
|
|
|
|
|
|
|
|
return true
|
2015-05-03 17:54:17 +00:00
|
|
|
}
|
2015-02-16 09:48:15 +00:00
|
|
|
|
2018-12-17 16:42:46 +00:00
|
|
|
func newPrefixState() []*prefix {
|
|
|
|
return []*prefix{
|
|
|
|
{b: []byte("src="), f: checkCandidateBase},
|
|
|
|
{b: []byte("href="), f: checkCandidateBase},
|
2020-06-14 09:14:56 +00:00
|
|
|
{b: []byte("url="), f: checkCandidateBase},
|
2019-02-26 08:11:06 +00:00
|
|
|
{b: []byte("action="), f: checkCandidateBase},
|
2018-12-17 16:42:46 +00:00
|
|
|
{b: []byte("srcset="), f: checkCandidateSrcset},
|
|
|
|
}
|
2015-05-03 20:42:56 +00:00
|
|
|
}
|
2015-02-16 09:48:15 +00:00
|
|
|
|
2015-05-04 10:51:48 +00:00
|
|
|
func (l *absurllexer) emit() {
|
2015-03-17 23:36:48 +00:00
|
|
|
l.w.Write(l.content[l.start:l.pos])
|
2015-02-16 09:48:15 +00:00
|
|
|
l.start = l.pos
|
|
|
|
}
|
|
|
|
|
2018-12-17 13:25:00 +00:00
|
|
|
// relURLPrefix is the byte sequence a URL must start with to be rewritten.
var relURLPrefix = []byte{'/'}

// relURLPrefixLen is the length of relURLPrefix in bytes.
var relURLPrefixLen = len(relURLPrefix)
|
|
|
|
|
|
|
|
func (l *absurllexer) consumeQuote() []byte {
|
|
|
|
for _, q := range l.quotes {
|
|
|
|
if bytes.HasPrefix(l.content[l.pos:], q) {
|
|
|
|
l.pos += len(q)
|
2015-05-03 20:42:56 +00:00
|
|
|
l.emit()
|
2018-12-17 13:25:00 +00:00
|
|
|
return q
|
2015-05-03 20:42:56 +00:00
|
|
|
}
|
2015-05-03 17:54:17 +00:00
|
|
|
}
|
2018-12-17 13:25:00 +00:00
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// handle URLs in src and href.
|
|
|
|
func checkCandidateBase(l *absurllexer) {
|
|
|
|
l.consumeQuote()
|
|
|
|
|
|
|
|
if !bytes.HasPrefix(l.content[l.pos:], relURLPrefix) {
|
|
|
|
return
|
|
|
|
}
|
|
|
|
|
|
|
|
// check for schemaless URLs
|
|
|
|
posAfter := l.pos + relURLPrefixLen
|
|
|
|
if posAfter >= len(l.content) {
|
|
|
|
return
|
|
|
|
}
|
|
|
|
r, _ := utf8.DecodeRune(l.content[posAfter:])
|
|
|
|
if r == '/' {
|
|
|
|
// schemaless: skip
|
|
|
|
return
|
|
|
|
}
|
|
|
|
if l.pos > l.start {
|
|
|
|
l.emit()
|
|
|
|
}
|
|
|
|
l.pos += relURLPrefixLen
|
|
|
|
l.w.Write(l.path)
|
|
|
|
l.start = l.pos
|
|
|
|
}
|
|
|
|
|
|
|
|
func (l *absurllexer) posAfterURL(q []byte) int {
|
|
|
|
if len(q) > 0 {
|
|
|
|
// look for end quote
|
|
|
|
return bytes.Index(l.content[l.pos:], q)
|
|
|
|
}
|
|
|
|
|
|
|
|
return bytes.IndexFunc(l.content[l.pos:], func(r rune) bool {
|
|
|
|
return r == '>' || unicode.IsSpace(r)
|
|
|
|
})
|
|
|
|
|
2015-02-16 09:48:15 +00:00
|
|
|
}
|
|
|
|
|
2015-05-04 10:51:48 +00:00
|
|
|
// handle URLs in srcset.
|
|
|
|
func checkCandidateSrcset(l *absurllexer) {
|
2018-12-17 13:25:00 +00:00
|
|
|
q := l.consumeQuote()
|
|
|
|
if q == nil {
|
|
|
|
// srcset needs to be quoted.
|
|
|
|
return
|
|
|
|
}
|
2015-03-10 17:44:32 +00:00
|
|
|
|
2018-12-17 13:25:00 +00:00
|
|
|
// special case, not frequent (me think)
|
|
|
|
if !bytes.HasPrefix(l.content[l.pos:], relURLPrefix) {
|
|
|
|
return
|
|
|
|
}
|
2015-05-03 17:54:17 +00:00
|
|
|
|
2018-12-17 13:25:00 +00:00
|
|
|
// check for schemaless URLs
|
|
|
|
posAfter := l.pos + relURLPrefixLen
|
|
|
|
if posAfter >= len(l.content) {
|
|
|
|
return
|
|
|
|
}
|
|
|
|
r, _ := utf8.DecodeRune(l.content[posAfter:])
|
|
|
|
if r == '/' {
|
|
|
|
// schemaless: skip
|
|
|
|
return
|
|
|
|
}
|
2015-05-03 17:54:17 +00:00
|
|
|
|
2018-12-17 13:25:00 +00:00
|
|
|
posEnd := l.posAfterURL(q)
|
2015-03-10 17:44:32 +00:00
|
|
|
|
2018-12-17 13:25:00 +00:00
|
|
|
// safe guard
|
|
|
|
if posEnd < 0 || posEnd > 2000 {
|
|
|
|
return
|
|
|
|
}
|
2015-05-03 17:54:17 +00:00
|
|
|
|
2018-12-17 13:25:00 +00:00
|
|
|
if l.pos > l.start {
|
|
|
|
l.emit()
|
|
|
|
}
|
2015-05-03 17:54:17 +00:00
|
|
|
|
2018-12-17 13:25:00 +00:00
|
|
|
section := l.content[l.pos : l.pos+posEnd+1]
|
2015-05-03 17:54:17 +00:00
|
|
|
|
2018-12-17 13:25:00 +00:00
|
|
|
fields := bytes.Fields(section)
|
|
|
|
for i, f := range fields {
|
|
|
|
if f[0] == '/' {
|
|
|
|
l.w.Write(l.path)
|
|
|
|
l.w.Write(f[1:])
|
2015-05-03 17:54:17 +00:00
|
|
|
|
2018-12-17 13:25:00 +00:00
|
|
|
} else {
|
|
|
|
l.w.Write(f)
|
2015-05-03 17:54:17 +00:00
|
|
|
}
|
|
|
|
|
2018-12-17 13:25:00 +00:00
|
|
|
if i < len(fields)-1 {
|
|
|
|
l.w.Write([]byte(" "))
|
|
|
|
}
|
2015-03-10 17:44:32 +00:00
|
|
|
}
|
2018-12-17 13:25:00 +00:00
|
|
|
|
|
|
|
l.pos += len(section)
|
|
|
|
l.start = l.pos
|
|
|
|
|
2015-03-10 17:44:32 +00:00
|
|
|
}
|
|
|
|
|
2015-05-04 10:51:48 +00:00
|
|
|
// main loop
|
|
|
|
func (l *absurllexer) replace() {
|
2015-02-16 09:48:15 +00:00
|
|
|
contentLength := len(l.content)
|
2018-12-17 16:42:46 +00:00
|
|
|
|
|
|
|
prefixes := newPrefixState()
|
2015-02-16 09:48:15 +00:00
|
|
|
|
|
|
|
for {
|
2015-03-18 19:18:18 +00:00
|
|
|
if l.pos >= contentLength {
|
2015-02-16 09:48:15 +00:00
|
|
|
break
|
|
|
|
}
|
|
|
|
|
2018-12-17 16:42:46 +00:00
|
|
|
var match *prefix
|
|
|
|
|
|
|
|
for _, p := range prefixes {
|
2019-08-06 15:46:20 +00:00
|
|
|
if !p.find(l.content, l.pos) {
|
2018-12-17 16:42:46 +00:00
|
|
|
continue
|
|
|
|
}
|
|
|
|
|
2019-08-06 15:46:20 +00:00
|
|
|
if match == nil || p.nextPos < match.nextPos {
|
2018-12-17 16:42:46 +00:00
|
|
|
match = p
|
2015-02-16 09:48:15 +00:00
|
|
|
}
|
|
|
|
}
|
2018-12-17 16:42:46 +00:00
|
|
|
|
2019-08-06 15:46:20 +00:00
|
|
|
if match == nil {
|
2018-12-17 16:42:46 +00:00
|
|
|
// Done!
|
|
|
|
l.pos = contentLength
|
|
|
|
break
|
|
|
|
} else {
|
2019-08-06 15:46:20 +00:00
|
|
|
l.pos = match.nextPos
|
|
|
|
match.nextPos = -1
|
2018-12-17 16:42:46 +00:00
|
|
|
match.f(l)
|
|
|
|
}
|
2015-02-16 09:48:15 +00:00
|
|
|
}
|
|
|
|
// Done!
|
|
|
|
if l.pos > l.start {
|
2015-03-10 17:44:32 +00:00
|
|
|
l.emit()
|
2015-02-16 09:48:15 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2018-12-17 13:25:00 +00:00
|
|
|
func doReplace(path string, ct transform.FromTo, quotes [][]byte) {
|
2015-05-15 22:11:39 +00:00
|
|
|
|
2015-05-04 10:51:48 +00:00
|
|
|
lexer := &absurllexer{
|
2018-12-17 13:25:00 +00:00
|
|
|
content: ct.From().Bytes(),
|
|
|
|
w: ct.To(),
|
|
|
|
path: []byte(path),
|
|
|
|
quotes: quotes}
|
2015-02-17 03:33:44 +00:00
|
|
|
|
2015-03-10 17:44:32 +00:00
|
|
|
lexer.replace()
|
2015-02-16 09:48:15 +00:00
|
|
|
}
|
|
|
|
|
2015-03-11 17:34:57 +00:00
|
|
|
// absURLReplacer holds the quote sequences passed to doReplace for each
// supported content format.
type absURLReplacer struct {
	// htmlQuotes are the quote sequences used by replaceInHTML.
	htmlQuotes [][]byte

	// xmlQuotes are the quote sequences used by replaceInXML.
	xmlQuotes [][]byte
}

// newAbsURLReplacer returns a replacer with the quote sets for HTML and XML.
//
// HTML uses the literal double and single quote. XML uses the numeric
// character references for the same two characters.
//
// NOTE(review): the XML literals in the reviewed source were garbled (the
// double-quote entry was not valid Go). They are restored here as "&#34;" and
// "&#39;", the forms Go's XML/HTML escaping produces. Confirm against version
// control.
func newAbsURLReplacer() *absURLReplacer {
	return &absURLReplacer{
		htmlQuotes: [][]byte{[]byte(`"`), []byte("'")},
		xmlQuotes:  [][]byte{[]byte("&#34;"), []byte("&#39;")},
	}
}
|
|
|
|
|
2018-08-05 09:13:49 +00:00
|
|
|
func (au *absURLReplacer) replaceInHTML(path string, ct transform.FromTo) {
|
2018-12-17 13:25:00 +00:00
|
|
|
doReplace(path, ct, au.htmlQuotes)
|
2015-02-16 09:48:15 +00:00
|
|
|
}
|
|
|
|
|
2018-08-05 09:13:49 +00:00
|
|
|
func (au *absURLReplacer) replaceInXML(path string, ct transform.FromTo) {
|
2018-12-17 13:25:00 +00:00
|
|
|
doReplace(path, ct, au.xmlQuotes)
|
2015-02-16 09:48:15 +00:00
|
|
|
}
|