2018-08-05 05:13:49 -04:00
|
|
|
|
// Copyright 2018 The Hugo Authors. All rights reserved.
|
|
|
|
|
//
|
|
|
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
|
|
|
// you may not use this file except in compliance with the License.
|
|
|
|
|
// You may obtain a copy of the License at
|
|
|
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
|
//
|
|
|
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
|
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
|
// See the License for the specific language governing permissions and
|
|
|
|
|
// limitations under the License.
|
|
|
|
|
|
|
|
|
|
package urlreplacers
|
|
|
|
|
|
|
|
|
|
import (
|
|
|
|
|
"path/filepath"
|
|
|
|
|
"testing"
|
|
|
|
|
|
|
|
|
|
bp "github.com/gohugoio/hugo/bufferpool"
|
|
|
|
|
|
|
|
|
|
"github.com/gohugoio/hugo/helpers"
|
|
|
|
|
"github.com/gohugoio/hugo/transform"
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
// HTML and XML fixtures for the URL transformer tests. Raw string literals
// are used throughout so that embedded double quotes need no escaping.
const (
	// HTML inputs using double-quoted and single-quoted attributes.
	h5JsContentDoubleQuote = `<!DOCTYPE html><html><head><script src="foobar.js"></script><script src="/barfoo.js"></script></head><body><nav><h1>title</h1></nav><article>content <a href="foobar">foobar</a>. <a href="/foobar">Follow up</a></article></body></html>`
	h5JsContentSingleQuote = `<!DOCTYPE html><html><head><script src='foobar.js'></script><script src='/barfoo.js'></script></head><body><nav><h1>title</h1></nav><article>content <a href='foobar'>foobar</a>. <a href='/foobar'>Follow up</a></article></body></html>`

	// Inputs whose URLs already carry a scheme or are schemaless ("//host/...").
	// The test tables pair each of these with itself, i.e. the transformer is
	// expected to leave them untouched.
	h5JsContentAbsURL           = `<!DOCTYPE html><html><head><script src="http://user@host:10234/foobar.js"></script></head><body><nav><h1>title</h1></nav><article>content <a href="https://host/foobar">foobar</a>. Follow up</article></body></html>`
	h5JsContentAbsURLSchemaless = `<!DOCTYPE html><html><head><script src="//host/foobar.js"></script><script src='//host2/barfoo.js'></head><body><nav><h1>title</h1></nav><article>content <a href="//host/foobar">foobar</a>. <a href='//host2/foobar'>Follow up</a></article></body></html>`

	// Expected output for the two quoted HTML inputs: only the root-relative
	// URLs ("/barfoo.js", "/foobar") gain the "http://base" prefix.
	correctOutputSrcHrefDq = `<!DOCTYPE html><html><head><script src="foobar.js"></script><script src="http://base/barfoo.js"></script></head><body><nav><h1>title</h1></nav><article>content <a href="foobar">foobar</a>. <a href="http://base/foobar">Follow up</a></article></body></html>`
	correctOutputSrcHrefSq = `<!DOCTYPE html><html><head><script src='foobar.js'></script><script src='http://base/barfoo.js'></script></head><body><nav><h1>title</h1></nav><article>content <a href='foobar'>foobar</a>. <a href='http://base/foobar'>Follow up</a></article></body></html>`

	// Atom feed input and its expected output.
	//
	// NOTE(review): the markup inside <content type="html"> appears here with
	// literal <, >, " and ' characters. If the XML transformer matches
	// entity-escaped quotes (e.g. &#34; / &#39;), these fixtures need the
	// escaped form instead - confirm against the transformer implementation.
	h5XMLContentAbsURL        = `<?xml version="1.0" encoding="utf-8" standalone="yes" ?><feed xmlns="http://www.w3.org/2005/Atom"><entry><content type="html"><p><a href="/foobar">foobar</a></p> <p>A video: <iframe src='/foo'></iframe></p></content></entry></feed>`
	correctOutputSrcHrefInXML = `<?xml version="1.0" encoding="utf-8" standalone="yes" ?><feed xmlns="http://www.w3.org/2005/Atom"><entry><content type="html"><p><a href="http://base/foobar">foobar</a></p> <p>A video: <iframe src='http://base/foo'></iframe></p></content></entry></feed>`

	// Atom feed input containing only schemaless URLs; paired with itself in
	// the test tables.
	h5XMLContentGuarded = `<?xml version="1.0" encoding="utf-8" standalone="yes" ?><feed xmlns="http://www.w3.org/2005/Atom"><entry><content type="html"><p><a href="//foobar">foobar</a></p> <p>A video: <iframe src='//foo'></iframe></p></content></entry></feed>`
)
|
|
|
|
|
|
|
|
|
|
// Additional sanity fixtures for the replacement tests.
const (
	// Inputs that the test tables pair with themselves, i.e. the transformer
	// is expected to return them unchanged: plain text, non-ASCII text, and
	// attributes that are cut off before a closing quote.
	replace1 = "No replacements."
	replace2 = "ᚠᛇᚻ ᛒᛦᚦ ᚠᚱᚩᚠᚢᚱ\nᚠᛁᚱᚪ ᚷᛖᚻᚹᛦᛚᚳᚢᛗ"
	replace3 = `End of file: src="/`
	replace5 = `Srcsett with no closing quote: srcset="/img/small.jpg do be do be do.`

	// Issue: 816, schemaless links combined with others.
	// Only the root-relative "/normal" URLs are expected to be rewritten; the
	// "//schemaless" ones stay as they are.
	replaceSchemalessHTML        = `Pre. src='//schemaless' src='/normal' <a href="//schemaless">Schemaless</a>. <a href="/normal">normal</a>. Post.`
	replaceSchemalessHTMLCorrect = `Pre. src='//schemaless' src='http://base/normal' <a href="//schemaless">Schemaless</a>. <a href="http://base/normal">normal</a>. Post.`

	replaceSchemalessXML        = `Pre. src='//schemaless' src='/normal' <a href='//schemaless'>Schemaless</a>. <a href='/normal'>normal</a>. Post.`
	replaceSchemalessXMLCorrect = `Pre. src='//schemaless' src='http://base/normal' <a href='//schemaless'>Schemaless</a>. <a href='http://base/normal'>normal</a>. Post.`
)
|
|
|
|
|
|
|
|
|
|
// Fixtures for srcset= handling. Each *Correct constant is the expected
// output for the input of the same name.
const (
	// Double-quoted srcset with several candidates plus a src attribute.
	srcsetBasic        = `Pre. <img srcset="/img/small.jpg 200w, /img/medium.jpg 300w, /img/big.jpg 700w" alt="text" src="/img/foo.jpg">`
	srcsetBasicCorrect = `Pre. <img srcset="http://base/img/small.jpg 200w, http://base/img/medium.jpg 300w, http://base/img/big.jpg 700w" alt="text" src="http://base/img/foo.jpg">`

	// Single-quoted srcset followed by trailing text.
	srcsetSingleQuote        = `Pre. <img srcset='/img/small.jpg 200w, /img/big.jpg 700w' alt="text" src="/img/foo.jpg"> POST.`
	srcsetSingleQuoteCorrect = `Pre. <img srcset='http://base/img/small.jpg 200w, http://base/img/big.jpg 700w' alt="text" src="http://base/img/foo.jpg"> POST.`

	// Inputs used by the XML srcset test table.
	srcsetXMLBasic        = `Pre. <img srcset="/img/small.jpg 200w, /img/big.jpg 700w" alt="text" src="/img/foo.jpg">`
	srcsetXMLBasicCorrect = `Pre. <img srcset="http://base/img/small.jpg 200w, http://base/img/big.jpg 700w" alt="text" src="http://base/img/foo.jpg">`

	// NOTE(review): despite the name, this fixture uses double quotes and is
	// identical to srcsetXMLBasic - confirm whether that is intended.
	srcsetXMLSingleQuote        = `Pre. <img srcset="/img/small.jpg 200w, /img/big.jpg 700w" alt="text" src="/img/foo.jpg">`
	srcsetXMLSingleQuoteCorrect = `Pre. <img srcset="http://base/img/small.jpg 200w, http://base/img/big.jpg 700w" alt="text" src="http://base/img/foo.jpg">`

	// Multi-line input mixing a srcset without an opening quote, a minimal
	// srcset, and schemaless URLs. The line breaks are part of the value.
	srcsetVariations = `Pre.
Missing start quote: <img srcset=/img/small.jpg 200w, /img/big.jpg 700w" alt="text"> src='/img/foo.jpg'> FOO.
<img srcset='/img.jpg'>
schemaless: <img srcset='//img.jpg' src='//basic.jpg'>
schemaless2: <img srcset="//img.jpg" src="//basic.jpg2> POST
`
)
|
|
|
|
|
|
|
|
|
|
// Expected outputs and remaining fixtures for the srcset and relative-URL
// tests. The line breaks inside the multi-line literals are part of the value.
const (
	// Expected output for srcsetVariations: the quoted root-relative URLs are
	// prefixed, while the unquoted srcset and the schemaless URLs are not.
	srcsetVariationsCorrect = `Pre.
Missing start quote: <img srcset=/img/small.jpg 200w, /img/big.jpg 700w" alt="text"> src='http://base/img/foo.jpg'> FOO.
<img srcset='http://base/img.jpg'>
schemaless: <img srcset='//img.jpg' src='//basic.jpg'>
schemaless2: <img srcset="//img.jpg" src="//basic.jpg2> POST
`

	// Input for the XML srcset test table. Unlike srcsetVariations, the
	// unquoted srcset here has no comma between its two candidates.
	srcsetXMLVariations = `Pre.
Missing start quote: <img srcset=/img/small.jpg 200w /img/big.jpg 700w" alt="text"> src='/img/foo.jpg'> FOO.
<img srcset='/img.jpg'>
schemaless: <img srcset='//img.jpg' src='//basic.jpg'>
schemaless2: <img srcset="//img.jpg" src="//basic.jpg2> POST
`

	// Expected output for srcsetXMLVariations.
	srcsetXMLVariationsCorrect = `Pre.
Missing start quote: <img srcset=/img/small.jpg 200w /img/big.jpg 700w" alt="text"> src='http://base/img/foo.jpg'> FOO.
<img srcset='http://base/img.jpg'>
schemaless: <img srcset='//img.jpg' src='//basic.jpg'>
schemaless2: <img srcset="//img.jpg" src="//basic.jpg2> POST
`

	// Input and expected output for TestRelativeURL, where root-relative URLs
	// are rewritten with a "../../" prefix.
	relPathVariations        = `PRE. a href="/img/small.jpg" input action="/foo.html" meta url=/redirect/to/page/ POST.`
	relPathVariationsCorrect = `PRE. a href="../../img/small.jpg" input action="../../foo.html" meta url=../../redirect/to/page/ POST.`

	// testBaseURL is the base URL handed to the transformers under test.
	testBaseURL = "http://base/"
)
|
|
|
|
|
|
|
|
|
|
var (
|
|
|
|
|
absURLlBenchTests = []test{
|
2020-12-16 06:11:32 -05:00
|
|
|
|
{h5JsContentDoubleQuote, correctOutputSrcHrefDq},
|
|
|
|
|
{h5JsContentSingleQuote, correctOutputSrcHrefSq},
|
2018-08-05 05:13:49 -04:00
|
|
|
|
{h5JsContentAbsURL, h5JsContentAbsURL},
|
|
|
|
|
{h5JsContentAbsURLSchemaless, h5JsContentAbsURLSchemaless},
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
xmlAbsURLBenchTests = []test{
|
2020-12-16 06:11:32 -05:00
|
|
|
|
{h5XMLContentAbsURL, correctOutputSrcHrefInXML},
|
2018-08-05 05:13:49 -04:00
|
|
|
|
{h5XMLContentGuarded, h5XMLContentGuarded},
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
sanityTests = []test{{replace1, replace1}, {replace2, replace2}, {replace3, replace3}, {replace3, replace3}, {replace5, replace5}}
|
|
|
|
|
extraTestsHTML = []test{{replaceSchemalessHTML, replaceSchemalessHTMLCorrect}}
|
|
|
|
|
absURLTests = append(absURLlBenchTests, append(sanityTests, extraTestsHTML...)...)
|
|
|
|
|
extraTestsXML = []test{{replaceSchemalessXML, replaceSchemalessXMLCorrect}}
|
|
|
|
|
xmlAbsURLTests = append(xmlAbsURLBenchTests, append(sanityTests, extraTestsXML...)...)
|
|
|
|
|
srcsetTests = []test{{srcsetBasic, srcsetBasicCorrect}, {srcsetSingleQuote, srcsetSingleQuoteCorrect}, {srcsetVariations, srcsetVariationsCorrect}}
|
|
|
|
|
srcsetXMLTests = []test{
|
|
|
|
|
{srcsetXMLBasic, srcsetXMLBasicCorrect},
|
|
|
|
|
{srcsetXMLSingleQuote, srcsetXMLSingleQuoteCorrect},
|
2020-12-02 07:23:25 -05:00
|
|
|
|
{srcsetXMLVariations, srcsetXMLVariationsCorrect},
|
|
|
|
|
}
|
2018-08-05 05:13:49 -04:00
|
|
|
|
|
|
|
|
|
relurlTests = []test{{relPathVariations, relPathVariationsCorrect}}
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
func BenchmarkAbsURL(b *testing.B) {
|
|
|
|
|
tr := transform.New(NewAbsURLTransformer(testBaseURL))
|
|
|
|
|
|
|
|
|
|
b.ResetTimer()
|
|
|
|
|
for i := 0; i < b.N; i++ {
|
|
|
|
|
apply(b.Errorf, tr, absURLlBenchTests)
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
func BenchmarkAbsURLSrcset(b *testing.B) {
|
|
|
|
|
tr := transform.New(NewAbsURLTransformer(testBaseURL))
|
|
|
|
|
|
|
|
|
|
b.ResetTimer()
|
|
|
|
|
for i := 0; i < b.N; i++ {
|
|
|
|
|
apply(b.Errorf, tr, srcsetTests)
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
func BenchmarkXMLAbsURLSrcset(b *testing.B) {
|
|
|
|
|
tr := transform.New(NewAbsURLInXMLTransformer(testBaseURL))
|
|
|
|
|
|
|
|
|
|
b.ResetTimer()
|
|
|
|
|
for i := 0; i < b.N; i++ {
|
|
|
|
|
apply(b.Errorf, tr, srcsetXMLTests)
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
func TestAbsURL(t *testing.T) {
|
|
|
|
|
tr := transform.New(NewAbsURLTransformer(testBaseURL))
|
|
|
|
|
|
|
|
|
|
apply(t.Errorf, tr, absURLTests)
|
|
|
|
|
}
|
|
|
|
|
|
2020-12-16 06:11:32 -05:00
|
|
|
|
func TestAbsURLUnquoted(t *testing.T) {
|
2018-12-17 08:25:00 -05:00
|
|
|
|
tr := transform.New(NewAbsURLTransformer(testBaseURL))
|
|
|
|
|
|
|
|
|
|
apply(t.Errorf, tr, []test{
|
2019-03-23 15:15:06 -04:00
|
|
|
|
{
|
2018-12-17 08:25:00 -05:00
|
|
|
|
content: `Link: <a href=/asdf>ASDF</a>`,
|
|
|
|
|
expected: `Link: <a href=http://base/asdf>ASDF</a>`,
|
|
|
|
|
},
|
2019-03-23 15:15:06 -04:00
|
|
|
|
{
|
2018-12-17 08:25:00 -05:00
|
|
|
|
content: `Link: <a href=/asdf >ASDF</a>`,
|
|
|
|
|
expected: `Link: <a href=http://base/asdf >ASDF</a>`,
|
|
|
|
|
},
|
|
|
|
|
})
|
|
|
|
|
}
|
|
|
|
|
|
2018-08-05 05:13:49 -04:00
|
|
|
|
func TestRelativeURL(t *testing.T) {
|
|
|
|
|
tr := transform.New(NewAbsURLTransformer(helpers.GetDottedRelativePath(filepath.FromSlash("/post/sub/"))))
|
|
|
|
|
|
|
|
|
|
applyWithPath(t.Errorf, tr, relurlTests)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
func TestAbsURLSrcSet(t *testing.T) {
|
|
|
|
|
tr := transform.New(NewAbsURLTransformer(testBaseURL))
|
|
|
|
|
|
|
|
|
|
apply(t.Errorf, tr, srcsetTests)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
func TestAbsXMLURLSrcSet(t *testing.T) {
|
|
|
|
|
tr := transform.New(NewAbsURLInXMLTransformer(testBaseURL))
|
|
|
|
|
|
|
|
|
|
apply(t.Errorf, tr, srcsetXMLTests)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
func BenchmarkXMLAbsURL(b *testing.B) {
|
2018-12-17 08:25:00 -05:00
|
|
|
|
tr := transform.New(NewAbsURLInXMLTransformer(testBaseURL))
|
2018-08-05 05:13:49 -04:00
|
|
|
|
|
|
|
|
|
b.ResetTimer()
|
|
|
|
|
for i := 0; i < b.N; i++ {
|
|
|
|
|
apply(b.Errorf, tr, xmlAbsURLBenchTests)
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
func TestXMLAbsURL(t *testing.T) {
|
|
|
|
|
tr := transform.New(NewAbsURLInXMLTransformer(testBaseURL))
|
|
|
|
|
apply(t.Errorf, tr, xmlAbsURLTests)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
func apply(ef errorf, tr transform.Chain, tests []test) {
|
|
|
|
|
applyWithPath(ef, tr, tests)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
func applyWithPath(ef errorf, tr transform.Chain, tests []test) {
|
|
|
|
|
out := bp.GetBuffer()
|
|
|
|
|
defer bp.PutBuffer(out)
|
|
|
|
|
|
|
|
|
|
in := bp.GetBuffer()
|
|
|
|
|
defer bp.PutBuffer(in)
|
|
|
|
|
|
|
|
|
|
for _, test := range tests {
|
|
|
|
|
var err error
|
|
|
|
|
in.WriteString(test.content)
|
|
|
|
|
err = tr.Apply(out, in)
|
|
|
|
|
if err != nil {
|
|
|
|
|
ef("Unexpected error: %s", err)
|
|
|
|
|
}
|
|
|
|
|
if test.expected != out.String() {
|
|
|
|
|
ef("Expected:\n%s\nGot:\n%s", test.expected, out.String())
|
|
|
|
|
}
|
|
|
|
|
out.Reset()
|
|
|
|
|
in.Reset()
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// test is a single table entry: content is the input handed to the
// transformer and expected is the output it must produce.
type test struct {
	content  string
	expected string
}
|
|
|
|
|
|
2022-03-17 17:03:27 -04:00
|
|
|
|
// errorf is the failure-reporting callback used by apply and applyWithPath.
// Its signature matches the Errorf method values (t.Errorf, b.Errorf) that
// the tests and benchmarks pass in.
type errorf func(format string, args ...any)
|