Mirror of https://github.com/gohugoio/hugo.git
Fix canonifyurl vs schemaless links
And it looks even faster.

Compared to the previous attempt:

    benchmark              old ns/op    new ns/op    delta
    BenchmarkAbsUrl        30902        27206        -11.96%
    BenchmarkXmlAbsUrl     15389        14216        -7.62%

    benchmark              old allocs   new allocs   delta
    BenchmarkAbsUrl        33           28           -15.15%
    BenchmarkXmlAbsUrl     16           14           -12.50%

    benchmark              old bytes    new bytes    delta
    BenchmarkAbsUrl        4167         3504         -15.91%
    BenchmarkXmlAbsUrl     2057         2048         -0.44%

Compared to before I started all of this:

    benchmark              old ns/op    new ns/op    delta
    BenchmarkAbsUrl        36219        27206        -24.88%
    BenchmarkXmlAbsUrl     23903        14216        -40.53%

    benchmark              old allocs   new allocs   delta
    BenchmarkAbsUrl        60           28           -53.33%
    BenchmarkXmlAbsUrl     30           14           -53.33%

    benchmark              old bytes    new bytes    delta
    BenchmarkAbsUrl        5842         3504         -40.02%
    BenchmarkXmlAbsUrl     3754         2048         -45.44%

Fixes #816
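The tables above are in the format produced by Go's benchcmp tool. A hedged sketch of how such a comparison is made; the package path and output file names here are assumptions, not from the commit:

    # on the parent commit
    go test -bench=AbsUrl -benchmem ./transform > old.txt
    # on this commit
    go test -bench=AbsUrl -benchmem ./transform > new.txt
    benchcmp old.txt new.txt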
Parent: 6b28e38cea
Commit: c2e2913872
2 changed files with 46 additions and 74 deletions
@@ -37,11 +37,6 @@ const (
     tHrefdq
     tSrcsq
     tHrefsq
-    // guards
-    tGrcdq
-    tGhrefdq
-    tGsrcsq
-    tGhrefsq
 )

 type contentlexer struct {
@@ -130,24 +125,6 @@ var itemSlicePool = &sync.Pool{
     },
 }

-func replace(content []byte, matchers []absurlMatcher) *contentlexer {
-    var items []item
-    if x := itemSlicePool.Get(); x != nil {
-        items = x.([]item)[:0]
-        defer itemSlicePool.Put(items)
-    } else {
-        items = make([]item, 0, 8)
-    }
-
-    lexer := &contentlexer{content: content,
-        items:        items,
-        prefixLookup: &prefixes{pr: mainPrefixRunes},
-        matchers:     matchers}
-
-    lexer.runReplacer()
-    return lexer
-}
-
 func (l *contentlexer) runReplacer() {
     for l.state = lexReplacements; l.state != nil; {
         l.state = l.state(l)
@@ -156,11 +133,8 @@ func (l *contentlexer) runReplacer() {

 type absurlMatcher struct {
     replaceType itemType
-    guardType   itemType
     match       []byte
-    guard       []byte
     replacement []byte
-    guarded     bool
 }

 func (a absurlMatcher) isSourceType() bool {
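With guardType, guard, and guarded gone, a matcher reduces to three fields: the token type to emit, the byte pattern to find, and its replacement. A minimal runnable sketch of the slimmed struct and one matcher literal; the declarations here are stand-ins for the real ones above, and the base URL is illustrative:

    package main

    import "fmt"

    type itemType int

    const tHrefdq itemType = iota

    type absurlMatcher struct {
        replaceType itemType
        match       []byte
        replacement []byte
    }

    func main() {
        // A matcher that rewrites href="/... into href="http://base/...
        m := absurlMatcher{tHrefdq, []byte(`"/`), []byte(`"http://base/`)}
        fmt.Printf("%s -> %s\n", m.match, m.replacement)
    }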
@@ -207,24 +181,21 @@ func checkCandidate(l *contentlexer) {
     isSource := l.prefixLookup.first == 's'
     for _, m := range l.matchers {

-        if m.guarded {
-            continue
-        }
-
         if isSource && !m.isSourceType() || !isSource && m.isSourceType() {
             continue
         }

-        s := l.content[l.pos:]
-        if bytes.HasPrefix(s, m.guard) {
-            if l.pos > l.start {
-                l.emit(tText)
+        if bytes.HasPrefix(l.content[l.pos:], m.match) {
+            // check for schemaless urls
+            posAfter := pos(int(l.pos) + len(m.match))
+            if int(posAfter) >= len(l.content) {
+                return
+            }
+            r, _ := utf8.DecodeRune(l.content[posAfter:])
+            if r == '/' {
+                // schemaless: skip
+                return
             }
-            l.pos += pos(len(m.guard))
-            l.emit(m.guardType)
-            m.guarded = true
-            return
-        } else if bytes.HasPrefix(s, m.match) {
             if l.pos > l.start {
                 l.emit(tText)
             }
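The heart of the fix is the new check above: after a quote-plus-slash match, peek at the very next rune; a second '/' means a protocol-relative (schemaless) link like //example.com, which must be left alone rather than absolutified. A self-contained sketch of that check, assuming UTF-8 content; the helper name is hypothetical, since the diff inlines this logic in checkCandidate:

    package main

    import (
        "bytes"
        "fmt"
        "unicode/utf8"
    )

    // isSchemaless reports whether the bytes following a matched prefix at pos
    // begin with a second '/', i.e. a protocol-relative URL like //example.com.
    func isSchemaless(content, match []byte, pos int) bool {
        if !bytes.HasPrefix(content[pos:], match) {
            return false
        }
        after := pos + len(match)
        if after >= len(content) {
            return false
        }
        r, _ := utf8.DecodeRune(content[after:])
        return r == '/'
    }

    func main() {
        fmt.Println(isSchemaless([]byte(`href="//cdn"`), []byte(`"/`), 5))  // true: leave as-is
        fmt.Println(isSchemaless([]byte(`href="/page"`), []byte(`"/`), 5))  // false: absolutify
    }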
@@ -240,31 +211,30 @@ func doReplace(content []byte, matchers []absurlMatcher) []byte {
     b := bp.GetBuffer()
     defer bp.PutBuffer(b)

-    guards := make([]bool, len(matchers))
-    replaced := replace(content, matchers)
-    // first pass: check guards
-    for _, item := range replaced.items {
-        if item.typ != tText {
-            for i, e := range matchers {
-                if item.typ == e.guardType {
-                    guards[i] = true
-                    break
-                }
-            }
-        }
+    var items []item
+    if x := itemSlicePool.Get(); x != nil {
+        items = x.([]item)[:0]
+        defer itemSlicePool.Put(items)
+    } else {
+        items = make([]item, 0, 8)
     }
-    // second pass: do replacements for non-guarded tokens
-    for _, token := range replaced.items {
+
+    lexer := &contentlexer{content: content,
+        items:        items,
+        prefixLookup: &prefixes{pr: mainPrefixRunes},
+        matchers:     matchers}
+
+    lexer.runReplacer()
+
+    for _, token := range lexer.items {
         switch token.typ {
         case tText:
             b.Write(token.val)
         default:
-            for i, e := range matchers {
-                if token.typ == e.replaceType && !guards[i] {
+            for _, e := range matchers {
+                if token.typ == e.replaceType {
                     b.Write(e.replacement)
-                } else if token.typ == e.replaceType || token.typ == e.guardType {
-                    b.Write(token.val)
+                    break
                 }
             }
         }
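doReplace now owns the pooled scratch slice directly instead of going through the deleted replace helper. A standalone sketch of the slice-reuse pattern it uses, simplified with a New func in place of the diff's explicit nil check:

    package main

    import (
        "fmt"
        "sync"
    )

    type item struct{ val []byte }

    // Pool of reusable token slices; New covers the first use.
    var itemSlicePool = &sync.Pool{
        New: func() interface{} { return make([]item, 0, 8) },
    }

    func process() {
        // Truncate the pooled slice to length 0 but keep its capacity,
        // so earlier allocations are reused across calls.
        items := itemSlicePool.Get().([]item)[:0]
        // defer evaluates its argument now; if append grows the slice,
        // the original backing array is what returns to the pool,
        // mirroring the behavior of the code in the diff.
        defer itemSlicePool.Put(items)

        items = append(items, item{val: []byte("token")})
        fmt.Println(len(items), cap(items))
    }

    func main() {
        process()
        process()
    }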
@@ -286,16 +256,10 @@ func newAbsurlReplacer(baseUrl string) *absurlReplacer {
     dqHtmlMatch := []byte("\"/")
     sqHtmlMatch := []byte("'/")

-    dqGuard := []byte("\"//")
-    sqGuard := []byte("'//")
-
     // XML
     dqXmlMatch := []byte("&#34;/")
     sqXmlMatch := []byte("&#39;/")

-    dqXmlGuard := []byte("&#34;//")
-    sqXmlGuard := []byte("&#39;//")
-
     dqHtml := []byte("\"" + base + "/")
     sqHtml := []byte("'" + base + "/")

@@ -303,15 +267,15 @@ func newAbsurlReplacer(baseUrl string) *absurlReplacer {
     sqXml := []byte("&#39;" + base + "/")

     return &absurlReplacer{htmlMatchers: []absurlMatcher{
-        {tSrcdq, tGrcdq, dqHtmlMatch, dqGuard, dqHtml, false},
-        {tSrcsq, tGsrcsq, sqHtmlMatch, sqGuard, sqHtml, false},
-        {tHrefdq, tGhrefdq, dqHtmlMatch, dqGuard, dqHtml, false},
-        {tHrefsq, tGhrefsq, sqHtmlMatch, sqGuard, sqHtml, false}},
+        {tSrcdq, dqHtmlMatch, dqHtml},
+        {tSrcsq, sqHtmlMatch, sqHtml},
+        {tHrefdq, dqHtmlMatch, dqHtml},
+        {tHrefsq, sqHtmlMatch, sqHtml}},
         xmlMatchers: []absurlMatcher{
-            {tSrcdq, tGrcdq, dqXmlMatch, dqXmlGuard, dqXml, false},
-            {tSrcsq, tGsrcsq, sqXmlMatch, sqXmlGuard, sqXml, false},
-            {tHrefdq, tGhrefdq, dqXmlMatch, dqXmlGuard, dqXml, false},
-            {tHrefsq, tGhrefsq, sqXmlMatch, sqXmlGuard, sqXml, false},
+            {tSrcdq, dqXmlMatch, dqXml},
+            {tSrcsq, sqXmlMatch, sqXml},
+            {tHrefdq, dqXmlMatch, dqXml},
+            {tHrefsq, sqXmlMatch, sqXml},
         }}

 }
@@ -21,6 +21,12 @@ const H5_XML_CONTENT_GUARDED = "<?xml version=\"1.0\" encoding=\"utf-8\" standal
 const REPLACE_1 = "No replacements."
 const REPLACE_2 = "ᚠᛇᚻ ᛒᛦᚦ ᚠᚱᚩᚠᚢᚱ\nᚠᛁᚱᚪ ᚷᛖᚻᚹᛦᛚᚳᚢᛗ"

+// Issue: 816, schemaless links combined with others
+const REPLACE_SCHEMALESS_HTML = `Pre. src='//schemaless' src='/normal' <a href="//schemaless">Schemaless</a>. <a href="/normal">normal</a>. Post.`
+const REPLACE_SCHEMALESS_HTML_CORRECT = `Pre. src='//schemaless' src='http://base/normal' <a href="//schemaless">Schemaless</a>. <a href="http://base/normal">normal</a>. Post.`
+const REPLACE_SCHEMALESS_XML = `Pre. src="//schemaless" src="/normal" <a href='//schemaless'>Schemaless</a>. <a href='/normal'>normal</a>. Post.`
+const REPLACE_SCHEMALESS_XML_CORRECT = `Pre. src="//schemaless" src="http://base/normal" <a href='//schemaless'>Schemaless</a>. <a href='http://base/normal'>normal</a>. Post.`
+
 var abs_url_bench_tests = []test{
     {H5_JS_CONTENT_DOUBLE_QUOTE, CORRECT_OUTPUT_SRC_HREF_DQ},
     {H5_JS_CONTENT_SINGLE_QUOTE, CORRECT_OUTPUT_SRC_HREF_SQ},
@@ -34,8 +40,10 @@ var xml_abs_url_bench_tests = []test{
 }

 var sanity_tests = []test{{REPLACE_1, REPLACE_1}, {REPLACE_2, REPLACE_2}}
-var abs_url_tests = append(abs_url_bench_tests, sanity_tests...)
-var xml_abs_url_tests = append(xml_abs_url_bench_tests, sanity_tests...)
+var extra_tests_html = []test{{REPLACE_SCHEMALESS_HTML, REPLACE_SCHEMALESS_HTML_CORRECT}}
+var abs_url_tests = append(abs_url_bench_tests, append(sanity_tests, extra_tests_html...)...)
+var extra_tests_xml = []test{{REPLACE_SCHEMALESS_XML, REPLACE_SCHEMALESS_XML_CORRECT}}
+var xml_abs_url_tests = append(xml_abs_url_bench_tests, append(sanity_tests, extra_tests_xml...)...)

 func TestChainZeroTransformers(t *testing.T) {
     tr := NewChain()
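The nested appends above compose one table out of the bench cases, the sanity cases, and the new schemaless cases, so the same driver exercises all of them. A tiny self-contained sketch of that composition pattern; the names and values here are placeholders, not the file's:

    package main

    import "fmt"

    type test struct{ content, expected string }

    func main() {
        bench := []test{{"a", "A"}}
        sanity := []test{{"b", "b"}}
        extra := []test{{"//x", "//x"}}

        // Same shape as the diff: append(bench, append(sanity, extra...)...)
        all := append(bench, append(sanity, extra...)...)
        fmt.Println(len(all)) // 3
    }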