Mirror of https://github.com/gohugoio/hugo.git (synced 2024-11-21 20:46:30 -05:00)
Strip hash sign (#) from file paths/URLs
The general way Hugo does this now is:

* Sanitize the file paths so they work as URLs.
* When we create the final RelPermalink/Permalink, we use Go's `url.Parse` to escape it so it works in the browser.

So leaving anything in the first step that does not work with the second step just doesn't work. It's a little odd that `url.Parse` silently truncates such a URL without any error, but that's for another day. I have also added better test coverage for this.

Fixes #12342
Fixes #4926
See #8232
Parent: 7bf1abfc55
Commit: 34033e349a
5 changed files with 45 additions and 19 deletions
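The truncation the commit message refers to is easy to reproduce with the standard library alone. A minimal sketch (the sample URL is made up for illustration): a "#" that was meant to be part of a path segment is parsed as the start of a fragment, so everything after it silently disappears from the path.

package main

import (
	"fmt"
	"net/url"
)

func main() {
	// The "#" is parsed as the start of a fragment, not as a path character,
	// so the path loses everything after it without any error.
	u, err := url.Parse("https://example.com/posts/newsletter-#4/")
	if err != nil {
		panic(err)
	}
	fmt.Println(u.Path)     // /posts/newsletter-
	fmt.Println(u.Fragment) // 4/
}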
@@ -318,7 +318,7 @@ func isAllowedPathCharacter(s string, i int, r rune) bool {
 	}
 	// Check for the most likely first (faster).
 	isAllowed := unicode.IsLetter(r) || unicode.IsDigit(r)
-	isAllowed = isAllowed || r == '.' || r == '/' || r == '\\' || r == '_' || r == '#' || r == '+' || r == '~' || r == '-' || r == '@'
+	isAllowed = isAllowed || r == '.' || r == '/' || r == '\\' || r == '_' || r == '+' || r == '~' || r == '-' || r == '@'
 	isAllowed = isAllowed || unicode.IsMark(r)
 	isAllowed = isAllowed || (r == '%' && i+2 < len(s) && ishex(s[i+1]) && ishex(s[i+2]))
 	return isAllowed
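For illustration, a small standalone sketch of the observable effect of dropping '#' from the allowed set. The import path is an assumption based on Hugo's module layout; the expected outputs mirror the new TestSanitize cases further down.

package main

import (
	"fmt"

	"github.com/gohugoio/hugo/common/paths" // assumed import path for the package changed above
)

func main() {
	// '#' is no longer an allowed path character, so Sanitize drops it entirely,
	// while '@' remains allowed and passes through unchanged.
	fmt.Println(paths.Sanitize("foo#bar")) // foobar
	fmt.Println(paths.Sanitize("foo@bar")) // foo@bar
}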
@@ -14,7 +14,10 @@
 package paths
 
 import (
+	"fmt"
+	"net/url"
 	"path/filepath"
+	"strings"
 	"testing"
 
 	qt "github.com/frankban/quicktest"
@@ -204,17 +207,30 @@ func TestSanitize(t *testing.T) {
 		{"трям/трям", "трям/трям"},
 		{"은행", "은행"},
 		{"Банковский кассир", "Банковский-кассир"},
-		// Issue #1488
-		{"संस्कृत", "संस्कृत"},
-		{"a%C3%B1ame", "a%C3%B1ame"}, // Issue #1292
+		{"संस्कृत", "संस्कृत"}, // Issue #1488
 		{"this+is+a+test", "this+is+a+test"}, // Issue #1290
 		{"~foo", "~foo"}, // Issue #2177
+		// Issue #2342
+		{"foo#bar", "foobar"},
+		{"foo@bar", "foo@bar"},
 	}
 
 	for _, test := range tests {
 		c.Assert(Sanitize(test.input), qt.Equals, test.expected)
+
+		// Make sure they survive the URL roundtrip, which makes sure that this works with URLs (e.g. in Permalink)
+		protocol := "https://"
+		urlString := fmt.Sprintf("%s%s", protocol, test.expected)
+		unescaped, err := url.PathUnescape(strings.TrimPrefix(URLEscape(urlString), protocol))
+		c.Assert(err, qt.IsNil)
+		c.Assert(unescaped, qt.Equals, test.expected)
 	}
+
+	// Some special cases.
+	c.Assert(Sanitize("a%C3%B1ame"), qt.Equals, "a%C3%B1ame") // Issue #1292
 }
 
 func BenchmarkSanitize(b *testing.B) {
@@ -35,7 +35,7 @@ func TestMakePath(t *testing.T) {
 		expected      string
 		removeAccents bool
 	}{
-		{"dot.slash/backslash\\underscore_pound#plus+hyphen-", "dot.slash/backslash\\underscore_pound#plus+hyphen-", true},
+		{"dot.slash/backslash\\underscore_pound#plus+hyphen-", "dot.slash/backslash\\underscore_poundplus+hyphen-", true},
 		{"abcXYZ0123456789", "abcXYZ0123456789", true},
 		{"%20 %2", "%20-2", true},
 		{"foo- bar", "foo-bar", true},
@@ -28,25 +28,13 @@ import (
 // uri: Vim (text editor)
 // urlize: vim-text-editor
 func (p *PathSpec) URLize(uri string) string {
-	return p.URLEscape(p.MakePathSanitized(uri))
+	return paths.URLEscape(p.MakePathSanitized(uri))
 }
 
 // URLizeFilename creates an URL from a filename by escaping unicode letters
 // and turn any filepath separator into forward slashes.
 func (p *PathSpec) URLizeFilename(filename string) string {
-	return p.URLEscape(filepath.ToSlash(filename))
-}
-
-// URLEscape escapes unicode letters.
-func (p *PathSpec) URLEscape(uri string) string {
-	// escape unicode letters
-	parsedURI, err := url.Parse(uri)
-	if err != nil {
-		// if net/url can not parse URL it means Sanitize works incorrectly
-		panic(err)
-	}
-	x := parsedURI.String()
-	return x
+	return paths.URLEscape(filepath.ToSlash(filename))
 }
 
 // AbsURL creates an absolute URL from the relative path given and the BaseURL set in config.
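The removed PathSpec.URLEscape was a thin wrapper around net/url: parse the URI, then re-serialize it. A minimal sketch of that parse-and-reserialize step using the standard library directly, with one of the strings from the test table above; the package-level paths.URLEscape that replaces it is assumed to do the equivalent.

package main

import (
	"fmt"
	"net/url"
)

func main() {
	// Parsing and re-serializing percent-encodes the unicode letters in the path,
	// which is what "escapes unicode letters" means here.
	u, err := url.Parse("https://example.com/трям/трям")
	if err != nil {
		panic(err)
	}
	fmt.Println(u.String()) // https://example.com/%D1%82%D1%80%D1%8F%D0%BC/%D1%82%D1%80%D1%8F%D0%BC
}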
@@ -157,3 +157,25 @@ Some content.
 	b.AssertFileContent("public/myblog/p2/index.html", "Single: A page|Hello|en|RelPermalink: /myblog/p2/|Permalink: https://example.com/myblog/p2/|")
 	b.AssertFileContent("public/myblog/p3/index.html", "Single: A page|Hello|en|RelPermalink: /myblog/p3/|Permalink: https://example.com/myblog/p3/|")
 }
+
+func TestPermalinkHashInSlugIssue12342(t *testing.T) {
+	files := `
+-- hugo.toml --
+disableKind = ["taxonomy", "term", "section"]
+baseURL = "https://example.com/"
+[permalinks]
+posts = "/posts/:year/:month/:slug/"
+-- content/posts/p1.md --
+---
+title: 'Newsletter #4'
+date: 2024-04-04T12:27:52-07:00
+---
+Foo
+-- layouts/_default/single.html --
+{{ .Title }}|{{ .RelPermalink }}|{{ .Permalink }}|$
+`
+
+	b := Test(t, files)
+
+	b.AssertFileContent("public/posts/2024/04/newsletter-4/index.html", "Newsletter #4|/posts/2024/04/newsletter-4/|https://example.com/posts/2024/04/newsletter-4/|$")
+}