From 34033e349aea4b7853e4aa8b77800816f659d62d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Bj=C3=B8rn=20Erik=20Pedersen?=
 <bjorn.erik.pedersen@gmail.com>
Date: Fri, 5 Apr 2024 10:46:27 +0200
Subject: [PATCH] Strip hash sign (#) from file paths/URLs

The general way Hugo does this now is:

* Sanitize the file paths so they work as URLs
* When we create the final RelPermalink/Permalink, we use Go's `url.Parse` to escape it so it works for the browser.

So, leaving anything in the first step that does not work with the second step, just doesn't work.

It's a little bit odd that `url.Parse` silently truncates this URL without any error, but that's for another day.

I have also added better test coverage for this.

Fixes #12342
Fixes #4926
See #8232
---
 common/paths/path.go           |  2 +-
 common/paths/path_test.go      | 22 +++++++++++++++++++---
 helpers/path_test.go           |  2 +-
 helpers/url.go                 | 16 ++--------------
 hugolib/page_permalink_test.go | 22 ++++++++++++++++++++++
 5 files changed, 45 insertions(+), 19 deletions(-)

diff --git a/common/paths/path.go b/common/paths/path.go
index 906270cae..66a4b28c7 100644
--- a/common/paths/path.go
+++ b/common/paths/path.go
@@ -318,7 +318,7 @@ func isAllowedPathCharacter(s string, i int, r rune) bool {
 	}
 	// Check for the most likely first (faster).
 	isAllowed := unicode.IsLetter(r) || unicode.IsDigit(r)
-	isAllowed = isAllowed || r == '.' || r == '/' || r == '\\' || r == '_' || r == '#' || r == '+' || r == '~' || r == '-' || r == '@'
+	isAllowed = isAllowed || r == '.' || r == '/' || r == '\\' || r == '_' || r == '+' || r == '~' || r == '-' || r == '@'
 	isAllowed = isAllowed || unicode.IsMark(r)
 	isAllowed = isAllowed || (r == '%' && i+2 < len(s) && ishex(s[i+1]) && ishex(s[i+2]))
 	return isAllowed
diff --git a/common/paths/path_test.go b/common/paths/path_test.go
index 3605bfc43..bf6892659 100644
--- a/common/paths/path_test.go
+++ b/common/paths/path_test.go
@@ -14,7 +14,10 @@
 package paths
 
 import (
+	"fmt"
+	"net/url"
 	"path/filepath"
+	"strings"
 	"testing"
 
 	qt "github.com/frankban/quicktest"
@@ -204,17 +207,30 @@ func TestSanitize(t *testing.T) {
 		{"трям/трям", "трям/трям"},
 		{"은행", "은행"},
 		{"Банковский кассир", "Банковский-кассир"},
-		// Issue #1488
-		{"संस्कृत", "संस्कृत"},
-		{"a%C3%B1ame", "a%C3%B1ame"},         // Issue #1292
+
+		{"संस्कृत", "संस्कृत"},               // Issue #1488
 		{"this+is+a+test", "this+is+a+test"}, // Issue #1290
 		{"~foo", "~foo"},                     // Issue #2177
 
+		// Issue #12342
+		{"foo#bar", "foobar"},
+		{"foo@bar", "foo@bar"},
 	}
 
 	for _, test := range tests {
 		c.Assert(Sanitize(test.input), qt.Equals, test.expected)
+
+		// Make sure they survive the URL roundtrip, which makes sure that this works with URLs (e.g. in Permalink)
+		protocol := "https://"
+		urlString := fmt.Sprintf("%s%s", protocol, test.expected)
+		unescaped, err := url.PathUnescape(strings.TrimPrefix(URLEscape(urlString), protocol))
+		c.Assert(err, qt.IsNil)
+		c.Assert(unescaped, qt.Equals, test.expected)
+
 	}
+
+	// Some special cases.
+	c.Assert(Sanitize("a%C3%B1ame"), qt.Equals, "a%C3%B1ame") // Issue #1292
 }
 
 func BenchmarkSanitize(b *testing.B) {
diff --git a/helpers/path_test.go b/helpers/path_test.go
index 6f3699589..3257d5abd 100644
--- a/helpers/path_test.go
+++ b/helpers/path_test.go
@@ -35,7 +35,7 @@ func TestMakePath(t *testing.T) {
 		expected      string
 		removeAccents bool
 	}{
-		{"dot.slash/backslash\\underscore_pound#plus+hyphen-", "dot.slash/backslash\\underscore_pound#plus+hyphen-", true},
+		{"dot.slash/backslash\\underscore_pound#plus+hyphen-", "dot.slash/backslash\\underscore_poundplus+hyphen-", true},
 		{"abcXYZ0123456789", "abcXYZ0123456789", true},
 		{"%20 %2", "%20-2", true},
 		{"foo- bar", "foo-bar", true},
diff --git a/helpers/url.go b/helpers/url.go
index d5a613029..bdf73d983 100644
--- a/helpers/url.go
+++ b/helpers/url.go
@@ -28,25 +28,13 @@ import (
 //	uri: Vim (text editor)
 //	urlize: vim-text-editor
 func (p *PathSpec) URLize(uri string) string {
-	return p.URLEscape(p.MakePathSanitized(uri))
+	return paths.URLEscape(p.MakePathSanitized(uri))
 }
 
 // URLizeFilename creates an URL from a filename by escaping unicode letters
 // and turn any filepath separator into forward slashes.
 func (p *PathSpec) URLizeFilename(filename string) string {
-	return p.URLEscape(filepath.ToSlash(filename))
-}
-
-// URLEscape escapes unicode letters.
-func (p *PathSpec) URLEscape(uri string) string {
-	// escape unicode letters
-	parsedURI, err := url.Parse(uri)
-	if err != nil {
-		// if net/url can not parse URL it means Sanitize works incorrectly
-		panic(err)
-	}
-	x := parsedURI.String()
-	return x
+	return paths.URLEscape(filepath.ToSlash(filename))
 }
 
 // AbsURL creates an absolute URL from the relative path given and the BaseURL set in config.
diff --git a/hugolib/page_permalink_test.go b/hugolib/page_permalink_test.go
index bc89638d3..97fb1990e 100644
--- a/hugolib/page_permalink_test.go
+++ b/hugolib/page_permalink_test.go
@@ -157,3 +157,25 @@ Some content.
 	b.AssertFileContent("public/myblog/p2/index.html", "Single: A page|Hello|en|RelPermalink: /myblog/p2/|Permalink: https://example.com/myblog/p2/|")
 	b.AssertFileContent("public/myblog/p3/index.html", "Single: A page|Hello|en|RelPermalink: /myblog/p3/|Permalink: https://example.com/myblog/p3/|")
 }
+
+func TestPermalinkHashInSlugIssue12342(t *testing.T) {
+	files := `
+-- hugo.toml --
+disableKind = ["taxonomy", "term", "section"]
+baseURL = "https://example.com/"
+[permalinks]
+posts = "/posts/:year/:month/:slug/"
+-- content/posts/p1.md --
+---
+title: 'Newsletter #4'
+date: 2024-04-04T12:27:52-07:00
+---
+Foo
+-- layouts/_default/single.html --
+{{ .Title }}|{{ .RelPermalink }}|{{ .Permalink }}|$
+`
+
+	b := Test(t, files)
+
+	b.AssertFileContent("public/posts/2024/04/newsletter-4/index.html", "Newsletter #4|/posts/2024/04/newsletter-4/|https://example.com/posts/2024/04/newsletter-4/|$")
+}