helpers: Allow hyphens in UnicodeSanitize

Improve handling of existing hyphens in input to UnicodeSanitize.
This commit accomplishes three things:

1. Explicitly allow hyphens
2. Avoid appending a hyphen if a preceding hyphen is found
3. Avoid prepending a hyphen if a trailing hyphen is found

Fixes #7288
This commit is contained in:
Cameron Moore 2022-01-17 16:33:47 -06:00 committed by Bjørn Erik Pedersen
parent 6ff39fd908
commit 6407b2cd01
2 changed files with 21 additions and 6 deletions

View file

@ -87,7 +87,8 @@ func ishex(c rune) bool {
// a predefined set of special Unicode characters. // a predefined set of special Unicode characters.
// If RemovePathAccents configuration flag is enabled, Unicode accents // If RemovePathAccents configuration flag is enabled, Unicode accents
// are also removed. // are also removed.
// Spaces will be replaced with a single hyphen, and sequential hyphens will be reduced to one. // Hyphens in the original input are maintained.
// Spaces will be replaced with a single hyphen, and sequential replacement hyphens will be reduced to one.
func (p *PathSpec) UnicodeSanitize(s string) string { func (p *PathSpec) UnicodeSanitize(s string) string {
if p.RemovePathAccents { if p.RemovePathAccents {
s = text.RemoveAccentsString(s) s = text.RemoveAccentsString(s)
@ -95,20 +96,30 @@ func (p *PathSpec) UnicodeSanitize(s string) string {
source := []rune(s) source := []rune(s)
target := make([]rune, 0, len(source)) target := make([]rune, 0, len(source))
var prependHyphen bool var (
prependHyphen bool
wasHyphen bool
)
for i, r := range source { for i, r := range source {
isAllowed := r == '.' || r == '/' || r == '\\' || r == '_' || r == '#' || r == '+' || r == '~' isAllowed := r == '.' || r == '/' || r == '\\' || r == '_' || r == '#' || r == '+' || r == '~' || r == '-'
isAllowed = isAllowed || unicode.IsLetter(r) || unicode.IsDigit(r) || unicode.IsMark(r) isAllowed = isAllowed || unicode.IsLetter(r) || unicode.IsDigit(r) || unicode.IsMark(r)
isAllowed = isAllowed || (r == '%' && i+2 < len(source) && ishex(source[i+1]) && ishex(source[i+2])) isAllowed = isAllowed || (r == '%' && i+2 < len(source) && ishex(source[i+1]) && ishex(source[i+2]))
if isAllowed { if isAllowed {
// track explicit hyphen in input; no need to add a new hyphen if
// we just saw one.
wasHyphen = r == '-'
if prependHyphen { if prependHyphen {
target = append(target, '-') // if the current rune is itself a hyphen, don't prepend an extra one
if !wasHyphen {
target = append(target, '-')
}
prependHyphen = false prependHyphen = false
} }
target = append(target, r) target = append(target, r)
} else if len(target) > 0 && (r == '-' || unicode.IsSpace(r)) { } else if len(target) > 0 && !wasHyphen && unicode.IsSpace(r) {
prependHyphen = true prependHyphen = true
} }
} }

View file

@ -40,6 +40,10 @@ func TestMakePath(t *testing.T) {
expected string expected string
removeAccents bool removeAccents bool
}{ }{
{"dot.slash/backslash\\underscore_pound#plus+hyphen-", "dot.slash/backslash\\underscore_pound#plus+hyphen-", true},
{"abcXYZ0123456789", "abcXYZ0123456789", true},
{"%20 %2", "%20-2", true},
{"foo- bar", "foo-bar", true},
{" Foo bar ", "Foo-bar", true}, {" Foo bar ", "Foo-bar", true},
{"Foo.Bar/foo_Bar-Foo", "Foo.Bar/foo_Bar-Foo", true}, {"Foo.Bar/foo_Bar-Foo", "Foo.Bar/foo_Bar-Foo", true},
{"fOO,bar:foobAR", "fOObarfoobAR", true}, {"fOO,bar:foobAR", "fOObarfoobAR", true},
@ -52,7 +56,7 @@ func TestMakePath(t *testing.T) {
{"a%C3%B1ame", "a%C3%B1ame", false}, // Issue #1292 {"a%C3%B1ame", "a%C3%B1ame", false}, // Issue #1292
{"this+is+a+test", "this+is+a+test", false}, // Issue #1290 {"this+is+a+test", "this+is+a+test", false}, // Issue #1290
{"~foo", "~foo", false}, // Issue #2177 {"~foo", "~foo", false}, // Issue #2177
{"foo--bar", "foo--bar", true}, // Issue #7288
} }
for _, test := range tests { for _, test := range tests {