mirror of
https://github.com/gohugoio/hugo.git
synced 2024-11-21 20:46:30 -05:00
34033e349a
The general way Hugo does this now is: * Sanitize the file paths so they work as URLs * When we create the final RelPermalink/Permalink, we use Go's `url.Parse` to escape it so it works in the browser. So, leaving anything in the first step that does not work with the second step just doesn't work. It's a little bit odd that `url.Parse` silently truncates this URL without any error, but that's for another day. I have also added better test coverage for this. Fixes #12342 Fixes #4926 See #8232
399 lines
11 KiB
Go
399 lines
11 KiB
Go
// Copyright 2021 The Hugo Authors. All rights reserved.
|
|
//
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
// you may not use this file except in compliance with the License.
|
|
// You may obtain a copy of the License at
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
// See the License for the specific language governing permissions and
|
|
// limitations under the License.
|
|
|
|
package paths
|
|
|
|
import (
|
|
"errors"
|
|
"fmt"
|
|
"net/url"
|
|
"path"
|
|
"path/filepath"
|
|
"strings"
|
|
"unicode"
|
|
)
|
|
|
|
// Separator constants used throughout this package.
const (
	// FilePathSeparator is the OS-specific path separator
	// (filepath.Separator) expressed as a string.
	FilePathSeparator = string(filepath.Separator)

	// slash is the forward slash used in Unix-style and URL paths.
	slash = "/"
)
|
|
|
|
// filepathPathBridge abstracts the operations that the path and
// path/filepath packages have in common, so helpers in this package can be
// written once and run against either flavour of path.
type filepathPathBridge interface {
	// Base corresponds to the Base function of the bridged package.
	Base(in string) string

	// Clean corresponds to the Clean function of the bridged package.
	Clean(in string) string

	// Dir corresponds to the Dir function of the bridged package.
	Dir(in string) string

	// Ext corresponds to the Ext function of the bridged package.
	Ext(in string) string

	// Join corresponds to the Join function of the bridged package.
	Join(elem ...string) string

	// Separator reports the path separator the implementation works with.
	Separator() string
}
|
|
|
|
type filepathBridge struct{}
|
|
|
|
func (filepathBridge) Base(in string) string {
|
|
return filepath.Base(in)
|
|
}
|
|
|
|
func (filepathBridge) Clean(in string) string {
|
|
return filepath.Clean(in)
|
|
}
|
|
|
|
func (filepathBridge) Dir(in string) string {
|
|
return filepath.Dir(in)
|
|
}
|
|
|
|
func (filepathBridge) Ext(in string) string {
|
|
return filepath.Ext(in)
|
|
}
|
|
|
|
func (filepathBridge) Join(elem ...string) string {
|
|
return filepath.Join(elem...)
|
|
}
|
|
|
|
func (filepathBridge) Separator() string {
|
|
return FilePathSeparator
|
|
}
|
|
|
|
var fpb filepathBridge
|
|
|
|
// AbsPathify returns an absolute version of inPath.
//
// A relative inPath is joined onto workingDir (filepath.Join also cleans the
// result); an inPath that is already absolute is only cleaned.
func AbsPathify(workingDir, inPath string) string {
	if !filepath.IsAbs(inPath) {
		return filepath.Join(workingDir, inPath)
	}
	return filepath.Clean(inPath)
}
|
|
|
|
// AddTrailingSlash returns path with a Unix-style slash (/) appended, unless
// path already ends with one.
func AddTrailingSlash(path string) string {
	if strings.HasSuffix(path, "/") {
		return path
	}
	return path + "/"
}
|
|
|
|
// AddLeadingSlash returns path with a Unix-style slash (/) prepended, unless
// path already starts with one.
func AddLeadingSlash(path string) string {
	if strings.HasPrefix(path, "/") {
		return path
	}
	return "/" + path
}
|
|
|
|
// AddTrailingAndLeadingSlash adds a leading and trailing Unix styled slash (/) if not already
|
|
// there.
|
|
func AddLeadingAndTrailingSlash(path string) string {
|
|
return AddTrailingSlash(AddLeadingSlash(path))
|
|
}
|
|
|
|
// MakeTitle converts inpath into a string suitable for use as a title:
// leading and trailing whitespace is trimmed and every hyphen is replaced
// with a space.
func MakeTitle(inpath string) string {
	trimmed := strings.TrimSpace(inpath)
	return strings.ReplaceAll(trimmed, "-", " ")
}
|
|
|
|
// ReplaceExtension takes a path and an extension, strips the old extension
|
|
// and returns the path with the new extension.
|
|
func ReplaceExtension(path string, newExt string) string {
|
|
f, _ := fileAndExt(path, fpb)
|
|
return f + "." + newExt
|
|
}
|
|
|
|
// makePathRelative strips the first entry of possibleDirectories that is a
// string prefix of inPath and returns the remainder.
//
// The match is a plain string-prefix comparison, not a path-element
// comparison. If no candidate matches, inPath is returned unchanged together
// with a non-nil error.
func makePathRelative(inPath string, possibleDirectories ...string) (string, error) {
	for i := range possibleDirectories {
		dir := possibleDirectories[i]
		if !strings.HasPrefix(inPath, dir) {
			continue
		}
		// dir is a verified prefix, so slicing is the same as trimming it.
		return inPath[len(dir):], nil
	}
	return inPath, errors.New("can't extract relative path, unknown prefix")
}
|
|
|
|
// ExtNoDelimiter takes a path and returns the extension, excluding the delimiter, i.e. "md".
|
|
func ExtNoDelimiter(in string) string {
|
|
return strings.TrimPrefix(Ext(in), ".")
|
|
}
|
|
|
|
// Ext takes a path and returns the extension, including the delimiter, i.e. ".md".
|
|
func Ext(in string) string {
|
|
_, ext := fileAndExt(in, fpb)
|
|
return ext
|
|
}
|
|
|
|
// PathAndExt is the same as FileAndExt, but it uses the path package.
|
|
func PathAndExt(in string) (string, string) {
|
|
return fileAndExt(in, pb)
|
|
}
|
|
|
|
// FileAndExt takes a path and returns the file and extension separated,
|
|
// the extension including the delimiter, i.e. ".md".
|
|
func FileAndExt(in string) (string, string) {
|
|
return fileAndExt(in, fpb)
|
|
}
|
|
|
|
// FileAndExtNoDelimiter takes a path and returns the file and extension separated,
|
|
// the extension excluding the delimiter, e.g "md".
|
|
func FileAndExtNoDelimiter(in string) (string, string) {
|
|
file, ext := fileAndExt(in, fpb)
|
|
return file, strings.TrimPrefix(ext, ".")
|
|
}
|
|
|
|
// Filename takes a file path, strips out the extension,
|
|
// and returns the name of the file.
|
|
func Filename(in string) (name string) {
|
|
name, _ = fileAndExt(in, fpb)
|
|
return
|
|
}
|
|
|
|
// FileAndExt returns the filename and any extension of a file path as
|
|
// two separate strings.
|
|
//
|
|
// If the path, in, contains a directory name ending in a slash,
|
|
// then both name and ext will be empty strings.
|
|
//
|
|
// If the path, in, is either the current directory, the parent
|
|
// directory or the root directory, or an empty string,
|
|
// then both name and ext will be empty strings.
|
|
//
|
|
// If the path, in, represents the path of a file without an extension,
|
|
// then name will be the name of the file and ext will be an empty string.
|
|
//
|
|
// If the path, in, represents a filename with an extension,
|
|
// then name will be the filename minus any extension - including the dot
|
|
// and ext will contain the extension - minus the dot.
|
|
func fileAndExt(in string, b filepathPathBridge) (name string, ext string) {
|
|
ext = b.Ext(in)
|
|
base := b.Base(in)
|
|
|
|
return extractFilename(in, ext, base, b.Separator()), ext
|
|
}
|
|
|
|
// extractFilename returns the file name part of the path in, without its
// extension.
//
// in is the full path, ext and base are its extension and base name as
// already computed by the caller, and pathSeparator is the separator in use.
func extractFilename(in, ext, base, pathSeparator string) (name string) {
	// True when the last separator is the final character of in. For an
	// empty in both sides evaluate to -1, so that case is covered here too.
	endsWithSeparator := strings.LastIndex(in, pathSeparator) == len(in)-1

	switch {
	case endsWithSeparator, base == "", base == ".", base == "..", base == pathSeparator:
		// Directory-like input: there is no file name.
		return ""
	case ext != "":
		// Drop the extension, including its dot, from the base name.
		return base[:strings.LastIndex(base, ".")]
	default:
		// No extension: the base name is the file name.
		return base
	}
}
|
|
|
|
// GetRelativePath returns the relative path of a given path.
|
|
func GetRelativePath(path, base string) (final string, err error) {
|
|
if filepath.IsAbs(path) && base == "" {
|
|
return "", errors.New("source: missing base directory")
|
|
}
|
|
name := filepath.Clean(path)
|
|
base = filepath.Clean(base)
|
|
|
|
name, err = filepath.Rel(base, name)
|
|
if err != nil {
|
|
return "", err
|
|
}
|
|
|
|
if strings.HasSuffix(filepath.FromSlash(path), FilePathSeparator) && !strings.HasSuffix(name, FilePathSeparator) {
|
|
name += FilePathSeparator
|
|
}
|
|
return name, nil
|
|
}
|
|
|
|
func prettifyPath(in string, b filepathPathBridge) string {
|
|
if filepath.Ext(in) == "" {
|
|
// /section/name/ -> /section/name/index.html
|
|
if len(in) < 2 {
|
|
return b.Separator()
|
|
}
|
|
return b.Join(in, "index.html")
|
|
}
|
|
name, ext := fileAndExt(in, b)
|
|
if name == "index" {
|
|
// /section/name/index.html -> /section/name/index.html
|
|
return b.Clean(in)
|
|
}
|
|
// /section/name.html -> /section/name/index.html
|
|
return b.Join(b.Dir(in), name, "index"+ext)
|
|
}
|
|
|
|
// CommonDir returns the longest run of leading "/"-separated elements shared
// by path1 and path2, joined with "/". It returns the empty string if either
// path is empty or if nothing is shared.
func CommonDir(path1, path2 string) string {
	if path1 == "" || path2 == "" {
		return ""
	}

	left, right := strings.Split(path1, "/"), strings.Split(path2, "/")

	// Count how many leading elements are identical in both paths.
	shared := 0
	for shared < len(left) && shared < len(right) && left[shared] == right[shared] {
		shared++
	}

	return strings.Join(left[:shared], "/")
}
|
|
|
|
// Sanitize sanitizes string to be used in Hugo's file paths and URLs, allowing only
|
|
// a predefined set of special Unicode characters.
|
|
//
|
|
// Spaces will be replaced with a single hyphen.
|
|
//
|
|
// This function is the core function used to normalize paths in Hugo.
|
|
//
|
|
// Note that this is the first common step for URL/path sanitation,
|
|
// the final URL/path may end up looking differently if the user has stricter rules defined (e.g. removePathAccents=true).
|
|
func Sanitize(s string) string {
|
|
var willChange bool
|
|
for i, r := range s {
|
|
willChange = !isAllowedPathCharacter(s, i, r)
|
|
if willChange {
|
|
break
|
|
}
|
|
}
|
|
|
|
if !willChange {
|
|
// Prevent allocation when nothing changes.
|
|
return s
|
|
}
|
|
|
|
target := make([]rune, 0, len(s))
|
|
var (
|
|
prependHyphen bool
|
|
wasHyphen bool
|
|
)
|
|
|
|
for i, r := range s {
|
|
isAllowed := isAllowedPathCharacter(s, i, r)
|
|
|
|
if isAllowed {
|
|
// track explicit hyphen in input; no need to add a new hyphen if
|
|
// we just saw one.
|
|
wasHyphen = r == '-'
|
|
|
|
if prependHyphen {
|
|
// if currently have a hyphen, don't prepend an extra one
|
|
if !wasHyphen {
|
|
target = append(target, '-')
|
|
}
|
|
prependHyphen = false
|
|
}
|
|
target = append(target, r)
|
|
} else if len(target) > 0 && !wasHyphen && unicode.IsSpace(r) {
|
|
prependHyphen = true
|
|
}
|
|
}
|
|
|
|
return string(target)
|
|
}
|
|
|
|
func isAllowedPathCharacter(s string, i int, r rune) bool {
|
|
if r == ' ' {
|
|
return false
|
|
}
|
|
// Check for the most likely first (faster).
|
|
isAllowed := unicode.IsLetter(r) || unicode.IsDigit(r)
|
|
isAllowed = isAllowed || r == '.' || r == '/' || r == '\\' || r == '_' || r == '+' || r == '~' || r == '-' || r == '@'
|
|
isAllowed = isAllowed || unicode.IsMark(r)
|
|
isAllowed = isAllowed || (r == '%' && i+2 < len(s) && ishex(s[i+1]) && ishex(s[i+2]))
|
|
return isAllowed
|
|
}
|
|
|
|
// ishex reports whether c is an ASCII hexadecimal digit (0-9, a-f or A-F).
//
// From https://golang.org/src/net/url/url.go
func ishex(c byte) bool {
	return ('0' <= c && c <= '9') ||
		('a' <= c && c <= 'f') ||
		('A' <= c && c <= 'F')
}
|
|
|
|
// slashFunc reports whether r is a forward slash. It is the separator
// predicate passed to strings.FieldsFunc by FieldsSlash.
var slashFunc = func(r rune) bool { return r == '/' }
|
|
|
|
// Dir returns the directory part of s, i.e. everything before the final
// slash. It behaves like path.Dir but does not clean the path.
//
// The returned path ends in a slash only if it is the root "/". When s
// contains no slash the result is the empty string.
func Dir(s string) string {
	dir, _ := path.Split(s)
	// path.Split keeps the trailing slash on dir; drop it unless dir is
	// just the root.
	if n := len(dir); n > 1 && dir[n-1] == '/' {
		dir = dir[:n-1]
	}
	return dir
}
|
|
|
|
// FieldsSlash cuts s into fields separated with '/'.
|
|
func FieldsSlash(s string) []string {
|
|
f := strings.FieldsFunc(s, slashFunc)
|
|
return f
|
|
}
|
|
|
|
// DirFile holds the two parts of a path as returned by path.Split.
type DirFile struct {
	// Dir is the directory part.
	Dir string

	// File is the file name part.
	File string
}

// String renders d as "Dir|File". Used in test.
func (d DirFile) String() string {
	return fmt.Sprintf("%s|%s", d.Dir, d.File)
}
|
|
|
|
// PathEscape escapes pth for use as a URL path by parsing it with url.Parse
// and returning the escaped form of the parsed path. Unlike url.PathEscape
// it does not escape '/'.
//
// Use URLEscape to escape full URLs including scheme, query etc.
// This is slightly faster for the common case.
//
// PathEscape panics if pth cannot be parsed.
//
// NOTE(review): only the path component of the parsed value is returned, so
// a query or fragment in pth would presumably be dropped — confirm callers
// never pass one.
func PathEscape(pth string) string {
	parsed, parseErr := url.Parse(pth)
	if parseErr != nil {
		panic(parseErr)
	}
	return parsed.EscapedPath()
}
|
|
|
|
// ToSlashTrimLeading converts s with filepath.ToSlash and then removes one
// leading "/" from the result, if present.
func ToSlashTrimLeading(s string) string {
	slashed := filepath.ToSlash(s)
	return strings.TrimPrefix(slashed, "/")
}
|
|
|
|
// ToSlashTrimTrailing converts s with filepath.ToSlash and then removes one
// trailing "/" from the result, if present.
func ToSlashTrimTrailing(s string) string {
	slashed := filepath.ToSlash(s)
	return strings.TrimSuffix(slashed, "/")
}
|
|
|
|
// ToSlashPreserveLeading converts s to a forward slash separated path,
// strips all leading and trailing slashes and returns the result prefixed
// with a single "/".
//
// Note that the result always starts with a slash, whether or not s did.
func ToSlashPreserveLeading(s string) string {
	trimmed := strings.Trim(filepath.ToSlash(s), "/")
	return "/" + trimmed
}
|