markup/goldmark: Add an optional Blackfriday auto ID strategy

Fixes #6707
This commit is contained in:
Bjørn Erik Pedersen 2020-01-05 11:52:00 +01:00
parent 8f071fc159
commit 16e7c11203
No known key found for this signature in database
GPG key ID: 330E6E2BD4859D8F
7 changed files with 141 additions and 34 deletions

View file

@ -15,6 +15,8 @@
package blackfriday package blackfriday
import ( import (
"unicode"
"github.com/gohugoio/hugo/identity" "github.com/gohugoio/hugo/identity"
"github.com/gohugoio/hugo/markup/blackfriday/blackfriday_config" "github.com/gohugoio/hugo/markup/blackfriday/blackfriday_config"
"github.com/gohugoio/hugo/markup/converter" "github.com/gohugoio/hugo/markup/converter"
@ -61,7 +63,27 @@ type blackfridayConverter struct {
} }
func (c *blackfridayConverter) SanitizeAnchorName(s string) string { func (c *blackfridayConverter) SanitizeAnchorName(s string) string {
return blackfriday.SanitizedAnchorName(s) return SanitizedAnchorName(s)
}
// SanitizedAnchorName returns an anchor name for text, built the way
// Blackfriday builds them: letters and numbers are kept and lower-cased, and
// every run of other characters between them becomes a single '-'. Leading and
// trailing runs of other characters produce no dash.
// Implementation borrowed from https://github.com/russross/blackfriday/blob/a477dd1646916742841ed20379f941cfa6c5bb6f/block.go#L1464
func SanitizedAnchorName(text string) string {
	var out []rune
	// pendingDash records that at least one non-alphanumeric rune was skipped
	// since the last rune that was kept.
	pendingDash := false
	for _, ch := range text {
		if !unicode.IsLetter(ch) && !unicode.IsNumber(ch) {
			pendingDash = true
			continue
		}
		// Only emit the separator once something has been written, so the
		// result never starts with a dash.
		if pendingDash && len(out) > 0 {
			out = append(out, '-')
		}
		pendingDash = false
		out = append(out, unicode.ToLower(ch))
	}
	return string(out)
}
func (c *blackfridayConverter) AnchorSuffix() string { func (c *blackfridayConverter) AnchorSuffix() string {

View file

@ -179,3 +179,45 @@ This is a footnote.[^1] And then some.
c.Assert(s, qt.Contains, "This is a footnote.<sup class=\"footnote-ref\" id=\"fnref:testid:1\"><a href=\"#fn:testid:1\">1</a></sup>") c.Assert(s, qt.Contains, "This is a footnote.<sup class=\"footnote-ref\" id=\"fnref:testid:1\"><a href=\"#fn:testid:1\">1</a></sup>")
c.Assert(s, qt.Contains, "<a class=\"footnote-return\" href=\"#fnref:testid:1\"><sup>[return]</sup></a>") c.Assert(s, qt.Contains, "<a class=\"footnote-return\" href=\"#fnref:testid:1\"><sup>[return]</sup></a>")
} }
// TestSanitizedAnchorName runs SanitizedAnchorName over a table of heading
// texts and reports every case whose result differs from the expected anchor.
// Tests borrowed from https://github.com/russross/blackfriday/blob/a925a152c144ea7de0f451eaf2f7db9e52fa005a/block_test.go#L1817
func TestSanitizedAnchorName(t *testing.T) {
	type anchorCase struct {
		text string
		want string
	}

	cases := []anchorCase{
		{text: "This is a header", want: "this-is-a-header"},
		{text: "This is also a header", want: "this-is-also-a-header"},
		{text: "main.go", want: "main-go"},
		{text: "Article 123", want: "article-123"},
		{text: "<- Let's try this, shall we?", want: "let-s-try-this-shall-we"},
		{text: " ", want: ""},
		{text: "Hello, 世界", want: "hello-世界"},
	}

	for i := range cases {
		tc := cases[i]
		got := SanitizedAnchorName(tc.text)
		if got == tc.want {
			continue
		}
		// Errorf (not Fatalf) so that all mismatching cases are reported.
		t.Errorf("SanitizedAnchorName(%q):\ngot %q\nwant %q", tc.text, got, tc.want)
	}
}

View file

@ -19,6 +19,8 @@ import (
"unicode" "unicode"
"unicode/utf8" "unicode/utf8"
"github.com/gohugoio/hugo/markup/blackfriday"
"github.com/gohugoio/hugo/markup/goldmark/goldmark_config" "github.com/gohugoio/hugo/markup/goldmark/goldmark_config"
"github.com/gohugoio/hugo/common/text" "github.com/gohugoio/hugo/common/text"
@ -30,34 +32,41 @@ import (
bp "github.com/gohugoio/hugo/bufferpool" bp "github.com/gohugoio/hugo/bufferpool"
) )
func sanitizeAnchorNameString(s string, asciiOnly bool) string { func sanitizeAnchorNameString(s string, idType string) string {
return string(sanitizeAnchorName([]byte(s), asciiOnly)) return string(sanitizeAnchorName([]byte(s), idType))
} }
func sanitizeAnchorName(b []byte, asciiOnly bool) []byte { func sanitizeAnchorName(b []byte, idType string) []byte {
return sanitizeAnchorNameWithHook(b, asciiOnly, nil) return sanitizeAnchorNameWithHook(b, idType, nil)
} }
func sanitizeAnchorNameWithHook(b []byte, asciiOnly bool, hook func(buf *bytes.Buffer)) []byte { func sanitizeAnchorNameWithHook(b []byte, idType string, hook func(buf *bytes.Buffer)) []byte {
buf := bp.GetBuffer() buf := bp.GetBuffer()
if asciiOnly { if idType == goldmark_config.AutoHeadingIDTypeBlackfriday {
// Normalize it to preserve accents if possible. // TODO(bep) make it more efficient.
b = text.RemoveAccents(b) buf.WriteString(blackfriday.SanitizedAnchorName(string(b)))
} } else {
asciiOnly := idType == goldmark_config.AutoHeadingIDTypeGitHubAscii
for len(b) > 0 { if asciiOnly {
r, size := utf8.DecodeRune(b) // Normalize it to preserve accents if possible.
switch { b = text.RemoveAccents(b)
case asciiOnly && size != 1:
case r == '-' || isSpace(r):
buf.WriteRune('-')
case isAlphaNumeric(r):
buf.WriteRune(unicode.ToLower(r))
default:
} }
b = b[size:] for len(b) > 0 {
r, size := utf8.DecodeRune(b)
switch {
case asciiOnly && size != 1:
case r == '-' || isSpace(r):
buf.WriteRune('-')
case isAlphaNumeric(r):
buf.WriteRune(unicode.ToLower(r))
default:
}
b = b[size:]
}
} }
if hook != nil { if hook != nil {
@ -83,19 +92,19 @@ func isSpace(r rune) bool {
var _ parser.IDs = (*idFactory)(nil) var _ parser.IDs = (*idFactory)(nil)
type idFactory struct { type idFactory struct {
asciiOnly bool idType string
vals map[string]struct{} vals map[string]struct{}
} }
func newIDFactory(idType string) *idFactory { func newIDFactory(idType string) *idFactory {
return &idFactory{ return &idFactory{
vals: make(map[string]struct{}), vals: make(map[string]struct{}),
asciiOnly: idType == goldmark_config.AutoHeadingIDTypeGitHubAscii, idType: idType,
} }
} }
func (ids *idFactory) Generate(value []byte, kind ast.NodeKind) []byte { func (ids *idFactory) Generate(value []byte, kind ast.NodeKind) []byte {
return sanitizeAnchorNameWithHook(value, ids.asciiOnly, func(buf *bytes.Buffer) { return sanitizeAnchorNameWithHook(value, ids.idType, func(buf *bytes.Buffer) {
if buf.Len() == 0 { if buf.Len() == 0 {
if kind == ast.KindHeading { if kind == ast.KindHeading {
buf.WriteString("heading") buf.WriteString("heading")

View file

@ -17,6 +17,8 @@ import (
"strings" "strings"
"testing" "testing"
"github.com/gohugoio/hugo/markup/goldmark/goldmark_config"
qt "github.com/frankban/quicktest" qt "github.com/frankban/quicktest"
) )
@ -69,9 +71,9 @@ under_score
expect := expectlines[i] expect := expectlines[i]
c.Run(input, func(c *qt.C) { c.Run(input, func(c *qt.C) {
b := []byte(input) b := []byte(input)
got := string(sanitizeAnchorName(b, false)) got := string(sanitizeAnchorName(b, goldmark_config.AutoHeadingIDTypeGitHub))
c.Assert(got, qt.Equals, expect) c.Assert(got, qt.Equals, expect)
c.Assert(sanitizeAnchorNameString(input, false), qt.Equals, expect) c.Assert(sanitizeAnchorNameString(input, goldmark_config.AutoHeadingIDTypeGitHub), qt.Equals, expect)
c.Assert(string(b), qt.Equals, input) c.Assert(string(b), qt.Equals, input)
}) })
} }
@ -80,16 +82,21 @@ under_score
func TestSanitizeAnchorNameAsciiOnly(t *testing.T) { func TestSanitizeAnchorNameAsciiOnly(t *testing.T) {
c := qt.New(t) c := qt.New(t)
c.Assert(sanitizeAnchorNameString("god is神真美好 good", true), qt.Equals, "god-is-good") c.Assert(sanitizeAnchorNameString("god is神真美好 good", goldmark_config.AutoHeadingIDTypeGitHubAscii), qt.Equals, "god-is-good")
c.Assert(sanitizeAnchorNameString("Resumé", true), qt.Equals, "resume") c.Assert(sanitizeAnchorNameString("Resumé", goldmark_config.AutoHeadingIDTypeGitHubAscii), qt.Equals, "resume")
} }
// TestSanitizeAnchorNameBlackfriday checks the anchor produced for a heading
// containing punctuation when the Blackfriday auto heading ID type is used.
func TestSanitizeAnchorNameBlackfriday(t *testing.T) {
	const (
		heading = "Let's try this, shall we?"
		want    = "let-s-try-this-shall-we"
	)

	c := qt.New(t)

	got := sanitizeAnchorNameString(heading, goldmark_config.AutoHeadingIDTypeBlackfriday)
	c.Assert(got, qt.Equals, want)
}
func BenchmarkSanitizeAnchorName(b *testing.B) { func BenchmarkSanitizeAnchorName(b *testing.B) {
input := []byte("God is good: 神真美好") input := []byte("God is good: 神真美好")
b.ResetTimer() b.ResetTimer()
for i := 0; i < b.N; i++ { for i := 0; i < b.N; i++ {
result := sanitizeAnchorName(input, false) result := sanitizeAnchorName(input, goldmark_config.AutoHeadingIDTypeGitHub)
if len(result) != 24 { if len(result) != 24 {
b.Fatalf("got %d", len(result)) b.Fatalf("got %d", len(result))
@ -101,7 +108,7 @@ func BenchmarkSanitizeAnchorNameAsciiOnly(b *testing.B) {
input := []byte("God is good: 神真美好") input := []byte("God is good: 神真美好")
b.ResetTimer() b.ResetTimer()
for i := 0; i < b.N; i++ { for i := 0; i < b.N; i++ {
result := sanitizeAnchorName(input, true) result := sanitizeAnchorName(input, goldmark_config.AutoHeadingIDTypeGitHubAscii)
if len(result) != 12 { if len(result) != 12 {
b.Fatalf("got %d", len(result)) b.Fatalf("got %d", len(result))
@ -109,11 +116,23 @@ func BenchmarkSanitizeAnchorNameAsciiOnly(b *testing.B) {
} }
} }
// BenchmarkSanitizeAnchorNameBlackfriday measures sanitizeAnchorName with the
// Blackfriday auto heading ID type on a mixed ASCII/CJK heading, aborting if
// the result does not have the expected byte length.
func BenchmarkSanitizeAnchorNameBlackfriday(b *testing.B) {
	// Expected byte length of the sanitized result for the heading below.
	const wantLen = 24

	heading := []byte("God is good: 神真美好")

	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		gotLen := len(sanitizeAnchorName(heading, goldmark_config.AutoHeadingIDTypeBlackfriday))
		if gotLen == wantLen {
			continue
		}
		b.Fatalf("got %d", gotLen)
	}
}
func BenchmarkSanitizeAnchorNameString(b *testing.B) { func BenchmarkSanitizeAnchorNameString(b *testing.B) {
input := "God is good: 神真美好" input := "God is good: 神真美好"
b.ResetTimer() b.ResetTimer()
for i := 0; i < b.N; i++ { for i := 0; i < b.N; i++ {
result := sanitizeAnchorNameString(input, false) result := sanitizeAnchorNameString(input, goldmark_config.AutoHeadingIDTypeGitHub)
if len(result) != 24 { if len(result) != 24 {
b.Fatalf("got %d", len(result)) b.Fatalf("got %d", len(result))
} }

View file

@ -29,7 +29,6 @@ import (
"github.com/gohugoio/hugo/hugofs" "github.com/gohugoio/hugo/hugofs"
"github.com/gohugoio/hugo/markup/converter" "github.com/gohugoio/hugo/markup/converter"
"github.com/gohugoio/hugo/markup/goldmark/goldmark_config"
"github.com/gohugoio/hugo/markup/highlight" "github.com/gohugoio/hugo/markup/highlight"
"github.com/gohugoio/hugo/markup/tableofcontents" "github.com/gohugoio/hugo/markup/tableofcontents"
"github.com/yuin/goldmark" "github.com/yuin/goldmark"
@ -57,7 +56,7 @@ func (p provide) New(cfg converter.ProviderConfig) (converter.Provider, error) {
cfg: cfg, cfg: cfg,
md: md, md: md,
sanitizeAnchorName: func(s string) string { sanitizeAnchorName: func(s string) string {
return sanitizeAnchorNameString(s, cfg.MarkupConfig.Goldmark.Parser.AutoHeadingIDType == goldmark_config.AutoHeadingIDTypeGitHub) return sanitizeAnchorNameString(s, cfg.MarkupConfig.Goldmark.Parser.AutoHeadingIDType)
}, },
}, nil }, nil
}), nil }), nil

View file

@ -178,6 +178,21 @@ func TestConvertAutoIDAsciiOnly(t *testing.T) {
c.Assert(got, qt.Contains, "<h2 id=\"god-is-good-\">") c.Assert(got, qt.Contains, "<h2 id=\"god-is-good-\">")
} }
// TestConvertAutoIDBlackfriday converts a single level-2 heading with the
// Blackfriday auto heading ID type configured and checks the rendered output
// for the expected heading ID.
func TestConvertAutoIDBlackfriday(t *testing.T) {
	const wantHeading = "<h2 id=\"let-s-try-this-shall-we\">"

	c := qt.New(t)

	// Start from the default markup configuration and switch only the ID type.
	conf := markup_config.Default
	conf.Goldmark.Parser.AutoHeadingIDType = goldmark_config.AutoHeadingIDTypeBlackfriday

	rendered := convert(c, conf, `
## Let's try this, shall we?
`)

	c.Assert(string(rendered.Bytes()), qt.Contains, wantHeading)
}
func TestCodeFence(t *testing.T) { func TestCodeFence(t *testing.T) {
c := qt.New(t) c := qt.New(t)

View file

@ -17,6 +17,7 @@ package goldmark_config
const ( const (
AutoHeadingIDTypeGitHub = "github" AutoHeadingIDTypeGitHub = "github"
AutoHeadingIDTypeGitHubAscii = "github-ascii" AutoHeadingIDTypeGitHubAscii = "github-ascii"
AutoHeadingIDTypeBlackfriday = "blackfriday"
) )
// DefaultConfig holds the default Goldmark configuration. // DefaultConfig holds the default Goldmark configuration.