markdown: Pass emoji codes to yuin/goldmark-emoji

Removes emoji code conversion from the page and shortcode parsers. Emoji
codes in markdown are now passed to Goldmark, where the goldmark-emoji
extension converts them to hexadecimal numeric character references.

This disables emoji rendering for the alternate content formats: html,
asciidoc, org, pandoc, and rst.

Fixes #7332
Fixes #11587
Closes #11598
This commit is contained in:
Joe Mooring 2023-10-24 03:04:13 -07:00 committed by GitHub
parent de4e466036
commit 272484f8bf
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
16 changed files with 151 additions and 312 deletions

View file

@ -228,3 +228,7 @@ func (c ConfigLanguage) PaginatePath() string {
func (c ConfigLanguage) StaticDirs() []string { func (c ConfigLanguage) StaticDirs() []string {
return c.config.staticDirs() return c.config.staticDirs()
} }
// EnableEmoji returns the EnableEmoji setting of the underlying config.
func (c ConfigLanguage) EnableEmoji() bool {
return c.config.EnableEmoji
}

View file

@ -66,6 +66,7 @@ type AllProvider interface {
StaticDirs() []string StaticDirs() []string
IgnoredErrors() map[string]bool IgnoredErrors() map[string]bool
WorkingDir() string WorkingDir() string
EnableEmoji() bool
} }
// Provider provides the configuration settings for Hugo. // Provider provides the configuration settings for Hugo.

1
go.mod
View file

@ -64,6 +64,7 @@ require (
github.com/tdewolff/minify/v2 v2.12.9 github.com/tdewolff/minify/v2 v2.12.9
github.com/tdewolff/parse/v2 v2.6.8 github.com/tdewolff/parse/v2 v2.6.8
github.com/yuin/goldmark v1.5.6 github.com/yuin/goldmark v1.5.6
github.com/yuin/goldmark-emoji v1.0.2
go.uber.org/atomic v1.11.0 go.uber.org/atomic v1.11.0
go.uber.org/automaxprocs v1.5.3 go.uber.org/automaxprocs v1.5.3
gocloud.dev v0.34.0 gocloud.dev v0.34.0

3
go.sum
View file

@ -444,9 +444,12 @@ github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9de
github.com/yuin/goldmark v1.1.32/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= github.com/yuin/goldmark v1.1.32/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
github.com/yuin/goldmark v1.3.5/go.mod h1:mwnBkeHKe2W/ZEtQ+71ViKU8L12m81fl3OWwC1Zlc8k= github.com/yuin/goldmark v1.3.5/go.mod h1:mwnBkeHKe2W/ZEtQ+71ViKU8L12m81fl3OWwC1Zlc8k=
github.com/yuin/goldmark v1.3.7/go.mod h1:mwnBkeHKe2W/ZEtQ+71ViKU8L12m81fl3OWwC1Zlc8k=
github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY= github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY=
github.com/yuin/goldmark v1.5.6 h1:COmQAWTCcGetChm3Ig7G/t8AFAN00t+o8Mt4cf7JpwA= github.com/yuin/goldmark v1.5.6 h1:COmQAWTCcGetChm3Ig7G/t8AFAN00t+o8Mt4cf7JpwA=
github.com/yuin/goldmark v1.5.6/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY= github.com/yuin/goldmark v1.5.6/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY=
github.com/yuin/goldmark-emoji v1.0.2 h1:c/RgTShNgHTtc6xdz2KKI74jJr6rWi7FPgnP9GAsO5s=
github.com/yuin/goldmark-emoji v1.0.2/go.mod h1:RhP/RWpexdp+KHs7ghKnifRoIs/Bq4nDS7tRbCkOwKY=
go.opencensus.io v0.21.0/go.mod h1:mSImk1erAIZhrmZN+AvHh14ztQfjbGwt4TtuofqLduU= go.opencensus.io v0.21.0/go.mod h1:mSImk1erAIZhrmZN+AvHh14ztQfjbGwt4TtuofqLduU=
go.opencensus.io v0.22.0/go.mod h1:+kGneAE2xo2IficOXnaByMWTGM9T73dGwxeWcUqIpI8= go.opencensus.io v0.22.0/go.mod h1:+kGneAE2xo2IficOXnaByMWTGM9T73dGwxeWcUqIpI8=
go.opencensus.io v0.22.2/go.mod h1:yxeiOL68Rb0Xd1ddK5vPZ/oVn4vY4Ynel7k9FzqtOIw= go.opencensus.io v0.22.2/go.mod h1:yxeiOL68Rb0Xd1ddK5vPZ/oVn4vY4Ynel7k9FzqtOIw=

View file

@ -13,13 +13,8 @@
package helpers package helpers
import ( import (
"math"
"reflect" "reflect"
"strings"
"testing" "testing"
"github.com/gohugoio/hugo/bufferpool"
"github.com/kyokomi/emoji/v2"
) )
func TestEmojiCustom(t *testing.T) { func TestEmojiCustom(t *testing.T) {
@ -68,76 +63,3 @@ func TestEmojiCustom(t *testing.T) {
} }
} }
// The Emoji benchmarks below are heavily skewed in Hugo's direction:
//
// Hugo has a byte slice, wants a byte slice, and doesn't mind if the original is modified.
// BenchmarkEmojiKyokomiFprint benchmarks emoji.Fprint from kyokomi/emoji
// writing into a pooled buffer, via the shared doBenchmarkEmoji driver.
func BenchmarkEmojiKyokomiFprint(b *testing.B) {
f := func(in []byte) []byte {
buff := bufferpool.GetBuffer()
defer bufferpool.PutBuffer(buff)
emoji.Fprint(buff, string(in))
// Copy the result out: buff is handed back to the pool when f returns.
bc := make([]byte, buff.Len())
copy(bc, buff.Bytes())
return bc
}
doBenchmarkEmoji(b, f)
}
// BenchmarkEmojiKyokomiSprint benchmarks emoji.Sprint from kyokomi/emoji,
// converting []byte -> string -> []byte around the call.
func BenchmarkEmojiKyokomiSprint(b *testing.B) {
f := func(in []byte) []byte {
return []byte(emoji.Sprint(string(in)))
}
doBenchmarkEmoji(b, f)
}
// BenchmarkHugoEmoji benchmarks this package's Emojify with the same
// inputs as the kyokomi benchmarks.
func BenchmarkHugoEmoji(b *testing.B) {
doBenchmarkEmoji(b, Emojify)
}
// doBenchmarkEmoji is the shared benchmark driver: it runs f over a fixed set
// of sample texts b.N times and fails the benchmark if the length of f's
// output differs from the expected output by more than 30 bytes.
func doBenchmarkEmoji(b *testing.B, f func(in []byte) []byte) {
type input struct {
in []byte
expect []byte
}
// Sample texts; expected values for the emoji cases come from emoji.Sprint.
data := []struct {
input string
expect string
}{
{"A :smile: a day", emoji.Sprint("A :smile: a day")},
{"A :smile: and a :beer: day keeps the doctor away", emoji.Sprint("A :smile: and a :beer: day keeps the doctor away")},
{"A :smile: a day and 10 " + strings.Repeat(":beer: ", 10), emoji.Sprint("A :smile: a day and 10 " + strings.Repeat(":beer: ", 10))},
{"No smiles today.", "No smiles today."},
{"No smiles for you or " + strings.Repeat("you ", 1000), "No smiles for you or " + strings.Repeat("you ", 1000)},
}
// Build a fresh []byte input for every (iteration, sample) pair before the
// timer is reset, so the conversions are not timed.
// NOTE(review): presumably fresh copies are needed because f may modify its input — confirm.
in := make([]input, b.N*len(data))
cnt := 0
for i := 0; i < b.N; i++ {
for _, this := range data {
in[cnt] = input{[]byte(this.input), []byte(this.expect)}
cnt++
}
}
b.ResetTimer()
cnt = 0
for i := 0; i < b.N; i++ {
for j := range data {
currIn := in[cnt]
cnt++
result := f(currIn.in)
// The emoji implementations give slightly different output, so only
// the output length is compared, with a tolerance of 30 bytes.
diffLen := len(result) - len(currIn.expect)
diffLen = int(math.Abs(float64(diffLen)))
if diffLen > 30 {
b.Fatalf("[%d] emoji std, got \n%q but expected \n%q", j, result, currIn.expect)
}
}
}
}

View file

@ -150,7 +150,7 @@ func (m *pageMap) newPageFromContentNode(n *contentNode, parentBucket *pagesMapB
parseResult, err := pageparser.Parse( parseResult, err := pageparser.Parse(
r, r,
pageparser.Config{EnableEmoji: s.conf.EnableEmoji}, pageparser.Config{},
) )
if err != nil { if err != nil {
return nil, err return nil, err

View file

@ -794,11 +794,7 @@ Loop:
rn.AddShortcode(currShortcode) rn.AddShortcode(currShortcode)
case it.Type == pageparser.TypeEmoji: case it.Type == pageparser.TypeEmoji:
if emoji := helpers.Emoji(it.ValStr(result.Input())); emoji != nil {
rn.AddReplacement(emoji, it)
} else {
rn.AddBytes(it) rn.AddBytes(it)
}
case it.IsEOF(): case it.IsEOF():
break Loop break Loop
case it.IsError(): case it.IsError():

View file

@ -1234,60 +1234,6 @@ func TestChompBOM(t *testing.T) {
checkPageTitle(t, p, "Simple") checkPageTitle(t, p, "Simple")
} }
// TestPageWithEmoji builds the same page once with enableEmoji set to true and
// once with it set to false, and checks the rendered output in each case:
// known emoji codes are replaced only when enabled, while unknown codes and
// stray colons are left as written in both cases.
func TestPageWithEmoji(t *testing.T) {
for _, enableEmoji := range []bool{true, false} {
v := config.New()
v.Set("enableEmoji", enableEmoji)
b := newTestSitesBuilder(t).WithViper(v)
b.WithContent("page-emoji.md", `---
title: "Hugo Smile"
---
This is a :smile:.
<!--more-->
Another :smile: This is :not: :an: :emoji:.
O :christmas_tree:
Write me an :e-mail: or :email:?
Too many colons: :: ::: :::: :?: :!: :.:
If you dislike this video, you can hit that :-1: button :stuck_out_tongue_winking_eye:,
but if you like it, hit :+1: and get subscribed!
`)
b.CreateSites().Build(BuildCfg{})
if enableEmoji {
// Emoji enabled: recognized codes become emoji characters.
b.AssertFileContent("public/page-emoji/index.html",
"This is a 😄",
"Another 😄",
"This is :not: :an: :emoji:.",
"O 🎄",
"Write me an 📧 or ✉️?",
"Too many colons: :: ::: :::: :?: :!: :.:",
"you can hit that 👎 button 😜,",
"hit 👍 and get subscribed!",
)
} else {
// Emoji disabled: every code is passed through untouched.
b.AssertFileContent("public/page-emoji/index.html",
"This is a :smile:",
"Another :smile:",
"This is :not: :an: :emoji:.",
"O :christmas_tree:",
"Write me an :e-mail: or :email:?",
"Too many colons: :: ::: :::: :?: :!: :.:",
"you can hit that :-1: button :stuck_out_tongue_winking_eye:,",
"hit :+1: and get subscribed!",
)
}
}
}
func TestPageHTMLContent(t *testing.T) { func TestPageHTMLContent(t *testing.T) {
b := newTestSitesBuilder(t) b := newTestSitesBuilder(t)
b.WithSimpleConfigFile() b.WithSimpleConfigFile()

View file

@ -26,8 +26,6 @@ import (
"strings" "strings"
"sync" "sync"
"github.com/gohugoio/hugo/helpers"
"errors" "errors"
"github.com/gohugoio/hugo/common/herrors" "github.com/gohugoio/hugo/common/herrors"
@ -632,14 +630,7 @@ Loop:
case currItem.IsText(): case currItem.IsText():
sc.inner = append(sc.inner, currItem.ValStr(source)) sc.inner = append(sc.inner, currItem.ValStr(source))
case currItem.Type == pageparser.TypeEmoji: case currItem.Type == pageparser.TypeEmoji:
// TODO(bep) avoid the duplication of these "text cases", to prevent sc.inner = append(sc.inner, currItem.ValStr(source))
// more of #6504 in the future.
val := currItem.ValStr(source)
if emoji := helpers.Emoji(val); emoji != nil {
sc.inner = append(sc.inner, string(emoji))
} else {
sc.inner = append(sc.inner, val)
}
case currItem.IsShortcodeName(): case currItem.IsShortcodeName():
sc.name = currItem.ValStr(source) sc.name = currItem.ValStr(source)

View file

@ -754,33 +754,6 @@ title: "Hugo Rocks!"
) )
} }
// TestShortcodeEmoji checks that, with enableEmoji set, an emoji code inside
// the inner content of a shortcode is rendered as the emoji character.
//
// https://github.com/gohugoio/hugo/issues/6504
func TestShortcodeEmoji(t *testing.T) {
t.Parallel()
v := config.New()
v.Set("enableEmoji", true)
builder := newTestSitesBuilder(t).WithViper(v)
builder.WithContent("page.md", `---
title: "Hugo Rocks!"
---
# doc
{{< event >}}10:30-11:00 My :smile: Event {{< /event >}}
`).WithTemplatesAdded(
"layouts/shortcodes/event.html", `<div>{{ "\u29BE" }} {{ .Inner }} </div>`)
builder.Build(BuildCfg{})
// The shortcode template emits U+29BE followed by .Inner.
builder.AssertFileContent("public/page/index.html",
"⦾ 10:30-11:00 My 😄 Event",
)
}
func TestShortcodeParams(t *testing.T) { func TestShortcodeParams(t *testing.T) {
t.Parallel() t.Parallel()
c := qt.New(t) c := qt.New(t)

View file

@ -94,7 +94,6 @@ type siteConfigHolder struct {
taxonomiesConfig taxonomiesConfig taxonomiesConfig taxonomiesConfig
timeout time.Duration timeout time.Duration
hasCJKLanguage bool hasCJKLanguage bool
enableEmoji bool
} }
// Lazily loaded site dependencies. // Lazily loaded site dependencies.

View file

@ -28,6 +28,7 @@ import (
"github.com/gohugoio/hugo/markup/converter" "github.com/gohugoio/hugo/markup/converter"
"github.com/gohugoio/hugo/markup/tableofcontents" "github.com/gohugoio/hugo/markup/tableofcontents"
"github.com/yuin/goldmark" "github.com/yuin/goldmark"
emoji "github.com/yuin/goldmark-emoji"
"github.com/yuin/goldmark/ast" "github.com/yuin/goldmark/ast"
"github.com/yuin/goldmark/extension" "github.com/yuin/goldmark/extension"
"github.com/yuin/goldmark/parser" "github.com/yuin/goldmark/parser"
@ -149,6 +150,10 @@ func newMarkdown(pcfg converter.ProviderConfig) goldmark.Markdown {
extensions = append(extensions, c) extensions = append(extensions, c)
} }
if pcfg.Conf.EnableEmoji() {
extensions = append(extensions, emoji.Emoji)
}
if cfg.Parser.AutoHeadingID { if cfg.Parser.AutoHeadingID {
parserOptions = append(parserOptions, parser.WithAutoHeadingID()) parserOptions = append(parserOptions, parser.WithAutoHeadingID())
} }
@ -156,6 +161,7 @@ func newMarkdown(pcfg converter.ProviderConfig) goldmark.Markdown {
if cfg.Parser.Attribute.Title { if cfg.Parser.Attribute.Title {
parserOptions = append(parserOptions, parser.WithAttribute()) parserOptions = append(parserOptions, parser.WithAttribute())
} }
if cfg.Parser.Attribute.Block { if cfg.Parser.Attribute.Block {
extensions = append(extensions, attributes.New()) extensions = append(extensions, attributes.New())
} }

View file

@ -578,3 +578,114 @@ a <!-- b --> c
"<li>This is a list item <!-- Comment: an innocent-looking comment --></li>", "<li>This is a list item <!-- Comment: an innocent-looking comment --></li>",
) )
} }
// TestGoldmarkEmojiExtension builds a small site with enableEmoji = true and
// checks where emoji codes are and are not converted: not inside a fenced
// code block, not in the raw .Inner of a {{< >}} shortcode, but in content
// included via RenderShortcodes, in .Inner passed through RenderString, and
// in the .Inner of a {{% %}} shortcode.
//
// Issue #7332
// Issue #11587
func TestGoldmarkEmojiExtension(t *testing.T) {
t.Parallel()
files := `
-- config.toml --
enableEmoji = true
-- content/p1.md --
---
title: "p1"
---
~~~text
:x:
~~~
{{% include "/p2" %}}
{{< sc1 >}}:smiley:{{< /sc1 >}}
{{< sc2 >}}:+1:{{< /sc2 >}}
{{% sc3 %}}:-1:{{% /sc3 %}}
-- content/p2.md --
---
title: "p2"
---
:heavy_check_mark:
-- layouts/shortcodes/include.html --
{{ $p := site.GetPage (.Get 0) }}
{{ $p.RenderShortcodes }}
-- layouts/shortcodes/sc1.html --
sc1_begin|{{ .Inner }}|sc1_end
-- layouts/shortcodes/sc2.html --
sc2_begin|{{ .Inner | .Page.RenderString }}|sc2_end
-- layouts/shortcodes/sc3.html --
sc3_begin|{{ .Inner }}|sc3_end
-- layouts/_default/single.html --
{{ .Content }}
`
b := hugolib.NewIntegrationTestBuilder(
hugolib.IntegrationTestConfig{
T: t,
TxtarString: files,
},
).Build()
// Converted emoji are expected as hexadecimal numeric character references.
b.AssertFileContentExact("public/p1/index.html",
// Issue #7332
"<span>:x:\n</span>",
// Issue #11587
"<p>&#x2714;&#xfe0f;</p>",
// Should not be converted to emoji
"sc1_begin|:smiley:|sc1_end",
// Should be converted to emoji
"sc2_begin|&#x1f44d;|sc2_end",
// Should be converted to emoji
"sc3_begin|&#x1f44e;|sc3_end",
)
}
// TestEmojiDisabled checks that with enableEmoji = false an emoji code in
// markdown content is rendered literally.
func TestEmojiDisabled(t *testing.T) {
t.Parallel()
files := `
-- config.toml --
enableEmoji = false
-- content/p1.md --
---
title: "p1"
---
:x:
-- layouts/_default/single.html --
{{ .Content }}
`
b := hugolib.NewIntegrationTestBuilder(
hugolib.IntegrationTestConfig{
T: t,
TxtarString: files,
},
).Build()
b.AssertFileContentExact("public/p1/index.html", "<p>:x:</p>")
}
// TestEmojiDefaultConfig checks that when no site configuration file is
// provided (so enableEmoji is never set), an emoji code in markdown content
// is rendered literally.
func TestEmojiDefaultConfig(t *testing.T) {
t.Parallel()
files := `
-- content/p1.md --
---
title: "p1"
---
:x:
-- layouts/_default/single.html --
{{ .Content }}
`
b := hugolib.NewIntegrationTestBuilder(
hugolib.IntegrationTestConfig{
T: t,
TxtarString: files,
},
).Build()
b.AssertFileContentExact("public/p1/index.html", "<p>:x:</p>")
}

View file

@ -61,9 +61,7 @@ func (l *pageLexer) Input() []byte {
return l.input return l.input
} }
type Config struct { type Config struct{}
EnableEmoji bool
}
// note: the input position here is normally 0 (start), but // note: the input position here is normally 0 (start), but
// can be set if position of first shortcode is known // can be set if position of first shortcode is known
@ -103,8 +101,6 @@ var (
delimOrg = []byte("#+") delimOrg = []byte("#+")
htmlCommentStart = []byte("<!--") htmlCommentStart = []byte("<!--")
htmlCommentEnd = []byte("-->") htmlCommentEnd = []byte("-->")
emojiDelim = byte(':')
) )
func (l *pageLexer) next() rune { func (l *pageLexer) next() rune {
@ -276,34 +272,6 @@ func (l *pageLexer) consumeSpace() {
} }
} }
// lex a string starting at ":"
//
// lexEmoji scans forward from the byte after the opening delimiter. If it
// reaches a closing emojiDelim after at least one other byte, and every rune
// in between satisfies isAlphaNumericOrHyphen or is '+', the whole
// ":name:" span is emitted as TypeEmoji. Otherwise only the opening byte is
// consumed and emitted as tText. In both cases lexing continues with
// lexMainSection.
func lexEmoji(l *pageLexer) stateFunc {
pos := l.pos + 1
valid := false
for i := pos; i < len(l.input); i++ {
// i > pos rejects an empty code such as "::".
if i > pos && l.input[i] == emojiDelim {
// Place pos just past the closing delimiter.
pos = i + 1
valid = true
break
}
r, _ := utf8.DecodeRune(l.input[i:])
// Any other rune ends the scan without a match.
if !(isAlphaNumericOrHyphen(r) || r == '+') {
break
}
}
if valid {
l.pos = pos
l.emit(TypeEmoji)
} else {
// Not an emoji code: emit just the leading ":" as text.
l.pos++
l.emit(tText)
}
return lexMainSection
}
type sectionHandlers struct { type sectionHandlers struct {
l *pageLexer l *pageLexer
@ -399,20 +367,6 @@ func createSectionHandlers(l *pageLexer) *sectionHandlers {
handlers := []*sectionHandler{shortCodeHandler, summaryDividerHandler} handlers := []*sectionHandler{shortCodeHandler, summaryDividerHandler}
if l.cfg.EnableEmoji {
emojiHandler := &sectionHandler{
l: l,
skipFunc: func(l *pageLexer) int {
return l.indexByte(emojiDelim)
},
lexFunc: func(origin stateFunc, l *pageLexer) (stateFunc, bool) {
return lexEmoji, true
},
}
handlers = append(handlers, emojiHandler)
}
return &sectionHandlers{ return &sectionHandlers{
l: l, l: l,
handlers: handlers, handlers: handlers,

View file

@ -1,42 +0,0 @@
// Copyright 2018 The Hugo Authors. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package pageparser
import (
"testing"
qt "github.com/frankban/quicktest"
)
// TestMain is a table-driven test of lexMainSection with EnableEmoji set to
// true: it covers well-formed emoji codes and colon sequences that only look
// like emoji codes and must be emitted as plain text items.
// NOTE(review): the name matches Go's special TestMain(m *testing.M) hook;
// with a *testing.T parameter it presumably runs as an ordinary test — confirm.
func TestMain(t *testing.T) {
t.Parallel()
c := qt.New(t)
mainTests := []lexerTest{
{"emoji #1", "Some text with :emoji:", []typeText{nti(tText, "Some text with "), nti(TypeEmoji, ":emoji:"), tstEOF}},
{"emoji #2", "Some text with :emoji: and some text.", []typeText{nti(tText, "Some text with "), nti(TypeEmoji, ":emoji:"), nti(tText, " and some text."), tstEOF}},
{"looks like an emoji #1", "Some text and then :emoji", []typeText{nti(tText, "Some text and then "), nti(tText, ":"), nti(tText, "emoji"), tstEOF}},
{"looks like an emoji #2", "Some text and then ::", []typeText{nti(tText, "Some text and then "), nti(tText, ":"), nti(tText, ":"), tstEOF}},
{"looks like an emoji #3", ":Some :text", []typeText{nti(tText, ":"), nti(tText, "Some "), nti(tText, ":"), nti(tText, "text"), tstEOF}},
}
for i, test := range mainTests {
items := collectWithConfig([]byte(test.input), false, lexMainSection, Config{EnableEmoji: true})
// Only build the string forms and assert when the item lists differ.
if !equal(test.input, items, test.items) {
got := itemsToString(items, []byte(test.input))
expected := testItemsToString(test.items)
c.Assert(got, qt.Equals, expected, qt.Commentf("Test %d: %s", i, test.name))
}
}
}

View file

@ -38,33 +38,7 @@ This is some summary. This is some summary. This is some summary. This is some s
` `
input := []byte(start + strings.Repeat(strings.Repeat("this is text", 30)+"{{< myshortcode >}}This is some inner content.{{< /myshortcode >}}", 10)) input := []byte(start + strings.Repeat(strings.Repeat("this is text", 30)+"{{< myshortcode >}}This is some inner content.{{< /myshortcode >}}", 10))
cfg := Config{EnableEmoji: false} cfg := Config{}
b.ResetTimer()
for i := 0; i < b.N; i++ {
if _, err := parseBytes(input, cfg, lexIntroSection); err != nil {
b.Fatal(err)
}
}
}
func BenchmarkParseWithEmoji(b *testing.B) {
start := `
---
title: "Front Matters"
description: "It really does"
---
This is some summary. This is some summary. This is some summary. This is some summary.
<!--more-->
`
input := []byte(start + strings.Repeat("this is not emoji: ", 50) + strings.Repeat("some text ", 70) + strings.Repeat("this is not: ", 50) + strings.Repeat("but this is a :smile: ", 3) + strings.Repeat("some text ", 70))
cfg := Config{EnableEmoji: true}
b.ResetTimer() b.ResetTimer()
for i := 0; i < b.N; i++ { for i := 0; i < b.N; i++ {