Move the emoji parsing to pageparser
This avoids double parsing the page content when `enableEmoji=true`.

This commit also adds some general improvements to the parser, making it significantly faster overall:

```bash
benchmark                     old ns/op     new ns/op     delta
BenchmarkShortcodeLexer-4     90258         101730        +12.71%
BenchmarkParse-4              148940        15037         -89.90%

benchmark                     old allocs    new allocs    delta
BenchmarkShortcodeLexer-4     456           700           +53.51%
BenchmarkParse-4              28            33            +17.86%

benchmark                     old bytes     new bytes     delta
BenchmarkShortcodeLexer-4     69875         81014         +15.94%
BenchmarkParse-4              8128          8304          +2.17%
```

Running some site benchmarks with emoji support turned on:

```bash
benchmark                                                                                   old ns/op     new ns/op     delta
BenchmarkSiteBuilding/TOML,num_langs=3,num_pages=5000,tags_per_page=5,shortcodes,render-4   924556797     818115620    -11.51%

benchmark                                                                                   old allocs    new allocs    delta
BenchmarkSiteBuilding/TOML,num_langs=3,num_pages=5000,tags_per_page=5,shortcodes,render-4   4112613       4133787      +0.51%

benchmark                                                                                   old bytes     new bytes     delta
BenchmarkSiteBuilding/TOML,num_langs=3,num_pages=5000,tags_per_page=5,shortcodes,render-4   426982864     424363832    -0.61%
```

Fixes #5534
Commit 9cd54cab20 (parent a8853f1c5a)

13 changed files with 388 additions and 71 deletions
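For orientation, here is a minimal sketch of the new parser entry point this commit introduces. It assumes the `Iterator` exposes a `Next()` accessor as used elsewhere in `pageparser`; this is an illustration, not code from the commit:

```go
package main

import (
	"fmt"
	"strings"

	"github.com/gohugoio/hugo/parser/pageparser"
)

func main() {
	// Emoji lexing is now a parser concern, switched on via the new
	// Config value, so the page content is only traversed once.
	res, err := pageparser.Parse(
		strings.NewReader("Hello :smile: world"),
		pageparser.Config{EnableEmoji: true},
	)
	if err != nil {
		panic(err)
	}

	it := res.Iterator()
	for {
		item := it.Next()
		if item.IsEOF() || item.IsError() {
			break
		}
		if item.Type == pageparser.TypeEmoji {
			fmt.Printf("emoji token: %q\n", item.ValStr())
		}
	}
}
```

The design choice is visible in the hunks below: instead of a post-processing `Emojify` pass over the rendered content, the lexer emits `TypeEmoji` items inline, and the page renderer substitutes them during its existing single pass.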
```diff
@@ -215,7 +215,7 @@ type parsedFile struct {
 func parseContentFile(r io.Reader) (parsedFile, error) {
 	var pf parsedFile

-	psr, err := pageparser.Parse(r)
+	psr, err := pageparser.Parse(r, pageparser.Config{})
 	if err != nil {
 		return pf, err
 	}
```
```diff
@@ -30,6 +30,12 @@ var (
 	emojiMaxSize int
 )

+// Emoji returns the emoji given a key, e.g. ":smile:", nil if not found.
+func Emoji(key string) []byte {
+	emojiInit.Do(initEmoji)
+	return emojis[key]
+}
+
 // Emojify "emojifies" the input source.
 // Note that the input byte slice will be modified if needed.
 // See http://www.emoji-cheat-sheet.com/
```
```diff
@@ -17,6 +17,8 @@ import (
 	"bytes"
 	"io"

+	"github.com/gohugoio/hugo/helpers"
+
 	errors "github.com/pkg/errors"

 	bp "github.com/gohugoio/hugo/bufferpool"
```
```diff
@@ -149,6 +151,12 @@ Loop:
 			result.WriteString(placeHolder)
 			ordinal++
 			s.shortcodes.Add(placeHolder, currShortcode)
+		case it.Type == pageparser.TypeEmoji:
+			if emoji := helpers.Emoji(it.ValStr()); emoji != nil {
+				result.Write(emoji)
+			} else {
+				result.Write(it.Val)
+			}
 		case it.IsEOF():
 			break Loop
 		case it.IsError():
```
```diff
@@ -170,7 +178,10 @@ Loop:

 func (p *Page) parse(reader io.Reader) error {
-	parseResult, err := pageparser.Parse(reader)
+	parseResult, err := pageparser.Parse(
+		reader,
+		pageparser.Config{EnableEmoji: p.s.Cfg.GetBool("enableEmoji")},
+	)
 	if err != nil {
 		return err
 	}
```
```diff
@@ -1497,6 +1497,45 @@ func TestChompBOM(t *testing.T) {
 	checkPageTitle(t, p, "Simple")
 }

+func TestPageWithEmoji(t *testing.T) {
+	for _, enableEmoji := range []bool{true, false} {
+		v := viper.New()
+		v.Set("enableEmoji", enableEmoji)
+		b := newTestSitesBuilder(t)
+		b.WithViper(v)
+
+		b.WithSimpleConfigFile()
+
+		b.WithContent("page-emoji.md", `---
+title: "Hugo Smile"
+---
+This is a :smile:.
+<!--more-->
+
+Another :smile: This is :not: an emoji.
+
+`)
+
+		b.CreateSites().Build(BuildCfg{})
+
+		if enableEmoji {
+			b.AssertFileContent("public/page-emoji/index.html",
+				"This is a 😄",
+				"Another 😄",
+				"This is :not: an emoji",
+			)
+		} else {
+			b.AssertFileContent("public/page-emoji/index.html",
+				"This is a :smile:",
+				"Another :smile:",
+				"This is :not: an emoji",
+			)
+		}
+
+	}
+
+}
+
 // https://github.com/gohugoio/hugo/issues/5381
 func TestPageManualSummary(t *testing.T) {
 	b := newTestSitesBuilder(t)
```
```diff
@@ -272,10 +272,6 @@ func (c *contentHandlers) handlePageContent() contentHandler {

 		p := ctx.currentPage

-		if c.s.Cfg.GetBool("enableEmoji") {
-			p.workContent = helpers.Emojify(p.workContent)
-		}
-
 		p.workContent = p.renderContent(p.workContent)

 		tmpContent, tmpTableOfContents := helpers.ExtractTOC(p.workContent)
```
```diff
@@ -177,6 +177,16 @@ type shortcode struct {
 	pos    int // the position in bytes in the source file
 }

+func (s shortcode) innerString() string {
+	var sb strings.Builder
+
+	for _, inner := range s.inner {
+		sb.WriteString(inner.(string))
+	}
+
+	return sb.String()
+}
+
 func (sc shortcode) String() string {
 	// for testing (mostly), so any change here will break tests!
 	var params interface{}
```
```diff
@@ -363,7 +373,7 @@ func renderShortcode(
 	if sc.isInline {
 		templName := path.Join("_inline_shortcode", p.Path(), sc.name)
 		if sc.isClosing {
-			templStr := sc.inner[0].(string)
+			templStr := sc.innerString()

 			var err error
 			tmpl, err = p.s.TextTmpl.Parse(templName, templStr)
```
```diff
@@ -113,6 +113,7 @@ const (
 	TypeFrontMatterTOML
 	TypeFrontMatterJSON
 	TypeFrontMatterORG
+	TypeEmoji
 	TypeIgnore // The BOM Unicode byte order marker and possibly others

 	// shortcode items
```
```diff
@@ -37,6 +37,12 @@ type pageLexer struct {
 	start int // item start position
 	width int // width of last element

+	// Contains lexers for shortcodes and other main section
+	// elements.
+	sectionHandlers *sectionHandlers
+
+	cfg Config
+
 	// The summary divider to look for.
 	summaryDivider []byte
 	// Set when we have parsed any summary divider
```
```diff
@@ -60,13 +66,17 @@ func (l *pageLexer) Input() []byte {
 }

+type Config struct {
+	EnableEmoji bool
+}
+
 // note: the input position here is normally 0 (start), but
 // can be set if position of first shortcode is known
-func newPageLexer(input []byte, inputPosition int, stateStart stateFunc) *pageLexer {
+func newPageLexer(input []byte, stateStart stateFunc, cfg Config) *pageLexer {
 	lexer := &pageLexer{
 		input:      input,
-		pos:        inputPosition,
 		stateStart: stateStart,
+		cfg:        cfg,
 		lexerShortcodeState: lexerShortcodeState{
 			currLeftDelimItem:  tLeftDelimScNoMarkup,
 			currRightDelimItem: tRightDelimScNoMarkup,
```
```diff
@@ -75,6 +85,8 @@ func newPageLexer(input []byte, inputPosition int, stateStart stateFunc) *pageLe
 		items: make([]Item, 0, 5),
 	}

+	lexer.sectionHandlers = createSectionHandlers(lexer)
+
 	return lexer
 }
```
```diff
@@ -100,6 +112,8 @@ var (
 	delimOrg         = []byte("#+")
 	htmlCommentStart = []byte("<!--")
 	htmlCommentEnd   = []byte("-->")
+
+	emojiDelim = byte(':')
 )

 func (l *pageLexer) next() rune {
```
```diff
@@ -132,6 +146,10 @@ func (l *pageLexer) emit(t ItemType) {
 	l.start = l.pos
 }

+func (l *pageLexer) isEOF() bool {
+	return l.pos >= len(l.input)
+}
+
 // special case, do not send '\\' back to client
 func (l *pageLexer) ignoreEscapesAndEmit(t ItemType) {
 	val := bytes.Map(func(r rune) rune {
```
```diff
@@ -193,30 +211,80 @@ func (l *pageLexer) consumeSpace() {
 	}
 }

-func lexMainSection(l *pageLexer) stateFunc {
-	if l.isInHTMLComment {
-		return lexEndFromtMatterHTMLComment
-	}
-
-	// Fast forward as far as possible.
-	var l1, l2 int
-
-	if !l.summaryDividerChecked && l.summaryDivider != nil {
-		l1 = l.index(l.summaryDivider)
-		if l1 == -1 {
-			l.summaryDividerChecked = true
-		}
-	}
-
-	l2 = l.index(leftDelimSc)
-	skip := minIndex(l1, l2)
-
-	if skip > 0 {
-		l.pos += skip
-	}
-
-	for {
-		if l.isShortCodeStart() {
+// lex a string starting at ":"
+func lexEmoji(l *pageLexer) stateFunc {
+	pos := l.pos + 1
+	valid := false
+
+	for i := pos; i < len(l.input); i++ {
+		if i > pos && l.input[i] == emojiDelim {
+			pos = i + 1
+			valid = true
+			break
+		}
+		r, _ := utf8.DecodeRune(l.input[i:])
+		if !isAlphaNumeric(r) {
+			break
+		}
+	}
+
+	if valid {
+		l.pos = pos
+		l.emit(TypeEmoji)
+	} else {
+		l.pos++
+		l.emit(tText)
+	}
+
+	return lexMainSection
+}
+
+type sectionHandlers struct {
+	l *pageLexer
+
+	// Set when none of the sections are found so we
+	// can safely stop looking and skip to the end.
+	skipAll bool
+
+	handlers    []*sectionHandler
+	skipIndexes []int
+}
+
+func (s *sectionHandlers) skip() int {
+	if s.skipAll {
+		return -1
+	}
+
+	s.skipIndexes = s.skipIndexes[:0]
+	var shouldSkip bool
+	for _, skipper := range s.handlers {
+		idx := skipper.skip()
+		if idx != -1 {
+			shouldSkip = true
+			s.skipIndexes = append(s.skipIndexes, idx)
+		}
+	}
+
+	if !shouldSkip {
+		s.skipAll = true
+		return -1
+	}
+
+	return minIndex(s.skipIndexes...)
+}
+
+func createSectionHandlers(l *pageLexer) *sectionHandlers {
+
+	shortCodeHandler := &sectionHandler{
+		l: l,
+		skipFunc: func(l *pageLexer) int {
+			return l.index(leftDelimSc)
+		},
+		lexFunc: func(origin stateFunc, l *pageLexer) (stateFunc, bool) {
+			if !l.isShortCodeStart() {
+				return origin, false
+			}
+
 			if l.isInline {
 				// If we're inside an inline shortcode, the only valid shortcode markup is
 				// the markup which closes it.
```
```diff
@@ -225,14 +293,11 @@ func lexMainSection(l *pageLexer) stateFunc {
 				if end != len(l.input)-1 {
 					b = bytes.TrimSpace(b[end+1:])
 					if end == -1 || !bytes.HasPrefix(b, []byte(l.currShortcodeName+" ")) {
-						return l.errorf("inline shortcodes do not support nesting")
+						return l.errorf("inline shortcodes do not support nesting"), true
 					}
 				}
 			}

-		if l.pos > l.start {
-			l.emit(tText)
-		}
 			if l.hasPrefix(leftDelimScWithMarkup) {
 				l.currLeftDelimItem = tLeftDelimScWithMarkup
 				l.currRightDelimItem = tRightDelimScWithMarkup
```
```diff
@@ -240,32 +305,139 @@ func lexMainSection(l *pageLexer) stateFunc {
 				l.currLeftDelimItem = tLeftDelimScNoMarkup
 				l.currRightDelimItem = tRightDelimScNoMarkup
 			}
-			return lexShortcodeLeftDelim
-		}
-
-		if !l.summaryDividerChecked && l.summaryDivider != nil {
-			if l.hasPrefix(l.summaryDivider) {
-				if l.pos > l.start {
-					l.emit(tText)
-				}
-				l.summaryDividerChecked = true
-				l.pos += len(l.summaryDivider)
-				// This makes it a little easier to reason about later.
-				l.consumeSpace()
-				l.emit(TypeLeadSummaryDivider)
-
-				// We have already moved to the next.
-				continue
-			}
-		}
-
-		r := l.next()
-		if r == eof {
-			break
-		}
+
+			return lexShortcodeLeftDelim, true
+		},
 	}

+	summaryDividerHandler := &sectionHandler{
+		l: l,
+		skipFunc: func(l *pageLexer) int {
+			if l.summaryDividerChecked || l.summaryDivider == nil {
+				return -1
+			}
+			return l.index(l.summaryDivider)
+		},
+		lexFunc: func(origin stateFunc, l *pageLexer) (stateFunc, bool) {
+			if !l.hasPrefix(l.summaryDivider) {
+				return origin, false
+			}
+
+			l.summaryDividerChecked = true
+			l.pos += len(l.summaryDivider)
+			// This makes it a little easier to reason about later.
+			l.consumeSpace()
+			l.emit(TypeLeadSummaryDivider)
+
+			return origin, true
+		},
+	}
+
+	handlers := []*sectionHandler{shortCodeHandler, summaryDividerHandler}
+
+	if l.cfg.EnableEmoji {
+		emojiHandler := &sectionHandler{
+			l: l,
+			skipFunc: func(l *pageLexer) int {
+				return l.indexByte(emojiDelim)
+			},
+			lexFunc: func(origin stateFunc, l *pageLexer) (stateFunc, bool) {
+				return lexEmoji, true
+			},
+		}
+
+		handlers = append(handlers, emojiHandler)
+	}
+
+	return &sectionHandlers{
+		l:           l,
+		handlers:    handlers,
+		skipIndexes: make([]int, len(handlers)),
+	}
+}
+
+func (s *sectionHandlers) lex(origin stateFunc) stateFunc {
+	if s.skipAll {
+		return nil
+	}
+
+	if s.l.pos > s.l.start {
+		s.l.emit(tText)
+	}
+
+	for _, handler := range s.handlers {
+		if handler.skipAll {
+			continue
+		}
+
+		next, handled := handler.lexFunc(origin, handler.l)
+		if next == nil || handled {
+			return next
+		}
+	}
+
+	// Not handled by the above.
+	s.l.pos++
+
+	return origin
+}
+
+type sectionHandler struct {
+	l *pageLexer
+
+	// No more sections of this type.
+	skipAll bool
+
+	// Returns the index of the next match, -1 if none found.
+	skipFunc func(l *pageLexer) int
+
+	// Lex lexes the current section and returns the next state func and
+	// a bool telling if this section was handled.
+	// Note that returning nil as the next state will terminate the
+	// lexer.
+	lexFunc func(origin stateFunc, l *pageLexer) (stateFunc, bool)
+}
+
+func (s *sectionHandler) skip() int {
+	if s.skipAll {
+		return -1
+	}
+
+	idx := s.skipFunc(s.l)
+	if idx == -1 {
+		s.skipAll = true
+	}
+	return idx
+}
+
+func lexMainSection(l *pageLexer) stateFunc {
+	if l.isEOF() {
+		return lexDone
+	}
+
+	if l.isInHTMLComment {
+		return lexEndFromtMatterHTMLComment
+	}
+
+	// Fast forward as far as possible.
+	skip := l.sectionHandlers.skip()
+
+	if skip == -1 {
+		l.pos = len(l.input)
+		return lexDone
+	} else if skip > 0 {
+		l.pos += skip
+	}
+
+	next := l.sectionHandlers.lex(lexMainSection)
+	if next != nil {
+		return next
+	}
+
+	l.pos = len(l.input)
 	return lexDone
 }
```
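To make the acceptance rule above concrete: `lexEmoji` treats `:` as the start of a candidate and only emits `TypeEmoji` when one or more alphanumeric runes are followed by a closing `:`. A reduced, self-contained model of that loop follows; the `isAlphaNumeric` stand-in is an assumption, approximating the lexer's own helper of the same name:

```go
package main

import (
	"fmt"
	"unicode"
	"unicode/utf8"
)

// Stand-in for the lexer's isAlphaNumeric; the exact rune set is an
// assumption for this sketch.
func isAlphaNumeric(r rune) bool {
	return r == '_' || unicode.IsLetter(r) || unicode.IsDigit(r)
}

// looksLikeEmoji models lexEmoji's validation: s starts at the opening
// ':', and a candidate is valid only if a closing ':' follows at least
// one alphanumeric rune.
func looksLikeEmoji(s string) bool {
	for i := 1; i < len(s); {
		if i > 1 && s[i] == ':' {
			return true
		}
		r, w := utf8.DecodeRuneInString(s[i:])
		if !isAlphaNumeric(r) {
			return false
		}
		i += w
	}
	return false
}

func main() {
	fmt.Println(looksLikeEmoji(":smile:")) // true:  emitted as TypeEmoji
	fmt.Println(looksLikeEmoji(":not an")) // false: emitted as plain text
	fmt.Println(looksLikeEmoji("::"))      // false: see the "looks like an emoji" tests below
}
```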
```diff
@@ -297,10 +469,22 @@ func (l *pageLexer) index(sep []byte) int {
 	return bytes.Index(l.input[l.pos:], sep)
 }

+func (l *pageLexer) indexByte(sep byte) int {
+	return bytes.IndexByte(l.input[l.pos:], sep)
+}
+
 func (l *pageLexer) hasPrefix(prefix []byte) bool {
 	return bytes.HasPrefix(l.input[l.pos:], prefix)
 }

+func (l *pageLexer) hasPrefixByte(prefix byte) bool {
+	b := l.input[l.pos:]
+	if len(b) == 0 {
+		return false
+	}
+	return b[0] == prefix
+}
+
 // helper functions

 // returns the min index >= 0
```
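The fast-forward machinery is the heart of the speedup: each `sectionHandler` reports, via its `skipFunc`, the byte index of its next possible match, and `lexMainSection` jumps straight to the nearest one instead of scanning rune by rune. A toy illustration of that idea, where only `minIndex` mirrors a real helper and the rest is invented for the example:

```go
package main

import (
	"bytes"
	"fmt"
)

// minIndex mirrors the lexer's "min index >= 0" helper: the nearest
// candidate wins; -1 means no handler has anything left to lex.
func minIndex(indices ...int) int {
	min := -1
	for _, j := range indices {
		if j < 0 {
			continue
		}
		if min == -1 || j < min {
			min = j
		}
	}
	return min
}

func main() {
	input := []byte("plain text :smile: and a {{< shortcode >}}")
	pos := 0

	skip := minIndex(
		bytes.Index(input[pos:], []byte("{{")), // shortcode handler's skipFunc
		bytes.IndexByte(input[pos:], ':'),      // emoji handler's skipFunc
	)
	fmt.Println("fast-forward by", skip, "bytes") // 11: straight to the ':'
}
```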
```diff
@@ -27,7 +27,7 @@ import (

 // Result holds the parse result.
 type Result interface {
-	// Iterator returns a new Iterator positioned at the benning of the parse tree.
+	// Iterator returns a new Iterator positioned at the beginning of the parse tree.
 	Iterator() *Iterator
 	// Input returns the input to Parse.
 	Input() []byte
```
```diff
@@ -35,27 +35,21 @@ type Result interface {

 var _ Result = (*pageLexer)(nil)

-// Parse parses the page in the given reader.
-func Parse(r io.Reader) (Result, error) {
+// Parse parses the page in the given reader according to the given Config.
+func Parse(r io.Reader, cfg Config) (Result, error) {
 	b, err := ioutil.ReadAll(r)
 	if err != nil {
 		return nil, errors.Wrap(err, "failed to read page content")
 	}
-	return parseBytes(b)
+	return parseBytes(b, cfg)
 }

-func parseBytes(b []byte) (Result, error) {
-	lexer := newPageLexer(b, 0, lexIntroSection)
+func parseBytes(b []byte, cfg Config) (Result, error) {
+	lexer := newPageLexer(b, lexIntroSection, cfg)
 	lexer.run()
 	return lexer, nil
 }

-func parseMainSection(input []byte, from int) Result {
-	lexer := newPageLexer(input, from, lexMainSection)
-	lexer.run()
-	return lexer
-}
-
 // An Iterator has methods to iterate a parsed page with support going back
 // if needed.
 type Iterator struct {
```
```diff
@@ -88,8 +88,8 @@ func TestFrontMatter(t *testing.T) {
 	}
 }

-func collect(input []byte, skipFrontMatter bool, stateStart stateFunc) (items []Item) {
-	l := newPageLexer(input, 0, stateStart)
+func collectWithConfig(input []byte, skipFrontMatter bool, stateStart stateFunc, cfg Config) (items []Item) {
+	l := newPageLexer(input, stateStart, cfg)
 	l.run()
 	t := l.newIterator()
```
```diff
@@ -103,6 +103,13 @@ func collect(input []byte, skipFrontMatter bool, stateStart stateFunc) (items []
 	return
 }

+func collect(input []byte, skipFrontMatter bool, stateStart stateFunc) (items []Item) {
+	var cfg Config
+
+	return collectWithConfig(input, skipFrontMatter, stateStart, cfg)
+
+}
+
 // no positional checking, for now ...
 func equal(i1, i2 []Item) bool {
 	if len(i1) != len(i2) {
```
parser/pageparser/pageparser_main_test.go (new file, 40 lines)

```diff
@@ -0,0 +1,40 @@
+// Copyright 2018 The Hugo Authors. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package pageparser
+
+import (
+	"fmt"
+	"testing"
+)
+
+func TestMain(t *testing.T) {
+	t.Parallel()
+
+	var mainTests = []lexerTest{
+		{"emoji #1", "Some text with :emoji:", []Item{nti(tText, "Some text with "), nti(TypeEmoji, ":emoji:"), tstEOF}},
+		{"emoji #2", "Some text with :emoji: and some text.", []Item{nti(tText, "Some text with "), nti(TypeEmoji, ":emoji:"), nti(tText, " and some text."), tstEOF}},
+		{"looks like an emoji #1", "Some text and then :emoji", []Item{nti(tText, "Some text and then "), nti(tText, ":"), nti(tText, "emoji"), tstEOF}},
+		{"looks like an emoji #2", "Some text and then ::", []Item{nti(tText, "Some text and then "), nti(tText, ":"), nti(tText, ":"), tstEOF}},
+		{"looks like an emoji #3", ":Some :text", []Item{nti(tText, ":"), nti(tText, "Some "), nti(tText, ":"), nti(tText, "text"), tstEOF}},
+	}
+
+	for i, test := range mainTests {
+		items := collectWithConfig([]byte(test.input), false, lexMainSection, Config{EnableEmoji: true})
+		if !equal(items, test.items) {
+			got := crLfReplacer.Replace(fmt.Sprint(items))
+			expected := crLfReplacer.Replace(fmt.Sprint(test.items))
+			t.Errorf("[%d] %s: got\n\t%v\nexpected\n\t%v", i, test.name, got, expected)
+		}
+	}
+}
```
```diff
@@ -152,7 +152,8 @@ var shortCodeLexerTests = []lexerTest{
 	{"basic inline", `{{< sc1.inline >}}Hello World{{< /sc1.inline >}}`, []Item{tstLeftNoMD, tstSC1Inline, tstRightNoMD, tstText, tstLeftNoMD, tstSCClose, tstSC1Inline, tstRightNoMD, tstEOF}},
 	{"basic inline with space", `{{< sc1.inline >}}Hello World{{< / sc1.inline >}}`, []Item{tstLeftNoMD, tstSC1Inline, tstRightNoMD, tstText, tstLeftNoMD, tstSCClose, tstSC1Inline, tstRightNoMD, tstEOF}},
 	{"inline self closing", `{{< sc1.inline >}}Hello World{{< /sc1.inline >}}Hello World{{< sc1.inline />}}`, []Item{tstLeftNoMD, tstSC1Inline, tstRightNoMD, tstText, tstLeftNoMD, tstSCClose, tstSC1Inline, tstRightNoMD, tstText, tstLeftNoMD, tstSC1Inline, tstSCClose, tstRightNoMD, tstEOF}},
-	{"inline with nested shortcode (not supported)", `{{< sc1.inline >}}Hello World{{< sc1 >}}{{< /sc1.inline >}}`, []Item{tstLeftNoMD, tstSC1Inline, tstRightNoMD, nti(tError, "inline shortcodes do not support nesting")}},
+	{"inline with template syntax", `{{< sc1.inline >}}{{ .Get 0 }}{{ .Get 1 }}{{< /sc1.inline >}}`, []Item{tstLeftNoMD, tstSC1Inline, tstRightNoMD, nti(tText, "{{ .Get 0 }}"), nti(tText, "{{ .Get 1 }}"), tstLeftNoMD, tstSCClose, tstSC1Inline, tstRightNoMD, tstEOF}},
+	{"inline with nested shortcode (not supported)", `{{< sc1.inline >}}Hello World{{< sc1 >}}{{< /sc1.inline >}}`, []Item{tstLeftNoMD, tstSC1Inline, tstRightNoMD, tstText, nti(tError, "inline shortcodes do not support nesting")}},
 	{"inline case mismatch", `{{< sc1.Inline >}}Hello World{{< /sc1.Inline >}}`, []Item{tstLeftNoMD, nti(tError, "period in shortcode name only allowed for inline identifiers")}},
 }
```
```diff
@@ -171,10 +172,11 @@ func BenchmarkShortcodeLexer(b *testing.B) {
 	for i, input := range shortCodeLexerTests {
 		testInputs[i] = []byte(input.input)
 	}
+	var cfg Config
 	b.ResetTimer()
 	for i := 0; i < b.N; i++ {
 		for _, input := range testInputs {
-			items := collect(input, true, lexMainSection)
+			items := collectWithConfig(input, true, lexMainSection, cfg)
 			if len(items) == 0 {
 			}
```
```diff
@@ -34,10 +34,37 @@ This is some summary. This is some summary. This is some summary. This is some s

 `
 	input := []byte(start + strings.Repeat(strings.Repeat("this is text", 30)+"{{< myshortcode >}}This is some inner content.{{< /myshortcode >}}", 10))
+	cfg := Config{EnableEmoji: false}
+
 	b.ResetTimer()
 	for i := 0; i < b.N; i++ {
-		if _, err := parseBytes(input); err != nil {
+		if _, err := parseBytes(input, cfg); err != nil {
+			b.Fatal(err)
+		}
+	}
+}
+
+func BenchmarkParseWithEmoji(b *testing.B) {
+	start := `
+
+
+---
+title: "Front Matters"
+description: "It really does"
+---
+
+This is some summary. This is some summary. This is some summary. This is some summary.
+
+<!--more-->
+
+
+`
+	input := []byte(start + strings.Repeat("this is not emoji: ", 50) + strings.Repeat("some text ", 70) + strings.Repeat("this is not: ", 50) + strings.Repeat("but this is a :smile: ", 3) + strings.Repeat("some text ", 70))
+	cfg := Config{EnableEmoji: true}
+
+	b.ResetTimer()
+	for i := 0; i < b.N; i++ {
+		if _, err := parseBytes(input, cfg); err != nil {
 			b.Fatal(err)
 		}
 	}
 }
```