mirror of
https://github.com/gohugoio/hugo.git
synced 2025-01-15 10:02:37 +00:00
9cd54cab20
This avoids double parsing the page content when `enableEmoji=true`. This commit also adds some general improvements to the parser, making it in general much faster: ```bash benchmark old ns/op new ns/op delta BenchmarkShortcodeLexer-4 90258 101730 +12.71% BenchmarkParse-4 148940 15037 -89.90% benchmark old allocs new allocs delta BenchmarkShortcodeLexer-4 456 700 +53.51% BenchmarkParse-4 28 33 +17.86% benchmark old bytes new bytes delta BenchmarkShortcodeLexer-4 69875 81014 +15.94% BenchmarkParse-4 8128 8304 +2.17% ``` Running some site benchmarks with Emoji support turned on: ```bash benchmark old ns/op new ns/op delta BenchmarkSiteBuilding/TOML,num_langs=3,num_pages=5000,tags_per_page=5,shortcodes,render-4 924556797 818115620 -11.51% benchmark old allocs new allocs delta BenchmarkSiteBuilding/TOML,num_langs=3,num_pages=5000,tags_per_page=5,shortcodes,render-4 4112613 4133787 +0.51% benchmark old bytes new bytes delta BenchmarkSiteBuilding/TOML,num_langs=3,num_pages=5000,tags_per_page=5,shortcodes,render-4 426982864 424363832 -0.61% ``` Fixes #5534
134 lines
2.9 KiB
Go
134 lines
2.9 KiB
Go
// Copyright 2018 The Hugo Authors. All rights reserved.
|
|
//
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
// you may not use this file except in compliance with the License.
|
|
// You may obtain a copy of the License at
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
// See the License for the specific language governing permissions and
|
|
// limitations under the License.
|
|
|
|
package pageparser
|
|
|
|
import (
|
|
"bytes"
|
|
"fmt"
|
|
)
|
|
|
|
type Item struct {
|
|
Type ItemType
|
|
Pos int
|
|
Val []byte
|
|
}
|
|
|
|
// Items is a slice of Item values.
type Items []Item
|
|
|
|
func (i Item) ValStr() string {
|
|
return string(i.Val)
|
|
}
|
|
|
|
func (i Item) IsText() bool {
|
|
return i.Type == tText
|
|
}
|
|
|
|
func (i Item) IsNonWhitespace() bool {
|
|
return len(bytes.TrimSpace(i.Val)) > 0
|
|
}
|
|
|
|
func (i Item) IsShortcodeName() bool {
|
|
return i.Type == tScName
|
|
}
|
|
|
|
func (i Item) IsInlineShortcodeName() bool {
|
|
return i.Type == tScNameInline
|
|
}
|
|
|
|
func (i Item) IsLeftShortcodeDelim() bool {
|
|
return i.Type == tLeftDelimScWithMarkup || i.Type == tLeftDelimScNoMarkup
|
|
}
|
|
|
|
func (i Item) IsRightShortcodeDelim() bool {
|
|
return i.Type == tRightDelimScWithMarkup || i.Type == tRightDelimScNoMarkup
|
|
}
|
|
|
|
func (i Item) IsShortcodeClose() bool {
|
|
return i.Type == tScClose
|
|
}
|
|
|
|
func (i Item) IsShortcodeParam() bool {
|
|
return i.Type == tScParam
|
|
}
|
|
|
|
func (i Item) IsShortcodeParamVal() bool {
|
|
return i.Type == tScParamVal
|
|
}
|
|
|
|
func (i Item) IsShortcodeMarkupDelimiter() bool {
|
|
return i.Type == tLeftDelimScWithMarkup || i.Type == tRightDelimScWithMarkup
|
|
}
|
|
|
|
func (i Item) IsFrontMatter() bool {
|
|
return i.Type >= TypeFrontMatterYAML && i.Type <= TypeFrontMatterORG
|
|
}
|
|
|
|
func (i Item) IsDone() bool {
|
|
return i.Type == tError || i.Type == tEOF
|
|
}
|
|
|
|
func (i Item) IsEOF() bool {
|
|
return i.Type == tEOF
|
|
}
|
|
|
|
func (i Item) IsError() bool {
|
|
return i.Type == tError
|
|
}
|
|
|
|
func (i Item) String() string {
|
|
switch {
|
|
case i.Type == tEOF:
|
|
return "EOF"
|
|
case i.Type == tError:
|
|
return string(i.Val)
|
|
case i.Type > tKeywordMarker:
|
|
return fmt.Sprintf("<%s>", i.Val)
|
|
case len(i.Val) > 50:
|
|
return fmt.Sprintf("%v:%.20q...", i.Type, i.Val)
|
|
}
|
|
return fmt.Sprintf("%v:[%s]", i.Type, i.Val)
|
|
}
|
|
|
|
// ItemType identifies the kind of an Item.
type ItemType int

// The ItemType values. Their order is significant: Item.IsFrontMatter tests
// for the inclusive range TypeFrontMatterYAML..TypeFrontMatterORG, and
// Item.String treats every value greater than tKeywordMarker as a keyword.
const (
	tError ItemType = iota
	tEOF

	// page items

	// TypeHTMLStart marks a document starting with < as first non-whitespace.
	TypeHTMLStart
	// TypeLeadSummaryDivider is the summary divider: <!--more-->, # more
	TypeLeadSummaryDivider
	TypeFrontMatterYAML
	TypeFrontMatterTOML
	TypeFrontMatterJSON
	TypeFrontMatterORG
	TypeEmoji
	// TypeIgnore covers the BOM Unicode byte order marker and possibly others.
	TypeIgnore

	// shortcode items
	tLeftDelimScNoMarkup
	tRightDelimScNoMarkup
	tLeftDelimScWithMarkup
	tRightDelimScWithMarkup
	tScClose
	tScName
	tScNameInline
	tScParam
	tScParamVal

	// tText is plain text.
	tText

	// preserved for later - keywords come after this
	tKeywordMarker
)
|