mirror of
https://github.com/gohugoio/hugo.git
synced 2024-12-01 15:39:12 -05:00
eada236f87
This commit introduces a new data structure to store pages and their resources. This data structure is backed by radix trees. This simplies tree operations, makes all pages a bundle, and paves the way for #6310. It also solves a set of annoying issues (see list below). Not a motivation behind this, but this commit also makes Hugo in general a little bit faster and more memory effective (see benchmarks). Especially for partial rebuilds on content edits, but also when taxonomies is in use. ``` name old time/op new time/op delta SiteNew/Bundle_with_image/Edit-16 1.32ms ± 8% 1.00ms ± 9% -24.42% (p=0.029 n=4+4) SiteNew/Bundle_with_JSON_file/Edit-16 1.28ms ± 0% 0.94ms ± 0% -26.26% (p=0.029 n=4+4) SiteNew/Tags_and_categories/Edit-16 33.9ms ± 2% 21.8ms ± 1% -35.67% (p=0.029 n=4+4) SiteNew/Canonify_URLs/Edit-16 40.6ms ± 1% 37.7ms ± 3% -7.20% (p=0.029 n=4+4) SiteNew/Deep_content_tree/Edit-16 56.7ms ± 0% 51.7ms ± 1% -8.82% (p=0.029 n=4+4) SiteNew/Many_HTML_templates/Edit-16 19.9ms ± 2% 18.3ms ± 3% -7.64% (p=0.029 n=4+4) SiteNew/Page_collections/Edit-16 37.9ms ± 4% 34.0ms ± 2% -10.28% (p=0.029 n=4+4) SiteNew/Bundle_with_image-16 10.7ms ± 0% 10.6ms ± 0% -1.15% (p=0.029 n=4+4) SiteNew/Bundle_with_JSON_file-16 10.8ms ± 0% 10.7ms ± 0% -1.05% (p=0.029 n=4+4) SiteNew/Tags_and_categories-16 43.2ms ± 1% 39.6ms ± 1% -8.35% (p=0.029 n=4+4) SiteNew/Canonify_URLs-16 47.6ms ± 1% 47.3ms ± 0% ~ (p=0.057 n=4+4) SiteNew/Deep_content_tree-16 73.0ms ± 1% 74.2ms ± 1% ~ (p=0.114 n=4+4) SiteNew/Many_HTML_templates-16 37.9ms ± 0% 38.1ms ± 1% ~ (p=0.114 n=4+4) SiteNew/Page_collections-16 53.6ms ± 1% 54.7ms ± 1% +2.09% (p=0.029 n=4+4) name old alloc/op new alloc/op delta SiteNew/Bundle_with_image/Edit-16 486kB ± 0% 430kB ± 0% -11.47% (p=0.029 n=4+4) SiteNew/Bundle_with_JSON_file/Edit-16 265kB ± 0% 209kB ± 0% -21.06% (p=0.029 n=4+4) SiteNew/Tags_and_categories/Edit-16 13.6MB ± 0% 8.8MB ± 0% -34.93% (p=0.029 n=4+4) SiteNew/Canonify_URLs/Edit-16 66.5MB ± 0% 63.9MB ± 0% -3.95% (p=0.029 n=4+4) 
SiteNew/Deep_content_tree/Edit-16 28.8MB ± 0% 25.8MB ± 0% -10.55% (p=0.029 n=4+4) SiteNew/Many_HTML_templates/Edit-16 6.16MB ± 0% 5.56MB ± 0% -9.86% (p=0.029 n=4+4) SiteNew/Page_collections/Edit-16 16.9MB ± 0% 16.0MB ± 0% -5.19% (p=0.029 n=4+4) SiteNew/Bundle_with_image-16 2.28MB ± 0% 2.29MB ± 0% +0.35% (p=0.029 n=4+4) SiteNew/Bundle_with_JSON_file-16 2.07MB ± 0% 2.07MB ± 0% ~ (p=0.114 n=4+4) SiteNew/Tags_and_categories-16 14.3MB ± 0% 13.2MB ± 0% -7.30% (p=0.029 n=4+4) SiteNew/Canonify_URLs-16 69.1MB ± 0% 69.0MB ± 0% ~ (p=0.343 n=4+4) SiteNew/Deep_content_tree-16 31.3MB ± 0% 31.8MB ± 0% +1.49% (p=0.029 n=4+4) SiteNew/Many_HTML_templates-16 10.8MB ± 0% 10.9MB ± 0% +1.11% (p=0.029 n=4+4) SiteNew/Page_collections-16 21.4MB ± 0% 21.6MB ± 0% +1.15% (p=0.029 n=4+4) name old allocs/op new allocs/op delta SiteNew/Bundle_with_image/Edit-16 4.74k ± 0% 3.86k ± 0% -18.57% (p=0.029 n=4+4) SiteNew/Bundle_with_JSON_file/Edit-16 4.73k ± 0% 3.85k ± 0% -18.58% (p=0.029 n=4+4) SiteNew/Tags_and_categories/Edit-16 301k ± 0% 198k ± 0% -34.14% (p=0.029 n=4+4) SiteNew/Canonify_URLs/Edit-16 389k ± 0% 373k ± 0% -4.07% (p=0.029 n=4+4) SiteNew/Deep_content_tree/Edit-16 338k ± 0% 262k ± 0% -22.63% (p=0.029 n=4+4) SiteNew/Many_HTML_templates/Edit-16 102k ± 0% 88k ± 0% -13.81% (p=0.029 n=4+4) SiteNew/Page_collections/Edit-16 176k ± 0% 152k ± 0% -13.32% (p=0.029 n=4+4) SiteNew/Bundle_with_image-16 26.8k ± 0% 26.8k ± 0% +0.05% (p=0.029 n=4+4) SiteNew/Bundle_with_JSON_file-16 26.8k ± 0% 26.8k ± 0% +0.05% (p=0.029 n=4+4) SiteNew/Tags_and_categories-16 273k ± 0% 245k ± 0% -10.36% (p=0.029 n=4+4) SiteNew/Canonify_URLs-16 396k ± 0% 398k ± 0% +0.39% (p=0.029 n=4+4) SiteNew/Deep_content_tree-16 317k ± 0% 325k ± 0% +2.53% (p=0.029 n=4+4) SiteNew/Many_HTML_templates-16 146k ± 0% 147k ± 0% +0.98% (p=0.029 n=4+4) SiteNew/Page_collections-16 210k ± 0% 215k ± 0% +2.44% (p=0.029 n=4+4) ``` Fixes #6312 Fixes #6087 Fixes #6738 Fixes #6412 Fixes #6743 Fixes #6875 Fixes #6034 Fixes #6902 Fixes #6173 Fixes #6590
195 lines
5.2 KiB
Go
195 lines
5.2 KiB
Go
// Copyright 2019 The Hugo Authors. All rights reserved.
|
|
//
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
// you may not use this file except in compliance with the License.
|
|
// You may obtain a copy of the License at
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
// See the License for the specific language governing permissions and
|
|
// limitations under the License.
|
|
|
|
// Package pageparser provides a parser for Hugo content files (Markdown, HTML etc.) in Hugo.
|
|
// This implementation is highly inspired by the great talk given by Rob Pike called "Lexical Scanning in Go"
|
|
// It's on YouTube, Google it!.
|
|
// See slides here: http://cuddle.googlecode.com/hg/talk/lex.html
|
|
package pageparser
|
|
|
|
import (
|
|
"bytes"
|
|
"io"
|
|
"io/ioutil"
|
|
|
|
"github.com/gohugoio/hugo/parser/metadecoders"
|
|
"github.com/pkg/errors"
|
|
)
|
|
|
|
// Result holds the parse result.
type Result interface {
	// Iterator returns a new Iterator positioned at the beginning of the parse tree.
	Iterator() *Iterator
	// Input returns the input to Parse.
	Input() []byte
}
|
|
|
|
// Compile-time check that *pageLexer implements Result.
var _ Result = (*pageLexer)(nil)
|
|
|
|
// Parse parses the page in the given reader according to the given Config.
// Lexing starts in the intro section (see lexIntroSection), which handles
// any front matter before the main content.
//
// TODO(bep) now that we have improved the "lazy order" init, it *may* be
// some potential saving in doing a buffered approach where the first pass does
// the frontmatter only.
func Parse(r io.Reader, cfg Config) (Result, error) {
	return parseSection(r, cfg, lexIntroSection)
}
|
|
|
|
// ContentFrontMatter holds the separated content and front matter of a page.
type ContentFrontMatter struct {
	// Content is the raw page content following the front matter block.
	Content []byte
	// FrontMatter is the front matter decoded into a map.
	FrontMatter map[string]interface{}
	// FrontMatterFormat identifies the format (e.g. TOML, YAML, JSON, ORG)
	// the front matter was written in.
	FrontMatterFormat metadecoders.Format
}
|
|
|
|
// ParseFrontMatterAndContent is a convenience method to extract front matter
|
|
// and content from a content page.
|
|
func ParseFrontMatterAndContent(r io.Reader) (ContentFrontMatter, error) {
|
|
var cf ContentFrontMatter
|
|
|
|
psr, err := Parse(r, Config{})
|
|
if err != nil {
|
|
return cf, err
|
|
}
|
|
|
|
var frontMatterSource []byte
|
|
|
|
iter := psr.Iterator()
|
|
|
|
walkFn := func(item Item) bool {
|
|
if frontMatterSource != nil {
|
|
// The rest is content.
|
|
cf.Content = psr.Input()[item.Pos:]
|
|
// Done
|
|
return false
|
|
} else if item.IsFrontMatter() {
|
|
cf.FrontMatterFormat = FormatFromFrontMatterType(item.Type)
|
|
frontMatterSource = item.Val
|
|
}
|
|
return true
|
|
|
|
}
|
|
|
|
iter.PeekWalk(walkFn)
|
|
|
|
cf.FrontMatter, err = metadecoders.Default.UnmarshalToMap(frontMatterSource, cf.FrontMatterFormat)
|
|
return cf, err
|
|
}
|
|
|
|
func FormatFromFrontMatterType(typ ItemType) metadecoders.Format {
|
|
switch typ {
|
|
case TypeFrontMatterJSON:
|
|
return metadecoders.JSON
|
|
case TypeFrontMatterORG:
|
|
return metadecoders.ORG
|
|
case TypeFrontMatterTOML:
|
|
return metadecoders.TOML
|
|
case TypeFrontMatterYAML:
|
|
return metadecoders.YAML
|
|
default:
|
|
return ""
|
|
}
|
|
}
|
|
|
|
// ParseMain parses starting with the main section. Used in tests.
// Unlike Parse, it skips the intro (front matter) lexing stage and starts
// directly in lexMainSection.
func ParseMain(r io.Reader, cfg Config) (Result, error) {
	return parseSection(r, cfg, lexMainSection)
}
|
|
|
|
func parseSection(r io.Reader, cfg Config, start stateFunc) (Result, error) {
|
|
b, err := ioutil.ReadAll(r)
|
|
if err != nil {
|
|
return nil, errors.Wrap(err, "failed to read page content")
|
|
}
|
|
return parseBytes(b, cfg, start)
|
|
}
|
|
|
|
func parseBytes(b []byte, cfg Config, start stateFunc) (Result, error) {
|
|
lexer := newPageLexer(b, start, cfg)
|
|
lexer.run()
|
|
return lexer, nil
|
|
}
|
|
|
|
// An Iterator has methods to iterate a parsed page with support going back
// if needed.
type Iterator struct {
	// l holds the parsed items and the input source.
	l *pageLexer

	// position of the last item returned by nextItem
	lastPos int
}
|
|
|
|
// Next consumes and returns the next item.
func (t *Iterator) Next() Item {
	// Advance first, then delegate to Current for the bounds-checked read.
	t.lastPos++
	return t.Current()
}
|
|
|
|
// Input returns the input source.
func (t *Iterator) Input() []byte {
	return t.l.Input()
}
|
|
|
|
// errIndexOutOfBounds is the error Item returned when the iterator is moved
// past the last parsed item.
var errIndexOutOfBounds = Item{tError, 0, []byte("no more tokens"), true}
|
|
|
|
// Current will repeatably return the current item.
|
|
func (t *Iterator) Current() Item {
|
|
if t.lastPos >= len(t.l.items) {
|
|
return errIndexOutOfBounds
|
|
}
|
|
return t.l.items[t.lastPos]
|
|
}
|
|
|
|
// backs up one token.
|
|
func (t *Iterator) Backup() {
|
|
if t.lastPos < 0 {
|
|
panic("need to go forward before going back")
|
|
}
|
|
t.lastPos--
|
|
}
|
|
|
|
// check for non-error and non-EOF types coming next
|
|
func (t *Iterator) IsValueNext() bool {
|
|
i := t.Peek()
|
|
return i.Type != tError && i.Type != tEOF
|
|
}
|
|
|
|
// look at, but do not consume, the next item
|
|
// repeated, sequential calls will return the same item
|
|
func (t *Iterator) Peek() Item {
|
|
return t.l.items[t.lastPos+1]
|
|
}
|
|
|
|
// PeekWalk will feed the next items in the iterator to walkFn
|
|
// until it returns false.
|
|
func (t *Iterator) PeekWalk(walkFn func(item Item) bool) {
|
|
for i := t.lastPos + 1; i < len(t.l.items); i++ {
|
|
item := t.l.items[i]
|
|
if !walkFn(item) {
|
|
break
|
|
}
|
|
}
|
|
}
|
|
|
|
// Consume is a convencience method to consume the next n tokens,
|
|
// but back off Errors and EOF.
|
|
func (t *Iterator) Consume(cnt int) {
|
|
for i := 0; i < cnt; i++ {
|
|
token := t.Next()
|
|
if token.Type == tError || token.Type == tEOF {
|
|
t.Backup()
|
|
break
|
|
}
|
|
}
|
|
}
|
|
|
|
// LineNumber returns the current line number. Used for logging.
func (t *Iterator) LineNumber() int {
	// 1-based: count the newlines in the input before the current item's position.
	return bytes.Count(t.l.input[:t.Current().Pos], lf) + 1
}
|