hugo/parser/pageparser/item.go
Bjørn Erik Pedersen 223bf28004 parser/pageparser: Don't store the byte slices
On its own this change doesn't do any magic, but this is part of a bigger picture about making Hugo leaner in the
memory usage department.
2022-07-09 16:03:11 +02:00

221 lines
4.3 KiB
Go

// Copyright 2018 The Hugo Authors. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package pageparser
import (
"bytes"
"fmt"
"regexp"
"strconv"
"github.com/yuin/goldmark/util"
)
// lowHigh is a pair of byte offsets into the source. Val slices the source
// as source[Low:High], so Low is inclusive and High is exclusive.
type lowHigh struct {
	Low, High int
}
// Item is a single lexed item. It does not hold the bytes of its value; it
// stores offsets into the source, which the caller passes to Val, ValStr,
// ValTyped and ToString.
type Item struct {
	Type ItemType
	// NOTE(review): presumably set for items of type tError; nothing in this
	// file assigns it, so confirm against the lexer.
	Err error

	// The common case is a single segment: the value is source[low:high].
	low, high int

	// This is the uncommon case: the value is the concatenation of several
	// source ranges. When non-empty, Pos and Val use segments instead of
	// low/high.
	segments []lowHigh

	// Used for validation.
	firstByte byte

	// isString makes ValTyped return the value as a string without trying to
	// interpret it as a bool or a number.
	isString bool
}
// Items is a slice of Item values.
type Items []Item
// Pos returns the offset in the source where the item starts: the Low offset
// of the first segment for a segmented item, otherwise low.
func (i Item) Pos() int {
	if len(i.segments) == 0 {
		return i.low
	}
	return i.segments[0].Low
}
// Val returns the item's value as bytes taken from source.
//
// With zero or one segment the result is a sub-slice of source and shares its
// backing array. With several segments the ranges are concatenated, in order,
// into a newly allocated buffer.
func (i Item) Val(source []byte) []byte {
	switch len(i.segments) {
	case 0:
		return source[i.low:i.high]
	case 1:
		only := i.segments[0]
		return source[only.Low:only.High]
	}

	var joined bytes.Buffer
	for idx := range i.segments {
		seg := i.segments[idx]
		joined.Write(source[seg.Low:seg.High])
	}
	return joined.Bytes()
}
// ValStr returns the item's value (see Val) converted to a string.
func (i Item) ValStr(source []byte) string {
	raw := i.Val(source)
	return string(raw)
}
// ValTyped returns the item's value converted to the Go type it looks like.
//
// The result is one of:
//   - string, when the item is flagged as a string (isString), when no pattern
//     matches, or when a numeric conversion fails;
//   - bool, when the value matches boolRe;
//   - int, when the value matches intRe;
//   - float64, when the value matches floatRe.
//
// The patterns are tried in that order and only the first match is used.
func (i Item) ValTyped(source []byte) any {
	str := i.ValStr(source)

	switch {
	case i.isString:
		// A quoted value that is a string even if it looks like a number etc.
		return str
	case boolRe.MatchString(str):
		return str == "true"
	case intRe.MatchString(str):
		// On failure (e.g. out of range) fall out of the switch and return
		// the raw string; the float pattern is deliberately not tried.
		if n, err := strconv.Atoi(str); err == nil {
			return n
		}
	case floatRe.MatchString(str):
		if f, err := strconv.ParseFloat(str, 64); err == nil {
			return f
		}
	}

	return str
}
// IsText reports whether the item is plain text or indentation.
func (i Item) IsText() bool {
	switch i.Type {
	case tText, tIndentation:
		return true
	}
	return false
}
// IsIndentation reports whether the item's type is tIndentation.
func (i Item) IsIndentation() bool {
return i.Type == tIndentation
}
// IsNonWhitespace reports whether the item's value still has content after
// leading and trailing white space is trimmed.
func (i Item) IsNonWhitespace(source []byte) bool {
	trimmed := bytes.TrimSpace(i.Val(source))
	return len(trimmed) != 0
}
// IsShortcodeName reports whether the item's type is tScName.
func (i Item) IsShortcodeName() bool {
return i.Type == tScName
}
// IsInlineShortcodeName reports whether the item's type is tScNameInline.
func (i Item) IsInlineShortcodeName() bool {
return i.Type == tScNameInline
}
// IsLeftShortcodeDelim reports whether the item is a left shortcode
// delimiter, with or without markup.
func (i Item) IsLeftShortcodeDelim() bool {
	switch i.Type {
	case tLeftDelimScWithMarkup, tLeftDelimScNoMarkup:
		return true
	}
	return false
}
// IsRightShortcodeDelim reports whether the item is a right shortcode
// delimiter, with or without markup.
func (i Item) IsRightShortcodeDelim() bool {
	switch i.Type {
	case tRightDelimScWithMarkup, tRightDelimScNoMarkup:
		return true
	}
	return false
}
// IsShortcodeClose reports whether the item's type is tScClose.
func (i Item) IsShortcodeClose() bool {
return i.Type == tScClose
}
// IsShortcodeParam reports whether the item's type is tScParam.
func (i Item) IsShortcodeParam() bool {
return i.Type == tScParam
}
// IsShortcodeParamVal reports whether the item's type is tScParamVal.
func (i Item) IsShortcodeParamVal() bool {
return i.Type == tScParamVal
}
// IsShortcodeMarkupDelimiter reports whether the item is a shortcode
// delimiter of the "with markup" kind, left or right.
func (i Item) IsShortcodeMarkupDelimiter() bool {
	switch i.Type {
	case tLeftDelimScWithMarkup, tRightDelimScWithMarkup:
		return true
	}
	return false
}
// IsFrontMatter reports whether the item's type lies in the range
// TypeFrontMatterYAML..TypeFrontMatterORG, inclusive. This relies on the
// front matter types being declared contiguously.
func (i Item) IsFrontMatter() bool {
	if i.Type < TypeFrontMatterYAML {
		return false
	}
	return i.Type <= TypeFrontMatterORG
}
// IsDone reports whether the item is an error or EOF item.
func (i Item) IsDone() bool {
	switch i.Type {
	case tError, tEOF:
		return true
	}
	return false
}
// IsEOF reports whether the item's type is tEOF.
func (i Item) IsEOF() bool {
return i.Type == tEOF
}
// IsError reports whether the item's type is tError.
func (i Item) IsError() bool {
return i.Type == tError
}
// ToString returns a human-readable representation of the item, resolving
// its value against source.
//
// The format depends on the item:
//   - EOF items render as "EOF";
//   - error items render as their raw value;
//   - indentation renders as "type:[value]" with the spaces visualized;
//   - types above tKeywordMarker render as "<value>";
//   - values longer than 50 bytes render as the type plus a quoted, truncated
//     prefix followed by "...";
//   - anything else renders as "type:[value]".
func (i Item) ToString(source []byte) string {
	// The value is resolved up front, before any type check, so every item
	// type goes through Val.
	val := i.Val(source)

	switch i.Type {
	case tEOF:
		return "EOF"
	case tError:
		return string(val)
	case tIndentation:
		return fmt.Sprintf("%s:[%s]", i.Type, util.VisualizeSpaces(val))
	}

	if i.Type > tKeywordMarker {
		return fmt.Sprintf("<%s>", val)
	}
	if len(val) > 50 {
		return fmt.Sprintf("%v:%.20q...", i.Type, val)
	}
	return fmt.Sprintf("%v:[%s]", i.Type, val)
}
// ItemType identifies the kind of an Item.
type ItemType int
// The item types. The declaration order is significant: IsFrontMatter checks
// for the contiguous range TypeFrontMatterYAML..TypeFrontMatterORG, and
// ToString treats every value above tKeywordMarker as a keyword.
const (
tError ItemType = iota
tEOF
// page items
TypeLeadSummaryDivider // <!--more-->, # more
TypeFrontMatterYAML
TypeFrontMatterTOML
TypeFrontMatterJSON
TypeFrontMatterORG
TypeEmoji
TypeIgnore // The BOM Unicode byte order marker and possibly others
// shortcode items
tLeftDelimScNoMarkup
tRightDelimScNoMarkup
tLeftDelimScWithMarkup
tRightDelimScWithMarkup
tScClose
tScName
tScNameInline
tScParam
tScParamVal
tIndentation
tText // plain text
// preserved for later - keywords come after this
tKeywordMarker
)
// Patterns used by Item.ValTyped to detect the type of an unquoted value.
// Each pattern is anchored at both ends so that it only matches the whole
// value.
var (
	// boolRe matches exactly "true" or "false". The alternation is grouped so
	// that both anchors apply to both alternatives; without the grouping, any
	// value merely ending in "false" would match.
	boolRe = regexp.MustCompile(`^(true|false)$`)

	// intRe matches an optionally signed run of decimal digits.
	intRe = regexp.MustCompile(`^[-+]?\d+$`)

	// floatRe matches an optionally signed decimal number with a fractional
	// part; the integer part may be omitted (e.g. ".5").
	floatRe = regexp.MustCompile(`^[-+]?\d*\.\d+$`)
)