hugo/hugolib/page_content.go

// Copyright 2018 The Hugo Authors. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package hugolib

import (
	"bytes"
	"io"

	errors "github.com/pkg/errors"

	bp "github.com/gohugoio/hugo/bufferpool"

	"github.com/gohugoio/hugo/common/herrors"
	"github.com/gohugoio/hugo/common/text"
	"github.com/gohugoio/hugo/parser/metadecoders"
	"github.com/gohugoio/hugo/parser/pageparser"
)

var (
	// internalSummaryDividerBase is the marker text used internally to stand
	// in for a summary divider in the mapped content.
	internalSummaryDividerBase      = "HUGOMORE42"
	// internalSummaryDividerBaseBytes is internalSummaryDividerBase as a byte
	// slice.
	internalSummaryDividerBaseBytes = []byte(internalSummaryDividerBase)
	// internalSummaryDividerPre is the marker surrounded by two newlines on
	// each side. mapContent writes it to the work content when it encounters
	// a pageparser.TypeLeadSummaryDivider item.
	internalSummaryDividerPre       = []byte("\n\n" + internalSummaryDividerBase + "\n\n")
)

// pageContent holds the content related items on a Page.
type pageContent struct {
	// renderable is set to true at the start of mapContent and flipped to
	// false when the parser reports a pageparser.TypeHTMLStart item, i.e.
	// HTML without front matter.
	renderable bool

	// workContent is the content assembled by mapContent from the parsed
	// items, with shortcodes replaced by placeholders and the summary divider
	// replaced by an internal marker. It may be mutated during site build.
	workContent []byte

	// shortcodeState is created anew by mapContent and records the shortcodes
	// extracted from the page.
	shortcodeState *shortcodeHandler

	// source holds the parse result and the values derived from it.
	source rawPageContent
}

// rawPageContent holds the parse result of a page's source together with
// values derived from it while mapping the content.
type rawPageContent struct {
	// hasSummaryDivider is set to true by mapContent when the parsed items
	// contain a pageparser.TypeLeadSummaryDivider.
	hasSummaryDivider bool

	// The AST of the parsed page. Contains information about:
	// shortcodes, front matter, summary indicators.
	parsed pageparser.Result

	// posMainContent is the position in bytes after any front matter.
	// mapContent resets it to -1 and only assigns a real position when the
	// front matter is followed by an item for which IsDone reports false.
	posMainContent int
}

// mapContent walks the items of the parsed page source and assembles
// p.workContent from them. Along the way it:
//
//   - decodes front matter and applies it through p.updateMetaData, recording
//     in p.source.posMainContent where the following item starts;
//   - replaces every shortcode with a placeholder and registers the extracted
//     shortcode on a fresh shortcode handler;
//   - replaces the summary divider with internalSummaryDividerPre and marks
//     the page as truncated when non-whitespace content follows it;
//   - marks the page as not renderable when it starts with HTML.
//
// If the front matter says the page should not be built, it returns nil right
// after the front matter without assigning p.workContent. Parser errors and
// shortcode extraction errors are returned via p.parseError so they carry a
// file position.
//
// TODO(bep) lazy consolidate
func (p *Page) mapContent() error {
	p.shortcodeState = newShortcodeHandler(p)
	s := p.shortcodeState
	p.renderable = true
	p.source.posMainContent = -1

	result := bp.GetBuffer()
	defer bp.PutBuffer(result)

	iter := p.source.parsed.Iterator()

	// fail attaches the position of item i to err.
	fail := func(err error, i pageparser.Item) error {
		return p.parseError(err, iter.Input(), i.Pos)
	}

	// The parser is guaranteed to return items in proper order or fail, so
	// it's safe to keep a running shortcode ordinal across iterations.
	var ordinal int

Loop:
	for {
		it := iter.Next()

		switch {
		case it.Type == pageparser.TypeIgnore:
		case it.Type == pageparser.TypeHTMLComment:
			// Ignore. This is only a leading Front matter comment.
		case it.Type == pageparser.TypeHTMLStart:
			// This is HTML without front matter. It can still have shortcodes.
			p.renderable = false
			result.Write(it.Val)
		case it.IsFrontMatter():
			f := metadecoders.FormatFromFrontMatterType(it.Type)
			m, err := metadecoders.UnmarshalToMap(it.Val, f)
			if err != nil {
				if fe, ok := err.(herrors.FileError); ok {
					// Shift the decoder's file error by the iterator's
					// current line number.
					return herrors.ToFileErrorWithOffset(fe, iter.LineNumber()-1)
				}
				return err
			}
			if err := p.updateMetaData(m); err != nil {
				return err
			}

			next := iter.Peek()
			if !next.IsDone() {
				p.source.posMainContent = next.Pos
			}

			if !p.shouldBuild() {
				// Nothing more to do.
				return nil
			}

		case it.Type == pageparser.TypeLeadSummaryDivider:
			result.Write(internalSummaryDividerPre)
			p.source.hasSummaryDivider = true
			// Need to determine if the page is truncated: it is if any
			// non-whitespace item follows the divider.
			f := func(item pageparser.Item) bool {
				if item.IsNonWhitespace() {
					p.truncated = true

					// Done
					return false
				}
				return true
			}
			iter.PeekWalk(f)

		// Handle shortcode
		case it.IsLeftShortcodeDelim():
			// let extractShortcode handle left delim (will do so recursively)
			iter.Backup()

			currShortcode, err := s.extractShortcode(ordinal, iter, p)

			// The name is recorded before the error check, so a shortcode
			// that failed to extract but has a name is still registered.
			if currShortcode.name != "" {
				s.nameSet[currShortcode.name] = true
			}

			if err != nil {
				return fail(errors.Wrap(err, "failed to extract shortcode"), it)
			}

			if currShortcode.params == nil {
				currShortcode.params = make([]string, 0)
			}

			placeHolder := s.createShortcodePlaceholder()
			result.WriteString(placeHolder)
			ordinal++
			s.shortcodes.Add(placeHolder, currShortcode)
		case it.IsEOF():
			break Loop
		case it.IsError():
			return fail(errors.WithStack(errors.New(it.ValStr())), it)

		default:
			result.Write(it.Val)
		}
	}

	// The buffer goes back to the pool on return, so keep a private copy.
	resultBytes := make([]byte, result.Len())
	copy(resultBytes, result.Bytes())
	p.workContent = resultBytes

	return nil
}

// parse runs the page parser over reader and stores the result as the page's
// source. It also sets the page language from its file and, when the page
// belongs to a site with an owner, attaches Git info to the page. A warning is
// logged if Git info is enabled but none was found for the page.
func (p *Page) parse(reader io.Reader) error {
	parsed, err := pageparser.Parse(reader)
	if err != nil {
		return err
	}

	p.source = rawPageContent{parsed: parsed}
	p.lang = p.File.Lang()

	// Git info can only be looked up through the site's owner.
	if p.s == nil || p.s.owner == nil {
		return nil
	}

	info, enabled := p.s.owner.gitInfo.forPage(p)
	switch {
	case info != nil:
		p.GitInfo = info
	case enabled:
		p.s.Log.WARN.Printf("Failed to find GitInfo for page %q", p.Path())
	}

	return nil
}

// parseError turns err into a file error positioned at offset within input.
// If err already unwraps to a file error it is returned unchanged, since that
// location is the most specific one available.
func (p *Page) parseError(err error, input []byte, offset int) error {
	if herrors.UnwrapFileError(err) == nil {
		at := p.posFromInput(input, offset)
		return herrors.NewFileError("md", -1, at.LineNumber, at.ColumnNumber, err)
	}

	// Use the most specific location.
	return err
}

// posFromInput translates a byte offset into input to a text.Position with a
// 1-based line number and a 1-based column number, using p.pathOrTitle() as
// the filename.
//
// An offset outside [0, len(input)] is clamped to the nearest bound rather
// than panicking on the slice expression; this function is used while
// reporting errors, and a panic here would hide the error being reported.
func (p *Page) posFromInput(input []byte, offset int) text.Position {
	if offset < 0 {
		offset = 0
	} else if offset > len(input) {
		offset = len(input)
	}

	lf := []byte("\n")
	head := input[:offset]
	lineNumber := bytes.Count(head, lf) + 1
	// -1 when there is no preceding newline, which makes the column on the
	// first line come out as offset+1.
	endOfLastLine := bytes.LastIndexByte(head, '\n')

	return text.Position{
		Filename:     p.pathOrTitle(),
		LineNumber:   lineNumber,
		ColumnNumber: offset - endOfLastLine,
		Offset:       offset,
	}
}

// posFromPage resolves offset to a text.Position using the input of the
// page's own parsed source.
func (p *Page) posFromPage(offset int) text.Position {
	input := p.source.parsed.Input()
	return p.posFromInput(input, offset)
}
hugolib: Integrate new page parser See #5324 2018-10-18 04:21:23 -04:00			`// Copyright 2018 The Hugo Authors. All rights reserved.`
			`//`
			`// Licensed under the Apache License, Version 2.0 (the "License");`
			`// you may not use this file except in compliance with the License.`
			`// You may obtain a copy of the License at`
			`// http://www.apache.org/licenses/LICENSE-2.0`
			`//`
			`// Unless required by applicable law or agreed to in writing, software`
			`// distributed under the License is distributed on an "AS IS" BASIS,`
			`// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.`
			`// See the License for the specific language governing permissions and`
			`// limitations under the License.`

			`package hugolib`

			`import (`
hugolib: Continue the file context/line number errors work See #5324 2018-10-21 06:20:21 -04:00			`"bytes"`
hugolib: Integrate new page parser See #5324 2018-10-18 04:21:23 -04:00			`"io"`

hugolib: Continue the file context/line number errors work See #5324 2018-10-21 06:20:21 -04:00			`errors "github.com/pkg/errors"`

hugolib: Integrate new page parser See #5324 2018-10-18 04:21:23 -04:00			`bp "github.com/gohugoio/hugo/bufferpool"`

hugolib: Continue the file context/line number errors work See #5324 2018-10-21 06:20:21 -04:00			`"github.com/gohugoio/hugo/common/herrors"`
Add file (line/col) info to ref/relref errors See #5371 2018-11-01 06:28:30 -04:00			`"github.com/gohugoio/hugo/common/text"`
hugolib: Integrate new page parser See #5324 2018-10-18 04:21:23 -04:00			`"github.com/gohugoio/hugo/parser/metadecoders"`
			`"github.com/gohugoio/hugo/parser/pageparser"`
			`)`

hugolib: Redo the summary delimiter logic Now that we have a proper page parse tree, this can be greatly simplified. See #5324 2018-10-19 05:30:57 -04:00			`var (`
hugolib: Fix broken manual summary handling Fixes #5381 2018-10-30 15:24:34 -04:00			`internalSummaryDividerBase = "HUGOMORE42"`
			`internalSummaryDividerBaseBytes = []byte(internalSummaryDividerBase)`
			`internalSummaryDividerPre = []byte("\n\n" + internalSummaryDividerBase + "\n\n")`
hugolib: Redo the summary delimiter logic Now that we have a proper page parse tree, this can be greatly simplified. See #5324 2018-10-19 05:30:57 -04:00			`)`

hugolib: Integrate new page parser See #5324 2018-10-18 04:21:23 -04:00			`// The content related items on a Page.`
			`type pageContent struct {`
			`renderable bool`

			`// workContent is a copy of rawContent that may be mutated during site build.`
			`workContent []byte`

			`shortcodeState *shortcodeHandler`

			`source rawPageContent`
			`}`

			`type rawPageContent struct {`
hugolib: Redo the summary delimiter logic Now that we have a proper page parse tree, this can be greatly simplified. See #5324 2018-10-19 05:30:57 -04:00			`hasSummaryDivider bool`

hugolib: Integrate new page parser See #5324 2018-10-18 04:21:23 -04:00			`// The AST of the parsed page. Contains information about:`
Resolve error handling/parser related TODOs See #5324 2018-10-23 08:37:09 -04:00			`// shortcodes, front matter, summary indicators.`
hugolib: Integrate new page parser See #5324 2018-10-18 04:21:23 -04:00			`parsed pageparser.Result`
Resolve error handling/parser related TODOs See #5324 2018-10-23 08:37:09 -04:00
			`// Returns the position in bytes after any front matter.`
			`posMainContent int`
hugolib: Integrate new page parser See #5324 2018-10-18 04:21:23 -04:00			`}`

			`// TODO(bep) lazy consolidate`
			`func (p *Page) mapContent() error {`
			`p.shortcodeState = newShortcodeHandler(p)`
			`s := p.shortcodeState`
			`p.renderable = true`
Resolve error handling/parser related TODOs See #5324 2018-10-23 08:37:09 -04:00			`p.source.posMainContent = -1`
hugolib: Integrate new page parser See #5324 2018-10-18 04:21:23 -04:00
			`result := bp.GetBuffer()`
			`defer bp.PutBuffer(result)`

			`iter := p.source.parsed.Iterator()`

hugolib: Continue the file context/line number errors work See #5324 2018-10-21 06:20:21 -04:00			`fail := func(err error, i pageparser.Item) error {`
hugolib: Add .Position to shortcode To allow for better error logging in shortcodes. Note that this may be expensive to calculate, so this is primarily for error situations. See #5371 2018-11-01 05:39:44 -04:00			`return p.parseError(err, iter.Input(), i.Pos)`
hugolib: Continue the file context/line number errors work See #5324 2018-10-21 06:20:21 -04:00			`}`

hugolib: Integrate new page parser See #5324 2018-10-18 04:21:23 -04:00			`// the parser is guaranteed to return items in proper order or fail, so …`
			`// … it's safe to keep some "global" state`
			`var currShortcode shortcode`
			`var ordinal int`

			`Loop:`
			`for {`
			`it := iter.Next()`

			`switch {`
hugolib: Redo the summary delimiter logic Now that we have a proper page parse tree, this can be greatly simplified. See #5324 2018-10-19 05:30:57 -04:00			`case it.Type == pageparser.TypeIgnore:`
			`case it.Type == pageparser.TypeHTMLComment:`
hugolib: Integrate new page parser See #5324 2018-10-18 04:21:23 -04:00			`// Ignore. This is only a leading Front matter comment.`
Resolve error handling/parser related TODOs See #5324 2018-10-23 08:37:09 -04:00			`case it.Type == pageparser.TypeHTMLStart:`
			`// This is HTML without front matter. It can still have shortcodes.`
hugolib: Integrate new page parser See #5324 2018-10-18 04:21:23 -04:00			`p.renderable = false`
			`result.Write(it.Val)`
			`case it.IsFrontMatter():`
hugolib: Redo the summary delimiter logic Now that we have a proper page parse tree, this can be greatly simplified. See #5324 2018-10-19 05:30:57 -04:00			`f := metadecoders.FormatFromFrontMatterType(it.Type)`
hugolib: Integrate new page parser See #5324 2018-10-18 04:21:23 -04:00			`m, err := metadecoders.UnmarshalToMap(it.Val, f)`
			`if err != nil {`
herrors: Improve handling of JSON errors `json.UnmarshalTypeError` and `json.SyntaxError` has a byte `Offset`, so use that. This commit also reworks/simplifies the errror line matching logic. This also makes the file reading unbuffered, but that should be fine in this error case. See #5324 2018-10-23 02:54:10 -04:00			`if fe, ok := err.(herrors.FileError); ok {`
			`return herrors.ToFileErrorWithOffset(fe, iter.LineNumber()-1)`
			`} else {`
			`return err`
			`}`
hugolib: Integrate new page parser See #5324 2018-10-18 04:21:23 -04:00			`}`
			`if err := p.updateMetaData(m); err != nil {`
			`return err`
			`}`

Resolve error handling/parser related TODOs See #5324 2018-10-23 08:37:09 -04:00			`next := iter.Peek()`
			`if !next.IsDone() {`
			`p.source.posMainContent = next.Pos`
			`}`

hugolib: Integrate new page parser See #5324 2018-10-18 04:21:23 -04:00			`if !p.shouldBuild() {`
			`// Nothing more to do.`
			`return nil`
hugolib: Redo the summary delimiter logic Now that we have a proper page parse tree, this can be greatly simplified. See #5324 2018-10-19 05:30:57 -04:00			`}`
hugolib: Integrate new page parser See #5324 2018-10-18 04:21:23 -04:00
Resolve error handling/parser related TODOs See #5324 2018-10-23 08:37:09 -04:00			`case it.Type == pageparser.TypeLeadSummaryDivider:`
hugolib: Fix broken manual summary handling Fixes #5381 2018-10-30 15:24:34 -04:00			`result.Write(internalSummaryDividerPre)`
hugolib: Redo the summary delimiter logic Now that we have a proper page parse tree, this can be greatly simplified. See #5324 2018-10-19 05:30:57 -04:00			`p.source.hasSummaryDivider = true`
			`// Need to determine if the page is truncated.`
			`f := func(item pageparser.Item) bool {`
			`if item.IsNonWhitespace() {`
			`p.truncated = true`

			`// Done`
			`return false`
			`}`
			`return true`
hugolib: Integrate new page parser See #5324 2018-10-18 04:21:23 -04:00			`}`
hugolib: Redo the summary delimiter logic Now that we have a proper page parse tree, this can be greatly simplified. See #5324 2018-10-19 05:30:57 -04:00			`iter.PeekWalk(f)`
hugolib: Integrate new page parser See #5324 2018-10-18 04:21:23 -04:00
			`// Handle shortcode`
			`case it.IsLeftShortcodeDelim():`
			`// let extractShortcode handle left delim (will do so recursively)`
			`iter.Backup()`

			`currShortcode, err := s.extractShortcode(ordinal, iter, p)`

			`if currShortcode.name != "" {`
			`s.nameSet[currShortcode.name] = true`
			`}`

			`if err != nil {`
hugolib: Continue the file context/line number errors work See #5324 2018-10-21 06:20:21 -04:00			`return fail(errors.Wrap(err, "failed to extract shortcode"), it)`
hugolib: Integrate new page parser See #5324 2018-10-18 04:21:23 -04:00			`}`

			`if currShortcode.params == nil {`
			`currShortcode.params = make([]string, 0)`
			`}`

			`placeHolder := s.createShortcodePlaceholder()`
			`result.WriteString(placeHolder)`
			`ordinal++`
			`s.shortcodes.Add(placeHolder, currShortcode)`
			`case it.IsEOF():`
			`break Loop`
			`case it.IsError():`
hugolib: Continue the file context/line number errors work See #5324 2018-10-21 06:20:21 -04:00			`err := fail(errors.WithStack(errors.New(it.ValStr())), it)`
hugolib: Integrate new page parser See #5324 2018-10-18 04:21:23 -04:00			`currShortcode.err = err`
			`return err`
hugolib: Continue the file context/line number errors work See #5324 2018-10-21 06:20:21 -04:00
hugolib: Integrate new page parser See #5324 2018-10-18 04:21:23 -04:00			`default:`
			`result.Write(it.Val)`
			`}`
			`}`

			`resultBytes := make([]byte, result.Len())`
			`copy(resultBytes, result.Bytes())`
			`p.workContent = resultBytes`

			`return nil`
			`}`

			`func (p *Page) parse(reader io.Reader) error {`

			`parseResult, err := pageparser.Parse(reader)`
			`if err != nil {`
			`return err`
			`}`

			`p.source = rawPageContent{`
			`parsed: parseResult,`
			`}`

hugolib: Remove the now superflous Source struct See #5324 2018-10-20 13:09:03 -04:00			`p.lang = p.File.Lang()`
hugolib: Integrate new page parser See #5324 2018-10-18 04:21:23 -04:00
			`if p.s != nil && p.s.owner != nil {`
			`gi, enabled := p.s.owner.gitInfo.forPage(p)`
			`if gi != nil {`
			`p.GitInfo = gi`
			`} else if enabled {`
			`p.s.Log.WARN.Printf("Failed to find GitInfo for page %q", p.Path())`
			`}`
			`}`

			`return nil`
			`}`
hugolib: Continue the file context/line number errors work See #5324 2018-10-21 06:20:21 -04:00
hugolib: Add .Position to shortcode To allow for better error logging in shortcodes. Note that this may be expensive to calculate, so this is primarily for error situations. See #5371 2018-11-01 05:39:44 -04:00			`func (p *Page) parseError(err error, input []byte, offset int) error {`
hugolib: Continue the file context/line number errors work See #5324 2018-10-21 06:20:21 -04:00			`if herrors.UnwrapFileError(err) != nil {`
			`// Use the most specific location.`
			`return err`
			`}`
hugolib: Add .Position to shortcode To allow for better error logging in shortcodes. Note that this may be expensive to calculate, so this is primarily for error situations. See #5371 2018-11-01 05:39:44 -04:00			`pos := p.posFromInput(input, offset)`
			`return herrors.NewFileError("md", -1, pos.LineNumber, pos.ColumnNumber, err)`

			`}`

Add file (line/col) info to ref/relref errors See #5371 2018-11-01 06:28:30 -04:00			`func (p *Page) posFromInput(input []byte, offset int) text.Position {`
hugolib: Continue the file context/line number errors work See #5324 2018-10-21 06:20:21 -04:00			`lf := []byte("\n")`
hugolib: Add .Position to shortcode To allow for better error logging in shortcodes. Note that this may be expensive to calculate, so this is primarily for error situations. See #5371 2018-11-01 05:39:44 -04:00			`input = input[:offset]`
hugolib: Continue the file context/line number errors work See #5324 2018-10-21 06:20:21 -04:00			`lineNumber := bytes.Count(input, lf) + 1`
			`endOfLastLine := bytes.LastIndex(input, lf)`

Add file (line/col) info to ref/relref errors See #5371 2018-11-01 06:28:30 -04:00			`return text.Position{`
hugolib: Add .Position to shortcode To allow for better error logging in shortcodes. Note that this may be expensive to calculate, so this is primarily for error situations. See #5371 2018-11-01 05:39:44 -04:00			`Filename: p.pathOrTitle(),`
			`LineNumber: lineNumber,`
			`ColumnNumber: offset - endOfLastLine,`
			`Offset: offset,`
			`}`
			`}`

Add file (line/col) info to ref/relref errors See #5371 2018-11-01 06:28:30 -04:00			`func (p *Page) posFromPage(offset int) text.Position {`
hugolib: Add .Position to shortcode To allow for better error logging in shortcodes. Note that this may be expensive to calculate, so this is primarily for error situations. See #5371 2018-11-01 05:39:44 -04:00			`return p.posFromInput(p.source.parsed.Input(), offset)`
hugolib: Continue the file context/line number errors work See #5324 2018-10-21 06:20:21 -04:00			`}`