2018-10-17 07:16:45 -04:00
|
|
|
// Copyright 2018 The Hugo Authors. All rights reserved.
|
|
|
|
//
|
|
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
|
|
// you may not use this file except in compliance with the License.
|
|
|
|
// You may obtain a copy of the License at
|
|
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
//
|
|
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
// See the License for the specific language governing permissions and
|
|
|
|
// limitations under the License.
|
|
|
|
|
|
|
|
// Package pageparser provides a parser for Hugo content files (Markdown, HTML etc.) in Hugo.
|
|
|
|
// This implementation is highly inspired by the great talk given by Rob Pike called "Lexical Scanning in Go"
|
|
|
|
// It's on YouTube, Google it!.
|
|
|
|
// See slides here: http://cuddle.googlecode.com/hg/talk/lex.html
|
|
|
|
package pageparser
|
|
|
|
|
2018-10-18 04:21:23 -04:00
|
|
|
import (
|
|
|
|
"bytes"
|
|
|
|
"io"
|
|
|
|
"io/ioutil"
|
|
|
|
|
|
|
|
"github.com/pkg/errors"
|
|
|
|
)
|
|
|
|
|
|
|
|
// Result holds the parse result.
type Result interface {
	// Iterator returns a new Iterator positioned at the beginning of the parse tree.
	Iterator() *Iterator
	// Input returns the input to Parse.
	Input() []byte
}
|
|
|
|
|
2018-10-18 04:21:23 -04:00
|
|
|
// Compile-time check that pageLexer implements the Result interface.
var _ Result = (*pageLexer)(nil)
|
|
|
|
|
|
|
|
// Parse parses the page in the given reader.
|
|
|
|
func Parse(r io.Reader) (Result, error) {
|
|
|
|
b, err := ioutil.ReadAll(r)
|
|
|
|
if err != nil {
|
|
|
|
return nil, errors.Wrap(err, "failed to read page content")
|
|
|
|
}
|
|
|
|
lexer := newPageLexer(b, 0, lexIntroSection)
|
2018-10-18 03:47:39 -04:00
|
|
|
lexer.run()
|
2018-10-18 04:21:23 -04:00
|
|
|
return lexer, nil
|
|
|
|
|
2018-10-18 03:47:39 -04:00
|
|
|
}
|
2018-10-17 07:16:45 -04:00
|
|
|
|
2018-10-18 04:21:23 -04:00
|
|
|
func parseMainSection(input []byte, from int) Result {
|
|
|
|
lexer := newPageLexer(input, pos(from), lexMainSection) // TODO(bep) 2errors
|
|
|
|
lexer.run()
|
|
|
|
return lexer
|
2018-10-17 07:16:45 -04:00
|
|
|
}
|
|
|
|
|
2018-10-18 04:21:23 -04:00
|
|
|
// An Iterator has methods to iterate a parsed page with support going back
// if needed.
type Iterator struct {
	l       *pageLexer // the lexer that produced the items being iterated
	lastPos pos        // position of the last item returned by nextItem
}
|
|
|
|
|
2018-10-18 04:21:23 -04:00
|
|
|
// Next consumes and returns the next item.
func (t *Iterator) Next() Item {
	t.lastPos++
	return t.current()
}
|
|
|
|
|
2018-10-18 04:21:23 -04:00
|
|
|
// errIndexOutOfBounds is returned when the iterator is positioned past the
// last parsed item.
var errIndexOutOfBounds = Item{tError, 0, []byte("no more tokens")}
|
|
|
|
|
|
|
|
func (t *Iterator) current() Item {
|
|
|
|
if t.lastPos >= pos(len(t.l.items)) {
|
|
|
|
return errIndexOutOfBounds
|
|
|
|
}
|
|
|
|
return t.l.items[t.lastPos]
|
2018-10-17 07:16:45 -04:00
|
|
|
}
|
|
|
|
|
2018-10-18 04:21:23 -04:00
|
|
|
// Backup backs up one token.
func (t *Iterator) Backup() {
	// Going below the start means Next was never called; that is a
	// programmer error, so panic rather than silently misbehave.
	if t.lastPos < 0 {
		panic("need to go forward before going back")
	}
	t.lastPos--
}
|
|
|
|
|
|
|
|
// check for non-error and non-EOF types coming next
|
2018-10-18 04:21:23 -04:00
|
|
|
func (t *Iterator) IsValueNext() bool {
|
2018-10-17 07:16:45 -04:00
|
|
|
i := t.Peek()
|
2018-10-19 05:30:57 -04:00
|
|
|
return i.Type != tError && i.Type != tEOF
|
2018-10-17 07:16:45 -04:00
|
|
|
}
|
|
|
|
|
|
|
|
// look at, but do not consume, the next item
|
|
|
|
// repeated, sequential calls will return the same item
|
2018-10-18 04:21:23 -04:00
|
|
|
func (t *Iterator) Peek() Item {
|
|
|
|
return t.l.items[t.lastPos+1]
|
2018-10-17 07:16:45 -04:00
|
|
|
}
|
|
|
|
|
2018-10-19 05:30:57 -04:00
|
|
|
// PeekWalk will feed the next items in the iterator to walkFn
|
|
|
|
// until it returns false.
|
|
|
|
func (t *Iterator) PeekWalk(walkFn func(item Item) bool) {
|
|
|
|
for i := t.lastPos + 1; i < pos(len(t.l.items)); i++ {
|
|
|
|
item := t.l.items[i]
|
|
|
|
if !walkFn(item) {
|
|
|
|
break
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2018-10-17 07:16:45 -04:00
|
|
|
// Consume is a convenience method to consume the next n tokens,
// but back off Errors and EOF.
func (t *Iterator) Consume(cnt int) {
	for i := 0; i < cnt; i++ {
		token := t.Next()
		// Stop early and un-consume if we run into an error or EOF.
		if token.Type == tError || token.Type == tEOF {
			t.Backup()
			break
		}
	}
}
|
|
|
|
|
|
|
|
// LineNumber returns the current line number. Used for logging.
func (t *Iterator) LineNumber() int {
	// The line number is one more than the count of linefeeds preceding
	// the current item's position in the input.
	return bytes.Count(t.l.input[:t.current().pos], lf) + 1
}
|