mirror of
https://github.com/gohugoio/hugo.git
synced 2024-12-27 21:02:11 +00:00
parent
f6863e1ef7
commit
2fdc4a24d5
4 changed files with 345 additions and 68 deletions
|
@ -73,10 +73,10 @@ func (i Item) String() string {
|
|||
return i.Val
|
||||
case i.typ > tKeywordMarker:
|
||||
return fmt.Sprintf("<%s>", i.Val)
|
||||
case len(i.Val) > 20:
|
||||
return fmt.Sprintf("%.20q...", i.Val)
|
||||
case len(i.Val) > 50:
|
||||
return fmt.Sprintf("%v:%.20q...", i.typ, i.Val)
|
||||
}
|
||||
return fmt.Sprintf("[%s]", i.Val)
|
||||
return fmt.Sprintf("%v:[%s]", i.typ, i.Val)
|
||||
}
|
||||
|
||||
type itemType int
|
||||
|
@ -85,6 +85,15 @@ const (
|
|||
tError itemType = iota
|
||||
tEOF
|
||||
|
||||
// page items
|
||||
tHTMLLead // <
|
||||
tSummaryDivider // <!--more-->
|
||||
tSummaryDividerOrg // # more
|
||||
tFrontMatterYAML
|
||||
tFrontMatterTOML
|
||||
tFrontMatterJSON
|
||||
tFrontMatterORG
|
||||
|
||||
// shortcode items
|
||||
tLeftDelimScNoMarkup
|
||||
tRightDelimScNoMarkup
|
||||
|
@ -95,8 +104,7 @@ const (
|
|||
tScParam
|
||||
tScParamVal
|
||||
|
||||
//itemIdentifier
|
||||
tText // plain text, used for everything outside the shortcodes
|
||||
tText // plain text
|
||||
|
||||
// preserved for later - keywords come after this
|
||||
tKeywordMarker
|
||||
|
|
|
@ -44,13 +44,15 @@ type lexerShortcodeState struct {
|
|||
}
|
||||
|
||||
type pageLexer struct {
|
||||
name string
|
||||
input string
|
||||
state stateFunc
|
||||
pos pos // input position
|
||||
start pos // item start position
|
||||
width pos // width of last element
|
||||
lastPos pos // position of the last item returned by nextItem
|
||||
input string
|
||||
stateStart stateFunc
|
||||
state stateFunc
|
||||
pos pos // input position
|
||||
start pos // item start position
|
||||
width pos // width of last element
|
||||
lastPos pos // position of the last item returned by nextItem
|
||||
|
||||
contentSections int
|
||||
|
||||
lexerShortcodeState
|
||||
|
||||
|
@ -63,18 +65,18 @@ func Parse(s string) *Tokens {
|
|||
}
|
||||
|
||||
func ParseFrom(s string, from int) *Tokens {
|
||||
lexer := newPageLexer("default", s, pos(from))
|
||||
lexer := newPageLexer(s, pos(from), lexMainSection) // TODO(bep) 2errors
|
||||
lexer.run()
|
||||
return &Tokens{lexer: lexer}
|
||||
}
|
||||
|
||||
// note: the input position here is normally 0 (start), but
|
||||
// can be set if position of first shortcode is known
|
||||
func newPageLexer(name, input string, inputPosition pos) *pageLexer {
|
||||
func newPageLexer(input string, inputPosition pos, stateStart stateFunc) *pageLexer {
|
||||
lexer := &pageLexer{
|
||||
name: name,
|
||||
input: input,
|
||||
pos: inputPosition,
|
||||
input: input,
|
||||
pos: inputPosition,
|
||||
stateStart: stateStart,
|
||||
lexerShortcodeState: lexerShortcodeState{
|
||||
currLeftDelimItem: tLeftDelimScNoMarkup,
|
||||
currRightDelimItem: tRightDelimScNoMarkup,
|
||||
|
@ -88,14 +90,13 @@ func newPageLexer(name, input string, inputPosition pos) *pageLexer {
|
|||
|
||||
// main loop
|
||||
func (l *pageLexer) run() *pageLexer {
|
||||
for l.state = lexTextOutsideShortcodes; l.state != nil; {
|
||||
for l.state = l.stateStart; l.state != nil; {
|
||||
l.state = l.state(l)
|
||||
}
|
||||
return l
|
||||
}
|
||||
|
||||
// state functions
|
||||
|
||||
// Shortcode syntax
|
||||
const (
|
||||
leftDelimScNoMarkup = "{{<"
|
||||
rightDelimScNoMarkup = ">}}"
|
||||
|
@ -105,6 +106,12 @@ const (
|
|||
rightComment = "*/"
|
||||
)
|
||||
|
||||
// Page syntax: markers that split a page's content into summary and rest.
const (
	// summaryDivider is the HTML-comment form of the summary marker.
	summaryDivider = "<!--more-->"
	// summaryDividerOrg is the Org-mode form of the summary marker.
	summaryDividerOrg = "# more"
)
|
||||
|
||||
func (l *pageLexer) next() rune {
|
||||
if int(l.pos) >= len(l.input) {
|
||||
l.width = 0
|
||||
|
@ -178,11 +185,21 @@ func (l *pageLexer) nextItem() Item {
|
|||
return item
|
||||
}
|
||||
|
||||
// scans until an opening shortcode opening bracket.
|
||||
// if no shortcodes, it will keep on scanning until EOF
|
||||
func lexTextOutsideShortcodes(l *pageLexer) stateFunc {
|
||||
func (l *pageLexer) consumeCRLF() bool {
|
||||
var consumed bool
|
||||
for _, r := range crLf {
|
||||
if l.next() != r {
|
||||
l.backup()
|
||||
} else {
|
||||
consumed = true
|
||||
}
|
||||
}
|
||||
return consumed
|
||||
}
|
||||
|
||||
func lexMainSection(l *pageLexer) stateFunc {
|
||||
for {
|
||||
if strings.HasPrefix(l.input[l.pos:], leftDelimScWithMarkup) || strings.HasPrefix(l.input[l.pos:], leftDelimScNoMarkup) {
|
||||
if l.isShortCodeStart() {
|
||||
if l.pos > l.start {
|
||||
l.emit(tText)
|
||||
}
|
||||
|
@ -194,12 +211,79 @@ func lexTextOutsideShortcodes(l *pageLexer) stateFunc {
|
|||
l.currRightDelimItem = tRightDelimScNoMarkup
|
||||
}
|
||||
return lexShortcodeLeftDelim
|
||||
|
||||
}
|
||||
if l.next() == eof {
|
||||
|
||||
if l.contentSections <= 1 {
|
||||
if strings.HasPrefix(l.input[l.pos:], summaryDivider) {
|
||||
if l.pos > l.start {
|
||||
l.emit(tText)
|
||||
}
|
||||
l.contentSections++
|
||||
l.pos += pos(len(summaryDivider))
|
||||
l.emit(tSummaryDivider)
|
||||
} else if strings.HasPrefix(l.input[l.pos:], summaryDividerOrg) {
|
||||
if l.pos > l.start {
|
||||
l.emit(tText)
|
||||
}
|
||||
l.contentSections++
|
||||
l.pos += pos(len(summaryDividerOrg))
|
||||
l.emit(tSummaryDividerOrg)
|
||||
}
|
||||
}
|
||||
|
||||
r := l.next()
|
||||
if r == eof {
|
||||
break
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
return lexDone
|
||||
|
||||
}
|
||||
|
||||
func (l *pageLexer) isShortCodeStart() bool {
|
||||
return strings.HasPrefix(l.input[l.pos:], leftDelimScWithMarkup) || strings.HasPrefix(l.input[l.pos:], leftDelimScNoMarkup)
|
||||
}
|
||||
|
||||
func lexIntroSection(l *pageLexer) stateFunc {
|
||||
LOOP:
|
||||
for {
|
||||
r := l.next()
|
||||
if r == eof {
|
||||
break
|
||||
}
|
||||
|
||||
switch {
|
||||
case r == '+':
|
||||
return l.lexFrontMatterSection(tFrontMatterTOML, r, "TOML", "+++")
|
||||
case r == '-':
|
||||
return l.lexFrontMatterSection(tFrontMatterYAML, r, "YAML", "---")
|
||||
case r == '{':
|
||||
return lexFrontMatterJSON
|
||||
case r == '#':
|
||||
return lexFrontMatterOrgMode
|
||||
case !isSpace(r) && !isEndOfLine(r):
|
||||
if r == '<' {
|
||||
l.emit(tHTMLLead)
|
||||
// Not need to look further. Hugo treats this as plain HTML,
|
||||
// no front matter, no shortcodes, no nothing.
|
||||
l.pos = pos(len(l.input))
|
||||
l.emit(tText)
|
||||
break LOOP
|
||||
}
|
||||
return l.errorf("failed to detect front matter type; got unknown identifier %q", r)
|
||||
}
|
||||
}
|
||||
|
||||
l.contentSections = 1
|
||||
|
||||
// Now move on to the shortcodes.
|
||||
return lexMainSection
|
||||
}
|
||||
|
||||
func lexDone(l *pageLexer) stateFunc {
|
||||
|
||||
// Done!
|
||||
if l.pos > l.start {
|
||||
l.emit(tText)
|
||||
|
@ -208,6 +292,122 @@ func lexTextOutsideShortcodes(l *pageLexer) stateFunc {
|
|||
return nil
|
||||
}
|
||||
|
||||
func lexFrontMatterJSON(l *pageLexer) stateFunc {
|
||||
// Include the left delimiter
|
||||
l.backup()
|
||||
|
||||
var (
|
||||
inQuote bool
|
||||
level int
|
||||
)
|
||||
|
||||
for {
|
||||
|
||||
r := l.next()
|
||||
|
||||
switch {
|
||||
case r == eof:
|
||||
return l.errorf("unexpected EOF parsing JSON front matter")
|
||||
case r == '{':
|
||||
if !inQuote {
|
||||
level++
|
||||
}
|
||||
case r == '}':
|
||||
if !inQuote {
|
||||
level--
|
||||
}
|
||||
case r == '"':
|
||||
inQuote = !inQuote
|
||||
case r == '\\':
|
||||
// This may be an escaped quote. Make sure it's not marked as a
|
||||
// real one.
|
||||
l.next()
|
||||
}
|
||||
|
||||
if level == 0 {
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
l.consumeCRLF()
|
||||
l.emit(tFrontMatterJSON)
|
||||
|
||||
return lexMainSection
|
||||
}
|
||||
|
||||
func lexFrontMatterOrgMode(l *pageLexer) stateFunc {
|
||||
/*
|
||||
#+TITLE: Test File For chaseadamsio/goorgeous
|
||||
#+AUTHOR: Chase Adams
|
||||
#+DESCRIPTION: Just another golang parser for org content!
|
||||
*/
|
||||
|
||||
const prefix = "#+"
|
||||
|
||||
l.backup()
|
||||
|
||||
if !strings.HasPrefix(l.input[l.pos:], prefix) {
|
||||
// TODO(bep) consider error
|
||||
return lexMainSection
|
||||
}
|
||||
|
||||
// Read lines until we no longer see a #+ prefix
|
||||
LOOP:
|
||||
for {
|
||||
|
||||
r := l.next()
|
||||
|
||||
switch {
|
||||
case r == '\n':
|
||||
if !strings.HasPrefix(l.input[l.pos:], prefix) {
|
||||
break LOOP
|
||||
}
|
||||
case r == eof:
|
||||
break LOOP
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
l.emit(tFrontMatterORG)
|
||||
|
||||
return lexMainSection
|
||||
|
||||
}
|
||||
|
||||
// Handle YAML or TOML front matter.
|
||||
func (l *pageLexer) lexFrontMatterSection(tp itemType, delimr rune, name, delim string) stateFunc {
|
||||
for i := 0; i < 2; i++ {
|
||||
if r := l.next(); r != delimr {
|
||||
return l.errorf("invalid %s delimiter", name)
|
||||
}
|
||||
}
|
||||
|
||||
if !l.consumeCRLF() {
|
||||
return l.errorf("invalid %s delimiter", name)
|
||||
}
|
||||
|
||||
// We don't care about the delimiters.
|
||||
l.ignore()
|
||||
|
||||
for {
|
||||
r := l.next()
|
||||
if r == eof {
|
||||
return l.errorf("EOF looking for end %s front matter delimiter", name)
|
||||
}
|
||||
if isEndOfLine(r) {
|
||||
if strings.HasPrefix(l.input[l.pos:], delim) {
|
||||
l.emit(tp)
|
||||
l.pos += 3
|
||||
l.consumeCRLF()
|
||||
l.ignore()
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return lexMainSection
|
||||
}
|
||||
|
||||
func lexShortcodeLeftDelim(l *pageLexer) stateFunc {
|
||||
l.pos += pos(len(l.currentLeftShortcodeDelim()))
|
||||
if strings.HasPrefix(l.input[l.pos:], leftComment) {
|
||||
|
@ -234,14 +434,14 @@ func lexShortcodeComment(l *pageLexer) stateFunc {
|
|||
l.ignore()
|
||||
l.pos += pos(len(l.currentRightShortcodeDelim()))
|
||||
l.emit(tText)
|
||||
return lexTextOutsideShortcodes
|
||||
return lexMainSection
|
||||
}
|
||||
|
||||
func lexShortcodeRightDelim(l *pageLexer) stateFunc {
|
||||
l.closingState = 0
|
||||
l.pos += pos(len(l.currentRightShortcodeDelim()))
|
||||
l.emit(l.currentRightShortcodeDelimItem())
|
||||
return lexTextOutsideShortcodes
|
||||
return lexMainSection
|
||||
}
|
||||
|
||||
// either:
|
||||
|
@ -485,6 +685,8 @@ func isAlphaNumericOrHyphen(r rune) bool {
|
|||
return isAlphaNumeric(r) || r == '-'
|
||||
}
|
||||
|
||||
// crLf holds the line break runes in the order consumeCRLF tries them:
// carriage return first, then line feed.
var crLf = []rune{'\r', '\n'}
|
||||
|
||||
// isEndOfLine reports whether r is a carriage return or a line feed.
func isEndOfLine(r rune) bool {
	switch r {
	case '\r', '\n':
		return true
	}
	return false
}
|
||||
|
|
103
parser/pageparser/pageparser_intro_test.go
Normal file
103
parser/pageparser/pageparser_intro_test.go
Normal file
|
@ -0,0 +1,103 @@
|
|||
// Copyright 2018 The Hugo Authors. All rights reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package pageparser
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"strings"
|
||||
"testing"
|
||||
)
|
||||
|
||||
type lexerTest struct {
|
||||
name string
|
||||
input string
|
||||
items []Item
|
||||
}
|
||||
|
||||
var (
|
||||
tstJSON = `{ "a": { "b": "\"Hugo\"}" } }`
|
||||
tstHTMLLead = Item{tHTMLLead, 0, " <"}
|
||||
tstFrontMatterTOML = Item{tFrontMatterTOML, 0, "foo = \"bar\"\n"}
|
||||
tstFrontMatterYAML = Item{tFrontMatterYAML, 0, "foo: \"bar\"\n"}
|
||||
tstFrontMatterYAMLCRLF = Item{tFrontMatterYAML, 0, "foo: \"bar\"\r\n"}
|
||||
tstFrontMatterJSON = Item{tFrontMatterJSON, 0, tstJSON + "\r\n"}
|
||||
tstSomeText = Item{tText, 0, "\nSome text.\n"}
|
||||
tstSummaryDivider = Item{tSummaryDivider, 0, "<!--more-->"}
|
||||
tstSummaryDividerOrg = Item{tSummaryDividerOrg, 0, "# more"}
|
||||
|
||||
tstORG = `
|
||||
#+TITLE: T1
|
||||
#+AUTHOR: A1
|
||||
#+DESCRIPTION: D1
|
||||
`
|
||||
tstFrontMatterORG = Item{tFrontMatterORG, 0, tstORG}
|
||||
)
|
||||
|
||||
// crLfReplacer rewrites "\r" as "#" and "\n" as "$"; TestFrontMatter applies
// it to the got/expected strings so line breaks show up in failure messages.
var crLfReplacer = strings.NewReplacer("\r", "#", "\n", "$")
|
||||
|
||||
// TODO(bep) a way to toggle ORG mode vs the rest.
|
||||
var frontMatterTests = []lexerTest{
|
||||
{"empty", "", []Item{tstEOF}},
|
||||
{"HTML Document", ` <html> `, []Item{tstHTMLLead, Item{tText, 0, "html> "}, tstEOF}},
|
||||
{"YAML front matter", "---\nfoo: \"bar\"\n---\n\nSome text.\n", []Item{tstFrontMatterYAML, tstSomeText, tstEOF}},
|
||||
// Note that we keep all bytes as they are, but we need to handle CRLF
|
||||
{"YAML front matter CRLF", "---\r\nfoo: \"bar\"\r\n---\n\nSome text.\n", []Item{tstFrontMatterYAMLCRLF, tstSomeText, tstEOF}},
|
||||
{"TOML front matter", "+++\nfoo = \"bar\"\n+++\n\nSome text.\n", []Item{tstFrontMatterTOML, tstSomeText, tstEOF}},
|
||||
{"JSON front matter", tstJSON + "\r\n\nSome text.\n", []Item{tstFrontMatterJSON, tstSomeText, tstEOF}},
|
||||
{"ORG front matter", tstORG + "\nSome text.\n", []Item{tstFrontMatterORG, tstSomeText, tstEOF}},
|
||||
{"Summary divider ORG", tstORG + "\nSome text.\n# more\nSome text.\n", []Item{tstFrontMatterORG, tstSomeText, tstSummaryDividerOrg, tstSomeText, tstEOF}},
|
||||
{"Summary divider", "+++\nfoo = \"bar\"\n+++\n\nSome text.\n<!--more-->\nSome text.\n", []Item{tstFrontMatterTOML, tstSomeText, tstSummaryDivider, tstSomeText, tstEOF}},
|
||||
}
|
||||
|
||||
func TestFrontMatter(t *testing.T) {
|
||||
t.Parallel()
|
||||
for i, test := range frontMatterTests {
|
||||
items := collect(test.name, test.input, false, lexIntroSection)
|
||||
if !equal(items, test.items) {
|
||||
got := crLfReplacer.Replace(fmt.Sprint(items))
|
||||
expected := crLfReplacer.Replace(fmt.Sprint(test.items))
|
||||
t.Errorf("[%d] %s: got\n\t%v\nexpected\n\t%v", i, test.name, got, expected)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func collect(name, input string, skipFrontMatter bool, stateStart stateFunc) (items []Item) {
|
||||
l := newPageLexer(input, 0, stateStart)
|
||||
l.run()
|
||||
|
||||
for {
|
||||
item := l.nextItem()
|
||||
items = append(items, item)
|
||||
if item.typ == tEOF || item.typ == tError {
|
||||
break
|
||||
}
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
// no positional checking, for now ...
|
||||
func equal(i1, i2 []Item) bool {
|
||||
if len(i1) != len(i2) {
|
||||
return false
|
||||
}
|
||||
for k := range i1 {
|
||||
if i1[k].typ != i2[k].typ {
|
||||
return false
|
||||
}
|
||||
if i1[k].Val != i2[k].Val {
|
||||
return false
|
||||
}
|
||||
}
|
||||
return true
|
||||
}
|
|
@ -13,15 +13,7 @@
|
|||
|
||||
package pageparser
|
||||
|
||||
import (
|
||||
"testing"
|
||||
)
|
||||
|
||||
type shortCodeLexerTest struct {
|
||||
name string
|
||||
input string
|
||||
items []Item
|
||||
}
|
||||
import "testing"
|
||||
|
||||
var (
|
||||
tstEOF = Item{tEOF, 0, ""}
|
||||
|
@ -39,7 +31,7 @@ var (
|
|||
tstVal = Item{tScParamVal, 0, "Hello World"}
|
||||
)
|
||||
|
||||
var shortCodeLexerTests = []shortCodeLexerTest{
|
||||
var shortCodeLexerTests = []lexerTest{
|
||||
{"empty", "", []Item{tstEOF}},
|
||||
{"spaces", " \t\n", []Item{{tText, 0, " \t\n"}, tstEOF}},
|
||||
{"text", `to be or not`, []Item{{tText, 0, "to be or not"}, tstEOF}},
|
||||
|
@ -159,7 +151,7 @@ var shortCodeLexerTests = []shortCodeLexerTest{
|
|||
func TestShortcodeLexer(t *testing.T) {
|
||||
t.Parallel()
|
||||
for i, test := range shortCodeLexerTests {
|
||||
items := collect(&test)
|
||||
items := collect(test.name, test.input, true, lexMainSection)
|
||||
if !equal(items, test.items) {
|
||||
t.Errorf("[%d] %s: got\n\t%v\nexpected\n\t%v", i, test.name, items, test.items)
|
||||
}
|
||||
|
@ -170,38 +162,10 @@ func BenchmarkShortcodeLexer(b *testing.B) {
|
|||
b.ResetTimer()
|
||||
for i := 0; i < b.N; i++ {
|
||||
for _, test := range shortCodeLexerTests {
|
||||
items := collect(&test)
|
||||
items := collect(test.name, test.input, true, lexMainSection)
|
||||
if !equal(items, test.items) {
|
||||
b.Errorf("%s: got\n\t%v\nexpected\n\t%v", test.name, items, test.items)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func collect(t *shortCodeLexerTest) (items []Item) {
|
||||
l := newPageLexer(t.name, t.input, 0).run()
|
||||
for {
|
||||
item := l.nextItem()
|
||||
items = append(items, item)
|
||||
if item.typ == tEOF || item.typ == tError {
|
||||
break
|
||||
}
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
// no positional checking, for now ...
|
||||
func equal(i1, i2 []Item) bool {
|
||||
if len(i1) != len(i2) {
|
||||
return false
|
||||
}
|
||||
for k := range i1 {
|
||||
if i1[k].typ != i2[k].typ {
|
||||
return false
|
||||
}
|
||||
if i1[k].Val != i2[k].Val {
|
||||
return false
|
||||
}
|
||||
}
|
||||
return true
|
||||
}
|
Loading…
Reference in a new issue