parser/pageparser: Don't store the byte slices

On its own this change doesn't do any magic, but this is part of a bigger picture about making Hugo leaner in the memory usage department.
2024-11-14 20:37:55 -05:00 · 2022-07-07 16:11:47 +02:00 · 2022-07-07 16:11:47 +02:00 · 223bf28004
commit 223bf28004
parent 72b0ccdb01
13 changed files with 385 additions and 198 deletions
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@ -50,6 +50,7 @@ jobs:
    - if: matrix.os == 'windows-latest'
      run: |
        choco install pandoc
+        choco install mingw --version 10.2.0 --allow-downgrade
    - run: pandoc -v
    - if: matrix.os == 'ubuntu-latest'
      name: Install dart-sass-embedded Linux
--- a/hugolib/page.go
+++ b/hugolib/page.go
@ -639,7 +639,7 @@ func (p *pageState) mapContentForResult(
 		if fe, ok := err.(herrors.FileError); ok {
 			return fe
 		}
-		return p.parseError(err, iter.Input(), i.Pos)
+		return p.parseError(err, result.Input(), i.Pos())
 	}

 	// the parser is guaranteed to return items in proper order or fail, so …
@ -656,14 +656,14 @@ Loop:
 		case it.Type == pageparser.TypeIgnore:
 		case it.IsFrontMatter():
 			f := pageparser.FormatFromFrontMatterType(it.Type)
-			m, err := metadecoders.Default.UnmarshalToMap(it.Val, f)
+			m, err := metadecoders.Default.UnmarshalToMap(it.Val(result.Input()), f)
 			if err != nil {
 				if fe, ok := err.(herrors.FileError); ok {
 					pos := fe.Position()
 					// Apply the error to the content file.
 					pos.Filename = p.File().Filename()
 					// Offset the starting position of front matter.
-					offset := iter.LineNumber() - 1
+					offset := iter.LineNumber(result.Input()) - 1
 					if f == metadecoders.YAML {
 						offset -= 1
 					}
@ -687,7 +687,7 @@ Loop:

 			next := iter.Peek()
 			if !next.IsDone() {
-				p.source.posMainContent = next.Pos
+				p.source.posMainContent = next.Pos()
 			}

 			if !p.s.shouldBuild(p) {
@ -699,10 +699,10 @@ Loop:
 			posBody := -1
 			f := func(item pageparser.Item) bool {
 				if posBody == -1 && !item.IsDone() {
-					posBody = item.Pos
+					posBody = item.Pos()
 				}

-				if item.IsNonWhitespace() {
+				if item.IsNonWhitespace(result.Input()) {
 					p.truncated = true

 					// Done
@ -712,7 +712,7 @@ Loop:
 			}
 			iter.PeekWalk(f)

-			p.source.posSummaryEnd = it.Pos
+			p.source.posSummaryEnd = it.Pos()
 			p.source.posBodyStart = posBody
 			p.source.hasSummaryDivider = true

@ -727,13 +727,13 @@ Loop:
 			// let extractShortcode handle left delim (will do so recursively)
 			iter.Backup()

-			currShortcode, err := s.extractShortcode(ordinal, 0, iter)
+			currShortcode, err := s.extractShortcode(ordinal, 0, result.Input(), iter)
 			if err != nil {
 				return fail(err, it)
 			}

-			currShortcode.pos = it.Pos
-			currShortcode.length = iter.Current().Pos - it.Pos
+			currShortcode.pos = it.Pos()
+			currShortcode.length = iter.Current().Pos() - it.Pos()
 			if currShortcode.placeholder == "" {
 				currShortcode.placeholder = createShortcodePlaceholder("s", currShortcode.ordinal)
 			}
@ -754,7 +754,7 @@ Loop:
 			rn.AddShortcode(currShortcode)

 		case it.Type == pageparser.TypeEmoji:
-			if emoji := helpers.Emoji(it.ValStr()); emoji != nil {
+			if emoji := helpers.Emoji(it.ValStr(result.Input())); emoji != nil {
 				rn.AddReplacement(emoji, it)
 			} else {
 				rn.AddBytes(it)
@ -762,7 +762,7 @@ Loop:
 		case it.IsEOF():
 			break Loop
 		case it.IsError():
-			err := fail(errors.New(it.ValStr()), it)
+			err := fail(errors.New(it.ValStr(result.Input())), it)
 			currShortcode.err = err
 			return err

--- a/hugolib/page__content.go
+++ b/hugolib/page__content.go
@ -45,7 +45,7 @@ func (p pageContent) contentToRender(parsed pageparser.Result, pm *pageContentMa
 	for _, it := range pm.items {
 		switch v := it.(type) {
 		case pageparser.Item:
-			c = append(c, source[v.Pos:v.Pos+len(v.Val)]...)
+			c = append(c, source[v.Pos():v.Pos()+len(v.Val(source))]...)
 		case pageContentReplacement:
 			c = append(c, v.val...)
 		case *shortcode:
--- a/hugolib/shortcode.go
+++ b/hugolib/shortcode.go
@ -509,7 +509,7 @@ func (s *shortcodeHandler) parseError(err error, input []byte, pos int) error {
 // pageTokens state:
 // - before: positioned just before the shortcode start
 // - after: shortcode(s) consumed (plural when they are nested)
-func (s *shortcodeHandler) extractShortcode(ordinal, level int, pt *pageparser.Iterator) (*shortcode, error) {
+func (s *shortcodeHandler) extractShortcode(ordinal, level int, source []byte, pt *pageparser.Iterator) (*shortcode, error) {
 	if s == nil {
 		panic("handler nil")
 	}
@ -520,7 +520,7 @@ func (s *shortcodeHandler) extractShortcode(ordinal, level int, pt *pageparser.I
 		pt.Backup()
 		item := pt.Next()
 		if item.IsIndentation() {
-			sc.indentation = string(item.Val)
+			sc.indentation = item.ValStr(source)
 		}
 	}

@ -530,7 +530,7 @@ func (s *shortcodeHandler) extractShortcode(ordinal, level int, pt *pageparser.I
 	const errorPrefix = "failed to extract shortcode"

 	fail := func(err error, i pageparser.Item) error {
-		return s.parseError(fmt.Errorf("%s: %w", errorPrefix, err), pt.Input(), i.Pos)
+		return s.parseError(fmt.Errorf("%s: %w", errorPrefix, err), source, i.Pos())
 	}

 Loop:
@ -550,7 +550,7 @@ Loop:
 			if cnt > 0 {
 				// nested shortcode; append it to inner content
 				pt.Backup()
-				nested, err := s.extractShortcode(nestedOrdinal, nextLevel, pt)
+				nested, err := s.extractShortcode(nestedOrdinal, nextLevel, source, pt)
 				nestedOrdinal++
 				if nested != nil && nested.name != "" {
 					s.addName(nested.name)
@ -589,7 +589,7 @@ Loop:
 						// return that error, more specific
 						continue
 					}
-					return sc, fail(fmt.Errorf("shortcode %q has no .Inner, yet a closing tag was provided", next.Val), next)
+					return sc, fail(fmt.Errorf("shortcode %q has no .Inner, yet a closing tag was provided", next.ValStr(source)), next)
 				}
 			}
 			if next.IsRightShortcodeDelim() {
@ -602,11 +602,11 @@ Loop:

 			return sc, nil
 		case currItem.IsText():
-			sc.inner = append(sc.inner, currItem.ValStr())
+			sc.inner = append(sc.inner, currItem.ValStr(source))
 		case currItem.Type == pageparser.TypeEmoji:
 			// TODO(bep) avoid the duplication of these "text cases", to prevent
 			// more of #6504 in the future.
-			val := currItem.ValStr()
+			val := currItem.ValStr(source)
 			if emoji := helpers.Emoji(val); emoji != nil {
 				sc.inner = append(sc.inner, string(emoji))
 			} else {
@ -614,7 +614,7 @@ Loop:
 			}
 		case currItem.IsShortcodeName():

-			sc.name = currItem.ValStr()
+			sc.name = currItem.ValStr(source)

 			// Used to check if the template expects inner content.
 			templs := s.s.Tmpl().LookupVariants(sc.name)
@ -625,7 +625,7 @@ Loop:
 			sc.info = templs[0].(tpl.Info)
 			sc.templs = templs
 		case currItem.IsInlineShortcodeName():
-			sc.name = currItem.ValStr()
+			sc.name = currItem.ValStr(source)
 			sc.isInline = true
 		case currItem.IsShortcodeParam():
 			if !pt.IsValueNext() {
@ -634,11 +634,11 @@ Loop:
 				// named params
 				if sc.params == nil {
 					params := make(map[string]any)
-					params[currItem.ValStr()] = pt.Next().ValTyped()
+					params[currItem.ValStr(source)] = pt.Next().ValTyped(source)
 					sc.params = params
 				} else {
 					if params, ok := sc.params.(map[string]any); ok {
-						params[currItem.ValStr()] = pt.Next().ValTyped()
+						params[currItem.ValStr(source)] = pt.Next().ValTyped(source)
 					} else {
 						return sc, errShortCodeIllegalState
 					}
@ -647,11 +647,11 @@ Loop:
 				// positional params
 				if sc.params == nil {
 					var params []any
-					params = append(params, currItem.ValTyped())
+					params = append(params, currItem.ValTyped(source))
 					sc.params = params
 				} else {
 					if params, ok := sc.params.([]any); ok {
-						params = append(params, currItem.ValTyped())
+						params = append(params, currItem.ValTyped(source))
 						sc.params = params
 					} else {
 						return sc, errShortCodeIllegalState
--- a/hugolib/shortcode_test.go
+++ b/hugolib/shortcode_test.go
@ -112,7 +112,7 @@ title: "Shortcodes Galore!"
 			handler := newShortcodeHandler(nil, s)
 			iter := p.Iterator()

-			short, err := handler.extractShortcode(0, 0, iter)
+			short, err := handler.extractShortcode(0, 0, p.Input(), iter)

 			test.check(c, short, err)
 		})
@ -763,7 +763,7 @@ title: "Hugo Rocks!"
 	)
 }

-func TestShortcodeTypedParams(t *testing.T) {
+func TestShortcodeParams(t *testing.T) {
 	t.Parallel()
 	c := qt.New(t)

@ -778,6 +778,7 @@ title: "Hugo Rocks!"
 types positional: {{< hello true false 33 3.14 >}}
 types named: {{< hello b1=true b2=false i1=33 f1=3.14 >}}
 types string: {{< hello "true" trues "33" "3.14" >}}
+escaped quoute: {{< hello "hello \"world\"." >}}


 `).WithTemplatesAdded(
@ -796,6 +797,7 @@ Get: {{ printf "%v (%T)" $b1 $b1 | safeHTML }}
 		"types positional: - 0: true (bool) - 1: false (bool) - 2: 33 (int) - 3: 3.14 (float64)",
 		"types named: - b1: true (bool) - b2: false (bool) - f1: 3.14 (float64) - i1: 33 (int) Get: true (bool) ",
 		"types string: - 0: true (string) - 1: trues (string) - 2: 33 (string) - 3: 3.14 (string) ",
+		"hello &#34;world&#34;. (string)",
 	)
 }

--- a/parser/pageparser/item.go
+++ b/parser/pageparser/item.go
@ -22,21 +22,59 @@ import (
 	"github.com/yuin/goldmark/util"
 )

+type lowHigh struct {
+	Low  int
+	High int
+}
+
 type Item struct {
-	Type     ItemType
-	Pos      int
-	Val      []byte
+	Type ItemType
+	Err  error
+
+	// The common case is a single segment.
+	low  int
+	high int
+
+	// This is the uncommon case: the value is the concatenation of several source ranges (e.g. with escape characters skipped).
+	segments []lowHigh
+
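+	// The input byte at Pos(), recorded by the lexer when Pos() is within the input; IsProbablySourceOfItems compares it against a candidate source.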
+	firstByte byte
+
 	isString bool
 }

 type Items []Item

-func (i Item) ValStr() string {
-	return string(i.Val)
+func (i Item) Pos() int {
+	if len(i.segments) > 0 {
+		return i.segments[0].Low
+	}
+	return i.low
 }

-func (i Item) ValTyped() any {
-	str := i.ValStr()
+func (i Item) Val(source []byte) []byte {
+	if len(i.segments) == 0 {
+		return source[i.low:i.high]
+	}
+
+	if len(i.segments) == 1 {
+		return source[i.segments[0].Low:i.segments[0].High]
+	}
+
+	var b bytes.Buffer
+	for _, s := range i.segments {
+		b.Write(source[s.Low:s.High])
+	}
+	return b.Bytes()
+}
+
+func (i Item) ValStr(source []byte) string {
+	return string(i.Val(source))
+}
+
+func (i Item) ValTyped(source []byte) any {
+	str := i.ValStr(source)
 	if i.isString {
 		// A quoted value that is a string even if it looks like a number etc.
 		return str
@ -73,8 +111,8 @@ func (i Item) IsIndentation() bool {
 	return i.Type == tIndentation
 }

-func (i Item) IsNonWhitespace() bool {
-	return len(bytes.TrimSpace(i.Val)) > 0
+func (i Item) IsNonWhitespace(source []byte) bool {
+	return len(bytes.TrimSpace(i.Val(source))) > 0
 }

 func (i Item) IsShortcodeName() bool {
@ -125,20 +163,21 @@ func (i Item) IsError() bool {
 	return i.Type == tError
 }

-func (i Item) String() string {
+func (i Item) ToString(source []byte) string {
+	val := i.Val(source)
 	switch {
 	case i.Type == tEOF:
 		return "EOF"
 	case i.Type == tError:
-		return string(i.Val)
+		return string(val)
 	case i.Type == tIndentation:
-		return fmt.Sprintf("%s:[%s]", i.Type, util.VisualizeSpaces(i.Val))
+		return fmt.Sprintf("%s:[%s]", i.Type, util.VisualizeSpaces(val))
 	case i.Type > tKeywordMarker:
-		return fmt.Sprintf("<%s>", i.Val)
-	case len(i.Val) > 50:
-		return fmt.Sprintf("%v:%.20q...", i.Type, i.Val)
+		return fmt.Sprintf("<%s>", val)
+	case len(val) > 50:
+		return fmt.Sprintf("%v:%.20q...", i.Type, val)
 	}
-	return fmt.Sprintf("%v:[%s]", i.Type, i.Val)
+	return fmt.Sprintf("%v:[%s]", i.Type, val)
 }

 type ItemType int
--- a/parser/pageparser/item_test.go
+++ b/parser/pageparser/item_test.go
@ -22,13 +22,22 @@ import (
 func TestItemValTyped(t *testing.T) {
 	c := qt.New(t)

-	c.Assert(Item{Val: []byte("3.14")}.ValTyped(), qt.Equals, float64(3.14))
-	c.Assert(Item{Val: []byte(".14")}.ValTyped(), qt.Equals, float64(.14))
-	c.Assert(Item{Val: []byte("314")}.ValTyped(), qt.Equals, 314)
-	c.Assert(Item{Val: []byte("314x")}.ValTyped(), qt.Equals, "314x")
-	c.Assert(Item{Val: []byte("314 ")}.ValTyped(), qt.Equals, "314 ")
-	c.Assert(Item{Val: []byte("314"), isString: true}.ValTyped(), qt.Equals, "314")
-	c.Assert(Item{Val: []byte("true")}.ValTyped(), qt.Equals, true)
-	c.Assert(Item{Val: []byte("false")}.ValTyped(), qt.Equals, false)
-	c.Assert(Item{Val: []byte("trues")}.ValTyped(), qt.Equals, "trues")
+	source := []byte("3.14")
+	c.Assert(Item{low: 0, high: len(source)}.ValTyped(source), qt.Equals, float64(3.14))
+	source = []byte(".14")
+	c.Assert(Item{low: 0, high: len(source)}.ValTyped(source), qt.Equals, float64(0.14))
+	source = []byte("314")
+	c.Assert(Item{low: 0, high: len(source)}.ValTyped(source), qt.Equals, 314)
+	source = []byte("314")
+	c.Assert(Item{low: 0, high: len(source), isString: true}.ValTyped(source), qt.Equals, "314")
+	source = []byte("314x")
+	c.Assert(Item{low: 0, high: len(source)}.ValTyped(source), qt.Equals, "314x")
+	source = []byte("314 ")
+	c.Assert(Item{low: 0, high: len(source)}.ValTyped(source), qt.Equals, "314 ")
+	source = []byte("true")
+	c.Assert(Item{low: 0, high: len(source)}.ValTyped(source), qt.Equals, true)
+	source = []byte("false")
+	c.Assert(Item{low: 0, high: len(source)}.ValTyped(source), qt.Equals, false)
+	source = []byte("trued")
+	c.Assert(Item{low: 0, high: len(source)}.ValTyped(source), qt.Equals, "trued")
 }
--- a/parser/pageparser/pagelexer.go
+++ b/parser/pageparser/pagelexer.go
@ -54,7 +54,7 @@ type pageLexer struct {

 // Implement the Result interface
 func (l *pageLexer) Iterator() *Iterator {
-	return l.newIterator()
+	return NewIterator(l.items)
 }

 func (l *pageLexer) Input() []byte {
@ -85,10 +85,6 @@ func newPageLexer(input []byte, stateStart stateFunc, cfg Config) *pageLexer {
 	return lexer
 }

-func (l *pageLexer) newIterator() *Iterator {
-	return &Iterator{l: l, lastPos: -1}
-}
-
 // main loop
 func (l *pageLexer) run() *pageLexer {
 	for l.state = l.stateStart; l.state != nil; {
@ -136,6 +132,13 @@ func (l *pageLexer) backup() {
 	l.pos -= l.width
 }

+func (l *pageLexer) append(item Item) {
+	if item.Pos() < len(l.input) {
+		item.firstByte = l.input[item.Pos()]
+	}
+	l.items = append(l.items, item)
+}
+
 // sends an item back to the client.
 func (l *pageLexer) emit(t ItemType) {
 	defer func() {
@ -151,11 +154,11 @@ func (l *pageLexer) emit(t ItemType) {
 				break
 			}
 			if i == l.start && b != '\n' {
-				l.items = append(l.items, Item{tIndentation, l.start, l.input[l.start:l.pos], false})
+				l.append(Item{Type: tIndentation, low: l.start, high: l.pos})
 				return
 			} else if b == '\n' && i < l.pos-1 {
-				l.items = append(l.items, Item{t, l.start, l.input[l.start : i+1], false})
-				l.items = append(l.items, Item{tIndentation, i + 1, l.input[i+1 : l.pos], false})
+				l.append(Item{Type: t, low: l.start, high: i + 1})
+				l.append(Item{Type: tIndentation, low: i + 1, high: l.pos})
 				return
 			} else if b == '\n' && i == l.pos-1 {
 				break
@ -164,13 +167,13 @@ func (l *pageLexer) emit(t ItemType) {
 		}
 	}

-	l.items = append(l.items, Item{t, l.start, l.input[l.start:l.pos], false})
+	l.append(Item{Type: t, low: l.start, high: l.pos})

 }

 // sends a string item back to the client.
 func (l *pageLexer) emitString(t ItemType) {
-	l.items = append(l.items, Item{t, l.start, l.input[l.start:l.pos], true})
+	l.append(Item{Type: t, low: l.start, high: l.pos, isString: true})
 	l.start = l.pos
 }

@ -180,14 +183,33 @@ func (l *pageLexer) isEOF() bool {

 // special case, do not send '\\' back to client
 func (l *pageLexer) ignoreEscapesAndEmit(t ItemType, isString bool) {
-	val := bytes.Map(func(r rune) rune {
+	i := l.start
+	k := i
+
+	var segments []lowHigh
+
+	for i < l.pos {
+		r, w := utf8.DecodeRune(l.input[i:l.pos])
 		if r == '\\' {
-			return -1
+			if i > k {
+				segments = append(segments, lowHigh{k, i})
+			}
+			l.append(Item{Type: TypeIgnore, low: i, high: i + w})
+			k = i + w
 		}
-		return r
-	}, l.input[l.start:l.pos])
-	l.items = append(l.items, Item{t, l.start, val, isString})
+		i += w
+	}
+
+	if k < l.pos {
+		segments = append(segments, lowHigh{k, l.pos})
+	}
+
+	if len(segments) > 0 {
+		l.append(Item{Type: t, segments: segments})
+	}
+
 	l.start = l.pos
+
 }

 // gets the current value (for debugging and error handling)
@ -204,7 +226,7 @@ var lf = []byte("\n")

 // nil terminates the parser
 func (l *pageLexer) errorf(format string, args ...any) stateFunc {
-	l.items = append(l.items, Item{tError, l.start, []byte(fmt.Sprintf(format, args...)), true})
+	l.append(Item{Type: tError, Err: fmt.Errorf(format, args...)})
 	return nil
 }

--- a/parser/pageparser/pageparser.go
+++ b/parser/pageparser/pageparser.go
@ -15,6 +15,7 @@ package pageparser

 import (
 	"bytes"
+	"errors"
 	"fmt"
 	"io"
 	"io/ioutil"
@ -33,9 +34,6 @@ type Result interface {
 var _ Result = (*pageLexer)(nil)

 // Parse parses the page in the given reader according to the given Config.
-// TODO(bep) now that we have improved the "lazy order" init, it *may* be
-// some potential saving in doing a buffered approach where the first pass does
-// the frontmatter only.
 func Parse(r io.Reader, cfg Config) (Result, error) {
 	return parseSection(r, cfg, lexIntroSection)
 }
@ -63,12 +61,12 @@ func ParseFrontMatterAndContent(r io.Reader) (ContentFrontMatter, error) {
 	walkFn := func(item Item) bool {
 		if frontMatterSource != nil {
 			// The rest is content.
-			cf.Content = psr.Input()[item.Pos:]
+			cf.Content = psr.Input()[item.low:]
 			// Done
 			return false
 		} else if item.IsFrontMatter() {
 			cf.FrontMatterFormat = FormatFromFrontMatterType(item.Type)
-			frontMatterSource = item.Val
+			frontMatterSource = item.Val(psr.Input())
 		}
 		return true
 	}
@ -113,10 +111,15 @@ func parseBytes(b []byte, cfg Config, start stateFunc) (Result, error) {
 	return lexer, nil
 }

+// NewIterator creates a new Iterator.
+func NewIterator(items Items) *Iterator {
+	return &Iterator{items: items, lastPos: -1}
+}
+
 // An Iterator has methods to iterate a parsed page with support going back
 // if needed.
 type Iterator struct {
-	l       *pageLexer
+	items   Items
 	lastPos int // position of the last item returned by nextItem
 }

@ -126,19 +129,14 @@ func (t *Iterator) Next() Item {
 	return t.Current()
 }

-// Input returns the input source.
-func (t *Iterator) Input() []byte {
-	return t.l.Input()
-}
-
-var errIndexOutOfBounds = Item{tError, 0, []byte("no more tokens"), true}
+var errIndexOutOfBounds = Item{Type: tError, Err: errors.New("no more tokens")}

 // Current will repeatably return the current item.
 func (t *Iterator) Current() Item {
-	if t.lastPos >= len(t.l.items) {
+	if t.lastPos >= len(t.items) {
 		return errIndexOutOfBounds
 	}
-	return t.l.items[t.lastPos]
+	return t.items[t.lastPos]
 }

 // backs up one token.
@ -163,14 +161,14 @@ func (t *Iterator) IsValueNext() bool {
 // look at, but do not consume, the next item
 // repeated, sequential calls will return the same item
 func (t *Iterator) Peek() Item {
-	return t.l.items[t.lastPos+1]
+	return t.items[t.lastPos+1]
 }

 // PeekWalk will feed the next items in the iterator to walkFn
 // until it returns false.
 func (t *Iterator) PeekWalk(walkFn func(item Item) bool) {
-	for i := t.lastPos + 1; i < len(t.l.items); i++ {
-		item := t.l.items[i]
+	for i := t.lastPos + 1; i < len(t.items); i++ {
+		item := t.items[i]
 		if !walkFn(item) {
 			break
 		}
@ -190,6 +188,49 @@ func (t *Iterator) Consume(cnt int) {
 }

 // LineNumber returns the current line number. Used for logging.
-func (t *Iterator) LineNumber() int {
-	return bytes.Count(t.l.input[:t.Current().Pos], lf) + 1
+func (t *Iterator) LineNumber(source []byte) int {
+	return bytes.Count(source[:t.Current().Pos()], lf) + 1 // Pos(), not low: low is left at 0 for multi-segment items
+}
+
+// IsProbablySourceOfItems returns true if the given source looks like the
+// original source of the items.
+// There may be some false positives, but that is highly unlikely and good enough
+// for the planned purpose.
+// It returns false if items is empty (even when source is empty too) or if the
+// last item is not EOF (error situations).
+func IsProbablySourceOfItems(source []byte, items Items) bool {
+	if len(source) == 0 && len(items) == 0 {
+		return false
+	}
+	if len(items) == 0 {
+		return false
+	}
+
+	last := items[len(items)-1]
+	if last.Type != tEOF {
+		return false
+	}
+
+	if last.Pos() != len(source) {
+		return false
+	}
+
+	for _, item := range items {
+		if item.Type == tError {
+			return false
+		}
+		if item.Type == tEOF {
+			return true
+		}
+
+		if item.Pos() >= len(source) {
+			return false
+		}
+
+		if item.firstByte != source[item.Pos()] {
+			return false
+		}
+	}
+
+	return true
 }
--- a/parser/pageparser/pageparser_intro_test.go
+++ b/parser/pageparser/pageparser_intro_test.go
@ -15,19 +15,25 @@ package pageparser

 import (
 	"fmt"
-	"reflect"
 	"strings"
 	"testing"
+
+	qt "github.com/frankban/quicktest"
 )

 type lexerTest struct {
 	name  string
 	input string
-	items []Item
+	items []typeText
 }

-func nti(tp ItemType, val string) Item {
-	return Item{tp, 0, []byte(val), false}
+type typeText struct {
+	typ  ItemType
+	text string
+}
+
+func nti(tp ItemType, val string) typeText {
+	return typeText{typ: tp, text: val}
 }

 var (
@ -52,48 +58,79 @@ var crLfReplacer = strings.NewReplacer("\r", "#", "\n", "$")

 // TODO(bep) a way to toggle ORG mode vs the rest.
 var frontMatterTests = []lexerTest{
-	{"empty", "", []Item{tstEOF}},
-	{"Byte order mark", "\ufeff\nSome text.\n", []Item{nti(TypeIgnore, "\ufeff"), tstSomeText, tstEOF}},
-	{"HTML Document", `  <html>  `, []Item{nti(tError, "plain HTML documents not supported")}},
-	{"HTML Document with shortcode", `<html>{{< sc1 >}}</html>`, []Item{nti(tError, "plain HTML documents not supported")}},
-	{"No front matter", "\nSome text.\n", []Item{tstSomeText, tstEOF}},
-	{"YAML front matter", "---\nfoo: \"bar\"\n---\n\nSome text.\n", []Item{tstFrontMatterYAML, tstSomeText, tstEOF}},
-	{"YAML empty front matter", "---\n---\n\nSome text.\n", []Item{nti(TypeFrontMatterYAML, ""), tstSomeText, tstEOF}},
-	{"YAML commented out front matter", "<!--\n---\nfoo: \"bar\"\n---\n-->\nSome text.\n", []Item{nti(TypeIgnore, "<!--\n"), tstFrontMatterYAML, nti(TypeIgnore, "-->"), tstSomeText, tstEOF}},
-	{"YAML commented out front matter, no end", "<!--\n---\nfoo: \"bar\"\n---\nSome text.\n", []Item{nti(TypeIgnore, "<!--\n"), tstFrontMatterYAML, nti(tError, "starting HTML comment with no end")}},
+	{"empty", "", []typeText{tstEOF}},
+	{"Byte order mark", "\ufeff\nSome text.\n", []typeText{nti(TypeIgnore, "\ufeff"), tstSomeText, tstEOF}},
+	{"HTML Document", `  <html>  `, []typeText{nti(tError, "plain HTML documents not supported")}},
+	{"HTML Document with shortcode", `<html>{{< sc1 >}}</html>`, []typeText{nti(tError, "plain HTML documents not supported")}},
+	{"No front matter", "\nSome text.\n", []typeText{tstSomeText, tstEOF}},
+	{"YAML front matter", "---\nfoo: \"bar\"\n---\n\nSome text.\n", []typeText{tstFrontMatterYAML, tstSomeText, tstEOF}},
+	{"YAML empty front matter", "---\n---\n\nSome text.\n", []typeText{nti(TypeFrontMatterYAML, ""), tstSomeText, tstEOF}},
+	{"YAML commented out front matter", "<!--\n---\nfoo: \"bar\"\n---\n-->\nSome text.\n", []typeText{nti(TypeIgnore, "<!--\n"), tstFrontMatterYAML, nti(TypeIgnore, "-->"), tstSomeText, tstEOF}},
+	{"YAML commented out front matter, no end", "<!--\n---\nfoo: \"bar\"\n---\nSome text.\n", []typeText{nti(TypeIgnore, "<!--\n"), tstFrontMatterYAML, nti(tError, "starting HTML comment with no end")}},
 	// Note that we keep all bytes as they are, but we need to handle CRLF
-	{"YAML front matter CRLF", "---\r\nfoo: \"bar\"\r\n---\n\nSome text.\n", []Item{tstFrontMatterYAMLCRLF, tstSomeText, tstEOF}},
-	{"TOML front matter", "+++\nfoo = \"bar\"\n+++\n\nSome text.\n", []Item{tstFrontMatterTOML, tstSomeText, tstEOF}},
-	{"JSON front matter", tstJSON + "\r\n\nSome text.\n", []Item{tstFrontMatterJSON, tstSomeText, tstEOF}},
-	{"ORG front matter", tstORG + "\nSome text.\n", []Item{tstFrontMatterORG, tstSomeText, tstEOF}},
-	{"Summary divider ORG", tstORG + "\nSome text.\n# more\nSome text.\n", []Item{tstFrontMatterORG, tstSomeText, nti(TypeLeadSummaryDivider, "# more\n"), nti(tText, "Some text.\n"), tstEOF}},
-	{"Summary divider", "+++\nfoo = \"bar\"\n+++\n\nSome text.\n<!--more-->\nSome text.\n", []Item{tstFrontMatterTOML, tstSomeText, tstSummaryDivider, nti(tText, "Some text.\n"), tstEOF}},
-	{"Summary divider same line", "+++\nfoo = \"bar\"\n+++\n\nSome text.<!--more-->Some text.\n", []Item{tstFrontMatterTOML, nti(tText, "\nSome text."), nti(TypeLeadSummaryDivider, "<!--more-->"), nti(tText, "Some text.\n"), tstEOF}},
+	{"YAML front matter CRLF", "---\r\nfoo: \"bar\"\r\n---\n\nSome text.\n", []typeText{tstFrontMatterYAMLCRLF, tstSomeText, tstEOF}},
+	{"TOML front matter", "+++\nfoo = \"bar\"\n+++\n\nSome text.\n", []typeText{tstFrontMatterTOML, tstSomeText, tstEOF}},
+	{"JSON front matter", tstJSON + "\r\n\nSome text.\n", []typeText{tstFrontMatterJSON, tstSomeText, tstEOF}},
+	{"ORG front matter", tstORG + "\nSome text.\n", []typeText{tstFrontMatterORG, tstSomeText, tstEOF}},
+	{"Summary divider ORG", tstORG + "\nSome text.\n# more\nSome text.\n", []typeText{tstFrontMatterORG, tstSomeText, nti(TypeLeadSummaryDivider, "# more\n"), nti(tText, "Some text.\n"), tstEOF}},
+	{"Summary divider", "+++\nfoo = \"bar\"\n+++\n\nSome text.\n<!--more-->\nSome text.\n", []typeText{tstFrontMatterTOML, tstSomeText, tstSummaryDivider, nti(tText, "Some text.\n"), tstEOF}},
+	{"Summary divider same line", "+++\nfoo = \"bar\"\n+++\n\nSome text.<!--more-->Some text.\n", []typeText{tstFrontMatterTOML, nti(tText, "\nSome text."), nti(TypeLeadSummaryDivider, "<!--more-->"), nti(tText, "Some text.\n"), tstEOF}},
 	// https://github.com/gohugoio/hugo/issues/5402
-	{"Summary and shortcode, no space", "+++\nfoo = \"bar\"\n+++\n\nSome text.\n<!--more-->{{< sc1 >}}\nSome text.\n", []Item{tstFrontMatterTOML, tstSomeText, nti(TypeLeadSummaryDivider, "<!--more-->"), tstLeftNoMD, tstSC1, tstRightNoMD, tstSomeText, tstEOF}},
+	{"Summary and shortcode, no space", "+++\nfoo = \"bar\"\n+++\n\nSome text.\n<!--more-->{{< sc1 >}}\nSome text.\n", []typeText{tstFrontMatterTOML, tstSomeText, nti(TypeLeadSummaryDivider, "<!--more-->"), tstLeftNoMD, tstSC1, tstRightNoMD, tstSomeText, tstEOF}},
 	// https://github.com/gohugoio/hugo/issues/5464
-	{"Summary and shortcode only", "+++\nfoo = \"bar\"\n+++\n{{< sc1 >}}\n<!--more-->\n{{< sc2 >}}", []Item{tstFrontMatterTOML, tstLeftNoMD, tstSC1, tstRightNoMD, tstNewline, tstSummaryDivider, tstLeftNoMD, tstSC2, tstRightNoMD, tstEOF}},
+	{"Summary and shortcode only", "+++\nfoo = \"bar\"\n+++\n{{< sc1 >}}\n<!--more-->\n{{< sc2 >}}", []typeText{tstFrontMatterTOML, tstLeftNoMD, tstSC1, tstRightNoMD, tstNewline, tstSummaryDivider, tstLeftNoMD, tstSC2, tstRightNoMD, tstEOF}},
 }

 func TestFrontMatter(t *testing.T) {
 	t.Parallel()
+	c := qt.New(t)
 	for i, test := range frontMatterTests {
 		items := collect([]byte(test.input), false, lexIntroSection)
-		if !equal(items, test.items) {
-			got := crLfReplacer.Replace(fmt.Sprint(items))
-			expected := crLfReplacer.Replace(fmt.Sprint(test.items))
-			t.Errorf("[%d] %s: got\n\t%v\nexpected\n\t%v", i, test.name, got, expected)
+		if !equal(test.input, items, test.items) {
+			got := itemsToString(items, []byte(test.input))
+			expected := testItemsToString(test.items)
+			c.Assert(got, qt.Equals, expected, qt.Commentf("Test %d: %s", i, test.name))
 		}
 	}
 }

+func itemsToString(items []Item, source []byte) string {
+	var sb strings.Builder
+	for i, item := range items {
+		var s string
+		if item.Err != nil {
+			s = item.Err.Error()
+		} else {
+			s = string(item.Val(source))
+		}
+		sb.WriteString(fmt.Sprintf("%s: %s\n", item.Type, s))
+
+		if i < len(items)-1 {
+			sb.WriteString("\n")
+		}
+	}
+	return crLfReplacer.Replace(sb.String())
+}
+
+func testItemsToString(items []typeText) string {
+	var sb strings.Builder
+	for i, item := range items {
+		sb.WriteString(fmt.Sprintf("%s: %s\n", item.typ, item.text))
+
+		if i < len(items)-1 {
+			sb.WriteString("\n")
+		}
+	}
+	return crLfReplacer.Replace(sb.String())
+}
+
 func collectWithConfig(input []byte, skipFrontMatter bool, stateStart stateFunc, cfg Config) (items []Item) {
 	l := newPageLexer(input, stateStart, cfg)
 	l.run()
-	t := l.newIterator()
+	iter := NewIterator(l.items)

 	for {
-		item := t.Next()
+		item := iter.Next()
 		items = append(items, item)
 		if item.Type == tEOF || item.Type == tError {
 			break
@ -108,19 +145,34 @@ func collect(input []byte, skipFrontMatter bool, stateStart stateFunc) (items []
 	return collectWithConfig(input, skipFrontMatter, stateStart, cfg)
 }

+func collectStringMain(input string) []Item {
+	return collect([]byte(input), true, lexMainSection)
+}
+
 // no positional checking, for now ...
-func equal(i1, i2 []Item) bool {
-	if len(i1) != len(i2) {
+func equal(source string, got []Item, expect []typeText) bool {
+	if len(got) != len(expect) {
 		return false
 	}
-	for k := range i1 {
-		if i1[k].Type != i2[k].Type {
+	sourceb := []byte(source)
+	for k := range got {
+		g := got[k]
+		e := expect[k]
+		if g.Type != e.typ {
 			return false
 		}

-		if !reflect.DeepEqual(i1[k].Val, i2[k].Val) {
+		var s string
+		if g.Err != nil {
+			s = g.Err.Error()
+		} else {
+			s = string(g.Val(sourceb))
+		}
+
+		if s != e.text {
 			return false
 		}
+
 	}
 	return true
 }
--- a/parser/pageparser/pageparser_main_test.go
+++ b/parser/pageparser/pageparser_main_test.go
@ -14,27 +14,29 @@
 package pageparser

 import (
-	"fmt"
 	"testing"
+
+	qt "github.com/frankban/quicktest"
 )

 func TestMain(t *testing.T) {
 	t.Parallel()
+	c := qt.New(t)

 	mainTests := []lexerTest{
-		{"emoji #1", "Some text with :emoji:", []Item{nti(tText, "Some text with "), nti(TypeEmoji, ":emoji:"), tstEOF}},
-		{"emoji #2", "Some text with :emoji: and some text.", []Item{nti(tText, "Some text with "), nti(TypeEmoji, ":emoji:"), nti(tText, " and some text."), tstEOF}},
-		{"looks like an emoji #1", "Some text and then :emoji", []Item{nti(tText, "Some text and then "), nti(tText, ":"), nti(tText, "emoji"), tstEOF}},
-		{"looks like an emoji #2", "Some text and then ::", []Item{nti(tText, "Some text and then "), nti(tText, ":"), nti(tText, ":"), tstEOF}},
-		{"looks like an emoji #3", ":Some :text", []Item{nti(tText, ":"), nti(tText, "Some "), nti(tText, ":"), nti(tText, "text"), tstEOF}},
+		{"emoji #1", "Some text with :emoji:", []typeText{nti(tText, "Some text with "), nti(TypeEmoji, ":emoji:"), tstEOF}},
+		{"emoji #2", "Some text with :emoji: and some text.", []typeText{nti(tText, "Some text with "), nti(TypeEmoji, ":emoji:"), nti(tText, " and some text."), tstEOF}},
+		{"looks like an emoji #1", "Some text and then :emoji", []typeText{nti(tText, "Some text and then "), nti(tText, ":"), nti(tText, "emoji"), tstEOF}},
+		{"looks like an emoji #2", "Some text and then ::", []typeText{nti(tText, "Some text and then "), nti(tText, ":"), nti(tText, ":"), tstEOF}},
+		{"looks like an emoji #3", ":Some :text", []typeText{nti(tText, ":"), nti(tText, "Some "), nti(tText, ":"), nti(tText, "text"), tstEOF}},
 	}

 	for i, test := range mainTests {
 		items := collectWithConfig([]byte(test.input), false, lexMainSection, Config{EnableEmoji: true})
-		if !equal(items, test.items) {
-			got := crLfReplacer.Replace(fmt.Sprint(items))
-			expected := crLfReplacer.Replace(fmt.Sprint(test.items))
-			t.Errorf("[%d] %s: got\n\t%v\nexpected\n\t%v", i, test.name, got, expected)
+		if !equal(test.input, items, test.items) {
+			got := itemsToString(items, []byte(test.input))
+			expected := testItemsToString(test.items)
+			c.Assert(got, qt.Equals, expected, qt.Commentf("Test %d: %s", i, test.name))
 		}
 	}
 }
--- a/parser/pageparser/pageparser_shortcode_test.go
+++ b/parser/pageparser/pageparser_shortcode_test.go
@ -15,6 +15,8 @@ package pageparser

 import (
 	"testing"
+
+	qt "github.com/frankban/quicktest"
 )

 var (
@ -38,27 +40,28 @@ var (
 	tstParamFloat     = nti(tScParam, "3.14")
 	tstVal            = nti(tScParamVal, "Hello World")
 	tstText           = nti(tText, "Hello World")
+	tstIgnoreEscape   = nti(TypeIgnore, "\\")
 )

 var shortCodeLexerTests = []lexerTest{
-	{"empty", "", []Item{tstEOF}},
-	{"spaces", " \t\n", []Item{nti(tText, " \t\n"), tstEOF}},
-	{"text", `to be or not`, []Item{nti(tText, "to be or not"), tstEOF}},
-	{"no markup", `{{< sc1 >}}`, []Item{tstLeftNoMD, tstSC1, tstRightNoMD, tstEOF}},
-	{"with EOL", "{{< sc1 \n >}}", []Item{tstLeftNoMD, tstSC1, tstRightNoMD, tstEOF}},
+	{"empty", "", []typeText{tstEOF}},
+	{"spaces", " \t\n", []typeText{nti(tText, " \t\n"), tstEOF}},
+	{"text", `to be or not`, []typeText{nti(tText, "to be or not"), tstEOF}},
+	{"no markup", `{{< sc1 >}}`, []typeText{tstLeftNoMD, tstSC1, tstRightNoMD, tstEOF}},
+	{"with EOL", "{{< sc1 \n >}}", []typeText{tstLeftNoMD, tstSC1, tstRightNoMD, tstEOF}},

-	{"forward slash inside name", `{{< sc/sub >}}`, []Item{tstLeftNoMD, tstSCSlash, tstRightNoMD, tstEOF}},
+	{"forward slash inside name", `{{< sc/sub >}}`, []typeText{tstLeftNoMD, tstSCSlash, tstRightNoMD, tstEOF}},

-	{"simple with markup", `{{% sc1 %}}`, []Item{tstLeftMD, tstSC1, tstRightMD, tstEOF}},
-	{"with spaces", `{{<     sc1     >}}`, []Item{tstLeftNoMD, tstSC1, tstRightNoMD, tstEOF}},
-	{"indented on new line", "Hello\n    {{% sc1 %}}", []Item{nti(tText, "Hello\n"), nti(tIndentation, "    "), tstLeftMD, tstSC1, tstRightMD, tstEOF}},
-	{"indented on new line tab", "Hello\n\t{{% sc1 %}}", []Item{nti(tText, "Hello\n"), nti(tIndentation, "\t"), tstLeftMD, tstSC1, tstRightMD, tstEOF}},
-	{"indented on first line", "    {{% sc1 %}}", []Item{nti(tIndentation, "    "), tstLeftMD, tstSC1, tstRightMD, tstEOF}},
-	{"mismatched rightDelim", `{{< sc1 %}}`, []Item{
+	{"simple with markup", `{{% sc1 %}}`, []typeText{tstLeftMD, tstSC1, tstRightMD, tstEOF}},
+	{"with spaces", `{{<     sc1     >}}`, []typeText{tstLeftNoMD, tstSC1, tstRightNoMD, tstEOF}},
+	{"indented on new line", "Hello\n    {{% sc1 %}}", []typeText{nti(tText, "Hello\n"), nti(tIndentation, "    "), tstLeftMD, tstSC1, tstRightMD, tstEOF}},
+	{"indented on new line tab", "Hello\n\t{{% sc1 %}}", []typeText{nti(tText, "Hello\n"), nti(tIndentation, "\t"), tstLeftMD, tstSC1, tstRightMD, tstEOF}},
+	{"indented on first line", "    {{% sc1 %}}", []typeText{nti(tIndentation, "    "), tstLeftMD, tstSC1, tstRightMD, tstEOF}},
+	{"mismatched rightDelim", `{{< sc1 %}}`, []typeText{
 		tstLeftNoMD, tstSC1,
 		nti(tError, "unrecognized character in shortcode action: U+0025 '%'. Note: Parameters with non-alphanumeric args must be quoted"),
 	}},
-	{"inner, markup", `{{% sc1 %}} inner {{% /sc1 %}}`, []Item{
+	{"inner, markup", `{{% sc1 %}} inner {{% /sc1 %}}`, []typeText{
 		tstLeftMD,
 		tstSC1,
 		tstRightMD,
@ -69,79 +72,79 @@ var shortCodeLexerTests = []lexerTest{
 		tstRightMD,
 		tstEOF,
 	}},
-	{"close, but no open", `{{< /sc1 >}}`, []Item{
+	{"close, but no open", `{{< /sc1 >}}`, []typeText{
 		tstLeftNoMD, nti(tError, "got closing shortcode, but none is open"),
 	}},
-	{"close wrong", `{{< sc1 >}}{{< /another >}}`, []Item{
+	{"close wrong", `{{< sc1 >}}{{< /another >}}`, []typeText{
 		tstLeftNoMD, tstSC1, tstRightNoMD, tstLeftNoMD, tstSCClose,
 		nti(tError, "closing tag for shortcode 'another' does not match start tag"),
 	}},
-	{"close, but no open, more", `{{< sc1 >}}{{< /sc1 >}}{{< /another >}}`, []Item{
+	{"close, but no open, more", `{{< sc1 >}}{{< /sc1 >}}{{< /another >}}`, []typeText{
 		tstLeftNoMD, tstSC1, tstRightNoMD, tstLeftNoMD, tstSCClose, tstSC1, tstRightNoMD, tstLeftNoMD, tstSCClose,
 		nti(tError, "closing tag for shortcode 'another' does not match start tag"),
 	}},
-	{"close with extra keyword", `{{< sc1 >}}{{< /sc1 keyword>}}`, []Item{
+	{"close with extra keyword", `{{< sc1 >}}{{< /sc1 keyword>}}`, []typeText{
 		tstLeftNoMD, tstSC1, tstRightNoMD, tstLeftNoMD, tstSCClose, tstSC1,
 		nti(tError, "unclosed shortcode"),
 	}},
-	{"float param, positional", `{{< sc1 3.14 >}}`, []Item{
+	{"float param, positional", `{{< sc1 3.14 >}}`, []typeText{
 		tstLeftNoMD, tstSC1, nti(tScParam, "3.14"), tstRightNoMD, tstEOF,
 	}},
-	{"float param, named", `{{< sc1 param1=3.14 >}}`, []Item{
+	{"float param, named", `{{< sc1 param1=3.14 >}}`, []typeText{
 		tstLeftNoMD, tstSC1, tstParam1, nti(tScParamVal, "3.14"), tstRightNoMD, tstEOF,
 	}},
-	{"named param, raw string", `{{< sc1 param1=` + "`" + "Hello World" + "`" + " >}}", []Item{
+	{"named param, raw string", `{{< sc1 param1=` + "`" + "Hello World" + "`" + " >}}", []typeText{
 		tstLeftNoMD, tstSC1, tstParam1, nti(tScParamVal, "Hello World"), tstRightNoMD, tstEOF,
 	}},
-	{"float param, named, space before", `{{< sc1 param1= 3.14 >}}`, []Item{
+	{"float param, named, space before", `{{< sc1 param1= 3.14 >}}`, []typeText{
 		tstLeftNoMD, tstSC1, tstParam1, nti(tScParamVal, "3.14"), tstRightNoMD, tstEOF,
 	}},
-	{"Youtube id", `{{< sc1 -ziL-Q_456igdO-4 >}}`, []Item{
+	{"Youtube id", `{{< sc1 -ziL-Q_456igdO-4 >}}`, []typeText{
 		tstLeftNoMD, tstSC1, nti(tScParam, "-ziL-Q_456igdO-4"), tstRightNoMD, tstEOF,
 	}},
-	{"non-alphanumerics param quoted", `{{< sc1 "-ziL-.%QigdO-4" >}}`, []Item{
+	{"non-alphanumerics param quoted", `{{< sc1 "-ziL-.%QigdO-4" >}}`, []typeText{
 		tstLeftNoMD, tstSC1, nti(tScParam, "-ziL-.%QigdO-4"), tstRightNoMD, tstEOF,
 	}},
-	{"raw string", `{{< sc1` + "`" + "Hello World" + "`" + ` >}}`, []Item{
+	{"raw string", `{{< sc1` + "`" + "Hello World" + "`" + ` >}}`, []typeText{
 		tstLeftNoMD, tstSC1, nti(tScParam, "Hello World"), tstRightNoMD, tstEOF,
 	}},
 	{"raw string with newline", `{{< sc1` + "`" + `Hello 
-	World` + "`" + ` >}}`, []Item{
+	World` + "`" + ` >}}`, []typeText{
 		tstLeftNoMD, tstSC1, nti(tScParam, `Hello 
 	World`), tstRightNoMD, tstEOF,
 	}},
-	{"raw string with escape character", `{{< sc1` + "`" + `Hello \b World` + "`" + ` >}}`, []Item{
+	{"raw string with escape character", `{{< sc1` + "`" + `Hello \b World` + "`" + ` >}}`, []typeText{
 		tstLeftNoMD, tstSC1, nti(tScParam, `Hello \b World`), tstRightNoMD, tstEOF,
 	}},
-	{"two params", `{{< sc1 param1   param2 >}}`, []Item{
+	{"two params", `{{< sc1 param1   param2 >}}`, []typeText{
 		tstLeftNoMD, tstSC1, tstParam1, tstParam2, tstRightNoMD, tstEOF,
 	}},
 	// issue #934
-	{"self-closing", `{{< sc1 />}}`, []Item{
+	{"self-closing", `{{< sc1 />}}`, []typeText{
 		tstLeftNoMD, tstSC1, tstSCClose, tstRightNoMD, tstEOF,
 	}},
 	// Issue 2498
-	{"multiple self-closing", `{{< sc1 />}}{{< sc1 />}}`, []Item{
+	{"multiple self-closing", `{{< sc1 />}}{{< sc1 />}}`, []typeText{
 		tstLeftNoMD, tstSC1, tstSCClose, tstRightNoMD,
 		tstLeftNoMD, tstSC1, tstSCClose, tstRightNoMD, tstEOF,
 	}},
-	{"self-closing with param", `{{< sc1 param1 />}}`, []Item{
+	{"self-closing with param", `{{< sc1 param1 />}}`, []typeText{
 		tstLeftNoMD, tstSC1, tstParam1, tstSCClose, tstRightNoMD, tstEOF,
 	}},
-	{"multiple self-closing with param", `{{< sc1 param1 />}}{{< sc1 param1 />}}`, []Item{
+	{"multiple self-closing with param", `{{< sc1 param1 />}}{{< sc1 param1 />}}`, []typeText{
 		tstLeftNoMD, tstSC1, tstParam1, tstSCClose, tstRightNoMD,
 		tstLeftNoMD, tstSC1, tstParam1, tstSCClose, tstRightNoMD, tstEOF,
 	}},
-	{"multiple different self-closing with param", `{{< sc1 param1 />}}{{< sc2 param1 />}}`, []Item{
+	{"multiple different self-closing with param", `{{< sc1 param1 />}}{{< sc2 param1 />}}`, []typeText{
 		tstLeftNoMD, tstSC1, tstParam1, tstSCClose, tstRightNoMD,
 		tstLeftNoMD, tstSC2, tstParam1, tstSCClose, tstRightNoMD, tstEOF,
 	}},
-	{"nested simple", `{{< sc1 >}}{{< sc2 >}}{{< /sc1 >}}`, []Item{
+	{"nested simple", `{{< sc1 >}}{{< sc2 >}}{{< /sc1 >}}`, []typeText{
 		tstLeftNoMD, tstSC1, tstRightNoMD,
 		tstLeftNoMD, tstSC2, tstRightNoMD,
 		tstLeftNoMD, tstSCClose, tstSC1, tstRightNoMD, tstEOF,
 	}},
-	{"nested complex", `{{< sc1 >}}ab{{% sc2 param1 %}}cd{{< sc3 >}}ef{{< /sc3 >}}gh{{% /sc2 %}}ij{{< /sc1 >}}kl`, []Item{
+	{"nested complex", `{{< sc1 >}}ab{{% sc2 param1 %}}cd{{< sc3 >}}ef{{< /sc3 >}}gh{{% /sc2 %}}ij{{< /sc1 >}}kl`, []typeText{
 		tstLeftNoMD, tstSC1, tstRightNoMD,
 		nti(tText, "ab"),
 		tstLeftMD, tstSC2, tstParam1, tstRightMD,
@ -156,106 +159,109 @@ var shortCodeLexerTests = []lexerTest{
 		nti(tText, "kl"), tstEOF,
 	}},

-	{"two quoted params", `{{< sc1 "param nr. 1" "param nr. 2" >}}`, []Item{
+	{"two quoted params", `{{< sc1 "param nr. 1" "param nr. 2" >}}`, []typeText{
 		tstLeftNoMD, tstSC1, nti(tScParam, "param nr. 1"), nti(tScParam, "param nr. 2"), tstRightNoMD, tstEOF,
 	}},
-	{"two named params", `{{< sc1 param1="Hello World" param2="p2Val">}}`, []Item{
+	{"two named params", `{{< sc1 param1="Hello World" param2="p2Val">}}`, []typeText{
 		tstLeftNoMD, tstSC1, tstParam1, tstVal, tstParam2, nti(tScParamVal, "p2Val"), tstRightNoMD, tstEOF,
 	}},
-	{"escaped quotes", `{{< sc1 param1=\"Hello World\"  >}}`, []Item{
+	{"escaped quotes", `{{< sc1 param1=\"Hello World\"  >}}`, []typeText{
 		tstLeftNoMD, tstSC1, tstParam1, tstVal, tstRightNoMD, tstEOF,
 	}},
-	{"escaped quotes, positional param", `{{< sc1 \"param1\"  >}}`, []Item{
+	{"escaped quotes, positional param", `{{< sc1 \"param1\"  >}}`, []typeText{
 		tstLeftNoMD, tstSC1, tstParam1, tstRightNoMD, tstEOF,
 	}},
-	{"escaped quotes inside escaped quotes", `{{< sc1 param1=\"Hello \"escaped\" World\"  >}}`, []Item{
+	{"escaped quotes inside escaped quotes", `{{< sc1 param1=\"Hello \"escaped\" World\"  >}}`, []typeText{
 		tstLeftNoMD, tstSC1, tstParam1,
 		nti(tScParamVal, `Hello `), nti(tError, `got positional parameter 'escaped'. Cannot mix named and positional parameters`),
 	}},
 	{
 		"escaped quotes inside nonescaped quotes",
 		`{{< sc1 param1="Hello \"escaped\" World"  >}}`,
-		[]Item{
-			tstLeftNoMD, tstSC1, tstParam1, nti(tScParamVal, `Hello "escaped" World`), tstRightNoMD, tstEOF,
+		[]typeText{
+			tstLeftNoMD, tstSC1, tstParam1, tstIgnoreEscape, tstIgnoreEscape, nti(tScParamVal, `Hello "escaped" World`), tstRightNoMD, tstEOF,
 		},
 	},
 	{
 		"escaped quotes inside nonescaped quotes in positional param",
 		`{{< sc1 "Hello \"escaped\" World"  >}}`,
-		[]Item{
-			tstLeftNoMD, tstSC1, nti(tScParam, `Hello "escaped" World`), tstRightNoMD, tstEOF,
+		[]typeText{
+			tstLeftNoMD, tstSC1, tstIgnoreEscape, tstIgnoreEscape, nti(tScParam, `Hello "escaped" World`), tstRightNoMD, tstEOF,
 		},
 	},
-	{"escaped raw string, named param", `{{< sc1 param1=` + `\` + "`" + "Hello World" + `\` + "`" + ` >}}`, []Item{
+	{"escaped raw string, named param", `{{< sc1 param1=` + `\` + "`" + "Hello World" + `\` + "`" + ` >}}`, []typeText{
 		tstLeftNoMD, tstSC1, tstParam1, nti(tError, "unrecognized escape character"),
 	}},
-	{"escaped raw string, positional param", `{{< sc1 param1 ` + `\` + "`" + "Hello World" + `\` + "`" + ` >}}`, []Item{
+	{"escaped raw string, positional param", `{{< sc1 param1 ` + `\` + "`" + "Hello World" + `\` + "`" + ` >}}`, []typeText{
 		tstLeftNoMD, tstSC1, tstParam1, nti(tError, "unrecognized escape character"),
 	}},
-	{"two raw string params", `{{< sc1` + "`" + "Hello World" + "`" + "`" + "Second Param" + "`" + ` >}}`, []Item{
+	{"two raw string params", `{{< sc1` + "`" + "Hello World" + "`" + "`" + "Second Param" + "`" + ` >}}`, []typeText{
 		tstLeftNoMD, tstSC1, nti(tScParam, "Hello World"), nti(tScParam, "Second Param"), tstRightNoMD, tstEOF,
 	}},
-	{"unterminated quote", `{{< sc1 param2="Hello World>}}`, []Item{
+	{"unterminated quote", `{{< sc1 param2="Hello World>}}`, []typeText{
 		tstLeftNoMD, tstSC1, tstParam2, nti(tError, "unterminated quoted string in shortcode parameter-argument: 'Hello World>}}'"),
 	}},
-	{"unterminated raw string", `{{< sc1` + "`" + "Hello World" + ` >}}`, []Item{
+	{"unterminated raw string", `{{< sc1` + "`" + "Hello World" + ` >}}`, []typeText{
 		tstLeftNoMD, tstSC1, nti(tError, "unterminated raw string in shortcode parameter-argument: 'Hello World >}}'"),
 	}},
-	{"unterminated raw string in second argument", `{{< sc1` + "`" + "Hello World" + "`" + "`" + "Second Param" + ` >}}`, []Item{
+	{"unterminated raw string in second argument", `{{< sc1` + "`" + "Hello World" + "`" + "`" + "Second Param" + ` >}}`, []typeText{
 		tstLeftNoMD, tstSC1, nti(tScParam, "Hello World"), nti(tError, "unterminated raw string in shortcode parameter-argument: 'Second Param >}}'"),
 	}},
-	{"one named param, one not", `{{< sc1 param1="Hello World" p2 >}}`, []Item{
+	{"one named param, one not", `{{< sc1 param1="Hello World" p2 >}}`, []typeText{
 		tstLeftNoMD, tstSC1, tstParam1, tstVal,
 		nti(tError, "got positional parameter 'p2'. Cannot mix named and positional parameters"),
 	}},
-	{"one named param, one quoted positional param, both raw strings", `{{< sc1 param1=` + "`" + "Hello World" + "`" + "`" + "Second Param" + "`" + ` >}}`, []Item{
+	{"one named param, one quoted positional param, both raw strings", `{{< sc1 param1=` + "`" + "Hello World" + "`" + "`" + "Second Param" + "`" + ` >}}`, []typeText{
 		tstLeftNoMD, tstSC1, tstParam1, tstVal,
 		nti(tError, "got quoted positional parameter. Cannot mix named and positional parameters"),
 	}},
-	{"one named param, one quoted positional param", `{{< sc1 param1="Hello World" "And Universe" >}}`, []Item{
+	{"one named param, one quoted positional param", `{{< sc1 param1="Hello World" "And Universe" >}}`, []typeText{
 		tstLeftNoMD, tstSC1, tstParam1, tstVal,
 		nti(tError, "got quoted positional parameter. Cannot mix named and positional parameters"),
 	}},
-	{"one quoted positional param, one named param", `{{< sc1 "param1" param2="And Universe" >}}`, []Item{
+	{"one quoted positional param, one named param", `{{< sc1 "param1" param2="And Universe" >}}`, []typeText{
 		tstLeftNoMD, tstSC1, tstParam1,
 		nti(tError, "got named parameter 'param2'. Cannot mix named and positional parameters"),
 	}},
-	{"ono positional param, one not", `{{< sc1 param1 param2="Hello World">}}`, []Item{
+	{"ono positional param, one not", `{{< sc1 param1 param2="Hello World">}}`, []typeText{
 		tstLeftNoMD, tstSC1, tstParam1,
 		nti(tError, "got named parameter 'param2'. Cannot mix named and positional parameters"),
 	}},
-	{"commented out", `{{</* sc1 */>}}`, []Item{
+	{"commented out", `{{</* sc1 */>}}`, []typeText{
 		nti(tText, "{{<"), nti(tText, " sc1 "), nti(tText, ">}}"), tstEOF,
 	}},
-	{"commented out, with asterisk inside", `{{</* sc1 "**/*.pdf" */>}}`, []Item{
+	{"commented out, with asterisk inside", `{{</* sc1 "**/*.pdf" */>}}`, []typeText{
 		nti(tText, "{{<"), nti(tText, " sc1 \"**/*.pdf\" "), nti(tText, ">}}"), tstEOF,
 	}},
-	{"commented out, missing close", `{{</* sc1 >}}`, []Item{
+	{"commented out, missing close", `{{</* sc1 >}}`, []typeText{
 		nti(tError, "comment must be closed"),
 	}},
-	{"commented out, misplaced close", `{{</* sc1 >}}*/`, []Item{
+	{"commented out, misplaced close", `{{</* sc1 >}}*/`, []typeText{
 		nti(tError, "comment must be closed"),
 	}},
 	// Inline shortcodes
-	{"basic inline", `{{< sc1.inline >}}Hello World{{< /sc1.inline >}}`, []Item{tstLeftNoMD, tstSC1Inline, tstRightNoMD, tstText, tstLeftNoMD, tstSCClose, tstSC1Inline, tstRightNoMD, tstEOF}},
-	{"basic inline with space", `{{< sc1.inline >}}Hello World{{< / sc1.inline >}}`, []Item{tstLeftNoMD, tstSC1Inline, tstRightNoMD, tstText, tstLeftNoMD, tstSCClose, tstSC1Inline, tstRightNoMD, tstEOF}},
-	{"inline self closing", `{{< sc1.inline >}}Hello World{{< /sc1.inline >}}Hello World{{< sc1.inline />}}`, []Item{tstLeftNoMD, tstSC1Inline, tstRightNoMD, tstText, tstLeftNoMD, tstSCClose, tstSC1Inline, tstRightNoMD, tstText, tstLeftNoMD, tstSC1Inline, tstSCClose, tstRightNoMD, tstEOF}},
-	{"inline self closing, then a new inline", `{{< sc1.inline >}}Hello World{{< /sc1.inline >}}Hello World{{< sc1.inline />}}{{< sc2.inline >}}Hello World{{< /sc2.inline >}}`, []Item{
+	{"basic inline", `{{< sc1.inline >}}Hello World{{< /sc1.inline >}}`, []typeText{tstLeftNoMD, tstSC1Inline, tstRightNoMD, tstText, tstLeftNoMD, tstSCClose, tstSC1Inline, tstRightNoMD, tstEOF}},
+	{"basic inline with space", `{{< sc1.inline >}}Hello World{{< / sc1.inline >}}`, []typeText{tstLeftNoMD, tstSC1Inline, tstRightNoMD, tstText, tstLeftNoMD, tstSCClose, tstSC1Inline, tstRightNoMD, tstEOF}},
+	{"inline self closing", `{{< sc1.inline >}}Hello World{{< /sc1.inline >}}Hello World{{< sc1.inline />}}`, []typeText{tstLeftNoMD, tstSC1Inline, tstRightNoMD, tstText, tstLeftNoMD, tstSCClose, tstSC1Inline, tstRightNoMD, tstText, tstLeftNoMD, tstSC1Inline, tstSCClose, tstRightNoMD, tstEOF}},
+	{"inline self closing, then a new inline", `{{< sc1.inline >}}Hello World{{< /sc1.inline >}}Hello World{{< sc1.inline />}}{{< sc2.inline >}}Hello World{{< /sc2.inline >}}`, []typeText{
 		tstLeftNoMD, tstSC1Inline, tstRightNoMD, tstText, tstLeftNoMD, tstSCClose, tstSC1Inline, tstRightNoMD, tstText, tstLeftNoMD, tstSC1Inline, tstSCClose, tstRightNoMD,
 		tstLeftNoMD, tstSC2Inline, tstRightNoMD, tstText, tstLeftNoMD, tstSCClose, tstSC2Inline, tstRightNoMD, tstEOF,
 	}},
-	{"inline with template syntax", `{{< sc1.inline >}}{{ .Get 0 }}{{ .Get 1 }}{{< /sc1.inline >}}`, []Item{tstLeftNoMD, tstSC1Inline, tstRightNoMD, nti(tText, "{{ .Get 0 }}"), nti(tText, "{{ .Get 1 }}"), tstLeftNoMD, tstSCClose, tstSC1Inline, tstRightNoMD, tstEOF}},
-	{"inline with nested shortcode (not supported)", `{{< sc1.inline >}}Hello World{{< sc1 >}}{{< /sc1.inline >}}`, []Item{tstLeftNoMD, tstSC1Inline, tstRightNoMD, tstText, nti(tError, "inline shortcodes do not support nesting")}},
-	{"inline case mismatch", `{{< sc1.Inline >}}Hello World{{< /sc1.Inline >}}`, []Item{tstLeftNoMD, nti(tError, "period in shortcode name only allowed for inline identifiers")}},
+	{"inline with template syntax", `{{< sc1.inline >}}{{ .Get 0 }}{{ .Get 1 }}{{< /sc1.inline >}}`, []typeText{tstLeftNoMD, tstSC1Inline, tstRightNoMD, nti(tText, "{{ .Get 0 }}"), nti(tText, "{{ .Get 1 }}"), tstLeftNoMD, tstSCClose, tstSC1Inline, tstRightNoMD, tstEOF}},
+	{"inline with nested shortcode (not supported)", `{{< sc1.inline >}}Hello World{{< sc1 >}}{{< /sc1.inline >}}`, []typeText{tstLeftNoMD, tstSC1Inline, tstRightNoMD, tstText, nti(tError, "inline shortcodes do not support nesting")}},
+	{"inline case mismatch", `{{< sc1.Inline >}}Hello World{{< /sc1.Inline >}}`, []typeText{tstLeftNoMD, nti(tError, "period in shortcode name only allowed for inline identifiers")}},
 }

 func TestShortcodeLexer(t *testing.T) {
 	t.Parallel()
+	c := qt.New(t)
 	for i, test := range shortCodeLexerTests {
 		t.Run(test.name, func(t *testing.T) {
 			items := collect([]byte(test.input), true, lexMainSection)
-			if !equal(items, test.items) {
-				t.Errorf("[%d] %s: got\n\t%v\nexpected\n\t%v", i, test.name, items, test.items)
+			if !equal(test.input, items, test.items) {
+				got := itemsToString(items, []byte(test.input))
+				expected := testItemsToString(test.items)
+				c.Assert(got, qt.Equals, expected, qt.Commentf("Test %d: %s", i, test.name))
 			}
 		})
 	}
--- a/parser/pageparser/pageparser_test.go
+++ b/parser/pageparser/pageparser_test.go
@ -14,6 +14,7 @@
 package pageparser

 import (
+	"bytes"
 	"strings"
 	"testing"

@ -88,3 +89,15 @@ func TestFormatFromFrontMatterType(t *testing.T) {
 		c.Assert(FormatFromFrontMatterType(test.typ), qt.Equals, test.expect)
 	}
 }
+
+func TestIsProbablyItemsSource(t *testing.T) { // Checks IsProbablySourceOfItems against the real source and three inputs that must be rejected.
+	c := qt.New(t)
+
+	input := ` {{< foo >}} ` // one shortcode with a single space on each side (13 bytes)
+	items := collectStringMain(input) // presumably lexes input into items; helper is defined elsewhere — confirm
+
+	c.Assert(IsProbablySourceOfItems([]byte(input), items), qt.IsTrue) // the exact bytes the items were collected from
+	c.Assert(IsProbablySourceOfItems(bytes.Repeat([]byte(" "), len(input)), items), qt.IsFalse) // same length, but spaces only
+	c.Assert(IsProbablySourceOfItems([]byte(`{{< foo >}}  `), items), qt.IsFalse) // same length, shortcode moved to offset 0
+	c.Assert(IsProbablySourceOfItems([]byte(``), items), qt.IsFalse) // empty source
+}