From 8e5044d7f5af5f3454e65860809f4a23692a0b00 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B8rn=20Erik=20Pedersen?= Date: Thu, 1 Sep 2022 09:26:27 +0200 Subject: [PATCH] Fix shortcode parser regression with quoted param values This issue was introduced in `v0.102.0`. In 223bf2800488ad5d38854bbb595d789bc35ebe32 we removed the byte source from the parsed page result, which meant we had to preserve exact positioning for all elements. This introduced some new `TypeIgnore` tokens which we, wrongly, assumed it didn't matter where we put them in the result slice (they should be ignored anyway). But it seems that this broke the logic where we determine if it's positional or named params in the case where the parameter value contains escaped quotes. This commit makes sure that these ignore tokens (the backslashes) are never sent back to the client, which is how it was before `v0.102.0`. This commit also restores some error information that was lost in that same commit. Fixes #10236 --- hugolib/page.go | 4 +-- hugolib/shortcode.go | 6 ++-- hugolib/shortcode_test.go | 32 +++++++++++++++++++ parser/pageparser/pagelexer.go | 7 +++- .../pageparser/pageparser_shortcode_test.go | 5 ++- 5 files changed, 43 insertions(+), 11 deletions(-) diff --git a/hugolib/page.go b/hugolib/page.go index 4752d11f1..37bf528c7 100644 --- a/hugolib/page.go +++ b/hugolib/page.go @@ -39,8 +39,6 @@ import ( "github.com/gohugoio/hugo/common/herrors" "github.com/gohugoio/hugo/parser/metadecoders" - "errors" - "github.com/gohugoio/hugo/parser/pageparser" "github.com/gohugoio/hugo/output" @@ -762,7 +760,7 @@ Loop: case it.IsEOF(): break Loop case it.IsError(): - err := fail(errors.New(it.ValStr(result.Input())), it) + err := fail(it.Err, it) currShortcode.err = err return err diff --git a/hugolib/shortcode.go b/hugolib/shortcode.go index 1627acacb..b2f42ff1d 100644 --- a/hugolib/shortcode.go +++ b/hugolib/shortcode.go @@ -497,8 +497,6 @@ func (s *shortcodeHandler) renderShortcodesForPage(p *pageState, f output.Format 
return rendered, hasVariants, nil } -var errShortCodeIllegalState = errors.New("Illegal shortcode state") - func (s *shortcodeHandler) parseError(err error, input []byte, pos int) error { if s.p != nil { return s.p.parseError(err, input, pos) @@ -640,7 +638,7 @@ Loop: if params, ok := sc.params.(map[string]any); ok { params[currItem.ValStr(source)] = pt.Next().ValTyped(source) } else { - return sc, errShortCodeIllegalState + return sc, fmt.Errorf("%s: invalid state: invalid param type %T for shortcode %q, expected a map", errorPrefix, params, sc.name) } } } else { @@ -654,7 +652,7 @@ Loop: params = append(params, currItem.ValTyped(source)) sc.params = params } else { - return sc, errShortCodeIllegalState + return sc, fmt.Errorf("%s: invalid state: invalid param type %T for shortcode %q, expected a slice", errorPrefix, params, sc.name) } } } diff --git a/hugolib/shortcode_test.go b/hugolib/shortcode_test.go index 3f9190962..ec521729b 100644 --- a/hugolib/shortcode_test.go +++ b/hugolib/shortcode_test.go @@ -1055,3 +1055,35 @@ title: "p1" `) } + +// Issue 10236. +func TestShortcodeParamEscapedQuote(t *testing.T) { + t.Parallel() + + files := ` +-- config.toml -- +-- content/p1.md -- +--- +title: "p1" +--- + +{{< figure src="/media/spf13.jpg" title="Steve \"Francia\"." 
>}} + +-- layouts/shortcodes/figure.html -- +Title: {{ .Get "title" | safeHTML }} +-- layouts/_default/single.html -- +{{ .Content }} +` + + b := NewIntegrationTestBuilder( + IntegrationTestConfig{ + T: t, + TxtarString: files, + Running: true, + Verbose: true, + }, + ).Build() + + b.AssertFileContent("public/p1/index.html", `Title: Steve "Francia".`) + +} diff --git a/parser/pageparser/pagelexer.go b/parser/pageparser/pagelexer.go index a7e6b6cd4..5f5d147e6 100644 --- a/parser/pageparser/pagelexer.go +++ b/parser/pageparser/pagelexer.go @@ -194,7 +194,12 @@ func (l *pageLexer) ignoreEscapesAndEmit(t ItemType, isString bool) { if i > k { segments = append(segments, lowHigh{k, i}) } - l.append(Item{Type: TypeIgnore, low: i, high: i + w}) + // See issue #10236. + // We don't send the backslash back to the client, + // which makes the end parsing simpler. + // This means that we cannot render the AST back to be + // exactly the same as the input, + // but that was also the situation before we introduced the issue in #10236. 
k = i + w } i += w diff --git a/parser/pageparser/pageparser_shortcode_test.go b/parser/pageparser/pageparser_shortcode_test.go index a95d55ef3..26d836e32 100644 --- a/parser/pageparser/pageparser_shortcode_test.go +++ b/parser/pageparser/pageparser_shortcode_test.go @@ -40,7 +40,6 @@ var ( tstParamFloat = nti(tScParam, "3.14") tstVal = nti(tScParamVal, "Hello World") tstText = nti(tText, "Hello World") - tstIgnoreEscape = nti(TypeIgnore, "\\") ) var shortCodeLexerTests = []lexerTest{ @@ -179,14 +178,14 @@ var shortCodeLexerTests = []lexerTest{ "escaped quotes inside nonescaped quotes", `{{< sc1 param1="Hello \"escaped\" World" >}}`, []typeText{ - tstLeftNoMD, tstSC1, tstParam1, tstIgnoreEscape, tstIgnoreEscape, nti(tScParamVal, `Hello "escaped" World`), tstRightNoMD, tstEOF, + tstLeftNoMD, tstSC1, tstParam1, nti(tScParamVal, `Hello "escaped" World`), tstRightNoMD, tstEOF, }, }, { "escaped quotes inside nonescaped quotes in positional param", `{{< sc1 "Hello \"escaped\" World" >}}`, []typeText{ - tstLeftNoMD, tstSC1, tstIgnoreEscape, tstIgnoreEscape, nti(tScParam, `Hello "escaped" World`), tstRightNoMD, tstEOF, + tstLeftNoMD, tstSC1, nti(tScParam, `Hello "escaped" World`), tstRightNoMD, tstEOF, }, }, {"escaped raw string, named param", `{{< sc1 param1=` + `\` + "`" + "Hello World" + `\` + "`" + ` >}}`, []typeText{