Split parse and render for Goldmark

This also speeds up cases where you only need the fragments/ToC and not the rendered content, e.g. Related
with indices of type `fragments`:

```bash

name            old time/op    new time/op    delta
RelatedSite-10    12.3ms ± 2%    10.7ms ± 1%  -12.95%  (p=0.029 n=4+4)

name            old alloc/op   new alloc/op   delta
RelatedSite-10    38.6MB ± 0%    38.2MB ± 0%   -1.08%  (p=0.029 n=4+4)

name            old allocs/op  new allocs/op  delta
RelatedSite-10      117k ± 0%      115k ± 0%   -1.36%  (p=0.029 n=4+4)
```

Fixes #10750
This commit is contained in:
Bjørn Erik Pedersen 2023-02-24 07:23:10 +01:00
parent e442a63bb7
commit 271318ad78
14 changed files with 258 additions and 45 deletions

View file

@ -427,3 +427,52 @@ Image:
<p>html-image: image.jpg|Text: Hello<br> Goodbye|Plain: Hello GoodbyeEND</p>
`)
}
func TestRenderHookContentFragmentsOnSelf(t *testing.T) {
files := `
-- hugo.toml --
baseURL = "https://example.org"
disableKinds = ["taxonomy", "term", "RSS", "sitemap", "robotsTXT"]
-- content/p1.md --
---
title: "p1"
---
## A {#z}
## B
## C
-- content/p2.md --
---
title: "p2"
---
## D
## E
## F
-- layouts/_default/_markup/render-heading.html --
Heading: {{ .Text }}|
Self Fragments: {{ .Page.Fragments.Identifiers }}|
P1 Fragments: {{ (site.GetPage "p1.md").Fragments.Identifiers }}|
-- layouts/_default/single.html --
{{ .Content}}
`
b := NewIntegrationTestBuilder(
IntegrationTestConfig{
T: t,
TxtarString: files,
},
).Build()
b.AssertFileContent("public/p1/index.html", `
Self Fragments: [b c z]
P1 Fragments: [b c z]
`)
b.AssertFileContent("public/p2/index.html", `
Self Fragments: [d e f]
P1 Fragments: [b c z]
`)
}

View file

@ -32,6 +32,8 @@ import (
func NewIntegrationTestBuilder(conf IntegrationTestConfig) *IntegrationTestBuilder {
// Code fences.
conf.TxtarString = strings.ReplaceAll(conf.TxtarString, "§§§", "```")
// Multiline strings.
conf.TxtarString = strings.ReplaceAll(conf.TxtarString, "§§", "`")
data := txtar.Parse([]byte(conf.TxtarString))

View file

@ -115,14 +115,7 @@ func newPageContentOutput(p *pageState, po *pageOutput) (*pageContentOutput, err
isHTML := cp.p.m.markup == "html"
if !isHTML {
r, err := po.contentRenderer.RenderContent(ctx, cp.workContent, true)
if err != nil {
return err
}
cp.workContent = r.Bytes()
if tocProvider, ok := r.(converter.TableOfContentsProvider); ok {
createAndSetToC := func(tocProvider converter.TableOfContentsProvider) {
cfg := p.s.ContentSpec.Converters.GetMarkupConfig()
cp.tableOfContents = tocProvider.TableOfContents()
cp.tableOfContentsHTML = template.HTML(
@ -132,6 +125,31 @@ func newPageContentOutput(p *pageState, po *pageOutput) (*pageContentOutput, err
cfg.TableOfContents.Ordered,
),
)
}
// If the converter supports doing the parsing separately, we do that.
parseResult, ok, err := po.contentRenderer.ParseContent(ctx, cp.workContent)
if err != nil {
return err
}
if ok {
// This is Goldmark.
// Store away the parse result for later use.
createAndSetToC(parseResult)
cp.astDoc = parseResult.Doc()
return nil
}
// This is Asciidoctor etc.
r, err := po.contentRenderer.ParseAndRenderContent(ctx, cp.workContent, true)
if err != nil {
return err
}
cp.workContent = r.Bytes()
if tocProvider, ok := r.(converter.TableOfContentsProvider); ok {
createAndSetToC(tocProvider)
} else {
tmpContent, tmpTableOfContents := helpers.ExtractTOC(cp.workContent)
cp.tableOfContentsHTML = helpers.BytesToHTML(tmpTableOfContents)
@ -153,6 +171,19 @@ func newPageContentOutput(p *pageState, po *pageOutput) (*pageContentOutput, err
return nil
}
if cp.astDoc != nil {
// The content is parsed, but not rendered.
r, ok, err := po.contentRenderer.RenderContent(ctx, cp.workContent, cp.astDoc)
if err != nil {
return err
}
if !ok {
return errors.New("invalid state: astDoc is set but RenderContent returned false")
}
cp.workContent = r.Bytes()
}
if p.cmap.hasNonMarkdownShortcode || cp.placeholdersEnabled {
// There are one or more replacement tokens to be replaced.
var hasShortcodeVariants bool
@ -210,7 +241,7 @@ func newPageContentOutput(p *pageState, po *pageOutput) (*pageContentOutput, err
}
}
} else if cp.p.m.summary != "" {
b, err := po.contentRenderer.RenderContent(ctx, []byte(cp.p.m.summary), false)
b, err := po.contentRenderer.ParseAndRenderContent(ctx, []byte(cp.p.m.summary), false)
if err != nil {
return err
}
@ -282,6 +313,8 @@ type pageContentOutput struct {
summary template.HTML
tableOfContents *tableofcontents.Fragments
tableOfContentsHTML template.HTML
// For Goldmark we split Parse and Render.
astDoc any
truncated bool
@ -682,15 +715,66 @@ func (p *pageContentOutput) setAutoSummary() error {
return nil
}
func (cp *pageContentOutput) RenderContent(ctx context.Context, content []byte, renderTOC bool) (converter.Result, error) {
// getContentConverter initializes the render hooks and then returns the
// content converter of the owning page. If the render hooks fail to
// initialize, it returns a nil converter together with that error.
func (cp *pageContentOutput) getContentConverter() (converter.Converter, error) {
	if err := cp.initRenderHooks(); err != nil {
		return nil, err
	}
	// Look the converter up exactly once; an unused local holding a second
	// lookup would be rejected by the compiler.
	return cp.p.getContentConverter(), nil
}
// ParseAndRenderContent converts content in a single step with the page's
// content converter. renderTOC is passed through unchanged to
// renderContentWithConverter.
func (cp *pageContentOutput) ParseAndRenderContent(ctx context.Context, content []byte, renderTOC bool) (converter.ResultRender, error) {
	conv, err := cp.getContentConverter()
	if err != nil {
		return nil, err
	}

	res, err := cp.renderContentWithConverter(ctx, conv, content, renderTOC)
	return res, err
}
func (cp *pageContentOutput) renderContentWithConverter(ctx context.Context, c converter.Converter, content []byte, renderTOC bool) (converter.Result, error) {
// ParseContent parses content without rendering it, provided the page's
// converter implements converter.ParseRenderer.
//
// The boolean result reports whether the converter supports separate parsing.
// When it is false, the other results are nil and the caller has to fall back
// to ParseAndRenderContent.
func (cp *pageContentOutput) ParseContent(ctx context.Context, content []byte) (converter.ResultParse, bool, error) {
	conv, err := cp.getContentConverter()
	if err != nil {
		return nil, false, err
	}

	pr, supported := conv.(converter.ParseRenderer)
	if !supported {
		return nil, false, nil
	}

	// RenderTOC is always enabled on this path.
	parsed, err := pr.Parse(converter.RenderContext{
		Src:         content,
		RenderTOC:   true,
		GetRenderer: cp.renderHooks.getRenderer,
	})
	return parsed, true, err
}
// RenderContent renders doc, a document previously obtained from
// ParseContent, using content as the source bytes.
//
// The boolean result reports whether the page's converter implements
// converter.ParseRenderer; when it does not, nothing is rendered. After a
// successful render, any identities exposed by the result are registered via
// cp.trackDependency.
func (cp *pageContentOutput) RenderContent(ctx context.Context, content []byte, doc any) (converter.ResultRender, bool, error) {
	conv, err := cp.getContentConverter()
	if err != nil {
		return nil, false, err
	}

	pr, supported := conv.(converter.ParseRenderer)
	if !supported {
		return nil, false, nil
	}

	rendered, err := pr.Render(converter.RenderContext{
		Src:         content,
		RenderTOC:   true,
		GetRenderer: cp.renderHooks.getRenderer,
	}, doc)
	if err != nil {
		// Hand back whatever Render returned alongside the error.
		return rendered, true, err
	}

	if provider, hasIDs := rendered.(identity.IdentitiesProvider); hasIDs {
		for _, v := range provider.GetIdentities() {
			cp.trackDependency(v)
		}
	}
	return rendered, true, nil
}
func (cp *pageContentOutput) renderContentWithConverter(ctx context.Context, c converter.Converter, content []byte, renderTOC bool) (converter.ResultRender, error) {
r, err := c.Convert(
converter.RenderContext{
Src: content,

View file

@ -422,7 +422,7 @@ func doRenderShortcode(
// shortcode.
if sc.doMarkup && (level > 0 || sc.configVersion() == 1) {
var err error
b, err := p.pageOutput.contentRenderer.RenderContent(ctx, []byte(inner), false)
b, err := p.pageOutput.contentRenderer.ParseAndRenderContent(ctx, []byte(inner), false)
if err != nil {
return zeroShortcode, err
}

View file

@ -52,7 +52,7 @@ func (p provider) New(cfg converter.ProviderConfig) (converter.Provider, error)
}
type asciidocResult struct {
converter.Result
converter.ResultRender
toc *tableofcontents.Fragments
}
@ -65,7 +65,7 @@ type asciidocConverter struct {
cfg converter.ProviderConfig
}
func (a *asciidocConverter) Convert(ctx converter.RenderContext) (converter.Result, error) {
func (a *asciidocConverter) Convert(ctx converter.RenderContext) (converter.ResultRender, error) {
b, err := a.getAsciidocContent(ctx.Src, a.ctx)
if err != nil {
return nil, err
@ -75,7 +75,7 @@ func (a *asciidocConverter) Convert(ctx converter.RenderContext) (converter.Resu
return nil, err
}
return asciidocResult{
Result: converter.Bytes(content),
ResultRender: converter.Bytes(content),
toc: toc,
}, nil
}

View file

@ -74,7 +74,7 @@ var NopConverter = new(nopConverter)
type nopConverter int
func (nopConverter) Convert(ctx RenderContext) (Result, error) {
// Convert ignores ctx and always returns a fresh, empty buffer and a nil error.
func (nopConverter) Convert(ctx RenderContext) (ResultRender, error) {
	return new(bytes.Buffer), nil
}
@ -85,15 +85,29 @@ func (nopConverter) Supports(feature identity.Identity) bool {
// Converter wraps the Convert method that converts some markup into
// another format, e.g. Markdown to HTML.
type Converter interface {
Convert(ctx RenderContext) (Result, error)
Convert(ctx RenderContext) (ResultRender, error)
Supports(feature identity.Identity) bool
}
// Result represents the minimum returned from Convert.
type Result interface {
// ParseRenderer is an optional interface that a Converter may also implement
// to expose parsing and rendering as two separate steps.
// The Goldmark converter implements it, which makes it possible to extract
// the ToC without having to render the content.
type ParseRenderer interface {
	// Parse parses the source in ctx and returns the parse result.
	Parse(ctx RenderContext) (ResultParse, error)
	// Render renders doc, the document returned by ResultParse.Doc.
	Render(ctx RenderContext, doc any) (ResultRender, error)
}
// ResultRender represents the minimum returned from Convert and Render.
type ResultRender interface {
// Bytes returns the converted content.
Bytes() []byte
}
// ResultParse represents the minimum returned from Parse.
type ResultParse interface {
// Doc returns the parsed document, which is what ParseRenderer.Render
// expects as its second argument.
Doc() any
// TableOfContents returns the table of contents for the parsed content.
TableOfContents() *tableofcontents.Fragments
}
// DocumentInfo holds additional information provided by some converters.
type DocumentInfo interface {
AnchorSuffix() string

View file

@ -18,6 +18,7 @@ import (
"bytes"
"github.com/gohugoio/hugo/identity"
"github.com/gohugoio/hugo/markup/goldmark/codeblocks"
"github.com/gohugoio/hugo/markup/goldmark/images"
"github.com/gohugoio/hugo/markup/goldmark/internal/extensions/attributes"
@ -26,6 +27,7 @@ import (
"github.com/gohugoio/hugo/markup/converter"
"github.com/gohugoio/hugo/markup/tableofcontents"
"github.com/yuin/goldmark"
"github.com/yuin/goldmark/ast"
"github.com/yuin/goldmark/extension"
"github.com/yuin/goldmark/parser"
"github.com/yuin/goldmark/renderer"
@ -158,26 +160,41 @@ func newMarkdown(pcfg converter.ProviderConfig) goldmark.Markdown {
var _ identity.IdentitiesProvider = (*converterResult)(nil)
type converterResult struct {
converter.Result
// parserResult is the value returned by goldmarkConverter.Parse.
type parserResult struct {
// doc is the parsed document.
doc any
// toc is the table of contents taken from the parser context.
toc *tableofcontents.Fragments
}
// Doc returns the parsed document held by p.
func (p parserResult) Doc() any {
return p.doc
}
// TableOfContents returns the table of contents held by p.
func (p parserResult) TableOfContents() *tableofcontents.Fragments {
return p.toc
}
// renderResult is the value returned by goldmarkConverter.Render: the
// rendered output plus the identities collected while rendering.
type renderResult struct {
converter.ResultRender
ids identity.Identities
}
func (c converterResult) TableOfContents() *tableofcontents.Fragments {
return c.toc
// GetIdentities returns the identities stored in r.
func (r renderResult) GetIdentities() identity.Identities {
return r.ids
}
func (c converterResult) GetIdentities() identity.Identities {
return c.ids
// converterResult is the value returned by goldmarkConverter.Convert. It
// combines the rendered output with a table of contents provider and an
// identities provider.
type converterResult struct {
converter.ResultRender
tableOfContentsProvider
identity.IdentitiesProvider
}
// tableOfContentsProvider is implemented by values that can provide a table
// of contents.
type tableOfContentsProvider interface {
TableOfContents() *tableofcontents.Fragments
}
var converterIdentity = identity.KeyValueIdentity{Key: "goldmark", Value: "converter"}
func (c *goldmarkConverter) Convert(ctx converter.RenderContext) (result converter.Result, err error) {
buf := &render.BufWriter{Buffer: &bytes.Buffer{}}
result = buf
func (c *goldmarkConverter) Parse(ctx converter.RenderContext) (converter.ResultParse, error) {
pctx := c.newParserContext(ctx)
reader := text.NewReader(ctx.Src)
@ -186,6 +203,16 @@ func (c *goldmarkConverter) Convert(ctx converter.RenderContext) (result convert
parser.WithContext(pctx),
)
return parserResult{
doc: doc,
toc: pctx.TableOfContents(),
}, nil
}
func (c *goldmarkConverter) Render(ctx converter.RenderContext, doc any) (converter.ResultRender, error) {
n := doc.(ast.Node)
buf := &render.BufWriter{Buffer: &bytes.Buffer{}}
rcx := &render.RenderContextDataHolder{
Rctx: ctx,
Dctx: c.ctx,
@ -197,15 +224,32 @@ func (c *goldmarkConverter) Convert(ctx converter.RenderContext) (result convert
ContextData: rcx,
}
if err := c.md.Renderer().Render(w, ctx.Src, doc); err != nil {
if err := c.md.Renderer().Render(w, ctx.Src, n); err != nil {
return nil, err
}
return converterResult{
Result: buf,
return renderResult{
ResultRender: buf,
ids: rcx.IDs.GetIdentities(),
toc: pctx.TableOfContents(),
}, nil
}
// Convert parses and then renders ctx in one call by chaining Parse and
// Render. The returned value carries the rendered output, the table of
// contents from the parse step and the identities from the render step.
func (c *goldmarkConverter) Convert(ctx converter.RenderContext) (converter.ResultRender, error) {
	parsed, err := c.Parse(ctx)
	if err != nil {
		return nil, err
	}

	rendered, err := c.Render(ctx, parsed.Doc())
	if err != nil {
		return nil, err
	}

	res := converterResult{
		ResultRender:            rendered,
		tableOfContentsProvider: parsed,
		IdentitiesProvider:      rendered.(identity.IdentitiesProvider),
	}
	return res, nil
}
var featureSet = map[identity.Identity]bool{

View file

@ -34,7 +34,7 @@ import (
qt "github.com/frankban/quicktest"
)
func convert(c *qt.C, mconf markup_config.Config, content string) converter.Result {
func convert(c *qt.C, mconf markup_config.Config, content string) converter.ResultRender {
p, err := Provider.New(
converter.ProviderConfig{
MarkupConfig: mconf,

View file

@ -43,7 +43,7 @@ type orgConverter struct {
cfg converter.ProviderConfig
}
func (c *orgConverter) Convert(ctx converter.RenderContext) (converter.Result, error) {
func (c *orgConverter) Convert(ctx converter.RenderContext) (converter.ResultRender, error) {
logger := c.cfg.Logger
config := org.New()
config.Log = logger.Warn()

View file

@ -43,7 +43,7 @@ type pandocConverter struct {
cfg converter.ProviderConfig
}
func (c *pandocConverter) Convert(ctx converter.RenderContext) (converter.Result, error) {
func (c *pandocConverter) Convert(ctx converter.RenderContext) (converter.ResultRender, error) {
b, err := c.getPandocContent(ctx.Src, c.ctx)
if err != nil {
return nil, err

View file

@ -47,7 +47,7 @@ type rstConverter struct {
cfg converter.ProviderConfig
}
func (c *rstConverter) Convert(ctx converter.RenderContext) (converter.Result, error) {
func (c *rstConverter) Convert(ctx converter.RenderContext) (converter.ResultRender, error) {
b, err := c.getRstContent(ctx.Src, c.ctx)
if err != nil {
return nil, err

View file

@ -109,9 +109,13 @@ type ContentProvider interface {
// ContentRenderer provides the content rendering methods for some content.
type ContentRenderer interface {
// RenderContent renders the given content.
// ParseAndRenderContent renders the given content.
// For internal use only.
RenderContent(ctx context.Context, content []byte, renderTOC bool) (converter.Result, error)
ParseAndRenderContent(ctx context.Context, content []byte, enableTOC bool) (converter.ResultRender, error)
// For internal use only.
ParseContent(ctx context.Context, content []byte) (converter.ResultParse, bool, error)
// For internal use only.
RenderContent(ctx context.Context, content []byte, doc any) (converter.ResultRender, bool, error)
}
// FileProvider provides the source file.

View file

@ -133,7 +133,16 @@ func (lcp *LazyContentProvider) TableOfContents(ctx context.Context) template.HT
return lcp.cp.TableOfContents(ctx)
}
func (lcp *LazyContentProvider) RenderContent(ctx context.Context, content []byte, renderTOC bool) (converter.Result, error) {
// ParseAndRenderContent runs lcp.init and then forwards the call to the
// wrapped content provider's ParseAndRenderContent.
func (lcp *LazyContentProvider) ParseAndRenderContent(ctx context.Context, content []byte, renderTOC bool) (converter.ResultRender, error) {
	lcp.init.Do(ctx)
	// Delegate to the method with the matching two-value signature;
	// RenderContent takes a parsed document and returns three values.
	return lcp.cp.ParseAndRenderContent(ctx, content, renderTOC)
}
// ParseContent runs lcp.init and then forwards the call to the wrapped
// content provider's ParseContent.
func (lcp *LazyContentProvider) ParseContent(ctx context.Context, content []byte) (converter.ResultParse, bool, error) {
lcp.init.Do(ctx)
return lcp.cp.ParseContent(ctx, content)
}
// RenderContent runs lcp.init and then forwards the call to the wrapped
// content provider's RenderContent.
func (lcp *LazyContentProvider) RenderContent(ctx context.Context, content []byte, doc any) (converter.ResultRender, bool, error) {
lcp.init.Do(ctx)
return lcp.cp.RenderContent(ctx, content, doc)
}

View file

@ -538,7 +538,14 @@ func (p *nopPage) HeadingsFiltered(context.Context) tableofcontents.Headings {
type nopContentRenderer int
func (r *nopContentRenderer) RenderContent(ctx context.Context, content []byte, renderTOC bool) (converter.Result, error) {
// ParseAndRenderContent ignores its arguments and returns an empty buffer.
func (r *nopContentRenderer) ParseAndRenderContent(ctx context.Context, content []byte, renderTOC bool) (converter.ResultRender, error) {
	return &bytes.Buffer{}, nil
}
// ParseContent does nothing; it always returns a nil result, false and a nil error.
func (r *nopContentRenderer) ParseContent(ctx context.Context, content []byte) (converter.ResultParse, bool, error) {
return nil, false, nil
}
// RenderContent does nothing; it always returns a nil result, false and a nil error.
func (r *nopContentRenderer) RenderContent(ctx context.Context, content []byte, doc any) (converter.ResultRender, bool, error) {
return nil, false, nil
}