Make HTML behave exactly like other content formats (note)

Fixes #11999
This commit is contained in:
Bjørn Erik Pedersen 2024-02-07 10:30:32 +01:00
parent 068ccde4c7
commit 676e6875da
9 changed files with 107 additions and 175 deletions

View file

@ -328,7 +328,7 @@ func PrintFs(fs afero.Fs, path string, w io.Writer) {
}
afero.Walk(fs, path, func(path string, info os.FileInfo, err error) error {
fmt.Println(path)
fmt.Fprintln(w, filepath.ToSlash(path))
return nil
})
}

View file

@ -10,6 +10,7 @@ import (
"os"
"path/filepath"
"regexp"
"sort"
"strings"
"sync"
"testing"
@ -273,6 +274,32 @@ func (s *IntegrationTestBuilder) AssertFileContentExact(filename string, matches
}
}
// AssertPublishDir asserts against a listing of the publish directory.
//
// The listing is what helpers.PrintFs writes for s.H.Fs.PublishDir, split
// into lines, sorted, re-joined and trimmed. Each argument in matches may
// hold several newline-separated expectations; every line is trimmed and
// must be contained in the listing. A line starting with "! " is negated:
// the remainder must NOT be contained in the listing.
func (s *IntegrationTestBuilder) AssertPublishDir(matches ...string) {
	s.Helper()

	var listing bytes.Buffer
	helpers.PrintFs(s.H.Fs.PublishDir, "", &listing)

	entries := strings.Split(listing.String(), "\n")
	sort.Strings(entries)
	content := strings.TrimSpace(strings.Join(entries, "\n"))

	for _, m := range matches {
		cm := qt.Commentf("Match: %q\nIn:\n%s", m, content)
		for _, line := range strings.Split(m, "\n") {
			expect := strings.TrimSpace(line)
			if strings.HasPrefix(expect, "! ") {
				// Negated expectation: the path must be absent.
				s.Assert(content, qt.Not(qt.Contains), strings.TrimPrefix(expect, "! "), cm)
			} else {
				s.Assert(content, qt.Contains, expect, cm)
			}
		}
	}
}
func (s *IntegrationTestBuilder) AssertFileExists(filename string, b bool) {
checker := qt.IsNil
if !b {

View file

@ -1540,32 +1540,6 @@ CONTENT:{{ .Content }}
)
}
// TestPageWithCommentedOutFrontMatter builds a single Markdown page whose
// TOML front matter is wrapped in an HTML comment and checks that the title
// is still read from it and that only the text after the comment is rendered
// as content.
//
// https://github.com/gohugoio/hugo/issues/5478
func TestPageWithCommentedOutFrontMatter(t *testing.T) {
	// Page source: front matter enclosed in <!-- ... -->.
	const pageSource = `<!--
+++
title = "hello"
+++
-->
This is the content.
`
	// Layout printing the two values asserted below.
	const singleLayout = `
Title: {{ .Title }}
Content:{{ .Content }}
`

	builder := newTestSitesBuilder(t)
	builder.WithSimpleConfigFile()
	builder.WithContent("page.md", pageSource)
	builder.WithTemplatesAdded("layouts/_default/single.html", singleLayout)

	builder.CreateSites().Build(BuildCfg{})

	builder.AssertFileContent(
		"public/page/index.html",
		"Title: hello",
		"Content:<p>This is the content.</p>",
	)
}
func TestHomePageWithNoTitle(t *testing.T) {
b := newTestSitesBuilder(t).WithConfigFile("toml", `
title = "Site Title"

View file

@ -757,3 +757,81 @@ func TestPageBundlerHome(t *testing.T) {
Title: Home|First Resource: data.json|Content: <p>Hook Len Page Resources 1</p>
`)
}
// TestHTMLFilesIsue11999 builds a site whose "posts" section mixes Markdown
// and HTML content files: with front matter, without front matter, and with
// front matter wrapped in an HTML comment. The same three HTML variants also
// appear inside the "mybundle" page bundle, next to a plain data.txt.
//
// The section uses the permalink pattern "/myposts/:slugorfilename". The test
// asserts on the rendered home page, the rendered bundle page and the listing
// of the publish directory.
func TestHTMLFilesIsue11999(t *testing.T) {
t.Parallel()
files := `
-- hugo.toml --
disableKinds = ["taxonomy", "term", "rss", "sitemap", "robotsTXT", "404"]
[permalinks]
posts = "/myposts/:slugorfilename"
-- content/posts/markdown-without-frontmatter.md --
-- content/posts/html-without-frontmatter.html --
<html>hello</html>
-- content/posts/html-with-frontmatter.html --
---
title: "HTML with frontmatter"
---
<html>hello</html>
-- content/posts/html-with-commented-out-frontmatter.html --
<!--
---
title: "HTML with commented out frontmatter"
---
-->
<html>hello</html>
-- content/posts/markdown-with-frontmatter.md --
---
title: "Markdown"
---
-- content/posts/mybundle/index.md --
---
title: My Bundle
---
-- content/posts/mybundle/data.txt --
Data.txt
-- content/posts/mybundle/html-in-bundle-without-frontmatter.html --
<html>hell</html>
-- content/posts/mybundle/html-in-bundle-with-frontmatter.html --
---
title: Hello
---
<html>hello</html>
-- content/posts/mybundle/html-in-bundle-with-commented-out-frontmatter.html --
<!--
---
title: "HTML with commented out frontmatter"
---
-->
<html>hello</html>
-- layouts/index.html --
{{ range site.RegularPages }}{{ .RelPermalink }}|{{ end }}$
-- layouts/_default/single.html --
{{ .Title }}|{{ .RelPermalink }}Resources: {{ range .Resources }}{{ .Name }}|{{ end }}$
`
b := Test(t, files)
// The home page lists every standalone post (HTML and Markdown alike) plus
// the bundle as regular pages under /myposts/.
b.AssertFileContent("public/index.html", "/myposts/html-with-commented-out-frontmatter/|/myposts/html-without-frontmatter/|/myposts/markdown-without-frontmatter/|/myposts/html-with-frontmatter/|/myposts/markdown-with-frontmatter/|/myposts/mybundle/|$")
// Inside the bundle, the three HTML files and data.txt are all listed as
// resources of the bundle page.
b.AssertFileContent("public/myposts/mybundle/index.html",
"My Bundle|/myposts/mybundle/Resources: html-in-bundle-with-commented-out-frontmatter.html|html-in-bundle-without-frontmatter.html|html-in-bundle-with-frontmatter.html|data.txt|$")
// Each standalone post is published as <name>/index.html. The line prefixed
// with "! " asserts that the bundled HTML file with front matter is NOT
// present in the publish directory.
b.AssertPublishDir(`
index.html
myposts/html-with-commented-out-frontmatter
myposts/html-with-commented-out-frontmatter/index.html
myposts/html-with-frontmatter
myposts/html-with-frontmatter/index.html
myposts/html-without-frontmatter
myposts/html-without-frontmatter/index.html
myposts/markdown-with-frontmatter
myposts/markdown-with-frontmatter/index.html
myposts/markdown-without-frontmatter
myposts/markdown-without-frontmatter/index.html
myposts/mybundle/data.txt
myposts/mybundle/index.html
! myposts/mybundle/html-in-bundle-with-frontmatter.html
`)
}

View file

@ -15,7 +15,6 @@ package hugolib
import (
"context"
"errors"
"fmt"
"os"
"path/filepath"
@ -27,8 +26,6 @@ import (
"github.com/bep/logg"
"github.com/gohugoio/hugo/common/paths"
"github.com/gohugoio/hugo/common/rungroup"
"github.com/gohugoio/hugo/helpers"
"github.com/gohugoio/hugo/parser/pageparser"
"github.com/spf13/afero"
"github.com/gohugoio/hugo/source"
@ -77,26 +74,6 @@ type pagesCollector struct {
g rungroup.Group[hugofs.FileMetaInfo]
}
// copyFile opens the file described by fim and hands it to
// helpers.WriteToDisk, targeting the site's PublishFsStatic filesystem.
//
// The destination is the cleaned join of the target language base path and
// the file's PathInfo path. The Files counter in the processing stats is
// incremented before the write. An error opening the source is wrapped and
// returned; otherwise the result of helpers.WriteToDisk is returned.
func (c *pagesCollector) copyFile(fim hugofs.FileMetaInfo) error {
	meta := fim.Meta()

	src, err := meta.Open()
	if err != nil {
		return fmt.Errorf("copyFile: failed to open: %w", err)
	}
	defer src.Close()

	site := c.m.s
	dest := filepath.Join(site.PathSpec.GetTargetLanguageBasePath(), meta.PathInfo.Path())
	publishFs := site.PublishFsStatic

	site.PathSpec.ProcessingStats.Incr(&site.PathSpec.ProcessingStats.Files)

	return helpers.WriteToDisk(filepath.Clean(dest), src, publishFs)
}
// Collect collects content by walking the file system and storing
// it in the content tree.
// It may be restricted by filenames set on the collector (partial build).
@ -136,14 +113,7 @@ func (c *pagesCollector) Collect() (collectErr error) {
NumWorkers: numWorkers,
Handle: func(ctx context.Context, fi hugofs.FileMetaInfo) error {
if err := c.m.AddFi(fi); err != nil {
if errors.Is(err, pageparser.ErrPlainHTMLDocumentsNotSupported) {
// Reclassify this as a static file.
if err := c.copyFile(fi); err != nil {
return err
}
} else {
return hugofs.AddFileInfoToError(err, fi, c.fs)
}
return hugofs.AddFileInfoToError(err, fi, c.fs)
}
numFilesProcessedTotal.Add(1)
if numFilesProcessedTotal.Load()%1000 == 0 {

View file

@ -32,11 +32,6 @@ import (
"github.com/gohugoio/hugo/resources/page"
)
// Template snippets used as test fixtures.
const (
	// templateMissingFunc pipes .Title into a function named
	// funcdoesnotexists (presumably one that is never registered, so
	// executing the template fails — confirm against its users).
	templateMissingFunc = `{{ .Title | funcdoesnotexists }}`

	// templateWithURLAbs is a link with a root-relative href.
	templateWithURLAbs = `<a href="/foobar.jpg">Going</a>`
)
func TestDraftAndFutureRender(t *testing.T) {
t.Parallel()
c := qt.New(t)
@ -577,60 +572,6 @@ func doTestSectionNaming(t *testing.T, canonify, uglify, pluralize bool) {
}
}
// TestAbsURLify builds a site with uglyURLs enabled for every combination of
// three base URLs and canonifyURLs on/off, then checks the published output
// of two HTML content files:
//
//   - blue/doc2.html has front matter and is rendered with the
//     blue/single.html layout (templateWithURLAbs); the expected link is
//     prefixed with the base URL only when canonifyURLs is on.
//   - sect/doc1.html has no front matter and is expected to be published
//     with its source text intact.
func TestAbsURLify(t *testing.T) {
	t.Parallel()
	c := qt.New(t)

	contentFiles := [][2]string{
		{filepath.FromSlash("sect/doc1.html"), "<!doctype html><html><head></head><body><a href=\"#frag1\">link</a></body></html>"},
		{filepath.FromSlash("blue/doc2.html"), "---\nf: t\n---\n<!doctype html><html><body>more content</body></html>"},
	}

	// A "%s" in expected is a placeholder for the base URL.
	expectations := []struct {
		file, expected string
	}{
		{"public/blue/doc2.html", "<a href=\"%s/foobar.jpg\">Going</a>"},
		{"public/sect/doc1.html", "<!doctype html><html><head></head><body><a href=\"#frag1\">link</a></body></html>"},
	}

	// buildAndVerify builds one site for the given settings and checks
	// every expectation against the published files.
	buildAndVerify := func(baseURL string, canonify bool) {
		cfg, fs := newTestCfg()
		cfg.Set("uglyURLs", true)
		cfg.Set("canonifyURLs", canonify)
		cfg.Set("baseURL", baseURL)

		configs, err := loadTestConfigFromProvider(cfg)
		c.Assert(err, qt.IsNil)

		for _, cf := range contentFiles {
			writeSource(t, fs, filepath.Join("content", cf[0]), cf[1])
		}
		writeSource(t, fs, filepath.Join("layouts", "blue/single.html"), templateWithURLAbs)

		s := buildSingleSite(t, deps.DepsCfg{Fs: fs, Configs: configs}, BuildCfg{})
		th := newTestHelper(s.conf, s.Fs, t)

		for _, exp := range expectations {
			want := exp.expected
			if strings.Contains(want, "%s") {
				want = fmt.Sprintf(want, baseURL)
			}
			if !canonify {
				// Without canonifyURLs the base URL must not appear.
				want = strings.ReplaceAll(want, baseURL, "")
			}
			th.assertFileContent(exp.file, want)
		}
	}

	for _, baseURL := range []string{"http://auth/bub", "http://base", "//base"} {
		for _, canonify := range []bool{true, false} {
			buildAndVerify(baseURL, canonify)
		}
	}
}
var weightedPage1 = `+++
weight = "2"
title = "One"

View file

@ -43,8 +43,6 @@ type pageLexer struct {
summaryDivider []byte
// Set when we have parsed any summary divider
summaryDividerChecked bool
// Whether we're in a HTML comment.
isInHTMLComment bool
lexerShortcodeState
@ -102,8 +100,6 @@ var (
delimTOML = []byte("+++")
delimYAML = []byte("---")
delimOrg = []byte("#+")
htmlCommentStart = []byte("<!--")
htmlCommentEnd = []byte("-->")
)
func (l *pageLexer) next() rune {
@ -232,13 +228,6 @@ func (l *pageLexer) errorf(format string, args ...any) stateFunc {
return nil
}
// documentError can be used to signal a fatal error in the lexing process.
// It stores err in l.err and returns a nil stateFunc; as noted for this
// lexer, a nil state terminates the parser.
func (l *pageLexer) documentError(err error) stateFunc {
l.err = err
return nil
}
func (l *pageLexer) consumeCRLF() bool {
var consumed bool
for _, r := range crLf {
@ -251,15 +240,6 @@ func (l *pageLexer) consumeCRLF() bool {
return consumed
}
// consumeToNextLine advances the lexer rune by rune until it has read an
// end-of-line rune (as reported by isEndOfLine) or eof. The terminating
// rune is consumed as well.
func (l *pageLexer) consumeToNextLine() {
	for r := l.next(); r != eof && !isEndOfLine(r); r = l.next() {
		// Nothing to do; next() already moved the lexer forward.
	}
}
func (l *pageLexer) consumeToSpace() {
for {
r := l.next()
@ -441,10 +421,6 @@ func lexMainSection(l *pageLexer) stateFunc {
return lexDone
}
if l.isInHTMLComment {
return lexEndFrontMatterHTMLComment
}
// Fast forward as far as possible.
skip := l.sectionHandlers.skip()

View file

@ -13,10 +13,6 @@
package pageparser
import "errors"
// ErrPlainHTMLDocumentsNotSupported is the sentinel error signalled by the
// intro lexer when the first non-whitespace character of a document is '<'
// and the input does not start an HTML comment at that position. Callers can
// detect it with errors.Is.
var ErrPlainHTMLDocumentsNotSupported = errors.New("plain HTML documents not supported")
func lexIntroSection(l *pageLexer) stateFunc {
l.summaryDivider = summaryDivider
@ -39,19 +35,6 @@ LOOP:
case r == byteOrderMark:
l.emit(TypeIgnore)
case !isSpace(r) && !isEndOfLine(r):
if r == '<' {
l.backup()
if l.hasPrefix(htmlCommentStart) {
// This may be commented out front matter, which should
// still be read.
l.consumeToNextLine()
l.isInHTMLComment = true
l.emit(TypeIgnore)
continue LOOP
} else {
return l.documentError(ErrPlainHTMLDocumentsNotSupported)
}
}
break LOOP
}
}
@ -60,19 +43,6 @@ LOOP:
return lexMainSection
}
// lexEndFrontMatterHTMLComment is the lexer state that closes an HTML
// comment wrapped around front matter.
//
// It clears the isInHTMLComment flag, looks up the next htmlCommentEnd
// ("-->") via l.index, moves l.pos past it and emits the skipped text as
// TypeIgnore before handing over to lexMainSection. If no end marker is
// found it reports an error through l.errorf.
func lexEndFrontMatterHTMLComment(l *pageLexer) stateFunc {
	l.isInHTMLComment = false

	endIdx := l.index(htmlCommentEnd)
	if endIdx == -1 {
		return l.errorf("starting HTML comment with no end")
	}

	// Skip up to and including the closing marker.
	skip := endIdx + len(htmlCommentEnd)
	l.pos += skip
	l.emit(TypeIgnore)

	// The comment is done; continue with the main section (shortcodes etc.).
	return lexMainSection
}
func lexFrontMatterJSON(l *pageLexer) stateFunc {
// Include the left delimiter
l.backup()

View file

@ -61,13 +61,9 @@ var crLfReplacer = strings.NewReplacer("\r", "#", "\n", "$")
var frontMatterTests = []lexerTest{
{"empty", "", []typeText{tstEOF}, nil},
{"Byte order mark", "\ufeff\nSome text.\n", []typeText{nti(TypeIgnore, "\ufeff"), tstSomeText, tstEOF}, nil},
{"HTML Document", ` <html> `, nil, ErrPlainHTMLDocumentsNotSupported},
{"HTML Document with shortcode", `<html>{{< sc1 >}}</html>`, nil, ErrPlainHTMLDocumentsNotSupported},
{"No front matter", "\nSome text.\n", []typeText{tstSomeText, tstEOF}, nil},
{"YAML front matter", "---\nfoo: \"bar\"\n---\n\nSome text.\n", []typeText{tstFrontMatterYAML, tstSomeText, tstEOF}, nil},
{"YAML empty front matter", "---\n---\n\nSome text.\n", []typeText{nti(TypeFrontMatterYAML, ""), tstSomeText, tstEOF}, nil},
{"YAML commented out front matter", "<!--\n---\nfoo: \"bar\"\n---\n-->\nSome text.\n", []typeText{nti(TypeIgnore, "<!--\n"), tstFrontMatterYAML, nti(TypeIgnore, "-->"), tstSomeText, tstEOF}, nil},
{"YAML commented out front matter, no end", "<!--\n---\nfoo: \"bar\"\n---\nSome text.\n", []typeText{nti(TypeIgnore, "<!--\n"), tstFrontMatterYAML, nti(tError, "starting HTML comment with no end")}, nil},
// Note that we keep all bytes as they are, but we need to handle CRLF
{"YAML front matter CRLF", "---\r\nfoo: \"bar\"\r\n---\n\nSome text.\n", []typeText{tstFrontMatterYAMLCRLF, tstSomeText, tstEOF}, nil},
{"TOML front matter", "+++\nfoo = \"bar\"\n+++\n\nSome text.\n", []typeText{tstFrontMatterTOML, tstSomeText, tstEOF}, nil},