Make HTML behave exactly like other content formats (note)

Fixes #11999
2024-11-21 20:46:30 -05:00 · 2024-02-07 10:30:32 +01:00 · 2024-02-07 10:30:32 +01:00 · 676e6875da
commit 676e6875da
parent 068ccde4c7
9 changed files with 107 additions and 175 deletions
--- a/helpers/general.go
+++ b/helpers/general.go
@ -328,7 +328,7 @@ func PrintFs(fs afero.Fs, path string, w io.Writer) {
 	}
 	afero.Walk(fs, path, func(path string, info os.FileInfo, err error) error {
-		fmt.Println(path)
+		fmt.Fprintln(w, filepath.ToSlash(path))
 		return nil
 	})
 }
--- a/hugolib/integrationtest_builder.go
+++ b/hugolib/integrationtest_builder.go
@ -10,6 +10,7 @@ import (
 	"os"
 	"path/filepath"
 	"regexp"
 	"sort"
 	"strings"
 	"sync"
 	"testing"
@ -273,6 +274,32 @@ func (s *IntegrationTestBuilder) AssertFileContentExact(filename string, matches
 	}
 }
 // AssertPublishDir asserts on the set of paths found in the publish directory.
 // The directory listing produced by helpers.PrintFs is sorted and joined with
 // newlines. Each argument may hold several newline-separated patterns; every
 // pattern is trimmed and must occur as a substring of that listing. A pattern
 // prefixed with "! " is negated, i.e. it must not occur in the listing.
 func (s *IntegrationTestBuilder) AssertPublishDir(matches ...string) {
 	s.Helper()
 	var listing bytes.Buffer
 	helpers.PrintFs(s.H.Fs.PublishDir, "", &listing)
 	paths := strings.Split(listing.String(), "\n")
 	// Sort so the listing shown in failure output is stable.
 	sort.Strings(paths)
 	content := strings.TrimSpace(strings.Join(paths, "\n"))
 	for _, m := range matches {
 		cm := qt.Commentf("Match: %q\nIn:\n%s", m, content)
 		for _, pattern := range strings.Split(m, "\n") {
 			pattern = strings.TrimSpace(pattern)
 			if strings.HasPrefix(pattern, "! ") {
 				// Negated pattern: the path must be absent.
 				s.Assert(content, qt.Not(qt.Contains), strings.TrimPrefix(pattern, "! "), cm)
 			} else {
 				s.Assert(content, qt.Contains, pattern, cm)
 			}
 		}
 	}
 }
 func (s *IntegrationTestBuilder) AssertFileExists(filename string, b bool) {
 	checker := qt.IsNil
 	if !b {
--- a/hugolib/page_test.go
+++ b/hugolib/page_test.go
@ -1540,32 +1540,6 @@ CONTENT:{{ .Content }}
 	)
 }
 // TestPageWithCommentedOutFrontMatter checks that TOML front matter wrapped in
 // an HTML comment at the top of a Markdown file is still read: the rendered
 // page must show the title from the commented-out front matter and the
 // Markdown body as content.
 //
 // See https://github.com/gohugoio/hugo/issues/5478
 func TestPageWithCommentedOutFrontMatter(t *testing.T) {
 	b := newTestSitesBuilder(t)
 	b.WithSimpleConfigFile()
 	// The front matter block below is enclosed in <!-- ... -->.
 	b.WithContent("page.md", `<!--
 +++
 title = "hello"
 +++
 -->
 This is the content.
 `)
 	b.WithTemplatesAdded("layouts/_default/single.html", `
 Title: {{ .Title }}
 Content:{{ .Content }}
 `)
 	b.CreateSites().Build(BuildCfg{})
 	// Title comes from the commented-out front matter; the comment markers
 	// themselves must not show up in the content.
 	b.AssertFileContent("public/page/index.html",
 		"Title: hello",
 		"Content:<p>This is the content.</p>",
 	)
 }
 func TestHomePageWithNoTitle(t *testing.T) {
 	b := newTestSitesBuilder(t).WithConfigFile("toml", `
 title = "Site Title"
--- a/hugolib/pagebundler_test.go
+++ b/hugolib/pagebundler_test.go
@ -757,3 +757,81 @@ func TestPageBundlerHome(t *testing.T) {
 Title: Home|First Resource: data.json|Content: <p>Hook Len Page Resources 1</p>
 `)
 }
 // TestHTMLFilesIsue11999 builds a site whose posts section mixes Markdown and
 // HTML content files (with front matter, without front matter, and with front
 // matter wrapped in an HTML comment), both as standalone pages and inside a
 // page bundle. It asserts that:
 //   - the home page lists every standalone file, HTML included, as a regular
 //     page under the /myposts/ permalink,
 //   - the bundle reports its HTML files and data.txt as resources,
 //   - the publish dir contains an index.html per page and does not contain
 //     the bundled html-in-bundle-with-frontmatter.html.
 //
 // NOTE(review): "Isue" in the name looks like a typo for "Issue"; the
 // identifier is left unchanged here.
 func TestHTMLFilesIsue11999(t *testing.T) {
 	t.Parallel()
 	files := `
 -- hugo.toml --
 disableKinds = ["taxonomy", "term", "rss", "sitemap", "robotsTXT", "404"]
 [permalinks]
 posts = "/myposts/:slugorfilename"
 -- content/posts/markdown-without-frontmatter.md --
 -- content/posts/html-without-frontmatter.html --
 <html>hello</html>
 -- content/posts/html-with-frontmatter.html --
 ---
 title: "HTML with frontmatter"
 ---
 <html>hello</html>
 -- content/posts/html-with-commented-out-frontmatter.html --
 <!--
 ---
 title: "HTML with commented out frontmatter"
 ---
 -->
 <html>hello</html>
 -- content/posts/markdown-with-frontmatter.md --
 ---
 title: "Markdown"
 ---
 -- content/posts/mybundle/index.md --
 ---
 title: My Bundle
 ---
 -- content/posts/mybundle/data.txt --
 Data.txt
 -- content/posts/mybundle/html-in-bundle-without-frontmatter.html --
 <html>hell</html>
 -- content/posts/mybundle/html-in-bundle-with-frontmatter.html --
 ---
 title: Hello
 ---
 <html>hello</html>
 -- content/posts/mybundle/html-in-bundle-with-commented-out-frontmatter.html --
 <!--
 ---
 title: "HTML with commented out frontmatter"
 ---
 -->
 <html>hello</html>
 -- layouts/index.html --
 {{ range site.RegularPages }}{{ .RelPermalink }}|{{ end }}$
 -- layouts/_default/single.html --
 {{ .Title }}|{{ .RelPermalink }}Resources: {{ range .Resources }}{{ .Name }}|{{ end }}$
 `
 	b := Test(t, files)
 	// Every standalone content file, HTML or Markdown, is a regular page.
 	b.AssertFileContent("public/index.html", "/myposts/html-with-commented-out-frontmatter/|/myposts/html-without-frontmatter/|/myposts/markdown-without-frontmatter/|/myposts/html-with-frontmatter/|/myposts/markdown-with-frontmatter/|/myposts/mybundle/|$")
 	// HTML files inside the bundle are listed as resources of the bundle page.
 	b.AssertFileContent("public/myposts/mybundle/index.html",
 		"My Bundle|/myposts/mybundle/Resources: html-in-bundle-with-commented-out-frontmatter.html|html-in-bundle-without-frontmatter.html|html-in-bundle-with-frontmatter.html|data.txt|$")
 	// Lines are matched as substrings of the publish dir listing; the line
 	// starting with "! " must be absent.
 	b.AssertPublishDir(`
 index.html
 myposts/html-with-commented-out-frontmatter
 myposts/html-with-commented-out-frontmatter/index.html
 myposts/html-with-frontmatter
 myposts/html-with-frontmatter/index.html
 myposts/html-without-frontmatter
 myposts/html-without-frontmatter/index.html
 myposts/markdown-with-frontmatter
 myposts/markdown-with-frontmatter/index.html
 myposts/markdown-without-frontmatter
 myposts/markdown-without-frontmatter/index.html
 myposts/mybundle/data.txt
 myposts/mybundle/index.html
 ! myposts/mybundle/html-in-bundle-with-frontmatter.html
 `)
 }
--- a/hugolib/pages_capture.go
+++ b/hugolib/pages_capture.go
@ -15,7 +15,6 @@ package hugolib
 import (
 	"context"
 	"errors"
 	"fmt"
 	"os"
 	"path/filepath"
@ -27,8 +26,6 @@ import (
 	"github.com/bep/logg"
 	"github.com/gohugoio/hugo/common/paths"
 	"github.com/gohugoio/hugo/common/rungroup"
 	"github.com/gohugoio/hugo/helpers"
 	"github.com/gohugoio/hugo/parser/pageparser"
 	"github.com/spf13/afero"
 	"github.com/gohugoio/hugo/source"
@ -77,26 +74,6 @@ type pagesCollector struct {
 	g rungroup.Group[hugofs.FileMetaInfo]
 }
 // copyFile writes the file described by fim unchanged to the site's static
 // publish filesystem. The destination is the target language base path joined
 // with the file's content path. The Files processing counter is incremented
 // before the write. It returns a wrapped error if the source cannot be opened,
 // otherwise whatever helpers.WriteToDisk returns.
 func (c *pagesCollector) copyFile(fim hugofs.FileMetaInfo) error {
 	fileMeta := fim.Meta()
 	src, err := fileMeta.Open()
 	if err != nil {
 		return fmt.Errorf("copyFile: failed to open: %w", err)
 	}
 	defer src.Close()
 	site := c.m.s
 	dest := filepath.Join(site.PathSpec.GetTargetLanguageBasePath(), fileMeta.PathInfo.Path())
 	site.PathSpec.ProcessingStats.Incr(&site.PathSpec.ProcessingStats.Files)
 	return helpers.WriteToDisk(filepath.Clean(dest), src, site.PublishFsStatic)
 }
 // Collect collects content by walking the file system and storing
 // it in the content tree.
 // It may be restricted by filenames set on the collector (partial build).
@ -136,15 +113,8 @@ func (c *pagesCollector) Collect() (collectErr error) {
 		NumWorkers: numWorkers,
 		Handle: func(ctx context.Context, fi hugofs.FileMetaInfo) error {
 			if err := c.m.AddFi(fi); err != nil {
 				if errors.Is(err, pageparser.ErrPlainHTMLDocumentsNotSupported) {
 					// Reclassify this as a static file.
 					if err := c.copyFile(fi); err != nil {
 						return err
 					}
 				} else {
 				return hugofs.AddFileInfoToError(err, fi, c.fs)
 			}
 			}
 			numFilesProcessedTotal.Add(1)
 			if numFilesProcessedTotal.Load()%1000 == 0 {
 				logFilesProcessed(false)
--- a/hugolib/site_test.go
+++ b/hugolib/site_test.go
@ -32,11 +32,6 @@ import (
 	"github.com/gohugoio/hugo/resources/page"
 )
 // templateMissingFunc is a template body that pipes .Title through a
 // function name that is not defined.
 const templateMissingFunc = `{{ .Title | funcdoesnotexists }}`
 // templateWithURLAbs is a template body holding a single root-relative link.
 const templateWithURLAbs = `<a href="/foobar.jpg">Going</a>`
 func TestDraftAndFutureRender(t *testing.T) {
 	t.Parallel()
 	c := qt.New(t)
@ -577,60 +572,6 @@ func doTestSectionNaming(t *testing.T, canonify, uglify, pluralize bool) {
 	}
 }
 // TestAbsURLify builds a site with uglyURLs enabled for every combination of
 // baseURL and canonifyURLs and checks the published HTML. doc2.html is rendered
 // through the blue/single.html layout and is expected to carry the baseURL in
 // front of /foobar.jpg only when canonifyURLs is on. The expectation for
 // doc1.html is its source markup with the fragment link unchanged.
 func TestAbsURLify(t *testing.T) {
 	t.Parallel()
 	c := qt.New(t)
 	// Pairs of {path below content/, file content}.
 	contentFiles := [][2]string{
 		{filepath.FromSlash("sect/doc1.html"), "<!doctype html><html><head></head><body><a href=\"#frag1\">link</a></body></html>"},
 		{filepath.FromSlash("blue/doc2.html"), "---\nf: t\n---\n<!doctype html><html><body>more content</body></html>"},
 	}
 	// Expected output per published file; %s is a placeholder for the baseURL.
 	expectations := []struct {
 		file, expected string
 	}{
 		{"public/blue/doc2.html", "<a href=\"%s/foobar.jpg\">Going</a>"},
 		{"public/sect/doc1.html", "<!doctype html><html><head></head><body><a href=\"#frag1\">link</a></body></html>"},
 	}
 	baseURLs := []string{"http://auth/bub", "http://base", "//base"}
 	for _, baseURL := range baseURLs {
 		for _, canonify := range []bool{true, false} {
 			cfg, fs := newTestCfg()
 			cfg.Set("uglyURLs", true)
 			cfg.Set("canonifyURLs", canonify)
 			cfg.Set("baseURL", baseURL)
 			configs, err := loadTestConfigFromProvider(cfg)
 			c.Assert(err, qt.IsNil)
 			for _, cf := range contentFiles {
 				writeSource(t, fs, filepath.Join("content", cf[0]), cf[1])
 			}
 			writeSource(t, fs, filepath.Join("layouts", "blue/single.html"), templateWithURLAbs)
 			site := buildSingleSite(t, deps.DepsCfg{Fs: fs, Configs: configs}, BuildCfg{})
 			helper := newTestHelper(site.conf, site.Fs, t)
 			for _, e := range expectations {
 				want := e.expected
 				if strings.Contains(want, "%s") {
 					want = fmt.Sprintf(want, baseURL)
 				}
 				// Without canonifyURLs the baseURL must not appear in the output.
 				if !canonify {
 					want = strings.ReplaceAll(want, baseURL, "")
 				}
 				helper.assertFileContent(e.file, want)
 			}
 		}
 	}
 }
 var weightedPage1 = `+++
 weight = "2"
 title = "One"
--- a/parser/pageparser/pagelexer.go
+++ b/parser/pageparser/pagelexer.go
@ -43,8 +43,6 @@ type pageLexer struct {
 	summaryDivider []byte
 	// Set when we have parsed any summary divider
 	summaryDividerChecked bool
 	// Whether we're in a HTML comment.
 	isInHTMLComment bool
 	lexerShortcodeState
@ -102,8 +100,6 @@ var (
 	delimTOML         = []byte("+++")
 	delimYAML         = []byte("---")
 	delimOrg          = []byte("#+")
 	htmlCommentStart  = []byte("<!--")
 	htmlCommentEnd    = []byte("-->")
 )
 func (l *pageLexer) next() rune {
@ -232,13 +228,6 @@ func (l *pageLexer) errorf(format string, args ...any) stateFunc {
 	return nil
 }
 // documentError can be used to signal a fatal error in the lexing process.
 // It stores err on the lexer and returns a nil stateFunc; a nil state
 // terminates the parser.
 func (l *pageLexer) documentError(err error) stateFunc {
 	l.err = err
 	return nil
 }
 func (l *pageLexer) consumeCRLF() bool {
 	var consumed bool
 	for _, r := range crLf {
@ -251,15 +240,6 @@ func (l *pageLexer) consumeCRLF() bool {
 	return consumed
 }
 // consumeToNextLine advances the lexer one rune at a time and stops right
 // after it has read an end-of-line rune or eof.
 func (l *pageLexer) consumeToNextLine() {
 	for r := l.next(); r != eof && !isEndOfLine(r); r = l.next() {
 		// Nothing to do; the loop header does the consuming.
 	}
 }
 func (l *pageLexer) consumeToSpace() {
 	for {
 		r := l.next()
@ -441,10 +421,6 @@ func lexMainSection(l *pageLexer) stateFunc {
 		return lexDone
 	}
 	if l.isInHTMLComment {
 		return lexEndFrontMatterHTMLComment
 	}
 	// Fast forward as far as possible.
 	skip := l.sectionHandlers.skip()
--- a/parser/pageparser/pagelexer_intro.go
+++ b/parser/pageparser/pagelexer_intro.go
@ -13,10 +13,6 @@
 package pageparser
 import "errors"
 // ErrPlainHTMLDocumentsNotSupported is the error lexIntroSection reports when
 // the first significant rune of a document is '<' and the input at that point
 // is not the start of an HTML comment.
 var ErrPlainHTMLDocumentsNotSupported = errors.New("plain HTML documents not supported")
 func lexIntroSection(l *pageLexer) stateFunc {
 	l.summaryDivider = summaryDivider
@ -39,19 +35,6 @@ LOOP:
 		case r == byteOrderMark:
 			l.emit(TypeIgnore)
 		case !isSpace(r) && !isEndOfLine(r):
 			if r == '<' {
 				l.backup()
 				if l.hasPrefix(htmlCommentStart) {
 					// This may be commented out front matter, which should
 					// still be read.
 					l.consumeToNextLine()
 					l.isInHTMLComment = true
 					l.emit(TypeIgnore)
 					continue LOOP
 				} else {
 					return l.documentError(ErrPlainHTMLDocumentsNotSupported)
 				}
 			}
 			break LOOP
 		}
 	}
@ -60,19 +43,6 @@ LOOP:
 	return lexMainSection
 }
 // lexEndFrontMatterHTMLComment handles the closing marker of an HTML comment
 // that wrapped the front matter. It clears the isInHTMLComment flag, moves the
 // position past the next htmlCommentEnd and emits a TypeIgnore item before
 // handing over to lexMainSection. If no closing marker is found it reports an
 // error through errorf.
 func lexEndFrontMatterHTMLComment(l *pageLexer) stateFunc {
 	l.isInHTMLComment = false
 	closeAt := l.index(htmlCommentEnd)
 	if closeAt == -1 {
 		return l.errorf("starting HTML comment with no end")
 	}
 	// Skip everything up to the marker, then the marker itself.
 	l.pos += closeAt
 	l.pos += len(htmlCommentEnd)
 	l.emit(TypeIgnore)
 	// The main section, shortcodes included, is lexed next.
 	return lexMainSection
 }
 func lexFrontMatterJSON(l *pageLexer) stateFunc {
 	// Include the left delimiter
 	l.backup()
--- a/parser/pageparser/pageparser_intro_test.go
+++ b/parser/pageparser/pageparser_intro_test.go
@ -61,13 +61,9 @@ var crLfReplacer = strings.NewReplacer("\r", "#", "\n", "$")
 var frontMatterTests = []lexerTest{
 	{"empty", "", []typeText{tstEOF}, nil},
 	{"Byte order mark", "\ufeff\nSome text.\n", []typeText{nti(TypeIgnore, "\ufeff"), tstSomeText, tstEOF}, nil},
 	{"HTML Document", `  <html>  `, nil, ErrPlainHTMLDocumentsNotSupported},
 	{"HTML Document with shortcode", `<html>{{< sc1 >}}</html>`, nil, ErrPlainHTMLDocumentsNotSupported},
 	{"No front matter", "\nSome text.\n", []typeText{tstSomeText, tstEOF}, nil},
 	{"YAML front matter", "---\nfoo: \"bar\"\n---\n\nSome text.\n", []typeText{tstFrontMatterYAML, tstSomeText, tstEOF}, nil},
 	{"YAML empty front matter", "---\n---\n\nSome text.\n", []typeText{nti(TypeFrontMatterYAML, ""), tstSomeText, tstEOF}, nil},
 	{"YAML commented out front matter", "<!--\n---\nfoo: \"bar\"\n---\n-->\nSome text.\n", []typeText{nti(TypeIgnore, "<!--\n"), tstFrontMatterYAML, nti(TypeIgnore, "-->"), tstSomeText, tstEOF}, nil},
 	{"YAML commented out front matter, no end", "<!--\n---\nfoo: \"bar\"\n---\nSome text.\n", []typeText{nti(TypeIgnore, "<!--\n"), tstFrontMatterYAML, nti(tError, "starting HTML comment with no end")}, nil},
 	// Note that we keep all bytes as they are, but we need to handle CRLF
 	{"YAML front matter CRLF", "---\r\nfoo: \"bar\"\r\n---\n\nSome text.\n", []typeText{tstFrontMatterYAMLCRLF, tstSomeText, tstEOF}, nil},
 	{"TOML front matter", "+++\nfoo = \"bar\"\n+++\n\nSome text.\n", []typeText{tstFrontMatterTOML, tstSomeText, tstEOF}, nil},