Move the duplicate page/resource filter

Move the removal of duplicate content and resource files after we have determined if we're inside a leaf bundle or not.

Note that these would eventually have been filtered out as duplicates when inserting them into the document store, but doing it here will preserve a consistent ordering.

Fixes #12013
This commit is contained in:
Bjørn Erik Pedersen 2024-02-08 08:44:28 +01:00
parent 676e6875da
commit 0851c175ad
3 changed files with 67 additions and 28 deletions

View file

@ -21,7 +21,6 @@ import (
"sort"
"github.com/gohugoio/hugo/common/herrors"
"github.com/gohugoio/hugo/common/hstrings"
"github.com/gohugoio/hugo/common/paths"
"github.com/gohugoio/hugo/hugofs/files"
"github.com/spf13/afero"
@ -150,32 +149,6 @@ func (f *componentFsDir) ReadDir(count int) ([]iofs.DirEntry, error) {
return fimi.Name() < fimj.Name()
})
if f.fs.opts.Component == files.ComponentFolderContent {
// Finally filter out any duplicate content or resource files, e.g. page.md and page.html.
n := 0
seen := map[hstrings.Tuple]bool{}
for _, fi := range fis {
fim := fi.(FileMetaInfo)
pi := fim.Meta().PathInfo
keep := fim.IsDir()
if !keep {
baseLang := hstrings.Tuple{First: pi.Base(), Second: fim.Meta().Lang}
if !seen[baseLang] {
keep = true
seen[baseLang] = true
}
}
if keep {
fis[n] = fi
n++
}
}
fis = fis[:n]
}
return fis, nil
}

View file

@ -835,3 +835,44 @@ myposts/mybundle/index.html
! myposts/mybundle/html-in-bundle-with-frontmatter.html
`)
}
// TestBundleDuplicatePagesAndResources builds a site whose content tree holds
// files that share a base name but differ in extension (e.g. index.md and
// index.html, p1.md and p1.html, data.txt and data.en.txt), both inside the
// mysection/mybundle bundle and directly in the mysection section, and then
// asserts the exact rendered output for the bundle page and the section list.
func TestBundleDuplicatePagesAndResources(t *testing.T) {
// Txtar-style fixture: each "-- path --" line starts a new file.
files := `
-- hugo.toml --
baseURL = "https://example.com"
disableKinds = ["taxonomy", "term"]
-- content/mysection/mybundle/index.md --
-- content/mysection/mybundle/index.html --
-- content/mysection/mybundle/p1.md --
-- content/mysection/mybundle/p1.html --
-- content/mysection/mybundle/foo/p1.html --
-- content/mysection/mybundle/data.txt --
Data txt.
-- content/mysection/mybundle/data.en.txt --
Data en txt.
-- content/mysection/mybundle/data.json --
Data JSON.
-- content/mysection/_index.md --
-- content/mysection/_index.html --
-- content/mysection/sectiondata.json --
Secion data JSON.
-- content/mysection/sectiondata.txt --
Section data TXT.
-- content/mysection/p2.md --
-- content/mysection/p2.html --
-- content/mysection/foo/p2.md --
-- layouts/_default/single.html --
Single:{{ .Title }}|{{ .Path }}|File LogicalName: {{ with .File }}{{ .LogicalName }}{{ end }}||{{ .RelPermalink }}|{{ .Kind }}|Resources: {{ range .Resources}}{{ .Name }}: {{ .Content }}|{{ end }}$
-- layouts/_default/list.html --
List: {{ .Title }}|{{ .Path }}|File LogicalName: {{ with .File }}{{ .LogicalName }}{{ end }}|{{ .RelPermalink }}|{{ .Kind }}|Resources: {{ range .Resources}}{{ .Name }}: {{ .Content }}|{{ end }}$
RegularPages: {{ range .RegularPages }}{{ .RelPermalink }}|File LogicalName: {{ with .File }}{{ .LogicalName }}|{{ end }}{{ end }}$
`
b := Test(t, files)
// Note that the sort order gives us the most specific data file for the en language (data.en.txt),
// which is why the resource named data.txt is expected to render "Data en txt.".
// The bundle page itself is expected to be backed by index.md, not index.html.
b.AssertFileContent("public/mysection/mybundle/index.html", `Single:|/mysection/mybundle|File LogicalName: index.md||/mysection/mybundle/|page|Resources: data.json: Data JSON.|foo/p1.html: |p1.html: |p1.md: |data.txt: Data en txt.|$`)
// The section is expected to be backed by _index.md, and each regular page
// (including /mysection/p2/, which has both p2.md and p2.html) is listed once
// with the .md file as its source.
b.AssertFileContent("public/mysection/index.html",
"List: |/mysection|File LogicalName: _index.md|/mysection/|section|Resources: sectiondata.json: Secion data JSON.|sectiondata.txt: Section data TXT.|$",
"RegularPages: /mysection/foo/p2/|File LogicalName: p2.md|/mysection/mybundle/|File LogicalName: index.md|/mysection/p2/|File LogicalName: p2.md|$")
}

View file

@ -24,6 +24,7 @@ import (
"time"
"github.com/bep/logg"
"github.com/gohugoio/hugo/common/hstrings"
"github.com/gohugoio/hugo/common/paths"
"github.com/gohugoio/hugo/common/rungroup"
"github.com/spf13/afero"
@ -270,13 +271,25 @@ func (c *pagesCollector) collectDirDir(path string, root hugofs.FileMetaInfo, in
return nil, filepath.SkipDir
}
seen := map[hstrings.Tuple]bool{}
for _, fi := range readdir {
if fi.IsDir() {
continue
}
pi := fi.Meta().PathInfo
meta := fi.Meta()
pi := meta.PathInfo
// Filter out duplicate page or resource.
// These would eventually have been filtered out as duplicates when
// inserting them into the document store,
// but doing it here will preserve a consistent ordering.
baseLang := hstrings.Tuple{First: pi.Base(), Second: meta.Lang}
if seen[baseLang] {
continue
}
seen[baseLang] = true
if pi == nil {
panic(fmt.Sprintf("no path info for %q", meta.Filename))
}
@ -317,6 +330,8 @@ func (c *pagesCollector) collectDirDir(path string, root hugofs.FileMetaInfo, in
func (c *pagesCollector) handleBundleLeaf(dir, bundle hugofs.FileMetaInfo, inPath string, readdir []hugofs.FileMetaInfo) error {
bundlePi := bundle.Meta().PathInfo
seen := map[hstrings.Tuple]bool{}
walk := func(path string, info hugofs.FileMetaInfo) error {
if info.IsDir() {
return nil
@ -333,6 +348,16 @@ func (c *pagesCollector) handleBundleLeaf(dir, bundle hugofs.FileMetaInfo, inPat
}
}
// Filter out duplicate page or resource.
// These would eventually have been filtered out as duplicates when
// inserting them into the document store,
// but doing it here will preserve a consistent ordering.
baseLang := hstrings.Tuple{First: pi.Base(), Second: info.Meta().Lang}
if seen[baseLang] {
return nil
}
seen[baseLang] = true
return c.g.Enqueue(info)
}