Bjørn Erik Pedersen 7ff0a8ee9f Simplify page tree logic
This is preparation for #6041.

For historic reasons, the code for building the section tree and the code for the taxonomies were very much separate.

This works, but it makes the code hard to extend and maintain, and possibly not as fast as it could be.

This simplification also introduces 3 slightly breaking changes, which I suspect most people will be pleased about. See referenced issues:

This commit also switches the radix tree dependency to a mutable implementation:

Fixes #6154
Fixes #6153
Fixes #6152
2019-08-08 20:13:39 +02:00

779 lines
16 KiB

// Copyright 2019 The Hugo Authors. All rights reserved.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// See the License for the specific language governing permissions and
// limitations under the License.
package hugolib
import (
pth "path"
func newPagesCollector(
sp *source.SourceSpec,
logger *loggers.Logger,
contentTracker *contentChangeMap,
proc pagesCollectorProcessorProvider, filenames ...string) *pagesCollector {
return &pagesCollector{
fs: sp.SourceFs,
proc: proc,
sp: sp,
logger: logger,
filenames: filenames,
tracker: contentTracker,
func newPagesProcessor(h *HugoSites, sp *source.SourceSpec, partialBuild bool) *pagesProcessor {
return &pagesProcessor{
h: h,
sp: sp,
partialBuild: partialBuild,
numWorkers: config.GetNumWorkerMultiplier() * 3,
type fileinfoBundle struct {
header hugofs.FileMetaInfo
resources []hugofs.FileMetaInfo
func (b *fileinfoBundle) containsResource(name string) bool {
for _, r := range b.resources {
if r.Name() == name {
return true
return false
// pageBundles maps a language code to the bundle collected for that language.
type pageBundles map[string]*fileinfoBundle
type pagesCollector struct {
sp *source.SourceSpec
fs afero.Fs
logger *loggers.Logger
// Ordered list (bundle headers first) used in partial builds.
filenames []string
// Content files tracker used in partial builds.
tracker *contentChangeMap
proc pagesCollectorProcessorProvider
type contentDirKey struct {
dirname string
filename string
tp bundleDirType
// Collect gathers content files and hands them to the processor.
//
// With no filenames set, everything is collected starting at the root ("").
// Otherwise each filename is resolved (and removed) via the tracker, the
// results are de-duplicated, and each resulting directory is collected with
// partial=true. Afterwards it waits for the processor; a collect error takes
// precedence over the error returned by Wait.
//
// NOTE(review): this block looks truncated in this copy — closing braces are
// gone, the switch has no tag or default case, and the body of the first
// `if collectErr != nil` is missing. Confirm against the upstream file.
func (c *pagesCollector) Collect() error {
var collectErr error
if len(c.filenames) == 0 {
// Collect everything.
collectErr = c.collectDir("", false, nil)
} else {
// De-duplicate: several changed files may resolve to the same directory key.
dirs := make(map[contentDirKey]bool)
for _, filename := range c.filenames {
dir, filename, btype := c.tracker.resolveAndRemove(filename)
dirs[contentDirKey{dir, filename, btype}] = true
for dir := range dirs {
switch {
case bundleLeaf, bundleBranch:
// Bundles are re-collected as a whole (no file filter).
collectErr = c.collectDir(dir.dirname, true, nil)
// We always start from a directory.
// The filter keeps only entries whose Meta().Path() is a suffix of the changed filename.
collectErr = c.collectDir(dir.dirname, true, func(fim hugofs.FileMetaInfo) bool {
return strings.HasSuffix(dir.filename, fim.Meta().Path())
if collectErr != nil {
// Always wait for the processor, even if collecting failed.
err := c.proc.Wait()
if collectErr != nil {
return collectErr
return err
// collectDir walks dirname on c.fs and dispatches each directory's entries to
// the bundle and file handlers.
//
// A dirname that does not exist is not an error (nil is returned). When
// partial is true, or when the directory is a leaf bundle, the pre-hook
// returns filepath.SkipDir after handling the directory. inFilter, if
// non-nil, is consulted after the SkipDir and IgnoreFile checks.
//
// NOTE(review): this copy of the function has lost its closing braces and
// apparently some statements (e.g. the bodies under "Track symlinks" and
// `fi.IsDir()`); the comments below describe only what is visible.
func (c *pagesCollector) collectDir(dirname string, partial bool, inFilter func(fim hugofs.FileMetaInfo) bool) error {
fi, err := c.fs.Stat(dirname)
if err != nil {
if os.IsNotExist(err) {
// May have been deleted.
return nil
return err
// handleDir routes one directory's (already filtered) entries according to
// the bundle type detected for that directory.
handleDir := func(
btype bundleDirType,
dir hugofs.FileMetaInfo,
path string,
readdir []hugofs.FileMetaInfo) error {
// Register bundle directories with the tracker when one is configured.
if btype > bundleNot && c.tracker != nil {
c.tracker.add(path, btype)
if btype == bundleBranch {
if err := c.handleBundleBranch(readdir); err != nil {
return err
// A branch bundle is only this directory level, so keep walking.
return nil
} else if btype == bundleLeaf {
if err := c.handleBundleLeaf(dir, path, readdir); err != nil {
return err
return nil
// Not a bundle: treat the entries as regular files.
if err := c.handleFiles(readdir...); err != nil {
return err
return nil
// filter decides whether a directory entry is kept: entries flagged SkipDir
// or ignored by the source spec are dropped, then inFilter (if any) decides.
filter := func(fim hugofs.FileMetaInfo) bool {
if fim.Meta().SkipDir() {
return false
if c.sp.IgnoreFile(fim.Meta().Filename()) {
return false
if inFilter != nil {
return inFilter(fim)
return true
// preHook runs before a directory is descended into: it filters and
// de-duplicates the entries, detects the bundle type and calls handleDir.
preHook := func(dir hugofs.FileMetaInfo, path string, readdir []hugofs.FileMetaInfo) ([]hugofs.FileMetaInfo, error) {
var btype bundleDirType
// Filter in place, reusing readdir's backing array.
filtered := readdir[:0]
for _, fi := range readdir {
if filter(fi) {
filtered = append(filtered, fi)
if c.tracker != nil {
// Track symlinks.
readdir = filtered
// We merge language directories, so there can be duplicates, but they
// will be ordered, most important first.
var duplicates []int
seen := make(map[string]bool)
for i, fi := range readdir {
if fi.IsDir() {
meta := fi.Meta()
class := meta.Classifier()
translationBase := meta.TranslationBaseNameWithExt()
// Entries with the same language and translation base name count as duplicates.
key := pth.Join(meta.Lang(), translationBase)
if seen[key] {
duplicates = append(duplicates, i)
seen[key] = true
var thisBtype bundleDirType
switch class {
case files.ContentClassLeaf:
thisBtype = bundleLeaf
case files.ContentClassBranch:
thisBtype = bundleBranch
// Folders with both types of bundle header files (index.* and _index.*)
// have undefined behaviour and can never work.
// The branch variant will win because of sort order, but log
// a warning about it.
if thisBtype > bundleNot && btype > bundleNot && thisBtype != btype {
c.logger.WARN.Printf("Content directory %q have both index.* and _index.* files, pick one.", dir.Meta().Filename())
// Reclassify it so it will be handled as a content file inside the
// section, which is in line with the <= 0.55 behaviour.
meta["classifier"] = files.ContentClassContent
} else if thisBtype > bundleNot {
btype = thisBtype
if len(duplicates) > 0 {
// Remove from the back so the remaining indices stay valid.
for i := len(duplicates) - 1; i >= 0; i-- {
idx := duplicates[i]
readdir = append(readdir[:idx], readdir[idx+1:]...)
err := handleDir(btype, dir, path, readdir)
if err != nil {
return nil, err
// Leaf bundles and partial collects do not descend any further.
if btype == bundleLeaf || partial {
return nil, filepath.SkipDir
// Keep walking.
return readdir, nil
// The post-hook is only installed when a tracker is configured.
var postHook hugofs.WalkHook
if c.tracker != nil {
postHook = func(dir hugofs.FileMetaInfo, path string, readdir []hugofs.FileMetaInfo) ([]hugofs.FileMetaInfo, error) {
if c.tracker == nil {
// Nothing to do.
return readdir, nil
return readdir, nil
// wfn only propagates walk errors; the per-directory work happens in preHook.
wfn := func(path string, info hugofs.FileMetaInfo, err error) error {
if err != nil {
return err
return nil
w := hugofs.NewWalkway(hugofs.WalkwayConfig{
Fs: c.fs,
Logger: c.logger,
Root: dirname,
Info: fi.(hugofs.FileMetaInfo),
HookPre: preHook,
HookPost: postHook,
WalkFn: wfn})
return w.Walk()
func (c *pagesCollector) isBundleHeader(fi hugofs.FileMetaInfo) bool {
class := fi.Meta().Classifier()
return class == files.ContentClassLeaf || class == files.ContentClassBranch
func (c *pagesCollector) getLang(fi hugofs.FileMetaInfo) string {
lang := fi.Meta().Lang()
if lang != "" {
return lang
return c.sp.DefaultContentLanguage
// addToBundle adds info to the per-language bundle map.
//
// A bundle header starts a new bundle for its language. A non-header file in
// a branch bundle with no bundle for its language yet is passed to
// handleFiles instead. A content file in a leaf bundle with no bundle for its
// language gets a bundle whose header is cloned from another language.
// Non-header files are appended to their bundle's resources, and plain files
// (ContentClassFile) are additionally cloned into every other bundle that has
// no translation of the file and no resource with the same name.
//
// NOTE(review): closing braces and possibly some statements (e.g. the end of
// the `var (` group and of the "pick one" loop) are missing in this copy —
// confirm against the upstream file.
func (c *pagesCollector) addToBundle(info hugofs.FileMetaInfo, btyp bundleDirType, bundles pageBundles) error {
// getBundle returns the bundle for lang, or nil if there is none yet.
getBundle := func(lang string) *fileinfoBundle {
return bundles[lang]
// cloneBundle creates a bundle for lang with a header cloned from an existing bundle.
cloneBundle := func(lang string) *fileinfoBundle {
// Every bundled content file needs a content file header.
// Use the default content language if found, else just
// pick one.
var (
source *fileinfoBundle
found bool
source, found = bundles[c.sp.DefaultContentLanguage]
if !found {
for _, b := range bundles {
source = b
if source == nil {
panic(fmt.Sprintf("no source found, %d", len(bundles)))
clone := c.cloneFileInfo(source.header)
// Mark the cloned header as belonging to the requested language.
clone.Meta()["lang"] = lang
return &fileinfoBundle{
header: clone,
lang := c.getLang(info)
bundle := getBundle(lang)
isBundleHeader := c.isBundleHeader(info)
classifier := info.Meta().Classifier()
isContent := classifier == files.ContentClassContent
if bundle == nil {
if isBundleHeader {
bundle = &fileinfoBundle{header: info}
bundles[lang] = bundle
} else {
if btyp == bundleBranch {
// No special logic for branch bundles.
// Every language needs its own file.
// Also, we only clone bundle headers for lonesome, bundled,
// content files.
return c.handleFiles(info)
if isContent {
bundle = cloneBundle(lang)
bundles[lang] = bundle
if !isBundleHeader && bundle != nil {
bundle.resources = append(bundle.resources, info)
if classifier == files.ContentClassFile {
translations := info.Meta().Translations()
// Share the file with bundles in languages that lack their own copy.
for lang, b := range bundles {
if !stringSliceContains(lang, translations...) && !b.containsResource(info.Name()) {
// Clone and add it to the bundle.
clone := c.cloneFileInfo(info)
clone.Meta()["lang"] = lang
b.resources = append(b.resources, clone)
return nil
func (c *pagesCollector) cloneFileInfo(fi hugofs.FileMetaInfo) hugofs.FileMetaInfo {
cm := hugofs.FileMeta{}
meta := fi.Meta()
if meta == nil {
panic(fmt.Sprintf("not meta: %v", fi.Name()))
for k, v := range meta {
cm[k] = v
return hugofs.NewFileMetaInfo(fi, cm)
// handleBundleBranch builds the per-language bundles for a branch bundle from
// the entries of its directory and sends them to the processor.
//
// NOTE(review): statements appear to be missing in this copy (the body of the
// `fim.IsDir()` check and the remaining switch cases), so the exact routing
// between handleFiles and addToBundle cannot be read off here — confirm
// against the upstream file.
func (c *pagesCollector) handleBundleBranch(readdir []hugofs.FileMetaInfo) error {
// Maps bundles to its language.
bundles := pageBundles{}
for _, fim := range readdir {
if fim.IsDir() {
meta := fim.Meta()
switch meta.Classifier() {
case files.ContentClassContent:
if err := c.handleFiles(fim); err != nil {
return err
if err := c.addToBundle(fim, bundleBranch, bundles); err != nil {
return err
return c.proc.Process(bundles)
func (c *pagesCollector) handleBundleLeaf(dir hugofs.FileMetaInfo, path string, readdir []hugofs.FileMetaInfo) error {
// Maps bundles to its language.
bundles := pageBundles{}
walk := func(path string, info hugofs.FileMetaInfo, err error) error {
if err != nil {
return err
if info.IsDir() {
return nil
return c.addToBundle(info, bundleLeaf, bundles)
// Start a new walker from the given path.
w := hugofs.NewWalkway(hugofs.WalkwayConfig{
Root: path,
Fs: c.fs,
Logger: c.logger,
Info: dir,
DirEntries: readdir,
WalkFn: walk})
if err := w.Walk(); err != nil {
return err
return c.proc.Process(bundles)
// handleFiles sends the given file infos to the processor one at a time and
// returns the first error Process reports.
//
// NOTE(review): the body of the `fi.IsDir()` check is not visible in this
// copy; presumably directories are skipped — confirm against upstream.
func (c *pagesCollector) handleFiles(fis ...hugofs.FileMetaInfo) error {
for _, fi := range fis {
if fi.IsDir() {
if err := c.proc.Process(fi); err != nil {
return err
return nil
// pagesCollectorProcessorProvider is the sink the pagesCollector feeds.
type pagesCollectorProcessorProvider interface {
	// Process receives one collected item. The collector passes either a
	// single hugofs.FileMetaInfo or a pageBundles map.
	Process(item interface{}) error

	// Start prepares the processor and returns a context for the run.
	Start(ctx context.Context) context.Context

	// Wait is called by the collector once it has finished collecting.
	Wait() error
}
type pagesProcessor struct {
h *HugoSites
sp *source.SourceSpec
itemChan chan interface{}
itemGroup *errgroup.Group
// The output Pages
pagesChan chan *pageState
pagesGroup *errgroup.Group
numWorkers int
partialBuild bool
func (proc *pagesProcessor) Process(item interface{}) error {
proc.itemChan <- item
return nil
// Start creates the page and item channels (both buffered to numWorkers) and
// their errgroups, then launches one goroutine that drains pagesChan and
// numWorkers goroutines that drain itemChan. It returns the context derived
// from the two errgroup.WithContext calls.
//
// NOTE(review): large parts of the goroutine bodies are missing in this copy
// (the branches after `p.forceRender`, the select's other cases, what happens
// on a process error, and all closing braces) — confirm against upstream.
func (proc *pagesProcessor) Start(ctx context.Context) context.Context {
proc.pagesChan = make(chan *pageState, proc.numWorkers)
proc.pagesGroup, ctx = errgroup.WithContext(ctx)
proc.itemChan = make(chan interface{}, proc.numWorkers)
proc.itemGroup, ctx = errgroup.WithContext(ctx)
// Single consumer of the produced pages.
proc.pagesGroup.Go(func() error {
for p := range proc.pagesChan {
s := p.s
p.forceRender = proc.partialBuild
if p.forceRender {
} else {
return nil
// Item workers.
for i := 0; i < proc.numWorkers; i++ {
proc.itemGroup.Go(func() error {
for item := range proc.itemChan {
select {
// Stop working once the HugoSites Done channel fires.
case <-proc.h.Done():
return nil
if err := proc.process(item); err != nil {
return nil
return ctx
func (proc *pagesProcessor) Wait() error {
err := proc.itemGroup.Wait()
if err != nil {
return err
return proc.pagesGroup.Wait()
// newPageFromBundle creates the page for the bundle's header and converts
// each bundled file into a resource owned by that page: content files become
// pages (with resourcePath set relative to the owner's directory), plain
// files become resources via newResource.
//
// NOTE(review): this copy has lost lines — the panic presumably sits in a
// default case, and nothing visible uses `resources` after the loop. Confirm
// against the upstream file.
func (proc *pagesProcessor) newPageFromBundle(b *fileinfoBundle) (*pageState, error) {
p, err := proc.newPageFromFi(b.header, nil)
if err != nil {
return nil, err
if len(b.resources) > 0 {
resources := make(resource.Resources, len(b.resources))
for i, rfi := range b.resources {
meta := rfi.Meta()
classifier := meta.Classifier()
var r resource.Resource
switch classifier {
case files.ContentClassContent:
// A bundled content file becomes a page owned by p.
rp, err := proc.newPageFromFi(rfi, p)
if err != nil {
return nil, err
// Path of the bundled page relative to the owner's directory, slash-separated.
rp.m.resourcePath = filepath.ToSlash(strings.TrimPrefix(rp.Path(), p.File().Dir()))
r = rp
case files.ContentClassFile:
r, err = proc.newResource(rfi, p)
if err != nil {
return nil, err
panic(fmt.Sprintf("invalid classifier: %q", classifier))
resources[i] = r
return p, nil
func (proc *pagesProcessor) newPageFromFi(fim hugofs.FileMetaInfo, owner *pageState) (*pageState, error) {
fi, err := newFileInfo(proc.sp, fim)
if err != nil {
return nil, err
var s *Site
meta := fim.Meta()
if owner != nil {
s = owner.s
} else {
lang := meta.Lang()
s = proc.getSite(lang)
r := func() (hugio.ReadSeekCloser, error) {
return meta.Open()
p, err := newPageWithContent(fi, s, owner != nil, r)
if err != nil {
return nil, err
p.parent = owner
return p, nil
// newResource creates a resource for a bundled file, owned by the given page.
// The target filename is the file's path with the owner's directory prefix
// removed, and the target base paths are collected from the owner's output
// formats.
//
// NOTE(review): lines are missing in this copy — the body of `if seen[p]`
// (presumably it skips paths already added) and the opening of the descriptor
// literal passed to ResourceSpec.New. Confirm against the upstream file.
func (proc *pagesProcessor) newResource(fim hugofs.FileMetaInfo, owner *pageState) (resource.Resource, error) {
// TODO(bep) consolidate with multihost logic + clean up
outputFormats := owner.m.outputFormats()
seen := make(map[string]bool)
var targetBasePaths []string
// Make sure bundled resources are published to all of the output formats'
// sub paths.
for _, f := range outputFormats {
p := f.Path
if seen[p] {
seen[p] = true
targetBasePaths = append(targetBasePaths, p)
meta := fim.Meta()
// The file is opened lazily through this callback.
r := func() (hugio.ReadSeekCloser, error) {
return meta.Open()
target := strings.TrimPrefix(meta.Path(), owner.File().Dir())
return owner.s.ResourceSpec.New(
TargetPaths: owner.getTargetPaths,
OpenReadSeekCloser: r,
FileInfo: fim,
RelTargetFilename: target,
TargetBasePaths: targetBasePaths,
func (proc *pagesProcessor) getSite(lang string) *Site {
if lang == "" {
return proc.h.Sites[0]
for _, s := range proc.h.Sites {
if lang == s.Lang() {
return s
return proc.h.Sites[0]
func (proc *pagesProcessor) copyFile(fim hugofs.FileMetaInfo) error {
meta := fim.Meta()
s := proc.getSite(meta.Lang())
f, err := meta.Open()
if err != nil {
return errors.Wrap(err, "copyFile: failed to open")
target := filepath.Join(s.PathSpec.GetTargetLanguageBasePath(), meta.Path())
defer f.Close()
return s.publish(&s.PathSpec.ProcessingStats.Files, target, f)
// process handles one collected item, which is either a pageBundles map or a
// single hugofs.FileMetaInfo. Pages that are created without error are sent on
// pagesChan. Files in a disabled language are skipped.
//
// NOTE(review): many statements are missing in this copy — the error branch
// of `send`, the per-bundle handling, the ContentClassFile case body and the
// default cases that presumably hold the two panics. Confirm against the
// upstream file.
func (proc *pagesProcessor) process(item interface{}) error {
// send forwards a successfully created page to the pages channel.
send := func(p *pageState, err error) {
if err != nil {
} else {
proc.pagesChan <- p
switch v := item.(type) {
// Page bundles mapped to their language.
case pageBundles:
for _, bundle := range v {
if proc.shouldSkip(bundle.header) {
case hugofs.FileMetaInfo:
if proc.shouldSkip(v) {
return nil
meta := v.Meta()
classifier := meta.Classifier()
switch classifier {
case files.ContentClassContent:
send(proc.newPageFromFi(v, nil))
case files.ContentClassFile:
panic(fmt.Sprintf("invalid classifier: %q", classifier))
panic(fmt.Sprintf("unrecognized item type in Process: %T", item))
return nil
// sendError handles an error raised while processing an item.
//
// NOTE(review): the function is truncated in this copy after the nil check;
// presumably a nil err is a no-op and any other error is reported — confirm
// against the upstream file.
func (proc *pagesProcessor) sendError(err error) {
if err == nil {
func (proc *pagesProcessor) shouldSkip(fim hugofs.FileMetaInfo) bool {
return proc.sp.DisabledLanguages[fim.Meta().Lang()]
// stringSliceContains reports whether k is equal to any of values.
// It returns false when values is empty.
func stringSliceContains(k string, values ...string) bool {
	for _, v := range values {
		if k == v {
			return true
		}
	}

	return false
}