hugo/hugofs/filter_fs.go
Bjørn Erik Pedersen eada236f87
Introduce a tree map for all content
This commit introduces a new data structure to store pages and their resources.

This data structure is backed by radix trees.

This simplifies tree operations, makes all pages a bundle, and paves the way for #6310.

It also solves a set of annoying issues (see list below).

Although it was not a motivation behind this change, this commit also makes Hugo in general a little bit faster and more memory efficient (see benchmarks), especially for partial rebuilds on content edits, but also when taxonomies are in use.

```
name                                   old time/op    new time/op    delta
SiteNew/Bundle_with_image/Edit-16        1.32ms ± 8%    1.00ms ± 9%  -24.42%  (p=0.029 n=4+4)
SiteNew/Bundle_with_JSON_file/Edit-16    1.28ms ± 0%    0.94ms ± 0%  -26.26%  (p=0.029 n=4+4)
SiteNew/Tags_and_categories/Edit-16      33.9ms ± 2%    21.8ms ± 1%  -35.67%  (p=0.029 n=4+4)
SiteNew/Canonify_URLs/Edit-16            40.6ms ± 1%    37.7ms ± 3%   -7.20%  (p=0.029 n=4+4)
SiteNew/Deep_content_tree/Edit-16        56.7ms ± 0%    51.7ms ± 1%   -8.82%  (p=0.029 n=4+4)
SiteNew/Many_HTML_templates/Edit-16      19.9ms ± 2%    18.3ms ± 3%   -7.64%  (p=0.029 n=4+4)
SiteNew/Page_collections/Edit-16         37.9ms ± 4%    34.0ms ± 2%  -10.28%  (p=0.029 n=4+4)
SiteNew/Bundle_with_image-16             10.7ms ± 0%    10.6ms ± 0%   -1.15%  (p=0.029 n=4+4)
SiteNew/Bundle_with_JSON_file-16         10.8ms ± 0%    10.7ms ± 0%   -1.05%  (p=0.029 n=4+4)
SiteNew/Tags_and_categories-16           43.2ms ± 1%    39.6ms ± 1%   -8.35%  (p=0.029 n=4+4)
SiteNew/Canonify_URLs-16                 47.6ms ± 1%    47.3ms ± 0%     ~     (p=0.057 n=4+4)
SiteNew/Deep_content_tree-16             73.0ms ± 1%    74.2ms ± 1%     ~     (p=0.114 n=4+4)
SiteNew/Many_HTML_templates-16           37.9ms ± 0%    38.1ms ± 1%     ~     (p=0.114 n=4+4)
SiteNew/Page_collections-16              53.6ms ± 1%    54.7ms ± 1%   +2.09%  (p=0.029 n=4+4)

name                                   old alloc/op   new alloc/op   delta
SiteNew/Bundle_with_image/Edit-16         486kB ± 0%     430kB ± 0%  -11.47%  (p=0.029 n=4+4)
SiteNew/Bundle_with_JSON_file/Edit-16     265kB ± 0%     209kB ± 0%  -21.06%  (p=0.029 n=4+4)
SiteNew/Tags_and_categories/Edit-16      13.6MB ± 0%     8.8MB ± 0%  -34.93%  (p=0.029 n=4+4)
SiteNew/Canonify_URLs/Edit-16            66.5MB ± 0%    63.9MB ± 0%   -3.95%  (p=0.029 n=4+4)
SiteNew/Deep_content_tree/Edit-16        28.8MB ± 0%    25.8MB ± 0%  -10.55%  (p=0.029 n=4+4)
SiteNew/Many_HTML_templates/Edit-16      6.16MB ± 0%    5.56MB ± 0%   -9.86%  (p=0.029 n=4+4)
SiteNew/Page_collections/Edit-16         16.9MB ± 0%    16.0MB ± 0%   -5.19%  (p=0.029 n=4+4)
SiteNew/Bundle_with_image-16             2.28MB ± 0%    2.29MB ± 0%   +0.35%  (p=0.029 n=4+4)
SiteNew/Bundle_with_JSON_file-16         2.07MB ± 0%    2.07MB ± 0%     ~     (p=0.114 n=4+4)
SiteNew/Tags_and_categories-16           14.3MB ± 0%    13.2MB ± 0%   -7.30%  (p=0.029 n=4+4)
SiteNew/Canonify_URLs-16                 69.1MB ± 0%    69.0MB ± 0%     ~     (p=0.343 n=4+4)
SiteNew/Deep_content_tree-16             31.3MB ± 0%    31.8MB ± 0%   +1.49%  (p=0.029 n=4+4)
SiteNew/Many_HTML_templates-16           10.8MB ± 0%    10.9MB ± 0%   +1.11%  (p=0.029 n=4+4)
SiteNew/Page_collections-16              21.4MB ± 0%    21.6MB ± 0%   +1.15%  (p=0.029 n=4+4)

name                                   old allocs/op  new allocs/op  delta
SiteNew/Bundle_with_image/Edit-16         4.74k ± 0%     3.86k ± 0%  -18.57%  (p=0.029 n=4+4)
SiteNew/Bundle_with_JSON_file/Edit-16     4.73k ± 0%     3.85k ± 0%  -18.58%  (p=0.029 n=4+4)
SiteNew/Tags_and_categories/Edit-16        301k ± 0%      198k ± 0%  -34.14%  (p=0.029 n=4+4)
SiteNew/Canonify_URLs/Edit-16              389k ± 0%      373k ± 0%   -4.07%  (p=0.029 n=4+4)
SiteNew/Deep_content_tree/Edit-16          338k ± 0%      262k ± 0%  -22.63%  (p=0.029 n=4+4)
SiteNew/Many_HTML_templates/Edit-16        102k ± 0%       88k ± 0%  -13.81%  (p=0.029 n=4+4)
SiteNew/Page_collections/Edit-16           176k ± 0%      152k ± 0%  -13.32%  (p=0.029 n=4+4)
SiteNew/Bundle_with_image-16              26.8k ± 0%     26.8k ± 0%   +0.05%  (p=0.029 n=4+4)
SiteNew/Bundle_with_JSON_file-16          26.8k ± 0%     26.8k ± 0%   +0.05%  (p=0.029 n=4+4)
SiteNew/Tags_and_categories-16             273k ± 0%      245k ± 0%  -10.36%  (p=0.029 n=4+4)
SiteNew/Canonify_URLs-16                   396k ± 0%      398k ± 0%   +0.39%  (p=0.029 n=4+4)
SiteNew/Deep_content_tree-16               317k ± 0%      325k ± 0%   +2.53%  (p=0.029 n=4+4)
SiteNew/Many_HTML_templates-16             146k ± 0%      147k ± 0%   +0.98%  (p=0.029 n=4+4)
SiteNew/Page_collections-16                210k ± 0%      215k ± 0%   +2.44%  (p=0.029 n=4+4)
```

Fixes #6312
Fixes #6087
Fixes #6738
Fixes #6412
Fixes #6743
Fixes #6875
Fixes #6034
Fixes #6902
Fixes #6173
Fixes #6590
2020-02-18 09:49:42 +01:00

341 lines
7.4 KiB
Go

// Copyright 2019 The Hugo Authors. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package hugofs
import (
"fmt"
"io"
"os"
"path/filepath"
"sort"
"strings"
"syscall"
"time"
"github.com/gohugoio/hugo/hugofs/files"
"github.com/spf13/afero"
)
// Compile-time assertions that the types declared in this file implement the
// afero interfaces they are assigned to below.
var (
_ afero.Fs = (*FilterFs)(nil)
_ afero.Lstater = (*FilterFs)(nil)
_ afero.File = (*filterDir)(nil)
)
// NewLanguageFs wraps fs in a FilterFs that attaches language metadata to the
// file infos it returns.
//
// langs maps a language code to its ordinal. For every non-directory entry the
// language is taken from the file name (e.g. "myfile.sv.txt") when the name
// carries one of the codes in langs, and otherwise from the metadata already
// present on the entry. Once a listing is complete, every entry is also
// tagged with the sorted, de-duplicated language codes of all files that
// share its translation base name.
func NewLanguageFs(langs map[string]int, fs afero.Fs) (afero.Fs, error) {
	// perSource decorates the entries of one listing in place.
	perSource := func(ffs *FilterFs, name string, infos []os.FileInfo) {
		for idx := range infos {
			info := infos[idx]

			if info.IsDir() {
				// Directories only get an opener for their joined path.
				dirname := filepath.Join(name, info.Name())
				infos[idx] = decorateFileInfo(info, ffs, ffs.getOpener(dirname), "", "", nil)
				continue
			}

			lang := info.(FileMetaInfo).Meta().Lang()
			nameLang, baseName, baseNameWithExt := langInfoFrom(langs, info.Name())

			// Weight 0: no language in the file name.
			// Weight 1: language in the file name.
			// Weight 2: language in the file name matching the entry's own
			// language, e.g. myfile.sv.txt inside the sv filesystem.
			var weight int
			if nameLang != "" {
				if nameLang == lang {
					weight = 2
				} else {
					weight = 1
				}
				lang = nameLang
			}

			infos[idx] = NewFileMetaInfo(info, FileMeta{
				metaKeyLang:                       lang,
				metaKeyWeight:                     weight,
				metaKeyOrdinal:                    langs[lang],
				metaKeyTranslationBaseName:        baseName,
				metaKeyTranslationBaseNameWithExt: baseNameWithExt,
				metaKeyClassifier:                 files.ClassifyContentFile(info.Name()),
			})
		}
	}

	// all records on every entry which languages exist for its translation
	// base name.
	all := func(infos []os.FileInfo) {
		// Maps translation base name (with extension) to a list of language codes.
		byBaseName := make(map[string][]string)

		for _, info := range infos {
			if info.IsDir() {
				continue
			}
			meta := info.(FileMetaInfo).Meta()
			key := meta.TranslationBaseNameWithExt()
			byBaseName[key] = append(byBaseName[key], meta.Lang())
		}

		for _, info := range infos {
			fim := info.(FileMetaInfo)
			codes := byBaseName[fim.Meta().TranslationBaseNameWithExt()]
			if len(codes) == 0 {
				continue
			}
			fim.Meta()["translations"] = sortAndremoveStringDuplicates(codes)
		}
	}

	return &FilterFs{
		fs:             fs,
		applyPerSource: perSource,
		applyAll:       all,
	}, nil
}
// NewFilterFs wraps fs in a FilterFs whose only per-listing step is to
// decorate directory entries with an opener for the filename stored in their
// metadata. Non-directory entries are left untouched.
func NewFilterFs(fs afero.Fs) (afero.Fs, error) {
	decorateDirs := func(ffs *FilterFs, name string, infos []os.FileInfo) {
		for idx, info := range infos {
			if !info.IsDir() {
				continue
			}
			filename := info.(FileMetaInfo).Meta().Filename()
			infos[idx] = decorateFileInfo(info, ffs, ffs.getOpener(filename), "", "", nil)
		}
	}

	return &FilterFs{
		fs:             fs,
		applyPerSource: decorateDirs,
	}, nil
}
// FilterFs is an ordered composite filesystem.
//
// It wraps another afero.Fs and runs the file infos it returns through the
// optional hooks below. The mutating operations defined on it (Chmod, Mkdir,
// Remove, ...) all return syscall.EPERM.
type FilterFs struct {
// fs is the wrapped filesystem that all reads are delegated to.
fs afero.Fs
// applyPerSource, when non-nil, is called first by applyFilters with the
// FilterFs itself, the name given to applyFilters and the file infos; it may
// replace entries of fis in place.
applyPerSource func(fs *FilterFs, name string, fis []os.FileInfo)
// applyAll, when non-nil, is called by applyFilters after duplicate
// directories have been removed from the listing.
applyAll func(fis []os.FileInfo)
}
// Chmod is not supported; it always returns syscall.EPERM.
func (fs *FilterFs) Chmod(n string, m os.FileMode) error {
return syscall.EPERM
}
// Chtimes is not supported; it always returns syscall.EPERM.
func (fs *FilterFs) Chtimes(n string, a, m time.Time) error {
return syscall.EPERM
}
// LstatIfPossible returns the file info for name, looked up on the wrapped
// filesystem via lstatIfPossible.
//
// Directories are returned after being passed through decorateFileInfo with
// an opener for name, and the boolean result is always false for them. For
// any other file, the info and boolean from the lookup are returned once
// applyFilters has been run on the info. Lookup errors are returned as-is.
func (fs *FilterFs) LstatIfPossible(name string) (os.FileInfo, bool, error) {
fi, b, err := lstatIfPossible(fs.fs, name)
if err != nil {
return nil, false, err
}
if fi.IsDir() {
return decorateFileInfo(fi, fs, fs.getOpener(name), "", "", nil), false, nil
}
// NOTE(review): the slice and error returned by applyFilters are discarded.
// fi is passed as a variadic argument, so entries that applyPerSource
// replaces inside that slice are not reflected in the fi returned below —
// confirm whether this is intended.
fs.applyFilters(name, -1, fi)
return fi, b, nil
}
// Mkdir is not supported; it always returns syscall.EPERM.
func (fs *FilterFs) Mkdir(n string, p os.FileMode) error {
return syscall.EPERM
}
// MkdirAll is not supported; it always returns syscall.EPERM.
func (fs *FilterFs) MkdirAll(n string, p os.FileMode) error {
return syscall.EPERM
}
// Name returns the fixed identifier "WeightedFileSystem" for this filesystem.
func (fs *FilterFs) Name() string {
return "WeightedFileSystem"
}
// Open opens name on the wrapped filesystem and wraps the resulting file in a
// filterDir that refers back to fs, so that directory listings read from it
// are passed through applyFilters. Errors from the wrapped filesystem are
// returned unchanged.
func (fs *FilterFs) Open(name string) (afero.File, error) {
f, err := fs.fs.Open(name)
if err != nil {
return nil, err
}
return &filterDir{
File: f,
ffs: fs,
}, nil
}
// OpenFile opens name on the wrapped filesystem using its Open method. The
// flag and perm arguments are ignored, and the file is returned as-is, without
// the filterDir wrapper that Open adds.
func (fs *FilterFs) OpenFile(name string, flag int, perm os.FileMode) (afero.File, error) {
return fs.fs.Open(name)
}
// ReadDir is not implemented; calling it always panics.
func (fs *FilterFs) ReadDir(name string) ([]os.FileInfo, error) {
panic("not implemented")
}
// Remove is not supported; it always returns syscall.EPERM.
func (fs *FilterFs) Remove(n string) error {
return syscall.EPERM
}
// RemoveAll is not supported; it always returns syscall.EPERM.
func (fs *FilterFs) RemoveAll(p string) error {
return syscall.EPERM
}
// Rename is not supported; it always returns syscall.EPERM.
func (fs *FilterFs) Rename(o, n string) error {
return syscall.EPERM
}
// Stat returns the file info and error reported by LstatIfPossible for name,
// dropping its boolean result.
func (fs *FilterFs) Stat(name string) (os.FileInfo, error) {
fi, _, err := fs.LstatIfPossible(name)
return fi, err
}
// Create is not supported; it always returns a nil file and syscall.EPERM.
func (fs *FilterFs) Create(n string) (afero.File, error) {
return nil, syscall.EPERM
}
// getOpener returns a function that opens name through fs.Open every time it
// is called. Nothing is opened until the returned function is invoked.
func (fs *FilterFs) getOpener(name string) func() (afero.File, error) {
return func() (afero.File, error) {
return fs.Open(name)
}
}
// applyFilters post-processes the file infos of one listing and returns the
// resulting slice. The returned error is always nil.
//
// The steps are, in order:
//  1. applyPerSource, if set, is called with fs, name and fis.
//  2. Directories whose name has already been seen earlier in fis are removed;
//     the first occurrence is kept.
//  3. applyAll, if set, is called with what remains.
//  4. If count is positive and at least count entries remain, only the first
//     count entries are returned.
//
// The removal in step 2 is done in place on the backing array of fis.
func (fs *FilterFs) applyFilters(name string, count int, fis ...os.FileInfo) ([]os.FileInfo, error) {
	if hook := fs.applyPerSource; hook != nil {
		hook(fs, name, fis)
	}

	// Collect the positions of directories whose name was seen before.
	known := make(map[string]bool)
	var dupes []int
	for pos, fi := range fis {
		if !fi.IsDir() {
			continue
		}
		dirname := fi.Name()
		if known[dirname] {
			dupes = append(dupes, pos)
			continue
		}
		known[dirname] = true
	}

	// Remove duplicate directories, keep first. Going through the recorded
	// positions from the back keeps the not yet processed ones valid.
	for k := len(dupes) - 1; k >= 0; k-- {
		pos := dupes[k]
		copy(fis[pos:], fis[pos+1:])
		fis = fis[:len(fis)-1]
	}

	if hook := fs.applyAll; hook != nil {
		hook(fis)
	}

	if count <= 0 || len(fis) < count {
		return fis, nil
	}
	return fis[:count], nil
}
// filterDir wraps an afero.File so that the directory listings read from it
// are passed through the filters of ffs.
type filterDir struct {
afero.File
// ffs is the FilterFs whose applyFilters is run on the entries returned by
// Readdir.
ffs *FilterFs
}
// Readdir lists the wrapped file and passes the entries through
// ffs.applyFilters together with the file's name.
//
// The wrapped file is always read with Readdir(-1), whatever count is; count
// is only forwarded to applyFilters, which truncates the filtered result when
// count is positive.
func (f *filterDir) Readdir(count int) ([]os.FileInfo, error) {
fis, err := f.File.Readdir(-1)
if err != nil {
return nil, err
}
return f.ffs.applyFilters(f.Name(), count, fis...)
}
// Readdirnames returns the names of the entries that Readdir(count) yields,
// in the same order. An error from Readdir is returned unchanged together
// with a nil slice.
func (f *filterDir) Readdirnames(count int) ([]string, error) {
	infos, err := f.Readdir(count)
	if err != nil {
		return nil, err
	}

	names := make([]string, 0, len(infos))
	for _, info := range infos {
		names = append(names, info.Name())
	}
	return names, nil
}
// langInfoFrom tries to extract the language from the given file name.
//
// A language code from languages placed directly in front of the file
// extension, e.g. "en" in "mypost.en.md", is treated as the file's language;
// it will win over the language set on the file system.
//
// The three results are, in order:
//   - the language code found in the name, or "" if there is none;
//   - the translation base name, i.e. the base name with both the extension
//     and any recognised language code removed ("mypost");
//   - the translation base name with the extension put back ("mypost.md").
func langInfoFrom(languages map[string]int, name string) (string, string, string) {
	base := filepath.Base(name)
	ext := filepath.Ext(base)

	// Trimming an empty extension is a no-op, so no guard is needed here.
	stem := strings.TrimSuffix(base, ext)

	lang := ""
	langExt := filepath.Ext(stem)
	candidate := strings.TrimPrefix(langExt, ".")
	if _, ok := languages[candidate]; ok {
		lang = candidate
		stem = strings.TrimSuffix(stem, langExt)
	}

	return lang, stem, stem + ext
}
// printFs is a debugging helper that walks fs starting at path and writes one
// line per visited path, prefixed with "p:::", to w.
//
// A nil fs makes the call a no-op. A nil w falls back to os.Stdout, which is
// where this helper printed before it honoured w. Errors met during the walk
// are ignored: the output is best-effort.
func printFs(fs afero.Fs, path string, w io.Writer) {
	if fs == nil {
		return
	}
	if w == nil {
		w = os.Stdout
	}

	// The walk error is deliberately discarded; this is debug output only.
	_ = afero.Walk(fs, path, func(path string, info os.FileInfo, err error) error {
		fmt.Fprintln(w, "p:::", path)
		return nil
	})
}
// sortAndremoveStringDuplicates sorts s in increasing order and removes
// duplicate values. It works in place: the returned slice shares its backing
// array with s, and the contents of s are reordered.
//
// Slices with fewer than two elements, including nil and empty ones, are
// returned unchanged.
func sortAndremoveStringDuplicates(s []string) []string {
	// Nothing to sort or compact. This also keeps the final reslice below
	// from reaching past the end of an empty slice.
	if len(s) < 2 {
		return s
	}

	sort.Strings(s)

	// last is the index of the most recently kept unique value.
	last := 0
	for j := 1; j < len(s); j++ {
		if s[j] == s[last] {
			continue
		}
		last++
		s[last] = s[j]
	}

	return s[:last+1]
}