cache/filecache: Add a cache prune func

Fixes #5439
This commit is contained in:
Bjørn Erik Pedersen 2018-11-14 12:06:46 +01:00
parent 33502667fb
commit 3c29c5af8e
No known key found for this signature in database
GPG key ID: 330E6E2BD4859D8F
6 changed files with 233 additions and 96 deletions

View file

@ -19,6 +19,7 @@ import (
"io/ioutil"
"path/filepath"
"strings"
"sync"
"time"
"github.com/gohugoio/hugo/common/hugio"
@ -44,7 +45,30 @@ type Cache struct {
// 0 is effectively turning this cache off.
maxAge time.Duration
nlocker *locker.Locker
nlocker *lockTracker
}
// lockTracker is a named locker that additionally remembers every id that
// has been locked through it.
type lockTracker struct {
	// The embedded locker provides the actual per-id locking.
	*locker.Locker

	// seen is the set of ids passed to Lock so far; seenMu guards it.
	seen   map[string]struct{}
	seenMu sync.RWMutex
}
// Lock records id as used and then acquires the lock named id on the
// embedded locker. The recorded ids are what the garbage collection that
// runs after a Hugo build uses to tell used items from unused ones.
func (l *lockTracker) Lock(id string) {
	// Cheap check under the read lock first; most ids are already recorded.
	l.seenMu.RLock()
	_, known := l.seen[id]
	l.seenMu.RUnlock()

	if !known {
		// Storing the same id twice is harmless, so there is no need to
		// re-check once the write lock is held.
		l.seenMu.Lock()
		l.seen[id] = struct{}{}
		l.seenMu.Unlock()
	}

	l.Locker.Lock(id)
}
// ItemInfo contains info about a cached file.
@ -57,7 +81,7 @@ type ItemInfo struct {
// NewCache creates a Cache that stores its items in fs and uses maxAge as
// the maximum age of an item. The cache's locker is wrapped in a lockTracker
// so the ids that get locked are recorded.
func NewCache(fs afero.Fs, maxAge time.Duration) *Cache {
return &Cache{
Fs: fs,
nlocker: locker.NewLocker(),
nlocker: &lockTracker{Locker: locker.NewLocker(), seen: make(map[string]struct{})},
maxAge: maxAge,
}
}
@ -232,7 +256,7 @@ func (c *Cache) getOrRemove(id string) hugio.ReadSeekCloser {
return nil
}
if time.Now().Sub(fi.ModTime()) > c.maxAge {
if c.isExpired(fi.ModTime()) {
c.Fs.Remove(id)
return nil
}
@ -247,6 +271,10 @@ func (c *Cache) getOrRemove(id string) hugio.ReadSeekCloser {
return f
}
// isExpired reports whether an item last modified at modTime is older than
// the cache's maxAge. A negative maxAge means that items never expire.
func (c *Cache) isExpired(modTime time.Time) bool {
	if c.maxAge < 0 {
		return false
	}
	return time.Since(modTime) > c.maxAge
}
// getString returns the content of the cached file with the given id as a
// string. The id is normalized with cleanID and locked while the file is read.
// An empty string is returned when the file cannot be opened.
// For testing.
func (c *Cache) getString(id string) string {
id = cleanID(id)
@ -254,13 +282,15 @@ func (c *Cache) getString(id string) string {
c.nlocker.Lock(id)
defer c.nlocker.Unlock(id)
if r := c.getOrRemove(id); r != nil {
defer r.Close()
b, _ := ioutil.ReadAll(r)
return string(b)
}
f, err := c.Fs.Open(id)
return ""
if err != nil {
return ""
}
defer f.Close()
b, _ := ioutil.ReadAll(f)
return string(b)
}
@ -309,5 +339,5 @@ func NewCachesFromPaths(p *paths.Paths) (Caches, error) {
}
// cleanID normalizes a cache id: the name is cleaned with filepath.Clean and
// a leading file path separator is stripped.
func cleanID(name string) string {
return filepath.Clean(name)
return strings.TrimPrefix(filepath.Clean(name), helpers.FilePathSeparator)
}

80
cache/filecache/filecache_pruner.go vendored Normal file
View file

@ -0,0 +1,80 @@
// Copyright 2018 The Hugo Authors. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package filecache
import (
"io"
"os"
"github.com/pkg/errors"
"github.com/spf13/afero"
)
// Prune removes expired and unused items from every cache in c, as well as
// any directories found to be empty, and returns the number of files removed.
//
// Detecting unused items requires a preceding full build so the cache usage
// can be tracked; when no usage has been recorded for a cache, only its
// expired items are removed.
//
// Entries that have already disappeared by the time they are removed are
// ignored. Any other error aborts the prune and is returned together with the
// number of files removed so far.
//
// Note that we operate directly on the filesystem here, so this is not
// thread safe.
func (c Caches) Prune() (int, error) {
	counter := 0
	for k, cache := range c {
		err := afero.Walk(cache.Fs, "", func(name string, info os.FileInfo, err error) error {
			if info == nil {
				// Nothing to inspect, e.g. the cache root does not exist.
				return nil
			}

			name = cleanID(name)

			if info.IsDir() {
				f, err := cache.Fs.Open(name)
				if err != nil {
					// This cache dir may not exist.
					return nil
				}
				_, err = f.Readdirnames(1)
				// Close the handle before removing the directory; removing
				// a directory that is still open fails on some platforms.
				f.Close()
				if err != io.EOF {
					return nil
				}
				// Empty dir.
				if err := cache.Fs.Remove(name); err != nil && !os.IsNotExist(err) {
					return err
				}
				return nil
			}

			shouldRemove := cache.isExpired(info.ModTime())

			if !shouldRemove && len(cache.nlocker.seen) > 0 {
				// Remove it if it's not been touched/used in the last build.
				_, seen := cache.nlocker.seen[name]
				shouldRemove = !seen
			}

			if !shouldRemove {
				return nil
			}

			err = cache.Fs.Remove(name)
			if err == nil {
				counter++
				return nil
			}
			if os.IsNotExist(err) {
				// Already gone, which is the state we wanted.
				return nil
			}
			return err
		})

		if err != nil {
			return counter, errors.Wrapf(err, "failed to prune cache %q", k)
		}
	}

	return counter, nil
}

100
cache/filecache/filecache_pruner_test.go vendored Normal file
View file

@ -0,0 +1,100 @@
// Copyright 2018 The Hugo Authors. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package filecache
import (
"fmt"
"testing"
"time"
"github.com/gohugoio/hugo/config"
"github.com/gohugoio/hugo/hugofs"
"github.com/gohugoio/hugo/hugolib/paths"
"github.com/stretchr/testify/require"
)
// TestPrune checks both pruning modes: removal of expired items, and removal
// of items that were not used after a fresh set of caches has been created.
func TestPrune(t *testing.T) {
	t.Parallel()

	assert := require.New(t)

	configStr := `
resourceDir = "myresources"
[caches]
[caches.getjson]
maxAge = "200ms"
dir = "/cache/c"
`

	cfg, err := config.FromConfigString(configStr, "toml")
	assert.NoError(err)
	fs := hugofs.NewMem(cfg)
	p, err := paths.New(fs, cfg)
	assert.NoError(err)

	const numItems = 10
	itemID := func(i int) string { return fmt.Sprintf("i%d", i) }
	create := func() ([]byte, error) { return []byte("abc"), nil }

	caches, err := NewCachesFromPaths(p)
	assert.NoError(err)

	jsonCache := caches.GetJSONCache()
	for i := 0; i < numItems; i++ {
		jsonCache.GetOrCreateBytes(itemID(i), create)
		if i == 4 {
			// This will expire the first 5
			time.Sleep(201 * time.Millisecond)
		}
	}

	count, err := caches.Prune()
	assert.NoError(err)
	assert.Equal(5, count)

	// The expired half is gone, the rest is still there.
	for i := 0; i < numItems; i++ {
		id := itemID(i)
		want := "abc"
		if i < 5 {
			want = ""
		}
		assert.Equal(want, jsonCache.getString(id), id)
	}

	caches, err = NewCachesFromPaths(p)
	assert.NoError(err)
	jsonCache = caches.GetJSONCache()

	// Touch one and then prune.
	jsonCache.GetOrCreateBytes("i5", create)

	count, err = caches.Prune()
	assert.NoError(err)
	assert.Equal(4, count)

	// Now only the i5 should be left.
	for i := 0; i < numItems; i++ {
		id := itemID(i)
		want := ""
		if i == 5 {
			want = "abc"
		}
		assert.Equal(want, jsonCache.getString(id), id)
	}
}

View file

@ -209,3 +209,9 @@ dir = "/cache/c"
}
wg.Wait()
}
// TestCleanID checks that cleanID collapses repeated separators and yields
// the same relative id whether or not the input starts with a separator.
func TestCleanID(t *testing.T) {
	assert := require.New(t)

	want := filepath.FromSlash("a/b/c.txt")
	for _, in := range []string{"/a/b//c.txt", "a/b//c.txt"} {
		assert.Equal(want, cleanID(filepath.FromSlash(in)))
	}
}

View file

@ -1,4 +1,4 @@
// Copyright 2017-present The Hugo Authors. All rights reserved.
// Copyright 2018 The Hugo Authors. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
@ -13,89 +13,7 @@
package hugolib
import (
"io"
"os"
"strings"
"github.com/gohugoio/hugo/helpers"
"github.com/spf13/afero"
)
// GC requires a build first.
// GC requires a build first and must run on its own. It is not thread safe.
// It returns the number of removed cache items.
func (h *HugoSites) GC() (int, error) {
s := h.Sites[0]
assetsCacheFs := h.Deps.FileCaches.AssetsCache().Fs
imageCacheFs := h.Deps.FileCaches.ImageCache().Fs
isImageInUse := func(name string) bool {
for _, site := range h.Sites {
if site.ResourceSpec.IsInImageCache(name) {
return true
}
}
return false
}
isAssetInUse := func(name string) bool {
// These assets are stored in tuplets with an added extension to the key.
key := strings.TrimSuffix(name, helpers.Ext(name))
for _, site := range h.Sites {
if site.ResourceSpec.ResourceCache.Contains(key) {
return true
}
}
return false
}
walker := func(fs afero.Fs, dirname string, inUse func(filename string) bool) (int, error) {
counter := 0
err := afero.Walk(fs, dirname, func(path string, info os.FileInfo, err error) error {
if info == nil {
return nil
}
if info.IsDir() {
f, err := fs.Open(path)
if err != nil {
return nil
}
defer f.Close()
_, err = f.Readdirnames(1)
if err == io.EOF {
// Empty dir.
s.Fs.Source.Remove(path)
}
return nil
}
inUse := inUse(path)
if !inUse {
err := fs.Remove(path)
if err != nil && !os.IsNotExist(err) {
s.Log.ERROR.Printf("Failed to remove %q: %s", path, err)
} else {
counter++
}
}
return nil
})
return counter, err
}
imageCounter, err1 := walker(imageCacheFs, "", isImageInUse)
assetsCounter, err2 := walker(assetsCacheFs, "", isAssetInUse)
totalCount := imageCounter + assetsCounter
if err1 != nil {
return totalCount, err1
}
return totalCount, err2
// Pruning is delegated to the file caches.
return h.Deps.FileCaches.Prune()
}

View file

@ -192,7 +192,10 @@ func newDeps(cfg config.Provider) *deps.Deps {
logger := loggers.NewErrorLogger()
p, _ := paths.New(fs, cfg)
fileCaches, _ := filecache.NewCachesFromPaths(p)
fileCaches, err := filecache.NewCachesFromPaths(p)
if err != nil {
panic(err)
}
return &deps.Deps{
Cfg: cfg,