From 66753416b5ec0f9f1be588a935d5551dfb5eebb9 Mon Sep 17 00:00:00 2001 From: Paul van Brouwershaven Date: Thu, 2 Dec 2021 12:56:25 +0100 Subject: [PATCH] Make resources.Get use a file cache for remote resources Closes #9228 --- cache/filecache/filecache_config.go | 17 ++- cache/filecache/filecache_config_test.go | 16 ++- .../en/getting-started/configuration.md | 3 + docs/content/en/hugo-pipes/introduction.md | 10 ++ resources/resource_factories/create/create.go | 113 ++++++++++-------- 5 files changed, 102 insertions(+), 57 deletions(-) diff --git a/cache/filecache/filecache_config.go b/cache/filecache/filecache_config.go index 801799e36..1b3819771 100644 --- a/cache/filecache/filecache_config.go +++ b/cache/filecache/filecache_config.go @@ -42,11 +42,12 @@ var defaultCacheConfig = Config{ } const ( - cacheKeyGetJSON = "getjson" - cacheKeyGetCSV = "getcsv" - cacheKeyImages = "images" - cacheKeyAssets = "assets" - cacheKeyModules = "modules" + cacheKeyGetJSON = "getjson" + cacheKeyGetCSV = "getcsv" + cacheKeyImages = "images" + cacheKeyAssets = "assets" + cacheKeyModules = "modules" + cacheGetResource = "getresource" ) type Configs map[string]Config @@ -70,6 +71,7 @@ var defaultCacheConfigs = Configs{ MaxAge: -1, Dir: resourcesGenDir, }, + cacheGetResource: defaultCacheConfig, } type Config struct { @@ -111,6 +113,11 @@ func (f Caches) AssetsCache() *Cache { return f[cacheKeyAssets] } +// GetResourceCache gets the file cache for remote resources. 
+func (f Caches) GetResourceCache() *Cache { + return f[cacheGetResource] +} + func DecodeConfig(fs afero.Fs, cfg config.Provider) (Configs, error) { c := make(Configs) valid := make(map[string]bool) diff --git a/cache/filecache/filecache_config_test.go b/cache/filecache/filecache_config_test.go index acc127e67..1ff3b8112 100644 --- a/cache/filecache/filecache_config_test.go +++ b/cache/filecache/filecache_config_test.go @@ -50,7 +50,8 @@ maxAge = "11h" dir = "/path/to/c2" [caches.images] dir = "/path/to/c3" - +[caches.getResource] +dir = "/path/to/c4" ` cfg, err := config.FromConfigString(configStr, "toml") @@ -59,7 +60,7 @@ dir = "/path/to/c3" decoded, err := DecodeConfig(fs, cfg) c.Assert(err, qt.IsNil) - c.Assert(len(decoded), qt.Equals, 5) + c.Assert(len(decoded), qt.Equals, 6) c2 := decoded["getcsv"] c.Assert(c2.MaxAge.String(), qt.Equals, "11h0m0s") @@ -68,6 +69,10 @@ dir = "/path/to/c3" c3 := decoded["images"] c.Assert(c3.MaxAge, qt.Equals, time.Duration(-1)) c.Assert(c3.Dir, qt.Equals, filepath.FromSlash("/path/to/c3/filecache/images")) + + c4 := decoded["getresource"] + c.Assert(c4.MaxAge, qt.Equals, time.Duration(-1)) + c.Assert(c4.Dir, qt.Equals, filepath.FromSlash("/path/to/c4/filecache/getresource")) } func TestDecodeConfigIgnoreCache(t *testing.T) { @@ -94,7 +99,8 @@ maxAge = 3456 dir = "/path/to/c2" [caches.images] dir = "/path/to/c3" - +[caches.getResource] +dir = "/path/to/c4" ` cfg, err := config.FromConfigString(configStr, "toml") @@ -103,7 +109,7 @@ dir = "/path/to/c3" decoded, err := DecodeConfig(fs, cfg) c.Assert(err, qt.IsNil) - c.Assert(len(decoded), qt.Equals, 5) + c.Assert(len(decoded), qt.Equals, 6) for _, v := range decoded { c.Assert(v.MaxAge, qt.Equals, time.Duration(0)) @@ -129,7 +135,7 @@ func TestDecodeConfigDefault(t *testing.T) { c.Assert(err, qt.IsNil) - c.Assert(len(decoded), qt.Equals, 5) + c.Assert(len(decoded), qt.Equals, 6) imgConfig := decoded[cacheKeyImages] jsonConfig := decoded[cacheKeyGetJSON] diff --git 
a/docs/content/en/getting-started/configuration.md b/docs/content/en/getting-started/configuration.md index 9c78446ac..8332421be 100644 --- a/docs/content/en/getting-started/configuration.md +++ b/docs/content/en/getting-started/configuration.md @@ -657,6 +657,9 @@ maxAge = -1 [caches.getcsv] dir = ":cacheDir/:project" maxAge = -1 +[caches.getresource] +dir = ":cacheDir/:project" +maxAge = -1 [caches.images] dir = ":resourceDir/_gen" maxAge = -1 diff --git a/docs/content/en/hugo-pipes/introduction.md b/docs/content/en/hugo-pipes/introduction.md index 7a0ce5fc5..d097dec6b 100755 --- a/docs/content/en/hugo-pipes/introduction.md +++ b/docs/content/en/hugo-pipes/introduction.md @@ -55,6 +55,16 @@ You can also change the request method and set the request body: )}} ``` +#### Cache of remote resources + +The response for each downloaded URL is cached in the default folder `$TMPDIR/hugo_cache/`. The variable `$TMPDIR` resolves to your system-dependent temporary directory. + +With the command-line flag `--cacheDir`, you can specify any folder on your system as a caching directory. + +You can also set `cacheDir` or `caches.getresource` in the [main configuration file][config]. + +To disable caching entirely, use the command-line flag `--ignoreCache`. + ### Asset publishing Assets will only be published (to `/public`) if `.Permalink` or `.RelPermalink` is used. 
diff --git a/resources/resource_factories/create/create.go b/resources/resource_factories/create/create.go index 4ad3da448..3f66642d6 100644 --- a/resources/resource_factories/create/create.go +++ b/resources/resource_factories/create/create.go @@ -16,12 +16,14 @@ package create import ( + "bufio" "bytes" "fmt" "io" "io/ioutil" "mime" "net/http" + "net/http/httputil" "net/url" "path" "path/filepath" @@ -32,6 +34,7 @@ import ( "github.com/gohugoio/hugo/hugofs" + "github.com/gohugoio/hugo/cache/filecache" "github.com/gohugoio/hugo/common/hugio" "github.com/gohugoio/hugo/common/maps" "github.com/gohugoio/hugo/common/types" @@ -45,8 +48,9 @@ import ( // Client contains methods to create Resource objects. // tasks to Resource objects. type Client struct { - rs *resources.Spec - httpClient *http.Client + rs *resources.Spec + httpClient *http.Client + cacheGetResource *filecache.Cache } // New creates a new Client with the given specification. @@ -56,6 +60,7 @@ func New(rs *resources.Spec) *Client { httpClient: &http.Client{ Timeout: 10 * time.Second, }, + cacheGetResource: rs.FileCaches.GetResourceCache(), } } @@ -156,10 +161,7 @@ func (c *Client) FromRemote(uri string, options map[string]interface{}) (resourc resourceID := helpers.HashString(uri, options) - // This caches to memory and will, in server mode, not be evicted unless the resourceID changes - // or the server restarts. - // There is ongoing work to improve this. 
- return c.rs.ResourceCache.GetOrCreate(resourceID, func() (resource.Resource, error) { + _, httpResponse, err := c.cacheGetResource.GetOrCreate(resourceID, func() (io.ReadCloser, error) { method, reqBody, err := getMethodAndBody(options) if err != nil { return nil, errors.Wrapf(err, "failed to get method or body for resource %s", uri) @@ -187,51 +189,68 @@ func (c *Client) FromRemote(uri string, options map[string]interface{}) (resourc return nil, errors.Errorf("failed to retrieve remote resource: %s", http.StatusText(res.StatusCode)) } - body, err := ioutil.ReadAll(res.Body) + httpResponse, err := httputil.DumpResponse(res, true) if err != nil { - return nil, errors.Wrapf(err, "failed to read remote resource %s", uri) + return nil, err } - filename := path.Base(rURL.Path) - if _, params, _ := mime.ParseMediaType(res.Header.Get("Content-Disposition")); params != nil { - if _, ok := params["filename"]; ok { - filename = params["filename"] - } - } - - var contentType string - if arr, _ := mime.ExtensionsByType(res.Header.Get("Content-Type")); len(arr) == 1 { - contentType = arr[0] - } - - // If content type was not determined by header, look for a file extention - if contentType == "" { - if ext := path.Ext(filename); ext != "" { - contentType = ext - } - } - - // If content type was not determined by header or file extention, try using content itself - if contentType == "" { - if ct := http.DetectContentType(body); ct != "application/octet-stream" { - if arr, _ := mime.ExtensionsByType(ct); arr != nil { - contentType = arr[0] - } - } - } - - resourceID = filename[:len(filename)-len(path.Ext(filename))] + "_" + resourceID + contentType - - return c.rs.New( - resources.ResourceSourceDescriptor{ - Fs: c.rs.FileCaches.AssetsCache().Fs, - LazyPublish: true, - OpenReadSeekCloser: func() (hugio.ReadSeekCloser, error) { - return hugio.NewReadSeekerNoOpCloser(bytes.NewReader(body)), nil - }, - RelTargetFilename: filepath.Clean(resourceID), - }) + return 
hugio.ToReadCloser(bytes.NewReader(httpResponse)), nil }) + if err != nil { + return nil, err + } + defer httpResponse.Close() + + res, err := http.ReadResponse(bufio.NewReader(httpResponse), nil) + if err != nil { + return nil, err + } + + body, err := ioutil.ReadAll(res.Body) + if err != nil { + return nil, errors.Wrapf(err, "failed to read remote resource %s", uri) + } + + filename := path.Base(rURL.Path) + if _, params, _ := mime.ParseMediaType(res.Header.Get("Content-Disposition")); params != nil { + if _, ok := params["filename"]; ok { + filename = params["filename"] + } + } + + var contentType string + if arr, _ := mime.ExtensionsByType(res.Header.Get("Content-Type")); len(arr) == 1 { + contentType = arr[0] + } + + // If content type was not determined by header, look for a file extention + if contentType == "" { + if ext := path.Ext(filename); ext != "" { + contentType = ext + } + } + + // If content type was not determined by header or file extention, try using content itself + if contentType == "" { + if ct := http.DetectContentType(body); ct != "application/octet-stream" { + if arr, _ := mime.ExtensionsByType(ct); arr != nil { + contentType = arr[0] + } + } + } + + resourceID = filename[:len(filename)-len(path.Ext(filename))] + "_" + resourceID + contentType + + return c.rs.New( + resources.ResourceSourceDescriptor{ + Fs: c.rs.FileCaches.AssetsCache().Fs, + LazyPublish: true, + OpenReadSeekCloser: func() (hugio.ReadSeekCloser, error) { + return hugio.NewReadSeekerNoOpCloser(bytes.NewReader(body)), nil + }, + RelTargetFilename: filepath.Clean(resourceID), + }) + } func addDefaultHeaders(req *http.Request, accepts ...string) {