Make resources.Get use a file cache for remote resources

Closes #9228
This commit is contained in:
Paul van Brouwershaven 2021-12-02 12:56:25 +01:00 committed by GitHub
parent 133e4bfbee
commit 66753416b5
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
5 changed files with 102 additions and 57 deletions

View file

@ -42,11 +42,12 @@ var defaultCacheConfig = Config{
}
const (
cacheKeyGetJSON = "getjson"
cacheKeyGetCSV = "getcsv"
cacheKeyImages = "images"
cacheKeyAssets = "assets"
cacheKeyModules = "modules"
cacheKeyGetJSON = "getjson"
cacheKeyGetCSV = "getcsv"
cacheKeyImages = "images"
cacheKeyAssets = "assets"
cacheKeyModules = "modules"
cacheGetResource = "getresource"
)
type Configs map[string]Config
@ -70,6 +71,7 @@ var defaultCacheConfigs = Configs{
MaxAge: -1,
Dir: resourcesGenDir,
},
cacheGetResource: defaultCacheConfig,
}
type Config struct {
@ -111,6 +113,11 @@ func (f Caches) AssetsCache() *Cache {
return f[cacheKeyAssets]
}
// GetResourceCache returns the file cache that holds remote resources.
func (f Caches) GetResourceCache() *Cache {
	remote := f[cacheGetResource]
	return remote
}
func DecodeConfig(fs afero.Fs, cfg config.Provider) (Configs, error) {
c := make(Configs)
valid := make(map[string]bool)

View file

@ -50,7 +50,8 @@ maxAge = "11h"
dir = "/path/to/c2"
[caches.images]
dir = "/path/to/c3"
[caches.getResource]
dir = "/path/to/c4"
`
cfg, err := config.FromConfigString(configStr, "toml")
@ -59,7 +60,7 @@ dir = "/path/to/c3"
decoded, err := DecodeConfig(fs, cfg)
c.Assert(err, qt.IsNil)
c.Assert(len(decoded), qt.Equals, 5)
c.Assert(len(decoded), qt.Equals, 6)
c2 := decoded["getcsv"]
c.Assert(c2.MaxAge.String(), qt.Equals, "11h0m0s")
@ -68,6 +69,10 @@ dir = "/path/to/c3"
c3 := decoded["images"]
c.Assert(c3.MaxAge, qt.Equals, time.Duration(-1))
c.Assert(c3.Dir, qt.Equals, filepath.FromSlash("/path/to/c3/filecache/images"))
c4 := decoded["getresource"]
c.Assert(c4.MaxAge, qt.Equals, time.Duration(-1))
c.Assert(c4.Dir, qt.Equals, filepath.FromSlash("/path/to/c4/filecache/getresource"))
}
func TestDecodeConfigIgnoreCache(t *testing.T) {
@ -94,7 +99,8 @@ maxAge = 3456
dir = "/path/to/c2"
[caches.images]
dir = "/path/to/c3"
[caches.getResource]
dir = "/path/to/c4"
`
cfg, err := config.FromConfigString(configStr, "toml")
@ -103,7 +109,7 @@ dir = "/path/to/c3"
decoded, err := DecodeConfig(fs, cfg)
c.Assert(err, qt.IsNil)
c.Assert(len(decoded), qt.Equals, 5)
c.Assert(len(decoded), qt.Equals, 6)
for _, v := range decoded {
c.Assert(v.MaxAge, qt.Equals, time.Duration(0))
@ -129,7 +135,7 @@ func TestDecodeConfigDefault(t *testing.T) {
c.Assert(err, qt.IsNil)
c.Assert(len(decoded), qt.Equals, 5)
c.Assert(len(decoded), qt.Equals, 6)
imgConfig := decoded[cacheKeyImages]
jsonConfig := decoded[cacheKeyGetJSON]

View file

@ -657,6 +657,9 @@ maxAge = -1
[caches.getcsv]
dir = ":cacheDir/:project"
maxAge = -1
[caches.getresource]
dir = ":cacheDir/:project"
maxAge = -1
[caches.images]
dir = ":resourceDir/_gen"
maxAge = -1

View file

@ -55,6 +55,16 @@ You can also change the request method and set the request body:
)}}
```
#### Cache of remote resources
Each downloaded URL will be cached in the default folder `$TMPDIR/hugo_cache/`. The variable `$TMPDIR` will be resolved to your system-dependent temporary directory.
With the command-line flag `--cacheDir`, you can specify any folder on your system as a caching directory.
You can also set `cacheDir` or `caches.getresource` in the [main configuration file][config].
To disable caching entirely, use the command-line flag `--ignoreCache`.
### Asset publishing
Assets will only be published (to `/public`) if `.Permalink` or `.RelPermalink` is used.

View file

@ -16,12 +16,14 @@
package create
import (
"bufio"
"bytes"
"fmt"
"io"
"io/ioutil"
"mime"
"net/http"
"net/http/httputil"
"net/url"
"path"
"path/filepath"
@ -32,6 +34,7 @@ import (
"github.com/gohugoio/hugo/hugofs"
"github.com/gohugoio/hugo/cache/filecache"
"github.com/gohugoio/hugo/common/hugio"
"github.com/gohugoio/hugo/common/maps"
"github.com/gohugoio/hugo/common/types"
@ -45,8 +48,9 @@ import (
// Client contains methods to create Resource objects.
type Client struct {
rs *resources.Spec
httpClient *http.Client
rs *resources.Spec
httpClient *http.Client
cacheGetResource *filecache.Cache
}
// New creates a new Client with the given specification.
@ -56,6 +60,7 @@ func New(rs *resources.Spec) *Client {
httpClient: &http.Client{
Timeout: 10 * time.Second,
},
cacheGetResource: rs.FileCaches.GetResourceCache(),
}
}
@ -156,10 +161,7 @@ func (c *Client) FromRemote(uri string, options map[string]interface{}) (resourc
resourceID := helpers.HashString(uri, options)
// This caches to memory and will, in server mode, not be evicted unless the resourceID changes
// or the server restarts.
// There is ongoing work to improve this.
return c.rs.ResourceCache.GetOrCreate(resourceID, func() (resource.Resource, error) {
_, httpResponse, err := c.cacheGetResource.GetOrCreate(resourceID, func() (io.ReadCloser, error) {
method, reqBody, err := getMethodAndBody(options)
if err != nil {
return nil, errors.Wrapf(err, "failed to get method or body for resource %s", uri)
@ -187,51 +189,68 @@ func (c *Client) FromRemote(uri string, options map[string]interface{}) (resourc
return nil, errors.Errorf("failed to retrieve remote resource: %s", http.StatusText(res.StatusCode))
}
body, err := ioutil.ReadAll(res.Body)
httpResponse, err := httputil.DumpResponse(res, true)
if err != nil {
return nil, errors.Wrapf(err, "failed to read remote resource %s", uri)
return nil, err
}
filename := path.Base(rURL.Path)
if _, params, _ := mime.ParseMediaType(res.Header.Get("Content-Disposition")); params != nil {
if _, ok := params["filename"]; ok {
filename = params["filename"]
}
}
var contentType string
if arr, _ := mime.ExtensionsByType(res.Header.Get("Content-Type")); len(arr) == 1 {
contentType = arr[0]
}
// If content type was not determined by header, look for a file extension
if contentType == "" {
if ext := path.Ext(filename); ext != "" {
contentType = ext
}
}
// If content type was not determined by header or file extension, try using content itself
if contentType == "" {
if ct := http.DetectContentType(body); ct != "application/octet-stream" {
if arr, _ := mime.ExtensionsByType(ct); arr != nil {
contentType = arr[0]
}
}
}
resourceID = filename[:len(filename)-len(path.Ext(filename))] + "_" + resourceID + contentType
return c.rs.New(
resources.ResourceSourceDescriptor{
Fs: c.rs.FileCaches.AssetsCache().Fs,
LazyPublish: true,
OpenReadSeekCloser: func() (hugio.ReadSeekCloser, error) {
return hugio.NewReadSeekerNoOpCloser(bytes.NewReader(body)), nil
},
RelTargetFilename: filepath.Clean(resourceID),
})
return hugio.ToReadCloser(bytes.NewReader(httpResponse)), nil
})
if err != nil {
return nil, err
}
defer httpResponse.Close()
res, err := http.ReadResponse(bufio.NewReader(httpResponse), nil)
if err != nil {
return nil, err
}
body, err := ioutil.ReadAll(res.Body)
if err != nil {
return nil, errors.Wrapf(err, "failed to read remote resource %s", uri)
}
filename := path.Base(rURL.Path)
if _, params, _ := mime.ParseMediaType(res.Header.Get("Content-Disposition")); params != nil {
if _, ok := params["filename"]; ok {
filename = params["filename"]
}
}
var contentType string
if arr, _ := mime.ExtensionsByType(res.Header.Get("Content-Type")); len(arr) == 1 {
contentType = arr[0]
}
// If content type was not determined by header, look for a file extension
if contentType == "" {
if ext := path.Ext(filename); ext != "" {
contentType = ext
}
}
// If content type was not determined by header or file extension, try using content itself
if contentType == "" {
if ct := http.DetectContentType(body); ct != "application/octet-stream" {
if arr, _ := mime.ExtensionsByType(ct); arr != nil {
contentType = arr[0]
}
}
}
resourceID = filename[:len(filename)-len(path.Ext(filename))] + "_" + resourceID + contentType
return c.rs.New(
resources.ResourceSourceDescriptor{
Fs: c.rs.FileCaches.AssetsCache().Fs,
LazyPublish: true,
OpenReadSeekCloser: func() (hugio.ReadSeekCloser, error) {
return hugio.NewReadSeekerNoOpCloser(bytes.NewReader(body)), nil
},
RelTargetFilename: filepath.Clean(resourceID),
})
}
func addDefaultHeaders(req *http.Request, accepts ...string) {