From a3d42a277d3c492ce4a7860956234134e130aba1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B8rn=20Erik=20Pedersen?= Date: Thu, 3 Aug 2023 17:52:17 +0200 Subject: [PATCH] Add retry in resources.GetRemote for temporary HTTP errors Fixes #11312 --- hugolib/resource_chain_test.go | 6 +- .../create/integration_test.go | 67 +++++++++++++++- resources/resource_factories/create/remote.go | 76 ++++++++++++++----- 3 files changed, 128 insertions(+), 21 deletions(-) diff --git a/hugolib/resource_chain_test.go b/hugolib/resource_chain_test.go index da90634aa..17c3b2f0c 100644 --- a/hugolib/resource_chain_test.go +++ b/hugolib/resource_chain_test.go @@ -36,7 +36,7 @@ func TestResourceChainBasic(t *testing.T) { failIfHandler := func(h http.Handler) http.Handler { return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { if r.URL.Path == "/fail.jpg" { - http.Error(w, "{ msg: failed }", 500) + http.Error(w, "{ msg: failed }", 501) return } h.ServeHTTP(w, r) @@ -116,8 +116,8 @@ FIT REMOTE: sunset_%[1]s.jpg|/sunset_%[1]s_hu59e56ffff1bc1d8d122b1403d34e039f_0_ REMOTE NOT FOUND: OK LOCAL NOT FOUND: OK PRINT PROTOCOL ERROR DETAILS: Err: error calling resources.GetRemote: Get "gopher://example.org": unsupported protocol scheme "gopher"|| -FAILED REMOTE ERROR DETAILS CONTENT: |failed to fetch remote resource: Internal Server Error|Body: { msg: failed } -|StatusCode: 500|ContentLength: 16|ContentType: text/plain; charset=utf-8| +FAILED REMOTE ERROR DETAILS CONTENT: |failed to fetch remote resource: Not Implemented|Body: { msg: failed } +|StatusCode: 501|ContentLength: 16|ContentType: text/plain; charset=utf-8| `, identity.HashString(ts.URL+"/sunset.jpg", map[string]any{}))) diff --git a/resources/resource_factories/create/integration_test.go b/resources/resource_factories/create/integration_test.go index 2d9a700d3..acd2cf3a1 100644 --- a/resources/resource_factories/create/integration_test.go +++ b/resources/resource_factories/create/integration_test.go @@ -14,12 +14,17 @@ package create_test import ( + "fmt" + "math/rand" + "net/http" + "net/http/httptest" + "strings" "testing" "github.com/gohugoio/hugo/hugolib" ) -func TestGetResourceHead(t *testing.T) { +func TestGetRemoteHead(t *testing.T) { files := ` -- config.toml -- @@ -57,3 +62,63 @@ func TestGetResourceHead(t *testing.T) { ) } + +func TestGetRemoteRetry(t *testing.T) { + t.Parallel() + + temporaryHTTPCodes := []int{408, 429, 500, 502, 503, 504} + numPages := 30 + + handler := func(w http.ResponseWriter, r *http.Request) { + if rand.Intn(4) == 0 { + w.WriteHeader(temporaryHTTPCodes[rand.Intn(len(temporaryHTTPCodes))]) + return + } + w.Header().Add("Content-Type", "text/plain") + w.Write([]byte("Response for " + r.URL.Path + ".")) + } + + srv := httptest.NewServer(http.HandlerFunc(handler)) + t.Cleanup(func() { srv.Close() }) + + files := ` +-- hugo.toml -- +disableKinds = ["home", "taxonomy", "term"] +[security] +[security.http] +urls = ['.*'] +mediaTypes = ['text/plain'] +-- layouts/_default/single.html -- +{{ $url := printf "%s%s" "URL" .RelPermalink}} +{{ $opts := dict }} +{{ with resources.GetRemote $url $opts }} + {{ with .Err }} + {{ errorf "Unable to get remote resource: %s" . }} + {{ else }} + Content: {{ .Content }} + {{ end }} +{{ else }} + {{ errorf "Unable to get remote resource: %s" $url }} +{{ end }} +` + + for i := 0; i < numPages; i++ { + files += fmt.Sprintf("-- content/post/p%d.md --\n", i) + } + + files = strings.ReplaceAll(files, "URL", srv.URL) + + b := hugolib.NewIntegrationTestBuilder( + hugolib.IntegrationTestConfig{ + T: t, + TxtarString: files, + }, + ) + + b.Build() + + for i := 0; i < numPages; i++ { + b.AssertFileContent(fmt.Sprintf("public/post/p%d/index.html", i), fmt.Sprintf("Content: Response for /post/p%d/.", i)) + } + +} diff --git a/resources/resource_factories/create/remote.go b/resources/resource_factories/create/remote.go index 73171e570..ce9c80881 100644 --- a/resources/resource_factories/create/remote.go +++ b/resources/resource_factories/create/remote.go @@ -18,6 +18,7 @@ import ( "bytes" "fmt" "io" + "math/rand" "mime" "net/http" "net/http/httputil" @@ -25,6 +26,7 @@ import ( "path" "path/filepath" "strings" + "time" "github.com/gohugoio/hugo/common/hugio" "github.com/gohugoio/hugo/common/maps" @@ -83,6 +85,15 @@ func toHTTPError(err error, res *http.Response, readBody bool) *HTTPError { } } +var temporaryHTTPStatusCodes = map[int]bool{ + 408: true, + 429: true, + 500: true, + 502: true, + 503: true, + 504: true, +} + // FromRemote expects one or n-parts of a URL to a resource // If you provide multiple parts they will be joined together to the final URL. func (c *Client) FromRemote(uri string, optionsm map[string]any) (resource.Resource, error) { @@ -108,30 +119,61 @@ func (c *Client) FromRemote(uri string, optionsm map[string]any) (resource.Resou return nil, err } - req, err := options.NewRequest(uri) - if err != nil { - return nil, fmt.Errorf("failed to create request for resource %s: %w", uri, err) - } + var ( + start time.Time + nextSleep = time.Duration((rand.Intn(1000) + 100)) * time.Millisecond + nextSleepLimit = time.Duration(5) * time.Second + ) - res, err := c.httpClient.Do(req) - if err != nil { - return nil, err - } - defer res.Body.Close() + for { + b, retry, err := func() ([]byte, bool, error) { + req, err := options.NewRequest(uri) + if err != nil { + return nil, false, fmt.Errorf("failed to create request for resource %s: %w", uri, err) + } - httpResponse, err := httputil.DumpResponse(res, true) - if err != nil { - return nil, toHTTPError(err, res, !isHeadMethod) - } + res, err := c.httpClient.Do(req) + if err != nil { + return nil, false, err + } + defer res.Body.Close() - if res.StatusCode != http.StatusNotFound { - if res.StatusCode < 200 || res.StatusCode > 299 { - return nil, toHTTPError(fmt.Errorf("failed to fetch remote resource: %s", http.StatusText(res.StatusCode)), res, !isHeadMethod) + if res.StatusCode != http.StatusNotFound { + if res.StatusCode < 200 || res.StatusCode > 299 { + return nil, temporaryHTTPStatusCodes[res.StatusCode], toHTTPError(fmt.Errorf("failed to fetch remote resource: %s", http.StatusText(res.StatusCode)), res, !isHeadMethod) + } + } + + b, err := httputil.DumpResponse(res, true) + if err != nil { + return nil, false, toHTTPError(err, res, !isHeadMethod) + } + + return b, false, nil + + }() + + if err != nil { + if retry { + if start.IsZero() { + start = time.Now() + } else if d := time.Since(start) + nextSleep; d >= c.rs.Cfg.Timeout() { + return nil, fmt.Errorf("timeout (configured to %s) fetching remote resource %s: last error: %w", c.rs.Cfg.Timeout(), uri, err) + } + time.Sleep(nextSleep) + if nextSleep < nextSleepLimit { + nextSleep *= 2 + } + continue + } + return nil, err } + + return hugio.ToReadCloser(bytes.NewReader(b)), nil + } - return hugio.ToReadCloser(bytes.NewReader(httpResponse)), nil }) if err != nil { return nil, err