From f330e869e42dc782a48c045aea5d29a134e225cb Mon Sep 17 00:00:00 2001 From: Robert van Gent Date: Fri, 3 May 2019 13:28:35 -0700 Subject: [PATCH] deploy: compute MD5 by reading if List didn't provide one --- deploy/deploy.go | 31 +++++++++++++++++++++---------- go.mod | 1 + go.sum | 1 + 3 files changed, 23 insertions(+), 10 deletions(-) diff --git a/deploy/deploy.go b/deploy/deploy.go index 5cc529f7e..6ba348dd8 100644 --- a/deploy/deploy.go +++ b/deploy/deploy.go @@ -19,7 +19,6 @@ import ( "context" "crypto/md5" "fmt" - "golang.org/x/text/unicode/norm" "io" "mime" "os" @@ -33,6 +32,7 @@ import ( "github.com/pkg/errors" "github.com/spf13/afero" jww "github.com/spf13/jwalterweatherman" + "golang.org/x/text/unicode/norm" "gocloud.dev/blob" _ "gocloud.dev/blob/azureblob" // import @@ -418,6 +418,23 @@ func walkRemote(ctx context.Context, bucket *blob.Bucket) (map[string]*blob.List if err != nil { return nil, err } + // If the remote didn't give us an MD5, compute one. + // This can happen for some providers (e.g., fileblob, which uses the + // local filesystem), but not for the most common Cloud providers + // (S3, GCS, Azure). Although, it can happen for S3 if the blob was uploaded + // via a multi-part upload. + // Although it's unfortunate to have to read the file, it's likely better + // than assuming a delta and re-uploading it. + if len(obj.MD5) == 0 { + r, err := bucket.NewReader(ctx, obj.Key, nil) + if err == nil { + h := md5.New() + if _, err := io.Copy(h, r); err == nil { + obj.MD5 = h.Sum(nil) + } + r.Close() + } + } retval[obj.Key] = obj } return retval, nil @@ -494,15 +511,9 @@ func findDiffs(localFiles map[string]*localFile, remoteFiles map[string]*blob.Li upload = true reason = reasonSize } else if len(remoteFile.MD5) == 0 { - // TODO: This can happen if the remote provider doesn't return an MD5 - // hash for the blob from their "list" command. This is common for - // some providers (e.g., fileblob, which uses the local filesystem), - // but not for the biggest Cloud providers (S3, GCS, Azure). Although, - // it can happen for S3 if the blob was originally uploaded as a - // multi-part upload (shouldn't happen when using "hugo deploy"). - // For now, we treat this as an MD5 mismatch and re-upload. An - // alternative would be to read entire the remote blob and compute the - // MD5 hash. + // This shouldn't happen unless the remote didn't give us an MD5 hash + // from List, AND we failed to compute one by reading the remote file. + // Default to considering the files different. upload = true reason = reasonMD5Missing } else if !bytes.Equal(lf.MD5(), remoteFile.MD5) { diff --git a/go.mod b/go.mod index e21c0cd68..8bf8c4c75 100644 --- a/go.mod +++ b/go.mod @@ -19,6 +19,7 @@ require ( github.com/fortytw2/leaktest v1.2.0 github.com/fsnotify/fsnotify v1.4.7 github.com/gobwas/glob v0.2.3 + github.com/google/go-cmp v0.2.0 github.com/gorilla/websocket v1.4.0 github.com/hashicorp/go-immutable-radix v1.0.0 github.com/jdkato/prose v1.1.0 diff --git a/go.sum b/go.sum index 7f7771f7e..36cfdbece 100644 --- a/go.sum +++ b/go.sum @@ -211,6 +211,7 @@ github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5y github.com/golang/snappy v0.0.0-20170215233205-553a64147049/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q= github.com/golang/snappy v0.0.0-20180518054509-2e65f85255db/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q= github.com/google/btree v0.0.0-20180813153112-4030bb1f1f0c/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ5JPQ= +github.com/google/go-cmp v0.2.0 h1:+dTQ8DZQJz0Mb/HjFlkptS1FeQ4cWSnN941F8aEG4SQ= github.com/google/go-cmp v0.2.0/go.mod h1:oXzfMopK8JAjlY9xF4vHSVASa0yLyX7SntLO5aqRK0M= github.com/google/go-github v17.0.0+incompatible/go.mod h1:zLgOLi98H3fifZn+44m+umXrS52loVEgC2AApnigrVQ= github.com/google/go-querystring v1.0.0/go.mod h1:odCYkC5MyYFN7vkCjXpyrEuKhc/BUO6wN/zVPAxq5ck=