// Copyright 2019 The Hugo Authors. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//go:build !nodeploy
// +build !nodeploy

package deploy

import (
	"bytes"
	"compress/gzip"
	"context"
	"crypto/md5"
	"encoding/hex"
	"errors"
	"fmt"
	"io"
	"mime"
	"os"
	"path/filepath"
	"regexp"
	"runtime"
	"sort"
	"strings"
	"sync"

	"github.com/dustin/go-humanize"
	"github.com/gobwas/glob"
	"github.com/gohugoio/hugo/config"
	"github.com/gohugoio/hugo/media"
	"github.com/spf13/afero"
	jww "github.com/spf13/jwalterweatherman"
	"golang.org/x/text/unicode/norm"

	"gocloud.dev/blob"
	_ "gocloud.dev/blob/fileblob" // import
	_ "gocloud.dev/blob/gcsblob"  // import
	_ "gocloud.dev/blob/s3blob"   // import
	"gocloud.dev/gcerrors"
)

// Deployer supports deploying the site to target cloud providers.
type Deployer struct {
	localFs afero.Fs
	bucket  *blob.Bucket

	target        *target          // the target to deploy to
	matchers      []*matcher       // matchers to apply to uploaded files
	mediaTypes    media.Types      // Hugo's media types, used to guess Content-Type
	ordering      []*regexp.Regexp // orders uploads
	quiet         bool             // true reduces STDOUT
	confirm       bool             // true enables confirmation before making changes
	dryRun        bool             // true skips confirmations and prints changes instead of applying them
	force         bool             // true forces upload of all files
	invalidateCDN bool             // true enables CDN cache invalidation (if possible)
	maxDeletes    int              // caps the # of files to delete; -1 to disable

	// For tests...
	summary deploySummary // summary of latest Deploy results
}

type deploySummary struct {
	NumLocal, NumRemote, NumUploads, NumDeletes int
}

const metaMD5Hash = "md5chksum" // the metadata key under which the MD5 hash is stored

// New constructs a new *Deployer.
func New(cfg config.Provider, localFs afero.Fs) (*Deployer, error) {
	targetName := cfg.GetString("target")

	// Load the [deployment] section of the config.
	dcfg, err := decodeConfig(cfg)
	if err != nil {
		return nil, err
	}

	if len(dcfg.Targets) == 0 {
		return nil, errors.New("no deployment targets found")
	}

	// Find the target to deploy to.
	var tgt *target
	if targetName == "" {
		// Default to the first target.
		tgt = dcfg.Targets[0]
	} else {
		for _, t := range dcfg.Targets {
			if t.Name == targetName {
				tgt = t
			}
		}
		if tgt == nil {
			return nil, fmt.Errorf("deployment target %q not found", targetName)
		}
	}

	return &Deployer{
		localFs:       localFs,
		target:        tgt,
		matchers:      dcfg.Matchers,
		ordering:      dcfg.ordering,
		mediaTypes:    dcfg.mediaTypes,
		quiet:         cfg.GetBool("quiet"),
		confirm:       cfg.GetBool("confirm"),
		dryRun:        cfg.GetBool("dryRun"),
		force:         cfg.GetBool("force"),
		invalidateCDN: cfg.GetBool("invalidateCDN"),
		maxDeletes:    cfg.GetInt("maxDeletes"),
	}, nil
}
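
// For reference (values are illustrative, not part of this file): a minimal
// [deployment] section in the site config that New can resolve looks like
//
//	[deployment]
//	[[deployment.targets]]
//	name = "production"
//	URL = "s3://example-bucket?region=us-east-1"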

func (d *Deployer) openBucket(ctx context.Context) (*blob.Bucket, error) {
	if d.bucket != nil {
		return d.bucket, nil
	}
	jww.FEEDBACK.Printf("Deploying to target %q (%s)\n", d.target.Name, d.target.URL)
	return blob.OpenBucket(ctx, d.target.URL)
}
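
// Note: gocloud.dev/blob dispatches on the URL scheme; with the drivers
// imported above, "file://", "gs://", and "s3://" target URLs are supported.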

// Deploy deploys the site to a target.
func (d *Deployer) Deploy(ctx context.Context) error {
	bucket, err := d.openBucket(ctx)
	if err != nil {
		return err
	}

	// Load local files from the source directory.
	var include, exclude glob.Glob
	if d.target != nil {
		include, exclude = d.target.includeGlob, d.target.excludeGlob
	}
	local, err := walkLocal(d.localFs, d.matchers, include, exclude, d.mediaTypes)
	if err != nil {
		return err
	}
	jww.INFO.Printf("Found %d local files.\n", len(local))
	d.summary.NumLocal = len(local)

	// Load remote files from the target.
	remote, err := walkRemote(ctx, bucket, include, exclude)
	if err != nil {
		return err
	}
	jww.INFO.Printf("Found %d remote files.\n", len(remote))
	d.summary.NumRemote = len(remote)

	// Diff local vs remote to see what changes need to be applied.
	uploads, deletes := findDiffs(local, remote, d.force)
	d.summary.NumUploads = len(uploads)
	d.summary.NumDeletes = len(deletes)
	if len(uploads)+len(deletes) == 0 {
		if !d.quiet {
			jww.FEEDBACK.Println("No changes required.")
		}
		return nil
	}
	if !d.quiet {
		jww.FEEDBACK.Println(summarizeChanges(uploads, deletes))
	}

	// Ask for confirmation before proceeding.
	if d.confirm && !d.dryRun {
		fmt.Printf("Continue? (Y/n) ")
		var confirm string
		if _, err := fmt.Scanln(&confirm); err != nil {
			return err
		}
		if confirm != "" && confirm[0] != 'y' && confirm[0] != 'Y' {
			return errors.New("aborted")
		}
	}

	// Order the uploads. They are organized in groups; all uploads in a group
	// must be complete before moving on to the next group.
	uploadGroups := applyOrdering(d.ordering, uploads)

	// Apply the changes in parallel, using an inverted worker
	// pool (https://www.youtube.com/watch?v=5zXAHh5tJqQ&t=26m58s).
	// sem prevents more than nParallel concurrent goroutines.
	const nParallel = 10
	var errs []error
	var errMu sync.Mutex // protects errs
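
	// How the inverted worker pool below works, as a minimal sketch
	// (jobType and process are illustrative, not part of this file):
	// a slot is acquired by sending into sem and released by receiving
	// from it; filling all nParallel slots proves every in-flight
	// goroutine has finished.
	//
	//	sem := make(chan struct{}, nParallel)
	//	for _, job := range jobs {
	//		sem <- struct{}{} // acquire a slot (blocks once nParallel are busy)
	//		go func(j jobType) {
	//			defer func() { <-sem }() // release the slot
	//			process(j)
	//		}(job)
	//	}
	//	for n := nParallel; n > 0; n-- {
	//		sem <- struct{}{} // wait: reclaim every slot
	//	}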

	for _, uploads := range uploadGroups {
		// Short-circuit for an empty group.
		if len(uploads) == 0 {
			continue
		}

		// Within the group, apply uploads in parallel.
		sem := make(chan struct{}, nParallel)
		for _, upload := range uploads {
			if d.dryRun {
				if !d.quiet {
					jww.FEEDBACK.Printf("[DRY RUN] Would upload: %v\n", upload)
				}
				continue
			}

			sem <- struct{}{}
			go func(upload *fileToUpload) {
				if err := doSingleUpload(ctx, bucket, upload); err != nil {
					errMu.Lock()
					defer errMu.Unlock()
					errs = append(errs, err)
				}
				<-sem
			}(upload)
		}
		// Wait for all uploads in the group to finish.
		for n := nParallel; n > 0; n-- {
			sem <- struct{}{}
		}
	}

	if d.maxDeletes != -1 && len(deletes) > d.maxDeletes {
		jww.WARN.Printf("Skipping %d deletes because it is more than --maxDeletes (%d). If this is expected, set --maxDeletes to a larger number, or -1 to disable this check.\n", len(deletes), d.maxDeletes)
		d.summary.NumDeletes = 0
	} else {
		// Apply deletes in parallel.
		sort.Slice(deletes, func(i, j int) bool { return deletes[i] < deletes[j] })
		sem := make(chan struct{}, nParallel)
		for _, del := range deletes {
			if d.dryRun {
				if !d.quiet {
					jww.FEEDBACK.Printf("[DRY RUN] Would delete %s\n", del)
				}
				continue
			}
			sem <- struct{}{}
			go func(del string) {
				jww.INFO.Printf("Deleting %s...\n", del)
				if err := bucket.Delete(ctx, del); err != nil {
					if gcerrors.Code(err) == gcerrors.NotFound {
						jww.WARN.Printf("Failed to delete %q because it wasn't found: %v", del, err)
					} else {
						errMu.Lock()
						defer errMu.Unlock()
						errs = append(errs, err)
					}
				}
				<-sem
			}(del)
		}
		// Wait for all deletes to finish.
		for n := nParallel; n > 0; n-- {
			sem <- struct{}{}
		}
	}

	if len(errs) > 0 {
		if !d.quiet {
			jww.FEEDBACK.Printf("Encountered %d errors.\n", len(errs))
		}
		return errs[0]
	}
	if !d.quiet {
		jww.FEEDBACK.Println("Success!")
	}

	if d.invalidateCDN {
		if d.target.CloudFrontDistributionID != "" {
			if d.dryRun {
				if !d.quiet {
					jww.FEEDBACK.Printf("[DRY RUN] Would invalidate CloudFront CDN with ID %s\n", d.target.CloudFrontDistributionID)
				}
			} else {
				jww.FEEDBACK.Println("Invalidating CloudFront CDN...")
				if err := InvalidateCloudFront(ctx, d.target.CloudFrontDistributionID); err != nil {
					jww.FEEDBACK.Printf("Failed to invalidate CloudFront CDN: %v\n", err)
					return err
				}
			}
		}
		if d.target.GoogleCloudCDNOrigin != "" {
			if d.dryRun {
				if !d.quiet {
					jww.FEEDBACK.Printf("[DRY RUN] Would invalidate Google Cloud CDN with origin %s\n", d.target.GoogleCloudCDNOrigin)
				}
			} else {
				jww.FEEDBACK.Println("Invalidating Google Cloud CDN...")
				if err := InvalidateGoogleCloudCDN(ctx, d.target.GoogleCloudCDNOrigin); err != nil {
					jww.FEEDBACK.Printf("Failed to invalidate Google Cloud CDN: %v\n", err)
					return err
				}
			}
		}
		jww.FEEDBACK.Println("Success!")
	}
	return nil
}

// summarizeChanges creates a text description of the proposed changes.
func summarizeChanges(uploads []*fileToUpload, deletes []string) string {
	uploadSize := int64(0)
	for _, u := range uploads {
		uploadSize += u.Local.UploadSize
	}
	return fmt.Sprintf("Identified %d file(s) to upload, totaling %s, and %d file(s) to delete.", len(uploads), humanize.Bytes(uint64(uploadSize)), len(deletes))
}

// doSingleUpload executes a single file upload.
func doSingleUpload(ctx context.Context, bucket *blob.Bucket, upload *fileToUpload) error {
	jww.INFO.Printf("Uploading %v...\n", upload)
	opts := &blob.WriterOptions{
		CacheControl:    upload.Local.CacheControl(),
		ContentEncoding: upload.Local.ContentEncoding(),
		ContentType:     upload.Local.ContentType(),
		Metadata:        map[string]string{metaMD5Hash: hex.EncodeToString(upload.Local.MD5())},
	}
	w, err := bucket.NewWriter(ctx, upload.Local.SlashPath, opts)
	if err != nil {
		return err
	}
	r, err := upload.Local.Reader()
	if err != nil {
		return err
	}
	defer r.Close()
	_, err = io.Copy(w, r)
	if err != nil {
		return err
	}
	if err := w.Close(); err != nil {
		return err
	}
	return nil
}
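
// Note: with gocloud.dev/blob, a write is not committed until the writer's
// Close returns successfully, which is why the Close error above is checked
// explicitly rather than deferred and ignored.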

// localFile represents a local file from the source. Use newLocalFile to
// construct one.
type localFile struct {
	// NativePath is the native path to the file (using file.Separator).
	NativePath string
	// SlashPath is NativePath converted to use /.
	SlashPath string
	// UploadSize is the size of the content to be uploaded. It may not
	// be the same as the local file size if the content will be
	// gzipped before upload.
	UploadSize int64

	fs         afero.Fs
	matcher    *matcher
	md5        []byte       // cache
	gzipped    bytes.Buffer // cache of the gzipped contents if gzipping
	mediaTypes media.Types
}

// newLocalFile initializes a *localFile.
func newLocalFile(fs afero.Fs, nativePath, slashpath string, m *matcher, mt media.Types) (*localFile, error) {
	f, err := fs.Open(nativePath)
	if err != nil {
		return nil, err
	}
	defer f.Close()
	lf := &localFile{
		NativePath: nativePath,
		SlashPath:  slashpath,
		fs:         fs,
		matcher:    m,
		mediaTypes: mt,
	}
	if m != nil && m.Gzip {
		// We're going to gzip the content. Do it once now, and cache the result
		// in gzipped. The UploadSize is the size of the gzipped content.
		gz := gzip.NewWriter(&lf.gzipped)
		if _, err := io.Copy(gz, f); err != nil {
			return nil, err
		}
		if err := gz.Close(); err != nil {
			return nil, err
		}
		lf.UploadSize = int64(lf.gzipped.Len())
	} else {
		// Raw content. Just get the UploadSize.
		info, err := f.Stat()
		if err != nil {
			return nil, err
		}
		lf.UploadSize = info.Size()
	}
	return lf, nil
}

// Reader returns an io.ReadCloser for reading the content to be uploaded.
// The caller must call Close on the returned ReadCloser.
// The reader content may not be the same as the local file content due to
// gzipping.
func (lf *localFile) Reader() (io.ReadCloser, error) {
	if lf.matcher != nil && lf.matcher.Gzip {
		// We've got the gzipped contents cached in gzipped.
		// Note: we can't use lf.gzipped directly as a Reader, since it discards
		// data after it is read, and we may need to read it more than once.
		return io.NopCloser(bytes.NewReader(lf.gzipped.Bytes())), nil
	}
	// Not expected to fail, since we opened the file successfully earlier in
	// newLocalFile, but it could happen due to changes in the underlying filesystem.
	return lf.fs.Open(lf.NativePath)
}

// CacheControl returns the Cache-Control header to use for lf, based on the
// first matching matcher (if any).
func (lf *localFile) CacheControl() string {
	if lf.matcher == nil {
		return ""
	}
	return lf.matcher.CacheControl
}

// ContentEncoding returns the Content-Encoding header to use for lf, based
// on the matcher's Content-Encoding and Gzip fields.
func (lf *localFile) ContentEncoding() string {
	if lf.matcher == nil {
		return ""
	}
	if lf.matcher.Gzip {
		return "gzip"
	}
	return lf.matcher.ContentEncoding
}
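
// For reference (values are illustrative): a matcher in the site config that
// drives CacheControl, ContentEncoding, and Gzip for matched paths might look
// like
//
//	[[deployment.matchers]]
//	pattern = "^.+\\.(js|css|svg|ttf)$"
//	cacheControl = "max-age=31536000, no-transform, public"
//	gzip = true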

// ContentType returns the Content-Type header to use for lf.
// It first checks if there's a Content-Type header configured via a matching
// matcher; if not, it tries to generate one based on the filename extension.
// If this fails, the Content-Type will be the empty string. In this case, Go
// Cloud will automatically try to infer a Content-Type based on the file
// content.
func (lf *localFile) ContentType() string {
	if lf.matcher != nil && lf.matcher.ContentType != "" {
		return lf.matcher.ContentType
	}
	ext := filepath.Ext(lf.NativePath)
	if mimeType, _, found := lf.mediaTypes.GetFirstBySuffix(strings.TrimPrefix(ext, ".")); found {
		return mimeType.Type()
	}
	return mime.TypeByExtension(ext)
}

// Force returns true if the file should be forced to re-upload based on the
// matching matcher.
func (lf *localFile) Force() bool {
	return lf.matcher != nil && lf.matcher.Force
}

// MD5 returns an MD5 hash of the content to be uploaded.
func (lf *localFile) MD5() []byte {
	if len(lf.md5) > 0 {
		return lf.md5
	}
	h := md5.New()
	r, err := lf.Reader()
	if err != nil {
		return nil
	}
	defer r.Close()
	if _, err := io.Copy(h, r); err != nil {
		return nil
	}
	lf.md5 = h.Sum(nil)
	return lf.md5
}
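
// Note: MD5 is used here only as a cheap change-detection checksum, compared
// against the remote object's hash in findDiffs; it plays no security role.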

// knownHiddenDirectory checks if the specified name is a well-known
// hidden directory.
func knownHiddenDirectory(name string) bool {
	knownDirectories := []string{
		".well-known",
	}
	for _, dir := range knownDirectories {
		if name == dir {
			return true
		}
	}
	return false
}

// walkLocal walks the source directory and returns a flat list of files,
// using localFile.SlashPath as the map keys.
func walkLocal(fs afero.Fs, matchers []*matcher, include, exclude glob.Glob, mediaTypes media.Types) (map[string]*localFile, error) {
	retval := map[string]*localFile{}
	err := afero.Walk(fs, "", func(path string, info os.FileInfo, err error) error {
		if err != nil {
			return err
		}
		if info.IsDir() {
			// Skip hidden directories.
			if path != "" && strings.HasPrefix(info.Name(), ".") {
				// Except for specific well-known hidden directories.
				if !knownHiddenDirectory(info.Name()) {
					return filepath.SkipDir
				}
			}
			return nil
		}
		// .DS_Store is an internal macOS attribute file; skip it.
		if info.Name() == ".DS_Store" {
			return nil
		}
		// When the file system is HFS+, its file paths are in NFD form;
		// normalize them to NFC.
		if runtime.GOOS == "darwin" {
			path = norm.NFC.String(path)
		}

		// Check include/exclude matchers.
		slashpath := filepath.ToSlash(path)
		if include != nil && !include.Match(slashpath) {
			jww.INFO.Printf("  dropping %q due to include\n", slashpath)
			return nil
		}
		if exclude != nil && exclude.Match(slashpath) {
			jww.INFO.Printf("  dropping %q due to exclude\n", slashpath)
			return nil
		}

		// Find the first matching matcher (if any).
		var m *matcher
		for _, cur := range matchers {
			if cur.Matches(slashpath) {
				m = cur
				break
			}
		}
		lf, err := newLocalFile(fs, path, slashpath, m, mediaTypes)
		if err != nil {
			return err
		}
		retval[lf.SlashPath] = lf
		return nil
	})
	if err != nil {
		return nil, err
	}
	return retval, nil
}

// walkRemote walks the target bucket and returns a flat list.
func walkRemote(ctx context.Context, bucket *blob.Bucket, include, exclude glob.Glob) (map[string]*blob.ListObject, error) {
	retval := map[string]*blob.ListObject{}
	iter := bucket.List(nil)
	for {
		obj, err := iter.Next(ctx)
		if err == io.EOF {
			break
		}
		if err != nil {
			return nil, err
		}
		// Check include/exclude matchers.
		if include != nil && !include.Match(obj.Key) {
			jww.INFO.Printf("  remote dropping %q due to include\n", obj.Key)
			continue
		}
		if exclude != nil && exclude.Match(obj.Key) {
			jww.INFO.Printf("  remote dropping %q due to exclude\n", obj.Key)
			continue
		}
		// If the remote didn't give us an MD5, try the remote's metadata
		// attribute; if that doesn't exist either, compute one.
		// A missing MD5 can happen for some providers (e.g., fileblob, which
		// uses the local filesystem), but not for the most common Cloud
		// providers (S3, GCS, Azure). It can also happen for S3 if the blob
		// was uploaded via a multi-part upload.
		// Although it's unfortunate to have to read the file, it's likely
		// better than assuming a delta and re-uploading it.
		if len(obj.MD5) == 0 {
			var attrMD5 []byte
			attrs, err := bucket.Attributes(ctx, obj.Key)
			if err == nil {
				md5String, exists := attrs.Metadata[metaMD5Hash]
				if exists {
					attrMD5, _ = hex.DecodeString(md5String)
				}
			}
			if len(attrMD5) == 0 {
				r, err := bucket.NewReader(ctx, obj.Key, nil)
				if err == nil {
					h := md5.New()
					if _, err := io.Copy(h, r); err == nil {
						obj.MD5 = h.Sum(nil)
					}
					r.Close()
				}
			} else {
				obj.MD5 = attrMD5
			}
		}
		retval[obj.Key] = obj
	}
	return retval, nil
}
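
// Design note: computing a missing MD5 requires downloading the whole blob,
// which is why the cheap metadata lookup (metaMD5Hash, written at upload time
// by doSingleUpload) is tried first.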

// uploadReason is an enum of reasons why a file must be uploaded.
type uploadReason string

const (
	reasonUnknown    uploadReason = "unknown"
	reasonNotFound   uploadReason = "not found at target"
	reasonForce      uploadReason = "--force"
	reasonSize       uploadReason = "size differs"
	reasonMD5Differs uploadReason = "md5 differs"
	reasonMD5Missing uploadReason = "remote md5 missing"
)

// fileToUpload represents a single local file that should be uploaded to
// the target.
type fileToUpload struct {
	Local  *localFile
	Reason uploadReason
}

func (u *fileToUpload) String() string {
	details := []string{humanize.Bytes(uint64(u.Local.UploadSize))}
	if s := u.Local.CacheControl(); s != "" {
		details = append(details, fmt.Sprintf("Cache-Control: %q", s))
	}
	if s := u.Local.ContentEncoding(); s != "" {
		details = append(details, fmt.Sprintf("Content-Encoding: %q", s))
	}
	if s := u.Local.ContentType(); s != "" {
		details = append(details, fmt.Sprintf("Content-Type: %q", s))
	}
	return fmt.Sprintf("%s (%s): %v", u.Local.SlashPath, strings.Join(details, ", "), u.Reason)
}

// findDiffs diffs localFiles vs remoteFiles to see what changes should be
// applied to the remote target. It returns a slice of *fileToUpload and a
// slice of paths for files to delete.
func findDiffs(localFiles map[string]*localFile, remoteFiles map[string]*blob.ListObject, force bool) ([]*fileToUpload, []string) {
	var uploads []*fileToUpload
	var deletes []string

	found := map[string]bool{}
	for path, lf := range localFiles {
		upload := false
		reason := reasonUnknown

		if remoteFile, ok := remoteFiles[path]; ok {
			// The file exists in remote. Let's see if we need to upload it anyway.
			// TODO: We don't register a diff if the metadata (e.g., Content-Type
			// header) has changed. This would be difficult/expensive to detect; some
			// providers return metadata along with their "List" result, but others
			// (notably AWS S3) do not, so gocloud.dev's blob.Bucket doesn't expose
			// it in the list result. It would require a separate request per blob
			// to fetch. At least for now, we work around this by documenting it and
			// providing a "force" flag (to re-upload everything) and a "force" bool
			// per matcher (to re-upload all files in a matcher whose headers may have
			// changed).
			// Idea: extract a sample set of 1 file per extension + 1 file per matcher
			// and check those files?
			if force {
				upload = true
				reason = reasonForce
			} else if lf.Force() {
				upload = true
				reason = reasonForce
			} else if lf.UploadSize != remoteFile.Size {
				upload = true
				reason = reasonSize
			} else if len(remoteFile.MD5) == 0 {
				// This shouldn't happen unless the remote didn't give us an MD5 hash
				// from List, AND we failed to compute one by reading the remote file.
				// Default to considering the files different.
				upload = true
				reason = reasonMD5Missing
			} else if !bytes.Equal(lf.MD5(), remoteFile.MD5) {
				upload = true
				reason = reasonMD5Differs
			} else {
				// Nope! Leave upload = false.
			}
			found[path] = true
		} else {
			// The file doesn't exist in remote.
			upload = true
			reason = reasonNotFound
		}
		if upload {
			jww.DEBUG.Printf("%s needs to be uploaded: %v\n", path, reason)
			uploads = append(uploads, &fileToUpload{lf, reason})
		} else {
			jww.DEBUG.Printf("%s exists at target and does not need to be uploaded", path)
		}
	}

	// Remote files that weren't found locally should be deleted.
	for path := range remoteFiles {
		if !found[path] {
			deletes = append(deletes, path)
		}
	}
	return uploads, deletes
}
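
// For example: a local file whose size matches the remote object but whose
// MD5 differs is re-uploaded with reasonMD5Differs, while a remote object
// with no local counterpart ends up in the deletes slice.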

// applyOrdering returns an ordered slice of slices of uploads.
//
// The returned slice will have length len(ordering)+1.
//
// The subslice at index i, for i = 0 ... len(ordering)-1, will have all of the
// uploads whose Local.SlashPath matched the regex at ordering[i] (but not any
// previous ordering regex).
// The subslice at index len(ordering) will have the remaining uploads that
// didn't match any ordering regex.
//
// The subslices are sorted by Local.SlashPath.
func applyOrdering(ordering []*regexp.Regexp, uploads []*fileToUpload) [][]*fileToUpload {
	// Sort the whole slice by Local.SlashPath first.
	sort.Slice(uploads, func(i, j int) bool { return uploads[i].Local.SlashPath < uploads[j].Local.SlashPath })

	retval := make([][]*fileToUpload, len(ordering)+1)
	for _, u := range uploads {
		matched := false
		for i, re := range ordering {
			if re.MatchString(u.Local.SlashPath) {
				retval[i] = append(retval[i], u)
				matched = true
				break
			}
		}
		if !matched {
			retval[len(ordering)] = append(retval[len(ordering)], u)
		}
	}
	return retval
}
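
// For reference (values are illustrative): the ordering regexes come from the
// site config, e.g. to upload media files before the HTML that links to them:
//
//	[deployment]
//	order = [".jpg$", ".gif$"]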