deploy: Add stripIndexHtml target option

This new configuration parameter causes paths matching
"<dir>/index.html" to be stored as "<dir>/" remotely. This simplifies
the cloud configuration needed for some use cases, such as CloudFront
distributions with S3 bucket origins. Before this change, users must
configure their S3 buckets as public websites (which is incompatible
with certain authentication / authorization schemes), or users must add
a CloudFront function to append index.html to the end of incoming
requests. After this change, users can simply use an ordinary CloudFront
distribution (no additional code) with an ordinary S3 bucket origin (and
not an S3 website).

This adds tests to ensure that functionality like matchers is unaffected
by this change. I have also tested that the functionality works as
expected when deploying to a real S3 / CloudFront website.

Closes #12607
This commit is contained in:
Dietrich Epp 2024-06-18 14:26:08 -04:00 committed by Bjørn Erik Pedersen
parent 478a9107a6
commit d5542ed286
4 changed files with 101 additions and 5 deletions

View file

@ -133,10 +133,14 @@ func (d *Deployer) Deploy(ctx context.Context) error {
// Load local files from the source directory.
var include, exclude glob.Glob
var mappath func(string) string
if d.target != nil {
include, exclude = d.target.IncludeGlob, d.target.ExcludeGlob
if d.target.StripIndexHTML {
mappath = stripIndexHTML
}
local, err := d.walkLocal(d.localFs, d.cfg.Matchers, include, exclude, d.mediaTypes)
}
local, err := d.walkLocal(d.localFs, d.cfg.Matchers, include, exclude, d.mediaTypes, mappath)
if err != nil {
return err
}
@ -483,7 +487,7 @@ func knownHiddenDirectory(name string) bool {
// walkLocal walks the source directory and returns a flat list of files,
// using localFile.SlashPath as the map keys.
func (d *Deployer) walkLocal(fs afero.Fs, matchers []*deployconfig.Matcher, include, exclude glob.Glob, mediaTypes media.Types) (map[string]*localFile, error) {
func (d *Deployer) walkLocal(fs afero.Fs, matchers []*deployconfig.Matcher, include, exclude glob.Glob, mediaTypes media.Types, mappath func(string) string) (map[string]*localFile, error) {
retval := map[string]*localFile{}
err := afero.Walk(fs, "", func(path string, info os.FileInfo, err error) error {
if err != nil {
@ -529,6 +533,11 @@ func (d *Deployer) walkLocal(fs afero.Fs, matchers []*deployconfig.Matcher, incl
break
}
}
// Apply any additional modifications to the local path, to map it to
// the remote path.
if mappath != nil {
slashpath = mappath(slashpath)
}
lf, err := newLocalFile(fs, path, slashpath, m, mediaTypes)
if err != nil {
return err
@ -542,6 +551,15 @@ func (d *Deployer) walkLocal(fs afero.Fs, matchers []*deployconfig.Matcher, incl
return retval, nil
}
// stripIndexHTML maps a slash-separated path of the form "<dir>/index.html"
// to "<dir>/", keeping the trailing slash. Any other path, including a bare
// top-level "index.html", is returned unchanged.
func stripIndexHTML(slashpath string) string {
	const indexFile = "index.html"
	// Require the preceding slash so that names such as "prefix_index.html"
	// and the root "index.html" are left alone.
	if !strings.HasSuffix(slashpath, "/"+indexFile) {
		return slashpath
	}
	// Drop only the file name; the directory's trailing slash stays.
	return strings.TrimSuffix(slashpath, indexFile)
}
// walkRemote walks the target bucket and returns a flat list.
func (d *Deployer) walkRemote(ctx context.Context, bucket *blob.Bucket, include, exclude glob.Glob) (map[string]*blob.ListObject, error) {
retval := map[string]*blob.ListObject{}

View file

@ -218,6 +218,7 @@ func TestWalkLocal(t *testing.T) {
tests := map[string]struct {
Given []string
Expect []string
MapPath func(string) string
}{
"Empty": {
Given: []string{},
@ -235,6 +236,11 @@ func TestWalkLocal(t *testing.T) {
Given: []string{"file.txt", ".hidden_dir/file.txt", ".well-known/file.txt"},
Expect: []string{"file.txt", ".well-known/file.txt"},
},
"StripIndexHTML": {
Given: []string{"index.html", "file.txt", "dir/index.html", "dir/file.txt"},
Expect: []string{"index.html", "file.txt", "dir/", "dir/file.txt"},
MapPath: stripIndexHTML,
},
}
for desc, tc := range tests {
@ -254,7 +260,7 @@ func TestWalkLocal(t *testing.T) {
}
}
d := newDeployer()
if got, err := d.walkLocal(fs, nil, nil, nil, media.DefaultTypes); err != nil {
if got, err := d.walkLocal(fs, nil, nil, nil, media.DefaultTypes, tc.MapPath); err != nil {
t.Fatal(err)
} else {
expect := map[string]any{}
@ -274,6 +280,63 @@ func TestWalkLocal(t *testing.T) {
}
}
// TestStripIndexHTML runs stripIndexHTML over a table of paths and checks
// each result: only "<dir>/index.html" is rewritten, while plain files,
// names that merely end in "index.html", and the root index.html are kept.
func TestStripIndexHTML(t *testing.T) {
	type testCase struct {
		Input  string
		Output string
	}
	cases := map[string]testCase{
		"Unmapped": {Input: "normal_file.txt", Output: "normal_file.txt"},
		"Stripped": {Input: "directory/index.html", Output: "directory/"},
		"NoSlash":  {Input: "prefix_index.html", Output: "prefix_index.html"},
		"Root":     {Input: "index.html", Output: "index.html"},
	}
	for name, c := range cases {
		t.Run(name, func(t *testing.T) {
			if got := stripIndexHTML(c.Input); got != c.Output {
				t.Errorf("got %q, expect %q", got, c.Output)
			}
		})
	}
}
// TestStripIndexHTMLMatcher checks that StripIndexHTML does not affect
// matchers: a file whose key is remapped to "dir/" must still carry the
// matcher for `\.html$`, and a non-HTML file must carry none.
func TestStripIndexHTMLMatcher(t *testing.T) {
	memFs := afero.NewMemMapFs()
	if err := memFs.Mkdir("dir", 0o755); err != nil {
		t.Fatal(err)
	}
	for _, name := range []string{"index.html", "dir/index.html", "file.txt"} {
		fd, err := memFs.Create(name)
		if err != nil {
			t.Fatal(err)
		}
		fd.Close()
	}

	d := newDeployer()
	const pattern = `\.html$`
	htmlMatcher := &deployconfig.Matcher{Pattern: pattern, Gzip: true, Re: regexp.MustCompile(pattern)}
	files, err := d.walkLocal(memFs, []*deployconfig.Matcher{htmlMatcher}, nil, nil, media.DefaultTypes, stripIndexHTML)
	if err != nil {
		t.Fatal(err)
	}

	// Both HTML files must be present under their remote keys and matched,
	// even though the remapped key "dir/" no longer ends in ".html".
	for _, key := range []string{"index.html", "dir/"} {
		switch lf := files[key]; {
		case lf == nil:
			t.Errorf("missing file %q", key)
		case lf.matcher == nil:
			t.Errorf("file %q has nil matcher, expect %q", key, pattern)
		}
	}

	// The plain text file must be present and must not be matched.
	const name = "file.txt"
	switch lf := files[name]; {
	case lf == nil:
		t.Errorf("missing file %q", name)
	case lf.matcher != nil:
		t.Errorf("file %q has matcher %q, expect nil", name, lf.matcher.Pattern)
	}
}
func TestLocalFile(t *testing.T) {
const (
content = "hello world!"

View file

@ -69,6 +69,11 @@ type Target struct {
// Parsed versions of Include/Exclude.
IncludeGlob glob.Glob `json:"-"`
ExcludeGlob glob.Glob `json:"-"`
// If true, any local path matching <dir>/index.html will be mapped to the
// remote path <dir>/. This does not affect the top-level index.html file,
// since that would result in an empty path.
StripIndexHTML bool
}
func (tgt *Target) ParseIncludeExclude() error {

View file

@ -186,6 +186,15 @@ URL = "<FILL ME IN>"
#include = "**.html" # would only include files with ".html" suffix
#exclude = "**.{jpg, png}" # would exclude files with ".jpg" or ".png" suffix
# Map any file named "<dir>/index.html" to the remote file "<dir>/". This does
# not affect the root "index.html" file, and it does not affect matchers below.
# This works when deploying to key-value cloud storage systems, such as Amazon
# S3 (general purpose buckets, not directory buckets), Google Cloud Storage, and
# Azure Blob Storage. This makes it so the canonical URL will match the object
# key in cloud storage, except for the root index.html file.
#
#stripIndexHTML = true
#######################
[[deployment.matchers]]
@ -195,6 +204,7 @@ URL = "<FILL ME IN>"
# See https://golang.org/pkg/regexp/syntax/ for pattern syntax.
# Pattern searching is stopped on first match.
# This is not affected by stripIndexHTML, above.
pattern = "<FILL ME IN>"
# If true, Hugo will gzip the file before uploading it to the bucket.