mirror of
https://github.com/gohugoio/hugo.git
synced 2024-11-21 20:46:30 -05:00
Replace the MD5 hashing of images with xxHash
Note that we only use this for change detection. The previous implementation invoked `MD5FromReaderFast`, which created an MD5 hash from eight 64-byte chunks of the file, which is obviously very fast. The new implementation creates the hash from the entire file and ... seems to be even more effective:
```
name          old time/op    new time/op    delta
HashImage-10  9.45µs ±21%    10.89µs ± 1%   ~ (p=0.343 n=4+4)

name          old alloc/op   new alloc/op   delta
HashImage-10  144B ± 0%      8B ± 0%        -94.44% (p=0.029 n=4+4)

name          old allocs/op  new allocs/op  delta
HashImage-10  4.00 ± 0%      1.00 ± 0%      -75.00% (p=0.029 n=4+4)
```
This commit is contained in:
parent
8b5d796989
commit
d5eda13cb2
7 changed files with 180 additions and 29 deletions
86
common/hashing/hashing.go
Normal file
86
common/hashing/hashing.go
Normal file
|
@ -0,0 +1,86 @@
|
||||||
|
// Copyright 2024 The Hugo Authors. All rights reserved.
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
// Package hashing provides common hashing utilities.
|
||||||
|
package hashing
|
||||||
|
|
||||||
|
import (
|
||||||
|
"encoding/hex"
|
||||||
|
"io"
|
||||||
|
"sync"
|
||||||
|
|
||||||
|
"github.com/cespare/xxhash/v2"
|
||||||
|
)
|
||||||
|
|
||||||
|
// XXHashFromReader calculates the xxHash for the given reader.
|
||||||
|
func XXHashFromReader(r io.ReadSeeker) (uint64, int64, error) {
|
||||||
|
h := getXxHashReadFrom()
|
||||||
|
defer putXxHashReadFrom(h)
|
||||||
|
|
||||||
|
size, err := io.Copy(h, r)
|
||||||
|
if err != nil {
|
||||||
|
return 0, 0, err
|
||||||
|
}
|
||||||
|
return h.Sum64(), size, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// XXHashFromString calculates the xxHash for the given string.
|
||||||
|
func XXHashFromString(s string) (uint64, error) {
|
||||||
|
h := xxhash.New()
|
||||||
|
h.WriteString(s)
|
||||||
|
return h.Sum64(), nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// XxHashFromStringHexEncoded calculates the xxHash for the given string
|
||||||
|
// and returns the hash as a hex encoded string.
|
||||||
|
func XxHashFromStringHexEncoded(f string) string {
|
||||||
|
h := xxhash.New()
|
||||||
|
h.WriteString(f)
|
||||||
|
hash := h.Sum(nil)
|
||||||
|
return hex.EncodeToString(hash)
|
||||||
|
}
|
||||||
|
|
||||||
|
type xxhashReadFrom struct {
|
||||||
|
buff []byte
|
||||||
|
*xxhash.Digest
|
||||||
|
}
|
||||||
|
|
||||||
|
func (x *xxhashReadFrom) ReadFrom(r io.Reader) (int64, error) {
|
||||||
|
for {
|
||||||
|
n, err := r.Read(x.buff)
|
||||||
|
if n > 0 {
|
||||||
|
x.Digest.Write(x.buff[:n])
|
||||||
|
}
|
||||||
|
if err != nil {
|
||||||
|
if err == io.EOF {
|
||||||
|
err = nil
|
||||||
|
}
|
||||||
|
return int64(n), err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
var xXhashReadFromPool = sync.Pool{
|
||||||
|
New: func() any {
|
||||||
|
return &xxhashReadFrom{Digest: xxhash.New(), buff: make([]byte, 48*1024)}
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
func getXxHashReadFrom() *xxhashReadFrom {
|
||||||
|
return xXhashReadFromPool.Get().(*xxhashReadFrom)
|
||||||
|
}
|
||||||
|
|
||||||
|
func putXxHashReadFrom(h *xxhashReadFrom) {
|
||||||
|
h.Reset()
|
||||||
|
xXhashReadFromPool.Put(h)
|
||||||
|
}
|
79
common/hashing/hashing_test.go
Normal file
79
common/hashing/hashing_test.go
Normal file
|
@ -0,0 +1,79 @@
|
||||||
|
// Copyright 2024 The Hugo Authors. All rights reserved.
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
package hashing
|
||||||
|
|
||||||
|
import (
|
||||||
|
"strings"
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
"github.com/cespare/xxhash/v2"
|
||||||
|
qt "github.com/frankban/quicktest"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestXxHashFromReader(t *testing.T) {
|
||||||
|
c := qt.New(t)
|
||||||
|
s := "Hello World"
|
||||||
|
r := strings.NewReader(s)
|
||||||
|
got, size, err := XXHashFromReader(r)
|
||||||
|
c.Assert(err, qt.IsNil)
|
||||||
|
c.Assert(size, qt.Equals, int64(len(s)))
|
||||||
|
c.Assert(got, qt.Equals, uint64(7148569436472236994))
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestXxHashFromString(t *testing.T) {
|
||||||
|
c := qt.New(t)
|
||||||
|
s := "Hello World"
|
||||||
|
got, err := XXHashFromString(s)
|
||||||
|
c.Assert(err, qt.IsNil)
|
||||||
|
c.Assert(got, qt.Equals, uint64(7148569436472236994))
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestXxHashFromStringHexEncoded(t *testing.T) {
|
||||||
|
c := qt.New(t)
|
||||||
|
s := "The quick brown fox jumps over the lazy dog"
|
||||||
|
got := XxHashFromStringHexEncoded(s)
|
||||||
|
// Facit: https://asecuritysite.com/encryption/xxhash?val=The%20quick%20brown%20fox%20jumps%20over%20the%20lazy%20dog
|
||||||
|
c.Assert(got, qt.Equals, "0b242d361fda71bc")
|
||||||
|
}
|
||||||
|
|
||||||
|
func BenchmarkXXHashFromReader(b *testing.B) {
|
||||||
|
r := strings.NewReader("Hello World")
|
||||||
|
b.ResetTimer()
|
||||||
|
for i := 0; i < b.N; i++ {
|
||||||
|
XXHashFromReader(r)
|
||||||
|
r.Seek(0, 0)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func BenchmarkXXHashFromString(b *testing.B) {
|
||||||
|
s := "Hello World"
|
||||||
|
b.ResetTimer()
|
||||||
|
for i := 0; i < b.N; i++ {
|
||||||
|
XXHashFromString(s)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func BenchmarkXXHashFromStringHexEncoded(b *testing.B) {
|
||||||
|
s := "The quick brown fox jumps over the lazy dog"
|
||||||
|
b.ResetTimer()
|
||||||
|
for i := 0; i < b.N; i++ {
|
||||||
|
XxHashFromStringHexEncoded(s)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func xxHashFromString(f string) uint64 {
|
||||||
|
h := xxhash.New()
|
||||||
|
h.WriteString(f)
|
||||||
|
return h.Sum64()
|
||||||
|
}
|
|
@ -27,12 +27,11 @@ import (
|
||||||
"unicode"
|
"unicode"
|
||||||
"unicode/utf8"
|
"unicode/utf8"
|
||||||
|
|
||||||
"github.com/cespare/xxhash/v2"
|
bp "github.com/gohugoio/hugo/bufferpool"
|
||||||
|
|
||||||
"github.com/spf13/afero"
|
"github.com/spf13/afero"
|
||||||
|
|
||||||
"github.com/jdkato/prose/transform"
|
"github.com/jdkato/prose/transform"
|
||||||
|
|
||||||
bp "github.com/gohugoio/hugo/bufferpool"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
// FilePathSeparator as defined by os.Separator.
|
// FilePathSeparator as defined by os.Separator.
|
||||||
|
@ -258,13 +257,7 @@ func SliceToLower(s []string) []string {
|
||||||
return l
|
return l
|
||||||
}
|
}
|
||||||
|
|
||||||
// XxHashString takes a string and returns its xxHash hash.
|
// XXHashFromReader creates a xxHash hash from the given reader.
|
||||||
func XxHashString(f string) string {
|
|
||||||
h := xxhash.New()
|
|
||||||
h.WriteString(f)
|
|
||||||
hash := h.Sum(nil)
|
|
||||||
return hex.EncodeToString(hash)
|
|
||||||
}
|
|
||||||
|
|
||||||
// MD5String takes a string and returns its MD5 hash.
|
// MD5String takes a string and returns its MD5 hash.
|
||||||
func MD5String(f string) string {
|
func MD5String(f string) string {
|
||||||
|
|
|
@ -493,7 +493,7 @@ func (i *imageResource) relTargetPathFromConfig(conf images.ImageConfig) interna
|
||||||
}
|
}
|
||||||
|
|
||||||
h := i.hash()
|
h := i.hash()
|
||||||
idStr := fmt.Sprintf("_hu%s_%d", h, i.size())
|
idStr := fmt.Sprintf("_hu%d_%d", h, i.size())
|
||||||
|
|
||||||
// Do not change for no good reason.
|
// Do not change for no good reason.
|
||||||
const md5Threshold = 100
|
const md5Threshold = 100
|
||||||
|
|
|
@ -26,6 +26,7 @@ import (
|
||||||
"github.com/gohugoio/hugo/identity"
|
"github.com/gohugoio/hugo/identity"
|
||||||
"github.com/gohugoio/hugo/resources/internal"
|
"github.com/gohugoio/hugo/resources/internal"
|
||||||
|
|
||||||
|
"github.com/gohugoio/hugo/common/hashing"
|
||||||
"github.com/gohugoio/hugo/common/herrors"
|
"github.com/gohugoio/hugo/common/herrors"
|
||||||
"github.com/gohugoio/hugo/common/paths"
|
"github.com/gohugoio/hugo/common/paths"
|
||||||
|
|
||||||
|
@ -307,7 +308,7 @@ type fileInfo interface {
|
||||||
}
|
}
|
||||||
|
|
||||||
type hashProvider interface {
|
type hashProvider interface {
|
||||||
hash() string
|
hash() uint64
|
||||||
}
|
}
|
||||||
|
|
||||||
var _ resource.StaleInfo = (*StaleValue[any])(nil)
|
var _ resource.StaleInfo = (*StaleValue[any])(nil)
|
||||||
|
@ -403,7 +404,7 @@ func (l *genericResource) size() int64 {
|
||||||
return l.h.size
|
return l.h.size
|
||||||
}
|
}
|
||||||
|
|
||||||
func (l *genericResource) hash() string {
|
func (l *genericResource) hash() uint64 {
|
||||||
if err := l.h.init(l); err != nil {
|
if err := l.h.init(l); err != nil {
|
||||||
panic(err)
|
panic(err)
|
||||||
}
|
}
|
||||||
|
@ -628,7 +629,7 @@ type targetPather interface {
|
||||||
}
|
}
|
||||||
|
|
||||||
type resourceHash struct {
|
type resourceHash struct {
|
||||||
value string
|
value uint64
|
||||||
size int64
|
size int64
|
||||||
initOnce sync.Once
|
initOnce sync.Once
|
||||||
}
|
}
|
||||||
|
@ -636,7 +637,7 @@ type resourceHash struct {
|
||||||
func (r *resourceHash) init(l hugio.ReadSeekCloserProvider) error {
|
func (r *resourceHash) init(l hugio.ReadSeekCloserProvider) error {
|
||||||
var initErr error
|
var initErr error
|
||||||
r.initOnce.Do(func() {
|
r.initOnce.Do(func() {
|
||||||
var hash string
|
var hash uint64
|
||||||
var size int64
|
var size int64
|
||||||
f, err := l.ReadSeekCloser()
|
f, err := l.ReadSeekCloser()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
@ -656,6 +657,6 @@ func (r *resourceHash) init(l hugio.ReadSeekCloserProvider) error {
|
||||||
return initErr
|
return initErr
|
||||||
}
|
}
|
||||||
|
|
||||||
func hashImage(r io.ReadSeeker) (string, int64, error) {
|
func hashImage(r io.ReadSeeker) (uint64, int64, error) {
|
||||||
return helpers.MD5FromReaderFast(r)
|
return hashing.XXHashFromReader(r)
|
||||||
}
|
}
|
||||||
|
|
|
@ -16,10 +16,9 @@ package hash
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"context"
|
"context"
|
||||||
"encoding/hex"
|
|
||||||
"hash/fnv"
|
"hash/fnv"
|
||||||
|
|
||||||
"github.com/cespare/xxhash/v2"
|
"github.com/gohugoio/hugo/common/hashing"
|
||||||
"github.com/gohugoio/hugo/deps"
|
"github.com/gohugoio/hugo/deps"
|
||||||
"github.com/gohugoio/hugo/tpl/internal"
|
"github.com/gohugoio/hugo/tpl/internal"
|
||||||
"github.com/spf13/cast"
|
"github.com/spf13/cast"
|
||||||
|
@ -51,14 +50,7 @@ func (ns *Namespace) XxHash(v any) (string, error) {
|
||||||
return "", err
|
return "", err
|
||||||
}
|
}
|
||||||
|
|
||||||
hasher := xxhash.New()
|
return hashing.XxHashFromStringHexEncoded(conv), nil
|
||||||
|
|
||||||
_, err = hasher.WriteString(conv)
|
|
||||||
if err != nil {
|
|
||||||
return "", err
|
|
||||||
}
|
|
||||||
hash := hasher.Sum(nil)
|
|
||||||
return hex.EncodeToString(hash), nil
|
|
||||||
}
|
}
|
||||||
|
|
||||||
const name = "hash"
|
const name = "hash"
|
||||||
|
|
|
@ -18,12 +18,12 @@ import (
|
||||||
"fmt"
|
"fmt"
|
||||||
"strings"
|
"strings"
|
||||||
|
|
||||||
"github.com/gohugoio/hugo/helpers"
|
|
||||||
htmltemplate "github.com/gohugoio/hugo/tpl/internal/go_templates/htmltemplate"
|
htmltemplate "github.com/gohugoio/hugo/tpl/internal/go_templates/htmltemplate"
|
||||||
texttemplate "github.com/gohugoio/hugo/tpl/internal/go_templates/texttemplate"
|
texttemplate "github.com/gohugoio/hugo/tpl/internal/go_templates/texttemplate"
|
||||||
|
|
||||||
"github.com/gohugoio/hugo/tpl/internal/go_templates/texttemplate/parse"
|
"github.com/gohugoio/hugo/tpl/internal/go_templates/texttemplate/parse"
|
||||||
|
|
||||||
|
"github.com/gohugoio/hugo/common/hashing"
|
||||||
"github.com/gohugoio/hugo/common/maps"
|
"github.com/gohugoio/hugo/common/maps"
|
||||||
"github.com/gohugoio/hugo/tpl"
|
"github.com/gohugoio/hugo/tpl"
|
||||||
"github.com/mitchellh/mapstructure"
|
"github.com/mitchellh/mapstructure"
|
||||||
|
@ -254,7 +254,7 @@ func (c *templateContext) handleDefer(withNode *parse.WithNode) {
|
||||||
c.err = errors.New("resources.PostProcess cannot be used in a deferred template")
|
c.err = errors.New("resources.PostProcess cannot be used in a deferred template")
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
innerHash := helpers.XxHashString(s)
|
innerHash := hashing.XxHashFromStringHexEncoded(s)
|
||||||
deferredID := tpl.HugoDeferredTemplatePrefix + innerHash
|
deferredID := tpl.HugoDeferredTemplatePrefix + innerHash
|
||||||
|
|
||||||
c.deferNodes[deferredID] = inner
|
c.deferNodes[deferredID] = inner
|
||||||
|
|
Loading…
Reference in a new issue