mirror of
https://github.com/gohugoio/hugo.git
synced 2024-11-07 20:30:36 -05:00
Replace the MD5 hashing of images with xxHash
Note that we only use this for change detection. The previous implementation invoked `MD5FromReaderFast`, which created an MD5 hash from eight 64-byte chunks of the file, which is obviously very fast. The new implementation creates the hash from the entire file and ... seems to be even more effective: ``` name old time/op new time/op delta HashImage-10 9.45µs ±21% 10.89µs ± 1% ~ (p=0.343 n=4+4) name old alloc/op new alloc/op delta HashImage-10 144B ± 0% 8B ± 0% -94.44% (p=0.029 n=4+4) name old allocs/op new allocs/op delta HashImage-10 4.00 ± 0% 1.00 ± 0% -75.00% (p=0.029 n=4+4) ```
This commit is contained in:
parent
8b5d796989
commit
d5eda13cb2
7 changed files with 180 additions and 29 deletions
86
common/hashing/hashing.go
Normal file
86
common/hashing/hashing.go
Normal file
|
@ -0,0 +1,86 @@
|
|||
// Copyright 2024 The Hugo Authors. All rights reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
// Package hashing provides common hashing utilities.
|
||||
package hashing
|
||||
|
||||
import (
|
||||
"encoding/hex"
|
||||
"io"
|
||||
"sync"
|
||||
|
||||
"github.com/cespare/xxhash/v2"
|
||||
)
|
||||
|
||||
// XXHashFromReader calculates the xxHash for the given reader.
|
||||
func XXHashFromReader(r io.ReadSeeker) (uint64, int64, error) {
|
||||
h := getXxHashReadFrom()
|
||||
defer putXxHashReadFrom(h)
|
||||
|
||||
size, err := io.Copy(h, r)
|
||||
if err != nil {
|
||||
return 0, 0, err
|
||||
}
|
||||
return h.Sum64(), size, nil
|
||||
}
|
||||
|
||||
// XXHashFromString calculates the xxHash for the given string.
|
||||
func XXHashFromString(s string) (uint64, error) {
|
||||
h := xxhash.New()
|
||||
h.WriteString(s)
|
||||
return h.Sum64(), nil
|
||||
}
|
||||
|
||||
// XxHashFromStringHexEncoded calculates the xxHash for the given string
|
||||
// and returns the hash as a hex encoded string.
|
||||
func XxHashFromStringHexEncoded(f string) string {
|
||||
h := xxhash.New()
|
||||
h.WriteString(f)
|
||||
hash := h.Sum(nil)
|
||||
return hex.EncodeToString(hash)
|
||||
}
|
||||
|
||||
type xxhashReadFrom struct {
|
||||
buff []byte
|
||||
*xxhash.Digest
|
||||
}
|
||||
|
||||
func (x *xxhashReadFrom) ReadFrom(r io.Reader) (int64, error) {
|
||||
for {
|
||||
n, err := r.Read(x.buff)
|
||||
if n > 0 {
|
||||
x.Digest.Write(x.buff[:n])
|
||||
}
|
||||
if err != nil {
|
||||
if err == io.EOF {
|
||||
err = nil
|
||||
}
|
||||
return int64(n), err
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
var xXhashReadFromPool = sync.Pool{
|
||||
New: func() any {
|
||||
return &xxhashReadFrom{Digest: xxhash.New(), buff: make([]byte, 48*1024)}
|
||||
},
|
||||
}
|
||||
|
||||
func getXxHashReadFrom() *xxhashReadFrom {
|
||||
return xXhashReadFromPool.Get().(*xxhashReadFrom)
|
||||
}
|
||||
|
||||
func putXxHashReadFrom(h *xxhashReadFrom) {
|
||||
h.Reset()
|
||||
xXhashReadFromPool.Put(h)
|
||||
}
|
79
common/hashing/hashing_test.go
Normal file
79
common/hashing/hashing_test.go
Normal file
|
@ -0,0 +1,79 @@
|
|||
// Copyright 2024 The Hugo Authors. All rights reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package hashing
|
||||
|
||||
import (
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"github.com/cespare/xxhash/v2"
|
||||
qt "github.com/frankban/quicktest"
|
||||
)
|
||||
|
||||
func TestXxHashFromReader(t *testing.T) {
|
||||
c := qt.New(t)
|
||||
s := "Hello World"
|
||||
r := strings.NewReader(s)
|
||||
got, size, err := XXHashFromReader(r)
|
||||
c.Assert(err, qt.IsNil)
|
||||
c.Assert(size, qt.Equals, int64(len(s)))
|
||||
c.Assert(got, qt.Equals, uint64(7148569436472236994))
|
||||
}
|
||||
|
||||
func TestXxHashFromString(t *testing.T) {
|
||||
c := qt.New(t)
|
||||
s := "Hello World"
|
||||
got, err := XXHashFromString(s)
|
||||
c.Assert(err, qt.IsNil)
|
||||
c.Assert(got, qt.Equals, uint64(7148569436472236994))
|
||||
}
|
||||
|
||||
func TestXxHashFromStringHexEncoded(t *testing.T) {
|
||||
c := qt.New(t)
|
||||
s := "The quick brown fox jumps over the lazy dog"
|
||||
got := XxHashFromStringHexEncoded(s)
|
||||
// Facit: https://asecuritysite.com/encryption/xxhash?val=The%20quick%20brown%20fox%20jumps%20over%20the%20lazy%20dog
|
||||
c.Assert(got, qt.Equals, "0b242d361fda71bc")
|
||||
}
|
||||
|
||||
func BenchmarkXXHashFromReader(b *testing.B) {
|
||||
r := strings.NewReader("Hello World")
|
||||
b.ResetTimer()
|
||||
for i := 0; i < b.N; i++ {
|
||||
XXHashFromReader(r)
|
||||
r.Seek(0, 0)
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkXXHashFromString(b *testing.B) {
|
||||
s := "Hello World"
|
||||
b.ResetTimer()
|
||||
for i := 0; i < b.N; i++ {
|
||||
XXHashFromString(s)
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkXXHashFromStringHexEncoded(b *testing.B) {
|
||||
s := "The quick brown fox jumps over the lazy dog"
|
||||
b.ResetTimer()
|
||||
for i := 0; i < b.N; i++ {
|
||||
XxHashFromStringHexEncoded(s)
|
||||
}
|
||||
}
|
||||
|
||||
func xxHashFromString(f string) uint64 {
|
||||
h := xxhash.New()
|
||||
h.WriteString(f)
|
||||
return h.Sum64()
|
||||
}
|
|
@ -27,12 +27,11 @@ import (
|
|||
"unicode"
|
||||
"unicode/utf8"
|
||||
|
||||
"github.com/cespare/xxhash/v2"
|
||||
bp "github.com/gohugoio/hugo/bufferpool"
|
||||
|
||||
"github.com/spf13/afero"
|
||||
|
||||
"github.com/jdkato/prose/transform"
|
||||
|
||||
bp "github.com/gohugoio/hugo/bufferpool"
|
||||
)
|
||||
|
||||
// FilePathSeparator as defined by os.Separator.
|
||||
|
@ -258,13 +257,7 @@ func SliceToLower(s []string) []string {
|
|||
return l
|
||||
}
|
||||
|
||||
// XxHashString takes a string and returns its xxHash hash.
|
||||
func XxHashString(f string) string {
|
||||
h := xxhash.New()
|
||||
h.WriteString(f)
|
||||
hash := h.Sum(nil)
|
||||
return hex.EncodeToString(hash)
|
||||
}
|
||||
// XXHashFromReader creates a xxHash hash from the given reader.
|
||||
|
||||
// MD5String takes a string and returns its MD5 hash.
|
||||
func MD5String(f string) string {
|
||||
|
|
|
@ -493,7 +493,7 @@ func (i *imageResource) relTargetPathFromConfig(conf images.ImageConfig) interna
|
|||
}
|
||||
|
||||
h := i.hash()
|
||||
idStr := fmt.Sprintf("_hu%s_%d", h, i.size())
|
||||
idStr := fmt.Sprintf("_hu%d_%d", h, i.size())
|
||||
|
||||
// Do not change for no good reason.
|
||||
const md5Threshold = 100
|
||||
|
|
|
@ -26,6 +26,7 @@ import (
|
|||
"github.com/gohugoio/hugo/identity"
|
||||
"github.com/gohugoio/hugo/resources/internal"
|
||||
|
||||
"github.com/gohugoio/hugo/common/hashing"
|
||||
"github.com/gohugoio/hugo/common/herrors"
|
||||
"github.com/gohugoio/hugo/common/paths"
|
||||
|
||||
|
@ -307,7 +308,7 @@ type fileInfo interface {
|
|||
}
|
||||
|
||||
type hashProvider interface {
|
||||
hash() string
|
||||
hash() uint64
|
||||
}
|
||||
|
||||
var _ resource.StaleInfo = (*StaleValue[any])(nil)
|
||||
|
@ -403,7 +404,7 @@ func (l *genericResource) size() int64 {
|
|||
return l.h.size
|
||||
}
|
||||
|
||||
func (l *genericResource) hash() string {
|
||||
func (l *genericResource) hash() uint64 {
|
||||
if err := l.h.init(l); err != nil {
|
||||
panic(err)
|
||||
}
|
||||
|
@ -628,7 +629,7 @@ type targetPather interface {
|
|||
}
|
||||
|
||||
type resourceHash struct {
|
||||
value string
|
||||
value uint64
|
||||
size int64
|
||||
initOnce sync.Once
|
||||
}
|
||||
|
@ -636,7 +637,7 @@ type resourceHash struct {
|
|||
func (r *resourceHash) init(l hugio.ReadSeekCloserProvider) error {
|
||||
var initErr error
|
||||
r.initOnce.Do(func() {
|
||||
var hash string
|
||||
var hash uint64
|
||||
var size int64
|
||||
f, err := l.ReadSeekCloser()
|
||||
if err != nil {
|
||||
|
@ -656,6 +657,6 @@ func (r *resourceHash) init(l hugio.ReadSeekCloserProvider) error {
|
|||
return initErr
|
||||
}
|
||||
|
||||
func hashImage(r io.ReadSeeker) (string, int64, error) {
|
||||
return helpers.MD5FromReaderFast(r)
|
||||
func hashImage(r io.ReadSeeker) (uint64, int64, error) {
|
||||
return hashing.XXHashFromReader(r)
|
||||
}
|
||||
|
|
|
@ -16,10 +16,9 @@ package hash
|
|||
|
||||
import (
|
||||
"context"
|
||||
"encoding/hex"
|
||||
"hash/fnv"
|
||||
|
||||
"github.com/cespare/xxhash/v2"
|
||||
"github.com/gohugoio/hugo/common/hashing"
|
||||
"github.com/gohugoio/hugo/deps"
|
||||
"github.com/gohugoio/hugo/tpl/internal"
|
||||
"github.com/spf13/cast"
|
||||
|
@ -51,14 +50,7 @@ func (ns *Namespace) XxHash(v any) (string, error) {
|
|||
return "", err
|
||||
}
|
||||
|
||||
hasher := xxhash.New()
|
||||
|
||||
_, err = hasher.WriteString(conv)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
hash := hasher.Sum(nil)
|
||||
return hex.EncodeToString(hash), nil
|
||||
return hashing.XxHashFromStringHexEncoded(conv), nil
|
||||
}
|
||||
|
||||
const name = "hash"
|
||||
|
|
|
@ -18,12 +18,12 @@ import (
|
|||
"fmt"
|
||||
"strings"
|
||||
|
||||
"github.com/gohugoio/hugo/helpers"
|
||||
htmltemplate "github.com/gohugoio/hugo/tpl/internal/go_templates/htmltemplate"
|
||||
texttemplate "github.com/gohugoio/hugo/tpl/internal/go_templates/texttemplate"
|
||||
|
||||
"github.com/gohugoio/hugo/tpl/internal/go_templates/texttemplate/parse"
|
||||
|
||||
"github.com/gohugoio/hugo/common/hashing"
|
||||
"github.com/gohugoio/hugo/common/maps"
|
||||
"github.com/gohugoio/hugo/tpl"
|
||||
"github.com/mitchellh/mapstructure"
|
||||
|
@ -254,7 +254,7 @@ func (c *templateContext) handleDefer(withNode *parse.WithNode) {
|
|||
c.err = errors.New("resources.PostProcess cannot be used in a deferred template")
|
||||
return
|
||||
}
|
||||
innerHash := helpers.XxHashString(s)
|
||||
innerHash := hashing.XxHashFromStringHexEncoded(s)
|
||||
deferredID := tpl.HugoDeferredTemplatePrefix + innerHash
|
||||
|
||||
c.deferNodes[deferredID] = inner
|
||||
|
|
Loading…
Reference in a new issue