Use xxHash for the change detector

xxHash is much faster than MD5 in the HashingFs benchmark:

```
name          old time/op    new time/op    delta
HashingFs-10    21.3µs ± 2%     3.2µs ±17%  -84.96%  (p=0.029 n=4+4)

name          old alloc/op   new alloc/op   delta
HashingFs-10    12.9kB ± 0%    12.8kB ± 1%   -1.31%  (p=0.029 n=4+4)

name          old allocs/op  new allocs/op  delta
HashingFs-10      10.0 ± 0%       7.0 ± 0%  -30.00%  (p=0.029 n=4+4)
```

Updates #12643
This commit is contained in:
Bjørn Erik Pedersen 2024-07-06 16:06:24 +02:00
parent 0ee2610d7c
commit fb8909d5b0
No known key found for this signature in database
3 changed files with 53 additions and 30 deletions

View file

@ -162,16 +162,16 @@ type dynamicEvents struct {
type fileChangeDetector struct { type fileChangeDetector struct {
sync.Mutex sync.Mutex
current map[string]string current map[string]uint64
prev map[string]string prev map[string]uint64
irrelevantRe *regexp.Regexp irrelevantRe *regexp.Regexp
} }
func (f *fileChangeDetector) OnFileClose(name, md5sum string) { func (f *fileChangeDetector) OnFileClose(name string, checksum uint64) {
f.Lock() f.Lock()
defer f.Unlock() defer f.Unlock()
f.current[name] = md5sum f.current[name] = checksum
} }
func (f *fileChangeDetector) PrepareNew() { func (f *fileChangeDetector) PrepareNew() {
@ -183,16 +183,16 @@ func (f *fileChangeDetector) PrepareNew() {
defer f.Unlock() defer f.Unlock()
if f.current == nil { if f.current == nil {
f.current = make(map[string]string) f.current = make(map[string]uint64)
f.prev = make(map[string]string) f.prev = make(map[string]uint64)
return return
} }
f.prev = make(map[string]string) f.prev = make(map[string]uint64)
for k, v := range f.current { for k, v := range f.current {
f.prev[k] = v f.prev[k] = v
} }
f.current = make(map[string]string) f.current = make(map[string]uint64)
} }
func (f *fileChangeDetector) changed() []string { func (f *fileChangeDetector) changed() []string {

View file

@ -14,25 +14,24 @@
package hugofs package hugofs
import ( import (
"crypto/md5"
"encoding/hex"
"hash" "hash"
"os" "os"
"github.com/cespare/xxhash/v2"
"github.com/spf13/afero" "github.com/spf13/afero"
) )
var ( var (
_ afero.Fs = (*md5HashingFs)(nil) _ afero.Fs = (*hashingFs)(nil)
_ FilesystemUnwrapper = (*md5HashingFs)(nil) _ FilesystemUnwrapper = (*hashingFs)(nil)
) )
// FileHashReceiver will receive the filename and the content's MD5 sum on file close. // FileHashReceiver will receive the filename and the content's MD5 sum on file close.
type FileHashReceiver interface { type FileHashReceiver interface {
OnFileClose(name, md5sum string) OnFileClose(name string, checksum uint64)
} }
type md5HashingFs struct { type hashingFs struct {
afero.Fs afero.Fs
hashReceiver FileHashReceiver hashReceiver FileHashReceiver
} }
@ -45,14 +44,14 @@ type md5HashingFs struct {
// Note that this will only work for file operations that use the io.Writer // Note that this will only work for file operations that use the io.Writer
// to write content to file, but that is fine for the "publish content" use case. // to write content to file, but that is fine for the "publish content" use case.
func NewHashingFs(delegate afero.Fs, hashReceiver FileHashReceiver) afero.Fs { func NewHashingFs(delegate afero.Fs, hashReceiver FileHashReceiver) afero.Fs {
return &md5HashingFs{Fs: delegate, hashReceiver: hashReceiver} return &hashingFs{Fs: delegate, hashReceiver: hashReceiver}
} }
func (fs *md5HashingFs) UnwrapFilesystem() afero.Fs { func (fs *hashingFs) UnwrapFilesystem() afero.Fs {
return fs.Fs return fs.Fs
} }
func (fs *md5HashingFs) Create(name string) (afero.File, error) { func (fs *hashingFs) Create(name string) (afero.File, error) {
f, err := fs.Fs.Create(name) f, err := fs.Fs.Create(name)
if err == nil { if err == nil {
f = fs.wrapFile(f) f = fs.wrapFile(f)
@ -60,7 +59,7 @@ func (fs *md5HashingFs) Create(name string) (afero.File, error) {
return f, err return f, err
} }
func (fs *md5HashingFs) OpenFile(name string, flag int, perm os.FileMode) (afero.File, error) { func (fs *hashingFs) OpenFile(name string, flag int, perm os.FileMode) (afero.File, error) {
f, err := fs.Fs.OpenFile(name, flag, perm) f, err := fs.Fs.OpenFile(name, flag, perm)
if err == nil && isWrite(flag) { if err == nil && isWrite(flag) {
f = fs.wrapFile(f) f = fs.wrapFile(f)
@ -68,17 +67,17 @@ func (fs *md5HashingFs) OpenFile(name string, flag int, perm os.FileMode) (afero
return f, err return f, err
} }
func (fs *md5HashingFs) wrapFile(f afero.File) afero.File { func (fs *hashingFs) wrapFile(f afero.File) afero.File {
return &hashingFile{File: f, h: md5.New(), hashReceiver: fs.hashReceiver} return &hashingFile{File: f, h: xxhash.New(), hashReceiver: fs.hashReceiver}
} }
func (fs *md5HashingFs) Name() string { func (fs *hashingFs) Name() string {
return "md5HashingFs" return "hashingFs"
} }
type hashingFile struct { type hashingFile struct {
hashReceiver FileHashReceiver hashReceiver FileHashReceiver
h hash.Hash h hash.Hash64
afero.File afero.File
} }
@ -91,7 +90,6 @@ func (h *hashingFile) Write(p []byte) (n int, err error) {
} }
func (h *hashingFile) Close() error { func (h *hashingFile) Close() error {
sum := hex.EncodeToString(h.h.Sum(nil)) h.hashReceiver.OnFileClose(h.Name(), h.h.Sum64())
h.hashReceiver.OnFileClose(h.Name(), sum)
return h.File.Close() return h.File.Close()
} }

View file

@ -14,6 +14,8 @@
package hugofs package hugofs
import ( import (
"fmt"
"strings"
"testing" "testing"
qt "github.com/frankban/quicktest" qt "github.com/frankban/quicktest"
@ -21,13 +23,13 @@ import (
) )
type testHashReceiver struct { type testHashReceiver struct {
sum string
name string name string
sum uint64
} }
func (t *testHashReceiver) OnFileClose(name, md5hash string) { func (t *testHashReceiver) OnFileClose(name string, checksum uint64) {
t.name = name t.name = name
t.sum = md5hash t.sum = checksum
} }
func TestHashingFs(t *testing.T) { func TestHashingFs(t *testing.T) {
@ -42,11 +44,34 @@ func TestHashingFs(t *testing.T) {
_, err = f.Write([]byte("content")) _, err = f.Write([]byte("content"))
c.Assert(err, qt.IsNil) c.Assert(err, qt.IsNil)
c.Assert(f.Close(), qt.IsNil) c.Assert(f.Close(), qt.IsNil)
c.Assert(observer.sum, qt.Equals, "9a0364b9e99bb480dd25e1f0284c8555") c.Assert(observer.sum, qt.Equals, uint64(7807861979271768572))
c.Assert(observer.name, qt.Equals, "hashme") c.Assert(observer.name, qt.Equals, "hashme")
f, err = ofs.Create("nowrites") f, err = ofs.Create("nowrites")
c.Assert(err, qt.IsNil) c.Assert(err, qt.IsNil)
c.Assert(f.Close(), qt.IsNil) c.Assert(f.Close(), qt.IsNil)
c.Assert(observer.sum, qt.Equals, "d41d8cd98f00b204e9800998ecf8427e") c.Assert(observer.sum, qt.Equals, uint64(17241709254077376921))
}
func BenchmarkHashingFs(b *testing.B) {
fs := afero.NewMemMapFs()
observer := &testHashReceiver{}
ofs := NewHashingFs(fs, observer)
content := []byte(strings.Repeat("lorem ipsum ", 1000))
b.ResetTimer()
for i := 0; i < b.N; i++ {
f, err := ofs.Create(fmt.Sprintf("file%d", i))
if err != nil {
b.Fatal(err)
}
_, err = f.Write(content)
if err != nil {
b.Fatal(err)
}
if err := f.Close(); err != nil {
b.Fatal(err)
}
}
} }