util/deephash: use sha256x (#5339)

Switch deephash to use sha256x.Hash.

We add sha256x.HashString to efficiently hash a string.
It uses unsafe under the hood to convert a string to a []byte.
We also modify sha256x.Hash to export the underlying hash.Hash
for testing purposes so that we can intercept all hash.Hash calls.

Performance:

	name                 old time/op    new time/op    delta
	Hash-24                19.8µs ± 1%    19.2µs ± 1%  -3.01%  (p=0.000 n=10+10)
	HashPacketFilter-24    2.61µs ± 0%    2.53µs ± 1%  -3.01%  (p=0.000 n=8+10)
	HashMapAcyclic-24      31.3µs ± 1%    29.8µs ± 0%  -4.80%  (p=0.000 n=10+9)
	TailcfgNode-24         1.83µs ± 1%    1.82µs ± 2%    ~     (p=0.305 n=10+10)
	HashArray-24            344ns ± 2%     323ns ± 1%  -6.02%  (p=0.000 n=9+10)

The performance gains are not as dramatic as sha256x over sha256 due to:
1. most of the hashing already occurring through the direct memory hashing logic, and
2. what does not go through direct memory hashing is slowed down by reflect.

Signed-off-by: Joe Tsai <joetsai@digital-static.net>
This commit is contained in:
Joe Tsai
2022-08-11 17:44:09 -07:00
committed by GitHub
parent d942a2ff56
commit 1f7479466e
5 changed files with 192 additions and 160 deletions

View File

@ -6,10 +6,9 @@ package deephash
import (
"archive/tar"
"bufio"
"bytes"
"crypto/sha256"
"fmt"
"hash"
"io"
"math"
"math/rand"
@ -626,10 +625,9 @@ func TestGetTypeHasher(t *testing.T) {
va := newAddressableValue(rv.Type())
va.Set(rv)
fn := getTypeInfo(va.Type()).hasher()
var buf bytes.Buffer
h := &hasher{
bw: bufio.NewWriter(&buf),
}
hb := &hashBuffer{Hash: sha256.New()}
h := new(hasher)
h.Hash.H = hb
got := fn(h, va)
const ptrSize = 32 << uintptr(^uintptr(0)>>63)
if tt.out32 != "" && ptrSize == 32 {
@ -641,10 +639,8 @@ func TestGetTypeHasher(t *testing.T) {
if got != tt.want {
t.Fatalf("func returned %v; want %v", got, tt.want)
}
if err := h.bw.Flush(); err != nil {
t.Fatal(err)
}
if got := buf.String(); got != tt.out {
h.sum()
if got := string(hb.B); got != tt.out {
t.Fatalf("got %q; want %q", got, tt.out)
}
})
@ -720,21 +716,21 @@ func TestHashMapAcyclic(t *testing.T) {
}
got := map[string]bool{}
var buf bytes.Buffer
bw := bufio.NewWriter(&buf)
hb := &hashBuffer{Hash: sha256.New()}
ti := getTypeInfo(reflect.TypeOf(m))
for i := 0; i < 20; i++ {
v := addressableValue{reflect.ValueOf(&m).Elem()}
buf.Reset()
bw.Reset(&buf)
h := &hasher{bw: bw}
hb.Reset()
h := new(hasher)
h.Hash.H = hb
h.hashMap(v, ti, false)
if got[string(buf.Bytes())] {
h.sum()
if got[string(hb.B)] {
continue
}
got[string(buf.Bytes())] = true
got[string(hb.B)] = true
}
if len(got) != 1 {
t.Errorf("got %d results; want 1", len(got))
@ -746,13 +742,13 @@ func TestPrintArray(t *testing.T) {
X [32]byte
}
x := T{X: [32]byte{1: 1, 31: 31}}
var got bytes.Buffer
bw := bufio.NewWriter(&got)
h := &hasher{bw: bw}
hb := &hashBuffer{Hash: sha256.New()}
h := new(hasher)
h.Hash.H = hb
h.hashValue(addressableValue{reflect.ValueOf(&x).Elem()}, false)
bw.Flush()
h.sum()
const want = "\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x1f"
if got := got.Bytes(); string(got) != want {
if got := hb.B; string(got) != want {
t.Errorf("wrong:\n got: %q\nwant: %q\n", got, want)
}
}
@ -764,16 +760,15 @@ func BenchmarkHashMapAcyclic(b *testing.B) {
m[i] = fmt.Sprint(i)
}
var buf bytes.Buffer
bw := bufio.NewWriter(&buf)
hb := &hashBuffer{Hash: sha256.New()}
v := addressableValue{reflect.ValueOf(&m).Elem()}
ti := getTypeInfo(v.Type())
h := &hasher{bw: bw}
h := new(hasher)
h.Hash.H = hb
for i := 0; i < b.N; i++ {
buf.Reset()
bw.Reset(&buf)
h.Reset()
h.hashMap(v, ti, false)
}
}
@ -874,3 +869,19 @@ func BenchmarkHashArray(b *testing.B) {
sink = Hash(x)
}
}
// hashBuffer wraps a hash.Hash and keeps a copy of every byte the
// wrapped hash accepts, so that the exact byte stream fed to the hash
// can be inspected afterwards through B.
type hashBuffer struct {
	// Hash is the wrapped hash; all of its methods other than
	// Write and Reset are promoted unchanged.
	hash.Hash
	// B accumulates the bytes accepted by Hash since the last Reset.
	B []byte
}

// Write forwards p to the wrapped hash and appends to B only the
// prefix of p that the hash reported as written. The count and error
// from the wrapped hash are returned as-is.
func (hb *hashBuffer) Write(p []byte) (written int, err error) {
	written, err = hb.Hash.Write(p)
	hb.B = append(hb.B, p[:written]...)
	return written, err
}

// Reset resets the wrapped hash and truncates B to zero length,
// retaining its backing array for reuse.
func (hb *hashBuffer) Reset() {
	hb.Hash.Reset()
	hb.B = hb.B[:0]
}