diff --git a/go.mod b/go.mod index cf602914109..4f3f2999d9e 100644 --- a/go.mod +++ b/go.mod @@ -100,7 +100,7 @@ require ( github.com/ipfs/go-log/v2 v2.6.0 // indirect github.com/jackpal/go-nat-pmp v1.0.2 // indirect github.com/jbenet/go-temp-err-catcher v0.1.0 // indirect - github.com/klauspost/cpuid/v2 v2.2.10 // indirect + github.com/klauspost/cpuid/v2 v2.3.0 github.com/koron/go-ssdp v0.0.6 // indirect github.com/leodido/go-urn v1.4.0 // indirect github.com/libdns/libdns v0.2.2 // indirect diff --git a/go.sum b/go.sum index fab9e1c2f69..e90b1b1bd84 100644 --- a/go.sum +++ b/go.sum @@ -527,8 +527,8 @@ github.com/klauspost/compress v1.18.0/go.mod h1:2Pp+KzxcywXVXMr50+X0Q/Lsb43OQHYW github.com/klauspost/cpuid v0.0.0-20170728055534-ae7887de9fa5/go.mod h1:Pj4uuM528wm8OyEC2QMXAi2YiTZ96dNQPGgoMS4s3ek= github.com/klauspost/cpuid/v2 v2.0.4/go.mod h1:FInQzS24/EEf25PyTYn52gqo7WaD8xa0213Md/qVLRg= github.com/klauspost/cpuid/v2 v2.0.6/go.mod h1:FInQzS24/EEf25PyTYn52gqo7WaD8xa0213Md/qVLRg= -github.com/klauspost/cpuid/v2 v2.2.10 h1:tBs3QSyvjDyFTq3uoc/9xFpCuOsJQFNPiAhYdw2skhE= -github.com/klauspost/cpuid/v2 v2.2.10/go.mod h1:hqwkgyIinND0mEev00jJYCxPNVRVXFQeu1XKlok6oO0= +github.com/klauspost/cpuid/v2 v2.3.0 h1:S4CRMLnYUhGeDFDqkGriYKdfoFlDnMtqTiI/sFzhA9Y= +github.com/klauspost/cpuid/v2 v2.3.0/go.mod h1:hqwkgyIinND0mEev00jJYCxPNVRVXFQeu1XKlok6oO0= github.com/klauspost/crc32 v0.0.0-20161016154125-cb6bfca970f6/go.mod h1:+ZoRqAPRLkC4NPOvfYeR5KNOrY6TD+/sAC3HXPZgDYg= github.com/klauspost/pgzip v1.0.2-0.20170402124221-0bf5dcad4ada/go.mod h1:Ch1tH69qFZu15pkjo5kYi6mth2Zzwzt50oCQKQE9RUs= github.com/klauspost/reedsolomon v1.11.8 h1:s8RpUW5TK4hjr+djiOpbZJB4ksx+TdYbRH7vHQpwPOY= diff --git a/pkg/bmt/benchmark_test.go b/pkg/bmt/benchmark_test.go index aa6cc1703dd..0fe014bb0dc 100644 --- a/pkg/bmt/benchmark_test.go +++ b/pkg/bmt/benchmark_test.go @@ -29,6 +29,9 @@ func BenchmarkBMT(b *testing.B) { b.Run(fmt.Sprintf("%v_size_%v", "BMT", size), func(b *testing.B) { benchmarkBMT(b, size) }) + 
b.Run(fmt.Sprintf("%v_size_%v", "BMT_NoSIMD", size), func(b *testing.B) { + benchmarkBMTNoSIMD(b, size) + }) } } @@ -87,7 +90,7 @@ func benchmarkBMT(b *testing.B, n int) { testData := testutil.RandBytesWithSeed(b, 4096, seed) - pool := bmt.NewPool(bmt.NewConf(swarm.NewHasher, testSegmentCount, testPoolSize)) + pool := bmt.NewPool(bmt.NewConf(testSegmentCount, testPoolSize)) h := pool.Get() defer pool.Put(h) @@ -106,7 +109,7 @@ func benchmarkPool(b *testing.B, poolsize int) { testData := testutil.RandBytesWithSeed(b, 4096, seed) - pool := bmt.NewPool(bmt.NewConf(swarm.NewHasher, testSegmentCount, poolsize)) + pool := bmt.NewPool(bmt.NewConf(testSegmentCount, poolsize)) cycles := 100 b.ReportAllocs() @@ -127,6 +130,25 @@ func benchmarkPool(b *testing.B, poolsize int) { } } +// benchmarks BMT Hasher with SIMD disabled +func benchmarkBMTNoSIMD(b *testing.B, n int) { + b.Helper() + + testData := testutil.RandBytesWithSeed(b, 4096, seed) + + pool := bmt.NewPool(bmt.NewConfNoSIMD(testSegmentCount, testPoolSize)) + h := pool.Get() + defer pool.Put(h) + + b.ReportAllocs() + + for b.Loop() { + if _, err := syncHash(h, testData[:n]); err != nil { + b.Fatalf("seed %d: %v", seed, err) + } + } +} + // benchmarks the reference hasher func benchmarkRefHasher(b *testing.B, n int) { b.Helper() diff --git a/pkg/bmt/bmt.go b/pkg/bmt/bmt.go index ae3c5e95421..b21584a046c 100644 --- a/pkg/bmt/bmt.go +++ b/pkg/bmt/bmt.go @@ -18,40 +18,6 @@ var ( zerosection = make([]byte, 64) ) -// Hasher is a reusable hasher for fixed maximum size chunks representing a BMT -// It reuses a pool of trees for amortised memory allocation and resource control, -// and supports order-agnostic concurrent segment writes and section (double segment) writes -// as well as sequential read and write. -// -// The same hasher instance must not be called concurrently on more than one chunk. -// -// The same hasher instance is synchronously reusable. 
-// -// Sum gives back the tree to the pool and guaranteed to leave -// the tree and itself in a state reusable for hashing a new chunk. -type Hasher struct { - *Conf // configuration - bmt *tree // prebuilt BMT resource for flowcontrol and proofs - size int // bytes written to Hasher since last Reset() - pos int // index of rightmost currently open segment - result chan []byte // result channel - errc chan error // error channel - span []byte // The span of the data subsumed under the chunk -} - -// NewHasher gives back an instance of a Hasher struct -func NewHasher(hasherFact func() hash.Hash) *Hasher { - conf := NewConf(hasherFact, swarm.BmtBranches, 32) - - return &Hasher{ - Conf: conf, - result: make(chan []byte), - errc: make(chan error, 1), - span: make([]byte, SpanSize), - bmt: newTree(conf.maxSize, conf.depth, conf.hasher), - } -} - // Capacity returns the maximum amount of bytes that will be processed by this hasher implementation. // since BMT assumes a balanced binary tree, capacity it is always a power of 2 func (h *Hasher) Capacity() int { @@ -91,191 +57,12 @@ func (h *Hasher) BlockSize() int { return 2 * h.segmentSize } -// Hash returns the BMT root hash of the buffer and an error -// using Hash presupposes sequential synchronous writes (io.Writer interface). -func (h *Hasher) Hash(b []byte) ([]byte, error) { - if h.size == 0 { - return doHash(h.hasher(), h.span, h.zerohashes[h.depth]) - } - copy(h.bmt.buffer[h.size:], zerosection) - // write the last section with final flag set to true - go h.processSection(h.pos, true) - select { - case result := <-h.result: - return doHash(h.hasher(), h.span, result) - case err := <-h.errc: - return nil, err - } -} - // Sum returns the BMT root hash of the buffer, unsafe version of Hash func (h *Hasher) Sum(b []byte) []byte { s, _ := h.Hash(b) return s } -// Write calls sequentially add to the buffer to be hashed, -// with every full segment calls processSection in a go routine. 
-func (h *Hasher) Write(b []byte) (int, error) { - l := len(b) - maxVal := h.maxSize - h.size - if l > maxVal { - l = maxVal - } - copy(h.bmt.buffer[h.size:], b) - secsize := 2 * h.segmentSize - from := h.size / secsize - h.size += l - to := h.size / secsize - if l == maxVal { - to-- - } - h.pos = to - for i := from; i < to; i++ { - go h.processSection(i, false) - } - return l, nil -} - -// Reset prepares the Hasher for reuse -func (h *Hasher) Reset() { - h.pos = 0 - h.size = 0 - copy(h.span, zerospan) -} - -// processSection writes the hash of i-th section into level 1 node of the BMT tree. -func (h *Hasher) processSection(i int, final bool) { - secsize := 2 * h.segmentSize - offset := i * secsize - level := 1 - // select the leaf node for the section - n := h.bmt.leaves[i] - isLeft := n.isLeft - hasher := n.hasher - n = n.parent - // hash the section - section, err := doHash(hasher, h.bmt.buffer[offset:offset+secsize]) - if err != nil { - select { - case h.errc <- err: - default: - } - return - } - // write hash into parent node - if final { - // for the last segment use writeFinalNode - h.writeFinalNode(level, n, isLeft, section) - } else { - h.writeNode(n, isLeft, section) - } -} - -// writeNode pushes the data to the node. -// if it is the first of 2 sisters written, the routine terminates. -// if it is the second, it calculates the hash and writes it -// to the parent node recursively. -// since hashing the parent is synchronous the same hasher can be used. 
-func (h *Hasher) writeNode(n *node, isLeft bool, s []byte) { - var err error - for { - // at the root of the bmt just write the result to the result channel - if n == nil { - h.result <- s - return - } - // otherwise assign child hash to left or right segment - if isLeft { - n.left = s - } else { - n.right = s - } - // the child-thread first arriving will terminate - if n.toggle() { - return - } - // the thread coming second now can be sure both left and right children are written - // so it calculates the hash of left|right and pushes it to the parent - s, err = doHash(n.hasher, n.left, n.right) - if err != nil { - select { - case h.errc <- err: - default: - } - return - } - isLeft = n.isLeft - n = n.parent - } -} - -// writeFinalNode is following the path starting from the final datasegment to the -// BMT root via parents. -// For unbalanced trees it fills in the missing right sister nodes using -// the pool's lookup table for BMT subtree root hashes for all-zero sections. -// Otherwise behaves like `writeNode`. -func (h *Hasher) writeFinalNode(level int, n *node, isLeft bool, s []byte) { - var err error - for { - // at the root of the bmt just write the result to the result channel - if n == nil { - if s != nil { - h.result <- s - } - return - } - var noHash bool - if isLeft { - // coming from left sister branch - // when the final section's path is going via left child node - // we include an all-zero subtree hash for the right level and toggle the node. 
- n.right = h.zerohashes[level] - if s != nil { - n.left = s - // if a left final node carries a hash, it must be the first (and only thread) - // so the toggle is already in passive state no need no call - // yet thread needs to carry on pushing hash to parent - noHash = false - } else { - // if again first thread then propagate nil and calculate no hash - noHash = n.toggle() - } - } else { - // right sister branch - if s != nil { - // if hash was pushed from right child node, write right segment change state - n.right = s - // if toggle is true, we arrived first so no hashing just push nil to parent - noHash = n.toggle() - } else { - // if s is nil, then thread arrived first at previous node and here there will be two, - // so no need to do anything and keep s = nil for parent - noHash = true - } - } - // the child-thread first arriving will just continue resetting s to nil - // the second thread now can be sure both left and right children are written - // it calculates the hash of left|right and pushes it to the parent - if noHash { - s = nil - } else { - s, err = doHash(n.hasher, n.left, n.right) - if err != nil { - select { - case h.errc <- err: - default: - } - return - } - } - // iterate to parent - isLeft = n.isLeft - n = n.parent - level++ - } -} - // calculates the Keccak256 SHA3 hash of the data func sha3hash(data ...[]byte) ([]byte, error) { return doHash(swarm.NewHasher(), data...) diff --git a/pkg/bmt/bmt_simd.go b/pkg/bmt/bmt_simd.go new file mode 100644 index 00000000000..b19d2245dfd --- /dev/null +++ b/pkg/bmt/bmt_simd.go @@ -0,0 +1,181 @@ +// Copyright 2024 The Swarm Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//go:build linux && amd64 && !purego + +package bmt + +import ( + "github.com/ethersphere/bee/v2/pkg/keccak" +) + +// hashSIMD computes the BMT root hash using SIMD-accelerated Keccak hashing. 
+// It processes the tree level by level from leaves to root, using batched +// SIMD calls instead of goroutine-per-section. A single thread handles all +// levels since SIMD already provides intra-call parallelism (4-way or 8-way). +func (h *Hasher) hashSIMD() ([]byte, error) { + secsize := 2 * h.segmentSize + bw := h.batchWidth + prefixLen := len(h.prefix) + + // Leaf level: hash each section and write results to parent nodes. + // Single-threaded: SIMD batching (4 or 8 hashes per call) replaces goroutine parallelism. + h.hashLeavesBatch(0, len(h.bmt.levels[0]), bw, secsize, prefixLen) + + // Internal levels: process each level single-threaded (diminishing work). + for lvl := 1; lvl < len(h.bmt.levels)-1; lvl++ { + h.hashNodesBatch(h.bmt.levels[lvl], bw, prefixLen) + } + + // Root level: hash using scalar hasher. + root := h.bmt.levels[len(h.bmt.levels)-1][0] + return doHash(root.hasher, root.left, root.right) +} + +// hashLeavesBatch hashes leaf sections in the range [start, end) using SIMD batches. 
+func (h *Hasher) hashLeavesBatch(start, end, bw, secsize, prefixLen int) { + buf := h.bmt.buffer + + if bw == 8 { + var inputs [8][]byte + for i := start; i < end; i += 8 { + batch := 8 + if i+batch > end { + batch = end - i + } + for j := 0; j < batch; j++ { + offset := (i + j) * secsize + if prefixLen > 0 { + copy(h.bmt.leafConcat[j][prefixLen:], buf[offset:offset+secsize]) + inputs[j] = h.bmt.leafConcat[j][:prefixLen+secsize] + } else { + inputs[j] = buf[offset : offset+secsize] + } + } + for j := batch; j < 8; j++ { + inputs[j] = nil + } + var outputs [8]keccak.Hash256 + if h.useSIMD { + outputs = keccak.Sum256x8(inputs) + } else { + outputs = keccak.Sum256x8Scalar(inputs) + } + for j := 0; j < batch; j++ { + leaf := h.bmt.levels[0][i+j] + if leaf.isLeft { + copy(leaf.parent.left, outputs[j][:]) + } else { + copy(leaf.parent.right, outputs[j][:]) + } + } + } + } else { + var inputs [4][]byte + for i := start; i < end; i += 4 { + batch := 4 + if i+batch > end { + batch = end - i + } + for j := 0; j < batch; j++ { + offset := (i + j) * secsize + if prefixLen > 0 { + copy(h.bmt.leafConcat[j][prefixLen:], buf[offset:offset+secsize]) + inputs[j] = h.bmt.leafConcat[j][:prefixLen+secsize] + } else { + inputs[j] = buf[offset : offset+secsize] + } + } + for j := batch; j < 4; j++ { + inputs[j] = nil + } + var outputs [4]keccak.Hash256 + if h.useSIMD { + outputs = keccak.Sum256x4(inputs) + } else { + outputs = keccak.Sum256x4Scalar(inputs) + } + for j := 0; j < batch; j++ { + leaf := h.bmt.levels[0][i+j] + if leaf.isLeft { + copy(leaf.parent.left, outputs[j][:]) + } else { + copy(leaf.parent.right, outputs[j][:]) + } + } + } + } +} + +// hashNodesBatch hashes a level of internal nodes using SIMD batches. +// Each node's left||right (64 bytes) is hashed to produce the input for its parent. 
+func (h *Hasher) hashNodesBatch(nodes []*node, bw, prefixLen int) { + count := len(nodes) + segSize := h.segmentSize + concat := &h.bmt.concat + + if bw == 8 { + var inputs [8][]byte + for i := 0; i < count; i += 8 { + batch := 8 + if i+batch > count { + batch = count - i + } + for j := 0; j < batch; j++ { + n := nodes[i+j] + copy(concat[j][prefixLen:prefixLen+segSize], n.left) + copy(concat[j][prefixLen+segSize:], n.right) + inputs[j] = concat[j][:prefixLen+2*segSize] + } + for j := batch; j < 8; j++ { + inputs[j] = nil + } + var outputs [8]keccak.Hash256 + if h.useSIMD { + outputs = keccak.Sum256x8(inputs) + } else { + outputs = keccak.Sum256x8Scalar(inputs) + } + for j := 0; j < batch; j++ { + n := nodes[i+j] + if n.isLeft { + copy(n.parent.left, outputs[j][:]) + } else { + copy(n.parent.right, outputs[j][:]) + } + } + } + } else { + var inputs [4][]byte + for i := 0; i < count; i += 4 { + batch := 4 + if i+batch > count { + batch = count - i + } + for j := 0; j < batch; j++ { + n := nodes[i+j] + copy(concat[j][prefixLen:prefixLen+segSize], n.left) + copy(concat[j][prefixLen+segSize:], n.right) + inputs[j] = concat[j][:prefixLen+2*segSize] + } + for j := batch; j < 4; j++ { + inputs[j] = nil + } + var outputs [4]keccak.Hash256 + if h.useSIMD { + outputs = keccak.Sum256x4(inputs) + } else { + outputs = keccak.Sum256x4Scalar(inputs) + } + for j := 0; j < batch; j++ { + n := nodes[i+j] + if n.isLeft { + copy(n.parent.left, outputs[j][:]) + } else { + copy(n.parent.right, outputs[j][:]) + } + } + } + } +} diff --git a/pkg/bmt/bmt_test.go b/pkg/bmt/bmt_test.go index c82fb896a6f..e22538eb3f2 100644 --- a/pkg/bmt/bmt_test.go +++ b/pkg/bmt/bmt_test.go @@ -67,7 +67,7 @@ func TestHasherEmptyData(t *testing.T) { if err != nil { t.Fatal(err) } - pool := bmt.NewPool(bmt.NewConf(swarm.NewHasher, count, 1)) + pool := bmt.NewPool(bmt.NewConf(count, 1)) h := pool.Get() resHash, err := syncHash(h, nil) if err != nil { @@ -92,7 +92,7 @@ func TestSyncHasherCorrectness(t 
*testing.T) { maxValue := count * hashSize var incr int capacity := 1 - pool := bmt.NewPool(bmt.NewConf(swarm.NewHasher, count, capacity)) + pool := bmt.NewPool(bmt.NewConf(count, capacity)) for n := 0; n <= maxValue; n += incr { h := pool.Get() incr = 1 + rand.Intn(5) @@ -125,7 +125,7 @@ func TestHasherReuse(t *testing.T) { func testHasherReuse(t *testing.T, poolsize int) { t.Helper() - pool := bmt.NewPool(bmt.NewConf(swarm.NewHasher, testSegmentCount, poolsize)) + pool := bmt.NewPool(bmt.NewConf(testSegmentCount, poolsize)) h := pool.Get() defer pool.Put(h) @@ -145,7 +145,7 @@ func TestBMTConcurrentUse(t *testing.T) { t.Parallel() testData := testutil.RandBytesWithSeed(t, 4096, seed) - pool := bmt.NewPool(bmt.NewConf(swarm.NewHasher, testSegmentCount, testPoolSize)) + pool := bmt.NewPool(bmt.NewConf(testSegmentCount, testPoolSize)) cycles := 100 ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second) @@ -178,7 +178,7 @@ func TestBMTWriterBuffers(t *testing.T) { t.Run(fmt.Sprintf("%d_segments", count), func(t *testing.T) { t.Parallel() - pool := bmt.NewPool(bmt.NewConf(swarm.NewHasher, count, testPoolSize)) + pool := bmt.NewPool(bmt.NewConf(count, testPoolSize)) h := pool.Get() defer pool.Put(h) @@ -275,7 +275,7 @@ func testHasherCorrectness(h *bmt.Hasher, data []byte, n, count int) (err error) func TestUseSyncAsOrdinaryHasher(t *testing.T) { t.Parallel() - pool := bmt.NewPool(bmt.NewConf(swarm.NewHasher, testSegmentCount, testPoolSize)) + pool := bmt.NewPool(bmt.NewConf(testSegmentCount, testPoolSize)) h := pool.Get() defer pool.Put(h) data := []byte("moodbytesmoodbytesmoodbytesmoodbytes") diff --git a/pkg/bmt/export_goroutine_test.go b/pkg/bmt/export_goroutine_test.go new file mode 100644 index 00000000000..26f48b88739 --- /dev/null +++ b/pkg/bmt/export_goroutine_test.go @@ -0,0 +1,13 @@ +// Copyright 2021 The Swarm Authors. All rights reserved. 
+// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//go:build !linux || !amd64 || purego + +package bmt + +// NewConfNoSIMD on the goroutine path just returns a regular Conf +// since there is no SIMD to disable. +func NewConfNoSIMD(segmentCount, capacity int) *Conf { + return NewConf(segmentCount, capacity) +} diff --git a/pkg/bmt/export_simd_test.go b/pkg/bmt/export_simd_test.go new file mode 100644 index 00000000000..f03e52d788f --- /dev/null +++ b/pkg/bmt/export_simd_test.go @@ -0,0 +1,16 @@ +// Copyright 2021 The Swarm Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//go:build linux && amd64 && !purego + +package bmt + +// NewConfNoSIMD creates a Conf identical to NewConf but with SIMD disabled, +// useful for benchmarking the non-SIMD path. +func NewConfNoSIMD(segmentCount, capacity int) *Conf { + c := NewConf(segmentCount, capacity) + c.useSIMD = false + c.batchWidth = 8 // use 8-wide batching with scalar fallback + return c +} diff --git a/pkg/bmt/hasher_goroutine.go b/pkg/bmt/hasher_goroutine.go new file mode 100644 index 00000000000..d18728dfee7 --- /dev/null +++ b/pkg/bmt/hasher_goroutine.go @@ -0,0 +1,234 @@ +// Copyright 2021 The Swarm Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//go:build !linux || !amd64 || purego + +package bmt + +import ( + "github.com/ethersphere/bee/v2/pkg/swarm" +) + +// Hasher is a reusable hasher for fixed maximum size chunks representing a BMT. +// This implementation uses goroutine-based concurrent tree traversal. +// +// It reuses a pool of trees for amortised memory allocation and resource control, +// and supports order-agnostic concurrent segment writes and section (double segment) writes +// as well as sequential read and write. 
+// +// The same hasher instance must not be called concurrently on more than one chunk. +// +// The same hasher instance is synchronously reusable. +// +// Sum gives back the tree to the pool and guaranteed to leave +// the tree and itself in a state reusable for hashing a new chunk. +type Hasher struct { + *Conf // configuration + bmt *tree // prebuilt BMT resource for flowcontrol and proofs + size int // bytes written to Hasher since last Reset() + pos int // index of rightmost currently open segment + result chan []byte // result channel + errc chan error // error channel + span []byte // The span of the data subsumed under the chunk +} + +// NewHasher gives back an instance of a Hasher struct +func NewHasher() *Hasher { + return newHasherWithConf(NewConf(swarm.BmtBranches, 32)) +} + +// NewPrefixHasher gives back an instance of a Hasher struct with the given prefix +// prepended to every hash operation. +func NewPrefixHasher(prefix []byte) *Hasher { + return newHasherWithConf(NewConfWithPrefix(prefix, swarm.BmtBranches, 32)) +} + +func newHasherWithConf(conf *Conf) *Hasher { + return &Hasher{ + Conf: conf, + result: make(chan []byte), + errc: make(chan error, 1), + span: make([]byte, SpanSize), + bmt: newTree(conf.maxSize, conf.depth, conf.hasherFunc), + } +} + +// Write calls sequentially add to the buffer to be hashed, +// with every full segment calls processSection in a go routine. +func (h *Hasher) Write(b []byte) (int, error) { + l := len(b) + maxVal := h.maxSize - h.size + if l > maxVal { + l = maxVal + } + copy(h.bmt.buffer[h.size:], b) + secsize := 2 * h.segmentSize + from := h.size / secsize + h.size += l + to := h.size / secsize + if l == maxVal { + to-- + } + h.pos = to + for i := from; i < to; i++ { + go h.processSection(i, false) + } + return l, nil +} + +// Hash returns the BMT root hash of the buffer and an error +// using Hash presupposes sequential synchronous writes (io.Writer interface). 
+func (h *Hasher) Hash(b []byte) ([]byte, error) { + if h.size == 0 { + return doHash(h.baseHasher(), h.span, h.zerohashes[h.depth]) + } + copy(h.bmt.buffer[h.size:], zerosection) + // write the last section with final flag set to true + go h.processSection(h.pos, true) + select { + case result := <-h.result: + return doHash(h.baseHasher(), h.span, result) + case err := <-h.errc: + return nil, err + } +} + +// Reset prepares the Hasher for reuse +func (h *Hasher) Reset() { + h.pos = 0 + h.size = 0 + copy(h.span, zerospan) +} + +// processSection writes the hash of i-th section into level 1 node of the BMT tree. +func (h *Hasher) processSection(i int, final bool) { + secsize := 2 * h.segmentSize + offset := i * secsize + level := 1 + // select the leaf node for the section + n := h.bmt.leaves[i] + isLeft := n.isLeft + hasher := n.hasher + n = n.parent + // hash the section + section, err := doHash(hasher, h.bmt.buffer[offset:offset+secsize]) + if err != nil { + select { + case h.errc <- err: + default: + } + return + } + // write hash into parent node + if final { + // for the last segment use writeFinalNode + h.writeFinalNode(level, n, isLeft, section) + } else { + h.writeNode(n, isLeft, section) + } +} + +// writeNode pushes the data to the node. +// if it is the first of 2 sisters written, the routine terminates. +// if it is the second, it calculates the hash and writes it +// to the parent node recursively. +// since hashing the parent is synchronous the same hasher can be used. 
+func (h *Hasher) writeNode(n *node, isLeft bool, s []byte) { + var err error + for { + // at the root of the bmt just write the result to the result channel + if n == nil { + h.result <- s + return + } + // otherwise assign child hash to left or right segment + if isLeft { + n.left = s + } else { + n.right = s + } + // the child-thread first arriving will terminate + if n.toggle() { + return + } + // the thread coming second now can be sure both left and right children are written + // so it calculates the hash of left|right and pushes it to the parent + s, err = doHash(n.hasher, n.left, n.right) + if err != nil { + select { + case h.errc <- err: + default: + } + return + } + isLeft = n.isLeft + n = n.parent + } +} + +// writeFinalNode is following the path starting from the final datasegment to the +// BMT root via parents. +// For unbalanced trees it fills in the missing right sister nodes using +// the pool's lookup table for BMT subtree root hashes for all-zero sections. +// Otherwise behaves like `writeNode`. +func (h *Hasher) writeFinalNode(level int, n *node, isLeft bool, s []byte) { + var err error + for { + // at the root of the bmt just write the result to the result channel + if n == nil { + if s != nil { + h.result <- s + } + return + } + var noHash bool + if isLeft { + // coming from left sister branch + // when the final section's path is going via left child node + // we include an all-zero subtree hash for the right level and toggle the node. 
+ n.right = h.zerohashes[level] + if s != nil { + n.left = s + // if a left final node carries a hash, it must be the first (and only thread) + // so the toggle is already in passive state no need no call + // yet thread needs to carry on pushing hash to parent + noHash = false + } else { + // if again first thread then propagate nil and calculate no hash + noHash = n.toggle() + } + } else { + // right sister branch + if s != nil { + // if hash was pushed from right child node, write right segment change state + n.right = s + // if toggle is true, we arrived first so no hashing just push nil to parent + noHash = n.toggle() + } else { + // if s is nil, then thread arrived first at previous node and here there will be two, + // so no need to do anything and keep s = nil for parent + noHash = true + } + } + // the child-thread first arriving will just continue resetting s to nil + // the second thread now can be sure both left and right children are written + // it calculates the hash of left|right and pushes it to the parent + if noHash { + s = nil + } else { + s, err = doHash(n.hasher, n.left, n.right) + if err != nil { + select { + case h.errc <- err: + default: + } + return + } + } + // iterate to parent + isLeft = n.isLeft + n = n.parent + level++ + } +} diff --git a/pkg/bmt/hasher_simd.go b/pkg/bmt/hasher_simd.go new file mode 100644 index 00000000000..ef3396482d7 --- /dev/null +++ b/pkg/bmt/hasher_simd.go @@ -0,0 +1,90 @@ +// Copyright 2021 The Swarm Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//go:build linux && amd64 && !purego + +package bmt + +import ( + "github.com/ethersphere/bee/v2/pkg/swarm" +) + +// Hasher is a reusable hasher for fixed maximum size chunks representing a BMT. +// This implementation buffers all data and defers hashing to Hash(), +// using SIMD-accelerated Keccak for multi-level trees. 
+// +// The same hasher instance must not be called concurrently on more than one chunk. +// +// The same hasher instance is synchronously reusable. +type Hasher struct { + *Conf // configuration + bmt *tree // prebuilt BMT resource for flowcontrol and proofs + size int // bytes written to Hasher since last Reset() + span []byte // The span of the data subsumed under the chunk +} + +// NewHasher gives back an instance of a Hasher struct +func NewHasher() *Hasher { + return newHasherWithConf(NewConf(swarm.BmtBranches, 32)) +} + +// NewPrefixHasher gives back an instance of a Hasher struct with the given prefix +// prepended to every hash operation. +func NewPrefixHasher(prefix []byte) *Hasher { + return newHasherWithConf(NewConfWithPrefix(prefix, swarm.BmtBranches, 32)) +} + +func newHasherWithConf(conf *Conf) *Hasher { + return &Hasher{ + Conf: conf, + span: make([]byte, SpanSize), + bmt: newTree(conf.maxSize, conf.depth, conf.baseHasher, conf.prefix), + } +} + +// Write calls sequentially add to the buffer to be hashed. +// All hashing is deferred to Hash(). +func (h *Hasher) Write(b []byte) (int, error) { + l := len(b) + maxVal := h.maxSize - h.size + if l > maxVal { + l = maxVal + } + copy(h.bmt.buffer[h.size:], b) + h.size += l + return l, nil +} + +// Hash returns the BMT root hash of the buffer and an error +// using Hash presupposes sequential synchronous writes (io.Writer interface). 
+func (h *Hasher) Hash(b []byte) ([]byte, error) { + if h.size == 0 { + return doHash(h.baseHasher(), h.span, h.zerohashes[h.depth]) + } + // zero-fill remainder so all sections have deterministic input + for i := h.size; i < h.maxSize; i++ { + h.bmt.buffer[i] = 0 + } + if len(h.bmt.levels) == 1 { + // single-level tree: hash the only section directly + secsize := 2 * h.segmentSize + root := h.bmt.levels[0][0] + rootHash, err := doHash(root.hasher, h.bmt.buffer[:secsize]) + if err != nil { + return nil, err + } + return doHash(h.baseHasher(), h.span, rootHash) + } + rootHash, err := h.hashSIMD() + if err != nil { + return nil, err + } + return doHash(h.baseHasher(), h.span, rootHash) +} + +// Reset prepares the Hasher for reuse +func (h *Hasher) Reset() { + h.size = 0 + copy(h.span, zerospan) +} diff --git a/pkg/bmt/pool.go b/pkg/bmt/pool_goroutine.go similarity index 71% rename from pkg/bmt/pool.go rename to pkg/bmt/pool_goroutine.go index a7f7245c40d..9a325ee57a9 100644 --- a/pkg/bmt/pool.go +++ b/pkg/bmt/pool_goroutine.go @@ -2,26 +2,37 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. +//go:build !linux || !amd64 || purego + package bmt import ( "hash" "sync/atomic" + + "github.com/ethersphere/bee/v2/pkg/swarm" ) -// BaseHasherFunc is a hash.Hash constructor function used for the base hash of the BMT. 
-// implemented by Keccak256 SHA3 sha3.NewLegacyKeccak256 -type BaseHasherFunc func() hash.Hash +const SEGMENT_SIZE = 32 // configuration type Conf struct { - segmentSize int // size of leaf segments, stipulated to be = hash size - segmentCount int // the number of segments on the base level of the BMT - capacity int // pool capacity, controls concurrency - depth int // depth of the bmt trees = int(log2(segmentCount))+1 - maxSize int // the total length of the data (count * size) - zerohashes [][]byte // lookup table for predictable padding subtrees for all levels - hasher BaseHasherFunc // base hasher to use for the BMT levels + segmentSize int // size of leaf segments, stipulated to be = hash size + segmentCount int // the number of segments on the base level of the BMT + capacity int // pool capacity, controls concurrency + depth int // depth of the bmt trees = int(log2(segmentCount))+1 + maxSize int // the total length of the data (count * size) + zerohashes [][]byte // lookup table for predictable padding subtrees for all levels + prefix []byte // optional prefix prepended to every hash operation + hasherFunc func() hash.Hash +} + +// baseHasher returns a new base hasher instance, optionally with prefix. +func (c *Conf) baseHasher() hash.Hash { + if len(c.prefix) > 0 { + return swarm.NewPrefixHasher(c.prefix) + } + return swarm.NewHasher() } // Pool provides a pool of trees used as resources by the BMT Hasher. 
@@ -32,29 +43,49 @@ type Pool struct { *Conf // configuration } -func NewConf(hasher BaseHasherFunc, segmentCount, capacity int) *Conf { +func NewConf(segmentCount, capacity int) *Conf { + return newConf(nil, segmentCount, capacity) +} + +func NewConfWithPrefix(prefix []byte, segmentCount, capacity int) *Conf { + return newConf(prefix, segmentCount, capacity) +} + +func newConf(prefix []byte, segmentCount, capacity int) *Conf { count, depth := sizeToParams(segmentCount) - segmentSize := hasher().Size() + segmentSize := SEGMENT_SIZE + + hasherFunc := func() hash.Hash { + if len(prefix) > 0 { + return swarm.NewPrefixHasher(prefix) + } + return swarm.NewHasher() + } + + c := &Conf{ + segmentSize: segmentSize, + segmentCount: segmentCount, + capacity: capacity, + maxSize: count * segmentSize, + depth: depth, + prefix: prefix, + hasherFunc: hasherFunc, + } + zerohashes := make([][]byte, depth+1) zeros := make([]byte, segmentSize) zerohashes[0] = zeros var err error // initialises the zerohashes lookup table for i := 1; i < depth+1; i++ { - if zeros, err = doHash(hasher(), zeros, zeros); err != nil { + if zeros, err = doHash(c.baseHasher(), zeros, zeros); err != nil { panic(err.Error()) } zerohashes[i] = zeros } - return &Conf{ - hasher: hasher, - segmentSize: segmentSize, - segmentCount: segmentCount, - capacity: capacity, - maxSize: count * segmentSize, - depth: depth, - zerohashes: zerohashes, - } + c.zerohashes = zerohashes + + return c } // NewPool creates a tree pool with hasher, segment size, segment count and capacity @@ -65,7 +96,7 @@ func NewPool(c *Conf) *Pool { c: make(chan *tree, c.capacity), } for i := 0; i < c.capacity; i++ { - p.c <- newTree(p.maxSize, p.depth, p.hasher) + p.c <- newTree(p.maxSize, p.depth, c.hasherFunc) } return p } diff --git a/pkg/bmt/pool_simd.go b/pkg/bmt/pool_simd.go new file mode 100644 index 00000000000..78c7061c6ac --- /dev/null +++ b/pkg/bmt/pool_simd.go @@ -0,0 +1,209 @@ +// Copyright 2021 The Swarm Authors. 
All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//go:build linux && amd64 && !purego + +package bmt + +import ( + "hash" + + "github.com/ethersphere/bee/v2/pkg/keccak" + "github.com/ethersphere/bee/v2/pkg/swarm" +) + +const SEGMENT_SIZE = 32 + +// configuration +type Conf struct { + segmentSize int // size of leaf segments, stipulated to be = hash size + segmentCount int // the number of segments on the base level of the BMT + capacity int // pool capacity, controls concurrency + depth int // depth of the bmt trees = int(log2(segmentCount))+1 + maxSize int // the total length of the data (count * size) + zerohashes [][]byte // lookup table for predictable padding subtrees for all levels + prefix []byte // optional prefix prepended to every hash operation + useSIMD bool // whether SIMD keccak is available + batchWidth int // 4 (AVX2), 8 (AVX-512), or 0 +} + +// baseHasher returns a new base hasher instance, optionally with prefix. +func (c *Conf) baseHasher() hash.Hash { + if len(c.prefix) > 0 { + return swarm.NewPrefixHasher(c.prefix) + } + return swarm.NewHasher() +} + +// Pool provides a pool of trees used as resources by the BMT Hasher. +// A tree popped from the pool is guaranteed to have a clean state ready +// for hashing a new chunk. 
+type Pool struct {
+	c     chan *tree // the channel to obtain a resource from the pool
+	*Conf            // configuration
+}
+
+func NewConf(segmentCount, capacity int) *Conf {
+	return newConf(nil, segmentCount, capacity)
+}
+
+func NewConfWithPrefix(prefix []byte, segmentCount, capacity int) *Conf {
+	return newConf(prefix, segmentCount, capacity)
+}
+
+func newConf(prefix []byte, segmentCount, capacity int) *Conf {
+	count, depth := sizeToParams(segmentCount)
+	segmentSize := SEGMENT_SIZE
+
+	c := &Conf{
+		segmentSize:  segmentSize,
+		segmentCount: segmentCount,
+		capacity:     capacity,
+		maxSize:      count * segmentSize,
+		depth:        depth,
+		prefix:       prefix,
+		useSIMD:      keccak.HasSIMD(),
+	}
+
+	bw := keccak.BatchWidth()
+	if bw == 0 {
+		bw = 8 // no SIMD available: default to 8-wide batching backed by the scalar goroutine fallback
+	}
+	c.batchWidth = bw
+
+	zerohashes := make([][]byte, depth+1)
+	zeros := make([]byte, segmentSize)
+	zerohashes[0] = zeros
+	var err error
+	// initialises the zerohashes lookup table
+	for i := 1; i < depth+1; i++ {
+		if zeros, err = doHash(c.baseHasher(), zeros, zeros); err != nil {
+			panic(err.Error())
+		}
+		zerohashes[i] = zeros
+	}
+	c.zerohashes = zerohashes
+
+	return c
+}
+
+// NewPool creates a tree pool with hasher, segment size, segment count and capacity
+// it reuses free trees or creates a new one if capacity is not reached.
+func NewPool(c *Conf) *Pool { + p := &Pool{ + Conf: c, + c: make(chan *tree, c.capacity), + } + for i := 0; i < c.capacity; i++ { + p.c <- newTree(p.maxSize, p.depth, c.baseHasher, c.prefix) + } + return p +} + +// Get returns a BMT hasher possibly reusing a tree from the pool +func (p *Pool) Get() *Hasher { + t := <-p.c + return &Hasher{ + Conf: p.Conf, + span: make([]byte, SpanSize), + bmt: t, + } +} + +// Put is called after using a bmt hasher to return the tree to a pool for reuse +func (p *Pool) Put(h *Hasher) { + p.c <- h.bmt +} + +// tree is a reusable control structure representing a BMT +// organised in a binary tree +// +// Hasher uses a Pool to obtain a tree for each chunk hash +// the tree is 'locked' while not in the pool. +type tree struct { + leaves []*node // leaf nodes of the tree, other nodes accessible via parent links + levels [][]*node // levels[0]=leaves, levels[1]=parents of leaves, ..., levels[depth-1]=root + buffer []byte + concat [8][]byte // reusable concat buffers for SIMD node hashing + leafConcat [8][]byte // reusable concat buffers for SIMD leaf hashing +} + +// node is a reusable segment hasher representing a node in a BMT. +type node struct { + isLeft bool // whether it is left side of the parent double segment + parent *node // pointer to parent node in the BMT + left, right []byte // this is where the two children sections are written + hasher hash.Hash // preconstructed hasher on nodes +} + +// newNode constructs a segment hasher node in the BMT (used by newTree). 
+func newNode(index int, parent *node, hasher hash.Hash) *node { + return &node{ + parent: parent, + isLeft: index%2 == 0, + hasher: hasher, + left: make([]byte, hasher.Size()), + right: make([]byte, hasher.Size()), + } +} + +// newTree initialises a tree by building up the nodes of a BMT +func newTree(maxsize, depth int, hashfunc func() hash.Hash, prefix []byte) *tree { + prefixLen := len(prefix) + n := newNode(0, nil, hashfunc()) + prevlevel := []*node{n} + // collect levels top-down during construction, then reverse + allLevels := [][]*node{prevlevel} + // iterate over levels and creates 2^(depth-level) nodes + // the 0 level is on double segment sections so we start at depth - 2 + count := 2 + for level := depth - 2; level >= 0; level-- { + nodes := make([]*node, count) + for i := 0; i < count; i++ { + parent := prevlevel[i/2] + nodes[i] = newNode(i, parent, hashfunc()) + } + allLevels = append(allLevels, nodes) + prevlevel = nodes + count *= 2 + } + // reverse so levels[0]=leaves, levels[len-1]=root + for i, j := 0, len(allLevels)-1; i < j; i, j = i+1, j-1 { + allLevels[i], allLevels[j] = allLevels[j], allLevels[i] + } + // pre-allocate concat buffers for SIMD hashing (with space for optional prefix) + segSize := hashfunc().Size() + bufSize := prefixLen + 2*segSize + var concat [8][]byte + for i := range concat { + concat[i] = make([]byte, bufSize) + if prefixLen > 0 { + copy(concat[i][:prefixLen], prefix) + } + } + var leafConcat [8][]byte + for i := range leafConcat { + leafConcat[i] = make([]byte, prefixLen+2*segSize) + if prefixLen > 0 { + copy(leafConcat[i][:prefixLen], prefix) + } + } + // the datanode level is the nodes on the last level + return &tree{ + leaves: prevlevel, + levels: allLevels, + buffer: make([]byte, maxsize), + concat: concat, + leafConcat: leafConcat, + } +} + +// sizeToParams calculates the depth (number of levels) and segment count in the BMT tree. 
+func sizeToParams(n int) (c, d int) { + c = 2 + for ; c < n; c *= 2 { + d++ + } + return c, d + 1 +} diff --git a/pkg/bmt/proof.go b/pkg/bmt/proof.go index b08017b43cd..4b1eda7351f 100644 --- a/pkg/bmt/proof.go +++ b/pkg/bmt/proof.go @@ -17,7 +17,8 @@ type Proof struct { Index int } -// Hash overrides base hash function of Hasher to fill buffer with zeros until chunk length +// Hash overrides base hash function of Hasher to fill buffer with zeros until chunk length. +// It always pads with zero sections so that the proof tree is fully populated. func (p Prover) Hash(b []byte) ([]byte, error) { for i := p.size; i < p.maxSize; i += len(zerosection) { _, err := p.Write(zerosection) @@ -67,7 +68,7 @@ func (p Prover) Verify(i int, proof Proof) (root []byte, err error) { } i = i / 2 n := p.bmt.leaves[i] - hasher := p.hasher() + hasher := p.baseHasher() isLeft := n.isLeft root, err = doHash(hasher, section) if err != nil { diff --git a/pkg/bmt/proof_test.go b/pkg/bmt/proof_test.go index 5b3625acefb..edf91c192a7 100644 --- a/pkg/bmt/proof_test.go +++ b/pkg/bmt/proof_test.go @@ -46,7 +46,7 @@ func TestProofCorrectness(t *testing.T) { } } - pool := bmt.NewPool(bmt.NewConf(swarm.NewHasher, 128, 128)) + pool := bmt.NewPool(bmt.NewConf(128, 128)) hh := pool.Get() t.Cleanup(func() { pool.Put(hh) @@ -191,7 +191,7 @@ func TestProof(t *testing.T) { t.Fatal(err) } - pool := bmt.NewPool(bmt.NewConf(swarm.NewHasher, 128, 128)) + pool := bmt.NewPool(bmt.NewConf(128, 128)) hh := pool.Get() t.Cleanup(func() { pool.Put(hh) diff --git a/pkg/bmtpool/bmtpool.go b/pkg/bmtpool/bmtpool.go index 88c1ad32dba..06bdcaa04b4 100644 --- a/pkg/bmtpool/bmtpool.go +++ b/pkg/bmtpool/bmtpool.go @@ -17,7 +17,7 @@ var instance *bmt.Pool // nolint:gochecknoinits func init() { - instance = bmt.NewPool(bmt.NewConf(swarm.NewHasher, swarm.BmtBranches, Capacity)) + instance = bmt.NewPool(bmt.NewConf(swarm.BmtBranches, Capacity)) } // Get a bmt Hasher instance. 
diff --git a/pkg/keccak/keccak.go b/pkg/keccak/keccak.go
new file mode 100644
index 00000000000..24b3ba58727
--- /dev/null
+++ b/pkg/keccak/keccak.go
@@ -0,0 +1,149 @@
+// Copyright 2024 The Swarm Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package keccak provides legacy Keccak-256 (Ethereum-compatible) hashing
+// with SIMD acceleration via XKCP.
+//
+// On amd64, the package automatically selects between AVX-512 (8-way parallel)
+// and AVX2 (4-way parallel) based on the CPU's capabilities.
+package keccak
+
+import (
+	"encoding/hex"
+	"sync"
+
+	"golang.org/x/crypto/sha3"
+)
+
+// Hash256 represents a 32-byte Keccak-256 hash
+type Hash256 [32]byte
+
+// HexString returns the hash as a hexadecimal string
+func (h Hash256) HexString() string {
+	return hex.EncodeToString(h[:])
+}
+
+// HasAVX512 reports whether the CPU supports AVX-512 (F + VL) and the
+// AVX-512 code path is available.
+func HasAVX512() bool {
+	return hasAVX512
+}
+
+// HasSIMD reports whether any SIMD-accelerated Keccak path is available
+// (AVX2 or AVX-512).
+func HasSIMD() bool {
+	return hasAVX2
+}
+
+// BatchWidth returns the SIMD batch width: 8 for AVX-512, 4 for AVX2, or 0
+// if no SIMD acceleration is available.
+func BatchWidth() int {
+	if hasAVX512 {
+		return 8
+	}
+	if hasAVX2 {
+		return 4
+	}
+	return 0
+}
+
+// Sum256 computes a single Keccak-256 hash (legacy, Ethereum-compatible).
+// It always uses the scalar implementation; SIMD acceleration only applies
+// to the batched Sum256x4/Sum256x8 variants.
+func Sum256(data []byte) Hash256 {
+	return sum256Scalar(data)
+}
+
+// Sum256x4 computes 4 Keccak-256 hashes in parallel using AVX2.
+// Falls back to scalar if AVX2 is not available.
+func Sum256x4(inputs [4][]byte) [4]Hash256 { + if !hasAVX2 { + return sum256x4Scalar(inputs) + } + var outputs [4]Hash256 + var inputsCopy [4][]byte + copy(inputsCopy[:], inputs[:]) + keccak256x4(&inputsCopy, &outputs) + return outputs +} + +// Sum256x4Scalar computes 4 Keccak-256 hashes using the scalar path, +// bypassing SIMD detection. Used by callers that explicitly want non-SIMD. +func Sum256x4Scalar(inputs [4][]byte) [4]Hash256 { + return sum256x4Scalar(inputs) +} + +// Sum256x8 computes 8 Keccak-256 hashes in parallel using AVX-512. +// Falls back to scalar if AVX-512 is not available. +func Sum256x8(inputs [8][]byte) [8]Hash256 { + if !hasAVX512 { + return sum256x8Scalar(inputs) + } + var outputs [8]Hash256 + var inputsCopy [8][]byte + copy(inputsCopy[:], inputs[:]) + keccak256x8(&inputsCopy, &outputs) + return outputs +} + +// Sum256x8Scalar computes 8 Keccak-256 hashes using the scalar path, +// bypassing SIMD detection. Used by callers that explicitly want non-SIMD. +func Sum256x8Scalar(inputs [8][]byte) [8]Hash256 { + return sum256x8Scalar(inputs) +} + +func sum256Scalar(data []byte) Hash256 { + var out Hash256 + h := sha3.NewLegacyKeccak256() + h.Write(data) + copy(out[:], h.Sum(nil)) + return out +} + +func sum256x4Scalar(inputs [4][]byte) [4]Hash256 { + var outputs [4]Hash256 + var wg sync.WaitGroup + var mu sync.Mutex + for i := 0; i < 4; i++ { + if inputs[i] == nil { + continue + } + wg.Add(1) + + go func() { + defer wg.Done() + h := sha3.NewLegacyKeccak256() + h.Write(inputs[i]) + result := h.Sum(nil) + mu.Lock() + copy(outputs[i][:], result) + mu.Unlock() + }() + } + wg.Wait() + return outputs +} + +func sum256x8Scalar(inputs [8][]byte) [8]Hash256 { + var outputs [8]Hash256 + var wg sync.WaitGroup + var mu sync.Mutex + + for i := 0; i < 8; i++ { + if inputs[i] == nil { + continue + } + wg.Add(1) + go func() { + defer wg.Done() + h := sha3.NewLegacyKeccak256() + h.Write(inputs[i]) + result := h.Sum(nil) + mu.Lock() + copy(outputs[i][:], result) 
+ mu.Unlock() + }() + } + wg.Wait() + return outputs +} diff --git a/pkg/keccak/keccak_amd64.go b/pkg/keccak/keccak_amd64.go new file mode 100644 index 00000000000..2894b54080c --- /dev/null +++ b/pkg/keccak/keccak_amd64.go @@ -0,0 +1,9 @@ +//go:build linux && amd64 && !purego + +package keccak + +//go:noescape +func keccak256x4(inputs *[4][]byte, outputs *[4]Hash256) + +//go:noescape +func keccak256x8(inputs *[8][]byte, outputs *[8]Hash256) diff --git a/pkg/keccak/keccak_cpu.go b/pkg/keccak/keccak_cpu.go new file mode 100644 index 00000000000..7ee62db10e8 --- /dev/null +++ b/pkg/keccak/keccak_cpu.go @@ -0,0 +1,14 @@ +// Copyright 2024 The Swarm Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//go:build linux && amd64 && !purego + +package keccak + +import cpuid "github.com/klauspost/cpuid/v2" + +var ( + hasAVX2 = cpuid.CPU.Supports(cpuid.AVX2) + hasAVX512 = cpuid.CPU.Supports(cpuid.AVX512F, cpuid.AVX512VL) +) diff --git a/pkg/keccak/keccak_cpu_other.go b/pkg/keccak/keccak_cpu_other.go new file mode 100644 index 00000000000..0e0966aad73 --- /dev/null +++ b/pkg/keccak/keccak_cpu_other.go @@ -0,0 +1,14 @@ +// Copyright 2024 The Swarm Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//go:build !linux || !amd64 || purego + +package keccak + +// No SIMD Keccak implementations are available on this platform; +// Sum256x4/Sum256x8 will fall back to the scalar goroutine path. 
+var ( + hasAVX2 = false + hasAVX512 = false +) diff --git a/pkg/keccak/keccak_other.go b/pkg/keccak/keccak_other.go new file mode 100644 index 00000000000..37e5b29caac --- /dev/null +++ b/pkg/keccak/keccak_other.go @@ -0,0 +1,11 @@ +//go:build !linux || !amd64 || purego + +package keccak + +func keccak256x4(_ *[4][]byte, _ *[4]Hash256) { + panic("keccak: SIMD not available on this platform") +} + +func keccak256x8(_ *[8][]byte, _ *[8]Hash256) { + panic("keccak: SIMD not available on this platform") +} diff --git a/pkg/keccak/keccak_times4_amd64.s b/pkg/keccak/keccak_times4_amd64.s new file mode 100644 index 00000000000..6d6f385f45f --- /dev/null +++ b/pkg/keccak/keccak_times4_amd64.s @@ -0,0 +1,11 @@ +//go:build linux && amd64 && !purego + +#include "textflag.h" + +// func keccak256x4(inputs *[4][]byte, outputs *[4]Hash256) +TEXT ·keccak256x4(SB), $16384-16 + MOVQ inputs+0(FP), DI + MOVQ outputs+8(FP), SI + CALL go_keccak256x4(SB) + VZEROUPPER + RET diff --git a/pkg/keccak/keccak_times4_linux_amd64.syso b/pkg/keccak/keccak_times4_linux_amd64.syso new file mode 100644 index 00000000000..06c7bc1db5f Binary files /dev/null and b/pkg/keccak/keccak_times4_linux_amd64.syso differ diff --git a/pkg/keccak/keccak_times8_amd64.s b/pkg/keccak/keccak_times8_amd64.s new file mode 100644 index 00000000000..8108924205a --- /dev/null +++ b/pkg/keccak/keccak_times8_amd64.s @@ -0,0 +1,13 @@ +//go:build linux && amd64 && !purego + +#include "textflag.h" + +// func keccak256x8(inputs *[8][]byte, outputs *[8]Hash256) +// Frame size 16384: AVX-512 state is larger (25 x 64 bytes = 1600 bytes) and +// the permutation uses more stack. Generous headroom provided. 
+TEXT ·keccak256x8(SB), $16384-16 + MOVQ inputs+0(FP), DI + MOVQ outputs+8(FP), SI + CALL go_keccak256x8(SB) + VZEROUPPER + RET diff --git a/pkg/keccak/keccak_times8_linux_amd64.syso b/pkg/keccak/keccak_times8_linux_amd64.syso new file mode 100644 index 00000000000..5d9f9fa77c2 Binary files /dev/null and b/pkg/keccak/keccak_times8_linux_amd64.syso differ diff --git a/pkg/storageincentives/proof.go b/pkg/storageincentives/proof.go index 86dd115c67e..b26f5dc54a5 100644 --- a/pkg/storageincentives/proof.go +++ b/pkg/storageincentives/proof.go @@ -7,7 +7,6 @@ package storageincentives import ( "errors" "fmt" - "hash" "math/big" "github.com/ethersphere/bee/v2/pkg/bmt" @@ -55,10 +54,7 @@ func makeInclusionProofs( require2++ } - prefixHasherFactory := func() hash.Hash { - return swarm.NewPrefixHasher(anchor1) - } - prefixHasherPool := bmt.NewPool(bmt.NewConf(prefixHasherFactory, swarm.BmtBranches, 8)) + prefixHasherPool := bmt.NewPool(bmt.NewConfWithPrefix(anchor1, swarm.BmtBranches, 8)) // Sample chunk proofs rccontent := bmt.Prover{Hasher: bmtpool.Get()} diff --git a/pkg/storageincentives/soc_mine_test.go b/pkg/storageincentives/soc_mine_test.go index 0265a9a21f7..b3d71077165 100644 --- a/pkg/storageincentives/soc_mine_test.go +++ b/pkg/storageincentives/soc_mine_test.go @@ -9,12 +9,10 @@ import ( "encoding/binary" "encoding/hex" "fmt" - "hash" "math/big" "os" "sync" "testing" - "testing/synctest" "github.com/ethersphere/bee/v2/pkg/bmt" "github.com/ethersphere/bee/v2/pkg/cac" @@ -33,59 +31,55 @@ import ( // to generate uploads using the input // cat socs.txt | tail 19 | head 16 | perl -pne 's/([a-f0-9]+)\t([a-f0-9]+)\t([a-f0-9]+)\t([a-f0-9]+)/echo -n $4 | xxd -r -p | curl -X POST \"http:\/\/localhost:1633\/soc\/$1\/$2?sig=$3\" -H \"accept: application\/json, text\/plain, \/\" -H \"content-type: application\/octet-stream\" -H \"swarm-postage-batch-id: 14b26beca257e763609143c6b04c2c487f01a051798c535c2f542ce75a97c05f\" --data-binary \@-/' func TestSocMine(t *testing.T) { - 
synctest.Test(t, func(t *testing.T) { - // the anchor used in neighbourhood selection and reserve salt for sampling - prefix, err := hex.DecodeString("3617319a054d772f909f7c479a2cebe5066e836a939412e32403c99029b92eff") - if err != nil { - t.Fatal(err) - } - // the transformed address hasher factory function - prefixhasher := func() hash.Hash { return swarm.NewPrefixHasher(prefix) } - // Create a pool for efficient hasher reuse - trHasherPool := bmt.NewPool(bmt.NewConf(prefixhasher, swarm.BmtBranches, 8)) - // the bignum cast of the maximum sample value (upper bound on transformed addresses as a 256-bit article) - // this constant is for a minimum reserve size of 2 million chunks with sample size of 16 - // = 1.284401 * 10^71 = 1284401 + 66 0-s - mstring := "1284401" - for range 66 { - mstring = mstring + "0" - } - n, ok := new(big.Int).SetString(mstring, 10) - if !ok { - t.Fatalf("SetString: error setting to '%s'", mstring) - } - // the filter function on the SOC address - // meant to make sure we pass check for proof of retrievability for - // a node of overlay 0x65xxx with a reserve depth of 1, i.e., - // SOC address must start with zero bit - filterSOCAddr := func(a swarm.Address) bool { - return a.Bytes()[0]&0x80 != 0x00 - } - // the filter function on the transformed address using the density estimation constant - filterTrAddr := func(a swarm.Address) (bool, error) { - m := new(big.Int).SetBytes(a.Bytes()) - return m.Cmp(n) < 0, nil - } - // setup the signer with a private key from a fixture - data, err := hex.DecodeString("634fb5a872396d9693e5c9f9d7233cfa93f395c093371017ff44aa9ae6564cdd") - if err != nil { - t.Fatal(err) - } - privKey, err := crypto.DecodeSecp256k1PrivateKey(data) - if err != nil { - t.Fatal(err) - } - signer := crypto.NewDefaultSigner(privKey) + // the anchor used in neighbourhood selection and reserve salt for sampling + prefix, err := hex.DecodeString("3617319a054d772f909f7c479a2cebe5066e836a939412e32403c99029b92eff") + if err != nil { + 
t.Fatal(err) + } + // Create a pool for efficient hasher reuse + trHasherPool := bmt.NewPool(bmt.NewConfWithPrefix(prefix, swarm.BmtBranches, 8)) + // the bignum cast of the maximum sample value (upper bound on transformed addresses as a 256-bit article) + // this constant is for a minimum reserve size of 2 million chunks with sample size of 16 + // = 1.284401 * 10^71 = 1284401 + 66 0-s + mstring := "1284401" + for range 66 { + mstring = mstring + "0" + } + n, ok := new(big.Int).SetString(mstring, 10) + if !ok { + t.Fatalf("SetString: error setting to '%s'", mstring) + } + // the filter function on the SOC address + // meant to make sure we pass check for proof of retrievability for + // a node of overlay 0x65xxx with a reserve depth of 1, i.e., + // SOC address must start with zero bit + filterSOCAddr := func(a swarm.Address) bool { + return a.Bytes()[0]&0x80 != 0x00 + } + // the filter function on the transformed address using the density estimation constant + filterTrAddr := func(a swarm.Address) (bool, error) { + m := new(big.Int).SetBytes(a.Bytes()) + return m.Cmp(n) < 0, nil + } + // setup the signer with a private key from a fixture + data, err := hex.DecodeString("634fb5a872396d9693e5c9f9d7233cfa93f395c093371017ff44aa9ae6564cdd") + if err != nil { + t.Fatal(err) + } + privKey, err := crypto.DecodeSecp256k1PrivateKey(data) + if err != nil { + t.Fatal(err) + } + signer := crypto.NewDefaultSigner(privKey) - sampleSize := 16 - // for sanity check: given a filterSOCAddr requiring a 0 leading bit (chance of 1/2) - // we expect an overall rough 4 million chunks to be mined to create this sample - // for 8 workers that is half a million round on average per worker - err = makeChunks(t, signer, sampleSize, filterSOCAddr, filterTrAddr, trHasherPool) - if err != nil { - t.Fatal(err) - } - }) + sampleSize := 16 + // for sanity check: given a filterSOCAddr requiring a 0 leading bit (chance of 1/2) + // we expect an overall rough 4 million chunks to be mined to create 
this sample + // for 8 workers that is half a million round on average per worker + err = makeChunks(t, signer, sampleSize, filterSOCAddr, filterTrAddr, trHasherPool) + if err != nil { + t.Fatal(err) + } } func makeChunks(t *testing.T, signer crypto.Signer, sampleSize int, filterSOCAddr func(swarm.Address) bool, filterTrAddr func(swarm.Address) (bool, error), trHasherPool *bmt.Pool) error { diff --git a/pkg/storer/sample.go b/pkg/storer/sample.go index 43999429300..ae4b88c66a6 100644 --- a/pkg/storer/sample.go +++ b/pkg/storer/sample.go @@ -9,7 +9,6 @@ import ( "context" "encoding/binary" "fmt" - "hash" "math/big" "runtime" "sort" @@ -121,16 +120,12 @@ func (db *DB) ReserveSample( // Phase 2: Get the chunk data and calculate transformed hash sampleItemChan := make(chan SampleItem, 3*workers) - prefixHasherFactory := func() hash.Hash { - return swarm.NewPrefixHasher(anchor) - } - db.logger.Debug("reserve sampler workers", "count", workers) for range workers { g.Go(func() error { wstat := SampleStats{} - hasher := bmt.NewHasher(prefixHasherFactory) + hasher := bmt.NewPrefixHasher(anchor) defer func() { addStats(wstat) }() @@ -407,12 +402,9 @@ func RandSample(t *testing.T, anchor []byte) Sample { // MakeSampleUsingChunks returns Sample constructed using supplied chunks. func MakeSampleUsingChunks(chunks []swarm.Chunk, anchor []byte) (Sample, error) { - prefixHasherFactory := func() hash.Hash { - return swarm.NewPrefixHasher(anchor) - } items := make([]SampleItem, len(chunks)) for i, ch := range chunks { - tr, err := transformedAddress(bmt.NewHasher(prefixHasherFactory), ch, getChunkType(ch)) + tr, err := transformedAddress(bmt.NewPrefixHasher(anchor), ch, getChunkType(ch)) if err != nil { return Sample{}, err }