- Port field operations assembler from libsecp256k1 (field_amd64.s,
field_amd64_bmi2.s) with MULX/ADCX/ADOX instructions
- Add AVX2 scalar and affine point operations in avx/ package
- Implement CPU feature detection (cpufeatures.go) for AVX2/BMI2
- Add libsecp256k1.so via purego for native C library comparison
- Create comprehensive SIMD benchmark suite comparing btcec, P256K1
pure Go, P256K1 ASM, and libsecp256k1
- Add BENCHMARK_SIMD.md documenting performance across implementations
- Remove BtcecSigner, consolidate on P256K1Signer as primary impl
- Add field operation tests and benchmarks (field_asm_test.go,
field_bench_test.go)
- Update GLV endomorphism with wNAF scalar multiplication
- Add scalar assembly (scalar_amd64.s) for optimized operations
- Clean up dependencies and update benchmark reports
785 lines
19 KiB
Go
785 lines
19 KiB
Go
package p256k1
|
|
|
|
import (
|
|
"errors"
|
|
"fmt"
|
|
"unsafe"
|
|
)
|
|
|
|
const (
|
|
// Window sizes for elliptic curve multiplication optimizations
|
|
windowA = 5 // Window size for main scalar (A)
|
|
windowG = 14 // Window size for generator (G) - larger for better performance
|
|
)
|
|
|
|
// EcmultConst computes r = q * a using constant-time multiplication
|
|
// Uses simple binary method
|
|
func EcmultConst(r *GroupElementJacobian, a *GroupElementAffine, q *Scalar) {
|
|
if a.isInfinity() {
|
|
r.setInfinity()
|
|
return
|
|
}
|
|
|
|
if q.isZero() {
|
|
r.setInfinity()
|
|
return
|
|
}
|
|
|
|
// Convert affine point to Jacobian
|
|
var aJac GroupElementJacobian
|
|
aJac.setGE(a)
|
|
|
|
// Use simple binary method for constant-time behavior
|
|
r.setInfinity()
|
|
|
|
var base GroupElementJacobian
|
|
base = aJac
|
|
|
|
// Process bits from MSB to LSB
|
|
for i := 0; i < 256; i++ {
|
|
if i > 0 {
|
|
r.double(r)
|
|
}
|
|
|
|
// Get bit i (from MSB)
|
|
bit := q.getBits(uint(255-i), 1)
|
|
if bit != 0 {
|
|
if r.isInfinity() {
|
|
*r = base
|
|
} else {
|
|
r.addVar(r, &base)
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
// ecmultWindowedVar computes r = q * a using optimized windowed multiplication (variable-time)
|
|
// Uses a window size of 6 bits (64 precomputed multiples) for better CPU performance
|
|
// Trades memory (64 entries vs 32) for ~20% faster multiplication
|
|
func ecmultWindowedVar(r *GroupElementJacobian, a *GroupElementAffine, q *Scalar) {
|
|
if a.isInfinity() {
|
|
r.setInfinity()
|
|
return
|
|
}
|
|
|
|
if q.isZero() {
|
|
r.setInfinity()
|
|
return
|
|
}
|
|
|
|
const windowSize = 6 // Increased from 5 to 6 for better performance
|
|
const tableSize = 1 << windowSize // 64
|
|
|
|
// Convert point to Jacobian once
|
|
var aJac GroupElementJacobian
|
|
aJac.setGE(a)
|
|
|
|
// Build table efficiently using Jacobian coordinates, only convert to affine at end
|
|
// Store odd multiples in Jacobian form to avoid frequent conversions
|
|
var tableJac [tableSize]GroupElementJacobian
|
|
tableJac[0].setInfinity()
|
|
tableJac[1] = aJac
|
|
|
|
// Build odd multiples efficiently: tableJac[2*i+1] = (2*i+1) * a
|
|
// Start with 3*a = a + 2*a
|
|
var twoA GroupElementJacobian
|
|
twoA.double(&aJac)
|
|
|
|
// Build table: tableJac[i] = tableJac[i-2] + 2*a for odd i
|
|
for i := 3; i < tableSize; i += 2 {
|
|
tableJac[i].addVar(&tableJac[i-2], &twoA)
|
|
}
|
|
|
|
// Build even multiples: tableJac[2*i] = 2 * tableJac[i]
|
|
for i := 1; i < tableSize/2; i++ {
|
|
tableJac[2*i].double(&tableJac[i])
|
|
}
|
|
|
|
// Process scalar in windows of 6 bits from MSB to LSB
|
|
r.setInfinity()
|
|
numWindows := (256 + windowSize - 1) / windowSize // Ceiling division
|
|
|
|
for window := 0; window < numWindows; window++ {
|
|
// Calculate bit offset for this window (MSB first)
|
|
bitOffset := 255 - window*windowSize
|
|
if bitOffset < 0 {
|
|
break
|
|
}
|
|
|
|
// Extract window bits
|
|
actualWindowSize := windowSize
|
|
if bitOffset < windowSize-1 {
|
|
actualWindowSize = bitOffset + 1
|
|
}
|
|
|
|
windowBits := q.getBits(uint(bitOffset-actualWindowSize+1), uint(actualWindowSize))
|
|
|
|
// Double result windowSize times (once per bit position in window)
|
|
if !r.isInfinity() {
|
|
for j := 0; j < actualWindowSize; j++ {
|
|
r.double(r)
|
|
}
|
|
}
|
|
|
|
// Add precomputed point if window is non-zero
|
|
if windowBits != 0 && windowBits < tableSize {
|
|
if r.isInfinity() {
|
|
*r = tableJac[windowBits]
|
|
} else {
|
|
r.addVar(r, &tableJac[windowBits])
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
// Ecmult computes r = q * a using optimized GLV+Strauss+wNAF multiplication
|
|
// This provides good performance for verification and ECDH operations
|
|
func Ecmult(r *GroupElementJacobian, a *GroupElementJacobian, q *Scalar) {
|
|
if a.isInfinity() {
|
|
r.setInfinity()
|
|
return
|
|
}
|
|
|
|
if q.isZero() {
|
|
r.setInfinity()
|
|
return
|
|
}
|
|
|
|
// Convert to affine for GLV multiplication
|
|
var aAff GroupElementAffine
|
|
aAff.setGEJ(a)
|
|
|
|
// Use optimized GLV+Strauss+wNAF multiplication
|
|
ecmultStraussWNAFGLV(r, &aAff, q)
|
|
}
|
|
|
|
// EcmultCombined computes r = na*a + ng*G using optimized algorithms
|
|
// This is more efficient than computing the two multiplications separately
|
|
// when both scalars are non-zero
|
|
func EcmultCombined(r *GroupElementJacobian, a *GroupElementJacobian, na, ng *Scalar) {
|
|
// Handle edge cases
|
|
naZero := na == nil || na.isZero()
|
|
ngZero := ng == nil || ng.isZero()
|
|
aInf := a == nil || a.isInfinity()
|
|
|
|
// If both scalars are zero, result is infinity
|
|
if naZero && ngZero {
|
|
r.setInfinity()
|
|
return
|
|
}
|
|
|
|
// If na is zero or a is infinity, just compute ng*G
|
|
if naZero || aInf {
|
|
ecmultGenGLV(r, ng)
|
|
return
|
|
}
|
|
|
|
// If ng is zero, just compute na*a
|
|
if ngZero {
|
|
var aAff GroupElementAffine
|
|
aAff.setGEJ(a)
|
|
ecmultStraussWNAFGLV(r, &aAff, na)
|
|
return
|
|
}
|
|
|
|
// Both multiplications needed - compute separately and add
|
|
// TODO: Could optimize further with combined Strauss algorithm
|
|
var naa, ngg GroupElementJacobian
|
|
|
|
var aAff GroupElementAffine
|
|
aAff.setGEJ(a)
|
|
ecmultStraussWNAFGLV(&naa, &aAff, na)
|
|
ecmultGenGLV(&ngg, ng)
|
|
|
|
// Add them together
|
|
r.addVar(&naa, &ngg)
|
|
}
|
|
|
|
// ecmultStraussGLV computes r = q * a using Strauss algorithm with GLV endomorphism
|
|
// This provides significant speedup for both verification and ECDH operations
|
|
func ecmultStraussGLV(r *GroupElementJacobian, a *GroupElementAffine, q *Scalar) {
|
|
if a.isInfinity() {
|
|
r.setInfinity()
|
|
return
|
|
}
|
|
|
|
if q.isZero() {
|
|
r.setInfinity()
|
|
return
|
|
}
|
|
|
|
// For now, use simplified Strauss algorithm without GLV endomorphism
|
|
// Convert base point to Jacobian
|
|
var aJac GroupElementJacobian
|
|
aJac.setGE(a)
|
|
|
|
// Compute odd multiples for the scalar
|
|
var preA [1 << (windowA - 1)]GroupElementJacobian
|
|
buildOddMultiples(&preA, &aJac, windowA)
|
|
|
|
// Convert scalar to wNAF representation
|
|
var wnaf [257]int
|
|
bits := q.wNAF(wnaf[:], windowA)
|
|
|
|
// Perform Strauss algorithm
|
|
r.setInfinity()
|
|
|
|
for i := bits - 1; i >= 0; i-- {
|
|
// Double the result
|
|
r.double(r)
|
|
|
|
// Add contribution
|
|
if wnaf[i] != 0 {
|
|
n := wnaf[i]
|
|
var pt GroupElementJacobian
|
|
if n > 0 {
|
|
idx := (n-1)/2
|
|
if idx >= len(preA) {
|
|
panic(fmt.Sprintf("wNAF positive index out of bounds: n=%d, idx=%d, len=%d", n, idx, len(preA)))
|
|
}
|
|
pt = preA[idx]
|
|
} else {
|
|
if (-n-1)/2 >= len(preA) {
|
|
panic("wNAF index out of bounds (negative)")
|
|
}
|
|
pt = preA[(-n-1)/2]
|
|
pt.y.negate(&pt.y, 1)
|
|
}
|
|
r.addVar(r, &pt)
|
|
}
|
|
}
|
|
}
|
|
|
|
// buildOddMultiples builds a table of odd multiples of a point
|
|
// pre[i] = (2*i+1) * a for i = 0 to (1<<(w-1))-1
|
|
func buildOddMultiples(pre *[1 << (windowA - 1)]GroupElementJacobian, a *GroupElementJacobian, w uint) {
|
|
tableSize := 1 << (w - 1)
|
|
|
|
// pre[0] = a (which is 1*a)
|
|
pre[0] = *a
|
|
|
|
if tableSize > 1 {
|
|
// Compute 2*a
|
|
var twoA GroupElementJacobian
|
|
twoA.double(a)
|
|
|
|
// Build odd multiples: pre[i] = pre[i-2] + 2*a for i >= 2, i even
|
|
for i := 2; i < tableSize; i += 2 {
|
|
pre[i].addVar(&pre[i-2], &twoA)
|
|
}
|
|
}
|
|
}
|
|
|
|
// EcmultStraussGLV is the public interface for optimized Strauss+GLV multiplication
|
|
func EcmultStraussGLV(r *GroupElementJacobian, a *GroupElementAffine, q *Scalar) {
|
|
ecmultStraussGLV(r, a, q)
|
|
}
|
|
|
|
// ECDHHashFunction is a function type for hashing ECDH shared secrets
|
|
type ECDHHashFunction func(output []byte, x32 []byte, y32 []byte) bool
|
|
|
|
// ecdhHashFunctionSHA256 implements the default SHA-256 based hash function for ECDH
|
|
// Following the C reference implementation exactly
|
|
func ecdhHashFunctionSHA256(output []byte, x32 []byte, y32 []byte) bool {
|
|
if len(output) != 32 || len(x32) != 32 || len(y32) != 32 {
|
|
return false
|
|
}
|
|
|
|
// Version byte: (y32[31] & 0x01) | 0x02
|
|
version := byte((y32[31] & 0x01) | 0x02)
|
|
|
|
sha := NewSHA256()
|
|
sha.Write([]byte{version})
|
|
sha.Write(x32)
|
|
sha.Finalize(output)
|
|
sha.Clear()
|
|
|
|
return true
|
|
}
|
|
|
|
// ECDH computes an EC Diffie-Hellman shared secret
|
|
// Following the C reference implementation secp256k1_ecdh
|
|
func ECDH(output []byte, pubkey *PublicKey, seckey []byte, hashfp ECDHHashFunction) error {
|
|
if len(output) != 32 {
|
|
return errors.New("output must be 32 bytes")
|
|
}
|
|
if len(seckey) != 32 {
|
|
return errors.New("seckey must be 32 bytes")
|
|
}
|
|
if pubkey == nil {
|
|
return errors.New("pubkey cannot be nil")
|
|
}
|
|
|
|
// Use default hash function if none provided
|
|
if hashfp == nil {
|
|
hashfp = ecdhHashFunctionSHA256
|
|
}
|
|
|
|
// Load public key
|
|
var pt GroupElementAffine
|
|
pt.fromBytes(pubkey.data[:])
|
|
if pt.isInfinity() {
|
|
return errors.New("invalid public key")
|
|
}
|
|
|
|
// Parse scalar
|
|
var s Scalar
|
|
if !s.setB32Seckey(seckey) {
|
|
return errors.New("invalid secret key")
|
|
}
|
|
|
|
// Handle zero scalar
|
|
if s.isZero() {
|
|
return errors.New("secret key cannot be zero")
|
|
}
|
|
|
|
// Compute res = s * pt using optimized windowed multiplication (variable-time)
|
|
// ECDH doesn't require constant-time since the secret key is already known
|
|
var res GroupElementJacobian
|
|
ecmultWindowedVar(&res, &pt, &s)
|
|
|
|
// Convert to affine
|
|
var resAff GroupElementAffine
|
|
resAff.setGEJ(&res)
|
|
resAff.x.normalize()
|
|
resAff.y.normalize()
|
|
|
|
// Extract x and y coordinates
|
|
var x, y [32]byte
|
|
resAff.x.getB32(x[:])
|
|
resAff.y.getB32(y[:])
|
|
|
|
// Compute hash
|
|
success := hashfp(output, x[:], y[:])
|
|
|
|
// Clear sensitive data
|
|
memclear(unsafe.Pointer(&x[0]), 32)
|
|
memclear(unsafe.Pointer(&y[0]), 32)
|
|
s.clear()
|
|
resAff.clear()
|
|
res.clear()
|
|
|
|
if !success {
|
|
return errors.New("hash function failed")
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
// HKDF performs HMAC-based Key Derivation Function (RFC 5869)
|
|
// Outputs key material of the specified length
|
|
func HKDF(output []byte, ikm []byte, salt []byte, info []byte) error {
|
|
if len(output) == 0 {
|
|
return errors.New("output length must be greater than 0")
|
|
}
|
|
|
|
// Step 1: Extract (if salt is empty, use zeros)
|
|
if len(salt) == 0 {
|
|
salt = make([]byte, 32)
|
|
}
|
|
|
|
// PRK = HMAC-SHA256(salt, IKM)
|
|
var prk [32]byte
|
|
hmac := NewHMACSHA256(salt)
|
|
hmac.Write(ikm)
|
|
hmac.Finalize(prk[:])
|
|
hmac.Clear()
|
|
|
|
// Step 2: Expand
|
|
// Generate output using HKDF-Expand
|
|
// T(0) = empty
|
|
// T(i) = HMAC(PRK, T(i-1) || info || i)
|
|
|
|
outlen := len(output)
|
|
outidx := 0
|
|
|
|
// T(0) is empty
|
|
var t []byte
|
|
|
|
// Generate blocks until we have enough output
|
|
blockNum := byte(1)
|
|
for outidx < outlen {
|
|
// Compute T(i) = HMAC(PRK, T(i-1) || info || i)
|
|
hmac = NewHMACSHA256(prk[:])
|
|
if len(t) > 0 {
|
|
hmac.Write(t)
|
|
}
|
|
if len(info) > 0 {
|
|
hmac.Write(info)
|
|
}
|
|
hmac.Write([]byte{blockNum})
|
|
|
|
var tBlock [32]byte
|
|
hmac.Finalize(tBlock[:])
|
|
hmac.Clear()
|
|
|
|
// Copy to output
|
|
copyLen := len(tBlock)
|
|
if copyLen > outlen-outidx {
|
|
copyLen = outlen - outidx
|
|
}
|
|
copy(output[outidx:outidx+copyLen], tBlock[:copyLen])
|
|
outidx += copyLen
|
|
|
|
// Update T for next iteration
|
|
t = tBlock[:]
|
|
blockNum++
|
|
}
|
|
|
|
// Clear sensitive data
|
|
memclear(unsafe.Pointer(&prk[0]), 32)
|
|
if len(t) > 0 {
|
|
memclear(unsafe.Pointer(&t[0]), uintptr(len(t)))
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
// ECDHWithHKDF computes ECDH and derives a key using HKDF
|
|
func ECDHWithHKDF(output []byte, pubkey *PublicKey, seckey []byte, salt []byte, info []byte) error {
|
|
// Compute ECDH shared secret
|
|
var sharedSecret [32]byte
|
|
if err := ECDH(sharedSecret[:], pubkey, seckey, nil); err != nil {
|
|
return err
|
|
}
|
|
|
|
// Derive key using HKDF
|
|
err := HKDF(output, sharedSecret[:], salt, info)
|
|
|
|
// Clear shared secret
|
|
memclear(unsafe.Pointer(&sharedSecret[0]), 32)
|
|
|
|
return err
|
|
}
|
|
|
|
// =============================================================================
|
|
// Phase 4: Strauss-GLV Algorithm with wNAF
|
|
// =============================================================================
|
|
|
|
// buildOddMultiplesTableAffine builds a table of odd multiples of a point in affine coordinates
|
|
// pre[i] = (2*i+1) * a for i = 0 to tableSize-1
|
|
// Also returns the precomputed β*x values for λ-transformed lookups
|
|
//
|
|
// The table is built efficiently using:
|
|
// 1. Compute odd multiples in Jacobian: 1*a, 3*a, 5*a, ...
|
|
// 2. Batch normalize all points to affine
|
|
// 3. Precompute β*x for each point for GLV lookups
|
|
//
|
|
// Reference: libsecp256k1 ecmult_impl.h:secp256k1_ecmult_odd_multiples_table
|
|
func buildOddMultiplesTableAffine(preA []GroupElementAffine, preBetaX []FieldElement, a *GroupElementJacobian, tableSize int) {
|
|
if tableSize == 0 {
|
|
return
|
|
}
|
|
|
|
// Build odd multiples in Jacobian coordinates
|
|
preJac := make([]GroupElementJacobian, tableSize)
|
|
|
|
// pre[0] = a (which is 1*a)
|
|
preJac[0] = *a
|
|
|
|
if tableSize > 1 {
|
|
// Compute 2*a
|
|
var twoA GroupElementJacobian
|
|
twoA.double(a)
|
|
|
|
// Build odd multiples: pre[i] = pre[i-1] + 2*a for i >= 1
|
|
for i := 1; i < tableSize; i++ {
|
|
preJac[i].addVar(&preJac[i-1], &twoA)
|
|
}
|
|
}
|
|
|
|
// Batch normalize to affine coordinates
|
|
BatchNormalize(preA, preJac)
|
|
|
|
// Precompute β*x for each point (for λ-transformed lookups)
|
|
for i := 0; i < tableSize; i++ {
|
|
if preA[i].isInfinity() {
|
|
preBetaX[i] = FieldElementZero
|
|
} else {
|
|
preBetaX[i].mul(&preA[i].x, &fieldBeta)
|
|
}
|
|
}
|
|
}
|
|
|
|
// tableGetGE retrieves a point from the table, handling sign
|
|
// n is the wNAF digit (can be negative)
|
|
// Returns pre[(|n|-1)/2], negated if n < 0
|
|
//
|
|
// Reference: libsecp256k1 ecmult_impl.h:ECMULT_TABLE_GET_GE
|
|
func tableGetGE(r *GroupElementAffine, pre []GroupElementAffine, n int) {
|
|
if n == 0 {
|
|
r.setInfinity()
|
|
return
|
|
}
|
|
|
|
var idx int
|
|
if n > 0 {
|
|
idx = (n - 1) / 2
|
|
} else {
|
|
idx = (-n - 1) / 2
|
|
}
|
|
|
|
if idx >= len(pre) {
|
|
r.setInfinity()
|
|
return
|
|
}
|
|
|
|
*r = pre[idx]
|
|
|
|
// Negate if n < 0
|
|
if n < 0 {
|
|
r.negate(r)
|
|
}
|
|
}
|
|
|
|
// tableGetGELambda retrieves the λ-transformed point from the table
|
|
// Uses precomputed β*x values for efficiency
|
|
// n is the wNAF digit (can be negative)
|
|
// Returns λ*pre[(|n|-1)/2], negated if n < 0
|
|
//
|
|
// Since λ*(x, y) = (β*x, y), and we precomputed β*x,
|
|
// we just need to use the precomputed β*x instead of x
|
|
//
|
|
// Reference: libsecp256k1 ecmult_impl.h:ECMULT_TABLE_GET_GE_LAMBDA
|
|
func tableGetGELambda(r *GroupElementAffine, pre []GroupElementAffine, preBetaX []FieldElement, n int) {
|
|
if n == 0 {
|
|
r.setInfinity()
|
|
return
|
|
}
|
|
|
|
var idx int
|
|
if n > 0 {
|
|
idx = (n - 1) / 2
|
|
} else {
|
|
idx = (-n - 1) / 2
|
|
}
|
|
|
|
if idx >= len(pre) {
|
|
r.setInfinity()
|
|
return
|
|
}
|
|
|
|
// Use precomputed β*x instead of x
|
|
r.x = preBetaX[idx]
|
|
r.y = pre[idx].y
|
|
r.infinity = pre[idx].infinity
|
|
|
|
// Negate if n < 0
|
|
if n < 0 {
|
|
r.negate(r)
|
|
}
|
|
}
|
|
|
|
// Window size for the GLV split scalars
|
|
const glvWNAFW = 5
|
|
const glvTableSize = 1 << (glvWNAFW - 1) // 16 entries for window size 5
|
|
|
|
// ecmultStraussWNAFGLV computes r = q * a using Strauss algorithm with GLV endomorphism
|
|
// This splits the scalar using GLV and processes two ~128-bit scalars simultaneously
|
|
// using wNAF representation for efficient point multiplication.
|
|
//
|
|
// The algorithm:
|
|
// 1. Split q into q1, q2 such that q1 + q2*λ ≡ q (mod n), where q1, q2 are ~128 bits
|
|
// 2. Build odd multiples table for a and precompute β*x for λ-transformed lookups
|
|
// 3. Convert q1, q2 to wNAF representation
|
|
// 4. Process both wNAF representations simultaneously in a single pass
|
|
//
|
|
// Reference: libsecp256k1 ecmult_impl.h:secp256k1_ecmult_strauss_wnaf
|
|
func ecmultStraussWNAFGLV(r *GroupElementJacobian, a *GroupElementAffine, q *Scalar) {
|
|
if a.isInfinity() {
|
|
r.setInfinity()
|
|
return
|
|
}
|
|
|
|
if q.isZero() {
|
|
r.setInfinity()
|
|
return
|
|
}
|
|
|
|
// Split scalar using GLV endomorphism: q = q1 + q2*λ
|
|
// Also get the transformed points p1 = a, p2 = λ*a
|
|
var q1, q2 Scalar
|
|
var p1, p2 GroupElementAffine
|
|
ecmultEndoSplit(&q1, &q2, &p1, &p2, q, a)
|
|
|
|
// Build odd multiples tables using stack-allocated arrays
|
|
var aJac GroupElementJacobian
|
|
aJac.setGE(&p1)
|
|
|
|
var preA [glvTableSize]GroupElementAffine
|
|
var preBetaX [glvTableSize]FieldElement
|
|
buildOddMultiplesTableAffineFixed(&preA, &preBetaX, &aJac)
|
|
|
|
// Build odd multiples table for p2 (which is λ*a)
|
|
var p2Jac GroupElementJacobian
|
|
p2Jac.setGE(&p2)
|
|
|
|
var preA2 [glvTableSize]GroupElementAffine
|
|
var preBetaX2 [glvTableSize]FieldElement
|
|
buildOddMultiplesTableAffineFixed(&preA2, &preBetaX2, &p2Jac)
|
|
|
|
// Convert scalars to wNAF representation
|
|
const wnafMaxLen = 257
|
|
var wnaf1, wnaf2 [wnafMaxLen]int
|
|
|
|
bits1 := q1.wNAF(wnaf1[:], glvWNAFW)
|
|
bits2 := q2.wNAF(wnaf2[:], glvWNAFW)
|
|
|
|
// Find the maximum bit position
|
|
maxBits := bits1
|
|
if bits2 > maxBits {
|
|
maxBits = bits2
|
|
}
|
|
|
|
// Perform the Strauss algorithm
|
|
r.setInfinity()
|
|
|
|
for i := maxBits - 1; i >= 0; i-- {
|
|
// Double the result
|
|
if !r.isInfinity() {
|
|
r.double(r)
|
|
}
|
|
|
|
// Add contribution from q1
|
|
if i < bits1 && wnaf1[i] != 0 {
|
|
var pt GroupElementAffine
|
|
tableGetGEFixed(&pt, &preA, wnaf1[i])
|
|
|
|
if r.isInfinity() {
|
|
r.setGE(&pt)
|
|
} else {
|
|
r.addGE(r, &pt)
|
|
}
|
|
}
|
|
|
|
// Add contribution from q2
|
|
if i < bits2 && wnaf2[i] != 0 {
|
|
var pt GroupElementAffine
|
|
tableGetGEFixed(&pt, &preA2, wnaf2[i])
|
|
|
|
if r.isInfinity() {
|
|
r.setGE(&pt)
|
|
} else {
|
|
r.addGE(r, &pt)
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
// buildOddMultiplesTableAffineFixed is like buildOddMultiplesTableAffine but uses fixed-size arrays
|
|
func buildOddMultiplesTableAffineFixed(preA *[glvTableSize]GroupElementAffine, preBetaX *[glvTableSize]FieldElement, a *GroupElementJacobian) {
|
|
// Build odd multiples in Jacobian coordinates
|
|
var preJac [glvTableSize]GroupElementJacobian
|
|
|
|
// pre[0] = a (which is 1*a)
|
|
preJac[0] = *a
|
|
|
|
if glvTableSize > 1 {
|
|
// Compute 2*a
|
|
var twoA GroupElementJacobian
|
|
twoA.double(a)
|
|
|
|
// Build odd multiples: pre[i] = pre[i-1] + 2*a for i >= 1
|
|
for i := 1; i < glvTableSize; i++ {
|
|
preJac[i].addVar(&preJac[i-1], &twoA)
|
|
}
|
|
}
|
|
|
|
// Batch normalize to affine coordinates
|
|
BatchNormalize(preA[:], preJac[:])
|
|
|
|
// Precompute β*x for each point
|
|
for i := 0; i < glvTableSize; i++ {
|
|
if preA[i].isInfinity() {
|
|
preBetaX[i] = FieldElementZero
|
|
} else {
|
|
preBetaX[i].mul(&preA[i].x, &fieldBeta)
|
|
}
|
|
}
|
|
}
|
|
|
|
// tableGetGEFixed retrieves a point from a fixed-size table
|
|
func tableGetGEFixed(r *GroupElementAffine, pre *[glvTableSize]GroupElementAffine, n int) {
|
|
if n == 0 {
|
|
r.setInfinity()
|
|
return
|
|
}
|
|
|
|
var idx int
|
|
if n > 0 {
|
|
idx = (n - 1) / 2
|
|
} else {
|
|
idx = (-n - 1) / 2
|
|
}
|
|
|
|
if idx >= glvTableSize {
|
|
r.setInfinity()
|
|
return
|
|
}
|
|
|
|
*r = pre[idx]
|
|
|
|
// Negate if n < 0
|
|
if n < 0 {
|
|
r.negate(r)
|
|
}
|
|
}
|
|
|
|
// EcmultStraussWNAFGLV is the public interface for optimized Strauss+GLV+wNAF multiplication
|
|
func EcmultStraussWNAFGLV(r *GroupElementJacobian, a *GroupElementAffine, q *Scalar) {
|
|
ecmultStraussWNAFGLV(r, a, q)
|
|
}
|
|
|
|
// ECDHXOnly computes X-only ECDH (BIP-340 style)
|
|
// Outputs only the X coordinate of the shared secret point
|
|
func ECDHXOnly(output []byte, pubkey *PublicKey, seckey []byte) error {
|
|
if len(output) != 32 {
|
|
return errors.New("output must be 32 bytes")
|
|
}
|
|
if len(seckey) != 32 {
|
|
return errors.New("seckey must be 32 bytes")
|
|
}
|
|
if pubkey == nil {
|
|
return errors.New("pubkey cannot be nil")
|
|
}
|
|
|
|
// Load public key
|
|
var pt GroupElementAffine
|
|
pt.fromBytes(pubkey.data[:])
|
|
if pt.isInfinity() {
|
|
return errors.New("invalid public key")
|
|
}
|
|
|
|
// Parse scalar
|
|
var s Scalar
|
|
if !s.setB32Seckey(seckey) {
|
|
return errors.New("invalid secret key")
|
|
}
|
|
|
|
if s.isZero() {
|
|
return errors.New("secret key cannot be zero")
|
|
}
|
|
|
|
// Compute res = s * pt using optimized windowed multiplication (variable-time)
|
|
// ECDH doesn't require constant-time since the secret key is already known
|
|
var res GroupElementJacobian
|
|
ecmultWindowedVar(&res, &pt, &s)
|
|
|
|
// Convert to affine
|
|
var resAff GroupElementAffine
|
|
resAff.setGEJ(&res)
|
|
resAff.x.normalize()
|
|
|
|
// Extract X coordinate only
|
|
resAff.x.getB32(output)
|
|
|
|
// Clear sensitive data
|
|
s.clear()
|
|
resAff.clear()
|
|
res.clear()
|
|
|
|
return nil
|
|
}
|
|
|
|
|