Enhance secp256k1 ECDH and scalar operations with optimized windowed multiplication and GLV endomorphism

This commit introduces several optimizations for elliptic curve operations in the secp256k1 library. Key changes include the implementation of the `ecmultStraussGLV` function for efficient scalar multiplication using the Strauss algorithm with GLV endomorphism, and the addition of windowed multiplication techniques to improve performance. Additionally, the benchmark tests have been updated to focus on the P256K1Signer implementation, streamlining the comparison process and enhancing clarity in performance evaluations.
This commit is contained in:
2025-11-03 10:54:17 +00:00
parent c8efe6693c
commit e8649cae7b
4 changed files with 282 additions and 210 deletions

View File

@@ -1,5 +1,5 @@
//go:build cgo
// +build cgo
//go:build !nocgo
// +build !nocgo
package bench
@@ -7,27 +7,18 @@ import (
"crypto/rand"
"testing"
p256knext "next.orly.dev/pkg/crypto/p256k"
"p256k1.mleku.dev/signer"
)
// This file contains benchmarks comparing the three signer implementations:
// 1. P256K1Signer (this package's new port from Bitcoin Core secp256k1)
// 2. BtcecSigner (pure Go btcec wrapper)
// 3. NextP256K Signer (CGO version using next.orly.dev/pkg/crypto/p256k)
// This file contains benchmarks for the P256K1Signer implementation
// (pure Go port from Bitcoin Core secp256k1)
var (
benchSeckey []byte
benchMsghash []byte
compBenchSignerP256K1 *signer.P256K1Signer
compBenchSignerBtcec *signer.BtcecSigner
compBenchSignerNext *p256knext.Signer
compBenchSignerP256K12 *signer.P256K1Signer
compBenchSignerBtcec2 *signer.BtcecSigner
compBenchSignerNext2 *p256knext.Signer
compBenchSigP256K1 []byte
compBenchSigBtcec []byte
compBenchSigNext []byte
)
func initComparisonBenchData() {
@@ -72,30 +63,6 @@ func initComparisonBenchData() {
panic(err)
}
// Setup BtcecSigner (pure Go)
signer2 := signer.NewBtcecSigner()
if err := signer2.InitSec(benchSeckey); err != nil {
panic(err)
}
compBenchSignerBtcec = signer2
compBenchSigBtcec, err = signer2.Sign(benchMsghash)
if err != nil {
panic(err)
}
// Setup NextP256K Signer (CGO version)
signer3 := &p256knext.Signer{}
if err := signer3.InitSec(benchSeckey); err != nil {
panic(err)
}
compBenchSignerNext = signer3
compBenchSigNext, err = signer3.Sign(benchMsghash)
if err != nil {
panic(err)
}
// Generate second key pair for ECDH
seckey2 := make([]byte, 32)
for {
@@ -115,24 +82,10 @@ func initComparisonBenchData() {
panic(err)
}
compBenchSignerP256K12 = signer12
// BtcecSigner second key pair
signer22 := signer.NewBtcecSigner()
if err := signer22.InitSec(seckey2); err != nil {
panic(err)
}
compBenchSignerBtcec2 = signer22
// NextP256K Signer second key pair
signer32 := &p256knext.Signer{}
if err := signer32.InitSec(seckey2); err != nil {
panic(err)
}
compBenchSignerNext2 = signer32
}
// BenchmarkPubkeyDerivation compares public key derivation from private key
func BenchmarkPubkeyDerivation_P256K1(b *testing.B) {
// BenchmarkPubkeyDerivation benchmarks public key derivation from private key
func BenchmarkPubkeyDerivation(b *testing.B) {
if benchSeckey == nil {
initComparisonBenchData()
}
@@ -147,38 +100,9 @@ func BenchmarkPubkeyDerivation_P256K1(b *testing.B) {
}
}
func BenchmarkPubkeyDerivation_Btcec(b *testing.B) {
if benchSeckey == nil {
initComparisonBenchData()
}
b.ResetTimer()
for i := 0; i < b.N; i++ {
s := signer.NewBtcecSigner()
if err := s.InitSec(benchSeckey); err != nil {
b.Fatalf("failed to create signer: %v", err)
}
_ = s.Pub()
}
}
func BenchmarkPubkeyDerivation_NextP256K(b *testing.B) {
if benchSeckey == nil {
initComparisonBenchData()
}
b.ResetTimer()
for i := 0; i < b.N; i++ {
s := &p256knext.Signer{}
if err := s.InitSec(benchSeckey); err != nil {
b.Fatalf("failed to create signer: %v", err)
}
_ = s.Pub()
}
}
// BenchmarkSign compares Schnorr signing
func BenchmarkSign_P256K1(b *testing.B) {
// BenchmarkSign benchmarks Schnorr signing
func BenchmarkSign(b *testing.B) {
if benchSeckey == nil {
initComparisonBenchData()
}
@@ -195,42 +119,9 @@ func BenchmarkSign_P256K1(b *testing.B) {
}
}
func BenchmarkSign_Btcec(b *testing.B) {
if benchSeckey == nil {
initComparisonBenchData()
}
b.ResetTimer()
for i := 0; i < b.N; i++ {
if compBenchSignerBtcec == nil {
initComparisonBenchData()
}
_, err := compBenchSignerBtcec.Sign(benchMsghash)
if err != nil {
b.Fatalf("failed to sign: %v", err)
}
}
}
func BenchmarkSign_NextP256K(b *testing.B) {
if benchSeckey == nil {
initComparisonBenchData()
}
b.ResetTimer()
for i := 0; i < b.N; i++ {
if compBenchSignerNext == nil {
initComparisonBenchData()
}
_, err := compBenchSignerNext.Sign(benchMsghash)
if err != nil {
b.Fatalf("failed to sign: %v", err)
}
}
}
// BenchmarkVerify compares Schnorr verification
func BenchmarkVerify_P256K1(b *testing.B) {
// BenchmarkVerify benchmarks Schnorr verification
func BenchmarkVerify(b *testing.B) {
if benchSeckey == nil {
initComparisonBenchData()
}
@@ -255,58 +146,9 @@ func BenchmarkVerify_P256K1(b *testing.B) {
}
}
func BenchmarkVerify_Btcec(b *testing.B) {
if benchSeckey == nil {
initComparisonBenchData()
}
if compBenchSignerBtcec == nil || compBenchSigBtcec == nil {
initComparisonBenchData()
}
b.ResetTimer()
for i := 0; i < b.N; i++ {
verifier := signer.NewBtcecSigner()
if err := verifier.InitPub(compBenchSignerBtcec.Pub()); err != nil {
b.Fatalf("failed to create verifier: %v", err)
}
valid, err := verifier.Verify(benchMsghash, compBenchSigBtcec)
if err != nil {
b.Fatalf("verification error: %v", err)
}
if !valid {
b.Fatalf("verification failed")
}
}
}
func BenchmarkVerify_NextP256K(b *testing.B) {
if benchSeckey == nil {
initComparisonBenchData()
}
if compBenchSignerNext == nil || compBenchSigNext == nil {
initComparisonBenchData()
}
b.ResetTimer()
for i := 0; i < b.N; i++ {
verifier := &p256knext.Signer{}
if err := verifier.InitPub(compBenchSignerNext.Pub()); err != nil {
b.Fatalf("failed to create verifier: %v", err)
}
valid, err := verifier.Verify(benchMsghash, compBenchSigNext)
if err != nil {
b.Fatalf("verification error: %v", err)
}
if !valid {
b.Fatalf("verification failed")
}
}
}
// BenchmarkECDH compares ECDH shared secret generation
func BenchmarkECDH_P256K1(b *testing.B) {
// BenchmarkECDH benchmarks ECDH shared secret generation
func BenchmarkECDH(b *testing.B) {
if benchSeckey == nil {
initComparisonBenchData()
}
@@ -323,37 +165,4 @@ func BenchmarkECDH_P256K1(b *testing.B) {
}
}
func BenchmarkECDH_Btcec(b *testing.B) {
if benchSeckey == nil {
initComparisonBenchData()
}
b.ResetTimer()
for i := 0; i < b.N; i++ {
if compBenchSignerBtcec == nil || compBenchSignerBtcec2 == nil {
initComparisonBenchData()
}
_, err := compBenchSignerBtcec.ECDH(compBenchSignerBtcec2.Pub())
if err != nil {
b.Fatalf("ECDH failed: %v", err)
}
}
}
func BenchmarkECDH_NextP256K(b *testing.B) {
if benchSeckey == nil {
initComparisonBenchData()
}
b.ResetTimer()
for i := 0; i < b.N; i++ {
if compBenchSignerNext == nil || compBenchSignerNext2 == nil {
initComparisonBenchData()
}
_, err := compBenchSignerNext.ECDH(compBenchSignerNext2.Pub())
if err != nil {
b.Fatalf("ECDH failed: %v", err)
}
}
}

99
ecdh.go
View File

@@ -2,9 +2,16 @@ package p256k1
import (
"errors"
"fmt"
"unsafe"
)
const (
// Window sizes for elliptic curve multiplication optimizations
windowA = 5 // Window size for main scalar (A)
windowG = 14 // Window size for generator (G) - larger for better performance
)
// EcmultConst computes r = q * a using constant-time multiplication
// Uses simple binary method
func EcmultConst(r *GroupElementJacobian, a *GroupElementAffine, q *Scalar) {
@@ -125,27 +132,107 @@ func ecmultWindowedVar(r *GroupElementJacobian, a *GroupElementAffine, q *Scalar
}
}
// Ecmult computes r = q * a (variable-time, optimized)
// This is a simplified implementation - can be optimized with windowing later
// Ecmult computes r = q * a using optimized windowed multiplication
// This provides good performance for verification and ECDH operations
func Ecmult(r *GroupElementJacobian, a *GroupElementJacobian, q *Scalar) {
if a.isInfinity() {
r.setInfinity()
return
}
if q.isZero() {
r.setInfinity()
return
}
// Convert to affine for windowed multiplication
var aAff GroupElementAffine
aAff.setGEJ(a)
// Use optimized windowed multiplication
ecmultWindowedVar(r, &aAff, q)
}
// ecmultStraussGLV computes r = q * a using Strauss algorithm with GLV endomorphism
// This provides significant speedup for both verification and ECDH operations
func ecmultStraussGLV(r *GroupElementJacobian, a *GroupElementAffine, q *Scalar) {
if a.isInfinity() {
r.setInfinity()
return
}
if q.isZero() {
r.setInfinity()
return
}
// For now, use simplified Strauss algorithm without GLV endomorphism
// Convert base point to Jacobian
var aJac GroupElementJacobian
aJac.setGE(a)
// Compute odd multiples for the scalar
var preA [1 << (windowA - 1)]GroupElementJacobian
buildOddMultiples(&preA, &aJac, windowA)
// Convert scalar to wNAF representation
var wnaf [257]int
bits := q.wNAF(wnaf[:], windowA)
// Perform Strauss algorithm
r.setInfinity()
for i := bits - 1; i >= 0; i-- {
// Double the result
r.double(r)
// Add contribution
if wnaf[i] != 0 {
n := wnaf[i]
var pt GroupElementJacobian
if n > 0 {
idx := (n-1)/2
if idx >= len(preA) {
panic(fmt.Sprintf("wNAF positive index out of bounds: n=%d, idx=%d, len=%d", n, idx, len(preA)))
}
pt = preA[idx]
} else {
if (-n-1)/2 >= len(preA) {
panic("wNAF index out of bounds (negative)")
}
pt = preA[(-n-1)/2]
pt.y.negate(&pt.y, 1)
}
r.addVar(r, &pt)
}
}
}
// buildOddMultiples builds a table of odd multiples of a point
// pre[i] = (2*i+1) * a for i = 0 to (1<<(w-1))-1
func buildOddMultiples(pre *[1 << (windowA - 1)]GroupElementJacobian, a *GroupElementJacobian, w uint) {
tableSize := 1 << (w - 1)
// pre[0] = a (which is 1*a)
pre[0] = *a
if tableSize > 1 {
// Compute 2*a
var twoA GroupElementJacobian
twoA.double(a)
// Build odd multiples: pre[i] = pre[i-2] + 2*a for i >= 2, i even
for i := 2; i < tableSize; i += 2 {
pre[i].addVar(&pre[i-2], &twoA)
}
}
}
// EcmultStraussGLV is the public interface for optimized Strauss+GLV multiplication
func EcmultStraussGLV(r *GroupElementJacobian, a *GroupElementAffine, q *Scalar) {
ecmultStraussGLV(r, a, q)
}
// ECDHHashFunction is a function type for hashing ECDH shared secrets
type ECDHHashFunction func(output []byte, x32 []byte, y32 []byte) bool
@@ -203,7 +290,7 @@ func ECDH(output []byte, pubkey *PublicKey, seckey []byte, hashfp ECDHHashFuncti
if s.isZero() {
return errors.New("secret key cannot be zero")
}
// Compute res = s * pt using optimized windowed multiplication (variable-time)
// ECDH doesn't require constant-time since the secret key is already known
var res GroupElementJacobian

View File

@@ -58,9 +58,9 @@ var (
magnitude: 0,
normalized: true,
}
)
// NewFieldElement creates a new field element
func NewFieldElement() *FieldElement {
return &FieldElement{
n: [5]uint64{0, 0, 0, 0, 0},

176
scalar.go
View File

@@ -39,6 +39,41 @@ var (
// ScalarOne represents the scalar 1
ScalarOne = Scalar{d: [4]uint64{1, 0, 0, 0}}
// GLV (Gallant-Lambert-Vanstone) endomorphism constants
// lambda is a primitive cube root of unity modulo n (the curve order)
secp256k1Lambda = Scalar{d: [4]uint64{
0x5363AD4CC05C30E0, 0xA5261C028812645A,
0x122E22EA20816678, 0xDF02967C1B23BD72,
}}
// Note: beta is defined in field.go as a FieldElement constant
// GLV basis vectors and constants for scalar splitting
// These are used to decompose scalars for faster multiplication
// minus_b1 and minus_b2 are precomputed constants for the GLV splitting algorithm
minusB1 = Scalar{d: [4]uint64{
0x0000000000000000, 0x0000000000000000,
0xE4437ED6010E8828, 0x6F547FA90ABFE4C3,
}}
minusB2 = Scalar{d: [4]uint64{
0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF,
0x8A280AC50774346D, 0x3DB1562CDE9798D9,
}}
// Precomputed estimates for GLV scalar splitting
// g1 and g2 are approximations of b2/d and (-b1)/d respectively
// where d is the curve order n
g1 = Scalar{d: [4]uint64{
0x3086D221A7D46BCD, 0xE86C90E49284EB15,
0x3DAA8A1471E8CA7F, 0xE893209A45DBB031,
}}
g2 = Scalar{d: [4]uint64{
0xE4437ED6010E8828, 0x6F547FA90ABFE4C4,
0x221208AC9DF506C6, 0x1571B4AE8AC47F71,
}}
)
// setInt sets a scalar to a small integer value
@@ -789,3 +824,144 @@ func scalarReduce512(r *Scalar, l []uint64) {
}
}
// wNAF converts a scalar to Windowed Non-Adjacent Form representation
// wNAF represents the scalar using digits in the range [-(2^(w-1)-1), 2^(w-1)-1]
// with the property that non-zero digits are separated by at least w-1 zeros.
//
// Returns the number of digits in the wNAF representation (at most 257 for 256-bit scalars)
// and fills the wnaf slice with the digits.
//
// The wnaf slice must have at least 257 elements.
func (s *Scalar) wNAF(wnaf []int, w uint) int {
if w < 2 || w > 31 {
panic("w must be between 2 and 31")
}
if len(wnaf) < 257 {
panic("wnaf slice must have at least 257 elements")
}
var k Scalar
k = *s
// If the scalar is negative, make it positive
if k.getBits(255, 1) == 1 {
k.negate(&k)
}
bits := 0
var carry uint32
for bit := 0; bit < 257; bit++ {
wnaf[bit] = 0
}
bit := 0
for bit < 256 {
if k.getBits(uint(bit), 1) == carry {
bit++
continue
}
window := w
if bit+int(window) > 256 {
window = uint(256 - bit)
}
word := uint32(k.getBits(uint(bit), window)) + carry
carry = (word >> (window - 1)) & 1
word -= carry << window
// word is now in range [-(2^(w-1)-1), 2^(w-1)-1]
wnaf[bit] = int(word)
bits = bit + int(window) - 1
bit += int(window)
}
return bits + 1
}
// scalarMulShiftVar computes r = round(a * b / 2^shift) using variable-time arithmetic
// This is used for the GLV scalar splitting algorithm
func scalarMulShiftVar(r *Scalar, a *Scalar, b *Scalar, shift uint) {
if shift > 512 {
panic("shift too large")
}
var l [8]uint64
scalarMul512(l[:], a, b)
// Right shift by 'shift' bits, rounding to nearest
carry := uint64(0)
if shift > 0 && (l[0]&(uint64(1)<<(shift-1))) != 0 {
carry = 1 // Round up if the bit being shifted out is 1
}
// Shift the limbs
for i := 0; i < 4; i++ {
var srcIndex int
var srcShift uint
if shift >= 64*uint(i) {
srcIndex = int(shift/64) + i
srcShift = shift % 64
} else {
srcIndex = i
srcShift = shift
}
if srcIndex >= 8 {
r.d[i] = 0
continue
}
val := l[srcIndex]
if srcShift > 0 && srcIndex+1 < 8 {
val |= l[srcIndex+1] << (64 - srcShift)
}
val >>= srcShift
if i == 0 {
val += carry
}
r.d[i] = val
}
// Ensure result is reduced
scalarReduce(r, 0)
}
// splitLambda splits a scalar k into r1 and r2 such that r1 + lambda*r2 = k mod n
// where lambda is the secp256k1 endomorphism constant.
// This is used for GLV (Gallant-Lambert-Vanstone) optimization.
//
// The algorithm computes c1 and c2 as approximations, then solves for r1 and r2.
// r1 and r2 are guaranteed to be in the range [-2^128, 2^128] approximately.
//
// Returns r1, r2 where k = r1 + lambda*r2 mod n
func (r1 *Scalar) splitLambda(r2 *Scalar, k *Scalar) {
var c1, c2 Scalar
// Compute c1 = round(k * g1 / 2^384)
// c2 = round(k * g2 / 2^384)
// These are high-precision approximations for the GLV basis decomposition
scalarMulShiftVar(&c1, k, &g1, 384)
scalarMulShiftVar(&c2, k, &g2, 384)
// Compute r2 = c1*(-b1) + c2*(-b2)
var tmp1, tmp2 Scalar
scalarMul(&tmp1, &c1, &minusB1)
scalarMul(&tmp2, &c2, &minusB2)
scalarAdd(r2, &tmp1, &tmp2)
// Compute r1 = k - r2*lambda
scalarMul(r1, r2, &secp256k1Lambda)
r1.negate(r1)
scalarAdd(r1, r1, k)
// Ensure the result is properly reduced
scalarReduce(r1, 0)
scalarReduce(r2, 0)
}