Files
p256k1/ecmult_gen.go
mleku 14dc85cdc3 Add BMI2/AVX2 field assembly and SIMD comparison benchmarks
- Port field operations assembler from libsecp256k1 (field_amd64.s,
    field_amd64_bmi2.s) with MULX/ADCX/ADOX instructions
  - Add AVX2 scalar and affine point operations in avx/ package
  - Implement CPU feature detection (cpufeatures.go) for AVX2/BMI2
  - Add libsecp256k1.so via purego for native C library comparison
  - Create comprehensive SIMD benchmark suite comparing btcec, P256K1
    pure Go, P256K1 ASM, and libsecp256k1
  - Add BENCHMARK_SIMD.md documenting performance across implementations
  - Remove BtcecSigner, consolidate on P256K1Signer as primary impl
  - Add field operation tests and benchmarks (field_asm_test.go,
    field_bench_test.go)
  - Update GLV endomorphism with wNAF scalar multiplication
  - Add scalar assembly (scalar_amd64.s) for optimized operations
  - Clean up dependencies and update benchmark reports
2025-11-29 08:11:13 +00:00

325 lines
7.7 KiB
Go

package p256k1

import "sync"
// =============================================================================
// Phase 5: Generator Precomputation for GLV Optimization
// =============================================================================
//
// This file contains precomputed tables for the secp256k1 generator point G
// and its λ-transformed version λ*G. These tables enable very fast scalar
// multiplication of the generator point.
//
// The GLV approach splits a 256-bit scalar k into two ~128-bit scalars k1, k2
// such that k = k1 + k2*λ (mod n). Then k*G = k1*G + k2*(λ*G).
//
// We precompute odd multiples of G and λ*G:
//   preGenG[i]       = (2*i+1) * G      for i = 0 to genTableSize-1
//   preGenLambdaG[i] = (2*i+1) * (λ*G)  for i = 0 to genTableSize-1
//
// Reference: libsecp256k1 ecmult_gen_impl.h
// Window parameters for generator multiplication.
const (
	// genWindowSize is the wNAF window width handed to Scalar.wNAF. A larger
	// window means more precomputation but faster multiplication.
	genWindowSize = 6

	// genTableSize is the number of entries in each precomputed table:
	// 1 << (genWindowSize - 1) = 32.
	//
	// NOTE(review): a width-w wNAF normally only emits odd digits below
	// 2^(w-1), which needs 2^(w-2) table entries; confirm against Scalar.wNAF
	// whether the upper half of the tables is ever indexed.
	genTableSize = 1 << (genWindowSize - 1)
)
// Precomputed generator tables. They start out zero-valued and are filled in
// by initGenTables the first time it runs (not by a Go init function).
var (
	// genTablesInitialized is set to true once initGenTables has populated
	// the tables below.
	genTablesInitialized bool

	// preGenG holds the odd multiples of G: preGenG[i] = (2*i+1)*G.
	preGenG [genTableSize]GroupElementAffine

	// preGenLambdaG holds the odd multiples of λ*G:
	// preGenLambdaG[i] = (2*i+1)*(λ*G).
	preGenLambdaG [genTableSize]GroupElementAffine

	// preGenBetaX[i] is β times the x coordinate of preGenG[i]. It is not read
	// by the code shown here and is kept for a potential future optimization.
	preGenBetaX [genTableSize]FieldElement
)
// initGenTables computes the precomputed generator tables
// This is called automatically on first use
func initGenTables() {
if genTablesInitialized {
return
}
// Build odd multiples of G
var gJac GroupElementJacobian
gJac.setGE(&Generator)
var preJacG [genTableSize]GroupElementJacobian
preJacG[0] = gJac
// Compute 2*G
var twoG GroupElementJacobian
twoG.double(&gJac)
// Build odd multiples: preJacG[i] = (2*i+1)*G
for i := 1; i < genTableSize; i++ {
preJacG[i].addVar(&preJacG[i-1], &twoG)
}
// Batch normalize to affine
BatchNormalize(preGenG[:], preJacG[:])
// Compute λ*G
var lambdaG GroupElementAffine
lambdaG.mulLambda(&Generator)
// Build odd multiples of λ*G
var lambdaGJac GroupElementJacobian
lambdaGJac.setGE(&lambdaG)
var preJacLambdaG [genTableSize]GroupElementJacobian
preJacLambdaG[0] = lambdaGJac
// Compute 2*(λ*G)
var twoLambdaG GroupElementJacobian
twoLambdaG.double(&lambdaGJac)
// Build odd multiples: preJacLambdaG[i] = (2*i+1)*(λ*G)
for i := 1; i < genTableSize; i++ {
preJacLambdaG[i].addVar(&preJacLambdaG[i-1], &twoLambdaG)
}
// Batch normalize to affine
BatchNormalize(preGenLambdaG[:], preJacLambdaG[:])
// Precompute β*x for each point in preGenG
for i := 0; i < genTableSize; i++ {
if preGenG[i].isInfinity() {
preGenBetaX[i] = FieldElementZero
} else {
preGenBetaX[i].mul(&preGenG[i].x, &fieldBeta)
}
}
genTablesInitialized = true
}
// EnsureGenTablesInitialized builds the precomputed generator tables if they
// have not been built yet, by calling initGenTables.
//
// ecmultGenGLV and ecmultGenSimple already do this on every call, so calling
// this function is optional; invoking it during application startup moves the
// one-time table construction cost away from the first multiplication.
func EnsureGenTablesInitialized() {
initGenTables()
}
// addGenTableDigit adds the contribution of a single wNAF digit to r.
//
// table[i] must hold (2*i+1)*P for some base point P, so a digit d selects
// entry (|d|-1)/2. flip reports that the scalar was negated before it was
// recoded; the sign of the digit and flip cancel each other, so the looked-up
// point is negated at most once instead of twice in a row. A zero digit is a
// no-op.
//
// NOTE(review): a digit whose index falls outside the table is skipped
// silently, exactly as before. That cannot happen as long as Scalar.wNAF
// honours genWindowSize, but if it ever did the result would be wrong
// without any signal — confirm whether this should be a hard failure.
func addGenTableDigit(r *GroupElementJacobian, table *[genTableSize]GroupElementAffine, digit int, flip bool) {
	if digit == 0 {
		return
	}
	mag := digit
	if mag < 0 {
		mag = -mag
	}
	idx := (mag - 1) / 2
	if idx >= genTableSize {
		return
	}

	pt := table[idx]
	if (digit < 0) != flip {
		pt.negate(&pt)
	}

	// Adding to infinity is just a copy of the table entry.
	if r.isInfinity() {
		r.setGE(&pt)
		return
	}
	r.addGE(r, &pt)
}

// ecmultGenGLV computes r = k*G from the precomputed odd-multiple tables using
// the GLV endomorphism.
//
// scalarSplitLambda splits k into k1 and k2 with k = k1 + k2*λ, so that
// k*G = k1*G + k2*(λ*G). Each half is made "low" (negated when isHigh reports
// true, with the sign remembered), recoded as wNAF with window genWindowSize,
// and both digit strings are consumed in one interleaved double-and-add pass:
// one doubling per bit position plus a table addition per non-zero digit.
//
// k == 0 yields the point at infinity. The tables are built on demand through
// initGenTables.
//
// NOTE(review): the loop branches on the wNAF digits of k, so its running time
// depends on the scalar — confirm that callers passing secret scalars do not
// require constant-time behaviour.
func ecmultGenGLV(r *GroupElementJacobian, k *Scalar) {
	if k.isZero() {
		r.setInfinity()
		return
	}

	initGenTables()

	// k = k1 + k2*λ
	var k1, k2 Scalar
	scalarSplitLambda(&k1, &k2, k)

	// Work with the low representative of each half; the sign is re-applied
	// per table lookup.
	neg1 := k1.isHigh()
	if neg1 {
		k1.negate(&k1)
	}
	neg2 := k2.isHigh()
	if neg2 {
		k2.negate(&k2)
	}

	const wnafMaxLen = 257
	var wnaf1, wnaf2 [wnafMaxLen]int
	bits1 := k1.wNAF(wnaf1[:], genWindowSize)
	bits2 := k2.wNAF(wnaf2[:], genWindowSize)

	maxBits := bits1
	if bits2 > maxBits {
		maxBits = bits2
	}

	r.setInfinity()
	for i := maxBits - 1; i >= 0; i-- {
		// Doubling infinity is pointless, so skip it until the first addition.
		if !r.isInfinity() {
			r.double(r)
		}
		if i < bits1 {
			addGenTableDigit(r, &preGenG, wnaf1[i], neg1)
		}
		if i < bits2 {
			addGenTableDigit(r, &preGenLambdaG, wnaf2[i], neg2)
		}
	}
}

// EcmultGenGLV is the exported entry point for GLV generator multiplication:
// it sets r = k*G by calling ecmultGenGLV.
func EcmultGenGLV(r *GroupElementJacobian, k *Scalar) {
	ecmultGenGLV(r, k)
}

// ecmultGenSimple computes r = k*G with the preGenG table only, without
// splitting the scalar. It is useful for comparison and as a fallback.
//
// A high scalar is replaced by its negation before wNAF recoding and the
// final point is negated to compensate. k == 0 yields the point at infinity.
func ecmultGenSimple(r *GroupElementJacobian, k *Scalar) {
	if k.isZero() {
		r.setInfinity()
		return
	}

	initGenTables()

	// Operate on a copy so the caller's scalar is never modified.
	kNorm := *k
	negResult := kNorm.isHigh()
	if negResult {
		kNorm.negate(&kNorm)
	}

	const wnafMaxLen = 257
	var wnaf [wnafMaxLen]int
	bits := kNorm.wNAF(wnaf[:], genWindowSize)

	r.setInfinity()
	for i := bits - 1; i >= 0; i-- {
		if !r.isInfinity() {
			r.double(r)
		}
		addGenTableDigit(r, &preGenG, wnaf[i], false)
	}

	// Undo the scalar negation on the result.
	if negResult {
		r.negate(r)
	}
}
// EcmultGenSimple is the exported entry point for the non-GLV generator
// multiplication: it sets r = k*G by calling ecmultGenSimple.
func EcmultGenSimple(r *GroupElementJacobian, k *Scalar) {
ecmultGenSimple(r, k)
}
// =============================================================================
// EcmultGenContext - Compatibility layer for existing codebase
// =============================================================================
// EcmultGenContext represents the generator multiplication context.
// It carries no table data of its own: the precomputed tables are
// package-level variables, and the functions in this file do not take a
// context argument.
type EcmultGenContext struct {
// initialized is set to true by NewEcmultGenContext; it is not read by any
// code shown here.
initialized bool
}
// NewEcmultGenContext returns a fresh generator multiplication context.
// It first calls initGenTables, so the package-level precomputed tables are
// guaranteed to exist by the time the context is handed back.
func NewEcmultGenContext() *EcmultGenContext {
	initGenTables()

	ctx := new(EcmultGenContext)
	ctx.initialized = true
	return ctx
}
// EcmultGen computes r = k * G and is the main entry point for generator
// multiplication throughout the codebase. It currently delegates to
// ecmultGenGLV (precomputed tables plus GLV scalar splitting).
func EcmultGen(r *GroupElementJacobian, k *Scalar) {
ecmultGenGLV(r, k)
}