Signed-digit multi-comb ecmult_gen algorithm
This introduces the signed-digit multi-comb multiplication algorithm for constant-time G multiplications (ecmult_gen). It is based on section 3.3 of "Fast and compact elliptic-curve cryptography" by Mike Hamburg (see https://eprint.iacr.org/2012/309). Original implementation by Peter Dettman, with changes by Pieter Wuille to use scalars for recoding, and additional comments.
This commit is contained in:
committed by
Pieter Wuille
parent
486518b350
commit
fde1dfcd8d
106
src/ecmult_gen.h
106
src/ecmult_gen.h
@@ -1,5 +1,5 @@
|
||||
/***********************************************************************
|
||||
* Copyright (c) 2013, 2014 Pieter Wuille *
|
||||
* Copyright (c) Pieter Wuille, Peter Dettman *
|
||||
* Distributed under the MIT software license, see the accompanying *
|
||||
* file COPYING or https://www.opensource.org/licenses/mit-license.php.*
|
||||
***********************************************************************/
|
||||
@@ -10,31 +10,105 @@
|
||||
#include "scalar.h"
|
||||
#include "group.h"
|
||||
|
||||
#ifndef ECMULT_GEN_PREC_BITS
|
||||
# define ECMULT_GEN_PREC_BITS 4
|
||||
# ifdef DEBUG_CONFIG
|
||||
# pragma message DEBUG_CONFIG_MSG("ECMULT_GEN_PREC_BITS undefined, assuming default value")
|
||||
|
||||
/* Configuration parameters for the signed-digit multi-comb algorithm:
|
||||
*
|
||||
* - COMB_BLOCKS is the number of blocks the input is split into. Each
|
||||
* has a corresponding table.
|
||||
* - COMB_TEETH is the number of bits simultaneously covered by one table.
|
||||
*
|
||||
* The comb's spacing (COMB_SPACING), or the distance between the teeth,
|
||||
* is defined as ceil(256 / (COMB_BLOCKS * COMB_TEETH)). Each block covers
|
||||
* COMB_SPACING * COMB_TEETH consecutive bits in the input.
|
||||
*
|
||||
* The size of the precomputed table is COMB_BLOCKS * (1 << (COMB_TEETH - 1))
|
||||
* secp256k1_ge_storages.
|
||||
*
|
||||
* The number of point additions equals COMB_BLOCKS * COMB_SPACING. Each point
|
||||
* addition involves a cmov from (1 << (COMB_TEETH - 1)) table entries and a
|
||||
* conditional negation.
|
||||
*
|
||||
* The number of point doublings is COMB_SPACING - 1. */
|
||||
|
||||
#if defined(EXHAUSTIVE_TEST_ORDER)
|
||||
/* We need to control these values for exhaustive tests because
|
||||
* the table cannot have infinities in them (secp256k1_ge_storage
|
||||
* doesn't support infinities) */
|
||||
# undef COMB_BLOCKS
|
||||
# undef COMB_TEETH
|
||||
# if EXHAUSTIVE_TEST_ORDER > 32
|
||||
# define COMB_BLOCKS 52
|
||||
# define COMB_TEETH 5
|
||||
# elif EXHAUSTIVE_TEST_ORDER > 16
|
||||
# define COMB_BLOCKS 64
|
||||
# define COMB_TEETH 4
|
||||
# elif EXHAUSTIVE_TEST_ORDER > 8
|
||||
# define COMB_BLOCKS 86
|
||||
# define COMB_TEETH 3
|
||||
# elif EXHAUSTIVE_TEST_ORDER > 4
|
||||
# define COMB_BLOCKS 128
|
||||
# define COMB_TEETH 2
|
||||
# else
|
||||
# define COMB_BLOCKS 256
|
||||
# define COMB_TEETH 1
|
||||
# endif
|
||||
#else /* !defined(EXHAUSTIVE_TEST_ORDER) */
|
||||
/* Use (11, 6) as default configuration, which results in a 22 kB table. */
|
||||
# ifndef COMB_BLOCKS
|
||||
# define COMB_BLOCKS 11
|
||||
# ifdef DEBUG_CONFIG
|
||||
# pragma message DEBUG_CONFIG_MSG("COMB_BLOCKS undefined, assuming default value")
|
||||
# endif
|
||||
# endif
|
||||
# ifndef COMB_TEETH
|
||||
# define COMB_TEETH 6
|
||||
# ifdef DEBUG_CONFIG
|
||||
# pragma message DEBUG_CONFIG_MSG("COMB_TEETH undefined, assuming default value")
|
||||
# endif
|
||||
# endif
|
||||
#endif /* defined(EXHAUSTIVE_TEST_ORDER) */
|
||||
|
||||
/* Range checks on the parameters. */
|
||||
#if !(1 <= COMB_BLOCKS && COMB_BLOCKS <= 256)
|
||||
# error "COMB_BLOCKS must be in the range [1, 256]"
|
||||
#endif
|
||||
#if !(1 <= COMB_TEETH && COMB_TEETH <= 8)
|
||||
# error "COMB_TEETH must be in the range [1, 8]"
|
||||
#endif
|
||||
|
||||
/* The remaining COMB_* parameters are derived values, don't modify these. */
|
||||
/* - The distance between the teeth of each comb. */
|
||||
#define COMB_SPACING CEIL_DIV(256, COMB_BLOCKS * COMB_TEETH)
|
||||
/* - The number of bits covered by all the blocks; must be at least 256. */
|
||||
#define COMB_BITS (COMB_BLOCKS * COMB_TEETH * COMB_SPACING)
|
||||
/* - The number of entries per table. */
|
||||
#define COMB_POINTS (1 << (COMB_TEETH - 1))
|
||||
|
||||
/* Additional sanity checks. */
|
||||
#if (COMB_BLOCKS - 1) * COMB_TEETH * COMB_SPACING >= 256
|
||||
# error "COMB_BLOCKS can be reduced"
|
||||
#endif
|
||||
#if COMB_BLOCKS * (COMB_TEETH - 1) * COMB_SPACING >= 256
|
||||
# error "COMB_TEETH can be reduced"
|
||||
#endif
|
||||
|
||||
#ifdef DEBUG_CONFIG
|
||||
# pragma message DEBUG_CONFIG_DEF(ECMULT_GEN_PREC_BITS)
|
||||
# pragma message DEBUG_CONFIG_DEF(COMB_BLOCKS)
|
||||
# pragma message DEBUG_CONFIG_DEF(COMB_TEETH)
|
||||
#endif
|
||||
|
||||
#if ECMULT_GEN_PREC_BITS != 2 && ECMULT_GEN_PREC_BITS != 4 && ECMULT_GEN_PREC_BITS != 8
|
||||
# error "Set ECMULT_GEN_PREC_BITS to 2, 4 or 8."
|
||||
#endif
|
||||
|
||||
#define ECMULT_GEN_PREC_G(bits) (1 << bits)
|
||||
#define ECMULT_GEN_PREC_N(bits) (256 / bits)
|
||||
|
||||
typedef struct {
|
||||
/* Whether the context has been built. */
|
||||
int built;
|
||||
|
||||
/* Blinding values used when computing nG as (n-b)G + bG. */
|
||||
secp256k1_scalar scalar_offset; /* -b */
|
||||
secp256k1_ge ge_offset; /* bG */
|
||||
/* Values chosen such that
|
||||
*
|
||||
* n*G == comb(n + (2^COMB_BITS-1)/2 + scalar_offset, G/2) + ge_offset.
|
||||
*
|
||||
* This expression lets us use scalar blinding and optimize the comb precomputation. See
|
||||
* ecmult_gen_impl.h for more details. */
|
||||
secp256k1_scalar scalar_offset;
|
||||
secp256k1_ge ge_offset;
|
||||
} secp256k1_ecmult_gen_context;
|
||||
|
||||
static void secp256k1_ecmult_gen_context_build(secp256k1_ecmult_gen_context* ctx);
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/***********************************************************************
|
||||
* Copyright (c) 2013, 2014, 2015 Pieter Wuille, Gregory Maxwell *
|
||||
* Copyright (c) Pieter Wuille, Gregory Maxwell *
|
||||
* Distributed under the MIT software license, see the accompanying *
|
||||
* file COPYING or https://www.opensource.org/licenses/mit-license.php.*
|
||||
***********************************************************************/
|
||||
@@ -9,6 +9,6 @@
|
||||
|
||||
#include "ecmult_gen.h"
|
||||
|
||||
static void secp256k1_ecmult_gen_compute_table(secp256k1_ge_storage* table, const secp256k1_ge* gen, int bits);
|
||||
static void secp256k1_ecmult_gen_compute_table(secp256k1_ge_storage* table, const secp256k1_ge* gen, int blocks, int teeth);
|
||||
|
||||
#endif /* SECP256K1_ECMULT_GEN_COMPUTE_TABLE_H */
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/***********************************************************************
|
||||
* Copyright (c) 2013, 2014, 2015 Pieter Wuille, Gregory Maxwell *
|
||||
* Copyright (c) Pieter Wuille, Gregory Maxwell, Peter Dettman *
|
||||
* Distributed under the MIT software license, see the accompanying *
|
||||
* file COPYING or https://www.opensource.org/licenses/mit-license.php.*
|
||||
***********************************************************************/
|
||||
@@ -10,74 +10,96 @@
|
||||
#include "ecmult_gen_compute_table.h"
|
||||
#include "group_impl.h"
|
||||
#include "field_impl.h"
|
||||
#include "scalar_impl.h"
|
||||
#include "ecmult_gen.h"
|
||||
#include "util.h"
|
||||
|
||||
static void secp256k1_ecmult_gen_compute_table(secp256k1_ge_storage* table, const secp256k1_ge* gen, int bits) {
|
||||
int g = ECMULT_GEN_PREC_G(bits);
|
||||
int n = ECMULT_GEN_PREC_N(bits);
|
||||
static void secp256k1_ecmult_gen_compute_table(secp256k1_ge_storage* table, const secp256k1_ge* gen, int blocks, int teeth) {
|
||||
size_t points = ((size_t)1) << (teeth - 1);
|
||||
size_t points_total = points * blocks;
|
||||
int spacing = (256 + blocks * teeth - 1) / (blocks * teeth);
|
||||
secp256k1_ge* prec = checked_malloc(&default_error_callback, points_total * sizeof(*prec));
|
||||
secp256k1_gej* ds = checked_malloc(&default_error_callback, teeth * sizeof(*ds));
|
||||
secp256k1_gej* vs = checked_malloc(&default_error_callback, points_total * sizeof(*vs));
|
||||
secp256k1_gej u;
|
||||
size_t vs_pos = 0;
|
||||
secp256k1_scalar half;
|
||||
int block, i;
|
||||
|
||||
secp256k1_ge* prec = checked_malloc(&default_error_callback, n * g * sizeof(*prec));
|
||||
secp256k1_gej gj;
|
||||
secp256k1_gej nums_gej;
|
||||
int i, j;
|
||||
VERIFY_CHECK(points_total > 0);
|
||||
|
||||
VERIFY_CHECK(g > 0);
|
||||
VERIFY_CHECK(n > 0);
|
||||
|
||||
/* get the generator */
|
||||
secp256k1_gej_set_ge(&gj, gen);
|
||||
|
||||
/* Construct a group element with no known corresponding scalar (nothing up my sleeve). */
|
||||
{
|
||||
static const unsigned char nums_b32[33] = "The scalar for this x is unknown";
|
||||
secp256k1_fe nums_x;
|
||||
secp256k1_ge nums_ge;
|
||||
int r;
|
||||
r = secp256k1_fe_set_b32_limit(&nums_x, nums_b32);
|
||||
(void)r;
|
||||
VERIFY_CHECK(r);
|
||||
r = secp256k1_ge_set_xo_var(&nums_ge, &nums_x, 0);
|
||||
(void)r;
|
||||
VERIFY_CHECK(r);
|
||||
secp256k1_gej_set_ge(&nums_gej, &nums_ge);
|
||||
/* Add G to make the bits in x uniformly distributed. */
|
||||
secp256k1_gej_add_ge_var(&nums_gej, &nums_gej, gen, NULL);
|
||||
}
|
||||
|
||||
/* compute prec. */
|
||||
{
|
||||
secp256k1_gej gbase;
|
||||
secp256k1_gej numsbase;
|
||||
secp256k1_gej* precj = checked_malloc(&default_error_callback, n * g * sizeof(*precj)); /* Jacobian versions of prec. */
|
||||
gbase = gj; /* PREC_G^j * G */
|
||||
numsbase = nums_gej; /* 2^j * nums. */
|
||||
for (j = 0; j < n; j++) {
|
||||
/* Set precj[j*PREC_G .. j*PREC_G+(PREC_G-1)] to (numsbase, numsbase + gbase, ..., numsbase + (PREC_G-1)*gbase). */
|
||||
precj[j*g] = numsbase;
|
||||
for (i = 1; i < g; i++) {
|
||||
secp256k1_gej_add_var(&precj[j*g + i], &precj[j*g + i - 1], &gbase, NULL);
|
||||
}
|
||||
/* Multiply gbase by PREC_G. */
|
||||
for (i = 0; i < bits; i++) {
|
||||
secp256k1_gej_double_var(&gbase, &gbase, NULL);
|
||||
}
|
||||
/* Multiply numsbase by 2. */
|
||||
secp256k1_gej_double_var(&numsbase, &numsbase, NULL);
|
||||
if (j == n - 2) {
|
||||
/* In the last iteration, numsbase is (1 - 2^j) * nums instead. */
|
||||
secp256k1_gej_neg(&numsbase, &numsbase);
|
||||
secp256k1_gej_add_var(&numsbase, &numsbase, &nums_gej, NULL);
|
||||
}
|
||||
}
|
||||
secp256k1_ge_set_all_gej_var(prec, precj, n * g);
|
||||
free(precj);
|
||||
}
|
||||
for (j = 0; j < n; j++) {
|
||||
for (i = 0; i < g; i++) {
|
||||
secp256k1_ge_to_storage(&table[j*g + i], &prec[j*g + i]);
|
||||
/* u is the running power of two times gen we're working with, initially gen/2. */
|
||||
secp256k1_scalar_half(&half, &secp256k1_scalar_one);
|
||||
secp256k1_gej_set_infinity(&u);
|
||||
for (i = 255; i >= 0; --i) {
|
||||
/* Use a very simple multiplication ladder to avoid dependency on ecmult. */
|
||||
secp256k1_gej_double_var(&u, &u, NULL);
|
||||
if (secp256k1_scalar_get_bits(&half, i, 1)) {
|
||||
secp256k1_gej_add_ge_var(&u, &u, gen, NULL);
|
||||
}
|
||||
}
|
||||
#ifdef VERIFY
|
||||
{
|
||||
/* Verify that u*2 = gen. */
|
||||
secp256k1_gej double_u;
|
||||
secp256k1_gej_double_var(&double_u, &u, NULL);
|
||||
VERIFY_CHECK(secp256k1_gej_eq_ge_var(&double_u, gen));
|
||||
}
|
||||
#endif
|
||||
|
||||
for (block = 0; block < blocks; ++block) {
|
||||
int tooth;
|
||||
/* Here u = 2^(block*teeth*spacing) * gen/2. */
|
||||
secp256k1_gej sum;
|
||||
secp256k1_gej_set_infinity(&sum);
|
||||
for (tooth = 0; tooth < teeth; ++tooth) {
|
||||
/* Here u = 2^((block*teeth + tooth)*spacing) * gen/2. */
|
||||
int bit_off;
|
||||
/* Make sum = sum(2^((block*teeth + t)*spacing), t=0..tooth) * gen/2. */
|
||||
secp256k1_gej_add_var(&sum, &sum, &u, NULL);
|
||||
/* Make u = 2^((block*teeth + tooth)*spacing + 1) * gen/2. */
|
||||
secp256k1_gej_double_var(&u, &u, NULL);
|
||||
/* Make ds[tooth] = u = 2^((block*teeth + tooth)*spacing + 1) * gen/2. */
|
||||
ds[tooth] = u;
|
||||
/* Make u = 2^((block*teeth + tooth + 1)*spacing) * gen/2. */
|
||||
for (bit_off = 1; bit_off < spacing; ++bit_off) {
|
||||
secp256k1_gej_double_var(&u, &u, NULL);
|
||||
}
|
||||
}
|
||||
/* Now u = 2^((block*teeth + teeth)*spacing) * gen/2
|
||||
* = 2^((block+1)*teeth*spacing) * gen/2 */
|
||||
|
||||
/* Next, compute the table entries for block number block in Jacobian coordinates.
|
||||
* The entries will occupy vs[block*points + i] for i=0..points-1.
|
||||
* We start by computing the first (i=0) value corresponding to all summed
|
||||
* powers of two times G being negative. */
|
||||
secp256k1_gej_neg(&vs[vs_pos++], &sum);
|
||||
/* And then teeth-1 times "double" the range of i values for which the table
|
||||
* is computed: in each iteration, double the table by taking an existing
|
||||
* table entry and adding ds[tooth]. */
|
||||
for (tooth = 0; tooth < teeth - 1; ++tooth) {
|
||||
size_t stride = ((size_t)1) << tooth;
|
||||
size_t index;
|
||||
for (index = 0; index < stride; ++index, ++vs_pos) {
|
||||
secp256k1_gej_add_var(&vs[vs_pos], &vs[vs_pos - stride], &ds[tooth], NULL);
|
||||
}
|
||||
}
|
||||
}
|
||||
VERIFY_CHECK(vs_pos == points_total);
|
||||
|
||||
/* Convert all points simultaneously from secp256k1_gej to secp256k1_ge. */
|
||||
secp256k1_ge_set_all_gej_var(prec, vs, points_total);
|
||||
/* Convert all points from secp256k1_ge to secp256k1_ge_storage output. */
|
||||
for (block = 0; block < blocks; ++block) {
|
||||
size_t index;
|
||||
for (index = 0; index < points; ++index) {
|
||||
secp256k1_ge_to_storage(&table[block * points + index], &prec[block * points + index]);
|
||||
}
|
||||
}
|
||||
|
||||
/* Free memory. */
|
||||
free(vs);
|
||||
free(ds);
|
||||
free(prec);
|
||||
}
|
||||
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/***********************************************************************
|
||||
* Copyright (c) 2013, 2014, 2015 Pieter Wuille, Gregory Maxwell *
|
||||
* Copyright (c) Pieter Wuille, Gregory Maxwell, Peter Dettman *
|
||||
* Distributed under the MIT software license, see the accompanying *
|
||||
* file COPYING or https://www.opensource.org/licenses/mit-license.php.*
|
||||
***********************************************************************/
|
||||
@@ -29,38 +29,180 @@ static void secp256k1_ecmult_gen_context_clear(secp256k1_ecmult_gen_context *ctx
|
||||
secp256k1_ge_clear(&ctx->ge_offset);
|
||||
}
|
||||
|
||||
/* For accelerating the computation of a*G:
|
||||
* To harden against timing attacks, use the following mechanism:
|
||||
* * Break up the multiplicand into groups of PREC_BITS bits, called n_0, n_1, n_2, ..., n_(PREC_N-1).
|
||||
* * Compute sum(n_i * (PREC_G)^i * G + U_i, i=0 ... PREC_N-1), where:
|
||||
* * U_i = U * 2^i, for i=0 ... PREC_N-2
|
||||
* * U_i = U * (1-2^(PREC_N-1)), for i=PREC_N-1
|
||||
* where U is a point with no known corresponding scalar. Note that sum(U_i, i=0 ... PREC_N-1) = 0.
|
||||
* For each i, and each of the PREC_G possible values of n_i, (n_i * (PREC_G)^i * G + U_i) is
|
||||
* precomputed (call it prec(i, n_i)). The formula now becomes sum(prec(i, n_i), i=0 ... PREC_N-1).
|
||||
* None of the resulting prec group elements have a known scalar, and neither do any of
|
||||
* the intermediate sums while computing a*G.
|
||||
* The prec values are stored in secp256k1_ecmult_gen_prec_table[i][n_i] = n_i * (PREC_G)^i * G + U_i.
|
||||
*/
|
||||
static void secp256k1_ecmult_gen(const secp256k1_ecmult_gen_context *ctx, secp256k1_gej *r, const secp256k1_scalar *gn) {
|
||||
int bits = ECMULT_GEN_PREC_BITS;
|
||||
int g = ECMULT_GEN_PREC_G(bits);
|
||||
int n = ECMULT_GEN_PREC_N(bits);
|
||||
/* Compute the scalar (2^COMB_BITS - 1) / 2, the difference between the gn argument to
|
||||
* secp256k1_ecmult_gen, and the scalar whose encoding the table lookup bits are drawn
|
||||
* from (before applying blinding). */
|
||||
static void secp256k1_ecmult_gen_scalar_diff(secp256k1_scalar* diff) {
|
||||
int i;
|
||||
|
||||
/* Compute scalar -1/2. */
|
||||
secp256k1_scalar neghalf;
|
||||
secp256k1_scalar_half(&neghalf, &secp256k1_scalar_one);
|
||||
secp256k1_scalar_negate(&neghalf, &neghalf);
|
||||
|
||||
/* Compute diff = 2^(COMB_BITS - 1). */
|
||||
*diff = secp256k1_scalar_one;
|
||||
for (i = 0; i < COMB_BITS - 1; ++i) {
|
||||
secp256k1_scalar_add(diff, diff, diff);
|
||||
}
|
||||
|
||||
/* The result is the sum 2^(COMB_BITS - 1) + (-1/2). */
|
||||
secp256k1_scalar_add(diff, diff, &neghalf);
|
||||
}
|
||||
|
||||
static void secp256k1_ecmult_gen(const secp256k1_ecmult_gen_context *ctx, secp256k1_gej *r, const secp256k1_scalar *gn) {
|
||||
uint32_t comb_off;
|
||||
secp256k1_ge add;
|
||||
secp256k1_fe neg;
|
||||
secp256k1_ge_storage adds;
|
||||
secp256k1_scalar gnb;
|
||||
int i, j, n_i;
|
||||
secp256k1_scalar d;
|
||||
|
||||
memset(&adds, 0, sizeof(adds));
|
||||
|
||||
/* We want to compute R = gn*G.
|
||||
*
|
||||
* To blind the scalar used in the computation, we rewrite this to be
|
||||
* R = (gn - b)*G + b*G, with a blinding value b determined by the context.
|
||||
*
|
||||
* The multiplication (gn-b)*G will be performed using a signed-digit multi-comb (see Section
|
||||
* 3.3 of "Fast and compact elliptic-curve cryptography" by Mike Hamburg,
|
||||
* https://eprint.iacr.org/2012/309).
|
||||
*
|
||||
* Let comb(s, P) = sum((2*s[i]-1)*2^i*P for i=0..COMB_BITS-1), where s[i] is the i'th bit of
|
||||
* the binary representation of scalar s. So the s[i] values determine whether -2^i*P (s[i]=0)
|
||||
* or +2^i*P (s[i]=1) are added together. COMB_BITS is at least 256, so all bits of s are
|
||||
* covered. By manipulating:
|
||||
*
|
||||
* comb(s, P) = sum((2*s[i]-1)*2^i*P for i=0..COMB_BITS-1)
|
||||
* <=> comb(s, P) = sum((2*s[i]-1)*2^i for i=0..COMB_BITS-1) * P
|
||||
* <=> comb(s, P) = (2*sum(s[i]*2^i for i=0..COMB_BITS-1) - sum(2^i for i=0..COMB_BITS-1)) * P
|
||||
* <=> comb(s, P) = (2*s - (2^COMB_BITS - 1)) * P
|
||||
*
|
||||
* If we wanted to compute (gn-b)*G as comb(s, G), it would need to hold that
|
||||
*
|
||||
* (gn - b) * G = (2*s - (2^COMB_BITS - 1)) * G
|
||||
* <=> s = (gn - b + (2^COMB_BITS - 1))/2 (mod order)
|
||||
*
|
||||
* We use an alternative here that avoids the modular division by two: instead we compute
|
||||
* (gn-b)*G as comb(d, G/2). For that to hold it must be the case that
|
||||
*
|
||||
* (gn - b) * G = (2*d - (2^COMB_BITS - 1)) * (G/2)
|
||||
* <=> d = gn - b + (2^COMB_BITS - 1)/2 (mod order)
|
||||
*
|
||||
* Adding precomputation, our final equations become:
|
||||
*
|
||||
* ctx->scalar_offset = -b
|
||||
* ctx->ge_offset = b*G
|
||||
* d = gn + ctx->scalar_offset + (2^COMB_BITS - 1)/2 (mod order)
|
||||
* R = comb(d, G/2) + ctx->ge_offset
|
||||
*
|
||||
* The comb(d, G/2) function is then computed by summing + or - 2^(i-1)*G, for i=0..COMB_BITS-1,
|
||||
* depending on the value of the bits d[i] of the binary representation of scalar d.
|
||||
*/
|
||||
|
||||
/* Compute the scalar d = (gn + ctx->scalar_offset + (2^COMB_BITS - 1)/2). */
|
||||
secp256k1_ecmult_gen_scalar_diff(&d);
|
||||
secp256k1_scalar_add(&d, &d, &ctx->scalar_offset);
|
||||
secp256k1_scalar_add(&d, &d, gn);
|
||||
|
||||
/* In secp256k1_ecmult_gen_prec_table we have precomputed sums of the
|
||||
* (2*d[i]-1) * 2^(i-1) * G points, for various combinations of i positions.
|
||||
* We rewrite our equation in terms of these table entries.
|
||||
*
|
||||
* Let mask(b) = sum(2^((b*COMB_TEETH + t)*COMB_SPACING) for t=0..COMB_TEETH-1),
|
||||
* with b ranging from 0 to COMB_BLOCKS-1. So for example with COMB_BLOCKS=11,
|
||||
* COMB_TEETH=6, COMB_SPACING=4, we would have:
|
||||
* mask(0) = 2^0 + 2^4 + 2^8 + 2^12 + 2^16 + 2^20,
|
||||
* mask(1) = 2^24 + 2^28 + 2^32 + 2^36 + 2^40 + 2^44,
|
||||
* mask(2) = 2^48 + 2^52 + 2^56 + 2^60 + 2^64 + 2^68,
|
||||
* ...
|
||||
* mask(10) = 2^240 + 2^244 + 2^248 + 2^252 + 2^256 + 2^260
|
||||
*
|
||||
* We will split up the bits d[i] using these masks. Specifically, each mask is
|
||||
* used COMB_SPACING times, with different shifts:
|
||||
*
|
||||
* d = (d & mask(0)<<0) + (d & mask(1)<<0) + ... + (d & mask(COMB_BLOCKS-1)<<0) +
|
||||
* (d & mask(0)<<1) + (d & mask(1)<<1) + ... + (d & mask(COMB_BLOCKS-1)<<1) +
|
||||
* ...
|
||||
* (d & mask(0)<<(COMB_SPACING-1)) + ...
|
||||
*
|
||||
* Now define table(b, m) = (m - mask(b)/2) * G, and we will precompute these values for
|
||||
* b=0..COMB_BLOCKS-1, and for all values m which (d & mask(b)) can take (so m can take on
|
||||
* 2^COMB_TEETH distinct values).
|
||||
*
|
||||
* If m=(d & mask(b)), then table(b, m) is the sum of 2^i * (2*d[i]-1) * G/2, with i
|
||||
* iterating over the set bits in mask(b). In our example, table(2, 2^48 + 2^56 + 2^68)
|
||||
* would equal (2^48 - 2^52 + 2^56 - 2^60 - 2^64 + 2^68) * G/2.
|
||||
*
|
||||
* With that, we can rewrite comb(d, G/2) as:
|
||||
*
|
||||
* 2^0 * (table(0, d>>0 & mask(0)) + ... + table(COMB_BLOCKS-1, d>>0 & mask(COMB_BLOCKS-1)))
|
||||
* + 2^1 * (table(0, d>>1 & mask(0)) + ... + table(COMB_BLOCKS-1, d>>1 & mask(COMB_BLOCKS-1)))
|
||||
* + 2^2 * (table(0, d>>2 & mask(0)) + ... + table(COMB_BLOCKS-1, d>>2 & mask(COMB_BLOCKS-1)))
|
||||
* + ...
|
||||
* + 2^(COMB_SPACING-1) * (table(0, d>>(COMB_SPACING-1) & mask(0)) + ...)
|
||||
*
|
||||
* Or more generically as
|
||||
*
|
||||
* sum(2^i * sum(table(b, d>>i & mask(b)), b=0..COMB_BLOCKS-1), i=0..COMB_SPACING-1)
|
||||
*
|
||||
* This is implemented using an outer loop that runs in reverse order over the lines of this
|
||||
* equation, which in each iteration runs an inner loop that adds the terms of that line and
|
||||
* then doubles the result before proceeding to the next line.
|
||||
*
|
||||
* In pseudocode:
|
||||
* c = infinity
|
||||
* for comb_off in range(COMB_SPACING - 1, -1, -1):
|
||||
* for block in range(COMB_BLOCKS):
|
||||
* c += table(block, (d >> comb_off) & mask(block))
|
||||
* if comb_off > 0:
|
||||
* c = 2*c
|
||||
* return c
|
||||
*
|
||||
* This computes c = comb(d, G/2), and thus finally R = c + ctx->ge_offset. Note that it would
|
||||
* be possible to apply an initial offset instead of a final offset (moving ge_offset to take
|
||||
* the place of infinity above), but the chosen approach allows using (in a future improvement)
|
||||
* an incomplete addition formula for most of the multiplication.
|
||||
*
|
||||
* The last question is how to implement the table(b, m) function. For any value of b,
|
||||
* m=(d & mask(b)) can only take on at most 2^COMB_TEETH possible values (the last one may have
|
||||
* fewer as there mask(b) may exceed the curve order). So we could create COMB_BLOCKS tables
|
||||
* which contain a value for each such m value.
|
||||
*
|
||||
* Now note that if m=(d & mask(b)), then flipping the relevant bits of m results in negating
|
||||
* the result of table(b, m). This is because table(b,m XOR mask(b)) = table(b, mask(b) - m) =
|
||||
* (mask(b) - m - mask(b)/2)*G = (-m + mask(b)/2)*G = -(m - mask(b)/2)*G = -table(b, m).
|
||||
* Because of this it suffices to only store the first half of the m values for every b. If an
|
||||
* entry from the second half is needed, we look up its bit-flipped version instead, and negate
|
||||
* it.
|
||||
*
|
||||
* secp256k1_ecmult_gen_prec_table[b][index] stores the table(b, m) entries. Index
|
||||
* is the relevant mask(b) bits of m packed together without gaps. */
|
||||
|
||||
secp256k1_gej_set_infinity(r);
|
||||
/* Outer loop: iterate over comb_off from COMB_SPACING - 1 down to 0. */
|
||||
comb_off = COMB_SPACING - 1;
|
||||
while (1) {
|
||||
uint32_t block;
|
||||
uint32_t bit_pos = comb_off;
|
||||
/* Inner loop: for each block, add table entries to the result. */
|
||||
for (block = 0; block < COMB_BLOCKS; ++block) {
|
||||
/* Gather the mask(block)-selected bits of d into bits. They're packed:
|
||||
* bits[tooth] = d[(block*COMB_TEETH + tooth)*COMB_SPACING + comb_off]. */
|
||||
uint32_t bits = 0, sign, abs, index, tooth;
|
||||
for (tooth = 0; tooth < COMB_TEETH && bit_pos < 256; ++tooth) {
|
||||
uint32_t bit = secp256k1_scalar_get_bits(&d, bit_pos, 1);
|
||||
bits |= bit << tooth;
|
||||
bit_pos += COMB_SPACING;
|
||||
}
|
||||
|
||||
/* Blind scalar/point multiplication by computing (gn-b)*G + b*G instead of gn*G. */
|
||||
secp256k1_scalar_add(&gnb, gn, &ctx->scalar_offset);
|
||||
/* If the top bit of bits is 1, flip them all (corresponding to looking up
|
||||
* the negated table value), and remember to negate the result in sign. */
|
||||
sign = (bits >> (COMB_TEETH - 1)) & 1;
|
||||
abs = (bits ^ -sign) & (COMB_POINTS - 1);
|
||||
VERIFY_CHECK(sign == 0 || sign == 1);
|
||||
VERIFY_CHECK(abs < COMB_POINTS);
|
||||
|
||||
for (i = 0; i < n; i++) {
|
||||
n_i = secp256k1_scalar_get_bits(&gnb, i * bits, bits);
|
||||
for (j = 0; j < g; j++) {
|
||||
/** This uses a conditional move to avoid any secret data in array indexes.
|
||||
* _Any_ use of secret indexes has been demonstrated to result in timing
|
||||
* sidechannels, even when the cache-line access patterns are uniform.
|
||||
@@ -71,19 +213,33 @@ static void secp256k1_ecmult_gen(const secp256k1_ecmult_gen_context *ctx, secp25
|
||||
* by Dag Arne Osvik, Adi Shamir, and Eran Tromer
|
||||
* (https://www.tau.ac.il/~tromer/papers/cache.pdf)
|
||||
*/
|
||||
secp256k1_ge_storage_cmov(&adds, &secp256k1_ecmult_gen_prec_table[i][j], j == n_i);
|
||||
for (index = 0; index < COMB_POINTS; ++index) {
|
||||
secp256k1_ge_storage_cmov(&adds, &secp256k1_ecmult_gen_prec_table[block][index], index == abs);
|
||||
}
|
||||
|
||||
/* Set add=adds or add=-adds, in constant time, based on sign. */
|
||||
secp256k1_ge_from_storage(&add, &adds);
|
||||
secp256k1_fe_negate(&neg, &add.y, 1);
|
||||
secp256k1_fe_cmov(&add.y, &neg, sign);
|
||||
|
||||
/* Add the looked up and conditionally negated value to r. */
|
||||
secp256k1_gej_add_ge(r, r, &add);
|
||||
}
|
||||
secp256k1_ge_from_storage(&add, &adds);
|
||||
secp256k1_gej_add_ge(r, r, &add);
|
||||
|
||||
/* Double the result, except in the last iteration. */
|
||||
if (comb_off-- == 0) break;
|
||||
secp256k1_gej_double(r, r);
|
||||
}
|
||||
n_i = 0;
|
||||
|
||||
/* Correct for the scalar_offset added at the start (ge_offset = b*G, while b was
|
||||
* subtracted from the input scalar gn). */
|
||||
secp256k1_gej_add_ge(r, r, &ctx->ge_offset);
|
||||
|
||||
/* Cleanup. */
|
||||
secp256k1_fe_clear(&neg);
|
||||
secp256k1_ge_clear(&add);
|
||||
secp256k1_scalar_clear(&gnb);
|
||||
memset(&adds, 0, sizeof(adds));
|
||||
secp256k1_scalar_clear(&d);
|
||||
}
|
||||
|
||||
/* Setup blinding values for secp256k1_ecmult_gen. */
|
||||
|
||||
@@ -17,10 +17,42 @@
|
||||
#include "ecmult_gen.h"
|
||||
#include "ecmult_gen_compute_table_impl.h"
|
||||
|
||||
static const int CONFIGS[1][2] = {
|
||||
{11, 6}
|
||||
};
|
||||
|
||||
static void print_table(FILE* fp, int blocks, int teeth) {
|
||||
size_t points = ((size_t)1) << (teeth - 1);
|
||||
int outer;
|
||||
size_t inner;
|
||||
|
||||
secp256k1_ge_storage* table = checked_malloc(&default_error_callback, blocks * points * sizeof(secp256k1_ge_storage));
|
||||
secp256k1_ecmult_gen_compute_table(table, &secp256k1_ge_const_g, blocks, teeth);
|
||||
|
||||
fprintf(fp, "#elif (COMB_BLOCKS == %d) && (COMB_TEETH == %d)\n", blocks, teeth);
|
||||
for (outer = 0; outer != blocks; outer++) {
|
||||
fprintf(fp,"{");
|
||||
for (inner = 0; inner != points; inner++) {
|
||||
fprintf(fp, "S(%"PRIx32",%"PRIx32",%"PRIx32",%"PRIx32",%"PRIx32",%"PRIx32",%"PRIx32",%"PRIx32
|
||||
",%"PRIx32",%"PRIx32",%"PRIx32",%"PRIx32",%"PRIx32",%"PRIx32",%"PRIx32",%"PRIx32")",
|
||||
SECP256K1_GE_STORAGE_CONST_GET(table[outer * points + inner]));
|
||||
if (inner != points - 1) {
|
||||
fprintf(fp,",\n");
|
||||
}
|
||||
}
|
||||
if (outer != blocks - 1) {
|
||||
fprintf(fp,"},\n");
|
||||
} else {
|
||||
fprintf(fp,"}\n");
|
||||
}
|
||||
}
|
||||
free(table);
|
||||
}
|
||||
|
||||
int main(int argc, char **argv) {
|
||||
const char outfile[] = "src/precomputed_ecmult_gen.c";
|
||||
FILE* fp;
|
||||
int bits;
|
||||
size_t config;
|
||||
|
||||
(void)argc;
|
||||
(void)argv;
|
||||
@@ -40,36 +72,15 @@ int main(int argc, char **argv) {
|
||||
fprintf(fp, "# error Cannot compile precomputed_ecmult_gen.c in exhaustive test mode\n");
|
||||
fprintf(fp, "#endif /* EXHAUSTIVE_TEST_ORDER */\n");
|
||||
fprintf(fp, "#define S(a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p) SECP256K1_GE_STORAGE_CONST(0x##a##u,0x##b##u,0x##c##u,0x##d##u,0x##e##u,0x##f##u,0x##g##u,0x##h##u,0x##i##u,0x##j##u,0x##k##u,0x##l##u,0x##m##u,0x##n##u,0x##o##u,0x##p##u)\n");
|
||||
fprintf(fp, "const secp256k1_ge_storage secp256k1_ecmult_gen_prec_table[ECMULT_GEN_PREC_N(ECMULT_GEN_PREC_BITS)][ECMULT_GEN_PREC_G(ECMULT_GEN_PREC_BITS)] = {\n");
|
||||
|
||||
for (bits = 2; bits <= 8; bits *= 2) {
|
||||
int g = ECMULT_GEN_PREC_G(bits);
|
||||
int n = ECMULT_GEN_PREC_N(bits);
|
||||
int inner, outer;
|
||||
|
||||
secp256k1_ge_storage* table = checked_malloc(&default_error_callback, n * g * sizeof(secp256k1_ge_storage));
|
||||
secp256k1_ecmult_gen_compute_table(table, &secp256k1_ge_const_g, bits);
|
||||
|
||||
fprintf(fp, "#if ECMULT_GEN_PREC_BITS == %d\n", bits);
|
||||
for(outer = 0; outer != n; outer++) {
|
||||
fprintf(fp,"{");
|
||||
for(inner = 0; inner != g; inner++) {
|
||||
fprintf(fp, "S(%"PRIx32",%"PRIx32",%"PRIx32",%"PRIx32",%"PRIx32",%"PRIx32",%"PRIx32",%"PRIx32
|
||||
",%"PRIx32",%"PRIx32",%"PRIx32",%"PRIx32",%"PRIx32",%"PRIx32",%"PRIx32",%"PRIx32")",
|
||||
SECP256K1_GE_STORAGE_CONST_GET(table[outer * g + inner]));
|
||||
if (inner != g - 1) {
|
||||
fprintf(fp,",\n");
|
||||
}
|
||||
}
|
||||
if (outer != n - 1) {
|
||||
fprintf(fp,"},\n");
|
||||
} else {
|
||||
fprintf(fp,"}\n");
|
||||
}
|
||||
}
|
||||
fprintf(fp, "#endif\n");
|
||||
free(table);
|
||||
fprintf(fp, "const secp256k1_ge_storage secp256k1_ecmult_gen_prec_table[COMB_BLOCKS][COMB_POINTS] = {\n");
|
||||
fprintf(fp, "#if 0\n");
|
||||
for (config = 0; config < sizeof(CONFIGS) / sizeof(*CONFIGS); ++config) {
|
||||
print_table(fp, CONFIGS[config][0], CONFIGS[config][1]);
|
||||
}
|
||||
fprintf(fp, "#else\n");
|
||||
fprintf(fp, "# error Configuration mismatch, invalid COMB_* parameters. Try deleting precomputed_ecmult_gen.c before the build.\n");
|
||||
fprintf(fp, "#endif\n");
|
||||
|
||||
fprintf(fp, "};\n");
|
||||
fprintf(fp, "#undef S\n");
|
||||
|
||||
10091
src/precomputed_ecmult_gen.c
generated
10091
src/precomputed_ecmult_gen.c
generated
File diff suppressed because it is too large
Load Diff
@@ -14,9 +14,9 @@ extern "C" {
|
||||
#include "group.h"
|
||||
#include "ecmult_gen.h"
|
||||
#ifdef EXHAUSTIVE_TEST_ORDER
|
||||
static secp256k1_ge_storage secp256k1_ecmult_gen_prec_table[ECMULT_GEN_PREC_N(ECMULT_GEN_PREC_BITS)][ECMULT_GEN_PREC_G(ECMULT_GEN_PREC_BITS)];
|
||||
static secp256k1_ge_storage secp256k1_ecmult_gen_prec_table[COMB_BLOCKS][COMB_POINTS];
|
||||
#else
|
||||
extern const secp256k1_ge_storage secp256k1_ecmult_gen_prec_table[ECMULT_GEN_PREC_N(ECMULT_GEN_PREC_BITS)][ECMULT_GEN_PREC_G(ECMULT_GEN_PREC_BITS)];
|
||||
extern const secp256k1_ge_storage secp256k1_ecmult_gen_prec_table[COMB_BLOCKS][COMB_POINTS];
|
||||
#endif /* defined(EXHAUSTIVE_TEST_ORDER) */
|
||||
|
||||
#ifdef __cplusplus
|
||||
|
||||
@@ -389,7 +389,7 @@ int main(int argc, char** argv) {
|
||||
}
|
||||
|
||||
/* Recreate the ecmult{,_gen} tables using the right generator (as selected via EXHAUSTIVE_TEST_ORDER) */
|
||||
secp256k1_ecmult_gen_compute_table(&secp256k1_ecmult_gen_prec_table[0][0], &secp256k1_ge_const_g, ECMULT_GEN_PREC_BITS);
|
||||
secp256k1_ecmult_gen_compute_table(&secp256k1_ecmult_gen_prec_table[0][0], &secp256k1_ge_const_g, COMB_BLOCKS, COMB_TEETH);
|
||||
secp256k1_ecmult_compute_two_tables(secp256k1_pre_g, secp256k1_pre_g_128, WINDOW_G, &secp256k1_ge_const_g);
|
||||
|
||||
while (count--) {
|
||||
|
||||
Reference in New Issue
Block a user