remove 32 bit limbs
@@ -36,12 +36,9 @@ Implementation details
 * Field operations
   * Optimized implementation of arithmetic modulo the curve's field size (2^256 - 0x1000003D1).
     * Using 5 52-bit limbs
-    * Using 10 26-bit limbs (including hand-optimized assembly for 32-bit ARM, by Wladimir J. van der Laan).
-      * This is an experimental feature that has not received enough scrutiny to satisfy the standard of quality of this library but is made available for testing and review by the community.
 * Scalar operations
   * Optimized implementation without data-dependent branches of arithmetic modulo the curve's order.
     * Using 4 64-bit limbs (relying on __int128 support in the compiler).
-    * Using 8 32-bit limbs.
 * Modular inverses (both field elements and scalars) based on [safegcd](https://gcd.cr.yp.to/index.html) with some modifications, and a variable-time variant (by Peter Dettman).
 * Group operations
   * Point addition formula specifically simplified for the curve equation (y^2 = x^3 + 7).
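Editor's note: the two scalar limb layouts above describe the same 256-bit integer; only the slicing differs. A minimal standalone sketch (illustrative only, not code from this commit) showing that the 8x32 representation being removed is just the 4x64 one with each limb split in half:

#include <assert.h>
#include <stdint.h>

int main(void) {
    /* An arbitrary 256-bit value as 4 64-bit limbs, least significant first. */
    uint64_t d64[4] = {0x1122334455667788ULL, 0x99AABBCCDDEEFF00ULL,
                       0x0123456789ABCDEFULL, 0xFEDCBA9876543210ULL};
    uint32_t d32[8];
    int i;
    /* The 8x32 layout stores the low and high half of each 64-bit limb. */
    for (i = 0; i < 4; i++) {
        d32[2 * i]     = (uint32_t)d64[i];
        d32[2 * i + 1] = (uint32_t)(d64[i] >> 32);
    }
    /* Recombining the halves gives back the 4x64 layout exactly. */
    for (i = 0; i < 4; i++) {
        assert(d64[i] == (((uint64_t)d32[2 * i + 1] << 32) | d32[2 * i]));
    }
    return 0;
}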
BIN libp256k1.a
Binary file not shown.
BIN libp256k1.so
Binary file not shown.
@@ -13,7 +13,7 @@
  * objects, which represent field elements (integers modulo 2^256 - 2^32 - 977).
  *
  * The actual definition of the secp256k1_fe type depends on the chosen field
- * implementation; see the field_5x52.h and field_10x26.h files for details.
+ * implementation; see the field_5x52.h file for details.
  *
  * All secp256k1_fe objects have implicit properties that determine what
  * operations are permitted on it. These are purely a function of what
@@ -39,8 +39,6 @@
 
 #if defined(SECP256K1_WIDEMUL_INT128)
 #include "field_5x52.h"
-#elif defined(SECP256K1_WIDEMUL_INT64)
-#include "field_10x26.h"
 #else
 #error "Please select wide multiplication implementation"
 #endif
@@ -1,57 +0,0 @@
/***********************************************************************
 * Copyright (c) 2013, 2014 Pieter Wuille                              *
 * Distributed under the MIT software license, see the accompanying    *
 * file COPYING or https://www.opensource.org/licenses/mit-license.php.*
 ***********************************************************************/

#ifndef SECP256K1_FIELD_REPR_H
#define SECP256K1_FIELD_REPR_H

#include <stdint.h>

/** This field implementation represents the value as 10 uint32_t limbs in base
 *  2^26. */
typedef struct {
    /* A field element f represents the sum(i=0..9, f.n[i] << (i*26)) mod p,
     * where p is the field modulus, 2^256 - 2^32 - 977.
     *
     * The individual limbs f.n[i] can exceed 2^26; the field's magnitude roughly
     * corresponds to how much excess is allowed. The value
     * sum(i=0..9, f.n[i] << (i*26)) may exceed p, unless the field element is
     * normalized. */
    uint32_t n[10];
    /*
     * Magnitude m requires:
     *     n[i] <= 2 * m * (2^26 - 1) for i=0..8
     *     n[9] <= 2 * m * (2^22 - 1)
     *
     * Normalized requires:
     *     n[i] <= (2^26 - 1) for i=0..8
     *     sum(i=0..9, n[i] << (i*26)) < p
     *     (together these imply n[9] <= 2^22 - 1)
     */
    SECP256K1_FE_VERIFY_FIELDS
} secp256k1_fe;

/* Unpacks a constant into an overlapping multi-limbed FE element. */
#define SECP256K1_FE_CONST_INNER(d7, d6, d5, d4, d3, d2, d1, d0) { \
    (d0) & 0x3FFFFFFUL, \
    (((uint32_t)d0) >> 26) | (((uint32_t)(d1) & 0xFFFFFUL) << 6), \
    (((uint32_t)d1) >> 20) | (((uint32_t)(d2) & 0x3FFFUL) << 12), \
    (((uint32_t)d2) >> 14) | (((uint32_t)(d3) & 0xFFUL) << 18), \
    (((uint32_t)d3) >> 8) | (((uint32_t)(d4) & 0x3UL) << 24), \
    (((uint32_t)d4) >> 2) & 0x3FFFFFFUL, \
    (((uint32_t)d4) >> 28) | (((uint32_t)(d5) & 0x3FFFFFUL) << 4), \
    (((uint32_t)d5) >> 22) | (((uint32_t)(d6) & 0xFFFFUL) << 10), \
    (((uint32_t)d6) >> 16) | (((uint32_t)(d7) & 0x3FFUL) << 16), \
    (((uint32_t)d7) >> 10) \
}

typedef struct {
    uint32_t n[8];
} secp256k1_fe_storage;

#define SECP256K1_FE_STORAGE_CONST(d7, d6, d5, d4, d3, d2, d1, d0) {{ (d0), (d1), (d2), (d3), (d4), (d5), (d6), (d7) }}
#define SECP256K1_FE_STORAGE_CONST_GET(d) d.n[7], d.n[6], d.n[5], d.n[4], d.n[3], d.n[2], d.n[1], d.n[0]

#endif /* SECP256K1_FIELD_REPR_H */
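Editor's note: SECP256K1_FE_CONST_INNER above splits eight 32-bit words into ten base-2^26 limbs with no excess, so bit k of the value ends up as bit (k mod 26) of limb k/26. A standalone check of that property (illustrative sketch, reusing the macro verbatim on an arbitrary test value):

#include <assert.h>
#include <stdint.h>

#define SECP256K1_FE_CONST_INNER(d7, d6, d5, d4, d3, d2, d1, d0) { \
    (d0) & 0x3FFFFFFUL, \
    (((uint32_t)d0) >> 26) | (((uint32_t)(d1) & 0xFFFFFUL) << 6), \
    (((uint32_t)d1) >> 20) | (((uint32_t)(d2) & 0x3FFFUL) << 12), \
    (((uint32_t)d2) >> 14) | (((uint32_t)(d3) & 0xFFUL) << 18), \
    (((uint32_t)d3) >> 8) | (((uint32_t)(d4) & 0x3UL) << 24), \
    (((uint32_t)d4) >> 2) & 0x3FFFFFFUL, \
    (((uint32_t)d4) >> 28) | (((uint32_t)(d5) & 0x3FFFFFUL) << 4), \
    (((uint32_t)d5) >> 22) | (((uint32_t)(d6) & 0xFFFFUL) << 10), \
    (((uint32_t)d6) >> 16) | (((uint32_t)(d7) & 0x3FFUL) << 16), \
    (((uint32_t)d7) >> 10) \
}

int main(void) {
    /* Arbitrary 256-bit test value as 8 32-bit words, d[0] least significant. */
    uint32_t d[8] = {0x04030201UL, 0x08070605UL, 0x0C0B0A09UL, 0x100F0E0DUL,
                     0x14131211UL, 0x18171615UL, 0x1C1B1A19UL, 0x201F1E1DUL};
    uint32_t n[10] = SECP256K1_FE_CONST_INNER(d[7], d[6], d[5], d[4],
                                              d[3], d[2], d[1], d[0]);
    int k;
    /* Every bit of sum(n[i] << 26*i) must match the corresponding bit of d. */
    for (k = 0; k < 256; k++) {
        assert(((n[k / 26] >> (k % 26)) & 1) == ((d[k / 32] >> (k % 32)) & 1));
    }
    return 0;
}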
File diff suppressed because it is too large
@@ -12,8 +12,6 @@
 
 #if defined(SECP256K1_WIDEMUL_INT128)
 #include "field_5x52_impl.h"
-#elif defined(SECP256K1_WIDEMUL_INT64)
-#include "field_10x26_impl.h"
 #else
 #error "Please select wide multiplication implementation"
 #endif
@@ -1,43 +0,0 @@
/***********************************************************************
 * Copyright (c) 2020 Peter Dettman                                    *
 * Distributed under the MIT software license, see the accompanying    *
 * file COPYING or https://www.opensource.org/licenses/mit-license.php.*
 **********************************************************************/

#ifndef SECP256K1_MODINV32_H
#define SECP256K1_MODINV32_H

#include "util.h"

/* A signed 30-bit limb representation of integers.
 *
 * Its value is sum(v[i] * 2^(30*i), i=0..8). */
typedef struct {
    int32_t v[9];
} secp256k1_modinv32_signed30;

typedef struct {
    /* The modulus in signed30 notation, must be odd and in [3, 2^256]. */
    secp256k1_modinv32_signed30 modulus;

    /* modulus^{-1} mod 2^30 */
    uint32_t modulus_inv30;
} secp256k1_modinv32_modinfo;

/* Replace x with its modular inverse mod modinfo->modulus. x must be in range [0, modulus).
 * If x is zero, the result will be zero as well. If not, the inverse must exist (i.e., the gcd of
 * x and modulus must be 1). These rules are automatically satisfied if the modulus is prime.
 *
 * On output, all of x's limbs will be in [0, 2^30).
 */
static void secp256k1_modinv32_var(secp256k1_modinv32_signed30 *x, const secp256k1_modinv32_modinfo *modinfo);

/* Same as secp256k1_modinv32_var, but constant time in x (not in the modulus). */
static void secp256k1_modinv32(secp256k1_modinv32_signed30 *x, const secp256k1_modinv32_modinfo *modinfo);

/* Compute the Jacobi symbol for (x | modinfo->modulus). x must be coprime with modulus (and thus
 * cannot be 0, as modulus >= 3). All limbs of x must be non-negative. Returns 0 if the result
 * cannot be computed. */
static int secp256k1_jacobi32_maybe_var(const secp256k1_modinv32_signed30 *x, const secp256k1_modinv32_modinfo *modinfo);

#endif /* SECP256K1_MODINV32_H */
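Editor's note: the radix claim above (value = sum(v[i] * 2^(30*i))) can be sanity-checked on a value small enough for plain 64-bit arithmetic; a sketch (illustrative only; the real type uses nine limbs to cover [0, 2^256]):

#include <assert.h>
#include <stdint.h>

int main(void) {
    /* The signed30 layout on a 64-bit value: x == v[0] + v[1]*2^30 + v[2]*2^60. */
    uint64_t x = 0x0123456789ABCDEFULL;
    int32_t v[3];
    v[0] = (int32_t)(x & 0x3FFFFFFF);          /* bottom 30 bits */
    v[1] = (int32_t)((x >> 30) & 0x3FFFFFFF);  /* next 30 bits */
    v[2] = (int32_t)(x >> 60);                 /* remaining top bits */
    assert(x == (uint64_t)v[0] + ((uint64_t)v[1] << 30) + ((uint64_t)v[2] << 60));
    return 0;
}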
@@ -1,725 +0,0 @@
/***********************************************************************
 * Copyright (c) 2020 Peter Dettman                                    *
 * Distributed under the MIT software license, see the accompanying    *
 * file COPYING or https://www.opensource.org/licenses/mit-license.php.*
 **********************************************************************/

#ifndef SECP256K1_MODINV32_IMPL_H
#define SECP256K1_MODINV32_IMPL_H

#include "modinv32.h"

#include "util.h"

#include <stdlib.h>

/* This file implements modular inversion based on the paper "Fast constant-time gcd computation and
 * modular inversion" by Daniel J. Bernstein and Bo-Yin Yang.
 *
 * For an explanation of the algorithm, see doc/safegcd_implementation.md. This file contains an
 * implementation for N=30, using 30-bit signed limbs represented as int32_t.
 */
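Editor's note: for reference, the single divstep that the batched 30-step routines below implement, transcribed from the paper's definition (a sketch following doc/safegcd_implementation.md, not code from this file; the implementations below track zeta = -(delta+1/2) or eta = -delta instead of delta, and replace the branches with masks):

#include <assert.h>
#include <stdint.h>

/* One divstep from the Bernstein-Yang paper, on (delta, f, g) with f odd. */
static void divstep(int64_t *delta, int64_t *f, int64_t *g) {
    if (*delta > 0 && (*g & 1)) {
        int64_t t = *f;
        *delta = 1 - *delta;
        *f = *g;
        *g = (*g - t) / 2;   /* g - f is even: both are odd here */
    } else if (*g & 1) {
        *delta = 1 + *delta;
        *g = (*g + *f) / 2;  /* g + f is even */
    } else {
        *delta = 1 + *delta;
        *g = *g / 2;
    }
}

int main(void) {
    /* Iterating divstep until g = 0 leaves +/- gcd(f0, g0) in f. */
    int64_t delta = 1, f = 21, g = 35;   /* gcd = 7 */
    while (g != 0) divstep(&delta, &f, &g);
    assert(f == 7 || f == -7);
    return 0;
}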
#ifdef VERIFY
static const secp256k1_modinv32_signed30 SECP256K1_SIGNED30_ONE = {{1}};

/* Compute a*factor and put it in r. All but the top limb in r will be in range [0,2^30). */
static void secp256k1_modinv32_mul_30(secp256k1_modinv32_signed30 *r, const secp256k1_modinv32_signed30 *a, int alen, int32_t factor) {
    const int32_t M30 = (int32_t)(UINT32_MAX >> 2);
    int64_t c = 0;
    int i;
    for (i = 0; i < 8; ++i) {
        if (i < alen) c += (int64_t)a->v[i] * factor;
        r->v[i] = (int32_t)c & M30; c >>= 30;
    }
    if (8 < alen) c += (int64_t)a->v[8] * factor;
    VERIFY_CHECK(c == (int32_t)c);
    r->v[8] = (int32_t)c;
}

/* Return -1 for a<b*factor, 0 for a==b*factor, 1 for a>b*factor. A consists of alen limbs; b has 9. */
static int secp256k1_modinv32_mul_cmp_30(const secp256k1_modinv32_signed30 *a, int alen, const secp256k1_modinv32_signed30 *b, int32_t factor) {
    int i;
    secp256k1_modinv32_signed30 am, bm;
    secp256k1_modinv32_mul_30(&am, a, alen, 1); /* Normalize all but the top limb of a. */
    secp256k1_modinv32_mul_30(&bm, b, 9, factor);
    for (i = 0; i < 8; ++i) {
        /* Verify that all but the top limb of a and b are normalized. */
        VERIFY_CHECK(am.v[i] >> 30 == 0);
        VERIFY_CHECK(bm.v[i] >> 30 == 0);
    }
    for (i = 8; i >= 0; --i) {
        if (am.v[i] < bm.v[i]) return -1;
        if (am.v[i] > bm.v[i]) return 1;
    }
    return 0;
}
#endif
/* Take as input a signed30 number in range (-2*modulus,modulus), and add a multiple of the modulus
 * to it to bring it to range [0,modulus). If sign < 0, the input will also be negated in the
 * process. The input must have limbs in range (-2^30,2^30). The output will have limbs in range
 * [0,2^30). */
static void secp256k1_modinv32_normalize_30(secp256k1_modinv32_signed30 *r, int32_t sign, const secp256k1_modinv32_modinfo *modinfo) {
    const int32_t M30 = (int32_t)(UINT32_MAX >> 2);
    int32_t r0 = r->v[0], r1 = r->v[1], r2 = r->v[2], r3 = r->v[3], r4 = r->v[4],
            r5 = r->v[5], r6 = r->v[6], r7 = r->v[7], r8 = r->v[8];
    volatile int32_t cond_add, cond_negate;

#ifdef VERIFY
    /* Verify that all limbs are in range (-2^30,2^30). */
    int i;
    for (i = 0; i < 9; ++i) {
        VERIFY_CHECK(r->v[i] >= -M30);
        VERIFY_CHECK(r->v[i] <= M30);
    }
    VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(r, 9, &modinfo->modulus, -2) > 0); /* r > -2*modulus */
    VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(r, 9, &modinfo->modulus, 1) < 0); /* r < modulus */
#endif

    /* In a first step, add the modulus if the input is negative, and then negate if requested.
     * This brings r from range (-2*modulus,modulus) to range (-modulus,modulus). As all input
     * limbs are in range (-2^30,2^30), this cannot overflow an int32_t. Note that the right
     * shifts below are signed sign-extending shifts (see assumptions.h for tests that that is
     * indeed the behavior of the right shift operator). */
    cond_add = r8 >> 31;
    r0 += modinfo->modulus.v[0] & cond_add;
    r1 += modinfo->modulus.v[1] & cond_add;
    r2 += modinfo->modulus.v[2] & cond_add;
    r3 += modinfo->modulus.v[3] & cond_add;
    r4 += modinfo->modulus.v[4] & cond_add;
    r5 += modinfo->modulus.v[5] & cond_add;
    r6 += modinfo->modulus.v[6] & cond_add;
    r7 += modinfo->modulus.v[7] & cond_add;
    r8 += modinfo->modulus.v[8] & cond_add;
    cond_negate = sign >> 31;
    r0 = (r0 ^ cond_negate) - cond_negate;
    r1 = (r1 ^ cond_negate) - cond_negate;
    r2 = (r2 ^ cond_negate) - cond_negate;
    r3 = (r3 ^ cond_negate) - cond_negate;
    r4 = (r4 ^ cond_negate) - cond_negate;
    r5 = (r5 ^ cond_negate) - cond_negate;
    r6 = (r6 ^ cond_negate) - cond_negate;
    r7 = (r7 ^ cond_negate) - cond_negate;
    r8 = (r8 ^ cond_negate) - cond_negate;
    /* Propagate the top bits, to bring limbs back to range (-2^30,2^30). */
    r1 += r0 >> 30; r0 &= M30;
    r2 += r1 >> 30; r1 &= M30;
    r3 += r2 >> 30; r2 &= M30;
    r4 += r3 >> 30; r3 &= M30;
    r5 += r4 >> 30; r4 &= M30;
    r6 += r5 >> 30; r5 &= M30;
    r7 += r6 >> 30; r6 &= M30;
    r8 += r7 >> 30; r7 &= M30;

    /* In a second step add the modulus again if the result is still negative, bringing r to range
     * [0,modulus). */
    cond_add = r8 >> 31;
    r0 += modinfo->modulus.v[0] & cond_add;
    r1 += modinfo->modulus.v[1] & cond_add;
    r2 += modinfo->modulus.v[2] & cond_add;
    r3 += modinfo->modulus.v[3] & cond_add;
    r4 += modinfo->modulus.v[4] & cond_add;
    r5 += modinfo->modulus.v[5] & cond_add;
    r6 += modinfo->modulus.v[6] & cond_add;
    r7 += modinfo->modulus.v[7] & cond_add;
    r8 += modinfo->modulus.v[8] & cond_add;
    /* And propagate again. */
    r1 += r0 >> 30; r0 &= M30;
    r2 += r1 >> 30; r1 &= M30;
    r3 += r2 >> 30; r2 &= M30;
    r4 += r3 >> 30; r3 &= M30;
    r5 += r4 >> 30; r4 &= M30;
    r6 += r5 >> 30; r5 &= M30;
    r7 += r6 >> 30; r6 &= M30;
    r8 += r7 >> 30; r7 &= M30;

    r->v[0] = r0;
    r->v[1] = r1;
    r->v[2] = r2;
    r->v[3] = r3;
    r->v[4] = r4;
    r->v[5] = r5;
    r->v[6] = r6;
    r->v[7] = r7;
    r->v[8] = r8;

    VERIFY_CHECK(r0 >> 30 == 0);
    VERIFY_CHECK(r1 >> 30 == 0);
    VERIFY_CHECK(r2 >> 30 == 0);
    VERIFY_CHECK(r3 >> 30 == 0);
    VERIFY_CHECK(r4 >> 30 == 0);
    VERIFY_CHECK(r5 >> 30 == 0);
    VERIFY_CHECK(r6 >> 30 == 0);
    VERIFY_CHECK(r7 >> 30 == 0);
    VERIFY_CHECK(r8 >> 30 == 0);
    VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(r, 9, &modinfo->modulus, 0) >= 0); /* r >= 0 */
    VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(r, 9, &modinfo->modulus, 1) < 0); /* r < modulus */
}
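Editor's note: the function above relies on two branchless idioms, an arithmetic right shift of the sign bit to build an all-ones/all-zeros mask, and (v ^ mask) - mask for conditional negation. A tiny demonstration (a sketch assuming sign-extending right shift of negative values, which this library tests in assumptions.h):

#include <assert.h>
#include <stdint.h>

int main(void) {
    int32_t neg = -5, pos = 7;
    /* Arithmetic right shift of the sign bit yields 0 or -1 (all ones). */
    int32_t mask_neg = neg >> 31;   /* -1 */
    int32_t mask_pos = pos >> 31;   /*  0 */
    assert(mask_neg == -1 && mask_pos == 0);
    /* (v ^ mask) - mask is v when mask == 0 and -v when mask == -1. */
    assert(((pos ^ mask_neg) - mask_neg) == -7);
    assert(((pos ^ mask_pos) - mask_pos) == 7);
    /* v & mask conditionally keeps v, as used for the modulus additions. */
    assert((pos & mask_neg) == 7 && (pos & mask_pos) == 0);
    return 0;
}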
/* Data type for transition matrices (see section 3 of explanation).
 *
 * t = [ u v ]
 *     [ q r ]
 */
typedef struct {
    int32_t u, v, q, r;
} secp256k1_modinv32_trans2x2;

/* Compute the transition matrix and zeta for 30 divsteps.
 *
 * Input:  zeta: initial zeta
 *         f0:   bottom limb of initial f
 *         g0:   bottom limb of initial g
 * Output: t: transition matrix
 * Return: final zeta
 *
 * Implements the divsteps_n_matrix function from the explanation.
 */
static int32_t secp256k1_modinv32_divsteps_30(int32_t zeta, uint32_t f0, uint32_t g0, secp256k1_modinv32_trans2x2 *t) {
    /* u,v,q,r are the elements of the transformation matrix being built up,
     * starting with the identity matrix. Semantically they are signed integers
     * in range [-2^30,2^30], but here represented as unsigned mod 2^32. This
     * permits left shifting (which is UB for negative numbers). The range
     * being inside [-2^31,2^31) means that casting to signed works correctly.
     */
    uint32_t u = 1, v = 0, q = 0, r = 1;
    volatile uint32_t c1, c2;
    uint32_t mask1, mask2, f = f0, g = g0, x, y, z;
    int i;

    for (i = 0; i < 30; ++i) {
        VERIFY_CHECK((f & 1) == 1); /* f must always be odd */
        VERIFY_CHECK((u * f0 + v * g0) == f << i);
        VERIFY_CHECK((q * f0 + r * g0) == g << i);
        /* Compute conditional masks for (zeta < 0) and for (g & 1). */
        c1 = zeta >> 31;
        mask1 = c1;
        c2 = g & 1;
        mask2 = -c2;
        /* Compute x,y,z, conditionally negated versions of f,u,v. */
        x = (f ^ mask1) - mask1;
        y = (u ^ mask1) - mask1;
        z = (v ^ mask1) - mask1;
        /* Conditionally add x,y,z to g,q,r. */
        g += x & mask2;
        q += y & mask2;
        r += z & mask2;
        /* In what follows, mask1 is a condition mask for (zeta < 0) and (g & 1). */
        mask1 &= mask2;
        /* Conditionally change zeta into -zeta-2 or zeta-1. */
        zeta = (zeta ^ mask1) - 1;
        /* Conditionally add g,q,r to f,u,v. */
        f += g & mask1;
        u += q & mask1;
        v += r & mask1;
        /* Shifts */
        g >>= 1;
        u <<= 1;
        v <<= 1;
        /* Bounds on zeta that follow from the bounds on iteration count (max 20*30 divsteps). */
        VERIFY_CHECK(zeta >= -601 && zeta <= 601);
    }
    /* Return data in t and return value. */
    t->u = (int32_t)u;
    t->v = (int32_t)v;
    t->q = (int32_t)q;
    t->r = (int32_t)r;
    /* The determinant of t must be a power of two. This guarantees that multiplication with t
     * does not change the gcd of f and g, apart from adding a power-of-2 factor to it (which
     * will be divided out again). As each divstep's individual matrix has determinant 2, the
     * aggregate of 30 of them will have determinant 2^30. */
    VERIFY_CHECK((int64_t)t->u * t->r - (int64_t)t->v * t->q == ((int64_t)1) << 30);
    return zeta;
}
/* secp256k1_modinv32_inv256[i] = -(2*i+1)^-1 (mod 256) */
static const uint8_t secp256k1_modinv32_inv256[128] = {
    0xFF, 0x55, 0x33, 0x49, 0xC7, 0x5D, 0x3B, 0x11, 0x0F, 0xE5, 0xC3, 0x59,
    0xD7, 0xED, 0xCB, 0x21, 0x1F, 0x75, 0x53, 0x69, 0xE7, 0x7D, 0x5B, 0x31,
    0x2F, 0x05, 0xE3, 0x79, 0xF7, 0x0D, 0xEB, 0x41, 0x3F, 0x95, 0x73, 0x89,
    0x07, 0x9D, 0x7B, 0x51, 0x4F, 0x25, 0x03, 0x99, 0x17, 0x2D, 0x0B, 0x61,
    0x5F, 0xB5, 0x93, 0xA9, 0x27, 0xBD, 0x9B, 0x71, 0x6F, 0x45, 0x23, 0xB9,
    0x37, 0x4D, 0x2B, 0x81, 0x7F, 0xD5, 0xB3, 0xC9, 0x47, 0xDD, 0xBB, 0x91,
    0x8F, 0x65, 0x43, 0xD9, 0x57, 0x6D, 0x4B, 0xA1, 0x9F, 0xF5, 0xD3, 0xE9,
    0x67, 0xFD, 0xDB, 0xB1, 0xAF, 0x85, 0x63, 0xF9, 0x77, 0x8D, 0x6B, 0xC1,
    0xBF, 0x15, 0xF3, 0x09, 0x87, 0x1D, 0xFB, 0xD1, 0xCF, 0xA5, 0x83, 0x19,
    0x97, 0xAD, 0x8B, 0xE1, 0xDF, 0x35, 0x13, 0x29, 0xA7, 0x3D, 0x1B, 0xF1,
    0xEF, 0xC5, 0xA3, 0x39, 0xB7, 0xCD, 0xAB, 0x01
};
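Editor's note: the table can be regenerated from its stated definition; a brute-force sketch (illustrative only) that recomputes -(2*i+1)^-1 mod 256 and spot-checks it against entries above:

#include <assert.h>
#include <stdint.h>

int main(void) {
    int i;
    for (i = 0; i < 128; i++) {
        uint8_t odd = (uint8_t)(2 * i + 1), inv = 0, j = 1;
        /* Brute-force the inverse of odd mod 256. */
        do {
            if ((uint8_t)(odd * j) == 1) inv = j;
        } while (++j != 0);
        {
            uint8_t entry = (uint8_t)-inv;   /* -(2i+1)^-1 mod 256 */
            /* Defining property: (2i+1) * entry == -1 == 0xFF (mod 256). */
            assert((uint8_t)(odd * entry) == 0xFF);
            if (i == 0) assert(entry == 0xFF);   /* matches table[0] */
            if (i == 1) assert(entry == 0x55);   /* matches table[1] */
            if (i == 127) assert(entry == 0x01); /* matches table[127] */
        }
    }
    return 0;
}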
/* Compute the transition matrix and eta for 30 divsteps (variable time).
 *
 * Input:  eta: initial eta
 *         f0:  bottom limb of initial f
 *         g0:  bottom limb of initial g
 * Output: t: transition matrix
 * Return: final eta
 *
 * Implements the divsteps_n_matrix_var function from the explanation.
 */
static int32_t secp256k1_modinv32_divsteps_30_var(int32_t eta, uint32_t f0, uint32_t g0, secp256k1_modinv32_trans2x2 *t) {
    /* Transformation matrix; see comments in secp256k1_modinv32_divsteps_30. */
    uint32_t u = 1, v = 0, q = 0, r = 1;
    uint32_t f = f0, g = g0, m;
    uint16_t w;
    int i = 30, limit, zeros;

    for (;;) {
        /* Use a sentinel bit to count zeros only up to i. */
        zeros = secp256k1_ctz32_var(g | (UINT32_MAX << i));
        /* Perform zeros divsteps at once; they all just divide g by two. */
        g >>= zeros;
        u <<= zeros;
        v <<= zeros;
        eta -= zeros;
        i -= zeros;
        /* We're done once we've done 30 divsteps. */
        if (i == 0) break;
        VERIFY_CHECK((f & 1) == 1);
        VERIFY_CHECK((g & 1) == 1);
        VERIFY_CHECK((u * f0 + v * g0) == f << (30 - i));
        VERIFY_CHECK((q * f0 + r * g0) == g << (30 - i));
        /* Bounds on eta that follow from the bounds on iteration count (max 25*30 divsteps). */
        VERIFY_CHECK(eta >= -751 && eta <= 751);
        /* If eta is negative, negate it and replace f,g with g,-f. */
        if (eta < 0) {
            uint32_t tmp;
            eta = -eta;
            tmp = f; f = g; g = -tmp;
            tmp = u; u = q; q = -tmp;
            tmp = v; v = r; r = -tmp;
        }
        /* eta is now >= 0. In what follows we're going to cancel out the bottom bits of g. No more
         * than i can be cancelled out (as we'd be done before that point), and no more than eta+1
         * can be done as its sign will flip once that happens. */
        limit = ((int)eta + 1) > i ? i : ((int)eta + 1);
        /* m is a mask for the bottom min(limit, 8) bits (our table only supports 8 bits). */
        VERIFY_CHECK(limit > 0 && limit <= 30);
        m = (UINT32_MAX >> (32 - limit)) & 255U;
        /* Find what multiple of f must be added to g to cancel its bottom min(limit, 8) bits. */
        w = (g * secp256k1_modinv32_inv256[(f >> 1) & 127]) & m;
        /* Do so. */
        g += f * w;
        q += u * w;
        r += v * w;
        VERIFY_CHECK((g & m) == 0);
    }
    /* Return data in t and return value. */
    t->u = (int32_t)u;
    t->v = (int32_t)v;
    t->q = (int32_t)q;
    t->r = (int32_t)r;
    /* The determinant of t must be a power of two. This guarantees that multiplication with t
     * does not change the gcd of f and g, apart from adding a power-of-2 factor to it (which
     * will be divided out again). As each divstep's individual matrix has determinant 2, the
     * aggregate of 30 of them will have determinant 2^30. */
    VERIFY_CHECK((int64_t)t->u * t->r - (int64_t)t->v * t->q == ((int64_t)1) << 30);
    return eta;
}
/* Compute the transition matrix and eta for 30 posdivsteps (variable time, eta=-delta), keeping track
 * of the Jacobi symbol along the way. f0 and g0 must be f and g mod 2^32 rather than 2^30, because
 * Jacobi tracking requires knowing (f mod 8) rather than just (f mod 2).
 *
 * Input:        eta: initial eta
 *               f0:  bottom limb of initial f
 *               g0:  bottom limb of initial g
 * Output:       t: transition matrix
 * Input/Output: (*jacp & 1) is bitflipped if and only if the Jacobi symbol of (f | g) changes sign
 *               by applying the returned transformation matrix to it. The other bits of *jacp may
 *               change, but are meaningless.
 * Return: final eta
 */
static int32_t secp256k1_modinv32_posdivsteps_30_var(int32_t eta, uint32_t f0, uint32_t g0, secp256k1_modinv32_trans2x2 *t, int *jacp) {
    /* Transformation matrix. */
    uint32_t u = 1, v = 0, q = 0, r = 1;
    uint32_t f = f0, g = g0, m;
    uint16_t w;
    int i = 30, limit, zeros;
    int jac = *jacp;

    for (;;) {
        /* Use a sentinel bit to count zeros only up to i. */
        zeros = secp256k1_ctz32_var(g | (UINT32_MAX << i));
        /* Perform zeros divsteps at once; they all just divide g by two. */
        g >>= zeros;
        u <<= zeros;
        v <<= zeros;
        eta -= zeros;
        i -= zeros;
        /* Update the bottom bit of jac: when dividing g by an odd power of 2,
         * if (f mod 8) is 3 or 5, the Jacobi symbol changes sign. */
        jac ^= (zeros & ((f >> 1) ^ (f >> 2)));
        /* We're done once we've done 30 posdivsteps. */
        if (i == 0) break;
        VERIFY_CHECK((f & 1) == 1);
        VERIFY_CHECK((g & 1) == 1);
        VERIFY_CHECK((u * f0 + v * g0) == f << (30 - i));
        VERIFY_CHECK((q * f0 + r * g0) == g << (30 - i));
        /* If eta is negative, negate it and replace f,g with g,f. */
        if (eta < 0) {
            uint32_t tmp;
            eta = -eta;
            /* Update bottom bit of jac: when swapping f and g, the Jacobi symbol changes sign
             * if both f and g are 3 mod 4. */
            jac ^= ((f & g) >> 1);
            tmp = f; f = g; g = tmp;
            tmp = u; u = q; q = tmp;
            tmp = v; v = r; r = tmp;
        }
        /* eta is now >= 0. In what follows we're going to cancel out the bottom bits of g. No more
         * than i can be cancelled out (as we'd be done before that point), and no more than eta+1
         * can be done as its sign will flip once that happens. */
        limit = ((int)eta + 1) > i ? i : ((int)eta + 1);
        /* m is a mask for the bottom min(limit, 8) bits (our table only supports 8 bits). */
        VERIFY_CHECK(limit > 0 && limit <= 30);
        m = (UINT32_MAX >> (32 - limit)) & 255U;
        /* Find what multiple of f must be added to g to cancel its bottom min(limit, 8) bits. */
        w = (g * secp256k1_modinv32_inv256[(f >> 1) & 127]) & m;
        /* Do so. */
        g += f * w;
        q += u * w;
        r += v * w;
        VERIFY_CHECK((g & m) == 0);
    }
    /* Return data in t and return value. */
    t->u = (int32_t)u;
    t->v = (int32_t)v;
    t->q = (int32_t)q;
    t->r = (int32_t)r;
    /* The determinant of t must be a power of two. This guarantees that multiplication with t
     * does not change the gcd of f and g, apart from adding a power-of-2 factor to it (which
     * will be divided out again). As each divstep's individual matrix has determinant 2 or -2,
     * the aggregate of 30 of them will have determinant 2^30 or -2^30. */
    VERIFY_CHECK((int64_t)t->u * t->r - (int64_t)t->v * t->q == ((int64_t)1) << 30 ||
                 (int64_t)t->u * t->r - (int64_t)t->v * t->q == -(((int64_t)1) << 30));
    *jacp = jac;
    return eta;
}
/* Compute (t/2^30) * [d, e] mod modulus, where t is a transition matrix for 30 divsteps.
 *
 * On input and output, d and e are in range (-2*modulus,modulus). All output limbs will be in range
 * (-2^30,2^30).
 *
 * This implements the update_de function from the explanation.
 */
static void secp256k1_modinv32_update_de_30(secp256k1_modinv32_signed30 *d, secp256k1_modinv32_signed30 *e, const secp256k1_modinv32_trans2x2 *t, const secp256k1_modinv32_modinfo* modinfo) {
    const int32_t M30 = (int32_t)(UINT32_MAX >> 2);
    const int32_t u = t->u, v = t->v, q = t->q, r = t->r;
    int32_t di, ei, md, me, sd, se;
    int64_t cd, ce;
    int i;
    VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(d, 9, &modinfo->modulus, -2) > 0); /* d > -2*modulus */
    VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(d, 9, &modinfo->modulus, 1) < 0); /* d < modulus */
    VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(e, 9, &modinfo->modulus, -2) > 0); /* e > -2*modulus */
    VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(e, 9, &modinfo->modulus, 1) < 0); /* e < modulus */
    VERIFY_CHECK(labs(u) <= (M30 + 1 - labs(v))); /* |u|+|v| <= 2^30 */
    VERIFY_CHECK(labs(q) <= (M30 + 1 - labs(r))); /* |q|+|r| <= 2^30 */

    /* [md,me] start as zero; plus [u,q] if d is negative; plus [v,r] if e is negative. */
    sd = d->v[8] >> 31;
    se = e->v[8] >> 31;
    md = (u & sd) + (v & se);
    me = (q & sd) + (r & se);
    /* Begin computing t*[d,e]. */
    di = d->v[0];
    ei = e->v[0];
    cd = (int64_t)u * di + (int64_t)v * ei;
    ce = (int64_t)q * di + (int64_t)r * ei;
    /* Correct md,me so that t*[d,e]+modulus*[md,me] has 30 zero bottom bits. */
    md -= (modinfo->modulus_inv30 * (uint32_t)cd + md) & M30;
    me -= (modinfo->modulus_inv30 * (uint32_t)ce + me) & M30;
    /* Update the beginning of computation for t*[d,e]+modulus*[md,me] now md,me are known. */
    cd += (int64_t)modinfo->modulus.v[0] * md;
    ce += (int64_t)modinfo->modulus.v[0] * me;
    /* Verify that the low 30 bits of the computation are indeed zero, and then throw them away. */
    VERIFY_CHECK(((int32_t)cd & M30) == 0); cd >>= 30;
    VERIFY_CHECK(((int32_t)ce & M30) == 0); ce >>= 30;
    /* Now iteratively compute limb i=1..8 of t*[d,e]+modulus*[md,me], and store them in output
     * limb i-1 (shifting down by 30 bits). */
    for (i = 1; i < 9; ++i) {
        di = d->v[i];
        ei = e->v[i];
        cd += (int64_t)u * di + (int64_t)v * ei;
        ce += (int64_t)q * di + (int64_t)r * ei;
        cd += (int64_t)modinfo->modulus.v[i] * md;
        ce += (int64_t)modinfo->modulus.v[i] * me;
        d->v[i - 1] = (int32_t)cd & M30; cd >>= 30;
        e->v[i - 1] = (int32_t)ce & M30; ce >>= 30;
    }
    /* What remains is limb 9 of t*[d,e]+modulus*[md,me]; store it as output limb 8. */
    d->v[8] = (int32_t)cd;
    e->v[8] = (int32_t)ce;

    VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(d, 9, &modinfo->modulus, -2) > 0); /* d > -2*modulus */
    VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(d, 9, &modinfo->modulus, 1) < 0); /* d < modulus */
    VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(e, 9, &modinfo->modulus, -2) > 0); /* e > -2*modulus */
    VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(e, 9, &modinfo->modulus, 1) < 0); /* e < modulus */
}
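Editor's note: the md/me correction above is a Montgomery-style trick: adding modulus * md with md = -(cd * modulus^-1) mod 2^30 clears the bottom 30 bits of the accumulator so they can be shifted away without losing the value mod modulus. A single-limb toy demonstration (a sketch; all constants hypothetical):

#include <assert.h>
#include <stdint.h>

int main(void) {
    const uint32_t M30 = UINT32_MAX >> 2;    /* 2^30 - 1 */
    const uint32_t m = 0x2FBD7541UL;         /* arbitrary odd 30-bit "modulus" limb */
    uint64_t c = 0x123456789ABCDEFULL;       /* accumulator whose low bits we cancel */
    uint32_t minv = 1;
    int i;
    /* Newton iteration: doubles the number of correct bottom bits each round,
     * giving m^-1 mod 2^32 (and hence mod 2^30) after 5 steps. */
    for (i = 0; i < 5; i++) minv *= 2 - m * minv;
    assert(((m * minv) & M30) == 1);
    {
        /* Choose md = -(c * m^-1) mod 2^30; then c + m*md == 0 mod 2^30. */
        uint32_t md = (uint32_t)(-(minv * (uint32_t)c)) & M30;
        c += (uint64_t)m * md;
        assert(((uint32_t)c & M30) == 0);    /* low 30 bits now shiftable */
    }
    return 0;
}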
/* Compute (t/2^30) * [f, g], where t is a transition matrix for 30 divsteps.
 *
 * This implements the update_fg function from the explanation.
 */
static void secp256k1_modinv32_update_fg_30(secp256k1_modinv32_signed30 *f, secp256k1_modinv32_signed30 *g, const secp256k1_modinv32_trans2x2 *t) {
    const int32_t M30 = (int32_t)(UINT32_MAX >> 2);
    const int32_t u = t->u, v = t->v, q = t->q, r = t->r;
    int32_t fi, gi;
    int64_t cf, cg;
    int i;
    /* Start computing t*[f,g]. */
    fi = f->v[0];
    gi = g->v[0];
    cf = (int64_t)u * fi + (int64_t)v * gi;
    cg = (int64_t)q * fi + (int64_t)r * gi;
    /* Verify that the bottom 30 bits of the result are zero, and then throw them away. */
    VERIFY_CHECK(((int32_t)cf & M30) == 0); cf >>= 30;
    VERIFY_CHECK(((int32_t)cg & M30) == 0); cg >>= 30;
    /* Now iteratively compute limb i=1..8 of t*[f,g], and store them in output limb i-1 (shifting
     * down by 30 bits). */
    for (i = 1; i < 9; ++i) {
        fi = f->v[i];
        gi = g->v[i];
        cf += (int64_t)u * fi + (int64_t)v * gi;
        cg += (int64_t)q * fi + (int64_t)r * gi;
        f->v[i - 1] = (int32_t)cf & M30; cf >>= 30;
        g->v[i - 1] = (int32_t)cg & M30; cg >>= 30;
    }
    /* What remains is limb 9 of t*[f,g]; store it as output limb 8. */
    f->v[8] = (int32_t)cf;
    g->v[8] = (int32_t)cg;
}
/* Compute (t/2^30) * [f, g], where t is a transition matrix for 30 divsteps.
 *
 * Version that operates on a variable number of limbs in f and g.
 *
 * This implements the update_fg function from the explanation in modinv64_impl.h.
 */
static void secp256k1_modinv32_update_fg_30_var(int len, secp256k1_modinv32_signed30 *f, secp256k1_modinv32_signed30 *g, const secp256k1_modinv32_trans2x2 *t) {
    const int32_t M30 = (int32_t)(UINT32_MAX >> 2);
    const int32_t u = t->u, v = t->v, q = t->q, r = t->r;
    int32_t fi, gi;
    int64_t cf, cg;
    int i;
    VERIFY_CHECK(len > 0);
    /* Start computing t*[f,g]. */
    fi = f->v[0];
    gi = g->v[0];
    cf = (int64_t)u * fi + (int64_t)v * gi;
    cg = (int64_t)q * fi + (int64_t)r * gi;
    /* Verify that the bottom 30 bits of the result are zero, and then throw them away. */
    VERIFY_CHECK(((int32_t)cf & M30) == 0); cf >>= 30;
    VERIFY_CHECK(((int32_t)cg & M30) == 0); cg >>= 30;
    /* Now iteratively compute limb i=1..len of t*[f,g], and store them in output limb i-1 (shifting
     * down by 30 bits). */
    for (i = 1; i < len; ++i) {
        fi = f->v[i];
        gi = g->v[i];
        cf += (int64_t)u * fi + (int64_t)v * gi;
        cg += (int64_t)q * fi + (int64_t)r * gi;
        f->v[i - 1] = (int32_t)cf & M30; cf >>= 30;
        g->v[i - 1] = (int32_t)cg & M30; cg >>= 30;
    }
    /* What remains is limb (len) of t*[f,g]; store it as output limb (len-1). */
    f->v[len - 1] = (int32_t)cf;
    g->v[len - 1] = (int32_t)cg;
}
/* Compute the inverse of x modulo modinfo->modulus, and replace x with it (constant time in x). */
static void secp256k1_modinv32(secp256k1_modinv32_signed30 *x, const secp256k1_modinv32_modinfo *modinfo) {
    /* Start with d=0, e=1, f=modulus, g=x, zeta=-1. */
    secp256k1_modinv32_signed30 d = {{0}};
    secp256k1_modinv32_signed30 e = {{1}};
    secp256k1_modinv32_signed30 f = modinfo->modulus;
    secp256k1_modinv32_signed30 g = *x;
    int i;
    int32_t zeta = -1; /* zeta = -(delta+1/2); delta is initially 1/2. */

    /* Do 20 iterations of 30 divsteps each = 600 divsteps. 590 suffices for 256-bit inputs. */
    for (i = 0; i < 20; ++i) {
        /* Compute transition matrix and new zeta after 30 divsteps. */
        secp256k1_modinv32_trans2x2 t;
        zeta = secp256k1_modinv32_divsteps_30(zeta, f.v[0], g.v[0], &t);
        /* Update d,e using that transition matrix. */
        secp256k1_modinv32_update_de_30(&d, &e, &t, modinfo);
        /* Update f,g using that transition matrix. */
        VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&f, 9, &modinfo->modulus, -1) > 0); /* f > -modulus */
        VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&f, 9, &modinfo->modulus, 1) <= 0); /* f <= modulus */
        VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&g, 9, &modinfo->modulus, -1) > 0); /* g > -modulus */
        VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&g, 9, &modinfo->modulus, 1) < 0); /* g < modulus */

        secp256k1_modinv32_update_fg_30(&f, &g, &t);

        VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&f, 9, &modinfo->modulus, -1) > 0); /* f > -modulus */
        VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&f, 9, &modinfo->modulus, 1) <= 0); /* f <= modulus */
        VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&g, 9, &modinfo->modulus, -1) > 0); /* g > -modulus */
        VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&g, 9, &modinfo->modulus, 1) < 0); /* g < modulus */
    }

    /* At this point sufficient iterations have been performed that g must have reached 0
     * and (if g was not originally 0) f must now equal +/- GCD of the initial f, g
     * values i.e. +/- 1, and d now contains +/- the modular inverse. */

    /* g == 0 */
    VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&g, 9, &SECP256K1_SIGNED30_ONE, 0) == 0);
    /* |f| == 1, or (x == 0 and d == 0 and f == modulus) */
    VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&f, 9, &SECP256K1_SIGNED30_ONE, -1) == 0 ||
                 secp256k1_modinv32_mul_cmp_30(&f, 9, &SECP256K1_SIGNED30_ONE, 1) == 0 ||
                 (secp256k1_modinv32_mul_cmp_30(x, 9, &SECP256K1_SIGNED30_ONE, 0) == 0 &&
                  secp256k1_modinv32_mul_cmp_30(&d, 9, &SECP256K1_SIGNED30_ONE, 0) == 0 &&
                  secp256k1_modinv32_mul_cmp_30(&f, 9, &modinfo->modulus, 1) == 0));

    /* Optionally negate d, normalize to [0,modulus), and return it. */
    secp256k1_modinv32_normalize_30(&d, f.v[8], modinfo);
    *x = d;
}
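Editor's note: stripped of limbs, batching, and matrices, the function above reduces to the plain safegcd loop from doc/safegcd_implementation.md. A toy full-width transcription for small odd moduli (a sketch, names hypothetical), checked against the defining property x * x^-1 == 1 (mod M):

#include <assert.h>
#include <stdint.h>

/* Divide x by 2 modulo odd M: if x is odd, adding M makes it even first. */
static int64_t div2(int64_t M, int64_t x) {
    if (x & 1) x += M;
    return x / 2;
}

/* Maintains the invariants f == d*x and g == e*x (mod M) across divsteps;
 * when g reaches 0, f is +/-1 and d is +/- the inverse of x. */
static int64_t toy_modinv(int64_t x, int64_t M) {
    int64_t delta = 1, f = M, g = x, d = 0, e = 1;
    while (g != 0) {
        if (delta > 0 && (g & 1)) {
            int64_t t = f, s = d;
            delta = 1 - delta;
            f = g; g = (g - t) / 2;
            d = e; e = div2(M, e - s);
        } else if (g & 1) {
            delta = 1 + delta;
            g = (g + f) / 2;
            e = div2(M, e + d);
        } else {
            delta = 1 + delta;
            g = g / 2;
            e = div2(M, e);
        }
    }
    /* f is now +/-1 (given gcd(x,M)=1); multiplying by f fixes the sign of d. */
    return ((f * d) % M + M) % M;
}

int main(void) {
    int64_t M = 1000003, x;   /* a small odd prime standing in for the modulus */
    for (x = 1; x < 1000; x++) {
        assert((x * toy_modinv(x, M)) % M == 1);
    }
    return 0;
}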
/* Compute the inverse of x modulo modinfo->modulus, and replace x with it (variable time). */
static void secp256k1_modinv32_var(secp256k1_modinv32_signed30 *x, const secp256k1_modinv32_modinfo *modinfo) {
    /* Start with d=0, e=1, f=modulus, g=x, eta=-1. */
    secp256k1_modinv32_signed30 d = {{0, 0, 0, 0, 0, 0, 0, 0, 0}};
    secp256k1_modinv32_signed30 e = {{1, 0, 0, 0, 0, 0, 0, 0, 0}};
    secp256k1_modinv32_signed30 f = modinfo->modulus;
    secp256k1_modinv32_signed30 g = *x;
#ifdef VERIFY
    int i = 0;
#endif
    int j, len = 9;
    int32_t eta = -1; /* eta = -delta; delta is initially 1 (faster for the variable-time code) */
    int32_t cond, fn, gn;

    /* Do iterations of 30 divsteps each until g=0. */
    while (1) {
        /* Compute transition matrix and new eta after 30 divsteps. */
        secp256k1_modinv32_trans2x2 t;
        eta = secp256k1_modinv32_divsteps_30_var(eta, f.v[0], g.v[0], &t);
        /* Update d,e using that transition matrix. */
        secp256k1_modinv32_update_de_30(&d, &e, &t, modinfo);
        /* Update f,g using that transition matrix. */

        VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&f, len, &modinfo->modulus, -1) > 0); /* f > -modulus */
        VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&f, len, &modinfo->modulus, 1) <= 0); /* f <= modulus */
        VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&g, len, &modinfo->modulus, -1) > 0); /* g > -modulus */
        VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&g, len, &modinfo->modulus, 1) < 0); /* g < modulus */

        secp256k1_modinv32_update_fg_30_var(len, &f, &g, &t);
        /* If the bottom limb of g is 0, there is a chance g=0. */
        if (g.v[0] == 0) {
            cond = 0;
            /* Check if all other limbs are also 0. */
            for (j = 1; j < len; ++j) {
                cond |= g.v[j];
            }
            /* If so, we're done. */
            if (cond == 0) break;
        }

        /* Determine if len>1 and limb (len-1) of both f and g is 0 or -1. */
        fn = f.v[len - 1];
        gn = g.v[len - 1];
        cond = ((int32_t)len - 2) >> 31;
        cond |= fn ^ (fn >> 31);
        cond |= gn ^ (gn >> 31);
        /* If so, reduce length, propagating the sign of f and g's top limb into the one below. */
        if (cond == 0) {
            f.v[len - 2] |= (uint32_t)fn << 30;
            g.v[len - 2] |= (uint32_t)gn << 30;
            --len;
        }

        VERIFY_CHECK(++i < 25); /* We should never need more than 25*30 = 750 divsteps */
        VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&f, len, &modinfo->modulus, -1) > 0); /* f > -modulus */
        VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&f, len, &modinfo->modulus, 1) <= 0); /* f <= modulus */
        VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&g, len, &modinfo->modulus, -1) > 0); /* g > -modulus */
        VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&g, len, &modinfo->modulus, 1) < 0); /* g < modulus */
    }

    /* At this point g is 0 and (if g was not originally 0) f must now equal +/- GCD of
     * the initial f, g values i.e. +/- 1, and d now contains +/- the modular inverse. */

    /* g == 0 */
    VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&g, len, &SECP256K1_SIGNED30_ONE, 0) == 0);
    /* |f| == 1, or (x == 0 and d == 0 and f == modulus) */
    VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&f, len, &SECP256K1_SIGNED30_ONE, -1) == 0 ||
                 secp256k1_modinv32_mul_cmp_30(&f, len, &SECP256K1_SIGNED30_ONE, 1) == 0 ||
                 (secp256k1_modinv32_mul_cmp_30(x, 9, &SECP256K1_SIGNED30_ONE, 0) == 0 &&
                  secp256k1_modinv32_mul_cmp_30(&d, 9, &SECP256K1_SIGNED30_ONE, 0) == 0 &&
                  secp256k1_modinv32_mul_cmp_30(&f, len, &modinfo->modulus, 1) == 0));

    /* Optionally negate d, normalize to [0,modulus), and return it. */
    secp256k1_modinv32_normalize_30(&d, f.v[len - 1], modinfo);
    *x = d;
}
/* Do up to 50 iterations of 30 posdivsteps each (up to 1500 divsteps; more is extremely rare) until f=1.
 * In VERIFY mode use a lower number of iterations (25, i.e. 750 divsteps, close to the median of 756), so failure actually occurs. */
#ifdef VERIFY
#define JACOBI32_ITERATIONS 25
#else
#define JACOBI32_ITERATIONS 50
#endif

/* Compute the Jacobi symbol of x modulo modinfo->modulus (variable time). gcd(x,modulus) must be 1. */
static int secp256k1_jacobi32_maybe_var(const secp256k1_modinv32_signed30 *x, const secp256k1_modinv32_modinfo *modinfo) {
    /* Start with f=modulus, g=x, eta=-1. */
    secp256k1_modinv32_signed30 f = modinfo->modulus;
    secp256k1_modinv32_signed30 g = *x;
    int j, len = 9;
    int32_t eta = -1; /* eta = -delta; delta is initially 1 */
    int32_t cond, fn, gn;
    int jac = 0;
    int count;

    /* The input limbs must all be non-negative. */
    VERIFY_CHECK(g.v[0] >= 0 && g.v[1] >= 0 && g.v[2] >= 0 && g.v[3] >= 0 && g.v[4] >= 0 && g.v[5] >= 0 && g.v[6] >= 0 && g.v[7] >= 0 && g.v[8] >= 0);

    /* If x > 0, then if the loop below converges, it converges to f=g=gcd(x,modulus). Since we
     * require that gcd(x,modulus)=1 and modulus>=3, x cannot be 0. Thus, we must reach f=1 (or
     * time out). */
    VERIFY_CHECK((g.v[0] | g.v[1] | g.v[2] | g.v[3] | g.v[4] | g.v[5] | g.v[6] | g.v[7] | g.v[8]) != 0);

    for (count = 0; count < JACOBI32_ITERATIONS; ++count) {
        /* Compute transition matrix and new eta after 30 posdivsteps. */
        secp256k1_modinv32_trans2x2 t;
        eta = secp256k1_modinv32_posdivsteps_30_var(eta, f.v[0] | ((uint32_t)f.v[1] << 30), g.v[0] | ((uint32_t)g.v[1] << 30), &t, &jac);
        /* Update f,g using that transition matrix. */
        VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&f, len, &modinfo->modulus, 0) > 0); /* f > 0 */
        VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&f, len, &modinfo->modulus, 1) <= 0); /* f <= modulus */
        VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&g, len, &modinfo->modulus, 0) > 0); /* g > 0 */
        VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&g, len, &modinfo->modulus, 1) < 0); /* g < modulus */

        secp256k1_modinv32_update_fg_30_var(len, &f, &g, &t);
        /* If the bottom limb of f is 1, there is a chance that f=1. */
        if (f.v[0] == 1) {
            cond = 0;
            /* Check if the other limbs are also 0. */
            for (j = 1; j < len; ++j) {
                cond |= f.v[j];
            }
            /* If so, we're done. If f=1, the Jacobi symbol (g | f)=1. */
            if (cond == 0) return 1 - 2*(jac & 1);
        }

        /* Determine if len>1 and limb (len-1) of both f and g is 0. */
        fn = f.v[len - 1];
        gn = g.v[len - 1];
        cond = ((int32_t)len - 2) >> 31;
        cond |= fn;
        cond |= gn;
        /* If so, reduce length. */
        if (cond == 0) --len;

        VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&f, len, &modinfo->modulus, 0) > 0); /* f > 0 */
        VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&f, len, &modinfo->modulus, 1) <= 0); /* f <= modulus */
        VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&g, len, &modinfo->modulus, 0) > 0); /* g > 0 */
        VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&g, len, &modinfo->modulus, 1) < 0); /* g < modulus */
    }

    /* The loop failed to converge to f=g after 1500 divsteps. Return 0, indicating unknown result. */
    return 0;
}

#endif /* SECP256K1_MODINV32_IMPL_H */
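Editor's note: the two Jacobi-symbol sign rules used in secp256k1_modinv32_posdivsteps_30_var are standard: (2|f) = -1 exactly when f is 3 or 5 mod 8, and swapping flips the sign exactly when both operands are 3 mod 4 (quadratic reciprocity). A brute-force check against Euler's criterion for small primes (illustrative sketch):

#include <assert.h>
#include <stdint.h>

/* Legendre symbol (a | p) for odd prime p via Euler's criterion:
 * a^((p-1)/2) mod p, mapped to {-1, 0, 1}. */
static int legendre(int64_t a, int64_t p) {
    int64_t r = 1, b = a % p, e = (p - 1) / 2;
    if (b < 0) b += p;
    if (b == 0) return 0;
    while (e > 0) {
        if (e & 1) r = r * b % p;
        b = b * b % p;
        e >>= 1;
    }
    return r == 1 ? 1 : -1;
}

int main(void) {
    static const int64_t primes[] = {3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41};
    int i, j;
    for (i = 0; i < 12; i++) {
        int64_t p = primes[i];
        /* Rule used when halving g: (2 | p) == -1 iff p mod 8 is 3 or 5. */
        int flips = ((p & 7) == 3 || (p & 7) == 5);
        assert(legendre(2, p) == (flips ? -1 : 1));
        /* Rule used when swapping f and g (quadratic reciprocity):
         * (p | q) != (q | p) iff p and q are both 3 mod 4. */
        for (j = i + 1; j < 12; j++) {
            int64_t q = primes[j];
            int both3mod4 = ((p & 3) == 3) && ((q & 3) == 3);
            assert((legendre(p, q) * legendre(q, p) == -1) == both3mod4);
        }
    }
    return 0;
}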
Binary file not shown.
@@ -13,8 +13,6 @@
 #include "scalar_low.h"
 #elif defined(SECP256K1_WIDEMUL_INT128)
 #include "scalar_4x64.h"
-#elif defined(SECP256K1_WIDEMUL_INT64)
-#include "scalar_8x32.h"
 #else
 #error "Please select wide multiplication implementation"
 #endif
@@ -1,19 +0,0 @@
/***********************************************************************
 * Copyright (c) 2014 Pieter Wuille                                    *
 * Distributed under the MIT software license, see the accompanying    *
 * file COPYING or https://www.opensource.org/licenses/mit-license.php.*
 ***********************************************************************/

#ifndef SECP256K1_SCALAR_REPR_H
#define SECP256K1_SCALAR_REPR_H

#include <stdint.h>

/** A scalar modulo the group order of the secp256k1 curve. */
typedef struct {
    uint32_t d[8];
} secp256k1_scalar;

#define SECP256K1_SCALAR_CONST(d7, d6, d5, d4, d3, d2, d1, d0) {{(d0), (d1), (d2), (d3), (d4), (d5), (d6), (d7)}}

#endif /* SECP256K1_SCALAR_REPR_H */
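Editor's note: note the argument order of SECP256K1_SCALAR_CONST above: words are written most significant first, as the number reads in hex, but stored least significant first in d[]. A minimal check (standalone copies of the typedef and macro from the header above):

#include <assert.h>
#include <stdint.h>

typedef struct {
    uint32_t d[8];
} secp256k1_scalar;

#define SECP256K1_SCALAR_CONST(d7, d6, d5, d4, d3, d2, d1, d0) {{(d0), (d1), (d2), (d3), (d4), (d5), (d6), (d7)}}

int main(void) {
    secp256k1_scalar s = SECP256K1_SCALAR_CONST(
        0x77777777UL, 0x66666666UL, 0x55555555UL, 0x44444444UL,
        0x33333333UL, 0x22222222UL, 0x11111111UL, 0x00000000UL);
    assert(s.d[0] == 0x00000000UL); /* least significant 32 bits */
    assert(s.d[7] == 0x77777777UL); /* most significant 32 bits */
    return 0;
}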
@@ -1,816 +0,0 @@
/***********************************************************************
 * Copyright (c) 2014 Pieter Wuille                                    *
 * Distributed under the MIT software license, see the accompanying    *
 * file COPYING or https://www.opensource.org/licenses/mit-license.php.*
 ***********************************************************************/

#ifndef SECP256K1_SCALAR_REPR_IMPL_H
#define SECP256K1_SCALAR_REPR_IMPL_H

#include "checkmem.h"
#include "modinv32_impl.h"
#include "util.h"

/* Limbs of the secp256k1 order. */
#define SECP256K1_N_0 ((uint32_t)0xD0364141UL)
#define SECP256K1_N_1 ((uint32_t)0xBFD25E8CUL)
#define SECP256K1_N_2 ((uint32_t)0xAF48A03BUL)
#define SECP256K1_N_3 ((uint32_t)0xBAAEDCE6UL)
#define SECP256K1_N_4 ((uint32_t)0xFFFFFFFEUL)
#define SECP256K1_N_5 ((uint32_t)0xFFFFFFFFUL)
#define SECP256K1_N_6 ((uint32_t)0xFFFFFFFFUL)
#define SECP256K1_N_7 ((uint32_t)0xFFFFFFFFUL)

/* Limbs of 2^256 minus the secp256k1 order. */
#define SECP256K1_N_C_0 (~SECP256K1_N_0 + 1)
#define SECP256K1_N_C_1 (~SECP256K1_N_1)
#define SECP256K1_N_C_2 (~SECP256K1_N_2)
#define SECP256K1_N_C_3 (~SECP256K1_N_3)
#define SECP256K1_N_C_4 (1)

/* Limbs of half the secp256k1 order. */
#define SECP256K1_N_H_0 ((uint32_t)0x681B20A0UL)
#define SECP256K1_N_H_1 ((uint32_t)0xDFE92F46UL)
#define SECP256K1_N_H_2 ((uint32_t)0x57A4501DUL)
#define SECP256K1_N_H_3 ((uint32_t)0x5D576E73UL)
#define SECP256K1_N_H_4 ((uint32_t)0xFFFFFFFFUL)
#define SECP256K1_N_H_5 ((uint32_t)0xFFFFFFFFUL)
#define SECP256K1_N_H_6 ((uint32_t)0xFFFFFFFFUL)
#define SECP256K1_N_H_7 ((uint32_t)0x7FFFFFFFUL)
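Editor's note: the N_H constants are the limbs of n//2, so n = 2*(n//2) + 1 must hold limb by limb with carries (n is odd). A standalone check of the constants above (illustrative sketch):

#include <assert.h>
#include <stdint.h>

int main(void) {
    /* The order n and n//2 from the constants above, least significant first. */
    static const uint32_t N[8] = {0xD0364141UL, 0xBFD25E8CUL, 0xAF48A03BUL, 0xBAAEDCE6UL,
                                  0xFFFFFFFEUL, 0xFFFFFFFFUL, 0xFFFFFFFFUL, 0xFFFFFFFFUL};
    static const uint32_t NH[8] = {0x681B20A0UL, 0xDFE92F46UL, 0x57A4501DUL, 0x5D576E73UL,
                                   0xFFFFFFFFUL, 0xFFFFFFFFUL, 0xFFFFFFFFUL, 0x7FFFFFFFUL};
    uint64_t t = 1;   /* the +1, since n is odd */
    int i;
    for (i = 0; i < 8; i++) {
        t += 2 * (uint64_t)NH[i];
        assert((uint32_t)t == N[i]);   /* limb matches */
        t >>= 32;                      /* carry into the next limb */
    }
    assert(t == 0);
    return 0;
}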
SECP256K1_INLINE static void secp256k1_scalar_set_int(secp256k1_scalar *r, unsigned int v) {
    r->d[0] = v;
    r->d[1] = 0;
    r->d[2] = 0;
    r->d[3] = 0;
    r->d[4] = 0;
    r->d[5] = 0;
    r->d[6] = 0;
    r->d[7] = 0;

    SECP256K1_SCALAR_VERIFY(r);
}

SECP256K1_INLINE static uint32_t secp256k1_scalar_get_bits_limb32(const secp256k1_scalar *a, unsigned int offset, unsigned int count) {
    SECP256K1_SCALAR_VERIFY(a);
    VERIFY_CHECK(count > 0 && count <= 32);
    VERIFY_CHECK((offset + count - 1) >> 5 == offset >> 5);

    return (a->d[offset >> 5] >> (offset & 0x1F)) & (0xFFFFFFFF >> (32 - count));
}

SECP256K1_INLINE static uint32_t secp256k1_scalar_get_bits_var(const secp256k1_scalar *a, unsigned int offset, unsigned int count) {
    SECP256K1_SCALAR_VERIFY(a);
    VERIFY_CHECK(count > 0 && count <= 32);
    VERIFY_CHECK(offset + count <= 256);

    if ((offset + count - 1) >> 5 == offset >> 5) {
        return secp256k1_scalar_get_bits_limb32(a, offset, count);
    } else {
        VERIFY_CHECK((offset >> 5) + 1 < 8);
        return ((a->d[offset >> 5] >> (offset & 0x1F)) | (a->d[(offset >> 5) + 1] << (32 - (offset & 0x1F)))) & (0xFFFFFFFF >> (32 - count));
    }
}
SECP256K1_INLINE static int secp256k1_scalar_check_overflow(const secp256k1_scalar *a) {
    int yes = 0;
    int no = 0;
    no |= (a->d[7] < SECP256K1_N_7); /* No need for a > check. */
    no |= (a->d[6] < SECP256K1_N_6); /* No need for a > check. */
    no |= (a->d[5] < SECP256K1_N_5); /* No need for a > check. */
    no |= (a->d[4] < SECP256K1_N_4);
    yes |= (a->d[4] > SECP256K1_N_4) & ~no;
    no |= (a->d[3] < SECP256K1_N_3) & ~yes;
    yes |= (a->d[3] > SECP256K1_N_3) & ~no;
    no |= (a->d[2] < SECP256K1_N_2) & ~yes;
    yes |= (a->d[2] > SECP256K1_N_2) & ~no;
    no |= (a->d[1] < SECP256K1_N_1) & ~yes;
    yes |= (a->d[1] > SECP256K1_N_1) & ~no;
    yes |= (a->d[0] >= SECP256K1_N_0) & ~no;
    return yes;
}

SECP256K1_INLINE static int secp256k1_scalar_reduce(secp256k1_scalar *r, uint32_t overflow) {
    uint64_t t;
    VERIFY_CHECK(overflow <= 1);

    t = (uint64_t)r->d[0] + overflow * SECP256K1_N_C_0;
    r->d[0] = t & 0xFFFFFFFFUL; t >>= 32;
    t += (uint64_t)r->d[1] + overflow * SECP256K1_N_C_1;
    r->d[1] = t & 0xFFFFFFFFUL; t >>= 32;
    t += (uint64_t)r->d[2] + overflow * SECP256K1_N_C_2;
    r->d[2] = t & 0xFFFFFFFFUL; t >>= 32;
    t += (uint64_t)r->d[3] + overflow * SECP256K1_N_C_3;
    r->d[3] = t & 0xFFFFFFFFUL; t >>= 32;
    t += (uint64_t)r->d[4] + overflow * SECP256K1_N_C_4;
    r->d[4] = t & 0xFFFFFFFFUL; t >>= 32;
    t += (uint64_t)r->d[5];
    r->d[5] = t & 0xFFFFFFFFUL; t >>= 32;
    t += (uint64_t)r->d[6];
    r->d[6] = t & 0xFFFFFFFFUL; t >>= 32;
    t += (uint64_t)r->d[7];
    r->d[7] = t & 0xFFFFFFFFUL;

    SECP256K1_SCALAR_VERIFY(r);
    return overflow;
}
static int secp256k1_scalar_add(secp256k1_scalar *r, const secp256k1_scalar *a, const secp256k1_scalar *b) {
    int overflow;
    uint64_t t = (uint64_t)a->d[0] + b->d[0];
    SECP256K1_SCALAR_VERIFY(a);
    SECP256K1_SCALAR_VERIFY(b);

    r->d[0] = t & 0xFFFFFFFFULL; t >>= 32;
    t += (uint64_t)a->d[1] + b->d[1];
    r->d[1] = t & 0xFFFFFFFFULL; t >>= 32;
    t += (uint64_t)a->d[2] + b->d[2];
    r->d[2] = t & 0xFFFFFFFFULL; t >>= 32;
    t += (uint64_t)a->d[3] + b->d[3];
    r->d[3] = t & 0xFFFFFFFFULL; t >>= 32;
    t += (uint64_t)a->d[4] + b->d[4];
    r->d[4] = t & 0xFFFFFFFFULL; t >>= 32;
    t += (uint64_t)a->d[5] + b->d[5];
    r->d[5] = t & 0xFFFFFFFFULL; t >>= 32;
    t += (uint64_t)a->d[6] + b->d[6];
    r->d[6] = t & 0xFFFFFFFFULL; t >>= 32;
    t += (uint64_t)a->d[7] + b->d[7];
    r->d[7] = t & 0xFFFFFFFFULL; t >>= 32;
    overflow = t + secp256k1_scalar_check_overflow(r);
    VERIFY_CHECK(overflow == 0 || overflow == 1);
    secp256k1_scalar_reduce(r, overflow);

    SECP256K1_SCALAR_VERIFY(r);
    return overflow;
}
static void secp256k1_scalar_cadd_bit(secp256k1_scalar *r, unsigned int bit, int flag) {
    uint64_t t;
    volatile int vflag = flag;
    SECP256K1_SCALAR_VERIFY(r);
    VERIFY_CHECK(bit < 256);

    bit += ((uint32_t) vflag - 1) & 0x100;  /* forcing (bit >> 5) > 7 makes this a noop */
    t = (uint64_t)r->d[0] + (((uint32_t)((bit >> 5) == 0)) << (bit & 0x1F));
    r->d[0] = t & 0xFFFFFFFFULL; t >>= 32;
    t += (uint64_t)r->d[1] + (((uint32_t)((bit >> 5) == 1)) << (bit & 0x1F));
    r->d[1] = t & 0xFFFFFFFFULL; t >>= 32;
    t += (uint64_t)r->d[2] + (((uint32_t)((bit >> 5) == 2)) << (bit & 0x1F));
    r->d[2] = t & 0xFFFFFFFFULL; t >>= 32;
    t += (uint64_t)r->d[3] + (((uint32_t)((bit >> 5) == 3)) << (bit & 0x1F));
    r->d[3] = t & 0xFFFFFFFFULL; t >>= 32;
    t += (uint64_t)r->d[4] + (((uint32_t)((bit >> 5) == 4)) << (bit & 0x1F));
    r->d[4] = t & 0xFFFFFFFFULL; t >>= 32;
    t += (uint64_t)r->d[5] + (((uint32_t)((bit >> 5) == 5)) << (bit & 0x1F));
    r->d[5] = t & 0xFFFFFFFFULL; t >>= 32;
    t += (uint64_t)r->d[6] + (((uint32_t)((bit >> 5) == 6)) << (bit & 0x1F));
    r->d[6] = t & 0xFFFFFFFFULL; t >>= 32;
    t += (uint64_t)r->d[7] + (((uint32_t)((bit >> 5) == 7)) << (bit & 0x1F));
    r->d[7] = t & 0xFFFFFFFFULL;

    SECP256K1_SCALAR_VERIFY(r);
    VERIFY_CHECK((t >> 32) == 0);
}
static void secp256k1_scalar_set_b32(secp256k1_scalar *r, const unsigned char *b32, int *overflow) {
    int over;
    r->d[0] = secp256k1_read_be32(&b32[28]);
    r->d[1] = secp256k1_read_be32(&b32[24]);
    r->d[2] = secp256k1_read_be32(&b32[20]);
    r->d[3] = secp256k1_read_be32(&b32[16]);
    r->d[4] = secp256k1_read_be32(&b32[12]);
    r->d[5] = secp256k1_read_be32(&b32[8]);
    r->d[6] = secp256k1_read_be32(&b32[4]);
    r->d[7] = secp256k1_read_be32(&b32[0]);
    over = secp256k1_scalar_reduce(r, secp256k1_scalar_check_overflow(r));
    if (overflow) {
        *overflow = over;
    }

    SECP256K1_SCALAR_VERIFY(r);
}

static void secp256k1_scalar_get_b32(unsigned char *bin, const secp256k1_scalar* a) {
    SECP256K1_SCALAR_VERIFY(a);

    secp256k1_write_be32(&bin[0], a->d[7]);
    secp256k1_write_be32(&bin[4], a->d[6]);
    secp256k1_write_be32(&bin[8], a->d[5]);
    secp256k1_write_be32(&bin[12], a->d[4]);
    secp256k1_write_be32(&bin[16], a->d[3]);
    secp256k1_write_be32(&bin[20], a->d[2]);
    secp256k1_write_be32(&bin[24], a->d[1]);
    secp256k1_write_be32(&bin[28], a->d[0]);
}

SECP256K1_INLINE static int secp256k1_scalar_is_zero(const secp256k1_scalar *a) {
    SECP256K1_SCALAR_VERIFY(a);

    return (a->d[0] | a->d[1] | a->d[2] | a->d[3] | a->d[4] | a->d[5] | a->d[6] | a->d[7]) == 0;
}
static void secp256k1_scalar_negate(secp256k1_scalar *r, const secp256k1_scalar *a) {
    uint32_t nonzero = 0xFFFFFFFFUL * (secp256k1_scalar_is_zero(a) == 0);
    uint64_t t = (uint64_t)(~a->d[0]) + SECP256K1_N_0 + 1;
    SECP256K1_SCALAR_VERIFY(a);

    r->d[0] = t & nonzero; t >>= 32;
    t += (uint64_t)(~a->d[1]) + SECP256K1_N_1;
    r->d[1] = t & nonzero; t >>= 32;
    t += (uint64_t)(~a->d[2]) + SECP256K1_N_2;
    r->d[2] = t & nonzero; t >>= 32;
    t += (uint64_t)(~a->d[3]) + SECP256K1_N_3;
    r->d[3] = t & nonzero; t >>= 32;
    t += (uint64_t)(~a->d[4]) + SECP256K1_N_4;
    r->d[4] = t & nonzero; t >>= 32;
    t += (uint64_t)(~a->d[5]) + SECP256K1_N_5;
    r->d[5] = t & nonzero; t >>= 32;
    t += (uint64_t)(~a->d[6]) + SECP256K1_N_6;
    r->d[6] = t & nonzero; t >>= 32;
    t += (uint64_t)(~a->d[7]) + SECP256K1_N_7;
    r->d[7] = t & nonzero;

    SECP256K1_SCALAR_VERIFY(r);
}
static void secp256k1_scalar_half(secp256k1_scalar *r, const secp256k1_scalar *a) {
    /* Writing `/` for field division and `//` for integer division, we compute
     *
     *   a/2 = (a - (a&1))/2 + (a&1)/2
     *       = (a >> 1) + (a&1 ?    1/2 : 0)
     *       = (a >> 1) + (a&1 ? n//2+1 : 0),
     *
     * where n is the group order and in the last equality we have used 1/2 = n//2+1 (mod n).
     * For n//2, we have the constants SECP256K1_N_H_0, ...
     *
     * This sum does not overflow. The most extreme case is a = -2, the largest odd scalar. Here:
     * - the left summand is:  a >> 1 = (a - a&1)/2 = (n-2-1)//2 = (n-3)//2
     * - the right summand is: a&1 ? n//2+1 : 0 = n//2+1 = (n-1)//2 + 2//2 = (n+1)//2
     * Together they sum to (n-3)//2 + (n+1)//2 = (2n-2)//2 = n - 1, which is less than n.
     */
    uint32_t mask = -(uint32_t)(a->d[0] & 1U);
    uint64_t t = (uint32_t)((a->d[0] >> 1) | (a->d[1] << 31));
    SECP256K1_SCALAR_VERIFY(a);

    t += (SECP256K1_N_H_0 + 1U) & mask;
    r->d[0] = t; t >>= 32;
    t += (uint32_t)((a->d[1] >> 1) | (a->d[2] << 31));
    t += SECP256K1_N_H_1 & mask;
    r->d[1] = t; t >>= 32;
    t += (uint32_t)((a->d[2] >> 1) | (a->d[3] << 31));
    t += SECP256K1_N_H_2 & mask;
    r->d[2] = t; t >>= 32;
    t += (uint32_t)((a->d[3] >> 1) | (a->d[4] << 31));
    t += SECP256K1_N_H_3 & mask;
    r->d[3] = t; t >>= 32;
    t += (uint32_t)((a->d[4] >> 1) | (a->d[5] << 31));
    t += SECP256K1_N_H_4 & mask;
    r->d[4] = t; t >>= 32;
    t += (uint32_t)((a->d[5] >> 1) | (a->d[6] << 31));
    t += SECP256K1_N_H_5 & mask;
    r->d[5] = t; t >>= 32;
    t += (uint32_t)((a->d[6] >> 1) | (a->d[7] << 31));
    t += SECP256K1_N_H_6 & mask;
    r->d[6] = t; t >>= 32;
    r->d[7] = (uint32_t)t + (uint32_t)(a->d[7] >> 1) + (SECP256K1_N_H_7 & mask);

    /* The line above only computed the bottom 32 bits of r->d[7]. Redo the computation
     * in full 64 bits to make sure the top 32 bits are indeed zero. */
    VERIFY_CHECK((t + (a->d[7] >> 1) + (SECP256K1_N_H_7 & mask)) >> 32 == 0);

    SECP256K1_SCALAR_VERIFY(r);
}
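Editor's note: the identity implemented above, a/2 = (a >> 1) + (a odd ? n//2 + 1 : 0) (mod n), can be checked exhaustively for a small odd modulus; an illustrative sketch (the modulus here is hypothetical, standing in for the group order):

#include <assert.h>
#include <stdint.h>

int main(void) {
    const uint32_t n = 1000003;   /* small odd stand-in for the order */
    uint32_t a;
    for (a = 0; a < n; a++) {
        uint32_t half = (a >> 1) + ((a & 1) ? n / 2 + 1 : 0);
        /* Doubling half must give back a (mod n). */
        assert((2ULL * half) % n == a);
    }
    return 0;
}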
SECP256K1_INLINE static int secp256k1_scalar_is_one(const secp256k1_scalar *a) {
    SECP256K1_SCALAR_VERIFY(a);

    return ((a->d[0] ^ 1) | a->d[1] | a->d[2] | a->d[3] | a->d[4] | a->d[5] | a->d[6] | a->d[7]) == 0;
}

static int secp256k1_scalar_is_high(const secp256k1_scalar *a) {
    int yes = 0;
    int no = 0;
    SECP256K1_SCALAR_VERIFY(a);

    no |= (a->d[7] < SECP256K1_N_H_7);
    yes |= (a->d[7] > SECP256K1_N_H_7) & ~no;
    no |= (a->d[6] < SECP256K1_N_H_6) & ~yes; /* No need for a > check. */
    no |= (a->d[5] < SECP256K1_N_H_5) & ~yes; /* No need for a > check. */
    no |= (a->d[4] < SECP256K1_N_H_4) & ~yes; /* No need for a > check. */
    no |= (a->d[3] < SECP256K1_N_H_3) & ~yes;
    yes |= (a->d[3] > SECP256K1_N_H_3) & ~no;
    no |= (a->d[2] < SECP256K1_N_H_2) & ~yes;
    yes |= (a->d[2] > SECP256K1_N_H_2) & ~no;
    no |= (a->d[1] < SECP256K1_N_H_1) & ~yes;
    yes |= (a->d[1] > SECP256K1_N_H_1) & ~no;
    yes |= (a->d[0] > SECP256K1_N_H_0) & ~no;
    return yes;
}
static int secp256k1_scalar_cond_negate(secp256k1_scalar *r, int flag) {
|
||||
/* If we are flag = 0, mask = 00...00 and this is a no-op;
|
||||
* if we are flag = 1, mask = 11...11 and this is identical to secp256k1_scalar_negate */
|
||||
volatile int vflag = flag;
|
||||
uint32_t mask = -vflag;
|
||||
uint32_t nonzero = 0xFFFFFFFFUL * (secp256k1_scalar_is_zero(r) == 0);
|
||||
uint64_t t = (uint64_t)(r->d[0] ^ mask) + ((SECP256K1_N_0 + 1) & mask);
|
||||
SECP256K1_SCALAR_VERIFY(r);
|
||||
|
||||
r->d[0] = t & nonzero; t >>= 32;
|
||||
t += (uint64_t)(r->d[1] ^ mask) + (SECP256K1_N_1 & mask);
|
||||
r->d[1] = t & nonzero; t >>= 32;
|
||||
t += (uint64_t)(r->d[2] ^ mask) + (SECP256K1_N_2 & mask);
|
||||
r->d[2] = t & nonzero; t >>= 32;
|
||||
t += (uint64_t)(r->d[3] ^ mask) + (SECP256K1_N_3 & mask);
|
||||
r->d[3] = t & nonzero; t >>= 32;
|
||||
t += (uint64_t)(r->d[4] ^ mask) + (SECP256K1_N_4 & mask);
|
||||
r->d[4] = t & nonzero; t >>= 32;
|
||||
t += (uint64_t)(r->d[5] ^ mask) + (SECP256K1_N_5 & mask);
|
||||
r->d[5] = t & nonzero; t >>= 32;
|
||||
t += (uint64_t)(r->d[6] ^ mask) + (SECP256K1_N_6 & mask);
|
||||
r->d[6] = t & nonzero; t >>= 32;
|
||||
t += (uint64_t)(r->d[7] ^ mask) + (SECP256K1_N_7 & mask);
|
||||
r->d[7] = t & nonzero;
|
||||
|
||||
SECP256K1_SCALAR_VERIFY(r);
|
||||
return 2 * (mask == 0) - 1;
|
||||
}
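
/* A minimal sketch of the same mask trick on a single 32-bit word (an
 * illustration, not library code; cond_neg_u32 is a hypothetical name):
 * negate x mod 2^32 without a branch. In the scalar version above, the
 * (SECP256K1_N_i & mask) terms additionally add n, so the result is n - r
 * rather than -r. */
static uint32_t cond_neg_u32(uint32_t x, int flag) {
    volatile int vflag = flag;        /* discourage the compiler from branching */
    uint32_t mask = -(uint32_t)vflag; /* flag ? 0xFFFFFFFF : 0 */
    return (x ^ mask) + (1U & mask);  /* flag ? ~x + 1 (= -x) : x */
}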


/* Inspired by the macros in OpenSSL's crypto/bn/asm/x86_64-gcc.c. */

/** Add a*b to the number defined by (c0,c1,c2). c2 must never overflow. */
#define muladd(a,b) { \
    uint32_t tl, th; \
    { \
        uint64_t t = (uint64_t)a * b; \
        th = t >> 32; /* at most 0xFFFFFFFE */ \
        tl = t; \
    } \
    c0 += tl; /* overflow is handled on the next line */ \
    th += (c0 < tl); /* at most 0xFFFFFFFF */ \
    c1 += th; /* overflow is handled on the next line */ \
    c2 += (c1 < th); /* never overflows by contract (verified in the next line) */ \
    VERIFY_CHECK((c1 >= th) || (c2 != 0)); \
}

/** Add a*b to the number defined by (c0,c1). c1 must never overflow. */
#define muladd_fast(a,b) { \
    uint32_t tl, th; \
    { \
        uint64_t t = (uint64_t)a * b; \
        th = t >> 32; /* at most 0xFFFFFFFE */ \
        tl = t; \
    } \
    c0 += tl; /* overflow is handled on the next line */ \
    th += (c0 < tl); /* at most 0xFFFFFFFF */ \
    c1 += th; /* never overflows by contract (verified in the next line) */ \
    VERIFY_CHECK(c1 >= th); \
}

/** Add a to the number defined by (c0,c1,c2). c2 must never overflow. */
#define sumadd(a) { \
    unsigned int over; \
    c0 += (a); /* overflow is handled on the next line */ \
    over = (c0 < (a)); \
    c1 += over; /* overflow is handled on the next line */ \
    c2 += (c1 < over); /* never overflows by contract */ \
}

/** Add a to the number defined by (c0,c1). c1 must never overflow, c2 must be zero. */
#define sumadd_fast(a) { \
    c0 += (a); /* overflow is handled on the next line */ \
    c1 += (c0 < (a)); /* never overflows by contract (verified in the next line) */ \
    VERIFY_CHECK((c1 != 0) | (c0 >= (a))); \
    VERIFY_CHECK(c2 == 0); \
}

/** Extract the lowest 32 bits of (c0,c1,c2) into n, and shift the number right by 32 bits. */
#define extract(n) { \
    (n) = c0; \
    c0 = c1; \
    c1 = c2; \
    c2 = 0; \
}

/** Extract the lowest 32 bits of (c0,c1,c2) into n, and shift the number right by 32 bits. c2 is required to be zero. */
#define extract_fast(n) { \
    (n) = c0; \
    c0 = c1; \
    c1 = 0; \
    VERIFY_CHECK(c2 == 0); \
}
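
/* A minimal sketch of the accumulator discipline above as a plain function
 * (an illustration under the same contract, not library code; muladd_sketch
 * is a hypothetical name). acc[0..2] plays the role of (c0,c1,c2): a 96-bit
 * value acc[0] + acc[1]*2^32 + acc[2]*2^64. */
static void muladd_sketch(uint32_t acc[3], uint32_t a, uint32_t b) {
    uint64_t t = (uint64_t)a * b;
    uint32_t tl = (uint32_t)t;
    uint32_t th = (uint32_t)(t >> 32); /* at most 0xFFFFFFFE, so the carry below cannot wrap th */
    acc[0] += tl;
    th += (acc[0] < tl);     /* propagate the carry out of the low limb */
    acc[1] += th;
    acc[2] += (acc[1] < th); /* caller guarantees this never overflows */
}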

static void secp256k1_scalar_reduce_512(secp256k1_scalar *r, const uint32_t *l) {
    uint64_t c;
    uint32_t n0 = l[8], n1 = l[9], n2 = l[10], n3 = l[11], n4 = l[12], n5 = l[13], n6 = l[14], n7 = l[15];
    uint32_t m0, m1, m2, m3, m4, m5, m6, m7, m8, m9, m10, m11, m12;
    uint32_t p0, p1, p2, p3, p4, p5, p6, p7, p8;

    /* 96 bit accumulator. */
    uint32_t c0, c1, c2;

    /* Reduce 512 bits into 385. */
    /* m[0..12] = l[0..7] + n[0..7] * SECP256K1_N_C. */
    c0 = l[0]; c1 = 0; c2 = 0;
    muladd_fast(n0, SECP256K1_N_C_0);
    extract_fast(m0);
    sumadd_fast(l[1]);
    muladd(n1, SECP256K1_N_C_0);
    muladd(n0, SECP256K1_N_C_1);
    extract(m1);
    sumadd(l[2]);
    muladd(n2, SECP256K1_N_C_0);
    muladd(n1, SECP256K1_N_C_1);
    muladd(n0, SECP256K1_N_C_2);
    extract(m2);
    sumadd(l[3]);
    muladd(n3, SECP256K1_N_C_0);
    muladd(n2, SECP256K1_N_C_1);
    muladd(n1, SECP256K1_N_C_2);
    muladd(n0, SECP256K1_N_C_3);
    extract(m3);
    sumadd(l[4]);
    muladd(n4, SECP256K1_N_C_0);
    muladd(n3, SECP256K1_N_C_1);
    muladd(n2, SECP256K1_N_C_2);
    muladd(n1, SECP256K1_N_C_3);
    sumadd(n0);
    extract(m4);
    sumadd(l[5]);
    muladd(n5, SECP256K1_N_C_0);
    muladd(n4, SECP256K1_N_C_1);
    muladd(n3, SECP256K1_N_C_2);
    muladd(n2, SECP256K1_N_C_3);
    sumadd(n1);
    extract(m5);
    sumadd(l[6]);
    muladd(n6, SECP256K1_N_C_0);
    muladd(n5, SECP256K1_N_C_1);
    muladd(n4, SECP256K1_N_C_2);
    muladd(n3, SECP256K1_N_C_3);
    sumadd(n2);
    extract(m6);
    sumadd(l[7]);
    muladd(n7, SECP256K1_N_C_0);
    muladd(n6, SECP256K1_N_C_1);
    muladd(n5, SECP256K1_N_C_2);
    muladd(n4, SECP256K1_N_C_3);
    sumadd(n3);
    extract(m7);
    muladd(n7, SECP256K1_N_C_1);
    muladd(n6, SECP256K1_N_C_2);
    muladd(n5, SECP256K1_N_C_3);
    sumadd(n4);
    extract(m8);
    muladd(n7, SECP256K1_N_C_2);
    muladd(n6, SECP256K1_N_C_3);
    sumadd(n5);
    extract(m9);
    muladd(n7, SECP256K1_N_C_3);
    sumadd(n6);
    extract(m10);
    sumadd_fast(n7);
    extract_fast(m11);
    VERIFY_CHECK(c0 <= 1);
    m12 = c0;

    /* Reduce 385 bits into 258. */
    /* p[0..8] = m[0..7] + m[8..12] * SECP256K1_N_C. */
    c0 = m0; c1 = 0; c2 = 0;
    muladd_fast(m8, SECP256K1_N_C_0);
    extract_fast(p0);
    sumadd_fast(m1);
    muladd(m9, SECP256K1_N_C_0);
    muladd(m8, SECP256K1_N_C_1);
    extract(p1);
    sumadd(m2);
    muladd(m10, SECP256K1_N_C_0);
    muladd(m9, SECP256K1_N_C_1);
    muladd(m8, SECP256K1_N_C_2);
    extract(p2);
    sumadd(m3);
    muladd(m11, SECP256K1_N_C_0);
    muladd(m10, SECP256K1_N_C_1);
    muladd(m9, SECP256K1_N_C_2);
    muladd(m8, SECP256K1_N_C_3);
    extract(p3);
    sumadd(m4);
    muladd(m12, SECP256K1_N_C_0);
    muladd(m11, SECP256K1_N_C_1);
    muladd(m10, SECP256K1_N_C_2);
    muladd(m9, SECP256K1_N_C_3);
    sumadd(m8);
    extract(p4);
    sumadd(m5);
    muladd(m12, SECP256K1_N_C_1);
    muladd(m11, SECP256K1_N_C_2);
    muladd(m10, SECP256K1_N_C_3);
    sumadd(m9);
    extract(p5);
    sumadd(m6);
    muladd(m12, SECP256K1_N_C_2);
    muladd(m11, SECP256K1_N_C_3);
    sumadd(m10);
    extract(p6);
    sumadd_fast(m7);
    muladd_fast(m12, SECP256K1_N_C_3);
    sumadd_fast(m11);
    extract_fast(p7);
    p8 = c0 + m12;
    VERIFY_CHECK(p8 <= 2);

    /* Reduce 258 bits into 256. */
    /* r[0..7] = p[0..7] + p[8] * SECP256K1_N_C. */
    c = p0 + (uint64_t)SECP256K1_N_C_0 * p8;
    r->d[0] = c & 0xFFFFFFFFUL; c >>= 32;
    c += p1 + (uint64_t)SECP256K1_N_C_1 * p8;
    r->d[1] = c & 0xFFFFFFFFUL; c >>= 32;
    c += p2 + (uint64_t)SECP256K1_N_C_2 * p8;
    r->d[2] = c & 0xFFFFFFFFUL; c >>= 32;
    c += p3 + (uint64_t)SECP256K1_N_C_3 * p8;
    r->d[3] = c & 0xFFFFFFFFUL; c >>= 32;
    c += p4 + (uint64_t)p8;
    r->d[4] = c & 0xFFFFFFFFUL; c >>= 32;
    c += p5;
    r->d[5] = c & 0xFFFFFFFFUL; c >>= 32;
    c += p6;
    r->d[6] = c & 0xFFFFFFFFUL; c >>= 32;
    c += p7;
    r->d[7] = c & 0xFFFFFFFFUL; c >>= 32;

    /* Final reduction of r. */
    secp256k1_scalar_reduce(r, c + secp256k1_scalar_check_overflow(r));
}
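
/* The reduction above repeatedly uses 2^256 == SECP256K1_N_C (mod n). A
 * minimal sketch of the same folding idea one size down (an illustration,
 * not library code; fold_mod is a hypothetical name), assuming a modulus
 * m = 2^32 - c with c < 2^15 so that two folds plus one conditional
 * subtraction suffice: */
static uint32_t fold_mod(uint64_t x, uint32_t c) {
    uint32_t m = (uint32_t)0U - c;             /* m = 2^32 - c (mod 2^32) */
    x = (uint64_t)(uint32_t)x + (x >> 32) * c; /* fold: replace h*2^32 by h*c */
    x = (uint64_t)(uint32_t)x + (x >> 32) * c; /* second fold leaves x < 2*m */
    if (x >= m) x -= m;                        /* final conditional subtraction */
    return (uint32_t)x;
}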

static void secp256k1_scalar_mul_512(uint32_t *l, const secp256k1_scalar *a, const secp256k1_scalar *b) {
    /* 96 bit accumulator. */
    uint32_t c0 = 0, c1 = 0, c2 = 0;

    /* l[0..15] = a[0..7] * b[0..7]. */
    muladd_fast(a->d[0], b->d[0]);
    extract_fast(l[0]);
    muladd(a->d[0], b->d[1]);
    muladd(a->d[1], b->d[0]);
    extract(l[1]);
    muladd(a->d[0], b->d[2]);
    muladd(a->d[1], b->d[1]);
    muladd(a->d[2], b->d[0]);
    extract(l[2]);
    muladd(a->d[0], b->d[3]);
    muladd(a->d[1], b->d[2]);
    muladd(a->d[2], b->d[1]);
    muladd(a->d[3], b->d[0]);
    extract(l[3]);
    muladd(a->d[0], b->d[4]);
    muladd(a->d[1], b->d[3]);
    muladd(a->d[2], b->d[2]);
    muladd(a->d[3], b->d[1]);
    muladd(a->d[4], b->d[0]);
    extract(l[4]);
    muladd(a->d[0], b->d[5]);
    muladd(a->d[1], b->d[4]);
    muladd(a->d[2], b->d[3]);
    muladd(a->d[3], b->d[2]);
    muladd(a->d[4], b->d[1]);
    muladd(a->d[5], b->d[0]);
    extract(l[5]);
    muladd(a->d[0], b->d[6]);
    muladd(a->d[1], b->d[5]);
    muladd(a->d[2], b->d[4]);
    muladd(a->d[3], b->d[3]);
    muladd(a->d[4], b->d[2]);
    muladd(a->d[5], b->d[1]);
    muladd(a->d[6], b->d[0]);
    extract(l[6]);
    muladd(a->d[0], b->d[7]);
    muladd(a->d[1], b->d[6]);
    muladd(a->d[2], b->d[5]);
    muladd(a->d[3], b->d[4]);
    muladd(a->d[4], b->d[3]);
    muladd(a->d[5], b->d[2]);
    muladd(a->d[6], b->d[1]);
    muladd(a->d[7], b->d[0]);
    extract(l[7]);
    muladd(a->d[1], b->d[7]);
    muladd(a->d[2], b->d[6]);
    muladd(a->d[3], b->d[5]);
    muladd(a->d[4], b->d[4]);
    muladd(a->d[5], b->d[3]);
    muladd(a->d[6], b->d[2]);
    muladd(a->d[7], b->d[1]);
    extract(l[8]);
    muladd(a->d[2], b->d[7]);
    muladd(a->d[3], b->d[6]);
    muladd(a->d[4], b->d[5]);
    muladd(a->d[5], b->d[4]);
    muladd(a->d[6], b->d[3]);
    muladd(a->d[7], b->d[2]);
    extract(l[9]);
    muladd(a->d[3], b->d[7]);
    muladd(a->d[4], b->d[6]);
    muladd(a->d[5], b->d[5]);
    muladd(a->d[6], b->d[4]);
    muladd(a->d[7], b->d[3]);
    extract(l[10]);
    muladd(a->d[4], b->d[7]);
    muladd(a->d[5], b->d[6]);
    muladd(a->d[6], b->d[5]);
    muladd(a->d[7], b->d[4]);
    extract(l[11]);
    muladd(a->d[5], b->d[7]);
    muladd(a->d[6], b->d[6]);
    muladd(a->d[7], b->d[5]);
    extract(l[12]);
    muladd(a->d[6], b->d[7]);
    muladd(a->d[7], b->d[6]);
    extract(l[13]);
    muladd_fast(a->d[7], b->d[7]);
    extract_fast(l[14]);
    VERIFY_CHECK(c1 == 0);
    l[15] = c0;
}
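
/* A minimal sketch of the same 256x256->512 bit product computed row-by-row
 * with a single 64-bit carry, instead of the column-by-column order unrolled
 * above (an illustration, not library code; mul_512_sketch is a hypothetical
 * name). Each l[k] ends up holding the sum of all a[i]*b[j] with i+j = k,
 * plus carries. */
static void mul_512_sketch(uint32_t l[16], const uint32_t a[8], const uint32_t b[8]) {
    int i, j;
    for (i = 0; i < 16; i++) l[i] = 0;
    for (i = 0; i < 8; i++) {
        uint64_t carry = 0;
        for (j = 0; j < 8; j++) {
            uint64_t t = (uint64_t)a[i] * b[j] + l[i + j] + carry;
            l[i + j] = (uint32_t)t; /* low 32 bits stay in this column */
            carry = t >> 32;        /* high 32 bits move one column up */
        }
        l[i + 8] = (uint32_t)carry; /* final carry of this row */
    }
}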

#undef sumadd
#undef sumadd_fast
#undef muladd
#undef muladd_fast
#undef extract
#undef extract_fast

static void secp256k1_scalar_mul(secp256k1_scalar *r, const secp256k1_scalar *a, const secp256k1_scalar *b) {
    uint32_t l[16];
    SECP256K1_SCALAR_VERIFY(a);
    SECP256K1_SCALAR_VERIFY(b);

    secp256k1_scalar_mul_512(l, a, b);
    secp256k1_scalar_reduce_512(r, l);

    SECP256K1_SCALAR_VERIFY(r);
}

static void secp256k1_scalar_split_128(secp256k1_scalar *r1, secp256k1_scalar *r2, const secp256k1_scalar *k) {
    SECP256K1_SCALAR_VERIFY(k);

    r1->d[0] = k->d[0];
    r1->d[1] = k->d[1];
    r1->d[2] = k->d[2];
    r1->d[3] = k->d[3];
    r1->d[4] = 0;
    r1->d[5] = 0;
    r1->d[6] = 0;
    r1->d[7] = 0;
    r2->d[0] = k->d[4];
    r2->d[1] = k->d[5];
    r2->d[2] = k->d[6];
    r2->d[3] = k->d[7];
    r2->d[4] = 0;
    r2->d[5] = 0;
    r2->d[6] = 0;
    r2->d[7] = 0;

    SECP256K1_SCALAR_VERIFY(r1);
    SECP256K1_SCALAR_VERIFY(r2);
}

SECP256K1_INLINE static int secp256k1_scalar_eq(const secp256k1_scalar *a, const secp256k1_scalar *b) {
    SECP256K1_SCALAR_VERIFY(a);
    SECP256K1_SCALAR_VERIFY(b);

    return ((a->d[0] ^ b->d[0]) | (a->d[1] ^ b->d[1]) | (a->d[2] ^ b->d[2]) | (a->d[3] ^ b->d[3]) | (a->d[4] ^ b->d[4]) | (a->d[5] ^ b->d[5]) | (a->d[6] ^ b->d[6]) | (a->d[7] ^ b->d[7])) == 0;
}

SECP256K1_INLINE static void secp256k1_scalar_mul_shift_var(secp256k1_scalar *r, const secp256k1_scalar *a, const secp256k1_scalar *b, unsigned int shift) {
    uint32_t l[16];
    unsigned int shiftlimbs;
    unsigned int shiftlow;
    unsigned int shifthigh;
    SECP256K1_SCALAR_VERIFY(a);
    SECP256K1_SCALAR_VERIFY(b);
    VERIFY_CHECK(shift >= 256);

    secp256k1_scalar_mul_512(l, a, b);
    shiftlimbs = shift >> 5;
    shiftlow = shift & 0x1F;
    shifthigh = 32 - shiftlow;
    r->d[0] = shift < 512 ? (l[0 + shiftlimbs] >> shiftlow | (shift < 480 && shiftlow ? (l[1 + shiftlimbs] << shifthigh) : 0)) : 0;
    r->d[1] = shift < 480 ? (l[1 + shiftlimbs] >> shiftlow | (shift < 448 && shiftlow ? (l[2 + shiftlimbs] << shifthigh) : 0)) : 0;
    r->d[2] = shift < 448 ? (l[2 + shiftlimbs] >> shiftlow | (shift < 416 && shiftlow ? (l[3 + shiftlimbs] << shifthigh) : 0)) : 0;
    r->d[3] = shift < 416 ? (l[3 + shiftlimbs] >> shiftlow | (shift < 384 && shiftlow ? (l[4 + shiftlimbs] << shifthigh) : 0)) : 0;
    r->d[4] = shift < 384 ? (l[4 + shiftlimbs] >> shiftlow | (shift < 352 && shiftlow ? (l[5 + shiftlimbs] << shifthigh) : 0)) : 0;
    r->d[5] = shift < 352 ? (l[5 + shiftlimbs] >> shiftlow | (shift < 320 && shiftlow ? (l[6 + shiftlimbs] << shifthigh) : 0)) : 0;
    r->d[6] = shift < 320 ? (l[6 + shiftlimbs] >> shiftlow | (shift < 288 && shiftlow ? (l[7 + shiftlimbs] << shifthigh) : 0)) : 0;
    r->d[7] = shift < 288 ? (l[7 + shiftlimbs] >> shiftlow) : 0;
    secp256k1_scalar_cadd_bit(r, 0, (l[(shift - 1) >> 5] >> ((shift - 1) & 0x1f)) & 1);

    SECP256K1_SCALAR_VERIFY(r);
}
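
/* A note on the final secp256k1_scalar_cadd_bit call above (an observation,
 * not library documentation): it adds back the most significant bit that was
 * shifted out, so the result is rounded to nearest rather than truncated.
 * For example, shifting 11 = 0b1011 right by 2 gives 2, and adding back
 * bit 1 (= 1) yields 3 = round(11/4). */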

static SECP256K1_INLINE void secp256k1_scalar_cmov(secp256k1_scalar *r, const secp256k1_scalar *a, int flag) {
    uint32_t mask0, mask1;
    volatile int vflag = flag;
    SECP256K1_SCALAR_VERIFY(a);
    SECP256K1_CHECKMEM_CHECK_VERIFY(r->d, sizeof(r->d));

    mask0 = vflag + ~((uint32_t)0);
    mask1 = ~mask0;
    r->d[0] = (r->d[0] & mask0) | (a->d[0] & mask1);
    r->d[1] = (r->d[1] & mask0) | (a->d[1] & mask1);
    r->d[2] = (r->d[2] & mask0) | (a->d[2] & mask1);
    r->d[3] = (r->d[3] & mask0) | (a->d[3] & mask1);
    r->d[4] = (r->d[4] & mask0) | (a->d[4] & mask1);
    r->d[5] = (r->d[5] & mask0) | (a->d[5] & mask1);
    r->d[6] = (r->d[6] & mask0) | (a->d[6] & mask1);
    r->d[7] = (r->d[7] & mask0) | (a->d[7] & mask1);

    SECP256K1_SCALAR_VERIFY(r);
}
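
/* A minimal sketch of the same branchless select on a single word (an
 * illustration, not library code; cmov_u32 is a hypothetical name).
 * mask0 = vflag + ~0 = vflag - 1 is all-ones when flag = 0 and all-zero
 * when flag = 1, so exactly one operand survives the AND/OR pair. */
static uint32_t cmov_u32(uint32_t r, uint32_t a, int flag) {
    volatile int vflag = flag;
    uint32_t mask0 = (uint32_t)vflag + ~(uint32_t)0; /* flag ? 0 : 0xFFFFFFFF */
    uint32_t mask1 = ~mask0;                         /* flag ? 0xFFFFFFFF : 0 */
    return (r & mask0) | (a & mask1);
}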

static void secp256k1_scalar_from_signed30(secp256k1_scalar *r, const secp256k1_modinv32_signed30 *a) {
    const uint32_t a0 = a->v[0], a1 = a->v[1], a2 = a->v[2], a3 = a->v[3], a4 = a->v[4],
                   a5 = a->v[5], a6 = a->v[6], a7 = a->v[7], a8 = a->v[8];

    /* The output from secp256k1_modinv32{_var} should be normalized to range [0,modulus), and
     * have limbs in [0,2^30). The modulus is < 2^256, so the top limb must be below 2^(256-30*8).
     */
    VERIFY_CHECK(a0 >> 30 == 0);
    VERIFY_CHECK(a1 >> 30 == 0);
    VERIFY_CHECK(a2 >> 30 == 0);
    VERIFY_CHECK(a3 >> 30 == 0);
    VERIFY_CHECK(a4 >> 30 == 0);
    VERIFY_CHECK(a5 >> 30 == 0);
    VERIFY_CHECK(a6 >> 30 == 0);
    VERIFY_CHECK(a7 >> 30 == 0);
    VERIFY_CHECK(a8 >> 16 == 0);

    r->d[0] = a0 | a1 << 30;
    r->d[1] = a1 >> 2 | a2 << 28;
    r->d[2] = a2 >> 4 | a3 << 26;
    r->d[3] = a3 >> 6 | a4 << 24;
    r->d[4] = a4 >> 8 | a5 << 22;
    r->d[5] = a5 >> 10 | a6 << 20;
    r->d[6] = a6 >> 12 | a7 << 18;
    r->d[7] = a7 >> 14 | a8 << 16;

    SECP256K1_SCALAR_VERIFY(r);
}

static void secp256k1_scalar_to_signed30(secp256k1_modinv32_signed30 *r, const secp256k1_scalar *a) {
    const uint32_t M30 = UINT32_MAX >> 2;
    const uint32_t a0 = a->d[0], a1 = a->d[1], a2 = a->d[2], a3 = a->d[3],
                   a4 = a->d[4], a5 = a->d[5], a6 = a->d[6], a7 = a->d[7];
    SECP256K1_SCALAR_VERIFY(a);

    r->v[0] = a0 & M30;
    r->v[1] = (a0 >> 30 | a1 << 2) & M30;
    r->v[2] = (a1 >> 28 | a2 << 4) & M30;
    r->v[3] = (a2 >> 26 | a3 << 6) & M30;
    r->v[4] = (a3 >> 24 | a4 << 8) & M30;
    r->v[5] = (a4 >> 22 | a5 << 10) & M30;
    r->v[6] = (a5 >> 20 | a6 << 12) & M30;
    r->v[7] = (a6 >> 18 | a7 << 14) & M30;
    r->v[8] = a7 >> 16;
}
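
/* A note on the two conversions above (an observation, not library
 * documentation): 9 limbs of 30 bits hold 270 bits, so a 256-bit scalar fits
 * with the top limb carrying only 256 - 8*30 = 16 bits; that is what the
 * a8 >> 16 == 0 check and the r->v[8] = a7 >> 16 line reflect. */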

static const secp256k1_modinv32_modinfo secp256k1_const_modinfo_scalar = {
    {{0x10364141L, 0x3F497A33L, 0x348A03BBL, 0x2BB739ABL, -0x146L, 0, 0, 0, 65536}},
    0x2A774EC1L
};
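
/* Reading this constant (an observation, not library documentation): the
 * array is the group order n in signed30 form, in which individual limbs
 * such as -0x146 may be negative; the trailing word 0x2A774EC1 appears to be
 * n^-1 mod 2^30, as used by the safegcd divstep updates. */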

static void secp256k1_scalar_inverse(secp256k1_scalar *r, const secp256k1_scalar *x) {
    secp256k1_modinv32_signed30 s;
#ifdef VERIFY
    int zero_in = secp256k1_scalar_is_zero(x);
#endif
    SECP256K1_SCALAR_VERIFY(x);

    secp256k1_scalar_to_signed30(&s, x);
    secp256k1_modinv32(&s, &secp256k1_const_modinfo_scalar);
    secp256k1_scalar_from_signed30(r, &s);

    SECP256K1_SCALAR_VERIFY(r);
    VERIFY_CHECK(secp256k1_scalar_is_zero(r) == zero_in);
}

static void secp256k1_scalar_inverse_var(secp256k1_scalar *r, const secp256k1_scalar *x) {
    secp256k1_modinv32_signed30 s;
#ifdef VERIFY
    int zero_in = secp256k1_scalar_is_zero(x);
#endif
    SECP256K1_SCALAR_VERIFY(x);

    secp256k1_scalar_to_signed30(&s, x);
    secp256k1_modinv32_var(&s, &secp256k1_const_modinfo_scalar);
    secp256k1_scalar_from_signed30(r, &s);

    SECP256K1_SCALAR_VERIFY(r);
    VERIFY_CHECK(secp256k1_scalar_is_zero(r) == zero_in);
}

SECP256K1_INLINE static int secp256k1_scalar_is_even(const secp256k1_scalar *a) {
    SECP256K1_SCALAR_VERIFY(a);

    return !(a->d[0] & 1);
}

#endif /* SECP256K1_SCALAR_REPR_IMPL_H */

@@ -18,8 +18,6 @@
#include "scalar_low_impl.h"
#elif defined(SECP256K1_WIDEMUL_INT128)
#include "scalar_4x64_impl.h"
#elif defined(SECP256K1_WIDEMUL_INT64)
#include "scalar_8x32_impl.h"
#else
#error "Please select wide multiplication implementation"
#endif
BIN
src/secp256k1.o
BIN
src/secp256k1.o
Binary file not shown.
@@ -321,9 +321,6 @@ static SECP256K1_INLINE void secp256k1_int_cmov(int *r, const int *a, int flag)
/* If USE_FORCE_WIDEMUL_INT128 is set, use int128. */
# define SECP256K1_WIDEMUL_INT128 1
# define SECP256K1_INT128_NATIVE 1
#elif defined(USE_FORCE_WIDEMUL_INT64)
/* If USE_FORCE_WIDEMUL_INT64 is set, use int64. */
# define SECP256K1_WIDEMUL_INT64 1
#elif defined(UINT128_MAX) || defined(__SIZEOF_INT128__)
/* If a native 128-bit integer type exists, use int128. */
# define SECP256K1_WIDEMUL_INT128 1
@@ -340,8 +337,7 @@ static SECP256K1_INLINE void secp256k1_int_cmov(int *r, const int *a, int flag)
# define SECP256K1_WIDEMUL_INT128 1
# define SECP256K1_INT128_STRUCT 1
#else
/* Lastly, fall back to int64 based arithmetic. */
# define SECP256K1_WIDEMUL_INT64 1
#error "No suitable wide multiplication implementation found. 32-bit limb support has been removed."
#endif

#ifndef __has_builtin