找回密码
 立即注册
首页 业界区 业界 secp256k1算法详解二(关键理论及源码分析) ...

secp256k1算法详解二(关键理论及源码分析)

嶝扁 2025-6-30 16:07:02
1 关键结构体

1.1 secp256k1_fe

secp256k1库域元素field element,其具体定义如下
  1. /** This field implementation represents the value as 10 uint32_t limbs in base
  2. *  2^26. */
  3. typedef struct {
  4.    /* A field element f represents the sum(i=0..9, f.n[i] << (i*26)) mod p,
  5.     * where p is the field modulus, 2^256 - 2^32 - 977.
  6.     *
  7.     * The individual limbs f.n[i] can exceed 2^26; the field's magnitude roughly
  8.     * corresponds to how much excess is allowed. The value
  9.     * sum(i=0..9, f.n[i] << (i*26)) may exceed p, unless the field element is
  10.     * normalized. */
  11.     uint32_t n[10];
  12.     /*
  13.      * Magnitude m requires:
  14.      *     n[i] <= 2 * m * (2^26 - 1) for i=0..8
  15.      *     n[9] <= 2 * m * (2^22 - 1)
  16.      *
  17.      * Normalized requires:
  18.      *     n[i] <= (2^26 - 1) for i=0..8
  19.      *     sum(i=0..9, n[i] << (i*26)) < p
  20.      *     (together these imply n[9] <= 2^22 - 1)
  21.      */
  22.     SECP256K1_FE_VERIFY_FIELDS
  23. } secp256k1_fe;
复制代码
域的模数p = 2256 - 232 - 977,域中大数r被分为10个limbs,其标准表示为每个limb是26-bit数(除了最后一个是22-bit数),即:
1.png

如之前所描述,由于magnitude的存在,在运算过程中,会使某些limbs超过26-bit,导致进位错误。归一化目标:1 确保最终值 < p; 2 所有limbs小于226(除了t9,小于222)。
在函数中共进行了两次模约减操作,首轮模约减主要目的是去除高位t9的22-bit之后多余部分:
  1. /** Maximum allowed magnitudes for group element coordinates
  2. *  in affine (x, y) and jacobian (x, y, z) representation. */
  3. #define SECP256K1_GE_X_MAGNITUDE_MAX  4
  4. #define SECP256K1_GE_Y_MAGNITUDE_MAX  3
  5. #define SECP256K1_GEJ_X_MAGNITUDE_MAX 4
  6. #define SECP256K1_GEJ_Y_MAGNITUDE_MAX 4
  7. #define SECP256K1_GEJ_Z_MAGNITUDE_MAX 1
复制代码
这里的x值为r/2256向下取整,由SECP256K1_GE_X_MAGNITUDE_MAX等宏定义可知x最大为4-bit数,因为2256 ≡ 232 + 977 mod p,所以超出2256部分即为 x*2256 ≡ x*(232 + 977) mod p ≡ x*(1 n[1], t2 = r->n[2], t3 = r->n[3], t4 = r->n[4],             t5 = r->n[5], t6 = r->n[6], t7 = r->n[7], t8 = r->n[8], t9 = r->n[9];    /* z0 tracks a possible raw value of 0, z1 tracks a possible raw value of P */    uint32_t z0, z1;    /* Reduce t9 at the start so there will be at most a single carry from the first pass */    uint32_t x = t9 >> 22; t9 &= 0x03FFFFFUL;    /* The first pass ensures the magnitude is 1, ... */    t0 += x * 0x3D1UL; t1 += (x > 26); t0 &= 0x3FFFFFFUL; z0  = t0; z1  = t0 ^ 0x3D0UL;    t2 += (t1 >> 26); t1 &= 0x3FFFFFFUL; z0 |= t1; z1 &= t1 ^ 0x40UL;    t3 += (t2 >> 26); t2 &= 0x3FFFFFFUL; z0 |= t2; z1 &= t2;    t4 += (t3 >> 26); t3 &= 0x3FFFFFFUL; z0 |= t3; z1 &= t3;    t5 += (t4 >> 26); t4 &= 0x3FFFFFFUL; z0 |= t4; z1 &= t4;    t6 += (t5 >> 26); t5 &= 0x3FFFFFFUL; z0 |= t5; z1 &= t5;    t7 += (t6 >> 26); t6 &= 0x3FFFFFFUL; z0 |= t6; z1 &= t6;    t8 += (t7 >> 26); t7 &= 0x3FFFFFFUL; z0 |= t7; z1 &= t7;    t9 += (t8 >> 26); t8 &= 0x3FFFFFFUL; z0 |= t8; z1 &= t8;                                         z0 |= t9; z1 &= t9 ^ 0x3C00000UL;    /* ... except for a possible carry at bit 22 of t9 (i.e. bit 256 of the field element) */    return (z0 == 0) | (z1 == 0x3FFFFFFUL);}[/code]secp256k1_fe_normalize_to_zero该函数在不完全归一化(normalize)整个字段元素的情况下,判断其值是否为零或等于模数,代码中z0用于检测是否所有位都为 0,z1用于检测是否所有位构成了模数p。类似secp256k1_fe_normalizes_to_zero_var函数是secp256k1_fe_normalizes_to_zero的变体(var)版,其功能与前者一致:判断一个字段元素是否为0或模数p(即归一化后是否为 0),但使用了更快路径和懒惰归一化以提高性能,该函数的关键代码如下:
  1. /** A group element in affine coordinates on the secp256k1 curve,
  2. *  or occasionally on an isomorphic curve of the form y^2 = x^3 + 7*t^6.
  3. *  Note: For exhaustive test mode, secp256k1 is replaced by a small subgroup of a different curve.
  4. */
  5. typedef struct {
  6.     secp256k1_fe x;
  7.     secp256k1_fe y;
  8.     int infinity; /* whether this represents the point at infinity */
  9. } secp256k1_ge;
复制代码
若t9有进位(超过 22 位),说明有“≥2256”成分,我们提取出这部分(乘以 0x3D1 并加回 t0),这是归一化关键。然后立即对 t0 做一次判断:z0 == 0 表示可能是 0,z1 == 0x3FFFFFFUL 表示可能是 P(因为 t0 应该是 0x3D0),如果两个都不满足,直接返回 0,这是个非常快的判断分支。其他部分代码就和secp256k1_fe_normalizes_to_zero一致了。
3.3 secp256k1_fe_mul_inner

该函数是大数乘法的内部实现,libsecp256k1的作者Pieter Wuille的设计理念可以总结为:"所有公共API都要求输入是完全归一化的,但内部函数可以处理非归一化形式。中间值的大小必须受控,但可以比26位大(比如在该函数内部被限制在30bit),为的是避免频繁的carry"。以下是完整函数
2.gif
3.gif
[code]  1 SECP256K1_INLINE static void secp256k1_fe_mul_inner(uint32_t *r, const uint32_t *a, const uint32_t * SECP256K1_RESTRICT b) {  2     uint64_t c, d;  3     uint64_t u0, u1, u2, u3, u4, u5, u6, u7, u8;  4     uint32_t t9, t1, t0, t2, t3, t4, t5, t6, t7;  5     const uint32_t M = 0x3FFFFFFUL, R0 = 0x3D10UL, R1 = 0x400UL;  6   7     VERIFY_BITS(a[0], 30);  8     VERIFY_BITS(a[1], 30);  9     VERIFY_BITS(a[2], 30); 10     VERIFY_BITS(a[3], 30); 11     VERIFY_BITS(a[4], 30); 12     VERIFY_BITS(a[5], 30); 13     VERIFY_BITS(a[6], 30); 14     VERIFY_BITS(a[7], 30); 15     VERIFY_BITS(a[8], 30); 16     VERIFY_BITS(a[9], 26); 17     VERIFY_BITS(b[0], 30); 18     VERIFY_BITS(b[1], 30); 19     VERIFY_BITS(b[2], 30); 20     VERIFY_BITS(b[3], 30); 21     VERIFY_BITS(b[4], 30); 22     VERIFY_BITS(b[5], 30); 23     VERIFY_BITS(b[6], 30); 24     VERIFY_BITS(b[7], 30); 25     VERIFY_BITS(b[8], 30); 26     VERIFY_BITS(b[9], 26); 27  28     /** [... a b c] is a shorthand for ... + a= 26; c += u1 * R1; 96     VERIFY_BITS(t1, 26); 97     VERIFY_BITS(c, 38); 98     /* [d u1 0 t9 0 0 0 0 0 0 c-u1*R1 t1-u1*R0 t0] = [p11 p10 p9 0 0 0 0 0 0 0 p1 p0] */ 99     /* [d 0 0 t9 0 0 0 0 0 0 c t1 t0] = [p11 p10 p9 0 0 0 0 0 0 0 p1 p0] */100 101     c += (uint64_t)a[0] * b[2]102        + (uint64_t)a[1] * b[1]103        + (uint64_t)a[2] * b[0];104     VERIFY_BITS(c, 62);105     /* [d 0 0 t9 0 0 0 0 0 0 c t1 t0] = [p11 p10 p9 0 0 0 0 0 0 p2 p1 p0] */106     d += (uint64_t)a[3] * b[9]107        + (uint64_t)a[4] * b[8]108        + (uint64_t)a[5] * b[7]109        + (uint64_t)a[6] * b[6]110        + (uint64_t)a[7] * b[5]111        + (uint64_t)a[8] * b[4]112        + (uint64_t)a[9] * b[3];113     VERIFY_BITS(d, 63);114     /* [d 0 0 t9 0 0 0 0 0 0 c t1 t0] = [p12 p11 p10 p9 0 0 0 0 0 0 p2 p1 p0] */115     u2 = d & M; d >>= 26; c += u2 * R0;116     VERIFY_BITS(u2, 26);117     VERIFY_BITS(d, 37);118     VERIFY_BITS(c, 63);119     /* [d u2 0 0 t9 0 0 0 0 0 0 c-u2*R0 t1 t0] = [p12 p11 p10 p9 0 0 0 0 0 0 p2 p1 p0] */120     t2 = c & M; c >>= 26; c += u2 * R1;121     VERIFY_BITS(t2, 26);122     VERIFY_BITS(c, 38);123     /* [d u2 0 0 t9 0 0 0 0 0 c-u2*R1 t2-u2*R0 t1 t0] = [p12 p11 p10 p9 0 0 0 0 0 0 p2 p1 p0] */124     /* [d 0 0 0 t9 0 0 0 0 0 c t2 t1 t0] = [p12 p11 p10 p9 0 0 0 0 0 0 p2 p1 p0] */125 126     c += (uint64_t)a[0] * b[3]127        + (uint64_t)a[1] * b[2]128        + (uint64_t)a[2] * b[1]129        + (uint64_t)a[3] * b[0];130     VERIFY_BITS(c, 63);131     /* [d 0 0 0 t9 0 0 0 0 0 c t2 t1 t0] = [p12 p11 p10 p9 0 0 0 0 0 p3 p2 p1 p0] */132     d += (uint64_t)a[4] * b[9]133        + (uint64_t)a[5] * b[8]134        + (uint64_t)a[6] * b[7]135        + (uint64_t)a[7] * b[6]136        + (uint64_t)a[8] * b[5]137        + (uint64_t)a[9] * b[4];138     VERIFY_BITS(d, 63);139     /* [d 0 0 0 t9 0 0 0 0 0 c t2 t1 t0] = [p13 p12 p11 p10 p9 0 0 0 0 0 p3 p2 p1 p0] */140     u3 = d & M; d >>= 26; c += u3 * R0;141     VERIFY_BITS(u3, 26);142     VERIFY_BITS(d, 37);143     /* VERIFY_BITS(c, 64); */144     /* [d u3 0 0 0 t9 0 0 0 0 0 c-u3*R0 t2 t1 t0] = [p13 p12 p11 p10 p9 0 0 0 0 0 p3 p2 p1 p0] */145     t3 = c & M; c >>= 26; c += u3 * R1;146     VERIFY_BITS(t3, 26);147     VERIFY_BITS(c, 39);148     /* [d u3 0 0 0 t9 0 0 0 0 c-u3*R1 t3-u3*R0 t2 t1 t0] = [p13 p12 p11 p10 p9 0 0 0 0 0 p3 p2 p1 p0] */149     /* [d 0 0 0 0 t9 0 0 0 0 c t3 t2 t1 t0] = [p13 p12 p11 p10 p9 0 0 0 0 0 p3 p2 p1 p0] */150 151     c += (uint64_t)a[0] * b[4]152        + (uint64_t)a[1] * b[3]153        + (uint64_t)a[2] * b[2]154        + (uint64_t)a[3] * b[1]155        + (uint64_t)a[4] * b[0];156     VERIFY_BITS(c, 63);157     /* [d 0 0 0 0 t9 0 0 0 0 c t3 t2 t1 t0] = [p13 p12 p11 p10 p9 0 0 0 0 p4 p3 p2 p1 p0] */158     d += (uint64_t)a[5] * b[9]159        + (uint64_t)a[6] * b[8]160        + (uint64_t)a[7] * b[7]161        + (uint64_t)a[8] * b[6]162        + (uint64_t)a[9] * b[5];163     VERIFY_BITS(d, 62);164     /* [d 0 0 0 0 t9 0 0 0 0 c t3 t2 t1 t0] = [p14 p13 p12 p11 p10 p9 0 0 0 0 p4 p3 p2 p1 p0] */165     u4 = d & M; d >>= 26; c += u4 * R0;166     VERIFY_BITS(u4, 26);167     VERIFY_BITS(d, 36);168     /* VERIFY_BITS(c, 64); */169     /* [d u4 0 0 0 0 t9 0 0 0 0 c-u4*R0 t3 t2 t1 t0] = [p14 p13 p12 p11 p10 p9 0 0 0 0 p4 p3 p2 p1 p0] */170     t4 = c & M; c >>= 26; c += u4 * R1;171     VERIFY_BITS(t4, 26);172     VERIFY_BITS(c, 39);173     /* [d u4 0 0 0 0 t9 0 0 0 c-u4*R1 t4-u4*R0 t3 t2 t1 t0] = [p14 p13 p12 p11 p10 p9 0 0 0 0 p4 p3 p2 p1 p0] */174     /* [d 0 0 0 0 0 t9 0 0 0 c t4 t3 t2 t1 t0] = [p14 p13 p12 p11 p10 p9 0 0 0 0 p4 p3 p2 p1 p0] */175 176     c += (uint64_t)a[0] * b[5]177        + (uint64_t)a[1] * b[4]178        + (uint64_t)a[2] * b[3]179        + (uint64_t)a[3] * b[2]180        + (uint64_t)a[4] * b[1]181        + (uint64_t)a[5] * b[0];182     VERIFY_BITS(c, 63);183     /* [d 0 0 0 0 0 t9 0 0 0 c t4 t3 t2 t1 t0] = [p14 p13 p12 p11 p10 p9 0 0 0 p5 p4 p3 p2 p1 p0] */184     d += (uint64_t)a[6] * b[9]185        + (uint64_t)a[7] * b[8]186        + (uint64_t)a[8] * b[7]187        + (uint64_t)a[9] * b[6];188     VERIFY_BITS(d, 62);189     /* [d 0 0 0 0 0 t9 0 0 0 c t4 t3 t2 t1 t0] = [p15 p14 p13 p12 p11 p10 p9 0 0 0 p5 p4 p3 p2 p1 p0] */190     u5 = d & M; d >>= 26; c += u5 * R0;191     VERIFY_BITS(u5, 26);192     VERIFY_BITS(d, 36);193     /* VERIFY_BITS(c, 64); */194     /* [d u5 0 0 0 0 0 t9 0 0 0 c-u5*R0 t4 t3 t2 t1 t0] = [p15 p14 p13 p12 p11 p10 p9 0 0 0 p5 p4 p3 p2 p1 p0] */195     t5 = c & M; c >>= 26; c += u5 * R1;196     VERIFY_BITS(t5, 26);197     VERIFY_BITS(c, 39);198     /* [d u5 0 0 0 0 0 t9 0 0 c-u5*R1 t5-u5*R0 t4 t3 t2 t1 t0] = [p15 p14 p13 p12 p11 p10 p9 0 0 0 p5 p4 p3 p2 p1 p0] */199     /* [d 0 0 0 0 0 0 t9 0 0 c t5 t4 t3 t2 t1 t0] = [p15 p14 p13 p12 p11 p10 p9 0 0 0 p5 p4 p3 p2 p1 p0] */200 201     c += (uint64_t)a[0] * b[6]202        + (uint64_t)a[1] * b[5]203        + (uint64_t)a[2] * b[4]204        + (uint64_t)a[3] * b[3]205        + (uint64_t)a[4] * b[2]206        + (uint64_t)a[5] * b[1]207        + (uint64_t)a[6] * b[0];208     VERIFY_BITS(c, 63);209     /* [d 0 0 0 0 0 0 t9 0 0 c t5 t4 t3 t2 t1 t0] = [p15 p14 p13 p12 p11 p10 p9 0 0 p6 p5 p4 p3 p2 p1 p0] */210     d += (uint64_t)a[7] * b[9]211        + (uint64_t)a[8] * b[8]212        + (uint64_t)a[9] * b[7];213     VERIFY_BITS(d, 61);214     /* [d 0 0 0 0 0 0 t9 0 0 c t5 t4 t3 t2 t1 t0] = [p16 p15 p14 p13 p12 p11 p10 p9 0 0 p6 p5 p4 p3 p2 p1 p0] */215     u6 = d & M; d >>= 26; c += u6 * R0;216     VERIFY_BITS(u6, 26);217     VERIFY_BITS(d, 35);218     /* VERIFY_BITS(c, 64); */219     /* [d u6 0 0 0 0 0 0 t9 0 0 c-u6*R0 t5 t4 t3 t2 t1 t0] = [p16 p15 p14 p13 p12 p11 p10 p9 0 0 p6 p5 p4 p3 p2 p1 p0] */220     t6 = c & M; c >>= 26; c += u6 * R1;221     VERIFY_BITS(t6, 26);222     VERIFY_BITS(c, 39);223     /* [d u6 0 0 0 0 0 0 t9 0 c-u6*R1 t6-u6*R0 t5 t4 t3 t2 t1 t0] = [p16 p15 p14 p13 p12 p11 p10 p9 0 0 p6 p5 p4 p3 p2 p1 p0] */224     /* [d 0 0 0 0 0 0 0 t9 0 c t6 t5 t4 t3 t2 t1 t0] = [p16 p15 p14 p13 p12 p11 p10 p9 0 0 p6 p5 p4 p3 p2 p1 p0] */225 226     c += (uint64_t)a[0] * b[7]227        + (uint64_t)a[1] * b[6]228        + (uint64_t)a[2] * b[5]229        + (uint64_t)a[3] * b[4]230        + (uint64_t)a[4] * b[3]231        + (uint64_t)a[5] * b[2]232        + (uint64_t)a[6] * b[1]233        + (uint64_t)a[7] * b[0];234     /* VERIFY_BITS(c, 64); */235     VERIFY_CHECK(c >= 26; c += u7 * R0;242     VERIFY_BITS(u7, 26);243     VERIFY_BITS(d, 32);244     /* VERIFY_BITS(c, 64); */245     VERIFY_CHECK(c >= 26; c += u7 * R1;248     VERIFY_BITS(t7, 26);249     VERIFY_BITS(c, 38);250     /* [d u7 0 0 0 0 0 0 0 t9 c-u7*R1 t7-u7*R0 t6 t5 t4 t3 t2 t1 t0] = [p17 p16 p15 p14 p13 p12 p11 p10 p9 0 p7 p6 p5 p4 p3 p2 p1 p0] */251     /* [d 0 0 0 0 0 0 0 0 t9 c t7 t6 t5 t4 t3 t2 t1 t0] = [p17 p16 p15 p14 p13 p12 p11 p10 p9 0 p7 p6 p5 p4 p3 p2 p1 p0] */252 253     c += (uint64_t)a[0] * b[8]254        + (uint64_t)a[1] * b[7]255        + (uint64_t)a[2] * b[6]256        + (uint64_t)a[3] * b[5]257        + (uint64_t)a[4] * b[4]258        + (uint64_t)a[5] * b[3]259        + (uint64_t)a[6] * b[2]260        + (uint64_t)a[7] * b[1]261        + (uint64_t)a[8] * b[0];262     /* VERIFY_BITS(c, 64); */263     VERIFY_CHECK(c >= 26; c += u8 * R0;269     VERIFY_BITS(u8, 26);270     VERIFY_BITS(d, 31);271     /* VERIFY_BITS(c, 64); */272     VERIFY_CHECK(c >= 26; c += u8 * R1;292     VERIFY_BITS(r[8], 26);293     VERIFY_BITS(c, 39);294     /* [d u8 0 0 0 0 0 0 0 0 t9+c-u8*R1 r8-u8*R0 r7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */295     /* [d 0 0 0 0 0 0 0 0 0 t9+c r8 r7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */296     c   += d * R0 + t9;297     VERIFY_BITS(c, 45);298     /* [d 0 0 0 0 0 0 0 0 0 c-d*R0 r8 r7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */299     r[9] = c & (M >> 4); c >>= 22; c += d * (R1 = 26;319     VERIFY_BITS(r[1], 26);320     VERIFY_BITS(d, 27);321     VERIFY_CHECK(d

相关推荐

您需要登录后才可以回帖 登录 | 立即注册