1 关键结构体
1.1 secp256k1_fe
secp256k1库域元素field element,其具体定义如下- /** This field implementation represents the value as 10 uint32_t limbs in base
- * 2^26. */
- typedef struct {
- /* A field element f represents the sum(i=0..9, f.n[i] << (i*26)) mod p,
- * where p is the field modulus, 2^256 - 2^32 - 977.
- *
- * The individual limbs f.n[i] can exceed 2^26; the field's magnitude roughly
- * corresponds to how much excess is allowed. The value
- * sum(i=0..9, f.n[i] << (i*26)) may exceed p, unless the field element is
- * normalized. */
- uint32_t n[10];
- /*
- * Magnitude m requires:
- * n[i] <= 2 * m * (2^26 - 1) for i=0..8
- * n[9] <= 2 * m * (2^22 - 1)
- *
- * Normalized requires:
- * n[i] <= (2^26 - 1) for i=0..8
- * sum(i=0..9, n[i] << (i*26)) < p
- * (together these imply n[9] <= 2^22 - 1)
- */
- SECP256K1_FE_VERIFY_FIELDS
- } secp256k1_fe;
复制代码 域的模数p = 2256 - 232 - 977,域中大数r被分为10个limbs,其标准表示为每个limb是26-bit数(除了最后一个是22-bit数),即:
如之前所描述,由于magnitude的存在,在运算过程中,会使某些limbs超过26-bit,导致进位错误。归一化目标:1 确保最终值 < p; 2 所有limbs小于226(除了t9,小于222)。
在函数中共进行了两次模约减操作,首轮模约减主要目的是去除高位t9的22-bit之后多余部分:- /** Maximum allowed magnitudes for group element coordinates
- * in affine (x, y) and jacobian (x, y, z) representation. */
- #define SECP256K1_GE_X_MAGNITUDE_MAX 4
- #define SECP256K1_GE_Y_MAGNITUDE_MAX 3
- #define SECP256K1_GEJ_X_MAGNITUDE_MAX 4
- #define SECP256K1_GEJ_Y_MAGNITUDE_MAX 4
- #define SECP256K1_GEJ_Z_MAGNITUDE_MAX 1
复制代码 这里的x值为r/2256向下取整,由SECP256K1_GE_X_MAGNITUDE_MAX等宏定义可知x最大为4-bit数,因为2256 ≡ 232 + 977 mod p,所以超出2256部分即为 x*2256 ≡ x*(232 + 977) mod p ≡ x*(1 n[1], t2 = r->n[2], t3 = r->n[3], t4 = r->n[4], t5 = r->n[5], t6 = r->n[6], t7 = r->n[7], t8 = r->n[8], t9 = r->n[9]; /* z0 tracks a possible raw value of 0, z1 tracks a possible raw value of P */ uint32_t z0, z1; /* Reduce t9 at the start so there will be at most a single carry from the first pass */ uint32_t x = t9 >> 22; t9 &= 0x03FFFFFUL; /* The first pass ensures the magnitude is 1, ... */ t0 += x * 0x3D1UL; t1 += (x > 26); t0 &= 0x3FFFFFFUL; z0 = t0; z1 = t0 ^ 0x3D0UL; t2 += (t1 >> 26); t1 &= 0x3FFFFFFUL; z0 |= t1; z1 &= t1 ^ 0x40UL; t3 += (t2 >> 26); t2 &= 0x3FFFFFFUL; z0 |= t2; z1 &= t2; t4 += (t3 >> 26); t3 &= 0x3FFFFFFUL; z0 |= t3; z1 &= t3; t5 += (t4 >> 26); t4 &= 0x3FFFFFFUL; z0 |= t4; z1 &= t4; t6 += (t5 >> 26); t5 &= 0x3FFFFFFUL; z0 |= t5; z1 &= t5; t7 += (t6 >> 26); t6 &= 0x3FFFFFFUL; z0 |= t6; z1 &= t6; t8 += (t7 >> 26); t7 &= 0x3FFFFFFUL; z0 |= t7; z1 &= t7; t9 += (t8 >> 26); t8 &= 0x3FFFFFFUL; z0 |= t8; z1 &= t8; z0 |= t9; z1 &= t9 ^ 0x3C00000UL; /* ... except for a possible carry at bit 22 of t9 (i.e. bit 256 of the field element) */ return (z0 == 0) | (z1 == 0x3FFFFFFUL);}[/code]secp256k1_fe_normalize_to_zero该函数在不完全归一化(normalize)整个字段元素的情况下,判断其值是否为零或等于模数,代码中z0用于检测是否所有位都为 0,z1用于检测是否所有位构成了模数p。类似secp256k1_fe_normalizes_to_zero_var函数是secp256k1_fe_normalizes_to_zero的变体(var)版,其功能与前者一致:判断一个字段元素是否为0或模数p(即归一化后是否为 0),但使用了更快路径和懒惰归一化以提高性能,该函数的关键代码如下:- /** A group element in affine coordinates on the secp256k1 curve,
- * or occasionally on an isomorphic curve of the form y^2 = x^3 + 7*t^6.
- * Note: For exhaustive test mode, secp256k1 is replaced by a small subgroup of a different curve.
- */
- typedef struct {
- secp256k1_fe x;
- secp256k1_fe y;
- int infinity; /* whether this represents the point at infinity */
- } secp256k1_ge;
复制代码 若t9有进位(超过 22 位),说明有“≥2256”成分,我们提取出这部分(乘以 0x3D1 并加回 t0),这是归一化关键。然后立即对 t0 做一次判断:z0 == 0 表示可能是 0,z1 == 0x3FFFFFFUL 表示可能是 P(因为 t0 应该是 0x3D0),如果两个都不满足,直接返回 0,这是个非常快的判断分支。其他部分代码就和secp256k1_fe_normalizes_to_zero一致了。
3.3 secp256k1_fe_mul_inner
该函数是大数乘法的内部实现,libsecp256k1的作者Pieter Wuille的设计理念可以总结为:"所有公共API都要求输入是完全归一化的,但内部函数可以处理非归一化形式。中间值的大小必须受控,但可以比26位大(比如在该函数内部被限制在30bit),为的是避免频繁的carry"。以下是完整函数
[code] 1 SECP256K1_INLINE static void secp256k1_fe_mul_inner(uint32_t *r, const uint32_t *a, const uint32_t * SECP256K1_RESTRICT b) { 2 uint64_t c, d; 3 uint64_t u0, u1, u2, u3, u4, u5, u6, u7, u8; 4 uint32_t t9, t1, t0, t2, t3, t4, t5, t6, t7; 5 const uint32_t M = 0x3FFFFFFUL, R0 = 0x3D10UL, R1 = 0x400UL; 6 7 VERIFY_BITS(a[0], 30); 8 VERIFY_BITS(a[1], 30); 9 VERIFY_BITS(a[2], 30); 10 VERIFY_BITS(a[3], 30); 11 VERIFY_BITS(a[4], 30); 12 VERIFY_BITS(a[5], 30); 13 VERIFY_BITS(a[6], 30); 14 VERIFY_BITS(a[7], 30); 15 VERIFY_BITS(a[8], 30); 16 VERIFY_BITS(a[9], 26); 17 VERIFY_BITS(b[0], 30); 18 VERIFY_BITS(b[1], 30); 19 VERIFY_BITS(b[2], 30); 20 VERIFY_BITS(b[3], 30); 21 VERIFY_BITS(b[4], 30); 22 VERIFY_BITS(b[5], 30); 23 VERIFY_BITS(b[6], 30); 24 VERIFY_BITS(b[7], 30); 25 VERIFY_BITS(b[8], 30); 26 VERIFY_BITS(b[9], 26); 27 28 /** [... a b c] is a shorthand for ... + a= 26; c += u1 * R1; 96 VERIFY_BITS(t1, 26); 97 VERIFY_BITS(c, 38); 98 /* [d u1 0 t9 0 0 0 0 0 0 c-u1*R1 t1-u1*R0 t0] = [p11 p10 p9 0 0 0 0 0 0 0 p1 p0] */ 99 /* [d 0 0 t9 0 0 0 0 0 0 c t1 t0] = [p11 p10 p9 0 0 0 0 0 0 0 p1 p0] */100 101 c += (uint64_t)a[0] * b[2]102 + (uint64_t)a[1] * b[1]103 + (uint64_t)a[2] * b[0];104 VERIFY_BITS(c, 62);105 /* [d 0 0 t9 0 0 0 0 0 0 c t1 t0] = [p11 p10 p9 0 0 0 0 0 0 p2 p1 p0] */106 d += (uint64_t)a[3] * b[9]107 + (uint64_t)a[4] * b[8]108 + (uint64_t)a[5] * b[7]109 + (uint64_t)a[6] * b[6]110 + (uint64_t)a[7] * b[5]111 + (uint64_t)a[8] * b[4]112 + (uint64_t)a[9] * b[3];113 VERIFY_BITS(d, 63);114 /* [d 0 0 t9 0 0 0 0 0 0 c t1 t0] = [p12 p11 p10 p9 0 0 0 0 0 0 p2 p1 p0] */115 u2 = d & M; d >>= 26; c += u2 * R0;116 VERIFY_BITS(u2, 26);117 VERIFY_BITS(d, 37);118 VERIFY_BITS(c, 63);119 /* [d u2 0 0 t9 0 0 0 0 0 0 c-u2*R0 t1 t0] = [p12 p11 p10 p9 0 0 0 0 0 0 p2 p1 p0] */120 t2 = c & M; c >>= 26; c += u2 * R1;121 VERIFY_BITS(t2, 26);122 VERIFY_BITS(c, 38);123 /* [d u2 0 0 t9 0 0 0 0 0 c-u2*R1 t2-u2*R0 t1 t0] = [p12 p11 p10 p9 0 0 0 0 0 0 p2 p1 p0] */124 /* [d 0 0 0 t9 0 0 0 0 0 c t2 t1 t0] = [p12 p11 p10 p9 0 0 0 0 0 0 p2 p1 p0] */125 126 c += (uint64_t)a[0] * b[3]127 + (uint64_t)a[1] * b[2]128 + (uint64_t)a[2] * b[1]129 + (uint64_t)a[3] * b[0];130 VERIFY_BITS(c, 63);131 /* [d 0 0 0 t9 0 0 0 0 0 c t2 t1 t0] = [p12 p11 p10 p9 0 0 0 0 0 p3 p2 p1 p0] */132 d += (uint64_t)a[4] * b[9]133 + (uint64_t)a[5] * b[8]134 + (uint64_t)a[6] * b[7]135 + (uint64_t)a[7] * b[6]136 + (uint64_t)a[8] * b[5]137 + (uint64_t)a[9] * b[4];138 VERIFY_BITS(d, 63);139 /* [d 0 0 0 t9 0 0 0 0 0 c t2 t1 t0] = [p13 p12 p11 p10 p9 0 0 0 0 0 p3 p2 p1 p0] */140 u3 = d & M; d >>= 26; c += u3 * R0;141 VERIFY_BITS(u3, 26);142 VERIFY_BITS(d, 37);143 /* VERIFY_BITS(c, 64); */144 /* [d u3 0 0 0 t9 0 0 0 0 0 c-u3*R0 t2 t1 t0] = [p13 p12 p11 p10 p9 0 0 0 0 0 p3 p2 p1 p0] */145 t3 = c & M; c >>= 26; c += u3 * R1;146 VERIFY_BITS(t3, 26);147 VERIFY_BITS(c, 39);148 /* [d u3 0 0 0 t9 0 0 0 0 c-u3*R1 t3-u3*R0 t2 t1 t0] = [p13 p12 p11 p10 p9 0 0 0 0 0 p3 p2 p1 p0] */149 /* [d 0 0 0 0 t9 0 0 0 0 c t3 t2 t1 t0] = [p13 p12 p11 p10 p9 0 0 0 0 0 p3 p2 p1 p0] */150 151 c += (uint64_t)a[0] * b[4]152 + (uint64_t)a[1] * b[3]153 + (uint64_t)a[2] * b[2]154 + (uint64_t)a[3] * b[1]155 + (uint64_t)a[4] * b[0];156 VERIFY_BITS(c, 63);157 /* [d 0 0 0 0 t9 0 0 0 0 c t3 t2 t1 t0] = [p13 p12 p11 p10 p9 0 0 0 0 p4 p3 p2 p1 p0] */158 d += (uint64_t)a[5] * b[9]159 + (uint64_t)a[6] * b[8]160 + (uint64_t)a[7] * b[7]161 + (uint64_t)a[8] * b[6]162 + (uint64_t)a[9] * b[5];163 VERIFY_BITS(d, 62);164 /* [d 0 0 0 0 t9 0 0 0 0 c t3 t2 t1 t0] = [p14 p13 p12 p11 p10 p9 0 0 0 0 p4 p3 p2 p1 p0] */165 u4 = d & M; d >>= 26; c += u4 * R0;166 VERIFY_BITS(u4, 26);167 VERIFY_BITS(d, 36);168 /* VERIFY_BITS(c, 64); */169 /* [d u4 0 0 0 0 t9 0 0 0 0 c-u4*R0 t3 t2 t1 t0] = [p14 p13 p12 p11 p10 p9 0 0 0 0 p4 p3 p2 p1 p0] */170 t4 = c & M; c >>= 26; c += u4 * R1;171 VERIFY_BITS(t4, 26);172 VERIFY_BITS(c, 39);173 /* [d u4 0 0 0 0 t9 0 0 0 c-u4*R1 t4-u4*R0 t3 t2 t1 t0] = [p14 p13 p12 p11 p10 p9 0 0 0 0 p4 p3 p2 p1 p0] */174 /* [d 0 0 0 0 0 t9 0 0 0 c t4 t3 t2 t1 t0] = [p14 p13 p12 p11 p10 p9 0 0 0 0 p4 p3 p2 p1 p0] */175 176 c += (uint64_t)a[0] * b[5]177 + (uint64_t)a[1] * b[4]178 + (uint64_t)a[2] * b[3]179 + (uint64_t)a[3] * b[2]180 + (uint64_t)a[4] * b[1]181 + (uint64_t)a[5] * b[0];182 VERIFY_BITS(c, 63);183 /* [d 0 0 0 0 0 t9 0 0 0 c t4 t3 t2 t1 t0] = [p14 p13 p12 p11 p10 p9 0 0 0 p5 p4 p3 p2 p1 p0] */184 d += (uint64_t)a[6] * b[9]185 + (uint64_t)a[7] * b[8]186 + (uint64_t)a[8] * b[7]187 + (uint64_t)a[9] * b[6];188 VERIFY_BITS(d, 62);189 /* [d 0 0 0 0 0 t9 0 0 0 c t4 t3 t2 t1 t0] = [p15 p14 p13 p12 p11 p10 p9 0 0 0 p5 p4 p3 p2 p1 p0] */190 u5 = d & M; d >>= 26; c += u5 * R0;191 VERIFY_BITS(u5, 26);192 VERIFY_BITS(d, 36);193 /* VERIFY_BITS(c, 64); */194 /* [d u5 0 0 0 0 0 t9 0 0 0 c-u5*R0 t4 t3 t2 t1 t0] = [p15 p14 p13 p12 p11 p10 p9 0 0 0 p5 p4 p3 p2 p1 p0] */195 t5 = c & M; c >>= 26; c += u5 * R1;196 VERIFY_BITS(t5, 26);197 VERIFY_BITS(c, 39);198 /* [d u5 0 0 0 0 0 t9 0 0 c-u5*R1 t5-u5*R0 t4 t3 t2 t1 t0] = [p15 p14 p13 p12 p11 p10 p9 0 0 0 p5 p4 p3 p2 p1 p0] */199 /* [d 0 0 0 0 0 0 t9 0 0 c t5 t4 t3 t2 t1 t0] = [p15 p14 p13 p12 p11 p10 p9 0 0 0 p5 p4 p3 p2 p1 p0] */200 201 c += (uint64_t)a[0] * b[6]202 + (uint64_t)a[1] * b[5]203 + (uint64_t)a[2] * b[4]204 + (uint64_t)a[3] * b[3]205 + (uint64_t)a[4] * b[2]206 + (uint64_t)a[5] * b[1]207 + (uint64_t)a[6] * b[0];208 VERIFY_BITS(c, 63);209 /* [d 0 0 0 0 0 0 t9 0 0 c t5 t4 t3 t2 t1 t0] = [p15 p14 p13 p12 p11 p10 p9 0 0 p6 p5 p4 p3 p2 p1 p0] */210 d += (uint64_t)a[7] * b[9]211 + (uint64_t)a[8] * b[8]212 + (uint64_t)a[9] * b[7];213 VERIFY_BITS(d, 61);214 /* [d 0 0 0 0 0 0 t9 0 0 c t5 t4 t3 t2 t1 t0] = [p16 p15 p14 p13 p12 p11 p10 p9 0 0 p6 p5 p4 p3 p2 p1 p0] */215 u6 = d & M; d >>= 26; c += u6 * R0;216 VERIFY_BITS(u6, 26);217 VERIFY_BITS(d, 35);218 /* VERIFY_BITS(c, 64); */219 /* [d u6 0 0 0 0 0 0 t9 0 0 c-u6*R0 t5 t4 t3 t2 t1 t0] = [p16 p15 p14 p13 p12 p11 p10 p9 0 0 p6 p5 p4 p3 p2 p1 p0] */220 t6 = c & M; c >>= 26; c += u6 * R1;221 VERIFY_BITS(t6, 26);222 VERIFY_BITS(c, 39);223 /* [d u6 0 0 0 0 0 0 t9 0 c-u6*R1 t6-u6*R0 t5 t4 t3 t2 t1 t0] = [p16 p15 p14 p13 p12 p11 p10 p9 0 0 p6 p5 p4 p3 p2 p1 p0] */224 /* [d 0 0 0 0 0 0 0 t9 0 c t6 t5 t4 t3 t2 t1 t0] = [p16 p15 p14 p13 p12 p11 p10 p9 0 0 p6 p5 p4 p3 p2 p1 p0] */225 226 c += (uint64_t)a[0] * b[7]227 + (uint64_t)a[1] * b[6]228 + (uint64_t)a[2] * b[5]229 + (uint64_t)a[3] * b[4]230 + (uint64_t)a[4] * b[3]231 + (uint64_t)a[5] * b[2]232 + (uint64_t)a[6] * b[1]233 + (uint64_t)a[7] * b[0];234 /* VERIFY_BITS(c, 64); */235 VERIFY_CHECK(c >= 26; c += u7 * R0;242 VERIFY_BITS(u7, 26);243 VERIFY_BITS(d, 32);244 /* VERIFY_BITS(c, 64); */245 VERIFY_CHECK(c >= 26; c += u7 * R1;248 VERIFY_BITS(t7, 26);249 VERIFY_BITS(c, 38);250 /* [d u7 0 0 0 0 0 0 0 t9 c-u7*R1 t7-u7*R0 t6 t5 t4 t3 t2 t1 t0] = [p17 p16 p15 p14 p13 p12 p11 p10 p9 0 p7 p6 p5 p4 p3 p2 p1 p0] */251 /* [d 0 0 0 0 0 0 0 0 t9 c t7 t6 t5 t4 t3 t2 t1 t0] = [p17 p16 p15 p14 p13 p12 p11 p10 p9 0 p7 p6 p5 p4 p3 p2 p1 p0] */252 253 c += (uint64_t)a[0] * b[8]254 + (uint64_t)a[1] * b[7]255 + (uint64_t)a[2] * b[6]256 + (uint64_t)a[3] * b[5]257 + (uint64_t)a[4] * b[4]258 + (uint64_t)a[5] * b[3]259 + (uint64_t)a[6] * b[2]260 + (uint64_t)a[7] * b[1]261 + (uint64_t)a[8] * b[0];262 /* VERIFY_BITS(c, 64); */263 VERIFY_CHECK(c >= 26; c += u8 * R0;269 VERIFY_BITS(u8, 26);270 VERIFY_BITS(d, 31);271 /* VERIFY_BITS(c, 64); */272 VERIFY_CHECK(c >= 26; c += u8 * R1;292 VERIFY_BITS(r[8], 26);293 VERIFY_BITS(c, 39);294 /* [d u8 0 0 0 0 0 0 0 0 t9+c-u8*R1 r8-u8*R0 r7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */295 /* [d 0 0 0 0 0 0 0 0 0 t9+c r8 r7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */296 c += d * R0 + t9;297 VERIFY_BITS(c, 45);298 /* [d 0 0 0 0 0 0 0 0 0 c-d*R0 r8 r7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */299 r[9] = c & (M >> 4); c >>= 22; c += d * (R1 = 26;319 VERIFY_BITS(r[1], 26);320 VERIFY_BITS(d, 27);321 VERIFY_CHECK(d |