From 93989d07beb25b1d034e3320af4cd246a88ae034 Mon Sep 17 00:00:00 2001 From: mleku Date: Sat, 1 Nov 2025 17:57:52 +0000 Subject: [PATCH] remove 32 bit limbs --- README.md | 3 - libp256k1.a | Bin 1864266 -> 1864266 bytes libp256k1.so | Bin 1549360 -> 1549360 bytes src/field.h | 4 +- src/field_10x26.h | 57 -- src/field_10x26_impl.h | 1232 ---------------------------------- src/field_impl.h | 2 - src/modinv32.h | 43 -- src/modinv32_impl.h | 725 -------------------- src/precomputed_ecmult.o | Bin 1053184 -> 1053184 bytes src/precomputed_ecmult_gen.o | Bin 27032 -> 27032 bytes src/scalar.h | 2 - src/scalar_8x32.h | 19 - src/scalar_8x32_impl.h | 816 ---------------------- src/scalar_impl.h | 2 - src/secp256k1.o | Bin 781688 -> 781688 bytes src/util.h | 6 +- 17 files changed, 2 insertions(+), 2909 deletions(-) delete mode 100644 src/field_10x26.h delete mode 100644 src/field_10x26_impl.h delete mode 100644 src/modinv32.h delete mode 100644 src/modinv32_impl.h delete mode 100644 src/scalar_8x32.h delete mode 100644 src/scalar_8x32_impl.h diff --git a/README.md b/README.md index 90edae1..a045192 100644 --- a/README.md +++ b/README.md @@ -36,12 +36,9 @@ Implementation details * Field operations * Optimized implementation of arithmetic modulo the curve's field size (2^256 - 0x1000003D1). * Using 5 52-bit limbs - * Using 10 26-bit limbs (including hand-optimized assembly for 32-bit ARM, by Wladimir J. van der Laan). - * This is an experimental feature that has not received enough scrutiny to satisfy the standard of quality of this library but is made available for testing and review by the community. * Scalar operations * Optimized implementation without data-dependent branches of arithmetic modulo the curve's order. * Using 4 64-bit limbs (relying on __int128 support in the compiler). - * Using 8 32-bit limbs. * Modular inverses (both field elements and scalars) based on [safegcd](https://gcd.cr.yp.to/index.html) with some modifications, and a variable-time variant (by Peter Dettman). * Group operations * Point addition formula specifically simplified for the curve equation (y^2 = x^3 + 7). diff --git a/libp256k1.a b/libp256k1.a index fdf44d3635ec21296a68d38c7f607591278582f7..c9747cda9e41a6cc175f3f612f525ed9123f2813 100644 GIT binary patch delta 5889 zcmbVQ33OCN7VTH{tJ2-+4hg?I3nZODNZ6NvfMA1wj$qgZ*MMROs|MLS3WQFR0J02A z0)!HAfZ%|`h=`+}@8B}ZnPG4NaR!46A|iqk6p6?|5uI06Kco}JW$be*{r+3-yY=e* zs_MLcV#ezyX3T#U-tl_}G{2QO9PJ$V$-Kcj?uR6R6cL{UerTq@Mp)TNV0j?jI+_V@ zI3Z2ID(EVXbb!4mkJx~6MlH+|^YS6d%I^ey;}bfu|`(vww@wC1TCy@ z2SR_UJ+@lM2E+5dXcVh&0i>CbCGHNvOzqWNl)H&n3||Bjto<#TDWQx4;YNrAik+23+DuKC}}*i~+Z}^cXCw zGojlgD}5^rX=eo3oqrcqr=X`ZRj!o7Ot*Hv1!t2WAU2+Y{BPQhhBzt`TAetl7s zaT@l=*jN^>&>jGv%4zw}Ly=3<)Qcoux(xq9D^9eH_2dumRUz~e8}B2-j3E1Tq)6yX zI*PPH5-|F^rjL@3siMpMxF+`&l0u_5J330_+)8?j_{pRT^b!3hlPPe6cxo~!fPo_6 zezMNEnLTs2^yw%1%pz{_{bb@ZhPpCFM`87tLWUT_*~-z{Z|@Y+4sI3arjTK9yXZF+ zAxDaxQ<2hL%$cwA^`1t$82PSo`Qls*?It$PBFRPp`=_$@6PKrvcE$u(#u!mhM7oK4 zrlWNdD;uL@N5@c)cx5_iWlVN`I3@~U3cJ2QQ$PT>D4l^+XSiN3h@!e@22Ohx3yzh3 zy~KSpNq3kdHq9iXjAF*d>1dp2J0t8GGcF1{!v0}fzneue*h8_K$Aj3M39ZFVA(CjM zv+KqSxS6DhaEN5cwGEN+#(Z{ZyvFENL|Q?qxTlDWB8!-)I0@~=TQdo_YKn-#VY&FK zn52$eKH@(ys!7zJmvV=LR_rQCABQn3n(4=b zC2DhsUzsTqlEBwvb!H{HxoBN<`Xr29ca;(m9*I(la}g~6pIF*7;hj&)tf66&O^j{q znQ3D90@7A&j*$CctL2_gHWA}-_W8^!>gM8H85srJ#gGN$VqP0B_(I_D%yh)J>0`+$ zw3S5SGdy?U7Ywp(8GR364LoD-n~J78rh&terQBM$q?@CqdPoh>F}8oI_2@#jWE5+`$V0KH;n zcXgN@n-0XiHfDA=6ce@B1I1=q?By^c9Rpg<$w7TfEq9fXt+jm3H7Iw`^0n8ZoFip5 zH?w1pC^;A<4^aLg2{}lKg$}cmDn!i82_cjfbvciIxU(ds$=TB9x;hr4{opwtD_Z(vn$LYIY0}*fQ;{g-LX2 zvfe@U@w6_7POvs~p(%Bn?BY{76o+A*%3Dh}&C&nDapc2oN0;k`GK?=DD947_3R8$^f zx4}j0^da^zxT{H?0jxJIBVu_q%QRO3uoROqKXaKJ4B+%jv8S4CY!hw3i0+)8DFK#H zj==GBEd(Wu3%#u+huO!T$XXK8jOFy0n?ulGaz>xpGg{Nwo~!LAZTsqIe)Gc-409&UYhf~{cLiFr5&%u6q@P{@d9JAp2nTc_C=v^8SGX_gA_SkIqk5pbUV}Mgr1M6XD@x4={4}ZuoFS!Io)R)cGVf2Eo{qcbK$T!_62Jec#;8kc1Hqp zLJUN1CtM&pr&Ats8qcvgx_!wWG=i6~kk|(2O;o4RGF$;>`k0>sOD{)?$@Ef5o{_*z zEOWJbK2wqxL}fjC*q_IZ{R%NSgW06i|=?#!Es;B?-^`s8bNeN5!#a5-jmgUM2S zGEa(OpxJbcib#qsdd{Y} zN>h}I>P8fGwDjg^)m2}0g(7YsQPZ(AwX`|fMw`Y%Q8X@zewSJEh^>24OEI9?Bd`olgn-@4m^Q7yF>-5)kNGM8smPa`rR;b$KiCb z4g9SQyjYw?YC-K&2eA;d#X}Ue^EL$TGjxTG@U@MwLTvhhr3JN19m2B42wz4atg#Wk zuo2cMgot*jLnv>I@L3%KHcyp}@R5zMzC`t64ajwN)5;RviZ!5GQ7Rg@;uSQUQKgJ# z1&r1}T`#Uv*>Q_j1=h}r8NN!t!e;ltaYdOsaX-SByGlIzBTJ1O(@MJwHa4E-``TQs zLblm7Mk*Rd>LZcHmd26Q%FMi+&fc-9im@#U;xB4)0s5XD9FbHWx0UZ|rP_O*uyu#7 zWZ`rN=18Lz5w(5dIpYJYu-vrPVy}+y;laZa<@tl#6QbfSr|Y#-buu!M%i*E_4)4Pt zFWpJ7eF=ZE@>QN#d4MNmAHY`ipcBWfZ7-ga(s_3{C|?PB)&bJO>0t}Ecz-L;cEzH8 z3l50#ZahxhlErV2oRpm9>6;|~_zXY$ofJ4-gOhTybagO)fk&jDr6_qrNl?4c+4AI$ zcId1e)|G^==ubmX>N;U`f)sQMC_g+xava3U&gRcD&t;sBeMr>ELn0tvzQEj8TnFBR zdiK{F)d><1Q#r<=ON3(Pl+wMQVU&D>-bQfU<^L++vbaW%u>52ofGXz_q~O;t!EU6$Lq&d(JlN! z(^xTYcis*|YCaJUOyC(2G0%QJLFGmltjcz(wqI?QZ z@x6dYXgt37=Ng$+k9(8iIDOIf_^OddmW2QT5~)^t6E8%uxJ9SrAsnNVa)Z2K(XN_E zE=%92LMkFMCYcs$gv7e1-MT*849)QEsQhwU@FDAafQciWc@ueS`vCTfm)7vKc0og4 z*a>}B1}f>z;xhToN6Ejcyh>dr@@Mc7tq&CAwNfc+Io@@Zv~o_K{5~NmNP*L*9zQ&T z6VG3xSm?cA-#YFUaUp)B|E}V4U4xC9xn+cWI5Q93VVCHaY9iVf@$@8lX=SvQpHO|2 zm#e`JS_MVCf_NUgih$ZK+Ro-RaK|EKfJ=7C*ryYh{&M0tMKv+kY@uN=NFddQN|I-K7xV8K`;(5LCwryb}EMlGojr9Wc3D%NFxu13V zLy2>@{3U@OdE_=eR!y~qy^Xh74{J@&h&QTu*ZLhy=@!Rs;W%P&IB~dexN*ed@Zj*` zh{MqYM?8)M98GbUID9z#I1+Ir;Yh}jf}@!gyQTS$x5o@S=X^n(VLa+EI06a+Dk36+fNYU*L=DWV`oEJ-7?-insr37Ax$oAi z_p7S2@AR~Nr>B*_1|NH!{afD8ES7W&e%cJsPkA90prvrdf)^5uulx1XSkN8NN7K4e%cI+ zLnd^apeJvIA?aEQd-!i+<5}ooO^_>PaktWU9)QcSkRoc%LO(3)$+J)g==aZ0q+cJ@ zq@0HjB1|kDSLkhk7?o4^UV{>wLDN_y@m3xD1FbmG_WF(+aHSCL6g3mcP^}O9`VkS` zhukI-3rUjpC)?CK`IsOwpTISFypR-XK6WBcWZh4C3D+c&34KMsNu&q{h?gdje7HwM zKS5S&gW0Q(N}p`edpfa;pC=KIHq4eXG7PJG5gDQlXUj$!e(x2Lba+r)Eh0nVVUax< zAxDUvlabO$W*ueZ>otXBYWcQ;QQ~R@?J8=flQ^w_y{oL*qHYRF*CyCfMvMFs(p5Y* z6|ECl#b_gTWCV4HJyS_rt;qKI=rDjOtZ%+S0Rik{&NQSt-L@}3jOyFdaM~rzH%9vP z6cdX{Hz*bBi%FgqV64E1#)+nDLEGqpFzh+(9n<>bbdtgrMs64jVnZ8j-@E-JMr+0H z94p`+k|+Xxk|NjEPsVB$?5D8?M$Zz`7Uqh_N=P1Az{H9(&`}&HCS1QzLNpG`#FbK# z@W`^^hayyys6TPVT`aC;rHxdRQ1jxO)&{5Kf-H+`dCVeivCtJSmn9cq%<>k-QlZ})AgM%qp1nFn z44X?jhz&t95uVfS(wpA)e4daFNz^^$q%{dPVo4_;wZMb(tGQ23LPX7 zS8K-z{DMKIt#zOMSOdqHiIdUv&=jy(+9|g-HtA+*tsat+=xEbFNnbaQbjb>?cEa;p z5kBAIUfnmoElNcw-Q5u6u5!X2)1#`J^e_mx+ZhBFY*Xai?XuS*If>J5QWAPa%;;iq zXEYy(du_ywt|-PBVs{i<7-CP0JD8DV$XQvaZ*9n3WMrBl-+nvFoecSoJ5bJ&vYMMa zqkEX_3zP3s{(cELNQs3Occv;tMDaL3%8I(2M>g&(@riP_^mREAq#xg3m0=7@Bcv2D z8I>*6M2s@Jy-D<->Fm`VIvF`I_%5^_N)ROX5?Gqeuvp)PyGG%2voSNJoVx(4)=4gb zbsxmZ2TQ7Y+6758AVa2Fth?Z@LbS-}T`+VkMq7F-rybB*KGAB^aR&^R>uW-6MTkAa zOw-l|jIA(KGBPoqHTj(|9iKHZj0{hMQG%bP74tu>_qa(C<5*fkY6KD3G9C4Kv2;MhuIpie@b#SV7=%K5KE4*HttmbEZ)tSm)YDLG~o2Sc>4&eX&-LDh;E#IQwA)$ z5`kTG4frIC4ZZb+N7|dGoDl)z? zzKjQY8qLj^gX`a%W=mOWTrSQ5^U`a~@7G*xC(tFcLYf+%sS(xZSppo=Uq8=+V5?RR zN%SK<`&%{-64t3_Cw++-Yv6ffCxW^--8T!wPZwE>o<}i-lWq^$t*uOl4`#s*2He^W z3C#2}5ILQ3f#}T6xfn2((*nKgCH9o&tHVNK8(edvI+2#+3NW{ac{#A;N~9P^>&kMq zXy&FX;fQ*^RF>;Q1(tJ;I8)2^Iu@787+i`ZdQyCt!CMI5RNhSg>O0mqBDg0|iCOL7 zX7L`GCtlOguST^L&(++V%A!l%T0?h4>b@~`)rPKC>b^F0tHb;*1b|twAjlo#D>OH= z(z5|v9By5~T-Xyqx574Jq?w~Vc~y?eFkV+>4o+8*ASg~3 z0~LznDq^iP6_VuKu(U?FG;(^;q_~QNDB|0Bi*JbHui1j8(d4)|jN^X{j`((CQXBow zq`1_eC>2MVP&A^YH%F^c#ijKWuQj4*W@>Gg(KMPg=7rIy6WMjFWzbYf_ZUupR9M`H z1hQ0dZ>YXBQqL``RK=`=T-88xv4d?KtQB(E-KtDHnp-o&0#&IHw;Wwv5Qh7MiHkc9 zr;AMB|Cqpw#AT%BGkii2e`yL)8=_|Cbh(Le)kIh>*56=>KEox1u%s!%w_ylXCc*_1 zp-LeH4VMtY%BBcsLI~JAn@xnzO@!())r;pquCtwrGNTpGfoesmXxfVPXgDJwqgg(q z=OEOJYgBgJqE&$nXT=O(rQcyQd*Gy^%&oW|;mf^Qth>n)f+q~6*##S#PV<;yKB7XN zH)%YgXnfibi8Qt}jXbQ(%*pAD?dvx(wn;&3SCdPkN6p}%r1FBPJYp!--m}BheN>-? z)18^Hv>NTO3=#|kQPoq)^UsXwspLySTt9wAv4qNk?v*O=obPRBeX&dEa}N&NMDX4j)S@$S@d zpuwn4kR&lVgFozer$K#A$CxVW_yWPh&l)w+ILC=w)Cl0zzv#-D`|hi=|IiZ2igR3_ zT4Q}g`as?>%3PIPEdnndpNN8ieD6Jf5FcB>t5)^HK(0#hlJF1WY2hiX^rZg6AYQ83 zhcub>kb8Milb>-ef<08 zk!xq~&fTs_&6ncIaXclscDDI^iprgsWV*evcwunRRR*8D#y5-%Y!7dRdBdo{1KdmJ zf}bT<;FaSdLj$dxS%LV?>2bPUxow$^_q-^49nMj_0->9jJ>D-BTxx`OJm1}Gv61Uc zqg?o|7#A6N`7+bghBDV8f;`W3+XS8sj-MNP&TK{OQ>R}3I3KJzt~I&ov=fy@Jl^vL z9-%IL@xNFjpFo8)1$VDM>`z>l1T8F(!23O6p34OY%anvIx)w`8yD@CiR7^4 z8Wpl4C}ZO269$4O^t9Wk&lb&KcqlBt+!j2@`Y~YQcm{7KZ*8B!0rA!N`~p0*02$ztUDx&+iRb=y;wa>IU6VfoymayG@p4hjo3j-Q zDI4^j19?&jFcFs?_ydIiD06i+L}xKgipA_WznWM$aqf zJ?!ogzW|}1Sj!IW-6=#iUR-h42ke+&HO-yXyqnvljL<-Z38=zV@@ TX)XL;7J-(L`q427)0_Pl8x^$U diff --git a/libp256k1.so b/libp256k1.so index 6425da4dee7fe8cbea3476f8648ba0b616956b7b..90239ad34615f7671a90ac6bcbfc6000a88b9f18 100755 GIT binary patch delta 5390 zcmbVP32;5We7JWT*?m0;R;3tERP+a5D@`aiqmY#l?)7G z0679AGyx3;Ojr;BuYDA2ZDCw3Koa8>BqFOA6-^Kgpdf<#cfU7b2)epXrR)9o|MlO0 zAK7!@{yp&bp~_hK&p-Y!=(F8J>l&*bUh&@3cNQFYd2zE^)T|;H;)`K8U=SD@j2IYB z7_l&1Fydh7Fx)WWVYGvhU^c6s#+TefI^0bZrR%*|Q8I^`AOFMC9`#5{E!7ky77yx= z6!GgCa4AYcvMo)ENM~k&^e!`fugBC;s(JL^p0}JRg;&;i&Z88wyw-Ce9=;ns_VkZK zS!Ufy&m;_dzh=)kFMLaoQB6@6|BKg{4eTMWF-(Q8k!U=ofq1!JiZA(%jY7O3-S9a; zygN&ZGddX`g4MQ*-3<}*r(Q;#3fh`n<56G-_cmM@Qr^+eNOu4~eSq;)4DfM7jFkuw zmJKyd!KdgJ;{s?G{n_}uEAsOxGmXJ8jqNjy)9ryRTPoR)mKv83ut%3mw!@P~O$<1F zwbnR`5HpXL7~5hY))VCriV9!&1$&GF!VFaJ|J;70T9?SLE0&kJrmpPT^P z1pZ0_aFckZ2e>JGwgx~t!4-GC7o#YZ z!;!KHM&j&TC1J&UAvz9hy%0?W$#btdp-*l@iUuQfVvdrq(zguy3$NKzm^u-BRwi3) zU18S?elT6d$EQ$lx3!s87pC>Kx-fMLxURQ_3tNQ{U?bn0LNn2`=Ibf+ITcm$8ELc> zRhlH7ZdTDYUYSWN&~`rTI(k0X*^O8}QuNtMLg#@dAXB=GRUv7bJ{D*++}5StR15Ra zAN=cdP~A5J5M((!fmwR2FjF#)Fkm|R@dLJfN0VpK+?-H}8~wc!!cS0W$<0aE0W}S% zTP-JVM@6CQ+-SdTv97PMP`ZqRYS2szKv9(hM>7YN0XPvSbE+S#V&>#3w1+VaHfzkB z-az^+vLBGi7I~vWLp}T!os$Fd42#Ycp4k@Nrw`EGE&7HVfX)$g*2BqwphrJRpXngz zfTYh$dWf{o7a@mB@)3phl!b_yJt-e(30>*(LG~bUZJ#1$i}#9&0Q~m1$TB<#)L22q zOb212oQRTfX9hS>Kd!TlsTmWb<+;H?bYEsVCK48h?|w&8YkyI5bP z(-s}H6%bE1**K4RaYRpR*A^Db@Cv?0lmzca7si&EE(Qe%1DQA*&1h&x8dnY|n6v*=eW zOt5(Ip1kF%GmQ`mKZlF-(Qr!vayW?#uTqNxAR!t)BPD*kOhpmHh|q;ss>OMh*;yWw z!%{mfQF{`is17XRQ9)?H2e*p+nbelj#hw2Ukx`GBH}5AzzMhn1w@!wur_b*!#>qf2~Qu) z+Q)uwPhcG%EZ8q>cCE#JX|rqiqQT6EPMRAAvl24Y7lh1&5d1yK&vzt{T?j9q7m<_) zS|!C$B4;n^gC9wzlynfd!O@RDFlP*7ArxCC-Te4{^M~Q=aWrzH^Ux89>BhURQN6_v<*`&W$^1HxJ?02pgnGbgo~2-(g2PZqLK*RD zh@=(+=0tq4FzE0Q3e~EYd~;zi00LO&#r)}!?0~*>zVLyH`tb>K)+qLHOz2+)FiY%0 zlq4|`J;~t!?K83;w4j4(_!B6ikZrPL^+G18!6y41I=eegNXVMAr(DPi zhzVOJglCxW$(8dYIrB)NWp6)VsU9Q1cetTM+x-t zT?{S&;uCT>&#Yt7D&sEM<$y%2l8)akH~>y?oU37|$@6^1IF{Y17Ft>d0}al>H{nJU zhWiQvVyocO3KAL(A2DAY$Gq3++a&_)l;#u8Fh4ICz~XsqnB9xM;N!zAF=-vZcal_j z;hsVPhnFNtdJ``WGuF)#^^&6Xh54j*5E(ppLFYs=aEIT@d6YkarD%pI8E3eJu@eVdE?r*InvFqht#$0^aa+YAew1fSC zZ?FAhGY4dJk==pL$@cWm?P6W7V2g*__fxa`ZuY$q#~17i?sN#rXS_!>OAGM@Hs6Rv z7%4%}&!2$=m`F;JQ0! z?th&PLi&$a+R<)R=wr9p{|$D#LvM+C{#fk@y_&`kA7oDbyK8jdn5ci-R&0VxmKv&; zatbe`I5?S;f&ho$DR}|y>M~seJ&&Z8OP4hv;gg6Tu_E{)+&-%!RSs*9yc3mQ990bV z&Hae*1*70tP;1y>RKwq{VOeo63kTTKDn87Au3chVL1=jQwpF!E)nAOtDazE1zfs4Ih4#!ZwkBI{K{Ort45pytopvpL zd-4vI#NjiAK@x)>E0>G89+i_+YwqIpN&sJ@?44kqm8A~mC0^Vc&9ryfGF9K&2ArS6 zq2Vzb-)CQ<`<4L2cnx249I%ir`fX(KFoBDXJPBs`g2`fe?5DIdU?I53(~q!O`uTaS z8`tVAQzTa1N9V~ElqF5#4M*AasGi3)uyhpWJsQ|8-44zbf!xxdipaT2x?x_P_{w<- zLc>+&Qw?krVl^U_5Xq;yG(N#!JH|%wO~;tQ4*!DM1a*;%j{arGk>-vs*+Hk(D=CKd zxD_1!jgZ0rNULyYm1MKecPw4i4@Dj1CN#`eqA!jTi5}`@-h6?1NzW$>o%^EBb=2kx l6|XTJzXMNQon+%?9=^P#j*A=6beB!Ns043R@Z!sX&1r-H!+d|o3wR7CMw4l%o&$v|)o zAV)YW2nG=Xa`VvLMwa)MMR*HHAcAs4L~v0-0VUu9u4MPGs-7?ecXgeYdi~%3SAYF= zWYg})H^JXgomHLK_lLi~;QOTJ<)>F3xbWAVbsy8dQ_kt7=X3-^d@&3c3~p&B^gG^OO&%ng*79MjY0d7jKmD_>F}kBk`T|4K;&Hc5 zNE5%7A&;gdHFcyJQR%2`kUnTYB%CwK|^nKt)>AbSacNL}E75jbH65(6^sjpK4 z%CW1@_@2SQcRc4C?1%4sWbL3Rhd=7KMgzOoZ}ryUYb9G_3=mHbO7Zoe^|}z3XIcRl zh&N_SaaMEdC|IrfS8Gee{4v+6)L#$UTAXdPhwA4y|5#7ZXd1ya$H$NjjJ$a`Dx)6o<{scN4b>$%w zKKJl7CLMzA<;{G+JV^< z8CXu{n;9L7rt*NFLIrr(PrHc%`03NAgxC4OVG4gN0H0F(-2m-qpjrHzrnG5z)&qy) zG_A4^(iX!=9-Xfx&6*%YCxAUHL@$D5?KU^`$$dyOV5ASt(~@Qfra^zox4aev(=z@<8uhoD-%NF3MhDe}>Cb{|xg(rZFN6R~_|i1m46U@^Poryf^fn)v zK^LG^Hp!$*b+npSHlxeXTHgCMdNthAi&!Dj%+Xp>%Puw`(^?GDA!(L52x!#D(PiDe zALd~`_527>Ju(syWV@S#S+jUyrez&rzyyQ%Nyk2D^K6=*7g^{}v4oi99d6y3f((5)4H=bb?32|D|^R6vm1QPP*1 z2s$L`tCH>^?F&W7K9YPvqitm&;zmDH2(*N*wfH!v8@Q&ViP_>UVj=)PrI#$jQ$P(8 zRNM#&~oZ}ikSL5EAB zYORxUG|jyiB@0GtUg!hvJru-6!ex}8d%OS|;r$-!_cVBGT{a4%^(aelc(G%+9`zOL zt2KYi1Z^dR*xuh!Ro;kEiTVjdT9*-CuzJJ6cfv7z!->&lL>g#cL#hNn3N7S+&CY+5 zrX|A(G++zek_^X3{D*XGKBY@x1yNC2&F{9H)crb~SVb z`Y#qXSZsc%V3zLAAcVrtSZC25CeZJB|$t>M^VG5(1YjbWd+LY zJ06$E(wk4y+YqAZE-d0vK^XY^A|!|c2%IPgJd@v_#~wnL?PYna9C>Q=unUpSxDN52 z?N~E94r&ZPnPkyJ`a6M-8XgZPj=vNwyM_{GvKh!5IV`mk3}|0R<5;*CVrll^4_ zq)IaIUPrMZs&HpInh(aKBM{Sz*WIG3;(H5NIvQ$!UBJe;Lf4@lu$q@Cn5W@MP)I@< z@f(PwmjUKve7!jA@)3&XqQjEEP#g|{0M>ajf1^L!ZBCyce4wI1eA0et0DCSjvb_jq zi9Lvt6egmlxLlyEko{nUUDUv$rb4zz$qozI8ArBQ$!dk{3rDsv#_pRUMBGpTKn(sR zE=t_^tD<3|u5(BdENFzv*vdz!4bVsJYau_(Ph^wEv9`v#U#x>Q6Zp$wa9tER3_MSd zAc^UGQHjJjPj{C&0s(SLOj_kGlmez`G0R#Ua6Br>d&6Dan*`f;1l;Sd3;p-?YyJ`wFGlb<24P=3cGL-d#IFmq? zgks`cjfpeOiF3t?Gfl-2@LJx0*H-3Ne$lY6HVj)E4a+5j zki&VVjzv|*b#k)+60u4q{-|g-IKgqQmq1O{^O1vDPV;Km8YUPRa1QQ-8&Mc;D+-CN zg1^;}&=~k*d)r{R7c7%C({tQ-N?!2LM9J}pharXKdY!o&>gNjD)oBEmf=m5A9p{yrv(TcG4 z!ZJqay%#46SMmG5mO)h3fHkO9Yrsz}XRQ*Q>W5u%@RT^lQ&+Iv_x;NFu*`nBdLI^r zWr~fwRVB-bP2q;S^~p*$!R5KX!Q}B8HauZWeMy{pd8aWCpEjK(+3&1jS8(6zUpI3= zMkhNR=pN=w|N9NB#SLtUaQl8{x8BJ9X(cS0xHY`aB_yZ#9XnV?WYI*2{~U|P)!R~k zV^}5=&r^5?-u&#yg)Q$dah@F|aKHQU6jVskOW+-Ho`3?EjVpop7Cw&GNwXCb;SHAv z8{jM!>z|DI#9@U(U%8=PAF{N!CaNx+wi z!z2!msgR4g5LJ+LHFxoPC4ld=?459d%CaBkC0^XG+ZhMhG~KLh04~Vk(D2!(KVn~@ zM@j)=qJgiub}M94ejZsOOyHWUK!TY*ahO;h=PB(DDFoMe<`MRi`NR17jjKA#7KwH5 zC*$P`%91wbHOE+6bd)F5uuL?G-%-PQwfbP32;`LpD~Oz{q>qm;5MMb@K^S<2y`+XM zM(lt{B}DS6E{#v}za3`-_~PTtV*7tWZG*bhLkIn|V}EH5D3u4;A$aPV dBnSR8EU9DpH_k69E8S7YVEFAFb%7C;{{f`1TNMBR diff --git a/src/field.h b/src/field.h index 1f6ba74..cfa7626 100644 --- a/src/field.h +++ b/src/field.h @@ -13,7 +13,7 @@ * objects, which represent field elements (integers modulo 2^256 - 2^32 - 977). * * The actual definition of the secp256k1_fe type depends on the chosen field - * implementation; see the field_5x52.h and field_10x26.h files for details. + * implementation; see the field_5x52.h file for details. * * All secp256k1_fe objects have implicit properties that determine what * operations are permitted on it. These are purely a function of what @@ -39,8 +39,6 @@ #if defined(SECP256K1_WIDEMUL_INT128) #include "field_5x52.h" -#elif defined(SECP256K1_WIDEMUL_INT64) -#include "field_10x26.h" #else #error "Please select wide multiplication implementation" #endif diff --git a/src/field_10x26.h b/src/field_10x26.h deleted file mode 100644 index 203c101..0000000 --- a/src/field_10x26.h +++ /dev/null @@ -1,57 +0,0 @@ -/*********************************************************************** - * Copyright (c) 2013, 2014 Pieter Wuille * - * Distributed under the MIT software license, see the accompanying * - * file COPYING or https://www.opensource.org/licenses/mit-license.php.* - ***********************************************************************/ - -#ifndef SECP256K1_FIELD_REPR_H -#define SECP256K1_FIELD_REPR_H - -#include - -/** This field implementation represents the value as 10 uint32_t limbs in base - * 2^26. */ -typedef struct { - /* A field element f represents the sum(i=0..9, f.n[i] << (i*26)) mod p, - * where p is the field modulus, 2^256 - 2^32 - 977. - * - * The individual limbs f.n[i] can exceed 2^26; the field's magnitude roughly - * corresponds to how much excess is allowed. The value - * sum(i=0..9, f.n[i] << (i*26)) may exceed p, unless the field element is - * normalized. */ - uint32_t n[10]; - /* - * Magnitude m requires: - * n[i] <= 2 * m * (2^26 - 1) for i=0..8 - * n[9] <= 2 * m * (2^22 - 1) - * - * Normalized requires: - * n[i] <= (2^26 - 1) for i=0..8 - * sum(i=0..9, n[i] << (i*26)) < p - * (together these imply n[9] <= 2^22 - 1) - */ - SECP256K1_FE_VERIFY_FIELDS -} secp256k1_fe; - -/* Unpacks a constant into a overlapping multi-limbed FE element. */ -#define SECP256K1_FE_CONST_INNER(d7, d6, d5, d4, d3, d2, d1, d0) { \ - (d0) & 0x3FFFFFFUL, \ - (((uint32_t)d0) >> 26) | (((uint32_t)(d1) & 0xFFFFFUL) << 6), \ - (((uint32_t)d1) >> 20) | (((uint32_t)(d2) & 0x3FFFUL) << 12), \ - (((uint32_t)d2) >> 14) | (((uint32_t)(d3) & 0xFFUL) << 18), \ - (((uint32_t)d3) >> 8) | (((uint32_t)(d4) & 0x3UL) << 24), \ - (((uint32_t)d4) >> 2) & 0x3FFFFFFUL, \ - (((uint32_t)d4) >> 28) | (((uint32_t)(d5) & 0x3FFFFFUL) << 4), \ - (((uint32_t)d5) >> 22) | (((uint32_t)(d6) & 0xFFFFUL) << 10), \ - (((uint32_t)d6) >> 16) | (((uint32_t)(d7) & 0x3FFUL) << 16), \ - (((uint32_t)d7) >> 10) \ -} - -typedef struct { - uint32_t n[8]; -} secp256k1_fe_storage; - -#define SECP256K1_FE_STORAGE_CONST(d7, d6, d5, d4, d3, d2, d1, d0) {{ (d0), (d1), (d2), (d3), (d4), (d5), (d6), (d7) }} -#define SECP256K1_FE_STORAGE_CONST_GET(d) d.n[7], d.n[6], d.n[5], d.n[4],d.n[3], d.n[2], d.n[1], d.n[0] - -#endif /* SECP256K1_FIELD_REPR_H */ diff --git a/src/field_10x26_impl.h b/src/field_10x26_impl.h deleted file mode 100644 index ea14c27..0000000 --- a/src/field_10x26_impl.h +++ /dev/null @@ -1,1232 +0,0 @@ -/*********************************************************************** - * Copyright (c) 2013, 2014 Pieter Wuille * - * Distributed under the MIT software license, see the accompanying * - * file COPYING or https://www.opensource.org/licenses/mit-license.php.* - ***********************************************************************/ - -#ifndef SECP256K1_FIELD_REPR_IMPL_H -#define SECP256K1_FIELD_REPR_IMPL_H - -#include "checkmem.h" -#include "util.h" -#include "field.h" -#include "modinv32_impl.h" - -#ifdef VERIFY -static void secp256k1_fe_impl_verify(const secp256k1_fe *a) { - const uint32_t *d = a->n; - int m = a->normalized ? 1 : 2 * a->magnitude; - VERIFY_CHECK(d[0] <= 0x3FFFFFFUL * m); - VERIFY_CHECK(d[1] <= 0x3FFFFFFUL * m); - VERIFY_CHECK(d[2] <= 0x3FFFFFFUL * m); - VERIFY_CHECK(d[3] <= 0x3FFFFFFUL * m); - VERIFY_CHECK(d[4] <= 0x3FFFFFFUL * m); - VERIFY_CHECK(d[5] <= 0x3FFFFFFUL * m); - VERIFY_CHECK(d[6] <= 0x3FFFFFFUL * m); - VERIFY_CHECK(d[7] <= 0x3FFFFFFUL * m); - VERIFY_CHECK(d[8] <= 0x3FFFFFFUL * m); - VERIFY_CHECK(d[9] <= 0x03FFFFFUL * m); - if (a->normalized) { - if (d[9] == 0x03FFFFFUL) { - uint32_t mid = d[8] & d[7] & d[6] & d[5] & d[4] & d[3] & d[2]; - if (mid == 0x3FFFFFFUL) { - VERIFY_CHECK((d[1] + 0x40UL + ((d[0] + 0x3D1UL) >> 26)) <= 0x3FFFFFFUL); - } - } - } -} -#endif - -static void secp256k1_fe_impl_get_bounds(secp256k1_fe *r, int m) { - r->n[0] = 0x3FFFFFFUL * 2 * m; - r->n[1] = 0x3FFFFFFUL * 2 * m; - r->n[2] = 0x3FFFFFFUL * 2 * m; - r->n[3] = 0x3FFFFFFUL * 2 * m; - r->n[4] = 0x3FFFFFFUL * 2 * m; - r->n[5] = 0x3FFFFFFUL * 2 * m; - r->n[6] = 0x3FFFFFFUL * 2 * m; - r->n[7] = 0x3FFFFFFUL * 2 * m; - r->n[8] = 0x3FFFFFFUL * 2 * m; - r->n[9] = 0x03FFFFFUL * 2 * m; -} - -static void secp256k1_fe_impl_normalize(secp256k1_fe *r) { - uint32_t t0 = r->n[0], t1 = r->n[1], t2 = r->n[2], t3 = r->n[3], t4 = r->n[4], - t5 = r->n[5], t6 = r->n[6], t7 = r->n[7], t8 = r->n[8], t9 = r->n[9]; - - /* Reduce t9 at the start so there will be at most a single carry from the first pass */ - uint32_t m; - uint32_t x = t9 >> 22; t9 &= 0x03FFFFFUL; - - /* The first pass ensures the magnitude is 1, ... */ - t0 += x * 0x3D1UL; t1 += (x << 6); - t1 += (t0 >> 26); t0 &= 0x3FFFFFFUL; - t2 += (t1 >> 26); t1 &= 0x3FFFFFFUL; - t3 += (t2 >> 26); t2 &= 0x3FFFFFFUL; m = t2; - t4 += (t3 >> 26); t3 &= 0x3FFFFFFUL; m &= t3; - t5 += (t4 >> 26); t4 &= 0x3FFFFFFUL; m &= t4; - t6 += (t5 >> 26); t5 &= 0x3FFFFFFUL; m &= t5; - t7 += (t6 >> 26); t6 &= 0x3FFFFFFUL; m &= t6; - t8 += (t7 >> 26); t7 &= 0x3FFFFFFUL; m &= t7; - t9 += (t8 >> 26); t8 &= 0x3FFFFFFUL; m &= t8; - - /* ... except for a possible carry at bit 22 of t9 (i.e. bit 256 of the field element) */ - VERIFY_CHECK(t9 >> 23 == 0); - - /* At most a single final reduction is needed; check if the value is >= the field characteristic */ - x = (t9 >> 22) | ((t9 == 0x03FFFFFUL) & (m == 0x3FFFFFFUL) - & ((t1 + 0x40UL + ((t0 + 0x3D1UL) >> 26)) > 0x3FFFFFFUL)); - - /* Apply the final reduction (for constant-time behaviour, we do it always) */ - t0 += x * 0x3D1UL; t1 += (x << 6); - t1 += (t0 >> 26); t0 &= 0x3FFFFFFUL; - t2 += (t1 >> 26); t1 &= 0x3FFFFFFUL; - t3 += (t2 >> 26); t2 &= 0x3FFFFFFUL; - t4 += (t3 >> 26); t3 &= 0x3FFFFFFUL; - t5 += (t4 >> 26); t4 &= 0x3FFFFFFUL; - t6 += (t5 >> 26); t5 &= 0x3FFFFFFUL; - t7 += (t6 >> 26); t6 &= 0x3FFFFFFUL; - t8 += (t7 >> 26); t7 &= 0x3FFFFFFUL; - t9 += (t8 >> 26); t8 &= 0x3FFFFFFUL; - - /* If t9 didn't carry to bit 22 already, then it should have after any final reduction */ - VERIFY_CHECK(t9 >> 22 == x); - - /* Mask off the possible multiple of 2^256 from the final reduction */ - t9 &= 0x03FFFFFUL; - - r->n[0] = t0; r->n[1] = t1; r->n[2] = t2; r->n[3] = t3; r->n[4] = t4; - r->n[5] = t5; r->n[6] = t6; r->n[7] = t7; r->n[8] = t8; r->n[9] = t9; -} - -static void secp256k1_fe_impl_normalize_weak(secp256k1_fe *r) { - uint32_t t0 = r->n[0], t1 = r->n[1], t2 = r->n[2], t3 = r->n[3], t4 = r->n[4], - t5 = r->n[5], t6 = r->n[6], t7 = r->n[7], t8 = r->n[8], t9 = r->n[9]; - - /* Reduce t9 at the start so there will be at most a single carry from the first pass */ - uint32_t x = t9 >> 22; t9 &= 0x03FFFFFUL; - - /* The first pass ensures the magnitude is 1, ... */ - t0 += x * 0x3D1UL; t1 += (x << 6); - t1 += (t0 >> 26); t0 &= 0x3FFFFFFUL; - t2 += (t1 >> 26); t1 &= 0x3FFFFFFUL; - t3 += (t2 >> 26); t2 &= 0x3FFFFFFUL; - t4 += (t3 >> 26); t3 &= 0x3FFFFFFUL; - t5 += (t4 >> 26); t4 &= 0x3FFFFFFUL; - t6 += (t5 >> 26); t5 &= 0x3FFFFFFUL; - t7 += (t6 >> 26); t6 &= 0x3FFFFFFUL; - t8 += (t7 >> 26); t7 &= 0x3FFFFFFUL; - t9 += (t8 >> 26); t8 &= 0x3FFFFFFUL; - - /* ... except for a possible carry at bit 22 of t9 (i.e. bit 256 of the field element) */ - VERIFY_CHECK(t9 >> 23 == 0); - - r->n[0] = t0; r->n[1] = t1; r->n[2] = t2; r->n[3] = t3; r->n[4] = t4; - r->n[5] = t5; r->n[6] = t6; r->n[7] = t7; r->n[8] = t8; r->n[9] = t9; -} - -static void secp256k1_fe_impl_normalize_var(secp256k1_fe *r) { - uint32_t t0 = r->n[0], t1 = r->n[1], t2 = r->n[2], t3 = r->n[3], t4 = r->n[4], - t5 = r->n[5], t6 = r->n[6], t7 = r->n[7], t8 = r->n[8], t9 = r->n[9]; - - /* Reduce t9 at the start so there will be at most a single carry from the first pass */ - uint32_t m; - uint32_t x = t9 >> 22; t9 &= 0x03FFFFFUL; - - /* The first pass ensures the magnitude is 1, ... */ - t0 += x * 0x3D1UL; t1 += (x << 6); - t1 += (t0 >> 26); t0 &= 0x3FFFFFFUL; - t2 += (t1 >> 26); t1 &= 0x3FFFFFFUL; - t3 += (t2 >> 26); t2 &= 0x3FFFFFFUL; m = t2; - t4 += (t3 >> 26); t3 &= 0x3FFFFFFUL; m &= t3; - t5 += (t4 >> 26); t4 &= 0x3FFFFFFUL; m &= t4; - t6 += (t5 >> 26); t5 &= 0x3FFFFFFUL; m &= t5; - t7 += (t6 >> 26); t6 &= 0x3FFFFFFUL; m &= t6; - t8 += (t7 >> 26); t7 &= 0x3FFFFFFUL; m &= t7; - t9 += (t8 >> 26); t8 &= 0x3FFFFFFUL; m &= t8; - - /* ... except for a possible carry at bit 22 of t9 (i.e. bit 256 of the field element) */ - VERIFY_CHECK(t9 >> 23 == 0); - - /* At most a single final reduction is needed; check if the value is >= the field characteristic */ - x = (t9 >> 22) | ((t9 == 0x03FFFFFUL) & (m == 0x3FFFFFFUL) - & ((t1 + 0x40UL + ((t0 + 0x3D1UL) >> 26)) > 0x3FFFFFFUL)); - - if (x) { - t0 += 0x3D1UL; t1 += (x << 6); - t1 += (t0 >> 26); t0 &= 0x3FFFFFFUL; - t2 += (t1 >> 26); t1 &= 0x3FFFFFFUL; - t3 += (t2 >> 26); t2 &= 0x3FFFFFFUL; - t4 += (t3 >> 26); t3 &= 0x3FFFFFFUL; - t5 += (t4 >> 26); t4 &= 0x3FFFFFFUL; - t6 += (t5 >> 26); t5 &= 0x3FFFFFFUL; - t7 += (t6 >> 26); t6 &= 0x3FFFFFFUL; - t8 += (t7 >> 26); t7 &= 0x3FFFFFFUL; - t9 += (t8 >> 26); t8 &= 0x3FFFFFFUL; - - /* If t9 didn't carry to bit 22 already, then it should have after any final reduction */ - VERIFY_CHECK(t9 >> 22 == x); - - /* Mask off the possible multiple of 2^256 from the final reduction */ - t9 &= 0x03FFFFFUL; - } - - r->n[0] = t0; r->n[1] = t1; r->n[2] = t2; r->n[3] = t3; r->n[4] = t4; - r->n[5] = t5; r->n[6] = t6; r->n[7] = t7; r->n[8] = t8; r->n[9] = t9; -} - -static int secp256k1_fe_impl_normalizes_to_zero(const secp256k1_fe *r) { - uint32_t t0 = r->n[0], t1 = r->n[1], t2 = r->n[2], t3 = r->n[3], t4 = r->n[4], - t5 = r->n[5], t6 = r->n[6], t7 = r->n[7], t8 = r->n[8], t9 = r->n[9]; - - /* z0 tracks a possible raw value of 0, z1 tracks a possible raw value of P */ - uint32_t z0, z1; - - /* Reduce t9 at the start so there will be at most a single carry from the first pass */ - uint32_t x = t9 >> 22; t9 &= 0x03FFFFFUL; - - /* The first pass ensures the magnitude is 1, ... */ - t0 += x * 0x3D1UL; t1 += (x << 6); - t1 += (t0 >> 26); t0 &= 0x3FFFFFFUL; z0 = t0; z1 = t0 ^ 0x3D0UL; - t2 += (t1 >> 26); t1 &= 0x3FFFFFFUL; z0 |= t1; z1 &= t1 ^ 0x40UL; - t3 += (t2 >> 26); t2 &= 0x3FFFFFFUL; z0 |= t2; z1 &= t2; - t4 += (t3 >> 26); t3 &= 0x3FFFFFFUL; z0 |= t3; z1 &= t3; - t5 += (t4 >> 26); t4 &= 0x3FFFFFFUL; z0 |= t4; z1 &= t4; - t6 += (t5 >> 26); t5 &= 0x3FFFFFFUL; z0 |= t5; z1 &= t5; - t7 += (t6 >> 26); t6 &= 0x3FFFFFFUL; z0 |= t6; z1 &= t6; - t8 += (t7 >> 26); t7 &= 0x3FFFFFFUL; z0 |= t7; z1 &= t7; - t9 += (t8 >> 26); t8 &= 0x3FFFFFFUL; z0 |= t8; z1 &= t8; - z0 |= t9; z1 &= t9 ^ 0x3C00000UL; - - /* ... except for a possible carry at bit 22 of t9 (i.e. bit 256 of the field element) */ - VERIFY_CHECK(t9 >> 23 == 0); - - return (z0 == 0) | (z1 == 0x3FFFFFFUL); -} - -static int secp256k1_fe_impl_normalizes_to_zero_var(const secp256k1_fe *r) { - uint32_t t0, t1, t2, t3, t4, t5, t6, t7, t8, t9; - uint32_t z0, z1; - uint32_t x; - - t0 = r->n[0]; - t9 = r->n[9]; - - /* Reduce t9 at the start so there will be at most a single carry from the first pass */ - x = t9 >> 22; - - /* The first pass ensures the magnitude is 1, ... */ - t0 += x * 0x3D1UL; - - /* z0 tracks a possible raw value of 0, z1 tracks a possible raw value of P */ - z0 = t0 & 0x3FFFFFFUL; - z1 = z0 ^ 0x3D0UL; - - /* Fast return path should catch the majority of cases */ - if ((z0 != 0UL) & (z1 != 0x3FFFFFFUL)) { - return 0; - } - - t1 = r->n[1]; - t2 = r->n[2]; - t3 = r->n[3]; - t4 = r->n[4]; - t5 = r->n[5]; - t6 = r->n[6]; - t7 = r->n[7]; - t8 = r->n[8]; - - t9 &= 0x03FFFFFUL; - t1 += (x << 6); - - t1 += (t0 >> 26); - t2 += (t1 >> 26); t1 &= 0x3FFFFFFUL; z0 |= t1; z1 &= t1 ^ 0x40UL; - t3 += (t2 >> 26); t2 &= 0x3FFFFFFUL; z0 |= t2; z1 &= t2; - t4 += (t3 >> 26); t3 &= 0x3FFFFFFUL; z0 |= t3; z1 &= t3; - t5 += (t4 >> 26); t4 &= 0x3FFFFFFUL; z0 |= t4; z1 &= t4; - t6 += (t5 >> 26); t5 &= 0x3FFFFFFUL; z0 |= t5; z1 &= t5; - t7 += (t6 >> 26); t6 &= 0x3FFFFFFUL; z0 |= t6; z1 &= t6; - t8 += (t7 >> 26); t7 &= 0x3FFFFFFUL; z0 |= t7; z1 &= t7; - t9 += (t8 >> 26); t8 &= 0x3FFFFFFUL; z0 |= t8; z1 &= t8; - z0 |= t9; z1 &= t9 ^ 0x3C00000UL; - - /* ... except for a possible carry at bit 22 of t9 (i.e. bit 256 of the field element) */ - VERIFY_CHECK(t9 >> 23 == 0); - - return (z0 == 0) | (z1 == 0x3FFFFFFUL); -} - -SECP256K1_INLINE static void secp256k1_fe_impl_set_int(secp256k1_fe *r, int a) { - r->n[0] = a; - r->n[1] = r->n[2] = r->n[3] = r->n[4] = r->n[5] = r->n[6] = r->n[7] = r->n[8] = r->n[9] = 0; -} - -SECP256K1_INLINE static int secp256k1_fe_impl_is_zero(const secp256k1_fe *a) { - const uint32_t *t = a->n; - return (t[0] | t[1] | t[2] | t[3] | t[4] | t[5] | t[6] | t[7] | t[8] | t[9]) == 0; -} - -SECP256K1_INLINE static int secp256k1_fe_impl_is_odd(const secp256k1_fe *a) { - return a->n[0] & 1; -} - -static int secp256k1_fe_impl_cmp_var(const secp256k1_fe *a, const secp256k1_fe *b) { - int i; - for (i = 9; i >= 0; i--) { - if (a->n[i] > b->n[i]) { - return 1; - } - if (a->n[i] < b->n[i]) { - return -1; - } - } - return 0; -} - -static void secp256k1_fe_impl_set_b32_mod(secp256k1_fe *r, const unsigned char *a) { - r->n[0] = (uint32_t)a[31] | ((uint32_t)a[30] << 8) | ((uint32_t)a[29] << 16) | ((uint32_t)(a[28] & 0x3) << 24); - r->n[1] = (uint32_t)((a[28] >> 2) & 0x3f) | ((uint32_t)a[27] << 6) | ((uint32_t)a[26] << 14) | ((uint32_t)(a[25] & 0xf) << 22); - r->n[2] = (uint32_t)((a[25] >> 4) & 0xf) | ((uint32_t)a[24] << 4) | ((uint32_t)a[23] << 12) | ((uint32_t)(a[22] & 0x3f) << 20); - r->n[3] = (uint32_t)((a[22] >> 6) & 0x3) | ((uint32_t)a[21] << 2) | ((uint32_t)a[20] << 10) | ((uint32_t)a[19] << 18); - r->n[4] = (uint32_t)a[18] | ((uint32_t)a[17] << 8) | ((uint32_t)a[16] << 16) | ((uint32_t)(a[15] & 0x3) << 24); - r->n[5] = (uint32_t)((a[15] >> 2) & 0x3f) | ((uint32_t)a[14] << 6) | ((uint32_t)a[13] << 14) | ((uint32_t)(a[12] & 0xf) << 22); - r->n[6] = (uint32_t)((a[12] >> 4) & 0xf) | ((uint32_t)a[11] << 4) | ((uint32_t)a[10] << 12) | ((uint32_t)(a[9] & 0x3f) << 20); - r->n[7] = (uint32_t)((a[9] >> 6) & 0x3) | ((uint32_t)a[8] << 2) | ((uint32_t)a[7] << 10) | ((uint32_t)a[6] << 18); - r->n[8] = (uint32_t)a[5] | ((uint32_t)a[4] << 8) | ((uint32_t)a[3] << 16) | ((uint32_t)(a[2] & 0x3) << 24); - r->n[9] = (uint32_t)((a[2] >> 2) & 0x3f) | ((uint32_t)a[1] << 6) | ((uint32_t)a[0] << 14); -} - -static int secp256k1_fe_impl_set_b32_limit(secp256k1_fe *r, const unsigned char *a) { - secp256k1_fe_impl_set_b32_mod(r, a); - return !((r->n[9] == 0x3FFFFFUL) & ((r->n[8] & r->n[7] & r->n[6] & r->n[5] & r->n[4] & r->n[3] & r->n[2]) == 0x3FFFFFFUL) & ((r->n[1] + 0x40UL + ((r->n[0] + 0x3D1UL) >> 26)) > 0x3FFFFFFUL)); -} - -/** Convert a field element to a 32-byte big endian value. Requires the input to be normalized */ -static void secp256k1_fe_impl_get_b32(unsigned char *r, const secp256k1_fe *a) { - r[0] = (a->n[9] >> 14) & 0xff; - r[1] = (a->n[9] >> 6) & 0xff; - r[2] = ((a->n[9] & 0x3F) << 2) | ((a->n[8] >> 24) & 0x3); - r[3] = (a->n[8] >> 16) & 0xff; - r[4] = (a->n[8] >> 8) & 0xff; - r[5] = a->n[8] & 0xff; - r[6] = (a->n[7] >> 18) & 0xff; - r[7] = (a->n[7] >> 10) & 0xff; - r[8] = (a->n[7] >> 2) & 0xff; - r[9] = ((a->n[7] & 0x3) << 6) | ((a->n[6] >> 20) & 0x3f); - r[10] = (a->n[6] >> 12) & 0xff; - r[11] = (a->n[6] >> 4) & 0xff; - r[12] = ((a->n[6] & 0xf) << 4) | ((a->n[5] >> 22) & 0xf); - r[13] = (a->n[5] >> 14) & 0xff; - r[14] = (a->n[5] >> 6) & 0xff; - r[15] = ((a->n[5] & 0x3f) << 2) | ((a->n[4] >> 24) & 0x3); - r[16] = (a->n[4] >> 16) & 0xff; - r[17] = (a->n[4] >> 8) & 0xff; - r[18] = a->n[4] & 0xff; - r[19] = (a->n[3] >> 18) & 0xff; - r[20] = (a->n[3] >> 10) & 0xff; - r[21] = (a->n[3] >> 2) & 0xff; - r[22] = ((a->n[3] & 0x3) << 6) | ((a->n[2] >> 20) & 0x3f); - r[23] = (a->n[2] >> 12) & 0xff; - r[24] = (a->n[2] >> 4) & 0xff; - r[25] = ((a->n[2] & 0xf) << 4) | ((a->n[1] >> 22) & 0xf); - r[26] = (a->n[1] >> 14) & 0xff; - r[27] = (a->n[1] >> 6) & 0xff; - r[28] = ((a->n[1] & 0x3f) << 2) | ((a->n[0] >> 24) & 0x3); - r[29] = (a->n[0] >> 16) & 0xff; - r[30] = (a->n[0] >> 8) & 0xff; - r[31] = a->n[0] & 0xff; -} - -SECP256K1_INLINE static void secp256k1_fe_impl_negate_unchecked(secp256k1_fe *r, const secp256k1_fe *a, int m) { - /* For all legal values of m (0..31), the following properties hold: */ - VERIFY_CHECK(0x3FFFC2FUL * 2 * (m + 1) >= 0x3FFFFFFUL * 2 * m); - VERIFY_CHECK(0x3FFFFBFUL * 2 * (m + 1) >= 0x3FFFFFFUL * 2 * m); - VERIFY_CHECK(0x3FFFFFFUL * 2 * (m + 1) >= 0x3FFFFFFUL * 2 * m); - VERIFY_CHECK(0x03FFFFFUL * 2 * (m + 1) >= 0x03FFFFFUL * 2 * m); - - /* Due to the properties above, the left hand in the subtractions below is never less than - * the right hand. */ - r->n[0] = 0x3FFFC2FUL * 2 * (m + 1) - a->n[0]; - r->n[1] = 0x3FFFFBFUL * 2 * (m + 1) - a->n[1]; - r->n[2] = 0x3FFFFFFUL * 2 * (m + 1) - a->n[2]; - r->n[3] = 0x3FFFFFFUL * 2 * (m + 1) - a->n[3]; - r->n[4] = 0x3FFFFFFUL * 2 * (m + 1) - a->n[4]; - r->n[5] = 0x3FFFFFFUL * 2 * (m + 1) - a->n[5]; - r->n[6] = 0x3FFFFFFUL * 2 * (m + 1) - a->n[6]; - r->n[7] = 0x3FFFFFFUL * 2 * (m + 1) - a->n[7]; - r->n[8] = 0x3FFFFFFUL * 2 * (m + 1) - a->n[8]; - r->n[9] = 0x03FFFFFUL * 2 * (m + 1) - a->n[9]; -} - -SECP256K1_INLINE static void secp256k1_fe_impl_mul_int_unchecked(secp256k1_fe *r, int a) { - r->n[0] *= a; - r->n[1] *= a; - r->n[2] *= a; - r->n[3] *= a; - r->n[4] *= a; - r->n[5] *= a; - r->n[6] *= a; - r->n[7] *= a; - r->n[8] *= a; - r->n[9] *= a; -} - -SECP256K1_INLINE static void secp256k1_fe_impl_add(secp256k1_fe *r, const secp256k1_fe *a) { - r->n[0] += a->n[0]; - r->n[1] += a->n[1]; - r->n[2] += a->n[2]; - r->n[3] += a->n[3]; - r->n[4] += a->n[4]; - r->n[5] += a->n[5]; - r->n[6] += a->n[6]; - r->n[7] += a->n[7]; - r->n[8] += a->n[8]; - r->n[9] += a->n[9]; -} - -SECP256K1_INLINE static void secp256k1_fe_impl_add_int(secp256k1_fe *r, int a) { - r->n[0] += a; -} - -#if defined(USE_EXTERNAL_ASM) - -/* External assembler implementation */ -void secp256k1_fe_mul_inner(uint32_t *r, const uint32_t *a, const uint32_t * SECP256K1_RESTRICT b); -void secp256k1_fe_sqr_inner(uint32_t *r, const uint32_t *a); - -#else - -#define VERIFY_BITS(x, n) VERIFY_CHECK(((x) >> (n)) == 0) - -SECP256K1_INLINE static void secp256k1_fe_mul_inner(uint32_t *r, const uint32_t *a, const uint32_t * SECP256K1_RESTRICT b) { - uint64_t c, d; - uint64_t u0, u1, u2, u3, u4, u5, u6, u7, u8; - uint32_t t9, t1, t0, t2, t3, t4, t5, t6, t7; - const uint32_t M = 0x3FFFFFFUL, R0 = 0x3D10UL, R1 = 0x400UL; - - VERIFY_BITS(a[0], 30); - VERIFY_BITS(a[1], 30); - VERIFY_BITS(a[2], 30); - VERIFY_BITS(a[3], 30); - VERIFY_BITS(a[4], 30); - VERIFY_BITS(a[5], 30); - VERIFY_BITS(a[6], 30); - VERIFY_BITS(a[7], 30); - VERIFY_BITS(a[8], 30); - VERIFY_BITS(a[9], 26); - VERIFY_BITS(b[0], 30); - VERIFY_BITS(b[1], 30); - VERIFY_BITS(b[2], 30); - VERIFY_BITS(b[3], 30); - VERIFY_BITS(b[4], 30); - VERIFY_BITS(b[5], 30); - VERIFY_BITS(b[6], 30); - VERIFY_BITS(b[7], 30); - VERIFY_BITS(b[8], 30); - VERIFY_BITS(b[9], 26); - - /** [... a b c] is a shorthand for ... + a<<52 + b<<26 + c<<0 mod n. - * for 0 <= x <= 9, px is a shorthand for sum(a[i]*b[x-i], i=0..x). - * for 9 <= x <= 18, px is a shorthand for sum(a[i]*b[x-i], i=(x-9)..9) - * Note that [x 0 0 0 0 0 0 0 0 0 0] = [x*R1 x*R0]. - */ - - d = (uint64_t)a[0] * b[9] - + (uint64_t)a[1] * b[8] - + (uint64_t)a[2] * b[7] - + (uint64_t)a[3] * b[6] - + (uint64_t)a[4] * b[5] - + (uint64_t)a[5] * b[4] - + (uint64_t)a[6] * b[3] - + (uint64_t)a[7] * b[2] - + (uint64_t)a[8] * b[1] - + (uint64_t)a[9] * b[0]; - /* VERIFY_BITS(d, 64); */ - /* [d 0 0 0 0 0 0 0 0 0] = [p9 0 0 0 0 0 0 0 0 0] */ - t9 = d & M; d >>= 26; - VERIFY_BITS(t9, 26); - VERIFY_BITS(d, 38); - /* [d t9 0 0 0 0 0 0 0 0 0] = [p9 0 0 0 0 0 0 0 0 0] */ - - c = (uint64_t)a[0] * b[0]; - VERIFY_BITS(c, 60); - /* [d t9 0 0 0 0 0 0 0 0 c] = [p9 0 0 0 0 0 0 0 0 p0] */ - d += (uint64_t)a[1] * b[9] - + (uint64_t)a[2] * b[8] - + (uint64_t)a[3] * b[7] - + (uint64_t)a[4] * b[6] - + (uint64_t)a[5] * b[5] - + (uint64_t)a[6] * b[4] - + (uint64_t)a[7] * b[3] - + (uint64_t)a[8] * b[2] - + (uint64_t)a[9] * b[1]; - VERIFY_BITS(d, 63); - /* [d t9 0 0 0 0 0 0 0 0 c] = [p10 p9 0 0 0 0 0 0 0 0 p0] */ - u0 = d & M; d >>= 26; c += u0 * R0; - VERIFY_BITS(u0, 26); - VERIFY_BITS(d, 37); - VERIFY_BITS(c, 61); - /* [d u0 t9 0 0 0 0 0 0 0 0 c-u0*R0] = [p10 p9 0 0 0 0 0 0 0 0 p0] */ - t0 = c & M; c >>= 26; c += u0 * R1; - VERIFY_BITS(t0, 26); - VERIFY_BITS(c, 37); - /* [d u0 t9 0 0 0 0 0 0 0 c-u0*R1 t0-u0*R0] = [p10 p9 0 0 0 0 0 0 0 0 p0] */ - /* [d 0 t9 0 0 0 0 0 0 0 c t0] = [p10 p9 0 0 0 0 0 0 0 0 p0] */ - - c += (uint64_t)a[0] * b[1] - + (uint64_t)a[1] * b[0]; - VERIFY_BITS(c, 62); - /* [d 0 t9 0 0 0 0 0 0 0 c t0] = [p10 p9 0 0 0 0 0 0 0 p1 p0] */ - d += (uint64_t)a[2] * b[9] - + (uint64_t)a[3] * b[8] - + (uint64_t)a[4] * b[7] - + (uint64_t)a[5] * b[6] - + (uint64_t)a[6] * b[5] - + (uint64_t)a[7] * b[4] - + (uint64_t)a[8] * b[3] - + (uint64_t)a[9] * b[2]; - VERIFY_BITS(d, 63); - /* [d 0 t9 0 0 0 0 0 0 0 c t0] = [p11 p10 p9 0 0 0 0 0 0 0 p1 p0] */ - u1 = d & M; d >>= 26; c += u1 * R0; - VERIFY_BITS(u1, 26); - VERIFY_BITS(d, 37); - VERIFY_BITS(c, 63); - /* [d u1 0 t9 0 0 0 0 0 0 0 c-u1*R0 t0] = [p11 p10 p9 0 0 0 0 0 0 0 p1 p0] */ - t1 = c & M; c >>= 26; c += u1 * R1; - VERIFY_BITS(t1, 26); - VERIFY_BITS(c, 38); - /* [d u1 0 t9 0 0 0 0 0 0 c-u1*R1 t1-u1*R0 t0] = [p11 p10 p9 0 0 0 0 0 0 0 p1 p0] */ - /* [d 0 0 t9 0 0 0 0 0 0 c t1 t0] = [p11 p10 p9 0 0 0 0 0 0 0 p1 p0] */ - - c += (uint64_t)a[0] * b[2] - + (uint64_t)a[1] * b[1] - + (uint64_t)a[2] * b[0]; - VERIFY_BITS(c, 62); - /* [d 0 0 t9 0 0 0 0 0 0 c t1 t0] = [p11 p10 p9 0 0 0 0 0 0 p2 p1 p0] */ - d += (uint64_t)a[3] * b[9] - + (uint64_t)a[4] * b[8] - + (uint64_t)a[5] * b[7] - + (uint64_t)a[6] * b[6] - + (uint64_t)a[7] * b[5] - + (uint64_t)a[8] * b[4] - + (uint64_t)a[9] * b[3]; - VERIFY_BITS(d, 63); - /* [d 0 0 t9 0 0 0 0 0 0 c t1 t0] = [p12 p11 p10 p9 0 0 0 0 0 0 p2 p1 p0] */ - u2 = d & M; d >>= 26; c += u2 * R0; - VERIFY_BITS(u2, 26); - VERIFY_BITS(d, 37); - VERIFY_BITS(c, 63); - /* [d u2 0 0 t9 0 0 0 0 0 0 c-u2*R0 t1 t0] = [p12 p11 p10 p9 0 0 0 0 0 0 p2 p1 p0] */ - t2 = c & M; c >>= 26; c += u2 * R1; - VERIFY_BITS(t2, 26); - VERIFY_BITS(c, 38); - /* [d u2 0 0 t9 0 0 0 0 0 c-u2*R1 t2-u2*R0 t1 t0] = [p12 p11 p10 p9 0 0 0 0 0 0 p2 p1 p0] */ - /* [d 0 0 0 t9 0 0 0 0 0 c t2 t1 t0] = [p12 p11 p10 p9 0 0 0 0 0 0 p2 p1 p0] */ - - c += (uint64_t)a[0] * b[3] - + (uint64_t)a[1] * b[2] - + (uint64_t)a[2] * b[1] - + (uint64_t)a[3] * b[0]; - VERIFY_BITS(c, 63); - /* [d 0 0 0 t9 0 0 0 0 0 c t2 t1 t0] = [p12 p11 p10 p9 0 0 0 0 0 p3 p2 p1 p0] */ - d += (uint64_t)a[4] * b[9] - + (uint64_t)a[5] * b[8] - + (uint64_t)a[6] * b[7] - + (uint64_t)a[7] * b[6] - + (uint64_t)a[8] * b[5] - + (uint64_t)a[9] * b[4]; - VERIFY_BITS(d, 63); - /* [d 0 0 0 t9 0 0 0 0 0 c t2 t1 t0] = [p13 p12 p11 p10 p9 0 0 0 0 0 p3 p2 p1 p0] */ - u3 = d & M; d >>= 26; c += u3 * R0; - VERIFY_BITS(u3, 26); - VERIFY_BITS(d, 37); - /* VERIFY_BITS(c, 64); */ - /* [d u3 0 0 0 t9 0 0 0 0 0 c-u3*R0 t2 t1 t0] = [p13 p12 p11 p10 p9 0 0 0 0 0 p3 p2 p1 p0] */ - t3 = c & M; c >>= 26; c += u3 * R1; - VERIFY_BITS(t3, 26); - VERIFY_BITS(c, 39); - /* [d u3 0 0 0 t9 0 0 0 0 c-u3*R1 t3-u3*R0 t2 t1 t0] = [p13 p12 p11 p10 p9 0 0 0 0 0 p3 p2 p1 p0] */ - /* [d 0 0 0 0 t9 0 0 0 0 c t3 t2 t1 t0] = [p13 p12 p11 p10 p9 0 0 0 0 0 p3 p2 p1 p0] */ - - c += (uint64_t)a[0] * b[4] - + (uint64_t)a[1] * b[3] - + (uint64_t)a[2] * b[2] - + (uint64_t)a[3] * b[1] - + (uint64_t)a[4] * b[0]; - VERIFY_BITS(c, 63); - /* [d 0 0 0 0 t9 0 0 0 0 c t3 t2 t1 t0] = [p13 p12 p11 p10 p9 0 0 0 0 p4 p3 p2 p1 p0] */ - d += (uint64_t)a[5] * b[9] - + (uint64_t)a[6] * b[8] - + (uint64_t)a[7] * b[7] - + (uint64_t)a[8] * b[6] - + (uint64_t)a[9] * b[5]; - VERIFY_BITS(d, 62); - /* [d 0 0 0 0 t9 0 0 0 0 c t3 t2 t1 t0] = [p14 p13 p12 p11 p10 p9 0 0 0 0 p4 p3 p2 p1 p0] */ - u4 = d & M; d >>= 26; c += u4 * R0; - VERIFY_BITS(u4, 26); - VERIFY_BITS(d, 36); - /* VERIFY_BITS(c, 64); */ - /* [d u4 0 0 0 0 t9 0 0 0 0 c-u4*R0 t3 t2 t1 t0] = [p14 p13 p12 p11 p10 p9 0 0 0 0 p4 p3 p2 p1 p0] */ - t4 = c & M; c >>= 26; c += u4 * R1; - VERIFY_BITS(t4, 26); - VERIFY_BITS(c, 39); - /* [d u4 0 0 0 0 t9 0 0 0 c-u4*R1 t4-u4*R0 t3 t2 t1 t0] = [p14 p13 p12 p11 p10 p9 0 0 0 0 p4 p3 p2 p1 p0] */ - /* [d 0 0 0 0 0 t9 0 0 0 c t4 t3 t2 t1 t0] = [p14 p13 p12 p11 p10 p9 0 0 0 0 p4 p3 p2 p1 p0] */ - - c += (uint64_t)a[0] * b[5] - + (uint64_t)a[1] * b[4] - + (uint64_t)a[2] * b[3] - + (uint64_t)a[3] * b[2] - + (uint64_t)a[4] * b[1] - + (uint64_t)a[5] * b[0]; - VERIFY_BITS(c, 63); - /* [d 0 0 0 0 0 t9 0 0 0 c t4 t3 t2 t1 t0] = [p14 p13 p12 p11 p10 p9 0 0 0 p5 p4 p3 p2 p1 p0] */ - d += (uint64_t)a[6] * b[9] - + (uint64_t)a[7] * b[8] - + (uint64_t)a[8] * b[7] - + (uint64_t)a[9] * b[6]; - VERIFY_BITS(d, 62); - /* [d 0 0 0 0 0 t9 0 0 0 c t4 t3 t2 t1 t0] = [p15 p14 p13 p12 p11 p10 p9 0 0 0 p5 p4 p3 p2 p1 p0] */ - u5 = d & M; d >>= 26; c += u5 * R0; - VERIFY_BITS(u5, 26); - VERIFY_BITS(d, 36); - /* VERIFY_BITS(c, 64); */ - /* [d u5 0 0 0 0 0 t9 0 0 0 c-u5*R0 t4 t3 t2 t1 t0] = [p15 p14 p13 p12 p11 p10 p9 0 0 0 p5 p4 p3 p2 p1 p0] */ - t5 = c & M; c >>= 26; c += u5 * R1; - VERIFY_BITS(t5, 26); - VERIFY_BITS(c, 39); - /* [d u5 0 0 0 0 0 t9 0 0 c-u5*R1 t5-u5*R0 t4 t3 t2 t1 t0] = [p15 p14 p13 p12 p11 p10 p9 0 0 0 p5 p4 p3 p2 p1 p0] */ - /* [d 0 0 0 0 0 0 t9 0 0 c t5 t4 t3 t2 t1 t0] = [p15 p14 p13 p12 p11 p10 p9 0 0 0 p5 p4 p3 p2 p1 p0] */ - - c += (uint64_t)a[0] * b[6] - + (uint64_t)a[1] * b[5] - + (uint64_t)a[2] * b[4] - + (uint64_t)a[3] * b[3] - + (uint64_t)a[4] * b[2] - + (uint64_t)a[5] * b[1] - + (uint64_t)a[6] * b[0]; - VERIFY_BITS(c, 63); - /* [d 0 0 0 0 0 0 t9 0 0 c t5 t4 t3 t2 t1 t0] = [p15 p14 p13 p12 p11 p10 p9 0 0 p6 p5 p4 p3 p2 p1 p0] */ - d += (uint64_t)a[7] * b[9] - + (uint64_t)a[8] * b[8] - + (uint64_t)a[9] * b[7]; - VERIFY_BITS(d, 61); - /* [d 0 0 0 0 0 0 t9 0 0 c t5 t4 t3 t2 t1 t0] = [p16 p15 p14 p13 p12 p11 p10 p9 0 0 p6 p5 p4 p3 p2 p1 p0] */ - u6 = d & M; d >>= 26; c += u6 * R0; - VERIFY_BITS(u6, 26); - VERIFY_BITS(d, 35); - /* VERIFY_BITS(c, 64); */ - /* [d u6 0 0 0 0 0 0 t9 0 0 c-u6*R0 t5 t4 t3 t2 t1 t0] = [p16 p15 p14 p13 p12 p11 p10 p9 0 0 p6 p5 p4 p3 p2 p1 p0] */ - t6 = c & M; c >>= 26; c += u6 * R1; - VERIFY_BITS(t6, 26); - VERIFY_BITS(c, 39); - /* [d u6 0 0 0 0 0 0 t9 0 c-u6*R1 t6-u6*R0 t5 t4 t3 t2 t1 t0] = [p16 p15 p14 p13 p12 p11 p10 p9 0 0 p6 p5 p4 p3 p2 p1 p0] */ - /* [d 0 0 0 0 0 0 0 t9 0 c t6 t5 t4 t3 t2 t1 t0] = [p16 p15 p14 p13 p12 p11 p10 p9 0 0 p6 p5 p4 p3 p2 p1 p0] */ - - c += (uint64_t)a[0] * b[7] - + (uint64_t)a[1] * b[6] - + (uint64_t)a[2] * b[5] - + (uint64_t)a[3] * b[4] - + (uint64_t)a[4] * b[3] - + (uint64_t)a[5] * b[2] - + (uint64_t)a[6] * b[1] - + (uint64_t)a[7] * b[0]; - /* VERIFY_BITS(c, 64); */ - VERIFY_CHECK(c <= 0x8000007C00000007ULL); - /* [d 0 0 0 0 0 0 0 t9 0 c t6 t5 t4 t3 t2 t1 t0] = [p16 p15 p14 p13 p12 p11 p10 p9 0 p7 p6 p5 p4 p3 p2 p1 p0] */ - d += (uint64_t)a[8] * b[9] - + (uint64_t)a[9] * b[8]; - VERIFY_BITS(d, 58); - /* [d 0 0 0 0 0 0 0 t9 0 c t6 t5 t4 t3 t2 t1 t0] = [p17 p16 p15 p14 p13 p12 p11 p10 p9 0 p7 p6 p5 p4 p3 p2 p1 p0] */ - u7 = d & M; d >>= 26; c += u7 * R0; - VERIFY_BITS(u7, 26); - VERIFY_BITS(d, 32); - /* VERIFY_BITS(c, 64); */ - VERIFY_CHECK(c <= 0x800001703FFFC2F7ULL); - /* [d u7 0 0 0 0 0 0 0 t9 0 c-u7*R0 t6 t5 t4 t3 t2 t1 t0] = [p17 p16 p15 p14 p13 p12 p11 p10 p9 0 p7 p6 p5 p4 p3 p2 p1 p0] */ - t7 = c & M; c >>= 26; c += u7 * R1; - VERIFY_BITS(t7, 26); - VERIFY_BITS(c, 38); - /* [d u7 0 0 0 0 0 0 0 t9 c-u7*R1 t7-u7*R0 t6 t5 t4 t3 t2 t1 t0] = [p17 p16 p15 p14 p13 p12 p11 p10 p9 0 p7 p6 p5 p4 p3 p2 p1 p0] */ - /* [d 0 0 0 0 0 0 0 0 t9 c t7 t6 t5 t4 t3 t2 t1 t0] = [p17 p16 p15 p14 p13 p12 p11 p10 p9 0 p7 p6 p5 p4 p3 p2 p1 p0] */ - - c += (uint64_t)a[0] * b[8] - + (uint64_t)a[1] * b[7] - + (uint64_t)a[2] * b[6] - + (uint64_t)a[3] * b[5] - + (uint64_t)a[4] * b[4] - + (uint64_t)a[5] * b[3] - + (uint64_t)a[6] * b[2] - + (uint64_t)a[7] * b[1] - + (uint64_t)a[8] * b[0]; - /* VERIFY_BITS(c, 64); */ - VERIFY_CHECK(c <= 0x9000007B80000008ULL); - /* [d 0 0 0 0 0 0 0 0 t9 c t7 t6 t5 t4 t3 t2 t1 t0] = [p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */ - d += (uint64_t)a[9] * b[9]; - VERIFY_BITS(d, 57); - /* [d 0 0 0 0 0 0 0 0 t9 c t7 t6 t5 t4 t3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */ - u8 = d & M; d >>= 26; c += u8 * R0; - VERIFY_BITS(u8, 26); - VERIFY_BITS(d, 31); - /* VERIFY_BITS(c, 64); */ - VERIFY_CHECK(c <= 0x9000016FBFFFC2F8ULL); - /* [d u8 0 0 0 0 0 0 0 0 t9 c-u8*R0 t7 t6 t5 t4 t3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */ - - r[3] = t3; - VERIFY_BITS(r[3], 26); - /* [d u8 0 0 0 0 0 0 0 0 t9 c-u8*R0 t7 t6 t5 t4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */ - r[4] = t4; - VERIFY_BITS(r[4], 26); - /* [d u8 0 0 0 0 0 0 0 0 t9 c-u8*R0 t7 t6 t5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */ - r[5] = t5; - VERIFY_BITS(r[5], 26); - /* [d u8 0 0 0 0 0 0 0 0 t9 c-u8*R0 t7 t6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */ - r[6] = t6; - VERIFY_BITS(r[6], 26); - /* [d u8 0 0 0 0 0 0 0 0 t9 c-u8*R0 t7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */ - r[7] = t7; - VERIFY_BITS(r[7], 26); - /* [d u8 0 0 0 0 0 0 0 0 t9 c-u8*R0 r7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */ - - r[8] = c & M; c >>= 26; c += u8 * R1; - VERIFY_BITS(r[8], 26); - VERIFY_BITS(c, 39); - /* [d u8 0 0 0 0 0 0 0 0 t9+c-u8*R1 r8-u8*R0 r7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */ - /* [d 0 0 0 0 0 0 0 0 0 t9+c r8 r7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */ - c += d * R0 + t9; - VERIFY_BITS(c, 45); - /* [d 0 0 0 0 0 0 0 0 0 c-d*R0 r8 r7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */ - r[9] = c & (M >> 4); c >>= 22; c += d * (R1 << 4); - VERIFY_BITS(r[9], 22); - VERIFY_BITS(c, 46); - /* [d 0 0 0 0 0 0 0 0 r9+((c-d*R1<<4)<<22)-d*R0 r8 r7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */ - /* [d 0 0 0 0 0 0 0 -d*R1 r9+(c<<22)-d*R0 r8 r7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */ - /* [r9+(c<<22) r8 r7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */ - - d = c * (R0 >> 4) + t0; - VERIFY_BITS(d, 56); - /* [r9+(c<<22) r8 r7 r6 r5 r4 r3 t2 t1 d-c*R0>>4] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */ - r[0] = d & M; d >>= 26; - VERIFY_BITS(r[0], 26); - VERIFY_BITS(d, 30); - /* [r9+(c<<22) r8 r7 r6 r5 r4 r3 t2 t1+d r0-c*R0>>4] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */ - d += c * (R1 >> 4) + t1; - VERIFY_BITS(d, 53); - VERIFY_CHECK(d <= 0x10000003FFFFBFULL); - /* [r9+(c<<22) r8 r7 r6 r5 r4 r3 t2 d-c*R1>>4 r0-c*R0>>4] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */ - /* [r9 r8 r7 r6 r5 r4 r3 t2 d r0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */ - r[1] = d & M; d >>= 26; - VERIFY_BITS(r[1], 26); - VERIFY_BITS(d, 27); - VERIFY_CHECK(d <= 0x4000000ULL); - /* [r9 r8 r7 r6 r5 r4 r3 t2+d r1 r0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */ - d += t2; - VERIFY_BITS(d, 27); - /* [r9 r8 r7 r6 r5 r4 r3 d r1 r0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */ - r[2] = d; - VERIFY_BITS(r[2], 27); - /* [r9 r8 r7 r6 r5 r4 r3 r2 r1 r0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */ -} - -SECP256K1_INLINE static void secp256k1_fe_sqr_inner(uint32_t *r, const uint32_t *a) { - uint64_t c, d; - uint64_t u0, u1, u2, u3, u4, u5, u6, u7, u8; - uint32_t t9, t0, t1, t2, t3, t4, t5, t6, t7; - const uint32_t M = 0x3FFFFFFUL, R0 = 0x3D10UL, R1 = 0x400UL; - - VERIFY_BITS(a[0], 30); - VERIFY_BITS(a[1], 30); - VERIFY_BITS(a[2], 30); - VERIFY_BITS(a[3], 30); - VERIFY_BITS(a[4], 30); - VERIFY_BITS(a[5], 30); - VERIFY_BITS(a[6], 30); - VERIFY_BITS(a[7], 30); - VERIFY_BITS(a[8], 30); - VERIFY_BITS(a[9], 26); - - /** [... a b c] is a shorthand for ... + a<<52 + b<<26 + c<<0 mod n. - * px is a shorthand for sum(a[i]*a[x-i], i=0..x). - * Note that [x 0 0 0 0 0 0 0 0 0 0] = [x*R1 x*R0]. - */ - - d = (uint64_t)(a[0]*2) * a[9] - + (uint64_t)(a[1]*2) * a[8] - + (uint64_t)(a[2]*2) * a[7] - + (uint64_t)(a[3]*2) * a[6] - + (uint64_t)(a[4]*2) * a[5]; - /* VERIFY_BITS(d, 64); */ - /* [d 0 0 0 0 0 0 0 0 0] = [p9 0 0 0 0 0 0 0 0 0] */ - t9 = d & M; d >>= 26; - VERIFY_BITS(t9, 26); - VERIFY_BITS(d, 38); - /* [d t9 0 0 0 0 0 0 0 0 0] = [p9 0 0 0 0 0 0 0 0 0] */ - - c = (uint64_t)a[0] * a[0]; - VERIFY_BITS(c, 60); - /* [d t9 0 0 0 0 0 0 0 0 c] = [p9 0 0 0 0 0 0 0 0 p0] */ - d += (uint64_t)(a[1]*2) * a[9] - + (uint64_t)(a[2]*2) * a[8] - + (uint64_t)(a[3]*2) * a[7] - + (uint64_t)(a[4]*2) * a[6] - + (uint64_t)a[5] * a[5]; - VERIFY_BITS(d, 63); - /* [d t9 0 0 0 0 0 0 0 0 c] = [p10 p9 0 0 0 0 0 0 0 0 p0] */ - u0 = d & M; d >>= 26; c += u0 * R0; - VERIFY_BITS(u0, 26); - VERIFY_BITS(d, 37); - VERIFY_BITS(c, 61); - /* [d u0 t9 0 0 0 0 0 0 0 0 c-u0*R0] = [p10 p9 0 0 0 0 0 0 0 0 p0] */ - t0 = c & M; c >>= 26; c += u0 * R1; - VERIFY_BITS(t0, 26); - VERIFY_BITS(c, 37); - /* [d u0 t9 0 0 0 0 0 0 0 c-u0*R1 t0-u0*R0] = [p10 p9 0 0 0 0 0 0 0 0 p0] */ - /* [d 0 t9 0 0 0 0 0 0 0 c t0] = [p10 p9 0 0 0 0 0 0 0 0 p0] */ - - c += (uint64_t)(a[0]*2) * a[1]; - VERIFY_BITS(c, 62); - /* [d 0 t9 0 0 0 0 0 0 0 c t0] = [p10 p9 0 0 0 0 0 0 0 p1 p0] */ - d += (uint64_t)(a[2]*2) * a[9] - + (uint64_t)(a[3]*2) * a[8] - + (uint64_t)(a[4]*2) * a[7] - + (uint64_t)(a[5]*2) * a[6]; - VERIFY_BITS(d, 63); - /* [d 0 t9 0 0 0 0 0 0 0 c t0] = [p11 p10 p9 0 0 0 0 0 0 0 p1 p0] */ - u1 = d & M; d >>= 26; c += u1 * R0; - VERIFY_BITS(u1, 26); - VERIFY_BITS(d, 37); - VERIFY_BITS(c, 63); - /* [d u1 0 t9 0 0 0 0 0 0 0 c-u1*R0 t0] = [p11 p10 p9 0 0 0 0 0 0 0 p1 p0] */ - t1 = c & M; c >>= 26; c += u1 * R1; - VERIFY_BITS(t1, 26); - VERIFY_BITS(c, 38); - /* [d u1 0 t9 0 0 0 0 0 0 c-u1*R1 t1-u1*R0 t0] = [p11 p10 p9 0 0 0 0 0 0 0 p1 p0] */ - /* [d 0 0 t9 0 0 0 0 0 0 c t1 t0] = [p11 p10 p9 0 0 0 0 0 0 0 p1 p0] */ - - c += (uint64_t)(a[0]*2) * a[2] - + (uint64_t)a[1] * a[1]; - VERIFY_BITS(c, 62); - /* [d 0 0 t9 0 0 0 0 0 0 c t1 t0] = [p11 p10 p9 0 0 0 0 0 0 p2 p1 p0] */ - d += (uint64_t)(a[3]*2) * a[9] - + (uint64_t)(a[4]*2) * a[8] - + (uint64_t)(a[5]*2) * a[7] - + (uint64_t)a[6] * a[6]; - VERIFY_BITS(d, 63); - /* [d 0 0 t9 0 0 0 0 0 0 c t1 t0] = [p12 p11 p10 p9 0 0 0 0 0 0 p2 p1 p0] */ - u2 = d & M; d >>= 26; c += u2 * R0; - VERIFY_BITS(u2, 26); - VERIFY_BITS(d, 37); - VERIFY_BITS(c, 63); - /* [d u2 0 0 t9 0 0 0 0 0 0 c-u2*R0 t1 t0] = [p12 p11 p10 p9 0 0 0 0 0 0 p2 p1 p0] */ - t2 = c & M; c >>= 26; c += u2 * R1; - VERIFY_BITS(t2, 26); - VERIFY_BITS(c, 38); - /* [d u2 0 0 t9 0 0 0 0 0 c-u2*R1 t2-u2*R0 t1 t0] = [p12 p11 p10 p9 0 0 0 0 0 0 p2 p1 p0] */ - /* [d 0 0 0 t9 0 0 0 0 0 c t2 t1 t0] = [p12 p11 p10 p9 0 0 0 0 0 0 p2 p1 p0] */ - - c += (uint64_t)(a[0]*2) * a[3] - + (uint64_t)(a[1]*2) * a[2]; - VERIFY_BITS(c, 63); - /* [d 0 0 0 t9 0 0 0 0 0 c t2 t1 t0] = [p12 p11 p10 p9 0 0 0 0 0 p3 p2 p1 p0] */ - d += (uint64_t)(a[4]*2) * a[9] - + (uint64_t)(a[5]*2) * a[8] - + (uint64_t)(a[6]*2) * a[7]; - VERIFY_BITS(d, 63); - /* [d 0 0 0 t9 0 0 0 0 0 c t2 t1 t0] = [p13 p12 p11 p10 p9 0 0 0 0 0 p3 p2 p1 p0] */ - u3 = d & M; d >>= 26; c += u3 * R0; - VERIFY_BITS(u3, 26); - VERIFY_BITS(d, 37); - /* VERIFY_BITS(c, 64); */ - /* [d u3 0 0 0 t9 0 0 0 0 0 c-u3*R0 t2 t1 t0] = [p13 p12 p11 p10 p9 0 0 0 0 0 p3 p2 p1 p0] */ - t3 = c & M; c >>= 26; c += u3 * R1; - VERIFY_BITS(t3, 26); - VERIFY_BITS(c, 39); - /* [d u3 0 0 0 t9 0 0 0 0 c-u3*R1 t3-u3*R0 t2 t1 t0] = [p13 p12 p11 p10 p9 0 0 0 0 0 p3 p2 p1 p0] */ - /* [d 0 0 0 0 t9 0 0 0 0 c t3 t2 t1 t0] = [p13 p12 p11 p10 p9 0 0 0 0 0 p3 p2 p1 p0] */ - - c += (uint64_t)(a[0]*2) * a[4] - + (uint64_t)(a[1]*2) * a[3] - + (uint64_t)a[2] * a[2]; - VERIFY_BITS(c, 63); - /* [d 0 0 0 0 t9 0 0 0 0 c t3 t2 t1 t0] = [p13 p12 p11 p10 p9 0 0 0 0 p4 p3 p2 p1 p0] */ - d += (uint64_t)(a[5]*2) * a[9] - + (uint64_t)(a[6]*2) * a[8] - + (uint64_t)a[7] * a[7]; - VERIFY_BITS(d, 62); - /* [d 0 0 0 0 t9 0 0 0 0 c t3 t2 t1 t0] = [p14 p13 p12 p11 p10 p9 0 0 0 0 p4 p3 p2 p1 p0] */ - u4 = d & M; d >>= 26; c += u4 * R0; - VERIFY_BITS(u4, 26); - VERIFY_BITS(d, 36); - /* VERIFY_BITS(c, 64); */ - /* [d u4 0 0 0 0 t9 0 0 0 0 c-u4*R0 t3 t2 t1 t0] = [p14 p13 p12 p11 p10 p9 0 0 0 0 p4 p3 p2 p1 p0] */ - t4 = c & M; c >>= 26; c += u4 * R1; - VERIFY_BITS(t4, 26); - VERIFY_BITS(c, 39); - /* [d u4 0 0 0 0 t9 0 0 0 c-u4*R1 t4-u4*R0 t3 t2 t1 t0] = [p14 p13 p12 p11 p10 p9 0 0 0 0 p4 p3 p2 p1 p0] */ - /* [d 0 0 0 0 0 t9 0 0 0 c t4 t3 t2 t1 t0] = [p14 p13 p12 p11 p10 p9 0 0 0 0 p4 p3 p2 p1 p0] */ - - c += (uint64_t)(a[0]*2) * a[5] - + (uint64_t)(a[1]*2) * a[4] - + (uint64_t)(a[2]*2) * a[3]; - VERIFY_BITS(c, 63); - /* [d 0 0 0 0 0 t9 0 0 0 c t4 t3 t2 t1 t0] = [p14 p13 p12 p11 p10 p9 0 0 0 p5 p4 p3 p2 p1 p0] */ - d += (uint64_t)(a[6]*2) * a[9] - + (uint64_t)(a[7]*2) * a[8]; - VERIFY_BITS(d, 62); - /* [d 0 0 0 0 0 t9 0 0 0 c t4 t3 t2 t1 t0] = [p15 p14 p13 p12 p11 p10 p9 0 0 0 p5 p4 p3 p2 p1 p0] */ - u5 = d & M; d >>= 26; c += u5 * R0; - VERIFY_BITS(u5, 26); - VERIFY_BITS(d, 36); - /* VERIFY_BITS(c, 64); */ - /* [d u5 0 0 0 0 0 t9 0 0 0 c-u5*R0 t4 t3 t2 t1 t0] = [p15 p14 p13 p12 p11 p10 p9 0 0 0 p5 p4 p3 p2 p1 p0] */ - t5 = c & M; c >>= 26; c += u5 * R1; - VERIFY_BITS(t5, 26); - VERIFY_BITS(c, 39); - /* [d u5 0 0 0 0 0 t9 0 0 c-u5*R1 t5-u5*R0 t4 t3 t2 t1 t0] = [p15 p14 p13 p12 p11 p10 p9 0 0 0 p5 p4 p3 p2 p1 p0] */ - /* [d 0 0 0 0 0 0 t9 0 0 c t5 t4 t3 t2 t1 t0] = [p15 p14 p13 p12 p11 p10 p9 0 0 0 p5 p4 p3 p2 p1 p0] */ - - c += (uint64_t)(a[0]*2) * a[6] - + (uint64_t)(a[1]*2) * a[5] - + (uint64_t)(a[2]*2) * a[4] - + (uint64_t)a[3] * a[3]; - VERIFY_BITS(c, 63); - /* [d 0 0 0 0 0 0 t9 0 0 c t5 t4 t3 t2 t1 t0] = [p15 p14 p13 p12 p11 p10 p9 0 0 p6 p5 p4 p3 p2 p1 p0] */ - d += (uint64_t)(a[7]*2) * a[9] - + (uint64_t)a[8] * a[8]; - VERIFY_BITS(d, 61); - /* [d 0 0 0 0 0 0 t9 0 0 c t5 t4 t3 t2 t1 t0] = [p16 p15 p14 p13 p12 p11 p10 p9 0 0 p6 p5 p4 p3 p2 p1 p0] */ - u6 = d & M; d >>= 26; c += u6 * R0; - VERIFY_BITS(u6, 26); - VERIFY_BITS(d, 35); - /* VERIFY_BITS(c, 64); */ - /* [d u6 0 0 0 0 0 0 t9 0 0 c-u6*R0 t5 t4 t3 t2 t1 t0] = [p16 p15 p14 p13 p12 p11 p10 p9 0 0 p6 p5 p4 p3 p2 p1 p0] */ - t6 = c & M; c >>= 26; c += u6 * R1; - VERIFY_BITS(t6, 26); - VERIFY_BITS(c, 39); - /* [d u6 0 0 0 0 0 0 t9 0 c-u6*R1 t6-u6*R0 t5 t4 t3 t2 t1 t0] = [p16 p15 p14 p13 p12 p11 p10 p9 0 0 p6 p5 p4 p3 p2 p1 p0] */ - /* [d 0 0 0 0 0 0 0 t9 0 c t6 t5 t4 t3 t2 t1 t0] = [p16 p15 p14 p13 p12 p11 p10 p9 0 0 p6 p5 p4 p3 p2 p1 p0] */ - - c += (uint64_t)(a[0]*2) * a[7] - + (uint64_t)(a[1]*2) * a[6] - + (uint64_t)(a[2]*2) * a[5] - + (uint64_t)(a[3]*2) * a[4]; - /* VERIFY_BITS(c, 64); */ - VERIFY_CHECK(c <= 0x8000007C00000007ULL); - /* [d 0 0 0 0 0 0 0 t9 0 c t6 t5 t4 t3 t2 t1 t0] = [p16 p15 p14 p13 p12 p11 p10 p9 0 p7 p6 p5 p4 p3 p2 p1 p0] */ - d += (uint64_t)(a[8]*2) * a[9]; - VERIFY_BITS(d, 58); - /* [d 0 0 0 0 0 0 0 t9 0 c t6 t5 t4 t3 t2 t1 t0] = [p17 p16 p15 p14 p13 p12 p11 p10 p9 0 p7 p6 p5 p4 p3 p2 p1 p0] */ - u7 = d & M; d >>= 26; c += u7 * R0; - VERIFY_BITS(u7, 26); - VERIFY_BITS(d, 32); - /* VERIFY_BITS(c, 64); */ - VERIFY_CHECK(c <= 0x800001703FFFC2F7ULL); - /* [d u7 0 0 0 0 0 0 0 t9 0 c-u7*R0 t6 t5 t4 t3 t2 t1 t0] = [p17 p16 p15 p14 p13 p12 p11 p10 p9 0 p7 p6 p5 p4 p3 p2 p1 p0] */ - t7 = c & M; c >>= 26; c += u7 * R1; - VERIFY_BITS(t7, 26); - VERIFY_BITS(c, 38); - /* [d u7 0 0 0 0 0 0 0 t9 c-u7*R1 t7-u7*R0 t6 t5 t4 t3 t2 t1 t0] = [p17 p16 p15 p14 p13 p12 p11 p10 p9 0 p7 p6 p5 p4 p3 p2 p1 p0] */ - /* [d 0 0 0 0 0 0 0 0 t9 c t7 t6 t5 t4 t3 t2 t1 t0] = [p17 p16 p15 p14 p13 p12 p11 p10 p9 0 p7 p6 p5 p4 p3 p2 p1 p0] */ - - c += (uint64_t)(a[0]*2) * a[8] - + (uint64_t)(a[1]*2) * a[7] - + (uint64_t)(a[2]*2) * a[6] - + (uint64_t)(a[3]*2) * a[5] - + (uint64_t)a[4] * a[4]; - /* VERIFY_BITS(c, 64); */ - VERIFY_CHECK(c <= 0x9000007B80000008ULL); - /* [d 0 0 0 0 0 0 0 0 t9 c t7 t6 t5 t4 t3 t2 t1 t0] = [p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */ - d += (uint64_t)a[9] * a[9]; - VERIFY_BITS(d, 57); - /* [d 0 0 0 0 0 0 0 0 t9 c t7 t6 t5 t4 t3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */ - u8 = d & M; d >>= 26; c += u8 * R0; - VERIFY_BITS(u8, 26); - VERIFY_BITS(d, 31); - /* VERIFY_BITS(c, 64); */ - VERIFY_CHECK(c <= 0x9000016FBFFFC2F8ULL); - /* [d u8 0 0 0 0 0 0 0 0 t9 c-u8*R0 t7 t6 t5 t4 t3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */ - - r[3] = t3; - VERIFY_BITS(r[3], 26); - /* [d u8 0 0 0 0 0 0 0 0 t9 c-u8*R0 t7 t6 t5 t4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */ - r[4] = t4; - VERIFY_BITS(r[4], 26); - /* [d u8 0 0 0 0 0 0 0 0 t9 c-u8*R0 t7 t6 t5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */ - r[5] = t5; - VERIFY_BITS(r[5], 26); - /* [d u8 0 0 0 0 0 0 0 0 t9 c-u8*R0 t7 t6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */ - r[6] = t6; - VERIFY_BITS(r[6], 26); - /* [d u8 0 0 0 0 0 0 0 0 t9 c-u8*R0 t7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */ - r[7] = t7; - VERIFY_BITS(r[7], 26); - /* [d u8 0 0 0 0 0 0 0 0 t9 c-u8*R0 r7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */ - - r[8] = c & M; c >>= 26; c += u8 * R1; - VERIFY_BITS(r[8], 26); - VERIFY_BITS(c, 39); - /* [d u8 0 0 0 0 0 0 0 0 t9+c-u8*R1 r8-u8*R0 r7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */ - /* [d 0 0 0 0 0 0 0 0 0 t9+c r8 r7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */ - c += d * R0 + t9; - VERIFY_BITS(c, 45); - /* [d 0 0 0 0 0 0 0 0 0 c-d*R0 r8 r7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */ - r[9] = c & (M >> 4); c >>= 22; c += d * (R1 << 4); - VERIFY_BITS(r[9], 22); - VERIFY_BITS(c, 46); - /* [d 0 0 0 0 0 0 0 0 r9+((c-d*R1<<4)<<22)-d*R0 r8 r7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */ - /* [d 0 0 0 0 0 0 0 -d*R1 r9+(c<<22)-d*R0 r8 r7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */ - /* [r9+(c<<22) r8 r7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */ - - d = c * (R0 >> 4) + t0; - VERIFY_BITS(d, 56); - /* [r9+(c<<22) r8 r7 r6 r5 r4 r3 t2 t1 d-c*R0>>4] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */ - r[0] = d & M; d >>= 26; - VERIFY_BITS(r[0], 26); - VERIFY_BITS(d, 30); - /* [r9+(c<<22) r8 r7 r6 r5 r4 r3 t2 t1+d r0-c*R0>>4] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */ - d += c * (R1 >> 4) + t1; - VERIFY_BITS(d, 53); - VERIFY_CHECK(d <= 0x10000003FFFFBFULL); - /* [r9+(c<<22) r8 r7 r6 r5 r4 r3 t2 d-c*R1>>4 r0-c*R0>>4] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */ - /* [r9 r8 r7 r6 r5 r4 r3 t2 d r0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */ - r[1] = d & M; d >>= 26; - VERIFY_BITS(r[1], 26); - VERIFY_BITS(d, 27); - VERIFY_CHECK(d <= 0x4000000ULL); - /* [r9 r8 r7 r6 r5 r4 r3 t2+d r1 r0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */ - d += t2; - VERIFY_BITS(d, 27); - /* [r9 r8 r7 r6 r5 r4 r3 d r1 r0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */ - r[2] = d; - VERIFY_BITS(r[2], 27); - /* [r9 r8 r7 r6 r5 r4 r3 r2 r1 r0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */ -} -#endif - -SECP256K1_INLINE static void secp256k1_fe_impl_mul(secp256k1_fe *r, const secp256k1_fe *a, const secp256k1_fe * SECP256K1_RESTRICT b) { - secp256k1_fe_mul_inner(r->n, a->n, b->n); -} - -SECP256K1_INLINE static void secp256k1_fe_impl_sqr(secp256k1_fe *r, const secp256k1_fe *a) { - secp256k1_fe_sqr_inner(r->n, a->n); -} - -SECP256K1_INLINE static void secp256k1_fe_impl_cmov(secp256k1_fe *r, const secp256k1_fe *a, int flag) { - uint32_t mask0, mask1; - volatile int vflag = flag; - SECP256K1_CHECKMEM_CHECK_VERIFY(r->n, sizeof(r->n)); - mask0 = vflag + ~((uint32_t)0); - mask1 = ~mask0; - r->n[0] = (r->n[0] & mask0) | (a->n[0] & mask1); - r->n[1] = (r->n[1] & mask0) | (a->n[1] & mask1); - r->n[2] = (r->n[2] & mask0) | (a->n[2] & mask1); - r->n[3] = (r->n[3] & mask0) | (a->n[3] & mask1); - r->n[4] = (r->n[4] & mask0) | (a->n[4] & mask1); - r->n[5] = (r->n[5] & mask0) | (a->n[5] & mask1); - r->n[6] = (r->n[6] & mask0) | (a->n[6] & mask1); - r->n[7] = (r->n[7] & mask0) | (a->n[7] & mask1); - r->n[8] = (r->n[8] & mask0) | (a->n[8] & mask1); - r->n[9] = (r->n[9] & mask0) | (a->n[9] & mask1); -} - -static SECP256K1_INLINE void secp256k1_fe_impl_half(secp256k1_fe *r) { - uint32_t t0 = r->n[0], t1 = r->n[1], t2 = r->n[2], t3 = r->n[3], t4 = r->n[4], - t5 = r->n[5], t6 = r->n[6], t7 = r->n[7], t8 = r->n[8], t9 = r->n[9]; - uint32_t one = (uint32_t)1; - uint32_t mask = -(t0 & one) >> 6; - - /* Bounds analysis (over the rationals). - * - * Let m = r->magnitude - * C = 0x3FFFFFFUL * 2 - * D = 0x03FFFFFUL * 2 - * - * Initial bounds: t0..t8 <= C * m - * t9 <= D * m - */ - - t0 += 0x3FFFC2FUL & mask; - t1 += 0x3FFFFBFUL & mask; - t2 += mask; - t3 += mask; - t4 += mask; - t5 += mask; - t6 += mask; - t7 += mask; - t8 += mask; - t9 += mask >> 4; - - VERIFY_CHECK((t0 & one) == 0); - - /* t0..t8: added <= C/2 - * t9: added <= D/2 - * - * Current bounds: t0..t8 <= C * (m + 1/2) - * t9 <= D * (m + 1/2) - */ - - r->n[0] = (t0 >> 1) + ((t1 & one) << 25); - r->n[1] = (t1 >> 1) + ((t2 & one) << 25); - r->n[2] = (t2 >> 1) + ((t3 & one) << 25); - r->n[3] = (t3 >> 1) + ((t4 & one) << 25); - r->n[4] = (t4 >> 1) + ((t5 & one) << 25); - r->n[5] = (t5 >> 1) + ((t6 & one) << 25); - r->n[6] = (t6 >> 1) + ((t7 & one) << 25); - r->n[7] = (t7 >> 1) + ((t8 & one) << 25); - r->n[8] = (t8 >> 1) + ((t9 & one) << 25); - r->n[9] = (t9 >> 1); - - /* t0..t8: shifted right and added <= C/4 + 1/2 - * t9: shifted right - * - * Current bounds: t0..t8 <= C * (m/2 + 1/2) - * t9 <= D * (m/2 + 1/4) - * - * Therefore the output magnitude (M) has to be set such that: - * t0..t8: C * M >= C * (m/2 + 1/2) - * t9: D * M >= D * (m/2 + 1/4) - * - * It suffices for all limbs that, for any input magnitude m: - * M >= m/2 + 1/2 - * - * and since we want the smallest such integer value for M: - * M == floor(m/2) + 1 - */ -} - -static SECP256K1_INLINE void secp256k1_fe_storage_cmov(secp256k1_fe_storage *r, const secp256k1_fe_storage *a, int flag) { - uint32_t mask0, mask1; - volatile int vflag = flag; - SECP256K1_CHECKMEM_CHECK_VERIFY(r->n, sizeof(r->n)); - mask0 = vflag + ~((uint32_t)0); - mask1 = ~mask0; - r->n[0] = (r->n[0] & mask0) | (a->n[0] & mask1); - r->n[1] = (r->n[1] & mask0) | (a->n[1] & mask1); - r->n[2] = (r->n[2] & mask0) | (a->n[2] & mask1); - r->n[3] = (r->n[3] & mask0) | (a->n[3] & mask1); - r->n[4] = (r->n[4] & mask0) | (a->n[4] & mask1); - r->n[5] = (r->n[5] & mask0) | (a->n[5] & mask1); - r->n[6] = (r->n[6] & mask0) | (a->n[6] & mask1); - r->n[7] = (r->n[7] & mask0) | (a->n[7] & mask1); -} - -static void secp256k1_fe_impl_to_storage(secp256k1_fe_storage *r, const secp256k1_fe *a) { - r->n[0] = a->n[0] | a->n[1] << 26; - r->n[1] = a->n[1] >> 6 | a->n[2] << 20; - r->n[2] = a->n[2] >> 12 | a->n[3] << 14; - r->n[3] = a->n[3] >> 18 | a->n[4] << 8; - r->n[4] = a->n[4] >> 24 | a->n[5] << 2 | a->n[6] << 28; - r->n[5] = a->n[6] >> 4 | a->n[7] << 22; - r->n[6] = a->n[7] >> 10 | a->n[8] << 16; - r->n[7] = a->n[8] >> 16 | a->n[9] << 10; -} - -static SECP256K1_INLINE void secp256k1_fe_impl_from_storage(secp256k1_fe *r, const secp256k1_fe_storage *a) { - r->n[0] = a->n[0] & 0x3FFFFFFUL; - r->n[1] = a->n[0] >> 26 | ((a->n[1] << 6) & 0x3FFFFFFUL); - r->n[2] = a->n[1] >> 20 | ((a->n[2] << 12) & 0x3FFFFFFUL); - r->n[3] = a->n[2] >> 14 | ((a->n[3] << 18) & 0x3FFFFFFUL); - r->n[4] = a->n[3] >> 8 | ((a->n[4] << 24) & 0x3FFFFFFUL); - r->n[5] = (a->n[4] >> 2) & 0x3FFFFFFUL; - r->n[6] = a->n[4] >> 28 | ((a->n[5] << 4) & 0x3FFFFFFUL); - r->n[7] = a->n[5] >> 22 | ((a->n[6] << 10) & 0x3FFFFFFUL); - r->n[8] = a->n[6] >> 16 | ((a->n[7] << 16) & 0x3FFFFFFUL); - r->n[9] = a->n[7] >> 10; -} - -static void secp256k1_fe_from_signed30(secp256k1_fe *r, const secp256k1_modinv32_signed30 *a) { - const uint32_t M26 = UINT32_MAX >> 6; - const uint32_t a0 = a->v[0], a1 = a->v[1], a2 = a->v[2], a3 = a->v[3], a4 = a->v[4], - a5 = a->v[5], a6 = a->v[6], a7 = a->v[7], a8 = a->v[8]; - - /* The output from secp256k1_modinv32{_var} should be normalized to range [0,modulus), and - * have limbs in [0,2^30). The modulus is < 2^256, so the top limb must be below 2^(256-30*8). - */ - VERIFY_CHECK(a0 >> 30 == 0); - VERIFY_CHECK(a1 >> 30 == 0); - VERIFY_CHECK(a2 >> 30 == 0); - VERIFY_CHECK(a3 >> 30 == 0); - VERIFY_CHECK(a4 >> 30 == 0); - VERIFY_CHECK(a5 >> 30 == 0); - VERIFY_CHECK(a6 >> 30 == 0); - VERIFY_CHECK(a7 >> 30 == 0); - VERIFY_CHECK(a8 >> 16 == 0); - - r->n[0] = a0 & M26; - r->n[1] = (a0 >> 26 | a1 << 4) & M26; - r->n[2] = (a1 >> 22 | a2 << 8) & M26; - r->n[3] = (a2 >> 18 | a3 << 12) & M26; - r->n[4] = (a3 >> 14 | a4 << 16) & M26; - r->n[5] = (a4 >> 10 | a5 << 20) & M26; - r->n[6] = (a5 >> 6 | a6 << 24) & M26; - r->n[7] = (a6 >> 2 ) & M26; - r->n[8] = (a6 >> 28 | a7 << 2) & M26; - r->n[9] = (a7 >> 24 | a8 << 6); -} - -static void secp256k1_fe_to_signed30(secp256k1_modinv32_signed30 *r, const secp256k1_fe *a) { - const uint32_t M30 = UINT32_MAX >> 2; - const uint64_t a0 = a->n[0], a1 = a->n[1], a2 = a->n[2], a3 = a->n[3], a4 = a->n[4], - a5 = a->n[5], a6 = a->n[6], a7 = a->n[7], a8 = a->n[8], a9 = a->n[9]; - - r->v[0] = (a0 | a1 << 26) & M30; - r->v[1] = (a1 >> 4 | a2 << 22) & M30; - r->v[2] = (a2 >> 8 | a3 << 18) & M30; - r->v[3] = (a3 >> 12 | a4 << 14) & M30; - r->v[4] = (a4 >> 16 | a5 << 10) & M30; - r->v[5] = (a5 >> 20 | a6 << 6) & M30; - r->v[6] = (a6 >> 24 | a7 << 2 - | a8 << 28) & M30; - r->v[7] = (a8 >> 2 | a9 << 24) & M30; - r->v[8] = a9 >> 6; -} - -static const secp256k1_modinv32_modinfo secp256k1_const_modinfo_fe = { - {{-0x3D1, -4, 0, 0, 0, 0, 0, 0, 65536}}, - 0x2DDACACFL -}; - -static void secp256k1_fe_impl_inv(secp256k1_fe *r, const secp256k1_fe *x) { - secp256k1_fe tmp = *x; - secp256k1_modinv32_signed30 s; - - secp256k1_fe_normalize(&tmp); - secp256k1_fe_to_signed30(&s, &tmp); - secp256k1_modinv32(&s, &secp256k1_const_modinfo_fe); - secp256k1_fe_from_signed30(r, &s); -} - -static void secp256k1_fe_impl_inv_var(secp256k1_fe *r, const secp256k1_fe *x) { - secp256k1_fe tmp = *x; - secp256k1_modinv32_signed30 s; - - secp256k1_fe_normalize_var(&tmp); - secp256k1_fe_to_signed30(&s, &tmp); - secp256k1_modinv32_var(&s, &secp256k1_const_modinfo_fe); - secp256k1_fe_from_signed30(r, &s); -} - -static int secp256k1_fe_impl_is_square_var(const secp256k1_fe *x) { - secp256k1_fe tmp; - secp256k1_modinv32_signed30 s; - int jac, ret; - - tmp = *x; - secp256k1_fe_normalize_var(&tmp); - /* secp256k1_jacobi32_maybe_var cannot deal with input 0. */ - if (secp256k1_fe_is_zero(&tmp)) return 1; - secp256k1_fe_to_signed30(&s, &tmp); - jac = secp256k1_jacobi32_maybe_var(&s, &secp256k1_const_modinfo_fe); - if (jac == 0) { - /* secp256k1_jacobi32_maybe_var failed to compute the Jacobi symbol. Fall back - * to computing a square root. This should be extremely rare with random - * input (except in VERIFY mode, where a lower iteration count is used). */ - secp256k1_fe dummy; - ret = secp256k1_fe_sqrt(&dummy, &tmp); - } else { - ret = jac >= 0; - } - return ret; -} - -#endif /* SECP256K1_FIELD_REPR_IMPL_H */ diff --git a/src/field_impl.h b/src/field_impl.h index 7aa7de4..9ebaf98 100644 --- a/src/field_impl.h +++ b/src/field_impl.h @@ -12,8 +12,6 @@ #if defined(SECP256K1_WIDEMUL_INT128) #include "field_5x52_impl.h" -#elif defined(SECP256K1_WIDEMUL_INT64) -#include "field_10x26_impl.h" #else #error "Please select wide multiplication implementation" #endif diff --git a/src/modinv32.h b/src/modinv32.h deleted file mode 100644 index 846c642..0000000 --- a/src/modinv32.h +++ /dev/null @@ -1,43 +0,0 @@ -/*********************************************************************** - * Copyright (c) 2020 Peter Dettman * - * Distributed under the MIT software license, see the accompanying * - * file COPYING or https://www.opensource.org/licenses/mit-license.php.* - **********************************************************************/ - -#ifndef SECP256K1_MODINV32_H -#define SECP256K1_MODINV32_H - -#include "util.h" - -/* A signed 30-bit limb representation of integers. - * - * Its value is sum(v[i] * 2^(30*i), i=0..8). */ -typedef struct { - int32_t v[9]; -} secp256k1_modinv32_signed30; - -typedef struct { - /* The modulus in signed30 notation, must be odd and in [3, 2^256]. */ - secp256k1_modinv32_signed30 modulus; - - /* modulus^{-1} mod 2^30 */ - uint32_t modulus_inv30; -} secp256k1_modinv32_modinfo; - -/* Replace x with its modular inverse mod modinfo->modulus. x must be in range [0, modulus). - * If x is zero, the result will be zero as well. If not, the inverse must exist (i.e., the gcd of - * x and modulus must be 1). These rules are automatically satisfied if the modulus is prime. - * - * On output, all of x's limbs will be in [0, 2^30). - */ -static void secp256k1_modinv32_var(secp256k1_modinv32_signed30 *x, const secp256k1_modinv32_modinfo *modinfo); - -/* Same as secp256k1_modinv32_var, but constant time in x (not in the modulus). */ -static void secp256k1_modinv32(secp256k1_modinv32_signed30 *x, const secp256k1_modinv32_modinfo *modinfo); - -/* Compute the Jacobi symbol for (x | modinfo->modulus). x must be coprime with modulus (and thus - * cannot be 0, as modulus >= 3). All limbs of x must be non-negative. Returns 0 if the result - * cannot be computed. */ -static int secp256k1_jacobi32_maybe_var(const secp256k1_modinv32_signed30 *x, const secp256k1_modinv32_modinfo *modinfo); - -#endif /* SECP256K1_MODINV32_H */ diff --git a/src/modinv32_impl.h b/src/modinv32_impl.h deleted file mode 100644 index 981d2ab..0000000 --- a/src/modinv32_impl.h +++ /dev/null @@ -1,725 +0,0 @@ -/*********************************************************************** - * Copyright (c) 2020 Peter Dettman * - * Distributed under the MIT software license, see the accompanying * - * file COPYING or https://www.opensource.org/licenses/mit-license.php.* - **********************************************************************/ - -#ifndef SECP256K1_MODINV32_IMPL_H -#define SECP256K1_MODINV32_IMPL_H - -#include "modinv32.h" - -#include "util.h" - -#include - -/* This file implements modular inversion based on the paper "Fast constant-time gcd computation and - * modular inversion" by Daniel J. Bernstein and Bo-Yin Yang. - * - * For an explanation of the algorithm, see doc/safegcd_implementation.md. This file contains an - * implementation for N=30, using 30-bit signed limbs represented as int32_t. - */ - -#ifdef VERIFY -static const secp256k1_modinv32_signed30 SECP256K1_SIGNED30_ONE = {{1}}; - -/* Compute a*factor and put it in r. All but the top limb in r will be in range [0,2^30). */ -static void secp256k1_modinv32_mul_30(secp256k1_modinv32_signed30 *r, const secp256k1_modinv32_signed30 *a, int alen, int32_t factor) { - const int32_t M30 = (int32_t)(UINT32_MAX >> 2); - int64_t c = 0; - int i; - for (i = 0; i < 8; ++i) { - if (i < alen) c += (int64_t)a->v[i] * factor; - r->v[i] = (int32_t)c & M30; c >>= 30; - } - if (8 < alen) c += (int64_t)a->v[8] * factor; - VERIFY_CHECK(c == (int32_t)c); - r->v[8] = (int32_t)c; -} - -/* Return -1 for ab*factor. A consists of alen limbs; b has 9. */ -static int secp256k1_modinv32_mul_cmp_30(const secp256k1_modinv32_signed30 *a, int alen, const secp256k1_modinv32_signed30 *b, int32_t factor) { - int i; - secp256k1_modinv32_signed30 am, bm; - secp256k1_modinv32_mul_30(&am, a, alen, 1); /* Normalize all but the top limb of a. */ - secp256k1_modinv32_mul_30(&bm, b, 9, factor); - for (i = 0; i < 8; ++i) { - /* Verify that all but the top limb of a and b are normalized. */ - VERIFY_CHECK(am.v[i] >> 30 == 0); - VERIFY_CHECK(bm.v[i] >> 30 == 0); - } - for (i = 8; i >= 0; --i) { - if (am.v[i] < bm.v[i]) return -1; - if (am.v[i] > bm.v[i]) return 1; - } - return 0; -} -#endif - -/* Take as input a signed30 number in range (-2*modulus,modulus), and add a multiple of the modulus - * to it to bring it to range [0,modulus). If sign < 0, the input will also be negated in the - * process. The input must have limbs in range (-2^30,2^30). The output will have limbs in range - * [0,2^30). */ -static void secp256k1_modinv32_normalize_30(secp256k1_modinv32_signed30 *r, int32_t sign, const secp256k1_modinv32_modinfo *modinfo) { - const int32_t M30 = (int32_t)(UINT32_MAX >> 2); - int32_t r0 = r->v[0], r1 = r->v[1], r2 = r->v[2], r3 = r->v[3], r4 = r->v[4], - r5 = r->v[5], r6 = r->v[6], r7 = r->v[7], r8 = r->v[8]; - volatile int32_t cond_add, cond_negate; - -#ifdef VERIFY - /* Verify that all limbs are in range (-2^30,2^30). */ - int i; - for (i = 0; i < 9; ++i) { - VERIFY_CHECK(r->v[i] >= -M30); - VERIFY_CHECK(r->v[i] <= M30); - } - VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(r, 9, &modinfo->modulus, -2) > 0); /* r > -2*modulus */ - VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(r, 9, &modinfo->modulus, 1) < 0); /* r < modulus */ -#endif - - /* In a first step, add the modulus if the input is negative, and then negate if requested. - * This brings r from range (-2*modulus,modulus) to range (-modulus,modulus). As all input - * limbs are in range (-2^30,2^30), this cannot overflow an int32_t. Note that the right - * shifts below are signed sign-extending shifts (see assumptions.h for tests that that is - * indeed the behavior of the right shift operator). */ - cond_add = r8 >> 31; - r0 += modinfo->modulus.v[0] & cond_add; - r1 += modinfo->modulus.v[1] & cond_add; - r2 += modinfo->modulus.v[2] & cond_add; - r3 += modinfo->modulus.v[3] & cond_add; - r4 += modinfo->modulus.v[4] & cond_add; - r5 += modinfo->modulus.v[5] & cond_add; - r6 += modinfo->modulus.v[6] & cond_add; - r7 += modinfo->modulus.v[7] & cond_add; - r8 += modinfo->modulus.v[8] & cond_add; - cond_negate = sign >> 31; - r0 = (r0 ^ cond_negate) - cond_negate; - r1 = (r1 ^ cond_negate) - cond_negate; - r2 = (r2 ^ cond_negate) - cond_negate; - r3 = (r3 ^ cond_negate) - cond_negate; - r4 = (r4 ^ cond_negate) - cond_negate; - r5 = (r5 ^ cond_negate) - cond_negate; - r6 = (r6 ^ cond_negate) - cond_negate; - r7 = (r7 ^ cond_negate) - cond_negate; - r8 = (r8 ^ cond_negate) - cond_negate; - /* Propagate the top bits, to bring limbs back to range (-2^30,2^30). */ - r1 += r0 >> 30; r0 &= M30; - r2 += r1 >> 30; r1 &= M30; - r3 += r2 >> 30; r2 &= M30; - r4 += r3 >> 30; r3 &= M30; - r5 += r4 >> 30; r4 &= M30; - r6 += r5 >> 30; r5 &= M30; - r7 += r6 >> 30; r6 &= M30; - r8 += r7 >> 30; r7 &= M30; - - /* In a second step add the modulus again if the result is still negative, bringing r to range - * [0,modulus). */ - cond_add = r8 >> 31; - r0 += modinfo->modulus.v[0] & cond_add; - r1 += modinfo->modulus.v[1] & cond_add; - r2 += modinfo->modulus.v[2] & cond_add; - r3 += modinfo->modulus.v[3] & cond_add; - r4 += modinfo->modulus.v[4] & cond_add; - r5 += modinfo->modulus.v[5] & cond_add; - r6 += modinfo->modulus.v[6] & cond_add; - r7 += modinfo->modulus.v[7] & cond_add; - r8 += modinfo->modulus.v[8] & cond_add; - /* And propagate again. */ - r1 += r0 >> 30; r0 &= M30; - r2 += r1 >> 30; r1 &= M30; - r3 += r2 >> 30; r2 &= M30; - r4 += r3 >> 30; r3 &= M30; - r5 += r4 >> 30; r4 &= M30; - r6 += r5 >> 30; r5 &= M30; - r7 += r6 >> 30; r6 &= M30; - r8 += r7 >> 30; r7 &= M30; - - r->v[0] = r0; - r->v[1] = r1; - r->v[2] = r2; - r->v[3] = r3; - r->v[4] = r4; - r->v[5] = r5; - r->v[6] = r6; - r->v[7] = r7; - r->v[8] = r8; - - VERIFY_CHECK(r0 >> 30 == 0); - VERIFY_CHECK(r1 >> 30 == 0); - VERIFY_CHECK(r2 >> 30 == 0); - VERIFY_CHECK(r3 >> 30 == 0); - VERIFY_CHECK(r4 >> 30 == 0); - VERIFY_CHECK(r5 >> 30 == 0); - VERIFY_CHECK(r6 >> 30 == 0); - VERIFY_CHECK(r7 >> 30 == 0); - VERIFY_CHECK(r8 >> 30 == 0); - VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(r, 9, &modinfo->modulus, 0) >= 0); /* r >= 0 */ - VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(r, 9, &modinfo->modulus, 1) < 0); /* r < modulus */ -} - -/* Data type for transition matrices (see section 3 of explanation). - * - * t = [ u v ] - * [ q r ] - */ -typedef struct { - int32_t u, v, q, r; -} secp256k1_modinv32_trans2x2; - -/* Compute the transition matrix and zeta for 30 divsteps. - * - * Input: zeta: initial zeta - * f0: bottom limb of initial f - * g0: bottom limb of initial g - * Output: t: transition matrix - * Return: final zeta - * - * Implements the divsteps_n_matrix function from the explanation. - */ -static int32_t secp256k1_modinv32_divsteps_30(int32_t zeta, uint32_t f0, uint32_t g0, secp256k1_modinv32_trans2x2 *t) { - /* u,v,q,r are the elements of the transformation matrix being built up, - * starting with the identity matrix. Semantically they are signed integers - * in range [-2^30,2^30], but here represented as unsigned mod 2^32. This - * permits left shifting (which is UB for negative numbers). The range - * being inside [-2^31,2^31) means that casting to signed works correctly. - */ - uint32_t u = 1, v = 0, q = 0, r = 1; - volatile uint32_t c1, c2; - uint32_t mask1, mask2, f = f0, g = g0, x, y, z; - int i; - - for (i = 0; i < 30; ++i) { - VERIFY_CHECK((f & 1) == 1); /* f must always be odd */ - VERIFY_CHECK((u * f0 + v * g0) == f << i); - VERIFY_CHECK((q * f0 + r * g0) == g << i); - /* Compute conditional masks for (zeta < 0) and for (g & 1). */ - c1 = zeta >> 31; - mask1 = c1; - c2 = g & 1; - mask2 = -c2; - /* Compute x,y,z, conditionally negated versions of f,u,v. */ - x = (f ^ mask1) - mask1; - y = (u ^ mask1) - mask1; - z = (v ^ mask1) - mask1; - /* Conditionally add x,y,z to g,q,r. */ - g += x & mask2; - q += y & mask2; - r += z & mask2; - /* In what follows, mask1 is a condition mask for (zeta < 0) and (g & 1). */ - mask1 &= mask2; - /* Conditionally change zeta into -zeta-2 or zeta-1. */ - zeta = (zeta ^ mask1) - 1; - /* Conditionally add g,q,r to f,u,v. */ - f += g & mask1; - u += q & mask1; - v += r & mask1; - /* Shifts */ - g >>= 1; - u <<= 1; - v <<= 1; - /* Bounds on zeta that follow from the bounds on iteration count (max 20*30 divsteps). */ - VERIFY_CHECK(zeta >= -601 && zeta <= 601); - } - /* Return data in t and return value. */ - t->u = (int32_t)u; - t->v = (int32_t)v; - t->q = (int32_t)q; - t->r = (int32_t)r; - /* The determinant of t must be a power of two. This guarantees that multiplication with t - * does not change the gcd of f and g, apart from adding a power-of-2 factor to it (which - * will be divided out again). As each divstep's individual matrix has determinant 2, the - * aggregate of 30 of them will have determinant 2^30. */ - VERIFY_CHECK((int64_t)t->u * t->r - (int64_t)t->v * t->q == ((int64_t)1) << 30); - return zeta; -} - -/* secp256k1_modinv32_inv256[i] = -(2*i+1)^-1 (mod 256) */ -static const uint8_t secp256k1_modinv32_inv256[128] = { - 0xFF, 0x55, 0x33, 0x49, 0xC7, 0x5D, 0x3B, 0x11, 0x0F, 0xE5, 0xC3, 0x59, - 0xD7, 0xED, 0xCB, 0x21, 0x1F, 0x75, 0x53, 0x69, 0xE7, 0x7D, 0x5B, 0x31, - 0x2F, 0x05, 0xE3, 0x79, 0xF7, 0x0D, 0xEB, 0x41, 0x3F, 0x95, 0x73, 0x89, - 0x07, 0x9D, 0x7B, 0x51, 0x4F, 0x25, 0x03, 0x99, 0x17, 0x2D, 0x0B, 0x61, - 0x5F, 0xB5, 0x93, 0xA9, 0x27, 0xBD, 0x9B, 0x71, 0x6F, 0x45, 0x23, 0xB9, - 0x37, 0x4D, 0x2B, 0x81, 0x7F, 0xD5, 0xB3, 0xC9, 0x47, 0xDD, 0xBB, 0x91, - 0x8F, 0x65, 0x43, 0xD9, 0x57, 0x6D, 0x4B, 0xA1, 0x9F, 0xF5, 0xD3, 0xE9, - 0x67, 0xFD, 0xDB, 0xB1, 0xAF, 0x85, 0x63, 0xF9, 0x77, 0x8D, 0x6B, 0xC1, - 0xBF, 0x15, 0xF3, 0x09, 0x87, 0x1D, 0xFB, 0xD1, 0xCF, 0xA5, 0x83, 0x19, - 0x97, 0xAD, 0x8B, 0xE1, 0xDF, 0x35, 0x13, 0x29, 0xA7, 0x3D, 0x1B, 0xF1, - 0xEF, 0xC5, 0xA3, 0x39, 0xB7, 0xCD, 0xAB, 0x01 -}; - -/* Compute the transition matrix and eta for 30 divsteps (variable time). - * - * Input: eta: initial eta - * f0: bottom limb of initial f - * g0: bottom limb of initial g - * Output: t: transition matrix - * Return: final eta - * - * Implements the divsteps_n_matrix_var function from the explanation. - */ -static int32_t secp256k1_modinv32_divsteps_30_var(int32_t eta, uint32_t f0, uint32_t g0, secp256k1_modinv32_trans2x2 *t) { - /* Transformation matrix; see comments in secp256k1_modinv32_divsteps_30. */ - uint32_t u = 1, v = 0, q = 0, r = 1; - uint32_t f = f0, g = g0, m; - uint16_t w; - int i = 30, limit, zeros; - - for (;;) { - /* Use a sentinel bit to count zeros only up to i. */ - zeros = secp256k1_ctz32_var(g | (UINT32_MAX << i)); - /* Perform zeros divsteps at once; they all just divide g by two. */ - g >>= zeros; - u <<= zeros; - v <<= zeros; - eta -= zeros; - i -= zeros; - /* We're done once we've done 30 divsteps. */ - if (i == 0) break; - VERIFY_CHECK((f & 1) == 1); - VERIFY_CHECK((g & 1) == 1); - VERIFY_CHECK((u * f0 + v * g0) == f << (30 - i)); - VERIFY_CHECK((q * f0 + r * g0) == g << (30 - i)); - /* Bounds on eta that follow from the bounds on iteration count (max 25*30 divsteps). */ - VERIFY_CHECK(eta >= -751 && eta <= 751); - /* If eta is negative, negate it and replace f,g with g,-f. */ - if (eta < 0) { - uint32_t tmp; - eta = -eta; - tmp = f; f = g; g = -tmp; - tmp = u; u = q; q = -tmp; - tmp = v; v = r; r = -tmp; - } - /* eta is now >= 0. In what follows we're going to cancel out the bottom bits of g. No more - * than i can be cancelled out (as we'd be done before that point), and no more than eta+1 - * can be done as its sign will flip once that happens. */ - limit = ((int)eta + 1) > i ? i : ((int)eta + 1); - /* m is a mask for the bottom min(limit, 8) bits (our table only supports 8 bits). */ - VERIFY_CHECK(limit > 0 && limit <= 30); - m = (UINT32_MAX >> (32 - limit)) & 255U; - /* Find what multiple of f must be added to g to cancel its bottom min(limit, 8) bits. */ - w = (g * secp256k1_modinv32_inv256[(f >> 1) & 127]) & m; - /* Do so. */ - g += f * w; - q += u * w; - r += v * w; - VERIFY_CHECK((g & m) == 0); - } - /* Return data in t and return value. */ - t->u = (int32_t)u; - t->v = (int32_t)v; - t->q = (int32_t)q; - t->r = (int32_t)r; - /* The determinant of t must be a power of two. This guarantees that multiplication with t - * does not change the gcd of f and g, apart from adding a power-of-2 factor to it (which - * will be divided out again). As each divstep's individual matrix has determinant 2, the - * aggregate of 30 of them will have determinant 2^30. */ - VERIFY_CHECK((int64_t)t->u * t->r - (int64_t)t->v * t->q == ((int64_t)1) << 30); - return eta; -} - -/* Compute the transition matrix and eta for 30 posdivsteps (variable time, eta=-delta), and keeps track - * of the Jacobi symbol along the way. f0 and g0 must be f and g mod 2^32 rather than 2^30, because - * Jacobi tracking requires knowing (f mod 8) rather than just (f mod 2). - * - * Input: eta: initial eta - * f0: bottom limb of initial f - * g0: bottom limb of initial g - * Output: t: transition matrix - * Input/Output: (*jacp & 1) is bitflipped if and only if the Jacobi symbol of (f | g) changes sign - * by applying the returned transformation matrix to it. The other bits of *jacp may - * change, but are meaningless. - * Return: final eta - */ -static int32_t secp256k1_modinv32_posdivsteps_30_var(int32_t eta, uint32_t f0, uint32_t g0, secp256k1_modinv32_trans2x2 *t, int *jacp) { - /* Transformation matrix. */ - uint32_t u = 1, v = 0, q = 0, r = 1; - uint32_t f = f0, g = g0, m; - uint16_t w; - int i = 30, limit, zeros; - int jac = *jacp; - - for (;;) { - /* Use a sentinel bit to count zeros only up to i. */ - zeros = secp256k1_ctz32_var(g | (UINT32_MAX << i)); - /* Perform zeros divsteps at once; they all just divide g by two. */ - g >>= zeros; - u <<= zeros; - v <<= zeros; - eta -= zeros; - i -= zeros; - /* Update the bottom bit of jac: when dividing g by an odd power of 2, - * if (f mod 8) is 3 or 5, the Jacobi symbol changes sign. */ - jac ^= (zeros & ((f >> 1) ^ (f >> 2))); - /* We're done once we've done 30 posdivsteps. */ - if (i == 0) break; - VERIFY_CHECK((f & 1) == 1); - VERIFY_CHECK((g & 1) == 1); - VERIFY_CHECK((u * f0 + v * g0) == f << (30 - i)); - VERIFY_CHECK((q * f0 + r * g0) == g << (30 - i)); - /* If eta is negative, negate it and replace f,g with g,f. */ - if (eta < 0) { - uint32_t tmp; - eta = -eta; - /* Update bottom bit of jac: when swapping f and g, the Jacobi symbol changes sign - * if both f and g are 3 mod 4. */ - jac ^= ((f & g) >> 1); - tmp = f; f = g; g = tmp; - tmp = u; u = q; q = tmp; - tmp = v; v = r; r = tmp; - } - /* eta is now >= 0. In what follows we're going to cancel out the bottom bits of g. No more - * than i can be cancelled out (as we'd be done before that point), and no more than eta+1 - * can be done as its sign will flip once that happens. */ - limit = ((int)eta + 1) > i ? i : ((int)eta + 1); - /* m is a mask for the bottom min(limit, 8) bits (our table only supports 8 bits). */ - VERIFY_CHECK(limit > 0 && limit <= 30); - m = (UINT32_MAX >> (32 - limit)) & 255U; - /* Find what multiple of f must be added to g to cancel its bottom min(limit, 8) bits. */ - w = (g * secp256k1_modinv32_inv256[(f >> 1) & 127]) & m; - /* Do so. */ - g += f * w; - q += u * w; - r += v * w; - VERIFY_CHECK((g & m) == 0); - } - /* Return data in t and return value. */ - t->u = (int32_t)u; - t->v = (int32_t)v; - t->q = (int32_t)q; - t->r = (int32_t)r; - /* The determinant of t must be a power of two. This guarantees that multiplication with t - * does not change the gcd of f and g, apart from adding a power-of-2 factor to it (which - * will be divided out again). As each divstep's individual matrix has determinant 2 or -2, - * the aggregate of 30 of them will have determinant 2^30 or -2^30. */ - VERIFY_CHECK((int64_t)t->u * t->r - (int64_t)t->v * t->q == ((int64_t)1) << 30 || - (int64_t)t->u * t->r - (int64_t)t->v * t->q == -(((int64_t)1) << 30)); - *jacp = jac; - return eta; -} - -/* Compute (t/2^30) * [d, e] mod modulus, where t is a transition matrix for 30 divsteps. - * - * On input and output, d and e are in range (-2*modulus,modulus). All output limbs will be in range - * (-2^30,2^30). - * - * This implements the update_de function from the explanation. - */ -static void secp256k1_modinv32_update_de_30(secp256k1_modinv32_signed30 *d, secp256k1_modinv32_signed30 *e, const secp256k1_modinv32_trans2x2 *t, const secp256k1_modinv32_modinfo* modinfo) { - const int32_t M30 = (int32_t)(UINT32_MAX >> 2); - const int32_t u = t->u, v = t->v, q = t->q, r = t->r; - int32_t di, ei, md, me, sd, se; - int64_t cd, ce; - int i; - VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(d, 9, &modinfo->modulus, -2) > 0); /* d > -2*modulus */ - VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(d, 9, &modinfo->modulus, 1) < 0); /* d < modulus */ - VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(e, 9, &modinfo->modulus, -2) > 0); /* e > -2*modulus */ - VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(e, 9, &modinfo->modulus, 1) < 0); /* e < modulus */ - VERIFY_CHECK(labs(u) <= (M30 + 1 - labs(v))); /* |u|+|v| <= 2^30 */ - VERIFY_CHECK(labs(q) <= (M30 + 1 - labs(r))); /* |q|+|r| <= 2^30 */ - - /* [md,me] start as zero; plus [u,q] if d is negative; plus [v,r] if e is negative. */ - sd = d->v[8] >> 31; - se = e->v[8] >> 31; - md = (u & sd) + (v & se); - me = (q & sd) + (r & se); - /* Begin computing t*[d,e]. */ - di = d->v[0]; - ei = e->v[0]; - cd = (int64_t)u * di + (int64_t)v * ei; - ce = (int64_t)q * di + (int64_t)r * ei; - /* Correct md,me so that t*[d,e]+modulus*[md,me] has 30 zero bottom bits. */ - md -= (modinfo->modulus_inv30 * (uint32_t)cd + md) & M30; - me -= (modinfo->modulus_inv30 * (uint32_t)ce + me) & M30; - /* Update the beginning of computation for t*[d,e]+modulus*[md,me] now md,me are known. */ - cd += (int64_t)modinfo->modulus.v[0] * md; - ce += (int64_t)modinfo->modulus.v[0] * me; - /* Verify that the low 30 bits of the computation are indeed zero, and then throw them away. */ - VERIFY_CHECK(((int32_t)cd & M30) == 0); cd >>= 30; - VERIFY_CHECK(((int32_t)ce & M30) == 0); ce >>= 30; - /* Now iteratively compute limb i=1..8 of t*[d,e]+modulus*[md,me], and store them in output - * limb i-1 (shifting down by 30 bits). */ - for (i = 1; i < 9; ++i) { - di = d->v[i]; - ei = e->v[i]; - cd += (int64_t)u * di + (int64_t)v * ei; - ce += (int64_t)q * di + (int64_t)r * ei; - cd += (int64_t)modinfo->modulus.v[i] * md; - ce += (int64_t)modinfo->modulus.v[i] * me; - d->v[i - 1] = (int32_t)cd & M30; cd >>= 30; - e->v[i - 1] = (int32_t)ce & M30; ce >>= 30; - } - /* What remains is limb 9 of t*[d,e]+modulus*[md,me]; store it as output limb 8. */ - d->v[8] = (int32_t)cd; - e->v[8] = (int32_t)ce; - - VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(d, 9, &modinfo->modulus, -2) > 0); /* d > -2*modulus */ - VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(d, 9, &modinfo->modulus, 1) < 0); /* d < modulus */ - VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(e, 9, &modinfo->modulus, -2) > 0); /* e > -2*modulus */ - VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(e, 9, &modinfo->modulus, 1) < 0); /* e < modulus */ -} - -/* Compute (t/2^30) * [f, g], where t is a transition matrix for 30 divsteps. - * - * This implements the update_fg function from the explanation. - */ -static void secp256k1_modinv32_update_fg_30(secp256k1_modinv32_signed30 *f, secp256k1_modinv32_signed30 *g, const secp256k1_modinv32_trans2x2 *t) { - const int32_t M30 = (int32_t)(UINT32_MAX >> 2); - const int32_t u = t->u, v = t->v, q = t->q, r = t->r; - int32_t fi, gi; - int64_t cf, cg; - int i; - /* Start computing t*[f,g]. */ - fi = f->v[0]; - gi = g->v[0]; - cf = (int64_t)u * fi + (int64_t)v * gi; - cg = (int64_t)q * fi + (int64_t)r * gi; - /* Verify that the bottom 30 bits of the result are zero, and then throw them away. */ - VERIFY_CHECK(((int32_t)cf & M30) == 0); cf >>= 30; - VERIFY_CHECK(((int32_t)cg & M30) == 0); cg >>= 30; - /* Now iteratively compute limb i=1..8 of t*[f,g], and store them in output limb i-1 (shifting - * down by 30 bits). */ - for (i = 1; i < 9; ++i) { - fi = f->v[i]; - gi = g->v[i]; - cf += (int64_t)u * fi + (int64_t)v * gi; - cg += (int64_t)q * fi + (int64_t)r * gi; - f->v[i - 1] = (int32_t)cf & M30; cf >>= 30; - g->v[i - 1] = (int32_t)cg & M30; cg >>= 30; - } - /* What remains is limb 9 of t*[f,g]; store it as output limb 8. */ - f->v[8] = (int32_t)cf; - g->v[8] = (int32_t)cg; -} - -/* Compute (t/2^30) * [f, g], where t is a transition matrix for 30 divsteps. - * - * Version that operates on a variable number of limbs in f and g. - * - * This implements the update_fg function from the explanation in modinv64_impl.h. - */ -static void secp256k1_modinv32_update_fg_30_var(int len, secp256k1_modinv32_signed30 *f, secp256k1_modinv32_signed30 *g, const secp256k1_modinv32_trans2x2 *t) { - const int32_t M30 = (int32_t)(UINT32_MAX >> 2); - const int32_t u = t->u, v = t->v, q = t->q, r = t->r; - int32_t fi, gi; - int64_t cf, cg; - int i; - VERIFY_CHECK(len > 0); - /* Start computing t*[f,g]. */ - fi = f->v[0]; - gi = g->v[0]; - cf = (int64_t)u * fi + (int64_t)v * gi; - cg = (int64_t)q * fi + (int64_t)r * gi; - /* Verify that the bottom 62 bits of the result are zero, and then throw them away. */ - VERIFY_CHECK(((int32_t)cf & M30) == 0); cf >>= 30; - VERIFY_CHECK(((int32_t)cg & M30) == 0); cg >>= 30; - /* Now iteratively compute limb i=1..len of t*[f,g], and store them in output limb i-1 (shifting - * down by 30 bits). */ - for (i = 1; i < len; ++i) { - fi = f->v[i]; - gi = g->v[i]; - cf += (int64_t)u * fi + (int64_t)v * gi; - cg += (int64_t)q * fi + (int64_t)r * gi; - f->v[i - 1] = (int32_t)cf & M30; cf >>= 30; - g->v[i - 1] = (int32_t)cg & M30; cg >>= 30; - } - /* What remains is limb (len) of t*[f,g]; store it as output limb (len-1). */ - f->v[len - 1] = (int32_t)cf; - g->v[len - 1] = (int32_t)cg; -} - -/* Compute the inverse of x modulo modinfo->modulus, and replace x with it (constant time in x). */ -static void secp256k1_modinv32(secp256k1_modinv32_signed30 *x, const secp256k1_modinv32_modinfo *modinfo) { - /* Start with d=0, e=1, f=modulus, g=x, zeta=-1. */ - secp256k1_modinv32_signed30 d = {{0}}; - secp256k1_modinv32_signed30 e = {{1}}; - secp256k1_modinv32_signed30 f = modinfo->modulus; - secp256k1_modinv32_signed30 g = *x; - int i; - int32_t zeta = -1; /* zeta = -(delta+1/2); delta is initially 1/2. */ - - /* Do 20 iterations of 30 divsteps each = 600 divsteps. 590 suffices for 256-bit inputs. */ - for (i = 0; i < 20; ++i) { - /* Compute transition matrix and new zeta after 30 divsteps. */ - secp256k1_modinv32_trans2x2 t; - zeta = secp256k1_modinv32_divsteps_30(zeta, f.v[0], g.v[0], &t); - /* Update d,e using that transition matrix. */ - secp256k1_modinv32_update_de_30(&d, &e, &t, modinfo); - /* Update f,g using that transition matrix. */ - VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&f, 9, &modinfo->modulus, -1) > 0); /* f > -modulus */ - VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&f, 9, &modinfo->modulus, 1) <= 0); /* f <= modulus */ - VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&g, 9, &modinfo->modulus, -1) > 0); /* g > -modulus */ - VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&g, 9, &modinfo->modulus, 1) < 0); /* g < modulus */ - - secp256k1_modinv32_update_fg_30(&f, &g, &t); - - VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&f, 9, &modinfo->modulus, -1) > 0); /* f > -modulus */ - VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&f, 9, &modinfo->modulus, 1) <= 0); /* f <= modulus */ - VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&g, 9, &modinfo->modulus, -1) > 0); /* g > -modulus */ - VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&g, 9, &modinfo->modulus, 1) < 0); /* g < modulus */ - } - - /* At this point sufficient iterations have been performed that g must have reached 0 - * and (if g was not originally 0) f must now equal +/- GCD of the initial f, g - * values i.e. +/- 1, and d now contains +/- the modular inverse. */ - - /* g == 0 */ - VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&g, 9, &SECP256K1_SIGNED30_ONE, 0) == 0); - /* |f| == 1, or (x == 0 and d == 0 and f == modulus) */ - VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&f, 9, &SECP256K1_SIGNED30_ONE, -1) == 0 || - secp256k1_modinv32_mul_cmp_30(&f, 9, &SECP256K1_SIGNED30_ONE, 1) == 0 || - (secp256k1_modinv32_mul_cmp_30(x, 9, &SECP256K1_SIGNED30_ONE, 0) == 0 && - secp256k1_modinv32_mul_cmp_30(&d, 9, &SECP256K1_SIGNED30_ONE, 0) == 0 && - secp256k1_modinv32_mul_cmp_30(&f, 9, &modinfo->modulus, 1) == 0)); - - /* Optionally negate d, normalize to [0,modulus), and return it. */ - secp256k1_modinv32_normalize_30(&d, f.v[8], modinfo); - *x = d; -} - -/* Compute the inverse of x modulo modinfo->modulus, and replace x with it (variable time). */ -static void secp256k1_modinv32_var(secp256k1_modinv32_signed30 *x, const secp256k1_modinv32_modinfo *modinfo) { - /* Start with d=0, e=1, f=modulus, g=x, eta=-1. */ - secp256k1_modinv32_signed30 d = {{0, 0, 0, 0, 0, 0, 0, 0, 0}}; - secp256k1_modinv32_signed30 e = {{1, 0, 0, 0, 0, 0, 0, 0, 0}}; - secp256k1_modinv32_signed30 f = modinfo->modulus; - secp256k1_modinv32_signed30 g = *x; -#ifdef VERIFY - int i = 0; -#endif - int j, len = 9; - int32_t eta = -1; /* eta = -delta; delta is initially 1 (faster for the variable-time code) */ - int32_t cond, fn, gn; - - /* Do iterations of 30 divsteps each until g=0. */ - while (1) { - /* Compute transition matrix and new eta after 30 divsteps. */ - secp256k1_modinv32_trans2x2 t; - eta = secp256k1_modinv32_divsteps_30_var(eta, f.v[0], g.v[0], &t); - /* Update d,e using that transition matrix. */ - secp256k1_modinv32_update_de_30(&d, &e, &t, modinfo); - /* Update f,g using that transition matrix. */ - - VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&f, len, &modinfo->modulus, -1) > 0); /* f > -modulus */ - VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&f, len, &modinfo->modulus, 1) <= 0); /* f <= modulus */ - VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&g, len, &modinfo->modulus, -1) > 0); /* g > -modulus */ - VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&g, len, &modinfo->modulus, 1) < 0); /* g < modulus */ - - secp256k1_modinv32_update_fg_30_var(len, &f, &g, &t); - /* If the bottom limb of g is 0, there is a chance g=0. */ - if (g.v[0] == 0) { - cond = 0; - /* Check if all other limbs are also 0. */ - for (j = 1; j < len; ++j) { - cond |= g.v[j]; - } - /* If so, we're done. */ - if (cond == 0) break; - } - - /* Determine if len>1 and limb (len-1) of both f and g is 0 or -1. */ - fn = f.v[len - 1]; - gn = g.v[len - 1]; - cond = ((int32_t)len - 2) >> 31; - cond |= fn ^ (fn >> 31); - cond |= gn ^ (gn >> 31); - /* If so, reduce length, propagating the sign of f and g's top limb into the one below. */ - if (cond == 0) { - f.v[len - 2] |= (uint32_t)fn << 30; - g.v[len - 2] |= (uint32_t)gn << 30; - --len; - } - - VERIFY_CHECK(++i < 25); /* We should never need more than 25*30 = 750 divsteps */ - VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&f, len, &modinfo->modulus, -1) > 0); /* f > -modulus */ - VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&f, len, &modinfo->modulus, 1) <= 0); /* f <= modulus */ - VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&g, len, &modinfo->modulus, -1) > 0); /* g > -modulus */ - VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&g, len, &modinfo->modulus, 1) < 0); /* g < modulus */ - } - - /* At this point g is 0 and (if g was not originally 0) f must now equal +/- GCD of - * the initial f, g values i.e. +/- 1, and d now contains +/- the modular inverse. */ - - /* g == 0 */ - VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&g, len, &SECP256K1_SIGNED30_ONE, 0) == 0); - /* |f| == 1, or (x == 0 and d == 0 and f == modulus) */ - VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&f, len, &SECP256K1_SIGNED30_ONE, -1) == 0 || - secp256k1_modinv32_mul_cmp_30(&f, len, &SECP256K1_SIGNED30_ONE, 1) == 0 || - (secp256k1_modinv32_mul_cmp_30(x, 9, &SECP256K1_SIGNED30_ONE, 0) == 0 && - secp256k1_modinv32_mul_cmp_30(&d, 9, &SECP256K1_SIGNED30_ONE, 0) == 0 && - secp256k1_modinv32_mul_cmp_30(&f, len, &modinfo->modulus, 1) == 0)); - - /* Optionally negate d, normalize to [0,modulus), and return it. */ - secp256k1_modinv32_normalize_30(&d, f.v[len - 1], modinfo); - *x = d; -} - -/* Do up to 50 iterations of 30 posdivsteps (up to 1500 steps; more is extremely rare) each until f=1. - * In VERIFY mode use a lower number of iterations (750, close to the median 756), so failure actually occurs. */ -#ifdef VERIFY -#define JACOBI32_ITERATIONS 25 -#else -#define JACOBI32_ITERATIONS 50 -#endif - -/* Compute the Jacobi symbol of x modulo modinfo->modulus (variable time). gcd(x,modulus) must be 1. */ -static int secp256k1_jacobi32_maybe_var(const secp256k1_modinv32_signed30 *x, const secp256k1_modinv32_modinfo *modinfo) { - /* Start with f=modulus, g=x, eta=-1. */ - secp256k1_modinv32_signed30 f = modinfo->modulus; - secp256k1_modinv32_signed30 g = *x; - int j, len = 9; - int32_t eta = -1; /* eta = -delta; delta is initially 1 */ - int32_t cond, fn, gn; - int jac = 0; - int count; - - /* The input limbs must all be non-negative. */ - VERIFY_CHECK(g.v[0] >= 0 && g.v[1] >= 0 && g.v[2] >= 0 && g.v[3] >= 0 && g.v[4] >= 0 && g.v[5] >= 0 && g.v[6] >= 0 && g.v[7] >= 0 && g.v[8] >= 0); - - /* If x > 0, then if the loop below converges, it converges to f=g=gcd(x,modulus). Since we - * require that gcd(x,modulus)=1 and modulus>=3, x cannot be 0. Thus, we must reach f=1 (or - * time out). */ - VERIFY_CHECK((g.v[0] | g.v[1] | g.v[2] | g.v[3] | g.v[4] | g.v[5] | g.v[6] | g.v[7] | g.v[8]) != 0); - - for (count = 0; count < JACOBI32_ITERATIONS; ++count) { - /* Compute transition matrix and new eta after 30 posdivsteps. */ - secp256k1_modinv32_trans2x2 t; - eta = secp256k1_modinv32_posdivsteps_30_var(eta, f.v[0] | ((uint32_t)f.v[1] << 30), g.v[0] | ((uint32_t)g.v[1] << 30), &t, &jac); - /* Update f,g using that transition matrix. */ - VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&f, len, &modinfo->modulus, 0) > 0); /* f > 0 */ - VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&f, len, &modinfo->modulus, 1) <= 0); /* f <= modulus */ - VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&g, len, &modinfo->modulus, 0) > 0); /* g > 0 */ - VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&g, len, &modinfo->modulus, 1) < 0); /* g < modulus */ - - secp256k1_modinv32_update_fg_30_var(len, &f, &g, &t); - /* If the bottom limb of f is 1, there is a chance that f=1. */ - if (f.v[0] == 1) { - cond = 0; - /* Check if the other limbs are also 0. */ - for (j = 1; j < len; ++j) { - cond |= f.v[j]; - } - /* If so, we're done. If f=1, the Jacobi symbol (g | f)=1. */ - if (cond == 0) return 1 - 2*(jac & 1); - } - - /* Determine if len>1 and limb (len-1) of both f and g is 0. */ - fn = f.v[len - 1]; - gn = g.v[len - 1]; - cond = ((int32_t)len - 2) >> 31; - cond |= fn; - cond |= gn; - /* If so, reduce length. */ - if (cond == 0) --len; - - VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&f, len, &modinfo->modulus, 0) > 0); /* f > 0 */ - VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&f, len, &modinfo->modulus, 1) <= 0); /* f <= modulus */ - VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&g, len, &modinfo->modulus, 0) > 0); /* g > 0 */ - VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&g, len, &modinfo->modulus, 1) < 0); /* g < modulus */ - } - - /* The loop failed to converge to f=g after 1500 iterations. Return 0, indicating unknown result. */ - return 0; -} - -#endif /* SECP256K1_MODINV32_IMPL_H */ diff --git a/src/precomputed_ecmult.o b/src/precomputed_ecmult.o index 2fdd38e4086e945204cbcef6993d7ff49ae6be96..7f6cf94c76ed8365dc3f71b37f41617de37e2633 100644 GIT binary patch delta 67 zcmZoz;?Mv@EsQNpEzB(}EvzkUE$l5EEu1Y}E!-_UExav!E&N*qrZBQQNii}oXfRG^ TV!Z diff --git a/src/scalar.h b/src/scalar.h index 70f49b1..f947f35 100644 --- a/src/scalar.h +++ b/src/scalar.h @@ -13,8 +13,6 @@ #include "scalar_low.h" #elif defined(SECP256K1_WIDEMUL_INT128) #include "scalar_4x64.h" -#elif defined(SECP256K1_WIDEMUL_INT64) -#include "scalar_8x32.h" #else #error "Please select wide multiplication implementation" #endif diff --git a/src/scalar_8x32.h b/src/scalar_8x32.h deleted file mode 100644 index 17863ef..0000000 --- a/src/scalar_8x32.h +++ /dev/null @@ -1,19 +0,0 @@ -/*********************************************************************** - * Copyright (c) 2014 Pieter Wuille * - * Distributed under the MIT software license, see the accompanying * - * file COPYING or https://www.opensource.org/licenses/mit-license.php.* - ***********************************************************************/ - -#ifndef SECP256K1_SCALAR_REPR_H -#define SECP256K1_SCALAR_REPR_H - -#include - -/** A scalar modulo the group order of the secp256k1 curve. */ -typedef struct { - uint32_t d[8]; -} secp256k1_scalar; - -#define SECP256K1_SCALAR_CONST(d7, d6, d5, d4, d3, d2, d1, d0) {{(d0), (d1), (d2), (d3), (d4), (d5), (d6), (d7)}} - -#endif /* SECP256K1_SCALAR_REPR_H */ diff --git a/src/scalar_8x32_impl.h b/src/scalar_8x32_impl.h deleted file mode 100644 index 2610496..0000000 --- a/src/scalar_8x32_impl.h +++ /dev/null @@ -1,816 +0,0 @@ -/*********************************************************************** - * Copyright (c) 2014 Pieter Wuille * - * Distributed under the MIT software license, see the accompanying * - * file COPYING or https://www.opensource.org/licenses/mit-license.php.* - ***********************************************************************/ - -#ifndef SECP256K1_SCALAR_REPR_IMPL_H -#define SECP256K1_SCALAR_REPR_IMPL_H - -#include "checkmem.h" -#include "modinv32_impl.h" -#include "util.h" - -/* Limbs of the secp256k1 order. */ -#define SECP256K1_N_0 ((uint32_t)0xD0364141UL) -#define SECP256K1_N_1 ((uint32_t)0xBFD25E8CUL) -#define SECP256K1_N_2 ((uint32_t)0xAF48A03BUL) -#define SECP256K1_N_3 ((uint32_t)0xBAAEDCE6UL) -#define SECP256K1_N_4 ((uint32_t)0xFFFFFFFEUL) -#define SECP256K1_N_5 ((uint32_t)0xFFFFFFFFUL) -#define SECP256K1_N_6 ((uint32_t)0xFFFFFFFFUL) -#define SECP256K1_N_7 ((uint32_t)0xFFFFFFFFUL) - -/* Limbs of 2^256 minus the secp256k1 order. */ -#define SECP256K1_N_C_0 (~SECP256K1_N_0 + 1) -#define SECP256K1_N_C_1 (~SECP256K1_N_1) -#define SECP256K1_N_C_2 (~SECP256K1_N_2) -#define SECP256K1_N_C_3 (~SECP256K1_N_3) -#define SECP256K1_N_C_4 (1) - -/* Limbs of half the secp256k1 order. */ -#define SECP256K1_N_H_0 ((uint32_t)0x681B20A0UL) -#define SECP256K1_N_H_1 ((uint32_t)0xDFE92F46UL) -#define SECP256K1_N_H_2 ((uint32_t)0x57A4501DUL) -#define SECP256K1_N_H_3 ((uint32_t)0x5D576E73UL) -#define SECP256K1_N_H_4 ((uint32_t)0xFFFFFFFFUL) -#define SECP256K1_N_H_5 ((uint32_t)0xFFFFFFFFUL) -#define SECP256K1_N_H_6 ((uint32_t)0xFFFFFFFFUL) -#define SECP256K1_N_H_7 ((uint32_t)0x7FFFFFFFUL) - -SECP256K1_INLINE static void secp256k1_scalar_set_int(secp256k1_scalar *r, unsigned int v) { - r->d[0] = v; - r->d[1] = 0; - r->d[2] = 0; - r->d[3] = 0; - r->d[4] = 0; - r->d[5] = 0; - r->d[6] = 0; - r->d[7] = 0; - - SECP256K1_SCALAR_VERIFY(r); -} - -SECP256K1_INLINE static uint32_t secp256k1_scalar_get_bits_limb32(const secp256k1_scalar *a, unsigned int offset, unsigned int count) { - SECP256K1_SCALAR_VERIFY(a); - VERIFY_CHECK(count > 0 && count <= 32); - VERIFY_CHECK((offset + count - 1) >> 5 == offset >> 5); - - return (a->d[offset >> 5] >> (offset & 0x1F)) & (0xFFFFFFFF >> (32 - count)); -} - -SECP256K1_INLINE static uint32_t secp256k1_scalar_get_bits_var(const secp256k1_scalar *a, unsigned int offset, unsigned int count) { - SECP256K1_SCALAR_VERIFY(a); - VERIFY_CHECK(count > 0 && count <= 32); - VERIFY_CHECK(offset + count <= 256); - - if ((offset + count - 1) >> 5 == offset >> 5) { - return secp256k1_scalar_get_bits_limb32(a, offset, count); - } else { - VERIFY_CHECK((offset >> 5) + 1 < 8); - return ((a->d[offset >> 5] >> (offset & 0x1F)) | (a->d[(offset >> 5) + 1] << (32 - (offset & 0x1F)))) & (0xFFFFFFFF >> (32 - count)); - } -} - -SECP256K1_INLINE static int secp256k1_scalar_check_overflow(const secp256k1_scalar *a) { - int yes = 0; - int no = 0; - no |= (a->d[7] < SECP256K1_N_7); /* No need for a > check. */ - no |= (a->d[6] < SECP256K1_N_6); /* No need for a > check. */ - no |= (a->d[5] < SECP256K1_N_5); /* No need for a > check. */ - no |= (a->d[4] < SECP256K1_N_4); - yes |= (a->d[4] > SECP256K1_N_4) & ~no; - no |= (a->d[3] < SECP256K1_N_3) & ~yes; - yes |= (a->d[3] > SECP256K1_N_3) & ~no; - no |= (a->d[2] < SECP256K1_N_2) & ~yes; - yes |= (a->d[2] > SECP256K1_N_2) & ~no; - no |= (a->d[1] < SECP256K1_N_1) & ~yes; - yes |= (a->d[1] > SECP256K1_N_1) & ~no; - yes |= (a->d[0] >= SECP256K1_N_0) & ~no; - return yes; -} - -SECP256K1_INLINE static int secp256k1_scalar_reduce(secp256k1_scalar *r, uint32_t overflow) { - uint64_t t; - VERIFY_CHECK(overflow <= 1); - - t = (uint64_t)r->d[0] + overflow * SECP256K1_N_C_0; - r->d[0] = t & 0xFFFFFFFFUL; t >>= 32; - t += (uint64_t)r->d[1] + overflow * SECP256K1_N_C_1; - r->d[1] = t & 0xFFFFFFFFUL; t >>= 32; - t += (uint64_t)r->d[2] + overflow * SECP256K1_N_C_2; - r->d[2] = t & 0xFFFFFFFFUL; t >>= 32; - t += (uint64_t)r->d[3] + overflow * SECP256K1_N_C_3; - r->d[3] = t & 0xFFFFFFFFUL; t >>= 32; - t += (uint64_t)r->d[4] + overflow * SECP256K1_N_C_4; - r->d[4] = t & 0xFFFFFFFFUL; t >>= 32; - t += (uint64_t)r->d[5]; - r->d[5] = t & 0xFFFFFFFFUL; t >>= 32; - t += (uint64_t)r->d[6]; - r->d[6] = t & 0xFFFFFFFFUL; t >>= 32; - t += (uint64_t)r->d[7]; - r->d[7] = t & 0xFFFFFFFFUL; - - SECP256K1_SCALAR_VERIFY(r); - return overflow; -} - -static int secp256k1_scalar_add(secp256k1_scalar *r, const secp256k1_scalar *a, const secp256k1_scalar *b) { - int overflow; - uint64_t t = (uint64_t)a->d[0] + b->d[0]; - SECP256K1_SCALAR_VERIFY(a); - SECP256K1_SCALAR_VERIFY(b); - - r->d[0] = t & 0xFFFFFFFFULL; t >>= 32; - t += (uint64_t)a->d[1] + b->d[1]; - r->d[1] = t & 0xFFFFFFFFULL; t >>= 32; - t += (uint64_t)a->d[2] + b->d[2]; - r->d[2] = t & 0xFFFFFFFFULL; t >>= 32; - t += (uint64_t)a->d[3] + b->d[3]; - r->d[3] = t & 0xFFFFFFFFULL; t >>= 32; - t += (uint64_t)a->d[4] + b->d[4]; - r->d[4] = t & 0xFFFFFFFFULL; t >>= 32; - t += (uint64_t)a->d[5] + b->d[5]; - r->d[5] = t & 0xFFFFFFFFULL; t >>= 32; - t += (uint64_t)a->d[6] + b->d[6]; - r->d[6] = t & 0xFFFFFFFFULL; t >>= 32; - t += (uint64_t)a->d[7] + b->d[7]; - r->d[7] = t & 0xFFFFFFFFULL; t >>= 32; - overflow = t + secp256k1_scalar_check_overflow(r); - VERIFY_CHECK(overflow == 0 || overflow == 1); - secp256k1_scalar_reduce(r, overflow); - - SECP256K1_SCALAR_VERIFY(r); - return overflow; -} - -static void secp256k1_scalar_cadd_bit(secp256k1_scalar *r, unsigned int bit, int flag) { - uint64_t t; - volatile int vflag = flag; - SECP256K1_SCALAR_VERIFY(r); - VERIFY_CHECK(bit < 256); - - bit += ((uint32_t) vflag - 1) & 0x100; /* forcing (bit >> 5) > 7 makes this a noop */ - t = (uint64_t)r->d[0] + (((uint32_t)((bit >> 5) == 0)) << (bit & 0x1F)); - r->d[0] = t & 0xFFFFFFFFULL; t >>= 32; - t += (uint64_t)r->d[1] + (((uint32_t)((bit >> 5) == 1)) << (bit & 0x1F)); - r->d[1] = t & 0xFFFFFFFFULL; t >>= 32; - t += (uint64_t)r->d[2] + (((uint32_t)((bit >> 5) == 2)) << (bit & 0x1F)); - r->d[2] = t & 0xFFFFFFFFULL; t >>= 32; - t += (uint64_t)r->d[3] + (((uint32_t)((bit >> 5) == 3)) << (bit & 0x1F)); - r->d[3] = t & 0xFFFFFFFFULL; t >>= 32; - t += (uint64_t)r->d[4] + (((uint32_t)((bit >> 5) == 4)) << (bit & 0x1F)); - r->d[4] = t & 0xFFFFFFFFULL; t >>= 32; - t += (uint64_t)r->d[5] + (((uint32_t)((bit >> 5) == 5)) << (bit & 0x1F)); - r->d[5] = t & 0xFFFFFFFFULL; t >>= 32; - t += (uint64_t)r->d[6] + (((uint32_t)((bit >> 5) == 6)) << (bit & 0x1F)); - r->d[6] = t & 0xFFFFFFFFULL; t >>= 32; - t += (uint64_t)r->d[7] + (((uint32_t)((bit >> 5) == 7)) << (bit & 0x1F)); - r->d[7] = t & 0xFFFFFFFFULL; - - SECP256K1_SCALAR_VERIFY(r); - VERIFY_CHECK((t >> 32) == 0); -} - -static void secp256k1_scalar_set_b32(secp256k1_scalar *r, const unsigned char *b32, int *overflow) { - int over; - r->d[0] = secp256k1_read_be32(&b32[28]); - r->d[1] = secp256k1_read_be32(&b32[24]); - r->d[2] = secp256k1_read_be32(&b32[20]); - r->d[3] = secp256k1_read_be32(&b32[16]); - r->d[4] = secp256k1_read_be32(&b32[12]); - r->d[5] = secp256k1_read_be32(&b32[8]); - r->d[6] = secp256k1_read_be32(&b32[4]); - r->d[7] = secp256k1_read_be32(&b32[0]); - over = secp256k1_scalar_reduce(r, secp256k1_scalar_check_overflow(r)); - if (overflow) { - *overflow = over; - } - - SECP256K1_SCALAR_VERIFY(r); -} - -static void secp256k1_scalar_get_b32(unsigned char *bin, const secp256k1_scalar* a) { - SECP256K1_SCALAR_VERIFY(a); - - secp256k1_write_be32(&bin[0], a->d[7]); - secp256k1_write_be32(&bin[4], a->d[6]); - secp256k1_write_be32(&bin[8], a->d[5]); - secp256k1_write_be32(&bin[12], a->d[4]); - secp256k1_write_be32(&bin[16], a->d[3]); - secp256k1_write_be32(&bin[20], a->d[2]); - secp256k1_write_be32(&bin[24], a->d[1]); - secp256k1_write_be32(&bin[28], a->d[0]); -} - -SECP256K1_INLINE static int secp256k1_scalar_is_zero(const secp256k1_scalar *a) { - SECP256K1_SCALAR_VERIFY(a); - - return (a->d[0] | a->d[1] | a->d[2] | a->d[3] | a->d[4] | a->d[5] | a->d[6] | a->d[7]) == 0; -} - -static void secp256k1_scalar_negate(secp256k1_scalar *r, const secp256k1_scalar *a) { - uint32_t nonzero = 0xFFFFFFFFUL * (secp256k1_scalar_is_zero(a) == 0); - uint64_t t = (uint64_t)(~a->d[0]) + SECP256K1_N_0 + 1; - SECP256K1_SCALAR_VERIFY(a); - - r->d[0] = t & nonzero; t >>= 32; - t += (uint64_t)(~a->d[1]) + SECP256K1_N_1; - r->d[1] = t & nonzero; t >>= 32; - t += (uint64_t)(~a->d[2]) + SECP256K1_N_2; - r->d[2] = t & nonzero; t >>= 32; - t += (uint64_t)(~a->d[3]) + SECP256K1_N_3; - r->d[3] = t & nonzero; t >>= 32; - t += (uint64_t)(~a->d[4]) + SECP256K1_N_4; - r->d[4] = t & nonzero; t >>= 32; - t += (uint64_t)(~a->d[5]) + SECP256K1_N_5; - r->d[5] = t & nonzero; t >>= 32; - t += (uint64_t)(~a->d[6]) + SECP256K1_N_6; - r->d[6] = t & nonzero; t >>= 32; - t += (uint64_t)(~a->d[7]) + SECP256K1_N_7; - r->d[7] = t & nonzero; - - SECP256K1_SCALAR_VERIFY(r); -} - -static void secp256k1_scalar_half(secp256k1_scalar *r, const secp256k1_scalar *a) { - /* Writing `/` for field division and `//` for integer division, we compute - * - * a/2 = (a - (a&1))/2 + (a&1)/2 - * = (a >> 1) + (a&1 ? 1/2 : 0) - * = (a >> 1) + (a&1 ? n//2+1 : 0), - * - * where n is the group order and in the last equality we have used 1/2 = n//2+1 (mod n). - * For n//2, we have the constants SECP256K1_N_H_0, ... - * - * This sum does not overflow. The most extreme case is a = -2, the largest odd scalar. Here: - * - the left summand is: a >> 1 = (a - a&1)/2 = (n-2-1)//2 = (n-3)//2 - * - the right summand is: a&1 ? n//2+1 : 0 = n//2+1 = (n-1)//2 + 2//2 = (n+1)//2 - * Together they sum to (n-3)//2 + (n+1)//2 = (2n-2)//2 = n - 1, which is less than n. - */ - uint32_t mask = -(uint32_t)(a->d[0] & 1U); - uint64_t t = (uint32_t)((a->d[0] >> 1) | (a->d[1] << 31)); - SECP256K1_SCALAR_VERIFY(a); - - t += (SECP256K1_N_H_0 + 1U) & mask; - r->d[0] = t; t >>= 32; - t += (uint32_t)((a->d[1] >> 1) | (a->d[2] << 31)); - t += SECP256K1_N_H_1 & mask; - r->d[1] = t; t >>= 32; - t += (uint32_t)((a->d[2] >> 1) | (a->d[3] << 31)); - t += SECP256K1_N_H_2 & mask; - r->d[2] = t; t >>= 32; - t += (uint32_t)((a->d[3] >> 1) | (a->d[4] << 31)); - t += SECP256K1_N_H_3 & mask; - r->d[3] = t; t >>= 32; - t += (uint32_t)((a->d[4] >> 1) | (a->d[5] << 31)); - t += SECP256K1_N_H_4 & mask; - r->d[4] = t; t >>= 32; - t += (uint32_t)((a->d[5] >> 1) | (a->d[6] << 31)); - t += SECP256K1_N_H_5 & mask; - r->d[5] = t; t >>= 32; - t += (uint32_t)((a->d[6] >> 1) | (a->d[7] << 31)); - t += SECP256K1_N_H_6 & mask; - r->d[6] = t; t >>= 32; - r->d[7] = (uint32_t)t + (uint32_t)(a->d[7] >> 1) + (SECP256K1_N_H_7 & mask); - - /* The line above only computed the bottom 32 bits of r->d[7]. Redo the computation - * in full 64 bits to make sure the top 32 bits are indeed zero. */ - VERIFY_CHECK((t + (a->d[7] >> 1) + (SECP256K1_N_H_7 & mask)) >> 32 == 0); - - SECP256K1_SCALAR_VERIFY(r); -} - -SECP256K1_INLINE static int secp256k1_scalar_is_one(const secp256k1_scalar *a) { - SECP256K1_SCALAR_VERIFY(a); - - return ((a->d[0] ^ 1) | a->d[1] | a->d[2] | a->d[3] | a->d[4] | a->d[5] | a->d[6] | a->d[7]) == 0; -} - -static int secp256k1_scalar_is_high(const secp256k1_scalar *a) { - int yes = 0; - int no = 0; - SECP256K1_SCALAR_VERIFY(a); - - no |= (a->d[7] < SECP256K1_N_H_7); - yes |= (a->d[7] > SECP256K1_N_H_7) & ~no; - no |= (a->d[6] < SECP256K1_N_H_6) & ~yes; /* No need for a > check. */ - no |= (a->d[5] < SECP256K1_N_H_5) & ~yes; /* No need for a > check. */ - no |= (a->d[4] < SECP256K1_N_H_4) & ~yes; /* No need for a > check. */ - no |= (a->d[3] < SECP256K1_N_H_3) & ~yes; - yes |= (a->d[3] > SECP256K1_N_H_3) & ~no; - no |= (a->d[2] < SECP256K1_N_H_2) & ~yes; - yes |= (a->d[2] > SECP256K1_N_H_2) & ~no; - no |= (a->d[1] < SECP256K1_N_H_1) & ~yes; - yes |= (a->d[1] > SECP256K1_N_H_1) & ~no; - yes |= (a->d[0] > SECP256K1_N_H_0) & ~no; - return yes; -} - -static int secp256k1_scalar_cond_negate(secp256k1_scalar *r, int flag) { - /* If we are flag = 0, mask = 00...00 and this is a no-op; - * if we are flag = 1, mask = 11...11 and this is identical to secp256k1_scalar_negate */ - volatile int vflag = flag; - uint32_t mask = -vflag; - uint32_t nonzero = 0xFFFFFFFFUL * (secp256k1_scalar_is_zero(r) == 0); - uint64_t t = (uint64_t)(r->d[0] ^ mask) + ((SECP256K1_N_0 + 1) & mask); - SECP256K1_SCALAR_VERIFY(r); - - r->d[0] = t & nonzero; t >>= 32; - t += (uint64_t)(r->d[1] ^ mask) + (SECP256K1_N_1 & mask); - r->d[1] = t & nonzero; t >>= 32; - t += (uint64_t)(r->d[2] ^ mask) + (SECP256K1_N_2 & mask); - r->d[2] = t & nonzero; t >>= 32; - t += (uint64_t)(r->d[3] ^ mask) + (SECP256K1_N_3 & mask); - r->d[3] = t & nonzero; t >>= 32; - t += (uint64_t)(r->d[4] ^ mask) + (SECP256K1_N_4 & mask); - r->d[4] = t & nonzero; t >>= 32; - t += (uint64_t)(r->d[5] ^ mask) + (SECP256K1_N_5 & mask); - r->d[5] = t & nonzero; t >>= 32; - t += (uint64_t)(r->d[6] ^ mask) + (SECP256K1_N_6 & mask); - r->d[6] = t & nonzero; t >>= 32; - t += (uint64_t)(r->d[7] ^ mask) + (SECP256K1_N_7 & mask); - r->d[7] = t & nonzero; - - SECP256K1_SCALAR_VERIFY(r); - return 2 * (mask == 0) - 1; -} - - -/* Inspired by the macros in OpenSSL's crypto/bn/asm/x86_64-gcc.c. */ - -/** Add a*b to the number defined by (c0,c1,c2). c2 must never overflow. */ -#define muladd(a,b) { \ - uint32_t tl, th; \ - { \ - uint64_t t = (uint64_t)a * b; \ - th = t >> 32; /* at most 0xFFFFFFFE */ \ - tl = t; \ - } \ - c0 += tl; /* overflow is handled on the next line */ \ - th += (c0 < tl); /* at most 0xFFFFFFFF */ \ - c1 += th; /* overflow is handled on the next line */ \ - c2 += (c1 < th); /* never overflows by contract (verified in the next line) */ \ - VERIFY_CHECK((c1 >= th) || (c2 != 0)); \ -} - -/** Add a*b to the number defined by (c0,c1). c1 must never overflow. */ -#define muladd_fast(a,b) { \ - uint32_t tl, th; \ - { \ - uint64_t t = (uint64_t)a * b; \ - th = t >> 32; /* at most 0xFFFFFFFE */ \ - tl = t; \ - } \ - c0 += tl; /* overflow is handled on the next line */ \ - th += (c0 < tl); /* at most 0xFFFFFFFF */ \ - c1 += th; /* never overflows by contract (verified in the next line) */ \ - VERIFY_CHECK(c1 >= th); \ -} - -/** Add a to the number defined by (c0,c1,c2). c2 must never overflow. */ -#define sumadd(a) { \ - unsigned int over; \ - c0 += (a); /* overflow is handled on the next line */ \ - over = (c0 < (a)); \ - c1 += over; /* overflow is handled on the next line */ \ - c2 += (c1 < over); /* never overflows by contract */ \ -} - -/** Add a to the number defined by (c0,c1). c1 must never overflow, c2 must be zero. */ -#define sumadd_fast(a) { \ - c0 += (a); /* overflow is handled on the next line */ \ - c1 += (c0 < (a)); /* never overflows by contract (verified the next line) */ \ - VERIFY_CHECK((c1 != 0) | (c0 >= (a))); \ - VERIFY_CHECK(c2 == 0); \ -} - -/** Extract the lowest 32 bits of (c0,c1,c2) into n, and left shift the number 32 bits. */ -#define extract(n) { \ - (n) = c0; \ - c0 = c1; \ - c1 = c2; \ - c2 = 0; \ -} - -/** Extract the lowest 32 bits of (c0,c1,c2) into n, and left shift the number 32 bits. c2 is required to be zero. */ -#define extract_fast(n) { \ - (n) = c0; \ - c0 = c1; \ - c1 = 0; \ - VERIFY_CHECK(c2 == 0); \ -} - -static void secp256k1_scalar_reduce_512(secp256k1_scalar *r, const uint32_t *l) { - uint64_t c; - uint32_t n0 = l[8], n1 = l[9], n2 = l[10], n3 = l[11], n4 = l[12], n5 = l[13], n6 = l[14], n7 = l[15]; - uint32_t m0, m1, m2, m3, m4, m5, m6, m7, m8, m9, m10, m11, m12; - uint32_t p0, p1, p2, p3, p4, p5, p6, p7, p8; - - /* 96 bit accumulator. */ - uint32_t c0, c1, c2; - - /* Reduce 512 bits into 385. */ - /* m[0..12] = l[0..7] + n[0..7] * SECP256K1_N_C. */ - c0 = l[0]; c1 = 0; c2 = 0; - muladd_fast(n0, SECP256K1_N_C_0); - extract_fast(m0); - sumadd_fast(l[1]); - muladd(n1, SECP256K1_N_C_0); - muladd(n0, SECP256K1_N_C_1); - extract(m1); - sumadd(l[2]); - muladd(n2, SECP256K1_N_C_0); - muladd(n1, SECP256K1_N_C_1); - muladd(n0, SECP256K1_N_C_2); - extract(m2); - sumadd(l[3]); - muladd(n3, SECP256K1_N_C_0); - muladd(n2, SECP256K1_N_C_1); - muladd(n1, SECP256K1_N_C_2); - muladd(n0, SECP256K1_N_C_3); - extract(m3); - sumadd(l[4]); - muladd(n4, SECP256K1_N_C_0); - muladd(n3, SECP256K1_N_C_1); - muladd(n2, SECP256K1_N_C_2); - muladd(n1, SECP256K1_N_C_3); - sumadd(n0); - extract(m4); - sumadd(l[5]); - muladd(n5, SECP256K1_N_C_0); - muladd(n4, SECP256K1_N_C_1); - muladd(n3, SECP256K1_N_C_2); - muladd(n2, SECP256K1_N_C_3); - sumadd(n1); - extract(m5); - sumadd(l[6]); - muladd(n6, SECP256K1_N_C_0); - muladd(n5, SECP256K1_N_C_1); - muladd(n4, SECP256K1_N_C_2); - muladd(n3, SECP256K1_N_C_3); - sumadd(n2); - extract(m6); - sumadd(l[7]); - muladd(n7, SECP256K1_N_C_0); - muladd(n6, SECP256K1_N_C_1); - muladd(n5, SECP256K1_N_C_2); - muladd(n4, SECP256K1_N_C_3); - sumadd(n3); - extract(m7); - muladd(n7, SECP256K1_N_C_1); - muladd(n6, SECP256K1_N_C_2); - muladd(n5, SECP256K1_N_C_3); - sumadd(n4); - extract(m8); - muladd(n7, SECP256K1_N_C_2); - muladd(n6, SECP256K1_N_C_3); - sumadd(n5); - extract(m9); - muladd(n7, SECP256K1_N_C_3); - sumadd(n6); - extract(m10); - sumadd_fast(n7); - extract_fast(m11); - VERIFY_CHECK(c0 <= 1); - m12 = c0; - - /* Reduce 385 bits into 258. */ - /* p[0..8] = m[0..7] + m[8..12] * SECP256K1_N_C. */ - c0 = m0; c1 = 0; c2 = 0; - muladd_fast(m8, SECP256K1_N_C_0); - extract_fast(p0); - sumadd_fast(m1); - muladd(m9, SECP256K1_N_C_0); - muladd(m8, SECP256K1_N_C_1); - extract(p1); - sumadd(m2); - muladd(m10, SECP256K1_N_C_0); - muladd(m9, SECP256K1_N_C_1); - muladd(m8, SECP256K1_N_C_2); - extract(p2); - sumadd(m3); - muladd(m11, SECP256K1_N_C_0); - muladd(m10, SECP256K1_N_C_1); - muladd(m9, SECP256K1_N_C_2); - muladd(m8, SECP256K1_N_C_3); - extract(p3); - sumadd(m4); - muladd(m12, SECP256K1_N_C_0); - muladd(m11, SECP256K1_N_C_1); - muladd(m10, SECP256K1_N_C_2); - muladd(m9, SECP256K1_N_C_3); - sumadd(m8); - extract(p4); - sumadd(m5); - muladd(m12, SECP256K1_N_C_1); - muladd(m11, SECP256K1_N_C_2); - muladd(m10, SECP256K1_N_C_3); - sumadd(m9); - extract(p5); - sumadd(m6); - muladd(m12, SECP256K1_N_C_2); - muladd(m11, SECP256K1_N_C_3); - sumadd(m10); - extract(p6); - sumadd_fast(m7); - muladd_fast(m12, SECP256K1_N_C_3); - sumadd_fast(m11); - extract_fast(p7); - p8 = c0 + m12; - VERIFY_CHECK(p8 <= 2); - - /* Reduce 258 bits into 256. */ - /* r[0..7] = p[0..7] + p[8] * SECP256K1_N_C. */ - c = p0 + (uint64_t)SECP256K1_N_C_0 * p8; - r->d[0] = c & 0xFFFFFFFFUL; c >>= 32; - c += p1 + (uint64_t)SECP256K1_N_C_1 * p8; - r->d[1] = c & 0xFFFFFFFFUL; c >>= 32; - c += p2 + (uint64_t)SECP256K1_N_C_2 * p8; - r->d[2] = c & 0xFFFFFFFFUL; c >>= 32; - c += p3 + (uint64_t)SECP256K1_N_C_3 * p8; - r->d[3] = c & 0xFFFFFFFFUL; c >>= 32; - c += p4 + (uint64_t)p8; - r->d[4] = c & 0xFFFFFFFFUL; c >>= 32; - c += p5; - r->d[5] = c & 0xFFFFFFFFUL; c >>= 32; - c += p6; - r->d[6] = c & 0xFFFFFFFFUL; c >>= 32; - c += p7; - r->d[7] = c & 0xFFFFFFFFUL; c >>= 32; - - /* Final reduction of r. */ - secp256k1_scalar_reduce(r, c + secp256k1_scalar_check_overflow(r)); -} - -static void secp256k1_scalar_mul_512(uint32_t *l, const secp256k1_scalar *a, const secp256k1_scalar *b) { - /* 96 bit accumulator. */ - uint32_t c0 = 0, c1 = 0, c2 = 0; - - /* l[0..15] = a[0..7] * b[0..7]. */ - muladd_fast(a->d[0], b->d[0]); - extract_fast(l[0]); - muladd(a->d[0], b->d[1]); - muladd(a->d[1], b->d[0]); - extract(l[1]); - muladd(a->d[0], b->d[2]); - muladd(a->d[1], b->d[1]); - muladd(a->d[2], b->d[0]); - extract(l[2]); - muladd(a->d[0], b->d[3]); - muladd(a->d[1], b->d[2]); - muladd(a->d[2], b->d[1]); - muladd(a->d[3], b->d[0]); - extract(l[3]); - muladd(a->d[0], b->d[4]); - muladd(a->d[1], b->d[3]); - muladd(a->d[2], b->d[2]); - muladd(a->d[3], b->d[1]); - muladd(a->d[4], b->d[0]); - extract(l[4]); - muladd(a->d[0], b->d[5]); - muladd(a->d[1], b->d[4]); - muladd(a->d[2], b->d[3]); - muladd(a->d[3], b->d[2]); - muladd(a->d[4], b->d[1]); - muladd(a->d[5], b->d[0]); - extract(l[5]); - muladd(a->d[0], b->d[6]); - muladd(a->d[1], b->d[5]); - muladd(a->d[2], b->d[4]); - muladd(a->d[3], b->d[3]); - muladd(a->d[4], b->d[2]); - muladd(a->d[5], b->d[1]); - muladd(a->d[6], b->d[0]); - extract(l[6]); - muladd(a->d[0], b->d[7]); - muladd(a->d[1], b->d[6]); - muladd(a->d[2], b->d[5]); - muladd(a->d[3], b->d[4]); - muladd(a->d[4], b->d[3]); - muladd(a->d[5], b->d[2]); - muladd(a->d[6], b->d[1]); - muladd(a->d[7], b->d[0]); - extract(l[7]); - muladd(a->d[1], b->d[7]); - muladd(a->d[2], b->d[6]); - muladd(a->d[3], b->d[5]); - muladd(a->d[4], b->d[4]); - muladd(a->d[5], b->d[3]); - muladd(a->d[6], b->d[2]); - muladd(a->d[7], b->d[1]); - extract(l[8]); - muladd(a->d[2], b->d[7]); - muladd(a->d[3], b->d[6]); - muladd(a->d[4], b->d[5]); - muladd(a->d[5], b->d[4]); - muladd(a->d[6], b->d[3]); - muladd(a->d[7], b->d[2]); - extract(l[9]); - muladd(a->d[3], b->d[7]); - muladd(a->d[4], b->d[6]); - muladd(a->d[5], b->d[5]); - muladd(a->d[6], b->d[4]); - muladd(a->d[7], b->d[3]); - extract(l[10]); - muladd(a->d[4], b->d[7]); - muladd(a->d[5], b->d[6]); - muladd(a->d[6], b->d[5]); - muladd(a->d[7], b->d[4]); - extract(l[11]); - muladd(a->d[5], b->d[7]); - muladd(a->d[6], b->d[6]); - muladd(a->d[7], b->d[5]); - extract(l[12]); - muladd(a->d[6], b->d[7]); - muladd(a->d[7], b->d[6]); - extract(l[13]); - muladd_fast(a->d[7], b->d[7]); - extract_fast(l[14]); - VERIFY_CHECK(c1 == 0); - l[15] = c0; -} - -#undef sumadd -#undef sumadd_fast -#undef muladd -#undef muladd_fast -#undef extract -#undef extract_fast - -static void secp256k1_scalar_mul(secp256k1_scalar *r, const secp256k1_scalar *a, const secp256k1_scalar *b) { - uint32_t l[16]; - SECP256K1_SCALAR_VERIFY(a); - SECP256K1_SCALAR_VERIFY(b); - - secp256k1_scalar_mul_512(l, a, b); - secp256k1_scalar_reduce_512(r, l); - - SECP256K1_SCALAR_VERIFY(r); -} - -static void secp256k1_scalar_split_128(secp256k1_scalar *r1, secp256k1_scalar *r2, const secp256k1_scalar *k) { - SECP256K1_SCALAR_VERIFY(k); - - r1->d[0] = k->d[0]; - r1->d[1] = k->d[1]; - r1->d[2] = k->d[2]; - r1->d[3] = k->d[3]; - r1->d[4] = 0; - r1->d[5] = 0; - r1->d[6] = 0; - r1->d[7] = 0; - r2->d[0] = k->d[4]; - r2->d[1] = k->d[5]; - r2->d[2] = k->d[6]; - r2->d[3] = k->d[7]; - r2->d[4] = 0; - r2->d[5] = 0; - r2->d[6] = 0; - r2->d[7] = 0; - - SECP256K1_SCALAR_VERIFY(r1); - SECP256K1_SCALAR_VERIFY(r2); -} - -SECP256K1_INLINE static int secp256k1_scalar_eq(const secp256k1_scalar *a, const secp256k1_scalar *b) { - SECP256K1_SCALAR_VERIFY(a); - SECP256K1_SCALAR_VERIFY(b); - - return ((a->d[0] ^ b->d[0]) | (a->d[1] ^ b->d[1]) | (a->d[2] ^ b->d[2]) | (a->d[3] ^ b->d[3]) | (a->d[4] ^ b->d[4]) | (a->d[5] ^ b->d[5]) | (a->d[6] ^ b->d[6]) | (a->d[7] ^ b->d[7])) == 0; -} - -SECP256K1_INLINE static void secp256k1_scalar_mul_shift_var(secp256k1_scalar *r, const secp256k1_scalar *a, const secp256k1_scalar *b, unsigned int shift) { - uint32_t l[16]; - unsigned int shiftlimbs; - unsigned int shiftlow; - unsigned int shifthigh; - SECP256K1_SCALAR_VERIFY(a); - SECP256K1_SCALAR_VERIFY(b); - VERIFY_CHECK(shift >= 256); - - secp256k1_scalar_mul_512(l, a, b); - shiftlimbs = shift >> 5; - shiftlow = shift & 0x1F; - shifthigh = 32 - shiftlow; - r->d[0] = shift < 512 ? (l[0 + shiftlimbs] >> shiftlow | (shift < 480 && shiftlow ? (l[1 + shiftlimbs] << shifthigh) : 0)) : 0; - r->d[1] = shift < 480 ? (l[1 + shiftlimbs] >> shiftlow | (shift < 448 && shiftlow ? (l[2 + shiftlimbs] << shifthigh) : 0)) : 0; - r->d[2] = shift < 448 ? (l[2 + shiftlimbs] >> shiftlow | (shift < 416 && shiftlow ? (l[3 + shiftlimbs] << shifthigh) : 0)) : 0; - r->d[3] = shift < 416 ? (l[3 + shiftlimbs] >> shiftlow | (shift < 384 && shiftlow ? (l[4 + shiftlimbs] << shifthigh) : 0)) : 0; - r->d[4] = shift < 384 ? (l[4 + shiftlimbs] >> shiftlow | (shift < 352 && shiftlow ? (l[5 + shiftlimbs] << shifthigh) : 0)) : 0; - r->d[5] = shift < 352 ? (l[5 + shiftlimbs] >> shiftlow | (shift < 320 && shiftlow ? (l[6 + shiftlimbs] << shifthigh) : 0)) : 0; - r->d[6] = shift < 320 ? (l[6 + shiftlimbs] >> shiftlow | (shift < 288 && shiftlow ? (l[7 + shiftlimbs] << shifthigh) : 0)) : 0; - r->d[7] = shift < 288 ? (l[7 + shiftlimbs] >> shiftlow) : 0; - secp256k1_scalar_cadd_bit(r, 0, (l[(shift - 1) >> 5] >> ((shift - 1) & 0x1f)) & 1); - - SECP256K1_SCALAR_VERIFY(r); -} - -static SECP256K1_INLINE void secp256k1_scalar_cmov(secp256k1_scalar *r, const secp256k1_scalar *a, int flag) { - uint32_t mask0, mask1; - volatile int vflag = flag; - SECP256K1_SCALAR_VERIFY(a); - SECP256K1_CHECKMEM_CHECK_VERIFY(r->d, sizeof(r->d)); - - mask0 = vflag + ~((uint32_t)0); - mask1 = ~mask0; - r->d[0] = (r->d[0] & mask0) | (a->d[0] & mask1); - r->d[1] = (r->d[1] & mask0) | (a->d[1] & mask1); - r->d[2] = (r->d[2] & mask0) | (a->d[2] & mask1); - r->d[3] = (r->d[3] & mask0) | (a->d[3] & mask1); - r->d[4] = (r->d[4] & mask0) | (a->d[4] & mask1); - r->d[5] = (r->d[5] & mask0) | (a->d[5] & mask1); - r->d[6] = (r->d[6] & mask0) | (a->d[6] & mask1); - r->d[7] = (r->d[7] & mask0) | (a->d[7] & mask1); - - SECP256K1_SCALAR_VERIFY(r); -} - -static void secp256k1_scalar_from_signed30(secp256k1_scalar *r, const secp256k1_modinv32_signed30 *a) { - const uint32_t a0 = a->v[0], a1 = a->v[1], a2 = a->v[2], a3 = a->v[3], a4 = a->v[4], - a5 = a->v[5], a6 = a->v[6], a7 = a->v[7], a8 = a->v[8]; - - /* The output from secp256k1_modinv32{_var} should be normalized to range [0,modulus), and - * have limbs in [0,2^30). The modulus is < 2^256, so the top limb must be below 2^(256-30*8). - */ - VERIFY_CHECK(a0 >> 30 == 0); - VERIFY_CHECK(a1 >> 30 == 0); - VERIFY_CHECK(a2 >> 30 == 0); - VERIFY_CHECK(a3 >> 30 == 0); - VERIFY_CHECK(a4 >> 30 == 0); - VERIFY_CHECK(a5 >> 30 == 0); - VERIFY_CHECK(a6 >> 30 == 0); - VERIFY_CHECK(a7 >> 30 == 0); - VERIFY_CHECK(a8 >> 16 == 0); - - r->d[0] = a0 | a1 << 30; - r->d[1] = a1 >> 2 | a2 << 28; - r->d[2] = a2 >> 4 | a3 << 26; - r->d[3] = a3 >> 6 | a4 << 24; - r->d[4] = a4 >> 8 | a5 << 22; - r->d[5] = a5 >> 10 | a6 << 20; - r->d[6] = a6 >> 12 | a7 << 18; - r->d[7] = a7 >> 14 | a8 << 16; - - SECP256K1_SCALAR_VERIFY(r); -} - -static void secp256k1_scalar_to_signed30(secp256k1_modinv32_signed30 *r, const secp256k1_scalar *a) { - const uint32_t M30 = UINT32_MAX >> 2; - const uint32_t a0 = a->d[0], a1 = a->d[1], a2 = a->d[2], a3 = a->d[3], - a4 = a->d[4], a5 = a->d[5], a6 = a->d[6], a7 = a->d[7]; - SECP256K1_SCALAR_VERIFY(a); - - r->v[0] = a0 & M30; - r->v[1] = (a0 >> 30 | a1 << 2) & M30; - r->v[2] = (a1 >> 28 | a2 << 4) & M30; - r->v[3] = (a2 >> 26 | a3 << 6) & M30; - r->v[4] = (a3 >> 24 | a4 << 8) & M30; - r->v[5] = (a4 >> 22 | a5 << 10) & M30; - r->v[6] = (a5 >> 20 | a6 << 12) & M30; - r->v[7] = (a6 >> 18 | a7 << 14) & M30; - r->v[8] = a7 >> 16; -} - -static const secp256k1_modinv32_modinfo secp256k1_const_modinfo_scalar = { - {{0x10364141L, 0x3F497A33L, 0x348A03BBL, 0x2BB739ABL, -0x146L, 0, 0, 0, 65536}}, - 0x2A774EC1L -}; - -static void secp256k1_scalar_inverse(secp256k1_scalar *r, const secp256k1_scalar *x) { - secp256k1_modinv32_signed30 s; -#ifdef VERIFY - int zero_in = secp256k1_scalar_is_zero(x); -#endif - SECP256K1_SCALAR_VERIFY(x); - - secp256k1_scalar_to_signed30(&s, x); - secp256k1_modinv32(&s, &secp256k1_const_modinfo_scalar); - secp256k1_scalar_from_signed30(r, &s); - - SECP256K1_SCALAR_VERIFY(r); - VERIFY_CHECK(secp256k1_scalar_is_zero(r) == zero_in); -} - -static void secp256k1_scalar_inverse_var(secp256k1_scalar *r, const secp256k1_scalar *x) { - secp256k1_modinv32_signed30 s; -#ifdef VERIFY - int zero_in = secp256k1_scalar_is_zero(x); -#endif - SECP256K1_SCALAR_VERIFY(x); - - secp256k1_scalar_to_signed30(&s, x); - secp256k1_modinv32_var(&s, &secp256k1_const_modinfo_scalar); - secp256k1_scalar_from_signed30(r, &s); - - SECP256K1_SCALAR_VERIFY(r); - VERIFY_CHECK(secp256k1_scalar_is_zero(r) == zero_in); -} - -SECP256K1_INLINE static int secp256k1_scalar_is_even(const secp256k1_scalar *a) { - SECP256K1_SCALAR_VERIFY(a); - - return !(a->d[0] & 1); -} - -#endif /* SECP256K1_SCALAR_REPR_IMPL_H */ diff --git a/src/scalar_impl.h b/src/scalar_impl.h index 9965c2b..2d9c94a 100644 --- a/src/scalar_impl.h +++ b/src/scalar_impl.h @@ -18,8 +18,6 @@ #include "scalar_low_impl.h" #elif defined(SECP256K1_WIDEMUL_INT128) #include "scalar_4x64_impl.h" -#elif defined(SECP256K1_WIDEMUL_INT64) -#include "scalar_8x32_impl.h" #else #error "Please select wide multiplication implementation" #endif diff --git a/src/secp256k1.o b/src/secp256k1.o index ba5061037f79f94ee5cca348c71393dd764b552f..4fb3fcc1b2c738c3b4143ad24b299425c7b0dbf6 100644 GIT binary patch delta 5865 zcmbVQ33OCN7VWC~Rq0NChrrKH(g`Gl0EUo&qS%6ff&>Q_TpCnJ$Yux;fo52;G=wD* z2_!;NL>x5E5M<;aD$XCl<922g6&PG54#*Z1T!IS`5M*@bRn_mL6P(dQpVR60-*W4| zSMR?nxNs=_!lCpfJCWgZj_Ey_+H3=D@JSkLCOT0RN{dOTXEvLI2&MQ)Ygp_rYVR=f zLXoL!3G@|%_c?}!ns@d_^>!4`Ta!^L4@*V|P`nB9#ZbR)`f?O(I)@<|qVD=w?gxQC(x|}sf*tGnvJ)*$N=s*hMXaBa-)m{ zy81P-3u$KD+vv3@a9j0pr&~BmFkQ#cPYCqy9PFx}){6E8sf9N4k##7_{NhV=ErP82 zw4p5yWl7~?@A##&=xr3PHdt=vxpU~$Ozk?_Fohp4!f9O3!7j~BPg;-J<1phnIXGGy z?Ra>qaEap&rzvW21widS zYM(BkM)C3jJV=}F$ek{%!};-K#Q&`<~fg|sr8=lqPv z@t+HEvR3RkBCQrc2_2Qw!8BQ$<9IixGwFsRJX4!T-7|RWI7oAA5gv*b@gqfes#Z#A zZbzKXmlptZxg$F_fSZs0tADA zO8$wSn`sMMUD`!e44P+Ov3EKposr)aC@m^1=6NP(`O`mHEOTEDq$qf(*h9&Cv!4JB{ zvL%k(57p6U6g>o{duAh>t)F!3;}CAPzVac?MY1jbIP*{`&dl(wccN!D1ALCnSU)Nz z8Ke@B#&rf6>zwFy%VXWu!b1@-k`#feW=YOSy0H<+37n*27kC9N%(NM4y~e{l4_Y`B z#7HGx17fTaud^AxG?$VyGC7kS)hvl&tGB3trTkqFR_x=EH{21xe_Dd-_kM$1H084bfl z4p`2HnYA51Fy8kD>N^Poi02z*>#@*ge*=w~4$nPiEE2M(3aWOM*aG`oC|W$&QsmQ1 z$VJLU$ip`KOK8kYu!!iF(4;(ww%s6|wxYh`iEOkSx1xz+e=Ue6fO!2j%hc3~(S#-m zMixe+#qX%)*l1y>6{?Ub!M}wS)~_<>pU3ghv|n6G5a#cDQKC5FLd-Wq$*O1=zVe_O znM(4_gZ)VijAb!>GpqUyAn?`9egkO2(^=Kno=B+yA1#;;W2C(tX?7Y+*5Fz71Bd046?O6-B zVPrjW3ls-9n{Vx>t-5bL_A0_6I~vT8TcaYOlU73Oy4$mq{gh>Y?0?ulx9oMepew$J zN8}4I_n>nn zBeUXDx*QFETs}L=^Hgm?`0nih8pg;T%do?5u=lYn?<__ibJs~4%O{_t@hIP%bCNF5 z+?SwWu-j)1pcGFkq0rO_rx6U%gj%2yO)gbltTmOjY`)dbfUsS_XnkG z7rIX^-G%_aFUt|NL#99$;wv?S+F|#g>dan8!(g)w38O-URc)?M`M1ORkeXQSGH15! zH`{20T;#C;Uf&9?jMQT!DYcbXN{aQ^UMmzn(k4kR3s_c#3o8SwN~s?EDW2{^(OSx) z_#12%#nl!?pQz_K#c{3V=pbh-j%yW1p=g0R22xbvKoQ=dGZw|Q9TZ_O1f*jvWl^+j z7LAp9K&dYAinFx0Z>3TR|42qING#?C0vWQnOQcyXy|kiMR`Wl|BO7Qbw5)?6d!^WR zLvmm}nSDq=pxjNs{kc2tkFvov111S0RTl6?3%H7>ouj?o%Eu3}999cM0o2Y~5HLVU zjfL>Ng;2wfoTKq><>E)E?vC(H0Kz&8;cE+FokZ{{7e7L6cZ4te2+%xD7Q!(Lp|L{t zVjB{@7)P0lop`;D@d$B>T?tizGvt$OZ(r>U?O>ja| zX7<5y51aU*^E7Viaiz4nU{m)r4=Hnt3~9D#Op*J1xHA%HJl#FAh2Q=I%$+~WY+U4E zhc_czEZx79PR^QVE!{@}e%k;up49o&gf)zC1QiuSYCZJk9pP|-&`5ERz&KJR@9q!QO&n&H$HPLHZlBN{Me5) z*t|7~T|@MJokn>wxOmtg=HxG@uq^$}PMZoe)B3XoL_Y#m4?In*ln)PN!TRCuHWToA z8RNb!yP`c3wP&zD?biN0IFbztwf4uO1;G{K6W%nE?U`_e_|O9pa)19(?vYD8&tDkD z`UfsyCnwC$N3l{ZIICOM)i<${kog_8uxgZeGwZ2M+<#^H0v}_kB=kYKy}(MkYyun5 z$tV=g(^m7v3G95Y5MH*+vrQA4Px;RKS)z}ZS@pN)~KtBxJFsawEI_X2Lu@Q=gjfb*o{d4v9s~4ri6}lny*f0 z6E(fP+g--Rp(}&<{yY{Fz8enCFnImXT;?MHqCy#Y#q#*>G7o`b0*VX!TBOl{yHE&B z=NJ!QQc{@ZW?jp7SxQ+nVUrB0^NE;fGE*T$`k!{GN;PYW=KBF8#Ha`d>eCSAR~54! z@`lD&6tV+oAHP)1;s?1kaS_L4r-+u)-qK3(%chFIvf5HziTLe42LCJYARF3$f2l_k zGYFG*23J`rty$y|zn=*n$4v7Y$FQa*8wfg73q9}mn>x?xW0g;hVT7<=16nH0yaF@nBr&G4Z@78WJm3) zr_%71w2BF>ay{1H?KbHLnZ_Enkm}1W)4#cn#n8czR@isJ02Lo`Orvf`u_344`*7pc kX@7E>I~rML=g^b7stJt@zCrM{!`A`dU~@uaNcvs>1?%;rHvj+t delta 5884 zcmbVQ33OCN7VWC~Rq0N;L*Qp8=_C*m7DGrtQE1r&l)wQIl@UYvfoY6y{)9Lr$a^J02 z@4qUzc_Q`ZiPX6}k?wE|Y&(itES)XzX+Ow_cc2KA(l%}-vsi3ID8)xwLt=hYJIcrj zMuuHWpnG+ma0YcZ9&3wMT2UOYO++1eNFq9d;tY^)1^aXp=A%Hv(F4&SEjeamPu{;A zB^W*XqW;9^0a1a*mI0_sn9^)CO57+$2ltd5>CWHIN5jGPw`#~yRg7|tHB-?pq`Wuq zxtS>5h@OUaPzcE^P$5@6O+qFahh`!NZUT}w3%y7nWPwM8gcmo27?9;nhLSh+DMw9s za^N|GS1v@M#?2Q{1V+g`qyo(_agD7N=u6NUOBbUA4JrYQ*9O4z@KvbFzf@0)G=?Qf4 zJ7O17jF=D6I}zYk@mZr=DAd_`2Hi%W|M&5x`th~sP=HFbf%jR4B8+b@p!*RDHxe$P zO*Uoe$i?3ApX`qHVMZnI8P29hDP##<^bFiJarNtjz;s+Nw`28Yuh+pQH#k1YU8MN zf`IDFOLB1+Ez6cYL0Eh8vr}-WmQ6p9R+%V-51NeQwaK>lOa)Lg836KVStifAg9r0F zlX0R}U^^wPCcre>H>-hZqBg^}KdW)kHF4_}38mQ$ zadtjG7ofei%xphy^XU7HEE zzznUD&f84IT~QhDKNXUd(asuo%RT*8H+qP3DeE=t!ap7et~-M_&K;0zx5~Rg-ymI zh4^JmU#AzQHyZQk7P?@DeD28qJ_jFkcXpsq53<;&TS7WNXo$si5(^AzXCDhc=n>P_ zuFqkpj(uX@vtW918nReANVoPj;bv(kADm8-Y5F^j6Gb>Z&0FI@n>PS_mPN1W8?@{vo_k)}!WVtpy^*%q@b!`=^2yWto>+?SE1)hvtk12kX)Jhz%STgdK(P_=u+7FhSANbz7vl25yk zlaz>%=PcG;XuuS(i0EBtcn(Bc9+XboP&@HN)|igl&``0zCd3AScz1+p+R%ux0Sy<7 zOpKRIey2^xmrM+mVG&X#_(fPD{~}}NbsQT>lVg$sF#p(t;>8gcWV{zlDk5R{%7d;? zM>5Gco=l=(EQ{%zzOX|l0$Aq7`a@6G%F1x%W<^}G?bAa zq{d0AF!DM2l!0U!uFg;uzG?pP~vE-VCgYVpRjh`X&_3RAv=T zn@MB7-LF))d1*au>z%Ju!atmmpClG@0f97G+&i)nRqFE6YFW+O$SoVF99q`KkhM%~ zyDrPbS~F`mzd%_kz?}=OA;=H+rilx~g^>yq_=X8w!Bei$wl3x4gLt7iM4gXXAR|jn zgsUdPQhw?hjddv(AHw412;cc3tTYiWnFuQ-f>*it5SBMbIPXJ%=6TITIAtQNDV4o= z0f~JMB)nAh;sqpoQ7FXw;=j)gXs{wKqG=W-7m%+PSIO+YcS|`-X7DQg0h`$bXC-B3 z?JD>1HGbkcjTv)VDa|f;rTH>HSLS0fWV1=*DY?%l8Y6+m`sR_J@<(sL+}SEu<0PLo zcr)_4sXL~0a@K4!b)We8Z3oO)vdpW7uC9k7sHhlR%b|5ghQgsj!^PPG<48?4HG8(o z#-#?k8JSF%|!ua03)crOa7;yjHIe?qk|I!FkjC&d{aCy&~zaO#E8C@J$&mvSMq z#98d`kSZOPmwFoXmyt{8xR~PAuC=?Z3iu%;A4Q41Xs-^Aim; zhP7w+5&K7tMtL$gc}N%L;JcDohW-6Un+h~il35|KpMf;~r-`}blbum#YL7(i8SGD;xj)bJVO@gF{c&pnaH}}ZU+u&84Y^Bv=z%b~zaN*lWr{cWj=n6} zKZTW?HNNS~inPFC%~oAGkQD?KHq^qbQT!m*S{r)g?$!%@^m&rdrzO^0bJ4{^Sf@rt z!Em0QG2R-&uD1zVUA)`9RTG+8zVm4o?_FJNK7UEX=*8Xiun4#k*vSgTXH3&OL@G2& zSebc;D1#F?jFck}jVpso#rG5pqK? zl;mZ&Skv;gSW*U!TPH)7c|}YlnW_*%eNVeprPgbT=21TqVpN0z_3?=Ed-7Q;c|+q1 zCbJ{xBmQ$Gi|yjl#N8Z|9U@vvhl|R@FB>EN%4$pXT*M#sGWd^zXIS^-BSmga%pgpz zFu2`Hsd~0s{C+Li2!WAp{MAJ)+DOe~=aGGI4A+c)^-Rf9^-qDQAQ&B&j|zClke1RAi3 zx3qmKd-?BY4+fIAG;#3{BbCLuVu#EtvlR*|`t}yz{5*T!{?nX>H`#9z8X`M(d@)Dv zq^w>9&+@Qt?LQa(LLvSQrT)}Q2~x#M$GLo`hYjpz)W`bRHG?o>%GhbE>Zufb zH?3kotMtb9gUu#=7eimdW>Ncs7W(&AuqfKKs?@q02B`RmV;VI(igh{lj>3)CPLs(+ hZd=XL8;73MEgZ6%!8ZWDR`}ZB8)yt!9hCa`e*t}BroR9H diff --git a/src/util.h b/src/util.h index 94e0837..75e591b 100644 --- a/src/util.h +++ b/src/util.h @@ -321,9 +321,6 @@ static SECP256K1_INLINE void secp256k1_int_cmov(int *r, const int *a, int flag) /* If USE_FORCE_WIDEMUL_INT128 is set, use int128. */ # define SECP256K1_WIDEMUL_INT128 1 # define SECP256K1_INT128_NATIVE 1 -#elif defined(USE_FORCE_WIDEMUL_INT64) -/* If USE_FORCE_WIDEMUL_INT64 is set, use int64. */ -# define SECP256K1_WIDEMUL_INT64 1 #elif defined(UINT128_MAX) || defined(__SIZEOF_INT128__) /* If a native 128-bit integer type exists, use int128. */ # define SECP256K1_WIDEMUL_INT128 1 @@ -340,8 +337,7 @@ static SECP256K1_INLINE void secp256k1_int_cmov(int *r, const int *a, int flag) # define SECP256K1_WIDEMUL_INT128 1 # define SECP256K1_INT128_STRUCT 1 #else -/* Lastly, fall back to int64 based arithmetic. */ -# define SECP256K1_WIDEMUL_INT64 1 +#error "No suitable wide multiplication implementation found. 32-bit limb support has been removed." #endif #ifndef __has_builtin