/* ed25519-donna-impl-sse2.h (12 KB): SSE2 implementation of the ge25519 group-element routines */
  1. #if defined(ED25519_SSE2)
  2. /*
  3. conversions
  4. */
  5. static void
  6. ge25519_p1p1_to_partial(ge25519 *r, const ge25519_p1p1 *p) {
  7. packed64bignum25519 ALIGN(16) xz, tt, xzout;
  8. curve25519_mul(r->y, p->y, p->z);
  9. curve25519_tangle64(xz, p->x, p->z);
  10. curve25519_tangleone64(tt, p->t);
  11. curve25519_mul_packed64(xzout, xz, tt);
  12. curve25519_untangle64(r->x, r->z, xzout);
  13. }
  14. static void
  15. ge25519_p1p1_to_full(ge25519 *r, const ge25519_p1p1 *p) {
  16. packed64bignum25519 ALIGN(16) zy, xt, xx, zz, ty;
  17. curve25519_tangle64(ty, p->t, p->y);
  18. curve25519_tangleone64(xx, p->x);
  19. curve25519_mul_packed64(xt, xx, ty);
  20. curve25519_untangle64(r->x, r->t, xt);
  21. curve25519_tangleone64(zz, p->z);
  22. curve25519_mul_packed64(zy, zz, ty);
  23. curve25519_untangle64(r->z, r->y, zy);
  24. }
  25. static void
  26. ge25519_full_to_pniels(ge25519_pniels *p, const ge25519 *r) {
  27. curve25519_sub(p->ysubx, r->y, r->x);
  28. curve25519_add(p->xaddy, r->x, r->y);
  29. curve25519_copy(p->z, r->z);
  30. curve25519_mul(p->t2d, r->t, ge25519_ec2d);
  31. }
  32. /*
  33. adding & doubling
  34. */
  35. static void
  36. ge25519_add_p1p1(ge25519_p1p1 *r, const ge25519 *p, const ge25519 *q) {
  37. bignum25519 ALIGN(16) a,b,c,d;
  38. packed32bignum25519 ALIGN(16) xx, yy, yypxx, yymxx, bd, ac, bdmac, bdpac;
  39. packed64bignum25519 ALIGN(16) at, bu, atbu, ptz, qtz, cd;
  40. curve25519_tangle32(yy, p->y, q->y);
  41. curve25519_tangle32(xx, p->x, q->x);
  42. curve25519_add_packed32(yypxx, yy, xx);
  43. curve25519_sub_packed32(yymxx, yy, xx);
  44. curve25519_tangle64_from32(at, bu, yymxx, yypxx);
  45. curve25519_mul_packed64(atbu, at, bu);
  46. curve25519_untangle64(a, b, atbu);
  47. curve25519_tangle64(ptz, p->t, p->z);
  48. curve25519_tangle64(qtz, q->t, q->z);
  49. curve25519_mul_packed64(cd, ptz, qtz);
  50. curve25519_untangle64(c, d, cd);
  51. curve25519_mul(c, c, ge25519_ec2d);
  52. curve25519_add_reduce(d, d, d);
  53. /* reduce, so no after_basic is needed later */
  54. curve25519_tangle32(bd, b, d);
  55. curve25519_tangle32(ac, a, c);
  56. curve25519_sub_packed32(bdmac, bd, ac);
  57. curve25519_add_packed32(bdpac, bd, ac);
  58. curve25519_untangle32(r->x, r->t, bdmac);
  59. curve25519_untangle32(r->y, r->z, bdpac);
  60. }
  61. static void
  62. ge25519_double_p1p1(ge25519_p1p1 *r, const ge25519 *p) {
  63. bignum25519 ALIGN(16) a,b,c,x;
  64. packed64bignum25519 ALIGN(16) xy, zx, ab, cx;
  65. packed32bignum25519 ALIGN(16) xc, yz, xt, yc, ac, bc;
  66. curve25519_add(x, p->x, p->y);
  67. curve25519_tangle64(xy, p->x, p->y);
  68. curve25519_square_packed64(ab, xy);
  69. curve25519_untangle64(a, b, ab);
  70. curve25519_tangle64(zx, p->z, x);
  71. curve25519_square_packed64(cx, zx);
  72. curve25519_untangle64(c, x, cx);
  73. curve25519_tangle32(bc, b, c);
  74. curve25519_tangle32(ac, a, c);
  75. curve25519_add_reduce_packed32(yc, bc, ac);
  76. curve25519_untangle32(r->y, c, yc);
  77. curve25519_sub(r->z, b, a);
  78. curve25519_tangle32(yz, r->y, r->z);
  79. curve25519_tangle32(xc, x, c);
  80. curve25519_sub_after_basic_packed32(xt, xc, yz);
  81. curve25519_untangle32(r->x, r->t, xt);
  82. }
  83. static void
  84. ge25519_nielsadd2_p1p1(ge25519_p1p1 *r, const ge25519 *p, const ge25519_niels *q, unsigned char signbit) {
  85. const bignum25519 *qb = (const bignum25519 *)q;
  86. bignum25519 *rb = (bignum25519 *)r;
  87. bignum25519 ALIGN(16) a,b,c;
  88. packed64bignum25519 ALIGN(16) ab, yx, aybx;
  89. packed32bignum25519 ALIGN(16) bd, ac, bdac;
  90. curve25519_sub(a, p->y, p->x);
  91. curve25519_add(b, p->y, p->x);
  92. curve25519_tangle64(ab, a, b);
  93. curve25519_tangle64(yx, qb[signbit], qb[signbit^1]);
  94. curve25519_mul_packed64(aybx, ab, yx);
  95. curve25519_untangle64(a, b, aybx);
  96. curve25519_add(r->y, b, a);
  97. curve25519_add_reduce(r->t, p->z, p->z);
  98. curve25519_mul(c, p->t, q->t2d);
  99. curve25519_copy(r->z, r->t);
  100. curve25519_add(rb[2+signbit], rb[2+signbit], c);
  101. curve25519_tangle32(bd, b, rb[2+(signbit^1)]);
  102. curve25519_tangle32(ac, a, c);
  103. curve25519_sub_packed32(bdac, bd, ac);
  104. curve25519_untangle32(r->x, rb[2+(signbit^1)], bdac);
  105. }
  106. static void
  107. ge25519_pnielsadd_p1p1(ge25519_p1p1 *r, const ge25519 *p, const ge25519_pniels *q, unsigned char signbit) {
  108. const bignum25519 *qb = (const bignum25519 *)q;
  109. bignum25519 *rb = (bignum25519 *)r;
  110. bignum25519 ALIGN(16) a,b,c;
  111. packed64bignum25519 ALIGN(16) ab, yx, aybx, zt, zt2d, tc;
  112. packed32bignum25519 ALIGN(16) bd, ac, bdac;
  113. curve25519_sub(a, p->y, p->x);
  114. curve25519_add(b, p->y, p->x);
  115. curve25519_tangle64(ab, a, b);
  116. curve25519_tangle64(yx, qb[signbit], qb[signbit^1]);
  117. curve25519_mul_packed64(aybx, ab, yx);
  118. curve25519_untangle64(a, b, aybx);
  119. curve25519_add(r->y, b, a);
  120. curve25519_tangle64(zt, p->z, p->t);
  121. curve25519_tangle64(zt2d, q->z, q->t2d);
  122. curve25519_mul_packed64(tc, zt, zt2d);
  123. curve25519_untangle64(r->t, c, tc);
  124. curve25519_add_reduce(r->t, r->t, r->t);
  125. curve25519_copy(r->z, r->t);
  126. curve25519_add(rb[2+signbit], rb[2+signbit], c);
  127. curve25519_tangle32(bd, b, rb[2+(signbit^1)]);
  128. curve25519_tangle32(ac, a, c);
  129. curve25519_sub_packed32(bdac, bd, ac);
  130. curve25519_untangle32(r->x, rb[2+(signbit^1)], bdac);
  131. }
  132. static void
  133. ge25519_double(ge25519 *r, const ge25519 *p) {
  134. ge25519_p1p1 ALIGN(16) t;
  135. ge25519_double_p1p1(&t, p);
  136. ge25519_p1p1_to_full(r, &t);
  137. }
  138. static void
  139. ge25519_add(ge25519 *r, const ge25519 *p, const ge25519 *q) {
  140. ge25519_p1p1 ALIGN(16) t;
  141. ge25519_add_p1p1(&t, p, q);
  142. ge25519_p1p1_to_full(r, &t);
  143. }
  144. static void
  145. ge25519_double_partial(ge25519 *r, const ge25519 *p) {
  146. ge25519_p1p1 ALIGN(16) t;
  147. ge25519_double_p1p1(&t, p);
  148. ge25519_p1p1_to_partial(r, &t);
  149. }
  150. static void
  151. ge25519_nielsadd2(ge25519 *r, const ge25519_niels *q) {
  152. packed64bignum25519 ALIGN(16) ab, yx, aybx, eg, ff, hh, xz, ty;
  153. packed32bignum25519 ALIGN(16) bd, ac, bdac;
  154. bignum25519 ALIGN(16) a,b,c,d,e,f,g,h;
  155. curve25519_sub(a, r->y, r->x);
  156. curve25519_add(b, r->y, r->x);
  157. curve25519_tangle64(ab, a, b);
  158. curve25519_tangle64(yx, q->ysubx, q->xaddy);
  159. curve25519_mul_packed64(aybx, ab, yx);
  160. curve25519_untangle64(a, b, aybx);
  161. curve25519_add(h, b, a);
  162. curve25519_add_reduce(d, r->z, r->z);
  163. curve25519_mul(c, r->t, q->t2d);
  164. curve25519_add(g, d, c); /* d is reduced, so no need for after_basic */
  165. curve25519_tangle32(bd, b, d);
  166. curve25519_tangle32(ac, a, c);
  167. curve25519_sub_packed32(bdac, bd, ac); /* d is reduced, so no need for after_basic */
  168. curve25519_untangle32(e, f, bdac);
  169. curve25519_tangle64(eg, e, g);
  170. curve25519_tangleone64(ff, f);
  171. curve25519_mul_packed64(xz, eg, ff);
  172. curve25519_untangle64(r->x, r->z, xz);
  173. curve25519_tangleone64(hh, h);
  174. curve25519_mul_packed64(ty, eg, hh);
  175. curve25519_untangle64(r->t, r->y, ty);
  176. }
  177. static void
  178. ge25519_pnielsadd(ge25519_pniels *r, const ge25519 *p, const ge25519_pniels *q) {
  179. ge25519_p1p1 ALIGN(16) t;
  180. ge25519 ALIGN(16) f;
  181. ge25519_pnielsadd_p1p1(&t, p, q, 0);
  182. ge25519_p1p1_to_full(&f, &t);
  183. ge25519_full_to_pniels(r, &f);
  184. }
  185. /*
  186. pack & unpack
  187. */
  188. static void
  189. ge25519_pack(unsigned char r[32], const ge25519 *p) {
  190. bignum25519 ALIGN(16) tx, ty, zi;
  191. unsigned char parity[32];
  192. curve25519_recip(zi, p->z);
  193. curve25519_mul(tx, p->x, zi);
  194. curve25519_mul(ty, p->y, zi);
  195. curve25519_contract(r, ty);
  196. curve25519_contract(parity, tx);
  197. r[31] ^= ((parity[0] & 1) << 7);
  198. }
  199. // assumes inz[] points to things in in[]
  200. // NOTE: leaves in unfinished state
  201. static void
  202. ge25519_batchpack_destructive_1(bytes32 *out, ge25519 *in, bignum25519 *tmp, size_t num) {
  203. bignum25519 ALIGN(16) ty;
  204. curve25519_batchrecip(&in->z, &in->z, tmp, num, sizeof(ge25519));
  205. for (size_t i = 0; i < num; ++i) {
  206. curve25519_mul(ty, in[i].y, in[i].z);
  207. curve25519_contract(out[i], ty);
  208. }
  209. }
  210. static void
  211. ge25519_batchpack_destructive_finish(bytes32 out, ge25519 *unf) {
  212. bignum25519 ALIGN(16) tx;
  213. unsigned char parity[32];
  214. // z of unfinished is inverted
  215. curve25519_mul(tx, unf->x, unf->z);
  216. curve25519_contract(parity, tx);
  217. out[31] ^= ((parity[0] & 1) << 7);
  218. }
  219. static int
  220. ge25519_unpack_negative_vartime(ge25519 *r, const unsigned char p[32]) {
  221. static const bignum25519 ALIGN(16) one = {1};
  222. static const unsigned char zero[32] = {0};
  223. unsigned char parity = p[31] >> 7;
  224. unsigned char check[32];
  225. bignum25519 ALIGN(16) t, root, num, den, d3;
  226. curve25519_expand(r->y, p);
  227. curve25519_copy(r->z, one);
  228. curve25519_square_times(num, r->y, 1); /* x = y^2 */
  229. curve25519_mul(den, num, ge25519_ecd); /* den = dy^2 */
  230. curve25519_sub_reduce(num, num, r->z); /* x = y^2 - 1 */
  231. curve25519_add(den, den, r->z); /* den = dy^2 + 1 */
  232. /* Computation of sqrt(num/den) */
  233. /* 1.: computation of num^((p-5)/8)*den^((7p-35)/8) = (num*den^7)^((p-5)/8) */
  234. curve25519_square_times(t, den, 1);
  235. curve25519_mul(d3, t, den);
  236. curve25519_square_times(r->x, d3, 1);
  237. curve25519_mul(r->x, r->x, den);
  238. curve25519_mul(r->x, r->x, num);
  239. curve25519_pow_two252m3(r->x, r->x);
  240. /* 2. computation of r->x = t * num * den^3 */
  241. curve25519_mul(r->x, r->x, d3);
  242. curve25519_mul(r->x, r->x, num);
  243. /* 3. Check if either of the roots works: */
  244. curve25519_square_times(t, r->x, 1);
  245. curve25519_mul(t, t, den);
  246. curve25519_copy(root, t);
  247. curve25519_sub_reduce(root, root, num);
  248. curve25519_contract(check, root);
  249. if (!ed25519_verify(check, zero, 32)) {
  250. curve25519_add_reduce(t, t, num);
  251. curve25519_contract(check, t);
  252. if (!ed25519_verify(check, zero, 32))
  253. return 0;
  254. curve25519_mul(r->x, r->x, ge25519_sqrtneg1);
  255. }
  256. curve25519_contract(check, r->x);
  257. if ((check[0] & 1) == parity) {
  258. curve25519_copy(t, r->x);
  259. curve25519_neg(r->x, t);
  260. }
  261. curve25519_mul(r->t, r->x, r->y);
  262. return 1;
  263. }
  264. /*
  265. scalarmults
  266. */
  267. #define S1_SWINDOWSIZE 5
  268. #define S1_TABLE_SIZE (1<<(S1_SWINDOWSIZE-2))
  269. #define S2_SWINDOWSIZE 7
  270. #define S2_TABLE_SIZE (1<<(S2_SWINDOWSIZE-2))
  271. static void
  272. ge25519_double_scalarmult_vartime(ge25519 *r, const ge25519 *p1, const bignum256modm s1, const bignum256modm s2) {
  273. signed char slide1[256], slide2[256];
  274. ge25519_pniels ALIGN(16) pre1[S1_TABLE_SIZE];
  275. ge25519 ALIGN(16) d1;
  276. ge25519_p1p1 ALIGN(16) t;
  277. int32_t i;
  278. contract256_slidingwindow_modm(slide1, s1, S1_SWINDOWSIZE);
  279. contract256_slidingwindow_modm(slide2, s2, S2_SWINDOWSIZE);
  280. ge25519_double(&d1, p1);
  281. ge25519_full_to_pniels(pre1, p1);
  282. for (i = 0; i < S1_TABLE_SIZE - 1; i++)
  283. ge25519_pnielsadd(&pre1[i+1], &d1, &pre1[i]);
  284. /* set neutral */
  285. memset(r, 0, sizeof(ge25519));
  286. r->y[0] = 1;
  287. r->z[0] = 1;
  288. i = 255;
  289. while ((i >= 0) && !(slide1[i] | slide2[i]))
  290. i--;
  291. for (; i >= 0; i--) {
  292. ge25519_double_p1p1(&t, r);
  293. if (slide1[i]) {
  294. ge25519_p1p1_to_full(r, &t);
  295. ge25519_pnielsadd_p1p1(&t, r, &pre1[abs(slide1[i]) / 2], (unsigned char)slide1[i] >> 7);
  296. }
  297. if (slide2[i]) {
  298. ge25519_p1p1_to_full(r, &t);
  299. ge25519_nielsadd2_p1p1(&t, r, &ge25519_niels_sliding_multiples[abs(slide2[i]) / 2], (unsigned char)slide2[i] >> 7);
  300. }
  301. ge25519_p1p1_to_partial(r, &t);
  302. }
  303. }
  304. #if !defined(HAVE_GE25519_SCALARMULT_BASE_CHOOSE_NIELS)
  305. static uint32_t
  306. ge25519_windowb_equal(uint32_t b, uint32_t c) {
  307. return ((b ^ c) - 1) >> 31;
  308. }
  309. static void
  310. ge25519_scalarmult_base_choose_niels(ge25519_niels *t, const uint8_t table[256][96], uint32_t pos, signed char b) {
  311. bignum25519 ALIGN(16) neg;
  312. uint32_t sign = (uint32_t)((unsigned char)b >> 7);
  313. uint32_t mask = ~(sign - 1);
  314. uint32_t u = (b + mask) ^ mask;
  315. uint32_t i;
  316. /* ysubx, xaddy, t2d in packed form. initialize to ysubx = 1, xaddy = 1, t2d = 0 */
  317. uint8_t ALIGN(16) packed[96] = {0};
  318. packed[0] = 1;
  319. packed[32] = 1;
  320. for (i = 0; i < 8; i++)
  321. curve25519_move_conditional_bytes(packed, table[(pos * 8) + i], ge25519_windowb_equal(u, i + 1));
  322. /* expand in to t */
  323. curve25519_expand(t->ysubx, packed + 0);
  324. curve25519_expand(t->xaddy, packed + 32);
  325. curve25519_expand(t->t2d , packed + 64);
  326. /* adjust for sign */
  327. curve25519_swap_conditional(t->ysubx, t->xaddy, sign);
  328. curve25519_neg(neg, t->t2d);
  329. curve25519_swap_conditional(t->t2d, neg, sign);
  330. }
  331. #endif /* HAVE_GE25519_SCALARMULT_BASE_CHOOSE_NIELS */
  332. static void
  333. ge25519_scalarmult_base_niels(ge25519 *r, const uint8_t table[256][96], const bignum256modm s) {
  334. signed char b[64];
  335. uint32_t i;
  336. ge25519_niels ALIGN(16) t;
  337. contract256_window4_modm(b, s);
  338. ge25519_scalarmult_base_choose_niels(&t, table, 0, b[1]);
  339. curve25519_sub_reduce(r->x, t.xaddy, t.ysubx);
  340. curve25519_add_reduce(r->y, t.xaddy, t.ysubx);
  341. memset(r->z, 0, sizeof(bignum25519));
  342. r->z[0] = 2;
  343. curve25519_copy(r->t, t.t2d);
  344. for (i = 3; i < 64; i += 2) {
  345. ge25519_scalarmult_base_choose_niels(&t, table, i / 2, b[i]);
  346. ge25519_nielsadd2(r, &t);
  347. }
  348. ge25519_double_partial(r, r);
  349. ge25519_double_partial(r, r);
  350. ge25519_double_partial(r, r);
  351. ge25519_double(r, r);
  352. ge25519_scalarmult_base_choose_niels(&t, table, 0, b[0]);
  353. curve25519_mul(t.t2d, t.t2d, ge25519_ecd);
  354. ge25519_nielsadd2(r, &t);
  355. for(i = 2; i < 64; i += 2) {
  356. ge25519_scalarmult_base_choose_niels(&t, table, i / 2, b[i]);
  357. ge25519_nielsadd2(r, &t);
  358. }
  359. }
  360. #endif /* defined(ED25519_SSE2) */