x25519-x86_64.s 10 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770
  1. .text
  # ---------------------------------------------------------------------
  # _x25519_fe51_mul
  #   Register arguments as used by the code below:
  #     %rdi = destination; five 64-bit words are stored through it by
  #            the shared tail at L$reduce51
  #     %rsi = operand f, five 64-bit words read at offsets 0..32
  #     %rdx = operand g, five 64-bit words read at offsets 0..32
  #   Accumulates the 5x5 schoolbook product into five 128-bit sums
  #   (high:low register pairs):
  #     h0 = %rcx:%rbx  h1 = %r9:%r8  h2 = %r11:%r10
  #     h3 = %r13:%r12  h4 = %r15:%r14
  #   Partial products f[i]*g[j] with i+j >= 5 are folded into h(i+j-5)
  #   with g[j] pre-multiplied by 19.
  #   NOTE(review): presumably radix-2^51 limbs of a value mod 2^255-19,
  #   judging by the 51-bit mask in L$reduce51 and the function name.
  #   Stack frame: six pushed registers plus a 40-byte scratch area:
  #     0(%rsp)=g[0]  8(%rsp)=g[1]  16(%rsp)=g[2]  32(%rsp)=saved %rdi
  #   Does not return by itself: it jumps to L$reduce51, whose epilogue
  #   unwinds exactly this frame layout.
  # ---------------------------------------------------------------------
  2. .globl _x25519_fe51_mul
  3. .p2align 5
  4. _x25519_fe51_mul:
  5. pushq %rbp
  6. pushq %rbx
  7. pushq %r12
  8. pushq %r13
  9. pushq %r14
  10. pushq %r15
  11. leaq -40(%rsp),%rsp    # reserve 40 bytes of scratch (lea leaves flags alone)
  12. L$fe51_mul_body:
  13. movq 0(%rsi),%rax    # f[0]
  14. movq 0(%rdx),%r11    # g[0]
  15. movq 8(%rdx),%r12    # g[1]
  16. movq 16(%rdx),%r13    # g[2]
  17. movq 24(%rdx),%rbp    # g[3]
  18. movq 32(%rdx),%r14    # g[4]
  19. movq %rdi,32(%rsp)    # spill destination pointer; %rdi is reused as scratch
  20. movq %rax,%rdi    # keep f[0] so %rax can be reloaded after each mulq
  21. mulq %r11    # f[0]*g[0]
  22. movq %r11,0(%rsp)    # spill g[0]
  23. movq %rax,%rbx    # h0 low
  24. movq %rdi,%rax
  25. movq %rdx,%rcx    # h0 high
  26. mulq %r12    # f[0]*g[1]
  27. movq %r12,8(%rsp)    # spill g[1]
  28. movq %rax,%r8    # h1 low
  29. movq %rdi,%rax
  30. leaq (%r14,%r14,8),%r15    # %r15 = 9*g[4]
  31. movq %rdx,%r9    # h1 high
  32. mulq %r13    # f[0]*g[2]
  33. movq %r13,16(%rsp)    # spill g[2]
  34. movq %rax,%r10    # h2 low
  35. movq %rdi,%rax
  36. leaq (%r14,%r15,2),%rdi    # %rdi = g[4] + 2*9*g[4] = 19*g[4]
  37. movq %rdx,%r11    # h2 high
  38. mulq %rbp    # f[0]*g[3]
  39. movq %rax,%r12    # h3 low
  40. movq 0(%rsi),%rax    # f[0] again (%rdi no longer holds it)
  41. movq %rdx,%r13    # h3 high
  42. mulq %r14    # f[0]*g[4]
  43. movq %rax,%r14    # h4 low
  44. movq 8(%rsi),%rax    # f[1]
  45. movq %rdx,%r15    # h4 high
  # --- terms using 19*g[4] (in %rdi) and g[3] (in %rbp) ---
  46. mulq %rdi    # f[1]*19*g[4] -> h0
  47. addq %rax,%rbx
  48. movq 16(%rsi),%rax    # f[2]
  49. adcq %rdx,%rcx
  50. mulq %rdi    # f[2]*19*g[4] -> h1
  51. addq %rax,%r8
  52. movq 24(%rsi),%rax    # f[3]
  53. adcq %rdx,%r9
  54. mulq %rdi    # f[3]*19*g[4] -> h2
  55. addq %rax,%r10
  56. movq 32(%rsi),%rax    # f[4]
  57. adcq %rdx,%r11
  58. mulq %rdi    # f[4]*19*g[4] -> h3
  59. imulq $19,%rbp,%rdi    # %rdi = 19*g[3]
  60. addq %rax,%r12
  61. movq 8(%rsi),%rax    # f[1]
  62. adcq %rdx,%r13
  63. mulq %rbp    # f[1]*g[3] -> h4
  64. movq 16(%rsp),%rbp    # %rbp = g[2]
  65. addq %rax,%r14
  66. movq 16(%rsi),%rax    # f[2]
  67. adcq %rdx,%r15
  # --- terms using 19*g[3] (in %rdi) and g[2] (in %rbp) ---
  68. mulq %rdi    # f[2]*19*g[3] -> h0
  69. addq %rax,%rbx
  70. movq 24(%rsi),%rax    # f[3]
  71. adcq %rdx,%rcx
  72. mulq %rdi    # f[3]*19*g[3] -> h1
  73. addq %rax,%r8
  74. movq 32(%rsi),%rax    # f[4]
  75. adcq %rdx,%r9
  76. mulq %rdi    # f[4]*19*g[3] -> h2
  77. imulq $19,%rbp,%rdi    # %rdi = 19*g[2]
  78. addq %rax,%r10
  79. movq 8(%rsi),%rax    # f[1]
  80. adcq %rdx,%r11
  81. mulq %rbp    # f[1]*g[2] -> h3
  82. addq %rax,%r12
  83. movq 16(%rsi),%rax    # f[2]
  84. adcq %rdx,%r13
  85. mulq %rbp    # f[2]*g[2] -> h4
  86. movq 8(%rsp),%rbp    # %rbp = g[1]
  87. addq %rax,%r14
  88. movq 24(%rsi),%rax    # f[3]
  89. adcq %rdx,%r15
  # --- terms using 19*g[2] (in %rdi) and g[1] (in %rbp) ---
  90. mulq %rdi    # f[3]*19*g[2] -> h0
  91. addq %rax,%rbx
  92. movq 32(%rsi),%rax    # f[4]
  93. adcq %rdx,%rcx
  94. mulq %rdi    # f[4]*19*g[2] -> h1
  95. addq %rax,%r8
  96. movq 8(%rsi),%rax    # f[1]
  97. adcq %rdx,%r9
  98. mulq %rbp    # f[1]*g[1] -> h2
  99. imulq $19,%rbp,%rdi    # %rdi = 19*g[1]
  100. addq %rax,%r10
  101. movq 16(%rsi),%rax    # f[2]
  102. adcq %rdx,%r11
  103. mulq %rbp    # f[2]*g[1] -> h3
  104. addq %rax,%r12
  105. movq 24(%rsi),%rax    # f[3]
  106. adcq %rdx,%r13
  107. mulq %rbp    # f[3]*g[1] -> h4
  108. movq 0(%rsp),%rbp    # %rbp = g[0]
  109. addq %rax,%r14
  110. movq 32(%rsi),%rax    # f[4]
  111. adcq %rdx,%r15
  # --- terms using 19*g[1] (in %rdi) and g[0] (in %rbp) ---
  112. mulq %rdi    # f[4]*19*g[1] -> h0
  113. addq %rax,%rbx
  114. movq 8(%rsi),%rax    # f[1]
  115. adcq %rdx,%rcx
  116. mulq %rbp    # f[1]*g[0] -> h1
  117. addq %rax,%r8
  118. movq 16(%rsi),%rax    # f[2]
  119. adcq %rdx,%r9
  120. mulq %rbp    # f[2]*g[0] -> h2
  121. addq %rax,%r10
  122. movq 24(%rsi),%rax    # f[3]
  123. adcq %rdx,%r11
  124. mulq %rbp    # f[3]*g[0] -> h3
  125. addq %rax,%r12
  126. movq 32(%rsi),%rax    # f[4]
  127. adcq %rdx,%r13
  128. mulq %rbp    # f[4]*g[0] -> h4
  129. addq %rax,%r14
  130. adcq %rdx,%r15
  131. movq 32(%rsp),%rdi    # reload destination pointer for the stores in L$reduce51
  132. jmp L$reduce51    # shared carry/reduce tail; it also unwinds the frame and returns
  # End-of-function marker; not referenced anywhere in the visible code.
  133. L$fe51_mul_epilogue:
  # ---------------------------------------------------------------------
  # _x25519_fe51_sqr
  #   Register arguments as used by the code below:
  #     %rdi = destination (five 64-bit words stored by L$reduce51)
  #     %rsi = operand f, five 64-bit words read at offsets 0..32
  #   Squares f into the same five 128-bit accumulators that
  #   _x25519_fe51_mul uses:
  #     h0 = %rcx:%rbx  h1 = %r9:%r8  h2 = %r11:%r10
  #     h3 = %r13:%r12  h4 = %r15:%r14
  #   Cross terms are computed once with one factor doubled; terms that
  #   wrap past h4 carry an extra factor of 19.
  #   Stack frame: six pushed registers plus 40 bytes:
  #     0(%rsp)=f[2]  32(%rsp)=saved %rdi
  #   This block also contains L$reduce51, the carry/reduce tail and
  #   epilogue that _x25519_fe51_mul and _x25519_fe51_mul121666 jump to.
  # ---------------------------------------------------------------------
  134. .globl _x25519_fe51_sqr
  135. .p2align 5
  136. _x25519_fe51_sqr:
  137. pushq %rbp
  138. pushq %rbx
  139. pushq %r12
  140. pushq %r13
  141. pushq %r14
  142. pushq %r15
  143. leaq -40(%rsp),%rsp    # reserve 40 bytes of scratch
  144. L$fe51_sqr_body:
  145. movq 0(%rsi),%rax    # f[0]
  146. movq 16(%rsi),%r15    # f[2]
  147. movq 32(%rsi),%rbp    # f[4]
  148. movq %rdi,32(%rsp)    # spill destination pointer; %rdi is reused as scratch
  149. leaq (%rax,%rax,1),%r14    # %r14 = 2*f[0]
  150. mulq %rax    # f[0]*f[0]
  151. movq %rax,%rbx    # h0 low
  152. movq 8(%rsi),%rax    # f[1]
  153. movq %rdx,%rcx    # h0 high
  154. mulq %r14    # 2*f[0]*f[1]
  155. movq %rax,%r8    # h1 low
  156. movq %r15,%rax
  157. movq %r15,0(%rsp)    # spill f[2]
  158. movq %rdx,%r9    # h1 high
  159. mulq %r14    # 2*f[0]*f[2]
  160. movq %rax,%r10    # h2 low
  161. movq 24(%rsi),%rax    # f[3]
  162. movq %rdx,%r11    # h2 high
  163. imulq $19,%rbp,%rdi    # %rdi = 19*f[4]
  164. mulq %r14    # 2*f[0]*f[3]
  165. movq %rax,%r12    # h3 low
  166. movq %rbp,%rax
  167. movq %rdx,%r13    # h3 high
  168. mulq %r14    # 2*f[0]*f[4]
  169. movq %rax,%r14    # h4 low
  170. movq %rbp,%rax
  171. movq %rdx,%r15    # h4 high
  172. mulq %rdi    # f[4]*19*f[4] -> h3
  173. addq %rax,%r12
  174. movq 8(%rsi),%rax    # f[1]
  175. adcq %rdx,%r13
  176. movq 24(%rsi),%rsi    # %rsi = f[3]; the input pointer is no longer needed
  177. leaq (%rax,%rax,1),%rbp    # %rbp = 2*f[1]
  178. mulq %rax    # f[1]*f[1] -> h2
  179. addq %rax,%r10
  180. movq 0(%rsp),%rax    # f[2]
  181. adcq %rdx,%r11
  182. mulq %rbp    # 2*f[1]*f[2] -> h3
  183. addq %rax,%r12
  184. movq %rbp,%rax
  185. adcq %rdx,%r13
  186. mulq %rsi    # 2*f[1]*f[3] -> h4
  187. addq %rax,%r14
  188. movq %rbp,%rax
  189. adcq %rdx,%r15
  190. imulq $19,%rsi,%rbp    # %rbp = 19*f[3]
  191. mulq %rdi    # 2*f[1]*19*f[4] -> h0
  192. addq %rax,%rbx
  193. leaq (%rsi,%rsi,1),%rax    # 2*f[3]
  194. adcq %rdx,%rcx
  195. mulq %rdi    # 2*f[3]*19*f[4] -> h2
  196. addq %rax,%r10
  197. movq %rsi,%rax
  198. adcq %rdx,%r11
  199. mulq %rbp    # f[3]*19*f[3] -> h1
  200. addq %rax,%r8
  201. movq 0(%rsp),%rax    # f[2]
  202. adcq %rdx,%r9
  203. leaq (%rax,%rax,1),%rsi    # %rsi = 2*f[2]
  204. mulq %rax    # f[2]*f[2] -> h4
  205. addq %rax,%r14
  206. movq %rbp,%rax
  207. adcq %rdx,%r15
  208. mulq %rsi    # 2*f[2]*19*f[3] -> h0
  209. addq %rax,%rbx
  210. movq %rsi,%rax
  211. adcq %rdx,%rcx
  212. mulq %rdi    # 2*f[2]*19*f[4] -> h1
  213. addq %rax,%r8
  214. adcq %rdx,%r9
  215. movq 32(%rsp),%rdi    # reload destination pointer
  216. jmp L$reduce51    # skip the alignment padding that follows
  217. .p2align 5
  # ---------------------------------------------------------------------
  # L$reduce51 -- shared tail.
  #   In:  h0..h4 in the register pairs listed above, %rdi = destination,
  #        stack frame of six pushed registers plus 40 bytes.
  #   Each 128-bit hN is split into its low 51 bits (gN) and hN >> 51,
  #   which is added to the next sum; the carry out of h4 is multiplied
  #   by 19 and added to g0.  g0..g4 are stored at 0..32(%rdi).
  #   Each `shlq $13` on a high word drops that word's top 13 bits, so
  #   the code relies on every hN >> 51 fitting in 64 bits.
  #   g1 and g3 receive a final carry after being masked and are stored
  #   without a further mask.
  # ---------------------------------------------------------------------
  218. L$reduce51:
  219. movq $0x7ffffffffffff,%rbp    # %rbp = 2^51 - 1 (limb mask)
  220. movq %r10,%rdx
  221. shrq $51,%r10
  222. shlq $13,%r11
  223. andq %rbp,%rdx    # %rdx = g2 = h2 & mask
  224. orq %r10,%r11    # %r11 = h2 >> 51
  225. addq %r11,%r12
  226. adcq $0,%r13    # h3 += h2 >> 51
  227. movq %rbx,%rax
  228. shrq $51,%rbx
  229. shlq $13,%rcx
  230. andq %rbp,%rax    # %rax = g0 = h0 & mask
  231. orq %rbx,%rcx    # %rcx = h0 >> 51
  232. addq %rcx,%r8
  233. adcq $0,%r9    # h1 += h0 >> 51
  234. movq %r12,%rbx
  235. shrq $51,%r12
  236. shlq $13,%r13
  237. andq %rbp,%rbx    # %rbx = g3 = h3 & mask
  238. orq %r12,%r13    # %r13 = h3 >> 51
  239. addq %r13,%r14
  240. adcq $0,%r15    # h4 += h3 >> 51
  241. movq %r8,%rcx
  242. shrq $51,%r8
  243. shlq $13,%r9
  244. andq %rbp,%rcx    # %rcx = g1 = h1 & mask
  245. orq %r8,%r9    # %r9 = h1 >> 51
  246. addq %r9,%rdx    # g2 += h1 >> 51 (64-bit add, carry not tracked)
  247. movq %r14,%r10
  248. shrq $51,%r14
  249. shlq $13,%r15
  250. andq %rbp,%r10    # %r10 = g4 = h4 & mask
  251. orq %r14,%r15    # %r15 = h4 >> 51
  252. leaq (%r15,%r15,8),%r14    # 9 * (h4 >> 51)
  253. leaq (%r15,%r14,2),%r15    # 19 * (h4 >> 51)
  254. addq %r15,%rax    # g0 += 19 * (h4 >> 51)
  255. movq %rdx,%r8
  256. andq %rbp,%rdx    # g2 &= mask
  257. shrq $51,%r8
  258. addq %r8,%rbx    # g3 += g2 >> 51
  259. movq %rax,%r9
  260. andq %rbp,%rax    # g0 &= mask
  261. shrq $51,%r9
  262. addq %r9,%rcx    # g1 += g0 >> 51
  263. movq %rax,0(%rdi)    # store g0..g4
  264. movq %rcx,8(%rdi)
  265. movq %rdx,16(%rdi)
  266. movq %rbx,24(%rdi)
  267. movq %r10,32(%rdi)
  # Restore the six registers pushed by the entry points (they sit just
  # above the 40-byte scratch area), then drop the whole 88-byte frame.
  268. movq 40(%rsp),%r15
  269. movq 48(%rsp),%r14
  270. movq 56(%rsp),%r13
  271. movq 64(%rsp),%r12
  272. movq 72(%rsp),%rbx
  273. movq 80(%rsp),%rbp
  274. leaq 88(%rsp),%rsp
  275. L$fe51_sqr_epilogue:
  276. .byte 0xf3,0xc3    # machine code F3 C3 = `rep ret`
  # ---------------------------------------------------------------------
  # _x25519_fe51_mul121666
  #   Register arguments as used by the code below:
  #     %rdi = destination (five 64-bit words stored by L$reduce51)
  #     %rsi = operand f, five 64-bit words read at offsets 0..32
  #   Multiplies every word of f by the constant 121666, leaving the
  #   128-bit products in the accumulator pairs L$reduce51 expects:
  #     h0 = %rcx:%rbx  h1 = %r9:%r8  h2 = %r11:%r10
  #     h3 = %r13:%r12  h4 = %r15:%r14
  #   The prologue builds the same frame as _x25519_fe51_mul (six pushes
  #   plus 40 bytes) because the L$reduce51 epilogue unwinds that layout;
  #   the 40-byte scratch area itself is not written here, and %rdi is
  #   left untouched so it needs no spill.
  # ---------------------------------------------------------------------
  277. .globl _x25519_fe51_mul121666
  278. .p2align 5
  279. _x25519_fe51_mul121666:
  280. pushq %rbp
  281. pushq %rbx
  282. pushq %r12
  283. pushq %r13
  284. pushq %r14
  285. pushq %r15
  286. leaq -40(%rsp),%rsp
  287. L$fe51_mul121666_body:
  288. movl $121666,%eax    # 32-bit write zero-extends into %rax
  289. mulq 0(%rsi)    # 121666*f[0]
  290. movq %rax,%rbx    # h0 low
  291. movl $121666,%eax    # mulq overwrote %rax, so reload the constant
  292. movq %rdx,%rcx    # h0 high
  293. mulq 8(%rsi)    # 121666*f[1]
  294. movq %rax,%r8    # h1 low
  295. movl $121666,%eax
  296. movq %rdx,%r9    # h1 high
  297. mulq 16(%rsi)    # 121666*f[2]
  298. movq %rax,%r10    # h2 low
  299. movl $121666,%eax
  300. movq %rdx,%r11    # h2 high
  301. mulq 24(%rsi)    # 121666*f[3]
  302. movq %rax,%r12    # h3 low
  303. movl $121666,%eax
  304. movq %rdx,%r13    # h3 high
  305. mulq 32(%rsi)    # 121666*f[4]
  306. movq %rax,%r14    # h4 low
  307. movq %rdx,%r15    # h4 high
  308. jmp L$reduce51    # shared carry/reduce tail; it also unwinds the frame and returns
  # End-of-function marker; not referenced anywhere in the visible code.
  309. L$fe51_mul121666_epilogue:
  # ---------------------------------------------------------------------
  # _x25519_fe64_eligible
  #   Takes no arguments.  Reads the 32-bit word at
  #   _OPENSSL_ia32cap_P+8 and returns in %eax:
  #     0x80100  if bits 8 and 19 of that word are both set,
  #     0        otherwise.
  #   NOTE(review): these are presumably the BMI2 and ADX feature bits,
  #   since the fe64 routines in this file use mulx and adcx/adox;
  #   confirm against the OPENSSL_ia32cap_P layout.
  #   Clobbers %ecx and flags.
  # ---------------------------------------------------------------------
  310. .globl _x25519_fe64_eligible
  311. .p2align 5
  312. _x25519_fe64_eligible:
  313. movl _OPENSSL_ia32cap_P+8(%rip),%ecx    # RIP-relative load of the capability word
  314. xorl %eax,%eax    # default return value: 0
  315. andl $0x80100,%ecx    # keep only bits 8 and 19
  316. cmpl $0x80100,%ecx    # both set?
  317. cmovel %ecx,%eax    # yes: return the (non-zero) mask, branch-free
  318. .byte 0xf3,0xc3    # machine code F3 C3 = `rep ret`
  # ---------------------------------------------------------------------
  # _x25519_fe64_mul
  #   Register arguments as used by the code below:
  #     %rdi = destination (four 64-bit words stored by L$reduce64)
  #     %rsi = operand a, four 64-bit words read at offsets 0..24
  #     %rdx = operand b, four 64-bit words read at offsets 0..24
  #   Computes the 512-bit product a*b into %r8..%r15 (least significant
  #   word in %r8) using mulx together with two independent carry
  #   chains: adcx (CF) and adox (OF).  The instruction order is what
  #   keeps the two chains separate -- do not reorder.
  #   Register roles: %rbp=b[0]  %rcx=b[1]  %r14=b[2] (also spilled to
  #   (%rsp), because %r14 is later reused as an accumulator)  %r15=b[3]
  #   %rdx = current a[i] (implicit mulx operand)  %rdi = 0.
  #   Stack frame: seven pushes (%rdi last) plus 16 bytes:
  #     0(%rsp)=b[2]  16(%rsp)=saved %rdi
  #   Does not return by itself: it jumps to L$reduce64 with %edx = 38.
  # ---------------------------------------------------------------------
  319. .globl _x25519_fe64_mul
  320. .p2align 5
  321. _x25519_fe64_mul:
  322. pushq %rbp
  323. pushq %rbx
  324. pushq %r12
  325. pushq %r13
  326. pushq %r14
  327. pushq %r15
  328. pushq %rdi    # save destination pointer; %rdi becomes the zero register below
  329. leaq -16(%rsp),%rsp    # 16 bytes of scratch
  330. L$fe64_mul_body:
  331. movq %rdx,%rax    # %rax = pointer to b (%rdx is needed for mulx)
  332. movq 0(%rdx),%rbp    # b[0]
  333. movq 0(%rsi),%rdx    # a[0]
  334. movq 8(%rax),%rcx    # b[1]
  335. movq 16(%rax),%r14    # b[2]
  336. movq 24(%rax),%r15    # b[3]
  # --- row 0: a[0]*b[0..3], carries on the CF chain ---
  337. mulxq %rbp,%r8,%rax    # a[0]*b[0]
  338. xorl %edi,%edi    # %rdi = 0 and CF = OF = 0
  339. mulxq %rcx,%r9,%rbx    # a[0]*b[1]
  340. adcxq %rax,%r9
  341. mulxq %r14,%r10,%rax    # a[0]*b[2]
  342. adcxq %rbx,%r10
  343. mulxq %r15,%r11,%r12    # a[0]*b[3]
  344. movq 8(%rsi),%rdx    # a[1]
  345. adcxq %rax,%r11
  346. movq %r14,(%rsp)    # spill b[2]
  347. adcxq %rdi,%r12    # absorb the final CF into the top word
  # --- row 1: a[1]*b[0..3]; low halves on OF, high halves on CF ---
  348. mulxq %rbp,%rax,%rbx    # a[1]*b[0]
  349. adoxq %rax,%r9
  350. adcxq %rbx,%r10
  351. mulxq %rcx,%rax,%rbx    # a[1]*b[1]
  352. adoxq %rax,%r10
  353. adcxq %rbx,%r11
  354. mulxq %r14,%rax,%rbx    # a[1]*b[2]
  355. adoxq %rax,%r11
  356. adcxq %rbx,%r12
  357. mulxq %r15,%rax,%r13    # a[1]*b[3]
  358. movq 16(%rsi),%rdx    # a[2]
  359. adoxq %rax,%r12
  360. adcxq %rdi,%r13    # fold CF into the new top word
  361. adoxq %rdi,%r13    # fold OF into the new top word
  # --- row 2: a[2]*b[0..3]; low halves on CF, high halves on OF ---
  362. mulxq %rbp,%rax,%rbx    # a[2]*b[0]
  363. adcxq %rax,%r10
  364. adoxq %rbx,%r11
  365. mulxq %rcx,%rax,%rbx    # a[2]*b[1]
  366. adcxq %rax,%r11
  367. adoxq %rbx,%r12
  368. mulxq %r14,%rax,%rbx    # a[2]*b[2]
  369. adcxq %rax,%r12
  370. adoxq %rbx,%r13
  371. mulxq %r15,%rax,%r14    # a[2]*b[3]; %r14 now becomes an accumulator
  372. movq 24(%rsi),%rdx    # a[3]
  373. adcxq %rax,%r13
  374. adoxq %rdi,%r14
  375. adcxq %rdi,%r14
  # --- row 3: a[3]*b[0..3]; low halves on OF, high halves on CF ---
  376. mulxq %rbp,%rax,%rbx    # a[3]*b[0]
  377. adoxq %rax,%r11
  378. adcxq %rbx,%r12
  379. mulxq %rcx,%rax,%rbx    # a[3]*b[1]
  380. adoxq %rax,%r12
  381. adcxq %rbx,%r13
  382. mulxq (%rsp),%rax,%rbx    # a[3]*b[2], b[2] taken from its stack spill
  383. adoxq %rax,%r13
  384. adcxq %rbx,%r14
  385. mulxq %r15,%rax,%r15    # a[3]*b[3]; %r15 becomes the top accumulator word
  386. movl $38,%edx    # multiplier used by L$reduce64 (mov leaves flags alone)
  387. adoxq %rax,%r14
  388. adcxq %rdi,%r15
  389. adoxq %rdi,%r15
  390. jmp L$reduce64    # shared reduce tail; it also unwinds the frame and returns
  # End-of-function marker; not referenced anywhere in the visible code.
  391. L$fe64_mul_epilogue:
  # ---------------------------------------------------------------------
  # _x25519_fe64_sqr
  #   Register arguments as used by the code below:
  #     %rdi = destination (four 64-bit words stored by L$reduce64)
  #     %rsi = operand a, four 64-bit words read at offsets 0..24
  #   Computes the 512-bit square of a into %r8..%r15 (least significant
  #   word in %r8): first the off-diagonal products a[i]*a[j] (i < j),
  #   then one interleaved pass in which the CF chain (adcx r,r) doubles
  #   them while the OF chain (adox) adds the diagonal squares.  The
  #   instruction order is what keeps the two chains separate -- do not
  #   reorder.
  #   Register roles: %rcx=a[1]  %rbp=a[2]  %rsi=a[3] (the pointer is
  #   overwritten)  %rdx = current multiplier  %rdi = 0.
  #   Stack frame: seven pushes (%rdi last) plus 16 bytes; only the
  #   saved %rdi at 16(%rsp) is used.
  #   This block also contains L$reduce64, the reduction tail and
  #   epilogue that _x25519_fe64_mul jumps to.
  # ---------------------------------------------------------------------
  392. .globl _x25519_fe64_sqr
  393. .p2align 5
  394. _x25519_fe64_sqr:
  395. pushq %rbp
  396. pushq %rbx
  397. pushq %r12
  398. pushq %r13
  399. pushq %r14
  400. pushq %r15
  401. pushq %rdi    # save destination pointer; %rdi becomes the zero register below
  402. leaq -16(%rsp),%rsp
  403. L$fe64_sqr_body:
  404. movq 0(%rsi),%rdx    # a[0]
  405. movq 8(%rsi),%rcx    # a[1]
  406. movq 16(%rsi),%rbp    # a[2]
  407. movq 24(%rsi),%rsi    # a[3]
  408. mulxq %rdx,%r8,%r15    # a[0]*a[0]; high half parked in %r15
  409. mulxq %rcx,%r9,%rax    # a[0]*a[1]
  410. xorl %edi,%edi    # %rdi = 0 and CF = OF = 0
  411. mulxq %rbp,%r10,%rbx    # a[0]*a[2]
  412. adcxq %rax,%r10
  413. mulxq %rsi,%r11,%r12    # a[0]*a[3]
  414. movq %rcx,%rdx    # multiplier = a[1]
  415. adcxq %rbx,%r11
  416. adcxq %rdi,%r12
  417. mulxq %rbp,%rax,%rbx    # a[1]*a[2]
  418. adoxq %rax,%r11
  419. adcxq %rbx,%r12
  420. mulxq %rsi,%rax,%r13    # a[1]*a[3]
  421. movq %rbp,%rdx    # multiplier = a[2]
  422. adoxq %rax,%r12
  423. adcxq %rdi,%r13
  424. mulxq %rsi,%rax,%r14    # a[2]*a[3]
  425. movq %rcx,%rdx    # multiplier = a[1] for the coming a[1]*a[1]
  426. adoxq %rax,%r13
  427. adcxq %rdi,%r14
  428. adoxq %rdi,%r14
  # --- double the cross products (CF chain) and add the squares (OF chain) ---
  429. adcxq %r9,%r9    # %r9 = 2*%r9 + CF
  430. adoxq %r15,%r9    # + high half of a[0]*a[0]
  431. adcxq %r10,%r10
  432. mulxq %rdx,%rax,%rbx    # a[1]*a[1]
  433. movq %rbp,%rdx    # multiplier = a[2]
  434. adcxq %r11,%r11
  435. adoxq %rax,%r10
  436. adcxq %r12,%r12
  437. adoxq %rbx,%r11
  438. mulxq %rdx,%rax,%rbx    # a[2]*a[2]
  439. movq %rsi,%rdx    # multiplier = a[3]
  440. adcxq %r13,%r13
  441. adoxq %rax,%r12
  442. adcxq %r14,%r14
  443. adoxq %rbx,%r13
  444. mulxq %rdx,%rax,%r15    # a[3]*a[3]; high half becomes the top word
  445. movl $38,%edx    # multiplier used by L$reduce64 (mov leaves flags alone)
  446. adoxq %rax,%r14
  447. adcxq %rdi,%r15
  448. adoxq %rdi,%r15
  449. jmp L$reduce64    # skip the alignment padding that follows
  450. .p2align 5
  # ---------------------------------------------------------------------
  # L$reduce64 -- shared tail.
  #   In:  512-bit value in %r8..%r15 (low word %r8), %rdx = 38,
  #        %rdi = 0, saved destination pointer at 16(%rsp).
  #        NOTE(review): the adcx/adox chains continue straight from the
  #        callers, so CF and OF are presumably both 0 on entry -- confirm.
  #   Folds the upper four words into the lower four as
  #   low + 38*high, then folds the resulting overflow word (times 38)
  #   and a possible last carry (as one more +38) into the low word.
  #   NOTE(review): 38 = 2*19, consistent with 2^256 = 38 mod 2^255-19;
  #   the modulus itself is not stated anywhere in the visible code.
  #   Stores four words at 0..24(%rdi), restores the pushed registers,
  #   drops the 72-byte frame and returns.
  # ---------------------------------------------------------------------
  451. L$reduce64:
  452. mulxq %r12,%rax,%rbx    # 38 * word 4
  453. adcxq %rax,%r8
  454. adoxq %rbx,%r9
  455. mulxq %r13,%rax,%rbx    # 38 * word 5
  456. adcxq %rax,%r9
  457. adoxq %rbx,%r10
  458. mulxq %r14,%rax,%rbx    # 38 * word 6
  459. adcxq %rax,%r10
  460. adoxq %rbx,%r11
  461. mulxq %r15,%rax,%r12    # 38 * word 7; %r12 collects the overflow word
  462. adcxq %rax,%r11
  463. adoxq %rdi,%r12
  464. adcxq %rdi,%r12
  465. movq 16(%rsp),%rdi    # reload destination pointer (%rdi no longer needed as zero)
  466. imulq %rdx,%r12    # overflow word * 38
  467. addq %r12,%r8
  468. adcq $0,%r9
  469. adcq $0,%r10
  470. adcq $0,%r11
  471. sbbq %rax,%rax    # %rax = all ones if that chain carried out, else 0
  472. andq $38,%rax    # 38 or 0
  473. addq %rax,%r8    # fold the last carry
  474. movq %r9,8(%rdi)
  475. movq %r10,16(%rdi)
  476. movq %r11,24(%rdi)
  477. movq %r8,0(%rdi)
  # Restore the registers pushed by the entry points (they sit above the
  # 16-byte scratch area and the saved %rdi), then drop the 72-byte frame.
  478. movq 24(%rsp),%r15
  479. movq 32(%rsp),%r14
  480. movq 40(%rsp),%r13
  481. movq 48(%rsp),%r12
  482. movq 56(%rsp),%rbx
  483. movq 64(%rsp),%rbp
  484. leaq 72(%rsp),%rsp
  485. L$fe64_sqr_epilogue:
  486. .byte 0xf3,0xc3    # machine code F3 C3 = `rep ret`
  # ---------------------------------------------------------------------
  # _x25519_fe64_mul121666
  #   Register arguments as used by the code below:
  #     %rdi = destination, four 64-bit words written at offsets 0..24
  #     %rsi = operand a, four 64-bit words read at offsets 0..24
  #   Multiplies the 256-bit value a by the constant 121666 and folds
  #   the overflow word back into the low word with a factor of 38,
  #   using the same final-carry pattern as L$reduce64.
  #   Leaf routine with no stack frame; uses only %rax, %rcx, %rdx and
  #   %r8-%r11 as scratch.
  # ---------------------------------------------------------------------
  487. .globl _x25519_fe64_mul121666
  488. .p2align 5
  489. _x25519_fe64_mul121666:
  490. L$fe64_mul121666_body:
  491. movl $121666,%edx    # implicit mulx multiplier (zero-extended into %rdx)
  492. mulxq 0(%rsi),%r8,%rcx    # 121666*a[0]: low -> %r8, high -> %rcx
  493. mulxq 8(%rsi),%r9,%rax    # 121666*a[1]
  494. addq %rcx,%r9    # add the previous high half; starts the carry chain
  495. mulxq 16(%rsi),%r10,%rcx    # 121666*a[2] (mulx leaves flags alone)
  496. adcq %rax,%r10
  497. mulxq 24(%rsi),%r11,%rax    # 121666*a[3]
  498. adcq %rcx,%r11
  499. adcq $0,%rax    # %rax = overflow word (bits 256 and up)
  500. imulq $38,%rax,%rax    # overflow word * 38
  501. addq %rax,%r8
  502. adcq $0,%r9
  503. adcq $0,%r10
  504. adcq $0,%r11
  505. sbbq %rax,%rax    # %rax = all ones if that chain carried out, else 0
  506. andq $38,%rax    # 38 or 0
  507. addq %rax,%r8    # fold the last carry
  508. movq %r9,8(%rdi)
  509. movq %r10,16(%rdi)
  510. movq %r11,24(%rdi)
  511. movq %r8,0(%rdi)
  512. L$fe64_mul121666_epilogue:
  513. .byte 0xf3,0xc3    # machine code F3 C3 = `rep ret`
  # ---------------------------------------------------------------------
  # _x25519_fe64_add
  #   Register arguments as used by the code below:
  #     %rdi = destination, four 64-bit words written at offsets 0..24
  #     %rsi = first operand, four 64-bit words
  #     %rdx = second operand, four 64-bit words
  #   Adds the two 256-bit values.  A carry out of bit 255 is folded back
  #   as +38 on the low word; if that correction carries out in turn, a
  #   second +38 is added to the low word only.
  #   Branch-free: each carry is turned into a 0/38 mask with sbb/and.
  #   Leaf routine with no stack frame; scratch: %rax, %r8-%r11.
  # ---------------------------------------------------------------------
  514. .globl _x25519_fe64_add
  515. .p2align 5
  516. _x25519_fe64_add:
  517. L$fe64_add_body:
  518. movq 0(%rsi),%r8
  519. movq 8(%rsi),%r9
  520. movq 16(%rsi),%r10
  521. movq 24(%rsi),%r11
  522. addq 0(%rdx),%r8    # 256-bit add with carry propagation
  523. adcq 8(%rdx),%r9
  524. adcq 16(%rdx),%r10
  525. adcq 24(%rdx),%r11
  526. sbbq %rax,%rax    # %rax = all ones if the add carried out, else 0
  527. andq $38,%rax    # 38 or 0
  528. addq %rax,%r8    # first correction
  529. adcq $0,%r9
  530. adcq $0,%r10
  531. movq %r9,8(%rdi)    # stores are interleaved; mov does not disturb CF
  532. adcq $0,%r11
  533. movq %r10,16(%rdi)
  534. sbbq %rax,%rax    # did the correction itself carry out?
  535. movq %r11,24(%rdi)
  536. andq $38,%rax
  537. addq %rax,%r8    # second correction, low word only
  538. movq %r8,0(%rdi)
  539. L$fe64_add_epilogue:
  540. .byte 0xf3,0xc3    # machine code F3 C3 = `rep ret`
  # ---------------------------------------------------------------------
  # _x25519_fe64_sub
  #   Register arguments as used by the code below:
  #     %rdi = destination, four 64-bit words written at offsets 0..24
  #     %rsi = minuend, four 64-bit words
  #     %rdx = subtrahend, four 64-bit words
  #   Subtracts the two 256-bit values.  A borrow out of the top word is
  #   folded back as -38 on the low word; if that correction borrows in
  #   turn, a second -38 is applied to the low word only.
  #   Branch-free: each borrow is turned into a 0/38 mask with sbb/and.
  #   Leaf routine with no stack frame; scratch: %rax, %r8-%r11.
  # ---------------------------------------------------------------------
  541. .globl _x25519_fe64_sub
  542. .p2align 5
  543. _x25519_fe64_sub:
  544. L$fe64_sub_body:
  545. movq 0(%rsi),%r8
  546. movq 8(%rsi),%r9
  547. movq 16(%rsi),%r10
  548. movq 24(%rsi),%r11
  549. subq 0(%rdx),%r8    # 256-bit subtract with borrow propagation
  550. sbbq 8(%rdx),%r9
  551. sbbq 16(%rdx),%r10
  552. sbbq 24(%rdx),%r11
  553. sbbq %rax,%rax    # %rax = all ones if the subtract borrowed, else 0
  554. andq $38,%rax    # 38 or 0
  555. subq %rax,%r8    # first correction
  556. sbbq $0,%r9
  557. sbbq $0,%r10
  558. movq %r9,8(%rdi)    # stores are interleaved; mov does not disturb CF
  559. sbbq $0,%r11
  560. movq %r10,16(%rdi)
  561. sbbq %rax,%rax    # did the correction itself borrow?
  562. movq %r11,24(%rdi)
  563. andq $38,%rax
  564. subq %rax,%r8    # second correction, low word only
  565. movq %r8,0(%rdi)
  566. L$fe64_sub_epilogue:
  567. .byte 0xf3,0xc3    # machine code F3 C3 = `rep ret`
  # ---------------------------------------------------------------------
  # _x25519_fe64_tobytes
  #   Register arguments as used by the code below:
  #     %rdi = destination, four 64-bit words written at offsets 0..24
  #     %rsi = source, four 64-bit words read at offsets 0..24
  #   Branch-free sequence:
  #     1. clear bit 255 of the input and add 19, or 38 if bit 255 was set;
  #     2. clear bit 255 of that sum and subtract 19 unless the sum had
  #        bit 255 set.
  #   The stored value therefore always has bit 255 clear.
  #   NOTE(review): presumably this yields the canonical representative
  #   modulo 2^255-19 for serialisation -- confirm against the caller.
  #   Leaf routine with no stack frame; scratch: %rax, %r8-%r11.
  # ---------------------------------------------------------------------
  568. .globl _x25519_fe64_tobytes
  569. .p2align 5
  570. _x25519_fe64_tobytes:
  571. L$fe64_to_body:
  572. movq 0(%rsi),%r8
  573. movq 8(%rsi),%r9
  574. movq 16(%rsi),%r10
  575. movq 24(%rsi),%r11
  576. leaq (%r11,%r11,1),%rax    # top word shifted left by one (drops bit 63) ...
  577. sarq $63,%r11    # %r11 = all ones if the top bit was set, else 0
  578. shrq $1,%rax    # ... and back: %rax = top word with its top bit cleared
  579. andq $19,%r11    # 19 or 0
  580. addq $19,%r11    # 38 or 19
  581. addq %r11,%r8
  582. adcq $0,%r9
  583. adcq $0,%r10
  584. adcq $0,%rax    # the top word now lives in %rax
  585. leaq (%rax,%rax,1),%r11
  586. sarq $63,%rax    # %rax = all ones if the sum has its top bit set, else 0
  587. shrq $1,%r11    # %r11 = top word of the sum with its top bit cleared
  588. notq %rax    # invert the mask
  589. andq $19,%rax    # 19 if the top bit was clear, else 0
  590. subq %rax,%r8
  591. sbbq $0,%r9
  592. sbbq $0,%r10
  593. sbbq $0,%r11    # the top word is back in %r11
  594. movq %r8,0(%rdi)
  595. movq %r9,8(%rdi)
  596. movq %r10,16(%rdi)
  597. movq %r11,24(%rdi)
  598. L$fe64_to_epilogue:
  599. .byte 0xf3,0xc3    # machine code F3 C3 = `rep ret`
  # NUL-terminated ASCII identification string emitted after the last
  # routine.  The bytes spell:
  #   X25519 primitives for x86_64, CRYPTOGAMS by appro@openssl.org
  # (in the actual bytes the e-mail address is enclosed in angle brackets).
  600. .byte 88,50,53,53,49,57,32,112,114,105,109,105,116,105,118,101,115,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0