# fe25519_mul.S
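#
# fe25519_mul: r = x*y in GF(2^255 - 19), with field elements stored as
# four 64-bit limbs in radix 2^64.  This is qhasm-generated x86-64
# assembly: each instruction is preceded by its qhasm source line
# ("qhasm:") and its symbolic and register-allocated forms ("asm 1"/"asm 2").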
# qhasm: int64 rp
# qhasm: int64 xp
# qhasm: int64 yp
# qhasm: input rp
# qhasm: input xp
# qhasm: input yp
# qhasm: int64 r0
# qhasm: int64 r1
# qhasm: int64 r2
# qhasm: int64 r3
# qhasm: int64 caller1
# qhasm: int64 caller2
# qhasm: int64 caller3
# qhasm: int64 caller4
# qhasm: int64 caller5
# qhasm: int64 caller6
# qhasm: int64 caller7
# qhasm: caller caller1
# qhasm: caller caller2
# qhasm: caller caller3
# qhasm: caller caller4
# qhasm: caller caller5
# qhasm: caller caller6
# qhasm: caller caller7
# qhasm: stack64 caller1_stack
# qhasm: stack64 caller2_stack
# qhasm: stack64 caller3_stack
# qhasm: stack64 caller4_stack
# qhasm: stack64 caller5_stack
# qhasm: stack64 caller6_stack
# qhasm: stack64 caller7_stack
# qhasm: int64 mulr4
# qhasm: int64 mulr5
# qhasm: int64 mulr6
# qhasm: int64 mulr7
# qhasm: int64 mulr8
# qhasm: int64 mulrax
# qhasm: int64 mulrdx
# qhasm: int64 mulx0
# qhasm: int64 mulx1
# qhasm: int64 mulx2
# qhasm: int64 mulx3
# qhasm: int64 mulc
# qhasm: int64 mulzero
# qhasm: int64 muli38
# qhasm: enter CRYPTO_NAMESPACE(fe25519_mul)
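# Arguments follow the System V AMD64 calling convention:
#   rp = %rdi (result), xp = %rsi, yp = %rdx.
# yp is copied to %rcx right after the prologue because the widening
# mul instruction clobbers %rdx.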
.text
.p2align 5
.globl _CRYPTO_NAMESPACE(fe25519_mul)
.globl CRYPTO_NAMESPACE(fe25519_mul)
_CRYPTO_NAMESPACE(fe25519_mul):
CRYPTO_NAMESPACE(fe25519_mul):
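# Prologue: compute an adjustment in %r11 that leaves %rsp 32-byte aligned
# with at least 64 bytes of scratch space.  The adjustment itself is spilled
# to 0(%rsp) (the "caller1" slot) so the epilogue can undo it; the remaining
# slots preserve the callee-saved registers %r12-%r15, %rbx and %rbp.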
mov %rsp,%r11
and $31,%r11
add $64,%r11
sub %r11,%rsp
# qhasm: caller1_stack = caller1
# asm 1: movq <caller1=int64#9,>caller1_stack=stack64#1
# asm 2: movq <caller1=%r11,>caller1_stack=0(%rsp)
movq %r11,0(%rsp)
# qhasm: caller2_stack = caller2
# asm 1: movq <caller2=int64#10,>caller2_stack=stack64#2
# asm 2: movq <caller2=%r12,>caller2_stack=8(%rsp)
movq %r12,8(%rsp)
# qhasm: caller3_stack = caller3
# asm 1: movq <caller3=int64#11,>caller3_stack=stack64#3
# asm 2: movq <caller3=%r13,>caller3_stack=16(%rsp)
movq %r13,16(%rsp)
# qhasm: caller4_stack = caller4
# asm 1: movq <caller4=int64#12,>caller4_stack=stack64#4
# asm 2: movq <caller4=%r14,>caller4_stack=24(%rsp)
movq %r14,24(%rsp)
# qhasm: caller5_stack = caller5
# asm 1: movq <caller5=int64#13,>caller5_stack=stack64#5
# asm 2: movq <caller5=%r15,>caller5_stack=32(%rsp)
movq %r15,32(%rsp)
# qhasm: caller6_stack = caller6
# asm 1: movq <caller6=int64#14,>caller6_stack=stack64#6
# asm 2: movq <caller6=%rbx,>caller6_stack=40(%rsp)
movq %rbx,40(%rsp)
# qhasm: caller7_stack = caller7
# asm 1: movq <caller7=int64#15,>caller7_stack=stack64#7
# asm 2: movq <caller7=%rbp,>caller7_stack=48(%rsp)
movq %rbp,48(%rsp)
# qhasm: yp = yp
# asm 1: mov <yp=int64#3,>yp=int64#4
# asm 2: mov <yp=%rdx,>yp=%rcx
mov %rdx,%rcx
# qhasm: mulr4 = 0
# asm 1: mov $0,>mulr4=int64#5
# asm 2: mov $0,>mulr4=%r8
mov $0,%r8
# qhasm: mulr5 = 0
# asm 1: mov $0,>mulr5=int64#6
# asm 2: mov $0,>mulr5=%r9
mov $0,%r9
# qhasm: mulr6 = 0
# asm 1: mov $0,>mulr6=int64#8
# asm 2: mov $0,>mulr6=%r10
mov $0,%r10
# qhasm: mulr7 = 0
# asm 1: mov $0,>mulr7=int64#9
# asm 2: mov $0,>mulr7=%r11
mov $0,%r11
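# Pass 1: mulx0 = x[0].  Compute x[0]*y[0..3], initializing the product
# limbs r0..r3 with the top carry landing in mulr4.  Across the four
# passes, r0..r3 and mulr4..mulr7 accumulate the 512-bit schoolbook
# product x*y.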
# qhasm: mulx0 = *(uint64 *)(xp + 0)
# asm 1: movq 0(<xp=int64#2),>mulx0=int64#10
# asm 2: movq 0(<xp=%rsi),>mulx0=%r12
movq 0(%rsi),%r12
# qhasm: mulrax = *(uint64 *)(yp + 0)
# asm 1: movq 0(<yp=int64#4),>mulrax=int64#7
# asm 2: movq 0(<yp=%rcx),>mulrax=%rax
movq 0(%rcx),%rax
# qhasm: (uint128) mulrdx mulrax = mulrax * mulx0
# asm 1: mul <mulx0=int64#10
# asm 2: mul <mulx0=%r12
mul %r12
# qhasm: r0 = mulrax
# asm 1: mov <mulrax=int64#7,>r0=int64#11
# asm 2: mov <mulrax=%rax,>r0=%r13
mov %rax,%r13
# qhasm: r1 = mulrdx
# asm 1: mov <mulrdx=int64#3,>r1=int64#12
# asm 2: mov <mulrdx=%rdx,>r1=%r14
mov %rdx,%r14
# qhasm: mulrax = *(uint64 *)(yp + 8)
# asm 1: movq 8(<yp=int64#4),>mulrax=int64#7
# asm 2: movq 8(<yp=%rcx),>mulrax=%rax
movq 8(%rcx),%rax
# qhasm: (uint128) mulrdx mulrax = mulrax * mulx0
# asm 1: mul <mulx0=int64#10
# asm 2: mul <mulx0=%r12
mul %r12
# qhasm: carry? r1 += mulrax
# asm 1: add <mulrax=int64#7,<r1=int64#12
# asm 2: add <mulrax=%rax,<r1=%r14
add %rax,%r14
# qhasm: r2 = 0
# asm 1: mov $0,>r2=int64#13
# asm 2: mov $0,>r2=%r15
mov $0,%r15
# qhasm: r2 += mulrdx + carry
# asm 1: adc <mulrdx=int64#3,<r2=int64#13
# asm 2: adc <mulrdx=%rdx,<r2=%r15
adc %rdx,%r15
# qhasm: mulrax = *(uint64 *)(yp + 16)
# asm 1: movq 16(<yp=int64#4),>mulrax=int64#7
# asm 2: movq 16(<yp=%rcx),>mulrax=%rax
movq 16(%rcx),%rax
# qhasm: (uint128) mulrdx mulrax = mulrax * mulx0
# asm 1: mul <mulx0=int64#10
# asm 2: mul <mulx0=%r12
mul %r12
# qhasm: carry? r2 += mulrax
# asm 1: add <mulrax=int64#7,<r2=int64#13
# asm 2: add <mulrax=%rax,<r2=%r15
add %rax,%r15
# qhasm: r3 = 0
# asm 1: mov $0,>r3=int64#14
# asm 2: mov $0,>r3=%rbx
mov $0,%rbx
# qhasm: r3 += mulrdx + carry
# asm 1: adc <mulrdx=int64#3,<r3=int64#14
# asm 2: adc <mulrdx=%rdx,<r3=%rbx
adc %rdx,%rbx
# qhasm: mulrax = *(uint64 *)(yp + 24)
# asm 1: movq 24(<yp=int64#4),>mulrax=int64#7
# asm 2: movq 24(<yp=%rcx),>mulrax=%rax
movq 24(%rcx),%rax
# qhasm: (uint128) mulrdx mulrax = mulrax * mulx0
# asm 1: mul <mulx0=int64#10
# asm 2: mul <mulx0=%r12
mul %r12
# qhasm: carry? r3 += mulrax
# asm 1: add <mulrax=int64#7,<r3=int64#14
# asm 2: add <mulrax=%rax,<r3=%rbx
add %rax,%rbx
# qhasm: mulr4 += mulrdx + carry
# asm 1: adc <mulrdx=int64#3,<mulr4=int64#5
# asm 2: adc <mulrdx=%rdx,<mulr4=%r8
adc %rdx,%r8
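# Pass 2: add x[1]*y[0..3] into r1..r3 and mulr4/mulr5.  mulc buffers the
# high word between partial products, since each widening mul clobbers
# %rdx before the previous carry has been folded in.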
# qhasm: mulx1 = *(uint64 *)(xp + 8)
# asm 1: movq 8(<xp=int64#2),>mulx1=int64#10
# asm 2: movq 8(<xp=%rsi),>mulx1=%r12
movq 8(%rsi),%r12
# qhasm: mulrax = *(uint64 *)(yp + 0)
# asm 1: movq 0(<yp=int64#4),>mulrax=int64#7
# asm 2: movq 0(<yp=%rcx),>mulrax=%rax
movq 0(%rcx),%rax
# qhasm: (uint128) mulrdx mulrax = mulrax * mulx1
# asm 1: mul <mulx1=int64#10
# asm 2: mul <mulx1=%r12
mul %r12
# qhasm: carry? r1 += mulrax
# asm 1: add <mulrax=int64#7,<r1=int64#12
# asm 2: add <mulrax=%rax,<r1=%r14
add %rax,%r14
# qhasm: mulc = 0
# asm 1: mov $0,>mulc=int64#15
# asm 2: mov $0,>mulc=%rbp
mov $0,%rbp
# qhasm: mulc += mulrdx + carry
# asm 1: adc <mulrdx=int64#3,<mulc=int64#15
# asm 2: adc <mulrdx=%rdx,<mulc=%rbp
adc %rdx,%rbp
# qhasm: mulrax = *(uint64 *)(yp + 8)
# asm 1: movq 8(<yp=int64#4),>mulrax=int64#7
# asm 2: movq 8(<yp=%rcx),>mulrax=%rax
movq 8(%rcx),%rax
# qhasm: (uint128) mulrdx mulrax = mulrax * mulx1
# asm 1: mul <mulx1=int64#10
# asm 2: mul <mulx1=%r12
mul %r12
# qhasm: carry? r2 += mulrax
# asm 1: add <mulrax=int64#7,<r2=int64#13
# asm 2: add <mulrax=%rax,<r2=%r15
add %rax,%r15
# qhasm: mulrdx += 0 + carry
# asm 1: adc $0,<mulrdx=int64#3
# asm 2: adc $0,<mulrdx=%rdx
adc $0,%rdx
# qhasm: carry? r2 += mulc
# asm 1: add <mulc=int64#15,<r2=int64#13
# asm 2: add <mulc=%rbp,<r2=%r15
add %rbp,%r15
# qhasm: mulc = 0
# asm 1: mov $0,>mulc=int64#15
# asm 2: mov $0,>mulc=%rbp
mov $0,%rbp
# qhasm: mulc += mulrdx + carry
# asm 1: adc <mulrdx=int64#3,<mulc=int64#15
# asm 2: adc <mulrdx=%rdx,<mulc=%rbp
adc %rdx,%rbp
# qhasm: mulrax = *(uint64 *)(yp + 16)
# asm 1: movq 16(<yp=int64#4),>mulrax=int64#7
# asm 2: movq 16(<yp=%rcx),>mulrax=%rax
movq 16(%rcx),%rax
# qhasm: (uint128) mulrdx mulrax = mulrax * mulx1
# asm 1: mul <mulx1=int64#10
# asm 2: mul <mulx1=%r12
mul %r12
# qhasm: carry? r3 += mulrax
# asm 1: add <mulrax=int64#7,<r3=int64#14
# asm 2: add <mulrax=%rax,<r3=%rbx
add %rax,%rbx
# qhasm: mulrdx += 0 + carry
# asm 1: adc $0,<mulrdx=int64#3
# asm 2: adc $0,<mulrdx=%rdx
adc $0,%rdx
# qhasm: carry? r3 += mulc
# asm 1: add <mulc=int64#15,<r3=int64#14
# asm 2: add <mulc=%rbp,<r3=%rbx
add %rbp,%rbx
# qhasm: mulc = 0
# asm 1: mov $0,>mulc=int64#15
# asm 2: mov $0,>mulc=%rbp
mov $0,%rbp
# qhasm: mulc += mulrdx + carry
# asm 1: adc <mulrdx=int64#3,<mulc=int64#15
# asm 2: adc <mulrdx=%rdx,<mulc=%rbp
adc %rdx,%rbp
# qhasm: mulrax = *(uint64 *)(yp + 24)
# asm 1: movq 24(<yp=int64#4),>mulrax=int64#7
# asm 2: movq 24(<yp=%rcx),>mulrax=%rax
movq 24(%rcx),%rax
# qhasm: (uint128) mulrdx mulrax = mulrax * mulx1
# asm 1: mul <mulx1=int64#10
# asm 2: mul <mulx1=%r12
mul %r12
# qhasm: carry? mulr4 += mulrax
# asm 1: add <mulrax=int64#7,<mulr4=int64#5
# asm 2: add <mulrax=%rax,<mulr4=%r8
add %rax,%r8
# qhasm: mulrdx += 0 + carry
# asm 1: adc $0,<mulrdx=int64#3
# asm 2: adc $0,<mulrdx=%rdx
adc $0,%rdx
# qhasm: carry? mulr4 += mulc
# asm 1: add <mulc=int64#15,<mulr4=int64#5
# asm 2: add <mulc=%rbp,<mulr4=%r8
add %rbp,%r8
# qhasm: mulr5 += mulrdx + carry
# asm 1: adc <mulrdx=int64#3,<mulr5=int64#6
# asm 2: adc <mulrdx=%rdx,<mulr5=%r9
adc %rdx,%r9
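# Pass 3: add x[2]*y[0..3] into r2, r3 and mulr4..mulr6, using the same
# mulc carry chain.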
# qhasm: mulx2 = *(uint64 *)(xp + 16)
# asm 1: movq 16(<xp=int64#2),>mulx2=int64#10
# asm 2: movq 16(<xp=%rsi),>mulx2=%r12
movq 16(%rsi),%r12
# qhasm: mulrax = *(uint64 *)(yp + 0)
# asm 1: movq 0(<yp=int64#4),>mulrax=int64#7
# asm 2: movq 0(<yp=%rcx),>mulrax=%rax
movq 0(%rcx),%rax
# qhasm: (uint128) mulrdx mulrax = mulrax * mulx2
# asm 1: mul <mulx2=int64#10
# asm 2: mul <mulx2=%r12
mul %r12
# qhasm: carry? r2 += mulrax
# asm 1: add <mulrax=int64#7,<r2=int64#13
# asm 2: add <mulrax=%rax,<r2=%r15
add %rax,%r15
# qhasm: mulc = 0
# asm 1: mov $0,>mulc=int64#15
# asm 2: mov $0,>mulc=%rbp
mov $0,%rbp
# qhasm: mulc += mulrdx + carry
# asm 1: adc <mulrdx=int64#3,<mulc=int64#15
# asm 2: adc <mulrdx=%rdx,<mulc=%rbp
adc %rdx,%rbp
# qhasm: mulrax = *(uint64 *)(yp + 8)
# asm 1: movq 8(<yp=int64#4),>mulrax=int64#7
# asm 2: movq 8(<yp=%rcx),>mulrax=%rax
movq 8(%rcx),%rax
# qhasm: (uint128) mulrdx mulrax = mulrax * mulx2
# asm 1: mul <mulx2=int64#10
# asm 2: mul <mulx2=%r12
mul %r12
# qhasm: carry? r3 += mulrax
# asm 1: add <mulrax=int64#7,<r3=int64#14
# asm 2: add <mulrax=%rax,<r3=%rbx
add %rax,%rbx
# qhasm: mulrdx += 0 + carry
# asm 1: adc $0,<mulrdx=int64#3
# asm 2: adc $0,<mulrdx=%rdx
adc $0,%rdx
# qhasm: carry? r3 += mulc
# asm 1: add <mulc=int64#15,<r3=int64#14
# asm 2: add <mulc=%rbp,<r3=%rbx
add %rbp,%rbx
# qhasm: mulc = 0
# asm 1: mov $0,>mulc=int64#15
# asm 2: mov $0,>mulc=%rbp
mov $0,%rbp
# qhasm: mulc += mulrdx + carry
# asm 1: adc <mulrdx=int64#3,<mulc=int64#15
# asm 2: adc <mulrdx=%rdx,<mulc=%rbp
adc %rdx,%rbp
# qhasm: mulrax = *(uint64 *)(yp + 16)
# asm 1: movq 16(<yp=int64#4),>mulrax=int64#7
# asm 2: movq 16(<yp=%rcx),>mulrax=%rax
movq 16(%rcx),%rax
# qhasm: (uint128) mulrdx mulrax = mulrax * mulx2
# asm 1: mul <mulx2=int64#10
# asm 2: mul <mulx2=%r12
mul %r12
# qhasm: carry? mulr4 += mulrax
# asm 1: add <mulrax=int64#7,<mulr4=int64#5
# asm 2: add <mulrax=%rax,<mulr4=%r8
add %rax,%r8
# qhasm: mulrdx += 0 + carry
# asm 1: adc $0,<mulrdx=int64#3
# asm 2: adc $0,<mulrdx=%rdx
adc $0,%rdx
# qhasm: carry? mulr4 += mulc
# asm 1: add <mulc=int64#15,<mulr4=int64#5
# asm 2: add <mulc=%rbp,<mulr4=%r8
add %rbp,%r8
# qhasm: mulc = 0
# asm 1: mov $0,>mulc=int64#15
# asm 2: mov $0,>mulc=%rbp
mov $0,%rbp
# qhasm: mulc += mulrdx + carry
# asm 1: adc <mulrdx=int64#3,<mulc=int64#15
# asm 2: adc <mulrdx=%rdx,<mulc=%rbp
adc %rdx,%rbp
# qhasm: mulrax = *(uint64 *)(yp + 24)
# asm 1: movq 24(<yp=int64#4),>mulrax=int64#7
# asm 2: movq 24(<yp=%rcx),>mulrax=%rax
movq 24(%rcx),%rax
# qhasm: (uint128) mulrdx mulrax = mulrax * mulx2
# asm 1: mul <mulx2=int64#10
# asm 2: mul <mulx2=%r12
mul %r12
# qhasm: carry? mulr5 += mulrax
# asm 1: add <mulrax=int64#7,<mulr5=int64#6
# asm 2: add <mulrax=%rax,<mulr5=%r9
add %rax,%r9
# qhasm: mulrdx += 0 + carry
# asm 1: adc $0,<mulrdx=int64#3
# asm 2: adc $0,<mulrdx=%rdx
adc $0,%rdx
# qhasm: carry? mulr5 += mulc
# asm 1: add <mulc=int64#15,<mulr5=int64#6
# asm 2: add <mulc=%rbp,<mulr5=%r9
add %rbp,%r9
# qhasm: mulr6 += mulrdx + carry
# asm 1: adc <mulrdx=int64#3,<mulr6=int64#8
# asm 2: adc <mulrdx=%rdx,<mulr6=%r10
adc %rdx,%r10
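# Pass 4: add x[3]*y[0..3] into r3 and mulr4..mulr7.  x is fully consumed
# once x[3] is loaded, so %rsi itself is reused to hold mulx3 (and mulc
# moves to %r12).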
# qhasm: mulx3 = *(uint64 *)(xp + 24)
# asm 1: movq 24(<xp=int64#2),>mulx3=int64#2
# asm 2: movq 24(<xp=%rsi),>mulx3=%rsi
movq 24(%rsi),%rsi
# qhasm: mulrax = *(uint64 *)(yp + 0)
# asm 1: movq 0(<yp=int64#4),>mulrax=int64#7
# asm 2: movq 0(<yp=%rcx),>mulrax=%rax
movq 0(%rcx),%rax
# qhasm: (uint128) mulrdx mulrax = mulrax * mulx3
# asm 1: mul <mulx3=int64#2
# asm 2: mul <mulx3=%rsi
mul %rsi
# qhasm: carry? r3 += mulrax
# asm 1: add <mulrax=int64#7,<r3=int64#14
# asm 2: add <mulrax=%rax,<r3=%rbx
add %rax,%rbx
# qhasm: mulc = 0
# asm 1: mov $0,>mulc=int64#10
# asm 2: mov $0,>mulc=%r12
mov $0,%r12
# qhasm: mulc += mulrdx + carry
# asm 1: adc <mulrdx=int64#3,<mulc=int64#10
# asm 2: adc <mulrdx=%rdx,<mulc=%r12
adc %rdx,%r12
# qhasm: mulrax = *(uint64 *)(yp + 8)
# asm 1: movq 8(<yp=int64#4),>mulrax=int64#7
# asm 2: movq 8(<yp=%rcx),>mulrax=%rax
movq 8(%rcx),%rax
# qhasm: (uint128) mulrdx mulrax = mulrax * mulx3
# asm 1: mul <mulx3=int64#2
# asm 2: mul <mulx3=%rsi
mul %rsi
# qhasm: carry? mulr4 += mulrax
# asm 1: add <mulrax=int64#7,<mulr4=int64#5
# asm 2: add <mulrax=%rax,<mulr4=%r8
add %rax,%r8
# qhasm: mulrdx += 0 + carry
# asm 1: adc $0,<mulrdx=int64#3
# asm 2: adc $0,<mulrdx=%rdx
adc $0,%rdx
# qhasm: carry? mulr4 += mulc
# asm 1: add <mulc=int64#10,<mulr4=int64#5
# asm 2: add <mulc=%r12,<mulr4=%r8
add %r12,%r8
# qhasm: mulc = 0
# asm 1: mov $0,>mulc=int64#10
# asm 2: mov $0,>mulc=%r12
mov $0,%r12
# qhasm: mulc += mulrdx + carry
# asm 1: adc <mulrdx=int64#3,<mulc=int64#10
# asm 2: adc <mulrdx=%rdx,<mulc=%r12
adc %rdx,%r12
# qhasm: mulrax = *(uint64 *)(yp + 16)
# asm 1: movq 16(<yp=int64#4),>mulrax=int64#7
# asm 2: movq 16(<yp=%rcx),>mulrax=%rax
movq 16(%rcx),%rax
# qhasm: (uint128) mulrdx mulrax = mulrax * mulx3
# asm 1: mul <mulx3=int64#2
# asm 2: mul <mulx3=%rsi
mul %rsi
# qhasm: carry? mulr5 += mulrax
# asm 1: add <mulrax=int64#7,<mulr5=int64#6
# asm 2: add <mulrax=%rax,<mulr5=%r9
add %rax,%r9
# qhasm: mulrdx += 0 + carry
# asm 1: adc $0,<mulrdx=int64#3
# asm 2: adc $0,<mulrdx=%rdx
adc $0,%rdx
# qhasm: carry? mulr5 += mulc
# asm 1: add <mulc=int64#10,<mulr5=int64#6
# asm 2: add <mulc=%r12,<mulr5=%r9
add %r12,%r9
# qhasm: mulc = 0
# asm 1: mov $0,>mulc=int64#10
# asm 2: mov $0,>mulc=%r12
mov $0,%r12
# qhasm: mulc += mulrdx + carry
# asm 1: adc <mulrdx=int64#3,<mulc=int64#10
# asm 2: adc <mulrdx=%rdx,<mulc=%r12
adc %rdx,%r12
# qhasm: mulrax = *(uint64 *)(yp + 24)
# asm 1: movq 24(<yp=int64#4),>mulrax=int64#7
# asm 2: movq 24(<yp=%rcx),>mulrax=%rax
movq 24(%rcx),%rax
# qhasm: (uint128) mulrdx mulrax = mulrax * mulx3
# asm 1: mul <mulx3=int64#2
# asm 2: mul <mulx3=%rsi
mul %rsi
# qhasm: carry? mulr6 += mulrax
# asm 1: add <mulrax=int64#7,<mulr6=int64#8
# asm 2: add <mulrax=%rax,<mulr6=%r10
add %rax,%r10
# qhasm: mulrdx += 0 + carry
# asm 1: adc $0,<mulrdx=int64#3
# asm 2: adc $0,<mulrdx=%rdx
adc $0,%rdx
# qhasm: carry? mulr6 += mulc
# asm 1: add <mulc=int64#10,<mulr6=int64#8
# asm 2: add <mulc=%r12,<mulr6=%r10
add %r12,%r10
# qhasm: mulr7 += mulrdx + carry
# asm 1: adc <mulrdx=int64#3,<mulr7=int64#9
# asm 2: adc <mulrdx=%rdx,<mulr7=%r11
adc %rdx,%r11
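# Reduction: 2^256 == 38 (mod 2^255 - 19), so the high half of the product
# is folded back in as 38*(mulr4..mulr7).  Each high limb is multiplied by
# the 64-bit constant CRYPTO_NAMESPACE(38) in memory, yielding the
# five-limb value mulr4..mulr8.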
# qhasm: mulrax = mulr4
# asm 1: mov <mulr4=int64#5,>mulrax=int64#7
# asm 2: mov <mulr4=%r8,>mulrax=%rax
mov %r8,%rax
# qhasm: (uint128) mulrdx mulrax = mulrax * *(uint64 *)&CRYPTO_NAMESPACE(38)
mulq CRYPTO_NAMESPACE(38)(%rip)
# qhasm: mulr4 = mulrax
# asm 1: mov <mulrax=int64#7,>mulr4=int64#2
# asm 2: mov <mulrax=%rax,>mulr4=%rsi
mov %rax,%rsi
# qhasm: mulrax = mulr5
# asm 1: mov <mulr5=int64#6,>mulrax=int64#7
# asm 2: mov <mulr5=%r9,>mulrax=%rax
mov %r9,%rax
# qhasm: mulr5 = mulrdx
# asm 1: mov <mulrdx=int64#3,>mulr5=int64#4
# asm 2: mov <mulrdx=%rdx,>mulr5=%rcx
mov %rdx,%rcx
# qhasm: (uint128) mulrdx mulrax = mulrax * *(uint64 *)&CRYPTO_NAMESPACE(38)
mulq CRYPTO_NAMESPACE(38)(%rip)
# qhasm: carry? mulr5 += mulrax
# asm 1: add <mulrax=int64#7,<mulr5=int64#4
# asm 2: add <mulrax=%rax,<mulr5=%rcx
add %rax,%rcx
# qhasm: mulrax = mulr6
# asm 1: mov <mulr6=int64#8,>mulrax=int64#7
# asm 2: mov <mulr6=%r10,>mulrax=%rax
mov %r10,%rax
# qhasm: mulr6 = 0
# asm 1: mov $0,>mulr6=int64#5
# asm 2: mov $0,>mulr6=%r8
mov $0,%r8
# qhasm: mulr6 += mulrdx + carry
# asm 1: adc <mulrdx=int64#3,<mulr6=int64#5
# asm 2: adc <mulrdx=%rdx,<mulr6=%r8
adc %rdx,%r8
# qhasm: (uint128) mulrdx mulrax = mulrax * *(uint64 *)&CRYPTO_NAMESPACE(38)
mulq CRYPTO_NAMESPACE(38)(%rip)
# qhasm: carry? mulr6 += mulrax
# asm 1: add <mulrax=int64#7,<mulr6=int64#5
# asm 2: add <mulrax=%rax,<mulr6=%r8
add %rax,%r8
# qhasm: mulrax = mulr7
# asm 1: mov <mulr7=int64#9,>mulrax=int64#7
# asm 2: mov <mulr7=%r11,>mulrax=%rax
mov %r11,%rax
# qhasm: mulr7 = 0
# asm 1: mov $0,>mulr7=int64#6
# asm 2: mov $0,>mulr7=%r9
mov $0,%r9
# qhasm: mulr7 += mulrdx + carry
# asm 1: adc <mulrdx=int64#3,<mulr7=int64#6
# asm 2: adc <mulrdx=%rdx,<mulr7=%r9
adc %rdx,%r9
# qhasm: (uint128) mulrdx mulrax = mulrax * *(uint64 *)&CRYPTO_NAMESPACE(38)
mulq CRYPTO_NAMESPACE(38)(%rip)
# qhasm: carry? mulr7 += mulrax
# asm 1: add <mulrax=int64#7,<mulr7=int64#6
# asm 2: add <mulrax=%rax,<mulr7=%r9
add %rax,%r9
# qhasm: mulr8 = 0
# asm 1: mov $0,>mulr8=int64#7
# asm 2: mov $0,>mulr8=%rax
mov $0,%rax
# qhasm: mulr8 += mulrdx + carry
# asm 1: adc <mulrdx=int64#3,<mulr8=int64#7
# asm 2: adc <mulrdx=%rdx,<mulr8=%rax
adc %rdx,%rax
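# Add the folded limbs (now in %rsi, %rcx, %r8, %r9) into r0..r3; the
# carry out of r3 is absorbed into mulr8 (%rax).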
# qhasm: carry? r0 += mulr4
# asm 1: add <mulr4=int64#2,<r0=int64#11
# asm 2: add <mulr4=%rsi,<r0=%r13
add %rsi,%r13
# qhasm: carry? r1 += mulr5 + carry
# asm 1: adc <mulr5=int64#4,<r1=int64#12
# asm 2: adc <mulr5=%rcx,<r1=%r14
adc %rcx,%r14
# qhasm: carry? r2 += mulr6 + carry
# asm 1: adc <mulr6=int64#5,<r2=int64#13
# asm 2: adc <mulr6=%r8,<r2=%r15
adc %r8,%r15
# qhasm: carry? r3 += mulr7 + carry
# asm 1: adc <mulr7=int64#6,<r3=int64#14
# asm 2: adc <mulr7=%r9,<r3=%rbx
adc %r9,%rbx
# qhasm: mulzero = 0
# asm 1: mov $0,>mulzero=int64#2
# asm 2: mov $0,>mulzero=%rsi
mov $0,%rsi
# qhasm: mulr8 += mulzero + carry
# asm 1: adc <mulzero=int64#2,<mulr8=int64#7
# asm 2: adc <mulzero=%rsi,<mulr8=%rax
adc %rsi,%rax
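# Second fold: mulr8 is at most a few bits wide, so 38*mulr8 fits in a
# single limb; add it to r0 and ripple the carry through r1..r3.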
# qhasm: mulr8 *= 38
# asm 1: imulq $38,<mulr8=int64#7,>mulr8=int64#3
# asm 2: imulq $38,<mulr8=%rax,>mulr8=%rdx
imulq $38,%rax,%rdx
# qhasm: carry? r0 += mulr8
# asm 1: add <mulr8=int64#3,<r0=int64#11
# asm 2: add <mulr8=%rdx,<r0=%r13
add %rdx,%r13
# qhasm: carry? r1 += mulzero + carry
# asm 1: adc <mulzero=int64#2,<r1=int64#12
# asm 2: adc <mulzero=%rsi,<r1=%r14
adc %rsi,%r14
# qhasm: carry? r2 += mulzero + carry
# asm 1: adc <mulzero=int64#2,<r2=int64#13
# asm 2: adc <mulzero=%rsi,<r2=%r15
adc %rsi,%r15
# qhasm: carry? r3 += mulzero + carry
# asm 1: adc <mulzero=int64#2,<r3=int64#14
# asm 2: adc <mulzero=%rsi,<r3=%rbx
adc %rsi,%rbx
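# If that ripple carries out of r3, mulzero becomes 1 and a final 38 is
# added to r0; r0 is tiny in that case, so no further carry is possible.
# The stored result fits in four limbs and is congruent to x*y modulo
# 2^255 - 19 (it need not be the canonical representative).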
# qhasm: mulzero += mulzero + carry
# asm 1: adc <mulzero=int64#2,<mulzero=int64#2
# asm 2: adc <mulzero=%rsi,<mulzero=%rsi
adc %rsi,%rsi
# qhasm: mulzero *= 38
# asm 1: imulq $38,<mulzero=int64#2,>mulzero=int64#2
# asm 2: imulq $38,<mulzero=%rsi,>mulzero=%rsi
imulq $38,%rsi,%rsi
# qhasm: r0 += mulzero
# asm 1: add <mulzero=int64#2,<r0=int64#11
# asm 2: add <mulzero=%rsi,<r0=%r13
add %rsi,%r13
# qhasm: *(uint64 *)(rp + 8) = r1
# asm 1: movq <r1=int64#12,8(<rp=int64#1)
# asm 2: movq <r1=%r14,8(<rp=%rdi)
movq %r14,8(%rdi)
# qhasm: *(uint64 *)(rp + 16) = r2
# asm 1: movq <r2=int64#13,16(<rp=int64#1)
# asm 2: movq <r2=%r15,16(<rp=%rdi)
movq %r15,16(%rdi)
# qhasm: *(uint64 *)(rp + 24) = r3
# asm 1: movq <r3=int64#14,24(<rp=int64#1)
# asm 2: movq <r3=%rbx,24(<rp=%rdi)
movq %rbx,24(%rdi)
# qhasm: *(uint64 *)(rp + 0) = r0
# asm 1: movq <r0=int64#11,0(<rp=int64#1)
# asm 2: movq <r0=%r13,0(<rp=%rdi)
movq %r13,0(%rdi)
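# Epilogue: restore the callee-saved registers, pop the frame via the
# adjustment saved in the "caller1" slot, and return.  The trailing moves
# of %rdi/%rsi into %rax/%rdx are qhasm's standard leave sequence.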
# qhasm: caller1 = caller1_stack
# asm 1: movq <caller1_stack=stack64#1,>caller1=int64#9
# asm 2: movq <caller1_stack=0(%rsp),>caller1=%r11
movq 0(%rsp),%r11
# qhasm: caller2 = caller2_stack
# asm 1: movq <caller2_stack=stack64#2,>caller2=int64#10
# asm 2: movq <caller2_stack=8(%rsp),>caller2=%r12
movq 8(%rsp),%r12
# qhasm: caller3 = caller3_stack
# asm 1: movq <caller3_stack=stack64#3,>caller3=int64#11
# asm 2: movq <caller3_stack=16(%rsp),>caller3=%r13
movq 16(%rsp),%r13
# qhasm: caller4 = caller4_stack
# asm 1: movq <caller4_stack=stack64#4,>caller4=int64#12
# asm 2: movq <caller4_stack=24(%rsp),>caller4=%r14
movq 24(%rsp),%r14
# qhasm: caller5 = caller5_stack
# asm 1: movq <caller5_stack=stack64#5,>caller5=int64#13
# asm 2: movq <caller5_stack=32(%rsp),>caller5=%r15
movq 32(%rsp),%r15
# qhasm: caller6 = caller6_stack
# asm 1: movq <caller6_stack=stack64#6,>caller6=int64#14
# asm 2: movq <caller6_stack=40(%rsp),>caller6=%rbx
movq 40(%rsp),%rbx
# qhasm: caller7 = caller7_stack
# asm 1: movq <caller7_stack=stack64#7,>caller7=int64#15
# asm 2: movq <caller7_stack=48(%rsp),>caller7=%rbp
movq 48(%rsp),%rbp
# qhasm: leave
add %r11,%rsp
mov %rdi,%rax
mov %rsi,%rdx
ret