# fe25519_nsquare.S
  #-----------------------------------------------------------------------
  # batch_fe25519_nsquare(rp, n)   -- qhasm-generated, AT&T (GAS) syntax
  #
  # Arguments follow the qhasm declarations "input rp" / "input n":
  #   rdi = rp   pointer to five 64-bit limbs f0..f4 at offsets 0,8,16,24,32;
  #              read and overwritten in place on every pass
  #   rsi = n    pass counter
  #
  # Each pass forms the limb products of f*f (cross terms doubled, high
  # terms scaled by 19 or 38), then carries with shifts of 51 bits and the
  # mask loaded from batch_REDMASK51.
  # NOTE(review): presumably radix-2^51 squaring modulo 2^255-19 with
  # batch_REDMASK51 = 2^51-1; the constant is defined outside this file.
  #
  # The loop is bottom-tested (sub $1 / jg), so the body runs once even
  # when n <= 0.
  #
  # On return: rax = rp, rdx = n after the final decrement.
  # Saved and restored here: r12, r13, r14, r15, rbx, rbp (rbp is not
  # otherwise touched by this routine).
  # Scratch: rax, rcx, rdx, r8, r9, r10, r11, flags.
  #
  # Frame: rsp is lowered by (rsp & 31) + 64, which leaves it 32-byte
  # aligned.  Slot 0(%rsp) keeps that adjustment so the epilogue can undo
  # it; slots 8..48 keep the saved registers.
  #
  # Accumulators inside the loop (high:low):
  #   r0 = r8:rcx   r1 = r10:r9   r2 = r12:r11   r3 = r14:r13   r4 = rbx:r15
  # After the carry chain the limbs live in rcx, r9, rax, r10, r11.
  #-----------------------------------------------------------------------
  .text
  .p2align 5
  .globl _CRYPTO_NAMESPACE(batch_fe25519_nsquare)
  .globl CRYPTO_NAMESPACE(batch_fe25519_nsquare)
  _CRYPTO_NAMESPACE(batch_fe25519_nsquare):
  CRYPTO_NAMESPACE(batch_fe25519_nsquare):
        # Build the aligned frame; r11 holds the byte count to give back.
        mov     %rsp, %r11
        and     $31, %r11
        add     $64, %r11
        sub     %r11, %rsp

        # Spill the frame adjustment and the registers the loop overwrites.
        movq    %r11, 0(%rsp)
        movq    %r12, 8(%rsp)
        movq    %r13, 16(%rsp)
        movq    %r14, 24(%rsp)
        movq    %r15, 32(%rsp)
        movq    %rbx, 40(%rsp)
        movq    %rbp, 48(%rsp)

  ._loop:
        # r0 = f0*f0
        movq    0(%rdi), %rax
        mulq    0(%rdi)
        mov     %rax, %rcx
        mov     %rdx, %r8

        # r1 = (f0 << 1) * f1
        movq    0(%rdi), %rax
        shl     $1, %rax
        mulq    8(%rdi)
        mov     %rax, %r9
        mov     %rdx, %r10

        # r2 = (f0 << 1) * f2
        movq    0(%rdi), %rax
        shl     $1, %rax
        mulq    16(%rdi)
        mov     %rax, %r11
        mov     %rdx, %r12

        # r3 = (f0 << 1) * f3
        movq    0(%rdi), %rax
        shl     $1, %rax
        mulq    24(%rdi)
        mov     %rax, %r13
        mov     %rdx, %r14

        # r4 = (f0 << 1) * f4
        movq    0(%rdi), %rax
        shl     $1, %rax
        mulq    32(%rdi)
        mov     %rax, %r15
        mov     %rdx, %rbx

        # r2 += f1*f1
        movq    8(%rdi), %rax
        mulq    8(%rdi)
        add     %rax, %r11
        adc     %rdx, %r12

        # r3 += (f1 << 1) * f2
        movq    8(%rdi), %rax
        shl     $1, %rax
        mulq    16(%rdi)
        add     %rax, %r13
        adc     %rdx, %r14

        # r4 += (f1 << 1) * f3
        movq    8(%rdi), %rax
        shl     $1, %rax
        mulq    24(%rdi)
        add     %rax, %r15
        adc     %rdx, %rbx

        # r0 += (38*f1) * f4
        movq    8(%rdi), %rdx
        imulq   $38, %rdx, %rax
        mulq    32(%rdi)
        add     %rax, %rcx
        adc     %rdx, %r8

        # r4 += f2*f2
        movq    16(%rdi), %rax
        mulq    16(%rdi)
        add     %rax, %r15
        adc     %rdx, %rbx

        # r0 += (38*f2) * f3
        movq    16(%rdi), %rdx
        imulq   $38, %rdx, %rax
        mulq    24(%rdi)
        add     %rax, %rcx
        adc     %rdx, %r8

        # r1 += (38*f2) * f4
        movq    16(%rdi), %rdx
        imulq   $38, %rdx, %rax
        mulq    32(%rdi)
        add     %rax, %r9
        adc     %rdx, %r10

        # r1 += (19*f3) * f3
        movq    24(%rdi), %rdx
        imulq   $19, %rdx, %rax
        mulq    24(%rdi)
        add     %rax, %r9
        adc     %rdx, %r10

        # r2 += (38*f3) * f4
        movq    24(%rdi), %rdx
        imulq   $38, %rdx, %rax
        mulq    32(%rdi)
        add     %rax, %r11
        adc     %rdx, %r12

        # r3 += (19*f4) * f4
        movq    32(%rdi), %rdx
        imulq   $19, %rdx, %rax
        mulq    32(%rdi)
        add     %rax, %r13
        adc     %rdx, %r14

        # Split every 128-bit accumulator at bit 51: shld by 13 = 64-51
        # leaves bits 51 and up in the high register, the mask keeps the low
        # part, and each high part is added into the next limb.
        movq    CRYPTO_NAMESPACE(batch_REDMASK51)(%rip), %rdx
        shld    $13, %rcx, %r8
        and     %rdx, %rcx
        shld    $13, %r9, %r10
        and     %rdx, %r9
        add     %r8, %r9
        shld    $13, %r11, %r12
        and     %rdx, %r11
        add     %r10, %r11
        shld    $13, %r13, %r14
        and     %rdx, %r13
        add     %r12, %r13
        shld    $13, %r15, %rbx
        and     %rdx, %r15
        add     %r14, %r15

        # The part above r4 wraps around into r0, scaled by 19.
        imulq   $19, %rbx, %r8
        add     %r8, %rcx

        # Carry chain r0 -> r1 -> r2 -> r3 -> r4 -> (19x) r0, with r8 as the
        # running carry.  r2, r3 and r4 move to rax, r10 and r11 on the way.
        mov     %rcx, %r8
        shr     $51, %r8
        add     %r9, %r8
        and     %rdx, %rcx
        mov     %r8, %r9
        shr     $51, %r8
        add     %r11, %r8
        and     %rdx, %r9
        mov     %r8, %rax
        shr     $51, %r8
        add     %r13, %r8
        and     %rdx, %rax
        mov     %r8, %r10
        shr     $51, %r8
        add     %r15, %r8
        and     %rdx, %r10
        mov     %r8, %r11
        shr     $51, %r8
        imulq   $19, %r8, %r8
        add     %r8, %rcx                 # r0 is not masked again after this
        and     %rdx, %r11

        # Write the five limbs back over the input.
        movq    %rcx, 0(%rdi)
        movq    %r9, 8(%rdi)
        movq    %rax, 16(%rdi)
        movq    %r10, 24(%rdi)
        movq    %r11, 32(%rdi)

        # Go around again while the decremented counter is still positive
        # (signed compare, flags come from the sub).
        sub     $1, %rsi
        jg      ._loop

        # Reload the frame adjustment and the saved registers.
        movq    0(%rsp), %r11
        movq    8(%rsp), %r12
        movq    16(%rsp), %r13
        movq    24(%rsp), %r14
        movq    32(%rsp), %r15
        movq    40(%rsp), %rbx
        movq    48(%rsp), %rbp

        # Release the frame and hand rp / n back in rax / rdx.
        add     %r11, %rsp
        mov     %rdi, %rax
        mov     %rsi, %rdx
        ret