x86_64-gf2m.masm 7.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426
  ; NOTE(review): every line in this copy begins with an "N." ordinal that is
  ; not MASM syntax. It looks like a listing/viewer artifact and would have to
  ; be stripped before the file can be assembled.
  ; OPTION DOTNAME allows identifiers that start with a dot, which the
  ; ".text$" segment name below relies on.
  1. OPTION DOTNAME
  ; Open the code segment (256-byte aligned, class 'CODE'). It is closed by the
  ; matching ".text$ ENDS" after se_handler.
  2. .text$ SEGMENT ALIGN(256) 'CODE'
  ; Start the following procedure on a 16-byte boundary.
  3. ALIGN 16
  ;-----------------------------------------------------------------------
  ; _mul_1x1 -- private helper: 64x64 to 128-bit carry-less multiply
  ; (partial products are combined with XOR instead of addition).
  ; Custom register interface, as used by the call sites in bn_GF2m_mul_2x2:
  ;   In:    rax = operand a
  ;          rbp = operand b
  ;          r8  = nibble mask (the caller passes 0fh)
  ;   Out:   rdx:rax = product (rdx = upper 64 bits, rax = lower 64 bits)
  ;   Clobb: rbx, rcx, rsi, rdi, rbp, r9-r14, xmm0, xmm1, flags.
  ;          None of them is saved here; r8 is only read.
  ;   Stack: 136 bytes; [rsp .. rsp+127] holds a 16-entry qword table.
  ; Method: a1 = a with its top three bits cleared; tab[i] = carry-less
  ; product of a1 and i for i = 0..15. b is consumed four bits at a time,
  ; each nibble selects a table entry that is shifted into position and
  ; XORed into the result. Bits 63..61 of a are handled separately up front.
  ;-----------------------------------------------------------------------
  4. _mul_1x1 PROC PRIVATE
  ; Allocate the 128-byte table plus 8 more bytes (136 = 17*8, the same
  ; amount the unwind data for this procedure records).
  5. sub rsp,128+8
  ; r9 starts as all-ones and becomes the mask 2^61-1 after the shr below.
  6. mov r9,-1
  ; rsi = a*2, so bit 62 of a is now in bit 63.
  7. lea rsi,QWORD PTR[rax*1+rax]
  8. shr r9,3
  ; rdi = a*4, so bit 61 of a is now in bit 63.
  9. lea rdi,QWORD PTR[rax*4]
  ; r9 = a1 = a with its top three bits cleared.
  10. and r9,rax
  ; r12 = a*8 truncated to 64 bits; the top three bits shift out, so this
  ; equals a1*8.
  11. lea r12,QWORD PTR[rax*8]
  ; rax = all-ones if bit 63 of a is set, else 0.
  12. sar rax,63
  ; r10 = a1*2.
  13. lea r10,QWORD PTR[r9*1+r9]
  ; rsi = all-ones if bit 62 of a is set, else 0.
  14. sar rsi,63
  ; r11 = a1*4.
  15. lea r11,QWORD PTR[r9*4]
  ; rax = b if bit 63 of a is set, else 0.
  16. and rax,rbp
  ; rdi = all-ones if bit 61 of a is set, else 0.
  17. sar rdi,63
  ; Place the bit-63 term at bit position 63 of the 128-bit result:
  ; upper part goes to rdx (shr 1), lower part stays in rax (shl 63).
  18. mov rdx,rax
  19. shl rax,63
  ; rsi = b if bit 62 of a is set, else 0.
  20. and rsi,rbp
  21. shr rdx,1
  ; Same split for the bit-62 term (position 62); rcx carries the upper part.
  22. mov rcx,rsi
  23. shl rsi,62
  ; rdi = b if bit 61 of a is set, else 0.
  24. and rdi,rbp
  25. shr rcx,2
  26. xor rax,rsi
  ; Same split for the bit-61 term (position 61); rbx carries the upper part.
  27. mov rbx,rdi
  28. shl rdi,61
  29. xor rdx,rcx
  30. shr rbx,3
  31. xor rax,rdi
  32. xor rdx,rbx
  ; rdx:rax now holds the contribution of bits 63..61 of a.
  ; Build tab[0..15] at [rsp + 8*i]: tab[i] is the XOR of a1, a1*2, a1*4 and
  ; a1*8 selected by the set bits of i. Register updates are interleaved
  ; with the stores.
  ; r13 = a1 ^ a1*2 (tab[3]).
  33. mov r13,r9
  34. mov QWORD PTR[rsp],0
  35. xor r13,r10
  36. mov QWORD PTR[8+rsp],r9
  ; r14 = a1*4 ^ a1*8 (tab[12]).
  37. mov r14,r11
  38. mov QWORD PTR[16+rsp],r10
  39. xor r14,r12
  40. mov QWORD PTR[24+rsp],r13
  ; XOR a1*4 into r9/r10/r13 to form tab[5], tab[6], tab[7].
  41. xor r9,r11
  42. mov QWORD PTR[32+rsp],r11
  43. xor r10,r11
  44. mov QWORD PTR[40+rsp],r9
  45. xor r13,r11
  46. mov QWORD PTR[48+rsp],r10
  ; XOR r14 in: the a1*4 term cancels and a1*8 is added, giving
  ; tab[9], tab[10], tab[11].
  47. xor r9,r14
  48. mov QWORD PTR[56+rsp],r13
  49. xor r10,r14
  50. mov QWORD PTR[64+rsp],r12
  51. xor r13,r14
  52. mov QWORD PTR[72+rsp],r9
  ; XOR a1*4 back in to form tab[13], tab[14], tab[15].
  53. xor r9,r11
  54. mov QWORD PTR[80+rsp],r10
  55. xor r10,r11
  56. mov QWORD PTR[88+rsp],r13
  57. xor r13,r11
  58. mov QWORD PTR[96+rsp],r14
  ; Nibble extraction starts while the last entries are stored:
  ; rsi = nibble 0 of b, rdi = nibble 1 of b; rbp is shifted right by 4
  ; after each extraction.
  59. mov rsi,r8
  60. mov QWORD PTR[104+rsp],r9
  61. and rsi,rbp
  62. mov QWORD PTR[112+rsp],r10
  63. shr rbp,4
  64. mov QWORD PTR[120+rsp],r13
  65. mov rdi,r8
  66. and rdi,rbp
  67. shr rbp,4
  ; Lookup sequence. Even-numbered nibbles accumulate in xmm0: the entry for
  ; nibble 2k is shifted left by k bytes with pslldq. Odd-numbered nibbles
  ; accumulate in rdx:rax: the entry for nibble 2k+1 is shifted left by
  ; 8k+4 bits, split across the two registers with shl/shr.
  ; rsi indexes even nibbles, rdi indexes odd nibbles.
  ; xmm0 = tab[nibble 0].
  68. movq xmm0,QWORD PTR[rsi*8+rsp]
  69. mov rsi,r8
  70. and rsi,rbp
  71. shr rbp,4
  ; Nibble 1 at bit 4 of rdx:rax; nibble 2 at byte 1 of xmm0.
  72. mov rcx,QWORD PTR[rdi*8+rsp]
  73. mov rdi,r8
  74. mov rbx,rcx
  75. shl rcx,4
  76. and rdi,rbp
  77. movq xmm1,QWORD PTR[rsi*8+rsp]
  78. shr rbx,60
  79. xor rax,rcx
  80. pslldq xmm1,1
  81. mov rsi,r8
  82. shr rbp,4
  83. xor rdx,rbx
  84. and rsi,rbp
  85. shr rbp,4
  86. pxor xmm0,xmm1
  ; Nibble 3 at bit 12 of rdx:rax; nibble 4 at byte 2 of xmm0.
  87. mov rcx,QWORD PTR[rdi*8+rsp]
  88. mov rdi,r8
  89. mov rbx,rcx
  90. shl rcx,12
  91. and rdi,rbp
  92. movq xmm1,QWORD PTR[rsi*8+rsp]
  93. shr rbx,52
  94. xor rax,rcx
  95. pslldq xmm1,2
  96. mov rsi,r8
  97. shr rbp,4
  98. xor rdx,rbx
  99. and rsi,rbp
  100. shr rbp,4
  101. pxor xmm0,xmm1
  ; Nibble 5 at bit 20 of rdx:rax; nibble 6 at byte 3 of xmm0.
  102. mov rcx,QWORD PTR[rdi*8+rsp]
  103. mov rdi,r8
  104. mov rbx,rcx
  105. shl rcx,20
  106. and rdi,rbp
  107. movq xmm1,QWORD PTR[rsi*8+rsp]
  108. shr rbx,44
  109. xor rax,rcx
  110. pslldq xmm1,3
  111. mov rsi,r8
  112. shr rbp,4
  113. xor rdx,rbx
  114. and rsi,rbp
  115. shr rbp,4
  116. pxor xmm0,xmm1
  ; Nibble 7 at bit 28 of rdx:rax; nibble 8 at byte 4 of xmm0.
  117. mov rcx,QWORD PTR[rdi*8+rsp]
  118. mov rdi,r8
  119. mov rbx,rcx
  120. shl rcx,28
  121. and rdi,rbp
  122. movq xmm1,QWORD PTR[rsi*8+rsp]
  123. shr rbx,36
  124. xor rax,rcx
  125. pslldq xmm1,4
  126. mov rsi,r8
  127. shr rbp,4
  128. xor rdx,rbx
  129. and rsi,rbp
  130. shr rbp,4
  131. pxor xmm0,xmm1
  ; Nibble 9 at bit 36 of rdx:rax; nibble 10 at byte 5 of xmm0.
  132. mov rcx,QWORD PTR[rdi*8+rsp]
  133. mov rdi,r8
  134. mov rbx,rcx
  135. shl rcx,36
  136. and rdi,rbp
  137. movq xmm1,QWORD PTR[rsi*8+rsp]
  138. shr rbx,28
  139. xor rax,rcx
  140. pslldq xmm1,5
  141. mov rsi,r8
  142. shr rbp,4
  143. xor rdx,rbx
  144. and rsi,rbp
  145. shr rbp,4
  146. pxor xmm0,xmm1
  ; Nibble 11 at bit 44 of rdx:rax; nibble 12 at byte 6 of xmm0.
  147. mov rcx,QWORD PTR[rdi*8+rsp]
  148. mov rdi,r8
  149. mov rbx,rcx
  150. shl rcx,44
  151. and rdi,rbp
  152. movq xmm1,QWORD PTR[rsi*8+rsp]
  153. shr rbx,20
  154. xor rax,rcx
  155. pslldq xmm1,6
  156. mov rsi,r8
  157. shr rbp,4
  158. xor rdx,rbx
  159. and rsi,rbp
  160. shr rbp,4
  161. pxor xmm0,xmm1
  ; Nibble 13 at bit 52 of rdx:rax; nibble 14 at byte 7 of xmm0.
  162. mov rcx,QWORD PTR[rdi*8+rsp]
  163. mov rdi,r8
  164. mov rbx,rcx
  165. shl rcx,52
  166. and rdi,rbp
  167. movq xmm1,QWORD PTR[rsi*8+rsp]
  168. shr rbx,12
  169. xor rax,rcx
  170. pslldq xmm1,7
  171. mov rsi,r8
  172. shr rbp,4
  173. xor rdx,rbx
  174. and rsi,rbp
  175. shr rbp,4
  176. pxor xmm0,xmm1
  ; Nibble 15 (the last one) at bit 60 of rdx:rax.
  177. mov rcx,QWORD PTR[rdi*8+rsp]
  178. mov rbx,rcx
  179. shl rcx,60
  ; Bytes 66 48 0F 7E C6 encode "movq rsi,xmm0": low 64 bits of the xmm0
  ; accumulator. This overwrites the index left in rsi, which is not used.
  180. DB 102,72,15,126,198
  181. shr rbx,4
  182. xor rax,rcx
  ; Bring the upper 64 bits of the xmm0 accumulator down.
  183. psrldq xmm0,8
  184. xor rdx,rbx
  ; Bytes 66 48 0F 7E C7 encode "movq rdi,xmm0": upper 64 bits of the
  ; even-nibble accumulator.
  185. DB 102,72,15,126,199
  ; Fold the even-nibble accumulator into the result.
  186. xor rax,rsi
  187. xor rdx,rdi
  ; Release the table.
  188. add rsp,128+8
  189. DB 0F3h,0C3h ;repret
  ; End-of-function label; referenced by the .pdata entry for _mul_1x1.
  190. $L$end_mul_1x1::
  191. _mul_1x1 ENDP
  ; OPENSSL_ia32cap_P is defined outside this file; only its first qword is
  ; read here.
  192. EXTERN OPENSSL_ia32cap_P:NEAR
  193. PUBLIC bn_GF2m_mul_2x2
  194. ALIGN 16
  ;-----------------------------------------------------------------------
  ; bn_GF2m_mul_2x2 -- 128x128 to 256-bit carry-less multiply, built from
  ; three 64x64 carry-less products combined Karatsuba-style.
  ; Argument registers follow the Microsoft x64 convention:
  ;   In:  rcx        = pointer to a 4-qword result buffer r
  ;        rdx, r8    = high and low qword of operand a
  ;        r9         = high qword of operand b
  ;        [rsp+40]   = low qword of operand b (fifth argument, on entry)
  ;   Out: r[0..3] written, least-significant qword at r[0].
  ; Two code paths, selected by bit 33 of the qword at OPENSSL_ia32cap_P:
  ;   - bit set:   PCLMULQDQ path; touches rax, r10, xmm0-xmm5 and flags only,
  ;                uses no stack.
  ;   - bit clear: table-driven path through _mul_1x1; saves and restores
  ;                rbx, rbp, rsi, rdi, r12, r13, r14 in a 136-byte frame.
  ; NOTE(review): bit 33 presumably is the PCLMULQDQ capability flag --
  ; confirm against the definition of OPENSSL_ia32cap_P.
  ;-----------------------------------------------------------------------
  195. bn_GF2m_mul_2x2 PROC PUBLIC
  ; rax = entry rsp. It is not read again in this procedure; se_handler uses
  ; context-Rax as the stack pointer when a fault hits before $L$body_mul_2x2.
  196. mov rax,rsp
  197. mov r10,QWORD PTR[OPENSSL_ia32cap_P]
  198. bt r10,33
  199. jnc $L$vanilla_mul_2x2
  ; PCLMULQDQ path. Instructions are emitted as raw bytes:
  ; 66 48 0F 6E C2 = movq xmm0,rdx   (a high)
  200. DB 102,72,15,110,194
  ; 66 49 0F 6E C9 = movq xmm1,r9    (b high)
  201. DB 102,73,15,110,201
  ; 66 49 0F 6E D0 = movq xmm2,r8    (a low)
  202. DB 102,73,15,110,208
  ; xmm3 = fifth argument (b low).
  203. movq xmm3,QWORD PTR[40+rsp]
  204. movdqa xmm4,xmm0
  205. movdqa xmm5,xmm1
  ; 66 0F 3A 44 C1 00 = pclmulqdq xmm0,xmm1,0 : xmm0 = a_hi * b_hi
  206. DB 102,15,58,68,193,0
  ; xmm4 = a_hi ^ a_lo, xmm5 = b_hi ^ b_lo.
  207. pxor xmm4,xmm2
  208. pxor xmm5,xmm3
  ; 66 0F 3A 44 D3 00 = pclmulqdq xmm2,xmm3,0 : xmm2 = a_lo * b_lo
  209. DB 102,15,58,68,211,0
  ; 66 0F 3A 44 E5 00 = pclmulqdq xmm4,xmm5,0 : xmm4 = (a_hi^a_lo)*(b_hi^b_lo)
  210. DB 102,15,58,68,229,0
  ; xmm4 = middle term: remove the high and low products.
  211. xorps xmm4,xmm0
  212. xorps xmm4,xmm2
  ; Add the middle term at a 64-bit offset: its lower half goes into the top
  ; of the low product, its upper half into the bottom of the high product.
  213. movdqa xmm5,xmm4
  214. pslldq xmm4,8
  215. psrldq xmm5,8
  216. pxor xmm2,xmm4
  217. pxor xmm0,xmm5
  ; r[0..1] = low 128 bits, r[2..3] = high 128 bits (unaligned stores).
  218. movdqu XMMWORD PTR[rcx],xmm2
  219. movdqu XMMWORD PTR[16+rcx],xmm0
  220. DB 0F3h,0C3h ;repret
  221. ALIGN 16
  ; Table-driven path. Frame layout after the 136-byte allocation
  ; (offsets from rsp):
  ;   0 / 8     low / high qword of a_lo * b_lo
  ;   16 / 24   low / high qword of a_hi * b_hi
  ;   32 .. 64  copies of rcx, rdx, r8, r9 and the fifth argument
  ;   80 .. 128 saved r14, r13, r12, rbp, rbx, rdi, rsi
  ; This label is also the start address of the .pdata entry for this range.
  222. $L$vanilla_mul_2x2::
  223. lea rsp,QWORD PTR[((-136))+rsp]
  ; r10 = fifth argument: entry [rsp+40], now 136 bytes further up.
  224. mov r10,QWORD PTR[176+rsp]
  ; Save the registers that _mul_1x1 and the code below overwrite.
  225. mov QWORD PTR[120+rsp],rdi
  226. mov QWORD PTR[128+rsp],rsi
  227. mov QWORD PTR[80+rsp],r14
  228. mov QWORD PTR[88+rsp],r13
  229. mov QWORD PTR[96+rsp],r12
  230. mov QWORD PTR[104+rsp],rbp
  231. mov QWORD PTR[112+rsp],rbx
  ; se_handler treats a fault address below this label as "still in prologue".
  232. $L$body_mul_2x2::
  ; Spill the five arguments; _mul_1x1 overwrites rcx and rdx, and r8 is
  ; reused for the nibble mask.
  233. mov QWORD PTR[32+rsp],rcx
  234. mov QWORD PTR[40+rsp],rdx
  235. mov QWORD PTR[48+rsp],r8
  236. mov QWORD PTR[56+rsp],r9
  237. mov QWORD PTR[64+rsp],r10
  ; r8 = 0fh nibble mask; stays in r8 across all three calls.
  238. mov r8,0fh
  ; Product 1: a_hi * b_hi, stored at [16+rsp] / [24+rsp].
  239. mov rax,rdx
  240. mov rbp,r9
  241. call _mul_1x1
  242. mov QWORD PTR[16+rsp],rax
  243. mov QWORD PTR[24+rsp],rdx
  ; Product 2: a_lo * b_lo, stored at [rsp] / [8+rsp].
  244. mov rax,QWORD PTR[48+rsp]
  245. mov rbp,QWORD PTR[64+rsp]
  246. call _mul_1x1
  247. mov QWORD PTR[rsp],rax
  248. mov QWORD PTR[8+rsp],rdx
  ; Product 3: (a_hi ^ a_lo) * (b_hi ^ b_lo), left in rdx:rax as M1:M0.
  249. mov rax,QWORD PTR[40+rsp]
  250. mov rbp,QWORD PTR[56+rsp]
  251. xor rax,QWORD PTR[48+rsp]
  252. xor rbp,QWORD PTR[64+rsp]
  253. call _mul_1x1
  ; Reload: rcx:rbx = L1:L0 (low product), rsi:rdi = H1:H0 (high product),
  ; rbp = result pointer r.
  254. mov rbx,QWORD PTR[rsp]
  255. mov rcx,QWORD PTR[8+rsp]
  256. mov rdi,QWORD PTR[16+rsp]
  257. mov rsi,QWORD PTR[24+rsp]
  258. mov rbp,QWORD PTR[32+rsp]
  ; Recombination, with stores interleaved:
  ;   r[0] = L0
  ;   r[1] = L1 ^ M0 ^ L0 ^ H0
  ;   r[2] = H0 ^ M1 ^ L1 ^ H1
  ;   r[3] = H1
  259. xor rax,rdx
  260. xor rdx,rcx
  261. xor rax,rbx
  262. mov QWORD PTR[rbp],rbx
  263. xor rdx,rdi
  264. mov QWORD PTR[24+rbp],rsi
  265. xor rax,rsi
  ; rdx is now the final r[2].
  266. xor rdx,rsi
  ; XOR with r[2] cancels M1 and H1 in rax, leaving the final r[1].
  267. xor rax,rdx
  268. mov QWORD PTR[16+rbp],rdx
  269. mov QWORD PTR[8+rbp],rax
  ; Restore saved registers and release the frame; se_handler replays these
  ; same loads when unwinding from inside the body.
  270. mov r14,QWORD PTR[80+rsp]
  271. mov r13,QWORD PTR[88+rsp]
  272. mov r12,QWORD PTR[96+rsp]
  273. mov rbp,QWORD PTR[104+rsp]
  274. mov rbx,QWORD PTR[112+rsp]
  275. mov rdi,QWORD PTR[120+rsp]
  276. mov rsi,QWORD PTR[128+rsp]
  277. lea rsp,QWORD PTR[136+rsp]
  ; From here on the frame is gone; se_handler leaves the context untouched
  ; for fault addresses at or beyond this label.
  278. $L$epilogue_mul_2x2::
  279. DB 0F3h,0C3h ;repret
  ; End address of the .pdata entry for this range.
  280. $L$end_mul_2x2::
  281. bn_GF2m_mul_2x2 ENDP
  ; NUL-terminated ASCII identification string embedded in the code segment.
  ; It decodes to "GF(2^m) Multiplication for x86_64, CRYPTOGAMS by"
  ; followed by the author's e-mail address in angle brackets.
  ; No code in this file references it.
  282. DB 71,70,40,50,94,109,41,32,77,117,108,116,105,112,108,105
  283. DB 99,97,116,105,111,110,32,102,111,114,32,120,56,54,95,54
  284. DB 52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121
  285. DB 32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46
  286. DB 111,114,103,62,0
  ; Re-align after the odd-length string.
  287. ALIGN 16
  ; Import-table slot for RtlVirtualUnwind; called indirectly below.
  288. EXTERN __imp_RtlVirtualUnwind:NEAR
  289. ALIGN 16
  ;-----------------------------------------------------------------------
  ; se_handler -- exception handler named in $L$SEH_info_2x2, i.e. for the
  ; range $L$vanilla_mul_2x2 .. $L$end_mul_2x2.
  ; Registers read on entry: r8 = context record, r9 = dispatcher context
  ; (third and fourth handler arguments). rcx and rdx are not read.
  ; What it does:
  ;   1. Works out the caller's rsp for the faulting frame, and if the fault
  ;      is inside the body, reloads the saved rbx/rbp/rsi/rdi/r12-r14 from
  ;      the frame into the context.
  ;   2. Copies the whole context (154 qwords) to the dispatcher's
  ;      ContextRecord.
  ;   3. Calls RtlVirtualUnwind and returns 1 in eax.
  ; Offsets into r8 follow the Win64 CONTEXT layout: 120 Rax, 144 Rbx,
  ; 152 Rsp, 160 Rbp, 168 Rsi, 176 Rdi, 216 R12, 224 R13, 232 R14, 248 Rip.
  ; Offsets into r9 follow DISPATCHER_CONTEXT: 0 ControlPc, 8 ImageBase,
  ; 16 FunctionEntry, 24 EstablisherFrame, 40 ContextRecord, 56 HandlerData.
  ;-----------------------------------------------------------------------
  290. se_handler PROC PRIVATE
  ; Preserve every register this handler modifies, plus flags (the copy
  ; below executes cld).
  291. push rsi
  292. push rdi
  293. push rbx
  294. push rbp
  295. push r12
  296. push r13
  297. push r14
  298. push r15
  299. pushfq
  ; 64 bytes: 32 for the callee's register home area plus four stack
  ; arguments for RtlVirtualUnwind.
  300. sub rsp,64
  ; rax = context-Rax (bn_GF2m_mul_2x2 puts its entry rsp there),
  ; rbx = context-Rip (fault address).
  301. mov rax,QWORD PTR[120+r8]
  302. mov rbx,QWORD PTR[248+r8]
  ; Fault before the body label: nothing to restore, entry rsp is in rax.
  303. lea r10,QWORD PTR[$L$body_mul_2x2]
  304. cmp rbx,r10
  305. jb $L$in_prologue
  ; Otherwise start from the faulting frame's current rsp.
  306. mov rax,QWORD PTR[152+r8]
  ; Fault at or after the epilogue label: frame already released and
  ; registers already restored, so keep the context as is.
  307. lea r10,QWORD PTR[$L$epilogue_mul_2x2]
  308. cmp rbx,r10
  309. jae $L$in_prologue
  ; Fault inside the body: mirror the epilogue of bn_GF2m_mul_2x2 by loading
  ; the saved registers from the 136-byte frame.
  310. mov r14,QWORD PTR[80+rax]
  311. mov r13,QWORD PTR[88+rax]
  312. mov r12,QWORD PTR[96+rax]
  313. mov rbp,QWORD PTR[104+rax]
  314. mov rbx,QWORD PTR[112+rax]
  315. mov rdi,QWORD PTR[120+rax]
  316. mov rsi,QWORD PTR[128+rax]
  ; Write them into the context: Rbx, Rbp, Rsi, Rdi, R12, R13, R14.
  317. mov QWORD PTR[144+r8],rbx
  318. mov QWORD PTR[160+r8],rbp
  319. mov QWORD PTR[168+r8],rsi
  320. mov QWORD PTR[176+r8],rdi
  321. mov QWORD PTR[216+r8],r12
  322. mov QWORD PTR[224+r8],r13
  323. mov QWORD PTR[232+r8],r14
  ; rax = rsp as it was on entry to bn_GF2m_mul_2x2.
  324. lea rax,QWORD PTR[136+rax]
  325. $L$in_prologue::
  ; context-Rsp = reconstructed stack pointer.
  326. mov QWORD PTR[152+r8],rax
  ; Copy the context to disp-ContextRecord: rdi = destination, rsi = source,
  ; ecx = 154 qwords (1232 bytes).
  327. mov rdi,QWORD PTR[40+r9]
  328. mov rsi,r8
  329. mov ecx,154
  ; Little-endian bytes FC F3 48 A5 = cld ; rep movsq.
  330. DD 0a548f3fch
  ; Set up RtlVirtualUnwind arguments from the dispatcher context in rsi:
  ;   rcx = 0 (handler type), rdx = ImageBase, r8 = ControlPc,
  ;   r9 = FunctionEntry, [rsp+32] = ContextRecord,
  ;   [rsp+40] = address of HandlerData, [rsp+48] = address of
  ;   EstablisherFrame, [rsp+56] = 0 (no context pointers).
  331. mov rsi,r9
  332. xor rcx,rcx
  333. mov rdx,QWORD PTR[8+rsi]
  334. mov r8,QWORD PTR[rsi]
  335. mov r9,QWORD PTR[16+rsi]
  336. mov r10,QWORD PTR[40+rsi]
  337. lea r11,QWORD PTR[56+rsi]
  338. lea r12,QWORD PTR[24+rsi]
  339. mov QWORD PTR[32+rsp],r10
  340. mov QWORD PTR[40+rsp],r11
  341. mov QWORD PTR[48+rsp],r12
  342. mov QWORD PTR[56+rsp],rcx
  343. call QWORD PTR[__imp_RtlVirtualUnwind]
  ; Return value 1 (ExceptionContinueSearch in the Windows headers).
  344. mov eax,1
  345. add rsp,64
  ; Restore flags and registers in reverse push order.
  346. popfq
  347. pop r15
  348. pop r14
  349. pop r13
  350. pop r12
  351. pop rbp
  352. pop rbx
  353. pop rdi
  354. pop rsi
  355. DB 0F3h,0C3h ;repret
  356. se_handler ENDP
  ; Close the code segment opened at the top of the file.
  357. .text$ ENDS
  ; Function table: each entry is three image-relative addresses --
  ; start of range, end of range, unwind information.
  358. .pdata SEGMENT READONLY ALIGN(4)
  359. ALIGN 4
  ; Entry 1: all of _mul_1x1, described by $L$SEH_info_1x1.
  360. DD imagerel _mul_1x1
  361. DD imagerel $L$end_mul_1x1
  362. DD imagerel $L$SEH_info_1x1
  ; Entry 2: the table-driven path of bn_GF2m_mul_2x2 only. The range starts
  ; at $L$vanilla_mul_2x2, so the PCLMULQDQ path (which never changes rsp)
  ; has no entry.
  363. DD imagerel $L$vanilla_mul_2x2
  364. DD imagerel $L$end_mul_2x2
  365. DD imagerel $L$SEH_info_2x2
  366. .pdata ENDS
  ; Unwind information referenced from .pdata.
  367. .xdata SEGMENT READONLY ALIGN(8)
  368. ALIGN 8
  ; _mul_1x1: static unwind description, no handler.
  369. $L$SEH_info_1x1::
  ; Header: version 1 / flags 0, prologue size 7 bytes, 2 unwind-code slots,
  ; no frame register.
  370. DB 001h,007h,002h,000h
  ; Unwind code at prologue offset 7: operation 1 (large stack allocation)
  ; with the size in the next slot, 0011h = 17 qwords = 136 bytes. This
  ; matches "sub rsp,128+8" at the top of _mul_1x1.
  371. DB 007h,001h,011h,000h
  ; bn_GF2m_mul_2x2 (table-driven path): no static unwind codes; unwinding is
  ; delegated to se_handler.
  372. $L$SEH_info_2x2::
  ; Header: 9 = version 1 with the exception-handler flag set; prologue
  ; size 0, 0 unwind codes, no frame register.
  373. DB 9,0,0,0
  ; Image-relative address of the handler.
  374. DD imagerel se_handler
  375. .xdata ENDS
  376. END