x86-gf2m.masm 6.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360
  ; Assembler setup for this 32-bit, flat-model MASM module.
  ; Emit an assembly-time message when the assembler is older than version 8.00.
  1. IF @Version LT 800
  2. ECHO MASM version 8.00 or later is strongly recommended.
  3. ENDIF
  ; Enable the instruction sets used below: .686 baseline plus MMX/SSE (XMM) forms.
  4. .686
  5. .XMM
  ; On pre-8.00 assemblers, define XMMWORD as a 16-byte-aligned structure of two
  ; quadwords so that `XMMWORD PTR` operands (used by the SSE path) can be written.
  ; NOTE(review): presumably older MASM lacks a built-in XMMWORD type - confirm.
  6. IF @Version LT 800
  7. XMMWORD STRUCT 16
  8. DQ 2 dup (?)
  9. XMMWORD ENDS
  10. ENDIF
  11. .MODEL FLAT
  ; DOTNAME permits identifiers that begin with a dot (the `.text$` and `.bss`
  ; segment names used in this file).
  12. OPTION DOTNAME
  ; Open the code segment: PAGE alignment on old assemblers, 64-byte alignment
  ; otherwise.
  13. IF @Version LT 800
  14. .text$ SEGMENT PAGE 'CODE'
  15. ELSE
  16. .text$ SEGMENT ALIGN(64) 'CODE'
  17. ENDIF
  ; The EXTERN declaration is intentionally left commented out: the symbol is
  ; declared with COMM in the .bss segment at the end of this file instead.
  18. ;EXTERN _OPENSSL_ia32cap_P:NEAR
  ; ---------------------------------------------------------------------------
  ; __mul_1x1_mmx - 32 x 32 -> 64-bit carry-less (polynomial over GF(2)) multiply,
  ;                 accumulating in MMX registers.
  ;
  ; In:    eax = a, ebx = b            (register arguments; private helper)
  ; Out:   mm0 = 64-bit carry-less product of a and b
  ; Clobb: ebx, ecx, edx, ebp, esi, edi, mm1-mm5, flags   (eax is only read)
  ; Stack: reserves 36 bytes; the 8 dwords at [esp]..[esp+28] hold a lookup table
  ; Note:  does not execute emms - MMX state is still live on return.
  ;
  ; Method: with a1 = a AND 3FFFFFFFh, build tab[i] = a1 (x) i for i = 0..7
  ; (each entry fits in 32 bits), then walk b in 3-bit windows from the least
  ; significant end: window k selects tab[(b >> 3k) AND 7], which is shifted left
  ; by 3k and XORed into mm0 (11 windows, shifts 0..30). Bits 30 and 31 of a are
  ; excluded from the table and handled separately: mm4 = b<<30 if bit 30 of a is
  ; set, mm5 = b<<31 if bit 31 of a is set; both are XORed in near the end.
  ; The integer and MMX instruction streams are interleaved; do not reorder.
  ; ---------------------------------------------------------------------------
  19. ALIGN 16
  20. __mul_1x1_mmx PROC PRIVATE
  21. sub esp,36 ; scratch frame for the 8-entry table
  22. mov ecx,eax
  23. lea edx,DWORD PTR [eax*1+eax] ; edx = 2*a
  24. and ecx,1073741823 ; ecx = a1 = a AND 3FFFFFFFh
  25. lea ebp,DWORD PTR [edx*1+edx] ; ebp = a4 = 4*a (mod 2^32) = a1<<2
  26. mov DWORD PTR [esp],0 ; tab[0] = 0
  27. and edx,2147483647 ; edx = a2 = (2*a) AND 7FFFFFFFh = a1<<1
  28. movd mm2,eax ; mm2 = a, zero-extended to 64 bits
  29. movd mm3,ebx ; mm3 = b, zero-extended to 64 bits
  30. mov DWORD PTR 4[esp],ecx ; tab[1] = a1
  31. xor ecx,edx ; ecx = a1^a2
  32. pxor mm5,mm5
  33. pxor mm4,mm4
  34. mov DWORD PTR 8[esp],edx ; tab[2] = a2
  35. xor edx,ebp ; edx = a2^a4
  36. mov DWORD PTR 12[esp],ecx ; tab[3] = a1^a2
  37. pcmpgtd mm5,mm2 ; low dword of mm5 = all ones iff bit 31 of a is set (0 > a, signed)
  38. paddd mm2,mm2 ; low dword of mm2 = a<<1, moving bit 30 of a into the sign position
  39. xor ecx,edx ; ecx = a1^a4
  40. mov DWORD PTR 16[esp],ebp ; tab[4] = a4
  41. xor ebp,edx ; ebp = a2
  42. pand mm5,mm3 ; mm5 = b if bit 31 of a is set, else 0
  43. pcmpgtd mm4,mm2 ; low dword of mm4 = all ones iff bit 30 of a is set
  44. mov DWORD PTR 20[esp],ecx ; tab[5] = a1^a4
  45. xor ebp,ecx ; ebp = a1^a2^a4
  46. psllq mm5,31 ; mm5 = contribution of bit 31 of a: b<<31 (or 0)
  47. pand mm4,mm3 ; mm4 = b if bit 30 of a is set, else 0
  48. mov DWORD PTR 24[esp],edx ; tab[6] = a2^a4
  49. mov esi,7
  50. mov DWORD PTR 28[esp],ebp ; tab[7] = a1^a2^a4
  ; From here ebp holds the 3-bit window mask (7). esi/edi alternate as table
  ; indices and mm1/mm2 alternate as temporaries; ebx is shifted right by 3 after
  ; each window is extracted.
  51. mov ebp,esi
  52. and esi,ebx ; esi = window 0 = b AND 7
  53. shr ebx,3
  54. mov edi,ebp
  55. psllq mm4,30 ; mm4 = contribution of bit 30 of a: b<<30 (or 0)
  56. and edi,ebx ; edi = window 1
  57. shr ebx,3
  58. movd mm0,DWORD PTR [esi*4+esp] ; mm0 = tab[window 0] (shift 0) - starts the accumulator
  59. mov esi,ebp
  60. and esi,ebx ; esi = window 2
  61. shr ebx,3
  62. movd mm2,DWORD PTR [edi*4+esp] ; tab[window 1]
  63. mov edi,ebp
  64. psllq mm2,3
  65. and edi,ebx ; edi = window 3
  66. shr ebx,3
  67. pxor mm0,mm2
  68. movd mm1,DWORD PTR [esi*4+esp] ; tab[window 2]
  69. mov esi,ebp
  70. psllq mm1,6
  71. and esi,ebx ; esi = window 4
  72. shr ebx,3
  73. pxor mm0,mm1
  74. movd mm2,DWORD PTR [edi*4+esp] ; tab[window 3]
  75. mov edi,ebp
  76. psllq mm2,9
  77. and edi,ebx ; edi = window 5
  78. shr ebx,3
  79. pxor mm0,mm2
  80. movd mm1,DWORD PTR [esi*4+esp] ; tab[window 4]
  81. mov esi,ebp
  82. psllq mm1,12
  83. and esi,ebx ; esi = window 6
  84. shr ebx,3
  85. pxor mm0,mm1
  86. movd mm2,DWORD PTR [edi*4+esp] ; tab[window 5]
  87. mov edi,ebp
  88. psllq mm2,15
  89. and edi,ebx ; edi = window 7
  90. shr ebx,3
  91. pxor mm0,mm2
  92. movd mm1,DWORD PTR [esi*4+esp] ; tab[window 6]
  93. mov esi,ebp
  94. psllq mm1,18
  95. and esi,ebx ; esi = window 8
  96. shr ebx,3
  97. pxor mm0,mm1
  98. movd mm2,DWORD PTR [edi*4+esp] ; tab[window 7]
  99. mov edi,ebp
  100. psllq mm2,21
  101. and edi,ebx ; edi = window 9
  102. shr ebx,3
  103. pxor mm0,mm2
  104. movd mm1,DWORD PTR [esi*4+esp] ; tab[window 8]
  105. mov esi,ebp
  106. psllq mm1,24
  107. and esi,ebx ; esi = window 10 (only bits 30-31 of b remain in ebx here)
  108. shr ebx,3
  109. pxor mm0,mm1
  110. movd mm2,DWORD PTR [edi*4+esp] ; tab[window 9]
  111. pxor mm0,mm4 ; fold in the bit-30-of-a term
  112. psllq mm2,27
  113. pxor mm0,mm2
  114. movd mm1,DWORD PTR [esi*4+esp] ; tab[window 10] - last read of the stack table
  115. pxor mm0,mm5 ; fold in the bit-31-of-a term
  116. psllq mm1,30
  117. add esp,36 ; release the frame; no stack access follows
  118. pxor mm0,mm1 ; mm0 = final 64-bit product
  119. ret
  120. __mul_1x1_mmx ENDP
  ; ---------------------------------------------------------------------------
  ; __mul_1x1_ialu - 32 x 32 -> 64-bit carry-less (polynomial over GF(2)) multiply
  ;                  using only the integer registers.
  ;
  ; In:    eax = a, ebx = b            (register arguments; private helper)
  ; Out:   edx:eax = 64-bit carry-less product (eax = low dword, edx = high dword)
  ; Clobb: ebx, ecx, ebp, esi, edi, flags
  ; Stack: reserves 36 bytes; the 8 dwords at [esp]..[esp+28] hold a lookup table
  ;
  ; Method: with a1 = a AND 3FFFFFFFh, build tab[i] = a1 (x) i for i = 0..7, then
  ; walk b in 3-bit windows from the least significant end. For window k the
  ; selected entry v contributes v << 3k to eax and v >> (32 - 3k) to edx
  ; (11 windows, shifts 0..30). Bits 30 and 31 of a are excluded from the table:
  ; each is turned into an all-ones/zero mask with sar, ANDed with b, and the
  ; resulting b<<30 / b<<31 terms seed eax (low part) and edx (high part).
  ; The instruction order is hand-interleaved; do not reorder.
  ; ---------------------------------------------------------------------------
  121. ALIGN 16
  122. __mul_1x1_ialu PROC PRIVATE
  123. sub esp,36 ; scratch frame for the 8-entry table
  124. mov ecx,eax
  125. lea edx,DWORD PTR [eax*1+eax] ; edx = 2*a
  126. lea ebp,DWORD PTR [eax*4] ; ebp = a4 = 4*a (mod 2^32) = a1<<2
  127. and ecx,1073741823 ; ecx = a1 = a AND 3FFFFFFFh
  128. lea edi,DWORD PTR [eax*1+eax] ; edi = 2*a, so bit 30 of a sits in the sign bit
  129. sar eax,31 ; eax = all ones iff bit 31 of a is set
  130. mov DWORD PTR [esp],0 ; tab[0] = 0
  131. and edx,2147483647 ; edx = a2 = (2*a) AND 7FFFFFFFh = a1<<1
  132. mov DWORD PTR 4[esp],ecx ; tab[1] = a1
  133. xor ecx,edx ; ecx = a1^a2
  134. mov DWORD PTR 8[esp],edx ; tab[2] = a2
  135. xor edx,ebp ; edx = a2^a4
  136. mov DWORD PTR 12[esp],ecx ; tab[3] = a1^a2
  137. xor ecx,edx ; ecx = a1^a4
  138. mov DWORD PTR 16[esp],ebp ; tab[4] = a4
  139. xor ebp,edx ; ebp = a2
  140. mov DWORD PTR 20[esp],ecx ; tab[5] = a1^a4
  141. xor ebp,ecx ; ebp = a1^a2^a4
  142. sar edi,31 ; edi = all ones iff bit 30 of a is set
  143. and eax,ebx ; eax = b if bit 31 of a is set, else 0
  144. mov DWORD PTR 24[esp],edx ; tab[6] = a2^a4
  145. and edi,ebx ; edi = b if bit 30 of a is set, else 0
  146. mov DWORD PTR 28[esp],ebp ; tab[7] = a1^a2^a4
  ; Seed the accumulators with the bit-31 and bit-30 terms:
  ; eax = low dword of (b<<31 ^ b<<30), edx = high dword of the same.
  147. mov edx,eax
  148. shl eax,31 ; low dword of the b<<31 term
  149. mov ecx,edi
  150. shr edx,1 ; high dword of the b<<31 term
  151. mov esi,7
  152. shl edi,30 ; low dword of the b<<30 term
  153. and esi,ebx ; esi = window 0 = b AND 7
  154. shr ecx,2 ; high dword of the b<<30 term
  155. xor eax,edi
  156. shr ebx,3
  157. mov edi,7
  158. and edi,ebx ; edi = window 1
  159. shr ebx,3
  160. xor edx,ecx
  161. xor eax,DWORD PTR [esi*4+esp] ; window 0: shift 0, low dword only
  ; Windows 1..10: esi/edi alternate as table indices, ecx/ebp alternate as the
  ; low-shifted and high-shifted copies of the selected table entry.
  162. mov esi,7
  163. and esi,ebx ; esi = window 2
  164. shr ebx,3
  165. mov ebp,DWORD PTR [edi*4+esp] ; tab[window 1]
  166. mov edi,7
  167. mov ecx,ebp
  168. shl ebp,3
  169. and edi,ebx ; edi = window 3
  170. shr ecx,29
  171. xor eax,ebp
  172. shr ebx,3
  173. xor edx,ecx
  174. mov ecx,DWORD PTR [esi*4+esp] ; tab[window 2]
  175. mov esi,7
  176. mov ebp,ecx
  177. shl ecx,6
  178. and esi,ebx ; esi = window 4
  179. shr ebp,26
  180. xor eax,ecx
  181. shr ebx,3
  182. xor edx,ebp
  183. mov ebp,DWORD PTR [edi*4+esp] ; tab[window 3]
  184. mov edi,7
  185. mov ecx,ebp
  186. shl ebp,9
  187. and edi,ebx ; edi = window 5
  188. shr ecx,23
  189. xor eax,ebp
  190. shr ebx,3
  191. xor edx,ecx
  192. mov ecx,DWORD PTR [esi*4+esp] ; tab[window 4]
  193. mov esi,7
  194. mov ebp,ecx
  195. shl ecx,12
  196. and esi,ebx ; esi = window 6
  197. shr ebp,20
  198. xor eax,ecx
  199. shr ebx,3
  200. xor edx,ebp
  201. mov ebp,DWORD PTR [edi*4+esp] ; tab[window 5]
  202. mov edi,7
  203. mov ecx,ebp
  204. shl ebp,15
  205. and edi,ebx ; edi = window 7
  206. shr ecx,17
  207. xor eax,ebp
  208. shr ebx,3
  209. xor edx,ecx
  210. mov ecx,DWORD PTR [esi*4+esp] ; tab[window 6]
  211. mov esi,7
  212. mov ebp,ecx
  213. shl ecx,18
  214. and esi,ebx ; esi = window 8
  215. shr ebp,14
  216. xor eax,ecx
  217. shr ebx,3
  218. xor edx,ebp
  219. mov ebp,DWORD PTR [edi*4+esp] ; tab[window 7]
  220. mov edi,7
  221. mov ecx,ebp
  222. shl ebp,21
  223. and edi,ebx ; edi = window 9
  224. shr ecx,11
  225. xor eax,ebp
  226. shr ebx,3
  227. xor edx,ecx
  228. mov ecx,DWORD PTR [esi*4+esp] ; tab[window 8]
  229. mov esi,7
  230. mov ebp,ecx
  231. shl ecx,24
  232. and esi,ebx ; esi = window 10 (only bits 30-31 of b remain in ebx here)
  233. shr ebp,8
  234. xor eax,ecx
  235. shr ebx,3
  236. xor edx,ebp
  237. mov ebp,DWORD PTR [edi*4+esp] ; tab[window 9]
  238. mov ecx,ebp
  239. shl ebp,27
  240. mov edi,DWORD PTR [esi*4+esp] ; tab[window 10] - last read of the stack table
  241. shr ecx,5
  242. mov esi,edi
  243. xor eax,ebp
  244. shl edi,30
  245. xor edx,ecx
  246. shr esi,2
  247. xor eax,edi ; eax = final low dword
  248. xor edx,esi ; edx = final high dword
  249. add esp,36 ; release the frame
  250. ret
  251. __mul_1x1_ialu ENDP
  ; ---------------------------------------------------------------------------
  ; _bn_GF2m_mul_2x2 - 64 x 64 -> 128-bit carry-less (polynomial over GF(2))
  ;                    multiply; public entry point.
  ;
  ; Stack arguments at entry (all dwords; plain `ret`, so the caller removes them):
  ;   4[esp]  r     pointer to a 16-byte result buffer (four dwords are written)
  ;   8[esp]  arg1  high dword of the first operand
  ;   12[esp] arg2  low  dword of the first operand
  ;   16[esp] arg3  high dword of the second operand
  ;   20[esp] arg4  low  dword of the second operand
  ; NOTE(review): presumably the C prototype is
  ;   void bn_GF2m_mul_2x2(BN_ULONG *r, a1, a0, b1, b0) - confirm against the caller.
  ;
  ; Result: r[0..3] = 128-bit product, least significant dword first.
  ; Preserves ebx, ebp, esi, edi (saved/restored on the paths that use them).
  ;
  ; Three implementations are selected at run time from the first two dwords of
  ; _OPENSSL_ia32cap_P:
  ;   dword0 bit 23 clear                       -> integer path  ($L000ialu)
  ;   dword0 bit 24 clear or dword1 bit 1 clear -> MMX path      ($L001mmx)
  ;   otherwise                                 -> PCLMULQDQ path (fall through)
  ; NOTE(review): going by the label names and the instructions used, these are
  ; presumably the MMX, FXSR and PCLMULQDQ feature flags - confirm against the
  ; OPENSSL_ia32cap documentation.
  ;
  ; The MMX and integer paths use Karatsuba: three 32x32 multiplies
  ;   hh = arg1 (x) arg3, ll = arg2 (x) arg4, m = (arg1^arg2) (x) (arg3^arg4)
  ; and result = ll ^ ((m ^ hh ^ ll) << 32) ^ (hh << 64).
  ; ---------------------------------------------------------------------------
  252. ALIGN 16
  253. _bn_GF2m_mul_2x2 PROC PUBLIC
  254. $L_bn_GF2m_mul_2x2_begin::
  255. lea edx,DWORD PTR _OPENSSL_ia32cap_P
  256. mov eax,DWORD PTR [edx] ; eax = capability dword 0
  257. mov edx,DWORD PTR 4[edx] ; edx = capability dword 1
  258. test eax,8388608 ; 8388608 = 1 SHL 23
  259. jz $L000ialu
  260. test eax,16777216 ; 16777216 = 1 SHL 24
  261. jz $L001mmx
  262. test edx,2 ; bit 1 of dword 1
  263. jz $L001mmx
  ; --- PCLMULQDQ path: no registers saved, uses only eax and xmm0 ---
  264. movups xmm0,XMMWORD PTR 8[esp] ; xmm0 dwords = arg1, arg2, arg3, arg4
  265. shufps xmm0,xmm0,177 ; 177 = 0B1h: swap the dwords inside each qword -> low qword = arg1:arg2, high qword = arg3:arg4
  266. DB 102,15,58,68,192,1 ; 66 0F 3A 44 C0 01 = pclmulqdq xmm0,xmm0,1 (high qword x low qword), hand-encoded
  267. mov eax,DWORD PTR 4[esp] ; eax = r
  268. movups XMMWORD PTR [eax],xmm0 ; store the 128-bit product
  269. ret
  270. ALIGN 16
  ; --- MMX path ---
  ; After the four pushes: 20[esp] = r, 24[esp] = arg1, 28[esp] = arg2,
  ; 32[esp] = arg3, 36[esp] = arg4. __mul_1x1_mmx clobbers ebx/ebp/esi/edi and
  ; mm1-mm5, so the partial products are parked in mm6/mm7.
  271. $L001mmx:
  272. push ebp
  273. push ebx
  274. push esi
  275. push edi
  276. mov eax,DWORD PTR 24[esp]
  277. mov ebx,DWORD PTR 32[esp]
  278. call __mul_1x1_mmx
  279. movq mm7,mm0 ; mm7 = hh = arg1 (x) arg3
  280. mov eax,DWORD PTR 28[esp]
  281. mov ebx,DWORD PTR 36[esp]
  282. call __mul_1x1_mmx
  283. movq mm6,mm0 ; mm6 = ll = arg2 (x) arg4
  284. mov eax,DWORD PTR 24[esp]
  285. mov ebx,DWORD PTR 32[esp]
  286. xor eax,DWORD PTR 28[esp] ; eax = arg1 ^ arg2
  287. xor ebx,DWORD PTR 36[esp] ; ebx = arg3 ^ arg4
  288. call __mul_1x1_mmx ; mm0 = m
  289. pxor mm0,mm7
  290. mov eax,DWORD PTR 20[esp] ; eax = r (fetched before the pops change esp)
  291. pxor mm0,mm6 ; mm0 = middle term = m ^ hh ^ ll
  292. movq mm2,mm0
  293. psllq mm0,32 ; low half of the middle term, moved into bits 32-63
  294. pop edi
  295. psrlq mm2,32 ; high half of the middle term, moved into bits 0-31
  296. pop esi
  297. pxor mm0,mm6 ; mm0 = result bits 0-63
  298. pop ebx
  299. pxor mm2,mm7 ; mm2 = result bits 64-127
  300. movq QWORD PTR [eax],mm0
  301. pop ebp
  302. movq QWORD PTR 8[eax],mm2
  303. emms ; leave MMX state before returning
  304. ret
  305. ALIGN 16
  ; --- Integer path ---
  ; After the four pushes and `sub esp,20`: 40[esp] = r, 44[esp] = arg1,
  ; 48[esp] = arg2, 52[esp] = arg3, 56[esp] = arg4. Locals: [esp],4[esp] = ll
  ; (low, high); 8[esp],12[esp] = hh (low, high).
  306. $L000ialu:
  307. push ebp
  308. push ebx
  309. push esi
  310. push edi
  311. sub esp,20
  312. mov eax,DWORD PTR 44[esp]
  313. mov ebx,DWORD PTR 52[esp]
  314. call __mul_1x1_ialu ; edx:eax = hh = arg1 (x) arg3
  315. mov DWORD PTR 8[esp],eax
  316. mov DWORD PTR 12[esp],edx
  317. mov eax,DWORD PTR 48[esp]
  318. mov ebx,DWORD PTR 56[esp]
  319. call __mul_1x1_ialu ; edx:eax = ll = arg2 (x) arg4
  320. mov DWORD PTR [esp],eax
  321. mov DWORD PTR 4[esp],edx
  322. mov eax,DWORD PTR 44[esp]
  323. mov ebx,DWORD PTR 52[esp]
  324. xor eax,DWORD PTR 48[esp] ; eax = arg1 ^ arg2
  325. xor ebx,DWORD PTR 56[esp] ; ebx = arg3 ^ arg4
  326. call __mul_1x1_ialu ; edx:eax = m
  327. mov ebp,DWORD PTR 40[esp] ; ebp = r
  328. mov ebx,DWORD PTR [esp] ; ebx = ll.lo
  329. mov ecx,DWORD PTR 4[esp] ; ecx = ll.hi
  330. mov edi,DWORD PTR 8[esp] ; edi = hh.lo
  331. mov esi,DWORD PTR 12[esp] ; esi = hh.hi
  ; Recombine; the XOR chain is interleaved with the stores and register restores.
  ; Final values: r[1] = m.lo^ll.lo^ll.hi^hh.lo, r[2] = m.hi^ll.hi^hh.lo^hh.hi.
  332. xor eax,edx
  333. xor edx,ecx
  334. xor eax,ebx
  335. mov DWORD PTR [ebp],ebx ; r[0] = ll.lo
  336. xor edx,edi
  337. mov DWORD PTR 12[ebp],esi ; r[3] = hh.hi
  338. xor eax,esi
  339. add esp,20
  340. xor edx,esi ; edx = r[2]
  341. pop edi
  342. xor eax,edx ; eax = r[1]
  343. pop esi
  344. mov DWORD PTR 8[ebp],edx
  345. pop ebx
  346. mov DWORD PTR 4[ebp],eax ; stored before ebp (the r pointer) is restored
  347. pop ebp
  348. ret
  349. _bn_GF2m_mul_2x2 ENDP
  ; NUL-terminated ASCII identification string embedded in the code segment:
  ; "GF(2^m) Multiplication for x86, CRYPTOGAMS by <appro@openssl.org>"
  350. DB 71,70,40,50,94,109,41,32,77,117,108,116,105,112,108,105
  351. DB 99,97,116,105,111,110,32,102,111,114,32,120,56,54,44,32
  352. DB 67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97
  353. DB 112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103
  354. DB 62,0
  355. .text$ ENDS
  ; Declare _OPENSSL_ia32cap_P as a communal variable of four dwords. The code
  ; in this file reads only the first two of them.
  356. .bss SEGMENT 'BSS'
  357. COMM _OPENSSL_ia32cap_P:DWORD:4
  358. .bss ENDS
  359. END