x86-gf2m.s 5.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345
  1. .text
  # ---------------------------------------------------------------------
  # _mul_1x1_mmx: 32x32 -> 64-bit carry-less (XOR instead of add) multiply,
  # integer/MMX variant.
  #   In:    %eax = a, %ebx = b
  #   Out:   %mm0 = 64-bit product
  #   Clobb: %ebx, %ecx, %edx, %ebp, %esi, %edi, %mm0-%mm5, flags
  #          (%eax is only read; no register is saved or restored here)
  #   Stack: 36 bytes reserved; 0..28(%esp) hold an 8-entry dword table
  #          tab[i] = carry-less product of the 3-bit value i and a1, where
  #          a1 = a with its top two bits cleared (so a1<<1 and a1<<2 fit in
  #          32 bits). The two cleared bits of a are handled separately via
  #          %mm5 (bit 31) and %mm4 (bit 30).
  #   Note:  this routine does not execute emms.
  # ---------------------------------------------------------------------
  2. .type _mul_1x1_mmx,@function
  3. .align 16
  4. _mul_1x1_mmx:
  # Reserve the table area.
  5. subl $36,%esp
  # Build a1 (%ecx), a2 = a1<<1 (%edx) and a4 = a1<<2 (%ebp); the stores of
  # the eight XOR combinations are interleaved with the MMX setup below.
  6. movl %eax,%ecx
  7. leal (%eax,%eax,1),%edx
  # 1073741823 = 0x3fffffff: drop bits 31 and 30 of a.
  8. andl $1073741823,%ecx
  9. leal (%edx,%edx,1),%ebp
  # tab[0] = 0
  10. movl $0,(%esp)
  # 2147483647 = 0x7fffffff: drop the bit that was bit 30 of a.
  11. andl $2147483647,%edx
  # Keep full copies of a and b in MMX registers for the top-bit fix-up.
  12. movd %eax,%mm2
  13. movd %ebx,%mm3
  # tab[1] = a1
  14. movl %ecx,4(%esp)
  15. xorl %edx,%ecx
  16. pxor %mm5,%mm5
  17. pxor %mm4,%mm4
  # tab[2] = a2
  18. movl %edx,8(%esp)
  19. xorl %ebp,%edx
  # tab[3] = a1^a2
  20. movl %ecx,12(%esp)
  # %mm5 (low dword) = all-ones if a is negative as a signed dword,
  # i.e. if bit 31 of a is set; otherwise zero.
  21. pcmpgtd %mm2,%mm5
  # %mm2 = a+a, moving bit 30 of a into the sign position.
  22. paddd %mm2,%mm2
  23. xorl %edx,%ecx
  # tab[4] = a4
  24. movl %ebp,16(%esp)
  25. xorl %edx,%ebp
  # %mm5 = b if bit 31 of a is set, else 0.
  26. pand %mm3,%mm5
  # %mm4 (low dword) = all-ones if bit 30 of a is set.
  27. pcmpgtd %mm2,%mm4
  # tab[5] = a1^a4
  28. movl %ecx,20(%esp)
  29. xorl %ecx,%ebp
  # %mm5 = contribution of bit 31 of a: (b or 0) << 31.
  30. psllq $31,%mm5
  # %mm4 = b if bit 30 of a is set, else 0.
  31. pand %mm3,%mm4
  # tab[6] = a2^a4
  32. movl %edx,24(%esp)
  # From here on %ebp holds the constant 3-bit window mask 7; %esi and %edi
  # alternate as the table index while %ebx (b) is consumed 3 bits at a time.
  33. movl $7,%esi
  # tab[7] = a1^a2^a4
  34. movl %ebp,28(%esp)
  35. movl %esi,%ebp
  # %esi = window 0 of b (bits 0..2)
  36. andl %ebx,%esi
  37. shrl $3,%ebx
  38. movl %ebp,%edi
  # %mm4 = contribution of bit 30 of a: (b or 0) << 30.
  39. psllq $30,%mm4
  # %edi = window 1 of b
  40. andl %ebx,%edi
  41. shrl $3,%ebx
  # Accumulator %mm0 starts as tab[window 0] (shift 0).
  42. movd (%esp,%esi,4),%mm0
  43. movl %ebp,%esi
  44. andl %ebx,%esi
  45. shrl $3,%ebx
  # Window 1: tab entry << 3
  46. movd (%esp,%edi,4),%mm2
  47. movl %ebp,%edi
  48. psllq $3,%mm2
  49. andl %ebx,%edi
  50. shrl $3,%ebx
  51. pxor %mm2,%mm0
  # Window 2: tab entry << 6
  52. movd (%esp,%esi,4),%mm1
  53. movl %ebp,%esi
  54. psllq $6,%mm1
  55. andl %ebx,%esi
  56. shrl $3,%ebx
  57. pxor %mm1,%mm0
  # Window 3: tab entry << 9
  58. movd (%esp,%edi,4),%mm2
  59. movl %ebp,%edi
  60. psllq $9,%mm2
  61. andl %ebx,%edi
  62. shrl $3,%ebx
  63. pxor %mm2,%mm0
  # Window 4: tab entry << 12
  64. movd (%esp,%esi,4),%mm1
  65. movl %ebp,%esi
  66. psllq $12,%mm1
  67. andl %ebx,%esi
  68. shrl $3,%ebx
  69. pxor %mm1,%mm0
  # Window 5: tab entry << 15
  70. movd (%esp,%edi,4),%mm2
  71. movl %ebp,%edi
  72. psllq $15,%mm2
  73. andl %ebx,%edi
  74. shrl $3,%ebx
  75. pxor %mm2,%mm0
  # Window 6: tab entry << 18
  76. movd (%esp,%esi,4),%mm1
  77. movl %ebp,%esi
  78. psllq $18,%mm1
  79. andl %ebx,%esi
  80. shrl $3,%ebx
  81. pxor %mm1,%mm0
  # Window 7: tab entry << 21
  82. movd (%esp,%edi,4),%mm2
  83. movl %ebp,%edi
  84. psllq $21,%mm2
  85. andl %ebx,%edi
  86. shrl $3,%ebx
  87. pxor %mm2,%mm0
  # Window 8: tab entry << 24
  88. movd (%esp,%esi,4),%mm1
  89. movl %ebp,%esi
  90. psllq $24,%mm1
  # %esi = window 10, the last one (only bits 30..31 of b remain in %ebx).
  91. andl %ebx,%esi
  92. shrl $3,%ebx
  93. pxor %mm1,%mm0
  # Window 9: tab entry << 27, with the bit-30 correction folded in.
  94. movd (%esp,%edi,4),%mm2
  95. pxor %mm4,%mm0
  96. psllq $27,%mm2
  97. pxor %mm2,%mm0
  # Window 10: tab entry << 30, with the bit-31 correction folded in.
  98. movd (%esp,%esi,4),%mm1
  99. pxor %mm5,%mm0
  100. psllq $30,%mm1
  # Release the table area; the final pxor does not touch the stack.
  101. addl $36,%esp
  102. pxor %mm1,%mm0
  103. ret
  104. .size _mul_1x1_mmx,.-_mul_1x1_mmx
  # ---------------------------------------------------------------------
  # _mul_1x1_ialu: 32x32 -> 64-bit carry-less (XOR instead of add) multiply
  # using only integer registers.
  #   In:    %eax = a, %ebx = b
  #   Out:   %edx:%eax = 64-bit product (%eax = low dword, %edx = high dword)
  #   Clobb: %ebx, %ecx, %ebp, %esi, %edi, flags
  #          (no register is saved or restored here)
  #   Stack: 36 bytes reserved; 0..28(%esp) hold an 8-entry dword table
  #          tab[i] = carry-less product of the 3-bit value i and a1, where
  #          a1 = a with its top two bits cleared. The contributions of
  #          bits 31 and 30 of a are seeded directly into %edx:%eax.
  # ---------------------------------------------------------------------
  105. .type _mul_1x1_ialu,@function
  106. .align 16
  107. _mul_1x1_ialu:
  # Reserve the table area.
  108. subl $36,%esp
  # %ecx = a1, %edx = a2 = a1<<1, %ebp = a4 = a1<<2 (after the masks below).
  109. movl %eax,%ecx
  110. leal (%eax,%eax,1),%edx
  111. leal (,%eax,4),%ebp
  # 1073741823 = 0x3fffffff: drop bits 31 and 30 of a.
  112. andl $1073741823,%ecx
  # %edi = a<<1 so that bit 30 of a sits in the sign position.
  113. leal (%eax,%eax,1),%edi
  # %eax = all-ones if bit 31 of a is set, else 0.
  114. sarl $31,%eax
  # tab[0] = 0
  115. movl $0,(%esp)
  # 2147483647 = 0x7fffffff: drop the bit that was bit 30 of a.
  116. andl $2147483647,%edx
  # tab[1] = a1
  117. movl %ecx,4(%esp)
  118. xorl %edx,%ecx
  # tab[2] = a2
  119. movl %edx,8(%esp)
  120. xorl %ebp,%edx
  # tab[3] = a1^a2
  121. movl %ecx,12(%esp)
  122. xorl %edx,%ecx
  # tab[4] = a4
  123. movl %ebp,16(%esp)
  124. xorl %edx,%ebp
  # tab[5] = a1^a4
  125. movl %ecx,20(%esp)
  126. xorl %ecx,%ebp
  # %edi = all-ones if bit 30 of a is set, else 0.
  127. sarl $31,%edi
  # %eax = b if bit 31 of a is set, else 0.
  128. andl %ebx,%eax
  # tab[6] = a2^a4
  129. movl %edx,24(%esp)
  # %edi = b if bit 30 of a is set, else 0.
  130. andl %ebx,%edi
  # tab[7] = a1^a2^a4
  131. movl %ebp,28(%esp)
  # Seed the accumulator %edx:%eax with the two top-bit contributions:
  #   (bit-31 term) << 31  ->  low = x<<31, high = x>>1
  #   (bit-30 term) << 30  ->  low = y<<30, high = y>>2
  132. movl %eax,%edx
  133. shll $31,%eax
  134. movl %edi,%ecx
  135. shrl $1,%edx
  # %esi = window 0 of b (bits 0..2); %esi and %edi alternate as table index.
  136. movl $7,%esi
  137. shll $30,%edi
  138. andl %ebx,%esi
  139. shrl $2,%ecx
  140. xorl %edi,%eax
  141. shrl $3,%ebx
  # %edi = window 1 of b
  142. movl $7,%edi
  143. andl %ebx,%edi
  144. shrl $3,%ebx
  145. xorl %ecx,%edx
  # Window 0: low ^= tab entry (shift 0, no high part).
  146. xorl (%esp,%esi,4),%eax
  147. movl $7,%esi
  148. andl %ebx,%esi
  149. shrl $3,%ebx
  # Window 1: low ^= t<<3, high ^= t>>29
  150. movl (%esp,%edi,4),%ebp
  151. movl $7,%edi
  152. movl %ebp,%ecx
  153. shll $3,%ebp
  154. andl %ebx,%edi
  155. shrl $29,%ecx
  156. xorl %ebp,%eax
  157. shrl $3,%ebx
  158. xorl %ecx,%edx
  # Window 2: low ^= t<<6, high ^= t>>26
  159. movl (%esp,%esi,4),%ecx
  160. movl $7,%esi
  161. movl %ecx,%ebp
  162. shll $6,%ecx
  163. andl %ebx,%esi
  164. shrl $26,%ebp
  165. xorl %ecx,%eax
  166. shrl $3,%ebx
  167. xorl %ebp,%edx
  # Window 3: low ^= t<<9, high ^= t>>23
  168. movl (%esp,%edi,4),%ebp
  169. movl $7,%edi
  170. movl %ebp,%ecx
  171. shll $9,%ebp
  172. andl %ebx,%edi
  173. shrl $23,%ecx
  174. xorl %ebp,%eax
  175. shrl $3,%ebx
  176. xorl %ecx,%edx
  # Window 4: low ^= t<<12, high ^= t>>20
  177. movl (%esp,%esi,4),%ecx
  178. movl $7,%esi
  179. movl %ecx,%ebp
  180. shll $12,%ecx
  181. andl %ebx,%esi
  182. shrl $20,%ebp
  183. xorl %ecx,%eax
  184. shrl $3,%ebx
  185. xorl %ebp,%edx
  # Window 5: low ^= t<<15, high ^= t>>17
  186. movl (%esp,%edi,4),%ebp
  187. movl $7,%edi
  188. movl %ebp,%ecx
  189. shll $15,%ebp
  190. andl %ebx,%edi
  191. shrl $17,%ecx
  192. xorl %ebp,%eax
  193. shrl $3,%ebx
  194. xorl %ecx,%edx
  # Window 6: low ^= t<<18, high ^= t>>14
  195. movl (%esp,%esi,4),%ecx
  196. movl $7,%esi
  197. movl %ecx,%ebp
  198. shll $18,%ecx
  199. andl %ebx,%esi
  200. shrl $14,%ebp
  201. xorl %ecx,%eax
  202. shrl $3,%ebx
  203. xorl %ebp,%edx
  # Window 7: low ^= t<<21, high ^= t>>11
  204. movl (%esp,%edi,4),%ebp
  205. movl $7,%edi
  206. movl %ebp,%ecx
  207. shll $21,%ebp
  208. andl %ebx,%edi
  209. shrl $11,%ecx
  210. xorl %ebp,%eax
  211. shrl $3,%ebx
  212. xorl %ecx,%edx
  # Window 8: low ^= t<<24, high ^= t>>8
  213. movl (%esp,%esi,4),%ecx
  214. movl $7,%esi
  215. movl %ecx,%ebp
  216. shll $24,%ecx
  # %esi = window 10, the last one (only bits 30..31 of b remain in %ebx).
  217. andl %ebx,%esi
  218. shrl $8,%ebp
  219. xorl %ecx,%eax
  220. shrl $3,%ebx
  221. xorl %ebp,%edx
  # Window 9 (in %ebp/%ecx): low ^= t<<27, high ^= t>>5
  # Window 10 (in %edi/%esi): low ^= t<<30, high ^= t>>2
  # The two are interleaved; %esi and %edi are reused as data registers
  # because no further table index is needed.
  222. movl (%esp,%edi,4),%ebp
  223. movl %ebp,%ecx
  224. shll $27,%ebp
  225. movl (%esp,%esi,4),%edi
  226. shrl $5,%ecx
  227. movl %edi,%esi
  228. xorl %ebp,%eax
  229. shll $30,%edi
  230. xorl %ecx,%edx
  231. shrl $2,%esi
  232. xorl %edi,%eax
  233. xorl %esi,%edx
  # Release the table area and return the product in %edx:%eax.
  234. addl $36,%esp
  235. ret
  236. .size _mul_1x1_ialu,.-_mul_1x1_ialu
  # ---------------------------------------------------------------------
  # bn_GF2m_mul_2x2: carry-less multiply of two 64-bit operands, each passed
  # as two 32-bit words, producing a 128-bit result in memory.
  #   Stack arguments at entry (return address at 0(%esp)):
  #      4(%esp) = r, pointer to the 16-byte result (low dword first)
  #      8(%esp) = high word of operand A    12(%esp) = low word of operand A
  #     16(%esp) = high word of operand B    20(%esp) = low word of operand B
  #   NOTE(review): presumably matches a C prototype of the form
  #     void bn_GF2m_mul_2x2(r, a1, a0, b1, b0) -- confirm against the header.
  #   Three code paths are selected from the first two dwords of
  #   OPENSSL_ia32cap_P:
  #     - a single hand-encoded carry-less multiply instruction on %xmm0,
  #     - three calls to _mul_1x1_mmx,
  #     - three calls to _mul_1x1_ialu.
  #   The latter two combine A1*B1, A0*B0 and (A1^A0)*(B1^B0) Karatsuba-style.
  #   %ebp, %ebx, %esi and %edi are saved and restored on the two helper
  #   paths; the first path touches only %eax, %edx and %xmm0.
  # ---------------------------------------------------------------------
  237. .globl bn_GF2m_mul_2x2
  238. .type bn_GF2m_mul_2x2,@function
  239. .align 16
  240. bn_GF2m_mul_2x2:
  241. .L_bn_GF2m_mul_2x2_begin:
  # Position-independent address of OPENSSL_ia32cap_P: call/pop yields the
  # address of .L000PIC_me_up, to which the link-time distance is added.
  242. call .L000PIC_me_up
  243. .L000PIC_me_up:
  244. popl %edx
  245. leal OPENSSL_ia32cap_P-.L000PIC_me_up(%edx),%edx
  # %eax = first capability dword, %edx = second capability dword.
  246. movl (%edx),%eax
  247. movl 4(%edx),%edx
  # 8388608 = 1<<23. Bit clear -> integer-only path.
  # NOTE(review): presumably the MMX feature flag -- confirm against the
  # OPENSSL_ia32cap_P layout.
  248. testl $8388608,%eax
  249. jz .L001ialu
  # 16777216 = 1<<24. Bit clear -> MMX path.
  # NOTE(review): presumably the FXSR flag, gating use of %xmm registers --
  # confirm.
  250. testl $16777216,%eax
  251. jz .L002mmx
  # Bit 1 of the second dword clear -> MMX path.
  # NOTE(review): presumably the PCLMULQDQ flag -- confirm.
  252. testl $2,%edx
  253. jz .L002mmx
  # --- Single-instruction path ---
  # Load all four operand words (8..23(%esp)) with one unaligned load.
  254. movups 8(%esp),%xmm0
  # 177 = 0xB1: swap the two dwords inside each 64-bit half, so each half
  # becomes (high word << 32) | low word of its operand.
  255. shufps $177,%xmm0,%xmm0
  # Bytes 66 0F 3A 44 C0 01 encode pclmulqdq $1,%xmm0,%xmm0, emitted as raw
  # bytes (presumably for assemblers lacking the mnemonic). Immediate 1
  # multiplies one 64-bit half of %xmm0 by the other.
  256. .byte 102,15,58,68,192,1
  # Store the 128-bit product through r.
  257. movl 4(%esp),%eax
  258. movups %xmm0,(%eax)
  259. ret
  260. .align 16
  # --- MMX path ---
  261. .L002mmx:
  # Four pushes move the arguments up by 16 bytes:
  # r=20(%esp), A hi=24, A lo=28, B hi=32, B lo=36.
  262. pushl %ebp
  263. pushl %ebx
  264. pushl %esi
  265. pushl %edi
  # %mm7 = A hi * B hi
  266. movl 24(%esp),%eax
  267. movl 32(%esp),%ebx
  268. call _mul_1x1_mmx
  269. movq %mm0,%mm7
  # %mm6 = A lo * B lo
  270. movl 28(%esp),%eax
  271. movl 36(%esp),%ebx
  272. call _mul_1x1_mmx
  273. movq %mm0,%mm6
  # %mm0 = (A hi ^ A lo) * (B hi ^ B lo)
  274. movl 24(%esp),%eax
  275. movl 32(%esp),%ebx
  276. xorl 28(%esp),%eax
  277. xorl 36(%esp),%ebx
  278. call _mul_1x1_mmx
  # Middle term = %mm0 ^ %mm7 ^ %mm6; %eax = r is loaded in between.
  279. pxor %mm7,%mm0
  280. movl 20(%esp),%eax
  281. pxor %mm6,%mm0
  # Split the middle term: its low dword goes to bits 32..63 of the result
  # (%mm0), its high dword to bits 64..95 (%mm2). Register restores are
  # interleaved with the MMX work.
  282. movq %mm0,%mm2
  283. psllq $32,%mm0
  284. popl %edi
  285. psrlq $32,%mm2
  286. popl %esi
  # Low 64 bits of the result = (A lo * B lo) ^ (middle << 32)
  287. pxor %mm6,%mm0
  288. popl %ebx
  # High 64 bits of the result = (A hi * B hi) ^ (middle >> 32)
  289. pxor %mm7,%mm2
  290. movq %mm0,(%eax)
  291. popl %ebp
  292. movq %mm2,8(%eax)
  # Leave MMX state before returning to the caller.
  293. emms
  294. ret
  295. .align 16
  # --- Integer-only path ---
  296. .L001ialu:
  # Four pushes plus 20 bytes of scratch move the arguments up by 36 bytes:
  # r=40(%esp), A hi=44, A lo=48, B hi=52, B lo=56.
  # Scratch layout: 0/4(%esp) = A lo * B lo (low/high dword),
  #                 8/12(%esp) = A hi * B hi (low/high dword).
  297. pushl %ebp
  298. pushl %ebx
  299. pushl %esi
  300. pushl %edi
  301. subl $20,%esp
  # A hi * B hi -> 8(%esp), 12(%esp)
  302. movl 44(%esp),%eax
  303. movl 52(%esp),%ebx
  304. call _mul_1x1_ialu
  305. movl %eax,8(%esp)
  306. movl %edx,12(%esp)
  # A lo * B lo -> 0(%esp), 4(%esp)
  307. movl 48(%esp),%eax
  308. movl 56(%esp),%ebx
  309. call _mul_1x1_ialu
  310. movl %eax,(%esp)
  311. movl %edx,4(%esp)
  # %edx:%eax = (A hi ^ A lo) * (B hi ^ B lo)
  312. movl 44(%esp),%eax
  313. movl 52(%esp),%ebx
  314. xorl 48(%esp),%eax
  315. xorl 56(%esp),%ebx
  316. call _mul_1x1_ialu
  # %ebp = r; %ecx:%ebx = A lo * B lo; %esi:%edi = A hi * B hi.
  317. movl 40(%esp),%ebp
  318. movl (%esp),%ebx
  319. movl 4(%esp),%ecx
  320. movl 8(%esp),%edi
  321. movl 12(%esp),%esi
  # Recombine. With M = %edx:%eax, L = A lo * B lo, H = A hi * B hi:
  #   r[0] = L.lo
  #   r[1] = L.hi ^ M.lo ^ L.lo ^ H.lo
  #   r[2] = H.lo ^ M.hi ^ L.hi ^ H.hi
  #   r[3] = H.hi
  # The XOR chain below reaches r[1] by first accumulating extra terms in
  # %eax and then cancelling them with the finished r[2] value in %edx.
  322. xorl %edx,%eax
  323. xorl %ecx,%edx
  324. xorl %ebx,%eax
  325. movl %ebx,(%ebp)
  326. xorl %edi,%edx
  327. movl %esi,12(%ebp)
  328. xorl %esi,%eax
  # Scratch values are all in registers now; release the scratch area.
  329. addl $20,%esp
  330. xorl %esi,%edx
  331. popl %edi
  332. xorl %edx,%eax
  333. popl %esi
  # r[2]
  334. movl %edx,8(%ebp)
  335. popl %ebx
  # r[1]; %ebp is still the result pointer here and is restored last.
  336. movl %eax,4(%ebp)
  337. popl %ebp
  338. ret
  339. .size bn_GF2m_mul_2x2,.-.L_bn_GF2m_mul_2x2_begin
  # NUL-terminated ASCII identification string embedded after the code:
  # "GF(2^m) Multiplication for x86, CRYPTOGAMS by <appro@openssl.org>"
  340. .byte 71,70,40,50,94,109,41,32,77,117,108,116,105,112,108,105
  341. .byte 99,97,116,105,111,110,32,102,111,114,32,120,56,54,44,32
  342. .byte 67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97
  343. .byte 112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103
  344. .byte 62,0
  # Common symbol for the capability vector read by bn_GF2m_mul_2x2:
  # 16 bytes, alignment 4.
  345. .comm OPENSSL_ia32cap_P,16,4