chacha-x86.masm 25 KB


  ; Assembler preamble: warns on MASM older than 8.00, enables the .686/.XMM
  ; instruction sets, and opens the code segment that holds the routines below.
  1. IF @Version LT 800
  2. ECHO MASM version 8.00 or later is strongly recommended.
  3. ENDIF
  4. .686
  5. .XMM
  ; For assemblers older than 8.00, define XMMWORD as a 16-byte structure
  ; (two quadwords) so the XMMWORD PTR operands used below have a type.
  6. IF @Version LT 800
  7. XMMWORD STRUCT 16
  8. DQ 2 dup (?)
  9. XMMWORD ENDS
  10. ENDIF
  11. .MODEL FLAT
  ; OPTION DOTNAME permits identifiers that begin with a dot (e.g. ".text$").
  12. OPTION DOTNAME
  ; Code segment: PAGE alignment on old assemblers, ALIGN(64) otherwise.
  13. IF @Version LT 800
  14. .text$ SEGMENT PAGE 'CODE'
  15. ELSE
  16. .text$ SEGMENT ALIGN(64) 'CODE'
  17. ENDIF
  18. ALIGN 16
  ; _ChaCha20_ctr32: XORs an input buffer with a ChaCha20 keystream using only
  ; integer registers, after first dispatching to the SIMD paths when the CPU
  ; capability bits allow it.
  ;
  ; Stack arguments (offsets are relative to esp after the four pushes below):
  ;   20[esp]  output pointer
  ;   24[esp]  input pointer
  ;   28[esp]  length in bytes (0 => return immediately)
  ;   32[esp]  pointer to 8 dwords (presumably the key - confirm against the C prototype)
  ;   36[esp]  pointer to 4 dwords (presumably counter + nonce; dword 0 is
  ;            incremented once per 64-byte block)
  ; ebp, ebx, esi, edi are saved and restored; eax, ecx, edx are not preserved.
  ;
  ; Frame layout after "sub esp,132":
  ;   0..60[esp]        working state words x0..x15 (those not held in registers)
  ;   80..108[esp]      copy of the 8 dwords from the 4th argument
  ;   112..124[esp]     copy of the 4 dwords from the 5th argument
  ;   128[esp]          spilled round-loop counter
  ;   152/156/160[esp]  the caller's out/in/len argument slots, updated in place
  19. _ChaCha20_ctr32 PROC PUBLIC
  20. $L_ChaCha20_ctr32_begin::
  21. push ebp
  22. push ebx
  23. push esi
  24. push edi
  ; Nothing to do when the length argument is zero.
  25. xor eax,eax
  26. cmp eax,DWORD PTR 28[esp]
  27. je $L000no_data
  ; call/pop idiom: eax = run-time address of $Lpic_point. The SIMD paths add
  ; ($Lssse3_data-$Lpic_point) to it to locate their constant table.
  28. call $Lpic_point
  29. $Lpic_point::
  30. pop eax
  ; Capability dispatch: take the SIMD path only if bit 24 (16777216) of dword 0
  ; and bit 9 (512) of dword 1 of _OPENSSL_ia32cap_P are both set.
  ; NOTE(review): presumably the FXSR and SSSE3 flags - confirm against the
  ; OPENSSL_ia32cap documentation. ebp is left pointing at the capability
  ; vector because $Lssse3_shortcut tests 4[ebp] again.
  31. lea ebp,DWORD PTR _OPENSSL_ia32cap_P
  32. test DWORD PTR [ebp],16777216
  33. jz $L001x86
  34. test DWORD PTR 4[ebp],512
  35. jz $L001x86
  36. jmp $Lssse3_shortcut
  ; ---- scalar path ----
  37. $L001x86:
  38. mov esi,DWORD PTR 32[esp]
  39. mov edi,DWORD PTR 36[esp]
  40. sub esp,132
  ; Copy the 8 dwords at esi (4th argument) to 80..108[esp].
  41. mov eax,DWORD PTR [esi]
  42. mov ebx,DWORD PTR 4[esi]
  43. mov ecx,DWORD PTR 8[esi]
  44. mov edx,DWORD PTR 12[esi]
  45. mov DWORD PTR 80[esp],eax
  46. mov DWORD PTR 84[esp],ebx
  47. mov DWORD PTR 88[esp],ecx
  48. mov DWORD PTR 92[esp],edx
  49. mov eax,DWORD PTR 16[esi]
  50. mov ebx,DWORD PTR 20[esi]
  51. mov ecx,DWORD PTR 24[esi]
  52. mov edx,DWORD PTR 28[esi]
  53. mov DWORD PTR 96[esp],eax
  54. mov DWORD PTR 100[esp],ebx
  55. mov DWORD PTR 104[esp],ecx
  56. mov DWORD PTR 108[esp],edx
  ; Copy the 4 dwords at edi (5th argument) to 112..124[esp]. Dword 0 is
  ; pre-decremented because $L002entry adds 1 to it before every block.
  57. mov eax,DWORD PTR [edi]
  58. mov ebx,DWORD PTR 4[edi]
  59. mov ecx,DWORD PTR 8[edi]
  60. mov edx,DWORD PTR 12[edi]
  61. sub eax,1
  62. mov DWORD PTR 112[esp],eax
  63. mov DWORD PTR 116[esp],ebx
  64. mov DWORD PTR 120[esp],ecx
  65. mov DWORD PTR 124[esp],edx
  66. jmp $L002entry
  67. ALIGN 16
  ; Per-block loop head (second and later blocks): write the advanced input
  ; pointer (ebx), output pointer (eax) and remaining length (ecx) back into
  ; the argument slots.
  68. $L003outer_loop:
  69. mov DWORD PTR 156[esp],ebx
  70. mov DWORD PTR 152[esp],eax
  71. mov DWORD PTR 160[esp],ecx
  ; Build the initial state for one block. x0..x3 are the constants
  ; 0x61707865, 0x3320646e, 0x79622d32, 0x6b206574 (x0 stays in eax); x4..x11
  ; come from 80..108[esp]; x12..x15 come from 112..124[esp].
  ; On leaving this section: eax=x0, ebp=x4, ecx=x8, esi=x9, edx=x12.
  72. $L002entry:
  73. mov eax,1634760805
  74. mov DWORD PTR 4[esp],857760878
  75. mov DWORD PTR 8[esp],2036477234
  76. mov DWORD PTR 12[esp],1797285236
  77. mov ebx,DWORD PTR 84[esp]
  78. mov ebp,DWORD PTR 88[esp]
  79. mov ecx,DWORD PTR 104[esp]
  80. mov esi,DWORD PTR 108[esp]
  81. mov edx,DWORD PTR 116[esp]
  82. mov edi,DWORD PTR 120[esp]
  83. mov DWORD PTR 20[esp],ebx
  84. mov DWORD PTR 24[esp],ebp
  85. mov DWORD PTR 40[esp],ecx
  86. mov DWORD PTR 44[esp],esi
  87. mov DWORD PTR 52[esp],edx
  88. mov DWORD PTR 56[esp],edi
  89. mov ebx,DWORD PTR 92[esp]
  90. mov edi,DWORD PTR 124[esp]
  91. mov edx,DWORD PTR 112[esp]
  92. mov ebp,DWORD PTR 80[esp]
  93. mov ecx,DWORD PTR 96[esp]
  94. mov esi,DWORD PTR 100[esp]
  ; Advance the block counter (x12) and store it back for the next block.
  95. add edx,1
  96. mov DWORD PTR 28[esp],ebx
  97. mov DWORD PTR 60[esp],edi
  98. mov DWORD PTR 112[esp],edx
  ; ebx = 10 loop iterations; it is spilled to 128[esp] inside the loop
  ; because ebx is also needed as a working register.
  99. mov ebx,10
  100. jmp $L004loop
  101. ALIGN 16
  ; Round loop. Each iteration runs the add / xor / rotate-left (16, 12, 8, 7)
  ; sequence eight times: first on (x0,x4,x8,x12), (x1,x5,x9,x13),
  ; (x2,x6,x10,x14), (x3,x7,x11,x15), then on (x0,x5,x10,x15),
  ; (x1,x6,x11,x12), (x2,x7,x8,x13), (x3,x4,x9,x14). Loads and stores of the
  ; next/previous group are interleaved with the arithmetic.
  102. $L004loop:
  103. add eax,ebp
  104. mov DWORD PTR 128[esp],ebx
  105. mov ebx,ebp
  106. xor edx,eax
  107. rol edx,16
  108. add ecx,edx
  109. xor ebx,ecx
  110. mov edi,DWORD PTR 52[esp]
  111. rol ebx,12
  112. mov ebp,DWORD PTR 20[esp]
  113. add eax,ebx
  114. xor edx,eax
  115. mov DWORD PTR [esp],eax
  116. rol edx,8
  117. mov eax,DWORD PTR 4[esp]
  118. add ecx,edx
  119. mov DWORD PTR 48[esp],edx
  120. xor ebx,ecx
  121. add eax,ebp
  122. rol ebx,7
  123. xor edi,eax
  124. mov DWORD PTR 32[esp],ecx
  125. rol edi,16
  126. mov DWORD PTR 16[esp],ebx
  127. add esi,edi
  128. mov ecx,DWORD PTR 40[esp]
  129. xor ebp,esi
  130. mov edx,DWORD PTR 56[esp]
  131. rol ebp,12
  132. mov ebx,DWORD PTR 24[esp]
  133. add eax,ebp
  134. xor edi,eax
  135. mov DWORD PTR 4[esp],eax
  136. rol edi,8
  137. mov eax,DWORD PTR 8[esp]
  138. add esi,edi
  139. mov DWORD PTR 52[esp],edi
  140. xor ebp,esi
  141. add eax,ebx
  142. rol ebp,7
  143. xor edx,eax
  144. mov DWORD PTR 36[esp],esi
  145. rol edx,16
  146. mov DWORD PTR 20[esp],ebp
  147. add ecx,edx
  148. mov esi,DWORD PTR 44[esp]
  149. xor ebx,ecx
  150. mov edi,DWORD PTR 60[esp]
  151. rol ebx,12
  152. mov ebp,DWORD PTR 28[esp]
  153. add eax,ebx
  154. xor edx,eax
  155. mov DWORD PTR 8[esp],eax
  156. rol edx,8
  157. mov eax,DWORD PTR 12[esp]
  158. add ecx,edx
  159. mov DWORD PTR 56[esp],edx
  160. xor ebx,ecx
  161. add eax,ebp
  162. rol ebx,7
  163. xor edi,eax
  164. rol edi,16
  165. mov DWORD PTR 24[esp],ebx
  166. add esi,edi
  167. xor ebp,esi
  168. rol ebp,12
  169. mov ebx,DWORD PTR 20[esp]
  170. add eax,ebp
  171. xor edi,eax
  172. mov DWORD PTR 12[esp],eax
  173. rol edi,8
  174. mov eax,DWORD PTR [esp]
  175. add esi,edi
  ; x15 (just produced in edi) becomes the "d" word of the first diagonal group.
  176. mov edx,edi
  177. xor ebp,esi
  178. add eax,ebx
  179. rol ebp,7
  180. xor edx,eax
  181. rol edx,16
  182. mov DWORD PTR 28[esp],ebp
  183. add ecx,edx
  184. xor ebx,ecx
  185. mov edi,DWORD PTR 48[esp]
  186. rol ebx,12
  187. mov ebp,DWORD PTR 24[esp]
  188. add eax,ebx
  189. xor edx,eax
  190. mov DWORD PTR [esp],eax
  191. rol edx,8
  192. mov eax,DWORD PTR 4[esp]
  193. add ecx,edx
  194. mov DWORD PTR 60[esp],edx
  195. xor ebx,ecx
  196. add eax,ebp
  197. rol ebx,7
  198. xor edi,eax
  199. mov DWORD PTR 40[esp],ecx
  200. rol edi,16
  201. mov DWORD PTR 20[esp],ebx
  202. add esi,edi
  203. mov ecx,DWORD PTR 32[esp]
  204. xor ebp,esi
  205. mov edx,DWORD PTR 52[esp]
  206. rol ebp,12
  207. mov ebx,DWORD PTR 28[esp]
  208. add eax,ebp
  209. xor edi,eax
  210. mov DWORD PTR 4[esp],eax
  211. rol edi,8
  212. mov eax,DWORD PTR 8[esp]
  213. add esi,edi
  214. mov DWORD PTR 48[esp],edi
  215. xor ebp,esi
  216. add eax,ebx
  217. rol ebp,7
  218. xor edx,eax
  219. mov DWORD PTR 44[esp],esi
  220. rol edx,16
  221. mov DWORD PTR 24[esp],ebp
  222. add ecx,edx
  223. mov esi,DWORD PTR 36[esp]
  224. xor ebx,ecx
  225. mov edi,DWORD PTR 56[esp]
  226. rol ebx,12
  227. mov ebp,DWORD PTR 16[esp]
  228. add eax,ebx
  229. xor edx,eax
  230. mov DWORD PTR 8[esp],eax
  231. rol edx,8
  232. mov eax,DWORD PTR 12[esp]
  233. add ecx,edx
  234. mov DWORD PTR 52[esp],edx
  235. xor ebx,ecx
  236. add eax,ebp
  237. rol ebx,7
  238. xor edi,eax
  239. rol edi,16
  240. mov DWORD PTR 28[esp],ebx
  241. add esi,edi
  242. xor ebp,esi
  243. mov edx,DWORD PTR 48[esp]
  244. rol ebp,12
  ; Reload the spilled loop counter.
  245. mov ebx,DWORD PTR 128[esp]
  246. add eax,ebp
  247. xor edi,eax
  248. mov DWORD PTR 12[esp],eax
  249. rol edi,8
  250. mov eax,DWORD PTR [esp]
  251. add esi,edi
  252. mov DWORD PTR 56[esp],edi
  253. xor ebp,esi
  254. rol ebp,7
  255. dec ebx
  256. jnz $L004loop
  ; Rounds finished. Registers hold eax=x0, ebp=x4, ecx=x8, esi=x9, edx=x12,
  ; edi=x14; the remaining words are in 0..60[esp]. Add the initial state back
  ; in, then choose between the full-block and the partial-block output code
  ; based on the remaining length.
  257. mov ebx,DWORD PTR 160[esp]
  258. add eax,1634760805
  259. add ebp,DWORD PTR 80[esp]
  260. add ecx,DWORD PTR 96[esp]
  261. add esi,DWORD PTR 100[esp]
  262. cmp ebx,64
  263. jb $L005tail
  ; Full 64-byte block: ebx = input pointer, eax = output pointer. Keystream
  ; words are XORed with the input and stored six, five and five at a time.
  264. mov ebx,DWORD PTR 156[esp]
  265. add edx,DWORD PTR 112[esp]
  266. add edi,DWORD PTR 120[esp]
  267. xor eax,DWORD PTR [ebx]
  268. xor ebp,DWORD PTR 16[ebx]
  ; Park output word 0 at [esp]; eax is needed for the output pointer and the
  ; word is written last (see the store through ebp further down).
  269. mov DWORD PTR [esp],eax
  270. mov eax,DWORD PTR 152[esp]
  271. xor ecx,DWORD PTR 32[ebx]
  272. xor esi,DWORD PTR 36[ebx]
  273. xor edx,DWORD PTR 48[ebx]
  274. xor edi,DWORD PTR 56[ebx]
  275. mov DWORD PTR 16[eax],ebp
  276. mov DWORD PTR 32[eax],ecx
  277. mov DWORD PTR 36[eax],esi
  278. mov DWORD PTR 48[eax],edx
  279. mov DWORD PTR 56[eax],edi
  280. mov ebp,DWORD PTR 4[esp]
  281. mov ecx,DWORD PTR 8[esp]
  282. mov esi,DWORD PTR 12[esp]
  283. mov edx,DWORD PTR 20[esp]
  284. mov edi,DWORD PTR 24[esp]
  285. add ebp,857760878
  286. add ecx,2036477234
  287. add esi,1797285236
  288. add edx,DWORD PTR 84[esp]
  289. add edi,DWORD PTR 88[esp]
  290. xor ebp,DWORD PTR 4[ebx]
  291. xor ecx,DWORD PTR 8[ebx]
  292. xor esi,DWORD PTR 12[ebx]
  293. xor edx,DWORD PTR 20[ebx]
  294. xor edi,DWORD PTR 24[ebx]
  295. mov DWORD PTR 4[eax],ebp
  296. mov DWORD PTR 8[eax],ecx
  297. mov DWORD PTR 12[eax],esi
  298. mov DWORD PTR 20[eax],edx
  299. mov DWORD PTR 24[eax],edi
  300. mov ebp,DWORD PTR 28[esp]
  301. mov ecx,DWORD PTR 40[esp]
  302. mov esi,DWORD PTR 44[esp]
  303. mov edx,DWORD PTR 52[esp]
  304. mov edi,DWORD PTR 60[esp]
  305. add ebp,DWORD PTR 92[esp]
  306. add ecx,DWORD PTR 104[esp]
  307. add esi,DWORD PTR 108[esp]
  308. add edx,DWORD PTR 116[esp]
  309. add edi,DWORD PTR 124[esp]
  310. xor ebp,DWORD PTR 28[ebx]
  311. xor ecx,DWORD PTR 40[ebx]
  312. xor esi,DWORD PTR 44[ebx]
  313. xor edx,DWORD PTR 52[ebx]
  314. xor edi,DWORD PTR 60[ebx]
  ; Advance input pointer by one block.
  315. lea ebx,DWORD PTR 64[ebx]
  316. mov DWORD PTR 28[eax],ebp
  317. mov ebp,DWORD PTR [esp]
  318. mov DWORD PTR 40[eax],ecx
  319. mov ecx,DWORD PTR 160[esp]
  320. mov DWORD PTR 44[eax],esi
  321. mov DWORD PTR 52[eax],edx
  322. mov DWORD PTR 60[eax],edi
  323. mov DWORD PTR [eax],ebp
  ; Advance output pointer, subtract 64 from the remaining length, and loop
  ; while bytes remain.
  324. lea eax,DWORD PTR 64[eax]
  325. sub ecx,64
  326. jnz $L003outer_loop
  327. jmp $L006done
  ; Partial final block (fewer than 64 bytes left, count in ebx): finish
  ; adding the initial state and materialise all 16 keystream words in
  ; 0..60[esp] so they can be consumed byte by byte.
  328. $L005tail:
  329. add edx,DWORD PTR 112[esp]
  330. add edi,DWORD PTR 120[esp]
  331. mov DWORD PTR [esp],eax
  332. mov DWORD PTR 16[esp],ebp
  333. mov DWORD PTR 32[esp],ecx
  334. mov DWORD PTR 36[esp],esi
  335. mov DWORD PTR 48[esp],edx
  336. mov DWORD PTR 56[esp],edi
  337. mov ebp,DWORD PTR 4[esp]
  338. mov ecx,DWORD PTR 8[esp]
  339. mov esi,DWORD PTR 12[esp]
  340. mov edx,DWORD PTR 20[esp]
  341. mov edi,DWORD PTR 24[esp]
  342. add ebp,857760878
  343. add ecx,2036477234
  344. add esi,1797285236
  345. add edx,DWORD PTR 84[esp]
  346. add edi,DWORD PTR 88[esp]
  347. mov DWORD PTR 4[esp],ebp
  348. mov DWORD PTR 8[esp],ecx
  349. mov DWORD PTR 12[esp],esi
  350. mov DWORD PTR 20[esp],edx
  351. mov DWORD PTR 24[esp],edi
  352. mov ebp,DWORD PTR 28[esp]
  353. mov ecx,DWORD PTR 40[esp]
  354. mov esi,DWORD PTR 44[esp]
  355. mov edx,DWORD PTR 52[esp]
  356. mov edi,DWORD PTR 60[esp]
  357. add ebp,DWORD PTR 92[esp]
  358. add ecx,DWORD PTR 104[esp]
  359. add esi,DWORD PTR 108[esp]
  360. add edx,DWORD PTR 116[esp]
  361. add edi,DWORD PTR 124[esp]
  362. mov DWORD PTR 28[esp],ebp
  ; ebp = input pointer, ecx = output pointer, esi = byte index (starts at 0).
  363. mov ebp,DWORD PTR 156[esp]
  364. mov DWORD PTR 40[esp],ecx
  365. mov ecx,DWORD PTR 152[esp]
  366. mov DWORD PTR 44[esp],esi
  367. xor esi,esi
  368. mov DWORD PTR 52[esp],edx
  369. mov DWORD PTR 60[esp],edi
  370. xor eax,eax
  371. xor edx,edx
  ; Byte loop: out[i] = in[i] XOR keystream[i], for ebx bytes. The store uses
  ; displacement -1 because esi has already been incremented.
  372. $L007tail_loop:
  373. mov al,BYTE PTR [ebp*1+esi]
  374. mov dl,BYTE PTR [esi*1+esp]
  375. lea esi,DWORD PTR 1[esi]
  376. xor al,dl
  377. mov BYTE PTR [esi*1+ecx-1],al
  378. dec ebx
  379. jnz $L007tail_loop
  ; Release the 132-byte frame; the zero-length exit joins below because it
  ; never allocated one.
  380. $L006done:
  381. add esp,132
  382. $L000no_data:
  383. pop edi
  384. pop esi
  385. pop ebx
  386. pop ebp
  387. ret
  388. _ChaCha20_ctr32 ENDP
  389. ALIGN 16
  ; _ChaCha20_ssse3: SSE2/SSSE3 code path producing one 64-byte block per outer
  ; iteration, with the four state rows held in xmm0..xmm3.
  ;
  ; Stack arguments after the four pushes: 20[esp] output pointer, 24[esp]
  ; input pointer, 28[esp] byte length, 32[esp] pointer to 32 bytes loaded into
  ; state rows 1 and 2, 36[esp] pointer to 16 bytes loaded into state row 3.
  ;
  ; NOTE(review): the code at $Lssse3_shortcut reads 4[ebp] and treats eax as
  ; the address of $Lpic_point, yet neither register is initialised between
  ; the PROC entry and that label. Both are set up by _ChaCha20_ctr32 before
  ; it jumps to $Lssse3_shortcut, so entering through the PROC label directly
  ; looks unsupported - confirm with callers.
  ; NOTE(review): unlike the scalar path there is no zero-length check here;
  ; the jump from _ChaCha20_ctr32 has already excluded length 0.
  390. _ChaCha20_ssse3 PROC PUBLIC
  391. $L_ChaCha20_ssse3_begin::
  392. push ebp
  393. push ebx
  394. push esi
  395. push edi
  396. $Lssse3_shortcut::
  ; If bit 11 (2048) of the dword at 4[ebp] is set, use the XOP path instead.
  ; NOTE(review): presumably the XOP capability flag - confirm against the
  ; OPENSSL_ia32cap documentation.
  397. test DWORD PTR 4[ebp],2048
  398. jnz $Lxop_shortcut
  399. mov edi,DWORD PTR 20[esp]
  400. mov esi,DWORD PTR 24[esp]
  401. mov ecx,DWORD PTR 28[esp]
  402. mov edx,DWORD PTR 32[esp]
  403. mov ebx,DWORD PTR 36[esp]
  ; Carve out a 64-byte-aligned scratch area and remember the original esp at
  ; 512[esp] so the epilogue can restore it.
  404. mov ebp,esp
  405. sub esp,524
  406. and esp,-64
  407. mov DWORD PTR 512[esp],ebp
  ; eax = address of $Lssse3_data (eax held the address of $Lpic_point).
  408. lea eax,DWORD PTR ($Lssse3_data-$Lpic_point)[eax]
  409. movdqu xmm3,XMMWORD PTR [ebx]
  ; Load the initial state: xmm0 = constants (32[eax]), xmm1/xmm2 = the 32
  ; bytes at edx, xmm3 = the 16 bytes loaded from ebx above. xmm6 and xmm7
  ; hold the two byte-shuffle masks at [eax] and 16[eax]. A copy of the state
  ; is kept at 0..63[esp].
  410. $L0081x:
  411. movdqa xmm0,XMMWORD PTR 32[eax]
  412. movdqu xmm1,XMMWORD PTR [edx]
  413. movdqu xmm2,XMMWORD PTR 16[edx]
  414. movdqa xmm6,XMMWORD PTR [eax]
  415. movdqa xmm7,XMMWORD PTR 16[eax]
  ; This dword is overwritten by the 16-byte store to 48[esp] a few lines below.
  416. mov DWORD PTR 48[esp],ebp
  417. movdqa XMMWORD PTR [esp],xmm0
  418. movdqa XMMWORD PTR 16[esp],xmm1
  419. movdqa XMMWORD PTR 32[esp],xmm2
  420. movdqa XMMWORD PTR 48[esp],xmm3
  421. mov edx,10
  422. jmp $L009loop1x
  423. ALIGN 16
  ; Next block: reload the saved state and add the vector at 80[eax]
  ; (1,0,0,0) to row 3, i.e. increment its first dword; store the new row 3.
  424. $L010outer1x:
  425. movdqa xmm3,XMMWORD PTR 80[eax]
  426. movdqa xmm0,XMMWORD PTR [esp]
  427. movdqa xmm1,XMMWORD PTR 16[esp]
  428. movdqa xmm2,XMMWORD PTR 32[esp]
  429. paddd xmm3,XMMWORD PTR 48[esp]
  430. mov edx,10
  431. movdqa XMMWORD PTR 48[esp],xmm3
  432. jmp $L009loop1x
  433. ALIGN 16
  ; Round loop, 10 iterations. Each iteration processes all four columns in
  ; parallel, rotates the rows with pshufd so the diagonals line up, processes
  ; them again, then rotates the rows back. Rotations by 12 and 7 are built
  ; from a shift pair plus por; rotations by 16 and 8 use byte shuffles.
  434. $L009loop1x:
  435. paddd xmm0,xmm1
  436. pxor xmm3,xmm0
  ; Hand-encoded "pshufb xmm3,xmm6" (bytes 66 0F 38 00 DE): rotate by 16.
  437. DB 102,15,56,0,222
  438. paddd xmm2,xmm3
  439. pxor xmm1,xmm2
  440. movdqa xmm4,xmm1
  441. psrld xmm1,20
  442. pslld xmm4,12
  443. por xmm1,xmm4
  444. paddd xmm0,xmm1
  445. pxor xmm3,xmm0
  ; Hand-encoded "pshufb xmm3,xmm7" (bytes 66 0F 38 00 DF): rotate left by 8.
  446. DB 102,15,56,0,223
  447. paddd xmm2,xmm3
  448. pxor xmm1,xmm2
  449. movdqa xmm4,xmm1
  450. psrld xmm1,25
  451. pslld xmm4,7
  452. por xmm1,xmm4
  453. pshufd xmm2,xmm2,78
  454. pshufd xmm1,xmm1,57
  455. pshufd xmm3,xmm3,147
  456. nop
  457. paddd xmm0,xmm1
  458. pxor xmm3,xmm0
  459. DB 102,15,56,0,222
  460. paddd xmm2,xmm3
  461. pxor xmm1,xmm2
  462. movdqa xmm4,xmm1
  463. psrld xmm1,20
  464. pslld xmm4,12
  465. por xmm1,xmm4
  466. paddd xmm0,xmm1
  467. pxor xmm3,xmm0
  468. DB 102,15,56,0,223
  469. paddd xmm2,xmm3
  470. pxor xmm1,xmm2
  471. movdqa xmm4,xmm1
  472. psrld xmm1,25
  473. pslld xmm4,7
  474. por xmm1,xmm4
  475. pshufd xmm2,xmm2,78
  476. pshufd xmm1,xmm1,147
  477. pshufd xmm3,xmm3,57
  478. dec edx
  479. jnz $L009loop1x
  ; Add the saved initial state to obtain the keystream block.
  480. paddd xmm0,XMMWORD PTR [esp]
  481. paddd xmm1,XMMWORD PTR 16[esp]
  482. paddd xmm2,XMMWORD PTR 32[esp]
  483. paddd xmm3,XMMWORD PTR 48[esp]
  484. cmp ecx,64
  485. jb $L011tail
  ; Full block: XOR with 64 input bytes (unaligned loads/stores) and advance
  ; both pointers; loop while bytes remain.
  486. movdqu xmm4,XMMWORD PTR [esi]
  487. movdqu xmm5,XMMWORD PTR 16[esi]
  488. pxor xmm0,xmm4
  489. movdqu xmm4,XMMWORD PTR 32[esi]
  490. pxor xmm1,xmm5
  491. movdqu xmm5,XMMWORD PTR 48[esi]
  492. pxor xmm2,xmm4
  493. pxor xmm3,xmm5
  494. lea esi,DWORD PTR 64[esi]
  495. movdqu XMMWORD PTR [edi],xmm0
  496. movdqu XMMWORD PTR 16[edi],xmm1
  497. movdqu XMMWORD PTR 32[edi],xmm2
  498. movdqu XMMWORD PTR 48[edi],xmm3
  499. lea edi,DWORD PTR 64[edi]
  500. sub ecx,64
  501. jnz $L010outer1x
  502. jmp $L012done
  ; Partial final block: spill the keystream to 0..63[esp] (overwriting the
  ; saved state, which is no longer needed) and XOR ecx bytes one at a time.
  503. $L011tail:
  504. movdqa XMMWORD PTR [esp],xmm0
  505. movdqa XMMWORD PTR 16[esp],xmm1
  506. movdqa XMMWORD PTR 32[esp],xmm2
  507. movdqa XMMWORD PTR 48[esp],xmm3
  508. xor eax,eax
  509. xor edx,edx
  510. xor ebp,ebp
  ; ebp = byte index; the store uses displacement -1 because ebp has already
  ; been incremented.
  511. $L013tail_loop:
  512. mov al,BYTE PTR [ebp*1+esp]
  513. mov dl,BYTE PTR [ebp*1+esi]
  514. lea ebp,DWORD PTR 1[ebp]
  515. xor al,dl
  516. mov BYTE PTR [ebp*1+edi-1],al
  517. dec ecx
  518. jnz $L013tail_loop
  ; Restore the pre-alignment stack pointer saved at 512[esp], then the
  ; callee-saved registers.
  519. $L012done:
  520. mov esp,DWORD PTR 512[esp]
  521. pop edi
  522. pop esi
  523. pop ebx
  524. pop ebp
  525. ret
  526. _ChaCha20_ssse3 ENDP
  ; Constant table shared by the SIMD code paths; they address it as
  ; displacement[eax] after computing eax = address of $Lssse3_data.
  527. ALIGN 64
  528. $Lssse3_data::
  ; +0:  byte-shuffle mask that rotates each dword by 16 bits
  529. DB 2,3,0,1,6,7,4,5,10,11,8,9,14,15,12,13
  ; +16: byte-shuffle mask that rotates each dword left by 8 bits
  530. DB 3,0,1,2,7,4,5,6,11,8,9,10,15,12,13,14
  ; +32: state row 0 constants (0x61707865, 0x3320646e, 0x79622d32, 0x6b206574)
  531. DD 1634760805,857760878,2036477234,1797285236
  ; +48: per-lane offsets 0..3 added to the broadcast first dword of row 3 (4-block path)
  532. DD 0,1,2,3
  ; +64: added to / subtracted from all four lanes of that vector (4-block path)
  533. DD 4,4,4,4
  ; +80: increments the first dword of row 3 by 1 (1-block path)
  534. DD 1,0,0,0
  ; +96: adds 4 to the first dword only (hand-over from the 4-block to the 1-block path)
  535. DD 4,0,0,0
  ; +112: AND mask that clears the first dword and keeps the other three
  536. DD 0,-1,-1,-1
  537. ALIGN 64
  ; NUL-terminated ASCII banner:
  ; "ChaCha20 for x86, CRYPTOGAMS by <appro@openssl.org>"
  538. DB 67,104,97,67,104,97,50,48,32,102,111,114,32,120,56,54
  539. DB 44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32
  540. DB 60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111
  541. DB 114,103,62,0
  542. ALIGN 16
  ; _ChaCha20_xop: VEX/XOP-encoded code path. While at least 256 bytes remain
  ; it computes four 64-byte blocks at a time (one block per 32-bit lane);
  ; any remainder is handled one block at a time.
  ;
  ; Stack arguments after the four pushes: 20[esp] output pointer, 24[esp]
  ; input pointer, 28[esp] byte length, 32[esp] pointer to 32 bytes of state
  ; rows 1-2, 36[esp] pointer to 16 bytes of state row 3.
  ;
  ; All "DB 143,232,120,194,modrm,imm" sequences are hand-encoded XOP
  ; "vprotd xmmN,xmmN,imm" (rotate each dword left by imm); the modrm byte
  ; selects the register: 246=xmm6, 210=xmm2, 255=xmm7, 219=xmm3, 201=xmm1.
  ;
  ; NOTE(review): the code after $Lxop_shortcut treats eax as the address of
  ; $Lpic_point, but eax is not set between the PROC entry and that label; it
  ; is established in _ChaCha20_ctr32 before control reaches here via
  ; $Lssse3_shortcut. Entering through the PROC label directly looks
  ; unsupported - confirm with callers.
  543. _ChaCha20_xop PROC PUBLIC
  544. $L_ChaCha20_xop_begin::
  545. push ebp
  546. push ebx
  547. push esi
  548. push edi
  549. $Lxop_shortcut::
  550. mov edi,DWORD PTR 20[esp]
  551. mov esi,DWORD PTR 24[esp]
  552. mov ecx,DWORD PTR 28[esp]
  553. mov edx,DWORD PTR 32[esp]
  554. mov ebx,DWORD PTR 36[esp]
  555. vzeroupper
  ; 64-byte-aligned scratch area; original esp saved at 512[esp].
  556. mov ebp,esp
  557. sub esp,524
  558. and esp,-64
  559. mov DWORD PTR 512[esp],ebp
  ; eax = address of $Lssse3_data.
  560. lea eax,DWORD PTR ($Lssse3_data-$Lpic_point)[eax]
  561. vmovdqu xmm3,XMMWORD PTR [ebx]
  ; Fewer than 256 bytes: go straight to the one-block path.
  562. cmp ecx,256
  563. jb $L0141x
  ; ---- four-block setup ----
  ; Keep the two argument pointers for the later hand-over to the 1x path.
  564. mov DWORD PTR 516[esp],edx
  565. mov DWORD PTR 520[esp],ebx
  ; ecx is kept biased by -256 so "sub ecx,256 / jnc" can drive the loop.
  566. sub ecx,256
  ; ebp = base of the broadcast initial state (esp+384). Layout, 16 bytes per
  ; state word: [ebp-128..-80] row 0, [ebp-64..-16] row 1, [ebp..48] row 2,
  ; [ebp+64..112] row 3.
  567. lea ebp,DWORD PTR 384[esp]
  568. vmovdqu xmm7,XMMWORD PTR [edx]
  ; Broadcast each dword of row 3 to all four lanes.
  569. vpshufd xmm0,xmm3,0
  570. vpshufd xmm1,xmm3,85
  571. vpshufd xmm2,xmm3,170
  572. vpshufd xmm3,xmm3,255
  ; First dword of row 3: lanes get +0,+1,+2,+3 (48[eax]), then -4 (64[eax])
  ; because the outer loop adds 4 before every batch.
  573. vpaddd xmm0,xmm0,XMMWORD PTR 48[eax]
  574. vpshufd xmm4,xmm7,0
  575. vpshufd xmm5,xmm7,85
  576. vpsubd xmm0,xmm0,XMMWORD PTR 64[eax]
  577. vpshufd xmm6,xmm7,170
  578. vpshufd xmm7,xmm7,255
  579. vmovdqa XMMWORD PTR 64[ebp],xmm0
  580. vmovdqa XMMWORD PTR 80[ebp],xmm1
  581. vmovdqa XMMWORD PTR 96[ebp],xmm2
  582. vmovdqa XMMWORD PTR 112[ebp],xmm3
  583. vmovdqu xmm3,XMMWORD PTR 16[edx]
  584. vmovdqa XMMWORD PTR [ebp-64],xmm4
  585. vmovdqa XMMWORD PTR [ebp-48],xmm5
  586. vmovdqa XMMWORD PTR [ebp-32],xmm6
  587. vmovdqa XMMWORD PTR [ebp-16],xmm7
  588. vmovdqa xmm7,XMMWORD PTR 32[eax]
  ; ebx = base of the working copy of the state (esp+128), same layout as ebp.
  589. lea ebx,DWORD PTR 128[esp]
  590. vpshufd xmm0,xmm3,0
  591. vpshufd xmm1,xmm3,85
  592. vpshufd xmm2,xmm3,170
  593. vpshufd xmm3,xmm3,255
  594. vpshufd xmm4,xmm7,0
  595. vpshufd xmm5,xmm7,85
  596. vpshufd xmm6,xmm7,170
  597. vpshufd xmm7,xmm7,255
  598. vmovdqa XMMWORD PTR [ebp],xmm0
  599. vmovdqa XMMWORD PTR 16[ebp],xmm1
  600. vmovdqa XMMWORD PTR 32[ebp],xmm2
  601. vmovdqa XMMWORD PTR 48[ebp],xmm3
  602. vmovdqa XMMWORD PTR [ebp-128],xmm4
  603. vmovdqa XMMWORD PTR [ebp-112],xmm5
  604. vmovdqa XMMWORD PTR [ebp-96],xmm6
  605. vmovdqa XMMWORD PTR [ebp-80],xmm7
  ; Bias both data pointers by +128; the output stage addresses the four
  ; blocks as [-128], [-64], [0], 64[...] relative to them.
  606. lea esi,DWORD PTR 128[esi]
  607. lea edi,DWORD PTR 128[edi]
  608. jmp $L015outer_loop
  609. ALIGN 32
  ; ---- four-block outer loop ----
  ; Copy the initial state into the working area (words that start the round
  ; loop in registers are loaded at the end instead), and advance the per-lane
  ; first dword of row 3 by 4 (64[eax]), writing it back to 64[ebp].
  610. $L015outer_loop:
  611. vmovdqa xmm1,XMMWORD PTR [ebp-112]
  612. vmovdqa xmm2,XMMWORD PTR [ebp-96]
  613. vmovdqa xmm3,XMMWORD PTR [ebp-80]
  614. vmovdqa xmm5,XMMWORD PTR [ebp-48]
  615. vmovdqa xmm6,XMMWORD PTR [ebp-32]
  616. vmovdqa xmm7,XMMWORD PTR [ebp-16]
  617. vmovdqa XMMWORD PTR [ebx-112],xmm1
  618. vmovdqa XMMWORD PTR [ebx-96],xmm2
  619. vmovdqa XMMWORD PTR [ebx-80],xmm3
  620. vmovdqa XMMWORD PTR [ebx-48],xmm5
  621. vmovdqa XMMWORD PTR [ebx-32],xmm6
  622. vmovdqa XMMWORD PTR [ebx-16],xmm7
  623. vmovdqa xmm2,XMMWORD PTR 32[ebp]
  624. vmovdqa xmm3,XMMWORD PTR 48[ebp]
  625. vmovdqa xmm4,XMMWORD PTR 64[ebp]
  626. vmovdqa xmm5,XMMWORD PTR 80[ebp]
  627. vmovdqa xmm6,XMMWORD PTR 96[ebp]
  628. vmovdqa xmm7,XMMWORD PTR 112[ebp]
  629. vpaddd xmm4,xmm4,XMMWORD PTR 64[eax]
  630. vmovdqa XMMWORD PTR 32[ebx],xmm2
  631. vmovdqa XMMWORD PTR 48[ebx],xmm3
  632. vmovdqa XMMWORD PTR 64[ebx],xmm4
  633. vmovdqa XMMWORD PTR 80[ebx],xmm5
  634. vmovdqa XMMWORD PTR 96[ebx],xmm6
  635. vmovdqa XMMWORD PTR 112[ebx],xmm7
  636. vmovdqa XMMWORD PTR 64[ebp],xmm4
  ; Registers for the first column group: xmm0=x0, xmm3=x4, xmm4=x8, xmm6=x12;
  ; xmm5=x9 is preloaded for the second group.
  637. vmovdqa xmm0,XMMWORD PTR [ebp-128]
  638. vmovdqa xmm6,xmm4
  639. vmovdqa xmm3,XMMWORD PTR [ebp-64]
  640. vmovdqa xmm4,XMMWORD PTR [ebp]
  641. vmovdqa xmm5,XMMWORD PTR 16[ebp]
  642. mov edx,10
  643. nop
  644. ALIGN 32
  ; Round loop, 10 iterations. Each iteration performs the add/xor/rotate
  ; (16, 12, 8, 7) sequence on four column groups and then four diagonal
  ; groups, with the loads/stores of neighbouring groups interleaved.
  645. $L016loop:
  646. vpaddd xmm0,xmm0,xmm3
  647. vpxor xmm6,xmm6,xmm0
  648. DB 143,232,120,194,246,16
  649. vpaddd xmm4,xmm4,xmm6
  ; The "b" word arrives in xmm3 here and continues in xmm2.
  650. vpxor xmm2,xmm3,xmm4
  651. vmovdqa xmm1,XMMWORD PTR [ebx-112]
  652. DB 143,232,120,194,210,12
  653. vmovdqa xmm3,XMMWORD PTR [ebx-48]
  654. vpaddd xmm0,xmm0,xmm2
  655. vmovdqa xmm7,XMMWORD PTR 80[ebx]
  656. vpxor xmm6,xmm6,xmm0
  657. vpaddd xmm1,xmm1,xmm3
  658. DB 143,232,120,194,246,8
  659. vmovdqa XMMWORD PTR [ebx-128],xmm0
  660. vpaddd xmm4,xmm4,xmm6
  661. vmovdqa XMMWORD PTR 64[ebx],xmm6
  662. vpxor xmm2,xmm2,xmm4
  663. vpxor xmm7,xmm7,xmm1
  664. DB 143,232,120,194,210,7
  665. vmovdqa XMMWORD PTR [ebx],xmm4
  666. DB 143,232,120,194,255,16
  667. vmovdqa XMMWORD PTR [ebx-64],xmm2
  668. vpaddd xmm5,xmm5,xmm7
  669. vmovdqa xmm4,XMMWORD PTR 32[ebx]
  670. vpxor xmm3,xmm3,xmm5
  671. vmovdqa xmm0,XMMWORD PTR [ebx-96]
  672. DB 143,232,120,194,219,12
  673. vmovdqa xmm2,XMMWORD PTR [ebx-32]
  674. vpaddd xmm1,xmm1,xmm3
  675. vmovdqa xmm6,XMMWORD PTR 96[ebx]
  676. vpxor xmm7,xmm7,xmm1
  677. vpaddd xmm0,xmm0,xmm2
  678. DB 143,232,120,194,255,8
  679. vmovdqa XMMWORD PTR [ebx-112],xmm1
  680. vpaddd xmm5,xmm5,xmm7
  681. vmovdqa XMMWORD PTR 80[ebx],xmm7
  682. vpxor xmm3,xmm3,xmm5
  683. vpxor xmm6,xmm6,xmm0
  684. DB 143,232,120,194,219,7
  685. vmovdqa XMMWORD PTR 16[ebx],xmm5
  686. DB 143,232,120,194,246,16
  687. vmovdqa XMMWORD PTR [ebx-48],xmm3
  688. vpaddd xmm4,xmm4,xmm6
  689. vmovdqa xmm5,XMMWORD PTR 48[ebx]
  690. vpxor xmm2,xmm2,xmm4
  691. vmovdqa xmm1,XMMWORD PTR [ebx-80]
  692. DB 143,232,120,194,210,12
  693. vmovdqa xmm3,XMMWORD PTR [ebx-16]
  694. vpaddd xmm0,xmm0,xmm2
  695. vmovdqa xmm7,XMMWORD PTR 112[ebx]
  696. vpxor xmm6,xmm6,xmm0
  697. vpaddd xmm1,xmm1,xmm3
  698. DB 143,232,120,194,246,8
  699. vmovdqa XMMWORD PTR [ebx-96],xmm0
  700. vpaddd xmm4,xmm4,xmm6
  701. vmovdqa XMMWORD PTR 96[ebx],xmm6
  702. vpxor xmm2,xmm2,xmm4
  703. vpxor xmm7,xmm7,xmm1
  704. DB 143,232,120,194,210,7
  705. DB 143,232,120,194,255,16
  706. vmovdqa XMMWORD PTR [ebx-32],xmm2
  707. vpaddd xmm5,xmm5,xmm7
  708. vpxor xmm3,xmm3,xmm5
  709. vmovdqa xmm0,XMMWORD PTR [ebx-128]
  710. DB 143,232,120,194,219,12
  711. vmovdqa xmm2,XMMWORD PTR [ebx-48]
  712. vpaddd xmm1,xmm1,xmm3
  713. vpxor xmm7,xmm7,xmm1
  714. vpaddd xmm0,xmm0,xmm2
  715. DB 143,232,120,194,255,8
  716. vmovdqa XMMWORD PTR [ebx-80],xmm1
  717. vpaddd xmm5,xmm5,xmm7
  718. vpxor xmm3,xmm3,xmm5
  ; Start of the diagonal groups: x15 (in xmm7) becomes the "d" word in xmm6.
  719. vpxor xmm6,xmm7,xmm0
  720. DB 143,232,120,194,219,7
  721. DB 143,232,120,194,246,16
  722. vmovdqa XMMWORD PTR [ebx-16],xmm3
  723. vpaddd xmm4,xmm4,xmm6
  724. vpxor xmm2,xmm2,xmm4
  725. vmovdqa xmm1,XMMWORD PTR [ebx-112]
  726. DB 143,232,120,194,210,12
  727. vmovdqa xmm3,XMMWORD PTR [ebx-32]
  728. vpaddd xmm0,xmm0,xmm2
  729. vmovdqa xmm7,XMMWORD PTR 64[ebx]
  730. vpxor xmm6,xmm6,xmm0
  731. vpaddd xmm1,xmm1,xmm3
  732. DB 143,232,120,194,246,8
  733. vmovdqa XMMWORD PTR [ebx-128],xmm0
  734. vpaddd xmm4,xmm4,xmm6
  735. vmovdqa XMMWORD PTR 112[ebx],xmm6
  736. vpxor xmm2,xmm2,xmm4
  737. vpxor xmm7,xmm7,xmm1
  738. DB 143,232,120,194,210,7
  739. vmovdqa XMMWORD PTR 32[ebx],xmm4
  740. DB 143,232,120,194,255,16
  741. vmovdqa XMMWORD PTR [ebx-48],xmm2
  742. vpaddd xmm5,xmm5,xmm7
  743. vmovdqa xmm4,XMMWORD PTR [ebx]
  744. vpxor xmm3,xmm3,xmm5
  745. vmovdqa xmm0,XMMWORD PTR [ebx-96]
  746. DB 143,232,120,194,219,12
  747. vmovdqa xmm2,XMMWORD PTR [ebx-16]
  748. vpaddd xmm1,xmm1,xmm3
  749. vmovdqa xmm6,XMMWORD PTR 80[ebx]
  750. vpxor xmm7,xmm7,xmm1
  751. vpaddd xmm0,xmm0,xmm2
  752. DB 143,232,120,194,255,8
  753. vmovdqa XMMWORD PTR [ebx-112],xmm1
  754. vpaddd xmm5,xmm5,xmm7
  755. vmovdqa XMMWORD PTR 64[ebx],xmm7
  756. vpxor xmm3,xmm3,xmm5
  757. vpxor xmm6,xmm6,xmm0
  758. DB 143,232,120,194,219,7
  759. vmovdqa XMMWORD PTR 48[ebx],xmm5
  760. DB 143,232,120,194,246,16
  761. vmovdqa XMMWORD PTR [ebx-32],xmm3
  762. vpaddd xmm4,xmm4,xmm6
  763. vmovdqa xmm5,XMMWORD PTR 16[ebx]
  764. vpxor xmm2,xmm2,xmm4
  765. vmovdqa xmm1,XMMWORD PTR [ebx-80]
  766. DB 143,232,120,194,210,12
  767. vmovdqa xmm3,XMMWORD PTR [ebx-64]
  768. vpaddd xmm0,xmm0,xmm2
  769. vmovdqa xmm7,XMMWORD PTR 96[ebx]
  770. vpxor xmm6,xmm6,xmm0
  771. vpaddd xmm1,xmm1,xmm3
  772. DB 143,232,120,194,246,8
  773. vmovdqa XMMWORD PTR [ebx-96],xmm0
  774. vpaddd xmm4,xmm4,xmm6
  775. vmovdqa XMMWORD PTR 80[ebx],xmm6
  776. vpxor xmm2,xmm2,xmm4
  777. vpxor xmm7,xmm7,xmm1
  778. DB 143,232,120,194,210,7
  779. DB 143,232,120,194,255,16
  780. vmovdqa XMMWORD PTR [ebx-16],xmm2
  781. vpaddd xmm5,xmm5,xmm7
  782. vpxor xmm3,xmm3,xmm5
  783. vmovdqa xmm0,XMMWORD PTR [ebx-128]
  784. DB 143,232,120,194,219,12
  785. vpaddd xmm1,xmm1,xmm3
  786. vmovdqa xmm6,XMMWORD PTR 64[ebx]
  787. vpxor xmm7,xmm7,xmm1
  788. DB 143,232,120,194,255,8
  789. vmovdqa XMMWORD PTR [ebx-80],xmm1
  790. vpaddd xmm5,xmm5,xmm7
  791. vmovdqa XMMWORD PTR 96[ebx],xmm7
  792. vpxor xmm3,xmm3,xmm5
  793. DB 143,232,120,194,219,7
  794. dec edx
  795. jnz $L016loop
  ; Flush the words still held in registers to the working area.
  796. vmovdqa XMMWORD PTR [ebx-64],xmm3
  797. vmovdqa XMMWORD PTR [ebx],xmm4
  798. vmovdqa XMMWORD PTR 16[ebx],xmm5
  799. vmovdqa XMMWORD PTR 64[ebx],xmm6
  800. vmovdqa XMMWORD PTR 96[ebx],xmm7
  ; Output stage, repeated for each group of four state words (rows 0..3):
  ; add the initial state, transpose the 4x4 dword matrix with the
  ; vpunpck* instructions so each register holds 16 consecutive bytes of one
  ; block, XOR with the input, and store 16 bytes into each of the 4 blocks.
  801. vmovdqa xmm1,XMMWORD PTR [ebx-112]
  802. vmovdqa xmm2,XMMWORD PTR [ebx-96]
  803. vmovdqa xmm3,XMMWORD PTR [ebx-80]
  804. vpaddd xmm0,xmm0,XMMWORD PTR [ebp-128]
  805. vpaddd xmm1,xmm1,XMMWORD PTR [ebp-112]
  806. vpaddd xmm2,xmm2,XMMWORD PTR [ebp-96]
  807. vpaddd xmm3,xmm3,XMMWORD PTR [ebp-80]
  808. vpunpckldq xmm6,xmm0,xmm1
  809. vpunpckldq xmm7,xmm2,xmm3
  810. vpunpckhdq xmm0,xmm0,xmm1
  811. vpunpckhdq xmm2,xmm2,xmm3
  812. vpunpcklqdq xmm1,xmm6,xmm7
  813. vpunpckhqdq xmm6,xmm6,xmm7
  814. vpunpcklqdq xmm7,xmm0,xmm2
  815. vpunpckhqdq xmm3,xmm0,xmm2
  816. vpxor xmm4,xmm1,XMMWORD PTR [esi-128]
  817. vpxor xmm5,xmm6,XMMWORD PTR [esi-64]
  818. vpxor xmm6,xmm7,XMMWORD PTR [esi]
  819. vpxor xmm7,xmm3,XMMWORD PTR 64[esi]
  ; Step 16 bytes forward within each block (input side).
  820. lea esi,QWORD PTR 16[esi]
  821. vmovdqa xmm0,XMMWORD PTR [ebx-64]
  822. vmovdqa xmm1,XMMWORD PTR [ebx-48]
  823. vmovdqa xmm2,XMMWORD PTR [ebx-32]
  824. vmovdqa xmm3,XMMWORD PTR [ebx-16]
  825. vmovdqu XMMWORD PTR [edi-128],xmm4
  826. vmovdqu XMMWORD PTR [edi-64],xmm5
  827. vmovdqu XMMWORD PTR [edi],xmm6
  828. vmovdqu XMMWORD PTR 64[edi],xmm7
  ; Step 16 bytes forward within each block (output side).
  829. lea edi,QWORD PTR 16[edi]
  830. vpaddd xmm0,xmm0,XMMWORD PTR [ebp-64]
  831. vpaddd xmm1,xmm1,XMMWORD PTR [ebp-48]
  832. vpaddd xmm2,xmm2,XMMWORD PTR [ebp-32]
  833. vpaddd xmm3,xmm3,XMMWORD PTR [ebp-16]
  834. vpunpckldq xmm6,xmm0,xmm1
  835. vpunpckldq xmm7,xmm2,xmm3
  836. vpunpckhdq xmm0,xmm0,xmm1
  837. vpunpckhdq xmm2,xmm2,xmm3
  838. vpunpcklqdq xmm1,xmm6,xmm7
  839. vpunpckhqdq xmm6,xmm6,xmm7
  840. vpunpcklqdq xmm7,xmm0,xmm2
  841. vpunpckhqdq xmm3,xmm0,xmm2
  842. vpxor xmm4,xmm1,XMMWORD PTR [esi-128]
  843. vpxor xmm5,xmm6,XMMWORD PTR [esi-64]
  844. vpxor xmm6,xmm7,XMMWORD PTR [esi]
  845. vpxor xmm7,xmm3,XMMWORD PTR 64[esi]
  846. lea esi,QWORD PTR 16[esi]
  847. vmovdqa xmm0,XMMWORD PTR [ebx]
  848. vmovdqa xmm1,XMMWORD PTR 16[ebx]
  849. vmovdqa xmm2,XMMWORD PTR 32[ebx]
  850. vmovdqa xmm3,XMMWORD PTR 48[ebx]
  851. vmovdqu XMMWORD PTR [edi-128],xmm4
  852. vmovdqu XMMWORD PTR [edi-64],xmm5
  853. vmovdqu XMMWORD PTR [edi],xmm6
  854. vmovdqu XMMWORD PTR 64[edi],xmm7
  855. lea edi,QWORD PTR 16[edi]
  856. vpaddd xmm0,xmm0,XMMWORD PTR [ebp]
  857. vpaddd xmm1,xmm1,XMMWORD PTR 16[ebp]
  858. vpaddd xmm2,xmm2,XMMWORD PTR 32[ebp]
  859. vpaddd xmm3,xmm3,XMMWORD PTR 48[ebp]
  860. vpunpckldq xmm6,xmm0,xmm1
  861. vpunpckldq xmm7,xmm2,xmm3
  862. vpunpckhdq xmm0,xmm0,xmm1
  863. vpunpckhdq xmm2,xmm2,xmm3
  864. vpunpcklqdq xmm1,xmm6,xmm7
  865. vpunpckhqdq xmm6,xmm6,xmm7
  866. vpunpcklqdq xmm7,xmm0,xmm2
  867. vpunpckhqdq xmm3,xmm0,xmm2
  868. vpxor xmm4,xmm1,XMMWORD PTR [esi-128]
  869. vpxor xmm5,xmm6,XMMWORD PTR [esi-64]
  870. vpxor xmm6,xmm7,XMMWORD PTR [esi]
  871. vpxor xmm7,xmm3,XMMWORD PTR 64[esi]
  872. lea esi,QWORD PTR 16[esi]
  873. vmovdqa xmm0,XMMWORD PTR 64[ebx]
  874. vmovdqa xmm1,XMMWORD PTR 80[ebx]
  875. vmovdqa xmm2,XMMWORD PTR 96[ebx]
  876. vmovdqa xmm3,XMMWORD PTR 112[ebx]
  877. vmovdqu XMMWORD PTR [edi-128],xmm4
  878. vmovdqu XMMWORD PTR [edi-64],xmm5
  879. vmovdqu XMMWORD PTR [edi],xmm6
  880. vmovdqu XMMWORD PTR 64[edi],xmm7
  881. lea edi,QWORD PTR 16[edi]
  882. vpaddd xmm0,xmm0,XMMWORD PTR 64[ebp]
  883. vpaddd xmm1,xmm1,XMMWORD PTR 80[ebp]
  884. vpaddd xmm2,xmm2,XMMWORD PTR 96[ebp]
  885. vpaddd xmm3,xmm3,XMMWORD PTR 112[ebp]
  886. vpunpckldq xmm6,xmm0,xmm1
  887. vpunpckldq xmm7,xmm2,xmm3
  888. vpunpckhdq xmm0,xmm0,xmm1
  889. vpunpckhdq xmm2,xmm2,xmm3
  890. vpunpcklqdq xmm1,xmm6,xmm7
  891. vpunpckhqdq xmm6,xmm6,xmm7
  892. vpunpcklqdq xmm7,xmm0,xmm2
  893. vpunpckhqdq xmm3,xmm0,xmm2
  894. vpxor xmm4,xmm1,XMMWORD PTR [esi-128]
  895. vpxor xmm5,xmm6,XMMWORD PTR [esi-64]
  896. vpxor xmm6,xmm7,XMMWORD PTR [esi]
  897. vpxor xmm7,xmm3,XMMWORD PTR 64[esi]
  ; 3*16 + 208 = 256: the pointer now addresses the next four-block batch.
  898. lea esi,QWORD PTR 208[esi]
  899. vmovdqu XMMWORD PTR [edi-128],xmm4
  900. vmovdqu XMMWORD PTR [edi-64],xmm5
  901. vmovdqu XMMWORD PTR [edi],xmm6
  902. vmovdqu XMMWORD PTR 64[edi],xmm7
  903. lea edi,QWORD PTR 208[edi]
  ; Loop while at least another 256 bytes remain (no borrow).
  904. sub ecx,256
  905. jnc $L015outer_loop
  ; Undo the -256 bias; zero means the input was an exact multiple of 256.
  906. add ecx,256
  907. jz $L017done
  ; Hand over to the one-block path: restore the argument pointers, remove
  ; the +128 pointer bias, and rebuild row 3 as
  ; (lane-0 value from 64[ebp] + 4, remaining three dwords from the original
  ; 16 bytes at ebx).
  908. mov ebx,DWORD PTR 520[esp]
  909. lea esi,DWORD PTR [esi-128]
  910. mov edx,DWORD PTR 516[esp]
  911. lea edi,DWORD PTR [edi-128]
  912. vmovd xmm2,DWORD PTR 64[ebp]
  913. vmovdqu xmm3,XMMWORD PTR [ebx]
  914. vpaddd xmm2,xmm2,XMMWORD PTR 96[eax]
  915. vpand xmm3,xmm3,XMMWORD PTR 112[eax]
  916. vpor xmm3,xmm3,xmm2
  ; ---- one-block path ----
  ; xmm0 = constants, xmm1/xmm2 = the 32 bytes at edx, xmm3 = row 3; a copy
  ; of the state is kept at 0..63[esp].
  917. $L0141x:
  918. vmovdqa xmm0,XMMWORD PTR 32[eax]
  919. vmovdqu xmm1,XMMWORD PTR [edx]
  920. vmovdqu xmm2,XMMWORD PTR 16[edx]
  ; NOTE(review): xmm6/xmm7 receive the byte-shuffle masks but the XOP round
  ; loop below rotates with vprotd and does not appear to read them.
  921. vmovdqa xmm6,XMMWORD PTR [eax]
  922. vmovdqa xmm7,XMMWORD PTR 16[eax]
  ; This dword is overwritten by the 16-byte store to 48[esp] a few lines below.
  923. mov DWORD PTR 48[esp],ebp
  924. vmovdqa XMMWORD PTR [esp],xmm0
  925. vmovdqa XMMWORD PTR 16[esp],xmm1
  926. vmovdqa XMMWORD PTR 32[esp],xmm2
  927. vmovdqa XMMWORD PTR 48[esp],xmm3
  928. mov edx,10
  929. jmp $L018loop1x
  930. ALIGN 16
  ; Next block: reload the saved state and increment the first dword of row 3
  ; by adding the vector at 80[eax] (1,0,0,0).
  931. $L019outer1x:
  932. vmovdqa xmm3,XMMWORD PTR 80[eax]
  933. vmovdqa xmm0,XMMWORD PTR [esp]
  934. vmovdqa xmm1,XMMWORD PTR 16[esp]
  935. vmovdqa xmm2,XMMWORD PTR 32[esp]
  936. vpaddd xmm3,xmm3,XMMWORD PTR 48[esp]
  937. mov edx,10
  938. vmovdqa XMMWORD PTR 48[esp],xmm3
  939. jmp $L018loop1x
  940. ALIGN 16
  ; Round loop, 10 iterations: columns, row rotation via vpshufd, diagonals,
  ; row rotation back. All four rotate amounts use vprotd.
  941. $L018loop1x:
  942. vpaddd xmm0,xmm0,xmm1
  943. vpxor xmm3,xmm3,xmm0
  944. DB 143,232,120,194,219,16
  945. vpaddd xmm2,xmm2,xmm3
  946. vpxor xmm1,xmm1,xmm2
  947. DB 143,232,120,194,201,12
  948. vpaddd xmm0,xmm0,xmm1
  949. vpxor xmm3,xmm3,xmm0
  950. DB 143,232,120,194,219,8
  951. vpaddd xmm2,xmm2,xmm3
  952. vpxor xmm1,xmm1,xmm2
  953. DB 143,232,120,194,201,7
  954. vpshufd xmm2,xmm2,78
  955. vpshufd xmm1,xmm1,57
  956. vpshufd xmm3,xmm3,147
  957. vpaddd xmm0,xmm0,xmm1
  958. vpxor xmm3,xmm3,xmm0
  959. DB 143,232,120,194,219,16
  960. vpaddd xmm2,xmm2,xmm3
  961. vpxor xmm1,xmm1,xmm2
  962. DB 143,232,120,194,201,12
  963. vpaddd xmm0,xmm0,xmm1
  964. vpxor xmm3,xmm3,xmm0
  965. DB 143,232,120,194,219,8
  966. vpaddd xmm2,xmm2,xmm3
  967. vpxor xmm1,xmm1,xmm2
  968. DB 143,232,120,194,201,7
  969. vpshufd xmm2,xmm2,78
  970. vpshufd xmm1,xmm1,147
  971. vpshufd xmm3,xmm3,57
  972. dec edx
  973. jnz $L018loop1x
  ; Add the saved initial state to obtain the keystream block.
  974. vpaddd xmm0,xmm0,XMMWORD PTR [esp]
  975. vpaddd xmm1,xmm1,XMMWORD PTR 16[esp]
  976. vpaddd xmm2,xmm2,XMMWORD PTR 32[esp]
  977. vpaddd xmm3,xmm3,XMMWORD PTR 48[esp]
  978. cmp ecx,64
  979. jb $L020tail
  ; Full block: XOR directly against the input memory operands, store, and
  ; advance both pointers; loop while bytes remain.
  980. vpxor xmm0,xmm0,XMMWORD PTR [esi]
  981. vpxor xmm1,xmm1,XMMWORD PTR 16[esi]
  982. vpxor xmm2,xmm2,XMMWORD PTR 32[esi]
  983. vpxor xmm3,xmm3,XMMWORD PTR 48[esi]
  984. lea esi,DWORD PTR 64[esi]
  985. vmovdqu XMMWORD PTR [edi],xmm0
  986. vmovdqu XMMWORD PTR 16[edi],xmm1
  987. vmovdqu XMMWORD PTR 32[edi],xmm2
  988. vmovdqu XMMWORD PTR 48[edi],xmm3
  989. lea edi,DWORD PTR 64[edi]
  990. sub ecx,64
  991. jnz $L019outer1x
  992. jmp $L017done
  ; Partial final block: spill the keystream to 0..63[esp] and XOR ecx bytes
  ; one at a time.
  993. $L020tail:
  994. vmovdqa XMMWORD PTR [esp],xmm0
  995. vmovdqa XMMWORD PTR 16[esp],xmm1
  996. vmovdqa XMMWORD PTR 32[esp],xmm2
  997. vmovdqa XMMWORD PTR 48[esp],xmm3
  998. xor eax,eax
  999. xor edx,edx
  1000. xor ebp,ebp
  ; ebp = byte index; the store uses displacement -1 because ebp has already
  ; been incremented.
  1001. $L021tail_loop:
  1002. mov al,BYTE PTR [ebp*1+esp]
  1003. mov dl,BYTE PTR [ebp*1+esi]
  1004. lea ebp,DWORD PTR 1[ebp]
  1005. xor al,dl
  1006. mov BYTE PTR [ebp*1+edi-1],al
  1007. dec ecx
  1008. jnz $L021tail_loop
  ; Clear upper vector state, restore the pre-alignment stack pointer saved
  ; at 512[esp], then the callee-saved registers.
  1009. $L017done:
  1010. vzeroupper
  1011. mov esp,DWORD PTR 512[esp]
  1012. pop edi
  1013. pop esi
  1014. pop ebx
  1015. pop ebp
  1016. ret
  1017. _ChaCha20_xop ENDP
  ; Close the code segment.
  1018. .text$ ENDS
  ; Communal (COMM) declaration of _OPENSSL_ia32cap_P as 4 DWORDs. The
  ; dispatch code in _ChaCha20_ctr32 reads dwords 0 and 1 of it as
  ; capability bits.
  1019. .bss SEGMENT 'BSS'
  1020. COMM _OPENSSL_ia32cap_P:DWORD:4
  1021. .bss ENDS
  1022. END