sha256-x86_64.masm 99 KB


  1. OPTION DOTNAME
  2. .text$ SEGMENT ALIGN(256) 'CODE'
  3. EXTERN OPENSSL_ia32cap_P:NEAR
  4. PUBLIC sha256_block_data_order
  5. ALIGN 16
  ;-----------------------------------------------------------------------
  ; sha256_block_data_order -- SHA-256 block transform, integer-register
  ; code path, with run-time dispatch to other implementations.
  ;
  ; ABI:  Microsoft x64.
  ; In:   rcx = pointer to the eight 32-bit hash state words (read, then
  ;             updated in place after every block)
  ;       rdx = pointer to the input data (read as big-endian dwords)
  ;       r8  = number of 64-byte blocks
  ; Out:  state at [rcx] updated; no value is returned in rax.
  ;
  ; The arguments are first moved to rdi/rsi/rdx; the caller's rdi/rsi are
  ; parked at [8+rsp]/[16+rsp] on entry and reloaded before returning.
  ;
  ; Register roles in the scalar path:
  ;   eax,ebx,ecx,edx,r8d,r9d,r10d,r11d  working variables a..h; the roles
  ;                                      rotate by one register per round
  ;                                      instead of the values being moved
  ;   r12d   T1 accumulator (W + h + Ch + K + Sigma1)
  ;   r13d   Sigma1(e) scratch      r14d   Sigma0(a) scratch
  ;   r15d / edi   alternate between the Ch scratch and the a^b value
  ;                that is carried into the next round's Maj computation
  ;   rbp    cursor into the K256 constant table
  ;   rsi    cursor into the input data
  ;
  ; Stack frame after the prologue (rsp aligned down to 64 bytes):
  ;   [ 0..63+rsp]  16-dword message-schedule ring buffer
  ;   [64+rsp]      state pointer   (edi is used as scratch in the rounds)
  ;   [72+rsp]      input pointer as passed in
  ;   [80+rsp]      end-of-input pointer (input + blocks*64)
  ;   [88+rsp]      rsp value taken just before the register pushes
  ;
  ; NOTE(review): the block loop tests its bound only after a block has
  ; been processed, so a block count of 0 still consumes one block --
  ; callers presumably never pass 0; confirm.
  ;-----------------------------------------------------------------------
  6. sha256_block_data_order PROC PUBLIC
  7. mov QWORD PTR[8+rsp],rdi ;WIN64 prologue
  8. mov QWORD PTR[16+rsp],rsi
  9. mov rax,rsp
  10. $L$SEH_begin_sha256_block_data_order::
  ; Shuffle the three arguments from rcx/rdx/r8 into rdi/rsi/rdx.
  11. mov rdi,rcx
  12. mov rsi,rdx
  13. mov rdx,r8
  ; Run-time dispatch on the first three dwords of OPENSSL_ia32cap_P.
  ; The shortcut labels other than _shaext_shortcut are defined outside
  ; this view.
  14. lea r11,QWORD PTR[OPENSSL_ia32cap_P]
  15. mov r9d,DWORD PTR[r11]
  16. mov r10d,DWORD PTR[4+r11]
  17. mov r11d,DWORD PTR[8+r11]
  ; 536870912 = 1<<29 of the third dword.
  18. test r11d,536870912
  19. jnz _shaext_shortcut
  ; 296 = bits 3, 5 and 8 of the third dword; all three must be set.
  20. and r11d,296
  21. cmp r11d,296
  22. je $L$avx2_shortcut
  ; Combine bit 30 of the first dword with bits 28 and 9 of the second;
  ; 1342177792 = (1<<30)|(1<<28)|(1<<9), i.e. all three set.
  23. and r9d,1073741824
  24. and r10d,268435968
  25. or r10d,r9d
  26. cmp r10d,1342177792
  27. je $L$avx_shortcut
  ; 512 = bit 9 of the second dword alone.
  28. test r10d,512
  29. jnz $L$ssse3_shortcut
  ; Scalar path: remember the incoming rsp, save the callee-saved
  ; registers used below, and carve out the aligned frame.
  30. mov rax,rsp
  31. push rbx
  32. push rbp
  33. push r12
  34. push r13
  35. push r14
  36. push r15
  ; rdx = rsi + count*64 (shl by 4 here, *4 in the lea) = end of input.
  37. shl rdx,4
  38. sub rsp,16*4+4*8
  39. lea rdx,QWORD PTR[rdx*4+rsi]
  40. and rsp,-64
  41. mov QWORD PTR[((64+0))+rsp],rdi
  42. mov QWORD PTR[((64+8))+rsp],rsi
  43. mov QWORD PTR[((64+16))+rsp],rdx
  44. mov QWORD PTR[88+rsp],rax
  45. $L$prologue::
  ; Load the eight state words into the working variables a..h.
  46. mov eax,DWORD PTR[rdi]
  47. mov ebx,DWORD PTR[4+rdi]
  48. mov ecx,DWORD PTR[8+rdi]
  49. mov edx,DWORD PTR[12+rdi]
  50. mov r8d,DWORD PTR[16+rdi]
  51. mov r9d,DWORD PTR[20+rdi]
  52. mov r10d,DWORD PTR[24+rdi]
  53. mov r11d,DWORD PTR[28+rdi]
  54. jmp $L$loop
  55. ALIGN 16
  ; ---- per-block loop: one iteration per 64 bytes of input ----
  ; Rounds 0..15 take the message word straight from the input: load,
  ; bswap, store into schedule slot [4*i+rsp].  Within each round:
  ;   ror 14 / xor / ror 5 / xor / ror 6 on r13d builds
  ;       ror(e,6)^ror(e,11)^ror(e,25)                       (Sigma1)
  ;   ror 9 / xor / ror 11 / xor / ror 2 on r14d builds
  ;       ror(a,2)^ror(a,13)^ror(a,22)                       (Sigma0)
  ;   ((f^g)&e)^g                                            (Ch)
  ;   b^((a^b)&(b^c)), with b^c carried over from the previous round (Maj)
  ; The final "add <new a>,r14d" (Sigma0) of each round is issued after
  ; the table-pointer update, i.e. interleaved with the next round.
  ; rbp advances 4 bytes per round and 20 on every fourth round, which
  ; steps over the duplicated 16-byte row in K256.
  56. $L$loop::
  ; edi = b^c, the seed for round 0's Maj term.
  57. mov edi,ebx
  58. lea rbp,QWORD PTR[K256]
  59. xor edi,ecx
  ; -- round 0 --
  60. mov r12d,DWORD PTR[rsi]
  61. mov r13d,r8d
  62. mov r14d,eax
  63. bswap r12d
  64. ror r13d,14
  65. mov r15d,r9d
  66. xor r13d,r8d
  67. ror r14d,9
  68. xor r15d,r10d
  69. mov DWORD PTR[rsp],r12d
  70. xor r14d,eax
  71. and r15d,r8d
  72. ror r13d,5
  73. add r12d,r11d
  74. xor r15d,r10d
  75. ror r14d,11
  76. xor r13d,r8d
  77. add r12d,r15d
  78. mov r15d,eax
  79. add r12d,DWORD PTR[rbp]
  80. xor r14d,eax
  81. xor r15d,ebx
  82. ror r13d,6
  83. mov r11d,ebx
  84. and edi,r15d
  85. ror r14d,2
  86. add r12d,r13d
  87. xor r11d,edi
  88. add edx,r12d
  89. add r11d,r12d
  90. lea rbp,QWORD PTR[4+rbp]
  91. add r11d,r14d
  ; -- round 1 --
  92. mov r12d,DWORD PTR[4+rsi]
  93. mov r13d,edx
  94. mov r14d,r11d
  95. bswap r12d
  96. ror r13d,14
  97. mov edi,r8d
  98. xor r13d,edx
  99. ror r14d,9
  100. xor edi,r9d
  101. mov DWORD PTR[4+rsp],r12d
  102. xor r14d,r11d
  103. and edi,edx
  104. ror r13d,5
  105. add r12d,r10d
  106. xor edi,r9d
  107. ror r14d,11
  108. xor r13d,edx
  109. add r12d,edi
  110. mov edi,r11d
  111. add r12d,DWORD PTR[rbp]
  112. xor r14d,r11d
  113. xor edi,eax
  114. ror r13d,6
  115. mov r10d,eax
  116. and r15d,edi
  117. ror r14d,2
  118. add r12d,r13d
  119. xor r10d,r15d
  120. add ecx,r12d
  121. add r10d,r12d
  122. lea rbp,QWORD PTR[4+rbp]
  123. add r10d,r14d
  ; -- round 2 --
  124. mov r12d,DWORD PTR[8+rsi]
  125. mov r13d,ecx
  126. mov r14d,r10d
  127. bswap r12d
  128. ror r13d,14
  129. mov r15d,edx
  130. xor r13d,ecx
  131. ror r14d,9
  132. xor r15d,r8d
  133. mov DWORD PTR[8+rsp],r12d
  134. xor r14d,r10d
  135. and r15d,ecx
  136. ror r13d,5
  137. add r12d,r9d
  138. xor r15d,r8d
  139. ror r14d,11
  140. xor r13d,ecx
  141. add r12d,r15d
  142. mov r15d,r10d
  143. add r12d,DWORD PTR[rbp]
  144. xor r14d,r10d
  145. xor r15d,r11d
  146. ror r13d,6
  147. mov r9d,r11d
  148. and edi,r15d
  149. ror r14d,2
  150. add r12d,r13d
  151. xor r9d,edi
  152. add ebx,r12d
  153. add r9d,r12d
  154. lea rbp,QWORD PTR[4+rbp]
  155. add r9d,r14d
  ; -- round 3 (table pointer skips the duplicate row: +20) --
  156. mov r12d,DWORD PTR[12+rsi]
  157. mov r13d,ebx
  158. mov r14d,r9d
  159. bswap r12d
  160. ror r13d,14
  161. mov edi,ecx
  162. xor r13d,ebx
  163. ror r14d,9
  164. xor edi,edx
  165. mov DWORD PTR[12+rsp],r12d
  166. xor r14d,r9d
  167. and edi,ebx
  168. ror r13d,5
  169. add r12d,r8d
  170. xor edi,edx
  171. ror r14d,11
  172. xor r13d,ebx
  173. add r12d,edi
  174. mov edi,r9d
  175. add r12d,DWORD PTR[rbp]
  176. xor r14d,r9d
  177. xor edi,r10d
  178. ror r13d,6
  179. mov r8d,r10d
  180. and r15d,edi
  181. ror r14d,2
  182. add r12d,r13d
  183. xor r8d,r15d
  184. add eax,r12d
  185. add r8d,r12d
  186. lea rbp,QWORD PTR[20+rbp]
  187. add r8d,r14d
  ; -- round 4 --
  188. mov r12d,DWORD PTR[16+rsi]
  189. mov r13d,eax
  190. mov r14d,r8d
  191. bswap r12d
  192. ror r13d,14
  193. mov r15d,ebx
  194. xor r13d,eax
  195. ror r14d,9
  196. xor r15d,ecx
  197. mov DWORD PTR[16+rsp],r12d
  198. xor r14d,r8d
  199. and r15d,eax
  200. ror r13d,5
  201. add r12d,edx
  202. xor r15d,ecx
  203. ror r14d,11
  204. xor r13d,eax
  205. add r12d,r15d
  206. mov r15d,r8d
  207. add r12d,DWORD PTR[rbp]
  208. xor r14d,r8d
  209. xor r15d,r9d
  210. ror r13d,6
  211. mov edx,r9d
  212. and edi,r15d
  213. ror r14d,2
  214. add r12d,r13d
  215. xor edx,edi
  216. add r11d,r12d
  217. add edx,r12d
  218. lea rbp,QWORD PTR[4+rbp]
  219. add edx,r14d
  ; -- round 5 --
  220. mov r12d,DWORD PTR[20+rsi]
  221. mov r13d,r11d
  222. mov r14d,edx
  223. bswap r12d
  224. ror r13d,14
  225. mov edi,eax
  226. xor r13d,r11d
  227. ror r14d,9
  228. xor edi,ebx
  229. mov DWORD PTR[20+rsp],r12d
  230. xor r14d,edx
  231. and edi,r11d
  232. ror r13d,5
  233. add r12d,ecx
  234. xor edi,ebx
  235. ror r14d,11
  236. xor r13d,r11d
  237. add r12d,edi
  238. mov edi,edx
  239. add r12d,DWORD PTR[rbp]
  240. xor r14d,edx
  241. xor edi,r8d
  242. ror r13d,6
  243. mov ecx,r8d
  244. and r15d,edi
  245. ror r14d,2
  246. add r12d,r13d
  247. xor ecx,r15d
  248. add r10d,r12d
  249. add ecx,r12d
  250. lea rbp,QWORD PTR[4+rbp]
  251. add ecx,r14d
  ; -- round 6 --
  252. mov r12d,DWORD PTR[24+rsi]
  253. mov r13d,r10d
  254. mov r14d,ecx
  255. bswap r12d
  256. ror r13d,14
  257. mov r15d,r11d
  258. xor r13d,r10d
  259. ror r14d,9
  260. xor r15d,eax
  261. mov DWORD PTR[24+rsp],r12d
  262. xor r14d,ecx
  263. and r15d,r10d
  264. ror r13d,5
  265. add r12d,ebx
  266. xor r15d,eax
  267. ror r14d,11
  268. xor r13d,r10d
  269. add r12d,r15d
  270. mov r15d,ecx
  271. add r12d,DWORD PTR[rbp]
  272. xor r14d,ecx
  273. xor r15d,edx
  274. ror r13d,6
  275. mov ebx,edx
  276. and edi,r15d
  277. ror r14d,2
  278. add r12d,r13d
  279. xor ebx,edi
  280. add r9d,r12d
  281. add ebx,r12d
  282. lea rbp,QWORD PTR[4+rbp]
  283. add ebx,r14d
  ; -- round 7 (+20) --
  284. mov r12d,DWORD PTR[28+rsi]
  285. mov r13d,r9d
  286. mov r14d,ebx
  287. bswap r12d
  288. ror r13d,14
  289. mov edi,r10d
  290. xor r13d,r9d
  291. ror r14d,9
  292. xor edi,r11d
  293. mov DWORD PTR[28+rsp],r12d
  294. xor r14d,ebx
  295. and edi,r9d
  296. ror r13d,5
  297. add r12d,eax
  298. xor edi,r11d
  299. ror r14d,11
  300. xor r13d,r9d
  301. add r12d,edi
  302. mov edi,ebx
  303. add r12d,DWORD PTR[rbp]
  304. xor r14d,ebx
  305. xor edi,ecx
  306. ror r13d,6
  307. mov eax,ecx
  308. and r15d,edi
  309. ror r14d,2
  310. add r12d,r13d
  311. xor eax,r15d
  312. add r8d,r12d
  313. add eax,r12d
  314. lea rbp,QWORD PTR[20+rbp]
  315. add eax,r14d
  ; -- round 8 (register roles are back in their round-0 assignment) --
  316. mov r12d,DWORD PTR[32+rsi]
  317. mov r13d,r8d
  318. mov r14d,eax
  319. bswap r12d
  320. ror r13d,14
  321. mov r15d,r9d
  322. xor r13d,r8d
  323. ror r14d,9
  324. xor r15d,r10d
  325. mov DWORD PTR[32+rsp],r12d
  326. xor r14d,eax
  327. and r15d,r8d
  328. ror r13d,5
  329. add r12d,r11d
  330. xor r15d,r10d
  331. ror r14d,11
  332. xor r13d,r8d
  333. add r12d,r15d
  334. mov r15d,eax
  335. add r12d,DWORD PTR[rbp]
  336. xor r14d,eax
  337. xor r15d,ebx
  338. ror r13d,6
  339. mov r11d,ebx
  340. and edi,r15d
  341. ror r14d,2
  342. add r12d,r13d
  343. xor r11d,edi
  344. add edx,r12d
  345. add r11d,r12d
  346. lea rbp,QWORD PTR[4+rbp]
  347. add r11d,r14d
  ; -- round 9 --
  348. mov r12d,DWORD PTR[36+rsi]
  349. mov r13d,edx
  350. mov r14d,r11d
  351. bswap r12d
  352. ror r13d,14
  353. mov edi,r8d
  354. xor r13d,edx
  355. ror r14d,9
  356. xor edi,r9d
  357. mov DWORD PTR[36+rsp],r12d
  358. xor r14d,r11d
  359. and edi,edx
  360. ror r13d,5
  361. add r12d,r10d
  362. xor edi,r9d
  363. ror r14d,11
  364. xor r13d,edx
  365. add r12d,edi
  366. mov edi,r11d
  367. add r12d,DWORD PTR[rbp]
  368. xor r14d,r11d
  369. xor edi,eax
  370. ror r13d,6
  371. mov r10d,eax
  372. and r15d,edi
  373. ror r14d,2
  374. add r12d,r13d
  375. xor r10d,r15d
  376. add ecx,r12d
  377. add r10d,r12d
  378. lea rbp,QWORD PTR[4+rbp]
  379. add r10d,r14d
  ; -- round 10 --
  380. mov r12d,DWORD PTR[40+rsi]
  381. mov r13d,ecx
  382. mov r14d,r10d
  383. bswap r12d
  384. ror r13d,14
  385. mov r15d,edx
  386. xor r13d,ecx
  387. ror r14d,9
  388. xor r15d,r8d
  389. mov DWORD PTR[40+rsp],r12d
  390. xor r14d,r10d
  391. and r15d,ecx
  392. ror r13d,5
  393. add r12d,r9d
  394. xor r15d,r8d
  395. ror r14d,11
  396. xor r13d,ecx
  397. add r12d,r15d
  398. mov r15d,r10d
  399. add r12d,DWORD PTR[rbp]
  400. xor r14d,r10d
  401. xor r15d,r11d
  402. ror r13d,6
  403. mov r9d,r11d
  404. and edi,r15d
  405. ror r14d,2
  406. add r12d,r13d
  407. xor r9d,edi
  408. add ebx,r12d
  409. add r9d,r12d
  410. lea rbp,QWORD PTR[4+rbp]
  411. add r9d,r14d
  ; -- round 11 (+20) --
  412. mov r12d,DWORD PTR[44+rsi]
  413. mov r13d,ebx
  414. mov r14d,r9d
  415. bswap r12d
  416. ror r13d,14
  417. mov edi,ecx
  418. xor r13d,ebx
  419. ror r14d,9
  420. xor edi,edx
  421. mov DWORD PTR[44+rsp],r12d
  422. xor r14d,r9d
  423. and edi,ebx
  424. ror r13d,5
  425. add r12d,r8d
  426. xor edi,edx
  427. ror r14d,11
  428. xor r13d,ebx
  429. add r12d,edi
  430. mov edi,r9d
  431. add r12d,DWORD PTR[rbp]
  432. xor r14d,r9d
  433. xor edi,r10d
  434. ror r13d,6
  435. mov r8d,r10d
  436. and r15d,edi
  437. ror r14d,2
  438. add r12d,r13d
  439. xor r8d,r15d
  440. add eax,r12d
  441. add r8d,r12d
  442. lea rbp,QWORD PTR[20+rbp]
  443. add r8d,r14d
  ; -- round 12 --
  444. mov r12d,DWORD PTR[48+rsi]
  445. mov r13d,eax
  446. mov r14d,r8d
  447. bswap r12d
  448. ror r13d,14
  449. mov r15d,ebx
  450. xor r13d,eax
  451. ror r14d,9
  452. xor r15d,ecx
  453. mov DWORD PTR[48+rsp],r12d
  454. xor r14d,r8d
  455. and r15d,eax
  456. ror r13d,5
  457. add r12d,edx
  458. xor r15d,ecx
  459. ror r14d,11
  460. xor r13d,eax
  461. add r12d,r15d
  462. mov r15d,r8d
  463. add r12d,DWORD PTR[rbp]
  464. xor r14d,r8d
  465. xor r15d,r9d
  466. ror r13d,6
  467. mov edx,r9d
  468. and edi,r15d
  469. ror r14d,2
  470. add r12d,r13d
  471. xor edx,edi
  472. add r11d,r12d
  473. add edx,r12d
  474. lea rbp,QWORD PTR[4+rbp]
  475. add edx,r14d
  ; -- round 13 --
  476. mov r12d,DWORD PTR[52+rsi]
  477. mov r13d,r11d
  478. mov r14d,edx
  479. bswap r12d
  480. ror r13d,14
  481. mov edi,eax
  482. xor r13d,r11d
  483. ror r14d,9
  484. xor edi,ebx
  485. mov DWORD PTR[52+rsp],r12d
  486. xor r14d,edx
  487. and edi,r11d
  488. ror r13d,5
  489. add r12d,ecx
  490. xor edi,ebx
  491. ror r14d,11
  492. xor r13d,r11d
  493. add r12d,edi
  494. mov edi,edx
  495. add r12d,DWORD PTR[rbp]
  496. xor r14d,edx
  497. xor edi,r8d
  498. ror r13d,6
  499. mov ecx,r8d
  500. and r15d,edi
  501. ror r14d,2
  502. add r12d,r13d
  503. xor ecx,r15d
  504. add r10d,r12d
  505. add ecx,r12d
  506. lea rbp,QWORD PTR[4+rbp]
  507. add ecx,r14d
  ; -- round 14 --
  508. mov r12d,DWORD PTR[56+rsi]
  509. mov r13d,r10d
  510. mov r14d,ecx
  511. bswap r12d
  512. ror r13d,14
  513. mov r15d,r11d
  514. xor r13d,r10d
  515. ror r14d,9
  516. xor r15d,eax
  517. mov DWORD PTR[56+rsp],r12d
  518. xor r14d,ecx
  519. and r15d,r10d
  520. ror r13d,5
  521. add r12d,ebx
  522. xor r15d,eax
  523. ror r14d,11
  524. xor r13d,r10d
  525. add r12d,r15d
  526. mov r15d,ecx
  527. add r12d,DWORD PTR[rbp]
  528. xor r14d,ecx
  529. xor r15d,edx
  530. ror r13d,6
  531. mov ebx,edx
  532. and edi,r15d
  533. ror r14d,2
  534. add r12d,r13d
  535. xor ebx,edi
  536. add r9d,r12d
  537. add ebx,r12d
  538. lea rbp,QWORD PTR[4+rbp]
  539. add ebx,r14d
  ; -- round 15 (+20); its deferred "add eax,r14d" is the third
  ;    instruction-pair into $L$rounds_16_xx --
  540. mov r12d,DWORD PTR[60+rsi]
  541. mov r13d,r9d
  542. mov r14d,ebx
  543. bswap r12d
  544. ror r13d,14
  545. mov edi,r10d
  546. xor r13d,r9d
  547. ror r14d,9
  548. xor edi,r11d
  549. mov DWORD PTR[60+rsp],r12d
  550. xor r14d,ebx
  551. and edi,r9d
  552. ror r13d,5
  553. add r12d,eax
  554. xor edi,r11d
  555. ror r14d,11
  556. xor r13d,r9d
  557. add r12d,edi
  558. mov edi,ebx
  559. add r12d,DWORD PTR[rbp]
  560. xor r14d,ebx
  561. xor edi,ecx
  562. ror r13d,6
  563. mov eax,ecx
  564. and r15d,edi
  565. ror r14d,2
  566. add r12d,r13d
  567. xor eax,r15d
  568. add r8d,r12d
  569. add eax,r12d
  570. lea rbp,QWORD PTR[20+rbp]
  571. jmp $L$rounds_16_xx
  572. ALIGN 16
  ; ---- rounds 16..63: sixteen unrolled rounds, executed three times ----
  ; Each round first refreshes one slot of the 16-dword ring at [rsp]:
  ;   X[i] += s0(X[i+1]) + s1(X[i+14]) + X[i+9]      (indices mod 16)
  ;   s0(x) = ror(x,7)^ror(x,18)^(x>>3)   via ror 11 / xor / ror 7, shr 3
  ;   s1(x) = ror(x,17)^ror(x,19)^(x>>10) via ror 2 / xor / ror 17, shr 10
  ; and then runs the same round body as above with that word.
  573. $L$rounds_16_xx::
  ; -- slot 0 --
  574. mov r13d,DWORD PTR[4+rsp]
  575. mov r15d,DWORD PTR[56+rsp]
  576. mov r12d,r13d
  577. ror r13d,11
  578. add eax,r14d
  579. mov r14d,r15d
  580. ror r15d,2
  581. xor r13d,r12d
  582. shr r12d,3
  583. ror r13d,7
  584. xor r15d,r14d
  585. shr r14d,10
  586. ror r15d,17
  587. xor r12d,r13d
  588. xor r15d,r14d
  589. add r12d,DWORD PTR[36+rsp]
  590. add r12d,DWORD PTR[rsp]
  591. mov r13d,r8d
  592. add r12d,r15d
  593. mov r14d,eax
  594. ror r13d,14
  595. mov r15d,r9d
  596. xor r13d,r8d
  597. ror r14d,9
  598. xor r15d,r10d
  599. mov DWORD PTR[rsp],r12d
  600. xor r14d,eax
  601. and r15d,r8d
  602. ror r13d,5
  603. add r12d,r11d
  604. xor r15d,r10d
  605. ror r14d,11
  606. xor r13d,r8d
  607. add r12d,r15d
  608. mov r15d,eax
  609. add r12d,DWORD PTR[rbp]
  610. xor r14d,eax
  611. xor r15d,ebx
  612. ror r13d,6
  613. mov r11d,ebx
  614. and edi,r15d
  615. ror r14d,2
  616. add r12d,r13d
  617. xor r11d,edi
  618. add edx,r12d
  619. add r11d,r12d
  620. lea rbp,QWORD PTR[4+rbp]
  ; -- slot 1 --
  621. mov r13d,DWORD PTR[8+rsp]
  622. mov edi,DWORD PTR[60+rsp]
  623. mov r12d,r13d
  624. ror r13d,11
  625. add r11d,r14d
  626. mov r14d,edi
  627. ror edi,2
  628. xor r13d,r12d
  629. shr r12d,3
  630. ror r13d,7
  631. xor edi,r14d
  632. shr r14d,10
  633. ror edi,17
  634. xor r12d,r13d
  635. xor edi,r14d
  636. add r12d,DWORD PTR[40+rsp]
  637. add r12d,DWORD PTR[4+rsp]
  638. mov r13d,edx
  639. add r12d,edi
  640. mov r14d,r11d
  641. ror r13d,14
  642. mov edi,r8d
  643. xor r13d,edx
  644. ror r14d,9
  645. xor edi,r9d
  646. mov DWORD PTR[4+rsp],r12d
  647. xor r14d,r11d
  648. and edi,edx
  649. ror r13d,5
  650. add r12d,r10d
  651. xor edi,r9d
  652. ror r14d,11
  653. xor r13d,edx
  654. add r12d,edi
  655. mov edi,r11d
  656. add r12d,DWORD PTR[rbp]
  657. xor r14d,r11d
  658. xor edi,eax
  659. ror r13d,6
  660. mov r10d,eax
  661. and r15d,edi
  662. ror r14d,2
  663. add r12d,r13d
  664. xor r10d,r15d
  665. add ecx,r12d
  666. add r10d,r12d
  667. lea rbp,QWORD PTR[4+rbp]
  ; -- slot 2 --
  668. mov r13d,DWORD PTR[12+rsp]
  669. mov r15d,DWORD PTR[rsp]
  670. mov r12d,r13d
  671. ror r13d,11
  672. add r10d,r14d
  673. mov r14d,r15d
  674. ror r15d,2
  675. xor r13d,r12d
  676. shr r12d,3
  677. ror r13d,7
  678. xor r15d,r14d
  679. shr r14d,10
  680. ror r15d,17
  681. xor r12d,r13d
  682. xor r15d,r14d
  683. add r12d,DWORD PTR[44+rsp]
  684. add r12d,DWORD PTR[8+rsp]
  685. mov r13d,ecx
  686. add r12d,r15d
  687. mov r14d,r10d
  688. ror r13d,14
  689. mov r15d,edx
  690. xor r13d,ecx
  691. ror r14d,9
  692. xor r15d,r8d
  693. mov DWORD PTR[8+rsp],r12d
  694. xor r14d,r10d
  695. and r15d,ecx
  696. ror r13d,5
  697. add r12d,r9d
  698. xor r15d,r8d
  699. ror r14d,11
  700. xor r13d,ecx
  701. add r12d,r15d
  702. mov r15d,r10d
  703. add r12d,DWORD PTR[rbp]
  704. xor r14d,r10d
  705. xor r15d,r11d
  706. ror r13d,6
  707. mov r9d,r11d
  708. and edi,r15d
  709. ror r14d,2
  710. add r12d,r13d
  711. xor r9d,edi
  712. add ebx,r12d
  713. add r9d,r12d
  714. lea rbp,QWORD PTR[4+rbp]
  ; -- slot 3 (+20) --
  715. mov r13d,DWORD PTR[16+rsp]
  716. mov edi,DWORD PTR[4+rsp]
  717. mov r12d,r13d
  718. ror r13d,11
  719. add r9d,r14d
  720. mov r14d,edi
  721. ror edi,2
  722. xor r13d,r12d
  723. shr r12d,3
  724. ror r13d,7
  725. xor edi,r14d
  726. shr r14d,10
  727. ror edi,17
  728. xor r12d,r13d
  729. xor edi,r14d
  730. add r12d,DWORD PTR[48+rsp]
  731. add r12d,DWORD PTR[12+rsp]
  732. mov r13d,ebx
  733. add r12d,edi
  734. mov r14d,r9d
  735. ror r13d,14
  736. mov edi,ecx
  737. xor r13d,ebx
  738. ror r14d,9
  739. xor edi,edx
  740. mov DWORD PTR[12+rsp],r12d
  741. xor r14d,r9d
  742. and edi,ebx
  743. ror r13d,5
  744. add r12d,r8d
  745. xor edi,edx
  746. ror r14d,11
  747. xor r13d,ebx
  748. add r12d,edi
  749. mov edi,r9d
  750. add r12d,DWORD PTR[rbp]
  751. xor r14d,r9d
  752. xor edi,r10d
  753. ror r13d,6
  754. mov r8d,r10d
  755. and r15d,edi
  756. ror r14d,2
  757. add r12d,r13d
  758. xor r8d,r15d
  759. add eax,r12d
  760. add r8d,r12d
  761. lea rbp,QWORD PTR[20+rbp]
  ; -- slot 4 --
  762. mov r13d,DWORD PTR[20+rsp]
  763. mov r15d,DWORD PTR[8+rsp]
  764. mov r12d,r13d
  765. ror r13d,11
  766. add r8d,r14d
  767. mov r14d,r15d
  768. ror r15d,2
  769. xor r13d,r12d
  770. shr r12d,3
  771. ror r13d,7
  772. xor r15d,r14d
  773. shr r14d,10
  774. ror r15d,17
  775. xor r12d,r13d
  776. xor r15d,r14d
  777. add r12d,DWORD PTR[52+rsp]
  778. add r12d,DWORD PTR[16+rsp]
  779. mov r13d,eax
  780. add r12d,r15d
  781. mov r14d,r8d
  782. ror r13d,14
  783. mov r15d,ebx
  784. xor r13d,eax
  785. ror r14d,9
  786. xor r15d,ecx
  787. mov DWORD PTR[16+rsp],r12d
  788. xor r14d,r8d
  789. and r15d,eax
  790. ror r13d,5
  791. add r12d,edx
  792. xor r15d,ecx
  793. ror r14d,11
  794. xor r13d,eax
  795. add r12d,r15d
  796. mov r15d,r8d
  797. add r12d,DWORD PTR[rbp]
  798. xor r14d,r8d
  799. xor r15d,r9d
  800. ror r13d,6
  801. mov edx,r9d
  802. and edi,r15d
  803. ror r14d,2
  804. add r12d,r13d
  805. xor edx,edi
  806. add r11d,r12d
  807. add edx,r12d
  808. lea rbp,QWORD PTR[4+rbp]
  ; -- slot 5 --
  809. mov r13d,DWORD PTR[24+rsp]
  810. mov edi,DWORD PTR[12+rsp]
  811. mov r12d,r13d
  812. ror r13d,11
  813. add edx,r14d
  814. mov r14d,edi
  815. ror edi,2
  816. xor r13d,r12d
  817. shr r12d,3
  818. ror r13d,7
  819. xor edi,r14d
  820. shr r14d,10
  821. ror edi,17
  822. xor r12d,r13d
  823. xor edi,r14d
  824. add r12d,DWORD PTR[56+rsp]
  825. add r12d,DWORD PTR[20+rsp]
  826. mov r13d,r11d
  827. add r12d,edi
  828. mov r14d,edx
  829. ror r13d,14
  830. mov edi,eax
  831. xor r13d,r11d
  832. ror r14d,9
  833. xor edi,ebx
  834. mov DWORD PTR[20+rsp],r12d
  835. xor r14d,edx
  836. and edi,r11d
  837. ror r13d,5
  838. add r12d,ecx
  839. xor edi,ebx
  840. ror r14d,11
  841. xor r13d,r11d
  842. add r12d,edi
  843. mov edi,edx
  844. add r12d,DWORD PTR[rbp]
  845. xor r14d,edx
  846. xor edi,r8d
  847. ror r13d,6
  848. mov ecx,r8d
  849. and r15d,edi
  850. ror r14d,2
  851. add r12d,r13d
  852. xor ecx,r15d
  853. add r10d,r12d
  854. add ecx,r12d
  855. lea rbp,QWORD PTR[4+rbp]
  ; -- slot 6 --
  856. mov r13d,DWORD PTR[28+rsp]
  857. mov r15d,DWORD PTR[16+rsp]
  858. mov r12d,r13d
  859. ror r13d,11
  860. add ecx,r14d
  861. mov r14d,r15d
  862. ror r15d,2
  863. xor r13d,r12d
  864. shr r12d,3
  865. ror r13d,7
  866. xor r15d,r14d
  867. shr r14d,10
  868. ror r15d,17
  869. xor r12d,r13d
  870. xor r15d,r14d
  871. add r12d,DWORD PTR[60+rsp]
  872. add r12d,DWORD PTR[24+rsp]
  873. mov r13d,r10d
  874. add r12d,r15d
  875. mov r14d,ecx
  876. ror r13d,14
  877. mov r15d,r11d
  878. xor r13d,r10d
  879. ror r14d,9
  880. xor r15d,eax
  881. mov DWORD PTR[24+rsp],r12d
  882. xor r14d,ecx
  883. and r15d,r10d
  884. ror r13d,5
  885. add r12d,ebx
  886. xor r15d,eax
  887. ror r14d,11
  888. xor r13d,r10d
  889. add r12d,r15d
  890. mov r15d,ecx
  891. add r12d,DWORD PTR[rbp]
  892. xor r14d,ecx
  893. xor r15d,edx
  894. ror r13d,6
  895. mov ebx,edx
  896. and edi,r15d
  897. ror r14d,2
  898. add r12d,r13d
  899. xor ebx,edi
  900. add r9d,r12d
  901. add ebx,r12d
  902. lea rbp,QWORD PTR[4+rbp]
  ; -- slot 7 (+20) --
  903. mov r13d,DWORD PTR[32+rsp]
  904. mov edi,DWORD PTR[20+rsp]
  905. mov r12d,r13d
  906. ror r13d,11
  907. add ebx,r14d
  908. mov r14d,edi
  909. ror edi,2
  910. xor r13d,r12d
  911. shr r12d,3
  912. ror r13d,7
  913. xor edi,r14d
  914. shr r14d,10
  915. ror edi,17
  916. xor r12d,r13d
  917. xor edi,r14d
  918. add r12d,DWORD PTR[rsp]
  919. add r12d,DWORD PTR[28+rsp]
  920. mov r13d,r9d
  921. add r12d,edi
  922. mov r14d,ebx
  923. ror r13d,14
  924. mov edi,r10d
  925. xor r13d,r9d
  926. ror r14d,9
  927. xor edi,r11d
  928. mov DWORD PTR[28+rsp],r12d
  929. xor r14d,ebx
  930. and edi,r9d
  931. ror r13d,5
  932. add r12d,eax
  933. xor edi,r11d
  934. ror r14d,11
  935. xor r13d,r9d
  936. add r12d,edi
  937. mov edi,ebx
  938. add r12d,DWORD PTR[rbp]
  939. xor r14d,ebx
  940. xor edi,ecx
  941. ror r13d,6
  942. mov eax,ecx
  943. and r15d,edi
  944. ror r14d,2
  945. add r12d,r13d
  946. xor eax,r15d
  947. add r8d,r12d
  948. add eax,r12d
  949. lea rbp,QWORD PTR[20+rbp]
  ; -- slot 8 --
  950. mov r13d,DWORD PTR[36+rsp]
  951. mov r15d,DWORD PTR[24+rsp]
  952. mov r12d,r13d
  953. ror r13d,11
  954. add eax,r14d
  955. mov r14d,r15d
  956. ror r15d,2
  957. xor r13d,r12d
  958. shr r12d,3
  959. ror r13d,7
  960. xor r15d,r14d
  961. shr r14d,10
  962. ror r15d,17
  963. xor r12d,r13d
  964. xor r15d,r14d
  965. add r12d,DWORD PTR[4+rsp]
  966. add r12d,DWORD PTR[32+rsp]
  967. mov r13d,r8d
  968. add r12d,r15d
  969. mov r14d,eax
  970. ror r13d,14
  971. mov r15d,r9d
  972. xor r13d,r8d
  973. ror r14d,9
  974. xor r15d,r10d
  975. mov DWORD PTR[32+rsp],r12d
  976. xor r14d,eax
  977. and r15d,r8d
  978. ror r13d,5
  979. add r12d,r11d
  980. xor r15d,r10d
  981. ror r14d,11
  982. xor r13d,r8d
  983. add r12d,r15d
  984. mov r15d,eax
  985. add r12d,DWORD PTR[rbp]
  986. xor r14d,eax
  987. xor r15d,ebx
  988. ror r13d,6
  989. mov r11d,ebx
  990. and edi,r15d
  991. ror r14d,2
  992. add r12d,r13d
  993. xor r11d,edi
  994. add edx,r12d
  995. add r11d,r12d
  996. lea rbp,QWORD PTR[4+rbp]
  ; -- slot 9 --
  997. mov r13d,DWORD PTR[40+rsp]
  998. mov edi,DWORD PTR[28+rsp]
  999. mov r12d,r13d
  1000. ror r13d,11
  1001. add r11d,r14d
  1002. mov r14d,edi
  1003. ror edi,2
  1004. xor r13d,r12d
  1005. shr r12d,3
  1006. ror r13d,7
  1007. xor edi,r14d
  1008. shr r14d,10
  1009. ror edi,17
  1010. xor r12d,r13d
  1011. xor edi,r14d
  1012. add r12d,DWORD PTR[8+rsp]
  1013. add r12d,DWORD PTR[36+rsp]
  1014. mov r13d,edx
  1015. add r12d,edi
  1016. mov r14d,r11d
  1017. ror r13d,14
  1018. mov edi,r8d
  1019. xor r13d,edx
  1020. ror r14d,9
  1021. xor edi,r9d
  1022. mov DWORD PTR[36+rsp],r12d
  1023. xor r14d,r11d
  1024. and edi,edx
  1025. ror r13d,5
  1026. add r12d,r10d
  1027. xor edi,r9d
  1028. ror r14d,11
  1029. xor r13d,edx
  1030. add r12d,edi
  1031. mov edi,r11d
  1032. add r12d,DWORD PTR[rbp]
  1033. xor r14d,r11d
  1034. xor edi,eax
  1035. ror r13d,6
  1036. mov r10d,eax
  1037. and r15d,edi
  1038. ror r14d,2
  1039. add r12d,r13d
  1040. xor r10d,r15d
  1041. add ecx,r12d
  1042. add r10d,r12d
  1043. lea rbp,QWORD PTR[4+rbp]
  ; -- slot 10 --
  1044. mov r13d,DWORD PTR[44+rsp]
  1045. mov r15d,DWORD PTR[32+rsp]
  1046. mov r12d,r13d
  1047. ror r13d,11
  1048. add r10d,r14d
  1049. mov r14d,r15d
  1050. ror r15d,2
  1051. xor r13d,r12d
  1052. shr r12d,3
  1053. ror r13d,7
  1054. xor r15d,r14d
  1055. shr r14d,10
  1056. ror r15d,17
  1057. xor r12d,r13d
  1058. xor r15d,r14d
  1059. add r12d,DWORD PTR[12+rsp]
  1060. add r12d,DWORD PTR[40+rsp]
  1061. mov r13d,ecx
  1062. add r12d,r15d
  1063. mov r14d,r10d
  1064. ror r13d,14
  1065. mov r15d,edx
  1066. xor r13d,ecx
  1067. ror r14d,9
  1068. xor r15d,r8d
  1069. mov DWORD PTR[40+rsp],r12d
  1070. xor r14d,r10d
  1071. and r15d,ecx
  1072. ror r13d,5
  1073. add r12d,r9d
  1074. xor r15d,r8d
  1075. ror r14d,11
  1076. xor r13d,ecx
  1077. add r12d,r15d
  1078. mov r15d,r10d
  1079. add r12d,DWORD PTR[rbp]
  1080. xor r14d,r10d
  1081. xor r15d,r11d
  1082. ror r13d,6
  1083. mov r9d,r11d
  1084. and edi,r15d
  1085. ror r14d,2
  1086. add r12d,r13d
  1087. xor r9d,edi
  1088. add ebx,r12d
  1089. add r9d,r12d
  1090. lea rbp,QWORD PTR[4+rbp]
  ; -- slot 11 (+20) --
  1091. mov r13d,DWORD PTR[48+rsp]
  1092. mov edi,DWORD PTR[36+rsp]
  1093. mov r12d,r13d
  1094. ror r13d,11
  1095. add r9d,r14d
  1096. mov r14d,edi
  1097. ror edi,2
  1098. xor r13d,r12d
  1099. shr r12d,3
  1100. ror r13d,7
  1101. xor edi,r14d
  1102. shr r14d,10
  1103. ror edi,17
  1104. xor r12d,r13d
  1105. xor edi,r14d
  1106. add r12d,DWORD PTR[16+rsp]
  1107. add r12d,DWORD PTR[44+rsp]
  1108. mov r13d,ebx
  1109. add r12d,edi
  1110. mov r14d,r9d
  1111. ror r13d,14
  1112. mov edi,ecx
  1113. xor r13d,ebx
  1114. ror r14d,9
  1115. xor edi,edx
  1116. mov DWORD PTR[44+rsp],r12d
  1117. xor r14d,r9d
  1118. and edi,ebx
  1119. ror r13d,5
  1120. add r12d,r8d
  1121. xor edi,edx
  1122. ror r14d,11
  1123. xor r13d,ebx
  1124. add r12d,edi
  1125. mov edi,r9d
  1126. add r12d,DWORD PTR[rbp]
  1127. xor r14d,r9d
  1128. xor edi,r10d
  1129. ror r13d,6
  1130. mov r8d,r10d
  1131. and r15d,edi
  1132. ror r14d,2
  1133. add r12d,r13d
  1134. xor r8d,r15d
  1135. add eax,r12d
  1136. add r8d,r12d
  1137. lea rbp,QWORD PTR[20+rbp]
  ; -- slot 12 --
  1138. mov r13d,DWORD PTR[52+rsp]
  1139. mov r15d,DWORD PTR[40+rsp]
  1140. mov r12d,r13d
  1141. ror r13d,11
  1142. add r8d,r14d
  1143. mov r14d,r15d
  1144. ror r15d,2
  1145. xor r13d,r12d
  1146. shr r12d,3
  1147. ror r13d,7
  1148. xor r15d,r14d
  1149. shr r14d,10
  1150. ror r15d,17
  1151. xor r12d,r13d
  1152. xor r15d,r14d
  1153. add r12d,DWORD PTR[20+rsp]
  1154. add r12d,DWORD PTR[48+rsp]
  1155. mov r13d,eax
  1156. add r12d,r15d
  1157. mov r14d,r8d
  1158. ror r13d,14
  1159. mov r15d,ebx
  1160. xor r13d,eax
  1161. ror r14d,9
  1162. xor r15d,ecx
  1163. mov DWORD PTR[48+rsp],r12d
  1164. xor r14d,r8d
  1165. and r15d,eax
  1166. ror r13d,5
  1167. add r12d,edx
  1168. xor r15d,ecx
  1169. ror r14d,11
  1170. xor r13d,eax
  1171. add r12d,r15d
  1172. mov r15d,r8d
  1173. add r12d,DWORD PTR[rbp]
  1174. xor r14d,r8d
  1175. xor r15d,r9d
  1176. ror r13d,6
  1177. mov edx,r9d
  1178. and edi,r15d
  1179. ror r14d,2
  1180. add r12d,r13d
  1181. xor edx,edi
  1182. add r11d,r12d
  1183. add edx,r12d
  1184. lea rbp,QWORD PTR[4+rbp]
  ; -- slot 13 --
  1185. mov r13d,DWORD PTR[56+rsp]
  1186. mov edi,DWORD PTR[44+rsp]
  1187. mov r12d,r13d
  1188. ror r13d,11
  1189. add edx,r14d
  1190. mov r14d,edi
  1191. ror edi,2
  1192. xor r13d,r12d
  1193. shr r12d,3
  1194. ror r13d,7
  1195. xor edi,r14d
  1196. shr r14d,10
  1197. ror edi,17
  1198. xor r12d,r13d
  1199. xor edi,r14d
  1200. add r12d,DWORD PTR[24+rsp]
  1201. add r12d,DWORD PTR[52+rsp]
  1202. mov r13d,r11d
  1203. add r12d,edi
  1204. mov r14d,edx
  1205. ror r13d,14
  1206. mov edi,eax
  1207. xor r13d,r11d
  1208. ror r14d,9
  1209. xor edi,ebx
  1210. mov DWORD PTR[52+rsp],r12d
  1211. xor r14d,edx
  1212. and edi,r11d
  1213. ror r13d,5
  1214. add r12d,ecx
  1215. xor edi,ebx
  1216. ror r14d,11
  1217. xor r13d,r11d
  1218. add r12d,edi
  1219. mov edi,edx
  1220. add r12d,DWORD PTR[rbp]
  1221. xor r14d,edx
  1222. xor edi,r8d
  1223. ror r13d,6
  1224. mov ecx,r8d
  1225. and r15d,edi
  1226. ror r14d,2
  1227. add r12d,r13d
  1228. xor ecx,r15d
  1229. add r10d,r12d
  1230. add ecx,r12d
  1231. lea rbp,QWORD PTR[4+rbp]
  ; -- slot 14 --
  1232. mov r13d,DWORD PTR[60+rsp]
  1233. mov r15d,DWORD PTR[48+rsp]
  1234. mov r12d,r13d
  1235. ror r13d,11
  1236. add ecx,r14d
  1237. mov r14d,r15d
  1238. ror r15d,2
  1239. xor r13d,r12d
  1240. shr r12d,3
  1241. ror r13d,7
  1242. xor r15d,r14d
  1243. shr r14d,10
  1244. ror r15d,17
  1245. xor r12d,r13d
  1246. xor r15d,r14d
  1247. add r12d,DWORD PTR[28+rsp]
  1248. add r12d,DWORD PTR[56+rsp]
  1249. mov r13d,r10d
  1250. add r12d,r15d
  1251. mov r14d,ecx
  1252. ror r13d,14
  1253. mov r15d,r11d
  1254. xor r13d,r10d
  1255. ror r14d,9
  1256. xor r15d,eax
  1257. mov DWORD PTR[56+rsp],r12d
  1258. xor r14d,ecx
  1259. and r15d,r10d
  1260. ror r13d,5
  1261. add r12d,ebx
  1262. xor r15d,eax
  1263. ror r14d,11
  1264. xor r13d,r10d
  1265. add r12d,r15d
  1266. mov r15d,ecx
  1267. add r12d,DWORD PTR[rbp]
  1268. xor r14d,ecx
  1269. xor r15d,edx
  1270. ror r13d,6
  1271. mov ebx,edx
  1272. and edi,r15d
  1273. ror r14d,2
  1274. add r12d,r13d
  1275. xor ebx,edi
  1276. add r9d,r12d
  1277. add ebx,r12d
  1278. lea rbp,QWORD PTR[4+rbp]
  ; -- slot 15 (+20) --
  1279. mov r13d,DWORD PTR[rsp]
  1280. mov edi,DWORD PTR[52+rsp]
  1281. mov r12d,r13d
  1282. ror r13d,11
  1283. add ebx,r14d
  1284. mov r14d,edi
  1285. ror edi,2
  1286. xor r13d,r12d
  1287. shr r12d,3
  1288. ror r13d,7
  1289. xor edi,r14d
  1290. shr r14d,10
  1291. ror edi,17
  1292. xor r12d,r13d
  1293. xor edi,r14d
  1294. add r12d,DWORD PTR[32+rsp]
  1295. add r12d,DWORD PTR[60+rsp]
  1296. mov r13d,r9d
  1297. add r12d,edi
  1298. mov r14d,ebx
  1299. ror r13d,14
  1300. mov edi,r10d
  1301. xor r13d,r9d
  1302. ror r14d,9
  1303. xor edi,r11d
  1304. mov DWORD PTR[60+rsp],r12d
  1305. xor r14d,ebx
  1306. and edi,r9d
  1307. ror r13d,5
  1308. add r12d,eax
  1309. xor edi,r11d
  1310. ror r14d,11
  1311. xor r13d,r9d
  1312. add r12d,edi
  1313. mov edi,ebx
  1314. add r12d,DWORD PTR[rbp]
  1315. xor r14d,ebx
  1316. xor edi,ecx
  1317. ror r13d,6
  1318. mov eax,ecx
  1319. and r15d,edi
  1320. ror r14d,2
  1321. add r12d,r13d
  1322. xor eax,r15d
  1323. add r8d,r12d
  1324. add eax,r12d
  1325. lea rbp,QWORD PTR[20+rbp]
  ; Loop terminator: every round constant in K256 has a non-zero top byte,
  ; while the first dword after the 64 constants (000010203h) has a zero
  ; byte at offset 3, so this test fails exactly when rbp has walked past
  ; the last constant.
  1326. cmp BYTE PTR[3+rbp],0
  1327. jnz $L$rounds_16_xx
  ; End of block: reload the state pointer (edi was scratch), apply the
  ; last deferred Sigma0 add, advance the input cursor by one block, and
  ; fold the working variables back into the state.
  1328. mov rdi,QWORD PTR[((64+0))+rsp]
  1329. add eax,r14d
  1330. lea rsi,QWORD PTR[64+rsi]
  1331. add eax,DWORD PTR[rdi]
  1332. add ebx,DWORD PTR[4+rdi]
  1333. add ecx,DWORD PTR[8+rdi]
  1334. add edx,DWORD PTR[12+rdi]
  1335. add r8d,DWORD PTR[16+rdi]
  1336. add r9d,DWORD PTR[20+rdi]
  1337. add r10d,DWORD PTR[24+rdi]
  1338. add r11d,DWORD PTR[28+rdi]
  ; Flags from this compare survive the mov stores below and feed the jb.
  1339. cmp rsi,QWORD PTR[((64+16))+rsp]
  1340. mov DWORD PTR[rdi],eax
  1341. mov DWORD PTR[4+rdi],ebx
  1342. mov DWORD PTR[8+rdi],ecx
  1343. mov DWORD PTR[12+rdi],edx
  1344. mov DWORD PTR[16+rdi],r8d
  1345. mov DWORD PTR[20+rdi],r9d
  1346. mov DWORD PTR[24+rdi],r10d
  1347. mov DWORD PTR[28+rdi],r11d
  1348. jb $L$loop
  ; Tear-down: rsi = rsp as it was before the six pushes; the saved
  ; registers sit just below it in push order.
  1349. mov rsi,QWORD PTR[88+rsp]
  1350. mov r15,QWORD PTR[((-48))+rsi]
  1351. mov r14,QWORD PTR[((-40))+rsi]
  1352. mov r13,QWORD PTR[((-32))+rsi]
  1353. mov r12,QWORD PTR[((-24))+rsi]
  1354. mov rbp,QWORD PTR[((-16))+rsi]
  1355. mov rbx,QWORD PTR[((-8))+rsi]
  1356. lea rsp,QWORD PTR[rsi]
  1357. $L$epilogue::
  1358. mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue
  1359. mov rsi,QWORD PTR[16+rsp]
  1360. DB 0F3h,0C3h ;repret
  1361. $L$SEH_end_sha256_block_data_order::
  1362. sha256_block_data_order ENDP
  ;-----------------------------------------------------------------------
  ; K256 -- constant table shared by the SHA-256 code in this file.
  ;
  ; Offsets 0..511: the 64 round constants, four per 16-byte row, with
  ; every row emitted twice in a row.  The scalar rounds step over the
  ; duplicate row (pointer += 20 after every fourth constant).
  ; NOTE(review): the duplication is presumably for wider-vector code
  ; paths that are not visible here -- confirm.
  ;
  ; Offsets 512..: three duplicated rows of byte-index patterns.  The top
  ; byte of the dword at offset 512 is zero, which the scalar code uses as
  ; its end-of-table marker (cmp BYTE PTR[3+rbp],0).  The SHA-extension
  ; code loads the row at offset 512 via [(512-128)+rcx] with
  ; rcx = K256+128.  The remaining rows are not referenced in the visible
  ; code; presumably they are shuffle masks for the SIMD paths -- confirm.
  ;
  ; Do not reorder or resize: code addresses this table by fixed offsets.
  ;-----------------------------------------------------------------------
  1363. ALIGN 64
  1364. K256::
  1365. DD 0428a2f98h,071374491h,0b5c0fbcfh,0e9b5dba5h
  1366. DD 0428a2f98h,071374491h,0b5c0fbcfh,0e9b5dba5h
  1367. DD 03956c25bh,059f111f1h,0923f82a4h,0ab1c5ed5h
  1368. DD 03956c25bh,059f111f1h,0923f82a4h,0ab1c5ed5h
  1369. DD 0d807aa98h,012835b01h,0243185beh,0550c7dc3h
  1370. DD 0d807aa98h,012835b01h,0243185beh,0550c7dc3h
  1371. DD 072be5d74h,080deb1feh,09bdc06a7h,0c19bf174h
  1372. DD 072be5d74h,080deb1feh,09bdc06a7h,0c19bf174h
  1373. DD 0e49b69c1h,0efbe4786h,00fc19dc6h,0240ca1cch
  1374. DD 0e49b69c1h,0efbe4786h,00fc19dc6h,0240ca1cch
  1375. DD 02de92c6fh,04a7484aah,05cb0a9dch,076f988dah
  1376. DD 02de92c6fh,04a7484aah,05cb0a9dch,076f988dah
  1377. DD 0983e5152h,0a831c66dh,0b00327c8h,0bf597fc7h
  1378. DD 0983e5152h,0a831c66dh,0b00327c8h,0bf597fc7h
  1379. DD 0c6e00bf3h,0d5a79147h,006ca6351h,014292967h
  1380. DD 0c6e00bf3h,0d5a79147h,006ca6351h,014292967h
  1381. DD 027b70a85h,02e1b2138h,04d2c6dfch,053380d13h
  1382. DD 027b70a85h,02e1b2138h,04d2c6dfch,053380d13h
  1383. DD 0650a7354h,0766a0abbh,081c2c92eh,092722c85h
  1384. DD 0650a7354h,0766a0abbh,081c2c92eh,092722c85h
  1385. DD 0a2bfe8a1h,0a81a664bh,0c24b8b70h,0c76c51a3h
  1386. DD 0a2bfe8a1h,0a81a664bh,0c24b8b70h,0c76c51a3h
  1387. DD 0d192e819h,0d6990624h,0f40e3585h,0106aa070h
  1388. DD 0d192e819h,0d6990624h,0f40e3585h,0106aa070h
  1389. DD 019a4c116h,01e376c08h,02748774ch,034b0bcb5h
  1390. DD 019a4c116h,01e376c08h,02748774ch,034b0bcb5h
  1391. DD 0391c0cb3h,04ed8aa4ah,05b9cca4fh,0682e6ff3h
  1392. DD 0391c0cb3h,04ed8aa4ah,05b9cca4fh,0682e6ff3h
  1393. DD 0748f82eeh,078a5636fh,084c87814h,08cc70208h
  1394. DD 0748f82eeh,078a5636fh,084c87814h,08cc70208h
  1395. DD 090befffah,0a4506cebh,0bef9a3f7h,0c67178f2h
  1396. DD 090befffah,0a4506cebh,0bef9a3f7h,0c67178f2h
  ; Offset 512: byte-index rows (see header).
  1397. DD 000010203h,004050607h,008090a0bh,00c0d0e0fh
  1398. DD 000010203h,004050607h,008090a0bh,00c0d0e0fh
  1399. DD 003020100h,00b0a0908h,0ffffffffh,0ffffffffh
  1400. DD 003020100h,00b0a0908h,0ffffffffh,0ffffffffh
  1401. DD 0ffffffffh,0ffffffffh,003020100h,00b0a0908h
  1402. DD 0ffffffffh,0ffffffffh,003020100h,00b0a0908h
  ; NUL-terminated ASCII identification string:
  ; "SHA256 block transform for x86_64, CRYPTOGAMS by <appro@openssl.org>"
  1403. DB 83,72,65,50,53,54,32,98,108,111,99,107,32,116,114,97
  1404. DB 110,115,102,111,114,109,32,102,111,114,32,120,56,54,95,54
  1405. DB 52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121
  1406. DB 32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46
  1407. DB 111,114,103,62,0
  1408. ALIGN 64
  1409. sha256_block_data_order_shaext PROC PRIVATE ;SHA-extension code path: rcx = 8-dword state, rdx = input, r8 = number of 64-byte blocks; SHA/SSSE3 opcodes are emitted as raw DB bytes (decoded in the comments)
  1410. mov QWORD PTR[8+rsp],rdi ;WIN64 prologue
  1411. mov QWORD PTR[16+rsp],rsi
  1412. mov rax,rsp ;rax = entry rsp, used below as the base for the XMM save area
  1413. $L$SEH_begin_sha256_block_data_order_shaext::
  1414. mov rdi,rcx ;rdi = state pointer (1st Win64 argument)
  1415. mov rsi,rdx ;rsi = input pointer (2nd Win64 argument)
  1416. mov rdx,r8 ;rdx = block count (3rd Win64 argument)
  1417. _shaext_shortcut:: ;also the jump target of the dispatcher in sha256_block_data_order, which arrives with rax = rsp and rdi/rsi/rdx already set
  1418. lea rsp,QWORD PTR[((-88))+rsp] ;reserve 88 bytes: five 16-byte XMM slots plus 8
  1419. movaps XMMWORD PTR[(-8-80)+rax],xmm6 ;save xmm6..xmm10 (callee-saved under the Win64 ABI) below the entry rsp
  1420. movaps XMMWORD PTR[(-8-64)+rax],xmm7
  1421. movaps XMMWORD PTR[(-8-48)+rax],xmm8
  1422. movaps XMMWORD PTR[(-8-32)+rax],xmm9
  1423. movaps XMMWORD PTR[(-8-16)+rax],xmm10
  1424. $L$prologue_shaext::
  1425. lea rcx,QWORD PTR[((K256+128))] ;rcx = K256+128; table offsets below are written as (n-128)+rcx
  1426. movdqu xmm1,XMMWORD PTR[rdi] ;xmm1 = state dwords 0..3
  1427. movdqu xmm2,XMMWORD PTR[16+rdi] ;xmm2 = state dwords 4..7
  1428. movdqa xmm7,XMMWORD PTR[((512-128))+rcx] ;xmm7 = byte-reversal pshufb mask at K256+512
  1429. pshufd xmm0,xmm1,01bh ;xmm0 = state dwords 3,2,1,0 (low to high)
  1430. pshufd xmm1,xmm1,0b1h ;xmm1 = state dwords 1,0,3,2
  1431. pshufd xmm2,xmm2,01bh ;xmm2 = state dwords 7,6,5,4
  1432. movdqa xmm8,xmm7 ;keep a copy of the mask; xmm7 is reused as scratch inside the loop
  1433. DB 102,15,58,15,202,8 ;palignr xmm1,xmm2,8: xmm1 = state dwords 5,4,1,0 (low to high)
  1434. punpcklqdq xmm2,xmm0 ;xmm2 = state dwords 7,6,3,2 (low to high)
  1435. jmp $L$oop_shaext
  1436. ALIGN 16
  1437. $L$oop_shaext:: ;one iteration per 64-byte input block
  1438. movdqu xmm3,XMMWORD PTR[rsi] ;xmm3..xmm6 = the 16 input dwords of this block
  1439. movdqu xmm4,XMMWORD PTR[16+rsi]
  1440. movdqu xmm5,XMMWORD PTR[32+rsi]
  1441. DB 102,15,56,0,223 ;pshufb xmm3,xmm7: byte-swap each dword
  1442. movdqu xmm6,XMMWORD PTR[48+rsi]
  1443. movdqa xmm0,XMMWORD PTR[((0-128))+rcx] ;xmm0 = constants 0..3
  1444. paddd xmm0,xmm3 ;xmm0 = message words + constants
  1445. DB 102,15,56,0,231 ;pshufb xmm4,xmm7
  1446. movdqa xmm10,xmm2 ;xmm10 = copy of xmm2 at block start, added back after the last round
  1447. DB 15,56,203,209 ;sha256rnds2 xmm2,xmm1 (xmm0 implicit)
  1448. pshufd xmm0,xmm0,00eh ;move the upper two word+constant sums into the low qword
  1449. nop
  1450. movdqa xmm9,xmm1 ;xmm9 = copy of xmm1 at block start, added back after the last round
  1451. DB 15,56,203,202 ;sha256rnds2 xmm1,xmm2 (xmm0 implicit)
  1452. movdqa xmm0,XMMWORD PTR[((32-128))+rcx] ;next distinct constant row (rows are 32 bytes apart)
  1453. paddd xmm0,xmm4
  1454. DB 102,15,56,0,239 ;pshufb xmm5,xmm7
  1455. DB 15,56,203,209 ;sha256rnds2 xmm2,xmm1
  1456. pshufd xmm0,xmm0,00eh
  1457. lea rsi,QWORD PTR[64+rsi] ;advance the input pointer to the next block
  1458. DB 15,56,204,220 ;sha256msg1 xmm3,xmm4
  1459. DB 15,56,203,202 ;sha256rnds2 xmm1,xmm2
  1460. movdqa xmm0,XMMWORD PTR[((64-128))+rcx]
  1461. paddd xmm0,xmm5
  1462. DB 102,15,56,0,247 ;pshufb xmm6,xmm7
  1463. DB 15,56,203,209 ;sha256rnds2 xmm2,xmm1
  1464. pshufd xmm0,xmm0,00eh
  1465. movdqa xmm7,xmm6 ;xmm7 is scratch from here on (mask copy lives in xmm8)
  1466. DB 102,15,58,15,253,4 ;palignr xmm7,xmm5,4
  1467. nop
  1468. paddd xmm3,xmm7
  1469. DB 15,56,204,229 ;sha256msg1 xmm4,xmm5
  1470. DB 15,56,203,202 ;sha256rnds2 xmm1,xmm2
  1471. movdqa xmm0,XMMWORD PTR[((96-128))+rcx]
  1472. paddd xmm0,xmm6
  1473. DB 15,56,205,222 ;sha256msg2 xmm3,xmm6
  1474. DB 15,56,203,209 ;sha256rnds2 xmm2,xmm1
  1475. pshufd xmm0,xmm0,00eh
  1476. movdqa xmm7,xmm3
  1477. DB 102,15,58,15,254,4 ;palignr xmm7,xmm6,4
  1478. nop
  1479. paddd xmm4,xmm7
  1480. DB 15,56,204,238 ;sha256msg1 xmm5,xmm6
  1481. DB 15,56,203,202 ;sha256rnds2 xmm1,xmm2
  1482. movdqa xmm0,XMMWORD PTR[((128-128))+rcx]
  1483. paddd xmm0,xmm3
  1484. DB 15,56,205,227 ;sha256msg2 xmm4,xmm3
  1485. DB 15,56,203,209 ;sha256rnds2 xmm2,xmm1
  1486. pshufd xmm0,xmm0,00eh
  1487. movdqa xmm7,xmm4
  1488. DB 102,15,58,15,251,4 ;palignr xmm7,xmm3,4
  1489. nop
  1490. paddd xmm5,xmm7
  1491. DB 15,56,204,243 ;sha256msg1 xmm6,xmm3
  1492. DB 15,56,203,202 ;sha256rnds2 xmm1,xmm2
  1493. movdqa xmm0,XMMWORD PTR[((160-128))+rcx]
  1494. paddd xmm0,xmm4
  1495. DB 15,56,205,236 ;sha256msg2 xmm5,xmm4
  1496. DB 15,56,203,209 ;sha256rnds2 xmm2,xmm1
  1497. pshufd xmm0,xmm0,00eh
  1498. movdqa xmm7,xmm5
  1499. DB 102,15,58,15,252,4 ;palignr xmm7,xmm4,4
  1500. nop
  1501. paddd xmm6,xmm7
  1502. DB 15,56,204,220 ;sha256msg1 xmm3,xmm4
  1503. DB 15,56,203,202 ;sha256rnds2 xmm1,xmm2
  1504. movdqa xmm0,XMMWORD PTR[((192-128))+rcx]
  1505. paddd xmm0,xmm5
  1506. DB 15,56,205,245 ;sha256msg2 xmm6,xmm5
  1507. DB 15,56,203,209 ;sha256rnds2 xmm2,xmm1
  1508. pshufd xmm0,xmm0,00eh
  1509. movdqa xmm7,xmm6
  1510. DB 102,15,58,15,253,4 ;palignr xmm7,xmm5,4
  1511. nop
  1512. paddd xmm3,xmm7
  1513. DB 15,56,204,229 ;sha256msg1 xmm4,xmm5
  1514. DB 15,56,203,202 ;sha256rnds2 xmm1,xmm2
  1515. movdqa xmm0,XMMWORD PTR[((224-128))+rcx]
  1516. paddd xmm0,xmm6
  1517. DB 15,56,205,222 ;sha256msg2 xmm3,xmm6
  1518. DB 15,56,203,209 ;sha256rnds2 xmm2,xmm1
  1519. pshufd xmm0,xmm0,00eh
  1520. movdqa xmm7,xmm3
  1521. DB 102,15,58,15,254,4 ;palignr xmm7,xmm6,4
  1522. nop
  1523. paddd xmm4,xmm7
  1524. DB 15,56,204,238 ;sha256msg1 xmm5,xmm6
  1525. DB 15,56,203,202 ;sha256rnds2 xmm1,xmm2
  1526. movdqa xmm0,XMMWORD PTR[((256-128))+rcx]
  1527. paddd xmm0,xmm3
  1528. DB 15,56,205,227 ;sha256msg2 xmm4,xmm3
  1529. DB 15,56,203,209 ;sha256rnds2 xmm2,xmm1
  1530. pshufd xmm0,xmm0,00eh
  1531. movdqa xmm7,xmm4
  1532. DB 102,15,58,15,251,4 ;palignr xmm7,xmm3,4
  1533. nop
  1534. paddd xmm5,xmm7
  1535. DB 15,56,204,243 ;sha256msg1 xmm6,xmm3
  1536. DB 15,56,203,202 ;sha256rnds2 xmm1,xmm2
  1537. movdqa xmm0,XMMWORD PTR[((288-128))+rcx]
  1538. paddd xmm0,xmm4
  1539. DB 15,56,205,236 ;sha256msg2 xmm5,xmm4
  1540. DB 15,56,203,209 ;sha256rnds2 xmm2,xmm1
  1541. pshufd xmm0,xmm0,00eh
  1542. movdqa xmm7,xmm5
  1543. DB 102,15,58,15,252,4 ;palignr xmm7,xmm4,4
  1544. nop
  1545. paddd xmm6,xmm7
  1546. DB 15,56,204,220 ;sha256msg1 xmm3,xmm4
  1547. DB 15,56,203,202 ;sha256rnds2 xmm1,xmm2
  1548. movdqa xmm0,XMMWORD PTR[((320-128))+rcx]
  1549. paddd xmm0,xmm5
  1550. DB 15,56,205,245 ;sha256msg2 xmm6,xmm5
  1551. DB 15,56,203,209 ;sha256rnds2 xmm2,xmm1
  1552. pshufd xmm0,xmm0,00eh
  1553. movdqa xmm7,xmm6
  1554. DB 102,15,58,15,253,4 ;palignr xmm7,xmm5,4
  1555. nop
  1556. paddd xmm3,xmm7
  1557. DB 15,56,204,229 ;sha256msg1 xmm4,xmm5
  1558. DB 15,56,203,202 ;sha256rnds2 xmm1,xmm2
  1559. movdqa xmm0,XMMWORD PTR[((352-128))+rcx]
  1560. paddd xmm0,xmm6
  1561. DB 15,56,205,222 ;sha256msg2 xmm3,xmm6
  1562. DB 15,56,203,209 ;sha256rnds2 xmm2,xmm1
  1563. pshufd xmm0,xmm0,00eh
  1564. movdqa xmm7,xmm3
  1565. DB 102,15,58,15,254,4 ;palignr xmm7,xmm6,4
  1566. nop
  1567. paddd xmm4,xmm7
  1568. DB 15,56,204,238 ;sha256msg1 xmm5,xmm6
  1569. DB 15,56,203,202 ;sha256rnds2 xmm1,xmm2
  1570. movdqa xmm0,XMMWORD PTR[((384-128))+rcx]
  1571. paddd xmm0,xmm3
  1572. DB 15,56,205,227 ;sha256msg2 xmm4,xmm3
  1573. DB 15,56,203,209 ;sha256rnds2 xmm2,xmm1
  1574. pshufd xmm0,xmm0,00eh
  1575. movdqa xmm7,xmm4
  1576. DB 102,15,58,15,251,4 ;palignr xmm7,xmm3,4
  1577. nop
  1578. paddd xmm5,xmm7
  1579. DB 15,56,204,243 ;sha256msg1 xmm6,xmm3
  1580. DB 15,56,203,202 ;sha256rnds2 xmm1,xmm2
  1581. movdqa xmm0,XMMWORD PTR[((416-128))+rcx]
  1582. paddd xmm0,xmm4
  1583. DB 15,56,205,236 ;sha256msg2 xmm5,xmm4
  1584. DB 15,56,203,209 ;sha256rnds2 xmm2,xmm1
  1585. pshufd xmm0,xmm0,00eh
  1586. movdqa xmm7,xmm5
  1587. DB 102,15,58,15,252,4 ;palignr xmm7,xmm4,4
  1588. DB 15,56,203,202 ;sha256rnds2 xmm1,xmm2
  1589. paddd xmm6,xmm7
  1590. movdqa xmm0,XMMWORD PTR[((448-128))+rcx]
  1591. paddd xmm0,xmm5
  1592. DB 15,56,203,209 ;sha256rnds2 xmm2,xmm1
  1593. pshufd xmm0,xmm0,00eh
  1594. DB 15,56,205,245 ;sha256msg2 xmm6,xmm5
  1595. movdqa xmm7,xmm8 ;restore the byte-reversal mask for the next block
  1596. DB 15,56,203,202 ;sha256rnds2 xmm1,xmm2
  1597. movdqa xmm0,XMMWORD PTR[((480-128))+rcx] ;last distinct constant row (constants 60..63)
  1598. paddd xmm0,xmm6
  1599. nop
  1600. DB 15,56,203,209 ;sha256rnds2 xmm2,xmm1
  1601. pshufd xmm0,xmm0,00eh
  1602. dec rdx ;one block done; ZF from this decrement drives the jnz below (the SSE instructions in between leave flags alone)
  1603. nop
  1604. DB 15,56,203,202 ;sha256rnds2 xmm1,xmm2
  1605. paddd xmm2,xmm10 ;add the block-start copies back into the working state
  1606. paddd xmm1,xmm9
  1607. jnz $L$oop_shaext
  1608. pshufd xmm2,xmm2,0b1h ;undo the prologue's rearrangement so the state is back in memory order
  1609. pshufd xmm7,xmm1,01bh
  1610. pshufd xmm1,xmm1,0b1h
  1611. punpckhqdq xmm1,xmm2 ;xmm1 = state dwords 0..3
  1612. DB 102,15,58,15,215,8 ;palignr xmm2,xmm7,8: xmm2 = state dwords 4..7
  1613. movdqu XMMWORD PTR[rdi],xmm1 ;write the updated state back
  1614. movdqu XMMWORD PTR[16+rdi],xmm2
  1615. movaps xmm6,XMMWORD PTR[((-8-80))+rax] ;restore xmm6..xmm10 from the save area
  1616. movaps xmm7,XMMWORD PTR[((-8-64))+rax]
  1617. movaps xmm8,XMMWORD PTR[((-8-48))+rax]
  1618. movaps xmm9,XMMWORD PTR[((-8-32))+rax]
  1619. movaps xmm10,XMMWORD PTR[((-8-16))+rax]
  1620. mov rsp,rax ;release the frame: rax still holds the entry rsp
  1621. $L$epilogue_shaext::
  1622. mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue
  1623. mov rsi,QWORD PTR[16+rsp]
  1624. DB 0F3h,0C3h ;repret
  1625. $L$SEH_end_sha256_block_data_order_shaext::
  1626. sha256_block_data_order_shaext ENDP
  1627. ALIGN 64
  1628. sha256_block_data_order_ssse3 PROC PRIVATE ;SSSE3 code path: rcx = 8-dword state, rdx = input, r8 = number of 64-byte blocks; scalar rounds are interleaved with the SIMD message schedule
  1629. mov QWORD PTR[8+rsp],rdi ;WIN64 prologue
  1630. mov QWORD PTR[16+rsp],rsi
  1631. mov rax,rsp
  1632. $L$SEH_begin_sha256_block_data_order_ssse3::
  1633. mov rdi,rcx ;rdi = state pointer (1st Win64 argument)
  1634. mov rsi,rdx ;rsi = input pointer (2nd Win64 argument)
  1635. mov rdx,r8 ;rdx = block count (3rd Win64 argument)
  1636. $L$ssse3_shortcut:: ;also the jump target of the dispatcher in sha256_block_data_order, with rdi/rsi/rdx already set
  1637. mov rax,rsp ;rax = entry rsp; stored at 88+rsp below for the epilogue
  1638. push rbx ;save the callee-saved general registers used as working variables
  1639. push rbp
  1640. push r12
  1641. push r13
  1642. push r14
  1643. push r15
  1644. shl rdx,4 ;rdx = count*16
  1645. sub rsp,160 ;frame: 64 bytes of word+constant sums, 32 bytes of saved pointers, 64 bytes of saved XMM registers
  1646. lea rdx,QWORD PTR[rdx*4+rsi] ;rdx = rsi + count*64 = end of input
  1647. and rsp,-64 ;align the frame to 64 bytes
  1648. mov QWORD PTR[((64+0))+rsp],rdi ;64+rsp: state pointer
  1649. mov QWORD PTR[((64+8))+rsp],rsi ;72+rsp: input pointer
  1650. mov QWORD PTR[((64+16))+rsp],rdx ;80+rsp: end-of-input pointer
  1651. mov QWORD PTR[88+rsp],rax ;88+rsp: entry rsp
  1652. movaps XMMWORD PTR[(64+32)+rsp],xmm6 ;save xmm6..xmm9 (callee-saved under the Win64 ABI)
  1653. movaps XMMWORD PTR[(64+48)+rsp],xmm7
  1654. movaps XMMWORD PTR[(64+64)+rsp],xmm8
  1655. movaps XMMWORD PTR[(64+80)+rsp],xmm9
  1656. $L$prologue_ssse3::
  1657. mov eax,DWORD PTR[rdi] ;load the eight state dwords into eax,ebx,ecx,edx,r8d..r11d
  1658. mov ebx,DWORD PTR[4+rdi]
  1659. mov ecx,DWORD PTR[8+rdi]
  1660. mov edx,DWORD PTR[12+rdi]
  1661. mov r8d,DWORD PTR[16+rdi]
  1662. mov r9d,DWORD PTR[20+rdi]
  1663. mov r10d,DWORD PTR[24+rdi]
  1664. mov r11d,DWORD PTR[28+rdi]
  1665. jmp $L$loop_ssse3
  1666. ALIGN 16
  1667. $L$loop_ssse3:: ;one iteration per 64-byte input block
  1668. movdqa xmm7,XMMWORD PTR[((K256+512))] ;xmm7 = byte-reversal pshufb mask
  1669. movdqu xmm0,XMMWORD PTR[rsi] ;xmm0..xmm3 = the 16 input dwords of this block
  1670. movdqu xmm1,XMMWORD PTR[16+rsi]
  1671. movdqu xmm2,XMMWORD PTR[32+rsi]
  1672. DB 102,15,56,0,199 ;pshufb xmm0,xmm7: byte-swap each dword
  1673. movdqu xmm3,XMMWORD PTR[48+rsi]
  1674. lea rbp,QWORD PTR[K256] ;rbp = start of the constant table
  1675. DB 102,15,56,0,207 ;pshufb xmm1,xmm7
  1676. movdqa xmm4,XMMWORD PTR[rbp] ;constants 0..3 (distinct rows are 32 bytes apart)
  1677. movdqa xmm5,XMMWORD PTR[32+rbp]
  1678. DB 102,15,56,0,215 ;pshufb xmm2,xmm7
  1679. paddd xmm4,xmm0 ;message words + constants
  1680. movdqa xmm6,XMMWORD PTR[64+rbp]
  1681. DB 102,15,56,0,223 ;pshufb xmm3,xmm7
  1682. movdqa xmm7,XMMWORD PTR[96+rbp] ;mask no longer needed for this block; xmm7 becomes scratch
  1683. paddd xmm5,xmm1
  1684. paddd xmm6,xmm2
  1685. paddd xmm7,xmm3
  1686. movdqa XMMWORD PTR[rsp],xmm4 ;[rsp..63+rsp] = 16 word+constant sums consumed by the scalar rounds
  1687. mov r14d,eax
  1688. movdqa XMMWORD PTR[16+rsp],xmm5
  1689. mov edi,ebx
  1690. movdqa XMMWORD PTR[32+rsp],xmm6
  1691. xor edi,ecx ;edi = ebx^ecx, input to the first round's majority computation
  1692. movdqa XMMWORD PTR[48+rsp],xmm7
  1693. mov r13d,r8d
  1694. jmp $L$ssse3_00_47
  1695. ALIGN 16
  1696. $L$ssse3_00_47:: ;each pass runs 16 scalar rounds while computing the next 16 message words in xmm0..xmm3
  1697. sub rbp,-128 ;rbp += 128: next four distinct constant rows
  1698. ror r13d,14
  1699. movdqa xmm4,xmm1
  1700. mov eax,r14d
  1701. mov r12d,r9d
  1702. movdqa xmm7,xmm3
  1703. ror r14d,9
  1704. xor r13d,r8d
  1705. xor r12d,r10d
  1706. ror r13d,5
  1707. xor r14d,eax
  1708. DB 102,15,58,15,224,4 ;palignr xmm4,xmm0,4
  1709. and r12d,r8d
  1710. xor r13d,r8d
  1711. DB 102,15,58,15,250,4 ;palignr xmm7,xmm2,4
  1712. add r11d,DWORD PTR[rsp] ;add this round's stored word+constant sum
  1713. mov r15d,eax
  1714. xor r12d,r10d ;r12d = ((r9d^r10d)&r8d)^r10d
  1715. ror r14d,11
  1716. movdqa xmm5,xmm4
  1717. xor r15d,ebx
  1718. add r11d,r12d
  1719. movdqa xmm6,xmm4
  1720. ror r13d,6 ;r13d = ror(r8d,6)^ror(r8d,11)^ror(r8d,25)
  1721. and edi,r15d
  1722. psrld xmm4,3
  1723. xor r14d,eax
  1724. add r11d,r13d
  1725. xor edi,ebx ;edi = ((ebx^ecx)&(eax^ebx))^ebx, the bitwise majority of eax,ebx,ecx
  1726. paddd xmm0,xmm7
  1727. ror r14d,2 ;r14d = ror(eax,2)^ror(eax,13)^ror(eax,22)
  1728. add edx,r11d
  1729. psrld xmm6,7
  1730. add r11d,edi
  1731. mov r13d,edx ;next round starts here: same pattern with the register roles rotated by one
  1732. pshufd xmm7,xmm3,250
  1733. add r14d,r11d
  1734. ror r13d,14
  1735. pslld xmm5,14
  1736. mov r11d,r14d
  1737. mov r12d,r8d
  1738. pxor xmm4,xmm6
  1739. ror r14d,9
  1740. xor r13d,edx
  1741. xor r12d,r9d
  1742. ror r13d,5
  1743. psrld xmm6,11
  1744. xor r14d,r11d
  1745. pxor xmm4,xmm5
  1746. and r12d,edx
  1747. xor r13d,edx
  1748. pslld xmm5,11
  1749. add r10d,DWORD PTR[4+rsp]
  1750. mov edi,r11d
  1751. pxor xmm4,xmm6
  1752. xor r12d,r9d
  1753. ror r14d,11
  1754. movdqa xmm6,xmm7
  1755. xor edi,eax
  1756. add r10d,r12d
  1757. pxor xmm4,xmm5 ;per dword x of the first palignr result: xmm4 = (x>>3)^ror(x,7)^ror(x,18)
  1758. ror r13d,6
  1759. and r15d,edi
  1760. xor r14d,r11d
  1761. psrld xmm7,10
  1762. add r10d,r13d
  1763. xor r15d,eax
  1764. paddd xmm0,xmm4
  1765. ror r14d,2
  1766. add ecx,r10d
  1767. psrlq xmm6,17
  1768. add r10d,r15d
  1769. mov r13d,ecx
  1770. add r14d,r10d
  1771. pxor xmm7,xmm6
  1772. ror r13d,14
  1773. mov r10d,r14d
  1774. mov r12d,edx
  1775. ror r14d,9
  1776. psrlq xmm6,2
  1777. xor r13d,ecx
  1778. xor r12d,r8d
  1779. pxor xmm7,xmm6
  1780. ror r13d,5
  1781. xor r14d,r10d
  1782. and r12d,ecx
  1783. pshufd xmm7,xmm7,128
  1784. xor r13d,ecx
  1785. add r9d,DWORD PTR[8+rsp]
  1786. mov r15d,r10d
  1787. psrldq xmm7,8
  1788. xor r12d,r8d
  1789. ror r14d,11
  1790. xor r15d,r11d
  1791. add r9d,r12d
  1792. ror r13d,6
  1793. paddd xmm0,xmm7 ;lower two dwords of xmm0 now hold new message words
  1794. and edi,r15d
  1795. xor r14d,r10d
  1796. add r9d,r13d
  1797. pshufd xmm7,xmm0,80
  1798. xor edi,r11d
  1799. ror r14d,2
  1800. add ebx,r9d
  1801. movdqa xmm6,xmm7
  1802. add r9d,edi
  1803. mov r13d,ebx
  1804. psrld xmm7,10
  1805. add r14d,r9d
  1806. ror r13d,14
  1807. psrlq xmm6,17
  1808. mov r9d,r14d
  1809. mov r12d,ecx
  1810. pxor xmm7,xmm6
  1811. ror r14d,9
  1812. xor r13d,ebx
  1813. xor r12d,edx
  1814. ror r13d,5
  1815. xor r14d,r9d
  1816. psrlq xmm6,2
  1817. and r12d,ebx
  1818. xor r13d,ebx
  1819. add r8d,DWORD PTR[12+rsp]
  1820. pxor xmm7,xmm6
  1821. mov edi,r9d
  1822. xor r12d,edx
  1823. ror r14d,11
  1824. pshufd xmm7,xmm7,8
  1825. xor edi,r10d
  1826. add r8d,r12d
  1827. movdqa xmm6,XMMWORD PTR[rbp] ;constants for the four words just scheduled
  1828. ror r13d,6
  1829. and r15d,edi
  1830. pslldq xmm7,8
  1831. xor r14d,r9d
  1832. add r8d,r13d
  1833. xor r15d,r10d
  1834. paddd xmm0,xmm7 ;upper two dwords of xmm0 completed
  1835. ror r14d,2
  1836. add eax,r8d
  1837. add r8d,r15d
  1838. paddd xmm6,xmm0
  1839. mov r13d,eax
  1840. add r14d,r8d
  1841. movdqa XMMWORD PTR[rsp],xmm6 ;refill slots 0..3 with new word+constant sums (their old contents were consumed above)
  1842. ror r13d,14
  1843. movdqa xmm4,xmm2
  1844. mov r8d,r14d
  1845. mov r12d,ebx
  1846. movdqa xmm7,xmm0
  1847. ror r14d,9
  1848. xor r13d,eax
  1849. xor r12d,ecx
  1850. ror r13d,5
  1851. xor r14d,r8d
  1852. DB 102,15,58,15,225,4 ;palignr xmm4,xmm1,4
  1853. and r12d,eax
  1854. xor r13d,eax
  1855. DB 102,15,58,15,251,4 ;palignr xmm7,xmm3,4
  1856. add edx,DWORD PTR[16+rsp]
  1857. mov r15d,r8d
  1858. xor r12d,ecx
  1859. ror r14d,11
  1860. movdqa xmm5,xmm4
  1861. xor r15d,r9d
  1862. add edx,r12d
  1863. movdqa xmm6,xmm4
  1864. ror r13d,6
  1865. and edi,r15d
  1866. psrld xmm4,3
  1867. xor r14d,r8d
  1868. add edx,r13d
  1869. xor edi,r9d
  1870. paddd xmm1,xmm7
  1871. ror r14d,2
  1872. add r11d,edx
  1873. psrld xmm6,7
  1874. add edx,edi
  1875. mov r13d,r11d
  1876. pshufd xmm7,xmm0,250
  1877. add r14d,edx
  1878. ror r13d,14
  1879. pslld xmm5,14
  1880. mov edx,r14d
  1881. mov r12d,eax
  1882. pxor xmm4,xmm6
  1883. ror r14d,9
  1884. xor r13d,r11d
  1885. xor r12d,ebx
  1886. ror r13d,5
  1887. psrld xmm6,11
  1888. xor r14d,edx
  1889. pxor xmm4,xmm5
  1890. and r12d,r11d
  1891. xor r13d,r11d
  1892. pslld xmm5,11
  1893. add ecx,DWORD PTR[20+rsp]
  1894. mov edi,edx
  1895. pxor xmm4,xmm6
  1896. xor r12d,ebx
  1897. ror r14d,11
  1898. movdqa xmm6,xmm7
  1899. xor edi,r8d
  1900. add ecx,r12d
  1901. pxor xmm4,xmm5
  1902. ror r13d,6
  1903. and r15d,edi
  1904. xor r14d,edx
  1905. psrld xmm7,10
  1906. add ecx,r13d
  1907. xor r15d,r8d
  1908. paddd xmm1,xmm4
  1909. ror r14d,2
  1910. add r10d,ecx
  1911. psrlq xmm6,17
  1912. add ecx,r15d
  1913. mov r13d,r10d
  1914. add r14d,ecx
  1915. pxor xmm7,xmm6
  1916. ror r13d,14
  1917. mov ecx,r14d
  1918. mov r12d,r11d
  1919. ror r14d,9
  1920. psrlq xmm6,2
  1921. xor r13d,r10d
  1922. xor r12d,eax
  1923. pxor xmm7,xmm6
  1924. ror r13d,5
  1925. xor r14d,ecx
  1926. and r12d,r10d
  1927. pshufd xmm7,xmm7,128
  1928. xor r13d,r10d
  1929. add ebx,DWORD PTR[24+rsp]
  1930. mov r15d,ecx
  1931. psrldq xmm7,8
  1932. xor r12d,eax
  1933. ror r14d,11
  1934. xor r15d,edx
  1935. add ebx,r12d
  1936. ror r13d,6
  1937. paddd xmm1,xmm7
  1938. and edi,r15d
  1939. xor r14d,ecx
  1940. add ebx,r13d
  1941. pshufd xmm7,xmm1,80
  1942. xor edi,edx
  1943. ror r14d,2
  1944. add r9d,ebx
  1945. movdqa xmm6,xmm7
  1946. add ebx,edi
  1947. mov r13d,r9d
  1948. psrld xmm7,10
  1949. add r14d,ebx
  1950. ror r13d,14
  1951. psrlq xmm6,17
  1952. mov ebx,r14d
  1953. mov r12d,r10d
  1954. pxor xmm7,xmm6
  1955. ror r14d,9
  1956. xor r13d,r9d
  1957. xor r12d,r11d
  1958. ror r13d,5
  1959. xor r14d,ebx
  1960. psrlq xmm6,2
  1961. and r12d,r9d
  1962. xor r13d,r9d
  1963. add eax,DWORD PTR[28+rsp]
  1964. pxor xmm7,xmm6
  1965. mov edi,ebx
  1966. xor r12d,r11d
  1967. ror r14d,11
  1968. pshufd xmm7,xmm7,8
  1969. xor edi,ecx
  1970. add eax,r12d
  1971. movdqa xmm6,XMMWORD PTR[32+rbp]
  1972. ror r13d,6
  1973. and r15d,edi
  1974. pslldq xmm7,8
  1975. xor r14d,ebx
  1976. add eax,r13d
  1977. xor r15d,ecx
  1978. paddd xmm1,xmm7
  1979. ror r14d,2
  1980. add r8d,eax
  1981. add eax,r15d
  1982. paddd xmm6,xmm1
  1983. mov r13d,r8d
  1984. add r14d,eax
  1985. movdqa XMMWORD PTR[16+rsp],xmm6 ;refill slots 4..7
  1986. ror r13d,14
  1987. movdqa xmm4,xmm3
  1988. mov eax,r14d
  1989. mov r12d,r9d
  1990. movdqa xmm7,xmm1
  1991. ror r14d,9
  1992. xor r13d,r8d
  1993. xor r12d,r10d
  1994. ror r13d,5
  1995. xor r14d,eax
  1996. DB 102,15,58,15,226,4 ;palignr xmm4,xmm2,4
  1997. and r12d,r8d
  1998. xor r13d,r8d
  1999. DB 102,15,58,15,248,4 ;palignr xmm7,xmm0,4
  2000. add r11d,DWORD PTR[32+rsp]
  2001. mov r15d,eax
  2002. xor r12d,r10d
  2003. ror r14d,11
  2004. movdqa xmm5,xmm4
  2005. xor r15d,ebx
  2006. add r11d,r12d
  2007. movdqa xmm6,xmm4
  2008. ror r13d,6
  2009. and edi,r15d
  2010. psrld xmm4,3
  2011. xor r14d,eax
  2012. add r11d,r13d
  2013. xor edi,ebx
  2014. paddd xmm2,xmm7
  2015. ror r14d,2
  2016. add edx,r11d
  2017. psrld xmm6,7
  2018. add r11d,edi
  2019. mov r13d,edx
  2020. pshufd xmm7,xmm1,250
  2021. add r14d,r11d
  2022. ror r13d,14
  2023. pslld xmm5,14
  2024. mov r11d,r14d
  2025. mov r12d,r8d
  2026. pxor xmm4,xmm6
  2027. ror r14d,9
  2028. xor r13d,edx
  2029. xor r12d,r9d
  2030. ror r13d,5
  2031. psrld xmm6,11
  2032. xor r14d,r11d
  2033. pxor xmm4,xmm5
  2034. and r12d,edx
  2035. xor r13d,edx
  2036. pslld xmm5,11
  2037. add r10d,DWORD PTR[36+rsp]
  2038. mov edi,r11d
  2039. pxor xmm4,xmm6
  2040. xor r12d,r9d
  2041. ror r14d,11
  2042. movdqa xmm6,xmm7
  2043. xor edi,eax
  2044. add r10d,r12d
  2045. pxor xmm4,xmm5
  2046. ror r13d,6
  2047. and r15d,edi
  2048. xor r14d,r11d
  2049. psrld xmm7,10
  2050. add r10d,r13d
  2051. xor r15d,eax
  2052. paddd xmm2,xmm4
  2053. ror r14d,2
  2054. add ecx,r10d
  2055. psrlq xmm6,17
  2056. add r10d,r15d
  2057. mov r13d,ecx
  2058. add r14d,r10d
  2059. pxor xmm7,xmm6
  2060. ror r13d,14
  2061. mov r10d,r14d
  2062. mov r12d,edx
  2063. ror r14d,9
  2064. psrlq xmm6,2
  2065. xor r13d,ecx
  2066. xor r12d,r8d
  2067. pxor xmm7,xmm6
  2068. ror r13d,5
  2069. xor r14d,r10d
  2070. and r12d,ecx
  2071. pshufd xmm7,xmm7,128
  2072. xor r13d,ecx
  2073. add r9d,DWORD PTR[40+rsp]
  2074. mov r15d,r10d
  2075. psrldq xmm7,8
  2076. xor r12d,r8d
  2077. ror r14d,11
  2078. xor r15d,r11d
  2079. add r9d,r12d
  2080. ror r13d,6
  2081. paddd xmm2,xmm7
  2082. and edi,r15d
  2083. xor r14d,r10d
  2084. add r9d,r13d
  2085. pshufd xmm7,xmm2,80
  2086. xor edi,r11d
  2087. ror r14d,2
  2088. add ebx,r9d
  2089. movdqa xmm6,xmm7
  2090. add r9d,edi
  2091. mov r13d,ebx
  2092. psrld xmm7,10
  2093. add r14d,r9d
  2094. ror r13d,14
  2095. psrlq xmm6,17
  2096. mov r9d,r14d
  2097. mov r12d,ecx
  2098. pxor xmm7,xmm6
  2099. ror r14d,9
  2100. xor r13d,ebx
  2101. xor r12d,edx
  2102. ror r13d,5
  2103. xor r14d,r9d
  2104. psrlq xmm6,2
  2105. and r12d,ebx
  2106. xor r13d,ebx
  2107. add r8d,DWORD PTR[44+rsp]
  2108. pxor xmm7,xmm6
  2109. mov edi,r9d
  2110. xor r12d,edx
  2111. ror r14d,11
  2112. pshufd xmm7,xmm7,8
  2113. xor edi,r10d
  2114. add r8d,r12d
  2115. movdqa xmm6,XMMWORD PTR[64+rbp]
  2116. ror r13d,6
  2117. and r15d,edi
  2118. pslldq xmm7,8
  2119. xor r14d,r9d
  2120. add r8d,r13d
  2121. xor r15d,r10d
  2122. paddd xmm2,xmm7
  2123. ror r14d,2
  2124. add eax,r8d
  2125. add r8d,r15d
  2126. paddd xmm6,xmm2
  2127. mov r13d,eax
  2128. add r14d,r8d
  2129. movdqa XMMWORD PTR[32+rsp],xmm6 ;refill slots 8..11
  2130. ror r13d,14
  2131. movdqa xmm4,xmm0
  2132. mov r8d,r14d
  2133. mov r12d,ebx
  2134. movdqa xmm7,xmm2
  2135. ror r14d,9
  2136. xor r13d,eax
  2137. xor r12d,ecx
  2138. ror r13d,5
  2139. xor r14d,r8d
  2140. DB 102,15,58,15,227,4 ;palignr xmm4,xmm3,4
  2141. and r12d,eax
  2142. xor r13d,eax
  2143. DB 102,15,58,15,249,4 ;palignr xmm7,xmm1,4
  2144. add edx,DWORD PTR[48+rsp]
  2145. mov r15d,r8d
  2146. xor r12d,ecx
  2147. ror r14d,11
  2148. movdqa xmm5,xmm4
  2149. xor r15d,r9d
  2150. add edx,r12d
  2151. movdqa xmm6,xmm4
  2152. ror r13d,6
  2153. and edi,r15d
  2154. psrld xmm4,3
  2155. xor r14d,r8d
  2156. add edx,r13d
  2157. xor edi,r9d
  2158. paddd xmm3,xmm7
  2159. ror r14d,2
  2160. add r11d,edx
  2161. psrld xmm6,7
  2162. add edx,edi
  2163. mov r13d,r11d
  2164. pshufd xmm7,xmm2,250
  2165. add r14d,edx
  2166. ror r13d,14
  2167. pslld xmm5,14
  2168. mov edx,r14d
  2169. mov r12d,eax
  2170. pxor xmm4,xmm6
  2171. ror r14d,9
  2172. xor r13d,r11d
  2173. xor r12d,ebx
  2174. ror r13d,5
  2175. psrld xmm6,11
  2176. xor r14d,edx
  2177. pxor xmm4,xmm5
  2178. and r12d,r11d
  2179. xor r13d,r11d
  2180. pslld xmm5,11
  2181. add ecx,DWORD PTR[52+rsp]
  2182. mov edi,edx
  2183. pxor xmm4,xmm6
  2184. xor r12d,ebx
  2185. ror r14d,11
  2186. movdqa xmm6,xmm7
  2187. xor edi,r8d
  2188. add ecx,r12d
  2189. pxor xmm4,xmm5
  2190. ror r13d,6
  2191. and r15d,edi
  2192. xor r14d,edx
  2193. psrld xmm7,10
  2194. add ecx,r13d
  2195. xor r15d,r8d
  2196. paddd xmm3,xmm4
  2197. ror r14d,2
  2198. add r10d,ecx
  2199. psrlq xmm6,17
  2200. add ecx,r15d
  2201. mov r13d,r10d
  2202. add r14d,ecx
  2203. pxor xmm7,xmm6
  2204. ror r13d,14
  2205. mov ecx,r14d
  2206. mov r12d,r11d
  2207. ror r14d,9
  2208. psrlq xmm6,2
  2209. xor r13d,r10d
  2210. xor r12d,eax
  2211. pxor xmm7,xmm6
  2212. ror r13d,5
  2213. xor r14d,ecx
  2214. and r12d,r10d
  2215. pshufd xmm7,xmm7,128
  2216. xor r13d,r10d
  2217. add ebx,DWORD PTR[56+rsp]
  2218. mov r15d,ecx
  2219. psrldq xmm7,8
  2220. xor r12d,eax
  2221. ror r14d,11
  2222. xor r15d,edx
  2223. add ebx,r12d
  2224. ror r13d,6
  2225. paddd xmm3,xmm7
  2226. and edi,r15d
  2227. xor r14d,ecx
  2228. add ebx,r13d
  2229. pshufd xmm7,xmm3,80
  2230. xor edi,edx
  2231. ror r14d,2
  2232. add r9d,ebx
  2233. movdqa xmm6,xmm7
  2234. add ebx,edi
  2235. mov r13d,r9d
  2236. psrld xmm7,10
  2237. add r14d,ebx
  2238. ror r13d,14
  2239. psrlq xmm6,17
  2240. mov ebx,r14d
  2241. mov r12d,r10d
  2242. pxor xmm7,xmm6
  2243. ror r14d,9
  2244. xor r13d,r9d
  2245. xor r12d,r11d
  2246. ror r13d,5
  2247. xor r14d,ebx
  2248. psrlq xmm6,2
  2249. and r12d,r9d
  2250. xor r13d,r9d
  2251. add eax,DWORD PTR[60+rsp]
  2252. pxor xmm7,xmm6
  2253. mov edi,ebx
  2254. xor r12d,r11d
  2255. ror r14d,11
  2256. pshufd xmm7,xmm7,8
  2257. xor edi,ecx
  2258. add eax,r12d
  2259. movdqa xmm6,XMMWORD PTR[96+rbp]
  2260. ror r13d,6
  2261. and r15d,edi
  2262. pslldq xmm7,8
  2263. xor r14d,ebx
  2264. add eax,r13d
  2265. xor r15d,ecx
  2266. paddd xmm3,xmm7
  2267. ror r14d,2
  2268. add r8d,eax
  2269. add eax,r15d
  2270. paddd xmm6,xmm3
  2271. mov r13d,r8d
  2272. add r14d,eax
  2273. movdqa XMMWORD PTR[48+rsp],xmm6 ;refill slots 12..15
  2274. cmp BYTE PTR[131+rbp],0 ;byte 3 of the dword at rbp+128: nonzero while that is a round constant, zero once it is the 000010203h mask at K256+512
  2275. jne $L$ssse3_00_47 ;so this loop body runs three times per block
  2276. ror r13d,14 ;remaining 16 rounds: consume the stored sums with no further message scheduling
  2277. mov eax,r14d
  2278. mov r12d,r9d
  2279. ror r14d,9
  2280. xor r13d,r8d
  2281. xor r12d,r10d
  2282. ror r13d,5
  2283. xor r14d,eax
  2284. and r12d,r8d
  2285. xor r13d,r8d
  2286. add r11d,DWORD PTR[rsp]
  2287. mov r15d,eax
  2288. xor r12d,r10d
  2289. ror r14d,11
  2290. xor r15d,ebx
  2291. add r11d,r12d
  2292. ror r13d,6
  2293. and edi,r15d
  2294. xor r14d,eax
  2295. add r11d,r13d
  2296. xor edi,ebx
  2297. ror r14d,2
  2298. add edx,r11d
  2299. add r11d,edi
  2300. mov r13d,edx
  2301. add r14d,r11d
  2302. ror r13d,14
  2303. mov r11d,r14d
  2304. mov r12d,r8d
  2305. ror r14d,9
  2306. xor r13d,edx
  2307. xor r12d,r9d
  2308. ror r13d,5
  2309. xor r14d,r11d
  2310. and r12d,edx
  2311. xor r13d,edx
  2312. add r10d,DWORD PTR[4+rsp]
  2313. mov edi,r11d
  2314. xor r12d,r9d
  2315. ror r14d,11
  2316. xor edi,eax
  2317. add r10d,r12d
  2318. ror r13d,6
  2319. and r15d,edi
  2320. xor r14d,r11d
  2321. add r10d,r13d
  2322. xor r15d,eax
  2323. ror r14d,2
  2324. add ecx,r10d
  2325. add r10d,r15d
  2326. mov r13d,ecx
  2327. add r14d,r10d
  2328. ror r13d,14
  2329. mov r10d,r14d
  2330. mov r12d,edx
  2331. ror r14d,9
  2332. xor r13d,ecx
  2333. xor r12d,r8d
  2334. ror r13d,5
  2335. xor r14d,r10d
  2336. and r12d,ecx
  2337. xor r13d,ecx
  2338. add r9d,DWORD PTR[8+rsp]
  2339. mov r15d,r10d
  2340. xor r12d,r8d
  2341. ror r14d,11
  2342. xor r15d,r11d
  2343. add r9d,r12d
  2344. ror r13d,6
  2345. and edi,r15d
  2346. xor r14d,r10d
  2347. add r9d,r13d
  2348. xor edi,r11d
  2349. ror r14d,2
  2350. add ebx,r9d
  2351. add r9d,edi
  2352. mov r13d,ebx
  2353. add r14d,r9d
  2354. ror r13d,14
  2355. mov r9d,r14d
  2356. mov r12d,ecx
  2357. ror r14d,9
  2358. xor r13d,ebx
  2359. xor r12d,edx
  2360. ror r13d,5
  2361. xor r14d,r9d
  2362. and r12d,ebx
  2363. xor r13d,ebx
  2364. add r8d,DWORD PTR[12+rsp]
  2365. mov edi,r9d
  2366. xor r12d,edx
  2367. ror r14d,11
  2368. xor edi,r10d
  2369. add r8d,r12d
  2370. ror r13d,6
  2371. and r15d,edi
  2372. xor r14d,r9d
  2373. add r8d,r13d
  2374. xor r15d,r10d
  2375. ror r14d,2
  2376. add eax,r8d
  2377. add r8d,r15d
  2378. mov r13d,eax
  2379. add r14d,r8d
  2380. ror r13d,14
  2381. mov r8d,r14d
  2382. mov r12d,ebx
  2383. ror r14d,9
  2384. xor r13d,eax
  2385. xor r12d,ecx
  2386. ror r13d,5
  2387. xor r14d,r8d
  2388. and r12d,eax
  2389. xor r13d,eax
  2390. add edx,DWORD PTR[16+rsp]
  2391. mov r15d,r8d
  2392. xor r12d,ecx
  2393. ror r14d,11
  2394. xor r15d,r9d
  2395. add edx,r12d
  2396. ror r13d,6
  2397. and edi,r15d
  2398. xor r14d,r8d
  2399. add edx,r13d
  2400. xor edi,r9d
  2401. ror r14d,2
  2402. add r11d,edx
  2403. add edx,edi
  2404. mov r13d,r11d
  2405. add r14d,edx
  2406. ror r13d,14
  2407. mov edx,r14d
  2408. mov r12d,eax
  2409. ror r14d,9
  2410. xor r13d,r11d
  2411. xor r12d,ebx
  2412. ror r13d,5
  2413. xor r14d,edx
  2414. and r12d,r11d
  2415. xor r13d,r11d
  2416. add ecx,DWORD PTR[20+rsp]
  2417. mov edi,edx
  2418. xor r12d,ebx
  2419. ror r14d,11
  2420. xor edi,r8d
  2421. add ecx,r12d
  2422. ror r13d,6
  2423. and r15d,edi
  2424. xor r14d,edx
  2425. add ecx,r13d
  2426. xor r15d,r8d
  2427. ror r14d,2
  2428. add r10d,ecx
  2429. add ecx,r15d
  2430. mov r13d,r10d
  2431. add r14d,ecx
  2432. ror r13d,14
  2433. mov ecx,r14d
  2434. mov r12d,r11d
  2435. ror r14d,9
  2436. xor r13d,r10d
  2437. xor r12d,eax
  2438. ror r13d,5
  2439. xor r14d,ecx
  2440. and r12d,r10d
  2441. xor r13d,r10d
  2442. add ebx,DWORD PTR[24+rsp]
  2443. mov r15d,ecx
  2444. xor r12d,eax
  2445. ror r14d,11
  2446. xor r15d,edx
  2447. add ebx,r12d
  2448. ror r13d,6
  2449. and edi,r15d
  2450. xor r14d,ecx
  2451. add ebx,r13d
  2452. xor edi,edx
  2453. ror r14d,2
  2454. add r9d,ebx
  2455. add ebx,edi
  2456. mov r13d,r9d
  2457. add r14d,ebx
  2458. ror r13d,14
  2459. mov ebx,r14d
  2460. mov r12d,r10d
  2461. ror r14d,9
  2462. xor r13d,r9d
  2463. xor r12d,r11d
  2464. ror r13d,5
  2465. xor r14d,ebx
  2466. and r12d,r9d
  2467. xor r13d,r9d
  2468. add eax,DWORD PTR[28+rsp]
  2469. mov edi,ebx
  2470. xor r12d,r11d
  2471. ror r14d,11
  2472. xor edi,ecx
  2473. add eax,r12d
  2474. ror r13d,6
  2475. and r15d,edi
  2476. xor r14d,ebx
  2477. add eax,r13d
  2478. xor r15d,ecx
  2479. ror r14d,2
  2480. add r8d,eax
  2481. add eax,r15d
  2482. mov r13d,r8d
  2483. add r14d,eax
  2484. ror r13d,14
  2485. mov eax,r14d
  2486. mov r12d,r9d
  2487. ror r14d,9
  2488. xor r13d,r8d
  2489. xor r12d,r10d
  2490. ror r13d,5
  2491. xor r14d,eax
  2492. and r12d,r8d
  2493. xor r13d,r8d
  2494. add r11d,DWORD PTR[32+rsp]
  2495. mov r15d,eax
  2496. xor r12d,r10d
  2497. ror r14d,11
  2498. xor r15d,ebx
  2499. add r11d,r12d
  2500. ror r13d,6
  2501. and edi,r15d
  2502. xor r14d,eax
  2503. add r11d,r13d
  2504. xor edi,ebx
  2505. ror r14d,2
  2506. add edx,r11d
  2507. add r11d,edi
  2508. mov r13d,edx
  2509. add r14d,r11d
  2510. ror r13d,14
  2511. mov r11d,r14d
  2512. mov r12d,r8d
  2513. ror r14d,9
  2514. xor r13d,edx
  2515. xor r12d,r9d
  2516. ror r13d,5
  2517. xor r14d,r11d
  2518. and r12d,edx
  2519. xor r13d,edx
  2520. add r10d,DWORD PTR[36+rsp]
  2521. mov edi,r11d
  2522. xor r12d,r9d
  2523. ror r14d,11
  2524. xor edi,eax
  2525. add r10d,r12d
  2526. ror r13d,6
  2527. and r15d,edi
  2528. xor r14d,r11d
  2529. add r10d,r13d
  2530. xor r15d,eax
  2531. ror r14d,2
  2532. add ecx,r10d
  2533. add r10d,r15d
  2534. mov r13d,ecx
  2535. add r14d,r10d
  2536. ror r13d,14
  2537. mov r10d,r14d
  2538. mov r12d,edx
  2539. ror r14d,9
  2540. xor r13d,ecx
  2541. xor r12d,r8d
  2542. ror r13d,5
  2543. xor r14d,r10d
  2544. and r12d,ecx
  2545. xor r13d,ecx
  2546. add r9d,DWORD PTR[40+rsp]
  2547. mov r15d,r10d
  2548. xor r12d,r8d
  2549. ror r14d,11
  2550. xor r15d,r11d
  2551. add r9d,r12d
  2552. ror r13d,6
  2553. and edi,r15d
  2554. xor r14d,r10d
  2555. add r9d,r13d
  2556. xor edi,r11d
  2557. ror r14d,2
  2558. add ebx,r9d
  2559. add r9d,edi
  2560. mov r13d,ebx
  2561. add r14d,r9d
  2562. ror r13d,14
  2563. mov r9d,r14d
  2564. mov r12d,ecx
  2565. ror r14d,9
  2566. xor r13d,ebx
  2567. xor r12d,edx
  2568. ror r13d,5
  2569. xor r14d,r9d
  2570. and r12d,ebx
  2571. xor r13d,ebx
  2572. add r8d,DWORD PTR[44+rsp]
  2573. mov edi,r9d
  2574. xor r12d,edx
  2575. ror r14d,11
  2576. xor edi,r10d
  2577. add r8d,r12d
  2578. ror r13d,6
  2579. and r15d,edi
  2580. xor r14d,r9d
  2581. add r8d,r13d
  2582. xor r15d,r10d
  2583. ror r14d,2
  2584. add eax,r8d
  2585. add r8d,r15d
  2586. mov r13d,eax
  2587. add r14d,r8d
  2588. ror r13d,14
  2589. mov r8d,r14d
  2590. mov r12d,ebx
  2591. ror r14d,9
  2592. xor r13d,eax
  2593. xor r12d,ecx
  2594. ror r13d,5
  2595. xor r14d,r8d
  2596. and r12d,eax
  2597. xor r13d,eax
  2598. add edx,DWORD PTR[48+rsp]
  2599. mov r15d,r8d
  2600. xor r12d,ecx
  2601. ror r14d,11
  2602. xor r15d,r9d
  2603. add edx,r12d
  2604. ror r13d,6
  2605. and edi,r15d
  2606. xor r14d,r8d
  2607. add edx,r13d
  2608. xor edi,r9d
  2609. ror r14d,2
  2610. add r11d,edx
  2611. add edx,edi
  2612. mov r13d,r11d
  2613. add r14d,edx
  2614. ror r13d,14
  2615. mov edx,r14d
  2616. mov r12d,eax
  2617. ror r14d,9
  2618. xor r13d,r11d
  2619. xor r12d,ebx
  2620. ror r13d,5
  2621. xor r14d,edx
  2622. and r12d,r11d
  2623. xor r13d,r11d
  2624. add ecx,DWORD PTR[52+rsp]
  2625. mov edi,edx
  2626. xor r12d,ebx
  2627. ror r14d,11
  2628. xor edi,r8d
  2629. add ecx,r12d
  2630. ror r13d,6
  2631. and r15d,edi
  2632. xor r14d,edx
  2633. add ecx,r13d
  2634. xor r15d,r8d
  2635. ror r14d,2
  2636. add r10d,ecx
  2637. add ecx,r15d
  2638. mov r13d,r10d
  2639. add r14d,ecx
  2640. ror r13d,14
  2641. mov ecx,r14d
  2642. mov r12d,r11d
  2643. ror r14d,9
  2644. xor r13d,r10d
  2645. xor r12d,eax
  2646. ror r13d,5
  2647. xor r14d,ecx
  2648. and r12d,r10d
  2649. xor r13d,r10d
  2650. add ebx,DWORD PTR[56+rsp]
  2651. mov r15d,ecx
  2652. xor r12d,eax
  2653. ror r14d,11
  2654. xor r15d,edx
  2655. add ebx,r12d
  2656. ror r13d,6
  2657. and edi,r15d
  2658. xor r14d,ecx
  2659. add ebx,r13d
  2660. xor edi,edx
  2661. ror r14d,2
  2662. add r9d,ebx
  2663. add ebx,edi
  2664. mov r13d,r9d
  2665. add r14d,ebx
  2666. ror r13d,14
  2667. mov ebx,r14d
  2668. mov r12d,r10d
  2669. ror r14d,9
  2670. xor r13d,r9d
  2671. xor r12d,r11d
  2672. ror r13d,5
  2673. xor r14d,ebx
  2674. and r12d,r9d
  2675. xor r13d,r9d
  2676. add eax,DWORD PTR[60+rsp]
  2677. mov edi,ebx
  2678. xor r12d,r11d
  2679. ror r14d,11
  2680. xor edi,ecx
  2681. add eax,r12d
  2682. ror r13d,6
  2683. and r15d,edi
  2684. xor r14d,ebx
  2685. add eax,r13d
  2686. xor r15d,ecx
  2687. ror r14d,2
  2688. add r8d,eax
  2689. add eax,r15d
  2690. mov r13d,r8d
  2691. add r14d,eax
  2692. mov rdi,QWORD PTR[((64+0))+rsp] ;reload the state pointer (rdi was used as a scratch register in the rounds)
  2693. mov eax,r14d
  2694. add eax,DWORD PTR[rdi] ;add the previous state to the eight working variables
  2695. lea rsi,QWORD PTR[64+rsi] ;advance the input pointer to the next block
  2696. add ebx,DWORD PTR[4+rdi]
  2697. add ecx,DWORD PTR[8+rdi]
  2698. add edx,DWORD PTR[12+rdi]
  2699. add r8d,DWORD PTR[16+rdi]
  2700. add r9d,DWORD PTR[20+rdi]
  2701. add r10d,DWORD PTR[24+rdi]
  2702. add r11d,DWORD PTR[28+rdi]
  2703. cmp rsi,QWORD PTR[((64+16))+rsp] ;reached the end-of-input pointer? (the mov stores below do not change flags)
  2704. mov DWORD PTR[rdi],eax ;write the updated state back
  2705. mov DWORD PTR[4+rdi],ebx
  2706. mov DWORD PTR[8+rdi],ecx
  2707. mov DWORD PTR[12+rdi],edx
  2708. mov DWORD PTR[16+rdi],r8d
  2709. mov DWORD PTR[20+rdi],r9d
  2710. mov DWORD PTR[24+rdi],r10d
  2711. mov DWORD PTR[28+rdi],r11d
  2712. jb $L$loop_ssse3 ;unsigned compare: loop while rsi is below the end pointer
  2713. mov rsi,QWORD PTR[88+rsp] ;rsi = entry rsp saved in the prologue
  2714. movaps xmm6,XMMWORD PTR[((64+32))+rsp] ;restore xmm6..xmm9
  2715. movaps xmm7,XMMWORD PTR[((64+48))+rsp]
  2716. movaps xmm8,XMMWORD PTR[((64+64))+rsp]
  2717. movaps xmm9,XMMWORD PTR[((64+80))+rsp]
  2718. mov r15,QWORD PTR[((-48))+rsi] ;reload the six pushed registers from just below the entry rsp
  2719. mov r14,QWORD PTR[((-40))+rsi]
  2720. mov r13,QWORD PTR[((-32))+rsi]
  2721. mov r12,QWORD PTR[((-24))+rsi]
  2722. mov rbp,QWORD PTR[((-16))+rsi]
  2723. mov rbx,QWORD PTR[((-8))+rsi]
  2724. lea rsp,QWORD PTR[rsi] ;rsp = entry rsp, releasing the whole frame
  2725. $L$epilogue_ssse3::
  2726. mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue
  2727. mov rsi,QWORD PTR[16+rsp]
  2728. DB 0F3h,0C3h ;repret
  2729. $L$SEH_end_sha256_block_data_order_ssse3::
  2730. sha256_block_data_order_ssse3 ENDP
  2731. ALIGN 64
  ;----------------------------------------------------------------------
  ;sha256_block_data_order_avx -- AVX variant of the SHA-256 block routine.
  ;Win64 entry: rcx, rdx, r8 are copied into rdi, rsi, rdx. The code then
  ;uses rdi as a pointer to eight 32-bit state words, rsi as the input
  ;pointer and rdx as a count of 64-byte blocks (end = rsi + rdx*64).
  ;$L$avx_shortcut is a second entry label placed after the argument
  ;moves; sha256_block_data_order jumps to it after doing the same moves.
  ;Preserved for the caller: rbx, rbp, r12-r15, rdi, rsi, xmm6-xmm9.
  ;NOTE(review): the block loop is bottom-tested, so a count of 0 would
  ;still process one block -- confirm callers never pass 0.
  ;----------------------------------------------------------------------
  2732. sha256_block_data_order_avx PROC PRIVATE
  2733. mov QWORD PTR[8+rsp],rdi ;WIN64 prologue
  2734. mov QWORD PTR[16+rsp],rsi
  2735. mov rax,rsp
  2736. $L$SEH_begin_sha256_block_data_order_avx::
  2737. mov rdi,rcx
  2738. mov rsi,rdx
  2739. mov rdx,r8
  2740. $L$avx_shortcut::
  2741. mov rax,rsp ;rax = rsp before the pushes; stored at [88+rsp] below
  2742. push rbx
  2743. push rbp
  2744. push r12
  2745. push r13
  2746. push r14
  2747. push r15
  2748. shl rdx,4 ;with the *4 in the lea below: rdx = count*64
  2749. sub rsp,160 ;frame: 64 B W+K words, 32 B saved values, 64 B xmm6-xmm9
  2750. lea rdx,QWORD PTR[rdx*4+rsi] ;rdx = rsi + count*64 = end of input
  2751. and rsp,-64 ;align the frame to 64 bytes
  2752. mov QWORD PTR[((64+0))+rsp],rdi ;save state pointer
  2753. mov QWORD PTR[((64+8))+rsp],rsi ;save input pointer
  2754. mov QWORD PTR[((64+16))+rsp],rdx ;save end-of-input pointer
  2755. mov QWORD PTR[88+rsp],rax ;save pre-push rsp for the epilogue
  2756. movaps XMMWORD PTR[(64+32)+rsp],xmm6 ;save xmm6-xmm9 (restored in the epilogue)
  2757. movaps XMMWORD PTR[(64+48)+rsp],xmm7
  2758. movaps XMMWORD PTR[(64+64)+rsp],xmm8
  2759. movaps XMMWORD PTR[(64+80)+rsp],xmm9
  2760. $L$prologue_avx::
  2761. vzeroupper
  2762. mov eax,DWORD PTR[rdi] ;load the eight state words a..h
  2763. mov ebx,DWORD PTR[4+rdi]
  2764. mov ecx,DWORD PTR[8+rdi]
  2765. mov edx,DWORD PTR[12+rdi]
  2766. mov r8d,DWORD PTR[16+rdi]
  2767. mov r9d,DWORD PTR[20+rdi]
  2768. mov r10d,DWORD PTR[24+rdi]
  2769. mov r11d,DWORD PTR[28+rdi]
  2770. vmovdqa xmm8,XMMWORD PTR[((K256+512+32))] ;shuffle mask used by vpshufb in the schedule code
  2771. vmovdqa xmm9,XMMWORD PTR[((K256+512+64))] ;second shuffle mask used by the schedule code
  2772. jmp $L$loop_avx
  2773. ALIGN 16
  ;Per-block loop: one iteration consumes 64 bytes at rsi.
  2774. $L$loop_avx::
  2775. vmovdqa xmm7,XMMWORD PTR[((K256+512))] ;shuffle mask for the input words (presumably a byte swap -- table not visible here)
  2776. vmovdqu xmm0,XMMWORD PTR[rsi] ;load 64 input bytes (unaligned loads)
  2777. vmovdqu xmm1,XMMWORD PTR[16+rsi]
  2778. vmovdqu xmm2,XMMWORD PTR[32+rsi]
  2779. vmovdqu xmm3,XMMWORD PTR[48+rsi]
  2780. vpshufb xmm0,xmm0,xmm7
  2781. lea rbp,QWORD PTR[K256] ;rbp walks the constant table, rows 32 bytes apart
  2782. vpshufb xmm1,xmm1,xmm7
  2783. vpshufb xmm2,xmm2,xmm7
  2784. vpaddd xmm4,xmm0,XMMWORD PTR[rbp] ;W+K for words 0..3
  2785. vpshufb xmm3,xmm3,xmm7
  2786. vpaddd xmm5,xmm1,XMMWORD PTR[32+rbp] ;W+K for words 4..7
  2787. vpaddd xmm6,xmm2,XMMWORD PTR[64+rbp] ;W+K for words 8..11
  2788. vpaddd xmm7,xmm3,XMMWORD PTR[96+rbp] ;W+K for words 12..15
  2789. vmovdqa XMMWORD PTR[rsp],xmm4 ;park the 16 W+K words at [rsp..rsp+63]
  2790. mov r14d,eax ;r14d starts as a
  2791. vmovdqa XMMWORD PTR[16+rsp],xmm5
  2792. mov edi,ebx
  2793. vmovdqa XMMWORD PTR[32+rsp],xmm6
  2794. xor edi,ecx ;edi = b^c, seed for the first round's majority term
  2795. vmovdqa XMMWORD PTR[48+rsp],xmm7
  2796. mov r13d,r8d ;r13d starts as e
  2797. jmp $L$avx_00_47
  2798. ALIGN 16
  ;Each pass below runs 16 rounds using the W+K words stored at
  ;[rsp..rsp+63] while the interleaved vector code derives the next 16
  ;schedule words in xmm0-xmm3, adds the next constants from [rbp] and
  ;stores the sums back into the same stack slots.
  ;Scalar round pattern (a..h rotate through eax,ebx,ecx,edx,r8d-r11d):
  ; r13d -> ror(e,6)^ror(e,11)^ror(e,25)   (shrd x,x,n rotates x right)
  ; r12d -> ((f^g)&e)^g
  ; r14d -> ror(a,2)^ror(a,13)^ror(a,22), folded into the new a at the
  ;         start of the following round
  ; edi/r15d alternate between (a^b) and ((a^b)&(b^c))^b
  2799. $L$avx_00_47::
  2800. sub rbp,-128 ;rbp += 128: next four constant rows
  2801. vpalignr xmm4,xmm1,xmm0,4 ;xmm4 = words 1..4 of the xmm0:xmm1 pair
  2802. shrd r13d,r13d,14
  2803. mov eax,r14d ;a = value completed in r14d
  2804. mov r12d,r9d
  2805. vpalignr xmm7,xmm3,xmm2,4 ;xmm7 = words 9..12 (from xmm2:xmm3)
  2806. shrd r14d,r14d,9
  2807. xor r13d,r8d
  2808. xor r12d,r10d
  2809. vpsrld xmm6,xmm4,7 ;start of (x>>3)^ror(x,7)^ror(x,18) on xmm4
  2810. shrd r13d,r13d,5
  2811. xor r14d,eax
  2812. and r12d,r8d
  2813. vpaddd xmm0,xmm0,xmm7 ;xmm0 += words 9..12
  2814. xor r13d,r8d
  2815. add r11d,DWORD PTR[rsp] ;h += W+K word 0
  2816. mov r15d,eax
  2817. vpsrld xmm7,xmm4,3
  2818. xor r12d,r10d ;r12d = ((f^g)&e)^g
  2819. shrd r14d,r14d,11
  2820. xor r15d,ebx ;r15d = a^b
  2821. vpslld xmm5,xmm4,14
  2822. add r11d,r12d
  2823. shrd r13d,r13d,6 ;r13d = ror(e,6)^ror(e,11)^ror(e,25)
  2824. and edi,r15d
  2825. vpxor xmm4,xmm7,xmm6
  2826. xor r14d,eax
  2827. add r11d,r13d
  2828. xor edi,ebx ;edi = ((a^b)&(b^c))^b
  2829. vpshufd xmm7,xmm3,250 ;selector 0FAh: duplicate the two high words of xmm3
  2830. shrd r14d,r14d,2 ;r14d = ror(a,2)^ror(a,13)^ror(a,22)
  2831. add edx,r11d ;d += h
  2832. add r11d,edi
  2833. vpsrld xmm6,xmm6,11
  2834. mov r13d,edx ;next round's e
  2835. add r14d,r11d ;r14d = next round's a
  2836. shrd r13d,r13d,14
  2837. vpxor xmm4,xmm4,xmm5
  2838. mov r11d,r14d
  2839. mov r12d,r8d
  2840. shrd r14d,r14d,9
  2841. vpslld xmm5,xmm5,11
  2842. xor r13d,edx
  2843. xor r12d,r9d
  2844. shrd r13d,r13d,5
  2845. vpxor xmm4,xmm4,xmm6
  2846. xor r14d,r11d
  2847. and r12d,edx
  2848. xor r13d,edx
  2849. vpsrld xmm6,xmm7,10 ;start of (x>>10)^ror(x,17)^ror(x,19) on the duplicated words
  2850. add r10d,DWORD PTR[4+rsp] ;h += W+K word 1
  2851. mov edi,r11d
  2852. xor r12d,r9d
  2853. vpxor xmm4,xmm4,xmm5 ;xmm4 = (x>>3)^ror(x,7)^ror(x,18) of words 1..4
  2854. shrd r14d,r14d,11
  2855. xor edi,eax
  2856. add r10d,r12d
  2857. vpsrlq xmm7,xmm7,17 ;64-bit shift of a duplicated word acts as a rotate in its low half
  2858. shrd r13d,r13d,6
  2859. and r15d,edi
  2860. xor r14d,r11d
  2861. vpaddd xmm0,xmm0,xmm4
  2862. add r10d,r13d
  2863. xor r15d,eax
  2864. shrd r14d,r14d,2
  2865. vpxor xmm6,xmm6,xmm7
  2866. add ecx,r10d
  2867. add r10d,r15d
  2868. mov r13d,ecx
  2869. vpsrlq xmm7,xmm7,2
  2870. add r14d,r10d
  2871. shrd r13d,r13d,14
  2872. mov r10d,r14d
  2873. vpxor xmm6,xmm6,xmm7
  2874. mov r12d,edx
  2875. shrd r14d,r14d,9
  2876. xor r13d,ecx
  2877. vpshufb xmm6,xmm6,xmm8 ;byte shuffle through mask xmm8 (mask contents not visible here)
  2878. xor r12d,r8d
  2879. shrd r13d,r13d,5
  2880. xor r14d,r10d
  2881. vpaddd xmm0,xmm0,xmm6
  2882. and r12d,ecx
  2883. xor r13d,ecx
  2884. add r9d,DWORD PTR[8+rsp] ;h += W+K word 2
  2885. vpshufd xmm7,xmm0,80 ;selector 50h: duplicate the two low words of the updated xmm0
  2886. mov r15d,r10d
  2887. xor r12d,r8d
  2888. shrd r14d,r14d,11
  2889. vpsrld xmm6,xmm7,10
  2890. xor r15d,r11d
  2891. add r9d,r12d
  2892. shrd r13d,r13d,6
  2893. vpsrlq xmm7,xmm7,17
  2894. and edi,r15d
  2895. xor r14d,r10d
  2896. add r9d,r13d
  2897. vpxor xmm6,xmm6,xmm7
  2898. xor edi,r11d
  2899. shrd r14d,r14d,2
  2900. add ebx,r9d
  2901. vpsrlq xmm7,xmm7,2
  2902. add r9d,edi
  2903. mov r13d,ebx
  2904. add r14d,r9d
  2905. vpxor xmm6,xmm6,xmm7
  2906. shrd r13d,r13d,14
  2907. mov r9d,r14d
  2908. mov r12d,ecx
  2909. vpshufb xmm6,xmm6,xmm9 ;byte shuffle through mask xmm9 (mask contents not visible here)
  2910. shrd r14d,r14d,9
  2911. xor r13d,ebx
  2912. xor r12d,edx
  2913. vpaddd xmm0,xmm0,xmm6 ;xmm0 now holds the next four schedule words
  2914. shrd r13d,r13d,5
  2915. xor r14d,r9d
  2916. and r12d,ebx
  2917. vpaddd xmm6,xmm0,XMMWORD PTR[rbp] ;add the matching constants
  2918. xor r13d,ebx
  2919. add r8d,DWORD PTR[12+rsp] ;h += W+K word 3
  2920. mov edi,r9d
  2921. xor r12d,edx
  2922. shrd r14d,r14d,11
  2923. xor edi,r10d
  2924. add r8d,r12d
  2925. shrd r13d,r13d,6
  2926. and r15d,edi
  2927. xor r14d,r9d
  2928. add r8d,r13d
  2929. xor r15d,r10d
  2930. shrd r14d,r14d,2
  2931. add eax,r8d
  2932. add r8d,r15d
  2933. mov r13d,eax
  2934. add r14d,r8d
  2935. vmovdqa XMMWORD PTR[rsp],xmm6 ;refill slots 0..3 for the next pass
  2936. vpalignr xmm4,xmm2,xmm1,4 ;same schedule step, now updating xmm1
  2937. shrd r13d,r13d,14
  2938. mov r8d,r14d
  2939. mov r12d,ebx
  2940. vpalignr xmm7,xmm0,xmm3,4
  2941. shrd r14d,r14d,9
  2942. xor r13d,eax
  2943. xor r12d,ecx
  2944. vpsrld xmm6,xmm4,7
  2945. shrd r13d,r13d,5
  2946. xor r14d,r8d
  2947. and r12d,eax
  2948. vpaddd xmm1,xmm1,xmm7
  2949. xor r13d,eax
  2950. add edx,DWORD PTR[16+rsp] ;h += W+K word 4
  2951. mov r15d,r8d
  2952. vpsrld xmm7,xmm4,3
  2953. xor r12d,ecx
  2954. shrd r14d,r14d,11
  2955. xor r15d,r9d
  2956. vpslld xmm5,xmm4,14
  2957. add edx,r12d
  2958. shrd r13d,r13d,6
  2959. and edi,r15d
  2960. vpxor xmm4,xmm7,xmm6
  2961. xor r14d,r8d
  2962. add edx,r13d
  2963. xor edi,r9d
  2964. vpshufd xmm7,xmm0,250
  2965. shrd r14d,r14d,2
  2966. add r11d,edx
  2967. add edx,edi
  2968. vpsrld xmm6,xmm6,11
  2969. mov r13d,r11d
  2970. add r14d,edx
  2971. shrd r13d,r13d,14
  2972. vpxor xmm4,xmm4,xmm5
  2973. mov edx,r14d
  2974. mov r12d,eax
  2975. shrd r14d,r14d,9
  2976. vpslld xmm5,xmm5,11
  2977. xor r13d,r11d
  2978. xor r12d,ebx
  2979. shrd r13d,r13d,5
  2980. vpxor xmm4,xmm4,xmm6
  2981. xor r14d,edx
  2982. and r12d,r11d
  2983. xor r13d,r11d
  2984. vpsrld xmm6,xmm7,10
  2985. add ecx,DWORD PTR[20+rsp] ;h += W+K word 5
  2986. mov edi,edx
  2987. xor r12d,ebx
  2988. vpxor xmm4,xmm4,xmm5
  2989. shrd r14d,r14d,11
  2990. xor edi,r8d
  2991. add ecx,r12d
  2992. vpsrlq xmm7,xmm7,17
  2993. shrd r13d,r13d,6
  2994. and r15d,edi
  2995. xor r14d,edx
  2996. vpaddd xmm1,xmm1,xmm4
  2997. add ecx,r13d
  2998. xor r15d,r8d
  2999. shrd r14d,r14d,2
  3000. vpxor xmm6,xmm6,xmm7
  3001. add r10d,ecx
  3002. add ecx,r15d
  3003. mov r13d,r10d
  3004. vpsrlq xmm7,xmm7,2
  3005. add r14d,ecx
  3006. shrd r13d,r13d,14
  3007. mov ecx,r14d
  3008. vpxor xmm6,xmm6,xmm7
  3009. mov r12d,r11d
  3010. shrd r14d,r14d,9
  3011. xor r13d,r10d
  3012. vpshufb xmm6,xmm6,xmm8
  3013. xor r12d,eax
  3014. shrd r13d,r13d,5
  3015. xor r14d,ecx
  3016. vpaddd xmm1,xmm1,xmm6
  3017. and r12d,r10d
  3018. xor r13d,r10d
  3019. add ebx,DWORD PTR[24+rsp] ;h += W+K word 6
  3020. vpshufd xmm7,xmm1,80
  3021. mov r15d,ecx
  3022. xor r12d,eax
  3023. shrd r14d,r14d,11
  3024. vpsrld xmm6,xmm7,10
  3025. xor r15d,edx
  3026. add ebx,r12d
  3027. shrd r13d,r13d,6
  3028. vpsrlq xmm7,xmm7,17
  3029. and edi,r15d
  3030. xor r14d,ecx
  3031. add ebx,r13d
  3032. vpxor xmm6,xmm6,xmm7
  3033. xor edi,edx
  3034. shrd r14d,r14d,2
  3035. add r9d,ebx
  3036. vpsrlq xmm7,xmm7,2
  3037. add ebx,edi
  3038. mov r13d,r9d
  3039. add r14d,ebx
  3040. vpxor xmm6,xmm6,xmm7
  3041. shrd r13d,r13d,14
  3042. mov ebx,r14d
  3043. mov r12d,r10d
  3044. vpshufb xmm6,xmm6,xmm9
  3045. shrd r14d,r14d,9
  3046. xor r13d,r9d
  3047. xor r12d,r11d
  3048. vpaddd xmm1,xmm1,xmm6
  3049. shrd r13d,r13d,5
  3050. xor r14d,ebx
  3051. and r12d,r9d
  3052. vpaddd xmm6,xmm1,XMMWORD PTR[32+rbp]
  3053. xor r13d,r9d
  3054. add eax,DWORD PTR[28+rsp] ;h += W+K word 7
  3055. mov edi,ebx
  3056. xor r12d,r11d
  3057. shrd r14d,r14d,11
  3058. xor edi,ecx
  3059. add eax,r12d
  3060. shrd r13d,r13d,6
  3061. and r15d,edi
  3062. xor r14d,ebx
  3063. add eax,r13d
  3064. xor r15d,ecx
  3065. shrd r14d,r14d,2
  3066. add r8d,eax
  3067. add eax,r15d
  3068. mov r13d,r8d
  3069. add r14d,eax
  3070. vmovdqa XMMWORD PTR[16+rsp],xmm6 ;refill slots 4..7
  3071. vpalignr xmm4,xmm3,xmm2,4 ;same schedule step, now updating xmm2
  3072. shrd r13d,r13d,14
  3073. mov eax,r14d
  3074. mov r12d,r9d
  3075. vpalignr xmm7,xmm1,xmm0,4
  3076. shrd r14d,r14d,9
  3077. xor r13d,r8d
  3078. xor r12d,r10d
  3079. vpsrld xmm6,xmm4,7
  3080. shrd r13d,r13d,5
  3081. xor r14d,eax
  3082. and r12d,r8d
  3083. vpaddd xmm2,xmm2,xmm7
  3084. xor r13d,r8d
  3085. add r11d,DWORD PTR[32+rsp] ;h += W+K word 8
  3086. mov r15d,eax
  3087. vpsrld xmm7,xmm4,3
  3088. xor r12d,r10d
  3089. shrd r14d,r14d,11
  3090. xor r15d,ebx
  3091. vpslld xmm5,xmm4,14
  3092. add r11d,r12d
  3093. shrd r13d,r13d,6
  3094. and edi,r15d
  3095. vpxor xmm4,xmm7,xmm6
  3096. xor r14d,eax
  3097. add r11d,r13d
  3098. xor edi,ebx
  3099. vpshufd xmm7,xmm1,250
  3100. shrd r14d,r14d,2
  3101. add edx,r11d
  3102. add r11d,edi
  3103. vpsrld xmm6,xmm6,11
  3104. mov r13d,edx
  3105. add r14d,r11d
  3106. shrd r13d,r13d,14
  3107. vpxor xmm4,xmm4,xmm5
  3108. mov r11d,r14d
  3109. mov r12d,r8d
  3110. shrd r14d,r14d,9
  3111. vpslld xmm5,xmm5,11
  3112. xor r13d,edx
  3113. xor r12d,r9d
  3114. shrd r13d,r13d,5
  3115. vpxor xmm4,xmm4,xmm6
  3116. xor r14d,r11d
  3117. and r12d,edx
  3118. xor r13d,edx
  3119. vpsrld xmm6,xmm7,10
  3120. add r10d,DWORD PTR[36+rsp] ;h += W+K word 9
  3121. mov edi,r11d
  3122. xor r12d,r9d
  3123. vpxor xmm4,xmm4,xmm5
  3124. shrd r14d,r14d,11
  3125. xor edi,eax
  3126. add r10d,r12d
  3127. vpsrlq xmm7,xmm7,17
  3128. shrd r13d,r13d,6
  3129. and r15d,edi
  3130. xor r14d,r11d
  3131. vpaddd xmm2,xmm2,xmm4
  3132. add r10d,r13d
  3133. xor r15d,eax
  3134. shrd r14d,r14d,2
  3135. vpxor xmm6,xmm6,xmm7
  3136. add ecx,r10d
  3137. add r10d,r15d
  3138. mov r13d,ecx
  3139. vpsrlq xmm7,xmm7,2
  3140. add r14d,r10d
  3141. shrd r13d,r13d,14
  3142. mov r10d,r14d
  3143. vpxor xmm6,xmm6,xmm7
  3144. mov r12d,edx
  3145. shrd r14d,r14d,9
  3146. xor r13d,ecx
  3147. vpshufb xmm6,xmm6,xmm8
  3148. xor r12d,r8d
  3149. shrd r13d,r13d,5
  3150. xor r14d,r10d
  3151. vpaddd xmm2,xmm2,xmm6
  3152. and r12d,ecx
  3153. xor r13d,ecx
  3154. add r9d,DWORD PTR[40+rsp] ;h += W+K word 10
  3155. vpshufd xmm7,xmm2,80
  3156. mov r15d,r10d
  3157. xor r12d,r8d
  3158. shrd r14d,r14d,11
  3159. vpsrld xmm6,xmm7,10
  3160. xor r15d,r11d
  3161. add r9d,r12d
  3162. shrd r13d,r13d,6
  3163. vpsrlq xmm7,xmm7,17
  3164. and edi,r15d
  3165. xor r14d,r10d
  3166. add r9d,r13d
  3167. vpxor xmm6,xmm6,xmm7
  3168. xor edi,r11d
  3169. shrd r14d,r14d,2
  3170. add ebx,r9d
  3171. vpsrlq xmm7,xmm7,2
  3172. add r9d,edi
  3173. mov r13d,ebx
  3174. add r14d,r9d
  3175. vpxor xmm6,xmm6,xmm7
  3176. shrd r13d,r13d,14
  3177. mov r9d,r14d
  3178. mov r12d,ecx
  3179. vpshufb xmm6,xmm6,xmm9
  3180. shrd r14d,r14d,9
  3181. xor r13d,ebx
  3182. xor r12d,edx
  3183. vpaddd xmm2,xmm2,xmm6
  3184. shrd r13d,r13d,5
  3185. xor r14d,r9d
  3186. and r12d,ebx
  3187. vpaddd xmm6,xmm2,XMMWORD PTR[64+rbp]
  3188. xor r13d,ebx
  3189. add r8d,DWORD PTR[44+rsp] ;h += W+K word 11
  3190. mov edi,r9d
  3191. xor r12d,edx
  3192. shrd r14d,r14d,11
  3193. xor edi,r10d
  3194. add r8d,r12d
  3195. shrd r13d,r13d,6
  3196. and r15d,edi
  3197. xor r14d,r9d
  3198. add r8d,r13d
  3199. xor r15d,r10d
  3200. shrd r14d,r14d,2
  3201. add eax,r8d
  3202. add r8d,r15d
  3203. mov r13d,eax
  3204. add r14d,r8d
  3205. vmovdqa XMMWORD PTR[32+rsp],xmm6 ;refill slots 8..11
  3206. vpalignr xmm4,xmm0,xmm3,4 ;same schedule step, now updating xmm3
  3207. shrd r13d,r13d,14
  3208. mov r8d,r14d
  3209. mov r12d,ebx
  3210. vpalignr xmm7,xmm2,xmm1,4
  3211. shrd r14d,r14d,9
  3212. xor r13d,eax
  3213. xor r12d,ecx
  3214. vpsrld xmm6,xmm4,7
  3215. shrd r13d,r13d,5
  3216. xor r14d,r8d
  3217. and r12d,eax
  3218. vpaddd xmm3,xmm3,xmm7
  3219. xor r13d,eax
  3220. add edx,DWORD PTR[48+rsp] ;h += W+K word 12
  3221. mov r15d,r8d
  3222. vpsrld xmm7,xmm4,3
  3223. xor r12d,ecx
  3224. shrd r14d,r14d,11
  3225. xor r15d,r9d
  3226. vpslld xmm5,xmm4,14
  3227. add edx,r12d
  3228. shrd r13d,r13d,6
  3229. and edi,r15d
  3230. vpxor xmm4,xmm7,xmm6
  3231. xor r14d,r8d
  3232. add edx,r13d
  3233. xor edi,r9d
  3234. vpshufd xmm7,xmm2,250
  3235. shrd r14d,r14d,2
  3236. add r11d,edx
  3237. add edx,edi
  3238. vpsrld xmm6,xmm6,11
  3239. mov r13d,r11d
  3240. add r14d,edx
  3241. shrd r13d,r13d,14
  3242. vpxor xmm4,xmm4,xmm5
  3243. mov edx,r14d
  3244. mov r12d,eax
  3245. shrd r14d,r14d,9
  3246. vpslld xmm5,xmm5,11
  3247. xor r13d,r11d
  3248. xor r12d,ebx
  3249. shrd r13d,r13d,5
  3250. vpxor xmm4,xmm4,xmm6
  3251. xor r14d,edx
  3252. and r12d,r11d
  3253. xor r13d,r11d
  3254. vpsrld xmm6,xmm7,10
  3255. add ecx,DWORD PTR[52+rsp] ;h += W+K word 13
  3256. mov edi,edx
  3257. xor r12d,ebx
  3258. vpxor xmm4,xmm4,xmm5
  3259. shrd r14d,r14d,11
  3260. xor edi,r8d
  3261. add ecx,r12d
  3262. vpsrlq xmm7,xmm7,17
  3263. shrd r13d,r13d,6
  3264. and r15d,edi
  3265. xor r14d,edx
  3266. vpaddd xmm3,xmm3,xmm4
  3267. add ecx,r13d
  3268. xor r15d,r8d
  3269. shrd r14d,r14d,2
  3270. vpxor xmm6,xmm6,xmm7
  3271. add r10d,ecx
  3272. add ecx,r15d
  3273. mov r13d,r10d
  3274. vpsrlq xmm7,xmm7,2
  3275. add r14d,ecx
  3276. shrd r13d,r13d,14
  3277. mov ecx,r14d
  3278. vpxor xmm6,xmm6,xmm7
  3279. mov r12d,r11d
  3280. shrd r14d,r14d,9
  3281. xor r13d,r10d
  3282. vpshufb xmm6,xmm6,xmm8
  3283. xor r12d,eax
  3284. shrd r13d,r13d,5
  3285. xor r14d,ecx
  3286. vpaddd xmm3,xmm3,xmm6
  3287. and r12d,r10d
  3288. xor r13d,r10d
  3289. add ebx,DWORD PTR[56+rsp] ;h += W+K word 14
  3290. vpshufd xmm7,xmm3,80
  3291. mov r15d,ecx
  3292. xor r12d,eax
  3293. shrd r14d,r14d,11
  3294. vpsrld xmm6,xmm7,10
  3295. xor r15d,edx
  3296. add ebx,r12d
  3297. shrd r13d,r13d,6
  3298. vpsrlq xmm7,xmm7,17
  3299. and edi,r15d
  3300. xor r14d,ecx
  3301. add ebx,r13d
  3302. vpxor xmm6,xmm6,xmm7
  3303. xor edi,edx
  3304. shrd r14d,r14d,2
  3305. add r9d,ebx
  3306. vpsrlq xmm7,xmm7,2
  3307. add ebx,edi
  3308. mov r13d,r9d
  3309. add r14d,ebx
  3310. vpxor xmm6,xmm6,xmm7
  3311. shrd r13d,r13d,14
  3312. mov ebx,r14d
  3313. mov r12d,r10d
  3314. vpshufb xmm6,xmm6,xmm9
  3315. shrd r14d,r14d,9
  3316. xor r13d,r9d
  3317. xor r12d,r11d
  3318. vpaddd xmm3,xmm3,xmm6
  3319. shrd r13d,r13d,5
  3320. xor r14d,ebx
  3321. and r12d,r9d
  3322. vpaddd xmm6,xmm3,XMMWORD PTR[96+rbp]
  3323. xor r13d,r9d
  3324. add eax,DWORD PTR[60+rsp] ;h += W+K word 15
  3325. mov edi,ebx
  3326. xor r12d,r11d
  3327. shrd r14d,r14d,11
  3328. xor edi,ecx
  3329. add eax,r12d
  3330. shrd r13d,r13d,6
  3331. and r15d,edi
  3332. xor r14d,ebx
  3333. add eax,r13d
  3334. xor r15d,ecx
  3335. shrd r14d,r14d,2
  3336. add r8d,eax
  3337. add eax,r15d
  3338. mov r13d,r8d
  3339. add r14d,eax
  3340. vmovdqa XMMWORD PTR[48+rsp],xmm6 ;refill slots 12..15
  3341. cmp BYTE PTR[131+rbp],0 ;probe a byte in the table at rbp+131
  3342. jne $L$avx_00_47 ;non-zero: run another schedule+round pass
  ;Final 16 rounds: consume the W+K words already on the stack; no more
  ;schedule words are computed.
  3343. shrd r13d,r13d,14
  3344. mov eax,r14d
  3345. mov r12d,r9d
  3346. shrd r14d,r14d,9
  3347. xor r13d,r8d
  3348. xor r12d,r10d
  3349. shrd r13d,r13d,5
  3350. xor r14d,eax
  3351. and r12d,r8d
  3352. xor r13d,r8d
  3353. add r11d,DWORD PTR[rsp] ;h += W+K word 0
  3354. mov r15d,eax
  3355. xor r12d,r10d
  3356. shrd r14d,r14d,11
  3357. xor r15d,ebx
  3358. add r11d,r12d
  3359. shrd r13d,r13d,6
  3360. and edi,r15d
  3361. xor r14d,eax
  3362. add r11d,r13d
  3363. xor edi,ebx
  3364. shrd r14d,r14d,2
  3365. add edx,r11d
  3366. add r11d,edi
  3367. mov r13d,edx
  3368. add r14d,r11d
  3369. shrd r13d,r13d,14
  3370. mov r11d,r14d
  3371. mov r12d,r8d
  3372. shrd r14d,r14d,9
  3373. xor r13d,edx
  3374. xor r12d,r9d
  3375. shrd r13d,r13d,5
  3376. xor r14d,r11d
  3377. and r12d,edx
  3378. xor r13d,edx
  3379. add r10d,DWORD PTR[4+rsp] ;h += W+K word 1
  3380. mov edi,r11d
  3381. xor r12d,r9d
  3382. shrd r14d,r14d,11
  3383. xor edi,eax
  3384. add r10d,r12d
  3385. shrd r13d,r13d,6
  3386. and r15d,edi
  3387. xor r14d,r11d
  3388. add r10d,r13d
  3389. xor r15d,eax
  3390. shrd r14d,r14d,2
  3391. add ecx,r10d
  3392. add r10d,r15d
  3393. mov r13d,ecx
  3394. add r14d,r10d
  3395. shrd r13d,r13d,14
  3396. mov r10d,r14d
  3397. mov r12d,edx
  3398. shrd r14d,r14d,9
  3399. xor r13d,ecx
  3400. xor r12d,r8d
  3401. shrd r13d,r13d,5
  3402. xor r14d,r10d
  3403. and r12d,ecx
  3404. xor r13d,ecx
  3405. add r9d,DWORD PTR[8+rsp] ;h += W+K word 2
  3406. mov r15d,r10d
  3407. xor r12d,r8d
  3408. shrd r14d,r14d,11
  3409. xor r15d,r11d
  3410. add r9d,r12d
  3411. shrd r13d,r13d,6
  3412. and edi,r15d
  3413. xor r14d,r10d
  3414. add r9d,r13d
  3415. xor edi,r11d
  3416. shrd r14d,r14d,2
  3417. add ebx,r9d
  3418. add r9d,edi
  3419. mov r13d,ebx
  3420. add r14d,r9d
  3421. shrd r13d,r13d,14
  3422. mov r9d,r14d
  3423. mov r12d,ecx
  3424. shrd r14d,r14d,9
  3425. xor r13d,ebx
  3426. xor r12d,edx
  3427. shrd r13d,r13d,5
  3428. xor r14d,r9d
  3429. and r12d,ebx
  3430. xor r13d,ebx
  3431. add r8d,DWORD PTR[12+rsp] ;h += W+K word 3
  3432. mov edi,r9d
  3433. xor r12d,edx
  3434. shrd r14d,r14d,11
  3435. xor edi,r10d
  3436. add r8d,r12d
  3437. shrd r13d,r13d,6
  3438. and r15d,edi
  3439. xor r14d,r9d
  3440. add r8d,r13d
  3441. xor r15d,r10d
  3442. shrd r14d,r14d,2
  3443. add eax,r8d
  3444. add r8d,r15d
  3445. mov r13d,eax
  3446. add r14d,r8d
  3447. shrd r13d,r13d,14
  3448. mov r8d,r14d
  3449. mov r12d,ebx
  3450. shrd r14d,r14d,9
  3451. xor r13d,eax
  3452. xor r12d,ecx
  3453. shrd r13d,r13d,5
  3454. xor r14d,r8d
  3455. and r12d,eax
  3456. xor r13d,eax
  3457. add edx,DWORD PTR[16+rsp] ;h += W+K word 4
  3458. mov r15d,r8d
  3459. xor r12d,ecx
  3460. shrd r14d,r14d,11
  3461. xor r15d,r9d
  3462. add edx,r12d
  3463. shrd r13d,r13d,6
  3464. and edi,r15d
  3465. xor r14d,r8d
  3466. add edx,r13d
  3467. xor edi,r9d
  3468. shrd r14d,r14d,2
  3469. add r11d,edx
  3470. add edx,edi
  3471. mov r13d,r11d
  3472. add r14d,edx
  3473. shrd r13d,r13d,14
  3474. mov edx,r14d
  3475. mov r12d,eax
  3476. shrd r14d,r14d,9
  3477. xor r13d,r11d
  3478. xor r12d,ebx
  3479. shrd r13d,r13d,5
  3480. xor r14d,edx
  3481. and r12d,r11d
  3482. xor r13d,r11d
  3483. add ecx,DWORD PTR[20+rsp] ;h += W+K word 5
  3484. mov edi,edx
  3485. xor r12d,ebx
  3486. shrd r14d,r14d,11
  3487. xor edi,r8d
  3488. add ecx,r12d
  3489. shrd r13d,r13d,6
  3490. and r15d,edi
  3491. xor r14d,edx
  3492. add ecx,r13d
  3493. xor r15d,r8d
  3494. shrd r14d,r14d,2
  3495. add r10d,ecx
  3496. add ecx,r15d
  3497. mov r13d,r10d
  3498. add r14d,ecx
  3499. shrd r13d,r13d,14
  3500. mov ecx,r14d
  3501. mov r12d,r11d
  3502. shrd r14d,r14d,9
  3503. xor r13d,r10d
  3504. xor r12d,eax
  3505. shrd r13d,r13d,5
  3506. xor r14d,ecx
  3507. and r12d,r10d
  3508. xor r13d,r10d
  3509. add ebx,DWORD PTR[24+rsp] ;h += W+K word 6
  3510. mov r15d,ecx
  3511. xor r12d,eax
  3512. shrd r14d,r14d,11
  3513. xor r15d,edx
  3514. add ebx,r12d
  3515. shrd r13d,r13d,6
  3516. and edi,r15d
  3517. xor r14d,ecx
  3518. add ebx,r13d
  3519. xor edi,edx
  3520. shrd r14d,r14d,2
  3521. add r9d,ebx
  3522. add ebx,edi
  3523. mov r13d,r9d
  3524. add r14d,ebx
  3525. shrd r13d,r13d,14
  3526. mov ebx,r14d
  3527. mov r12d,r10d
  3528. shrd r14d,r14d,9
  3529. xor r13d,r9d
  3530. xor r12d,r11d
  3531. shrd r13d,r13d,5
  3532. xor r14d,ebx
  3533. and r12d,r9d
  3534. xor r13d,r9d
  3535. add eax,DWORD PTR[28+rsp] ;h += W+K word 7
  3536. mov edi,ebx
  3537. xor r12d,r11d
  3538. shrd r14d,r14d,11
  3539. xor edi,ecx
  3540. add eax,r12d
  3541. shrd r13d,r13d,6
  3542. and r15d,edi
  3543. xor r14d,ebx
  3544. add eax,r13d
  3545. xor r15d,ecx
  3546. shrd r14d,r14d,2
  3547. add r8d,eax
  3548. add eax,r15d
  3549. mov r13d,r8d
  3550. add r14d,eax
  3551. shrd r13d,r13d,14
  3552. mov eax,r14d
  3553. mov r12d,r9d
  3554. shrd r14d,r14d,9
  3555. xor r13d,r8d
  3556. xor r12d,r10d
  3557. shrd r13d,r13d,5
  3558. xor r14d,eax
  3559. and r12d,r8d
  3560. xor r13d,r8d
  3561. add r11d,DWORD PTR[32+rsp] ;h += W+K word 8
  3562. mov r15d,eax
  3563. xor r12d,r10d
  3564. shrd r14d,r14d,11
  3565. xor r15d,ebx
  3566. add r11d,r12d
  3567. shrd r13d,r13d,6
  3568. and edi,r15d
  3569. xor r14d,eax
  3570. add r11d,r13d
  3571. xor edi,ebx
  3572. shrd r14d,r14d,2
  3573. add edx,r11d
  3574. add r11d,edi
  3575. mov r13d,edx
  3576. add r14d,r11d
  3577. shrd r13d,r13d,14
  3578. mov r11d,r14d
  3579. mov r12d,r8d
  3580. shrd r14d,r14d,9
  3581. xor r13d,edx
  3582. xor r12d,r9d
  3583. shrd r13d,r13d,5
  3584. xor r14d,r11d
  3585. and r12d,edx
  3586. xor r13d,edx
  3587. add r10d,DWORD PTR[36+rsp] ;h += W+K word 9
  3588. mov edi,r11d
  3589. xor r12d,r9d
  3590. shrd r14d,r14d,11
  3591. xor edi,eax
  3592. add r10d,r12d
  3593. shrd r13d,r13d,6
  3594. and r15d,edi
  3595. xor r14d,r11d
  3596. add r10d,r13d
  3597. xor r15d,eax
  3598. shrd r14d,r14d,2
  3599. add ecx,r10d
  3600. add r10d,r15d
  3601. mov r13d,ecx
  3602. add r14d,r10d
  3603. shrd r13d,r13d,14
  3604. mov r10d,r14d
  3605. mov r12d,edx
  3606. shrd r14d,r14d,9
  3607. xor r13d,ecx
  3608. xor r12d,r8d
  3609. shrd r13d,r13d,5
  3610. xor r14d,r10d
  3611. and r12d,ecx
  3612. xor r13d,ecx
  3613. add r9d,DWORD PTR[40+rsp] ;h += W+K word 10
  3614. mov r15d,r10d
  3615. xor r12d,r8d
  3616. shrd r14d,r14d,11
  3617. xor r15d,r11d
  3618. add r9d,r12d
  3619. shrd r13d,r13d,6
  3620. and edi,r15d
  3621. xor r14d,r10d
  3622. add r9d,r13d
  3623. xor edi,r11d
  3624. shrd r14d,r14d,2
  3625. add ebx,r9d
  3626. add r9d,edi
  3627. mov r13d,ebx
  3628. add r14d,r9d
  3629. shrd r13d,r13d,14
  3630. mov r9d,r14d
  3631. mov r12d,ecx
  3632. shrd r14d,r14d,9
  3633. xor r13d,ebx
  3634. xor r12d,edx
  3635. shrd r13d,r13d,5
  3636. xor r14d,r9d
  3637. and r12d,ebx
  3638. xor r13d,ebx
  3639. add r8d,DWORD PTR[44+rsp] ;h += W+K word 11
  3640. mov edi,r9d
  3641. xor r12d,edx
  3642. shrd r14d,r14d,11
  3643. xor edi,r10d
  3644. add r8d,r12d
  3645. shrd r13d,r13d,6
  3646. and r15d,edi
  3647. xor r14d,r9d
  3648. add r8d,r13d
  3649. xor r15d,r10d
  3650. shrd r14d,r14d,2
  3651. add eax,r8d
  3652. add r8d,r15d
  3653. mov r13d,eax
  3654. add r14d,r8d
  3655. shrd r13d,r13d,14
  3656. mov r8d,r14d
  3657. mov r12d,ebx
  3658. shrd r14d,r14d,9
  3659. xor r13d,eax
  3660. xor r12d,ecx
  3661. shrd r13d,r13d,5
  3662. xor r14d,r8d
  3663. and r12d,eax
  3664. xor r13d,eax
  3665. add edx,DWORD PTR[48+rsp] ;h += W+K word 12
  3666. mov r15d,r8d
  3667. xor r12d,ecx
  3668. shrd r14d,r14d,11
  3669. xor r15d,r9d
  3670. add edx,r12d
  3671. shrd r13d,r13d,6
  3672. and edi,r15d
  3673. xor r14d,r8d
  3674. add edx,r13d
  3675. xor edi,r9d
  3676. shrd r14d,r14d,2
  3677. add r11d,edx
  3678. add edx,edi
  3679. mov r13d,r11d
  3680. add r14d,edx
  3681. shrd r13d,r13d,14
  3682. mov edx,r14d
  3683. mov r12d,eax
  3684. shrd r14d,r14d,9
  3685. xor r13d,r11d
  3686. xor r12d,ebx
  3687. shrd r13d,r13d,5
  3688. xor r14d,edx
  3689. and r12d,r11d
  3690. xor r13d,r11d
  3691. add ecx,DWORD PTR[52+rsp] ;h += W+K word 13
  3692. mov edi,edx
  3693. xor r12d,ebx
  3694. shrd r14d,r14d,11
  3695. xor edi,r8d
  3696. add ecx,r12d
  3697. shrd r13d,r13d,6
  3698. and r15d,edi
  3699. xor r14d,edx
  3700. add ecx,r13d
  3701. xor r15d,r8d
  3702. shrd r14d,r14d,2
  3703. add r10d,ecx
  3704. add ecx,r15d
  3705. mov r13d,r10d
  3706. add r14d,ecx
  3707. shrd r13d,r13d,14
  3708. mov ecx,r14d
  3709. mov r12d,r11d
  3710. shrd r14d,r14d,9
  3711. xor r13d,r10d
  3712. xor r12d,eax
  3713. shrd r13d,r13d,5
  3714. xor r14d,ecx
  3715. and r12d,r10d
  3716. xor r13d,r10d
  3717. add ebx,DWORD PTR[56+rsp] ;h += W+K word 14
  3718. mov r15d,ecx
  3719. xor r12d,eax
  3720. shrd r14d,r14d,11
  3721. xor r15d,edx
  3722. add ebx,r12d
  3723. shrd r13d,r13d,6
  3724. and edi,r15d
  3725. xor r14d,ecx
  3726. add ebx,r13d
  3727. xor edi,edx
  3728. shrd r14d,r14d,2
  3729. add r9d,ebx
  3730. add ebx,edi
  3731. mov r13d,r9d
  3732. add r14d,ebx
  3733. shrd r13d,r13d,14
  3734. mov ebx,r14d
  3735. mov r12d,r10d
  3736. shrd r14d,r14d,9
  3737. xor r13d,r9d
  3738. xor r12d,r11d
  3739. shrd r13d,r13d,5
  3740. xor r14d,ebx
  3741. and r12d,r9d
  3742. xor r13d,r9d
  3743. add eax,DWORD PTR[60+rsp] ;h += W+K word 15
  3744. mov edi,ebx
  3745. xor r12d,r11d
  3746. shrd r14d,r14d,11
  3747. xor edi,ecx
  3748. add eax,r12d
  3749. shrd r13d,r13d,6
  3750. and r15d,edi
  3751. xor r14d,ebx
  3752. add eax,r13d
  3753. xor r15d,ecx
  3754. shrd r14d,r14d,2
  3755. add r8d,eax
  3756. add eax,r15d
  3757. mov r13d,r8d
  3758. add r14d,eax
  3759. mov rdi,QWORD PTR[((64+0))+rsp] ;reload the state pointer (rdi was used as scratch)
  3760. mov eax,r14d ;complete the last round's a
  3761. add eax,DWORD PTR[rdi] ;add the previous state into the working values
  3762. lea rsi,QWORD PTR[64+rsi] ;advance input by one 64-byte block (lea keeps flags)
  3763. add ebx,DWORD PTR[4+rdi]
  3764. add ecx,DWORD PTR[8+rdi]
  3765. add edx,DWORD PTR[12+rdi]
  3766. add r8d,DWORD PTR[16+rdi]
  3767. add r9d,DWORD PTR[20+rdi]
  3768. add r10d,DWORD PTR[24+rdi]
  3769. add r11d,DWORD PTR[28+rdi]
  3770. cmp rsi,QWORD PTR[((64+16))+rsp] ;compare against the saved end pointer; mov stores below keep flags
  3771. mov DWORD PTR[rdi],eax ;write the updated state back
  3772. mov DWORD PTR[4+rdi],ebx
  3773. mov DWORD PTR[8+rdi],ecx
  3774. mov DWORD PTR[12+rdi],edx
  3775. mov DWORD PTR[16+rdi],r8d
  3776. mov DWORD PTR[20+rdi],r9d
  3777. mov DWORD PTR[24+rdi],r10d
  3778. mov DWORD PTR[28+rdi],r11d
  3779. jb $L$loop_avx ;unsigned: more input remains
  3780. mov rsi,QWORD PTR[88+rsp] ;rsi = pre-push rsp saved in the prologue
  3781. vzeroupper
  3782. movaps xmm6,XMMWORD PTR[((64+32))+rsp] ;restore xmm6-xmm9
  3783. movaps xmm7,XMMWORD PTR[((64+48))+rsp]
  3784. movaps xmm8,XMMWORD PTR[((64+64))+rsp]
  3785. movaps xmm9,XMMWORD PTR[((64+80))+rsp]
  3786. mov r15,QWORD PTR[((-48))+rsi] ;reload the six pushed registers from below the saved rsp
  3787. mov r14,QWORD PTR[((-40))+rsi]
  3788. mov r13,QWORD PTR[((-32))+rsi]
  3789. mov r12,QWORD PTR[((-24))+rsi]
  3790. mov rbp,QWORD PTR[((-16))+rsi]
  3791. mov rbx,QWORD PTR[((-8))+rsi]
  3792. lea rsp,QWORD PTR[rsi] ;rsp back to its value before the pushes
  3793. $L$epilogue_avx::
  3794. mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue
  3795. mov rsi,QWORD PTR[16+rsp]
  3796. DB 0F3h,0C3h ;repret
  3797. $L$SEH_end_sha256_block_data_order_avx::
  3798. sha256_block_data_order_avx ENDP
  3799. ALIGN 64
  3800. sha256_block_data_order_avx2 PROC PRIVATE
  3801. mov QWORD PTR[8+rsp],rdi ;WIN64 prologue
  3802. mov QWORD PTR[16+rsp],rsi
  3803. mov rax,rsp
  3804. $L$SEH_begin_sha256_block_data_order_avx2::
  3805. mov rdi,rcx
  3806. mov rsi,rdx
  3807. mov rdx,r8
  3808. $L$avx2_shortcut::
  3809. mov rax,rsp
  3810. push rbx
  3811. push rbp
  3812. push r12
  3813. push r13
  3814. push r14
  3815. push r15
  3816. sub rsp,608
  3817. shl rdx,4
  3818. and rsp,-256*4
  3819. lea rdx,QWORD PTR[rdx*4+rsi]
  3820. add rsp,448
  3821. mov QWORD PTR[((64+0))+rsp],rdi
  3822. mov QWORD PTR[((64+8))+rsp],rsi
  3823. mov QWORD PTR[((64+16))+rsp],rdx
  3824. mov QWORD PTR[88+rsp],rax
  3825. movaps XMMWORD PTR[(64+32)+rsp],xmm6
  3826. movaps XMMWORD PTR[(64+48)+rsp],xmm7
  3827. movaps XMMWORD PTR[(64+64)+rsp],xmm8
  3828. movaps XMMWORD PTR[(64+80)+rsp],xmm9
  3829. $L$prologue_avx2::
  3830. vzeroupper
  3831. sub rsi,-16*4
  3832. mov eax,DWORD PTR[rdi]
  3833. mov r12,rsi
  3834. mov ebx,DWORD PTR[4+rdi]
  3835. cmp rsi,rdx
  3836. mov ecx,DWORD PTR[8+rdi]
  3837. cmove r12,rsp
  3838. mov edx,DWORD PTR[12+rdi]
  3839. mov r8d,DWORD PTR[16+rdi]
  3840. mov r9d,DWORD PTR[20+rdi]
  3841. mov r10d,DWORD PTR[24+rdi]
  3842. mov r11d,DWORD PTR[28+rdi]
  3843. vmovdqa ymm8,YMMWORD PTR[((K256+512+32))]
  3844. vmovdqa ymm9,YMMWORD PTR[((K256+512+64))]
  3845. jmp $L$oop_avx2
  3846. ALIGN 16
  3847. $L$oop_avx2::
  3848. vmovdqa ymm7,YMMWORD PTR[((K256+512))]
  3849. vmovdqu xmm0,XMMWORD PTR[((-64+0))+rsi]
  3850. vmovdqu xmm1,XMMWORD PTR[((-64+16))+rsi]
  3851. vmovdqu xmm2,XMMWORD PTR[((-64+32))+rsi]
  3852. vmovdqu xmm3,XMMWORD PTR[((-64+48))+rsi]
  3853. vinserti128 ymm0,ymm0,XMMWORD PTR[r12],1
  3854. vinserti128 ymm1,ymm1,XMMWORD PTR[16+r12],1
  3855. vpshufb ymm0,ymm0,ymm7
  3856. vinserti128 ymm2,ymm2,XMMWORD PTR[32+r12],1
  3857. vpshufb ymm1,ymm1,ymm7
  3858. vinserti128 ymm3,ymm3,XMMWORD PTR[48+r12],1
  3859. lea rbp,QWORD PTR[K256]
  3860. vpshufb ymm2,ymm2,ymm7
  3861. vpaddd ymm4,ymm0,YMMWORD PTR[rbp]
  3862. vpshufb ymm3,ymm3,ymm7
  3863. vpaddd ymm5,ymm1,YMMWORD PTR[32+rbp]
  3864. vpaddd ymm6,ymm2,YMMWORD PTR[64+rbp]
  3865. vpaddd ymm7,ymm3,YMMWORD PTR[96+rbp]
  3866. vmovdqa YMMWORD PTR[rsp],ymm4
  3867. xor r14d,r14d
  3868. vmovdqa YMMWORD PTR[32+rsp],ymm5
  3869. lea rsp,QWORD PTR[((-64))+rsp]
  3870. mov edi,ebx
  3871. vmovdqa YMMWORD PTR[rsp],ymm6
  3872. xor edi,ecx
  3873. vmovdqa YMMWORD PTR[32+rsp],ymm7
  3874. mov r12d,r9d
  3875. sub rbp,-16*2*4
  3876. jmp $L$avx2_00_47
  3877. ALIGN 16
  3878. $L$avx2_00_47::
  3879. lea rsp,QWORD PTR[((-64))+rsp]
  3880. vpalignr ymm4,ymm1,ymm0,4
  3881. add r11d,DWORD PTR[((0+128))+rsp]
  3882. and r12d,r8d
  3883. rorx r13d,r8d,25
  3884. vpalignr ymm7,ymm3,ymm2,4
  3885. rorx r15d,r8d,11
  3886. lea eax,DWORD PTR[r14*1+rax]
  3887. lea r11d,DWORD PTR[r12*1+r11]
  3888. vpsrld ymm6,ymm4,7
  3889. andn r12d,r8d,r10d
  3890. xor r13d,r15d
  3891. rorx r14d,r8d,6
  3892. vpaddd ymm0,ymm0,ymm7
  3893. lea r11d,DWORD PTR[r12*1+r11]
  3894. xor r13d,r14d
  3895. mov r15d,eax
  3896. vpsrld ymm7,ymm4,3
  3897. rorx r12d,eax,22
  3898. lea r11d,DWORD PTR[r13*1+r11]
  3899. xor r15d,ebx
  3900. vpslld ymm5,ymm4,14
  3901. rorx r14d,eax,13
  3902. rorx r13d,eax,2
  3903. lea edx,DWORD PTR[r11*1+rdx]
  3904. vpxor ymm4,ymm7,ymm6
  3905. and edi,r15d
  3906. xor r14d,r12d
  3907. xor edi,ebx
  3908. vpshufd ymm7,ymm3,250
  3909. xor r14d,r13d
  3910. lea r11d,DWORD PTR[rdi*1+r11]
  3911. mov r12d,r8d
  3912. vpsrld ymm6,ymm6,11
  3913. add r10d,DWORD PTR[((4+128))+rsp]
  3914. and r12d,edx
  3915. rorx r13d,edx,25
  3916. vpxor ymm4,ymm4,ymm5
  3917. rorx edi,edx,11
  3918. lea r11d,DWORD PTR[r14*1+r11]
  3919. lea r10d,DWORD PTR[r12*1+r10]
  3920. vpslld ymm5,ymm5,11
  3921. andn r12d,edx,r9d
  3922. xor r13d,edi
  3923. rorx r14d,edx,6
  3924. vpxor ymm4,ymm4,ymm6
  3925. lea r10d,DWORD PTR[r12*1+r10]
  3926. xor r13d,r14d
  3927. mov edi,r11d
  3928. vpsrld ymm6,ymm7,10
  3929. rorx r12d,r11d,22
  3930. lea r10d,DWORD PTR[r13*1+r10]
  3931. xor edi,eax
  3932. vpxor ymm4,ymm4,ymm5
  3933. rorx r14d,r11d,13
  3934. rorx r13d,r11d,2
  3935. lea ecx,DWORD PTR[r10*1+rcx]
  3936. vpsrlq ymm7,ymm7,17
  3937. and r15d,edi
  3938. xor r14d,r12d
  3939. xor r15d,eax
  3940. vpaddd ymm0,ymm0,ymm4
  3941. xor r14d,r13d
  3942. lea r10d,DWORD PTR[r15*1+r10]
  3943. mov r12d,edx
  3944. vpxor ymm6,ymm6,ymm7
  3945. add r9d,DWORD PTR[((8+128))+rsp]
  3946. and r12d,ecx
  3947. rorx r13d,ecx,25
  3948. vpsrlq ymm7,ymm7,2
  3949. rorx r15d,ecx,11
  3950. lea r10d,DWORD PTR[r14*1+r10]
  3951. lea r9d,DWORD PTR[r12*1+r9]
  3952. vpxor ymm6,ymm6,ymm7
  3953. andn r12d,ecx,r8d
  3954. xor r13d,r15d
  3955. rorx r14d,ecx,6
  3956. vpshufb ymm6,ymm6,ymm8
  3957. lea r9d,DWORD PTR[r12*1+r9]
  3958. xor r13d,r14d
  3959. mov r15d,r10d
  3960. vpaddd ymm0,ymm0,ymm6
  3961. rorx r12d,r10d,22
  3962. lea r9d,DWORD PTR[r13*1+r9]
  3963. xor r15d,r11d
  3964. vpshufd ymm7,ymm0,80
  3965. rorx r14d,r10d,13
  3966. rorx r13d,r10d,2
  3967. lea ebx,DWORD PTR[r9*1+rbx]
  3968. vpsrld ymm6,ymm7,10
  3969. and edi,r15d
  3970. xor r14d,r12d
  3971. xor edi,r11d
  3972. vpsrlq ymm7,ymm7,17
  3973. xor r14d,r13d
  3974. lea r9d,DWORD PTR[rdi*1+r9]
  3975. mov r12d,ecx
  3976. vpxor ymm6,ymm6,ymm7
  3977. add r8d,DWORD PTR[((12+128))+rsp]
  3978. and r12d,ebx
  3979. rorx r13d,ebx,25
  3980. vpsrlq ymm7,ymm7,2
  3981. rorx edi,ebx,11
  3982. lea r9d,DWORD PTR[r14*1+r9]
  3983. lea r8d,DWORD PTR[r12*1+r8]
  3984. vpxor ymm6,ymm6,ymm7
  3985. andn r12d,ebx,edx
  3986. xor r13d,edi
  3987. rorx r14d,ebx,6
  3988. vpshufb ymm6,ymm6,ymm9
  3989. lea r8d,DWORD PTR[r12*1+r8]
  3990. xor r13d,r14d
  3991. mov edi,r9d
  3992. vpaddd ymm0,ymm0,ymm6
  3993. rorx r12d,r9d,22
  3994. lea r8d,DWORD PTR[r13*1+r8]
  3995. xor edi,r10d
  3996. vpaddd ymm6,ymm0,YMMWORD PTR[rbp]
  3997. rorx r14d,r9d,13
  3998. rorx r13d,r9d,2
  3999. lea eax,DWORD PTR[r8*1+rax]
  4000. and r15d,edi
  4001. xor r14d,r12d
  4002. xor r15d,r10d
  4003. xor r14d,r13d
  4004. lea r8d,DWORD PTR[r15*1+r8]
  4005. mov r12d,ebx
  4006. vmovdqa YMMWORD PTR[rsp],ymm6
  4007. vpalignr ymm4,ymm2,ymm1,4
  4008. add edx,DWORD PTR[((32+128))+rsp]
  4009. and r12d,eax
  4010. rorx r13d,eax,25
  4011. vpalignr ymm7,ymm0,ymm3,4
  4012. rorx r15d,eax,11
  4013. lea r8d,DWORD PTR[r14*1+r8]
  4014. lea edx,DWORD PTR[r12*1+rdx]
  4015. vpsrld ymm6,ymm4,7
  4016. andn r12d,eax,ecx
  4017. xor r13d,r15d
  4018. rorx r14d,eax,6
  4019. vpaddd ymm1,ymm1,ymm7
  4020. lea edx,DWORD PTR[r12*1+rdx]
  4021. xor r13d,r14d
  4022. mov r15d,r8d
  4023. vpsrld ymm7,ymm4,3
  4024. rorx r12d,r8d,22
  4025. lea edx,DWORD PTR[r13*1+rdx]
  4026. xor r15d,r9d
  4027. vpslld ymm5,ymm4,14
  4028. rorx r14d,r8d,13
  4029. rorx r13d,r8d,2
  4030. lea r11d,DWORD PTR[rdx*1+r11]
  4031. vpxor ymm4,ymm7,ymm6
  4032. and edi,r15d
  4033. xor r14d,r12d
  4034. xor edi,r9d
  4035. vpshufd ymm7,ymm0,250
  4036. xor r14d,r13d
  4037. lea edx,DWORD PTR[rdi*1+rdx]
  4038. mov r12d,eax
  4039. vpsrld ymm6,ymm6,11
  4040. add ecx,DWORD PTR[((36+128))+rsp]
  4041. and r12d,r11d
  4042. rorx r13d,r11d,25
  4043. vpxor ymm4,ymm4,ymm5
  4044. rorx edi,r11d,11
  4045. lea edx,DWORD PTR[r14*1+rdx]
  4046. lea ecx,DWORD PTR[r12*1+rcx]
  4047. vpslld ymm5,ymm5,11
  4048. andn r12d,r11d,ebx
  4049. xor r13d,edi
  4050. rorx r14d,r11d,6
  4051. vpxor ymm4,ymm4,ymm6
  4052. lea ecx,DWORD PTR[r12*1+rcx]
  4053. xor r13d,r14d
  4054. mov edi,edx
  4055. vpsrld ymm6,ymm7,10
  4056. rorx r12d,edx,22
  4057. lea ecx,DWORD PTR[r13*1+rcx]
  4058. xor edi,r8d
  4059. vpxor ymm4,ymm4,ymm5
  4060. rorx r14d,edx,13
  4061. rorx r13d,edx,2
  4062. lea r10d,DWORD PTR[rcx*1+r10]
  4063. vpsrlq ymm7,ymm7,17
  4064. and r15d,edi
  4065. xor r14d,r12d
  4066. xor r15d,r8d
  4067. vpaddd ymm1,ymm1,ymm4
  4068. xor r14d,r13d
  4069. lea ecx,DWORD PTR[r15*1+rcx]
  4070. mov r12d,r11d
  4071. vpxor ymm6,ymm6,ymm7
  4072. add ebx,DWORD PTR[((40+128))+rsp]
  4073. and r12d,r10d
  4074. rorx r13d,r10d,25
  4075. vpsrlq ymm7,ymm7,2
  4076. rorx r15d,r10d,11
  4077. lea ecx,DWORD PTR[r14*1+rcx]
  4078. lea ebx,DWORD PTR[r12*1+rbx]
  4079. vpxor ymm6,ymm6,ymm7
  4080. andn r12d,r10d,eax
  4081. xor r13d,r15d
  4082. rorx r14d,r10d,6
  4083. vpshufb ymm6,ymm6,ymm8
  4084. lea ebx,DWORD PTR[r12*1+rbx]
  4085. xor r13d,r14d
  4086. mov r15d,ecx
  4087. vpaddd ymm1,ymm1,ymm6
  4088. rorx r12d,ecx,22
  4089. lea ebx,DWORD PTR[r13*1+rbx]
  4090. xor r15d,edx
  4091. vpshufd ymm7,ymm1,80
  4092. rorx r14d,ecx,13
  4093. rorx r13d,ecx,2
  4094. lea r9d,DWORD PTR[rbx*1+r9]
  4095. vpsrld ymm6,ymm7,10
  4096. and edi,r15d
  4097. xor r14d,r12d
  4098. xor edi,edx
  4099. vpsrlq ymm7,ymm7,17
  4100. xor r14d,r13d
  4101. lea ebx,DWORD PTR[rdi*1+rbx]
  4102. mov r12d,r10d
  4103. vpxor ymm6,ymm6,ymm7
  4104. add eax,DWORD PTR[((44+128))+rsp]
  4105. and r12d,r9d
  4106. rorx r13d,r9d,25
  4107. vpsrlq ymm7,ymm7,2
  4108. rorx edi,r9d,11
  4109. lea ebx,DWORD PTR[r14*1+rbx]
  4110. lea eax,DWORD PTR[r12*1+rax]
  4111. vpxor ymm6,ymm6,ymm7
  4112. andn r12d,r9d,r11d
  4113. xor r13d,edi
  4114. rorx r14d,r9d,6
  4115. vpshufb ymm6,ymm6,ymm9
  4116. lea eax,DWORD PTR[r12*1+rax]
  4117. xor r13d,r14d
  4118. mov edi,ebx
  4119. vpaddd ymm1,ymm1,ymm6
  4120. rorx r12d,ebx,22
  4121. lea eax,DWORD PTR[r13*1+rax]
  4122. xor edi,ecx
  4123. vpaddd ymm6,ymm1,YMMWORD PTR[32+rbp]
  4124. rorx r14d,ebx,13
  4125. rorx r13d,ebx,2
  4126. lea r8d,DWORD PTR[rax*1+r8]
  4127. and r15d,edi
  4128. xor r14d,r12d
  4129. xor r15d,ecx
  4130. xor r14d,r13d
  4131. lea eax,DWORD PTR[r15*1+rax]
  4132. mov r12d,r9d
  4133. vmovdqa YMMWORD PTR[32+rsp],ymm6
  4134. lea rsp,QWORD PTR[((-64))+rsp]
  4135. vpalignr ymm4,ymm3,ymm2,4
  4136. add r11d,DWORD PTR[((0+128))+rsp]
  4137. and r12d,r8d
  4138. rorx r13d,r8d,25
  4139. vpalignr ymm7,ymm1,ymm0,4
  4140. rorx r15d,r8d,11
  4141. lea eax,DWORD PTR[r14*1+rax]
  4142. lea r11d,DWORD PTR[r12*1+r11]
  4143. vpsrld ymm6,ymm4,7
  4144. andn r12d,r8d,r10d
  4145. xor r13d,r15d
  4146. rorx r14d,r8d,6
  4147. vpaddd ymm2,ymm2,ymm7
  4148. lea r11d,DWORD PTR[r12*1+r11]
  4149. xor r13d,r14d
  4150. mov r15d,eax
  4151. vpsrld ymm7,ymm4,3
  4152. rorx r12d,eax,22
  4153. lea r11d,DWORD PTR[r13*1+r11]
  4154. xor r15d,ebx
  4155. vpslld ymm5,ymm4,14
  4156. rorx r14d,eax,13
  4157. rorx r13d,eax,2
  4158. lea edx,DWORD PTR[r11*1+rdx]
  4159. vpxor ymm4,ymm7,ymm6
  4160. and edi,r15d
  4161. xor r14d,r12d
  4162. xor edi,ebx
  4163. vpshufd ymm7,ymm1,250
  4164. xor r14d,r13d
  4165. lea r11d,DWORD PTR[rdi*1+r11]
  4166. mov r12d,r8d
  4167. vpsrld ymm6,ymm6,11
  4168. add r10d,DWORD PTR[((4+128))+rsp]
  4169. and r12d,edx
  4170. rorx r13d,edx,25
  4171. vpxor ymm4,ymm4,ymm5
  4172. rorx edi,edx,11
  4173. lea r11d,DWORD PTR[r14*1+r11]
  4174. lea r10d,DWORD PTR[r12*1+r10]
  4175. vpslld ymm5,ymm5,11
  4176. andn r12d,edx,r9d
  4177. xor r13d,edi
  4178. rorx r14d,edx,6
  4179. vpxor ymm4,ymm4,ymm6
  4180. lea r10d,DWORD PTR[r12*1+r10]
  4181. xor r13d,r14d
  4182. mov edi,r11d
  4183. vpsrld ymm6,ymm7,10
  4184. rorx r12d,r11d,22
  4185. lea r10d,DWORD PTR[r13*1+r10]
  4186. xor edi,eax
  4187. vpxor ymm4,ymm4,ymm5
  4188. rorx r14d,r11d,13
  4189. rorx r13d,r11d,2
  4190. lea ecx,DWORD PTR[r10*1+rcx]
  4191. vpsrlq ymm7,ymm7,17
  4192. and r15d,edi
  4193. xor r14d,r12d
  4194. xor r15d,eax
  4195. vpaddd ymm2,ymm2,ymm4
  4196. xor r14d,r13d
  4197. lea r10d,DWORD PTR[r15*1+r10]
  4198. mov r12d,edx
  4199. vpxor ymm6,ymm6,ymm7
  4200. add r9d,DWORD PTR[((8+128))+rsp]
  4201. and r12d,ecx
  4202. rorx r13d,ecx,25
  4203. vpsrlq ymm7,ymm7,2
  4204. rorx r15d,ecx,11
  4205. lea r10d,DWORD PTR[r14*1+r10]
  4206. lea r9d,DWORD PTR[r12*1+r9]
  4207. vpxor ymm6,ymm6,ymm7
  4208. andn r12d,ecx,r8d
  4209. xor r13d,r15d
  4210. rorx r14d,ecx,6
  4211. vpshufb ymm6,ymm6,ymm8
  4212. lea r9d,DWORD PTR[r12*1+r9]
  4213. xor r13d,r14d
  4214. mov r15d,r10d
  4215. vpaddd ymm2,ymm2,ymm6
  4216. rorx r12d,r10d,22
  4217. lea r9d,DWORD PTR[r13*1+r9]
  4218. xor r15d,r11d
  4219. vpshufd ymm7,ymm2,80
  4220. rorx r14d,r10d,13
  4221. rorx r13d,r10d,2
  4222. lea ebx,DWORD PTR[r9*1+rbx]
  4223. vpsrld ymm6,ymm7,10
  4224. and edi,r15d
  4225. xor r14d,r12d
  4226. xor edi,r11d
  4227. vpsrlq ymm7,ymm7,17
  4228. xor r14d,r13d
  4229. lea r9d,DWORD PTR[rdi*1+r9]
  4230. mov r12d,ecx
  4231. vpxor ymm6,ymm6,ymm7
  4232. add r8d,DWORD PTR[((12+128))+rsp]
  4233. and r12d,ebx
  4234. rorx r13d,ebx,25
  4235. vpsrlq ymm7,ymm7,2
  4236. rorx edi,ebx,11
  4237. lea r9d,DWORD PTR[r14*1+r9]
  4238. lea r8d,DWORD PTR[r12*1+r8]
  4239. vpxor ymm6,ymm6,ymm7
  4240. andn r12d,ebx,edx
  4241. xor r13d,edi
  4242. rorx r14d,ebx,6
  4243. vpshufb ymm6,ymm6,ymm9
  4244. lea r8d,DWORD PTR[r12*1+r8]
  4245. xor r13d,r14d
  4246. mov edi,r9d
  4247. vpaddd ymm2,ymm2,ymm6
  4248. rorx r12d,r9d,22
  4249. lea r8d,DWORD PTR[r13*1+r8]
  4250. xor edi,r10d
  4251. vpaddd ymm6,ymm2,YMMWORD PTR[64+rbp]
  4252. rorx r14d,r9d,13
  4253. rorx r13d,r9d,2
  4254. lea eax,DWORD PTR[r8*1+rax]
  4255. and r15d,edi
  4256. xor r14d,r12d
  4257. xor r15d,r10d
  4258. xor r14d,r13d
  4259. lea r8d,DWORD PTR[r15*1+r8]
  4260. mov r12d,ebx
  4261. vmovdqa YMMWORD PTR[rsp],ymm6
  4262. vpalignr ymm4,ymm0,ymm3,4
  4263. add edx,DWORD PTR[((32+128))+rsp]
  4264. and r12d,eax
  4265. rorx r13d,eax,25
  4266. vpalignr ymm7,ymm2,ymm1,4
  4267. rorx r15d,eax,11
  4268. lea r8d,DWORD PTR[r14*1+r8]
  4269. lea edx,DWORD PTR[r12*1+rdx]
  4270. vpsrld ymm6,ymm4,7
  4271. andn r12d,eax,ecx
  4272. xor r13d,r15d
  4273. rorx r14d,eax,6
  4274. vpaddd ymm3,ymm3,ymm7
  4275. lea edx,DWORD PTR[r12*1+rdx]
  4276. xor r13d,r14d
  4277. mov r15d,r8d
  4278. vpsrld ymm7,ymm4,3
  4279. rorx r12d,r8d,22
  4280. lea edx,DWORD PTR[r13*1+rdx]
  4281. xor r15d,r9d
  4282. vpslld ymm5,ymm4,14
  4283. rorx r14d,r8d,13
  4284. rorx r13d,r8d,2
  4285. lea r11d,DWORD PTR[rdx*1+r11]
  4286. vpxor ymm4,ymm7,ymm6
  4287. and edi,r15d
  4288. xor r14d,r12d
  4289. xor edi,r9d
  4290. vpshufd ymm7,ymm2,250
  4291. xor r14d,r13d
  4292. lea edx,DWORD PTR[rdi*1+rdx]
  4293. mov r12d,eax
  4294. vpsrld ymm6,ymm6,11
  4295. add ecx,DWORD PTR[((36+128))+rsp]
  4296. and r12d,r11d
  4297. rorx r13d,r11d,25
  4298. vpxor ymm4,ymm4,ymm5
  4299. rorx edi,r11d,11
  4300. lea edx,DWORD PTR[r14*1+rdx]
  4301. lea ecx,DWORD PTR[r12*1+rcx]
  4302. vpslld ymm5,ymm5,11
  4303. andn r12d,r11d,ebx
  4304. xor r13d,edi
  4305. rorx r14d,r11d,6
  4306. vpxor ymm4,ymm4,ymm6
  4307. lea ecx,DWORD PTR[r12*1+rcx]
  4308. xor r13d,r14d
  4309. mov edi,edx
  4310. vpsrld ymm6,ymm7,10
  4311. rorx r12d,edx,22
  4312. lea ecx,DWORD PTR[r13*1+rcx]
  4313. xor edi,r8d
  4314. vpxor ymm4,ymm4,ymm5
  4315. rorx r14d,edx,13
  4316. rorx r13d,edx,2
  4317. lea r10d,DWORD PTR[rcx*1+r10]
  4318. vpsrlq ymm7,ymm7,17
  4319. and r15d,edi
  4320. xor r14d,r12d
  4321. xor r15d,r8d
  4322. vpaddd ymm3,ymm3,ymm4
  4323. xor r14d,r13d
  4324. lea ecx,DWORD PTR[r15*1+rcx]
  4325. mov r12d,r11d
  4326. vpxor ymm6,ymm6,ymm7
  4327. add ebx,DWORD PTR[((40+128))+rsp]
  4328. and r12d,r10d
  4329. rorx r13d,r10d,25
  4330. vpsrlq ymm7,ymm7,2
  4331. rorx r15d,r10d,11
  4332. lea ecx,DWORD PTR[r14*1+rcx]
  4333. lea ebx,DWORD PTR[r12*1+rbx]
  4334. vpxor ymm6,ymm6,ymm7
  4335. andn r12d,r10d,eax
  4336. xor r13d,r15d
  4337. rorx r14d,r10d,6
  4338. vpshufb ymm6,ymm6,ymm8
  4339. lea ebx,DWORD PTR[r12*1+rbx]
  4340. xor r13d,r14d
  4341. mov r15d,ecx
  4342. vpaddd ymm3,ymm3,ymm6
  4343. rorx r12d,ecx,22
  4344. lea ebx,DWORD PTR[r13*1+rbx]
  4345. xor r15d,edx
  4346. vpshufd ymm7,ymm3,80
  4347. rorx r14d,ecx,13
  4348. rorx r13d,ecx,2
  4349. lea r9d,DWORD PTR[rbx*1+r9]
  4350. vpsrld ymm6,ymm7,10
  4351. and edi,r15d
  4352. xor r14d,r12d
  4353. xor edi,edx
  4354. vpsrlq ymm7,ymm7,17
  4355. xor r14d,r13d
  4356. lea ebx,DWORD PTR[rdi*1+rbx]
  4357. mov r12d,r10d
  4358. vpxor ymm6,ymm6,ymm7
  4359. add eax,DWORD PTR[((44+128))+rsp]
  4360. and r12d,r9d
  4361. rorx r13d,r9d,25
  4362. vpsrlq ymm7,ymm7,2
  4363. rorx edi,r9d,11
  4364. lea ebx,DWORD PTR[r14*1+rbx]
  4365. lea eax,DWORD PTR[r12*1+rax]
  4366. vpxor ymm6,ymm6,ymm7
  4367. andn r12d,r9d,r11d
  4368. xor r13d,edi
  4369. rorx r14d,r9d,6
  4370. vpshufb ymm6,ymm6,ymm9
  4371. lea eax,DWORD PTR[r12*1+rax]
  4372. xor r13d,r14d
  4373. mov edi,ebx
  4374. vpaddd ymm3,ymm3,ymm6
  4375. rorx r12d,ebx,22
  4376. lea eax,DWORD PTR[r13*1+rax]
  4377. xor edi,ecx
  4378. vpaddd ymm6,ymm3,YMMWORD PTR[96+rbp]
  4379. rorx r14d,ebx,13
  4380. rorx r13d,ebx,2
  4381. lea r8d,DWORD PTR[rax*1+r8]
  4382. and r15d,edi
  4383. xor r14d,r12d
  4384. xor r15d,ecx
  4385. xor r14d,r13d
  4386. lea eax,DWORD PTR[r15*1+rax]
  4387. mov r12d,r9d
  4388. vmovdqa YMMWORD PTR[32+rsp],ymm6
  4389. lea rbp,QWORD PTR[128+rbp]
  4390. cmp BYTE PTR[3+rbp],0
  4391. jne $L$avx2_00_47
  4392. add r11d,DWORD PTR[((0+64))+rsp]
  4393. and r12d,r8d
  4394. rorx r13d,r8d,25
  4395. rorx r15d,r8d,11
  4396. lea eax,DWORD PTR[r14*1+rax]
  4397. lea r11d,DWORD PTR[r12*1+r11]
  4398. andn r12d,r8d,r10d
  4399. xor r13d,r15d
  4400. rorx r14d,r8d,6
  4401. lea r11d,DWORD PTR[r12*1+r11]
  4402. xor r13d,r14d
  4403. mov r15d,eax
  4404. rorx r12d,eax,22
  4405. lea r11d,DWORD PTR[r13*1+r11]
  4406. xor r15d,ebx
  4407. rorx r14d,eax,13
  4408. rorx r13d,eax,2
  4409. lea edx,DWORD PTR[r11*1+rdx]
  4410. and edi,r15d
  4411. xor r14d,r12d
  4412. xor edi,ebx
  4413. xor r14d,r13d
  4414. lea r11d,DWORD PTR[rdi*1+r11]
  4415. mov r12d,r8d
  4416. add r10d,DWORD PTR[((4+64))+rsp]
  4417. and r12d,edx
  4418. rorx r13d,edx,25
  4419. rorx edi,edx,11
  4420. lea r11d,DWORD PTR[r14*1+r11]
  4421. lea r10d,DWORD PTR[r12*1+r10]
  4422. andn r12d,edx,r9d
  4423. xor r13d,edi
  4424. rorx r14d,edx,6
  4425. lea r10d,DWORD PTR[r12*1+r10]
  4426. xor r13d,r14d
  4427. mov edi,r11d
  4428. rorx r12d,r11d,22
  4429. lea r10d,DWORD PTR[r13*1+r10]
  4430. xor edi,eax
  4431. rorx r14d,r11d,13
  4432. rorx r13d,r11d,2
  4433. lea ecx,DWORD PTR[r10*1+rcx]
  4434. and r15d,edi
  4435. xor r14d,r12d
  4436. xor r15d,eax
  4437. xor r14d,r13d
  4438. lea r10d,DWORD PTR[r15*1+r10]
  4439. mov r12d,edx
  4440. add r9d,DWORD PTR[((8+64))+rsp]
  4441. and r12d,ecx
  4442. rorx r13d,ecx,25
  4443. rorx r15d,ecx,11
  4444. lea r10d,DWORD PTR[r14*1+r10]
  4445. lea r9d,DWORD PTR[r12*1+r9]
  4446. andn r12d,ecx,r8d
  4447. xor r13d,r15d
  4448. rorx r14d,ecx,6
  4449. lea r9d,DWORD PTR[r12*1+r9]
  4450. xor r13d,r14d
  4451. mov r15d,r10d
  4452. rorx r12d,r10d,22
  4453. lea r9d,DWORD PTR[r13*1+r9]
  4454. xor r15d,r11d
  4455. rorx r14d,r10d,13
  4456. rorx r13d,r10d,2
  4457. lea ebx,DWORD PTR[r9*1+rbx]
  4458. and edi,r15d
  4459. xor r14d,r12d
  4460. xor edi,r11d
  4461. xor r14d,r13d
  4462. lea r9d,DWORD PTR[rdi*1+r9]
  4463. mov r12d,ecx
  4464. add r8d,DWORD PTR[((12+64))+rsp]
  4465. and r12d,ebx
  4466. rorx r13d,ebx,25
  4467. rorx edi,ebx,11
  4468. lea r9d,DWORD PTR[r14*1+r9]
  4469. lea r8d,DWORD PTR[r12*1+r8]
  4470. andn r12d,ebx,edx
  4471. xor r13d,edi
  4472. rorx r14d,ebx,6
  4473. lea r8d,DWORD PTR[r12*1+r8]
  4474. xor r13d,r14d
  4475. mov edi,r9d
  4476. rorx r12d,r9d,22
  4477. lea r8d,DWORD PTR[r13*1+r8]
  4478. xor edi,r10d
  4479. rorx r14d,r9d,13
  4480. rorx r13d,r9d,2
  4481. lea eax,DWORD PTR[r8*1+rax]
  4482. and r15d,edi
  4483. xor r14d,r12d
  4484. xor r15d,r10d
  4485. xor r14d,r13d
  4486. lea r8d,DWORD PTR[r15*1+r8]
  4487. mov r12d,ebx
  4488. add edx,DWORD PTR[((32+64))+rsp]
  4489. and r12d,eax
  4490. rorx r13d,eax,25
  4491. rorx r15d,eax,11
  4492. lea r8d,DWORD PTR[r14*1+r8]
  4493. lea edx,DWORD PTR[r12*1+rdx]
  4494. andn r12d,eax,ecx
  4495. xor r13d,r15d
  4496. rorx r14d,eax,6
  4497. lea edx,DWORD PTR[r12*1+rdx]
  4498. xor r13d,r14d
  4499. mov r15d,r8d
  4500. rorx r12d,r8d,22
  4501. lea edx,DWORD PTR[r13*1+rdx]
  4502. xor r15d,r9d
  4503. rorx r14d,r8d,13
  4504. rorx r13d,r8d,2
  4505. lea r11d,DWORD PTR[rdx*1+r11]
  4506. and edi,r15d
  4507. xor r14d,r12d
  4508. xor edi,r9d
  4509. xor r14d,r13d
  4510. lea edx,DWORD PTR[rdi*1+rdx]
  4511. mov r12d,eax
  4512. add ecx,DWORD PTR[((36+64))+rsp]
  4513. and r12d,r11d
  4514. rorx r13d,r11d,25
  4515. rorx edi,r11d,11
  4516. lea edx,DWORD PTR[r14*1+rdx]
  4517. lea ecx,DWORD PTR[r12*1+rcx]
  4518. andn r12d,r11d,ebx
  4519. xor r13d,edi
  4520. rorx r14d,r11d,6
  4521. lea ecx,DWORD PTR[r12*1+rcx]
  4522. xor r13d,r14d
  4523. mov edi,edx
  4524. rorx r12d,edx,22
  4525. lea ecx,DWORD PTR[r13*1+rcx]
  4526. xor edi,r8d
  4527. rorx r14d,edx,13
  4528. rorx r13d,edx,2
  4529. lea r10d,DWORD PTR[rcx*1+r10]
  4530. and r15d,edi
  4531. xor r14d,r12d
  4532. xor r15d,r8d
  4533. xor r14d,r13d
  4534. lea ecx,DWORD PTR[r15*1+rcx]
  4535. mov r12d,r11d
  4536. add ebx,DWORD PTR[((40+64))+rsp]
  4537. and r12d,r10d
  4538. rorx r13d,r10d,25
  4539. rorx r15d,r10d,11
  4540. lea ecx,DWORD PTR[r14*1+rcx]
  4541. lea ebx,DWORD PTR[r12*1+rbx]
  4542. andn r12d,r10d,eax
  4543. xor r13d,r15d
  4544. rorx r14d,r10d,6
  4545. lea ebx,DWORD PTR[r12*1+rbx]
  4546. xor r13d,r14d
  4547. mov r15d,ecx
  4548. rorx r12d,ecx,22
  4549. lea ebx,DWORD PTR[r13*1+rbx]
  4550. xor r15d,edx
  4551. rorx r14d,ecx,13
  4552. rorx r13d,ecx,2
  4553. lea r9d,DWORD PTR[rbx*1+r9]
  4554. and edi,r15d
  4555. xor r14d,r12d
  4556. xor edi,edx
  4557. xor r14d,r13d
  4558. lea ebx,DWORD PTR[rdi*1+rbx]
  4559. mov r12d,r10d
  4560. add eax,DWORD PTR[((44+64))+rsp]
  4561. and r12d,r9d
  4562. rorx r13d,r9d,25
  4563. rorx edi,r9d,11
  4564. lea ebx,DWORD PTR[r14*1+rbx]
  4565. lea eax,DWORD PTR[r12*1+rax]
  4566. andn r12d,r9d,r11d
  4567. xor r13d,edi
  4568. rorx r14d,r9d,6
  4569. lea eax,DWORD PTR[r12*1+rax]
  4570. xor r13d,r14d
  4571. mov edi,ebx
  4572. rorx r12d,ebx,22
  4573. lea eax,DWORD PTR[r13*1+rax]
  4574. xor edi,ecx
  4575. rorx r14d,ebx,13
  4576. rorx r13d,ebx,2
  4577. lea r8d,DWORD PTR[rax*1+r8]
  4578. and r15d,edi
  4579. xor r14d,r12d
  4580. xor r15d,ecx
  4581. xor r14d,r13d
  4582. lea eax,DWORD PTR[r15*1+rax]
  4583. mov r12d,r9d
  4584. add r11d,DWORD PTR[rsp]
  4585. and r12d,r8d
  4586. rorx r13d,r8d,25
  4587. rorx r15d,r8d,11
  4588. lea eax,DWORD PTR[r14*1+rax]
  4589. lea r11d,DWORD PTR[r12*1+r11]
  4590. andn r12d,r8d,r10d
  4591. xor r13d,r15d
  4592. rorx r14d,r8d,6
  4593. lea r11d,DWORD PTR[r12*1+r11]
  4594. xor r13d,r14d
  4595. mov r15d,eax
  4596. rorx r12d,eax,22
  4597. lea r11d,DWORD PTR[r13*1+r11]
  4598. xor r15d,ebx
  4599. rorx r14d,eax,13
  4600. rorx r13d,eax,2
  4601. lea edx,DWORD PTR[r11*1+rdx]
  4602. and edi,r15d
  4603. xor r14d,r12d
  4604. xor edi,ebx
  4605. xor r14d,r13d
  4606. lea r11d,DWORD PTR[rdi*1+r11]
  4607. mov r12d,r8d
  4608. add r10d,DWORD PTR[4+rsp]
  4609. and r12d,edx
  4610. rorx r13d,edx,25
  4611. rorx edi,edx,11
  4612. lea r11d,DWORD PTR[r14*1+r11]
  4613. lea r10d,DWORD PTR[r12*1+r10]
  4614. andn r12d,edx,r9d
  4615. xor r13d,edi
  4616. rorx r14d,edx,6
  4617. lea r10d,DWORD PTR[r12*1+r10]
  4618. xor r13d,r14d
  4619. mov edi,r11d
  4620. rorx r12d,r11d,22
  4621. lea r10d,DWORD PTR[r13*1+r10]
  4622. xor edi,eax
  4623. rorx r14d,r11d,13
  4624. rorx r13d,r11d,2
  4625. lea ecx,DWORD PTR[r10*1+rcx]
  4626. and r15d,edi
  4627. xor r14d,r12d
  4628. xor r15d,eax
  4629. xor r14d,r13d
  4630. lea r10d,DWORD PTR[r15*1+r10]
  4631. mov r12d,edx
  4632. add r9d,DWORD PTR[8+rsp]
  4633. and r12d,ecx
  4634. rorx r13d,ecx,25
  4635. rorx r15d,ecx,11
  4636. lea r10d,DWORD PTR[r14*1+r10]
  4637. lea r9d,DWORD PTR[r12*1+r9]
  4638. andn r12d,ecx,r8d
  4639. xor r13d,r15d
  4640. rorx r14d,ecx,6
  4641. lea r9d,DWORD PTR[r12*1+r9]
  4642. xor r13d,r14d
  4643. mov r15d,r10d
  4644. rorx r12d,r10d,22
  4645. lea r9d,DWORD PTR[r13*1+r9]
  4646. xor r15d,r11d
  4647. rorx r14d,r10d,13
  4648. rorx r13d,r10d,2
  4649. lea ebx,DWORD PTR[r9*1+rbx]
  4650. and edi,r15d
  4651. xor r14d,r12d
  4652. xor edi,r11d
  4653. xor r14d,r13d
  4654. lea r9d,DWORD PTR[rdi*1+r9]
  4655. mov r12d,ecx
  4656. add r8d,DWORD PTR[12+rsp]
  4657. and r12d,ebx
  4658. rorx r13d,ebx,25
  4659. rorx edi,ebx,11
  4660. lea r9d,DWORD PTR[r14*1+r9]
  4661. lea r8d,DWORD PTR[r12*1+r8]
  4662. andn r12d,ebx,edx
  4663. xor r13d,edi
  4664. rorx r14d,ebx,6
  4665. lea r8d,DWORD PTR[r12*1+r8]
  4666. xor r13d,r14d
  4667. mov edi,r9d
  4668. rorx r12d,r9d,22
  4669. lea r8d,DWORD PTR[r13*1+r8]
  4670. xor edi,r10d
  4671. rorx r14d,r9d,13
  4672. rorx r13d,r9d,2
  4673. lea eax,DWORD PTR[r8*1+rax]
  4674. and r15d,edi
  4675. xor r14d,r12d
  4676. xor r15d,r10d
  4677. xor r14d,r13d
  4678. lea r8d,DWORD PTR[r15*1+r8]
  4679. mov r12d,ebx
  4680. add edx,DWORD PTR[32+rsp]
  4681. and r12d,eax
  4682. rorx r13d,eax,25
  4683. rorx r15d,eax,11
  4684. lea r8d,DWORD PTR[r14*1+r8]
  4685. lea edx,DWORD PTR[r12*1+rdx]
  4686. andn r12d,eax,ecx
  4687. xor r13d,r15d
  4688. rorx r14d,eax,6
  4689. lea edx,DWORD PTR[r12*1+rdx]
  4690. xor r13d,r14d
  4691. mov r15d,r8d
  4692. rorx r12d,r8d,22
  4693. lea edx,DWORD PTR[r13*1+rdx]
  4694. xor r15d,r9d
  4695. rorx r14d,r8d,13
  4696. rorx r13d,r8d,2
  4697. lea r11d,DWORD PTR[rdx*1+r11]
  4698. and edi,r15d
  4699. xor r14d,r12d
  4700. xor edi,r9d
  4701. xor r14d,r13d
  4702. lea edx,DWORD PTR[rdi*1+rdx]
  4703. mov r12d,eax
  4704. add ecx,DWORD PTR[36+rsp]
  4705. and r12d,r11d
  4706. rorx r13d,r11d,25
  4707. rorx edi,r11d,11
  4708. lea edx,DWORD PTR[r14*1+rdx]
  4709. lea ecx,DWORD PTR[r12*1+rcx]
  4710. andn r12d,r11d,ebx
  4711. xor r13d,edi
  4712. rorx r14d,r11d,6
  4713. lea ecx,DWORD PTR[r12*1+rcx]
  4714. xor r13d,r14d
  4715. mov edi,edx
  4716. rorx r12d,edx,22
  4717. lea ecx,DWORD PTR[r13*1+rcx]
  4718. xor edi,r8d
  4719. rorx r14d,edx,13
  4720. rorx r13d,edx,2
  4721. lea r10d,DWORD PTR[rcx*1+r10]
  4722. and r15d,edi
  4723. xor r14d,r12d
  4724. xor r15d,r8d
  4725. xor r14d,r13d
  4726. lea ecx,DWORD PTR[r15*1+rcx]
  4727. mov r12d,r11d
  4728. add ebx,DWORD PTR[40+rsp]
  4729. and r12d,r10d
  4730. rorx r13d,r10d,25
  4731. rorx r15d,r10d,11
  4732. lea ecx,DWORD PTR[r14*1+rcx]
  4733. lea ebx,DWORD PTR[r12*1+rbx]
  4734. andn r12d,r10d,eax
  4735. xor r13d,r15d
  4736. rorx r14d,r10d,6
  4737. lea ebx,DWORD PTR[r12*1+rbx]
  4738. xor r13d,r14d
  4739. mov r15d,ecx
  4740. rorx r12d,ecx,22
  4741. lea ebx,DWORD PTR[r13*1+rbx]
  4742. xor r15d,edx
  4743. rorx r14d,ecx,13
  4744. rorx r13d,ecx,2
  4745. lea r9d,DWORD PTR[rbx*1+r9]
  4746. and edi,r15d
  4747. xor r14d,r12d
  4748. xor edi,edx
  4749. xor r14d,r13d
  4750. lea ebx,DWORD PTR[rdi*1+rbx]
  4751. mov r12d,r10d
  4752. add eax,DWORD PTR[44+rsp]
  4753. and r12d,r9d
  4754. rorx r13d,r9d,25
  4755. rorx edi,r9d,11
  4756. lea ebx,DWORD PTR[r14*1+rbx]
  4757. lea eax,DWORD PTR[r12*1+rax]
  4758. andn r12d,r9d,r11d
  4759. xor r13d,edi
  4760. rorx r14d,r9d,6
  4761. lea eax,DWORD PTR[r12*1+rax]
  4762. xor r13d,r14d
  4763. mov edi,ebx
  4764. rorx r12d,ebx,22
  4765. lea eax,DWORD PTR[r13*1+rax]
  4766. xor edi,ecx
  4767. rorx r14d,ebx,13
  4768. rorx r13d,ebx,2
  4769. lea r8d,DWORD PTR[rax*1+r8]
  4770. and r15d,edi
  4771. xor r14d,r12d
  4772. xor r15d,ecx
  4773. xor r14d,r13d
  4774. lea eax,DWORD PTR[r15*1+rax]
  4775. mov r12d,r9d
  4776. mov rdi,QWORD PTR[512+rsp]
  4777. add eax,r14d
  4778. lea rbp,QWORD PTR[448+rsp]
  4779. add eax,DWORD PTR[rdi]
  4780. add ebx,DWORD PTR[4+rdi]
  4781. add ecx,DWORD PTR[8+rdi]
  4782. add edx,DWORD PTR[12+rdi]
  4783. add r8d,DWORD PTR[16+rdi]
  4784. add r9d,DWORD PTR[20+rdi]
  4785. add r10d,DWORD PTR[24+rdi]
  4786. add r11d,DWORD PTR[28+rdi]
  4787. mov DWORD PTR[rdi],eax
  4788. mov DWORD PTR[4+rdi],ebx
  4789. mov DWORD PTR[8+rdi],ecx
  4790. mov DWORD PTR[12+rdi],edx
  4791. mov DWORD PTR[16+rdi],r8d
  4792. mov DWORD PTR[20+rdi],r9d
  4793. mov DWORD PTR[24+rdi],r10d
  4794. mov DWORD PTR[28+rdi],r11d
  4795. cmp rsi,QWORD PTR[80+rbp]
  4796. je $L$done_avx2
  4797. xor r14d,r14d
  4798. mov edi,ebx
  4799. xor edi,ecx
  4800. mov r12d,r9d
  4801. jmp $L$ower_avx2
  4802. ALIGN 16
  4803. $L$ower_avx2::
  4804. add r11d,DWORD PTR[((0+16))+rbp]
  4805. and r12d,r8d
  4806. rorx r13d,r8d,25
  4807. rorx r15d,r8d,11
  4808. lea eax,DWORD PTR[r14*1+rax]
  4809. lea r11d,DWORD PTR[r12*1+r11]
  4810. andn r12d,r8d,r10d
  4811. xor r13d,r15d
  4812. rorx r14d,r8d,6
  4813. lea r11d,DWORD PTR[r12*1+r11]
  4814. xor r13d,r14d
  4815. mov r15d,eax
  4816. rorx r12d,eax,22
  4817. lea r11d,DWORD PTR[r13*1+r11]
  4818. xor r15d,ebx
  4819. rorx r14d,eax,13
  4820. rorx r13d,eax,2
  4821. lea edx,DWORD PTR[r11*1+rdx]
  4822. and edi,r15d
  4823. xor r14d,r12d
  4824. xor edi,ebx
  4825. xor r14d,r13d
  4826. lea r11d,DWORD PTR[rdi*1+r11]
  4827. mov r12d,r8d
  4828. add r10d,DWORD PTR[((4+16))+rbp]
  4829. and r12d,edx
  4830. rorx r13d,edx,25
  4831. rorx edi,edx,11
  4832. lea r11d,DWORD PTR[r14*1+r11]
  4833. lea r10d,DWORD PTR[r12*1+r10]
  4834. andn r12d,edx,r9d
  4835. xor r13d,edi
  4836. rorx r14d,edx,6
  4837. lea r10d,DWORD PTR[r12*1+r10]
  4838. xor r13d,r14d
  4839. mov edi,r11d
  4840. rorx r12d,r11d,22
  4841. lea r10d,DWORD PTR[r13*1+r10]
  4842. xor edi,eax
  4843. rorx r14d,r11d,13
  4844. rorx r13d,r11d,2
  4845. lea ecx,DWORD PTR[r10*1+rcx]
  4846. and r15d,edi
  4847. xor r14d,r12d
  4848. xor r15d,eax
  4849. xor r14d,r13d
  4850. lea r10d,DWORD PTR[r15*1+r10]
  4851. mov r12d,edx
  4852. add r9d,DWORD PTR[((8+16))+rbp]
  4853. and r12d,ecx
  4854. rorx r13d,ecx,25
  4855. rorx r15d,ecx,11
  4856. lea r10d,DWORD PTR[r14*1+r10]
  4857. lea r9d,DWORD PTR[r12*1+r9]
  4858. andn r12d,ecx,r8d
  4859. xor r13d,r15d
  4860. rorx r14d,ecx,6
  4861. lea r9d,DWORD PTR[r12*1+r9]
  4862. xor r13d,r14d
  4863. mov r15d,r10d
  4864. rorx r12d,r10d,22
  4865. lea r9d,DWORD PTR[r13*1+r9]
  4866. xor r15d,r11d
  4867. rorx r14d,r10d,13
  4868. rorx r13d,r10d,2
  4869. lea ebx,DWORD PTR[r9*1+rbx]
  4870. and edi,r15d
  4871. xor r14d,r12d
  4872. xor edi,r11d
  4873. xor r14d,r13d
  4874. lea r9d,DWORD PTR[rdi*1+r9]
  4875. mov r12d,ecx
  4876. add r8d,DWORD PTR[((12+16))+rbp]
  4877. and r12d,ebx
  4878. rorx r13d,ebx,25
  4879. rorx edi,ebx,11
  4880. lea r9d,DWORD PTR[r14*1+r9]
  4881. lea r8d,DWORD PTR[r12*1+r8]
  4882. andn r12d,ebx,edx
  4883. xor r13d,edi
  4884. rorx r14d,ebx,6
  4885. lea r8d,DWORD PTR[r12*1+r8]
  4886. xor r13d,r14d
  4887. mov edi,r9d
  4888. rorx r12d,r9d,22
  4889. lea r8d,DWORD PTR[r13*1+r8]
  4890. xor edi,r10d
  4891. rorx r14d,r9d,13
  4892. rorx r13d,r9d,2
  4893. lea eax,DWORD PTR[r8*1+rax]
  4894. and r15d,edi
  4895. xor r14d,r12d
  4896. xor r15d,r10d
  4897. xor r14d,r13d
  4898. lea r8d,DWORD PTR[r15*1+r8]
  4899. mov r12d,ebx
  4900. add edx,DWORD PTR[((32+16))+rbp]
  4901. and r12d,eax
  4902. rorx r13d,eax,25
  4903. rorx r15d,eax,11
  4904. lea r8d,DWORD PTR[r14*1+r8]
  4905. lea edx,DWORD PTR[r12*1+rdx]
  4906. andn r12d,eax,ecx
  4907. xor r13d,r15d
  4908. rorx r14d,eax,6
  4909. lea edx,DWORD PTR[r12*1+rdx]
  4910. xor r13d,r14d
  4911. mov r15d,r8d
  4912. rorx r12d,r8d,22
  4913. lea edx,DWORD PTR[r13*1+rdx]
  4914. xor r15d,r9d
  4915. rorx r14d,r8d,13
  4916. rorx r13d,r8d,2
  4917. lea r11d,DWORD PTR[rdx*1+r11]
  4918. and edi,r15d
  4919. xor r14d,r12d
  4920. xor edi,r9d
  4921. xor r14d,r13d
  4922. lea edx,DWORD PTR[rdi*1+rdx]
  4923. mov r12d,eax
  4924. add ecx,DWORD PTR[((36+16))+rbp]
  4925. and r12d,r11d
  4926. rorx r13d,r11d,25
  4927. rorx edi,r11d,11
  4928. lea edx,DWORD PTR[r14*1+rdx]
  4929. lea ecx,DWORD PTR[r12*1+rcx]
  4930. andn r12d,r11d,ebx
  4931. xor r13d,edi
  4932. rorx r14d,r11d,6
  4933. lea ecx,DWORD PTR[r12*1+rcx]
  4934. xor r13d,r14d
  4935. mov edi,edx
  4936. rorx r12d,edx,22
  4937. lea ecx,DWORD PTR[r13*1+rcx]
  4938. xor edi,r8d
  4939. rorx r14d,edx,13
  4940. rorx r13d,edx,2
  4941. lea r10d,DWORD PTR[rcx*1+r10]
  4942. and r15d,edi
  4943. xor r14d,r12d
  4944. xor r15d,r8d
  4945. xor r14d,r13d
  4946. lea ecx,DWORD PTR[r15*1+rcx]
  4947. mov r12d,r11d
  4948. add ebx,DWORD PTR[((40+16))+rbp]
  4949. and r12d,r10d
  4950. rorx r13d,r10d,25
  4951. rorx r15d,r10d,11
  4952. lea ecx,DWORD PTR[r14*1+rcx]
  4953. lea ebx,DWORD PTR[r12*1+rbx]
  4954. andn r12d,r10d,eax
  4955. xor r13d,r15d
  4956. rorx r14d,r10d,6
  4957. lea ebx,DWORD PTR[r12*1+rbx]
  4958. xor r13d,r14d
  4959. mov r15d,ecx
  4960. rorx r12d,ecx,22
  4961. lea ebx,DWORD PTR[r13*1+rbx]
  4962. xor r15d,edx
  4963. rorx r14d,ecx,13
  4964. rorx r13d,ecx,2
  4965. lea r9d,DWORD PTR[rbx*1+r9]
  4966. and edi,r15d
  4967. xor r14d,r12d
  4968. xor edi,edx
  4969. xor r14d,r13d
  4970. lea ebx,DWORD PTR[rdi*1+rbx]
  4971. mov r12d,r10d
  4972. add eax,DWORD PTR[((44+16))+rbp]
  4973. and r12d,r9d
  4974. rorx r13d,r9d,25
  4975. rorx edi,r9d,11
  4976. lea ebx,DWORD PTR[r14*1+rbx]
  4977. lea eax,DWORD PTR[r12*1+rax]
  4978. andn r12d,r9d,r11d
  4979. xor r13d,edi
  4980. rorx r14d,r9d,6
  4981. lea eax,DWORD PTR[r12*1+rax]
  4982. xor r13d,r14d
  4983. mov edi,ebx
  4984. rorx r12d,ebx,22
  4985. lea eax,DWORD PTR[r13*1+rax]
  4986. xor edi,ecx
  4987. rorx r14d,ebx,13
  4988. rorx r13d,ebx,2
  4989. lea r8d,DWORD PTR[rax*1+r8]
  4990. and r15d,edi
  4991. xor r14d,r12d
  4992. xor r15d,ecx
  4993. xor r14d,r13d
  4994. lea eax,DWORD PTR[r15*1+rax]
  4995. mov r12d,r9d
  4996. lea rbp,QWORD PTR[((-64))+rbp]
  4997. cmp rbp,rsp
  4998. jae $L$ower_avx2
  4999. mov rdi,QWORD PTR[512+rsp]
  5000. add eax,r14d
  5001. lea rsp,QWORD PTR[448+rsp]
  5002. add eax,DWORD PTR[rdi]
  5003. add ebx,DWORD PTR[4+rdi]
  5004. add ecx,DWORD PTR[8+rdi]
  5005. add edx,DWORD PTR[12+rdi]
  5006. add r8d,DWORD PTR[16+rdi]
  5007. add r9d,DWORD PTR[20+rdi]
  5008. lea rsi,QWORD PTR[128+rsi]
  5009. add r10d,DWORD PTR[24+rdi]
  5010. mov r12,rsi
  5011. add r11d,DWORD PTR[28+rdi]
  5012. cmp rsi,QWORD PTR[((64+16))+rsp]
  5013. mov DWORD PTR[rdi],eax
  5014. cmove r12,rsp
  5015. mov DWORD PTR[4+rdi],ebx
  5016. mov DWORD PTR[8+rdi],ecx
  5017. mov DWORD PTR[12+rdi],edx
  5018. mov DWORD PTR[16+rdi],r8d
  5019. mov DWORD PTR[20+rdi],r9d
  5020. mov DWORD PTR[24+rdi],r10d
  5021. mov DWORD PTR[28+rdi],r11d
  5022. jbe $L$oop_avx2
  5023. lea rbp,QWORD PTR[rsp]
  5024. $L$done_avx2::
  5025. mov rsi,QWORD PTR[88+rbp]
  5026. vzeroupper
  5027. movaps xmm6,XMMWORD PTR[((64+32))+rbp]
  5028. movaps xmm7,XMMWORD PTR[((64+48))+rbp]
  5029. movaps xmm8,XMMWORD PTR[((64+64))+rbp]
  5030. movaps xmm9,XMMWORD PTR[((64+80))+rbp]
  5031. mov r15,QWORD PTR[((-48))+rsi]
  5032. mov r14,QWORD PTR[((-40))+rsi]
  5033. mov r13,QWORD PTR[((-32))+rsi]
  5034. mov r12,QWORD PTR[((-24))+rsi]
  5035. mov rbp,QWORD PTR[((-16))+rsi]
  5036. mov rbx,QWORD PTR[((-8))+rsi]
  5037. lea rsp,QWORD PTR[rsi]
  5038. $L$epilogue_avx2::
  5039. mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue
  5040. mov rsi,QWORD PTR[16+rsp]
  5041. DB 0F3h,0C3h ;repret
  5042. $L$SEH_end_sha256_block_data_order_avx2::
  5043. sha256_block_data_order_avx2 ENDP
  5044. EXTERN __imp_RtlVirtualUnwind:NEAR
  5045. ALIGN 16
  ; se_handler -- Win64 language-specific exception handler named in the
  ; .xdata records of sha256_block_data_order and its SIMD variants.
  ;
  ; In:   r8 = CONTEXT of the interrupted code, r9 = DISPATCHER_CONTEXT
  ;       (offsets below follow the Win64 layouts of those structures).
  ;       HandlerData holds two image-relative DWORDs: the label that ends
  ;       the variant's prologue and the label that starts its epilogue.
  ; Does: rewrites the CONTEXT so that it describes the caller of the hash
  ;       routine (rsp, rbx, rbp, rsi, rdi, r12-r15 and, for code located
  ;       after $L$epilogue, xmm6-xmm9), copies it to disp->ContextRecord
  ;       and calls RtlVirtualUnwind.
  ; Out:  eax = 1 (ExceptionContinueSearch).
  ; Note: $L$in_prologue is also the jump target of shaext_handler, so the
  ;       push order and the 64-byte scratch area must match in both places.
  5046. se_handler PROC PRIVATE
  5047. push rsi ; save every Win64 non-volatile GPR that is modified below
  5048. push rdi
  5049. push rbx
  5050. push rbp
  5051. push r12
  5052. push r13
  5053. push r14
  5054. push r15
  5055. pushfq ; flags as well: the copy sequences below execute cld
  5056. sub rsp,64 ; 32-byte shadow space + 4 stack arguments for the call below
  5057. mov rax,QWORD PTR[120+r8] ; rax = context->Rax (the routines copy rsp into rax on entry)
  5058. mov rbx,QWORD PTR[248+r8] ; rbx = context->Rip
  5059. mov rsi,QWORD PTR[8+r9] ; rsi = disp->ImageBase
  5060. mov r11,QWORD PTR[56+r9] ; r11 = disp->HandlerData
  5061. mov r10d,DWORD PTR[r11] ; HandlerData[0]: RVA of the end-of-prologue label
  5062. lea r10,QWORD PTR[r10*1+rsi] ; RVA -> absolute address
  5063. cmp rbx,r10
  5064. jb $L$in_prologue ; Rip before that label: frame not set up, use rax as entry rsp
  5065. mov rax,QWORD PTR[152+r8] ; rax = context->Rsp
  5066. mov r10d,DWORD PTR[4+r11] ; HandlerData[1]: RVA of the epilogue label
  5067. lea r10,QWORD PTR[r10*1+rsi]
  5068. cmp rbx,r10
  5069. jae $L$in_prologue ; Rip at/after the epilogue label: rsp is already the entry rsp
  5070. lea r10,QWORD PTR[$L$avx2_shortcut]
  5071. cmp rbx,r10
  5072. jb $L$not_in_avx2
  5073. and rax,-256*4 ; AVX2 body moves rsp while hashing: round down to 1 KiB ...
  5074. add rax,448 ; ... and add 448 to get back to the frame base (presumably mirrors the AVX2 prologue, which is not shown here)
  5075. $L$not_in_avx2::
  5076. mov rsi,rax ; keep the frame base for the XMM copy below
  5077. mov rax,QWORD PTR[((64+24))+rax] ; rax = entry rsp stored at frame+88
  5078. mov rbx,QWORD PTR[((-8))+rax] ; reload the six registers pushed below the entry rsp
  5079. mov rbp,QWORD PTR[((-16))+rax] ; (from here on rbx no longer holds context->Rip)
  5080. mov r12,QWORD PTR[((-24))+rax]
  5081. mov r13,QWORD PTR[((-32))+rax]
  5082. mov r14,QWORD PTR[((-40))+rax]
  5083. mov r15,QWORD PTR[((-48))+rax]
  5084. mov QWORD PTR[144+r8],rbx ; context->Rbx
  5085. mov QWORD PTR[160+r8],rbp ; context->Rbp
  5086. mov QWORD PTR[216+r8],r12 ; context->R12
  5087. mov QWORD PTR[224+r8],r13 ; context->R13
  5088. mov QWORD PTR[232+r8],r14 ; context->R14
  5089. mov QWORD PTR[240+r8],r15 ; context->R15
  5090. lea r10,QWORD PTR[$L$epilogue]
  5091. cmp QWORD PTR[248+r8],r10 ; FIX: test context->Rip itself; rbx was overwritten with the caller's rbx above
  5092. jb $L$in_prologue ; code before $L$epilogue has no XMM save area to restore
  5093. lea rsi,QWORD PTR[((64+32))+rsi] ; source: XMM save area at frame+96
  5094. lea rdi,QWORD PTR[512+r8] ; destination: context->Xmm6
  5095. mov ecx,8 ; 8 qwords = xmm6..xmm9
  5096. DD 0a548f3fch ; bytes FC F3 48 A5 = cld ; rep movsq
  5097. $L$in_prologue::
  5098. mov rdi,QWORD PTR[8+rax] ; rdi/rsi were spilled at entry_rsp+8/+16 by the WIN64 prologue
  5099. mov rsi,QWORD PTR[16+rax]
  5100. mov QWORD PTR[152+r8],rax ; context->Rsp = entry rsp
  5101. mov QWORD PTR[168+r8],rsi ; context->Rsi
  5102. mov QWORD PTR[176+r8],rdi ; context->Rdi
  5103. mov rdi,QWORD PTR[40+r9] ; destination: disp->ContextRecord
  5104. mov rsi,r8 ; source: the CONTEXT just patched
  5105. mov ecx,154 ; 154 qwords = 1232 bytes = sizeof(CONTEXT)
  5106. DD 0a548f3fch ; cld ; rep movsq
  5107. mov rsi,r9 ; rsi = disp (r8/r9 are reloaded as call arguments)
  5108. xor rcx,rcx ; arg1: HandlerType = 0 (UNW_FLAG_NHANDLER)
  5109. mov rdx,QWORD PTR[8+rsi] ; arg2: disp->ImageBase
  5110. mov r8,QWORD PTR[rsi] ; arg3: disp->ControlPc
  5111. mov r9,QWORD PTR[16+rsi] ; arg4: disp->FunctionEntry
  5112. mov r10,QWORD PTR[40+rsi] ; disp->ContextRecord
  5113. lea r11,QWORD PTR[56+rsi] ; &disp->HandlerData
  5114. lea r12,QWORD PTR[24+rsi] ; &disp->EstablisherFrame
  5115. mov QWORD PTR[32+rsp],r10 ; arg5
  5116. mov QWORD PTR[40+rsp],r11 ; arg6
  5117. mov QWORD PTR[48+rsp],r12 ; arg7
  5118. mov QWORD PTR[56+rsp],rcx ; arg8: NULL
  5119. call QWORD PTR[__imp_RtlVirtualUnwind]
  5120. mov eax,1 ; ExceptionContinueSearch
  5121. add rsp,64
  5122. popfq
  5123. pop r15
  5124. pop r14
  5125. pop r13
  5126. pop r12
  5127. pop rbp
  5128. pop rbx
  5129. pop rdi
  5130. pop rsi
  5131. DB 0F3h,0C3h ;repret
  5132. se_handler ENDP
  5133. ALIGN 16
  ; shaext_handler -- Win64 exception handler named in
  ; $L$SEH_info_sha256_block_data_order_shaext.
  ;
  ; In:   r8 = CONTEXT of the interrupted code (only r8 is read here).
  ; Does: when context->Rip lies in [$L$prologue_shaext, $L$epilogue_shaext),
  ;       copies 10 qwords from (context->Rax - 88) to context+512, then
  ;       continues at $L$in_prologue inside se_handler, which finishes the
  ;       CONTEXT fix-up, calls RtlVirtualUnwind and returns.
  ; Note: there is no ret in this PROC; the push order and the 64-byte stack
  ;       reservation must stay identical to se_handler's because its pops
  ;       undo them.
  5134. shaext_handler PROC PRIVATE
  5135. push rsi ; same push sequence as se_handler (shared tail pops it)
  5136. push rdi
  5137. push rbx
  5138. push rbp
  5139. push r12
  5140. push r13
  5141. push r14
  5142. push r15
  5143. pushfq
  5144. sub rsp,64 ; scratch/argument area used by the call in the shared tail
  5145. mov rax,QWORD PTR[120+r8] ; rax = context->Rax
  5146. mov rbx,QWORD PTR[248+r8] ; rbx = context->Rip
  5147. lea r10,QWORD PTR[$L$prologue_shaext]
  5148. cmp rbx,r10
  5149. jb $L$in_prologue ; Rip before $L$prologue_shaext: skip the XMM copy
  5150. lea r10,QWORD PTR[$L$epilogue_shaext]
  5151. cmp rbx,r10
  5152. jae $L$in_prologue ; Rip at/after $L$epilogue_shaext: skip the XMM copy
  5153. lea rsi,QWORD PTR[((-8-80))+rax] ; source = rax-88; assumes the SHA-ext routine keeps its entry rsp in rax and saves its XMM registers there -- routine not shown here, confirm
  5154. lea rdi,QWORD PTR[512+r8] ; destination = context+512 (Xmm6 slot of the Win64 CONTEXT)
  5155. mov ecx,10 ; 10 qwords = 80 bytes = five XMM registers
  5156. DD 0a548f3fch ; bytes FC F3 48 A5 = cld ; rep movsq
  5157. jmp $L$in_prologue ; finish in se_handler's tail
  5158. shaext_handler ENDP
  5159. .text$ ENDS
  ; .pdata -- function table for this module. Each entry is three image-relative
  ; DWORDs: start label, end label, and the matching $L$SEH_info_* record in
  ; .xdata (the Win64 RUNTIME_FUNCTION layout). There is one entry per entry
  ; point of sha256_block_data_order: generic, shaext, ssse3, avx and avx2.
  5160. .pdata SEGMENT READONLY ALIGN(4)
  5161. ALIGN 4
  5162. DD imagerel $L$SEH_begin_sha256_block_data_order ; generic entry point: begin
  5163. DD imagerel $L$SEH_end_sha256_block_data_order ; generic entry point: end
  5164. DD imagerel $L$SEH_info_sha256_block_data_order ; generic entry point: unwind info
  5165. DD imagerel $L$SEH_begin_sha256_block_data_order_shaext ; shaext variant: begin
  5166. DD imagerel $L$SEH_end_sha256_block_data_order_shaext ; shaext variant: end
  5167. DD imagerel $L$SEH_info_sha256_block_data_order_shaext ; shaext variant: unwind info
  5168. DD imagerel $L$SEH_begin_sha256_block_data_order_ssse3 ; ssse3 variant: begin
  5169. DD imagerel $L$SEH_end_sha256_block_data_order_ssse3 ; ssse3 variant: end
  5170. DD imagerel $L$SEH_info_sha256_block_data_order_ssse3 ; ssse3 variant: unwind info
  5171. DD imagerel $L$SEH_begin_sha256_block_data_order_avx ; avx variant: begin
  5172. DD imagerel $L$SEH_end_sha256_block_data_order_avx ; avx variant: end
  5173. DD imagerel $L$SEH_info_sha256_block_data_order_avx ; avx variant: unwind info
  5174. DD imagerel $L$SEH_begin_sha256_block_data_order_avx2 ; avx2 variant: begin
  5175. DD imagerel $L$SEH_end_sha256_block_data_order_avx2 ; avx2 variant: end
  5176. DD imagerel $L$SEH_info_sha256_block_data_order_avx2 ; avx2 variant: unwind info
  5177. .pdata ENDS
  ; .xdata -- unwind-info records referenced by the third DWORD of each .pdata
  ; entry. Every record starts with the 4-byte header 9,0,0,0: in the Win64
  ; UNWIND_INFO layout the first byte packs version 1 (low 3 bits) with flags 1
  ; (UNW_FLAG_EHANDLER, upper 5 bits); prolog size, unwind-code count and frame
  ; register are all zero. The header is followed by the image-relative address
  ; of the handler procedure.
  ; Records that name se_handler append two more image-relative labels
  ; (a prologue label and an epilogue label) as handler-specific data.
  ; NOTE(review): presumably se_handler reads this pair to bound the function
  ; body; the code that does so is outside this excerpt -- confirm.
  ; The shaext record carries no such pair: shaext_handler compares against
  ; $L$prologue_shaext / $L$epilogue_shaext directly.
  5178. .xdata SEGMENT READONLY ALIGN(8)
  5179. ALIGN 8
  5180. $L$SEH_info_sha256_block_data_order::
  5181. DB 9,0,0,0 ; header: version 1, handler flag set, no unwind codes
  5182. DD imagerel se_handler ; handler procedure
  5183. DD imagerel $L$prologue,imagerel $L$epilogue ; handler data: label pair for the generic path
  5184. $L$SEH_info_sha256_block_data_order_shaext::
  5185. DB 9,0,0,0 ; header: version 1, handler flag set, no unwind codes
  5186. DD imagerel shaext_handler ; handler procedure; no handler data follows
  5187. $L$SEH_info_sha256_block_data_order_ssse3::
  5188. DB 9,0,0,0 ; header: version 1, handler flag set, no unwind codes
  5189. DD imagerel se_handler ; handler procedure
  5190. DD imagerel $L$prologue_ssse3,imagerel $L$epilogue_ssse3 ; handler data: label pair for the ssse3 path
  5191. $L$SEH_info_sha256_block_data_order_avx::
  5192. DB 9,0,0,0 ; header: version 1, handler flag set, no unwind codes
  5193. DD imagerel se_handler ; handler procedure
  5194. DD imagerel $L$prologue_avx,imagerel $L$epilogue_avx ; handler data: label pair for the avx path
  5195. $L$SEH_info_sha256_block_data_order_avx2::
  5196. DB 9,0,0,0 ; header: version 1, handler flag set, no unwind codes
  5197. DD imagerel se_handler ; handler procedure
  5198. DD imagerel $L$prologue_avx2,imagerel $L$epilogue_avx2 ; handler data: label pair for the avx2 path
  5199. .xdata ENDS
  5200. END