aesni-sha256-x86_64.masm 96 KB


  1. OPTION DOTNAME
  2. .text$ SEGMENT ALIGN(256) 'CODE'
  3. EXTERN OPENSSL_ia32cap_P:NEAR
  4. PUBLIC aesni_cbc_sha256_enc
  5. ALIGN 16
  ; ---------------------------------------------------------------------
  ; aesni_cbc_sha256_enc -- public entry point and CPU-feature dispatcher.
  ;
  ; ABI:  Microsoft x64 (first argument arrives in rcx).
  ; In:   rcx = first argument. rcx == 0 is a probe call.
  ; Out:  probe call  -> eax = 1, returns immediately.
  ;       normal call -> branches (no call/ret pair) into one of the
  ;                      variant procedures, which returns to our caller.
  ; Clobbers: rax, r10, r11, flags. Argument registers and the stack are
  ;       left untouched so the selected variant sees the original call.
  ;
  ; Capability words are read from OPENSSL_ia32cap_P:
  ;       eax = dword at +0 (loaded but not consumed in this procedure)
  ;       r10 = qword at +4 (low half = dword at +4, high half = dword at +8)
  ;       r11 = r10 >> 32   (dword at +8)
  ; NOTE(review): the feature named on each test below is inferred from the
  ; branch target's name; confirm bit meanings against OPENSSL_ia32cap docs.
  ;
  ; Differences from the generated original: "cmp rcx,0" became
  ; "test rcx,rcx" (same ZF result for je), and four instructions that
  ; followed the unconditional ud2 without any label were dropped because
  ; nothing could reach them.
  ; ---------------------------------------------------------------------
  6. aesni_cbc_sha256_enc PROC PUBLIC
  7. lea r11,QWORD PTR[OPENSSL_ia32cap_P]
  8. mov eax,1 ; return value for the probe path
  9. test rcx,rcx ; probe call (first argument is zero)?
  10. je $L$probe
  11. mov eax,DWORD PTR[r11]
  12. mov r10,QWORD PTR[4+r11]
  13. bt r10,61 ; bit 61 of the qword = bit 29 of the dword at +8
  14. jc aesni_cbc_sha256_enc_shaext
  15. mov r11,r10
  16. shr r11,32 ; r11d = dword at +8
  17. test r10d,2048 ; 2048 = 1<<11, tested in the dword at +4
  18. jnz aesni_cbc_sha256_enc_xop
  19. and r11d,296 ; 296 = 1<<8 | 1<<5 | 1<<3 ...
  20. cmp r11d,296 ; ... all three bits must be set
  21. je aesni_cbc_sha256_enc_avx2
  22. and r10d,268435456 ; 268435456 = 1<<28, tested in the dword at +4
  23. jnz aesni_cbc_sha256_enc_avx
  24. ud2 ; no usable variant: raise #UD rather than return
  29. $L$probe::
  30. DB 0F3h,0C3h ;repret
  31. aesni_cbc_sha256_enc ENDP
  ; ---------------------------------------------------------------------
  ; K256 -- read-only constant pool shared by the procedures in this file.
  ; Byte offsets below are relative to the K256 label and are hard-coded
  ; in the code (K256+512, K256+544, and a byte test at rbp+131), so the
  ; order and size of every row matter.
  ;
  ;   +0   .. +511  the 64 SHA-256 round constants, four per row, with
  ;                 every row emitted twice (distinct rows are therefore
  ;                 32 bytes apart).
  ;                 NOTE(review): the duplication presumably serves a
  ;                 256-bit-wide variant outside this view -- confirm.
  ;   +512 .. +543  byte-order shuffle mask for vpshufb, emitted twice.
  ;                 The most significant byte of its first dword
  ;                 (0x00010203) is 0, which is what the
  ;                 "cmp BYTE PTR[131+rbp],0" loop test detects.
  ;   +544 .. +623  twenty dwords: 8 zero, 4 all-ones, 8 zero. Code loads
  ;                 three consecutive 16-byte masks starting at
  ;                 +544 + 8*n, so for n = 0/2/4 exactly one of the three
  ;                 masks is all-ones.
  ;   +624 ..       NUL-terminated ASCII identification string.
  ; ---------------------------------------------------------------------
  32. ALIGN 64
  33. K256::
  34. DD 0428a2f98h,071374491h,0b5c0fbcfh,0e9b5dba5h
  35. DD 0428a2f98h,071374491h,0b5c0fbcfh,0e9b5dba5h
  36. DD 03956c25bh,059f111f1h,0923f82a4h,0ab1c5ed5h
  37. DD 03956c25bh,059f111f1h,0923f82a4h,0ab1c5ed5h
  38. DD 0d807aa98h,012835b01h,0243185beh,0550c7dc3h
  39. DD 0d807aa98h,012835b01h,0243185beh,0550c7dc3h
  40. DD 072be5d74h,080deb1feh,09bdc06a7h,0c19bf174h
  41. DD 072be5d74h,080deb1feh,09bdc06a7h,0c19bf174h
  42. DD 0e49b69c1h,0efbe4786h,00fc19dc6h,0240ca1cch
  43. DD 0e49b69c1h,0efbe4786h,00fc19dc6h,0240ca1cch
  44. DD 02de92c6fh,04a7484aah,05cb0a9dch,076f988dah
  45. DD 02de92c6fh,04a7484aah,05cb0a9dch,076f988dah
  46. DD 0983e5152h,0a831c66dh,0b00327c8h,0bf597fc7h
  47. DD 0983e5152h,0a831c66dh,0b00327c8h,0bf597fc7h
  48. DD 0c6e00bf3h,0d5a79147h,006ca6351h,014292967h
  49. DD 0c6e00bf3h,0d5a79147h,006ca6351h,014292967h
  50. DD 027b70a85h,02e1b2138h,04d2c6dfch,053380d13h
  51. DD 027b70a85h,02e1b2138h,04d2c6dfch,053380d13h
  52. DD 0650a7354h,0766a0abbh,081c2c92eh,092722c85h
  53. DD 0650a7354h,0766a0abbh,081c2c92eh,092722c85h
  54. DD 0a2bfe8a1h,0a81a664bh,0c24b8b70h,0c76c51a3h
  55. DD 0a2bfe8a1h,0a81a664bh,0c24b8b70h,0c76c51a3h
  56. DD 0d192e819h,0d6990624h,0f40e3585h,0106aa070h
  57. DD 0d192e819h,0d6990624h,0f40e3585h,0106aa070h
  58. DD 019a4c116h,01e376c08h,02748774ch,034b0bcb5h
  59. DD 019a4c116h,01e376c08h,02748774ch,034b0bcb5h
  60. DD 0391c0cb3h,04ed8aa4ah,05b9cca4fh,0682e6ff3h
  61. DD 0391c0cb3h,04ed8aa4ah,05b9cca4fh,0682e6ff3h
  62. DD 0748f82eeh,078a5636fh,084c87814h,08cc70208h
  63. DD 0748f82eeh,078a5636fh,084c87814h,08cc70208h
  64. DD 090befffah,0a4506cebh,0bef9a3f7h,0c67178f2h
  65. DD 090befffah,0a4506cebh,0bef9a3f7h,0c67178f2h
  ; +512: byte-order shuffle mask (two copies)
  66. DD 000010203h,004050607h,008090a0bh,00c0d0e0fh
  67. DD 000010203h,004050607h,008090a0bh,00c0d0e0fh
  ; +544: zero / all-ones / zero selection masks
  68. DD 0,0,0,0,0,0,0,0,-1,-1,-1,-1
  69. DD 0,0,0,0,0,0,0,0
  ; +624: "AESNI-CBC+SHA256 stitch for x86_64, CRYPTOGAMS by <appro@openssl.org>",0
  70. DB 65,69,83,78,73,45,67,66,67,43,83,72,65,50,53,54
  71. DB 32,115,116,105,116,99,104,32,102,111,114,32,120,56,54,95
  72. DB 54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98
  73. DB 121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108
  74. DB 46,111,114,103,62,0
  ; The second ALIGN 64 below has no effect once the first has been applied.
  75. ALIGN 64
  76. ALIGN 64
  ; ---------------------------------------------------------------------
  ; aesni_cbc_sha256_enc_xop -- AES-CBC encryption interleaved ("stitched")
  ; with SHA-256 compression; this variant uses AVX/AES-NI mnemonics plus
  ; hand-encoded XOP instructions. Reached by a branch from the public
  ; dispatcher.
  ;
  ; ABI: Microsoft x64. Arguments as used by the code below:
  ;   arg1 rcx       data encrypted 16 bytes at a time
  ;   arg2 rdx       ciphertext destination (written at the same offset)
  ;   arg3 r8        number of 64-byte blocks (shifted left by 6 for bytes)
  ;   arg4 r9        AES key schedule: 16-byte round keys read at +0..+224,
  ;                  one dword read at +240
  ;   arg5 [40+rsp]  16-byte chaining value: read at start, last ciphertext
  ;                  block written back before return
  ;   arg6 [48+rsp]  eight 32-bit hash state words, read at start and
  ;                  updated after every 64-byte block
  ;   arg7 [56+rsp]  data that is hashed, 64 bytes per outer iteration
  ; NOTE(review): the outer loop tests its end condition only at the
  ; bottom, so the body runs once even when arg3 is 0 -- confirm callers
  ; never pass 0.
  ;
  ; Frame (rsp aligned down to 64 bytes):
  ;   [rsp+0   .. +63 ]  16 staged dwords: message word + round constant
  ;   [rsp+64 ]          spilled AES input pointer (r12)
  ;   [rsp+72 ]          arg2 - arg1
  ;   [rsp+80 ]          arg1 + 64*arg3 (end of input)
  ;   [rsp+96 ]          arg5
  ;   [rsp+104]          arg6
  ;   [rsp+112]          arg7 - arg1
  ;   [rsp+120]          rsp as it was before the pushes
  ;   [rsp+128 .. +287]  saved xmm6..xmm15
  ;
  ; Register roles inside the loops:
  ;   eax,ebx,ecx,edx,r8d,r9d,r10d,r11d  the eight hash working variables
  ;                                      (state words 0..7 in load order)
  ;   r12  AES input pointer (its low half r12d doubles as round scratch,
  ;        hence the spill to [rsp+64])
  ;   r13d,r14d,r15d,esi  per-round scratch
  ;   rbp  cursor into K256;  rdi = arg4 + 128
  ;   xmm0-xmm3 message schedule, xmm4-xmm7 schedule scratch
  ;   xmm8 CBC chaining value / finished ciphertext block
  ;   xmm9 AES state, xmm10 current round key, xmm11 aesenclast candidate
  ;   xmm12,xmm13,xmm14 masks picking the 10/12/14-round candidate
  ;
  ; The "DB 143,232,120,194,..." lines are raw instruction bytes.
  ; NOTE(review): 8F E8 78 C2 /r ib looks like XOP "vprotd xmm,xmm,imm8"
  ; (rotate each dword left by imm8) -- confirm against the AMD XOP
  ; reference.
  ; ---------------------------------------------------------------------
  77. aesni_cbc_sha256_enc_xop PROC PRIVATE
  78. mov QWORD PTR[8+rsp],rdi ;WIN64 prologue
  79. mov QWORD PTR[16+rsp],rsi
  80. mov rax,rsp
  81. $L$SEH_begin_aesni_cbc_sha256_enc_xop::
  ; Move the Win64 arguments into rdi,rsi,rdx,rcx,r8,r9 (and r10 below).
  82. mov rdi,rcx
  83. mov rsi,rdx
  84. mov rdx,r8
  85. mov rcx,r9
  86. mov r8,QWORD PTR[40+rsp]
  87. mov r9,QWORD PTR[48+rsp]
  88. $L$xop_shortcut::
  89. mov r10,QWORD PTR[56+rsp] ; r10 = arg7
  90. mov rax,rsp ; rax = rsp before the pushes; stored at [120+rsp] below
  91. push rbx
  92. push rbp
  93. push r12
  94. push r13
  95. push r14
  96. push r15
  97. sub rsp,288 ; reserve the frame ...
  98. and rsp,-64 ; ... and round rsp down to a 64-byte boundary
  99. shl rdx,6 ; rdx = arg3 * 64 (byte count)
  100. sub rsi,rdi ; rsi = arg2 - arg1
  101. sub r10,rdi ; r10 = arg7 - arg1
  102. add rdx,rdi ; rdx = arg1 + byte count (end-of-input pointer)
  103. mov QWORD PTR[((64+8))+rsp],rsi
  104. mov QWORD PTR[((64+16))+rsp],rdx
  105. mov QWORD PTR[((64+32))+rsp],r8
  106. mov QWORD PTR[((64+40))+rsp],r9
  107. mov QWORD PTR[((64+48))+rsp],r10
  108. mov QWORD PTR[120+rsp],rax
  ; xmm6-xmm15 are callee-saved under the Microsoft x64 convention.
  109. movaps XMMWORD PTR[128+rsp],xmm6
  110. movaps XMMWORD PTR[144+rsp],xmm7
  111. movaps XMMWORD PTR[160+rsp],xmm8
  112. movaps XMMWORD PTR[176+rsp],xmm9
  113. movaps XMMWORD PTR[192+rsp],xmm10
  114. movaps XMMWORD PTR[208+rsp],xmm11
  115. movaps XMMWORD PTR[224+rsp],xmm12
  116. movaps XMMWORD PTR[240+rsp],xmm13
  117. movaps XMMWORD PTR[256+rsp],xmm14
  118. movaps XMMWORD PTR[272+rsp],xmm15
  119. $L$prologue_xop::
  120. vzeroall
  121. mov r12,rdi ; r12 = running AES input pointer (arg1)
  122. lea rdi,QWORD PTR[128+rcx] ; rdi = arg4 + 128; round keys are addressed as (n-128)+rdi
  123. lea r13,QWORD PTR[((K256+544))] ; base of the zero/all-ones mask area
  124. mov r14d,DWORD PTR[((240-128))+rdi] ; dword at arg4+240
  125. mov r15,r9 ; r15 = arg6 (hash state)
  126. mov rsi,r10 ; rsi = arg7 - arg1
  127. vmovdqu xmm8,XMMWORD PTR[r8] ; xmm8 = initial chaining value from arg5
  ; r14 = (dword at arg4+240) - 9, then used as r14*8 into the mask area.
  ; NOTE(review): presumably that dword is 9/11/13, giving offsets 0/16/32
  ; so exactly one of xmm12/xmm13/xmm14 is all-ones -- confirm.
  128. sub r14,9
  129. mov eax,DWORD PTR[r15]
  130. mov ebx,DWORD PTR[4+r15]
  131. mov ecx,DWORD PTR[8+r15]
  132. mov edx,DWORD PTR[12+r15]
  133. mov r8d,DWORD PTR[16+r15]
  134. mov r9d,DWORD PTR[20+r15]
  135. mov r10d,DWORD PTR[24+r15]
  136. mov r11d,DWORD PTR[28+r15]
  137. vmovdqa xmm14,XMMWORD PTR[r14*8+r13]
  138. vmovdqa xmm13,XMMWORD PTR[16+r14*8+r13]
  139. vmovdqa xmm12,XMMWORD PTR[32+r14*8+r13]
  140. vmovdqu xmm10,XMMWORD PTR[((0-128))+rdi] ; xmm10 = round key at arg4+0
  141. jmp $L$loop_xop
  142. ALIGN 16
  ; ---- outer loop: one 64-byte hash block and four 16-byte AES blocks ----
  143. $L$loop_xop::
  144. vmovdqa xmm7,XMMWORD PTR[((K256+512))] ; byte-order shuffle mask
  145. vmovdqu xmm0,XMMWORD PTR[r12*1+rsi] ; 64 bytes of hashed data at arg7 + offset
  146. vmovdqu xmm1,XMMWORD PTR[16+r12*1+rsi]
  147. vmovdqu xmm2,XMMWORD PTR[32+r12*1+rsi]
  148. vmovdqu xmm3,XMMWORD PTR[48+r12*1+rsi]
  149. vpshufb xmm0,xmm0,xmm7
  150. lea rbp,QWORD PTR[K256] ; rbp = start of the round-constant table
  151. vpshufb xmm1,xmm1,xmm7
  152. vpshufb xmm2,xmm2,xmm7
  153. vpaddd xmm4,xmm0,XMMWORD PTR[rbp] ; message words + constants (rows are 32 bytes apart)
  154. vpshufb xmm3,xmm3,xmm7
  155. vpaddd xmm5,xmm1,XMMWORD PTR[32+rbp]
  156. vpaddd xmm6,xmm2,XMMWORD PTR[64+rbp]
  157. vpaddd xmm7,xmm3,XMMWORD PTR[96+rbp]
  158. vmovdqa XMMWORD PTR[rsp],xmm4 ; stage the 16 sums at [rsp..rsp+63]
  159. mov r14d,eax
  160. vmovdqa XMMWORD PTR[16+rsp],xmm5
  161. mov esi,ebx
  162. vmovdqa XMMWORD PTR[32+rsp],xmm6
  163. xor esi,ecx
  164. vmovdqa XMMWORD PTR[48+rsp],xmm7
  165. mov r13d,r8d
  166. jmp $L$xop_00_47
  167. ALIGN 16
  ; ---- inner loop: 16 hash rounds + message schedule for the next 16
  ; ---- words + one complete AES-CBC block. The exit test at the bottom
  ; ---- lets it run with rbp = K256+128, +256 and +384 (three passes).
  168. $L$xop_00_47::
  169. sub rbp,-16*2*4 ; rbp += 128: advance four (duplicated) constant rows
  170. vmovdqu xmm9,XMMWORD PTR[r12] ; xmm9 = next 16-byte AES input block
  171. mov QWORD PTR[((64+0))+rsp],r12 ; spill r12: r12d is round scratch below
  172. vpalignr xmm4,xmm1,xmm0,4
  173. ror r13d,14
  174. mov eax,r14d
  175. vpalignr xmm7,xmm3,xmm2,4
  176. mov r12d,r9d
  177. xor r13d,r8d
  178. DB 143,232,120,194,236,14 ; raw bytes 8F E8 78 C2 EC 0E (see header note on DB lines)
  179. ror r14d,9
  180. xor r12d,r10d
  181. vpsrld xmm4,xmm4,3
  182. ror r13d,5
  183. xor r14d,eax
  184. vpaddd xmm0,xmm0,xmm7
  185. and r12d,r8d
  186. vpxor xmm9,xmm9,xmm10 ; AES: xor in the round key at arg4+0
  187. vmovdqu xmm10,XMMWORD PTR[((16-128))+rdi] ; preload the next round key
  188. xor r13d,r8d
  189. add r11d,DWORD PTR[rsp] ; consume staged word 0
  190. mov r15d,eax
  191. DB 143,232,120,194,245,11
  192. ror r14d,11
  193. xor r12d,r10d
  194. vpxor xmm4,xmm4,xmm5
  195. xor r15d,ebx
  196. ror r13d,6
  197. add r11d,r12d
  198. and esi,r15d
  199. DB 143,232,120,194,251,13
  200. xor r14d,eax
  201. add r11d,r13d
  202. vpxor xmm4,xmm4,xmm6
  203. xor esi,ebx
  204. add edx,r11d
  205. vpsrld xmm6,xmm3,10
  206. ror r14d,2
  207. add r11d,esi
  208. vpaddd xmm0,xmm0,xmm4
  209. mov r13d,edx
  210. add r14d,r11d
  211. DB 143,232,120,194,239,2
  212. ror r13d,14
  213. mov r11d,r14d
  214. vpxor xmm7,xmm7,xmm6
  215. mov r12d,r8d
  216. xor r13d,edx
  217. ror r14d,9
  218. xor r12d,r9d
  219. vpxor xmm7,xmm7,xmm5
  220. ror r13d,5
  221. xor r14d,r11d
  222. and r12d,edx
  223. vpxor xmm9,xmm9,xmm8 ; CBC: xor in the previous ciphertext block (chaining value)
  224. xor r13d,edx
  225. vpsrldq xmm7,xmm7,8
  226. add r10d,DWORD PTR[4+rsp]
  227. mov esi,r11d
  228. ror r14d,11
  229. xor r12d,r9d
  230. vpaddd xmm0,xmm0,xmm7
  231. xor esi,eax
  232. ror r13d,6
  233. add r10d,r12d
  234. and r15d,esi
  235. DB 143,232,120,194,248,13
  236. xor r14d,r11d
  237. add r10d,r13d
  238. vpsrld xmm6,xmm0,10
  239. xor r15d,eax
  240. add ecx,r10d
  241. DB 143,232,120,194,239,2
  242. ror r14d,2
  243. add r10d,r15d
  244. vpxor xmm7,xmm7,xmm6
  245. mov r13d,ecx
  246. add r14d,r10d
  247. ror r13d,14
  248. mov r10d,r14d
  249. vpxor xmm7,xmm7,xmm5
  250. mov r12d,edx
  251. xor r13d,ecx
  252. ror r14d,9
  253. xor r12d,r8d
  254. vpslldq xmm7,xmm7,8
  255. ror r13d,5
  256. xor r14d,r10d
  257. and r12d,ecx
  258. vaesenc xmm9,xmm9,xmm10 ; AES round with the key at arg4+16
  259. vmovdqu xmm10,XMMWORD PTR[((32-128))+rdi]
  260. xor r13d,ecx
  261. vpaddd xmm0,xmm0,xmm7
  262. add r9d,DWORD PTR[8+rsp]
  263. mov r15d,r10d
  264. ror r14d,11
  265. xor r12d,r8d
  266. vpaddd xmm6,xmm0,XMMWORD PTR[rbp] ; new schedule words + constants; staged at [rsp] below
  267. xor r15d,r11d
  268. ror r13d,6
  269. add r9d,r12d
  270. and esi,r15d
  271. xor r14d,r10d
  272. add r9d,r13d
  273. xor esi,r11d
  274. add ebx,r9d
  275. ror r14d,2
  276. add r9d,esi
  277. mov r13d,ebx
  278. add r14d,r9d
  279. ror r13d,14
  280. mov r9d,r14d
  281. mov r12d,ecx
  282. xor r13d,ebx
  283. ror r14d,9
  284. xor r12d,edx
  285. ror r13d,5
  286. xor r14d,r9d
  287. and r12d,ebx
  288. vaesenc xmm9,xmm9,xmm10
  289. vmovdqu xmm10,XMMWORD PTR[((48-128))+rdi]
  290. xor r13d,ebx
  291. add r8d,DWORD PTR[12+rsp]
  292. mov esi,r9d
  293. ror r14d,11
  294. xor r12d,edx
  295. xor esi,r10d
  296. ror r13d,6
  297. add r8d,r12d
  298. and r15d,esi
  299. xor r14d,r9d
  300. add r8d,r13d
  301. xor r15d,r10d
  302. add eax,r8d
  303. ror r14d,2
  304. add r8d,r15d
  305. mov r13d,eax
  306. add r14d,r8d
  307. vmovdqa XMMWORD PTR[rsp],xmm6 ; staged words 0-3 were consumed above; replace them
  308. vpalignr xmm4,xmm2,xmm1,4
  309. ror r13d,14
  310. mov r8d,r14d
  311. vpalignr xmm7,xmm0,xmm3,4
  312. mov r12d,ebx
  313. xor r13d,eax
  314. DB 143,232,120,194,236,14
  315. ror r14d,9
  316. xor r12d,ecx
  317. vpsrld xmm4,xmm4,3
  318. ror r13d,5
  319. xor r14d,r8d
  320. vpaddd xmm1,xmm1,xmm7
  321. and r12d,eax
  322. vaesenc xmm9,xmm9,xmm10
  323. vmovdqu xmm10,XMMWORD PTR[((64-128))+rdi]
  324. xor r13d,eax
  325. add edx,DWORD PTR[16+rsp]
  326. mov r15d,r8d
  327. DB 143,232,120,194,245,11
  328. ror r14d,11
  329. xor r12d,ecx
  330. vpxor xmm4,xmm4,xmm5
  331. xor r15d,r9d
  332. ror r13d,6
  333. add edx,r12d
  334. and esi,r15d
  335. DB 143,232,120,194,248,13
  336. xor r14d,r8d
  337. add edx,r13d
  338. vpxor xmm4,xmm4,xmm6
  339. xor esi,r9d
  340. add r11d,edx
  341. vpsrld xmm6,xmm0,10
  342. ror r14d,2
  343. add edx,esi
  344. vpaddd xmm1,xmm1,xmm4
  345. mov r13d,r11d
  346. add r14d,edx
  347. DB 143,232,120,194,239,2
  348. ror r13d,14
  349. mov edx,r14d
  350. vpxor xmm7,xmm7,xmm6
  351. mov r12d,eax
  352. xor r13d,r11d
  353. ror r14d,9
  354. xor r12d,ebx
  355. vpxor xmm7,xmm7,xmm5
  356. ror r13d,5
  357. xor r14d,edx
  358. and r12d,r11d
  359. vaesenc xmm9,xmm9,xmm10
  360. vmovdqu xmm10,XMMWORD PTR[((80-128))+rdi]
  361. xor r13d,r11d
  362. vpsrldq xmm7,xmm7,8
  363. add ecx,DWORD PTR[20+rsp]
  364. mov esi,edx
  365. ror r14d,11
  366. xor r12d,ebx
  367. vpaddd xmm1,xmm1,xmm7
  368. xor esi,r8d
  369. ror r13d,6
  370. add ecx,r12d
  371. and r15d,esi
  372. DB 143,232,120,194,249,13
  373. xor r14d,edx
  374. add ecx,r13d
  375. vpsrld xmm6,xmm1,10
  376. xor r15d,r8d
  377. add r10d,ecx
  378. DB 143,232,120,194,239,2
  379. ror r14d,2
  380. add ecx,r15d
  381. vpxor xmm7,xmm7,xmm6
  382. mov r13d,r10d
  383. add r14d,ecx
  384. ror r13d,14
  385. mov ecx,r14d
  386. vpxor xmm7,xmm7,xmm5
  387. mov r12d,r11d
  388. xor r13d,r10d
  389. ror r14d,9
  390. xor r12d,eax
  391. vpslldq xmm7,xmm7,8
  392. ror r13d,5
  393. xor r14d,ecx
  394. and r12d,r10d
  395. vaesenc xmm9,xmm9,xmm10
  396. vmovdqu xmm10,XMMWORD PTR[((96-128))+rdi]
  397. xor r13d,r10d
  398. vpaddd xmm1,xmm1,xmm7
  399. add ebx,DWORD PTR[24+rsp]
  400. mov r15d,ecx
  401. ror r14d,11
  402. xor r12d,eax
  403. vpaddd xmm6,xmm1,XMMWORD PTR[32+rbp]
  404. xor r15d,edx
  405. ror r13d,6
  406. add ebx,r12d
  407. and esi,r15d
  408. xor r14d,ecx
  409. add ebx,r13d
  410. xor esi,edx
  411. add r9d,ebx
  412. ror r14d,2
  413. add ebx,esi
  414. mov r13d,r9d
  415. add r14d,ebx
  416. ror r13d,14
  417. mov ebx,r14d
  418. mov r12d,r10d
  419. xor r13d,r9d
  420. ror r14d,9
  421. xor r12d,r11d
  422. ror r13d,5
  423. xor r14d,ebx
  424. and r12d,r9d
  425. vaesenc xmm9,xmm9,xmm10
  426. vmovdqu xmm10,XMMWORD PTR[((112-128))+rdi]
  427. xor r13d,r9d
  428. add eax,DWORD PTR[28+rsp]
  429. mov esi,ebx
  430. ror r14d,11
  431. xor r12d,r11d
  432. xor esi,ecx
  433. ror r13d,6
  434. add eax,r12d
  435. and r15d,esi
  436. xor r14d,ebx
  437. add eax,r13d
  438. xor r15d,ecx
  439. add r8d,eax
  440. ror r14d,2
  441. add eax,r15d
  442. mov r13d,r8d
  443. add r14d,eax
  444. vmovdqa XMMWORD PTR[16+rsp],xmm6 ; replace staged words 4-7
  445. vpalignr xmm4,xmm3,xmm2,4
  446. ror r13d,14
  447. mov eax,r14d
  448. vpalignr xmm7,xmm1,xmm0,4
  449. mov r12d,r9d
  450. xor r13d,r8d
  451. DB 143,232,120,194,236,14
  452. ror r14d,9
  453. xor r12d,r10d
  454. vpsrld xmm4,xmm4,3
  455. ror r13d,5
  456. xor r14d,eax
  457. vpaddd xmm2,xmm2,xmm7
  458. and r12d,r8d
  459. vaesenc xmm9,xmm9,xmm10
  460. vmovdqu xmm10,XMMWORD PTR[((128-128))+rdi]
  461. xor r13d,r8d
  462. add r11d,DWORD PTR[32+rsp]
  463. mov r15d,eax
  464. DB 143,232,120,194,245,11
  465. ror r14d,11
  466. xor r12d,r10d
  467. vpxor xmm4,xmm4,xmm5
  468. xor r15d,ebx
  469. ror r13d,6
  470. add r11d,r12d
  471. and esi,r15d
  472. DB 143,232,120,194,249,13
  473. xor r14d,eax
  474. add r11d,r13d
  475. vpxor xmm4,xmm4,xmm6
  476. xor esi,ebx
  477. add edx,r11d
  478. vpsrld xmm6,xmm1,10
  479. ror r14d,2
  480. add r11d,esi
  481. vpaddd xmm2,xmm2,xmm4
  482. mov r13d,edx
  483. add r14d,r11d
  484. DB 143,232,120,194,239,2
  485. ror r13d,14
  486. mov r11d,r14d
  487. vpxor xmm7,xmm7,xmm6
  488. mov r12d,r8d
  489. xor r13d,edx
  490. ror r14d,9
  491. xor r12d,r9d
  492. vpxor xmm7,xmm7,xmm5
  493. ror r13d,5
  494. xor r14d,r11d
  495. and r12d,edx
  496. vaesenc xmm9,xmm9,xmm10
  497. vmovdqu xmm10,XMMWORD PTR[((144-128))+rdi]
  498. xor r13d,edx
  499. vpsrldq xmm7,xmm7,8
  500. add r10d,DWORD PTR[36+rsp]
  501. mov esi,r11d
  502. ror r14d,11
  503. xor r12d,r9d
  504. vpaddd xmm2,xmm2,xmm7
  505. xor esi,eax
  506. ror r13d,6
  507. add r10d,r12d
  508. and r15d,esi
  509. DB 143,232,120,194,250,13
  510. xor r14d,r11d
  511. add r10d,r13d
  512. vpsrld xmm6,xmm2,10
  513. xor r15d,eax
  514. add ecx,r10d
  515. DB 143,232,120,194,239,2
  516. ror r14d,2
  517. add r10d,r15d
  518. vpxor xmm7,xmm7,xmm6
  519. mov r13d,ecx
  520. add r14d,r10d
  521. ror r13d,14
  522. mov r10d,r14d
  523. vpxor xmm7,xmm7,xmm5
  524. mov r12d,edx
  525. xor r13d,ecx
  526. ror r14d,9
  527. xor r12d,r8d
  528. vpslldq xmm7,xmm7,8
  529. ror r13d,5
  530. xor r14d,r10d
  531. and r12d,ecx
  532. vaesenc xmm9,xmm9,xmm10
  533. vmovdqu xmm10,XMMWORD PTR[((160-128))+rdi]
  534. xor r13d,ecx
  535. vpaddd xmm2,xmm2,xmm7
  536. add r9d,DWORD PTR[40+rsp]
  537. mov r15d,r10d
  538. ror r14d,11
  539. xor r12d,r8d
  540. vpaddd xmm6,xmm2,XMMWORD PTR[64+rbp]
  541. xor r15d,r11d
  542. ror r13d,6
  543. add r9d,r12d
  544. and esi,r15d
  545. xor r14d,r10d
  546. add r9d,r13d
  547. xor esi,r11d
  548. add ebx,r9d
  549. ror r14d,2
  550. add r9d,esi
  551. mov r13d,ebx
  552. add r14d,r9d
  553. ror r13d,14
  554. mov r9d,r14d
  555. mov r12d,ecx
  556. xor r13d,ebx
  557. ror r14d,9
  558. xor r12d,edx
  559. ror r13d,5
  560. xor r14d,r9d
  561. and r12d,ebx
  ; The same round key (arg4+160) is used both as a final round, giving the
  ; candidate after 10 rounds in xmm11, and as an ordinary round so xmm9
  ; can continue toward the longer variants.
  562. vaesenclast xmm11,xmm9,xmm10
  563. vaesenc xmm9,xmm9,xmm10
  564. vmovdqu xmm10,XMMWORD PTR[((176-128))+rdi]
  565. xor r13d,ebx
  566. add r8d,DWORD PTR[44+rsp]
  567. mov esi,r9d
  568. ror r14d,11
  569. xor r12d,edx
  570. xor esi,r10d
  571. ror r13d,6
  572. add r8d,r12d
  573. and r15d,esi
  574. xor r14d,r9d
  575. add r8d,r13d
  576. xor r15d,r10d
  577. add eax,r8d
  578. ror r14d,2
  579. add r8d,r15d
  580. mov r13d,eax
  581. add r14d,r8d
  582. vmovdqa XMMWORD PTR[32+rsp],xmm6 ; replace staged words 8-11
  583. vpalignr xmm4,xmm0,xmm3,4
  584. ror r13d,14
  585. mov r8d,r14d
  586. vpalignr xmm7,xmm2,xmm1,4
  587. mov r12d,ebx
  588. xor r13d,eax
  589. DB 143,232,120,194,236,14
  590. ror r14d,9
  591. xor r12d,ecx
  592. vpsrld xmm4,xmm4,3
  593. ror r13d,5
  594. xor r14d,r8d
  595. vpaddd xmm3,xmm3,xmm7
  596. and r12d,eax
  597. vpand xmm8,xmm11,xmm12 ; keep the 10-round candidate only if xmm12 is all-ones
  598. vaesenc xmm9,xmm9,xmm10
  599. vmovdqu xmm10,XMMWORD PTR[((192-128))+rdi]
  600. xor r13d,eax
  601. add edx,DWORD PTR[48+rsp]
  602. mov r15d,r8d
  603. DB 143,232,120,194,245,11
  604. ror r14d,11
  605. xor r12d,ecx
  606. vpxor xmm4,xmm4,xmm5
  607. xor r15d,r9d
  608. ror r13d,6
  609. add edx,r12d
  610. and esi,r15d
  611. DB 143,232,120,194,250,13
  612. xor r14d,r8d
  613. add edx,r13d
  614. vpxor xmm4,xmm4,xmm6
  615. xor esi,r9d
  616. add r11d,edx
  617. vpsrld xmm6,xmm2,10
  618. ror r14d,2
  619. add edx,esi
  620. vpaddd xmm3,xmm3,xmm4
  621. mov r13d,r11d
  622. add r14d,edx
  623. DB 143,232,120,194,239,2
  624. ror r13d,14
  625. mov edx,r14d
  626. vpxor xmm7,xmm7,xmm6
  627. mov r12d,eax
  628. xor r13d,r11d
  629. ror r14d,9
  630. xor r12d,ebx
  631. vpxor xmm7,xmm7,xmm5
  632. ror r13d,5
  633. xor r14d,edx
  634. and r12d,r11d
  635. vaesenclast xmm11,xmm9,xmm10 ; candidate after 12 rounds (key at arg4+192)
  636. vaesenc xmm9,xmm9,xmm10
  637. vmovdqu xmm10,XMMWORD PTR[((208-128))+rdi]
  638. xor r13d,r11d
  639. vpsrldq xmm7,xmm7,8
  640. add ecx,DWORD PTR[52+rsp]
  641. mov esi,edx
  642. ror r14d,11
  643. xor r12d,ebx
  644. vpaddd xmm3,xmm3,xmm7
  645. xor esi,r8d
  646. ror r13d,6
  647. add ecx,r12d
  648. and r15d,esi
  649. DB 143,232,120,194,251,13
  650. xor r14d,edx
  651. add ecx,r13d
  652. vpsrld xmm6,xmm3,10
  653. xor r15d,r8d
  654. add r10d,ecx
  655. DB 143,232,120,194,239,2
  656. ror r14d,2
  657. add ecx,r15d
  658. vpxor xmm7,xmm7,xmm6
  659. mov r13d,r10d
  660. add r14d,ecx
  661. ror r13d,14
  662. mov ecx,r14d
  663. vpxor xmm7,xmm7,xmm5
  664. mov r12d,r11d
  665. xor r13d,r10d
  666. ror r14d,9
  667. xor r12d,eax
  668. vpslldq xmm7,xmm7,8
  669. ror r13d,5
  670. xor r14d,ecx
  671. and r12d,r10d
  672. vpand xmm11,xmm11,xmm13 ; keep the 12-round candidate only if xmm13 is all-ones
  673. vaesenc xmm9,xmm9,xmm10
  674. vmovdqu xmm10,XMMWORD PTR[((224-128))+rdi]
  675. xor r13d,r10d
  676. vpaddd xmm3,xmm3,xmm7
  677. add ebx,DWORD PTR[56+rsp]
  678. mov r15d,ecx
  679. ror r14d,11
  680. xor r12d,eax
  681. vpaddd xmm6,xmm3,XMMWORD PTR[96+rbp]
  682. xor r15d,edx
  683. ror r13d,6
  684. add ebx,r12d
  685. and esi,r15d
  686. xor r14d,ecx
  687. add ebx,r13d
  688. xor esi,edx
  689. add r9d,ebx
  690. ror r14d,2
  691. add ebx,esi
  692. mov r13d,r9d
  693. add r14d,ebx
  694. ror r13d,14
  695. mov ebx,r14d
  696. mov r12d,r10d
  697. xor r13d,r9d
  698. ror r14d,9
  699. xor r12d,r11d
  700. ror r13d,5
  701. xor r14d,ebx
  702. and r12d,r9d
  703. vpor xmm8,xmm8,xmm11
  704. vaesenclast xmm11,xmm9,xmm10 ; candidate after 14 rounds (key at arg4+224)
  705. vmovdqu xmm10,XMMWORD PTR[((0-128))+rdi] ; reload the key at arg4+0 for the next AES block
  706. xor r13d,r9d
  707. add eax,DWORD PTR[60+rsp]
  708. mov esi,ebx
  709. ror r14d,11
  710. xor r12d,r11d
  711. xor esi,ecx
  712. ror r13d,6
  713. add eax,r12d
  714. and r15d,esi
  715. xor r14d,ebx
  716. add eax,r13d
  717. xor r15d,ecx
  718. add r8d,eax
  719. ror r14d,2
  720. add eax,r15d
  721. mov r13d,r8d
  722. add r14d,eax
  723. vmovdqa XMMWORD PTR[48+rsp],xmm6 ; replace staged words 12-15
  724. mov r12,QWORD PTR[((64+0))+rsp] ; restore the AES input pointer
  725. vpand xmm11,xmm11,xmm14 ; keep the 14-round candidate only if xmm14 is all-ones
  726. mov r15,QWORD PTR[((64+8))+rsp] ; r15 = arg2 - arg1
  727. vpor xmm8,xmm8,xmm11 ; xmm8 = selected ciphertext block (also the next chaining value)
  728. vmovdqu XMMWORD PTR[r12*1+r15],xmm8 ; write it at arg2 + offset
  729. lea r12,QWORD PTR[16+r12] ; advance the input pointer by 16 without touching flags
  ; Loop while the top byte of the dword at rbp+128 is non-zero. It is
  ; non-zero for the round constants at K256+256 and K256+384 and zero
  ; for the shuffle mask at K256+512.
  730. cmp BYTE PTR[131+rbp],0
  731. jne $L$xop_00_47
  ; ---- final 16 hash rounds (no further message schedule) interleaved
  ; ---- with the fourth AES block of this 64-byte chunk ----
  732. vmovdqu xmm9,XMMWORD PTR[r12]
  733. mov QWORD PTR[((64+0))+rsp],r12
  734. ror r13d,14
  735. mov eax,r14d
  736. mov r12d,r9d
  737. xor r13d,r8d
  738. ror r14d,9
  739. xor r12d,r10d
  740. ror r13d,5
  741. xor r14d,eax
  742. and r12d,r8d
  743. vpxor xmm9,xmm9,xmm10
  744. vmovdqu xmm10,XMMWORD PTR[((16-128))+rdi]
  745. xor r13d,r8d
  746. add r11d,DWORD PTR[rsp]
  747. mov r15d,eax
  748. ror r14d,11
  749. xor r12d,r10d
  750. xor r15d,ebx
  751. ror r13d,6
  752. add r11d,r12d
  753. and esi,r15d
  754. xor r14d,eax
  755. add r11d,r13d
  756. xor esi,ebx
  757. add edx,r11d
  758. ror r14d,2
  759. add r11d,esi
  760. mov r13d,edx
  761. add r14d,r11d
  762. ror r13d,14
  763. mov r11d,r14d
  764. mov r12d,r8d
  765. xor r13d,edx
  766. ror r14d,9
  767. xor r12d,r9d
  768. ror r13d,5
  769. xor r14d,r11d
  770. and r12d,edx
  771. vpxor xmm9,xmm9,xmm8
  772. xor r13d,edx
  773. add r10d,DWORD PTR[4+rsp]
  774. mov esi,r11d
  775. ror r14d,11
  776. xor r12d,r9d
  777. xor esi,eax
  778. ror r13d,6
  779. add r10d,r12d
  780. and r15d,esi
  781. xor r14d,r11d
  782. add r10d,r13d
  783. xor r15d,eax
  784. add ecx,r10d
  785. ror r14d,2
  786. add r10d,r15d
  787. mov r13d,ecx
  788. add r14d,r10d
  789. ror r13d,14
  790. mov r10d,r14d
  791. mov r12d,edx
  792. xor r13d,ecx
  793. ror r14d,9
  794. xor r12d,r8d
  795. ror r13d,5
  796. xor r14d,r10d
  797. and r12d,ecx
  798. vaesenc xmm9,xmm9,xmm10
  799. vmovdqu xmm10,XMMWORD PTR[((32-128))+rdi]
  800. xor r13d,ecx
  801. add r9d,DWORD PTR[8+rsp]
  802. mov r15d,r10d
  803. ror r14d,11
  804. xor r12d,r8d
  805. xor r15d,r11d
  806. ror r13d,6
  807. add r9d,r12d
  808. and esi,r15d
  809. xor r14d,r10d
  810. add r9d,r13d
  811. xor esi,r11d
  812. add ebx,r9d
  813. ror r14d,2
  814. add r9d,esi
  815. mov r13d,ebx
  816. add r14d,r9d
  817. ror r13d,14
  818. mov r9d,r14d
  819. mov r12d,ecx
  820. xor r13d,ebx
  821. ror r14d,9
  822. xor r12d,edx
  823. ror r13d,5
  824. xor r14d,r9d
  825. and r12d,ebx
  826. vaesenc xmm9,xmm9,xmm10
  827. vmovdqu xmm10,XMMWORD PTR[((48-128))+rdi]
  828. xor r13d,ebx
  829. add r8d,DWORD PTR[12+rsp]
  830. mov esi,r9d
  831. ror r14d,11
  832. xor r12d,edx
  833. xor esi,r10d
  834. ror r13d,6
  835. add r8d,r12d
  836. and r15d,esi
  837. xor r14d,r9d
  838. add r8d,r13d
  839. xor r15d,r10d
  840. add eax,r8d
  841. ror r14d,2
  842. add r8d,r15d
  843. mov r13d,eax
  844. add r14d,r8d
  845. ror r13d,14
  846. mov r8d,r14d
  847. mov r12d,ebx
  848. xor r13d,eax
  849. ror r14d,9
  850. xor r12d,ecx
  851. ror r13d,5
  852. xor r14d,r8d
  853. and r12d,eax
  854. vaesenc xmm9,xmm9,xmm10
  855. vmovdqu xmm10,XMMWORD PTR[((64-128))+rdi]
  856. xor r13d,eax
  857. add edx,DWORD PTR[16+rsp]
  858. mov r15d,r8d
  859. ror r14d,11
  860. xor r12d,ecx
  861. xor r15d,r9d
  862. ror r13d,6
  863. add edx,r12d
  864. and esi,r15d
  865. xor r14d,r8d
  866. add edx,r13d
  867. xor esi,r9d
  868. add r11d,edx
  869. ror r14d,2
  870. add edx,esi
  871. mov r13d,r11d
  872. add r14d,edx
  873. ror r13d,14
  874. mov edx,r14d
  875. mov r12d,eax
  876. xor r13d,r11d
  877. ror r14d,9
  878. xor r12d,ebx
  879. ror r13d,5
  880. xor r14d,edx
  881. and r12d,r11d
  882. vaesenc xmm9,xmm9,xmm10
  883. vmovdqu xmm10,XMMWORD PTR[((80-128))+rdi]
  884. xor r13d,r11d
  885. add ecx,DWORD PTR[20+rsp]
  886. mov esi,edx
  887. ror r14d,11
  888. xor r12d,ebx
  889. xor esi,r8d
  890. ror r13d,6
  891. add ecx,r12d
  892. and r15d,esi
  893. xor r14d,edx
  894. add ecx,r13d
  895. xor r15d,r8d
  896. add r10d,ecx
  897. ror r14d,2
  898. add ecx,r15d
  899. mov r13d,r10d
  900. add r14d,ecx
  901. ror r13d,14
  902. mov ecx,r14d
  903. mov r12d,r11d
  904. xor r13d,r10d
  905. ror r14d,9
  906. xor r12d,eax
  907. ror r13d,5
  908. xor r14d,ecx
  909. and r12d,r10d
  910. vaesenc xmm9,xmm9,xmm10
  911. vmovdqu xmm10,XMMWORD PTR[((96-128))+rdi]
  912. xor r13d,r10d
  913. add ebx,DWORD PTR[24+rsp]
  914. mov r15d,ecx
  915. ror r14d,11
  916. xor r12d,eax
  917. xor r15d,edx
  918. ror r13d,6
  919. add ebx,r12d
  920. and esi,r15d
  921. xor r14d,ecx
  922. add ebx,r13d
  923. xor esi,edx
  924. add r9d,ebx
  925. ror r14d,2
  926. add ebx,esi
  927. mov r13d,r9d
  928. add r14d,ebx
  929. ror r13d,14
  930. mov ebx,r14d
  931. mov r12d,r10d
  932. xor r13d,r9d
  933. ror r14d,9
  934. xor r12d,r11d
  935. ror r13d,5
  936. xor r14d,ebx
  937. and r12d,r9d
  938. vaesenc xmm9,xmm9,xmm10
  939. vmovdqu xmm10,XMMWORD PTR[((112-128))+rdi]
  940. xor r13d,r9d
  941. add eax,DWORD PTR[28+rsp]
  942. mov esi,ebx
  943. ror r14d,11
  944. xor r12d,r11d
  945. xor esi,ecx
  946. ror r13d,6
  947. add eax,r12d
  948. and r15d,esi
  949. xor r14d,ebx
  950. add eax,r13d
  951. xor r15d,ecx
  952. add r8d,eax
  953. ror r14d,2
  954. add eax,r15d
  955. mov r13d,r8d
  956. add r14d,eax
  957. ror r13d,14
  958. mov eax,r14d
  959. mov r12d,r9d
  960. xor r13d,r8d
  961. ror r14d,9
  962. xor r12d,r10d
  963. ror r13d,5
  964. xor r14d,eax
  965. and r12d,r8d
  966. vaesenc xmm9,xmm9,xmm10
  967. vmovdqu xmm10,XMMWORD PTR[((128-128))+rdi]
  968. xor r13d,r8d
  969. add r11d,DWORD PTR[32+rsp]
  970. mov r15d,eax
  971. ror r14d,11
  972. xor r12d,r10d
  973. xor r15d,ebx
  974. ror r13d,6
  975. add r11d,r12d
  976. and esi,r15d
  977. xor r14d,eax
  978. add r11d,r13d
  979. xor esi,ebx
  980. add edx,r11d
  981. ror r14d,2
  982. add r11d,esi
  983. mov r13d,edx
  984. add r14d,r11d
  985. ror r13d,14
  986. mov r11d,r14d
  987. mov r12d,r8d
  988. xor r13d,edx
  989. ror r14d,9
  990. xor r12d,r9d
  991. ror r13d,5
  992. xor r14d,r11d
  993. and r12d,edx
  994. vaesenc xmm9,xmm9,xmm10
  995. vmovdqu xmm10,XMMWORD PTR[((144-128))+rdi]
  996. xor r13d,edx
  997. add r10d,DWORD PTR[36+rsp]
  998. mov esi,r11d
  999. ror r14d,11
  1000. xor r12d,r9d
  1001. xor esi,eax
  1002. ror r13d,6
  1003. add r10d,r12d
  1004. and r15d,esi
  1005. xor r14d,r11d
  1006. add r10d,r13d
  1007. xor r15d,eax
  1008. add ecx,r10d
  1009. ror r14d,2
  1010. add r10d,r15d
  1011. mov r13d,ecx
  1012. add r14d,r10d
  1013. ror r13d,14
  1014. mov r10d,r14d
  1015. mov r12d,edx
  1016. xor r13d,ecx
  1017. ror r14d,9
  1018. xor r12d,r8d
  1019. ror r13d,5
  1020. xor r14d,r10d
  1021. and r12d,ecx
  1022. vaesenc xmm9,xmm9,xmm10
  1023. vmovdqu xmm10,XMMWORD PTR[((160-128))+rdi]
  1024. xor r13d,ecx
  1025. add r9d,DWORD PTR[40+rsp]
  1026. mov r15d,r10d
  1027. ror r14d,11
  1028. xor r12d,r8d
  1029. xor r15d,r11d
  1030. ror r13d,6
  1031. add r9d,r12d
  1032. and esi,r15d
  1033. xor r14d,r10d
  1034. add r9d,r13d
  1035. xor esi,r11d
  1036. add ebx,r9d
  1037. ror r14d,2
  1038. add r9d,esi
  1039. mov r13d,ebx
  1040. add r14d,r9d
  1041. ror r13d,14
  1042. mov r9d,r14d
  1043. mov r12d,ecx
  1044. xor r13d,ebx
  1045. ror r14d,9
  1046. xor r12d,edx
  1047. ror r13d,5
  1048. xor r14d,r9d
  1049. and r12d,ebx
  1050. vaesenclast xmm11,xmm9,xmm10 ; candidate after 10 rounds
  1051. vaesenc xmm9,xmm9,xmm10
  1052. vmovdqu xmm10,XMMWORD PTR[((176-128))+rdi]
  1053. xor r13d,ebx
  1054. add r8d,DWORD PTR[44+rsp]
  1055. mov esi,r9d
  1056. ror r14d,11
  1057. xor r12d,edx
  1058. xor esi,r10d
  1059. ror r13d,6
  1060. add r8d,r12d
  1061. and r15d,esi
  1062. xor r14d,r9d
  1063. add r8d,r13d
  1064. xor r15d,r10d
  1065. add eax,r8d
  1066. ror r14d,2
  1067. add r8d,r15d
  1068. mov r13d,eax
  1069. add r14d,r8d
  1070. ror r13d,14
  1071. mov r8d,r14d
  1072. mov r12d,ebx
  1073. xor r13d,eax
  1074. ror r14d,9
  1075. xor r12d,ecx
  1076. ror r13d,5
  1077. xor r14d,r8d
  1078. and r12d,eax
  1079. vpand xmm8,xmm11,xmm12
  1080. vaesenc xmm9,xmm9,xmm10
  1081. vmovdqu xmm10,XMMWORD PTR[((192-128))+rdi]
  1082. xor r13d,eax
  1083. add edx,DWORD PTR[48+rsp]
  1084. mov r15d,r8d
  1085. ror r14d,11
  1086. xor r12d,ecx
  1087. xor r15d,r9d
  1088. ror r13d,6
  1089. add edx,r12d
  1090. and esi,r15d
  1091. xor r14d,r8d
  1092. add edx,r13d
  1093. xor esi,r9d
  1094. add r11d,edx
  1095. ror r14d,2
  1096. add edx,esi
  1097. mov r13d,r11d
  1098. add r14d,edx
  1099. ror r13d,14
  1100. mov edx,r14d
  1101. mov r12d,eax
  1102. xor r13d,r11d
  1103. ror r14d,9
  1104. xor r12d,ebx
  1105. ror r13d,5
  1106. xor r14d,edx
  1107. and r12d,r11d
  1108. vaesenclast xmm11,xmm9,xmm10 ; candidate after 12 rounds
  1109. vaesenc xmm9,xmm9,xmm10
  1110. vmovdqu xmm10,XMMWORD PTR[((208-128))+rdi]
  1111. xor r13d,r11d
  1112. add ecx,DWORD PTR[52+rsp]
  1113. mov esi,edx
  1114. ror r14d,11
  1115. xor r12d,ebx
  1116. xor esi,r8d
  1117. ror r13d,6
  1118. add ecx,r12d
  1119. and r15d,esi
  1120. xor r14d,edx
  1121. add ecx,r13d
  1122. xor r15d,r8d
  1123. add r10d,ecx
  1124. ror r14d,2
  1125. add ecx,r15d
  1126. mov r13d,r10d
  1127. add r14d,ecx
  1128. ror r13d,14
  1129. mov ecx,r14d
  1130. mov r12d,r11d
  1131. xor r13d,r10d
  1132. ror r14d,9
  1133. xor r12d,eax
  1134. ror r13d,5
  1135. xor r14d,ecx
  1136. and r12d,r10d
  1137. vpand xmm11,xmm11,xmm13
  1138. vaesenc xmm9,xmm9,xmm10
  1139. vmovdqu xmm10,XMMWORD PTR[((224-128))+rdi]
  1140. xor r13d,r10d
  1141. add ebx,DWORD PTR[56+rsp]
  1142. mov r15d,ecx
  1143. ror r14d,11
  1144. xor r12d,eax
  1145. xor r15d,edx
  1146. ror r13d,6
  1147. add ebx,r12d
  1148. and esi,r15d
  1149. xor r14d,ecx
  1150. add ebx,r13d
  1151. xor esi,edx
  1152. add r9d,ebx
  1153. ror r14d,2
  1154. add ebx,esi
  1155. mov r13d,r9d
  1156. add r14d,ebx
  1157. ror r13d,14
  1158. mov ebx,r14d
  1159. mov r12d,r10d
  1160. xor r13d,r9d
  1161. ror r14d,9
  1162. xor r12d,r11d
  1163. ror r13d,5
  1164. xor r14d,ebx
  1165. and r12d,r9d
  1166. vpor xmm8,xmm8,xmm11
  1167. vaesenclast xmm11,xmm9,xmm10 ; candidate after 14 rounds
  1168. vmovdqu xmm10,XMMWORD PTR[((0-128))+rdi]
  1169. xor r13d,r9d
  1170. add eax,DWORD PTR[60+rsp]
  1171. mov esi,ebx
  1172. ror r14d,11
  1173. xor r12d,r11d
  1174. xor esi,ecx
  1175. ror r13d,6
  1176. add eax,r12d
  1177. and r15d,esi
  1178. xor r14d,ebx
  1179. add eax,r13d
  1180. xor r15d,ecx
  1181. add r8d,eax
  1182. ror r14d,2
  1183. add eax,r15d
  1184. mov r13d,r8d
  1185. add r14d,eax
  ; ---- end of the 64-byte chunk: reload spilled pointers, emit the fourth
  ; ---- ciphertext block, fold the working variables into the hash state ----
  1186. mov r12,QWORD PTR[((64+0))+rsp] ; AES input pointer
  1187. mov r13,QWORD PTR[((64+8))+rsp] ; arg2 - arg1
  1188. mov r15,QWORD PTR[((64+40))+rsp] ; arg6 (hash state)
  1189. mov rsi,QWORD PTR[((64+48))+rsp] ; arg7 - arg1, needed again at the loop top
  1190. vpand xmm11,xmm11,xmm14
  1191. mov eax,r14d
  1192. vpor xmm8,xmm8,xmm11
  1193. vmovdqu XMMWORD PTR[r13*1+r12],xmm8
  1194. lea r12,QWORD PTR[16+r12]
  1195. add eax,DWORD PTR[r15]
  1196. add ebx,DWORD PTR[4+r15]
  1197. add ecx,DWORD PTR[8+r15]
  1198. add edx,DWORD PTR[12+r15]
  1199. add r8d,DWORD PTR[16+r15]
  1200. add r9d,DWORD PTR[20+r15]
  1201. add r10d,DWORD PTR[24+r15]
  1202. add r11d,DWORD PTR[28+r15]
  1203. cmp r12,QWORD PTR[((64+16))+rsp] ; reached the end pointer? (the mov stores below leave flags intact)
  1204. mov DWORD PTR[r15],eax
  1205. mov DWORD PTR[4+r15],ebx
  1206. mov DWORD PTR[8+r15],ecx
  1207. mov DWORD PTR[12+r15],edx
  1208. mov DWORD PTR[16+r15],r8d
  1209. mov DWORD PTR[20+r15],r9d
  1210. mov DWORD PTR[24+r15],r10d
  1211. mov DWORD PTR[28+r15],r11d
  1212. jb $L$loop_xop ; unsigned compare: continue while input pointer < end
  ; ---- epilogue ----
  1213. mov r8,QWORD PTR[((64+32))+rsp] ; r8 = arg5
  1214. mov rsi,QWORD PTR[120+rsp] ; rsi = rsp as saved before the pushes
  1215. vmovdqu XMMWORD PTR[r8],xmm8 ; write the last ciphertext block back as the chaining value
  1216. vzeroall ; clear all vector registers before the saved xmm6-xmm15 are reloaded
  1217. movaps xmm6,XMMWORD PTR[128+rsp]
  1218. movaps xmm7,XMMWORD PTR[144+rsp]
  1219. movaps xmm8,XMMWORD PTR[160+rsp]
  1220. movaps xmm9,XMMWORD PTR[176+rsp]
  1221. movaps xmm10,XMMWORD PTR[192+rsp]
  1222. movaps xmm11,XMMWORD PTR[208+rsp]
  1223. movaps xmm12,XMMWORD PTR[224+rsp]
  1224. movaps xmm13,XMMWORD PTR[240+rsp]
  1225. movaps xmm14,XMMWORD PTR[256+rsp]
  1226. movaps xmm15,XMMWORD PTR[272+rsp]
  ; The six pushed registers sit just below the saved rsp value.
  1227. mov r15,QWORD PTR[((-48))+rsi]
  1228. mov r14,QWORD PTR[((-40))+rsi]
  1229. mov r13,QWORD PTR[((-32))+rsi]
  1230. mov r12,QWORD PTR[((-24))+rsi]
  1231. mov rbp,QWORD PTR[((-16))+rsi]
  1232. mov rbx,QWORD PTR[((-8))+rsi]
  1233. lea rsp,QWORD PTR[rsi] ; discard the aligned frame and the pushes in one step
  1234. $L$epilogue_xop::
  1235. mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue
  1236. mov rsi,QWORD PTR[16+rsp]
  1237. DB 0F3h,0C3h ;repret
  1238. $L$SEH_end_aesni_cbc_sha256_enc_xop::
  1239. aesni_cbc_sha256_enc_xop ENDP
  1240. ALIGN 64
  ; ---------------------------------------------------------------------
  ; aesni_cbc_sha256_enc_avx -- AVX code path, Win64 calling convention.
  ;
  ; Interleaves AES-CBC encryption (AES-NI) with SHA-256 compression so
  ; that each 64-byte hash block processed also encrypts four 16-byte
  ; AES blocks.
  ;
  ; Arguments, described by how this routine uses them:
  ;   arg1 rcx       -> AES input pointer; 16-byte blocks are read from it
  ;   arg2 rdx       -> AES output pointer; kept only as (arg2 - arg1)
  ;   arg3 r8        -> number of 64-byte blocks (shifted left by 6 below)
  ;   arg4 r9        -> AES key schedule; 16-byte round keys at 0..224 and
  ;                     a dword at offset 240 (presumably the round count)
  ;   arg5 [40+rsp]  -> 16-byte IV, read on entry and written back on exit
  ;   arg6 [48+rsp]  -> eight 32-bit hash state words, updated in place
  ;   arg7 [56+rsp]  -> data to be hashed; kept only as (arg7 - arg1)
  ;
  ; Frame after "and rsp,-64" (offsets from the aligned rsp):
  ;     0..63   four 16-byte slots holding message words + round constants
  ;    64+0     parked AES input pointer (r12 is scratch inside the rounds)
  ;    64+8     output offset  (arg2 - arg1)
  ;    64+16    end of input   (arg1 + 64*arg3)
  ;    64+32    IV pointer
  ;    64+40    hash state pointer
  ;    64+48    hash-data offset (arg7 - arg1)
  ;   120       caller's rsp as it was before the pushes
  ;   128..287  save area for xmm6..xmm15 (callee-saved on Win64)
  ;
  ; Register roles inside the rounds:
  ;   eax,ebx,ecx,edx,r8d,r9d,r10d,r11d  eight hash working variables; their
  ;                            roles rotate by one register every round
  ;   r13d  ror25^ror11^ror6 of the current "e" (built by shrd 14/5/6)
  ;   r14d  ror22^ror13^ror2 of the current "a" (built by shrd 9/11/2); the
  ;         new "a" is completed lazily and copied out at the next round
  ;   r12d  ((f^g)&e)^g ; esi/r15d alternate as the majority-term helper
  ;   xmm0..xmm3  the sixteen current message-schedule words
  ;   xmm8  CBC chaining value, xmm9 AES state, xmm10 current round key
  ;   xmm11 candidate last-round result, xmm12..xmm14 selection masks
  ;   rbp   cursor into K256, rdi = key schedule + 128
  ; ---------------------------------------------------------------------
  1241. aesni_cbc_sha256_enc_avx PROC PRIVATE
  1242. mov QWORD PTR[8+rsp],rdi ;WIN64 prologue
  1243. mov QWORD PTR[16+rsp],rsi
  1244. mov rax,rsp
  1245. $L$SEH_begin_aesni_cbc_sha256_enc_avx::
  ; Shuffle the Win64 argument registers into rdi,rsi,rdx,rcx,r8,r9 and
  ; pick up the fifth and sixth arguments from the stack.
  1246. mov rdi,rcx
  1247. mov rsi,rdx
  1248. mov rdx,r8
  1249. mov rcx,r9
  1250. mov r8,QWORD PTR[40+rsp]
  1251. mov r9,QWORD PTR[48+rsp]
  1252. $L$avx_shortcut::
  ; Seventh argument; rax keeps the entry rsp so the epilogue can find the
  ; pushed registers after rsp has been realigned.
  1253. mov r10,QWORD PTR[56+rsp]
  1254. mov rax,rsp
  1255. push rbx
  1256. push rbp
  1257. push r12
  1258. push r13
  1259. push r14
  1260. push r15
  ; Reserve the frame and align it to 64 bytes (vmovdqa/movaps below need
  ; aligned slots).
  1261. sub rsp,288
  1262. and rsp,-64
  ; rdx = end of input (arg1 + 64*count); rsi and r10 become offsets
  ; relative to the input pointer so a single moving pointer serves all
  ; three streams.
  1263. shl rdx,6
  1264. sub rsi,rdi
  1265. sub r10,rdi
  1266. add rdx,rdi
  1267. mov QWORD PTR[((64+8))+rsp],rsi
  1268. mov QWORD PTR[((64+16))+rsp],rdx
  1269. mov QWORD PTR[((64+32))+rsp],r8
  1270. mov QWORD PTR[((64+40))+rsp],r9
  1271. mov QWORD PTR[((64+48))+rsp],r10
  1272. mov QWORD PTR[120+rsp],rax
  ; Preserve the Win64 callee-saved vector registers.
  1273. movaps XMMWORD PTR[128+rsp],xmm6
  1274. movaps XMMWORD PTR[144+rsp],xmm7
  1275. movaps XMMWORD PTR[160+rsp],xmm8
  1276. movaps XMMWORD PTR[176+rsp],xmm9
  1277. movaps XMMWORD PTR[192+rsp],xmm10
  1278. movaps XMMWORD PTR[208+rsp],xmm11
  1279. movaps XMMWORD PTR[224+rsp],xmm12
  1280. movaps XMMWORD PTR[240+rsp],xmm13
  1281. movaps XMMWORD PTR[256+rsp],xmm14
  1282. movaps XMMWORD PTR[272+rsp],xmm15
  1283. $L$prologue_avx::
  1284. vzeroall
  ; r12 = moving AES input pointer; rdi = key schedule biased by +128 so
  ; every round-key displacement below fits in a signed byte.
  1285. mov r12,rdi
  1286. lea rdi,QWORD PTR[128+rcx]
  1287. lea r13,QWORD PTR[((K256+544))]
  ; Dword at key schedule offset 240 (presumably the AES round count).
  1288. mov r14d,DWORD PTR[((240-128))+rdi]
  1289. mov r15,r9
  1290. mov rsi,r10
  ; xmm8 = IV, the initial CBC chaining value.
  1291. vmovdqu xmm8,XMMWORD PTR[r8]
  1292. sub r14,9
  ; Load the eight hash state words into the working registers.
  1293. mov eax,DWORD PTR[r15]
  1294. mov ebx,DWORD PTR[4+r15]
  1295. mov ecx,DWORD PTR[8+r15]
  1296. mov edx,DWORD PTR[12+r15]
  1297. mov r8d,DWORD PTR[16+r15]
  1298. mov r9d,DWORD PTR[20+r15]
  1299. mov r10d,DWORD PTR[24+r15]
  1300. mov r11d,DWORD PTR[28+r15]
  ; Three 16-byte masks indexed by (value at key+240) - 9. They are ANDed
  ; with the three candidate last-round results further down: xmm12 with
  ; the one using round key 10, xmm13 with round key 12, xmm14 with round
  ; key 14. NOTE(review): the table at K256+544 is not visible in this part
  ; of the file; presumably exactly one mask is all-ones -- confirm.
  1301. vmovdqa xmm14,XMMWORD PTR[r14*8+r13]
  1302. vmovdqa xmm13,XMMWORD PTR[16+r14*8+r13]
  1303. vmovdqa xmm12,XMMWORD PTR[32+r14*8+r13]
  ; xmm10 = round key 0.
  1304. vmovdqu xmm10,XMMWORD PTR[((0-128))+rdi]
  1305. jmp $L$loop_avx
  1306. ALIGN 16
  ; ---------------------------------------------------------------------
  ; Outer loop: one 64-byte hash block (and four AES blocks) per pass.
  ; On entry r12 = AES input pointer, rsi = hash-data offset.
  ; ---------------------------------------------------------------------
  1307. $L$loop_avx::
  ; Load the 64 bytes to hash and run them through the vpshufb mask at
  ; K256+512 (presumably a byte swap; mask contents not visible here).
  1308. vmovdqa xmm7,XMMWORD PTR[((K256+512))]
  1309. vmovdqu xmm0,XMMWORD PTR[r12*1+rsi]
  1310. vmovdqu xmm1,XMMWORD PTR[16+r12*1+rsi]
  1311. vmovdqu xmm2,XMMWORD PTR[32+r12*1+rsi]
  1312. vmovdqu xmm3,XMMWORD PTR[48+r12*1+rsi]
  1313. vpshufb xmm0,xmm0,xmm7
  1314. lea rbp,QWORD PTR[K256]
  1315. vpshufb xmm1,xmm1,xmm7
  1316. vpshufb xmm2,xmm2,xmm7
  ; Add the first sixteen round constants. K256 stores every 16-byte row
  ; twice, hence the 32-byte stride between consecutive groups of four.
  1317. vpaddd xmm4,xmm0,XMMWORD PTR[rbp]
  1318. vpshufb xmm3,xmm3,xmm7
  1319. vpaddd xmm5,xmm1,XMMWORD PTR[32+rbp]
  1320. vpaddd xmm6,xmm2,XMMWORD PTR[64+rbp]
  1321. vpaddd xmm7,xmm3,XMMWORD PTR[96+rbp]
  ; Stage word+constant sums in the frame for the scalar rounds, and seed
  ; the round helpers: r14d = a, esi = b^c, r13d = e.
  1322. vmovdqa XMMWORD PTR[rsp],xmm4
  1323. mov r14d,eax
  1324. vmovdqa XMMWORD PTR[16+rsp],xmm5
  1325. mov esi,ebx
  1326. vmovdqa XMMWORD PTR[32+rsp],xmm6
  1327. xor esi,ecx
  1328. vmovdqa XMMWORD PTR[48+rsp],xmm7
  1329. mov r13d,r8d
  1330. jmp $L$avx_00_47
  1331. ALIGN 16
  ; ---------------------------------------------------------------------
  ; Sixteen rounds with message-schedule update, plus one AES-CBC block.
  ; Each pass advances rbp by 128 bytes (the next sixteen constants).
  ; ---------------------------------------------------------------------
  1332. $L$avx_00_47::
  1333. sub rbp,-16*2*4
  ; Fetch the next AES input block, then park r12 in the frame because the
  ; rounds below overwrite r12d.
  1334. vmovdqu xmm9,XMMWORD PTR[r12]
  1335. mov QWORD PTR[((64+0))+rsp],r12
  ; --- schedule update for xmm0, interleaved with 4 rounds using [0..12+rsp]
  1336. vpalignr xmm4,xmm1,xmm0,4
  1337. shrd r13d,r13d,14
  1338. mov eax,r14d
  1339. mov r12d,r9d
  1340. vpalignr xmm7,xmm3,xmm2,4
  1341. xor r13d,r8d
  1342. shrd r14d,r14d,9
  1343. xor r12d,r10d
  1344. vpsrld xmm6,xmm4,7
  1345. shrd r13d,r13d,5
  1346. xor r14d,eax
  1347. and r12d,r8d
  1348. vpaddd xmm0,xmm0,xmm7
  ; AES: xor the input block with round key 0 (the CBC xor with xmm8
  ; follows a few lines below).
  1349. vpxor xmm9,xmm9,xmm10
  1350. vmovdqu xmm10,XMMWORD PTR[((16-128))+rdi]
  1351. xor r13d,r8d
  1352. add r11d,DWORD PTR[rsp]
  1353. mov r15d,eax
  1354. vpsrld xmm7,xmm4,3
  1355. shrd r14d,r14d,11
  1356. xor r12d,r10d
  1357. xor r15d,ebx
  1358. vpslld xmm5,xmm4,14
  1359. shrd r13d,r13d,6
  1360. add r11d,r12d
  1361. and esi,r15d
  1362. vpxor xmm4,xmm7,xmm6
  1363. xor r14d,eax
  1364. add r11d,r13d
  1365. xor esi,ebx
  1366. vpshufd xmm7,xmm3,250
  1367. add edx,r11d
  1368. shrd r14d,r14d,2
  1369. add r11d,esi
  1370. vpsrld xmm6,xmm6,11
  1371. mov r13d,edx
  1372. add r14d,r11d
  1373. shrd r13d,r13d,14
  1374. vpxor xmm4,xmm4,xmm5
  1375. mov r11d,r14d
  1376. mov r12d,r8d
  1377. xor r13d,edx
  1378. vpslld xmm5,xmm5,11
  1379. shrd r14d,r14d,9
  1380. xor r12d,r9d
  1381. shrd r13d,r13d,5
  1382. vpxor xmm4,xmm4,xmm6
  1383. xor r14d,r11d
  1384. and r12d,edx
  ; AES: CBC chaining -- xor in the previous ciphertext block (or the IV).
  1385. vpxor xmm9,xmm9,xmm8
  1386. xor r13d,edx
  1387. vpsrld xmm6,xmm7,10
  1388. add r10d,DWORD PTR[4+rsp]
  1389. mov esi,r11d
  1390. shrd r14d,r14d,11
  1391. vpxor xmm4,xmm4,xmm5
  1392. xor r12d,r9d
  1393. xor esi,eax
  1394. shrd r13d,r13d,6
  1395. vpsrlq xmm7,xmm7,17
  1396. add r10d,r12d
  1397. and r15d,esi
  1398. xor r14d,r11d
  1399. vpaddd xmm0,xmm0,xmm4
  1400. add r10d,r13d
  1401. xor r15d,eax
  1402. add ecx,r10d
  1403. vpxor xmm6,xmm6,xmm7
  1404. shrd r14d,r14d,2
  1405. add r10d,r15d
  1406. mov r13d,ecx
  1407. vpsrlq xmm7,xmm7,2
  1408. add r14d,r10d
  1409. shrd r13d,r13d,14
  1410. mov r10d,r14d
  1411. vpxor xmm6,xmm6,xmm7
  1412. mov r12d,edx
  1413. xor r13d,ecx
  1414. shrd r14d,r14d,9
  1415. vpshufd xmm6,xmm6,132
  1416. xor r12d,r8d
  1417. shrd r13d,r13d,5
  1418. xor r14d,r10d
  1419. vpsrldq xmm6,xmm6,8
  1420. and r12d,ecx
  ; AES round with key 1.
  1421. vaesenc xmm9,xmm9,xmm10
  1422. vmovdqu xmm10,XMMWORD PTR[((32-128))+rdi]
  1423. xor r13d,ecx
  1424. add r9d,DWORD PTR[8+rsp]
  1425. vpaddd xmm0,xmm0,xmm6
  1426. mov r15d,r10d
  1427. shrd r14d,r14d,11
  1428. xor r12d,r8d
  1429. vpshufd xmm7,xmm0,80
  1430. xor r15d,r11d
  1431. shrd r13d,r13d,6
  1432. add r9d,r12d
  1433. vpsrld xmm6,xmm7,10
  1434. and esi,r15d
  1435. xor r14d,r10d
  1436. add r9d,r13d
  1437. vpsrlq xmm7,xmm7,17
  1438. xor esi,r11d
  1439. add ebx,r9d
  1440. shrd r14d,r14d,2
  1441. vpxor xmm6,xmm6,xmm7
  1442. add r9d,esi
  1443. mov r13d,ebx
  1444. add r14d,r9d
  1445. vpsrlq xmm7,xmm7,2
  1446. shrd r13d,r13d,14
  1447. mov r9d,r14d
  1448. mov r12d,ecx
  1449. vpxor xmm6,xmm6,xmm7
  1450. xor r13d,ebx
  1451. shrd r14d,r14d,9
  1452. xor r12d,edx
  1453. vpshufd xmm6,xmm6,232
  1454. shrd r13d,r13d,5
  1455. xor r14d,r9d
  1456. and r12d,ebx
  1457. vpslldq xmm6,xmm6,8
  ; AES round with key 2.
  1458. vaesenc xmm9,xmm9,xmm10
  1459. vmovdqu xmm10,XMMWORD PTR[((48-128))+rdi]
  1460. xor r13d,ebx
  1461. add r8d,DWORD PTR[12+rsp]
  1462. mov esi,r9d
  1463. vpaddd xmm0,xmm0,xmm6
  1464. shrd r14d,r14d,11
  1465. xor r12d,edx
  1466. xor esi,r10d
  ; xmm0 now holds four new schedule words; add their constants. The sum
  ; is written to [rsp] once the four rounds reading the old slot are done.
  1467. vpaddd xmm6,xmm0,XMMWORD PTR[rbp]
  1468. shrd r13d,r13d,6
  1469. add r8d,r12d
  1470. and r15d,esi
  1471. xor r14d,r9d
  1472. add r8d,r13d
  1473. xor r15d,r10d
  1474. add eax,r8d
  1475. shrd r14d,r14d,2
  1476. add r8d,r15d
  1477. mov r13d,eax
  1478. add r14d,r8d
  1479. vmovdqa XMMWORD PTR[rsp],xmm6
  ; --- schedule update for xmm1, interleaved with 4 rounds using [16..28+rsp]
  1480. vpalignr xmm4,xmm2,xmm1,4
  1481. shrd r13d,r13d,14
  1482. mov r8d,r14d
  1483. mov r12d,ebx
  1484. vpalignr xmm7,xmm0,xmm3,4
  1485. xor r13d,eax
  1486. shrd r14d,r14d,9
  1487. xor r12d,ecx
  1488. vpsrld xmm6,xmm4,7
  1489. shrd r13d,r13d,5
  1490. xor r14d,r8d
  1491. and r12d,eax
  1492. vpaddd xmm1,xmm1,xmm7
  ; AES round with key 3.
  1493. vaesenc xmm9,xmm9,xmm10
  1494. vmovdqu xmm10,XMMWORD PTR[((64-128))+rdi]
  1495. xor r13d,eax
  1496. add edx,DWORD PTR[16+rsp]
  1497. mov r15d,r8d
  1498. vpsrld xmm7,xmm4,3
  1499. shrd r14d,r14d,11
  1500. xor r12d,ecx
  1501. xor r15d,r9d
  1502. vpslld xmm5,xmm4,14
  1503. shrd r13d,r13d,6
  1504. add edx,r12d
  1505. and esi,r15d
  1506. vpxor xmm4,xmm7,xmm6
  1507. xor r14d,r8d
  1508. add edx,r13d
  1509. xor esi,r9d
  1510. vpshufd xmm7,xmm0,250
  1511. add r11d,edx
  1512. shrd r14d,r14d,2
  1513. add edx,esi
  1514. vpsrld xmm6,xmm6,11
  1515. mov r13d,r11d
  1516. add r14d,edx
  1517. shrd r13d,r13d,14
  1518. vpxor xmm4,xmm4,xmm5
  1519. mov edx,r14d
  1520. mov r12d,eax
  1521. xor r13d,r11d
  1522. vpslld xmm5,xmm5,11
  1523. shrd r14d,r14d,9
  1524. xor r12d,ebx
  1525. shrd r13d,r13d,5
  1526. vpxor xmm4,xmm4,xmm6
  1527. xor r14d,edx
  1528. and r12d,r11d
  ; AES round with key 4.
  1529. vaesenc xmm9,xmm9,xmm10
  1530. vmovdqu xmm10,XMMWORD PTR[((80-128))+rdi]
  1531. xor r13d,r11d
  1532. vpsrld xmm6,xmm7,10
  1533. add ecx,DWORD PTR[20+rsp]
  1534. mov esi,edx
  1535. shrd r14d,r14d,11
  1536. vpxor xmm4,xmm4,xmm5
  1537. xor r12d,ebx
  1538. xor esi,r8d
  1539. shrd r13d,r13d,6
  1540. vpsrlq xmm7,xmm7,17
  1541. add ecx,r12d
  1542. and r15d,esi
  1543. xor r14d,edx
  1544. vpaddd xmm1,xmm1,xmm4
  1545. add ecx,r13d
  1546. xor r15d,r8d
  1547. add r10d,ecx
  1548. vpxor xmm6,xmm6,xmm7
  1549. shrd r14d,r14d,2
  1550. add ecx,r15d
  1551. mov r13d,r10d
  1552. vpsrlq xmm7,xmm7,2
  1553. add r14d,ecx
  1554. shrd r13d,r13d,14
  1555. mov ecx,r14d
  1556. vpxor xmm6,xmm6,xmm7
  1557. mov r12d,r11d
  1558. xor r13d,r10d
  1559. shrd r14d,r14d,9
  1560. vpshufd xmm6,xmm6,132
  1561. xor r12d,eax
  1562. shrd r13d,r13d,5
  1563. xor r14d,ecx
  1564. vpsrldq xmm6,xmm6,8
  1565. and r12d,r10d
  ; AES round with key 5.
  1566. vaesenc xmm9,xmm9,xmm10
  1567. vmovdqu xmm10,XMMWORD PTR[((96-128))+rdi]
  1568. xor r13d,r10d
  1569. add ebx,DWORD PTR[24+rsp]
  1570. vpaddd xmm1,xmm1,xmm6
  1571. mov r15d,ecx
  1572. shrd r14d,r14d,11
  1573. xor r12d,eax
  1574. vpshufd xmm7,xmm1,80
  1575. xor r15d,edx
  1576. shrd r13d,r13d,6
  1577. add ebx,r12d
  1578. vpsrld xmm6,xmm7,10
  1579. and esi,r15d
  1580. xor r14d,ecx
  1581. add ebx,r13d
  1582. vpsrlq xmm7,xmm7,17
  1583. xor esi,edx
  1584. add r9d,ebx
  1585. shrd r14d,r14d,2
  1586. vpxor xmm6,xmm6,xmm7
  1587. add ebx,esi
  1588. mov r13d,r9d
  1589. add r14d,ebx
  1590. vpsrlq xmm7,xmm7,2
  1591. shrd r13d,r13d,14
  1592. mov ebx,r14d
  1593. mov r12d,r10d
  1594. vpxor xmm6,xmm6,xmm7
  1595. xor r13d,r9d
  1596. shrd r14d,r14d,9
  1597. xor r12d,r11d
  1598. vpshufd xmm6,xmm6,232
  1599. shrd r13d,r13d,5
  1600. xor r14d,ebx
  1601. and r12d,r9d
  1602. vpslldq xmm6,xmm6,8
  ; AES round with key 6.
  1603. vaesenc xmm9,xmm9,xmm10
  1604. vmovdqu xmm10,XMMWORD PTR[((112-128))+rdi]
  1605. xor r13d,r9d
  1606. add eax,DWORD PTR[28+rsp]
  1607. mov esi,ebx
  1608. vpaddd xmm1,xmm1,xmm6
  1609. shrd r14d,r14d,11
  1610. xor r12d,r11d
  1611. xor esi,ecx
  ; New xmm1 words + constants, staged to [16+rsp] below.
  1612. vpaddd xmm6,xmm1,XMMWORD PTR[32+rbp]
  1613. shrd r13d,r13d,6
  1614. add eax,r12d
  1615. and r15d,esi
  1616. xor r14d,ebx
  1617. add eax,r13d
  1618. xor r15d,ecx
  1619. add r8d,eax
  1620. shrd r14d,r14d,2
  1621. add eax,r15d
  1622. mov r13d,r8d
  1623. add r14d,eax
  1624. vmovdqa XMMWORD PTR[16+rsp],xmm6
  ; --- schedule update for xmm2, interleaved with 4 rounds using [32..44+rsp]
  1625. vpalignr xmm4,xmm3,xmm2,4
  1626. shrd r13d,r13d,14
  1627. mov eax,r14d
  1628. mov r12d,r9d
  1629. vpalignr xmm7,xmm1,xmm0,4
  1630. xor r13d,r8d
  1631. shrd r14d,r14d,9
  1632. xor r12d,r10d
  1633. vpsrld xmm6,xmm4,7
  1634. shrd r13d,r13d,5
  1635. xor r14d,eax
  1636. and r12d,r8d
  1637. vpaddd xmm2,xmm2,xmm7
  ; AES round with key 7.
  1638. vaesenc xmm9,xmm9,xmm10
  1639. vmovdqu xmm10,XMMWORD PTR[((128-128))+rdi]
  1640. xor r13d,r8d
  1641. add r11d,DWORD PTR[32+rsp]
  1642. mov r15d,eax
  1643. vpsrld xmm7,xmm4,3
  1644. shrd r14d,r14d,11
  1645. xor r12d,r10d
  1646. xor r15d,ebx
  1647. vpslld xmm5,xmm4,14
  1648. shrd r13d,r13d,6
  1649. add r11d,r12d
  1650. and esi,r15d
  1651. vpxor xmm4,xmm7,xmm6
  1652. xor r14d,eax
  1653. add r11d,r13d
  1654. xor esi,ebx
  1655. vpshufd xmm7,xmm1,250
  1656. add edx,r11d
  1657. shrd r14d,r14d,2
  1658. add r11d,esi
  1659. vpsrld xmm6,xmm6,11
  1660. mov r13d,edx
  1661. add r14d,r11d
  1662. shrd r13d,r13d,14
  1663. vpxor xmm4,xmm4,xmm5
  1664. mov r11d,r14d
  1665. mov r12d,r8d
  1666. xor r13d,edx
  1667. vpslld xmm5,xmm5,11
  1668. shrd r14d,r14d,9
  1669. xor r12d,r9d
  1670. shrd r13d,r13d,5
  1671. vpxor xmm4,xmm4,xmm6
  1672. xor r14d,r11d
  1673. and r12d,edx
  ; AES round with key 8.
  1674. vaesenc xmm9,xmm9,xmm10
  1675. vmovdqu xmm10,XMMWORD PTR[((144-128))+rdi]
  1676. xor r13d,edx
  1677. vpsrld xmm6,xmm7,10
  1678. add r10d,DWORD PTR[36+rsp]
  1679. mov esi,r11d
  1680. shrd r14d,r14d,11
  1681. vpxor xmm4,xmm4,xmm5
  1682. xor r12d,r9d
  1683. xor esi,eax
  1684. shrd r13d,r13d,6
  1685. vpsrlq xmm7,xmm7,17
  1686. add r10d,r12d
  1687. and r15d,esi
  1688. xor r14d,r11d
  1689. vpaddd xmm2,xmm2,xmm4
  1690. add r10d,r13d
  1691. xor r15d,eax
  1692. add ecx,r10d
  1693. vpxor xmm6,xmm6,xmm7
  1694. shrd r14d,r14d,2
  1695. add r10d,r15d
  1696. mov r13d,ecx
  1697. vpsrlq xmm7,xmm7,2
  1698. add r14d,r10d
  1699. shrd r13d,r13d,14
  1700. mov r10d,r14d
  1701. vpxor xmm6,xmm6,xmm7
  1702. mov r12d,edx
  1703. xor r13d,ecx
  1704. shrd r14d,r14d,9
  1705. vpshufd xmm6,xmm6,132
  1706. xor r12d,r8d
  1707. shrd r13d,r13d,5
  1708. xor r14d,r10d
  1709. vpsrldq xmm6,xmm6,8
  1710. and r12d,ecx
  ; AES round with key 9.
  1711. vaesenc xmm9,xmm9,xmm10
  1712. vmovdqu xmm10,XMMWORD PTR[((160-128))+rdi]
  1713. xor r13d,ecx
  1714. add r9d,DWORD PTR[40+rsp]
  1715. vpaddd xmm2,xmm2,xmm6
  1716. mov r15d,r10d
  1717. shrd r14d,r14d,11
  1718. xor r12d,r8d
  1719. vpshufd xmm7,xmm2,80
  1720. xor r15d,r11d
  1721. shrd r13d,r13d,6
  1722. add r9d,r12d
  1723. vpsrld xmm6,xmm7,10
  1724. and esi,r15d
  1725. xor r14d,r10d
  1726. add r9d,r13d
  1727. vpsrlq xmm7,xmm7,17
  1728. xor esi,r11d
  1729. add ebx,r9d
  1730. shrd r14d,r14d,2
  1731. vpxor xmm6,xmm6,xmm7
  1732. add r9d,esi
  1733. mov r13d,ebx
  1734. add r14d,r9d
  1735. vpsrlq xmm7,xmm7,2
  1736. shrd r13d,r13d,14
  1737. mov r9d,r14d
  1738. mov r12d,ecx
  1739. vpxor xmm6,xmm6,xmm7
  1740. xor r13d,ebx
  1741. shrd r14d,r14d,9
  1742. xor r12d,edx
  1743. vpshufd xmm6,xmm6,232
  1744. shrd r13d,r13d,5
  1745. xor r14d,r9d
  1746. and r12d,ebx
  1747. vpslldq xmm6,xmm6,8
  ; Round key 10 is used two ways: as a last round (candidate result in
  ; xmm11) and as an ordinary round so xmm9 can keep going for longer keys.
  1748. vaesenclast xmm11,xmm9,xmm10
  1749. vaesenc xmm9,xmm9,xmm10
  1750. vmovdqu xmm10,XMMWORD PTR[((176-128))+rdi]
  1751. xor r13d,ebx
  1752. add r8d,DWORD PTR[44+rsp]
  1753. mov esi,r9d
  1754. vpaddd xmm2,xmm2,xmm6
  1755. shrd r14d,r14d,11
  1756. xor r12d,edx
  1757. xor esi,r10d
  ; New xmm2 words + constants, staged to [32+rsp] below.
  1758. vpaddd xmm6,xmm2,XMMWORD PTR[64+rbp]
  1759. shrd r13d,r13d,6
  1760. add r8d,r12d
  1761. and r15d,esi
  1762. xor r14d,r9d
  1763. add r8d,r13d
  1764. xor r15d,r10d
  1765. add eax,r8d
  1766. shrd r14d,r14d,2
  1767. add r8d,r15d
  1768. mov r13d,eax
  1769. add r14d,r8d
  1770. vmovdqa XMMWORD PTR[32+rsp],xmm6
  ; --- schedule update for xmm3, interleaved with 4 rounds using [48..60+rsp]
  1771. vpalignr xmm4,xmm0,xmm3,4
  1772. shrd r13d,r13d,14
  1773. mov r8d,r14d
  1774. mov r12d,ebx
  1775. vpalignr xmm7,xmm2,xmm1,4
  1776. xor r13d,eax
  1777. shrd r14d,r14d,9
  1778. xor r12d,ecx
  1779. vpsrld xmm6,xmm4,7
  1780. shrd r13d,r13d,5
  1781. xor r14d,r8d
  1782. and r12d,eax
  1783. vpaddd xmm3,xmm3,xmm7
  ; Keep the key-10 candidate only where mask xmm12 allows; AES round 11.
  1784. vpand xmm8,xmm11,xmm12
  1785. vaesenc xmm9,xmm9,xmm10
  1786. vmovdqu xmm10,XMMWORD PTR[((192-128))+rdi]
  1787. xor r13d,eax
  1788. add edx,DWORD PTR[48+rsp]
  1789. mov r15d,r8d
  1790. vpsrld xmm7,xmm4,3
  1791. shrd r14d,r14d,11
  1792. xor r12d,ecx
  1793. xor r15d,r9d
  1794. vpslld xmm5,xmm4,14
  1795. shrd r13d,r13d,6
  1796. add edx,r12d
  1797. and esi,r15d
  1798. vpxor xmm4,xmm7,xmm6
  1799. xor r14d,r8d
  1800. add edx,r13d
  1801. xor esi,r9d
  1802. vpshufd xmm7,xmm2,250
  1803. add r11d,edx
  1804. shrd r14d,r14d,2
  1805. add edx,esi
  1806. vpsrld xmm6,xmm6,11
  1807. mov r13d,r11d
  1808. add r14d,edx
  1809. shrd r13d,r13d,14
  1810. vpxor xmm4,xmm4,xmm5
  1811. mov edx,r14d
  1812. mov r12d,eax
  1813. xor r13d,r11d
  1814. vpslld xmm5,xmm5,11
  1815. shrd r14d,r14d,9
  1816. xor r12d,ebx
  1817. shrd r13d,r13d,5
  1818. vpxor xmm4,xmm4,xmm6
  1819. xor r14d,edx
  1820. and r12d,r11d
  ; Round key 12: candidate last round in xmm11, and a normal round.
  1821. vaesenclast xmm11,xmm9,xmm10
  1822. vaesenc xmm9,xmm9,xmm10
  1823. vmovdqu xmm10,XMMWORD PTR[((208-128))+rdi]
  1824. xor r13d,r11d
  1825. vpsrld xmm6,xmm7,10
  1826. add ecx,DWORD PTR[52+rsp]
  1827. mov esi,edx
  1828. shrd r14d,r14d,11
  1829. vpxor xmm4,xmm4,xmm5
  1830. xor r12d,ebx
  1831. xor esi,r8d
  1832. shrd r13d,r13d,6
  1833. vpsrlq xmm7,xmm7,17
  1834. add ecx,r12d
  1835. and r15d,esi
  1836. xor r14d,edx
  1837. vpaddd xmm3,xmm3,xmm4
  1838. add ecx,r13d
  1839. xor r15d,r8d
  1840. add r10d,ecx
  1841. vpxor xmm6,xmm6,xmm7
  1842. shrd r14d,r14d,2
  1843. add ecx,r15d
  1844. mov r13d,r10d
  1845. vpsrlq xmm7,xmm7,2
  1846. add r14d,ecx
  1847. shrd r13d,r13d,14
  1848. mov ecx,r14d
  1849. vpxor xmm6,xmm6,xmm7
  1850. mov r12d,r11d
  1851. xor r13d,r10d
  1852. shrd r14d,r14d,9
  1853. vpshufd xmm6,xmm6,132
  1854. xor r12d,eax
  1855. shrd r13d,r13d,5
  1856. xor r14d,ecx
  1857. vpsrldq xmm6,xmm6,8
  1858. and r12d,r10d
  ; Mask the key-12 candidate with xmm13; AES round 13.
  1859. vpand xmm11,xmm11,xmm13
  1860. vaesenc xmm9,xmm9,xmm10
  1861. vmovdqu xmm10,XMMWORD PTR[((224-128))+rdi]
  1862. xor r13d,r10d
  1863. add ebx,DWORD PTR[56+rsp]
  1864. vpaddd xmm3,xmm3,xmm6
  1865. mov r15d,ecx
  1866. shrd r14d,r14d,11
  1867. xor r12d,eax
  1868. vpshufd xmm7,xmm3,80
  1869. xor r15d,edx
  1870. shrd r13d,r13d,6
  1871. add ebx,r12d
  1872. vpsrld xmm6,xmm7,10
  1873. and esi,r15d
  1874. xor r14d,ecx
  1875. add ebx,r13d
  1876. vpsrlq xmm7,xmm7,17
  1877. xor esi,edx
  1878. add r9d,ebx
  1879. shrd r14d,r14d,2
  1880. vpxor xmm6,xmm6,xmm7
  1881. add ebx,esi
  1882. mov r13d,r9d
  1883. add r14d,ebx
  1884. vpsrlq xmm7,xmm7,2
  1885. shrd r13d,r13d,14
  1886. mov ebx,r14d
  1887. mov r12d,r10d
  1888. vpxor xmm6,xmm6,xmm7
  1889. xor r13d,r9d
  1890. shrd r14d,r14d,9
  1891. xor r12d,r11d
  1892. vpshufd xmm6,xmm6,232
  1893. shrd r13d,r13d,5
  1894. xor r14d,ebx
  1895. and r12d,r9d
  1896. vpslldq xmm6,xmm6,8
  ; Merge the key-12 candidate, compute the key-14 candidate, and reload
  ; round key 0 for the next AES block.
  1897. vpor xmm8,xmm8,xmm11
  1898. vaesenclast xmm11,xmm9,xmm10
  1899. vmovdqu xmm10,XMMWORD PTR[((0-128))+rdi]
  1900. xor r13d,r9d
  1901. add eax,DWORD PTR[60+rsp]
  1902. mov esi,ebx
  1903. vpaddd xmm3,xmm3,xmm6
  1904. shrd r14d,r14d,11
  1905. xor r12d,r11d
  1906. xor esi,ecx
  ; New xmm3 words + constants, staged to [48+rsp] below.
  1907. vpaddd xmm6,xmm3,XMMWORD PTR[96+rbp]
  1908. shrd r13d,r13d,6
  1909. add eax,r12d
  1910. and r15d,esi
  1911. xor r14d,ebx
  1912. add eax,r13d
  1913. xor r15d,ecx
  1914. add r8d,eax
  1915. shrd r14d,r14d,2
  1916. add eax,r15d
  1917. mov r13d,r8d
  1918. add r14d,eax
  1919. vmovdqa XMMWORD PTR[48+rsp],xmm6
  ; Recover the AES input pointer, finish selecting the ciphertext block
  ; into xmm8 (it is also the next chaining value) and store it at
  ; input pointer + output offset; then step to the next AES block.
  1920. mov r12,QWORD PTR[((64+0))+rsp]
  1921. vpand xmm11,xmm11,xmm14
  1922. mov r15,QWORD PTR[((64+8))+rsp]
  1923. vpor xmm8,xmm8,xmm11
  1924. vmovdqu XMMWORD PTR[r12*1+r15],xmm8
  1925. lea r12,QWORD PTR[16+r12]
  ; Repeat while the probed table byte is non-zero. With rbp advancing 128
  ; bytes per pass the probes land at K256+259, +387 and +515; the last is
  ; inside the shuffle mask at K256+512. NOTE(review): presumably that
  ; byte is zero, giving three passes (rounds 0..47) -- confirm.
  1926. cmp BYTE PTR[131+rbp],0
  1927. jne $L$avx_00_47
  ; ---------------------------------------------------------------------
  ; Final sixteen rounds: no schedule update, sums are consumed straight
  ; from [0..60+rsp]. One more AES-CBC block is encrypted alongside.
  ; ---------------------------------------------------------------------
  1928. vmovdqu xmm9,XMMWORD PTR[r12]
  1929. mov QWORD PTR[((64+0))+rsp],r12
  1930. shrd r13d,r13d,14
  1931. mov eax,r14d
  1932. mov r12d,r9d
  1933. xor r13d,r8d
  1934. shrd r14d,r14d,9
  1935. xor r12d,r10d
  1936. shrd r13d,r13d,5
  1937. xor r14d,eax
  1938. and r12d,r8d
  ; AES: xor with round key 0.
  1939. vpxor xmm9,xmm9,xmm10
  1940. vmovdqu xmm10,XMMWORD PTR[((16-128))+rdi]
  1941. xor r13d,r8d
  1942. add r11d,DWORD PTR[rsp]
  1943. mov r15d,eax
  1944. shrd r14d,r14d,11
  1945. xor r12d,r10d
  1946. xor r15d,ebx
  1947. shrd r13d,r13d,6
  1948. add r11d,r12d
  1949. and esi,r15d
  1950. xor r14d,eax
  1951. add r11d,r13d
  1952. xor esi,ebx
  1953. add edx,r11d
  1954. shrd r14d,r14d,2
  1955. add r11d,esi
  1956. mov r13d,edx
  1957. add r14d,r11d
  1958. shrd r13d,r13d,14
  1959. mov r11d,r14d
  1960. mov r12d,r8d
  1961. xor r13d,edx
  1962. shrd r14d,r14d,9
  1963. xor r12d,r9d
  1964. shrd r13d,r13d,5
  1965. xor r14d,r11d
  1966. and r12d,edx
  ; AES: CBC chaining with the previous ciphertext block.
  1967. vpxor xmm9,xmm9,xmm8
  1968. xor r13d,edx
  1969. add r10d,DWORD PTR[4+rsp]
  1970. mov esi,r11d
  1971. shrd r14d,r14d,11
  1972. xor r12d,r9d
  1973. xor esi,eax
  1974. shrd r13d,r13d,6
  1975. add r10d,r12d
  1976. and r15d,esi
  1977. xor r14d,r11d
  1978. add r10d,r13d
  1979. xor r15d,eax
  1980. add ecx,r10d
  1981. shrd r14d,r14d,2
  1982. add r10d,r15d
  1983. mov r13d,ecx
  1984. add r14d,r10d
  1985. shrd r13d,r13d,14
  1986. mov r10d,r14d
  1987. mov r12d,edx
  1988. xor r13d,ecx
  1989. shrd r14d,r14d,9
  1990. xor r12d,r8d
  1991. shrd r13d,r13d,5
  1992. xor r14d,r10d
  1993. and r12d,ecx
  ; AES round with key 1.
  1994. vaesenc xmm9,xmm9,xmm10
  1995. vmovdqu xmm10,XMMWORD PTR[((32-128))+rdi]
  1996. xor r13d,ecx
  1997. add r9d,DWORD PTR[8+rsp]
  1998. mov r15d,r10d
  1999. shrd r14d,r14d,11
  2000. xor r12d,r8d
  2001. xor r15d,r11d
  2002. shrd r13d,r13d,6
  2003. add r9d,r12d
  2004. and esi,r15d
  2005. xor r14d,r10d
  2006. add r9d,r13d
  2007. xor esi,r11d
  2008. add ebx,r9d
  2009. shrd r14d,r14d,2
  2010. add r9d,esi
  2011. mov r13d,ebx
  2012. add r14d,r9d
  2013. shrd r13d,r13d,14
  2014. mov r9d,r14d
  2015. mov r12d,ecx
  2016. xor r13d,ebx
  2017. shrd r14d,r14d,9
  2018. xor r12d,edx
  2019. shrd r13d,r13d,5
  2020. xor r14d,r9d
  2021. and r12d,ebx
  ; AES round with key 2.
  2022. vaesenc xmm9,xmm9,xmm10
  2023. vmovdqu xmm10,XMMWORD PTR[((48-128))+rdi]
  2024. xor r13d,ebx
  2025. add r8d,DWORD PTR[12+rsp]
  2026. mov esi,r9d
  2027. shrd r14d,r14d,11
  2028. xor r12d,edx
  2029. xor esi,r10d
  2030. shrd r13d,r13d,6
  2031. add r8d,r12d
  2032. and r15d,esi
  2033. xor r14d,r9d
  2034. add r8d,r13d
  2035. xor r15d,r10d
  2036. add eax,r8d
  2037. shrd r14d,r14d,2
  2038. add r8d,r15d
  2039. mov r13d,eax
  2040. add r14d,r8d
  2041. shrd r13d,r13d,14
  2042. mov r8d,r14d
  2043. mov r12d,ebx
  2044. xor r13d,eax
  2045. shrd r14d,r14d,9
  2046. xor r12d,ecx
  2047. shrd r13d,r13d,5
  2048. xor r14d,r8d
  2049. and r12d,eax
  ; AES round with key 3.
  2050. vaesenc xmm9,xmm9,xmm10
  2051. vmovdqu xmm10,XMMWORD PTR[((64-128))+rdi]
  2052. xor r13d,eax
  2053. add edx,DWORD PTR[16+rsp]
  2054. mov r15d,r8d
  2055. shrd r14d,r14d,11
  2056. xor r12d,ecx
  2057. xor r15d,r9d
  2058. shrd r13d,r13d,6
  2059. add edx,r12d
  2060. and esi,r15d
  2061. xor r14d,r8d
  2062. add edx,r13d
  2063. xor esi,r9d
  2064. add r11d,edx
  2065. shrd r14d,r14d,2
  2066. add edx,esi
  2067. mov r13d,r11d
  2068. add r14d,edx
  2069. shrd r13d,r13d,14
  2070. mov edx,r14d
  2071. mov r12d,eax
  2072. xor r13d,r11d
  2073. shrd r14d,r14d,9
  2074. xor r12d,ebx
  2075. shrd r13d,r13d,5
  2076. xor r14d,edx
  2077. and r12d,r11d
  ; AES round with key 4.
  2078. vaesenc xmm9,xmm9,xmm10
  2079. vmovdqu xmm10,XMMWORD PTR[((80-128))+rdi]
  2080. xor r13d,r11d
  2081. add ecx,DWORD PTR[20+rsp]
  2082. mov esi,edx
  2083. shrd r14d,r14d,11
  2084. xor r12d,ebx
  2085. xor esi,r8d
  2086. shrd r13d,r13d,6
  2087. add ecx,r12d
  2088. and r15d,esi
  2089. xor r14d,edx
  2090. add ecx,r13d
  2091. xor r15d,r8d
  2092. add r10d,ecx
  2093. shrd r14d,r14d,2
  2094. add ecx,r15d
  2095. mov r13d,r10d
  2096. add r14d,ecx
  2097. shrd r13d,r13d,14
  2098. mov ecx,r14d
  2099. mov r12d,r11d
  2100. xor r13d,r10d
  2101. shrd r14d,r14d,9
  2102. xor r12d,eax
  2103. shrd r13d,r13d,5
  2104. xor r14d,ecx
  2105. and r12d,r10d
  ; AES round with key 5.
  2106. vaesenc xmm9,xmm9,xmm10
  2107. vmovdqu xmm10,XMMWORD PTR[((96-128))+rdi]
  2108. xor r13d,r10d
  2109. add ebx,DWORD PTR[24+rsp]
  2110. mov r15d,ecx
  2111. shrd r14d,r14d,11
  2112. xor r12d,eax
  2113. xor r15d,edx
  2114. shrd r13d,r13d,6
  2115. add ebx,r12d
  2116. and esi,r15d
  2117. xor r14d,ecx
  2118. add ebx,r13d
  2119. xor esi,edx
  2120. add r9d,ebx
  2121. shrd r14d,r14d,2
  2122. add ebx,esi
  2123. mov r13d,r9d
  2124. add r14d,ebx
  2125. shrd r13d,r13d,14
  2126. mov ebx,r14d
  2127. mov r12d,r10d
  2128. xor r13d,r9d
  2129. shrd r14d,r14d,9
  2130. xor r12d,r11d
  2131. shrd r13d,r13d,5
  2132. xor r14d,ebx
  2133. and r12d,r9d
  ; AES round with key 6.
  2134. vaesenc xmm9,xmm9,xmm10
  2135. vmovdqu xmm10,XMMWORD PTR[((112-128))+rdi]
  2136. xor r13d,r9d
  2137. add eax,DWORD PTR[28+rsp]
  2138. mov esi,ebx
  2139. shrd r14d,r14d,11
  2140. xor r12d,r11d
  2141. xor esi,ecx
  2142. shrd r13d,r13d,6
  2143. add eax,r12d
  2144. and r15d,esi
  2145. xor r14d,ebx
  2146. add eax,r13d
  2147. xor r15d,ecx
  2148. add r8d,eax
  2149. shrd r14d,r14d,2
  2150. add eax,r15d
  2151. mov r13d,r8d
  2152. add r14d,eax
  2153. shrd r13d,r13d,14
  2154. mov eax,r14d
  2155. mov r12d,r9d
  2156. xor r13d,r8d
  2157. shrd r14d,r14d,9
  2158. xor r12d,r10d
  2159. shrd r13d,r13d,5
  2160. xor r14d,eax
  2161. and r12d,r8d
  ; AES round with key 7.
  2162. vaesenc xmm9,xmm9,xmm10
  2163. vmovdqu xmm10,XMMWORD PTR[((128-128))+rdi]
  2164. xor r13d,r8d
  2165. add r11d,DWORD PTR[32+rsp]
  2166. mov r15d,eax
  2167. shrd r14d,r14d,11
  2168. xor r12d,r10d
  2169. xor r15d,ebx
  2170. shrd r13d,r13d,6
  2171. add r11d,r12d
  2172. and esi,r15d
  2173. xor r14d,eax
  2174. add r11d,r13d
  2175. xor esi,ebx
  2176. add edx,r11d
  2177. shrd r14d,r14d,2
  2178. add r11d,esi
  2179. mov r13d,edx
  2180. add r14d,r11d
  2181. shrd r13d,r13d,14
  2182. mov r11d,r14d
  2183. mov r12d,r8d
  2184. xor r13d,edx
  2185. shrd r14d,r14d,9
  2186. xor r12d,r9d
  2187. shrd r13d,r13d,5
  2188. xor r14d,r11d
  2189. and r12d,edx
  ; AES round with key 8.
  2190. vaesenc xmm9,xmm9,xmm10
  2191. vmovdqu xmm10,XMMWORD PTR[((144-128))+rdi]
  2192. xor r13d,edx
  2193. add r10d,DWORD PTR[36+rsp]
  2194. mov esi,r11d
  2195. shrd r14d,r14d,11
  2196. xor r12d,r9d
  2197. xor esi,eax
  2198. shrd r13d,r13d,6
  2199. add r10d,r12d
  2200. and r15d,esi
  2201. xor r14d,r11d
  2202. add r10d,r13d
  2203. xor r15d,eax
  2204. add ecx,r10d
  2205. shrd r14d,r14d,2
  2206. add r10d,r15d
  2207. mov r13d,ecx
  2208. add r14d,r10d
  2209. shrd r13d,r13d,14
  2210. mov r10d,r14d
  2211. mov r12d,edx
  2212. xor r13d,ecx
  2213. shrd r14d,r14d,9
  2214. xor r12d,r8d
  2215. shrd r13d,r13d,5
  2216. xor r14d,r10d
  2217. and r12d,ecx
  ; AES round with key 9.
  2218. vaesenc xmm9,xmm9,xmm10
  2219. vmovdqu xmm10,XMMWORD PTR[((160-128))+rdi]
  2220. xor r13d,ecx
  2221. add r9d,DWORD PTR[40+rsp]
  2222. mov r15d,r10d
  2223. shrd r14d,r14d,11
  2224. xor r12d,r8d
  2225. xor r15d,r11d
  2226. shrd r13d,r13d,6
  2227. add r9d,r12d
  2228. and esi,r15d
  2229. xor r14d,r10d
  2230. add r9d,r13d
  2231. xor esi,r11d
  2232. add ebx,r9d
  2233. shrd r14d,r14d,2
  2234. add r9d,esi
  2235. mov r13d,ebx
  2236. add r14d,r9d
  2237. shrd r13d,r13d,14
  2238. mov r9d,r14d
  2239. mov r12d,ecx
  2240. xor r13d,ebx
  2241. shrd r14d,r14d,9
  2242. xor r12d,edx
  2243. shrd r13d,r13d,5
  2244. xor r14d,r9d
  2245. and r12d,ebx
  ; Round key 10: candidate last round in xmm11, and a normal round.
  2246. vaesenclast xmm11,xmm9,xmm10
  2247. vaesenc xmm9,xmm9,xmm10
  2248. vmovdqu xmm10,XMMWORD PTR[((176-128))+rdi]
  2249. xor r13d,ebx
  2250. add r8d,DWORD PTR[44+rsp]
  2251. mov esi,r9d
  2252. shrd r14d,r14d,11
  2253. xor r12d,edx
  2254. xor esi,r10d
  2255. shrd r13d,r13d,6
  2256. add r8d,r12d
  2257. and r15d,esi
  2258. xor r14d,r9d
  2259. add r8d,r13d
  2260. xor r15d,r10d
  2261. add eax,r8d
  2262. shrd r14d,r14d,2
  2263. add r8d,r15d
  2264. mov r13d,eax
  2265. add r14d,r8d
  2266. shrd r13d,r13d,14
  2267. mov r8d,r14d
  2268. mov r12d,ebx
  2269. xor r13d,eax
  2270. shrd r14d,r14d,9
  2271. xor r12d,ecx
  2272. shrd r13d,r13d,5
  2273. xor r14d,r8d
  2274. and r12d,eax
  ; Mask the key-10 candidate with xmm12; AES round 11.
  2275. vpand xmm8,xmm11,xmm12
  2276. vaesenc xmm9,xmm9,xmm10
  2277. vmovdqu xmm10,XMMWORD PTR[((192-128))+rdi]
  2278. xor r13d,eax
  2279. add edx,DWORD PTR[48+rsp]
  2280. mov r15d,r8d
  2281. shrd r14d,r14d,11
  2282. xor r12d,ecx
  2283. xor r15d,r9d
  2284. shrd r13d,r13d,6
  2285. add edx,r12d
  2286. and esi,r15d
  2287. xor r14d,r8d
  2288. add edx,r13d
  2289. xor esi,r9d
  2290. add r11d,edx
  2291. shrd r14d,r14d,2
  2292. add edx,esi
  2293. mov r13d,r11d
  2294. add r14d,edx
  2295. shrd r13d,r13d,14
  2296. mov edx,r14d
  2297. mov r12d,eax
  2298. xor r13d,r11d
  2299. shrd r14d,r14d,9
  2300. xor r12d,ebx
  2301. shrd r13d,r13d,5
  2302. xor r14d,edx
  2303. and r12d,r11d
  ; Round key 12: candidate last round in xmm11, and a normal round.
  2304. vaesenclast xmm11,xmm9,xmm10
  2305. vaesenc xmm9,xmm9,xmm10
  2306. vmovdqu xmm10,XMMWORD PTR[((208-128))+rdi]
  2307. xor r13d,r11d
  2308. add ecx,DWORD PTR[52+rsp]
  2309. mov esi,edx
  2310. shrd r14d,r14d,11
  2311. xor r12d,ebx
  2312. xor esi,r8d
  2313. shrd r13d,r13d,6
  2314. add ecx,r12d
  2315. and r15d,esi
  2316. xor r14d,edx
  2317. add ecx,r13d
  2318. xor r15d,r8d
  2319. add r10d,ecx
  2320. shrd r14d,r14d,2
  2321. add ecx,r15d
  2322. mov r13d,r10d
  2323. add r14d,ecx
  2324. shrd r13d,r13d,14
  2325. mov ecx,r14d
  2326. mov r12d,r11d
  2327. xor r13d,r10d
  2328. shrd r14d,r14d,9
  2329. xor r12d,eax
  2330. shrd r13d,r13d,5
  2331. xor r14d,ecx
  2332. and r12d,r10d
  ; Mask the key-12 candidate with xmm13; AES round 13.
  2333. vpand xmm11,xmm11,xmm13
  2334. vaesenc xmm9,xmm9,xmm10
  2335. vmovdqu xmm10,XMMWORD PTR[((224-128))+rdi]
  2336. xor r13d,r10d
  2337. add ebx,DWORD PTR[56+rsp]
  2338. mov r15d,ecx
  2339. shrd r14d,r14d,11
  2340. xor r12d,eax
  2341. xor r15d,edx
  2342. shrd r13d,r13d,6
  2343. add ebx,r12d
  2344. and esi,r15d
  2345. xor r14d,ecx
  2346. add ebx,r13d
  2347. xor esi,edx
  2348. add r9d,ebx
  2349. shrd r14d,r14d,2
  2350. add ebx,esi
  2351. mov r13d,r9d
  2352. add r14d,ebx
  2353. shrd r13d,r13d,14
  2354. mov ebx,r14d
  2355. mov r12d,r10d
  2356. xor r13d,r9d
  2357. shrd r14d,r14d,9
  2358. xor r12d,r11d
  2359. shrd r13d,r13d,5
  2360. xor r14d,ebx
  2361. and r12d,r9d
  ; Merge the key-12 candidate, compute the key-14 candidate, and reload
  ; round key 0 for the next AES block.
  2362. vpor xmm8,xmm8,xmm11
  2363. vaesenclast xmm11,xmm9,xmm10
  2364. vmovdqu xmm10,XMMWORD PTR[((0-128))+rdi]
  2365. xor r13d,r9d
  2366. add eax,DWORD PTR[60+rsp]
  2367. mov esi,ebx
  2368. shrd r14d,r14d,11
  2369. xor r12d,r11d
  2370. xor esi,ecx
  2371. shrd r13d,r13d,6
  2372. add eax,r12d
  2373. and r15d,esi
  2374. xor r14d,ebx
  2375. add eax,r13d
  2376. xor r15d,ecx
  2377. add r8d,eax
  2378. shrd r14d,r14d,2
  2379. add eax,r15d
  2380. mov r13d,r8d
  2381. add r14d,eax
  ; All 64 rounds done. Reload the pointers that were used as scratch:
  ; r12 = AES input pointer, r13 = output offset, r15 = hash state,
  ; rsi = hash-data offset (needed again at the top of the outer loop).
  2382. mov r12,QWORD PTR[((64+0))+rsp]
  2383. mov r13,QWORD PTR[((64+8))+rsp]
  2384. mov r15,QWORD PTR[((64+40))+rsp]
  2385. mov rsi,QWORD PTR[((64+48))+rsp]
  ; Finish selecting the ciphertext block and store it; eax picks up the
  ; lazily completed "a" from r14d.
  2386. vpand xmm11,xmm11,xmm14
  2387. mov eax,r14d
  2388. vpor xmm8,xmm8,xmm11
  2389. vmovdqu XMMWORD PTR[r13*1+r12],xmm8
  2390. lea r12,QWORD PTR[16+r12]
  ; Add the previous hash state into the working variables.
  2391. add eax,DWORD PTR[r15]
  2392. add ebx,DWORD PTR[4+r15]
  2393. add ecx,DWORD PTR[8+r15]
  2394. add edx,DWORD PTR[12+r15]
  2395. add r8d,DWORD PTR[16+r15]
  2396. add r9d,DWORD PTR[20+r15]
  2397. add r10d,DWORD PTR[24+r15]
  2398. add r11d,DWORD PTR[28+r15]
  ; Compare against the end pointer now; the mov stores that follow leave
  ; the flags untouched, so the jb below still tests this comparison.
  2399. cmp r12,QWORD PTR[((64+16))+rsp]
  2400. mov DWORD PTR[r15],eax
  2401. mov DWORD PTR[4+r15],ebx
  2402. mov DWORD PTR[8+r15],ecx
  2403. mov DWORD PTR[12+r15],edx
  2404. mov DWORD PTR[16+r15],r8d
  2405. mov DWORD PTR[20+r15],r9d
  2406. mov DWORD PTR[24+r15],r10d
  2407. mov DWORD PTR[28+r15],r11d
  2408. jb $L$loop_avx
  ; Done: write the last ciphertext block back through the IV pointer,
  ; clear all vector registers, then restore the callee-saved state.
  2409. mov r8,QWORD PTR[((64+32))+rsp]
  2410. mov rsi,QWORD PTR[120+rsp]
  2411. vmovdqu XMMWORD PTR[r8],xmm8
  2412. vzeroall
  2413. movaps xmm6,XMMWORD PTR[128+rsp]
  2414. movaps xmm7,XMMWORD PTR[144+rsp]
  2415. movaps xmm8,XMMWORD PTR[160+rsp]
  2416. movaps xmm9,XMMWORD PTR[176+rsp]
  2417. movaps xmm10,XMMWORD PTR[192+rsp]
  2418. movaps xmm11,XMMWORD PTR[208+rsp]
  2419. movaps xmm12,XMMWORD PTR[224+rsp]
  2420. movaps xmm13,XMMWORD PTR[240+rsp]
  2421. movaps xmm14,XMMWORD PTR[256+rsp]
  2422. movaps xmm15,XMMWORD PTR[272+rsp]
  ; rsi = rsp as it was before the six pushes, so the pushed registers sit
  ; at -8..-48 below it.
  2423. mov r15,QWORD PTR[((-48))+rsi]
  2424. mov r14,QWORD PTR[((-40))+rsi]
  2425. mov r13,QWORD PTR[((-32))+rsi]
  2426. mov r12,QWORD PTR[((-24))+rsi]
  2427. mov rbp,QWORD PTR[((-16))+rsi]
  2428. mov rbx,QWORD PTR[((-8))+rsi]
  2429. lea rsp,QWORD PTR[rsi]
  2430. $L$epilogue_avx::
  2431. mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue
  2432. mov rsi,QWORD PTR[16+rsp]
  2433. DB 0F3h,0C3h ;repret
  2434. $L$SEH_end_aesni_cbc_sha256_enc_avx::
  2435. aesni_cbc_sha256_enc_avx ENDP
  2436. ALIGN 64
  2437. aesni_cbc_sha256_enc_avx2 PROC PRIVATE
  2438. mov QWORD PTR[8+rsp],rdi ;WIN64 prologue
  2439. mov QWORD PTR[16+rsp],rsi
  2440. mov rax,rsp
  2441. $L$SEH_begin_aesni_cbc_sha256_enc_avx2::
  2442. mov rdi,rcx
  2443. mov rsi,rdx
  2444. mov rdx,r8
  2445. mov rcx,r9
  2446. mov r8,QWORD PTR[40+rsp]
  2447. mov r9,QWORD PTR[48+rsp]
  2448. $L$avx2_shortcut::
  2449. mov r10,QWORD PTR[56+rsp]
  2450. mov rax,rsp
  2451. push rbx
  2452. push rbp
  2453. push r12
  2454. push r13
  2455. push r14
  2456. push r15
  2457. sub rsp,736
  2458. and rsp,-256*4
  2459. add rsp,448
  2460. shl rdx,6
  2461. sub rsi,rdi
  2462. sub r10,rdi
  2463. add rdx,rdi
  2464. mov QWORD PTR[((64+16))+rsp],rdx
  2465. mov QWORD PTR[((64+32))+rsp],r8
  2466. mov QWORD PTR[((64+40))+rsp],r9
  2467. mov QWORD PTR[((64+48))+rsp],r10
  2468. mov QWORD PTR[120+rsp],rax
  2469. movaps XMMWORD PTR[128+rsp],xmm6
  2470. movaps XMMWORD PTR[144+rsp],xmm7
  2471. movaps XMMWORD PTR[160+rsp],xmm8
  2472. movaps XMMWORD PTR[176+rsp],xmm9
  2473. movaps XMMWORD PTR[192+rsp],xmm10
  2474. movaps XMMWORD PTR[208+rsp],xmm11
  2475. movaps XMMWORD PTR[224+rsp],xmm12
  2476. movaps XMMWORD PTR[240+rsp],xmm13
  2477. movaps XMMWORD PTR[256+rsp],xmm14
  2478. movaps XMMWORD PTR[272+rsp],xmm15
  2479. $L$prologue_avx2::
  2480. vzeroall
  2481. mov r13,rdi
  2482. vpinsrq xmm15,xmm15,rsi,1
  2483. lea rdi,QWORD PTR[128+rcx]
  2484. lea r12,QWORD PTR[((K256+544))]
  2485. mov r14d,DWORD PTR[((240-128))+rdi]
  2486. mov r15,r9
  2487. mov rsi,r10
  2488. vmovdqu xmm8,XMMWORD PTR[r8]
  2489. lea r14,QWORD PTR[((-9))+r14]
  2490. vmovdqa xmm14,XMMWORD PTR[r14*8+r12]
  2491. vmovdqa xmm13,XMMWORD PTR[16+r14*8+r12]
  2492. vmovdqa xmm12,XMMWORD PTR[32+r14*8+r12]
  2493. sub r13,-16*4
  2494. mov eax,DWORD PTR[r15]
  2495. lea r12,QWORD PTR[r13*1+rsi]
  2496. mov ebx,DWORD PTR[4+r15]
  2497. cmp r13,rdx
  2498. mov ecx,DWORD PTR[8+r15]
  2499. cmove r12,rsp
  2500. mov edx,DWORD PTR[12+r15]
  2501. mov r8d,DWORD PTR[16+r15]
  2502. mov r9d,DWORD PTR[20+r15]
  2503. mov r10d,DWORD PTR[24+r15]
  2504. mov r11d,DWORD PTR[28+r15]
  2505. vmovdqu xmm10,XMMWORD PTR[((0-128))+rdi]
  2506. jmp $L$oop_avx2
  2507. ALIGN 16
  2508. $L$oop_avx2::
  2509. vmovdqa ymm7,YMMWORD PTR[((K256+512))]
  2510. vmovdqu xmm0,XMMWORD PTR[((-64+0))+r13*1+rsi]
  2511. vmovdqu xmm1,XMMWORD PTR[((-64+16))+r13*1+rsi]
  2512. vmovdqu xmm2,XMMWORD PTR[((-64+32))+r13*1+rsi]
  2513. vmovdqu xmm3,XMMWORD PTR[((-64+48))+r13*1+rsi]
  2514. vinserti128 ymm0,ymm0,XMMWORD PTR[r12],1
  2515. vinserti128 ymm1,ymm1,XMMWORD PTR[16+r12],1
  2516. vpshufb ymm0,ymm0,ymm7
  2517. vinserti128 ymm2,ymm2,XMMWORD PTR[32+r12],1
  2518. vpshufb ymm1,ymm1,ymm7
  2519. vinserti128 ymm3,ymm3,XMMWORD PTR[48+r12],1
  2520. lea rbp,QWORD PTR[K256]
  2521. vpshufb ymm2,ymm2,ymm7
  2522. lea r13,QWORD PTR[((-64))+r13]
  2523. vpaddd ymm4,ymm0,YMMWORD PTR[rbp]
  2524. vpshufb ymm3,ymm3,ymm7
  2525. vpaddd ymm5,ymm1,YMMWORD PTR[32+rbp]
  2526. vpaddd ymm6,ymm2,YMMWORD PTR[64+rbp]
  2527. vpaddd ymm7,ymm3,YMMWORD PTR[96+rbp]
  2528. vmovdqa YMMWORD PTR[rsp],ymm4
  2529. xor r14d,r14d
  2530. vmovdqa YMMWORD PTR[32+rsp],ymm5
  2531. lea rsp,QWORD PTR[((-64))+rsp]
  2532. mov esi,ebx
  2533. vmovdqa YMMWORD PTR[rsp],ymm6
  2534. xor esi,ecx
  2535. vmovdqa YMMWORD PTR[32+rsp],ymm7
  2536. mov r12d,r9d
  2537. sub rbp,-16*2*4
  2538. jmp $L$avx2_00_47
  2539. ALIGN 16
  2540. $L$avx2_00_47::
  2541. vmovdqu xmm9,XMMWORD PTR[r13]
  2542. vpinsrq xmm15,xmm15,r13,0
  2543. lea rsp,QWORD PTR[((-64))+rsp]
  2544. vpalignr ymm4,ymm1,ymm0,4
  2545. add r11d,DWORD PTR[((0+128))+rsp]
  2546. and r12d,r8d
  2547. rorx r13d,r8d,25
  2548. vpalignr ymm7,ymm3,ymm2,4
  2549. rorx r15d,r8d,11
  2550. lea eax,DWORD PTR[r14*1+rax]
  2551. lea r11d,DWORD PTR[r12*1+r11]
  2552. vpsrld ymm6,ymm4,7
  2553. andn r12d,r8d,r10d
  2554. xor r13d,r15d
  2555. rorx r14d,r8d,6
  2556. vpaddd ymm0,ymm0,ymm7
  2557. lea r11d,DWORD PTR[r12*1+r11]
  2558. xor r13d,r14d
  2559. mov r15d,eax
  2560. vpsrld ymm7,ymm4,3
  2561. rorx r12d,eax,22
  2562. lea r11d,DWORD PTR[r13*1+r11]
  2563. xor r15d,ebx
  2564. vpslld ymm5,ymm4,14
  2565. rorx r14d,eax,13
  2566. rorx r13d,eax,2
  2567. lea edx,DWORD PTR[r11*1+rdx]
  2568. vpxor ymm4,ymm7,ymm6
  2569. and esi,r15d
  2570. vpxor xmm9,xmm9,xmm10
  2571. vmovdqu xmm10,XMMWORD PTR[((16-128))+rdi]
  2572. xor r14d,r12d
  2573. xor esi,ebx
  2574. vpshufd ymm7,ymm3,250
  2575. xor r14d,r13d
  2576. lea r11d,DWORD PTR[rsi*1+r11]
  2577. mov r12d,r8d
  2578. vpsrld ymm6,ymm6,11
  2579. add r10d,DWORD PTR[((4+128))+rsp]
  2580. and r12d,edx
  2581. rorx r13d,edx,25
  2582. vpxor ymm4,ymm4,ymm5
  2583. rorx esi,edx,11
  2584. lea r11d,DWORD PTR[r14*1+r11]
  2585. lea r10d,DWORD PTR[r12*1+r10]
  2586. vpslld ymm5,ymm5,11
  2587. andn r12d,edx,r9d
  2588. xor r13d,esi
  2589. rorx r14d,edx,6
  2590. vpxor ymm4,ymm4,ymm6
  2591. lea r10d,DWORD PTR[r12*1+r10]
  2592. xor r13d,r14d
  2593. mov esi,r11d
  2594. vpsrld ymm6,ymm7,10
  2595. rorx r12d,r11d,22
  2596. lea r10d,DWORD PTR[r13*1+r10]
  2597. xor esi,eax
  2598. vpxor ymm4,ymm4,ymm5
  2599. rorx r14d,r11d,13
  2600. rorx r13d,r11d,2
  2601. lea ecx,DWORD PTR[r10*1+rcx]
  2602. vpsrlq ymm7,ymm7,17
  2603. and r15d,esi
  2604. vpxor xmm9,xmm9,xmm8
  2605. xor r14d,r12d
  2606. xor r15d,eax
  2607. vpaddd ymm0,ymm0,ymm4
  2608. xor r14d,r13d
  2609. lea r10d,DWORD PTR[r15*1+r10]
  2610. mov r12d,edx
  2611. vpxor ymm6,ymm6,ymm7
  2612. add r9d,DWORD PTR[((8+128))+rsp]
  2613. and r12d,ecx
  2614. rorx r13d,ecx,25
  2615. vpsrlq ymm7,ymm7,2
  2616. rorx r15d,ecx,11
  2617. lea r10d,DWORD PTR[r14*1+r10]
  2618. lea r9d,DWORD PTR[r12*1+r9]
  2619. vpxor ymm6,ymm6,ymm7
  2620. andn r12d,ecx,r8d
  2621. xor r13d,r15d
  2622. rorx r14d,ecx,6
  2623. vpshufd ymm6,ymm6,132
  2624. lea r9d,DWORD PTR[r12*1+r9]
  2625. xor r13d,r14d
  2626. mov r15d,r10d
  2627. vpsrldq ymm6,ymm6,8
  2628. rorx r12d,r10d,22
  2629. lea r9d,DWORD PTR[r13*1+r9]
  2630. xor r15d,r11d
  2631. vpaddd ymm0,ymm0,ymm6
  2632. rorx r14d,r10d,13
  2633. rorx r13d,r10d,2
  2634. lea ebx,DWORD PTR[r9*1+rbx]
  2635. vpshufd ymm7,ymm0,80
  2636. and esi,r15d
  2637. vaesenc xmm9,xmm9,xmm10
  2638. vmovdqu xmm10,XMMWORD PTR[((32-128))+rdi]
  2639. xor r14d,r12d
  2640. xor esi,r11d
  2641. vpsrld ymm6,ymm7,10
  2642. xor r14d,r13d
  2643. lea r9d,DWORD PTR[rsi*1+r9]
  2644. mov r12d,ecx
  2645. vpsrlq ymm7,ymm7,17
  2646. add r8d,DWORD PTR[((12+128))+rsp]
  2647. and r12d,ebx
  2648. rorx r13d,ebx,25
  2649. vpxor ymm6,ymm6,ymm7
  2650. rorx esi,ebx,11
  2651. lea r9d,DWORD PTR[r14*1+r9]
  2652. lea r8d,DWORD PTR[r12*1+r8]
  2653. vpsrlq ymm7,ymm7,2
  2654. andn r12d,ebx,edx
  2655. xor r13d,esi
  2656. rorx r14d,ebx,6
  2657. vpxor ymm6,ymm6,ymm7
  2658. lea r8d,DWORD PTR[r12*1+r8]
  2659. xor r13d,r14d
  2660. mov esi,r9d
  2661. vpshufd ymm6,ymm6,232
  2662. rorx r12d,r9d,22
  2663. lea r8d,DWORD PTR[r13*1+r8]
  2664. xor esi,r10d
  2665. vpslldq ymm6,ymm6,8
  2666. rorx r14d,r9d,13
  2667. rorx r13d,r9d,2
  2668. lea eax,DWORD PTR[r8*1+rax]
  2669. vpaddd ymm0,ymm0,ymm6
  2670. and r15d,esi
  2671. vaesenc xmm9,xmm9,xmm10
  2672. vmovdqu xmm10,XMMWORD PTR[((48-128))+rdi]
  2673. xor r14d,r12d
  2674. xor r15d,r10d
  2675. vpaddd ymm6,ymm0,YMMWORD PTR[rbp]
  2676. xor r14d,r13d
  2677. lea r8d,DWORD PTR[r15*1+r8]
  2678. mov r12d,ebx
  2679. vmovdqa YMMWORD PTR[rsp],ymm6
  2680. vpalignr ymm4,ymm2,ymm1,4
  2681. add edx,DWORD PTR[((32+128))+rsp]
  2682. and r12d,eax
  2683. rorx r13d,eax,25
  2684. vpalignr ymm7,ymm0,ymm3,4
  2685. rorx r15d,eax,11
  2686. lea r8d,DWORD PTR[r14*1+r8]
  2687. lea edx,DWORD PTR[r12*1+rdx]
  2688. vpsrld ymm6,ymm4,7
  2689. andn r12d,eax,ecx
  2690. xor r13d,r15d
  2691. rorx r14d,eax,6
  2692. vpaddd ymm1,ymm1,ymm7
  2693. lea edx,DWORD PTR[r12*1+rdx]
  2694. xor r13d,r14d
  2695. mov r15d,r8d
  2696. vpsrld ymm7,ymm4,3
  2697. rorx r12d,r8d,22
  2698. lea edx,DWORD PTR[r13*1+rdx]
  2699. xor r15d,r9d
  2700. vpslld ymm5,ymm4,14
  2701. rorx r14d,r8d,13
  2702. rorx r13d,r8d,2
  2703. lea r11d,DWORD PTR[rdx*1+r11]
  2704. vpxor ymm4,ymm7,ymm6
  2705. and esi,r15d
  2706. vaesenc xmm9,xmm9,xmm10
  2707. vmovdqu xmm10,XMMWORD PTR[((64-128))+rdi]
  2708. xor r14d,r12d
  2709. xor esi,r9d
  2710. vpshufd ymm7,ymm0,250
  2711. xor r14d,r13d
  2712. lea edx,DWORD PTR[rsi*1+rdx]
  2713. mov r12d,eax
  2714. vpsrld ymm6,ymm6,11
  2715. add ecx,DWORD PTR[((36+128))+rsp]
  2716. and r12d,r11d
  2717. rorx r13d,r11d,25
  2718. vpxor ymm4,ymm4,ymm5
  2719. rorx esi,r11d,11
  2720. lea edx,DWORD PTR[r14*1+rdx]
  2721. lea ecx,DWORD PTR[r12*1+rcx]
  2722. vpslld ymm5,ymm5,11
  2723. andn r12d,r11d,ebx
  2724. xor r13d,esi
  2725. rorx r14d,r11d,6
  2726. vpxor ymm4,ymm4,ymm6
  2727. lea ecx,DWORD PTR[r12*1+rcx]
  2728. xor r13d,r14d
  2729. mov esi,edx
  2730. vpsrld ymm6,ymm7,10
  2731. rorx r12d,edx,22
  2732. lea ecx,DWORD PTR[r13*1+rcx]
  2733. xor esi,r8d
  2734. vpxor ymm4,ymm4,ymm5
  2735. rorx r14d,edx,13
  2736. rorx r13d,edx,2
  2737. lea r10d,DWORD PTR[rcx*1+r10]
  2738. vpsrlq ymm7,ymm7,17
  2739. and r15d,esi
  2740. vaesenc xmm9,xmm9,xmm10
  2741. vmovdqu xmm10,XMMWORD PTR[((80-128))+rdi]
  2742. xor r14d,r12d
  2743. xor r15d,r8d
  2744. vpaddd ymm1,ymm1,ymm4
  2745. xor r14d,r13d
  2746. lea ecx,DWORD PTR[r15*1+rcx]
  2747. mov r12d,r11d
  2748. vpxor ymm6,ymm6,ymm7
  2749. add ebx,DWORD PTR[((40+128))+rsp]
  2750. and r12d,r10d
  2751. rorx r13d,r10d,25
  2752. vpsrlq ymm7,ymm7,2
  2753. rorx r15d,r10d,11
  2754. lea ecx,DWORD PTR[r14*1+rcx]
  2755. lea ebx,DWORD PTR[r12*1+rbx]
  2756. vpxor ymm6,ymm6,ymm7
  2757. andn r12d,r10d,eax
  2758. xor r13d,r15d
  2759. rorx r14d,r10d,6
  2760. vpshufd ymm6,ymm6,132
  2761. lea ebx,DWORD PTR[r12*1+rbx]
  2762. xor r13d,r14d
  2763. mov r15d,ecx
  2764. vpsrldq ymm6,ymm6,8
  2765. rorx r12d,ecx,22
  2766. lea ebx,DWORD PTR[r13*1+rbx]
  2767. xor r15d,edx
  2768. vpaddd ymm1,ymm1,ymm6
  2769. rorx r14d,ecx,13
  2770. rorx r13d,ecx,2
  2771. lea r9d,DWORD PTR[rbx*1+r9]
  2772. vpshufd ymm7,ymm1,80
  2773. and esi,r15d
  2774. vaesenc xmm9,xmm9,xmm10
  2775. vmovdqu xmm10,XMMWORD PTR[((96-128))+rdi]
  2776. xor r14d,r12d
  2777. xor esi,edx
  2778. vpsrld ymm6,ymm7,10
  2779. xor r14d,r13d
  2780. lea ebx,DWORD PTR[rsi*1+rbx]
  2781. mov r12d,r10d
  2782. vpsrlq ymm7,ymm7,17
  2783. add eax,DWORD PTR[((44+128))+rsp]
  2784. and r12d,r9d
  2785. rorx r13d,r9d,25
  2786. vpxor ymm6,ymm6,ymm7
  2787. rorx esi,r9d,11
  2788. lea ebx,DWORD PTR[r14*1+rbx]
  2789. lea eax,DWORD PTR[r12*1+rax]
  2790. vpsrlq ymm7,ymm7,2
  2791. andn r12d,r9d,r11d
  2792. xor r13d,esi
  2793. rorx r14d,r9d,6
  2794. vpxor ymm6,ymm6,ymm7
  2795. lea eax,DWORD PTR[r12*1+rax]
  2796. xor r13d,r14d
  2797. mov esi,ebx
  2798. vpshufd ymm6,ymm6,232
  2799. rorx r12d,ebx,22
  2800. lea eax,DWORD PTR[r13*1+rax]
  2801. xor esi,ecx
  2802. vpslldq ymm6,ymm6,8
  2803. rorx r14d,ebx,13
  2804. rorx r13d,ebx,2
  2805. lea r8d,DWORD PTR[rax*1+r8]
  2806. vpaddd ymm1,ymm1,ymm6
  2807. and r15d,esi
  2808. vaesenc xmm9,xmm9,xmm10
  2809. vmovdqu xmm10,XMMWORD PTR[((112-128))+rdi]
  2810. xor r14d,r12d
  2811. xor r15d,ecx
  2812. vpaddd ymm6,ymm1,YMMWORD PTR[32+rbp]
  2813. xor r14d,r13d
  2814. lea eax,DWORD PTR[r15*1+rax]
  2815. mov r12d,r9d
  2816. vmovdqa YMMWORD PTR[32+rsp],ymm6
  2817. lea rsp,QWORD PTR[((-64))+rsp]
  2818. vpalignr ymm4,ymm3,ymm2,4
  2819. add r11d,DWORD PTR[((0+128))+rsp]
  2820. and r12d,r8d
  2821. rorx r13d,r8d,25
  2822. vpalignr ymm7,ymm1,ymm0,4
  2823. rorx r15d,r8d,11
  2824. lea eax,DWORD PTR[r14*1+rax]
  2825. lea r11d,DWORD PTR[r12*1+r11]
  2826. vpsrld ymm6,ymm4,7
  2827. andn r12d,r8d,r10d
  2828. xor r13d,r15d
  2829. rorx r14d,r8d,6
  2830. vpaddd ymm2,ymm2,ymm7
  2831. lea r11d,DWORD PTR[r12*1+r11]
  2832. xor r13d,r14d
  2833. mov r15d,eax
  2834. vpsrld ymm7,ymm4,3
  2835. rorx r12d,eax,22
  2836. lea r11d,DWORD PTR[r13*1+r11]
  2837. xor r15d,ebx
  2838. vpslld ymm5,ymm4,14
  2839. rorx r14d,eax,13
  2840. rorx r13d,eax,2
  2841. lea edx,DWORD PTR[r11*1+rdx]
  2842. vpxor ymm4,ymm7,ymm6
  2843. and esi,r15d
  2844. vaesenc xmm9,xmm9,xmm10
  2845. vmovdqu xmm10,XMMWORD PTR[((128-128))+rdi]
  2846. xor r14d,r12d
  2847. xor esi,ebx
  2848. vpshufd ymm7,ymm1,250
  2849. xor r14d,r13d
  2850. lea r11d,DWORD PTR[rsi*1+r11]
  2851. mov r12d,r8d
  2852. vpsrld ymm6,ymm6,11
  2853. add r10d,DWORD PTR[((4+128))+rsp]
  2854. and r12d,edx
  2855. rorx r13d,edx,25
  2856. vpxor ymm4,ymm4,ymm5
  2857. rorx esi,edx,11
  2858. lea r11d,DWORD PTR[r14*1+r11]
  2859. lea r10d,DWORD PTR[r12*1+r10]
  2860. vpslld ymm5,ymm5,11
  2861. andn r12d,edx,r9d
  2862. xor r13d,esi
  2863. rorx r14d,edx,6
  2864. vpxor ymm4,ymm4,ymm6
  2865. lea r10d,DWORD PTR[r12*1+r10]
  2866. xor r13d,r14d
  2867. mov esi,r11d
  2868. vpsrld ymm6,ymm7,10
  2869. rorx r12d,r11d,22
  2870. lea r10d,DWORD PTR[r13*1+r10]
  2871. xor esi,eax
  2872. vpxor ymm4,ymm4,ymm5
  2873. rorx r14d,r11d,13
  2874. rorx r13d,r11d,2
  2875. lea ecx,DWORD PTR[r10*1+rcx]
  2876. vpsrlq ymm7,ymm7,17
  2877. and r15d,esi
  2878. vaesenc xmm9,xmm9,xmm10
  2879. vmovdqu xmm10,XMMWORD PTR[((144-128))+rdi]
  2880. xor r14d,r12d
  2881. xor r15d,eax
  2882. vpaddd ymm2,ymm2,ymm4
  2883. xor r14d,r13d
  2884. lea r10d,DWORD PTR[r15*1+r10]
  2885. mov r12d,edx
  2886. vpxor ymm6,ymm6,ymm7
  2887. add r9d,DWORD PTR[((8+128))+rsp]
  2888. and r12d,ecx
  2889. rorx r13d,ecx,25
  2890. vpsrlq ymm7,ymm7,2
  2891. rorx r15d,ecx,11
  2892. lea r10d,DWORD PTR[r14*1+r10]
  2893. lea r9d,DWORD PTR[r12*1+r9]
  2894. vpxor ymm6,ymm6,ymm7
  2895. andn r12d,ecx,r8d
  2896. xor r13d,r15d
  2897. rorx r14d,ecx,6
  2898. vpshufd ymm6,ymm6,132
  2899. lea r9d,DWORD PTR[r12*1+r9]
  2900. xor r13d,r14d
  2901. mov r15d,r10d
  2902. vpsrldq ymm6,ymm6,8
  2903. rorx r12d,r10d,22
  2904. lea r9d,DWORD PTR[r13*1+r9]
  2905. xor r15d,r11d
  2906. vpaddd ymm2,ymm2,ymm6
  2907. rorx r14d,r10d,13
  2908. rorx r13d,r10d,2
  2909. lea ebx,DWORD PTR[r9*1+rbx]
  2910. vpshufd ymm7,ymm2,80
  2911. and esi,r15d
  2912. vaesenc xmm9,xmm9,xmm10
  2913. vmovdqu xmm10,XMMWORD PTR[((160-128))+rdi]
  2914. xor r14d,r12d
  2915. xor esi,r11d
  2916. vpsrld ymm6,ymm7,10
  2917. xor r14d,r13d
  2918. lea r9d,DWORD PTR[rsi*1+r9]
  2919. mov r12d,ecx
  2920. vpsrlq ymm7,ymm7,17
  2921. add r8d,DWORD PTR[((12+128))+rsp]
  2922. and r12d,ebx
  2923. rorx r13d,ebx,25
  2924. vpxor ymm6,ymm6,ymm7
  2925. rorx esi,ebx,11
  2926. lea r9d,DWORD PTR[r14*1+r9]
  2927. lea r8d,DWORD PTR[r12*1+r8]
  2928. vpsrlq ymm7,ymm7,2
  2929. andn r12d,ebx,edx
  2930. xor r13d,esi
  2931. rorx r14d,ebx,6
  2932. vpxor ymm6,ymm6,ymm7
  2933. lea r8d,DWORD PTR[r12*1+r8]
  2934. xor r13d,r14d
  2935. mov esi,r9d
  2936. vpshufd ymm6,ymm6,232
  2937. rorx r12d,r9d,22
  2938. lea r8d,DWORD PTR[r13*1+r8]
  2939. xor esi,r10d
  2940. vpslldq ymm6,ymm6,8
  2941. rorx r14d,r9d,13
  2942. rorx r13d,r9d,2
  2943. lea eax,DWORD PTR[r8*1+rax]
  2944. vpaddd ymm2,ymm2,ymm6
  2945. and r15d,esi
  2946. vaesenclast xmm11,xmm9,xmm10
  2947. vaesenc xmm9,xmm9,xmm10
  2948. vmovdqu xmm10,XMMWORD PTR[((176-128))+rdi]
  2949. xor r14d,r12d
  2950. xor r15d,r10d
  2951. vpaddd ymm6,ymm2,YMMWORD PTR[64+rbp]
  2952. xor r14d,r13d
  2953. lea r8d,DWORD PTR[r15*1+r8]
  2954. mov r12d,ebx
  2955. vmovdqa YMMWORD PTR[rsp],ymm6
  2956. vpalignr ymm4,ymm0,ymm3,4
  2957. add edx,DWORD PTR[((32+128))+rsp]
  2958. and r12d,eax
  2959. rorx r13d,eax,25
  2960. vpalignr ymm7,ymm2,ymm1,4
  2961. rorx r15d,eax,11
  2962. lea r8d,DWORD PTR[r14*1+r8]
  2963. lea edx,DWORD PTR[r12*1+rdx]
  2964. vpsrld ymm6,ymm4,7
  2965. andn r12d,eax,ecx
  2966. xor r13d,r15d
  2967. rorx r14d,eax,6
  2968. vpaddd ymm3,ymm3,ymm7
  2969. lea edx,DWORD PTR[r12*1+rdx]
  2970. xor r13d,r14d
  2971. mov r15d,r8d
  2972. vpsrld ymm7,ymm4,3
  2973. rorx r12d,r8d,22
  2974. lea edx,DWORD PTR[r13*1+rdx]
  2975. xor r15d,r9d
  2976. vpslld ymm5,ymm4,14
  2977. rorx r14d,r8d,13
  2978. rorx r13d,r8d,2
  2979. lea r11d,DWORD PTR[rdx*1+r11]
  2980. vpxor ymm4,ymm7,ymm6
  2981. and esi,r15d
  2982. vpand xmm8,xmm11,xmm12
  2983. vaesenc xmm9,xmm9,xmm10
  2984. vmovdqu xmm10,XMMWORD PTR[((192-128))+rdi]
  2985. xor r14d,r12d
  2986. xor esi,r9d
  2987. vpshufd ymm7,ymm2,250
  2988. xor r14d,r13d
  2989. lea edx,DWORD PTR[rsi*1+rdx]
  2990. mov r12d,eax
  2991. vpsrld ymm6,ymm6,11
  2992. add ecx,DWORD PTR[((36+128))+rsp]
  2993. and r12d,r11d
  2994. rorx r13d,r11d,25
  2995. vpxor ymm4,ymm4,ymm5
  2996. rorx esi,r11d,11
  2997. lea edx,DWORD PTR[r14*1+rdx]
  2998. lea ecx,DWORD PTR[r12*1+rcx]
  2999. vpslld ymm5,ymm5,11
  3000. andn r12d,r11d,ebx
  3001. xor r13d,esi
  3002. rorx r14d,r11d,6
  3003. vpxor ymm4,ymm4,ymm6
  3004. lea ecx,DWORD PTR[r12*1+rcx]
  3005. xor r13d,r14d
  3006. mov esi,edx
  3007. vpsrld ymm6,ymm7,10
  3008. rorx r12d,edx,22
  3009. lea ecx,DWORD PTR[r13*1+rcx]
  3010. xor esi,r8d
  3011. vpxor ymm4,ymm4,ymm5
  3012. rorx r14d,edx,13
  3013. rorx r13d,edx,2
  3014. lea r10d,DWORD PTR[rcx*1+r10]
  3015. vpsrlq ymm7,ymm7,17
  3016. and r15d,esi
  3017. vaesenclast xmm11,xmm9,xmm10
  3018. vaesenc xmm9,xmm9,xmm10
  3019. vmovdqu xmm10,XMMWORD PTR[((208-128))+rdi]
  3020. xor r14d,r12d
  3021. xor r15d,r8d
  3022. vpaddd ymm3,ymm3,ymm4
  3023. xor r14d,r13d
  3024. lea ecx,DWORD PTR[r15*1+rcx]
  3025. mov r12d,r11d
  3026. vpxor ymm6,ymm6,ymm7
  3027. add ebx,DWORD PTR[((40+128))+rsp]
  3028. and r12d,r10d
  3029. rorx r13d,r10d,25
  3030. vpsrlq ymm7,ymm7,2
  3031. rorx r15d,r10d,11
  3032. lea ecx,DWORD PTR[r14*1+rcx]
  3033. lea ebx,DWORD PTR[r12*1+rbx]
  3034. vpxor ymm6,ymm6,ymm7
  3035. andn r12d,r10d,eax
  3036. xor r13d,r15d
  3037. rorx r14d,r10d,6
  3038. vpshufd ymm6,ymm6,132
  3039. lea ebx,DWORD PTR[r12*1+rbx]
  3040. xor r13d,r14d
  3041. mov r15d,ecx
  3042. vpsrldq ymm6,ymm6,8
  3043. rorx r12d,ecx,22
  3044. lea ebx,DWORD PTR[r13*1+rbx]
  3045. xor r15d,edx
  3046. vpaddd ymm3,ymm3,ymm6
  3047. rorx r14d,ecx,13
  3048. rorx r13d,ecx,2
  3049. lea r9d,DWORD PTR[rbx*1+r9]
  3050. vpshufd ymm7,ymm3,80
  3051. and esi,r15d
  3052. vpand xmm11,xmm11,xmm13
  3053. vaesenc xmm9,xmm9,xmm10
  3054. vmovdqu xmm10,XMMWORD PTR[((224-128))+rdi]
  3055. xor r14d,r12d
  3056. xor esi,edx
  3057. vpsrld ymm6,ymm7,10
  3058. xor r14d,r13d
  3059. lea ebx,DWORD PTR[rsi*1+rbx]
  3060. mov r12d,r10d
  3061. vpsrlq ymm7,ymm7,17
  3062. add eax,DWORD PTR[((44+128))+rsp]
  3063. and r12d,r9d
  3064. rorx r13d,r9d,25
  3065. vpxor ymm6,ymm6,ymm7
  3066. rorx esi,r9d,11
  3067. lea ebx,DWORD PTR[r14*1+rbx]
  3068. lea eax,DWORD PTR[r12*1+rax]
  3069. vpsrlq ymm7,ymm7,2
  3070. andn r12d,r9d,r11d
  3071. xor r13d,esi
  3072. rorx r14d,r9d,6
  3073. vpxor ymm6,ymm6,ymm7
  3074. lea eax,DWORD PTR[r12*1+rax]
  3075. xor r13d,r14d
  3076. mov esi,ebx
  3077. vpshufd ymm6,ymm6,232
  3078. rorx r12d,ebx,22
  3079. lea eax,DWORD PTR[r13*1+rax]
  3080. xor esi,ecx
  3081. vpslldq ymm6,ymm6,8
  3082. rorx r14d,ebx,13
  3083. rorx r13d,ebx,2
  3084. lea r8d,DWORD PTR[rax*1+r8]
  3085. vpaddd ymm3,ymm3,ymm6
  3086. and r15d,esi
  3087. vpor xmm8,xmm8,xmm11
  3088. vaesenclast xmm11,xmm9,xmm10
  3089. vmovdqu xmm10,XMMWORD PTR[((0-128))+rdi]
  3090. xor r14d,r12d
  3091. xor r15d,ecx
  3092. vpaddd ymm6,ymm3,YMMWORD PTR[96+rbp]
  3093. xor r14d,r13d
  3094. lea eax,DWORD PTR[r15*1+rax]
  3095. mov r12d,r9d
  3096. vmovdqa YMMWORD PTR[32+rsp],ymm6
  3097. vmovq r13,xmm15
  3098. vpextrq r15,xmm15,1
  3099. vpand xmm11,xmm11,xmm14
  3100. vpor xmm8,xmm8,xmm11
  3101. vmovdqu XMMWORD PTR[r13*1+r15],xmm8
  3102. lea r13,QWORD PTR[16+r13]
  3103. lea rbp,QWORD PTR[128+rbp]
  3104. cmp BYTE PTR[3+rbp],0
  3105. jne $L$avx2_00_47
  3106. vmovdqu xmm9,XMMWORD PTR[r13]
  3107. vpinsrq xmm15,xmm15,r13,0
  3108. add r11d,DWORD PTR[((0+64))+rsp]
  3109. and r12d,r8d
  3110. rorx r13d,r8d,25
  3111. rorx r15d,r8d,11
  3112. lea eax,DWORD PTR[r14*1+rax]
  3113. lea r11d,DWORD PTR[r12*1+r11]
  3114. andn r12d,r8d,r10d
  3115. xor r13d,r15d
  3116. rorx r14d,r8d,6
  3117. lea r11d,DWORD PTR[r12*1+r11]
  3118. xor r13d,r14d
  3119. mov r15d,eax
  3120. rorx r12d,eax,22
  3121. lea r11d,DWORD PTR[r13*1+r11]
  3122. xor r15d,ebx
  3123. rorx r14d,eax,13
  3124. rorx r13d,eax,2
  3125. lea edx,DWORD PTR[r11*1+rdx]
  3126. and esi,r15d
  3127. vpxor xmm9,xmm9,xmm10
  3128. vmovdqu xmm10,XMMWORD PTR[((16-128))+rdi]
  3129. xor r14d,r12d
  3130. xor esi,ebx
  3131. xor r14d,r13d
  3132. lea r11d,DWORD PTR[rsi*1+r11]
  3133. mov r12d,r8d
  3134. add r10d,DWORD PTR[((4+64))+rsp]
  3135. and r12d,edx
  3136. rorx r13d,edx,25
  3137. rorx esi,edx,11
  3138. lea r11d,DWORD PTR[r14*1+r11]
  3139. lea r10d,DWORD PTR[r12*1+r10]
  3140. andn r12d,edx,r9d
  3141. xor r13d,esi
  3142. rorx r14d,edx,6
  3143. lea r10d,DWORD PTR[r12*1+r10]
  3144. xor r13d,r14d
  3145. mov esi,r11d
  3146. rorx r12d,r11d,22
  3147. lea r10d,DWORD PTR[r13*1+r10]
  3148. xor esi,eax
  3149. rorx r14d,r11d,13
  3150. rorx r13d,r11d,2
  3151. lea ecx,DWORD PTR[r10*1+rcx]
  3152. and r15d,esi
  3153. vpxor xmm9,xmm9,xmm8
  3154. xor r14d,r12d
  3155. xor r15d,eax
  3156. xor r14d,r13d
  3157. lea r10d,DWORD PTR[r15*1+r10]
  3158. mov r12d,edx
  3159. add r9d,DWORD PTR[((8+64))+rsp]
  3160. and r12d,ecx
  3161. rorx r13d,ecx,25
  3162. rorx r15d,ecx,11
  3163. lea r10d,DWORD PTR[r14*1+r10]
  3164. lea r9d,DWORD PTR[r12*1+r9]
  3165. andn r12d,ecx,r8d
  3166. xor r13d,r15d
  3167. rorx r14d,ecx,6
  3168. lea r9d,DWORD PTR[r12*1+r9]
  3169. xor r13d,r14d
  3170. mov r15d,r10d
  3171. rorx r12d,r10d,22
  3172. lea r9d,DWORD PTR[r13*1+r9]
  3173. xor r15d,r11d
  3174. rorx r14d,r10d,13
  3175. rorx r13d,r10d,2
  3176. lea ebx,DWORD PTR[r9*1+rbx]
  3177. and esi,r15d
  3178. vaesenc xmm9,xmm9,xmm10
  3179. vmovdqu xmm10,XMMWORD PTR[((32-128))+rdi]
  3180. xor r14d,r12d
  3181. xor esi,r11d
  3182. xor r14d,r13d
  3183. lea r9d,DWORD PTR[rsi*1+r9]
  3184. mov r12d,ecx
  3185. add r8d,DWORD PTR[((12+64))+rsp]
  3186. and r12d,ebx
  3187. rorx r13d,ebx,25
  3188. rorx esi,ebx,11
  3189. lea r9d,DWORD PTR[r14*1+r9]
  3190. lea r8d,DWORD PTR[r12*1+r8]
  3191. andn r12d,ebx,edx
  3192. xor r13d,esi
  3193. rorx r14d,ebx,6
  3194. lea r8d,DWORD PTR[r12*1+r8]
  3195. xor r13d,r14d
  3196. mov esi,r9d
  3197. rorx r12d,r9d,22
  3198. lea r8d,DWORD PTR[r13*1+r8]
  3199. xor esi,r10d
  3200. rorx r14d,r9d,13
  3201. rorx r13d,r9d,2
  3202. lea eax,DWORD PTR[r8*1+rax]
  3203. and r15d,esi
  3204. vaesenc xmm9,xmm9,xmm10
  3205. vmovdqu xmm10,XMMWORD PTR[((48-128))+rdi]
  3206. xor r14d,r12d
  3207. xor r15d,r10d
  3208. xor r14d,r13d
  3209. lea r8d,DWORD PTR[r15*1+r8]
  3210. mov r12d,ebx
  3211. add edx,DWORD PTR[((32+64))+rsp]
  3212. and r12d,eax
  3213. rorx r13d,eax,25
  3214. rorx r15d,eax,11
  3215. lea r8d,DWORD PTR[r14*1+r8]
  3216. lea edx,DWORD PTR[r12*1+rdx]
  3217. andn r12d,eax,ecx
  3218. xor r13d,r15d
  3219. rorx r14d,eax,6
  3220. lea edx,DWORD PTR[r12*1+rdx]
  3221. xor r13d,r14d
  3222. mov r15d,r8d
  3223. rorx r12d,r8d,22
  3224. lea edx,DWORD PTR[r13*1+rdx]
  3225. xor r15d,r9d
  3226. rorx r14d,r8d,13
  3227. rorx r13d,r8d,2
  3228. lea r11d,DWORD PTR[rdx*1+r11]
  3229. and esi,r15d
  3230. vaesenc xmm9,xmm9,xmm10
  3231. vmovdqu xmm10,XMMWORD PTR[((64-128))+rdi]
  3232. xor r14d,r12d
  3233. xor esi,r9d
  3234. xor r14d,r13d
  3235. lea edx,DWORD PTR[rsi*1+rdx]
  3236. mov r12d,eax
  3237. add ecx,DWORD PTR[((36+64))+rsp]
  3238. and r12d,r11d
  3239. rorx r13d,r11d,25
  3240. rorx esi,r11d,11
  3241. lea edx,DWORD PTR[r14*1+rdx]
  3242. lea ecx,DWORD PTR[r12*1+rcx]
  3243. andn r12d,r11d,ebx
  3244. xor r13d,esi
  3245. rorx r14d,r11d,6
  3246. lea ecx,DWORD PTR[r12*1+rcx]
  3247. xor r13d,r14d
  3248. mov esi,edx
  3249. rorx r12d,edx,22
  3250. lea ecx,DWORD PTR[r13*1+rcx]
  3251. xor esi,r8d
  3252. rorx r14d,edx,13
  3253. rorx r13d,edx,2
  3254. lea r10d,DWORD PTR[rcx*1+r10]
  3255. and r15d,esi
  3256. vaesenc xmm9,xmm9,xmm10
  3257. vmovdqu xmm10,XMMWORD PTR[((80-128))+rdi]
  3258. xor r14d,r12d
  3259. xor r15d,r8d
  3260. xor r14d,r13d
  3261. lea ecx,DWORD PTR[r15*1+rcx]
  3262. mov r12d,r11d
  3263. add ebx,DWORD PTR[((40+64))+rsp]
  3264. and r12d,r10d
  3265. rorx r13d,r10d,25
  3266. rorx r15d,r10d,11
  3267. lea ecx,DWORD PTR[r14*1+rcx]
  3268. lea ebx,DWORD PTR[r12*1+rbx]
  3269. andn r12d,r10d,eax
  3270. xor r13d,r15d
  3271. rorx r14d,r10d,6
  3272. lea ebx,DWORD PTR[r12*1+rbx]
  3273. xor r13d,r14d
  3274. mov r15d,ecx
  3275. rorx r12d,ecx,22
  3276. lea ebx,DWORD PTR[r13*1+rbx]
  3277. xor r15d,edx
  3278. rorx r14d,ecx,13
  3279. rorx r13d,ecx,2
  3280. lea r9d,DWORD PTR[rbx*1+r9]
  3281. and esi,r15d
  3282. vaesenc xmm9,xmm9,xmm10
  3283. vmovdqu xmm10,XMMWORD PTR[((96-128))+rdi]
  3284. xor r14d,r12d
  3285. xor esi,edx
  3286. xor r14d,r13d
  3287. lea ebx,DWORD PTR[rsi*1+rbx]
  3288. mov r12d,r10d
  3289. add eax,DWORD PTR[((44+64))+rsp]
  3290. and r12d,r9d
  3291. rorx r13d,r9d,25
  3292. rorx esi,r9d,11
  3293. lea ebx,DWORD PTR[r14*1+rbx]
  3294. lea eax,DWORD PTR[r12*1+rax]
  3295. andn r12d,r9d,r11d
  3296. xor r13d,esi
  3297. rorx r14d,r9d,6
  3298. lea eax,DWORD PTR[r12*1+rax]
  3299. xor r13d,r14d
  3300. mov esi,ebx
  3301. rorx r12d,ebx,22
  3302. lea eax,DWORD PTR[r13*1+rax]
  3303. xor esi,ecx
  3304. rorx r14d,ebx,13
  3305. rorx r13d,ebx,2
  3306. lea r8d,DWORD PTR[rax*1+r8]
  3307. and r15d,esi
  3308. vaesenc xmm9,xmm9,xmm10
  3309. vmovdqu xmm10,XMMWORD PTR[((112-128))+rdi]
  3310. xor r14d,r12d
  3311. xor r15d,ecx
  3312. xor r14d,r13d
  3313. lea eax,DWORD PTR[r15*1+rax]
  3314. mov r12d,r9d
  3315. add r11d,DWORD PTR[rsp]
  3316. and r12d,r8d
  3317. rorx r13d,r8d,25
  3318. rorx r15d,r8d,11
  3319. lea eax,DWORD PTR[r14*1+rax]
  3320. lea r11d,DWORD PTR[r12*1+r11]
  3321. andn r12d,r8d,r10d
  3322. xor r13d,r15d
  3323. rorx r14d,r8d,6
  3324. lea r11d,DWORD PTR[r12*1+r11]
  3325. xor r13d,r14d
  3326. mov r15d,eax
  3327. rorx r12d,eax,22
  3328. lea r11d,DWORD PTR[r13*1+r11]
  3329. xor r15d,ebx
  3330. rorx r14d,eax,13
  3331. rorx r13d,eax,2
  3332. lea edx,DWORD PTR[r11*1+rdx]
  3333. and esi,r15d
  3334. vaesenc xmm9,xmm9,xmm10
  3335. vmovdqu xmm10,XMMWORD PTR[((128-128))+rdi]
  3336. xor r14d,r12d
  3337. xor esi,ebx
  3338. xor r14d,r13d
  3339. lea r11d,DWORD PTR[rsi*1+r11]
  3340. mov r12d,r8d
  3341. add r10d,DWORD PTR[4+rsp]
  3342. and r12d,edx
  3343. rorx r13d,edx,25
  3344. rorx esi,edx,11
  3345. lea r11d,DWORD PTR[r14*1+r11]
  3346. lea r10d,DWORD PTR[r12*1+r10]
  3347. andn r12d,edx,r9d
  3348. xor r13d,esi
  3349. rorx r14d,edx,6
  3350. lea r10d,DWORD PTR[r12*1+r10]
  3351. xor r13d,r14d
  3352. mov esi,r11d
  3353. rorx r12d,r11d,22
  3354. lea r10d,DWORD PTR[r13*1+r10]
  3355. xor esi,eax
  3356. rorx r14d,r11d,13
  3357. rorx r13d,r11d,2
  3358. lea ecx,DWORD PTR[r10*1+rcx]
  3359. and r15d,esi
  3360. vaesenc xmm9,xmm9,xmm10
  3361. vmovdqu xmm10,XMMWORD PTR[((144-128))+rdi]
  3362. xor r14d,r12d
  3363. xor r15d,eax
  3364. xor r14d,r13d
  3365. lea r10d,DWORD PTR[r15*1+r10]
  3366. mov r12d,edx
  3367. add r9d,DWORD PTR[8+rsp]
  3368. and r12d,ecx
  3369. rorx r13d,ecx,25
  3370. rorx r15d,ecx,11
  3371. lea r10d,DWORD PTR[r14*1+r10]
  3372. lea r9d,DWORD PTR[r12*1+r9]
  3373. andn r12d,ecx,r8d
  3374. xor r13d,r15d
  3375. rorx r14d,ecx,6
  3376. lea r9d,DWORD PTR[r12*1+r9]
  3377. xor r13d,r14d
  3378. mov r15d,r10d
  3379. rorx r12d,r10d,22
  3380. lea r9d,DWORD PTR[r13*1+r9]
  3381. xor r15d,r11d
  3382. rorx r14d,r10d,13
  3383. rorx r13d,r10d,2
  3384. lea ebx,DWORD PTR[r9*1+rbx]
  3385. and esi,r15d
  3386. vaesenc xmm9,xmm9,xmm10
  3387. vmovdqu xmm10,XMMWORD PTR[((160-128))+rdi]
  3388. xor r14d,r12d
  3389. xor esi,r11d
  3390. xor r14d,r13d
  3391. lea r9d,DWORD PTR[rsi*1+r9]
  3392. mov r12d,ecx
  3393. add r8d,DWORD PTR[12+rsp]
  3394. and r12d,ebx
  3395. rorx r13d,ebx,25
  3396. rorx esi,ebx,11
  3397. lea r9d,DWORD PTR[r14*1+r9]
  3398. lea r8d,DWORD PTR[r12*1+r8]
  3399. andn r12d,ebx,edx
  3400. xor r13d,esi
  3401. rorx r14d,ebx,6
  3402. lea r8d,DWORD PTR[r12*1+r8]
  3403. xor r13d,r14d
  3404. mov esi,r9d
  3405. rorx r12d,r9d,22
  3406. lea r8d,DWORD PTR[r13*1+r8]
  3407. xor esi,r10d
  3408. rorx r14d,r9d,13
  3409. rorx r13d,r9d,2
  3410. lea eax,DWORD PTR[r8*1+rax]
  3411. and r15d,esi
  3412. vaesenclast xmm11,xmm9,xmm10
  3413. vaesenc xmm9,xmm9,xmm10
  3414. vmovdqu xmm10,XMMWORD PTR[((176-128))+rdi]
  3415. xor r14d,r12d
  3416. xor r15d,r10d
  3417. xor r14d,r13d
  3418. lea r8d,DWORD PTR[r15*1+r8]
  3419. mov r12d,ebx
  3420. add edx,DWORD PTR[32+rsp]
  3421. and r12d,eax
  3422. rorx r13d,eax,25
  3423. rorx r15d,eax,11
  3424. lea r8d,DWORD PTR[r14*1+r8]
  3425. lea edx,DWORD PTR[r12*1+rdx]
  3426. andn r12d,eax,ecx
  3427. xor r13d,r15d
  3428. rorx r14d,eax,6
  3429. lea edx,DWORD PTR[r12*1+rdx]
  3430. xor r13d,r14d
  3431. mov r15d,r8d
  3432. rorx r12d,r8d,22
  3433. lea edx,DWORD PTR[r13*1+rdx]
  3434. xor r15d,r9d
  3435. rorx r14d,r8d,13
  3436. rorx r13d,r8d,2
  3437. lea r11d,DWORD PTR[rdx*1+r11]
  3438. and esi,r15d
  3439. vpand xmm8,xmm11,xmm12
  3440. vaesenc xmm9,xmm9,xmm10
  3441. vmovdqu xmm10,XMMWORD PTR[((192-128))+rdi]
  3442. xor r14d,r12d
  3443. xor esi,r9d
  3444. xor r14d,r13d
  3445. lea edx,DWORD PTR[rsi*1+rdx]
  3446. mov r12d,eax
  3447. add ecx,DWORD PTR[36+rsp]
  3448. and r12d,r11d
  3449. rorx r13d,r11d,25
  3450. rorx esi,r11d,11
  3451. lea edx,DWORD PTR[r14*1+rdx]
  3452. lea ecx,DWORD PTR[r12*1+rcx]
  3453. andn r12d,r11d,ebx
  3454. xor r13d,esi
  3455. rorx r14d,r11d,6
  3456. lea ecx,DWORD PTR[r12*1+rcx]
  3457. xor r13d,r14d
  3458. mov esi,edx
  3459. rorx r12d,edx,22
  3460. lea ecx,DWORD PTR[r13*1+rcx]
  3461. xor esi,r8d
  3462. rorx r14d,edx,13
  3463. rorx r13d,edx,2
  3464. lea r10d,DWORD PTR[rcx*1+r10]
  3465. and r15d,esi
  3466. vaesenclast xmm11,xmm9,xmm10
  3467. vaesenc xmm9,xmm9,xmm10
  3468. vmovdqu xmm10,XMMWORD PTR[((208-128))+rdi]
  3469. xor r14d,r12d
  3470. xor r15d,r8d
  3471. xor r14d,r13d
  3472. lea ecx,DWORD PTR[r15*1+rcx]
  3473. mov r12d,r11d
  3474. add ebx,DWORD PTR[40+rsp]
  3475. and r12d,r10d
  3476. rorx r13d,r10d,25
  3477. rorx r15d,r10d,11
  3478. lea ecx,DWORD PTR[r14*1+rcx]
  3479. lea ebx,DWORD PTR[r12*1+rbx]
  3480. andn r12d,r10d,eax
  3481. xor r13d,r15d
  3482. rorx r14d,r10d,6
  3483. lea ebx,DWORD PTR[r12*1+rbx]
  3484. xor r13d,r14d
  3485. mov r15d,ecx
  3486. rorx r12d,ecx,22
  3487. lea ebx,DWORD PTR[r13*1+rbx]
  3488. xor r15d,edx
  3489. rorx r14d,ecx,13
  3490. rorx r13d,ecx,2
  3491. lea r9d,DWORD PTR[rbx*1+r9]
  3492. and esi,r15d
  3493. vpand xmm11,xmm11,xmm13
  3494. vaesenc xmm9,xmm9,xmm10
  3495. vmovdqu xmm10,XMMWORD PTR[((224-128))+rdi]
  3496. xor r14d,r12d
  3497. xor esi,edx
  3498. xor r14d,r13d
  3499. lea ebx,DWORD PTR[rsi*1+rbx]
  3500. mov r12d,r10d
  3501. add eax,DWORD PTR[44+rsp]
  3502. and r12d,r9d
  3503. rorx r13d,r9d,25
  3504. rorx esi,r9d,11
  3505. lea ebx,DWORD PTR[r14*1+rbx]
  3506. lea eax,DWORD PTR[r12*1+rax]
  3507. andn r12d,r9d,r11d
  3508. xor r13d,esi
  3509. rorx r14d,r9d,6
  3510. lea eax,DWORD PTR[r12*1+rax]
  3511. xor r13d,r14d
  3512. mov esi,ebx
  3513. rorx r12d,ebx,22
  3514. lea eax,DWORD PTR[r13*1+rax]
  3515. xor esi,ecx
  3516. rorx r14d,ebx,13
  3517. rorx r13d,ebx,2
  3518. lea r8d,DWORD PTR[rax*1+r8]
  3519. and r15d,esi
  3520. vpor xmm8,xmm8,xmm11
  3521. vaesenclast xmm11,xmm9,xmm10
  3522. vmovdqu xmm10,XMMWORD PTR[((0-128))+rdi]
  3523. xor r14d,r12d
  3524. xor r15d,ecx
  3525. xor r14d,r13d
  3526. lea eax,DWORD PTR[r15*1+rax]
  3527. mov r12d,r9d
  3528. vpextrq r12,xmm15,1
  3529. vmovq r13,xmm15
  3530. mov r15,QWORD PTR[552+rsp]
  3531. add eax,r14d
  3532. lea rbp,QWORD PTR[448+rsp]
  3533. vpand xmm11,xmm11,xmm14
  3534. vpor xmm8,xmm8,xmm11
  3535. vmovdqu XMMWORD PTR[r13*1+r12],xmm8
  3536. lea r13,QWORD PTR[16+r13]
  3537. add eax,DWORD PTR[r15]
  3538. add ebx,DWORD PTR[4+r15]
  3539. add ecx,DWORD PTR[8+r15]
  3540. add edx,DWORD PTR[12+r15]
  3541. add r8d,DWORD PTR[16+r15]
  3542. add r9d,DWORD PTR[20+r15]
  3543. add r10d,DWORD PTR[24+r15]
  3544. add r11d,DWORD PTR[28+r15]
  3545. mov DWORD PTR[r15],eax
  3546. mov DWORD PTR[4+r15],ebx
  3547. mov DWORD PTR[8+r15],ecx
  3548. mov DWORD PTR[12+r15],edx
  3549. mov DWORD PTR[16+r15],r8d
  3550. mov DWORD PTR[20+r15],r9d
  3551. mov DWORD PTR[24+r15],r10d
  3552. mov DWORD PTR[28+r15],r11d
  3553. cmp r13,QWORD PTR[80+rbp]
  3554. je $L$done_avx2
  3555. xor r14d,r14d
  3556. mov esi,ebx
  3557. mov r12d,r9d
  3558. xor esi,ecx
  3559. jmp $L$ower_avx2
  3560. ALIGN 16
  3561. $L$ower_avx2::
  3562. vmovdqu xmm9,XMMWORD PTR[r13]
  3563. vpinsrq xmm15,xmm15,r13,0
  3564. add r11d,DWORD PTR[((0+16))+rbp]
  3565. and r12d,r8d
  3566. rorx r13d,r8d,25
  3567. rorx r15d,r8d,11
  3568. lea eax,DWORD PTR[r14*1+rax]
  3569. lea r11d,DWORD PTR[r12*1+r11]
  3570. andn r12d,r8d,r10d
  3571. xor r13d,r15d
  3572. rorx r14d,r8d,6
  3573. lea r11d,DWORD PTR[r12*1+r11]
  3574. xor r13d,r14d
  3575. mov r15d,eax
  3576. rorx r12d,eax,22
  3577. lea r11d,DWORD PTR[r13*1+r11]
  3578. xor r15d,ebx
  3579. rorx r14d,eax,13
  3580. rorx r13d,eax,2
  3581. lea edx,DWORD PTR[r11*1+rdx]
  3582. and esi,r15d
  3583. vpxor xmm9,xmm9,xmm10
  3584. vmovdqu xmm10,XMMWORD PTR[((16-128))+rdi]
  3585. xor r14d,r12d
  3586. xor esi,ebx
  3587. xor r14d,r13d
  3588. lea r11d,DWORD PTR[rsi*1+r11]
  3589. mov r12d,r8d
  3590. add r10d,DWORD PTR[((4+16))+rbp]
  3591. and r12d,edx
  3592. rorx r13d,edx,25
  3593. rorx esi,edx,11
  3594. lea r11d,DWORD PTR[r14*1+r11]
  3595. lea r10d,DWORD PTR[r12*1+r10]
  3596. andn r12d,edx,r9d
  3597. xor r13d,esi
  3598. rorx r14d,edx,6
  3599. lea r10d,DWORD PTR[r12*1+r10]
  3600. xor r13d,r14d
  3601. mov esi,r11d
  3602. rorx r12d,r11d,22
  3603. lea r10d,DWORD PTR[r13*1+r10]
  3604. xor esi,eax
  3605. rorx r14d,r11d,13
  3606. rorx r13d,r11d,2
  3607. lea ecx,DWORD PTR[r10*1+rcx]
  3608. and r15d,esi
  3609. vpxor xmm9,xmm9,xmm8
  3610. xor r14d,r12d
  3611. xor r15d,eax
  3612. xor r14d,r13d
  3613. lea r10d,DWORD PTR[r15*1+r10]
  3614. mov r12d,edx
  3615. add r9d,DWORD PTR[((8+16))+rbp]
  3616. and r12d,ecx
  3617. rorx r13d,ecx,25
  3618. rorx r15d,ecx,11
  3619. lea r10d,DWORD PTR[r14*1+r10]
  3620. lea r9d,DWORD PTR[r12*1+r9]
  3621. andn r12d,ecx,r8d
  3622. xor r13d,r15d
  3623. rorx r14d,ecx,6
  3624. lea r9d,DWORD PTR[r12*1+r9]
  3625. xor r13d,r14d
  3626. mov r15d,r10d
  3627. rorx r12d,r10d,22
  3628. lea r9d,DWORD PTR[r13*1+r9]
  3629. xor r15d,r11d
  3630. rorx r14d,r10d,13
  3631. rorx r13d,r10d,2
  3632. lea ebx,DWORD PTR[r9*1+rbx]
  3633. and esi,r15d
  3634. vaesenc xmm9,xmm9,xmm10
  3635. vmovdqu xmm10,XMMWORD PTR[((32-128))+rdi]
  3636. xor r14d,r12d
  3637. xor esi,r11d
  3638. xor r14d,r13d
  3639. lea r9d,DWORD PTR[rsi*1+r9]
  3640. mov r12d,ecx
  3641. add r8d,DWORD PTR[((12+16))+rbp]
  3642. and r12d,ebx
  3643. rorx r13d,ebx,25
  3644. rorx esi,ebx,11
  3645. lea r9d,DWORD PTR[r14*1+r9]
  3646. lea r8d,DWORD PTR[r12*1+r8]
  3647. andn r12d,ebx,edx
  3648. xor r13d,esi
  3649. rorx r14d,ebx,6
  3650. lea r8d,DWORD PTR[r12*1+r8]
  3651. xor r13d,r14d
  3652. mov esi,r9d
  3653. rorx r12d,r9d,22
  3654. lea r8d,DWORD PTR[r13*1+r8]
  3655. xor esi,r10d
  3656. rorx r14d,r9d,13
  3657. rorx r13d,r9d,2
  3658. lea eax,DWORD PTR[r8*1+rax]
  3659. and r15d,esi
  3660. vaesenc xmm9,xmm9,xmm10
  3661. vmovdqu xmm10,XMMWORD PTR[((48-128))+rdi]
  3662. xor r14d,r12d
  3663. xor r15d,r10d
  3664. xor r14d,r13d
  3665. lea r8d,DWORD PTR[r15*1+r8]
  3666. mov r12d,ebx
  3667. add edx,DWORD PTR[((32+16))+rbp]
  3668. and r12d,eax
  3669. rorx r13d,eax,25
  3670. rorx r15d,eax,11
  3671. lea r8d,DWORD PTR[r14*1+r8]
  3672. lea edx,DWORD PTR[r12*1+rdx]
  3673. andn r12d,eax,ecx
  3674. xor r13d,r15d
  3675. rorx r14d,eax,6
  3676. lea edx,DWORD PTR[r12*1+rdx]
  3677. xor r13d,r14d
  3678. mov r15d,r8d
  3679. rorx r12d,r8d,22
  3680. lea edx,DWORD PTR[r13*1+rdx]
  3681. xor r15d,r9d
  3682. rorx r14d,r8d,13
  3683. rorx r13d,r8d,2
  3684. lea r11d,DWORD PTR[rdx*1+r11]
  3685. and esi,r15d
  3686. vaesenc xmm9,xmm9,xmm10
  3687. vmovdqu xmm10,XMMWORD PTR[((64-128))+rdi]
  3688. xor r14d,r12d
  3689. xor esi,r9d
  3690. xor r14d,r13d
  3691. lea edx,DWORD PTR[rsi*1+rdx]
  3692. mov r12d,eax
  3693. add ecx,DWORD PTR[((36+16))+rbp]
  3694. and r12d,r11d
  3695. rorx r13d,r11d,25
  3696. rorx esi,r11d,11
  3697. lea edx,DWORD PTR[r14*1+rdx]
  3698. lea ecx,DWORD PTR[r12*1+rcx]
  3699. andn r12d,r11d,ebx
  3700. xor r13d,esi
  3701. rorx r14d,r11d,6
  3702. lea ecx,DWORD PTR[r12*1+rcx]
  3703. xor r13d,r14d
  3704. mov esi,edx
  3705. rorx r12d,edx,22
  3706. lea ecx,DWORD PTR[r13*1+rcx]
  3707. xor esi,r8d
  3708. rorx r14d,edx,13
  3709. rorx r13d,edx,2
  3710. lea r10d,DWORD PTR[rcx*1+r10]
  3711. and r15d,esi
  3712. vaesenc xmm9,xmm9,xmm10
  3713. vmovdqu xmm10,XMMWORD PTR[((80-128))+rdi]
  3714. xor r14d,r12d
  3715. xor r15d,r8d
  3716. xor r14d,r13d
  3717. lea ecx,DWORD PTR[r15*1+rcx]
  3718. mov r12d,r11d
  3719. add ebx,DWORD PTR[((40+16))+rbp]
  3720. and r12d,r10d
  3721. rorx r13d,r10d,25
  3722. rorx r15d,r10d,11
  3723. lea ecx,DWORD PTR[r14*1+rcx]
  3724. lea ebx,DWORD PTR[r12*1+rbx]
  3725. andn r12d,r10d,eax
  3726. xor r13d,r15d
  3727. rorx r14d,r10d,6
  3728. lea ebx,DWORD PTR[r12*1+rbx]
  3729. xor r13d,r14d
  3730. mov r15d,ecx
  3731. rorx r12d,ecx,22
  3732. lea ebx,DWORD PTR[r13*1+rbx]
  3733. xor r15d,edx
  3734. rorx r14d,ecx,13
  3735. rorx r13d,ecx,2
  3736. lea r9d,DWORD PTR[rbx*1+r9]
  3737. and esi,r15d
  3738. vaesenc xmm9,xmm9,xmm10
  3739. vmovdqu xmm10,XMMWORD PTR[((96-128))+rdi]
  3740. xor r14d,r12d
  3741. xor esi,edx
  3742. xor r14d,r13d
  3743. lea ebx,DWORD PTR[rsi*1+rbx]
  3744. mov r12d,r10d
  3745. add eax,DWORD PTR[((44+16))+rbp]
  3746. and r12d,r9d
  3747. rorx r13d,r9d,25
  3748. rorx esi,r9d,11
  3749. lea ebx,DWORD PTR[r14*1+rbx]
  3750. lea eax,DWORD PTR[r12*1+rax]
  3751. andn r12d,r9d,r11d
  3752. xor r13d,esi
  3753. rorx r14d,r9d,6
  3754. lea eax,DWORD PTR[r12*1+rax]
  3755. xor r13d,r14d
  3756. mov esi,ebx
  3757. rorx r12d,ebx,22
  3758. lea eax,DWORD PTR[r13*1+rax]
  3759. xor esi,ecx
  3760. rorx r14d,ebx,13
  3761. rorx r13d,ebx,2
  3762. lea r8d,DWORD PTR[rax*1+r8]
  3763. and r15d,esi
  3764. vaesenc xmm9,xmm9,xmm10
  3765. vmovdqu xmm10,XMMWORD PTR[((112-128))+rdi]
  3766. xor r14d,r12d
  3767. xor r15d,ecx
  3768. xor r14d,r13d
  3769. lea eax,DWORD PTR[r15*1+rax]
  3770. mov r12d,r9d
  3771. lea rbp,QWORD PTR[((-64))+rbp]
  3772. add r11d,DWORD PTR[((0+16))+rbp]
  3773. and r12d,r8d
  3774. rorx r13d,r8d,25
  3775. rorx r15d,r8d,11
  3776. lea eax,DWORD PTR[r14*1+rax]
  3777. lea r11d,DWORD PTR[r12*1+r11]
  3778. andn r12d,r8d,r10d
  3779. xor r13d,r15d
  3780. rorx r14d,r8d,6
  3781. lea r11d,DWORD PTR[r12*1+r11]
  3782. xor r13d,r14d
  3783. mov r15d,eax
  3784. rorx r12d,eax,22
  3785. lea r11d,DWORD PTR[r13*1+r11]
  3786. xor r15d,ebx
  3787. rorx r14d,eax,13
  3788. rorx r13d,eax,2
  3789. lea edx,DWORD PTR[r11*1+rdx]
  3790. and esi,r15d
  3791. vaesenc xmm9,xmm9,xmm10
  3792. vmovdqu xmm10,XMMWORD PTR[((128-128))+rdi]
  3793. xor r14d,r12d
  3794. xor esi,ebx
  3795. xor r14d,r13d
  3796. lea r11d,DWORD PTR[rsi*1+r11]
  3797. mov r12d,r8d
  3798. add r10d,DWORD PTR[((4+16))+rbp]
  3799. and r12d,edx
  3800. rorx r13d,edx,25
  3801. rorx esi,edx,11
  3802. lea r11d,DWORD PTR[r14*1+r11]
  3803. lea r10d,DWORD PTR[r12*1+r10]
  3804. andn r12d,edx,r9d
  3805. xor r13d,esi
  3806. rorx r14d,edx,6
  3807. lea r10d,DWORD PTR[r12*1+r10]
  3808. xor r13d,r14d
  3809. mov esi,r11d
  3810. rorx r12d,r11d,22
  3811. lea r10d,DWORD PTR[r13*1+r10]
  3812. xor esi,eax
  3813. rorx r14d,r11d,13
  3814. rorx r13d,r11d,2
  3815. lea ecx,DWORD PTR[r10*1+rcx]
  3816. and r15d,esi
  3817. vaesenc xmm9,xmm9,xmm10
  3818. vmovdqu xmm10,XMMWORD PTR[((144-128))+rdi]
  3819. xor r14d,r12d
  3820. xor r15d,eax
  3821. xor r14d,r13d
  3822. lea r10d,DWORD PTR[r15*1+r10]
  3823. mov r12d,edx
  3824. add r9d,DWORD PTR[((8+16))+rbp]
  3825. and r12d,ecx
  3826. rorx r13d,ecx,25
  3827. rorx r15d,ecx,11
  3828. lea r10d,DWORD PTR[r14*1+r10]
  3829. lea r9d,DWORD PTR[r12*1+r9]
  3830. andn r12d,ecx,r8d
  3831. xor r13d,r15d
  3832. rorx r14d,ecx,6
  3833. lea r9d,DWORD PTR[r12*1+r9]
  3834. xor r13d,r14d
  3835. mov r15d,r10d
  3836. rorx r12d,r10d,22
  3837. lea r9d,DWORD PTR[r13*1+r9]
  3838. xor r15d,r11d
  3839. rorx r14d,r10d,13
  3840. rorx r13d,r10d,2
  3841. lea ebx,DWORD PTR[r9*1+rbx]
  3842. and esi,r15d
  3843. vaesenc xmm9,xmm9,xmm10
  3844. vmovdqu xmm10,XMMWORD PTR[((160-128))+rdi]
  3845. xor r14d,r12d
  3846. xor esi,r11d
  3847. xor r14d,r13d
  3848. lea r9d,DWORD PTR[rsi*1+r9]
  3849. mov r12d,ecx
  3850. add r8d,DWORD PTR[((12+16))+rbp]
  3851. and r12d,ebx
  3852. rorx r13d,ebx,25
  3853. rorx esi,ebx,11
  3854. lea r9d,DWORD PTR[r14*1+r9]
  3855. lea r8d,DWORD PTR[r12*1+r8]
  3856. andn r12d,ebx,edx
  3857. xor r13d,esi
  3858. rorx r14d,ebx,6
  3859. lea r8d,DWORD PTR[r12*1+r8]
  3860. xor r13d,r14d
  3861. mov esi,r9d
  3862. rorx r12d,r9d,22
  3863. lea r8d,DWORD PTR[r13*1+r8]
  3864. xor esi,r10d
  3865. rorx r14d,r9d,13
  3866. rorx r13d,r9d,2
  3867. lea eax,DWORD PTR[r8*1+rax]
  3868. and r15d,esi
  3869. vaesenclast xmm11,xmm9,xmm10
  3870. vaesenc xmm9,xmm9,xmm10
  3871. vmovdqu xmm10,XMMWORD PTR[((176-128))+rdi]
  3872. xor r14d,r12d
  3873. xor r15d,r10d
  3874. xor r14d,r13d
  3875. lea r8d,DWORD PTR[r15*1+r8]
  3876. mov r12d,ebx
  3877. add edx,DWORD PTR[((32+16))+rbp]
  3878. and r12d,eax
  3879. rorx r13d,eax,25
  3880. rorx r15d,eax,11
  3881. lea r8d,DWORD PTR[r14*1+r8]
  3882. lea edx,DWORD PTR[r12*1+rdx]
  3883. andn r12d,eax,ecx
  3884. xor r13d,r15d
  3885. rorx r14d,eax,6
  3886. lea edx,DWORD PTR[r12*1+rdx]
  3887. xor r13d,r14d
  3888. mov r15d,r8d
  3889. rorx r12d,r8d,22
  3890. lea edx,DWORD PTR[r13*1+rdx]
  3891. xor r15d,r9d
  3892. rorx r14d,r8d,13
  3893. rorx r13d,r8d,2
  3894. lea r11d,DWORD PTR[rdx*1+r11]
  3895. and esi,r15d
  3896. vpand xmm8,xmm11,xmm12
  3897. vaesenc xmm9,xmm9,xmm10
  3898. vmovdqu xmm10,XMMWORD PTR[((192-128))+rdi]
  3899. xor r14d,r12d
  3900. xor esi,r9d
  3901. xor r14d,r13d
  3902. lea edx,DWORD PTR[rsi*1+rdx]
  3903. mov r12d,eax
  3904. add ecx,DWORD PTR[((36+16))+rbp]
  3905. and r12d,r11d
  3906. rorx r13d,r11d,25
  3907. rorx esi,r11d,11
  3908. lea edx,DWORD PTR[r14*1+rdx]
  3909. lea ecx,DWORD PTR[r12*1+rcx]
  3910. andn r12d,r11d,ebx
  3911. xor r13d,esi
  3912. rorx r14d,r11d,6
  3913. lea ecx,DWORD PTR[r12*1+rcx]
  3914. xor r13d,r14d
  3915. mov esi,edx
  3916. rorx r12d,edx,22
  3917. lea ecx,DWORD PTR[r13*1+rcx]
  3918. xor esi,r8d
  3919. rorx r14d,edx,13
  3920. rorx r13d,edx,2
  3921. lea r10d,DWORD PTR[rcx*1+r10]
  3922. and r15d,esi
  3923. vaesenclast xmm11,xmm9,xmm10
  3924. vaesenc xmm9,xmm9,xmm10
  3925. vmovdqu xmm10,XMMWORD PTR[((208-128))+rdi]
  3926. xor r14d,r12d
  3927. xor r15d,r8d
  3928. xor r14d,r13d
  3929. lea ecx,DWORD PTR[r15*1+rcx]
  3930. mov r12d,r11d
  3931. add ebx,DWORD PTR[((40+16))+rbp]
  3932. and r12d,r10d
  3933. rorx r13d,r10d,25
  3934. rorx r15d,r10d,11
  3935. lea ecx,DWORD PTR[r14*1+rcx]
  3936. lea ebx,DWORD PTR[r12*1+rbx]
  3937. andn r12d,r10d,eax
  3938. xor r13d,r15d
  3939. rorx r14d,r10d,6
  3940. lea ebx,DWORD PTR[r12*1+rbx]
  3941. xor r13d,r14d
  3942. mov r15d,ecx
  3943. rorx r12d,ecx,22
  3944. lea ebx,DWORD PTR[r13*1+rbx]
  3945. xor r15d,edx
  3946. rorx r14d,ecx,13
  3947. rorx r13d,ecx,2
  3948. lea r9d,DWORD PTR[rbx*1+r9]
  3949. and esi,r15d
  3950. vpand xmm11,xmm11,xmm13
  3951. vaesenc xmm9,xmm9,xmm10
  3952. vmovdqu xmm10,XMMWORD PTR[((224-128))+rdi]
  3953. xor r14d,r12d
  3954. xor esi,edx
  3955. xor r14d,r13d
  3956. lea ebx,DWORD PTR[rsi*1+rbx]
  3957. mov r12d,r10d
  3958. add eax,DWORD PTR[((44+16))+rbp]
  3959. and r12d,r9d
  3960. rorx r13d,r9d,25
  3961. rorx esi,r9d,11
  3962. lea ebx,DWORD PTR[r14*1+rbx]
  3963. lea eax,DWORD PTR[r12*1+rax]
  3964. andn r12d,r9d,r11d
  3965. xor r13d,esi
  3966. rorx r14d,r9d,6
  3967. lea eax,DWORD PTR[r12*1+rax]
  3968. xor r13d,r14d
  3969. mov esi,ebx
  3970. rorx r12d,ebx,22
  3971. lea eax,DWORD PTR[r13*1+rax]
  3972. xor esi,ecx
  3973. rorx r14d,ebx,13
  3974. rorx r13d,ebx,2
  3975. lea r8d,DWORD PTR[rax*1+r8]
  3976. and r15d,esi
  3977. vpor xmm8,xmm8,xmm11
  3978. vaesenclast xmm11,xmm9,xmm10
  3979. vmovdqu xmm10,XMMWORD PTR[((0-128))+rdi]
  3980. xor r14d,r12d
  3981. xor r15d,ecx
  3982. xor r14d,r13d
  3983. lea eax,DWORD PTR[r15*1+rax]
  3984. mov r12d,r9d
  3985. vmovq r13,xmm15
  3986. vpextrq r15,xmm15,1
  3987. vpand xmm11,xmm11,xmm14
  3988. vpor xmm8,xmm8,xmm11
  3989. lea rbp,QWORD PTR[((-64))+rbp]
  3990. vmovdqu XMMWORD PTR[r13*1+r15],xmm8
  3991. lea r13,QWORD PTR[16+r13]
  3992. cmp rbp,rsp
  3993. jae $L$ower_avx2
  3994. mov r15,QWORD PTR[552+rsp]
  3995. lea r13,QWORD PTR[64+r13]
  3996. mov rsi,QWORD PTR[560+rsp]
  3997. add eax,r14d
  3998. lea rsp,QWORD PTR[448+rsp]
  3999. add eax,DWORD PTR[r15]
  4000. add ebx,DWORD PTR[4+r15]
  4001. add ecx,DWORD PTR[8+r15]
  4002. add edx,DWORD PTR[12+r15]
  4003. add r8d,DWORD PTR[16+r15]
  4004. add r9d,DWORD PTR[20+r15]
  4005. add r10d,DWORD PTR[24+r15]
  4006. lea r12,QWORD PTR[r13*1+rsi]
  4007. add r11d,DWORD PTR[28+r15]
  4008. cmp r13,QWORD PTR[((64+16))+rsp]
  4009. mov DWORD PTR[r15],eax
  4010. cmove r12,rsp
  4011. mov DWORD PTR[4+r15],ebx
  4012. mov DWORD PTR[8+r15],ecx
  4013. mov DWORD PTR[12+r15],edx
  4014. mov DWORD PTR[16+r15],r8d
  4015. mov DWORD PTR[20+r15],r9d
  4016. mov DWORD PTR[24+r15],r10d
  4017. mov DWORD PTR[28+r15],r11d
  4018. jbe $L$oop_avx2
  4019. lea rbp,QWORD PTR[rsp]
  4020. $L$done_avx2::
  4021. mov r8,QWORD PTR[((64+32))+rbp]
  4022. mov rsi,QWORD PTR[((64+56))+rbp]
  4023. vmovdqu XMMWORD PTR[r8],xmm8
  4024. vzeroall
  4025. movaps xmm6,XMMWORD PTR[128+rbp]
  4026. movaps xmm7,XMMWORD PTR[144+rbp]
  4027. movaps xmm8,XMMWORD PTR[160+rbp]
  4028. movaps xmm9,XMMWORD PTR[176+rbp]
  4029. movaps xmm10,XMMWORD PTR[192+rbp]
  4030. movaps xmm11,XMMWORD PTR[208+rbp]
  4031. movaps xmm12,XMMWORD PTR[224+rbp]
  4032. movaps xmm13,XMMWORD PTR[240+rbp]
  4033. movaps xmm14,XMMWORD PTR[256+rbp]
  4034. movaps xmm15,XMMWORD PTR[272+rbp]
  4035. mov r15,QWORD PTR[((-48))+rsi]
  4036. mov r14,QWORD PTR[((-40))+rsi]
  4037. mov r13,QWORD PTR[((-32))+rsi]
  4038. mov r12,QWORD PTR[((-24))+rsi]
  4039. mov rbp,QWORD PTR[((-16))+rsi]
  4040. mov rbx,QWORD PTR[((-8))+rsi]
  4041. lea rsp,QWORD PTR[rsi]
  4042. $L$epilogue_avx2::
  4043. mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue
  4044. mov rsi,QWORD PTR[16+rsp]
  4045. DB 0F3h,0C3h ;repret
  4046. $L$SEH_end_aesni_cbc_sha256_enc_avx2::
  4047. aesni_cbc_sha256_enc_avx2 ENDP
  4048. ALIGN 32
  4049. aesni_cbc_sha256_enc_shaext PROC PRIVATE ; SHA-extension path: AES-CBC encryption interleaved with SHA-256 hashing; each loop pass encrypts 4 AES blocks and hashes one 64-byte block
  4050. mov QWORD PTR[8+rsp],rdi ;WIN64 prologue: park callee-saved rdi/rsi in the caller's home slots
  4051. mov QWORD PTR[16+rsp],rsi
  4052. mov rax,rsp ; rax = entry rsp; se_handler uses context->Rax as the frame pointer while Rip is still inside the prologue
  4053. $L$SEH_begin_aesni_cbc_sha256_enc_shaext:: ; start of the range listed in .pdata (placed after rax is set)
  4054. mov rdi,rcx ; arg1 -> rdi: AES input pointer (16 bytes read per block below)
  4055. mov rsi,rdx ; arg2 -> rsi: AES output pointer
  4056. mov rdx,r8 ; arg3 -> rdx: pass count, decremented once per 64 bytes; must be non-zero because the loop tests only after the first pass
  4057. mov rcx,r9 ; arg4 -> rcx: AES round-key schedule
  4058. mov r8,QWORD PTR[40+rsp] ; arg5 (stack) -> r8: 16-byte CBC chaining value, rewritten on exit
  4059. mov r9,QWORD PTR[48+rsp] ; arg6 (stack) -> r9: 32-byte SHA-256 state, rewritten on exit
  4060. mov r10,QWORD PTR[56+rsp] ; arg7 (stack) -> r10: data to hash, 64 bytes consumed per pass
  4061. lea rsp,QWORD PTR[((-168))+rsp] ; 160 bytes for xmm6-xmm15 plus 8 so the movaps slots are 16-byte aligned (entry rsp is 8 mod 16 under the Win64 ABI)
  4062. movaps XMMWORD PTR[(-8-160)+rax],xmm6 ; rax-168 = new rsp: xmm6-xmm15 (callee-saved on Win64) go to [rsp]..[144+rsp]
  4063. movaps XMMWORD PTR[(-8-144)+rax],xmm7
  4064. movaps XMMWORD PTR[(-8-128)+rax],xmm8
  4065. movaps XMMWORD PTR[(-8-112)+rax],xmm9
  4066. movaps XMMWORD PTR[(-8-96)+rax],xmm10
  4067. movaps XMMWORD PTR[(-8-80)+rax],xmm11
  4068. movaps XMMWORD PTR[(-8-64)+rax],xmm12
  4069. movaps XMMWORD PTR[(-8-48)+rax],xmm13
  4070. movaps XMMWORD PTR[(-8-32)+rax],xmm14
  4071. movaps XMMWORD PTR[(-8-16)+rax],xmm15
  4072. $L$prologue_shaext:: ; prologue-end marker read by se_handler (first handler-data entry in .xdata)
  4073. lea rax,QWORD PTR[((K256+128))] ; rax = K256+128; every table access below is written as (offset-128)+rax
  4074. movdqu xmm1,XMMWORD PTR[r9] ; xmm1 = state bytes 0..15
  4075. movdqu xmm2,XMMWORD PTR[16+r9] ; xmm2 = state bytes 16..31
  4076. movdqa xmm3,XMMWORD PTR[((512-128))+rax] ; 16 bytes at K256+512, used as the pshufb control below (presumably a byte-swap mask; that data is outside this view)
  4077. mov r11d,DWORD PTR[240+rcx] ; dword at key+240, compared with 11 below to pick the 10/12/14-round tail (presumably the round-count field)
  4078. sub rsi,rdi ; rsi = out - in, so [rdi+rsi] addresses the output while only rdi advances
  4079. movups xmm15,XMMWORD PTR[rcx] ; xmm15 = round key 0, kept for the whole call
  4080. movups xmm6,XMMWORD PTR[r8] ; xmm6 = CBC chaining value (previous ciphertext block)
  4081. movups xmm4,XMMWORD PTR[16+rcx] ; xmm4 = round key 1
  4082. lea rcx,QWORD PTR[112+rcx] ; bias the key pointer: round key k is now at [16*k-112+rcx]
  4083. pshufd xmm0,xmm1,01bh ; next five instructions: permute the state into the register layout fed to sha256rnds2 (reversed after the loop)
  4084. pshufd xmm1,xmm1,0b1h
  4085. pshufd xmm2,xmm2,01bh
  4086. movdqa xmm7,xmm3 ; keep a copy of the shuffle control; xmm3 doubles as scratch inside the loop
  4087. DB 102,15,58,15,202,8 ; palignr xmm1,xmm2,8
  4088. punpcklqdq xmm2,xmm0
  4089. jmp $L$oop_shaext
  4090. ALIGN 16
  4091. $L$oop_shaext:: ; per pass: xmm10-xmm13 = message schedule, xmm1/xmm2 = SHA state, xmm6 = AES block in flight, xmm4/xmm5 = alternating round keys
  4092. movdqu xmm10,XMMWORD PTR[r10] ; load the 64-byte hash block
  4093. movdqu xmm11,XMMWORD PTR[16+r10]
  4094. movdqu xmm12,XMMWORD PTR[32+r10]
  4095. DB 102,68,15,56,0,211 ; pshufb xmm10,xmm3
  4096. movdqu xmm13,XMMWORD PTR[48+r10]
  4097. movdqa xmm0,XMMWORD PTR[((0-128))+rax] ; K256 row 0; rows are 32 bytes apart because each 16-byte row is stored twice in the table
  4098. paddd xmm0,xmm10 ; xmm0 = W + K for the next four rounds
  4099. DB 102,68,15,56,0,219 ; pshufb xmm11,xmm3
  4100. movdqa xmm9,xmm2 ; save the incoming state for the add at the end of the pass
  4101. movdqa xmm8,xmm1
  4102. movups xmm14,XMMWORD PTR[rdi] ; AES block 0 input
  4103. xorps xmm14,xmm15 ; input ^ round key 0
  4104. xorps xmm6,xmm14 ; ^ previous ciphertext (CBC chaining)
  4105. movups xmm5,XMMWORD PTR[((-80))+rcx] ; round key 2
  4106. aesenc xmm6,xmm4
  4107. DB 15,56,203,209 ; sha256rnds2 xmm2,xmm1 (implicit xmm0 = W+K): two SHA-256 rounds
  4108. pshufd xmm0,xmm0,00eh ; move the upper two W+K dwords down for the next two rounds
  4109. movups xmm4,XMMWORD PTR[((-64))+rcx]
  4110. aesenc xmm6,xmm5
  4111. DB 15,56,203,202 ; sha256rnds2 xmm1,xmm2
  4112. movdqa xmm0,XMMWORD PTR[((32-128))+rax]
  4113. paddd xmm0,xmm11
  4114. DB 102,68,15,56,0,227 ; pshufb xmm12,xmm3
  4115. lea r10,QWORD PTR[64+r10] ; advance the hash input pointer
  4116. movups xmm5,XMMWORD PTR[((-48))+rcx]
  4117. aesenc xmm6,xmm4
  4118. DB 15,56,203,209 ; sha256rnds2 xmm2,xmm1
  4119. pshufd xmm0,xmm0,00eh
  4120. movups xmm4,XMMWORD PTR[((-32))+rcx]
  4121. aesenc xmm6,xmm5
  4122. DB 15,56,203,202 ; sha256rnds2 xmm1,xmm2
  4123. movdqa xmm0,XMMWORD PTR[((64-128))+rax]
  4124. paddd xmm0,xmm12
  4125. DB 102,68,15,56,0,235 ; pshufb xmm13,xmm3 (last use of the shuffle control in this pass)
  4126. DB 69,15,56,204,211 ; sha256msg1 xmm10,xmm11
  4127. movups xmm5,XMMWORD PTR[((-16))+rcx]
  4128. aesenc xmm6,xmm4
  4129. DB 15,56,203,209 ; sha256rnds2 xmm2,xmm1
  4130. pshufd xmm0,xmm0,00eh
  4131. movdqa xmm3,xmm13 ; xmm3 becomes scratch for the message-schedule update
  4132. DB 102,65,15,58,15,220,4 ; palignr xmm3,xmm12,4
  4133. paddd xmm10,xmm3
  4134. movups xmm4,XMMWORD PTR[rcx]
  4135. aesenc xmm6,xmm5
  4136. DB 15,56,203,202 ; sha256rnds2 xmm1,xmm2
  4137. movdqa xmm0,XMMWORD PTR[((96-128))+rax]
  4138. paddd xmm0,xmm13
  4139. DB 69,15,56,205,213 ; sha256msg2 xmm10,xmm13
  4140. DB 69,15,56,204,220 ; sha256msg1 xmm11,xmm12
  4141. movups xmm5,XMMWORD PTR[16+rcx]
  4142. aesenc xmm6,xmm4
  4143. DB 15,56,203,209 ; sha256rnds2 xmm2,xmm1
  4144. pshufd xmm0,xmm0,00eh
  4145. movups xmm4,XMMWORD PTR[32+rcx]
  4146. aesenc xmm6,xmm5
  4147. movdqa xmm3,xmm10
  4148. DB 102,65,15,58,15,221,4 ; palignr xmm3,xmm13,4
  4149. paddd xmm11,xmm3
  4150. DB 15,56,203,202 ; sha256rnds2 xmm1,xmm2
  4151. movdqa xmm0,XMMWORD PTR[((128-128))+rax]
  4152. paddd xmm0,xmm10
  4153. DB 69,15,56,205,218 ; sha256msg2 xmm11,xmm10
  4154. DB 69,15,56,204,229 ; sha256msg1 xmm12,xmm13
  4155. movups xmm5,XMMWORD PTR[48+rcx] ; round key 10
  4156. aesenc xmm6,xmm4 ; round 9 done
  4157. DB 15,56,203,209 ; sha256rnds2 xmm2,xmm1
  4158. pshufd xmm0,xmm0,00eh
  4159. movdqa xmm3,xmm11
  4160. DB 102,65,15,58,15,218,4 ; palignr xmm3,xmm10,4
  4161. paddd xmm12,xmm3
  4162. cmp r11d,11 ; flags stay live through the movups/aesenc pairs below (neither writes flags)
  4163. jb $L$aesenclast1 ; below 11: round key 10 in xmm5 is the last key
  4164. movups xmm4,XMMWORD PTR[64+rcx]
  4165. aesenc xmm6,xmm5
  4166. movups xmm5,XMMWORD PTR[80+rcx]
  4167. aesenc xmm6,xmm4
  4168. je $L$aesenclast1 ; equal to 11: round key 12 in xmm5 is the last key
  4169. movups xmm4,XMMWORD PTR[96+rcx]
  4170. aesenc xmm6,xmm5
  4171. movups xmm5,XMMWORD PTR[112+rcx] ; above 11: round key 14 is the last key
  4172. aesenc xmm6,xmm4
  4173. $L$aesenclast1::
  4174. aesenclast xmm6,xmm5 ; xmm6 = ciphertext block 0
  4175. movups xmm4,XMMWORD PTR[((16-112))+rcx] ; reload round key 1 for the next block
  4176. nop
  4177. DB 15,56,203,202 ; sha256rnds2 xmm1,xmm2
  4178. movups xmm14,XMMWORD PTR[16+rdi] ; AES block 1 input
  4179. xorps xmm14,xmm15
  4180. movups XMMWORD PTR[rdi*1+rsi],xmm6 ; store ciphertext block 0
  4181. xorps xmm6,xmm14 ; chain: block 1 input ^ rk0 ^ ciphertext 0
  4182. movups xmm5,XMMWORD PTR[((-80))+rcx]
  4183. aesenc xmm6,xmm4
  4184. movdqa xmm0,XMMWORD PTR[((160-128))+rax]
  4185. paddd xmm0,xmm11
  4186. DB 69,15,56,205,227 ; sha256msg2 xmm12,xmm11
  4187. DB 69,15,56,204,234 ; sha256msg1 xmm13,xmm10
  4188. movups xmm4,XMMWORD PTR[((-64))+rcx]
  4189. aesenc xmm6,xmm5
  4190. DB 15,56,203,209 ; sha256rnds2 xmm2,xmm1
  4191. pshufd xmm0,xmm0,00eh
  4192. movdqa xmm3,xmm12
  4193. DB 102,65,15,58,15,219,4 ; palignr xmm3,xmm11,4
  4194. paddd xmm13,xmm3
  4195. movups xmm5,XMMWORD PTR[((-48))+rcx]
  4196. aesenc xmm6,xmm4
  4197. DB 15,56,203,202 ; sha256rnds2 xmm1,xmm2
  4198. movdqa xmm0,XMMWORD PTR[((192-128))+rax]
  4199. paddd xmm0,xmm12
  4200. DB 69,15,56,205,236 ; sha256msg2 xmm13,xmm12
  4201. DB 69,15,56,204,211 ; sha256msg1 xmm10,xmm11
  4202. movups xmm4,XMMWORD PTR[((-32))+rcx]
  4203. aesenc xmm6,xmm5
  4204. DB 15,56,203,209 ; sha256rnds2 xmm2,xmm1
  4205. pshufd xmm0,xmm0,00eh
  4206. movdqa xmm3,xmm13
  4207. DB 102,65,15,58,15,220,4 ; palignr xmm3,xmm12,4
  4208. paddd xmm10,xmm3
  4209. movups xmm5,XMMWORD PTR[((-16))+rcx]
  4210. aesenc xmm6,xmm4
  4211. DB 15,56,203,202 ; sha256rnds2 xmm1,xmm2
  4212. movdqa xmm0,XMMWORD PTR[((224-128))+rax]
  4213. paddd xmm0,xmm13
  4214. DB 69,15,56,205,213 ; sha256msg2 xmm10,xmm13
  4215. DB 69,15,56,204,220 ; sha256msg1 xmm11,xmm12
  4216. movups xmm4,XMMWORD PTR[rcx]
  4217. aesenc xmm6,xmm5
  4218. DB 15,56,203,209 ; sha256rnds2 xmm2,xmm1
  4219. pshufd xmm0,xmm0,00eh
  4220. movdqa xmm3,xmm10
  4221. DB 102,65,15,58,15,221,4 ; palignr xmm3,xmm13,4
  4222. paddd xmm11,xmm3
  4223. movups xmm5,XMMWORD PTR[16+rcx]
  4224. aesenc xmm6,xmm4
  4225. DB 15,56,203,202 ; sha256rnds2 xmm1,xmm2
  4226. movdqa xmm0,XMMWORD PTR[((256-128))+rax]
  4227. paddd xmm0,xmm10
  4228. DB 69,15,56,205,218 ; sha256msg2 xmm11,xmm10
  4229. DB 69,15,56,204,229 ; sha256msg1 xmm12,xmm13
  4230. movups xmm4,XMMWORD PTR[32+rcx]
  4231. aesenc xmm6,xmm5
  4232. DB 15,56,203,209 ; sha256rnds2 xmm2,xmm1
  4233. pshufd xmm0,xmm0,00eh
  4234. movdqa xmm3,xmm11
  4235. DB 102,65,15,58,15,218,4 ; palignr xmm3,xmm10,4
  4236. paddd xmm12,xmm3
  4237. movups xmm5,XMMWORD PTR[48+rcx] ; round key 10
  4238. aesenc xmm6,xmm4 ; round 9 done
  4239. cmp r11d,11 ; same 10/12/14-round selection as for block 0
  4240. jb $L$aesenclast2
  4241. movups xmm4,XMMWORD PTR[64+rcx]
  4242. aesenc xmm6,xmm5
  4243. movups xmm5,XMMWORD PTR[80+rcx]
  4244. aesenc xmm6,xmm4
  4245. je $L$aesenclast2
  4246. movups xmm4,XMMWORD PTR[96+rcx]
  4247. aesenc xmm6,xmm5
  4248. movups xmm5,XMMWORD PTR[112+rcx]
  4249. aesenc xmm6,xmm4
  4250. $L$aesenclast2::
  4251. aesenclast xmm6,xmm5 ; xmm6 = ciphertext block 1
  4252. movups xmm4,XMMWORD PTR[((16-112))+rcx] ; reload round key 1
  4253. nop
  4254. DB 15,56,203,202 ; sha256rnds2 xmm1,xmm2
  4255. movups xmm14,XMMWORD PTR[32+rdi] ; AES block 2 input
  4256. xorps xmm14,xmm15
  4257. movups XMMWORD PTR[16+rdi*1+rsi],xmm6 ; store ciphertext block 1
  4258. xorps xmm6,xmm14
  4259. movups xmm5,XMMWORD PTR[((-80))+rcx]
  4260. aesenc xmm6,xmm4
  4261. movdqa xmm0,XMMWORD PTR[((288-128))+rax]
  4262. paddd xmm0,xmm11
  4263. DB 69,15,56,205,227 ; sha256msg2 xmm12,xmm11
  4264. DB 69,15,56,204,234 ; sha256msg1 xmm13,xmm10
  4265. movups xmm4,XMMWORD PTR[((-64))+rcx]
  4266. aesenc xmm6,xmm5
  4267. DB 15,56,203,209 ; sha256rnds2 xmm2,xmm1
  4268. pshufd xmm0,xmm0,00eh
  4269. movdqa xmm3,xmm12
  4270. DB 102,65,15,58,15,219,4 ; palignr xmm3,xmm11,4
  4271. paddd xmm13,xmm3
  4272. movups xmm5,XMMWORD PTR[((-48))+rcx]
  4273. aesenc xmm6,xmm4
  4274. DB 15,56,203,202 ; sha256rnds2 xmm1,xmm2
  4275. movdqa xmm0,XMMWORD PTR[((320-128))+rax]
  4276. paddd xmm0,xmm12
  4277. DB 69,15,56,205,236 ; sha256msg2 xmm13,xmm12
  4278. DB 69,15,56,204,211 ; sha256msg1 xmm10,xmm11
  4279. movups xmm4,XMMWORD PTR[((-32))+rcx]
  4280. aesenc xmm6,xmm5
  4281. DB 15,56,203,209 ; sha256rnds2 xmm2,xmm1
  4282. pshufd xmm0,xmm0,00eh
  4283. movdqa xmm3,xmm13
  4284. DB 102,65,15,58,15,220,4 ; palignr xmm3,xmm12,4
  4285. paddd xmm10,xmm3
  4286. movups xmm5,XMMWORD PTR[((-16))+rcx]
  4287. aesenc xmm6,xmm4
  4288. DB 15,56,203,202 ; sha256rnds2 xmm1,xmm2
  4289. movdqa xmm0,XMMWORD PTR[((352-128))+rax]
  4290. paddd xmm0,xmm13
  4291. DB 69,15,56,205,213 ; sha256msg2 xmm10,xmm13
  4292. DB 69,15,56,204,220 ; sha256msg1 xmm11,xmm12
  4293. movups xmm4,XMMWORD PTR[rcx]
  4294. aesenc xmm6,xmm5
  4295. DB 15,56,203,209 ; sha256rnds2 xmm2,xmm1
  4296. pshufd xmm0,xmm0,00eh
  4297. movdqa xmm3,xmm10
  4298. DB 102,65,15,58,15,221,4 ; palignr xmm3,xmm13,4
  4299. paddd xmm11,xmm3
  4300. movups xmm5,XMMWORD PTR[16+rcx]
  4301. aesenc xmm6,xmm4
  4302. DB 15,56,203,202 ; sha256rnds2 xmm1,xmm2
  4303. movdqa xmm0,XMMWORD PTR[((384-128))+rax]
  4304. paddd xmm0,xmm10
  4305. DB 69,15,56,205,218 ; sha256msg2 xmm11,xmm10
  4306. DB 69,15,56,204,229 ; sha256msg1 xmm12,xmm13
  4307. movups xmm4,XMMWORD PTR[32+rcx]
  4308. aesenc xmm6,xmm5
  4309. DB 15,56,203,209 ; sha256rnds2 xmm2,xmm1
  4310. pshufd xmm0,xmm0,00eh
  4311. movdqa xmm3,xmm11
  4312. DB 102,65,15,58,15,218,4 ; palignr xmm3,xmm10,4
  4313. paddd xmm12,xmm3
  4314. movups xmm5,XMMWORD PTR[48+rcx] ; round key 10
  4315. aesenc xmm6,xmm4 ; round 9 done
  4316. DB 15,56,203,202 ; sha256rnds2 xmm1,xmm2
  4317. movdqa xmm0,XMMWORD PTR[((416-128))+rax]
  4318. paddd xmm0,xmm11
  4319. DB 69,15,56,205,227 ; sha256msg2 xmm12,xmm11
  4320. DB 69,15,56,204,234 ; sha256msg1 xmm13,xmm10
  4321. cmp r11d,11 ; same 10/12/14-round selection
  4322. jb $L$aesenclast3
  4323. movups xmm4,XMMWORD PTR[64+rcx]
  4324. aesenc xmm6,xmm5
  4325. movups xmm5,XMMWORD PTR[80+rcx]
  4326. aesenc xmm6,xmm4
  4327. je $L$aesenclast3
  4328. movups xmm4,XMMWORD PTR[96+rcx]
  4329. aesenc xmm6,xmm5
  4330. movups xmm5,XMMWORD PTR[112+rcx]
  4331. aesenc xmm6,xmm4
  4332. $L$aesenclast3::
  4333. aesenclast xmm6,xmm5 ; xmm6 = ciphertext block 2
  4334. movups xmm4,XMMWORD PTR[((16-112))+rcx] ; reload round key 1
  4335. nop
  4336. DB 15,56,203,209 ; sha256rnds2 xmm2,xmm1
  4337. pshufd xmm0,xmm0,00eh
  4338. movdqa xmm3,xmm12
  4339. DB 102,65,15,58,15,219,4 ; palignr xmm3,xmm11,4
  4340. paddd xmm13,xmm3
  4341. movups xmm14,XMMWORD PTR[48+rdi] ; AES block 3 input
  4342. xorps xmm14,xmm15
  4343. movups XMMWORD PTR[32+rdi*1+rsi],xmm6 ; store ciphertext block 2
  4344. xorps xmm6,xmm14
  4345. movups xmm5,XMMWORD PTR[((-80))+rcx]
  4346. aesenc xmm6,xmm4
  4347. movups xmm4,XMMWORD PTR[((-64))+rcx]
  4348. aesenc xmm6,xmm5
  4349. DB 15,56,203,202 ; sha256rnds2 xmm1,xmm2
  4350. movdqa xmm0,XMMWORD PTR[((448-128))+rax]
  4351. paddd xmm0,xmm12
  4352. DB 69,15,56,205,236 ; sha256msg2 xmm13,xmm12 (last message-schedule update of the pass)
  4353. movdqa xmm3,xmm7 ; restore the shuffle control for the next pass
  4354. movups xmm5,XMMWORD PTR[((-48))+rcx]
  4355. aesenc xmm6,xmm4
  4356. DB 15,56,203,209 ; sha256rnds2 xmm2,xmm1
  4357. pshufd xmm0,xmm0,00eh
  4358. movups xmm4,XMMWORD PTR[((-32))+rcx]
  4359. aesenc xmm6,xmm5
  4360. DB 15,56,203,202 ; sha256rnds2 xmm1,xmm2
  4361. movdqa xmm0,XMMWORD PTR[((480-128))+rax] ; last K256 row (16 rows x 4 rounds = 64 rounds)
  4362. paddd xmm0,xmm13
  4363. movups xmm5,XMMWORD PTR[((-16))+rcx]
  4364. aesenc xmm6,xmm4
  4365. movups xmm4,XMMWORD PTR[rcx]
  4366. aesenc xmm6,xmm5
  4367. DB 15,56,203,209 ; sha256rnds2 xmm2,xmm1
  4368. pshufd xmm0,xmm0,00eh
  4369. movups xmm5,XMMWORD PTR[16+rcx]
  4370. aesenc xmm6,xmm4
  4371. DB 15,56,203,202 ; sha256rnds2 xmm1,xmm2 (final two rounds)
  4372. movups xmm4,XMMWORD PTR[32+rcx]
  4373. aesenc xmm6,xmm5
  4374. movups xmm5,XMMWORD PTR[48+rcx] ; round key 10
  4375. aesenc xmm6,xmm4 ; round 9 done
  4376. cmp r11d,11 ; same 10/12/14-round selection
  4377. jb $L$aesenclast4
  4378. movups xmm4,XMMWORD PTR[64+rcx]
  4379. aesenc xmm6,xmm5
  4380. movups xmm5,XMMWORD PTR[80+rcx]
  4381. aesenc xmm6,xmm4
  4382. je $L$aesenclast4
  4383. movups xmm4,XMMWORD PTR[96+rcx]
  4384. aesenc xmm6,xmm5
  4385. movups xmm5,XMMWORD PTR[112+rcx]
  4386. aesenc xmm6,xmm4
  4387. $L$aesenclast4::
  4388. aesenclast xmm6,xmm5 ; xmm6 = ciphertext block 3 (also the chaining value for the next pass)
  4389. movups xmm4,XMMWORD PTR[((16-112))+rcx] ; reload round key 1
  4390. nop
  4391. paddd xmm2,xmm9 ; add the state saved at the top of the pass
  4392. paddd xmm1,xmm8
  4393. dec rdx ; sets ZF for the jnz below; movups and lea do not write flags
  4394. movups XMMWORD PTR[48+rdi*1+rsi],xmm6 ; store ciphertext block 3
  4395. lea rdi,QWORD PTR[64+rdi] ; advance input (and, through rsi, output) by 64 bytes
  4396. jnz $L$oop_shaext
  4397. pshufd xmm2,xmm2,0b1h ; next five instructions: permute the state back to its in-memory order
  4398. pshufd xmm3,xmm1,01bh
  4399. pshufd xmm1,xmm1,0b1h
  4400. punpckhqdq xmm1,xmm2
  4401. DB 102,15,58,15,211,8 ; palignr xmm2,xmm3,8
  4402. movups XMMWORD PTR[r8],xmm6 ; write the last ciphertext block back as the new chaining value
  4403. movdqu XMMWORD PTR[r9],xmm1 ; write the updated SHA-256 state
  4404. movdqu XMMWORD PTR[16+r9],xmm2
  4405. movaps xmm6,XMMWORD PTR[rsp] ; restore callee-saved xmm6-xmm15
  4406. movaps xmm7,XMMWORD PTR[16+rsp]
  4407. movaps xmm8,XMMWORD PTR[32+rsp]
  4408. movaps xmm9,XMMWORD PTR[48+rsp]
  4409. movaps xmm10,XMMWORD PTR[64+rsp]
  4410. movaps xmm11,XMMWORD PTR[80+rsp]
  4411. movaps xmm12,XMMWORD PTR[96+rsp]
  4412. movaps xmm13,XMMWORD PTR[112+rsp]
  4413. movaps xmm14,XMMWORD PTR[128+rsp]
  4414. movaps xmm15,XMMWORD PTR[144+rsp]
  4415. lea rsp,QWORD PTR[((8+160))+rsp] ; release the 168-byte frame; rsp is back at its entry value
  4416. $L$epilogue_shaext:: ; epilogue marker read by se_handler (second handler-data entry in .xdata)
  4417. mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue: reload rdi/rsi from the home slots
  4418. mov rsi,QWORD PTR[16+rsp]
  4419. DB 0F3h,0C3h ;repret
  4420. $L$SEH_end_aesni_cbc_sha256_enc_shaext::
  4421. aesni_cbc_sha256_enc_shaext ENDP
  4422. EXTERN __imp_RtlVirtualUnwind:NEAR
  4423. ALIGN 16
  4424. se_handler PROC PRIVATE ; SEH language handler named by every .xdata entry in this file; r8 = CONTEXT*, r9 = DISPATCHER_CONTEXT* (Win64 handler convention). It recovers the faulting routine's entry-time registers into the CONTEXT, then calls RtlVirtualUnwind
  4425. push rsi ; save every non-volatile GPR this handler overwrites
  4426. push rdi
  4427. push rbx
  4428. push rbp
  4429. push r12
  4430. push r13
  4431. push r14
  4432. push r15
  4433. pushfq ; the copy sequences below execute cld; popfq restores the caller's flags
  4434. sub rsp,64 ; 32-byte home area plus four stack arguments for RtlVirtualUnwind; 9 pushes + 64 leaves rsp 16-byte aligned at the call
  4435. mov rax,QWORD PTR[120+r8] ; rax = context->Rax (the routines keep their entry rsp in rax during the prologue)
  4436. mov rbx,QWORD PTR[248+r8] ; rbx = context->Rip
  4437. mov rsi,QWORD PTR[8+r9] ; rsi = disp->ImageBase
  4438. mov r11,QWORD PTR[56+r9] ; r11 = disp->HandlerData: the two RVAs stored after the handler in .xdata
  4439. mov r10d,DWORD PTR[r11] ; HandlerData[0] = RVA of the prologue-end label
  4440. lea r10,QWORD PTR[r10*1+rsi] ; make it an absolute address
  4441. cmp rbx,r10
  4442. jb $L$in_prologue ; Rip before prologue end: no frame yet, rax already holds the entry rsp
  4443. mov rax,QWORD PTR[152+r8] ; rax = context->Rsp
  4444. mov r10d,DWORD PTR[4+r11] ; HandlerData[1] = RVA of the epilogue label
  4445. lea r10,QWORD PTR[r10*1+rsi]
  4446. cmp rbx,r10
  4447. jae $L$in_prologue ; Rip at or past the epilogue label: frame already released, Rsp is the entry rsp
  4448. lea r10,QWORD PTR[aesni_cbc_sha256_enc_shaext]
  4449. cmp rbx,r10
  4450. jb $L$not_in_shaext ; shaext sits after the other covered routines, so Rip >= its start means the fault is in the shaext body
  4451. lea rsi,QWORD PTR[rax] ; shaext body: xmm6-xmm15 were saved at [rsp]
  4452. lea rdi,QWORD PTR[512+r8] ; rdi = &context->Xmm6
  4453. mov ecx,20 ; 20 qwords = 10 xmm registers
  4454. DD 0a548f3fch ; bytes FC F3 48 A5 = cld; rep movsq
  4455. lea rax,QWORD PTR[168+rax] ; skip the 168-byte shaext frame: rax = entry rsp
  4456. jmp $L$in_prologue
  4457. $L$not_in_shaext::
  4458. lea r10,QWORD PTR[$L$avx2_shortcut]
  4459. cmp rbx,r10
  4460. jb $L$not_in_avx2
  4461. and rax,-256*4 ; avx2 body: round rsp down to a 1024-byte boundary...
  4462. add rax,448 ; ...and add 448 to reach the frame base. NOTE(review): mirrors the avx2 frame set-up, which is outside this view
  4463. $L$not_in_avx2::
  4464. mov rsi,rax ; rsi = frame base
  4465. mov rax,QWORD PTR[((64+56))+rax] ; rax = stack pointer saved at frame+120 (the slot the avx2 epilogue reloads rsp from)
  4466. mov rbx,QWORD PTR[((-8))+rax] ; the six pushed non-volatile GPRs sit just below that pointer
  4467. mov rbp,QWORD PTR[((-16))+rax]
  4468. mov r12,QWORD PTR[((-24))+rax]
  4469. mov r13,QWORD PTR[((-32))+rax]
  4470. mov r14,QWORD PTR[((-40))+rax]
  4471. mov r15,QWORD PTR[((-48))+rax]
  4472. mov QWORD PTR[144+r8],rbx ; context->Rbx
  4473. mov QWORD PTR[160+r8],rbp ; context->Rbp
  4474. mov QWORD PTR[216+r8],r12 ; context->R12
  4475. mov QWORD PTR[224+r8],r13 ; context->R13
  4476. mov QWORD PTR[232+r8],r14 ; context->R14
  4477. mov QWORD PTR[240+r8],r15 ; context->R15
  4478. lea rsi,QWORD PTR[((64+64))+rsi] ; xmm6-xmm15 save area at frame+128
  4479. lea rdi,QWORD PTR[512+r8] ; rdi = &context->Xmm6
  4480. mov ecx,20 ; 20 qwords = 10 xmm registers
  4481. DD 0a548f3fch ; cld; rep movsq
  4482. $L$in_prologue:: ; every path arrives here with rax = stack pointer at function entry
  4483. mov rdi,QWORD PTR[8+rax] ; rdi/rsi as parked in the home slots by the WIN64 prologue
  4484. mov rsi,QWORD PTR[16+rax]
  4485. mov QWORD PTR[152+r8],rax ; context->Rsp
  4486. mov QWORD PTR[168+r8],rsi ; context->Rsi
  4487. mov QWORD PTR[176+r8],rdi ; context->Rdi
  4488. mov rdi,QWORD PTR[40+r9] ; rdi = disp->ContextRecord
  4489. mov rsi,r8 ; rsi = the CONTEXT just patched
  4490. mov ecx,154 ; 154 qwords = 1232 bytes, the size of a CONTEXT
  4491. DD 0a548f3fch ; cld; rep movsq: copy the patched CONTEXT into disp->ContextRecord
  4492. mov rsi,r9 ; rsi = disp (r9 is reused for an argument below)
  4493. xor rcx,rcx ; arg1 HandlerType = 0 (UNW_FLAG_NHANDLER)
  4494. mov rdx,QWORD PTR[8+rsi] ; arg2 ImageBase
  4495. mov r8,QWORD PTR[rsi] ; arg3 ControlPc
  4496. mov r9,QWORD PTR[16+rsi] ; arg4 FunctionEntry
  4497. mov r10,QWORD PTR[40+rsi] ; arg5 ContextRecord
  4498. lea r11,QWORD PTR[56+rsi] ; arg6 &disp->HandlerData
  4499. lea r12,QWORD PTR[24+rsi] ; arg7 &disp->EstablisherFrame
  4500. mov QWORD PTR[32+rsp],r10 ; stack arguments start above the 32-byte home area
  4501. mov QWORD PTR[40+rsp],r11
  4502. mov QWORD PTR[48+rsp],r12
  4503. mov QWORD PTR[56+rsp],rcx ; arg8 ContextPointers = NULL
  4504. call QWORD PTR[__imp_RtlVirtualUnwind]
  4505. mov eax,1 ; return ExceptionContinueSearch
  4506. add rsp,64
  4507. popfq
  4508. pop r15
  4509. pop r14
  4510. pop r13
  4511. pop r12
  4512. pop rbp
  4513. pop rbx
  4514. pop rdi
  4515. pop rsi
  4516. DB 0F3h,0C3h ;repret
  4517. se_handler ENDP
  4518. .text$ ENDS
  4519. .pdata SEGMENT READONLY ALIGN(4) ; function table: one entry of three image-relative addresses (begin, end, unwind info) per routine
  4520. DD imagerel $L$SEH_begin_aesni_cbc_sha256_enc_xop ; xop: first covered address
  4521. DD imagerel $L$SEH_end_aesni_cbc_sha256_enc_xop ; xop: end of covered range
  4522. DD imagerel $L$SEH_info_aesni_cbc_sha256_enc_xop ; xop: unwind info record in .xdata
  4523. DD imagerel $L$SEH_begin_aesni_cbc_sha256_enc_avx ; avx entry
  4524. DD imagerel $L$SEH_end_aesni_cbc_sha256_enc_avx
  4525. DD imagerel $L$SEH_info_aesni_cbc_sha256_enc_avx
  4526. DD imagerel $L$SEH_begin_aesni_cbc_sha256_enc_avx2 ; avx2 entry
  4527. DD imagerel $L$SEH_end_aesni_cbc_sha256_enc_avx2
  4528. DD imagerel $L$SEH_info_aesni_cbc_sha256_enc_avx2
  4529. DD imagerel $L$SEH_begin_aesni_cbc_sha256_enc_shaext ; shaext entry
  4530. DD imagerel $L$SEH_end_aesni_cbc_sha256_enc_shaext
  4531. DD imagerel $L$SEH_info_aesni_cbc_sha256_enc_shaext
  4532. .pdata ENDS
  4533. .xdata SEGMENT READONLY ALIGN(8) ; unwind info records referenced from .pdata; all four name se_handler and carry two label RVAs for it
  4534. ALIGN 8
  4535. $L$SEH_info_aesni_cbc_sha256_enc_xop::
  4536. DB 9,0,0,0 ; UNWIND_INFO header: version 1 with UNW_FLAG_EHANDLER (1 | 1 shl 3 = 9), prolog size 0, no unwind codes, no frame register
  4537. DD imagerel se_handler ; exception handler RVA
  4538. DD imagerel $L$prologue_xop,imagerel $L$epilogue_xop ; handler data: se_handler reads these as HandlerData[0] (prologue end) and HandlerData[1] (epilogue start)
  4539. $L$SEH_info_aesni_cbc_sha256_enc_avx::
  4540. DB 9,0,0,0 ; same header as above
  4541. DD imagerel se_handler
  4542. DD imagerel $L$prologue_avx,imagerel $L$epilogue_avx ; prologue-end / epilogue labels of the avx routine
  4543. $L$SEH_info_aesni_cbc_sha256_enc_avx2::
  4544. DB 9,0,0,0 ; same header as above
  4545. DD imagerel se_handler
  4546. DD imagerel $L$prologue_avx2,imagerel $L$epilogue_avx2 ; prologue-end / epilogue labels of the avx2 routine
  4547. $L$SEH_info_aesni_cbc_sha256_enc_shaext::
  4548. DB 9,0,0,0 ; same header as above
  4549. DD imagerel se_handler
  4550. DD imagerel $L$prologue_shaext,imagerel $L$epilogue_shaext ; prologue-end / epilogue labels of the shaext routine
  4551. .xdata ENDS
  4552. END