aesni-mb-x86_64.masm 40 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770
  ; File-level MASM setup for the code that follows.
  1. OPTION DOTNAME ; permit identifiers that begin with '.', such as the segment name on the next line
  2. .text$ SEGMENT ALIGN(256) 'CODE' ; open the code segment, 256-byte aligned
  3. EXTERN OPENSSL_ia32cap_P:NEAR ; capability dwords defined in another module; the dword at +4 is tested for bit 28 to pick the AVX path
  4. PUBLIC aesni_multi_cbc_encrypt ; export the entry point that starts below
  5. ALIGN 32 ; start the procedure on a 32-byte boundary
  ; ---------------------------------------------------------------------
  ; aesni_multi_cbc_encrypt -- AES-CBC encryption of four independent
  ; streams processed in lock-step (SSE / AES-NI path), Win64 ABI.
  ;
  ; Inputs, as consumed by the code below:
  ;   rcx -> array of 40-byte stream descriptors, four used per outer pass:
  ;            +0   qword  input pointer
  ;            +8   qword  output pointer
  ;            +16  dword  count of 16-byte blocks (signed; <=0 means idle lane)
  ;            +24  16 B   initial chaining value (presumably the IV)
  ;   rdx -> key schedule: 16-byte round keys from offset 0; the dword at
  ;          offset 240 selects the depth: <11 -> 10, ==11 -> 12, >11 -> 14 rounds
  ;   r8d  = number of four-descriptor groups to process
  ;
  ; Dispatch: with r8d >= 2 and bit 28 of the dword at OPENSSL_ia32cap_P+4
  ; set, control transfers to _avx_cbc_enc_shortcut (defined elsewhere).
  ;
  ; Scratch frame (rsp after 64-byte alignment):
  ;   [rsp+0 ..15]  sink for stores of idle lanes / dummy source at start-up
  ;   [rsp+16..23]  entry rsp (also the dummy source for exhausted lanes)
  ;   [rsp+24]      outer (group) counter
  ;   [rsp+32..47]  four per-lane remaining-block counters
  ;
  ; Register roles in the inner loop:
  ;   r8-r11 input bases, r12-r15 output bases, rbx running byte offset,
  ;   xmm2-xmm5 lane states, xmm6-xmm9 next input blocks, xmm0/xmm1 round
  ;   keys, xmm12 round key 0, xmm10/xmm11 lane counters / decrement mask,
  ;   edx = largest lane count (inner trip count), eax = round selector.
  ;
  ; The AES instructions are emitted as DB bytes; each is decoded in a comment.
  ; ---------------------------------------------------------------------
  6. aesni_multi_cbc_encrypt PROC PUBLIC
  7. mov QWORD PTR[8+rsp],rdi ;WIN64 prologue
  8. mov QWORD PTR[16+rsp],rsi ; rdi/rsi are non-volatile on Win64; parked above the return address
  9. mov rax,rsp
  10. $L$SEH_begin_aesni_multi_cbc_encrypt::
  11. mov rdi,rcx ; rdi = descriptor array (arg 1)
  12. mov rsi,rdx ; rsi = key schedule (arg 2)
  13. mov rdx,r8 ; edx = group count (arg 3)
  14. cmp edx,2
  15. jb $L$enc_non_avx ; fewer than two groups: stay on the 4-lane path
  16. mov ecx,DWORD PTR[((OPENSSL_ia32cap_P+4))]
  17. test ecx,268435456 ; 0x10000000 = bit 28 of the second capability dword
  18. jnz _avx_cbc_enc_shortcut ; bit set: continue in the AVX variant (label outside this PROC)
  19. jmp $L$enc_non_avx
  20. ALIGN 16
  21. $L$enc_non_avx::
  22. mov rax,rsp ; rax = entry rsp, base for every save-area offset below
  23. push rbx
  24. push rbp
  25. push r12
  26. push r13
  27. push r14
  28. push r15
  29. lea rsp,QWORD PTR[((-168))+rsp] ; rsp = rax-216: room for the xmm save area
  30. movaps XMMWORD PTR[rsp],xmm6 ; xmm6..xmm12 land at rax-216 .. rax-120
  31. movaps XMMWORD PTR[16+rsp],xmm7
  32. movaps XMMWORD PTR[32+rsp],xmm8
  33. movaps XMMWORD PTR[48+rsp],xmm9
  34. movaps XMMWORD PTR[64+rsp],xmm10
  35. movaps XMMWORD PTR[80+rsp],xmm11
  36. movaps XMMWORD PTR[96+rsp],xmm12
  37. movaps XMMWORD PTR[(-104)+rax],xmm13 ; xmm13..xmm15 continue the same area, addressed via rax
  38. movaps XMMWORD PTR[(-88)+rax],xmm14
  39. movaps XMMWORD PTR[(-72)+rax],xmm15
  40. sub rsp,48
  41. and rsp,-64 ; 64-byte aligned scratch frame (layout in the header)
  42. mov QWORD PTR[16+rsp],rax ; remember entry rsp for the epilogue
  43. $L$enc4x_body::
  44. movdqu xmm12,XMMWORD PTR[rsi] ; xmm12 = round key 0
  45. lea rsi,QWORD PTR[120+rsi] ; bias key pointer by 120; round key k is now at [16*k-120+rsi]
  46. lea rdi,QWORD PTR[80+rdi] ; bias descriptor pointer: lanes sit at -80, -40, 0, +40
  47. $L$enc4x_loop_grande::
  48. mov DWORD PTR[24+rsp],edx ; spill the group counter; edx is reused as max lane count
  49. xor edx,edx
  ; ---- lane 0: load descriptor, track the maximum count, neutralise idle lane
  50. mov ecx,DWORD PTR[((-64))+rdi] ; block count
  51. mov r8,QWORD PTR[((-80))+rdi] ; input pointer
  52. cmp ecx,edx
  53. mov r12,QWORD PTR[((-72))+rdi] ; output pointer
  54. cmovg edx,ecx ; edx = max(edx, count), signed
  55. test ecx,ecx
  56. movdqu xmm2,XMMWORD PTR[((-56))+rdi] ; initial chaining value
  57. mov DWORD PTR[32+rsp],ecx
  58. cmovle r8,rsp ; count <= 0: read dummy data from the scratch frame
  ; ---- lane 1
  59. mov ecx,DWORD PTR[((-24))+rdi]
  60. mov r9,QWORD PTR[((-40))+rdi]
  61. cmp ecx,edx
  62. mov r13,QWORD PTR[((-32))+rdi]
  63. cmovg edx,ecx
  64. test ecx,ecx
  65. movdqu xmm3,XMMWORD PTR[((-16))+rdi]
  66. mov DWORD PTR[36+rsp],ecx
  67. cmovle r9,rsp
  ; ---- lane 2
  68. mov ecx,DWORD PTR[16+rdi]
  69. mov r10,QWORD PTR[rdi]
  70. cmp ecx,edx
  71. mov r14,QWORD PTR[8+rdi]
  72. cmovg edx,ecx
  73. test ecx,ecx
  74. movdqu xmm4,XMMWORD PTR[24+rdi]
  75. mov DWORD PTR[40+rsp],ecx
  76. cmovle r10,rsp
  ; ---- lane 3
  77. mov ecx,DWORD PTR[56+rdi]
  78. mov r11,QWORD PTR[40+rdi]
  79. cmp ecx,edx
  80. mov r15,QWORD PTR[48+rdi]
  81. cmovg edx,ecx
  82. test ecx,ecx
  83. movdqu xmm5,XMMWORD PTR[64+rdi]
  84. mov DWORD PTR[44+rsp],ecx
  85. cmovle r11,rsp
  86. test edx,edx
  87. jz $L$enc4x_done ; no lane in this group has work: leave (rax still holds entry rsp)
  ; ---- prime the states: state = chaining value ^ round key 0 ^ first input block
  88. movups xmm1,XMMWORD PTR[((16-120))+rsi] ; round key 1
  89. pxor xmm2,xmm12
  90. movups xmm0,XMMWORD PTR[((32-120))+rsi] ; round key 2
  91. pxor xmm3,xmm12
  92. mov eax,DWORD PTR[((240-120))+rsi] ; round selector (overwrites rax; reloaded from the frame later)
  93. pxor xmm4,xmm12
  94. movdqu xmm6,XMMWORD PTR[r8]
  95. pxor xmm5,xmm12
  96. movdqu xmm7,XMMWORD PTR[r9]
  97. pxor xmm2,xmm6
  98. movdqu xmm8,XMMWORD PTR[r10]
  99. pxor xmm3,xmm7
  100. movdqu xmm9,XMMWORD PTR[r11]
  101. pxor xmm4,xmm8
  102. pxor xmm5,xmm9
  103. movdqa xmm10,XMMWORD PTR[32+rsp] ; xmm10 = four lane counters
  104. xor rbx,rbx ; byte offset into every stream
  105. jmp $L$oop_enc4x
  106. ALIGN 32
  ; ---- inner loop: one 16-byte block per lane per iteration
  107. $L$oop_enc4x::
  108. add rbx,16 ; rbx now points past the block being encrypted
  109. lea rbp,QWORD PTR[16+rsp]
  110. mov ecx,1
  111. sub rbp,rbx ; rbp+rbx = rsp+16 (dummy source), rbp+rbx-16 = rsp (sink)
  112. DB 102,15,56,220,209 ; aesenc xmm2,xmm1   (round 1)
  113. prefetcht0 [31+rbx*1+r8] ; prefetch upcoming input, lane 0
  114. prefetcht0 [31+rbx*1+r9] ; lane 1
  115. DB 102,15,56,220,217 ; aesenc xmm3,xmm1
  116. prefetcht0 [31+rbx*1+r10] ; lane 2
  117. prefetcht0 [31+rbx*1+r11] ; lane 3 (was a second r10 prefetch, leaving lane 3 uncovered)
  118. DB 102,15,56,220,225 ; aesenc xmm4,xmm1
  119. DB 102,15,56,220,233 ; aesenc xmm5,xmm1
  120. movups xmm1,XMMWORD PTR[((48-120))+rsi] ; round key 3
  121. cmp ecx,DWORD PTR[32+rsp] ; flags = 1 - count(lane 0); AES/mov below leave flags alone
  122. DB 102,15,56,220,208 ; aesenc xmm2,xmm0   (round 2)
  123. DB 102,15,56,220,216 ; aesenc xmm3,xmm0
  124. DB 102,15,56,220,224 ; aesenc xmm4,xmm0
  125. cmovge r8,rbp ; count <= 1: no further input, next-block load reads rsp+16
  126. cmovg r12,rbp ; count <= 0: lane idle, its store goes to the sink at rsp
  127. DB 102,15,56,220,232 ; aesenc xmm5,xmm0
  128. movups xmm0,XMMWORD PTR[((-56))+rsi] ; round key 4
  129. cmp ecx,DWORD PTR[36+rsp] ; lane 1
  130. DB 102,15,56,220,209 ; aesenc xmm2,xmm1   (round 3)
  131. DB 102,15,56,220,217 ; aesenc xmm3,xmm1
  132. DB 102,15,56,220,225 ; aesenc xmm4,xmm1
  133. cmovge r9,rbp
  134. cmovg r13,rbp
  135. DB 102,15,56,220,233 ; aesenc xmm5,xmm1
  136. movups xmm1,XMMWORD PTR[((-40))+rsi] ; round key 5
  137. cmp ecx,DWORD PTR[40+rsp] ; lane 2
  138. DB 102,15,56,220,208 ; aesenc xmm2,xmm0   (round 4)
  139. DB 102,15,56,220,216 ; aesenc xmm3,xmm0
  140. DB 102,15,56,220,224 ; aesenc xmm4,xmm0
  141. cmovge r10,rbp
  142. cmovg r14,rbp
  143. DB 102,15,56,220,232 ; aesenc xmm5,xmm0
  144. movups xmm0,XMMWORD PTR[((-24))+rsi] ; round key 6
  145. cmp ecx,DWORD PTR[44+rsp] ; lane 3
  146. DB 102,15,56,220,209 ; aesenc xmm2,xmm1   (round 5)
  147. DB 102,15,56,220,217 ; aesenc xmm3,xmm1
  148. DB 102,15,56,220,225 ; aesenc xmm4,xmm1
  149. cmovge r11,rbp
  150. cmovg r15,rbp
  151. DB 102,15,56,220,233 ; aesenc xmm5,xmm1
  152. movups xmm1,XMMWORD PTR[((-8))+rsi] ; round key 7
  153. movdqa xmm11,xmm10 ; copy of the lane counters for the mask below
  154. DB 102,15,56,220,208 ; aesenc xmm2,xmm0   (round 6)
  155. prefetcht0 [15+rbx*1+r12] ; prefetch output locations, lanes 0..3
  156. prefetcht0 [15+rbx*1+r13]
  157. DB 102,15,56,220,216 ; aesenc xmm3,xmm0
  158. prefetcht0 [15+rbx*1+r14]
  159. prefetcht0 [15+rbx*1+r15]
  160. DB 102,15,56,220,224 ; aesenc xmm4,xmm0
  161. DB 102,15,56,220,232 ; aesenc xmm5,xmm0
  162. movups xmm0,XMMWORD PTR[((128-120))+rsi] ; round key 8
  163. pxor xmm12,xmm12 ; xmm12 temporarily zero
  164. DB 102,15,56,220,209 ; aesenc xmm2,xmm1   (round 7)
  165. pcmpgtd xmm11,xmm12 ; -1 in every lane whose counter is > 0
  166. movdqu xmm12,XMMWORD PTR[((-120))+rsi] ; restore xmm12 = round key 0
  167. DB 102,15,56,220,217 ; aesenc xmm3,xmm1
  168. paddd xmm10,xmm11 ; decrement the positive counters only
  169. movdqa XMMWORD PTR[32+rsp],xmm10 ; publish counters for the next iteration's cmp
  170. DB 102,15,56,220,225 ; aesenc xmm4,xmm1
  171. DB 102,15,56,220,233 ; aesenc xmm5,xmm1
  172. movups xmm1,XMMWORD PTR[((144-120))+rsi] ; round key 9
  173. cmp eax,11 ; flags stay live for both the jb and the je below
  174. DB 102,15,56,220,208 ; aesenc xmm2,xmm0   (round 8)
  175. DB 102,15,56,220,216 ; aesenc xmm3,xmm0
  176. DB 102,15,56,220,224 ; aesenc xmm4,xmm0
  177. DB 102,15,56,220,232 ; aesenc xmm5,xmm0
  178. movups xmm0,XMMWORD PTR[((160-120))+rsi] ; round key 10
  179. jb $L$enc4x_tail ; selector < 11: 10-round schedule
  180. DB 102,15,56,220,209 ; aesenc xmm2,xmm1   (round 9)
  181. DB 102,15,56,220,217 ; aesenc xmm3,xmm1
  182. DB 102,15,56,220,225 ; aesenc xmm4,xmm1
  183. DB 102,15,56,220,233 ; aesenc xmm5,xmm1
  184. movups xmm1,XMMWORD PTR[((176-120))+rsi] ; round key 11
  185. DB 102,15,56,220,208 ; aesenc xmm2,xmm0   (round 10)
  186. DB 102,15,56,220,216 ; aesenc xmm3,xmm0
  187. DB 102,15,56,220,224 ; aesenc xmm4,xmm0
  188. DB 102,15,56,220,232 ; aesenc xmm5,xmm0
  189. movups xmm0,XMMWORD PTR[((192-120))+rsi] ; round key 12
  190. je $L$enc4x_tail ; selector == 11: 12-round schedule
  191. DB 102,15,56,220,209 ; aesenc xmm2,xmm1   (round 11)
  192. DB 102,15,56,220,217 ; aesenc xmm3,xmm1
  193. DB 102,15,56,220,225 ; aesenc xmm4,xmm1
  194. DB 102,15,56,220,233 ; aesenc xmm5,xmm1
  195. movups xmm1,XMMWORD PTR[((208-120))+rsi] ; round key 13
  196. DB 102,15,56,220,208 ; aesenc xmm2,xmm0   (round 12)
  197. DB 102,15,56,220,216 ; aesenc xmm3,xmm0
  198. DB 102,15,56,220,224 ; aesenc xmm4,xmm0
  199. DB 102,15,56,220,232 ; aesenc xmm5,xmm0
  200. movups xmm0,XMMWORD PTR[((224-120))+rsi] ; round key 14
  201. jmp $L$enc4x_tail
  202. ALIGN 32
  ; ---- last two rounds (xmm1 = penultimate key, xmm0 = final key), store, chain
  203. $L$enc4x_tail::
  204. DB 102,15,56,220,209 ; aesenc xmm2,xmm1
  205. DB 102,15,56,220,217 ; aesenc xmm3,xmm1
  206. DB 102,15,56,220,225 ; aesenc xmm4,xmm1
  207. DB 102,15,56,220,233 ; aesenc xmm5,xmm1
  208. movdqu xmm6,XMMWORD PTR[rbx*1+r8] ; next input block, lane 0 (dummy if lane is finished)
  209. movdqu xmm1,XMMWORD PTR[((16-120))+rsi] ; reload round key 1 for the next iteration
  210. DB 102,15,56,221,208 ; aesenclast xmm2,xmm0
  211. movdqu xmm7,XMMWORD PTR[rbx*1+r9]
  212. pxor xmm6,xmm12 ; fold round key 0 into the next input block
  213. DB 102,15,56,221,216 ; aesenclast xmm3,xmm0
  214. movdqu xmm8,XMMWORD PTR[rbx*1+r10]
  215. pxor xmm7,xmm12
  216. DB 102,15,56,221,224 ; aesenclast xmm4,xmm0
  217. movdqu xmm9,XMMWORD PTR[rbx*1+r11]
  218. pxor xmm8,xmm12
  219. DB 102,15,56,221,232 ; aesenclast xmm5,xmm0
  220. movdqu xmm0,XMMWORD PTR[((32-120))+rsi] ; reload round key 2 for the next iteration
  221. pxor xmm9,xmm12
  222. movups XMMWORD PTR[(-16)+rbx*1+r12],xmm2 ; write ciphertext block, lane 0
  223. pxor xmm2,xmm6 ; CBC chain: next state = ciphertext ^ (next input ^ round key 0)
  224. movups XMMWORD PTR[(-16)+rbx*1+r13],xmm3
  225. pxor xmm3,xmm7
  226. movups XMMWORD PTR[(-16)+rbx*1+r14],xmm4
  227. pxor xmm4,xmm8
  228. movups XMMWORD PTR[(-16)+rbx*1+r15],xmm5
  229. pxor xmm5,xmm9
  230. dec edx ; edx counts down from the largest lane count
  231. jnz $L$oop_enc4x
  232. mov rax,QWORD PTR[16+rsp] ; rax = entry rsp again (eax was used as round selector)
  233. mov edx,DWORD PTR[24+rsp] ; reload the group counter
  234. lea rdi,QWORD PTR[160+rdi] ; advance by four 40-byte descriptors
  235. dec edx
  236. jnz $L$enc4x_loop_grande
  237. $L$enc4x_done::
  ; xmm13-xmm15 were saved but are not reloaded; the 4-lane path never writes them.
  238. movaps xmm6,XMMWORD PTR[((-216))+rax]
  239. movaps xmm7,XMMWORD PTR[((-200))+rax]
  240. movaps xmm8,XMMWORD PTR[((-184))+rax]
  241. movaps xmm9,XMMWORD PTR[((-168))+rax]
  242. movaps xmm10,XMMWORD PTR[((-152))+rax]
  243. movaps xmm11,XMMWORD PTR[((-136))+rax]
  244. movaps xmm12,XMMWORD PTR[((-120))+rax]
  245. mov r15,QWORD PTR[((-48))+rax] ; reload the six pushed registers by offset instead of popping
  246. mov r14,QWORD PTR[((-40))+rax]
  247. mov r13,QWORD PTR[((-32))+rax]
  248. mov r12,QWORD PTR[((-24))+rax]
  249. mov rbp,QWORD PTR[((-16))+rax]
  250. mov rbx,QWORD PTR[((-8))+rax]
  251. lea rsp,QWORD PTR[rax] ; rsp = entry rsp
  252. $L$enc4x_epilogue::
  253. mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue
  254. mov rsi,QWORD PTR[16+rsp]
  255. DB 0F3h,0C3h ;repret
  256. $L$SEH_end_aesni_multi_cbc_encrypt::
  257. aesni_multi_cbc_encrypt ENDP
  ; ---------------------------------------------------------------------
  ; aesni_multi_cbc_decrypt -- AES-CBC decryption of four independent
  ; streams processed in lock-step (SSE / AES-NI path), Win64 ABI.
  ;
  ; Inputs, as consumed by the code below:
  ;   rcx -> array of 40-byte stream descriptors, four used per outer pass:
  ;            +0   qword  input pointer
  ;            +8   qword  output pointer
  ;            +16  dword  count of 16-byte blocks (signed; <=0 means idle lane)
  ;            +24  16 B   initial chaining value (presumably the IV)
  ;   rdx -> key schedule: 16-byte round keys from offset 0; the dword at
  ;          offset 240 selects the depth: <11 -> 10, ==11 -> 12, >11 -> 14 rounds
  ;   r8d  = number of four-descriptor groups to process
  ;
  ; Dispatch: with r8d >= 2 and bit 28 of the dword at OPENSSL_ia32cap_P+4
  ; set, control transfers to _avx_cbc_dec_shortcut (defined elsewhere).
  ;
  ; Scratch frame (rsp after 64-byte alignment):
  ;   [rsp+0 ..15]  sink for stores of idle lanes / dummy source at start-up
  ;   [rsp+16..23]  entry rsp (also the dummy source for exhausted lanes)
  ;   [rsp+24]      outer (group) counter
  ;   [rsp+32..47]  four per-lane remaining-block counters
  ;
  ; Register roles in the inner loop:
  ;   r8-r11 input bases, r12-r15 output bases, rbx running byte offset,
  ;   xmm2-xmm5 lane states, xmm6-xmm9 previous ciphertext (chaining values),
  ;   xmm0/xmm1 round keys, xmm12 round key 0, xmm10/xmm11 lane counters /
  ;   decrement mask, edx = largest lane count, eax = round selector.
  ;
  ; The AES instructions are emitted as DB bytes; each is decoded in a comment.
  ; ---------------------------------------------------------------------
  258. PUBLIC aesni_multi_cbc_decrypt
  259. ALIGN 32
  260. aesni_multi_cbc_decrypt PROC PUBLIC
  261. mov QWORD PTR[8+rsp],rdi ;WIN64 prologue
  262. mov QWORD PTR[16+rsp],rsi ; rdi/rsi are non-volatile on Win64; parked above the return address
  263. mov rax,rsp
  264. $L$SEH_begin_aesni_multi_cbc_decrypt::
  265. mov rdi,rcx ; rdi = descriptor array (arg 1)
  266. mov rsi,rdx ; rsi = key schedule (arg 2)
  267. mov rdx,r8 ; edx = group count (arg 3)
  268. cmp edx,2
  269. jb $L$dec_non_avx ; fewer than two groups: stay on the 4-lane path
  270. mov ecx,DWORD PTR[((OPENSSL_ia32cap_P+4))]
  271. test ecx,268435456 ; 0x10000000 = bit 28 of the second capability dword
  272. jnz _avx_cbc_dec_shortcut ; bit set: continue in the AVX variant (label outside this PROC)
  273. jmp $L$dec_non_avx
  274. ALIGN 16
  275. $L$dec_non_avx::
  276. mov rax,rsp ; rax = entry rsp, base for every save-area offset below
  277. push rbx
  278. push rbp
  279. push r12
  280. push r13
  281. push r14
  282. push r15
  283. lea rsp,QWORD PTR[((-168))+rsp] ; rsp = rax-216: room for the xmm save area
  284. movaps XMMWORD PTR[rsp],xmm6 ; xmm6..xmm12 land at rax-216 .. rax-120
  285. movaps XMMWORD PTR[16+rsp],xmm7
  286. movaps XMMWORD PTR[32+rsp],xmm8
  287. movaps XMMWORD PTR[48+rsp],xmm9
  288. movaps XMMWORD PTR[64+rsp],xmm10
  289. movaps XMMWORD PTR[80+rsp],xmm11
  290. movaps XMMWORD PTR[96+rsp],xmm12
  291. movaps XMMWORD PTR[(-104)+rax],xmm13 ; xmm13..xmm15 continue the same area, addressed via rax
  292. movaps XMMWORD PTR[(-88)+rax],xmm14
  293. movaps XMMWORD PTR[(-72)+rax],xmm15
  294. sub rsp,48
  295. and rsp,-64 ; 64-byte aligned scratch frame (layout in the header)
  296. mov QWORD PTR[16+rsp],rax ; remember entry rsp for the epilogue
  297. $L$dec4x_body::
  298. movdqu xmm12,XMMWORD PTR[rsi] ; xmm12 = round key 0
  299. lea rsi,QWORD PTR[120+rsi] ; bias key pointer by 120; round key k is now at [16*k-120+rsi]
  300. lea rdi,QWORD PTR[80+rdi] ; bias descriptor pointer: lanes sit at -80, -40, 0, +40
  301. $L$dec4x_loop_grande::
  302. mov DWORD PTR[24+rsp],edx ; spill the group counter; edx is reused as max lane count
  303. xor edx,edx
  ; ---- lane 0: load descriptor, track the maximum count, neutralise idle lane
  304. mov ecx,DWORD PTR[((-64))+rdi] ; block count
  305. mov r8,QWORD PTR[((-80))+rdi] ; input pointer
  306. cmp ecx,edx
  307. mov r12,QWORD PTR[((-72))+rdi] ; output pointer
  308. cmovg edx,ecx ; edx = max(edx, count), signed
  309. test ecx,ecx
  310. movdqu xmm6,XMMWORD PTR[((-56))+rdi] ; initial chaining value
  311. mov DWORD PTR[32+rsp],ecx
  312. cmovle r8,rsp ; count <= 0: read dummy data from the scratch frame
  ; ---- lane 1
  313. mov ecx,DWORD PTR[((-24))+rdi]
  314. mov r9,QWORD PTR[((-40))+rdi]
  315. cmp ecx,edx
  316. mov r13,QWORD PTR[((-32))+rdi]
  317. cmovg edx,ecx
  318. test ecx,ecx
  319. movdqu xmm7,XMMWORD PTR[((-16))+rdi]
  320. mov DWORD PTR[36+rsp],ecx
  321. cmovle r9,rsp
  ; ---- lane 2
  322. mov ecx,DWORD PTR[16+rdi]
  323. mov r10,QWORD PTR[rdi]
  324. cmp ecx,edx
  325. mov r14,QWORD PTR[8+rdi]
  326. cmovg edx,ecx
  327. test ecx,ecx
  328. movdqu xmm8,XMMWORD PTR[24+rdi]
  329. mov DWORD PTR[40+rsp],ecx
  330. cmovle r10,rsp
  ; ---- lane 3
  331. mov ecx,DWORD PTR[56+rdi]
  332. mov r11,QWORD PTR[40+rdi]
  333. cmp ecx,edx
  334. mov r15,QWORD PTR[48+rdi]
  335. cmovg edx,ecx
  336. test ecx,ecx
  337. movdqu xmm9,XMMWORD PTR[64+rdi]
  338. mov DWORD PTR[44+rsp],ecx
  339. cmovle r11,rsp
  340. test edx,edx
  341. jz $L$dec4x_done ; no lane in this group has work: leave (rax still holds entry rsp)
  ; ---- prime the states: state = first ciphertext block ^ round key 0
  342. movups xmm1,XMMWORD PTR[((16-120))+rsi] ; round key 1
  343. movups xmm0,XMMWORD PTR[((32-120))+rsi] ; round key 2
  344. mov eax,DWORD PTR[((240-120))+rsi] ; round selector (overwrites rax; reloaded from the frame later)
  345. movdqu xmm2,XMMWORD PTR[r8]
  346. movdqu xmm3,XMMWORD PTR[r9]
  347. pxor xmm2,xmm12
  348. movdqu xmm4,XMMWORD PTR[r10]
  349. pxor xmm3,xmm12
  350. movdqu xmm5,XMMWORD PTR[r11]
  351. pxor xmm4,xmm12
  352. pxor xmm5,xmm12
  353. movdqa xmm10,XMMWORD PTR[32+rsp] ; xmm10 = four lane counters
  354. xor rbx,rbx ; byte offset into every stream
  355. jmp $L$oop_dec4x
  356. ALIGN 32
  ; ---- inner loop: one 16-byte block per lane per iteration
  357. $L$oop_dec4x::
  358. add rbx,16 ; rbx now points past the block being decrypted
  359. lea rbp,QWORD PTR[16+rsp]
  360. mov ecx,1
  361. sub rbp,rbx ; rbp+rbx = rsp+16 (dummy source), rbp+rbx-16 = rsp (sink)
  362. DB 102,15,56,222,209 ; aesdec xmm2,xmm1   (round 1)
  363. prefetcht0 [31+rbx*1+r8] ; prefetch upcoming input, lanes 0..3
  364. prefetcht0 [31+rbx*1+r9]
  365. DB 102,15,56,222,217 ; aesdec xmm3,xmm1
  366. prefetcht0 [31+rbx*1+r10]
  367. prefetcht0 [31+rbx*1+r11]
  368. DB 102,15,56,222,225 ; aesdec xmm4,xmm1
  369. DB 102,15,56,222,233 ; aesdec xmm5,xmm1
  370. movups xmm1,XMMWORD PTR[((48-120))+rsi] ; round key 3
  371. cmp ecx,DWORD PTR[32+rsp] ; flags = 1 - count(lane 0); AES/mov below leave flags alone
  372. DB 102,15,56,222,208 ; aesdec xmm2,xmm0   (round 2)
  373. DB 102,15,56,222,216 ; aesdec xmm3,xmm0
  374. DB 102,15,56,222,224 ; aesdec xmm4,xmm0
  375. cmovge r8,rbp ; count <= 1: no further input, next-block load reads rsp+16
  376. cmovg r12,rbp ; count <= 0: lane idle, its store goes to the sink at rsp
  377. DB 102,15,56,222,232 ; aesdec xmm5,xmm0
  378. movups xmm0,XMMWORD PTR[((-56))+rsi] ; round key 4
  379. cmp ecx,DWORD PTR[36+rsp] ; lane 1
  380. DB 102,15,56,222,209 ; aesdec xmm2,xmm1   (round 3)
  381. DB 102,15,56,222,217 ; aesdec xmm3,xmm1
  382. DB 102,15,56,222,225 ; aesdec xmm4,xmm1
  383. cmovge r9,rbp
  384. cmovg r13,rbp
  385. DB 102,15,56,222,233 ; aesdec xmm5,xmm1
  386. movups xmm1,XMMWORD PTR[((-40))+rsi] ; round key 5
  387. cmp ecx,DWORD PTR[40+rsp] ; lane 2
  388. DB 102,15,56,222,208 ; aesdec xmm2,xmm0   (round 4)
  389. DB 102,15,56,222,216 ; aesdec xmm3,xmm0
  390. DB 102,15,56,222,224 ; aesdec xmm4,xmm0
  391. cmovge r10,rbp
  392. cmovg r14,rbp
  393. DB 102,15,56,222,232 ; aesdec xmm5,xmm0
  394. movups xmm0,XMMWORD PTR[((-24))+rsi] ; round key 6
  395. cmp ecx,DWORD PTR[44+rsp] ; lane 3
  396. DB 102,15,56,222,209 ; aesdec xmm2,xmm1   (round 5)
  397. DB 102,15,56,222,217 ; aesdec xmm3,xmm1
  398. DB 102,15,56,222,225 ; aesdec xmm4,xmm1
  399. cmovge r11,rbp
  400. cmovg r15,rbp
  401. DB 102,15,56,222,233 ; aesdec xmm5,xmm1
  402. movups xmm1,XMMWORD PTR[((-8))+rsi] ; round key 7
  403. movdqa xmm11,xmm10 ; copy of the lane counters for the mask below
  404. DB 102,15,56,222,208 ; aesdec xmm2,xmm0   (round 6)
  405. prefetcht0 [15+rbx*1+r12] ; prefetch output locations, lanes 0..3
  406. prefetcht0 [15+rbx*1+r13]
  407. DB 102,15,56,222,216 ; aesdec xmm3,xmm0
  408. prefetcht0 [15+rbx*1+r14]
  409. prefetcht0 [15+rbx*1+r15]
  410. DB 102,15,56,222,224 ; aesdec xmm4,xmm0
  411. DB 102,15,56,222,232 ; aesdec xmm5,xmm0
  412. movups xmm0,XMMWORD PTR[((128-120))+rsi] ; round key 8
  413. pxor xmm12,xmm12 ; xmm12 temporarily zero
  414. DB 102,15,56,222,209 ; aesdec xmm2,xmm1   (round 7)
  415. pcmpgtd xmm11,xmm12 ; -1 in every lane whose counter is > 0
  416. movdqu xmm12,XMMWORD PTR[((-120))+rsi] ; restore xmm12 = round key 0
  417. DB 102,15,56,222,217 ; aesdec xmm3,xmm1
  418. paddd xmm10,xmm11 ; decrement the positive counters only
  419. movdqa XMMWORD PTR[32+rsp],xmm10 ; publish counters for the next iteration's cmp
  420. DB 102,15,56,222,225 ; aesdec xmm4,xmm1
  421. DB 102,15,56,222,233 ; aesdec xmm5,xmm1
  422. movups xmm1,XMMWORD PTR[((144-120))+rsi] ; round key 9
  423. cmp eax,11 ; flags stay live for both the jb and the je below
  424. DB 102,15,56,222,208 ; aesdec xmm2,xmm0   (round 8)
  425. DB 102,15,56,222,216 ; aesdec xmm3,xmm0
  426. DB 102,15,56,222,224 ; aesdec xmm4,xmm0
  427. DB 102,15,56,222,232 ; aesdec xmm5,xmm0
  428. movups xmm0,XMMWORD PTR[((160-120))+rsi] ; round key 10
  429. jb $L$dec4x_tail ; selector < 11: 10-round schedule
  430. DB 102,15,56,222,209 ; aesdec xmm2,xmm1   (round 9)
  431. DB 102,15,56,222,217 ; aesdec xmm3,xmm1
  432. DB 102,15,56,222,225 ; aesdec xmm4,xmm1
  433. DB 102,15,56,222,233 ; aesdec xmm5,xmm1
  434. movups xmm1,XMMWORD PTR[((176-120))+rsi] ; round key 11
  435. DB 102,15,56,222,208 ; aesdec xmm2,xmm0   (round 10)
  436. DB 102,15,56,222,216 ; aesdec xmm3,xmm0
  437. DB 102,15,56,222,224 ; aesdec xmm4,xmm0
  438. DB 102,15,56,222,232 ; aesdec xmm5,xmm0
  439. movups xmm0,XMMWORD PTR[((192-120))+rsi] ; round key 12
  440. je $L$dec4x_tail ; selector == 11: 12-round schedule
  441. DB 102,15,56,222,209 ; aesdec xmm2,xmm1   (round 11)
  442. DB 102,15,56,222,217 ; aesdec xmm3,xmm1
  443. DB 102,15,56,222,225 ; aesdec xmm4,xmm1
  444. DB 102,15,56,222,233 ; aesdec xmm5,xmm1
  445. movups xmm1,XMMWORD PTR[((208-120))+rsi] ; round key 13
  446. DB 102,15,56,222,208 ; aesdec xmm2,xmm0   (round 12)
  447. DB 102,15,56,222,216 ; aesdec xmm3,xmm0
  448. DB 102,15,56,222,224 ; aesdec xmm4,xmm0
  449. DB 102,15,56,222,232 ; aesdec xmm5,xmm0
  450. movups xmm0,XMMWORD PTR[((224-120))+rsi] ; round key 14
  451. jmp $L$dec4x_tail
  452. ALIGN 32
  ; ---- last two rounds (xmm1 = penultimate key, xmm0 = final key), un-chain, store
  453. $L$dec4x_tail::
  454. DB 102,15,56,222,209 ; aesdec xmm2,xmm1
  455. DB 102,15,56,222,217 ; aesdec xmm3,xmm1
  456. DB 102,15,56,222,225 ; aesdec xmm4,xmm1
  457. pxor xmm6,xmm0 ; fold the final round key into the chaining value so aesdeclast also undoes the CBC xor
  458. pxor xmm7,xmm0
  459. DB 102,15,56,222,233 ; aesdec xmm5,xmm1
  460. movdqu xmm1,XMMWORD PTR[((16-120))+rsi] ; reload round key 1 for the next iteration
  461. pxor xmm8,xmm0
  462. pxor xmm9,xmm0
  463. movdqu xmm0,XMMWORD PTR[((32-120))+rsi] ; reload round key 2 for the next iteration
  464. DB 102,15,56,223,214 ; aesdeclast xmm2,xmm6
  465. DB 102,15,56,223,223 ; aesdeclast xmm3,xmm7
  466. movdqu xmm6,XMMWORD PTR[((-16))+rbx*1+r8] ; re-read the ciphertext block just processed: next chaining value (read before the store below)
  467. movdqu xmm7,XMMWORD PTR[((-16))+rbx*1+r9]
  468. DB 102,65,15,56,223,224 ; aesdeclast xmm4,xmm8 (REX.B prefix selects xmm8)
  469. DB 102,65,15,56,223,233 ; aesdeclast xmm5,xmm9
  470. movdqu xmm8,XMMWORD PTR[((-16))+rbx*1+r10]
  471. movdqu xmm9,XMMWORD PTR[((-16))+rbx*1+r11]
  472. movups XMMWORD PTR[(-16)+rbx*1+r12],xmm2 ; write plaintext block, lane 0
  473. movdqu xmm2,XMMWORD PTR[rbx*1+r8] ; next ciphertext block, lane 0 (dummy if lane is finished)
  474. movups XMMWORD PTR[(-16)+rbx*1+r13],xmm3
  475. movdqu xmm3,XMMWORD PTR[rbx*1+r9]
  476. pxor xmm2,xmm12 ; next state = next ciphertext ^ round key 0
  477. movups XMMWORD PTR[(-16)+rbx*1+r14],xmm4
  478. movdqu xmm4,XMMWORD PTR[rbx*1+r10]
  479. pxor xmm3,xmm12
  480. movups XMMWORD PTR[(-16)+rbx*1+r15],xmm5
  481. movdqu xmm5,XMMWORD PTR[rbx*1+r11]
  482. pxor xmm4,xmm12
  483. pxor xmm5,xmm12
  484. dec edx ; edx counts down from the largest lane count
  485. jnz $L$oop_dec4x
  486. mov rax,QWORD PTR[16+rsp] ; rax = entry rsp again (eax was used as round selector)
  487. mov edx,DWORD PTR[24+rsp] ; reload the group counter
  488. lea rdi,QWORD PTR[160+rdi] ; advance by four 40-byte descriptors
  489. dec edx
  490. jnz $L$dec4x_loop_grande
  491. $L$dec4x_done::
  ; xmm13-xmm15 were saved but are not reloaded; the 4-lane path never writes them.
  492. movaps xmm6,XMMWORD PTR[((-216))+rax]
  493. movaps xmm7,XMMWORD PTR[((-200))+rax]
  494. movaps xmm8,XMMWORD PTR[((-184))+rax]
  495. movaps xmm9,XMMWORD PTR[((-168))+rax]
  496. movaps xmm10,XMMWORD PTR[((-152))+rax]
  497. movaps xmm11,XMMWORD PTR[((-136))+rax]
  498. movaps xmm12,XMMWORD PTR[((-120))+rax]
  499. mov r15,QWORD PTR[((-48))+rax] ; reload the six pushed registers by offset instead of popping
  500. mov r14,QWORD PTR[((-40))+rax]
  501. mov r13,QWORD PTR[((-32))+rax]
  502. mov r12,QWORD PTR[((-24))+rax]
  503. mov rbp,QWORD PTR[((-16))+rax]
  504. mov rbx,QWORD PTR[((-8))+rax]
  505. lea rsp,QWORD PTR[rax] ; rsp = entry rsp
  506. $L$dec4x_epilogue::
  507. mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue
  508. mov rsi,QWORD PTR[16+rsp]
  509. DB 0F3h,0C3h ;repret
  510. $L$SEH_end_aesni_multi_cbc_decrypt::
  511. aesni_multi_cbc_decrypt ENDP
  512. ALIGN 32
  513. aesni_multi_cbc_encrypt_avx PROC PRIVATE
  514. mov QWORD PTR[8+rsp],rdi ;WIN64 prologue
  515. mov QWORD PTR[16+rsp],rsi
  516. mov rax,rsp
  517. $L$SEH_begin_aesni_multi_cbc_encrypt_avx::
  518. mov rdi,rcx
  519. mov rsi,rdx
  520. mov rdx,r8
  521. _avx_cbc_enc_shortcut::
  522. mov rax,rsp
  523. push rbx
  524. push rbp
  525. push r12
  526. push r13
  527. push r14
  528. push r15
  529. lea rsp,QWORD PTR[((-168))+rsp]
  530. movaps XMMWORD PTR[rsp],xmm6
  531. movaps XMMWORD PTR[16+rsp],xmm7
  532. movaps XMMWORD PTR[32+rsp],xmm8
  533. movaps XMMWORD PTR[48+rsp],xmm9
  534. movaps XMMWORD PTR[64+rsp],xmm10
  535. movaps XMMWORD PTR[80+rsp],xmm11
  536. movaps XMMWORD PTR[(-120)+rax],xmm12
  537. movaps XMMWORD PTR[(-104)+rax],xmm13
  538. movaps XMMWORD PTR[(-88)+rax],xmm14
  539. movaps XMMWORD PTR[(-72)+rax],xmm15
  540. sub rsp,192
  541. and rsp,-128
  542. mov QWORD PTR[16+rsp],rax
  543. $L$enc8x_body::
  544. vzeroupper
  545. vmovdqu xmm15,XMMWORD PTR[rsi]
  546. lea rsi,QWORD PTR[120+rsi]
  547. lea rdi,QWORD PTR[160+rdi]
  548. shr edx,1
  549. $L$enc8x_loop_grande::
  550. xor edx,edx
  551. mov ecx,DWORD PTR[((-144))+rdi]
  552. mov r8,QWORD PTR[((-160))+rdi]
  553. cmp ecx,edx
  554. mov rbx,QWORD PTR[((-152))+rdi]
  555. cmovg edx,ecx
  556. test ecx,ecx
  557. vmovdqu xmm2,XMMWORD PTR[((-136))+rdi]
  558. mov DWORD PTR[32+rsp],ecx
  559. cmovle r8,rsp
  560. sub rbx,r8
  561. mov QWORD PTR[64+rsp],rbx
  562. mov ecx,DWORD PTR[((-104))+rdi]
  563. mov r9,QWORD PTR[((-120))+rdi]
  564. cmp ecx,edx
  565. mov rbp,QWORD PTR[((-112))+rdi]
  566. cmovg edx,ecx
  567. test ecx,ecx
  568. vmovdqu xmm3,XMMWORD PTR[((-96))+rdi]
  569. mov DWORD PTR[36+rsp],ecx
  570. cmovle r9,rsp
  571. sub rbp,r9
  572. mov QWORD PTR[72+rsp],rbp
  573. mov ecx,DWORD PTR[((-64))+rdi]
  574. mov r10,QWORD PTR[((-80))+rdi]
  575. cmp ecx,edx
  576. mov rbp,QWORD PTR[((-72))+rdi]
  577. cmovg edx,ecx
  578. test ecx,ecx
  579. vmovdqu xmm4,XMMWORD PTR[((-56))+rdi]
  580. mov DWORD PTR[40+rsp],ecx
  581. cmovle r10,rsp
  582. sub rbp,r10
  583. mov QWORD PTR[80+rsp],rbp
  584. mov ecx,DWORD PTR[((-24))+rdi]
  585. mov r11,QWORD PTR[((-40))+rdi]
  586. cmp ecx,edx
  587. mov rbp,QWORD PTR[((-32))+rdi]
  588. cmovg edx,ecx
  589. test ecx,ecx
  590. vmovdqu xmm5,XMMWORD PTR[((-16))+rdi]
  591. mov DWORD PTR[44+rsp],ecx
  592. cmovle r11,rsp
  593. sub rbp,r11
  594. mov QWORD PTR[88+rsp],rbp
  595. mov ecx,DWORD PTR[16+rdi]
  596. mov r12,QWORD PTR[rdi]
  597. cmp ecx,edx
  598. mov rbp,QWORD PTR[8+rdi]
  599. cmovg edx,ecx
  600. test ecx,ecx
  601. vmovdqu xmm6,XMMWORD PTR[24+rdi]
  602. mov DWORD PTR[48+rsp],ecx
  603. cmovle r12,rsp
  604. sub rbp,r12
  605. mov QWORD PTR[96+rsp],rbp
  606. mov ecx,DWORD PTR[56+rdi]
  607. mov r13,QWORD PTR[40+rdi]
  608. cmp ecx,edx
  609. mov rbp,QWORD PTR[48+rdi]
  610. cmovg edx,ecx
  611. test ecx,ecx
  612. vmovdqu xmm7,XMMWORD PTR[64+rdi]
  613. mov DWORD PTR[52+rsp],ecx
  614. cmovle r13,rsp
  615. sub rbp,r13
  616. mov QWORD PTR[104+rsp],rbp
  617. mov ecx,DWORD PTR[96+rdi]
  618. mov r14,QWORD PTR[80+rdi]
  619. cmp ecx,edx
  620. mov rbp,QWORD PTR[88+rdi]
  621. cmovg edx,ecx
  622. test ecx,ecx
  623. vmovdqu xmm8,XMMWORD PTR[104+rdi]
  624. mov DWORD PTR[56+rsp],ecx
  625. cmovle r14,rsp
  626. sub rbp,r14
  627. mov QWORD PTR[112+rsp],rbp
  628. mov ecx,DWORD PTR[136+rdi]
  629. mov r15,QWORD PTR[120+rdi]
  630. cmp ecx,edx
  631. mov rbp,QWORD PTR[128+rdi]
  632. cmovg edx,ecx
  633. test ecx,ecx
  634. vmovdqu xmm9,XMMWORD PTR[144+rdi]
  635. mov DWORD PTR[60+rsp],ecx
  636. cmovle r15,rsp
  637. sub rbp,r15
  638. mov QWORD PTR[120+rsp],rbp
  639. test edx,edx
  640. jz $L$enc8x_done
  641. vmovups xmm1,XMMWORD PTR[((16-120))+rsi]
  642. vmovups xmm0,XMMWORD PTR[((32-120))+rsi]
  643. mov eax,DWORD PTR[((240-120))+rsi]
  644. vpxor xmm10,xmm15,XMMWORD PTR[r8]
  645. lea rbp,QWORD PTR[128+rsp]
  646. vpxor xmm11,xmm15,XMMWORD PTR[r9]
  647. vpxor xmm12,xmm15,XMMWORD PTR[r10]
  648. vpxor xmm13,xmm15,XMMWORD PTR[r11]
  649. vpxor xmm2,xmm2,xmm10
  650. vpxor xmm10,xmm15,XMMWORD PTR[r12]
  651. vpxor xmm3,xmm3,xmm11
  652. vpxor xmm11,xmm15,XMMWORD PTR[r13]
  653. vpxor xmm4,xmm4,xmm12
  654. vpxor xmm12,xmm15,XMMWORD PTR[r14]
  655. vpxor xmm5,xmm5,xmm13
  656. vpxor xmm13,xmm15,XMMWORD PTR[r15]
  657. vpxor xmm6,xmm6,xmm10
  658. mov ecx,1
  659. vpxor xmm7,xmm7,xmm11
  660. vpxor xmm8,xmm8,xmm12
  661. vpxor xmm9,xmm9,xmm13
  662. jmp $L$oop_enc8x
  663. ALIGN 32
  664. $L$oop_enc8x::
  665. vaesenc xmm2,xmm2,xmm1
  666. cmp ecx,DWORD PTR[((32+0))+rsp]
  667. vaesenc xmm3,xmm3,xmm1
  668. prefetcht0 [31+r8]
  669. vaesenc xmm4,xmm4,xmm1
  670. vaesenc xmm5,xmm5,xmm1
  671. lea rbx,QWORD PTR[rbx*1+r8]
  672. cmovge r8,rsp
  673. vaesenc xmm6,xmm6,xmm1
  674. cmovg rbx,rsp
  675. vaesenc xmm7,xmm7,xmm1
  676. sub rbx,r8
  677. vaesenc xmm8,xmm8,xmm1
  678. vpxor xmm10,xmm15,XMMWORD PTR[16+r8]
  679. mov QWORD PTR[((64+0))+rsp],rbx
  680. vaesenc xmm9,xmm9,xmm1
  681. vmovups xmm1,XMMWORD PTR[((-72))+rsi]
  682. lea r8,QWORD PTR[16+rbx*1+r8]
  683. vmovdqu XMMWORD PTR[rbp],xmm10
  684. vaesenc xmm2,xmm2,xmm0
  685. cmp ecx,DWORD PTR[((32+4))+rsp]
  686. mov rbx,QWORD PTR[((64+8))+rsp]
  687. vaesenc xmm3,xmm3,xmm0
  688. prefetcht0 [31+r9]
  689. vaesenc xmm4,xmm4,xmm0
  690. vaesenc xmm5,xmm5,xmm0
  691. lea rbx,QWORD PTR[rbx*1+r9]
  692. cmovge r9,rsp
  693. vaesenc xmm6,xmm6,xmm0
  694. cmovg rbx,rsp
  695. vaesenc xmm7,xmm7,xmm0
  696. sub rbx,r9
  697. vaesenc xmm8,xmm8,xmm0
  698. vpxor xmm11,xmm15,XMMWORD PTR[16+r9]
  699. mov QWORD PTR[((64+8))+rsp],rbx
  700. vaesenc xmm9,xmm9,xmm0
  701. vmovups xmm0,XMMWORD PTR[((-56))+rsi]
  702. lea r9,QWORD PTR[16+rbx*1+r9]
  703. vmovdqu XMMWORD PTR[16+rbp],xmm11
  704. vaesenc xmm2,xmm2,xmm1
  705. cmp ecx,DWORD PTR[((32+8))+rsp]
  706. mov rbx,QWORD PTR[((64+16))+rsp]
  707. vaesenc xmm3,xmm3,xmm1
  708. prefetcht0 [31+r10]
  709. vaesenc xmm4,xmm4,xmm1
  710. prefetcht0 [15+r8]
  711. vaesenc xmm5,xmm5,xmm1
  712. lea rbx,QWORD PTR[rbx*1+r10]
  713. cmovge r10,rsp
  714. vaesenc xmm6,xmm6,xmm1
  715. cmovg rbx,rsp
  716. vaesenc xmm7,xmm7,xmm1
  717. sub rbx,r10
  718. vaesenc xmm8,xmm8,xmm1
  719. vpxor xmm12,xmm15,XMMWORD PTR[16+r10]
  720. mov QWORD PTR[((64+16))+rsp],rbx
  721. vaesenc xmm9,xmm9,xmm1
  722. vmovups xmm1,XMMWORD PTR[((-40))+rsi]
  723. lea r10,QWORD PTR[16+rbx*1+r10]
  724. vmovdqu XMMWORD PTR[32+rbp],xmm12
  725. vaesenc xmm2,xmm2,xmm0
  726. cmp ecx,DWORD PTR[((32+12))+rsp]
  727. mov rbx,QWORD PTR[((64+24))+rsp]
  728. vaesenc xmm3,xmm3,xmm0
  729. prefetcht0 [31+r11]
  730. vaesenc xmm4,xmm4,xmm0
  731. prefetcht0 [15+r9]
  732. vaesenc xmm5,xmm5,xmm0
  733. lea rbx,QWORD PTR[rbx*1+r11]
  734. cmovge r11,rsp
  735. vaesenc xmm6,xmm6,xmm0
  736. cmovg rbx,rsp
  737. vaesenc xmm7,xmm7,xmm0
  738. sub rbx,r11
  739. vaesenc xmm8,xmm8,xmm0
  740. vpxor xmm13,xmm15,XMMWORD PTR[16+r11]
  741. mov QWORD PTR[((64+24))+rsp],rbx
  742. vaesenc xmm9,xmm9,xmm0
  743. vmovups xmm0,XMMWORD PTR[((-24))+rsi]
  744. lea r11,QWORD PTR[16+rbx*1+r11]
  745. vmovdqu XMMWORD PTR[48+rbp],xmm13
  746. vaesenc xmm2,xmm2,xmm1
  747. cmp ecx,DWORD PTR[((32+16))+rsp]
  748. mov rbx,QWORD PTR[((64+32))+rsp]
  749. vaesenc xmm3,xmm3,xmm1
  750. prefetcht0 [31+r12]
  751. vaesenc xmm4,xmm4,xmm1
  752. prefetcht0 [15+r10]
  753. vaesenc xmm5,xmm5,xmm1
  754. lea rbx,QWORD PTR[rbx*1+r12]
  755. cmovge r12,rsp
  756. vaesenc xmm6,xmm6,xmm1
  757. cmovg rbx,rsp
  758. vaesenc xmm7,xmm7,xmm1
  759. sub rbx,r12
  760. vaesenc xmm8,xmm8,xmm1
  761. vpxor xmm10,xmm15,XMMWORD PTR[16+r12]
  762. mov QWORD PTR[((64+32))+rsp],rbx
  763. vaesenc xmm9,xmm9,xmm1
  764. vmovups xmm1,XMMWORD PTR[((-8))+rsi]
  765. lea r12,QWORD PTR[16+rbx*1+r12]
  766. vaesenc xmm2,xmm2,xmm0
  767. cmp ecx,DWORD PTR[((32+20))+rsp]
  768. mov rbx,QWORD PTR[((64+40))+rsp]
  769. vaesenc xmm3,xmm3,xmm0
  770. prefetcht0 [31+r13]
  771. vaesenc xmm4,xmm4,xmm0
  772. prefetcht0 [15+r11]
  773. vaesenc xmm5,xmm5,xmm0
  774. lea rbx,QWORD PTR[r13*1+rbx]
  775. cmovge r13,rsp
  776. vaesenc xmm6,xmm6,xmm0
  777. cmovg rbx,rsp
  778. vaesenc xmm7,xmm7,xmm0
  779. sub rbx,r13
  780. vaesenc xmm8,xmm8,xmm0
  781. vpxor xmm11,xmm15,XMMWORD PTR[16+r13]
  782. mov QWORD PTR[((64+40))+rsp],rbx
  783. vaesenc xmm9,xmm9,xmm0
  784. vmovups xmm0,XMMWORD PTR[8+rsi]
  785. lea r13,QWORD PTR[16+rbx*1+r13]
  786. vaesenc xmm2,xmm2,xmm1
  787. cmp ecx,DWORD PTR[((32+24))+rsp]
  788. mov rbx,QWORD PTR[((64+48))+rsp]
  789. vaesenc xmm3,xmm3,xmm1
  790. prefetcht0 [31+r14]
  791. vaesenc xmm4,xmm4,xmm1
  792. prefetcht0 [15+r12]
  793. vaesenc xmm5,xmm5,xmm1
  794. lea rbx,QWORD PTR[rbx*1+r14]
  795. cmovge r14,rsp
  796. vaesenc xmm6,xmm6,xmm1
  797. cmovg rbx,rsp
  798. vaesenc xmm7,xmm7,xmm1
  799. sub rbx,r14
  800. vaesenc xmm8,xmm8,xmm1
  801. vpxor xmm12,xmm15,XMMWORD PTR[16+r14]
  802. mov QWORD PTR[((64+48))+rsp],rbx
  803. vaesenc xmm9,xmm9,xmm1
  804. vmovups xmm1,XMMWORD PTR[24+rsi]
  805. lea r14,QWORD PTR[16+rbx*1+r14]
  806. vaesenc xmm2,xmm2,xmm0
  807. cmp ecx,DWORD PTR[((32+28))+rsp]
  808. mov rbx,QWORD PTR[((64+56))+rsp]
  809. vaesenc xmm3,xmm3,xmm0
  810. prefetcht0 [31+r15]
  811. vaesenc xmm4,xmm4,xmm0
  812. prefetcht0 [15+r13]
  813. vaesenc xmm5,xmm5,xmm0
  814. lea rbx,QWORD PTR[rbx*1+r15]
  815. cmovge r15,rsp
  816. vaesenc xmm6,xmm6,xmm0
  817. cmovg rbx,rsp
  818. vaesenc xmm7,xmm7,xmm0
  819. sub rbx,r15
  820. vaesenc xmm8,xmm8,xmm0
  821. vpxor xmm13,xmm15,XMMWORD PTR[16+r15]
  822. mov QWORD PTR[((64+56))+rsp],rbx
  823. vaesenc xmm9,xmm9,xmm0
  824. vmovups xmm0,XMMWORD PTR[40+rsi]
  825. lea r15,QWORD PTR[16+rbx*1+r15]
  826. vmovdqu xmm14,XMMWORD PTR[32+rsp]
  827. prefetcht0 [15+r14]
  828. prefetcht0 [15+r15]
  829. cmp eax,11
  830. jb $L$enc8x_tail
  831. vaesenc xmm2,xmm2,xmm1
  832. vaesenc xmm3,xmm3,xmm1
  833. vaesenc xmm4,xmm4,xmm1
  834. vaesenc xmm5,xmm5,xmm1
  835. vaesenc xmm6,xmm6,xmm1
  836. vaesenc xmm7,xmm7,xmm1
  837. vaesenc xmm8,xmm8,xmm1
  838. vaesenc xmm9,xmm9,xmm1
  839. vmovups xmm1,XMMWORD PTR[((176-120))+rsi]
  840. vaesenc xmm2,xmm2,xmm0
  841. vaesenc xmm3,xmm3,xmm0
  842. vaesenc xmm4,xmm4,xmm0
  843. vaesenc xmm5,xmm5,xmm0
  844. vaesenc xmm6,xmm6,xmm0
  845. vaesenc xmm7,xmm7,xmm0
  846. vaesenc xmm8,xmm8,xmm0
  847. vaesenc xmm9,xmm9,xmm0
  848. vmovups xmm0,XMMWORD PTR[((192-120))+rsi]
  849. je $L$enc8x_tail
  850. vaesenc xmm2,xmm2,xmm1
  851. vaesenc xmm3,xmm3,xmm1
  852. vaesenc xmm4,xmm4,xmm1
  853. vaesenc xmm5,xmm5,xmm1
  854. vaesenc xmm6,xmm6,xmm1
  855. vaesenc xmm7,xmm7,xmm1
  856. vaesenc xmm8,xmm8,xmm1
  857. vaesenc xmm9,xmm9,xmm1
  858. vmovups xmm1,XMMWORD PTR[((208-120))+rsi]
  859. vaesenc xmm2,xmm2,xmm0
  860. vaesenc xmm3,xmm3,xmm0
  861. vaesenc xmm4,xmm4,xmm0
  862. vaesenc xmm5,xmm5,xmm0
  863. vaesenc xmm6,xmm6,xmm0
  864. vaesenc xmm7,xmm7,xmm0
  865. vaesenc xmm8,xmm8,xmm0
  866. vaesenc xmm9,xmm9,xmm0
  867. vmovups xmm0,XMMWORD PTR[((224-120))+rsi]
  868. $L$enc8x_tail::
  869. vaesenc xmm2,xmm2,xmm1
  870. vpxor xmm15,xmm15,xmm15
  871. vaesenc xmm3,xmm3,xmm1
  872. vaesenc xmm4,xmm4,xmm1
  873. vpcmpgtd xmm15,xmm14,xmm15
  874. vaesenc xmm5,xmm5,xmm1
  875. vaesenc xmm6,xmm6,xmm1
  876. vpaddd xmm15,xmm15,xmm14
  877. vmovdqu xmm14,XMMWORD PTR[48+rsp]
  878. vaesenc xmm7,xmm7,xmm1
  879. mov rbx,QWORD PTR[64+rsp]
  880. vaesenc xmm8,xmm8,xmm1
  881. vaesenc xmm9,xmm9,xmm1
  882. vmovups xmm1,XMMWORD PTR[((16-120))+rsi]
  883. vaesenclast xmm2,xmm2,xmm0
  884. vmovdqa XMMWORD PTR[32+rsp],xmm15
  885. vpxor xmm15,xmm15,xmm15
  886. vaesenclast xmm3,xmm3,xmm0
  887. vaesenclast xmm4,xmm4,xmm0
  888. vpcmpgtd xmm15,xmm14,xmm15
  889. vaesenclast xmm5,xmm5,xmm0
  890. vaesenclast xmm6,xmm6,xmm0
  891. vpaddd xmm14,xmm14,xmm15
  892. vmovdqu xmm15,XMMWORD PTR[((-120))+rsi]
  893. vaesenclast xmm7,xmm7,xmm0
  894. vaesenclast xmm8,xmm8,xmm0
  895. vmovdqa XMMWORD PTR[48+rsp],xmm14
  896. vaesenclast xmm9,xmm9,xmm0
  897. vmovups xmm0,XMMWORD PTR[((32-120))+rsi]
  898. vmovups XMMWORD PTR[(-16)+r8],xmm2
  899. sub r8,rbx
  900. vpxor xmm2,xmm2,XMMWORD PTR[rbp]
  901. vmovups XMMWORD PTR[(-16)+r9],xmm3
  902. sub r9,QWORD PTR[72+rsp]
  903. vpxor xmm3,xmm3,XMMWORD PTR[16+rbp]
  904. vmovups XMMWORD PTR[(-16)+r10],xmm4
  905. sub r10,QWORD PTR[80+rsp]
  906. vpxor xmm4,xmm4,XMMWORD PTR[32+rbp]
  907. vmovups XMMWORD PTR[(-16)+r11],xmm5
  908. sub r11,QWORD PTR[88+rsp]
  909. vpxor xmm5,xmm5,XMMWORD PTR[48+rbp]
  910. vmovups XMMWORD PTR[(-16)+r12],xmm6
  911. sub r12,QWORD PTR[96+rsp]
  912. vpxor xmm6,xmm6,xmm10
  913. vmovups XMMWORD PTR[(-16)+r13],xmm7
  914. sub r13,QWORD PTR[104+rsp]
  915. vpxor xmm7,xmm7,xmm11
  916. vmovups XMMWORD PTR[(-16)+r14],xmm8
  917. sub r14,QWORD PTR[112+rsp]
  918. vpxor xmm8,xmm8,xmm12
  919. vmovups XMMWORD PTR[(-16)+r15],xmm9
  920. sub r15,QWORD PTR[120+rsp]
  921. vpxor xmm9,xmm9,xmm13
  922. dec edx
  923. jnz $L$oop_enc8x
  924. mov rax,QWORD PTR[16+rsp]
  925. $L$enc8x_done::
  926. vzeroupper
  927. movaps xmm6,XMMWORD PTR[((-216))+rax]
  928. movaps xmm7,XMMWORD PTR[((-200))+rax]
  929. movaps xmm8,XMMWORD PTR[((-184))+rax]
  930. movaps xmm9,XMMWORD PTR[((-168))+rax]
  931. movaps xmm10,XMMWORD PTR[((-152))+rax]
  932. movaps xmm11,XMMWORD PTR[((-136))+rax]
  933. movaps xmm12,XMMWORD PTR[((-120))+rax]
  934. movaps xmm13,XMMWORD PTR[((-104))+rax]
  935. movaps xmm14,XMMWORD PTR[((-88))+rax]
  936. movaps xmm15,XMMWORD PTR[((-72))+rax]
  937. mov r15,QWORD PTR[((-48))+rax]
  938. mov r14,QWORD PTR[((-40))+rax]
  939. mov r13,QWORD PTR[((-32))+rax]
  940. mov r12,QWORD PTR[((-24))+rax]
  941. mov rbp,QWORD PTR[((-16))+rax]
  942. mov rbx,QWORD PTR[((-8))+rax]
  943. lea rsp,QWORD PTR[rax]
  944. $L$enc8x_epilogue::
  945. mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue
  946. mov rsi,QWORD PTR[16+rsp]
  947. DB 0F3h,0C3h ;repret
  948. $L$SEH_end_aesni_multi_cbc_encrypt_avx::
  949. aesni_multi_cbc_encrypt_avx ENDP
  950. ALIGN 32
  ;-----------------------------------------------------------------------
  ; aesni_multi_cbc_decrypt_avx
  ; CBC-decrypts up to eight independent streams in lock-step with VAESDEC,
  ; one 16-byte block per stream per pass of $L$oop_dec8x.
  ;
  ; In (Win64 registers, remapped in the prologue):
  ;   rcx -> rdi  array of eight 40-byte lane descriptors, each read as
  ;                 +0  qword  source pointer
  ;                 +8  qword  destination pointer
  ;                 +16 dword  block count (<= 0 leaves the lane idle)
  ;                 +24 16 B   IV
  ;   rdx -> rsi  key schedule: round keys are read at 16-byte steps from +0,
  ;               and the dword at +240 selects how many rounds are run
  ;               (presumably the AES round count - confirm against the caller).
  ;   r8  -> edx  third argument; overwritten before it is ever consumed here.
  ;
  ; Frame after alignment (rsp = 64 mod 256):
  ;   [rsp+0]    sink that absorbs stores of idle/finished lanes
  ;   [rsp+16]   caller rsp, reloaded before the epilogue
  ;   [rsp+32]   8 x dword remaining block counts
  ;   [rsp+64]   8 x qword (destination - source) offsets
  ;   [rsp+128]  4 x 16 B spill of the next ciphertext block, lanes 0-3
  ;   [rsp+192] / [rsp+320]  two 128-byte "previous ciphertext" buffers,
  ;              alternated through rbp
  ;-----------------------------------------------------------------------
  951. aesni_multi_cbc_decrypt_avx PROC PRIVATE
  952. mov QWORD PTR[8+rsp],rdi ;WIN64 prologue
  953. mov QWORD PTR[16+rsp],rsi
  954. mov rax,rsp
  955. $L$SEH_begin_aesni_multi_cbc_decrypt_avx::
  956. mov rdi,rcx
  957. mov rsi,rdx
  958. mov rdx,r8
  ; NOTE(review): this label is not referenced in the visible part of the file;
  ; presumably an alternate entry with rdi/rsi/rdx already set up - confirm.
  959. _avx_cbc_dec_shortcut::
  ; rax keeps the entry rsp for the whole routine until eax is reused at the
  ; round-count load; it is recovered from [16+rsp] before the epilogue.
  960. mov rax,rsp
  961. push rbx
  962. push rbp
  963. push r12
  964. push r13
  965. push r14
  966. push r15
  ; Reserve 168 bytes below the six pushes and save xmm6-xmm15 there
  ; (xmm6 lands at rax-216, xmm15 at rax-72, matching the restores at the end).
  967. lea rsp,QWORD PTR[((-168))+rsp]
  968. movaps XMMWORD PTR[rsp],xmm6
  969. movaps XMMWORD PTR[16+rsp],xmm7
  970. movaps XMMWORD PTR[32+rsp],xmm8
  971. movaps XMMWORD PTR[48+rsp],xmm9
  972. movaps XMMWORD PTR[64+rsp],xmm10
  973. movaps XMMWORD PTR[80+rsp],xmm11
  974. movaps XMMWORD PTR[(-120)+rax],xmm12
  975. movaps XMMWORD PTR[(-104)+rax],xmm13
  976. movaps XMMWORD PTR[(-88)+rax],xmm14
  977. movaps XMMWORD PTR[(-72)+rax],xmm15
  ; Carve the working frame: rsp ends up at (256-byte boundary) - 192, so
  ; rsp+192 is 256-aligned and "xor rbp,128" can flip between the two buffers.
  978. sub rsp,256
  979. and rsp,-256
  980. sub rsp,192
  981. mov QWORD PTR[16+rsp],rax
  982. $L$dec8x_body::
  983. vzeroupper
  ; xmm15 = first round key; rsi and rdi are then biased by +120 / +160 and all
  ; later accesses use displacements relative to the biased pointers.
  984. vmovdqu xmm15,XMMWORD PTR[rsi]
  985. lea rsi,QWORD PTR[120+rsi]
  986. lea rdi,QWORD PTR[160+rdi]
  ; The shifted value is discarded: edx is zeroed by the next instruction.
  987. shr edx,1
  988. $L$dec8x_loop_grande::
  ; Per-lane setup, repeated eight times: load count/source/destination/IV,
  ; fold the count into a running maximum in edx, park the count at
  ; [32+4*i+rsp], point idle lanes (count <= 0) at rsp, and record
  ; destination-source at [64+8*i+rsp] and the IV at [192+16*i+rsp].
  ; Lane 0 computes its offset in rbx, which stays live into the main loop;
  ; lanes 1-7 use rbp as the scratch register.
  989. xor edx,edx
  ; lane 0 -> r8, xmm2
  990. mov ecx,DWORD PTR[((-144))+rdi]
  991. mov r8,QWORD PTR[((-160))+rdi]
  992. cmp ecx,edx
  993. mov rbx,QWORD PTR[((-152))+rdi]
  994. cmovg edx,ecx
  995. test ecx,ecx
  996. vmovdqu xmm2,XMMWORD PTR[((-136))+rdi]
  997. mov DWORD PTR[32+rsp],ecx
  998. cmovle r8,rsp
  999. sub rbx,r8
  1000. mov QWORD PTR[64+rsp],rbx
  1001. vmovdqu XMMWORD PTR[192+rsp],xmm2
  ; lane 1 -> r9, xmm3
  1002. mov ecx,DWORD PTR[((-104))+rdi]
  1003. mov r9,QWORD PTR[((-120))+rdi]
  1004. cmp ecx,edx
  1005. mov rbp,QWORD PTR[((-112))+rdi]
  1006. cmovg edx,ecx
  1007. test ecx,ecx
  1008. vmovdqu xmm3,XMMWORD PTR[((-96))+rdi]
  1009. mov DWORD PTR[36+rsp],ecx
  1010. cmovle r9,rsp
  1011. sub rbp,r9
  1012. mov QWORD PTR[72+rsp],rbp
  1013. vmovdqu XMMWORD PTR[208+rsp],xmm3
  ; lane 2 -> r10, xmm4
  1014. mov ecx,DWORD PTR[((-64))+rdi]
  1015. mov r10,QWORD PTR[((-80))+rdi]
  1016. cmp ecx,edx
  1017. mov rbp,QWORD PTR[((-72))+rdi]
  1018. cmovg edx,ecx
  1019. test ecx,ecx
  1020. vmovdqu xmm4,XMMWORD PTR[((-56))+rdi]
  1021. mov DWORD PTR[40+rsp],ecx
  1022. cmovle r10,rsp
  1023. sub rbp,r10
  1024. mov QWORD PTR[80+rsp],rbp
  1025. vmovdqu XMMWORD PTR[224+rsp],xmm4
  ; lane 3 -> r11, xmm5
  1026. mov ecx,DWORD PTR[((-24))+rdi]
  1027. mov r11,QWORD PTR[((-40))+rdi]
  1028. cmp ecx,edx
  1029. mov rbp,QWORD PTR[((-32))+rdi]
  1030. cmovg edx,ecx
  1031. test ecx,ecx
  1032. vmovdqu xmm5,XMMWORD PTR[((-16))+rdi]
  1033. mov DWORD PTR[44+rsp],ecx
  1034. cmovle r11,rsp
  1035. sub rbp,r11
  1036. mov QWORD PTR[88+rsp],rbp
  1037. vmovdqu XMMWORD PTR[240+rsp],xmm5
  ; lane 4 -> r12, xmm6
  1038. mov ecx,DWORD PTR[16+rdi]
  1039. mov r12,QWORD PTR[rdi]
  1040. cmp ecx,edx
  1041. mov rbp,QWORD PTR[8+rdi]
  1042. cmovg edx,ecx
  1043. test ecx,ecx
  1044. vmovdqu xmm6,XMMWORD PTR[24+rdi]
  1045. mov DWORD PTR[48+rsp],ecx
  1046. cmovle r12,rsp
  1047. sub rbp,r12
  1048. mov QWORD PTR[96+rsp],rbp
  1049. vmovdqu XMMWORD PTR[256+rsp],xmm6
  ; lane 5 -> r13, xmm7
  1050. mov ecx,DWORD PTR[56+rdi]
  1051. mov r13,QWORD PTR[40+rdi]
  1052. cmp ecx,edx
  1053. mov rbp,QWORD PTR[48+rdi]
  1054. cmovg edx,ecx
  1055. test ecx,ecx
  1056. vmovdqu xmm7,XMMWORD PTR[64+rdi]
  1057. mov DWORD PTR[52+rsp],ecx
  1058. cmovle r13,rsp
  1059. sub rbp,r13
  1060. mov QWORD PTR[104+rsp],rbp
  1061. vmovdqu XMMWORD PTR[272+rsp],xmm7
  ; lane 6 -> r14, xmm8
  1062. mov ecx,DWORD PTR[96+rdi]
  1063. mov r14,QWORD PTR[80+rdi]
  1064. cmp ecx,edx
  1065. mov rbp,QWORD PTR[88+rdi]
  1066. cmovg edx,ecx
  1067. test ecx,ecx
  1068. vmovdqu xmm8,XMMWORD PTR[104+rdi]
  1069. mov DWORD PTR[56+rsp],ecx
  1070. cmovle r14,rsp
  1071. sub rbp,r14
  1072. mov QWORD PTR[112+rsp],rbp
  1073. vmovdqu XMMWORD PTR[288+rsp],xmm8
  ; lane 7 -> r15, xmm9
  1074. mov ecx,DWORD PTR[136+rdi]
  1075. mov r15,QWORD PTR[120+rdi]
  1076. cmp ecx,edx
  1077. mov rbp,QWORD PTR[128+rdi]
  1078. cmovg edx,ecx
  1079. test ecx,ecx
  1080. vmovdqu xmm9,XMMWORD PTR[144+rdi]
  1081. mov DWORD PTR[60+rsp],ecx
  1082. cmovle r15,rsp
  1083. sub rbp,r15
  1084. mov QWORD PTR[120+rsp],rbp
  1085. vmovdqu XMMWORD PTR[304+rsp],xmm9
  ; edx = largest block count over the eight lanes; nothing to do if it is 0.
  ; rax still holds the entry rsp here, which is what $L$dec8x_done expects.
  1086. test edx,edx
  1087. jz $L$dec8x_done
  ; Preload the round keys at key+16 / key+32 and the round selector at key+240
  ; (eax stops being the saved rsp from this point on).
  1088. vmovups xmm1,XMMWORD PTR[((16-120))+rsi]
  1089. vmovups xmm0,XMMWORD PTR[((32-120))+rsi]
  1090. mov eax,DWORD PTR[((240-120))+rsi]
  ; rbp -> second buffer (rsp+320); the first one (rsp+192) already holds the IVs.
  1091. lea rbp,QWORD PTR[((192+128))+rsp]
  ; First ciphertext block of every lane (idle lanes read from the stack).
  1092. vmovdqu xmm2,XMMWORD PTR[r8]
  1093. vmovdqu xmm3,XMMWORD PTR[r9]
  1094. vmovdqu xmm4,XMMWORD PTR[r10]
  1095. vmovdqu xmm5,XMMWORD PTR[r11]
  1096. vmovdqu xmm6,XMMWORD PTR[r12]
  1097. vmovdqu xmm7,XMMWORD PTR[r13]
  1098. vmovdqu xmm8,XMMWORD PTR[r14]
  1099. vmovdqu xmm9,XMMWORD PTR[r15]
  ; Keep a copy of each ciphertext block (it chains into the next pass) and
  ; apply the first round key held in xmm15.
  1100. vmovdqu XMMWORD PTR[rbp],xmm2
  1101. vpxor xmm2,xmm2,xmm15
  1102. vmovdqu XMMWORD PTR[16+rbp],xmm3
  1103. vpxor xmm3,xmm3,xmm15
  1104. vmovdqu XMMWORD PTR[32+rbp],xmm4
  1105. vpxor xmm4,xmm4,xmm15
  1106. vmovdqu XMMWORD PTR[48+rbp],xmm5
  1107. vpxor xmm5,xmm5,xmm15
  1108. vmovdqu XMMWORD PTR[64+rbp],xmm6
  1109. vpxor xmm6,xmm6,xmm15
  1110. vmovdqu XMMWORD PTR[80+rbp],xmm7
  1111. vpxor xmm7,xmm7,xmm15
  1112. vmovdqu XMMWORD PTR[96+rbp],xmm8
  1113. vpxor xmm8,xmm8,xmm15
  1114. vmovdqu XMMWORD PTR[112+rbp],xmm9
  1115. vpxor xmm9,xmm9,xmm15
  ; Flip rbp to the buffer holding the chaining values for this pass (the IVs).
  ; ecx stays at the constant 1; the per-lane counts are decremented instead.
  1116. xor rbp,080h
  1117. mov ecx,1
  1118. jmp $L$oop_dec8x
  1119. ALIGN 32
  ; Main loop. Each of the first eight AES rounds is interleaved with the
  ; pointer bookkeeping of one lane:
  ;   cmp 1,count      flags survive the vaesdec/lea/mov/prefetch that follow
  ;   rbx = offset+src destination pointer for this block
  ;   cmovge src,rsp   count <= 1: the next block is read from the stack instead
  ;   cmovg  rbx,rsp   count <  1: the store is redirected to the sink at [rsp]
  ;   load [16+src]    next ciphertext block
  ;   src = 16+rbx+src i.e. destination+16; the tail stores at [-16+src] and then
  ;                    subtracts the saved offset to get the next source back
  1120. $L$oop_dec8x::
  ; lane 0 (rbx already holds its offset on entry to each pass)
  1121. vaesdec xmm2,xmm2,xmm1
  1122. cmp ecx,DWORD PTR[((32+0))+rsp]
  1123. vaesdec xmm3,xmm3,xmm1
  1124. prefetcht0 [31+r8]
  1125. vaesdec xmm4,xmm4,xmm1
  1126. vaesdec xmm5,xmm5,xmm1
  1127. lea rbx,QWORD PTR[rbx*1+r8]
  1128. cmovge r8,rsp
  1129. vaesdec xmm6,xmm6,xmm1
  1130. cmovg rbx,rsp
  1131. vaesdec xmm7,xmm7,xmm1
  1132. sub rbx,r8
  1133. vaesdec xmm8,xmm8,xmm1
  1134. vmovdqu xmm10,XMMWORD PTR[16+r8]
  1135. mov QWORD PTR[((64+0))+rsp],rbx
  1136. vaesdec xmm9,xmm9,xmm1
  1137. vmovups xmm1,XMMWORD PTR[((-72))+rsi]
  1138. lea r8,QWORD PTR[16+rbx*1+r8]
  ; Lanes 0-3 spill their next block because xmm10-xmm13 are reused by lanes 4-7.
  1139. vmovdqu XMMWORD PTR[128+rsp],xmm10
  ; lane 1
  1140. vaesdec xmm2,xmm2,xmm0
  1141. cmp ecx,DWORD PTR[((32+4))+rsp]
  1142. mov rbx,QWORD PTR[((64+8))+rsp]
  1143. vaesdec xmm3,xmm3,xmm0
  1144. prefetcht0 [31+r9]
  1145. vaesdec xmm4,xmm4,xmm0
  1146. vaesdec xmm5,xmm5,xmm0
  1147. lea rbx,QWORD PTR[rbx*1+r9]
  1148. cmovge r9,rsp
  1149. vaesdec xmm6,xmm6,xmm0
  1150. cmovg rbx,rsp
  1151. vaesdec xmm7,xmm7,xmm0
  1152. sub rbx,r9
  1153. vaesdec xmm8,xmm8,xmm0
  1154. vmovdqu xmm11,XMMWORD PTR[16+r9]
  1155. mov QWORD PTR[((64+8))+rsp],rbx
  1156. vaesdec xmm9,xmm9,xmm0
  1157. vmovups xmm0,XMMWORD PTR[((-56))+rsi]
  1158. lea r9,QWORD PTR[16+rbx*1+r9]
  1159. vmovdqu XMMWORD PTR[144+rsp],xmm11
  ; lane 2
  1160. vaesdec xmm2,xmm2,xmm1
  1161. cmp ecx,DWORD PTR[((32+8))+rsp]
  1162. mov rbx,QWORD PTR[((64+16))+rsp]
  1163. vaesdec xmm3,xmm3,xmm1
  1164. prefetcht0 [31+r10]
  1165. vaesdec xmm4,xmm4,xmm1
  1166. prefetcht0 [15+r8]
  1167. vaesdec xmm5,xmm5,xmm1
  1168. lea rbx,QWORD PTR[rbx*1+r10]
  1169. cmovge r10,rsp
  1170. vaesdec xmm6,xmm6,xmm1
  1171. cmovg rbx,rsp
  1172. vaesdec xmm7,xmm7,xmm1
  1173. sub rbx,r10
  1174. vaesdec xmm8,xmm8,xmm1
  1175. vmovdqu xmm12,XMMWORD PTR[16+r10]
  1176. mov QWORD PTR[((64+16))+rsp],rbx
  1177. vaesdec xmm9,xmm9,xmm1
  1178. vmovups xmm1,XMMWORD PTR[((-40))+rsi]
  1179. lea r10,QWORD PTR[16+rbx*1+r10]
  1180. vmovdqu XMMWORD PTR[160+rsp],xmm12
  ; lane 3
  1181. vaesdec xmm2,xmm2,xmm0
  1182. cmp ecx,DWORD PTR[((32+12))+rsp]
  1183. mov rbx,QWORD PTR[((64+24))+rsp]
  1184. vaesdec xmm3,xmm3,xmm0
  1185. prefetcht0 [31+r11]
  1186. vaesdec xmm4,xmm4,xmm0
  1187. prefetcht0 [15+r9]
  1188. vaesdec xmm5,xmm5,xmm0
  1189. lea rbx,QWORD PTR[rbx*1+r11]
  1190. cmovge r11,rsp
  1191. vaesdec xmm6,xmm6,xmm0
  1192. cmovg rbx,rsp
  1193. vaesdec xmm7,xmm7,xmm0
  1194. sub rbx,r11
  1195. vaesdec xmm8,xmm8,xmm0
  1196. vmovdqu xmm13,XMMWORD PTR[16+r11]
  1197. mov QWORD PTR[((64+24))+rsp],rbx
  1198. vaesdec xmm9,xmm9,xmm0
  1199. vmovups xmm0,XMMWORD PTR[((-24))+rsi]
  1200. lea r11,QWORD PTR[16+rbx*1+r11]
  1201. vmovdqu XMMWORD PTR[176+rsp],xmm13
  ; lane 4 (next block stays in xmm10 until the tail; no spill for lanes 4-7)
  1202. vaesdec xmm2,xmm2,xmm1
  1203. cmp ecx,DWORD PTR[((32+16))+rsp]
  1204. mov rbx,QWORD PTR[((64+32))+rsp]
  1205. vaesdec xmm3,xmm3,xmm1
  1206. prefetcht0 [31+r12]
  1207. vaesdec xmm4,xmm4,xmm1
  1208. prefetcht0 [15+r10]
  1209. vaesdec xmm5,xmm5,xmm1
  1210. lea rbx,QWORD PTR[rbx*1+r12]
  1211. cmovge r12,rsp
  1212. vaesdec xmm6,xmm6,xmm1
  1213. cmovg rbx,rsp
  1214. vaesdec xmm7,xmm7,xmm1
  1215. sub rbx,r12
  1216. vaesdec xmm8,xmm8,xmm1
  1217. vmovdqu xmm10,XMMWORD PTR[16+r12]
  1218. mov QWORD PTR[((64+32))+rsp],rbx
  1219. vaesdec xmm9,xmm9,xmm1
  1220. vmovups xmm1,XMMWORD PTR[((-8))+rsi]
  1221. lea r12,QWORD PTR[16+rbx*1+r12]
  ; lane 5
  1222. vaesdec xmm2,xmm2,xmm0
  1223. cmp ecx,DWORD PTR[((32+20))+rsp]
  1224. mov rbx,QWORD PTR[((64+40))+rsp]
  1225. vaesdec xmm3,xmm3,xmm0
  1226. prefetcht0 [31+r13]
  1227. vaesdec xmm4,xmm4,xmm0
  1228. prefetcht0 [15+r11]
  1229. vaesdec xmm5,xmm5,xmm0
  ; Operands are written in the opposite order from the other lanes; the
  ; computed sum (rbx + r13) is the same.
  1230. lea rbx,QWORD PTR[r13*1+rbx]
  1231. cmovge r13,rsp
  1232. vaesdec xmm6,xmm6,xmm0
  1233. cmovg rbx,rsp
  1234. vaesdec xmm7,xmm7,xmm0
  1235. sub rbx,r13
  1236. vaesdec xmm8,xmm8,xmm0
  1237. vmovdqu xmm11,XMMWORD PTR[16+r13]
  1238. mov QWORD PTR[((64+40))+rsp],rbx
  1239. vaesdec xmm9,xmm9,xmm0
  1240. vmovups xmm0,XMMWORD PTR[8+rsi]
  1241. lea r13,QWORD PTR[16+rbx*1+r13]
  ; lane 6
  1242. vaesdec xmm2,xmm2,xmm1
  1243. cmp ecx,DWORD PTR[((32+24))+rsp]
  1244. mov rbx,QWORD PTR[((64+48))+rsp]
  1245. vaesdec xmm3,xmm3,xmm1
  1246. prefetcht0 [31+r14]
  1247. vaesdec xmm4,xmm4,xmm1
  1248. prefetcht0 [15+r12]
  1249. vaesdec xmm5,xmm5,xmm1
  1250. lea rbx,QWORD PTR[rbx*1+r14]
  1251. cmovge r14,rsp
  1252. vaesdec xmm6,xmm6,xmm1
  1253. cmovg rbx,rsp
  1254. vaesdec xmm7,xmm7,xmm1
  1255. sub rbx,r14
  1256. vaesdec xmm8,xmm8,xmm1
  1257. vmovdqu xmm12,XMMWORD PTR[16+r14]
  1258. mov QWORD PTR[((64+48))+rsp],rbx
  1259. vaesdec xmm9,xmm9,xmm1
  1260. vmovups xmm1,XMMWORD PTR[24+rsi]
  1261. lea r14,QWORD PTR[16+rbx*1+r14]
  ; lane 7
  1262. vaesdec xmm2,xmm2,xmm0
  1263. cmp ecx,DWORD PTR[((32+28))+rsp]
  1264. mov rbx,QWORD PTR[((64+56))+rsp]
  1265. vaesdec xmm3,xmm3,xmm0
  1266. prefetcht0 [31+r15]
  1267. vaesdec xmm4,xmm4,xmm0
  1268. prefetcht0 [15+r13]
  1269. vaesdec xmm5,xmm5,xmm0
  1270. lea rbx,QWORD PTR[rbx*1+r15]
  1271. cmovge r15,rsp
  1272. vaesdec xmm6,xmm6,xmm0
  1273. cmovg rbx,rsp
  1274. vaesdec xmm7,xmm7,xmm0
  1275. sub rbx,r15
  1276. vaesdec xmm8,xmm8,xmm0
  1277. vmovdqu xmm13,XMMWORD PTR[16+r15]
  1278. mov QWORD PTR[((64+56))+rsp],rbx
  1279. vaesdec xmm9,xmm9,xmm0
  1280. vmovups xmm0,XMMWORD PTR[40+rsi]
  1281. lea r15,QWORD PTR[16+rbx*1+r15]
  ; xmm14 = remaining counts of lanes 0-3 (decremented in the tail).
  1282. vmovdqu xmm14,XMMWORD PTR[32+rsp]
  1283. prefetcht0 [15+r14]
  1284. prefetcht0 [15+r15]
  ; Round selector: below 11 goes straight to the tail, exactly 11 runs two
  ; extra rounds (the je further down reuses these flags, which the AVX
  ; instructions in between do not modify), anything larger runs four.
  1285. cmp eax,11
  1286. jb $L$dec8x_tail
  1287. vaesdec xmm2,xmm2,xmm1
  1288. vaesdec xmm3,xmm3,xmm1
  1289. vaesdec xmm4,xmm4,xmm1
  1290. vaesdec xmm5,xmm5,xmm1
  1291. vaesdec xmm6,xmm6,xmm1
  1292. vaesdec xmm7,xmm7,xmm1
  1293. vaesdec xmm8,xmm8,xmm1
  1294. vaesdec xmm9,xmm9,xmm1
  1295. vmovups xmm1,XMMWORD PTR[((176-120))+rsi]
  1296. vaesdec xmm2,xmm2,xmm0
  1297. vaesdec xmm3,xmm3,xmm0
  1298. vaesdec xmm4,xmm4,xmm0
  1299. vaesdec xmm5,xmm5,xmm0
  1300. vaesdec xmm6,xmm6,xmm0
  1301. vaesdec xmm7,xmm7,xmm0
  1302. vaesdec xmm8,xmm8,xmm0
  1303. vaesdec xmm9,xmm9,xmm0
  1304. vmovups xmm0,XMMWORD PTR[((192-120))+rsi]
  1305. je $L$dec8x_tail
  1306. vaesdec xmm2,xmm2,xmm1
  1307. vaesdec xmm3,xmm3,xmm1
  1308. vaesdec xmm4,xmm4,xmm1
  1309. vaesdec xmm5,xmm5,xmm1
  1310. vaesdec xmm6,xmm6,xmm1
  1311. vaesdec xmm7,xmm7,xmm1
  1312. vaesdec xmm8,xmm8,xmm1
  1313. vaesdec xmm9,xmm9,xmm1
  1314. vmovups xmm1,XMMWORD PTR[((208-120))+rsi]
  1315. vaesdec xmm2,xmm2,xmm0
  1316. vaesdec xmm3,xmm3,xmm0
  1317. vaesdec xmm4,xmm4,xmm0
  1318. vaesdec xmm5,xmm5,xmm0
  1319. vaesdec xmm6,xmm6,xmm0
  1320. vaesdec xmm7,xmm7,xmm0
  1321. vaesdec xmm8,xmm8,xmm0
  1322. vaesdec xmm9,xmm9,xmm0
  1323. vmovups xmm0,XMMWORD PTR[((224-120))+rsi]
  ; Tail: penultimate round with xmm1, final round with xmm0. Interleaved with
  ; it, every positive per-lane count is decremented: vpcmpgtd produces -1 in
  ; the dwords where count > 0 and vpaddd adds that mask to the counts.
  1324. $L$dec8x_tail::
  1325. vaesdec xmm2,xmm2,xmm1
  1326. vpxor xmm15,xmm15,xmm15
  1327. vaesdec xmm3,xmm3,xmm1
  1328. vaesdec xmm4,xmm4,xmm1
  1329. vpcmpgtd xmm15,xmm14,xmm15
  1330. vaesdec xmm5,xmm5,xmm1
  1331. vaesdec xmm6,xmm6,xmm1
  1332. vpaddd xmm15,xmm15,xmm14
  1333. vmovdqu xmm14,XMMWORD PTR[48+rsp]
  1334. vaesdec xmm7,xmm7,xmm1
  ; rbx = lane 0 offset, needed by "sub r8,rbx" below and by the next pass.
  1335. mov rbx,QWORD PTR[64+rsp]
  1336. vaesdec xmm8,xmm8,xmm1
  1337. vaesdec xmm9,xmm9,xmm1
  1338. vmovups xmm1,XMMWORD PTR[((16-120))+rsi]
  ; Last round, then XOR with the previous ciphertext block (or IV) at [rbp]
  ; to complete the CBC step for each lane.
  1339. vaesdeclast xmm2,xmm2,xmm0
  1340. vmovdqa XMMWORD PTR[32+rsp],xmm15
  1341. vpxor xmm15,xmm15,xmm15
  1342. vaesdeclast xmm3,xmm3,xmm0
  1343. vpxor xmm2,xmm2,XMMWORD PTR[rbp]
  1344. vaesdeclast xmm4,xmm4,xmm0
  1345. vpxor xmm3,xmm3,XMMWORD PTR[16+rbp]
  1346. vpcmpgtd xmm15,xmm14,xmm15
  1347. vaesdeclast xmm5,xmm5,xmm0
  1348. vpxor xmm4,xmm4,XMMWORD PTR[32+rbp]
  1349. vaesdeclast xmm6,xmm6,xmm0
  1350. vpxor xmm5,xmm5,XMMWORD PTR[48+rbp]
  1351. vpaddd xmm14,xmm14,xmm15
  ; Reload the first round key for the next pass (rsi is biased by +120).
  1352. vmovdqu xmm15,XMMWORD PTR[((-120))+rsi]
  1353. vaesdeclast xmm7,xmm7,xmm0
  1354. vpxor xmm6,xmm6,XMMWORD PTR[64+rbp]
  1355. vaesdeclast xmm8,xmm8,xmm0
  1356. vpxor xmm7,xmm7,XMMWORD PTR[80+rbp]
  1357. vmovdqa XMMWORD PTR[48+rsp],xmm14
  1358. vaesdeclast xmm9,xmm9,xmm0
  1359. vpxor xmm8,xmm8,XMMWORD PTR[96+rbp]
  1360. vmovups xmm0,XMMWORD PTR[((32-120))+rsi]
  ; Per lane: store the plaintext at destination, turn the pointer back into
  ; the next source address, record the next ciphertext block in the buffer
  ; just consumed, and pre-XOR it with the first round key for the next pass.
  1361. vmovups XMMWORD PTR[(-16)+r8],xmm2
  1362. sub r8,rbx
  1363. vmovdqu xmm2,XMMWORD PTR[((128+0))+rsp]
  1364. vpxor xmm9,xmm9,XMMWORD PTR[112+rbp]
  1365. vmovups XMMWORD PTR[(-16)+r9],xmm3
  1366. sub r9,QWORD PTR[72+rsp]
  1367. vmovdqu XMMWORD PTR[rbp],xmm2
  1368. vpxor xmm2,xmm2,xmm15
  1369. vmovdqu xmm3,XMMWORD PTR[((128+16))+rsp]
  1370. vmovups XMMWORD PTR[(-16)+r10],xmm4
  1371. sub r10,QWORD PTR[80+rsp]
  1372. vmovdqu XMMWORD PTR[16+rbp],xmm3
  1373. vpxor xmm3,xmm3,xmm15
  1374. vmovdqu xmm4,XMMWORD PTR[((128+32))+rsp]
  1375. vmovups XMMWORD PTR[(-16)+r11],xmm5
  1376. sub r11,QWORD PTR[88+rsp]
  1377. vmovdqu XMMWORD PTR[32+rbp],xmm4
  1378. vpxor xmm4,xmm4,xmm15
  1379. vmovdqu xmm5,XMMWORD PTR[((128+48))+rsp]
  1380. vmovups XMMWORD PTR[(-16)+r12],xmm6
  1381. sub r12,QWORD PTR[96+rsp]
  1382. vmovdqu XMMWORD PTR[48+rbp],xmm5
  1383. vpxor xmm5,xmm5,xmm15
  1384. vmovdqu XMMWORD PTR[64+rbp],xmm10
  1385. vpxor xmm6,xmm15,xmm10
  1386. vmovups XMMWORD PTR[(-16)+r13],xmm7
  1387. sub r13,QWORD PTR[104+rsp]
  1388. vmovdqu XMMWORD PTR[80+rbp],xmm11
  1389. vpxor xmm7,xmm15,xmm11
  1390. vmovups XMMWORD PTR[(-16)+r14],xmm8
  1391. sub r14,QWORD PTR[112+rsp]
  1392. vmovdqu XMMWORD PTR[96+rbp],xmm12
  1393. vpxor xmm8,xmm15,xmm12
  1394. vmovups XMMWORD PTR[(-16)+r15],xmm9
  1395. sub r15,QWORD PTR[120+rsp]
  1396. vmovdqu XMMWORD PTR[112+rbp],xmm13
  1397. vpxor xmm9,xmm15,xmm13
  ; Swap chaining buffers; edx counts down from the largest lane count.
  1398. xor rbp,128
  1399. dec edx
  1400. jnz $L$oop_dec8x
  ; Recover the entry rsp that was parked in the frame (eax was reused above).
  1401. mov rax,QWORD PTR[16+rsp]
  ; Common exit: rax = entry rsp. Restore xmm6-xmm15 and the pushed registers
  ; from their slots below rax, then drop the whole frame in one step.
  1402. $L$dec8x_done::
  1403. vzeroupper
  1404. movaps xmm6,XMMWORD PTR[((-216))+rax]
  1405. movaps xmm7,XMMWORD PTR[((-200))+rax]
  1406. movaps xmm8,XMMWORD PTR[((-184))+rax]
  1407. movaps xmm9,XMMWORD PTR[((-168))+rax]
  1408. movaps xmm10,XMMWORD PTR[((-152))+rax]
  1409. movaps xmm11,XMMWORD PTR[((-136))+rax]
  1410. movaps xmm12,XMMWORD PTR[((-120))+rax]
  1411. movaps xmm13,XMMWORD PTR[((-104))+rax]
  1412. movaps xmm14,XMMWORD PTR[((-88))+rax]
  1413. movaps xmm15,XMMWORD PTR[((-72))+rax]
  1414. mov r15,QWORD PTR[((-48))+rax]
  1415. mov r14,QWORD PTR[((-40))+rax]
  1416. mov r13,QWORD PTR[((-32))+rax]
  1417. mov r12,QWORD PTR[((-24))+rax]
  1418. mov rbp,QWORD PTR[((-16))+rax]
  1419. mov rbx,QWORD PTR[((-8))+rax]
  1420. lea rsp,QWORD PTR[rax]
  ; rdi/rsi come back from the slots written by the first two instructions.
  1421. $L$dec8x_epilogue::
  1422. mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue
  1423. mov rsi,QWORD PTR[16+rsp]
  1424. DB 0F3h,0C3h ;repret
  1425. $L$SEH_end_aesni_multi_cbc_decrypt_avx::
  1426. aesni_multi_cbc_decrypt_avx ENDP
  1427. EXTERN __imp_RtlVirtualUnwind:NEAR
  1428. ALIGN 16
  ;-----------------------------------------------------------------------
  ; se_handler
  ; Exception handler named by every record in .xdata. It patches the context
  ; record reached through r8 so that it describes the faulting routine's
  ; caller, copies it to the buffer at [40+r9], calls RtlVirtualUnwind and
  ; returns 1 in eax.
  ; In:  r8 = context record, r9 = dispatcher record (only these two are read).
  ; NOTE(review): the numeric offsets below look like the Win64 CONTEXT and
  ; DISPATCHER_CONTEXT layouts (120=Rax, 152=Rsp, 248=Rip, 512=Xmm6; 8=ImageBase,
  ; 40=ContextRecord, 56=HandlerData) - confirm against winnt.h.
  ;-----------------------------------------------------------------------
  1429. se_handler PROC PRIVATE
  ; Nine 8-byte pushes plus 64 bytes of locals: together with the return
  ; address the stack is moved by 144 bytes before the call below.
  1430. push rsi
  1431. push rdi
  1432. push rbx
  1433. push rbp
  1434. push r12
  1435. push r13
  1436. push r14
  1437. push r15
  1438. pushfq
  1439. sub rsp,64
  ; rax = value at context+120, rbx = value at context+248 (the address that is
  ; compared against the label bounds below).
  1440. mov rax,QWORD PTR[120+r8]
  1441. mov rbx,QWORD PTR[248+r8]
  ; rsi = base that the two dwords at r11 are relative to; r11 -> the pair of
  ; "imagerel body, imagerel epilogue" values stored after the handler in .xdata.
  1442. mov rsi,QWORD PTR[8+r9]
  1443. mov r11,QWORD PTR[56+r9]
  ; Before the body label: frame not built yet. The routines copy rsp to rax at
  ; entry, so the rax fetched above is used as the frame address.
  1444. mov r10d,DWORD PTR[r11]
  1445. lea r10,QWORD PTR[r10*1+rsi]
  1446. cmp rbx,r10
  1447. jb $L$in_prologue
  1448. mov rax,QWORD PTR[152+r8]
  ; At or past the epilogue label: registers and rsp are already restored.
  1449. mov r10d,DWORD PTR[4+r11]
  1450. lea r10,QWORD PTR[r10*1+rsi]
  1451. cmp rbx,r10
  1452. jae $L$in_prologue
  ; Inside the body: the routine parked its entry rsp at [16+rsp]. Fetch it and
  ; read the pushed registers from just below it.
  1453. mov rax,QWORD PTR[16+rax]
  1454. mov rbx,QWORD PTR[((-8))+rax]
  1455. mov rbp,QWORD PTR[((-16))+rax]
  1456. mov r12,QWORD PTR[((-24))+rax]
  1457. mov r13,QWORD PTR[((-32))+rax]
  1458. mov r14,QWORD PTR[((-40))+rax]
  1459. mov r15,QWORD PTR[((-48))+rax]
  1460. mov QWORD PTR[144+r8],rbx
  1461. mov QWORD PTR[160+r8],rbp
  1462. mov QWORD PTR[216+r8],r12
  1463. mov QWORD PTR[224+r8],r13
  1464. mov QWORD PTR[232+r8],r14
  1465. mov QWORD PTR[240+r8],r15
  ; Copy 20 qwords (the ten saved xmm registers, starting 216 bytes below the
  ; entry rsp) to context+512. The DD encodes bytes FC F3 48 A5 = cld; rep movsq.
  1466. lea rsi,QWORD PTR[((-56-160))+rax]
  1467. lea rdi,QWORD PTR[512+r8]
  1468. mov ecx,20
  1469. DD 0a548f3fch
  ; rax = entry rsp of the interrupted routine. Its first two instructions put
  ; rdi/rsi at [8+rsp]/[16+rsp]; move those and rax itself into the context.
  1470. $L$in_prologue::
  1471. mov rdi,QWORD PTR[8+rax]
  1472. mov rsi,QWORD PTR[16+rax]
  1473. mov QWORD PTR[152+r8],rax
  1474. mov QWORD PTR[168+r8],rsi
  1475. mov QWORD PTR[176+r8],rdi
  ; Copy 154 qwords from the context record to the buffer at [40+r9]
  ; (same cld; rep movsq encoding as above).
  1476. mov rdi,QWORD PTR[40+r9]
  1477. mov rsi,r8
  1478. mov ecx,154
  1479. DD 0a548f3fch
  ; Build the eight arguments for RtlVirtualUnwind from the dispatcher record:
  ; four in rcx/rdx/r8/r9 and four in stack slots [32..56+rsp].
  1480. mov rsi,r9
  1481. xor rcx,rcx
  1482. mov rdx,QWORD PTR[8+rsi]
  1483. mov r8,QWORD PTR[rsi]
  1484. mov r9,QWORD PTR[16+rsi]
  1485. mov r10,QWORD PTR[40+rsi]
  1486. lea r11,QWORD PTR[56+rsi]
  1487. lea r12,QWORD PTR[24+rsi]
  1488. mov QWORD PTR[32+rsp],r10
  1489. mov QWORD PTR[40+rsp],r11
  1490. mov QWORD PTR[48+rsp],r12
  1491. mov QWORD PTR[56+rsp],rcx
  1492. call QWORD PTR[__imp_RtlVirtualUnwind]
  ; Return 1 (presumably ExceptionContinueSearch - confirm) and unwind the
  ; local frame in reverse push order.
  1493. mov eax,1
  1494. add rsp,64
  1495. popfq
  1496. pop r15
  1497. pop r14
  1498. pop r13
  1499. pop r12
  1500. pop rbp
  1501. pop rbx
  1502. pop rdi
  1503. pop rsi
  1504. DB 0F3h,0C3h ;repret
  1505. se_handler ENDP
  1506. .text$ ENDS
  ; Function table: one triple per routine, each entry image-relative -
  ; begin label, end label, and the matching $L$SEH_info_* record in .xdata.
  ; (Presumably the Win64 RUNTIME_FUNCTION layout - confirm.)
  1507. .pdata SEGMENT READONLY ALIGN(4)
  1508. ALIGN 4
  ; aesni_multi_cbc_encrypt
  1509. DD imagerel $L$SEH_begin_aesni_multi_cbc_encrypt
  1510. DD imagerel $L$SEH_end_aesni_multi_cbc_encrypt
  1511. DD imagerel $L$SEH_info_aesni_multi_cbc_encrypt
  ; aesni_multi_cbc_decrypt
  1512. DD imagerel $L$SEH_begin_aesni_multi_cbc_decrypt
  1513. DD imagerel $L$SEH_end_aesni_multi_cbc_decrypt
  1514. DD imagerel $L$SEH_info_aesni_multi_cbc_decrypt
  ; aesni_multi_cbc_encrypt_avx
  1515. DD imagerel $L$SEH_begin_aesni_multi_cbc_encrypt_avx
  1516. DD imagerel $L$SEH_end_aesni_multi_cbc_encrypt_avx
  1517. DD imagerel $L$SEH_info_aesni_multi_cbc_encrypt_avx
  ; aesni_multi_cbc_decrypt_avx
  1518. DD imagerel $L$SEH_begin_aesni_multi_cbc_decrypt_avx
  1519. DD imagerel $L$SEH_end_aesni_multi_cbc_decrypt_avx
  1520. DD imagerel $L$SEH_info_aesni_multi_cbc_decrypt_avx
  1521. .pdata ENDS
  ; Unwind records referenced from .pdata. Each record is a 4-byte header
  ; (9,0,0,0), the image-relative address of se_handler, and two image-relative
  ; labels: the routine's *_body and *_epilogue. se_handler reads that pair as
  ; the bounds of the region in which the frame is fully set up.
  ; (Header presumably means UNWIND_INFO version 1 with an exception handler
  ; and no unwind codes - confirm.)
  1522. .xdata SEGMENT READONLY ALIGN(8)
  1523. ALIGN 8
  1524. $L$SEH_info_aesni_multi_cbc_encrypt::
  1525. DB 9,0,0,0
  1526. DD imagerel se_handler
  1527. DD imagerel $L$enc4x_body,imagerel $L$enc4x_epilogue
  1528. $L$SEH_info_aesni_multi_cbc_decrypt::
  1529. DB 9,0,0,0
  1530. DD imagerel se_handler
  1531. DD imagerel $L$dec4x_body,imagerel $L$dec4x_epilogue
  1532. $L$SEH_info_aesni_multi_cbc_encrypt_avx::
  1533. DB 9,0,0,0
  1534. DD imagerel se_handler
  1535. DD imagerel $L$enc8x_body,imagerel $L$enc8x_epilogue
  1536. $L$SEH_info_aesni_multi_cbc_decrypt_avx::
  1537. DB 9,0,0,0
  1538. DD imagerel se_handler
  1539. DD imagerel $L$dec8x_body,imagerel $L$dec8x_epilogue
  1540. .xdata ENDS
  1541. END