; poly1305-x86_64.masm (45 KB)
  1. OPTION DOTNAME ; accept identifiers that begin with a dot (the segment below is named .text$)
  2. .text$ SEGMENT ALIGN(256) 'CODE' ; open the code segment, 256-byte aligned
  3. EXTERN OPENSSL_ia32cap_P:NEAR ; capability vector defined elsewhere; poly1305_init reads it to choose between scalar/AVX/AVX2 routines
  4. PUBLIC poly1305_init ; exported entry points
  5. PUBLIC poly1305_blocks
  6. PUBLIC poly1305_emit
  7. ALIGN 32
  ; poly1305_init -- reset the context and load the key (Win64 register arguments).
  ;   rcx = context
  ;   rdx = key pointer (may be NULL)
  ;   r8  = pointer to a two-entry function-pointer table that receives the
  ;         block routine and the emit routine selected for this CPU
  ;   Context layout as used in this file: bytes 0..23 hold the accumulator,
  ;   bytes 24..39 hold the masked first 16 key bytes.
  ;   Returns rax = 0 when the key pointer is NULL (only the accumulator is
  ;   zeroed, the table is not written), rax = 1 otherwise.
  8. poly1305_init PROC PUBLIC
  9. mov QWORD PTR[8+rsp],rdi ;WIN64 prologue: rdi/rsi are callee-saved on Win64, park them in the caller-provided home slots
  10. mov QWORD PTR[16+rsp],rsi
  11. mov rax,rsp ; NOTE(review): rax is overwritten by the xor below before any read -- presumably generator boilerplate
  12. $L$SEH_begin_poly1305_init::
  13. mov rdi,rcx ; remap the Win64 argument registers to rdi/rsi/rdx
  14. mov rsi,rdx
  15. mov rdx,r8
  16. xor rax,rax
  17. mov QWORD PTR[rdi],rax ; zero the 24-byte accumulator
  18. mov QWORD PTR[8+rdi],rax
  19. mov QWORD PTR[16+rdi],rax
  20. cmp rsi,0 ; NULL key pointer?
  21. je $L$no_key ; yes: return with rax still 0
  22. lea r10,QWORD PTR[poly1305_blocks] ; defaults: the scalar block/emit routines
  23. lea r11,QWORD PTR[poly1305_emit]
  24. mov r9,QWORD PTR[((OPENSSL_ia32cap_P+4))] ; 64 capability bits starting at byte offset 4 of the vector
  25. lea rax,QWORD PTR[poly1305_blocks_avx]
  26. lea rcx,QWORD PTR[poly1305_emit_avx]
  27. bt r9,28 ; CF = bit 28 of those bits (presumably the AVX flag -- confirm against the capability-vector layout)
  28. cmovc r10,rax ; if set, switch both pointers to the _avx variants
  29. cmovc r11,rcx
  30. lea rax,QWORD PTR[poly1305_blocks_avx2]
  31. bt r9,37 ; CF = bit 37 (presumably the AVX2 flag -- confirm)
  32. cmovc r10,rax ; if set, use the _avx2 block routine; the emit pointer is left as chosen above
  33. mov rax,00ffffffc0fffffffh ; mask applied to key bytes 0..7
  34. mov rcx,00ffffffc0ffffffch ; mask applied to key bytes 8..15 (also clears the low two bits)
  35. and rax,QWORD PTR[rsi]
  36. and rcx,QWORD PTR[8+rsi]
  37. mov QWORD PTR[24+rdi],rax ; store the masked key halves at context+24 and context+32
  38. mov QWORD PTR[32+rdi],rcx
  39. mov QWORD PTR[rdx],r10 ; table[0] = selected block routine
  40. mov QWORD PTR[8+rdx],r11 ; table[1] = selected emit routine
  41. mov eax,1 ; return 1
  42. $L$no_key::
  43. mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue: restore rdi/rsi from the home slots
  44. mov rsi,QWORD PTR[16+rsp]
  45. DB 0F3h,0C3h ;repret (bytes F3 C3 encode "rep ret")
  46. $L$SEH_end_poly1305_init::
  47. poly1305_init ENDP
  48. ALIGN 32
  ; poly1305_blocks -- absorb whole 16-byte blocks into the accumulator (scalar path).
  ;   rcx = context
  ;   rdx = input pointer
  ;   r8  = length in bytes; only len/16 whole blocks are consumed
  ;   r9  = value added to the top accumulator limb together with every block
  ;   Per block: h = (h + block + r9*2^128) * r, partially reduced mod 2^130-5.
  ;   Register roles inside the loop:
  ;     r14:rbx:rbp = h0:h1:h2 (accumulator, 64+64+few bits)
  ;     r11 = r0, r12 = r1, r13 = s1 = r1 + (r1 >> 2), r15 = blocks remaining
  ;   s1 equals 5*r1/4 because poly1305_init clears the low two bits of r1.
  49. poly1305_blocks PROC PUBLIC
  50. mov QWORD PTR[8+rsp],rdi ;WIN64 prologue: save rdi/rsi in the home slots
  51. mov QWORD PTR[16+rsp],rsi
  52. mov rax,rsp ; rax = entry rsp; not read before being overwritten below
  53. $L$SEH_begin_poly1305_blocks::
  54. mov rdi,rcx ; remap the Win64 argument registers to rdi/rsi/rdx/rcx
  55. mov rsi,rdx
  56. mov rdx,r8
  57. mov rcx,r9
  58. $L$blocks:: ; poly1305_blocks_avx also branches here for short inputs
  59. shr rdx,4 ; byte length -> number of 16-byte blocks
  60. jz $L$no_data ; fewer than 16 bytes: nothing to do
  61. push rbx ; save the callee-saved registers used below
  62. push rbp
  63. push r12
  64. push r13
  65. push r14
  66. push r15
  67. $L$blocks_body::
  68. mov r15,rdx ; r15 = block counter
  69. mov r11,QWORD PTR[24+rdi] ; r0
  70. mov r13,QWORD PTR[32+rdi] ; r1
  71. mov r14,QWORD PTR[rdi] ; h0
  72. mov rbx,QWORD PTR[8+rdi] ; h1
  73. mov rbp,QWORD PTR[16+rdi] ; h2
  74. mov r12,r13 ; keep r1 in r12
  75. shr r13,2
  76. mov rax,r12 ; rax = r1, the first multiplier of the iteration
  77. add r13,r12 ; s1 = r1 + (r1 >> 2)
  78. jmp $L$oop
  79. ALIGN 32
  80. $L$oop::
  81. add r14,QWORD PTR[rsi] ; h += next 16 input bytes ...
  82. adc rbx,QWORD PTR[8+rsi]
  83. lea rsi,QWORD PTR[16+rsi] ; advance the input pointer (lea leaves the carry flag intact)
  84. adc rbp,rcx ; ... plus rcx in the top limb
  85. mul r14 ; rdx:rax = h0*r1
  86. mov r9,rax ; d1 low
  87. mov rax,r11
  88. mov r10,rdx ; d1 high
  89. mul r14 ; rdx:rax = h0*r0
  90. mov r14,rax ; d0 low
  91. mov rax,r11
  92. mov r8,rdx ; d0 high
  93. mul rbx ; rdx:rax = h1*r0
  94. add r9,rax ; d1 += h1*r0
  95. mov rax,r13
  96. adc r10,rdx
  97. mul rbx ; rdx:rax = h1*s1
  98. mov rbx,rbp ; rbx = h2 (h1 is no longer needed)
  99. add r14,rax ; d0 += h1*s1
  100. adc r8,rdx
  101. imul rbx,r13 ; h2*s1
  102. add r9,rbx ; d1 += h2*s1
  103. mov rbx,r8
  104. adc r10,0
  105. imul rbp,r11 ; h2*r0
  106. add rbx,r9 ; new h1 = high(d0) + low(d1)
  107. mov rax,-4
  108. adc r10,rbp ; d2 = high(d1) + h2*r0 + carry
  109. and rax,r10 ; d2 with its low two bits cleared
  110. mov rbp,r10
  111. shr r10,2
  112. and rbp,3 ; new h2 = d2 mod 4
  113. add rax,r10 ; (d2 & -4) + (d2 >> 2) = 5*(d2 >> 2)
  114. add r14,rax ; fold the bits at and above 2^130 back into h0 (2^130 = 5 mod 2^130-5)
  115. adc rbx,0
  116. adc rbp,0
  117. mov rax,r12 ; reload r1 for the next iteration
  118. dec r15
  119. jnz $L$oop
  120. mov QWORD PTR[rdi],r14 ; write the accumulator back to the context
  121. mov QWORD PTR[8+rdi],rbx
  122. mov QWORD PTR[16+rdi],rbp
  123. mov r15,QWORD PTR[rsp] ; restore the callee-saved registers (loads instead of pops)
  124. mov r14,QWORD PTR[8+rsp]
  125. mov r13,QWORD PTR[16+rsp]
  126. mov r12,QWORD PTR[24+rsp]
  127. mov rbp,QWORD PTR[32+rsp]
  128. mov rbx,QWORD PTR[40+rsp]
  129. lea rsp,QWORD PTR[48+rsp] ; release the six pushed slots
  130. $L$no_data::
  131. $L$blocks_epilogue::
  132. mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue: restore rdi/rsi from the home slots
  133. mov rsi,QWORD PTR[16+rsp]
  134. DB 0F3h,0C3h ;repret (bytes F3 C3 encode "rep ret")
  135. $L$SEH_end_poly1305_blocks::
  136. poly1305_blocks ENDP
  137. ALIGN 32
  ; poly1305_emit -- fully reduce the accumulator and write the 16-byte result.
  ;   rcx = context (accumulator read from bytes 0..23, not modified)
  ;   rdx = 16-byte output buffer
  ;   r8  = pointer to a 128-bit value that is added to the reduced accumulator;
  ;         the carry out of bit 127 is discarded
  138. poly1305_emit PROC PUBLIC
  139. mov QWORD PTR[8+rsp],rdi ;WIN64 prologue: save rdi/rsi in the home slots
  140. mov QWORD PTR[16+rsp],rsi
  141. mov rax,rsp ; rax = entry rsp; not read before being overwritten below
  142. $L$SEH_begin_poly1305_emit::
  143. mov rdi,rcx ; remap the Win64 argument registers to rdi/rsi/rdx
  144. mov rsi,rdx
  145. mov rdx,r8
  146. $L$emit::
  147. mov r8,QWORD PTR[rdi] ; h0
  148. mov r9,QWORD PTR[8+rdi] ; h1
  149. mov r10,QWORD PTR[16+rdi] ; h2
  150. mov rax,r8 ; candidate A (low half): h unchanged
  151. add r8,5 ; candidate B: h + 5, carried through all three limbs
  152. mov rcx,r9 ; candidate A (high half); mov does not disturb the carry
  153. adc r9,0
  154. adc r10,0
  155. shr r10,2 ; non-zero iff h + 5 reached 2^130, i.e. h >= 2^130-5
  156. cmovnz rax,r8 ; then take the low 128 bits of h + 5, which equal h - (2^130-5)
  157. cmovnz rcx,r9
  158. add rax,QWORD PTR[rdx] ; add the caller-supplied 128-bit value
  159. adc rcx,QWORD PTR[8+rdx]
  160. mov QWORD PTR[rsi],rax ; store the 16-byte result
  161. mov QWORD PTR[8+rsi],rcx
  162. mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue: restore rdi/rsi from the home slots
  163. mov rsi,QWORD PTR[16+rsp]
  164. DB 0F3h,0C3h ;repret (bytes F3 C3 encode "rep ret")
  165. $L$SEH_end_poly1305_emit::
  166. poly1305_emit ENDP
  167. ALIGN 32
  ; __poly1305_block -- one multiply-and-reduce step: h = h * r, partially reduced mod 2^130-5.
  ;   Private helper with a register-only contract (no prologue, no stack use):
  ;     in:  r14:rbx:rbp = h0:h1:h2
  ;          rax = r1, r11 = r0, r13 = s1 (callers in this file pass r1 + (r1 >> 2))
  ;     out: r14:rbx:rbp = updated h0:h1:h2 (h2 ends up as a small value)
  ;     clobbers rax, rdx, r8, r9, r10 and the flags; r11, r12, r13 are untouched
  ;   The instruction sequence is the same as the multiply part of the loop in
  ;   poly1305_blocks.
  168. __poly1305_block PROC PRIVATE
  169. mul r14 ; rdx:rax = h0*r1
  170. mov r9,rax ; d1 low
  171. mov rax,r11
  172. mov r10,rdx ; d1 high
  173. mul r14 ; rdx:rax = h0*r0
  174. mov r14,rax ; d0 low
  175. mov rax,r11
  176. mov r8,rdx ; d0 high
  177. mul rbx ; rdx:rax = h1*r0
  178. add r9,rax ; d1 += h1*r0
  179. mov rax,r13
  180. adc r10,rdx
  181. mul rbx ; rdx:rax = h1*s1
  182. mov rbx,rbp ; rbx = h2 (h1 is no longer needed)
  183. add r14,rax ; d0 += h1*s1
  184. adc r8,rdx
  185. imul rbx,r13 ; h2*s1
  186. add r9,rbx ; d1 += h2*s1
  187. mov rbx,r8
  188. adc r10,0
  189. imul rbp,r11 ; h2*r0
  190. add rbx,r9 ; new h1 = high(d0) + low(d1)
  191. mov rax,-4
  192. adc r10,rbp ; d2 = high(d1) + h2*r0 + carry
  193. and rax,r10 ; d2 with its low two bits cleared
  194. mov rbp,r10
  195. shr r10,2
  196. and rbp,3 ; new h2 = d2 mod 4
  197. add rax,r10 ; (d2 & -4) + (d2 >> 2) = 5*(d2 >> 2)
  198. add r14,rax ; fold the bits at and above 2^130 back into h0 (2^130 = 5 mod 2^130-5)
  199. adc rbx,0
  200. adc rbp,0
  201. DB 0F3h,0C3h ;repret (bytes F3 C3 encode "rep ret")
  202. __poly1305_block ENDP
  203. ALIGN 32
  ; __poly1305_init_avx -- build the table of key powers read by the AVX block code.
  ;   in:  rdi = context, r11 = r0, r12 = r1, r13 = s1 (passed through to __poly1305_block)
  ;   Computes r^2, r^3 and r^4 with three __poly1305_block calls, splits each of
  ;   them (and r itself) into five 26-bit limbs and stores the limbs as dwords.
  ;   Table layout, starting at context+48, one 16-byte row per entry:
  ;     each row holds { r^2, r^1, r^4, r^3 } at dword offsets 0, 4, 8, 12
  ;     rows in order: limb0, limb1, 5*limb1, limb2, 5*limb2,
  ;                    limb3, 5*limb3, limb4, 5*limb4   (9 rows, 144 bytes)
  ;   rdi is biased by +112 while the table is written, so the stores below use
  ;   offsets -64..+76; it is restored before returning.
  ;   clobbers rax, rdx, r8, r9, r10, r14, rbx, rbp and the flags
  204. __poly1305_init_avx PROC PRIVATE
  205. mov r14,r11 ; start with h = r: h0 = r0
  206. mov rbx,r12 ; h1 = r1
  207. xor rbp,rbp ; h2 = 0
  208. lea rdi,QWORD PTR[((48+64))+rdi] ; rdi = context+112 (table base context+48, plus 64)
  209. mov rax,r12 ; rax = r1, as __poly1305_block expects
  210. call __poly1305_block ; r14:rbx:rbp = r^2
  211. mov eax,03ffffffh ; 26-bit limb mask
  212. mov edx,03ffffffh
  213. mov r8,r14 ; r8 walks the low qword of r^2
  214. and eax,r14d ; limb0 of r^2
  215. mov r9,r11 ; r9 walks the low qword of r
  216. and edx,r11d ; limb0 of r
  217. mov DWORD PTR[((-64))+rdi],eax ; row limb0, r^2 slot
  218. shr r8,26
  219. mov DWORD PTR[((-60))+rdi],edx ; row limb0, r^1 slot
  220. shr r9,26
  221. mov eax,03ffffffh
  222. mov edx,03ffffffh
  223. and eax,r8d ; limb1 of r^2
  224. and edx,r9d ; limb1 of r
  225. mov DWORD PTR[((-48))+rdi],eax
  226. lea eax,DWORD PTR[rax*4+rax] ; 5*limb1 of r^2
  227. mov DWORD PTR[((-44))+rdi],edx
  228. lea edx,DWORD PTR[rdx*4+rdx] ; 5*limb1 of r
  229. mov DWORD PTR[((-32))+rdi],eax
  230. shr r8,26 ; r8 = bits 52..63 of the low qword of r^2
  231. mov DWORD PTR[((-28))+rdi],edx
  232. shr r9,26 ; r9 = bits 52..63 of r0
  233. mov rax,rbx
  234. mov rdx,r12
  235. shl rax,12
  236. shl rdx,12
  237. or rax,r8 ; place the second qword above the 12 leftover bits
  238. or rdx,r9
  239. and eax,03ffffffh ; limb2 of r^2 (bits 52..77)
  240. and edx,03ffffffh ; limb2 of r
  241. mov DWORD PTR[((-16))+rdi],eax
  242. lea eax,DWORD PTR[rax*4+rax] ; 5*limb2 of r^2
  243. mov DWORD PTR[((-12))+rdi],edx
  244. lea edx,DWORD PTR[rdx*4+rdx] ; 5*limb2 of r
  245. mov DWORD PTR[rdi],eax
  246. mov r8,rbx
  247. mov DWORD PTR[4+rdi],edx
  248. mov r9,r12
  249. mov eax,03ffffffh
  250. mov edx,03ffffffh
  251. shr r8,14
  252. shr r9,14
  253. and eax,r8d ; limb3 of r^2 (bits 78..103)
  254. and edx,r9d ; limb3 of r
  255. mov DWORD PTR[16+rdi],eax
  256. lea eax,DWORD PTR[rax*4+rax] ; 5*limb3 of r^2
  257. mov DWORD PTR[20+rdi],edx
  258. lea edx,DWORD PTR[rdx*4+rdx] ; 5*limb3 of r
  259. mov DWORD PTR[32+rdi],eax
  260. shr r8,26 ; r8 = second qword of r^2 >> 40
  261. mov DWORD PTR[36+rdi],edx
  262. shr r9,26 ; r9 = r1 >> 40 = limb4 of r
  263. mov rax,rbp
  264. shl rax,24
  265. or r8,rax ; limb4 of r^2 = (h1 >> 40) | (h2 << 24); no mask applied
  266. mov DWORD PTR[48+rdi],r8d
  267. lea r8,QWORD PTR[r8*4+r8] ; 5*limb4 of r^2
  268. mov DWORD PTR[52+rdi],r9d
  269. lea r9,QWORD PTR[r9*4+r9] ; 5*limb4 of r
  270. mov DWORD PTR[64+rdi],r8d
  271. mov DWORD PTR[68+rdi],r9d
  272. mov rax,r12 ; rax = r1 again for the next multiply
  273. call __poly1305_block ; r14:rbx:rbp = r^3
  274. mov eax,03ffffffh
  275. mov r8,r14
  276. and eax,r14d ; limb0 of r^3
  277. shr r8,26
  278. mov DWORD PTR[((-52))+rdi],eax ; r^3 slot = dword offset 12 of each row
  279. mov edx,03ffffffh
  280. and edx,r8d ; limb1 of r^3
  281. mov DWORD PTR[((-36))+rdi],edx
  282. lea edx,DWORD PTR[rdx*4+rdx] ; 5*limb1
  283. shr r8,26
  284. mov DWORD PTR[((-20))+rdi],edx
  285. mov rax,rbx
  286. shl rax,12
  287. or rax,r8
  288. and eax,03ffffffh ; limb2 of r^3
  289. mov DWORD PTR[((-4))+rdi],eax
  290. lea eax,DWORD PTR[rax*4+rax] ; 5*limb2
  291. mov r8,rbx
  292. mov DWORD PTR[12+rdi],eax
  293. mov edx,03ffffffh
  294. shr r8,14
  295. and edx,r8d ; limb3 of r^3
  296. mov DWORD PTR[28+rdi],edx
  297. lea edx,DWORD PTR[rdx*4+rdx] ; 5*limb3
  298. shr r8,26
  299. mov DWORD PTR[44+rdi],edx
  300. mov rax,rbp
  301. shl rax,24
  302. or r8,rax ; limb4 of r^3
  303. mov DWORD PTR[60+rdi],r8d
  304. lea r8,QWORD PTR[r8*4+r8] ; 5*limb4
  305. mov DWORD PTR[76+rdi],r8d
  306. mov rax,r12 ; rax = r1 again for the next multiply
  307. call __poly1305_block ; r14:rbx:rbp = r^4
  308. mov eax,03ffffffh
  309. mov r8,r14
  310. and eax,r14d ; limb0 of r^4
  311. shr r8,26
  312. mov DWORD PTR[((-56))+rdi],eax ; r^4 slot = dword offset 8 of each row
  313. mov edx,03ffffffh
  314. and edx,r8d ; limb1 of r^4
  315. mov DWORD PTR[((-40))+rdi],edx
  316. lea edx,DWORD PTR[rdx*4+rdx] ; 5*limb1
  317. shr r8,26
  318. mov DWORD PTR[((-24))+rdi],edx
  319. mov rax,rbx
  320. shl rax,12
  321. or rax,r8
  322. and eax,03ffffffh ; limb2 of r^4
  323. mov DWORD PTR[((-8))+rdi],eax
  324. lea eax,DWORD PTR[rax*4+rax] ; 5*limb2
  325. mov r8,rbx
  326. mov DWORD PTR[8+rdi],eax
  327. mov edx,03ffffffh
  328. shr r8,14
  329. and edx,r8d ; limb3 of r^4
  330. mov DWORD PTR[24+rdi],edx
  331. lea edx,DWORD PTR[rdx*4+rdx] ; 5*limb3
  332. shr r8,26
  333. mov DWORD PTR[40+rdi],edx
  334. mov rax,rbp
  335. shl rax,24
  336. or r8,rax ; limb4 of r^4
  337. mov DWORD PTR[56+rdi],r8d
  338. lea r8,QWORD PTR[r8*4+r8] ; 5*limb4
  339. mov DWORD PTR[72+rdi],r8d
  340. lea rdi,QWORD PTR[((-48-64))+rdi] ; undo the +112 bias: rdi = context again
  341. DB 0F3h,0C3h ;repret (bytes F3 C3 encode "rep ret")
  342. __poly1305_init_avx ENDP
  343. ALIGN 32
  344. poly1305_blocks_avx PROC PRIVATE
  345. mov QWORD PTR[8+rsp],rdi ;WIN64 prologue
  346. mov QWORD PTR[16+rsp],rsi
  347. mov rax,rsp
  348. $L$SEH_begin_poly1305_blocks_avx::
  349. mov rdi,rcx
  350. mov rsi,rdx
  351. mov rdx,r8
  352. mov rcx,r9
  353. mov r8d,DWORD PTR[20+rdi]
  354. cmp rdx,128
  355. jae $L$blocks_avx
  356. test r8d,r8d
  357. jz $L$blocks
  358. $L$blocks_avx::
  359. and rdx,-16
  360. jz $L$no_data_avx
  361. vzeroupper
  362. test r8d,r8d
  363. jz $L$base2_64_avx
  364. test rdx,31
  365. jz $L$even_avx
  366. push rbx
  367. push rbp
  368. push r12
  369. push r13
  370. push r14
  371. push r15
  372. $L$blocks_avx_body::
  373. mov r15,rdx
  374. mov r8,QWORD PTR[rdi]
  375. mov r9,QWORD PTR[8+rdi]
  376. mov ebp,DWORD PTR[16+rdi]
  377. mov r11,QWORD PTR[24+rdi]
  378. mov r13,QWORD PTR[32+rdi]
  379. mov r14d,r8d
  380. and r8,-2147483648
  381. mov r12,r9
  382. mov ebx,r9d
  383. and r9,-2147483648
  384. shr r8,6
  385. shl r12,52
  386. add r14,r8
  387. shr rbx,12
  388. shr r9,18
  389. add r14,r12
  390. adc rbx,r9
  391. mov r8,rbp
  392. shl r8,40
  393. shr rbp,24
  394. add rbx,r8
  395. adc rbp,0
  396. mov r9,-4
  397. mov r8,rbp
  398. and r9,rbp
  399. shr r8,2
  400. and rbp,3
  401. add r8,r9
  402. add r14,r8
  403. adc rbx,0
  404. adc rbp,0
  405. mov r12,r13
  406. mov rax,r13
  407. shr r13,2
  408. add r13,r12
  409. add r14,QWORD PTR[rsi]
  410. adc rbx,QWORD PTR[8+rsi]
  411. lea rsi,QWORD PTR[16+rsi]
  412. adc rbp,rcx
  413. call __poly1305_block
  414. test rcx,rcx
  415. jz $L$store_base2_64_avx
  416. mov rax,r14
  417. mov rdx,r14
  418. shr r14,52
  419. mov r11,rbx
  420. mov r12,rbx
  421. shr rdx,26
  422. and rax,03ffffffh
  423. shl r11,12
  424. and rdx,03ffffffh
  425. shr rbx,14
  426. or r14,r11
  427. shl rbp,24
  428. and r14,03ffffffh
  429. shr r12,40
  430. and rbx,03ffffffh
  431. or rbp,r12
  432. sub r15,16
  433. jz $L$store_base2_26_avx
  434. vmovd xmm0,eax
  435. vmovd xmm1,edx
  436. vmovd xmm2,r14d
  437. vmovd xmm3,ebx
  438. vmovd xmm4,ebp
  439. jmp $L$proceed_avx
  440. ALIGN 32
  441. $L$store_base2_64_avx::
  442. mov QWORD PTR[rdi],r14
  443. mov QWORD PTR[8+rdi],rbx
  444. mov QWORD PTR[16+rdi],rbp
  445. jmp $L$done_avx
  446. ALIGN 16
  447. $L$store_base2_26_avx::
  448. mov DWORD PTR[rdi],eax
  449. mov DWORD PTR[4+rdi],edx
  450. mov DWORD PTR[8+rdi],r14d
  451. mov DWORD PTR[12+rdi],ebx
  452. mov DWORD PTR[16+rdi],ebp
  453. ALIGN 16
  454. $L$done_avx::
  455. mov r15,QWORD PTR[rsp]
  456. mov r14,QWORD PTR[8+rsp]
  457. mov r13,QWORD PTR[16+rsp]
  458. mov r12,QWORD PTR[24+rsp]
  459. mov rbp,QWORD PTR[32+rsp]
  460. mov rbx,QWORD PTR[40+rsp]
  461. lea rsp,QWORD PTR[48+rsp]
  462. $L$no_data_avx::
  463. $L$blocks_avx_epilogue::
  464. mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue
  465. mov rsi,QWORD PTR[16+rsp]
  466. DB 0F3h,0C3h ;repret
  467. ALIGN 32
  468. $L$base2_64_avx::
  469. push rbx
  470. push rbp
  471. push r12
  472. push r13
  473. push r14
  474. push r15
  475. $L$base2_64_avx_body::
  476. mov r15,rdx
  477. mov r11,QWORD PTR[24+rdi]
  478. mov r13,QWORD PTR[32+rdi]
  479. mov r14,QWORD PTR[rdi]
  480. mov rbx,QWORD PTR[8+rdi]
  481. mov ebp,DWORD PTR[16+rdi]
  482. mov r12,r13
  483. mov rax,r13
  484. shr r13,2
  485. add r13,r12
  486. test rdx,31
  487. jz $L$init_avx
  488. add r14,QWORD PTR[rsi]
  489. adc rbx,QWORD PTR[8+rsi]
  490. lea rsi,QWORD PTR[16+rsi]
  491. adc rbp,rcx
  492. sub r15,16
  493. call __poly1305_block
  494. $L$init_avx::
  495. mov rax,r14
  496. mov rdx,r14
  497. shr r14,52
  498. mov r8,rbx
  499. mov r9,rbx
  500. shr rdx,26
  501. and rax,03ffffffh
  502. shl r8,12
  503. and rdx,03ffffffh
  504. shr rbx,14
  505. or r14,r8
  506. shl rbp,24
  507. and r14,03ffffffh
  508. shr r9,40
  509. and rbx,03ffffffh
  510. or rbp,r9
  511. vmovd xmm0,eax
  512. vmovd xmm1,edx
  513. vmovd xmm2,r14d
  514. vmovd xmm3,ebx
  515. vmovd xmm4,ebp
  516. mov DWORD PTR[20+rdi],1
  517. call __poly1305_init_avx
  518. $L$proceed_avx::
  519. mov rdx,r15
  520. mov r15,QWORD PTR[rsp]
  521. mov r14,QWORD PTR[8+rsp]
  522. mov r13,QWORD PTR[16+rsp]
  523. mov r12,QWORD PTR[24+rsp]
  524. mov rbp,QWORD PTR[32+rsp]
  525. mov rbx,QWORD PTR[40+rsp]
  526. lea rax,QWORD PTR[48+rsp]
  527. lea rsp,QWORD PTR[48+rsp]
  528. $L$base2_64_avx_epilogue::
  529. jmp $L$do_avx
  530. ALIGN 32
  531. $L$even_avx::
  532. vmovd xmm0,DWORD PTR[rdi]
  533. vmovd xmm1,DWORD PTR[4+rdi]
  534. vmovd xmm2,DWORD PTR[8+rdi]
  535. vmovd xmm3,DWORD PTR[12+rdi]
  536. vmovd xmm4,DWORD PTR[16+rdi]
  537. $L$do_avx::
  538. lea r11,QWORD PTR[((-248))+rsp]
  539. sub rsp,0218h
  540. vmovdqa XMMWORD PTR[80+r11],xmm6
  541. vmovdqa XMMWORD PTR[96+r11],xmm7
  542. vmovdqa XMMWORD PTR[112+r11],xmm8
  543. vmovdqa XMMWORD PTR[128+r11],xmm9
  544. vmovdqa XMMWORD PTR[144+r11],xmm10
  545. vmovdqa XMMWORD PTR[160+r11],xmm11
  546. vmovdqa XMMWORD PTR[176+r11],xmm12
  547. vmovdqa XMMWORD PTR[192+r11],xmm13
  548. vmovdqa XMMWORD PTR[208+r11],xmm14
  549. vmovdqa XMMWORD PTR[224+r11],xmm15
  550. $L$do_avx_body::
  551. sub rdx,64
  552. lea rax,QWORD PTR[((-32))+rsi]
  553. cmovc rsi,rax
  554. vmovdqu xmm14,XMMWORD PTR[48+rdi]
  555. lea rdi,QWORD PTR[112+rdi]
  556. lea rcx,QWORD PTR[$L$const]
  557. vmovdqu xmm5,XMMWORD PTR[32+rsi]
  558. vmovdqu xmm6,XMMWORD PTR[48+rsi]
  559. vmovdqa xmm15,XMMWORD PTR[64+rcx]
  560. vpsrldq xmm7,xmm5,6
  561. vpsrldq xmm8,xmm6,6
  562. vpunpckhqdq xmm9,xmm5,xmm6
  563. vpunpcklqdq xmm5,xmm5,xmm6
  564. vpunpcklqdq xmm8,xmm7,xmm8
  565. vpsrlq xmm9,xmm9,40
  566. vpsrlq xmm6,xmm5,26
  567. vpand xmm5,xmm5,xmm15
  568. vpsrlq xmm7,xmm8,4
  569. vpand xmm6,xmm6,xmm15
  570. vpsrlq xmm8,xmm8,30
  571. vpand xmm7,xmm7,xmm15
  572. vpand xmm8,xmm8,xmm15
  573. vpor xmm9,xmm9,XMMWORD PTR[32+rcx]
  574. jbe $L$skip_loop_avx
  575. vmovdqu xmm11,XMMWORD PTR[((-48))+rdi]
  576. vmovdqu xmm12,XMMWORD PTR[((-32))+rdi]
  577. vpshufd xmm13,xmm14,0EEh
  578. vpshufd xmm10,xmm14,044h
  579. vmovdqa XMMWORD PTR[(-144)+r11],xmm13
  580. vmovdqa XMMWORD PTR[rsp],xmm10
  581. vpshufd xmm14,xmm11,0EEh
  582. vmovdqu xmm10,XMMWORD PTR[((-16))+rdi]
  583. vpshufd xmm11,xmm11,044h
  584. vmovdqa XMMWORD PTR[(-128)+r11],xmm14
  585. vmovdqa XMMWORD PTR[16+rsp],xmm11
  586. vpshufd xmm13,xmm12,0EEh
  587. vmovdqu xmm11,XMMWORD PTR[rdi]
  588. vpshufd xmm12,xmm12,044h
  589. vmovdqa XMMWORD PTR[(-112)+r11],xmm13
  590. vmovdqa XMMWORD PTR[32+rsp],xmm12
  591. vpshufd xmm14,xmm10,0EEh
  592. vmovdqu xmm12,XMMWORD PTR[16+rdi]
  593. vpshufd xmm10,xmm10,044h
  594. vmovdqa XMMWORD PTR[(-96)+r11],xmm14
  595. vmovdqa XMMWORD PTR[48+rsp],xmm10
  596. vpshufd xmm13,xmm11,0EEh
  597. vmovdqu xmm10,XMMWORD PTR[32+rdi]
  598. vpshufd xmm11,xmm11,044h
  599. vmovdqa XMMWORD PTR[(-80)+r11],xmm13
  600. vmovdqa XMMWORD PTR[64+rsp],xmm11
  601. vpshufd xmm14,xmm12,0EEh
  602. vmovdqu xmm11,XMMWORD PTR[48+rdi]
  603. vpshufd xmm12,xmm12,044h
  604. vmovdqa XMMWORD PTR[(-64)+r11],xmm14
  605. vmovdqa XMMWORD PTR[80+rsp],xmm12
  606. vpshufd xmm13,xmm10,0EEh
  607. vmovdqu xmm12,XMMWORD PTR[64+rdi]
  608. vpshufd xmm10,xmm10,044h
  609. vmovdqa XMMWORD PTR[(-48)+r11],xmm13
  610. vmovdqa XMMWORD PTR[96+rsp],xmm10
  611. vpshufd xmm14,xmm11,0EEh
  612. vpshufd xmm11,xmm11,044h
  613. vmovdqa XMMWORD PTR[(-32)+r11],xmm14
  614. vmovdqa XMMWORD PTR[112+rsp],xmm11
  615. vpshufd xmm13,xmm12,0EEh
  616. vmovdqa xmm14,XMMWORD PTR[rsp]
  617. vpshufd xmm12,xmm12,044h
  618. vmovdqa XMMWORD PTR[(-16)+r11],xmm13
  619. vmovdqa XMMWORD PTR[128+rsp],xmm12
  620. jmp $L$oop_avx
  621. ALIGN 32
  622. $L$oop_avx::
  623. vpmuludq xmm10,xmm14,xmm5
  624. vpmuludq xmm11,xmm14,xmm6
  625. vmovdqa XMMWORD PTR[32+r11],xmm2
  626. vpmuludq xmm12,xmm14,xmm7
  627. vmovdqa xmm2,XMMWORD PTR[16+rsp]
  628. vpmuludq xmm13,xmm14,xmm8
  629. vpmuludq xmm14,xmm14,xmm9
  630. vmovdqa XMMWORD PTR[r11],xmm0
  631. vpmuludq xmm0,xmm9,XMMWORD PTR[32+rsp]
  632. vmovdqa XMMWORD PTR[16+r11],xmm1
  633. vpmuludq xmm1,xmm2,xmm8
  634. vpaddq xmm10,xmm10,xmm0
  635. vpaddq xmm14,xmm14,xmm1
  636. vmovdqa XMMWORD PTR[48+r11],xmm3
  637. vpmuludq xmm0,xmm2,xmm7
  638. vpmuludq xmm1,xmm2,xmm6
  639. vpaddq xmm13,xmm13,xmm0
  640. vmovdqa xmm3,XMMWORD PTR[48+rsp]
  641. vpaddq xmm12,xmm12,xmm1
  642. vmovdqa XMMWORD PTR[64+r11],xmm4
  643. vpmuludq xmm2,xmm2,xmm5
  644. vpmuludq xmm0,xmm3,xmm7
  645. vpaddq xmm11,xmm11,xmm2
  646. vmovdqa xmm4,XMMWORD PTR[64+rsp]
  647. vpaddq xmm14,xmm14,xmm0
  648. vpmuludq xmm1,xmm3,xmm6
  649. vpmuludq xmm3,xmm3,xmm5
  650. vpaddq xmm13,xmm13,xmm1
  651. vmovdqa xmm2,XMMWORD PTR[80+rsp]
  652. vpaddq xmm12,xmm12,xmm3
  653. vpmuludq xmm0,xmm4,xmm9
  654. vpmuludq xmm4,xmm4,xmm8
  655. vpaddq xmm11,xmm11,xmm0
  656. vmovdqa xmm3,XMMWORD PTR[96+rsp]
  657. vpaddq xmm10,xmm10,xmm4
  658. vmovdqa xmm4,XMMWORD PTR[128+rsp]
  659. vpmuludq xmm1,xmm2,xmm6
  660. vpmuludq xmm2,xmm2,xmm5
  661. vpaddq xmm14,xmm14,xmm1
  662. vpaddq xmm13,xmm13,xmm2
  663. vpmuludq xmm0,xmm3,xmm9
  664. vpmuludq xmm1,xmm3,xmm8
  665. vpaddq xmm12,xmm12,xmm0
  666. vmovdqu xmm0,XMMWORD PTR[rsi]
  667. vpaddq xmm11,xmm11,xmm1
  668. vpmuludq xmm3,xmm3,xmm7
  669. vpmuludq xmm7,xmm4,xmm7
  670. vpaddq xmm10,xmm10,xmm3
  671. vmovdqu xmm1,XMMWORD PTR[16+rsi]
  672. vpaddq xmm11,xmm11,xmm7
  673. vpmuludq xmm8,xmm4,xmm8
  674. vpmuludq xmm9,xmm4,xmm9
  675. vpsrldq xmm2,xmm0,6
  676. vpaddq xmm12,xmm12,xmm8
  677. vpaddq xmm13,xmm13,xmm9
  678. vpsrldq xmm3,xmm1,6
  679. vpmuludq xmm9,xmm5,XMMWORD PTR[112+rsp]
  680. vpmuludq xmm5,xmm4,xmm6
  681. vpunpckhqdq xmm4,xmm0,xmm1
  682. vpaddq xmm14,xmm14,xmm9
  683. vmovdqa xmm9,XMMWORD PTR[((-144))+r11]
  684. vpaddq xmm10,xmm10,xmm5
  685. vpunpcklqdq xmm0,xmm0,xmm1
  686. vpunpcklqdq xmm3,xmm2,xmm3
  687. vpsrldq xmm4,xmm4,5
  688. vpsrlq xmm1,xmm0,26
  689. vpand xmm0,xmm0,xmm15
  690. vpsrlq xmm2,xmm3,4
  691. vpand xmm1,xmm1,xmm15
  692. vpand xmm4,xmm4,XMMWORD PTR[rcx]
  693. vpsrlq xmm3,xmm3,30
  694. vpand xmm2,xmm2,xmm15
  695. vpand xmm3,xmm3,xmm15
  696. vpor xmm4,xmm4,XMMWORD PTR[32+rcx]
  697. vpaddq xmm0,xmm0,XMMWORD PTR[r11]
  698. vpaddq xmm1,xmm1,XMMWORD PTR[16+r11]
  699. vpaddq xmm2,xmm2,XMMWORD PTR[32+r11]
  700. vpaddq xmm3,xmm3,XMMWORD PTR[48+r11]
  701. vpaddq xmm4,xmm4,XMMWORD PTR[64+r11]
  702. lea rax,QWORD PTR[32+rsi]
  703. lea rsi,QWORD PTR[64+rsi]
  704. sub rdx,64
  705. cmovc rsi,rax
  706. vpmuludq xmm5,xmm9,xmm0
  707. vpmuludq xmm6,xmm9,xmm1
  708. vpaddq xmm10,xmm10,xmm5
  709. vpaddq xmm11,xmm11,xmm6
  710. vmovdqa xmm7,XMMWORD PTR[((-128))+r11]
  711. vpmuludq xmm5,xmm9,xmm2
  712. vpmuludq xmm6,xmm9,xmm3
  713. vpaddq xmm12,xmm12,xmm5
  714. vpaddq xmm13,xmm13,xmm6
  715. vpmuludq xmm9,xmm9,xmm4
  716. vpmuludq xmm5,xmm4,XMMWORD PTR[((-112))+r11]
  717. vpaddq xmm14,xmm14,xmm9
  718. vpaddq xmm10,xmm10,xmm5
  719. vpmuludq xmm6,xmm7,xmm2
  720. vpmuludq xmm5,xmm7,xmm3
  721. vpaddq xmm13,xmm13,xmm6
  722. vmovdqa xmm8,XMMWORD PTR[((-96))+r11]
  723. vpaddq xmm14,xmm14,xmm5
  724. vpmuludq xmm6,xmm7,xmm1
  725. vpmuludq xmm7,xmm7,xmm0
  726. vpaddq xmm12,xmm12,xmm6
  727. vpaddq xmm11,xmm11,xmm7
  728. vmovdqa xmm9,XMMWORD PTR[((-80))+r11]
  729. vpmuludq xmm5,xmm8,xmm2
  730. vpmuludq xmm6,xmm8,xmm1
  731. vpaddq xmm14,xmm14,xmm5
  732. vpaddq xmm13,xmm13,xmm6
  733. vmovdqa xmm7,XMMWORD PTR[((-64))+r11]
  734. vpmuludq xmm8,xmm8,xmm0
  735. vpmuludq xmm5,xmm9,xmm4
  736. vpaddq xmm12,xmm12,xmm8
  737. vpaddq xmm11,xmm11,xmm5
  738. vmovdqa xmm8,XMMWORD PTR[((-48))+r11]
  739. vpmuludq xmm9,xmm9,xmm3
  740. vpmuludq xmm6,xmm7,xmm1
  741. vpaddq xmm10,xmm10,xmm9
  742. vmovdqa xmm9,XMMWORD PTR[((-16))+r11]
  743. vpaddq xmm14,xmm14,xmm6
  744. vpmuludq xmm7,xmm7,xmm0
  745. vpmuludq xmm5,xmm8,xmm4
  746. vpaddq xmm13,xmm13,xmm7
  747. vpaddq xmm12,xmm12,xmm5
  748. vmovdqu xmm5,XMMWORD PTR[32+rsi]
  749. vpmuludq xmm7,xmm8,xmm3
  750. vpmuludq xmm8,xmm8,xmm2
  751. vpaddq xmm11,xmm11,xmm7
  752. vmovdqu xmm6,XMMWORD PTR[48+rsi]
  753. vpaddq xmm10,xmm10,xmm8
  754. vpmuludq xmm2,xmm9,xmm2
  755. vpmuludq xmm3,xmm9,xmm3
  756. vpsrldq xmm7,xmm5,6
  757. vpaddq xmm11,xmm11,xmm2
  758. vpmuludq xmm4,xmm9,xmm4
  759. vpsrldq xmm8,xmm6,6
  760. vpaddq xmm2,xmm12,xmm3
  761. vpaddq xmm3,xmm13,xmm4
  762. vpmuludq xmm4,xmm0,XMMWORD PTR[((-32))+r11]
  763. vpmuludq xmm0,xmm9,xmm1
  764. vpunpckhqdq xmm9,xmm5,xmm6
  765. vpaddq xmm4,xmm14,xmm4
  766. vpaddq xmm0,xmm10,xmm0
  767. vpunpcklqdq xmm5,xmm5,xmm6
  768. vpunpcklqdq xmm8,xmm7,xmm8
  769. vpsrldq xmm9,xmm9,5
  770. vpsrlq xmm6,xmm5,26
  771. vmovdqa xmm14,XMMWORD PTR[rsp]
  772. vpand xmm5,xmm5,xmm15
  773. vpsrlq xmm7,xmm8,4
  774. vpand xmm6,xmm6,xmm15
  775. vpand xmm9,xmm9,XMMWORD PTR[rcx]
  776. vpsrlq xmm8,xmm8,30
  777. vpand xmm7,xmm7,xmm15
  778. vpand xmm8,xmm8,xmm15
  779. vpor xmm9,xmm9,XMMWORD PTR[32+rcx]
  780. vpsrlq xmm13,xmm3,26
  781. vpand xmm3,xmm3,xmm15
  782. vpaddq xmm4,xmm4,xmm13
  783. vpsrlq xmm10,xmm0,26
  784. vpand xmm0,xmm0,xmm15
  785. vpaddq xmm1,xmm11,xmm10
  786. vpsrlq xmm10,xmm4,26
  787. vpand xmm4,xmm4,xmm15
  788. vpsrlq xmm11,xmm1,26
  789. vpand xmm1,xmm1,xmm15
  790. vpaddq xmm2,xmm2,xmm11
  791. vpaddq xmm0,xmm0,xmm10
  792. vpsllq xmm10,xmm10,2
  793. vpaddq xmm0,xmm0,xmm10
  794. vpsrlq xmm12,xmm2,26
  795. vpand xmm2,xmm2,xmm15
  796. vpaddq xmm3,xmm3,xmm12
  797. vpsrlq xmm10,xmm0,26
  798. vpand xmm0,xmm0,xmm15
  799. vpaddq xmm1,xmm1,xmm10
  800. vpsrlq xmm13,xmm3,26
  801. vpand xmm3,xmm3,xmm15
  802. vpaddq xmm4,xmm4,xmm13
  803. ja $L$oop_avx
  804. $L$skip_loop_avx::
  805. vpshufd xmm14,xmm14,010h
  806. add rdx,32
  807. jnz $L$ong_tail_avx
  808. vpaddq xmm7,xmm7,xmm2
  809. vpaddq xmm5,xmm5,xmm0
  810. vpaddq xmm6,xmm6,xmm1
  811. vpaddq xmm8,xmm8,xmm3
  812. vpaddq xmm9,xmm9,xmm4
  813. $L$ong_tail_avx::
  814. vmovdqa XMMWORD PTR[32+r11],xmm2
  815. vmovdqa XMMWORD PTR[r11],xmm0
  816. vmovdqa XMMWORD PTR[16+r11],xmm1
  817. vmovdqa XMMWORD PTR[48+r11],xmm3
  818. vmovdqa XMMWORD PTR[64+r11],xmm4
  819. vpmuludq xmm12,xmm14,xmm7
  820. vpmuludq xmm10,xmm14,xmm5
  821. vpshufd xmm2,XMMWORD PTR[((-48))+rdi],010h
  822. vpmuludq xmm11,xmm14,xmm6
  823. vpmuludq xmm13,xmm14,xmm8
  824. vpmuludq xmm14,xmm14,xmm9
  825. vpmuludq xmm0,xmm2,xmm8
  826. vpaddq xmm14,xmm14,xmm0
  827. vpshufd xmm3,XMMWORD PTR[((-32))+rdi],010h
  828. vpmuludq xmm1,xmm2,xmm7
  829. vpaddq xmm13,xmm13,xmm1
  830. vpshufd xmm4,XMMWORD PTR[((-16))+rdi],010h
  831. vpmuludq xmm0,xmm2,xmm6
  832. vpaddq xmm12,xmm12,xmm0
  833. vpmuludq xmm2,xmm2,xmm5
  834. vpaddq xmm11,xmm11,xmm2
  835. vpmuludq xmm3,xmm3,xmm9
  836. vpaddq xmm10,xmm10,xmm3
  837. vpshufd xmm2,XMMWORD PTR[rdi],010h
  838. vpmuludq xmm1,xmm4,xmm7
  839. vpaddq xmm14,xmm14,xmm1
  840. vpmuludq xmm0,xmm4,xmm6
  841. vpaddq xmm13,xmm13,xmm0
  842. vpshufd xmm3,XMMWORD PTR[16+rdi],010h
  843. vpmuludq xmm4,xmm4,xmm5
  844. vpaddq xmm12,xmm12,xmm4
  845. vpmuludq xmm1,xmm2,xmm9
  846. vpaddq xmm11,xmm11,xmm1
  847. vpshufd xmm4,XMMWORD PTR[32+rdi],010h
  848. vpmuludq xmm2,xmm2,xmm8
  849. vpaddq xmm10,xmm10,xmm2
  850. vpmuludq xmm0,xmm3,xmm6
  851. vpaddq xmm14,xmm14,xmm0
  852. vpmuludq xmm3,xmm3,xmm5
  853. vpaddq xmm13,xmm13,xmm3
  854. vpshufd xmm2,XMMWORD PTR[48+rdi],010h
  855. vpmuludq xmm1,xmm4,xmm9
  856. vpaddq xmm12,xmm12,xmm1
  857. vpshufd xmm3,XMMWORD PTR[64+rdi],010h
  858. vpmuludq xmm0,xmm4,xmm8
  859. vpaddq xmm11,xmm11,xmm0
  860. vpmuludq xmm4,xmm4,xmm7
  861. vpaddq xmm10,xmm10,xmm4
  862. vpmuludq xmm2,xmm2,xmm5
  863. vpaddq xmm14,xmm14,xmm2
  864. vpmuludq xmm1,xmm3,xmm9
  865. vpaddq xmm13,xmm13,xmm1
  866. vpmuludq xmm0,xmm3,xmm8
  867. vpaddq xmm12,xmm12,xmm0
  868. vpmuludq xmm1,xmm3,xmm7
  869. vpaddq xmm11,xmm11,xmm1
  870. vpmuludq xmm3,xmm3,xmm6
  871. vpaddq xmm10,xmm10,xmm3
  872. jz $L$short_tail_avx
  873. vmovdqu xmm0,XMMWORD PTR[rsi]
  874. vmovdqu xmm1,XMMWORD PTR[16+rsi]
  875. vpsrldq xmm2,xmm0,6
  876. vpsrldq xmm3,xmm1,6
  877. vpunpckhqdq xmm4,xmm0,xmm1
  878. vpunpcklqdq xmm0,xmm0,xmm1
  879. vpunpcklqdq xmm3,xmm2,xmm3
  880. vpsrlq xmm4,xmm4,40
  881. vpsrlq xmm1,xmm0,26
  882. vpand xmm0,xmm0,xmm15
  883. vpsrlq xmm2,xmm3,4
  884. vpand xmm1,xmm1,xmm15
  885. vpsrlq xmm3,xmm3,30
  886. vpand xmm2,xmm2,xmm15
  887. vpand xmm3,xmm3,xmm15
  888. vpor xmm4,xmm4,XMMWORD PTR[32+rcx]
  889. vpshufd xmm9,XMMWORD PTR[((-64))+rdi],032h
  890. vpaddq xmm0,xmm0,XMMWORD PTR[r11]
  891. vpaddq xmm1,xmm1,XMMWORD PTR[16+r11]
  892. vpaddq xmm2,xmm2,XMMWORD PTR[32+r11]
  893. vpaddq xmm3,xmm3,XMMWORD PTR[48+r11]
  894. vpaddq xmm4,xmm4,XMMWORD PTR[64+r11]
  895. vpmuludq xmm5,xmm9,xmm0
  896. vpaddq xmm10,xmm10,xmm5
  897. vpmuludq xmm6,xmm9,xmm1
  898. vpaddq xmm11,xmm11,xmm6
  899. vpmuludq xmm5,xmm9,xmm2
  900. vpaddq xmm12,xmm12,xmm5
  901. vpshufd xmm7,XMMWORD PTR[((-48))+rdi],032h
  902. vpmuludq xmm6,xmm9,xmm3
  903. vpaddq xmm13,xmm13,xmm6
  904. vpmuludq xmm9,xmm9,xmm4
  905. vpaddq xmm14,xmm14,xmm9
  906. vpmuludq xmm5,xmm7,xmm3
  907. vpaddq xmm14,xmm14,xmm5
  908. vpshufd xmm8,XMMWORD PTR[((-32))+rdi],032h
  909. vpmuludq xmm6,xmm7,xmm2
  910. vpaddq xmm13,xmm13,xmm6
  911. vpshufd xmm9,XMMWORD PTR[((-16))+rdi],032h
  912. vpmuludq xmm5,xmm7,xmm1
  913. vpaddq xmm12,xmm12,xmm5
  914. vpmuludq xmm7,xmm7,xmm0
  915. vpaddq xmm11,xmm11,xmm7
  916. vpmuludq xmm8,xmm8,xmm4
  917. vpaddq xmm10,xmm10,xmm8
  918. vpshufd xmm7,XMMWORD PTR[rdi],032h
  919. vpmuludq xmm6,xmm9,xmm2
  920. vpaddq xmm14,xmm14,xmm6
  921. vpmuludq xmm5,xmm9,xmm1
  922. vpaddq xmm13,xmm13,xmm5
  923. vpshufd xmm8,XMMWORD PTR[16+rdi],032h
  924. vpmuludq xmm9,xmm9,xmm0
  925. vpaddq xmm12,xmm12,xmm9
  926. vpmuludq xmm6,xmm7,xmm4
  927. vpaddq xmm11,xmm11,xmm6
  928. vpshufd xmm9,XMMWORD PTR[32+rdi],032h
  929. vpmuludq xmm7,xmm7,xmm3
  930. vpaddq xmm10,xmm10,xmm7
  931. vpmuludq xmm5,xmm8,xmm1
  932. vpaddq xmm14,xmm14,xmm5
  933. vpmuludq xmm8,xmm8,xmm0
  934. vpaddq xmm13,xmm13,xmm8
  935. vpshufd xmm7,XMMWORD PTR[48+rdi],032h
  936. vpmuludq xmm6,xmm9,xmm4
  937. vpaddq xmm12,xmm12,xmm6
  938. vpshufd xmm8,XMMWORD PTR[64+rdi],032h
  939. vpmuludq xmm5,xmm9,xmm3
  940. vpaddq xmm11,xmm11,xmm5
  941. vpmuludq xmm9,xmm9,xmm2
  942. vpaddq xmm10,xmm10,xmm9
  943. vpmuludq xmm7,xmm7,xmm0
  944. vpaddq xmm14,xmm14,xmm7
  945. vpmuludq xmm6,xmm8,xmm4
  946. vpaddq xmm13,xmm13,xmm6
  947. vpmuludq xmm5,xmm8,xmm3
  948. vpaddq xmm12,xmm12,xmm5
  949. vpmuludq xmm6,xmm8,xmm2
  950. vpaddq xmm11,xmm11,xmm6
  951. vpmuludq xmm8,xmm8,xmm1
  952. vpaddq xmm10,xmm10,xmm8
  953. $L$short_tail_avx::
  954. vpsrldq xmm9,xmm14,8
  955. vpsrldq xmm8,xmm13,8
  956. vpsrldq xmm6,xmm11,8
  957. vpsrldq xmm5,xmm10,8
  958. vpsrldq xmm7,xmm12,8
  959. vpaddq xmm13,xmm13,xmm8
  960. vpaddq xmm14,xmm14,xmm9
  961. vpaddq xmm10,xmm10,xmm5
  962. vpaddq xmm11,xmm11,xmm6
  963. vpaddq xmm12,xmm12,xmm7
  964. vpsrlq xmm3,xmm13,26
  965. vpand xmm13,xmm13,xmm15
  966. vpaddq xmm14,xmm14,xmm3
  967. vpsrlq xmm0,xmm10,26
  968. vpand xmm10,xmm10,xmm15
  969. vpaddq xmm11,xmm11,xmm0
  970. vpsrlq xmm4,xmm14,26
  971. vpand xmm14,xmm14,xmm15
  972. vpsrlq xmm1,xmm11,26
  973. vpand xmm11,xmm11,xmm15
  974. vpaddq xmm12,xmm12,xmm1
  975. vpaddq xmm10,xmm10,xmm4
  976. vpsllq xmm4,xmm4,2
  977. vpaddq xmm10,xmm10,xmm4
  978. vpsrlq xmm2,xmm12,26
  979. vpand xmm12,xmm12,xmm15
  980. vpaddq xmm13,xmm13,xmm2
  981. vpsrlq xmm0,xmm10,26
  982. vpand xmm10,xmm10,xmm15
  983. vpaddq xmm11,xmm11,xmm0
  984. vpsrlq xmm3,xmm13,26
  985. vpand xmm13,xmm13,xmm15
  986. vpaddq xmm14,xmm14,xmm3
  987. vmovd DWORD PTR[(-112)+rdi],xmm10
  988. vmovd DWORD PTR[(-108)+rdi],xmm11
  989. vmovd DWORD PTR[(-104)+rdi],xmm12
  990. vmovd DWORD PTR[(-100)+rdi],xmm13
  991. vmovd DWORD PTR[(-96)+rdi],xmm14
  992. vmovdqa xmm6,XMMWORD PTR[80+r11]
  993. vmovdqa xmm7,XMMWORD PTR[96+r11]
  994. vmovdqa xmm8,XMMWORD PTR[112+r11]
  995. vmovdqa xmm9,XMMWORD PTR[128+r11]
  996. vmovdqa xmm10,XMMWORD PTR[144+r11]
  997. vmovdqa xmm11,XMMWORD PTR[160+r11]
  998. vmovdqa xmm12,XMMWORD PTR[176+r11]
  999. vmovdqa xmm13,XMMWORD PTR[192+r11]
  1000. vmovdqa xmm14,XMMWORD PTR[208+r11]
  1001. vmovdqa xmm15,XMMWORD PTR[224+r11]
  1002. lea rsp,QWORD PTR[248+r11]
  1003. $L$do_avx_epilogue::
  1004. vzeroupper
  1005. mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue
  1006. mov rsi,QWORD PTR[16+rsp]
  1007. DB 0F3h,0C3h ;repret
  1008. $L$SEH_end_poly1305_blocks_avx::
  1009. poly1305_blocks_avx ENDP
  1010. ALIGN 32
  ; poly1305_emit_avx -- produce the final 16-byte result from a hash that is
  ; stored as five 32-bit limbs (base 2^26) in the context.
  ;
  ; Win64 arguments (moved into rdi/rsi/rdx by the prologue):
  ;   rcx -> rdi : context; five dword limbs at +0..+16, format flag dword at +20
  ;   rdx -> rsi : 16-byte output buffer (written at +0 and +8)
  ;   r8  -> rdx : 16-byte value that is added to the reduced hash (read at +0 and +8)
  ; If the flag at ctx+20 is zero the routine branches to $L$emit, which is
  ; defined outside this excerpt (presumably the base 2^64 emit path).
  ; Clobbers rax, rcx, rdx, r8-r11; rdi/rsi are restored from the caller's home space.
  1011. poly1305_emit_avx PROC PRIVATE
  1012. mov QWORD PTR[8+rsp],rdi ;WIN64 prologue
  1013. mov QWORD PTR[16+rsp],rsi
  1014. mov rax,rsp
  1015. $L$SEH_begin_poly1305_emit_avx::
  1016. mov rdi,rcx ; arg1 -> rdi
  1017. mov rsi,rdx ; arg2 -> rsi
  1018. mov rdx,r8 ; arg3 -> rdx
  1019. cmp DWORD PTR[20+rdi],0 ; format flag at ctx+20
  1020. je $L$emit ; zero: use the path at $L$emit (outside this excerpt)
  ; Load the five limbs; 32-bit moves zero-extend into the full registers.
  1021. mov eax,DWORD PTR[rdi] ; limb 0
  1022. mov ecx,DWORD PTR[4+rdi] ; limb 1
  1023. mov r8d,DWORD PTR[8+rdi] ; limb 2
  1024. mov r11d,DWORD PTR[12+rdi] ; limb 3
  1025. mov r10d,DWORD PTR[16+rdi] ; limb 4
  ; Repack base 2^26 limbs into r8 (bits 0-63), r9 (bits 64-127), r10 (bits 128+).
  1026. shl rcx,26 ; limb1 * 2^26
  1027. mov r9,r8
  1028. shl r8,52 ; low 12 bits of limb2 land in bits 52-63
  1029. add rax,rcx
  1030. shr r9,12 ; remaining bits of limb2 start the second word
  1031. add r8,rax ; r8 = bits 0-63
  1032. adc r9,0
  1033. shl r11,14 ; limb3 * 2^78, relative to bit 64
  1034. mov rax,r10
  1035. shr r10,24 ; bits of limb4 at or above 2^128
  1036. add r9,r11
  1037. shl rax,40 ; limb4 * 2^104, relative to bit 64
  1038. add r9,rax ; r9 = bits 64-127
  1039. adc r10,0 ; r10 = bits 128 and up
  ; Fold everything at or above 2^130 back in: 2^130 = 5 (mod 2^130-5).
  1040. mov rax,r10
  1041. mov rcx,r10
  1042. and r10,3 ; keep bits 128-129
  1043. shr rax,2 ; q = r10 div 4
  1044. and rcx,-4 ; 4*q
  1045. add rax,rcx ; 5*q
  1046. add r8,rax
  1047. adc r9,0
  1048. adc r10,0
  ; Final conditional subtraction of the modulus, done as h+5 and a carry test.
  1049. mov rax,r8 ; keep h (low word)
  1050. add r8,5
  1051. mov rcx,r9 ; keep h (high word)
  1052. adc r9,0
  1053. adc r10,0
  1054. shr r10,2 ; non-zero iff h+5 reached 2^130
  1055. cmovnz rax,r8 ; then take the low 128 bits of h+5
  1056. cmovnz rcx,r9
  1057. add rax,QWORD PTR[rdx] ; add the 128-bit value supplied by the caller
  1058. adc rcx,QWORD PTR[8+rdx] ; carry out of bit 127 is discarded
  1059. mov QWORD PTR[rsi],rax ; write the 16-byte result
  1060. mov QWORD PTR[8+rsi],rcx
  1061. mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue
  1062. mov rsi,QWORD PTR[16+rsp]
  1063. DB 0F3h,0C3h ;repret
  1064. $L$SEH_end_poly1305_emit_avx::
  1065. poly1305_emit_avx ENDP
  1066. ALIGN 32
  ; poly1305_blocks_avx2 -- absorb input into the Poly1305 accumulator, four
  ; 16-byte blocks (64 bytes) per iteration, using 256-bit AVX2 arithmetic on
  ; base 2^26 limbs.
  ;
  ; Win64 arguments (moved to rdi/rsi/rdx/rcx by the prologue):
  ;   rcx -> rdi : context (hash at +0, format flag dword at +20, qwords at +24
  ;                and +32 handed to __poly1305_block, table read from +48..+191)
  ;   rdx -> rsi : input pointer
  ;   r8  -> rdx : length in bytes (rounded down to a multiple of 16)
  ;   r9  -> rcx : value added to the top limb together with every block
  ;
  ; Flow:
  ;   * len below 128 and flag == 0: branch to $L$blocks (outside this excerpt).
  ;   * flag != 0 and len not a multiple of 64: leading blocks are processed one
  ;     at a time through __poly1305_block until the remainder is a multiple of 64.
  ;   * flag == 0: same pre-processing on the base 2^64 hash, then the hash is
  ;     converted to base 2^26, the flag is set to 1 and __poly1305_init_avx is
  ;     called (defined outside this excerpt).
  ;   * $L$do_avx2: vector loop / tail, result stored back as five dword limbs.
  ;
  ; Register roles in the vector part (derived from the carry chain below):
  ;   ymm0-ymm4   hash limbs h0..h4
  ;   ymm11-ymm15 product accumulators d0..d4
  ;   ymm7,ymm8,ymm9,ymm10,ymm6  limbs 0,1,2,3,4 of the next four input blocks
  ;   ymm5        26-bit mask, or table slot 8 while multiplying
  ; Stack table: slot k lives at rsp+32*k (rax = rsp+144 is a second base) and
  ; is built from the 16 bytes at ctx+48+16*k. From the way the slots are used
  ; they are presumably r0,r1,5*r1,r2,5*r2,r3,5*r3,r4,5*r4 for several powers of
  ; the key -- TODO confirm against __poly1305_init_avx.
  1067. poly1305_blocks_avx2 PROC PRIVATE
  1068. mov QWORD PTR[8+rsp],rdi ;WIN64 prologue
  1069. mov QWORD PTR[16+rsp],rsi
  1070. mov rax,rsp
  1071. $L$SEH_begin_poly1305_blocks_avx2::
  1072. mov rdi,rcx ; arg1 -> rdi (context)
  1073. mov rsi,rdx ; arg2 -> rsi (input)
  1074. mov rdx,r8 ; arg3 -> rdx (length)
  1075. mov rcx,r9 ; arg4 -> rcx
  1076. mov r8d,DWORD PTR[20+rdi] ; format flag
  1077. cmp rdx,128
  1078. jae $L$blocks_avx2 ; 128 bytes or more: always take the vector path
  1079. test r8d,r8d
  1080. jz $L$blocks ; short input, flag clear: path outside this excerpt
  1081. $L$blocks_avx2::
  1082. and rdx,-16 ; whole 16-byte blocks only
  1083. jz $L$no_data_avx2
  1084. vzeroupper
  1085. test r8d,r8d
  1086. jz $L$base2_64_avx2 ; flag clear: hash is not yet in 26-bit limbs
  1087. test rdx,63
  1088. jz $L$even_avx2 ; length already a multiple of 64
  ; ---- flag set, length not a multiple of 64: scalar pre-processing ----
  1089. push rbx
  1090. push rbp
  1091. push r12
  1092. push r13
  1093. push r14
  1094. push r15
  1095. $L$blocks_avx2_body::
  1096. mov r15,rdx ; r15 = remaining length
  1097. mov r8,QWORD PTR[rdi] ; limbs 0,1 packed as two dwords
  1098. mov r9,QWORD PTR[8+rdi] ; limbs 2,3
  1099. mov ebp,DWORD PTR[16+rdi] ; limb 4
  1100. mov r11,QWORD PTR[24+rdi] ; key words for __poly1305_block
  1101. mov r13,QWORD PTR[32+rdi]
  ; base 2^26 -> base 2^64: r14 = bits 0-63, rbx = bits 64-127, rbp = the rest
  1102. mov r14d,r8d
  1103. and r8,-2147483648
  1104. mov r12,r9
  1105. mov ebx,r9d
  1106. and r9,-2147483648
  1107. shr r8,6 ; limb 1 moved from bit 32 down to bit 26
  1108. shl r12,52
  1109. add r14,r8
  1110. shr rbx,12
  1111. shr r9,18 ; limb 3 moved from bit 32 down to bit 14
  1112. add r14,r12
  1113. adc rbx,r9
  1114. mov r8,rbp
  1115. shl r8,40
  1116. shr rbp,24
  1117. add rbx,r8
  1118. adc rbp,0
  ; fold bits at or above 2^130 back in (times 5)
  1119. mov r9,-4
  1120. mov r8,rbp
  1121. and r9,rbp
  1122. shr r8,2
  1123. and rbp,3
  1124. add r8,r9 ; 5 * (rbp div 4)
  1125. add r14,r8
  1126. adc rbx,0
  1127. adc rbp,0
  1128. mov r12,r13
  1129. mov rax,r13
  1130. shr r13,2
  1131. add r13,r12 ; r13 = x + (x shr 2) for the qword loaded from ctx+32
  1132. $L$base2_26_pre_avx2::
  1133. add r14,QWORD PTR[rsi] ; add one 16-byte block to the accumulator
  1134. adc rbx,QWORD PTR[8+rsi]
  1135. lea rsi,QWORD PTR[16+rsi]
  1136. adc rbp,rcx ; plus the fourth argument in the top word
  1137. sub r15,16
  1138. call __poly1305_block ; scalar helper, defined outside this excerpt
  1139. mov rax,r12
  1140. test r15,63
  1141. jnz $L$base2_26_pre_avx2 ; repeat until the remainder is a multiple of 64
  1142. test rcx,rcx
  1143. jz $L$store_base2_64_avx2 ; fourth argument zero: store as base 2^64 and return
  ; base 2^64 -> base 2^26: limbs end up in eax, edx, r14d, ebx, ebp
  1144. mov rax,r14
  1145. mov rdx,r14
  1146. shr r14,52
  1147. mov r11,rbx
  1148. mov r12,rbx
  1149. shr rdx,26
  1150. and rax,03ffffffh ; limb 0
  1151. shl r11,12
  1152. and rdx,03ffffffh ; limb 1
  1153. shr rbx,14
  1154. or r14,r11
  1155. shl rbp,24
  1156. and r14,03ffffffh ; limb 2
  1157. shr r12,40
  1158. and rbx,03ffffffh ; limb 3
  1159. or rbp,r12 ; limb 4
  1160. test r15,r15
  1161. jz $L$store_base2_26_avx2 ; nothing left for the vector loop
  1162. vmovd xmm0,eax
  1163. vmovd xmm1,edx
  1164. vmovd xmm2,r14d
  1165. vmovd xmm3,ebx
  1166. vmovd xmm4,ebp
  1167. jmp $L$proceed_avx2
  1168. ALIGN 32
  1169. $L$store_base2_64_avx2::
  1170. mov QWORD PTR[rdi],r14
  1171. mov QWORD PTR[8+rdi],rbx
  1172. mov QWORD PTR[16+rdi],rbp ; 64-bit store also overwrites the flag dword at ctx+20 (upper half of rbp, presumably zero)
  1173. jmp $L$done_avx2
  1174. ALIGN 16
  1175. $L$store_base2_26_avx2::
  1176. mov DWORD PTR[rdi],eax
  1177. mov DWORD PTR[4+rdi],edx
  1178. mov DWORD PTR[8+rdi],r14d
  1179. mov DWORD PTR[12+rdi],ebx
  1180. mov DWORD PTR[16+rdi],ebp
  1181. ALIGN 16
  1182. $L$done_avx2::
  ; restore the six registers pushed above (loads instead of pops)
  1183. mov r15,QWORD PTR[rsp]
  1184. mov r14,QWORD PTR[8+rsp]
  1185. mov r13,QWORD PTR[16+rsp]
  1186. mov r12,QWORD PTR[24+rsp]
  1187. mov rbp,QWORD PTR[32+rsp]
  1188. mov rbx,QWORD PTR[40+rsp]
  1189. lea rsp,QWORD PTR[48+rsp]
  1190. $L$no_data_avx2::
  1191. $L$blocks_avx2_epilogue::
  1192. mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue
  1193. mov rsi,QWORD PTR[16+rsp]
  1194. DB 0F3h,0C3h ;repret
  1195. ALIGN 32
  ; ---- flag clear: hash is held as base 2^64 words ----
  1196. $L$base2_64_avx2::
  1197. push rbx
  1198. push rbp
  1199. push r12
  1200. push r13
  1201. push r14
  1202. push r15
  1203. $L$base2_64_avx2_body::
  1204. mov r15,rdx ; r15 = remaining length
  1205. mov r11,QWORD PTR[24+rdi] ; key words for __poly1305_block
  1206. mov r13,QWORD PTR[32+rdi]
  1207. mov r14,QWORD PTR[rdi] ; hash bits 0-63
  1208. mov rbx,QWORD PTR[8+rdi] ; hash bits 64-127
  1209. mov ebp,DWORD PTR[16+rdi] ; hash bits 128+
  1210. mov r12,r13
  1211. mov rax,r13
  1212. shr r13,2
  1213. add r13,r12 ; r13 = x + (x shr 2)
  1214. test rdx,63
  1215. jz $L$init_avx2 ; already a multiple of 64: skip scalar pre-processing
  1216. $L$base2_64_pre_avx2::
  1217. add r14,QWORD PTR[rsi] ; add one 16-byte block
  1218. adc rbx,QWORD PTR[8+rsi]
  1219. lea rsi,QWORD PTR[16+rsi]
  1220. adc rbp,rcx
  1221. sub r15,16
  1222. call __poly1305_block ; scalar helper, defined outside this excerpt
  1223. mov rax,r12
  1224. test r15,63
  1225. jnz $L$base2_64_pre_avx2
  1226. $L$init_avx2::
  ; base 2^64 -> base 2^26 (same sequence as above, using r8/r9 as scratch)
  1227. mov rax,r14
  1228. mov rdx,r14
  1229. shr r14,52
  1230. mov r8,rbx
  1231. mov r9,rbx
  1232. shr rdx,26
  1233. and rax,03ffffffh ; limb 0
  1234. shl r8,12
  1235. and rdx,03ffffffh ; limb 1
  1236. shr rbx,14
  1237. or r14,r8
  1238. shl rbp,24
  1239. and r14,03ffffffh ; limb 2
  1240. shr r9,40
  1241. and rbx,03ffffffh ; limb 3
  1242. or rbp,r9 ; limb 4
  1243. vmovd xmm0,eax
  1244. vmovd xmm1,edx
  1245. vmovd xmm2,r14d
  1246. vmovd xmm3,ebx
  1247. vmovd xmm4,ebp
  1248. mov DWORD PTR[20+rdi],1 ; mark the context as holding 26-bit limbs
  1249. call __poly1305_init_avx ; defined outside this excerpt; presumably fills the table at ctx+48
  1250. $L$proceed_avx2::
  1251. mov rdx,r15 ; length back into rdx
  ; NOTE(review): r10d and r11d set here are not read in this excerpt before
  ; r11 is overwritten at $L$do_avx2; 3221291008 = 0C0010000h (bits 31, 30, 16).
  1252. mov r10d,DWORD PTR[((OPENSSL_ia32cap_P+8))]
  1253. mov r11d,3221291008
  1254. mov r15,QWORD PTR[rsp]
  1255. mov r14,QWORD PTR[8+rsp]
  1256. mov r13,QWORD PTR[16+rsp]
  1257. mov r12,QWORD PTR[24+rsp]
  1258. mov rbp,QWORD PTR[32+rsp]
  1259. mov rbx,QWORD PTR[40+rsp]
  1260. lea rax,QWORD PTR[48+rsp]
  1261. lea rsp,QWORD PTR[48+rsp]
  1262. $L$base2_64_avx2_epilogue::
  1263. jmp $L$do_avx2
  1264. ALIGN 32
  ; ---- flag set and length a multiple of 64: load the limbs directly ----
  1265. $L$even_avx2::
  1266. mov r10d,DWORD PTR[((OPENSSL_ia32cap_P+8))] ; not read again in this excerpt
  1267. vmovd xmm0,DWORD PTR[rdi]
  1268. vmovd xmm1,DWORD PTR[4+rdi]
  1269. vmovd xmm2,DWORD PTR[8+rdi]
  1270. vmovd xmm3,DWORD PTR[12+rdi]
  1271. vmovd xmm4,DWORD PTR[16+rdi]
  1272. $L$do_avx2::
  ; r11 anchors the xmm save area and is used to restore rsp at the end
  1273. lea r11,QWORD PTR[((-248))+rsp]
  1274. sub rsp,01c8h
  ; spill xmm6-xmm15 (restored before returning)
  1275. vmovdqa XMMWORD PTR[80+r11],xmm6
  1276. vmovdqa XMMWORD PTR[96+r11],xmm7
  1277. vmovdqa XMMWORD PTR[112+r11],xmm8
  1278. vmovdqa XMMWORD PTR[128+r11],xmm9
  1279. vmovdqa XMMWORD PTR[144+r11],xmm10
  1280. vmovdqa XMMWORD PTR[160+r11],xmm11
  1281. vmovdqa XMMWORD PTR[176+r11],xmm12
  1282. vmovdqa XMMWORD PTR[192+r11],xmm13
  1283. vmovdqa XMMWORD PTR[208+r11],xmm14
  1284. vmovdqa XMMWORD PTR[224+r11],xmm15
  1285. $L$do_avx2_body::
  1286. lea rcx,QWORD PTR[$L$const] ; rcx now addresses the constant tables
  1287. lea rdi,QWORD PTR[((48+64))+rdi] ; rdi = ctx+112, so the table is at -64..+64
  1288. vmovdqa ymm7,YMMWORD PTR[96+rcx] ; $L$permd_avx2 index vector
  ; expand nine 16-byte table entries to 32 bytes each and park them on the stack
  1289. vmovdqu xmm9,XMMWORD PTR[((-64))+rdi]
  1290. and rsp,-512 ; align the on-stack table
  1291. vmovdqu xmm10,XMMWORD PTR[((-48))+rdi]
  1292. vmovdqu xmm6,XMMWORD PTR[((-32))+rdi]
  1293. vmovdqu xmm11,XMMWORD PTR[((-16))+rdi]
  1294. vmovdqu xmm12,XMMWORD PTR[rdi]
  1295. vmovdqu xmm13,XMMWORD PTR[16+rdi]
  1296. lea rax,QWORD PTR[144+rsp] ; second base register into the table
  1297. vmovdqu xmm14,XMMWORD PTR[32+rdi]
  1298. vpermd ymm9,ymm7,ymm9
  1299. vmovdqu xmm15,XMMWORD PTR[48+rdi]
  1300. vpermd ymm10,ymm7,ymm10
  1301. vmovdqu xmm5,XMMWORD PTR[64+rdi]
  1302. vpermd ymm6,ymm7,ymm6
  1303. vmovdqa YMMWORD PTR[rsp],ymm9 ; slot 0
  1304. vpermd ymm11,ymm7,ymm11
  1305. vmovdqa YMMWORD PTR[(32-144)+rax],ymm10 ; slot 1
  1306. vpermd ymm12,ymm7,ymm12
  1307. vmovdqa YMMWORD PTR[(64-144)+rax],ymm6 ; slot 2
  1308. vpermd ymm13,ymm7,ymm13
  1309. vmovdqa YMMWORD PTR[(96-144)+rax],ymm11 ; slot 3
  1310. vpermd ymm14,ymm7,ymm14
  1311. vmovdqa YMMWORD PTR[(128-144)+rax],ymm12 ; slot 4
  1312. vpermd ymm15,ymm7,ymm15
  1313. vmovdqa YMMWORD PTR[(160-144)+rax],ymm13 ; slot 5
  1314. vpermd ymm5,ymm7,ymm5
  1315. vmovdqa YMMWORD PTR[(192-144)+rax],ymm14 ; slot 6
  1316. vmovdqa YMMWORD PTR[(224-144)+rax],ymm15 ; slot 7
  1317. vmovdqa YMMWORD PTR[(256-144)+rax],ymm5 ; slot 8
  1318. vmovdqa ymm5,YMMWORD PTR[64+rcx] ; $L$mask26
  ; load the first 64 input bytes: blocks 0 and 2 in ymm7, blocks 1 and 3 in ymm8
  1319. vmovdqu xmm7,XMMWORD PTR[rsi]
  1320. vmovdqu xmm8,XMMWORD PTR[16+rsi]
  1321. vinserti128 ymm7,ymm7,XMMWORD PTR[32+rsi],1
  1322. vinserti128 ymm8,ymm8,XMMWORD PTR[48+rsi],1
  1323. lea rsi,QWORD PTR[64+rsi]
  ; split each 128-bit block into five 26-bit limbs, one block per qword lane
  1324. vpsrldq ymm9,ymm7,6
  1325. vpsrldq ymm10,ymm8,6
  1326. vpunpckhqdq ymm6,ymm7,ymm8 ; high qwords: source of limb 4
  1327. vpunpcklqdq ymm9,ymm9,ymm10 ; bytes 6-13: source of limbs 2 and 3
  1328. vpunpcklqdq ymm7,ymm7,ymm8 ; low qwords: source of limbs 0 and 1
  1329. vpsrlq ymm10,ymm9,30
  1330. vpsrlq ymm9,ymm9,4
  1331. vpsrlq ymm8,ymm7,26
  1332. vpsrlq ymm6,ymm6,40
  1333. vpand ymm9,ymm9,ymm5 ; limb 2
  1334. vpand ymm7,ymm7,ymm5 ; limb 0
  1335. vpand ymm8,ymm8,ymm5 ; limb 1
  1336. vpand ymm10,ymm10,ymm5 ; limb 3
  1337. vpor ymm6,ymm6,YMMWORD PTR[32+rcx] ; limb 4 with bit 24 set ($L$129), i.e. 2^128 of the block
  1338. vpaddq ymm2,ymm9,ymm2 ; h2 += input limb 2 (the other limbs are added at the loop top)
  1339. sub rdx,64
  1340. jz $L$tail_avx2 ; exactly 64 bytes: go straight to the tail
  1341. jmp $L$oop_avx2
  1342. ALIGN 32
  ; ---- main loop: h = (h + input) * table, then load and split the next 64 bytes ----
  1343. $L$oop_avx2::
  1344. vpaddq ymm0,ymm7,ymm0 ; h0 += input limb 0
  1345. vmovdqa ymm7,YMMWORD PTR[rsp] ; slot 0
  1346. vpaddq ymm1,ymm8,ymm1 ; h1 += input limb 1
  1347. vmovdqa ymm8,YMMWORD PTR[32+rsp] ; slot 1
  1348. vpaddq ymm3,ymm10,ymm3 ; h3 += input limb 3
  1349. vmovdqa ymm9,YMMWORD PTR[96+rsp] ; slot 3
  1350. vpaddq ymm4,ymm6,ymm4 ; h4 += input limb 4
  1351. vmovdqa ymm10,YMMWORD PTR[48+rax] ; slot 6
  1352. vmovdqa ymm5,YMMWORD PTR[112+rax] ; slot 8
  1353. vpmuludq ymm13,ymm7,ymm2 ; d2 = h2*slot0
  1354. vpmuludq ymm14,ymm8,ymm2 ; d3 = h2*slot1
  1355. vpmuludq ymm15,ymm9,ymm2 ; d4 = h2*slot3
  1356. vpmuludq ymm11,ymm10,ymm2 ; d0 = h2*slot6
  1357. vpmuludq ymm12,ymm5,ymm2 ; d1 = h2*slot8
  1358. vpmuludq ymm6,ymm8,ymm0 ; h0*slot1
  1359. vpmuludq ymm2,ymm8,ymm1 ; h1*slot1 (ymm2 reused as scratch)
  1360. vpaddq ymm12,ymm12,ymm6 ; d1 +=
  1361. vpaddq ymm13,ymm13,ymm2 ; d2 +=
  1362. vpmuludq ymm6,ymm8,ymm3 ; h3*slot1
  1363. vpmuludq ymm2,ymm4,YMMWORD PTR[64+rsp] ; h4*slot2
  1364. vpaddq ymm15,ymm15,ymm6 ; d4 +=
  1365. vpaddq ymm11,ymm11,ymm2 ; d0 +=
  1366. vmovdqa ymm8,YMMWORD PTR[((-16))+rax] ; slot 4
  1367. vpmuludq ymm6,ymm7,ymm0 ; h0*slot0
  1368. vpmuludq ymm2,ymm7,ymm1 ; h1*slot0
  1369. vpaddq ymm11,ymm11,ymm6 ; d0 +=
  1370. vpaddq ymm12,ymm12,ymm2 ; d1 +=
  1371. vpmuludq ymm6,ymm7,ymm3 ; h3*slot0
  1372. vpmuludq ymm2,ymm7,ymm4 ; h4*slot0
  1373. vmovdqu xmm7,XMMWORD PTR[rsi] ; next input, block 0
  1374. vpaddq ymm14,ymm14,ymm6 ; d3 +=
  1375. vpaddq ymm15,ymm15,ymm2 ; d4 +=
  1376. vinserti128 ymm7,ymm7,XMMWORD PTR[32+rsi],1 ; next input, block 2
  1377. vpmuludq ymm6,ymm8,ymm3 ; h3*slot4
  1378. vpmuludq ymm2,ymm8,ymm4 ; h4*slot4
  1379. vmovdqu xmm8,XMMWORD PTR[16+rsi] ; next input, block 1
  1380. vpaddq ymm11,ymm11,ymm6 ; d0 +=
  1381. vpaddq ymm12,ymm12,ymm2 ; d1 +=
  1382. vmovdqa ymm2,YMMWORD PTR[16+rax] ; slot 5
  1383. vpmuludq ymm6,ymm9,ymm1 ; h1*slot3
  1384. vpmuludq ymm9,ymm9,ymm0 ; h0*slot3
  1385. vpaddq ymm14,ymm14,ymm6 ; d3 +=
  1386. vpaddq ymm13,ymm13,ymm9 ; d2 +=
  1387. vinserti128 ymm8,ymm8,XMMWORD PTR[48+rsi],1 ; next input, block 3
  1388. lea rsi,QWORD PTR[64+rsi]
  1389. vpmuludq ymm6,ymm2,ymm1 ; h1*slot5
  1390. vpmuludq ymm2,ymm2,ymm0 ; h0*slot5
  1391. vpsrldq ymm9,ymm7,6 ; start splitting the next input
  1392. vpaddq ymm15,ymm15,ymm6 ; d4 +=
  1393. vpaddq ymm14,ymm14,ymm2 ; d3 +=
  1394. vpmuludq ymm6,ymm10,ymm3 ; h3*slot6
  1395. vpmuludq ymm2,ymm10,ymm4 ; h4*slot6
  1396. vpsrldq ymm10,ymm8,6
  1397. vpaddq ymm12,ymm12,ymm6 ; d1 +=
  1398. vpaddq ymm13,ymm13,ymm2 ; d2 +=
  1399. vpunpckhqdq ymm6,ymm7,ymm8 ; next input: source of limb 4
  1400. vpmuludq ymm3,ymm5,ymm3 ; h3*slot8
  1401. vpmuludq ymm4,ymm5,ymm4 ; h4*slot8
  1402. vpunpcklqdq ymm7,ymm7,ymm8 ; next input: source of limbs 0 and 1
  1403. vpaddq ymm2,ymm13,ymm3 ; h2 = d2 + h3*slot8
  1404. vpaddq ymm3,ymm14,ymm4 ; h3 = d3 + h4*slot8
  1405. vpunpcklqdq ymm10,ymm9,ymm10 ; next input: source of limbs 2 and 3
  1406. vpmuludq ymm4,ymm0,YMMWORD PTR[80+rax] ; h0*slot7
  1407. vpmuludq ymm0,ymm5,ymm1 ; h1*slot8
  1408. vmovdqa ymm5,YMMWORD PTR[64+rcx] ; ymm5 = $L$mask26 again
  1409. vpaddq ymm4,ymm15,ymm4 ; h4 = d4 + h0*slot7
  1410. vpaddq ymm0,ymm11,ymm0 ; h0 = d0 + h1*slot8
  ; carry propagation between limbs, interleaved with the rest of the input split
  1411. vpsrlq ymm14,ymm3,26
  1412. vpand ymm3,ymm3,ymm5
  1413. vpaddq ymm4,ymm4,ymm14 ; carry h3 to h4
  1414. vpsrlq ymm11,ymm0,26
  1415. vpand ymm0,ymm0,ymm5
  1416. vpaddq ymm1,ymm12,ymm11 ; h1 = d1 + carry from h0
  1417. vpsrlq ymm15,ymm4,26
  1418. vpand ymm4,ymm4,ymm5
  1419. vpsrlq ymm9,ymm10,4 ; next input limb 2 (unmasked)
  1420. vpsrlq ymm12,ymm1,26
  1421. vpand ymm1,ymm1,ymm5
  1422. vpaddq ymm2,ymm2,ymm12 ; carry h1 to h2
  1423. vpaddq ymm0,ymm0,ymm15
  1424. vpsllq ymm15,ymm15,2
  1425. vpaddq ymm0,ymm0,ymm15 ; carry out of h4 re-enters h0 multiplied by 5
  1426. vpand ymm9,ymm9,ymm5 ; next input limb 2
  1427. vpsrlq ymm8,ymm7,26 ; next input limb 1 (unmasked)
  1428. vpsrlq ymm13,ymm2,26
  1429. vpand ymm2,ymm2,ymm5
  1430. vpaddq ymm3,ymm3,ymm13 ; carry h2 to h3
  1431. vpaddq ymm2,ymm2,ymm9 ; h2 += next input limb 2 (done early, as before the loop)
  1432. vpsrlq ymm10,ymm10,30 ; next input limb 3 (unmasked)
  1433. vpsrlq ymm11,ymm0,26
  1434. vpand ymm0,ymm0,ymm5
  1435. vpaddq ymm1,ymm1,ymm11 ; carry h0 to h1
  1436. vpsrlq ymm6,ymm6,40 ; next input limb 4
  1437. vpsrlq ymm14,ymm3,26
  1438. vpand ymm3,ymm3,ymm5
  1439. vpaddq ymm4,ymm4,ymm14 ; carry h3 to h4
  1440. vpand ymm7,ymm7,ymm5 ; next input limb 0
  1441. vpand ymm8,ymm8,ymm5 ; next input limb 1
  1442. vpand ymm10,ymm10,ymm5 ; next input limb 3
  1443. vpor ymm6,ymm6,YMMWORD PTR[32+rcx] ; set bit 24 of limb 4 ($L$129)
  1444. sub rdx,64
  1445. jnz $L$oop_avx2
  1446. DB 066h,090h ; bytes 66 90: two-byte nop
  ; ---- tail: same multiplication, but every table load is displaced by 4 bytes
  ; (unaligned vmovdqu), so each qword lane multiplies by a different dword of
  ; its table slot -- presumably a different key power per lane, TODO confirm ----
  1447. $L$tail_avx2::
  1448. vpaddq ymm0,ymm7,ymm0 ; h0 += input limb 0
  1449. vmovdqu ymm7,YMMWORD PTR[4+rsp] ; slot 0, +4
  1450. vpaddq ymm1,ymm8,ymm1 ; h1 += input limb 1
  1451. vmovdqu ymm8,YMMWORD PTR[36+rsp] ; slot 1, +4
  1452. vpaddq ymm3,ymm10,ymm3 ; h3 += input limb 3
  1453. vmovdqu ymm9,YMMWORD PTR[100+rsp] ; slot 3, +4
  1454. vpaddq ymm4,ymm6,ymm4 ; h4 += input limb 4
  1455. vmovdqu ymm10,YMMWORD PTR[52+rax] ; slot 6, +4
  1456. vmovdqu ymm5,YMMWORD PTR[116+rax] ; slot 8, +4
  1457. vpmuludq ymm13,ymm7,ymm2 ; d2 = h2*slot0
  1458. vpmuludq ymm14,ymm8,ymm2 ; d3 = h2*slot1
  1459. vpmuludq ymm15,ymm9,ymm2 ; d4 = h2*slot3
  1460. vpmuludq ymm11,ymm10,ymm2 ; d0 = h2*slot6
  1461. vpmuludq ymm12,ymm5,ymm2 ; d1 = h2*slot8
  1462. vpmuludq ymm6,ymm8,ymm0 ; h0*slot1
  1463. vpmuludq ymm2,ymm8,ymm1 ; h1*slot1
  1464. vpaddq ymm12,ymm12,ymm6 ; d1 +=
  1465. vpaddq ymm13,ymm13,ymm2 ; d2 +=
  1466. vpmuludq ymm6,ymm8,ymm3 ; h3*slot1
  1467. vpmuludq ymm2,ymm4,YMMWORD PTR[68+rsp] ; h4*slot2 (+4)
  1468. vpaddq ymm15,ymm15,ymm6 ; d4 +=
  1469. vpaddq ymm11,ymm11,ymm2 ; d0 +=
  1470. vpmuludq ymm6,ymm7,ymm0 ; h0*slot0
  1471. vpmuludq ymm2,ymm7,ymm1 ; h1*slot0
  1472. vpaddq ymm11,ymm11,ymm6 ; d0 +=
  1473. vmovdqu ymm8,YMMWORD PTR[((-12))+rax] ; slot 4, +4
  1474. vpaddq ymm12,ymm12,ymm2 ; d1 +=
  1475. vpmuludq ymm6,ymm7,ymm3 ; h3*slot0
  1476. vpmuludq ymm2,ymm7,ymm4 ; h4*slot0
  1477. vpaddq ymm14,ymm14,ymm6 ; d3 +=
  1478. vpaddq ymm15,ymm15,ymm2 ; d4 +=
  1479. vpmuludq ymm6,ymm8,ymm3 ; h3*slot4
  1480. vpmuludq ymm2,ymm8,ymm4 ; h4*slot4
  1481. vpaddq ymm11,ymm11,ymm6 ; d0 +=
  1482. vpaddq ymm12,ymm12,ymm2 ; d1 +=
  1483. vmovdqu ymm2,YMMWORD PTR[20+rax] ; slot 5, +4
  1484. vpmuludq ymm6,ymm9,ymm1 ; h1*slot3
  1485. vpmuludq ymm9,ymm9,ymm0 ; h0*slot3
  1486. vpaddq ymm14,ymm14,ymm6 ; d3 +=
  1487. vpaddq ymm13,ymm13,ymm9 ; d2 +=
  1488. vpmuludq ymm6,ymm2,ymm1 ; h1*slot5
  1489. vpmuludq ymm2,ymm2,ymm0 ; h0*slot5
  1490. vpaddq ymm15,ymm15,ymm6 ; d4 +=
  1491. vpaddq ymm14,ymm14,ymm2 ; d3 +=
  1492. vpmuludq ymm6,ymm10,ymm3 ; h3*slot6
  1493. vpmuludq ymm2,ymm10,ymm4 ; h4*slot6
  1494. vpaddq ymm12,ymm12,ymm6 ; d1 +=
  1495. vpaddq ymm13,ymm13,ymm2 ; d2 +=
  1496. vpmuludq ymm3,ymm5,ymm3 ; h3*slot8
  1497. vpmuludq ymm4,ymm5,ymm4 ; h4*slot8
  1498. vpaddq ymm2,ymm13,ymm3 ; h2 = d2 + h3*slot8
  1499. vpaddq ymm3,ymm14,ymm4 ; h3 = d3 + h4*slot8
  1500. vpmuludq ymm4,ymm0,YMMWORD PTR[84+rax] ; h0*slot7 (+4)
  1501. vpmuludq ymm0,ymm5,ymm1 ; h1*slot8
  1502. vmovdqa ymm5,YMMWORD PTR[64+rcx] ; ymm5 = $L$mask26
  1503. vpaddq ymm4,ymm15,ymm4 ; h4 = d4 + h0*slot7
  1504. vpaddq ymm0,ymm11,ymm0 ; h0 = d0 + h1*slot8
  ; horizontal addition, step 1: add the upper qword of each 128-bit half to the lower
  1505. vpsrldq ymm8,ymm12,8
  1506. vpsrldq ymm9,ymm2,8
  1507. vpsrldq ymm10,ymm3,8
  1508. vpsrldq ymm6,ymm4,8
  1509. vpsrldq ymm7,ymm0,8
  1510. vpaddq ymm12,ymm12,ymm8
  1511. vpaddq ymm2,ymm2,ymm9
  1512. vpaddq ymm3,ymm3,ymm10
  1513. vpaddq ymm4,ymm4,ymm6
  1514. vpaddq ymm0,ymm0,ymm7
  ; step 2: vpermq 02h copies qword 2 into qword 0, folding the high half into the low
  1515. vpermq ymm10,ymm3,02h
  1516. vpermq ymm6,ymm4,02h
  1517. vpermq ymm7,ymm0,02h
  1518. vpermq ymm8,ymm12,02h
  1519. vpermq ymm9,ymm2,02h
  1520. vpaddq ymm3,ymm3,ymm10
  1521. vpaddq ymm4,ymm4,ymm6
  1522. vpaddq ymm0,ymm0,ymm7
  1523. vpaddq ymm12,ymm12,ymm8
  1524. vpaddq ymm2,ymm2,ymm9
  ; carry propagation (same chain as in the loop)
  1525. vpsrlq ymm14,ymm3,26
  1526. vpand ymm3,ymm3,ymm5
  1527. vpaddq ymm4,ymm4,ymm14 ; carry h3 to h4
  1528. vpsrlq ymm11,ymm0,26
  1529. vpand ymm0,ymm0,ymm5
  1530. vpaddq ymm1,ymm12,ymm11 ; h1 = d1 + carry from h0
  1531. vpsrlq ymm15,ymm4,26
  1532. vpand ymm4,ymm4,ymm5
  1533. vpsrlq ymm12,ymm1,26
  1534. vpand ymm1,ymm1,ymm5
  1535. vpaddq ymm2,ymm2,ymm12 ; carry h1 to h2
  1536. vpaddq ymm0,ymm0,ymm15
  1537. vpsllq ymm15,ymm15,2
  1538. vpaddq ymm0,ymm0,ymm15 ; carry out of h4 re-enters h0 multiplied by 5
  1539. vpsrlq ymm13,ymm2,26
  1540. vpand ymm2,ymm2,ymm5
  1541. vpaddq ymm3,ymm3,ymm13 ; carry h2 to h3
  1542. vpsrlq ymm11,ymm0,26
  1543. vpand ymm0,ymm0,ymm5
  1544. vpaddq ymm1,ymm1,ymm11 ; carry h0 to h1
  1545. vpsrlq ymm14,ymm3,26
  1546. vpand ymm3,ymm3,ymm5
  1547. vpaddq ymm4,ymm4,ymm14 ; carry h3 to h4
  ; store the five limbs back to ctx+0..+16 (rdi was advanced by 112 above)
  1548. vmovd DWORD PTR[(-112)+rdi],xmm0
  1549. vmovd DWORD PTR[(-108)+rdi],xmm1
  1550. vmovd DWORD PTR[(-104)+rdi],xmm2
  1551. vmovd DWORD PTR[(-100)+rdi],xmm3
  1552. vmovd DWORD PTR[(-96)+rdi],xmm4
  ; restore xmm6-xmm15 and the stack pointer
  1553. vmovdqa xmm6,XMMWORD PTR[80+r11]
  1554. vmovdqa xmm7,XMMWORD PTR[96+r11]
  1555. vmovdqa xmm8,XMMWORD PTR[112+r11]
  1556. vmovdqa xmm9,XMMWORD PTR[128+r11]
  1557. vmovdqa xmm10,XMMWORD PTR[144+r11]
  1558. vmovdqa xmm11,XMMWORD PTR[160+r11]
  1559. vmovdqa xmm12,XMMWORD PTR[176+r11]
  1560. vmovdqa xmm13,XMMWORD PTR[192+r11]
  1561. vmovdqa xmm14,XMMWORD PTR[208+r11]
  1562. vmovdqa xmm15,XMMWORD PTR[224+r11]
  1563. lea rsp,QWORD PTR[248+r11] ; r11 was rsp-248 at $L$do_avx2
  1564. $L$do_avx2_epilogue::
  1565. vzeroupper
  1566. mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue
  1567. mov rsi,QWORD PTR[16+rsp]
  1568. DB 0F3h,0C3h ;repret
  1569. $L$SEH_end_poly1305_blocks_avx2::
  1570. poly1305_blocks_avx2 ENDP
  1571. ALIGN 64
  ; Constant tables. The vector code addresses them relative to $L$const:
  ; +0 $L$mask24, +32 $L$129, +64 $L$mask26, +96 $L$permd_avx2 (each of the
  ; first three tables is 8 dwords = 32 bytes).
  1572. $L$const::
  1573. $L$mask24::
  ; 00ffffffh in the low dword of each of four qword lanes
  1574. DD 00ffffffh,0,00ffffffh,0,00ffffffh,0,00ffffffh,0
  1575. $L$129::
  ; 16777216 = 2^24 per qword lane; ORed into limb 4 of every loaded block
  1576. DD 16777216,0,16777216,0,16777216,0,16777216,0
  1577. $L$mask26::
  ; 03ffffffh per qword lane: mask for one 26-bit limb
  1578. DD 03ffffffh,0,03ffffffh,0,03ffffffh,0,03ffffffh,0
  1579. $L$permd_avx2::
  ; dword index vector used with vpermd when expanding the key table to the stack
  1580. DD 2,2,2,3,2,0,2,1
  ; The remaining tables are not referenced in this excerpt; judging by their
  ; names they belong to AVX-512 / base 2^44 code paths -- TODO confirm.
  1581. $L$permd_avx512::
  1582. DD 0,0,0,1,0,2,0,3,0,4,0,5,0,6,0,7
  1583. $L$2_44_inp_permd::
  1584. DD 0,1,1,2,2,3,7,7
  1585. $L$2_44_inp_shift::
  1586. DQ 0,12,24,64
  1587. $L$2_44_mask::
  1588. DQ 0fffffffffffh,0fffffffffffh,03ffffffffffh,0ffffffffffffffffh
  1589. $L$2_44_shift_rgt::
  1590. DQ 44,44,42,64
  1591. $L$2_44_shift_lft::
  1592. DQ 8,8,10,64
  1593. ALIGN 64
  1594. $L$x_mask44::
  ; eight qwords of 2^44-1
  1595. DQ 0fffffffffffh,0fffffffffffh,0fffffffffffh,0fffffffffffh
  1596. DQ 0fffffffffffh,0fffffffffffh,0fffffffffffh,0fffffffffffh
  1597. $L$x_mask42::
  ; eight qwords of 2^42-1
  1598. DQ 03ffffffffffh,03ffffffffffh,03ffffffffffh,03ffffffffffh
  1599. DQ 03ffffffffffh,03ffffffffffh,03ffffffffffh,03ffffffffffh
  ; NUL-terminated ASCII: "Poly1305 for x86_64, CRYPTOGAMS by [appro@openssl.org]"
  ; (the address is enclosed in angle brackets in the actual bytes, 60 and 62)
  1600. DB 80,111,108,121,49,51,48,53,32,102,111,114,32,120,56,54
  1601. DB 95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32
  1602. DB 98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115
  1603. DB 108,46,111,114,103,62,0
  1604. ALIGN 16
  1605. PUBLIC xor128_encrypt_n_pad
  1606. ALIGN 16
  ; xor128_encrypt_n_pad -- XOR an input buffer with a pad buffer, write the
  ; result both to the output and back into the pad buffer, then zero-fill the
  ; pad buffer up to the next 16-byte boundary.
  ;
  ; Win64 arguments (used in place, no register shuffling):
  ;   rcx : output buffer
  ;   rdx : input buffer
  ;   r8  : pad buffer; read and overwritten, accessed with movdqa so it must be
  ;         16-byte aligned
  ;   r9  : length in bytes
  ; Returns rax = pad pointer just past the last byte written (including padding).
  ; Clobbers rcx, rdx, r8, r9, r10, xmm0.
  ; NOTE(review): with r9 == 0 control reaches $L$tail_enc with r10 == 0 and
  ; the dec/jnz byte loop wraps around; callers presumably never pass 0 -- confirm.
  1607. xor128_encrypt_n_pad PROC PUBLIC
  1608. sub rdx,r8 ; rdx = input - pad, so [r8+rdx] addresses the input
  1609. sub rcx,r8 ; rcx = output - pad, so [r8+rcx] addresses the output
  1610. mov r10,r9 ; keep the byte length
  1611. shr r9,4 ; number of whole 16-byte blocks
  1612. jz $L$tail_enc
  1613. nop
  1614. $L$oop_enc_xmm::
  1615. movdqu xmm0,XMMWORD PTR[r8*1+rdx] ; 16 input bytes
  1616. pxor xmm0,XMMWORD PTR[r8] ; xor with the pad
  1617. movdqu XMMWORD PTR[r8*1+rcx],xmm0 ; result to the output
  1618. movdqa XMMWORD PTR[r8],xmm0 ; and back into the pad buffer
  1619. lea r8,QWORD PTR[16+r8]
  1620. dec r9
  1621. jnz $L$oop_enc_xmm
  1622. and r10,15 ; leftover bytes
  1623. jz $L$done_enc
  1624. $L$tail_enc::
  1625. mov r9,16
  1626. sub r9,r10 ; r9 = number of zero bytes to append
  1627. xor eax,eax
  1628. $L$oop_enc_byte::
  1629. mov al,BYTE PTR[r8*1+rdx]
  1630. xor al,BYTE PTR[r8]
  1631. mov BYTE PTR[r8*1+rcx],al
  1632. mov BYTE PTR[r8],al
  1633. lea r8,QWORD PTR[1+r8]
  1634. dec r10
  1635. jnz $L$oop_enc_byte
  1636. xor eax,eax ; al = 0 for the padding
  1637. $L$oop_enc_pad::
  1638. mov BYTE PTR[r8],al
  1639. lea r8,QWORD PTR[1+r8]
  1640. dec r9
  1641. jnz $L$oop_enc_pad
  1642. $L$done_enc::
  1643. mov rax,r8 ; return the advanced pad pointer
  1644. DB 0F3h,0C3h ;repret
  1645. xor128_encrypt_n_pad ENDP
  1646. PUBLIC xor128_decrypt_n_pad
  1647. ALIGN 16
  ; xor128_decrypt_n_pad -- XOR an input buffer with a pad buffer and write the
  ; result to the output; unlike the encrypt variant, the pad buffer is
  ; overwritten with the INPUT bytes (not the result), then zero-filled up to
  ; the next 16-byte boundary.
  ;
  ; Win64 arguments (used in place):
  ;   rcx : output buffer
  ;   rdx : input buffer
  ;   r8  : pad buffer; read and overwritten, accessed with movdqa so it must be
  ;         16-byte aligned
  ;   r9  : length in bytes
  ; Returns rax = pad pointer just past the last byte written (including padding).
  ; Clobbers rcx, rdx, r8, r9, r10, r11, xmm0, xmm1.
  ; NOTE(review): with r9 == 0 control reaches $L$tail_dec with r10 == 0 and
  ; the dec/jnz byte loop wraps around; callers presumably never pass 0 -- confirm.
  1648. xor128_decrypt_n_pad PROC PUBLIC
  1649. sub rdx,r8 ; rdx = input - pad
  1650. sub rcx,r8 ; rcx = output - pad
  1651. mov r10,r9 ; keep the byte length
  1652. shr r9,4 ; number of whole 16-byte blocks
  1653. jz $L$tail_dec
  1654. nop
  1655. $L$oop_dec_xmm::
  1656. movdqu xmm0,XMMWORD PTR[r8*1+rdx] ; 16 input bytes
  1657. movdqa xmm1,XMMWORD PTR[r8] ; 16 pad bytes
  1658. pxor xmm1,xmm0
  1659. movdqu XMMWORD PTR[r8*1+rcx],xmm1 ; result to the output
  1660. movdqa XMMWORD PTR[r8],xmm0 ; input bytes replace the pad
  1661. lea r8,QWORD PTR[16+r8]
  1662. dec r9
  1663. jnz $L$oop_dec_xmm
  1664. pxor xmm1,xmm1 ; clear the register that held the result
  1665. and r10,15 ; leftover bytes
  1666. jz $L$done_dec
  1667. $L$tail_dec::
  1668. mov r9,16
  1669. sub r9,r10 ; r9 = number of zero bytes to append
  1670. xor eax,eax
  1671. xor r11,r11
  1672. $L$oop_dec_byte::
  1673. mov r11b,BYTE PTR[r8*1+rdx] ; input byte
  1674. mov al,BYTE PTR[r8] ; pad byte
  1675. xor al,r11b
  1676. mov BYTE PTR[r8*1+rcx],al ; result to the output
  1677. mov BYTE PTR[r8],r11b ; input byte replaces the pad byte
  1678. lea r8,QWORD PTR[1+r8]
  1679. dec r10
  1680. jnz $L$oop_dec_byte
  1681. xor eax,eax ; al = 0 for the padding
  1682. $L$oop_dec_pad::
  1683. mov BYTE PTR[r8],al
  1684. lea r8,QWORD PTR[1+r8]
  1685. dec r9
  1686. jnz $L$oop_dec_pad
  1687. $L$done_dec::
  1688. mov rax,r8 ; return the advanced pad pointer
  1689. DB 0F3h,0C3h ;repret
  1690. xor128_decrypt_n_pad ENDP
  1691. EXTERN __imp_RtlVirtualUnwind:NEAR
  1692. ALIGN 16
  ; se_handler -- Win64 language-specific exception handler for the routines
  ; that push rbx, rbp, r12-r15 (six registers, 48 bytes) in their prologue.
  ;
  ; Uses r8 and r9 as received: r8 is the CONTEXT record and r9 the
  ; DISPATCHER_CONTEXT (offsets below follow the Windows x64 structure layouts).
  ; HandlerData (at disp+56) points at two image-relative labels taken from the
  ; .xdata entry: [0] = end of prologue, [1] = start of epilogue.
  ;   * Rip before [0]:    frame not set up yet; rax = context Rax is used as the
  ;                        frame pointer (the prologues do "mov rax,rsp").
  ;   * Rip at/after [1]:  registers already restored; rax = context Rsp.
  ;   * otherwise:         reload the six saved registers from the stack into
  ;                        the context and step rax past them.
  ; All paths continue at $L$common_seh_tail, which lives inside avx_handler;
  ; this procedure has no return of its own.
  1693. se_handler PROC PRIVATE
  1694. push rsi
  1695. push rdi
  1696. push rbx
  1697. push rbp
  1698. push r12
  1699. push r13
  1700. push r14
  1701. push r15
  1702. pushfq
  1703. sub rsp,64 ; room for the RtlVirtualUnwind stack arguments
  1704. mov rax,QWORD PTR[120+r8] ; context Rax
  1705. mov rbx,QWORD PTR[248+r8] ; context Rip
  1706. mov rsi,QWORD PTR[8+r9] ; disp ImageBase
  1707. mov r11,QWORD PTR[56+r9] ; disp HandlerData
  1708. mov r10d,DWORD PTR[r11] ; HandlerData[0]
  1709. lea r10,QWORD PTR[r10*1+rsi] ; absolute address of the first label
  1710. cmp rbx,r10
  1711. jb $L$common_seh_tail ; fault before the body label
  1712. mov rax,QWORD PTR[152+r8] ; context Rsp
  1713. mov r10d,DWORD PTR[4+r11] ; HandlerData[1]
  1714. lea r10,QWORD PTR[r10*1+rsi] ; absolute address of the second label
  1715. cmp rbx,r10
  1716. jae $L$common_seh_tail ; fault at or after the epilogue label
  1717. lea rax,QWORD PTR[48+rax] ; step over the six pushed registers
  1718. mov rbx,QWORD PTR[((-8))+rax]
  1719. mov rbp,QWORD PTR[((-16))+rax]
  1720. mov r12,QWORD PTR[((-24))+rax]
  1721. mov r13,QWORD PTR[((-32))+rax]
  1722. mov r14,QWORD PTR[((-40))+rax]
  1723. mov r15,QWORD PTR[((-48))+rax]
  1724. mov QWORD PTR[144+r8],rbx ; context Rbx
  1725. mov QWORD PTR[160+r8],rbp ; context Rbp
  1726. mov QWORD PTR[216+r8],r12 ; context R12
  1727. mov QWORD PTR[224+r8],r13 ; context R13
  1728. mov QWORD PTR[232+r8],r14 ; context R14
  1729. mov QWORD PTR[240+r8],r15 ; context R15
  1730. jmp $L$common_seh_tail
  1731. se_handler ENDP
  1732. ALIGN 16
  ; avx_handler -- Win64 language-specific exception handler for the vector
  ; bodies that spill xmm6-xmm15 relative to r11 (see $L$do_avx2 in this file).
  ;
  ; Uses r8 = CONTEXT record and r9 = DISPATCHER_CONTEXT as received.
  ; Between the two HandlerData labels it copies the ten saved XMM registers
  ; (20 qwords from r11+80) into the context starting at offset 512 and
  ; recomputes the frame pointer as r11+248, mirroring the function epilogue.
  ;
  ; $L$common_seh_tail (shared with se_handler) expects rax = frame pointer at
  ; function entry level; it restores Rsp/Rsi/Rdi in the context, copies the
  ; context into disp ContextRecord, calls RtlVirtualUnwind and returns 1.
  1733. avx_handler PROC PRIVATE
  1734. push rsi
  1735. push rdi
  1736. push rbx
  1737. push rbp
  1738. push r12
  1739. push r13
  1740. push r14
  1741. push r15
  1742. pushfq
  1743. sub rsp,64 ; room for the RtlVirtualUnwind stack arguments
  1744. mov rax,QWORD PTR[120+r8] ; context Rax
  1745. mov rbx,QWORD PTR[248+r8] ; context Rip
  1746. mov rsi,QWORD PTR[8+r9] ; disp ImageBase
  1747. mov r11,QWORD PTR[56+r9] ; disp HandlerData
  1748. mov r10d,DWORD PTR[r11] ; HandlerData[0]
  1749. lea r10,QWORD PTR[r10*1+rsi]
  1750. cmp rbx,r10
  1751. jb $L$common_seh_tail ; fault before the body label
  1752. mov rax,QWORD PTR[152+r8] ; context Rsp
  1753. mov r10d,DWORD PTR[4+r11] ; HandlerData[1]
  1754. lea r10,QWORD PTR[r10*1+rsi]
  1755. cmp rbx,r10
  1756. jae $L$common_seh_tail ; fault at or after the epilogue label
  1757. mov rax,QWORD PTR[208+r8] ; context R11 (anchor of the xmm save area)
  1758. lea rsi,QWORD PTR[80+rax] ; source: saved xmm6
  1759. lea rax,QWORD PTR[248+rax] ; frame pointer, as in "lea rsp,[248+r11]"
  1760. lea rdi,QWORD PTR[512+r8] ; destination: context Xmm6
  1761. mov ecx,20 ; 10 registers * 2 qwords
  1762. DD 0a548f3fch ; bytes FC F3 48 A5: cld; rep movsq
  1763. $L$common_seh_tail::
  1764. mov rdi,QWORD PTR[8+rax] ; rdi/rsi saved by the WIN64 prologue in the home space
  1765. mov rsi,QWORD PTR[16+rax]
  1766. mov QWORD PTR[152+r8],rax ; context Rsp
  1767. mov QWORD PTR[168+r8],rsi ; context Rsi
  1768. mov QWORD PTR[176+r8],rdi ; context Rdi
  1769. mov rdi,QWORD PTR[40+r9] ; disp ContextRecord (copy destination)
  1770. mov rsi,r8 ; copy source: the updated context
  1771. mov ecx,154 ; 154 qwords = 1232 bytes
  1772. DD 0a548f3fch ; bytes FC F3 48 A5: cld; rep movsq
  ; build the eight RtlVirtualUnwind arguments
  1773. mov rsi,r9
  1774. xor rcx,rcx ; arg1 = 0
  1775. mov rdx,QWORD PTR[8+rsi] ; arg2 = disp ImageBase
  1776. mov r8,QWORD PTR[rsi] ; arg3 = disp ControlPc
  1777. mov r9,QWORD PTR[16+rsi] ; arg4 = disp FunctionEntry
  1778. mov r10,QWORD PTR[40+rsi] ; disp ContextRecord
  1779. lea r11,QWORD PTR[56+rsi] ; address of disp HandlerData
  1780. lea r12,QWORD PTR[24+rsi] ; address of disp EstablisherFrame
  1781. mov QWORD PTR[32+rsp],r10 ; arg5
  1782. mov QWORD PTR[40+rsp],r11 ; arg6
  1783. mov QWORD PTR[48+rsp],r12 ; arg7
  1784. mov QWORD PTR[56+rsp],rcx ; arg8 = 0
  1785. call QWORD PTR[__imp_RtlVirtualUnwind]
  1786. mov eax,1 ; return value 1 (ExceptionContinueSearch)
  1787. add rsp,64
  1788. popfq
  1789. pop r15
  1790. pop r14
  1791. pop r13
  1792. pop r12
  1793. pop rbp
  1794. pop rbx
  1795. pop rdi
  1796. pop rsi
  1797. DB 0F3h,0C3h ;repret
  1798. avx_handler ENDP
  1799. .text$ ENDS
  ; Function table for Win64 structured exception handling. Every group of
  ; three image-relative DDs is one entry: begin address, end address, unwind
  ; info (a label in .xdata). The two vector routines are each described by
  ; three consecutive ranges so that each range can use its own handler data.
  1800. .pdata SEGMENT READONLY ALIGN(4)
  1801. ALIGN 4
  ; poly1305_init
  1802. DD imagerel $L$SEH_begin_poly1305_init
  1803. DD imagerel $L$SEH_end_poly1305_init
  1804. DD imagerel $L$SEH_info_poly1305_init
  ; poly1305_blocks
  1805. DD imagerel $L$SEH_begin_poly1305_blocks
  1806. DD imagerel $L$SEH_end_poly1305_blocks
  1807. DD imagerel $L$SEH_info_poly1305_blocks
  ; poly1305_emit
  1808. DD imagerel $L$SEH_begin_poly1305_emit
  1809. DD imagerel $L$SEH_end_poly1305_emit
  1810. DD imagerel $L$SEH_info_poly1305_emit
  ; poly1305_blocks_avx, range 1: entry up to $L$base2_64_avx
  1811. DD imagerel $L$SEH_begin_poly1305_blocks_avx
  1812. DD imagerel $L$base2_64_avx
  1813. DD imagerel $L$SEH_info_poly1305_blocks_avx_1
  ; poly1305_blocks_avx, range 2: $L$base2_64_avx up to $L$even_avx
  1814. DD imagerel $L$base2_64_avx
  1815. DD imagerel $L$even_avx
  1816. DD imagerel $L$SEH_info_poly1305_blocks_avx_2
  ; poly1305_blocks_avx, range 3: $L$even_avx to the end
  1817. DD imagerel $L$even_avx
  1818. DD imagerel $L$SEH_end_poly1305_blocks_avx
  1819. DD imagerel $L$SEH_info_poly1305_blocks_avx_3
  ; poly1305_emit_avx
  1820. DD imagerel $L$SEH_begin_poly1305_emit_avx
  1821. DD imagerel $L$SEH_end_poly1305_emit_avx
  1822. DD imagerel $L$SEH_info_poly1305_emit_avx
  ; poly1305_blocks_avx2, range 1: entry up to $L$base2_64_avx2
  1823. DD imagerel $L$SEH_begin_poly1305_blocks_avx2
  1824. DD imagerel $L$base2_64_avx2
  1825. DD imagerel $L$SEH_info_poly1305_blocks_avx2_1
  ; poly1305_blocks_avx2, range 2: $L$base2_64_avx2 up to $L$even_avx2
  1826. DD imagerel $L$base2_64_avx2
  1827. DD imagerel $L$even_avx2
  1828. DD imagerel $L$SEH_info_poly1305_blocks_avx2_2
  ; poly1305_blocks_avx2, range 3: $L$even_avx2 to the end
  1829. DD imagerel $L$even_avx2
  1830. DD imagerel $L$SEH_end_poly1305_blocks_avx2
  1831. DD imagerel $L$SEH_info_poly1305_blocks_avx2_3
  1832. .pdata ENDS
  ; Unwind information referenced from .pdata. Every record has the same shape:
  ;   DB 9,0,0,0   header byte 9 = version 1 with the exception-handler flag
  ;                set; prologue size, unwind-code count and frame register 0
  ;   DD handler   image-relative address of se_handler or avx_handler
  ;   DD a,b       handler data: the two labels the handler compares Rip with
  ;                (a = first address of the body, b = start of the epilogue)
  ; Records whose two labels are identical (both the function start) make the
  ; handler skip the register-restore step for every Rip inside the function.
  1833. .xdata SEGMENT READONLY ALIGN(8)
  1834. ALIGN 8
  1835. $L$SEH_info_poly1305_init::
  1836. DB 9,0,0,0
  1837. DD imagerel se_handler
  1838. DD imagerel $L$SEH_begin_poly1305_init,imagerel $L$SEH_begin_poly1305_init
  1839. $L$SEH_info_poly1305_blocks::
  1840. DB 9,0,0,0
  1841. DD imagerel se_handler
  1842. DD imagerel $L$blocks_body,imagerel $L$blocks_epilogue
  1843. $L$SEH_info_poly1305_emit::
  1844. DB 9,0,0,0
  1845. DD imagerel se_handler
  1846. DD imagerel $L$SEH_begin_poly1305_emit,imagerel $L$SEH_begin_poly1305_emit
  1847. $L$SEH_info_poly1305_blocks_avx_1::
  1848. DB 9,0,0,0
  1849. DD imagerel se_handler
  1850. DD imagerel $L$blocks_avx_body,imagerel $L$blocks_avx_epilogue
  1851. $L$SEH_info_poly1305_blocks_avx_2::
  1852. DB 9,0,0,0
  1853. DD imagerel se_handler
  1854. DD imagerel $L$base2_64_avx_body,imagerel $L$base2_64_avx_epilogue
  1855. $L$SEH_info_poly1305_blocks_avx_3::
  ; vector body: uses avx_handler so the spilled xmm6-xmm15 are recovered too
  1856. DB 9,0,0,0
  1857. DD imagerel avx_handler
  1858. DD imagerel $L$do_avx_body,imagerel $L$do_avx_epilogue
  1859. $L$SEH_info_poly1305_emit_avx::
  1860. DB 9,0,0,0
  1861. DD imagerel se_handler
  1862. DD imagerel $L$SEH_begin_poly1305_emit_avx,imagerel $L$SEH_begin_poly1305_emit_avx
  1863. $L$SEH_info_poly1305_blocks_avx2_1::
  1864. DB 9,0,0,0
  1865. DD imagerel se_handler
  1866. DD imagerel $L$blocks_avx2_body,imagerel $L$blocks_avx2_epilogue
  1867. $L$SEH_info_poly1305_blocks_avx2_2::
  1868. DB 9,0,0,0
  1869. DD imagerel se_handler
  1870. DD imagerel $L$base2_64_avx2_body,imagerel $L$base2_64_avx2_epilogue
  1871. $L$SEH_info_poly1305_blocks_avx2_3::
  ; vector body: uses avx_handler so the spilled xmm6-xmm15 are recovered too
  1872. DB 9,0,0,0
  1873. DD imagerel avx_handler
  1874. DD imagerel $L$do_avx2_body,imagerel $L$do_avx2_epilogue
  1875. .xdata ENDS
  1876. END