ghash-x86_64.masm 42 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
717781779178017811782178317841785178617871788178917901791179217931794179517961797179817991800180118021803180418051806180718081809181018111812181318141815181618171818181918201821182218231824182518261827182818291830183118321833183418351836183718381839184018411842184318441845184618471848184918501851185218531854185518561857185818591860186118621863186418651866186718681869187018711872187318741875187618771878187918801881188218831884188518861887188818891890189118921893189418951896189718981899190019011902190319041905190619071908190919101911191219131914191519161917191819191920192119221923192419251926192719281929193019311932193319341935193619371938193919401941194219431944194519461947194819491950195119521953195419551956195719581959196019611962196319641965196619671968196919701971197219731974197519761977197819791980198119821983198419851986198719881989199019911992199319941995199619971998199920002001200220032004200520062007200820092010201120122013201420152016201720182019202020212022202320242025202620272028202920302031203220332034203520362037203820392040204120422043204420452046204720482049205020512052205320542055205620572058205920602061206220632064206520662067206820692070207120722073
  ; Permit identifiers that begin with a dot (the segment name below is ".text$").
  1. OPTION DOTNAME
  ; Code segment for every routine in this file, aligned to 256 bytes.
  2. .text$ SEGMENT ALIGN(256) 'CODE'
  ; Defined in another module; gcm_ghash_clmul reads the dword at offset 4 of it.
  3. EXTERN OPENSSL_ia32cap_P:NEAR
  ;-----------------------------------------------------------------------
  ; gcm_gmult_4bit
  ;   Transforms the 16-byte block at arg1 in place, one nibble at a time,
  ;   using a caller-supplied table of 16-byte entries and the $L$rem_4bit
  ;   table (defined outside this routine).
  ;   NOTE(review): the name suggests the GCM "Xi = Xi * H" multiply with a
  ;   4-bit table -- confirm against the C prototype.
  ; ABI:  Microsoft x64
  ; In:   rcx = 16-byte block (read byte-wise, overwritten with the result)
  ;       rdx = table of 16-byte entries, indexed by nibble*16
  ; Roles inside the body:
  ;       rdi = block, rsi = table, r11 = &$L$rem_4bit
  ;       r9:r8 = 128-bit accumulator (r9 = upper half, r8 = lower half)
  ;       rax / rbx = table offset for the low / high nibble of the current byte
  ;       rcx = index of the next byte to fetch (14 down to 0)
  ;       rdx = the 4 bits about to be shifted out of r8 (index into rem_4bit)
  ;       r10 = low bits of r9 carried into the top of r8 on each shift
  ;-----------------------------------------------------------------------
  4. PUBLIC gcm_gmult_4bit
  5. ALIGN 16
  6. gcm_gmult_4bit PROC PUBLIC
  ; rdi/rsi are callee-saved on Win64: park them in the caller's home slots.
  7. mov QWORD PTR[8+rsp],rdi ;WIN64 prologue
  8. mov QWORD PTR[16+rsp],rsi
  9. mov rax,rsp
  10. $L$SEH_begin_gcm_gmult_4bit::
  ; Move the Win64 argument registers into rdi/rsi, which the body uses.
  11. mov rdi,rcx
  12. mov rsi,rdx
  ; Six pushes (48 bytes) plus 280 bytes of frame; the epilogue relies on 280+48.
  13. push rbx
  14. push rbp
  15. push r12
  16. push r13
  17. push r14
  18. push r15
  19. sub rsp,280
  20. $L$gmult_prologue::
  ; Start with the last byte of the block (offset 15).
  21. movzx r8,BYTE PTR[15+rdi]
  22. lea r11,QWORD PTR[$L$rem_4bit]
  23. xor rax,rax
  24. xor rbx,rbx
  25. mov al,r8b
  26. mov bl,r8b
  27. shl al,4 ; rax = low nibble * 16
  28. mov rcx,14
  ; Seed the accumulator with the table entry for the low nibble.
  29. mov r8,QWORD PTR[8+rax*1+rsi]
  30. mov r9,QWORD PTR[rax*1+rsi]
  31. and bl,0f0h ; rbx = high nibble * 16
  32. mov rdx,r8
  33. jmp $L$oop1
  34. ALIGN 16
  ; Each pass handles the high nibble of the current byte, fetches the next
  ; byte, then handles that byte's low nibble. Every nibble step is:
  ; accumulator >>= 4, XOR in the table entry, XOR rem_4bit[shifted-out bits]
  ; into r9.
  35. $L$oop1::
  36. shr r8,4
  37. and rdx,0fh
  38. mov r10,r9
  39. mov al,BYTE PTR[rcx*1+rdi]
  40. shr r9,4
  41. xor r8,QWORD PTR[8+rbx*1+rsi]
  42. shl r10,60
  43. xor r9,QWORD PTR[rbx*1+rsi]
  44. mov bl,al
  45. xor r9,QWORD PTR[rdx*8+r11]
  46. mov rdx,r8
  47. shl al,4
  48. xor r8,r10
  ; rcx goes negative right after byte 0 has been fetched.
  49. dec rcx
  50. js $L$break1
  51. shr r8,4
  52. and rdx,0fh
  53. mov r10,r9
  54. shr r9,4
  55. xor r8,QWORD PTR[8+rax*1+rsi]
  56. shl r10,60
  57. xor r9,QWORD PTR[rax*1+rsi]
  58. and bl,0f0h
  59. xor r9,QWORD PTR[rdx*8+r11]
  60. mov rdx,r8
  61. xor r8,r10
  62. jmp $L$oop1
  63. ALIGN 16
  ; Tail: the two nibbles of byte 0 (low nibble via rax, then high via rbx).
  64. $L$break1::
  65. shr r8,4
  66. and rdx,0fh
  67. mov r10,r9
  68. shr r9,4
  69. xor r8,QWORD PTR[8+rax*1+rsi]
  70. shl r10,60
  71. xor r9,QWORD PTR[rax*1+rsi]
  72. and bl,0f0h
  73. xor r9,QWORD PTR[rdx*8+r11]
  74. mov rdx,r8
  75. xor r8,r10
  76. shr r8,4
  77. and rdx,0fh
  78. mov r10,r9
  79. shr r9,4
  80. xor r8,QWORD PTR[8+rbx*1+rsi]
  81. shl r10,60
  82. xor r9,QWORD PTR[rbx*1+rsi]
  83. xor r8,r10
  84. xor r9,QWORD PTR[rdx*8+r11]
  ; Byte-reverse both halves and write the result back over the input block.
  85. bswap r8
  86. bswap r9
  87. mov QWORD PTR[8+rdi],r8
  88. mov QWORD PTR[rdi],r9
  ; rsi = rsp as it was before the six pushes. Only rbx is reloaded: the body
  ; above never writes rbp or r12-r15, so they still hold the caller's values.
  89. lea rsi,QWORD PTR[((280+48))+rsp]
  90. mov rbx,QWORD PTR[((-8))+rsi]
  91. lea rsp,QWORD PTR[rsi]
  92. $L$gmult_epilogue::
  93. mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue
  94. mov rsi,QWORD PTR[16+rsp]
  95. DB 0F3h,0C3h ;repret
  96. $L$SEH_end_gcm_gmult_4bit::
  97. gcm_gmult_4bit ENDP
  ;-----------------------------------------------------------------------
  ; gcm_ghash_4bit
  ;   Folds a run of 16-byte input blocks into the 16-byte state at arg1,
  ;   consuming one state byte per step with the help of per-call tables
  ;   built on the stack and the $L$rem_8bit table (defined outside this
  ;   routine).
  ;   NOTE(review): the name suggests the GCM GHASH update -- confirm against
  ;   the C prototype.
  ; ABI:  Microsoft x64
  ; In:   rcx = 16-byte state (read and overwritten)
  ;       rdx = table of 16 entries, 16 bytes each
  ;       r8  = input pointer
  ;       r9  = input length in bytes; the loop consumes 16 bytes per pass
  ;             and always runs at least once (test is at the bottom)
  ; Roles inside the body:
  ;       rdi = state, rsi = table, r14 = input cursor, r15 = input end
  ;       r9:r8 = 128-bit accumulator (r9 = upper half, r8 = lower half)
  ;       r11 = &$L$rem_8bit (16-bit entries), r12/r13 = rem_8bit index/value
  ;       rax = low nibble * 16 (offset into rsi table)
  ;       rbx/rcx = high nibble (index into the stack tables)
  ; Stack frame (280 bytes, 272 used):
  ;       [rsp+0   .. rsp+15 ]  16 bytes : (low byte of entry's qword at +8) << 4
  ;       [rbp-128 .. rbp-1  ]  16 qwords: (qword at +8 >> 4) | (qword at +0 << 60)
  ;       [rbp     .. rbp+127]  16 qwords: (qword at +0) >> 4
  ;       with rbp = rsp+144; i.e. each table entry shifted right by 4 bits.
  ;-----------------------------------------------------------------------
  98. PUBLIC gcm_ghash_4bit
  99. ALIGN 16
  100. gcm_ghash_4bit PROC PUBLIC
  ; rdi/rsi are callee-saved on Win64: park them in the caller's home slots.
  101. mov QWORD PTR[8+rsp],rdi ;WIN64 prologue
  102. mov QWORD PTR[16+rsp],rsi
  103. mov rax,rsp
  104. $L$SEH_begin_gcm_ghash_4bit::
  ; Shuffle the four Win64 argument registers into rdi, rsi, rdx, rcx.
  105. mov rdi,rcx
  106. mov rsi,rdx
  107. mov rdx,r8
  108. mov rcx,r9
  ; Six pushes (48 bytes) plus 280 bytes of frame; the epilogue relies on 280+48.
  109. push rbx
  110. push rbp
  111. push r12
  112. push r13
  113. push r14
  114. push r15
  115. sub rsp,280
  116. $L$ghash_prologue::
  117. mov r14,rdx
  118. mov r15,rcx
  ; Bias rsi by +128 so the 256-byte table is addressed with offsets -128..+127;
  ; undone by "add rsi,-128" once the stack tables are built.
  119. sub rsi,-128
  120. lea rbp,QWORD PTR[((16+128))+rsp]
  121. xor edx,edx
  ; ---- Build the stack tables: 16 entries, software-pipelined two at a time.
  ; Per entry: r8/r9 = qword at +0, rax/rbx = qword at +8, dl = the byte whose
  ; low 4 bits are shifted out, r10 = bits carried from the +0 qword.
  122. mov r8,QWORD PTR[((0+0-128))+rsi]
  123. mov rax,QWORD PTR[((0+8-128))+rsi]
  124. mov dl,al
  125. shr rax,4
  126. mov r10,r8
  127. shr r8,4
  128. mov r9,QWORD PTR[((16+0-128))+rsi]
  129. shl dl,4
  130. mov rbx,QWORD PTR[((16+8-128))+rsi]
  131. shl r10,60
  132. mov BYTE PTR[rsp],dl
  133. or rax,r10
  134. mov dl,bl
  135. shr rbx,4
  136. mov r10,r9
  137. shr r9,4
  138. mov QWORD PTR[rbp],r8
  139. mov r8,QWORD PTR[((32+0-128))+rsi]
  140. shl dl,4
  141. mov QWORD PTR[((0-128))+rbp],rax
  142. mov rax,QWORD PTR[((32+8-128))+rsi]
  143. shl r10,60
  144. mov BYTE PTR[1+rsp],dl
  145. or rbx,r10
  146. mov dl,al
  147. shr rax,4
  148. mov r10,r8
  149. shr r8,4
  150. mov QWORD PTR[8+rbp],r9
  151. mov r9,QWORD PTR[((48+0-128))+rsi]
  152. shl dl,4
  153. mov QWORD PTR[((8-128))+rbp],rbx
  154. mov rbx,QWORD PTR[((48+8-128))+rsi]
  155. shl r10,60
  156. mov BYTE PTR[2+rsp],dl
  157. or rax,r10
  158. mov dl,bl
  159. shr rbx,4
  160. mov r10,r9
  161. shr r9,4
  162. mov QWORD PTR[16+rbp],r8
  163. mov r8,QWORD PTR[((64+0-128))+rsi]
  164. shl dl,4
  165. mov QWORD PTR[((16-128))+rbp],rax
  166. mov rax,QWORD PTR[((64+8-128))+rsi]
  167. shl r10,60
  168. mov BYTE PTR[3+rsp],dl
  169. or rbx,r10
  170. mov dl,al
  171. shr rax,4
  172. mov r10,r8
  173. shr r8,4
  174. mov QWORD PTR[24+rbp],r9
  175. mov r9,QWORD PTR[((80+0-128))+rsi]
  176. shl dl,4
  177. mov QWORD PTR[((24-128))+rbp],rbx
  178. mov rbx,QWORD PTR[((80+8-128))+rsi]
  179. shl r10,60
  180. mov BYTE PTR[4+rsp],dl
  181. or rax,r10
  182. mov dl,bl
  183. shr rbx,4
  184. mov r10,r9
  185. shr r9,4
  186. mov QWORD PTR[32+rbp],r8
  187. mov r8,QWORD PTR[((96+0-128))+rsi]
  188. shl dl,4
  189. mov QWORD PTR[((32-128))+rbp],rax
  190. mov rax,QWORD PTR[((96+8-128))+rsi]
  191. shl r10,60
  192. mov BYTE PTR[5+rsp],dl
  193. or rbx,r10
  194. mov dl,al
  195. shr rax,4
  196. mov r10,r8
  197. shr r8,4
  198. mov QWORD PTR[40+rbp],r9
  199. mov r9,QWORD PTR[((112+0-128))+rsi]
  200. shl dl,4
  201. mov QWORD PTR[((40-128))+rbp],rbx
  202. mov rbx,QWORD PTR[((112+8-128))+rsi]
  203. shl r10,60
  204. mov BYTE PTR[6+rsp],dl
  205. or rax,r10
  206. mov dl,bl
  207. shr rbx,4
  208. mov r10,r9
  209. shr r9,4
  210. mov QWORD PTR[48+rbp],r8
  211. mov r8,QWORD PTR[((128+0-128))+rsi]
  212. shl dl,4
  213. mov QWORD PTR[((48-128))+rbp],rax
  214. mov rax,QWORD PTR[((128+8-128))+rsi]
  215. shl r10,60
  216. mov BYTE PTR[7+rsp],dl
  217. or rbx,r10
  218. mov dl,al
  219. shr rax,4
  220. mov r10,r8
  221. shr r8,4
  222. mov QWORD PTR[56+rbp],r9
  223. mov r9,QWORD PTR[((144+0-128))+rsi]
  224. shl dl,4
  225. mov QWORD PTR[((56-128))+rbp],rbx
  226. mov rbx,QWORD PTR[((144+8-128))+rsi]
  227. shl r10,60
  228. mov BYTE PTR[8+rsp],dl
  229. or rax,r10
  230. mov dl,bl
  231. shr rbx,4
  232. mov r10,r9
  233. shr r9,4
  234. mov QWORD PTR[64+rbp],r8
  235. mov r8,QWORD PTR[((160+0-128))+rsi]
  236. shl dl,4
  237. mov QWORD PTR[((64-128))+rbp],rax
  238. mov rax,QWORD PTR[((160+8-128))+rsi]
  239. shl r10,60
  240. mov BYTE PTR[9+rsp],dl
  241. or rbx,r10
  242. mov dl,al
  243. shr rax,4
  244. mov r10,r8
  245. shr r8,4
  246. mov QWORD PTR[72+rbp],r9
  247. mov r9,QWORD PTR[((176+0-128))+rsi]
  248. shl dl,4
  249. mov QWORD PTR[((72-128))+rbp],rbx
  250. mov rbx,QWORD PTR[((176+8-128))+rsi]
  251. shl r10,60
  252. mov BYTE PTR[10+rsp],dl
  253. or rax,r10
  254. mov dl,bl
  255. shr rbx,4
  256. mov r10,r9
  257. shr r9,4
  258. mov QWORD PTR[80+rbp],r8
  259. mov r8,QWORD PTR[((192+0-128))+rsi]
  260. shl dl,4
  261. mov QWORD PTR[((80-128))+rbp],rax
  262. mov rax,QWORD PTR[((192+8-128))+rsi]
  263. shl r10,60
  264. mov BYTE PTR[11+rsp],dl
  265. or rbx,r10
  266. mov dl,al
  267. shr rax,4
  268. mov r10,r8
  269. shr r8,4
  270. mov QWORD PTR[88+rbp],r9
  271. mov r9,QWORD PTR[((208+0-128))+rsi]
  272. shl dl,4
  273. mov QWORD PTR[((88-128))+rbp],rbx
  274. mov rbx,QWORD PTR[((208+8-128))+rsi]
  275. shl r10,60
  276. mov BYTE PTR[12+rsp],dl
  277. or rax,r10
  278. mov dl,bl
  279. shr rbx,4
  280. mov r10,r9
  281. shr r9,4
  282. mov QWORD PTR[96+rbp],r8
  283. mov r8,QWORD PTR[((224+0-128))+rsi]
  284. shl dl,4
  285. mov QWORD PTR[((96-128))+rbp],rax
  286. mov rax,QWORD PTR[((224+8-128))+rsi]
  287. shl r10,60
  288. mov BYTE PTR[13+rsp],dl
  289. or rbx,r10
  290. mov dl,al
  291. shr rax,4
  292. mov r10,r8
  293. shr r8,4
  294. mov QWORD PTR[104+rbp],r9
  295. mov r9,QWORD PTR[((240+0-128))+rsi]
  296. shl dl,4
  297. mov QWORD PTR[((104-128))+rbp],rbx
  298. mov rbx,QWORD PTR[((240+8-128))+rsi]
  299. shl r10,60
  300. mov BYTE PTR[14+rsp],dl
  301. or rax,r10
  302. mov dl,bl
  303. shr rbx,4
  304. mov r10,r9
  305. shr r9,4
  306. mov QWORD PTR[112+rbp],r8
  307. shl dl,4
  308. mov QWORD PTR[((112-128))+rbp],rax
  309. shl r10,60
  310. mov BYTE PTR[15+rsp],dl
  311. or rbx,r10
  312. mov QWORD PTR[120+rbp],r9
  313. mov QWORD PTR[((120-128))+rbp],rbx
  ; ---- Tables done. Remove the +128 bias, load the state, compute the end
  ; pointer (r15 = input + length).
  314. add rsi,-128
  315. mov r8,QWORD PTR[8+rdi]
  316. mov r9,QWORD PTR[rdi]
  317. add r15,r14
  318. lea r11,QWORD PTR[$L$rem_8bit]
  319. jmp $L$outer_loop
  320. ALIGN 16
  ; ---- One pass per 16-byte input block.
  ; XOR the block into the state, store it back to [rdi] so its dwords can be
  ; re-read below, and advance the input cursor.
  321. $L$outer_loop::
  322. xor r9,QWORD PTR[r14]
  323. mov rdx,QWORD PTR[8+r14]
  324. lea r14,QWORD PTR[16+r14]
  325. xor rdx,r8
  326. mov QWORD PTR[rdi],r9
  327. mov QWORD PTR[8+rdi],rdx
  ; edx = stored bytes 12..15; each "rol edx,8" brings the next byte (highest
  ; address first) into dl. Per byte: rax = low nibble * 16 indexes the rsi
  ; table, rbx/rcx = high nibble indexes the stack tables; the accumulator is
  ; shifted right by 8 and the byte that falls out selects a rem_8bit word
  ; that is XORed into the top 16 bits of r9.
  328. shr rdx,32
  329. xor rax,rax
  330. rol edx,8
  331. mov al,dl
  332. movzx ebx,dl
  333. shl al,4
  334. shr ebx,4
  335. rol edx,8
  336. mov r8,QWORD PTR[8+rax*1+rsi]
  337. mov r9,QWORD PTR[rax*1+rsi]
  338. mov al,dl
  339. movzx ecx,dl
  340. shl al,4
  341. movzx r12,BYTE PTR[rbx*1+rsp]
  342. shr ecx,4
  343. xor r12,r8
  344. mov r10,r9
  345. shr r8,8
  346. movzx r12,r12b
  347. shr r9,8
  348. xor r8,QWORD PTR[((-128))+rbx*8+rbp]
  349. shl r10,56
  350. xor r9,QWORD PTR[rbx*8+rbp]
  351. rol edx,8
  352. xor r8,QWORD PTR[8+rax*1+rsi]
  353. xor r9,QWORD PTR[rax*1+rsi]
  354. mov al,dl
  355. xor r8,r10
  356. movzx r12,WORD PTR[r12*2+r11]
  357. movzx ebx,dl
  358. shl al,4
  359. movzx r13,BYTE PTR[rcx*1+rsp]
  360. shr ebx,4
  361. shl r12,48
  362. xor r13,r8
  363. mov r10,r9
  364. xor r9,r12
  365. shr r8,8
  366. movzx r13,r13b
  367. shr r9,8
  368. xor r8,QWORD PTR[((-128))+rcx*8+rbp]
  369. shl r10,56
  370. xor r9,QWORD PTR[rcx*8+rbp]
  371. rol edx,8
  372. xor r8,QWORD PTR[8+rax*1+rsi]
  373. xor r9,QWORD PTR[rax*1+rsi]
  374. mov al,dl
  375. xor r8,r10
  376. movzx r13,WORD PTR[r13*2+r11]
  377. movzx ecx,dl
  378. shl al,4
  379. movzx r12,BYTE PTR[rbx*1+rsp]
  380. shr ecx,4
  381. shl r13,48
  382. xor r12,r8
  383. mov r10,r9
  384. xor r9,r13
  385. shr r8,8
  386. movzx r12,r12b
  ; Reload edx with stored bytes 8..11.
  387. mov edx,DWORD PTR[8+rdi]
  388. shr r9,8
  389. xor r8,QWORD PTR[((-128))+rbx*8+rbp]
  390. shl r10,56
  391. xor r9,QWORD PTR[rbx*8+rbp]
  392. rol edx,8
  393. xor r8,QWORD PTR[8+rax*1+rsi]
  394. xor r9,QWORD PTR[rax*1+rsi]
  395. mov al,dl
  396. xor r8,r10
  397. movzx r12,WORD PTR[r12*2+r11]
  398. movzx ebx,dl
  399. shl al,4
  400. movzx r13,BYTE PTR[rcx*1+rsp]
  401. shr ebx,4
  402. shl r12,48
  403. xor r13,r8
  404. mov r10,r9
  405. xor r9,r12
  406. shr r8,8
  407. movzx r13,r13b
  408. shr r9,8
  409. xor r8,QWORD PTR[((-128))+rcx*8+rbp]
  410. shl r10,56
  411. xor r9,QWORD PTR[rcx*8+rbp]
  412. rol edx,8
  413. xor r8,QWORD PTR[8+rax*1+rsi]
  414. xor r9,QWORD PTR[rax*1+rsi]
  415. mov al,dl
  416. xor r8,r10
  417. movzx r13,WORD PTR[r13*2+r11]
  418. movzx ecx,dl
  419. shl al,4
  420. movzx r12,BYTE PTR[rbx*1+rsp]
  421. shr ecx,4
  422. shl r13,48
  423. xor r12,r8
  424. mov r10,r9
  425. xor r9,r13
  426. shr r8,8
  427. movzx r12,r12b
  428. shr r9,8
  429. xor r8,QWORD PTR[((-128))+rbx*8+rbp]
  430. shl r10,56
  431. xor r9,QWORD PTR[rbx*8+rbp]
  432. rol edx,8
  433. xor r8,QWORD PTR[8+rax*1+rsi]
  434. xor r9,QWORD PTR[rax*1+rsi]
  435. mov al,dl
  436. xor r8,r10
  437. movzx r12,WORD PTR[r12*2+r11]
  438. movzx ebx,dl
  439. shl al,4
  440. movzx r13,BYTE PTR[rcx*1+rsp]
  441. shr ebx,4
  442. shl r12,48
  443. xor r13,r8
  444. mov r10,r9
  445. xor r9,r12
  446. shr r8,8
  447. movzx r13,r13b
  448. shr r9,8
  449. xor r8,QWORD PTR[((-128))+rcx*8+rbp]
  450. shl r10,56
  451. xor r9,QWORD PTR[rcx*8+rbp]
  452. rol edx,8
  453. xor r8,QWORD PTR[8+rax*1+rsi]
  454. xor r9,QWORD PTR[rax*1+rsi]
  455. mov al,dl
  456. xor r8,r10
  457. movzx r13,WORD PTR[r13*2+r11]
  458. movzx ecx,dl
  459. shl al,4
  460. movzx r12,BYTE PTR[rbx*1+rsp]
  461. shr ecx,4
  462. shl r13,48
  463. xor r12,r8
  464. mov r10,r9
  465. xor r9,r13
  466. shr r8,8
  467. movzx r12,r12b
  ; Reload edx with stored bytes 4..7.
  468. mov edx,DWORD PTR[4+rdi]
  469. shr r9,8
  470. xor r8,QWORD PTR[((-128))+rbx*8+rbp]
  471. shl r10,56
  472. xor r9,QWORD PTR[rbx*8+rbp]
  473. rol edx,8
  474. xor r8,QWORD PTR[8+rax*1+rsi]
  475. xor r9,QWORD PTR[rax*1+rsi]
  476. mov al,dl
  477. xor r8,r10
  478. movzx r12,WORD PTR[r12*2+r11]
  479. movzx ebx,dl
  480. shl al,4
  481. movzx r13,BYTE PTR[rcx*1+rsp]
  482. shr ebx,4
  483. shl r12,48
  484. xor r13,r8
  485. mov r10,r9
  486. xor r9,r12
  487. shr r8,8
  488. movzx r13,r13b
  489. shr r9,8
  490. xor r8,QWORD PTR[((-128))+rcx*8+rbp]
  491. shl r10,56
  492. xor r9,QWORD PTR[rcx*8+rbp]
  493. rol edx,8
  494. xor r8,QWORD PTR[8+rax*1+rsi]
  495. xor r9,QWORD PTR[rax*1+rsi]
  496. mov al,dl
  497. xor r8,r10
  498. movzx r13,WORD PTR[r13*2+r11]
  499. movzx ecx,dl
  500. shl al,4
  501. movzx r12,BYTE PTR[rbx*1+rsp]
  502. shr ecx,4
  503. shl r13,48
  504. xor r12,r8
  505. mov r10,r9
  506. xor r9,r13
  507. shr r8,8
  508. movzx r12,r12b
  509. shr r9,8
  510. xor r8,QWORD PTR[((-128))+rbx*8+rbp]
  511. shl r10,56
  512. xor r9,QWORD PTR[rbx*8+rbp]
  513. rol edx,8
  514. xor r8,QWORD PTR[8+rax*1+rsi]
  515. xor r9,QWORD PTR[rax*1+rsi]
  516. mov al,dl
  517. xor r8,r10
  518. movzx r12,WORD PTR[r12*2+r11]
  519. movzx ebx,dl
  520. shl al,4
  521. movzx r13,BYTE PTR[rcx*1+rsp]
  522. shr ebx,4
  523. shl r12,48
  524. xor r13,r8
  525. mov r10,r9
  526. xor r9,r12
  527. shr r8,8
  528. movzx r13,r13b
  529. shr r9,8
  530. xor r8,QWORD PTR[((-128))+rcx*8+rbp]
  531. shl r10,56
  532. xor r9,QWORD PTR[rcx*8+rbp]
  533. rol edx,8
  534. xor r8,QWORD PTR[8+rax*1+rsi]
  535. xor r9,QWORD PTR[rax*1+rsi]
  536. mov al,dl
  537. xor r8,r10
  538. movzx r13,WORD PTR[r13*2+r11]
  539. movzx ecx,dl
  540. shl al,4
  541. movzx r12,BYTE PTR[rbx*1+rsp]
  542. shr ecx,4
  543. shl r13,48
  544. xor r12,r8
  545. mov r10,r9
  546. xor r9,r13
  547. shr r8,8
  548. movzx r12,r12b
  ; Reload edx with stored bytes 0..3.
  549. mov edx,DWORD PTR[rdi]
  550. shr r9,8
  551. xor r8,QWORD PTR[((-128))+rbx*8+rbp]
  552. shl r10,56
  553. xor r9,QWORD PTR[rbx*8+rbp]
  554. rol edx,8
  555. xor r8,QWORD PTR[8+rax*1+rsi]
  556. xor r9,QWORD PTR[rax*1+rsi]
  557. mov al,dl
  558. xor r8,r10
  559. movzx r12,WORD PTR[r12*2+r11]
  560. movzx ebx,dl
  561. shl al,4
  562. movzx r13,BYTE PTR[rcx*1+rsp]
  563. shr ebx,4
  564. shl r12,48
  565. xor r13,r8
  566. mov r10,r9
  567. xor r9,r12
  568. shr r8,8
  569. movzx r13,r13b
  570. shr r9,8
  571. xor r8,QWORD PTR[((-128))+rcx*8+rbp]
  572. shl r10,56
  573. xor r9,QWORD PTR[rcx*8+rbp]
  574. rol edx,8
  575. xor r8,QWORD PTR[8+rax*1+rsi]
  576. xor r9,QWORD PTR[rax*1+rsi]
  577. mov al,dl
  578. xor r8,r10
  579. movzx r13,WORD PTR[r13*2+r11]
  580. movzx ecx,dl
  581. shl al,4
  582. movzx r12,BYTE PTR[rbx*1+rsp]
  583. shr ecx,4
  584. shl r13,48
  585. xor r12,r8
  586. mov r10,r9
  587. xor r9,r13
  588. shr r8,8
  589. movzx r12,r12b
  590. shr r9,8
  591. xor r8,QWORD PTR[((-128))+rbx*8+rbp]
  592. shl r10,56
  593. xor r9,QWORD PTR[rbx*8+rbp]
  594. rol edx,8
  595. xor r8,QWORD PTR[8+rax*1+rsi]
  596. xor r9,QWORD PTR[rax*1+rsi]
  597. mov al,dl
  598. xor r8,r10
  599. movzx r12,WORD PTR[r12*2+r11]
  600. movzx ebx,dl
  601. shl al,4
  602. movzx r13,BYTE PTR[rcx*1+rsp]
  603. shr ebx,4
  604. shl r12,48
  605. xor r13,r8
  606. mov r10,r9
  607. xor r9,r12
  608. shr r8,8
  609. movzx r13,r13b
  610. shr r9,8
  611. xor r8,QWORD PTR[((-128))+rcx*8+rbp]
  612. shl r10,56
  613. xor r9,QWORD PTR[rcx*8+rbp]
  614. rol edx,8
  615. xor r8,QWORD PTR[8+rax*1+rsi]
  616. xor r9,QWORD PTR[rax*1+rsi]
  617. mov al,dl
  618. xor r8,r10
  619. movzx r13,WORD PTR[r13*2+r11]
  620. movzx ecx,dl
  621. shl al,4
  622. movzx r12,BYTE PTR[rbx*1+rsp]
  ; Last byte (byte 0): keep the high nibble as nibble*16 (mask, not shift) so
  ; it can index the rsi table directly in the final 4-bit step below.
  623. and ecx,240
  624. shl r13,48
  625. xor r12,r8
  626. mov r10,r9
  627. xor r9,r13
  628. shr r8,8
  629. movzx r12,r12b
  ; NOTE(review): this loads 4 bytes from rdi-4, i.e. just below the 16-byte
  ; state, and edx is not read again before rdx is overwritten at the top of
  ; the next pass (or the routine returns). It looks like a leftover of the
  ; unrolled "reload edx" pattern; confirm that memory below [rdi] is always
  ; readable for every caller before relying on it or removing it.
  630. mov edx,DWORD PTR[((-4))+rdi]
  631. shr r9,8
  632. xor r8,QWORD PTR[((-128))+rbx*8+rbp]
  633. shl r10,56
  634. xor r9,QWORD PTR[rbx*8+rbp]
  635. movzx r12,WORD PTR[r12*2+r11]
  636. xor r8,QWORD PTR[8+rax*1+rsi]
  637. xor r9,QWORD PTR[rax*1+rsi]
  638. shl r12,48
  639. xor r8,r10
  640. xor r9,r12
  ; Final step is a 4-bit shift: rem_8bit is indexed with (low nibble << 4)
  ; and the rsi table with rcx = high nibble * 16.
  641. movzx r13,r8b
  642. shr r8,4
  643. mov r10,r9
  644. shl r13b,4
  645. shr r9,4
  646. xor r8,QWORD PTR[8+rcx*1+rsi]
  647. movzx r13,WORD PTR[r13*2+r11]
  648. shl r10,60
  649. xor r9,QWORD PTR[rcx*1+rsi]
  650. xor r8,r10
  651. shl r13,48
  652. bswap r8
  653. xor r9,r13
  654. bswap r9
  ; More input left? (unsigned compare of cursor against end pointer)
  655. cmp r14,r15
  656. jb $L$outer_loop
  ; Write the final state back.
  657. mov QWORD PTR[8+rdi],r8
  658. mov QWORD PTR[rdi],r9
  ; rsi = rsp as it was before the six pushes; reload all six saved registers.
  659. lea rsi,QWORD PTR[((280+48))+rsp]
  660. mov r15,QWORD PTR[((-48))+rsi]
  661. mov r14,QWORD PTR[((-40))+rsi]
  662. mov r13,QWORD PTR[((-32))+rsi]
  663. mov r12,QWORD PTR[((-24))+rsi]
  664. mov rbp,QWORD PTR[((-16))+rsi]
  665. mov rbx,QWORD PTR[((-8))+rsi]
  666. lea rsp,QWORD PTR[rsi]
  667. $L$ghash_epilogue::
  668. mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue
  669. mov rsi,QWORD PTR[16+rsp]
  670. DB 0F3h,0C3h ;repret
  671. $L$SEH_end_gcm_ghash_4bit::
  672. gcm_ghash_4bit ENDP
  ;-----------------------------------------------------------------------
  ; gcm_init_clmul
  ;   Reads one 16-byte value from [rdx], derives a base value from it, and
  ;   writes six 16-byte table entries to [rcx+0 .. rcx+95] using carry-less
  ;   multiplication:
  ;     +0  base value V (xmm2)          +48 V*V*V
  ;     +16 V*V                          +64 V*V*V*V
  ;     +32 folded halves of V and V*V   +80 folded halves of the +48/+64 values
  ;   NOTE(review): presumably V is the GCM hash key H pre-shifted for the
  ;   reduction used here and the entries are its powers -- confirm against
  ;   the callers.
  ; ABI:  Microsoft x64 (arguments used directly in rcx / rdx)
  ; In:   rcx = output table (at least 96 bytes written), rdx = 16-byte input
  ; Uses: xmm0-xmm5 as scratch; xmm6 is saved on the stack and restored.
  ; External: $L$0x1c2_polynomial (defined outside this routine).
  ;-----------------------------------------------------------------------
  673. PUBLIC gcm_init_clmul
  674. ALIGN 16
  675. gcm_init_clmul PROC PUBLIC
  676. $L$_init_clmul::
  677. $L$SEH_begin_gcm_init_clmul::
  ; Hand-encoded prologue: reserve 24 bytes and save xmm6.
  678. DB 048h,083h,0ech,018h ; sub rsp,18h
  679. DB 00fh,029h,034h,024h ; movaps XMMWORD PTR[rsp],xmm6
  ; Load the input and swap its two qwords.
  680. movdqu xmm2,XMMWORD PTR[rdx]
  681. pshufd xmm2,xmm2,78
  ; Shift the 128-bit value left by one bit (carry moved across the qword
  ; boundary via xmm3) and, when the top bit of the original was set (xmm5 =
  ; all-ones mask from the signed compare), XOR in the 0x1c2 polynomial constant.
  682. pshufd xmm4,xmm2,255
  683. movdqa xmm3,xmm2
  684. psllq xmm2,1
  685. pxor xmm5,xmm5
  686. psrlq xmm3,63
  687. pcmpgtd xmm5,xmm4
  688. pslldq xmm3,8
  689. por xmm2,xmm3
  690. pand xmm5,XMMWORD PTR[$L$0x1c2_polynomial]
  691. pxor xmm2,xmm5
  ; xmm6 = V with its halves folded together (hi ^ lo in each qword); it is the
  ; second operand of the middle multiply in all three products below.
  692. pshufd xmm6,xmm2,78
  693. movdqa xmm0,xmm2
  694. pxor xmm6,xmm2
  ; ---- Product 1: xmm0 = xmm0 * V (V squared), Karatsuba-style with three
  ; carry-less multiplies: low*low, high*high, folded*folded.
  695. movdqa xmm1,xmm0
  696. pshufd xmm3,xmm0,78
  697. pxor xmm3,xmm0
  698. DB 102,15,58,68,194,0 ; pclmulqdq xmm0,xmm2,00h
  699. DB 102,15,58,68,202,17 ; pclmulqdq xmm1,xmm2,11h
  700. DB 102,15,58,68,222,0 ; pclmulqdq xmm3,xmm6,00h
  701. pxor xmm3,xmm0
  702. pxor xmm3,xmm1
  703. movdqa xmm4,xmm3
  704. psrldq xmm3,8
  705. pslldq xmm4,8
  706. pxor xmm1,xmm3
  707. pxor xmm0,xmm4
  ; Fold the 256-bit product xmm1:xmm0 back to 128 bits in xmm0 (shift-and-XOR
  ; reduction in two phases).
  708. movdqa xmm4,xmm0
  709. movdqa xmm3,xmm0
  710. psllq xmm0,5
  711. pxor xmm3,xmm0
  712. psllq xmm0,1
  713. pxor xmm0,xmm3
  714. psllq xmm0,57
  715. movdqa xmm3,xmm0
  716. pslldq xmm0,8
  717. psrldq xmm3,8
  718. pxor xmm0,xmm4
  719. pxor xmm1,xmm3
  720. movdqa xmm4,xmm0
  721. psrlq xmm0,1
  722. pxor xmm1,xmm4
  723. pxor xmm4,xmm0
  724. psrlq xmm0,5
  725. pxor xmm0,xmm4
  726. psrlq xmm0,1
  727. pxor xmm0,xmm1
  ; Store V, V*V and a combined entry built from their folded halves.
  728. pshufd xmm3,xmm2,78
  729. pshufd xmm4,xmm0,78
  730. pxor xmm3,xmm2
  731. movdqu XMMWORD PTR[rcx],xmm2
  732. pxor xmm4,xmm0
  733. movdqu XMMWORD PTR[16+rcx],xmm0
  734. DB 102,15,58,15,227,8 ; palignr xmm4,xmm3,8
  735. movdqu XMMWORD PTR[32+rcx],xmm4
  ; ---- Product 2: xmm0 = xmm0 * V (third power), same multiply + reduction.
  736. movdqa xmm1,xmm0
  737. pshufd xmm3,xmm0,78
  738. pxor xmm3,xmm0
  739. DB 102,15,58,68,194,0 ; pclmulqdq xmm0,xmm2,00h
  740. DB 102,15,58,68,202,17 ; pclmulqdq xmm1,xmm2,11h
  741. DB 102,15,58,68,222,0 ; pclmulqdq xmm3,xmm6,00h
  742. pxor xmm3,xmm0
  743. pxor xmm3,xmm1
  744. movdqa xmm4,xmm3
  745. psrldq xmm3,8
  746. pslldq xmm4,8
  747. pxor xmm1,xmm3
  748. pxor xmm0,xmm4
  749. movdqa xmm4,xmm0
  750. movdqa xmm3,xmm0
  751. psllq xmm0,5
  752. pxor xmm3,xmm0
  753. psllq xmm0,1
  754. pxor xmm0,xmm3
  755. psllq xmm0,57
  756. movdqa xmm3,xmm0
  757. pslldq xmm0,8
  758. psrldq xmm3,8
  759. pxor xmm0,xmm4
  760. pxor xmm1,xmm3
  761. movdqa xmm4,xmm0
  762. psrlq xmm0,1
  763. pxor xmm1,xmm4
  764. pxor xmm4,xmm0
  765. psrlq xmm0,5
  766. pxor xmm0,xmm4
  767. psrlq xmm0,1
  768. pxor xmm0,xmm1
  ; Keep the third power in xmm5 while xmm0 goes on to the fourth.
  769. movdqa xmm5,xmm0
  ; ---- Product 3: xmm0 = xmm0 * V (fourth power), same multiply + reduction.
  770. movdqa xmm1,xmm0
  771. pshufd xmm3,xmm0,78
  772. pxor xmm3,xmm0
  773. DB 102,15,58,68,194,0 ; pclmulqdq xmm0,xmm2,00h
  774. DB 102,15,58,68,202,17 ; pclmulqdq xmm1,xmm2,11h
  775. DB 102,15,58,68,222,0 ; pclmulqdq xmm3,xmm6,00h
  776. pxor xmm3,xmm0
  777. pxor xmm3,xmm1
  778. movdqa xmm4,xmm3
  779. psrldq xmm3,8
  780. pslldq xmm4,8
  781. pxor xmm1,xmm3
  782. pxor xmm0,xmm4
  783. movdqa xmm4,xmm0
  784. movdqa xmm3,xmm0
  785. psllq xmm0,5
  786. pxor xmm3,xmm0
  787. psllq xmm0,1
  788. pxor xmm0,xmm3
  789. psllq xmm0,57
  790. movdqa xmm3,xmm0
  791. pslldq xmm0,8
  792. psrldq xmm3,8
  793. pxor xmm0,xmm4
  794. pxor xmm1,xmm3
  795. movdqa xmm4,xmm0
  796. psrlq xmm0,1
  797. pxor xmm1,xmm4
  798. pxor xmm4,xmm0
  799. psrlq xmm0,5
  800. pxor xmm0,xmm4
  801. psrlq xmm0,1
  802. pxor xmm0,xmm1
  ; Store the third and fourth powers and their combined folded-halves entry.
  803. pshufd xmm3,xmm5,78
  804. pshufd xmm4,xmm0,78
  805. pxor xmm3,xmm5
  806. movdqu XMMWORD PTR[48+rcx],xmm5
  807. pxor xmm4,xmm0
  808. movdqu XMMWORD PTR[64+rcx],xmm0
  809. DB 102,15,58,15,227,8 ; palignr xmm4,xmm3,8
  810. movdqu XMMWORD PTR[80+rcx],xmm4
  ; Restore xmm6 and release the 24 bytes reserved by the prologue.
  811. movaps xmm6,XMMWORD PTR[rsp]
  812. lea rsp,QWORD PTR[24+rsp]
  813. $L$SEH_end_gcm_init_clmul::
  814. DB 0F3h,0C3h ;repret
  815. gcm_init_clmul ENDP
  ;-----------------------------------------------------------------------
  ; gcm_gmult_clmul
  ;   Multiplies the 16-byte value at [rcx] by the table entry at [rdx] with
  ;   carry-less multiplies, reduces the product to 128 bits and stores it
  ;   back to [rcx]. The value is byte-shuffled with $L$bswap_mask on the way
  ;   in and on the way out.
  ;   NOTE(review): presumably the table is the one written by gcm_init_clmul
  ;   (entry +0 and the combined entry at +32) -- confirm against the callers.
  ; ABI:  Microsoft x64 (arguments used directly in rcx / rdx)
  ; In:   rcx = 16-byte value (read and overwritten), rdx = table
  ; Uses: xmm0-xmm5 only; no stack, no general-purpose registers written.
  ; External: $L$bswap_mask (defined outside this routine).
  ;-----------------------------------------------------------------------
  816. PUBLIC gcm_gmult_clmul
  817. ALIGN 16
  818. gcm_gmult_clmul PROC PUBLIC
  819. $L$_gmult_clmul::
  820. movdqu xmm0,XMMWORD PTR[rcx]
  821. movdqa xmm5,XMMWORD PTR[$L$bswap_mask]
  822. movdqu xmm2,XMMWORD PTR[rdx]
  823. movdqu xmm4,XMMWORD PTR[32+rdx]
  824. DB 102,15,56,0,197 ; pshufb xmm0,xmm5
  ; Three carry-less multiplies (low*low, high*high, folded halves), then
  ; recombine into the 256-bit product xmm1:xmm0.
  825. movdqa xmm1,xmm0
  826. pshufd xmm3,xmm0,78
  827. pxor xmm3,xmm0
  828. DB 102,15,58,68,194,0 ; pclmulqdq xmm0,xmm2,00h
  829. DB 102,15,58,68,202,17 ; pclmulqdq xmm1,xmm2,11h
  830. DB 102,15,58,68,220,0 ; pclmulqdq xmm3,xmm4,00h
  831. pxor xmm3,xmm0
  832. pxor xmm3,xmm1
  833. movdqa xmm4,xmm3
  834. psrldq xmm3,8
  835. pslldq xmm4,8
  836. pxor xmm1,xmm3
  837. pxor xmm0,xmm4
  ; Fold the 256-bit product back to 128 bits in xmm0 (shift-and-XOR reduction
  ; in two phases).
  838. movdqa xmm4,xmm0
  839. movdqa xmm3,xmm0
  840. psllq xmm0,5
  841. pxor xmm3,xmm0
  842. psllq xmm0,1
  843. pxor xmm0,xmm3
  844. psllq xmm0,57
  845. movdqa xmm3,xmm0
  846. pslldq xmm0,8
  847. psrldq xmm3,8
  848. pxor xmm0,xmm4
  849. pxor xmm1,xmm3
  850. movdqa xmm4,xmm0
  851. psrlq xmm0,1
  852. pxor xmm1,xmm4
  853. pxor xmm4,xmm0
  854. psrlq xmm0,5
  855. pxor xmm0,xmm4
  856. psrlq xmm0,1
  857. pxor xmm0,xmm1
  ; Shuffle the bytes back and store the result over the input.
  858. DB 102,15,56,0,197 ; pshufb xmm0,xmm5
  859. movdqu XMMWORD PTR[rcx],xmm0
  860. DB 0F3h,0C3h ;repret
  861. gcm_gmult_clmul ENDP
  862. PUBLIC gcm_ghash_clmul
  863. ALIGN 32
  864. gcm_ghash_clmul PROC PUBLIC
  865. $L$_ghash_clmul::
  866. lea rax,QWORD PTR[((-136))+rsp]
  867. $L$SEH_begin_gcm_ghash_clmul::
  868. DB 048h,08dh,060h,0e0h
  869. DB 00fh,029h,070h,0e0h
  870. DB 00fh,029h,078h,0f0h
  871. DB 044h,00fh,029h,000h
  872. DB 044h,00fh,029h,048h,010h
  873. DB 044h,00fh,029h,050h,020h
  874. DB 044h,00fh,029h,058h,030h
  875. DB 044h,00fh,029h,060h,040h
  876. DB 044h,00fh,029h,068h,050h
  877. DB 044h,00fh,029h,070h,060h
  878. DB 044h,00fh,029h,078h,070h
  879. movdqa xmm10,XMMWORD PTR[$L$bswap_mask]
  880. movdqu xmm0,XMMWORD PTR[rcx]
  881. movdqu xmm2,XMMWORD PTR[rdx]
  882. movdqu xmm7,XMMWORD PTR[32+rdx]
  883. DB 102,65,15,56,0,194
  884. sub r9,010h
  885. jz $L$odd_tail
  886. movdqu xmm6,XMMWORD PTR[16+rdx]
  887. mov eax,DWORD PTR[((OPENSSL_ia32cap_P+4))]
  888. cmp r9,030h
  889. jb $L$skip4x
  890. and eax,71303168
  891. cmp eax,4194304
  892. je $L$skip4x
  893. sub r9,030h
  894. mov rax,0A040608020C0E000h
  895. movdqu xmm14,XMMWORD PTR[48+rdx]
  896. movdqu xmm15,XMMWORD PTR[64+rdx]
  897. movdqu xmm3,XMMWORD PTR[48+r8]
  898. movdqu xmm11,XMMWORD PTR[32+r8]
  899. DB 102,65,15,56,0,218
  900. DB 102,69,15,56,0,218
  901. movdqa xmm5,xmm3
  902. pshufd xmm4,xmm3,78
  903. pxor xmm4,xmm3
  904. DB 102,15,58,68,218,0
  905. DB 102,15,58,68,234,17
  906. DB 102,15,58,68,231,0
  907. movdqa xmm13,xmm11
  908. pshufd xmm12,xmm11,78
  909. pxor xmm12,xmm11
  910. DB 102,68,15,58,68,222,0
  911. DB 102,68,15,58,68,238,17
  912. DB 102,68,15,58,68,231,16
  913. xorps xmm3,xmm11
  914. xorps xmm5,xmm13
  915. movups xmm7,XMMWORD PTR[80+rdx]
  916. xorps xmm4,xmm12
  917. movdqu xmm11,XMMWORD PTR[16+r8]
  918. movdqu xmm8,XMMWORD PTR[r8]
  919. DB 102,69,15,56,0,218
  920. DB 102,69,15,56,0,194
  921. movdqa xmm13,xmm11
  922. pshufd xmm12,xmm11,78
  923. pxor xmm0,xmm8
  924. pxor xmm12,xmm11
  925. DB 102,69,15,58,68,222,0
  926. movdqa xmm1,xmm0
  927. pshufd xmm8,xmm0,78
  928. pxor xmm8,xmm0
  929. DB 102,69,15,58,68,238,17
  930. DB 102,68,15,58,68,231,0
  931. xorps xmm3,xmm11
  932. xorps xmm5,xmm13
  933. lea r8,QWORD PTR[64+r8]
  934. sub r9,040h
  935. jc $L$tail4x
  936. jmp $L$mod4_loop
  937. ALIGN 32
  938. $L$mod4_loop::
  939. DB 102,65,15,58,68,199,0
  940. xorps xmm4,xmm12
  941. movdqu xmm11,XMMWORD PTR[48+r8]
  942. DB 102,69,15,56,0,218
  943. DB 102,65,15,58,68,207,17
  944. xorps xmm0,xmm3
  945. movdqu xmm3,XMMWORD PTR[32+r8]
  946. movdqa xmm13,xmm11
  947. DB 102,68,15,58,68,199,16
  948. pshufd xmm12,xmm11,78
  949. xorps xmm1,xmm5
  950. pxor xmm12,xmm11
  951. DB 102,65,15,56,0,218
  952. movups xmm7,XMMWORD PTR[32+rdx]
  953. xorps xmm8,xmm4
  954. DB 102,68,15,58,68,218,0
  955. pshufd xmm4,xmm3,78
  956. pxor xmm8,xmm0
  957. movdqa xmm5,xmm3
  958. pxor xmm8,xmm1
  959. pxor xmm4,xmm3
  960. movdqa xmm9,xmm8
  961. DB 102,68,15,58,68,234,17
  962. pslldq xmm8,8
  963. psrldq xmm9,8
  964. pxor xmm0,xmm8
  965. movdqa xmm8,XMMWORD PTR[$L$7_mask]
  966. pxor xmm1,xmm9
  967. DB 102,76,15,110,200
  968. pand xmm8,xmm0
  969. DB 102,69,15,56,0,200
  970. pxor xmm9,xmm0
  971. DB 102,68,15,58,68,231,0
  972. psllq xmm9,57
  973. movdqa xmm8,xmm9
  974. pslldq xmm9,8
  975. DB 102,15,58,68,222,0
  976. psrldq xmm8,8
  977. pxor xmm0,xmm9
  978. pxor xmm1,xmm8
  979. movdqu xmm8,XMMWORD PTR[r8]
  980. movdqa xmm9,xmm0
  981. psrlq xmm0,1
  982. DB 102,15,58,68,238,17
  983. xorps xmm3,xmm11
  984. movdqu xmm11,XMMWORD PTR[16+r8]
  985. DB 102,69,15,56,0,218
  986. DB 102,15,58,68,231,16
  987. xorps xmm5,xmm13
  988. movups xmm7,XMMWORD PTR[80+rdx]
  989. DB 102,69,15,56,0,194
  990. pxor xmm1,xmm9
  991. pxor xmm9,xmm0
  992. psrlq xmm0,5
  993. movdqa xmm13,xmm11
  994. pxor xmm4,xmm12
  995. pshufd xmm12,xmm11,78
  996. pxor xmm0,xmm9
  997. pxor xmm1,xmm8
  998. pxor xmm12,xmm11
  999. DB 102,69,15,58,68,222,0
  1000. psrlq xmm0,1
  1001. pxor xmm0,xmm1
  1002. movdqa xmm1,xmm0
  1003. DB 102,69,15,58,68,238,17
  1004. xorps xmm3,xmm11
  1005. pshufd xmm8,xmm0,78
  1006. pxor xmm8,xmm0
  1007. DB 102,68,15,58,68,231,0
  1008. xorps xmm5,xmm13
  1009. lea r8,QWORD PTR[64+r8]
  1010. sub r9,040h
  1011. jnc $L$mod4_loop
  1012. $L$tail4x::
  1013. DB 102,65,15,58,68,199,0
  1014. DB 102,65,15,58,68,207,17
  1015. DB 102,68,15,58,68,199,16
  1016. xorps xmm4,xmm12
  1017. xorps xmm0,xmm3
  1018. xorps xmm1,xmm5
  1019. pxor xmm1,xmm0
  1020. pxor xmm8,xmm4
  1021. pxor xmm8,xmm1
  1022. pxor xmm1,xmm0
  1023. movdqa xmm9,xmm8
  1024. psrldq xmm8,8
  1025. pslldq xmm9,8
  1026. pxor xmm1,xmm8
  1027. pxor xmm0,xmm9
  1028. movdqa xmm4,xmm0
  1029. movdqa xmm3,xmm0
  1030. psllq xmm0,5
  1031. pxor xmm3,xmm0
  1032. psllq xmm0,1
  1033. pxor xmm0,xmm3
  1034. psllq xmm0,57
  1035. movdqa xmm3,xmm0
  1036. pslldq xmm0,8
  1037. psrldq xmm3,8
  1038. pxor xmm0,xmm4
  1039. pxor xmm1,xmm3
  1040. movdqa xmm4,xmm0
  1041. psrlq xmm0,1
  1042. pxor xmm1,xmm4
  1043. pxor xmm4,xmm0
  1044. psrlq xmm0,5
  1045. pxor xmm0,xmm4
  1046. psrlq xmm0,1
  1047. pxor xmm0,xmm1
  1048. add r9,040h
  1049. jz $L$done
  1050. movdqu xmm7,XMMWORD PTR[32+rdx]
  1051. sub r9,010h
  1052. jz $L$odd_tail
  1053. $L$skip4x::
  1054. movdqu xmm8,XMMWORD PTR[r8]
  1055. movdqu xmm3,XMMWORD PTR[16+r8]
  1056. DB 102,69,15,56,0,194
  1057. DB 102,65,15,56,0,218
  1058. pxor xmm0,xmm8
  1059. movdqa xmm5,xmm3
  1060. pshufd xmm4,xmm3,78
  1061. pxor xmm4,xmm3
  1062. DB 102,15,58,68,218,0
  1063. DB 102,15,58,68,234,17
  1064. DB 102,15,58,68,231,0
  1065. lea r8,QWORD PTR[32+r8]
  1066. nop
  1067. sub r9,020h
  1068. jbe $L$even_tail
  1069. nop
  1070. jmp $L$mod_loop
  1071. ALIGN 32
  1072. $L$mod_loop::
  1073. movdqa xmm1,xmm0
  1074. movdqa xmm8,xmm4
  1075. pshufd xmm4,xmm0,78
  1076. pxor xmm4,xmm0
  1077. DB 102,15,58,68,198,0
  1078. DB 102,15,58,68,206,17
  1079. DB 102,15,58,68,231,16
  1080. pxor xmm0,xmm3
  1081. pxor xmm1,xmm5
  1082. movdqu xmm9,XMMWORD PTR[r8]
  1083. pxor xmm8,xmm0
  1084. DB 102,69,15,56,0,202
  1085. movdqu xmm3,XMMWORD PTR[16+r8]
  1086. pxor xmm8,xmm1
  1087. pxor xmm1,xmm9
  1088. pxor xmm4,xmm8
  1089. DB 102,65,15,56,0,218
  1090. movdqa xmm8,xmm4
  1091. psrldq xmm8,8
  1092. pslldq xmm4,8
  1093. pxor xmm1,xmm8
  1094. pxor xmm0,xmm4
  1095. movdqa xmm5,xmm3
  1096. movdqa xmm9,xmm0
  1097. movdqa xmm8,xmm0
  1098. psllq xmm0,5
  1099. pxor xmm8,xmm0
  1100. DB 102,15,58,68,218,0
  1101. psllq xmm0,1
  1102. pxor xmm0,xmm8
  1103. psllq xmm0,57
  1104. movdqa xmm8,xmm0
  1105. pslldq xmm0,8
  1106. psrldq xmm8,8
  1107. pxor xmm0,xmm9
  1108. pshufd xmm4,xmm5,78
  1109. pxor xmm1,xmm8
  1110. pxor xmm4,xmm5
  1111. movdqa xmm9,xmm0
  1112. psrlq xmm0,1
  1113. DB 102,15,58,68,234,17
  1114. pxor xmm1,xmm9
  1115. pxor xmm9,xmm0
  1116. psrlq xmm0,5
  1117. pxor xmm0,xmm9
  1118. lea r8,QWORD PTR[32+r8]
  1119. psrlq xmm0,1
  1120. DB 102,15,58,68,231,0
  1121. pxor xmm0,xmm1
  1122. sub r9,020h
  1123. ja $L$mod_loop
  1124. $L$even_tail::
  1125. movdqa xmm1,xmm0
  1126. movdqa xmm8,xmm4
  1127. pshufd xmm4,xmm0,78
  1128. pxor xmm4,xmm0
  1129. DB 102,15,58,68,198,0
  1130. DB 102,15,58,68,206,17
  1131. DB 102,15,58,68,231,16
  1132. pxor xmm0,xmm3
  1133. pxor xmm1,xmm5
  1134. pxor xmm8,xmm0
  1135. pxor xmm8,xmm1
  1136. pxor xmm4,xmm8
  1137. movdqa xmm8,xmm4
  1138. psrldq xmm8,8
  1139. pslldq xmm4,8
  1140. pxor xmm1,xmm8
  1141. pxor xmm0,xmm4
  1142. movdqa xmm4,xmm0
  1143. movdqa xmm3,xmm0
  1144. psllq xmm0,5
  1145. pxor xmm3,xmm0
  1146. psllq xmm0,1
  1147. pxor xmm0,xmm3
  1148. psllq xmm0,57
  1149. movdqa xmm3,xmm0
  1150. pslldq xmm0,8
  1151. psrldq xmm3,8
  1152. pxor xmm0,xmm4
  1153. pxor xmm1,xmm3
  1154. movdqa xmm4,xmm0
  1155. psrlq xmm0,1
  1156. pxor xmm1,xmm4
  1157. pxor xmm4,xmm0
  1158. psrlq xmm0,5
  1159. pxor xmm0,xmm4
  1160. psrlq xmm0,1
  1161. pxor xmm0,xmm1
  1162. test r9,r9
  1163. jnz $L$done
  1164. $L$odd_tail::
  1165. movdqu xmm8,XMMWORD PTR[r8]
  1166. DB 102,69,15,56,0,194
  1167. pxor xmm0,xmm8
  1168. movdqa xmm1,xmm0
  1169. pshufd xmm3,xmm0,78
  1170. pxor xmm3,xmm0
  1171. DB 102,15,58,68,194,0
  1172. DB 102,15,58,68,202,17
  1173. DB 102,15,58,68,223,0
  1174. pxor xmm3,xmm0
  1175. pxor xmm3,xmm1
  1176. movdqa xmm4,xmm3
  1177. psrldq xmm3,8
  1178. pslldq xmm4,8
  1179. pxor xmm1,xmm3
  1180. pxor xmm0,xmm4
  1181. movdqa xmm4,xmm0
  1182. movdqa xmm3,xmm0
  1183. psllq xmm0,5
  1184. pxor xmm3,xmm0
  1185. psllq xmm0,1
  1186. pxor xmm0,xmm3
  1187. psllq xmm0,57
  1188. movdqa xmm3,xmm0
  1189. pslldq xmm0,8
  1190. psrldq xmm3,8
  1191. pxor xmm0,xmm4
  1192. pxor xmm1,xmm3
  1193. movdqa xmm4,xmm0
  1194. psrlq xmm0,1
  1195. pxor xmm1,xmm4
  1196. pxor xmm4,xmm0
  1197. psrlq xmm0,5
  1198. pxor xmm0,xmm4
  1199. psrlq xmm0,1
  1200. pxor xmm0,xmm1
  1201. $L$done::
  1202. DB 102,65,15,56,0,194
  1203. movdqu XMMWORD PTR[rcx],xmm0
  1204. movaps xmm6,XMMWORD PTR[rsp]
  1205. movaps xmm7,XMMWORD PTR[16+rsp]
  1206. movaps xmm8,XMMWORD PTR[32+rsp]
  1207. movaps xmm9,XMMWORD PTR[48+rsp]
  1208. movaps xmm10,XMMWORD PTR[64+rsp]
  1209. movaps xmm11,XMMWORD PTR[80+rsp]
  1210. movaps xmm12,XMMWORD PTR[96+rsp]
  1211. movaps xmm13,XMMWORD PTR[112+rsp]
  1212. movaps xmm14,XMMWORD PTR[128+rsp]
  1213. movaps xmm15,XMMWORD PTR[144+rsp]
  1214. lea rsp,QWORD PTR[168+rsp]
  1215. $L$SEH_end_gcm_ghash_clmul::
  1216. DB 0F3h,0C3h ;repret
  1217. gcm_ghash_clmul ENDP
  ;-----------------------------------------------------------------------
  ; gcm_init_avx
  ; Builds a 192-byte table at [rcx] from the 16-byte value at [rdx],
  ; using carry-less multiplication (vpclmulqdq).
  ; In:    rcx = output table (4 groups x 48 bytes are written)
  ;        rdx = 16-byte input value
  ; Uses:  r10 (group counter), xmm0-xmm6. xmm6 is saved on the stack in
  ;        the prologue and restored before return.
  ; Each 48-byte group holds two successive products (offsets 0 and 16)
  ; and, at offset 32, the packed hi^lo qwords of those two values.
  ; NOTE(review): presumably the input is the GHASH key H and the table
  ; holds its successive powers -- confirm against the C caller.
  ;-----------------------------------------------------------------------
  1218. PUBLIC gcm_init_avx
  1219. ALIGN 32
  1220. gcm_init_avx PROC PUBLIC
  1221. $L$SEH_begin_gcm_init_avx::
  1222. DB 048h,083h,0ech,018h ; hand-encoded: sub rsp,24
  1223. DB 00fh,029h,034h,024h ; hand-encoded: movaps [rsp],xmm6
  1224. vzeroupper
  1225. vmovdqu xmm2,XMMWORD PTR[rdx]
  1226. vpshufd xmm2,xmm2,78 ; swap the two 64-bit halves
  ; Shift xmm2 left by one bit as a 128-bit quantity; if the bit shifted
  ; out of the top was set, XOR in the $L$0x1c2_polynomial constant.
  1227. vpshufd xmm4,xmm2,255 ; broadcast the top dword
  1228. vpsrlq xmm3,xmm2,63 ; carry bit of each qword
  1229. vpsllq xmm2,xmm2,1
  1230. vpxor xmm5,xmm5,xmm5
  1231. vpcmpgtd xmm5,xmm5,xmm4 ; all-ones where 0 > top dword (top bit was set)
  1232. vpslldq xmm3,xmm3,8 ; move the low qword's carry into the high qword
  1233. vpor xmm2,xmm2,xmm3
  1234. vpand xmm5,xmm5,XMMWORD PTR[$L$0x1c2_polynomial]
  1235. vpxor xmm2,xmm2,xmm5
  1236. vpunpckhqdq xmm6,xmm2,xmm2
  1237. vmovdqa xmm0,xmm2 ; xmm0 = running value, xmm2 = fixed multiplier
  1238. vpxor xmm6,xmm6,xmm2 ; low qword of xmm6 = hi^lo of xmm2
  1239. mov r10,4 ; four 48-byte groups
  1240. jmp $L$init_start_avx
  1241. ALIGN 32
  1242. $L$init_loop_avx::
  ; Finish the previous group: rcx already points past it, so -16 is its
  ; offset 32. The entry packs the high qword of xmm3 and the low qword
  ; of xmm4, both of which hold hi^lo of the two values stored before.
  1243. vpalignr xmm5,xmm4,xmm3,8
  1244. vmovdqu XMMWORD PTR[(-16)+rcx],xmm5
  ; xmm0 = xmm0 * xmm2: three carry-less multiplies (lo*lo, hi*hi and
  ; (hi^lo)*(hi^lo)) combined into a 256-bit product in xmm1:xmm0 ...
  1245. vpunpckhqdq xmm3,xmm0,xmm0
  1246. vpxor xmm3,xmm3,xmm0
  1247. vpclmulqdq xmm1,xmm0,xmm2,011h
  1248. vpclmulqdq xmm0,xmm0,xmm2,000h
  1249. vpclmulqdq xmm3,xmm3,xmm6,000h
  1250. vpxor xmm4,xmm1,xmm0
  1251. vpxor xmm3,xmm3,xmm4
  1252. vpslldq xmm4,xmm3,8
  1253. vpsrldq xmm3,xmm3,8
  1254. vpxor xmm0,xmm0,xmm4
  1255. vpxor xmm1,xmm1,xmm3
  ; ... then folded back to 128 bits with shifts and XORs.
  1256. vpsllq xmm3,xmm0,57
  1257. vpsllq xmm4,xmm0,62
  1258. vpxor xmm4,xmm4,xmm3
  1259. vpsllq xmm3,xmm0,63
  1260. vpxor xmm4,xmm4,xmm3
  1261. vpslldq xmm3,xmm4,8
  1262. vpsrldq xmm4,xmm4,8
  1263. vpxor xmm0,xmm0,xmm3
  1264. vpxor xmm1,xmm1,xmm4
  1265. vpsrlq xmm4,xmm0,1
  1266. vpxor xmm1,xmm1,xmm0
  1267. vpxor xmm0,xmm0,xmm4
  1268. vpsrlq xmm4,xmm4,5
  1269. vpxor xmm0,xmm0,xmm4
  1270. vpsrlq xmm0,xmm0,1
  1271. vpxor xmm0,xmm0,xmm1
  1272. $L$init_start_avx::
  1273. vmovdqa xmm5,xmm0 ; keep the current value; xmm0 becomes the next one
  ; Same multiply-by-xmm2 and fold sequence as in $L$init_loop_avx.
  1274. vpunpckhqdq xmm3,xmm0,xmm0
  1275. vpxor xmm3,xmm3,xmm0
  1276. vpclmulqdq xmm1,xmm0,xmm2,011h
  1277. vpclmulqdq xmm0,xmm0,xmm2,000h
  1278. vpclmulqdq xmm3,xmm3,xmm6,000h
  1279. vpxor xmm4,xmm1,xmm0
  1280. vpxor xmm3,xmm3,xmm4
  1281. vpslldq xmm4,xmm3,8
  1282. vpsrldq xmm3,xmm3,8
  1283. vpxor xmm0,xmm0,xmm4
  1284. vpxor xmm1,xmm1,xmm3
  1285. vpsllq xmm3,xmm0,57
  1286. vpsllq xmm4,xmm0,62
  1287. vpxor xmm4,xmm4,xmm3
  1288. vpsllq xmm3,xmm0,63
  1289. vpxor xmm4,xmm4,xmm3
  1290. vpslldq xmm3,xmm4,8
  1291. vpsrldq xmm4,xmm4,8
  1292. vpxor xmm0,xmm0,xmm3
  1293. vpxor xmm1,xmm1,xmm4
  1294. vpsrlq xmm4,xmm0,1
  1295. vpxor xmm1,xmm1,xmm0
  1296. vpxor xmm0,xmm0,xmm4
  1297. vpsrlq xmm4,xmm4,5
  1298. vpxor xmm0,xmm0,xmm4
  1299. vpsrlq xmm0,xmm0,1
  1300. vpxor xmm0,xmm0,xmm1
  ; Store the pair (xmm5, xmm0) and prepare their hi^lo values in xmm3/xmm4.
  1301. vpshufd xmm3,xmm5,78
  1302. vpshufd xmm4,xmm0,78
  1303. vpxor xmm3,xmm3,xmm5
  1304. vmovdqu XMMWORD PTR[rcx],xmm5
  1305. vpxor xmm4,xmm4,xmm0
  1306. vmovdqu XMMWORD PTR[16+rcx],xmm0
  1307. lea rcx,QWORD PTR[48+rcx] ; advance to the next group
  1308. sub r10,1
  1309. jnz $L$init_loop_avx
  ; Last group: same packed entry, but with the vpalignr sources swapped
  ; relative to the one in $L$init_loop_avx.
  1310. vpalignr xmm5,xmm3,xmm4,8
  1311. vmovdqu XMMWORD PTR[(-16)+rcx],xmm5
  1312. vzeroupper
  1313. movaps xmm6,XMMWORD PTR[rsp] ; restore the xmm6 saved in the prologue
  1314. lea rsp,QWORD PTR[24+rsp]
  1315. $L$SEH_end_gcm_init_avx::
  1316. DB 0F3h,0C3h ;repret
  1317. gcm_init_avx ENDP
  ; gcm_gmult_avx has no body of its own: it jumps straight to the label
  ; $L$_gmult_clmul, which is defined outside this part of the file, so
  ; arguments and return behaviour are whatever that code expects.
  1318. PUBLIC gcm_gmult_avx
  1319. ALIGN 32
  1320. gcm_gmult_avx PROC PUBLIC
  1321. jmp $L$_gmult_clmul
  1322. gcm_gmult_avx ENDP
  ;-----------------------------------------------------------------------
  ; gcm_ghash_avx
  ; Folds r9 bytes of input at [r8] into the 16-byte accumulator at [rcx]
  ; using the multiplier table at [rdx] and vpclmulqdq.
  ; In:    rcx = 16-byte accumulator (read at entry, rewritten at exit;
  ;              byte-reversed with $L$bswap_mask on load and on store)
  ;        rdx = table; only its first 192 bytes are read, which is the
  ;              size gcm_init_avx writes
  ;        r8  = input data
  ;        r9  = input length in bytes
  ; Uses:  rax, r10, xmm0-xmm15. xmm6-xmm15 are saved in a 168-byte stack
  ;        frame in the prologue and restored before return.
  ; NOTE(review): the code never handles a partial block and the short
  ; path reads [r8+r9-16] unconditionally, so r9 is assumed to be a
  ; non-zero multiple of 16 -- confirm against the caller.
  ;-----------------------------------------------------------------------
  1323. PUBLIC gcm_ghash_avx
  1324. ALIGN 32
  1325. gcm_ghash_avx PROC PUBLIC
  1326. lea rax,QWORD PTR[((-136))+rsp]
  1327. $L$SEH_begin_gcm_ghash_avx::
  ; Hand-encoded prologue; the byte lengths must stay in step with the
  ; offsets recorded in $L$SEH_info_gcm_ghash_clmul.
  1328. DB 048h,08dh,060h,0e0h ; lea rsp,[rax-32]   (rsp -= 168)
  1329. DB 00fh,029h,070h,0e0h ; movaps [rax-32],xmm6
  1330. DB 00fh,029h,078h,0f0h ; movaps [rax-16],xmm7
  1331. DB 044h,00fh,029h,000h ; movaps [rax],xmm8
  1332. DB 044h,00fh,029h,048h,010h ; movaps [rax+16],xmm9
  1333. DB 044h,00fh,029h,050h,020h ; movaps [rax+32],xmm10
  1334. DB 044h,00fh,029h,058h,030h ; movaps [rax+48],xmm11
  1335. DB 044h,00fh,029h,060h,040h ; movaps [rax+64],xmm12
  1336. DB 044h,00fh,029h,068h,050h ; movaps [rax+80],xmm13
  1337. DB 044h,00fh,029h,070h,060h ; movaps [rax+96],xmm14
  1338. DB 044h,00fh,029h,078h,070h ; movaps [rax+112],xmm15
  1339. vzeroupper
  1340. vmovdqu xmm10,XMMWORD PTR[rcx] ; xmm10 = accumulator
  1341. lea r10,QWORD PTR[$L$0x1c2_polynomial] ; r10 -> reduction constant
  1342. lea rdx,QWORD PTR[64+rdx] ; bias table pointer; entries are addressed as (N-64)+rdx
  1343. vmovdqu xmm13,XMMWORD PTR[$L$bswap_mask]
  1344. vpshufb xmm10,xmm10,xmm13
  1345. cmp r9,080h
  1346. jb $L$short_avx ; fewer than 128 bytes: per-block path
  1347. sub r9,080h
  ; First 128-byte chunk. The blocks at r8+112 down to r8+16 are
  ; multiplied by successive table entries, with the 000h, 011h and
  ; hi^lo products accumulated through xmm3, xmm4 and xmm5. The block at
  ; [r8] is left in xmm15 so the accumulator can be XORed into it.
  1348. vmovdqu xmm14,XMMWORD PTR[112+r8]
  1349. vmovdqu xmm6,XMMWORD PTR[((0-64))+rdx]
  1350. vpshufb xmm14,xmm14,xmm13
  1351. vmovdqu xmm7,XMMWORD PTR[((32-64))+rdx]
  1352. vpunpckhqdq xmm9,xmm14,xmm14
  1353. vmovdqu xmm15,XMMWORD PTR[96+r8]
  1354. vpclmulqdq xmm0,xmm14,xmm6,000h
  1355. vpxor xmm9,xmm9,xmm14
  1356. vpshufb xmm15,xmm15,xmm13
  1357. vpclmulqdq xmm1,xmm14,xmm6,011h
  1358. vmovdqu xmm6,XMMWORD PTR[((16-64))+rdx]
  1359. vpunpckhqdq xmm8,xmm15,xmm15
  1360. vmovdqu xmm14,XMMWORD PTR[80+r8]
  1361. vpclmulqdq xmm2,xmm9,xmm7,000h
  1362. vpxor xmm8,xmm8,xmm15
  1363. vpshufb xmm14,xmm14,xmm13
  1364. vpclmulqdq xmm3,xmm15,xmm6,000h
  1365. vpunpckhqdq xmm9,xmm14,xmm14
  1366. vpclmulqdq xmm4,xmm15,xmm6,011h
  1367. vmovdqu xmm6,XMMWORD PTR[((48-64))+rdx]
  1368. vpxor xmm9,xmm9,xmm14
  1369. vmovdqu xmm15,XMMWORD PTR[64+r8]
  1370. vpclmulqdq xmm5,xmm8,xmm7,010h
  1371. vmovdqu xmm7,XMMWORD PTR[((80-64))+rdx]
  1372. vpshufb xmm15,xmm15,xmm13
  1373. vpxor xmm3,xmm3,xmm0
  1374. vpclmulqdq xmm0,xmm14,xmm6,000h
  1375. vpxor xmm4,xmm4,xmm1
  1376. vpunpckhqdq xmm8,xmm15,xmm15
  1377. vpclmulqdq xmm1,xmm14,xmm6,011h
  1378. vmovdqu xmm6,XMMWORD PTR[((64-64))+rdx]
  1379. vpxor xmm5,xmm5,xmm2
  1380. vpclmulqdq xmm2,xmm9,xmm7,000h
  1381. vpxor xmm8,xmm8,xmm15
  1382. vmovdqu xmm14,XMMWORD PTR[48+r8]
  1383. vpxor xmm0,xmm0,xmm3
  1384. vpclmulqdq xmm3,xmm15,xmm6,000h
  1385. vpxor xmm1,xmm1,xmm4
  1386. vpshufb xmm14,xmm14,xmm13
  1387. vpclmulqdq xmm4,xmm15,xmm6,011h
  1388. vmovdqu xmm6,XMMWORD PTR[((96-64))+rdx]
  1389. vpxor xmm2,xmm2,xmm5
  1390. vpunpckhqdq xmm9,xmm14,xmm14
  1391. vpclmulqdq xmm5,xmm8,xmm7,010h
  1392. vmovdqu xmm7,XMMWORD PTR[((128-64))+rdx]
  1393. vpxor xmm9,xmm9,xmm14
  1394. vmovdqu xmm15,XMMWORD PTR[32+r8]
  1395. vpxor xmm3,xmm3,xmm0
  1396. vpclmulqdq xmm0,xmm14,xmm6,000h
  1397. vpxor xmm4,xmm4,xmm1
  1398. vpshufb xmm15,xmm15,xmm13
  1399. vpclmulqdq xmm1,xmm14,xmm6,011h
  1400. vmovdqu xmm6,XMMWORD PTR[((112-64))+rdx]
  1401. vpxor xmm5,xmm5,xmm2
  1402. vpunpckhqdq xmm8,xmm15,xmm15
  1403. vpclmulqdq xmm2,xmm9,xmm7,000h
  1404. vpxor xmm8,xmm8,xmm15
  1405. vmovdqu xmm14,XMMWORD PTR[16+r8]
  1406. vpxor xmm0,xmm0,xmm3
  1407. vpclmulqdq xmm3,xmm15,xmm6,000h
  1408. vpxor xmm1,xmm1,xmm4
  1409. vpshufb xmm14,xmm14,xmm13
  1410. vpclmulqdq xmm4,xmm15,xmm6,011h
  1411. vmovdqu xmm6,XMMWORD PTR[((144-64))+rdx]
  1412. vpxor xmm2,xmm2,xmm5
  1413. vpunpckhqdq xmm9,xmm14,xmm14
  1414. vpclmulqdq xmm5,xmm8,xmm7,010h
  1415. vmovdqu xmm7,XMMWORD PTR[((176-64))+rdx]
  1416. vpxor xmm9,xmm9,xmm14
  1417. vmovdqu xmm15,XMMWORD PTR[r8]
  1418. vpxor xmm3,xmm3,xmm0
  1419. vpclmulqdq xmm0,xmm14,xmm6,000h
  1420. vpxor xmm4,xmm4,xmm1
  1421. vpshufb xmm15,xmm15,xmm13
  1422. vpclmulqdq xmm1,xmm14,xmm6,011h
  1423. vmovdqu xmm6,XMMWORD PTR[((160-64))+rdx]
  1424. vpxor xmm5,xmm5,xmm2
  1425. vpclmulqdq xmm2,xmm9,xmm7,010h
  1426. lea r8,QWORD PTR[128+r8]
  1427. cmp r9,080h
  1428. jb $L$tail_avx ; no further full 128-byte chunk
  1429. vpxor xmm15,xmm15,xmm10 ; fold the accumulator into the first block
  1430. sub r9,080h
  1431. jmp $L$oop8x_avx
  1432. ALIGN 32
  1433. $L$oop8x_avx::
  ; Each iteration finishes the previous chunk (its last multiply plus the
  ; reduction, done with the two vpclmulqdq ...,[r10],010h steps) while
  ; starting the multiplies for the next 128 bytes at [r8].
  1434. vpunpckhqdq xmm8,xmm15,xmm15
  1435. vmovdqu xmm14,XMMWORD PTR[112+r8]
  1436. vpxor xmm3,xmm3,xmm0
  1437. vpxor xmm8,xmm8,xmm15
  1438. vpclmulqdq xmm10,xmm15,xmm6,000h
  1439. vpshufb xmm14,xmm14,xmm13
  1440. vpxor xmm4,xmm4,xmm1
  1441. vpclmulqdq xmm11,xmm15,xmm6,011h
  1442. vmovdqu xmm6,XMMWORD PTR[((0-64))+rdx]
  1443. vpunpckhqdq xmm9,xmm14,xmm14
  1444. vpxor xmm5,xmm5,xmm2
  1445. vpclmulqdq xmm12,xmm8,xmm7,000h
  1446. vmovdqu xmm7,XMMWORD PTR[((32-64))+rdx]
  1447. vpxor xmm9,xmm9,xmm14
  1448. vmovdqu xmm15,XMMWORD PTR[96+r8]
  1449. vpclmulqdq xmm0,xmm14,xmm6,000h
  1450. vpxor xmm10,xmm10,xmm3
  1451. vpshufb xmm15,xmm15,xmm13
  1452. vpclmulqdq xmm1,xmm14,xmm6,011h
  1453. vxorps xmm11,xmm11,xmm4
  1454. vmovdqu xmm6,XMMWORD PTR[((16-64))+rdx]
  1455. vpunpckhqdq xmm8,xmm15,xmm15
  1456. vpclmulqdq xmm2,xmm9,xmm7,000h
  1457. vpxor xmm12,xmm12,xmm5
  1458. vxorps xmm8,xmm8,xmm15
  1459. vmovdqu xmm14,XMMWORD PTR[80+r8]
  1460. vpxor xmm12,xmm12,xmm10
  1461. vpclmulqdq xmm3,xmm15,xmm6,000h
  1462. vpxor xmm12,xmm12,xmm11
  1463. vpslldq xmm9,xmm12,8
  1464. vpxor xmm3,xmm3,xmm0
  1465. vpclmulqdq xmm4,xmm15,xmm6,011h
  1466. vpsrldq xmm12,xmm12,8
  1467. vpxor xmm10,xmm10,xmm9
  1468. vmovdqu xmm6,XMMWORD PTR[((48-64))+rdx]
  1469. vpshufb xmm14,xmm14,xmm13
  1470. vxorps xmm11,xmm11,xmm12
  1471. vpxor xmm4,xmm4,xmm1
  1472. vpunpckhqdq xmm9,xmm14,xmm14
  1473. vpclmulqdq xmm5,xmm8,xmm7,010h
  1474. vmovdqu xmm7,XMMWORD PTR[((80-64))+rdx]
  1475. vpxor xmm9,xmm9,xmm14
  1476. vpxor xmm5,xmm5,xmm2
  1477. vmovdqu xmm15,XMMWORD PTR[64+r8]
  1478. vpalignr xmm12,xmm10,xmm10,8
  1479. vpclmulqdq xmm0,xmm14,xmm6,000h
  1480. vpshufb xmm15,xmm15,xmm13
  1481. vpxor xmm0,xmm0,xmm3
  1482. vpclmulqdq xmm1,xmm14,xmm6,011h
  1483. vmovdqu xmm6,XMMWORD PTR[((64-64))+rdx]
  1484. vpunpckhqdq xmm8,xmm15,xmm15
  1485. vpxor xmm1,xmm1,xmm4
  1486. vpclmulqdq xmm2,xmm9,xmm7,000h
  1487. vxorps xmm8,xmm8,xmm15
  1488. vpxor xmm2,xmm2,xmm5
  1489. vmovdqu xmm14,XMMWORD PTR[48+r8]
  1490. vpclmulqdq xmm10,xmm10,XMMWORD PTR[r10],010h ; first reduction step
  1491. vpclmulqdq xmm3,xmm15,xmm6,000h
  1492. vpshufb xmm14,xmm14,xmm13
  1493. vpxor xmm3,xmm3,xmm0
  1494. vpclmulqdq xmm4,xmm15,xmm6,011h
  1495. vmovdqu xmm6,XMMWORD PTR[((96-64))+rdx]
  1496. vpunpckhqdq xmm9,xmm14,xmm14
  1497. vpxor xmm4,xmm4,xmm1
  1498. vpclmulqdq xmm5,xmm8,xmm7,010h
  1499. vmovdqu xmm7,XMMWORD PTR[((128-64))+rdx]
  1500. vpxor xmm9,xmm9,xmm14
  1501. vpxor xmm5,xmm5,xmm2
  1502. vmovdqu xmm15,XMMWORD PTR[32+r8]
  1503. vpclmulqdq xmm0,xmm14,xmm6,000h
  1504. vpshufb xmm15,xmm15,xmm13
  1505. vpxor xmm0,xmm0,xmm3
  1506. vpclmulqdq xmm1,xmm14,xmm6,011h
  1507. vmovdqu xmm6,XMMWORD PTR[((112-64))+rdx]
  1508. vpunpckhqdq xmm8,xmm15,xmm15
  1509. vpxor xmm1,xmm1,xmm4
  1510. vpclmulqdq xmm2,xmm9,xmm7,000h
  1511. vpxor xmm8,xmm8,xmm15
  1512. vpxor xmm2,xmm2,xmm5
  1513. vxorps xmm10,xmm10,xmm12
  1514. vmovdqu xmm14,XMMWORD PTR[16+r8]
  1515. vpalignr xmm12,xmm10,xmm10,8
  1516. vpclmulqdq xmm3,xmm15,xmm6,000h
  1517. vpshufb xmm14,xmm14,xmm13
  1518. vpxor xmm3,xmm3,xmm0
  1519. vpclmulqdq xmm4,xmm15,xmm6,011h
  1520. vmovdqu xmm6,XMMWORD PTR[((144-64))+rdx]
  1521. vpclmulqdq xmm10,xmm10,XMMWORD PTR[r10],010h ; second reduction step
  1522. vxorps xmm12,xmm12,xmm11
  1523. vpunpckhqdq xmm9,xmm14,xmm14
  1524. vpxor xmm4,xmm4,xmm1
  1525. vpclmulqdq xmm5,xmm8,xmm7,010h
  1526. vmovdqu xmm7,XMMWORD PTR[((176-64))+rdx]
  1527. vpxor xmm9,xmm9,xmm14
  1528. vpxor xmm5,xmm5,xmm2
  1529. vmovdqu xmm15,XMMWORD PTR[r8]
  1530. vpclmulqdq xmm0,xmm14,xmm6,000h
  1531. vpshufb xmm15,xmm15,xmm13
  1532. vpclmulqdq xmm1,xmm14,xmm6,011h
  1533. vmovdqu xmm6,XMMWORD PTR[((160-64))+rdx]
  1534. vpxor xmm15,xmm15,xmm12 ; fold the reduced previous chunk into
  1535. vpclmulqdq xmm2,xmm9,xmm7,010h
  1536. vpxor xmm15,xmm15,xmm10 ; ... the first block of this chunk
  1537. lea r8,QWORD PTR[128+r8]
  1538. sub r9,080h
  1539. jnc $L$oop8x_avx ; continue while another 128 bytes were available
  1540. add r9,080h ; undo the failed subtraction: r9 = bytes still unread
  1541. jmp $L$tail_no_xor_avx ; xmm15 already includes the accumulator
  1542. ALIGN 32
  1543. $L$short_avx::
  ; Per-block path for up to seven 16-byte blocks, taken from the end of
  ; the remaining data backwards. r8 is first moved to the end of the
  ; data; each step multiplies the block loaded previously and loads the
  ; next one, leaving the final multiply to $L$tail_avx.
  1544. vmovdqu xmm14,XMMWORD PTR[((-16))+r9*1+r8]
  1545. lea r8,QWORD PTR[r9*1+r8]
  1546. vmovdqu xmm6,XMMWORD PTR[((0-64))+rdx]
  1547. vmovdqu xmm7,XMMWORD PTR[((32-64))+rdx]
  1548. vpshufb xmm15,xmm14,xmm13
  ; Copying xmm0-xmm2 here makes the following "xor xmm3..5 with xmm0..2"
  ; produce zero, so the three accumulators start out cleared.
  1549. vmovdqa xmm3,xmm0
  1550. vmovdqa xmm4,xmm1
  1551. vmovdqa xmm5,xmm2
  1552. sub r9,010h
  1553. jz $L$tail_avx
  1554. vpunpckhqdq xmm8,xmm15,xmm15
  1555. vpxor xmm3,xmm3,xmm0
  1556. vpclmulqdq xmm0,xmm15,xmm6,000h
  1557. vpxor xmm8,xmm8,xmm15
  1558. vmovdqu xmm14,XMMWORD PTR[((-32))+r8]
  1559. vpxor xmm4,xmm4,xmm1
  1560. vpclmulqdq xmm1,xmm15,xmm6,011h
  1561. vmovdqu xmm6,XMMWORD PTR[((16-64))+rdx]
  1562. vpshufb xmm15,xmm14,xmm13
  1563. vpxor xmm5,xmm5,xmm2
  1564. vpclmulqdq xmm2,xmm8,xmm7,000h
  1565. vpsrldq xmm7,xmm7,8 ; bring the upper qword of the packed entry into the low position
  1566. sub r9,010h
  1567. jz $L$tail_avx
  1568. vpunpckhqdq xmm8,xmm15,xmm15
  1569. vpxor xmm3,xmm3,xmm0
  1570. vpclmulqdq xmm0,xmm15,xmm6,000h
  1571. vpxor xmm8,xmm8,xmm15
  1572. vmovdqu xmm14,XMMWORD PTR[((-48))+r8]
  1573. vpxor xmm4,xmm4,xmm1
  1574. vpclmulqdq xmm1,xmm15,xmm6,011h
  1575. vmovdqu xmm6,XMMWORD PTR[((48-64))+rdx]
  1576. vpshufb xmm15,xmm14,xmm13
  1577. vpxor xmm5,xmm5,xmm2
  1578. vpclmulqdq xmm2,xmm8,xmm7,000h
  1579. vmovdqu xmm7,XMMWORD PTR[((80-64))+rdx]
  1580. sub r9,010h
  1581. jz $L$tail_avx
  1582. vpunpckhqdq xmm8,xmm15,xmm15
  1583. vpxor xmm3,xmm3,xmm0
  1584. vpclmulqdq xmm0,xmm15,xmm6,000h
  1585. vpxor xmm8,xmm8,xmm15
  1586. vmovdqu xmm14,XMMWORD PTR[((-64))+r8]
  1587. vpxor xmm4,xmm4,xmm1
  1588. vpclmulqdq xmm1,xmm15,xmm6,011h
  1589. vmovdqu xmm6,XMMWORD PTR[((64-64))+rdx]
  1590. vpshufb xmm15,xmm14,xmm13
  1591. vpxor xmm5,xmm5,xmm2
  1592. vpclmulqdq xmm2,xmm8,xmm7,000h
  1593. vpsrldq xmm7,xmm7,8
  1594. sub r9,010h
  1595. jz $L$tail_avx
  1596. vpunpckhqdq xmm8,xmm15,xmm15
  1597. vpxor xmm3,xmm3,xmm0
  1598. vpclmulqdq xmm0,xmm15,xmm6,000h
  1599. vpxor xmm8,xmm8,xmm15
  1600. vmovdqu xmm14,XMMWORD PTR[((-80))+r8]
  1601. vpxor xmm4,xmm4,xmm1
  1602. vpclmulqdq xmm1,xmm15,xmm6,011h
  1603. vmovdqu xmm6,XMMWORD PTR[((96-64))+rdx]
  1604. vpshufb xmm15,xmm14,xmm13
  1605. vpxor xmm5,xmm5,xmm2
  1606. vpclmulqdq xmm2,xmm8,xmm7,000h
  1607. vmovdqu xmm7,XMMWORD PTR[((128-64))+rdx]
  1608. sub r9,010h
  1609. jz $L$tail_avx
  1610. vpunpckhqdq xmm8,xmm15,xmm15
  1611. vpxor xmm3,xmm3,xmm0
  1612. vpclmulqdq xmm0,xmm15,xmm6,000h
  1613. vpxor xmm8,xmm8,xmm15
  1614. vmovdqu xmm14,XMMWORD PTR[((-96))+r8]
  1615. vpxor xmm4,xmm4,xmm1
  1616. vpclmulqdq xmm1,xmm15,xmm6,011h
  1617. vmovdqu xmm6,XMMWORD PTR[((112-64))+rdx]
  1618. vpshufb xmm15,xmm14,xmm13
  1619. vpxor xmm5,xmm5,xmm2
  1620. vpclmulqdq xmm2,xmm8,xmm7,000h
  1621. vpsrldq xmm7,xmm7,8
  1622. sub r9,010h
  1623. jz $L$tail_avx
  1624. vpunpckhqdq xmm8,xmm15,xmm15
  1625. vpxor xmm3,xmm3,xmm0
  1626. vpclmulqdq xmm0,xmm15,xmm6,000h
  1627. vpxor xmm8,xmm8,xmm15
  1628. vmovdqu xmm14,XMMWORD PTR[((-112))+r8]
  1629. vpxor xmm4,xmm4,xmm1
  1630. vpclmulqdq xmm1,xmm15,xmm6,011h
  1631. vmovdqu xmm6,XMMWORD PTR[((144-64))+rdx]
  1632. vpshufb xmm15,xmm14,xmm13
  1633. vpxor xmm5,xmm5,xmm2
  1634. vpclmulqdq xmm2,xmm8,xmm7,000h
  1635. vmovq xmm7,QWORD PTR[((184-64))+rdx] ; only the upper qword of the entry at table offset 176
  1636. sub r9,010h
  1637. jmp $L$tail_avx ; seventh block loaded: always finish via the tail
  1638. ALIGN 32
  1639. $L$tail_avx::
  1640. vpxor xmm15,xmm15,xmm10 ; fold the accumulator into the pending block
  1641. $L$tail_no_xor_avx::
  ; Last multiply, then combine the three partial products and reduce the
  ; result to 128 bits in xmm10 using the constant at [r10].
  1642. vpunpckhqdq xmm8,xmm15,xmm15
  1643. vpxor xmm3,xmm3,xmm0
  1644. vpclmulqdq xmm0,xmm15,xmm6,000h
  1645. vpxor xmm8,xmm8,xmm15
  1646. vpxor xmm4,xmm4,xmm1
  1647. vpclmulqdq xmm1,xmm15,xmm6,011h
  1648. vpxor xmm5,xmm5,xmm2
  1649. vpclmulqdq xmm2,xmm8,xmm7,000h
  1650. vmovdqu xmm12,XMMWORD PTR[r10]
  1651. vpxor xmm10,xmm3,xmm0
  1652. vpxor xmm11,xmm4,xmm1
  1653. vpxor xmm5,xmm5,xmm2
  1654. vpxor xmm5,xmm5,xmm10
  1655. vpxor xmm5,xmm5,xmm11
  1656. vpslldq xmm9,xmm5,8
  1657. vpsrldq xmm5,xmm5,8
  1658. vpxor xmm10,xmm10,xmm9
  1659. vpxor xmm11,xmm11,xmm5
  1660. vpclmulqdq xmm9,xmm10,xmm12,010h
  1661. vpalignr xmm10,xmm10,xmm10,8
  1662. vpxor xmm10,xmm10,xmm9
  1663. vpclmulqdq xmm9,xmm10,xmm12,010h
  1664. vpalignr xmm10,xmm10,xmm10,8
  1665. vpxor xmm10,xmm10,xmm11
  1666. vpxor xmm10,xmm10,xmm9
  1667. cmp r9,0
  1668. jne $L$short_avx ; bytes left over after the 128-byte chunks
  1669. vpshufb xmm10,xmm10,xmm13 ; back to the in-memory byte order
  1670. vmovdqu XMMWORD PTR[rcx],xmm10
  1671. vzeroupper
  ; Restore xmm6-xmm15 from the frame set up in the prologue and release it.
  1672. movaps xmm6,XMMWORD PTR[rsp]
  1673. movaps xmm7,XMMWORD PTR[16+rsp]
  1674. movaps xmm8,XMMWORD PTR[32+rsp]
  1675. movaps xmm9,XMMWORD PTR[48+rsp]
  1676. movaps xmm10,XMMWORD PTR[64+rsp]
  1677. movaps xmm11,XMMWORD PTR[80+rsp]
  1678. movaps xmm12,XMMWORD PTR[96+rsp]
  1679. movaps xmm13,XMMWORD PTR[112+rsp]
  1680. movaps xmm14,XMMWORD PTR[128+rsp]
  1681. movaps xmm15,XMMWORD PTR[144+rsp]
  1682. lea rsp,QWORD PTR[168+rsp]
  1683. $L$SEH_end_gcm_ghash_avx::
  1684. DB 0F3h,0C3h ;repret
  1685. gcm_ghash_avx ENDP
  ; Constants shared by the routines in this file. They are placed in the
  ; code segment, directly after gcm_ghash_avx.
  1686. ALIGN 64
  1687. $L$bswap_mask::
  ; Shuffle control 15..0: used with pshufb/vpshufb to reverse the byte
  ; order of a 16-byte value.
  1688. DB 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
  1689. $L$0x1c2_polynomial::
  ; 128-bit constant with byte 0 = 01h and byte 15 = 0c2h. gcm_init_avx
  ; XORs it in conditionally and gcm_ghash_avx uses it for reduction.
  1690. DB 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0c2h
  1691. $L$7_mask::
  ; Two qwords, each equal to 7; loaded by the $L$mod4_loop code above.
  1692. DD 7,0,7,0
  1693. $L$7_mask_poly::
  ; Qwords 7 and 450 (= 01C2h); not referenced in the visible part of the file.
  1694. DD 7,0,450,0
  1695. ALIGN 64
  1696. $L$rem_4bit::
  ; 16 qword entries whose low dword is zero; entry 1 is 1C200000h in the
  ; high dword. NOTE(review): presumably the remainder table for the
  ; 4-bit routines, which are outside this view -- confirm.
  1697. DD 0,0,0,471859200,0,943718400,0,610271232
  1698. DD 0,1887436800,0,1822425088,0,1220542464,0,1423966208
  1699. DD 0,3774873600,0,4246732800,0,3644850176,0,3311403008
  1700. DD 0,2441084928,0,2376073216,0,2847932416,0,3051356160
  1701. $L$rem_8bit::
  ; 256 word entries (32 rows of 8); entry 1 is 01C2h. NOTE(review):
  ; presumably the 8-bit counterpart of $L$rem_4bit -- its users are
  ; outside this view.
  1702. DW 00000h,001C2h,00384h,00246h,00708h,006CAh,0048Ch,0054Eh
  1703. DW 00E10h,00FD2h,00D94h,00C56h,00918h,008DAh,00A9Ch,00B5Eh
  1704. DW 01C20h,01DE2h,01FA4h,01E66h,01B28h,01AEAh,018ACh,0196Eh
  1705. DW 01230h,013F2h,011B4h,01076h,01538h,014FAh,016BCh,0177Eh
  1706. DW 03840h,03982h,03BC4h,03A06h,03F48h,03E8Ah,03CCCh,03D0Eh
  1707. DW 03650h,03792h,035D4h,03416h,03158h,0309Ah,032DCh,0331Eh
  1708. DW 02460h,025A2h,027E4h,02626h,02368h,022AAh,020ECh,0212Eh
  1709. DW 02A70h,02BB2h,029F4h,02836h,02D78h,02CBAh,02EFCh,02F3Eh
  1710. DW 07080h,07142h,07304h,072C6h,07788h,0764Ah,0740Ch,075CEh
  1711. DW 07E90h,07F52h,07D14h,07CD6h,07998h,0785Ah,07A1Ch,07BDEh
  1712. DW 06CA0h,06D62h,06F24h,06EE6h,06BA8h,06A6Ah,0682Ch,069EEh
  1713. DW 062B0h,06372h,06134h,060F6h,065B8h,0647Ah,0663Ch,067FEh
  1714. DW 048C0h,04902h,04B44h,04A86h,04FC8h,04E0Ah,04C4Ch,04D8Eh
  1715. DW 046D0h,04712h,04554h,04496h,041D8h,0401Ah,0425Ch,0439Eh
  1716. DW 054E0h,05522h,05764h,056A6h,053E8h,0522Ah,0506Ch,051AEh
  1717. DW 05AF0h,05B32h,05974h,058B6h,05DF8h,05C3Ah,05E7Ch,05FBEh
  1718. DW 0E100h,0E0C2h,0E284h,0E346h,0E608h,0E7CAh,0E58Ch,0E44Eh
  1719. DW 0EF10h,0EED2h,0EC94h,0ED56h,0E818h,0E9DAh,0EB9Ch,0EA5Eh
  1720. DW 0FD20h,0FCE2h,0FEA4h,0FF66h,0FA28h,0FBEAh,0F9ACh,0F86Eh
  1721. DW 0F330h,0F2F2h,0F0B4h,0F176h,0F438h,0F5FAh,0F7BCh,0F67Eh
  1722. DW 0D940h,0D882h,0DAC4h,0DB06h,0DE48h,0DF8Ah,0DDCCh,0DC0Eh
  1723. DW 0D750h,0D692h,0D4D4h,0D516h,0D058h,0D19Ah,0D3DCh,0D21Eh
  1724. DW 0C560h,0C4A2h,0C6E4h,0C726h,0C268h,0C3AAh,0C1ECh,0C02Eh
  1725. DW 0CB70h,0CAB2h,0C8F4h,0C936h,0CC78h,0CDBAh,0CFFCh,0CE3Eh
  1726. DW 09180h,09042h,09204h,093C6h,09688h,0974Ah,0950Ch,094CEh
  1727. DW 09F90h,09E52h,09C14h,09DD6h,09898h,0995Ah,09B1Ch,09ADEh
  1728. DW 08DA0h,08C62h,08E24h,08FE6h,08AA8h,08B6Ah,0892Ch,088EEh
  1729. DW 083B0h,08272h,08034h,081F6h,084B8h,0857Ah,0873Ch,086FEh
  1730. DW 0A9C0h,0A802h,0AA44h,0AB86h,0AEC8h,0AF0Ah,0AD4Ch,0AC8Eh
  1731. DW 0A7D0h,0A612h,0A454h,0A596h,0A0D8h,0A11Ah,0A35Ch,0A29Eh
  1732. DW 0B5E0h,0B422h,0B664h,0B7A6h,0B2E8h,0B32Ah,0B16Ch,0B0AEh
  1733. DW 0BBF0h,0BA32h,0B874h,0B9B6h,0BCF8h,0BD3Ah,0BF7Ch,0BEBEh
  ; NUL-terminated ASCII identification string:
  ; "GHASH for x86_64, CRYPTOGAMS by <appro@openssl.org>"
  1734. DB 71,72,65,83,72,32,102,111,114,32,120,56,54,95,54,52
  1735. DB 44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32
  1736. DB 60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111
  1737. DB 114,103,62,0
  1738. ALIGN 64
  1739. EXTERN __imp_RtlVirtualUnwind:NEAR
  1740. ALIGN 16
  ;-----------------------------------------------------------------------
  ; se_handler
  ; Exception handler named by $L$SEH_info_gcm_gmult_4bit and
  ; $L$SEH_info_gcm_ghash_4bit. It patches the register values in the
  ; context record at r8, copies that record to the address stored at
  ; [40+r9], calls RtlVirtualUnwind and returns 1 in eax.
  ; In:    r8 = context record, r9 = dispatcher context
  ; NOTE(review): the structure offsets are not defined in this file. The
  ; field names in the comments below follow the Win64 CONTEXT and
  ; DISPATCHER_CONTEXT layouts from winnt.h -- confirm before relying on
  ; them.
  ;-----------------------------------------------------------------------
  1741. se_handler PROC PRIVATE
  1742. push rsi
  1743. push rdi
  1744. push rbx
  1745. push rbp
  1746. push r12
  1747. push r13
  1748. push r14
  1749. push r15
  1750. pushfq
  1751. sub rsp,64 ; room for the stack arguments of the call below
  1752. mov rax,QWORD PTR[120+r8] ; context Rax
  1753. mov rbx,QWORD PTR[248+r8] ; context Rip
  1754. mov rsi,QWORD PTR[8+r9] ; dispatcher ImageBase
  1755. mov r11,QWORD PTR[56+r9] ; dispatcher HandlerData: the two imagerel DDs after the handler in .xdata
  1756. mov r10d,DWORD PTR[r11] ; first DD (prologue label)
  1757. lea r10,QWORD PTR[r10*1+rsi]
  1758. cmp rbx,r10
  1759. jb $L$in_prologue ; fault before the prologue label: use context Rax
  1760. mov rax,QWORD PTR[152+r8] ; context Rsp
  1761. mov r10d,DWORD PTR[4+r11] ; second DD (epilogue label)
  1762. lea r10,QWORD PTR[r10*1+rsi]
  1763. cmp rbx,r10
  1764. jae $L$in_prologue ; fault at or after the epilogue label: use context Rsp as is
  ; Fault inside the body: step over the function's frame and reload the
  ; six registers stored just below the resulting address.
  ; NOTE(review): 48 is presumably six pushed registers and 280 the local
  ; frame of the 4-bit routines, which are outside this view -- confirm.
  1765. lea rax,QWORD PTR[((48+280))+rax]
  1766. mov rbx,QWORD PTR[((-8))+rax]
  1767. mov rbp,QWORD PTR[((-16))+rax]
  1768. mov r12,QWORD PTR[((-24))+rax]
  1769. mov r13,QWORD PTR[((-32))+rax]
  1770. mov r14,QWORD PTR[((-40))+rax]
  1771. mov r15,QWORD PTR[((-48))+rax]
  1772. mov QWORD PTR[144+r8],rbx ; context Rbx
  1773. mov QWORD PTR[160+r8],rbp ; context Rbp
  1774. mov QWORD PTR[216+r8],r12 ; context R12
  1775. mov QWORD PTR[224+r8],r13 ; context R13
  1776. mov QWORD PTR[232+r8],r14 ; context R14
  1777. mov QWORD PTR[240+r8],r15 ; context R15
  1778. $L$in_prologue::
  ; rax is the stack pointer value to report. rdi/rsi are reloaded from
  ; [rax+8] and [rax+16]. NOTE(review): presumably where the interrupted
  ; function saved them on entry -- that code is outside this view.
  1779. mov rdi,QWORD PTR[8+rax]
  1780. mov rsi,QWORD PTR[16+rax]
  1781. mov QWORD PTR[152+r8],rax ; context Rsp
  1782. mov QWORD PTR[168+r8],rsi ; context Rsi
  1783. mov QWORD PTR[176+r8],rdi ; context Rdi
  ; Copy the patched record (154 qwords) from r8 to the address at [40+r9].
  1784. mov rdi,QWORD PTR[40+r9] ; dispatcher ContextRecord
  1785. mov rsi,r8
  1786. mov ecx,154
  1787. DD 0a548f3fch ; bytes FC F3 48 A5 = cld / rep movsq
  ; Set up the eight arguments: rcx, rdx, r8, r9 plus four stack slots.
  1788. mov rsi,r9
  1789. xor rcx,rcx ; arg 1 = 0
  1790. mov rdx,QWORD PTR[8+rsi] ; arg 2 = ImageBase
  1791. mov r8,QWORD PTR[rsi] ; arg 3 = ControlPc
  1792. mov r9,QWORD PTR[16+rsi] ; arg 4 = FunctionEntry
  1793. mov r10,QWORD PTR[40+rsi] ; ContextRecord
  1794. lea r11,QWORD PTR[56+rsi] ; address of HandlerData
  1795. lea r12,QWORD PTR[24+rsi] ; address of EstablisherFrame
  1796. mov QWORD PTR[32+rsp],r10 ; arg 5
  1797. mov QWORD PTR[40+rsp],r11 ; arg 6
  1798. mov QWORD PTR[48+rsp],r12 ; arg 7
  1799. mov QWORD PTR[56+rsp],rcx ; arg 8 = 0
  1800. call QWORD PTR[__imp_RtlVirtualUnwind]
  1801. mov eax,1 ; return value 1 (NOTE(review): presumably ExceptionContinueSearch -- confirm)
  1802. add rsp,64
  1803. popfq
  1804. pop r15
  1805. pop r14
  1806. pop r13
  1807. pop r12
  1808. pop rbp
  1809. pop rbx
  1810. pop rdi
  1811. pop rsi
  1812. DB 0F3h,0C3h ;repret
  1813. se_handler ENDP
  1814. .text$ ENDS
  ; Function table: one (begin, end, unwind-info) triple of image-relative
  ; addresses per function. The two AVX entries reuse the unwind info of
  ; their CLMUL counterparts, so their prologues must match byte for byte
  ; from the $L$SEH_begin label onwards.
  1815. .pdata SEGMENT READONLY ALIGN(4)
  1816. ALIGN 4
  1817. DD imagerel $L$SEH_begin_gcm_gmult_4bit
  1818. DD imagerel $L$SEH_end_gcm_gmult_4bit
  1819. DD imagerel $L$SEH_info_gcm_gmult_4bit
  1820. DD imagerel $L$SEH_begin_gcm_ghash_4bit
  1821. DD imagerel $L$SEH_end_gcm_ghash_4bit
  1822. DD imagerel $L$SEH_info_gcm_ghash_4bit
  1823. DD imagerel $L$SEH_begin_gcm_init_clmul
  1824. DD imagerel $L$SEH_end_gcm_init_clmul
  1825. DD imagerel $L$SEH_info_gcm_init_clmul
  1826. DD imagerel $L$SEH_begin_gcm_ghash_clmul
  1827. DD imagerel $L$SEH_end_gcm_ghash_clmul
  1828. DD imagerel $L$SEH_info_gcm_ghash_clmul
  1829. DD imagerel $L$SEH_begin_gcm_init_avx
  1830. DD imagerel $L$SEH_end_gcm_init_avx
  1831. DD imagerel $L$SEH_info_gcm_init_clmul ; shared with gcm_init_clmul
  1832. DD imagerel $L$SEH_begin_gcm_ghash_avx
  1833. DD imagerel $L$SEH_end_gcm_ghash_avx
  1834. DD imagerel $L$SEH_info_gcm_ghash_clmul ; shared with gcm_ghash_clmul
  1835. .pdata ENDS
  ; Unwind information referenced from .pdata. The byte encodings are
  ; decoded in the comments according to the Microsoft x64 UNWIND_INFO /
  ; UNWIND_CODE format.
  1836. .xdata SEGMENT READONLY ALIGN(8)
  1837. ALIGN 8
  1838. $L$SEH_info_gcm_gmult_4bit::
  1839. DB 9,0,0,0 ; version 1 with the exception-handler flag; no unwind codes
  1840. DD imagerel se_handler
  1841. DD imagerel $L$gmult_prologue,imagerel $L$gmult_epilogue ; handler data read by se_handler
  1842. $L$SEH_info_gcm_ghash_4bit::
  1843. DB 9,0,0,0 ; same header as above
  1844. DD imagerel se_handler
  1845. DD imagerel $L$ghash_prologue,imagerel $L$ghash_epilogue ; handler data read by se_handler
  1846. $L$SEH_info_gcm_init_clmul::
  ; Matches the prologue "sub rsp,24 / movaps [rsp],xmm6".
  1847. DB 001h,008h,003h,000h ; version 1, prologue 8 bytes, 3 code slots
  1848. DB 008h,068h,000h,000h ; offset 8: xmm6 saved at [rsp+0]
  1849. DB 004h,022h,000h,000h ; offset 4: 24-byte stack allocation
  1850. $L$SEH_info_gcm_ghash_clmul::
  ; Matches the prologue that allocates 168 bytes and saves xmm6-xmm15 at
  ; [rsp+0] .. [rsp+144]. The third byte of each save entry is the slot
  ; offset divided by 16.
  1851. DB 001h,033h,016h,000h ; version 1, prologue 33h bytes, 22 code slots
  1852. DB 033h,0f8h,009h,000h ; offset 33h: xmm15 at [rsp+144]
  1853. DB 02eh,0e8h,008h,000h ; offset 2eh: xmm14 at [rsp+128]
  1854. DB 029h,0d8h,007h,000h ; offset 29h: xmm13 at [rsp+112]
  1855. DB 024h,0c8h,006h,000h ; offset 24h: xmm12 at [rsp+96]
  1856. DB 01fh,0b8h,005h,000h ; offset 1fh: xmm11 at [rsp+80]
  1857. DB 01ah,0a8h,004h,000h ; offset 1ah: xmm10 at [rsp+64]
  1858. DB 015h,098h,003h,000h ; offset 15h: xmm9 at [rsp+48]
  1859. DB 010h,088h,002h,000h ; offset 10h: xmm8 at [rsp+32]
  1860. DB 00ch,078h,001h,000h ; offset 0ch: xmm7 at [rsp+16]
  1861. DB 008h,068h,000h,000h ; offset 08h: xmm6 at [rsp+0]
  1862. DB 004h,001h,015h,000h ; offset 04h: stack allocation of 15h*8 = 168 bytes
  1863. .xdata ENDS
  1864. END