rsaz-x86_64.masm 39 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267
  ; File-level assembler setup (MASM syntax, x86-64).
  ; DOTNAME permits identifiers that begin with '.', which the segment name below needs.
  1. OPTION DOTNAME
  ; Open the code segment, aligned to 256 bytes.
  2. .text$ SEGMENT ALIGN(256) 'CODE'
  ; Externally defined symbol; the routines below read the dword at offset 8 from it
  ; to choose between the mul-based and the mulx/adcx/adox-based code paths.
  3. EXTERN OPENSSL_ia32cap_P:NEAR
  ; ---------------------------------------------------------------------
  ; rsaz_512_sqr: repeated squaring of an 8-limb (8 x 64-bit) value, each
  ; squaring followed by calls to the reduce and subtract helpers.
  ;
  ; ABI: Microsoft x64. Arguments arrive in rcx, rdx, r8, r9 and [40+rsp];
  ; the prologue moves them into rdi, rsi, rdx, rcx, r8 respectively.
  ; NOTE(review): presumably (out, inp, mod, n0, count) as in OpenSSL's C
  ; prototype - confirm against the caller.
  ;
  ; After the remap:
  ;   rdi  = arg1; copied into rsi at the end of every iteration, so each
  ;          later iteration squares the limbs found at arg1
  ;   rsi  = arg2; source limbs for the first iteration
  ;   rdx  = arg3; parked in xmm1 and reloaded into rbp before each reduce call
  ;   rcx  = arg4; saved at [128+rsp]
  ;   r8d  = arg5; iteration counter, kept at [128+8+rsp]
  ;
  ; Stack frame (after sub rsp,128+24):
  ;   [rsp .. 120+rsp]  16-limb square, limb k at [8*k+rsp]
  ;   [128+rsp]         saved arg4
  ;   [136+rsp]         iteration counter
  ;
  ; Saves/restores rbx, rbp, r12-r15 on the stack and rdi/rsi in the
  ; caller-provided home slots at [8+rsp]/[16+rsp].
  ; __rsaz_512_reduce, __rsaz_512_reducex and __rsaz_512_subtract are defined
  ; outside this view; their register contracts are not documented here.
  ; ---------------------------------------------------------------------
  4. PUBLIC rsaz_512_sqr
  5. ALIGN 32
  6. rsaz_512_sqr PROC PUBLIC
  7. mov QWORD PTR[8+rsp],rdi ;WIN64 prologue
  8. mov QWORD PTR[16+rsp],rsi
  9. mov rax,rsp
  10. $L$SEH_begin_rsaz_512_sqr::
  ; Shuffle Win64 argument registers into rdi, rsi, rdx, rcx, r8.
  11. mov rdi,rcx
  12. mov rsi,rdx
  13. mov rdx,r8
  14. mov rcx,r9
  15. mov r8,QWORD PTR[40+rsp]
  16. push rbx
  17. push rbp
  18. push r12
  19. push r13
  20. push r14
  21. push r15
  22. sub rsp,128+24
  23. $L$sqr_body::
  24. DB 102,72,15,110,202 ; bytes encode: movq xmm1,rdx (park arg3)
  ; Preload the first two source limbs: rdx = a[0], rax = a[1].
  25. mov rdx,QWORD PTR[rsi]
  26. mov rax,QWORD PTR[8+rsi]
  27. mov QWORD PTR[128+rsp],rcx
  ; Take the mulx/adcx/adox path only if bits 8 and 19 of the capability dword
  ; are both set. NOTE(review): presumably the BMI2 and ADX feature bits - confirm.
  28. mov r11d,080100h
  29. and r11d,DWORD PTR[((OPENSSL_ia32cap_P+8))]
  30. cmp r11d,080100h
  31. je $L$oop_sqrx
  32. jmp $L$oop_sqr
  33. ALIGN 32
  ; ---- mul-based squaring loop. Entry: rdx = a[0], rax = a[1], r8d = count ----
  34. $L$oop_sqr::
  35. mov DWORD PTR[((128+8))+rsp],r8d
  ; Cross products a[0]*a[1..7] accumulated into r8..r15 (rbx = a[0], rbp = a[1]).
  36. mov rbx,rdx
  37. mov rbp,rax
  38. mul rdx
  39. mov r8,rax
  40. mov rax,QWORD PTR[16+rsi]
  41. mov r9,rdx
  42. mul rbx
  43. add r9,rax
  44. mov rax,QWORD PTR[24+rsi]
  45. mov r10,rdx
  46. adc r10,0
  47. mul rbx
  48. add r10,rax
  49. mov rax,QWORD PTR[32+rsi]
  50. mov r11,rdx
  51. adc r11,0
  52. mul rbx
  53. add r11,rax
  54. mov rax,QWORD PTR[40+rsi]
  55. mov r12,rdx
  56. adc r12,0
  57. mul rbx
  58. add r12,rax
  59. mov rax,QWORD PTR[48+rsi]
  60. mov r13,rdx
  61. adc r13,0
  62. mul rbx
  63. add r13,rax
  64. mov rax,QWORD PTR[56+rsi]
  65. mov r14,rdx
  66. adc r14,0
  67. mul rbx
  68. add r14,rax
  69. mov rax,rbx
  70. adc rdx,0
  ; Double r8 (carry-out captured in rcx), add the high half of a[0]^2,
  ; and store result limbs 0 and 1.
  71. xor rcx,rcx
  72. add r8,r8
  73. mov r15,rdx
  74. adc rcx,0
  75. mul rax
  76. add rdx,r8
  77. adc rcx,0
  78. mov QWORD PTR[rsp],rax
  79. mov QWORD PTR[8+rsp],rdx
  ; Cross products a[1]*a[2..7] added into r10..r15, with r8 taking the new top limb.
  80. mov rax,QWORD PTR[16+rsi]
  81. mul rbp
  82. add r10,rax
  83. mov rax,QWORD PTR[24+rsi]
  84. mov rbx,rdx
  85. adc rbx,0
  86. mul rbp
  87. add r11,rax
  88. mov rax,QWORD PTR[32+rsi]
  89. adc rdx,0
  90. add r11,rbx
  91. mov rbx,rdx
  92. adc rbx,0
  93. mul rbp
  94. add r12,rax
  95. mov rax,QWORD PTR[40+rsi]
  96. adc rdx,0
  97. add r12,rbx
  98. mov rbx,rdx
  99. adc rbx,0
  100. mul rbp
  101. add r13,rax
  102. mov rax,QWORD PTR[48+rsi]
  103. adc rdx,0
  104. add r13,rbx
  105. mov rbx,rdx
  106. adc rbx,0
  107. mul rbp
  108. add r14,rax
  109. mov rax,QWORD PTR[56+rsi]
  110. adc rdx,0
  111. add r14,rbx
  112. mov rbx,rdx
  113. adc rbx,0
  114. mul rbp
  115. add r15,rax
  116. mov rax,rbp
  117. adc rdx,0
  118. add r15,rbx
  119. adc rdx,0
  ; Double r10:r9 (carry-out in rbx), add a[1]^2 plus the previous carry (rcx),
  ; and store result limbs 2 and 3. rbp is reloaded with a[2].
  120. xor rbx,rbx
  121. add r9,r9
  122. mov r8,rdx
  123. adc r10,r10
  124. adc rbx,0
  125. mul rax
  126. add rax,rcx
  127. mov rbp,QWORD PTR[16+rsi]
  128. add r9,rax
  129. mov rax,QWORD PTR[24+rsi]
  130. adc r10,rdx
  131. adc rbx,0
  132. mov QWORD PTR[16+rsp],r9
  133. mov QWORD PTR[24+rsp],r10
  ; Cross products a[2]*a[3..7] added into r12..r15, r8; r9 takes the new top limb.
  134. mul rbp
  135. add r12,rax
  136. mov rax,QWORD PTR[32+rsi]
  137. mov rcx,rdx
  138. adc rcx,0
  139. mul rbp
  140. add r13,rax
  141. mov rax,QWORD PTR[40+rsi]
  142. adc rdx,0
  143. add r13,rcx
  144. mov rcx,rdx
  145. adc rcx,0
  146. mul rbp
  147. add r14,rax
  148. mov rax,QWORD PTR[48+rsi]
  149. adc rdx,0
  150. add r14,rcx
  151. mov rcx,rdx
  152. adc rcx,0
  153. mul rbp
  154. add r15,rax
  155. mov rax,QWORD PTR[56+rsi]
  156. adc rdx,0
  157. add r15,rcx
  158. mov rcx,rdx
  159. adc rcx,0
  160. mul rbp
  161. add r8,rax
  162. mov rax,rbp
  163. adc rdx,0
  164. add r8,rcx
  165. adc rdx,0
  ; Double r12:r11 (carry-out in rcx), add a[2]^2 plus the previous carry (rbx),
  ; and store result limbs 4 and 5. r10 is reloaded with a[3].
  166. xor rcx,rcx
  167. add r11,r11
  168. mov r9,rdx
  169. adc r12,r12
  170. adc rcx,0
  171. mul rax
  172. add rax,rbx
  173. mov r10,QWORD PTR[24+rsi]
  174. add r11,rax
  175. mov rax,QWORD PTR[32+rsi]
  176. adc r12,rdx
  177. adc rcx,0
  178. mov QWORD PTR[32+rsp],r11
  179. mov QWORD PTR[40+rsp],r12
  ; Cross products a[3]*a[4..7]; copies of a[4], a[5], a[6] are kept in
  ; r11, r12, rbp for the later rows.
  180. mov r11,rax
  181. mul r10
  182. add r14,rax
  183. mov rax,QWORD PTR[40+rsi]
  184. mov rbx,rdx
  185. adc rbx,0
  186. mov r12,rax
  187. mul r10
  188. add r15,rax
  189. mov rax,QWORD PTR[48+rsi]
  190. adc rdx,0
  191. add r15,rbx
  192. mov rbx,rdx
  193. adc rbx,0
  194. mov rbp,rax
  195. mul r10
  196. add r8,rax
  197. mov rax,QWORD PTR[56+rsi]
  198. adc rdx,0
  199. add r8,rbx
  200. mov rbx,rdx
  201. adc rbx,0
  202. mul r10
  203. add r9,rax
  204. mov rax,r10
  205. adc rdx,0
  206. add r9,rbx
  207. adc rdx,0
  ; Double r14:r13 (carry-out in rbx), add a[3]^2 plus the previous carry (rcx),
  ; and store result limbs 6 and 7.
  208. xor rbx,rbx
  209. add r13,r13
  210. mov r10,rdx
  211. adc r14,r14
  212. adc rbx,0
  213. mul rax
  214. add rax,rcx
  215. add r13,rax
  216. mov rax,r12
  217. adc r14,rdx
  218. adc rbx,0
  219. mov QWORD PTR[48+rsp],r13
  220. mov QWORD PTR[56+rsp],r14
  ; Cross products a[4]*a[5..7] (a[4] in r11); a[7] is kept in r14.
  221. mul r11
  222. add r8,rax
  223. mov rax,rbp
  224. mov rcx,rdx
  225. adc rcx,0
  226. mul r11
  227. add r9,rax
  228. mov rax,QWORD PTR[56+rsi]
  229. adc rdx,0
  230. add r9,rcx
  231. mov rcx,rdx
  232. adc rcx,0
  233. mov r14,rax
  234. mul r11
  235. add r10,rax
  236. mov rax,r11
  237. adc rdx,0
  238. add r10,rcx
  239. adc rdx,0
  ; Double r8:r15 (carry-out in rcx), add a[4]^2 plus the previous carry (rbx),
  ; and store result limbs 8 and 9.
  240. xor rcx,rcx
  241. add r15,r15
  242. mov r11,rdx
  243. adc r8,r8
  244. adc rcx,0
  245. mul rax
  246. add rax,rbx
  247. add r15,rax
  248. mov rax,rbp
  249. adc r8,rdx
  250. adc rcx,0
  251. mov QWORD PTR[64+rsp],r15
  252. mov QWORD PTR[72+rsp],r8
  ; Cross products a[5]*a[6..7] (a[5] in r12).
  253. mul r12
  254. add r10,rax
  255. mov rax,r14
  256. mov rbx,rdx
  257. adc rbx,0
  258. mul r12
  259. add r11,rax
  260. mov rax,r12
  261. adc rdx,0
  262. add r11,rbx
  263. adc rdx,0
  ; Double r10:r9 (carry-out in rbx), add a[5]^2 plus the previous carry (rcx),
  ; and store result limbs 10 and 11.
  264. xor rbx,rbx
  265. add r9,r9
  266. mov r12,rdx
  267. adc r10,r10
  268. adc rbx,0
  269. mul rax
  270. add rax,rcx
  271. add r9,rax
  272. mov rax,r14
  273. adc r10,rdx
  274. adc rbx,0
  275. mov QWORD PTR[80+rsp],r9
  276. mov QWORD PTR[88+rsp],r10
  ; Cross product a[6]*a[7] (rbp * r14).
  277. mul rbp
  278. add r12,rax
  279. mov rax,rbp
  280. adc rdx,0
  ; Double r12:r11 (carry-out in rcx), add a[6]^2 plus the previous carry (rbx),
  ; and store result limbs 12 and 13.
  281. xor rcx,rcx
  282. add r11,r11
  283. mov r13,rdx
  284. adc r12,r12
  285. adc rcx,0
  286. mul rax
  287. add rax,rbx
  288. add r11,rax
  289. mov rax,r14
  290. adc r12,rdx
  291. adc rcx,0
  292. mov QWORD PTR[96+rsp],r11
  293. mov QWORD PTR[104+rsp],r12
  ; Double r13 (carry-out in rbx) and add a[7]^2 plus the previous carry (rcx):
  ; result limbs 14 and 15 end up in rax and rdx.
  294. xor rbx,rbx
  295. add r13,r13
  296. adc rbx,0
  297. mul rax
  298. add rax,rcx
  299. add rax,r13
  300. adc rdx,rbx
  ; Load the low eight limbs into r8..r15 for the reduce helper.
  301. mov r8,QWORD PTR[rsp]
  302. mov r9,QWORD PTR[8+rsp]
  303. mov r10,QWORD PTR[16+rsp]
  304. mov r11,QWORD PTR[24+rsp]
  305. mov r12,QWORD PTR[32+rsp]
  306. mov r13,QWORD PTR[40+rsp]
  307. mov r14,QWORD PTR[48+rsp]
  308. mov r15,QWORD PTR[56+rsp]
  309. DB 102,72,15,126,205 ; bytes encode: movq rbp,xmm1 (reload arg3)
  310. mov QWORD PTR[112+rsp],rax
  311. mov QWORD PTR[120+rsp],rdx
  312. call __rsaz_512_reduce
  ; Add the high eight limbs to r8..r15; rcx becomes all-ones if the addition
  ; carried out, zero otherwise, and is live into the subtract helper call.
  313. add r8,QWORD PTR[64+rsp]
  314. adc r9,QWORD PTR[72+rsp]
  315. adc r10,QWORD PTR[80+rsp]
  316. adc r11,QWORD PTR[88+rsp]
  317. adc r12,QWORD PTR[96+rsp]
  318. adc r13,QWORD PTR[104+rsp]
  319. adc r14,QWORD PTR[112+rsp]
  320. adc r15,QWORD PTR[120+rsp]
  321. sbb rcx,rcx
  322. call __rsaz_512_subtract
  ; Set up the next iteration: rdx/rax take r8/r9 (the loop expects a[0]/a[1]
  ; there), and the source pointer becomes arg1.
  323. mov rdx,r8
  324. mov rax,r9
  325. mov r8d,DWORD PTR[((128+8))+rsp]
  326. mov rsi,rdi
  327. dec r8d
  328. jnz $L$oop_sqr
  329. jmp $L$sqr_tail
  330. ALIGN 32
  ; ---- mulx/adcx/adox squaring loop. Entry: rdx = a[0], rax = a[1], r8d = count ----
  ; Same row structure as the mul-based loop, but using two independent carry
  ; chains (adcx uses CF, adox uses OF). rbp is kept at zero throughout.
  331. $L$oop_sqrx::
  332. mov DWORD PTR[((128+8))+rsp],r8d
  333. DB 102,72,15,110,199 ; bytes encode: movq xmm0,rdi (rdi is used as scratch below)
  ; Cross products a[0]*a[1..7] into r8..r15 (rbx = a[1]).
  334. mulx r9,r8,rax
  335. mov rbx,rax
  336. mulx r10,rcx,QWORD PTR[16+rsi]
  337. xor rbp,rbp
  338. mulx r11,rax,QWORD PTR[24+rsi]
  339. adcx r9,rcx
  340. DB 0c4h,062h,0f3h,0f6h,0a6h,020h,000h,000h,000h ; bytes encode: mulx r12,rcx,[32+rsi]
  341. adcx r10,rax
  342. DB 0c4h,062h,0fbh,0f6h,0aeh,028h,000h,000h,000h ; bytes encode: mulx r13,rax,[40+rsi]
  343. adcx r11,rcx
  344. mulx r14,rcx,QWORD PTR[48+rsi]
  345. adcx r12,rax
  346. adcx r13,rcx
  347. mulx r15,rax,QWORD PTR[56+rsi]
  348. adcx r14,rax
  349. adcx r15,rbp
  ; a[0]^2 in rdi:rax; double r8, add the high half, carry-out in rcx;
  ; store result limbs 0 and 1. rdx becomes a[1].
  350. mulx rdi,rax,rdx
  351. mov rdx,rbx
  352. xor rcx,rcx
  353. adox r8,r8
  354. adcx r8,rdi
  355. adox rcx,rbp
  356. adcx rcx,rbp
  357. mov QWORD PTR[rsp],rax
  358. mov QWORD PTR[8+rsp],r8
  ; Cross products a[1]*a[2..7].
  359. DB 0c4h,0e2h,0fbh,0f6h,09eh,010h,000h,000h,000h ; bytes encode: mulx rbx,rax,[16+rsi]
  360. adox r10,rax
  361. adcx r11,rbx
  362. mulx r8,rdi,QWORD PTR[24+rsi]
  363. adox r11,rdi
  364. DB 066h ; extra prefix byte ahead of the next instruction (presumably padding)
  365. adcx r12,r8
  366. mulx rbx,rax,QWORD PTR[32+rsi]
  367. adox r12,rax
  368. adcx r13,rbx
  369. mulx r8,rdi,QWORD PTR[40+rsi]
  370. adox r13,rdi
  371. adcx r14,r8
  372. DB 0c4h,0e2h,0fbh,0f6h,09eh,030h,000h,000h,000h ; bytes encode: mulx rbx,rax,[48+rsi]
  373. adox r14,rax
  374. adcx r15,rbx
  375. DB 0c4h,062h,0c3h,0f6h,086h,038h,000h,000h,000h ; bytes encode: mulx r8,rdi,[56+rsi]
  376. adox r15,rdi
  377. adcx r8,rbp
  ; a[1]^2; double r10:r9, add the square and previous carry (rcx), carry-out in rbx;
  ; store result limbs 2 and 3. rdx becomes a[2].
  378. mulx rdi,rax,rdx
  379. adox r8,rbp
  380. DB 048h,08bh,096h,010h,000h,000h,000h ; bytes encode: mov rdx,[16+rsi]
  381. xor rbx,rbx
  382. adox r9,r9
  383. adcx rax,rcx
  384. adox r10,r10
  385. adcx r9,rax
  386. adox rbx,rbp
  387. adcx r10,rdi
  388. adcx rbx,rbp
  389. mov QWORD PTR[16+rsp],r9
  390. DB 04ch,089h,094h,024h,018h,000h,000h,000h ; bytes encode: mov [24+rsp],r10
  ; Cross products a[2]*a[3..7].
  391. mulx r9,rdi,QWORD PTR[24+rsi]
  392. adox r12,rdi
  393. adcx r13,r9
  394. mulx rcx,rax,QWORD PTR[32+rsi]
  395. adox r13,rax
  396. adcx r14,rcx
  397. DB 0c4h,062h,0c3h,0f6h,08eh,028h,000h,000h,000h ; bytes encode: mulx r9,rdi,[40+rsi]
  398. adox r14,rdi
  399. adcx r15,r9
  400. DB 0c4h,0e2h,0fbh,0f6h,08eh,030h,000h,000h,000h ; bytes encode: mulx rcx,rax,[48+rsi]
  401. adox r15,rax
  402. adcx r8,rcx
  403. mulx r9,rdi,QWORD PTR[56+rsi]
  404. adox r8,rdi
  405. adcx r9,rbp
  ; a[2]^2; double r12:r11, add the square and previous carry (rbx), carry-out in rcx;
  ; store result limbs 4 and 5. rdx becomes a[3].
  406. mulx rdi,rax,rdx
  407. adox r9,rbp
  408. mov rdx,QWORD PTR[24+rsi]
  409. xor rcx,rcx
  410. adox r11,r11
  411. adcx rax,rbx
  412. adox r12,r12
  413. adcx r11,rax
  414. adox rcx,rbp
  415. adcx r12,rdi
  416. adcx rcx,rbp
  417. mov QWORD PTR[32+rsp],r11
  418. mov QWORD PTR[40+rsp],r12
  ; Cross products a[3]*a[4..7].
  419. mulx rbx,rax,QWORD PTR[32+rsi]
  420. adox r14,rax
  421. adcx r15,rbx
  422. mulx r10,rdi,QWORD PTR[40+rsi]
  423. adox r15,rdi
  424. adcx r8,r10
  425. mulx rbx,rax,QWORD PTR[48+rsi]
  426. adox r8,rax
  427. adcx r9,rbx
  428. mulx r10,rdi,QWORD PTR[56+rsi]
  429. adox r9,rdi
  430. adcx r10,rbp
  ; a[3]^2; double r14:r13, add the square and previous carry (rcx), carry-out in rbx;
  ; store result limbs 6 and 7. rdx becomes a[4].
  431. mulx rdi,rax,rdx
  432. adox r10,rbp
  433. mov rdx,QWORD PTR[32+rsi]
  434. xor rbx,rbx
  435. adox r13,r13
  436. adcx rax,rcx
  437. adox r14,r14
  438. adcx r13,rax
  439. adox rbx,rbp
  440. adcx r14,rdi
  441. adcx rbx,rbp
  442. mov QWORD PTR[48+rsp],r13
  443. mov QWORD PTR[56+rsp],r14
  ; Cross products a[4]*a[5..7].
  444. mulx r11,rdi,QWORD PTR[40+rsi]
  445. adox r8,rdi
  446. adcx r9,r11
  447. mulx rcx,rax,QWORD PTR[48+rsi]
  448. adox r9,rax
  449. adcx r10,rcx
  450. mulx r11,rdi,QWORD PTR[56+rsi]
  451. adox r10,rdi
  452. adcx r11,rbp
  ; a[4]^2; double r8:r15, add the square and previous carry (rbx), carry-out in rcx;
  ; store result limbs 8 and 9. rdx becomes a[5].
  453. mulx rdi,rax,rdx
  454. mov rdx,QWORD PTR[40+rsi]
  455. adox r11,rbp
  456. xor rcx,rcx
  457. adox r15,r15
  458. adcx rax,rbx
  459. adox r8,r8
  460. adcx r15,rax
  461. adox rcx,rbp
  462. adcx r8,rdi
  463. adcx rcx,rbp
  464. mov QWORD PTR[64+rsp],r15
  465. mov QWORD PTR[72+rsp],r8
  ; Cross products a[5]*a[6..7].
  466. DB 0c4h,0e2h,0fbh,0f6h,09eh,030h,000h,000h,000h ; bytes encode: mulx rbx,rax,[48+rsi]
  467. adox r10,rax
  468. adcx r11,rbx
  469. DB 0c4h,062h,0c3h,0f6h,0a6h,038h,000h,000h,000h ; bytes encode: mulx r12,rdi,[56+rsi]
  470. adox r11,rdi
  471. adcx r12,rbp
  ; a[5]^2; double r10:r9, add the square and previous carry (rcx), carry-out in rbx;
  ; store result limbs 10 and 11. rdx becomes a[6].
  472. mulx rdi,rax,rdx
  473. adox r12,rbp
  474. mov rdx,QWORD PTR[48+rsi]
  475. xor rbx,rbx
  476. adox r9,r9
  477. adcx rax,rcx
  478. adox r10,r10
  479. adcx r9,rax
  480. adcx r10,rdi
  481. adox rbx,rbp
  482. adcx rbx,rbp
  483. mov QWORD PTR[80+rsp],r9
  484. mov QWORD PTR[88+rsp],r10
  ; Cross product a[6]*a[7].
  485. DB 0c4h,062h,0fbh,0f6h,0aeh,038h,000h,000h,000h ; bytes encode: mulx r13,rax,[56+rsi]
  486. adox r12,rax
  487. adox r13,rbp
  ; a[6]^2; double r12:r11, add the square and previous carry (rbx), carry-out in rcx;
  ; store result limbs 12 and 13. rdx becomes a[7].
  488. mulx rdi,rax,rdx
  489. xor rcx,rcx
  490. mov rdx,QWORD PTR[56+rsi]
  491. adox r11,r11
  492. adcx rax,rbx
  493. adox r12,r12
  494. adcx r11,rax
  495. adox rcx,rbp
  496. adcx r12,rdi
  497. adcx rcx,rbp
  498. DB 04ch,089h,09ch,024h,060h,000h,000h,000h ; bytes encode: mov [96+rsp],r11
  499. DB 04ch,089h,0a4h,024h,068h,000h,000h,000h ; bytes encode: mov [104+rsp],r12
  ; a[7]^2; double r13 and add the previous carry (rcx): result limbs 14 and 15
  ; end up in rax and rbx.
  500. mulx rdx,rax,rdx
  501. xor rbx,rbx
  502. adox r13,r13
  503. adcx rax,rcx
  504. adox rbx,rbp
  505. adcx rax,r13
  506. adcx rbx,rdx
  507. DB 102,72,15,126,199 ; bytes encode: movq rdi,xmm0 (restore arg1)
  508. DB 102,72,15,126,205 ; bytes encode: movq rbp,xmm1 (reload arg3)
  ; rdx = saved arg4; low eight limbs into r8..r15 for the reducex helper.
  509. mov rdx,QWORD PTR[128+rsp]
  510. mov r8,QWORD PTR[rsp]
  511. mov r9,QWORD PTR[8+rsp]
  512. mov r10,QWORD PTR[16+rsp]
  513. mov r11,QWORD PTR[24+rsp]
  514. mov r12,QWORD PTR[32+rsp]
  515. mov r13,QWORD PTR[40+rsp]
  516. mov r14,QWORD PTR[48+rsp]
  517. mov r15,QWORD PTR[56+rsp]
  518. mov QWORD PTR[112+rsp],rax
  519. mov QWORD PTR[120+rsp],rbx
  520. call __rsaz_512_reducex
  ; Add the high eight limbs; rcx = all-ones on carry-out, else zero.
  521. add r8,QWORD PTR[64+rsp]
  522. adc r9,QWORD PTR[72+rsp]
  523. adc r10,QWORD PTR[80+rsp]
  524. adc r11,QWORD PTR[88+rsp]
  525. adc r12,QWORD PTR[96+rsp]
  526. adc r13,QWORD PTR[104+rsp]
  527. adc r14,QWORD PTR[112+rsp]
  528. adc r15,QWORD PTR[120+rsp]
  529. sbb rcx,rcx
  530. call __rsaz_512_subtract
  ; Set up the next iteration exactly as in the mul-based loop.
  531. mov rdx,r8
  532. mov rax,r9
  533. mov r8d,DWORD PTR[((128+8))+rsp]
  534. mov rsi,rdi
  535. dec r8d
  536. jnz $L$oop_sqrx
  ; ---- common exit: rax = rsp at entry (frame 128+24 plus six pushes = 200 bytes) ----
  537. $L$sqr_tail::
  538. lea rax,QWORD PTR[((128+24+48))+rsp]
  539. mov r15,QWORD PTR[((-48))+rax]
  540. mov r14,QWORD PTR[((-40))+rax]
  541. mov r13,QWORD PTR[((-32))+rax]
  542. mov r12,QWORD PTR[((-24))+rax]
  543. mov rbp,QWORD PTR[((-16))+rax]
  544. mov rbx,QWORD PTR[((-8))+rax]
  545. lea rsp,QWORD PTR[rax]
  546. $L$sqr_epilogue::
  547. mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue
  548. mov rsi,QWORD PTR[16+rsp]
  549. DB 0F3h,0C3h ;repret
  550. $L$SEH_end_rsaz_512_sqr::
  551. rsaz_512_sqr ENDP
  ; ---------------------------------------------------------------------
  ; rsaz_512_mul: one 8-limb multiplication followed by calls to the reduce
  ; and subtract helpers. This routine is only a dispatcher: the product is
  ; computed by __rsaz_512_mul or __rsaz_512_mulx (defined outside this view).
  ;
  ; ABI: Microsoft x64. Arguments arrive in rcx, rdx, r8, r9 and [40+rsp];
  ; the prologue moves them into rdi, rsi, rdx, rcx, r8 respectively.
  ; NOTE(review): presumably (out, a, b, mod, n0) as in OpenSSL's C
  ; prototype - confirm against the caller.
  ;
  ; After the remap:
  ;   rdi = arg1; parked in xmm0 across the multiply helper
  ;   rsi = arg2; left untouched here for the multiply helper
  ;   rdx = arg3; copied to rbp, with its first limb loaded into rbx (mul path)
  ;         or rdx (mulx path)
  ;   rcx = arg4; parked in xmm1, reloaded into rbp before the reduce call
  ;   r8  = arg5; saved at [128+rsp]; the mulx path reloads it into rdx before reducex
  ;
  ; The code after the multiply helper reads the 16-limb product from
  ; [rsp .. 120+rsp]: low eight limbs into r8..r15 before the reduce call,
  ; high eight limbs added afterwards.
  ; Saves/restores rbx, rbp, r12-r15 on the stack and rdi/rsi in the
  ; caller-provided home slots at [8+rsp]/[16+rsp].
  ; ---------------------------------------------------------------------
  552. PUBLIC rsaz_512_mul
  553. ALIGN 32
  554. rsaz_512_mul PROC PUBLIC
  555. mov QWORD PTR[8+rsp],rdi ;WIN64 prologue
  556. mov QWORD PTR[16+rsp],rsi
  557. mov rax,rsp
  558. $L$SEH_begin_rsaz_512_mul::
  ; Shuffle Win64 argument registers into rdi, rsi, rdx, rcx, r8.
  559. mov rdi,rcx
  560. mov rsi,rdx
  561. mov rdx,r8
  562. mov rcx,r9
  563. mov r8,QWORD PTR[40+rsp]
  564. push rbx
  565. push rbp
  566. push r12
  567. push r13
  568. push r14
  569. push r15
  570. sub rsp,128+24
  571. $L$mul_body::
  572. DB 102,72,15,110,199 ; bytes encode: movq xmm0,rdi (park arg1)
  573. DB 102,72,15,110,201 ; bytes encode: movq xmm1,rcx (park arg4)
  574. mov QWORD PTR[128+rsp],r8
  ; Take the mulx path only if bits 8 and 19 of the capability dword are both set.
  ; NOTE(review): presumably the BMI2 and ADX feature bits - confirm.
  575. mov r11d,080100h
  576. and r11d,DWORD PTR[((OPENSSL_ia32cap_P+8))]
  577. cmp r11d,080100h
  578. je $L$mulx
  ; ---- mul-based path: rbx = first limb of arg3, rbp = arg3 pointer ----
  579. mov rbx,QWORD PTR[rdx]
  580. mov rbp,rdx
  581. call __rsaz_512_mul
  582. DB 102,72,15,126,199 ; bytes encode: movq rdi,xmm0 (restore arg1)
  583. DB 102,72,15,126,205 ; bytes encode: movq rbp,xmm1 (reload arg4)
  ; Low eight product limbs into r8..r15 for the reduce helper.
  584. mov r8,QWORD PTR[rsp]
  585. mov r9,QWORD PTR[8+rsp]
  586. mov r10,QWORD PTR[16+rsp]
  587. mov r11,QWORD PTR[24+rsp]
  588. mov r12,QWORD PTR[32+rsp]
  589. mov r13,QWORD PTR[40+rsp]
  590. mov r14,QWORD PTR[48+rsp]
  591. mov r15,QWORD PTR[56+rsp]
  592. call __rsaz_512_reduce
  593. jmp $L$mul_tail
  594. ALIGN 32
  ; ---- mulx-based path: rbp = arg3 pointer, rdx = first limb of arg3 ----
  595. $L$mulx::
  596. mov rbp,rdx
  597. mov rdx,QWORD PTR[rdx]
  598. call __rsaz_512_mulx
  599. DB 102,72,15,126,199 ; bytes encode: movq rdi,xmm0 (restore arg1)
  600. DB 102,72,15,126,205 ; bytes encode: movq rbp,xmm1 (reload arg4)
  ; rdx = saved arg5; low eight product limbs into r8..r15 for the reducex helper.
  601. mov rdx,QWORD PTR[128+rsp]
  602. mov r8,QWORD PTR[rsp]
  603. mov r9,QWORD PTR[8+rsp]
  604. mov r10,QWORD PTR[16+rsp]
  605. mov r11,QWORD PTR[24+rsp]
  606. mov r12,QWORD PTR[32+rsp]
  607. mov r13,QWORD PTR[40+rsp]
  608. mov r14,QWORD PTR[48+rsp]
  609. mov r15,QWORD PTR[56+rsp]
  610. call __rsaz_512_reducex
  ; ---- common tail: add the high eight limbs; rcx = all-ones on carry-out, else zero ----
  611. $L$mul_tail::
  612. add r8,QWORD PTR[64+rsp]
  613. adc r9,QWORD PTR[72+rsp]
  614. adc r10,QWORD PTR[80+rsp]
  615. adc r11,QWORD PTR[88+rsp]
  616. adc r12,QWORD PTR[96+rsp]
  617. adc r13,QWORD PTR[104+rsp]
  618. adc r14,QWORD PTR[112+rsp]
  619. adc r15,QWORD PTR[120+rsp]
  620. sbb rcx,rcx
  621. call __rsaz_512_subtract
  ; rax = rsp at entry (frame 128+24 plus six pushes = 200 bytes); restore saved registers.
  622. lea rax,QWORD PTR[((128+24+48))+rsp]
  623. mov r15,QWORD PTR[((-48))+rax]
  624. mov r14,QWORD PTR[((-40))+rax]
  625. mov r13,QWORD PTR[((-32))+rax]
  626. mov r12,QWORD PTR[((-24))+rax]
  627. mov rbp,QWORD PTR[((-16))+rax]
  628. mov rbx,QWORD PTR[((-8))+rax]
  629. lea rsp,QWORD PTR[rax]
  630. $L$mul_epilogue::
  631. mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue
  632. mov rsi,QWORD PTR[16+rsp]
  633. DB 0F3h,0C3h ;repret
  634. $L$SEH_end_rsaz_512_mul::
  635. rsaz_512_mul ENDP
  ; ---------------------------------------------------------------------
  ; rsaz_512_mul_gather4: 8-limb multiplication in which each multiplier limb
  ; is gathered from a table through compare masks, followed by calls to the
  ; reduce and subtract helpers.
  ;
  ; ABI: Microsoft x64. Arguments arrive in rcx, rdx, r8, r9, [40+rsp] and
  ; [48+rsp]; the prologue moves them into rdi, rsi, rdx, rcx, r8, r9.
  ; NOTE(review): presumably (out, a, table, mod, n0, power) as in OpenSSL's C
  ; prototype - confirm against the caller.
  ;
  ; After the remap:
  ;   rdi = arg1; saved at [128+8+rsp], restored before the reduce call
  ;   rsi = arg2; pointer to the eight multiplicand limbs
  ;   rdx = arg3; table base, read in 128-byte rows (rbp walks the later rows)
  ;   rcx = arg4; saved at [128+16+rsp], reloaded into rbp before the reduce call
  ;   r8  = arg5; saved at [128+rsp]; the mulx path reloads it into rdx before reducex
  ;   r9d = arg6; index broadcast and compared against the $L$inc-derived vectors
  ;
  ; Gather: every row is read in full with aligned 16-byte loads (movdqa),
  ; ANDed with the eight masks xmm0..xmm7 and ORed together, so the addresses
  ; accessed do not depend on the index value.
  ;
  ; Stack frame (after sub rsp,328):
  ;   [rsp .. 120+rsp]     16-limb product
  ;   [128+rsp .. 144+rsp] saved arg5, arg1, arg4
  ;   [160+rsp .. 319+rsp] saved xmm6..xmm15
  ; Also saves/restores rbx, rbp, r12-r15 on the stack and rdi/rsi in the
  ; caller-provided home slots at [8+rsp]/[16+rsp].
  ; $L$inc, __rsaz_512_reduce, __rsaz_512_reducex and __rsaz_512_subtract are
  ; defined outside this view.
  ; ---------------------------------------------------------------------
  636. PUBLIC rsaz_512_mul_gather4
  637. ALIGN 32
  638. rsaz_512_mul_gather4 PROC PUBLIC
  639. mov QWORD PTR[8+rsp],rdi ;WIN64 prologue
  640. mov QWORD PTR[16+rsp],rsi
  641. mov rax,rsp
  642. $L$SEH_begin_rsaz_512_mul_gather4::
  ; Shuffle Win64 argument registers into rdi, rsi, rdx, rcx, r8, r9.
  643. mov rdi,rcx
  644. mov rsi,rdx
  645. mov rdx,r8
  646. mov rcx,r9
  647. mov r8,QWORD PTR[40+rsp]
  648. mov r9,QWORD PTR[48+rsp]
  649. push rbx
  650. push rbp
  651. push r12
  652. push r13
  653. push r14
  654. push r15
  655. sub rsp,328
  ; Save xmm6..xmm15, which are overwritten below.
  656. movaps XMMWORD PTR[160+rsp],xmm6
  657. movaps XMMWORD PTR[176+rsp],xmm7
  658. movaps XMMWORD PTR[192+rsp],xmm8
  659. movaps XMMWORD PTR[208+rsp],xmm9
  660. movaps XMMWORD PTR[224+rsp],xmm10
  661. movaps XMMWORD PTR[240+rsp],xmm11
  662. movaps XMMWORD PTR[256+rsp],xmm12
  663. movaps XMMWORD PTR[272+rsp],xmm13
  664. movaps XMMWORD PTR[288+rsp],xmm14
  665. movaps XMMWORD PTR[304+rsp],xmm15
  666. $L$mul_gather4_body::
  ; Build the eight compare masks xmm0..xmm7: broadcast the index (r9d) to all
  ; four dwords of xmm8, then compare it against successive index vectors formed
  ; by repeatedly adding the step vector at $L$inc+16 to the vector at $L$inc.
  667. movd xmm8,r9d
  668. movdqa xmm1,XMMWORD PTR[(($L$inc+16))]
  669. movdqa xmm0,XMMWORD PTR[$L$inc]
  670. pshufd xmm8,xmm8,0
  671. movdqa xmm7,xmm1
  672. movdqa xmm2,xmm1
  673. paddd xmm1,xmm0
  674. pcmpeqd xmm0,xmm8
  675. movdqa xmm3,xmm7
  676. paddd xmm2,xmm1
  677. pcmpeqd xmm1,xmm8
  678. movdqa xmm4,xmm7
  679. paddd xmm3,xmm2
  680. pcmpeqd xmm2,xmm8
  681. movdqa xmm5,xmm7
  682. paddd xmm4,xmm3
  683. pcmpeqd xmm3,xmm8
  684. movdqa xmm6,xmm7
  685. paddd xmm5,xmm4
  686. pcmpeqd xmm4,xmm8
  687. paddd xmm6,xmm5
  688. pcmpeqd xmm5,xmm8
  689. paddd xmm7,xmm6
  690. pcmpeqd xmm6,xmm8
  691. pcmpeqd xmm7,xmm8
  ; Gather the first multiplier limb: load the whole 128-byte row at rdx, mask,
  ; OR everything together and fold the two qword halves; the selected limb ends
  ; up in the low qword of xmm8. rbp is advanced to the next row.
  692. movdqa xmm8,XMMWORD PTR[rdx]
  693. movdqa xmm9,XMMWORD PTR[16+rdx]
  694. movdqa xmm10,XMMWORD PTR[32+rdx]
  695. movdqa xmm11,XMMWORD PTR[48+rdx]
  696. pand xmm8,xmm0
  697. movdqa xmm12,XMMWORD PTR[64+rdx]
  698. pand xmm9,xmm1
  699. movdqa xmm13,XMMWORD PTR[80+rdx]
  700. pand xmm10,xmm2
  701. movdqa xmm14,XMMWORD PTR[96+rdx]
  702. pand xmm11,xmm3
  703. movdqa xmm15,XMMWORD PTR[112+rdx]
  704. lea rbp,QWORD PTR[128+rdx]
  705. pand xmm12,xmm4
  706. pand xmm13,xmm5
  707. pand xmm14,xmm6
  708. pand xmm15,xmm7
  709. por xmm8,xmm10
  710. por xmm9,xmm11
  711. por xmm8,xmm12
  712. por xmm9,xmm13
  713. por xmm8,xmm14
  714. por xmm9,xmm15
  715. por xmm8,xmm9
  716. pshufd xmm9,xmm8,04eh
  717. por xmm8,xmm9
  ; Take the mulx path only if bits 8 and 19 of the capability dword are both set.
  ; NOTE(review): presumably the BMI2 and ADX feature bits - confirm.
  718. mov r11d,080100h
  719. and r11d,DWORD PTR[((OPENSSL_ia32cap_P+8))]
  720. cmp r11d,080100h
  721. je $L$mulx_gather
  ; ---- mul-based path ----
  722. DB 102,76,15,126,195 ; bytes encode: movq rbx,xmm8 (gathered multiplier limb)
  723. mov QWORD PTR[128+rsp],r8
  724. mov QWORD PTR[((128+8))+rsp],rdi
  725. mov QWORD PTR[((128+16))+rsp],rcx
  ; First row: a[0..7] * rbx. Limb 0 goes to [rsp]; r8..r15 hold the running sum.
  726. mov rax,QWORD PTR[rsi]
  727. mov rcx,QWORD PTR[8+rsi]
  728. mul rbx
  729. mov QWORD PTR[rsp],rax
  730. mov rax,rcx
  731. mov r8,rdx
  732. mul rbx
  733. add r8,rax
  734. mov rax,QWORD PTR[16+rsi]
  735. mov r9,rdx
  736. adc r9,0
  737. mul rbx
  738. add r9,rax
  739. mov rax,QWORD PTR[24+rsi]
  740. mov r10,rdx
  741. adc r10,0
  742. mul rbx
  743. add r10,rax
  744. mov rax,QWORD PTR[32+rsi]
  745. mov r11,rdx
  746. adc r11,0
  747. mul rbx
  748. add r11,rax
  749. mov rax,QWORD PTR[40+rsi]
  750. mov r12,rdx
  751. adc r12,0
  752. mul rbx
  753. add r12,rax
  754. mov rax,QWORD PTR[48+rsi]
  755. mov r13,rdx
  756. adc r13,0
  757. mul rbx
  758. add r13,rax
  759. mov rax,QWORD PTR[56+rsi]
  760. mov r14,rdx
  761. adc r14,0
  762. mul rbx
  763. add r14,rax
  764. mov rax,QWORD PTR[rsi]
  765. mov r15,rdx
  766. adc r15,0
  ; rdi = address of product limb 1; ecx = 7 remaining rows.
  767. lea rdi,QWORD PTR[8+rsp]
  768. mov ecx,7
  769. jmp $L$oop_mul_gather
  770. ALIGN 32
  ; Per-row loop. Entry: rax = a[0], r8..r15 = running sum, rbp = next table row.
  771. $L$oop_mul_gather::
  ; Gather the next multiplier limb (same mask/OR/fold sequence as above).
  772. movdqa xmm8,XMMWORD PTR[rbp]
  773. movdqa xmm9,XMMWORD PTR[16+rbp]
  774. movdqa xmm10,XMMWORD PTR[32+rbp]
  775. movdqa xmm11,XMMWORD PTR[48+rbp]
  776. pand xmm8,xmm0
  777. movdqa xmm12,XMMWORD PTR[64+rbp]
  778. pand xmm9,xmm1
  779. movdqa xmm13,XMMWORD PTR[80+rbp]
  780. pand xmm10,xmm2
  781. movdqa xmm14,XMMWORD PTR[96+rbp]
  782. pand xmm11,xmm3
  783. movdqa xmm15,XMMWORD PTR[112+rbp]
  784. lea rbp,QWORD PTR[128+rbp]
  785. pand xmm12,xmm4
  786. pand xmm13,xmm5
  787. pand xmm14,xmm6
  788. pand xmm15,xmm7
  789. por xmm8,xmm10
  790. por xmm9,xmm11
  791. por xmm8,xmm12
  792. por xmm9,xmm13
  793. por xmm8,xmm14
  794. por xmm9,xmm15
  795. por xmm8,xmm9
  796. pshufd xmm9,xmm8,04eh
  797. por xmm8,xmm9
  798. DB 102,76,15,126,195 ; bytes encode: movq rbx,xmm8 (gathered multiplier limb)
  ; Multiply-accumulate a[0..7] * rbx into the running sum; the finished low
  ; limb is stored at [rdi] and the window shifts down by one register.
  799. mul rbx
  800. add r8,rax
  801. mov rax,QWORD PTR[8+rsi]
  802. mov QWORD PTR[rdi],r8
  803. mov r8,rdx
  804. adc r8,0
  805. mul rbx
  806. add r9,rax
  807. mov rax,QWORD PTR[16+rsi]
  808. adc rdx,0
  809. add r8,r9
  810. mov r9,rdx
  811. adc r9,0
  812. mul rbx
  813. add r10,rax
  814. mov rax,QWORD PTR[24+rsi]
  815. adc rdx,0
  816. add r9,r10
  817. mov r10,rdx
  818. adc r10,0
  819. mul rbx
  820. add r11,rax
  821. mov rax,QWORD PTR[32+rsi]
  822. adc rdx,0
  823. add r10,r11
  824. mov r11,rdx
  825. adc r11,0
  826. mul rbx
  827. add r12,rax
  828. mov rax,QWORD PTR[40+rsi]
  829. adc rdx,0
  830. add r11,r12
  831. mov r12,rdx
  832. adc r12,0
  833. mul rbx
  834. add r13,rax
  835. mov rax,QWORD PTR[48+rsi]
  836. adc rdx,0
  837. add r12,r13
  838. mov r13,rdx
  839. adc r13,0
  840. mul rbx
  841. add r14,rax
  842. mov rax,QWORD PTR[56+rsi]
  843. adc rdx,0
  844. add r13,r14
  845. mov r14,rdx
  846. adc r14,0
  847. mul rbx
  848. add r15,rax
  849. mov rax,QWORD PTR[rsi]
  850. adc rdx,0
  851. add r14,r15
  852. mov r15,rdx
  853. adc r15,0
  854. lea rdi,QWORD PTR[8+rdi]
  855. dec ecx
  856. jnz $L$oop_mul_gather
  ; rdi now points at product limb 8: store the high eight limbs.
  857. mov QWORD PTR[rdi],r8
  858. mov QWORD PTR[8+rdi],r9
  859. mov QWORD PTR[16+rdi],r10
  860. mov QWORD PTR[24+rdi],r11
  861. mov QWORD PTR[32+rdi],r12
  862. mov QWORD PTR[40+rdi],r13
  863. mov QWORD PTR[48+rdi],r14
  864. mov QWORD PTR[56+rdi],r15
  ; Restore arg1 into rdi, load saved arg4 into rbp, and load the low eight
  ; limbs into r8..r15 for the reduce helper.
  865. mov rdi,QWORD PTR[((128+8))+rsp]
  866. mov rbp,QWORD PTR[((128+16))+rsp]
  867. mov r8,QWORD PTR[rsp]
  868. mov r9,QWORD PTR[8+rsp]
  869. mov r10,QWORD PTR[16+rsp]
  870. mov r11,QWORD PTR[24+rsp]
  871. mov r12,QWORD PTR[32+rsp]
  872. mov r13,QWORD PTR[40+rsp]
  873. mov r14,QWORD PTR[48+rsp]
  874. mov r15,QWORD PTR[56+rsp]
  875. call __rsaz_512_reduce
  876. jmp $L$mul_gather_tail
  877. ALIGN 32
  ; ---- mulx/adcx/adox path ----
  878. $L$mulx_gather::
  879. DB 102,76,15,126,194 ; bytes encode: movq rdx,xmm8 (gathered multiplier limb)
  880. mov QWORD PTR[128+rsp],r8
  881. mov QWORD PTR[((128+8))+rsp],rdi
  882. mov QWORD PTR[((128+16))+rsp],rcx
  ; First row: a[0..7] * rdx. Limb 0 goes to [rsp]; rdi is zeroed (xor also
  ; clears CF/OF) and used as the zero operand for the carry chains.
  883. mulx r8,rbx,QWORD PTR[rsi]
  884. mov QWORD PTR[rsp],rbx
  885. xor edi,edi
  886. mulx r9,rax,QWORD PTR[8+rsi]
  887. mulx r10,rbx,QWORD PTR[16+rsi]
  888. adcx r8,rax
  889. mulx r11,rax,QWORD PTR[24+rsi]
  890. adcx r9,rbx
  891. mulx r12,rbx,QWORD PTR[32+rsi]
  892. adcx r10,rax
  893. mulx r13,rax,QWORD PTR[40+rsi]
  894. adcx r11,rbx
  895. mulx r14,rbx,QWORD PTR[48+rsi]
  896. adcx r12,rax
  897. mulx r15,rax,QWORD PTR[56+rsi]
  898. adcx r13,rbx
  899. adcx r14,rax
  900. DB 067h ; extra prefix byte ahead of the next instruction (presumably padding)
  901. mov rbx,r8
  902. adcx r15,rdi
  ; rcx counts -7..-1; the finished limb of each row is stored at [64+rcx*8+rsp].
  903. mov rcx,-7
  904. jmp $L$oop_mulx_gather
  905. ALIGN 32
  906. $L$oop_mulx_gather::
  ; Gather the next multiplier limb (same mask/OR/fold sequence as above).
  907. movdqa xmm8,XMMWORD PTR[rbp]
  908. movdqa xmm9,XMMWORD PTR[16+rbp]
  909. movdqa xmm10,XMMWORD PTR[32+rbp]
  910. movdqa xmm11,XMMWORD PTR[48+rbp]
  911. pand xmm8,xmm0
  912. movdqa xmm12,XMMWORD PTR[64+rbp]
  913. pand xmm9,xmm1
  914. movdqa xmm13,XMMWORD PTR[80+rbp]
  915. pand xmm10,xmm2
  916. movdqa xmm14,XMMWORD PTR[96+rbp]
  917. pand xmm11,xmm3
  918. movdqa xmm15,XMMWORD PTR[112+rbp]
  919. lea rbp,QWORD PTR[128+rbp]
  920. pand xmm12,xmm4
  921. pand xmm13,xmm5
  922. pand xmm14,xmm6
  923. pand xmm15,xmm7
  924. por xmm8,xmm10
  925. por xmm9,xmm11
  926. por xmm8,xmm12
  927. por xmm9,xmm13
  928. por xmm8,xmm14
  929. por xmm9,xmm15
  930. por xmm8,xmm9
  931. pshufd xmm9,xmm8,04eh
  932. por xmm8,xmm9
  933. DB 102,76,15,126,194 ; bytes encode: movq rdx,xmm8 (gathered multiplier limb)
  ; Multiply-accumulate a[0..7] * rdx using the two carry chains (adcx/CF, adox/OF).
  934. DB 0c4h,062h,0fbh,0f6h,086h,000h,000h,000h,000h ; bytes encode: mulx r8,rax,[0+rsi]
  935. adcx rbx,rax
  936. adox r8,r9
  937. mulx r9,rax,QWORD PTR[8+rsi]
  938. adcx r8,rax
  939. adox r9,r10
  940. mulx r10,rax,QWORD PTR[16+rsi]
  941. adcx r9,rax
  942. adox r10,r11
  943. DB 0c4h,062h,0fbh,0f6h,09eh,018h,000h,000h,000h ; bytes encode: mulx r11,rax,[24+rsi]
  944. adcx r10,rax
  945. adox r11,r12
  946. mulx r12,rax,QWORD PTR[32+rsi]
  947. adcx r11,rax
  948. adox r12,r13
  949. mulx r13,rax,QWORD PTR[40+rsi]
  950. adcx r12,rax
  951. adox r13,r14
  952. DB 0c4h,062h,0fbh,0f6h,0b6h,030h,000h,000h,000h ; bytes encode: mulx r14,rax,[48+rsi]
  953. adcx r13,rax
  954. DB 067h ; extra prefix byte ahead of the next instruction (presumably padding)
  955. adox r14,r15
  956. mulx r15,rax,QWORD PTR[56+rsi]
  957. mov QWORD PTR[64+rcx*8+rsp],rbx
  958. adcx r14,rax
  959. adox r15,rdi
  960. mov rbx,r8
  961. adcx r15,rdi
  962. inc rcx
  963. jnz $L$oop_mulx_gather
  ; Store the high eight product limbs.
  964. mov QWORD PTR[64+rsp],r8
  965. mov QWORD PTR[((64+8))+rsp],r9
  966. mov QWORD PTR[((64+16))+rsp],r10
  967. mov QWORD PTR[((64+24))+rsp],r11
  968. mov QWORD PTR[((64+32))+rsp],r12
  969. mov QWORD PTR[((64+40))+rsp],r13
  970. mov QWORD PTR[((64+48))+rsp],r14
  971. mov QWORD PTR[((64+56))+rsp],r15
  ; rdx = saved arg5, rdi = arg1, rbp = saved arg4; low eight limbs into
  ; r8..r15 for the reducex helper.
  972. mov rdx,QWORD PTR[128+rsp]
  973. mov rdi,QWORD PTR[((128+8))+rsp]
  974. mov rbp,QWORD PTR[((128+16))+rsp]
  975. mov r8,QWORD PTR[rsp]
  976. mov r9,QWORD PTR[8+rsp]
  977. mov r10,QWORD PTR[16+rsp]
  978. mov r11,QWORD PTR[24+rsp]
  979. mov r12,QWORD PTR[32+rsp]
  980. mov r13,QWORD PTR[40+rsp]
  981. mov r14,QWORD PTR[48+rsp]
  982. mov r15,QWORD PTR[56+rsp]
  983. call __rsaz_512_reducex
  ; ---- common tail: add the high eight limbs; rcx = all-ones on carry-out, else zero ----
  984. $L$mul_gather_tail::
  985. add r8,QWORD PTR[64+rsp]
  986. adc r9,QWORD PTR[72+rsp]
  987. adc r10,QWORD PTR[80+rsp]
  988. adc r11,QWORD PTR[88+rsp]
  989. adc r12,QWORD PTR[96+rsp]
  990. adc r13,QWORD PTR[104+rsp]
  991. adc r14,QWORD PTR[112+rsp]
  992. adc r15,QWORD PTR[120+rsp]
  993. sbb rcx,rcx
  994. call __rsaz_512_subtract
  ; rax = rsp+200, so (N-200)+rax addresses the xmm save slot at N+rsp.
  995. lea rax,QWORD PTR[((128+24+48))+rsp]
  996. movaps xmm6,XMMWORD PTR[((160-200))+rax]
  997. movaps xmm7,XMMWORD PTR[((176-200))+rax]
  998. movaps xmm8,XMMWORD PTR[((192-200))+rax]
  999. movaps xmm9,XMMWORD PTR[((208-200))+rax]
  1000. movaps xmm10,XMMWORD PTR[((224-200))+rax]
  1001. movaps xmm11,XMMWORD PTR[((240-200))+rax]
  1002. movaps xmm12,XMMWORD PTR[((256-200))+rax]
  1003. movaps xmm13,XMMWORD PTR[((272-200))+rax]
  1004. movaps xmm14,XMMWORD PTR[((288-200))+rax]
  1005. movaps xmm15,XMMWORD PTR[((304-200))+rax]
  ; rax += 176, giving rsp at entry (200+176 = 328-byte frame plus six pushes).
  1006. lea rax,QWORD PTR[176+rax]
  1007. mov r15,QWORD PTR[((-48))+rax]
  1008. mov r14,QWORD PTR[((-40))+rax]
  1009. mov r13,QWORD PTR[((-32))+rax]
  1010. mov r12,QWORD PTR[((-24))+rax]
  1011. mov rbp,QWORD PTR[((-16))+rax]
  1012. mov rbx,QWORD PTR[((-8))+rax]
  1013. lea rsp,QWORD PTR[rax]
  1014. $L$mul_gather4_epilogue::
  1015. mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue
  1016. mov rsi,QWORD PTR[16+rsp]
  1017. DB 0F3h,0C3h ;repret
  1018. $L$SEH_end_rsaz_512_mul_gather4::
  1019. rsaz_512_mul_gather4 ENDP
  ;-----------------------------------------------------------------------
  ; rsaz_512_mul_scatter4 -- Win64 entry point.
  ;
  ; The six incoming Win64 arguments (rcx, rdx, r8, r9, [40+rsp], [48+rsp])
  ; are remapped onto rdi, rsi, rdx, rcx, r8, r9.  After the remap:
  ;   rdi = 8-word result buffer; it is also read as one multiplicand
  ;         (its first word is handed to the multiply helper and rbp = rdi
  ;         lets the helper walk the remaining words)
  ;   rsi = the other 8-word multiplicand (read by the multiply helpers)
  ;   rdx = 8-word modulus pointer (parked in xmm1, reloaded into rbp)
  ;   rcx = per-modulus constant consumed by the reduce helpers, kept at
  ;         [128+rsp] (presumably the Montgomery n0 value -- confirm)
  ;   r8  = scatter table base, r9d = 32-bit entry index
  ;
  ; Flow: multiply -> reduce -> add upper product half -> masked subtract
  ; (which also writes the result to [rdi]) -> scatter the eight result
  ; words to table + 8*index + 128*i, i = 0..7.
  ;
  ; Frame: six pushed registers (48 bytes) + 128+24 bytes of scratch.
  ; The labels $L$mul_scatter4_body / $L$mul_scatter4_epilogue and this
  ; exact frame size are consumed by se_handler via the .xdata record.
  ;-----------------------------------------------------------------------
  1020. PUBLIC rsaz_512_mul_scatter4
  1021. ALIGN 32
  1022. rsaz_512_mul_scatter4 PROC PUBLIC
  1023. mov QWORD PTR[8+rsp],rdi ;WIN64 prologue: rdi/rsi are nonvolatile on Win64, park them in the caller's home slots
  1024. mov QWORD PTR[16+rsp],rsi
  1025. mov rax,rsp ; rax = entry rsp; se_handler uses context->Rax as the frame while still in the prologue
  1026. $L$SEH_begin_rsaz_512_mul_scatter4::
  1027. mov rdi,rcx
  1028. mov rsi,rdx
  1029. mov rdx,r8
  1030. mov rcx,r9
  1031. mov r8,QWORD PTR[40+rsp] ; 5th argument (first stack argument above the 32-byte home area)
  1032. mov r9,QWORD PTR[48+rsp] ; 6th argument
  1033. push rbx
  1034. push rbp
  1035. push r12
  1036. push r13
  1037. push r14
  1038. push r15
  1039. mov r9d,r9d ; zero-extend the 32-bit index before it is used in an address
  1040. sub rsp,128+24
  1041. $L$mul_scatter4_body::
  1042. lea r8,QWORD PTR[r9*8+r8] ; r8 = table + 8*index
  1043. DB 102,72,15,110,199 ; movq xmm0,rdi  (save result pointer)
  1044. DB 102,72,15,110,202 ; movq xmm1,rdx  (save modulus pointer)
  1045. DB 102,73,15,110,208 ; movq xmm2,r8   (save scatter destination)
  1046. mov QWORD PTR[128+rsp],rcx ; the reduce helpers read this slot as [(128+8)+rsp] from inside the call
  1047. mov rbp,rdi
  ; Take the mulx/adcx/adox path only when both bits of mask 080100h are set
  ; in the capability word (presumably the BMI2 and ADX feature bits).
  1048. mov r11d,080100h
  1049. and r11d,DWORD PTR[((OPENSSL_ia32cap_P+8))]
  1050. cmp r11d,080100h
  1051. je $L$mulx_scatter
  1052. mov rbx,QWORD PTR[rdi] ; first multiplier word for __rsaz_512_mul
  1053. call __rsaz_512_mul
  1054. DB 102,72,15,126,199 ; movq rdi,xmm0  (rdi was clobbered by the helper)
  1055. DB 102,72,15,126,205 ; movq rbp,xmm1  (rbp = modulus pointer for the reduction)
  ; Load the low eight product words; the upper eight stay at [64+rsp].
  1056. mov r8,QWORD PTR[rsp]
  1057. mov r9,QWORD PTR[8+rsp]
  1058. mov r10,QWORD PTR[16+rsp]
  1059. mov r11,QWORD PTR[24+rsp]
  1060. mov r12,QWORD PTR[32+rsp]
  1061. mov r13,QWORD PTR[40+rsp]
  1062. mov r14,QWORD PTR[48+rsp]
  1063. mov r15,QWORD PTR[56+rsp]
  1064. call __rsaz_512_reduce
  1065. jmp $L$mul_scatter_tail
  1066. ALIGN 32
  1067. $L$mulx_scatter::
  ; Reached through je after cmp, so CF = 0 here; __rsaz_512_mulx starts its
  ; carry chain with adc and relies on that.
  1068. mov rdx,QWORD PTR[rdi] ; first multiplier word for __rsaz_512_mulx
  1069. call __rsaz_512_mulx
  1070. DB 102,72,15,126,199 ; movq rdi,xmm0
  1071. DB 102,72,15,126,205 ; movq rbp,xmm1
  1072. mov rdx,QWORD PTR[128+rsp] ; __rsaz_512_reducex expects the constant in rdx
  1073. mov r8,QWORD PTR[rsp]
  1074. mov r9,QWORD PTR[8+rsp]
  1075. mov r10,QWORD PTR[16+rsp]
  1076. mov r11,QWORD PTR[24+rsp]
  1077. mov r12,QWORD PTR[32+rsp]
  1078. mov r13,QWORD PTR[40+rsp]
  1079. mov r14,QWORD PTR[48+rsp]
  1080. mov r15,QWORD PTR[56+rsp]
  1081. call __rsaz_512_reducex
  1082. $L$mul_scatter_tail::
  ; Add the upper half of the product to the reduced low half.
  1083. add r8,QWORD PTR[64+rsp]
  1084. adc r9,QWORD PTR[72+rsp]
  1085. adc r10,QWORD PTR[80+rsp]
  1086. adc r11,QWORD PTR[88+rsp]
  1087. adc r12,QWORD PTR[96+rsp]
  1088. adc r13,QWORD PTR[104+rsp]
  1089. adc r14,QWORD PTR[112+rsp]
  1090. adc r15,QWORD PTR[120+rsp]
  1091. DB 102,72,15,126,214 ; movq rsi,xmm2  (scatter destination; movq leaves the flags alone)
  1092. sbb rcx,rcx ; rcx = all ones if the addition carried out, else zero
  1093. call __rsaz_512_subtract
  ; Scatter: consecutive result words are 128 bytes apart in the table.
  1094. mov QWORD PTR[rsi],r8
  1095. mov QWORD PTR[128+rsi],r9
  1096. mov QWORD PTR[256+rsi],r10
  1097. mov QWORD PTR[384+rsi],r11
  1098. mov QWORD PTR[512+rsi],r12
  1099. mov QWORD PTR[640+rsi],r13
  1100. mov QWORD PTR[768+rsi],r14
  1101. mov QWORD PTR[896+rsi],r15
  1102. lea rax,QWORD PTR[((128+24+48))+rsp] ; rax = entry rsp (scratch + six pushes)
  1103. mov r15,QWORD PTR[((-48))+rax]
  1104. mov r14,QWORD PTR[((-40))+rax]
  1105. mov r13,QWORD PTR[((-32))+rax]
  1106. mov r12,QWORD PTR[((-24))+rax]
  1107. mov rbp,QWORD PTR[((-16))+rax]
  1108. mov rbx,QWORD PTR[((-8))+rax]
  1109. lea rsp,QWORD PTR[rax]
  1110. $L$mul_scatter4_epilogue::
  1111. mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue: restore rdi/rsi from the home slots
  1112. mov rsi,QWORD PTR[16+rsp]
  1113. DB 0F3h,0C3h ;repret (rep ret)
  1114. $L$SEH_end_rsaz_512_mul_scatter4::
  1115. rsaz_512_mul_scatter4 ENDP
  ;-----------------------------------------------------------------------
  ; rsaz_512_mul_by_one -- Win64 entry point.
  ;
  ; The four Win64 arguments (rcx, rdx, r8, r9) are remapped onto
  ; rdi, rsi, rdx, rcx.  After the remap:
  ;   rdi = 8-word output buffer
  ;   rsi = 8-word input (loaded into r8..r15)
  ;   rdx = 8-word modulus pointer (moved to rbp for the reduce helper)
  ;   rcx = per-modulus constant stored at [128+rsp] for the reduce helper
  ;         (presumably the Montgomery n0 value -- confirm)
  ; The input is run through one pass of __rsaz_512_reduce or
  ; __rsaz_512_reducex and the eight result words are stored to [rdi].
  ; No final add or conditional subtraction is performed here.
  ;
  ; Frame: six pushed registers + 128+24 bytes of scratch; the body and
  ; epilogue labels are consumed by se_handler via the .xdata record.
  ;-----------------------------------------------------------------------
  1116. PUBLIC rsaz_512_mul_by_one
  1117. ALIGN 32
  1118. rsaz_512_mul_by_one PROC PUBLIC
  1119. mov QWORD PTR[8+rsp],rdi ;WIN64 prologue: park nonvolatile rdi/rsi in the caller's home slots
  1120. mov QWORD PTR[16+rsp],rsi
  1121. mov rax,rsp ; rax = entry rsp, used by se_handler while still in the prologue
  1122. $L$SEH_begin_rsaz_512_mul_by_one::
  1123. mov rdi,rcx
  1124. mov rsi,rdx
  1125. mov rdx,r8
  1126. mov rcx,r9
  1127. push rbx
  1128. push rbp
  1129. push r12
  1130. push r13
  1131. push r14
  1132. push r15
  1133. sub rsp,128+24 ; with an ABI-aligned caller rsp is 16-byte aligned from here on (needed by movdqa below)
  1134. $L$mul_by_one_body::
  1135. mov eax,DWORD PTR[((OPENSSL_ia32cap_P+8))] ; capability word, tested below
  1136. mov rbp,rdx ; rbp = modulus pointer for the reduce helper
  1137. mov QWORD PTR[128+rsp],rcx ; read by __rsaz_512_reduce as [(128+8)+rsp] from inside the call
  1138. mov r8,QWORD PTR[rsi]
  1139. pxor xmm0,xmm0
  1140. mov r9,QWORD PTR[8+rsi]
  1141. mov r10,QWORD PTR[16+rsi]
  1142. mov r11,QWORD PTR[24+rsi]
  1143. mov r12,QWORD PTR[32+rsi]
  1144. mov r13,QWORD PTR[40+rsi]
  1145. mov r14,QWORD PTR[48+rsi]
  1146. mov r15,QWORD PTR[56+rsi]
  ; Zero the first 112 bytes of the scratch frame.  NOTE(review): nothing in
  ; this routine or in the reduce helpers reads these bytes back; the reason
  ; for clearing them is not visible here.
  1147. movdqa XMMWORD PTR[rsp],xmm0
  1148. movdqa XMMWORD PTR[16+rsp],xmm0
  1149. movdqa XMMWORD PTR[32+rsp],xmm0
  1150. movdqa XMMWORD PTR[48+rsp],xmm0
  1151. movdqa XMMWORD PTR[64+rsp],xmm0
  1152. movdqa XMMWORD PTR[80+rsp],xmm0
  1153. movdqa XMMWORD PTR[96+rsp],xmm0
  ; Use the mulx/adcx/adox reduction only when both bits of mask 080100h are
  ; set (presumably the BMI2 and ADX feature bits).
  1154. and eax,080100h
  1155. cmp eax,080100h
  1156. je $L$by_one_callx
  1157. call __rsaz_512_reduce
  1158. jmp $L$by_one_tail
  1159. ALIGN 32
  1160. $L$by_one_callx::
  1161. mov rdx,QWORD PTR[128+rsp] ; __rsaz_512_reducex expects the constant in rdx
  1162. call __rsaz_512_reducex
  1163. $L$by_one_tail::
  1164. mov QWORD PTR[rdi],r8
  1165. mov QWORD PTR[8+rdi],r9
  1166. mov QWORD PTR[16+rdi],r10
  1167. mov QWORD PTR[24+rdi],r11
  1168. mov QWORD PTR[32+rdi],r12
  1169. mov QWORD PTR[40+rdi],r13
  1170. mov QWORD PTR[48+rdi],r14
  1171. mov QWORD PTR[56+rdi],r15
  1172. lea rax,QWORD PTR[((128+24+48))+rsp] ; rax = entry rsp (scratch + six pushes)
  1173. mov r15,QWORD PTR[((-48))+rax]
  1174. mov r14,QWORD PTR[((-40))+rax]
  1175. mov r13,QWORD PTR[((-32))+rax]
  1176. mov r12,QWORD PTR[((-24))+rax]
  1177. mov rbp,QWORD PTR[((-16))+rax]
  1178. mov rbx,QWORD PTR[((-8))+rax]
  1179. lea rsp,QWORD PTR[rax]
  1180. $L$mul_by_one_epilogue::
  1181. mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue: restore rdi/rsi from the home slots
  1182. mov rsi,QWORD PTR[16+rsp]
  1183. DB 0F3h,0C3h ;repret (rep ret)
  1184. $L$SEH_end_rsaz_512_mul_by_one::
  1185. rsaz_512_mul_by_one ENDP
  ;-----------------------------------------------------------------------
  ; __rsaz_512_reduce -- private helper, register interface (not Win64 ABI).
  ;
  ; In:   r8..r15       = 8-word value to reduce (r8 is the low word)
  ;       rbp           = pointer to the 8-word modulus
  ;       [(128+8)+rsp] = per-modulus constant, i.e. the caller's [128+rsp]
  ;                       seen through the return address
  ; Out:  r8..r15       = value after eight reduction steps
  ; Clobbers: rax, rbx, rcx, rdx, rsi, flags
  ;
  ; Each of the eight iterations computes m = r8 * constant (low 64 bits),
  ; adds m * modulus to the accumulator and shifts it down one word, so the
  ; old r8 drops out and a new top word is produced in r15.  The multiplier
  ; for the next iteration is computed inside the current one (rsi) as soon
  ; as the new r8 is final.
  ;-----------------------------------------------------------------------
  1186. ALIGN 32
  1187. __rsaz_512_reduce PROC PRIVATE
  1188. mov rbx,r8
  1189. imul rbx,QWORD PTR[((128+8))+rsp] ; rbx = m for the first iteration
  1190. mov rax,QWORD PTR[rbp]
  1191. mov ecx,8 ; eight words -> eight iterations
  1192. jmp $L$reduction_loop
  1193. ALIGN 32
  1194. $L$reduction_loop::
  ; word 0: the low product word is never added explicitly; neg r8 only
  ; produces CF = (r8 != 0), which is the carry out of r8 + low(m*mod[0])
  ; assuming the constant makes that sum zero modulo 2^64 -- TODO confirm.
  1195. mul rbx
  1196. mov rax,QWORD PTR[8+rbp]
  1197. neg r8
  1198. mov r8,rdx
  1199. adc r8,0
  ; word 1
  1200. mul rbx
  1201. add r9,rax
  1202. mov rax,QWORD PTR[16+rbp]
  1203. adc rdx,0
  1204. add r8,r9
  1205. mov r9,rdx
  1206. adc r9,0
  ; word 2
  1207. mul rbx
  1208. add r10,rax
  1209. mov rax,QWORD PTR[24+rbp]
  1210. adc rdx,0
  1211. add r9,r10
  1212. mov r10,rdx
  1213. adc r10,0
  ; word 3 (also starts fetching the constant for the next m)
  1214. mul rbx
  1215. add r11,rax
  1216. mov rax,QWORD PTR[32+rbp]
  1217. adc rdx,0
  1218. add r10,r11
  1219. mov rsi,QWORD PTR[((128+8))+rsp]
  1220. adc rdx,0
  1221. mov r11,rdx
  ; word 4
  1222. mul rbx
  1223. add r12,rax
  1224. mov rax,QWORD PTR[40+rbp]
  1225. adc rdx,0
  1226. imul rsi,r8 ; rsi = next m; r8 is final for this iteration (imul's flags are overwritten by the add below)
  1227. add r11,r12
  1228. mov r12,rdx
  1229. adc r12,0
  ; word 5
  1230. mul rbx
  1231. add r13,rax
  1232. mov rax,QWORD PTR[48+rbp]
  1233. adc rdx,0
  1234. add r12,r13
  1235. mov r13,rdx
  1236. adc r13,0
  ; word 6
  1237. mul rbx
  1238. add r14,rax
  1239. mov rax,QWORD PTR[56+rbp]
  1240. adc rdx,0
  1241. add r13,r14
  1242. mov r14,rdx
  1243. adc r14,0
  ; word 7; rbx and rax are re-primed for the next iteration
  1244. mul rbx
  1245. mov rbx,rsi
  1246. add r15,rax
  1247. mov rax,QWORD PTR[rbp]
  1248. adc rdx,0
  1249. add r14,r15
  1250. mov r15,rdx
  1251. adc r15,0
  1252. dec ecx
  1253. jne $L$reduction_loop
  1254. DB 0F3h,0C3h ;repret (rep ret)
  1255. __rsaz_512_reduce ENDP
  ;-----------------------------------------------------------------------
  ; __rsaz_512_reducex -- mulx/adcx/adox variant of __rsaz_512_reduce.
  ; Private helper, register interface (not Win64 ABI).
  ;
  ; In:   r8..r15       = 8-word value to reduce (r8 is the low word)
  ;       rbp           = pointer to the 8-word modulus
  ;       rdx           = per-modulus constant (callers load it from
  ;                       their [128+rsp])
  ;       [(128+8)+rsp] = the same constant, re-read inside the loop
  ; Out:  r8..r15       = value after eight reduction steps
  ; Clobbers: rax, rbx, rcx, rdx, rsi, flags
  ;
  ; Two independent carry chains run in parallel: adcx propagates through
  ; CF (low product halves), adox through OF (accumulator words).  rsi is
  ; kept at zero and is used to close both chains.
  ;-----------------------------------------------------------------------
  1256. ALIGN 32
  1257. __rsaz_512_reducex PROC PRIVATE
  1258. imul rdx,r8 ; rdx = m for the first iteration (implicit mulx source)
  1259. xor rsi,rsi ; rsi = 0, and CF = OF = 0 to start both carry chains
  1260. mov ecx,8 ; eight words -> eight iterations
  1261. jmp $L$reduction_loopx
  1262. ALIGN 32
  1263. $L$reduction_loopx::
  1264. mov rbx,r8
  1265. mulx r8,rax,QWORD PTR[rbp]
  1266. adcx rax,rbx ; sum is discarded; only its carry (CF) is kept
  1267. adox r8,r9
  1268. mulx r9,rax,QWORD PTR[8+rbp]
  1269. adcx r8,rax ; r8 is final for this iteration after this add
  1270. adox r9,r10
  1271. mulx r10,rbx,QWORD PTR[16+rbp]
  1272. adcx r9,rbx
  1273. adox r10,r11
  1274. mulx r11,rbx,QWORD PTR[24+rbp]
  1275. adcx r10,rbx
  1276. adox r11,r12
  1277. DB 0c4h,062h,0e3h,0f6h,0a5h,020h,000h,000h,000h ; mulx r12,rbx,QWORD PTR[32+rbp] (long-displacement encoding)
  ; Compute the next m = low64(r8 * constant) while the chains are in
  ; flight: mulx/mov do not touch the flags.  rdx is borrowed and restored.
  1278. mov rax,rdx
  1279. mov rdx,r8
  1280. adcx r11,rbx
  1281. adox r12,r13
  1282. mulx rdx,rbx,QWORD PTR[((128+8))+rsp] ; rbx = next m (high half in rdx is overwritten below)
  1283. mov rdx,rax
  1284. mulx r13,rax,QWORD PTR[40+rbp]
  1285. adcx r12,rax
  1286. adox r13,r14
  1287. DB 0c4h,062h,0fbh,0f6h,0b5h,030h,000h,000h,000h ; mulx r14,rax,QWORD PTR[48+rbp]
  1288. adcx r13,rax
  1289. adox r14,r15
  1290. mulx r15,rax,QWORD PTR[56+rbp]
  1291. mov rdx,rbx ; install the next m
  1292. adcx r14,rax
  1293. adox r15,rsi ; fold the OF chain into the top word
  1294. adcx r15,rsi ; fold the CF chain into the top word
  1295. dec ecx ; dec leaves CF untouched; both chains are expected to be clear for the next pass
  1296. jne $L$reduction_loopx
  1297. DB 0F3h,0C3h ;repret (rep ret)
  1298. __rsaz_512_reducex ENDP
  ;-----------------------------------------------------------------------
  ; __rsaz_512_subtract -- private helper, register interface.
  ;
  ; In:   r8..r15 = 8-word value (r8 is the low word)
  ;       rdi     = 8-word output buffer
  ;       rbp     = pointer to the 8-word modulus
  ;       rcx     = mask; the callers in this file build it with
  ;                 "sbb rcx,rcx", i.e. zero or all ones
  ; Out:  r8..r15 and [rdi..rdi+63] = value + ((-modulus) AND mask)
  ;       flags   = those of the final adc
  ;
  ; The negated modulus is formed as neg(low word) / not(other words),
  ; which equals the 512-bit two's complement provided the low modulus
  ; word is non-zero (true for an odd modulus -- assumed, not checked).
  ; The same instruction sequence runs whatever the mask is; there is no
  ; branch on rcx.
  ;-----------------------------------------------------------------------
  ALIGN   32
  __rsaz_512_subtract PROC PRIVATE
          ; Park the incoming value in the output buffer.
          mov     QWORD PTR [rdi],r8
          mov     QWORD PTR [rdi+8],r9
          mov     QWORD PTR [rdi+16],r10
          mov     QWORD PTR [rdi+24],r11
          mov     QWORD PTR [rdi+32],r12
          mov     QWORD PTR [rdi+40],r13
          mov     QWORD PTR [rdi+48],r14
          mov     QWORD PTR [rdi+56],r15

          ; Fetch the modulus.
          mov     r8,QWORD PTR [rbp]
          mov     r9,QWORD PTR [rbp+8]
          mov     r10,QWORD PTR [rbp+16]
          mov     r11,QWORD PTR [rbp+24]
          mov     r12,QWORD PTR [rbp+32]
          mov     r13,QWORD PTR [rbp+40]
          mov     r14,QWORD PTR [rbp+48]
          mov     r15,QWORD PTR [rbp+56]

          ; Negate it: two's complement of the low word, one's complement
          ; of the rest.
          neg     r8
          not     r9
          not     r10
          not     r11
          not     r12
          not     r13
          not     r14
          not     r15

          ; Keep the negated modulus only where the mask is set.
          and     r8,rcx
          and     r9,rcx
          and     r10,rcx
          and     r11,rcx
          and     r12,rcx
          and     r13,rcx
          and     r14,rcx
          and     r15,rcx

          ; value + masked(-modulus), carry rippling upwards.
          add     r8,QWORD PTR [rdi]
          adc     r9,QWORD PTR [rdi+8]
          adc     r10,QWORD PTR [rdi+16]
          adc     r11,QWORD PTR [rdi+24]
          adc     r12,QWORD PTR [rdi+32]
          adc     r13,QWORD PTR [rdi+40]
          adc     r14,QWORD PTR [rdi+48]
          adc     r15,QWORD PTR [rdi+56]

          ; Publish the final result.
          mov     QWORD PTR [rdi],r8
          mov     QWORD PTR [rdi+8],r9
          mov     QWORD PTR [rdi+16],r10
          mov     QWORD PTR [rdi+24],r11
          mov     QWORD PTR [rdi+32],r12
          mov     QWORD PTR [rdi+40],r13
          mov     QWORD PTR [rdi+48],r14
          mov     QWORD PTR [rdi+56],r15
          DB      0F3h,0C3h               ;repret (rep ret)
  __rsaz_512_subtract ENDP
  ;-----------------------------------------------------------------------
  ; __rsaz_512_mul -- private helper, register interface (not Win64 ABI).
  ; Schoolbook 8x8-word multiplication using mul/add/adc.
  ;
  ; In:   rsi = pointer to the 8-word multiplicand a
  ;       rbp = pointer to the 8-word multiplier b
  ;       rbx = b[0] (preloaded by the caller)
  ; Out:  16-word product at [8+rsp] .. [8+rsp+127] as seen here, i.e. the
  ;       caller's [rsp] .. [rsp+127]; the upper eight words are also left
  ;       in r8..r15
  ; Clobbers: rax, rbx, rcx, rdx, rdi, rbp, r8..r15, flags
  ;-----------------------------------------------------------------------
  1351. ALIGN 32
  1352. __rsaz_512_mul PROC PRIVATE
  1353. lea rdi,QWORD PTR[8+rsp] ; rdi = write cursor into the caller's scratch (skip the return address)
  ; First row: a * b[0].  The low word is stored, the rest stays in r8..r15.
  1354. mov rax,QWORD PTR[rsi]
  1355. mul rbx
  1356. mov QWORD PTR[rdi],rax
  1357. mov rax,QWORD PTR[8+rsi]
  1358. mov r8,rdx
  1359. mul rbx
  1360. add r8,rax
  1361. mov rax,QWORD PTR[16+rsi]
  1362. mov r9,rdx
  1363. adc r9,0
  1364. mul rbx
  1365. add r9,rax
  1366. mov rax,QWORD PTR[24+rsi]
  1367. mov r10,rdx
  1368. adc r10,0
  1369. mul rbx
  1370. add r10,rax
  1371. mov rax,QWORD PTR[32+rsi]
  1372. mov r11,rdx
  1373. adc r11,0
  1374. mul rbx
  1375. add r11,rax
  1376. mov rax,QWORD PTR[40+rsi]
  1377. mov r12,rdx
  1378. adc r12,0
  1379. mul rbx
  1380. add r12,rax
  1381. mov rax,QWORD PTR[48+rsi]
  1382. mov r13,rdx
  1383. adc r13,0
  1384. mul rbx
  1385. add r13,rax
  1386. mov rax,QWORD PTR[56+rsi]
  1387. mov r14,rdx
  1388. adc r14,0
  1389. mul rbx
  1390. add r14,rax
  1391. mov rax,QWORD PTR[rsi] ; re-prime rax with a[0] for the loop
  1392. mov r15,rdx
  1393. adc r15,0
  1394. lea rbp,QWORD PTR[8+rbp] ; advance to b[1]
  1395. lea rdi,QWORD PTR[8+rdi]
  1396. mov ecx,7 ; seven remaining multiplier words
  1397. jmp $L$oop_mul
  1398. ALIGN 32
  1399. $L$oop_mul::
  ; One row per iteration: accumulate a * b[i] into r8..r15, emit the
  ; finished low word to [rdi] and shift the window up by one word.
  1400. mov rbx,QWORD PTR[rbp]
  1401. mul rbx
  1402. add r8,rax
  1403. mov rax,QWORD PTR[8+rsi]
  1404. mov QWORD PTR[rdi],r8 ; this product word is complete (mov keeps CF for the adc below)
  1405. mov r8,rdx
  1406. adc r8,0
  1407. mul rbx
  1408. add r9,rax
  1409. mov rax,QWORD PTR[16+rsi]
  1410. adc rdx,0
  1411. add r8,r9
  1412. mov r9,rdx
  1413. adc r9,0
  1414. mul rbx
  1415. add r10,rax
  1416. mov rax,QWORD PTR[24+rsi]
  1417. adc rdx,0
  1418. add r9,r10
  1419. mov r10,rdx
  1420. adc r10,0
  1421. mul rbx
  1422. add r11,rax
  1423. mov rax,QWORD PTR[32+rsi]
  1424. adc rdx,0
  1425. add r10,r11
  1426. mov r11,rdx
  1427. adc r11,0
  1428. mul rbx
  1429. add r12,rax
  1430. mov rax,QWORD PTR[40+rsi]
  1431. adc rdx,0
  1432. add r11,r12
  1433. mov r12,rdx
  1434. adc r12,0
  1435. mul rbx
  1436. add r13,rax
  1437. mov rax,QWORD PTR[48+rsi]
  1438. adc rdx,0
  1439. add r12,r13
  1440. mov r13,rdx
  1441. adc r13,0
  1442. mul rbx
  1443. add r14,rax
  1444. mov rax,QWORD PTR[56+rsi]
  1445. adc rdx,0
  1446. add r13,r14
  1447. mov r14,rdx
  1448. lea rbp,QWORD PTR[8+rbp] ; next multiplier word; lea does not disturb CF
  1449. adc r14,0
  1450. mul rbx
  1451. add r15,rax
  1452. mov rax,QWORD PTR[rsi] ; re-prime rax with a[0] for the next row
  1453. adc rdx,0
  1454. add r14,r15
  1455. mov r15,rdx
  1456. adc r15,0
  1457. lea rdi,QWORD PTR[8+rdi]
  1458. dec ecx
  1459. jnz $L$oop_mul
  ; Flush the upper eight product words.
  1460. mov QWORD PTR[rdi],r8
  1461. mov QWORD PTR[8+rdi],r9
  1462. mov QWORD PTR[16+rdi],r10
  1463. mov QWORD PTR[24+rdi],r11
  1464. mov QWORD PTR[32+rdi],r12
  1465. mov QWORD PTR[40+rdi],r13
  1466. mov QWORD PTR[48+rdi],r14
  1467. mov QWORD PTR[56+rdi],r15
  1468. DB 0F3h,0C3h ;repret (rep ret)
  1469. __rsaz_512_mul ENDP
  ;-----------------------------------------------------------------------
  ; __rsaz_512_mulx -- mulx/adcx/adox variant of __rsaz_512_mul.
  ; Private helper, register interface (not Win64 ABI).
  ;
  ; In:   rsi = pointer to the 8-word multiplicand a
  ;       rbp = pointer to the 8-word multiplier b
  ;       rdx = b[0] (preloaded by the caller)
  ;       CF  = 0   (required: the first row starts its chain with adc and
  ;                  nothing before it in this routine writes the flags;
  ;                  the callers visible in this file arrive via
  ;                  cmp/je, which leaves CF clear)
  ; Out:  16-word product at [8+rsp] .. [8+rsp+127] as seen here, i.e. the
  ;       caller's [rsp] .. [rsp+127]; the upper eight words are also left
  ;       in r8..r15
  ; Clobbers: rax, rbx, rcx, rdx, rdi, r8..r15, flags
  ;-----------------------------------------------------------------------
  1470. ALIGN 32
  1471. __rsaz_512_mulx PROC PRIVATE
  ; First row: a * b[0] with a single adc chain (mulx does not write flags).
  1472. mulx r8,rbx,QWORD PTR[rsi]
  1473. mov rcx,-6 ; loop counter: six rows (b[1]..b[6]), counting up to zero
  1474. mulx r9,rax,QWORD PTR[8+rsi]
  1475. mov QWORD PTR[8+rsp],rbx ; product word 0
  1476. mulx r10,rbx,QWORD PTR[16+rsi]
  1477. adc r8,rax
  1478. mulx r11,rax,QWORD PTR[24+rsi]
  1479. adc r9,rbx
  1480. mulx r12,rbx,QWORD PTR[32+rsi]
  1481. adc r10,rax
  1482. mulx r13,rax,QWORD PTR[40+rsi]
  1483. adc r11,rbx
  1484. mulx r14,rbx,QWORD PTR[48+rsi]
  1485. adc r12,rax
  1486. mulx r15,rax,QWORD PTR[56+rsi]
  1487. mov rdx,QWORD PTR[8+rbp] ; rdx = b[1] for the first loop row
  1488. adc r13,rbx
  1489. adc r14,rax
  1490. adc r15,0
  1491. xor rdi,rdi ; rdi = 0, and CF = OF = 0 for the dual carry chains
  1492. jmp $L$oop_mulx
  1493. ALIGN 32
  1494. $L$oop_mulx::
  ; One row per iteration: adcx (CF chain) adds the low product halves,
  ; adox (OF chain) folds in the previous accumulator words.
  1495. mov rbx,r8
  1496. mulx r8,rax,QWORD PTR[rsi]
  1497. adcx rbx,rax ; rbx = finished product word for this row
  1498. adox r8,r9
  1499. mulx r9,rax,QWORD PTR[8+rsi]
  1500. adcx r8,rax
  1501. adox r9,r10
  1502. mulx r10,rax,QWORD PTR[16+rsi]
  1503. adcx r9,rax
  1504. adox r10,r11
  1505. mulx r11,rax,QWORD PTR[24+rsi]
  1506. adcx r10,rax
  1507. adox r11,r12
  1508. DB 03eh,0c4h,062h,0fbh,0f6h,0a6h,020h,000h,000h,000h ; mulx r12,rax,QWORD PTR[32+rsi] with a 3Eh prefix byte as padding
  1509. adcx r11,rax
  1510. adox r12,r13
  1511. mulx r13,rax,QWORD PTR[40+rsi]
  1512. adcx r12,rax
  1513. adox r13,r14
  1514. mulx r14,rax,QWORD PTR[48+rsi]
  1515. adcx r13,rax
  1516. adox r14,r15
  1517. mulx r15,rax,QWORD PTR[56+rsi]
  1518. mov rdx,QWORD PTR[64+rcx*8+rbp] ; next multiplier word: b[2] when rcx = -6 ... b[7] when rcx = -1
  1519. mov QWORD PTR[((8+64-8))+rcx*8+rsp],rbx ; product word 1 when rcx = -6 ... word 6 when rcx = -1
  1520. adcx r14,rax
  1521. adox r15,rdi ; close the OF chain
  1522. adcx r15,rdi ; close the CF chain
  1523. inc rcx ; inc leaves CF untouched
  1524. jnz $L$oop_mulx
  ; Last row (b[7]), unrolled so that no further multiplier word is loaded.
  1525. mov rbx,r8
  1526. mulx r8,rax,QWORD PTR[rsi]
  1527. adcx rbx,rax
  1528. adox r8,r9
  1529. DB 0c4h,062h,0fbh,0f6h,08eh,008h,000h,000h,000h ; mulx r9,rax,QWORD PTR[8+rsi]
  1530. adcx r8,rax
  1531. adox r9,r10
  1532. DB 0c4h,062h,0fbh,0f6h,096h,010h,000h,000h,000h ; mulx r10,rax,QWORD PTR[16+rsi]
  1533. adcx r9,rax
  1534. adox r10,r11
  1535. mulx r11,rax,QWORD PTR[24+rsi]
  1536. adcx r10,rax
  1537. adox r11,r12
  1538. mulx r12,rax,QWORD PTR[32+rsi]
  1539. adcx r11,rax
  1540. adox r12,r13
  1541. mulx r13,rax,QWORD PTR[40+rsi]
  1542. adcx r12,rax
  1543. adox r13,r14
  1544. DB 0c4h,062h,0fbh,0f6h,0b6h,030h,000h,000h,000h ; mulx r14,rax,QWORD PTR[48+rsi]
  1545. adcx r13,rax
  1546. adox r14,r15
  1547. DB 0c4h,062h,0fbh,0f6h,0beh,038h,000h,000h,000h ; mulx r15,rax,QWORD PTR[56+rsi]
  1548. adcx r14,rax
  1549. adox r15,rdi
  1550. adcx r15,rdi
  ; Flush product word 7 and the upper eight words.
  1551. mov QWORD PTR[((8+64-8))+rsp],rbx
  1552. mov QWORD PTR[((8+64))+rsp],r8
  1553. mov QWORD PTR[((8+64+8))+rsp],r9
  1554. mov QWORD PTR[((8+64+16))+rsp],r10
  1555. mov QWORD PTR[((8+64+24))+rsp],r11
  1556. mov QWORD PTR[((8+64+32))+rsp],r12
  1557. mov QWORD PTR[((8+64+40))+rsp],r13
  1558. mov QWORD PTR[((8+64+48))+rsp],r14
  1559. mov QWORD PTR[((8+64+56))+rsp],r15
  1560. DB 0F3h,0C3h ;repret (rep ret)
  1561. __rsaz_512_mulx ENDP
  ;-----------------------------------------------------------------------
  ; rsaz_512_scatter4 -- Win64 leaf routine (no stack frame, no unwind data).
  ;
  ; In:   rcx = table base
  ;       rdx = pointer to the eight 64-bit source words
  ;       r8d = entry index (32-bit)
  ; Out:  word i (i = 0..7) of the source is stored at
  ;       table + 8*index + 128*i, i.e. consecutive words of one entry are
  ;       128 bytes apart.
  ; Clobbers: rax, rcx, rdx, r8, r9, flags (all volatile under Win64)
  ;
  ; The index is zero-extended before it is scaled into the address.  The
  ; sibling routines treat their index as a 32-bit quantity
  ; (rsaz_512_mul_scatter4 does "mov r9d,r9d", rsaz_512_gather4 reads only
  ; r8d), and under the Microsoft x64 convention the upper half of a
  ; register carrying a 32-bit argument is not guaranteed to be zero.
  ; NOTE(review): assumes the C prototype declares the index as a 32-bit
  ; integer -- confirm against the caller's declaration.
  ;-----------------------------------------------------------------------
  PUBLIC  rsaz_512_scatter4
  ALIGN   16
  rsaz_512_scatter4 PROC PUBLIC
          mov     r8d,r8d                         ; discard undefined upper 32 bits of the index
          lea     rcx,QWORD PTR[r8*8+rcx]         ; rcx = table + 8*index
          mov     r9d,8                           ; eight words to scatter
          jmp     $L$oop_scatter
  ALIGN   16
  $L$oop_scatter::
          mov     rax,QWORD PTR[rdx]              ; fetch next source word
          lea     rdx,QWORD PTR[8+rdx]
          mov     QWORD PTR[rcx],rax              ; drop it into this entry's slot
          lea     rcx,QWORD PTR[128+rcx]          ; same entry, next word row
          dec     r9d
          jnz     $L$oop_scatter
          DB      0F3h,0C3h                       ;repret (rep ret)
  rsaz_512_scatter4 ENDP
  ;-----------------------------------------------------------------------
  ; rsaz_512_gather4 -- Win64 entry point.
  ;
  ; In:   rcx = 8-word output buffer
  ;       rdx = table base (read with movdqa, so it must be 16-byte aligned)
  ;       r8d = entry index (only the low 32 bits are read)
  ; Out:  [rcx .. rcx+63] = the eight words of the selected entry
  ;
  ; Every loop iteration loads a complete 128-byte table row (16 qwords),
  ; ANDs each qword with a mask that is all ones only for the selected
  ; index and ORs everything together, so the addresses that are read do
  ; not depend on the index.  The masks in xmm0..xmm7 cover indices
  ; 0..15 (two indices per register).
  ;
  ; xmm6..xmm15 are nonvolatile on Win64 and are saved in a 0a8h-byte
  ; frame.  The prologue is emitted as raw bytes so its instruction
  ; lengths match the offsets recorded in $L$SEH_info_rsaz_512_gather4.
  ;-----------------------------------------------------------------------
  1578. PUBLIC rsaz_512_gather4
  1579. ALIGN 16
  1580. rsaz_512_gather4 PROC PUBLIC
  1581. $L$SEH_begin_rsaz_512_gather4::
  1582. DB 048h,081h,0ech,0a8h,000h,000h,000h ; sub rsp,0a8h
  1583. DB 00fh,029h,034h,024h ; movaps [rsp],xmm6
  1584. DB 00fh,029h,07ch,024h,010h ; movaps [rsp+10h],xmm7
  1585. DB 044h,00fh,029h,044h,024h,020h ; movaps [rsp+20h],xmm8
  1586. DB 044h,00fh,029h,04ch,024h,030h ; movaps [rsp+30h],xmm9
  1587. DB 044h,00fh,029h,054h,024h,040h ; movaps [rsp+40h],xmm10
  1588. DB 044h,00fh,029h,05ch,024h,050h ; movaps [rsp+50h],xmm11
  1589. DB 044h,00fh,029h,064h,024h,060h ; movaps [rsp+60h],xmm12
  1590. DB 044h,00fh,029h,06ch,024h,070h ; movaps [rsp+70h],xmm13
  1591. DB 044h,00fh,029h,0b4h,024h,080h,0,0,0 ; movaps [rsp+80h],xmm14
  1592. DB 044h,00fh,029h,0bch,024h,090h,0,0,0 ; movaps [rsp+90h],xmm15
  ; Build the eight selection masks.  xmm0 starts as {0,0,1,1}; each later
  ; register is the previous one plus {2,2,2,2}, so xmm7 ends as
  ; {14,14,15,15}.  pcmpeqd against the broadcast index turns each dword
  ; pair (one qword lane) into all ones or all zeros.
  1593. movd xmm8,r8d
  1594. movdqa xmm1,XMMWORD PTR[(($L$inc+16))]
  1595. movdqa xmm0,XMMWORD PTR[$L$inc]
  1596. pshufd xmm8,xmm8,0 ; broadcast the index to all four dwords
  1597. movdqa xmm7,xmm1 ; xmm7 keeps the {2,2,2,2} increment until the last step
  1598. movdqa xmm2,xmm1
  1599. paddd xmm1,xmm0
  1600. pcmpeqd xmm0,xmm8
  1601. movdqa xmm3,xmm7
  1602. paddd xmm2,xmm1
  1603. pcmpeqd xmm1,xmm8
  1604. movdqa xmm4,xmm7
  1605. paddd xmm3,xmm2
  1606. pcmpeqd xmm2,xmm8
  1607. movdqa xmm5,xmm7
  1608. paddd xmm4,xmm3
  1609. pcmpeqd xmm3,xmm8
  1610. movdqa xmm6,xmm7
  1611. paddd xmm5,xmm4
  1612. pcmpeqd xmm4,xmm8
  1613. paddd xmm6,xmm5
  1614. pcmpeqd xmm5,xmm8
  1615. paddd xmm7,xmm6
  1616. pcmpeqd xmm6,xmm8
  1617. pcmpeqd xmm7,xmm8
  1618. mov r9d,8 ; eight output words
  1619. jmp $L$oop_gather
  1620. ALIGN 16
  1621. $L$oop_gather::
  ; Load one 128-byte row, mask every lane, OR the lanes together.
  1622. movdqa xmm8,XMMWORD PTR[rdx]
  1623. movdqa xmm9,XMMWORD PTR[16+rdx]
  1624. movdqa xmm10,XMMWORD PTR[32+rdx]
  1625. movdqa xmm11,XMMWORD PTR[48+rdx]
  1626. pand xmm8,xmm0
  1627. movdqa xmm12,XMMWORD PTR[64+rdx]
  1628. pand xmm9,xmm1
  1629. movdqa xmm13,XMMWORD PTR[80+rdx]
  1630. pand xmm10,xmm2
  1631. movdqa xmm14,XMMWORD PTR[96+rdx]
  1632. pand xmm11,xmm3
  1633. movdqa xmm15,XMMWORD PTR[112+rdx]
  1634. lea rdx,QWORD PTR[128+rdx] ; next row
  1635. pand xmm12,xmm4
  1636. pand xmm13,xmm5
  1637. pand xmm14,xmm6
  1638. pand xmm15,xmm7
  1639. por xmm8,xmm10
  1640. por xmm9,xmm11
  1641. por xmm8,xmm12
  1642. por xmm9,xmm13
  1643. por xmm8,xmm14
  1644. por xmm9,xmm15
  1645. por xmm8,xmm9
  1646. pshufd xmm9,xmm8,04eh ; swap the two qword halves ...
  1647. por xmm8,xmm9 ; ... so the selected word ends up in the low qword
  1648. movq QWORD PTR[rcx],xmm8
  1649. lea rcx,QWORD PTR[8+rcx]
  1650. dec r9d
  1651. jnz $L$oop_gather
  ; Restore the nonvolatile xmm registers and release the frame.
  1652. movaps xmm6,XMMWORD PTR[rsp]
  1653. movaps xmm7,XMMWORD PTR[16+rsp]
  1654. movaps xmm8,XMMWORD PTR[32+rsp]
  1655. movaps xmm9,XMMWORD PTR[48+rsp]
  1656. movaps xmm10,XMMWORD PTR[64+rsp]
  1657. movaps xmm11,XMMWORD PTR[80+rsp]
  1658. movaps xmm12,XMMWORD PTR[96+rsp]
  1659. movaps xmm13,XMMWORD PTR[112+rsp]
  1660. movaps xmm14,XMMWORD PTR[128+rsp]
  1661. movaps xmm15,XMMWORD PTR[144+rsp]
  1662. add rsp,0a8h
  1663. DB 0F3h,0C3h ;repret (rep ret)
  1664. $L$SEH_end_rsaz_512_gather4::
  1665. rsaz_512_gather4 ENDP
  ; $L$inc -- two 16-byte constants used by rsaz_512_gather4 to build its
  ; index masks: the first row {0,0,1,1} is the starting pair of indices,
  ; the second row {2,2,2,2} is the per-register increment.  The 64-byte
  ; alignment satisfies the movdqa loads that read both rows.
  1666. ALIGN 64
  1667. $L$inc::
  1668. DD 0,0,1,1
  1669. DD 2,2,2,2
  ;-----------------------------------------------------------------------
  ; se_handler -- Win64 language-specific exception handler shared by the
  ; routines whose .xdata record names it.
  ;
  ; In (per the Windows x64 handler signature): r8 = CONTEXT record,
  ; r9 = DISPATCHER_CONTEXT.  The numeric displacements below are field
  ; offsets inside those two structures.  HandlerData (taken from .xdata)
  ; holds two image-relative labels: end of prologue and start of epilogue.
  ;
  ; The handler works out where the faulting routine's entry rsp is,
  ; restores the nonvolatile registers that routine saved into the CONTEXT,
  ; then lets RtlVirtualUnwind continue and returns 1
  ; (ExceptionContinueSearch).
  ;-----------------------------------------------------------------------
  1670. EXTERN __imp_RtlVirtualUnwind:NEAR
  1671. ALIGN 16
  1672. se_handler PROC PRIVATE
  1673. push rsi
  1674. push rdi
  1675. push rbx
  1676. push rbp
  1677. push r12
  1678. push r13
  1679. push r14
  1680. push r15
  1681. pushfq
  1682. sub rsp,64 ; 32-byte home area plus four stack arguments for RtlVirtualUnwind
  1683. mov rax,QWORD PTR[120+r8] ; context->Rax (the prologues copy the entry rsp into rax)
  1684. mov rbx,QWORD PTR[248+r8] ; context->Rip
  1685. mov rsi,QWORD PTR[8+r9] ; disp->ImageBase
  1686. mov r11,QWORD PTR[56+r9] ; disp->HandlerData
  1687. mov r10d,DWORD PTR[r11] ; HandlerData[0]: end-of-prologue label (image-relative)
  1688. lea r10,QWORD PTR[r10*1+rsi]
  1689. cmp rbx,r10
  1690. jb $L$common_seh_tail ; fault inside the prologue: rax already is the entry rsp
  1691. mov rax,QWORD PTR[152+r8] ; context->Rsp
  1692. mov r10d,DWORD PTR[4+r11] ; HandlerData[1]: epilogue label (image-relative)
  1693. lea r10,QWORD PTR[r10*1+rsi]
  1694. cmp rbx,r10
  1695. jae $L$common_seh_tail ; fault in the epilogue: frame already released, Rsp is the entry rsp
  1696. lea rax,QWORD PTR[((128+24+48))+rax] ; skip the scratch area and the six pushed registers
  ; rsaz_512_mul_gather4 (recognised by its epilogue label) has a larger
  ; frame: 176 more bytes that also hold its saved xmm registers.
  1697. lea rbx,QWORD PTR[$L$mul_gather4_epilogue]
  1698. cmp rbx,r10
  1699. jne $L$se_not_in_mul_gather4
  1700. lea rax,QWORD PTR[176+rax]
  1701. lea rsi,QWORD PTR[((-48-168))+rax] ; source: the xmm save area in the frame
  1702. lea rdi,QWORD PTR[512+r8] ; destination: context->Xmm6
  1703. mov ecx,20 ; 20 qwords = ten 16-byte registers
  1704. DD 0a548f3fch ; bytes FC F3 48 A5 = cld; rep movsq
  1705. $L$se_not_in_mul_gather4::
  ; Recover the six pushed registers, which sit just below the entry rsp.
  1706. mov rbx,QWORD PTR[((-8))+rax]
  1707. mov rbp,QWORD PTR[((-16))+rax]
  1708. mov r12,QWORD PTR[((-24))+rax]
  1709. mov r13,QWORD PTR[((-32))+rax]
  1710. mov r14,QWORD PTR[((-40))+rax]
  1711. mov r15,QWORD PTR[((-48))+rax]
  1712. mov QWORD PTR[144+r8],rbx ; context->Rbx
  1713. mov QWORD PTR[160+r8],rbp ; context->Rbp
  1714. mov QWORD PTR[216+r8],r12 ; context->R12
  1715. mov QWORD PTR[224+r8],r13 ; context->R13
  1716. mov QWORD PTR[232+r8],r14 ; context->R14
  1717. mov QWORD PTR[240+r8],r15 ; context->R15
  1718. $L$common_seh_tail::
  ; rax = the routine's entry rsp; rdi/rsi were parked in the home slots
  ; at [8+rsp] and [16+rsp] by the WIN64 prologue.
  1719. mov rdi,QWORD PTR[8+rax]
  1720. mov rsi,QWORD PTR[16+rax]
  1721. mov QWORD PTR[152+r8],rax ; context->Rsp
  1722. mov QWORD PTR[168+r8],rsi ; context->Rsi
  1723. mov QWORD PTR[176+r8],rdi ; context->Rdi
  ; Copy the patched CONTEXT into disp->ContextRecord.
  1724. mov rdi,QWORD PTR[40+r9]
  1725. mov rsi,r8
  1726. mov ecx,154 ; 154 qwords = 1232 bytes, the size of CONTEXT
  1727. DD 0a548f3fch ; cld; rep movsq
  ; RtlVirtualUnwind(0, ImageBase, ControlPc, FunctionEntry, ContextRecord,
  ;                  &HandlerData, &EstablisherFrame, NULL)
  1728. mov rsi,r9
  1729. xor rcx,rcx ; arg1 = 0 (UNW_FLAG_NHANDLER)
  1730. mov rdx,QWORD PTR[8+rsi] ; arg2 = disp->ImageBase
  1731. mov r8,QWORD PTR[rsi] ; arg3 = disp->ControlPc
  1732. mov r9,QWORD PTR[16+rsi] ; arg4 = disp->FunctionEntry
  1733. mov r10,QWORD PTR[40+rsi] ; disp->ContextRecord
  1734. lea r11,QWORD PTR[56+rsi] ; &disp->HandlerData
  1735. lea r12,QWORD PTR[24+rsi] ; &disp->EstablisherFrame
  1736. mov QWORD PTR[32+rsp],r10 ; arg5
  1737. mov QWORD PTR[40+rsp],r11 ; arg6
  1738. mov QWORD PTR[48+rsp],r12 ; arg7
  1739. mov QWORD PTR[56+rsp],rcx ; arg8 = NULL
  1740. call QWORD PTR[__imp_RtlVirtualUnwind]
  1741. mov eax,1 ; ExceptionContinueSearch
  1742. add rsp,64
  1743. popfq
  1744. pop r15
  1745. pop r14
  1746. pop r13
  1747. pop r12
  1748. pop rbp
  1749. pop rbx
  1750. pop rdi
  1751. pop rsi
  1752. DB 0F3h,0C3h ;repret (rep ret)
  1753. se_handler ENDP
  1754. .text$ ENDS
  ; .pdata -- Win64 function table.  Each entry is three image-relative
  ; addresses: function begin, function end, and its unwind-info record in
  ; .xdata.  rsaz_512_scatter4 is a leaf routine and has no entry.
  1755. .pdata SEGMENT READONLY ALIGN(4)
  1756. ALIGN 4
  1757. DD imagerel $L$SEH_begin_rsaz_512_sqr
  1758. DD imagerel $L$SEH_end_rsaz_512_sqr
  1759. DD imagerel $L$SEH_info_rsaz_512_sqr
  1760. DD imagerel $L$SEH_begin_rsaz_512_mul
  1761. DD imagerel $L$SEH_end_rsaz_512_mul
  1762. DD imagerel $L$SEH_info_rsaz_512_mul
  1763. DD imagerel $L$SEH_begin_rsaz_512_mul_gather4
  1764. DD imagerel $L$SEH_end_rsaz_512_mul_gather4
  1765. DD imagerel $L$SEH_info_rsaz_512_mul_gather4
  1766. DD imagerel $L$SEH_begin_rsaz_512_mul_scatter4
  1767. DD imagerel $L$SEH_end_rsaz_512_mul_scatter4
  1768. DD imagerel $L$SEH_info_rsaz_512_mul_scatter4
  1769. DD imagerel $L$SEH_begin_rsaz_512_mul_by_one
  1770. DD imagerel $L$SEH_end_rsaz_512_mul_by_one
  1771. DD imagerel $L$SEH_info_rsaz_512_mul_by_one
  1772. DD imagerel $L$SEH_begin_rsaz_512_gather4
  1773. DD imagerel $L$SEH_end_rsaz_512_gather4
  1774. DD imagerel $L$SEH_info_rsaz_512_gather4
  1775. .pdata ENDS
  ; .xdata -- unwind-info records referenced from .pdata.
  ;
  ; The first five records share one shape: header "DB 9,0,0,0" (version 1
  ; with the exception-handler flag, no prologue bytes, no unwind codes),
  ; the handler address (se_handler), then two handler-data words that
  ; se_handler reads as HandlerData[0] (end of prologue) and
  ; HandlerData[1] (start of epilogue).
  ;
  ; The rsaz_512_gather4 record has no handler; it lists unwind codes that
  ; mirror that routine's byte-encoded prologue instead.
  1776. .xdata SEGMENT READONLY ALIGN(8)
  1777. ALIGN 8
  1778. $L$SEH_info_rsaz_512_sqr::
  1779. DB 9,0,0,0
  1780. DD imagerel se_handler
  1781. DD imagerel $L$sqr_body,imagerel $L$sqr_epilogue
  1782. $L$SEH_info_rsaz_512_mul::
  1783. DB 9,0,0,0
  1784. DD imagerel se_handler
  1785. DD imagerel $L$mul_body,imagerel $L$mul_epilogue
  1786. $L$SEH_info_rsaz_512_mul_gather4::
  1787. DB 9,0,0,0
  1788. DD imagerel se_handler
  1789. DD imagerel $L$mul_gather4_body,imagerel $L$mul_gather4_epilogue
  1790. $L$SEH_info_rsaz_512_mul_scatter4::
  1791. DB 9,0,0,0
  1792. DD imagerel se_handler
  1793. DD imagerel $L$mul_scatter4_body,imagerel $L$mul_scatter4_epilogue
  1794. $L$SEH_info_rsaz_512_mul_by_one::
  1795. DB 9,0,0,0
  1796. DD imagerel se_handler
  1797. DD imagerel $L$mul_by_one_body,imagerel $L$mul_by_one_epilogue
  1798. $L$SEH_info_rsaz_512_gather4::
  1799. DB 001h,046h,016h,000h ; version 1, prologue is 46h bytes, 16h unwind-code slots, no frame register
  ; Each entry below: prologue offset after the instruction, operation
  ; (save xmmN as a 128-bit value), then the save offset in 16-byte units.
  1800. DB 046h,0f8h,009h,000h ; xmm15 saved at [rsp+90h]
  1801. DB 03dh,0e8h,008h,000h ; xmm14 saved at [rsp+80h]
  1802. DB 034h,0d8h,007h,000h ; xmm13 saved at [rsp+70h]
  1803. DB 02eh,0c8h,006h,000h ; xmm12 saved at [rsp+60h]
  1804. DB 028h,0b8h,005h,000h ; xmm11 saved at [rsp+50h]
  1805. DB 022h,0a8h,004h,000h ; xmm10 saved at [rsp+40h]
  1806. DB 01ch,098h,003h,000h ; xmm9 saved at [rsp+30h]
  1807. DB 016h,088h,002h,000h ; xmm8 saved at [rsp+20h]
  1808. DB 010h,078h,001h,000h ; xmm7 saved at [rsp+10h]
  1809. DB 00bh,068h,000h,000h ; xmm6 saved at [rsp]
  1810. DB 007h,001h,015h,000h ; large stack allocation: 15h * 8 = 0a8h bytes (sub rsp,0a8h)
  1811. .xdata ENDS
  1812. END