x86_64-mont5.masm 72 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
72778277927802781278227832784278527862787278827892790279127922793279427952796279727982799280028012802280328042805280628072808280928102811281228132814281528162817281828192820282128222823282428252826282728282829283028312832283328342835283628372838283928402841284228432844284528462847284828492850285128522853285428552856285728582859286028612862286328642865286628672868286928702871287228732874287528762877287828792880288128822883288428852886288728882889289028912892289328942895289628972898289929002901290229032904290529062907290829092910291129122913291429152916291729182919292029212922292329242925292629272928292929302931293229332934293529362937293829392940294129422943294429452946294729482949295029512952295329542955295629572958295929602961296229632964296529662967296829692970297129722973297429752976297729782979298029812982298329842985298629872988298929902991299229932994299529962997299829993000300130023003300430053006300730083009301030113012301330143015301630173018301930203021302230233024302530263027302830293030303130323033303430353036303730383039304030413042304330443045304630473048304930503051305230533054305530563057305830593060306130623063306430653066306730683069307030713072307330743075307630773078307930803081308230833084308530863087308830893090309130923093309430953096309730983099310031013102310331043105310631073108310931103111311231133114311531163117311831193120312131223123312431253126312731283129313031313132313331343135313631373138313931403141314231433144314531463147314831493150315131523153315431553156315731583159316031613162316331643165316631673168316931703171317231733174317531763177317831793180318131823183318431853186318731883189319031913192319331943195319631973198319932003201320232033204320532063207320832093210321132123213321432153216321732183219322032213222322332243225322632273228322932303231323232333234323532363237323832393240324132423243324432453246324732483249325032513252325332543255325632573258325932603261326232633264326532663267326832693270327132723273327432753276327
73278327932803281328232833284328532863287328832893290329132923293329432953296329732983299330033013302330333043305330633073308330933103311331233133314331533163317331833193320332133223323332433253326332733283329333033313332333333343335333633373338333933403341334233433344334533463347334833493350335133523353335433553356335733583359336033613362336333643365336633673368336933703371337233733374337533763377337833793380338133823383338433853386338733883389339033913392339333943395339633973398339934003401340234033404340534063407340834093410341134123413341434153416341734183419342034213422342334243425342634273428342934303431343234333434343534363437343834393440344134423443344434453446344734483449345034513452345334543455345634573458345934603461346234633464346534663467346834693470347134723473347434753476347734783479348034813482348334843485348634873488348934903491349234933494349534963497349834993500350135023503350435053506350735083509351035113512351335143515351635173518351935203521352235233524352535263527352835293530353135323533353435353536353735383539354035413542354335443545354635473548354935503551355235533554355535563557355835593560356135623563356435653566356735683569357035713572357335743575357635773578357935803581358235833584358535863587358835893590359135923593359435953596359735983599360036013602360336043605360636073608360936103611361236133614361536163617361836193620362136223623362436253626362736283629363036313632363336343635363636373638363936403641364236433644364536463647364836493650365136523653365436553656365736583659366036613662366336643665366636673668366936703671367236733674367536763677367836793680368136823683368436853686368736883689369036913692369336943695369636973698369937003701370237033704370537063707370837093710371137123713371437153716371737183719372037213722372337243725372637273728372937303731373237333734373537363737373837393740374137423743374437453746374737483749375037513752375337543755375637573758375937603761376237633764376537663767376837693770377137723773377437753776377
73778377937803781378237833784378537863787378837893790379137923793379437953796379737983799380038013802380338043805380638073808380938103811381238133814381538163817381838193820382138223823382438253826382738283829383038313832383338343835383638373838383938403841384238433844384538463847384838493850385138523853385438553856385738583859386038613862386338643865386638673868386938703871387238733874387538763877387838793880388138823883388438853886388738883889389038913892389338943895389638973898389939003901390239033904390539063907390839093910391139123913391439153916391739183919392039213922392339243925392639273928392939303931393239333934393539363937393839393940394139423943394439453946394739483949395039513952395339543955395639573958395939603961396239633964396539663967396839693970397139723973397439753976397739783979398039813982398339843985398639873988398939903991399239933994399539963997399839994000400140024003400440054006400740084009401040114012401340144015401640174018401940204021402240234024402540264027402840294030403140324033403440354036403740384039404040414042404340444045404640474048404940504051
  1. OPTION DOTNAME
  2. .text$ SEGMENT ALIGN(256) 'CODE'
  3. EXTERN OPENSSL_ia32cap_P:NEAR
  4. PUBLIC bn_mul_mont_gather5
  5. ALIGN 64
  ; ---------------------------------------------------------------------
  ; bn_mul_mont_gather5 -- Win64 entry point.
  ;
  ; Word-by-word multiply-and-reduce loop in which the second multiplicand
  ; is not read directly but gathered from a table with SSE2 masks, so that
  ; every 16-byte chunk of each 256-byte table row is read regardless of
  ; the index.  (The name suggests Montgomery multiplication; the notes
  ; below that rely on that reading are marked as assumptions.)
  ;
  ; Win64 arguments as consumed below:
  ;   rcx      -> rdi  result vector (written in $L$sub / $L$copy)
  ;   rdx      -> rsi  first operand a[], read as 64-bit words [j*8+rsi]
  ;   r8       -> rdx  base of the table the second operand is gathered from
  ;   r9       -> rcx  word vector n[] multiplied by the per-row factor in
  ;                    rbp and subtracted in $L$sub (presumably the modulus
  ;                    -- confirm against caller)
  ;   [40+rsp] -> r8   pointer to one 64-bit word (presumably n0 -- confirm)
  ;   [48+rsp] -> r9   word count "num" (only the low 32 bits are used)
  ;   [56+rsp]         32-bit table index, broadcast into xmm5
  ;
  ; Returns rax = 1.  rbx, rbp, r12-r15, rdi, rsi and rsp are restored
  ; before returning; only xmm0-xmm5 are used.
  ; When num is a multiple of 8 control is handed to $L$mul4x_enter instead.
  ;
  ; Stack temporary t[] lives at [rsp] .. [num*8+rsp] (num+1 words); the
  ; entry rsp is saved in the word just above it, at [8+num*8+rsp].
  ; ---------------------------------------------------------------------
  6. bn_mul_mont_gather5 PROC PUBLIC
  7. mov QWORD PTR[8+rsp],rdi ;WIN64 prologue
  8. mov QWORD PTR[16+rsp],rsi ; rdi/rsi are parked in the caller-provided home slots above the return address
  9. mov rax,rsp
  10. $L$SEH_begin_bn_mul_mont_gather5::
  11. mov rdi,rcx ; arg1
  12. mov rsi,rdx ; arg2
  13. mov rdx,r8 ; arg3
  14. mov rcx,r9 ; arg4
  15. mov r8,QWORD PTR[40+rsp] ; arg5 (first stack argument)
  16. mov r9,QWORD PTR[48+rsp] ; arg6
  17. mov r9d,r9d ; 32-bit self-move zero-extends num into the full r9
  18. mov rax,rsp ; rax = entry rsp; stored in the frame at line 51 and reloaded in the epilogue
  19. test r9d,7
  20. jnz $L$mul_enter ; num not a multiple of 8: use the generic loop below
  21. mov r11d,DWORD PTR[((OPENSSL_ia32cap_P+8))] ; capability word examined at $L$mul4x_enter
  22. jmp $L$mul4x_enter ; label lives inside bn_mul4x_mont_gather5
  23. ALIGN 16
  24. $L$mul_enter::
  25. movd xmm5,DWORD PTR[56+rsp] ; arg7 (table index); read before the pushes move rsp
  26. push rbx
  27. push rbp
  28. push r12
  29. push r13
  30. push r14
  31. push r15
  ; --- Carve out the frame: r10 = (rsp - 280 - 8*num) rounded down to 1 KB.
  32. neg r9 ; r9 = -num so the lea below subtracts 8*num
  33. mov r11,rsp
  34. lea r10,QWORD PTR[((-280))+r9*8+rsp]
  35. neg r9 ; r9 = +num again
  36. and r10,-1024
  37. sub r11,r10
  38. and r11,-4096 ; whole 4 KB pages between the old rsp and the new frame base
  39. lea rsp,QWORD PTR[r11*1+r10] ; first stop: frame base plus those whole pages
  40. mov r11,QWORD PTR[rsp] ; read from the page (stack probe)
  41. cmp rsp,r10
  42. ja $L$mul_page_walk
  43. jmp $L$mul_page_walk_done
  ; Step rsp down one page at a time, reading from each page, until it
  ; reaches the frame base in r10.
  44. $L$mul_page_walk::
  45. lea rsp,QWORD PTR[((-4096))+rsp]
  46. mov r11,QWORD PTR[rsp]
  47. cmp rsp,r10
  48. ja $L$mul_page_walk
  49. $L$mul_page_walk_done::
  50. lea r10,QWORD PTR[$L$inc] ; $L$inc is defined outside this procedure; two 16-byte vectors are loaded from it
  51. mov QWORD PTR[8+r9*8+rsp],rax ; save entry rsp just above t[num]
  52. $L$mul_body::
  53. lea r12,QWORD PTR[128+rdx] ; r12 = table + 128, so one 256-byte row spans displacements -128 .. +127
  54. movdqa xmm0,XMMWORD PTR[r10] ; initial dword counter vector
  55. movdqa xmm1,XMMWORD PTR[16+r10] ; per-step increment vector (copied to xmm4 below)
  56. lea r10,QWORD PTR[((24-112))+r9*8+rsp]
  57. and r10,-16 ; 16-byte aligned; the mask array occupies [112+r10] .. [367+r10]
  58. pshufd xmm5,xmm5,0 ; broadcast the index into all four dword lanes
  ; --- Build sixteen 16-byte select masks.  Each step adds the increment
  ; in xmm4 to the previous counter vector (paddd) and compares a counter
  ; vector with the broadcast index (pcmpeqd), which sets a lane to all
  ; ones where it equals the index and to zero elsewhere.  Every mask is
  ; stored to the array at [112+r10]; the last four also stay in xmm0-xmm3.
  59. movdqa xmm4,xmm1
  60. movdqa xmm2,xmm1
  61. paddd xmm1,xmm0
  62. pcmpeqd xmm0,xmm5
  63. DB 067h ; raw 0x67 prefix byte ahead of a register-only instruction; presumably alignment padding -- confirm
  64. movdqa xmm3,xmm4
  65. paddd xmm2,xmm1
  66. pcmpeqd xmm1,xmm5
  67. movdqa XMMWORD PTR[112+r10],xmm0 ; mask 0
  68. movdqa xmm0,xmm4
  69. paddd xmm3,xmm2
  70. pcmpeqd xmm2,xmm5
  71. movdqa XMMWORD PTR[128+r10],xmm1 ; mask 1
  72. movdqa xmm1,xmm4
  73. paddd xmm0,xmm3
  74. pcmpeqd xmm3,xmm5
  75. movdqa XMMWORD PTR[144+r10],xmm2 ; mask 2
  76. movdqa xmm2,xmm4
  77. paddd xmm1,xmm0
  78. pcmpeqd xmm0,xmm5
  79. movdqa XMMWORD PTR[160+r10],xmm3 ; mask 3
  80. movdqa xmm3,xmm4
  81. paddd xmm2,xmm1
  82. pcmpeqd xmm1,xmm5
  83. movdqa XMMWORD PTR[176+r10],xmm0 ; mask 4
  84. movdqa xmm0,xmm4
  85. paddd xmm3,xmm2
  86. pcmpeqd xmm2,xmm5
  87. movdqa XMMWORD PTR[192+r10],xmm1 ; mask 5
  88. movdqa xmm1,xmm4
  89. paddd xmm0,xmm3
  90. pcmpeqd xmm3,xmm5
  91. movdqa XMMWORD PTR[208+r10],xmm2 ; mask 6
  92. movdqa xmm2,xmm4
  93. paddd xmm1,xmm0
  94. pcmpeqd xmm0,xmm5
  95. movdqa XMMWORD PTR[224+r10],xmm3 ; mask 7
  96. movdqa xmm3,xmm4
  97. paddd xmm2,xmm1
  98. pcmpeqd xmm1,xmm5
  99. movdqa XMMWORD PTR[240+r10],xmm0 ; mask 8
  100. movdqa xmm0,xmm4
  101. paddd xmm3,xmm2
  102. pcmpeqd xmm2,xmm5
  103. movdqa XMMWORD PTR[256+r10],xmm1 ; mask 9
  104. movdqa xmm1,xmm4
  105. paddd xmm0,xmm3
  106. pcmpeqd xmm3,xmm5
  107. movdqa XMMWORD PTR[272+r10],xmm2 ; mask 10
  108. movdqa xmm2,xmm4
  109. paddd xmm1,xmm0
  110. pcmpeqd xmm0,xmm5
  111. movdqa XMMWORD PTR[288+r10],xmm3 ; mask 11
  112. movdqa xmm3,xmm4
  113. paddd xmm2,xmm1
  114. pcmpeqd xmm1,xmm5
  115. movdqa XMMWORD PTR[304+r10],xmm0 ; mask 12
  116. paddd xmm3,xmm2
  117. DB 067h ; raw 0x67 prefix byte, as at line 63
  118. pcmpeqd xmm2,xmm5
  119. movdqa XMMWORD PTR[320+r10],xmm1 ; mask 13
  120. pcmpeqd xmm3,xmm5
  121. movdqa XMMWORD PTR[336+r10],xmm2 ; mask 14
  ; --- Gather the first multiplier word: AND every 16-byte chunk of the
  ; first table row with its mask and OR the results together.  Masks
  ; 12-15 are still in xmm0-xmm3; masks 0-11 are read back from the array.
  122. pand xmm0,XMMWORD PTR[64+r12]
  123. pand xmm1,XMMWORD PTR[80+r12]
  124. pand xmm2,XMMWORD PTR[96+r12]
  125. movdqa XMMWORD PTR[352+r10],xmm3 ; mask 15 (needed again by $L$outer)
  126. pand xmm3,XMMWORD PTR[112+r12]
  127. por xmm0,xmm2
  128. por xmm1,xmm3
  129. movdqa xmm4,XMMWORD PTR[((-128))+r12]
  130. movdqa xmm5,XMMWORD PTR[((-112))+r12] ; xmm5 is reused as scratch; the index is no longer needed
  131. movdqa xmm2,XMMWORD PTR[((-96))+r12]
  132. pand xmm4,XMMWORD PTR[112+r10]
  133. movdqa xmm3,XMMWORD PTR[((-80))+r12]
  134. pand xmm5,XMMWORD PTR[128+r10]
  135. por xmm0,xmm4
  136. pand xmm2,XMMWORD PTR[144+r10]
  137. por xmm1,xmm5
  138. pand xmm3,XMMWORD PTR[160+r10]
  139. por xmm0,xmm2
  140. por xmm1,xmm3
  141. movdqa xmm4,XMMWORD PTR[((-64))+r12]
  142. movdqa xmm5,XMMWORD PTR[((-48))+r12]
  143. movdqa xmm2,XMMWORD PTR[((-32))+r12]
  144. pand xmm4,XMMWORD PTR[176+r10]
  145. movdqa xmm3,XMMWORD PTR[((-16))+r12]
  146. pand xmm5,XMMWORD PTR[192+r10]
  147. por xmm0,xmm4
  148. pand xmm2,XMMWORD PTR[208+r10]
  149. por xmm1,xmm5
  150. pand xmm3,XMMWORD PTR[224+r10]
  151. por xmm0,xmm2
  152. por xmm1,xmm3
  153. movdqa xmm4,XMMWORD PTR[r12]
  154. movdqa xmm5,XMMWORD PTR[16+r12]
  155. movdqa xmm2,XMMWORD PTR[32+r12]
  156. pand xmm4,XMMWORD PTR[240+r10]
  157. movdqa xmm3,XMMWORD PTR[48+r12]
  158. pand xmm5,XMMWORD PTR[256+r10]
  159. por xmm0,xmm4
  160. pand xmm2,XMMWORD PTR[272+r10]
  161. por xmm1,xmm5
  162. pand xmm3,XMMWORD PTR[288+r10]
  163. por xmm0,xmm2
  164. por xmm1,xmm3
  165. por xmm0,xmm1
  166. pshufd xmm1,xmm0,04eh ; swap the two 64-bit halves
  167. por xmm0,xmm1 ; low qword = OR of both halves = the selected 64-bit word
  168. lea r12,QWORD PTR[256+r12] ; advance to the next 256-byte table row
  169. DB 102,72,15,126,195 ; hand-encoded 66 48 0F 7E C3 = movq rbx,xmm0 (b = gathered word)
  ; --- First pass (t[] not yet initialised): t = (a * b + n * m) shifted
  ; down one word, where m = (low word of a[0]*b) * [r8].
  170. mov r8,QWORD PTR[r8] ; r8 = the word arg5 points at; kept for every outer iteration
  171. mov rax,QWORD PTR[rsi] ; a[0]
  172. xor r14,r14 ; r14 = outer index i
  173. xor r15,r15 ; r15 = inner index j
  174. mov rbp,r8
  175. mul rbx ; rdx:rax = a[0] * b
  176. mov r10,rax
  177. mov rax,QWORD PTR[rcx] ; n[0]
  178. imul rbp,r10 ; m = r8 * low word (low 64 bits only)
  179. mov r11,rdx
  180. mul rbp ; rdx:rax = n[0] * m
  181. add r10,rax ; only the carry out is consumed; r10 is overwritten before its next read
  182. mov rax,QWORD PTR[8+rsi] ; a[1]
  183. adc rdx,0
  184. mov r13,rdx
  185. lea r15,QWORD PTR[1+r15] ; j = 1
  186. jmp $L$1st_enter
  187. ALIGN 16
  ; Loop body is entered in the middle ($L$1st_enter).  Each trip finishes
  ; the column started by the previous trip and stores it at t[j-2].
  188. $L$1st::
  189. add r13,rax ; + low(n[j-1] * m)
  190. mov rax,QWORD PTR[r15*8+rsi] ; next a[j]
  191. adc rdx,0
  192. add r13,r11 ; + a-side partial sum
  193. mov r11,r10
  194. adc rdx,0
  195. mov QWORD PTR[((-16))+r15*8+rsp],r13 ; t[j-2]
  196. mov r13,rdx
  197. $L$1st_enter::
  198. mul rbx ; a[j] * b
  199. add r11,rax
  200. mov rax,QWORD PTR[r15*8+rcx] ; n[j]
  201. adc rdx,0
  202. lea r15,QWORD PTR[1+r15] ; j++ (lea leaves the flags alone)
  203. mov r10,rdx
  204. mul rbp ; n[j] * m
  205. cmp r15,r9
  206. jne $L$1st
  ; Tail of the first pass: flush the last column and the two top words.
  207. add r13,rax
  208. adc rdx,0
  209. add r13,r11
  210. adc rdx,0
  211. mov QWORD PTR[((-16))+r9*8+rsp],r13 ; t[num-2]
  212. mov r13,rdx
  213. mov r11,r10
  214. xor rdx,rdx
  215. add r13,r11
  216. adc rdx,0
  217. mov QWORD PTR[((-8))+r9*8+rsp],r13 ; t[num-1]
  218. mov QWORD PTR[r9*8+rsp],rdx ; t[num] = top carry
  219. lea r14,QWORD PTR[1+r14] ; i = 1
  220. jmp $L$outer
  221. ALIGN 16
  ; --- Outer loop, i = 1 .. num-1: gather the next multiplier word from the
  ; next table row using the masks saved earlier, then accumulate
  ; a * b + n * m into t[] with a one-word downward shift.
  222. $L$outer::
  223. lea rdx,QWORD PTR[((24+128))+r9*8+rsp]
  224. and rdx,-16 ; rdx = start of the mask array + 128 (same array as [112+r10] above)
  225. pxor xmm4,xmm4 ; two independent OR accumulators
  226. pxor xmm5,xmm5
  227. movdqa xmm0,XMMWORD PTR[((-128))+r12]
  228. movdqa xmm1,XMMWORD PTR[((-112))+r12]
  229. movdqa xmm2,XMMWORD PTR[((-96))+r12]
  230. movdqa xmm3,XMMWORD PTR[((-80))+r12]
  231. pand xmm0,XMMWORD PTR[((-128))+rdx]
  232. pand xmm1,XMMWORD PTR[((-112))+rdx]
  233. por xmm4,xmm0
  234. pand xmm2,XMMWORD PTR[((-96))+rdx]
  235. por xmm5,xmm1
  236. pand xmm3,XMMWORD PTR[((-80))+rdx]
  237. por xmm4,xmm2
  238. por xmm5,xmm3
  239. movdqa xmm0,XMMWORD PTR[((-64))+r12]
  240. movdqa xmm1,XMMWORD PTR[((-48))+r12]
  241. movdqa xmm2,XMMWORD PTR[((-32))+r12]
  242. movdqa xmm3,XMMWORD PTR[((-16))+r12]
  243. pand xmm0,XMMWORD PTR[((-64))+rdx]
  244. pand xmm1,XMMWORD PTR[((-48))+rdx]
  245. por xmm4,xmm0
  246. pand xmm2,XMMWORD PTR[((-32))+rdx]
  247. por xmm5,xmm1
  248. pand xmm3,XMMWORD PTR[((-16))+rdx]
  249. por xmm4,xmm2
  250. por xmm5,xmm3
  251. movdqa xmm0,XMMWORD PTR[r12]
  252. movdqa xmm1,XMMWORD PTR[16+r12]
  253. movdqa xmm2,XMMWORD PTR[32+r12]
  254. movdqa xmm3,XMMWORD PTR[48+r12]
  255. pand xmm0,XMMWORD PTR[rdx]
  256. pand xmm1,XMMWORD PTR[16+rdx]
  257. por xmm4,xmm0
  258. pand xmm2,XMMWORD PTR[32+rdx]
  259. por xmm5,xmm1
  260. pand xmm3,XMMWORD PTR[48+rdx]
  261. por xmm4,xmm2
  262. por xmm5,xmm3
  263. movdqa xmm0,XMMWORD PTR[64+r12]
  264. movdqa xmm1,XMMWORD PTR[80+r12]
  265. movdqa xmm2,XMMWORD PTR[96+r12]
  266. movdqa xmm3,XMMWORD PTR[112+r12]
  267. pand xmm0,XMMWORD PTR[64+rdx]
  268. pand xmm1,XMMWORD PTR[80+rdx]
  269. por xmm4,xmm0
  270. pand xmm2,XMMWORD PTR[96+rdx]
  271. por xmm5,xmm1
  272. pand xmm3,XMMWORD PTR[112+rdx]
  273. por xmm4,xmm2
  274. por xmm5,xmm3
  275. por xmm4,xmm5
  276. pshufd xmm0,xmm4,04eh ; swapped halves
  277. por xmm0,xmm4 ; low qword = selected word
  278. lea r12,QWORD PTR[256+r12] ; next table row for the following iteration
  279. mov rax,QWORD PTR[rsi] ; a[0]
  280. DB 102,72,15,126,195 ; hand-encoded movq rbx,xmm0 (b = gathered word)
  281. xor r15,r15 ; j = 0
  282. mov rbp,r8
  283. mov r10,QWORD PTR[rsp] ; t[0]
  284. mul rbx ; a[0] * b
  285. add r10,rax ; t[0] + low(a[0] * b)
  286. mov rax,QWORD PTR[rcx] ; n[0]
  287. adc rdx,0
  288. imul rbp,r10 ; m = r8 * (t[0] + low(a[0]*b))
  289. mov r11,rdx
  290. mul rbp ; n[0] * m
  291. add r10,rax ; only the carry out is consumed; r10 is reloaded at line 294
  292. mov rax,QWORD PTR[8+rsi] ; a[1]
  293. adc rdx,0
  294. mov r10,QWORD PTR[8+rsp] ; t[1]
  295. mov r13,rdx
  296. lea r15,QWORD PTR[1+r15] ; j = 1
  297. jmp $L$inner_enter
  298. ALIGN 16
  ; Same pipelined shape as $L$1st, with the old t[j] folded into the sum.
  299. $L$inner::
  300. add r13,rax
  301. mov rax,QWORD PTR[r15*8+rsi] ; next a[j]
  302. adc rdx,0
  303. add r13,r10
  304. mov r10,QWORD PTR[r15*8+rsp] ; old t[j]
  305. adc rdx,0
  306. mov QWORD PTR[((-16))+r15*8+rsp],r13 ; new t[j-2]
  307. mov r13,rdx
  308. $L$inner_enter::
  309. mul rbx ; a[j] * b
  310. add r11,rax
  311. mov rax,QWORD PTR[r15*8+rcx] ; n[j]
  312. adc rdx,0
  313. add r10,r11 ; old t word + a-side partial sum
  314. mov r11,rdx
  315. adc r11,0
  316. lea r15,QWORD PTR[1+r15] ; j++
  317. mul rbp ; n[j] * m
  318. cmp r15,r9
  319. jne $L$inner
  320. add r13,rax
  321. adc rdx,0
  322. add r13,r10
  323. mov r10,QWORD PTR[r9*8+rsp] ; previous top carry t[num]
  324. adc rdx,0
  325. mov QWORD PTR[((-16))+r9*8+rsp],r13 ; t[num-2]
  326. mov r13,rdx
  327. xor rdx,rdx
  328. add r13,r11
  329. adc rdx,0
  330. add r13,r10
  331. adc rdx,0
  332. mov QWORD PTR[((-8))+r9*8+rsp],r13 ; t[num-1]
  333. mov QWORD PTR[r9*8+rsp],rdx ; t[num] = new top carry
  334. lea r14,QWORD PTR[1+r14] ; i++
  335. cmp r14,r9
  336. jb $L$outer
  ; --- Final subtraction: result = t - n, word by word.  The borrow is
  ; carried in CF from one trip to the next: the xor clears CF up front and
  ; the mov / lea / dec inside the loop do not modify CF.
  337. xor r14,r14 ; index = 0, CF = 0
  338. mov rax,QWORD PTR[rsp] ; t[0]
  339. lea rsi,QWORD PTR[rsp] ; rsi now points at t[]; the a[] pointer is no longer needed
  340. mov r15,r9 ; trip count
  341. jmp $L$sub
  342. ALIGN 16
  343. $L$sub:: sbb rax,QWORD PTR[r14*8+rcx]
  344. mov QWORD PTR[r14*8+rdi],rax ; result[i] = t[i] - n[i] - borrow
  345. mov rax,QWORD PTR[8+r14*8+rsi] ; t[i+1]; on the last trip this is the top carry t[num]
  346. lea r14,QWORD PTR[1+r14]
  347. dec r15
  348. jnz $L$sub
  349. sbb rax,0 ; rax = t[num] - final borrow; used below as an AND mask (expected 0 or all ones -- confirm)
  350. mov rbx,-1
  351. xor rbx,rax ; rbx = complement of the mask
  352. xor r14,r14
  353. mov r15,r9
  ; --- Branch-free select: result[i] = (result[i] AND rbx) OR (t[i] AND rax),
  ; i.e. keep the subtracted value or put t[i] back, without a data-dependent
  ; jump.  Each t[i] is overwritten afterwards.
  354. $L$copy::
  355. mov rcx,QWORD PTR[r14*8+rdi]
  356. mov rdx,QWORD PTR[r14*8+rsp]
  357. and rcx,rbx
  358. and rdx,rax
  359. mov QWORD PTR[r14*8+rsp],r14 ; overwrite t[i] with the loop index, destroying the intermediate value
  360. or rdx,rcx
  361. mov QWORD PTR[r14*8+rdi],rdx
  362. lea r14,QWORD PTR[1+r14]
  363. sub r15,1
  364. jnz $L$copy
  365. mov rsi,QWORD PTR[8+r9*8+rsp] ; entry rsp saved at line 51
  366. mov rax,1 ; return value
  367. mov r15,QWORD PTR[((-48))+rsi] ; reload the six registers pushed at lines 26-31
  368. mov r14,QWORD PTR[((-40))+rsi]
  369. mov r13,QWORD PTR[((-32))+rsi]
  370. mov r12,QWORD PTR[((-24))+rsi]
  371. mov rbp,QWORD PTR[((-16))+rsi]
  372. mov rbx,QWORD PTR[((-8))+rsi]
  373. lea rsp,QWORD PTR[rsi] ; rsp back to its value at entry
  374. $L$mul_epilogue::
  375. mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue
  376. mov rsi,QWORD PTR[16+rsp]
  377. DB 0F3h,0C3h ;repret
  378. $L$SEH_end_bn_mul_mont_gather5::
  379. bn_mul_mont_gather5 ENDP
  380. ALIGN 32
  ; ---------------------------------------------------------------------
  ; bn_mul4x_mont_gather5 -- frame set-up and tear-down around
  ; mul4x_internal (Win64).
  ;
  ; Takes the same argument layout as bn_mul_mont_gather5 (rcx, rdx, r8, r9,
  ; [40+rsp], [48+rsp], with the index at [56+rsp] read later through rax).
  ; The visible way in is the jump from bn_mul_mont_gather5 to
  ; $L$mul4x_enter, which arrives with the arguments already moved to
  ; rdi/rsi/rdx/rcx/r8/r9, rax = entry rsp and r11d = the dword at
  ; OPENSSL_ia32cap_P+8.
  ; NOTE(review): the PROC's own entry sequence below never loads r11d, so
  ; falling through from line 393 into the capability test would test an
  ; unset register -- confirm this entry is not called directly.
  ;
  ; Returns rax = 1; rbx, rbp, r12-r15, rdi, rsi and rsp are restored.
  ; ---------------------------------------------------------------------
  381. bn_mul4x_mont_gather5 PROC PRIVATE
  382. mov QWORD PTR[8+rsp],rdi ;WIN64 prologue
  383. mov QWORD PTR[16+rsp],rsi ; rdi/rsi are parked in the caller-provided home slots above the return address
  384. mov rax,rsp
  385. $L$SEH_begin_bn_mul4x_mont_gather5::
  386. mov rdi,rcx ; arg1
  387. mov rsi,rdx ; arg2
  388. mov rdx,r8 ; arg3
  389. mov rcx,r9 ; arg4
  390. mov r8,QWORD PTR[40+rsp] ; arg5
  391. mov r9,QWORD PTR[48+rsp] ; arg6
  392. DB 067h ; raw 0x67 prefix byte ahead of a register-only instruction; presumably alignment padding -- confirm
  393. mov rax,rsp ; rax = entry rsp
  394. $L$mul4x_enter::
  395. and r11d,080108h ; keep three capability bits (3, 8 and 19)
  396. cmp r11d,080108h
  397. je $L$mulx4x_enter ; all three set: use the alternative path (label is outside this procedure)
  398. push rbx
  399. push rbp
  400. push r12
  401. push r13
  402. push r14
  403. push r15
  404. $L$mul4x_prologue::
  ; --- Choose the frame base in rbp.  With bytes = 8*num, the candidate is
  ; rsp - 320 - 2*bytes; it is then lowered by an amount derived from
  ; (candidate - rdi) mod 4096, i.e. from where the frame would sit within
  ; a 4 KB page relative to the result pointer in rdi.
  ; (Presumably this avoids page-offset aliasing between the frame and the
  ; output buffer -- confirm.)
  405. DB 067h ; raw 0x67 prefix byte, as at line 392
  406. shl r9d,3 ; r9 = num * 8 (bytes); the 32-bit write zero-extends
  407. lea r10,QWORD PTR[r9*2+r9] ; r10 = 3 * bytes
  408. neg r9 ; r9 = -bytes
  409. lea r11,QWORD PTR[((-320))+r9*2+rsp] ; candidate frame base
  410. mov rbp,rsp
  411. sub r11,rdi
  412. and r11,4095 ; r11 = (candidate - rdi) mod 4096
  413. cmp r10,r11
  414. jb $L$mul4xsp_alt ; 3*bytes < r11: take the alternative placement
  415. sub rbp,r11 ; lower the frame by r11, so (frame - rdi) becomes a multiple of 4096
  416. lea rbp,QWORD PTR[((-320))+r9*2+rbp]
  417. jmp $L$mul4xsp_done
  418. ALIGN 32
  419. $L$mul4xsp_alt::
  420. lea r10,QWORD PTR[((4096-320))+r9*2] ; r10 = 4096 - 320 - 2*bytes
  421. lea rbp,QWORD PTR[((-320))+r9*2+rbp] ; rbp = candidate frame base
  422. sub r11,r10 ; sets CF when r11 < r10
  423. mov r10,0 ; mov (not xor) so the CF from the sub survives for cmovc
  424. cmovc r11,r10 ; clamp a negative adjustment to zero
  425. sub rbp,r11
  426. $L$mul4xsp_done::
  427. and rbp,-64 ; round the frame base down to a 64-byte boundary
  428. mov r11,rsp
  429. sub r11,rbp
  430. and r11,-4096 ; whole 4 KB pages between the old rsp and the frame base
  431. lea rsp,QWORD PTR[rbp*1+r11] ; first stop: frame base plus those whole pages
  432. mov r10,QWORD PTR[rsp] ; read from the page (stack probe)
  433. cmp rsp,rbp
  434. ja $L$mul4x_page_walk
  435. jmp $L$mul4x_page_walk_done
  ; Step rsp down one page at a time, reading from each page, until it
  ; reaches the frame base in rbp.
  436. $L$mul4x_page_walk::
  437. lea rsp,QWORD PTR[((-4096))+rsp]
  438. mov r10,QWORD PTR[rsp]
  439. cmp rsp,rbp
  440. ja $L$mul4x_page_walk
  441. $L$mul4x_page_walk_done::
  442. neg r9 ; r9 = +bytes again
  443. mov QWORD PTR[40+rsp],rax ; save entry rsp in the frame
  444. $L$mul4x_body::
  445. call mul4x_internal ; rax still holds the entry rsp; mul4x_internal reads the index via [56+rax]
  446. mov rsi,QWORD PTR[40+rsp] ; entry rsp saved at line 443
  447. mov rax,1 ; return value
  448. mov r15,QWORD PTR[((-48))+rsi] ; reload the six registers pushed at lines 398-403
  449. mov r14,QWORD PTR[((-40))+rsi]
  450. mov r13,QWORD PTR[((-32))+rsi]
  451. mov r12,QWORD PTR[((-24))+rsi]
  452. mov rbp,QWORD PTR[((-16))+rsi]
  453. mov rbx,QWORD PTR[((-8))+rsi]
  454. lea rsp,QWORD PTR[rsi] ; rsp back to its value at entry
  455. $L$mul4x_epilogue::
  456. mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue
  457. mov rsi,QWORD PTR[16+rsp]
  458. DB 0F3h,0C3h ;repret
  459. $L$SEH_end_bn_mul4x_mont_gather5::
  460. bn_mul4x_mont_gather5 ENDP
  461. ALIGN 32
  462. mul4x_internal PROC PRIVATE
  463. shl r9,5
  464. movd xmm5,DWORD PTR[56+rax]
  465. lea rax,QWORD PTR[$L$inc]
  466. lea r13,QWORD PTR[128+r9*1+rdx]
  467. shr r9,5
  468. movdqa xmm0,XMMWORD PTR[rax]
  469. movdqa xmm1,XMMWORD PTR[16+rax]
  470. lea r10,QWORD PTR[((88-112))+r9*1+rsp]
  471. lea r12,QWORD PTR[128+rdx]
  472. pshufd xmm5,xmm5,0
  473. movdqa xmm4,xmm1
  474. DB 067h,067h
  475. movdqa xmm2,xmm1
  476. paddd xmm1,xmm0
  477. pcmpeqd xmm0,xmm5
  478. DB 067h
  479. movdqa xmm3,xmm4
  480. paddd xmm2,xmm1
  481. pcmpeqd xmm1,xmm5
  482. movdqa XMMWORD PTR[112+r10],xmm0
  483. movdqa xmm0,xmm4
  484. paddd xmm3,xmm2
  485. pcmpeqd xmm2,xmm5
  486. movdqa XMMWORD PTR[128+r10],xmm1
  487. movdqa xmm1,xmm4
  488. paddd xmm0,xmm3
  489. pcmpeqd xmm3,xmm5
  490. movdqa XMMWORD PTR[144+r10],xmm2
  491. movdqa xmm2,xmm4
  492. paddd xmm1,xmm0
  493. pcmpeqd xmm0,xmm5
  494. movdqa XMMWORD PTR[160+r10],xmm3
  495. movdqa xmm3,xmm4
  496. paddd xmm2,xmm1
  497. pcmpeqd xmm1,xmm5
  498. movdqa XMMWORD PTR[176+r10],xmm0
  499. movdqa xmm0,xmm4
  500. paddd xmm3,xmm2
  501. pcmpeqd xmm2,xmm5
  502. movdqa XMMWORD PTR[192+r10],xmm1
  503. movdqa xmm1,xmm4
  504. paddd xmm0,xmm3
  505. pcmpeqd xmm3,xmm5
  506. movdqa XMMWORD PTR[208+r10],xmm2
  507. movdqa xmm2,xmm4
  508. paddd xmm1,xmm0
  509. pcmpeqd xmm0,xmm5
  510. movdqa XMMWORD PTR[224+r10],xmm3
  511. movdqa xmm3,xmm4
  512. paddd xmm2,xmm1
  513. pcmpeqd xmm1,xmm5
  514. movdqa XMMWORD PTR[240+r10],xmm0
  515. movdqa xmm0,xmm4
  516. paddd xmm3,xmm2
  517. pcmpeqd xmm2,xmm5
  518. movdqa XMMWORD PTR[256+r10],xmm1
  519. movdqa xmm1,xmm4
  520. paddd xmm0,xmm3
  521. pcmpeqd xmm3,xmm5
  522. movdqa XMMWORD PTR[272+r10],xmm2
  523. movdqa xmm2,xmm4
  524. paddd xmm1,xmm0
  525. pcmpeqd xmm0,xmm5
  526. movdqa XMMWORD PTR[288+r10],xmm3
  527. movdqa xmm3,xmm4
  528. paddd xmm2,xmm1
  529. pcmpeqd xmm1,xmm5
  530. movdqa XMMWORD PTR[304+r10],xmm0
  531. paddd xmm3,xmm2
  532. DB 067h
  533. pcmpeqd xmm2,xmm5
  534. movdqa XMMWORD PTR[320+r10],xmm1
  535. pcmpeqd xmm3,xmm5
  536. movdqa XMMWORD PTR[336+r10],xmm2
  537. pand xmm0,XMMWORD PTR[64+r12]
  538. pand xmm1,XMMWORD PTR[80+r12]
  539. pand xmm2,XMMWORD PTR[96+r12]
  540. movdqa XMMWORD PTR[352+r10],xmm3
  541. pand xmm3,XMMWORD PTR[112+r12]
  542. por xmm0,xmm2
  543. por xmm1,xmm3
  544. movdqa xmm4,XMMWORD PTR[((-128))+r12]
  545. movdqa xmm5,XMMWORD PTR[((-112))+r12]
  546. movdqa xmm2,XMMWORD PTR[((-96))+r12]
  547. pand xmm4,XMMWORD PTR[112+r10]
  548. movdqa xmm3,XMMWORD PTR[((-80))+r12]
  549. pand xmm5,XMMWORD PTR[128+r10]
  550. por xmm0,xmm4
  551. pand xmm2,XMMWORD PTR[144+r10]
  552. por xmm1,xmm5
  553. pand xmm3,XMMWORD PTR[160+r10]
  554. por xmm0,xmm2
  555. por xmm1,xmm3
  556. movdqa xmm4,XMMWORD PTR[((-64))+r12]
  557. movdqa xmm5,XMMWORD PTR[((-48))+r12]
  558. movdqa xmm2,XMMWORD PTR[((-32))+r12]
  559. pand xmm4,XMMWORD PTR[176+r10]
  560. movdqa xmm3,XMMWORD PTR[((-16))+r12]
  561. pand xmm5,XMMWORD PTR[192+r10]
  562. por xmm0,xmm4
  563. pand xmm2,XMMWORD PTR[208+r10]
  564. por xmm1,xmm5
  565. pand xmm3,XMMWORD PTR[224+r10]
  566. por xmm0,xmm2
  567. por xmm1,xmm3
  568. movdqa xmm4,XMMWORD PTR[r12]
  569. movdqa xmm5,XMMWORD PTR[16+r12]
  570. movdqa xmm2,XMMWORD PTR[32+r12]
  571. pand xmm4,XMMWORD PTR[240+r10]
  572. movdqa xmm3,XMMWORD PTR[48+r12]
  573. pand xmm5,XMMWORD PTR[256+r10]
  574. por xmm0,xmm4
  575. pand xmm2,XMMWORD PTR[272+r10]
  576. por xmm1,xmm5
  577. pand xmm3,XMMWORD PTR[288+r10]
  578. por xmm0,xmm2
  579. por xmm1,xmm3
  580. por xmm0,xmm1
  581. pshufd xmm1,xmm0,04eh
  582. por xmm0,xmm1
  583. lea r12,QWORD PTR[256+r12]
  584. DB 102,72,15,126,195
  585. mov QWORD PTR[((16+8))+rsp],r13
  586. mov QWORD PTR[((56+8))+rsp],rdi
  587. mov r8,QWORD PTR[r8]
  588. mov rax,QWORD PTR[rsi]
  589. lea rsi,QWORD PTR[r9*1+rsi]
  590. neg r9
  591. mov rbp,r8
  592. mul rbx
  593. mov r10,rax
  594. mov rax,QWORD PTR[rcx]
  595. imul rbp,r10
  596. lea r14,QWORD PTR[((64+8))+rsp]
  597. mov r11,rdx
  598. mul rbp
  599. add r10,rax
  600. mov rax,QWORD PTR[8+r9*1+rsi]
  601. adc rdx,0
  602. mov rdi,rdx
  603. mul rbx
  604. add r11,rax
  605. mov rax,QWORD PTR[8+rcx]
  606. adc rdx,0
  607. mov r10,rdx
  608. mul rbp
  609. add rdi,rax
  610. mov rax,QWORD PTR[16+r9*1+rsi]
  611. adc rdx,0
  612. add rdi,r11
  613. lea r15,QWORD PTR[32+r9]
  614. lea rcx,QWORD PTR[32+rcx]
  615. adc rdx,0
  616. mov QWORD PTR[r14],rdi
  617. mov r13,rdx
  618. jmp $L$1st4x
  619. ALIGN 32
  620. $L$1st4x::
  621. mul rbx
  622. add r10,rax
  623. mov rax,QWORD PTR[((-16))+rcx]
  624. lea r14,QWORD PTR[32+r14]
  625. adc rdx,0
  626. mov r11,rdx
  627. mul rbp
  628. add r13,rax
  629. mov rax,QWORD PTR[((-8))+r15*1+rsi]
  630. adc rdx,0
  631. add r13,r10
  632. adc rdx,0
  633. mov QWORD PTR[((-24))+r14],r13
  634. mov rdi,rdx
  635. mul rbx
  636. add r11,rax
  637. mov rax,QWORD PTR[((-8))+rcx]
  638. adc rdx,0
  639. mov r10,rdx
  640. mul rbp
  641. add rdi,rax
  642. mov rax,QWORD PTR[r15*1+rsi]
  643. adc rdx,0
  644. add rdi,r11
  645. adc rdx,0
  646. mov QWORD PTR[((-16))+r14],rdi
  647. mov r13,rdx
  648. mul rbx
  649. add r10,rax
  650. mov rax,QWORD PTR[rcx]
  651. adc rdx,0
  652. mov r11,rdx
  653. mul rbp
  654. add r13,rax
  655. mov rax,QWORD PTR[8+r15*1+rsi]
  656. adc rdx,0
  657. add r13,r10
  658. adc rdx,0
  659. mov QWORD PTR[((-8))+r14],r13
  660. mov rdi,rdx
  661. mul rbx
  662. add r11,rax
  663. mov rax,QWORD PTR[8+rcx]
  664. adc rdx,0
  665. mov r10,rdx
  666. mul rbp
  667. add rdi,rax
  668. mov rax,QWORD PTR[16+r15*1+rsi]
  669. adc rdx,0
  670. add rdi,r11
  671. lea rcx,QWORD PTR[32+rcx]
  672. adc rdx,0
  673. mov QWORD PTR[r14],rdi
  674. mov r13,rdx
  675. add r15,32
  676. jnz $L$1st4x
  677. mul rbx
  678. add r10,rax
  679. mov rax,QWORD PTR[((-16))+rcx]
  680. lea r14,QWORD PTR[32+r14]
  681. adc rdx,0
  682. mov r11,rdx
  683. mul rbp
  684. add r13,rax
  685. mov rax,QWORD PTR[((-8))+rsi]
  686. adc rdx,0
  687. add r13,r10
  688. adc rdx,0
  689. mov QWORD PTR[((-24))+r14],r13
  690. mov rdi,rdx
  691. mul rbx
  692. add r11,rax
  693. mov rax,QWORD PTR[((-8))+rcx]
  694. adc rdx,0
  695. mov r10,rdx
  696. mul rbp
  697. add rdi,rax
  698. mov rax,QWORD PTR[r9*1+rsi]
  699. adc rdx,0
  700. add rdi,r11
  701. adc rdx,0
  702. mov QWORD PTR[((-16))+r14],rdi
  703. mov r13,rdx
  704. lea rcx,QWORD PTR[r9*1+rcx]
  705. xor rdi,rdi
  706. add r13,r10
  707. adc rdi,0
  708. mov QWORD PTR[((-8))+r14],r13
  709. jmp $L$outer4x
  710. ALIGN 32
  711. $L$outer4x::
  712. lea rdx,QWORD PTR[((16+128))+r14]
  713. pxor xmm4,xmm4
  714. pxor xmm5,xmm5
  715. movdqa xmm0,XMMWORD PTR[((-128))+r12]
  716. movdqa xmm1,XMMWORD PTR[((-112))+r12]
  717. movdqa xmm2,XMMWORD PTR[((-96))+r12]
  718. movdqa xmm3,XMMWORD PTR[((-80))+r12]
  719. pand xmm0,XMMWORD PTR[((-128))+rdx]
  720. pand xmm1,XMMWORD PTR[((-112))+rdx]
  721. por xmm4,xmm0
  722. pand xmm2,XMMWORD PTR[((-96))+rdx]
  723. por xmm5,xmm1
  724. pand xmm3,XMMWORD PTR[((-80))+rdx]
  725. por xmm4,xmm2
  726. por xmm5,xmm3
  727. movdqa xmm0,XMMWORD PTR[((-64))+r12]
  728. movdqa xmm1,XMMWORD PTR[((-48))+r12]
  729. movdqa xmm2,XMMWORD PTR[((-32))+r12]
  730. movdqa xmm3,XMMWORD PTR[((-16))+r12]
  731. pand xmm0,XMMWORD PTR[((-64))+rdx]
  732. pand xmm1,XMMWORD PTR[((-48))+rdx]
  733. por xmm4,xmm0
  734. pand xmm2,XMMWORD PTR[((-32))+rdx]
  735. por xmm5,xmm1
  736. pand xmm3,XMMWORD PTR[((-16))+rdx]
  737. por xmm4,xmm2
  738. por xmm5,xmm3
  739. movdqa xmm0,XMMWORD PTR[r12]
  740. movdqa xmm1,XMMWORD PTR[16+r12]
  741. movdqa xmm2,XMMWORD PTR[32+r12]
  742. movdqa xmm3,XMMWORD PTR[48+r12]
  743. pand xmm0,XMMWORD PTR[rdx]
  744. pand xmm1,XMMWORD PTR[16+rdx]
  745. por xmm4,xmm0
  746. pand xmm2,XMMWORD PTR[32+rdx]
  747. por xmm5,xmm1
  748. pand xmm3,XMMWORD PTR[48+rdx]
  749. por xmm4,xmm2
  750. por xmm5,xmm3
  751. movdqa xmm0,XMMWORD PTR[64+r12]
  752. movdqa xmm1,XMMWORD PTR[80+r12]
  753. movdqa xmm2,XMMWORD PTR[96+r12]
  754. movdqa xmm3,XMMWORD PTR[112+r12]
  755. pand xmm0,XMMWORD PTR[64+rdx]
  756. pand xmm1,XMMWORD PTR[80+rdx]
  757. por xmm4,xmm0
  758. pand xmm2,XMMWORD PTR[96+rdx]
  759. por xmm5,xmm1
  760. pand xmm3,XMMWORD PTR[112+rdx]
  761. por xmm4,xmm2
  762. por xmm5,xmm3
  763. por xmm4,xmm5
  764. pshufd xmm0,xmm4,04eh
  765. por xmm0,xmm4
  766. lea r12,QWORD PTR[256+r12]
  767. DB 102,72,15,126,195
  768. mov r10,QWORD PTR[r9*1+r14]
  769. mov rbp,r8
  770. mul rbx
  771. add r10,rax
  772. mov rax,QWORD PTR[rcx]
  773. adc rdx,0
  774. imul rbp,r10
  775. mov r11,rdx
  776. mov QWORD PTR[r14],rdi
  777. lea r14,QWORD PTR[r9*1+r14]
  778. mul rbp
  779. add r10,rax
  780. mov rax,QWORD PTR[8+r9*1+rsi]
  781. adc rdx,0
  782. mov rdi,rdx
  783. mul rbx
  784. add r11,rax
  785. mov rax,QWORD PTR[8+rcx]
  786. adc rdx,0
  787. add r11,QWORD PTR[8+r14]
  788. adc rdx,0
  789. mov r10,rdx
  790. mul rbp
  791. add rdi,rax
  792. mov rax,QWORD PTR[16+r9*1+rsi]
  793. adc rdx,0
  794. add rdi,r11
  795. lea r15,QWORD PTR[32+r9]
  796. lea rcx,QWORD PTR[32+rcx]
  797. adc rdx,0
  798. mov r13,rdx
  799. jmp $L$inner4x
  800. ALIGN 32
  801. $L$inner4x::
  802. mul rbx
  803. add r10,rax
  804. mov rax,QWORD PTR[((-16))+rcx]
  805. adc rdx,0
  806. add r10,QWORD PTR[16+r14]
  807. lea r14,QWORD PTR[32+r14]
  808. adc rdx,0
  809. mov r11,rdx
  810. mul rbp
  811. add r13,rax
  812. mov rax,QWORD PTR[((-8))+r15*1+rsi]
  813. adc rdx,0
  814. add r13,r10
  815. adc rdx,0
  816. mov QWORD PTR[((-32))+r14],rdi
  817. mov rdi,rdx
  818. mul rbx
  819. add r11,rax
  820. mov rax,QWORD PTR[((-8))+rcx]
  821. adc rdx,0
  822. add r11,QWORD PTR[((-8))+r14]
  823. adc rdx,0
  824. mov r10,rdx
  825. mul rbp
  826. add rdi,rax
  827. mov rax,QWORD PTR[r15*1+rsi]
  828. adc rdx,0
  829. add rdi,r11
  830. adc rdx,0
  831. mov QWORD PTR[((-24))+r14],r13
  832. mov r13,rdx
  833. mul rbx
  834. add r10,rax
  835. mov rax,QWORD PTR[rcx]
  836. adc rdx,0
  837. add r10,QWORD PTR[r14]
  838. adc rdx,0
  839. mov r11,rdx
  840. mul rbp
  841. add r13,rax
  842. mov rax,QWORD PTR[8+r15*1+rsi]
  843. adc rdx,0
  844. add r13,r10
  845. adc rdx,0
  846. mov QWORD PTR[((-16))+r14],rdi
  847. mov rdi,rdx
  848. mul rbx
  849. add r11,rax
  850. mov rax,QWORD PTR[8+rcx]
  851. adc rdx,0
  852. add r11,QWORD PTR[8+r14]
  853. adc rdx,0
  854. mov r10,rdx
  855. mul rbp
  856. add rdi,rax
  857. mov rax,QWORD PTR[16+r15*1+rsi]
  858. adc rdx,0
  859. add rdi,r11
  860. lea rcx,QWORD PTR[32+rcx]
  861. adc rdx,0
  862. mov QWORD PTR[((-8))+r14],r13
  863. mov r13,rdx
  864. add r15,32
  865. jnz $L$inner4x
  866. mul rbx
  867. add r10,rax
  868. mov rax,QWORD PTR[((-16))+rcx]
  869. adc rdx,0
  870. add r10,QWORD PTR[16+r14]
  871. lea r14,QWORD PTR[32+r14]
  872. adc rdx,0
  873. mov r11,rdx
  874. mul rbp
  875. add r13,rax
  876. mov rax,QWORD PTR[((-8))+rsi]
  877. adc rdx,0
  878. add r13,r10
  879. adc rdx,0
  880. mov QWORD PTR[((-32))+r14],rdi
  881. mov rdi,rdx
  882. mul rbx
  883. add r11,rax
  884. mov rax,rbp
  885. mov rbp,QWORD PTR[((-8))+rcx]
  886. adc rdx,0
  887. add r11,QWORD PTR[((-8))+r14]
  888. adc rdx,0
  889. mov r10,rdx
  890. mul rbp
  891. add rdi,rax
  892. mov rax,QWORD PTR[r9*1+rsi]
  893. adc rdx,0
  894. add rdi,r11
  895. adc rdx,0
  896. mov QWORD PTR[((-24))+r14],r13
  897. mov r13,rdx
  898. mov QWORD PTR[((-16))+r14],rdi
  899. lea rcx,QWORD PTR[r9*1+rcx]
  900. xor rdi,rdi
  901. add r13,r10
  902. adc rdi,0
  903. add r13,QWORD PTR[r14]
  904. adc rdi,0
  905. mov QWORD PTR[((-8))+r14],r13
  906. cmp r12,QWORD PTR[((16+8))+rsp]
  907. jb $L$outer4x
  908. xor rax,rax
  909. sub rbp,r13
  910. adc r15,r15
  911. or rdi,r15
  912. sub rax,rdi
  913. lea rbx,QWORD PTR[r9*1+r14]
  914. mov r12,QWORD PTR[rcx]
  915. lea rbp,QWORD PTR[rcx]
  916. mov rcx,r9
  917. sar rcx,3+2
  918. mov rdi,QWORD PTR[((56+8))+rsp]
  919. dec r12
  920. xor r10,r10
  921. mov r13,QWORD PTR[8+rbp]
  922. mov r14,QWORD PTR[16+rbp]
  923. mov r15,QWORD PTR[24+rbp]
  924. jmp $L$sqr4x_sub_entry
  925. mul4x_internal ENDP
  ; bn_power5 -- public entry point, Win64 calling convention.
  ; Remaps the six Win64 arguments (rcx, rdx, r8, r9, [40+rsp], [48+rsp]) into
  ; rdi, rsi, rdx, rcx, r8, r9, builds a 64-byte aligned scratch frame below
  ; the stack, then runs __bn_sqr8x_internal + __bn_post4x_internal five times
  ; followed by one mul4x_internal call.  Returns rax = 1.
  ; If all capability bits tested below are set, control is handed to
  ; $L$powerx5_enter (defined outside this block) instead.
  ; Frame slots written here: [32+rsp] = value loaded through the 5th argument,
  ; [40+rsp] = rsp as it was on entry (used by the epilogue to unwind).
  926. PUBLIC bn_power5
  927. ALIGN 32
  928. bn_power5 PROC PUBLIC
  929. mov QWORD PTR[8+rsp],rdi ;WIN64 prologue
  930. mov QWORD PTR[16+rsp],rsi
  931. mov rax,rsp
  932. $L$SEH_begin_bn_power5::
  933. mov rdi,rcx ; Win64 arg 1 -> rdi
  934. mov rsi,rdx ; Win64 arg 2 -> rsi
  935. mov rdx,r8 ; Win64 arg 3 -> rdx
  936. mov rcx,r9 ; Win64 arg 4 -> rcx
  937. mov r8,QWORD PTR[40+rsp] ; Win64 arg 5 (stack) -> r8
  938. mov r9,QWORD PTR[48+rsp] ; Win64 arg 6 (stack) -> r9
  939. mov rax,rsp ; rax = entry rsp, kept for the epilogue
  ; NOTE(review): mask 080108h presumably selects the BMI1/BMI2/ADX feature
  ; bits of the capability vector -- confirm against the capability word layout.
  940. mov r11d,DWORD PTR[((OPENSSL_ia32cap_P+8))]
  941. and r11d,080108h
  942. cmp r11d,080108h
  943. je $L$powerx5_enter ; all three bits set: take the alternative path
  944. push rbx
  945. push rbp
  946. push r12
  947. push r13
  948. push r14
  949. push r15
  950. $L$power5_prologue::
  951. shl r9d,3 ; r9 = arg 6 * 8 (length in bytes; 32-bit write zero-extends)
  952. lea r10d,DWORD PTR[r9*2+r9] ; r10 = 3 * byte length
  953. neg r9 ; r9 = -(byte length)
  954. mov r8,QWORD PTR[r8] ; dereference arg 5
  ; Choose the frame base in rbp.  r11 = (rsp - 320 - 2*len - rdi) mod 4096.
  ; NOTE(review): presumably this keeps the frame from aliasing the rdi
  ; buffer modulo 4096 -- confirm against the generator's comments.
  955. lea r11,QWORD PTR[((-320))+r9*2+rsp]
  956. mov rbp,rsp
  957. sub r11,rdi
  958. and r11,4095
  959. cmp r10,r11
  960. jb $L$pwr_sp_alt
  961. sub rbp,r11
  962. lea rbp,QWORD PTR[((-320))+r9*2+rbp]
  963. jmp $L$pwr_sp_done
  964. ALIGN 32
  965. $L$pwr_sp_alt::
  966. lea r10,QWORD PTR[((4096-320))+r9*2]
  967. lea rbp,QWORD PTR[((-320))+r9*2+rbp]
  968. sub r11,r10
  969. mov r10,0 ; mov rather than xor: CF from the sub must survive for cmovc
  970. cmovc r11,r10 ; clamp the adjustment to 0 when the sub borrowed
  971. sub rbp,r11
  972. $L$pwr_sp_done::
  973. and rbp,-64 ; align the frame base down to 64 bytes
  ; Move rsp down to rbp one 4096-byte page at a time, reading each page.
  ; NOTE(review): presumably a stack probe so guard pages are touched in
  ; order -- confirm.
  974. mov r11,rsp
  975. sub r11,rbp
  976. and r11,-4096
  977. lea rsp,QWORD PTR[rbp*1+r11]
  978. mov r10,QWORD PTR[rsp]
  979. cmp rsp,rbp
  980. ja $L$pwr_page_walk
  981. jmp $L$pwr_page_walk_done
  982. $L$pwr_page_walk::
  983. lea rsp,QWORD PTR[((-4096))+rsp]
  984. mov r10,QWORD PTR[rsp]
  985. cmp rsp,rbp
  986. ja $L$pwr_page_walk
  987. $L$pwr_page_walk_done::
  988. mov r10,r9 ; r10 = -(byte length)
  989. neg r9 ; r9 = +(byte length)
  990. mov QWORD PTR[32+rsp],r8 ; frame slot: value loaded through arg 5
  991. mov QWORD PTR[40+rsp],rax ; frame slot: entry rsp
  992. $L$power5_body::
  ; The DB sequences are hand-encoded 64-bit movq instructions.
  993. DB 102,72,15,110,207 ; movq xmm1,rdi
  994. DB 102,72,15,110,209 ; movq xmm2,rcx
  995. DB 102,73,15,110,218 ; movq xmm3,r10
  996. DB 102,72,15,110,226 ; movq xmm4,rdx
  ; Five rounds of sqr8x followed by post4x.
  997. call __bn_sqr8x_internal
  998. call __bn_post4x_internal
  999. call __bn_sqr8x_internal
  1000. call __bn_post4x_internal
  1001. call __bn_sqr8x_internal
  1002. call __bn_post4x_internal
  1003. call __bn_sqr8x_internal
  1004. call __bn_post4x_internal
  1005. call __bn_sqr8x_internal
  1006. call __bn_post4x_internal
  1007. DB 102,72,15,126,209 ; movq rcx,xmm2 (restore arg 4)
  1008. DB 102,72,15,126,226 ; movq rdx,xmm4 (restore arg 3)
  1009. mov rdi,rsi
  1010. mov rax,QWORD PTR[40+rsp] ; rax = entry rsp, handed to mul4x_internal
  1011. lea r8,QWORD PTR[32+rsp] ; r8 = address of the slot written at $L$pwr_page_walk_done
  1012. call mul4x_internal
  ; Epilogue: reload the six pushed registers relative to the entry rsp.
  1013. mov rsi,QWORD PTR[40+rsp]
  1014. mov rax,1 ; return value
  1015. mov r15,QWORD PTR[((-48))+rsi]
  1016. mov r14,QWORD PTR[((-40))+rsi]
  1017. mov r13,QWORD PTR[((-32))+rsi]
  1018. mov r12,QWORD PTR[((-24))+rsi]
  1019. mov rbp,QWORD PTR[((-16))+rsi]
  1020. mov rbx,QWORD PTR[((-8))+rsi]
  1021. lea rsp,QWORD PTR[rsi]
  1022. $L$power5_epilogue::
  1023. mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue
  1024. mov rsi,QWORD PTR[16+rsp]
  1025. DB 0F3h,0C3h ;repret
  1026. $L$SEH_end_bn_power5::
  1027. bn_power5 ENDP
  ; bn_sqr8x_internal / __bn_sqr8x_internal -- internal squaring helper with a
  ; second entry point, __bn_sqr8x_reduction, that runs only the reduction.
  ; Register use on entry, as read by the code below:
  ;   rsi = source words, r9 = length in bytes, r10 = value used as 32+r10
  ;   for negative indexing (the callers in this file pass r10 = -r9),
  ;   xmm2 = pointer reloaded into rbp for the reduction, xmm3 = value
  ;   reloaded into r9 before returning.
  ; The scratch vector lives at [48+8+rsp]; [32+8+rsp] holds the multiplier
  ; constant stored by the caller.  NOTE(review): the "+8" in every frame
  ; offset presumably accounts for the return address pushed by call -- confirm.
  ; On return rax holds the final carry accumulated by the reduction.
  ; Phases: (1) cross products a[i]*a[j], i<j, accumulated into the scratch
  ; vector; (2) $L$sqr4x_shift_n_add doubles them and adds the squares a[i]^2;
  ; (3) reduction in 8-word windows.
  1028. PUBLIC bn_sqr8x_internal
  1029. ALIGN 32
  1030. bn_sqr8x_internal PROC PUBLIC
  1031. __bn_sqr8x_internal::
  1032. lea rbp,QWORD PTR[32+r10] ; rbp = negative running index (32 + r10)
  1033. lea rsi,QWORD PTR[r9*1+rsi] ; rsi -> end of the source; words are reached with negative indices
  1034. mov rcx,r9
  1035. mov r14,QWORD PTR[((-32))+rbp*1+rsi] ; first multiplier word of this pass
  1036. lea rdi,QWORD PTR[((48+8))+r9*2+rsp] ; rdi -> end of the 2*len-byte scratch vector
  1037. mov rax,QWORD PTR[((-24))+rbp*1+rsi]
  1038. lea rdi,QWORD PTR[((-32))+rbp*1+rdi]
  1039. mov rbx,QWORD PTR[((-16))+rbp*1+rsi]
  1040. mov r15,rax ; second multiplier word of this pass
  1041. mul r14
  1042. mov r10,rax
  1043. mov rax,rbx
  1044. mov r11,rdx
  1045. mov QWORD PTR[((-24))+rbp*1+rdi],r10
  1046. mul r14
  1047. add r11,rax
  1048. mov rax,rbx
  1049. adc rdx,0
  1050. mov QWORD PTR[((-16))+rbp*1+rdi],r11
  1051. mov r10,rdx
  1052. mov rbx,QWORD PTR[((-8))+rbp*1+rsi]
  1053. mul r15
  1054. mov r12,rax
  1055. mov rax,rbx
  1056. mov r13,rdx
  1057. lea rcx,QWORD PTR[rbp] ; rcx = inner index, counts up to 0
  1058. mul r14
  1059. add r10,rax
  1060. mov rax,rbx
  1061. mov r11,rdx
  1062. adc r11,0
  1063. add r10,r12
  1064. adc r11,0
  1065. mov QWORD PTR[((-8))+rcx*1+rdi],r10
  1066. jmp $L$sqr4x_1st
  1067. ALIGN 32
  ; First pass: multiply the remaining source words by r14 and r15, four
  ; words per iteration, storing (not accumulating) into the scratch vector.
  ; Runs until rcx reaches 0.
  1068. $L$sqr4x_1st::
  1069. mov rbx,QWORD PTR[rcx*1+rsi]
  1070. mul r15
  1071. add r13,rax
  1072. mov rax,rbx
  1073. mov r12,rdx
  1074. adc r12,0
  1075. mul r14
  1076. add r11,rax
  1077. mov rax,rbx
  1078. mov rbx,QWORD PTR[8+rcx*1+rsi]
  1079. mov r10,rdx
  1080. adc r10,0
  1081. add r11,r13
  1082. adc r10,0
  1083. mul r15
  1084. add r12,rax
  1085. mov rax,rbx
  1086. mov QWORD PTR[rcx*1+rdi],r11
  1087. mov r13,rdx
  1088. adc r13,0
  1089. mul r14
  1090. add r10,rax
  1091. mov rax,rbx
  1092. mov rbx,QWORD PTR[16+rcx*1+rsi]
  1093. mov r11,rdx
  1094. adc r11,0
  1095. add r10,r12
  1096. adc r11,0
  1097. mul r15
  1098. add r13,rax
  1099. mov rax,rbx
  1100. mov QWORD PTR[8+rcx*1+rdi],r10
  1101. mov r12,rdx
  1102. adc r12,0
  1103. mul r14
  1104. add r11,rax
  1105. mov rax,rbx
  1106. mov rbx,QWORD PTR[24+rcx*1+rsi]
  1107. mov r10,rdx
  1108. adc r10,0
  1109. add r11,r13
  1110. adc r10,0
  1111. mul r15
  1112. add r12,rax
  1113. mov rax,rbx
  1114. mov QWORD PTR[16+rcx*1+rdi],r11
  1115. mov r13,rdx
  1116. adc r13,0
  1117. lea rcx,QWORD PTR[32+rcx]
  1118. mul r14
  1119. add r10,rax
  1120. mov rax,rbx
  1121. mov r11,rdx
  1122. adc r11,0
  1123. add r10,r12
  1124. adc r11,0
  1125. mov QWORD PTR[((-8))+rcx*1+rdi],r10
  1126. cmp rcx,0
  1127. jne $L$sqr4x_1st
  1128. mul r15
  1129. add r13,rax
  1130. lea rbp,QWORD PTR[16+rbp] ; advance two source words for the next pass
  1131. adc rdx,0
  1132. add r13,r11
  1133. adc rdx,0
  1134. mov QWORD PTR[rdi],r13
  1135. mov r12,rdx
  1136. mov QWORD PTR[8+rdi],rdx
  1137. jmp $L$sqr4x_outer
  1138. ALIGN 32
  ; Later passes: same shape as the first one, but each product is added to
  ; the value already in the scratch vector.  One pass per two source words;
  ; repeats until rbp (advanced by 16 at the bottom) reaches 0.
  1139. $L$sqr4x_outer::
  1140. mov r14,QWORD PTR[((-32))+rbp*1+rsi]
  1141. lea rdi,QWORD PTR[((48+8))+r9*2+rsp]
  1142. mov rax,QWORD PTR[((-24))+rbp*1+rsi]
  1143. lea rdi,QWORD PTR[((-32))+rbp*1+rdi]
  1144. mov rbx,QWORD PTR[((-16))+rbp*1+rsi]
  1145. mov r15,rax
  1146. mul r14
  1147. mov r10,QWORD PTR[((-24))+rbp*1+rdi]
  1148. add r10,rax
  1149. mov rax,rbx
  1150. adc rdx,0
  1151. mov QWORD PTR[((-24))+rbp*1+rdi],r10
  1152. mov r11,rdx
  1153. mul r14
  1154. add r11,rax
  1155. mov rax,rbx
  1156. adc rdx,0
  1157. add r11,QWORD PTR[((-16))+rbp*1+rdi]
  1158. mov r10,rdx
  1159. adc r10,0
  1160. mov QWORD PTR[((-16))+rbp*1+rdi],r11
  1161. xor r12,r12
  1162. mov rbx,QWORD PTR[((-8))+rbp*1+rsi]
  1163. mul r15
  1164. add r12,rax
  1165. mov rax,rbx
  1166. adc rdx,0
  1167. add r12,QWORD PTR[((-8))+rbp*1+rdi]
  1168. mov r13,rdx
  1169. adc r13,0
  1170. mul r14
  1171. add r10,rax
  1172. mov rax,rbx
  1173. adc rdx,0
  1174. add r10,r12
  1175. mov r11,rdx
  1176. adc r11,0
  1177. mov QWORD PTR[((-8))+rbp*1+rdi],r10
  1178. lea rcx,QWORD PTR[rbp]
  1179. jmp $L$sqr4x_inner
  1180. ALIGN 32
  ; Inner accumulate loop: two source words per iteration, until rcx == 0.
  1181. $L$sqr4x_inner::
  1182. mov rbx,QWORD PTR[rcx*1+rsi]
  1183. mul r15
  1184. add r13,rax
  1185. mov rax,rbx
  1186. mov r12,rdx
  1187. adc r12,0
  1188. add r13,QWORD PTR[rcx*1+rdi]
  1189. adc r12,0
  1190. DB 067h ; address-size prefix byte on the next instruction (NOTE(review): presumably padding -- confirm)
  1191. mul r14
  1192. add r11,rax
  1193. mov rax,rbx
  1194. mov rbx,QWORD PTR[8+rcx*1+rsi]
  1195. mov r10,rdx
  1196. adc r10,0
  1197. add r11,r13
  1198. adc r10,0
  1199. mul r15
  1200. add r12,rax
  1201. mov QWORD PTR[rcx*1+rdi],r11
  1202. mov rax,rbx
  1203. mov r13,rdx
  1204. adc r13,0
  1205. add r12,QWORD PTR[8+rcx*1+rdi]
  1206. lea rcx,QWORD PTR[16+rcx]
  1207. adc r13,0
  1208. mul r14
  1209. add r10,rax
  1210. mov rax,rbx
  1211. adc rdx,0
  1212. add r10,r12
  1213. mov r11,rdx
  1214. adc r11,0
  1215. mov QWORD PTR[((-8))+rcx*1+rdi],r10
  1216. cmp rcx,0
  1217. jne $L$sqr4x_inner
  1218. DB 067h ; address-size prefix byte (see note above)
  1219. mul r15
  1220. add r13,rax
  1221. adc rdx,0
  1222. add r13,r11
  1223. adc rdx,0
  1224. mov QWORD PTR[rdi],r13
  1225. mov r12,rdx
  1226. mov QWORD PTR[8+rdi],rdx
  1227. add rbp,16
  1228. jnz $L$sqr4x_outer
  ; Final pass over the last four source words (rbp is 0 here, so they are
  ; addressed directly below rsi).
  1229. mov r14,QWORD PTR[((-32))+rsi]
  1230. lea rdi,QWORD PTR[((48+8))+r9*2+rsp]
  1231. mov rax,QWORD PTR[((-24))+rsi]
  1232. lea rdi,QWORD PTR[((-32))+rbp*1+rdi]
  1233. mov rbx,QWORD PTR[((-16))+rsi]
  1234. mov r15,rax
  1235. mul r14
  1236. add r10,rax
  1237. mov rax,rbx
  1238. mov r11,rdx
  1239. adc r11,0
  1240. mul r14
  1241. add r11,rax
  1242. mov rax,rbx
  1243. mov QWORD PTR[((-24))+rdi],r10
  1244. mov r10,rdx
  1245. adc r10,0
  1246. add r11,r13
  1247. mov rbx,QWORD PTR[((-8))+rsi]
  1248. adc r10,0
  1249. mul r15
  1250. add r12,rax
  1251. mov rax,rbx
  1252. mov QWORD PTR[((-16))+rdi],r11
  1253. mov r13,rdx
  1254. adc r13,0
  1255. mul r14
  1256. add r10,rax
  1257. mov rax,rbx
  1258. mov r11,rdx
  1259. adc r11,0
  1260. add r10,r12
  1261. adc r11,0
  1262. mov QWORD PTR[((-8))+rdi],r10
  1263. mul r15
  1264. add r13,rax
  1265. mov rax,QWORD PTR[((-16))+rsi]
  1266. adc rdx,0
  1267. add r13,r11
  1268. adc rdx,0
  1269. mov QWORD PTR[rdi],r13
  1270. mov r12,rdx
  1271. mov QWORD PTR[8+rdi],rdx
  1272. mul rbx ; product of the last two source words
  1273. add rbp,16
  1274. xor r14,r14
  1275. sub rbp,r9 ; rbp = 16 - len: negative index for the shift-and-add phase
  1276. xor r15,r15
  1277. add rax,r12
  1278. adc rdx,0
  1279. mov QWORD PTR[8+rdi],rax
  1280. mov QWORD PTR[16+rdi],rdx
  1281. mov QWORD PTR[24+rdi],r15 ; top scratch word = 0
  ; Shift-and-add phase.  For each pair of scratch words: double them
  ; (lea x*2 plus the bit shifted out of the previous word via shr 63), then
  ; add the 128-bit square of the next source word (mul rax).
  ; rcx is 0 here (both multiply loops exit on rcx == 0), so [r11*2+rcx] is
  ; just r11*2.  The carry between steps is parked in r15: sbb r15,r15 saves
  ; CF and neg r15 brings it back.
  1282. mov rax,QWORD PTR[((-16))+rbp*1+rsi]
  1283. lea rdi,QWORD PTR[((48+8))+rsp] ; rdi -> start of the scratch vector
  1284. xor r10,r10
  1285. mov r11,QWORD PTR[8+rdi]
  1286. lea r12,QWORD PTR[r10*2+r14]
  1287. shr r10,63
  1288. lea r13,QWORD PTR[r11*2+rcx]
  1289. shr r11,63
  1290. or r13,r10
  1291. mov r10,QWORD PTR[16+rdi]
  1292. mov r14,r11
  1293. mul rax
  1294. neg r15
  1295. mov r11,QWORD PTR[24+rdi]
  1296. adc r12,rax
  1297. mov rax,QWORD PTR[((-8))+rbp*1+rsi]
  1298. mov QWORD PTR[rdi],r12
  1299. adc r13,rdx
  1300. lea rbx,QWORD PTR[r10*2+r14]
  1301. mov QWORD PTR[8+rdi],r13
  1302. sbb r15,r15
  1303. shr r10,63
  1304. lea r8,QWORD PTR[r11*2+rcx]
  1305. shr r11,63
  1306. or r8,r10
  1307. mov r10,QWORD PTR[32+rdi]
  1308. mov r14,r11
  1309. mul rax
  1310. neg r15
  1311. mov r11,QWORD PTR[40+rdi]
  1312. adc rbx,rax
  1313. mov rax,QWORD PTR[rbp*1+rsi]
  1314. mov QWORD PTR[16+rdi],rbx
  1315. adc r8,rdx
  1316. lea rbp,QWORD PTR[16+rbp]
  1317. mov QWORD PTR[24+rdi],r8
  1318. sbb r15,r15
  1319. lea rdi,QWORD PTR[64+rdi]
  1320. jmp $L$sqr4x_shift_n_add
  1321. ALIGN 32
  ; Main shift-and-add loop: four source words (eight scratch words) per
  ; iteration, until rbp reaches 0.
  1322. $L$sqr4x_shift_n_add::
  1323. lea r12,QWORD PTR[r10*2+r14]
  1324. shr r10,63
  1325. lea r13,QWORD PTR[r11*2+rcx]
  1326. shr r11,63
  1327. or r13,r10
  1328. mov r10,QWORD PTR[((-16))+rdi]
  1329. mov r14,r11
  1330. mul rax
  1331. neg r15
  1332. mov r11,QWORD PTR[((-8))+rdi]
  1333. adc r12,rax
  1334. mov rax,QWORD PTR[((-8))+rbp*1+rsi]
  1335. mov QWORD PTR[((-32))+rdi],r12
  1336. adc r13,rdx
  1337. lea rbx,QWORD PTR[r10*2+r14]
  1338. mov QWORD PTR[((-24))+rdi],r13
  1339. sbb r15,r15
  1340. shr r10,63
  1341. lea r8,QWORD PTR[r11*2+rcx]
  1342. shr r11,63
  1343. or r8,r10
  1344. mov r10,QWORD PTR[rdi]
  1345. mov r14,r11
  1346. mul rax
  1347. neg r15
  1348. mov r11,QWORD PTR[8+rdi]
  1349. adc rbx,rax
  1350. mov rax,QWORD PTR[rbp*1+rsi]
  1351. mov QWORD PTR[((-16))+rdi],rbx
  1352. adc r8,rdx
  1353. lea r12,QWORD PTR[r10*2+r14]
  1354. mov QWORD PTR[((-8))+rdi],r8
  1355. sbb r15,r15
  1356. shr r10,63
  1357. lea r13,QWORD PTR[r11*2+rcx]
  1358. shr r11,63
  1359. or r13,r10
  1360. mov r10,QWORD PTR[16+rdi]
  1361. mov r14,r11
  1362. mul rax
  1363. neg r15
  1364. mov r11,QWORD PTR[24+rdi]
  1365. adc r12,rax
  1366. mov rax,QWORD PTR[8+rbp*1+rsi]
  1367. mov QWORD PTR[rdi],r12
  1368. adc r13,rdx
  1369. lea rbx,QWORD PTR[r10*2+r14]
  1370. mov QWORD PTR[8+rdi],r13
  1371. sbb r15,r15
  1372. shr r10,63
  1373. lea r8,QWORD PTR[r11*2+rcx]
  1374. shr r11,63
  1375. or r8,r10
  1376. mov r10,QWORD PTR[32+rdi]
  1377. mov r14,r11
  1378. mul rax
  1379. neg r15
  1380. mov r11,QWORD PTR[40+rdi]
  1381. adc rbx,rax
  1382. mov rax,QWORD PTR[16+rbp*1+rsi]
  1383. mov QWORD PTR[16+rdi],rbx
  1384. adc r8,rdx
  1385. mov QWORD PTR[24+rdi],r8
  1386. sbb r15,r15
  1387. lea rdi,QWORD PTR[64+rdi]
  1388. add rbp,32
  1389. jnz $L$sqr4x_shift_n_add
  ; Tail of the shift-and-add phase: the last two source words.
  1390. lea r12,QWORD PTR[r10*2+r14]
  1391. DB 067h ; address-size prefix byte (see note above)
  1392. shr r10,63
  1393. lea r13,QWORD PTR[r11*2+rcx]
  1394. shr r11,63
  1395. or r13,r10
  1396. mov r10,QWORD PTR[((-16))+rdi]
  1397. mov r14,r11
  1398. mul rax
  1399. neg r15
  1400. mov r11,QWORD PTR[((-8))+rdi]
  1401. adc r12,rax
  1402. mov rax,QWORD PTR[((-8))+rsi]
  1403. mov QWORD PTR[((-32))+rdi],r12
  1404. adc r13,rdx
  1405. lea rbx,QWORD PTR[r10*2+r14]
  1406. mov QWORD PTR[((-24))+rdi],r13
  1407. sbb r15,r15
  1408. shr r10,63
  1409. lea r8,QWORD PTR[r11*2+rcx]
  1410. shr r11,63
  1411. or r8,r10
  1412. mul rax
  1413. neg r15
  1414. adc rbx,rax
  1415. adc r8,rdx
  1416. mov QWORD PTR[((-16))+rdi],rbx
  1417. mov QWORD PTR[((-8))+rdi],r8
  1418. DB 102,72,15,126,213 ; movq rbp,xmm2
  ; Reduction entry point (bn_from_mont8x calls this label directly).
  ; Expects rbp = pointer to the words multiplied in below, r9 = length in
  ; bytes, [32+8+rsp] = multiplier constant, scratch vector at [48+8+rsp].
  ; Frame temporaries: [0+8+rsp] = rbp + len (end pointer), [8+8+rsp] = end
  ; of the scratch vector.
  1419. __bn_sqr8x_reduction::
  1420. xor rax,rax ; rax = running top carry
  1421. lea rcx,QWORD PTR[rbp*1+r9]
  1422. lea rdx,QWORD PTR[((48+8))+r9*2+rsp]
  1423. mov QWORD PTR[((0+8))+rsp],rcx
  1424. lea rdi,QWORD PTR[((48+8))+r9*1+rsp]
  1425. mov QWORD PTR[((8+8))+rsp],rdx
  1426. neg r9 ; r9 = -len, used to rewind rdi at the top of each window
  1427. jmp $L$8x_reduction_loop
  1428. ALIGN 32
  ; One pass per 8-word window of the scratch vector: load the window into
  ; rbx, r9..r15, reduce it, then propagate through the rest ($L$8x_tail).
  1429. $L$8x_reduction_loop::
  1430. lea rdi,QWORD PTR[r9*1+rdi]
  1431. DB 066h ; operand-size prefix byte on the next instruction (NOTE(review): presumably padding -- confirm)
  1432. mov rbx,QWORD PTR[rdi]
  1433. mov r9,QWORD PTR[8+rdi]
  1434. mov r10,QWORD PTR[16+rdi]
  1435. mov r11,QWORD PTR[24+rdi]
  1436. mov r12,QWORD PTR[32+rdi]
  1437. mov r13,QWORD PTR[40+rdi]
  1438. mov r14,QWORD PTR[48+rdi]
  1439. mov r15,QWORD PTR[56+rdi]
  1440. mov QWORD PTR[rdx],rax ; park the top carry at the end of the scratch vector
  1441. lea rdi,QWORD PTR[64+rdi]
  1442. DB 067h ; address-size prefix byte (see note above)
  1443. mov r8,rbx
  1444. imul rbx,QWORD PTR[((32+8))+rsp] ; rbx = lowest window word * multiplier constant
  1445. mov rax,QWORD PTR[rbp]
  1446. mov ecx,8
  1447. jmp $L$8x_reduce
  1448. ALIGN 32
  ; Eight iterations (ecx = 8..1).  Each multiplies the eight words at rbp by
  ; rbx and folds the result into r8..r15, shifting the window by one word.
  1449. $L$8x_reduce::
  1450. mul rbx
  1451. mov rax,QWORD PTR[8+rbp] ; low product word is discarded; only its carry matters
  1452. neg r8 ; CF = (r8 != 0)
  1453. mov r8,rdx
  1454. adc r8,0
  1455. mul rbx
  1456. add r9,rax
  1457. mov rax,QWORD PTR[16+rbp]
  1458. adc rdx,0
  1459. add r8,r9
  1460. mov QWORD PTR[((48-8+8))+rcx*8+rsp],rbx ; save this multiplier for $L$8x_tail
  1461. mov r9,rdx
  1462. adc r9,0
  1463. mul rbx
  1464. add r10,rax
  1465. mov rax,QWORD PTR[24+rbp]
  1466. adc rdx,0
  1467. add r9,r10
  1468. mov rsi,QWORD PTR[((32+8))+rsp]
  1469. mov r10,rdx
  1470. adc r10,0
  1471. mul rbx
  1472. add r11,rax
  1473. mov rax,QWORD PTR[32+rbp]
  1474. adc rdx,0
  1475. imul rsi,r8 ; multiplier for the next iteration
  1476. add r10,r11
  1477. mov r11,rdx
  1478. adc r11,0
  1479. mul rbx
  1480. add r12,rax
  1481. mov rax,QWORD PTR[40+rbp]
  1482. adc rdx,0
  1483. add r11,r12
  1484. mov r12,rdx
  1485. adc r12,0
  1486. mul rbx
  1487. add r13,rax
  1488. mov rax,QWORD PTR[48+rbp]
  1489. adc rdx,0
  1490. add r12,r13
  1491. mov r13,rdx
  1492. adc r13,0
  1493. mul rbx
  1494. add r14,rax
  1495. mov rax,QWORD PTR[56+rbp]
  1496. adc rdx,0
  1497. add r13,r14
  1498. mov r14,rdx
  1499. adc r14,0
  1500. mul rbx
  1501. mov rbx,rsi
  1502. add r15,rax
  1503. mov rax,QWORD PTR[rbp]
  1504. adc rdx,0
  1505. add r14,r15
  1506. mov r15,rdx
  1507. adc r15,0
  1508. dec ecx
  1509. jnz $L$8x_reduce
  1510. lea rbp,QWORD PTR[64+rbp]
  1511. xor rax,rax
  1512. mov rdx,QWORD PTR[((8+8))+rsp]
  1513. cmp rbp,QWORD PTR[((0+8))+rsp]
  1514. jae $L$8x_no_tail ; rbp reached the end pointer: nothing left to multiply
  1515. DB 066h ; operand-size prefix byte (see note above)
  1516. add r8,QWORD PTR[rdi]
  1517. adc r9,QWORD PTR[8+rdi]
  1518. adc r10,QWORD PTR[16+rdi]
  1519. adc r11,QWORD PTR[24+rdi]
  1520. adc r12,QWORD PTR[32+rdi]
  1521. adc r13,QWORD PTR[40+rdi]
  1522. adc r14,QWORD PTR[48+rdi]
  1523. adc r15,QWORD PTR[56+rdi]
  1524. sbb rsi,rsi ; rsi = saved carry (0 or -1)
  1525. mov rbx,QWORD PTR[((48+56+8))+rsp] ; first multiplier saved by $L$8x_reduce
  1526. mov ecx,8
  1527. mov rax,QWORD PTR[rbp]
  1528. jmp $L$8x_tail
  1529. ALIGN 32
  ; Multiply the next eight words at rbp by each saved multiplier in turn,
  ; writing one finished word to [rdi] per iteration.
  1530. $L$8x_tail::
  1531. mul rbx
  1532. add r8,rax
  1533. mov rax,QWORD PTR[8+rbp]
  1534. mov QWORD PTR[rdi],r8
  1535. mov r8,rdx
  1536. adc r8,0
  1537. mul rbx
  1538. add r9,rax
  1539. mov rax,QWORD PTR[16+rbp]
  1540. adc rdx,0
  1541. add r8,r9
  1542. lea rdi,QWORD PTR[8+rdi]
  1543. mov r9,rdx
  1544. adc r9,0
  1545. mul rbx
  1546. add r10,rax
  1547. mov rax,QWORD PTR[24+rbp]
  1548. adc rdx,0
  1549. add r9,r10
  1550. mov r10,rdx
  1551. adc r10,0
  1552. mul rbx
  1553. add r11,rax
  1554. mov rax,QWORD PTR[32+rbp]
  1555. adc rdx,0
  1556. add r10,r11
  1557. mov r11,rdx
  1558. adc r11,0
  1559. mul rbx
  1560. add r12,rax
  1561. mov rax,QWORD PTR[40+rbp]
  1562. adc rdx,0
  1563. add r11,r12
  1564. mov r12,rdx
  1565. adc r12,0
  1566. mul rbx
  1567. add r13,rax
  1568. mov rax,QWORD PTR[48+rbp]
  1569. adc rdx,0
  1570. add r12,r13
  1571. mov r13,rdx
  1572. adc r13,0
  1573. mul rbx
  1574. add r14,rax
  1575. mov rax,QWORD PTR[56+rbp]
  1576. adc rdx,0
  1577. add r13,r14
  1578. mov r14,rdx
  1579. adc r14,0
  1580. mul rbx
  1581. mov rbx,QWORD PTR[((48-16+8))+rcx*8+rsp] ; fetch the next saved multiplier
  1582. add r15,rax
  1583. adc rdx,0
  1584. add r14,r15
  1585. mov rax,QWORD PTR[rbp]
  1586. mov r15,rdx
  1587. adc r15,0
  1588. dec ecx
  1589. jnz $L$8x_tail
  1590. lea rbp,QWORD PTR[64+rbp]
  1591. mov rdx,QWORD PTR[((8+8))+rsp]
  1592. cmp rbp,QWORD PTR[((0+8))+rsp]
  1593. jae $L$8x_tail_done
  1594. mov rbx,QWORD PTR[((48+56+8))+rsp]
  1595. neg rsi ; restore the carry saved by sbb rsi,rsi
  1596. mov rax,QWORD PTR[rbp]
  1597. adc r8,QWORD PTR[rdi]
  1598. adc r9,QWORD PTR[8+rdi]
  1599. adc r10,QWORD PTR[16+rdi]
  1600. adc r11,QWORD PTR[24+rdi]
  1601. adc r12,QWORD PTR[32+rdi]
  1602. adc r13,QWORD PTR[40+rdi]
  1603. adc r14,QWORD PTR[48+rdi]
  1604. adc r15,QWORD PTR[56+rdi]
  1605. sbb rsi,rsi
  1606. mov ecx,8
  1607. jmp $L$8x_tail
  1608. ALIGN 32
  1609. $L$8x_tail_done::
  1610. xor rax,rax
  1611. add r8,QWORD PTR[rdx] ; add the top carry parked at the end of the scratch vector
  1612. adc r9,0
  1613. adc r10,0
  1614. adc r11,0
  1615. adc r12,0
  1616. adc r13,0
  1617. adc r14,0
  1618. adc r15,0
  1619. adc rax,0
  1620. neg rsi ; restore the saved carry for the adc chain below
  1621. $L$8x_no_tail::
  1622. adc r8,QWORD PTR[rdi]
  1623. adc r9,QWORD PTR[8+rdi]
  1624. adc r10,QWORD PTR[16+rdi]
  1625. adc r11,QWORD PTR[24+rdi]
  1626. adc r12,QWORD PTR[32+rdi]
  1627. adc r13,QWORD PTR[40+rdi]
  1628. adc r14,QWORD PTR[48+rdi]
  1629. adc r15,QWORD PTR[56+rdi]
  1630. adc rax,0 ; rax = top carry for the next window / return value
  1631. mov rcx,QWORD PTR[((-8))+rbp]
  1632. xor rsi,rsi
  1633. DB 102,72,15,126,213 ; movq rbp,xmm2 (rewind rbp to the pointer held in xmm2)
  1634. mov QWORD PTR[rdi],r8
  1635. mov QWORD PTR[8+rdi],r9
  1636. DB 102,73,15,126,217 ; movq r9,xmm3
  1637. mov QWORD PTR[16+rdi],r10
  1638. mov QWORD PTR[24+rdi],r11
  1639. mov QWORD PTR[32+rdi],r12
  1640. mov QWORD PTR[40+rdi],r13
  1641. mov QWORD PTR[48+rdi],r14
  1642. mov QWORD PTR[56+rdi],r15
  1643. lea rdi,QWORD PTR[64+rdi]
  1644. cmp rdi,rdx
  1645. jb $L$8x_reduction_loop ; more windows until rdi reaches the end of the scratch vector
  1646. DB 0F3h,0C3h ;repret
  1647. bn_sqr8x_internal ENDP
  ; __bn_post4x_internal -- branch-free masked subtraction, four words per
  ; iteration.  For each word: out = in + ((-sub) AND mask), where "in" is
  ; read at rbx, "sub" at rbp, and "out" is written at rdi.
  ; As read by the code: rbp = pointer to the words to subtract, rdi + r9 =
  ; pointer to the input words, r9 = negative byte count (rcx = r9 >> 5 counts
  ; up to 0), xmm1 = output pointer, rax = selector that neg turns into the mask.
  ; NOTE(review): presumably rax is 0 or 1 on entry, so the mask is 0 or
  ; all-ones -- confirm against the callers.
  ; On exit r10 = r9 as it was on entry and r9 is negated.
  ; $L$sqr4x_sub_entry is also jumped to from mul4x_internal.
  1648. ALIGN 32
  1649. __bn_post4x_internal PROC PRIVATE
  1650. mov r12,QWORD PTR[rbp]
  1651. lea rbx,QWORD PTR[r9*1+rdi] ; rbx -> input words
  1652. mov rcx,r9
  1653. DB 102,72,15,126,207 ; movq rdi,xmm1 (output pointer)
  1654. neg rax ; selector -> mask
  1655. DB 102,72,15,126,206 ; movq rsi,xmm1
  1656. sar rcx,3+2 ; byte count / 32 = number of 4-word iterations (negative)
  1657. dec r12 ; not(x-1) = -x: the first word gets the "+1" of the two's complement
  1658. xor r10,r10 ; no carry pending
  1659. mov r13,QWORD PTR[8+rbp]
  1660. mov r14,QWORD PTR[16+rbp]
  1661. mov r15,QWORD PTR[24+rbp]
  1662. jmp $L$sqr4x_sub_entry
  1663. ALIGN 16
  1664. $L$sqr4x_sub::
  1665. mov r12,QWORD PTR[rbp]
  1666. mov r13,QWORD PTR[8+rbp]
  1667. mov r14,QWORD PTR[16+rbp]
  1668. mov r15,QWORD PTR[24+rbp]
  1669. $L$sqr4x_sub_entry::
  1670. lea rbp,QWORD PTR[32+rbp]
  1671. not r12
  1672. not r13
  1673. not r14
  1674. not r15
  1675. and r12,rax ; apply the mask: a zero mask makes the additions below no-ops
  1676. and r13,rax
  1677. and r14,rax
  1678. and r15,rax
  1679. neg r10 ; CF = carry saved by the previous iteration
  1680. adc r12,QWORD PTR[rbx]
  1681. adc r13,QWORD PTR[8+rbx]
  1682. adc r14,QWORD PTR[16+rbx]
  1683. adc r15,QWORD PTR[24+rbx]
  1684. mov QWORD PTR[rdi],r12
  1685. lea rbx,QWORD PTR[32+rbx]
  1686. mov QWORD PTR[8+rdi],r13
  1687. sbb r10,r10 ; save the carry for the next iteration (mov/lea leave CF intact)
  1688. mov QWORD PTR[16+rdi],r14
  1689. mov QWORD PTR[24+rdi],r15
  1690. lea rdi,QWORD PTR[32+rdi]
  1691. inc rcx
  1692. jnz $L$sqr4x_sub
  1693. mov r10,r9
  1694. neg r9
  1695. DB 0F3h,0C3h ;repret
  1696. __bn_post4x_internal ENDP
  ; bn_from_montgomery -- public dispatcher, Win64 calling convention.
  ; Tests the low three bits of the 32-bit value at [48+rsp] (the sixth
  ; argument slot).  If they are all zero, i.e. the value is a multiple of 8,
  ; it tail-jumps to bn_from_mont8x with the arguments untouched; otherwise it
  ; returns eax = 0.
  1697. PUBLIC bn_from_montgomery
  1698. ALIGN 32
  1699. bn_from_montgomery PROC PUBLIC
  1700. test DWORD PTR[48+rsp],7
  1701. jz bn_from_mont8x ; jump, not call: bn_from_mont8x returns to our caller
  1702. xor eax,eax ; unsupported size: return 0
  1703. DB 0F3h,0C3h ;repret
  1704. bn_from_montgomery ENDP
  ; bn_from_mont8x -- private worker reached from bn_from_montgomery.
  ; Remaps the Win64 arguments to rdi, rsi, rdx, rcx, r8, r9, builds a 64-byte
  ; aligned scratch frame, copies the input words (rsi) into the low half of
  ; the scratch vector at [48+rsp] while zeroing the high half, runs a
  ; reduction plus post-processing pair, wipes the scratch vector and
  ; returns rax = 1.
  ; The reduction pair is __bn_sqrx8x_reduction/__bn_postx4x_internal when the
  ; capability bits tested below are all set, otherwise
  ; __bn_sqr8x_reduction/__bn_post4x_internal.
  1705. ALIGN 32
  1706. bn_from_mont8x PROC PRIVATE
  1707. mov QWORD PTR[8+rsp],rdi ;WIN64 prologue
  1708. mov QWORD PTR[16+rsp],rsi
  1709. mov rax,rsp
  1710. $L$SEH_begin_bn_from_mont8x::
  1711. mov rdi,rcx ; Win64 arg 1 -> rdi
  1712. mov rsi,rdx ; Win64 arg 2 -> rsi
  1713. mov rdx,r8 ; Win64 arg 3 -> rdx
  1714. mov rcx,r9 ; Win64 arg 4 -> rcx
  1715. mov r8,QWORD PTR[40+rsp] ; Win64 arg 5 (stack) -> r8
  1716. mov r9,QWORD PTR[48+rsp] ; Win64 arg 6 (stack) -> r9
  1717. DB 067h ; address-size prefix byte on the next instruction (NOTE(review): presumably padding -- confirm)
  1718. mov rax,rsp ; rax = entry rsp, kept for the epilogue
  1719. push rbx
  1720. push rbp
  1721. push r12
  1722. push r13
  1723. push r14
  1724. push r15
  1725. $L$from_prologue::
  1726. shl r9d,3 ; r9 = arg 6 * 8 (length in bytes)
  1727. lea r10,QWORD PTR[r9*2+r9] ; r10 = 3 * byte length
  1728. neg r9 ; r9 = -(byte length)
  1729. mov r8,QWORD PTR[r8] ; dereference arg 5
  ; Choose the frame base in rbp.  r11 = (rsp - 320 - 2*len - rdi) mod 4096.
  ; NOTE(review): presumably this keeps the frame from aliasing the rdi
  ; buffer modulo 4096 -- confirm against the generator's comments.
  1730. lea r11,QWORD PTR[((-320))+r9*2+rsp]
  1731. mov rbp,rsp
  1732. sub r11,rdi
  1733. and r11,4095
  1734. cmp r10,r11
  1735. jb $L$from_sp_alt
  1736. sub rbp,r11
  1737. lea rbp,QWORD PTR[((-320))+r9*2+rbp]
  1738. jmp $L$from_sp_done
  1739. ALIGN 32
  1740. $L$from_sp_alt::
  1741. lea r10,QWORD PTR[((4096-320))+r9*2]
  1742. lea rbp,QWORD PTR[((-320))+r9*2+rbp]
  1743. sub r11,r10
  1744. mov r10,0 ; mov rather than xor: CF from the sub must survive for cmovc
  1745. cmovc r11,r10 ; clamp the adjustment to 0 when the sub borrowed
  1746. sub rbp,r11
  1747. $L$from_sp_done::
  1748. and rbp,-64 ; align the frame base down to 64 bytes
  ; Move rsp down to rbp one 4096-byte page at a time, reading each page.
  ; NOTE(review): presumably a stack probe so guard pages are touched in
  ; order -- confirm.
  1749. mov r11,rsp
  1750. sub r11,rbp
  1751. and r11,-4096
  1752. lea rsp,QWORD PTR[rbp*1+r11]
  1753. mov r10,QWORD PTR[rsp]
  1754. cmp rsp,rbp
  1755. ja $L$from_page_walk
  1756. jmp $L$from_page_walk_done
  1757. $L$from_page_walk::
  1758. lea rsp,QWORD PTR[((-4096))+rsp]
  1759. mov r10,QWORD PTR[rsp]
  1760. cmp rsp,rbp
  1761. ja $L$from_page_walk
  1762. $L$from_page_walk_done::
  1763. mov r10,r9 ; r10 = -(byte length)
  1764. neg r9 ; r9 = +(byte length)
  1765. mov QWORD PTR[32+rsp],r8 ; frame slot: value loaded through arg 5
  1766. mov QWORD PTR[40+rsp],rax ; frame slot: entry rsp
  1767. $L$from_body::
  1768. mov r11,r9 ; r11 = bytes left to copy
  1769. lea rax,QWORD PTR[48+rsp] ; rax -> scratch vector
  1770. pxor xmm0,xmm0
  1771. jmp $L$mul_by_1
  1772. ALIGN 32
  ; Per iteration: copy 64 input bytes to [rax] and store 64 zero bytes at
  ; [rax + r9], so the low half of the scratch vector gets the input and the
  ; high half gets zeros.  Input loads are unaligned (movdqu); frame stores
  ; are aligned (movdqa).
  1773. $L$mul_by_1::
  1774. movdqu xmm1,XMMWORD PTR[rsi]
  1775. movdqu xmm2,XMMWORD PTR[16+rsi]
  1776. movdqu xmm3,XMMWORD PTR[32+rsi]
  1777. movdqa XMMWORD PTR[r9*1+rax],xmm0
  1778. movdqu xmm4,XMMWORD PTR[48+rsi]
  1779. movdqa XMMWORD PTR[16+r9*1+rax],xmm0
  1780. DB 048h,08dh,0b6h,040h,000h,000h,000h ; lea rsi,[64+rsi], encoded with a 32-bit displacement
  1781. movdqa XMMWORD PTR[rax],xmm1
  1782. movdqa XMMWORD PTR[32+r9*1+rax],xmm0
  1783. movdqa XMMWORD PTR[16+rax],xmm2
  1784. movdqa XMMWORD PTR[48+r9*1+rax],xmm0
  1785. movdqa XMMWORD PTR[32+rax],xmm3
  1786. movdqa XMMWORD PTR[48+rax],xmm4
  1787. lea rax,QWORD PTR[64+rax]
  1788. sub r11,64
  1789. jnz $L$mul_by_1
  ; Stash values in xmm registers, where the reduction/post routines reload them.
  1790. DB 102,72,15,110,207 ; movq xmm1,rdi
  1791. DB 102,72,15,110,209 ; movq xmm2,rcx
  1792. DB 067h ; address-size prefix byte (see note above)
  1793. mov rbp,rcx ; rbp = arg 4, the pointer the reduction reads through
  1794. DB 102,73,15,110,218 ; movq xmm3,r10
  ; NOTE(review): mask 080108h presumably selects the BMI1/BMI2/ADX feature
  ; bits of the capability vector -- confirm against the capability word layout.
  1795. mov r11d,DWORD PTR[((OPENSSL_ia32cap_P+8))]
  1796. and r11d,080108h
  1797. cmp r11d,080108h
  1798. jne $L$from_mont_nox
  1799. lea rdi,QWORD PTR[r9*1+rax] ; rax is at the scratch midpoint here, so rdi -> end of the scratch vector
  1800. call __bn_sqrx8x_reduction
  1801. call __bn_postx4x_internal
  1802. pxor xmm0,xmm0
  1803. lea rax,QWORD PTR[48+rsp]
  1804. jmp $L$from_mont_zero
  1805. ALIGN 32
  1806. $L$from_mont_nox::
  1807. call __bn_sqr8x_reduction
  1808. call __bn_post4x_internal
  1809. pxor xmm0,xmm0
  1810. lea rax,QWORD PTR[48+rsp]
  1811. jmp $L$from_mont_zero
  1812. ALIGN 32
  ; Wipe the scratch vector: 64 bytes per iteration while r9 drops by 32, so
  ; 2 * (byte length) bytes are cleared in total.
  1813. $L$from_mont_zero::
  1814. mov rsi,QWORD PTR[40+rsp] ; rsi = entry rsp (reloaded on every iteration)
  1815. movdqa XMMWORD PTR[rax],xmm0
  1816. movdqa XMMWORD PTR[16+rax],xmm0
  1817. movdqa XMMWORD PTR[32+rax],xmm0
  1818. movdqa XMMWORD PTR[48+rax],xmm0
  1819. lea rax,QWORD PTR[64+rax]
  1820. sub r9,32
  1821. jnz $L$from_mont_zero
  ; Epilogue: reload the six pushed registers relative to the entry rsp.
  1822. mov rax,1 ; return value
  1823. mov r15,QWORD PTR[((-48))+rsi]
  1824. mov r14,QWORD PTR[((-40))+rsi]
  1825. mov r13,QWORD PTR[((-32))+rsi]
  1826. mov r12,QWORD PTR[((-24))+rsi]
  1827. mov rbp,QWORD PTR[((-16))+rsi]
  1828. mov rbx,QWORD PTR[((-8))+rsi]
  1829. lea rsp,QWORD PTR[rsi]
  1830. $L$from_epilogue::
  1831. mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue
  1832. mov rsi,QWORD PTR[16+rsp]
  1833. DB 0F3h,0C3h ;repret
  1834. $L$SEH_end_bn_from_mont8x::
  1835. bn_from_mont8x ENDP
  ; bn_mulx4x_mont_gather5 -- private Win64 wrapper around mulx4x_internal.
  ; Remaps the Win64 arguments to rdi, rsi, rdx, rcx, r8, r9, builds a 64-byte
  ; aligned scratch frame, stores the value loaded through arg 5 at [32+rsp]
  ; and the entry rsp at [40+rsp], calls mulx4x_internal, then restores the
  ; saved registers and returns rax = 1.
  ; $L$mulx4x_enter is a label placed after the argument remapping.
  ; NOTE(review): presumably a jump target for code outside this block that has
  ; already remapped its arguments -- confirm.
  1836. ALIGN 32
  1837. bn_mulx4x_mont_gather5 PROC PRIVATE
  1838. mov QWORD PTR[8+rsp],rdi ;WIN64 prologue
  1839. mov QWORD PTR[16+rsp],rsi
  1840. mov rax,rsp
  1841. $L$SEH_begin_bn_mulx4x_mont_gather5::
  1842. mov rdi,rcx ; Win64 arg 1 -> rdi
  1843. mov rsi,rdx ; Win64 arg 2 -> rsi
  1844. mov rdx,r8 ; Win64 arg 3 -> rdx
  1845. mov rcx,r9 ; Win64 arg 4 -> rcx
  1846. mov r8,QWORD PTR[40+rsp] ; Win64 arg 5 (stack) -> r8
  1847. mov r9,QWORD PTR[48+rsp] ; Win64 arg 6 (stack) -> r9
  1848. mov rax,rsp ; rax = entry rsp, kept for the epilogue
  1849. $L$mulx4x_enter::
  1850. push rbx
  1851. push rbp
  1852. push r12
  1853. push r13
  1854. push r14
  1855. push r15
  1856. $L$mulx4x_prologue::
  1857. shl r9d,3 ; r9 = arg 6 * 8 (length in bytes)
  1858. lea r10,QWORD PTR[r9*2+r9] ; r10 = 3 * byte length
  1859. neg r9 ; r9 = -(byte length); passed to mulx4x_internal in this form
  1860. mov r8,QWORD PTR[r8] ; dereference arg 5
  ; Choose the frame base in rbp.  r11 = (rsp - 320 - 2*len - rdi) mod 4096.
  ; NOTE(review): presumably this keeps the frame from aliasing the rdi
  ; buffer modulo 4096 -- confirm against the generator's comments.
  1861. lea r11,QWORD PTR[((-320))+r9*2+rsp]
  1862. mov rbp,rsp
  1863. sub r11,rdi
  1864. and r11,4095
  1865. cmp r10,r11
  1866. jb $L$mulx4xsp_alt
  1867. sub rbp,r11
  1868. lea rbp,QWORD PTR[((-320))+r9*2+rbp]
  1869. jmp $L$mulx4xsp_done
  1870. $L$mulx4xsp_alt::
  1871. lea r10,QWORD PTR[((4096-320))+r9*2]
  1872. lea rbp,QWORD PTR[((-320))+r9*2+rbp]
  1873. sub r11,r10
  1874. mov r10,0 ; mov rather than xor: CF from the sub must survive for cmovc
  1875. cmovc r11,r10 ; clamp the adjustment to 0 when the sub borrowed
  1876. sub rbp,r11
  1877. $L$mulx4xsp_done::
  1878. and rbp,-64 ; align the frame base down to 64 bytes
  ; Move rsp down to rbp one 4096-byte page at a time, reading each page.
  ; NOTE(review): presumably a stack probe so guard pages are touched in
  ; order -- confirm.
  1879. mov r11,rsp
  1880. sub r11,rbp
  1881. and r11,-4096
  1882. lea rsp,QWORD PTR[rbp*1+r11]
  1883. mov r10,QWORD PTR[rsp]
  1884. cmp rsp,rbp
  1885. ja $L$mulx4x_page_walk
  1886. jmp $L$mulx4x_page_walk_done
  1887. $L$mulx4x_page_walk::
  1888. lea rsp,QWORD PTR[((-4096))+rsp]
  1889. mov r10,QWORD PTR[rsp]
  1890. cmp rsp,rbp
  1891. ja $L$mulx4x_page_walk
  1892. $L$mulx4x_page_walk_done::
  1893. mov QWORD PTR[32+rsp],r8 ; frame slot: value loaded through arg 5
  1894. mov QWORD PTR[40+rsp],rax ; frame slot: entry rsp
  1895. $L$mulx4x_body::
  1896. call mulx4x_internal
  ; Epilogue: reload the six pushed registers relative to the entry rsp.
  1897. mov rsi,QWORD PTR[40+rsp]
  1898. mov rax,1 ; return value
  1899. mov r15,QWORD PTR[((-48))+rsi]
  1900. mov r14,QWORD PTR[((-40))+rsi]
  1901. mov r13,QWORD PTR[((-32))+rsi]
  1902. mov r12,QWORD PTR[((-24))+rsi]
  1903. mov rbp,QWORD PTR[((-16))+rsi]
  1904. mov rbx,QWORD PTR[((-8))+rsi]
  1905. lea rsp,QWORD PTR[rsi]
  1906. $L$mulx4x_epilogue::
  1907. mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue
  1908. mov rsi,QWORD PTR[16+rsp]
  1909. DB 0F3h,0C3h ;repret
  1910. $L$SEH_end_bn_mulx4x_mont_gather5::
  1911. bn_mulx4x_mont_gather5 ENDP
  1912. ALIGN 32
  ; mulx4x_internal -- multiply/accumulate core built on MULX/ADCX/ADOX,
  ; called by bn_mulx4x_mont_gather5 and bn_powerx5.  It does not return by
  ; itself: it ends with a jump to $L$sqrx4x_sub_entry, and the ret at the end
  ; of that loop returns to this routine's caller.
  ;
  ; In:  rsi = pointer to the words multiplied by the gathered word
  ;      rdx = table pointer (one word is selected per 256-byte row)
  ;      rcx = pointer to the words multiplied by the imul-derived factor
  ;            (NOTE(review): presumably the modulus -- confirm)
  ;      rdi = result pointer
  ;      r9  = negated byte length
  ;      rax = rsp at entry of the outer function; [56+rax] is read as a
  ;            32-bit selector index
  ; Stack slots (offsets from rsp inside this routine):
  ;      [8+rsp]  negated byte length       [16+rsp] current table pointer
  ;      [24+rsp] table end marker          [32+rsp] inner-loop count
  ;      [40+rsp] multiplier stored by the caller (its [32+rsp])
  ;      [64+rsp] result pointer
  1913. mulx4x_internal PROC PRIVATE
  1914. mov QWORD PTR[8+rsp],r9
  1915. mov r10,r9
  1916. neg r9
  1917. shl r9,5
  1918. neg r10
  ; r13 = 128 + rdx + 32*len: value the biased table pointer is compared with
  ; at the end of each outer iteration.
  1919. lea r13,QWORD PTR[128+r9*1+rdx]
  ; r9 = len/32 - 1 (len in bytes): number of passes through the 4-word loops.
  1920. shr r9,5+5
  1921. movd xmm5,DWORD PTR[56+rax]
  1922. sub r9,1
  1923. lea rax,QWORD PTR[$L$inc]
  1924. mov QWORD PTR[((16+8))+rsp],r13
  1925. mov QWORD PTR[((24+8))+rsp],r9
  1926. mov QWORD PTR[((56+8))+rsp],rdi
  ; xmm0/xmm1 = the two 16-byte constants at $L$inc (defined elsewhere in the file).
  1927. movdqa xmm0,XMMWORD PTR[rax]
  1928. movdqa xmm1,XMMWORD PTR[16+rax]
  ; r10 = rsp + len - 24; the 16 masks are stored at [112+r10] .. [352+r10].
  1929. lea r10,QWORD PTR[((88-112))+r10*1+rsp]
  ; rdi = table pointer biased by +128 so a 256-byte row spans -128..+127.
  1930. lea rdi,QWORD PTR[128+rdx]
  ; Broadcast the selector index to all four dwords of xmm5.
  1931. pshufd xmm5,xmm5,0
  ; Build 16 masks: xmm0-xmm3 hold running counter vectors (stepped by adding
  ; the previous vector, xmm4 = step seed); each is compared for equality with
  ; the broadcast index and the comparison result is saved to the stack.
  1932. movdqa xmm4,xmm1
  1933. DB 067h
  1934. movdqa xmm2,xmm1
  1935. DB 067h
  1936. paddd xmm1,xmm0
  1937. pcmpeqd xmm0,xmm5
  1938. movdqa xmm3,xmm4
  1939. paddd xmm2,xmm1
  1940. pcmpeqd xmm1,xmm5
  1941. movdqa XMMWORD PTR[112+r10],xmm0
  1942. movdqa xmm0,xmm4
  1943. paddd xmm3,xmm2
  1944. pcmpeqd xmm2,xmm5
  1945. movdqa XMMWORD PTR[128+r10],xmm1
  1946. movdqa xmm1,xmm4
  1947. paddd xmm0,xmm3
  1948. pcmpeqd xmm3,xmm5
  1949. movdqa XMMWORD PTR[144+r10],xmm2
  1950. movdqa xmm2,xmm4
  1951. paddd xmm1,xmm0
  1952. pcmpeqd xmm0,xmm5
  1953. movdqa XMMWORD PTR[160+r10],xmm3
  1954. movdqa xmm3,xmm4
  1955. paddd xmm2,xmm1
  1956. pcmpeqd xmm1,xmm5
  1957. movdqa XMMWORD PTR[176+r10],xmm0
  1958. movdqa xmm0,xmm4
  1959. paddd xmm3,xmm2
  1960. pcmpeqd xmm2,xmm5
  1961. movdqa XMMWORD PTR[192+r10],xmm1
  1962. movdqa xmm1,xmm4
  1963. paddd xmm0,xmm3
  1964. pcmpeqd xmm3,xmm5
  1965. movdqa XMMWORD PTR[208+r10],xmm2
  1966. movdqa xmm2,xmm4
  1967. paddd xmm1,xmm0
  1968. pcmpeqd xmm0,xmm5
  1969. movdqa XMMWORD PTR[224+r10],xmm3
  1970. movdqa xmm3,xmm4
  1971. paddd xmm2,xmm1
  1972. pcmpeqd xmm1,xmm5
  1973. movdqa XMMWORD PTR[240+r10],xmm0
  1974. movdqa xmm0,xmm4
  1975. paddd xmm3,xmm2
  1976. pcmpeqd xmm2,xmm5
  1977. movdqa XMMWORD PTR[256+r10],xmm1
  1978. movdqa xmm1,xmm4
  1979. paddd xmm0,xmm3
  1980. pcmpeqd xmm3,xmm5
  1981. movdqa XMMWORD PTR[272+r10],xmm2
  1982. movdqa xmm2,xmm4
  1983. paddd xmm1,xmm0
  1984. pcmpeqd xmm0,xmm5
  1985. movdqa XMMWORD PTR[288+r10],xmm3
  1986. movdqa xmm3,xmm4
  1987. DB 067h
  1988. paddd xmm2,xmm1
  1989. pcmpeqd xmm1,xmm5
  1990. movdqa XMMWORD PTR[304+r10],xmm0
  1991. paddd xmm3,xmm2
  1992. pcmpeqd xmm2,xmm5
  1993. movdqa XMMWORD PTR[320+r10],xmm1
  1994. pcmpeqd xmm3,xmm5
  1995. movdqa XMMWORD PTR[336+r10],xmm2
  ; Select the first word: the last four masks are still in registers, so they
  ; are ANDed with row bytes +64..+127 directly; the other twelve are reloaded
  ; from the stack.  Every row entry is read regardless of the index.
  1996. pand xmm0,XMMWORD PTR[64+rdi]
  1997. pand xmm1,XMMWORD PTR[80+rdi]
  1998. pand xmm2,XMMWORD PTR[96+rdi]
  1999. movdqa XMMWORD PTR[352+r10],xmm3
  2000. pand xmm3,XMMWORD PTR[112+rdi]
  2001. por xmm0,xmm2
  2002. por xmm1,xmm3
  2003. movdqa xmm4,XMMWORD PTR[((-128))+rdi]
  2004. movdqa xmm5,XMMWORD PTR[((-112))+rdi]
  2005. movdqa xmm2,XMMWORD PTR[((-96))+rdi]
  2006. pand xmm4,XMMWORD PTR[112+r10]
  2007. movdqa xmm3,XMMWORD PTR[((-80))+rdi]
  2008. pand xmm5,XMMWORD PTR[128+r10]
  2009. por xmm0,xmm4
  2010. pand xmm2,XMMWORD PTR[144+r10]
  2011. por xmm1,xmm5
  2012. pand xmm3,XMMWORD PTR[160+r10]
  2013. por xmm0,xmm2
  2014. por xmm1,xmm3
  2015. movdqa xmm4,XMMWORD PTR[((-64))+rdi]
  2016. movdqa xmm5,XMMWORD PTR[((-48))+rdi]
  2017. movdqa xmm2,XMMWORD PTR[((-32))+rdi]
  2018. pand xmm4,XMMWORD PTR[176+r10]
  2019. movdqa xmm3,XMMWORD PTR[((-16))+rdi]
  2020. pand xmm5,XMMWORD PTR[192+r10]
  2021. por xmm0,xmm4
  2022. pand xmm2,XMMWORD PTR[208+r10]
  2023. por xmm1,xmm5
  2024. pand xmm3,XMMWORD PTR[224+r10]
  2025. por xmm0,xmm2
  2026. por xmm1,xmm3
  2027. movdqa xmm4,XMMWORD PTR[rdi]
  2028. movdqa xmm5,XMMWORD PTR[16+rdi]
  2029. movdqa xmm2,XMMWORD PTR[32+rdi]
  2030. pand xmm4,XMMWORD PTR[240+r10]
  2031. movdqa xmm3,XMMWORD PTR[48+rdi]
  2032. pand xmm5,XMMWORD PTR[256+r10]
  2033. por xmm0,xmm4
  2034. pand xmm2,XMMWORD PTR[272+r10]
  2035. por xmm1,xmm5
  2036. pand xmm3,XMMWORD PTR[288+r10]
  2037. por xmm0,xmm2
  2038. por xmm1,xmm3
  ; Merge the two accumulators, then fold the high qword onto the low one
  ; (pshufd 04eh swaps the two qwords).
  2039. pxor xmm0,xmm1
  2040. pshufd xmm1,xmm0,04eh
  2041. por xmm0,xmm1
  ; Advance to the next 256-byte table row.
  2042. lea rdi,QWORD PTR[256+rdi]
  2043. DB 102,72,15,126,194 ; encodes movq rdx,xmm0 (rdx = selected word)
  ; rbx = output cursor into the temporary vector, biased by +32
  ; (first word is written at [-32+rbx]).
  2044. lea rbx,QWORD PTR[((64+32+8))+rsp]
  ; First pass, first four words.  r9 keeps the selected word; rdx alternates
  ; between it and r8 = (low product word) * [40+rsp].
  2045. mov r9,rdx
  2046. mulx rax,r8,QWORD PTR[rsi]
  2047. mulx r12,r11,QWORD PTR[8+rsi]
  2048. add r11,rax
  2049. mulx r13,rax,QWORD PTR[16+rsi]
  2050. adc r12,rax
  2051. adc r13,0
  2052. mulx r14,rax,QWORD PTR[24+rsi]
  2053. mov r15,r8
  2054. imul r8,QWORD PTR[((32+8))+rsp]
  ; xor clears rbp and both CF and OF, starting fresh ADCX/ADOX chains.
  2055. xor rbp,rbp
  2056. mov rdx,r8
  2057. mov QWORD PTR[((8+8))+rsp],rdi
  2058. lea rsi,QWORD PTR[32+rsi]
  2059. adcx r13,rax
  2060. adcx r14,rbp
  2061. mulx r10,rax,QWORD PTR[rcx]
  2062. adcx r15,rax
  2063. adox r10,r11
  2064. mulx r11,rax,QWORD PTR[8+rcx]
  2065. adcx r10,rax
  2066. adox r11,r12
  2067. mulx r12,rax,QWORD PTR[16+rcx]
  ; rdi is reused as the inner-loop counter (table pointer was saved above).
  2068. mov rdi,QWORD PTR[((24+8))+rsp]
  2069. mov QWORD PTR[((-32))+rbx],r10
  2070. adcx r11,rax
  2071. adox r12,r13
  2072. mulx r15,rax,QWORD PTR[24+rcx]
  2073. mov rdx,r9
  2074. mov QWORD PTR[((-24))+rbx],r11
  2075. adcx r12,rax
  2076. adox r15,rbp
  2077. lea rcx,QWORD PTR[32+rcx]
  2078. mov QWORD PTR[((-16))+rbx],r12
  2079. jmp $L$mulx4x_1st
  2080. ALIGN 32
  ; First-pass loop: four words of [rsi] and four words of [rcx] per
  ; iteration; results are stored one word behind ([-40+rbx] .. [-16+rbx]).
  ; dec/lea/mov leave CF untouched, so the carry chain spans iterations.
  2081. $L$mulx4x_1st::
  2082. adcx r15,rbp
  2083. mulx rax,r10,QWORD PTR[rsi]
  2084. adcx r10,r14
  2085. mulx r14,r11,QWORD PTR[8+rsi]
  2086. adcx r11,rax
  2087. mulx rax,r12,QWORD PTR[16+rsi]
  2088. adcx r12,r14
  2089. mulx r14,r13,QWORD PTR[24+rsi]
  2090. DB 067h,067h
  2091. mov rdx,r8
  2092. adcx r13,rax
  2093. adcx r14,rbp
  2094. lea rsi,QWORD PTR[32+rsi]
  2095. lea rbx,QWORD PTR[32+rbx]
  2096. adox r10,r15
  2097. mulx r15,rax,QWORD PTR[rcx]
  2098. adcx r10,rax
  2099. adox r11,r15
  2100. mulx r15,rax,QWORD PTR[8+rcx]
  2101. adcx r11,rax
  2102. adox r12,r15
  2103. mulx r15,rax,QWORD PTR[16+rcx]
  2104. mov QWORD PTR[((-40))+rbx],r10
  2105. adcx r12,rax
  2106. mov QWORD PTR[((-32))+rbx],r11
  2107. adox r13,r15
  2108. mulx r15,rax,QWORD PTR[24+rcx]
  2109. mov rdx,r9
  2110. mov QWORD PTR[((-24))+rbx],r12
  2111. adcx r13,rax
  2112. adox r15,rbp
  2113. lea rcx,QWORD PTR[32+rcx]
  2114. mov QWORD PTR[((-16))+rbx],r13
  2115. dec rdi
  2116. jnz $L$mulx4x_1st
  ; rax = negated byte length; adding it rewinds rsi to the start of its array.
  2117. mov rax,QWORD PTR[8+rsp]
  2118. adc r15,rbp
  2119. lea rsi,QWORD PTR[rax*1+rsi]
  2120. add r14,r15
  2121. mov rdi,QWORD PTR[((8+8))+rsp]
  ; rbp was 0, so this leaves rbp = carry out of the top word (0 or 1).
  2122. adc rbp,rbp
  2123. mov QWORD PTR[((-8))+rbx],r14
  2124. jmp $L$mulx4x_outer
  2125. ALIGN 32
  ; Outer loop: one iteration per remaining table row.
  2126. $L$mulx4x_outer::
  ; r10 is set so that [256+r10] .. [496+r10] address the 16 masks saved above
  ; (they start 16 bytes past the current rbx).
  2127. lea r10,QWORD PTR[((16-256))+rbx]
  2128. pxor xmm4,xmm4
  2129. DB 067h,067h
  2130. pxor xmm5,xmm5
  ; Masked selection of the next word from the current 256-byte row.
  2131. movdqa xmm0,XMMWORD PTR[((-128))+rdi]
  2132. movdqa xmm1,XMMWORD PTR[((-112))+rdi]
  2133. movdqa xmm2,XMMWORD PTR[((-96))+rdi]
  2134. pand xmm0,XMMWORD PTR[256+r10]
  2135. movdqa xmm3,XMMWORD PTR[((-80))+rdi]
  2136. pand xmm1,XMMWORD PTR[272+r10]
  2137. por xmm4,xmm0
  2138. pand xmm2,XMMWORD PTR[288+r10]
  2139. por xmm5,xmm1
  2140. pand xmm3,XMMWORD PTR[304+r10]
  2141. por xmm4,xmm2
  2142. por xmm5,xmm3
  2143. movdqa xmm0,XMMWORD PTR[((-64))+rdi]
  2144. movdqa xmm1,XMMWORD PTR[((-48))+rdi]
  2145. movdqa xmm2,XMMWORD PTR[((-32))+rdi]
  2146. pand xmm0,XMMWORD PTR[320+r10]
  2147. movdqa xmm3,XMMWORD PTR[((-16))+rdi]
  2148. pand xmm1,XMMWORD PTR[336+r10]
  2149. por xmm4,xmm0
  2150. pand xmm2,XMMWORD PTR[352+r10]
  2151. por xmm5,xmm1
  2152. pand xmm3,XMMWORD PTR[368+r10]
  2153. por xmm4,xmm2
  2154. por xmm5,xmm3
  2155. movdqa xmm0,XMMWORD PTR[rdi]
  2156. movdqa xmm1,XMMWORD PTR[16+rdi]
  2157. movdqa xmm2,XMMWORD PTR[32+rdi]
  2158. pand xmm0,XMMWORD PTR[384+r10]
  2159. movdqa xmm3,XMMWORD PTR[48+rdi]
  2160. pand xmm1,XMMWORD PTR[400+r10]
  2161. por xmm4,xmm0
  2162. pand xmm2,XMMWORD PTR[416+r10]
  2163. por xmm5,xmm1
  2164. pand xmm3,XMMWORD PTR[432+r10]
  2165. por xmm4,xmm2
  2166. por xmm5,xmm3
  2167. movdqa xmm0,XMMWORD PTR[64+rdi]
  2168. movdqa xmm1,XMMWORD PTR[80+rdi]
  2169. movdqa xmm2,XMMWORD PTR[96+rdi]
  2170. pand xmm0,XMMWORD PTR[448+r10]
  2171. movdqa xmm3,XMMWORD PTR[112+rdi]
  2172. pand xmm1,XMMWORD PTR[464+r10]
  2173. por xmm4,xmm0
  2174. pand xmm2,XMMWORD PTR[480+r10]
  2175. por xmm5,xmm1
  2176. pand xmm3,XMMWORD PTR[496+r10]
  2177. por xmm4,xmm2
  2178. por xmm5,xmm3
  2179. por xmm4,xmm5
  2180. pshufd xmm0,xmm4,04eh
  2181. por xmm0,xmm4
  2182. lea rdi,QWORD PTR[256+rdi]
  2183. DB 102,72,15,126,194 ; encodes movq rdx,xmm0 (rdx = selected word)
  ; Park the previous pass's top carry right after the temporary vector, then
  ; rewind rbx (rax = negated byte length) to the start of the vector + 32.
  2184. mov QWORD PTR[rbx],rbp
  2185. lea rbx,QWORD PTR[32+rax*1+rbx]
  2186. mulx r11,r8,QWORD PTR[rsi]
  2187. xor rbp,rbp
  2188. mov r9,rdx
  2189. mulx r12,r14,QWORD PTR[8+rsi]
  ; Same as the first pass, but the previous temporary words at [-32+rbx]..
  ; are added in through the ADOX chain.
  2190. adox r8,QWORD PTR[((-32))+rbx]
  2191. adcx r11,r14
  2192. mulx r13,r15,QWORD PTR[16+rsi]
  2193. adox r11,QWORD PTR[((-24))+rbx]
  2194. adcx r12,r15
  2195. mulx r14,rdx,QWORD PTR[24+rsi]
  2196. adox r12,QWORD PTR[((-16))+rbx]
  2197. adcx r13,rdx
  ; Rewind rcx to the start of its array as well.
  2198. lea rcx,QWORD PTR[rax*1+rcx]
  2199. lea rsi,QWORD PTR[32+rsi]
  2200. adox r13,QWORD PTR[((-8))+rbx]
  2201. adcx r14,rbp
  2202. adox r14,rbp
  2203. mov r15,r8
  2204. imul r8,QWORD PTR[((32+8))+rsp]
  2205. mov rdx,r8
  2206. xor rbp,rbp
  2207. mov QWORD PTR[((8+8))+rsp],rdi
  2208. mulx r10,rax,QWORD PTR[rcx]
  2209. adcx r15,rax
  2210. adox r10,r11
  2211. mulx r11,rax,QWORD PTR[8+rcx]
  2212. adcx r10,rax
  2213. adox r11,r12
  2214. mulx r12,rax,QWORD PTR[16+rcx]
  2215. adcx r11,rax
  2216. adox r12,r13
  2217. mulx r15,rax,QWORD PTR[24+rcx]
  2218. mov rdx,r9
  2219. mov rdi,QWORD PTR[((24+8))+rsp]
  2220. mov QWORD PTR[((-32))+rbx],r10
  2221. adcx r12,rax
  2222. mov QWORD PTR[((-24))+rbx],r11
  2223. adox r15,rbp
  2224. mov QWORD PTR[((-16))+rbx],r12
  2225. lea rcx,QWORD PTR[32+rcx]
  2226. jmp $L$mulx4x_inner
  2227. ALIGN 32
  ; Inner loop: like $L$mulx4x_1st, plus accumulation of the existing
  ; temporary words at [rbx] .. [24+rbx].
  2228. $L$mulx4x_inner::
  2229. mulx rax,r10,QWORD PTR[rsi]
  2230. adcx r15,rbp
  2231. adox r10,r14
  2232. mulx r14,r11,QWORD PTR[8+rsi]
  2233. adcx r10,QWORD PTR[rbx]
  2234. adox r11,rax
  2235. mulx rax,r12,QWORD PTR[16+rsi]
  2236. adcx r11,QWORD PTR[8+rbx]
  2237. adox r12,r14
  2238. mulx r14,r13,QWORD PTR[24+rsi]
  2239. mov rdx,r8
  2240. adcx r12,QWORD PTR[16+rbx]
  2241. adox r13,rax
  2242. adcx r13,QWORD PTR[24+rbx]
  2243. adox r14,rbp
  2244. lea rsi,QWORD PTR[32+rsi]
  2245. lea rbx,QWORD PTR[32+rbx]
  2246. adcx r14,rbp
  2247. adox r10,r15
  2248. mulx r15,rax,QWORD PTR[rcx]
  2249. adcx r10,rax
  2250. adox r11,r15
  2251. mulx r15,rax,QWORD PTR[8+rcx]
  2252. adcx r11,rax
  2253. adox r12,r15
  2254. mulx r15,rax,QWORD PTR[16+rcx]
  2255. mov QWORD PTR[((-40))+rbx],r10
  2256. adcx r12,rax
  2257. adox r13,r15
  2258. mov QWORD PTR[((-32))+rbx],r11
  2259. mulx r15,rax,QWORD PTR[24+rcx]
  2260. mov rdx,r9
  2261. lea rcx,QWORD PTR[32+rcx]
  2262. mov QWORD PTR[((-24))+rbx],r12
  2263. adcx r13,rax
  2264. adox r15,rbp
  2265. mov QWORD PTR[((-16))+rbx],r13
  2266. dec rdi
  2267. jnz $L$mulx4x_inner
  2268. mov rax,QWORD PTR[((0+8))+rsp]
  2269. adc r15,rbp
  ; rdi is 0 here (the loop just counted it down), so 0 - [rbx] sets CF exactly
  ; when the parked top-carry word is non-zero; rdi is reloaded right after.
  2270. sub rdi,QWORD PTR[rbx]
  2271. mov rdi,QWORD PTR[((8+8))+rsp]
  2272. mov r10,QWORD PTR[((16+8))+rsp]
  2273. adc r14,r15
  2274. lea rsi,QWORD PTR[rax*1+rsi]
  2275. adc rbp,rbp
  2276. mov QWORD PTR[((-8))+rbx],r14
  ; Continue while the table pointer is below the end marker.
  2277. cmp rdi,r10
  2278. jb $L$mulx4x_outer
  ; All rows done.  Build rax = 0 or all-ones: all-ones when the top carry
  ; (rbp) is set or when the last word of the rcx array is below the top
  ; temporary word r14.  Then enter the shared masked-add loop with
  ;   rbp = start of the rcx array, rdi = start of the temporary vector,
  ;   rdx = result pointer, rcx = -(len/32), r8 = 0,
  ;   r12 = first word - 1 and r13..r15 = the next three words of the array.
  2279. mov r10,QWORD PTR[((-8))+rcx]
  2280. mov r8,rbp
  2281. mov r12,QWORD PTR[rax*1+rcx]
  2282. lea rbp,QWORD PTR[rax*1+rcx]
  2283. mov rcx,rax
  2284. lea rdi,QWORD PTR[rax*1+rbx]
  2285. xor eax,eax
  2286. xor r15,r15
  2287. sub r10,r14
  2288. adc r15,r15
  2289. or r8,r15
  2290. sar rcx,3+2
  2291. sub rax,r8
  2292. mov rdx,QWORD PTR[((56+8))+rsp]
  2293. dec r12
  2294. mov r13,QWORD PTR[8+rbp]
  2295. xor r8,r8
  2296. mov r14,QWORD PTR[16+rbp]
  2297. mov r15,QWORD PTR[24+rbp]
  2298. jmp $L$sqrx4x_sub_entry
  2299. mulx4x_internal ENDP
  2300. ALIGN 32
  ; bn_powerx5 -- Win64 entry wrapper that runs five rounds of
  ; __bn_sqrx8x_internal + __bn_postx4x_internal and then one call to
  ; mulx4x_internal on the same scratch frame.
  ;
  ; In (Win64): rcx, rdx, r8, r9 = arguments 1-4; [40+rsp], [48+rsp] =
  ;             arguments 5-6; the 7th stack argument at [56 + entry rsp] is
  ;             read by mulx4x_internal through rax.
  ;             The arguments are moved to rdi, rsi, rdx, rcx, r8, r9.
  ;             NOTE(review): presumably (rp, ap, table, np, n0, num, power) --
  ;             confirm against the C prototype.
  ; Out:        rax = 1.
  ; Preserves:  rbx, rbp, rdi, rsi, r12-r15 (saved and restored below).
  2301. bn_powerx5 PROC PRIVATE
  ; rdi/rsi are callee-saved on Win64; park them in the caller-provided home slots.
  2302. mov QWORD PTR[8+rsp],rdi ;WIN64 prologue
  2303. mov QWORD PTR[16+rsp],rsi
  2304. mov rax,rsp
  2305. $L$SEH_begin_bn_powerx5::
  2306. mov rdi,rcx
  2307. mov rsi,rdx
  2308. mov rdx,r8
  2309. mov rcx,r9
  2310. mov r8,QWORD PTR[40+rsp]
  2311. mov r9,QWORD PTR[48+rsp]
  ; rax = rsp at entry; stored in the frame and used by the epilogue.
  2312. mov rax,rsp
  2313. $L$powerx5_enter::
  2314. push rbx
  2315. push rbp
  2316. push r12
  2317. push r13
  2318. push r14
  2319. push r15
  2320. $L$powerx5_prologue::
  ; r9 = arg6 * 8 (upper half cleared by the 32-bit shift), r10 = 3 * r9,
  ; then r9 is negated.  r8 = qword that arg5 points to.
  2321. shl r9d,3
  2322. lea r10,QWORD PTR[r9*2+r9]
  2323. neg r9
  2324. mov r8,QWORD PTR[r8]
  ; Pick the frame base rbp from r11 = (rsp - 320 - 2*len - rdi) mod 4096.
  ; NOTE(review): this appears to position the frame relative to the rdi
  ; buffer modulo the 4 KiB page size -- confirm against the generator source.
  2325. lea r11,QWORD PTR[((-320))+r9*2+rsp]
  2326. mov rbp,rsp
  2327. sub r11,rdi
  2328. and r11,4095
  2329. cmp r10,r11
  2330. jb $L$pwrx_sp_alt
  2331. sub rbp,r11
  2332. lea rbp,QWORD PTR[((-320))+r9*2+rbp]
  2333. jmp $L$pwrx_sp_done
  2334. ALIGN 32
  2335. $L$pwrx_sp_alt::
  2336. lea r10,QWORD PTR[((4096-320))+r9*2]
  2337. lea rbp,QWORD PTR[((-320))+r9*2+rbp]
  2338. sub r11,r10
  ; mov keeps the flags, so CF from the sub above still drives cmovc
  ; (a borrow clamps r11 to 0).
  2339. mov r10,0
  2340. cmovc r11,r10
  2341. sub rbp,r11
  2342. $L$pwrx_sp_done::
  ; Round the frame base down to a 64-byte boundary.
  2343. and rbp,-64
  ; Lower rsp to rbp in 4096-byte steps, reading one qword at each step; the
  ; starting offset is a multiple of 4096, so the walk ends with rsp == rbp.
  ; NOTE(review): presumably a guard-page probe -- confirm.
  2344. mov r11,rsp
  2345. sub r11,rbp
  2346. and r11,-4096
  2347. lea rsp,QWORD PTR[rbp*1+r11]
  2348. mov r10,QWORD PTR[rsp]
  2349. cmp rsp,rbp
  2350. ja $L$pwrx_page_walk
  2351. jmp $L$pwrx_page_walk_done
  2352. $L$pwrx_page_walk::
  2353. lea rsp,QWORD PTR[((-4096))+rsp]
  2354. mov r10,QWORD PTR[rsp]
  2355. cmp rsp,rbp
  2356. ja $L$pwrx_page_walk
  2357. $L$pwrx_page_walk_done::
  ; r10 = negated byte length, r9 = positive byte length.
  2358. mov r10,r9
  2359. neg r9
  ; xmm0 = 0 (stored by the zeroing loop in __bn_sqrx8x_internal).  Values
  ; that must survive the calls are parked in xmm1-xmm4.
  2360. pxor xmm0,xmm0
  2361. DB 102,72,15,110,207 ; encodes movq xmm1,rdi
  2362. DB 102,72,15,110,209 ; encodes movq xmm2,rcx
  2363. DB 102,73,15,110,218 ; encodes movq xmm3,r10
  2364. DB 102,72,15,110,226 ; encodes movq xmm4,rdx
  ; Frame slots: [32+rsp] = value loaded through arg5, [40+rsp] = rsp at entry.
  2365. mov QWORD PTR[32+rsp],r8
  2366. mov QWORD PTR[40+rsp],rax
  2367. $L$powerx5_body::
  ; Five identical rounds; __bn_postx4x_internal reloads rsi and rdx from
  ; xmm1, so each round after the first reads the previous round's output.
  2368. call __bn_sqrx8x_internal
  2369. call __bn_postx4x_internal
  2370. call __bn_sqrx8x_internal
  2371. call __bn_postx4x_internal
  2372. call __bn_sqrx8x_internal
  2373. call __bn_postx4x_internal
  2374. call __bn_sqrx8x_internal
  2375. call __bn_postx4x_internal
  2376. call __bn_sqrx8x_internal
  2377. call __bn_postx4x_internal
  ; Set up mulx4x_internal's inputs: r9 = r10, rdi = rsi, rcx and rdx restored
  ; from xmm2/xmm4, rax = rsp at entry (used to read [56+rax]).
  2378. mov r9,r10
  2379. mov rdi,rsi
  2380. DB 102,72,15,126,209 ; encodes movq rcx,xmm2
  2381. DB 102,72,15,126,226 ; encodes movq rdx,xmm4
  2382. mov rax,QWORD PTR[40+rsp]
  2383. call mulx4x_internal
  ; Epilogue: rsi = rsp at entry; the six pushed registers sit just below it.
  2384. mov rsi,QWORD PTR[40+rsp]
  2385. mov rax,1
  2386. mov r15,QWORD PTR[((-48))+rsi]
  2387. mov r14,QWORD PTR[((-40))+rsi]
  2388. mov r13,QWORD PTR[((-32))+rsi]
  2389. mov r12,QWORD PTR[((-24))+rsi]
  2390. mov rbp,QWORD PTR[((-16))+rsi]
  2391. mov rbx,QWORD PTR[((-8))+rsi]
  2392. lea rsp,QWORD PTR[rsi]
  2393. $L$powerx5_epilogue::
  ; Reload the caller's rdi/rsi from the home slots written in the prologue.
  2394. mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue
  2395. mov rsi,QWORD PTR[16+rsp]
  2396. DB 0F3h,0C3h ;repret
  2397. $L$SEH_end_bn_powerx5::
  2398. bn_powerx5 ENDP
  2399. PUBLIC bn_sqrx8x_internal
  2400. ALIGN 32
  ; bn_sqrx8x_internal / __bn_sqrx8x_internal -- squaring core (MULX/ADCX/
  ; ADOX) that falls through into __bn_sqrx8x_reduction.  The reduction label
  ; is also called on its own from elsewhere in the file.
  ;
  ; In:  rsi  = pointer to the input words
  ;      r9   = byte length (decremented in steps of 64 by the zeroing loop)
  ;      xmm0 = value stored by the zeroing loop (callers clear it with pxor)
  ;      xmm2 = pointer loaded into rbp for the reduction
  ;      xmm3 = value loaded into rcx before the shift-and-add phase and
  ;             again at the end of each reduction round (callers in this
  ;             file store the negated byte length there)
  ;      [40+rsp] (the caller's [32+rsp]) = multiplier used by the reduction
  ; Stack slots (offsets from rsp inside this routine):
  ;      [8+rsp], [16+rsp], [24+rsp], [32+rsp] = scratch (lengths, end
  ;      pointers, parked carries); [56+rsp].. = temporary vector.
  ; NOTE(review): the register state left for __bn_postx4x_internal (rax, rbp,
  ; rcx, rdi) is established in $L$sqrx8x_no_tail below.
  2401. bn_sqrx8x_internal PROC PUBLIC
  2402. __bn_sqrx8x_internal::
  ; rdi = temporary vector, rbp = end of input; save length and end pointer.
  2403. lea rdi,QWORD PTR[((48+8))+rsp]
  2404. lea rbp,QWORD PTR[r9*1+rsi]
  2405. mov QWORD PTR[((0+8))+rsp],r9
  2406. mov QWORD PTR[((8+8))+rsp],rbp
  2407. jmp $L$sqr8x_zero_start
  2408. ALIGN 32
  2409. DB 066h,066h,066h,02eh,00fh,01fh,084h,000h,000h,000h,000h,000h ; padding (prefixed multi-byte NOP)
  ; Zero the temporary vector with xmm0, 128 bytes per iteration and one
  ; iteration per 64 bytes of input.  The loop is entered at its midpoint, so
  ; the first 64 bytes at rdi are not written here.
  2410. $L$sqrx8x_zero::
  2411. DB 03eh
  2412. movdqa XMMWORD PTR[rdi],xmm0
  2413. movdqa XMMWORD PTR[16+rdi],xmm0
  2414. movdqa XMMWORD PTR[32+rdi],xmm0
  2415. movdqa XMMWORD PTR[48+rdi],xmm0
  2416. $L$sqr8x_zero_start::
  2417. movdqa XMMWORD PTR[64+rdi],xmm0
  2418. movdqa XMMWORD PTR[80+rdi],xmm0
  2419. movdqa XMMWORD PTR[96+rdi],xmm0
  2420. movdqa XMMWORD PTR[112+rdi],xmm0
  2421. lea rdi,QWORD PTR[128+rdi]
  2422. sub r9,64
  2423. jnz $L$sqrx8x_zero
  ; rdx = a[0]; clear the accumulators (r9 is already 0 from the loop above).
  2424. mov rdx,QWORD PTR[rsi]
  2425. xor r10,r10
  2426. xor r11,r11
  2427. xor r12,r12
  2428. xor r13,r13
  2429. xor r14,r14
  2430. xor r15,r15
  2431. lea rdi,QWORD PTR[((48+8))+rsp]
  2432. xor rbp,rbp
  2433. jmp $L$sqrx8x_outer_loop
  2434. ALIGN 32
  ; Outer loop: cross products inside the current 8-word window at rsi.
  ; Row 0: a[0] * a[1..7].
  2435. $L$sqrx8x_outer_loop::
  2436. mulx rax,r8,QWORD PTR[8+rsi]
  2437. adcx r8,r9
  2438. adox r10,rax
  2439. mulx rax,r9,QWORD PTR[16+rsi]
  2440. adcx r9,r10
  2441. adox r11,rax
  2442. DB 0c4h,0e2h,0abh,0f6h,086h,018h,000h,000h,000h ; encodes mulx rax,r10,[24+rsi]
  2443. adcx r10,r11
  2444. adox r12,rax
  2445. DB 0c4h,0e2h,0a3h,0f6h,086h,020h,000h,000h,000h ; encodes mulx rax,r11,[32+rsi]
  2446. adcx r11,r12
  2447. adox r13,rax
  2448. mulx rax,r12,QWORD PTR[40+rsi]
  2449. adcx r12,r13
  2450. adox r14,rax
  2451. mulx rax,r13,QWORD PTR[48+rsi]
  2452. adcx r13,r14
  2453. adox rax,r15
  2454. mulx r15,r14,QWORD PTR[56+rsi]
  2455. mov rdx,QWORD PTR[8+rsi]
  2456. adcx r14,rax
  2457. adox r15,rbp
  2458. adc r15,QWORD PTR[64+rdi]
  2459. mov QWORD PTR[8+rdi],r8
  2460. mov QWORD PTR[16+rdi],r9
  ; rcx = -CF: the carry is parked in rcx while xor resets the flags.
  2461. sbb rcx,rcx
  2462. xor rbp,rbp
  ; Row 1: a[1] * a[2..7].
  2463. mulx rbx,r8,QWORD PTR[16+rsi]
  2464. mulx rax,r9,QWORD PTR[24+rsi]
  2465. adcx r8,r10
  2466. adox r9,rbx
  2467. mulx rbx,r10,QWORD PTR[32+rsi]
  2468. adcx r9,r11
  2469. adox r10,rax
  2470. DB 0c4h,0e2h,0a3h,0f6h,086h,028h,000h,000h,000h ; encodes mulx rax,r11,[40+rsi]
  2471. adcx r10,r12
  2472. adox r11,rbx
  2473. DB 0c4h,0e2h,09bh,0f6h,09eh,030h,000h,000h,000h ; encodes mulx rbx,r12,[48+rsi]
  2474. adcx r11,r13
  2475. adox r12,r14
  2476. DB 0c4h,062h,093h,0f6h,0b6h,038h,000h,000h,000h ; encodes mulx r14,r13,[56+rsi]
  2477. mov rdx,QWORD PTR[16+rsi]
  2478. adcx r12,rax
  2479. adox r13,rbx
  2480. adcx r13,r15
  2481. adox r14,rbp
  2482. adcx r14,rbp
  2483. mov QWORD PTR[24+rdi],r8
  2484. mov QWORD PTR[32+rdi],r9
  ; Row 2: a[2] * a[3..7].
  2485. mulx rbx,r8,QWORD PTR[24+rsi]
  2486. mulx rax,r9,QWORD PTR[32+rsi]
  2487. adcx r8,r10
  2488. adox r9,rbx
  2489. mulx rbx,r10,QWORD PTR[40+rsi]
  2490. adcx r9,r11
  2491. adox r10,rax
  2492. DB 0c4h,0e2h,0a3h,0f6h,086h,030h,000h,000h,000h ; encodes mulx rax,r11,[48+rsi]
  2493. adcx r10,r12
  2494. adox r11,r13
  2495. DB 0c4h,062h,09bh,0f6h,0aeh,038h,000h,000h,000h ; encodes mulx r13,r12,[56+rsi]
  2496. DB 03eh
  2497. mov rdx,QWORD PTR[24+rsi]
  2498. adcx r11,rbx
  2499. adox r12,rax
  2500. adcx r12,r14
  2501. mov QWORD PTR[40+rdi],r8
  2502. mov QWORD PTR[48+rdi],r9
  ; Row 3: a[3] * a[4..7].
  2503. mulx rax,r8,QWORD PTR[32+rsi]
  2504. adox r13,rbp
  2505. adcx r13,rbp
  2506. mulx rbx,r9,QWORD PTR[40+rsi]
  2507. adcx r8,r10
  2508. adox r9,rax
  2509. mulx rax,r10,QWORD PTR[48+rsi]
  2510. adcx r9,r11
  2511. adox r10,r12
  2512. mulx r12,r11,QWORD PTR[56+rsi]
  2513. mov rdx,QWORD PTR[32+rsi]
  2514. mov r14,QWORD PTR[40+rsi]
  2515. adcx r10,rbx
  2516. adox r11,rax
  2517. mov r15,QWORD PTR[48+rsi]
  2518. adcx r11,r13
  2519. adox r12,rbp
  2520. adcx r12,rbp
  2521. mov QWORD PTR[56+rdi],r8
  2522. mov QWORD PTR[64+rdi],r9
  ; Rows 4-6 use register operands: r14 = a[5], r15 = a[6], r8 = a[7].
  2523. mulx rax,r9,r14
  2524. mov r8,QWORD PTR[56+rsi]
  2525. adcx r9,r10
  2526. mulx rbx,r10,r15
  2527. adox r10,rax
  2528. adcx r10,r11
  2529. mulx rax,r11,r8
  2530. mov rdx,r14
  2531. adox r11,rbx
  2532. adcx r11,r12
  2533. adcx rax,rbp
  2534. mulx rbx,r14,r15
  2535. mulx r13,r12,r8
  2536. mov rdx,r15
  2537. lea rsi,QWORD PTR[64+rsi]
  2538. adcx r11,r14
  2539. adox r12,rbx
  2540. adcx r12,rax
  2541. adox r13,rbp
  2542. DB 067h,067h
  2543. mulx r14,r8,r8
  2544. adcx r13,r8
  2545. adcx r14,rbp
  ; Done with cross products once rsi reaches the end-of-input pointer.
  2546. cmp rsi,QWORD PTR[((8+8))+rsp]
  2547. je $L$sqrx8x_outer_break
  ; neg sets CF when rcx is non-zero, i.e. it restores the carry parked by the
  ; sbb above; the following mov does not disturb it.
  2548. neg rcx
  2549. mov rcx,-8
  2550. mov r15,rbp
  2551. mov r8,QWORD PTR[64+rdi]
  2552. adcx r9,QWORD PTR[72+rdi]
  2553. adcx r10,QWORD PTR[80+rdi]
  2554. adcx r11,QWORD PTR[88+rdi]
  2555. adc r12,QWORD PTR[96+rdi]
  2556. adc r13,QWORD PTR[104+rdi]
  2557. adc r14,QWORD PTR[112+rdi]
  2558. adc r15,QWORD PTR[120+rdi]
  2559. lea rbp,QWORD PTR[rsi]
  2560. lea rdi,QWORD PTR[128+rdi]
  2561. sbb rax,rax
  ; rdx = first word of the window just finished; [24+rsp] = parked carry
  ; (0 or -1), [32+rsp] = rdi to resume from after this window's products.
  2562. mov rdx,QWORD PTR[((-64))+rsi]
  2563. mov QWORD PTR[((16+8))+rsp],rax
  2564. mov QWORD PTR[((24+8))+rsp],rdi
  2565. xor eax,eax
  2566. jmp $L$sqrx8x_loop
  2567. ALIGN 32
  ; Multiply one word of the finished window (rdx) by the eight words at rbp
  ; and accumulate into r8..r15; rcx runs -8..-1 and selects both the output
  ; slot [rcx*8+rdi] and the next multiplier word [8+rcx*8+rsi].
  2568. $L$sqrx8x_loop::
  2569. mov rbx,r8
  2570. mulx r8,rax,QWORD PTR[rbp]
  2571. adcx rbx,rax
  2572. adox r8,r9
  2573. mulx r9,rax,QWORD PTR[8+rbp]
  2574. adcx r8,rax
  2575. adox r9,r10
  2576. mulx r10,rax,QWORD PTR[16+rbp]
  2577. adcx r9,rax
  2578. adox r10,r11
  2579. mulx r11,rax,QWORD PTR[24+rbp]
  2580. adcx r10,rax
  2581. adox r11,r12
  2582. DB 0c4h,062h,0fbh,0f6h,0a5h,020h,000h,000h,000h ; encodes mulx r12,rax,[32+rbp]
  2583. adcx r11,rax
  2584. adox r12,r13
  2585. mulx r13,rax,QWORD PTR[40+rbp]
  2586. adcx r12,rax
  2587. adox r13,r14
  2588. mulx r14,rax,QWORD PTR[48+rbp]
  2589. mov QWORD PTR[rcx*8+rdi],rbx
  ; mov (flags preserved) rather than xor: both carry chains are still live.
  2590. mov ebx,0
  2591. adcx r13,rax
  2592. adox r14,r15
  2593. DB 0c4h,062h,0fbh,0f6h,0bdh,038h,000h,000h,000h ; encodes mulx r15,rax,[56+rbp]
  2594. mov rdx,QWORD PTR[8+rcx*8+rsi]
  2595. adcx r14,rax
  2596. adox r15,rbx
  2597. adcx r15,rbx
  2598. DB 067h
  2599. inc rcx
  2600. jnz $L$sqrx8x_loop
  ; Next group of eight words at rbp, unless the end of the input was reached.
  2601. lea rbp,QWORD PTR[64+rbp]
  2602. mov rcx,-8
  2603. cmp rbp,QWORD PTR[((8+8))+rsp]
  2604. je $L$sqrx8x_break
  ; rbx is 0, so 0 - (parked 0/-1) sets CF exactly when the parked carry was set.
  2605. sub rbx,QWORD PTR[((16+8))+rsp]
  2606. DB 066h
  2607. mov rdx,QWORD PTR[((-64))+rsi]
  2608. adcx r8,QWORD PTR[rdi]
  2609. adcx r9,QWORD PTR[8+rdi]
  2610. adc r10,QWORD PTR[16+rdi]
  2611. adc r11,QWORD PTR[24+rdi]
  2612. adc r12,QWORD PTR[32+rdi]
  2613. adc r13,QWORD PTR[40+rdi]
  2614. adc r14,QWORD PTR[48+rdi]
  2615. adc r15,QWORD PTR[56+rdi]
  2616. lea rdi,QWORD PTR[64+rdi]
  2617. DB 067h
  2618. sbb rax,rax
  2619. xor ebx,ebx
  2620. mov QWORD PTR[((16+8))+rsp],rax
  2621. jmp $L$sqrx8x_loop
  2622. ALIGN 32
  ; End of one window's products: fold in the parked carry, then go back to
  ; the outer loop with rdi = resume pointer saved at [32+rsp].  If rdi is not
  ; already there, the accumulators are stored and reloaded from that position.
  2623. $L$sqrx8x_break::
  2624. xor rbp,rbp
  2625. sub rbx,QWORD PTR[((16+8))+rsp]
  2626. adcx r8,rbp
  2627. mov rcx,QWORD PTR[((24+8))+rsp]
  2628. adcx r9,rbp
  2629. mov rdx,QWORD PTR[rsi]
  2630. adc r10,0
  2631. mov QWORD PTR[rdi],r8
  2632. adc r11,0
  2633. adc r12,0
  2634. adc r13,0
  2635. adc r14,0
  2636. adc r15,0
  2637. cmp rdi,rcx
  2638. je $L$sqrx8x_outer_loop
  2639. mov QWORD PTR[8+rdi],r9
  2640. mov r9,QWORD PTR[8+rcx]
  2641. mov QWORD PTR[16+rdi],r10
  2642. mov r10,QWORD PTR[16+rcx]
  2643. mov QWORD PTR[24+rdi],r11
  2644. mov r11,QWORD PTR[24+rcx]
  2645. mov QWORD PTR[32+rdi],r12
  2646. mov r12,QWORD PTR[32+rcx]
  2647. mov QWORD PTR[40+rdi],r13
  2648. mov r13,QWORD PTR[40+rcx]
  2649. mov QWORD PTR[48+rdi],r14
  2650. mov r14,QWORD PTR[48+rcx]
  2651. mov QWORD PTR[56+rdi],r15
  2652. mov r15,QWORD PTR[56+rcx]
  2653. mov rdi,rcx
  2654. jmp $L$sqrx8x_outer_loop
  2655. ALIGN 32
  ; All cross products done: flush the accumulators, then restart at the
  ; beginning of the temporary vector.  rcx comes from xmm3 and is used as a
  ; negative byte offset from rsi (which now points at the end of the input).
  2656. $L$sqrx8x_outer_break::
  2657. mov QWORD PTR[72+rdi],r9
  2658. DB 102,72,15,126,217 ; encodes movq rcx,xmm3
  2659. mov QWORD PTR[80+rdi],r10
  2660. mov QWORD PTR[88+rdi],r11
  2661. mov QWORD PTR[96+rdi],r12
  2662. mov QWORD PTR[104+rdi],r13
  2663. mov QWORD PTR[112+rdi],r14
  2664. lea rdi,QWORD PTR[((48+8))+rsp]
  2665. mov rdx,QWORD PTR[rcx*1+rsi]
  2666. mov r11,QWORD PTR[8+rdi]
  ; xor clears r10 and both carry flags before the doubling/adding chains.
  2667. xor r10,r10
  2668. mov r9,QWORD PTR[((0+8))+rsp]
  2669. adox r11,r11
  2670. mov r12,QWORD PTR[16+rdi]
  2671. mov r13,QWORD PTR[24+rdi]
  2672. ALIGN 32
  ; Shift-and-add: each stored cross-product word is doubled on the OF chain
  ; (adox x,x) while the square of one input word (mulx rbx,rax,rdx) is added
  ; on the CF chain; four input words (eight output words) per iteration.
  2673. $L$sqrx4x_shift_n_add::
  2674. mulx rbx,rax,rdx
  2675. adox r12,r12
  2676. adcx rax,r10
  2677. DB 048h,08bh,094h,00eh,008h,000h,000h,000h ; encodes mov rdx,[8+rcx+rsi]
  2678. DB 04ch,08bh,097h,020h,000h,000h,000h ; encodes mov r10,[32+rdi]
  2679. adox r13,r13
  2680. adcx rbx,r11
  2681. mov r11,QWORD PTR[40+rdi]
  2682. mov QWORD PTR[rdi],rax
  2683. mov QWORD PTR[8+rdi],rbx
  2684. mulx rbx,rax,rdx
  2685. adox r10,r10
  2686. adcx rax,r12
  2687. mov rdx,QWORD PTR[16+rcx*1+rsi]
  2688. mov r12,QWORD PTR[48+rdi]
  2689. adox r11,r11
  2690. adcx rbx,r13
  2691. mov r13,QWORD PTR[56+rdi]
  2692. mov QWORD PTR[16+rdi],rax
  2693. mov QWORD PTR[24+rdi],rbx
  2694. mulx rbx,rax,rdx
  2695. adox r12,r12
  2696. adcx rax,r10
  2697. mov rdx,QWORD PTR[24+rcx*1+rsi]
  2698. lea rcx,QWORD PTR[32+rcx]
  2699. mov r10,QWORD PTR[64+rdi]
  2700. adox r13,r13
  2701. adcx rbx,r11
  2702. mov r11,QWORD PTR[72+rdi]
  2703. mov QWORD PTR[32+rdi],rax
  2704. mov QWORD PTR[40+rdi],rbx
  2705. mulx rbx,rax,rdx
  2706. adox r10,r10
  2707. adcx rax,r12
  ; jrcxz tests rcx without touching the flags; rcx reaches 0 after the last
  ; group of four input words.
  2708. jrcxz $L$sqrx4x_shift_n_add_break
  2709. DB 048h,08bh,094h,00eh,000h,000h,000h,000h ; encodes mov rdx,[0+rcx+rsi]
  2710. adox r11,r11
  2711. adcx rbx,r13
  2712. mov r12,QWORD PTR[80+rdi]
  2713. mov r13,QWORD PTR[88+rdi]
  2714. mov QWORD PTR[48+rdi],rax
  2715. mov QWORD PTR[56+rdi],rbx
  2716. lea rdi,QWORD PTR[64+rdi]
  2717. nop
  2718. jmp $L$sqrx4x_shift_n_add
  2719. ALIGN 32
  2720. $L$sqrx4x_shift_n_add_break::
  2721. adcx rbx,r13
  2722. mov QWORD PTR[48+rdi],rax
  2723. mov QWORD PTR[56+rdi],rbx
  2724. lea rdi,QWORD PTR[64+rdi]
  2725. DB 102,72,15,126,213 ; encodes movq rbp,xmm2
  ; __bn_sqrx8x_reduction -- second entry point.
  ; In: rbp = pointer to the words multiplied by the reduction factor
  ;     (NOTE(review): presumably the modulus), r9 = byte length,
  ;     rdi = end of the temporary vector, [40+rsp] = multiplier.
  ; rbx = multiplier, rdx = first temporary word, [8+rsp] = rbp + r9 - 64
  ; (start of the last 8-word group at rbp), [16+rsp] = end of the vector.
  2726. __bn_sqrx8x_reduction::
  2727. xor eax,eax
  2728. mov rbx,QWORD PTR[((32+8))+rsp]
  2729. mov rdx,QWORD PTR[((48+8))+rsp]
  2730. lea rcx,QWORD PTR[((-64))+r9*1+rbp]
  2731. mov QWORD PTR[((0+8))+rsp],rcx
  2732. mov QWORD PTR[((8+8))+rsp],rdi
  2733. lea rdi,QWORD PTR[((48+8))+rsp]
  2734. jmp $L$sqrx8x_reduction_loop
  2735. ALIGN 32
  ; One reduction round handles eight temporary words: load them, keep the
  ; lowest in r8 and form the first factor rdx = lowest word * multiplier.
  ; rax (carry from the previous round) is parked at [32+rsp].
  2736. $L$sqrx8x_reduction_loop::
  2737. mov r9,QWORD PTR[8+rdi]
  2738. mov r10,QWORD PTR[16+rdi]
  2739. mov r11,QWORD PTR[24+rdi]
  2740. mov r12,QWORD PTR[32+rdi]
  2741. mov r8,rdx
  2742. imul rdx,rbx
  2743. mov r13,QWORD PTR[40+rdi]
  2744. mov r14,QWORD PTR[48+rdi]
  2745. mov r15,QWORD PTR[56+rdi]
  2746. mov QWORD PTR[((24+8))+rsp],rax
  2747. lea rdi,QWORD PTR[64+rdi]
  2748. xor rsi,rsi
  2749. mov rcx,-8
  2750. jmp $L$sqrx8x_reduce
  2751. ALIGN 32
  ; Eight iterations (rcx = -8..-1): add factor * eight words at rbp to the
  ; window, which shifts down by one word each time.  The factor used is
  ; saved at [64+48+8+rcx*8+rsp] for the tail loop, and the next factor
  ; (low half of new r8 * multiplier) is computed into rbx.
  2752. $L$sqrx8x_reduce::
  2753. mov rbx,r8
  2754. mulx r8,rax,QWORD PTR[rbp]
  2755. adcx rax,rbx
  2756. adox r8,r9
  2757. mulx r9,rbx,QWORD PTR[8+rbp]
  2758. adcx r8,rbx
  2759. adox r9,r10
  2760. mulx r10,rbx,QWORD PTR[16+rbp]
  2761. adcx r9,rbx
  2762. adox r10,r11
  2763. mulx r11,rbx,QWORD PTR[24+rbp]
  2764. adcx r10,rbx
  2765. adox r11,r12
  2766. DB 0c4h,062h,0e3h,0f6h,0a5h,020h,000h,000h,000h ; encodes mulx r12,rbx,[32+rbp]
  2767. mov rax,rdx
  2768. mov rdx,r8
  2769. adcx r11,rbx
  2770. adox r12,r13
  ; Only the low half (rbx) is kept; rdx is overwritten on the next line.
  2771. mulx rdx,rbx,QWORD PTR[((32+8))+rsp]
  2772. mov rdx,rax
  2773. mov QWORD PTR[((64+48+8))+rcx*8+rsp],rax
  2774. mulx r13,rax,QWORD PTR[40+rbp]
  2775. adcx r12,rax
  2776. adox r13,r14
  2777. mulx r14,rax,QWORD PTR[48+rbp]
  2778. adcx r13,rax
  2779. adox r14,r15
  2780. mulx r15,rax,QWORD PTR[56+rbp]
  2781. mov rdx,rbx
  2782. adcx r14,rax
  2783. adox r15,rsi
  2784. adcx r15,rsi
  2785. DB 067h,067h,067h
  2786. inc rcx
  2787. jnz $L$sqrx8x_reduce
  ; If rbp is already at the last 8-word group there is nothing left to
  ; multiply by; otherwise continue with the saved factors in the tail loop.
  2788. mov rax,rsi
  2789. cmp rbp,QWORD PTR[((0+8))+rsp]
  2790. jae $L$sqrx8x_no_tail
  2791. mov rdx,QWORD PTR[((48+8))+rsp]
  2792. add r8,QWORD PTR[rdi]
  2793. lea rbp,QWORD PTR[64+rbp]
  2794. mov rcx,-8
  2795. adcx r9,QWORD PTR[8+rdi]
  2796. adcx r10,QWORD PTR[16+rdi]
  2797. adc r11,QWORD PTR[24+rdi]
  2798. adc r12,QWORD PTR[32+rdi]
  2799. adc r13,QWORD PTR[40+rdi]
  2800. adc r14,QWORD PTR[48+rdi]
  2801. adc r15,QWORD PTR[56+rdi]
  2802. lea rdi,QWORD PTR[64+rdi]
  ; Park the carry (0 or -1) at [24+rsp] while xor resets the flags.
  2803. sbb rax,rax
  2804. xor rsi,rsi
  2805. mov QWORD PTR[((16+8))+rsp],rax
  2806. jmp $L$sqrx8x_tail
  2807. ALIGN 32
  ; Tail loop: multiply each saved factor (reloaded from
  ; [72+48+8+rcx*8+rsp]) by the next eight words at rbp and accumulate.
  2808. $L$sqrx8x_tail::
  2809. mov rbx,r8
  2810. mulx r8,rax,QWORD PTR[rbp]
  2811. adcx rbx,rax
  2812. adox r8,r9
  2813. mulx r9,rax,QWORD PTR[8+rbp]
  2814. adcx r8,rax
  2815. adox r9,r10
  2816. mulx r10,rax,QWORD PTR[16+rbp]
  2817. adcx r9,rax
  2818. adox r10,r11
  2819. mulx r11,rax,QWORD PTR[24+rbp]
  2820. adcx r10,rax
  2821. adox r11,r12
  2822. DB 0c4h,062h,0fbh,0f6h,0a5h,020h,000h,000h,000h ; encodes mulx r12,rax,[32+rbp]
  2823. adcx r11,rax
  2824. adox r12,r13
  2825. mulx r13,rax,QWORD PTR[40+rbp]
  2826. adcx r12,rax
  2827. adox r13,r14
  2828. mulx r14,rax,QWORD PTR[48+rbp]
  2829. adcx r13,rax
  2830. adox r14,r15
  2831. mulx r15,rax,QWORD PTR[56+rbp]
  2832. mov rdx,QWORD PTR[((72+48+8))+rcx*8+rsp]
  2833. adcx r14,rax
  2834. adox r15,rsi
  2835. mov QWORD PTR[rcx*8+rdi],rbx
  2836. mov rbx,r8
  2837. adcx r15,rsi
  2838. inc rcx
  2839. jnz $L$sqrx8x_tail
  2840. cmp rbp,QWORD PTR[((0+8))+rsp]
  2841. jae $L$sqrx8x_tail_done
  ; rsi is 0, so 0 - (parked 0/-1) restores the parked carry into CF.
  2842. sub rsi,QWORD PTR[((16+8))+rsp]
  2843. mov rdx,QWORD PTR[((48+8))+rsp]
  2844. lea rbp,QWORD PTR[64+rbp]
  2845. adc r8,QWORD PTR[rdi]
  2846. adc r9,QWORD PTR[8+rdi]
  2847. adc r10,QWORD PTR[16+rdi]
  2848. adc r11,QWORD PTR[24+rdi]
  2849. adc r12,QWORD PTR[32+rdi]
  2850. adc r13,QWORD PTR[40+rdi]
  2851. adc r14,QWORD PTR[48+rdi]
  2852. adc r15,QWORD PTR[56+rdi]
  2853. lea rdi,QWORD PTR[64+rdi]
  2854. sbb rax,rax
  ; rcx is 0 after the loop; this resets it to -8 for the next eight factors.
  2855. sub rcx,8
  2856. xor rsi,rsi
  2857. mov QWORD PTR[((16+8))+rsp],rax
  2858. jmp $L$sqrx8x_tail
  2859. ALIGN 32
  ; Add the carry parked at [32+rsp] by the reduction-loop head, propagate it,
  ; and collect the carry-out in rax; then restore the tail loop's parked carry.
  2860. $L$sqrx8x_tail_done::
  2861. xor rax,rax
  2862. add r8,QWORD PTR[((24+8))+rsp]
  2863. adc r9,0
  2864. adc r10,0
  2865. adc r11,0
  2866. adc r12,0
  2867. adc r13,0
  2868. adc r14,0
  2869. adc r15,0
  2870. adc rax,0
  2871. sub rsi,QWORD PTR[((16+8))+rsp]
  ; Add the next eight temporary words, store the sums, and set up the next
  ; round: rcx = value kept in xmm3, rbp = pointer kept in xmm2,
  ; rbx = multiplier, rdx = first word of the next window at [64+rcx+rdi].
  2872. $L$sqrx8x_no_tail::
  2873. adc r8,QWORD PTR[rdi]
  2874. DB 102,72,15,126,217 ; encodes movq rcx,xmm3
  2875. adc r9,QWORD PTR[8+rdi]
  2876. mov rsi,QWORD PTR[56+rbp]
  2877. DB 102,72,15,126,213 ; encodes movq rbp,xmm2
  2878. adc r10,QWORD PTR[16+rdi]
  2879. adc r11,QWORD PTR[24+rdi]
  2880. adc r12,QWORD PTR[32+rdi]
  2881. adc r13,QWORD PTR[40+rdi]
  2882. adc r14,QWORD PTR[48+rdi]
  2883. adc r15,QWORD PTR[56+rdi]
  2884. adc rax,0
  2885. mov rbx,QWORD PTR[((32+8))+rsp]
  2886. mov rdx,QWORD PTR[64+rcx*1+rdi]
  2887. mov QWORD PTR[rdi],r8
  2888. lea r8,QWORD PTR[64+rdi]
  2889. mov QWORD PTR[8+rdi],r9
  2890. mov QWORD PTR[16+rdi],r10
  2891. mov QWORD PTR[24+rdi],r11
  2892. mov QWORD PTR[32+rdi],r12
  2893. mov QWORD PTR[40+rdi],r13
  2894. mov QWORD PTR[48+rdi],r14
  2895. mov QWORD PTR[56+rdi],r15
  2896. lea rdi,QWORD PTR[64+rcx*1+rdi]
  ; Loop while the words just written end before the end of the vector.
  2897. cmp r8,QWORD PTR[((8+8))+rsp]
  2898. jb $L$sqrx8x_reduction_loop
  2899. DB 0F3h,0C3h ;repret
  2900. bn_sqrx8x_internal ENDP
  2901. ALIGN 32
  ; __bn_postx4x_internal -- masked add loop, four words per iteration:
  ;     [rdx][i] = [rdi][i] + (NOT [rbp][i] AND mask) + carry
  ; The first word read from rbp is decremented before being inverted
  ; (NOT (x - 1) equals -x), so with an all-ones mask the loop adds the
  ; two's-complement negation of the rbp array, i.e. subtracts it; with a
  ; zero mask it copies [rdi] to [rdx].
  ;
  ; In:  rbp = pointer to the words to invert, rdi = pointer to the words to
  ;      add, rcx = negated byte length, rax = value negated into the mask
  ;      (0 stays 0, 1 becomes all-ones), xmm1 = destination pointer.
  ; Out: words stored at the pointer from xmm1; rsi = that pointer;
  ;      r10 = negated byte length; r9 = positive byte length.
  ; $L$sqrx4x_sub_entry is also jumped to from mulx4x_internal, which sets
  ; rax, rcx, rdx, rbp, rdi, r8 and r12-r15 itself.
  2902. __bn_postx4x_internal::
  2903. mov r12,QWORD PTR[rbp]
  2904. mov r10,rcx
  2905. mov r9,rcx
  2906. neg rax
  ; rcx = -(byte length / 32): loop counter, one step per four words.
  2907. sar rcx,3+2
  2908. DB 102,72,15,126,202 ; encodes movq rdx,xmm1
  2909. DB 102,72,15,126,206 ; encodes movq rsi,xmm1
  2910. dec r12
  2911. mov r13,QWORD PTR[8+rbp]
  ; r8 carries the carry between iterations; start with none.
  2912. xor r8,r8
  2913. mov r14,QWORD PTR[16+rbp]
  2914. mov r15,QWORD PTR[24+rbp]
  2915. jmp $L$sqrx4x_sub_entry
  2916. ALIGN 16
  2917. $L$sqrx4x_sub::
  2918. mov r12,QWORD PTR[rbp]
  2919. mov r13,QWORD PTR[8+rbp]
  2920. mov r14,QWORD PTR[16+rbp]
  2921. mov r15,QWORD PTR[24+rbp]
  2922. $L$sqrx4x_sub_entry::
  ; andn dst,src,mask computes (NOT src) AND mask.
  2923. andn r12,r12,rax
  2924. lea rbp,QWORD PTR[32+rbp]
  2925. andn r13,r13,rax
  2926. andn r14,r14,rax
  2927. andn r15,r15,rax
  ; neg sets CF when r8 is non-zero: restores the carry saved by sbb below.
  2928. neg r8
  2929. adc r12,QWORD PTR[rdi]
  2930. adc r13,QWORD PTR[8+rdi]
  2931. adc r14,QWORD PTR[16+rdi]
  2932. adc r15,QWORD PTR[24+rdi]
  2933. mov QWORD PTR[rdx],r12
  2934. lea rdi,QWORD PTR[32+rdi]
  2935. mov QWORD PTR[8+rdx],r13
  ; r8 = -CF: save the carry out of this group of four words.
  2936. sbb r8,r8
  2937. mov QWORD PTR[16+rdx],r14
  2938. mov QWORD PTR[24+rdx],r15
  2939. lea rdx,QWORD PTR[32+rdx]
  2940. inc rcx
  2941. jnz $L$sqrx4x_sub
  ; r9 was loaded with the negated length on the __bn_postx4x_internal path;
  ; make it positive again for the caller.
  2942. neg r9
  2943. DB 0F3h,0C3h ;repret
  2944. PUBLIC bn_get_bits5
  2945. ALIGN 16
  ; bn_get_bits5 -- return a 5-bit field from a packed bit string.
  ; In (Win64): rcx = base pointer, edx = bit offset.
  ; Out:        eax = 5 bits starting at that offset (0..31).
  ; A 16-bit word is loaded from base + 2*(offset / 16).  When the offset
  ; within that word is above 11 the field would run past the word, so the
  ; load address is moved up by one byte and the shift reduced by 8.
  ; Both adjustments use cmova, so there is no branch on the offset.
  2946. bn_get_bits5 PROC PUBLIC
  2947. mov r10,rcx ; r10 = base
  2948. lea r11,QWORD PTR[1+rcx] ; r11 = base + 1 (alternate load address)
  2949. mov ecx,edx
  2950. and ecx,0fh ; ecx = bit offset inside the 16-bit word
  2951. shr edx,4 ; edx = word index (rdx upper half cleared)
  2952. lea eax,DWORD PTR[((-8))+rcx] ; eax = in-word offset - 8
  2953. cmp ecx,0bh
  2954. cmova ecx,eax ; offset > 11: shift by offset - 8 ...
  2955. cmova r10,r11 ; ... and load from one byte higher
  2956. movzx eax,WORD PTR[r10+rdx*2]
  2957. shr eax,cl
  2958. and eax,01fh ; keep the low 5 bits
  2959. DB 0F3h,0C3h ;repret
  2960. bn_get_bits5 ENDP
  ; bn_scatter5 -- copy edx qwords from [rcx] into a strided table.
  ;
  ; In:    rcx = source words, edx = word count,
  ;        r8  = table base, r9 = slot index
  ; Word k is written to  r8 + r9*8 + k*256, i.e. consecutive source words land
  ; 256 bytes apart (room for 32 eight-byte slots per row).
  ; Returns immediately when edx is zero.
  ; Clobb: rax, rcx, rdx, r8, flags
  ; NOTE(review): the full 64-bit r9 is used in the address -- assumes the
  ; caller passes it with clean upper bits; confirm.
  2961. PUBLIC bn_scatter5
  2962. ALIGN 16
  2963. bn_scatter5 PROC PUBLIC
  2964. cmp edx,0
  2965. jz $L$scatter_epilogue
  2966. lea r8,QWORD PTR[r9*8+r8] ; r8 = address of the chosen slot in the first row
  2967. $L$scatter::
  2968. mov rax,QWORD PTR[rcx]
  2969. lea rcx,QWORD PTR[8+rcx]
  2970. mov QWORD PTR[r8],rax
  2971. lea r8,QWORD PTR[256+r8] ; same slot, next row
  2972. sub edx,1
  2973. jnz $L$scatter
  2974. $L$scatter_epilogue::
  2975. DB 0F3h,0C3h ;repret
  2976. bn_scatter5 ENDP
  ; bn_gather5 -- read one slot out of every 256-byte table row, touching the
  ; whole row each time.
  ;
  ; In:    rcx = output words, edx = word count,
  ;        r8  = table base (read with movdqa, so it must be 16-byte aligned),
  ;        r9d = slot index
  ; Out:   edx qwords written to [rcx]
  ; Clobb: rax, rcx, rdx, r10, r11, xmm0-xmm5, flags
  ; Stack: 0x108 bytes below the entry rsp, realigned to 16; rsp is restored
  ;        from r10 on exit.
  ;
  ; Phase 1 builds sixteen 16-byte masks on the stack.  Dword counters start at
  ; {0,0,1,1} and advance by {2,2,2,2}, each set is compared with the broadcast
  ; index, so the 8-byte half belonging to slot r9d becomes all-ones and every
  ; other half becomes zero (for an index in 0..31).
  ; Phase 2 ANDs every 16 bytes of a row with its mask and ORs the results
  ; together, so all 32 slots are loaded whatever the index is.
  ; NOTE(review): unlike bn_scatter5 there is no guard for edx == 0; the
  ; sub/jnz pair would wrap the counter in that case -- confirm callers never
  ; pass zero.
  2977. PUBLIC bn_gather5
  2978. ALIGN 32
  2979. bn_gather5 PROC PUBLIC
  2980. $L$SEH_begin_bn_gather5::
  ; The two prologue instructions are emitted as raw bytes (11 bytes in total).
  2981. DB 04ch,08dh,014h,024h ; lea r10,[rsp] -- remember the entry rsp
  2982. DB 048h,081h,0ech,008h,001h,000h,000h ; sub rsp,0108h
  2983. lea rax,QWORD PTR[$L$inc]
  2984. and rsp,-16 ; movdqa stores below need a 16-byte aligned area
  2985. movd xmm5,r9d
  2986. movdqa xmm0,XMMWORD PTR[rax] ; counters {0,0,1,1}
  2987. movdqa xmm1,XMMWORD PTR[16+rax] ; step {2,2,2,2}
  2988. lea r11,QWORD PTR[128+r8] ; bias by 128 so the row spans offsets -128..+112
  2989. lea rax,QWORD PTR[128+rsp] ; same bias for the mask area
  2990. pshufd xmm5,xmm5,0 ; broadcast the index to all four dwords
  2991. movdqa xmm4,xmm1 ; xmm4 keeps the step for the rest of phase 1
  2992. movdqa xmm2,xmm1
  2993. paddd xmm1,xmm0
  2994. pcmpeqd xmm0,xmm5
  2995. movdqa xmm3,xmm4
  2996. paddd xmm2,xmm1
  2997. pcmpeqd xmm1,xmm5
  2998. movdqa XMMWORD PTR[(-128)+rax],xmm0
  2999. movdqa xmm0,xmm4
  3000. paddd xmm3,xmm2
  3001. pcmpeqd xmm2,xmm5
  3002. movdqa XMMWORD PTR[(-112)+rax],xmm1
  3003. movdqa xmm1,xmm4
  3004. paddd xmm0,xmm3
  3005. pcmpeqd xmm3,xmm5
  3006. movdqa XMMWORD PTR[(-96)+rax],xmm2
  3007. movdqa xmm2,xmm4
  3008. paddd xmm1,xmm0
  3009. pcmpeqd xmm0,xmm5
  3010. movdqa XMMWORD PTR[(-80)+rax],xmm3
  3011. movdqa xmm3,xmm4
  3012. paddd xmm2,xmm1
  3013. pcmpeqd xmm1,xmm5
  3014. movdqa XMMWORD PTR[(-64)+rax],xmm0
  3015. movdqa xmm0,xmm4
  3016. paddd xmm3,xmm2
  3017. pcmpeqd xmm2,xmm5
  3018. movdqa XMMWORD PTR[(-48)+rax],xmm1
  3019. movdqa xmm1,xmm4
  3020. paddd xmm0,xmm3
  3021. pcmpeqd xmm3,xmm5
  3022. movdqa XMMWORD PTR[(-32)+rax],xmm2
  3023. movdqa xmm2,xmm4
  3024. paddd xmm1,xmm0
  3025. pcmpeqd xmm0,xmm5
  3026. movdqa XMMWORD PTR[(-16)+rax],xmm3
  3027. movdqa xmm3,xmm4
  3028. paddd xmm2,xmm1
  3029. pcmpeqd xmm1,xmm5
  3030. movdqa XMMWORD PTR[rax],xmm0
  3031. movdqa xmm0,xmm4
  3032. paddd xmm3,xmm2
  3033. pcmpeqd xmm2,xmm5
  3034. movdqa XMMWORD PTR[16+rax],xmm1
  3035. movdqa xmm1,xmm4
  3036. paddd xmm0,xmm3
  3037. pcmpeqd xmm3,xmm5
  3038. movdqa XMMWORD PTR[32+rax],xmm2
  3039. movdqa xmm2,xmm4
  3040. paddd xmm1,xmm0
  3041. pcmpeqd xmm0,xmm5
  3042. movdqa XMMWORD PTR[48+rax],xmm3
  3043. movdqa xmm3,xmm4
  3044. paddd xmm2,xmm1
  3045. pcmpeqd xmm1,xmm5
  3046. movdqa XMMWORD PTR[64+rax],xmm0
  3047. movdqa xmm0,xmm4
  3048. paddd xmm3,xmm2
  3049. pcmpeqd xmm2,xmm5
  3050. movdqa XMMWORD PTR[80+rax],xmm1
  3051. movdqa xmm1,xmm4
  3052. paddd xmm0,xmm3
  3053. pcmpeqd xmm3,xmm5
  3054. movdqa XMMWORD PTR[96+rax],xmm2
  3055. movdqa xmm2,xmm4
  3056. movdqa XMMWORD PTR[112+rax],xmm3 ; sixteenth and last mask
  3057. jmp $L$gather
  3058. ALIGN 32
  3059. $L$gather::
  ; One iteration per output word: xmm4 / xmm5 are two OR accumulators.
  3060. pxor xmm4,xmm4
  3061. pxor xmm5,xmm5
  3062. movdqa xmm0,XMMWORD PTR[((-128))+r11]
  3063. movdqa xmm1,XMMWORD PTR[((-112))+r11]
  3064. movdqa xmm2,XMMWORD PTR[((-96))+r11]
  3065. pand xmm0,XMMWORD PTR[((-128))+rax]
  3066. movdqa xmm3,XMMWORD PTR[((-80))+r11]
  3067. pand xmm1,XMMWORD PTR[((-112))+rax]
  3068. por xmm4,xmm0
  3069. pand xmm2,XMMWORD PTR[((-96))+rax]
  3070. por xmm5,xmm1
  3071. pand xmm3,XMMWORD PTR[((-80))+rax]
  3072. por xmm4,xmm2
  3073. por xmm5,xmm3
  3074. movdqa xmm0,XMMWORD PTR[((-64))+r11]
  3075. movdqa xmm1,XMMWORD PTR[((-48))+r11]
  3076. movdqa xmm2,XMMWORD PTR[((-32))+r11]
  3077. pand xmm0,XMMWORD PTR[((-64))+rax]
  3078. movdqa xmm3,XMMWORD PTR[((-16))+r11]
  3079. pand xmm1,XMMWORD PTR[((-48))+rax]
  3080. por xmm4,xmm0
  3081. pand xmm2,XMMWORD PTR[((-32))+rax]
  3082. por xmm5,xmm1
  3083. pand xmm3,XMMWORD PTR[((-16))+rax]
  3084. por xmm4,xmm2
  3085. por xmm5,xmm3
  3086. movdqa xmm0,XMMWORD PTR[r11]
  3087. movdqa xmm1,XMMWORD PTR[16+r11]
  3088. movdqa xmm2,XMMWORD PTR[32+r11]
  3089. pand xmm0,XMMWORD PTR[rax]
  3090. movdqa xmm3,XMMWORD PTR[48+r11]
  3091. pand xmm1,XMMWORD PTR[16+rax]
  3092. por xmm4,xmm0
  3093. pand xmm2,XMMWORD PTR[32+rax]
  3094. por xmm5,xmm1
  3095. pand xmm3,XMMWORD PTR[48+rax]
  3096. por xmm4,xmm2
  3097. por xmm5,xmm3
  3098. movdqa xmm0,XMMWORD PTR[64+r11]
  3099. movdqa xmm1,XMMWORD PTR[80+r11]
  3100. movdqa xmm2,XMMWORD PTR[96+r11]
  3101. pand xmm0,XMMWORD PTR[64+rax]
  3102. movdqa xmm3,XMMWORD PTR[112+r11]
  3103. pand xmm1,XMMWORD PTR[80+rax]
  3104. por xmm4,xmm0
  3105. pand xmm2,XMMWORD PTR[96+rax]
  3106. por xmm5,xmm1
  3107. pand xmm3,XMMWORD PTR[112+rax]
  3108. por xmm4,xmm2
  3109. por xmm5,xmm3
  3110. por xmm4,xmm5 ; merge the two accumulators
  3111. lea r11,QWORD PTR[256+r11] ; next table row
  3112. pshufd xmm0,xmm4,04eh ; swap the two 64-bit halves
  3113. por xmm0,xmm4 ; low qword = selected slot, whichever half it sat in
  3114. movq QWORD PTR[rcx],xmm0
  3115. lea rcx,QWORD PTR[8+rcx]
  3116. sub edx,1
  3117. jnz $L$gather
  3118. lea rsp,QWORD PTR[r10] ; back to the entry rsp saved in the prologue
  3119. DB 0F3h,0C3h ;repret
  3120. $L$SEH_end_bn_gather5::
  3121. bn_gather5 ENDP
  ; $L$inc -- dword constants loaded by bn_gather5: the starting counters
  ; {0,0,1,1} followed by the per-step increment {2,2,2,2}.
  3122. ALIGN 64
  3123. $L$inc::
  3124. DD 0,0,1,1
  3125. DD 2,2,2,2
  ; NUL-terminated ASCII identification string:
  ; "Montgomery Multiplication with scatter/gather for x86_64, CRYPTOGAMS by <appro@openssl.org>"
  3126. DB 77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105
  3127. DB 112,108,105,99,97,116,105,111,110,32,119,105,116,104,32,115
  3128. DB 99,97,116,116,101,114,47,103,97,116,104,101,114,32,102,111
  3129. DB 114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79
  3130. DB 71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111
  3131. DB 112,101,110,115,115,108,46,111,114,103,62,0
  3132. EXTERN __imp_RtlVirtualUnwind:NEAR
  ; mul_handler -- Win64 language-specific exception handler named by the
  ; $L$SEH_info_* records in .xdata.
  ;
  ; In:    r8 = CONTEXT*, r9 = DISPATCHER_CONTEXT* (rcx and rdx are overwritten
  ;        without being read)
  ; Out:   eax = 1 (ExceptionContinueSearch)
  ;
  ; It works out where the interrupted function kept its caller's stack pointer
  ; and non-volatile registers, patches those values into the CONTEXT, copies
  ; the CONTEXT into the dispatcher's record and calls RtlVirtualUnwind.
  ; Field names in the comments follow the Win64 CONTEXT / DISPATCHER_CONTEXT
  ; layouts from winnt.h.
  ;
  ; The three imagerel values in HandlerData split the function into regions:
  ;   Rip <  HandlerData[0]                   -> only rdi/rsi are reloaded, frame = context->Rax
  ;   HandlerData[0] <= Rip < HandlerData[1]  -> rbx..r15 reloaded as well, frame = context->Rax
  ;   HandlerData[1] <= Rip < HandlerData[2]  -> frame pointer fetched from the stack (see below)
  ;   Rip >= HandlerData[2]                   -> only rdi/rsi are reloaded, frame = context->Rsp
  ; NOTE(review): using context->Rax as the frame in the first two regions
  ; assumes each covered prologue keeps the entry rsp in rax -- confirm
  ; against the prologues earlier in the file.
  3133. ALIGN 16
  3134. mul_handler PROC PRIVATE
  3135. push rsi
  3136. push rdi
  3137. push rbx
  3138. push rbp
  3139. push r12
  3140. push r13
  3141. push r14
  3142. push r15
  3143. pushfq
  3144. sub rsp,64 ; 32-byte shadow space + four stack arguments for the call below
  3145. mov rax,QWORD PTR[120+r8] ; context->Rax
  3146. mov rbx,QWORD PTR[248+r8] ; context->Rip
  3147. mov rsi,QWORD PTR[8+r9] ; disp->ImageBase
  3148. mov r11,QWORD PTR[56+r9] ; disp->HandlerData
  3149. mov r10d,DWORD PTR[r11] ; HandlerData[0]
  3150. lea r10,QWORD PTR[r10*1+rsi] ; imagerel -> absolute address
  3151. cmp rbx,r10
  3152. jb $L$common_seh_tail
  3153. mov r10d,DWORD PTR[4+r11] ; HandlerData[1]
  3154. lea r10,QWORD PTR[r10*1+rsi]
  3155. cmp rbx,r10
  3156. jb $L$common_pop_regs
  3157. mov rax,QWORD PTR[152+r8] ; context->Rsp
  3158. mov r10d,DWORD PTR[8+r11] ; HandlerData[2]
  3159. lea r10,QWORD PTR[r10*1+rsi]
  3160. cmp rbx,r10
  3161. jae $L$common_seh_tail
  3162. lea r10,QWORD PTR[$L$mul_epilogue]
  3163. cmp rbx,r10
  3164. ja $L$body_40 ; Rip above $L$mul_epilogue: frame pointer sits at [rsp+40]
  3165. mov r10,QWORD PTR[192+r8] ; context->R9
  3166. mov rax,QWORD PTR[8+r10*8+rax] ; frame pointer stored at [Rsp + R9*8 + 8]
  3167. jmp $L$common_pop_regs
  3168. $L$body_40::
  3169. mov rax,QWORD PTR[40+rax]
  3170. $L$common_pop_regs::
  ; rax = frame pointer; the six saved registers sit directly below it.
  3171. mov rbx,QWORD PTR[((-8))+rax]
  3172. mov rbp,QWORD PTR[((-16))+rax]
  3173. mov r12,QWORD PTR[((-24))+rax]
  3174. mov r13,QWORD PTR[((-32))+rax]
  3175. mov r14,QWORD PTR[((-40))+rax]
  3176. mov r15,QWORD PTR[((-48))+rax]
  3177. mov QWORD PTR[144+r8],rbx ; context->Rbx
  3178. mov QWORD PTR[160+r8],rbp ; context->Rbp
  3179. mov QWORD PTR[216+r8],r12 ; context->R12
  3180. mov QWORD PTR[224+r8],r13 ; context->R13
  3181. mov QWORD PTR[232+r8],r14 ; context->R14
  3182. mov QWORD PTR[240+r8],r15 ; context->R15
  3183. $L$common_seh_tail::
  3184. mov rdi,QWORD PTR[8+rax] ; rdi / rsi as stored at frame+8 / frame+16
  3185. mov rsi,QWORD PTR[16+rax]
  3186. mov QWORD PTR[152+r8],rax ; context->Rsp
  3187. mov QWORD PTR[168+r8],rsi ; context->Rsi
  3188. mov QWORD PTR[176+r8],rdi ; context->Rdi
  3189. mov rdi,QWORD PTR[40+r9] ; disp->ContextRecord (copy destination)
  3190. mov rsi,r8 ; copy source: the patched CONTEXT
  3191. mov ecx,154 ; 154 qwords = 1232 bytes
  3192. DD 0a548f3fch ; bytes FC F3 48 A5 = cld / rep movsq
  3193. mov rsi,r9 ; rsi = DISPATCHER_CONTEXT*
  ; Arguments for RtlVirtualUnwind: four in registers, four on the stack.
  3194. xor rcx,rcx ; arg1 = 0 (handler type)
  3195. mov rdx,QWORD PTR[8+rsi] ; arg2 = disp->ImageBase
  3196. mov r8,QWORD PTR[rsi] ; arg3 = disp->ControlPc
  3197. mov r9,QWORD PTR[16+rsi] ; arg4 = disp->FunctionEntry
  3198. mov r10,QWORD PTR[40+rsi] ; disp->ContextRecord
  3199. lea r11,QWORD PTR[56+rsi] ; &disp->HandlerData
  3200. lea r12,QWORD PTR[24+rsi] ; &disp->EstablisherFrame
  3201. mov QWORD PTR[32+rsp],r10 ; arg5
  3202. mov QWORD PTR[40+rsp],r11 ; arg6
  3203. mov QWORD PTR[48+rsp],r12 ; arg7
  3204. mov QWORD PTR[56+rsp],rcx ; arg8 = NULL
  3205. call QWORD PTR[__imp_RtlVirtualUnwind]
  3206. mov eax,1 ; ExceptionContinueSearch
  3207. add rsp,64
  3208. popfq
  3209. pop r15
  3210. pop r14
  3211. pop r13
  3212. pop r12
  3213. pop rbp
  3214. pop rbx
  3215. pop rdi
  3216. pop rsi
  3217. DB 0F3h,0C3h ;repret
  3218. mul_handler ENDP
  3219. .text$ ENDS
  ; .pdata -- function table for Win64 unwinding.  Every entry is three
  ; image-relative dwords: start of the function, end of the function, and the
  ; address of its unwind-info record in .xdata.
  3220. .pdata SEGMENT READONLY ALIGN(4)
  3221. ALIGN 4
  ; bn_mul_mont_gather5
  3222. DD imagerel $L$SEH_begin_bn_mul_mont_gather5
  3223. DD imagerel $L$SEH_end_bn_mul_mont_gather5
  3224. DD imagerel $L$SEH_info_bn_mul_mont_gather5
  ; bn_mul4x_mont_gather5
  3225. DD imagerel $L$SEH_begin_bn_mul4x_mont_gather5
  3226. DD imagerel $L$SEH_end_bn_mul4x_mont_gather5
  3227. DD imagerel $L$SEH_info_bn_mul4x_mont_gather5
  ; bn_power5
  3228. DD imagerel $L$SEH_begin_bn_power5
  3229. DD imagerel $L$SEH_end_bn_power5
  3230. DD imagerel $L$SEH_info_bn_power5
  ; bn_from_mont8x
  3231. DD imagerel $L$SEH_begin_bn_from_mont8x
  3232. DD imagerel $L$SEH_end_bn_from_mont8x
  3233. DD imagerel $L$SEH_info_bn_from_mont8x
  ; bn_mulx4x_mont_gather5
  3234. DD imagerel $L$SEH_begin_bn_mulx4x_mont_gather5
  3235. DD imagerel $L$SEH_end_bn_mulx4x_mont_gather5
  3236. DD imagerel $L$SEH_info_bn_mulx4x_mont_gather5
  ; bn_powerx5
  3237. DD imagerel $L$SEH_begin_bn_powerx5
  3238. DD imagerel $L$SEH_end_bn_powerx5
  3239. DD imagerel $L$SEH_info_bn_powerx5
  ; bn_gather5
  3240. DD imagerel $L$SEH_begin_bn_gather5
  3241. DD imagerel $L$SEH_end_bn_gather5
  3242. DD imagerel $L$SEH_info_bn_gather5
  3243. .pdata ENDS
  ; .xdata -- unwind-info records referenced from .pdata.
  ;
  ; The first six records share one shape:
  ;   DB 9,0,0,0          header byte 09h = version 1 with the exception-handler
  ;                       flag set; prologue size, code count and frame register
  ;                       are all zero
  ;   DD imagerel mul_handler
  ;   DD a, b, c          the three dwords mul_handler reads as HandlerData[0..2]
  ; The bn_gather5 record instead carries real unwind codes and no handler.
  3244. .xdata SEGMENT READONLY ALIGN(8)
  3245. ALIGN 8
  3246. $L$SEH_info_bn_mul_mont_gather5::
  3247. DB 9,0,0,0
  3248. DD imagerel mul_handler
  ; HandlerData[0] and [1] are the same label here, so the range between them is empty.
  3249. DD imagerel $L$mul_body,imagerel $L$mul_body,imagerel $L$mul_epilogue
  3250. ALIGN 8
  3251. $L$SEH_info_bn_mul4x_mont_gather5::
  3252. DB 9,0,0,0
  3253. DD imagerel mul_handler
  3254. DD imagerel $L$mul4x_prologue,imagerel $L$mul4x_body,imagerel $L$mul4x_epilogue
  3255. ALIGN 8
  3256. $L$SEH_info_bn_power5::
  3257. DB 9,0,0,0
  3258. DD imagerel mul_handler
  3259. DD imagerel $L$power5_prologue,imagerel $L$power5_body,imagerel $L$power5_epilogue
  3260. ALIGN 8
  3261. $L$SEH_info_bn_from_mont8x::
  3262. DB 9,0,0,0
  3263. DD imagerel mul_handler
  3264. DD imagerel $L$from_prologue,imagerel $L$from_body,imagerel $L$from_epilogue
  3265. ALIGN 8
  3266. $L$SEH_info_bn_mulx4x_mont_gather5::
  3267. DB 9,0,0,0
  3268. DD imagerel mul_handler
  3269. DD imagerel $L$mulx4x_prologue,imagerel $L$mulx4x_body,imagerel $L$mulx4x_epilogue
  3270. ALIGN 8
  3271. $L$SEH_info_bn_powerx5::
  3272. DB 9,0,0,0
  3273. DD imagerel mul_handler
  3274. DD imagerel $L$powerx5_prologue,imagerel $L$powerx5_body,imagerel $L$powerx5_epilogue
  3275. ALIGN 8
  3276. $L$SEH_info_bn_gather5::
  3277. DB 001h,00bh,003h,00ah ; version 1, no flags; prologue = 11 bytes; 3 code slots; frame register = r10, offset 0
  3278. DB 00bh,001h,021h,000h ; at prologue offset 11: large stack allocation of 021h*8 = 0108h bytes
  3279. DB 004h,0a3h,000h,000h ; at prologue offset 4: frame register established; last two bytes pad the code array
  3280. ALIGN 8
  3281. .xdata ENDS
  3282. END