- .text
- .globl _bn_mul_mont_gather5
- .p2align 6
- _bn_mul_mont_gather5:
- movl %r9d,%r9d
- movq %rsp,%rax
- testl $7,%r9d
- jnz L$mul_enter
- movl _OPENSSL_ia32cap_P+8(%rip),%r11d
- jmp L$mul4x_enter
- .p2align 4
- L$mul_enter:
- movd 8(%rsp),%xmm5
- pushq %rbx
- pushq %rbp
- pushq %r12
- pushq %r13
- pushq %r14
- pushq %r15
- negq %r9
- movq %rsp,%r11
- leaq -280(%rsp,%r9,8),%r10
- negq %r9
- andq $-1024,%r10
- subq %r10,%r11
- andq $-4096,%r11
- leaq (%r10,%r11,1),%rsp
- movq (%rsp),%r11
- cmpq %r10,%rsp
- ja L$mul_page_walk
- jmp L$mul_page_walk_done
- L$mul_page_walk:
- leaq -4096(%rsp),%rsp
- movq (%rsp),%r11
- cmpq %r10,%rsp
- ja L$mul_page_walk
- L$mul_page_walk_done:
- leaq L$inc(%rip),%r10
- movq %rax,8(%rsp,%r9,8)
- L$mul_body:
- leaq 128(%rdx),%r12
- movdqa 0(%r10),%xmm0
- movdqa 16(%r10),%xmm1
- leaq 24-112(%rsp,%r9,8),%r10
- andq $-16,%r10
- pshufd $0,%xmm5,%xmm5
- movdqa %xmm1,%xmm4
- movdqa %xmm1,%xmm2
- paddd %xmm0,%xmm1
- pcmpeqd %xmm5,%xmm0
- .byte 0x67
- movdqa %xmm4,%xmm3
- paddd %xmm1,%xmm2
- pcmpeqd %xmm5,%xmm1
- movdqa %xmm0,112(%r10)
- movdqa %xmm4,%xmm0
- paddd %xmm2,%xmm3
- pcmpeqd %xmm5,%xmm2
- movdqa %xmm1,128(%r10)
- movdqa %xmm4,%xmm1
- paddd %xmm3,%xmm0
- pcmpeqd %xmm5,%xmm3
- movdqa %xmm2,144(%r10)
- movdqa %xmm4,%xmm2
- paddd %xmm0,%xmm1
- pcmpeqd %xmm5,%xmm0
- movdqa %xmm3,160(%r10)
- movdqa %xmm4,%xmm3
- paddd %xmm1,%xmm2
- pcmpeqd %xmm5,%xmm1
- movdqa %xmm0,176(%r10)
- movdqa %xmm4,%xmm0
- paddd %xmm2,%xmm3
- pcmpeqd %xmm5,%xmm2
- movdqa %xmm1,192(%r10)
- movdqa %xmm4,%xmm1
- paddd %xmm3,%xmm0
- pcmpeqd %xmm5,%xmm3
- movdqa %xmm2,208(%r10)
- movdqa %xmm4,%xmm2
- paddd %xmm0,%xmm1
- pcmpeqd %xmm5,%xmm0
- movdqa %xmm3,224(%r10)
- movdqa %xmm4,%xmm3
- paddd %xmm1,%xmm2
- pcmpeqd %xmm5,%xmm1
- movdqa %xmm0,240(%r10)
- movdqa %xmm4,%xmm0
- paddd %xmm2,%xmm3
- pcmpeqd %xmm5,%xmm2
- movdqa %xmm1,256(%r10)
- movdqa %xmm4,%xmm1
- paddd %xmm3,%xmm0
- pcmpeqd %xmm5,%xmm3
- movdqa %xmm2,272(%r10)
- movdqa %xmm4,%xmm2
- paddd %xmm0,%xmm1
- pcmpeqd %xmm5,%xmm0
- movdqa %xmm3,288(%r10)
- movdqa %xmm4,%xmm3
- paddd %xmm1,%xmm2
- pcmpeqd %xmm5,%xmm1
- movdqa %xmm0,304(%r10)
- paddd %xmm2,%xmm3
- .byte 0x67
- pcmpeqd %xmm5,%xmm2
- movdqa %xmm1,320(%r10)
- pcmpeqd %xmm5,%xmm3
- movdqa %xmm2,336(%r10)
- pand 64(%r12),%xmm0
- pand 80(%r12),%xmm1
- pand 96(%r12),%xmm2
- movdqa %xmm3,352(%r10)
- pand 112(%r12),%xmm3
- por %xmm2,%xmm0
- por %xmm3,%xmm1
- movdqa -128(%r12),%xmm4
- movdqa -112(%r12),%xmm5
- movdqa -96(%r12),%xmm2
- pand 112(%r10),%xmm4
- movdqa -80(%r12),%xmm3
- pand 128(%r10),%xmm5
- por %xmm4,%xmm0
- pand 144(%r10),%xmm2
- por %xmm5,%xmm1
- pand 160(%r10),%xmm3
- por %xmm2,%xmm0
- por %xmm3,%xmm1
- movdqa -64(%r12),%xmm4
- movdqa -48(%r12),%xmm5
- movdqa -32(%r12),%xmm2
- pand 176(%r10),%xmm4
- movdqa -16(%r12),%xmm3
- pand 192(%r10),%xmm5
- por %xmm4,%xmm0
- pand 208(%r10),%xmm2
- por %xmm5,%xmm1
- pand 224(%r10),%xmm3
- por %xmm2,%xmm0
- por %xmm3,%xmm1
- movdqa 0(%r12),%xmm4
- movdqa 16(%r12),%xmm5
- movdqa 32(%r12),%xmm2
- pand 240(%r10),%xmm4
- movdqa 48(%r12),%xmm3
- pand 256(%r10),%xmm5
- por %xmm4,%xmm0
- pand 272(%r10),%xmm2
- por %xmm5,%xmm1
- pand 288(%r10),%xmm3
- por %xmm2,%xmm0
- por %xmm3,%xmm1
- por %xmm1,%xmm0
- pshufd $0x4e,%xmm0,%xmm1
- por %xmm1,%xmm0
- leaq 256(%r12),%r12
- .byte 102,72,15,126,195
- movq (%r8),%r8
- movq (%rsi),%rax
- xorq %r14,%r14
- xorq %r15,%r15
- movq %r8,%rbp
- mulq %rbx
- movq %rax,%r10
- movq (%rcx),%rax
- imulq %r10,%rbp
- movq %rdx,%r11
- mulq %rbp
- addq %rax,%r10
- movq 8(%rsi),%rax
- adcq $0,%rdx
- movq %rdx,%r13
- leaq 1(%r15),%r15
- jmp L$1st_enter
- .p2align 4
- L$1st:
- addq %rax,%r13
- movq (%rsi,%r15,8),%rax
- adcq $0,%rdx
- addq %r11,%r13
- movq %r10,%r11
- adcq $0,%rdx
- movq %r13,-16(%rsp,%r15,8)
- movq %rdx,%r13
- L$1st_enter:
- mulq %rbx
- addq %rax,%r11
- movq (%rcx,%r15,8),%rax
- adcq $0,%rdx
- leaq 1(%r15),%r15
- movq %rdx,%r10
- mulq %rbp
- cmpq %r9,%r15
- jne L$1st
- addq %rax,%r13
- adcq $0,%rdx
- addq %r11,%r13
- adcq $0,%rdx
- movq %r13,-16(%rsp,%r9,8)
- movq %rdx,%r13
- movq %r10,%r11
- xorq %rdx,%rdx
- addq %r11,%r13
- adcq $0,%rdx
- movq %r13,-8(%rsp,%r9,8)
- movq %rdx,(%rsp,%r9,8)
- leaq 1(%r14),%r14
- jmp L$outer
- .p2align 4
- L$outer:
- leaq 24+128(%rsp,%r9,8),%rdx
- andq $-16,%rdx
- pxor %xmm4,%xmm4
- pxor %xmm5,%xmm5
- movdqa -128(%r12),%xmm0
- movdqa -112(%r12),%xmm1
- movdqa -96(%r12),%xmm2
- movdqa -80(%r12),%xmm3
- pand -128(%rdx),%xmm0
- pand -112(%rdx),%xmm1
- por %xmm0,%xmm4
- pand -96(%rdx),%xmm2
- por %xmm1,%xmm5
- pand -80(%rdx),%xmm3
- por %xmm2,%xmm4
- por %xmm3,%xmm5
- movdqa -64(%r12),%xmm0
- movdqa -48(%r12),%xmm1
- movdqa -32(%r12),%xmm2
- movdqa -16(%r12),%xmm3
- pand -64(%rdx),%xmm0
- pand -48(%rdx),%xmm1
- por %xmm0,%xmm4
- pand -32(%rdx),%xmm2
- por %xmm1,%xmm5
- pand -16(%rdx),%xmm3
- por %xmm2,%xmm4
- por %xmm3,%xmm5
- movdqa 0(%r12),%xmm0
- movdqa 16(%r12),%xmm1
- movdqa 32(%r12),%xmm2
- movdqa 48(%r12),%xmm3
- pand 0(%rdx),%xmm0
- pand 16(%rdx),%xmm1
- por %xmm0,%xmm4
- pand 32(%rdx),%xmm2
- por %xmm1,%xmm5
- pand 48(%rdx),%xmm3
- por %xmm2,%xmm4
- por %xmm3,%xmm5
- movdqa 64(%r12),%xmm0
- movdqa 80(%r12),%xmm1
- movdqa 96(%r12),%xmm2
- movdqa 112(%r12),%xmm3
- pand 64(%rdx),%xmm0
- pand 80(%rdx),%xmm1
- por %xmm0,%xmm4
- pand 96(%rdx),%xmm2
- por %xmm1,%xmm5
- pand 112(%rdx),%xmm3
- por %xmm2,%xmm4
- por %xmm3,%xmm5
- por %xmm5,%xmm4
- pshufd $0x4e,%xmm4,%xmm0
- por %xmm4,%xmm0
- leaq 256(%r12),%r12
- movq (%rsi),%rax
- .byte 102,72,15,126,195
- xorq %r15,%r15
- movq %r8,%rbp
- movq (%rsp),%r10
- mulq %rbx
- addq %rax,%r10
- movq (%rcx),%rax
- adcq $0,%rdx
- imulq %r10,%rbp
- movq %rdx,%r11
- mulq %rbp
- addq %rax,%r10
- movq 8(%rsi),%rax
- adcq $0,%rdx
- movq 8(%rsp),%r10
- movq %rdx,%r13
- leaq 1(%r15),%r15
- jmp L$inner_enter
- .p2align 4
- L$inner:
- addq %rax,%r13
- movq (%rsi,%r15,8),%rax
- adcq $0,%rdx
- addq %r10,%r13
- movq (%rsp,%r15,8),%r10
- adcq $0,%rdx
- movq %r13,-16(%rsp,%r15,8)
- movq %rdx,%r13
- L$inner_enter:
- mulq %rbx
- addq %rax,%r11
- movq (%rcx,%r15,8),%rax
- adcq $0,%rdx
- addq %r11,%r10
- movq %rdx,%r11
- adcq $0,%r11
- leaq 1(%r15),%r15
- mulq %rbp
- cmpq %r9,%r15
- jne L$inner
- addq %rax,%r13
- adcq $0,%rdx
- addq %r10,%r13
- movq (%rsp,%r9,8),%r10
- adcq $0,%rdx
- movq %r13,-16(%rsp,%r9,8)
- movq %rdx,%r13
- xorq %rdx,%rdx
- addq %r11,%r13
- adcq $0,%rdx
- addq %r10,%r13
- adcq $0,%rdx
- movq %r13,-8(%rsp,%r9,8)
- movq %rdx,(%rsp,%r9,8)
- leaq 1(%r14),%r14
- cmpq %r9,%r14
- jb L$outer
- xorq %r14,%r14
- movq (%rsp),%rax
- leaq (%rsp),%rsi
- movq %r9,%r15
- jmp L$sub
- .p2align 4
- L$sub: sbbq (%rcx,%r14,8),%rax
- movq %rax,(%rdi,%r14,8)
- movq 8(%rsi,%r14,8),%rax
- leaq 1(%r14),%r14
- decq %r15
- jnz L$sub
- sbbq $0,%rax
- movq $-1,%rbx
- xorq %rax,%rbx
- xorq %r14,%r14
- movq %r9,%r15
- L$copy:
- movq (%rdi,%r14,8),%rcx
- movq (%rsp,%r14,8),%rdx
- andq %rbx,%rcx
- andq %rax,%rdx
- movq %r14,(%rsp,%r14,8)
- orq %rcx,%rdx
- movq %rdx,(%rdi,%r14,8)
- leaq 1(%r14),%r14
- subq $1,%r15
- jnz L$copy
- movq 8(%rsp,%r9,8),%rsi
- movq $1,%rax
- movq -48(%rsi),%r15
- movq -40(%rsi),%r14
- movq -32(%rsi),%r13
- movq -24(%rsi),%r12
- movq -16(%rsi),%rbp
- movq -8(%rsi),%rbx
- leaq (%rsi),%rsp
- L$mul_epilogue:
- .byte 0xf3,0xc3
- .p2align 5
- bn_mul4x_mont_gather5:
- .byte 0x67
- movq %rsp,%rax
- L$mul4x_enter:
- andl $0x80108,%r11d
- cmpl $0x80108,%r11d
- je L$mulx4x_enter
- pushq %rbx
- pushq %rbp
- pushq %r12
- pushq %r13
- pushq %r14
- pushq %r15
- L$mul4x_prologue:
- .byte 0x67
- shll $3,%r9d
- leaq (%r9,%r9,2),%r10
- negq %r9
- leaq -320(%rsp,%r9,2),%r11
- movq %rsp,%rbp
- subq %rdi,%r11
- andq $4095,%r11
- cmpq %r11,%r10
- jb L$mul4xsp_alt
- subq %r11,%rbp
- leaq -320(%rbp,%r9,2),%rbp
- jmp L$mul4xsp_done
- .p2align 5
- L$mul4xsp_alt:
- leaq 4096-320(,%r9,2),%r10
- leaq -320(%rbp,%r9,2),%rbp
- subq %r10,%r11
- movq $0,%r10
- cmovcq %r10,%r11
- subq %r11,%rbp
- L$mul4xsp_done:
- andq $-64,%rbp
- movq %rsp,%r11
- subq %rbp,%r11
- andq $-4096,%r11
- leaq (%r11,%rbp,1),%rsp
- movq (%rsp),%r10
- cmpq %rbp,%rsp
- ja L$mul4x_page_walk
- jmp L$mul4x_page_walk_done
- L$mul4x_page_walk:
- leaq -4096(%rsp),%rsp
- movq (%rsp),%r10
- cmpq %rbp,%rsp
- ja L$mul4x_page_walk
- L$mul4x_page_walk_done:
- negq %r9
- movq %rax,40(%rsp)
- L$mul4x_body:
- call mul4x_internal
- movq 40(%rsp),%rsi
- movq $1,%rax
- movq -48(%rsi),%r15
- movq -40(%rsi),%r14
- movq -32(%rsi),%r13
- movq -24(%rsi),%r12
- movq -16(%rsi),%rbp
- movq -8(%rsi),%rbx
- leaq (%rsi),%rsp
- L$mul4x_epilogue:
- .byte 0xf3,0xc3
- .p2align 5
- mul4x_internal:
- shlq $5,%r9
- movd 8(%rax),%xmm5
- leaq L$inc(%rip),%rax
- leaq 128(%rdx,%r9,1),%r13
- shrq $5,%r9
- movdqa 0(%rax),%xmm0
- movdqa 16(%rax),%xmm1
- leaq 88-112(%rsp,%r9,1),%r10
- leaq 128(%rdx),%r12
- pshufd $0,%xmm5,%xmm5
- movdqa %xmm1,%xmm4
- .byte 0x67,0x67
- movdqa %xmm1,%xmm2
- paddd %xmm0,%xmm1
- pcmpeqd %xmm5,%xmm0
- .byte 0x67
- movdqa %xmm4,%xmm3
- paddd %xmm1,%xmm2
- pcmpeqd %xmm5,%xmm1
- movdqa %xmm0,112(%r10)
- movdqa %xmm4,%xmm0
- paddd %xmm2,%xmm3
- pcmpeqd %xmm5,%xmm2
- movdqa %xmm1,128(%r10)
- movdqa %xmm4,%xmm1
- paddd %xmm3,%xmm0
- pcmpeqd %xmm5,%xmm3
- movdqa %xmm2,144(%r10)
- movdqa %xmm4,%xmm2
- paddd %xmm0,%xmm1
- pcmpeqd %xmm5,%xmm0
- movdqa %xmm3,160(%r10)
- movdqa %xmm4,%xmm3
- paddd %xmm1,%xmm2
- pcmpeqd %xmm5,%xmm1
- movdqa %xmm0,176(%r10)
- movdqa %xmm4,%xmm0
- paddd %xmm2,%xmm3
- pcmpeqd %xmm5,%xmm2
- movdqa %xmm1,192(%r10)
- movdqa %xmm4,%xmm1
- paddd %xmm3,%xmm0
- pcmpeqd %xmm5,%xmm3
- movdqa %xmm2,208(%r10)
- movdqa %xmm4,%xmm2
- paddd %xmm0,%xmm1
- pcmpeqd %xmm5,%xmm0
- movdqa %xmm3,224(%r10)
- movdqa %xmm4,%xmm3
- paddd %xmm1,%xmm2
- pcmpeqd %xmm5,%xmm1
- movdqa %xmm0,240(%r10)
- movdqa %xmm4,%xmm0
- paddd %xmm2,%xmm3
- pcmpeqd %xmm5,%xmm2
- movdqa %xmm1,256(%r10)
- movdqa %xmm4,%xmm1
- paddd %xmm3,%xmm0
- pcmpeqd %xmm5,%xmm3
- movdqa %xmm2,272(%r10)
- movdqa %xmm4,%xmm2
- paddd %xmm0,%xmm1
- pcmpeqd %xmm5,%xmm0
- movdqa %xmm3,288(%r10)
- movdqa %xmm4,%xmm3
- paddd %xmm1,%xmm2
- pcmpeqd %xmm5,%xmm1
- movdqa %xmm0,304(%r10)
- paddd %xmm2,%xmm3
- .byte 0x67
- pcmpeqd %xmm5,%xmm2
- movdqa %xmm1,320(%r10)
- pcmpeqd %xmm5,%xmm3
- movdqa %xmm2,336(%r10)
- pand 64(%r12),%xmm0
- pand 80(%r12),%xmm1
- pand 96(%r12),%xmm2
- movdqa %xmm3,352(%r10)
- pand 112(%r12),%xmm3
- por %xmm2,%xmm0
- por %xmm3,%xmm1
- movdqa -128(%r12),%xmm4
- movdqa -112(%r12),%xmm5
- movdqa -96(%r12),%xmm2
- pand 112(%r10),%xmm4
- movdqa -80(%r12),%xmm3
- pand 128(%r10),%xmm5
- por %xmm4,%xmm0
- pand 144(%r10),%xmm2
- por %xmm5,%xmm1
- pand 160(%r10),%xmm3
- por %xmm2,%xmm0
- por %xmm3,%xmm1
- movdqa -64(%r12),%xmm4
- movdqa -48(%r12),%xmm5
- movdqa -32(%r12),%xmm2
- pand 176(%r10),%xmm4
- movdqa -16(%r12),%xmm3
- pand 192(%r10),%xmm5
- por %xmm4,%xmm0
- pand 208(%r10),%xmm2
- por %xmm5,%xmm1
- pand 224(%r10),%xmm3
- por %xmm2,%xmm0
- por %xmm3,%xmm1
- movdqa 0(%r12),%xmm4
- movdqa 16(%r12),%xmm5
- movdqa 32(%r12),%xmm2
- pand 240(%r10),%xmm4
- movdqa 48(%r12),%xmm3
- pand 256(%r10),%xmm5
- por %xmm4,%xmm0
- pand 272(%r10),%xmm2
- por %xmm5,%xmm1
- pand 288(%r10),%xmm3
- por %xmm2,%xmm0
- por %xmm3,%xmm1
- por %xmm1,%xmm0
- pshufd $0x4e,%xmm0,%xmm1
- por %xmm1,%xmm0
- leaq 256(%r12),%r12
- .byte 102,72,15,126,195
- movq %r13,16+8(%rsp)
- movq %rdi,56+8(%rsp)
- movq (%r8),%r8
- movq (%rsi),%rax
- leaq (%rsi,%r9,1),%rsi
- negq %r9
- movq %r8,%rbp
- mulq %rbx
- movq %rax,%r10
- movq (%rcx),%rax
- imulq %r10,%rbp
- leaq 64+8(%rsp),%r14
- movq %rdx,%r11
- mulq %rbp
- addq %rax,%r10
- movq 8(%rsi,%r9,1),%rax
- adcq $0,%rdx
- movq %rdx,%rdi
- mulq %rbx
- addq %rax,%r11
- movq 8(%rcx),%rax
- adcq $0,%rdx
- movq %rdx,%r10
- mulq %rbp
- addq %rax,%rdi
- movq 16(%rsi,%r9,1),%rax
- adcq $0,%rdx
- addq %r11,%rdi
- leaq 32(%r9),%r15
- leaq 32(%rcx),%rcx
- adcq $0,%rdx
- movq %rdi,(%r14)
- movq %rdx,%r13
- jmp L$1st4x
- .p2align 5
- L$1st4x:
- mulq %rbx
- addq %rax,%r10
- movq -16(%rcx),%rax
- leaq 32(%r14),%r14
- adcq $0,%rdx
- movq %rdx,%r11
- mulq %rbp
- addq %rax,%r13
- movq -8(%rsi,%r15,1),%rax
- adcq $0,%rdx
- addq %r10,%r13
- adcq $0,%rdx
- movq %r13,-24(%r14)
- movq %rdx,%rdi
- mulq %rbx
- addq %rax,%r11
- movq -8(%rcx),%rax
- adcq $0,%rdx
- movq %rdx,%r10
- mulq %rbp
- addq %rax,%rdi
- movq (%rsi,%r15,1),%rax
- adcq $0,%rdx
- addq %r11,%rdi
- adcq $0,%rdx
- movq %rdi,-16(%r14)
- movq %rdx,%r13
- mulq %rbx
- addq %rax,%r10
- movq 0(%rcx),%rax
- adcq $0,%rdx
- movq %rdx,%r11
- mulq %rbp
- addq %rax,%r13
- movq 8(%rsi,%r15,1),%rax
- adcq $0,%rdx
- addq %r10,%r13
- adcq $0,%rdx
- movq %r13,-8(%r14)
- movq %rdx,%rdi
- mulq %rbx
- addq %rax,%r11
- movq 8(%rcx),%rax
- adcq $0,%rdx
- movq %rdx,%r10
- mulq %rbp
- addq %rax,%rdi
- movq 16(%rsi,%r15,1),%rax
- adcq $0,%rdx
- addq %r11,%rdi
- leaq 32(%rcx),%rcx
- adcq $0,%rdx
- movq %rdi,(%r14)
- movq %rdx,%r13
- addq $32,%r15
- jnz L$1st4x
- mulq %rbx
- addq %rax,%r10
- movq -16(%rcx),%rax
- leaq 32(%r14),%r14
- adcq $0,%rdx
- movq %rdx,%r11
- mulq %rbp
- addq %rax,%r13
- movq -8(%rsi),%rax
- adcq $0,%rdx
- addq %r10,%r13
- adcq $0,%rdx
- movq %r13,-24(%r14)
- movq %rdx,%rdi
- mulq %rbx
- addq %rax,%r11
- movq -8(%rcx),%rax
- adcq $0,%rdx
- movq %rdx,%r10
- mulq %rbp
- addq %rax,%rdi
- movq (%rsi,%r9,1),%rax
- adcq $0,%rdx
- addq %r11,%rdi
- adcq $0,%rdx
- movq %rdi,-16(%r14)
- movq %rdx,%r13
- leaq (%rcx,%r9,1),%rcx
- xorq %rdi,%rdi
- addq %r10,%r13
- adcq $0,%rdi
- movq %r13,-8(%r14)
- jmp L$outer4x
- .p2align 5
- L$outer4x:
- leaq 16+128(%r14),%rdx
- pxor %xmm4,%xmm4
- pxor %xmm5,%xmm5
- movdqa -128(%r12),%xmm0
- movdqa -112(%r12),%xmm1
- movdqa -96(%r12),%xmm2
- movdqa -80(%r12),%xmm3
- pand -128(%rdx),%xmm0
- pand -112(%rdx),%xmm1
- por %xmm0,%xmm4
- pand -96(%rdx),%xmm2
- por %xmm1,%xmm5
- pand -80(%rdx),%xmm3
- por %xmm2,%xmm4
- por %xmm3,%xmm5
- movdqa -64(%r12),%xmm0
- movdqa -48(%r12),%xmm1
- movdqa -32(%r12),%xmm2
- movdqa -16(%r12),%xmm3
- pand -64(%rdx),%xmm0
- pand -48(%rdx),%xmm1
- por %xmm0,%xmm4
- pand -32(%rdx),%xmm2
- por %xmm1,%xmm5
- pand -16(%rdx),%xmm3
- por %xmm2,%xmm4
- por %xmm3,%xmm5
- movdqa 0(%r12),%xmm0
- movdqa 16(%r12),%xmm1
- movdqa 32(%r12),%xmm2
- movdqa 48(%r12),%xmm3
- pand 0(%rdx),%xmm0
- pand 16(%rdx),%xmm1
- por %xmm0,%xmm4
- pand 32(%rdx),%xmm2
- por %xmm1,%xmm5
- pand 48(%rdx),%xmm3
- por %xmm2,%xmm4
- por %xmm3,%xmm5
- movdqa 64(%r12),%xmm0
- movdqa 80(%r12),%xmm1
- movdqa 96(%r12),%xmm2
- movdqa 112(%r12),%xmm3
- pand 64(%rdx),%xmm0
- pand 80(%rdx),%xmm1
- por %xmm0,%xmm4
- pand 96(%rdx),%xmm2
- por %xmm1,%xmm5
- pand 112(%rdx),%xmm3
- por %xmm2,%xmm4
- por %xmm3,%xmm5
- por %xmm5,%xmm4
- pshufd $0x4e,%xmm4,%xmm0
- por %xmm4,%xmm0
- leaq 256(%r12),%r12
- .byte 102,72,15,126,195
- movq (%r14,%r9,1),%r10
- movq %r8,%rbp
- mulq %rbx
- addq %rax,%r10
- movq (%rcx),%rax
- adcq $0,%rdx
- imulq %r10,%rbp
- movq %rdx,%r11
- movq %rdi,(%r14)
- leaq (%r14,%r9,1),%r14
- mulq %rbp
- addq %rax,%r10
- movq 8(%rsi,%r9,1),%rax
- adcq $0,%rdx
- movq %rdx,%rdi
- mulq %rbx
- addq %rax,%r11
- movq 8(%rcx),%rax
- adcq $0,%rdx
- addq 8(%r14),%r11
- adcq $0,%rdx
- movq %rdx,%r10
- mulq %rbp
- addq %rax,%rdi
- movq 16(%rsi,%r9,1),%rax
- adcq $0,%rdx
- addq %r11,%rdi
- leaq 32(%r9),%r15
- leaq 32(%rcx),%rcx
- adcq $0,%rdx
- movq %rdx,%r13
- jmp L$inner4x
- .p2align 5
- L$inner4x:
- mulq %rbx
- addq %rax,%r10
- movq -16(%rcx),%rax
- adcq $0,%rdx
- addq 16(%r14),%r10
- leaq 32(%r14),%r14
- adcq $0,%rdx
- movq %rdx,%r11
- mulq %rbp
- addq %rax,%r13
- movq -8(%rsi,%r15,1),%rax
- adcq $0,%rdx
- addq %r10,%r13
- adcq $0,%rdx
- movq %rdi,-32(%r14)
- movq %rdx,%rdi
- mulq %rbx
- addq %rax,%r11
- movq -8(%rcx),%rax
- adcq $0,%rdx
- addq -8(%r14),%r11
- adcq $0,%rdx
- movq %rdx,%r10
- mulq %rbp
- addq %rax,%rdi
- movq (%rsi,%r15,1),%rax
- adcq $0,%rdx
- addq %r11,%rdi
- adcq $0,%rdx
- movq %r13,-24(%r14)
- movq %rdx,%r13
- mulq %rbx
- addq %rax,%r10
- movq 0(%rcx),%rax
- adcq $0,%rdx
- addq (%r14),%r10
- adcq $0,%rdx
- movq %rdx,%r11
- mulq %rbp
- addq %rax,%r13
- movq 8(%rsi,%r15,1),%rax
- adcq $0,%rdx
- addq %r10,%r13
- adcq $0,%rdx
- movq %rdi,-16(%r14)
- movq %rdx,%rdi
- mulq %rbx
- addq %rax,%r11
- movq 8(%rcx),%rax
- adcq $0,%rdx
- addq 8(%r14),%r11
- adcq $0,%rdx
- movq %rdx,%r10
- mulq %rbp
- addq %rax,%rdi
- movq 16(%rsi,%r15,1),%rax
- adcq $0,%rdx
- addq %r11,%rdi
- leaq 32(%rcx),%rcx
- adcq $0,%rdx
- movq %r13,-8(%r14)
- movq %rdx,%r13
- addq $32,%r15
- jnz L$inner4x
- mulq %rbx
- addq %rax,%r10
- movq -16(%rcx),%rax
- adcq $0,%rdx
- addq 16(%r14),%r10
- leaq 32(%r14),%r14
- adcq $0,%rdx
- movq %rdx,%r11
- mulq %rbp
- addq %rax,%r13
- movq -8(%rsi),%rax
- adcq $0,%rdx
- addq %r10,%r13
- adcq $0,%rdx
- movq %rdi,-32(%r14)
- movq %rdx,%rdi
- mulq %rbx
- addq %rax,%r11
- movq %rbp,%rax
- movq -8(%rcx),%rbp
- adcq $0,%rdx
- addq -8(%r14),%r11
- adcq $0,%rdx
- movq %rdx,%r10
- mulq %rbp
- addq %rax,%rdi
- movq (%rsi,%r9,1),%rax
- adcq $0,%rdx
- addq %r11,%rdi
- adcq $0,%rdx
- movq %r13,-24(%r14)
- movq %rdx,%r13
- movq %rdi,-16(%r14)
- leaq (%rcx,%r9,1),%rcx
- xorq %rdi,%rdi
- addq %r10,%r13
- adcq $0,%rdi
- addq (%r14),%r13
- adcq $0,%rdi
- movq %r13,-8(%r14)
- cmpq 16+8(%rsp),%r12
- jb L$outer4x
- xorq %rax,%rax
- subq %r13,%rbp
- adcq %r15,%r15
- orq %r15,%rdi
- subq %rdi,%rax
- leaq (%r14,%r9,1),%rbx
- movq (%rcx),%r12
- leaq (%rcx),%rbp
- movq %r9,%rcx
- sarq $3+2,%rcx
- movq 56+8(%rsp),%rdi
- decq %r12
- xorq %r10,%r10
- movq 8(%rbp),%r13
- movq 16(%rbp),%r14
- movq 24(%rbp),%r15
- jmp L$sqr4x_sub_entry
- .globl _bn_power5
- .p2align 5
- _bn_power5:
- movq %rsp,%rax
- movl _OPENSSL_ia32cap_P+8(%rip),%r11d
- andl $0x80108,%r11d
- cmpl $0x80108,%r11d
- je L$powerx5_enter
- pushq %rbx
- pushq %rbp
- pushq %r12
- pushq %r13
- pushq %r14
- pushq %r15
- L$power5_prologue:
- shll $3,%r9d
- leal (%r9,%r9,2),%r10d
- negq %r9
- movq (%r8),%r8
- leaq -320(%rsp,%r9,2),%r11
- movq %rsp,%rbp
- subq %rdi,%r11
- andq $4095,%r11
- cmpq %r11,%r10
- jb L$pwr_sp_alt
- subq %r11,%rbp
- leaq -320(%rbp,%r9,2),%rbp
- jmp L$pwr_sp_done
- .p2align 5
- L$pwr_sp_alt:
- leaq 4096-320(,%r9,2),%r10
- leaq -320(%rbp,%r9,2),%rbp
- subq %r10,%r11
- movq $0,%r10
- cmovcq %r10,%r11
- subq %r11,%rbp
- L$pwr_sp_done:
- andq $-64,%rbp
- movq %rsp,%r11
- subq %rbp,%r11
- andq $-4096,%r11
- leaq (%r11,%rbp,1),%rsp
- movq (%rsp),%r10
- cmpq %rbp,%rsp
- ja L$pwr_page_walk
- jmp L$pwr_page_walk_done
- L$pwr_page_walk:
- leaq -4096(%rsp),%rsp
- movq (%rsp),%r10
- cmpq %rbp,%rsp
- ja L$pwr_page_walk
- L$pwr_page_walk_done:
- movq %r9,%r10
- negq %r9
- movq %r8,32(%rsp)
- movq %rax,40(%rsp)
- L$power5_body:
- .byte 102,72,15,110,207
- .byte 102,72,15,110,209
- .byte 102,73,15,110,218
- .byte 102,72,15,110,226
- call __bn_sqr8x_internal
- call __bn_post4x_internal
- call __bn_sqr8x_internal
- call __bn_post4x_internal
- call __bn_sqr8x_internal
- call __bn_post4x_internal
- call __bn_sqr8x_internal
- call __bn_post4x_internal
- call __bn_sqr8x_internal
- call __bn_post4x_internal
- .byte 102,72,15,126,209
- .byte 102,72,15,126,226
- movq %rsi,%rdi
- movq 40(%rsp),%rax
- leaq 32(%rsp),%r8
- call mul4x_internal
- movq 40(%rsp),%rsi
- movq $1,%rax
- movq -48(%rsi),%r15
- movq -40(%rsi),%r14
- movq -32(%rsi),%r13
- movq -24(%rsi),%r12
- movq -16(%rsi),%rbp
- movq -8(%rsi),%rbx
- leaq (%rsi),%rsp
- L$power5_epilogue:
- .byte 0xf3,0xc3
- .globl _bn_sqr8x_internal
- .private_extern _bn_sqr8x_internal
- .p2align 5
- _bn_sqr8x_internal:
- __bn_sqr8x_internal:
- leaq 32(%r10),%rbp
- leaq (%rsi,%r9,1),%rsi
- movq %r9,%rcx
- movq -32(%rsi,%rbp,1),%r14
- leaq 48+8(%rsp,%r9,2),%rdi
- movq -24(%rsi,%rbp,1),%rax
- leaq -32(%rdi,%rbp,1),%rdi
- movq -16(%rsi,%rbp,1),%rbx
- movq %rax,%r15
- mulq %r14
- movq %rax,%r10
- movq %rbx,%rax
- movq %rdx,%r11
- movq %r10,-24(%rdi,%rbp,1)
- mulq %r14
- addq %rax,%r11
- movq %rbx,%rax
- adcq $0,%rdx
- movq %r11,-16(%rdi,%rbp,1)
- movq %rdx,%r10
- movq -8(%rsi,%rbp,1),%rbx
- mulq %r15
- movq %rax,%r12
- movq %rbx,%rax
- movq %rdx,%r13
- leaq (%rbp),%rcx
- mulq %r14
- addq %rax,%r10
- movq %rbx,%rax
- movq %rdx,%r11
- adcq $0,%r11
- addq %r12,%r10
- adcq $0,%r11
- movq %r10,-8(%rdi,%rcx,1)
- jmp L$sqr4x_1st
- .p2align 5
- L$sqr4x_1st:
- movq (%rsi,%rcx,1),%rbx
- mulq %r15
- addq %rax,%r13
- movq %rbx,%rax
- movq %rdx,%r12
- adcq $0,%r12
- mulq %r14
- addq %rax,%r11
- movq %rbx,%rax
- movq 8(%rsi,%rcx,1),%rbx
- movq %rdx,%r10
- adcq $0,%r10
- addq %r13,%r11
- adcq $0,%r10
- mulq %r15
- addq %rax,%r12
- movq %rbx,%rax
- movq %r11,(%rdi,%rcx,1)
- movq %rdx,%r13
- adcq $0,%r13
- mulq %r14
- addq %rax,%r10
- movq %rbx,%rax
- movq 16(%rsi,%rcx,1),%rbx
- movq %rdx,%r11
- adcq $0,%r11
- addq %r12,%r10
- adcq $0,%r11
- mulq %r15
- addq %rax,%r13
- movq %rbx,%rax
- movq %r10,8(%rdi,%rcx,1)
- movq %rdx,%r12
- adcq $0,%r12
- mulq %r14
- addq %rax,%r11
- movq %rbx,%rax
- movq 24(%rsi,%rcx,1),%rbx
- movq %rdx,%r10
- adcq $0,%r10
- addq %r13,%r11
- adcq $0,%r10
- mulq %r15
- addq %rax,%r12
- movq %rbx,%rax
- movq %r11,16(%rdi,%rcx,1)
- movq %rdx,%r13
- adcq $0,%r13
- leaq 32(%rcx),%rcx
- mulq %r14
- addq %rax,%r10
- movq %rbx,%rax
- movq %rdx,%r11
- adcq $0,%r11
- addq %r12,%r10
- adcq $0,%r11
- movq %r10,-8(%rdi,%rcx,1)
- cmpq $0,%rcx
- jne L$sqr4x_1st
- mulq %r15
- addq %rax,%r13
- leaq 16(%rbp),%rbp
- adcq $0,%rdx
- addq %r11,%r13
- adcq $0,%rdx
- movq %r13,(%rdi)
- movq %rdx,%r12
- movq %rdx,8(%rdi)
- jmp L$sqr4x_outer
- .p2align 5
- L$sqr4x_outer:
- movq -32(%rsi,%rbp,1),%r14
- leaq 48+8(%rsp,%r9,2),%rdi
- movq -24(%rsi,%rbp,1),%rax
- leaq -32(%rdi,%rbp,1),%rdi
- movq -16(%rsi,%rbp,1),%rbx
- movq %rax,%r15
- mulq %r14
- movq -24(%rdi,%rbp,1),%r10
- addq %rax,%r10
- movq %rbx,%rax
- adcq $0,%rdx
- movq %r10,-24(%rdi,%rbp,1)
- movq %rdx,%r11
- mulq %r14
- addq %rax,%r11
- movq %rbx,%rax
- adcq $0,%rdx
- addq -16(%rdi,%rbp,1),%r11
- movq %rdx,%r10
- adcq $0,%r10
- movq %r11,-16(%rdi,%rbp,1)
- xorq %r12,%r12
- movq -8(%rsi,%rbp,1),%rbx
- mulq %r15
- addq %rax,%r12
- movq %rbx,%rax
- adcq $0,%rdx
- addq -8(%rdi,%rbp,1),%r12
- movq %rdx,%r13
- adcq $0,%r13
- mulq %r14
- addq %rax,%r10
- movq %rbx,%rax
- adcq $0,%rdx
- addq %r12,%r10
- movq %rdx,%r11
- adcq $0,%r11
- movq %r10,-8(%rdi,%rbp,1)
- leaq (%rbp),%rcx
- jmp L$sqr4x_inner
- .p2align 5
- L$sqr4x_inner:
- movq (%rsi,%rcx,1),%rbx
- mulq %r15
- addq %rax,%r13
- movq %rbx,%rax
- movq %rdx,%r12
- adcq $0,%r12
- addq (%rdi,%rcx,1),%r13
- adcq $0,%r12
- .byte 0x67
- mulq %r14
- addq %rax,%r11
- movq %rbx,%rax
- movq 8(%rsi,%rcx,1),%rbx
- movq %rdx,%r10
- adcq $0,%r10
- addq %r13,%r11
- adcq $0,%r10
- mulq %r15
- addq %rax,%r12
- movq %r11,(%rdi,%rcx,1)
- movq %rbx,%rax
- movq %rdx,%r13
- adcq $0,%r13
- addq 8(%rdi,%rcx,1),%r12
- leaq 16(%rcx),%rcx
- adcq $0,%r13
- mulq %r14
- addq %rax,%r10
- movq %rbx,%rax
- adcq $0,%rdx
- addq %r12,%r10
- movq %rdx,%r11
- adcq $0,%r11
- movq %r10,-8(%rdi,%rcx,1)
- cmpq $0,%rcx
- jne L$sqr4x_inner
- .byte 0x67
- mulq %r15
- addq %rax,%r13
- adcq $0,%rdx
- addq %r11,%r13
- adcq $0,%rdx
- movq %r13,(%rdi)
- movq %rdx,%r12
- movq %rdx,8(%rdi)
- addq $16,%rbp
- jnz L$sqr4x_outer
- movq -32(%rsi),%r14
- leaq 48+8(%rsp,%r9,2),%rdi
- movq -24(%rsi),%rax
- leaq -32(%rdi,%rbp,1),%rdi
- movq -16(%rsi),%rbx
- movq %rax,%r15
- mulq %r14
- addq %rax,%r10
- movq %rbx,%rax
- movq %rdx,%r11
- adcq $0,%r11
- mulq %r14
- addq %rax,%r11
- movq %rbx,%rax
- movq %r10,-24(%rdi)
- movq %rdx,%r10
- adcq $0,%r10
- addq %r13,%r11
- movq -8(%rsi),%rbx
- adcq $0,%r10
- mulq %r15
- addq %rax,%r12
- movq %rbx,%rax
- movq %r11,-16(%rdi)
- movq %rdx,%r13
- adcq $0,%r13
- mulq %r14
- addq %rax,%r10
- movq %rbx,%rax
- movq %rdx,%r11
- adcq $0,%r11
- addq %r12,%r10
- adcq $0,%r11
- movq %r10,-8(%rdi)
- mulq %r15
- addq %rax,%r13
- movq -16(%rsi),%rax
- adcq $0,%rdx
- addq %r11,%r13
- adcq $0,%rdx
- movq %r13,(%rdi)
- movq %rdx,%r12
- movq %rdx,8(%rdi)
- mulq %rbx
- addq $16,%rbp
- xorq %r14,%r14
- subq %r9,%rbp
- xorq %r15,%r15
- addq %r12,%rax
- adcq $0,%rdx
- movq %rax,8(%rdi)
- movq %rdx,16(%rdi)
- movq %r15,24(%rdi)
- movq -16(%rsi,%rbp,1),%rax
- leaq 48+8(%rsp),%rdi
- xorq %r10,%r10
- movq 8(%rdi),%r11
- leaq (%r14,%r10,2),%r12
- shrq $63,%r10
- leaq (%rcx,%r11,2),%r13
- shrq $63,%r11
- orq %r10,%r13
- movq 16(%rdi),%r10
- movq %r11,%r14
- mulq %rax
- negq %r15
- movq 24(%rdi),%r11
- adcq %rax,%r12
- movq -8(%rsi,%rbp,1),%rax
- movq %r12,(%rdi)
- adcq %rdx,%r13
- leaq (%r14,%r10,2),%rbx
- movq %r13,8(%rdi)
- sbbq %r15,%r15
- shrq $63,%r10
- leaq (%rcx,%r11,2),%r8
- shrq $63,%r11
- orq %r10,%r8
- movq 32(%rdi),%r10
- movq %r11,%r14
- mulq %rax
- negq %r15
- movq 40(%rdi),%r11
- adcq %rax,%rbx
- movq 0(%rsi,%rbp,1),%rax
- movq %rbx,16(%rdi)
- adcq %rdx,%r8
- leaq 16(%rbp),%rbp
- movq %r8,24(%rdi)
- sbbq %r15,%r15
- leaq 64(%rdi),%rdi
- jmp L$sqr4x_shift_n_add
- .p2align 5
- L$sqr4x_shift_n_add:
- leaq (%r14,%r10,2),%r12
- shrq $63,%r10
- leaq (%rcx,%r11,2),%r13
- shrq $63,%r11
- orq %r10,%r13
- movq -16(%rdi),%r10
- movq %r11,%r14
- mulq %rax
- negq %r15
- movq -8(%rdi),%r11
- adcq %rax,%r12
- movq -8(%rsi,%rbp,1),%rax
- movq %r12,-32(%rdi)
- adcq %rdx,%r13
- leaq (%r14,%r10,2),%rbx
- movq %r13,-24(%rdi)
- sbbq %r15,%r15
- shrq $63,%r10
- leaq (%rcx,%r11,2),%r8
- shrq $63,%r11
- orq %r10,%r8
- movq 0(%rdi),%r10
- movq %r11,%r14
- mulq %rax
- negq %r15
- movq 8(%rdi),%r11
- adcq %rax,%rbx
- movq 0(%rsi,%rbp,1),%rax
- movq %rbx,-16(%rdi)
- adcq %rdx,%r8
- leaq (%r14,%r10,2),%r12
- movq %r8,-8(%rdi)
- sbbq %r15,%r15
- shrq $63,%r10
- leaq (%rcx,%r11,2),%r13
- shrq $63,%r11
- orq %r10,%r13
- movq 16(%rdi),%r10
- movq %r11,%r14
- mulq %rax
- negq %r15
- movq 24(%rdi),%r11
- adcq %rax,%r12
- movq 8(%rsi,%rbp,1),%rax
- movq %r12,0(%rdi)
- adcq %rdx,%r13
- leaq (%r14,%r10,2),%rbx
- movq %r13,8(%rdi)
- sbbq %r15,%r15
- shrq $63,%r10
- leaq (%rcx,%r11,2),%r8
- shrq $63,%r11
- orq %r10,%r8
- movq 32(%rdi),%r10
- movq %r11,%r14
- mulq %rax
- negq %r15
- movq 40(%rdi),%r11
- adcq %rax,%rbx
- movq 16(%rsi,%rbp,1),%rax
- movq %rbx,16(%rdi)
- adcq %rdx,%r8
- movq %r8,24(%rdi)
- sbbq %r15,%r15
- leaq 64(%rdi),%rdi
- addq $32,%rbp
- jnz L$sqr4x_shift_n_add
- leaq (%r14,%r10,2),%r12
- .byte 0x67
- shrq $63,%r10
- leaq (%rcx,%r11,2),%r13
- shrq $63,%r11
- orq %r10,%r13
- movq -16(%rdi),%r10
- movq %r11,%r14
- mulq %rax
- negq %r15
- movq -8(%rdi),%r11
- adcq %rax,%r12
- movq -8(%rsi),%rax
- movq %r12,-32(%rdi)
- adcq %rdx,%r13
- leaq (%r14,%r10,2),%rbx
- movq %r13,-24(%rdi)
- sbbq %r15,%r15
- shrq $63,%r10
- leaq (%rcx,%r11,2),%r8
- shrq $63,%r11
- orq %r10,%r8
- mulq %rax
- negq %r15
- adcq %rax,%rbx
- adcq %rdx,%r8
- movq %rbx,-16(%rdi)
- movq %r8,-8(%rdi)
- .byte 102,72,15,126,213
- __bn_sqr8x_reduction:
- xorq %rax,%rax
- leaq (%r9,%rbp,1),%rcx
- leaq 48+8(%rsp,%r9,2),%rdx
- movq %rcx,0+8(%rsp)
- leaq 48+8(%rsp,%r9,1),%rdi
- movq %rdx,8+8(%rsp)
- negq %r9
- jmp L$8x_reduction_loop
- .p2align 5
- L$8x_reduction_loop:
- leaq (%rdi,%r9,1),%rdi
- .byte 0x66
- movq 0(%rdi),%rbx
- movq 8(%rdi),%r9
- movq 16(%rdi),%r10
- movq 24(%rdi),%r11
- movq 32(%rdi),%r12
- movq 40(%rdi),%r13
- movq 48(%rdi),%r14
- movq 56(%rdi),%r15
- movq %rax,(%rdx)
- leaq 64(%rdi),%rdi
- .byte 0x67
- movq %rbx,%r8
- imulq 32+8(%rsp),%rbx
- movq 0(%rbp),%rax
- movl $8,%ecx
- jmp L$8x_reduce
- .p2align 5
- L$8x_reduce:
- mulq %rbx
- movq 8(%rbp),%rax
- negq %r8
- movq %rdx,%r8
- adcq $0,%r8
- mulq %rbx
- addq %rax,%r9
- movq 16(%rbp),%rax
- adcq $0,%rdx
- addq %r9,%r8
- movq %rbx,48-8+8(%rsp,%rcx,8)
- movq %rdx,%r9
- adcq $0,%r9
- mulq %rbx
- addq %rax,%r10
- movq 24(%rbp),%rax
- adcq $0,%rdx
- addq %r10,%r9
- movq 32+8(%rsp),%rsi
- movq %rdx,%r10
- adcq $0,%r10
- mulq %rbx
- addq %rax,%r11
- movq 32(%rbp),%rax
- adcq $0,%rdx
- imulq %r8,%rsi
- addq %r11,%r10
- movq %rdx,%r11
- adcq $0,%r11
- mulq %rbx
- addq %rax,%r12
- movq 40(%rbp),%rax
- adcq $0,%rdx
- addq %r12,%r11
- movq %rdx,%r12
- adcq $0,%r12
- mulq %rbx
- addq %rax,%r13
- movq 48(%rbp),%rax
- adcq $0,%rdx
- addq %r13,%r12
- movq %rdx,%r13
- adcq $0,%r13
- mulq %rbx
- addq %rax,%r14
- movq 56(%rbp),%rax
- adcq $0,%rdx
- addq %r14,%r13
- movq %rdx,%r14
- adcq $0,%r14
- mulq %rbx
- movq %rsi,%rbx
- addq %rax,%r15
- movq 0(%rbp),%rax
- adcq $0,%rdx
- addq %r15,%r14
- movq %rdx,%r15
- adcq $0,%r15
- decl %ecx
- jnz L$8x_reduce
- leaq 64(%rbp),%rbp
- xorq %rax,%rax
- movq 8+8(%rsp),%rdx
- cmpq 0+8(%rsp),%rbp
- jae L$8x_no_tail
- .byte 0x66
- addq 0(%rdi),%r8
- adcq 8(%rdi),%r9
- adcq 16(%rdi),%r10
- adcq 24(%rdi),%r11
- adcq 32(%rdi),%r12
- adcq 40(%rdi),%r13
- adcq 48(%rdi),%r14
- adcq 56(%rdi),%r15
- sbbq %rsi,%rsi
- movq 48+56+8(%rsp),%rbx
- movl $8,%ecx
- movq 0(%rbp),%rax
- jmp L$8x_tail
- .p2align 5
- L$8x_tail:
- mulq %rbx
- addq %rax,%r8
- movq 8(%rbp),%rax
- movq %r8,(%rdi)
- movq %rdx,%r8
- adcq $0,%r8
- mulq %rbx
- addq %rax,%r9
- movq 16(%rbp),%rax
- adcq $0,%rdx
- addq %r9,%r8
- leaq 8(%rdi),%rdi
- movq %rdx,%r9
- adcq $0,%r9
- mulq %rbx
- addq %rax,%r10
- movq 24(%rbp),%rax
- adcq $0,%rdx
- addq %r10,%r9
- movq %rdx,%r10
- adcq $0,%r10
- mulq %rbx
- addq %rax,%r11
- movq 32(%rbp),%rax
- adcq $0,%rdx
- addq %r11,%r10
- movq %rdx,%r11
- adcq $0,%r11
- mulq %rbx
- addq %rax,%r12
- movq 40(%rbp),%rax
- adcq $0,%rdx
- addq %r12,%r11
- movq %rdx,%r12
- adcq $0,%r12
- mulq %rbx
- addq %rax,%r13
- movq 48(%rbp),%rax
- adcq $0,%rdx
- addq %r13,%r12
- movq %rdx,%r13
- adcq $0,%r13
- mulq %rbx
- addq %rax,%r14
- movq 56(%rbp),%rax
- adcq $0,%rdx
- addq %r14,%r13
- movq %rdx,%r14
- adcq $0,%r14
- mulq %rbx
- movq 48-16+8(%rsp,%rcx,8),%rbx
- addq %rax,%r15
- adcq $0,%rdx
- addq %r15,%r14
- movq 0(%rbp),%rax
- movq %rdx,%r15
- adcq $0,%r15
- decl %ecx
- jnz L$8x_tail
- leaq 64(%rbp),%rbp
- movq 8+8(%rsp),%rdx
- cmpq 0+8(%rsp),%rbp
- jae L$8x_tail_done
- movq 48+56+8(%rsp),%rbx
- negq %rsi
- movq 0(%rbp),%rax
- adcq 0(%rdi),%r8
- adcq 8(%rdi),%r9
- adcq 16(%rdi),%r10
- adcq 24(%rdi),%r11
- adcq 32(%rdi),%r12
- adcq 40(%rdi),%r13
- adcq 48(%rdi),%r14
- adcq 56(%rdi),%r15
- sbbq %rsi,%rsi
- movl $8,%ecx
- jmp L$8x_tail
- .p2align 5
- L$8x_tail_done:
- xorq %rax,%rax
- addq (%rdx),%r8
- adcq $0,%r9
- adcq $0,%r10
- adcq $0,%r11
- adcq $0,%r12
- adcq $0,%r13
- adcq $0,%r14
- adcq $0,%r15
- adcq $0,%rax
- negq %rsi
- L$8x_no_tail:
- adcq 0(%rdi),%r8
- adcq 8(%rdi),%r9
- adcq 16(%rdi),%r10
- adcq 24(%rdi),%r11
- adcq 32(%rdi),%r12
- adcq 40(%rdi),%r13
- adcq 48(%rdi),%r14
- adcq 56(%rdi),%r15
- adcq $0,%rax
- movq -8(%rbp),%rcx
- xorq %rsi,%rsi
- .byte 102,72,15,126,213
- movq %r8,0(%rdi)
- movq %r9,8(%rdi)
- .byte 102,73,15,126,217
- movq %r10,16(%rdi)
- movq %r11,24(%rdi)
- movq %r12,32(%rdi)
- movq %r13,40(%rdi)
- movq %r14,48(%rdi)
- movq %r15,56(%rdi)
- leaq 64(%rdi),%rdi
- cmpq %rdx,%rdi
- jb L$8x_reduction_loop
- .byte 0xf3,0xc3
- .p2align 5
- __bn_post4x_internal:
- movq 0(%rbp),%r12
- leaq (%rdi,%r9,1),%rbx
- movq %r9,%rcx
- .byte 102,72,15,126,207
- negq %rax
- .byte 102,72,15,126,206
- sarq $3+2,%rcx
- decq %r12
- xorq %r10,%r10
- movq 8(%rbp),%r13
- movq 16(%rbp),%r14
- movq 24(%rbp),%r15
- jmp L$sqr4x_sub_entry
- .p2align 4
- L$sqr4x_sub:
- movq 0(%rbp),%r12
- movq 8(%rbp),%r13
- movq 16(%rbp),%r14
- movq 24(%rbp),%r15
- L$sqr4x_sub_entry:
- leaq 32(%rbp),%rbp
- notq %r12
- notq %r13
- notq %r14
- notq %r15
- andq %rax,%r12
- andq %rax,%r13
- andq %rax,%r14
- andq %rax,%r15
- negq %r10
- adcq 0(%rbx),%r12
- adcq 8(%rbx),%r13
- adcq 16(%rbx),%r14
- adcq 24(%rbx),%r15
- movq %r12,0(%rdi)
- leaq 32(%rbx),%rbx
- movq %r13,8(%rdi)
- sbbq %r10,%r10
- movq %r14,16(%rdi)
- movq %r15,24(%rdi)
- leaq 32(%rdi),%rdi
- incq %rcx
- jnz L$sqr4x_sub
- movq %r9,%r10
- negq %r9
- .byte 0xf3,0xc3
- .p2align 5
- bn_mulx4x_mont_gather5:
- movq %rsp,%rax
- L$mulx4x_enter:
- pushq %rbx
- pushq %rbp
- pushq %r12
- pushq %r13
- pushq %r14
- pushq %r15
- L$mulx4x_prologue:
- shll $3,%r9d
- leaq (%r9,%r9,2),%r10
- negq %r9
- movq (%r8),%r8
- leaq -320(%rsp,%r9,2),%r11
- movq %rsp,%rbp
- subq %rdi,%r11
- andq $4095,%r11
- cmpq %r11,%r10
- jb L$mulx4xsp_alt
- subq %r11,%rbp
- leaq -320(%rbp,%r9,2),%rbp
- jmp L$mulx4xsp_done
- L$mulx4xsp_alt:
- leaq 4096-320(,%r9,2),%r10
- leaq -320(%rbp,%r9,2),%rbp
- subq %r10,%r11
- movq $0,%r10
- cmovcq %r10,%r11
- subq %r11,%rbp
- L$mulx4xsp_done:
- andq $-64,%rbp
- movq %rsp,%r11
- subq %rbp,%r11
- andq $-4096,%r11
- leaq (%r11,%rbp,1),%rsp
- movq (%rsp),%r10
- cmpq %rbp,%rsp
- ja L$mulx4x_page_walk
- jmp L$mulx4x_page_walk_done
- L$mulx4x_page_walk:
- leaq -4096(%rsp),%rsp
- movq (%rsp),%r10
- cmpq %rbp,%rsp
- ja L$mulx4x_page_walk
- L$mulx4x_page_walk_done:
- movq %r8,32(%rsp)
- movq %rax,40(%rsp)
- L$mulx4x_body:
- call mulx4x_internal
- movq 40(%rsp),%rsi
- movq $1,%rax
- movq -48(%rsi),%r15
- movq -40(%rsi),%r14
- movq -32(%rsi),%r13
- movq -24(%rsi),%r12
- movq -16(%rsi),%rbp
- movq -8(%rsi),%rbx
- leaq (%rsi),%rsp
- L$mulx4x_epilogue:
- .byte 0xf3,0xc3
- .p2align 5
- mulx4x_internal:
- movq %r9,8(%rsp)
- movq %r9,%r10
- negq %r9
- shlq $5,%r9
- negq %r10
- leaq 128(%rdx,%r9,1),%r13
- shrq $5+5,%r9
- movd 8(%rax),%xmm5
- subq $1,%r9
- leaq L$inc(%rip),%rax
- movq %r13,16+8(%rsp)
- movq %r9,24+8(%rsp)
- movq %rdi,56+8(%rsp)
- movdqa 0(%rax),%xmm0
- movdqa 16(%rax),%xmm1
- leaq 88-112(%rsp,%r10,1),%r10
- leaq 128(%rdx),%rdi
- pshufd $0,%xmm5,%xmm5
- movdqa %xmm1,%xmm4
- .byte 0x67
- movdqa %xmm1,%xmm2
- .byte 0x67
- paddd %xmm0,%xmm1
- pcmpeqd %xmm5,%xmm0
- movdqa %xmm4,%xmm3
- paddd %xmm1,%xmm2
- pcmpeqd %xmm5,%xmm1
- movdqa %xmm0,112(%r10)
- movdqa %xmm4,%xmm0
- paddd %xmm2,%xmm3
- pcmpeqd %xmm5,%xmm2
- movdqa %xmm1,128(%r10)
- movdqa %xmm4,%xmm1
- paddd %xmm3,%xmm0
- pcmpeqd %xmm5,%xmm3
- movdqa %xmm2,144(%r10)
- movdqa %xmm4,%xmm2
- paddd %xmm0,%xmm1
- pcmpeqd %xmm5,%xmm0
- movdqa %xmm3,160(%r10)
- movdqa %xmm4,%xmm3
- paddd %xmm1,%xmm2
- pcmpeqd %xmm5,%xmm1
- movdqa %xmm0,176(%r10)
- movdqa %xmm4,%xmm0
- paddd %xmm2,%xmm3
- pcmpeqd %xmm5,%xmm2
- movdqa %xmm1,192(%r10)
- movdqa %xmm4,%xmm1
- paddd %xmm3,%xmm0
- pcmpeqd %xmm5,%xmm3
- movdqa %xmm2,208(%r10)
- movdqa %xmm4,%xmm2
- paddd %xmm0,%xmm1
- pcmpeqd %xmm5,%xmm0
- movdqa %xmm3,224(%r10)
- movdqa %xmm4,%xmm3
- paddd %xmm1,%xmm2
- pcmpeqd %xmm5,%xmm1
- movdqa %xmm0,240(%r10)
- movdqa %xmm4,%xmm0
- paddd %xmm2,%xmm3
- pcmpeqd %xmm5,%xmm2
- movdqa %xmm1,256(%r10)
- movdqa %xmm4,%xmm1
- paddd %xmm3,%xmm0
- pcmpeqd %xmm5,%xmm3
- movdqa %xmm2,272(%r10)
- movdqa %xmm4,%xmm2
- paddd %xmm0,%xmm1
- pcmpeqd %xmm5,%xmm0
- movdqa %xmm3,288(%r10)
- movdqa %xmm4,%xmm3
- .byte 0x67
- paddd %xmm1,%xmm2
- pcmpeqd %xmm5,%xmm1
- movdqa %xmm0,304(%r10)
- paddd %xmm2,%xmm3
- pcmpeqd %xmm5,%xmm2
- movdqa %xmm1,320(%r10)
- pcmpeqd %xmm5,%xmm3
- movdqa %xmm2,336(%r10)
- pand 64(%rdi),%xmm0
- pand 80(%rdi),%xmm1
- pand 96(%rdi),%xmm2
- movdqa %xmm3,352(%r10)
- pand 112(%rdi),%xmm3
- por %xmm2,%xmm0
- por %xmm3,%xmm1
- movdqa -128(%rdi),%xmm4
- movdqa -112(%rdi),%xmm5
- movdqa -96(%rdi),%xmm2
- pand 112(%r10),%xmm4
- movdqa -80(%rdi),%xmm3
- pand 128(%r10),%xmm5
- por %xmm4,%xmm0
- pand 144(%r10),%xmm2
- por %xmm5,%xmm1
- pand 160(%r10),%xmm3
- por %xmm2,%xmm0
- por %xmm3,%xmm1
- movdqa -64(%rdi),%xmm4
- movdqa -48(%rdi),%xmm5
- movdqa -32(%rdi),%xmm2
- pand 176(%r10),%xmm4
- movdqa -16(%rdi),%xmm3
- pand 192(%r10),%xmm5
- por %xmm4,%xmm0
- pand 208(%r10),%xmm2
- por %xmm5,%xmm1
- pand 224(%r10),%xmm3
- por %xmm2,%xmm0
- por %xmm3,%xmm1
- movdqa 0(%rdi),%xmm4
- movdqa 16(%rdi),%xmm5
- movdqa 32(%rdi),%xmm2
- pand 240(%r10),%xmm4
- movdqa 48(%rdi),%xmm3
- pand 256(%r10),%xmm5
- por %xmm4,%xmm0
- pand 272(%r10),%xmm2
- por %xmm5,%xmm1
- pand 288(%r10),%xmm3
- por %xmm2,%xmm0
- por %xmm3,%xmm1
- pxor %xmm1,%xmm0
- pshufd $0x4e,%xmm0,%xmm1
- por %xmm1,%xmm0
- leaq 256(%rdi),%rdi
- .byte 102,72,15,126,194
- leaq 64+32+8(%rsp),%rbx
- movq %rdx,%r9
- mulxq 0(%rsi),%r8,%rax
- mulxq 8(%rsi),%r11,%r12
- addq %rax,%r11
- mulxq 16(%rsi),%rax,%r13
- adcq %rax,%r12
- adcq $0,%r13
- mulxq 24(%rsi),%rax,%r14
- movq %r8,%r15
- imulq 32+8(%rsp),%r8
- xorq %rbp,%rbp
- movq %r8,%rdx
- movq %rdi,8+8(%rsp)
- leaq 32(%rsi),%rsi
- adcxq %rax,%r13
- adcxq %rbp,%r14
- mulxq 0(%rcx),%rax,%r10
- adcxq %rax,%r15
- adoxq %r11,%r10
- mulxq 8(%rcx),%rax,%r11
- adcxq %rax,%r10
- adoxq %r12,%r11
- mulxq 16(%rcx),%rax,%r12
- movq 24+8(%rsp),%rdi
- movq %r10,-32(%rbx)
- adcxq %rax,%r11
- adoxq %r13,%r12
- mulxq 24(%rcx),%rax,%r15
- movq %r9,%rdx
- movq %r11,-24(%rbx)
- adcxq %rax,%r12
- adoxq %rbp,%r15
- leaq 32(%rcx),%rcx
- movq %r12,-16(%rbx)
- jmp L$mulx4x_1st
- .p2align 5
- L$mulx4x_1st:
- adcxq %rbp,%r15
- mulxq 0(%rsi),%r10,%rax
- adcxq %r14,%r10
- mulxq 8(%rsi),%r11,%r14
- adcxq %rax,%r11
- mulxq 16(%rsi),%r12,%rax
- adcxq %r14,%r12
- mulxq 24(%rsi),%r13,%r14
- .byte 0x67,0x67
- movq %r8,%rdx
- adcxq %rax,%r13
- adcxq %rbp,%r14
- leaq 32(%rsi),%rsi
- leaq 32(%rbx),%rbx
- adoxq %r15,%r10
- mulxq 0(%rcx),%rax,%r15
- adcxq %rax,%r10
- adoxq %r15,%r11
- mulxq 8(%rcx),%rax,%r15
- adcxq %rax,%r11
- adoxq %r15,%r12
- mulxq 16(%rcx),%rax,%r15
- movq %r10,-40(%rbx)
- adcxq %rax,%r12
- movq %r11,-32(%rbx)
- adoxq %r15,%r13
- mulxq 24(%rcx),%rax,%r15
- movq %r9,%rdx
- movq %r12,-24(%rbx)
- adcxq %rax,%r13
- adoxq %rbp,%r15
- leaq 32(%rcx),%rcx
- movq %r13,-16(%rbx)
- decq %rdi
- jnz L$mulx4x_1st
- movq 8(%rsp),%rax
- adcq %rbp,%r15
- leaq (%rsi,%rax,1),%rsi
- addq %r15,%r14
- movq 8+8(%rsp),%rdi
- adcq %rbp,%rbp
- movq %r14,-8(%rbx)
- jmp L$mulx4x_outer
- .p2align 5
- L$mulx4x_outer:
- leaq 16-256(%rbx),%r10
- pxor %xmm4,%xmm4
- .byte 0x67,0x67
- pxor %xmm5,%xmm5
- movdqa -128(%rdi),%xmm0
- movdqa -112(%rdi),%xmm1
- movdqa -96(%rdi),%xmm2
- pand 256(%r10),%xmm0
- movdqa -80(%rdi),%xmm3
- pand 272(%r10),%xmm1
- por %xmm0,%xmm4
- pand 288(%r10),%xmm2
- por %xmm1,%xmm5
- pand 304(%r10),%xmm3
- por %xmm2,%xmm4
- por %xmm3,%xmm5
- movdqa -64(%rdi),%xmm0
- movdqa -48(%rdi),%xmm1
- movdqa -32(%rdi),%xmm2
- pand 320(%r10),%xmm0
- movdqa -16(%rdi),%xmm3
- pand 336(%r10),%xmm1
- por %xmm0,%xmm4
- pand 352(%r10),%xmm2
- por %xmm1,%xmm5
- pand 368(%r10),%xmm3
- por %xmm2,%xmm4
- por %xmm3,%xmm5
- movdqa 0(%rdi),%xmm0
- movdqa 16(%rdi),%xmm1
- movdqa 32(%rdi),%xmm2
- pand 384(%r10),%xmm0
- movdqa 48(%rdi),%xmm3
- pand 400(%r10),%xmm1
- por %xmm0,%xmm4
- pand 416(%r10),%xmm2
- por %xmm1,%xmm5
- pand 432(%r10),%xmm3
- por %xmm2,%xmm4
- por %xmm3,%xmm5
- movdqa 64(%rdi),%xmm0
- movdqa 80(%rdi),%xmm1
- movdqa 96(%rdi),%xmm2
- pand 448(%r10),%xmm0
- movdqa 112(%rdi),%xmm3
- pand 464(%r10),%xmm1
- por %xmm0,%xmm4
- pand 480(%r10),%xmm2
- por %xmm1,%xmm5
- pand 496(%r10),%xmm3
- por %xmm2,%xmm4
- por %xmm3,%xmm5
- por %xmm5,%xmm4
- pshufd $0x4e,%xmm4,%xmm0
- por %xmm4,%xmm0
- leaq 256(%rdi),%rdi
- .byte 102,72,15,126,194
- movq %rbp,(%rbx)
- leaq 32(%rbx,%rax,1),%rbx
- mulxq 0(%rsi),%r8,%r11
- xorq %rbp,%rbp
- movq %rdx,%r9
- mulxq 8(%rsi),%r14,%r12
- adoxq -32(%rbx),%r8
- adcxq %r14,%r11
- mulxq 16(%rsi),%r15,%r13
- adoxq -24(%rbx),%r11
- adcxq %r15,%r12
- mulxq 24(%rsi),%rdx,%r14
- adoxq -16(%rbx),%r12
- adcxq %rdx,%r13
- leaq (%rcx,%rax,1),%rcx
- leaq 32(%rsi),%rsi
- adoxq -8(%rbx),%r13
- adcxq %rbp,%r14
- adoxq %rbp,%r14
- movq %r8,%r15
- imulq 32+8(%rsp),%r8
- movq %r8,%rdx
- xorq %rbp,%rbp
- movq %rdi,8+8(%rsp)
- mulxq 0(%rcx),%rax,%r10
- adcxq %rax,%r15
- adoxq %r11,%r10
- mulxq 8(%rcx),%rax,%r11
- adcxq %rax,%r10
- adoxq %r12,%r11
- mulxq 16(%rcx),%rax,%r12
- adcxq %rax,%r11
- adoxq %r13,%r12
- mulxq 24(%rcx),%rax,%r15
- movq %r9,%rdx
- movq 24+8(%rsp),%rdi
- movq %r10,-32(%rbx)
- adcxq %rax,%r12
- movq %r11,-24(%rbx)
- adoxq %rbp,%r15
- movq %r12,-16(%rbx)
- leaq 32(%rcx),%rcx
- jmp L$mulx4x_inner
- .p2align 5
- L$mulx4x_inner:
- mulxq 0(%rsi),%r10,%rax
- adcxq %rbp,%r15
- adoxq %r14,%r10
- mulxq 8(%rsi),%r11,%r14
- adcxq 0(%rbx),%r10
- adoxq %rax,%r11
- mulxq 16(%rsi),%r12,%rax
- adcxq 8(%rbx),%r11
- adoxq %r14,%r12
- mulxq 24(%rsi),%r13,%r14
- movq %r8,%rdx
- adcxq 16(%rbx),%r12
- adoxq %rax,%r13
- adcxq 24(%rbx),%r13
- adoxq %rbp,%r14
- leaq 32(%rsi),%rsi
- leaq 32(%rbx),%rbx
- adcxq %rbp,%r14
- adoxq %r15,%r10
- mulxq 0(%rcx),%rax,%r15
- adcxq %rax,%r10
- adoxq %r15,%r11
- mulxq 8(%rcx),%rax,%r15
- adcxq %rax,%r11
- adoxq %r15,%r12
- mulxq 16(%rcx),%rax,%r15
- movq %r10,-40(%rbx)
- adcxq %rax,%r12
- adoxq %r15,%r13
- movq %r11,-32(%rbx)
- mulxq 24(%rcx),%rax,%r15
- movq %r9,%rdx
- leaq 32(%rcx),%rcx
- movq %r12,-24(%rbx)
- adcxq %rax,%r13
- adoxq %rbp,%r15
- movq %r13,-16(%rbx)
- decq %rdi
- jnz L$mulx4x_inner
- movq 0+8(%rsp),%rax
- adcq %rbp,%r15
- subq 0(%rbx),%rdi
- movq 8+8(%rsp),%rdi
- movq 16+8(%rsp),%r10
- adcq %r15,%r14
- leaq (%rsi,%rax,1),%rsi
- adcq %rbp,%rbp
- movq %r14,-8(%rbx)
- cmpq %r10,%rdi
- jb L$mulx4x_outer
- movq -8(%rcx),%r10
- movq %rbp,%r8
- movq (%rcx,%rax,1),%r12
- leaq (%rcx,%rax,1),%rbp
- movq %rax,%rcx
- leaq (%rbx,%rax,1),%rdi
- xorl %eax,%eax
- xorq %r15,%r15
- subq %r14,%r10
- adcq %r15,%r15
- orq %r15,%r8
- sarq $3+2,%rcx
- subq %r8,%rax
- movq 56+8(%rsp),%rdx
- decq %r12
- movq 8(%rbp),%r13
- xorq %r8,%r8
- movq 16(%rbp),%r14
- movq 24(%rbp),%r15
- jmp L$sqrx4x_sub_entry
- .p2align 5
- bn_powerx5:
- movq %rsp,%rax
- L$powerx5_enter:
- pushq %rbx
- pushq %rbp
- pushq %r12
- pushq %r13
- pushq %r14
- pushq %r15
- L$powerx5_prologue:
- shll $3,%r9d
- leaq (%r9,%r9,2),%r10
- negq %r9
- movq (%r8),%r8
- leaq -320(%rsp,%r9,2),%r11
- movq %rsp,%rbp
- subq %rdi,%r11
- andq $4095,%r11
- cmpq %r11,%r10
- jb L$pwrx_sp_alt
- subq %r11,%rbp
- leaq -320(%rbp,%r9,2),%rbp
- jmp L$pwrx_sp_done
- .p2align 5
- L$pwrx_sp_alt:
- leaq 4096-320(,%r9,2),%r10
- leaq -320(%rbp,%r9,2),%rbp
- subq %r10,%r11
- movq $0,%r10
- cmovcq %r10,%r11
- subq %r11,%rbp
- L$pwrx_sp_done:
- andq $-64,%rbp
- movq %rsp,%r11
- subq %rbp,%r11
- andq $-4096,%r11
- leaq (%r11,%rbp,1),%rsp
- movq (%rsp),%r10
- cmpq %rbp,%rsp
- ja L$pwrx_page_walk
- jmp L$pwrx_page_walk_done
- L$pwrx_page_walk:
- leaq -4096(%rsp),%rsp
- movq (%rsp),%r10
- cmpq %rbp,%rsp
- ja L$pwrx_page_walk
- L$pwrx_page_walk_done:
- movq %r9,%r10
- negq %r9
- pxor %xmm0,%xmm0
- .byte 102,72,15,110,207
- .byte 102,72,15,110,209
- .byte 102,73,15,110,218
- .byte 102,72,15,110,226
- movq %r8,32(%rsp)
- movq %rax,40(%rsp)
- L$powerx5_body:
- call __bn_sqrx8x_internal
- call __bn_postx4x_internal
- call __bn_sqrx8x_internal
- call __bn_postx4x_internal
- call __bn_sqrx8x_internal
- call __bn_postx4x_internal
- call __bn_sqrx8x_internal
- call __bn_postx4x_internal
- call __bn_sqrx8x_internal
- call __bn_postx4x_internal
- movq %r10,%r9
- movq %rsi,%rdi
- .byte 102,72,15,126,209
- .byte 102,72,15,126,226
- movq 40(%rsp),%rax
- call mulx4x_internal
- movq 40(%rsp),%rsi
- movq $1,%rax
- movq -48(%rsi),%r15
- movq -40(%rsi),%r14
- movq -32(%rsi),%r13
- movq -24(%rsi),%r12
- movq -16(%rsi),%rbp
- movq -8(%rsi),%rbx
- leaq (%rsi),%rsp
- L$powerx5_epilogue:
- .byte 0xf3,0xc3
- .globl _bn_sqrx8x_internal
- .private_extern _bn_sqrx8x_internal
- .p2align 5
- _bn_sqrx8x_internal:
- __bn_sqrx8x_internal:
- leaq 48+8(%rsp),%rdi
- leaq (%rsi,%r9,1),%rbp
- movq %r9,0+8(%rsp)
- movq %rbp,8+8(%rsp)
- jmp L$sqr8x_zero_start
- .p2align 5
- .byte 0x66,0x66,0x66,0x2e,0x0f,0x1f,0x84,0x00,0x00,0x00,0x00,0x00
- L$sqrx8x_zero:
- .byte 0x3e
- movdqa %xmm0,0(%rdi)
- movdqa %xmm0,16(%rdi)
- movdqa %xmm0,32(%rdi)
- movdqa %xmm0,48(%rdi)
- L$sqr8x_zero_start:
- movdqa %xmm0,64(%rdi)
- movdqa %xmm0,80(%rdi)
- movdqa %xmm0,96(%rdi)
- movdqa %xmm0,112(%rdi)
- leaq 128(%rdi),%rdi
- subq $64,%r9
- jnz L$sqrx8x_zero
- movq 0(%rsi),%rdx
- xorq %r10,%r10
- xorq %r11,%r11
- xorq %r12,%r12
- xorq %r13,%r13
- xorq %r14,%r14
- xorq %r15,%r15
- leaq 48+8(%rsp),%rdi
- xorq %rbp,%rbp
- jmp L$sqrx8x_outer_loop
- .p2align 5
- L$sqrx8x_outer_loop:
- mulxq 8(%rsi),%r8,%rax
- adcxq %r9,%r8
- adoxq %rax,%r10
- mulxq 16(%rsi),%r9,%rax
- adcxq %r10,%r9
- adoxq %rax,%r11
- .byte 0xc4,0xe2,0xab,0xf6,0x86,0x18,0x00,0x00,0x00
- adcxq %r11,%r10
- adoxq %rax,%r12
- .byte 0xc4,0xe2,0xa3,0xf6,0x86,0x20,0x00,0x00,0x00
- adcxq %r12,%r11
- adoxq %rax,%r13
- mulxq 40(%rsi),%r12,%rax
- adcxq %r13,%r12
- adoxq %rax,%r14
- mulxq 48(%rsi),%r13,%rax
- adcxq %r14,%r13
- adoxq %r15,%rax
- mulxq 56(%rsi),%r14,%r15
- movq 8(%rsi),%rdx
- adcxq %rax,%r14
- adoxq %rbp,%r15
- adcq 64(%rdi),%r15
- movq %r8,8(%rdi)
- movq %r9,16(%rdi)
- sbbq %rcx,%rcx
- xorq %rbp,%rbp
- mulxq 16(%rsi),%r8,%rbx
- mulxq 24(%rsi),%r9,%rax
- adcxq %r10,%r8
- adoxq %rbx,%r9
- mulxq 32(%rsi),%r10,%rbx
- adcxq %r11,%r9
- adoxq %rax,%r10
- .byte 0xc4,0xe2,0xa3,0xf6,0x86,0x28,0x00,0x00,0x00
- adcxq %r12,%r10
- adoxq %rbx,%r11
- .byte 0xc4,0xe2,0x9b,0xf6,0x9e,0x30,0x00,0x00,0x00
- adcxq %r13,%r11
- adoxq %r14,%r12
- .byte 0xc4,0x62,0x93,0xf6,0xb6,0x38,0x00,0x00,0x00
- movq 16(%rsi),%rdx
- adcxq %rax,%r12
- adoxq %rbx,%r13
- adcxq %r15,%r13
- adoxq %rbp,%r14
- adcxq %rbp,%r14
- movq %r8,24(%rdi)
- movq %r9,32(%rdi)
- mulxq 24(%rsi),%r8,%rbx
- mulxq 32(%rsi),%r9,%rax
- adcxq %r10,%r8
- adoxq %rbx,%r9
- mulxq 40(%rsi),%r10,%rbx
- adcxq %r11,%r9
- adoxq %rax,%r10
- .byte 0xc4,0xe2,0xa3,0xf6,0x86,0x30,0x00,0x00,0x00
- adcxq %r12,%r10
- adoxq %r13,%r11
- .byte 0xc4,0x62,0x9b,0xf6,0xae,0x38,0x00,0x00,0x00
- .byte 0x3e
- movq 24(%rsi),%rdx
- adcxq %rbx,%r11
- adoxq %rax,%r12
- adcxq %r14,%r12
- movq %r8,40(%rdi)
- movq %r9,48(%rdi)
- mulxq 32(%rsi),%r8,%rax
- adoxq %rbp,%r13
- adcxq %rbp,%r13
- mulxq 40(%rsi),%r9,%rbx
- adcxq %r10,%r8
- adoxq %rax,%r9
- mulxq 48(%rsi),%r10,%rax
- adcxq %r11,%r9
- adoxq %r12,%r10
- mulxq 56(%rsi),%r11,%r12
- movq 32(%rsi),%rdx
- movq 40(%rsi),%r14
- adcxq %rbx,%r10
- adoxq %rax,%r11
- movq 48(%rsi),%r15
- adcxq %r13,%r11
- adoxq %rbp,%r12
- adcxq %rbp,%r12
- movq %r8,56(%rdi)
- movq %r9,64(%rdi)
- mulxq %r14,%r9,%rax
- movq 56(%rsi),%r8
- adcxq %r10,%r9
- mulxq %r15,%r10,%rbx
- adoxq %rax,%r10
- adcxq %r11,%r10
- mulxq %r8,%r11,%rax
- movq %r14,%rdx
- adoxq %rbx,%r11
- adcxq %r12,%r11
- adcxq %rbp,%rax
- mulxq %r15,%r14,%rbx
- mulxq %r8,%r12,%r13
- movq %r15,%rdx
- leaq 64(%rsi),%rsi
- adcxq %r14,%r11
- adoxq %rbx,%r12
- adcxq %rax,%r12
- adoxq %rbp,%r13
- .byte 0x67,0x67
- mulxq %r8,%r8,%r14
- adcxq %r8,%r13
- adcxq %rbp,%r14
- cmpq 8+8(%rsp),%rsi
- je L$sqrx8x_outer_break
- negq %rcx
- movq $-8,%rcx
- movq %rbp,%r15
- movq 64(%rdi),%r8
- adcxq 72(%rdi),%r9
- adcxq 80(%rdi),%r10
- adcxq 88(%rdi),%r11
- adcq 96(%rdi),%r12
- adcq 104(%rdi),%r13
- adcq 112(%rdi),%r14
- adcq 120(%rdi),%r15
- leaq (%rsi),%rbp
- leaq 128(%rdi),%rdi
- sbbq %rax,%rax
- movq -64(%rsi),%rdx
- movq %rax,16+8(%rsp)
- movq %rdi,24+8(%rsp)
- xorl %eax,%eax
- jmp L$sqrx8x_loop
- .p2align 5
- L$sqrx8x_loop:
- movq %r8,%rbx
- mulxq 0(%rbp),%rax,%r8
- adcxq %rax,%rbx
- adoxq %r9,%r8
- mulxq 8(%rbp),%rax,%r9
- adcxq %rax,%r8
- adoxq %r10,%r9
- mulxq 16(%rbp),%rax,%r10
- adcxq %rax,%r9
- adoxq %r11,%r10
- mulxq 24(%rbp),%rax,%r11
- adcxq %rax,%r10
- adoxq %r12,%r11
- .byte 0xc4,0x62,0xfb,0xf6,0xa5,0x20,0x00,0x00,0x00
- adcxq %rax,%r11
- adoxq %r13,%r12
- mulxq 40(%rbp),%rax,%r13
- adcxq %rax,%r12
- adoxq %r14,%r13
- mulxq 48(%rbp),%rax,%r14
- movq %rbx,(%rdi,%rcx,8)
- movl $0,%ebx
- adcxq %rax,%r13
- adoxq %r15,%r14
- .byte 0xc4,0x62,0xfb,0xf6,0xbd,0x38,0x00,0x00,0x00
- movq 8(%rsi,%rcx,8),%rdx
- adcxq %rax,%r14
- adoxq %rbx,%r15
- adcxq %rbx,%r15
- .byte 0x67
- incq %rcx
- jnz L$sqrx8x_loop
- leaq 64(%rbp),%rbp
- movq $-8,%rcx
- cmpq 8+8(%rsp),%rbp
- je L$sqrx8x_break
- subq 16+8(%rsp),%rbx
- .byte 0x66
- movq -64(%rsi),%rdx
- adcxq 0(%rdi),%r8
- adcxq 8(%rdi),%r9
- adcq 16(%rdi),%r10
- adcq 24(%rdi),%r11
- adcq 32(%rdi),%r12
- adcq 40(%rdi),%r13
- adcq 48(%rdi),%r14
- adcq 56(%rdi),%r15
- leaq 64(%rdi),%rdi
- .byte 0x67
- sbbq %rax,%rax
- xorl %ebx,%ebx
- movq %rax,16+8(%rsp)
- jmp L$sqrx8x_loop
- .p2align 5
- L$sqrx8x_break:
- xorq %rbp,%rbp
- subq 16+8(%rsp),%rbx
- adcxq %rbp,%r8
- movq 24+8(%rsp),%rcx
- adcxq %rbp,%r9
- movq 0(%rsi),%rdx
- adcq $0,%r10
- movq %r8,0(%rdi)
- adcq $0,%r11
- adcq $0,%r12
- adcq $0,%r13
- adcq $0,%r14
- adcq $0,%r15
- cmpq %rcx,%rdi
- je L$sqrx8x_outer_loop
- movq %r9,8(%rdi)
- movq 8(%rcx),%r9
- movq %r10,16(%rdi)
- movq 16(%rcx),%r10
- movq %r11,24(%rdi)
- movq 24(%rcx),%r11
- movq %r12,32(%rdi)
- movq 32(%rcx),%r12
- movq %r13,40(%rdi)
- movq 40(%rcx),%r13
- movq %r14,48(%rdi)
- movq 48(%rcx),%r14
- movq %r15,56(%rdi)
- movq 56(%rcx),%r15
- movq %rcx,%rdi
- jmp L$sqrx8x_outer_loop
- .p2align 5
- L$sqrx8x_outer_break:
- movq %r9,72(%rdi)
- .byte 102,72,15,126,217
- movq %r10,80(%rdi)
- movq %r11,88(%rdi)
- movq %r12,96(%rdi)
- movq %r13,104(%rdi)
- movq %r14,112(%rdi)
- leaq 48+8(%rsp),%rdi
- movq (%rsi,%rcx,1),%rdx
- movq 8(%rdi),%r11
- xorq %r10,%r10
- movq 0+8(%rsp),%r9
- adoxq %r11,%r11
- movq 16(%rdi),%r12
- movq 24(%rdi),%r13
- .p2align 5
- L$sqrx4x_shift_n_add:
- mulxq %rdx,%rax,%rbx
- adoxq %r12,%r12
- adcxq %r10,%rax
- .byte 0x48,0x8b,0x94,0x0e,0x08,0x00,0x00,0x00
- .byte 0x4c,0x8b,0x97,0x20,0x00,0x00,0x00
- adoxq %r13,%r13
- adcxq %r11,%rbx
- movq 40(%rdi),%r11
- movq %rax,0(%rdi)
- movq %rbx,8(%rdi)
- mulxq %rdx,%rax,%rbx
- adoxq %r10,%r10
- adcxq %r12,%rax
- movq 16(%rsi,%rcx,1),%rdx
- movq 48(%rdi),%r12
- adoxq %r11,%r11
- adcxq %r13,%rbx
- movq 56(%rdi),%r13
- movq %rax,16(%rdi)
- movq %rbx,24(%rdi)
- mulxq %rdx,%rax,%rbx
- adoxq %r12,%r12
- adcxq %r10,%rax
- movq 24(%rsi,%rcx,1),%rdx
- leaq 32(%rcx),%rcx
- movq 64(%rdi),%r10
- adoxq %r13,%r13
- adcxq %r11,%rbx
- movq 72(%rdi),%r11
- movq %rax,32(%rdi)
- movq %rbx,40(%rdi)
- mulxq %rdx,%rax,%rbx
- adoxq %r10,%r10
- adcxq %r12,%rax
- jrcxz L$sqrx4x_shift_n_add_break
- .byte 0x48,0x8b,0x94,0x0e,0x00,0x00,0x00,0x00
- adoxq %r11,%r11
- adcxq %r13,%rbx
- movq 80(%rdi),%r12
- movq 88(%rdi),%r13
- movq %rax,48(%rdi)
- movq %rbx,56(%rdi)
- leaq 64(%rdi),%rdi
- nop
- jmp L$sqrx4x_shift_n_add
- .p2align 5
- L$sqrx4x_shift_n_add_break:
- adcxq %r13,%rbx
- movq %rax,48(%rdi)
- movq %rbx,56(%rdi)
- leaq 64(%rdi),%rdi
- .byte 102,72,15,126,213
- __bn_sqrx8x_reduction:
- xorl %eax,%eax
- movq 32+8(%rsp),%rbx
- movq 48+8(%rsp),%rdx
- leaq -64(%rbp,%r9,1),%rcx
- movq %rcx,0+8(%rsp)
- movq %rdi,8+8(%rsp)
- leaq 48+8(%rsp),%rdi
- jmp L$sqrx8x_reduction_loop
- .p2align 5
- L$sqrx8x_reduction_loop:
- movq 8(%rdi),%r9
- movq 16(%rdi),%r10
- movq 24(%rdi),%r11
- movq 32(%rdi),%r12
- movq %rdx,%r8
- imulq %rbx,%rdx
- movq 40(%rdi),%r13
- movq 48(%rdi),%r14
- movq 56(%rdi),%r15
- movq %rax,24+8(%rsp)
- leaq 64(%rdi),%rdi
- xorq %rsi,%rsi
- movq $-8,%rcx
- jmp L$sqrx8x_reduce
- .p2align 5
- L$sqrx8x_reduce:
- movq %r8,%rbx
- mulxq 0(%rbp),%rax,%r8
- adcxq %rbx,%rax
- adoxq %r9,%r8
- mulxq 8(%rbp),%rbx,%r9
- adcxq %rbx,%r8
- adoxq %r10,%r9
- mulxq 16(%rbp),%rbx,%r10
- adcxq %rbx,%r9
- adoxq %r11,%r10
- mulxq 24(%rbp),%rbx,%r11
- adcxq %rbx,%r10
- adoxq %r12,%r11
- .byte 0xc4,0x62,0xe3,0xf6,0xa5,0x20,0x00,0x00,0x00
- movq %rdx,%rax
- movq %r8,%rdx
- adcxq %rbx,%r11
- adoxq %r13,%r12
- mulxq 32+8(%rsp),%rbx,%rdx
- movq %rax,%rdx
- movq %rax,64+48+8(%rsp,%rcx,8)
- mulxq 40(%rbp),%rax,%r13
- adcxq %rax,%r12
- adoxq %r14,%r13
- mulxq 48(%rbp),%rax,%r14
- adcxq %rax,%r13
- adoxq %r15,%r14
- mulxq 56(%rbp),%rax,%r15
- movq %rbx,%rdx
- adcxq %rax,%r14
- adoxq %rsi,%r15
- adcxq %rsi,%r15
- .byte 0x67,0x67,0x67
- incq %rcx
- jnz L$sqrx8x_reduce
- movq %rsi,%rax
- cmpq 0+8(%rsp),%rbp
- jae L$sqrx8x_no_tail
- movq 48+8(%rsp),%rdx
- addq 0(%rdi),%r8
- leaq 64(%rbp),%rbp
- movq $-8,%rcx
- adcxq 8(%rdi),%r9
- adcxq 16(%rdi),%r10
- adcq 24(%rdi),%r11
- adcq 32(%rdi),%r12
- adcq 40(%rdi),%r13
- adcq 48(%rdi),%r14
- adcq 56(%rdi),%r15
- leaq 64(%rdi),%rdi
- sbbq %rax,%rax
- xorq %rsi,%rsi
- movq %rax,16+8(%rsp)
- jmp L$sqrx8x_tail
- .p2align 5
- L$sqrx8x_tail:
- movq %r8,%rbx
- mulxq 0(%rbp),%rax,%r8
- adcxq %rax,%rbx
- adoxq %r9,%r8
- mulxq 8(%rbp),%rax,%r9
- adcxq %rax,%r8
- adoxq %r10,%r9
- mulxq 16(%rbp),%rax,%r10
- adcxq %rax,%r9
- adoxq %r11,%r10
- mulxq 24(%rbp),%rax,%r11
- adcxq %rax,%r10
- adoxq %r12,%r11
- .byte 0xc4,0x62,0xfb,0xf6,0xa5,0x20,0x00,0x00,0x00
- adcxq %rax,%r11
- adoxq %r13,%r12
- mulxq 40(%rbp),%rax,%r13
- adcxq %rax,%r12
- adoxq %r14,%r13
- mulxq 48(%rbp),%rax,%r14
- adcxq %rax,%r13
- adoxq %r15,%r14
- mulxq 56(%rbp),%rax,%r15
- movq 72+48+8(%rsp,%rcx,8),%rdx
- adcxq %rax,%r14
- adoxq %rsi,%r15
- movq %rbx,(%rdi,%rcx,8)
- movq %r8,%rbx
- adcxq %rsi,%r15
- incq %rcx
- jnz L$sqrx8x_tail
- cmpq 0+8(%rsp),%rbp
- jae L$sqrx8x_tail_done
- subq 16+8(%rsp),%rsi
- movq 48+8(%rsp),%rdx
- leaq 64(%rbp),%rbp
- adcq 0(%rdi),%r8
- adcq 8(%rdi),%r9
- adcq 16(%rdi),%r10
- adcq 24(%rdi),%r11
- adcq 32(%rdi),%r12
- adcq 40(%rdi),%r13
- adcq 48(%rdi),%r14
- adcq 56(%rdi),%r15
- leaq 64(%rdi),%rdi
- sbbq %rax,%rax
- subq $8,%rcx
- xorq %rsi,%rsi
- movq %rax,16+8(%rsp)
- jmp L$sqrx8x_tail
- .p2align 5
- L$sqrx8x_tail_done:
- xorq %rax,%rax
- addq 24+8(%rsp),%r8
- adcq $0,%r9
- adcq $0,%r10
- adcq $0,%r11
- adcq $0,%r12
- adcq $0,%r13
- adcq $0,%r14
- adcq $0,%r15
- adcq $0,%rax
- subq 16+8(%rsp),%rsi
- L$sqrx8x_no_tail:
- adcq 0(%rdi),%r8
- .byte 102,72,15,126,217
- adcq 8(%rdi),%r9
- movq 56(%rbp),%rsi
- .byte 102,72,15,126,213
- adcq 16(%rdi),%r10
- adcq 24(%rdi),%r11
- adcq 32(%rdi),%r12
- adcq 40(%rdi),%r13
- adcq 48(%rdi),%r14
- adcq 56(%rdi),%r15
- adcq $0,%rax
- movq 32+8(%rsp),%rbx
- movq 64(%rdi,%rcx,1),%rdx
- movq %r8,0(%rdi)
- leaq 64(%rdi),%r8
- movq %r9,8(%rdi)
- movq %r10,16(%rdi)
- movq %r11,24(%rdi)
- movq %r12,32(%rdi)
- movq %r13,40(%rdi)
- movq %r14,48(%rdi)
- movq %r15,56(%rdi)
- leaq 64(%rdi,%rcx,1),%rdi
- cmpq 8+8(%rsp),%r8
- jb L$sqrx8x_reduction_loop
- .byte 0xf3,0xc3
- .p2align 5
- __bn_postx4x_internal:
- movq 0(%rbp),%r12
- movq %rcx,%r10
- movq %rcx,%r9
- negq %rax
- sarq $3+2,%rcx
- .byte 102,72,15,126,202
- .byte 102,72,15,126,206
- decq %r12
- movq 8(%rbp),%r13
- xorq %r8,%r8
- movq 16(%rbp),%r14
- movq 24(%rbp),%r15
- jmp L$sqrx4x_sub_entry
- .p2align 4
- L$sqrx4x_sub:
- movq 0(%rbp),%r12
- movq 8(%rbp),%r13
- movq 16(%rbp),%r14
- movq 24(%rbp),%r15
- L$sqrx4x_sub_entry:
- andnq %rax,%r12,%r12
- leaq 32(%rbp),%rbp
- andnq %rax,%r13,%r13
- andnq %rax,%r14,%r14
- andnq %rax,%r15,%r15
- negq %r8
- adcq 0(%rdi),%r12
- adcq 8(%rdi),%r13
- adcq 16(%rdi),%r14
- adcq 24(%rdi),%r15
- movq %r12,0(%rdx)
- leaq 32(%rdi),%rdi
- movq %r13,8(%rdx)
- sbbq %r8,%r8
- movq %r14,16(%rdx)
- movq %r15,24(%rdx)
- leaq 32(%rdx),%rdx
- incq %rcx
- jnz L$sqrx4x_sub
- negq %r9
- .byte 0xf3,0xc3
- .globl _bn_get_bits5
- .p2align 4
- _bn_get_bits5:
- leaq 0(%rdi),%r10
- leaq 1(%rdi),%r11
- movl %esi,%ecx
- shrl $4,%esi
- andl $15,%ecx
- leal -8(%rcx),%eax
- cmpl $11,%ecx
- cmovaq %r11,%r10
- cmoval %eax,%ecx
- movzwl (%r10,%rsi,2),%eax
- shrl %cl,%eax
- andl $31,%eax
- .byte 0xf3,0xc3
- .globl _bn_scatter5
- .p2align 4
- _bn_scatter5:
- cmpl $0,%esi
- jz L$scatter_epilogue
- leaq (%rdx,%rcx,8),%rdx
- L$scatter:
- movq (%rdi),%rax
- leaq 8(%rdi),%rdi
- movq %rax,(%rdx)
- leaq 256(%rdx),%rdx
- subl $1,%esi
- jnz L$scatter
- L$scatter_epilogue:
- .byte 0xf3,0xc3
- .globl _bn_gather5
- .p2align 5
- _bn_gather5:
- L$SEH_begin_bn_gather5:
- .byte 0x4c,0x8d,0x14,0x24
- .byte 0x48,0x81,0xec,0x08,0x01,0x00,0x00
- leaq L$inc(%rip),%rax
- andq $-16,%rsp
- movd %ecx,%xmm5
- movdqa 0(%rax),%xmm0
- movdqa 16(%rax),%xmm1
- leaq 128(%rdx),%r11
- leaq 128(%rsp),%rax
- pshufd $0,%xmm5,%xmm5
- movdqa %xmm1,%xmm4
- movdqa %xmm1,%xmm2
- paddd %xmm0,%xmm1
- pcmpeqd %xmm5,%xmm0
- movdqa %xmm4,%xmm3
- paddd %xmm1,%xmm2
- pcmpeqd %xmm5,%xmm1
- movdqa %xmm0,-128(%rax)
- movdqa %xmm4,%xmm0
- paddd %xmm2,%xmm3
- pcmpeqd %xmm5,%xmm2
- movdqa %xmm1,-112(%rax)
- movdqa %xmm4,%xmm1
- paddd %xmm3,%xmm0
- pcmpeqd %xmm5,%xmm3
- movdqa %xmm2,-96(%rax)
- movdqa %xmm4,%xmm2
- paddd %xmm0,%xmm1
- pcmpeqd %xmm5,%xmm0
- movdqa %xmm3,-80(%rax)
- movdqa %xmm4,%xmm3
- paddd %xmm1,%xmm2
- pcmpeqd %xmm5,%xmm1
- movdqa %xmm0,-64(%rax)
- movdqa %xmm4,%xmm0
- paddd %xmm2,%xmm3
- pcmpeqd %xmm5,%xmm2
- movdqa %xmm1,-48(%rax)
- movdqa %xmm4,%xmm1
- paddd %xmm3,%xmm0
- pcmpeqd %xmm5,%xmm3
- movdqa %xmm2,-32(%rax)
- movdqa %xmm4,%xmm2
- paddd %xmm0,%xmm1
- pcmpeqd %xmm5,%xmm0
- movdqa %xmm3,-16(%rax)
- movdqa %xmm4,%xmm3
- paddd %xmm1,%xmm2
- pcmpeqd %xmm5,%xmm1
- movdqa %xmm0,0(%rax)
- movdqa %xmm4,%xmm0
- paddd %xmm2,%xmm3
- pcmpeqd %xmm5,%xmm2
- movdqa %xmm1,16(%rax)
- movdqa %xmm4,%xmm1
- paddd %xmm3,%xmm0
- pcmpeqd %xmm5,%xmm3
- movdqa %xmm2,32(%rax)
- movdqa %xmm4,%xmm2
- paddd %xmm0,%xmm1
- pcmpeqd %xmm5,%xmm0
- movdqa %xmm3,48(%rax)
- movdqa %xmm4,%xmm3
- paddd %xmm1,%xmm2
- pcmpeqd %xmm5,%xmm1
- movdqa %xmm0,64(%rax)
- movdqa %xmm4,%xmm0
- paddd %xmm2,%xmm3
- pcmpeqd %xmm5,%xmm2
- movdqa %xmm1,80(%rax)
- movdqa %xmm4,%xmm1
- paddd %xmm3,%xmm0
- pcmpeqd %xmm5,%xmm3
- movdqa %xmm2,96(%rax)
- movdqa %xmm4,%xmm2
- movdqa %xmm3,112(%rax)
- jmp L$gather
- .p2align 5
- L$gather:
- pxor %xmm4,%xmm4
- pxor %xmm5,%xmm5
- movdqa -128(%r11),%xmm0
- movdqa -112(%r11),%xmm1
- movdqa -96(%r11),%xmm2
- pand -128(%rax),%xmm0
- movdqa -80(%r11),%xmm3
- pand -112(%rax),%xmm1
- por %xmm0,%xmm4
- pand -96(%rax),%xmm2
- por %xmm1,%xmm5
- pand -80(%rax),%xmm3
- por %xmm2,%xmm4
- por %xmm3,%xmm5
- movdqa -64(%r11),%xmm0
- movdqa -48(%r11),%xmm1
- movdqa -32(%r11),%xmm2
- pand -64(%rax),%xmm0
- movdqa -16(%r11),%xmm3
- pand -48(%rax),%xmm1
- por %xmm0,%xmm4
- pand -32(%rax),%xmm2
- por %xmm1,%xmm5
- pand -16(%rax),%xmm3
- por %xmm2,%xmm4
- por %xmm3,%xmm5
- movdqa 0(%r11),%xmm0
- movdqa 16(%r11),%xmm1
- movdqa 32(%r11),%xmm2
- pand 0(%rax),%xmm0
- movdqa 48(%r11),%xmm3
- pand 16(%rax),%xmm1
- por %xmm0,%xmm4
- pand 32(%rax),%xmm2
- por %xmm1,%xmm5
- pand 48(%rax),%xmm3
- por %xmm2,%xmm4
- por %xmm3,%xmm5
- movdqa 64(%r11),%xmm0
- movdqa 80(%r11),%xmm1
- movdqa 96(%r11),%xmm2
- pand 64(%rax),%xmm0
- movdqa 112(%r11),%xmm3
- pand 80(%rax),%xmm1
- por %xmm0,%xmm4
- pand 96(%rax),%xmm2
- por %xmm1,%xmm5
- pand 112(%rax),%xmm3
- por %xmm2,%xmm4
- por %xmm3,%xmm5
- por %xmm5,%xmm4
- leaq 256(%r11),%r11
- pshufd $0x4e,%xmm4,%xmm0
- por %xmm4,%xmm0
- movq %xmm0,(%rdi)
- leaq 8(%rdi),%rdi
- subl $1,%esi
- jnz L$gather
- leaq (%r10),%rsp
- .byte 0xf3,0xc3
- L$SEH_end_bn_gather5:
- .p2align 6
- L$inc:
- .long 0,0, 1,1
- .long 2,2, 2,2
- .byte 77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105,112,108,105,99,97,116,105,111,110,32,119,105,116,104,32,115,99,97,116,116,101,114,47,103,97,116,104,101,114,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0