- .text
- .globl _poly1305_init
- .private_extern _poly1305_init
- .globl _poly1305_blocks
- .private_extern _poly1305_blocks
- .globl _poly1305_emit
- .private_extern _poly1305_emit
- .p2align 5
- _poly1305_init:
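- # Zero the 3-word accumulator h at 0/8/16(%rdi), clamp the r half of the
- # key (RFC 8439) and store it at 24/32(%rdi), then publish pointers to the
- # fastest blocks/emit routines (scalar, AVX, AVX2 or base 2^44) through
- # %rdx according to the OPENSSL_ia32cap_P capability bits.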
- xorq %rax,%rax
- movq %rax,0(%rdi)
- movq %rax,8(%rdi)
- movq %rax,16(%rdi)
- cmpq $0,%rsi
- je L$no_key
- leaq _poly1305_blocks(%rip),%r10
- leaq _poly1305_emit(%rip),%r11
- movq _OPENSSL_ia32cap_P+4(%rip),%r9
- leaq poly1305_blocks_avx(%rip),%rax
- leaq poly1305_emit_avx(%rip),%rcx
- btq $28,%r9
- cmovcq %rax,%r10
- cmovcq %rcx,%r11
- leaq poly1305_blocks_avx2(%rip),%rax
- btq $37,%r9
- cmovcq %rax,%r10
- movq $2149646336,%rax
- shrq $32,%r9
- andq %rax,%r9
- cmpq %rax,%r9
- je L$init_base2_44
- movq $0x0ffffffc0fffffff,%rax
- movq $0x0ffffffc0ffffffc,%rcx
- andq 0(%rsi),%rax
- andq 8(%rsi),%rcx
- movq %rax,24(%rdi)
- movq %rcx,32(%rdi)
- movq %r10,0(%rdx)
- movq %r11,8(%rdx)
- movl $1,%eax
- L$no_key:
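- # 0xf3,0xc3 encodes "rep ret" (ret with a benign F3 prefix); it is used as
- # the return instruction throughout this file.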
- .byte 0xf3,0xc3
- .p2align 5
- _poly1305_blocks:
- L$blocks:
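- # Scalar base 2^64 bulk loop: for each 16-byte block, add the block (plus
- # the pad bit passed in %rcx) to h, then multiply h by r modulo 2^130-5.
- # %r13 holds s1 = r1 + (r1>>2), which folds the reduction into the multiply.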
- shrq $4,%rdx
- jz L$no_data
- pushq %rbx
- pushq %rbp
- pushq %r12
- pushq %r13
- pushq %r14
- pushq %r15
- L$blocks_body:
- movq %rdx,%r15
- movq 24(%rdi),%r11
- movq 32(%rdi),%r13
- movq 0(%rdi),%r14
- movq 8(%rdi),%rbx
- movq 16(%rdi),%rbp
- movq %r13,%r12
- shrq $2,%r13
- movq %r12,%rax
- addq %r12,%r13
- jmp L$oop
- .p2align 5
- L$oop:
- addq 0(%rsi),%r14
- adcq 8(%rsi),%rbx
- leaq 16(%rsi),%rsi
- adcq %rcx,%rbp
- mulq %r14
- movq %rax,%r9
- movq %r11,%rax
- movq %rdx,%r10
- mulq %r14
- movq %rax,%r14
- movq %r11,%rax
- movq %rdx,%r8
- mulq %rbx
- addq %rax,%r9
- movq %r13,%rax
- adcq %rdx,%r10
- mulq %rbx
- movq %rbp,%rbx
- addq %rax,%r14
- adcq %rdx,%r8
- imulq %r13,%rbx
- addq %rbx,%r9
- movq %r8,%rbx
- adcq $0,%r10
- imulq %r11,%rbp
- addq %r9,%rbx
- movq $-4,%rax
- adcq %rbp,%r10
- andq %r10,%rax
- movq %r10,%rbp
- shrq $2,%r10
- andq $3,%rbp
- addq %r10,%rax
- addq %rax,%r14
- adcq $0,%rbx
- adcq $0,%rbp
- movq %r12,%rax
- decq %r15
- jnz L$oop
- movq %r14,0(%rdi)
- movq %rbx,8(%rdi)
- movq %rbp,16(%rdi)
- movq 0(%rsp),%r15
- movq 8(%rsp),%r14
- movq 16(%rsp),%r13
- movq 24(%rsp),%r12
- movq 32(%rsp),%rbp
- movq 40(%rsp),%rbx
- leaq 48(%rsp),%rsp
- L$no_data:
- L$blocks_epilogue:
- .byte 0xf3,0xc3
- .p2align 5
- _poly1305_emit:
- L$emit:
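- # Final reduction: select h or h+5 depending on whether h+5 overflows
- # 2^130, then add the 128-bit nonce half of the key (at %rdx) and store
- # the 16-byte tag at %rsi.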
- movq 0(%rdi),%r8
- movq 8(%rdi),%r9
- movq 16(%rdi),%r10
- movq %r8,%rax
- addq $5,%r8
- movq %r9,%rcx
- adcq $0,%r9
- adcq $0,%r10
- shrq $2,%r10
- cmovnzq %r8,%rax
- cmovnzq %r9,%rcx
- addq 0(%rdx),%rax
- adcq 8(%rdx),%rcx
- movq %rax,0(%rsi)
- movq %rcx,8(%rsi)
- .byte 0xf3,0xc3
- .p2align 5
- __poly1305_block:
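- # One scalar iteration of h = h*r mod 2^130-5, shared by the base 2^64
- # paths; expects h in %r14/%rbx/%rbp, r0 in %r11, r1 in %r12/%rax and
- # s1 = r1 + (r1>>2) in %r13.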
- mulq %r14
- movq %rax,%r9
- movq %r11,%rax
- movq %rdx,%r10
- mulq %r14
- movq %rax,%r14
- movq %r11,%rax
- movq %rdx,%r8
- mulq %rbx
- addq %rax,%r9
- movq %r13,%rax
- adcq %rdx,%r10
- mulq %rbx
- movq %rbp,%rbx
- addq %rax,%r14
- adcq %rdx,%r8
- imulq %r13,%rbx
- addq %rbx,%r9
- movq %r8,%rbx
- adcq $0,%r10
- imulq %r11,%rbp
- addq %r9,%rbx
- movq $-4,%rax
- adcq %rbp,%r10
- andq %r10,%rax
- movq %r10,%rbp
- shrq $2,%r10
- andq $3,%rbp
- addq %r10,%rax
- addq %rax,%r14
- adcq $0,%rbx
- adcq $0,%rbp
- .byte 0xf3,0xc3
- .p2align 5
- __poly1305_init_avx:
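- # Compute r^2, r^3 and r^4 by repeated calls to __poly1305_block (starting
- # from h = r) and store all four powers as 26-bit limbs, together with the
- # 5*limb copies the vector code needs, into the table at 48+64(%rdi).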
- movq %r11,%r14
- movq %r12,%rbx
- xorq %rbp,%rbp
- leaq 48+64(%rdi),%rdi
- movq %r12,%rax
- call __poly1305_block
- movl $0x3ffffff,%eax
- movl $0x3ffffff,%edx
- movq %r14,%r8
- andl %r14d,%eax
- movq %r11,%r9
- andl %r11d,%edx
- movl %eax,-64(%rdi)
- shrq $26,%r8
- movl %edx,-60(%rdi)
- shrq $26,%r9
- movl $0x3ffffff,%eax
- movl $0x3ffffff,%edx
- andl %r8d,%eax
- andl %r9d,%edx
- movl %eax,-48(%rdi)
- leal (%rax,%rax,4),%eax
- movl %edx,-44(%rdi)
- leal (%rdx,%rdx,4),%edx
- movl %eax,-32(%rdi)
- shrq $26,%r8
- movl %edx,-28(%rdi)
- shrq $26,%r9
- movq %rbx,%rax
- movq %r12,%rdx
- shlq $12,%rax
- shlq $12,%rdx
- orq %r8,%rax
- orq %r9,%rdx
- andl $0x3ffffff,%eax
- andl $0x3ffffff,%edx
- movl %eax,-16(%rdi)
- leal (%rax,%rax,4),%eax
- movl %edx,-12(%rdi)
- leal (%rdx,%rdx,4),%edx
- movl %eax,0(%rdi)
- movq %rbx,%r8
- movl %edx,4(%rdi)
- movq %r12,%r9
- movl $0x3ffffff,%eax
- movl $0x3ffffff,%edx
- shrq $14,%r8
- shrq $14,%r9
- andl %r8d,%eax
- andl %r9d,%edx
- movl %eax,16(%rdi)
- leal (%rax,%rax,4),%eax
- movl %edx,20(%rdi)
- leal (%rdx,%rdx,4),%edx
- movl %eax,32(%rdi)
- shrq $26,%r8
- movl %edx,36(%rdi)
- shrq $26,%r9
- movq %rbp,%rax
- shlq $24,%rax
- orq %rax,%r8
- movl %r8d,48(%rdi)
- leaq (%r8,%r8,4),%r8
- movl %r9d,52(%rdi)
- leaq (%r9,%r9,4),%r9
- movl %r8d,64(%rdi)
- movl %r9d,68(%rdi)
- movq %r12,%rax
- call __poly1305_block
- movl $0x3ffffff,%eax
- movq %r14,%r8
- andl %r14d,%eax
- shrq $26,%r8
- movl %eax,-52(%rdi)
- movl $0x3ffffff,%edx
- andl %r8d,%edx
- movl %edx,-36(%rdi)
- leal (%rdx,%rdx,4),%edx
- shrq $26,%r8
- movl %edx,-20(%rdi)
- movq %rbx,%rax
- shlq $12,%rax
- orq %r8,%rax
- andl $0x3ffffff,%eax
- movl %eax,-4(%rdi)
- leal (%rax,%rax,4),%eax
- movq %rbx,%r8
- movl %eax,12(%rdi)
- movl $0x3ffffff,%edx
- shrq $14,%r8
- andl %r8d,%edx
- movl %edx,28(%rdi)
- leal (%rdx,%rdx,4),%edx
- shrq $26,%r8
- movl %edx,44(%rdi)
- movq %rbp,%rax
- shlq $24,%rax
- orq %rax,%r8
- movl %r8d,60(%rdi)
- leaq (%r8,%r8,4),%r8
- movl %r8d,76(%rdi)
- movq %r12,%rax
- call __poly1305_block
- movl $0x3ffffff,%eax
- movq %r14,%r8
- andl %r14d,%eax
- shrq $26,%r8
- movl %eax,-56(%rdi)
- movl $0x3ffffff,%edx
- andl %r8d,%edx
- movl %edx,-40(%rdi)
- leal (%rdx,%rdx,4),%edx
- shrq $26,%r8
- movl %edx,-24(%rdi)
- movq %rbx,%rax
- shlq $12,%rax
- orq %r8,%rax
- andl $0x3ffffff,%eax
- movl %eax,-8(%rdi)
- leal (%rax,%rax,4),%eax
- movq %rbx,%r8
- movl %eax,8(%rdi)
- movl $0x3ffffff,%edx
- shrq $14,%r8
- andl %r8d,%edx
- movl %edx,24(%rdi)
- leal (%rdx,%rdx,4),%edx
- shrq $26,%r8
- movl %edx,40(%rdi)
- movq %rbp,%rax
- shlq $24,%rax
- orq %rax,%r8
- movl %r8d,56(%rdi)
- leaq (%r8,%r8,4),%r8
- movl %r8d,72(%rdi)
- leaq -48-64(%rdi),%rdi
- .byte 0xf3,0xc3
- .p2align 5
- poly1305_blocks_avx:
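- # AVX path: processes two blocks per iteration with the accumulator split
- # into five 26-bit limbs in xmm registers. Short or odd-length inputs fall
- # back to the scalar code, converting h between base 2^64 and base 2^26 as
- # needed (the flag at 20(%rdi) records which form is stored).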
- movl 20(%rdi),%r8d
- cmpq $128,%rdx
- jae L$blocks_avx
- testl %r8d,%r8d
- jz L$blocks
- L$blocks_avx:
- andq $-16,%rdx
- jz L$no_data_avx
- vzeroupper
- testl %r8d,%r8d
- jz L$base2_64_avx
- testq $31,%rdx
- jz L$even_avx
- pushq %rbx
- pushq %rbp
- pushq %r12
- pushq %r13
- pushq %r14
- pushq %r15
- L$blocks_avx_body:
- movq %rdx,%r15
- movq 0(%rdi),%r8
- movq 8(%rdi),%r9
- movl 16(%rdi),%ebp
- movq 24(%rdi),%r11
- movq 32(%rdi),%r13
- movl %r8d,%r14d
- andq $-2147483648,%r8
- movq %r9,%r12
- movl %r9d,%ebx
- andq $-2147483648,%r9
- shrq $6,%r8
- shlq $52,%r12
- addq %r8,%r14
- shrq $12,%rbx
- shrq $18,%r9
- addq %r12,%r14
- adcq %r9,%rbx
- movq %rbp,%r8
- shlq $40,%r8
- shrq $24,%rbp
- addq %r8,%rbx
- adcq $0,%rbp
- movq $-4,%r9
- movq %rbp,%r8
- andq %rbp,%r9
- shrq $2,%r8
- andq $3,%rbp
- addq %r9,%r8
- addq %r8,%r14
- adcq $0,%rbx
- adcq $0,%rbp
- movq %r13,%r12
- movq %r13,%rax
- shrq $2,%r13
- addq %r12,%r13
- addq 0(%rsi),%r14
- adcq 8(%rsi),%rbx
- leaq 16(%rsi),%rsi
- adcq %rcx,%rbp
- call __poly1305_block
- testq %rcx,%rcx
- jz L$store_base2_64_avx
- movq %r14,%rax
- movq %r14,%rdx
- shrq $52,%r14
- movq %rbx,%r11
- movq %rbx,%r12
- shrq $26,%rdx
- andq $0x3ffffff,%rax
- shlq $12,%r11
- andq $0x3ffffff,%rdx
- shrq $14,%rbx
- orq %r11,%r14
- shlq $24,%rbp
- andq $0x3ffffff,%r14
- shrq $40,%r12
- andq $0x3ffffff,%rbx
- orq %r12,%rbp
- subq $16,%r15
- jz L$store_base2_26_avx
- vmovd %eax,%xmm0
- vmovd %edx,%xmm1
- vmovd %r14d,%xmm2
- vmovd %ebx,%xmm3
- vmovd %ebp,%xmm4
- jmp L$proceed_avx
- .p2align 5
- L$store_base2_64_avx:
- movq %r14,0(%rdi)
- movq %rbx,8(%rdi)
- movq %rbp,16(%rdi)
- jmp L$done_avx
- .p2align 4
- L$store_base2_26_avx:
- movl %eax,0(%rdi)
- movl %edx,4(%rdi)
- movl %r14d,8(%rdi)
- movl %ebx,12(%rdi)
- movl %ebp,16(%rdi)
- .p2align 4
- L$done_avx:
- movq 0(%rsp),%r15
- movq 8(%rsp),%r14
- movq 16(%rsp),%r13
- movq 24(%rsp),%r12
- movq 32(%rsp),%rbp
- movq 40(%rsp),%rbx
- leaq 48(%rsp),%rsp
- L$no_data_avx:
- L$blocks_avx_epilogue:
- .byte 0xf3,0xc3
- .p2align 5
- L$base2_64_avx:
- pushq %rbx
- pushq %rbp
- pushq %r12
- pushq %r13
- pushq %r14
- pushq %r15
- L$base2_64_avx_body:
- movq %rdx,%r15
- movq 24(%rdi),%r11
- movq 32(%rdi),%r13
- movq 0(%rdi),%r14
- movq 8(%rdi),%rbx
- movl 16(%rdi),%ebp
- movq %r13,%r12
- movq %r13,%rax
- shrq $2,%r13
- addq %r12,%r13
- testq $31,%rdx
- jz L$init_avx
- addq 0(%rsi),%r14
- adcq 8(%rsi),%rbx
- leaq 16(%rsi),%rsi
- adcq %rcx,%rbp
- subq $16,%r15
- call __poly1305_block
- L$init_avx:
- movq %r14,%rax
- movq %r14,%rdx
- shrq $52,%r14
- movq %rbx,%r8
- movq %rbx,%r9
- shrq $26,%rdx
- andq $0x3ffffff,%rax
- shlq $12,%r8
- andq $0x3ffffff,%rdx
- shrq $14,%rbx
- orq %r8,%r14
- shlq $24,%rbp
- andq $0x3ffffff,%r14
- shrq $40,%r9
- andq $0x3ffffff,%rbx
- orq %r9,%rbp
- vmovd %eax,%xmm0
- vmovd %edx,%xmm1
- vmovd %r14d,%xmm2
- vmovd %ebx,%xmm3
- vmovd %ebp,%xmm4
- movl $1,20(%rdi)
- call __poly1305_init_avx
- L$proceed_avx:
- movq %r15,%rdx
- movq 0(%rsp),%r15
- movq 8(%rsp),%r14
- movq 16(%rsp),%r13
- movq 24(%rsp),%r12
- movq 32(%rsp),%rbp
- movq 40(%rsp),%rbx
- leaq 48(%rsp),%rax
- leaq 48(%rsp),%rsp
- L$base2_64_avx_epilogue:
- jmp L$do_avx
- .p2align 5
- L$even_avx:
- vmovd 0(%rdi),%xmm0
- vmovd 4(%rdi),%xmm1
- vmovd 8(%rdi),%xmm2
- vmovd 12(%rdi),%xmm3
- vmovd 16(%rdi),%xmm4
- L$do_avx:
- leaq -88(%rsp),%r11
- subq $0x178,%rsp
- subq $64,%rdx
- leaq -32(%rsi),%rax
- cmovcq %rax,%rsi
- vmovdqu 48(%rdi),%xmm14
- leaq 112(%rdi),%rdi
- leaq L$const(%rip),%rcx
- vmovdqu 32(%rsi),%xmm5
- vmovdqu 48(%rsi),%xmm6
- vmovdqa 64(%rcx),%xmm15
- vpsrldq $6,%xmm5,%xmm7
- vpsrldq $6,%xmm6,%xmm8
- vpunpckhqdq %xmm6,%xmm5,%xmm9
- vpunpcklqdq %xmm6,%xmm5,%xmm5
- vpunpcklqdq %xmm8,%xmm7,%xmm8
- vpsrlq $40,%xmm9,%xmm9
- vpsrlq $26,%xmm5,%xmm6
- vpand %xmm15,%xmm5,%xmm5
- vpsrlq $4,%xmm8,%xmm7
- vpand %xmm15,%xmm6,%xmm6
- vpsrlq $30,%xmm8,%xmm8
- vpand %xmm15,%xmm7,%xmm7
- vpand %xmm15,%xmm8,%xmm8
- vpor 32(%rcx),%xmm9,%xmm9
- jbe L$skip_loop_avx
- vmovdqu -48(%rdi),%xmm11
- vmovdqu -32(%rdi),%xmm12
- vpshufd $0xEE,%xmm14,%xmm13
- vpshufd $0x44,%xmm14,%xmm10
- vmovdqa %xmm13,-144(%r11)
- vmovdqa %xmm10,0(%rsp)
- vpshufd $0xEE,%xmm11,%xmm14
- vmovdqu -16(%rdi),%xmm10
- vpshufd $0x44,%xmm11,%xmm11
- vmovdqa %xmm14,-128(%r11)
- vmovdqa %xmm11,16(%rsp)
- vpshufd $0xEE,%xmm12,%xmm13
- vmovdqu 0(%rdi),%xmm11
- vpshufd $0x44,%xmm12,%xmm12
- vmovdqa %xmm13,-112(%r11)
- vmovdqa %xmm12,32(%rsp)
- vpshufd $0xEE,%xmm10,%xmm14
- vmovdqu 16(%rdi),%xmm12
- vpshufd $0x44,%xmm10,%xmm10
- vmovdqa %xmm14,-96(%r11)
- vmovdqa %xmm10,48(%rsp)
- vpshufd $0xEE,%xmm11,%xmm13
- vmovdqu 32(%rdi),%xmm10
- vpshufd $0x44,%xmm11,%xmm11
- vmovdqa %xmm13,-80(%r11)
- vmovdqa %xmm11,64(%rsp)
- vpshufd $0xEE,%xmm12,%xmm14
- vmovdqu 48(%rdi),%xmm11
- vpshufd $0x44,%xmm12,%xmm12
- vmovdqa %xmm14,-64(%r11)
- vmovdqa %xmm12,80(%rsp)
- vpshufd $0xEE,%xmm10,%xmm13
- vmovdqu 64(%rdi),%xmm12
- vpshufd $0x44,%xmm10,%xmm10
- vmovdqa %xmm13,-48(%r11)
- vmovdqa %xmm10,96(%rsp)
- vpshufd $0xEE,%xmm11,%xmm14
- vpshufd $0x44,%xmm11,%xmm11
- vmovdqa %xmm14,-32(%r11)
- vmovdqa %xmm11,112(%rsp)
- vpshufd $0xEE,%xmm12,%xmm13
- vmovdqa 0(%rsp),%xmm14
- vpshufd $0x44,%xmm12,%xmm12
- vmovdqa %xmm13,-16(%r11)
- vmovdqa %xmm12,128(%rsp)
- jmp L$oop_avx
- .p2align 5
- L$oop_avx:
- vpmuludq %xmm5,%xmm14,%xmm10
- vpmuludq %xmm6,%xmm14,%xmm11
- vmovdqa %xmm2,32(%r11)
- vpmuludq %xmm7,%xmm14,%xmm12
- vmovdqa 16(%rsp),%xmm2
- vpmuludq %xmm8,%xmm14,%xmm13
- vpmuludq %xmm9,%xmm14,%xmm14
- vmovdqa %xmm0,0(%r11)
- vpmuludq 32(%rsp),%xmm9,%xmm0
- vmovdqa %xmm1,16(%r11)
- vpmuludq %xmm8,%xmm2,%xmm1
- vpaddq %xmm0,%xmm10,%xmm10
- vpaddq %xmm1,%xmm14,%xmm14
- vmovdqa %xmm3,48(%r11)
- vpmuludq %xmm7,%xmm2,%xmm0
- vpmuludq %xmm6,%xmm2,%xmm1
- vpaddq %xmm0,%xmm13,%xmm13
- vmovdqa 48(%rsp),%xmm3
- vpaddq %xmm1,%xmm12,%xmm12
- vmovdqa %xmm4,64(%r11)
- vpmuludq %xmm5,%xmm2,%xmm2
- vpmuludq %xmm7,%xmm3,%xmm0
- vpaddq %xmm2,%xmm11,%xmm11
- vmovdqa 64(%rsp),%xmm4
- vpaddq %xmm0,%xmm14,%xmm14
- vpmuludq %xmm6,%xmm3,%xmm1
- vpmuludq %xmm5,%xmm3,%xmm3
- vpaddq %xmm1,%xmm13,%xmm13
- vmovdqa 80(%rsp),%xmm2
- vpaddq %xmm3,%xmm12,%xmm12
- vpmuludq %xmm9,%xmm4,%xmm0
- vpmuludq %xmm8,%xmm4,%xmm4
- vpaddq %xmm0,%xmm11,%xmm11
- vmovdqa 96(%rsp),%xmm3
- vpaddq %xmm4,%xmm10,%xmm10
- vmovdqa 128(%rsp),%xmm4
- vpmuludq %xmm6,%xmm2,%xmm1
- vpmuludq %xmm5,%xmm2,%xmm2
- vpaddq %xmm1,%xmm14,%xmm14
- vpaddq %xmm2,%xmm13,%xmm13
- vpmuludq %xmm9,%xmm3,%xmm0
- vpmuludq %xmm8,%xmm3,%xmm1
- vpaddq %xmm0,%xmm12,%xmm12
- vmovdqu 0(%rsi),%xmm0
- vpaddq %xmm1,%xmm11,%xmm11
- vpmuludq %xmm7,%xmm3,%xmm3
- vpmuludq %xmm7,%xmm4,%xmm7
- vpaddq %xmm3,%xmm10,%xmm10
- vmovdqu 16(%rsi),%xmm1
- vpaddq %xmm7,%xmm11,%xmm11
- vpmuludq %xmm8,%xmm4,%xmm8
- vpmuludq %xmm9,%xmm4,%xmm9
- vpsrldq $6,%xmm0,%xmm2
- vpaddq %xmm8,%xmm12,%xmm12
- vpaddq %xmm9,%xmm13,%xmm13
- vpsrldq $6,%xmm1,%xmm3
- vpmuludq 112(%rsp),%xmm5,%xmm9
- vpmuludq %xmm6,%xmm4,%xmm5
- vpunpckhqdq %xmm1,%xmm0,%xmm4
- vpaddq %xmm9,%xmm14,%xmm14
- vmovdqa -144(%r11),%xmm9
- vpaddq %xmm5,%xmm10,%xmm10
- vpunpcklqdq %xmm1,%xmm0,%xmm0
- vpunpcklqdq %xmm3,%xmm2,%xmm3
- vpsrldq $5,%xmm4,%xmm4
- vpsrlq $26,%xmm0,%xmm1
- vpand %xmm15,%xmm0,%xmm0
- vpsrlq $4,%xmm3,%xmm2
- vpand %xmm15,%xmm1,%xmm1
- vpand 0(%rcx),%xmm4,%xmm4
- vpsrlq $30,%xmm3,%xmm3
- vpand %xmm15,%xmm2,%xmm2
- vpand %xmm15,%xmm3,%xmm3
- vpor 32(%rcx),%xmm4,%xmm4
- vpaddq 0(%r11),%xmm0,%xmm0
- vpaddq 16(%r11),%xmm1,%xmm1
- vpaddq 32(%r11),%xmm2,%xmm2
- vpaddq 48(%r11),%xmm3,%xmm3
- vpaddq 64(%r11),%xmm4,%xmm4
- leaq 32(%rsi),%rax
- leaq 64(%rsi),%rsi
- subq $64,%rdx
- cmovcq %rax,%rsi
- vpmuludq %xmm0,%xmm9,%xmm5
- vpmuludq %xmm1,%xmm9,%xmm6
- vpaddq %xmm5,%xmm10,%xmm10
- vpaddq %xmm6,%xmm11,%xmm11
- vmovdqa -128(%r11),%xmm7
- vpmuludq %xmm2,%xmm9,%xmm5
- vpmuludq %xmm3,%xmm9,%xmm6
- vpaddq %xmm5,%xmm12,%xmm12
- vpaddq %xmm6,%xmm13,%xmm13
- vpmuludq %xmm4,%xmm9,%xmm9
- vpmuludq -112(%r11),%xmm4,%xmm5
- vpaddq %xmm9,%xmm14,%xmm14
- vpaddq %xmm5,%xmm10,%xmm10
- vpmuludq %xmm2,%xmm7,%xmm6
- vpmuludq %xmm3,%xmm7,%xmm5
- vpaddq %xmm6,%xmm13,%xmm13
- vmovdqa -96(%r11),%xmm8
- vpaddq %xmm5,%xmm14,%xmm14
- vpmuludq %xmm1,%xmm7,%xmm6
- vpmuludq %xmm0,%xmm7,%xmm7
- vpaddq %xmm6,%xmm12,%xmm12
- vpaddq %xmm7,%xmm11,%xmm11
- vmovdqa -80(%r11),%xmm9
- vpmuludq %xmm2,%xmm8,%xmm5
- vpmuludq %xmm1,%xmm8,%xmm6
- vpaddq %xmm5,%xmm14,%xmm14
- vpaddq %xmm6,%xmm13,%xmm13
- vmovdqa -64(%r11),%xmm7
- vpmuludq %xmm0,%xmm8,%xmm8
- vpmuludq %xmm4,%xmm9,%xmm5
- vpaddq %xmm8,%xmm12,%xmm12
- vpaddq %xmm5,%xmm11,%xmm11
- vmovdqa -48(%r11),%xmm8
- vpmuludq %xmm3,%xmm9,%xmm9
- vpmuludq %xmm1,%xmm7,%xmm6
- vpaddq %xmm9,%xmm10,%xmm10
- vmovdqa -16(%r11),%xmm9
- vpaddq %xmm6,%xmm14,%xmm14
- vpmuludq %xmm0,%xmm7,%xmm7
- vpmuludq %xmm4,%xmm8,%xmm5
- vpaddq %xmm7,%xmm13,%xmm13
- vpaddq %xmm5,%xmm12,%xmm12
- vmovdqu 32(%rsi),%xmm5
- vpmuludq %xmm3,%xmm8,%xmm7
- vpmuludq %xmm2,%xmm8,%xmm8
- vpaddq %xmm7,%xmm11,%xmm11
- vmovdqu 48(%rsi),%xmm6
- vpaddq %xmm8,%xmm10,%xmm10
- vpmuludq %xmm2,%xmm9,%xmm2
- vpmuludq %xmm3,%xmm9,%xmm3
- vpsrldq $6,%xmm5,%xmm7
- vpaddq %xmm2,%xmm11,%xmm11
- vpmuludq %xmm4,%xmm9,%xmm4
- vpsrldq $6,%xmm6,%xmm8
- vpaddq %xmm3,%xmm12,%xmm2
- vpaddq %xmm4,%xmm13,%xmm3
- vpmuludq -32(%r11),%xmm0,%xmm4
- vpmuludq %xmm1,%xmm9,%xmm0
- vpunpckhqdq %xmm6,%xmm5,%xmm9
- vpaddq %xmm4,%xmm14,%xmm4
- vpaddq %xmm0,%xmm10,%xmm0
- vpunpcklqdq %xmm6,%xmm5,%xmm5
- vpunpcklqdq %xmm8,%xmm7,%xmm8
- vpsrldq $5,%xmm9,%xmm9
- vpsrlq $26,%xmm5,%xmm6
- vmovdqa 0(%rsp),%xmm14
- vpand %xmm15,%xmm5,%xmm5
- vpsrlq $4,%xmm8,%xmm7
- vpand %xmm15,%xmm6,%xmm6
- vpand 0(%rcx),%xmm9,%xmm9
- vpsrlq $30,%xmm8,%xmm8
- vpand %xmm15,%xmm7,%xmm7
- vpand %xmm15,%xmm8,%xmm8
- vpor 32(%rcx),%xmm9,%xmm9
- vpsrlq $26,%xmm3,%xmm13
- vpand %xmm15,%xmm3,%xmm3
- vpaddq %xmm13,%xmm4,%xmm4
- vpsrlq $26,%xmm0,%xmm10
- vpand %xmm15,%xmm0,%xmm0
- vpaddq %xmm10,%xmm11,%xmm1
- vpsrlq $26,%xmm4,%xmm10
- vpand %xmm15,%xmm4,%xmm4
- vpsrlq $26,%xmm1,%xmm11
- vpand %xmm15,%xmm1,%xmm1
- vpaddq %xmm11,%xmm2,%xmm2
- vpaddq %xmm10,%xmm0,%xmm0
- vpsllq $2,%xmm10,%xmm10
- vpaddq %xmm10,%xmm0,%xmm0
- vpsrlq $26,%xmm2,%xmm12
- vpand %xmm15,%xmm2,%xmm2
- vpaddq %xmm12,%xmm3,%xmm3
- vpsrlq $26,%xmm0,%xmm10
- vpand %xmm15,%xmm0,%xmm0
- vpaddq %xmm10,%xmm1,%xmm1
- vpsrlq $26,%xmm3,%xmm13
- vpand %xmm15,%xmm3,%xmm3
- vpaddq %xmm13,%xmm4,%xmm4
- ja L$oop_avx
- L$skip_loop_avx:
- vpshufd $0x10,%xmm14,%xmm14
- addq $32,%rdx
- jnz L$ong_tail_avx
- vpaddq %xmm2,%xmm7,%xmm7
- vpaddq %xmm0,%xmm5,%xmm5
- vpaddq %xmm1,%xmm6,%xmm6
- vpaddq %xmm3,%xmm8,%xmm8
- vpaddq %xmm4,%xmm9,%xmm9
- L$ong_tail_avx:
- vmovdqa %xmm2,32(%r11)
- vmovdqa %xmm0,0(%r11)
- vmovdqa %xmm1,16(%r11)
- vmovdqa %xmm3,48(%r11)
- vmovdqa %xmm4,64(%r11)
- vpmuludq %xmm7,%xmm14,%xmm12
- vpmuludq %xmm5,%xmm14,%xmm10
- vpshufd $0x10,-48(%rdi),%xmm2
- vpmuludq %xmm6,%xmm14,%xmm11
- vpmuludq %xmm8,%xmm14,%xmm13
- vpmuludq %xmm9,%xmm14,%xmm14
- vpmuludq %xmm8,%xmm2,%xmm0
- vpaddq %xmm0,%xmm14,%xmm14
- vpshufd $0x10,-32(%rdi),%xmm3
- vpmuludq %xmm7,%xmm2,%xmm1
- vpaddq %xmm1,%xmm13,%xmm13
- vpshufd $0x10,-16(%rdi),%xmm4
- vpmuludq %xmm6,%xmm2,%xmm0
- vpaddq %xmm0,%xmm12,%xmm12
- vpmuludq %xmm5,%xmm2,%xmm2
- vpaddq %xmm2,%xmm11,%xmm11
- vpmuludq %xmm9,%xmm3,%xmm3
- vpaddq %xmm3,%xmm10,%xmm10
- vpshufd $0x10,0(%rdi),%xmm2
- vpmuludq %xmm7,%xmm4,%xmm1
- vpaddq %xmm1,%xmm14,%xmm14
- vpmuludq %xmm6,%xmm4,%xmm0
- vpaddq %xmm0,%xmm13,%xmm13
- vpshufd $0x10,16(%rdi),%xmm3
- vpmuludq %xmm5,%xmm4,%xmm4
- vpaddq %xmm4,%xmm12,%xmm12
- vpmuludq %xmm9,%xmm2,%xmm1
- vpaddq %xmm1,%xmm11,%xmm11
- vpshufd $0x10,32(%rdi),%xmm4
- vpmuludq %xmm8,%xmm2,%xmm2
- vpaddq %xmm2,%xmm10,%xmm10
- vpmuludq %xmm6,%xmm3,%xmm0
- vpaddq %xmm0,%xmm14,%xmm14
- vpmuludq %xmm5,%xmm3,%xmm3
- vpaddq %xmm3,%xmm13,%xmm13
- vpshufd $0x10,48(%rdi),%xmm2
- vpmuludq %xmm9,%xmm4,%xmm1
- vpaddq %xmm1,%xmm12,%xmm12
- vpshufd $0x10,64(%rdi),%xmm3
- vpmuludq %xmm8,%xmm4,%xmm0
- vpaddq %xmm0,%xmm11,%xmm11
- vpmuludq %xmm7,%xmm4,%xmm4
- vpaddq %xmm4,%xmm10,%xmm10
- vpmuludq %xmm5,%xmm2,%xmm2
- vpaddq %xmm2,%xmm14,%xmm14
- vpmuludq %xmm9,%xmm3,%xmm1
- vpaddq %xmm1,%xmm13,%xmm13
- vpmuludq %xmm8,%xmm3,%xmm0
- vpaddq %xmm0,%xmm12,%xmm12
- vpmuludq %xmm7,%xmm3,%xmm1
- vpaddq %xmm1,%xmm11,%xmm11
- vpmuludq %xmm6,%xmm3,%xmm3
- vpaddq %xmm3,%xmm10,%xmm10
- jz L$short_tail_avx
- vmovdqu 0(%rsi),%xmm0
- vmovdqu 16(%rsi),%xmm1
- vpsrldq $6,%xmm0,%xmm2
- vpsrldq $6,%xmm1,%xmm3
- vpunpckhqdq %xmm1,%xmm0,%xmm4
- vpunpcklqdq %xmm1,%xmm0,%xmm0
- vpunpcklqdq %xmm3,%xmm2,%xmm3
- vpsrlq $40,%xmm4,%xmm4
- vpsrlq $26,%xmm0,%xmm1
- vpand %xmm15,%xmm0,%xmm0
- vpsrlq $4,%xmm3,%xmm2
- vpand %xmm15,%xmm1,%xmm1
- vpsrlq $30,%xmm3,%xmm3
- vpand %xmm15,%xmm2,%xmm2
- vpand %xmm15,%xmm3,%xmm3
- vpor 32(%rcx),%xmm4,%xmm4
- vpshufd $0x32,-64(%rdi),%xmm9
- vpaddq 0(%r11),%xmm0,%xmm0
- vpaddq 16(%r11),%xmm1,%xmm1
- vpaddq 32(%r11),%xmm2,%xmm2
- vpaddq 48(%r11),%xmm3,%xmm3
- vpaddq 64(%r11),%xmm4,%xmm4
- vpmuludq %xmm0,%xmm9,%xmm5
- vpaddq %xmm5,%xmm10,%xmm10
- vpmuludq %xmm1,%xmm9,%xmm6
- vpaddq %xmm6,%xmm11,%xmm11
- vpmuludq %xmm2,%xmm9,%xmm5
- vpaddq %xmm5,%xmm12,%xmm12
- vpshufd $0x32,-48(%rdi),%xmm7
- vpmuludq %xmm3,%xmm9,%xmm6
- vpaddq %xmm6,%xmm13,%xmm13
- vpmuludq %xmm4,%xmm9,%xmm9
- vpaddq %xmm9,%xmm14,%xmm14
- vpmuludq %xmm3,%xmm7,%xmm5
- vpaddq %xmm5,%xmm14,%xmm14
- vpshufd $0x32,-32(%rdi),%xmm8
- vpmuludq %xmm2,%xmm7,%xmm6
- vpaddq %xmm6,%xmm13,%xmm13
- vpshufd $0x32,-16(%rdi),%xmm9
- vpmuludq %xmm1,%xmm7,%xmm5
- vpaddq %xmm5,%xmm12,%xmm12
- vpmuludq %xmm0,%xmm7,%xmm7
- vpaddq %xmm7,%xmm11,%xmm11
- vpmuludq %xmm4,%xmm8,%xmm8
- vpaddq %xmm8,%xmm10,%xmm10
- vpshufd $0x32,0(%rdi),%xmm7
- vpmuludq %xmm2,%xmm9,%xmm6
- vpaddq %xmm6,%xmm14,%xmm14
- vpmuludq %xmm1,%xmm9,%xmm5
- vpaddq %xmm5,%xmm13,%xmm13
- vpshufd $0x32,16(%rdi),%xmm8
- vpmuludq %xmm0,%xmm9,%xmm9
- vpaddq %xmm9,%xmm12,%xmm12
- vpmuludq %xmm4,%xmm7,%xmm6
- vpaddq %xmm6,%xmm11,%xmm11
- vpshufd $0x32,32(%rdi),%xmm9
- vpmuludq %xmm3,%xmm7,%xmm7
- vpaddq %xmm7,%xmm10,%xmm10
- vpmuludq %xmm1,%xmm8,%xmm5
- vpaddq %xmm5,%xmm14,%xmm14
- vpmuludq %xmm0,%xmm8,%xmm8
- vpaddq %xmm8,%xmm13,%xmm13
- vpshufd $0x32,48(%rdi),%xmm7
- vpmuludq %xmm4,%xmm9,%xmm6
- vpaddq %xmm6,%xmm12,%xmm12
- vpshufd $0x32,64(%rdi),%xmm8
- vpmuludq %xmm3,%xmm9,%xmm5
- vpaddq %xmm5,%xmm11,%xmm11
- vpmuludq %xmm2,%xmm9,%xmm9
- vpaddq %xmm9,%xmm10,%xmm10
- vpmuludq %xmm0,%xmm7,%xmm7
- vpaddq %xmm7,%xmm14,%xmm14
- vpmuludq %xmm4,%xmm8,%xmm6
- vpaddq %xmm6,%xmm13,%xmm13
- vpmuludq %xmm3,%xmm8,%xmm5
- vpaddq %xmm5,%xmm12,%xmm12
- vpmuludq %xmm2,%xmm8,%xmm6
- vpaddq %xmm6,%xmm11,%xmm11
- vpmuludq %xmm1,%xmm8,%xmm8
- vpaddq %xmm8,%xmm10,%xmm10
- L$short_tail_avx:
- vpsrldq $8,%xmm14,%xmm9
- vpsrldq $8,%xmm13,%xmm8
- vpsrldq $8,%xmm11,%xmm6
- vpsrldq $8,%xmm10,%xmm5
- vpsrldq $8,%xmm12,%xmm7
- vpaddq %xmm8,%xmm13,%xmm13
- vpaddq %xmm9,%xmm14,%xmm14
- vpaddq %xmm5,%xmm10,%xmm10
- vpaddq %xmm6,%xmm11,%xmm11
- vpaddq %xmm7,%xmm12,%xmm12
- vpsrlq $26,%xmm13,%xmm3
- vpand %xmm15,%xmm13,%xmm13
- vpaddq %xmm3,%xmm14,%xmm14
- vpsrlq $26,%xmm10,%xmm0
- vpand %xmm15,%xmm10,%xmm10
- vpaddq %xmm0,%xmm11,%xmm11
- vpsrlq $26,%xmm14,%xmm4
- vpand %xmm15,%xmm14,%xmm14
- vpsrlq $26,%xmm11,%xmm1
- vpand %xmm15,%xmm11,%xmm11
- vpaddq %xmm1,%xmm12,%xmm12
- vpaddq %xmm4,%xmm10,%xmm10
- vpsllq $2,%xmm4,%xmm4
- vpaddq %xmm4,%xmm10,%xmm10
- vpsrlq $26,%xmm12,%xmm2
- vpand %xmm15,%xmm12,%xmm12
- vpaddq %xmm2,%xmm13,%xmm13
- vpsrlq $26,%xmm10,%xmm0
- vpand %xmm15,%xmm10,%xmm10
- vpaddq %xmm0,%xmm11,%xmm11
- vpsrlq $26,%xmm13,%xmm3
- vpand %xmm15,%xmm13,%xmm13
- vpaddq %xmm3,%xmm14,%xmm14
- vmovd %xmm10,-112(%rdi)
- vmovd %xmm11,-108(%rdi)
- vmovd %xmm12,-104(%rdi)
- vmovd %xmm13,-100(%rdi)
- vmovd %xmm14,-96(%rdi)
- leaq 88(%r11),%rsp
- vzeroupper
- .byte 0xf3,0xc3
- .p2align 5
- poly1305_emit_avx:
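- # Emit when the accumulator may be stored in base 2^26: if the flag at
- # 20(%rdi) is clear, fall through to the scalar L$emit; otherwise convert
- # h back to base 2^64, propagate carries, and finish as in _poly1305_emit.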
- cmpl $0,20(%rdi)
- je L$emit
- movl 0(%rdi),%eax
- movl 4(%rdi),%ecx
- movl 8(%rdi),%r8d
- movl 12(%rdi),%r11d
- movl 16(%rdi),%r10d
- shlq $26,%rcx
- movq %r8,%r9
- shlq $52,%r8
- addq %rcx,%rax
- shrq $12,%r9
- addq %rax,%r8
- adcq $0,%r9
- shlq $14,%r11
- movq %r10,%rax
- shrq $24,%r10
- addq %r11,%r9
- shlq $40,%rax
- addq %rax,%r9
- adcq $0,%r10
- movq %r10,%rax
- movq %r10,%rcx
- andq $3,%r10
- shrq $2,%rax
- andq $-4,%rcx
- addq %rcx,%rax
- addq %rax,%r8
- adcq $0,%r9
- adcq $0,%r10
- movq %r8,%rax
- addq $5,%r8
- movq %r9,%rcx
- adcq $0,%r9
- adcq $0,%r10
- shrq $2,%r10
- cmovnzq %r8,%rax
- cmovnzq %r9,%rcx
- addq 0(%rdx),%rax
- adcq 8(%rdx),%rcx
- movq %rax,0(%rsi)
- movq %rcx,8(%rsi)
- .byte 0xf3,0xc3
- .p2align 5
- poly1305_blocks_avx2:
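- # AVX2 path: same base 2^26 scheme with four blocks per iteration in ymm
- # registers; inputs of 512 bytes or more are handed to the AVX-512 code
- # when the corresponding OPENSSL_ia32cap_P bits are set.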
- movl 20(%rdi),%r8d
- cmpq $128,%rdx
- jae L$blocks_avx2
- testl %r8d,%r8d
- jz L$blocks
- L$blocks_avx2:
- andq $-16,%rdx
- jz L$no_data_avx2
- vzeroupper
- testl %r8d,%r8d
- jz L$base2_64_avx2
- testq $63,%rdx
- jz L$even_avx2
- pushq %rbx
- pushq %rbp
- pushq %r12
- pushq %r13
- pushq %r14
- pushq %r15
- L$blocks_avx2_body:
- movq %rdx,%r15
- movq 0(%rdi),%r8
- movq 8(%rdi),%r9
- movl 16(%rdi),%ebp
- movq 24(%rdi),%r11
- movq 32(%rdi),%r13
- movl %r8d,%r14d
- andq $-2147483648,%r8
- movq %r9,%r12
- movl %r9d,%ebx
- andq $-2147483648,%r9
- shrq $6,%r8
- shlq $52,%r12
- addq %r8,%r14
- shrq $12,%rbx
- shrq $18,%r9
- addq %r12,%r14
- adcq %r9,%rbx
- movq %rbp,%r8
- shlq $40,%r8
- shrq $24,%rbp
- addq %r8,%rbx
- adcq $0,%rbp
- movq $-4,%r9
- movq %rbp,%r8
- andq %rbp,%r9
- shrq $2,%r8
- andq $3,%rbp
- addq %r9,%r8
- addq %r8,%r14
- adcq $0,%rbx
- adcq $0,%rbp
- movq %r13,%r12
- movq %r13,%rax
- shrq $2,%r13
- addq %r12,%r13
- L$base2_26_pre_avx2:
- addq 0(%rsi),%r14
- adcq 8(%rsi),%rbx
- leaq 16(%rsi),%rsi
- adcq %rcx,%rbp
- subq $16,%r15
- call __poly1305_block
- movq %r12,%rax
- testq $63,%r15
- jnz L$base2_26_pre_avx2
- testq %rcx,%rcx
- jz L$store_base2_64_avx2
- movq %r14,%rax
- movq %r14,%rdx
- shrq $52,%r14
- movq %rbx,%r11
- movq %rbx,%r12
- shrq $26,%rdx
- andq $0x3ffffff,%rax
- shlq $12,%r11
- andq $0x3ffffff,%rdx
- shrq $14,%rbx
- orq %r11,%r14
- shlq $24,%rbp
- andq $0x3ffffff,%r14
- shrq $40,%r12
- andq $0x3ffffff,%rbx
- orq %r12,%rbp
- testq %r15,%r15
- jz L$store_base2_26_avx2
- vmovd %eax,%xmm0
- vmovd %edx,%xmm1
- vmovd %r14d,%xmm2
- vmovd %ebx,%xmm3
- vmovd %ebp,%xmm4
- jmp L$proceed_avx2
- .p2align 5
- L$store_base2_64_avx2:
- movq %r14,0(%rdi)
- movq %rbx,8(%rdi)
- movq %rbp,16(%rdi)
- jmp L$done_avx2
- .p2align 4
- L$store_base2_26_avx2:
- movl %eax,0(%rdi)
- movl %edx,4(%rdi)
- movl %r14d,8(%rdi)
- movl %ebx,12(%rdi)
- movl %ebp,16(%rdi)
- .p2align 4
- L$done_avx2:
- movq 0(%rsp),%r15
- movq 8(%rsp),%r14
- movq 16(%rsp),%r13
- movq 24(%rsp),%r12
- movq 32(%rsp),%rbp
- movq 40(%rsp),%rbx
- leaq 48(%rsp),%rsp
- L$no_data_avx2:
- L$blocks_avx2_epilogue:
- .byte 0xf3,0xc3
- .p2align 5
- L$base2_64_avx2:
- pushq %rbx
- pushq %rbp
- pushq %r12
- pushq %r13
- pushq %r14
- pushq %r15
- L$base2_64_avx2_body:
- movq %rdx,%r15
- movq 24(%rdi),%r11
- movq 32(%rdi),%r13
- movq 0(%rdi),%r14
- movq 8(%rdi),%rbx
- movl 16(%rdi),%ebp
- movq %r13,%r12
- movq %r13,%rax
- shrq $2,%r13
- addq %r12,%r13
- testq $63,%rdx
- jz L$init_avx2
- L$base2_64_pre_avx2:
- addq 0(%rsi),%r14
- adcq 8(%rsi),%rbx
- leaq 16(%rsi),%rsi
- adcq %rcx,%rbp
- subq $16,%r15
- call __poly1305_block
- movq %r12,%rax
- testq $63,%r15
- jnz L$base2_64_pre_avx2
- L$init_avx2:
- movq %r14,%rax
- movq %r14,%rdx
- shrq $52,%r14
- movq %rbx,%r8
- movq %rbx,%r9
- shrq $26,%rdx
- andq $0x3ffffff,%rax
- shlq $12,%r8
- andq $0x3ffffff,%rdx
- shrq $14,%rbx
- orq %r8,%r14
- shlq $24,%rbp
- andq $0x3ffffff,%r14
- shrq $40,%r9
- andq $0x3ffffff,%rbx
- orq %r9,%rbp
- vmovd %eax,%xmm0
- vmovd %edx,%xmm1
- vmovd %r14d,%xmm2
- vmovd %ebx,%xmm3
- vmovd %ebp,%xmm4
- movl $1,20(%rdi)
- call __poly1305_init_avx
- L$proceed_avx2:
- movq %r15,%rdx
- movl _OPENSSL_ia32cap_P+8(%rip),%r10d
- movl $3221291008,%r11d
- movq 0(%rsp),%r15
- movq 8(%rsp),%r14
- movq 16(%rsp),%r13
- movq 24(%rsp),%r12
- movq 32(%rsp),%rbp
- movq 40(%rsp),%rbx
- leaq 48(%rsp),%rax
- leaq 48(%rsp),%rsp
- L$base2_64_avx2_epilogue:
- jmp L$do_avx2
- .p2align 5
- L$even_avx2:
- movl _OPENSSL_ia32cap_P+8(%rip),%r10d
- vmovd 0(%rdi),%xmm0
- vmovd 4(%rdi),%xmm1
- vmovd 8(%rdi),%xmm2
- vmovd 12(%rdi),%xmm3
- vmovd 16(%rdi),%xmm4
- L$do_avx2:
- cmpq $512,%rdx
- jb L$skip_avx512
- andl %r11d,%r10d
- testl $65536,%r10d
- jnz L$blocks_avx512
- L$skip_avx512:
- leaq -8(%rsp),%r11
- subq $0x128,%rsp
- leaq L$const(%rip),%rcx
- leaq 48+64(%rdi),%rdi
- vmovdqa 96(%rcx),%ymm7
- vmovdqu -64(%rdi),%xmm9
- andq $-512,%rsp
- vmovdqu -48(%rdi),%xmm10
- vmovdqu -32(%rdi),%xmm6
- vmovdqu -16(%rdi),%xmm11
- vmovdqu 0(%rdi),%xmm12
- vmovdqu 16(%rdi),%xmm13
- leaq 144(%rsp),%rax
- vmovdqu 32(%rdi),%xmm14
- vpermd %ymm9,%ymm7,%ymm9
- vmovdqu 48(%rdi),%xmm15
- vpermd %ymm10,%ymm7,%ymm10
- vmovdqu 64(%rdi),%xmm5
- vpermd %ymm6,%ymm7,%ymm6
- vmovdqa %ymm9,0(%rsp)
- vpermd %ymm11,%ymm7,%ymm11
- vmovdqa %ymm10,32-144(%rax)
- vpermd %ymm12,%ymm7,%ymm12
- vmovdqa %ymm6,64-144(%rax)
- vpermd %ymm13,%ymm7,%ymm13
- vmovdqa %ymm11,96-144(%rax)
- vpermd %ymm14,%ymm7,%ymm14
- vmovdqa %ymm12,128-144(%rax)
- vpermd %ymm15,%ymm7,%ymm15
- vmovdqa %ymm13,160-144(%rax)
- vpermd %ymm5,%ymm7,%ymm5
- vmovdqa %ymm14,192-144(%rax)
- vmovdqa %ymm15,224-144(%rax)
- vmovdqa %ymm5,256-144(%rax)
- vmovdqa 64(%rcx),%ymm5
- vmovdqu 0(%rsi),%xmm7
- vmovdqu 16(%rsi),%xmm8
- vinserti128 $1,32(%rsi),%ymm7,%ymm7
- vinserti128 $1,48(%rsi),%ymm8,%ymm8
- leaq 64(%rsi),%rsi
- vpsrldq $6,%ymm7,%ymm9
- vpsrldq $6,%ymm8,%ymm10
- vpunpckhqdq %ymm8,%ymm7,%ymm6
- vpunpcklqdq %ymm10,%ymm9,%ymm9
- vpunpcklqdq %ymm8,%ymm7,%ymm7
- vpsrlq $30,%ymm9,%ymm10
- vpsrlq $4,%ymm9,%ymm9
- vpsrlq $26,%ymm7,%ymm8
- vpsrlq $40,%ymm6,%ymm6
- vpand %ymm5,%ymm9,%ymm9
- vpand %ymm5,%ymm7,%ymm7
- vpand %ymm5,%ymm8,%ymm8
- vpand %ymm5,%ymm10,%ymm10
- vpor 32(%rcx),%ymm6,%ymm6
- vpaddq %ymm2,%ymm9,%ymm2
- subq $64,%rdx
- jz L$tail_avx2
- jmp L$oop_avx2
- .p2align 5
- L$oop_avx2:
- vpaddq %ymm0,%ymm7,%ymm0
- vmovdqa 0(%rsp),%ymm7
- vpaddq %ymm1,%ymm8,%ymm1
- vmovdqa 32(%rsp),%ymm8
- vpaddq %ymm3,%ymm10,%ymm3
- vmovdqa 96(%rsp),%ymm9
- vpaddq %ymm4,%ymm6,%ymm4
- vmovdqa 48(%rax),%ymm10
- vmovdqa 112(%rax),%ymm5
- vpmuludq %ymm2,%ymm7,%ymm13
- vpmuludq %ymm2,%ymm8,%ymm14
- vpmuludq %ymm2,%ymm9,%ymm15
- vpmuludq %ymm2,%ymm10,%ymm11
- vpmuludq %ymm2,%ymm5,%ymm12
- vpmuludq %ymm0,%ymm8,%ymm6
- vpmuludq %ymm1,%ymm8,%ymm2
- vpaddq %ymm6,%ymm12,%ymm12
- vpaddq %ymm2,%ymm13,%ymm13
- vpmuludq %ymm3,%ymm8,%ymm6
- vpmuludq 64(%rsp),%ymm4,%ymm2
- vpaddq %ymm6,%ymm15,%ymm15
- vpaddq %ymm2,%ymm11,%ymm11
- vmovdqa -16(%rax),%ymm8
- vpmuludq %ymm0,%ymm7,%ymm6
- vpmuludq %ymm1,%ymm7,%ymm2
- vpaddq %ymm6,%ymm11,%ymm11
- vpaddq %ymm2,%ymm12,%ymm12
- vpmuludq %ymm3,%ymm7,%ymm6
- vpmuludq %ymm4,%ymm7,%ymm2
- vmovdqu 0(%rsi),%xmm7
- vpaddq %ymm6,%ymm14,%ymm14
- vpaddq %ymm2,%ymm15,%ymm15
- vinserti128 $1,32(%rsi),%ymm7,%ymm7
- vpmuludq %ymm3,%ymm8,%ymm6
- vpmuludq %ymm4,%ymm8,%ymm2
- vmovdqu 16(%rsi),%xmm8
- vpaddq %ymm6,%ymm11,%ymm11
- vpaddq %ymm2,%ymm12,%ymm12
- vmovdqa 16(%rax),%ymm2
- vpmuludq %ymm1,%ymm9,%ymm6
- vpmuludq %ymm0,%ymm9,%ymm9
- vpaddq %ymm6,%ymm14,%ymm14
- vpaddq %ymm9,%ymm13,%ymm13
- vinserti128 $1,48(%rsi),%ymm8,%ymm8
- leaq 64(%rsi),%rsi
- vpmuludq %ymm1,%ymm2,%ymm6
- vpmuludq %ymm0,%ymm2,%ymm2
- vpsrldq $6,%ymm7,%ymm9
- vpaddq %ymm6,%ymm15,%ymm15
- vpaddq %ymm2,%ymm14,%ymm14
- vpmuludq %ymm3,%ymm10,%ymm6
- vpmuludq %ymm4,%ymm10,%ymm2
- vpsrldq $6,%ymm8,%ymm10
- vpaddq %ymm6,%ymm12,%ymm12
- vpaddq %ymm2,%ymm13,%ymm13
- vpunpckhqdq %ymm8,%ymm7,%ymm6
- vpmuludq %ymm3,%ymm5,%ymm3
- vpmuludq %ymm4,%ymm5,%ymm4
- vpunpcklqdq %ymm8,%ymm7,%ymm7
- vpaddq %ymm3,%ymm13,%ymm2
- vpaddq %ymm4,%ymm14,%ymm3
- vpunpcklqdq %ymm10,%ymm9,%ymm10
- vpmuludq 80(%rax),%ymm0,%ymm4
- vpmuludq %ymm1,%ymm5,%ymm0
- vmovdqa 64(%rcx),%ymm5
- vpaddq %ymm4,%ymm15,%ymm4
- vpaddq %ymm0,%ymm11,%ymm0
- vpsrlq $26,%ymm3,%ymm14
- vpand %ymm5,%ymm3,%ymm3
- vpaddq %ymm14,%ymm4,%ymm4
- vpsrlq $26,%ymm0,%ymm11
- vpand %ymm5,%ymm0,%ymm0
- vpaddq %ymm11,%ymm12,%ymm1
- vpsrlq $26,%ymm4,%ymm15
- vpand %ymm5,%ymm4,%ymm4
- vpsrlq $4,%ymm10,%ymm9
- vpsrlq $26,%ymm1,%ymm12
- vpand %ymm5,%ymm1,%ymm1
- vpaddq %ymm12,%ymm2,%ymm2
- vpaddq %ymm15,%ymm0,%ymm0
- vpsllq $2,%ymm15,%ymm15
- vpaddq %ymm15,%ymm0,%ymm0
- vpand %ymm5,%ymm9,%ymm9
- vpsrlq $26,%ymm7,%ymm8
- vpsrlq $26,%ymm2,%ymm13
- vpand %ymm5,%ymm2,%ymm2
- vpaddq %ymm13,%ymm3,%ymm3
- vpaddq %ymm9,%ymm2,%ymm2
- vpsrlq $30,%ymm10,%ymm10
- vpsrlq $26,%ymm0,%ymm11
- vpand %ymm5,%ymm0,%ymm0
- vpaddq %ymm11,%ymm1,%ymm1
- vpsrlq $40,%ymm6,%ymm6
- vpsrlq $26,%ymm3,%ymm14
- vpand %ymm5,%ymm3,%ymm3
- vpaddq %ymm14,%ymm4,%ymm4
- vpand %ymm5,%ymm7,%ymm7
- vpand %ymm5,%ymm8,%ymm8
- vpand %ymm5,%ymm10,%ymm10
- vpor 32(%rcx),%ymm6,%ymm6
- subq $64,%rdx
- jnz L$oop_avx2
- .byte 0x66,0x90
- L$tail_avx2:
- vpaddq %ymm0,%ymm7,%ymm0
- vmovdqu 4(%rsp),%ymm7
- vpaddq %ymm1,%ymm8,%ymm1
- vmovdqu 36(%rsp),%ymm8
- vpaddq %ymm3,%ymm10,%ymm3
- vmovdqu 100(%rsp),%ymm9
- vpaddq %ymm4,%ymm6,%ymm4
- vmovdqu 52(%rax),%ymm10
- vmovdqu 116(%rax),%ymm5
- vpmuludq %ymm2,%ymm7,%ymm13
- vpmuludq %ymm2,%ymm8,%ymm14
- vpmuludq %ymm2,%ymm9,%ymm15
- vpmuludq %ymm2,%ymm10,%ymm11
- vpmuludq %ymm2,%ymm5,%ymm12
- vpmuludq %ymm0,%ymm8,%ymm6
- vpmuludq %ymm1,%ymm8,%ymm2
- vpaddq %ymm6,%ymm12,%ymm12
- vpaddq %ymm2,%ymm13,%ymm13
- vpmuludq %ymm3,%ymm8,%ymm6
- vpmuludq 68(%rsp),%ymm4,%ymm2
- vpaddq %ymm6,%ymm15,%ymm15
- vpaddq %ymm2,%ymm11,%ymm11
- vpmuludq %ymm0,%ymm7,%ymm6
- vpmuludq %ymm1,%ymm7,%ymm2
- vpaddq %ymm6,%ymm11,%ymm11
- vmovdqu -12(%rax),%ymm8
- vpaddq %ymm2,%ymm12,%ymm12
- vpmuludq %ymm3,%ymm7,%ymm6
- vpmuludq %ymm4,%ymm7,%ymm2
- vpaddq %ymm6,%ymm14,%ymm14
- vpaddq %ymm2,%ymm15,%ymm15
- vpmuludq %ymm3,%ymm8,%ymm6
- vpmuludq %ymm4,%ymm8,%ymm2
- vpaddq %ymm6,%ymm11,%ymm11
- vpaddq %ymm2,%ymm12,%ymm12
- vmovdqu 20(%rax),%ymm2
- vpmuludq %ymm1,%ymm9,%ymm6
- vpmuludq %ymm0,%ymm9,%ymm9
- vpaddq %ymm6,%ymm14,%ymm14
- vpaddq %ymm9,%ymm13,%ymm13
- vpmuludq %ymm1,%ymm2,%ymm6
- vpmuludq %ymm0,%ymm2,%ymm2
- vpaddq %ymm6,%ymm15,%ymm15
- vpaddq %ymm2,%ymm14,%ymm14
- vpmuludq %ymm3,%ymm10,%ymm6
- vpmuludq %ymm4,%ymm10,%ymm2
- vpaddq %ymm6,%ymm12,%ymm12
- vpaddq %ymm2,%ymm13,%ymm13
- vpmuludq %ymm3,%ymm5,%ymm3
- vpmuludq %ymm4,%ymm5,%ymm4
- vpaddq %ymm3,%ymm13,%ymm2
- vpaddq %ymm4,%ymm14,%ymm3
- vpmuludq 84(%rax),%ymm0,%ymm4
- vpmuludq %ymm1,%ymm5,%ymm0
- vmovdqa 64(%rcx),%ymm5
- vpaddq %ymm4,%ymm15,%ymm4
- vpaddq %ymm0,%ymm11,%ymm0
- vpsrldq $8,%ymm12,%ymm8
- vpsrldq $8,%ymm2,%ymm9
- vpsrldq $8,%ymm3,%ymm10
- vpsrldq $8,%ymm4,%ymm6
- vpsrldq $8,%ymm0,%ymm7
- vpaddq %ymm8,%ymm12,%ymm12
- vpaddq %ymm9,%ymm2,%ymm2
- vpaddq %ymm10,%ymm3,%ymm3
- vpaddq %ymm6,%ymm4,%ymm4
- vpaddq %ymm7,%ymm0,%ymm0
- vpermq $0x2,%ymm3,%ymm10
- vpermq $0x2,%ymm4,%ymm6
- vpermq $0x2,%ymm0,%ymm7
- vpermq $0x2,%ymm12,%ymm8
- vpermq $0x2,%ymm2,%ymm9
- vpaddq %ymm10,%ymm3,%ymm3
- vpaddq %ymm6,%ymm4,%ymm4
- vpaddq %ymm7,%ymm0,%ymm0
- vpaddq %ymm8,%ymm12,%ymm12
- vpaddq %ymm9,%ymm2,%ymm2
- vpsrlq $26,%ymm3,%ymm14
- vpand %ymm5,%ymm3,%ymm3
- vpaddq %ymm14,%ymm4,%ymm4
- vpsrlq $26,%ymm0,%ymm11
- vpand %ymm5,%ymm0,%ymm0
- vpaddq %ymm11,%ymm12,%ymm1
- vpsrlq $26,%ymm4,%ymm15
- vpand %ymm5,%ymm4,%ymm4
- vpsrlq $26,%ymm1,%ymm12
- vpand %ymm5,%ymm1,%ymm1
- vpaddq %ymm12,%ymm2,%ymm2
- vpaddq %ymm15,%ymm0,%ymm0
- vpsllq $2,%ymm15,%ymm15
- vpaddq %ymm15,%ymm0,%ymm0
- vpsrlq $26,%ymm2,%ymm13
- vpand %ymm5,%ymm2,%ymm2
- vpaddq %ymm13,%ymm3,%ymm3
- vpsrlq $26,%ymm0,%ymm11
- vpand %ymm5,%ymm0,%ymm0
- vpaddq %ymm11,%ymm1,%ymm1
- vpsrlq $26,%ymm3,%ymm14
- vpand %ymm5,%ymm3,%ymm3
- vpaddq %ymm14,%ymm4,%ymm4
- vmovd %xmm0,-112(%rdi)
- vmovd %xmm1,-108(%rdi)
- vmovd %xmm2,-104(%rdi)
- vmovd %xmm3,-100(%rdi)
- vmovd %xmm4,-96(%rdi)
- leaq 8(%r11),%rsp
- vzeroupper
- .byte 0xf3,0xc3
- .p2align 5
- poly1305_blocks_avx512:
- L$blocks_avx512:
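- # AVX-512F path: eight blocks per iteration in zmm registers, using mask
- # registers for the partial loads/stores of the r-power table and deriving
- # the additional powers of r it needs from those set up by
- # __poly1305_init_avx.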
- movl $15,%eax
- kmovw %eax,%k2
- leaq -8(%rsp),%r11
- subq $0x128,%rsp
- leaq L$const(%rip),%rcx
- leaq 48+64(%rdi),%rdi
- vmovdqa 96(%rcx),%ymm9
- vmovdqu -64(%rdi),%xmm11
- andq $-512,%rsp
- vmovdqu -48(%rdi),%xmm12
- movq $0x20,%rax
- vmovdqu -32(%rdi),%xmm7
- vmovdqu -16(%rdi),%xmm13
- vmovdqu 0(%rdi),%xmm8
- vmovdqu 16(%rdi),%xmm14
- vmovdqu 32(%rdi),%xmm10
- vmovdqu 48(%rdi),%xmm15
- vmovdqu 64(%rdi),%xmm6
- vpermd %zmm11,%zmm9,%zmm16
- vpbroadcastq 64(%rcx),%zmm5
- vpermd %zmm12,%zmm9,%zmm17
- vpermd %zmm7,%zmm9,%zmm21
- vpermd %zmm13,%zmm9,%zmm18
- vmovdqa64 %zmm16,0(%rsp){%k2}
- vpsrlq $32,%zmm16,%zmm7
- vpermd %zmm8,%zmm9,%zmm22
- vmovdqu64 %zmm17,0(%rsp,%rax,1){%k2}
- vpsrlq $32,%zmm17,%zmm8
- vpermd %zmm14,%zmm9,%zmm19
- vmovdqa64 %zmm21,64(%rsp){%k2}
- vpermd %zmm10,%zmm9,%zmm23
- vpermd %zmm15,%zmm9,%zmm20
- vmovdqu64 %zmm18,64(%rsp,%rax,1){%k2}
- vpermd %zmm6,%zmm9,%zmm24
- vmovdqa64 %zmm22,128(%rsp){%k2}
- vmovdqu64 %zmm19,128(%rsp,%rax,1){%k2}
- vmovdqa64 %zmm23,192(%rsp){%k2}
- vmovdqu64 %zmm20,192(%rsp,%rax,1){%k2}
- vmovdqa64 %zmm24,256(%rsp){%k2}
- vpmuludq %zmm7,%zmm16,%zmm11
- vpmuludq %zmm7,%zmm17,%zmm12
- vpmuludq %zmm7,%zmm18,%zmm13
- vpmuludq %zmm7,%zmm19,%zmm14
- vpmuludq %zmm7,%zmm20,%zmm15
- vpsrlq $32,%zmm18,%zmm9
- vpmuludq %zmm8,%zmm24,%zmm25
- vpmuludq %zmm8,%zmm16,%zmm26
- vpmuludq %zmm8,%zmm17,%zmm27
- vpmuludq %zmm8,%zmm18,%zmm28
- vpmuludq %zmm8,%zmm19,%zmm29
- vpsrlq $32,%zmm19,%zmm10
- vpaddq %zmm25,%zmm11,%zmm11
- vpaddq %zmm26,%zmm12,%zmm12
- vpaddq %zmm27,%zmm13,%zmm13
- vpaddq %zmm28,%zmm14,%zmm14
- vpaddq %zmm29,%zmm15,%zmm15
- vpmuludq %zmm9,%zmm23,%zmm25
- vpmuludq %zmm9,%zmm24,%zmm26
- vpmuludq %zmm9,%zmm17,%zmm28
- vpmuludq %zmm9,%zmm18,%zmm29
- vpmuludq %zmm9,%zmm16,%zmm27
- vpsrlq $32,%zmm20,%zmm6
- vpaddq %zmm25,%zmm11,%zmm11
- vpaddq %zmm26,%zmm12,%zmm12
- vpaddq %zmm28,%zmm14,%zmm14
- vpaddq %zmm29,%zmm15,%zmm15
- vpaddq %zmm27,%zmm13,%zmm13
- vpmuludq %zmm10,%zmm22,%zmm25
- vpmuludq %zmm10,%zmm16,%zmm28
- vpmuludq %zmm10,%zmm17,%zmm29
- vpmuludq %zmm10,%zmm23,%zmm26
- vpmuludq %zmm10,%zmm24,%zmm27
- vpaddq %zmm25,%zmm11,%zmm11
- vpaddq %zmm28,%zmm14,%zmm14
- vpaddq %zmm29,%zmm15,%zmm15
- vpaddq %zmm26,%zmm12,%zmm12
- vpaddq %zmm27,%zmm13,%zmm13
- vpmuludq %zmm6,%zmm24,%zmm28
- vpmuludq %zmm6,%zmm16,%zmm29
- vpmuludq %zmm6,%zmm21,%zmm25
- vpmuludq %zmm6,%zmm22,%zmm26
- vpmuludq %zmm6,%zmm23,%zmm27
- vpaddq %zmm28,%zmm14,%zmm14
- vpaddq %zmm29,%zmm15,%zmm15
- vpaddq %zmm25,%zmm11,%zmm11
- vpaddq %zmm26,%zmm12,%zmm12
- vpaddq %zmm27,%zmm13,%zmm13
- vmovdqu64 0(%rsi),%zmm10
- vmovdqu64 64(%rsi),%zmm6
- leaq 128(%rsi),%rsi
- vpsrlq $26,%zmm14,%zmm28
- vpandq %zmm5,%zmm14,%zmm14
- vpaddq %zmm28,%zmm15,%zmm15
- vpsrlq $26,%zmm11,%zmm25
- vpandq %zmm5,%zmm11,%zmm11
- vpaddq %zmm25,%zmm12,%zmm12
- vpsrlq $26,%zmm15,%zmm29
- vpandq %zmm5,%zmm15,%zmm15
- vpsrlq $26,%zmm12,%zmm26
- vpandq %zmm5,%zmm12,%zmm12
- vpaddq %zmm26,%zmm13,%zmm13
- vpaddq %zmm29,%zmm11,%zmm11
- vpsllq $2,%zmm29,%zmm29
- vpaddq %zmm29,%zmm11,%zmm11
- vpsrlq $26,%zmm13,%zmm27
- vpandq %zmm5,%zmm13,%zmm13
- vpaddq %zmm27,%zmm14,%zmm14
- vpsrlq $26,%zmm11,%zmm25
- vpandq %zmm5,%zmm11,%zmm11
- vpaddq %zmm25,%zmm12,%zmm12
- vpsrlq $26,%zmm14,%zmm28
- vpandq %zmm5,%zmm14,%zmm14
- vpaddq %zmm28,%zmm15,%zmm15
- vpunpcklqdq %zmm6,%zmm10,%zmm7
- vpunpckhqdq %zmm6,%zmm10,%zmm6
- vmovdqa32 128(%rcx),%zmm25
- movl $0x7777,%eax
- kmovw %eax,%k1
- vpermd %zmm16,%zmm25,%zmm16
- vpermd %zmm17,%zmm25,%zmm17
- vpermd %zmm18,%zmm25,%zmm18
- vpermd %zmm19,%zmm25,%zmm19
- vpermd %zmm20,%zmm25,%zmm20
- vpermd %zmm11,%zmm25,%zmm16{%k1}
- vpermd %zmm12,%zmm25,%zmm17{%k1}
- vpermd %zmm13,%zmm25,%zmm18{%k1}
- vpermd %zmm14,%zmm25,%zmm19{%k1}
- vpermd %zmm15,%zmm25,%zmm20{%k1}
- vpslld $2,%zmm17,%zmm21
- vpslld $2,%zmm18,%zmm22
- vpslld $2,%zmm19,%zmm23
- vpslld $2,%zmm20,%zmm24
- vpaddd %zmm17,%zmm21,%zmm21
- vpaddd %zmm18,%zmm22,%zmm22
- vpaddd %zmm19,%zmm23,%zmm23
- vpaddd %zmm20,%zmm24,%zmm24
- vpbroadcastq 32(%rcx),%zmm30
- vpsrlq $52,%zmm7,%zmm9
- vpsllq $12,%zmm6,%zmm10
- vporq %zmm10,%zmm9,%zmm9
- vpsrlq $26,%zmm7,%zmm8
- vpsrlq $14,%zmm6,%zmm10
- vpsrlq $40,%zmm6,%zmm6
- vpandq %zmm5,%zmm9,%zmm9
- vpandq %zmm5,%zmm7,%zmm7
- vpaddq %zmm2,%zmm9,%zmm2
- subq $192,%rdx
- jbe L$tail_avx512
- jmp L$oop_avx512
- .p2align 5
- L$oop_avx512:
- vpmuludq %zmm2,%zmm17,%zmm14
- vpaddq %zmm0,%zmm7,%zmm0
- vpmuludq %zmm2,%zmm18,%zmm15
- vpandq %zmm5,%zmm8,%zmm8
- vpmuludq %zmm2,%zmm23,%zmm11
- vpandq %zmm5,%zmm10,%zmm10
- vpmuludq %zmm2,%zmm24,%zmm12
- vporq %zmm30,%zmm6,%zmm6
- vpmuludq %zmm2,%zmm16,%zmm13
- vpaddq %zmm1,%zmm8,%zmm1
- vpaddq %zmm3,%zmm10,%zmm3
- vpaddq %zmm4,%zmm6,%zmm4
- vmovdqu64 0(%rsi),%zmm10
- vmovdqu64 64(%rsi),%zmm6
- leaq 128(%rsi),%rsi
- vpmuludq %zmm0,%zmm19,%zmm28
- vpmuludq %zmm0,%zmm20,%zmm29
- vpmuludq %zmm0,%zmm16,%zmm25
- vpmuludq %zmm0,%zmm17,%zmm26
- vpaddq %zmm28,%zmm14,%zmm14
- vpaddq %zmm29,%zmm15,%zmm15
- vpaddq %zmm25,%zmm11,%zmm11
- vpaddq %zmm26,%zmm12,%zmm12
- vpmuludq %zmm1,%zmm18,%zmm28
- vpmuludq %zmm1,%zmm19,%zmm29
- vpmuludq %zmm1,%zmm24,%zmm25
- vpmuludq %zmm0,%zmm18,%zmm27
- vpaddq %zmm28,%zmm14,%zmm14
- vpaddq %zmm29,%zmm15,%zmm15
- vpaddq %zmm25,%zmm11,%zmm11
- vpaddq %zmm27,%zmm13,%zmm13
- vpunpcklqdq %zmm6,%zmm10,%zmm7
- vpunpckhqdq %zmm6,%zmm10,%zmm6
- vpmuludq %zmm3,%zmm16,%zmm28
- vpmuludq %zmm3,%zmm17,%zmm29
- vpmuludq %zmm1,%zmm16,%zmm26
- vpmuludq %zmm1,%zmm17,%zmm27
- vpaddq %zmm28,%zmm14,%zmm14
- vpaddq %zmm29,%zmm15,%zmm15
- vpaddq %zmm26,%zmm12,%zmm12
- vpaddq %zmm27,%zmm13,%zmm13
- vpmuludq %zmm4,%zmm24,%zmm28
- vpmuludq %zmm4,%zmm16,%zmm29
- vpmuludq %zmm3,%zmm22,%zmm25
- vpmuludq %zmm3,%zmm23,%zmm26
- vpaddq %zmm28,%zmm14,%zmm14
- vpmuludq %zmm3,%zmm24,%zmm27
- vpaddq %zmm29,%zmm15,%zmm15
- vpaddq %zmm25,%zmm11,%zmm11
- vpaddq %zmm26,%zmm12,%zmm12
- vpaddq %zmm27,%zmm13,%zmm13
- vpmuludq %zmm4,%zmm21,%zmm25
- vpmuludq %zmm4,%zmm22,%zmm26
- vpmuludq %zmm4,%zmm23,%zmm27
- vpaddq %zmm25,%zmm11,%zmm0
- vpaddq %zmm26,%zmm12,%zmm1
- vpaddq %zmm27,%zmm13,%zmm2
- vpsrlq $52,%zmm7,%zmm9
- vpsllq $12,%zmm6,%zmm10
- vpsrlq $26,%zmm14,%zmm3
- vpandq %zmm5,%zmm14,%zmm14
- vpaddq %zmm3,%zmm15,%zmm4
- vporq %zmm10,%zmm9,%zmm9
- vpsrlq $26,%zmm0,%zmm11
- vpandq %zmm5,%zmm0,%zmm0
- vpaddq %zmm11,%zmm1,%zmm1
- vpandq %zmm5,%zmm9,%zmm9
- vpsrlq $26,%zmm4,%zmm15
- vpandq %zmm5,%zmm4,%zmm4
- vpsrlq $26,%zmm1,%zmm12
- vpandq %zmm5,%zmm1,%zmm1
- vpaddq %zmm12,%zmm2,%zmm2
- vpaddq %zmm15,%zmm0,%zmm0
- vpsllq $2,%zmm15,%zmm15
- vpaddq %zmm15,%zmm0,%zmm0
- vpaddq %zmm9,%zmm2,%zmm2
- vpsrlq $26,%zmm7,%zmm8
- vpsrlq $26,%zmm2,%zmm13
- vpandq %zmm5,%zmm2,%zmm2
- vpaddq %zmm13,%zmm14,%zmm3
- vpsrlq $14,%zmm6,%zmm10
- vpsrlq $26,%zmm0,%zmm11
- vpandq %zmm5,%zmm0,%zmm0
- vpaddq %zmm11,%zmm1,%zmm1
- vpsrlq $40,%zmm6,%zmm6
- vpsrlq $26,%zmm3,%zmm14
- vpandq %zmm5,%zmm3,%zmm3
- vpaddq %zmm14,%zmm4,%zmm4
- vpandq %zmm5,%zmm7,%zmm7
- subq $128,%rdx
- ja L$oop_avx512
- L$tail_avx512:
- vpsrlq $32,%zmm16,%zmm16
- vpsrlq $32,%zmm17,%zmm17
- vpsrlq $32,%zmm18,%zmm18
- vpsrlq $32,%zmm23,%zmm23
- vpsrlq $32,%zmm24,%zmm24
- vpsrlq $32,%zmm19,%zmm19
- vpsrlq $32,%zmm20,%zmm20
- vpsrlq $32,%zmm21,%zmm21
- vpsrlq $32,%zmm22,%zmm22
- leaq (%rsi,%rdx,1),%rsi
- vpaddq %zmm0,%zmm7,%zmm0
- vpmuludq %zmm2,%zmm17,%zmm14
- vpmuludq %zmm2,%zmm18,%zmm15
- vpmuludq %zmm2,%zmm23,%zmm11
- vpandq %zmm5,%zmm8,%zmm8
- vpmuludq %zmm2,%zmm24,%zmm12
- vpandq %zmm5,%zmm10,%zmm10
- vpmuludq %zmm2,%zmm16,%zmm13
- vporq %zmm30,%zmm6,%zmm6
- vpaddq %zmm1,%zmm8,%zmm1
- vpaddq %zmm3,%zmm10,%zmm3
- vpaddq %zmm4,%zmm6,%zmm4
- vmovdqu 0(%rsi),%xmm7
- vpmuludq %zmm0,%zmm19,%zmm28
- vpmuludq %zmm0,%zmm20,%zmm29
- vpmuludq %zmm0,%zmm16,%zmm25
- vpmuludq %zmm0,%zmm17,%zmm26
- vpaddq %zmm28,%zmm14,%zmm14
- vpaddq %zmm29,%zmm15,%zmm15
- vpaddq %zmm25,%zmm11,%zmm11
- vpaddq %zmm26,%zmm12,%zmm12
- vmovdqu 16(%rsi),%xmm8
- vpmuludq %zmm1,%zmm18,%zmm28
- vpmuludq %zmm1,%zmm19,%zmm29
- vpmuludq %zmm1,%zmm24,%zmm25
- vpmuludq %zmm0,%zmm18,%zmm27
- vpaddq %zmm28,%zmm14,%zmm14
- vpaddq %zmm29,%zmm15,%zmm15
- vpaddq %zmm25,%zmm11,%zmm11
- vpaddq %zmm27,%zmm13,%zmm13
- vinserti128 $1,32(%rsi),%ymm7,%ymm7
- vpmuludq %zmm3,%zmm16,%zmm28
- vpmuludq %zmm3,%zmm17,%zmm29
- vpmuludq %zmm1,%zmm16,%zmm26
- vpmuludq %zmm1,%zmm17,%zmm27
- vpaddq %zmm28,%zmm14,%zmm14
- vpaddq %zmm29,%zmm15,%zmm15
- vpaddq %zmm26,%zmm12,%zmm12
- vpaddq %zmm27,%zmm13,%zmm13
- vinserti128 $1,48(%rsi),%ymm8,%ymm8
- vpmuludq %zmm4,%zmm24,%zmm28
- vpmuludq %zmm4,%zmm16,%zmm29
- vpmuludq %zmm3,%zmm22,%zmm25
- vpmuludq %zmm3,%zmm23,%zmm26
- vpmuludq %zmm3,%zmm24,%zmm27
- vpaddq %zmm28,%zmm14,%zmm3
- vpaddq %zmm29,%zmm15,%zmm15
- vpaddq %zmm25,%zmm11,%zmm11
- vpaddq %zmm26,%zmm12,%zmm12
- vpaddq %zmm27,%zmm13,%zmm13
- vpmuludq %zmm4,%zmm21,%zmm25
- vpmuludq %zmm4,%zmm22,%zmm26
- vpmuludq %zmm4,%zmm23,%zmm27
- vpaddq %zmm25,%zmm11,%zmm0
- vpaddq %zmm26,%zmm12,%zmm1
- vpaddq %zmm27,%zmm13,%zmm2
- movl $1,%eax
- vpermq $0xb1,%zmm3,%zmm14
- vpermq $0xb1,%zmm15,%zmm4
- vpermq $0xb1,%zmm0,%zmm11
- vpermq $0xb1,%zmm1,%zmm12
- vpermq $0xb1,%zmm2,%zmm13
- vpaddq %zmm14,%zmm3,%zmm3
- vpaddq %zmm15,%zmm4,%zmm4
- vpaddq %zmm11,%zmm0,%zmm0
- vpaddq %zmm12,%zmm1,%zmm1
- vpaddq %zmm13,%zmm2,%zmm2
- kmovw %eax,%k3
- vpermq $0x2,%zmm3,%zmm14
- vpermq $0x2,%zmm4,%zmm15
- vpermq $0x2,%zmm0,%zmm11
- vpermq $0x2,%zmm1,%zmm12
- vpermq $0x2,%zmm2,%zmm13
- vpaddq %zmm14,%zmm3,%zmm3
- vpaddq %zmm15,%zmm4,%zmm4
- vpaddq %zmm11,%zmm0,%zmm0
- vpaddq %zmm12,%zmm1,%zmm1
- vpaddq %zmm13,%zmm2,%zmm2
- vextracti64x4 $0x1,%zmm3,%ymm14
- vextracti64x4 $0x1,%zmm4,%ymm15
- vextracti64x4 $0x1,%zmm0,%ymm11
- vextracti64x4 $0x1,%zmm1,%ymm12
- vextracti64x4 $0x1,%zmm2,%ymm13
- vpaddq %zmm14,%zmm3,%zmm3{%k3}{z}
- vpaddq %zmm15,%zmm4,%zmm4{%k3}{z}
- vpaddq %zmm11,%zmm0,%zmm0{%k3}{z}
- vpaddq %zmm12,%zmm1,%zmm1{%k3}{z}
- vpaddq %zmm13,%zmm2,%zmm2{%k3}{z}
- vpsrlq $26,%ymm3,%ymm14
- vpand %ymm5,%ymm3,%ymm3
- vpsrldq $6,%ymm7,%ymm9
- vpsrldq $6,%ymm8,%ymm10
- vpunpckhqdq %ymm8,%ymm7,%ymm6
- vpaddq %ymm14,%ymm4,%ymm4
- vpsrlq $26,%ymm0,%ymm11
- vpand %ymm5,%ymm0,%ymm0
- vpunpcklqdq %ymm10,%ymm9,%ymm9
- vpunpcklqdq %ymm8,%ymm7,%ymm7
- vpaddq %ymm11,%ymm1,%ymm1
- vpsrlq $26,%ymm4,%ymm15
- vpand %ymm5,%ymm4,%ymm4
- vpsrlq $26,%ymm1,%ymm12
- vpand %ymm5,%ymm1,%ymm1
- vpsrlq $30,%ymm9,%ymm10
- vpsrlq $4,%ymm9,%ymm9
- vpaddq %ymm12,%ymm2,%ymm2
- vpaddq %ymm15,%ymm0,%ymm0
- vpsllq $2,%ymm15,%ymm15
- vpsrlq $26,%ymm7,%ymm8
- vpsrlq $40,%ymm6,%ymm6
- vpaddq %ymm15,%ymm0,%ymm0
- vpsrlq $26,%ymm2,%ymm13
- vpand %ymm5,%ymm2,%ymm2
- vpand %ymm5,%ymm9,%ymm9
- vpand %ymm5,%ymm7,%ymm7
- vpaddq %ymm13,%ymm3,%ymm3
- vpsrlq $26,%ymm0,%ymm11
- vpand %ymm5,%ymm0,%ymm0
- vpaddq %ymm2,%ymm9,%ymm2
- vpand %ymm5,%ymm8,%ymm8
- vpaddq %ymm11,%ymm1,%ymm1
- vpsrlq $26,%ymm3,%ymm14
- vpand %ymm5,%ymm3,%ymm3
- vpand %ymm5,%ymm10,%ymm10
- vpor 32(%rcx),%ymm6,%ymm6
- vpaddq %ymm14,%ymm4,%ymm4
- leaq 144(%rsp),%rax
- addq $64,%rdx
- jnz L$tail_avx2
- vpsubq %ymm9,%ymm2,%ymm2
- vmovd %xmm0,-112(%rdi)
- vmovd %xmm1,-108(%rdi)
- vmovd %xmm2,-104(%rdi)
- vmovd %xmm3,-100(%rdi)
- vmovd %xmm4,-96(%rdi)
- vzeroall
- leaq 8(%r11),%rsp
- .byte 0xf3,0xc3
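- # poly1305_init_base2_44: init routine for the VPMADD52 (AVX-512 IFMA) code path.
- # Zeroes the accumulator, clamps the key, splits r into 44/44/42-bit limbs at
- # 40/48/56(%rdi) plus the pre-multiplied (x20) limbs at 24/32(%rdi), stores -1 at
- # 64(%rdi) as a "powers of r not computed yet" flag, and publishes the blocks/emit
- # entry points through the two-slot table at (%rdx).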
- .p2align 5
- poly1305_init_base2_44:
- xorq %rax,%rax
- movq %rax,0(%rdi)
- movq %rax,8(%rdi)
- movq %rax,16(%rdi)
- L$init_base2_44:
- leaq poly1305_blocks_vpmadd52(%rip),%r10
- leaq poly1305_emit_base2_44(%rip),%r11
- movq $0x0ffffffc0fffffff,%rax
- movq $0x0ffffffc0ffffffc,%rcx
- andq 0(%rsi),%rax
- movq $0x00000fffffffffff,%r8
- andq 8(%rsi),%rcx
- movq $0x00000fffffffffff,%r9
- andq %rax,%r8
- shrdq $44,%rcx,%rax
- movq %r8,40(%rdi)
- andq %r9,%rax
- shrq $24,%rcx
- movq %rax,48(%rdi)
- leaq (%rax,%rax,4),%rax
- movq %rcx,56(%rdi)
- shlq $2,%rax
- leaq (%rcx,%rcx,4),%rcx
- shlq $2,%rcx
- movq %rax,24(%rdi)
- movq %rcx,32(%rdi)
- movq $-1,64(%rdi)
- movq %r10,0(%rdx)
- movq %r11,8(%rdx)
- movl $1,%eax
- .byte 0xf3,0xc3
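- # poly1305_blocks_vpmadd52: one-block-at-a-time IFMA path.  Converts the byte count
- # in %rdx to 16-byte blocks, shifts the pad bit in %rcx up to bit 40 of the top limb,
- # peels off the leading blocks that do not fit the vector schedule and processes them
- # serially with VPMADD52LUQ/HUQ on 44/44/42-bit limbs, then falls through to
- # L$blocks_vpmadd52_4x for the bulk of the input.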
- .p2align 5
- poly1305_blocks_vpmadd52:
- shrq $4,%rdx
- jz L$no_data_vpmadd52
- shlq $40,%rcx
- movq 64(%rdi),%r8
- movq $3,%rax
- movq $1,%r10
- cmpq $4,%rdx
- cmovaeq %r10,%rax
- testq %r8,%r8
- cmovnsq %r10,%rax
- andq %rdx,%rax
- jz L$blocks_vpmadd52_4x
- subq %rax,%rdx
- movl $7,%r10d
- movl $1,%r11d
- kmovw %r10d,%k7
- leaq L$2_44_inp_permd(%rip),%r10
- kmovw %r11d,%k1
- vmovq %rcx,%xmm21
- vmovdqa64 0(%r10),%ymm19
- vmovdqa64 32(%r10),%ymm20
- vpermq $0xcf,%ymm21,%ymm21
- vmovdqa64 64(%r10),%ymm22
- vmovdqu64 0(%rdi),%ymm16{%k7}{z}
- vmovdqu64 40(%rdi),%ymm3{%k7}{z}
- vmovdqu64 32(%rdi),%ymm4{%k7}{z}
- vmovdqu64 24(%rdi),%ymm5{%k7}{z}
- vmovdqa64 96(%r10),%ymm23
- vmovdqa64 128(%r10),%ymm24
- jmp L$oop_vpmadd52
- .p2align 5
- L$oop_vpmadd52:
- vmovdqu32 0(%rsi),%xmm18
- leaq 16(%rsi),%rsi
- vpermd %ymm18,%ymm19,%ymm18
- vpsrlvq %ymm20,%ymm18,%ymm18
- vpandq %ymm22,%ymm18,%ymm18
- vporq %ymm21,%ymm18,%ymm18
- vpaddq %ymm18,%ymm16,%ymm16
- vpermq $0,%ymm16,%ymm0{%k7}{z}
- vpermq $85,%ymm16,%ymm1{%k7}{z}
- vpermq $170,%ymm16,%ymm2{%k7}{z}
- vpxord %ymm16,%ymm16,%ymm16
- vpxord %ymm17,%ymm17,%ymm17
- vpmadd52luq %ymm3,%ymm0,%ymm16
- vpmadd52huq %ymm3,%ymm0,%ymm17
- vpmadd52luq %ymm4,%ymm1,%ymm16
- vpmadd52huq %ymm4,%ymm1,%ymm17
- vpmadd52luq %ymm5,%ymm2,%ymm16
- vpmadd52huq %ymm5,%ymm2,%ymm17
- vpsrlvq %ymm23,%ymm16,%ymm18
- vpsllvq %ymm24,%ymm17,%ymm17
- vpandq %ymm22,%ymm16,%ymm16
- vpaddq %ymm18,%ymm17,%ymm17
- vpermq $147,%ymm17,%ymm17
- vpaddq %ymm17,%ymm16,%ymm16
- vpsrlvq %ymm23,%ymm16,%ymm18
- vpandq %ymm22,%ymm16,%ymm16
- vpermq $147,%ymm18,%ymm18
- vpaddq %ymm18,%ymm16,%ymm16
- vpermq $147,%ymm16,%ymm18{%k1}{z}
- vpaddq %ymm18,%ymm16,%ymm16
- vpsllq $2,%ymm18,%ymm18
- vpaddq %ymm18,%ymm16,%ymm16
- decq %rax
- jnz L$oop_vpmadd52
- vmovdqu64 %ymm16,0(%rdi){%k7}
- testq %rdx,%rdx
- jnz L$blocks_vpmadd52_4x
- L$no_data_vpmadd52:
- .byte 0xf3,0xc3
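- # poly1305_blocks_vpmadd52_4x: 4-way IFMA path.  On first use it branches to
- # L$init_vpmadd52 to compute the required powers of r; afterwards it broadcasts the
- # highest stored power for the steady-state loop and consumes 64 bytes (four blocks)
- # per iteration, with a 2x variant for shorter input.  The tail reloads the full
- # per-lane power vectors to finish.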
- .p2align 5
- poly1305_blocks_vpmadd52_4x:
- shrq $4,%rdx
- jz L$no_data_vpmadd52_4x
- shlq $40,%rcx
- movq 64(%rdi),%r8
- L$blocks_vpmadd52_4x:
- vpbroadcastq %rcx,%ymm31
- vmovdqa64 L$x_mask44(%rip),%ymm28
- movl $5,%eax
- vmovdqa64 L$x_mask42(%rip),%ymm29
- kmovw %eax,%k1
- testq %r8,%r8
- js L$init_vpmadd52
- vmovq 0(%rdi),%xmm0
- vmovq 8(%rdi),%xmm1
- vmovq 16(%rdi),%xmm2
- testq $3,%rdx
- jnz L$blocks_vpmadd52_2x_do
- L$blocks_vpmadd52_4x_do:
- vpbroadcastq 64(%rdi),%ymm3
- vpbroadcastq 96(%rdi),%ymm4
- vpbroadcastq 128(%rdi),%ymm5
- vpbroadcastq 160(%rdi),%ymm16
- L$blocks_vpmadd52_4x_key_loaded:
- vpsllq $2,%ymm5,%ymm17
- vpaddq %ymm5,%ymm17,%ymm17
- vpsllq $2,%ymm17,%ymm17
- testq $7,%rdx
- jz L$blocks_vpmadd52_8x
- vmovdqu64 0(%rsi),%ymm26
- vmovdqu64 32(%rsi),%ymm27
- leaq 64(%rsi),%rsi
- vpunpcklqdq %ymm27,%ymm26,%ymm25
- vpunpckhqdq %ymm27,%ymm26,%ymm27
- vpsrlq $24,%ymm27,%ymm26
- vporq %ymm31,%ymm26,%ymm26
- vpaddq %ymm26,%ymm2,%ymm2
- vpandq %ymm28,%ymm25,%ymm24
- vpsrlq $44,%ymm25,%ymm25
- vpsllq $20,%ymm27,%ymm27
- vporq %ymm27,%ymm25,%ymm25
- vpandq %ymm28,%ymm25,%ymm25
- subq $4,%rdx
- jz L$tail_vpmadd52_4x
- jmp L$oop_vpmadd52_4x
- ud2
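- # L$init_vpmadd52: first-use key setup for the 4x/8x paths.  Loads the base-2^44
- # key limbs stored by poly1305_init_base2_44 and squares them (the two-pass %eax
- # loop) to build the power-of-r vectors, which L$done_init_vpmadd52 /
- # L$done_init_vpmadd52_2x then write back to the context at 64..160(%rdi).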
- .p2align 5
- L$init_vpmadd52:
- vmovq 24(%rdi),%xmm16
- vmovq 56(%rdi),%xmm2
- vmovq 32(%rdi),%xmm17
- vmovq 40(%rdi),%xmm3
- vmovq 48(%rdi),%xmm4
- vmovdqa %ymm3,%ymm0
- vmovdqa %ymm4,%ymm1
- vmovdqa %ymm2,%ymm5
- movl $2,%eax
- L$mul_init_vpmadd52:
- vpxorq %ymm18,%ymm18,%ymm18
- vpmadd52luq %ymm2,%ymm16,%ymm18
- vpxorq %ymm19,%ymm19,%ymm19
- vpmadd52huq %ymm2,%ymm16,%ymm19
- vpxorq %ymm20,%ymm20,%ymm20
- vpmadd52luq %ymm2,%ymm17,%ymm20
- vpxorq %ymm21,%ymm21,%ymm21
- vpmadd52huq %ymm2,%ymm17,%ymm21
- vpxorq %ymm22,%ymm22,%ymm22
- vpmadd52luq %ymm2,%ymm3,%ymm22
- vpxorq %ymm23,%ymm23,%ymm23
- vpmadd52huq %ymm2,%ymm3,%ymm23
- vpmadd52luq %ymm0,%ymm3,%ymm18
- vpmadd52huq %ymm0,%ymm3,%ymm19
- vpmadd52luq %ymm0,%ymm4,%ymm20
- vpmadd52huq %ymm0,%ymm4,%ymm21
- vpmadd52luq %ymm0,%ymm5,%ymm22
- vpmadd52huq %ymm0,%ymm5,%ymm23
- vpmadd52luq %ymm1,%ymm17,%ymm18
- vpmadd52huq %ymm1,%ymm17,%ymm19
- vpmadd52luq %ymm1,%ymm3,%ymm20
- vpmadd52huq %ymm1,%ymm3,%ymm21
- vpmadd52luq %ymm1,%ymm4,%ymm22
- vpmadd52huq %ymm1,%ymm4,%ymm23
- vpsrlq $44,%ymm18,%ymm30
- vpsllq $8,%ymm19,%ymm19
- vpandq %ymm28,%ymm18,%ymm0
- vpaddq %ymm30,%ymm19,%ymm19
- vpaddq %ymm19,%ymm20,%ymm20
- vpsrlq $44,%ymm20,%ymm30
- vpsllq $8,%ymm21,%ymm21
- vpandq %ymm28,%ymm20,%ymm1
- vpaddq %ymm30,%ymm21,%ymm21
- vpaddq %ymm21,%ymm22,%ymm22
- vpsrlq $42,%ymm22,%ymm30
- vpsllq $10,%ymm23,%ymm23
- vpandq %ymm29,%ymm22,%ymm2
- vpaddq %ymm30,%ymm23,%ymm23
- vpaddq %ymm23,%ymm0,%ymm0
- vpsllq $2,%ymm23,%ymm23
- vpaddq %ymm23,%ymm0,%ymm0
- vpsrlq $44,%ymm0,%ymm30
- vpandq %ymm28,%ymm0,%ymm0
- vpaddq %ymm30,%ymm1,%ymm1
- decl %eax
- jz L$done_init_vpmadd52
- vpunpcklqdq %ymm4,%ymm1,%ymm4
- vpbroadcastq %xmm1,%xmm1
- vpunpcklqdq %ymm5,%ymm2,%ymm5
- vpbroadcastq %xmm2,%xmm2
- vpunpcklqdq %ymm3,%ymm0,%ymm3
- vpbroadcastq %xmm0,%xmm0
- vpsllq $2,%ymm4,%ymm16
- vpsllq $2,%ymm5,%ymm17
- vpaddq %ymm4,%ymm16,%ymm16
- vpaddq %ymm5,%ymm17,%ymm17
- vpsllq $2,%ymm16,%ymm16
- vpsllq $2,%ymm17,%ymm17
- jmp L$mul_init_vpmadd52
- ud2
- .p2align 5
- L$done_init_vpmadd52:
- vinserti128 $1,%xmm4,%ymm1,%ymm4
- vinserti128 $1,%xmm5,%ymm2,%ymm5
- vinserti128 $1,%xmm3,%ymm0,%ymm3
- vpermq $216,%ymm4,%ymm4
- vpermq $216,%ymm5,%ymm5
- vpermq $216,%ymm3,%ymm3
- vpsllq $2,%ymm4,%ymm16
- vpaddq %ymm4,%ymm16,%ymm16
- vpsllq $2,%ymm16,%ymm16
- vmovq 0(%rdi),%xmm0
- vmovq 8(%rdi),%xmm1
- vmovq 16(%rdi),%xmm2
- testq $3,%rdx
- jnz L$done_init_vpmadd52_2x
- vmovdqu64 %ymm3,64(%rdi)
- vpbroadcastq %xmm3,%ymm3
- vmovdqu64 %ymm4,96(%rdi)
- vpbroadcastq %xmm4,%ymm4
- vmovdqu64 %ymm5,128(%rdi)
- vpbroadcastq %xmm5,%ymm5
- vmovdqu64 %ymm16,160(%rdi)
- vpbroadcastq %xmm16,%ymm16
- jmp L$blocks_vpmadd52_4x_key_loaded
- ud2
- .p2align 5
- L$done_init_vpmadd52_2x:
- vmovdqu64 %ymm3,64(%rdi)
- vpsrldq $8,%ymm3,%ymm3
- vmovdqu64 %ymm4,96(%rdi)
- vpsrldq $8,%ymm4,%ymm4
- vmovdqu64 %ymm5,128(%rdi)
- vpsrldq $8,%ymm5,%ymm5
- vmovdqu64 %ymm16,160(%rdi)
- vpsrldq $8,%ymm16,%ymm16
- jmp L$blocks_vpmadd52_2x_key_loaded
- ud2
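- # 2x variant: the masked ({%k1}) loads below apparently pick the interleaved r^2:r
- # pair out of the precomputed power table before joining the common
- # L$tail_vpmadd52_2x code.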
- .p2align 5
- L$blocks_vpmadd52_2x_do:
- vmovdqu64 128+8(%rdi),%ymm5{%k1}{z}
- vmovdqu64 160+8(%rdi),%ymm16{%k1}{z}
- vmovdqu64 64+8(%rdi),%ymm3{%k1}{z}
- vmovdqu64 96+8(%rdi),%ymm4{%k1}{z}
- L$blocks_vpmadd52_2x_key_loaded:
- vmovdqu64 0(%rsi),%ymm26
- vpxorq %ymm27,%ymm27,%ymm27
- leaq 32(%rsi),%rsi
- vpunpcklqdq %ymm27,%ymm26,%ymm25
- vpunpckhqdq %ymm27,%ymm26,%ymm27
- vpsrlq $24,%ymm27,%ymm26
- vporq %ymm31,%ymm26,%ymm26
- vpaddq %ymm26,%ymm2,%ymm2
- vpandq %ymm28,%ymm25,%ymm24
- vpsrlq $44,%ymm25,%ymm25
- vpsllq $20,%ymm27,%ymm27
- vporq %ymm27,%ymm25,%ymm25
- vpandq %ymm28,%ymm25,%ymm25
- jmp L$tail_vpmadd52_2x
- ud2
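- # Main 4x loop: each iteration multiplies the accumulator (with the previous 64
- # bytes of input folded in) by the broadcast power of r using the six VPMADD52
- # lo/hi pairs, splits the next 64 bytes into 44/44/42-bit limbs in parallel, and
- # carries the 52x52-bit partial products back down through the 2^44/2^44/2^42 masks.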
- .p2align 5
- L$oop_vpmadd52_4x:
- vpaddq %ymm24,%ymm0,%ymm0
- vpaddq %ymm25,%ymm1,%ymm1
- vpxorq %ymm18,%ymm18,%ymm18
- vpmadd52luq %ymm2,%ymm16,%ymm18
- vpxorq %ymm19,%ymm19,%ymm19
- vpmadd52huq %ymm2,%ymm16,%ymm19
- vpxorq %ymm20,%ymm20,%ymm20
- vpmadd52luq %ymm2,%ymm17,%ymm20
- vpxorq %ymm21,%ymm21,%ymm21
- vpmadd52huq %ymm2,%ymm17,%ymm21
- vpxorq %ymm22,%ymm22,%ymm22
- vpmadd52luq %ymm2,%ymm3,%ymm22
- vpxorq %ymm23,%ymm23,%ymm23
- vpmadd52huq %ymm2,%ymm3,%ymm23
- vmovdqu64 0(%rsi),%ymm26
- vmovdqu64 32(%rsi),%ymm27
- leaq 64(%rsi),%rsi
- vpmadd52luq %ymm0,%ymm3,%ymm18
- vpmadd52huq %ymm0,%ymm3,%ymm19
- vpmadd52luq %ymm0,%ymm4,%ymm20
- vpmadd52huq %ymm0,%ymm4,%ymm21
- vpmadd52luq %ymm0,%ymm5,%ymm22
- vpmadd52huq %ymm0,%ymm5,%ymm23
- vpunpcklqdq %ymm27,%ymm26,%ymm25
- vpunpckhqdq %ymm27,%ymm26,%ymm27
- vpmadd52luq %ymm1,%ymm17,%ymm18
- vpmadd52huq %ymm1,%ymm17,%ymm19
- vpmadd52luq %ymm1,%ymm3,%ymm20
- vpmadd52huq %ymm1,%ymm3,%ymm21
- vpmadd52luq %ymm1,%ymm4,%ymm22
- vpmadd52huq %ymm1,%ymm4,%ymm23
- vpsrlq $44,%ymm18,%ymm30
- vpsllq $8,%ymm19,%ymm19
- vpandq %ymm28,%ymm18,%ymm0
- vpaddq %ymm30,%ymm19,%ymm19
- vpsrlq $24,%ymm27,%ymm26
- vporq %ymm31,%ymm26,%ymm26
- vpaddq %ymm19,%ymm20,%ymm20
- vpsrlq $44,%ymm20,%ymm30
- vpsllq $8,%ymm21,%ymm21
- vpandq %ymm28,%ymm20,%ymm1
- vpaddq %ymm30,%ymm21,%ymm21
- vpandq %ymm28,%ymm25,%ymm24
- vpsrlq $44,%ymm25,%ymm25
- vpsllq $20,%ymm27,%ymm27
- vpaddq %ymm21,%ymm22,%ymm22
- vpsrlq $42,%ymm22,%ymm30
- vpsllq $10,%ymm23,%ymm23
- vpandq %ymm29,%ymm22,%ymm2
- vpaddq %ymm30,%ymm23,%ymm23
- vpaddq %ymm26,%ymm2,%ymm2
- vpaddq %ymm23,%ymm0,%ymm0
- vpsllq $2,%ymm23,%ymm23
- vpaddq %ymm23,%ymm0,%ymm0
- vporq %ymm27,%ymm25,%ymm25
- vpandq %ymm28,%ymm25,%ymm25
- vpsrlq $44,%ymm0,%ymm30
- vpandq %ymm28,%ymm0,%ymm0
- vpaddq %ymm30,%ymm1,%ymm1
- subq $4,%rdx
- jnz L$oop_vpmadd52_4x
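- # Tail of the 4x/2x paths: reload the full per-lane power vectors, do the last
- # multiply, then fold the vector lanes together with the vpsrldq/vpermq additions
- # below and carry the result back into a single 44/44/42-bit accumulator.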
- L$tail_vpmadd52_4x:
- vmovdqu64 128(%rdi),%ymm5
- vmovdqu64 160(%rdi),%ymm16
- vmovdqu64 64(%rdi),%ymm3
- vmovdqu64 96(%rdi),%ymm4
- L$tail_vpmadd52_2x:
- vpsllq $2,%ymm5,%ymm17
- vpaddq %ymm5,%ymm17,%ymm17
- vpsllq $2,%ymm17,%ymm17
- vpaddq %ymm24,%ymm0,%ymm0
- vpaddq %ymm25,%ymm1,%ymm1
- vpxorq %ymm18,%ymm18,%ymm18
- vpmadd52luq %ymm2,%ymm16,%ymm18
- vpxorq %ymm19,%ymm19,%ymm19
- vpmadd52huq %ymm2,%ymm16,%ymm19
- vpxorq %ymm20,%ymm20,%ymm20
- vpmadd52luq %ymm2,%ymm17,%ymm20
- vpxorq %ymm21,%ymm21,%ymm21
- vpmadd52huq %ymm2,%ymm17,%ymm21
- vpxorq %ymm22,%ymm22,%ymm22
- vpmadd52luq %ymm2,%ymm3,%ymm22
- vpxorq %ymm23,%ymm23,%ymm23
- vpmadd52huq %ymm2,%ymm3,%ymm23
- vpmadd52luq %ymm0,%ymm3,%ymm18
- vpmadd52huq %ymm0,%ymm3,%ymm19
- vpmadd52luq %ymm0,%ymm4,%ymm20
- vpmadd52huq %ymm0,%ymm4,%ymm21
- vpmadd52luq %ymm0,%ymm5,%ymm22
- vpmadd52huq %ymm0,%ymm5,%ymm23
- vpmadd52luq %ymm1,%ymm17,%ymm18
- vpmadd52huq %ymm1,%ymm17,%ymm19
- vpmadd52luq %ymm1,%ymm3,%ymm20
- vpmadd52huq %ymm1,%ymm3,%ymm21
- vpmadd52luq %ymm1,%ymm4,%ymm22
- vpmadd52huq %ymm1,%ymm4,%ymm23
- movl $1,%eax
- kmovw %eax,%k1
- vpsrldq $8,%ymm18,%ymm24
- vpsrldq $8,%ymm19,%ymm0
- vpsrldq $8,%ymm20,%ymm25
- vpsrldq $8,%ymm21,%ymm1
- vpaddq %ymm24,%ymm18,%ymm18
- vpaddq %ymm0,%ymm19,%ymm19
- vpsrldq $8,%ymm22,%ymm26
- vpsrldq $8,%ymm23,%ymm2
- vpaddq %ymm25,%ymm20,%ymm20
- vpaddq %ymm1,%ymm21,%ymm21
- vpermq $0x2,%ymm18,%ymm24
- vpermq $0x2,%ymm19,%ymm0
- vpaddq %ymm26,%ymm22,%ymm22
- vpaddq %ymm2,%ymm23,%ymm23
- vpermq $0x2,%ymm20,%ymm25
- vpermq $0x2,%ymm21,%ymm1
- vpaddq %ymm24,%ymm18,%ymm18{%k1}{z}
- vpaddq %ymm0,%ymm19,%ymm19{%k1}{z}
- vpermq $0x2,%ymm22,%ymm26
- vpermq $0x2,%ymm23,%ymm2
- vpaddq %ymm25,%ymm20,%ymm20{%k1}{z}
- vpaddq %ymm1,%ymm21,%ymm21{%k1}{z}
- vpaddq %ymm26,%ymm22,%ymm22{%k1}{z}
- vpaddq %ymm2,%ymm23,%ymm23{%k1}{z}
- vpsrlq $44,%ymm18,%ymm30
- vpsllq $8,%ymm19,%ymm19
- vpandq %ymm28,%ymm18,%ymm0
- vpaddq %ymm30,%ymm19,%ymm19
- vpaddq %ymm19,%ymm20,%ymm20
- vpsrlq $44,%ymm20,%ymm30
- vpsllq $8,%ymm21,%ymm21
- vpandq %ymm28,%ymm20,%ymm1
- vpaddq %ymm30,%ymm21,%ymm21
- vpaddq %ymm21,%ymm22,%ymm22
- vpsrlq $42,%ymm22,%ymm30
- vpsllq $10,%ymm23,%ymm23
- vpandq %ymm29,%ymm22,%ymm2
- vpaddq %ymm30,%ymm23,%ymm23
- vpaddq %ymm23,%ymm0,%ymm0
- vpsllq $2,%ymm23,%ymm23
- vpaddq %ymm23,%ymm0,%ymm0
- vpsrlq $44,%ymm0,%ymm30
- vpandq %ymm28,%ymm0,%ymm0
- vpaddq %ymm30,%ymm1,%ymm1
- subq $2,%rdx
- ja L$blocks_vpmadd52_4x_do
- vmovq %xmm0,0(%rdi)
- vmovq %xmm1,8(%rdi)
- vmovq %xmm2,16(%rdi)
- vzeroall
- L$no_data_vpmadd52_4x:
- .byte 0xf3,0xc3
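- # poly1305_blocks_vpmadd52_8x: 8-way IFMA path.  Extends the stored powers of r
- # up to r^8 (the multiply just below the key loads), interleaves everything into
- # zmm registers, and processes 128 bytes per iteration of L$oop_vpmadd52_8x.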
- .p2align 5
- poly1305_blocks_vpmadd52_8x:
- shrq $4,%rdx
- jz L$no_data_vpmadd52_8x
- shlq $40,%rcx
- movq 64(%rdi),%r8
- vmovdqa64 L$x_mask44(%rip),%ymm28
- vmovdqa64 L$x_mask42(%rip),%ymm29
- testq %r8,%r8
- js L$init_vpmadd52
- vmovq 0(%rdi),%xmm0
- vmovq 8(%rdi),%xmm1
- vmovq 16(%rdi),%xmm2
- L$blocks_vpmadd52_8x:
- vmovdqu64 128(%rdi),%ymm5
- vmovdqu64 160(%rdi),%ymm16
- vmovdqu64 64(%rdi),%ymm3
- vmovdqu64 96(%rdi),%ymm4
- vpsllq $2,%ymm5,%ymm17
- vpaddq %ymm5,%ymm17,%ymm17
- vpsllq $2,%ymm17,%ymm17
- vpbroadcastq %xmm5,%ymm8
- vpbroadcastq %xmm3,%ymm6
- vpbroadcastq %xmm4,%ymm7
- vpxorq %ymm18,%ymm18,%ymm18
- vpmadd52luq %ymm8,%ymm16,%ymm18
- vpxorq %ymm19,%ymm19,%ymm19
- vpmadd52huq %ymm8,%ymm16,%ymm19
- vpxorq %ymm20,%ymm20,%ymm20
- vpmadd52luq %ymm8,%ymm17,%ymm20
- vpxorq %ymm21,%ymm21,%ymm21
- vpmadd52huq %ymm8,%ymm17,%ymm21
- vpxorq %ymm22,%ymm22,%ymm22
- vpmadd52luq %ymm8,%ymm3,%ymm22
- vpxorq %ymm23,%ymm23,%ymm23
- vpmadd52huq %ymm8,%ymm3,%ymm23
- vpmadd52luq %ymm6,%ymm3,%ymm18
- vpmadd52huq %ymm6,%ymm3,%ymm19
- vpmadd52luq %ymm6,%ymm4,%ymm20
- vpmadd52huq %ymm6,%ymm4,%ymm21
- vpmadd52luq %ymm6,%ymm5,%ymm22
- vpmadd52huq %ymm6,%ymm5,%ymm23
- vpmadd52luq %ymm7,%ymm17,%ymm18
- vpmadd52huq %ymm7,%ymm17,%ymm19
- vpmadd52luq %ymm7,%ymm3,%ymm20
- vpmadd52huq %ymm7,%ymm3,%ymm21
- vpmadd52luq %ymm7,%ymm4,%ymm22
- vpmadd52huq %ymm7,%ymm4,%ymm23
- vpsrlq $44,%ymm18,%ymm30
- vpsllq $8,%ymm19,%ymm19
- vpandq %ymm28,%ymm18,%ymm6
- vpaddq %ymm30,%ymm19,%ymm19
- vpaddq %ymm19,%ymm20,%ymm20
- vpsrlq $44,%ymm20,%ymm30
- vpsllq $8,%ymm21,%ymm21
- vpandq %ymm28,%ymm20,%ymm7
- vpaddq %ymm30,%ymm21,%ymm21
- vpaddq %ymm21,%ymm22,%ymm22
- vpsrlq $42,%ymm22,%ymm30
- vpsllq $10,%ymm23,%ymm23
- vpandq %ymm29,%ymm22,%ymm8
- vpaddq %ymm30,%ymm23,%ymm23
- vpaddq %ymm23,%ymm6,%ymm6
- vpsllq $2,%ymm23,%ymm23
- vpaddq %ymm23,%ymm6,%ymm6
- vpsrlq $44,%ymm6,%ymm30
- vpandq %ymm28,%ymm6,%ymm6
- vpaddq %ymm30,%ymm7,%ymm7
- vpunpcklqdq %ymm5,%ymm8,%ymm26
- vpunpckhqdq %ymm5,%ymm8,%ymm5
- vpunpcklqdq %ymm3,%ymm6,%ymm24
- vpunpckhqdq %ymm3,%ymm6,%ymm3
- vpunpcklqdq %ymm4,%ymm7,%ymm25
- vpunpckhqdq %ymm4,%ymm7,%ymm4
- vshufi64x2 $0x44,%zmm5,%zmm26,%zmm8
- vshufi64x2 $0x44,%zmm3,%zmm24,%zmm6
- vshufi64x2 $0x44,%zmm4,%zmm25,%zmm7
- vmovdqu64 0(%rsi),%zmm26
- vmovdqu64 64(%rsi),%zmm27
- leaq 128(%rsi),%rsi
- vpsllq $2,%zmm8,%zmm10
- vpsllq $2,%zmm7,%zmm9
- vpaddq %zmm8,%zmm10,%zmm10
- vpaddq %zmm7,%zmm9,%zmm9
- vpsllq $2,%zmm10,%zmm10
- vpsllq $2,%zmm9,%zmm9
- vpbroadcastq %rcx,%zmm31
- vpbroadcastq %xmm28,%zmm28
- vpbroadcastq %xmm29,%zmm29
- vpbroadcastq %xmm9,%zmm16
- vpbroadcastq %xmm10,%zmm17
- vpbroadcastq %xmm6,%zmm3
- vpbroadcastq %xmm7,%zmm4
- vpbroadcastq %xmm8,%zmm5
- vpunpcklqdq %zmm27,%zmm26,%zmm25
- vpunpckhqdq %zmm27,%zmm26,%zmm27
- vpsrlq $24,%zmm27,%zmm26
- vporq %zmm31,%zmm26,%zmm26
- vpaddq %zmm26,%zmm2,%zmm2
- vpandq %zmm28,%zmm25,%zmm24
- vpsrlq $44,%zmm25,%zmm25
- vpsllq $20,%zmm27,%zmm27
- vporq %zmm27,%zmm25,%zmm25
- vpandq %zmm28,%zmm25,%zmm25
- subq $8,%rdx
- jz L$tail_vpmadd52_8x
- jmp L$oop_vpmadd52_8x
- .p2align 5
- L$oop_vpmadd52_8x:
- vpaddq %zmm24,%zmm0,%zmm0
- vpaddq %zmm25,%zmm1,%zmm1
- vpxorq %zmm18,%zmm18,%zmm18
- vpmadd52luq %zmm2,%zmm16,%zmm18
- vpxorq %zmm19,%zmm19,%zmm19
- vpmadd52huq %zmm2,%zmm16,%zmm19
- vpxorq %zmm20,%zmm20,%zmm20
- vpmadd52luq %zmm2,%zmm17,%zmm20
- vpxorq %zmm21,%zmm21,%zmm21
- vpmadd52huq %zmm2,%zmm17,%zmm21
- vpxorq %zmm22,%zmm22,%zmm22
- vpmadd52luq %zmm2,%zmm3,%zmm22
- vpxorq %zmm23,%zmm23,%zmm23
- vpmadd52huq %zmm2,%zmm3,%zmm23
- vmovdqu64 0(%rsi),%zmm26
- vmovdqu64 64(%rsi),%zmm27
- leaq 128(%rsi),%rsi
- vpmadd52luq %zmm0,%zmm3,%zmm18
- vpmadd52huq %zmm0,%zmm3,%zmm19
- vpmadd52luq %zmm0,%zmm4,%zmm20
- vpmadd52huq %zmm0,%zmm4,%zmm21
- vpmadd52luq %zmm0,%zmm5,%zmm22
- vpmadd52huq %zmm0,%zmm5,%zmm23
- vpunpcklqdq %zmm27,%zmm26,%zmm25
- vpunpckhqdq %zmm27,%zmm26,%zmm27
- vpmadd52luq %zmm1,%zmm17,%zmm18
- vpmadd52huq %zmm1,%zmm17,%zmm19
- vpmadd52luq %zmm1,%zmm3,%zmm20
- vpmadd52huq %zmm1,%zmm3,%zmm21
- vpmadd52luq %zmm1,%zmm4,%zmm22
- vpmadd52huq %zmm1,%zmm4,%zmm23
- vpsrlq $44,%zmm18,%zmm30
- vpsllq $8,%zmm19,%zmm19
- vpandq %zmm28,%zmm18,%zmm0
- vpaddq %zmm30,%zmm19,%zmm19
- vpsrlq $24,%zmm27,%zmm26
- vporq %zmm31,%zmm26,%zmm26
- vpaddq %zmm19,%zmm20,%zmm20
- vpsrlq $44,%zmm20,%zmm30
- vpsllq $8,%zmm21,%zmm21
- vpandq %zmm28,%zmm20,%zmm1
- vpaddq %zmm30,%zmm21,%zmm21
- vpandq %zmm28,%zmm25,%zmm24
- vpsrlq $44,%zmm25,%zmm25
- vpsllq $20,%zmm27,%zmm27
- vpaddq %zmm21,%zmm22,%zmm22
- vpsrlq $42,%zmm22,%zmm30
- vpsllq $10,%zmm23,%zmm23
- vpandq %zmm29,%zmm22,%zmm2
- vpaddq %zmm30,%zmm23,%zmm23
- vpaddq %zmm26,%zmm2,%zmm2
- vpaddq %zmm23,%zmm0,%zmm0
- vpsllq $2,%zmm23,%zmm23
- vpaddq %zmm23,%zmm0,%zmm0
- vporq %zmm27,%zmm25,%zmm25
- vpandq %zmm28,%zmm25,%zmm25
- vpsrlq $44,%zmm0,%zmm30
- vpandq %zmm28,%zmm0,%zmm0
- vpaddq %zmm30,%zmm1,%zmm1
- subq $8,%rdx
- jnz L$oop_vpmadd52_8x
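- # 8x tail: one last multiply using the full per-lane power set (zmm6..zmm10),
- # then fold the eight lanes of each product and carry down to the scalar
- # 44/44/42-bit state stored back at 0/8/16(%rdi).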
- L$tail_vpmadd52_8x:
- vpaddq %zmm24,%zmm0,%zmm0
- vpaddq %zmm25,%zmm1,%zmm1
- vpxorq %zmm18,%zmm18,%zmm18
- vpmadd52luq %zmm2,%zmm9,%zmm18
- vpxorq %zmm19,%zmm19,%zmm19
- vpmadd52huq %zmm2,%zmm9,%zmm19
- vpxorq %zmm20,%zmm20,%zmm20
- vpmadd52luq %zmm2,%zmm10,%zmm20
- vpxorq %zmm21,%zmm21,%zmm21
- vpmadd52huq %zmm2,%zmm10,%zmm21
- vpxorq %zmm22,%zmm22,%zmm22
- vpmadd52luq %zmm2,%zmm6,%zmm22
- vpxorq %zmm23,%zmm23,%zmm23
- vpmadd52huq %zmm2,%zmm6,%zmm23
- vpmadd52luq %zmm0,%zmm6,%zmm18
- vpmadd52huq %zmm0,%zmm6,%zmm19
- vpmadd52luq %zmm0,%zmm7,%zmm20
- vpmadd52huq %zmm0,%zmm7,%zmm21
- vpmadd52luq %zmm0,%zmm8,%zmm22
- vpmadd52huq %zmm0,%zmm8,%zmm23
- vpmadd52luq %zmm1,%zmm10,%zmm18
- vpmadd52huq %zmm1,%zmm10,%zmm19
- vpmadd52luq %zmm1,%zmm6,%zmm20
- vpmadd52huq %zmm1,%zmm6,%zmm21
- vpmadd52luq %zmm1,%zmm7,%zmm22
- vpmadd52huq %zmm1,%zmm7,%zmm23
- movl $1,%eax
- kmovw %eax,%k1
- vpsrldq $8,%zmm18,%zmm24
- vpsrldq $8,%zmm19,%zmm0
- vpsrldq $8,%zmm20,%zmm25
- vpsrldq $8,%zmm21,%zmm1
- vpaddq %zmm24,%zmm18,%zmm18
- vpaddq %zmm0,%zmm19,%zmm19
- vpsrldq $8,%zmm22,%zmm26
- vpsrldq $8,%zmm23,%zmm2
- vpaddq %zmm25,%zmm20,%zmm20
- vpaddq %zmm1,%zmm21,%zmm21
- vpermq $0x2,%zmm18,%zmm24
- vpermq $0x2,%zmm19,%zmm0
- vpaddq %zmm26,%zmm22,%zmm22
- vpaddq %zmm2,%zmm23,%zmm23
- vpermq $0x2,%zmm20,%zmm25
- vpermq $0x2,%zmm21,%zmm1
- vpaddq %zmm24,%zmm18,%zmm18
- vpaddq %zmm0,%zmm19,%zmm19
- vpermq $0x2,%zmm22,%zmm26
- vpermq $0x2,%zmm23,%zmm2
- vpaddq %zmm25,%zmm20,%zmm20
- vpaddq %zmm1,%zmm21,%zmm21
- vextracti64x4 $1,%zmm18,%ymm24
- vextracti64x4 $1,%zmm19,%ymm0
- vpaddq %zmm26,%zmm22,%zmm22
- vpaddq %zmm2,%zmm23,%zmm23
- vextracti64x4 $1,%zmm20,%ymm25
- vextracti64x4 $1,%zmm21,%ymm1
- vextracti64x4 $1,%zmm22,%ymm26
- vextracti64x4 $1,%zmm23,%ymm2
- vpaddq %ymm24,%ymm18,%ymm18{%k1}{z}
- vpaddq %ymm0,%ymm19,%ymm19{%k1}{z}
- vpaddq %ymm25,%ymm20,%ymm20{%k1}{z}
- vpaddq %ymm1,%ymm21,%ymm21{%k1}{z}
- vpaddq %ymm26,%ymm22,%ymm22{%k1}{z}
- vpaddq %ymm2,%ymm23,%ymm23{%k1}{z}
- vpsrlq $44,%ymm18,%ymm30
- vpsllq $8,%ymm19,%ymm19
- vpandq %ymm28,%ymm18,%ymm0
- vpaddq %ymm30,%ymm19,%ymm19
- vpaddq %ymm19,%ymm20,%ymm20
- vpsrlq $44,%ymm20,%ymm30
- vpsllq $8,%ymm21,%ymm21
- vpandq %ymm28,%ymm20,%ymm1
- vpaddq %ymm30,%ymm21,%ymm21
- vpaddq %ymm21,%ymm22,%ymm22
- vpsrlq $42,%ymm22,%ymm30
- vpsllq $10,%ymm23,%ymm23
- vpandq %ymm29,%ymm22,%ymm2
- vpaddq %ymm30,%ymm23,%ymm23
- vpaddq %ymm23,%ymm0,%ymm0
- vpsllq $2,%ymm23,%ymm23
- vpaddq %ymm23,%ymm0,%ymm0
- vpsrlq $44,%ymm0,%ymm30
- vpandq %ymm28,%ymm0,%ymm0
- vpaddq %ymm30,%ymm1,%ymm1
- vmovq %xmm0,0(%rdi)
- vmovq %xmm1,8(%rdi)
- vmovq %xmm2,16(%rdi)
- vzeroall
- L$no_data_vpmadd52_8x:
- .byte 0xf3,0xc3
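- # poly1305_emit_base2_44: recombine the 44/44/42-bit limbs into 64-bit words,
- # perform the final conditional reduction mod 2^130-5, add the 128-bit nonce at
- # (%rdx) and store the 16-byte tag at (%rsi).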
- .p2align 5
- poly1305_emit_base2_44:
- movq 0(%rdi),%r8
- movq 8(%rdi),%r9
- movq 16(%rdi),%r10
- movq %r9,%rax
- shrq $20,%r9
- shlq $44,%rax
- movq %r10,%rcx
- shrq $40,%r10
- shlq $24,%rcx
- addq %rax,%r8
- adcq %rcx,%r9
- adcq $0,%r10
- movq %r8,%rax
- addq $5,%r8
- movq %r9,%rcx
- adcq $0,%r9
- adcq $0,%r10
- shrq $2,%r10
- cmovnzq %r8,%rax
- cmovnzq %r9,%rcx
- addq 0(%rdx),%rax
- adcq 8(%rdx),%rcx
- movq %rax,0(%rsi)
- movq %rcx,8(%rsi)
- .byte 0xf3,0xc3
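- # Constant pool: 2^26/2^24 masks and permutation indices for the AVX2/AVX-512
- # base-2^26 code, followed by the input-permutation, shift and mask tables for the
- # base-2^44 VPMADD52 code.  The trailing .byte string is the ASCII banner
- # "Poly1305 for x86_64, CRYPTOGAMS by <appro@openssl.org>".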
- .p2align 6
- L$const:
- L$mask24:
- .long 0x0ffffff,0,0x0ffffff,0,0x0ffffff,0,0x0ffffff,0
- L$129:
- .long 16777216,0,16777216,0,16777216,0,16777216,0
- L$mask26:
- .long 0x3ffffff,0,0x3ffffff,0,0x3ffffff,0,0x3ffffff,0
- L$permd_avx2:
- .long 2,2,2,3,2,0,2,1
- L$permd_avx512:
- .long 0,0,0,1, 0,2,0,3, 0,4,0,5, 0,6,0,7
- L$2_44_inp_permd:
- .long 0,1,1,2,2,3,7,7
- L$2_44_inp_shift:
- .quad 0,12,24,64
- L$2_44_mask:
- .quad 0xfffffffffff,0xfffffffffff,0x3ffffffffff,0xffffffffffffffff
- L$2_44_shift_rgt:
- .quad 44,44,42,64
- L$2_44_shift_lft:
- .quad 8,8,10,64
- .p2align 6
- L$x_mask44:
- .quad 0xfffffffffff,0xfffffffffff,0xfffffffffff,0xfffffffffff
- .quad 0xfffffffffff,0xfffffffffff,0xfffffffffff,0xfffffffffff
- L$x_mask42:
- .quad 0x3ffffffffff,0x3ffffffffff,0x3ffffffffff,0x3ffffffffff
- .quad 0x3ffffffffff,0x3ffffffffff,0x3ffffffffff,0x3ffffffffff
- .byte 80,111,108,121,49,51,48,53,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
- .p2align 4
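- # _xor128_encrypt_n_pad (out %rdi, inp %rsi, otp %rdx, len %rcx): XORs the input
- # with the key-stream/OTP buffer 16 bytes at a time, writing the ciphertext both
- # to out and back into the buffer; a trailing partial block is zero-padded in that
- # buffer, presumably so it can be fed straight to Poly1305.  Returns the advanced
- # buffer pointer in %rax.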
- .globl _xor128_encrypt_n_pad
- .p2align 4
- _xor128_encrypt_n_pad:
- subq %rdx,%rsi
- subq %rdx,%rdi
- movq %rcx,%r10
- shrq $4,%rcx
- jz L$tail_enc
- nop
- L$oop_enc_xmm:
- movdqu (%rsi,%rdx,1),%xmm0
- pxor (%rdx),%xmm0
- movdqu %xmm0,(%rdi,%rdx,1)
- movdqa %xmm0,(%rdx)
- leaq 16(%rdx),%rdx
- decq %rcx
- jnz L$oop_enc_xmm
- andq $15,%r10
- jz L$done_enc
- L$tail_enc:
- movq $16,%rcx
- subq %r10,%rcx
- xorl %eax,%eax
- L$oop_enc_byte:
- movb (%rsi,%rdx,1),%al
- xorb (%rdx),%al
- movb %al,(%rdi,%rdx,1)
- movb %al,(%rdx)
- leaq 1(%rdx),%rdx
- decq %r10
- jnz L$oop_enc_byte
- xorl %eax,%eax
- L$oop_enc_pad:
- movb %al,(%rdx)
- leaq 1(%rdx),%rdx
- decq %rcx
- jnz L$oop_enc_pad
- L$done_enc:
- movq %rdx,%rax
- .byte 0xf3,0xc3
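- # _xor128_decrypt_n_pad: mirror of the helper above.  XORs the ciphertext with the
- # OTP buffer to recover plaintext, but stores the ciphertext (not the plaintext)
- # back into the buffer and zero-pads the final partial block, again returning the
- # advanced buffer pointer in %rax.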
- .globl _xor128_decrypt_n_pad
- .p2align 4
- _xor128_decrypt_n_pad:
- subq %rdx,%rsi
- subq %rdx,%rdi
- movq %rcx,%r10
- shrq $4,%rcx
- jz L$tail_dec
- nop
- L$oop_dec_xmm:
- movdqu (%rsi,%rdx,1),%xmm0
- movdqa (%rdx),%xmm1
- pxor %xmm0,%xmm1
- movdqu %xmm1,(%rdi,%rdx,1)
- movdqa %xmm0,(%rdx)
- leaq 16(%rdx),%rdx
- decq %rcx
- jnz L$oop_dec_xmm
- pxor %xmm1,%xmm1
- andq $15,%r10
- jz L$done_dec
- L$tail_dec:
- movq $16,%rcx
- subq %r10,%rcx
- xorl %eax,%eax
- xorq %r11,%r11
- L$oop_dec_byte:
- movb (%rsi,%rdx,1),%r11b
- movb (%rdx),%al
- xorb %r11b,%al
- movb %al,(%rdi,%rdx,1)
- movb %r11b,(%rdx)
- leaq 1(%rdx),%rdx
- decq %r10
- jnz L$oop_dec_byte
- xorl %eax,%eax
- L$oop_dec_pad:
- movb %al,(%rdx)
- leaq 1(%rdx),%rdx
- decq %rcx
- jnz L$oop_dec_pad
- L$done_dec:
- movq %rdx,%rax
- .byte 0xf3,0xc3
|