aesni-sha256-x86_64.s 89 KB (4435 lines)

  # aesni_cbc_sha256_enc -- run-time dispatcher (GNU as, AT&T syntax, x86-64).
  #
  # In:    %rdi  first argument; zero requests a probe instead of real work.
  #        All other argument registers and the stack are left untouched so
  #        the selected implementation sees them exactly as the caller set them.
  # Out:   probe (%rdi == 0): returns with %eax = 1.
  #        otherwise: tail-jumps to one of the *_shaext / *_xop / *_avx2 / *_avx
  #        routines chosen from OPENSSL_ia32cap_P, or executes ud2 when none
  #        of the tested capability bits is set.
  # Clobb: %eax, %r10, %r11, flags.
  # NOTE(review): the feature named next to each bit test is taken from the
  # branch-target name; confirm bit meanings against OPENSSL_ia32cap(3).
  1. .text
  2. .globl aesni_cbc_sha256_enc
  3. .type aesni_cbc_sha256_enc,@function
  4. .align 16
  5. aesni_cbc_sha256_enc:
  6. .cfi_startproc
  7. leaq OPENSSL_ia32cap_P(%rip),%r11    # r11 -> capability vector
  8. movl $1,%eax                         # value returned by the probe path
  9. testq %rdi,%rdi                      # first argument zero?
  10. jz .Lprobe                          # yes: report and return
  11. movl (%r11),%eax                    # eax = capability dword 0
  12. movq 4(%r11),%r10                   # r10 = dword 1 (low half) : dword 2 (high half)
  13. btq $61,%r10                        # bit 61 of the pair = dword 2, bit 29
  14. jc aesni_cbc_sha256_enc_shaext
  15. movq %r10,%r11
  16. shrq $32,%r11                       # r11d = dword 2
  17. testl $0x800,%r10d                  # dword 1, bit 11
  18. jnz aesni_cbc_sha256_enc_xop
  19. andl $0x128,%r11d                   # dword 2, bits 3, 5 and 8 ...
  20. cmpl $0x128,%r11d                   # ... must all be set
  21. je aesni_cbc_sha256_enc_avx2
  22. andl $0x10000000,%r10d              # dword 1, bit 28
  23. jnz aesni_cbc_sha256_enc_avx
  24. ud2                                 # no usable implementation: trap
  25. xorl %eax,%eax                      # lines 25-28 are unreachable: ud2 above does not
  26. testq %rdi,%rdi                     # fall through and no label lets a branch land here
  27. jz .Lprobe
  28. ud2
  29. .Lprobe:
  30. .byte 0xf3,0xc3                     # encoding of "rep ret"
  31. .cfi_endproc
  32. .size aesni_cbc_sha256_enc,.-aesni_cbc_sha256_enc
  # K256 -- constant pool referenced by the stitched AES-CBC + SHA-256 code.
  #
  # Layout, in byte offsets from K256 (derived from the directives below):
  #     0..511  the 64 SHA-256 round constants K[0..63] (FIPS 180-4), four per
  #             16-byte row, every row emitted twice in a row, so the group
  #             holding K[4i..4i+3] starts at offset 32*i. The visible XOP
  #             path reads one 16-byte row per group (0/32/64/96 off %rbp).
  #             NOTE(review): the duplicate rows presumably serve the
  #             256-bit-wide code paths -- confirm against the AVX2 routine.
  #   512..543  byte-shuffle mask (two identical 16-byte copies); loaded from
  #             K256+512 and used as the vpshufb control in .Lloop_xop.
  #   544..623  run of 32 zero bytes, 16 0xff bytes, 32 zero bytes. The XOP
  #             prologue loads three 16-byte windows from K256+544 + 8*n
  #             (+0, +16, +32) into xmm14/xmm13/xmm12, where n is the 32-bit
  #             value at 240(%rcx) minus 9.
  #             NOTE(review): presumably selects, per AES key length, which
  #             vaesenclast result is kept -- confirm.
  #   624..693  NUL-terminated ASCII identification string.
  33. .align 64
  34. .type K256,@object
  35. K256:
  36. .long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5    # K[0..3]
  37. .long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5    # K[0..3], second copy
  38. .long 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5    # K[4..7]
  39. .long 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5    # K[4..7], second copy
  40. .long 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3    # K[8..11]
  41. .long 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3    # K[8..11], second copy
  42. .long 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174    # K[12..15]
  43. .long 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174    # K[12..15], second copy
  44. .long 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc    # K[16..19]
  45. .long 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc    # K[16..19], second copy
  46. .long 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da    # K[20..23]
  47. .long 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da    # K[20..23], second copy
  48. .long 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7    # K[24..27]
  49. .long 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7    # K[24..27], second copy
  50. .long 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967    # K[28..31]
  51. .long 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967    # K[28..31], second copy
  52. .long 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13    # K[32..35]
  53. .long 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13    # K[32..35], second copy
  54. .long 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85    # K[36..39]
  55. .long 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85    # K[36..39], second copy
  56. .long 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3    # K[40..43]
  57. .long 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3    # K[40..43], second copy
  58. .long 0xd192e819,0xd6990624,0xf40e3585,0x106aa070    # K[44..47]
  59. .long 0xd192e819,0xd6990624,0xf40e3585,0x106aa070    # K[44..47], second copy
  60. .long 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5    # K[48..51]
  61. .long 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5    # K[48..51], second copy
  62. .long 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3    # K[52..55]
  63. .long 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3    # K[52..55], second copy
  64. .long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208    # K[56..59]
  65. .long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208    # K[56..59], second copy
  66. .long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2    # K[60..63]
  67. .long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2    # K[60..63], second copy
  # Offset 512: stored little-endian these dwords give the byte sequence
  # 03 02 01 00 07 06 05 04 ..., i.e. a vpshufb control that reverses the
  # bytes inside each 32-bit lane.
  68. .long 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f
  69. .long 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f
  # Offset 544: 32 zero bytes, 16 bytes of 0xff, then 32 more zero bytes.
  70. .long 0,0,0,0, 0,0,0,0, -1,-1,-1,-1
  71. .long 0,0,0,0, 0,0,0,0
  # Offset 624: the bytes spell
  # "AESNI-CBC+SHA256 stitch for x86_64, CRYPTOGAMS by <appro@openssl.org>"
  # followed by a terminating NUL.
  72. .byte 65,69,83,78,73,45,67,66,67,43,83,72,65,50,53,54,32,115,116,105,116,99,104,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
  73. .align 64
  74. .type aesni_cbc_sha256_enc_xop,@function
  75. .align 64
  76. aesni_cbc_sha256_enc_xop:
  77. .cfi_startproc
  78. .Lxop_shortcut:
  79. movq 8(%rsp),%r10
  80. movq %rsp,%rax
  81. .cfi_def_cfa_register %rax
  82. pushq %rbx
  83. .cfi_offset %rbx,-16
  84. pushq %rbp
  85. .cfi_offset %rbp,-24
  86. pushq %r12
  87. .cfi_offset %r12,-32
  88. pushq %r13
  89. .cfi_offset %r13,-40
  90. pushq %r14
  91. .cfi_offset %r14,-48
  92. pushq %r15
  93. .cfi_offset %r15,-56
  94. subq $128,%rsp
  95. andq $-64,%rsp
  96. shlq $6,%rdx
  97. subq %rdi,%rsi
  98. subq %rdi,%r10
  99. addq %rdi,%rdx
  100. movq %rsi,64+8(%rsp)
  101. movq %rdx,64+16(%rsp)
  102. movq %r8,64+32(%rsp)
  103. movq %r9,64+40(%rsp)
  104. movq %r10,64+48(%rsp)
  105. movq %rax,120(%rsp)
  106. .cfi_escape 0x0f,0x06,0x77,0xf8,0x00,0x06,0x23,0x08
  107. .Lprologue_xop:
  108. vzeroall
  109. movq %rdi,%r12
  110. leaq 128(%rcx),%rdi
  111. leaq K256+544(%rip),%r13
  112. movl 240-128(%rdi),%r14d
  113. movq %r9,%r15
  114. movq %r10,%rsi
  115. vmovdqu (%r8),%xmm8
  116. subq $9,%r14
  117. movl 0(%r15),%eax
  118. movl 4(%r15),%ebx
  119. movl 8(%r15),%ecx
  120. movl 12(%r15),%edx
  121. movl 16(%r15),%r8d
  122. movl 20(%r15),%r9d
  123. movl 24(%r15),%r10d
  124. movl 28(%r15),%r11d
  125. vmovdqa 0(%r13,%r14,8),%xmm14
  126. vmovdqa 16(%r13,%r14,8),%xmm13
  127. vmovdqa 32(%r13,%r14,8),%xmm12
  128. vmovdqu 0-128(%rdi),%xmm10
  129. jmp .Lloop_xop
  130. .align 16
  131. .Lloop_xop:
  132. vmovdqa K256+512(%rip),%xmm7
  133. vmovdqu 0(%rsi,%r12,1),%xmm0
  134. vmovdqu 16(%rsi,%r12,1),%xmm1
  135. vmovdqu 32(%rsi,%r12,1),%xmm2
  136. vmovdqu 48(%rsi,%r12,1),%xmm3
  137. vpshufb %xmm7,%xmm0,%xmm0
  138. leaq K256(%rip),%rbp
  139. vpshufb %xmm7,%xmm1,%xmm1
  140. vpshufb %xmm7,%xmm2,%xmm2
  141. vpaddd 0(%rbp),%xmm0,%xmm4
  142. vpshufb %xmm7,%xmm3,%xmm3
  143. vpaddd 32(%rbp),%xmm1,%xmm5
  144. vpaddd 64(%rbp),%xmm2,%xmm6
  145. vpaddd 96(%rbp),%xmm3,%xmm7
  146. vmovdqa %xmm4,0(%rsp)
  147. movl %eax,%r14d
  148. vmovdqa %xmm5,16(%rsp)
  149. movl %ebx,%esi
  150. vmovdqa %xmm6,32(%rsp)
  151. xorl %ecx,%esi
  152. vmovdqa %xmm7,48(%rsp)
  153. movl %r8d,%r13d
  154. jmp .Lxop_00_47
  155. .align 16
  156. .Lxop_00_47:
  157. subq $-32*4,%rbp
  158. vmovdqu (%r12),%xmm9
  159. movq %r12,64+0(%rsp)
  160. vpalignr $4,%xmm0,%xmm1,%xmm4
  161. rorl $14,%r13d
  162. movl %r14d,%eax
  163. vpalignr $4,%xmm2,%xmm3,%xmm7
  164. movl %r9d,%r12d
  165. xorl %r8d,%r13d
  166. .byte 143,232,120,194,236,14
  167. rorl $9,%r14d
  168. xorl %r10d,%r12d
  169. vpsrld $3,%xmm4,%xmm4
  170. rorl $5,%r13d
  171. xorl %eax,%r14d
  172. vpaddd %xmm7,%xmm0,%xmm0
  173. andl %r8d,%r12d
  174. vpxor %xmm10,%xmm9,%xmm9
  175. vmovdqu 16-128(%rdi),%xmm10
  176. xorl %r8d,%r13d
  177. addl 0(%rsp),%r11d
  178. movl %eax,%r15d
  179. .byte 143,232,120,194,245,11
  180. rorl $11,%r14d
  181. xorl %r10d,%r12d
  182. vpxor %xmm5,%xmm4,%xmm4
  183. xorl %ebx,%r15d
  184. rorl $6,%r13d
  185. addl %r12d,%r11d
  186. andl %r15d,%esi
  187. .byte 143,232,120,194,251,13
  188. xorl %eax,%r14d
  189. addl %r13d,%r11d
  190. vpxor %xmm6,%xmm4,%xmm4
  191. xorl %ebx,%esi
  192. addl %r11d,%edx
  193. vpsrld $10,%xmm3,%xmm6
  194. rorl $2,%r14d
  195. addl %esi,%r11d
  196. vpaddd %xmm4,%xmm0,%xmm0
  197. movl %edx,%r13d
  198. addl %r11d,%r14d
  199. .byte 143,232,120,194,239,2
  200. rorl $14,%r13d
  201. movl %r14d,%r11d
  202. vpxor %xmm6,%xmm7,%xmm7
  203. movl %r8d,%r12d
  204. xorl %edx,%r13d
  205. rorl $9,%r14d
  206. xorl %r9d,%r12d
  207. vpxor %xmm5,%xmm7,%xmm7
  208. rorl $5,%r13d
  209. xorl %r11d,%r14d
  210. andl %edx,%r12d
  211. vpxor %xmm8,%xmm9,%xmm9
  212. xorl %edx,%r13d
  213. vpsrldq $8,%xmm7,%xmm7
  214. addl 4(%rsp),%r10d
  215. movl %r11d,%esi
  216. rorl $11,%r14d
  217. xorl %r9d,%r12d
  218. vpaddd %xmm7,%xmm0,%xmm0
  219. xorl %eax,%esi
  220. rorl $6,%r13d
  221. addl %r12d,%r10d
  222. andl %esi,%r15d
  223. .byte 143,232,120,194,248,13
  224. xorl %r11d,%r14d
  225. addl %r13d,%r10d
  226. vpsrld $10,%xmm0,%xmm6
  227. xorl %eax,%r15d
  228. addl %r10d,%ecx
  229. .byte 143,232,120,194,239,2
  230. rorl $2,%r14d
  231. addl %r15d,%r10d
  232. vpxor %xmm6,%xmm7,%xmm7
  233. movl %ecx,%r13d
  234. addl %r10d,%r14d
  235. rorl $14,%r13d
  236. movl %r14d,%r10d
  237. vpxor %xmm5,%xmm7,%xmm7
  238. movl %edx,%r12d
  239. xorl %ecx,%r13d
  240. rorl $9,%r14d
  241. xorl %r8d,%r12d
  242. vpslldq $8,%xmm7,%xmm7
  243. rorl $5,%r13d
  244. xorl %r10d,%r14d
  245. andl %ecx,%r12d
  246. vaesenc %xmm10,%xmm9,%xmm9
  247. vmovdqu 32-128(%rdi),%xmm10
  248. xorl %ecx,%r13d
  249. vpaddd %xmm7,%xmm0,%xmm0
  250. addl 8(%rsp),%r9d
  251. movl %r10d,%r15d
  252. rorl $11,%r14d
  253. xorl %r8d,%r12d
  254. vpaddd 0(%rbp),%xmm0,%xmm6
  255. xorl %r11d,%r15d
  256. rorl $6,%r13d
  257. addl %r12d,%r9d
  258. andl %r15d,%esi
  259. xorl %r10d,%r14d
  260. addl %r13d,%r9d
  261. xorl %r11d,%esi
  262. addl %r9d,%ebx
  263. rorl $2,%r14d
  264. addl %esi,%r9d
  265. movl %ebx,%r13d
  266. addl %r9d,%r14d
  267. rorl $14,%r13d
  268. movl %r14d,%r9d
  269. movl %ecx,%r12d
  270. xorl %ebx,%r13d
  271. rorl $9,%r14d
  272. xorl %edx,%r12d
  273. rorl $5,%r13d
  274. xorl %r9d,%r14d
  275. andl %ebx,%r12d
  276. vaesenc %xmm10,%xmm9,%xmm9
  277. vmovdqu 48-128(%rdi),%xmm10
  278. xorl %ebx,%r13d
  279. addl 12(%rsp),%r8d
  280. movl %r9d,%esi
  281. rorl $11,%r14d
  282. xorl %edx,%r12d
  283. xorl %r10d,%esi
  284. rorl $6,%r13d
  285. addl %r12d,%r8d
  286. andl %esi,%r15d
  287. xorl %r9d,%r14d
  288. addl %r13d,%r8d
  289. xorl %r10d,%r15d
  290. addl %r8d,%eax
  291. rorl $2,%r14d
  292. addl %r15d,%r8d
  293. movl %eax,%r13d
  294. addl %r8d,%r14d
  295. vmovdqa %xmm6,0(%rsp)
  296. vpalignr $4,%xmm1,%xmm2,%xmm4
  297. rorl $14,%r13d
  298. movl %r14d,%r8d
  299. vpalignr $4,%xmm3,%xmm0,%xmm7
  300. movl %ebx,%r12d
  301. xorl %eax,%r13d
  302. .byte 143,232,120,194,236,14
  303. rorl $9,%r14d
  304. xorl %ecx,%r12d
  305. vpsrld $3,%xmm4,%xmm4
  306. rorl $5,%r13d
  307. xorl %r8d,%r14d
  308. vpaddd %xmm7,%xmm1,%xmm1
  309. andl %eax,%r12d
  310. vaesenc %xmm10,%xmm9,%xmm9
  311. vmovdqu 64-128(%rdi),%xmm10
  312. xorl %eax,%r13d
  313. addl 16(%rsp),%edx
  314. movl %r8d,%r15d
  315. .byte 143,232,120,194,245,11
  316. rorl $11,%r14d
  317. xorl %ecx,%r12d
  318. vpxor %xmm5,%xmm4,%xmm4
  319. xorl %r9d,%r15d
  320. rorl $6,%r13d
  321. addl %r12d,%edx
  322. andl %r15d,%esi
  323. .byte 143,232,120,194,248,13
  324. xorl %r8d,%r14d
  325. addl %r13d,%edx
  326. vpxor %xmm6,%xmm4,%xmm4
  327. xorl %r9d,%esi
  328. addl %edx,%r11d
  329. vpsrld $10,%xmm0,%xmm6
  330. rorl $2,%r14d
  331. addl %esi,%edx
  332. vpaddd %xmm4,%xmm1,%xmm1
  333. movl %r11d,%r13d
  334. addl %edx,%r14d
  335. .byte 143,232,120,194,239,2
  336. rorl $14,%r13d
  337. movl %r14d,%edx
  338. vpxor %xmm6,%xmm7,%xmm7
  339. movl %eax,%r12d
  340. xorl %r11d,%r13d
  341. rorl $9,%r14d
  342. xorl %ebx,%r12d
  343. vpxor %xmm5,%xmm7,%xmm7
  344. rorl $5,%r13d
  345. xorl %edx,%r14d
  346. andl %r11d,%r12d
  347. vaesenc %xmm10,%xmm9,%xmm9
  348. vmovdqu 80-128(%rdi),%xmm10
  349. xorl %r11d,%r13d
  350. vpsrldq $8,%xmm7,%xmm7
  351. addl 20(%rsp),%ecx
  352. movl %edx,%esi
  353. rorl $11,%r14d
  354. xorl %ebx,%r12d
  355. vpaddd %xmm7,%xmm1,%xmm1
  356. xorl %r8d,%esi
  357. rorl $6,%r13d
  358. addl %r12d,%ecx
  359. andl %esi,%r15d
  360. .byte 143,232,120,194,249,13
  361. xorl %edx,%r14d
  362. addl %r13d,%ecx
  363. vpsrld $10,%xmm1,%xmm6
  364. xorl %r8d,%r15d
  365. addl %ecx,%r10d
  366. .byte 143,232,120,194,239,2
  367. rorl $2,%r14d
  368. addl %r15d,%ecx
  369. vpxor %xmm6,%xmm7,%xmm7
  370. movl %r10d,%r13d
  371. addl %ecx,%r14d
  372. rorl $14,%r13d
  373. movl %r14d,%ecx
  374. vpxor %xmm5,%xmm7,%xmm7
  375. movl %r11d,%r12d
  376. xorl %r10d,%r13d
  377. rorl $9,%r14d
  378. xorl %eax,%r12d
  379. vpslldq $8,%xmm7,%xmm7
  380. rorl $5,%r13d
  381. xorl %ecx,%r14d
  382. andl %r10d,%r12d
  383. vaesenc %xmm10,%xmm9,%xmm9
  384. vmovdqu 96-128(%rdi),%xmm10
  385. xorl %r10d,%r13d
  386. vpaddd %xmm7,%xmm1,%xmm1
  387. addl 24(%rsp),%ebx
  388. movl %ecx,%r15d
  389. rorl $11,%r14d
  390. xorl %eax,%r12d
  391. vpaddd 32(%rbp),%xmm1,%xmm6
  392. xorl %edx,%r15d
  393. rorl $6,%r13d
  394. addl %r12d,%ebx
  395. andl %r15d,%esi
  396. xorl %ecx,%r14d
  397. addl %r13d,%ebx
  398. xorl %edx,%esi
  399. addl %ebx,%r9d
  400. rorl $2,%r14d
  401. addl %esi,%ebx
  402. movl %r9d,%r13d
  403. addl %ebx,%r14d
  404. rorl $14,%r13d
  405. movl %r14d,%ebx
  406. movl %r10d,%r12d
  407. xorl %r9d,%r13d
  408. rorl $9,%r14d
  409. xorl %r11d,%r12d
  410. rorl $5,%r13d
  411. xorl %ebx,%r14d
  412. andl %r9d,%r12d
  413. vaesenc %xmm10,%xmm9,%xmm9
  414. vmovdqu 112-128(%rdi),%xmm10
  415. xorl %r9d,%r13d
  416. addl 28(%rsp),%eax
  417. movl %ebx,%esi
  418. rorl $11,%r14d
  419. xorl %r11d,%r12d
  420. xorl %ecx,%esi
  421. rorl $6,%r13d
  422. addl %r12d,%eax
  423. andl %esi,%r15d
  424. xorl %ebx,%r14d
  425. addl %r13d,%eax
  426. xorl %ecx,%r15d
  427. addl %eax,%r8d
  428. rorl $2,%r14d
  429. addl %r15d,%eax
  430. movl %r8d,%r13d
  431. addl %eax,%r14d
  432. vmovdqa %xmm6,16(%rsp)
  433. vpalignr $4,%xmm2,%xmm3,%xmm4
  434. rorl $14,%r13d
  435. movl %r14d,%eax
  436. vpalignr $4,%xmm0,%xmm1,%xmm7
  437. movl %r9d,%r12d
  438. xorl %r8d,%r13d
  439. .byte 143,232,120,194,236,14
  440. rorl $9,%r14d
  441. xorl %r10d,%r12d
  442. vpsrld $3,%xmm4,%xmm4
  443. rorl $5,%r13d
  444. xorl %eax,%r14d
  445. vpaddd %xmm7,%xmm2,%xmm2
  446. andl %r8d,%r12d
  447. vaesenc %xmm10,%xmm9,%xmm9
  448. vmovdqu 128-128(%rdi),%xmm10
  449. xorl %r8d,%r13d
  450. addl 32(%rsp),%r11d
  451. movl %eax,%r15d
  452. .byte 143,232,120,194,245,11
  453. rorl $11,%r14d
  454. xorl %r10d,%r12d
  455. vpxor %xmm5,%xmm4,%xmm4
  456. xorl %ebx,%r15d
  457. rorl $6,%r13d
  458. addl %r12d,%r11d
  459. andl %r15d,%esi
  460. .byte 143,232,120,194,249,13
  461. xorl %eax,%r14d
  462. addl %r13d,%r11d
  463. vpxor %xmm6,%xmm4,%xmm4
  464. xorl %ebx,%esi
  465. addl %r11d,%edx
  466. vpsrld $10,%xmm1,%xmm6
  467. rorl $2,%r14d
  468. addl %esi,%r11d
  469. vpaddd %xmm4,%xmm2,%xmm2
  470. movl %edx,%r13d
  471. addl %r11d,%r14d
  472. .byte 143,232,120,194,239,2
  473. rorl $14,%r13d
  474. movl %r14d,%r11d
  475. vpxor %xmm6,%xmm7,%xmm7
  476. movl %r8d,%r12d
  477. xorl %edx,%r13d
  478. rorl $9,%r14d
  479. xorl %r9d,%r12d
  480. vpxor %xmm5,%xmm7,%xmm7
  481. rorl $5,%r13d
  482. xorl %r11d,%r14d
  483. andl %edx,%r12d
  484. vaesenc %xmm10,%xmm9,%xmm9
  485. vmovdqu 144-128(%rdi),%xmm10
  486. xorl %edx,%r13d
  487. vpsrldq $8,%xmm7,%xmm7
  488. addl 36(%rsp),%r10d
  489. movl %r11d,%esi
  490. rorl $11,%r14d
  491. xorl %r9d,%r12d
  492. vpaddd %xmm7,%xmm2,%xmm2
  493. xorl %eax,%esi
  494. rorl $6,%r13d
  495. addl %r12d,%r10d
  496. andl %esi,%r15d
  497. .byte 143,232,120,194,250,13
  498. xorl %r11d,%r14d
  499. addl %r13d,%r10d
  500. vpsrld $10,%xmm2,%xmm6
  501. xorl %eax,%r15d
  502. addl %r10d,%ecx
  503. .byte 143,232,120,194,239,2
  504. rorl $2,%r14d
  505. addl %r15d,%r10d
  506. vpxor %xmm6,%xmm7,%xmm7
  507. movl %ecx,%r13d
  508. addl %r10d,%r14d
  509. rorl $14,%r13d
  510. movl %r14d,%r10d
  511. vpxor %xmm5,%xmm7,%xmm7
  512. movl %edx,%r12d
  513. xorl %ecx,%r13d
  514. rorl $9,%r14d
  515. xorl %r8d,%r12d
  516. vpslldq $8,%xmm7,%xmm7
  517. rorl $5,%r13d
  518. xorl %r10d,%r14d
  519. andl %ecx,%r12d
  520. vaesenc %xmm10,%xmm9,%xmm9
  521. vmovdqu 160-128(%rdi),%xmm10
  522. xorl %ecx,%r13d
  523. vpaddd %xmm7,%xmm2,%xmm2
  524. addl 40(%rsp),%r9d
  525. movl %r10d,%r15d
  526. rorl $11,%r14d
  527. xorl %r8d,%r12d
  528. vpaddd 64(%rbp),%xmm2,%xmm6
  529. xorl %r11d,%r15d
  530. rorl $6,%r13d
  531. addl %r12d,%r9d
  532. andl %r15d,%esi
  533. xorl %r10d,%r14d
  534. addl %r13d,%r9d
  535. xorl %r11d,%esi
  536. addl %r9d,%ebx
  537. rorl $2,%r14d
  538. addl %esi,%r9d
  539. movl %ebx,%r13d
  540. addl %r9d,%r14d
  541. rorl $14,%r13d
  542. movl %r14d,%r9d
  543. movl %ecx,%r12d
  544. xorl %ebx,%r13d
  545. rorl $9,%r14d
  546. xorl %edx,%r12d
  547. rorl $5,%r13d
  548. xorl %r9d,%r14d
  549. andl %ebx,%r12d
  550. vaesenclast %xmm10,%xmm9,%xmm11
  551. vaesenc %xmm10,%xmm9,%xmm9
  552. vmovdqu 176-128(%rdi),%xmm10
  553. xorl %ebx,%r13d
  554. addl 44(%rsp),%r8d
  555. movl %r9d,%esi
  556. rorl $11,%r14d
  557. xorl %edx,%r12d
  558. xorl %r10d,%esi
  559. rorl $6,%r13d
  560. addl %r12d,%r8d
  561. andl %esi,%r15d
  562. xorl %r9d,%r14d
  563. addl %r13d,%r8d
  564. xorl %r10d,%r15d
  565. addl %r8d,%eax
  566. rorl $2,%r14d
  567. addl %r15d,%r8d
  568. movl %eax,%r13d
  569. addl %r8d,%r14d
  570. vmovdqa %xmm6,32(%rsp)
  571. vpalignr $4,%xmm3,%xmm0,%xmm4
  572. rorl $14,%r13d
  573. movl %r14d,%r8d
  574. vpalignr $4,%xmm1,%xmm2,%xmm7
  575. movl %ebx,%r12d
  576. xorl %eax,%r13d
  577. .byte 143,232,120,194,236,14
  578. rorl $9,%r14d
  579. xorl %ecx,%r12d
  580. vpsrld $3,%xmm4,%xmm4
  581. rorl $5,%r13d
  582. xorl %r8d,%r14d
  583. vpaddd %xmm7,%xmm3,%xmm3
  584. andl %eax,%r12d
  585. vpand %xmm12,%xmm11,%xmm8
  586. vaesenc %xmm10,%xmm9,%xmm9
  587. vmovdqu 192-128(%rdi),%xmm10
  588. xorl %eax,%r13d
  589. addl 48(%rsp),%edx
  590. movl %r8d,%r15d
  591. .byte 143,232,120,194,245,11
  592. rorl $11,%r14d
  593. xorl %ecx,%r12d
  594. vpxor %xmm5,%xmm4,%xmm4
  595. xorl %r9d,%r15d
  596. rorl $6,%r13d
  597. addl %r12d,%edx
  598. andl %r15d,%esi
  599. .byte 143,232,120,194,250,13
  600. xorl %r8d,%r14d
  601. addl %r13d,%edx
  602. vpxor %xmm6,%xmm4,%xmm4
  603. xorl %r9d,%esi
  604. addl %edx,%r11d
  605. vpsrld $10,%xmm2,%xmm6
  606. rorl $2,%r14d
  607. addl %esi,%edx
  608. vpaddd %xmm4,%xmm3,%xmm3
  609. movl %r11d,%r13d
  610. addl %edx,%r14d
  611. .byte 143,232,120,194,239,2
  612. rorl $14,%r13d
  613. movl %r14d,%edx
  614. vpxor %xmm6,%xmm7,%xmm7
  615. movl %eax,%r12d
  616. xorl %r11d,%r13d
  617. rorl $9,%r14d
  618. xorl %ebx,%r12d
  619. vpxor %xmm5,%xmm7,%xmm7
  620. rorl $5,%r13d
  621. xorl %edx,%r14d
  622. andl %r11d,%r12d
  623. vaesenclast %xmm10,%xmm9,%xmm11
  624. vaesenc %xmm10,%xmm9,%xmm9
  625. vmovdqu 208-128(%rdi),%xmm10
  626. xorl %r11d,%r13d
  627. vpsrldq $8,%xmm7,%xmm7
  628. addl 52(%rsp),%ecx
  629. movl %edx,%esi
  630. rorl $11,%r14d
  631. xorl %ebx,%r12d
  632. vpaddd %xmm7,%xmm3,%xmm3
  633. xorl %r8d,%esi
  634. rorl $6,%r13d
  635. addl %r12d,%ecx
  636. andl %esi,%r15d
  637. .byte 143,232,120,194,251,13
  638. xorl %edx,%r14d
  639. addl %r13d,%ecx
  640. vpsrld $10,%xmm3,%xmm6
  641. xorl %r8d,%r15d
  642. addl %ecx,%r10d
  643. .byte 143,232,120,194,239,2
  644. rorl $2,%r14d
  645. addl %r15d,%ecx
  646. vpxor %xmm6,%xmm7,%xmm7
  647. movl %r10d,%r13d
  648. addl %ecx,%r14d
  649. rorl $14,%r13d
  650. movl %r14d,%ecx
  651. vpxor %xmm5,%xmm7,%xmm7
  652. movl %r11d,%r12d
  653. xorl %r10d,%r13d
  654. rorl $9,%r14d
  655. xorl %eax,%r12d
  656. vpslldq $8,%xmm7,%xmm7
  657. rorl $5,%r13d
  658. xorl %ecx,%r14d
  659. andl %r10d,%r12d
  660. vpand %xmm13,%xmm11,%xmm11
  661. vaesenc %xmm10,%xmm9,%xmm9
  662. vmovdqu 224-128(%rdi),%xmm10
  663. xorl %r10d,%r13d
  664. vpaddd %xmm7,%xmm3,%xmm3
  665. addl 56(%rsp),%ebx
  666. movl %ecx,%r15d
  667. rorl $11,%r14d
  668. xorl %eax,%r12d
  669. vpaddd 96(%rbp),%xmm3,%xmm6
  670. xorl %edx,%r15d
  671. rorl $6,%r13d
  672. addl %r12d,%ebx
  673. andl %r15d,%esi
  674. xorl %ecx,%r14d
  675. addl %r13d,%ebx
  676. xorl %edx,%esi
  677. addl %ebx,%r9d
  678. rorl $2,%r14d
  679. addl %esi,%ebx
  680. movl %r9d,%r13d
  681. addl %ebx,%r14d
  682. rorl $14,%r13d
  683. movl %r14d,%ebx
  684. movl %r10d,%r12d
  685. xorl %r9d,%r13d
  686. rorl $9,%r14d
  687. xorl %r11d,%r12d
  688. rorl $5,%r13d
  689. xorl %ebx,%r14d
  690. andl %r9d,%r12d
  691. vpor %xmm11,%xmm8,%xmm8
  692. vaesenclast %xmm10,%xmm9,%xmm11
  693. vmovdqu 0-128(%rdi),%xmm10
  694. xorl %r9d,%r13d
  695. addl 60(%rsp),%eax
  696. movl %ebx,%esi
  697. rorl $11,%r14d
  698. xorl %r11d,%r12d
  699. xorl %ecx,%esi
  700. rorl $6,%r13d
  701. addl %r12d,%eax
  702. andl %esi,%r15d
  703. xorl %ebx,%r14d
  704. addl %r13d,%eax
  705. xorl %ecx,%r15d
  706. addl %eax,%r8d
  707. rorl $2,%r14d
  708. addl %r15d,%eax
  709. movl %r8d,%r13d
  710. addl %eax,%r14d
  711. vmovdqa %xmm6,48(%rsp)
  712. movq 64+0(%rsp),%r12
  713. vpand %xmm14,%xmm11,%xmm11
  714. movq 64+8(%rsp),%r15
  715. vpor %xmm11,%xmm8,%xmm8
  716. vmovdqu %xmm8,(%r15,%r12,1)
  717. leaq 16(%r12),%r12
  718. cmpb $0,131(%rbp)
  719. jne .Lxop_00_47
  720. vmovdqu (%r12),%xmm9
  721. movq %r12,64+0(%rsp)
  722. rorl $14,%r13d
  723. movl %r14d,%eax
  724. movl %r9d,%r12d
  725. xorl %r8d,%r13d
  726. rorl $9,%r14d
  727. xorl %r10d,%r12d
  728. rorl $5,%r13d
  729. xorl %eax,%r14d
  730. andl %r8d,%r12d
  731. vpxor %xmm10,%xmm9,%xmm9
  732. vmovdqu 16-128(%rdi),%xmm10
  733. xorl %r8d,%r13d
  734. addl 0(%rsp),%r11d
  735. movl %eax,%r15d
  736. rorl $11,%r14d
  737. xorl %r10d,%r12d
  738. xorl %ebx,%r15d
  739. rorl $6,%r13d
  740. addl %r12d,%r11d
  741. andl %r15d,%esi
  742. xorl %eax,%r14d
  743. addl %r13d,%r11d
  744. xorl %ebx,%esi
  745. addl %r11d,%edx
  746. rorl $2,%r14d
  747. addl %esi,%r11d
  748. movl %edx,%r13d
  749. addl %r11d,%r14d
  750. rorl $14,%r13d
  751. movl %r14d,%r11d
  752. movl %r8d,%r12d
  753. xorl %edx,%r13d
  754. rorl $9,%r14d
  755. xorl %r9d,%r12d
  756. rorl $5,%r13d
  757. xorl %r11d,%r14d
  758. andl %edx,%r12d
  759. vpxor %xmm8,%xmm9,%xmm9
  760. xorl %edx,%r13d
  761. addl 4(%rsp),%r10d
  762. movl %r11d,%esi
  763. rorl $11,%r14d
  764. xorl %r9d,%r12d
  765. xorl %eax,%esi
  766. rorl $6,%r13d
  767. addl %r12d,%r10d
  768. andl %esi,%r15d
  769. xorl %r11d,%r14d
  770. addl %r13d,%r10d
  771. xorl %eax,%r15d
  772. addl %r10d,%ecx
  773. rorl $2,%r14d
  774. addl %r15d,%r10d
  775. movl %ecx,%r13d
  776. addl %r10d,%r14d
  777. rorl $14,%r13d
  778. movl %r14d,%r10d
  779. movl %edx,%r12d
  780. xorl %ecx,%r13d
  781. rorl $9,%r14d
  782. xorl %r8d,%r12d
  783. rorl $5,%r13d
  784. xorl %r10d,%r14d
  785. andl %ecx,%r12d
  786. vaesenc %xmm10,%xmm9,%xmm9
  787. vmovdqu 32-128(%rdi),%xmm10
  788. xorl %ecx,%r13d
  789. addl 8(%rsp),%r9d
  790. movl %r10d,%r15d
  791. rorl $11,%r14d
  792. xorl %r8d,%r12d
  793. xorl %r11d,%r15d
  794. rorl $6,%r13d
  795. addl %r12d,%r9d
  796. andl %r15d,%esi
  797. xorl %r10d,%r14d
  798. addl %r13d,%r9d
  799. xorl %r11d,%esi
  800. addl %r9d,%ebx
  801. rorl $2,%r14d
  802. addl %esi,%r9d
  803. movl %ebx,%r13d
  804. addl %r9d,%r14d
  805. rorl $14,%r13d
  806. movl %r14d,%r9d
  807. movl %ecx,%r12d
  808. xorl %ebx,%r13d
  809. rorl $9,%r14d
  810. xorl %edx,%r12d
  811. rorl $5,%r13d
  812. xorl %r9d,%r14d
  813. andl %ebx,%r12d
  814. vaesenc %xmm10,%xmm9,%xmm9
  815. vmovdqu 48-128(%rdi),%xmm10
  816. xorl %ebx,%r13d
  817. addl 12(%rsp),%r8d
  818. movl %r9d,%esi
  819. rorl $11,%r14d
  820. xorl %edx,%r12d
  821. xorl %r10d,%esi
  822. rorl $6,%r13d
  823. addl %r12d,%r8d
  824. andl %esi,%r15d
  825. xorl %r9d,%r14d
  826. addl %r13d,%r8d
  827. xorl %r10d,%r15d
  828. addl %r8d,%eax
  829. rorl $2,%r14d
  830. addl %r15d,%r8d
  831. movl %eax,%r13d
  832. addl %r8d,%r14d
  833. rorl $14,%r13d
  834. movl %r14d,%r8d
  835. movl %ebx,%r12d
  836. xorl %eax,%r13d
  837. rorl $9,%r14d
  838. xorl %ecx,%r12d
  839. rorl $5,%r13d
  840. xorl %r8d,%r14d
  841. andl %eax,%r12d
  842. vaesenc %xmm10,%xmm9,%xmm9
  843. vmovdqu 64-128(%rdi),%xmm10
  844. xorl %eax,%r13d
  845. addl 16(%rsp),%edx
  846. movl %r8d,%r15d
  847. rorl $11,%r14d
  848. xorl %ecx,%r12d
  849. xorl %r9d,%r15d
  850. rorl $6,%r13d
  851. addl %r12d,%edx
  852. andl %r15d,%esi
  853. xorl %r8d,%r14d
  854. addl %r13d,%edx
  855. xorl %r9d,%esi
  856. addl %edx,%r11d
  857. rorl $2,%r14d
  858. addl %esi,%edx
  859. movl %r11d,%r13d
  860. addl %edx,%r14d
  861. rorl $14,%r13d
  862. movl %r14d,%edx
  863. movl %eax,%r12d
  864. xorl %r11d,%r13d
  865. rorl $9,%r14d
  866. xorl %ebx,%r12d
  867. rorl $5,%r13d
  868. xorl %edx,%r14d
  869. andl %r11d,%r12d
  870. vaesenc %xmm10,%xmm9,%xmm9
  871. vmovdqu 80-128(%rdi),%xmm10
  872. xorl %r11d,%r13d
  873. addl 20(%rsp),%ecx
  874. movl %edx,%esi
  875. rorl $11,%r14d
  876. xorl %ebx,%r12d
  877. xorl %r8d,%esi
  878. rorl $6,%r13d
  879. addl %r12d,%ecx
  880. andl %esi,%r15d
  881. xorl %edx,%r14d
  882. addl %r13d,%ecx
  883. xorl %r8d,%r15d
  884. addl %ecx,%r10d
  885. rorl $2,%r14d
  886. addl %r15d,%ecx
  887. movl %r10d,%r13d
  888. addl %ecx,%r14d
  889. rorl $14,%r13d
  890. movl %r14d,%ecx
  891. movl %r11d,%r12d
  892. xorl %r10d,%r13d
  893. rorl $9,%r14d
  894. xorl %eax,%r12d
  895. rorl $5,%r13d
  896. xorl %ecx,%r14d
  897. andl %r10d,%r12d
  898. vaesenc %xmm10,%xmm9,%xmm9
  899. vmovdqu 96-128(%rdi),%xmm10
  900. xorl %r10d,%r13d
  901. addl 24(%rsp),%ebx
  902. movl %ecx,%r15d
  903. rorl $11,%r14d
  904. xorl %eax,%r12d
  905. xorl %edx,%r15d
  906. rorl $6,%r13d
  907. addl %r12d,%ebx
  908. andl %r15d,%esi
  909. xorl %ecx,%r14d
  910. addl %r13d,%ebx
  911. xorl %edx,%esi
  912. addl %ebx,%r9d
  913. rorl $2,%r14d
  914. addl %esi,%ebx
  915. movl %r9d,%r13d
  916. addl %ebx,%r14d
  917. rorl $14,%r13d
  918. movl %r14d,%ebx
  919. movl %r10d,%r12d
  920. xorl %r9d,%r13d
  921. rorl $9,%r14d
  922. xorl %r11d,%r12d
  923. rorl $5,%r13d
  924. xorl %ebx,%r14d
  925. andl %r9d,%r12d
  926. vaesenc %xmm10,%xmm9,%xmm9
  927. vmovdqu 112-128(%rdi),%xmm10
  928. xorl %r9d,%r13d
  929. addl 28(%rsp),%eax
  930. movl %ebx,%esi
  931. rorl $11,%r14d
  932. xorl %r11d,%r12d
  933. xorl %ecx,%esi
  934. rorl $6,%r13d
  935. addl %r12d,%eax
  936. andl %esi,%r15d
  937. xorl %ebx,%r14d
  938. addl %r13d,%eax
  939. xorl %ecx,%r15d
  940. addl %eax,%r8d
  941. rorl $2,%r14d
  942. addl %r15d,%eax
  943. movl %r8d,%r13d
  944. addl %eax,%r14d
  945. rorl $14,%r13d
  946. movl %r14d,%eax
  947. movl %r9d,%r12d
  948. xorl %r8d,%r13d
  949. rorl $9,%r14d
  950. xorl %r10d,%r12d
  951. rorl $5,%r13d
  952. xorl %eax,%r14d
  953. andl %r8d,%r12d
  954. vaesenc %xmm10,%xmm9,%xmm9
  955. vmovdqu 128-128(%rdi),%xmm10
  956. xorl %r8d,%r13d
  957. addl 32(%rsp),%r11d
  958. movl %eax,%r15d
  959. rorl $11,%r14d
  960. xorl %r10d,%r12d
  961. xorl %ebx,%r15d
  962. rorl $6,%r13d
  963. addl %r12d,%r11d
  964. andl %r15d,%esi
  965. xorl %eax,%r14d
  966. addl %r13d,%r11d
  967. xorl %ebx,%esi
  968. addl %r11d,%edx
  969. rorl $2,%r14d
  970. addl %esi,%r11d
  971. movl %edx,%r13d
  972. addl %r11d,%r14d
  973. rorl $14,%r13d
  974. movl %r14d,%r11d
  975. movl %r8d,%r12d
  976. xorl %edx,%r13d
  977. rorl $9,%r14d
  978. xorl %r9d,%r12d
  979. rorl $5,%r13d
  980. xorl %r11d,%r14d
  981. andl %edx,%r12d
  982. vaesenc %xmm10,%xmm9,%xmm9
  983. vmovdqu 144-128(%rdi),%xmm10
  984. xorl %edx,%r13d
  985. addl 36(%rsp),%r10d
  986. movl %r11d,%esi
  987. rorl $11,%r14d
  988. xorl %r9d,%r12d
  989. xorl %eax,%esi
  990. rorl $6,%r13d
  991. addl %r12d,%r10d
  992. andl %esi,%r15d
  993. xorl %r11d,%r14d
  994. addl %r13d,%r10d
  995. xorl %eax,%r15d
  996. addl %r10d,%ecx
  997. rorl $2,%r14d
  998. addl %r15d,%r10d
  999. movl %ecx,%r13d
  1000. addl %r10d,%r14d
  1001. rorl $14,%r13d
  1002. movl %r14d,%r10d
  1003. movl %edx,%r12d
  1004. xorl %ecx,%r13d
  1005. rorl $9,%r14d
  1006. xorl %r8d,%r12d
  1007. rorl $5,%r13d
  1008. xorl %r10d,%r14d
  1009. andl %ecx,%r12d
  1010. vaesenc %xmm10,%xmm9,%xmm9
  1011. vmovdqu 160-128(%rdi),%xmm10
  1012. xorl %ecx,%r13d
  1013. addl 40(%rsp),%r9d
  1014. movl %r10d,%r15d
  1015. rorl $11,%r14d
  1016. xorl %r8d,%r12d
  1017. xorl %r11d,%r15d
  1018. rorl $6,%r13d
  1019. addl %r12d,%r9d
  1020. andl %r15d,%esi
  1021. xorl %r10d,%r14d
  1022. addl %r13d,%r9d
  1023. xorl %r11d,%esi
  1024. addl %r9d,%ebx
  1025. rorl $2,%r14d
  1026. addl %esi,%r9d
  1027. movl %ebx,%r13d
  1028. addl %r9d,%r14d
  1029. rorl $14,%r13d
  1030. movl %r14d,%r9d
  1031. movl %ecx,%r12d
  1032. xorl %ebx,%r13d
  1033. rorl $9,%r14d
  1034. xorl %edx,%r12d
  1035. rorl $5,%r13d
  1036. xorl %r9d,%r14d
  1037. andl %ebx,%r12d
  1038. vaesenclast %xmm10,%xmm9,%xmm11
  1039. vaesenc %xmm10,%xmm9,%xmm9
  1040. vmovdqu 176-128(%rdi),%xmm10
  1041. xorl %ebx,%r13d
  1042. addl 44(%rsp),%r8d
  1043. movl %r9d,%esi
  1044. rorl $11,%r14d
  1045. xorl %edx,%r12d
  1046. xorl %r10d,%esi
  1047. rorl $6,%r13d
  1048. addl %r12d,%r8d
  1049. andl %esi,%r15d
  1050. xorl %r9d,%r14d
  1051. addl %r13d,%r8d
  1052. xorl %r10d,%r15d
  1053. addl %r8d,%eax
  1054. rorl $2,%r14d
  1055. addl %r15d,%r8d
  1056. movl %eax,%r13d
  1057. addl %r8d,%r14d
  1058. rorl $14,%r13d
  1059. movl %r14d,%r8d
  1060. movl %ebx,%r12d
  1061. xorl %eax,%r13d
  1062. rorl $9,%r14d
  1063. xorl %ecx,%r12d
  1064. rorl $5,%r13d
  1065. xorl %r8d,%r14d
  1066. andl %eax,%r12d
  1067. vpand %xmm12,%xmm11,%xmm8
  1068. vaesenc %xmm10,%xmm9,%xmm9
  1069. vmovdqu 192-128(%rdi),%xmm10
  1070. xorl %eax,%r13d
  1071. addl 48(%rsp),%edx
  1072. movl %r8d,%r15d
  1073. rorl $11,%r14d
  1074. xorl %ecx,%r12d
  1075. xorl %r9d,%r15d
  1076. rorl $6,%r13d
  1077. addl %r12d,%edx
  1078. andl %r15d,%esi
  1079. xorl %r8d,%r14d
  1080. addl %r13d,%edx
  1081. xorl %r9d,%esi
  1082. addl %edx,%r11d
  1083. rorl $2,%r14d
  1084. addl %esi,%edx
  1085. movl %r11d,%r13d
  1086. addl %edx,%r14d
  1087. rorl $14,%r13d
  1088. movl %r14d,%edx
  1089. movl %eax,%r12d
  1090. xorl %r11d,%r13d
  1091. rorl $9,%r14d
  1092. xorl %ebx,%r12d
  1093. rorl $5,%r13d
  1094. xorl %edx,%r14d
  1095. andl %r11d,%r12d
  1096. vaesenclast %xmm10,%xmm9,%xmm11
  1097. vaesenc %xmm10,%xmm9,%xmm9
  1098. vmovdqu 208-128(%rdi),%xmm10
  1099. xorl %r11d,%r13d
  1100. addl 52(%rsp),%ecx
  1101. movl %edx,%esi
  1102. rorl $11,%r14d
  1103. xorl %ebx,%r12d
  1104. xorl %r8d,%esi
  1105. rorl $6,%r13d
  1106. addl %r12d,%ecx
  1107. andl %esi,%r15d
  1108. xorl %edx,%r14d
  1109. addl %r13d,%ecx
  1110. xorl %r8d,%r15d
  1111. addl %ecx,%r10d
  1112. rorl $2,%r14d
  1113. addl %r15d,%ecx
  1114. movl %r10d,%r13d
  1115. addl %ecx,%r14d
  1116. rorl $14,%r13d
  1117. movl %r14d,%ecx
  1118. movl %r11d,%r12d
  1119. xorl %r10d,%r13d
  1120. rorl $9,%r14d
  1121. xorl %eax,%r12d
  1122. rorl $5,%r13d
  1123. xorl %ecx,%r14d
  1124. andl %r10d,%r12d
  1125. vpand %xmm13,%xmm11,%xmm11
  1126. vaesenc %xmm10,%xmm9,%xmm9
  1127. vmovdqu 224-128(%rdi),%xmm10
  1128. xorl %r10d,%r13d
  1129. addl 56(%rsp),%ebx
  1130. movl %ecx,%r15d
  1131. rorl $11,%r14d
  1132. xorl %eax,%r12d
  1133. xorl %edx,%r15d
  1134. rorl $6,%r13d
  1135. addl %r12d,%ebx
  1136. andl %r15d,%esi
  1137. xorl %ecx,%r14d
  1138. addl %r13d,%ebx
  1139. xorl %edx,%esi
  1140. addl %ebx,%r9d
  1141. rorl $2,%r14d
  1142. addl %esi,%ebx
  1143. movl %r9d,%r13d
  1144. addl %ebx,%r14d
  1145. rorl $14,%r13d
  1146. movl %r14d,%ebx
  1147. movl %r10d,%r12d
  1148. xorl %r9d,%r13d
  1149. rorl $9,%r14d
  1150. xorl %r11d,%r12d
  1151. rorl $5,%r13d
  1152. xorl %ebx,%r14d
  1153. andl %r9d,%r12d
  1154. vpor %xmm11,%xmm8,%xmm8
  1155. vaesenclast %xmm10,%xmm9,%xmm11
  1156. vmovdqu 0-128(%rdi),%xmm10
  1157. xorl %r9d,%r13d
  1158. addl 60(%rsp),%eax
  1159. movl %ebx,%esi
  1160. rorl $11,%r14d
  1161. xorl %r11d,%r12d
  1162. xorl %ecx,%esi
  1163. rorl $6,%r13d
  1164. addl %r12d,%eax
  1165. andl %esi,%r15d
  1166. xorl %ebx,%r14d
  1167. addl %r13d,%eax
  1168. xorl %ecx,%r15d
  1169. addl %eax,%r8d
  1170. rorl $2,%r14d
  1171. addl %r15d,%eax
  1172. movl %r8d,%r13d
  1173. addl %eax,%r14d
  1174. movq 64+0(%rsp),%r12
  1175. movq 64+8(%rsp),%r13
  1176. movq 64+40(%rsp),%r15
  1177. movq 64+48(%rsp),%rsi
  1178. vpand %xmm14,%xmm11,%xmm11
  1179. movl %r14d,%eax
  1180. vpor %xmm11,%xmm8,%xmm8
  1181. vmovdqu %xmm8,(%r12,%r13,1)
  1182. leaq 16(%r12),%r12
  1183. addl 0(%r15),%eax
  1184. addl 4(%r15),%ebx
  1185. addl 8(%r15),%ecx
  1186. addl 12(%r15),%edx
  1187. addl 16(%r15),%r8d
  1188. addl 20(%r15),%r9d
  1189. addl 24(%r15),%r10d
  1190. addl 28(%r15),%r11d
  1191. cmpq 64+16(%rsp),%r12
  1192. movl %eax,0(%r15)
  1193. movl %ebx,4(%r15)
  1194. movl %ecx,8(%r15)
  1195. movl %edx,12(%r15)
  1196. movl %r8d,16(%r15)
  1197. movl %r9d,20(%r15)
  1198. movl %r10d,24(%r15)
  1199. movl %r11d,28(%r15)
  1200. jb .Lloop_xop
  1201. movq 64+32(%rsp),%r8
  1202. movq 120(%rsp),%rsi
  1203. .cfi_def_cfa %rsi,8
  1204. vmovdqu %xmm8,(%r8)
  1205. vzeroall
  1206. movq -48(%rsi),%r15
  1207. .cfi_restore %r15
  1208. movq -40(%rsi),%r14
  1209. .cfi_restore %r14
  1210. movq -32(%rsi),%r13
  1211. .cfi_restore %r13
  1212. movq -24(%rsi),%r12
  1213. .cfi_restore %r12
  1214. movq -16(%rsi),%rbp
  1215. .cfi_restore %rbp
  1216. movq -8(%rsi),%rbx
  1217. .cfi_restore %rbx
  1218. leaq (%rsi),%rsp
  1219. .cfi_def_cfa_register %rsp
  1220. .Lepilogue_xop:
  1221. .byte 0xf3,0xc3
  1222. .cfi_endproc
  1223. .size aesni_cbc_sha256_enc_xop,.-aesni_cbc_sha256_enc_xop
  1224. .type aesni_cbc_sha256_enc_avx,@function
  1225. .align 64
  1226. aesni_cbc_sha256_enc_avx:
  1227. .cfi_startproc
  1228. .Lavx_shortcut:
  1229. movq 8(%rsp),%r10
  1230. movq %rsp,%rax
  1231. .cfi_def_cfa_register %rax
  1232. pushq %rbx
  1233. .cfi_offset %rbx,-16
  1234. pushq %rbp
  1235. .cfi_offset %rbp,-24
  1236. pushq %r12
  1237. .cfi_offset %r12,-32
  1238. pushq %r13
  1239. .cfi_offset %r13,-40
  1240. pushq %r14
  1241. .cfi_offset %r14,-48
  1242. pushq %r15
  1243. .cfi_offset %r15,-56
  1244. subq $128,%rsp
  1245. andq $-64,%rsp
  1246. shlq $6,%rdx
  1247. subq %rdi,%rsi
  1248. subq %rdi,%r10
  1249. addq %rdi,%rdx
  1250. movq %rsi,64+8(%rsp)
  1251. movq %rdx,64+16(%rsp)
  1252. movq %r8,64+32(%rsp)
  1253. movq %r9,64+40(%rsp)
  1254. movq %r10,64+48(%rsp)
  1255. movq %rax,120(%rsp)
  1256. .cfi_escape 0x0f,0x06,0x77,0xf8,0x00,0x06,0x23,0x08
  1257. .Lprologue_avx:
  1258. vzeroall
  1259. movq %rdi,%r12
  1260. leaq 128(%rcx),%rdi
  1261. leaq K256+544(%rip),%r13
  1262. movl 240-128(%rdi),%r14d
  1263. movq %r9,%r15
  1264. movq %r10,%rsi
  1265. vmovdqu (%r8),%xmm8
  1266. subq $9,%r14
  1267. movl 0(%r15),%eax
  1268. movl 4(%r15),%ebx
  1269. movl 8(%r15),%ecx
  1270. movl 12(%r15),%edx
  1271. movl 16(%r15),%r8d
  1272. movl 20(%r15),%r9d
  1273. movl 24(%r15),%r10d
  1274. movl 28(%r15),%r11d
  1275. vmovdqa 0(%r13,%r14,8),%xmm14
  1276. vmovdqa 16(%r13,%r14,8),%xmm13
  1277. vmovdqa 32(%r13,%r14,8),%xmm12
  1278. vmovdqu 0-128(%rdi),%xmm10
  1279. jmp .Lloop_avx
  1280. .align 16
  1281. .Lloop_avx:
  1282. vmovdqa K256+512(%rip),%xmm7
  1283. vmovdqu 0(%rsi,%r12,1),%xmm0
  1284. vmovdqu 16(%rsi,%r12,1),%xmm1
  1285. vmovdqu 32(%rsi,%r12,1),%xmm2
  1286. vmovdqu 48(%rsi,%r12,1),%xmm3
  1287. vpshufb %xmm7,%xmm0,%xmm0
  1288. leaq K256(%rip),%rbp
  1289. vpshufb %xmm7,%xmm1,%xmm1
  1290. vpshufb %xmm7,%xmm2,%xmm2
  1291. vpaddd 0(%rbp),%xmm0,%xmm4
  1292. vpshufb %xmm7,%xmm3,%xmm3
  1293. vpaddd 32(%rbp),%xmm1,%xmm5
  1294. vpaddd 64(%rbp),%xmm2,%xmm6
  1295. vpaddd 96(%rbp),%xmm3,%xmm7
  1296. vmovdqa %xmm4,0(%rsp)
  1297. movl %eax,%r14d
  1298. vmovdqa %xmm5,16(%rsp)
  1299. movl %ebx,%esi
  1300. vmovdqa %xmm6,32(%rsp)
  1301. xorl %ecx,%esi
  1302. vmovdqa %xmm7,48(%rsp)
  1303. movl %r8d,%r13d
  1304. jmp .Lavx_00_47
  1305. .align 16
  1306. .Lavx_00_47:
  1307. subq $-32*4,%rbp
  1308. vmovdqu (%r12),%xmm9
  1309. movq %r12,64+0(%rsp)
  1310. vpalignr $4,%xmm0,%xmm1,%xmm4
  1311. shrdl $14,%r13d,%r13d
  1312. movl %r14d,%eax
  1313. movl %r9d,%r12d
  1314. vpalignr $4,%xmm2,%xmm3,%xmm7
  1315. xorl %r8d,%r13d
  1316. shrdl $9,%r14d,%r14d
  1317. xorl %r10d,%r12d
  1318. vpsrld $7,%xmm4,%xmm6
  1319. shrdl $5,%r13d,%r13d
  1320. xorl %eax,%r14d
  1321. andl %r8d,%r12d
  1322. vpaddd %xmm7,%xmm0,%xmm0
  1323. vpxor %xmm10,%xmm9,%xmm9
  1324. vmovdqu 16-128(%rdi),%xmm10
  1325. xorl %r8d,%r13d
  1326. addl 0(%rsp),%r11d
  1327. movl %eax,%r15d
  1328. vpsrld $3,%xmm4,%xmm7
  1329. shrdl $11,%r14d,%r14d
  1330. xorl %r10d,%r12d
  1331. xorl %ebx,%r15d
  1332. vpslld $14,%xmm4,%xmm5
  1333. shrdl $6,%r13d,%r13d
  1334. addl %r12d,%r11d
  1335. andl %r15d,%esi
  1336. vpxor %xmm6,%xmm7,%xmm4
  1337. xorl %eax,%r14d
  1338. addl %r13d,%r11d
  1339. xorl %ebx,%esi
  1340. vpshufd $250,%xmm3,%xmm7
  1341. addl %r11d,%edx
  1342. shrdl $2,%r14d,%r14d
  1343. addl %esi,%r11d
  1344. vpsrld $11,%xmm6,%xmm6
  1345. movl %edx,%r13d
  1346. addl %r11d,%r14d
  1347. shrdl $14,%r13d,%r13d
  1348. vpxor %xmm5,%xmm4,%xmm4
  1349. movl %r14d,%r11d
  1350. movl %r8d,%r12d
  1351. xorl %edx,%r13d
  1352. vpslld $11,%xmm5,%xmm5
  1353. shrdl $9,%r14d,%r14d
  1354. xorl %r9d,%r12d
  1355. shrdl $5,%r13d,%r13d
  1356. vpxor %xmm6,%xmm4,%xmm4
  1357. xorl %r11d,%r14d
  1358. andl %edx,%r12d
  1359. vpxor %xmm8,%xmm9,%xmm9
  1360. xorl %edx,%r13d
  1361. vpsrld $10,%xmm7,%xmm6
  1362. addl 4(%rsp),%r10d
  1363. movl %r11d,%esi
  1364. shrdl $11,%r14d,%r14d
  1365. vpxor %xmm5,%xmm4,%xmm4
  1366. xorl %r9d,%r12d
  1367. xorl %eax,%esi
  1368. shrdl $6,%r13d,%r13d
  1369. vpsrlq $17,%xmm7,%xmm7
  1370. addl %r12d,%r10d
  1371. andl %esi,%r15d
  1372. xorl %r11d,%r14d
  1373. vpaddd %xmm4,%xmm0,%xmm0
  1374. addl %r13d,%r10d
  1375. xorl %eax,%r15d
  1376. addl %r10d,%ecx
  1377. vpxor %xmm7,%xmm6,%xmm6
  1378. shrdl $2,%r14d,%r14d
  1379. addl %r15d,%r10d
  1380. movl %ecx,%r13d
  1381. vpsrlq $2,%xmm7,%xmm7
  1382. addl %r10d,%r14d
  1383. shrdl $14,%r13d,%r13d
  1384. movl %r14d,%r10d
  1385. vpxor %xmm7,%xmm6,%xmm6
  1386. movl %edx,%r12d
  1387. xorl %ecx,%r13d
  1388. shrdl $9,%r14d,%r14d
  1389. vpshufd $132,%xmm6,%xmm6
  1390. xorl %r8d,%r12d
  1391. shrdl $5,%r13d,%r13d
  1392. xorl %r10d,%r14d
  1393. vpsrldq $8,%xmm6,%xmm6
  1394. andl %ecx,%r12d
  1395. vaesenc %xmm10,%xmm9,%xmm9
  1396. vmovdqu 32-128(%rdi),%xmm10
  1397. xorl %ecx,%r13d
  1398. addl 8(%rsp),%r9d
  1399. vpaddd %xmm6,%xmm0,%xmm0
  1400. movl %r10d,%r15d
  1401. shrdl $11,%r14d,%r14d
  1402. xorl %r8d,%r12d
  1403. vpshufd $80,%xmm0,%xmm7
  1404. xorl %r11d,%r15d
  1405. shrdl $6,%r13d,%r13d
  1406. addl %r12d,%r9d
  1407. vpsrld $10,%xmm7,%xmm6
  1408. andl %r15d,%esi
  1409. xorl %r10d,%r14d
  1410. addl %r13d,%r9d
  1411. vpsrlq $17,%xmm7,%xmm7
  1412. xorl %r11d,%esi
  1413. addl %r9d,%ebx
  1414. shrdl $2,%r14d,%r14d
  1415. vpxor %xmm7,%xmm6,%xmm6
  1416. addl %esi,%r9d
  1417. movl %ebx,%r13d
  1418. addl %r9d,%r14d
  1419. vpsrlq $2,%xmm7,%xmm7
  1420. shrdl $14,%r13d,%r13d
  1421. movl %r14d,%r9d
  1422. movl %ecx,%r12d
  1423. vpxor %xmm7,%xmm6,%xmm6
  1424. xorl %ebx,%r13d
  1425. shrdl $9,%r14d,%r14d
  1426. xorl %edx,%r12d
  1427. vpshufd $232,%xmm6,%xmm6
  1428. shrdl $5,%r13d,%r13d
  1429. xorl %r9d,%r14d
  1430. andl %ebx,%r12d
  1431. vpslldq $8,%xmm6,%xmm6
  1432. vaesenc %xmm10,%xmm9,%xmm9
  1433. vmovdqu 48-128(%rdi),%xmm10
  1434. xorl %ebx,%r13d
  1435. addl 12(%rsp),%r8d
  1436. movl %r9d,%esi
  1437. vpaddd %xmm6,%xmm0,%xmm0
  1438. shrdl $11,%r14d,%r14d
  1439. xorl %edx,%r12d
  1440. xorl %r10d,%esi
  1441. vpaddd 0(%rbp),%xmm0,%xmm6
  1442. shrdl $6,%r13d,%r13d
  1443. addl %r12d,%r8d
  1444. andl %esi,%r15d
  1445. xorl %r9d,%r14d
  1446. addl %r13d,%r8d
  1447. xorl %r10d,%r15d
  1448. addl %r8d,%eax
  1449. shrdl $2,%r14d,%r14d
  1450. addl %r15d,%r8d
  1451. movl %eax,%r13d
  1452. addl %r8d,%r14d
  1453. vmovdqa %xmm6,0(%rsp)
  1454. vpalignr $4,%xmm1,%xmm2,%xmm4
  1455. shrdl $14,%r13d,%r13d
  1456. movl %r14d,%r8d
  1457. movl %ebx,%r12d
  1458. vpalignr $4,%xmm3,%xmm0,%xmm7
  1459. xorl %eax,%r13d
  1460. shrdl $9,%r14d,%r14d
  1461. xorl %ecx,%r12d
  1462. vpsrld $7,%xmm4,%xmm6
  1463. shrdl $5,%r13d,%r13d
  1464. xorl %r8d,%r14d
  1465. andl %eax,%r12d
  1466. vpaddd %xmm7,%xmm1,%xmm1
  1467. vaesenc %xmm10,%xmm9,%xmm9
  1468. vmovdqu 64-128(%rdi),%xmm10
  1469. xorl %eax,%r13d
  1470. addl 16(%rsp),%edx
  1471. movl %r8d,%r15d
  1472. vpsrld $3,%xmm4,%xmm7
  1473. shrdl $11,%r14d,%r14d
  1474. xorl %ecx,%r12d
  1475. xorl %r9d,%r15d
  1476. vpslld $14,%xmm4,%xmm5
  1477. shrdl $6,%r13d,%r13d
  1478. addl %r12d,%edx
  1479. andl %r15d,%esi
  1480. vpxor %xmm6,%xmm7,%xmm4
  1481. xorl %r8d,%r14d
  1482. addl %r13d,%edx
  1483. xorl %r9d,%esi
  1484. vpshufd $250,%xmm0,%xmm7
  1485. addl %edx,%r11d
  1486. shrdl $2,%r14d,%r14d
  1487. addl %esi,%edx
  1488. vpsrld $11,%xmm6,%xmm6
  1489. movl %r11d,%r13d
  1490. addl %edx,%r14d
  1491. shrdl $14,%r13d,%r13d
  1492. vpxor %xmm5,%xmm4,%xmm4
  1493. movl %r14d,%edx
  1494. movl %eax,%r12d
  1495. xorl %r11d,%r13d
  1496. vpslld $11,%xmm5,%xmm5
  1497. shrdl $9,%r14d,%r14d
  1498. xorl %ebx,%r12d
  1499. shrdl $5,%r13d,%r13d
  1500. vpxor %xmm6,%xmm4,%xmm4
  1501. xorl %edx,%r14d
  1502. andl %r11d,%r12d
  1503. vaesenc %xmm10,%xmm9,%xmm9
  1504. vmovdqu 80-128(%rdi),%xmm10
  1505. xorl %r11d,%r13d
  1506. vpsrld $10,%xmm7,%xmm6
  1507. addl 20(%rsp),%ecx
  1508. movl %edx,%esi
  1509. shrdl $11,%r14d,%r14d
  1510. vpxor %xmm5,%xmm4,%xmm4
  1511. xorl %ebx,%r12d
  1512. xorl %r8d,%esi
  1513. shrdl $6,%r13d,%r13d
  1514. vpsrlq $17,%xmm7,%xmm7
  1515. addl %r12d,%ecx
  1516. andl %esi,%r15d
  1517. xorl %edx,%r14d
  1518. vpaddd %xmm4,%xmm1,%xmm1
  1519. addl %r13d,%ecx
  1520. xorl %r8d,%r15d
  1521. addl %ecx,%r10d
  1522. vpxor %xmm7,%xmm6,%xmm6
  1523. shrdl $2,%r14d,%r14d
  1524. addl %r15d,%ecx
  1525. movl %r10d,%r13d
  1526. vpsrlq $2,%xmm7,%xmm7
  1527. addl %ecx,%r14d
  1528. shrdl $14,%r13d,%r13d
  1529. movl %r14d,%ecx
  1530. vpxor %xmm7,%xmm6,%xmm6
  1531. movl %r11d,%r12d
  1532. xorl %r10d,%r13d
  1533. shrdl $9,%r14d,%r14d
  1534. vpshufd $132,%xmm6,%xmm6
  1535. xorl %eax,%r12d
  1536. shrdl $5,%r13d,%r13d
  1537. xorl %ecx,%r14d
  1538. vpsrldq $8,%xmm6,%xmm6
  1539. andl %r10d,%r12d
  1540. vaesenc %xmm10,%xmm9,%xmm9
  1541. vmovdqu 96-128(%rdi),%xmm10
  1542. xorl %r10d,%r13d
  1543. addl 24(%rsp),%ebx
  1544. vpaddd %xmm6,%xmm1,%xmm1
  1545. movl %ecx,%r15d
  1546. shrdl $11,%r14d,%r14d
  1547. xorl %eax,%r12d
  1548. vpshufd $80,%xmm1,%xmm7
  1549. xorl %edx,%r15d
  1550. shrdl $6,%r13d,%r13d
  1551. addl %r12d,%ebx
  1552. vpsrld $10,%xmm7,%xmm6
  1553. andl %r15d,%esi
  1554. xorl %ecx,%r14d
  1555. addl %r13d,%ebx
  1556. vpsrlq $17,%xmm7,%xmm7
  1557. xorl %edx,%esi
  1558. addl %ebx,%r9d
  1559. shrdl $2,%r14d,%r14d
  1560. vpxor %xmm7,%xmm6,%xmm6
  1561. addl %esi,%ebx
  1562. movl %r9d,%r13d
  1563. addl %ebx,%r14d
  1564. vpsrlq $2,%xmm7,%xmm7
  1565. shrdl $14,%r13d,%r13d
  1566. movl %r14d,%ebx
  1567. movl %r10d,%r12d
  1568. vpxor %xmm7,%xmm6,%xmm6
  1569. xorl %r9d,%r13d
  1570. shrdl $9,%r14d,%r14d
  1571. xorl %r11d,%r12d
  1572. vpshufd $232,%xmm6,%xmm6
  1573. shrdl $5,%r13d,%r13d
  1574. xorl %ebx,%r14d
  1575. andl %r9d,%r12d
  1576. vpslldq $8,%xmm6,%xmm6
  1577. vaesenc %xmm10,%xmm9,%xmm9
  1578. vmovdqu 112-128(%rdi),%xmm10
  1579. xorl %r9d,%r13d
  1580. addl 28(%rsp),%eax
  1581. movl %ebx,%esi
  1582. vpaddd %xmm6,%xmm1,%xmm1
  1583. shrdl $11,%r14d,%r14d
  1584. xorl %r11d,%r12d
  1585. xorl %ecx,%esi
  1586. vpaddd 32(%rbp),%xmm1,%xmm6
  1587. shrdl $6,%r13d,%r13d
  1588. addl %r12d,%eax
  1589. andl %esi,%r15d
  1590. xorl %ebx,%r14d
  1591. addl %r13d,%eax
  1592. xorl %ecx,%r15d
  1593. addl %eax,%r8d
  1594. shrdl $2,%r14d,%r14d
  1595. addl %r15d,%eax
  1596. movl %r8d,%r13d
  1597. addl %eax,%r14d
  1598. vmovdqa %xmm6,16(%rsp)
  1599. vpalignr $4,%xmm2,%xmm3,%xmm4
  1600. shrdl $14,%r13d,%r13d
  1601. movl %r14d,%eax
  1602. movl %r9d,%r12d
  1603. vpalignr $4,%xmm0,%xmm1,%xmm7
  1604. xorl %r8d,%r13d
  1605. shrdl $9,%r14d,%r14d
  1606. xorl %r10d,%r12d
  1607. vpsrld $7,%xmm4,%xmm6
  1608. shrdl $5,%r13d,%r13d
  1609. xorl %eax,%r14d
  1610. andl %r8d,%r12d
  1611. vpaddd %xmm7,%xmm2,%xmm2
  1612. vaesenc %xmm10,%xmm9,%xmm9
  1613. vmovdqu 128-128(%rdi),%xmm10
  1614. xorl %r8d,%r13d
  1615. addl 32(%rsp),%r11d
  1616. movl %eax,%r15d
  1617. vpsrld $3,%xmm4,%xmm7
  1618. shrdl $11,%r14d,%r14d
  1619. xorl %r10d,%r12d
  1620. xorl %ebx,%r15d
  1621. vpslld $14,%xmm4,%xmm5
  1622. shrdl $6,%r13d,%r13d
  1623. addl %r12d,%r11d
  1624. andl %r15d,%esi
  1625. vpxor %xmm6,%xmm7,%xmm4
  1626. xorl %eax,%r14d
  1627. addl %r13d,%r11d
  1628. xorl %ebx,%esi
  1629. vpshufd $250,%xmm1,%xmm7
  1630. addl %r11d,%edx
  1631. shrdl $2,%r14d,%r14d
  1632. addl %esi,%r11d
  1633. vpsrld $11,%xmm6,%xmm6
  1634. movl %edx,%r13d
  1635. addl %r11d,%r14d
  1636. shrdl $14,%r13d,%r13d
  1637. vpxor %xmm5,%xmm4,%xmm4
  1638. movl %r14d,%r11d
  1639. movl %r8d,%r12d
  1640. xorl %edx,%r13d
  1641. vpslld $11,%xmm5,%xmm5
  1642. shrdl $9,%r14d,%r14d
  1643. xorl %r9d,%r12d
  1644. shrdl $5,%r13d,%r13d
  1645. vpxor %xmm6,%xmm4,%xmm4
  1646. xorl %r11d,%r14d
  1647. andl %edx,%r12d
  1648. vaesenc %xmm10,%xmm9,%xmm9
  1649. vmovdqu 144-128(%rdi),%xmm10
  1650. xorl %edx,%r13d
  1651. vpsrld $10,%xmm7,%xmm6
  1652. addl 36(%rsp),%r10d
  1653. movl %r11d,%esi
  1654. shrdl $11,%r14d,%r14d
  1655. vpxor %xmm5,%xmm4,%xmm4
  1656. xorl %r9d,%r12d
  1657. xorl %eax,%esi
  1658. shrdl $6,%r13d,%r13d
  1659. vpsrlq $17,%xmm7,%xmm7
  1660. addl %r12d,%r10d
  1661. andl %esi,%r15d
  1662. xorl %r11d,%r14d
  1663. vpaddd %xmm4,%xmm2,%xmm2
  1664. addl %r13d,%r10d
  1665. xorl %eax,%r15d
  1666. addl %r10d,%ecx
  1667. vpxor %xmm7,%xmm6,%xmm6
  1668. shrdl $2,%r14d,%r14d
  1669. addl %r15d,%r10d
  1670. movl %ecx,%r13d
  1671. vpsrlq $2,%xmm7,%xmm7
  1672. addl %r10d,%r14d
  1673. shrdl $14,%r13d,%r13d
  1674. movl %r14d,%r10d
  1675. vpxor %xmm7,%xmm6,%xmm6
  1676. movl %edx,%r12d
  1677. xorl %ecx,%r13d
  1678. shrdl $9,%r14d,%r14d
  1679. vpshufd $132,%xmm6,%xmm6
  1680. xorl %r8d,%r12d
  1681. shrdl $5,%r13d,%r13d
  1682. xorl %r10d,%r14d
  1683. vpsrldq $8,%xmm6,%xmm6
  1684. andl %ecx,%r12d
  1685. vaesenc %xmm10,%xmm9,%xmm9
  1686. vmovdqu 160-128(%rdi),%xmm10
  1687. xorl %ecx,%r13d
  1688. addl 40(%rsp),%r9d
  1689. vpaddd %xmm6,%xmm2,%xmm2
  1690. movl %r10d,%r15d
  1691. shrdl $11,%r14d,%r14d
  1692. xorl %r8d,%r12d
  1693. vpshufd $80,%xmm2,%xmm7
  1694. xorl %r11d,%r15d
  1695. shrdl $6,%r13d,%r13d
  1696. addl %r12d,%r9d
  1697. vpsrld $10,%xmm7,%xmm6
  1698. andl %r15d,%esi
  1699. xorl %r10d,%r14d
  1700. addl %r13d,%r9d
  1701. vpsrlq $17,%xmm7,%xmm7
  1702. xorl %r11d,%esi
  1703. addl %r9d,%ebx
  1704. shrdl $2,%r14d,%r14d
  1705. vpxor %xmm7,%xmm6,%xmm6
  1706. addl %esi,%r9d
  1707. movl %ebx,%r13d
  1708. addl %r9d,%r14d
  1709. vpsrlq $2,%xmm7,%xmm7
  1710. shrdl $14,%r13d,%r13d
  1711. movl %r14d,%r9d
  1712. movl %ecx,%r12d
  1713. vpxor %xmm7,%xmm6,%xmm6
  1714. xorl %ebx,%r13d
  1715. shrdl $9,%r14d,%r14d
  1716. xorl %edx,%r12d
  1717. vpshufd $232,%xmm6,%xmm6
  1718. shrdl $5,%r13d,%r13d
  1719. xorl %r9d,%r14d
  1720. andl %ebx,%r12d
  1721. vpslldq $8,%xmm6,%xmm6
  1722. vaesenclast %xmm10,%xmm9,%xmm11
  1723. vaesenc %xmm10,%xmm9,%xmm9
  1724. vmovdqu 176-128(%rdi),%xmm10
  1725. xorl %ebx,%r13d
  1726. addl 44(%rsp),%r8d
  1727. movl %r9d,%esi
  1728. vpaddd %xmm6,%xmm2,%xmm2
  1729. shrdl $11,%r14d,%r14d
  1730. xorl %edx,%r12d
  1731. xorl %r10d,%esi
  1732. vpaddd 64(%rbp),%xmm2,%xmm6
  1733. shrdl $6,%r13d,%r13d
  1734. addl %r12d,%r8d
  1735. andl %esi,%r15d
  1736. xorl %r9d,%r14d
  1737. addl %r13d,%r8d
  1738. xorl %r10d,%r15d
  1739. addl %r8d,%eax
  1740. shrdl $2,%r14d,%r14d
  1741. addl %r15d,%r8d
  1742. movl %eax,%r13d
  1743. addl %r8d,%r14d
  1744. vmovdqa %xmm6,32(%rsp)
  1745. vpalignr $4,%xmm3,%xmm0,%xmm4
  1746. shrdl $14,%r13d,%r13d
  1747. movl %r14d,%r8d
  1748. movl %ebx,%r12d
  1749. vpalignr $4,%xmm1,%xmm2,%xmm7
  1750. xorl %eax,%r13d
  1751. shrdl $9,%r14d,%r14d
  1752. xorl %ecx,%r12d
  1753. vpsrld $7,%xmm4,%xmm6
  1754. shrdl $5,%r13d,%r13d
  1755. xorl %r8d,%r14d
  1756. andl %eax,%r12d
  1757. vpaddd %xmm7,%xmm3,%xmm3
  1758. vpand %xmm12,%xmm11,%xmm8
  1759. vaesenc %xmm10,%xmm9,%xmm9
  1760. vmovdqu 192-128(%rdi),%xmm10
  1761. xorl %eax,%r13d
  1762. addl 48(%rsp),%edx
  1763. movl %r8d,%r15d
  1764. vpsrld $3,%xmm4,%xmm7
  1765. shrdl $11,%r14d,%r14d
  1766. xorl %ecx,%r12d
  1767. xorl %r9d,%r15d
  1768. vpslld $14,%xmm4,%xmm5
  1769. shrdl $6,%r13d,%r13d
  1770. addl %r12d,%edx
  1771. andl %r15d,%esi
  1772. vpxor %xmm6,%xmm7,%xmm4
  1773. xorl %r8d,%r14d
  1774. addl %r13d,%edx
  1775. xorl %r9d,%esi
  1776. vpshufd $250,%xmm2,%xmm7
  1777. addl %edx,%r11d
  1778. shrdl $2,%r14d,%r14d
  1779. addl %esi,%edx
  1780. vpsrld $11,%xmm6,%xmm6
  1781. movl %r11d,%r13d
  1782. addl %edx,%r14d
  1783. shrdl $14,%r13d,%r13d
  1784. vpxor %xmm5,%xmm4,%xmm4
  1785. movl %r14d,%edx
  1786. movl %eax,%r12d
  1787. xorl %r11d,%r13d
  1788. vpslld $11,%xmm5,%xmm5
  1789. shrdl $9,%r14d,%r14d
  1790. xorl %ebx,%r12d
  1791. shrdl $5,%r13d,%r13d
  1792. vpxor %xmm6,%xmm4,%xmm4
  1793. xorl %edx,%r14d
  1794. andl %r11d,%r12d
  1795. vaesenclast %xmm10,%xmm9,%xmm11
  1796. vaesenc %xmm10,%xmm9,%xmm9
  1797. vmovdqu 208-128(%rdi),%xmm10
  1798. xorl %r11d,%r13d
  1799. vpsrld $10,%xmm7,%xmm6
  1800. addl 52(%rsp),%ecx
  1801. movl %edx,%esi
  1802. shrdl $11,%r14d,%r14d
  1803. vpxor %xmm5,%xmm4,%xmm4
  1804. xorl %ebx,%r12d
  1805. xorl %r8d,%esi
  1806. shrdl $6,%r13d,%r13d
  1807. vpsrlq $17,%xmm7,%xmm7
  1808. addl %r12d,%ecx
  1809. andl %esi,%r15d
  1810. xorl %edx,%r14d
  1811. vpaddd %xmm4,%xmm3,%xmm3
  1812. addl %r13d,%ecx
  1813. xorl %r8d,%r15d
  1814. addl %ecx,%r10d
  1815. vpxor %xmm7,%xmm6,%xmm6
  1816. shrdl $2,%r14d,%r14d
  1817. addl %r15d,%ecx
  1818. movl %r10d,%r13d
  1819. vpsrlq $2,%xmm7,%xmm7
  1820. addl %ecx,%r14d
  1821. shrdl $14,%r13d,%r13d
  1822. movl %r14d,%ecx
  1823. vpxor %xmm7,%xmm6,%xmm6
  1824. movl %r11d,%r12d
  1825. xorl %r10d,%r13d
  1826. shrdl $9,%r14d,%r14d
  1827. vpshufd $132,%xmm6,%xmm6
  1828. xorl %eax,%r12d
  1829. shrdl $5,%r13d,%r13d
  1830. xorl %ecx,%r14d
  1831. vpsrldq $8,%xmm6,%xmm6
  1832. andl %r10d,%r12d
  1833. vpand %xmm13,%xmm11,%xmm11
  1834. vaesenc %xmm10,%xmm9,%xmm9
  1835. vmovdqu 224-128(%rdi),%xmm10
  1836. xorl %r10d,%r13d
  1837. addl 56(%rsp),%ebx
  1838. vpaddd %xmm6,%xmm3,%xmm3
  1839. movl %ecx,%r15d
  1840. shrdl $11,%r14d,%r14d
  1841. xorl %eax,%r12d
  1842. vpshufd $80,%xmm3,%xmm7
  1843. xorl %edx,%r15d
  1844. shrdl $6,%r13d,%r13d
  1845. addl %r12d,%ebx
  1846. vpsrld $10,%xmm7,%xmm6
  1847. andl %r15d,%esi
  1848. xorl %ecx,%r14d
  1849. addl %r13d,%ebx
  1850. vpsrlq $17,%xmm7,%xmm7
  1851. xorl %edx,%esi
  1852. addl %ebx,%r9d
  1853. shrdl $2,%r14d,%r14d
  1854. vpxor %xmm7,%xmm6,%xmm6
  1855. addl %esi,%ebx
  1856. movl %r9d,%r13d
  1857. addl %ebx,%r14d
  1858. vpsrlq $2,%xmm7,%xmm7
  1859. shrdl $14,%r13d,%r13d
  1860. movl %r14d,%ebx
  1861. movl %r10d,%r12d
  1862. vpxor %xmm7,%xmm6,%xmm6
  1863. xorl %r9d,%r13d
  1864. shrdl $9,%r14d,%r14d
  1865. xorl %r11d,%r12d
  1866. vpshufd $232,%xmm6,%xmm6
  1867. shrdl $5,%r13d,%r13d
  1868. xorl %ebx,%r14d
  1869. andl %r9d,%r12d
  1870. vpslldq $8,%xmm6,%xmm6
  1871. vpor %xmm11,%xmm8,%xmm8
  1872. vaesenclast %xmm10,%xmm9,%xmm11
  1873. vmovdqu 0-128(%rdi),%xmm10
  1874. xorl %r9d,%r13d
  1875. addl 60(%rsp),%eax
  1876. movl %ebx,%esi
  1877. vpaddd %xmm6,%xmm3,%xmm3
  1878. shrdl $11,%r14d,%r14d
  1879. xorl %r11d,%r12d
  1880. xorl %ecx,%esi
  1881. vpaddd 96(%rbp),%xmm3,%xmm6
  1882. shrdl $6,%r13d,%r13d
  1883. addl %r12d,%eax
  1884. andl %esi,%r15d
  1885. xorl %ebx,%r14d
  1886. addl %r13d,%eax
  1887. xorl %ecx,%r15d
  1888. addl %eax,%r8d
  1889. shrdl $2,%r14d,%r14d
  1890. addl %r15d,%eax
  1891. movl %r8d,%r13d
  1892. addl %eax,%r14d
  1893. vmovdqa %xmm6,48(%rsp)
  1894. movq 64+0(%rsp),%r12
  1895. vpand %xmm14,%xmm11,%xmm11
  1896. movq 64+8(%rsp),%r15
  1897. vpor %xmm11,%xmm8,%xmm8
  1898. vmovdqu %xmm8,(%r15,%r12,1)
  1899. leaq 16(%r12),%r12
  1900. cmpb $0,131(%rbp)
  1901. jne .Lavx_00_47
  1902. vmovdqu (%r12),%xmm9
  1903. movq %r12,64+0(%rsp)
  1904. shrdl $14,%r13d,%r13d
  1905. movl %r14d,%eax
  1906. movl %r9d,%r12d
  1907. xorl %r8d,%r13d
  1908. shrdl $9,%r14d,%r14d
  1909. xorl %r10d,%r12d
  1910. shrdl $5,%r13d,%r13d
  1911. xorl %eax,%r14d
  1912. andl %r8d,%r12d
  1913. vpxor %xmm10,%xmm9,%xmm9
  1914. vmovdqu 16-128(%rdi),%xmm10
  1915. xorl %r8d,%r13d
  1916. addl 0(%rsp),%r11d
  1917. movl %eax,%r15d
  1918. shrdl $11,%r14d,%r14d
  1919. xorl %r10d,%r12d
  1920. xorl %ebx,%r15d
  1921. shrdl $6,%r13d,%r13d
  1922. addl %r12d,%r11d
  1923. andl %r15d,%esi
  1924. xorl %eax,%r14d
  1925. addl %r13d,%r11d
  1926. xorl %ebx,%esi
  1927. addl %r11d,%edx
  1928. shrdl $2,%r14d,%r14d
  1929. addl %esi,%r11d
  1930. movl %edx,%r13d
  1931. addl %r11d,%r14d
  1932. shrdl $14,%r13d,%r13d
  1933. movl %r14d,%r11d
  1934. movl %r8d,%r12d
  1935. xorl %edx,%r13d
  1936. shrdl $9,%r14d,%r14d
  1937. xorl %r9d,%r12d
  1938. shrdl $5,%r13d,%r13d
  1939. xorl %r11d,%r14d
  1940. andl %edx,%r12d
  1941. vpxor %xmm8,%xmm9,%xmm9
  1942. xorl %edx,%r13d
  1943. addl 4(%rsp),%r10d
  1944. movl %r11d,%esi
  1945. shrdl $11,%r14d,%r14d
  1946. xorl %r9d,%r12d
  1947. xorl %eax,%esi
  1948. shrdl $6,%r13d,%r13d
  1949. addl %r12d,%r10d
  1950. andl %esi,%r15d
  1951. xorl %r11d,%r14d
  1952. addl %r13d,%r10d
  1953. xorl %eax,%r15d
  1954. addl %r10d,%ecx
  1955. shrdl $2,%r14d,%r14d
  1956. addl %r15d,%r10d
  1957. movl %ecx,%r13d
  1958. addl %r10d,%r14d
  1959. shrdl $14,%r13d,%r13d
  1960. movl %r14d,%r10d
  1961. movl %edx,%r12d
  1962. xorl %ecx,%r13d
  1963. shrdl $9,%r14d,%r14d
  1964. xorl %r8d,%r12d
  1965. shrdl $5,%r13d,%r13d
  1966. xorl %r10d,%r14d
  1967. andl %ecx,%r12d
  1968. vaesenc %xmm10,%xmm9,%xmm9
  1969. vmovdqu 32-128(%rdi),%xmm10
  1970. xorl %ecx,%r13d
  1971. addl 8(%rsp),%r9d
  1972. movl %r10d,%r15d
  1973. shrdl $11,%r14d,%r14d
  1974. xorl %r8d,%r12d
  1975. xorl %r11d,%r15d
  1976. shrdl $6,%r13d,%r13d
  1977. addl %r12d,%r9d
  1978. andl %r15d,%esi
  1979. xorl %r10d,%r14d
  1980. addl %r13d,%r9d
  1981. xorl %r11d,%esi
  1982. addl %r9d,%ebx
  1983. shrdl $2,%r14d,%r14d
  1984. addl %esi,%r9d
  1985. movl %ebx,%r13d
  1986. addl %r9d,%r14d
  1987. shrdl $14,%r13d,%r13d
  1988. movl %r14d,%r9d
  1989. movl %ecx,%r12d
  1990. xorl %ebx,%r13d
  1991. shrdl $9,%r14d,%r14d
  1992. xorl %edx,%r12d
  1993. shrdl $5,%r13d,%r13d
  1994. xorl %r9d,%r14d
  1995. andl %ebx,%r12d
  1996. vaesenc %xmm10,%xmm9,%xmm9
  1997. vmovdqu 48-128(%rdi),%xmm10
  1998. xorl %ebx,%r13d
  1999. addl 12(%rsp),%r8d
  2000. movl %r9d,%esi
  2001. shrdl $11,%r14d,%r14d
  2002. xorl %edx,%r12d
  2003. xorl %r10d,%esi
  2004. shrdl $6,%r13d,%r13d
  2005. addl %r12d,%r8d
  2006. andl %esi,%r15d
  2007. xorl %r9d,%r14d
  2008. addl %r13d,%r8d
  2009. xorl %r10d,%r15d
  2010. addl %r8d,%eax
  2011. shrdl $2,%r14d,%r14d
  2012. addl %r15d,%r8d
  2013. movl %eax,%r13d
  2014. addl %r8d,%r14d
  2015. shrdl $14,%r13d,%r13d
  2016. movl %r14d,%r8d
  2017. movl %ebx,%r12d
  2018. xorl %eax,%r13d
  2019. shrdl $9,%r14d,%r14d
  2020. xorl %ecx,%r12d
  2021. shrdl $5,%r13d,%r13d
  2022. xorl %r8d,%r14d
  2023. andl %eax,%r12d
  2024. vaesenc %xmm10,%xmm9,%xmm9
  2025. vmovdqu 64-128(%rdi),%xmm10
  2026. xorl %eax,%r13d
  2027. addl 16(%rsp),%edx
  2028. movl %r8d,%r15d
  2029. shrdl $11,%r14d,%r14d
  2030. xorl %ecx,%r12d
  2031. xorl %r9d,%r15d
  2032. shrdl $6,%r13d,%r13d
  2033. addl %r12d,%edx
  2034. andl %r15d,%esi
  2035. xorl %r8d,%r14d
  2036. addl %r13d,%edx
  2037. xorl %r9d,%esi
  2038. addl %edx,%r11d
  2039. shrdl $2,%r14d,%r14d
  2040. addl %esi,%edx
  2041. movl %r11d,%r13d
  2042. addl %edx,%r14d
  2043. shrdl $14,%r13d,%r13d
  2044. movl %r14d,%edx
  2045. movl %eax,%r12d
  2046. xorl %r11d,%r13d
  2047. shrdl $9,%r14d,%r14d
  2048. xorl %ebx,%r12d
  2049. shrdl $5,%r13d,%r13d
  2050. xorl %edx,%r14d
  2051. andl %r11d,%r12d
  2052. vaesenc %xmm10,%xmm9,%xmm9
  2053. vmovdqu 80-128(%rdi),%xmm10
  2054. xorl %r11d,%r13d
  2055. addl 20(%rsp),%ecx
  2056. movl %edx,%esi
  2057. shrdl $11,%r14d,%r14d
  2058. xorl %ebx,%r12d
  2059. xorl %r8d,%esi
  2060. shrdl $6,%r13d,%r13d
  2061. addl %r12d,%ecx
  2062. andl %esi,%r15d
  2063. xorl %edx,%r14d
  2064. addl %r13d,%ecx
  2065. xorl %r8d,%r15d
  2066. addl %ecx,%r10d
  2067. shrdl $2,%r14d,%r14d
  2068. addl %r15d,%ecx
  2069. movl %r10d,%r13d
  2070. addl %ecx,%r14d
  2071. shrdl $14,%r13d,%r13d
  2072. movl %r14d,%ecx
  2073. movl %r11d,%r12d
  2074. xorl %r10d,%r13d
  2075. shrdl $9,%r14d,%r14d
  2076. xorl %eax,%r12d
  2077. shrdl $5,%r13d,%r13d
  2078. xorl %ecx,%r14d
  2079. andl %r10d,%r12d
  2080. vaesenc %xmm10,%xmm9,%xmm9
  2081. vmovdqu 96-128(%rdi),%xmm10
  2082. xorl %r10d,%r13d
  2083. addl 24(%rsp),%ebx
  2084. movl %ecx,%r15d
  2085. shrdl $11,%r14d,%r14d
  2086. xorl %eax,%r12d
  2087. xorl %edx,%r15d
  2088. shrdl $6,%r13d,%r13d
  2089. addl %r12d,%ebx
  2090. andl %r15d,%esi
  2091. xorl %ecx,%r14d
  2092. addl %r13d,%ebx
  2093. xorl %edx,%esi
  2094. addl %ebx,%r9d
  2095. shrdl $2,%r14d,%r14d
  2096. addl %esi,%ebx
  2097. movl %r9d,%r13d
  2098. addl %ebx,%r14d
  2099. shrdl $14,%r13d,%r13d
  2100. movl %r14d,%ebx
  2101. movl %r10d,%r12d
  2102. xorl %r9d,%r13d
  2103. shrdl $9,%r14d,%r14d
  2104. xorl %r11d,%r12d
  2105. shrdl $5,%r13d,%r13d
  2106. xorl %ebx,%r14d
  2107. andl %r9d,%r12d
  2108. vaesenc %xmm10,%xmm9,%xmm9
  2109. vmovdqu 112-128(%rdi),%xmm10
  2110. xorl %r9d,%r13d
  2111. addl 28(%rsp),%eax
  2112. movl %ebx,%esi
  2113. shrdl $11,%r14d,%r14d
  2114. xorl %r11d,%r12d
  2115. xorl %ecx,%esi
  2116. shrdl $6,%r13d,%r13d
  2117. addl %r12d,%eax
  2118. andl %esi,%r15d
  2119. xorl %ebx,%r14d
  2120. addl %r13d,%eax
  2121. xorl %ecx,%r15d
  2122. addl %eax,%r8d
  2123. shrdl $2,%r14d,%r14d
  2124. addl %r15d,%eax
  2125. movl %r8d,%r13d
  2126. addl %eax,%r14d
  2127. shrdl $14,%r13d,%r13d
  2128. movl %r14d,%eax
  2129. movl %r9d,%r12d
  2130. xorl %r8d,%r13d
  2131. shrdl $9,%r14d,%r14d
  2132. xorl %r10d,%r12d
  2133. shrdl $5,%r13d,%r13d
  2134. xorl %eax,%r14d
  2135. andl %r8d,%r12d
  2136. vaesenc %xmm10,%xmm9,%xmm9
  2137. vmovdqu 128-128(%rdi),%xmm10
  2138. xorl %r8d,%r13d
  2139. addl 32(%rsp),%r11d
  2140. movl %eax,%r15d
  2141. shrdl $11,%r14d,%r14d
  2142. xorl %r10d,%r12d
  2143. xorl %ebx,%r15d
  2144. shrdl $6,%r13d,%r13d
  2145. addl %r12d,%r11d
  2146. andl %r15d,%esi
  2147. xorl %eax,%r14d
  2148. addl %r13d,%r11d
  2149. xorl %ebx,%esi
  2150. addl %r11d,%edx
  2151. shrdl $2,%r14d,%r14d
  2152. addl %esi,%r11d
  2153. movl %edx,%r13d
  2154. addl %r11d,%r14d
  2155. shrdl $14,%r13d,%r13d
  2156. movl %r14d,%r11d
  2157. movl %r8d,%r12d
  2158. xorl %edx,%r13d
  2159. shrdl $9,%r14d,%r14d
  2160. xorl %r9d,%r12d
  2161. shrdl $5,%r13d,%r13d
  2162. xorl %r11d,%r14d
  2163. andl %edx,%r12d
  2164. vaesenc %xmm10,%xmm9,%xmm9
  2165. vmovdqu 144-128(%rdi),%xmm10
  2166. xorl %edx,%r13d
  2167. addl 36(%rsp),%r10d
  2168. movl %r11d,%esi
  2169. shrdl $11,%r14d,%r14d
  2170. xorl %r9d,%r12d
  2171. xorl %eax,%esi
  2172. shrdl $6,%r13d,%r13d
  2173. addl %r12d,%r10d
  2174. andl %esi,%r15d
  2175. xorl %r11d,%r14d
  2176. addl %r13d,%r10d
  2177. xorl %eax,%r15d
  2178. addl %r10d,%ecx
  2179. shrdl $2,%r14d,%r14d
  2180. addl %r15d,%r10d
  2181. movl %ecx,%r13d
  2182. addl %r10d,%r14d
  2183. shrdl $14,%r13d,%r13d
  2184. movl %r14d,%r10d
  2185. movl %edx,%r12d
  2186. xorl %ecx,%r13d
  2187. shrdl $9,%r14d,%r14d
  2188. xorl %r8d,%r12d
  2189. shrdl $5,%r13d,%r13d
  2190. xorl %r10d,%r14d
  2191. andl %ecx,%r12d
  2192. vaesenc %xmm10,%xmm9,%xmm9
  2193. vmovdqu 160-128(%rdi),%xmm10
  2194. xorl %ecx,%r13d
  2195. addl 40(%rsp),%r9d
  2196. movl %r10d,%r15d
  2197. shrdl $11,%r14d,%r14d
  2198. xorl %r8d,%r12d
  2199. xorl %r11d,%r15d
  2200. shrdl $6,%r13d,%r13d
  2201. addl %r12d,%r9d
  2202. andl %r15d,%esi
  2203. xorl %r10d,%r14d
  2204. addl %r13d,%r9d
  2205. xorl %r11d,%esi
  2206. addl %r9d,%ebx
  2207. shrdl $2,%r14d,%r14d
  2208. addl %esi,%r9d
  2209. movl %ebx,%r13d
  2210. addl %r9d,%r14d
  2211. shrdl $14,%r13d,%r13d
  2212. movl %r14d,%r9d
  2213. movl %ecx,%r12d
  2214. xorl %ebx,%r13d
  2215. shrdl $9,%r14d,%r14d
  2216. xorl %edx,%r12d
  2217. shrdl $5,%r13d,%r13d
  2218. xorl %r9d,%r14d
  2219. andl %ebx,%r12d
  2220. vaesenclast %xmm10,%xmm9,%xmm11
  2221. vaesenc %xmm10,%xmm9,%xmm9
  2222. vmovdqu 176-128(%rdi),%xmm10
  2223. xorl %ebx,%r13d
  2224. addl 44(%rsp),%r8d
  2225. movl %r9d,%esi
  2226. shrdl $11,%r14d,%r14d
  2227. xorl %edx,%r12d
  2228. xorl %r10d,%esi
  2229. shrdl $6,%r13d,%r13d
  2230. addl %r12d,%r8d
  2231. andl %esi,%r15d
  2232. xorl %r9d,%r14d
  2233. addl %r13d,%r8d
  2234. xorl %r10d,%r15d
  2235. addl %r8d,%eax
  2236. shrdl $2,%r14d,%r14d
  2237. addl %r15d,%r8d
  2238. movl %eax,%r13d
  2239. addl %r8d,%r14d
  2240. shrdl $14,%r13d,%r13d
  2241. movl %r14d,%r8d
  2242. movl %ebx,%r12d
  2243. xorl %eax,%r13d
  2244. shrdl $9,%r14d,%r14d
  2245. xorl %ecx,%r12d
  2246. shrdl $5,%r13d,%r13d
  2247. xorl %r8d,%r14d
  2248. andl %eax,%r12d
  2249. vpand %xmm12,%xmm11,%xmm8
  2250. vaesenc %xmm10,%xmm9,%xmm9
  2251. vmovdqu 192-128(%rdi),%xmm10
  2252. xorl %eax,%r13d
  2253. addl 48(%rsp),%edx
  2254. movl %r8d,%r15d
  2255. shrdl $11,%r14d,%r14d
  2256. xorl %ecx,%r12d
  2257. xorl %r9d,%r15d
  2258. shrdl $6,%r13d,%r13d
  2259. addl %r12d,%edx
  2260. andl %r15d,%esi
  2261. xorl %r8d,%r14d
  2262. addl %r13d,%edx
  2263. xorl %r9d,%esi
  2264. addl %edx,%r11d
  2265. shrdl $2,%r14d,%r14d
  2266. addl %esi,%edx
  2267. movl %r11d,%r13d
  2268. addl %edx,%r14d
  2269. shrdl $14,%r13d,%r13d
  2270. movl %r14d,%edx
  2271. movl %eax,%r12d
  2272. xorl %r11d,%r13d
  2273. shrdl $9,%r14d,%r14d
  2274. xorl %ebx,%r12d
  2275. shrdl $5,%r13d,%r13d
  2276. xorl %edx,%r14d
  2277. andl %r11d,%r12d
  2278. vaesenclast %xmm10,%xmm9,%xmm11
  2279. vaesenc %xmm10,%xmm9,%xmm9
  2280. vmovdqu 208-128(%rdi),%xmm10
  2281. xorl %r11d,%r13d
  2282. addl 52(%rsp),%ecx
  2283. movl %edx,%esi
  2284. shrdl $11,%r14d,%r14d
  2285. xorl %ebx,%r12d
  2286. xorl %r8d,%esi
  2287. shrdl $6,%r13d,%r13d
  2288. addl %r12d,%ecx
  2289. andl %esi,%r15d
  2290. xorl %edx,%r14d
  2291. addl %r13d,%ecx
  2292. xorl %r8d,%r15d
  2293. addl %ecx,%r10d
  2294. shrdl $2,%r14d,%r14d
  2295. addl %r15d,%ecx
  2296. movl %r10d,%r13d
  2297. addl %ecx,%r14d
  2298. shrdl $14,%r13d,%r13d
  2299. movl %r14d,%ecx
  2300. movl %r11d,%r12d
  2301. xorl %r10d,%r13d
  2302. shrdl $9,%r14d,%r14d
  2303. xorl %eax,%r12d
  2304. shrdl $5,%r13d,%r13d
  2305. xorl %ecx,%r14d
  2306. andl %r10d,%r12d
  2307. vpand %xmm13,%xmm11,%xmm11
  2308. vaesenc %xmm10,%xmm9,%xmm9
  2309. vmovdqu 224-128(%rdi),%xmm10
  2310. xorl %r10d,%r13d
  2311. addl 56(%rsp),%ebx
  2312. movl %ecx,%r15d
  2313. shrdl $11,%r14d,%r14d
  2314. xorl %eax,%r12d
  2315. xorl %edx,%r15d
  2316. shrdl $6,%r13d,%r13d
  2317. addl %r12d,%ebx
  2318. andl %r15d,%esi
  2319. xorl %ecx,%r14d
  2320. addl %r13d,%ebx
  2321. xorl %edx,%esi
  2322. addl %ebx,%r9d
  2323. shrdl $2,%r14d,%r14d
  2324. addl %esi,%ebx
  2325. movl %r9d,%r13d
  2326. addl %ebx,%r14d
  2327. shrdl $14,%r13d,%r13d
  2328. movl %r14d,%ebx
  2329. movl %r10d,%r12d
  2330. xorl %r9d,%r13d
  2331. shrdl $9,%r14d,%r14d
  2332. xorl %r11d,%r12d
  2333. shrdl $5,%r13d,%r13d
  2334. xorl %ebx,%r14d
  2335. andl %r9d,%r12d
  2336. vpor %xmm11,%xmm8,%xmm8
  2337. vaesenclast %xmm10,%xmm9,%xmm11
  2338. vmovdqu 0-128(%rdi),%xmm10
  2339. xorl %r9d,%r13d
  2340. addl 60(%rsp),%eax
  2341. movl %ebx,%esi
  2342. shrdl $11,%r14d,%r14d
  2343. xorl %r11d,%r12d
  2344. xorl %ecx,%esi
  2345. shrdl $6,%r13d,%r13d
  2346. addl %r12d,%eax
  2347. andl %esi,%r15d
  2348. xorl %ebx,%r14d
  2349. addl %r13d,%eax
  2350. xorl %ecx,%r15d
  2351. addl %eax,%r8d
  2352. shrdl $2,%r14d,%r14d
  2353. addl %r15d,%eax
  2354. movl %r8d,%r13d
  2355. addl %eax,%r14d
  2356. movq 64+0(%rsp),%r12
  2357. movq 64+8(%rsp),%r13
  2358. movq 64+40(%rsp),%r15
  2359. movq 64+48(%rsp),%rsi
  2360. vpand %xmm14,%xmm11,%xmm11
  2361. movl %r14d,%eax
  2362. vpor %xmm11,%xmm8,%xmm8
  2363. vmovdqu %xmm8,(%r12,%r13,1)
  2364. leaq 16(%r12),%r12
  2365. addl 0(%r15),%eax
  2366. addl 4(%r15),%ebx
  2367. addl 8(%r15),%ecx
  2368. addl 12(%r15),%edx
  2369. addl 16(%r15),%r8d
  2370. addl 20(%r15),%r9d
  2371. addl 24(%r15),%r10d
  2372. addl 28(%r15),%r11d
  2373. cmpq 64+16(%rsp),%r12
  2374. movl %eax,0(%r15)
  2375. movl %ebx,4(%r15)
  2376. movl %ecx,8(%r15)
  2377. movl %edx,12(%r15)
  2378. movl %r8d,16(%r15)
  2379. movl %r9d,20(%r15)
  2380. movl %r10d,24(%r15)
  2381. movl %r11d,28(%r15)
  2382. jb .Lloop_avx
  2383. movq 64+32(%rsp),%r8
  2384. movq 120(%rsp),%rsi
  2385. .cfi_def_cfa %rsi,8
  2386. vmovdqu %xmm8,(%r8)
  2387. vzeroall
  2388. movq -48(%rsi),%r15
  2389. .cfi_restore %r15
  2390. movq -40(%rsi),%r14
  2391. .cfi_restore %r14
  2392. movq -32(%rsi),%r13
  2393. .cfi_restore %r13
  2394. movq -24(%rsi),%r12
  2395. .cfi_restore %r12
  2396. movq -16(%rsi),%rbp
  2397. .cfi_restore %rbp
  2398. movq -8(%rsi),%rbx
  2399. .cfi_restore %rbx
  2400. leaq (%rsi),%rsp
  2401. .cfi_def_cfa_register %rsp
  2402. .Lepilogue_avx:
  2403. .byte 0xf3,0xc3
  2404. .cfi_endproc
  2405. .size aesni_cbc_sha256_enc_avx,.-aesni_cbc_sha256_enc_avx
  2406. .type aesni_cbc_sha256_enc_avx2,@function
  2407. .align 64
  2408. aesni_cbc_sha256_enc_avx2:
  2409. .cfi_startproc
  2410. .Lavx2_shortcut:
  2411. movq 8(%rsp),%r10
  2412. movq %rsp,%rax
  2413. .cfi_def_cfa_register %rax
  2414. pushq %rbx
  2415. .cfi_offset %rbx,-16
  2416. pushq %rbp
  2417. .cfi_offset %rbp,-24
  2418. pushq %r12
  2419. .cfi_offset %r12,-32
  2420. pushq %r13
  2421. .cfi_offset %r13,-40
  2422. pushq %r14
  2423. .cfi_offset %r14,-48
  2424. pushq %r15
  2425. .cfi_offset %r15,-56
  2426. subq $576,%rsp
  2427. andq $-1024,%rsp
  2428. addq $448,%rsp
  2429. shlq $6,%rdx
  2430. subq %rdi,%rsi
  2431. subq %rdi,%r10
  2432. addq %rdi,%rdx
  2433. movq %rdx,64+16(%rsp)
  2434. movq %r8,64+32(%rsp)
  2435. movq %r9,64+40(%rsp)
  2436. movq %r10,64+48(%rsp)
  2437. movq %rax,120(%rsp)
  2438. .cfi_escape 0x0f,0x06,0x77,0xf8,0x00,0x06,0x23,0x08
  2439. .Lprologue_avx2:
  2440. vzeroall
  2441. movq %rdi,%r13
  2442. vpinsrq $1,%rsi,%xmm15,%xmm15
  2443. leaq 128(%rcx),%rdi
  2444. leaq K256+544(%rip),%r12
  2445. movl 240-128(%rdi),%r14d
  2446. movq %r9,%r15
  2447. movq %r10,%rsi
  2448. vmovdqu (%r8),%xmm8
  2449. leaq -9(%r14),%r14
  2450. vmovdqa 0(%r12,%r14,8),%xmm14
  2451. vmovdqa 16(%r12,%r14,8),%xmm13
  2452. vmovdqa 32(%r12,%r14,8),%xmm12
  2453. subq $-64,%r13
  2454. movl 0(%r15),%eax
  2455. leaq (%rsi,%r13,1),%r12
  2456. movl 4(%r15),%ebx
  2457. cmpq %rdx,%r13
  2458. movl 8(%r15),%ecx
  2459. cmoveq %rsp,%r12
  2460. movl 12(%r15),%edx
  2461. movl 16(%r15),%r8d
  2462. movl 20(%r15),%r9d
  2463. movl 24(%r15),%r10d
  2464. movl 28(%r15),%r11d
  2465. vmovdqu 0-128(%rdi),%xmm10
  2466. jmp .Loop_avx2
  2467. .align 16
  2468. .Loop_avx2:
  2469. vmovdqa K256+512(%rip),%ymm7
  2470. vmovdqu -64+0(%rsi,%r13,1),%xmm0
  2471. vmovdqu -64+16(%rsi,%r13,1),%xmm1
  2472. vmovdqu -64+32(%rsi,%r13,1),%xmm2
  2473. vmovdqu -64+48(%rsi,%r13,1),%xmm3
  2474. vinserti128 $1,(%r12),%ymm0,%ymm0
  2475. vinserti128 $1,16(%r12),%ymm1,%ymm1
  2476. vpshufb %ymm7,%ymm0,%ymm0
  2477. vinserti128 $1,32(%r12),%ymm2,%ymm2
  2478. vpshufb %ymm7,%ymm1,%ymm1
  2479. vinserti128 $1,48(%r12),%ymm3,%ymm3
  2480. leaq K256(%rip),%rbp
  2481. vpshufb %ymm7,%ymm2,%ymm2
  2482. leaq -64(%r13),%r13
  2483. vpaddd 0(%rbp),%ymm0,%ymm4
  2484. vpshufb %ymm7,%ymm3,%ymm3
  2485. vpaddd 32(%rbp),%ymm1,%ymm5
  2486. vpaddd 64(%rbp),%ymm2,%ymm6
  2487. vpaddd 96(%rbp),%ymm3,%ymm7
  2488. vmovdqa %ymm4,0(%rsp)
  2489. xorl %r14d,%r14d
  2490. vmovdqa %ymm5,32(%rsp)
  2491. movq 120(%rsp),%rsi
  2492. .cfi_def_cfa %rsi,8
  2493. leaq -64(%rsp),%rsp
  2494. movq %rsi,-8(%rsp)
  2495. .cfi_escape 0x0f,0x05,0x77,0x78,0x06,0x23,0x08
  2496. movl %ebx,%esi
  2497. vmovdqa %ymm6,0(%rsp)
  2498. xorl %ecx,%esi
  2499. vmovdqa %ymm7,32(%rsp)
  2500. movl %r9d,%r12d
  2501. subq $-32*4,%rbp
  2502. jmp .Lavx2_00_47
  2503. .align 16
  2504. .Lavx2_00_47:
  2505. vmovdqu (%r13),%xmm9
  2506. vpinsrq $0,%r13,%xmm15,%xmm15
  2507. leaq -64(%rsp),%rsp
  2508. .cfi_escape 0x0f,0x05,0x77,0x38,0x06,0x23,0x08
  2509. pushq 64-8(%rsp)
  2510. .cfi_escape 0x0f,0x05,0x77,0x00,0x06,0x23,0x08
  2511. leaq 8(%rsp),%rsp
  2512. .cfi_escape 0x0f,0x05,0x77,0x78,0x06,0x23,0x08
  2513. vpalignr $4,%ymm0,%ymm1,%ymm4
  2514. addl 0+128(%rsp),%r11d
  2515. andl %r8d,%r12d
  2516. rorxl $25,%r8d,%r13d
  2517. vpalignr $4,%ymm2,%ymm3,%ymm7
  2518. rorxl $11,%r8d,%r15d
  2519. leal (%rax,%r14,1),%eax
  2520. leal (%r11,%r12,1),%r11d
  2521. vpsrld $7,%ymm4,%ymm6
  2522. andnl %r10d,%r8d,%r12d
  2523. xorl %r15d,%r13d
  2524. rorxl $6,%r8d,%r14d
  2525. vpaddd %ymm7,%ymm0,%ymm0
  2526. leal (%r11,%r12,1),%r11d
  2527. xorl %r14d,%r13d
  2528. movl %eax,%r15d
  2529. vpsrld $3,%ymm4,%ymm7
  2530. rorxl $22,%eax,%r12d
  2531. leal (%r11,%r13,1),%r11d
  2532. xorl %ebx,%r15d
  2533. vpslld $14,%ymm4,%ymm5
  2534. rorxl $13,%eax,%r14d
  2535. rorxl $2,%eax,%r13d
  2536. leal (%rdx,%r11,1),%edx
  2537. vpxor %ymm6,%ymm7,%ymm4
  2538. andl %r15d,%esi
  2539. vpxor %xmm10,%xmm9,%xmm9
  2540. vmovdqu 16-128(%rdi),%xmm10
  2541. xorl %r12d,%r14d
  2542. xorl %ebx,%esi
  2543. vpshufd $250,%ymm3,%ymm7
  2544. xorl %r13d,%r14d
  2545. leal (%r11,%rsi,1),%r11d
  2546. movl %r8d,%r12d
  2547. vpsrld $11,%ymm6,%ymm6
  2548. addl 4+128(%rsp),%r10d
  2549. andl %edx,%r12d
  2550. rorxl $25,%edx,%r13d
  2551. vpxor %ymm5,%ymm4,%ymm4
  2552. rorxl $11,%edx,%esi
  2553. leal (%r11,%r14,1),%r11d
  2554. leal (%r10,%r12,1),%r10d
  2555. vpslld $11,%ymm5,%ymm5
  2556. andnl %r9d,%edx,%r12d
  2557. xorl %esi,%r13d
  2558. rorxl $6,%edx,%r14d
  2559. vpxor %ymm6,%ymm4,%ymm4
  2560. leal (%r10,%r12,1),%r10d
  2561. xorl %r14d,%r13d
  2562. movl %r11d,%esi
  2563. vpsrld $10,%ymm7,%ymm6
  2564. rorxl $22,%r11d,%r12d
  2565. leal (%r10,%r13,1),%r10d
  2566. xorl %eax,%esi
  2567. vpxor %ymm5,%ymm4,%ymm4
  2568. rorxl $13,%r11d,%r14d
  2569. rorxl $2,%r11d,%r13d
  2570. leal (%rcx,%r10,1),%ecx
  2571. vpsrlq $17,%ymm7,%ymm7
  2572. andl %esi,%r15d
  2573. vpxor %xmm8,%xmm9,%xmm9
  2574. xorl %r12d,%r14d
  2575. xorl %eax,%r15d
  2576. vpaddd %ymm4,%ymm0,%ymm0
  2577. xorl %r13d,%r14d
  2578. leal (%r10,%r15,1),%r10d
  2579. movl %edx,%r12d
  2580. vpxor %ymm7,%ymm6,%ymm6
  2581. addl 8+128(%rsp),%r9d
  2582. andl %ecx,%r12d
  2583. rorxl $25,%ecx,%r13d
  2584. vpsrlq $2,%ymm7,%ymm7
  2585. rorxl $11,%ecx,%r15d
  2586. leal (%r10,%r14,1),%r10d
  2587. leal (%r9,%r12,1),%r9d
  2588. vpxor %ymm7,%ymm6,%ymm6
  2589. andnl %r8d,%ecx,%r12d
  2590. xorl %r15d,%r13d
  2591. rorxl $6,%ecx,%r14d
  2592. vpshufd $132,%ymm6,%ymm6
  2593. leal (%r9,%r12,1),%r9d
  2594. xorl %r14d,%r13d
  2595. movl %r10d,%r15d
  2596. vpsrldq $8,%ymm6,%ymm6
  2597. rorxl $22,%r10d,%r12d
  2598. leal (%r9,%r13,1),%r9d
  2599. xorl %r11d,%r15d
  2600. vpaddd %ymm6,%ymm0,%ymm0
  2601. rorxl $13,%r10d,%r14d
  2602. rorxl $2,%r10d,%r13d
  2603. leal (%rbx,%r9,1),%ebx
  2604. vpshufd $80,%ymm0,%ymm7
  2605. andl %r15d,%esi
  2606. vaesenc %xmm10,%xmm9,%xmm9
  2607. vmovdqu 32-128(%rdi),%xmm10
  2608. xorl %r12d,%r14d
  2609. xorl %r11d,%esi
  2610. vpsrld $10,%ymm7,%ymm6
  2611. xorl %r13d,%r14d
  2612. leal (%r9,%rsi,1),%r9d
  2613. movl %ecx,%r12d
  2614. vpsrlq $17,%ymm7,%ymm7
  2615. addl 12+128(%rsp),%r8d
  2616. andl %ebx,%r12d
  2617. rorxl $25,%ebx,%r13d
  2618. vpxor %ymm7,%ymm6,%ymm6
  2619. rorxl $11,%ebx,%esi
  2620. leal (%r9,%r14,1),%r9d
  2621. leal (%r8,%r12,1),%r8d
  2622. vpsrlq $2,%ymm7,%ymm7
  2623. andnl %edx,%ebx,%r12d
  2624. xorl %esi,%r13d
  2625. rorxl $6,%ebx,%r14d
  2626. vpxor %ymm7,%ymm6,%ymm6
  2627. leal (%r8,%r12,1),%r8d
  2628. xorl %r14d,%r13d
  2629. movl %r9d,%esi
  2630. vpshufd $232,%ymm6,%ymm6
  2631. rorxl $22,%r9d,%r12d
  2632. leal (%r8,%r13,1),%r8d
  2633. xorl %r10d,%esi
  2634. vpslldq $8,%ymm6,%ymm6
  2635. rorxl $13,%r9d,%r14d
  2636. rorxl $2,%r9d,%r13d
  2637. leal (%rax,%r8,1),%eax
  2638. vpaddd %ymm6,%ymm0,%ymm0
  2639. andl %esi,%r15d
  2640. vaesenc %xmm10,%xmm9,%xmm9
  2641. vmovdqu 48-128(%rdi),%xmm10
  2642. xorl %r12d,%r14d
  2643. xorl %r10d,%r15d
  2644. vpaddd 0(%rbp),%ymm0,%ymm6
  2645. xorl %r13d,%r14d
  2646. leal (%r8,%r15,1),%r8d
  2647. movl %ebx,%r12d
  2648. vmovdqa %ymm6,0(%rsp)
  2649. vpalignr $4,%ymm1,%ymm2,%ymm4
  2650. addl 32+128(%rsp),%edx
  2651. andl %eax,%r12d
  2652. rorxl $25,%eax,%r13d
  2653. vpalignr $4,%ymm3,%ymm0,%ymm7
  2654. rorxl $11,%eax,%r15d
  2655. leal (%r8,%r14,1),%r8d
  2656. leal (%rdx,%r12,1),%edx
  2657. vpsrld $7,%ymm4,%ymm6
  2658. andnl %ecx,%eax,%r12d
  2659. xorl %r15d,%r13d
  2660. rorxl $6,%eax,%r14d
  2661. vpaddd %ymm7,%ymm1,%ymm1
  2662. leal (%rdx,%r12,1),%edx
  2663. xorl %r14d,%r13d
  2664. movl %r8d,%r15d
  2665. vpsrld $3,%ymm4,%ymm7
  2666. rorxl $22,%r8d,%r12d
  2667. leal (%rdx,%r13,1),%edx
  2668. xorl %r9d,%r15d
  2669. vpslld $14,%ymm4,%ymm5
  2670. rorxl $13,%r8d,%r14d
  2671. rorxl $2,%r8d,%r13d
  2672. leal (%r11,%rdx,1),%r11d
  2673. vpxor %ymm6,%ymm7,%ymm4
  2674. andl %r15d,%esi
  2675. vaesenc %xmm10,%xmm9,%xmm9
  2676. vmovdqu 64-128(%rdi),%xmm10
  2677. xorl %r12d,%r14d
  2678. xorl %r9d,%esi
  2679. vpshufd $250,%ymm0,%ymm7
  2680. xorl %r13d,%r14d
  2681. leal (%rdx,%rsi,1),%edx
  2682. movl %eax,%r12d
  2683. vpsrld $11,%ymm6,%ymm6
  2684. addl 36+128(%rsp),%ecx
  2685. andl %r11d,%r12d
  2686. rorxl $25,%r11d,%r13d
  2687. vpxor %ymm5,%ymm4,%ymm4
  2688. rorxl $11,%r11d,%esi
  2689. leal (%rdx,%r14,1),%edx
  2690. leal (%rcx,%r12,1),%ecx
  2691. vpslld $11,%ymm5,%ymm5
  2692. andnl %ebx,%r11d,%r12d
  2693. xorl %esi,%r13d
  2694. rorxl $6,%r11d,%r14d
  2695. vpxor %ymm6,%ymm4,%ymm4
  2696. leal (%rcx,%r12,1),%ecx
  2697. xorl %r14d,%r13d
  2698. movl %edx,%esi
  2699. vpsrld $10,%ymm7,%ymm6
  2700. rorxl $22,%edx,%r12d
  2701. leal (%rcx,%r13,1),%ecx
  2702. xorl %r8d,%esi
  2703. vpxor %ymm5,%ymm4,%ymm4
  2704. rorxl $13,%edx,%r14d
  2705. rorxl $2,%edx,%r13d
  2706. leal (%r10,%rcx,1),%r10d
  2707. vpsrlq $17,%ymm7,%ymm7
  2708. andl %esi,%r15d
  2709. vaesenc %xmm10,%xmm9,%xmm9
  2710. vmovdqu 80-128(%rdi),%xmm10
  2711. xorl %r12d,%r14d
  2712. xorl %r8d,%r15d
  2713. vpaddd %ymm4,%ymm1,%ymm1
  2714. xorl %r13d,%r14d
  2715. leal (%rcx,%r15,1),%ecx
  2716. movl %r11d,%r12d
  2717. vpxor %ymm7,%ymm6,%ymm6
  2718. addl 40+128(%rsp),%ebx
  2719. andl %r10d,%r12d
  2720. rorxl $25,%r10d,%r13d
  2721. vpsrlq $2,%ymm7,%ymm7
  2722. rorxl $11,%r10d,%r15d
  2723. leal (%rcx,%r14,1),%ecx
  2724. leal (%rbx,%r12,1),%ebx
  2725. vpxor %ymm7,%ymm6,%ymm6
  2726. andnl %eax,%r10d,%r12d
  2727. xorl %r15d,%r13d
  2728. rorxl $6,%r10d,%r14d
  2729. vpshufd $132,%ymm6,%ymm6
  2730. leal (%rbx,%r12,1),%ebx
  2731. xorl %r14d,%r13d
  2732. movl %ecx,%r15d
  2733. vpsrldq $8,%ymm6,%ymm6
  2734. rorxl $22,%ecx,%r12d
  2735. leal (%rbx,%r13,1),%ebx
  2736. xorl %edx,%r15d
  2737. vpaddd %ymm6,%ymm1,%ymm1
  2738. rorxl $13,%ecx,%r14d
  2739. rorxl $2,%ecx,%r13d
  2740. leal (%r9,%rbx,1),%r9d
  2741. vpshufd $80,%ymm1,%ymm7
  2742. andl %r15d,%esi
  2743. vaesenc %xmm10,%xmm9,%xmm9
  2744. vmovdqu 96-128(%rdi),%xmm10
  2745. xorl %r12d,%r14d
  2746. xorl %edx,%esi
  2747. vpsrld $10,%ymm7,%ymm6
  2748. xorl %r13d,%r14d
  2749. leal (%rbx,%rsi,1),%ebx
  2750. movl %r10d,%r12d
  2751. vpsrlq $17,%ymm7,%ymm7
  2752. addl 44+128(%rsp),%eax
  2753. andl %r9d,%r12d
  2754. rorxl $25,%r9d,%r13d
  2755. vpxor %ymm7,%ymm6,%ymm6
  2756. rorxl $11,%r9d,%esi
  2757. leal (%rbx,%r14,1),%ebx
  2758. leal (%rax,%r12,1),%eax
  2759. vpsrlq $2,%ymm7,%ymm7
  2760. andnl %r11d,%r9d,%r12d
  2761. xorl %esi,%r13d
  2762. rorxl $6,%r9d,%r14d
  2763. vpxor %ymm7,%ymm6,%ymm6
  2764. leal (%rax,%r12,1),%eax
  2765. xorl %r14d,%r13d
  2766. movl %ebx,%esi
  2767. vpshufd $232,%ymm6,%ymm6
  2768. rorxl $22,%ebx,%r12d
  2769. leal (%rax,%r13,1),%eax
  2770. xorl %ecx,%esi
  2771. vpslldq $8,%ymm6,%ymm6
  2772. rorxl $13,%ebx,%r14d
  2773. rorxl $2,%ebx,%r13d
  2774. leal (%r8,%rax,1),%r8d
  2775. vpaddd %ymm6,%ymm1,%ymm1
  2776. andl %esi,%r15d
  2777. vaesenc %xmm10,%xmm9,%xmm9
  2778. vmovdqu 112-128(%rdi),%xmm10
  2779. xorl %r12d,%r14d
  2780. xorl %ecx,%r15d
  2781. vpaddd 32(%rbp),%ymm1,%ymm6
  2782. xorl %r13d,%r14d
  2783. leal (%rax,%r15,1),%eax
  2784. movl %r9d,%r12d
  2785. vmovdqa %ymm6,32(%rsp)
  2786. leaq -64(%rsp),%rsp
  2787. .cfi_escape 0x0f,0x05,0x77,0x38,0x06,0x23,0x08
  2788. pushq 64-8(%rsp)
  2789. .cfi_escape 0x0f,0x05,0x77,0x00,0x06,0x23,0x08
  2790. leaq 8(%rsp),%rsp
  2791. .cfi_escape 0x0f,0x05,0x77,0x78,0x06,0x23,0x08
  2792. vpalignr $4,%ymm2,%ymm3,%ymm4
  2793. addl 0+128(%rsp),%r11d
  2794. andl %r8d,%r12d
  2795. rorxl $25,%r8d,%r13d
  2796. vpalignr $4,%ymm0,%ymm1,%ymm7
  2797. rorxl $11,%r8d,%r15d
  2798. leal (%rax,%r14,1),%eax
  2799. leal (%r11,%r12,1),%r11d
  2800. vpsrld $7,%ymm4,%ymm6
  2801. andnl %r10d,%r8d,%r12d
  2802. xorl %r15d,%r13d
  2803. rorxl $6,%r8d,%r14d
  2804. vpaddd %ymm7,%ymm2,%ymm2
  2805. leal (%r11,%r12,1),%r11d
  2806. xorl %r14d,%r13d
  2807. movl %eax,%r15d
  2808. vpsrld $3,%ymm4,%ymm7
  2809. rorxl $22,%eax,%r12d
  2810. leal (%r11,%r13,1),%r11d
  2811. xorl %ebx,%r15d
  2812. vpslld $14,%ymm4,%ymm5
  2813. rorxl $13,%eax,%r14d
  2814. rorxl $2,%eax,%r13d
  2815. leal (%rdx,%r11,1),%edx
  2816. vpxor %ymm6,%ymm7,%ymm4
  2817. andl %r15d,%esi
  2818. vaesenc %xmm10,%xmm9,%xmm9
  2819. vmovdqu 128-128(%rdi),%xmm10
  2820. xorl %r12d,%r14d
  2821. xorl %ebx,%esi
  2822. vpshufd $250,%ymm1,%ymm7
  2823. xorl %r13d,%r14d
  2824. leal (%r11,%rsi,1),%r11d
  2825. movl %r8d,%r12d
  2826. vpsrld $11,%ymm6,%ymm6
  2827. addl 4+128(%rsp),%r10d
  2828. andl %edx,%r12d
  2829. rorxl $25,%edx,%r13d
  2830. vpxor %ymm5,%ymm4,%ymm4
  2831. rorxl $11,%edx,%esi
  2832. leal (%r11,%r14,1),%r11d
  2833. leal (%r10,%r12,1),%r10d
  2834. vpslld $11,%ymm5,%ymm5
  2835. andnl %r9d,%edx,%r12d
  2836. xorl %esi,%r13d
  2837. rorxl $6,%edx,%r14d
  2838. vpxor %ymm6,%ymm4,%ymm4
  2839. leal (%r10,%r12,1),%r10d
  2840. xorl %r14d,%r13d
  2841. movl %r11d,%esi
  2842. vpsrld $10,%ymm7,%ymm6
  2843. rorxl $22,%r11d,%r12d
  2844. leal (%r10,%r13,1),%r10d
  2845. xorl %eax,%esi
  2846. vpxor %ymm5,%ymm4,%ymm4
  2847. rorxl $13,%r11d,%r14d
  2848. rorxl $2,%r11d,%r13d
  2849. leal (%rcx,%r10,1),%ecx
  2850. vpsrlq $17,%ymm7,%ymm7
  2851. andl %esi,%r15d
  2852. vaesenc %xmm10,%xmm9,%xmm9
  2853. vmovdqu 144-128(%rdi),%xmm10
  2854. xorl %r12d,%r14d
  2855. xorl %eax,%r15d
  2856. vpaddd %ymm4,%ymm2,%ymm2
  2857. xorl %r13d,%r14d
  2858. leal (%r10,%r15,1),%r10d
  2859. movl %edx,%r12d
  2860. vpxor %ymm7,%ymm6,%ymm6
  2861. addl 8+128(%rsp),%r9d
  2862. andl %ecx,%r12d
  2863. rorxl $25,%ecx,%r13d
  2864. vpsrlq $2,%ymm7,%ymm7
  2865. rorxl $11,%ecx,%r15d
  2866. leal (%r10,%r14,1),%r10d
  2867. leal (%r9,%r12,1),%r9d
  2868. vpxor %ymm7,%ymm6,%ymm6
  2869. andnl %r8d,%ecx,%r12d
  2870. xorl %r15d,%r13d
  2871. rorxl $6,%ecx,%r14d
  2872. vpshufd $132,%ymm6,%ymm6
  2873. leal (%r9,%r12,1),%r9d
  2874. xorl %r14d,%r13d
  2875. movl %r10d,%r15d
  2876. vpsrldq $8,%ymm6,%ymm6
  2877. rorxl $22,%r10d,%r12d
  2878. leal (%r9,%r13,1),%r9d
  2879. xorl %r11d,%r15d
  2880. vpaddd %ymm6,%ymm2,%ymm2
  2881. rorxl $13,%r10d,%r14d
  2882. rorxl $2,%r10d,%r13d
  2883. leal (%rbx,%r9,1),%ebx
  2884. vpshufd $80,%ymm2,%ymm7
  2885. andl %r15d,%esi
  2886. vaesenc %xmm10,%xmm9,%xmm9
  2887. vmovdqu 160-128(%rdi),%xmm10
  2888. xorl %r12d,%r14d
  2889. xorl %r11d,%esi
  2890. vpsrld $10,%ymm7,%ymm6
  2891. xorl %r13d,%r14d
  2892. leal (%r9,%rsi,1),%r9d
  2893. movl %ecx,%r12d
  2894. vpsrlq $17,%ymm7,%ymm7
  2895. addl 12+128(%rsp),%r8d
  2896. andl %ebx,%r12d
  2897. rorxl $25,%ebx,%r13d
  2898. vpxor %ymm7,%ymm6,%ymm6
  2899. rorxl $11,%ebx,%esi
  2900. leal (%r9,%r14,1),%r9d
  2901. leal (%r8,%r12,1),%r8d
  2902. vpsrlq $2,%ymm7,%ymm7
  2903. andnl %edx,%ebx,%r12d
  2904. xorl %esi,%r13d
  2905. rorxl $6,%ebx,%r14d
  2906. vpxor %ymm7,%ymm6,%ymm6
  2907. leal (%r8,%r12,1),%r8d
  2908. xorl %r14d,%r13d
  2909. movl %r9d,%esi
  2910. vpshufd $232,%ymm6,%ymm6
  2911. rorxl $22,%r9d,%r12d
  2912. leal (%r8,%r13,1),%r8d
  2913. xorl %r10d,%esi
  2914. vpslldq $8,%ymm6,%ymm6
  2915. rorxl $13,%r9d,%r14d
  2916. rorxl $2,%r9d,%r13d
  2917. leal (%rax,%r8,1),%eax
  2918. vpaddd %ymm6,%ymm2,%ymm2
  2919. andl %esi,%r15d
  2920. vaesenclast %xmm10,%xmm9,%xmm11
  2921. vaesenc %xmm10,%xmm9,%xmm9
  2922. vmovdqu 176-128(%rdi),%xmm10
  2923. xorl %r12d,%r14d
  2924. xorl %r10d,%r15d
  2925. vpaddd 64(%rbp),%ymm2,%ymm6
  2926. xorl %r13d,%r14d
  2927. leal (%r8,%r15,1),%r8d
  2928. movl %ebx,%r12d
  2929. vmovdqa %ymm6,0(%rsp)
  2930. vpalignr $4,%ymm3,%ymm0,%ymm4
  2931. addl 32+128(%rsp),%edx
  2932. andl %eax,%r12d
  2933. rorxl $25,%eax,%r13d
  2934. vpalignr $4,%ymm1,%ymm2,%ymm7
  2935. rorxl $11,%eax,%r15d
  2936. leal (%r8,%r14,1),%r8d
  2937. leal (%rdx,%r12,1),%edx
  2938. vpsrld $7,%ymm4,%ymm6
  2939. andnl %ecx,%eax,%r12d
  2940. xorl %r15d,%r13d
  2941. rorxl $6,%eax,%r14d
  2942. vpaddd %ymm7,%ymm3,%ymm3
  2943. leal (%rdx,%r12,1),%edx
  2944. xorl %r14d,%r13d
  2945. movl %r8d,%r15d
  2946. vpsrld $3,%ymm4,%ymm7
  2947. rorxl $22,%r8d,%r12d
  2948. leal (%rdx,%r13,1),%edx
  2949. xorl %r9d,%r15d
  2950. vpslld $14,%ymm4,%ymm5
  2951. rorxl $13,%r8d,%r14d
  2952. rorxl $2,%r8d,%r13d
  2953. leal (%r11,%rdx,1),%r11d
  2954. vpxor %ymm6,%ymm7,%ymm4
  2955. andl %r15d,%esi
  2956. vpand %xmm12,%xmm11,%xmm8
  2957. vaesenc %xmm10,%xmm9,%xmm9
  2958. vmovdqu 192-128(%rdi),%xmm10
  2959. xorl %r12d,%r14d
  2960. xorl %r9d,%esi
  2961. vpshufd $250,%ymm2,%ymm7
  2962. xorl %r13d,%r14d
  2963. leal (%rdx,%rsi,1),%edx
  2964. movl %eax,%r12d
  2965. vpsrld $11,%ymm6,%ymm6
  2966. addl 36+128(%rsp),%ecx
  2967. andl %r11d,%r12d
  2968. rorxl $25,%r11d,%r13d
  2969. vpxor %ymm5,%ymm4,%ymm4
  2970. rorxl $11,%r11d,%esi
  2971. leal (%rdx,%r14,1),%edx
  2972. leal (%rcx,%r12,1),%ecx
  2973. vpslld $11,%ymm5,%ymm5
  2974. andnl %ebx,%r11d,%r12d
  2975. xorl %esi,%r13d
  2976. rorxl $6,%r11d,%r14d
  2977. vpxor %ymm6,%ymm4,%ymm4
  2978. leal (%rcx,%r12,1),%ecx
  2979. xorl %r14d,%r13d
  2980. movl %edx,%esi
  2981. vpsrld $10,%ymm7,%ymm6
  2982. rorxl $22,%edx,%r12d
  2983. leal (%rcx,%r13,1),%ecx
  2984. xorl %r8d,%esi
  2985. vpxor %ymm5,%ymm4,%ymm4
  2986. rorxl $13,%edx,%r14d
  2987. rorxl $2,%edx,%r13d
  2988. leal (%r10,%rcx,1),%r10d
  2989. vpsrlq $17,%ymm7,%ymm7
  2990. andl %esi,%r15d
  2991. vaesenclast %xmm10,%xmm9,%xmm11
  2992. vaesenc %xmm10,%xmm9,%xmm9
  2993. vmovdqu 208-128(%rdi),%xmm10
  2994. xorl %r12d,%r14d
  2995. xorl %r8d,%r15d
  2996. vpaddd %ymm4,%ymm3,%ymm3
  2997. xorl %r13d,%r14d
  2998. leal (%rcx,%r15,1),%ecx
  2999. movl %r11d,%r12d
  3000. vpxor %ymm7,%ymm6,%ymm6
  3001. addl 40+128(%rsp),%ebx
  3002. andl %r10d,%r12d
  3003. rorxl $25,%r10d,%r13d
  3004. vpsrlq $2,%ymm7,%ymm7
  3005. rorxl $11,%r10d,%r15d
  3006. leal (%rcx,%r14,1),%ecx
  3007. leal (%rbx,%r12,1),%ebx
  3008. vpxor %ymm7,%ymm6,%ymm6
  3009. andnl %eax,%r10d,%r12d
  3010. xorl %r15d,%r13d
  3011. rorxl $6,%r10d,%r14d
  3012. vpshufd $132,%ymm6,%ymm6
  3013. leal (%rbx,%r12,1),%ebx
  3014. xorl %r14d,%r13d
  3015. movl %ecx,%r15d
  3016. vpsrldq $8,%ymm6,%ymm6
  3017. rorxl $22,%ecx,%r12d
  3018. leal (%rbx,%r13,1),%ebx
  3019. xorl %edx,%r15d
  3020. vpaddd %ymm6,%ymm3,%ymm3
  3021. rorxl $13,%ecx,%r14d
  3022. rorxl $2,%ecx,%r13d
  3023. leal (%r9,%rbx,1),%r9d
  3024. vpshufd $80,%ymm3,%ymm7
  3025. andl %r15d,%esi
  3026. vpand %xmm13,%xmm11,%xmm11
  3027. vaesenc %xmm10,%xmm9,%xmm9
  3028. vmovdqu 224-128(%rdi),%xmm10
  3029. xorl %r12d,%r14d
  3030. xorl %edx,%esi
  3031. vpsrld $10,%ymm7,%ymm6
  3032. xorl %r13d,%r14d
  3033. leal (%rbx,%rsi,1),%ebx
  3034. movl %r10d,%r12d
  3035. vpsrlq $17,%ymm7,%ymm7
  3036. addl 44+128(%rsp),%eax
  3037. andl %r9d,%r12d
  3038. rorxl $25,%r9d,%r13d
  3039. vpxor %ymm7,%ymm6,%ymm6
  3040. rorxl $11,%r9d,%esi
  3041. leal (%rbx,%r14,1),%ebx
  3042. leal (%rax,%r12,1),%eax
  3043. vpsrlq $2,%ymm7,%ymm7
  3044. andnl %r11d,%r9d,%r12d
  3045. xorl %esi,%r13d
  3046. rorxl $6,%r9d,%r14d
  3047. vpxor %ymm7,%ymm6,%ymm6
  3048. leal (%rax,%r12,1),%eax
  3049. xorl %r14d,%r13d
  3050. movl %ebx,%esi
  3051. vpshufd $232,%ymm6,%ymm6
  3052. rorxl $22,%ebx,%r12d
  3053. leal (%rax,%r13,1),%eax
  3054. xorl %ecx,%esi
  3055. vpslldq $8,%ymm6,%ymm6
  3056. rorxl $13,%ebx,%r14d
  3057. rorxl $2,%ebx,%r13d
  3058. leal (%r8,%rax,1),%r8d
  3059. vpaddd %ymm6,%ymm3,%ymm3
  3060. andl %esi,%r15d
  3061. vpor %xmm11,%xmm8,%xmm8
  3062. vaesenclast %xmm10,%xmm9,%xmm11
  3063. vmovdqu 0-128(%rdi),%xmm10
  3064. xorl %r12d,%r14d
  3065. xorl %ecx,%r15d
  3066. vpaddd 96(%rbp),%ymm3,%ymm6
  3067. xorl %r13d,%r14d
  3068. leal (%rax,%r15,1),%eax
  3069. movl %r9d,%r12d
  3070. vmovdqa %ymm6,32(%rsp)
  3071. vmovq %xmm15,%r13
  3072. vpextrq $1,%xmm15,%r15
  3073. vpand %xmm14,%xmm11,%xmm11
  3074. vpor %xmm11,%xmm8,%xmm8
  3075. vmovdqu %xmm8,(%r15,%r13,1)
  3076. leaq 16(%r13),%r13
  3077. leaq 128(%rbp),%rbp
  3078. cmpb $0,3(%rbp)
  3079. jne .Lavx2_00_47
  3080. vmovdqu (%r13),%xmm9
  3081. vpinsrq $0,%r13,%xmm15,%xmm15
  3082. addl 0+64(%rsp),%r11d
  3083. andl %r8d,%r12d
  3084. rorxl $25,%r8d,%r13d
  3085. rorxl $11,%r8d,%r15d
  3086. leal (%rax,%r14,1),%eax
  3087. leal (%r11,%r12,1),%r11d
  3088. andnl %r10d,%r8d,%r12d
  3089. xorl %r15d,%r13d
  3090. rorxl $6,%r8d,%r14d
  3091. leal (%r11,%r12,1),%r11d
  3092. xorl %r14d,%r13d
  3093. movl %eax,%r15d
  3094. rorxl $22,%eax,%r12d
  3095. leal (%r11,%r13,1),%r11d
  3096. xorl %ebx,%r15d
  3097. rorxl $13,%eax,%r14d
  3098. rorxl $2,%eax,%r13d
  3099. leal (%rdx,%r11,1),%edx
  3100. andl %r15d,%esi
  3101. vpxor %xmm10,%xmm9,%xmm9
  3102. vmovdqu 16-128(%rdi),%xmm10
  3103. xorl %r12d,%r14d
  3104. xorl %ebx,%esi
  3105. xorl %r13d,%r14d
  3106. leal (%r11,%rsi,1),%r11d
  3107. movl %r8d,%r12d
  3108. addl 4+64(%rsp),%r10d
  3109. andl %edx,%r12d
  3110. rorxl $25,%edx,%r13d
  3111. rorxl $11,%edx,%esi
  3112. leal (%r11,%r14,1),%r11d
  3113. leal (%r10,%r12,1),%r10d
  3114. andnl %r9d,%edx,%r12d
  3115. xorl %esi,%r13d
  3116. rorxl $6,%edx,%r14d
  3117. leal (%r10,%r12,1),%r10d
  3118. xorl %r14d,%r13d
  3119. movl %r11d,%esi
  3120. rorxl $22,%r11d,%r12d
  3121. leal (%r10,%r13,1),%r10d
  3122. xorl %eax,%esi
  3123. rorxl $13,%r11d,%r14d
  3124. rorxl $2,%r11d,%r13d
  3125. leal (%rcx,%r10,1),%ecx
  3126. andl %esi,%r15d
  3127. vpxor %xmm8,%xmm9,%xmm9
  3128. xorl %r12d,%r14d
  3129. xorl %eax,%r15d
  3130. xorl %r13d,%r14d
  3131. leal (%r10,%r15,1),%r10d
  3132. movl %edx,%r12d
  3133. addl 8+64(%rsp),%r9d
  3134. andl %ecx,%r12d
  3135. rorxl $25,%ecx,%r13d
  3136. rorxl $11,%ecx,%r15d
  3137. leal (%r10,%r14,1),%r10d
  3138. leal (%r9,%r12,1),%r9d
  3139. andnl %r8d,%ecx,%r12d
  3140. xorl %r15d,%r13d
  3141. rorxl $6,%ecx,%r14d
  3142. leal (%r9,%r12,1),%r9d
  3143. xorl %r14d,%r13d
  3144. movl %r10d,%r15d
  3145. rorxl $22,%r10d,%r12d
  3146. leal (%r9,%r13,1),%r9d
  3147. xorl %r11d,%r15d
  3148. rorxl $13,%r10d,%r14d
  3149. rorxl $2,%r10d,%r13d
  3150. leal (%rbx,%r9,1),%ebx
  3151. andl %r15d,%esi
  3152. vaesenc %xmm10,%xmm9,%xmm9
  3153. vmovdqu 32-128(%rdi),%xmm10
  3154. xorl %r12d,%r14d
  3155. xorl %r11d,%esi
  3156. xorl %r13d,%r14d
  3157. leal (%r9,%rsi,1),%r9d
  3158. movl %ecx,%r12d
  3159. addl 12+64(%rsp),%r8d
  3160. andl %ebx,%r12d
  3161. rorxl $25,%ebx,%r13d
  3162. rorxl $11,%ebx,%esi
  3163. leal (%r9,%r14,1),%r9d
  3164. leal (%r8,%r12,1),%r8d
  3165. andnl %edx,%ebx,%r12d
  3166. xorl %esi,%r13d
  3167. rorxl $6,%ebx,%r14d
  3168. leal (%r8,%r12,1),%r8d
  3169. xorl %r14d,%r13d
  3170. movl %r9d,%esi
  3171. rorxl $22,%r9d,%r12d
  3172. leal (%r8,%r13,1),%r8d
  3173. xorl %r10d,%esi
  3174. rorxl $13,%r9d,%r14d
  3175. rorxl $2,%r9d,%r13d
  3176. leal (%rax,%r8,1),%eax
  3177. andl %esi,%r15d
  3178. vaesenc %xmm10,%xmm9,%xmm9
  3179. vmovdqu 48-128(%rdi),%xmm10
  3180. xorl %r12d,%r14d
  3181. xorl %r10d,%r15d
  3182. xorl %r13d,%r14d
  3183. leal (%r8,%r15,1),%r8d
  3184. movl %ebx,%r12d
  3185. addl 32+64(%rsp),%edx
  3186. andl %eax,%r12d
  3187. rorxl $25,%eax,%r13d
  3188. rorxl $11,%eax,%r15d
  3189. leal (%r8,%r14,1),%r8d
  3190. leal (%rdx,%r12,1),%edx
  3191. andnl %ecx,%eax,%r12d
  3192. xorl %r15d,%r13d
  3193. rorxl $6,%eax,%r14d
  3194. leal (%rdx,%r12,1),%edx
  3195. xorl %r14d,%r13d
  3196. movl %r8d,%r15d
  3197. rorxl $22,%r8d,%r12d
  3198. leal (%rdx,%r13,1),%edx
  3199. xorl %r9d,%r15d
  3200. rorxl $13,%r8d,%r14d
  3201. rorxl $2,%r8d,%r13d
  3202. leal (%r11,%rdx,1),%r11d
  3203. andl %r15d,%esi
  3204. vaesenc %xmm10,%xmm9,%xmm9
  3205. vmovdqu 64-128(%rdi),%xmm10
  3206. xorl %r12d,%r14d
  3207. xorl %r9d,%esi
  3208. xorl %r13d,%r14d
  3209. leal (%rdx,%rsi,1),%edx
  3210. movl %eax,%r12d
  3211. addl 36+64(%rsp),%ecx
  3212. andl %r11d,%r12d
  3213. rorxl $25,%r11d,%r13d
  3214. rorxl $11,%r11d,%esi
  3215. leal (%rdx,%r14,1),%edx
  3216. leal (%rcx,%r12,1),%ecx
  3217. andnl %ebx,%r11d,%r12d
  3218. xorl %esi,%r13d
  3219. rorxl $6,%r11d,%r14d
  3220. leal (%rcx,%r12,1),%ecx
  3221. xorl %r14d,%r13d
  3222. movl %edx,%esi
  3223. rorxl $22,%edx,%r12d
  3224. leal (%rcx,%r13,1),%ecx
  3225. xorl %r8d,%esi
  3226. rorxl $13,%edx,%r14d
  3227. rorxl $2,%edx,%r13d
  3228. leal (%r10,%rcx,1),%r10d
  3229. andl %esi,%r15d
  3230. vaesenc %xmm10,%xmm9,%xmm9
  3231. vmovdqu 80-128(%rdi),%xmm10
  3232. xorl %r12d,%r14d
  3233. xorl %r8d,%r15d
  3234. xorl %r13d,%r14d
  3235. leal (%rcx,%r15,1),%ecx
  3236. movl %r11d,%r12d
  3237. addl 40+64(%rsp),%ebx
  3238. andl %r10d,%r12d
  3239. rorxl $25,%r10d,%r13d
  3240. rorxl $11,%r10d,%r15d
  3241. leal (%rcx,%r14,1),%ecx
  3242. leal (%rbx,%r12,1),%ebx
  3243. andnl %eax,%r10d,%r12d
  3244. xorl %r15d,%r13d
  3245. rorxl $6,%r10d,%r14d
  3246. leal (%rbx,%r12,1),%ebx
  3247. xorl %r14d,%r13d
  3248. movl %ecx,%r15d
  3249. rorxl $22,%ecx,%r12d
  3250. leal (%rbx,%r13,1),%ebx
  3251. xorl %edx,%r15d
  3252. rorxl $13,%ecx,%r14d
  3253. rorxl $2,%ecx,%r13d
  3254. leal (%r9,%rbx,1),%r9d
  3255. andl %r15d,%esi
  3256. vaesenc %xmm10,%xmm9,%xmm9
  3257. vmovdqu 96-128(%rdi),%xmm10
  3258. xorl %r12d,%r14d
  3259. xorl %edx,%esi
  3260. xorl %r13d,%r14d
  3261. leal (%rbx,%rsi,1),%ebx
  3262. movl %r10d,%r12d
  3263. addl 44+64(%rsp),%eax
  3264. andl %r9d,%r12d
  3265. rorxl $25,%r9d,%r13d
  3266. rorxl $11,%r9d,%esi
  3267. leal (%rbx,%r14,1),%ebx
  3268. leal (%rax,%r12,1),%eax
  3269. andnl %r11d,%r9d,%r12d
  3270. xorl %esi,%r13d
  3271. rorxl $6,%r9d,%r14d
  3272. leal (%rax,%r12,1),%eax
  3273. xorl %r14d,%r13d
  3274. movl %ebx,%esi
  3275. rorxl $22,%ebx,%r12d
  3276. leal (%rax,%r13,1),%eax
  3277. xorl %ecx,%esi
  3278. rorxl $13,%ebx,%r14d
  3279. rorxl $2,%ebx,%r13d
  3280. leal (%r8,%rax,1),%r8d
  3281. andl %esi,%r15d
  3282. vaesenc %xmm10,%xmm9,%xmm9
  3283. vmovdqu 112-128(%rdi),%xmm10
  3284. xorl %r12d,%r14d
  3285. xorl %ecx,%r15d
  3286. xorl %r13d,%r14d
  3287. leal (%rax,%r15,1),%eax
  3288. movl %r9d,%r12d
  3289. addl 0(%rsp),%r11d
  3290. andl %r8d,%r12d
  3291. rorxl $25,%r8d,%r13d
  3292. rorxl $11,%r8d,%r15d
  3293. leal (%rax,%r14,1),%eax
  3294. leal (%r11,%r12,1),%r11d
  3295. andnl %r10d,%r8d,%r12d
  3296. xorl %r15d,%r13d
  3297. rorxl $6,%r8d,%r14d
  3298. leal (%r11,%r12,1),%r11d
  3299. xorl %r14d,%r13d
  3300. movl %eax,%r15d
  3301. rorxl $22,%eax,%r12d
  3302. leal (%r11,%r13,1),%r11d
  3303. xorl %ebx,%r15d
  3304. rorxl $13,%eax,%r14d
  3305. rorxl $2,%eax,%r13d
  3306. leal (%rdx,%r11,1),%edx
  3307. andl %r15d,%esi
  3308. vaesenc %xmm10,%xmm9,%xmm9
  3309. vmovdqu 128-128(%rdi),%xmm10
  3310. xorl %r12d,%r14d
  3311. xorl %ebx,%esi
  3312. xorl %r13d,%r14d
  3313. leal (%r11,%rsi,1),%r11d
  3314. movl %r8d,%r12d
  3315. addl 4(%rsp),%r10d
  3316. andl %edx,%r12d
  3317. rorxl $25,%edx,%r13d
  3318. rorxl $11,%edx,%esi
  3319. leal (%r11,%r14,1),%r11d
  3320. leal (%r10,%r12,1),%r10d
  3321. andnl %r9d,%edx,%r12d
  3322. xorl %esi,%r13d
  3323. rorxl $6,%edx,%r14d
  3324. leal (%r10,%r12,1),%r10d
  3325. xorl %r14d,%r13d
  3326. movl %r11d,%esi
  3327. rorxl $22,%r11d,%r12d
  3328. leal (%r10,%r13,1),%r10d
  3329. xorl %eax,%esi
  3330. rorxl $13,%r11d,%r14d
  3331. rorxl $2,%r11d,%r13d
  3332. leal (%rcx,%r10,1),%ecx
  3333. andl %esi,%r15d
  3334. vaesenc %xmm10,%xmm9,%xmm9
  3335. vmovdqu 144-128(%rdi),%xmm10
  3336. xorl %r12d,%r14d
  3337. xorl %eax,%r15d
  3338. xorl %r13d,%r14d
  3339. leal (%r10,%r15,1),%r10d
  3340. movl %edx,%r12d
  3341. addl 8(%rsp),%r9d
  3342. andl %ecx,%r12d
  3343. rorxl $25,%ecx,%r13d
  3344. rorxl $11,%ecx,%r15d
  3345. leal (%r10,%r14,1),%r10d
  3346. leal (%r9,%r12,1),%r9d
  3347. andnl %r8d,%ecx,%r12d
  3348. xorl %r15d,%r13d
  3349. rorxl $6,%ecx,%r14d
  3350. leal (%r9,%r12,1),%r9d
  3351. xorl %r14d,%r13d
  3352. movl %r10d,%r15d
  3353. rorxl $22,%r10d,%r12d
  3354. leal (%r9,%r13,1),%r9d
  3355. xorl %r11d,%r15d
  3356. rorxl $13,%r10d,%r14d
  3357. rorxl $2,%r10d,%r13d
  3358. leal (%rbx,%r9,1),%ebx
  3359. andl %r15d,%esi
  3360. vaesenc %xmm10,%xmm9,%xmm9
  3361. vmovdqu 160-128(%rdi),%xmm10
  3362. xorl %r12d,%r14d
  3363. xorl %r11d,%esi
  3364. xorl %r13d,%r14d
  3365. leal (%r9,%rsi,1),%r9d
  3366. movl %ecx,%r12d
  3367. addl 12(%rsp),%r8d
  3368. andl %ebx,%r12d
  3369. rorxl $25,%ebx,%r13d
  3370. rorxl $11,%ebx,%esi
  3371. leal (%r9,%r14,1),%r9d
  3372. leal (%r8,%r12,1),%r8d
  3373. andnl %edx,%ebx,%r12d
  3374. xorl %esi,%r13d
  3375. rorxl $6,%ebx,%r14d
  3376. leal (%r8,%r12,1),%r8d
  3377. xorl %r14d,%r13d
  3378. movl %r9d,%esi
  3379. rorxl $22,%r9d,%r12d
  3380. leal (%r8,%r13,1),%r8d
  3381. xorl %r10d,%esi
  3382. rorxl $13,%r9d,%r14d
  3383. rorxl $2,%r9d,%r13d
  3384. leal (%rax,%r8,1),%eax
  3385. andl %esi,%r15d
  3386. vaesenclast %xmm10,%xmm9,%xmm11
  3387. vaesenc %xmm10,%xmm9,%xmm9
  3388. vmovdqu 176-128(%rdi),%xmm10
  3389. xorl %r12d,%r14d
  3390. xorl %r10d,%r15d
  3391. xorl %r13d,%r14d
  3392. leal (%r8,%r15,1),%r8d
  3393. movl %ebx,%r12d
  3394. addl 32(%rsp),%edx
  3395. andl %eax,%r12d
  3396. rorxl $25,%eax,%r13d
  3397. rorxl $11,%eax,%r15d
  3398. leal (%r8,%r14,1),%r8d
  3399. leal (%rdx,%r12,1),%edx
  3400. andnl %ecx,%eax,%r12d
  3401. xorl %r15d,%r13d
  3402. rorxl $6,%eax,%r14d
  3403. leal (%rdx,%r12,1),%edx
  3404. xorl %r14d,%r13d
  3405. movl %r8d,%r15d
  3406. rorxl $22,%r8d,%r12d
  3407. leal (%rdx,%r13,1),%edx
  3408. xorl %r9d,%r15d
  3409. rorxl $13,%r8d,%r14d
  3410. rorxl $2,%r8d,%r13d
  3411. leal (%r11,%rdx,1),%r11d
  3412. andl %r15d,%esi
  3413. vpand %xmm12,%xmm11,%xmm8
  3414. vaesenc %xmm10,%xmm9,%xmm9
  3415. vmovdqu 192-128(%rdi),%xmm10
  3416. xorl %r12d,%r14d
  3417. xorl %r9d,%esi
  3418. xorl %r13d,%r14d
  3419. leal (%rdx,%rsi,1),%edx
  3420. movl %eax,%r12d
  3421. addl 36(%rsp),%ecx
  3422. andl %r11d,%r12d
  3423. rorxl $25,%r11d,%r13d
  3424. rorxl $11,%r11d,%esi
  3425. leal (%rdx,%r14,1),%edx
  3426. leal (%rcx,%r12,1),%ecx
  3427. andnl %ebx,%r11d,%r12d
  3428. xorl %esi,%r13d
  3429. rorxl $6,%r11d,%r14d
  3430. leal (%rcx,%r12,1),%ecx
  3431. xorl %r14d,%r13d
  3432. movl %edx,%esi
  3433. rorxl $22,%edx,%r12d
  3434. leal (%rcx,%r13,1),%ecx
  3435. xorl %r8d,%esi
  3436. rorxl $13,%edx,%r14d
  3437. rorxl $2,%edx,%r13d
  3438. leal (%r10,%rcx,1),%r10d
  3439. andl %esi,%r15d
  3440. vaesenclast %xmm10,%xmm9,%xmm11
  3441. vaesenc %xmm10,%xmm9,%xmm9
  3442. vmovdqu 208-128(%rdi),%xmm10
  3443. xorl %r12d,%r14d
  3444. xorl %r8d,%r15d
  3445. xorl %r13d,%r14d
  3446. leal (%rcx,%r15,1),%ecx
  3447. movl %r11d,%r12d
  3448. addl 40(%rsp),%ebx
  3449. andl %r10d,%r12d
  3450. rorxl $25,%r10d,%r13d
  3451. rorxl $11,%r10d,%r15d
  3452. leal (%rcx,%r14,1),%ecx
  3453. leal (%rbx,%r12,1),%ebx
  3454. andnl %eax,%r10d,%r12d
  3455. xorl %r15d,%r13d
  3456. rorxl $6,%r10d,%r14d
  3457. leal (%rbx,%r12,1),%ebx
  3458. xorl %r14d,%r13d
  3459. movl %ecx,%r15d
  3460. rorxl $22,%ecx,%r12d
  3461. leal (%rbx,%r13,1),%ebx
  3462. xorl %edx,%r15d
  3463. rorxl $13,%ecx,%r14d
  3464. rorxl $2,%ecx,%r13d
  3465. leal (%r9,%rbx,1),%r9d
  3466. andl %r15d,%esi
  3467. vpand %xmm13,%xmm11,%xmm11
  3468. vaesenc %xmm10,%xmm9,%xmm9
  3469. vmovdqu 224-128(%rdi),%xmm10
  3470. xorl %r12d,%r14d
  3471. xorl %edx,%esi
  3472. xorl %r13d,%r14d
  3473. leal (%rbx,%rsi,1),%ebx
  3474. movl %r10d,%r12d
  3475. addl 44(%rsp),%eax
  3476. andl %r9d,%r12d
  3477. rorxl $25,%r9d,%r13d
  3478. rorxl $11,%r9d,%esi
  3479. leal (%rbx,%r14,1),%ebx
  3480. leal (%rax,%r12,1),%eax
  3481. andnl %r11d,%r9d,%r12d
  3482. xorl %esi,%r13d
  3483. rorxl $6,%r9d,%r14d
  3484. leal (%rax,%r12,1),%eax
  3485. xorl %r14d,%r13d
  3486. movl %ebx,%esi
  3487. rorxl $22,%ebx,%r12d
  3488. leal (%rax,%r13,1),%eax
  3489. xorl %ecx,%esi
  3490. rorxl $13,%ebx,%r14d
  3491. rorxl $2,%ebx,%r13d
  3492. leal (%r8,%rax,1),%r8d
  3493. andl %esi,%r15d
  3494. vpor %xmm11,%xmm8,%xmm8
  3495. vaesenclast %xmm10,%xmm9,%xmm11
  3496. vmovdqu 0-128(%rdi),%xmm10
  3497. xorl %r12d,%r14d
  3498. xorl %ecx,%r15d
  3499. xorl %r13d,%r14d
  3500. leal (%rax,%r15,1),%eax
  3501. movl %r9d,%r12d
  3502. vpextrq $1,%xmm15,%r12
  3503. vmovq %xmm15,%r13
  3504. movq 552(%rsp),%r15
  3505. addl %r14d,%eax
  3506. leaq 448(%rsp),%rbp
  3507. vpand %xmm14,%xmm11,%xmm11
  3508. vpor %xmm11,%xmm8,%xmm8
  3509. vmovdqu %xmm8,(%r12,%r13,1)
  3510. leaq 16(%r13),%r13
  3511. addl 0(%r15),%eax
  3512. addl 4(%r15),%ebx
  3513. addl 8(%r15),%ecx
  3514. addl 12(%r15),%edx
  3515. addl 16(%r15),%r8d
  3516. addl 20(%r15),%r9d
  3517. addl 24(%r15),%r10d
  3518. addl 28(%r15),%r11d
  3519. movl %eax,0(%r15)
  3520. movl %ebx,4(%r15)
  3521. movl %ecx,8(%r15)
  3522. movl %edx,12(%r15)
  3523. movl %r8d,16(%r15)
  3524. movl %r9d,20(%r15)
  3525. movl %r10d,24(%r15)
  3526. movl %r11d,28(%r15)
  3527. cmpq 80(%rbp),%r13
  3528. je .Ldone_avx2
  3529. xorl %r14d,%r14d
  3530. movl %ebx,%esi
  3531. movl %r9d,%r12d
  3532. xorl %ecx,%esi
  3533. jmp .Lower_avx2
  3534. .align 16
  3535. .Lower_avx2:
  3536. vmovdqu (%r13),%xmm9
  3537. vpinsrq $0,%r13,%xmm15,%xmm15
  3538. addl 0+16(%rbp),%r11d
  3539. andl %r8d,%r12d
  3540. rorxl $25,%r8d,%r13d
  3541. rorxl $11,%r8d,%r15d
  3542. leal (%rax,%r14,1),%eax
  3543. leal (%r11,%r12,1),%r11d
  3544. andnl %r10d,%r8d,%r12d
  3545. xorl %r15d,%r13d
  3546. rorxl $6,%r8d,%r14d
  3547. leal (%r11,%r12,1),%r11d
  3548. xorl %r14d,%r13d
  3549. movl %eax,%r15d
  3550. rorxl $22,%eax,%r12d
  3551. leal (%r11,%r13,1),%r11d
  3552. xorl %ebx,%r15d
  3553. rorxl $13,%eax,%r14d
  3554. rorxl $2,%eax,%r13d
  3555. leal (%rdx,%r11,1),%edx
  3556. andl %r15d,%esi
  3557. vpxor %xmm10,%xmm9,%xmm9
  3558. vmovdqu 16-128(%rdi),%xmm10
  3559. xorl %r12d,%r14d
  3560. xorl %ebx,%esi
  3561. xorl %r13d,%r14d
  3562. leal (%r11,%rsi,1),%r11d
  3563. movl %r8d,%r12d
  3564. addl 4+16(%rbp),%r10d
  3565. andl %edx,%r12d
  3566. rorxl $25,%edx,%r13d
  3567. rorxl $11,%edx,%esi
  3568. leal (%r11,%r14,1),%r11d
  3569. leal (%r10,%r12,1),%r10d
  3570. andnl %r9d,%edx,%r12d
  3571. xorl %esi,%r13d
  3572. rorxl $6,%edx,%r14d
  3573. leal (%r10,%r12,1),%r10d
  3574. xorl %r14d,%r13d
  3575. movl %r11d,%esi
  3576. rorxl $22,%r11d,%r12d
  3577. leal (%r10,%r13,1),%r10d
  3578. xorl %eax,%esi
  3579. rorxl $13,%r11d,%r14d
  3580. rorxl $2,%r11d,%r13d
  3581. leal (%rcx,%r10,1),%ecx
  3582. andl %esi,%r15d
  3583. vpxor %xmm8,%xmm9,%xmm9
  3584. xorl %r12d,%r14d
  3585. xorl %eax,%r15d
  3586. xorl %r13d,%r14d
  3587. leal (%r10,%r15,1),%r10d
  3588. movl %edx,%r12d
  3589. addl 8+16(%rbp),%r9d
  3590. andl %ecx,%r12d
  3591. rorxl $25,%ecx,%r13d
  3592. rorxl $11,%ecx,%r15d
  3593. leal (%r10,%r14,1),%r10d
  3594. leal (%r9,%r12,1),%r9d
  3595. andnl %r8d,%ecx,%r12d
  3596. xorl %r15d,%r13d
  3597. rorxl $6,%ecx,%r14d
  3598. leal (%r9,%r12,1),%r9d
  3599. xorl %r14d,%r13d
  3600. movl %r10d,%r15d
  3601. rorxl $22,%r10d,%r12d
  3602. leal (%r9,%r13,1),%r9d
  3603. xorl %r11d,%r15d
  3604. rorxl $13,%r10d,%r14d
  3605. rorxl $2,%r10d,%r13d
  3606. leal (%rbx,%r9,1),%ebx
  3607. andl %r15d,%esi
  3608. vaesenc %xmm10,%xmm9,%xmm9
  3609. vmovdqu 32-128(%rdi),%xmm10
  3610. xorl %r12d,%r14d
  3611. xorl %r11d,%esi
  3612. xorl %r13d,%r14d
  3613. leal (%r9,%rsi,1),%r9d
  3614. movl %ecx,%r12d
  3615. addl 12+16(%rbp),%r8d
  3616. andl %ebx,%r12d
  3617. rorxl $25,%ebx,%r13d
  3618. rorxl $11,%ebx,%esi
  3619. leal (%r9,%r14,1),%r9d
  3620. leal (%r8,%r12,1),%r8d
  3621. andnl %edx,%ebx,%r12d
  3622. xorl %esi,%r13d
  3623. rorxl $6,%ebx,%r14d
  3624. leal (%r8,%r12,1),%r8d
  3625. xorl %r14d,%r13d
  3626. movl %r9d,%esi
  3627. rorxl $22,%r9d,%r12d
  3628. leal (%r8,%r13,1),%r8d
  3629. xorl %r10d,%esi
  3630. rorxl $13,%r9d,%r14d
  3631. rorxl $2,%r9d,%r13d
  3632. leal (%rax,%r8,1),%eax
  3633. andl %esi,%r15d
  3634. vaesenc %xmm10,%xmm9,%xmm9
  3635. vmovdqu 48-128(%rdi),%xmm10
  3636. xorl %r12d,%r14d
  3637. xorl %r10d,%r15d
  3638. xorl %r13d,%r14d
  3639. leal (%r8,%r15,1),%r8d
  3640. movl %ebx,%r12d
  3641. addl 32+16(%rbp),%edx
  3642. andl %eax,%r12d
  3643. rorxl $25,%eax,%r13d
  3644. rorxl $11,%eax,%r15d
  3645. leal (%r8,%r14,1),%r8d
  3646. leal (%rdx,%r12,1),%edx
  3647. andnl %ecx,%eax,%r12d
  3648. xorl %r15d,%r13d
  3649. rorxl $6,%eax,%r14d
  3650. leal (%rdx,%r12,1),%edx
  3651. xorl %r14d,%r13d
  3652. movl %r8d,%r15d
  3653. rorxl $22,%r8d,%r12d
  3654. leal (%rdx,%r13,1),%edx
  3655. xorl %r9d,%r15d
  3656. rorxl $13,%r8d,%r14d
  3657. rorxl $2,%r8d,%r13d
  3658. leal (%r11,%rdx,1),%r11d
  3659. andl %r15d,%esi
  3660. vaesenc %xmm10,%xmm9,%xmm9
  3661. vmovdqu 64-128(%rdi),%xmm10
  3662. xorl %r12d,%r14d
  3663. xorl %r9d,%esi
  3664. xorl %r13d,%r14d
  3665. leal (%rdx,%rsi,1),%edx
  3666. movl %eax,%r12d
  3667. addl 36+16(%rbp),%ecx
  3668. andl %r11d,%r12d
  3669. rorxl $25,%r11d,%r13d
  3670. rorxl $11,%r11d,%esi
  3671. leal (%rdx,%r14,1),%edx
  3672. leal (%rcx,%r12,1),%ecx
  3673. andnl %ebx,%r11d,%r12d
  3674. xorl %esi,%r13d
  3675. rorxl $6,%r11d,%r14d
  3676. leal (%rcx,%r12,1),%ecx
  3677. xorl %r14d,%r13d
  3678. movl %edx,%esi
  3679. rorxl $22,%edx,%r12d
  3680. leal (%rcx,%r13,1),%ecx
  3681. xorl %r8d,%esi
  3682. rorxl $13,%edx,%r14d
  3683. rorxl $2,%edx,%r13d
  3684. leal (%r10,%rcx,1),%r10d
  3685. andl %esi,%r15d
  3686. vaesenc %xmm10,%xmm9,%xmm9
  3687. vmovdqu 80-128(%rdi),%xmm10
  3688. xorl %r12d,%r14d
  3689. xorl %r8d,%r15d
  3690. xorl %r13d,%r14d
  3691. leal (%rcx,%r15,1),%ecx
  3692. movl %r11d,%r12d
  3693. addl 40+16(%rbp),%ebx
  3694. andl %r10d,%r12d
  3695. rorxl $25,%r10d,%r13d
  3696. rorxl $11,%r10d,%r15d
  3697. leal (%rcx,%r14,1),%ecx
  3698. leal (%rbx,%r12,1),%ebx
  3699. andnl %eax,%r10d,%r12d
  3700. xorl %r15d,%r13d
  3701. rorxl $6,%r10d,%r14d
  3702. leal (%rbx,%r12,1),%ebx
  3703. xorl %r14d,%r13d
  3704. movl %ecx,%r15d
  3705. rorxl $22,%ecx,%r12d
  3706. leal (%rbx,%r13,1),%ebx
  3707. xorl %edx,%r15d
  3708. rorxl $13,%ecx,%r14d
  3709. rorxl $2,%ecx,%r13d
  3710. leal (%r9,%rbx,1),%r9d
  3711. andl %r15d,%esi
  3712. vaesenc %xmm10,%xmm9,%xmm9
  3713. vmovdqu 96-128(%rdi),%xmm10
  3714. xorl %r12d,%r14d
  3715. xorl %edx,%esi
  3716. xorl %r13d,%r14d
  3717. leal (%rbx,%rsi,1),%ebx
  3718. movl %r10d,%r12d
  3719. addl 44+16(%rbp),%eax
  3720. andl %r9d,%r12d
  3721. rorxl $25,%r9d,%r13d
  3722. rorxl $11,%r9d,%esi
  3723. leal (%rbx,%r14,1),%ebx
  3724. leal (%rax,%r12,1),%eax
  3725. andnl %r11d,%r9d,%r12d
  3726. xorl %esi,%r13d
  3727. rorxl $6,%r9d,%r14d
  3728. leal (%rax,%r12,1),%eax
  3729. xorl %r14d,%r13d
  3730. movl %ebx,%esi
  3731. rorxl $22,%ebx,%r12d
  3732. leal (%rax,%r13,1),%eax
  3733. xorl %ecx,%esi
  3734. rorxl $13,%ebx,%r14d
  3735. rorxl $2,%ebx,%r13d
  3736. leal (%r8,%rax,1),%r8d
  3737. andl %esi,%r15d
  3738. vaesenc %xmm10,%xmm9,%xmm9
  3739. vmovdqu 112-128(%rdi),%xmm10
  3740. xorl %r12d,%r14d
  3741. xorl %ecx,%r15d
  3742. xorl %r13d,%r14d
  3743. leal (%rax,%r15,1),%eax
  3744. movl %r9d,%r12d
  3745. leaq -64(%rbp),%rbp
  3746. addl 0+16(%rbp),%r11d
  3747. andl %r8d,%r12d
  3748. rorxl $25,%r8d,%r13d
  3749. rorxl $11,%r8d,%r15d
  3750. leal (%rax,%r14,1),%eax
  3751. leal (%r11,%r12,1),%r11d
  3752. andnl %r10d,%r8d,%r12d
  3753. xorl %r15d,%r13d
  3754. rorxl $6,%r8d,%r14d
  3755. leal (%r11,%r12,1),%r11d
  3756. xorl %r14d,%r13d
  3757. movl %eax,%r15d
  3758. rorxl $22,%eax,%r12d
  3759. leal (%r11,%r13,1),%r11d
  3760. xorl %ebx,%r15d
  3761. rorxl $13,%eax,%r14d
  3762. rorxl $2,%eax,%r13d
  3763. leal (%rdx,%r11,1),%edx
  3764. andl %r15d,%esi
  3765. vaesenc %xmm10,%xmm9,%xmm9
  3766. vmovdqu 128-128(%rdi),%xmm10
  3767. xorl %r12d,%r14d
  3768. xorl %ebx,%esi
  3769. xorl %r13d,%r14d
  3770. leal (%r11,%rsi,1),%r11d
  3771. movl %r8d,%r12d
  3772. addl 4+16(%rbp),%r10d
  3773. andl %edx,%r12d
  3774. rorxl $25,%edx,%r13d
  3775. rorxl $11,%edx,%esi
  3776. leal (%r11,%r14,1),%r11d
  3777. leal (%r10,%r12,1),%r10d
  3778. andnl %r9d,%edx,%r12d
  3779. xorl %esi,%r13d
  3780. rorxl $6,%edx,%r14d
  3781. leal (%r10,%r12,1),%r10d
  3782. xorl %r14d,%r13d
  3783. movl %r11d,%esi
  3784. rorxl $22,%r11d,%r12d
  3785. leal (%r10,%r13,1),%r10d
  3786. xorl %eax,%esi
  3787. rorxl $13,%r11d,%r14d
  3788. rorxl $2,%r11d,%r13d
  3789. leal (%rcx,%r10,1),%ecx
  3790. andl %esi,%r15d
  3791. vaesenc %xmm10,%xmm9,%xmm9
  3792. vmovdqu 144-128(%rdi),%xmm10
  3793. xorl %r12d,%r14d
  3794. xorl %eax,%r15d
  3795. xorl %r13d,%r14d
  3796. leal (%r10,%r15,1),%r10d
  3797. movl %edx,%r12d
  3798. addl 8+16(%rbp),%r9d
  3799. andl %ecx,%r12d
  3800. rorxl $25,%ecx,%r13d
  3801. rorxl $11,%ecx,%r15d
  3802. leal (%r10,%r14,1),%r10d
  3803. leal (%r9,%r12,1),%r9d
  3804. andnl %r8d,%ecx,%r12d
  3805. xorl %r15d,%r13d
  3806. rorxl $6,%ecx,%r14d
  3807. leal (%r9,%r12,1),%r9d
  3808. xorl %r14d,%r13d
  3809. movl %r10d,%r15d
  3810. rorxl $22,%r10d,%r12d
  3811. leal (%r9,%r13,1),%r9d
  3812. xorl %r11d,%r15d
  3813. rorxl $13,%r10d,%r14d
  3814. rorxl $2,%r10d,%r13d
  3815. leal (%rbx,%r9,1),%ebx
  3816. andl %r15d,%esi
  3817. vaesenc %xmm10,%xmm9,%xmm9
  3818. vmovdqu 160-128(%rdi),%xmm10
  3819. xorl %r12d,%r14d
  3820. xorl %r11d,%esi
  3821. xorl %r13d,%r14d
  3822. leal (%r9,%rsi,1),%r9d
  3823. movl %ecx,%r12d
  3824. addl 12+16(%rbp),%r8d
  3825. andl %ebx,%r12d
  3826. rorxl $25,%ebx,%r13d
  3827. rorxl $11,%ebx,%esi
  3828. leal (%r9,%r14,1),%r9d
  3829. leal (%r8,%r12,1),%r8d
  3830. andnl %edx,%ebx,%r12d
  3831. xorl %esi,%r13d
  3832. rorxl $6,%ebx,%r14d
  3833. leal (%r8,%r12,1),%r8d
  3834. xorl %r14d,%r13d
  3835. movl %r9d,%esi
  3836. rorxl $22,%r9d,%r12d
  3837. leal (%r8,%r13,1),%r8d
  3838. xorl %r10d,%esi
  3839. rorxl $13,%r9d,%r14d
  3840. rorxl $2,%r9d,%r13d
  3841. leal (%rax,%r8,1),%eax
  3842. andl %esi,%r15d
  3843. vaesenclast %xmm10,%xmm9,%xmm11
  3844. vaesenc %xmm10,%xmm9,%xmm9
  3845. vmovdqu 176-128(%rdi),%xmm10
  3846. xorl %r12d,%r14d
  3847. xorl %r10d,%r15d
  3848. xorl %r13d,%r14d
  3849. leal (%r8,%r15,1),%r8d
  3850. movl %ebx,%r12d
  3851. addl 32+16(%rbp),%edx
  3852. andl %eax,%r12d
  3853. rorxl $25,%eax,%r13d
  3854. rorxl $11,%eax,%r15d
  3855. leal (%r8,%r14,1),%r8d
  3856. leal (%rdx,%r12,1),%edx
  3857. andnl %ecx,%eax,%r12d
  3858. xorl %r15d,%r13d
  3859. rorxl $6,%eax,%r14d
  3860. leal (%rdx,%r12,1),%edx
  3861. xorl %r14d,%r13d
  3862. movl %r8d,%r15d
  3863. rorxl $22,%r8d,%r12d
  3864. leal (%rdx,%r13,1),%edx
  3865. xorl %r9d,%r15d
  3866. rorxl $13,%r8d,%r14d
  3867. rorxl $2,%r8d,%r13d
  3868. leal (%r11,%rdx,1),%r11d
  3869. andl %r15d,%esi
  3870. vpand %xmm12,%xmm11,%xmm8
  3871. vaesenc %xmm10,%xmm9,%xmm9
  3872. vmovdqu 192-128(%rdi),%xmm10
  3873. xorl %r12d,%r14d
  3874. xorl %r9d,%esi
  3875. xorl %r13d,%r14d
  3876. leal (%rdx,%rsi,1),%edx
  3877. movl %eax,%r12d
  3878. addl 36+16(%rbp),%ecx
  3879. andl %r11d,%r12d
  3880. rorxl $25,%r11d,%r13d
  3881. rorxl $11,%r11d,%esi
  3882. leal (%rdx,%r14,1),%edx
  3883. leal (%rcx,%r12,1),%ecx
  3884. andnl %ebx,%r11d,%r12d
  3885. xorl %esi,%r13d
  3886. rorxl $6,%r11d,%r14d
  3887. leal (%rcx,%r12,1),%ecx
  3888. xorl %r14d,%r13d
  3889. movl %edx,%esi
  3890. rorxl $22,%edx,%r12d
  3891. leal (%rcx,%r13,1),%ecx
  3892. xorl %r8d,%esi
  3893. rorxl $13,%edx,%r14d
  3894. rorxl $2,%edx,%r13d
  3895. leal (%r10,%rcx,1),%r10d
  3896. andl %esi,%r15d
  3897. vaesenclast %xmm10,%xmm9,%xmm11
  3898. vaesenc %xmm10,%xmm9,%xmm9
  3899. vmovdqu 208-128(%rdi),%xmm10
  3900. xorl %r12d,%r14d
  3901. xorl %r8d,%r15d
  3902. xorl %r13d,%r14d
  3903. leal (%rcx,%r15,1),%ecx
  3904. movl %r11d,%r12d
  3905. addl 40+16(%rbp),%ebx
  3906. andl %r10d,%r12d
  3907. rorxl $25,%r10d,%r13d
  3908. rorxl $11,%r10d,%r15d
  3909. leal (%rcx,%r14,1),%ecx
  3910. leal (%rbx,%r12,1),%ebx
  3911. andnl %eax,%r10d,%r12d
  3912. xorl %r15d,%r13d
  3913. rorxl $6,%r10d,%r14d
  3914. leal (%rbx,%r12,1),%ebx
  3915. xorl %r14d,%r13d
  3916. movl %ecx,%r15d
  3917. rorxl $22,%ecx,%r12d
  3918. leal (%rbx,%r13,1),%ebx
  3919. xorl %edx,%r15d
  3920. rorxl $13,%ecx,%r14d
  3921. rorxl $2,%ecx,%r13d
  3922. leal (%r9,%rbx,1),%r9d
  3923. andl %r15d,%esi
  3924. vpand %xmm13,%xmm11,%xmm11
  3925. vaesenc %xmm10,%xmm9,%xmm9
  3926. vmovdqu 224-128(%rdi),%xmm10
  3927. xorl %r12d,%r14d
  3928. xorl %edx,%esi
  3929. xorl %r13d,%r14d
  3930. leal (%rbx,%rsi,1),%ebx
  3931. movl %r10d,%r12d
  3932. addl 44+16(%rbp),%eax
  3933. andl %r9d,%r12d
  3934. rorxl $25,%r9d,%r13d
  3935. rorxl $11,%r9d,%esi
  3936. leal (%rbx,%r14,1),%ebx
  3937. leal (%rax,%r12,1),%eax
  3938. andnl %r11d,%r9d,%r12d
  3939. xorl %esi,%r13d
  3940. rorxl $6,%r9d,%r14d
  3941. leal (%rax,%r12,1),%eax
  3942. xorl %r14d,%r13d
  3943. movl %ebx,%esi
  3944. rorxl $22,%ebx,%r12d
  3945. leal (%rax,%r13,1),%eax
  3946. xorl %ecx,%esi
  3947. rorxl $13,%ebx,%r14d
  3948. rorxl $2,%ebx,%r13d
  3949. leal (%r8,%rax,1),%r8d
  3950. andl %esi,%r15d
  3951. vpor %xmm11,%xmm8,%xmm8
  3952. vaesenclast %xmm10,%xmm9,%xmm11
  3953. vmovdqu 0-128(%rdi),%xmm10
  3954. xorl %r12d,%r14d
  3955. xorl %ecx,%r15d
  3956. xorl %r13d,%r14d
  3957. leal (%rax,%r15,1),%eax
  3958. movl %r9d,%r12d
  3959. vmovq %xmm15,%r13
  3960. vpextrq $1,%xmm15,%r15
  3961. vpand %xmm14,%xmm11,%xmm11
  3962. vpor %xmm11,%xmm8,%xmm8
  3963. leaq -64(%rbp),%rbp
  3964. vmovdqu %xmm8,(%r15,%r13,1)
  3965. leaq 16(%r13),%r13
  3966. cmpq %rsp,%rbp
  3967. jae .Lower_avx2
  3968. movq 552(%rsp),%r15
  3969. leaq 64(%r13),%r13
  3970. movq 560(%rsp),%rsi
  3971. addl %r14d,%eax
  3972. leaq 448(%rsp),%rsp
  3973. addl 0(%r15),%eax
  3974. addl 4(%r15),%ebx
  3975. addl 8(%r15),%ecx
  3976. addl 12(%r15),%edx
  3977. addl 16(%r15),%r8d
  3978. addl 20(%r15),%r9d
  3979. addl 24(%r15),%r10d
  3980. leaq (%rsi,%r13,1),%r12
  3981. addl 28(%r15),%r11d
  3982. cmpq 64+16(%rsp),%r13
  3983. movl %eax,0(%r15)
  3984. cmoveq %rsp,%r12
  3985. movl %ebx,4(%r15)
  3986. movl %ecx,8(%r15)
  3987. movl %edx,12(%r15)
  3988. movl %r8d,16(%r15)
  3989. movl %r9d,20(%r15)
  3990. movl %r10d,24(%r15)
  3991. movl %r11d,28(%r15)
  3992. jbe .Loop_avx2
  3993. leaq (%rsp),%rbp
  3994. .cfi_escape 0x0f,0x06,0x76,0xf8,0x00,0x06,0x23,0x08
  3995. .Ldone_avx2:
  3996. movq 64+32(%rbp),%r8
  3997. movq 64+56(%rbp),%rsi
  3998. .cfi_def_cfa %rsi,8
  3999. vmovdqu %xmm8,(%r8)
  4000. vzeroall
  4001. movq -48(%rsi),%r15
  4002. .cfi_restore %r15
  4003. movq -40(%rsi),%r14
  4004. .cfi_restore %r14
  4005. movq -32(%rsi),%r13
  4006. .cfi_restore %r13
  4007. movq -24(%rsi),%r12
  4008. .cfi_restore %r12
  4009. movq -16(%rsi),%rbp
  4010. .cfi_restore %rbp
  4011. movq -8(%rsi),%rbx
  4012. .cfi_restore %rbx
  4013. leaq (%rsi),%rsp
  4014. .cfi_def_cfa_register %rsp
  4015. .Lepilogue_avx2:
  4016. .byte 0xf3,0xc3
  4017. .cfi_endproc
  4018. .size aesni_cbc_sha256_enc_avx2,.-aesni_cbc_sha256_enc_avx2
  # ----------------------------------------------------------------------
  # aesni_cbc_sha256_enc_shaext
  #
  # Interleaved ("stitched") AES-CBC encryption and SHA-256 hashing using
  # AES-NI and the SHA extensions.  Every pass through .Loop_shaext
  #   * CBC-encrypts four 16-byte blocks read from (%rdi) and writes the
  #     ciphertext to the output buffer, and
  #   * runs all 64 SHA-256 rounds over the 64 bytes read from (%r10).
  #
  # Syntax: GNU as, AT&T operand order.  Leaf function, no stack frame.
  #
  # Inputs, as the code uses them (System V AMD64 argument order):
  #   %rdi     AES input pointer; advanced by 64 per iteration
  #   %rsi     AES output pointer; turned into (out - in) on entry so the
  #            output is addressed as (%rsi,%rdi,1)
  #   %rdx     iteration count, one per 64 bytes
  #            NOTE(review): the loop is bottom-tested (decq/jnz), so a
  #            count of 0 would wrap around -- callers must pass >= 1;
  #            confirm against the dispatcher outside this chunk.
  #   %rcx     AES key schedule: 16-byte round keys from offset 0, and a
  #            32-bit value at offset 240 that selects the round count
  #   %r8      16-byte CBC chaining value; read on entry, and the last
  #            ciphertext block is written back on exit
  #   %r9      32-byte SHA-256 state; read on entry, updated on exit
  #   8(%rsp)  first stack-passed argument: pointer to the data to hash
  #
  # K256 is a table defined elsewhere in this file.  This routine reads
  # 16 bytes at offsets 0,32,...,480 as round constants and 16 bytes at
  # offset 512 as a pshufb control mask.
  #
  # Writes: %rax %rcx %rdx %rsi %rdi %r10 %r11, %xmm0-%xmm15, flags.
  # No callee-saved general-purpose register is touched.
  #
  # The .byte sequences are hand-encoded SSSE3/SHA instructions; each one
  # is decoded in the comment directly above it.
  # ----------------------------------------------------------------------
  4019. .type aesni_cbc_sha256_enc_shaext,@function
  4020. .align 32
  4021. aesni_cbc_sha256_enc_shaext:
  4022. .cfi_startproc
  # r10 = hash-input pointer (stack argument; nothing has been pushed yet).
  4023. movq 8(%rsp),%r10
  # rax = K256+128, so table offset N is written as N-128(%rax) below.
  4024. leaq K256+128(%rip),%rax
  # xmm1:xmm2 = the eight SHA-256 state words in memory order.
  4025. movdqu (%r9),%xmm1
  4026. movdqu 16(%r9),%xmm2
  # xmm3 = pshufb mask from K256+512 (presumably the big-endian byte swap
  # for message words; the table contents are outside this chunk).
  4027. movdqa 512-128(%rax),%xmm3
  # r11d = round selector from the key schedule (tested against 11 below).
  4028. movl 240(%rcx),%r11d
  # rsi = out - in; output is then addressed relative to the moving %rdi.
  4029. subq %rdi,%rsi
  # xmm15 = round key 0, xmm6 = CBC chaining value, xmm4 = round key 1.
  4030. movups (%rcx),%xmm15
  4031. movups (%r8),%xmm6
  4032. movups 16(%rcx),%xmm4
  # Bias rcx by 112 so round keys 1..14 are reachable as -96..112(%rcx).
  4033. leaq 112(%rcx),%rcx
  # Repack the state from memory order into the two-register layout that
  # sha256rnds2 consumes (A,B,E,F in xmm1 and C,D,G,H in xmm2, per the
  # instruction's operand convention).  The shuffles after the loop
  # convert it back before the final store.
  4034. pshufd $0x1b,%xmm1,%xmm0
  4035. pshufd $0xb1,%xmm1,%xmm1
  4036. pshufd $0x1b,%xmm2,%xmm2
  # Keep a copy of the mask: xmm3 doubles as palignr scratch in the loop.
  4037. movdqa %xmm3,%xmm7
  # palignr $8,%xmm2,%xmm1
  4038. .byte 102,15,58,15,202,8
  4039. punpcklqdq %xmm0,%xmm2
  4040. jmp .Loop_shaext
  4041. .align 16
  4042. .Loop_shaext:
  # Load the 64-byte hash block into xmm10..xmm13 and apply the xmm3 mask.
  4043. movdqu (%r10),%xmm10
  4044. movdqu 16(%r10),%xmm11
  4045. movdqu 32(%r10),%xmm12
  # pshufb %xmm3,%xmm10
  4046. .byte 102,68,15,56,0,211
  4047. movdqu 48(%r10),%xmm13
  # xmm0 = K256[+0] + message words: input for rounds 0-3.
  4048. movdqa 0-128(%rax),%xmm0
  4049. paddd %xmm10,%xmm0
  # pshufb %xmm3,%xmm11
  4050. .byte 102,68,15,56,0,219
  # Save the incoming state; it is added back after round 63.
  4051. movdqa %xmm2,%xmm9
  4052. movdqa %xmm1,%xmm8
  # ---- AES block 0: xmm6 = chain ^ input ^ round key 0 ----
  4053. movups 0(%rdi),%xmm14
  4054. xorps %xmm15,%xmm14
  4055. xorps %xmm14,%xmm6
  4056. movups -80(%rcx),%xmm5
  4057. aesenc %xmm4,%xmm6
  # sha256rnds2 %xmm0,%xmm1,%xmm2   (two rounds; updates xmm2)
  4058. .byte 15,56,203,209
  # Move the upper two K+W sums into the low half for the next two rounds.
  4059. pshufd $0x0e,%xmm0,%xmm0
  4060. movups -64(%rcx),%xmm4
  4061. aesenc %xmm5,%xmm6
  # sha256rnds2 %xmm0,%xmm2,%xmm1   (two rounds; updates xmm1)
  4062. .byte 15,56,203,202
  # Rounds 4-7.
  4063. movdqa 32-128(%rax),%xmm0
  4064. paddd %xmm11,%xmm0
  # pshufb %xmm3,%xmm12
  4065. .byte 102,68,15,56,0,227
  # Advance the hash-input pointer to the next 64-byte block.
  4066. leaq 64(%r10),%r10
  4067. movups -48(%rcx),%xmm5
  4068. aesenc %xmm4,%xmm6
  # sha256rnds2 %xmm0,%xmm1,%xmm2
  4069. .byte 15,56,203,209
  4070. pshufd $0x0e,%xmm0,%xmm0
  4071. movups -32(%rcx),%xmm4
  4072. aesenc %xmm5,%xmm6
  # sha256rnds2 %xmm0,%xmm2,%xmm1
  4073. .byte 15,56,203,202
  # Rounds 8-11.
  4074. movdqa 64-128(%rax),%xmm0
  4075. paddd %xmm12,%xmm0
  # pshufb %xmm3,%xmm13   (last use of the mask in xmm3 this iteration)
  4076. .byte 102,68,15,56,0,235
  # sha256msg1 %xmm11,%xmm10   (message-schedule update starts here)
  4077. .byte 69,15,56,204,211
  4078. movups -16(%rcx),%xmm5
  4079. aesenc %xmm4,%xmm6
  # sha256rnds2 %xmm0,%xmm1,%xmm2
  4080. .byte 15,56,203,209
  4081. pshufd $0x0e,%xmm0,%xmm0
  # From here xmm3 is scratch for the palignr/paddd schedule step.
  4082. movdqa %xmm13,%xmm3
  # palignr $4,%xmm12,%xmm3
  4083. .byte 102,65,15,58,15,220,4
  4084. paddd %xmm3,%xmm10
  4085. movups 0(%rcx),%xmm4
  4086. aesenc %xmm5,%xmm6
  # sha256rnds2 %xmm0,%xmm2,%xmm1
  4087. .byte 15,56,203,202
  # Rounds 12-15.
  4088. movdqa 96-128(%rax),%xmm0
  4089. paddd %xmm13,%xmm0
  # sha256msg2 %xmm13,%xmm10
  4090. .byte 69,15,56,205,213
  # sha256msg1 %xmm12,%xmm11
  4091. .byte 69,15,56,204,220
  4092. movups 16(%rcx),%xmm5
  4093. aesenc %xmm4,%xmm6
  # sha256rnds2 %xmm0,%xmm1,%xmm2
  4094. .byte 15,56,203,209
  4095. pshufd $0x0e,%xmm0,%xmm0
  4096. movups 32(%rcx),%xmm4
  4097. aesenc %xmm5,%xmm6
  4098. movdqa %xmm10,%xmm3
  # palignr $4,%xmm13,%xmm3
  4099. .byte 102,65,15,58,15,221,4
  4100. paddd %xmm3,%xmm11
  # sha256rnds2 %xmm0,%xmm2,%xmm1
  4101. .byte 15,56,203,202
  # Rounds 16-19.
  4102. movdqa 128-128(%rax),%xmm0
  4103. paddd %xmm10,%xmm0
  # sha256msg2 %xmm10,%xmm11
  4104. .byte 69,15,56,205,218
  # sha256msg1 %xmm13,%xmm12
  4105. .byte 69,15,56,204,229
  4106. movups 48(%rcx),%xmm5
  4107. aesenc %xmm4,%xmm6
  # sha256rnds2 %xmm0,%xmm1,%xmm2
  4108. .byte 15,56,203,209
  4109. pshufd $0x0e,%xmm0,%xmm0
  4110. movdqa %xmm11,%xmm3
  # palignr $4,%xmm10,%xmm3
  4111. .byte 102,65,15,58,15,218,4
  4112. paddd %xmm3,%xmm12
  # Round-count dispatch.  xmm5 holds round key 10 here:
  #   r11d <  11 -> finish with key 10 (10 rounds in total)
  #   r11d == 11 -> two more rounds, finish with key 12
  #   r11d >  11 -> four more rounds, finish with key 14
  4113. cmpl $11,%r11d
  4114. jb .Laesenclast1
  4115. movups 64(%rcx),%xmm4
  4116. aesenc %xmm5,%xmm6
  4117. movups 80(%rcx),%xmm5
  4118. aesenc %xmm4,%xmm6
  # Flags are still those of the cmpl; movups/aesenc leave them untouched.
  4119. je .Laesenclast1
  4120. movups 96(%rcx),%xmm4
  4121. aesenc %xmm5,%xmm6
  4122. movups 112(%rcx),%xmm5
  4123. aesenc %xmm4,%xmm6
  4124. .Laesenclast1:
  # xmm6 = ciphertext of block 0.
  4125. aesenclast %xmm5,%xmm6
  # Reload round key 1 (key schedule offset 16) for the next block.
  4126. movups 16-112(%rcx),%xmm4
  4127. nop
  # sha256rnds2 %xmm0,%xmm2,%xmm1
  4128. .byte 15,56,203,202
  # ---- AES block 1 ----
  4129. movups 16(%rdi),%xmm14
  4130. xorps %xmm15,%xmm14
  # Store ciphertext block 0; it is also the chaining value for block 1.
  4131. movups %xmm6,0(%rsi,%rdi,1)
  4132. xorps %xmm14,%xmm6
  4133. movups -80(%rcx),%xmm5
  4134. aesenc %xmm4,%xmm6
  # Rounds 20-23.
  4135. movdqa 160-128(%rax),%xmm0
  4136. paddd %xmm11,%xmm0
  # sha256msg2 %xmm11,%xmm12
  4137. .byte 69,15,56,205,227
  # sha256msg1 %xmm10,%xmm13
  4138. .byte 69,15,56,204,234
  4139. movups -64(%rcx),%xmm4
  4140. aesenc %xmm5,%xmm6
  # sha256rnds2 %xmm0,%xmm1,%xmm2
  4141. .byte 15,56,203,209
  4142. pshufd $0x0e,%xmm0,%xmm0
  4143. movdqa %xmm12,%xmm3
  # palignr $4,%xmm11,%xmm3
  4144. .byte 102,65,15,58,15,219,4
  4145. paddd %xmm3,%xmm13
  4146. movups -48(%rcx),%xmm5
  4147. aesenc %xmm4,%xmm6
  # sha256rnds2 %xmm0,%xmm2,%xmm1
  4148. .byte 15,56,203,202
  # Rounds 24-27.
  4149. movdqa 192-128(%rax),%xmm0
  4150. paddd %xmm12,%xmm0
  # sha256msg2 %xmm12,%xmm13
  4151. .byte 69,15,56,205,236
  # sha256msg1 %xmm11,%xmm10
  4152. .byte 69,15,56,204,211
  4153. movups -32(%rcx),%xmm4
  4154. aesenc %xmm5,%xmm6
  # sha256rnds2 %xmm0,%xmm1,%xmm2
  4155. .byte 15,56,203,209
  4156. pshufd $0x0e,%xmm0,%xmm0
  4157. movdqa %xmm13,%xmm3
  # palignr $4,%xmm12,%xmm3
  4158. .byte 102,65,15,58,15,220,4
  4159. paddd %xmm3,%xmm10
  4160. movups -16(%rcx),%xmm5
  4161. aesenc %xmm4,%xmm6
  # sha256rnds2 %xmm0,%xmm2,%xmm1
  4162. .byte 15,56,203,202
  # Rounds 28-31.
  4163. movdqa 224-128(%rax),%xmm0
  4164. paddd %xmm13,%xmm0
  # sha256msg2 %xmm13,%xmm10
  4165. .byte 69,15,56,205,213
  # sha256msg1 %xmm12,%xmm11
  4166. .byte 69,15,56,204,220
  4167. movups 0(%rcx),%xmm4
  4168. aesenc %xmm5,%xmm6
  # sha256rnds2 %xmm0,%xmm1,%xmm2
  4169. .byte 15,56,203,209
  4170. pshufd $0x0e,%xmm0,%xmm0
  4171. movdqa %xmm10,%xmm3
  # palignr $4,%xmm13,%xmm3
  4172. .byte 102,65,15,58,15,221,4
  4173. paddd %xmm3,%xmm11
  4174. movups 16(%rcx),%xmm5
  4175. aesenc %xmm4,%xmm6
  # sha256rnds2 %xmm0,%xmm2,%xmm1
  4176. .byte 15,56,203,202
  # Rounds 32-35.
  4177. movdqa 256-128(%rax),%xmm0
  4178. paddd %xmm10,%xmm0
  # sha256msg2 %xmm10,%xmm11
  4179. .byte 69,15,56,205,218
  # sha256msg1 %xmm13,%xmm12
  4180. .byte 69,15,56,204,229
  4181. movups 32(%rcx),%xmm4
  4182. aesenc %xmm5,%xmm6
  # sha256rnds2 %xmm0,%xmm1,%xmm2
  4183. .byte 15,56,203,209
  4184. pshufd $0x0e,%xmm0,%xmm0
  4185. movdqa %xmm11,%xmm3
  # palignr $4,%xmm10,%xmm3
  4186. .byte 102,65,15,58,15,218,4
  4187. paddd %xmm3,%xmm12
  4188. movups 48(%rcx),%xmm5
  4189. aesenc %xmm4,%xmm6
  # Same round-count dispatch as for block 0.
  4190. cmpl $11,%r11d
  4191. jb .Laesenclast2
  4192. movups 64(%rcx),%xmm4
  4193. aesenc %xmm5,%xmm6
  4194. movups 80(%rcx),%xmm5
  4195. aesenc %xmm4,%xmm6
  4196. je .Laesenclast2
  4197. movups 96(%rcx),%xmm4
  4198. aesenc %xmm5,%xmm6
  4199. movups 112(%rcx),%xmm5
  4200. aesenc %xmm4,%xmm6
  4201. .Laesenclast2:
  # xmm6 = ciphertext of block 1.
  4202. aesenclast %xmm5,%xmm6
  4203. movups 16-112(%rcx),%xmm4
  4204. nop
  # sha256rnds2 %xmm0,%xmm2,%xmm1
  4205. .byte 15,56,203,202
  # ---- AES block 2 ----
  4206. movups 32(%rdi),%xmm14
  4207. xorps %xmm15,%xmm14
  # Store ciphertext block 1.
  4208. movups %xmm6,16(%rsi,%rdi,1)
  4209. xorps %xmm14,%xmm6
  4210. movups -80(%rcx),%xmm5
  4211. aesenc %xmm4,%xmm6
  # Rounds 36-39.
  4212. movdqa 288-128(%rax),%xmm0
  4213. paddd %xmm11,%xmm0
  # sha256msg2 %xmm11,%xmm12
  4214. .byte 69,15,56,205,227
  # sha256msg1 %xmm10,%xmm13
  4215. .byte 69,15,56,204,234
  4216. movups -64(%rcx),%xmm4
  4217. aesenc %xmm5,%xmm6
  # sha256rnds2 %xmm0,%xmm1,%xmm2
  4218. .byte 15,56,203,209
  4219. pshufd $0x0e,%xmm0,%xmm0
  4220. movdqa %xmm12,%xmm3
  # palignr $4,%xmm11,%xmm3
  4221. .byte 102,65,15,58,15,219,4
  4222. paddd %xmm3,%xmm13
  4223. movups -48(%rcx),%xmm5
  4224. aesenc %xmm4,%xmm6
  # sha256rnds2 %xmm0,%xmm2,%xmm1
  4225. .byte 15,56,203,202
  # Rounds 40-43.
  4226. movdqa 320-128(%rax),%xmm0
  4227. paddd %xmm12,%xmm0
  # sha256msg2 %xmm12,%xmm13
  4228. .byte 69,15,56,205,236
  # sha256msg1 %xmm11,%xmm10
  4229. .byte 69,15,56,204,211
  4230. movups -32(%rcx),%xmm4
  4231. aesenc %xmm5,%xmm6
  # sha256rnds2 %xmm0,%xmm1,%xmm2
  4232. .byte 15,56,203,209
  4233. pshufd $0x0e,%xmm0,%xmm0
  4234. movdqa %xmm13,%xmm3
  # palignr $4,%xmm12,%xmm3
  4235. .byte 102,65,15,58,15,220,4
  4236. paddd %xmm3,%xmm10
  4237. movups -16(%rcx),%xmm5
  4238. aesenc %xmm4,%xmm6
  # sha256rnds2 %xmm0,%xmm2,%xmm1
  4239. .byte 15,56,203,202
  # Rounds 44-47.
  4240. movdqa 352-128(%rax),%xmm0
  4241. paddd %xmm13,%xmm0
  # sha256msg2 %xmm13,%xmm10
  4242. .byte 69,15,56,205,213
  # sha256msg1 %xmm12,%xmm11
  4243. .byte 69,15,56,204,220
  4244. movups 0(%rcx),%xmm4
  4245. aesenc %xmm5,%xmm6
  # sha256rnds2 %xmm0,%xmm1,%xmm2
  4246. .byte 15,56,203,209
  4247. pshufd $0x0e,%xmm0,%xmm0
  4248. movdqa %xmm10,%xmm3
  # palignr $4,%xmm13,%xmm3
  4249. .byte 102,65,15,58,15,221,4
  4250. paddd %xmm3,%xmm11
  4251. movups 16(%rcx),%xmm5
  4252. aesenc %xmm4,%xmm6
  # sha256rnds2 %xmm0,%xmm2,%xmm1
  4253. .byte 15,56,203,202
  # Rounds 48-51.
  4254. movdqa 384-128(%rax),%xmm0
  4255. paddd %xmm10,%xmm0
  # sha256msg2 %xmm10,%xmm11
  4256. .byte 69,15,56,205,218
  # sha256msg1 %xmm13,%xmm12
  4257. .byte 69,15,56,204,229
  4258. movups 32(%rcx),%xmm4
  4259. aesenc %xmm5,%xmm6
  # sha256rnds2 %xmm0,%xmm1,%xmm2
  4260. .byte 15,56,203,209
  4261. pshufd $0x0e,%xmm0,%xmm0
  4262. movdqa %xmm11,%xmm3
  # palignr $4,%xmm10,%xmm3
  4263. .byte 102,65,15,58,15,218,4
  4264. paddd %xmm3,%xmm12
  4265. movups 48(%rcx),%xmm5
  4266. aesenc %xmm4,%xmm6
  # sha256rnds2 %xmm0,%xmm2,%xmm1
  4267. .byte 15,56,203,202
  # Rounds 52-55 (the sha256rnds2 pair follows the AES tail below).
  4268. movdqa 416-128(%rax),%xmm0
  4269. paddd %xmm11,%xmm0
  # sha256msg2 %xmm11,%xmm12
  4270. .byte 69,15,56,205,227
  # sha256msg1 %xmm10,%xmm13
  4271. .byte 69,15,56,204,234
  # Same round-count dispatch as for block 0.
  4272. cmpl $11,%r11d
  4273. jb .Laesenclast3
  4274. movups 64(%rcx),%xmm4
  4275. aesenc %xmm5,%xmm6
  4276. movups 80(%rcx),%xmm5
  4277. aesenc %xmm4,%xmm6
  4278. je .Laesenclast3
  4279. movups 96(%rcx),%xmm4
  4280. aesenc %xmm5,%xmm6
  4281. movups 112(%rcx),%xmm5
  4282. aesenc %xmm4,%xmm6
  4283. .Laesenclast3:
  # xmm6 = ciphertext of block 2.
  4284. aesenclast %xmm5,%xmm6
  4285. movups 16-112(%rcx),%xmm4
  4286. nop
  # sha256rnds2 %xmm0,%xmm1,%xmm2
  4287. .byte 15,56,203,209
  4288. pshufd $0x0e,%xmm0,%xmm0
  4289. movdqa %xmm12,%xmm3
  # palignr $4,%xmm11,%xmm3
  4290. .byte 102,65,15,58,15,219,4
  4291. paddd %xmm3,%xmm13
  # ---- AES block 3 ----
  4292. movups 48(%rdi),%xmm14
  4293. xorps %xmm15,%xmm14
  # Store ciphertext block 2.
  4294. movups %xmm6,32(%rsi,%rdi,1)
  4295. xorps %xmm14,%xmm6
  4296. movups -80(%rcx),%xmm5
  4297. aesenc %xmm4,%xmm6
  4298. movups -64(%rcx),%xmm4
  4299. aesenc %xmm5,%xmm6
  # sha256rnds2 %xmm0,%xmm2,%xmm1
  4300. .byte 15,56,203,202
  # Rounds 56-59.
  4301. movdqa 448-128(%rax),%xmm0
  4302. paddd %xmm12,%xmm0
  # sha256msg2 %xmm12,%xmm13   (final message-schedule step)
  4303. .byte 69,15,56,205,236
  # Restore the pshufb mask into xmm3 for the next iteration.
  4304. movdqa %xmm7,%xmm3
  4305. movups -48(%rcx),%xmm5
  4306. aesenc %xmm4,%xmm6
  # sha256rnds2 %xmm0,%xmm1,%xmm2
  4307. .byte 15,56,203,209
  4308. pshufd $0x0e,%xmm0,%xmm0
  4309. movups -32(%rcx),%xmm4
  4310. aesenc %xmm5,%xmm6
  # sha256rnds2 %xmm0,%xmm2,%xmm1
  4311. .byte 15,56,203,202
  # Rounds 60-63.
  4312. movdqa 480-128(%rax),%xmm0
  4313. paddd %xmm13,%xmm0
  4314. movups -16(%rcx),%xmm5
  4315. aesenc %xmm4,%xmm6
  4316. movups 0(%rcx),%xmm4
  4317. aesenc %xmm5,%xmm6
  # sha256rnds2 %xmm0,%xmm1,%xmm2
  4318. .byte 15,56,203,209
  4319. pshufd $0x0e,%xmm0,%xmm0
  4320. movups 16(%rcx),%xmm5
  4321. aesenc %xmm4,%xmm6
  # sha256rnds2 %xmm0,%xmm2,%xmm1   (completes round 63)
  4322. .byte 15,56,203,202
  4323. movups 32(%rcx),%xmm4
  4324. aesenc %xmm5,%xmm6
  4325. movups 48(%rcx),%xmm5
  4326. aesenc %xmm4,%xmm6
  # Same round-count dispatch as for block 0.
  4327. cmpl $11,%r11d
  4328. jb .Laesenclast4
  4329. movups 64(%rcx),%xmm4
  4330. aesenc %xmm5,%xmm6
  4331. movups 80(%rcx),%xmm5
  4332. aesenc %xmm4,%xmm6
  4333. je .Laesenclast4
  4334. movups 96(%rcx),%xmm4
  4335. aesenc %xmm5,%xmm6
  4336. movups 112(%rcx),%xmm5
  4337. aesenc %xmm4,%xmm6
  4338. .Laesenclast4:
  # xmm6 = ciphertext of block 3; it chains into the next iteration.
  4339. aesenclast %xmm5,%xmm6
  4340. movups 16-112(%rcx),%xmm4
  4341. nop
  # Add the state saved at the top of the loop back into the new state.
  4342. paddd %xmm9,%xmm2
  4343. paddd %xmm8,%xmm1
  # decq sets ZF for the jnz below; the movups and leaq in between do not
  # modify flags.
  4344. decq %rdx
  # Store ciphertext block 3, then advance the input pointer (the output
  # address follows because it is computed relative to %rdi).
  4345. movups %xmm6,48(%rsi,%rdi,1)
  4346. leaq 64(%rdi),%rdi
  4347. jnz .Loop_shaext
  # Convert the state from the sha256rnds2 layout back to memory order.
  4348. pshufd $0xb1,%xmm2,%xmm2
  4349. pshufd $0x1b,%xmm1,%xmm3
  4350. pshufd $0xb1,%xmm1,%xmm1
  4351. punpckhqdq %xmm2,%xmm1
  # palignr $8,%xmm3,%xmm2
  4352. .byte 102,15,58,15,211,8
  # Write the last ciphertext block back as the CBC chaining value, then
  # store the updated SHA-256 state.
  4353. movups %xmm6,(%r8)
  4354. movdqu %xmm1,(%r9)
  4355. movdqu %xmm2,16(%r9)
  # rep ret
  4356. .byte 0xf3,0xc3
  4357. .cfi_endproc
  4358. .size aesni_cbc_sha256_enc_shaext,.-aesni_cbc_sha256_enc_shaext