aesni-sha256-x86_64.s 87 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
72778277927802781278227832784278527862787278827892790279127922793279427952796279727982799280028012802280328042805280628072808280928102811281228132814281528162817281828192820282128222823282428252826282728282829283028312832283328342835283628372838283928402841284228432844284528462847284828492850285128522853285428552856285728582859286028612862286328642865286628672868286928702871287228732874287528762877287828792880288128822883288428852886288728882889289028912892289328942895289628972898289929002901290229032904290529062907290829092910291129122913291429152916291729182919292029212922292329242925292629272928292929302931293229332934293529362937293829392940294129422943294429452946294729482949295029512952295329542955295629572958295929602961296229632964296529662967296829692970297129722973297429752976297729782979298029812982298329842985298629872988298929902991299229932994299529962997299829993000300130023003300430053006300730083009301030113012301330143015301630173018301930203021302230233024302530263027302830293030303130323033303430353036303730383039304030413042304330443045304630473048304930503051305230533054305530563057305830593060306130623063306430653066306730683069307030713072307330743075307630773078307930803081308230833084308530863087308830893090309130923093309430953096309730983099310031013102310331043105310631073108310931103111311231133114311531163117311831193120312131223123312431253126312731283129313031313132313331343135313631373138313931403141314231433144314531463147314831493150315131523153315431553156315731583159316031613162316331643165316631673168316931703171317231733174317531763177317831793180318131823183318431853186318731883189319031913192319331943195319631973198319932003201320232033204320532063207320832093210321132123213321432153216321732183219322032213222322332243225322632273228322932303231323232333234323532363237323832393240324132423243324432453246324732483249325032513252325332543255325632573258325932603261326232633264326532663267326832693270327132723273327432753276327
73278327932803281328232833284328532863287328832893290329132923293329432953296329732983299330033013302330333043305330633073308330933103311331233133314331533163317331833193320332133223323332433253326332733283329333033313332333333343335333633373338333933403341334233433344334533463347334833493350335133523353335433553356335733583359336033613362336333643365336633673368336933703371337233733374337533763377337833793380338133823383338433853386338733883389339033913392339333943395339633973398339934003401340234033404340534063407340834093410341134123413341434153416341734183419342034213422342334243425342634273428342934303431343234333434343534363437343834393440344134423443344434453446344734483449345034513452345334543455345634573458345934603461346234633464346534663467346834693470347134723473347434753476347734783479348034813482348334843485348634873488348934903491349234933494349534963497349834993500350135023503350435053506350735083509351035113512351335143515351635173518351935203521352235233524352535263527352835293530353135323533353435353536353735383539354035413542354335443545354635473548354935503551355235533554355535563557355835593560356135623563356435653566356735683569357035713572357335743575357635773578357935803581358235833584358535863587358835893590359135923593359435953596359735983599360036013602360336043605360636073608360936103611361236133614361536163617361836193620362136223623362436253626362736283629363036313632363336343635363636373638363936403641364236433644364536463647364836493650365136523653365436553656365736583659366036613662366336643665366636673668366936703671367236733674367536763677367836793680368136823683368436853686368736883689369036913692369336943695369636973698369937003701370237033704370537063707370837093710371137123713371437153716371737183719372037213722372337243725372637273728372937303731373237333734373537363737373837393740374137423743374437453746374737483749375037513752375337543755375637573758375937603761376237633764376537663767376837693770377137723773377437753776377
73778377937803781378237833784378537863787378837893790379137923793379437953796379737983799380038013802380338043805380638073808380938103811381238133814381538163817381838193820382138223823382438253826382738283829383038313832383338343835383638373838383938403841384238433844384538463847384838493850385138523853385438553856385738583859386038613862386338643865386638673868386938703871387238733874387538763877387838793880388138823883388438853886388738883889389038913892389338943895389638973898389939003901390239033904390539063907390839093910391139123913391439153916391739183919392039213922392339243925392639273928392939303931393239333934393539363937393839393940394139423943394439453946394739483949395039513952395339543955395639573958395939603961396239633964396539663967396839693970397139723973397439753976397739783979398039813982398339843985398639873988398939903991399239933994399539963997399839994000400140024003400440054006400740084009401040114012401340144015401640174018401940204021402240234024402540264027402840294030403140324033403440354036403740384039404040414042404340444045404640474048404940504051405240534054405540564057405840594060406140624063406440654066406740684069407040714072407340744075407640774078407940804081408240834084408540864087408840894090409140924093409440954096409740984099410041014102410341044105410641074108410941104111411241134114411541164117411841194120412141224123412441254126412741284129413041314132413341344135413641374138413941404141414241434144414541464147414841494150415141524153415441554156415741584159416041614162416341644165416641674168416941704171417241734174417541764177417841794180418141824183418441854186418741884189419041914192419341944195419641974198419942004201420242034204420542064207420842094210421142124213421442154216421742184219422042214222422342244225422642274228422942304231423242334234423542364237423842394240424142424243424442454246424742484249425042514252425342544255425642574258425942604261426242634264426542664267426842694270427142724273427442754276427
742784279428042814282428342844285428642874288428942904291429242934294429542964297429842994300430143024303430443054306430743084309431043114312431343144315431643174318431943204321432243234324432543264327432843294330433143324333433443354336433743384339434043414342434343444345434643474348434943504351435243534354435543564357435843594360436143624363436443654366436743684369437043714372437343744375437643774378437943804381438243834384438543864387438843894390439143924393439443954396439743984399440044014402440344044405440644074408440944104411441244134414441544164417441844194420442144224423442444254426442744284429443044314432443344344435
  # _aesni_cbc_sha256_enc -- run-time dispatcher for the stitched
  # AES-CBC + SHA-256 routine.
  #
  # Reads the capability words stored at _OPENSSL_ia32cap_P and tail-jumps
  # (jc/jnz/je, no call) to one of the variants, so the chosen variant sees
  # the caller's registers and stack exactly as they were on entry here.
  #
  # In:   %rdi  tested against zero; zero selects the probe path below.
  #             NOTE(review): presumably the first (input pointer) argument
  #             under the SysV AMD64 convention -- confirm against callers.
  # Out:  probe path only: %eax = 1.
  #       Otherwise control leaves through one of the jumps, or traps on ud2.
  # Clobbers here: %eax, %r10, %r11, flags.
  1. .text
  2. .globl _aesni_cbc_sha256_enc
  3. .p2align 4
  4. _aesni_cbc_sha256_enc:
  # r11 = address of the capability vector (RIP-relative).
  5. leaq _OPENSSL_ia32cap_P(%rip),%r11
  # Probe: when %rdi is zero, return immediately with %eax = 1.
  6. movl $1,%eax
  7. cmpq $0,%rdi
  8. je L$probe
  # eax = dword at offset 0; r10 = the 8 bytes at offset 4
  # (low half = dword at offset 4, high half = dword at offset 8).
  9. movl 0(%r11),%eax
  10. movq 4(%r11),%r10
  # Bit 61 of r10 is bit 29 of the dword at offset 8.
  # NOTE(review): presumably the SHA-extensions flag, judging by the
  # target name -- confirm against the OPENSSL_ia32cap documentation.
  11. btq $61,%r10
  12. jc aesni_cbc_sha256_enc_shaext
  # r11 = dword at offset 8 (the capability pointer is no longer needed).
  13. movq %r10,%r11
  14. shrq $32,%r11
  # 2048 = 1<<11, tested in the dword at offset 4 -> XOP variant.
  15. testl $2048,%r10d
  16. jnz aesni_cbc_sha256_enc_xop
  # 296 = 0x128 = bits 3, 5 and 8 of the dword at offset 8; the AVX2
  # variant is taken only when all three bits are set.
  17. andl $296,%r11d
  18. cmpl $296,%r11d
  19. je aesni_cbc_sha256_enc_avx2
  # 268435456 = 1<<28, tested in the dword at offset 4 -> AVX variant.
  20. andl $268435456,%r10d
  21. jnz aesni_cbc_sha256_enc_avx
  # No variant matched: ud2 raises an invalid-opcode exception.
  22. ud2
  # The next four instructions follow ud2 with no label in between, so they
  # cannot be reached; they are kept byte-for-byte as emitted.
  23. xorl %eax,%eax
  24. cmpq $0,%rdi
  25. je L$probe
  26. ud2
  27. L$probe:
  # 0xf3,0xc3 encodes "rep ret" (a two-byte return).
  28. .byte 0xf3,0xc3
  # K256 -- read-only constant pool, aligned to 64 bytes.
  # Code in this file addresses it by fixed byte offsets (K256+512 and
  # K256+544), so the order and size of every row below must not change.
  #
  # Bytes 0..511: the 64 SHA-256 round constants (FIPS 180-4), four 32-bit
  # words per row, with every row emitted twice back to back. The XOP code
  # in this file steps through rows at a 32-byte stride (0, 32, 64, 96 from
  # its table pointer), i.e. it reads the first copy of each row.
  # NOTE(review): the second copy is presumably for code paths using
  # 32-byte loads, which are not visible in this part of the file -- confirm.
  29. .p2align 6
  30. K256:
  31. .long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
  32. .long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
  33. .long 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
  34. .long 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
  35. .long 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
  36. .long 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
  37. .long 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
  38. .long 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
  39. .long 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
  40. .long 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
  41. .long 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
  42. .long 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
  43. .long 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
  44. .long 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
  45. .long 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
  46. .long 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
  47. .long 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
  48. .long 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
  49. .long 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
  50. .long 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
  51. .long 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
  52. .long 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
  53. .long 0xd192e819,0xd6990624,0xf40e3585,0x106aa070
  54. .long 0xd192e819,0xd6990624,0xf40e3585,0x106aa070
  55. .long 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
  56. .long 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
  57. .long 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
  58. .long 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
  59. .long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
  60. .long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
  61. .long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
  62. .long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
  # Bytes 512..543 (K256+512): vpshufb control mask, emitted twice. Stored
  # little-endian, each dword is the byte sequence 03 02 01 00 (+4, +8, +12),
  # so the shuffle reverses the byte order inside every 32-bit word. The XOP
  # loop applies it to each 16-byte piece of the input block it loads.
  63. .long 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f
  64. .long 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f
  # Bytes 544..623 (K256+544): a run of all-zero dwords with one 16-byte
  # all-ones block at bytes 576..591. The XOP prologue loads three 16-byte
  # values from K256+544 + 8*(r-9) at displacements 0, 16 and 32, where r is
  # the dword it reads at offset 240 from the pointer passed in %rcx.
  # NOTE(review): presumably r is the AES round count and the loads pick
  # which final-round result is kept -- confirm against the key schedule.
  65. .long 0,0,0,0, 0,0,0,0, -1,-1,-1,-1
  66. .long 0,0,0,0, 0,0,0,0
  # NUL-terminated ASCII identification string:
  # "AESNI-CBC+SHA256 stitch for x86_64, CRYPTOGAMS by <appro@openssl.org>"
  67. .byte 65,69,83,78,73,45,67,66,67,43,83,72,65,50,53,54,32,115,116,105,116,99,104,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
  68. .p2align 6
  69. .p2align 6
  70. aesni_cbc_sha256_enc_xop:
  71. L$xop_shortcut:
  72. movq 8(%rsp),%r10
  73. movq %rsp,%rax
  74. pushq %rbx
  75. pushq %rbp
  76. pushq %r12
  77. pushq %r13
  78. pushq %r14
  79. pushq %r15
  80. subq $128,%rsp
  81. andq $-64,%rsp
  82. shlq $6,%rdx
  83. subq %rdi,%rsi
  84. subq %rdi,%r10
  85. addq %rdi,%rdx
  86. movq %rsi,64+8(%rsp)
  87. movq %rdx,64+16(%rsp)
  88. movq %r8,64+32(%rsp)
  89. movq %r9,64+40(%rsp)
  90. movq %r10,64+48(%rsp)
  91. movq %rax,120(%rsp)
  92. L$prologue_xop:
  93. vzeroall
  94. movq %rdi,%r12
  95. leaq 128(%rcx),%rdi
  96. leaq K256+544(%rip),%r13
  97. movl 240-128(%rdi),%r14d
  98. movq %r9,%r15
  99. movq %r10,%rsi
  100. vmovdqu (%r8),%xmm8
  101. subq $9,%r14
  102. movl 0(%r15),%eax
  103. movl 4(%r15),%ebx
  104. movl 8(%r15),%ecx
  105. movl 12(%r15),%edx
  106. movl 16(%r15),%r8d
  107. movl 20(%r15),%r9d
  108. movl 24(%r15),%r10d
  109. movl 28(%r15),%r11d
  110. vmovdqa 0(%r13,%r14,8),%xmm14
  111. vmovdqa 16(%r13,%r14,8),%xmm13
  112. vmovdqa 32(%r13,%r14,8),%xmm12
  113. vmovdqu 0-128(%rdi),%xmm10
  114. jmp L$loop_xop
  115. .p2align 4
  116. L$loop_xop:
  117. vmovdqa K256+512(%rip),%xmm7
  118. vmovdqu 0(%rsi,%r12,1),%xmm0
  119. vmovdqu 16(%rsi,%r12,1),%xmm1
  120. vmovdqu 32(%rsi,%r12,1),%xmm2
  121. vmovdqu 48(%rsi,%r12,1),%xmm3
  122. vpshufb %xmm7,%xmm0,%xmm0
  123. leaq K256(%rip),%rbp
  124. vpshufb %xmm7,%xmm1,%xmm1
  125. vpshufb %xmm7,%xmm2,%xmm2
  126. vpaddd 0(%rbp),%xmm0,%xmm4
  127. vpshufb %xmm7,%xmm3,%xmm3
  128. vpaddd 32(%rbp),%xmm1,%xmm5
  129. vpaddd 64(%rbp),%xmm2,%xmm6
  130. vpaddd 96(%rbp),%xmm3,%xmm7
  131. vmovdqa %xmm4,0(%rsp)
  132. movl %eax,%r14d
  133. vmovdqa %xmm5,16(%rsp)
  134. movl %ebx,%esi
  135. vmovdqa %xmm6,32(%rsp)
  136. xorl %ecx,%esi
  137. vmovdqa %xmm7,48(%rsp)
  138. movl %r8d,%r13d
  139. jmp L$xop_00_47
  140. .p2align 4
  141. L$xop_00_47:
  142. subq $-32*4,%rbp
  143. vmovdqu (%r12),%xmm9
  144. movq %r12,64+0(%rsp)
  145. vpalignr $4,%xmm0,%xmm1,%xmm4
  146. rorl $14,%r13d
  147. movl %r14d,%eax
  148. vpalignr $4,%xmm2,%xmm3,%xmm7
  149. movl %r9d,%r12d
  150. xorl %r8d,%r13d
  151. .byte 143,232,120,194,236,14
  152. rorl $9,%r14d
  153. xorl %r10d,%r12d
  154. vpsrld $3,%xmm4,%xmm4
  155. rorl $5,%r13d
  156. xorl %eax,%r14d
  157. vpaddd %xmm7,%xmm0,%xmm0
  158. andl %r8d,%r12d
  159. vpxor %xmm10,%xmm9,%xmm9
  160. vmovdqu 16-128(%rdi),%xmm10
  161. xorl %r8d,%r13d
  162. addl 0(%rsp),%r11d
  163. movl %eax,%r15d
  164. .byte 143,232,120,194,245,11
  165. rorl $11,%r14d
  166. xorl %r10d,%r12d
  167. vpxor %xmm5,%xmm4,%xmm4
  168. xorl %ebx,%r15d
  169. rorl $6,%r13d
  170. addl %r12d,%r11d
  171. andl %r15d,%esi
  172. .byte 143,232,120,194,251,13
  173. xorl %eax,%r14d
  174. addl %r13d,%r11d
  175. vpxor %xmm6,%xmm4,%xmm4
  176. xorl %ebx,%esi
  177. addl %r11d,%edx
  178. vpsrld $10,%xmm3,%xmm6
  179. rorl $2,%r14d
  180. addl %esi,%r11d
  181. vpaddd %xmm4,%xmm0,%xmm0
  182. movl %edx,%r13d
  183. addl %r11d,%r14d
  184. .byte 143,232,120,194,239,2
  185. rorl $14,%r13d
  186. movl %r14d,%r11d
  187. vpxor %xmm6,%xmm7,%xmm7
  188. movl %r8d,%r12d
  189. xorl %edx,%r13d
  190. rorl $9,%r14d
  191. xorl %r9d,%r12d
  192. vpxor %xmm5,%xmm7,%xmm7
  193. rorl $5,%r13d
  194. xorl %r11d,%r14d
  195. andl %edx,%r12d
  196. vpxor %xmm8,%xmm9,%xmm9
  197. xorl %edx,%r13d
  198. vpsrldq $8,%xmm7,%xmm7
  199. addl 4(%rsp),%r10d
  200. movl %r11d,%esi
  201. rorl $11,%r14d
  202. xorl %r9d,%r12d
  203. vpaddd %xmm7,%xmm0,%xmm0
  204. xorl %eax,%esi
  205. rorl $6,%r13d
  206. addl %r12d,%r10d
  207. andl %esi,%r15d
  208. .byte 143,232,120,194,248,13
  209. xorl %r11d,%r14d
  210. addl %r13d,%r10d
  211. vpsrld $10,%xmm0,%xmm6
  212. xorl %eax,%r15d
  213. addl %r10d,%ecx
  214. .byte 143,232,120,194,239,2
  215. rorl $2,%r14d
  216. addl %r15d,%r10d
  217. vpxor %xmm6,%xmm7,%xmm7
  218. movl %ecx,%r13d
  219. addl %r10d,%r14d
  220. rorl $14,%r13d
  221. movl %r14d,%r10d
  222. vpxor %xmm5,%xmm7,%xmm7
  223. movl %edx,%r12d
  224. xorl %ecx,%r13d
  225. rorl $9,%r14d
  226. xorl %r8d,%r12d
  227. vpslldq $8,%xmm7,%xmm7
  228. rorl $5,%r13d
  229. xorl %r10d,%r14d
  230. andl %ecx,%r12d
  231. vaesenc %xmm10,%xmm9,%xmm9
  232. vmovdqu 32-128(%rdi),%xmm10
  233. xorl %ecx,%r13d
  234. vpaddd %xmm7,%xmm0,%xmm0
  235. addl 8(%rsp),%r9d
  236. movl %r10d,%r15d
  237. rorl $11,%r14d
  238. xorl %r8d,%r12d
  239. vpaddd 0(%rbp),%xmm0,%xmm6
  240. xorl %r11d,%r15d
  241. rorl $6,%r13d
  242. addl %r12d,%r9d
  243. andl %r15d,%esi
  244. xorl %r10d,%r14d
  245. addl %r13d,%r9d
  246. xorl %r11d,%esi
  247. addl %r9d,%ebx
  248. rorl $2,%r14d
  249. addl %esi,%r9d
  250. movl %ebx,%r13d
  251. addl %r9d,%r14d
  252. rorl $14,%r13d
  253. movl %r14d,%r9d
  254. movl %ecx,%r12d
  255. xorl %ebx,%r13d
  256. rorl $9,%r14d
  257. xorl %edx,%r12d
  258. rorl $5,%r13d
  259. xorl %r9d,%r14d
  260. andl %ebx,%r12d
  261. vaesenc %xmm10,%xmm9,%xmm9
  262. vmovdqu 48-128(%rdi),%xmm10
  263. xorl %ebx,%r13d
  264. addl 12(%rsp),%r8d
  265. movl %r9d,%esi
  266. rorl $11,%r14d
  267. xorl %edx,%r12d
  268. xorl %r10d,%esi
  269. rorl $6,%r13d
  270. addl %r12d,%r8d
  271. andl %esi,%r15d
  272. xorl %r9d,%r14d
  273. addl %r13d,%r8d
  274. xorl %r10d,%r15d
  275. addl %r8d,%eax
  276. rorl $2,%r14d
  277. addl %r15d,%r8d
  278. movl %eax,%r13d
  279. addl %r8d,%r14d
  280. vmovdqa %xmm6,0(%rsp)
  281. vpalignr $4,%xmm1,%xmm2,%xmm4
  282. rorl $14,%r13d
  283. movl %r14d,%r8d
  284. vpalignr $4,%xmm3,%xmm0,%xmm7
  285. movl %ebx,%r12d
  286. xorl %eax,%r13d
  287. .byte 143,232,120,194,236,14
  288. rorl $9,%r14d
  289. xorl %ecx,%r12d
  290. vpsrld $3,%xmm4,%xmm4
  291. rorl $5,%r13d
  292. xorl %r8d,%r14d
  293. vpaddd %xmm7,%xmm1,%xmm1
  294. andl %eax,%r12d
  295. vaesenc %xmm10,%xmm9,%xmm9
  296. vmovdqu 64-128(%rdi),%xmm10
  297. xorl %eax,%r13d
  298. addl 16(%rsp),%edx
  299. movl %r8d,%r15d
  300. .byte 143,232,120,194,245,11
  301. rorl $11,%r14d
  302. xorl %ecx,%r12d
  303. vpxor %xmm5,%xmm4,%xmm4
  304. xorl %r9d,%r15d
  305. rorl $6,%r13d
  306. addl %r12d,%edx
  307. andl %r15d,%esi
  308. .byte 143,232,120,194,248,13
  309. xorl %r8d,%r14d
  310. addl %r13d,%edx
  311. vpxor %xmm6,%xmm4,%xmm4
  312. xorl %r9d,%esi
  313. addl %edx,%r11d
  314. vpsrld $10,%xmm0,%xmm6
  315. rorl $2,%r14d
  316. addl %esi,%edx
  317. vpaddd %xmm4,%xmm1,%xmm1
  318. movl %r11d,%r13d
  319. addl %edx,%r14d
  320. .byte 143,232,120,194,239,2
  321. rorl $14,%r13d
  322. movl %r14d,%edx
  323. vpxor %xmm6,%xmm7,%xmm7
  324. movl %eax,%r12d
  325. xorl %r11d,%r13d
  326. rorl $9,%r14d
  327. xorl %ebx,%r12d
  328. vpxor %xmm5,%xmm7,%xmm7
  329. rorl $5,%r13d
  330. xorl %edx,%r14d
  331. andl %r11d,%r12d
  332. vaesenc %xmm10,%xmm9,%xmm9
  333. vmovdqu 80-128(%rdi),%xmm10
  334. xorl %r11d,%r13d
  335. vpsrldq $8,%xmm7,%xmm7
  336. addl 20(%rsp),%ecx
  337. movl %edx,%esi
  338. rorl $11,%r14d
  339. xorl %ebx,%r12d
  340. vpaddd %xmm7,%xmm1,%xmm1
  341. xorl %r8d,%esi
  342. rorl $6,%r13d
  343. addl %r12d,%ecx
  344. andl %esi,%r15d
  345. .byte 143,232,120,194,249,13
  346. xorl %edx,%r14d
  347. addl %r13d,%ecx
  348. vpsrld $10,%xmm1,%xmm6
  349. xorl %r8d,%r15d
  350. addl %ecx,%r10d
  351. .byte 143,232,120,194,239,2
  352. rorl $2,%r14d
  353. addl %r15d,%ecx
  354. vpxor %xmm6,%xmm7,%xmm7
  355. movl %r10d,%r13d
  356. addl %ecx,%r14d
  357. rorl $14,%r13d
  358. movl %r14d,%ecx
  359. vpxor %xmm5,%xmm7,%xmm7
  360. movl %r11d,%r12d
  361. xorl %r10d,%r13d
  362. rorl $9,%r14d
  363. xorl %eax,%r12d
  364. vpslldq $8,%xmm7,%xmm7
  365. rorl $5,%r13d
  366. xorl %ecx,%r14d
  367. andl %r10d,%r12d
  368. vaesenc %xmm10,%xmm9,%xmm9
  369. vmovdqu 96-128(%rdi),%xmm10
  370. xorl %r10d,%r13d
  371. vpaddd %xmm7,%xmm1,%xmm1
  372. addl 24(%rsp),%ebx
  373. movl %ecx,%r15d
  374. rorl $11,%r14d
  375. xorl %eax,%r12d
  376. vpaddd 32(%rbp),%xmm1,%xmm6
  377. xorl %edx,%r15d
  378. rorl $6,%r13d
  379. addl %r12d,%ebx
  380. andl %r15d,%esi
  381. xorl %ecx,%r14d
  382. addl %r13d,%ebx
  383. xorl %edx,%esi
  384. addl %ebx,%r9d
  385. rorl $2,%r14d
  386. addl %esi,%ebx
  387. movl %r9d,%r13d
  388. addl %ebx,%r14d
  389. rorl $14,%r13d
  390. movl %r14d,%ebx
  391. movl %r10d,%r12d
  392. xorl %r9d,%r13d
  393. rorl $9,%r14d
  394. xorl %r11d,%r12d
  395. rorl $5,%r13d
  396. xorl %ebx,%r14d
  397. andl %r9d,%r12d
  398. vaesenc %xmm10,%xmm9,%xmm9
  399. vmovdqu 112-128(%rdi),%xmm10
  400. xorl %r9d,%r13d
  401. addl 28(%rsp),%eax
  402. movl %ebx,%esi
  403. rorl $11,%r14d
  404. xorl %r11d,%r12d
  405. xorl %ecx,%esi
  406. rorl $6,%r13d
  407. addl %r12d,%eax
  408. andl %esi,%r15d
  409. xorl %ebx,%r14d
  410. addl %r13d,%eax
  411. xorl %ecx,%r15d
  412. addl %eax,%r8d
  413. rorl $2,%r14d
  414. addl %r15d,%eax
  415. movl %r8d,%r13d
  416. addl %eax,%r14d
  417. vmovdqa %xmm6,16(%rsp)
  418. vpalignr $4,%xmm2,%xmm3,%xmm4
  419. rorl $14,%r13d
  420. movl %r14d,%eax
  421. vpalignr $4,%xmm0,%xmm1,%xmm7
  422. movl %r9d,%r12d
  423. xorl %r8d,%r13d
  424. .byte 143,232,120,194,236,14
  425. rorl $9,%r14d
  426. xorl %r10d,%r12d
  427. vpsrld $3,%xmm4,%xmm4
  428. rorl $5,%r13d
  429. xorl %eax,%r14d
  430. vpaddd %xmm7,%xmm2,%xmm2
  431. andl %r8d,%r12d
  432. vaesenc %xmm10,%xmm9,%xmm9
  433. vmovdqu 128-128(%rdi),%xmm10
  434. xorl %r8d,%r13d
  435. addl 32(%rsp),%r11d
  436. movl %eax,%r15d
  437. .byte 143,232,120,194,245,11
  438. rorl $11,%r14d
  439. xorl %r10d,%r12d
  440. vpxor %xmm5,%xmm4,%xmm4
  441. xorl %ebx,%r15d
  442. rorl $6,%r13d
  443. addl %r12d,%r11d
  444. andl %r15d,%esi
  445. .byte 143,232,120,194,249,13
  446. xorl %eax,%r14d
  447. addl %r13d,%r11d
  448. vpxor %xmm6,%xmm4,%xmm4
  449. xorl %ebx,%esi
  450. addl %r11d,%edx
  451. vpsrld $10,%xmm1,%xmm6
  452. rorl $2,%r14d
  453. addl %esi,%r11d
  454. vpaddd %xmm4,%xmm2,%xmm2
  455. movl %edx,%r13d
  456. addl %r11d,%r14d
  457. .byte 143,232,120,194,239,2
  458. rorl $14,%r13d
  459. movl %r14d,%r11d
  460. vpxor %xmm6,%xmm7,%xmm7
  461. movl %r8d,%r12d
  462. xorl %edx,%r13d
  463. rorl $9,%r14d
  464. xorl %r9d,%r12d
  465. vpxor %xmm5,%xmm7,%xmm7
  466. rorl $5,%r13d
  467. xorl %r11d,%r14d
  468. andl %edx,%r12d
  469. vaesenc %xmm10,%xmm9,%xmm9
  470. vmovdqu 144-128(%rdi),%xmm10
  471. xorl %edx,%r13d
  472. vpsrldq $8,%xmm7,%xmm7
  473. addl 36(%rsp),%r10d
  474. movl %r11d,%esi
  475. rorl $11,%r14d
  476. xorl %r9d,%r12d
  477. vpaddd %xmm7,%xmm2,%xmm2
  478. xorl %eax,%esi
  479. rorl $6,%r13d
  480. addl %r12d,%r10d
  481. andl %esi,%r15d
  482. .byte 143,232,120,194,250,13
  483. xorl %r11d,%r14d
  484. addl %r13d,%r10d
  485. vpsrld $10,%xmm2,%xmm6
  486. xorl %eax,%r15d
  487. addl %r10d,%ecx
  488. .byte 143,232,120,194,239,2
  489. rorl $2,%r14d
  490. addl %r15d,%r10d
  491. vpxor %xmm6,%xmm7,%xmm7
  492. movl %ecx,%r13d
  493. addl %r10d,%r14d
  494. rorl $14,%r13d
  495. movl %r14d,%r10d
  496. vpxor %xmm5,%xmm7,%xmm7
  497. movl %edx,%r12d
  498. xorl %ecx,%r13d
  499. rorl $9,%r14d
  500. xorl %r8d,%r12d
  501. vpslldq $8,%xmm7,%xmm7
  502. rorl $5,%r13d
  503. xorl %r10d,%r14d
  504. andl %ecx,%r12d
  505. vaesenc %xmm10,%xmm9,%xmm9
  506. vmovdqu 160-128(%rdi),%xmm10
  507. xorl %ecx,%r13d
  508. vpaddd %xmm7,%xmm2,%xmm2
  509. addl 40(%rsp),%r9d
  510. movl %r10d,%r15d
  511. rorl $11,%r14d
  512. xorl %r8d,%r12d
  513. vpaddd 64(%rbp),%xmm2,%xmm6
  514. xorl %r11d,%r15d
  515. rorl $6,%r13d
  516. addl %r12d,%r9d
  517. andl %r15d,%esi
  518. xorl %r10d,%r14d
  519. addl %r13d,%r9d
  520. xorl %r11d,%esi
  521. addl %r9d,%ebx
  522. rorl $2,%r14d
  523. addl %esi,%r9d
  524. movl %ebx,%r13d
  525. addl %r9d,%r14d
  526. rorl $14,%r13d
  527. movl %r14d,%r9d
  528. movl %ecx,%r12d
  529. xorl %ebx,%r13d
  530. rorl $9,%r14d
  531. xorl %edx,%r12d
  532. rorl $5,%r13d
  533. xorl %r9d,%r14d
  534. andl %ebx,%r12d
  535. vaesenclast %xmm10,%xmm9,%xmm11
  536. vaesenc %xmm10,%xmm9,%xmm9
  537. vmovdqu 176-128(%rdi),%xmm10
  538. xorl %ebx,%r13d
  539. addl 44(%rsp),%r8d
  540. movl %r9d,%esi
  541. rorl $11,%r14d
  542. xorl %edx,%r12d
  543. xorl %r10d,%esi
  544. rorl $6,%r13d
  545. addl %r12d,%r8d
  546. andl %esi,%r15d
  547. xorl %r9d,%r14d
  548. addl %r13d,%r8d
  549. xorl %r10d,%r15d
  550. addl %r8d,%eax
  551. rorl $2,%r14d
  552. addl %r15d,%r8d
  553. movl %eax,%r13d
  554. addl %r8d,%r14d
  555. vmovdqa %xmm6,32(%rsp)
  556. vpalignr $4,%xmm3,%xmm0,%xmm4
  557. rorl $14,%r13d
  558. movl %r14d,%r8d
  559. vpalignr $4,%xmm1,%xmm2,%xmm7
  560. movl %ebx,%r12d
  561. xorl %eax,%r13d
  562. .byte 143,232,120,194,236,14
  563. rorl $9,%r14d
  564. xorl %ecx,%r12d
  565. vpsrld $3,%xmm4,%xmm4
  566. rorl $5,%r13d
  567. xorl %r8d,%r14d
  568. vpaddd %xmm7,%xmm3,%xmm3
  569. andl %eax,%r12d
  570. vpand %xmm12,%xmm11,%xmm8
  571. vaesenc %xmm10,%xmm9,%xmm9
  572. vmovdqu 192-128(%rdi),%xmm10
  573. xorl %eax,%r13d
  574. addl 48(%rsp),%edx
  575. movl %r8d,%r15d
  576. .byte 143,232,120,194,245,11
  577. rorl $11,%r14d
  578. xorl %ecx,%r12d
  579. vpxor %xmm5,%xmm4,%xmm4
  580. xorl %r9d,%r15d
  581. rorl $6,%r13d
  582. addl %r12d,%edx
  583. andl %r15d,%esi
  584. .byte 143,232,120,194,250,13
  585. xorl %r8d,%r14d
  586. addl %r13d,%edx
  587. vpxor %xmm6,%xmm4,%xmm4
  588. xorl %r9d,%esi
  589. addl %edx,%r11d
  590. vpsrld $10,%xmm2,%xmm6
  591. rorl $2,%r14d
  592. addl %esi,%edx
  593. vpaddd %xmm4,%xmm3,%xmm3
  594. movl %r11d,%r13d
  595. addl %edx,%r14d
  596. .byte 143,232,120,194,239,2
  597. rorl $14,%r13d
  598. movl %r14d,%edx
  599. vpxor %xmm6,%xmm7,%xmm7
  600. movl %eax,%r12d
  601. xorl %r11d,%r13d
  602. rorl $9,%r14d
  603. xorl %ebx,%r12d
  604. vpxor %xmm5,%xmm7,%xmm7
  605. rorl $5,%r13d
  606. xorl %edx,%r14d
  607. andl %r11d,%r12d
  608. vaesenclast %xmm10,%xmm9,%xmm11
  609. vaesenc %xmm10,%xmm9,%xmm9
  610. vmovdqu 208-128(%rdi),%xmm10
  611. xorl %r11d,%r13d
  612. vpsrldq $8,%xmm7,%xmm7
  613. addl 52(%rsp),%ecx
  614. movl %edx,%esi
  615. rorl $11,%r14d
  616. xorl %ebx,%r12d
  617. vpaddd %xmm7,%xmm3,%xmm3
  618. xorl %r8d,%esi
  619. rorl $6,%r13d
  620. addl %r12d,%ecx
  621. andl %esi,%r15d
  622. .byte 143,232,120,194,251,13
  623. xorl %edx,%r14d
  624. addl %r13d,%ecx
  625. vpsrld $10,%xmm3,%xmm6
  626. xorl %r8d,%r15d
  627. addl %ecx,%r10d
  628. .byte 143,232,120,194,239,2
  629. rorl $2,%r14d
  630. addl %r15d,%ecx
  631. vpxor %xmm6,%xmm7,%xmm7
  632. movl %r10d,%r13d
  633. addl %ecx,%r14d
  634. rorl $14,%r13d
  635. movl %r14d,%ecx
  636. vpxor %xmm5,%xmm7,%xmm7
  637. movl %r11d,%r12d
  638. xorl %r10d,%r13d
  639. rorl $9,%r14d
  640. xorl %eax,%r12d
  641. vpslldq $8,%xmm7,%xmm7
  642. rorl $5,%r13d
  643. xorl %ecx,%r14d
  644. andl %r10d,%r12d
  645. vpand %xmm13,%xmm11,%xmm11
  646. vaesenc %xmm10,%xmm9,%xmm9
  647. vmovdqu 224-128(%rdi),%xmm10
  648. xorl %r10d,%r13d
  649. vpaddd %xmm7,%xmm3,%xmm3
  650. addl 56(%rsp),%ebx
  651. movl %ecx,%r15d
  652. rorl $11,%r14d
  653. xorl %eax,%r12d
  654. vpaddd 96(%rbp),%xmm3,%xmm6
  655. xorl %edx,%r15d
  656. rorl $6,%r13d
  657. addl %r12d,%ebx
  658. andl %r15d,%esi
  659. xorl %ecx,%r14d
  660. addl %r13d,%ebx
  661. xorl %edx,%esi
  662. addl %ebx,%r9d
  663. rorl $2,%r14d
  664. addl %esi,%ebx
  665. movl %r9d,%r13d
  666. addl %ebx,%r14d
  667. rorl $14,%r13d
  668. movl %r14d,%ebx
  669. movl %r10d,%r12d
  670. xorl %r9d,%r13d
  671. rorl $9,%r14d
  672. xorl %r11d,%r12d
  673. rorl $5,%r13d
  674. xorl %ebx,%r14d
  675. andl %r9d,%r12d
  676. vpor %xmm11,%xmm8,%xmm8
  677. vaesenclast %xmm10,%xmm9,%xmm11
  678. vmovdqu 0-128(%rdi),%xmm10
  679. xorl %r9d,%r13d
  680. addl 60(%rsp),%eax
  681. movl %ebx,%esi
  682. rorl $11,%r14d
  683. xorl %r11d,%r12d
  684. xorl %ecx,%esi
  685. rorl $6,%r13d
  686. addl %r12d,%eax
  687. andl %esi,%r15d
  688. xorl %ebx,%r14d
  689. addl %r13d,%eax
  690. xorl %ecx,%r15d
  691. addl %eax,%r8d
  692. rorl $2,%r14d
  693. addl %r15d,%eax
  694. movl %r8d,%r13d
  695. addl %eax,%r14d
  696. vmovdqa %xmm6,48(%rsp)
  697. movq 64+0(%rsp),%r12
  698. vpand %xmm14,%xmm11,%xmm11
  699. movq 64+8(%rsp),%r15
  700. vpor %xmm11,%xmm8,%xmm8
  701. vmovdqu %xmm8,(%r15,%r12,1)
  702. leaq 16(%r12),%r12
  703. cmpb $0,131(%rbp)
  704. jne L$xop_00_47
  705. vmovdqu (%r12),%xmm9
  706. movq %r12,64+0(%rsp)
  707. rorl $14,%r13d
  708. movl %r14d,%eax
  709. movl %r9d,%r12d
  710. xorl %r8d,%r13d
  711. rorl $9,%r14d
  712. xorl %r10d,%r12d
  713. rorl $5,%r13d
  714. xorl %eax,%r14d
  715. andl %r8d,%r12d
  716. vpxor %xmm10,%xmm9,%xmm9
  717. vmovdqu 16-128(%rdi),%xmm10
  718. xorl %r8d,%r13d
  719. addl 0(%rsp),%r11d
  720. movl %eax,%r15d
  721. rorl $11,%r14d
  722. xorl %r10d,%r12d
  723. xorl %ebx,%r15d
  724. rorl $6,%r13d
  725. addl %r12d,%r11d
  726. andl %r15d,%esi
  727. xorl %eax,%r14d
  728. addl %r13d,%r11d
  729. xorl %ebx,%esi
  730. addl %r11d,%edx
  731. rorl $2,%r14d
  732. addl %esi,%r11d
  733. movl %edx,%r13d
  734. addl %r11d,%r14d
  735. rorl $14,%r13d
  736. movl %r14d,%r11d
  737. movl %r8d,%r12d
  738. xorl %edx,%r13d
  739. rorl $9,%r14d
  740. xorl %r9d,%r12d
  741. rorl $5,%r13d
  742. xorl %r11d,%r14d
  743. andl %edx,%r12d
  744. vpxor %xmm8,%xmm9,%xmm9
  745. xorl %edx,%r13d
  746. addl 4(%rsp),%r10d
  747. movl %r11d,%esi
  748. rorl $11,%r14d
  749. xorl %r9d,%r12d
  750. xorl %eax,%esi
  751. rorl $6,%r13d
  752. addl %r12d,%r10d
  753. andl %esi,%r15d
  754. xorl %r11d,%r14d
  755. addl %r13d,%r10d
  756. xorl %eax,%r15d
  757. addl %r10d,%ecx
  758. rorl $2,%r14d
  759. addl %r15d,%r10d
  760. movl %ecx,%r13d
  761. addl %r10d,%r14d
  762. rorl $14,%r13d
  763. movl %r14d,%r10d
  764. movl %edx,%r12d
  765. xorl %ecx,%r13d
  766. rorl $9,%r14d
  767. xorl %r8d,%r12d
  768. rorl $5,%r13d
  769. xorl %r10d,%r14d
  770. andl %ecx,%r12d
  771. vaesenc %xmm10,%xmm9,%xmm9
  772. vmovdqu 32-128(%rdi),%xmm10
  773. xorl %ecx,%r13d
  774. addl 8(%rsp),%r9d
  775. movl %r10d,%r15d
  776. rorl $11,%r14d
  777. xorl %r8d,%r12d
  778. xorl %r11d,%r15d
  779. rorl $6,%r13d
  780. addl %r12d,%r9d
  781. andl %r15d,%esi
  782. xorl %r10d,%r14d
  783. addl %r13d,%r9d
  784. xorl %r11d,%esi
  785. addl %r9d,%ebx
  786. rorl $2,%r14d
  787. addl %esi,%r9d
  788. movl %ebx,%r13d
  789. addl %r9d,%r14d
  790. rorl $14,%r13d
  791. movl %r14d,%r9d
  792. movl %ecx,%r12d
  793. xorl %ebx,%r13d
  794. rorl $9,%r14d
  795. xorl %edx,%r12d
  796. rorl $5,%r13d
  797. xorl %r9d,%r14d
  798. andl %ebx,%r12d
  799. vaesenc %xmm10,%xmm9,%xmm9
  800. vmovdqu 48-128(%rdi),%xmm10
  801. xorl %ebx,%r13d
  802. addl 12(%rsp),%r8d
  803. movl %r9d,%esi
  804. rorl $11,%r14d
  805. xorl %edx,%r12d
  806. xorl %r10d,%esi
  807. rorl $6,%r13d
  808. addl %r12d,%r8d
  809. andl %esi,%r15d
  810. xorl %r9d,%r14d
  811. addl %r13d,%r8d
  812. xorl %r10d,%r15d
  813. addl %r8d,%eax
  814. rorl $2,%r14d
  815. addl %r15d,%r8d
  816. movl %eax,%r13d
  817. addl %r8d,%r14d
  818. rorl $14,%r13d
  819. movl %r14d,%r8d
  820. movl %ebx,%r12d
  821. xorl %eax,%r13d
  822. rorl $9,%r14d
  823. xorl %ecx,%r12d
  824. rorl $5,%r13d
  825. xorl %r8d,%r14d
  826. andl %eax,%r12d
  827. vaesenc %xmm10,%xmm9,%xmm9
  828. vmovdqu 64-128(%rdi),%xmm10
  829. xorl %eax,%r13d
  830. addl 16(%rsp),%edx
  831. movl %r8d,%r15d
  832. rorl $11,%r14d
  833. xorl %ecx,%r12d
  834. xorl %r9d,%r15d
  835. rorl $6,%r13d
  836. addl %r12d,%edx
  837. andl %r15d,%esi
  838. xorl %r8d,%r14d
  839. addl %r13d,%edx
  840. xorl %r9d,%esi
  841. addl %edx,%r11d
  842. rorl $2,%r14d
  843. addl %esi,%edx
  844. movl %r11d,%r13d
  845. addl %edx,%r14d
  846. rorl $14,%r13d
  847. movl %r14d,%edx
  848. movl %eax,%r12d
  849. xorl %r11d,%r13d
  850. rorl $9,%r14d
  851. xorl %ebx,%r12d
  852. rorl $5,%r13d
  853. xorl %edx,%r14d
  854. andl %r11d,%r12d
  855. vaesenc %xmm10,%xmm9,%xmm9
  856. vmovdqu 80-128(%rdi),%xmm10
  857. xorl %r11d,%r13d
  858. addl 20(%rsp),%ecx
  859. movl %edx,%esi
  860. rorl $11,%r14d
  861. xorl %ebx,%r12d
  862. xorl %r8d,%esi
  863. rorl $6,%r13d
  864. addl %r12d,%ecx
  865. andl %esi,%r15d
  866. xorl %edx,%r14d
  867. addl %r13d,%ecx
  868. xorl %r8d,%r15d
  869. addl %ecx,%r10d
  870. rorl $2,%r14d
  871. addl %r15d,%ecx
  872. movl %r10d,%r13d
  873. addl %ecx,%r14d
  874. rorl $14,%r13d
  875. movl %r14d,%ecx
  876. movl %r11d,%r12d
  877. xorl %r10d,%r13d
  878. rorl $9,%r14d
  879. xorl %eax,%r12d
  880. rorl $5,%r13d
  881. xorl %ecx,%r14d
  882. andl %r10d,%r12d
  883. vaesenc %xmm10,%xmm9,%xmm9
  884. vmovdqu 96-128(%rdi),%xmm10
  885. xorl %r10d,%r13d
  886. addl 24(%rsp),%ebx
  887. movl %ecx,%r15d
  888. rorl $11,%r14d
  889. xorl %eax,%r12d
  890. xorl %edx,%r15d
  891. rorl $6,%r13d
  892. addl %r12d,%ebx
  893. andl %r15d,%esi
  894. xorl %ecx,%r14d
  895. addl %r13d,%ebx
  896. xorl %edx,%esi
  897. addl %ebx,%r9d
  898. rorl $2,%r14d
  899. addl %esi,%ebx
  900. movl %r9d,%r13d
  901. addl %ebx,%r14d
  902. rorl $14,%r13d
  903. movl %r14d,%ebx
  904. movl %r10d,%r12d
  905. xorl %r9d,%r13d
  906. rorl $9,%r14d
  907. xorl %r11d,%r12d
  908. rorl $5,%r13d
  909. xorl %ebx,%r14d
  910. andl %r9d,%r12d
  911. vaesenc %xmm10,%xmm9,%xmm9
  912. vmovdqu 112-128(%rdi),%xmm10
  913. xorl %r9d,%r13d
  914. addl 28(%rsp),%eax
  915. movl %ebx,%esi
  916. rorl $11,%r14d
  917. xorl %r11d,%r12d
  918. xorl %ecx,%esi
  919. rorl $6,%r13d
  920. addl %r12d,%eax
  921. andl %esi,%r15d
  922. xorl %ebx,%r14d
  923. addl %r13d,%eax
  924. xorl %ecx,%r15d
  925. addl %eax,%r8d
  926. rorl $2,%r14d
  927. addl %r15d,%eax
  928. movl %r8d,%r13d
  929. addl %eax,%r14d
  930. rorl $14,%r13d
  931. movl %r14d,%eax
  932. movl %r9d,%r12d
  933. xorl %r8d,%r13d
  934. rorl $9,%r14d
  935. xorl %r10d,%r12d
  936. rorl $5,%r13d
  937. xorl %eax,%r14d
  938. andl %r8d,%r12d
  939. vaesenc %xmm10,%xmm9,%xmm9
  940. vmovdqu 128-128(%rdi),%xmm10
  941. xorl %r8d,%r13d
  942. addl 32(%rsp),%r11d
  943. movl %eax,%r15d
  944. rorl $11,%r14d
  945. xorl %r10d,%r12d
  946. xorl %ebx,%r15d
  947. rorl $6,%r13d
  948. addl %r12d,%r11d
  949. andl %r15d,%esi
  950. xorl %eax,%r14d
  951. addl %r13d,%r11d
  952. xorl %ebx,%esi
  953. addl %r11d,%edx
  954. rorl $2,%r14d
  955. addl %esi,%r11d
  956. movl %edx,%r13d
  957. addl %r11d,%r14d
  958. rorl $14,%r13d
  959. movl %r14d,%r11d
  960. movl %r8d,%r12d
  961. xorl %edx,%r13d
  962. rorl $9,%r14d
  963. xorl %r9d,%r12d
  964. rorl $5,%r13d
  965. xorl %r11d,%r14d
  966. andl %edx,%r12d
  967. vaesenc %xmm10,%xmm9,%xmm9
  968. vmovdqu 144-128(%rdi),%xmm10
  969. xorl %edx,%r13d
  970. addl 36(%rsp),%r10d
  971. movl %r11d,%esi
  972. rorl $11,%r14d
  973. xorl %r9d,%r12d
  974. xorl %eax,%esi
  975. rorl $6,%r13d
  976. addl %r12d,%r10d
  977. andl %esi,%r15d
  978. xorl %r11d,%r14d
  979. addl %r13d,%r10d
  980. xorl %eax,%r15d
  981. addl %r10d,%ecx
  982. rorl $2,%r14d
  983. addl %r15d,%r10d
  984. movl %ecx,%r13d
  985. addl %r10d,%r14d
  986. rorl $14,%r13d
  987. movl %r14d,%r10d
  988. movl %edx,%r12d
  989. xorl %ecx,%r13d
  990. rorl $9,%r14d
  991. xorl %r8d,%r12d
  992. rorl $5,%r13d
  993. xorl %r10d,%r14d
  994. andl %ecx,%r12d
  995. vaesenc %xmm10,%xmm9,%xmm9
  996. vmovdqu 160-128(%rdi),%xmm10
  997. xorl %ecx,%r13d
  998. addl 40(%rsp),%r9d
  999. movl %r10d,%r15d
  1000. rorl $11,%r14d
  1001. xorl %r8d,%r12d
  1002. xorl %r11d,%r15d
  1003. rorl $6,%r13d
  1004. addl %r12d,%r9d
  1005. andl %r15d,%esi
  1006. xorl %r10d,%r14d
  1007. addl %r13d,%r9d
  1008. xorl %r11d,%esi
  1009. addl %r9d,%ebx
  1010. rorl $2,%r14d
  1011. addl %esi,%r9d
  1012. movl %ebx,%r13d
  1013. addl %r9d,%r14d
  1014. rorl $14,%r13d
  1015. movl %r14d,%r9d
  1016. movl %ecx,%r12d
  1017. xorl %ebx,%r13d
  1018. rorl $9,%r14d
  1019. xorl %edx,%r12d
  1020. rorl $5,%r13d
  1021. xorl %r9d,%r14d
  1022. andl %ebx,%r12d
  1023. vaesenclast %xmm10,%xmm9,%xmm11
  1024. vaesenc %xmm10,%xmm9,%xmm9
  1025. vmovdqu 176-128(%rdi),%xmm10
  1026. xorl %ebx,%r13d
  1027. addl 44(%rsp),%r8d
  1028. movl %r9d,%esi
  1029. rorl $11,%r14d
  1030. xorl %edx,%r12d
  1031. xorl %r10d,%esi
  1032. rorl $6,%r13d
  1033. addl %r12d,%r8d
  1034. andl %esi,%r15d
  1035. xorl %r9d,%r14d
  1036. addl %r13d,%r8d
  1037. xorl %r10d,%r15d
  1038. addl %r8d,%eax
  1039. rorl $2,%r14d
  1040. addl %r15d,%r8d
  1041. movl %eax,%r13d
  1042. addl %r8d,%r14d
  1043. rorl $14,%r13d
  1044. movl %r14d,%r8d
  1045. movl %ebx,%r12d
  1046. xorl %eax,%r13d
  1047. rorl $9,%r14d
  1048. xorl %ecx,%r12d
  1049. rorl $5,%r13d
  1050. xorl %r8d,%r14d
  1051. andl %eax,%r12d
  1052. vpand %xmm12,%xmm11,%xmm8
  1053. vaesenc %xmm10,%xmm9,%xmm9
  1054. vmovdqu 192-128(%rdi),%xmm10
  1055. xorl %eax,%r13d
  1056. addl 48(%rsp),%edx
  1057. movl %r8d,%r15d
  1058. rorl $11,%r14d
  1059. xorl %ecx,%r12d
  1060. xorl %r9d,%r15d
  1061. rorl $6,%r13d
  1062. addl %r12d,%edx
  1063. andl %r15d,%esi
  1064. xorl %r8d,%r14d
  1065. addl %r13d,%edx
  1066. xorl %r9d,%esi
  1067. addl %edx,%r11d
  1068. rorl $2,%r14d
  1069. addl %esi,%edx
  1070. movl %r11d,%r13d
  1071. addl %edx,%r14d
  1072. rorl $14,%r13d
  1073. movl %r14d,%edx
  1074. movl %eax,%r12d
  1075. xorl %r11d,%r13d
  1076. rorl $9,%r14d
  1077. xorl %ebx,%r12d
  1078. rorl $5,%r13d
  1079. xorl %edx,%r14d
  1080. andl %r11d,%r12d
  1081. vaesenclast %xmm10,%xmm9,%xmm11
  1082. vaesenc %xmm10,%xmm9,%xmm9
  1083. vmovdqu 208-128(%rdi),%xmm10
  1084. xorl %r11d,%r13d
  1085. addl 52(%rsp),%ecx
  1086. movl %edx,%esi
  1087. rorl $11,%r14d
  1088. xorl %ebx,%r12d
  1089. xorl %r8d,%esi
  1090. rorl $6,%r13d
  1091. addl %r12d,%ecx
  1092. andl %esi,%r15d
  1093. xorl %edx,%r14d
  1094. addl %r13d,%ecx
  1095. xorl %r8d,%r15d
  1096. addl %ecx,%r10d
  1097. rorl $2,%r14d
  1098. addl %r15d,%ecx
  1099. movl %r10d,%r13d
  1100. addl %ecx,%r14d
  1101. rorl $14,%r13d
  1102. movl %r14d,%ecx
  1103. movl %r11d,%r12d
  1104. xorl %r10d,%r13d
  1105. rorl $9,%r14d
  1106. xorl %eax,%r12d
  1107. rorl $5,%r13d
  1108. xorl %ecx,%r14d
  1109. andl %r10d,%r12d
  1110. vpand %xmm13,%xmm11,%xmm11
  1111. vaesenc %xmm10,%xmm9,%xmm9
  1112. vmovdqu 224-128(%rdi),%xmm10
  1113. xorl %r10d,%r13d
  1114. addl 56(%rsp),%ebx
  1115. movl %ecx,%r15d
  1116. rorl $11,%r14d
  1117. xorl %eax,%r12d
  1118. xorl %edx,%r15d
  1119. rorl $6,%r13d
  1120. addl %r12d,%ebx
  1121. andl %r15d,%esi
  1122. xorl %ecx,%r14d
  1123. addl %r13d,%ebx
  1124. xorl %edx,%esi
  1125. addl %ebx,%r9d
  1126. rorl $2,%r14d
  1127. addl %esi,%ebx
  1128. movl %r9d,%r13d
  1129. addl %ebx,%r14d
  1130. rorl $14,%r13d
  1131. movl %r14d,%ebx
  1132. movl %r10d,%r12d
  1133. xorl %r9d,%r13d
  1134. rorl $9,%r14d
  1135. xorl %r11d,%r12d
  1136. rorl $5,%r13d
  1137. xorl %ebx,%r14d
  1138. andl %r9d,%r12d
  1139. vpor %xmm11,%xmm8,%xmm8
  1140. vaesenclast %xmm10,%xmm9,%xmm11
  1141. vmovdqu 0-128(%rdi),%xmm10
  1142. xorl %r9d,%r13d
  1143. addl 60(%rsp),%eax
  1144. movl %ebx,%esi
  1145. rorl $11,%r14d
  1146. xorl %r11d,%r12d
  1147. xorl %ecx,%esi
  1148. rorl $6,%r13d
  1149. addl %r12d,%eax
  1150. andl %esi,%r15d
  1151. xorl %ebx,%r14d
  1152. addl %r13d,%eax
  1153. xorl %ecx,%r15d
  1154. addl %eax,%r8d
  1155. rorl $2,%r14d
  1156. addl %r15d,%eax
  1157. movl %r8d,%r13d
  1158. addl %eax,%r14d
  1159. movq 64+0(%rsp),%r12
  1160. movq 64+8(%rsp),%r13
  1161. movq 64+40(%rsp),%r15
  1162. movq 64+48(%rsp),%rsi
  1163. vpand %xmm14,%xmm11,%xmm11
  1164. movl %r14d,%eax
  1165. vpor %xmm11,%xmm8,%xmm8
  1166. vmovdqu %xmm8,(%r12,%r13,1)
  1167. leaq 16(%r12),%r12
  1168. addl 0(%r15),%eax
  1169. addl 4(%r15),%ebx
  1170. addl 8(%r15),%ecx
  1171. addl 12(%r15),%edx
  1172. addl 16(%r15),%r8d
  1173. addl 20(%r15),%r9d
  1174. addl 24(%r15),%r10d
  1175. addl 28(%r15),%r11d
  1176. cmpq 64+16(%rsp),%r12
  1177. movl %eax,0(%r15)
  1178. movl %ebx,4(%r15)
  1179. movl %ecx,8(%r15)
  1180. movl %edx,12(%r15)
  1181. movl %r8d,16(%r15)
  1182. movl %r9d,20(%r15)
  1183. movl %r10d,24(%r15)
  1184. movl %r11d,28(%r15)
  1185. jb L$loop_xop
  # Exit path of the XOP routine, reached when the `jb L$loop_xop` above is
  # not taken: store the chaining block, clear the vector registers, restore
  # the callee-saved registers and the stack pointer, then return.
  # NOTE(review): the matching prologue is outside this view; the meaning of
  # the frame slots is inferred from the AVX entry point later in this file,
  # which fills the same offsets -- confirm against the XOP prologue.
  1186. movq 64+32(%rsp),%r8
  1187. movq 120(%rsp),%rsi
  # %xmm8 is the 16-byte block most recently written to the output by the
  # loop body; store it through the pointer reloaded from 64+32(%rsp)
  # (presumably the caller's IV buffer -- TODO confirm).
  1188. vmovdqu %xmm8,(%r8)
  # Zero all vector registers (they held round keys and message data).
  1189. vzeroall
  # %rsi is the frame base loaded from 120(%rsp) (presumably %rsp at entry).
  # The six 8-byte values directly below it are read back in the order
  # %r15 (lowest address) ... %rbx (highest address).
  1190. movq -48(%rsi),%r15
  1191. movq -40(%rsi),%r14
  1192. movq -32(%rsi),%r13
  1193. movq -24(%rsi),%r12
  1194. movq -16(%rsi),%rbp
  1195. movq -8(%rsi),%rbx
  # Restore the stack pointer from the same base.
  1196. leaq (%rsi),%rsp
  1197. L$epilogue_xop:
  # The bytes F3 C3 encode `rep ret`.
  1198. .byte 0xf3,0xc3
  1199. .p2align 6
  # aesni_cbc_sha256_enc_avx / L$avx_shortcut: entry point and stack-frame
  # setup for the AVX code path.
  #
  # Inputs as consumed by the code in this file (AT&T operand order):
  #   %rdi     base pointer; becomes the running cursor (%r12) in the setup
  #            code that follows
  #   %rsi     second pointer; only its distance from %rdi is kept
  #   %rdx     count, shifted left by 6 (x64) and added to %rdi to form an
  #            end address
  #   %rcx     pointer to data read in 16-byte steps as vaesenc operands,
  #            with a 32-bit field at offset 240 (presumably the AES key
  #            schedule and its round count -- TODO confirm)
  #   %r8      pointer to a 16-byte block loaded into %xmm8 (presumably the
  #            CBC IV -- TODO confirm)
  #   %r9      pointer to eight 32-bit words (presumably the SHA-256 state --
  #            TODO confirm)
  #   8(%rsp)  seventh argument, a pointer; only its distance from %rdi is
  #            kept
  #
  # Frame layout once this block has run (%rsp is 64-byte aligned):
  #     0..63(%rsp)  four 16-byte slots written by the loop code
  #     64+0 (%rsp)  written later by the round code (current cursor)
  #     64+8 (%rsp)  %rsi - %rdi
  #     64+16(%rsp)  %rdi + 64*%rdx
  #     64+32(%rsp)  %r8
  #     64+40(%rsp)  %r9
  #     64+48(%rsp)  seventh argument - %rdi
  #     120  (%rsp)  value of %rsp on entry
  1200. aesni_cbc_sha256_enc_avx:
  1201. L$avx_shortcut:
  # Fetch the stack-passed argument and remember the entry %rsp before any
  # push changes it.
  1202. movq 8(%rsp),%r10
  1203. movq %rsp,%rax
  # Save the callee-saved registers; they end up at -8(%rax) .. -48(%rax).
  1204. pushq %rbx
  1205. pushq %rbp
  1206. pushq %r12
  1207. pushq %r13
  1208. pushq %r14
  1209. pushq %r15
  # Reserve 128 bytes, then round %rsp down to a multiple of 64. The loop
  # code uses aligned stores (vmovdqa) to 0..48(%rsp).
  1210. subq $128,%rsp
  1211. andq $-64,%rsp
  # Convert the count to bytes and turn the two other pointers into offsets
  # relative to %rdi, so a single cursor can address all three streams.
  1212. shlq $6,%rdx
  1213. subq %rdi,%rsi
  1214. subq %rdi,%r10
  1215. addq %rdi,%rdx
  # Spill the derived values and the pointer arguments; the registers that
  # hold them are overwritten by the setup code that follows.
  1216. movq %rsi,64+8(%rsp)
  1217. movq %rdx,64+16(%rsp)
  1218. movq %r8,64+32(%rsp)
  1219. movq %r9,64+40(%rsp)
  1220. movq %r10,64+48(%rsp)
  1221. movq %rax,120(%rsp)
  # L$prologue_avx: one-time register setup before the per-block loop.
  # On exit from this block:
  #   %r12            = original %rdi (running cursor)
  #   %rdi            = original %rcx + 128
  #   %rsi            = offset computed at entry (seventh argument - %rdi)
  #   %xmm8           = 16 bytes read from (%r8)
  #   %xmm10          = 16 bytes read from original %rcx + 0
  #   %xmm12..%xmm14  = three 16-byte vectors selected from K256+544
  #   %eax,%ebx,%ecx,%edx,%r8d..%r11d = eight 32-bit words read from (%r9)
  1222. L$prologue_avx:
  1223. vzeroall
  1224. movq %rdi,%r12
  # Bias the %rcx pointer by 128; later loads address it as N-128(%rdi)
  # (presumably so the displacements fit in a signed byte -- TODO confirm).
  1225. leaq 128(%rcx),%rdi
  1226. leaq K256+544(%rip),%r13
  # 32-bit field at original %rcx + 240 (presumably the AES round count --
  # TODO confirm). The load zero-extends into %r14.
  1227. movl 240-128(%rdi),%r14d
  # Copy %r9 and %r10 elsewhere: %r9d/%r10d are overwritten by the word
  # loads below.
  1228. movq %r9,%r15
  1229. movq %r10,%rsi
  # %xmm8 is XOR-ed into each input block before the first vaesenc in the
  # round code, i.e. it is the CBC chaining value.
  1230. vmovdqu (%r8),%xmm8
  # Turn the field loaded above into an index, scaled by 8 in the table
  # loads below.
  1231. subq $9,%r14
  # Load the eight 32-bit working words. This overwrites %rcx, %rdx, %r8,
  # %r9 and %r10, all of which were saved or copied earlier.
  1232. movl 0(%r15),%eax
  1233. movl 4(%r15),%ebx
  1234. movl 8(%r15),%ecx
  1235. movl 12(%r15),%edx
  1236. movl 16(%r15),%r8d
  1237. movl 20(%r15),%r9d
  1238. movl 24(%r15),%r10d
  1239. movl 28(%r15),%r11d
  # Three consecutive 16-byte vectors starting at K256+544 + 8*%r14.
  # %xmm12 and %xmm13 are AND-ed with vaesenclast results in the round code
  # below; %xmm14 is presumably used the same way past the end of this view.
  # vmovdqa requires these addresses to be 16-byte aligned.
  1240. vmovdqa 0(%r13,%r14,8),%xmm14
  1241. vmovdqa 16(%r13,%r14,8),%xmm13
  1242. vmovdqa 32(%r13,%r14,8),%xmm12
  # First 16 bytes at the original %rcx; XOR-ed into the input block at the
  # start of the round code (presumably round key 0 -- TODO confirm).
  1243. vmovdqu 0-128(%rdi),%xmm10
  1244. jmp L$loop_avx
  1245. .p2align 4
  # L$loop_avx: head of the per-block loop. Loads 64 bytes of message data,
  # prepares the first 16 message words plus constants on the stack, and
  # seeds the scalar temporaries used by the round code at L$avx_00_47.
  1246. L$loop_avx:
  # Shuffle control vector for vpshufb (presumably a big-endian byte-swap
  # mask -- TODO confirm against the K256 table definition).
  1247. vmovdqa K256+512(%rip),%xmm7
  # 64 bytes from %rsi + %r12: %r12 is the cursor and %rsi the offset
  # computed at entry, so this reads the stream given by the seventh argument.
  1248. vmovdqu 0(%rsi,%r12,1),%xmm0
  1249. vmovdqu 16(%rsi,%r12,1),%xmm1
  1250. vmovdqu 32(%rsi,%r12,1),%xmm2
  1251. vmovdqu 48(%rsi,%r12,1),%xmm3
  1252. vpshufb %xmm7,%xmm0,%xmm0
  # %rbp walks the K256 table; the round code advances it by 128 bytes at
  # L$avx_00_47.
  1253. leaq K256(%rip),%rbp
  1254. vpshufb %xmm7,%xmm1,%xmm1
  1255. vpshufb %xmm7,%xmm2,%xmm2
  # Add four constants per vector; the rows used are 32 bytes apart
  # (offsets 0, 32, 64, 96).
  1256. vpaddd 0(%rbp),%xmm0,%xmm4
  1257. vpshufb %xmm7,%xmm3,%xmm3
  1258. vpaddd 32(%rbp),%xmm1,%xmm5
  1259. vpaddd 64(%rbp),%xmm2,%xmm6
  # %xmm7 (the shuffle mask) is dead after the last vpshufb and is reused
  # here as the destination.
  1260. vpaddd 96(%rbp),%xmm3,%xmm7
  # Store the sums to 0..63(%rsp); the scalar rounds read them one 32-bit
  # word at a time via `addl N(%rsp),...`. The stores are interleaved with
  # the scalar seeding below.
  1261. vmovdqa %xmm4,0(%rsp)
  # %r14d = copy of %eax; the round code rotates and XORs it.
  1262. movl %eax,%r14d
  1263. vmovdqa %xmm5,16(%rsp)
  # %esi = %ebx ^ %ecx. This overwrites %rsi, which held the source offset
  # used by the loads above (the reload for the next iteration is not
  # visible here).
  1264. movl %ebx,%esi
  1265. vmovdqa %xmm6,32(%rsp)
  1266. xorl %ecx,%esi
  1267. vmovdqa %xmm7,48(%rsp)
  # %r13d = copy of %r8d; the round code rotates and XORs it.
  1268. movl %r8d,%r13d
  1269. jmp L$avx_00_47
  1270. .p2align 4
  1271. L$avx_00_47:
  1272. subq $-32*4,%rbp
  1273. vmovdqu (%r12),%xmm9
  1274. movq %r12,64+0(%rsp)
  1275. vpalignr $4,%xmm0,%xmm1,%xmm4
  1276. shrdl $14,%r13d,%r13d
  1277. movl %r14d,%eax
  1278. movl %r9d,%r12d
  1279. vpalignr $4,%xmm2,%xmm3,%xmm7
  1280. xorl %r8d,%r13d
  1281. shrdl $9,%r14d,%r14d
  1282. xorl %r10d,%r12d
  1283. vpsrld $7,%xmm4,%xmm6
  1284. shrdl $5,%r13d,%r13d
  1285. xorl %eax,%r14d
  1286. andl %r8d,%r12d
  1287. vpaddd %xmm7,%xmm0,%xmm0
  1288. vpxor %xmm10,%xmm9,%xmm9
  1289. vmovdqu 16-128(%rdi),%xmm10
  1290. xorl %r8d,%r13d
  1291. addl 0(%rsp),%r11d
  1292. movl %eax,%r15d
  1293. vpsrld $3,%xmm4,%xmm7
  1294. shrdl $11,%r14d,%r14d
  1295. xorl %r10d,%r12d
  1296. xorl %ebx,%r15d
  1297. vpslld $14,%xmm4,%xmm5
  1298. shrdl $6,%r13d,%r13d
  1299. addl %r12d,%r11d
  1300. andl %r15d,%esi
  1301. vpxor %xmm6,%xmm7,%xmm4
  1302. xorl %eax,%r14d
  1303. addl %r13d,%r11d
  1304. xorl %ebx,%esi
  1305. vpshufd $250,%xmm3,%xmm7
  1306. addl %r11d,%edx
  1307. shrdl $2,%r14d,%r14d
  1308. addl %esi,%r11d
  1309. vpsrld $11,%xmm6,%xmm6
  1310. movl %edx,%r13d
  1311. addl %r11d,%r14d
  1312. shrdl $14,%r13d,%r13d
  1313. vpxor %xmm5,%xmm4,%xmm4
  1314. movl %r14d,%r11d
  1315. movl %r8d,%r12d
  1316. xorl %edx,%r13d
  1317. vpslld $11,%xmm5,%xmm5
  1318. shrdl $9,%r14d,%r14d
  1319. xorl %r9d,%r12d
  1320. shrdl $5,%r13d,%r13d
  1321. vpxor %xmm6,%xmm4,%xmm4
  1322. xorl %r11d,%r14d
  1323. andl %edx,%r12d
  1324. vpxor %xmm8,%xmm9,%xmm9
  1325. xorl %edx,%r13d
  1326. vpsrld $10,%xmm7,%xmm6
  1327. addl 4(%rsp),%r10d
  1328. movl %r11d,%esi
  1329. shrdl $11,%r14d,%r14d
  1330. vpxor %xmm5,%xmm4,%xmm4
  1331. xorl %r9d,%r12d
  1332. xorl %eax,%esi
  1333. shrdl $6,%r13d,%r13d
  1334. vpsrlq $17,%xmm7,%xmm7
  1335. addl %r12d,%r10d
  1336. andl %esi,%r15d
  1337. xorl %r11d,%r14d
  1338. vpaddd %xmm4,%xmm0,%xmm0
  1339. addl %r13d,%r10d
  1340. xorl %eax,%r15d
  1341. addl %r10d,%ecx
  1342. vpxor %xmm7,%xmm6,%xmm6
  1343. shrdl $2,%r14d,%r14d
  1344. addl %r15d,%r10d
  1345. movl %ecx,%r13d
  1346. vpsrlq $2,%xmm7,%xmm7
  1347. addl %r10d,%r14d
  1348. shrdl $14,%r13d,%r13d
  1349. movl %r14d,%r10d
  1350. vpxor %xmm7,%xmm6,%xmm6
  1351. movl %edx,%r12d
  1352. xorl %ecx,%r13d
  1353. shrdl $9,%r14d,%r14d
  1354. vpshufd $132,%xmm6,%xmm6
  1355. xorl %r8d,%r12d
  1356. shrdl $5,%r13d,%r13d
  1357. xorl %r10d,%r14d
  1358. vpsrldq $8,%xmm6,%xmm6
  1359. andl %ecx,%r12d
  1360. vaesenc %xmm10,%xmm9,%xmm9
  1361. vmovdqu 32-128(%rdi),%xmm10
  1362. xorl %ecx,%r13d
  1363. addl 8(%rsp),%r9d
  1364. vpaddd %xmm6,%xmm0,%xmm0
  1365. movl %r10d,%r15d
  1366. shrdl $11,%r14d,%r14d
  1367. xorl %r8d,%r12d
  1368. vpshufd $80,%xmm0,%xmm7
  1369. xorl %r11d,%r15d
  1370. shrdl $6,%r13d,%r13d
  1371. addl %r12d,%r9d
  1372. vpsrld $10,%xmm7,%xmm6
  1373. andl %r15d,%esi
  1374. xorl %r10d,%r14d
  1375. addl %r13d,%r9d
  1376. vpsrlq $17,%xmm7,%xmm7
  1377. xorl %r11d,%esi
  1378. addl %r9d,%ebx
  1379. shrdl $2,%r14d,%r14d
  1380. vpxor %xmm7,%xmm6,%xmm6
  1381. addl %esi,%r9d
  1382. movl %ebx,%r13d
  1383. addl %r9d,%r14d
  1384. vpsrlq $2,%xmm7,%xmm7
  1385. shrdl $14,%r13d,%r13d
  1386. movl %r14d,%r9d
  1387. movl %ecx,%r12d
  1388. vpxor %xmm7,%xmm6,%xmm6
  1389. xorl %ebx,%r13d
  1390. shrdl $9,%r14d,%r14d
  1391. xorl %edx,%r12d
  1392. vpshufd $232,%xmm6,%xmm6
  1393. shrdl $5,%r13d,%r13d
  1394. xorl %r9d,%r14d
  1395. andl %ebx,%r12d
  1396. vpslldq $8,%xmm6,%xmm6
  1397. vaesenc %xmm10,%xmm9,%xmm9
  1398. vmovdqu 48-128(%rdi),%xmm10
  1399. xorl %ebx,%r13d
  1400. addl 12(%rsp),%r8d
  1401. movl %r9d,%esi
  1402. vpaddd %xmm6,%xmm0,%xmm0
  1403. shrdl $11,%r14d,%r14d
  1404. xorl %edx,%r12d
  1405. xorl %r10d,%esi
  1406. vpaddd 0(%rbp),%xmm0,%xmm6
  1407. shrdl $6,%r13d,%r13d
  1408. addl %r12d,%r8d
  1409. andl %esi,%r15d
  1410. xorl %r9d,%r14d
  1411. addl %r13d,%r8d
  1412. xorl %r10d,%r15d
  1413. addl %r8d,%eax
  1414. shrdl $2,%r14d,%r14d
  1415. addl %r15d,%r8d
  1416. movl %eax,%r13d
  1417. addl %r8d,%r14d
  1418. vmovdqa %xmm6,0(%rsp)
  1419. vpalignr $4,%xmm1,%xmm2,%xmm4
  1420. shrdl $14,%r13d,%r13d
  1421. movl %r14d,%r8d
  1422. movl %ebx,%r12d
  1423. vpalignr $4,%xmm3,%xmm0,%xmm7
  1424. xorl %eax,%r13d
  1425. shrdl $9,%r14d,%r14d
  1426. xorl %ecx,%r12d
  1427. vpsrld $7,%xmm4,%xmm6
  1428. shrdl $5,%r13d,%r13d
  1429. xorl %r8d,%r14d
  1430. andl %eax,%r12d
  1431. vpaddd %xmm7,%xmm1,%xmm1
  1432. vaesenc %xmm10,%xmm9,%xmm9
  1433. vmovdqu 64-128(%rdi),%xmm10
  1434. xorl %eax,%r13d
  1435. addl 16(%rsp),%edx
  1436. movl %r8d,%r15d
  1437. vpsrld $3,%xmm4,%xmm7
  1438. shrdl $11,%r14d,%r14d
  1439. xorl %ecx,%r12d
  1440. xorl %r9d,%r15d
  1441. vpslld $14,%xmm4,%xmm5
  1442. shrdl $6,%r13d,%r13d
  1443. addl %r12d,%edx
  1444. andl %r15d,%esi
  1445. vpxor %xmm6,%xmm7,%xmm4
  1446. xorl %r8d,%r14d
  1447. addl %r13d,%edx
  1448. xorl %r9d,%esi
  1449. vpshufd $250,%xmm0,%xmm7
  1450. addl %edx,%r11d
  1451. shrdl $2,%r14d,%r14d
  1452. addl %esi,%edx
  1453. vpsrld $11,%xmm6,%xmm6
  1454. movl %r11d,%r13d
  1455. addl %edx,%r14d
  1456. shrdl $14,%r13d,%r13d
  1457. vpxor %xmm5,%xmm4,%xmm4
  1458. movl %r14d,%edx
  1459. movl %eax,%r12d
  1460. xorl %r11d,%r13d
  1461. vpslld $11,%xmm5,%xmm5
  1462. shrdl $9,%r14d,%r14d
  1463. xorl %ebx,%r12d
  1464. shrdl $5,%r13d,%r13d
  1465. vpxor %xmm6,%xmm4,%xmm4
  1466. xorl %edx,%r14d
  1467. andl %r11d,%r12d
  1468. vaesenc %xmm10,%xmm9,%xmm9
  1469. vmovdqu 80-128(%rdi),%xmm10
  1470. xorl %r11d,%r13d
  1471. vpsrld $10,%xmm7,%xmm6
  1472. addl 20(%rsp),%ecx
  1473. movl %edx,%esi
  1474. shrdl $11,%r14d,%r14d
  1475. vpxor %xmm5,%xmm4,%xmm4
  1476. xorl %ebx,%r12d
  1477. xorl %r8d,%esi
  1478. shrdl $6,%r13d,%r13d
  1479. vpsrlq $17,%xmm7,%xmm7
  1480. addl %r12d,%ecx
  1481. andl %esi,%r15d
  1482. xorl %edx,%r14d
  1483. vpaddd %xmm4,%xmm1,%xmm1
  1484. addl %r13d,%ecx
  1485. xorl %r8d,%r15d
  1486. addl %ecx,%r10d
  1487. vpxor %xmm7,%xmm6,%xmm6
  1488. shrdl $2,%r14d,%r14d
  1489. addl %r15d,%ecx
  1490. movl %r10d,%r13d
  1491. vpsrlq $2,%xmm7,%xmm7
  1492. addl %ecx,%r14d
  1493. shrdl $14,%r13d,%r13d
  1494. movl %r14d,%ecx
  1495. vpxor %xmm7,%xmm6,%xmm6
  1496. movl %r11d,%r12d
  1497. xorl %r10d,%r13d
  1498. shrdl $9,%r14d,%r14d
  1499. vpshufd $132,%xmm6,%xmm6
  1500. xorl %eax,%r12d
  1501. shrdl $5,%r13d,%r13d
  1502. xorl %ecx,%r14d
  1503. vpsrldq $8,%xmm6,%xmm6
  1504. andl %r10d,%r12d
  1505. vaesenc %xmm10,%xmm9,%xmm9
  1506. vmovdqu 96-128(%rdi),%xmm10
  1507. xorl %r10d,%r13d
  1508. addl 24(%rsp),%ebx
  1509. vpaddd %xmm6,%xmm1,%xmm1
  1510. movl %ecx,%r15d
  1511. shrdl $11,%r14d,%r14d
  1512. xorl %eax,%r12d
  1513. vpshufd $80,%xmm1,%xmm7
  1514. xorl %edx,%r15d
  1515. shrdl $6,%r13d,%r13d
  1516. addl %r12d,%ebx
  1517. vpsrld $10,%xmm7,%xmm6
  1518. andl %r15d,%esi
  1519. xorl %ecx,%r14d
  1520. addl %r13d,%ebx
  1521. vpsrlq $17,%xmm7,%xmm7
  1522. xorl %edx,%esi
  1523. addl %ebx,%r9d
  1524. shrdl $2,%r14d,%r14d
  1525. vpxor %xmm7,%xmm6,%xmm6
  1526. addl %esi,%ebx
  1527. movl %r9d,%r13d
  1528. addl %ebx,%r14d
  1529. vpsrlq $2,%xmm7,%xmm7
  1530. shrdl $14,%r13d,%r13d
  1531. movl %r14d,%ebx
  1532. movl %r10d,%r12d
  1533. vpxor %xmm7,%xmm6,%xmm6
  1534. xorl %r9d,%r13d
  1535. shrdl $9,%r14d,%r14d
  1536. xorl %r11d,%r12d
  1537. vpshufd $232,%xmm6,%xmm6
  1538. shrdl $5,%r13d,%r13d
  1539. xorl %ebx,%r14d
  1540. andl %r9d,%r12d
  1541. vpslldq $8,%xmm6,%xmm6
  1542. vaesenc %xmm10,%xmm9,%xmm9
  1543. vmovdqu 112-128(%rdi),%xmm10
  1544. xorl %r9d,%r13d
  1545. addl 28(%rsp),%eax
  1546. movl %ebx,%esi
  1547. vpaddd %xmm6,%xmm1,%xmm1
  1548. shrdl $11,%r14d,%r14d
  1549. xorl %r11d,%r12d
  1550. xorl %ecx,%esi
  1551. vpaddd 32(%rbp),%xmm1,%xmm6
  1552. shrdl $6,%r13d,%r13d
  1553. addl %r12d,%eax
  1554. andl %esi,%r15d
  1555. xorl %ebx,%r14d
  1556. addl %r13d,%eax
  1557. xorl %ecx,%r15d
  1558. addl %eax,%r8d
  1559. shrdl $2,%r14d,%r14d
  1560. addl %r15d,%eax
  1561. movl %r8d,%r13d
  1562. addl %eax,%r14d
  1563. vmovdqa %xmm6,16(%rsp)
  1564. vpalignr $4,%xmm2,%xmm3,%xmm4
  1565. shrdl $14,%r13d,%r13d
  1566. movl %r14d,%eax
  1567. movl %r9d,%r12d
  1568. vpalignr $4,%xmm0,%xmm1,%xmm7
  1569. xorl %r8d,%r13d
  1570. shrdl $9,%r14d,%r14d
  1571. xorl %r10d,%r12d
  1572. vpsrld $7,%xmm4,%xmm6
  1573. shrdl $5,%r13d,%r13d
  1574. xorl %eax,%r14d
  1575. andl %r8d,%r12d
  1576. vpaddd %xmm7,%xmm2,%xmm2
  1577. vaesenc %xmm10,%xmm9,%xmm9
  1578. vmovdqu 128-128(%rdi),%xmm10
  1579. xorl %r8d,%r13d
  1580. addl 32(%rsp),%r11d
  1581. movl %eax,%r15d
  1582. vpsrld $3,%xmm4,%xmm7
  1583. shrdl $11,%r14d,%r14d
  1584. xorl %r10d,%r12d
  1585. xorl %ebx,%r15d
  1586. vpslld $14,%xmm4,%xmm5
  1587. shrdl $6,%r13d,%r13d
  1588. addl %r12d,%r11d
  1589. andl %r15d,%esi
  1590. vpxor %xmm6,%xmm7,%xmm4
  1591. xorl %eax,%r14d
  1592. addl %r13d,%r11d
  1593. xorl %ebx,%esi
  1594. vpshufd $250,%xmm1,%xmm7
  1595. addl %r11d,%edx
  1596. shrdl $2,%r14d,%r14d
  1597. addl %esi,%r11d
  1598. vpsrld $11,%xmm6,%xmm6
  1599. movl %edx,%r13d
  1600. addl %r11d,%r14d
  1601. shrdl $14,%r13d,%r13d
  1602. vpxor %xmm5,%xmm4,%xmm4
  1603. movl %r14d,%r11d
  1604. movl %r8d,%r12d
  1605. xorl %edx,%r13d
  1606. vpslld $11,%xmm5,%xmm5
  1607. shrdl $9,%r14d,%r14d
  1608. xorl %r9d,%r12d
  1609. shrdl $5,%r13d,%r13d
  1610. vpxor %xmm6,%xmm4,%xmm4
  1611. xorl %r11d,%r14d
  1612. andl %edx,%r12d
  1613. vaesenc %xmm10,%xmm9,%xmm9
  1614. vmovdqu 144-128(%rdi),%xmm10
  1615. xorl %edx,%r13d
  1616. vpsrld $10,%xmm7,%xmm6
  1617. addl 36(%rsp),%r10d
  1618. movl %r11d,%esi
  1619. shrdl $11,%r14d,%r14d
  1620. vpxor %xmm5,%xmm4,%xmm4
  1621. xorl %r9d,%r12d
  1622. xorl %eax,%esi
  1623. shrdl $6,%r13d,%r13d
  1624. vpsrlq $17,%xmm7,%xmm7
  1625. addl %r12d,%r10d
  1626. andl %esi,%r15d
  1627. xorl %r11d,%r14d
  1628. vpaddd %xmm4,%xmm2,%xmm2
  1629. addl %r13d,%r10d
  1630. xorl %eax,%r15d
  1631. addl %r10d,%ecx
  1632. vpxor %xmm7,%xmm6,%xmm6
  1633. shrdl $2,%r14d,%r14d
  1634. addl %r15d,%r10d
  1635. movl %ecx,%r13d
  1636. vpsrlq $2,%xmm7,%xmm7
  1637. addl %r10d,%r14d
  1638. shrdl $14,%r13d,%r13d
  1639. movl %r14d,%r10d
  1640. vpxor %xmm7,%xmm6,%xmm6
  1641. movl %edx,%r12d
  1642. xorl %ecx,%r13d
  1643. shrdl $9,%r14d,%r14d
  1644. vpshufd $132,%xmm6,%xmm6
  1645. xorl %r8d,%r12d
  1646. shrdl $5,%r13d,%r13d
  1647. xorl %r10d,%r14d
  1648. vpsrldq $8,%xmm6,%xmm6
  1649. andl %ecx,%r12d
  1650. vaesenc %xmm10,%xmm9,%xmm9
  1651. vmovdqu 160-128(%rdi),%xmm10
  1652. xorl %ecx,%r13d
  1653. addl 40(%rsp),%r9d
  1654. vpaddd %xmm6,%xmm2,%xmm2
  1655. movl %r10d,%r15d
  1656. shrdl $11,%r14d,%r14d
  1657. xorl %r8d,%r12d
  1658. vpshufd $80,%xmm2,%xmm7
  1659. xorl %r11d,%r15d
  1660. shrdl $6,%r13d,%r13d
  1661. addl %r12d,%r9d
  1662. vpsrld $10,%xmm7,%xmm6
  1663. andl %r15d,%esi
  1664. xorl %r10d,%r14d
  1665. addl %r13d,%r9d
  1666. vpsrlq $17,%xmm7,%xmm7
  1667. xorl %r11d,%esi
  1668. addl %r9d,%ebx
  1669. shrdl $2,%r14d,%r14d
  1670. vpxor %xmm7,%xmm6,%xmm6
  1671. addl %esi,%r9d
  1672. movl %ebx,%r13d
  1673. addl %r9d,%r14d
  1674. vpsrlq $2,%xmm7,%xmm7
  1675. shrdl $14,%r13d,%r13d
  1676. movl %r14d,%r9d
  1677. movl %ecx,%r12d
  1678. vpxor %xmm7,%xmm6,%xmm6
  1679. xorl %ebx,%r13d
  1680. shrdl $9,%r14d,%r14d
  1681. xorl %edx,%r12d
  1682. vpshufd $232,%xmm6,%xmm6
  1683. shrdl $5,%r13d,%r13d
  1684. xorl %r9d,%r14d
  1685. andl %ebx,%r12d
  1686. vpslldq $8,%xmm6,%xmm6
  1687. vaesenclast %xmm10,%xmm9,%xmm11
  1688. vaesenc %xmm10,%xmm9,%xmm9
  1689. vmovdqu 176-128(%rdi),%xmm10
  1690. xorl %ebx,%r13d
  1691. addl 44(%rsp),%r8d
  1692. movl %r9d,%esi
  1693. vpaddd %xmm6,%xmm2,%xmm2
  1694. shrdl $11,%r14d,%r14d
  1695. xorl %edx,%r12d
  1696. xorl %r10d,%esi
  1697. vpaddd 64(%rbp),%xmm2,%xmm6
  1698. shrdl $6,%r13d,%r13d
  1699. addl %r12d,%r8d
  1700. andl %esi,%r15d
  1701. xorl %r9d,%r14d
  1702. addl %r13d,%r8d
  1703. xorl %r10d,%r15d
  1704. addl %r8d,%eax
  1705. shrdl $2,%r14d,%r14d
  1706. addl %r15d,%r8d
  1707. movl %eax,%r13d
  1708. addl %r8d,%r14d
  1709. vmovdqa %xmm6,32(%rsp)
  1710. vpalignr $4,%xmm3,%xmm0,%xmm4
  1711. shrdl $14,%r13d,%r13d
  1712. movl %r14d,%r8d
  1713. movl %ebx,%r12d
  1714. vpalignr $4,%xmm1,%xmm2,%xmm7
  1715. xorl %eax,%r13d
  1716. shrdl $9,%r14d,%r14d
  1717. xorl %ecx,%r12d
  1718. vpsrld $7,%xmm4,%xmm6
  1719. shrdl $5,%r13d,%r13d
  1720. xorl %r8d,%r14d
  1721. andl %eax,%r12d
  1722. vpaddd %xmm7,%xmm3,%xmm3
  1723. vpand %xmm12,%xmm11,%xmm8
  1724. vaesenc %xmm10,%xmm9,%xmm9
  1725. vmovdqu 192-128(%rdi),%xmm10
  1726. xorl %eax,%r13d
  1727. addl 48(%rsp),%edx
  1728. movl %r8d,%r15d
  1729. vpsrld $3,%xmm4,%xmm7
  1730. shrdl $11,%r14d,%r14d
  1731. xorl %ecx,%r12d
  1732. xorl %r9d,%r15d
  1733. vpslld $14,%xmm4,%xmm5
  1734. shrdl $6,%r13d,%r13d
  1735. addl %r12d,%edx
  1736. andl %r15d,%esi
  1737. vpxor %xmm6,%xmm7,%xmm4
  1738. xorl %r8d,%r14d
  1739. addl %r13d,%edx
  1740. xorl %r9d,%esi
  1741. vpshufd $250,%xmm2,%xmm7
  1742. addl %edx,%r11d
  1743. shrdl $2,%r14d,%r14d
  1744. addl %esi,%edx
  1745. vpsrld $11,%xmm6,%xmm6
  1746. movl %r11d,%r13d
  1747. addl %edx,%r14d
  1748. shrdl $14,%r13d,%r13d
  1749. vpxor %xmm5,%xmm4,%xmm4
  1750. movl %r14d,%edx
  1751. movl %eax,%r12d
  1752. xorl %r11d,%r13d
  1753. vpslld $11,%xmm5,%xmm5
  1754. shrdl $9,%r14d,%r14d
  1755. xorl %ebx,%r12d
  1756. shrdl $5,%r13d,%r13d
  1757. vpxor %xmm6,%xmm4,%xmm4
  1758. xorl %edx,%r14d
  1759. andl %r11d,%r12d
  1760. vaesenclast %xmm10,%xmm9,%xmm11
  1761. vaesenc %xmm10,%xmm9,%xmm9
  1762. vmovdqu 208-128(%rdi),%xmm10
  1763. xorl %r11d,%r13d
  1764. vpsrld $10,%xmm7,%xmm6
  1765. addl 52(%rsp),%ecx
  1766. movl %edx,%esi
  1767. shrdl $11,%r14d,%r14d
  1768. vpxor %xmm5,%xmm4,%xmm4
  1769. xorl %ebx,%r12d
  1770. xorl %r8d,%esi
  1771. shrdl $6,%r13d,%r13d
  1772. vpsrlq $17,%xmm7,%xmm7
  1773. addl %r12d,%ecx
  1774. andl %esi,%r15d
  1775. xorl %edx,%r14d
  1776. vpaddd %xmm4,%xmm3,%xmm3
  1777. addl %r13d,%ecx
  1778. xorl %r8d,%r15d
  1779. addl %ecx,%r10d
  1780. vpxor %xmm7,%xmm6,%xmm6
  1781. shrdl $2,%r14d,%r14d
  1782. addl %r15d,%ecx
  1783. movl %r10d,%r13d
  1784. vpsrlq $2,%xmm7,%xmm7
  1785. addl %ecx,%r14d
  1786. shrdl $14,%r13d,%r13d
  1787. movl %r14d,%ecx
  1788. vpxor %xmm7,%xmm6,%xmm6
  1789. movl %r11d,%r12d
  1790. xorl %r10d,%r13d
  1791. shrdl $9,%r14d,%r14d
  1792. vpshufd $132,%xmm6,%xmm6
  1793. xorl %eax,%r12d
  1794. shrdl $5,%r13d,%r13d
  1795. xorl %ecx,%r14d
  1796. vpsrldq $8,%xmm6,%xmm6
  1797. andl %r10d,%r12d
  1798. vpand %xmm13,%xmm11,%xmm11
  1799. vaesenc %xmm10,%xmm9,%xmm9
  1800. vmovdqu 224-128(%rdi),%xmm10
  1801. xorl %r10d,%r13d
  1802. addl 56(%rsp),%ebx
  1803. vpaddd %xmm6,%xmm3,%xmm3
  1804. movl %ecx,%r15d
  1805. shrdl $11,%r14d,%r14d
  1806. xorl %eax,%r12d
  1807. vpshufd $80,%xmm3,%xmm7
  1808. xorl %edx,%r15d
  1809. shrdl $6,%r13d,%r13d
  1810. addl %r12d,%ebx
  1811. vpsrld $10,%xmm7,%xmm6
  1812. andl %r15d,%esi
  1813. xorl %ecx,%r14d
  1814. addl %r13d,%ebx
  1815. vpsrlq $17,%xmm7,%xmm7
  1816. xorl %edx,%esi
  1817. addl %ebx,%r9d
  1818. shrdl $2,%r14d,%r14d
  1819. vpxor %xmm7,%xmm6,%xmm6
  1820. addl %esi,%ebx
  1821. movl %r9d,%r13d
  1822. addl %ebx,%r14d
  1823. vpsrlq $2,%xmm7,%xmm7
  1824. shrdl $14,%r13d,%r13d
  1825. movl %r14d,%ebx
  1826. movl %r10d,%r12d
  1827. vpxor %xmm7,%xmm6,%xmm6
  1828. xorl %r9d,%r13d
  1829. shrdl $9,%r14d,%r14d
  1830. xorl %r11d,%r12d
  1831. vpshufd $232,%xmm6,%xmm6
  1832. shrdl $5,%r13d,%r13d
  1833. xorl %ebx,%r14d
  1834. andl %r9d,%r12d
  1835. vpslldq $8,%xmm6,%xmm6
  1836. vpor %xmm11,%xmm8,%xmm8
  1837. vaesenclast %xmm10,%xmm9,%xmm11
  1838. vmovdqu 0-128(%rdi),%xmm10
  1839. xorl %r9d,%r13d
  1840. addl 60(%rsp),%eax
  1841. movl %ebx,%esi
  1842. vpaddd %xmm6,%xmm3,%xmm3
  1843. shrdl $11,%r14d,%r14d
  1844. xorl %r11d,%r12d
  1845. xorl %ecx,%esi
  1846. vpaddd 96(%rbp),%xmm3,%xmm6
  1847. shrdl $6,%r13d,%r13d
  1848. addl %r12d,%eax
  1849. andl %esi,%r15d
  1850. xorl %ebx,%r14d
  1851. addl %r13d,%eax
  1852. xorl %ecx,%r15d
  1853. addl %eax,%r8d
  1854. shrdl $2,%r14d,%r14d
  1855. addl %r15d,%eax
  1856. movl %r8d,%r13d
  1857. addl %eax,%r14d
  1858. vmovdqa %xmm6,48(%rsp)
  1859. movq 64+0(%rsp),%r12
  1860. vpand %xmm14,%xmm11,%xmm11
  1861. movq 64+8(%rsp),%r15
  1862. vpor %xmm11,%xmm8,%xmm8
  1863. vmovdqu %xmm8,(%r15,%r12,1)
  1864. leaq 16(%r12),%r12
  1865. cmpb $0,131(%rbp)
  1866. jne L$avx_00_47
  1867. vmovdqu (%r12),%xmm9
  1868. movq %r12,64+0(%rsp)
  1869. shrdl $14,%r13d,%r13d
  1870. movl %r14d,%eax
  1871. movl %r9d,%r12d
  1872. xorl %r8d,%r13d
  1873. shrdl $9,%r14d,%r14d
  1874. xorl %r10d,%r12d
  1875. shrdl $5,%r13d,%r13d
  1876. xorl %eax,%r14d
  1877. andl %r8d,%r12d
  1878. vpxor %xmm10,%xmm9,%xmm9
  1879. vmovdqu 16-128(%rdi),%xmm10
  1880. xorl %r8d,%r13d
  1881. addl 0(%rsp),%r11d
  1882. movl %eax,%r15d
  1883. shrdl $11,%r14d,%r14d
  1884. xorl %r10d,%r12d
  1885. xorl %ebx,%r15d
  1886. shrdl $6,%r13d,%r13d
  1887. addl %r12d,%r11d
  1888. andl %r15d,%esi
  1889. xorl %eax,%r14d
  1890. addl %r13d,%r11d
  1891. xorl %ebx,%esi
  1892. addl %r11d,%edx
  1893. shrdl $2,%r14d,%r14d
  1894. addl %esi,%r11d
  1895. movl %edx,%r13d
  1896. addl %r11d,%r14d
  1897. shrdl $14,%r13d,%r13d
  1898. movl %r14d,%r11d
  1899. movl %r8d,%r12d
  1900. xorl %edx,%r13d
  1901. shrdl $9,%r14d,%r14d
  1902. xorl %r9d,%r12d
  1903. shrdl $5,%r13d,%r13d
  1904. xorl %r11d,%r14d
  1905. andl %edx,%r12d
  1906. vpxor %xmm8,%xmm9,%xmm9
  1907. xorl %edx,%r13d
  1908. addl 4(%rsp),%r10d
  1909. movl %r11d,%esi
  1910. shrdl $11,%r14d,%r14d
  1911. xorl %r9d,%r12d
  1912. xorl %eax,%esi
  1913. shrdl $6,%r13d,%r13d
  1914. addl %r12d,%r10d
  1915. andl %esi,%r15d
  1916. xorl %r11d,%r14d
  1917. addl %r13d,%r10d
  1918. xorl %eax,%r15d
  1919. addl %r10d,%ecx
  1920. shrdl $2,%r14d,%r14d
  1921. addl %r15d,%r10d
  1922. movl %ecx,%r13d
  1923. addl %r10d,%r14d
  1924. shrdl $14,%r13d,%r13d
  1925. movl %r14d,%r10d
  1926. movl %edx,%r12d
  1927. xorl %ecx,%r13d
  1928. shrdl $9,%r14d,%r14d
  1929. xorl %r8d,%r12d
  1930. shrdl $5,%r13d,%r13d
  1931. xorl %r10d,%r14d
  1932. andl %ecx,%r12d
  1933. vaesenc %xmm10,%xmm9,%xmm9
  1934. vmovdqu 32-128(%rdi),%xmm10
  1935. xorl %ecx,%r13d
  1936. addl 8(%rsp),%r9d
  1937. movl %r10d,%r15d
  1938. shrdl $11,%r14d,%r14d
  1939. xorl %r8d,%r12d
  1940. xorl %r11d,%r15d
  1941. shrdl $6,%r13d,%r13d
  1942. addl %r12d,%r9d
  1943. andl %r15d,%esi
  1944. xorl %r10d,%r14d
  1945. addl %r13d,%r9d
  1946. xorl %r11d,%esi
  1947. addl %r9d,%ebx
  1948. shrdl $2,%r14d,%r14d
  1949. addl %esi,%r9d
  1950. movl %ebx,%r13d
  1951. addl %r9d,%r14d
  1952. shrdl $14,%r13d,%r13d
  1953. movl %r14d,%r9d
  1954. movl %ecx,%r12d
  1955. xorl %ebx,%r13d
  1956. shrdl $9,%r14d,%r14d
  1957. xorl %edx,%r12d
  1958. shrdl $5,%r13d,%r13d
  1959. xorl %r9d,%r14d
  1960. andl %ebx,%r12d
  1961. vaesenc %xmm10,%xmm9,%xmm9
  1962. vmovdqu 48-128(%rdi),%xmm10
  1963. xorl %ebx,%r13d
  1964. addl 12(%rsp),%r8d
  1965. movl %r9d,%esi
  1966. shrdl $11,%r14d,%r14d
  1967. xorl %edx,%r12d
  1968. xorl %r10d,%esi
  1969. shrdl $6,%r13d,%r13d
  1970. addl %r12d,%r8d
  1971. andl %esi,%r15d
  1972. xorl %r9d,%r14d
  1973. addl %r13d,%r8d
  1974. xorl %r10d,%r15d
  1975. addl %r8d,%eax
  1976. shrdl $2,%r14d,%r14d
  1977. addl %r15d,%r8d
  1978. movl %eax,%r13d
  1979. addl %r8d,%r14d
  1980. shrdl $14,%r13d,%r13d
  1981. movl %r14d,%r8d
  1982. movl %ebx,%r12d
  1983. xorl %eax,%r13d
  1984. shrdl $9,%r14d,%r14d
  1985. xorl %ecx,%r12d
  1986. shrdl $5,%r13d,%r13d
  1987. xorl %r8d,%r14d
  1988. andl %eax,%r12d
  1989. vaesenc %xmm10,%xmm9,%xmm9
  1990. vmovdqu 64-128(%rdi),%xmm10
  1991. xorl %eax,%r13d
  1992. addl 16(%rsp),%edx
  1993. movl %r8d,%r15d
  1994. shrdl $11,%r14d,%r14d
  1995. xorl %ecx,%r12d
  1996. xorl %r9d,%r15d
  1997. shrdl $6,%r13d,%r13d
  1998. addl %r12d,%edx
  1999. andl %r15d,%esi
  2000. xorl %r8d,%r14d
  2001. addl %r13d,%edx
  2002. xorl %r9d,%esi
  2003. addl %edx,%r11d
  2004. shrdl $2,%r14d,%r14d
  2005. addl %esi,%edx
  2006. movl %r11d,%r13d
  2007. addl %edx,%r14d
  2008. shrdl $14,%r13d,%r13d
  2009. movl %r14d,%edx
  2010. movl %eax,%r12d
  2011. xorl %r11d,%r13d
  2012. shrdl $9,%r14d,%r14d
  2013. xorl %ebx,%r12d
  2014. shrdl $5,%r13d,%r13d
  2015. xorl %edx,%r14d
  2016. andl %r11d,%r12d
  2017. vaesenc %xmm10,%xmm9,%xmm9
  2018. vmovdqu 80-128(%rdi),%xmm10
  2019. xorl %r11d,%r13d
  2020. addl 20(%rsp),%ecx
  2021. movl %edx,%esi
  2022. shrdl $11,%r14d,%r14d
  2023. xorl %ebx,%r12d
  2024. xorl %r8d,%esi
  2025. shrdl $6,%r13d,%r13d
  2026. addl %r12d,%ecx
  2027. andl %esi,%r15d
  2028. xorl %edx,%r14d
  2029. addl %r13d,%ecx
  2030. xorl %r8d,%r15d
  2031. addl %ecx,%r10d
  2032. shrdl $2,%r14d,%r14d
  2033. addl %r15d,%ecx
  2034. movl %r10d,%r13d
  2035. addl %ecx,%r14d
  2036. shrdl $14,%r13d,%r13d
  2037. movl %r14d,%ecx
  2038. movl %r11d,%r12d
  2039. xorl %r10d,%r13d
  2040. shrdl $9,%r14d,%r14d
  2041. xorl %eax,%r12d
  2042. shrdl $5,%r13d,%r13d
  2043. xorl %ecx,%r14d
  2044. andl %r10d,%r12d
  2045. vaesenc %xmm10,%xmm9,%xmm9
  2046. vmovdqu 96-128(%rdi),%xmm10
  2047. xorl %r10d,%r13d
  2048. addl 24(%rsp),%ebx
  2049. movl %ecx,%r15d
  2050. shrdl $11,%r14d,%r14d
  2051. xorl %eax,%r12d
  2052. xorl %edx,%r15d
  2053. shrdl $6,%r13d,%r13d
  2054. addl %r12d,%ebx
  2055. andl %r15d,%esi
  2056. xorl %ecx,%r14d
  2057. addl %r13d,%ebx
  2058. xorl %edx,%esi
  2059. addl %ebx,%r9d
  2060. shrdl $2,%r14d,%r14d
  2061. addl %esi,%ebx
  2062. movl %r9d,%r13d
  2063. addl %ebx,%r14d
  2064. shrdl $14,%r13d,%r13d
  2065. movl %r14d,%ebx
  2066. movl %r10d,%r12d
  2067. xorl %r9d,%r13d
  2068. shrdl $9,%r14d,%r14d
  2069. xorl %r11d,%r12d
  2070. shrdl $5,%r13d,%r13d
  2071. xorl %ebx,%r14d
  2072. andl %r9d,%r12d
  2073. vaesenc %xmm10,%xmm9,%xmm9
  2074. vmovdqu 112-128(%rdi),%xmm10
  2075. xorl %r9d,%r13d
  2076. addl 28(%rsp),%eax
  2077. movl %ebx,%esi
  2078. shrdl $11,%r14d,%r14d
  2079. xorl %r11d,%r12d
  2080. xorl %ecx,%esi
  2081. shrdl $6,%r13d,%r13d
  2082. addl %r12d,%eax
  2083. andl %esi,%r15d
  2084. xorl %ebx,%r14d
  2085. addl %r13d,%eax
  2086. xorl %ecx,%r15d
  2087. addl %eax,%r8d
  2088. shrdl $2,%r14d,%r14d
  2089. addl %r15d,%eax
  2090. movl %r8d,%r13d
  2091. addl %eax,%r14d
  2092. shrdl $14,%r13d,%r13d
  2093. movl %r14d,%eax
  2094. movl %r9d,%r12d
  2095. xorl %r8d,%r13d
  2096. shrdl $9,%r14d,%r14d
  2097. xorl %r10d,%r12d
  2098. shrdl $5,%r13d,%r13d
  2099. xorl %eax,%r14d
  2100. andl %r8d,%r12d
  2101. vaesenc %xmm10,%xmm9,%xmm9
  2102. vmovdqu 128-128(%rdi),%xmm10
  2103. xorl %r8d,%r13d
  2104. addl 32(%rsp),%r11d
  2105. movl %eax,%r15d
  2106. shrdl $11,%r14d,%r14d
  2107. xorl %r10d,%r12d
  2108. xorl %ebx,%r15d
  2109. shrdl $6,%r13d,%r13d
  2110. addl %r12d,%r11d
  2111. andl %r15d,%esi
  2112. xorl %eax,%r14d
  2113. addl %r13d,%r11d
  2114. xorl %ebx,%esi
  2115. addl %r11d,%edx
  2116. shrdl $2,%r14d,%r14d
  2117. addl %esi,%r11d
  2118. movl %edx,%r13d
  2119. addl %r11d,%r14d
  2120. shrdl $14,%r13d,%r13d
  2121. movl %r14d,%r11d
  2122. movl %r8d,%r12d
  2123. xorl %edx,%r13d
  2124. shrdl $9,%r14d,%r14d
  2125. xorl %r9d,%r12d
  2126. shrdl $5,%r13d,%r13d
  2127. xorl %r11d,%r14d
  2128. andl %edx,%r12d
  2129. vaesenc %xmm10,%xmm9,%xmm9
  2130. vmovdqu 144-128(%rdi),%xmm10
  2131. xorl %edx,%r13d
  2132. addl 36(%rsp),%r10d
  2133. movl %r11d,%esi
  2134. shrdl $11,%r14d,%r14d
  2135. xorl %r9d,%r12d
  2136. xorl %eax,%esi
  2137. shrdl $6,%r13d,%r13d
  2138. addl %r12d,%r10d
  2139. andl %esi,%r15d
  2140. xorl %r11d,%r14d
  2141. addl %r13d,%r10d
  2142. xorl %eax,%r15d
  2143. addl %r10d,%ecx
  2144. shrdl $2,%r14d,%r14d
  2145. addl %r15d,%r10d
  2146. movl %ecx,%r13d
  2147. addl %r10d,%r14d
  2148. shrdl $14,%r13d,%r13d
  2149. movl %r14d,%r10d
  2150. movl %edx,%r12d
  2151. xorl %ecx,%r13d
  2152. shrdl $9,%r14d,%r14d
  2153. xorl %r8d,%r12d
  2154. shrdl $5,%r13d,%r13d
  2155. xorl %r10d,%r14d
  2156. andl %ecx,%r12d
  2157. vaesenc %xmm10,%xmm9,%xmm9
  2158. vmovdqu 160-128(%rdi),%xmm10
  2159. xorl %ecx,%r13d
  2160. addl 40(%rsp),%r9d
  2161. movl %r10d,%r15d
  2162. shrdl $11,%r14d,%r14d
  2163. xorl %r8d,%r12d
  2164. xorl %r11d,%r15d
  2165. shrdl $6,%r13d,%r13d
  2166. addl %r12d,%r9d
  2167. andl %r15d,%esi
  2168. xorl %r10d,%r14d
  2169. addl %r13d,%r9d
  2170. xorl %r11d,%esi
  2171. addl %r9d,%ebx
  2172. shrdl $2,%r14d,%r14d
  2173. addl %esi,%r9d
  2174. movl %ebx,%r13d
  2175. addl %r9d,%r14d
  2176. shrdl $14,%r13d,%r13d
  2177. movl %r14d,%r9d
  2178. movl %ecx,%r12d
  2179. xorl %ebx,%r13d
  2180. shrdl $9,%r14d,%r14d
  2181. xorl %edx,%r12d
  2182. shrdl $5,%r13d,%r13d
  2183. xorl %r9d,%r14d
  2184. andl %ebx,%r12d
  2185. vaesenclast %xmm10,%xmm9,%xmm11
  2186. vaesenc %xmm10,%xmm9,%xmm9
  2187. vmovdqu 176-128(%rdi),%xmm10
  2188. xorl %ebx,%r13d
  2189. addl 44(%rsp),%r8d
  2190. movl %r9d,%esi
  2191. shrdl $11,%r14d,%r14d
  2192. xorl %edx,%r12d
  2193. xorl %r10d,%esi
  2194. shrdl $6,%r13d,%r13d
  2195. addl %r12d,%r8d
  2196. andl %esi,%r15d
  2197. xorl %r9d,%r14d
  2198. addl %r13d,%r8d
  2199. xorl %r10d,%r15d
  2200. addl %r8d,%eax
  2201. shrdl $2,%r14d,%r14d
  2202. addl %r15d,%r8d
  2203. movl %eax,%r13d
  2204. addl %r8d,%r14d
  2205. shrdl $14,%r13d,%r13d
  2206. movl %r14d,%r8d
  2207. movl %ebx,%r12d
  2208. xorl %eax,%r13d
  2209. shrdl $9,%r14d,%r14d
  2210. xorl %ecx,%r12d
  2211. shrdl $5,%r13d,%r13d
  2212. xorl %r8d,%r14d
  2213. andl %eax,%r12d
  2214. vpand %xmm12,%xmm11,%xmm8
  2215. vaesenc %xmm10,%xmm9,%xmm9
  2216. vmovdqu 192-128(%rdi),%xmm10
  2217. xorl %eax,%r13d
  2218. addl 48(%rsp),%edx
  2219. movl %r8d,%r15d
  2220. shrdl $11,%r14d,%r14d
  2221. xorl %ecx,%r12d
  2222. xorl %r9d,%r15d
  2223. shrdl $6,%r13d,%r13d
  2224. addl %r12d,%edx
  2225. andl %r15d,%esi
  2226. xorl %r8d,%r14d
  2227. addl %r13d,%edx
  2228. xorl %r9d,%esi
  2229. addl %edx,%r11d
  2230. shrdl $2,%r14d,%r14d
  2231. addl %esi,%edx
  2232. movl %r11d,%r13d
  2233. addl %edx,%r14d
  2234. shrdl $14,%r13d,%r13d
  2235. movl %r14d,%edx
  2236. movl %eax,%r12d
  2237. xorl %r11d,%r13d
  2238. shrdl $9,%r14d,%r14d
  2239. xorl %ebx,%r12d
  2240. shrdl $5,%r13d,%r13d
  2241. xorl %edx,%r14d
  2242. andl %r11d,%r12d
  2243. vaesenclast %xmm10,%xmm9,%xmm11
  2244. vaesenc %xmm10,%xmm9,%xmm9
  2245. vmovdqu 208-128(%rdi),%xmm10
  2246. xorl %r11d,%r13d
  2247. addl 52(%rsp),%ecx
  2248. movl %edx,%esi
  2249. shrdl $11,%r14d,%r14d
  2250. xorl %ebx,%r12d
  2251. xorl %r8d,%esi
  2252. shrdl $6,%r13d,%r13d
  2253. addl %r12d,%ecx
  2254. andl %esi,%r15d
  2255. xorl %edx,%r14d
  2256. addl %r13d,%ecx
  2257. xorl %r8d,%r15d
  2258. addl %ecx,%r10d
  2259. shrdl $2,%r14d,%r14d
  2260. addl %r15d,%ecx
  2261. movl %r10d,%r13d
  2262. addl %ecx,%r14d
  2263. shrdl $14,%r13d,%r13d
  2264. movl %r14d,%ecx
  2265. movl %r11d,%r12d
  2266. xorl %r10d,%r13d
  2267. shrdl $9,%r14d,%r14d
  2268. xorl %eax,%r12d
  2269. shrdl $5,%r13d,%r13d
  2270. xorl %ecx,%r14d
  2271. andl %r10d,%r12d
  2272. vpand %xmm13,%xmm11,%xmm11
  2273. vaesenc %xmm10,%xmm9,%xmm9
  2274. vmovdqu 224-128(%rdi),%xmm10
  2275. xorl %r10d,%r13d
  2276. addl 56(%rsp),%ebx
  2277. movl %ecx,%r15d
  2278. shrdl $11,%r14d,%r14d
  2279. xorl %eax,%r12d
  2280. xorl %edx,%r15d
  2281. shrdl $6,%r13d,%r13d
  2282. addl %r12d,%ebx
  2283. andl %r15d,%esi
  2284. xorl %ecx,%r14d
  2285. addl %r13d,%ebx
  2286. xorl %edx,%esi
  2287. addl %ebx,%r9d
  2288. shrdl $2,%r14d,%r14d
  2289. addl %esi,%ebx
  2290. movl %r9d,%r13d
  2291. addl %ebx,%r14d
  2292. shrdl $14,%r13d,%r13d
  2293. movl %r14d,%ebx
  2294. movl %r10d,%r12d
  2295. xorl %r9d,%r13d
  2296. shrdl $9,%r14d,%r14d
  2297. xorl %r11d,%r12d
  2298. shrdl $5,%r13d,%r13d
  2299. xorl %ebx,%r14d
  2300. andl %r9d,%r12d
  2301. vpor %xmm11,%xmm8,%xmm8
  2302. vaesenclast %xmm10,%xmm9,%xmm11
  2303. vmovdqu 0-128(%rdi),%xmm10
  2304. xorl %r9d,%r13d
  2305. addl 60(%rsp),%eax
  2306. movl %ebx,%esi
  2307. shrdl $11,%r14d,%r14d
  2308. xorl %r11d,%r12d
  2309. xorl %ecx,%esi
  2310. shrdl $6,%r13d,%r13d
  2311. addl %r12d,%eax
  2312. andl %esi,%r15d
  2313. xorl %ebx,%r14d
  2314. addl %r13d,%eax
  2315. xorl %ecx,%r15d
  2316. addl %eax,%r8d
  2317. shrdl $2,%r14d,%r14d
  2318. addl %r15d,%eax
  2319. movl %r8d,%r13d
  2320. addl %eax,%r14d
  2321. movq 64+0(%rsp),%r12
  2322. movq 64+8(%rsp),%r13
  2323. movq 64+40(%rsp),%r15
  2324. movq 64+48(%rsp),%rsi
  2325. vpand %xmm14,%xmm11,%xmm11
  2326. movl %r14d,%eax
  2327. vpor %xmm11,%xmm8,%xmm8
  2328. vmovdqu %xmm8,(%r12,%r13,1)
  2329. leaq 16(%r12),%r12
  2330. addl 0(%r15),%eax
  2331. addl 4(%r15),%ebx
  2332. addl 8(%r15),%ecx
  2333. addl 12(%r15),%edx
  2334. addl 16(%r15),%r8d
  2335. addl 20(%r15),%r9d
  2336. addl 24(%r15),%r10d
  2337. addl 28(%r15),%r11d
  2338. cmpq 64+16(%rsp),%r12
  2339. movl %eax,0(%r15)
  2340. movl %ebx,4(%r15)
  2341. movl %ecx,8(%r15)
  2342. movl %edx,12(%r15)
  2343. movl %r8d,16(%r15)
  2344. movl %r9d,20(%r15)
  2345. movl %r10d,24(%r15)
  2346. movl %r11d,28(%r15)
  2347. jb L$loop_avx
  # Exit path of the AVX routine, reached by falling through the
  # `jb L$loop_avx` above once %r12 is no longer below 64+16(%rsp).
  # NOTE(review): the matching prologue is outside this view; the frame
  # slots used here are presumably laid out like the AVX2 entry that
  # follows (pointer from %r8 at 64+32(%rsp), entry %rsp at 120(%rsp)) -
  # confirm against the AVX prologue.
  #
  # Reload the pointer kept at 64+32(%rsp) and the value kept at 120(%rsp).
  2348. movq 64+32(%rsp),%r8
  2349. movq 120(%rsp),%rsi
  # Write the 16 bytes in %xmm8 through that pointer (unaligned store).
  2350. vmovdqu %xmm8,(%r8)
  # Zero every vector register before leaving.
  2351. vzeroall
  # Reload %r15,%r14,%r13,%r12,%rbp,%rbx from the six qwords just below
  # the address in %rsi, then make that address the stack pointer.
  2352. movq -48(%rsi),%r15
  2353. movq -40(%rsi),%r14
  2354. movq -32(%rsi),%r13
  2355. movq -24(%rsi),%r12
  2356. movq -16(%rsi),%rbp
  2357. movq -8(%rsi),%rbx
  2358. leaq (%rsi),%rsp
  2359. L$epilogue_avx:
  # Bytes F3 C3 encode `rep ret` (a RET with a REP prefix).
  2360. .byte 0xf3,0xc3
  2361. .p2align 6
  # aesni_cbc_sha256_enc_avx2 - entry and stack-frame setup (AVX2 variant).
  #
  # Register inputs read here: %rdi, %rsi, %rdx, %rcx, %r8, %r9, plus one
  # qword taken from 8(%rsp), i.e. the slot directly above the return
  # address.
  # NOTE(review): this matches the System V AMD64 convention of six
  # register arguments plus a seventh on the stack - confirm the intended
  # C prototype against the caller; it is not visible in this file section.
  #
  # Frame slots written (relative to the realigned %rsp):
  #   64+16(%rsp)  %rdi + 64*%rdx   (compared against the running pointer later)
  #   64+32(%rsp)  %r8
  #   64+40(%rsp)  %r9
  #   64+48(%rsp)  stack argument - %rdi
  #   120(%rsp)    %rsp as it was on entry
  2362. aesni_cbc_sha256_enc_avx2:
  2363. L$avx2_shortcut:
  # Fetch the stack-passed argument before %rsp is modified.
  2364. movq 8(%rsp),%r10
  # Remember the entry %rsp; it is stored in the frame at 120(%rsp) below.
  2365. movq %rsp,%rax
  # Save %rbx,%rbp,%r12-%r15; they land at -8 .. -48 from the entry %rsp.
  2366. pushq %rbx
  2367. pushq %rbp
  2368. pushq %r12
  2369. pushq %r13
  2370. pushq %r14
  2371. pushq %r15
  # Reserve scratch space and place %rsp at offset 448 within a
  # 1024-byte-aligned window (sub 576, round down to 1024, add 448).
  2372. subq $576,%rsp
  2373. andq $-1024,%rsp
  2374. addq $448,%rsp
  # %rdx = %rdi + (%rdx << 6): convert a count of 64-byte units into an
  # end address based on %rdi.
  # %rsi and %r10 are turned into offsets relative to %rdi.
  2375. shlq $6,%rdx
  2376. subq %rdi,%rsi
  2377. subq %rdi,%r10
  2378. addq %rdi,%rdx
  # Spill the values that must survive register reuse in the round code.
  2379. movq %rdx,64+16(%rsp)
  2380. movq %r8,64+32(%rsp)
  2381. movq %r9,64+40(%rsp)
  2382. movq %r10,64+48(%rsp)
  2383. movq %rax,120(%rsp)
  # L$prologue_avx2 - load the working registers before entering the loop.
  #
  # On exit from this section:
  #   %r13       = original %rdi + 64
  #   %xmm15[1]  = %rsi as adjusted by the entry code (%rsi - %rdi)
  #   %rdi       = %rcx + 128 (all later key loads use N-128(%rdi))
  #   %rsi       = %r10 (stack argument - original %rdi)
  #   %r12       = %rsi + %r13, or %rsp when %r13 == %rdx
  #   %xmm8      = 16 bytes loaded from (%r8)
  #   %xmm10     = 16 bytes loaded from 0-128(%rdi), i.e. from (%rcx)
  #   %xmm12-14  = three 16-byte constants selected from the K256 area
  #   %eax,%ebx,%ecx,%edx,%r8d-%r11d = eight dwords loaded from (%r9)
  2384. L$prologue_avx2:
  2385. vzeroall
  2386. movq %rdi,%r13
  # Stash the %rsi offset in the high qword of %xmm15, freeing %rsi.
  2387. vpinsrq $1,%rsi,%xmm15,%xmm15
  # Bias the %rcx pointer by +128.
  2388. leaq 128(%rcx),%rdi
  2389. leaq K256+544(%rip),%r12
  # Dword at offset 240 from the unbiased %rcx pointer.
  # NOTE(review): presumably the AES round count stored in the key
  # schedule - confirm against the key structure definition.
  2390. movl 240-128(%rdi),%r14d
  2391. movq %r9,%r15
  2392. movq %r10,%rsi
  2393. vmovdqu (%r8),%xmm8
  # Index the constants at K256+544 by (value - 9) * 8 bytes. The round
  # code applies %xmm12/%xmm13/%xmm14 with vpand to vaesenclast results.
  # NOTE(review): this looks like a mask set that keeps only the result
  # matching the key's round count - confirm against the K256 table layout.
  2394. leaq -9(%r14),%r14
  2395. vmovdqa 0(%r12,%r14,8),%xmm14
  2396. vmovdqa 16(%r12,%r14,8),%xmm13
  2397. vmovdqa 32(%r12,%r14,8),%xmm12
  # %r13 += 64 (written as subtracting -64).
  2398. subq $-64,%r13
  # The eight state dwords are loaded interleaved with the pointer setup.
  2399. movl 0(%r15),%eax
  2400. leaq (%rsi,%r13,1),%r12
  2401. movl 4(%r15),%ebx
  # If %r13 already equals the end address in %rdx, point %r12 at the
  # stack instead; the loop then fills the upper vector lanes from (%rsp).
  # NOTE(review): presumably this avoids reading past the input when no
  # second 64-byte block exists - confirm.
  2402. cmpq %rdx,%r13
  2403. movl 8(%r15),%ecx
  2404. cmoveq %rsp,%r12
  # %edx is overwritten here, after the compare above has consumed %rdx.
  2405. movl 12(%r15),%edx
  2406. movl 16(%r15),%r8d
  2407. movl 20(%r15),%r9d
  2408. movl 24(%r15),%r10d
  2409. movl 28(%r15),%r11d
  2410. vmovdqu 0-128(%rdi),%xmm10
  2411. jmp L$oop_avx2
  # Align the loop head that follows to 16 bytes.
  2412. .p2align 4
  # L$oop_avx2 - per-iteration setup: load two 64-byte chunks into
  # %ymm0-%ymm3, byte-shuffle them, add the first four 32-byte rows of the
  # K256 table, and park the sums on the stack for the round code.
  #
  # Low 128-bit lanes come from -64(%rsi,%r13); high lanes come from (%r12).
  # On the first pass %rsi and %r12 are the values set up by
  # L$prologue_avx2.
  2413. L$oop_avx2:
  # 32-byte vpshufb control vector stored at K256+512.
  # NOTE(review): presumably a byte-swap mask for big-endian message
  # words - confirm against the table contents.
  2414. vmovdqa K256+512(%rip),%ymm7
  2415. vmovdqu -64+0(%rsi,%r13,1),%xmm0
  2416. vmovdqu -64+16(%rsi,%r13,1),%xmm1
  2417. vmovdqu -64+32(%rsi,%r13,1),%xmm2
  2418. vmovdqu -64+48(%rsi,%r13,1),%xmm3
  # Insert 16 bytes from (%r12)+0/16/32/48 into the upper lanes,
  # interleaved with the shuffles.
  2419. vinserti128 $1,(%r12),%ymm0,%ymm0
  2420. vinserti128 $1,16(%r12),%ymm1,%ymm1
  2421. vpshufb %ymm7,%ymm0,%ymm0
  2422. vinserti128 $1,32(%r12),%ymm2,%ymm2
  2423. vpshufb %ymm7,%ymm1,%ymm1
  2424. vinserti128 $1,48(%r12),%ymm3,%ymm3
  # %rbp walks the K256 table.
  2425. leaq K256(%rip),%rbp
  2426. vpshufb %ymm7,%ymm2,%ymm2
  # %r13 -= 64, so it now holds the address the low-lane loads started from
  # minus %rsi.
  2427. leaq -64(%r13),%r13
  # %ymm4-%ymm7 = shuffled input + table rows 0..3. %ymm7 is reused as a
  # destination once the last vpshufb has consumed the control vector.
  2428. vpaddd 0(%rbp),%ymm0,%ymm4
  2429. vpshufb %ymm7,%ymm3,%ymm3
  2430. vpaddd 32(%rbp),%ymm1,%ymm5
  2431. vpaddd 64(%rbp),%ymm2,%ymm6
  2432. vpaddd 96(%rbp),%ymm3,%ymm7
  2433. vmovdqa %ymm4,0(%rsp)
  # Clear %r14d; the round code adds %r14 into %eax as its first step.
  2434. xorl %r14d,%r14d
  2435. vmovdqa %ymm5,32(%rsp)
  # Fetch the value kept at 120(%rsp), lower %rsp by 64, and store that
  # value just below the new %rsp. L$avx2_00_47 copies this slot down again
  # each time it lowers %rsp (leaq/pushq/leaq sequence).
  2436. movq 120(%rsp),%rsi
  2437. leaq -64(%rsp),%rsp
  2438. movq %rsi,-8(%rsp)
  # %esi = %ebx ^ %ecx and %r12d = %r9d: initial temporaries for the
  # first round.
  2439. movl %ebx,%esi
  2440. vmovdqa %ymm6,0(%rsp)
  2441. xorl %ecx,%esi
  2442. vmovdqa %ymm7,32(%rsp)
  2443. movl %r9d,%r12d
  # %rbp += 128: step past the four table rows consumed above.
  2444. subq $-32*4,%rbp
  2445. jmp L$avx2_00_47
  # Align the round-loop label that follows to 16 bytes.
  2446. .p2align 4
  2447. L$avx2_00_47:
  2448. vmovdqu (%r13),%xmm9
  2449. vpinsrq $0,%r13,%xmm15,%xmm15
  2450. leaq -64(%rsp),%rsp
  2451. pushq 64-8(%rsp)
  2452. leaq 8(%rsp),%rsp
  2453. vpalignr $4,%ymm0,%ymm1,%ymm4
  2454. addl 0+128(%rsp),%r11d
  2455. andl %r8d,%r12d
  2456. rorxl $25,%r8d,%r13d
  2457. vpalignr $4,%ymm2,%ymm3,%ymm7
  2458. rorxl $11,%r8d,%r15d
  2459. leal (%rax,%r14,1),%eax
  2460. leal (%r11,%r12,1),%r11d
  2461. vpsrld $7,%ymm4,%ymm6
  2462. andnl %r10d,%r8d,%r12d
  2463. xorl %r15d,%r13d
  2464. rorxl $6,%r8d,%r14d
  2465. vpaddd %ymm7,%ymm0,%ymm0
  2466. leal (%r11,%r12,1),%r11d
  2467. xorl %r14d,%r13d
  2468. movl %eax,%r15d
  2469. vpsrld $3,%ymm4,%ymm7
  2470. rorxl $22,%eax,%r12d
  2471. leal (%r11,%r13,1),%r11d
  2472. xorl %ebx,%r15d
  2473. vpslld $14,%ymm4,%ymm5
  2474. rorxl $13,%eax,%r14d
  2475. rorxl $2,%eax,%r13d
  2476. leal (%rdx,%r11,1),%edx
  2477. vpxor %ymm6,%ymm7,%ymm4
  2478. andl %r15d,%esi
  2479. vpxor %xmm10,%xmm9,%xmm9
  2480. vmovdqu 16-128(%rdi),%xmm10
  2481. xorl %r12d,%r14d
  2482. xorl %ebx,%esi
  2483. vpshufd $250,%ymm3,%ymm7
  2484. xorl %r13d,%r14d
  2485. leal (%r11,%rsi,1),%r11d
  2486. movl %r8d,%r12d
  2487. vpsrld $11,%ymm6,%ymm6
  2488. addl 4+128(%rsp),%r10d
  2489. andl %edx,%r12d
  2490. rorxl $25,%edx,%r13d
  2491. vpxor %ymm5,%ymm4,%ymm4
  2492. rorxl $11,%edx,%esi
  2493. leal (%r11,%r14,1),%r11d
  2494. leal (%r10,%r12,1),%r10d
  2495. vpslld $11,%ymm5,%ymm5
  2496. andnl %r9d,%edx,%r12d
  2497. xorl %esi,%r13d
  2498. rorxl $6,%edx,%r14d
  2499. vpxor %ymm6,%ymm4,%ymm4
  2500. leal (%r10,%r12,1),%r10d
  2501. xorl %r14d,%r13d
  2502. movl %r11d,%esi
  2503. vpsrld $10,%ymm7,%ymm6
  2504. rorxl $22,%r11d,%r12d
  2505. leal (%r10,%r13,1),%r10d
  2506. xorl %eax,%esi
  2507. vpxor %ymm5,%ymm4,%ymm4
  2508. rorxl $13,%r11d,%r14d
  2509. rorxl $2,%r11d,%r13d
  2510. leal (%rcx,%r10,1),%ecx
  2511. vpsrlq $17,%ymm7,%ymm7
  2512. andl %esi,%r15d
  2513. vpxor %xmm8,%xmm9,%xmm9
  2514. xorl %r12d,%r14d
  2515. xorl %eax,%r15d
  2516. vpaddd %ymm4,%ymm0,%ymm0
  2517. xorl %r13d,%r14d
  2518. leal (%r10,%r15,1),%r10d
  2519. movl %edx,%r12d
  2520. vpxor %ymm7,%ymm6,%ymm6
  2521. addl 8+128(%rsp),%r9d
  2522. andl %ecx,%r12d
  2523. rorxl $25,%ecx,%r13d
  2524. vpsrlq $2,%ymm7,%ymm7
  2525. rorxl $11,%ecx,%r15d
  2526. leal (%r10,%r14,1),%r10d
  2527. leal (%r9,%r12,1),%r9d
  2528. vpxor %ymm7,%ymm6,%ymm6
  2529. andnl %r8d,%ecx,%r12d
  2530. xorl %r15d,%r13d
  2531. rorxl $6,%ecx,%r14d
  2532. vpshufd $132,%ymm6,%ymm6
  2533. leal (%r9,%r12,1),%r9d
  2534. xorl %r14d,%r13d
  2535. movl %r10d,%r15d
  2536. vpsrldq $8,%ymm6,%ymm6
  2537. rorxl $22,%r10d,%r12d
  2538. leal (%r9,%r13,1),%r9d
  2539. xorl %r11d,%r15d
  2540. vpaddd %ymm6,%ymm0,%ymm0
  2541. rorxl $13,%r10d,%r14d
  2542. rorxl $2,%r10d,%r13d
  2543. leal (%rbx,%r9,1),%ebx
  2544. vpshufd $80,%ymm0,%ymm7
  2545. andl %r15d,%esi
  2546. vaesenc %xmm10,%xmm9,%xmm9
  2547. vmovdqu 32-128(%rdi),%xmm10
  2548. xorl %r12d,%r14d
  2549. xorl %r11d,%esi
  2550. vpsrld $10,%ymm7,%ymm6
  2551. xorl %r13d,%r14d
  2552. leal (%r9,%rsi,1),%r9d
  2553. movl %ecx,%r12d
  2554. vpsrlq $17,%ymm7,%ymm7
  2555. addl 12+128(%rsp),%r8d
  2556. andl %ebx,%r12d
  2557. rorxl $25,%ebx,%r13d
  2558. vpxor %ymm7,%ymm6,%ymm6
  2559. rorxl $11,%ebx,%esi
  2560. leal (%r9,%r14,1),%r9d
  2561. leal (%r8,%r12,1),%r8d
  2562. vpsrlq $2,%ymm7,%ymm7
  2563. andnl %edx,%ebx,%r12d
  2564. xorl %esi,%r13d
  2565. rorxl $6,%ebx,%r14d
  2566. vpxor %ymm7,%ymm6,%ymm6
  2567. leal (%r8,%r12,1),%r8d
  2568. xorl %r14d,%r13d
  2569. movl %r9d,%esi
  2570. vpshufd $232,%ymm6,%ymm6
  2571. rorxl $22,%r9d,%r12d
  2572. leal (%r8,%r13,1),%r8d
  2573. xorl %r10d,%esi
  2574. vpslldq $8,%ymm6,%ymm6
  2575. rorxl $13,%r9d,%r14d
  2576. rorxl $2,%r9d,%r13d
  2577. leal (%rax,%r8,1),%eax
  2578. vpaddd %ymm6,%ymm0,%ymm0
  2579. andl %esi,%r15d
  2580. vaesenc %xmm10,%xmm9,%xmm9
  2581. vmovdqu 48-128(%rdi),%xmm10
  2582. xorl %r12d,%r14d
  2583. xorl %r10d,%r15d
  2584. vpaddd 0(%rbp),%ymm0,%ymm6
  2585. xorl %r13d,%r14d
  2586. leal (%r8,%r15,1),%r8d
  2587. movl %ebx,%r12d
  2588. vmovdqa %ymm6,0(%rsp)
  2589. vpalignr $4,%ymm1,%ymm2,%ymm4
  2590. addl 32+128(%rsp),%edx
  2591. andl %eax,%r12d
  2592. rorxl $25,%eax,%r13d
  2593. vpalignr $4,%ymm3,%ymm0,%ymm7
  2594. rorxl $11,%eax,%r15d
  2595. leal (%r8,%r14,1),%r8d
  2596. leal (%rdx,%r12,1),%edx
  2597. vpsrld $7,%ymm4,%ymm6
  2598. andnl %ecx,%eax,%r12d
  2599. xorl %r15d,%r13d
  2600. rorxl $6,%eax,%r14d
  2601. vpaddd %ymm7,%ymm1,%ymm1
  2602. leal (%rdx,%r12,1),%edx
  2603. xorl %r14d,%r13d
  2604. movl %r8d,%r15d
  2605. vpsrld $3,%ymm4,%ymm7
  2606. rorxl $22,%r8d,%r12d
  2607. leal (%rdx,%r13,1),%edx
  2608. xorl %r9d,%r15d
  2609. vpslld $14,%ymm4,%ymm5
  2610. rorxl $13,%r8d,%r14d
  2611. rorxl $2,%r8d,%r13d
  2612. leal (%r11,%rdx,1),%r11d
  2613. vpxor %ymm6,%ymm7,%ymm4
  2614. andl %r15d,%esi
  2615. vaesenc %xmm10,%xmm9,%xmm9
  2616. vmovdqu 64-128(%rdi),%xmm10
  2617. xorl %r12d,%r14d
  2618. xorl %r9d,%esi
  2619. vpshufd $250,%ymm0,%ymm7
  2620. xorl %r13d,%r14d
  2621. leal (%rdx,%rsi,1),%edx
  2622. movl %eax,%r12d
  2623. vpsrld $11,%ymm6,%ymm6
  2624. addl 36+128(%rsp),%ecx
  2625. andl %r11d,%r12d
  2626. rorxl $25,%r11d,%r13d
  2627. vpxor %ymm5,%ymm4,%ymm4
  2628. rorxl $11,%r11d,%esi
  2629. leal (%rdx,%r14,1),%edx
  2630. leal (%rcx,%r12,1),%ecx
  2631. vpslld $11,%ymm5,%ymm5
  2632. andnl %ebx,%r11d,%r12d
  2633. xorl %esi,%r13d
  2634. rorxl $6,%r11d,%r14d
  2635. vpxor %ymm6,%ymm4,%ymm4
  2636. leal (%rcx,%r12,1),%ecx
  2637. xorl %r14d,%r13d
  2638. movl %edx,%esi
  2639. vpsrld $10,%ymm7,%ymm6
  2640. rorxl $22,%edx,%r12d
  2641. leal (%rcx,%r13,1),%ecx
  2642. xorl %r8d,%esi
  2643. vpxor %ymm5,%ymm4,%ymm4
  2644. rorxl $13,%edx,%r14d
  2645. rorxl $2,%edx,%r13d
  2646. leal (%r10,%rcx,1),%r10d
  2647. vpsrlq $17,%ymm7,%ymm7
  2648. andl %esi,%r15d
  2649. vaesenc %xmm10,%xmm9,%xmm9
  2650. vmovdqu 80-128(%rdi),%xmm10
  2651. xorl %r12d,%r14d
  2652. xorl %r8d,%r15d
  2653. vpaddd %ymm4,%ymm1,%ymm1
  2654. xorl %r13d,%r14d
  2655. leal (%rcx,%r15,1),%ecx
  2656. movl %r11d,%r12d
  2657. vpxor %ymm7,%ymm6,%ymm6
  2658. addl 40+128(%rsp),%ebx
  2659. andl %r10d,%r12d
  2660. rorxl $25,%r10d,%r13d
  2661. vpsrlq $2,%ymm7,%ymm7
  2662. rorxl $11,%r10d,%r15d
  2663. leal (%rcx,%r14,1),%ecx
  2664. leal (%rbx,%r12,1),%ebx
  2665. vpxor %ymm7,%ymm6,%ymm6
  2666. andnl %eax,%r10d,%r12d
  2667. xorl %r15d,%r13d
  2668. rorxl $6,%r10d,%r14d
  2669. vpshufd $132,%ymm6,%ymm6
  2670. leal (%rbx,%r12,1),%ebx
  2671. xorl %r14d,%r13d
  2672. movl %ecx,%r15d
  2673. vpsrldq $8,%ymm6,%ymm6
  2674. rorxl $22,%ecx,%r12d
  2675. leal (%rbx,%r13,1),%ebx
  2676. xorl %edx,%r15d
  2677. vpaddd %ymm6,%ymm1,%ymm1
  2678. rorxl $13,%ecx,%r14d
  2679. rorxl $2,%ecx,%r13d
  2680. leal (%r9,%rbx,1),%r9d
  2681. vpshufd $80,%ymm1,%ymm7
  2682. andl %r15d,%esi
  2683. vaesenc %xmm10,%xmm9,%xmm9
  2684. vmovdqu 96-128(%rdi),%xmm10
  2685. xorl %r12d,%r14d
  2686. xorl %edx,%esi
  2687. vpsrld $10,%ymm7,%ymm6
  2688. xorl %r13d,%r14d
  2689. leal (%rbx,%rsi,1),%ebx
  2690. movl %r10d,%r12d
  2691. vpsrlq $17,%ymm7,%ymm7
  2692. addl 44+128(%rsp),%eax
  2693. andl %r9d,%r12d
  2694. rorxl $25,%r9d,%r13d
  2695. vpxor %ymm7,%ymm6,%ymm6
  2696. rorxl $11,%r9d,%esi
  2697. leal (%rbx,%r14,1),%ebx
  2698. leal (%rax,%r12,1),%eax
  2699. vpsrlq $2,%ymm7,%ymm7
  2700. andnl %r11d,%r9d,%r12d
  2701. xorl %esi,%r13d
  2702. rorxl $6,%r9d,%r14d
  2703. vpxor %ymm7,%ymm6,%ymm6
  2704. leal (%rax,%r12,1),%eax
  2705. xorl %r14d,%r13d
  2706. movl %ebx,%esi
  2707. vpshufd $232,%ymm6,%ymm6
  2708. rorxl $22,%ebx,%r12d
  2709. leal (%rax,%r13,1),%eax
  2710. xorl %ecx,%esi
  2711. vpslldq $8,%ymm6,%ymm6
  2712. rorxl $13,%ebx,%r14d
  2713. rorxl $2,%ebx,%r13d
  2714. leal (%r8,%rax,1),%r8d
  2715. vpaddd %ymm6,%ymm1,%ymm1
  2716. andl %esi,%r15d
  2717. vaesenc %xmm10,%xmm9,%xmm9
  2718. vmovdqu 112-128(%rdi),%xmm10
  2719. xorl %r12d,%r14d
  2720. xorl %ecx,%r15d
  2721. vpaddd 32(%rbp),%ymm1,%ymm6
  2722. xorl %r13d,%r14d
  2723. leal (%rax,%r15,1),%eax
  2724. movl %r9d,%r12d
  2725. vmovdqa %ymm6,32(%rsp)
  2726. leaq -64(%rsp),%rsp
  2727. pushq 64-8(%rsp)
  2728. leaq 8(%rsp),%rsp
  2729. vpalignr $4,%ymm2,%ymm3,%ymm4
  2730. addl 0+128(%rsp),%r11d
  2731. andl %r8d,%r12d
  2732. rorxl $25,%r8d,%r13d
  2733. vpalignr $4,%ymm0,%ymm1,%ymm7
  2734. rorxl $11,%r8d,%r15d
  2735. leal (%rax,%r14,1),%eax
  2736. leal (%r11,%r12,1),%r11d
  2737. vpsrld $7,%ymm4,%ymm6
  2738. andnl %r10d,%r8d,%r12d
  2739. xorl %r15d,%r13d
  2740. rorxl $6,%r8d,%r14d
  2741. vpaddd %ymm7,%ymm2,%ymm2
  2742. leal (%r11,%r12,1),%r11d
  2743. xorl %r14d,%r13d
  2744. movl %eax,%r15d
  2745. vpsrld $3,%ymm4,%ymm7
  2746. rorxl $22,%eax,%r12d
  2747. leal (%r11,%r13,1),%r11d
  2748. xorl %ebx,%r15d
  2749. vpslld $14,%ymm4,%ymm5
  2750. rorxl $13,%eax,%r14d
  2751. rorxl $2,%eax,%r13d
  2752. leal (%rdx,%r11,1),%edx
  2753. vpxor %ymm6,%ymm7,%ymm4
  2754. andl %r15d,%esi
  2755. vaesenc %xmm10,%xmm9,%xmm9
  2756. vmovdqu 128-128(%rdi),%xmm10
  2757. xorl %r12d,%r14d
  2758. xorl %ebx,%esi
  2759. vpshufd $250,%ymm1,%ymm7
  2760. xorl %r13d,%r14d
  2761. leal (%r11,%rsi,1),%r11d
  2762. movl %r8d,%r12d
  2763. vpsrld $11,%ymm6,%ymm6
  2764. addl 4+128(%rsp),%r10d
  2765. andl %edx,%r12d
  2766. rorxl $25,%edx,%r13d
  2767. vpxor %ymm5,%ymm4,%ymm4
  2768. rorxl $11,%edx,%esi
  2769. leal (%r11,%r14,1),%r11d
  2770. leal (%r10,%r12,1),%r10d
  2771. vpslld $11,%ymm5,%ymm5
  2772. andnl %r9d,%edx,%r12d
  2773. xorl %esi,%r13d
  2774. rorxl $6,%edx,%r14d
  2775. vpxor %ymm6,%ymm4,%ymm4
  2776. leal (%r10,%r12,1),%r10d
  2777. xorl %r14d,%r13d
  2778. movl %r11d,%esi
  2779. vpsrld $10,%ymm7,%ymm6
  2780. rorxl $22,%r11d,%r12d
  2781. leal (%r10,%r13,1),%r10d
  2782. xorl %eax,%esi
  2783. vpxor %ymm5,%ymm4,%ymm4
  2784. rorxl $13,%r11d,%r14d
  2785. rorxl $2,%r11d,%r13d
  2786. leal (%rcx,%r10,1),%ecx
  2787. vpsrlq $17,%ymm7,%ymm7
  2788. andl %esi,%r15d
  2789. vaesenc %xmm10,%xmm9,%xmm9
  2790. vmovdqu 144-128(%rdi),%xmm10
  2791. xorl %r12d,%r14d
  2792. xorl %eax,%r15d
  2793. vpaddd %ymm4,%ymm2,%ymm2
  2794. xorl %r13d,%r14d
  2795. leal (%r10,%r15,1),%r10d
  2796. movl %edx,%r12d
  2797. vpxor %ymm7,%ymm6,%ymm6
  2798. addl 8+128(%rsp),%r9d
  2799. andl %ecx,%r12d
  2800. rorxl $25,%ecx,%r13d
  2801. vpsrlq $2,%ymm7,%ymm7
  2802. rorxl $11,%ecx,%r15d
  2803. leal (%r10,%r14,1),%r10d
  2804. leal (%r9,%r12,1),%r9d
  2805. vpxor %ymm7,%ymm6,%ymm6
  2806. andnl %r8d,%ecx,%r12d
  2807. xorl %r15d,%r13d
  2808. rorxl $6,%ecx,%r14d
  2809. vpshufd $132,%ymm6,%ymm6
  2810. leal (%r9,%r12,1),%r9d
  2811. xorl %r14d,%r13d
  2812. movl %r10d,%r15d
  2813. vpsrldq $8,%ymm6,%ymm6
  2814. rorxl $22,%r10d,%r12d
  2815. leal (%r9,%r13,1),%r9d
  2816. xorl %r11d,%r15d
  2817. vpaddd %ymm6,%ymm2,%ymm2
  2818. rorxl $13,%r10d,%r14d
  2819. rorxl $2,%r10d,%r13d
  2820. leal (%rbx,%r9,1),%ebx
  2821. vpshufd $80,%ymm2,%ymm7
  2822. andl %r15d,%esi
  2823. vaesenc %xmm10,%xmm9,%xmm9
  2824. vmovdqu 160-128(%rdi),%xmm10
  2825. xorl %r12d,%r14d
  2826. xorl %r11d,%esi
  2827. vpsrld $10,%ymm7,%ymm6
  2828. xorl %r13d,%r14d
  2829. leal (%r9,%rsi,1),%r9d
  2830. movl %ecx,%r12d
  2831. vpsrlq $17,%ymm7,%ymm7
  2832. addl 12+128(%rsp),%r8d
  2833. andl %ebx,%r12d
  2834. rorxl $25,%ebx,%r13d
  2835. vpxor %ymm7,%ymm6,%ymm6
  2836. rorxl $11,%ebx,%esi
  2837. leal (%r9,%r14,1),%r9d
  2838. leal (%r8,%r12,1),%r8d
  2839. vpsrlq $2,%ymm7,%ymm7
  2840. andnl %edx,%ebx,%r12d
  2841. xorl %esi,%r13d
  2842. rorxl $6,%ebx,%r14d
  2843. vpxor %ymm7,%ymm6,%ymm6
  2844. leal (%r8,%r12,1),%r8d
  2845. xorl %r14d,%r13d
  2846. movl %r9d,%esi
  2847. vpshufd $232,%ymm6,%ymm6
  2848. rorxl $22,%r9d,%r12d
  2849. leal (%r8,%r13,1),%r8d
  2850. xorl %r10d,%esi
  2851. vpslldq $8,%ymm6,%ymm6
  2852. rorxl $13,%r9d,%r14d
  2853. rorxl $2,%r9d,%r13d
  2854. leal (%rax,%r8,1),%eax
  2855. vpaddd %ymm6,%ymm2,%ymm2
  2856. andl %esi,%r15d
  2857. vaesenclast %xmm10,%xmm9,%xmm11
  2858. vaesenc %xmm10,%xmm9,%xmm9
  2859. vmovdqu 176-128(%rdi),%xmm10
  2860. xorl %r12d,%r14d
  2861. xorl %r10d,%r15d
  2862. vpaddd 64(%rbp),%ymm2,%ymm6
  2863. xorl %r13d,%r14d
  2864. leal (%r8,%r15,1),%r8d
  2865. movl %ebx,%r12d
  2866. vmovdqa %ymm6,0(%rsp)
  2867. vpalignr $4,%ymm3,%ymm0,%ymm4
  2868. addl 32+128(%rsp),%edx
  2869. andl %eax,%r12d
  2870. rorxl $25,%eax,%r13d
  2871. vpalignr $4,%ymm1,%ymm2,%ymm7
  2872. rorxl $11,%eax,%r15d
  2873. leal (%r8,%r14,1),%r8d
  2874. leal (%rdx,%r12,1),%edx
  2875. vpsrld $7,%ymm4,%ymm6
  2876. andnl %ecx,%eax,%r12d
  2877. xorl %r15d,%r13d
  2878. rorxl $6,%eax,%r14d
  2879. vpaddd %ymm7,%ymm3,%ymm3
  2880. leal (%rdx,%r12,1),%edx
  2881. xorl %r14d,%r13d
  2882. movl %r8d,%r15d
  2883. vpsrld $3,%ymm4,%ymm7
  2884. rorxl $22,%r8d,%r12d
  2885. leal (%rdx,%r13,1),%edx
  2886. xorl %r9d,%r15d
  2887. vpslld $14,%ymm4,%ymm5
  2888. rorxl $13,%r8d,%r14d
  2889. rorxl $2,%r8d,%r13d
  2890. leal (%r11,%rdx,1),%r11d
  2891. vpxor %ymm6,%ymm7,%ymm4
  2892. andl %r15d,%esi
  2893. vpand %xmm12,%xmm11,%xmm8
  2894. vaesenc %xmm10,%xmm9,%xmm9
  2895. vmovdqu 192-128(%rdi),%xmm10
  2896. xorl %r12d,%r14d
  2897. xorl %r9d,%esi
  2898. vpshufd $250,%ymm2,%ymm7
  2899. xorl %r13d,%r14d
  2900. leal (%rdx,%rsi,1),%edx
  2901. movl %eax,%r12d
  2902. vpsrld $11,%ymm6,%ymm6
  2903. addl 36+128(%rsp),%ecx
  2904. andl %r11d,%r12d
  2905. rorxl $25,%r11d,%r13d
  2906. vpxor %ymm5,%ymm4,%ymm4
  2907. rorxl $11,%r11d,%esi
  2908. leal (%rdx,%r14,1),%edx
  2909. leal (%rcx,%r12,1),%ecx
  2910. vpslld $11,%ymm5,%ymm5
  2911. andnl %ebx,%r11d,%r12d
  2912. xorl %esi,%r13d
  2913. rorxl $6,%r11d,%r14d
  2914. vpxor %ymm6,%ymm4,%ymm4
  2915. leal (%rcx,%r12,1),%ecx
  2916. xorl %r14d,%r13d
  2917. movl %edx,%esi
  2918. vpsrld $10,%ymm7,%ymm6
  2919. rorxl $22,%edx,%r12d
  2920. leal (%rcx,%r13,1),%ecx
  2921. xorl %r8d,%esi
  2922. vpxor %ymm5,%ymm4,%ymm4
  2923. rorxl $13,%edx,%r14d
  2924. rorxl $2,%edx,%r13d
  2925. leal (%r10,%rcx,1),%r10d
  2926. vpsrlq $17,%ymm7,%ymm7
  2927. andl %esi,%r15d
  2928. vaesenclast %xmm10,%xmm9,%xmm11
  2929. vaesenc %xmm10,%xmm9,%xmm9
  2930. vmovdqu 208-128(%rdi),%xmm10
  2931. xorl %r12d,%r14d
  2932. xorl %r8d,%r15d
  2933. vpaddd %ymm4,%ymm3,%ymm3
  2934. xorl %r13d,%r14d
  2935. leal (%rcx,%r15,1),%ecx
  2936. movl %r11d,%r12d
  2937. vpxor %ymm7,%ymm6,%ymm6
  2938. addl 40+128(%rsp),%ebx
  2939. andl %r10d,%r12d
  2940. rorxl $25,%r10d,%r13d
  2941. vpsrlq $2,%ymm7,%ymm7
  2942. rorxl $11,%r10d,%r15d
  2943. leal (%rcx,%r14,1),%ecx
  2944. leal (%rbx,%r12,1),%ebx
  2945. vpxor %ymm7,%ymm6,%ymm6
  2946. andnl %eax,%r10d,%r12d
  2947. xorl %r15d,%r13d
  2948. rorxl $6,%r10d,%r14d
  2949. vpshufd $132,%ymm6,%ymm6
  2950. leal (%rbx,%r12,1),%ebx
  2951. xorl %r14d,%r13d
  2952. movl %ecx,%r15d
  2953. vpsrldq $8,%ymm6,%ymm6
  2954. rorxl $22,%ecx,%r12d
  2955. leal (%rbx,%r13,1),%ebx
  2956. xorl %edx,%r15d
  2957. vpaddd %ymm6,%ymm3,%ymm3
  2958. rorxl $13,%ecx,%r14d
  2959. rorxl $2,%ecx,%r13d
  2960. leal (%r9,%rbx,1),%r9d
  2961. vpshufd $80,%ymm3,%ymm7
  2962. andl %r15d,%esi
  2963. vpand %xmm13,%xmm11,%xmm11
  2964. vaesenc %xmm10,%xmm9,%xmm9
  2965. vmovdqu 224-128(%rdi),%xmm10
  2966. xorl %r12d,%r14d
  2967. xorl %edx,%esi
  2968. vpsrld $10,%ymm7,%ymm6
  2969. xorl %r13d,%r14d
  2970. leal (%rbx,%rsi,1),%ebx
  2971. movl %r10d,%r12d
  2972. vpsrlq $17,%ymm7,%ymm7
  2973. addl 44+128(%rsp),%eax
  2974. andl %r9d,%r12d
  2975. rorxl $25,%r9d,%r13d
  2976. vpxor %ymm7,%ymm6,%ymm6
  2977. rorxl $11,%r9d,%esi
  2978. leal (%rbx,%r14,1),%ebx
  2979. leal (%rax,%r12,1),%eax
  2980. vpsrlq $2,%ymm7,%ymm7
  2981. andnl %r11d,%r9d,%r12d
  2982. xorl %esi,%r13d
  2983. rorxl $6,%r9d,%r14d
  2984. vpxor %ymm7,%ymm6,%ymm6
  2985. leal (%rax,%r12,1),%eax
  2986. xorl %r14d,%r13d
  2987. movl %ebx,%esi
  2988. vpshufd $232,%ymm6,%ymm6
  2989. rorxl $22,%ebx,%r12d
  2990. leal (%rax,%r13,1),%eax
  2991. xorl %ecx,%esi
  2992. vpslldq $8,%ymm6,%ymm6
  2993. rorxl $13,%ebx,%r14d
  2994. rorxl $2,%ebx,%r13d
  2995. leal (%r8,%rax,1),%r8d
  2996. vpaddd %ymm6,%ymm3,%ymm3
  2997. andl %esi,%r15d
  2998. vpor %xmm11,%xmm8,%xmm8
  2999. vaesenclast %xmm10,%xmm9,%xmm11
  3000. vmovdqu 0-128(%rdi),%xmm10
  3001. xorl %r12d,%r14d
  3002. xorl %ecx,%r15d
  3003. vpaddd 96(%rbp),%ymm3,%ymm6
  3004. xorl %r13d,%r14d
  3005. leal (%rax,%r15,1),%eax
  3006. movl %r9d,%r12d
  3007. vmovdqa %ymm6,32(%rsp)
  3008. vmovq %xmm15,%r13
  3009. vpextrq $1,%xmm15,%r15
  3010. vpand %xmm14,%xmm11,%xmm11
  3011. vpor %xmm11,%xmm8,%xmm8
  3012. vmovdqu %xmm8,(%r15,%r13,1)
  3013. leaq 16(%r13),%r13
  3014. leaq 128(%rbp),%rbp
  3015. cmpb $0,3(%rbp)
  3016. jne L$avx2_00_47
  3017. vmovdqu (%r13),%xmm9
  3018. vpinsrq $0,%r13,%xmm15,%xmm15
  3019. addl 0+64(%rsp),%r11d
  3020. andl %r8d,%r12d
  3021. rorxl $25,%r8d,%r13d
  3022. rorxl $11,%r8d,%r15d
  3023. leal (%rax,%r14,1),%eax
  3024. leal (%r11,%r12,1),%r11d
  3025. andnl %r10d,%r8d,%r12d
  3026. xorl %r15d,%r13d
  3027. rorxl $6,%r8d,%r14d
  3028. leal (%r11,%r12,1),%r11d
  3029. xorl %r14d,%r13d
  3030. movl %eax,%r15d
  3031. rorxl $22,%eax,%r12d
  3032. leal (%r11,%r13,1),%r11d
  3033. xorl %ebx,%r15d
  3034. rorxl $13,%eax,%r14d
  3035. rorxl $2,%eax,%r13d
  3036. leal (%rdx,%r11,1),%edx
  3037. andl %r15d,%esi
  3038. vpxor %xmm10,%xmm9,%xmm9
  3039. vmovdqu 16-128(%rdi),%xmm10
  3040. xorl %r12d,%r14d
  3041. xorl %ebx,%esi
  3042. xorl %r13d,%r14d
  3043. leal (%r11,%rsi,1),%r11d
  3044. movl %r8d,%r12d
  3045. addl 4+64(%rsp),%r10d
  3046. andl %edx,%r12d
  3047. rorxl $25,%edx,%r13d
  3048. rorxl $11,%edx,%esi
  3049. leal (%r11,%r14,1),%r11d
  3050. leal (%r10,%r12,1),%r10d
  3051. andnl %r9d,%edx,%r12d
  3052. xorl %esi,%r13d
  3053. rorxl $6,%edx,%r14d
  3054. leal (%r10,%r12,1),%r10d
  3055. xorl %r14d,%r13d
  3056. movl %r11d,%esi
  3057. rorxl $22,%r11d,%r12d
  3058. leal (%r10,%r13,1),%r10d
  3059. xorl %eax,%esi
  3060. rorxl $13,%r11d,%r14d
  3061. rorxl $2,%r11d,%r13d
  3062. leal (%rcx,%r10,1),%ecx
  3063. andl %esi,%r15d
  3064. vpxor %xmm8,%xmm9,%xmm9
  3065. xorl %r12d,%r14d
  3066. xorl %eax,%r15d
  3067. xorl %r13d,%r14d
  3068. leal (%r10,%r15,1),%r10d
  3069. movl %edx,%r12d
  3070. addl 8+64(%rsp),%r9d
  3071. andl %ecx,%r12d
  3072. rorxl $25,%ecx,%r13d
  3073. rorxl $11,%ecx,%r15d
  3074. leal (%r10,%r14,1),%r10d
  3075. leal (%r9,%r12,1),%r9d
  3076. andnl %r8d,%ecx,%r12d
  3077. xorl %r15d,%r13d
  3078. rorxl $6,%ecx,%r14d
  3079. leal (%r9,%r12,1),%r9d
  3080. xorl %r14d,%r13d
  3081. movl %r10d,%r15d
  3082. rorxl $22,%r10d,%r12d
  3083. leal (%r9,%r13,1),%r9d
  3084. xorl %r11d,%r15d
  3085. rorxl $13,%r10d,%r14d
  3086. rorxl $2,%r10d,%r13d
  3087. leal (%rbx,%r9,1),%ebx
  3088. andl %r15d,%esi
  3089. vaesenc %xmm10,%xmm9,%xmm9
  3090. vmovdqu 32-128(%rdi),%xmm10
  3091. xorl %r12d,%r14d
  3092. xorl %r11d,%esi
  3093. xorl %r13d,%r14d
  3094. leal (%r9,%rsi,1),%r9d
  3095. movl %ecx,%r12d
  3096. addl 12+64(%rsp),%r8d
  3097. andl %ebx,%r12d
  3098. rorxl $25,%ebx,%r13d
  3099. rorxl $11,%ebx,%esi
  3100. leal (%r9,%r14,1),%r9d
  3101. leal (%r8,%r12,1),%r8d
  3102. andnl %edx,%ebx,%r12d
  3103. xorl %esi,%r13d
  3104. rorxl $6,%ebx,%r14d
  3105. leal (%r8,%r12,1),%r8d
  3106. xorl %r14d,%r13d
  3107. movl %r9d,%esi
  3108. rorxl $22,%r9d,%r12d
  3109. leal (%r8,%r13,1),%r8d
  3110. xorl %r10d,%esi
  3111. rorxl $13,%r9d,%r14d
  3112. rorxl $2,%r9d,%r13d
  3113. leal (%rax,%r8,1),%eax
  3114. andl %esi,%r15d
  3115. vaesenc %xmm10,%xmm9,%xmm9
  3116. vmovdqu 48-128(%rdi),%xmm10
  3117. xorl %r12d,%r14d
  3118. xorl %r10d,%r15d
  3119. xorl %r13d,%r14d
  3120. leal (%r8,%r15,1),%r8d
  3121. movl %ebx,%r12d
  3122. addl 32+64(%rsp),%edx
  3123. andl %eax,%r12d
  3124. rorxl $25,%eax,%r13d
  3125. rorxl $11,%eax,%r15d
  3126. leal (%r8,%r14,1),%r8d
  3127. leal (%rdx,%r12,1),%edx
  3128. andnl %ecx,%eax,%r12d
  3129. xorl %r15d,%r13d
  3130. rorxl $6,%eax,%r14d
  3131. leal (%rdx,%r12,1),%edx
  3132. xorl %r14d,%r13d
  3133. movl %r8d,%r15d
  3134. rorxl $22,%r8d,%r12d
  3135. leal (%rdx,%r13,1),%edx
  3136. xorl %r9d,%r15d
  3137. rorxl $13,%r8d,%r14d
  3138. rorxl $2,%r8d,%r13d
  3139. leal (%r11,%rdx,1),%r11d
  3140. andl %r15d,%esi
  3141. vaesenc %xmm10,%xmm9,%xmm9
  3142. vmovdqu 64-128(%rdi),%xmm10
  3143. xorl %r12d,%r14d
  3144. xorl %r9d,%esi
  3145. xorl %r13d,%r14d
  3146. leal (%rdx,%rsi,1),%edx
  3147. movl %eax,%r12d
  3148. addl 36+64(%rsp),%ecx
  3149. andl %r11d,%r12d
  3150. rorxl $25,%r11d,%r13d
  3151. rorxl $11,%r11d,%esi
  3152. leal (%rdx,%r14,1),%edx
  3153. leal (%rcx,%r12,1),%ecx
  3154. andnl %ebx,%r11d,%r12d
  3155. xorl %esi,%r13d
  3156. rorxl $6,%r11d,%r14d
  3157. leal (%rcx,%r12,1),%ecx
  3158. xorl %r14d,%r13d
  3159. movl %edx,%esi
  3160. rorxl $22,%edx,%r12d
  3161. leal (%rcx,%r13,1),%ecx
  3162. xorl %r8d,%esi
  3163. rorxl $13,%edx,%r14d
  3164. rorxl $2,%edx,%r13d
  3165. leal (%r10,%rcx,1),%r10d
  3166. andl %esi,%r15d
  3167. vaesenc %xmm10,%xmm9,%xmm9
  3168. vmovdqu 80-128(%rdi),%xmm10
  3169. xorl %r12d,%r14d
  3170. xorl %r8d,%r15d
  3171. xorl %r13d,%r14d
  3172. leal (%rcx,%r15,1),%ecx
  3173. movl %r11d,%r12d
  3174. addl 40+64(%rsp),%ebx
  3175. andl %r10d,%r12d
  3176. rorxl $25,%r10d,%r13d
  3177. rorxl $11,%r10d,%r15d
  3178. leal (%rcx,%r14,1),%ecx
  3179. leal (%rbx,%r12,1),%ebx
  3180. andnl %eax,%r10d,%r12d
  3181. xorl %r15d,%r13d
  3182. rorxl $6,%r10d,%r14d
  3183. leal (%rbx,%r12,1),%ebx
  3184. xorl %r14d,%r13d
  3185. movl %ecx,%r15d
  3186. rorxl $22,%ecx,%r12d
  3187. leal (%rbx,%r13,1),%ebx
  3188. xorl %edx,%r15d
  3189. rorxl $13,%ecx,%r14d
  3190. rorxl $2,%ecx,%r13d
  3191. leal (%r9,%rbx,1),%r9d
  3192. andl %r15d,%esi
  3193. vaesenc %xmm10,%xmm9,%xmm9
  3194. vmovdqu 96-128(%rdi),%xmm10
  3195. xorl %r12d,%r14d
  3196. xorl %edx,%esi
  3197. xorl %r13d,%r14d
  3198. leal (%rbx,%rsi,1),%ebx
  3199. movl %r10d,%r12d
  3200. addl 44+64(%rsp),%eax
  3201. andl %r9d,%r12d
  3202. rorxl $25,%r9d,%r13d
  3203. rorxl $11,%r9d,%esi
  3204. leal (%rbx,%r14,1),%ebx
  3205. leal (%rax,%r12,1),%eax
  3206. andnl %r11d,%r9d,%r12d
  3207. xorl %esi,%r13d
  3208. rorxl $6,%r9d,%r14d
  3209. leal (%rax,%r12,1),%eax
  3210. xorl %r14d,%r13d
  3211. movl %ebx,%esi
  3212. rorxl $22,%ebx,%r12d
  3213. leal (%rax,%r13,1),%eax
  3214. xorl %ecx,%esi
  3215. rorxl $13,%ebx,%r14d
  3216. rorxl $2,%ebx,%r13d
  3217. leal (%r8,%rax,1),%r8d
  3218. andl %esi,%r15d
  3219. vaesenc %xmm10,%xmm9,%xmm9
  3220. vmovdqu 112-128(%rdi),%xmm10
  3221. xorl %r12d,%r14d
  3222. xorl %ecx,%r15d
  3223. xorl %r13d,%r14d
  3224. leal (%rax,%r15,1),%eax
  3225. movl %r9d,%r12d
  3226. addl 0(%rsp),%r11d
  3227. andl %r8d,%r12d
  3228. rorxl $25,%r8d,%r13d
  3229. rorxl $11,%r8d,%r15d
  3230. leal (%rax,%r14,1),%eax
  3231. leal (%r11,%r12,1),%r11d
  3232. andnl %r10d,%r8d,%r12d
  3233. xorl %r15d,%r13d
  3234. rorxl $6,%r8d,%r14d
  3235. leal (%r11,%r12,1),%r11d
  3236. xorl %r14d,%r13d
  3237. movl %eax,%r15d
  3238. rorxl $22,%eax,%r12d
  3239. leal (%r11,%r13,1),%r11d
  3240. xorl %ebx,%r15d
  3241. rorxl $13,%eax,%r14d
  3242. rorxl $2,%eax,%r13d
  3243. leal (%rdx,%r11,1),%edx
  3244. andl %r15d,%esi
  3245. vaesenc %xmm10,%xmm9,%xmm9
  3246. vmovdqu 128-128(%rdi),%xmm10
  3247. xorl %r12d,%r14d
  3248. xorl %ebx,%esi
  3249. xorl %r13d,%r14d
  3250. leal (%r11,%rsi,1),%r11d
  3251. movl %r8d,%r12d
  3252. addl 4(%rsp),%r10d
  3253. andl %edx,%r12d
  3254. rorxl $25,%edx,%r13d
  3255. rorxl $11,%edx,%esi
  3256. leal (%r11,%r14,1),%r11d
  3257. leal (%r10,%r12,1),%r10d
  3258. andnl %r9d,%edx,%r12d
  3259. xorl %esi,%r13d
  3260. rorxl $6,%edx,%r14d
  3261. leal (%r10,%r12,1),%r10d
  3262. xorl %r14d,%r13d
  3263. movl %r11d,%esi
  3264. rorxl $22,%r11d,%r12d
  3265. leal (%r10,%r13,1),%r10d
  3266. xorl %eax,%esi
  3267. rorxl $13,%r11d,%r14d
  3268. rorxl $2,%r11d,%r13d
  3269. leal (%rcx,%r10,1),%ecx
  3270. andl %esi,%r15d
  3271. vaesenc %xmm10,%xmm9,%xmm9
  3272. vmovdqu 144-128(%rdi),%xmm10
  3273. xorl %r12d,%r14d
  3274. xorl %eax,%r15d
  3275. xorl %r13d,%r14d
  3276. leal (%r10,%r15,1),%r10d
  3277. movl %edx,%r12d
  3278. addl 8(%rsp),%r9d
  3279. andl %ecx,%r12d
  3280. rorxl $25,%ecx,%r13d
  3281. rorxl $11,%ecx,%r15d
  3282. leal (%r10,%r14,1),%r10d
  3283. leal (%r9,%r12,1),%r9d
  3284. andnl %r8d,%ecx,%r12d
  3285. xorl %r15d,%r13d
  3286. rorxl $6,%ecx,%r14d
  3287. leal (%r9,%r12,1),%r9d
  3288. xorl %r14d,%r13d
  3289. movl %r10d,%r15d
  3290. rorxl $22,%r10d,%r12d
  3291. leal (%r9,%r13,1),%r9d
  3292. xorl %r11d,%r15d
  3293. rorxl $13,%r10d,%r14d
  3294. rorxl $2,%r10d,%r13d
  3295. leal (%rbx,%r9,1),%ebx
  3296. andl %r15d,%esi
  3297. vaesenc %xmm10,%xmm9,%xmm9
  3298. vmovdqu 160-128(%rdi),%xmm10
  3299. xorl %r12d,%r14d
  3300. xorl %r11d,%esi
  3301. xorl %r13d,%r14d
  3302. leal (%r9,%rsi,1),%r9d
  3303. movl %ecx,%r12d
  3304. addl 12(%rsp),%r8d
  3305. andl %ebx,%r12d
  3306. rorxl $25,%ebx,%r13d
  3307. rorxl $11,%ebx,%esi
  3308. leal (%r9,%r14,1),%r9d
  3309. leal (%r8,%r12,1),%r8d
  3310. andnl %edx,%ebx,%r12d
  3311. xorl %esi,%r13d
  3312. rorxl $6,%ebx,%r14d
  3313. leal (%r8,%r12,1),%r8d
  3314. xorl %r14d,%r13d
  3315. movl %r9d,%esi
  3316. rorxl $22,%r9d,%r12d
  3317. leal (%r8,%r13,1),%r8d
  3318. xorl %r10d,%esi
  3319. rorxl $13,%r9d,%r14d
  3320. rorxl $2,%r9d,%r13d
  3321. leal (%rax,%r8,1),%eax
  3322. andl %esi,%r15d
  3323. vaesenclast %xmm10,%xmm9,%xmm11
  3324. vaesenc %xmm10,%xmm9,%xmm9
  3325. vmovdqu 176-128(%rdi),%xmm10
  3326. xorl %r12d,%r14d
  3327. xorl %r10d,%r15d
  3328. xorl %r13d,%r14d
  3329. leal (%r8,%r15,1),%r8d
  3330. movl %ebx,%r12d
  3331. addl 32(%rsp),%edx
  3332. andl %eax,%r12d
  3333. rorxl $25,%eax,%r13d
  3334. rorxl $11,%eax,%r15d
  3335. leal (%r8,%r14,1),%r8d
  3336. leal (%rdx,%r12,1),%edx
  3337. andnl %ecx,%eax,%r12d
  3338. xorl %r15d,%r13d
  3339. rorxl $6,%eax,%r14d
  3340. leal (%rdx,%r12,1),%edx
  3341. xorl %r14d,%r13d
  3342. movl %r8d,%r15d
  3343. rorxl $22,%r8d,%r12d
  3344. leal (%rdx,%r13,1),%edx
  3345. xorl %r9d,%r15d
  3346. rorxl $13,%r8d,%r14d
  3347. rorxl $2,%r8d,%r13d
  3348. leal (%r11,%rdx,1),%r11d
  3349. andl %r15d,%esi
  3350. vpand %xmm12,%xmm11,%xmm8
  3351. vaesenc %xmm10,%xmm9,%xmm9
  3352. vmovdqu 192-128(%rdi),%xmm10
  3353. xorl %r12d,%r14d
  3354. xorl %r9d,%esi
  3355. xorl %r13d,%r14d
  3356. leal (%rdx,%rsi,1),%edx
  3357. movl %eax,%r12d
  3358. addl 36(%rsp),%ecx
  3359. andl %r11d,%r12d
  3360. rorxl $25,%r11d,%r13d
  3361. rorxl $11,%r11d,%esi
  3362. leal (%rdx,%r14,1),%edx
  3363. leal (%rcx,%r12,1),%ecx
  3364. andnl %ebx,%r11d,%r12d
  3365. xorl %esi,%r13d
  3366. rorxl $6,%r11d,%r14d
  3367. leal (%rcx,%r12,1),%ecx
  3368. xorl %r14d,%r13d
  3369. movl %edx,%esi
  3370. rorxl $22,%edx,%r12d
  3371. leal (%rcx,%r13,1),%ecx
  3372. xorl %r8d,%esi
  3373. rorxl $13,%edx,%r14d
  3374. rorxl $2,%edx,%r13d
  3375. leal (%r10,%rcx,1),%r10d
  3376. andl %esi,%r15d
  3377. vaesenclast %xmm10,%xmm9,%xmm11
  3378. vaesenc %xmm10,%xmm9,%xmm9
  3379. vmovdqu 208-128(%rdi),%xmm10
  3380. xorl %r12d,%r14d
  3381. xorl %r8d,%r15d
  3382. xorl %r13d,%r14d
  3383. leal (%rcx,%r15,1),%ecx
  3384. movl %r11d,%r12d
  3385. addl 40(%rsp),%ebx
  3386. andl %r10d,%r12d
  3387. rorxl $25,%r10d,%r13d
  3388. rorxl $11,%r10d,%r15d
  3389. leal (%rcx,%r14,1),%ecx
  3390. leal (%rbx,%r12,1),%ebx
  3391. andnl %eax,%r10d,%r12d
  3392. xorl %r15d,%r13d
  3393. rorxl $6,%r10d,%r14d
  3394. leal (%rbx,%r12,1),%ebx
  3395. xorl %r14d,%r13d
  3396. movl %ecx,%r15d
  3397. rorxl $22,%ecx,%r12d
  3398. leal (%rbx,%r13,1),%ebx
  3399. xorl %edx,%r15d
  3400. rorxl $13,%ecx,%r14d
  3401. rorxl $2,%ecx,%r13d
  3402. leal (%r9,%rbx,1),%r9d
  3403. andl %r15d,%esi
  3404. vpand %xmm13,%xmm11,%xmm11
  3405. vaesenc %xmm10,%xmm9,%xmm9
  3406. vmovdqu 224-128(%rdi),%xmm10
  3407. xorl %r12d,%r14d
  3408. xorl %edx,%esi
  3409. xorl %r13d,%r14d
  3410. leal (%rbx,%rsi,1),%ebx
  3411. movl %r10d,%r12d
  3412. addl 44(%rsp),%eax
  3413. andl %r9d,%r12d
  3414. rorxl $25,%r9d,%r13d
  3415. rorxl $11,%r9d,%esi
  3416. leal (%rbx,%r14,1),%ebx
  3417. leal (%rax,%r12,1),%eax
  3418. andnl %r11d,%r9d,%r12d
  3419. xorl %esi,%r13d
  3420. rorxl $6,%r9d,%r14d
  3421. leal (%rax,%r12,1),%eax
  3422. xorl %r14d,%r13d
  3423. movl %ebx,%esi
  3424. rorxl $22,%ebx,%r12d
  3425. leal (%rax,%r13,1),%eax
  3426. xorl %ecx,%esi
  3427. rorxl $13,%ebx,%r14d
  3428. rorxl $2,%ebx,%r13d
  3429. leal (%r8,%rax,1),%r8d
  3430. andl %esi,%r15d
  3431. vpor %xmm11,%xmm8,%xmm8
  3432. vaesenclast %xmm10,%xmm9,%xmm11
  3433. vmovdqu 0-128(%rdi),%xmm10
  3434. xorl %r12d,%r14d
  3435. xorl %ecx,%r15d
  3436. xorl %r13d,%r14d
  3437. leal (%rax,%r15,1),%eax
  3438. movl %r9d,%r12d
  3439. vpextrq $1,%xmm15,%r12
  3440. vmovq %xmm15,%r13
  3441. movq 552(%rsp),%r15
  3442. addl %r14d,%eax
  3443. leaq 448(%rsp),%rbp
  3444. vpand %xmm14,%xmm11,%xmm11
  3445. vpor %xmm11,%xmm8,%xmm8
  3446. vmovdqu %xmm8,(%r12,%r13,1)
  3447. leaq 16(%r13),%r13
  3448. addl 0(%r15),%eax
  3449. addl 4(%r15),%ebx
  3450. addl 8(%r15),%ecx
  3451. addl 12(%r15),%edx
  3452. addl 16(%r15),%r8d
  3453. addl 20(%r15),%r9d
  3454. addl 24(%r15),%r10d
  3455. addl 28(%r15),%r11d
  3456. movl %eax,0(%r15)
  3457. movl %ebx,4(%r15)
  3458. movl %ecx,8(%r15)
  3459. movl %edx,12(%r15)
  3460. movl %r8d,16(%r15)
  3461. movl %r9d,20(%r15)
  3462. movl %r10d,24(%r15)
  3463. movl %r11d,28(%r15)
  3464. cmpq 80(%rbp),%r13
  3465. je L$done_avx2
  3466. xorl %r14d,%r14d
  3467. movl %ebx,%esi
  3468. movl %r9d,%r12d
  3469. xorl %ecx,%esi
  3470. jmp L$ower_avx2
  3471. .p2align 4
  # L$ower_avx2 -- this loop is fully visible here, but the routine that
  # contains it starts before this chunk; register roles below are read off
  # the visible instructions only.
  #
  # Each pass performs 16 SHA-256 rounds (two groups of 8) interleaved with
  # the AES-CBC encryption of one 16-byte block.
  #
  # SHA-256 side:
  #   eax,ebx,ecx,edx,r8d,r9d,r10d,r11d  working variables a..h (roles rotate
  #                                      by one register every round)
  #   r12d,r13d,r15d,esi  scratch: Ch, Sigma1, and the a^b / b^c pair for Maj
  #   r14d                Sigma0 of the previous round; it is folded into the
  #                       new "a" one round late by the leal (..,%r14,1) lines
  #   rbp                 cursor into the round inputs kept on the stack; only
  #                       offsets 16..28 and 48..60 of each 64-byte group are
  #                       read, and rbp steps down 64 bytes per 8 rounds
  # AES side:
  #   r13            input pointer, parked in the low qword of xmm15 while
  #                  r13d serves as round scratch
  #   xmm15 (high)   value added to r13 to form the store address
  #   xmm8           previous ciphertext block (CBC chaining value)
  #   xmm9           block being encrypted; xmm10 = current round key
  #   rdi            round keys are addressed as N-128(%rdi)
  #   xmm12/13/14    masks applied to the vaesenclast results after 10/12/14
  #                  rounds. NOTE(review): they are set up outside this chunk;
  #                  presumably exactly one is all-ones, chosen by key length.
  3472. L$ower_avx2:
  3473. vmovdqu (%r13),%xmm9  # load next 16-byte input block
  3474. vpinsrq $0,%r13,%xmm15,%xmm15  # save r13; r13d is clobbered by the rounds
  # ---- first group of 8 rounds ----
  3475. addl 0+16(%rbp),%r11d  # h += round input from the stack
  3476. andl %r8d,%r12d  # r12d = e & f (r12d held f on entry)
  3477. rorxl $25,%r8d,%r13d
  3478. rorxl $11,%r8d,%r15d
  3479. leal (%rax,%r14,1),%eax  # a += Sigma0 deferred from the previous round
  3480. leal (%r11,%r12,1),%r11d
  3481. andnl %r10d,%r8d,%r12d  # r12d = ~e & g
  3482. xorl %r15d,%r13d
  3483. rorxl $6,%r8d,%r14d
  3484. leal (%r11,%r12,1),%r11d
  3485. xorl %r14d,%r13d  # r13d = Sigma1(e) = ror25 ^ ror11 ^ ror6
  3486. movl %eax,%r15d
  3487. rorxl $22,%eax,%r12d
  3488. leal (%r11,%r13,1),%r11d
  3489. xorl %ebx,%r15d  # r15d = a ^ b
  3490. rorxl $13,%eax,%r14d
  3491. rorxl $2,%eax,%r13d
  3492. leal (%rdx,%r11,1),%edx  # d += h
  3493. andl %r15d,%esi  # esi = (b^c) & (a^b)
  3494. vpxor %xmm10,%xmm9,%xmm9  # AES: xor with the key loaded from 0-128(%rdi)
  3495. vmovdqu 16-128(%rdi),%xmm10
  3496. xorl %r12d,%r14d
  3497. xorl %ebx,%esi  # esi = Maj(a,b,c)
  3498. xorl %r13d,%r14d  # r14d = Sigma0(a), consumed by the next round
  3499. leal (%r11,%rsi,1),%r11d
  3500. movl %r8d,%r12d
  3501. addl 4+16(%rbp),%r10d
  3502. andl %edx,%r12d
  3503. rorxl $25,%edx,%r13d
  3504. rorxl $11,%edx,%esi
  3505. leal (%r11,%r14,1),%r11d
  3506. leal (%r10,%r12,1),%r10d
  3507. andnl %r9d,%edx,%r12d
  3508. xorl %esi,%r13d
  3509. rorxl $6,%edx,%r14d
  3510. leal (%r10,%r12,1),%r10d
  3511. xorl %r14d,%r13d
  3512. movl %r11d,%esi
  3513. rorxl $22,%r11d,%r12d
  3514. leal (%r10,%r13,1),%r10d
  3515. xorl %eax,%esi
  3516. rorxl $13,%r11d,%r14d
  3517. rorxl $2,%r11d,%r13d
  3518. leal (%rcx,%r10,1),%ecx
  3519. andl %esi,%r15d
  3520. vpxor %xmm8,%xmm9,%xmm9  # CBC: xor in the previous ciphertext block
  3521. xorl %r12d,%r14d
  3522. xorl %eax,%r15d
  3523. xorl %r13d,%r14d
  3524. leal (%r10,%r15,1),%r10d
  3525. movl %edx,%r12d
  3526. addl 8+16(%rbp),%r9d
  3527. andl %ecx,%r12d
  3528. rorxl $25,%ecx,%r13d
  3529. rorxl $11,%ecx,%r15d
  3530. leal (%r10,%r14,1),%r10d
  3531. leal (%r9,%r12,1),%r9d
  3532. andnl %r8d,%ecx,%r12d
  3533. xorl %r15d,%r13d
  3534. rorxl $6,%ecx,%r14d
  3535. leal (%r9,%r12,1),%r9d
  3536. xorl %r14d,%r13d
  3537. movl %r10d,%r15d
  3538. rorxl $22,%r10d,%r12d
  3539. leal (%r9,%r13,1),%r9d
  3540. xorl %r11d,%r15d
  3541. rorxl $13,%r10d,%r14d
  3542. rorxl $2,%r10d,%r13d
  3543. leal (%rbx,%r9,1),%ebx
  3544. andl %r15d,%esi
  3545. vaesenc %xmm10,%xmm9,%xmm9  # AES round with key at 16-128(%rdi)
  3546. vmovdqu 32-128(%rdi),%xmm10
  3547. xorl %r12d,%r14d
  3548. xorl %r11d,%esi
  3549. xorl %r13d,%r14d
  3550. leal (%r9,%rsi,1),%r9d
  3551. movl %ecx,%r12d
  3552. addl 12+16(%rbp),%r8d
  3553. andl %ebx,%r12d
  3554. rorxl $25,%ebx,%r13d
  3555. rorxl $11,%ebx,%esi
  3556. leal (%r9,%r14,1),%r9d
  3557. leal (%r8,%r12,1),%r8d
  3558. andnl %edx,%ebx,%r12d
  3559. xorl %esi,%r13d
  3560. rorxl $6,%ebx,%r14d
  3561. leal (%r8,%r12,1),%r8d
  3562. xorl %r14d,%r13d
  3563. movl %r9d,%esi
  3564. rorxl $22,%r9d,%r12d
  3565. leal (%r8,%r13,1),%r8d
  3566. xorl %r10d,%esi
  3567. rorxl $13,%r9d,%r14d
  3568. rorxl $2,%r9d,%r13d
  3569. leal (%rax,%r8,1),%eax
  3570. andl %esi,%r15d
  3571. vaesenc %xmm10,%xmm9,%xmm9
  3572. vmovdqu 48-128(%rdi),%xmm10
  3573. xorl %r12d,%r14d
  3574. xorl %r10d,%r15d
  3575. xorl %r13d,%r14d
  3576. leal (%r8,%r15,1),%r8d
  3577. movl %ebx,%r12d
  3578. addl 32+16(%rbp),%edx
  3579. andl %eax,%r12d
  3580. rorxl $25,%eax,%r13d
  3581. rorxl $11,%eax,%r15d
  3582. leal (%r8,%r14,1),%r8d
  3583. leal (%rdx,%r12,1),%edx
  3584. andnl %ecx,%eax,%r12d
  3585. xorl %r15d,%r13d
  3586. rorxl $6,%eax,%r14d
  3587. leal (%rdx,%r12,1),%edx
  3588. xorl %r14d,%r13d
  3589. movl %r8d,%r15d
  3590. rorxl $22,%r8d,%r12d
  3591. leal (%rdx,%r13,1),%edx
  3592. xorl %r9d,%r15d
  3593. rorxl $13,%r8d,%r14d
  3594. rorxl $2,%r8d,%r13d
  3595. leal (%r11,%rdx,1),%r11d
  3596. andl %r15d,%esi
  3597. vaesenc %xmm10,%xmm9,%xmm9
  3598. vmovdqu 64-128(%rdi),%xmm10
  3599. xorl %r12d,%r14d
  3600. xorl %r9d,%esi
  3601. xorl %r13d,%r14d
  3602. leal (%rdx,%rsi,1),%edx
  3603. movl %eax,%r12d
  3604. addl 36+16(%rbp),%ecx
  3605. andl %r11d,%r12d
  3606. rorxl $25,%r11d,%r13d
  3607. rorxl $11,%r11d,%esi
  3608. leal (%rdx,%r14,1),%edx
  3609. leal (%rcx,%r12,1),%ecx
  3610. andnl %ebx,%r11d,%r12d
  3611. xorl %esi,%r13d
  3612. rorxl $6,%r11d,%r14d
  3613. leal (%rcx,%r12,1),%ecx
  3614. xorl %r14d,%r13d
  3615. movl %edx,%esi
  3616. rorxl $22,%edx,%r12d
  3617. leal (%rcx,%r13,1),%ecx
  3618. xorl %r8d,%esi
  3619. rorxl $13,%edx,%r14d
  3620. rorxl $2,%edx,%r13d
  3621. leal (%r10,%rcx,1),%r10d
  3622. andl %esi,%r15d
  3623. vaesenc %xmm10,%xmm9,%xmm9
  3624. vmovdqu 80-128(%rdi),%xmm10
  3625. xorl %r12d,%r14d
  3626. xorl %r8d,%r15d
  3627. xorl %r13d,%r14d
  3628. leal (%rcx,%r15,1),%ecx
  3629. movl %r11d,%r12d
  3630. addl 40+16(%rbp),%ebx
  3631. andl %r10d,%r12d
  3632. rorxl $25,%r10d,%r13d
  3633. rorxl $11,%r10d,%r15d
  3634. leal (%rcx,%r14,1),%ecx
  3635. leal (%rbx,%r12,1),%ebx
  3636. andnl %eax,%r10d,%r12d
  3637. xorl %r15d,%r13d
  3638. rorxl $6,%r10d,%r14d
  3639. leal (%rbx,%r12,1),%ebx
  3640. xorl %r14d,%r13d
  3641. movl %ecx,%r15d
  3642. rorxl $22,%ecx,%r12d
  3643. leal (%rbx,%r13,1),%ebx
  3644. xorl %edx,%r15d
  3645. rorxl $13,%ecx,%r14d
  3646. rorxl $2,%ecx,%r13d
  3647. leal (%r9,%rbx,1),%r9d
  3648. andl %r15d,%esi
  3649. vaesenc %xmm10,%xmm9,%xmm9
  3650. vmovdqu 96-128(%rdi),%xmm10
  3651. xorl %r12d,%r14d
  3652. xorl %edx,%esi
  3653. xorl %r13d,%r14d
  3654. leal (%rbx,%rsi,1),%ebx
  3655. movl %r10d,%r12d
  3656. addl 44+16(%rbp),%eax
  3657. andl %r9d,%r12d
  3658. rorxl $25,%r9d,%r13d
  3659. rorxl $11,%r9d,%esi
  3660. leal (%rbx,%r14,1),%ebx
  3661. leal (%rax,%r12,1),%eax
  3662. andnl %r11d,%r9d,%r12d
  3663. xorl %esi,%r13d
  3664. rorxl $6,%r9d,%r14d
  3665. leal (%rax,%r12,1),%eax
  3666. xorl %r14d,%r13d
  3667. movl %ebx,%esi
  3668. rorxl $22,%ebx,%r12d
  3669. leal (%rax,%r13,1),%eax
  3670. xorl %ecx,%esi
  3671. rorxl $13,%ebx,%r14d
  3672. rorxl $2,%ebx,%r13d
  3673. leal (%r8,%rax,1),%r8d
  3674. andl %esi,%r15d
  3675. vaesenc %xmm10,%xmm9,%xmm9
  3676. vmovdqu 112-128(%rdi),%xmm10
  3677. xorl %r12d,%r14d
  3678. xorl %ecx,%r15d
  3679. xorl %r13d,%r14d
  3680. leal (%rax,%r15,1),%eax
  3681. movl %r9d,%r12d
  # ---- second group of 8 rounds: step the cursor down by 64 bytes ----
  3682. leaq -64(%rbp),%rbp
  3683. addl 0+16(%rbp),%r11d
  3684. andl %r8d,%r12d
  3685. rorxl $25,%r8d,%r13d
  3686. rorxl $11,%r8d,%r15d
  3687. leal (%rax,%r14,1),%eax
  3688. leal (%r11,%r12,1),%r11d
  3689. andnl %r10d,%r8d,%r12d
  3690. xorl %r15d,%r13d
  3691. rorxl $6,%r8d,%r14d
  3692. leal (%r11,%r12,1),%r11d
  3693. xorl %r14d,%r13d
  3694. movl %eax,%r15d
  3695. rorxl $22,%eax,%r12d
  3696. leal (%r11,%r13,1),%r11d
  3697. xorl %ebx,%r15d
  3698. rorxl $13,%eax,%r14d
  3699. rorxl $2,%eax,%r13d
  3700. leal (%rdx,%r11,1),%edx
  3701. andl %r15d,%esi
  3702. vaesenc %xmm10,%xmm9,%xmm9
  3703. vmovdqu 128-128(%rdi),%xmm10
  3704. xorl %r12d,%r14d
  3705. xorl %ebx,%esi
  3706. xorl %r13d,%r14d
  3707. leal (%r11,%rsi,1),%r11d
  3708. movl %r8d,%r12d
  3709. addl 4+16(%rbp),%r10d
  3710. andl %edx,%r12d
  3711. rorxl $25,%edx,%r13d
  3712. rorxl $11,%edx,%esi
  3713. leal (%r11,%r14,1),%r11d
  3714. leal (%r10,%r12,1),%r10d
  3715. andnl %r9d,%edx,%r12d
  3716. xorl %esi,%r13d
  3717. rorxl $6,%edx,%r14d
  3718. leal (%r10,%r12,1),%r10d
  3719. xorl %r14d,%r13d
  3720. movl %r11d,%esi
  3721. rorxl $22,%r11d,%r12d
  3722. leal (%r10,%r13,1),%r10d
  3723. xorl %eax,%esi
  3724. rorxl $13,%r11d,%r14d
  3725. rorxl $2,%r11d,%r13d
  3726. leal (%rcx,%r10,1),%ecx
  3727. andl %esi,%r15d
  3728. vaesenc %xmm10,%xmm9,%xmm9
  3729. vmovdqu 144-128(%rdi),%xmm10
  3730. xorl %r12d,%r14d
  3731. xorl %eax,%r15d
  3732. xorl %r13d,%r14d
  3733. leal (%r10,%r15,1),%r10d
  3734. movl %edx,%r12d
  3735. addl 8+16(%rbp),%r9d
  3736. andl %ecx,%r12d
  3737. rorxl $25,%ecx,%r13d
  3738. rorxl $11,%ecx,%r15d
  3739. leal (%r10,%r14,1),%r10d
  3740. leal (%r9,%r12,1),%r9d
  3741. andnl %r8d,%ecx,%r12d
  3742. xorl %r15d,%r13d
  3743. rorxl $6,%ecx,%r14d
  3744. leal (%r9,%r12,1),%r9d
  3745. xorl %r14d,%r13d
  3746. movl %r10d,%r15d
  3747. rorxl $22,%r10d,%r12d
  3748. leal (%r9,%r13,1),%r9d
  3749. xorl %r11d,%r15d
  3750. rorxl $13,%r10d,%r14d
  3751. rorxl $2,%r10d,%r13d
  3752. leal (%rbx,%r9,1),%ebx
  3753. andl %r15d,%esi
  3754. vaesenc %xmm10,%xmm9,%xmm9  # ninth vaesenc of this block
  3755. vmovdqu 160-128(%rdi),%xmm10
  3756. xorl %r12d,%r14d
  3757. xorl %r11d,%esi
  3758. xorl %r13d,%r14d
  3759. leal (%r9,%rsi,1),%r9d
  3760. movl %ecx,%r12d
  3761. addl 12+16(%rbp),%r8d
  3762. andl %ebx,%r12d
  3763. rorxl $25,%ebx,%r13d
  3764. rorxl $11,%ebx,%esi
  3765. leal (%r9,%r14,1),%r9d
  3766. leal (%r8,%r12,1),%r8d
  3767. andnl %edx,%ebx,%r12d
  3768. xorl %esi,%r13d
  3769. rorxl $6,%ebx,%r14d
  3770. leal (%r8,%r12,1),%r8d
  3771. xorl %r14d,%r13d
  3772. movl %r9d,%esi
  3773. rorxl $22,%r9d,%r12d
  3774. leal (%r8,%r13,1),%r8d
  3775. xorl %r10d,%esi
  3776. rorxl $13,%r9d,%r14d
  3777. rorxl $2,%r9d,%r13d
  3778. leal (%rax,%r8,1),%eax
  3779. andl %esi,%r15d
  3780. vaesenclast %xmm10,%xmm9,%xmm11  # candidate result after 10 rounds
  3781. vaesenc %xmm10,%xmm9,%xmm9  # ...and keep going as if the key were longer
  3782. vmovdqu 176-128(%rdi),%xmm10
  3783. xorl %r12d,%r14d
  3784. xorl %r10d,%r15d
  3785. xorl %r13d,%r14d
  3786. leal (%r8,%r15,1),%r8d
  3787. movl %ebx,%r12d
  3788. addl 32+16(%rbp),%edx
  3789. andl %eax,%r12d
  3790. rorxl $25,%eax,%r13d
  3791. rorxl $11,%eax,%r15d
  3792. leal (%r8,%r14,1),%r8d
  3793. leal (%rdx,%r12,1),%edx
  3794. andnl %ecx,%eax,%r12d
  3795. xorl %r15d,%r13d
  3796. rorxl $6,%eax,%r14d
  3797. leal (%rdx,%r12,1),%edx
  3798. xorl %r14d,%r13d
  3799. movl %r8d,%r15d
  3800. rorxl $22,%r8d,%r12d
  3801. leal (%rdx,%r13,1),%edx
  3802. xorl %r9d,%r15d
  3803. rorxl $13,%r8d,%r14d
  3804. rorxl $2,%r8d,%r13d
  3805. leal (%r11,%rdx,1),%r11d
  3806. andl %r15d,%esi
  3807. vpand %xmm12,%xmm11,%xmm8  # xmm8 = 10-round candidate & mask xmm12
  3808. vaesenc %xmm10,%xmm9,%xmm9
  3809. vmovdqu 192-128(%rdi),%xmm10
  3810. xorl %r12d,%r14d
  3811. xorl %r9d,%esi
  3812. xorl %r13d,%r14d
  3813. leal (%rdx,%rsi,1),%edx
  3814. movl %eax,%r12d
  3815. addl 36+16(%rbp),%ecx
  3816. andl %r11d,%r12d
  3817. rorxl $25,%r11d,%r13d
  3818. rorxl $11,%r11d,%esi
  3819. leal (%rdx,%r14,1),%edx
  3820. leal (%rcx,%r12,1),%ecx
  3821. andnl %ebx,%r11d,%r12d
  3822. xorl %esi,%r13d
  3823. rorxl $6,%r11d,%r14d
  3824. leal (%rcx,%r12,1),%ecx
  3825. xorl %r14d,%r13d
  3826. movl %edx,%esi
  3827. rorxl $22,%edx,%r12d
  3828. leal (%rcx,%r13,1),%ecx
  3829. xorl %r8d,%esi
  3830. rorxl $13,%edx,%r14d
  3831. rorxl $2,%edx,%r13d
  3832. leal (%r10,%rcx,1),%r10d
  3833. andl %esi,%r15d
  3834. vaesenclast %xmm10,%xmm9,%xmm11  # candidate result after 12 rounds
  3835. vaesenc %xmm10,%xmm9,%xmm9
  3836. vmovdqu 208-128(%rdi),%xmm10
  3837. xorl %r12d,%r14d
  3838. xorl %r8d,%r15d
  3839. xorl %r13d,%r14d
  3840. leal (%rcx,%r15,1),%ecx
  3841. movl %r11d,%r12d
  3842. addl 40+16(%rbp),%ebx
  3843. andl %r10d,%r12d
  3844. rorxl $25,%r10d,%r13d
  3845. rorxl $11,%r10d,%r15d
  3846. leal (%rcx,%r14,1),%ecx
  3847. leal (%rbx,%r12,1),%ebx
  3848. andnl %eax,%r10d,%r12d
  3849. xorl %r15d,%r13d
  3850. rorxl $6,%r10d,%r14d
  3851. leal (%rbx,%r12,1),%ebx
  3852. xorl %r14d,%r13d
  3853. movl %ecx,%r15d
  3854. rorxl $22,%ecx,%r12d
  3855. leal (%rbx,%r13,1),%ebx
  3856. xorl %edx,%r15d
  3857. rorxl $13,%ecx,%r14d
  3858. rorxl $2,%ecx,%r13d
  3859. leal (%r9,%rbx,1),%r9d
  3860. andl %r15d,%esi
  3861. vpand %xmm13,%xmm11,%xmm11  # 12-round candidate & mask xmm13
  3862. vaesenc %xmm10,%xmm9,%xmm9
  3863. vmovdqu 224-128(%rdi),%xmm10
  3864. xorl %r12d,%r14d
  3865. xorl %edx,%esi
  3866. xorl %r13d,%r14d
  3867. leal (%rbx,%rsi,1),%ebx
  3868. movl %r10d,%r12d
  3869. addl 44+16(%rbp),%eax
  3870. andl %r9d,%r12d
  3871. rorxl $25,%r9d,%r13d
  3872. rorxl $11,%r9d,%esi
  3873. leal (%rbx,%r14,1),%ebx
  3874. leal (%rax,%r12,1),%eax
  3875. andnl %r11d,%r9d,%r12d
  3876. xorl %esi,%r13d
  3877. rorxl $6,%r9d,%r14d
  3878. leal (%rax,%r12,1),%eax
  3879. xorl %r14d,%r13d
  3880. movl %ebx,%esi
  3881. rorxl $22,%ebx,%r12d
  3882. leal (%rax,%r13,1),%eax
  3883. xorl %ecx,%esi
  3884. rorxl $13,%ebx,%r14d
  3885. rorxl $2,%ebx,%r13d
  3886. leal (%r8,%rax,1),%r8d
  3887. andl %esi,%r15d
  3888. vpor %xmm11,%xmm8,%xmm8  # merge the 10- and 12-round candidates
  3889. vaesenclast %xmm10,%xmm9,%xmm11  # candidate result after 14 rounds
  3890. vmovdqu 0-128(%rdi),%xmm10  # reload the first key for the next block
  3891. xorl %r12d,%r14d
  3892. xorl %ecx,%r15d
  3893. xorl %r13d,%r14d
  3894. leal (%rax,%r15,1),%eax
  3895. movl %r9d,%r12d
  3896. vmovq %xmm15,%r13  # recover the input pointer saved at the top
  3897. vpextrq $1,%xmm15,%r15  # r15 = high qword of xmm15
  3898. vpand %xmm14,%xmm11,%xmm11  # 14-round candidate & mask xmm14
  3899. vpor %xmm11,%xmm8,%xmm8  # xmm8 = selected ciphertext block
  3900. leaq -64(%rbp),%rbp  # second 64-byte step of this pass
  3901. vmovdqu %xmm8,(%r15,%r13,1)  # store ciphertext at r15 + r13
  3902. leaq 16(%r13),%r13  # advance to the next 16-byte block
  3903. cmpq %rsp,%rbp
  3904. jae L$ower_avx2  # repeat while rbp >= rsp (unsigned)
  # ---- loop exit: fold the working variables into the chaining value ----
  3905. movq 552(%rsp),%r15  # r15 = pointer kept in the frame; base of the 8 dwords updated below
  3906. leaq 64(%r13),%r13  # NOTE(review): reason for this extra 64-byte step is not visible in this chunk
  3907. movq 560(%rsp),%rsi  # second frame value, combined with r13 below
  3908. addl %r14d,%eax  # add the Sigma0 term still pending from the last round
  3909. leaq 448(%rsp),%rsp  # raise rsp by 448: old 528(%rsp) is now 64+16(%rsp)
  3910. addl 0(%r15),%eax
  3911. addl 4(%r15),%ebx
  3912. addl 8(%r15),%ecx
  3913. addl 12(%r15),%edx
  3914. addl 16(%r15),%r8d
  3915. addl 20(%r15),%r9d
  3916. addl 24(%r15),%r10d
  3917. leaq (%rsi,%r13,1),%r12  # r12 = rsi + r13; NOTE(review): presumably the next data to hash
  3918. addl 28(%r15),%r11d
  3919. cmpq 64+16(%rsp),%r13  # flags used by cmoveq and jbe below; the movl stores leave them intact
  3920. movl %eax,0(%r15)
  3921. cmoveq %rsp,%r12  # r13 == limit: point r12 at the stack instead
  3922. movl %ebx,4(%r15)
  3923. movl %ecx,8(%r15)
  3924. movl %edx,12(%r15)
  3925. movl %r8d,16(%r15)
  3926. movl %r9d,20(%r15)
  3927. movl %r10d,24(%r15)
  3928. movl %r11d,28(%r15)
  3929. jbe L$oop_avx2  # r13 <= limit (unsigned): run the outer loop again
  3930. leaq (%rsp),%rbp  # the code at L$done_avx2 addresses the frame through rbp
  # L$done_avx2 -- common exit of the AVX2 path. On arrival rbp holds the
  # frame base (both visible paths set it with leaq just before getting here).
  # Stores the last ciphertext block, wipes the vector registers, restores the
  # six callee-saved registers from just below the address held in the frame
  # slot 64+56(%rbp), and returns.
  3931. L$done_avx2:
  3932. movq 64+32(%rbp),%r8  # r8 = destination pointer kept in the frame
  3933. movq 64+56(%rbp),%rsi  # rsi = NOTE(review): presumably the stack pointer saved by the prologue
  3934. vmovdqu %xmm8,(%r8)  # write out xmm8 (last ciphertext block); presumably the IV write-back
  3935. vzeroall  # clear all vector registers before returning
  3936. movq -48(%rsi),%r15
  3937. movq -40(%rsi),%r14
  3938. movq -32(%rsi),%r13
  3939. movq -24(%rsi),%r12
  3940. movq -16(%rsi),%rbp
  3941. movq -8(%rsi),%rbx
  3942. leaq (%rsi),%rsp  # rsp = rsi, discarding the whole frame
  3943. L$epilogue_avx2:
  3944. .byte 0xf3,0xc3  # encoding of "rep ret"
  3945. .p2align 5
  # aesni_cbc_sha256_enc_shaext -- entry sequence only; the loop it jumps to
  # continues past the end of this chunk.
  # Registers as used by the visible code:
  #   rdi      input pointer (16-byte blocks are read from 0(%rdi), 16(%rdi))
  #   rsi      output pointer, turned into an offset from rdi below
  #   rcx      AES key material: round keys from 0(%rcx), a dword at 240(%rcx)
  #   r8       16-byte value loaded into xmm6 (the CBC chaining value)
  #   r9       32 bytes loaded into xmm1/xmm2; presumably the SHA-256 state
  #   8(%rsp)  pointer argument passed on the stack, kept in r10; the loop
  #            reads 64 bytes at a time from it
  3946. aesni_cbc_sha256_enc_shaext:
  3947. movq 8(%rsp),%r10  # stack-passed pointer argument
  3948. leaq K256+128(%rip),%rax  # biased table base: entries are addressed as N-128(%rax)
  3949. movdqu (%r9),%xmm1
  3950. movdqu 16(%r9),%xmm2
  3951. movdqa 512-128(%rax),%xmm3  # 16 bytes at K256+512; used as the shuffle mask in the loop
  3952. movl 240(%rcx),%r11d  # compared with 11 in the loop to pick the AES round count
  3953. subq %rdi,%rsi  # rsi = out - in, so stores can use (%rsi,%rdi,1)
  3954. movups (%rcx),%xmm15  # first round key
  3955. movups (%r8),%xmm6  # initial chaining value
  3956. movups 16(%rcx),%xmm4  # second round key
  3957. leaq 112(%rcx),%rcx  # bias key pointer; loop uses -80..112(%rcx)
  # Shuffle the two state vectors; presumably into the layout sha256rnds2 expects.
  3958. pshufd $0x1b,%xmm1,%xmm0
  3959. pshufd $0xb1,%xmm1,%xmm1
  3960. pshufd $0x1b,%xmm2,%xmm2
  3961. movdqa %xmm3,%xmm7  # keep a copy of the mask in xmm7
  3962. .byte 102,15,58,15,202,8  # palignr $8,%xmm2,%xmm1
  3963. punpcklqdq %xmm0,%xmm2
  3964. jmp L$oop_shaext
  3965. .p2align 4
  3966. L$oop_shaext:
  3967. movdqu (%r10),%xmm10
  3968. movdqu 16(%r10),%xmm11
  3969. movdqu 32(%r10),%xmm12
  3970. .byte 102,68,15,56,0,211
  3971. movdqu 48(%r10),%xmm13
  3972. movdqa 0-128(%rax),%xmm0
  3973. paddd %xmm10,%xmm0
  3974. .byte 102,68,15,56,0,219
  3975. movdqa %xmm2,%xmm9
  3976. movdqa %xmm1,%xmm8
  3977. movups 0(%rdi),%xmm14
  3978. xorps %xmm15,%xmm14
  3979. xorps %xmm14,%xmm6
  3980. movups -80(%rcx),%xmm5
  3981. aesenc %xmm4,%xmm6
  3982. .byte 15,56,203,209
  3983. pshufd $0x0e,%xmm0,%xmm0
  3984. movups -64(%rcx),%xmm4
  3985. aesenc %xmm5,%xmm6
  3986. .byte 15,56,203,202
  3987. movdqa 32-128(%rax),%xmm0
  3988. paddd %xmm11,%xmm0
  3989. .byte 102,68,15,56,0,227
  3990. leaq 64(%r10),%r10
  3991. movups -48(%rcx),%xmm5
  3992. aesenc %xmm4,%xmm6
  3993. .byte 15,56,203,209
  3994. pshufd $0x0e,%xmm0,%xmm0
  3995. movups -32(%rcx),%xmm4
  3996. aesenc %xmm5,%xmm6
  3997. .byte 15,56,203,202
  3998. movdqa 64-128(%rax),%xmm0
  3999. paddd %xmm12,%xmm0
  4000. .byte 102,68,15,56,0,235
  4001. .byte 69,15,56,204,211
  4002. movups -16(%rcx),%xmm5
  4003. aesenc %xmm4,%xmm6
  4004. .byte 15,56,203,209
  4005. pshufd $0x0e,%xmm0,%xmm0
  4006. movdqa %xmm13,%xmm3
  4007. .byte 102,65,15,58,15,220,4
  4008. paddd %xmm3,%xmm10
  4009. movups 0(%rcx),%xmm4
  4010. aesenc %xmm5,%xmm6
  4011. .byte 15,56,203,202
  4012. movdqa 96-128(%rax),%xmm0
  4013. paddd %xmm13,%xmm0
  4014. .byte 69,15,56,205,213
  4015. .byte 69,15,56,204,220
  4016. movups 16(%rcx),%xmm5
  4017. aesenc %xmm4,%xmm6
  4018. .byte 15,56,203,209
  4019. pshufd $0x0e,%xmm0,%xmm0
  4020. movups 32(%rcx),%xmm4
  4021. aesenc %xmm5,%xmm6
  4022. movdqa %xmm10,%xmm3
  4023. .byte 102,65,15,58,15,221,4
  4024. paddd %xmm3,%xmm11
  4025. .byte 15,56,203,202
  4026. movdqa 128-128(%rax),%xmm0
  4027. paddd %xmm10,%xmm0
  4028. .byte 69,15,56,205,218
  4029. .byte 69,15,56,204,229
  4030. movups 48(%rcx),%xmm5
  4031. aesenc %xmm4,%xmm6
  4032. .byte 15,56,203,209
  4033. pshufd $0x0e,%xmm0,%xmm0
  4034. movdqa %xmm11,%xmm3
  4035. .byte 102,65,15,58,15,218,4
  4036. paddd %xmm3,%xmm12
  4037. cmpl $11,%r11d
  4038. jb L$aesenclast1
  4039. movups 64(%rcx),%xmm4
  4040. aesenc %xmm5,%xmm6
  4041. movups 80(%rcx),%xmm5
  4042. aesenc %xmm4,%xmm6
  4043. je L$aesenclast1
  4044. movups 96(%rcx),%xmm4
  4045. aesenc %xmm5,%xmm6
  4046. movups 112(%rcx),%xmm5
  4047. aesenc %xmm4,%xmm6
  4048. L$aesenclast1:
  4049. aesenclast %xmm5,%xmm6
  4050. movups 16-112(%rcx),%xmm4
  4051. nop
  4052. .byte 15,56,203,202
  4053. movups 16(%rdi),%xmm14
  4054. xorps %xmm15,%xmm14
  4055. movups %xmm6,0(%rsi,%rdi,1)
  4056. xorps %xmm14,%xmm6
  4057. movups -80(%rcx),%xmm5
  4058. aesenc %xmm4,%xmm6
  4059. movdqa 160-128(%rax),%xmm0
  4060. paddd %xmm11,%xmm0
  4061. .byte 69,15,56,205,227
  4062. .byte 69,15,56,204,234
  4063. movups -64(%rcx),%xmm4
  4064. aesenc %xmm5,%xmm6
  4065. .byte 15,56,203,209
  4066. pshufd $0x0e,%xmm0,%xmm0
  4067. movdqa %xmm12,%xmm3
  4068. .byte 102,65,15,58,15,219,4
  4069. paddd %xmm3,%xmm13
  4070. movups -48(%rcx),%xmm5
  4071. aesenc %xmm4,%xmm6
  4072. .byte 15,56,203,202
  4073. movdqa 192-128(%rax),%xmm0
  4074. paddd %xmm12,%xmm0
  4075. .byte 69,15,56,205,236
  4076. .byte 69,15,56,204,211
  4077. movups -32(%rcx),%xmm4
  4078. aesenc %xmm5,%xmm6
  4079. .byte 15,56,203,209
  4080. pshufd $0x0e,%xmm0,%xmm0
  4081. movdqa %xmm13,%xmm3
  4082. .byte 102,65,15,58,15,220,4
  4083. paddd %xmm3,%xmm10
  4084. movups -16(%rcx),%xmm5
  4085. aesenc %xmm4,%xmm6
  4086. .byte 15,56,203,202
  4087. movdqa 224-128(%rax),%xmm0
  4088. paddd %xmm13,%xmm0
  4089. .byte 69,15,56,205,213
  4090. .byte 69,15,56,204,220
  4091. movups 0(%rcx),%xmm4
  4092. aesenc %xmm5,%xmm6
  4093. .byte 15,56,203,209
  4094. pshufd $0x0e,%xmm0,%xmm0
  4095. movdqa %xmm10,%xmm3
  4096. .byte 102,65,15,58,15,221,4
  4097. paddd %xmm3,%xmm11
  4098. movups 16(%rcx),%xmm5
  4099. aesenc %xmm4,%xmm6
  4100. .byte 15,56,203,202
  4101. movdqa 256-128(%rax),%xmm0
  4102. paddd %xmm10,%xmm0
  4103. .byte 69,15,56,205,218
  4104. .byte 69,15,56,204,229
  4105. movups 32(%rcx),%xmm4
  4106. aesenc %xmm5,%xmm6
  4107. .byte 15,56,203,209
  4108. pshufd $0x0e,%xmm0,%xmm0
  4109. movdqa %xmm11,%xmm3
  4110. .byte 102,65,15,58,15,218,4
  4111. paddd %xmm3,%xmm12
  4112. movups 48(%rcx),%xmm5
  4113. aesenc %xmm4,%xmm6
  4114. cmpl $11,%r11d
  4115. jb L$aesenclast2
  4116. movups 64(%rcx),%xmm4
  4117. aesenc %xmm5,%xmm6
  4118. movups 80(%rcx),%xmm5
  4119. aesenc %xmm4,%xmm6
  4120. je L$aesenclast2
  4121. movups 96(%rcx),%xmm4
  4122. aesenc %xmm5,%xmm6
  4123. movups 112(%rcx),%xmm5
  4124. aesenc %xmm4,%xmm6
  4125. L$aesenclast2:
  4126. aesenclast %xmm5,%xmm6
  4127. movups 16-112(%rcx),%xmm4
  4128. nop
  4129. .byte 15,56,203,202
  4130. movups 32(%rdi),%xmm14
  4131. xorps %xmm15,%xmm14
  4132. movups %xmm6,16(%rsi,%rdi,1)
  4133. xorps %xmm14,%xmm6
  4134. movups -80(%rcx),%xmm5
  4135. aesenc %xmm4,%xmm6
  4136. movdqa 288-128(%rax),%xmm0
  4137. paddd %xmm11,%xmm0
  4138. .byte 69,15,56,205,227
  4139. .byte 69,15,56,204,234
  4140. movups -64(%rcx),%xmm4
  4141. aesenc %xmm5,%xmm6
  4142. .byte 15,56,203,209
  4143. pshufd $0x0e,%xmm0,%xmm0
  4144. movdqa %xmm12,%xmm3
  4145. .byte 102,65,15,58,15,219,4
  4146. paddd %xmm3,%xmm13
  4147. movups -48(%rcx),%xmm5
  4148. aesenc %xmm4,%xmm6
  4149. .byte 15,56,203,202
  4150. movdqa 320-128(%rax),%xmm0
  4151. paddd %xmm12,%xmm0
  4152. .byte 69,15,56,205,236
  4153. .byte 69,15,56,204,211
  4154. movups -32(%rcx),%xmm4
  4155. aesenc %xmm5,%xmm6
  4156. .byte 15,56,203,209
  4157. pshufd $0x0e,%xmm0,%xmm0
  4158. movdqa %xmm13,%xmm3
  4159. .byte 102,65,15,58,15,220,4
  4160. paddd %xmm3,%xmm10
  4161. movups -16(%rcx),%xmm5
  4162. aesenc %xmm4,%xmm6
  4163. .byte 15,56,203,202
  4164. movdqa 352-128(%rax),%xmm0
  4165. paddd %xmm13,%xmm0
  4166. .byte 69,15,56,205,213
  4167. .byte 69,15,56,204,220
  4168. movups 0(%rcx),%xmm4
  4169. aesenc %xmm5,%xmm6
  4170. .byte 15,56,203,209
  4171. pshufd $0x0e,%xmm0,%xmm0
  4172. movdqa %xmm10,%xmm3
  4173. .byte 102,65,15,58,15,221,4
  4174. paddd %xmm3,%xmm11
  4175. movups 16(%rcx),%xmm5
  4176. aesenc %xmm4,%xmm6
  4177. .byte 15,56,203,202
  4178. movdqa 384-128(%rax),%xmm0
  4179. paddd %xmm10,%xmm0
  4180. .byte 69,15,56,205,218
  4181. .byte 69,15,56,204,229
  4182. movups 32(%rcx),%xmm4
  4183. aesenc %xmm5,%xmm6
  4184. .byte 15,56,203,209
  4185. pshufd $0x0e,%xmm0,%xmm0
  4186. movdqa %xmm11,%xmm3
  4187. .byte 102,65,15,58,15,218,4
  4188. paddd %xmm3,%xmm12
  4189. movups 48(%rcx),%xmm5
  4190. aesenc %xmm4,%xmm6
  4191. .byte 15,56,203,202
  4192. movdqa 416-128(%rax),%xmm0
  4193. paddd %xmm11,%xmm0
  4194. .byte 69,15,56,205,227
  4195. .byte 69,15,56,204,234
  4196. cmpl $11,%r11d
  4197. jb L$aesenclast3
  4198. movups 64(%rcx),%xmm4
  4199. aesenc %xmm5,%xmm6
  4200. movups 80(%rcx),%xmm5
  4201. aesenc %xmm4,%xmm6
  4202. je L$aesenclast3
  4203. movups 96(%rcx),%xmm4
  4204. aesenc %xmm5,%xmm6
  4205. movups 112(%rcx),%xmm5
  4206. aesenc %xmm4,%xmm6
  4207. L$aesenclast3:
  4208. aesenclast %xmm5,%xmm6
  4209. movups 16-112(%rcx),%xmm4
  4210. nop
  4211. .byte 15,56,203,209
  4212. pshufd $0x0e,%xmm0,%xmm0
  4213. movdqa %xmm12,%xmm3
  4214. .byte 102,65,15,58,15,219,4
  4215. paddd %xmm3,%xmm13
  4216. movups 48(%rdi),%xmm14
  4217. xorps %xmm15,%xmm14
  4218. movups %xmm6,32(%rsi,%rdi,1)
  4219. xorps %xmm14,%xmm6
  4220. movups -80(%rcx),%xmm5
  4221. aesenc %xmm4,%xmm6
  4222. movups -64(%rcx),%xmm4
  4223. aesenc %xmm5,%xmm6
  4224. .byte 15,56,203,202
  4225. movdqa 448-128(%rax),%xmm0
  4226. paddd %xmm12,%xmm0
  4227. .byte 69,15,56,205,236
  4228. movdqa %xmm7,%xmm3
  4229. movups -48(%rcx),%xmm5
  4230. aesenc %xmm4,%xmm6
  4231. .byte 15,56,203,209
  4232. pshufd $0x0e,%xmm0,%xmm0
  4233. movups -32(%rcx),%xmm4
  4234. aesenc %xmm5,%xmm6
  4235. .byte 15,56,203,202
  4236. movdqa 480-128(%rax),%xmm0
  4237. paddd %xmm13,%xmm0
  4238. movups -16(%rcx),%xmm5
  4239. aesenc %xmm4,%xmm6
  4240. movups 0(%rcx),%xmm4
  4241. aesenc %xmm5,%xmm6
  4242. .byte 15,56,203,209
  4243. pshufd $0x0e,%xmm0,%xmm0
  4244. movups 16(%rcx),%xmm5
  4245. aesenc %xmm4,%xmm6
  4246. .byte 15,56,203,202
  4247. movups 32(%rcx),%xmm4
  4248. aesenc %xmm5,%xmm6
  4249. movups 48(%rcx),%xmm5
  4250. aesenc %xmm4,%xmm6
  4251. cmpl $11,%r11d
  4252. jb L$aesenclast4
  4253. movups 64(%rcx),%xmm4
  4254. aesenc %xmm5,%xmm6
  4255. movups 80(%rcx),%xmm5
  4256. aesenc %xmm4,%xmm6
  4257. je L$aesenclast4
  4258. movups 96(%rcx),%xmm4
  4259. aesenc %xmm5,%xmm6
  4260. movups 112(%rcx),%xmm5
  4261. aesenc %xmm4,%xmm6
  4262. L$aesenclast4:
  4263. aesenclast %xmm5,%xmm6
  4264. movups 16-112(%rcx),%xmm4
  4265. nop
  4266. paddd %xmm9,%xmm2
  4267. paddd %xmm8,%xmm1
  4268. decq %rdx
  4269. movups %xmm6,48(%rsi,%rdi,1)
  4270. leaq 64(%rdi),%rdi
  4271. jnz L$oop_shaext
  4272. pshufd $0xb1,%xmm2,%xmm2
  4273. pshufd $0x1b,%xmm1,%xmm3
  4274. pshufd $0xb1,%xmm1,%xmm1
  4275. punpckhqdq %xmm2,%xmm1
  4276. .byte 102,15,58,15,211,8
  4277. movups %xmm6,(%r8)
  4278. movdqu %xmm1,(%r9)
  4279. movdqu %xmm2,16(%r9)
  4280. .byte 0xf3,0xc3