sha256-mb-x86_64.masm 162 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
72778277927802781278227832784278527862787278827892790279127922793279427952796279727982799280028012802280328042805280628072808280928102811281228132814281528162817281828192820282128222823282428252826282728282829283028312832283328342835283628372838283928402841284228432844284528462847284828492850285128522853285428552856285728582859286028612862286328642865286628672868286928702871287228732874287528762877287828792880288128822883288428852886288728882889289028912892289328942895289628972898289929002901290229032904290529062907290829092910291129122913291429152916291729182919292029212922292329242925292629272928292929302931293229332934293529362937293829392940294129422943294429452946294729482949295029512952295329542955295629572958295929602961296229632964296529662967296829692970297129722973297429752976297729782979298029812982298329842985298629872988298929902991299229932994299529962997299829993000300130023003300430053006300730083009301030113012301330143015301630173018301930203021302230233024302530263027302830293030303130323033303430353036303730383039304030413042304330443045304630473048304930503051305230533054305530563057305830593060306130623063306430653066306730683069307030713072307330743075307630773078307930803081308230833084308530863087308830893090309130923093309430953096309730983099310031013102310331043105310631073108310931103111311231133114311531163117311831193120312131223123312431253126312731283129313031313132313331343135313631373138313931403141314231433144314531463147314831493150315131523153315431553156315731583159316031613162316331643165316631673168316931703171317231733174317531763177317831793180318131823183318431853186318731883189319031913192319331943195319631973198319932003201320232033204320532063207320832093210321132123213321432153216321732183219322032213222322332243225322632273228322932303231323232333234323532363237323832393240324132423243324432453246324732483249325032513252325332543255325632573258325932603261326232633264326532663267326832693270327132723273327432753276327
73278327932803281328232833284328532863287328832893290329132923293329432953296329732983299330033013302330333043305330633073308330933103311331233133314331533163317331833193320332133223323332433253326332733283329333033313332333333343335333633373338333933403341334233433344334533463347334833493350335133523353335433553356335733583359336033613362336333643365336633673368336933703371337233733374337533763377337833793380338133823383338433853386338733883389339033913392339333943395339633973398339934003401340234033404340534063407340834093410341134123413341434153416341734183419342034213422342334243425342634273428342934303431343234333434343534363437343834393440344134423443344434453446344734483449345034513452345334543455345634573458345934603461346234633464346534663467346834693470347134723473347434753476347734783479348034813482348334843485348634873488348934903491349234933494349534963497349834993500350135023503350435053506350735083509351035113512351335143515351635173518351935203521352235233524352535263527352835293530353135323533353435353536353735383539354035413542354335443545354635473548354935503551355235533554355535563557355835593560356135623563356435653566356735683569357035713572357335743575357635773578357935803581358235833584358535863587358835893590359135923593359435953596359735983599360036013602360336043605360636073608360936103611361236133614361536163617361836193620362136223623362436253626362736283629363036313632363336343635363636373638363936403641364236433644364536463647364836493650365136523653365436553656365736583659366036613662366336643665366636673668366936703671367236733674367536763677367836793680368136823683368436853686368736883689369036913692369336943695369636973698369937003701370237033704370537063707370837093710371137123713371437153716371737183719372037213722372337243725372637273728372937303731373237333734373537363737373837393740374137423743374437453746374737483749375037513752375337543755375637573758375937603761376237633764376537663767376837693770377137723773377437753776377
73778377937803781378237833784378537863787378837893790379137923793379437953796379737983799380038013802380338043805380638073808380938103811381238133814381538163817381838193820382138223823382438253826382738283829383038313832383338343835383638373838383938403841384238433844384538463847384838493850385138523853385438553856385738583859386038613862386338643865386638673868386938703871387238733874387538763877387838793880388138823883388438853886388738883889389038913892389338943895389638973898389939003901390239033904390539063907390839093910391139123913391439153916391739183919392039213922392339243925392639273928392939303931393239333934393539363937393839393940394139423943394439453946394739483949395039513952395339543955395639573958395939603961396239633964396539663967396839693970397139723973397439753976397739783979398039813982398339843985398639873988398939903991399239933994399539963997399839994000400140024003400440054006400740084009401040114012401340144015401640174018401940204021402240234024402540264027402840294030403140324033403440354036403740384039404040414042404340444045404640474048404940504051405240534054405540564057405840594060406140624063406440654066406740684069407040714072407340744075407640774078407940804081408240834084408540864087408840894090409140924093409440954096409740984099410041014102410341044105410641074108410941104111411241134114411541164117411841194120412141224123412441254126412741284129413041314132413341344135413641374138413941404141414241434144414541464147414841494150415141524153415441554156415741584159416041614162416341644165416641674168416941704171417241734174417541764177417841794180418141824183418441854186418741884189419041914192419341944195419641974198419942004201420242034204420542064207420842094210421142124213421442154216421742184219422042214222422342244225422642274228422942304231423242334234423542364237423842394240424142424243424442454246424742484249425042514252425342544255425642574258425942604261426242634264426542664267426842694270427142724273427442754276427
74278427942804281428242834284428542864287428842894290429142924293429442954296429742984299430043014302430343044305430643074308430943104311431243134314431543164317431843194320432143224323432443254326432743284329433043314332433343344335433643374338433943404341434243434344434543464347434843494350435143524353435443554356435743584359436043614362436343644365436643674368436943704371437243734374437543764377437843794380438143824383438443854386438743884389439043914392439343944395439643974398439944004401440244034404440544064407440844094410441144124413441444154416441744184419442044214422442344244425442644274428442944304431443244334434443544364437443844394440444144424443444444454446444744484449445044514452445344544455445644574458445944604461446244634464446544664467446844694470447144724473447444754476447744784479448044814482448344844485448644874488448944904491449244934494449544964497449844994500450145024503450445054506450745084509451045114512451345144515451645174518451945204521452245234524452545264527452845294530453145324533453445354536453745384539454045414542454345444545454645474548454945504551455245534554455545564557455845594560456145624563456445654566456745684569457045714572457345744575457645774578457945804581458245834584458545864587458845894590459145924593459445954596459745984599460046014602460346044605460646074608460946104611461246134614461546164617461846194620462146224623462446254626462746284629463046314632463346344635463646374638463946404641464246434644464546464647464846494650465146524653465446554656465746584659466046614662466346644665466646674668466946704671467246734674467546764677467846794680468146824683468446854686468746884689469046914692469346944695469646974698469947004701470247034704470547064707470847094710471147124713471447154716471747184719472047214722472347244725472647274728472947304731473247334734473547364737473847394740474147424743474447454746474747484749475047514752475347544755475647574758475947604761476247634764476547664767476847694770477147724773477447754776477
74778477947804781478247834784478547864787478847894790479147924793479447954796479747984799480048014802480348044805480648074808480948104811481248134814481548164817481848194820482148224823482448254826482748284829483048314832483348344835483648374838483948404841484248434844484548464847484848494850485148524853485448554856485748584859486048614862486348644865486648674868486948704871487248734874487548764877487848794880488148824883488448854886488748884889489048914892489348944895489648974898489949004901490249034904490549064907490849094910491149124913491449154916491749184919492049214922492349244925492649274928492949304931493249334934493549364937493849394940494149424943494449454946494749484949495049514952495349544955495649574958495949604961496249634964496549664967496849694970497149724973497449754976497749784979498049814982498349844985498649874988498949904991499249934994499549964997499849995000500150025003500450055006500750085009501050115012501350145015501650175018501950205021502250235024502550265027502850295030503150325033503450355036503750385039504050415042504350445045504650475048504950505051505250535054505550565057505850595060506150625063506450655066506750685069507050715072507350745075507650775078507950805081508250835084508550865087508850895090509150925093509450955096509750985099510051015102510351045105510651075108510951105111511251135114511551165117511851195120512151225123512451255126512751285129513051315132513351345135513651375138513951405141514251435144514551465147514851495150515151525153515451555156515751585159516051615162516351645165516651675168516951705171517251735174517551765177517851795180518151825183518451855186518751885189519051915192519351945195519651975198519952005201520252035204520552065207520852095210521152125213521452155216521752185219522052215222522352245225522652275228522952305231523252335234523552365237523852395240524152425243524452455246524752485249525052515252525352545255525652575258525952605261526252635264526552665267526852695270527152725273527452755276527
75278527952805281528252835284528552865287528852895290529152925293529452955296529752985299530053015302530353045305530653075308530953105311531253135314531553165317531853195320532153225323532453255326532753285329533053315332533353345335533653375338533953405341534253435344534553465347534853495350535153525353535453555356535753585359536053615362536353645365536653675368536953705371537253735374537553765377537853795380538153825383538453855386538753885389539053915392539353945395539653975398539954005401540254035404540554065407540854095410541154125413541454155416541754185419542054215422542354245425542654275428542954305431543254335434543554365437543854395440544154425443544454455446544754485449545054515452545354545455545654575458545954605461546254635464546554665467546854695470547154725473547454755476547754785479548054815482548354845485548654875488548954905491549254935494549554965497549854995500550155025503550455055506550755085509551055115512551355145515551655175518551955205521552255235524552555265527552855295530553155325533553455355536553755385539554055415542554355445545554655475548554955505551555255535554555555565557555855595560556155625563556455655566556755685569557055715572557355745575557655775578557955805581558255835584558555865587558855895590559155925593559455955596559755985599560056015602560356045605560656075608560956105611561256135614561556165617561856195620562156225623562456255626562756285629563056315632563356345635563656375638563956405641564256435644564556465647564856495650565156525653565456555656565756585659566056615662566356645665566656675668566956705671567256735674567556765677567856795680568156825683568456855686568756885689569056915692569356945695569656975698569957005701570257035704570557065707570857095710571157125713571457155716571757185719572057215722572357245725572657275728572957305731573257335734573557365737573857395740574157425743574457455746574757485749575057515752575357545755575657575758575957605761576257635764576557665767576857695770577157725773577457755776577
75778577957805781578257835784578557865787578857895790579157925793579457955796579757985799580058015802580358045805580658075808580958105811581258135814581558165817581858195820582158225823582458255826582758285829583058315832583358345835583658375838583958405841584258435844584558465847584858495850585158525853585458555856585758585859586058615862586358645865586658675868586958705871587258735874587558765877587858795880588158825883588458855886588758885889589058915892589358945895589658975898589959005901590259035904590559065907590859095910591159125913591459155916591759185919592059215922592359245925592659275928592959305931593259335934593559365937593859395940594159425943594459455946594759485949595059515952595359545955595659575958595959605961596259635964596559665967596859695970597159725973597459755976597759785979598059815982598359845985598659875988598959905991599259935994599559965997599859996000600160026003600460056006600760086009601060116012601360146015601660176018601960206021602260236024602560266027602860296030603160326033603460356036603760386039604060416042604360446045604660476048604960506051605260536054605560566057605860596060606160626063606460656066606760686069607060716072607360746075607660776078607960806081608260836084608560866087608860896090609160926093609460956096609760986099610061016102610361046105610661076108610961106111611261136114611561166117611861196120612161226123612461256126612761286129613061316132613361346135613661376138613961406141614261436144614561466147614861496150615161526153615461556156615761586159616061616162616361646165616661676168616961706171617261736174617561766177617861796180618161826183618461856186618761886189619061916192619361946195619661976198619962006201620262036204620562066207620862096210621162126213621462156216621762186219622062216222622362246225622662276228622962306231623262336234623562366237623862396240624162426243624462456246624762486249625062516252625362546255625662576258625962606261626262636264626562666267626862696270627162726273627462756276627
76278627962806281628262836284628562866287628862896290629162926293629462956296629762986299630063016302630363046305630663076308630963106311631263136314631563166317631863196320632163226323632463256326632763286329633063316332633363346335633663376338633963406341634263436344634563466347634863496350635163526353635463556356635763586359636063616362636363646365636663676368636963706371637263736374637563766377637863796380638163826383638463856386638763886389639063916392639363946395639663976398639964006401640264036404640564066407640864096410641164126413641464156416641764186419642064216422642364246425642664276428642964306431643264336434643564366437643864396440644164426443644464456446644764486449645064516452645364546455645664576458645964606461646264636464646564666467646864696470647164726473647464756476647764786479648064816482648364846485648664876488648964906491649264936494649564966497649864996500650165026503650465056506650765086509651065116512651365146515651665176518651965206521652265236524652565266527652865296530653165326533653465356536653765386539654065416542654365446545654665476548654965506551655265536554655565566557655865596560656165626563656465656566656765686569657065716572657365746575657665776578657965806581658265836584658565866587658865896590659165926593659465956596659765986599660066016602660366046605660666076608660966106611661266136614661566166617661866196620662166226623662466256626662766286629663066316632663366346635663666376638663966406641664266436644664566466647664866496650665166526653665466556656665766586659666066616662666366646665666666676668666966706671667266736674667566766677667866796680668166826683668466856686668766886689669066916692669366946695669666976698669967006701670267036704670567066707670867096710671167126713671467156716671767186719672067216722672367246725672667276728672967306731673267336734673567366737673867396740674167426743674467456746674767486749675067516752675367546755675667576758675967606761676267636764676567666767676867696770677167726773677467756776677
76778677967806781678267836784678567866787678867896790679167926793679467956796679767986799680068016802680368046805680668076808680968106811681268136814681568166817681868196820682168226823682468256826682768286829683068316832683368346835683668376838683968406841684268436844684568466847684868496850685168526853685468556856685768586859686068616862686368646865686668676868686968706871687268736874687568766877687868796880688168826883688468856886688768886889689068916892689368946895689668976898689969006901690269036904690569066907690869096910691169126913691469156916691769186919692069216922692369246925692669276928692969306931693269336934693569366937693869396940694169426943694469456946694769486949695069516952695369546955695669576958695969606961696269636964696569666967696869696970697169726973697469756976697769786979698069816982698369846985698669876988698969906991699269936994699569966997699869997000700170027003700470057006700770087009701070117012701370147015701670177018701970207021702270237024702570267027702870297030703170327033703470357036703770387039704070417042704370447045704670477048704970507051705270537054705570567057705870597060706170627063706470657066706770687069707070717072707370747075707670777078707970807081708270837084708570867087708870897090709170927093709470957096709770987099710071017102710371047105710671077108710971107111711271137114711571167117711871197120712171227123712471257126712771287129713071317132713371347135713671377138713971407141714271437144714571467147714871497150715171527153715471557156715771587159716071617162716371647165716671677168716971707171717271737174717571767177717871797180718171827183718471857186718771887189719071917192719371947195719671977198719972007201720272037204720572067207720872097210721172127213721472157216721772187219722072217222722372247225722672277228722972307231723272337234723572367237723872397240724172427243724472457246724772487249725072517252725372547255725672577258725972607261726272637264726572667267726872697270727172727273727472757276727
77278727972807281728272837284728572867287728872897290729172927293729472957296729772987299730073017302730373047305730673077308730973107311731273137314731573167317731873197320732173227323732473257326732773287329733073317332733373347335733673377338733973407341734273437344734573467347734873497350735173527353735473557356735773587359736073617362736373647365736673677368736973707371737273737374737573767377737873797380738173827383738473857386738773887389739073917392739373947395739673977398739974007401740274037404740574067407740874097410741174127413741474157416741774187419742074217422742374247425742674277428742974307431743274337434743574367437743874397440744174427443744474457446744774487449745074517452745374547455745674577458745974607461746274637464746574667467746874697470747174727473747474757476747774787479748074817482748374847485748674877488748974907491749274937494749574967497749874997500750175027503750475057506750775087509751075117512751375147515751675177518751975207521752275237524752575267527752875297530753175327533753475357536753775387539754075417542754375447545754675477548754975507551755275537554755575567557755875597560756175627563756475657566756775687569757075717572757375747575757675777578757975807581758275837584758575867587758875897590759175927593759475957596759775987599760076017602760376047605760676077608760976107611761276137614761576167617761876197620762176227623762476257626762776287629763076317632763376347635763676377638763976407641764276437644764576467647764876497650765176527653765476557656765776587659766076617662766376647665766676677668766976707671767276737674767576767677767876797680768176827683768476857686768776887689769076917692769376947695769676977698769977007701770277037704770577067707770877097710771177127713771477157716771777187719772077217722772377247725772677277728772977307731773277337734773577367737773877397740774177427743774477457746774777487749775077517752775377547755775677577758775977607761776277637764776577667767776877697770777177727773777477757776777
7777877797780778177827783778477857786778777887789779077917792779377947795779677977798779978007801780278037804780578067807780878097810781178127813781478157816781778187819782078217822782378247825782678277828782978307831783278337834783578367837783878397840784178427843784478457846784778487849785078517852785378547855785678577858785978607861786278637864786578667867786878697870787178727873787478757876787778787879788078817882788378847885788678877888788978907891789278937894789578967897789878997900790179027903790479057906790779087909791079117912791379147915791679177918791979207921792279237924792579267927792879297930793179327933793479357936793779387939794079417942794379447945794679477948794979507951795279537954795579567957795879597960796179627963796479657966796779687969797079717972797379747975797679777978797979807981798279837984798579867987798879897990799179927993799479957996799779987999800080018002800380048005800680078008800980108011801280138014801580168017801880198020802180228023802480258026802780288029803080318032803380348035803680378038803980408041804280438044804580468047804880498050805180528053805480558056805780588059806080618062806380648065806680678068806980708071807280738074807580768077807880798080808180828083808480858086808780888089809080918092809380948095809680978098809981008101810281038104810581068107810881098110811181128113811481158116811781188119812081218122812381248125812681278128812981308131813281338134813581368137813881398140814181428143814481458146814781488149815081518152815381548155815681578158815981608161816281638164816581668167816881698170817181728173817481758176817781788179818081818182818381848185818681878188818981908191819281938194819581968197819881998200820182028203820482058206820782088209821082118212821382148215821682178218821982208221822282238224822582268227822882298230823182328233823482358236823782388239824082418242824382448245824682478248824982508251825282538254825582568257825882598260
  1. OPTION DOTNAME ; allow identifiers that begin with a dot, as the segment name ".text$" on the next line does
  2. .text$ SEGMENT ALIGN(256) 'CODE' ; open the segment named ".text$": 256-byte alignment, class 'CODE'
  3. EXTERN OPENSSL_ia32cap_P:NEAR ; defined in another module; sha256_multi_block loads a qword from offset +4 of it and tests bits to pick a code path
  4. PUBLIC sha256_multi_block ; make the procedure defined below visible to other modules
  5. ALIGN 32 ; start the following procedure on a 32-byte boundary
  6. sha256_multi_block PROC PUBLIC
  7. mov QWORD PTR[8+rsp],rdi ;WIN64 prologue
  8. mov QWORD PTR[16+rsp],rsi
  9. mov rax,rsp
  10. $L$SEH_begin_sha256_multi_block::
  11. mov rdi,rcx
  12. mov rsi,rdx
  13. mov rdx,r8
  14. mov rcx,QWORD PTR[((OPENSSL_ia32cap_P+4))]
  15. bt rcx,61
  16. jc _shaext_shortcut
  17. test ecx,268435456
  18. jnz _avx_shortcut
  19. mov rax,rsp
  20. push rbx
  21. push rbp
  22. lea rsp,QWORD PTR[((-168))+rsp]
  23. movaps XMMWORD PTR[rsp],xmm6
  24. movaps XMMWORD PTR[16+rsp],xmm7
  25. movaps XMMWORD PTR[32+rsp],xmm8
  26. movaps XMMWORD PTR[48+rsp],xmm9
  27. movaps XMMWORD PTR[(-120)+rax],xmm10
  28. movaps XMMWORD PTR[(-104)+rax],xmm11
  29. movaps XMMWORD PTR[(-88)+rax],xmm12
  30. movaps XMMWORD PTR[(-72)+rax],xmm13
  31. movaps XMMWORD PTR[(-56)+rax],xmm14
  32. movaps XMMWORD PTR[(-40)+rax],xmm15
  33. sub rsp,288
  34. and rsp,-256
  35. mov QWORD PTR[272+rsp],rax
  36. $L$body::
  37. lea rbp,QWORD PTR[((K256+128))]
  38. lea rbx,QWORD PTR[256+rsp]
  39. lea rdi,QWORD PTR[128+rdi]
  40. $L$oop_grande::
  41. mov DWORD PTR[280+rsp],edx
  42. xor edx,edx
  43. mov r8,QWORD PTR[rsi]
  44. mov ecx,DWORD PTR[8+rsi]
  45. cmp ecx,edx
  46. cmovg edx,ecx
  47. test ecx,ecx
  48. mov DWORD PTR[rbx],ecx
  49. cmovle r8,rbp
  50. mov r9,QWORD PTR[16+rsi]
  51. mov ecx,DWORD PTR[24+rsi]
  52. cmp ecx,edx
  53. cmovg edx,ecx
  54. test ecx,ecx
  55. mov DWORD PTR[4+rbx],ecx
  56. cmovle r9,rbp
  57. mov r10,QWORD PTR[32+rsi]
  58. mov ecx,DWORD PTR[40+rsi]
  59. cmp ecx,edx
  60. cmovg edx,ecx
  61. test ecx,ecx
  62. mov DWORD PTR[8+rbx],ecx
  63. cmovle r10,rbp
  64. mov r11,QWORD PTR[48+rsi]
  65. mov ecx,DWORD PTR[56+rsi]
  66. cmp ecx,edx
  67. cmovg edx,ecx
  68. test ecx,ecx
  69. mov DWORD PTR[12+rbx],ecx
  70. cmovle r11,rbp
  71. test edx,edx
  72. jz $L$done
  73. movdqu xmm8,XMMWORD PTR[((0-128))+rdi]
  74. lea rax,QWORD PTR[128+rsp]
  75. movdqu xmm9,XMMWORD PTR[((32-128))+rdi]
  76. movdqu xmm10,XMMWORD PTR[((64-128))+rdi]
  77. movdqu xmm11,XMMWORD PTR[((96-128))+rdi]
  78. movdqu xmm12,XMMWORD PTR[((128-128))+rdi]
  79. movdqu xmm13,XMMWORD PTR[((160-128))+rdi]
  80. movdqu xmm14,XMMWORD PTR[((192-128))+rdi]
  81. movdqu xmm15,XMMWORD PTR[((224-128))+rdi]
  82. movdqu xmm6,XMMWORD PTR[$L$pbswap]
  83. jmp $L$oop
  84. ALIGN 32
  85. $L$oop::
  86. movdqa xmm4,xmm10
  87. pxor xmm4,xmm9
  88. movd xmm5,DWORD PTR[r8]
  89. movd xmm0,DWORD PTR[r9]
  90. movd xmm1,DWORD PTR[r10]
  91. movd xmm2,DWORD PTR[r11]
  92. punpckldq xmm5,xmm1
  93. punpckldq xmm0,xmm2
  94. punpckldq xmm5,xmm0
  95. movdqa xmm7,xmm12
  96. DB 102,15,56,0,238
  97. movdqa xmm2,xmm12
  98. psrld xmm7,6
  99. movdqa xmm1,xmm12
  100. pslld xmm2,7
  101. movdqa XMMWORD PTR[(0-128)+rax],xmm5
  102. paddd xmm5,xmm15
  103. psrld xmm1,11
  104. pxor xmm7,xmm2
  105. pslld xmm2,21-7
  106. paddd xmm5,XMMWORD PTR[((-128))+rbp]
  107. pxor xmm7,xmm1
  108. psrld xmm1,25-11
  109. movdqa xmm0,xmm12
  110. pxor xmm7,xmm2
  111. movdqa xmm3,xmm12
  112. pslld xmm2,26-21
  113. pandn xmm0,xmm14
  114. pand xmm3,xmm13
  115. pxor xmm7,xmm1
  116. movdqa xmm1,xmm8
  117. pxor xmm7,xmm2
  118. movdqa xmm2,xmm8
  119. psrld xmm1,2
  120. paddd xmm5,xmm7
  121. pxor xmm0,xmm3
  122. movdqa xmm3,xmm9
  123. movdqa xmm7,xmm8
  124. pslld xmm2,10
  125. pxor xmm3,xmm8
  126. psrld xmm7,13
  127. pxor xmm1,xmm2
  128. paddd xmm5,xmm0
  129. pslld xmm2,19-10
  130. pand xmm4,xmm3
  131. pxor xmm1,xmm7
  132. psrld xmm7,22-13
  133. pxor xmm1,xmm2
  134. movdqa xmm15,xmm9
  135. pslld xmm2,30-19
  136. pxor xmm7,xmm1
  137. pxor xmm15,xmm4
  138. paddd xmm11,xmm5
  139. pxor xmm7,xmm2
  140. paddd xmm15,xmm5
  141. paddd xmm15,xmm7
  142. movd xmm5,DWORD PTR[4+r8]
  143. movd xmm0,DWORD PTR[4+r9]
  144. movd xmm1,DWORD PTR[4+r10]
  145. movd xmm2,DWORD PTR[4+r11]
  146. punpckldq xmm5,xmm1
  147. punpckldq xmm0,xmm2
  148. punpckldq xmm5,xmm0
  149. movdqa xmm7,xmm11
  150. movdqa xmm2,xmm11
  151. DB 102,15,56,0,238
  152. psrld xmm7,6
  153. movdqa xmm1,xmm11
  154. pslld xmm2,7
  155. movdqa XMMWORD PTR[(16-128)+rax],xmm5
  156. paddd xmm5,xmm14
  157. psrld xmm1,11
  158. pxor xmm7,xmm2
  159. pslld xmm2,21-7
  160. paddd xmm5,XMMWORD PTR[((-96))+rbp]
  161. pxor xmm7,xmm1
  162. psrld xmm1,25-11
  163. movdqa xmm0,xmm11
  164. pxor xmm7,xmm2
  165. movdqa xmm4,xmm11
  166. pslld xmm2,26-21
  167. pandn xmm0,xmm13
  168. pand xmm4,xmm12
  169. pxor xmm7,xmm1
  170. movdqa xmm1,xmm15
  171. pxor xmm7,xmm2
  172. movdqa xmm2,xmm15
  173. psrld xmm1,2
  174. paddd xmm5,xmm7
  175. pxor xmm0,xmm4
  176. movdqa xmm4,xmm8
  177. movdqa xmm7,xmm15
  178. pslld xmm2,10
  179. pxor xmm4,xmm15
  180. psrld xmm7,13
  181. pxor xmm1,xmm2
  182. paddd xmm5,xmm0
  183. pslld xmm2,19-10
  184. pand xmm3,xmm4
  185. pxor xmm1,xmm7
  186. psrld xmm7,22-13
  187. pxor xmm1,xmm2
  188. movdqa xmm14,xmm8
  189. pslld xmm2,30-19
  190. pxor xmm7,xmm1
  191. pxor xmm14,xmm3
  192. paddd xmm10,xmm5
  193. pxor xmm7,xmm2
  194. paddd xmm14,xmm5
  195. paddd xmm14,xmm7
  196. movd xmm5,DWORD PTR[8+r8]
  197. movd xmm0,DWORD PTR[8+r9]
  198. movd xmm1,DWORD PTR[8+r10]
  199. movd xmm2,DWORD PTR[8+r11]
  200. punpckldq xmm5,xmm1
  201. punpckldq xmm0,xmm2
  202. punpckldq xmm5,xmm0
  203. movdqa xmm7,xmm10
  204. DB 102,15,56,0,238
  205. movdqa xmm2,xmm10
  206. psrld xmm7,6
  207. movdqa xmm1,xmm10
  208. pslld xmm2,7
  209. movdqa XMMWORD PTR[(32-128)+rax],xmm5
  210. paddd xmm5,xmm13
  211. psrld xmm1,11
  212. pxor xmm7,xmm2
  213. pslld xmm2,21-7
  214. paddd xmm5,XMMWORD PTR[((-64))+rbp]
  215. pxor xmm7,xmm1
  216. psrld xmm1,25-11
  217. movdqa xmm0,xmm10
  218. pxor xmm7,xmm2
  219. movdqa xmm3,xmm10
  220. pslld xmm2,26-21
  221. pandn xmm0,xmm12
  222. pand xmm3,xmm11
  223. pxor xmm7,xmm1
  224. movdqa xmm1,xmm14
  225. pxor xmm7,xmm2
  226. movdqa xmm2,xmm14
  227. psrld xmm1,2
  228. paddd xmm5,xmm7
  229. pxor xmm0,xmm3
  230. movdqa xmm3,xmm15
  231. movdqa xmm7,xmm14
  232. pslld xmm2,10
  233. pxor xmm3,xmm14
  234. psrld xmm7,13
  235. pxor xmm1,xmm2
  236. paddd xmm5,xmm0
  237. pslld xmm2,19-10
  238. pand xmm4,xmm3
  239. pxor xmm1,xmm7
  240. psrld xmm7,22-13
  241. pxor xmm1,xmm2
  242. movdqa xmm13,xmm15
  243. pslld xmm2,30-19
  244. pxor xmm7,xmm1
  245. pxor xmm13,xmm4
  246. paddd xmm9,xmm5
  247. pxor xmm7,xmm2
  248. paddd xmm13,xmm5
  249. paddd xmm13,xmm7
  250. movd xmm5,DWORD PTR[12+r8]
  251. movd xmm0,DWORD PTR[12+r9]
  252. movd xmm1,DWORD PTR[12+r10]
  253. movd xmm2,DWORD PTR[12+r11]
  254. punpckldq xmm5,xmm1
  255. punpckldq xmm0,xmm2
  256. punpckldq xmm5,xmm0
  257. movdqa xmm7,xmm9
  258. movdqa xmm2,xmm9
  259. DB 102,15,56,0,238
  260. psrld xmm7,6
  261. movdqa xmm1,xmm9
  262. pslld xmm2,7
  263. movdqa XMMWORD PTR[(48-128)+rax],xmm5
  264. paddd xmm5,xmm12
  265. psrld xmm1,11
  266. pxor xmm7,xmm2
  267. pslld xmm2,21-7
  268. paddd xmm5,XMMWORD PTR[((-32))+rbp]
  269. pxor xmm7,xmm1
  270. psrld xmm1,25-11
  271. movdqa xmm0,xmm9
  272. pxor xmm7,xmm2
  273. movdqa xmm4,xmm9
  274. pslld xmm2,26-21
  275. pandn xmm0,xmm11
  276. pand xmm4,xmm10
  277. pxor xmm7,xmm1
  278. movdqa xmm1,xmm13
  279. pxor xmm7,xmm2
  280. movdqa xmm2,xmm13
  281. psrld xmm1,2
  282. paddd xmm5,xmm7
  283. pxor xmm0,xmm4
  284. movdqa xmm4,xmm14
  285. movdqa xmm7,xmm13
  286. pslld xmm2,10
  287. pxor xmm4,xmm13
  288. psrld xmm7,13
  289. pxor xmm1,xmm2
  290. paddd xmm5,xmm0
  291. pslld xmm2,19-10
  292. pand xmm3,xmm4
  293. pxor xmm1,xmm7
  294. psrld xmm7,22-13
  295. pxor xmm1,xmm2
  296. movdqa xmm12,xmm14
  297. pslld xmm2,30-19
  298. pxor xmm7,xmm1
  299. pxor xmm12,xmm3
  300. paddd xmm8,xmm5
  301. pxor xmm7,xmm2
  302. paddd xmm12,xmm5
  303. paddd xmm12,xmm7
  304. movd xmm5,DWORD PTR[16+r8]
  305. movd xmm0,DWORD PTR[16+r9]
  306. movd xmm1,DWORD PTR[16+r10]
  307. movd xmm2,DWORD PTR[16+r11]
  308. punpckldq xmm5,xmm1
  309. punpckldq xmm0,xmm2
  310. punpckldq xmm5,xmm0
  311. movdqa xmm7,xmm8
  312. DB 102,15,56,0,238
  313. movdqa xmm2,xmm8
  314. psrld xmm7,6
  315. movdqa xmm1,xmm8
  316. pslld xmm2,7
  317. movdqa XMMWORD PTR[(64-128)+rax],xmm5
  318. paddd xmm5,xmm11
  319. psrld xmm1,11
  320. pxor xmm7,xmm2
  321. pslld xmm2,21-7
  322. paddd xmm5,XMMWORD PTR[rbp]
  323. pxor xmm7,xmm1
  324. psrld xmm1,25-11
  325. movdqa xmm0,xmm8
  326. pxor xmm7,xmm2
  327. movdqa xmm3,xmm8
  328. pslld xmm2,26-21
  329. pandn xmm0,xmm10
  330. pand xmm3,xmm9
  331. pxor xmm7,xmm1
  332. movdqa xmm1,xmm12
  333. pxor xmm7,xmm2
  334. movdqa xmm2,xmm12
  335. psrld xmm1,2
  336. paddd xmm5,xmm7
  337. pxor xmm0,xmm3
  338. movdqa xmm3,xmm13
  339. movdqa xmm7,xmm12
  340. pslld xmm2,10
  341. pxor xmm3,xmm12
  342. psrld xmm7,13
  343. pxor xmm1,xmm2
  344. paddd xmm5,xmm0
  345. pslld xmm2,19-10
  346. pand xmm4,xmm3
  347. pxor xmm1,xmm7
  348. psrld xmm7,22-13
  349. pxor xmm1,xmm2
  350. movdqa xmm11,xmm13
  351. pslld xmm2,30-19
  352. pxor xmm7,xmm1
  353. pxor xmm11,xmm4
  354. paddd xmm15,xmm5
  355. pxor xmm7,xmm2
  356. paddd xmm11,xmm5
  357. paddd xmm11,xmm7
  358. movd xmm5,DWORD PTR[20+r8]
  359. movd xmm0,DWORD PTR[20+r9]
  360. movd xmm1,DWORD PTR[20+r10]
  361. movd xmm2,DWORD PTR[20+r11]
  362. punpckldq xmm5,xmm1
  363. punpckldq xmm0,xmm2
  364. punpckldq xmm5,xmm0
  365. movdqa xmm7,xmm15
  366. movdqa xmm2,xmm15
  367. DB 102,15,56,0,238
  368. psrld xmm7,6
  369. movdqa xmm1,xmm15
  370. pslld xmm2,7
  371. movdqa XMMWORD PTR[(80-128)+rax],xmm5
  372. paddd xmm5,xmm10
  373. psrld xmm1,11
  374. pxor xmm7,xmm2
  375. pslld xmm2,21-7
  376. paddd xmm5,XMMWORD PTR[32+rbp]
  377. pxor xmm7,xmm1
  378. psrld xmm1,25-11
  379. movdqa xmm0,xmm15
  380. pxor xmm7,xmm2
  381. movdqa xmm4,xmm15
  382. pslld xmm2,26-21
  383. pandn xmm0,xmm9
  384. pand xmm4,xmm8
  385. pxor xmm7,xmm1
  386. movdqa xmm1,xmm11
  387. pxor xmm7,xmm2
  388. movdqa xmm2,xmm11
  389. psrld xmm1,2
  390. paddd xmm5,xmm7
  391. pxor xmm0,xmm4
  392. movdqa xmm4,xmm12
  393. movdqa xmm7,xmm11
  394. pslld xmm2,10
  395. pxor xmm4,xmm11
  396. psrld xmm7,13
  397. pxor xmm1,xmm2
  398. paddd xmm5,xmm0
  399. pslld xmm2,19-10
  400. pand xmm3,xmm4
  401. pxor xmm1,xmm7
  402. psrld xmm7,22-13
  403. pxor xmm1,xmm2
  404. movdqa xmm10,xmm12
  405. pslld xmm2,30-19
  406. pxor xmm7,xmm1
  407. pxor xmm10,xmm3
  408. paddd xmm14,xmm5
  409. pxor xmm7,xmm2
  410. paddd xmm10,xmm5
  411. paddd xmm10,xmm7
  412. movd xmm5,DWORD PTR[24+r8]
  413. movd xmm0,DWORD PTR[24+r9]
  414. movd xmm1,DWORD PTR[24+r10]
  415. movd xmm2,DWORD PTR[24+r11]
  416. punpckldq xmm5,xmm1
  417. punpckldq xmm0,xmm2
  418. punpckldq xmm5,xmm0
  419. movdqa xmm7,xmm14
  420. DB 102,15,56,0,238
  421. movdqa xmm2,xmm14
  422. psrld xmm7,6
  423. movdqa xmm1,xmm14
  424. pslld xmm2,7
  425. movdqa XMMWORD PTR[(96-128)+rax],xmm5
  426. paddd xmm5,xmm9
  427. psrld xmm1,11
  428. pxor xmm7,xmm2
  429. pslld xmm2,21-7
  430. paddd xmm5,XMMWORD PTR[64+rbp]
  431. pxor xmm7,xmm1
  432. psrld xmm1,25-11
  433. movdqa xmm0,xmm14
  434. pxor xmm7,xmm2
  435. movdqa xmm3,xmm14
  436. pslld xmm2,26-21
  437. pandn xmm0,xmm8
  438. pand xmm3,xmm15
  439. pxor xmm7,xmm1
  440. movdqa xmm1,xmm10
  441. pxor xmm7,xmm2
  442. movdqa xmm2,xmm10
  443. psrld xmm1,2
  444. paddd xmm5,xmm7
  445. pxor xmm0,xmm3
  446. movdqa xmm3,xmm11
  447. movdqa xmm7,xmm10
  448. pslld xmm2,10
  449. pxor xmm3,xmm10
  450. psrld xmm7,13
  451. pxor xmm1,xmm2
  452. paddd xmm5,xmm0
  453. pslld xmm2,19-10
  454. pand xmm4,xmm3
  455. pxor xmm1,xmm7
  456. psrld xmm7,22-13
  457. pxor xmm1,xmm2
  458. movdqa xmm9,xmm11
  459. pslld xmm2,30-19
  460. pxor xmm7,xmm1
  461. pxor xmm9,xmm4
  462. paddd xmm13,xmm5
  463. pxor xmm7,xmm2
  464. paddd xmm9,xmm5
  465. paddd xmm9,xmm7
  466. movd xmm5,DWORD PTR[28+r8]
  467. movd xmm0,DWORD PTR[28+r9]
  468. movd xmm1,DWORD PTR[28+r10]
  469. movd xmm2,DWORD PTR[28+r11]
  470. punpckldq xmm5,xmm1
  471. punpckldq xmm0,xmm2
  472. punpckldq xmm5,xmm0
  473. movdqa xmm7,xmm13
  474. movdqa xmm2,xmm13
  475. DB 102,15,56,0,238
  476. psrld xmm7,6
  477. movdqa xmm1,xmm13
  478. pslld xmm2,7
  479. movdqa XMMWORD PTR[(112-128)+rax],xmm5
  480. paddd xmm5,xmm8
  481. psrld xmm1,11
  482. pxor xmm7,xmm2
  483. pslld xmm2,21-7
  484. paddd xmm5,XMMWORD PTR[96+rbp]
  485. pxor xmm7,xmm1
  486. psrld xmm1,25-11
  487. movdqa xmm0,xmm13
  488. pxor xmm7,xmm2
  489. movdqa xmm4,xmm13
  490. pslld xmm2,26-21
  491. pandn xmm0,xmm15
  492. pand xmm4,xmm14
  493. pxor xmm7,xmm1
  494. movdqa xmm1,xmm9
  495. pxor xmm7,xmm2
  496. movdqa xmm2,xmm9
  497. psrld xmm1,2
  498. paddd xmm5,xmm7
  499. pxor xmm0,xmm4
  500. movdqa xmm4,xmm10
  501. movdqa xmm7,xmm9
  502. pslld xmm2,10
  503. pxor xmm4,xmm9
  504. psrld xmm7,13
  505. pxor xmm1,xmm2
  506. paddd xmm5,xmm0
  507. pslld xmm2,19-10
  508. pand xmm3,xmm4
  509. pxor xmm1,xmm7
  510. psrld xmm7,22-13
  511. pxor xmm1,xmm2
  512. movdqa xmm8,xmm10
  513. pslld xmm2,30-19
  514. pxor xmm7,xmm1
  515. pxor xmm8,xmm3
  516. paddd xmm12,xmm5
  517. pxor xmm7,xmm2
  518. paddd xmm8,xmm5
  519. paddd xmm8,xmm7
  520. lea rbp,QWORD PTR[256+rbp]
  521. movd xmm5,DWORD PTR[32+r8]
  522. movd xmm0,DWORD PTR[32+r9]
  523. movd xmm1,DWORD PTR[32+r10]
  524. movd xmm2,DWORD PTR[32+r11]
  525. punpckldq xmm5,xmm1
  526. punpckldq xmm0,xmm2
  527. punpckldq xmm5,xmm0
  528. movdqa xmm7,xmm12
  529. DB 102,15,56,0,238
  530. movdqa xmm2,xmm12
  531. psrld xmm7,6
  532. movdqa xmm1,xmm12
  533. pslld xmm2,7
  534. movdqa XMMWORD PTR[(128-128)+rax],xmm5
  535. paddd xmm5,xmm15
  536. psrld xmm1,11
  537. pxor xmm7,xmm2
  538. pslld xmm2,21-7
  539. paddd xmm5,XMMWORD PTR[((-128))+rbp]
  540. pxor xmm7,xmm1
  541. psrld xmm1,25-11
  542. movdqa xmm0,xmm12
  543. pxor xmm7,xmm2
  544. movdqa xmm3,xmm12
  545. pslld xmm2,26-21
  546. pandn xmm0,xmm14
  547. pand xmm3,xmm13
  548. pxor xmm7,xmm1
  549. movdqa xmm1,xmm8
  550. pxor xmm7,xmm2
  551. movdqa xmm2,xmm8
  552. psrld xmm1,2
  553. paddd xmm5,xmm7
  554. pxor xmm0,xmm3
  555. movdqa xmm3,xmm9
  556. movdqa xmm7,xmm8
  557. pslld xmm2,10
  558. pxor xmm3,xmm8
  559. psrld xmm7,13
  560. pxor xmm1,xmm2
  561. paddd xmm5,xmm0
  562. pslld xmm2,19-10
  563. pand xmm4,xmm3
  564. pxor xmm1,xmm7
  565. psrld xmm7,22-13
  566. pxor xmm1,xmm2
  567. movdqa xmm15,xmm9
  568. pslld xmm2,30-19
  569. pxor xmm7,xmm1
  570. pxor xmm15,xmm4
  571. paddd xmm11,xmm5
  572. pxor xmm7,xmm2
  573. paddd xmm15,xmm5
  574. paddd xmm15,xmm7
  575. movd xmm5,DWORD PTR[36+r8]
  576. movd xmm0,DWORD PTR[36+r9]
  577. movd xmm1,DWORD PTR[36+r10]
  578. movd xmm2,DWORD PTR[36+r11]
  579. punpckldq xmm5,xmm1
  580. punpckldq xmm0,xmm2
  581. punpckldq xmm5,xmm0
  582. movdqa xmm7,xmm11
  583. movdqa xmm2,xmm11
  584. DB 102,15,56,0,238
  585. psrld xmm7,6
  586. movdqa xmm1,xmm11
  587. pslld xmm2,7
  588. movdqa XMMWORD PTR[(144-128)+rax],xmm5
  589. paddd xmm5,xmm14
  590. psrld xmm1,11
  591. pxor xmm7,xmm2
  592. pslld xmm2,21-7
  593. paddd xmm5,XMMWORD PTR[((-96))+rbp]
  594. pxor xmm7,xmm1
  595. psrld xmm1,25-11
  596. movdqa xmm0,xmm11
  597. pxor xmm7,xmm2
  598. movdqa xmm4,xmm11
  599. pslld xmm2,26-21
  600. pandn xmm0,xmm13
  601. pand xmm4,xmm12
  602. pxor xmm7,xmm1
  603. movdqa xmm1,xmm15
  604. pxor xmm7,xmm2
  605. movdqa xmm2,xmm15
  606. psrld xmm1,2
  607. paddd xmm5,xmm7
  608. pxor xmm0,xmm4
  609. movdqa xmm4,xmm8
  610. movdqa xmm7,xmm15
  611. pslld xmm2,10
  612. pxor xmm4,xmm15
  613. psrld xmm7,13
  614. pxor xmm1,xmm2
  615. paddd xmm5,xmm0
  616. pslld xmm2,19-10
  617. pand xmm3,xmm4
  618. pxor xmm1,xmm7
  619. psrld xmm7,22-13
  620. pxor xmm1,xmm2
  621. movdqa xmm14,xmm8
  622. pslld xmm2,30-19
  623. pxor xmm7,xmm1
  624. pxor xmm14,xmm3
  625. paddd xmm10,xmm5
  626. pxor xmm7,xmm2
  627. paddd xmm14,xmm5
  628. paddd xmm14,xmm7
  629. movd xmm5,DWORD PTR[40+r8]
  630. movd xmm0,DWORD PTR[40+r9]
  631. movd xmm1,DWORD PTR[40+r10]
  632. movd xmm2,DWORD PTR[40+r11]
  633. punpckldq xmm5,xmm1
  634. punpckldq xmm0,xmm2
  635. punpckldq xmm5,xmm0
  636. movdqa xmm7,xmm10
  637. DB 102,15,56,0,238
  638. movdqa xmm2,xmm10
  639. psrld xmm7,6
  640. movdqa xmm1,xmm10
  641. pslld xmm2,7
  642. movdqa XMMWORD PTR[(160-128)+rax],xmm5
  643. paddd xmm5,xmm13
  644. psrld xmm1,11
  645. pxor xmm7,xmm2
  646. pslld xmm2,21-7
  647. paddd xmm5,XMMWORD PTR[((-64))+rbp]
  648. pxor xmm7,xmm1
  649. psrld xmm1,25-11
  650. movdqa xmm0,xmm10
  651. pxor xmm7,xmm2
  652. movdqa xmm3,xmm10
  653. pslld xmm2,26-21
  654. pandn xmm0,xmm12
  655. pand xmm3,xmm11
  656. pxor xmm7,xmm1
  657. movdqa xmm1,xmm14
  658. pxor xmm7,xmm2
  659. movdqa xmm2,xmm14
  660. psrld xmm1,2
  661. paddd xmm5,xmm7
  662. pxor xmm0,xmm3
  663. movdqa xmm3,xmm15
  664. movdqa xmm7,xmm14
  665. pslld xmm2,10
  666. pxor xmm3,xmm14
  667. psrld xmm7,13
  668. pxor xmm1,xmm2
  669. paddd xmm5,xmm0
  670. pslld xmm2,19-10
  671. pand xmm4,xmm3
  672. pxor xmm1,xmm7
  673. psrld xmm7,22-13
  674. pxor xmm1,xmm2
  675. movdqa xmm13,xmm15
  676. pslld xmm2,30-19
  677. pxor xmm7,xmm1
  678. pxor xmm13,xmm4
  679. paddd xmm9,xmm5
  680. pxor xmm7,xmm2
  681. paddd xmm13,xmm5
  682. paddd xmm13,xmm7
  683. movd xmm5,DWORD PTR[44+r8]
  684. movd xmm0,DWORD PTR[44+r9]
  685. movd xmm1,DWORD PTR[44+r10]
  686. movd xmm2,DWORD PTR[44+r11]
  687. punpckldq xmm5,xmm1
  688. punpckldq xmm0,xmm2
  689. punpckldq xmm5,xmm0
  690. movdqa xmm7,xmm9
  691. movdqa xmm2,xmm9
  692. DB 102,15,56,0,238
  693. psrld xmm7,6
  694. movdqa xmm1,xmm9
  695. pslld xmm2,7
  696. movdqa XMMWORD PTR[(176-128)+rax],xmm5
  697. paddd xmm5,xmm12
  698. psrld xmm1,11
  699. pxor xmm7,xmm2
  700. pslld xmm2,21-7
  701. paddd xmm5,XMMWORD PTR[((-32))+rbp]
  702. pxor xmm7,xmm1
  703. psrld xmm1,25-11
  704. movdqa xmm0,xmm9
  705. pxor xmm7,xmm2
  706. movdqa xmm4,xmm9
  707. pslld xmm2,26-21
  708. pandn xmm0,xmm11
  709. pand xmm4,xmm10
  710. pxor xmm7,xmm1
  711. movdqa xmm1,xmm13
  712. pxor xmm7,xmm2
  713. movdqa xmm2,xmm13
  714. psrld xmm1,2
  715. paddd xmm5,xmm7
  716. pxor xmm0,xmm4
  717. movdqa xmm4,xmm14
  718. movdqa xmm7,xmm13
  719. pslld xmm2,10
  720. pxor xmm4,xmm13
  721. psrld xmm7,13
  722. pxor xmm1,xmm2
  723. paddd xmm5,xmm0
  724. pslld xmm2,19-10
  725. pand xmm3,xmm4
  726. pxor xmm1,xmm7
  727. psrld xmm7,22-13
  728. pxor xmm1,xmm2
  729. movdqa xmm12,xmm14
  730. pslld xmm2,30-19
  731. pxor xmm7,xmm1
  732. pxor xmm12,xmm3
  733. paddd xmm8,xmm5
  734. pxor xmm7,xmm2
  735. paddd xmm12,xmm5
  736. paddd xmm12,xmm7
  737. movd xmm5,DWORD PTR[48+r8]
  738. movd xmm0,DWORD PTR[48+r9]
  739. movd xmm1,DWORD PTR[48+r10]
  740. movd xmm2,DWORD PTR[48+r11]
  741. punpckldq xmm5,xmm1
  742. punpckldq xmm0,xmm2
  743. punpckldq xmm5,xmm0
  744. movdqa xmm7,xmm8
  745. DB 102,15,56,0,238
  746. movdqa xmm2,xmm8
  747. psrld xmm7,6
  748. movdqa xmm1,xmm8
  749. pslld xmm2,7
  750. movdqa XMMWORD PTR[(192-128)+rax],xmm5
  751. paddd xmm5,xmm11
  752. psrld xmm1,11
  753. pxor xmm7,xmm2
  754. pslld xmm2,21-7
  755. paddd xmm5,XMMWORD PTR[rbp]
  756. pxor xmm7,xmm1
  757. psrld xmm1,25-11
  758. movdqa xmm0,xmm8
  759. pxor xmm7,xmm2
  760. movdqa xmm3,xmm8
  761. pslld xmm2,26-21
  762. pandn xmm0,xmm10
  763. pand xmm3,xmm9
  764. pxor xmm7,xmm1
  765. movdqa xmm1,xmm12
  766. pxor xmm7,xmm2
  767. movdqa xmm2,xmm12
  768. psrld xmm1,2
  769. paddd xmm5,xmm7
  770. pxor xmm0,xmm3
  771. movdqa xmm3,xmm13
  772. movdqa xmm7,xmm12
  773. pslld xmm2,10
  774. pxor xmm3,xmm12
  775. psrld xmm7,13
  776. pxor xmm1,xmm2
  777. paddd xmm5,xmm0
  778. pslld xmm2,19-10
  779. pand xmm4,xmm3
  780. pxor xmm1,xmm7
  781. psrld xmm7,22-13
  782. pxor xmm1,xmm2
  783. movdqa xmm11,xmm13
  784. pslld xmm2,30-19
  785. pxor xmm7,xmm1
  786. pxor xmm11,xmm4
  787. paddd xmm15,xmm5
  788. pxor xmm7,xmm2
  789. paddd xmm11,xmm5
  790. paddd xmm11,xmm7
  791. movd xmm5,DWORD PTR[52+r8]
  792. movd xmm0,DWORD PTR[52+r9]
  793. movd xmm1,DWORD PTR[52+r10]
  794. movd xmm2,DWORD PTR[52+r11]
  795. punpckldq xmm5,xmm1
  796. punpckldq xmm0,xmm2
  797. punpckldq xmm5,xmm0
  798. movdqa xmm7,xmm15
  799. movdqa xmm2,xmm15
  800. DB 102,15,56,0,238
  801. psrld xmm7,6
  802. movdqa xmm1,xmm15
  803. pslld xmm2,7
  804. movdqa XMMWORD PTR[(208-128)+rax],xmm5
  805. paddd xmm5,xmm10
  806. psrld xmm1,11
  807. pxor xmm7,xmm2
  808. pslld xmm2,21-7
  809. paddd xmm5,XMMWORD PTR[32+rbp]
  810. pxor xmm7,xmm1
  811. psrld xmm1,25-11
  812. movdqa xmm0,xmm15
  813. pxor xmm7,xmm2
  814. movdqa xmm4,xmm15
  815. pslld xmm2,26-21
  816. pandn xmm0,xmm9
  817. pand xmm4,xmm8
  818. pxor xmm7,xmm1
  819. movdqa xmm1,xmm11
  820. pxor xmm7,xmm2
  821. movdqa xmm2,xmm11
  822. psrld xmm1,2
  823. paddd xmm5,xmm7
  824. pxor xmm0,xmm4
  825. movdqa xmm4,xmm12
  826. movdqa xmm7,xmm11
  827. pslld xmm2,10
  828. pxor xmm4,xmm11
  829. psrld xmm7,13
  830. pxor xmm1,xmm2
  831. paddd xmm5,xmm0
  832. pslld xmm2,19-10
  833. pand xmm3,xmm4
  834. pxor xmm1,xmm7
  835. psrld xmm7,22-13
  836. pxor xmm1,xmm2
  837. movdqa xmm10,xmm12
  838. pslld xmm2,30-19
  839. pxor xmm7,xmm1
  840. pxor xmm10,xmm3
  841. paddd xmm14,xmm5
  842. pxor xmm7,xmm2
  843. paddd xmm10,xmm5
  844. paddd xmm10,xmm7
  845. movd xmm5,DWORD PTR[56+r8]
  846. movd xmm0,DWORD PTR[56+r9]
  847. movd xmm1,DWORD PTR[56+r10]
  848. movd xmm2,DWORD PTR[56+r11]
  849. punpckldq xmm5,xmm1
  850. punpckldq xmm0,xmm2
  851. punpckldq xmm5,xmm0
  852. movdqa xmm7,xmm14
  853. DB 102,15,56,0,238
  854. movdqa xmm2,xmm14
  855. psrld xmm7,6
  856. movdqa xmm1,xmm14
  857. pslld xmm2,7
  858. movdqa XMMWORD PTR[(224-128)+rax],xmm5
  859. paddd xmm5,xmm9
  860. psrld xmm1,11
  861. pxor xmm7,xmm2
  862. pslld xmm2,21-7
  863. paddd xmm5,XMMWORD PTR[64+rbp]
  864. pxor xmm7,xmm1
  865. psrld xmm1,25-11
  866. movdqa xmm0,xmm14
  867. pxor xmm7,xmm2
  868. movdqa xmm3,xmm14
  869. pslld xmm2,26-21
  870. pandn xmm0,xmm8
  871. pand xmm3,xmm15
  872. pxor xmm7,xmm1
  873. movdqa xmm1,xmm10
  874. pxor xmm7,xmm2
  875. movdqa xmm2,xmm10
  876. psrld xmm1,2
  877. paddd xmm5,xmm7
  878. pxor xmm0,xmm3
  879. movdqa xmm3,xmm11
  880. movdqa xmm7,xmm10
  881. pslld xmm2,10
  882. pxor xmm3,xmm10
  883. psrld xmm7,13
  884. pxor xmm1,xmm2
  885. paddd xmm5,xmm0
  886. pslld xmm2,19-10
  887. pand xmm4,xmm3
  888. pxor xmm1,xmm7
  889. psrld xmm7,22-13
  890. pxor xmm1,xmm2
  891. movdqa xmm9,xmm11
  892. pslld xmm2,30-19
  893. pxor xmm7,xmm1
  894. pxor xmm9,xmm4
  895. paddd xmm13,xmm5
  896. pxor xmm7,xmm2
  897. paddd xmm9,xmm5
  898. paddd xmm9,xmm7
  899. movd xmm5,DWORD PTR[60+r8]
  900. lea r8,QWORD PTR[64+r8]
  901. movd xmm0,DWORD PTR[60+r9]
  902. lea r9,QWORD PTR[64+r9]
  903. movd xmm1,DWORD PTR[60+r10]
  904. lea r10,QWORD PTR[64+r10]
  905. movd xmm2,DWORD PTR[60+r11]
  906. lea r11,QWORD PTR[64+r11]
  907. punpckldq xmm5,xmm1
  908. punpckldq xmm0,xmm2
  909. punpckldq xmm5,xmm0
  910. movdqa xmm7,xmm13
  911. movdqa xmm2,xmm13
  912. DB 102,15,56,0,238
  913. psrld xmm7,6
  914. movdqa xmm1,xmm13
  915. pslld xmm2,7
  916. movdqa XMMWORD PTR[(240-128)+rax],xmm5
  917. paddd xmm5,xmm8
  918. psrld xmm1,11
  919. pxor xmm7,xmm2
  920. pslld xmm2,21-7
  921. paddd xmm5,XMMWORD PTR[96+rbp]
  922. pxor xmm7,xmm1
  923. psrld xmm1,25-11
  924. movdqa xmm0,xmm13
  925. prefetcht0 [63+r8]
  926. pxor xmm7,xmm2
  927. movdqa xmm4,xmm13
  928. pslld xmm2,26-21
  929. pandn xmm0,xmm15
  930. pand xmm4,xmm14
  931. pxor xmm7,xmm1
  932. prefetcht0 [63+r9]
  933. movdqa xmm1,xmm9
  934. pxor xmm7,xmm2
  935. movdqa xmm2,xmm9
  936. psrld xmm1,2
  937. paddd xmm5,xmm7
  938. pxor xmm0,xmm4
  939. movdqa xmm4,xmm10
  940. movdqa xmm7,xmm9
  941. pslld xmm2,10
  942. pxor xmm4,xmm9
  943. prefetcht0 [63+r10]
  944. psrld xmm7,13
  945. pxor xmm1,xmm2
  946. paddd xmm5,xmm0
  947. pslld xmm2,19-10
  948. pand xmm3,xmm4
  949. pxor xmm1,xmm7
  950. prefetcht0 [63+r11]
  951. psrld xmm7,22-13
  952. pxor xmm1,xmm2
  953. movdqa xmm8,xmm10
  954. pslld xmm2,30-19
  955. pxor xmm7,xmm1
  956. pxor xmm8,xmm3
  957. paddd xmm12,xmm5
  958. pxor xmm7,xmm2
  959. paddd xmm8,xmm5
  960. paddd xmm8,xmm7
  961. lea rbp,QWORD PTR[256+rbp]
  962. movdqu xmm5,XMMWORD PTR[((0-128))+rax]
  963. mov ecx,3
  964. jmp $L$oop_16_xx
  965. ALIGN 32
  966. $L$oop_16_xx::
  967. movdqa xmm6,XMMWORD PTR[((16-128))+rax]
  968. paddd xmm5,XMMWORD PTR[((144-128))+rax]
  969. movdqa xmm7,xmm6
  970. movdqa xmm1,xmm6
  971. psrld xmm7,3
  972. movdqa xmm2,xmm6
  973. psrld xmm1,7
  974. movdqa xmm0,XMMWORD PTR[((224-128))+rax]
  975. pslld xmm2,14
  976. pxor xmm7,xmm1
  977. psrld xmm1,18-7
  978. movdqa xmm3,xmm0
  979. pxor xmm7,xmm2
  980. pslld xmm2,25-14
  981. pxor xmm7,xmm1
  982. psrld xmm0,10
  983. movdqa xmm1,xmm3
  984. psrld xmm3,17
  985. pxor xmm7,xmm2
  986. pslld xmm1,13
  987. paddd xmm5,xmm7
  988. pxor xmm0,xmm3
  989. psrld xmm3,19-17
  990. pxor xmm0,xmm1
  991. pslld xmm1,15-13
  992. pxor xmm0,xmm3
  993. pxor xmm0,xmm1
  994. paddd xmm5,xmm0
  995. movdqa xmm7,xmm12
  996. movdqa xmm2,xmm12
  997. psrld xmm7,6
  998. movdqa xmm1,xmm12
  999. pslld xmm2,7
  1000. movdqa XMMWORD PTR[(0-128)+rax],xmm5
  1001. paddd xmm5,xmm15
  1002. psrld xmm1,11
  1003. pxor xmm7,xmm2
  1004. pslld xmm2,21-7
  1005. paddd xmm5,XMMWORD PTR[((-128))+rbp]
  1006. pxor xmm7,xmm1
  1007. psrld xmm1,25-11
  1008. movdqa xmm0,xmm12
  1009. pxor xmm7,xmm2
  1010. movdqa xmm3,xmm12
  1011. pslld xmm2,26-21
  1012. pandn xmm0,xmm14
  1013. pand xmm3,xmm13
  1014. pxor xmm7,xmm1
  1015. movdqa xmm1,xmm8
  1016. pxor xmm7,xmm2
  1017. movdqa xmm2,xmm8
  1018. psrld xmm1,2
  1019. paddd xmm5,xmm7
  1020. pxor xmm0,xmm3
  1021. movdqa xmm3,xmm9
  1022. movdqa xmm7,xmm8
  1023. pslld xmm2,10
  1024. pxor xmm3,xmm8
  1025. psrld xmm7,13
  1026. pxor xmm1,xmm2
  1027. paddd xmm5,xmm0
  1028. pslld xmm2,19-10
  1029. pand xmm4,xmm3
  1030. pxor xmm1,xmm7
  1031. psrld xmm7,22-13
  1032. pxor xmm1,xmm2
  1033. movdqa xmm15,xmm9
  1034. pslld xmm2,30-19
  1035. pxor xmm7,xmm1
  1036. pxor xmm15,xmm4
  1037. paddd xmm11,xmm5
  1038. pxor xmm7,xmm2
  1039. paddd xmm15,xmm5
  1040. paddd xmm15,xmm7
  1041. movdqa xmm5,XMMWORD PTR[((32-128))+rax]
  1042. paddd xmm6,XMMWORD PTR[((160-128))+rax]
  1043. movdqa xmm7,xmm5
  1044. movdqa xmm1,xmm5
  1045. psrld xmm7,3
  1046. movdqa xmm2,xmm5
  1047. psrld xmm1,7
  1048. movdqa xmm0,XMMWORD PTR[((240-128))+rax]
  1049. pslld xmm2,14
  1050. pxor xmm7,xmm1
  1051. psrld xmm1,18-7
  1052. movdqa xmm4,xmm0
  1053. pxor xmm7,xmm2
  1054. pslld xmm2,25-14
  1055. pxor xmm7,xmm1
  1056. psrld xmm0,10
  1057. movdqa xmm1,xmm4
  1058. psrld xmm4,17
  1059. pxor xmm7,xmm2
  1060. pslld xmm1,13
  1061. paddd xmm6,xmm7
  1062. pxor xmm0,xmm4
  1063. psrld xmm4,19-17
  1064. pxor xmm0,xmm1
  1065. pslld xmm1,15-13
  1066. pxor xmm0,xmm4
  1067. pxor xmm0,xmm1
  1068. paddd xmm6,xmm0
  1069. movdqa xmm7,xmm11
  1070. movdqa xmm2,xmm11
  1071. psrld xmm7,6
  1072. movdqa xmm1,xmm11
  1073. pslld xmm2,7
  1074. movdqa XMMWORD PTR[(16-128)+rax],xmm6
  1075. paddd xmm6,xmm14
  1076. psrld xmm1,11
  1077. pxor xmm7,xmm2
  1078. pslld xmm2,21-7
  1079. paddd xmm6,XMMWORD PTR[((-96))+rbp]
  1080. pxor xmm7,xmm1
  1081. psrld xmm1,25-11
  1082. movdqa xmm0,xmm11
  1083. pxor xmm7,xmm2
  1084. movdqa xmm4,xmm11
  1085. pslld xmm2,26-21
  1086. pandn xmm0,xmm13
  1087. pand xmm4,xmm12
  1088. pxor xmm7,xmm1
  1089. movdqa xmm1,xmm15
  1090. pxor xmm7,xmm2
  1091. movdqa xmm2,xmm15
  1092. psrld xmm1,2
  1093. paddd xmm6,xmm7
  1094. pxor xmm0,xmm4
  1095. movdqa xmm4,xmm8
  1096. movdqa xmm7,xmm15
  1097. pslld xmm2,10
  1098. pxor xmm4,xmm15
  1099. psrld xmm7,13
  1100. pxor xmm1,xmm2
  1101. paddd xmm6,xmm0
  1102. pslld xmm2,19-10
  1103. pand xmm3,xmm4
  1104. pxor xmm1,xmm7
  1105. psrld xmm7,22-13
  1106. pxor xmm1,xmm2
  1107. movdqa xmm14,xmm8
  1108. pslld xmm2,30-19
  1109. pxor xmm7,xmm1
  1110. pxor xmm14,xmm3
  1111. paddd xmm10,xmm6
  1112. pxor xmm7,xmm2
  1113. paddd xmm14,xmm6
  1114. paddd xmm14,xmm7
  1115. movdqa xmm6,XMMWORD PTR[((48-128))+rax]
  1116. paddd xmm5,XMMWORD PTR[((176-128))+rax]
  1117. movdqa xmm7,xmm6
  1118. movdqa xmm1,xmm6
  1119. psrld xmm7,3
  1120. movdqa xmm2,xmm6
  1121. psrld xmm1,7
  1122. movdqa xmm0,XMMWORD PTR[((0-128))+rax]
  1123. pslld xmm2,14
  1124. pxor xmm7,xmm1
  1125. psrld xmm1,18-7
  1126. movdqa xmm3,xmm0
  1127. pxor xmm7,xmm2
  1128. pslld xmm2,25-14
  1129. pxor xmm7,xmm1
  1130. psrld xmm0,10
  1131. movdqa xmm1,xmm3
  1132. psrld xmm3,17
  1133. pxor xmm7,xmm2
  1134. pslld xmm1,13
  1135. paddd xmm5,xmm7
  1136. pxor xmm0,xmm3
  1137. psrld xmm3,19-17
  1138. pxor xmm0,xmm1
  1139. pslld xmm1,15-13
  1140. pxor xmm0,xmm3
  1141. pxor xmm0,xmm1
  1142. paddd xmm5,xmm0
  1143. movdqa xmm7,xmm10
  1144. movdqa xmm2,xmm10
  1145. psrld xmm7,6
  1146. movdqa xmm1,xmm10
  1147. pslld xmm2,7
  1148. movdqa XMMWORD PTR[(32-128)+rax],xmm5
  1149. paddd xmm5,xmm13
  1150. psrld xmm1,11
  1151. pxor xmm7,xmm2
  1152. pslld xmm2,21-7
  1153. paddd xmm5,XMMWORD PTR[((-64))+rbp]
  1154. pxor xmm7,xmm1
  1155. psrld xmm1,25-11
  1156. movdqa xmm0,xmm10
  1157. pxor xmm7,xmm2
  1158. movdqa xmm3,xmm10
  1159. pslld xmm2,26-21
  1160. pandn xmm0,xmm12
  1161. pand xmm3,xmm11
  1162. pxor xmm7,xmm1
  1163. movdqa xmm1,xmm14
  1164. pxor xmm7,xmm2
  1165. movdqa xmm2,xmm14
  1166. psrld xmm1,2
  1167. paddd xmm5,xmm7
  1168. pxor xmm0,xmm3
  1169. movdqa xmm3,xmm15
  1170. movdqa xmm7,xmm14
  1171. pslld xmm2,10
  1172. pxor xmm3,xmm14
  1173. psrld xmm7,13
  1174. pxor xmm1,xmm2
  1175. paddd xmm5,xmm0
  1176. pslld xmm2,19-10
  1177. pand xmm4,xmm3
  1178. pxor xmm1,xmm7
  1179. psrld xmm7,22-13
  1180. pxor xmm1,xmm2
  1181. movdqa xmm13,xmm15
  1182. pslld xmm2,30-19
  1183. pxor xmm7,xmm1
  1184. pxor xmm13,xmm4
  1185. paddd xmm9,xmm5
  1186. pxor xmm7,xmm2
  1187. paddd xmm13,xmm5
  1188. paddd xmm13,xmm7
  1189. movdqa xmm5,XMMWORD PTR[((64-128))+rax]
  1190. paddd xmm6,XMMWORD PTR[((192-128))+rax]
  1191. movdqa xmm7,xmm5
  1192. movdqa xmm1,xmm5
  1193. psrld xmm7,3
  1194. movdqa xmm2,xmm5
  1195. psrld xmm1,7
  1196. movdqa xmm0,XMMWORD PTR[((16-128))+rax]
  1197. pslld xmm2,14
  1198. pxor xmm7,xmm1
  1199. psrld xmm1,18-7
  1200. movdqa xmm4,xmm0
  1201. pxor xmm7,xmm2
  1202. pslld xmm2,25-14
  1203. pxor xmm7,xmm1
  1204. psrld xmm0,10
  1205. movdqa xmm1,xmm4
  1206. psrld xmm4,17
  1207. pxor xmm7,xmm2
  1208. pslld xmm1,13
  1209. paddd xmm6,xmm7
  1210. pxor xmm0,xmm4
  1211. psrld xmm4,19-17
  1212. pxor xmm0,xmm1
  1213. pslld xmm1,15-13
  1214. pxor xmm0,xmm4
  1215. pxor xmm0,xmm1
  1216. paddd xmm6,xmm0
  1217. movdqa xmm7,xmm9
  1218. movdqa xmm2,xmm9
  1219. psrld xmm7,6
  1220. movdqa xmm1,xmm9
  1221. pslld xmm2,7
  1222. movdqa XMMWORD PTR[(48-128)+rax],xmm6
  1223. paddd xmm6,xmm12
  1224. psrld xmm1,11
  1225. pxor xmm7,xmm2
  1226. pslld xmm2,21-7
  1227. paddd xmm6,XMMWORD PTR[((-32))+rbp]
  1228. pxor xmm7,xmm1
  1229. psrld xmm1,25-11
  1230. movdqa xmm0,xmm9
  1231. pxor xmm7,xmm2
  1232. movdqa xmm4,xmm9
  1233. pslld xmm2,26-21
  1234. pandn xmm0,xmm11
  1235. pand xmm4,xmm10
  1236. pxor xmm7,xmm1
  1237. movdqa xmm1,xmm13
  1238. pxor xmm7,xmm2
  1239. movdqa xmm2,xmm13
  1240. psrld xmm1,2
  1241. paddd xmm6,xmm7
  1242. pxor xmm0,xmm4
  1243. movdqa xmm4,xmm14
  1244. movdqa xmm7,xmm13
  1245. pslld xmm2,10
  1246. pxor xmm4,xmm13
  1247. psrld xmm7,13
  1248. pxor xmm1,xmm2
  1249. paddd xmm6,xmm0
  1250. pslld xmm2,19-10
  1251. pand xmm3,xmm4
  1252. pxor xmm1,xmm7
  1253. psrld xmm7,22-13
  1254. pxor xmm1,xmm2
  1255. movdqa xmm12,xmm14
  1256. pslld xmm2,30-19
  1257. pxor xmm7,xmm1
  1258. pxor xmm12,xmm3
  1259. paddd xmm8,xmm6
  1260. pxor xmm7,xmm2
  1261. paddd xmm12,xmm6
  1262. paddd xmm12,xmm7
  1263. movdqa xmm6,XMMWORD PTR[((80-128))+rax]
  1264. paddd xmm5,XMMWORD PTR[((208-128))+rax]
  1265. movdqa xmm7,xmm6
  1266. movdqa xmm1,xmm6
  1267. psrld xmm7,3
  1268. movdqa xmm2,xmm6
  1269. psrld xmm1,7
  1270. movdqa xmm0,XMMWORD PTR[((32-128))+rax]
  1271. pslld xmm2,14
  1272. pxor xmm7,xmm1
  1273. psrld xmm1,18-7
  1274. movdqa xmm3,xmm0
  1275. pxor xmm7,xmm2
  1276. pslld xmm2,25-14
  1277. pxor xmm7,xmm1
  1278. psrld xmm0,10
  1279. movdqa xmm1,xmm3
  1280. psrld xmm3,17
  1281. pxor xmm7,xmm2
  1282. pslld xmm1,13
  1283. paddd xmm5,xmm7
  1284. pxor xmm0,xmm3
  1285. psrld xmm3,19-17
  1286. pxor xmm0,xmm1
  1287. pslld xmm1,15-13
  1288. pxor xmm0,xmm3
  1289. pxor xmm0,xmm1
  1290. paddd xmm5,xmm0
  1291. movdqa xmm7,xmm8
  1292. movdqa xmm2,xmm8
  1293. psrld xmm7,6
  1294. movdqa xmm1,xmm8
  1295. pslld xmm2,7
  1296. movdqa XMMWORD PTR[(64-128)+rax],xmm5
  1297. paddd xmm5,xmm11
  1298. psrld xmm1,11
  1299. pxor xmm7,xmm2
  1300. pslld xmm2,21-7
  1301. paddd xmm5,XMMWORD PTR[rbp]
  1302. pxor xmm7,xmm1
  1303. psrld xmm1,25-11
  1304. movdqa xmm0,xmm8
  1305. pxor xmm7,xmm2
  1306. movdqa xmm3,xmm8
  1307. pslld xmm2,26-21
  1308. pandn xmm0,xmm10
  1309. pand xmm3,xmm9
  1310. pxor xmm7,xmm1
  1311. movdqa xmm1,xmm12
  1312. pxor xmm7,xmm2
  1313. movdqa xmm2,xmm12
  1314. psrld xmm1,2
  1315. paddd xmm5,xmm7
  1316. pxor xmm0,xmm3
  1317. movdqa xmm3,xmm13
  1318. movdqa xmm7,xmm12
  1319. pslld xmm2,10
  1320. pxor xmm3,xmm12
  1321. psrld xmm7,13
  1322. pxor xmm1,xmm2
  1323. paddd xmm5,xmm0
  1324. pslld xmm2,19-10
  1325. pand xmm4,xmm3
  1326. pxor xmm1,xmm7
  1327. psrld xmm7,22-13
  1328. pxor xmm1,xmm2
  1329. movdqa xmm11,xmm13
  1330. pslld xmm2,30-19
  1331. pxor xmm7,xmm1
  1332. pxor xmm11,xmm4
  1333. paddd xmm15,xmm5
  1334. pxor xmm7,xmm2
  1335. paddd xmm11,xmm5
  1336. paddd xmm11,xmm7
  1337. movdqa xmm5,XMMWORD PTR[((96-128))+rax]
  1338. paddd xmm6,XMMWORD PTR[((224-128))+rax]
  1339. movdqa xmm7,xmm5
  1340. movdqa xmm1,xmm5
  1341. psrld xmm7,3
  1342. movdqa xmm2,xmm5
  1343. psrld xmm1,7
  1344. movdqa xmm0,XMMWORD PTR[((48-128))+rax]
  1345. pslld xmm2,14
  1346. pxor xmm7,xmm1
  1347. psrld xmm1,18-7
  1348. movdqa xmm4,xmm0
  1349. pxor xmm7,xmm2
  1350. pslld xmm2,25-14
  1351. pxor xmm7,xmm1
  1352. psrld xmm0,10
  1353. movdqa xmm1,xmm4
  1354. psrld xmm4,17
  1355. pxor xmm7,xmm2
  1356. pslld xmm1,13
  1357. paddd xmm6,xmm7
  1358. pxor xmm0,xmm4
  1359. psrld xmm4,19-17
  1360. pxor xmm0,xmm1
  1361. pslld xmm1,15-13
  1362. pxor xmm0,xmm4
  1363. pxor xmm0,xmm1
  1364. paddd xmm6,xmm0
  1365. movdqa xmm7,xmm15
  1366. movdqa xmm2,xmm15
  1367. psrld xmm7,6
  1368. movdqa xmm1,xmm15
  1369. pslld xmm2,7
  1370. movdqa XMMWORD PTR[(80-128)+rax],xmm6
  1371. paddd xmm6,xmm10
  1372. psrld xmm1,11
  1373. pxor xmm7,xmm2
  1374. pslld xmm2,21-7
  1375. paddd xmm6,XMMWORD PTR[32+rbp]
  1376. pxor xmm7,xmm1
  1377. psrld xmm1,25-11
  1378. movdqa xmm0,xmm15
  1379. pxor xmm7,xmm2
  1380. movdqa xmm4,xmm15
  1381. pslld xmm2,26-21
  1382. pandn xmm0,xmm9
  1383. pand xmm4,xmm8
  1384. pxor xmm7,xmm1
  1385. movdqa xmm1,xmm11
  1386. pxor xmm7,xmm2
  1387. movdqa xmm2,xmm11
  1388. psrld xmm1,2
  1389. paddd xmm6,xmm7
  1390. pxor xmm0,xmm4
  1391. movdqa xmm4,xmm12
  1392. movdqa xmm7,xmm11
  1393. pslld xmm2,10
  1394. pxor xmm4,xmm11
  1395. psrld xmm7,13
  1396. pxor xmm1,xmm2
  1397. paddd xmm6,xmm0
  1398. pslld xmm2,19-10
  1399. pand xmm3,xmm4
  1400. pxor xmm1,xmm7
  1401. psrld xmm7,22-13
  1402. pxor xmm1,xmm2
  1403. movdqa xmm10,xmm12
  1404. pslld xmm2,30-19
  1405. pxor xmm7,xmm1
  1406. pxor xmm10,xmm3
  1407. paddd xmm14,xmm6
  1408. pxor xmm7,xmm2
  1409. paddd xmm10,xmm6
  1410. paddd xmm10,xmm7
  1411. movdqa xmm6,XMMWORD PTR[((112-128))+rax]
  1412. paddd xmm5,XMMWORD PTR[((240-128))+rax]
  1413. movdqa xmm7,xmm6
  1414. movdqa xmm1,xmm6
  1415. psrld xmm7,3
  1416. movdqa xmm2,xmm6
  1417. psrld xmm1,7
  1418. movdqa xmm0,XMMWORD PTR[((64-128))+rax]
  1419. pslld xmm2,14
  1420. pxor xmm7,xmm1
  1421. psrld xmm1,18-7
  1422. movdqa xmm3,xmm0
  1423. pxor xmm7,xmm2
  1424. pslld xmm2,25-14
  1425. pxor xmm7,xmm1
  1426. psrld xmm0,10
  1427. movdqa xmm1,xmm3
  1428. psrld xmm3,17
  1429. pxor xmm7,xmm2
  1430. pslld xmm1,13
  1431. paddd xmm5,xmm7
  1432. pxor xmm0,xmm3
  1433. psrld xmm3,19-17
  1434. pxor xmm0,xmm1
  1435. pslld xmm1,15-13
  1436. pxor xmm0,xmm3
  1437. pxor xmm0,xmm1
  1438. paddd xmm5,xmm0
  1439. movdqa xmm7,xmm14
  1440. movdqa xmm2,xmm14
  1441. psrld xmm7,6
  1442. movdqa xmm1,xmm14
  1443. pslld xmm2,7
  1444. movdqa XMMWORD PTR[(96-128)+rax],xmm5
  1445. paddd xmm5,xmm9
  1446. psrld xmm1,11
  1447. pxor xmm7,xmm2
  1448. pslld xmm2,21-7
  1449. paddd xmm5,XMMWORD PTR[64+rbp]
  1450. pxor xmm7,xmm1
  1451. psrld xmm1,25-11
  1452. movdqa xmm0,xmm14
  1453. pxor xmm7,xmm2
  1454. movdqa xmm3,xmm14
  1455. pslld xmm2,26-21
  1456. pandn xmm0,xmm8
  1457. pand xmm3,xmm15
  1458. pxor xmm7,xmm1
  1459. movdqa xmm1,xmm10
  1460. pxor xmm7,xmm2
  1461. movdqa xmm2,xmm10
  1462. psrld xmm1,2
  1463. paddd xmm5,xmm7
  1464. pxor xmm0,xmm3
  1465. movdqa xmm3,xmm11
  1466. movdqa xmm7,xmm10
  1467. pslld xmm2,10
  1468. pxor xmm3,xmm10
  1469. psrld xmm7,13
  1470. pxor xmm1,xmm2
  1471. paddd xmm5,xmm0
  1472. pslld xmm2,19-10
  1473. pand xmm4,xmm3
  1474. pxor xmm1,xmm7
  1475. psrld xmm7,22-13
  1476. pxor xmm1,xmm2
  1477. movdqa xmm9,xmm11
  1478. pslld xmm2,30-19
  1479. pxor xmm7,xmm1
  1480. pxor xmm9,xmm4
  1481. paddd xmm13,xmm5
  1482. pxor xmm7,xmm2
  1483. paddd xmm9,xmm5
  1484. paddd xmm9,xmm7
  1485. movdqa xmm5,XMMWORD PTR[((128-128))+rax]
  1486. paddd xmm6,XMMWORD PTR[((0-128))+rax]
  1487. movdqa xmm7,xmm5
  1488. movdqa xmm1,xmm5
  1489. psrld xmm7,3
  1490. movdqa xmm2,xmm5
  1491. psrld xmm1,7
  1492. movdqa xmm0,XMMWORD PTR[((80-128))+rax]
  1493. pslld xmm2,14
  1494. pxor xmm7,xmm1
  1495. psrld xmm1,18-7
  1496. movdqa xmm4,xmm0
  1497. pxor xmm7,xmm2
  1498. pslld xmm2,25-14
  1499. pxor xmm7,xmm1
  1500. psrld xmm0,10
  1501. movdqa xmm1,xmm4
  1502. psrld xmm4,17
  1503. pxor xmm7,xmm2
  1504. pslld xmm1,13
  1505. paddd xmm6,xmm7
  1506. pxor xmm0,xmm4
  1507. psrld xmm4,19-17
  1508. pxor xmm0,xmm1
  1509. pslld xmm1,15-13
  1510. pxor xmm0,xmm4
  1511. pxor xmm0,xmm1
  1512. paddd xmm6,xmm0
  1513. movdqa xmm7,xmm13
  1514. movdqa xmm2,xmm13
  1515. psrld xmm7,6
  1516. movdqa xmm1,xmm13
  1517. pslld xmm2,7
  1518. movdqa XMMWORD PTR[(112-128)+rax],xmm6
  1519. paddd xmm6,xmm8
  1520. psrld xmm1,11
  1521. pxor xmm7,xmm2
  1522. pslld xmm2,21-7
  1523. paddd xmm6,XMMWORD PTR[96+rbp]
  1524. pxor xmm7,xmm1
  1525. psrld xmm1,25-11
  1526. movdqa xmm0,xmm13
  1527. pxor xmm7,xmm2
  1528. movdqa xmm4,xmm13
  1529. pslld xmm2,26-21
  1530. pandn xmm0,xmm15
  1531. pand xmm4,xmm14
  1532. pxor xmm7,xmm1
  1533. movdqa xmm1,xmm9
  1534. pxor xmm7,xmm2
  1535. movdqa xmm2,xmm9
  1536. psrld xmm1,2
  1537. paddd xmm6,xmm7
  1538. pxor xmm0,xmm4
  1539. movdqa xmm4,xmm10
  1540. movdqa xmm7,xmm9
  1541. pslld xmm2,10
  1542. pxor xmm4,xmm9
  1543. psrld xmm7,13
  1544. pxor xmm1,xmm2
  1545. paddd xmm6,xmm0
  1546. pslld xmm2,19-10
  1547. pand xmm3,xmm4
  1548. pxor xmm1,xmm7
  1549. psrld xmm7,22-13
  1550. pxor xmm1,xmm2
  1551. movdqa xmm8,xmm10
  1552. pslld xmm2,30-19
  1553. pxor xmm7,xmm1
  1554. pxor xmm8,xmm3
  1555. paddd xmm12,xmm6
  1556. pxor xmm7,xmm2
  1557. paddd xmm8,xmm6
  1558. paddd xmm8,xmm7
  1559. lea rbp,QWORD PTR[256+rbp]
  1560. movdqa xmm6,XMMWORD PTR[((144-128))+rax]
  1561. paddd xmm5,XMMWORD PTR[((16-128))+rax]
  1562. movdqa xmm7,xmm6
  1563. movdqa xmm1,xmm6
  1564. psrld xmm7,3
  1565. movdqa xmm2,xmm6
  1566. psrld xmm1,7
  1567. movdqa xmm0,XMMWORD PTR[((96-128))+rax]
  1568. pslld xmm2,14
  1569. pxor xmm7,xmm1
  1570. psrld xmm1,18-7
  1571. movdqa xmm3,xmm0
  1572. pxor xmm7,xmm2
  1573. pslld xmm2,25-14
  1574. pxor xmm7,xmm1
  1575. psrld xmm0,10
  1576. movdqa xmm1,xmm3
  1577. psrld xmm3,17
  1578. pxor xmm7,xmm2
  1579. pslld xmm1,13
  1580. paddd xmm5,xmm7
  1581. pxor xmm0,xmm3
  1582. psrld xmm3,19-17
  1583. pxor xmm0,xmm1
  1584. pslld xmm1,15-13
  1585. pxor xmm0,xmm3
  1586. pxor xmm0,xmm1
  1587. paddd xmm5,xmm0
  1588. movdqa xmm7,xmm12
  1589. movdqa xmm2,xmm12
  1590. psrld xmm7,6
  1591. movdqa xmm1,xmm12
  1592. pslld xmm2,7
  1593. movdqa XMMWORD PTR[(128-128)+rax],xmm5
  1594. paddd xmm5,xmm15
  1595. psrld xmm1,11
  1596. pxor xmm7,xmm2
  1597. pslld xmm2,21-7
  1598. paddd xmm5,XMMWORD PTR[((-128))+rbp]
  1599. pxor xmm7,xmm1
  1600. psrld xmm1,25-11
  1601. movdqa xmm0,xmm12
  1602. pxor xmm7,xmm2
  1603. movdqa xmm3,xmm12
  1604. pslld xmm2,26-21
  1605. pandn xmm0,xmm14
  1606. pand xmm3,xmm13
  1607. pxor xmm7,xmm1
  1608. movdqa xmm1,xmm8
  1609. pxor xmm7,xmm2
  1610. movdqa xmm2,xmm8
  1611. psrld xmm1,2
  1612. paddd xmm5,xmm7
  1613. pxor xmm0,xmm3
  1614. movdqa xmm3,xmm9
  1615. movdqa xmm7,xmm8
  1616. pslld xmm2,10
  1617. pxor xmm3,xmm8
  1618. psrld xmm7,13
  1619. pxor xmm1,xmm2
  1620. paddd xmm5,xmm0
  1621. pslld xmm2,19-10
  1622. pand xmm4,xmm3
  1623. pxor xmm1,xmm7
  1624. psrld xmm7,22-13
  1625. pxor xmm1,xmm2
  1626. movdqa xmm15,xmm9
  1627. pslld xmm2,30-19
  1628. pxor xmm7,xmm1
  1629. pxor xmm15,xmm4
  1630. paddd xmm11,xmm5
  1631. pxor xmm7,xmm2
  1632. paddd xmm15,xmm5
  1633. paddd xmm15,xmm7
  1634. movdqa xmm5,XMMWORD PTR[((160-128))+rax]
  1635. paddd xmm6,XMMWORD PTR[((32-128))+rax]
  1636. movdqa xmm7,xmm5
  1637. movdqa xmm1,xmm5
  1638. psrld xmm7,3
  1639. movdqa xmm2,xmm5
  1640. psrld xmm1,7
  1641. movdqa xmm0,XMMWORD PTR[((112-128))+rax]
  1642. pslld xmm2,14
  1643. pxor xmm7,xmm1
  1644. psrld xmm1,18-7
  1645. movdqa xmm4,xmm0
  1646. pxor xmm7,xmm2
  1647. pslld xmm2,25-14
  1648. pxor xmm7,xmm1
  1649. psrld xmm0,10
  1650. movdqa xmm1,xmm4
  1651. psrld xmm4,17
  1652. pxor xmm7,xmm2
  1653. pslld xmm1,13
  1654. paddd xmm6,xmm7
  1655. pxor xmm0,xmm4
  1656. psrld xmm4,19-17
  1657. pxor xmm0,xmm1
  1658. pslld xmm1,15-13
  1659. pxor xmm0,xmm4
  1660. pxor xmm0,xmm1
  1661. paddd xmm6,xmm0
  1662. movdqa xmm7,xmm11
  1663. movdqa xmm2,xmm11
  1664. psrld xmm7,6
  1665. movdqa xmm1,xmm11
  1666. pslld xmm2,7
  1667. movdqa XMMWORD PTR[(144-128)+rax],xmm6
  1668. paddd xmm6,xmm14
  1669. psrld xmm1,11
  1670. pxor xmm7,xmm2
  1671. pslld xmm2,21-7
  1672. paddd xmm6,XMMWORD PTR[((-96))+rbp]
  1673. pxor xmm7,xmm1
  1674. psrld xmm1,25-11
  1675. movdqa xmm0,xmm11
  1676. pxor xmm7,xmm2
  1677. movdqa xmm4,xmm11
  1678. pslld xmm2,26-21
  1679. pandn xmm0,xmm13
  1680. pand xmm4,xmm12
  1681. pxor xmm7,xmm1
  1682. movdqa xmm1,xmm15
  1683. pxor xmm7,xmm2
  1684. movdqa xmm2,xmm15
  1685. psrld xmm1,2
  1686. paddd xmm6,xmm7
  1687. pxor xmm0,xmm4
  1688. movdqa xmm4,xmm8
  1689. movdqa xmm7,xmm15
  1690. pslld xmm2,10
  1691. pxor xmm4,xmm15
  1692. psrld xmm7,13
  1693. pxor xmm1,xmm2
  1694. paddd xmm6,xmm0
  1695. pslld xmm2,19-10
  1696. pand xmm3,xmm4
  1697. pxor xmm1,xmm7
  1698. psrld xmm7,22-13
  1699. pxor xmm1,xmm2
  1700. movdqa xmm14,xmm8
  1701. pslld xmm2,30-19
  1702. pxor xmm7,xmm1
  1703. pxor xmm14,xmm3
  1704. paddd xmm10,xmm6
  1705. pxor xmm7,xmm2
  1706. paddd xmm14,xmm6
  1707. paddd xmm14,xmm7
  1708. movdqa xmm6,XMMWORD PTR[((176-128))+rax]
  1709. paddd xmm5,XMMWORD PTR[((48-128))+rax]
  1710. movdqa xmm7,xmm6
  1711. movdqa xmm1,xmm6
  1712. psrld xmm7,3
  1713. movdqa xmm2,xmm6
  1714. psrld xmm1,7
  1715. movdqa xmm0,XMMWORD PTR[((128-128))+rax]
  1716. pslld xmm2,14
  1717. pxor xmm7,xmm1
  1718. psrld xmm1,18-7
  1719. movdqa xmm3,xmm0
  1720. pxor xmm7,xmm2
  1721. pslld xmm2,25-14
  1722. pxor xmm7,xmm1
  1723. psrld xmm0,10
  1724. movdqa xmm1,xmm3
  1725. psrld xmm3,17
  1726. pxor xmm7,xmm2
  1727. pslld xmm1,13
  1728. paddd xmm5,xmm7
  1729. pxor xmm0,xmm3
  1730. psrld xmm3,19-17
  1731. pxor xmm0,xmm1
  1732. pslld xmm1,15-13
  1733. pxor xmm0,xmm3
  1734. pxor xmm0,xmm1
  1735. paddd xmm5,xmm0
  1736. movdqa xmm7,xmm10
  1737. movdqa xmm2,xmm10
  1738. psrld xmm7,6
  1739. movdqa xmm1,xmm10
  1740. pslld xmm2,7
  1741. movdqa XMMWORD PTR[(160-128)+rax],xmm5
  1742. paddd xmm5,xmm13
  1743. psrld xmm1,11
  1744. pxor xmm7,xmm2
  1745. pslld xmm2,21-7
  1746. paddd xmm5,XMMWORD PTR[((-64))+rbp]
  1747. pxor xmm7,xmm1
  1748. psrld xmm1,25-11
  1749. movdqa xmm0,xmm10
  1750. pxor xmm7,xmm2
  1751. movdqa xmm3,xmm10
  1752. pslld xmm2,26-21
  1753. pandn xmm0,xmm12
  1754. pand xmm3,xmm11
  1755. pxor xmm7,xmm1
  1756. movdqa xmm1,xmm14
  1757. pxor xmm7,xmm2
  1758. movdqa xmm2,xmm14
  1759. psrld xmm1,2
  1760. paddd xmm5,xmm7
  1761. pxor xmm0,xmm3
  1762. movdqa xmm3,xmm15
  1763. movdqa xmm7,xmm14
  1764. pslld xmm2,10
  1765. pxor xmm3,xmm14
  1766. psrld xmm7,13
  1767. pxor xmm1,xmm2
  1768. paddd xmm5,xmm0
  1769. pslld xmm2,19-10
  1770. pand xmm4,xmm3
  1771. pxor xmm1,xmm7
  1772. psrld xmm7,22-13
  1773. pxor xmm1,xmm2
  1774. movdqa xmm13,xmm15
  1775. pslld xmm2,30-19
  1776. pxor xmm7,xmm1
  1777. pxor xmm13,xmm4
  1778. paddd xmm9,xmm5
  1779. pxor xmm7,xmm2
  1780. paddd xmm13,xmm5
  1781. paddd xmm13,xmm7
  1782. movdqa xmm5,XMMWORD PTR[((192-128))+rax]
  1783. paddd xmm6,XMMWORD PTR[((64-128))+rax]
  1784. movdqa xmm7,xmm5
  1785. movdqa xmm1,xmm5
  1786. psrld xmm7,3
  1787. movdqa xmm2,xmm5
  1788. psrld xmm1,7
  1789. movdqa xmm0,XMMWORD PTR[((144-128))+rax]
  1790. pslld xmm2,14
  1791. pxor xmm7,xmm1
  1792. psrld xmm1,18-7
  1793. movdqa xmm4,xmm0
  1794. pxor xmm7,xmm2
  1795. pslld xmm2,25-14
  1796. pxor xmm7,xmm1
  1797. psrld xmm0,10
  1798. movdqa xmm1,xmm4
  1799. psrld xmm4,17
  1800. pxor xmm7,xmm2
  1801. pslld xmm1,13
  1802. paddd xmm6,xmm7
  1803. pxor xmm0,xmm4
  1804. psrld xmm4,19-17
  1805. pxor xmm0,xmm1
  1806. pslld xmm1,15-13
  1807. pxor xmm0,xmm4
  1808. pxor xmm0,xmm1
  1809. paddd xmm6,xmm0
  1810. movdqa xmm7,xmm9
  1811. movdqa xmm2,xmm9
  1812. psrld xmm7,6
  1813. movdqa xmm1,xmm9
  1814. pslld xmm2,7
  1815. movdqa XMMWORD PTR[(176-128)+rax],xmm6
  1816. paddd xmm6,xmm12
  1817. psrld xmm1,11
  1818. pxor xmm7,xmm2
  1819. pslld xmm2,21-7
  1820. paddd xmm6,XMMWORD PTR[((-32))+rbp]
  1821. pxor xmm7,xmm1
  1822. psrld xmm1,25-11
  1823. movdqa xmm0,xmm9
  1824. pxor xmm7,xmm2
  1825. movdqa xmm4,xmm9
  1826. pslld xmm2,26-21
  1827. pandn xmm0,xmm11
  1828. pand xmm4,xmm10
  1829. pxor xmm7,xmm1
  1830. movdqa xmm1,xmm13
  1831. pxor xmm7,xmm2
  1832. movdqa xmm2,xmm13
  1833. psrld xmm1,2
  1834. paddd xmm6,xmm7
  1835. pxor xmm0,xmm4
  1836. movdqa xmm4,xmm14
  1837. movdqa xmm7,xmm13
  1838. pslld xmm2,10
  1839. pxor xmm4,xmm13
  1840. psrld xmm7,13
  1841. pxor xmm1,xmm2
  1842. paddd xmm6,xmm0
  1843. pslld xmm2,19-10
  1844. pand xmm3,xmm4
  1845. pxor xmm1,xmm7
  1846. psrld xmm7,22-13
  1847. pxor xmm1,xmm2
  1848. movdqa xmm12,xmm14
  1849. pslld xmm2,30-19
  1850. pxor xmm7,xmm1
  1851. pxor xmm12,xmm3
  1852. paddd xmm8,xmm6
  1853. pxor xmm7,xmm2
  1854. paddd xmm12,xmm6
  1855. paddd xmm12,xmm7
  1856. movdqa xmm6,XMMWORD PTR[((208-128))+rax]
  1857. paddd xmm5,XMMWORD PTR[((80-128))+rax]
  1858. movdqa xmm7,xmm6
  1859. movdqa xmm1,xmm6
  1860. psrld xmm7,3
  1861. movdqa xmm2,xmm6
  1862. psrld xmm1,7
  1863. movdqa xmm0,XMMWORD PTR[((160-128))+rax]
  1864. pslld xmm2,14
  1865. pxor xmm7,xmm1
  1866. psrld xmm1,18-7
  1867. movdqa xmm3,xmm0
  1868. pxor xmm7,xmm2
  1869. pslld xmm2,25-14
  1870. pxor xmm7,xmm1
  1871. psrld xmm0,10
  1872. movdqa xmm1,xmm3
  1873. psrld xmm3,17
  1874. pxor xmm7,xmm2
  1875. pslld xmm1,13
  1876. paddd xmm5,xmm7
  1877. pxor xmm0,xmm3
  1878. psrld xmm3,19-17
  1879. pxor xmm0,xmm1
  1880. pslld xmm1,15-13
  1881. pxor xmm0,xmm3
  1882. pxor xmm0,xmm1
  1883. paddd xmm5,xmm0
  1884. movdqa xmm7,xmm8
  1885. movdqa xmm2,xmm8
  1886. psrld xmm7,6
  1887. movdqa xmm1,xmm8
  1888. pslld xmm2,7
  1889. movdqa XMMWORD PTR[(192-128)+rax],xmm5
  1890. paddd xmm5,xmm11
  1891. psrld xmm1,11
  1892. pxor xmm7,xmm2
  1893. pslld xmm2,21-7
  1894. paddd xmm5,XMMWORD PTR[rbp]
  1895. pxor xmm7,xmm1
  1896. psrld xmm1,25-11
  1897. movdqa xmm0,xmm8
  1898. pxor xmm7,xmm2
  1899. movdqa xmm3,xmm8
  1900. pslld xmm2,26-21
  1901. pandn xmm0,xmm10
  1902. pand xmm3,xmm9
  1903. pxor xmm7,xmm1
  1904. movdqa xmm1,xmm12
  1905. pxor xmm7,xmm2
  1906. movdqa xmm2,xmm12
  1907. psrld xmm1,2
  1908. paddd xmm5,xmm7
  1909. pxor xmm0,xmm3
  1910. movdqa xmm3,xmm13
  1911. movdqa xmm7,xmm12
  1912. pslld xmm2,10
  1913. pxor xmm3,xmm12
  1914. psrld xmm7,13
  1915. pxor xmm1,xmm2
  1916. paddd xmm5,xmm0
  1917. pslld xmm2,19-10
  1918. pand xmm4,xmm3
  1919. pxor xmm1,xmm7
  1920. psrld xmm7,22-13
  1921. pxor xmm1,xmm2
  1922. movdqa xmm11,xmm13
  1923. pslld xmm2,30-19
  1924. pxor xmm7,xmm1
  1925. pxor xmm11,xmm4
  1926. paddd xmm15,xmm5
  1927. pxor xmm7,xmm2
  1928. paddd xmm11,xmm5
  1929. paddd xmm11,xmm7
  1930. movdqa xmm5,XMMWORD PTR[((224-128))+rax]
  1931. paddd xmm6,XMMWORD PTR[((96-128))+rax]
  1932. movdqa xmm7,xmm5
  1933. movdqa xmm1,xmm5
  1934. psrld xmm7,3
  1935. movdqa xmm2,xmm5
  1936. psrld xmm1,7
  1937. movdqa xmm0,XMMWORD PTR[((176-128))+rax]
  1938. pslld xmm2,14
  1939. pxor xmm7,xmm1
  1940. psrld xmm1,18-7
  1941. movdqa xmm4,xmm0
  1942. pxor xmm7,xmm2
  1943. pslld xmm2,25-14
  1944. pxor xmm7,xmm1
  1945. psrld xmm0,10
  1946. movdqa xmm1,xmm4
  1947. psrld xmm4,17
  1948. pxor xmm7,xmm2
  1949. pslld xmm1,13
  1950. paddd xmm6,xmm7
  1951. pxor xmm0,xmm4
  1952. psrld xmm4,19-17
  1953. pxor xmm0,xmm1
  1954. pslld xmm1,15-13
  1955. pxor xmm0,xmm4
  1956. pxor xmm0,xmm1
  1957. paddd xmm6,xmm0
  1958. movdqa xmm7,xmm15
  1959. movdqa xmm2,xmm15
  1960. psrld xmm7,6
  1961. movdqa xmm1,xmm15
  1962. pslld xmm2,7
  1963. movdqa XMMWORD PTR[(208-128)+rax],xmm6
  1964. paddd xmm6,xmm10
  1965. psrld xmm1,11
  1966. pxor xmm7,xmm2
  1967. pslld xmm2,21-7
  1968. paddd xmm6,XMMWORD PTR[32+rbp]
  1969. pxor xmm7,xmm1
  1970. psrld xmm1,25-11
  1971. movdqa xmm0,xmm15
  1972. pxor xmm7,xmm2
  1973. movdqa xmm4,xmm15
  1974. pslld xmm2,26-21
  1975. pandn xmm0,xmm9
  1976. pand xmm4,xmm8
  1977. pxor xmm7,xmm1
  1978. movdqa xmm1,xmm11
  1979. pxor xmm7,xmm2
  1980. movdqa xmm2,xmm11
  1981. psrld xmm1,2
  1982. paddd xmm6,xmm7
  1983. pxor xmm0,xmm4
  1984. movdqa xmm4,xmm12
  1985. movdqa xmm7,xmm11
  1986. pslld xmm2,10
  1987. pxor xmm4,xmm11
  1988. psrld xmm7,13
  1989. pxor xmm1,xmm2
  1990. paddd xmm6,xmm0
  1991. pslld xmm2,19-10
  1992. pand xmm3,xmm4
  1993. pxor xmm1,xmm7
  1994. psrld xmm7,22-13
  1995. pxor xmm1,xmm2
  1996. movdqa xmm10,xmm12
  1997. pslld xmm2,30-19
  1998. pxor xmm7,xmm1
  1999. pxor xmm10,xmm3
  2000. paddd xmm14,xmm6
  2001. pxor xmm7,xmm2
  2002. paddd xmm10,xmm6
  2003. paddd xmm10,xmm7
  2004. movdqa xmm6,XMMWORD PTR[((240-128))+rax]
  2005. paddd xmm5,XMMWORD PTR[((112-128))+rax]
  2006. movdqa xmm7,xmm6
  2007. movdqa xmm1,xmm6
  2008. psrld xmm7,3
  2009. movdqa xmm2,xmm6
  2010. psrld xmm1,7
  2011. movdqa xmm0,XMMWORD PTR[((192-128))+rax]
  2012. pslld xmm2,14
  2013. pxor xmm7,xmm1
  2014. psrld xmm1,18-7
  2015. movdqa xmm3,xmm0
  2016. pxor xmm7,xmm2
  2017. pslld xmm2,25-14
  2018. pxor xmm7,xmm1
  2019. psrld xmm0,10
  2020. movdqa xmm1,xmm3
  2021. psrld xmm3,17
  2022. pxor xmm7,xmm2
  2023. pslld xmm1,13
  2024. paddd xmm5,xmm7
  2025. pxor xmm0,xmm3
  2026. psrld xmm3,19-17
  2027. pxor xmm0,xmm1
  2028. pslld xmm1,15-13
  2029. pxor xmm0,xmm3
  2030. pxor xmm0,xmm1
  2031. paddd xmm5,xmm0
  2032. movdqa xmm7,xmm14
  2033. movdqa xmm2,xmm14
  2034. psrld xmm7,6
  2035. movdqa xmm1,xmm14
  2036. pslld xmm2,7
  2037. movdqa XMMWORD PTR[(224-128)+rax],xmm5
  2038. paddd xmm5,xmm9
  2039. psrld xmm1,11
  2040. pxor xmm7,xmm2
  2041. pslld xmm2,21-7
  2042. paddd xmm5,XMMWORD PTR[64+rbp]
  2043. pxor xmm7,xmm1
  2044. psrld xmm1,25-11
  2045. movdqa xmm0,xmm14
  2046. pxor xmm7,xmm2
  2047. movdqa xmm3,xmm14
  2048. pslld xmm2,26-21
  2049. pandn xmm0,xmm8
  2050. pand xmm3,xmm15
  2051. pxor xmm7,xmm1
  2052. movdqa xmm1,xmm10
  2053. pxor xmm7,xmm2
  2054. movdqa xmm2,xmm10
  2055. psrld xmm1,2
  2056. paddd xmm5,xmm7
  2057. pxor xmm0,xmm3
  2058. movdqa xmm3,xmm11
  2059. movdqa xmm7,xmm10
  2060. pslld xmm2,10
  2061. pxor xmm3,xmm10
  2062. psrld xmm7,13
  2063. pxor xmm1,xmm2
  2064. paddd xmm5,xmm0
  2065. pslld xmm2,19-10
  2066. pand xmm4,xmm3
  2067. pxor xmm1,xmm7
  2068. psrld xmm7,22-13
  2069. pxor xmm1,xmm2
  2070. movdqa xmm9,xmm11
  2071. pslld xmm2,30-19
  2072. pxor xmm7,xmm1
  2073. pxor xmm9,xmm4
  2074. paddd xmm13,xmm5
  2075. pxor xmm7,xmm2
  2076. paddd xmm9,xmm5
  2077. paddd xmm9,xmm7
  2078. movdqa xmm5,XMMWORD PTR[((0-128))+rax]
  2079. paddd xmm6,XMMWORD PTR[((128-128))+rax]
  2080. movdqa xmm7,xmm5
  2081. movdqa xmm1,xmm5
  2082. psrld xmm7,3
  2083. movdqa xmm2,xmm5
  2084. psrld xmm1,7
  2085. movdqa xmm0,XMMWORD PTR[((208-128))+rax]
  2086. pslld xmm2,14
  2087. pxor xmm7,xmm1
  2088. psrld xmm1,18-7
  2089. movdqa xmm4,xmm0
  2090. pxor xmm7,xmm2
  2091. pslld xmm2,25-14
  2092. pxor xmm7,xmm1
  2093. psrld xmm0,10
  2094. movdqa xmm1,xmm4
  2095. psrld xmm4,17
  2096. pxor xmm7,xmm2
  2097. pslld xmm1,13
  2098. paddd xmm6,xmm7
  2099. pxor xmm0,xmm4
  2100. psrld xmm4,19-17
  2101. pxor xmm0,xmm1
  2102. pslld xmm1,15-13
  2103. pxor xmm0,xmm4
  2104. pxor xmm0,xmm1
  2105. paddd xmm6,xmm0
  2106. movdqa xmm7,xmm13
  2107. movdqa xmm2,xmm13
  2108. psrld xmm7,6
  2109. movdqa xmm1,xmm13
  2110. pslld xmm2,7
  2111. movdqa XMMWORD PTR[(240-128)+rax],xmm6
  2112. paddd xmm6,xmm8
  2113. psrld xmm1,11
  2114. pxor xmm7,xmm2
  2115. pslld xmm2,21-7
  2116. paddd xmm6,XMMWORD PTR[96+rbp]
  2117. pxor xmm7,xmm1
  2118. psrld xmm1,25-11
  2119. movdqa xmm0,xmm13
  2120. pxor xmm7,xmm2
  2121. movdqa xmm4,xmm13
  2122. pslld xmm2,26-21
  2123. pandn xmm0,xmm15
  2124. pand xmm4,xmm14
  2125. pxor xmm7,xmm1
  2126. movdqa xmm1,xmm9
  2127. pxor xmm7,xmm2
  2128. movdqa xmm2,xmm9
  2129. psrld xmm1,2
  2130. paddd xmm6,xmm7
  2131. pxor xmm0,xmm4
  2132. movdqa xmm4,xmm10
  2133. movdqa xmm7,xmm9
  2134. pslld xmm2,10
  2135. pxor xmm4,xmm9
  2136. psrld xmm7,13
  2137. pxor xmm1,xmm2
  2138. paddd xmm6,xmm0
  2139. pslld xmm2,19-10
  2140. pand xmm3,xmm4
  2141. pxor xmm1,xmm7
  2142. psrld xmm7,22-13
  2143. pxor xmm1,xmm2
  2144. movdqa xmm8,xmm10
  2145. pslld xmm2,30-19
  2146. pxor xmm7,xmm1
  2147. pxor xmm8,xmm3
  2148. paddd xmm12,xmm6
  2149. pxor xmm7,xmm2
  2150. paddd xmm8,xmm6
  2151. paddd xmm8,xmm7
  2152. lea rbp,QWORD PTR[256+rbp]
  2153. dec ecx
  2154. jnz $L$oop_16_xx
  2155. mov ecx,1
  2156. lea rbp,QWORD PTR[((K256+128))]
  2157. movdqa xmm7,XMMWORD PTR[rbx]
  2158. cmp ecx,DWORD PTR[rbx]
  2159. pxor xmm0,xmm0
  2160. cmovge r8,rbp
  2161. cmp ecx,DWORD PTR[4+rbx]
  2162. movdqa xmm6,xmm7
  2163. cmovge r9,rbp
  2164. cmp ecx,DWORD PTR[8+rbx]
  2165. pcmpgtd xmm6,xmm0
  2166. cmovge r10,rbp
  2167. cmp ecx,DWORD PTR[12+rbx]
  2168. paddd xmm7,xmm6
  2169. cmovge r11,rbp
  2170. movdqu xmm0,XMMWORD PTR[((0-128))+rdi]
  2171. pand xmm8,xmm6
  2172. movdqu xmm1,XMMWORD PTR[((32-128))+rdi]
  2173. pand xmm9,xmm6
  2174. movdqu xmm2,XMMWORD PTR[((64-128))+rdi]
  2175. pand xmm10,xmm6
  2176. movdqu xmm5,XMMWORD PTR[((96-128))+rdi]
  2177. pand xmm11,xmm6
  2178. paddd xmm8,xmm0
  2179. movdqu xmm0,XMMWORD PTR[((128-128))+rdi]
  2180. pand xmm12,xmm6
  2181. paddd xmm9,xmm1
  2182. movdqu xmm1,XMMWORD PTR[((160-128))+rdi]
  2183. pand xmm13,xmm6
  2184. paddd xmm10,xmm2
  2185. movdqu xmm2,XMMWORD PTR[((192-128))+rdi]
  2186. pand xmm14,xmm6
  2187. paddd xmm11,xmm5
  2188. movdqu xmm5,XMMWORD PTR[((224-128))+rdi]
  2189. pand xmm15,xmm6
  2190. paddd xmm12,xmm0
  2191. paddd xmm13,xmm1
  2192. movdqu XMMWORD PTR[(0-128)+rdi],xmm8
  2193. paddd xmm14,xmm2
  2194. movdqu XMMWORD PTR[(32-128)+rdi],xmm9
  2195. paddd xmm15,xmm5
  2196. movdqu XMMWORD PTR[(64-128)+rdi],xmm10
  2197. movdqu XMMWORD PTR[(96-128)+rdi],xmm11
  2198. movdqu XMMWORD PTR[(128-128)+rdi],xmm12
  2199. movdqu XMMWORD PTR[(160-128)+rdi],xmm13
  2200. movdqu XMMWORD PTR[(192-128)+rdi],xmm14
  2201. movdqu XMMWORD PTR[(224-128)+rdi],xmm15
  2202. movdqa XMMWORD PTR[rbx],xmm7
  2203. movdqa xmm6,XMMWORD PTR[$L$pbswap]
  2204. dec edx
  2205. jnz $L$oop
  2206. mov edx,DWORD PTR[280+rsp]
  2207. lea rdi,QWORD PTR[16+rdi]
  2208. lea rsi,QWORD PTR[64+rsi]
  2209. dec edx
  2210. jnz $L$oop_grande
  2211. $L$done::
  2212. mov rax,QWORD PTR[272+rsp]
  2213. movaps xmm6,XMMWORD PTR[((-184))+rax]
  2214. movaps xmm7,XMMWORD PTR[((-168))+rax]
  2215. movaps xmm8,XMMWORD PTR[((-152))+rax]
  2216. movaps xmm9,XMMWORD PTR[((-136))+rax]
  2217. movaps xmm10,XMMWORD PTR[((-120))+rax]
  2218. movaps xmm11,XMMWORD PTR[((-104))+rax]
  2219. movaps xmm12,XMMWORD PTR[((-88))+rax]
  2220. movaps xmm13,XMMWORD PTR[((-72))+rax]
  2221. movaps xmm14,XMMWORD PTR[((-56))+rax]
  2222. movaps xmm15,XMMWORD PTR[((-40))+rax]
  2223. mov rbp,QWORD PTR[((-16))+rax]
  2224. mov rbx,QWORD PTR[((-8))+rax]
  2225. lea rsp,QWORD PTR[rax]
  2226. $L$epilogue::
  2227. mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue
  2228. mov rsi,QWORD PTR[16+rsp]
  2229. DB 0F3h,0C3h ;repret
  2230. $L$SEH_end_sha256_multi_block::
  2231. sha256_multi_block ENDP
  2232. ALIGN 32
  2233. sha256_multi_block_shaext PROC PRIVATE
  2234. mov QWORD PTR[8+rsp],rdi ;WIN64 prologue
  2235. mov QWORD PTR[16+rsp],rsi
  2236. mov rax,rsp
  2237. $L$SEH_begin_sha256_multi_block_shaext::
  2238. mov rdi,rcx
  2239. mov rsi,rdx
  2240. mov rdx,r8
  2241. _shaext_shortcut::
  2242. mov rax,rsp
  2243. push rbx
  2244. push rbp
  2245. lea rsp,QWORD PTR[((-168))+rsp]
  2246. movaps XMMWORD PTR[rsp],xmm6
  2247. movaps XMMWORD PTR[16+rsp],xmm7
  2248. movaps XMMWORD PTR[32+rsp],xmm8
  2249. movaps XMMWORD PTR[48+rsp],xmm9
  2250. movaps XMMWORD PTR[(-120)+rax],xmm10
  2251. movaps XMMWORD PTR[(-104)+rax],xmm11
  2252. movaps XMMWORD PTR[(-88)+rax],xmm12
  2253. movaps XMMWORD PTR[(-72)+rax],xmm13
  2254. movaps XMMWORD PTR[(-56)+rax],xmm14
  2255. movaps XMMWORD PTR[(-40)+rax],xmm15
  2256. sub rsp,288
  2257. shl edx,1
  2258. and rsp,-256
  2259. lea rdi,QWORD PTR[128+rdi]
  2260. mov QWORD PTR[272+rsp],rax
  2261. $L$body_shaext::
  2262. lea rbx,QWORD PTR[256+rsp]
  2263. lea rbp,QWORD PTR[((K256_shaext+128))]
  2264. $L$oop_grande_shaext::
  2265. mov DWORD PTR[280+rsp],edx
  2266. xor edx,edx
  2267. mov r8,QWORD PTR[rsi]
  2268. mov ecx,DWORD PTR[8+rsi]
  2269. cmp ecx,edx
  2270. cmovg edx,ecx
  2271. test ecx,ecx
  2272. mov DWORD PTR[rbx],ecx
  2273. cmovle r8,rsp
  2274. mov r9,QWORD PTR[16+rsi]
  2275. mov ecx,DWORD PTR[24+rsi]
  2276. cmp ecx,edx
  2277. cmovg edx,ecx
  2278. test ecx,ecx
  2279. mov DWORD PTR[4+rbx],ecx
  2280. cmovle r9,rsp
  2281. test edx,edx
  2282. jz $L$done_shaext
  2283. movq xmm12,QWORD PTR[((0-128))+rdi]
  2284. movq xmm4,QWORD PTR[((32-128))+rdi]
  2285. movq xmm13,QWORD PTR[((64-128))+rdi]
  2286. movq xmm5,QWORD PTR[((96-128))+rdi]
  2287. movq xmm8,QWORD PTR[((128-128))+rdi]
  2288. movq xmm9,QWORD PTR[((160-128))+rdi]
  2289. movq xmm10,QWORD PTR[((192-128))+rdi]
  2290. movq xmm11,QWORD PTR[((224-128))+rdi]
  2291. punpckldq xmm12,xmm4
  2292. punpckldq xmm13,xmm5
  2293. punpckldq xmm8,xmm9
  2294. punpckldq xmm10,xmm11
  2295. movdqa xmm3,XMMWORD PTR[((K256_shaext-16))]
  2296. movdqa xmm14,xmm12
  2297. movdqa xmm15,xmm13
  2298. punpcklqdq xmm12,xmm8
  2299. punpcklqdq xmm13,xmm10
  2300. punpckhqdq xmm14,xmm8
  2301. punpckhqdq xmm15,xmm10
  2302. pshufd xmm12,xmm12,27
  2303. pshufd xmm13,xmm13,27
  2304. pshufd xmm14,xmm14,27
  2305. pshufd xmm15,xmm15,27
  2306. jmp $L$oop_shaext
  2307. ALIGN 32
  2308. $L$oop_shaext::
  2309. movdqu xmm4,XMMWORD PTR[r8]
  2310. movdqu xmm8,XMMWORD PTR[r9]
  2311. movdqu xmm5,XMMWORD PTR[16+r8]
  2312. movdqu xmm9,XMMWORD PTR[16+r9]
  2313. movdqu xmm6,XMMWORD PTR[32+r8]
  2314. DB 102,15,56,0,227
  2315. movdqu xmm10,XMMWORD PTR[32+r9]
  2316. DB 102,68,15,56,0,195
  2317. movdqu xmm7,XMMWORD PTR[48+r8]
  2318. lea r8,QWORD PTR[64+r8]
  2319. movdqu xmm11,XMMWORD PTR[48+r9]
  2320. lea r9,QWORD PTR[64+r9]
  2321. movdqa xmm0,XMMWORD PTR[((0-128))+rbp]
  2322. DB 102,15,56,0,235
  2323. paddd xmm0,xmm4
  2324. pxor xmm4,xmm12
  2325. movdqa xmm1,xmm0
  2326. movdqa xmm2,XMMWORD PTR[((0-128))+rbp]
  2327. DB 102,68,15,56,0,203
  2328. paddd xmm2,xmm8
  2329. movdqa XMMWORD PTR[80+rsp],xmm13
  2330. DB 69,15,56,203,236
  2331. pxor xmm8,xmm14
  2332. movdqa xmm0,xmm2
  2333. movdqa XMMWORD PTR[112+rsp],xmm15
  2334. DB 69,15,56,203,254
  2335. pshufd xmm0,xmm1,00eh
  2336. pxor xmm4,xmm12
  2337. movdqa XMMWORD PTR[64+rsp],xmm12
  2338. DB 69,15,56,203,229
  2339. pshufd xmm0,xmm2,00eh
  2340. pxor xmm8,xmm14
  2341. movdqa XMMWORD PTR[96+rsp],xmm14
  2342. movdqa xmm1,XMMWORD PTR[((16-128))+rbp]
  2343. paddd xmm1,xmm5
  2344. DB 102,15,56,0,243
  2345. DB 69,15,56,203,247
  2346. movdqa xmm0,xmm1
  2347. movdqa xmm2,XMMWORD PTR[((16-128))+rbp]
  2348. paddd xmm2,xmm9
  2349. DB 69,15,56,203,236
  2350. movdqa xmm0,xmm2
  2351. prefetcht0 [127+r8]
  2352. DB 102,15,56,0,251
  2353. DB 102,68,15,56,0,211
  2354. prefetcht0 [127+r9]
  2355. DB 69,15,56,203,254
  2356. pshufd xmm0,xmm1,00eh
  2357. DB 102,68,15,56,0,219
  2358. DB 15,56,204,229
  2359. DB 69,15,56,203,229
  2360. pshufd xmm0,xmm2,00eh
  2361. movdqa xmm1,XMMWORD PTR[((32-128))+rbp]
  2362. paddd xmm1,xmm6
  2363. DB 69,15,56,203,247
  2364. movdqa xmm0,xmm1
  2365. movdqa xmm2,XMMWORD PTR[((32-128))+rbp]
  2366. paddd xmm2,xmm10
  2367. DB 69,15,56,203,236
  2368. DB 69,15,56,204,193
  2369. movdqa xmm0,xmm2
  2370. movdqa xmm3,xmm7
  2371. DB 69,15,56,203,254
  2372. pshufd xmm0,xmm1,00eh
  2373. DB 102,15,58,15,222,4
  2374. paddd xmm4,xmm3
  2375. movdqa xmm3,xmm11
  2376. DB 102,65,15,58,15,218,4
  2377. DB 15,56,204,238
  2378. DB 69,15,56,203,229
  2379. pshufd xmm0,xmm2,00eh
  2380. movdqa xmm1,XMMWORD PTR[((48-128))+rbp]
  2381. paddd xmm1,xmm7
  2382. DB 69,15,56,203,247
  2383. DB 69,15,56,204,202
  2384. movdqa xmm0,xmm1
  2385. movdqa xmm2,XMMWORD PTR[((48-128))+rbp]
  2386. paddd xmm8,xmm3
  2387. paddd xmm2,xmm11
  2388. DB 15,56,205,231
  2389. DB 69,15,56,203,236
  2390. movdqa xmm0,xmm2
  2391. movdqa xmm3,xmm4
  2392. DB 102,15,58,15,223,4
  2393. DB 69,15,56,203,254
  2394. DB 69,15,56,205,195
  2395. pshufd xmm0,xmm1,00eh
  2396. paddd xmm5,xmm3
  2397. movdqa xmm3,xmm8
  2398. DB 102,65,15,58,15,219,4
  2399. DB 15,56,204,247
  2400. DB 69,15,56,203,229
  2401. pshufd xmm0,xmm2,00eh
  2402. movdqa xmm1,XMMWORD PTR[((64-128))+rbp]
  2403. paddd xmm1,xmm4
  2404. DB 69,15,56,203,247
  2405. DB 69,15,56,204,211
  2406. movdqa xmm0,xmm1
  2407. movdqa xmm2,XMMWORD PTR[((64-128))+rbp]
  2408. paddd xmm9,xmm3
  2409. paddd xmm2,xmm8
  2410. DB 15,56,205,236
  2411. DB 69,15,56,203,236
  2412. movdqa xmm0,xmm2
  2413. movdqa xmm3,xmm5
  2414. DB 102,15,58,15,220,4
  2415. DB 69,15,56,203,254
  2416. DB 69,15,56,205,200
  2417. pshufd xmm0,xmm1,00eh
  2418. paddd xmm6,xmm3
  2419. movdqa xmm3,xmm9
  2420. DB 102,65,15,58,15,216,4
  2421. DB 15,56,204,252
  2422. DB 69,15,56,203,229
  2423. pshufd xmm0,xmm2,00eh
  2424. movdqa xmm1,XMMWORD PTR[((80-128))+rbp]
  2425. paddd xmm1,xmm5
  2426. DB 69,15,56,203,247
  2427. DB 69,15,56,204,216
  2428. movdqa xmm0,xmm1
  2429. movdqa xmm2,XMMWORD PTR[((80-128))+rbp]
  2430. paddd xmm10,xmm3
  2431. paddd xmm2,xmm9
  2432. DB 15,56,205,245
  2433. DB 69,15,56,203,236
  2434. movdqa xmm0,xmm2
  2435. movdqa xmm3,xmm6
  2436. DB 102,15,58,15,221,4
  2437. DB 69,15,56,203,254
  2438. DB 69,15,56,205,209
  2439. pshufd xmm0,xmm1,00eh
  2440. paddd xmm7,xmm3
  2441. movdqa xmm3,xmm10
  2442. DB 102,65,15,58,15,217,4
  2443. DB 15,56,204,229
  2444. DB 69,15,56,203,229
  2445. pshufd xmm0,xmm2,00eh
  2446. movdqa xmm1,XMMWORD PTR[((96-128))+rbp]
  2447. paddd xmm1,xmm6
  2448. DB 69,15,56,203,247
  2449. DB 69,15,56,204,193
  2450. movdqa xmm0,xmm1
  2451. movdqa xmm2,XMMWORD PTR[((96-128))+rbp]
  2452. paddd xmm11,xmm3
  2453. paddd xmm2,xmm10
  2454. DB 15,56,205,254
  2455. DB 69,15,56,203,236
  2456. movdqa xmm0,xmm2
  2457. movdqa xmm3,xmm7
  2458. DB 102,15,58,15,222,4
  2459. DB 69,15,56,203,254
  2460. DB 69,15,56,205,218
  2461. pshufd xmm0,xmm1,00eh
  2462. paddd xmm4,xmm3
  2463. movdqa xmm3,xmm11
  2464. DB 102,65,15,58,15,218,4
  2465. DB 15,56,204,238
  2466. DB 69,15,56,203,229
  2467. pshufd xmm0,xmm2,00eh
  2468. movdqa xmm1,XMMWORD PTR[((112-128))+rbp]
  2469. paddd xmm1,xmm7
  2470. DB 69,15,56,203,247
  2471. DB 69,15,56,204,202
  2472. movdqa xmm0,xmm1
  2473. movdqa xmm2,XMMWORD PTR[((112-128))+rbp]
  2474. paddd xmm8,xmm3
  2475. paddd xmm2,xmm11
  2476. DB 15,56,205,231
  2477. DB 69,15,56,203,236
  2478. movdqa xmm0,xmm2
  2479. movdqa xmm3,xmm4
  2480. DB 102,15,58,15,223,4
  2481. DB 69,15,56,203,254
  2482. DB 69,15,56,205,195
  2483. pshufd xmm0,xmm1,00eh
  2484. paddd xmm5,xmm3
  2485. movdqa xmm3,xmm8
  2486. DB 102,65,15,58,15,219,4
  2487. DB 15,56,204,247
  2488. DB 69,15,56,203,229
  2489. pshufd xmm0,xmm2,00eh
  2490. movdqa xmm1,XMMWORD PTR[((128-128))+rbp]
  2491. paddd xmm1,xmm4
  2492. DB 69,15,56,203,247
  2493. DB 69,15,56,204,211
  2494. movdqa xmm0,xmm1
  2495. movdqa xmm2,XMMWORD PTR[((128-128))+rbp]
  2496. paddd xmm9,xmm3
  2497. paddd xmm2,xmm8
  2498. DB 15,56,205,236
  2499. DB 69,15,56,203,236
  2500. movdqa xmm0,xmm2
  2501. movdqa xmm3,xmm5
  2502. DB 102,15,58,15,220,4
  2503. DB 69,15,56,203,254
  2504. DB 69,15,56,205,200
  2505. pshufd xmm0,xmm1,00eh
  2506. paddd xmm6,xmm3
  2507. movdqa xmm3,xmm9
  2508. DB 102,65,15,58,15,216,4
  2509. DB 15,56,204,252
  2510. DB 69,15,56,203,229
  2511. pshufd xmm0,xmm2,00eh
  2512. movdqa xmm1,XMMWORD PTR[((144-128))+rbp]
  2513. paddd xmm1,xmm5
  2514. DB 69,15,56,203,247
  2515. DB 69,15,56,204,216
  2516. movdqa xmm0,xmm1
  2517. movdqa xmm2,XMMWORD PTR[((144-128))+rbp]
  2518. paddd xmm10,xmm3
  2519. paddd xmm2,xmm9
  2520. DB 15,56,205,245
  2521. DB 69,15,56,203,236
  2522. movdqa xmm0,xmm2
  2523. movdqa xmm3,xmm6
  2524. DB 102,15,58,15,221,4
  2525. DB 69,15,56,203,254
  2526. DB 69,15,56,205,209
  2527. pshufd xmm0,xmm1,00eh
  2528. paddd xmm7,xmm3
  2529. movdqa xmm3,xmm10
  2530. DB 102,65,15,58,15,217,4
  2531. DB 15,56,204,229
  2532. DB 69,15,56,203,229
  2533. pshufd xmm0,xmm2,00eh
  2534. movdqa xmm1,XMMWORD PTR[((160-128))+rbp]
  2535. paddd xmm1,xmm6
  2536. DB 69,15,56,203,247
  2537. DB 69,15,56,204,193
  2538. movdqa xmm0,xmm1
  2539. movdqa xmm2,XMMWORD PTR[((160-128))+rbp]
  2540. paddd xmm11,xmm3
  2541. paddd xmm2,xmm10
  2542. DB 15,56,205,254
  2543. DB 69,15,56,203,236
  2544. movdqa xmm0,xmm2
  2545. movdqa xmm3,xmm7
  2546. DB 102,15,58,15,222,4
  2547. DB 69,15,56,203,254
  2548. DB 69,15,56,205,218
  2549. pshufd xmm0,xmm1,00eh
  2550. paddd xmm4,xmm3
  2551. movdqa xmm3,xmm11
  2552. DB 102,65,15,58,15,218,4
  2553. DB 15,56,204,238
  2554. DB 69,15,56,203,229
  2555. pshufd xmm0,xmm2,00eh
  2556. movdqa xmm1,XMMWORD PTR[((176-128))+rbp]
  2557. paddd xmm1,xmm7
  2558. DB 69,15,56,203,247
  2559. DB 69,15,56,204,202
  2560. movdqa xmm0,xmm1
  2561. movdqa xmm2,XMMWORD PTR[((176-128))+rbp]
  2562. paddd xmm8,xmm3
  2563. paddd xmm2,xmm11
  2564. DB 15,56,205,231
  2565. DB 69,15,56,203,236
  2566. movdqa xmm0,xmm2
  2567. movdqa xmm3,xmm4
  2568. DB 102,15,58,15,223,4
  2569. DB 69,15,56,203,254
  2570. DB 69,15,56,205,195
  2571. pshufd xmm0,xmm1,00eh
  2572. paddd xmm5,xmm3
  2573. movdqa xmm3,xmm8
  2574. DB 102,65,15,58,15,219,4
  2575. DB 15,56,204,247
  2576. DB 69,15,56,203,229
  2577. pshufd xmm0,xmm2,00eh
  2578. movdqa xmm1,XMMWORD PTR[((192-128))+rbp]
  2579. paddd xmm1,xmm4
  2580. DB 69,15,56,203,247
  2581. DB 69,15,56,204,211
  2582. movdqa xmm0,xmm1
  2583. movdqa xmm2,XMMWORD PTR[((192-128))+rbp]
  2584. paddd xmm9,xmm3
  2585. paddd xmm2,xmm8
  2586. DB 15,56,205,236
  2587. DB 69,15,56,203,236
  2588. movdqa xmm0,xmm2
  2589. movdqa xmm3,xmm5
  2590. DB 102,15,58,15,220,4
  2591. DB 69,15,56,203,254
  2592. DB 69,15,56,205,200
  2593. pshufd xmm0,xmm1,00eh
  2594. paddd xmm6,xmm3
  2595. movdqa xmm3,xmm9
  2596. DB 102,65,15,58,15,216,4
  2597. DB 15,56,204,252
  2598. DB 69,15,56,203,229
  2599. pshufd xmm0,xmm2,00eh
  2600. movdqa xmm1,XMMWORD PTR[((208-128))+rbp]
  2601. paddd xmm1,xmm5
  2602. DB 69,15,56,203,247
  2603. DB 69,15,56,204,216
  2604. movdqa xmm0,xmm1
  2605. movdqa xmm2,XMMWORD PTR[((208-128))+rbp]
  2606. paddd xmm10,xmm3
  2607. paddd xmm2,xmm9
  2608. DB 15,56,205,245
  2609. DB 69,15,56,203,236
  2610. movdqa xmm0,xmm2
  2611. movdqa xmm3,xmm6
  2612. DB 102,15,58,15,221,4
  2613. DB 69,15,56,203,254
  2614. DB 69,15,56,205,209
  2615. pshufd xmm0,xmm1,00eh
  2616. paddd xmm7,xmm3
  2617. movdqa xmm3,xmm10
  2618. DB 102,65,15,58,15,217,4
  2619. nop
  2620. DB 69,15,56,203,229
  2621. pshufd xmm0,xmm2,00eh
  2622. movdqa xmm1,XMMWORD PTR[((224-128))+rbp]
  2623. paddd xmm1,xmm6
  2624. DB 69,15,56,203,247
  2625. movdqa xmm0,xmm1
  2626. movdqa xmm2,XMMWORD PTR[((224-128))+rbp]
  2627. paddd xmm11,xmm3
  2628. paddd xmm2,xmm10
  2629. DB 15,56,205,254
  2630. nop
  2631. DB 69,15,56,203,236
  2632. movdqa xmm0,xmm2
  2633. mov ecx,1
  2634. pxor xmm6,xmm6
  2635. DB 69,15,56,203,254
  2636. DB 69,15,56,205,218
  2637. pshufd xmm0,xmm1,00eh
  2638. movdqa xmm1,XMMWORD PTR[((240-128))+rbp]
  2639. paddd xmm1,xmm7
  2640. movq xmm7,QWORD PTR[rbx]
  2641. nop
  2642. DB 69,15,56,203,229
  2643. pshufd xmm0,xmm2,00eh
  2644. movdqa xmm2,XMMWORD PTR[((240-128))+rbp]
  2645. paddd xmm2,xmm11
  2646. DB 69,15,56,203,247
  2647. movdqa xmm0,xmm1
  2648. cmp ecx,DWORD PTR[rbx]
  2649. cmovge r8,rsp
  2650. cmp ecx,DWORD PTR[4+rbx]
  2651. cmovge r9,rsp
  2652. pshufd xmm9,xmm7,000h
  2653. DB 69,15,56,203,236
  2654. movdqa xmm0,xmm2
  2655. pshufd xmm10,xmm7,055h
  2656. movdqa xmm11,xmm7
  2657. DB 69,15,56,203,254
  2658. pshufd xmm0,xmm1,00eh
  2659. pcmpgtd xmm9,xmm6
  2660. pcmpgtd xmm10,xmm6
  2661. DB 69,15,56,203,229
  2662. pshufd xmm0,xmm2,00eh
  2663. pcmpgtd xmm11,xmm6
  2664. movdqa xmm3,XMMWORD PTR[((K256_shaext-16))]
  2665. DB 69,15,56,203,247
  2666. pand xmm13,xmm9
  2667. pand xmm15,xmm10
  2668. pand xmm12,xmm9
  2669. pand xmm14,xmm10
  2670. paddd xmm11,xmm7
  2671. paddd xmm13,XMMWORD PTR[80+rsp]
  2672. paddd xmm15,XMMWORD PTR[112+rsp]
  2673. paddd xmm12,XMMWORD PTR[64+rsp]
  2674. paddd xmm14,XMMWORD PTR[96+rsp]
  2675. movq QWORD PTR[rbx],xmm11
  2676. dec edx
  2677. jnz $L$oop_shaext
  2678. mov edx,DWORD PTR[280+rsp]
  2679. pshufd xmm12,xmm12,27
  2680. pshufd xmm13,xmm13,27
  2681. pshufd xmm14,xmm14,27
  2682. pshufd xmm15,xmm15,27
  2683. movdqa xmm5,xmm12
  2684. movdqa xmm6,xmm13
  2685. punpckldq xmm12,xmm14
  2686. punpckhdq xmm5,xmm14
  2687. punpckldq xmm13,xmm15
  2688. punpckhdq xmm6,xmm15
  2689. movq QWORD PTR[(0-128)+rdi],xmm12
  2690. psrldq xmm12,8
  2691. movq QWORD PTR[(128-128)+rdi],xmm5
  2692. psrldq xmm5,8
  2693. movq QWORD PTR[(32-128)+rdi],xmm12
  2694. movq QWORD PTR[(160-128)+rdi],xmm5
  2695. movq QWORD PTR[(64-128)+rdi],xmm13
  2696. psrldq xmm13,8
  2697. movq QWORD PTR[(192-128)+rdi],xmm6
  2698. psrldq xmm6,8
  2699. movq QWORD PTR[(96-128)+rdi],xmm13
  2700. movq QWORD PTR[(224-128)+rdi],xmm6
  2701. lea rdi,QWORD PTR[8+rdi]
  2702. lea rsi,QWORD PTR[32+rsi]
  2703. dec edx
  2704. jnz $L$oop_grande_shaext
  2705. $L$done_shaext::
  2706. movaps xmm6,XMMWORD PTR[((-184))+rax]
  2707. movaps xmm7,XMMWORD PTR[((-168))+rax]
  2708. movaps xmm8,XMMWORD PTR[((-152))+rax]
  2709. movaps xmm9,XMMWORD PTR[((-136))+rax]
  2710. movaps xmm10,XMMWORD PTR[((-120))+rax]
  2711. movaps xmm11,XMMWORD PTR[((-104))+rax]
  2712. movaps xmm12,XMMWORD PTR[((-88))+rax]
  2713. movaps xmm13,XMMWORD PTR[((-72))+rax]
  2714. movaps xmm14,XMMWORD PTR[((-56))+rax]
  2715. movaps xmm15,XMMWORD PTR[((-40))+rax]
  2716. mov rbp,QWORD PTR[((-16))+rax]
  2717. mov rbx,QWORD PTR[((-8))+rax]
  2718. lea rsp,QWORD PTR[rax]
  2719. $L$epilogue_shaext::
  2720. mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue
  2721. mov rsi,QWORD PTR[16+rsp]
  2722. DB 0F3h,0C3h ;repret
  2723. $L$SEH_end_sha256_multi_block_shaext::
  2724. sha256_multi_block_shaext ENDP
  2725. ALIGN 32
  2726. sha256_multi_block_avx PROC PRIVATE
  2727. mov QWORD PTR[8+rsp],rdi ;WIN64 prologue
  2728. mov QWORD PTR[16+rsp],rsi
  2729. mov rax,rsp
  2730. $L$SEH_begin_sha256_multi_block_avx::
  2731. mov rdi,rcx
  2732. mov rsi,rdx
  2733. mov rdx,r8
  2734. _avx_shortcut::
  2735. shr rcx,32
  2736. cmp edx,2
  2737. jb $L$avx
  2738. test ecx,32
  2739. jnz _avx2_shortcut
  2740. jmp $L$avx
  2741. ALIGN 32
  2742. $L$avx::
  2743. mov rax,rsp
  2744. push rbx
  2745. push rbp
  2746. lea rsp,QWORD PTR[((-168))+rsp]
  2747. movaps XMMWORD PTR[rsp],xmm6
  2748. movaps XMMWORD PTR[16+rsp],xmm7
  2749. movaps XMMWORD PTR[32+rsp],xmm8
  2750. movaps XMMWORD PTR[48+rsp],xmm9
  2751. movaps XMMWORD PTR[(-120)+rax],xmm10
  2752. movaps XMMWORD PTR[(-104)+rax],xmm11
  2753. movaps XMMWORD PTR[(-88)+rax],xmm12
  2754. movaps XMMWORD PTR[(-72)+rax],xmm13
  2755. movaps XMMWORD PTR[(-56)+rax],xmm14
  2756. movaps XMMWORD PTR[(-40)+rax],xmm15
  2757. sub rsp,288
  2758. and rsp,-256
  2759. mov QWORD PTR[272+rsp],rax
  2760. $L$body_avx::
  2761. lea rbp,QWORD PTR[((K256+128))]
  2762. lea rbx,QWORD PTR[256+rsp]
  2763. lea rdi,QWORD PTR[128+rdi]
  2764. $L$oop_grande_avx::
  2765. mov DWORD PTR[280+rsp],edx
  2766. xor edx,edx
  2767. mov r8,QWORD PTR[rsi]
  2768. mov ecx,DWORD PTR[8+rsi]
  2769. cmp ecx,edx
  2770. cmovg edx,ecx
  2771. test ecx,ecx
  2772. mov DWORD PTR[rbx],ecx
  2773. cmovle r8,rbp
  2774. mov r9,QWORD PTR[16+rsi]
  2775. mov ecx,DWORD PTR[24+rsi]
  2776. cmp ecx,edx
  2777. cmovg edx,ecx
  2778. test ecx,ecx
  2779. mov DWORD PTR[4+rbx],ecx
  2780. cmovle r9,rbp
  2781. mov r10,QWORD PTR[32+rsi]
  2782. mov ecx,DWORD PTR[40+rsi]
  2783. cmp ecx,edx
  2784. cmovg edx,ecx
  2785. test ecx,ecx
  2786. mov DWORD PTR[8+rbx],ecx
  2787. cmovle r10,rbp
  2788. mov r11,QWORD PTR[48+rsi]
  2789. mov ecx,DWORD PTR[56+rsi]
  2790. cmp ecx,edx
  2791. cmovg edx,ecx
  2792. test ecx,ecx
  2793. mov DWORD PTR[12+rbx],ecx
  2794. cmovle r11,rbp
  2795. test edx,edx
  2796. jz $L$done_avx
  2797. vmovdqu xmm8,XMMWORD PTR[((0-128))+rdi]
  2798. lea rax,QWORD PTR[128+rsp]
  2799. vmovdqu xmm9,XMMWORD PTR[((32-128))+rdi]
  2800. vmovdqu xmm10,XMMWORD PTR[((64-128))+rdi]
  2801. vmovdqu xmm11,XMMWORD PTR[((96-128))+rdi]
  2802. vmovdqu xmm12,XMMWORD PTR[((128-128))+rdi]
  2803. vmovdqu xmm13,XMMWORD PTR[((160-128))+rdi]
  2804. vmovdqu xmm14,XMMWORD PTR[((192-128))+rdi]
  2805. vmovdqu xmm15,XMMWORD PTR[((224-128))+rdi]
  2806. vmovdqu xmm6,XMMWORD PTR[$L$pbswap]
  2807. jmp $L$oop_avx
  2808. ALIGN 32
  2809. $L$oop_avx::
  2810. vpxor xmm4,xmm10,xmm9
  2811. vmovd xmm5,DWORD PTR[r8]
  2812. vmovd xmm0,DWORD PTR[r9]
  2813. vpinsrd xmm5,xmm5,DWORD PTR[r10],1
  2814. vpinsrd xmm0,xmm0,DWORD PTR[r11],1
  2815. vpunpckldq xmm5,xmm5,xmm0
  2816. vpshufb xmm5,xmm5,xmm6
  2817. vpsrld xmm7,xmm12,6
  2818. vpslld xmm2,xmm12,26
  2819. vmovdqu XMMWORD PTR[(0-128)+rax],xmm5
  2820. vpaddd xmm5,xmm5,xmm15
  2821. vpsrld xmm1,xmm12,11
  2822. vpxor xmm7,xmm7,xmm2
  2823. vpslld xmm2,xmm12,21
  2824. vpaddd xmm5,xmm5,XMMWORD PTR[((-128))+rbp]
  2825. vpxor xmm7,xmm7,xmm1
  2826. vpsrld xmm1,xmm12,25
  2827. vpxor xmm7,xmm7,xmm2
  2828. vpslld xmm2,xmm12,7
  2829. vpandn xmm0,xmm12,xmm14
  2830. vpand xmm3,xmm12,xmm13
  2831. vpxor xmm7,xmm7,xmm1
  2832. vpsrld xmm15,xmm8,2
  2833. vpxor xmm7,xmm7,xmm2
  2834. vpslld xmm1,xmm8,30
  2835. vpxor xmm0,xmm0,xmm3
  2836. vpxor xmm3,xmm9,xmm8
  2837. vpxor xmm15,xmm15,xmm1
  2838. vpaddd xmm5,xmm5,xmm7
  2839. vpsrld xmm1,xmm8,13
  2840. vpslld xmm2,xmm8,19
  2841. vpaddd xmm5,xmm5,xmm0
  2842. vpand xmm4,xmm4,xmm3
  2843. vpxor xmm7,xmm15,xmm1
  2844. vpsrld xmm1,xmm8,22
  2845. vpxor xmm7,xmm7,xmm2
  2846. vpslld xmm2,xmm8,10
  2847. vpxor xmm15,xmm9,xmm4
  2848. vpaddd xmm11,xmm11,xmm5
  2849. vpxor xmm7,xmm7,xmm1
  2850. vpxor xmm7,xmm7,xmm2
  2851. vpaddd xmm15,xmm15,xmm5
  2852. vpaddd xmm15,xmm15,xmm7
  2853. vmovd xmm5,DWORD PTR[4+r8]
  2854. vmovd xmm0,DWORD PTR[4+r9]
  2855. vpinsrd xmm5,xmm5,DWORD PTR[4+r10],1
  2856. vpinsrd xmm0,xmm0,DWORD PTR[4+r11],1
  2857. vpunpckldq xmm5,xmm5,xmm0
  2858. vpshufb xmm5,xmm5,xmm6
  2859. vpsrld xmm7,xmm11,6
  2860. vpslld xmm2,xmm11,26
  2861. vmovdqu XMMWORD PTR[(16-128)+rax],xmm5
  2862. vpaddd xmm5,xmm5,xmm14
  2863. vpsrld xmm1,xmm11,11
  2864. vpxor xmm7,xmm7,xmm2
  2865. vpslld xmm2,xmm11,21
  2866. vpaddd xmm5,xmm5,XMMWORD PTR[((-96))+rbp]
  2867. vpxor xmm7,xmm7,xmm1
  2868. vpsrld xmm1,xmm11,25
  2869. vpxor xmm7,xmm7,xmm2
  2870. vpslld xmm2,xmm11,7
  2871. vpandn xmm0,xmm11,xmm13
  2872. vpand xmm4,xmm11,xmm12
  2873. vpxor xmm7,xmm7,xmm1
  2874. vpsrld xmm14,xmm15,2
  2875. vpxor xmm7,xmm7,xmm2
  2876. vpslld xmm1,xmm15,30
  2877. vpxor xmm0,xmm0,xmm4
  2878. vpxor xmm4,xmm8,xmm15
  2879. vpxor xmm14,xmm14,xmm1
  2880. vpaddd xmm5,xmm5,xmm7
  2881. vpsrld xmm1,xmm15,13
  2882. vpslld xmm2,xmm15,19
  2883. vpaddd xmm5,xmm5,xmm0
  2884. vpand xmm3,xmm3,xmm4
  2885. vpxor xmm7,xmm14,xmm1
  2886. vpsrld xmm1,xmm15,22
  2887. vpxor xmm7,xmm7,xmm2
  2888. vpslld xmm2,xmm15,10
  2889. vpxor xmm14,xmm8,xmm3
  2890. vpaddd xmm10,xmm10,xmm5
  2891. vpxor xmm7,xmm7,xmm1
  2892. vpxor xmm7,xmm7,xmm2
  2893. vpaddd xmm14,xmm14,xmm5
  2894. vpaddd xmm14,xmm14,xmm7
  2895. vmovd xmm5,DWORD PTR[8+r8]
  2896. vmovd xmm0,DWORD PTR[8+r9]
  2897. vpinsrd xmm5,xmm5,DWORD PTR[8+r10],1
  2898. vpinsrd xmm0,xmm0,DWORD PTR[8+r11],1
  2899. vpunpckldq xmm5,xmm5,xmm0
  2900. vpshufb xmm5,xmm5,xmm6
  2901. vpsrld xmm7,xmm10,6
  2902. vpslld xmm2,xmm10,26
  2903. vmovdqu XMMWORD PTR[(32-128)+rax],xmm5
  2904. vpaddd xmm5,xmm5,xmm13
  2905. vpsrld xmm1,xmm10,11
  2906. vpxor xmm7,xmm7,xmm2
  2907. vpslld xmm2,xmm10,21
  2908. vpaddd xmm5,xmm5,XMMWORD PTR[((-64))+rbp]
  2909. vpxor xmm7,xmm7,xmm1
  2910. vpsrld xmm1,xmm10,25
  2911. vpxor xmm7,xmm7,xmm2
  2912. vpslld xmm2,xmm10,7
  2913. vpandn xmm0,xmm10,xmm12
  2914. vpand xmm3,xmm10,xmm11
  2915. vpxor xmm7,xmm7,xmm1
  2916. vpsrld xmm13,xmm14,2
  2917. vpxor xmm7,xmm7,xmm2
  2918. vpslld xmm1,xmm14,30
  2919. vpxor xmm0,xmm0,xmm3
  2920. vpxor xmm3,xmm15,xmm14
  2921. vpxor xmm13,xmm13,xmm1
  2922. vpaddd xmm5,xmm5,xmm7
  2923. vpsrld xmm1,xmm14,13
  2924. vpslld xmm2,xmm14,19
  2925. vpaddd xmm5,xmm5,xmm0
  2926. vpand xmm4,xmm4,xmm3
  2927. vpxor xmm7,xmm13,xmm1
  2928. vpsrld xmm1,xmm14,22
  2929. vpxor xmm7,xmm7,xmm2
  2930. vpslld xmm2,xmm14,10
  2931. vpxor xmm13,xmm15,xmm4
  2932. vpaddd xmm9,xmm9,xmm5
  2933. vpxor xmm7,xmm7,xmm1
  2934. vpxor xmm7,xmm7,xmm2
  2935. vpaddd xmm13,xmm13,xmm5
  2936. vpaddd xmm13,xmm13,xmm7
  2937. vmovd xmm5,DWORD PTR[12+r8]
  2938. vmovd xmm0,DWORD PTR[12+r9]
  2939. vpinsrd xmm5,xmm5,DWORD PTR[12+r10],1
  2940. vpinsrd xmm0,xmm0,DWORD PTR[12+r11],1
  2941. vpunpckldq xmm5,xmm5,xmm0
  2942. vpshufb xmm5,xmm5,xmm6
  2943. vpsrld xmm7,xmm9,6
  2944. vpslld xmm2,xmm9,26
  2945. vmovdqu XMMWORD PTR[(48-128)+rax],xmm5
  2946. vpaddd xmm5,xmm5,xmm12
  2947. vpsrld xmm1,xmm9,11
  2948. vpxor xmm7,xmm7,xmm2
  2949. vpslld xmm2,xmm9,21
  2950. vpaddd xmm5,xmm5,XMMWORD PTR[((-32))+rbp]
  2951. vpxor xmm7,xmm7,xmm1
  2952. vpsrld xmm1,xmm9,25
  2953. vpxor xmm7,xmm7,xmm2
  2954. vpslld xmm2,xmm9,7
  2955. vpandn xmm0,xmm9,xmm11
  2956. vpand xmm4,xmm9,xmm10
  2957. vpxor xmm7,xmm7,xmm1
  2958. vpsrld xmm12,xmm13,2
  2959. vpxor xmm7,xmm7,xmm2
  2960. vpslld xmm1,xmm13,30
  2961. vpxor xmm0,xmm0,xmm4
  2962. vpxor xmm4,xmm14,xmm13
  2963. vpxor xmm12,xmm12,xmm1
  2964. vpaddd xmm5,xmm5,xmm7
  2965. vpsrld xmm1,xmm13,13
  2966. vpslld xmm2,xmm13,19
  2967. vpaddd xmm5,xmm5,xmm0
  2968. vpand xmm3,xmm3,xmm4
  2969. vpxor xmm7,xmm12,xmm1
  2970. vpsrld xmm1,xmm13,22
  2971. vpxor xmm7,xmm7,xmm2
  2972. vpslld xmm2,xmm13,10
  2973. vpxor xmm12,xmm14,xmm3
  2974. vpaddd xmm8,xmm8,xmm5
  2975. vpxor xmm7,xmm7,xmm1
  2976. vpxor xmm7,xmm7,xmm2
  2977. vpaddd xmm12,xmm12,xmm5
  2978. vpaddd xmm12,xmm12,xmm7
  2979. vmovd xmm5,DWORD PTR[16+r8]
  2980. vmovd xmm0,DWORD PTR[16+r9]
  2981. vpinsrd xmm5,xmm5,DWORD PTR[16+r10],1
  2982. vpinsrd xmm0,xmm0,DWORD PTR[16+r11],1
  2983. vpunpckldq xmm5,xmm5,xmm0
  2984. vpshufb xmm5,xmm5,xmm6
  2985. vpsrld xmm7,xmm8,6
  2986. vpslld xmm2,xmm8,26
  2987. vmovdqu XMMWORD PTR[(64-128)+rax],xmm5
  2988. vpaddd xmm5,xmm5,xmm11
  2989. vpsrld xmm1,xmm8,11
  2990. vpxor xmm7,xmm7,xmm2
  2991. vpslld xmm2,xmm8,21
  2992. vpaddd xmm5,xmm5,XMMWORD PTR[rbp]
  2993. vpxor xmm7,xmm7,xmm1
  2994. vpsrld xmm1,xmm8,25
  2995. vpxor xmm7,xmm7,xmm2
  2996. vpslld xmm2,xmm8,7
  2997. vpandn xmm0,xmm8,xmm10
  2998. vpand xmm3,xmm8,xmm9
  2999. vpxor xmm7,xmm7,xmm1
  3000. vpsrld xmm11,xmm12,2
  3001. vpxor xmm7,xmm7,xmm2
  3002. vpslld xmm1,xmm12,30
  3003. vpxor xmm0,xmm0,xmm3
  3004. vpxor xmm3,xmm13,xmm12
  3005. vpxor xmm11,xmm11,xmm1
  3006. vpaddd xmm5,xmm5,xmm7
  3007. vpsrld xmm1,xmm12,13
  3008. vpslld xmm2,xmm12,19
  3009. vpaddd xmm5,xmm5,xmm0
  3010. vpand xmm4,xmm4,xmm3
  3011. vpxor xmm7,xmm11,xmm1
  3012. vpsrld xmm1,xmm12,22
  3013. vpxor xmm7,xmm7,xmm2
  3014. vpslld xmm2,xmm12,10
  3015. vpxor xmm11,xmm13,xmm4
  3016. vpaddd xmm15,xmm15,xmm5
  3017. vpxor xmm7,xmm7,xmm1
  3018. vpxor xmm7,xmm7,xmm2
  3019. vpaddd xmm11,xmm11,xmm5
  3020. vpaddd xmm11,xmm11,xmm7
  3021. vmovd xmm5,DWORD PTR[20+r8]
  3022. vmovd xmm0,DWORD PTR[20+r9]
  3023. vpinsrd xmm5,xmm5,DWORD PTR[20+r10],1
  3024. vpinsrd xmm0,xmm0,DWORD PTR[20+r11],1
  3025. vpunpckldq xmm5,xmm5,xmm0
  3026. vpshufb xmm5,xmm5,xmm6
  3027. vpsrld xmm7,xmm15,6
  3028. vpslld xmm2,xmm15,26
  3029. vmovdqu XMMWORD PTR[(80-128)+rax],xmm5
  3030. vpaddd xmm5,xmm5,xmm10
  3031. vpsrld xmm1,xmm15,11
  3032. vpxor xmm7,xmm7,xmm2
  3033. vpslld xmm2,xmm15,21
  3034. vpaddd xmm5,xmm5,XMMWORD PTR[32+rbp]
  3035. vpxor xmm7,xmm7,xmm1
  3036. vpsrld xmm1,xmm15,25
  3037. vpxor xmm7,xmm7,xmm2
  3038. vpslld xmm2,xmm15,7
  3039. vpandn xmm0,xmm15,xmm9
  3040. vpand xmm4,xmm15,xmm8
  3041. vpxor xmm7,xmm7,xmm1
  3042. vpsrld xmm10,xmm11,2
  3043. vpxor xmm7,xmm7,xmm2
  3044. vpslld xmm1,xmm11,30
  3045. vpxor xmm0,xmm0,xmm4
  3046. vpxor xmm4,xmm12,xmm11
  3047. vpxor xmm10,xmm10,xmm1
  3048. vpaddd xmm5,xmm5,xmm7
  3049. vpsrld xmm1,xmm11,13
  3050. vpslld xmm2,xmm11,19
  3051. vpaddd xmm5,xmm5,xmm0
  3052. vpand xmm3,xmm3,xmm4
  3053. vpxor xmm7,xmm10,xmm1
  3054. vpsrld xmm1,xmm11,22
  3055. vpxor xmm7,xmm7,xmm2
  3056. vpslld xmm2,xmm11,10
  3057. vpxor xmm10,xmm12,xmm3
  3058. vpaddd xmm14,xmm14,xmm5
  3059. vpxor xmm7,xmm7,xmm1
  3060. vpxor xmm7,xmm7,xmm2
  3061. vpaddd xmm10,xmm10,xmm5
  3062. vpaddd xmm10,xmm10,xmm7
  3063. vmovd xmm5,DWORD PTR[24+r8]
  3064. vmovd xmm0,DWORD PTR[24+r9]
  3065. vpinsrd xmm5,xmm5,DWORD PTR[24+r10],1
  3066. vpinsrd xmm0,xmm0,DWORD PTR[24+r11],1
  3067. vpunpckldq xmm5,xmm5,xmm0
  3068. vpshufb xmm5,xmm5,xmm6
  3069. vpsrld xmm7,xmm14,6
  3070. vpslld xmm2,xmm14,26
  3071. vmovdqu XMMWORD PTR[(96-128)+rax],xmm5
  3072. vpaddd xmm5,xmm5,xmm9
  3073. vpsrld xmm1,xmm14,11
  3074. vpxor xmm7,xmm7,xmm2
  3075. vpslld xmm2,xmm14,21
  3076. vpaddd xmm5,xmm5,XMMWORD PTR[64+rbp]
  3077. vpxor xmm7,xmm7,xmm1
  3078. vpsrld xmm1,xmm14,25
  3079. vpxor xmm7,xmm7,xmm2
  3080. vpslld xmm2,xmm14,7
  3081. vpandn xmm0,xmm14,xmm8
  3082. vpand xmm3,xmm14,xmm15
  3083. vpxor xmm7,xmm7,xmm1
  3084. vpsrld xmm9,xmm10,2
  3085. vpxor xmm7,xmm7,xmm2
  3086. vpslld xmm1,xmm10,30
  3087. vpxor xmm0,xmm0,xmm3
  3088. vpxor xmm3,xmm11,xmm10
  3089. vpxor xmm9,xmm9,xmm1
  3090. vpaddd xmm5,xmm5,xmm7
  3091. vpsrld xmm1,xmm10,13
  3092. vpslld xmm2,xmm10,19
  3093. vpaddd xmm5,xmm5,xmm0
  3094. vpand xmm4,xmm4,xmm3
  3095. vpxor xmm7,xmm9,xmm1
  3096. vpsrld xmm1,xmm10,22
  3097. vpxor xmm7,xmm7,xmm2
  3098. vpslld xmm2,xmm10,10
  3099. vpxor xmm9,xmm11,xmm4
  3100. vpaddd xmm13,xmm13,xmm5
  3101. vpxor xmm7,xmm7,xmm1
  3102. vpxor xmm7,xmm7,xmm2
  3103. vpaddd xmm9,xmm9,xmm5
  3104. vpaddd xmm9,xmm9,xmm7
  3105. vmovd xmm5,DWORD PTR[28+r8]
  3106. vmovd xmm0,DWORD PTR[28+r9]
  3107. vpinsrd xmm5,xmm5,DWORD PTR[28+r10],1
  3108. vpinsrd xmm0,xmm0,DWORD PTR[28+r11],1
  3109. vpunpckldq xmm5,xmm5,xmm0
  3110. vpshufb xmm5,xmm5,xmm6
  3111. vpsrld xmm7,xmm13,6
  3112. vpslld xmm2,xmm13,26
  3113. vmovdqu XMMWORD PTR[(112-128)+rax],xmm5
  3114. vpaddd xmm5,xmm5,xmm8
  3115. vpsrld xmm1,xmm13,11
  3116. vpxor xmm7,xmm7,xmm2
  3117. vpslld xmm2,xmm13,21
  3118. vpaddd xmm5,xmm5,XMMWORD PTR[96+rbp]
  3119. vpxor xmm7,xmm7,xmm1
  3120. vpsrld xmm1,xmm13,25
  3121. vpxor xmm7,xmm7,xmm2
  3122. vpslld xmm2,xmm13,7
  3123. vpandn xmm0,xmm13,xmm15
  3124. vpand xmm4,xmm13,xmm14
  3125. vpxor xmm7,xmm7,xmm1
  3126. vpsrld xmm8,xmm9,2
  3127. vpxor xmm7,xmm7,xmm2
  3128. vpslld xmm1,xmm9,30
  3129. vpxor xmm0,xmm0,xmm4
  3130. vpxor xmm4,xmm10,xmm9
  3131. vpxor xmm8,xmm8,xmm1
  3132. vpaddd xmm5,xmm5,xmm7
  3133. vpsrld xmm1,xmm9,13
  3134. vpslld xmm2,xmm9,19
  3135. vpaddd xmm5,xmm5,xmm0
  3136. vpand xmm3,xmm3,xmm4
  3137. vpxor xmm7,xmm8,xmm1
  3138. vpsrld xmm1,xmm9,22
  3139. vpxor xmm7,xmm7,xmm2
  3140. vpslld xmm2,xmm9,10
  3141. vpxor xmm8,xmm10,xmm3
  3142. vpaddd xmm12,xmm12,xmm5
  3143. vpxor xmm7,xmm7,xmm1
  3144. vpxor xmm7,xmm7,xmm2
  3145. vpaddd xmm8,xmm8,xmm5
  3146. vpaddd xmm8,xmm8,xmm7
  3147. add rbp,256
  3148. vmovd xmm5,DWORD PTR[32+r8]
  3149. vmovd xmm0,DWORD PTR[32+r9]
  3150. vpinsrd xmm5,xmm5,DWORD PTR[32+r10],1
  3151. vpinsrd xmm0,xmm0,DWORD PTR[32+r11],1
  3152. vpunpckldq xmm5,xmm5,xmm0
  3153. vpshufb xmm5,xmm5,xmm6
  3154. vpsrld xmm7,xmm12,6
  3155. vpslld xmm2,xmm12,26
  3156. vmovdqu XMMWORD PTR[(128-128)+rax],xmm5
  3157. vpaddd xmm5,xmm5,xmm15
  3158. vpsrld xmm1,xmm12,11
  3159. vpxor xmm7,xmm7,xmm2
  3160. vpslld xmm2,xmm12,21
  3161. vpaddd xmm5,xmm5,XMMWORD PTR[((-128))+rbp]
  3162. vpxor xmm7,xmm7,xmm1
  3163. vpsrld xmm1,xmm12,25
  3164. vpxor xmm7,xmm7,xmm2
  3165. vpslld xmm2,xmm12,7
  3166. vpandn xmm0,xmm12,xmm14
  3167. vpand xmm3,xmm12,xmm13
  3168. vpxor xmm7,xmm7,xmm1
  3169. vpsrld xmm15,xmm8,2
  3170. vpxor xmm7,xmm7,xmm2
  3171. vpslld xmm1,xmm8,30
  3172. vpxor xmm0,xmm0,xmm3
  3173. vpxor xmm3,xmm9,xmm8
  3174. vpxor xmm15,xmm15,xmm1
  3175. vpaddd xmm5,xmm5,xmm7
  3176. vpsrld xmm1,xmm8,13
  3177. vpslld xmm2,xmm8,19
  3178. vpaddd xmm5,xmm5,xmm0
  3179. vpand xmm4,xmm4,xmm3
  3180. vpxor xmm7,xmm15,xmm1
  3181. vpsrld xmm1,xmm8,22
  3182. vpxor xmm7,xmm7,xmm2
  3183. vpslld xmm2,xmm8,10
  3184. vpxor xmm15,xmm9,xmm4
  3185. vpaddd xmm11,xmm11,xmm5
  3186. vpxor xmm7,xmm7,xmm1
  3187. vpxor xmm7,xmm7,xmm2
  3188. vpaddd xmm15,xmm15,xmm5
  3189. vpaddd xmm15,xmm15,xmm7
  3190. vmovd xmm5,DWORD PTR[36+r8]
  3191. vmovd xmm0,DWORD PTR[36+r9]
  3192. vpinsrd xmm5,xmm5,DWORD PTR[36+r10],1
  3193. vpinsrd xmm0,xmm0,DWORD PTR[36+r11],1
  3194. vpunpckldq xmm5,xmm5,xmm0
  3195. vpshufb xmm5,xmm5,xmm6
  3196. vpsrld xmm7,xmm11,6
  3197. vpslld xmm2,xmm11,26
  3198. vmovdqu XMMWORD PTR[(144-128)+rax],xmm5
  3199. vpaddd xmm5,xmm5,xmm14
  3200. vpsrld xmm1,xmm11,11
  3201. vpxor xmm7,xmm7,xmm2
  3202. vpslld xmm2,xmm11,21
  3203. vpaddd xmm5,xmm5,XMMWORD PTR[((-96))+rbp]
  3204. vpxor xmm7,xmm7,xmm1
  3205. vpsrld xmm1,xmm11,25
  3206. vpxor xmm7,xmm7,xmm2
  3207. vpslld xmm2,xmm11,7
  3208. vpandn xmm0,xmm11,xmm13
  3209. vpand xmm4,xmm11,xmm12
  3210. vpxor xmm7,xmm7,xmm1
  3211. vpsrld xmm14,xmm15,2
  3212. vpxor xmm7,xmm7,xmm2
  3213. vpslld xmm1,xmm15,30
  3214. vpxor xmm0,xmm0,xmm4
  3215. vpxor xmm4,xmm8,xmm15
  3216. vpxor xmm14,xmm14,xmm1
  3217. vpaddd xmm5,xmm5,xmm7
  3218. vpsrld xmm1,xmm15,13
  3219. vpslld xmm2,xmm15,19
  3220. vpaddd xmm5,xmm5,xmm0
  3221. vpand xmm3,xmm3,xmm4
  3222. vpxor xmm7,xmm14,xmm1
  3223. vpsrld xmm1,xmm15,22
  3224. vpxor xmm7,xmm7,xmm2
  3225. vpslld xmm2,xmm15,10
  3226. vpxor xmm14,xmm8,xmm3
  3227. vpaddd xmm10,xmm10,xmm5
  3228. vpxor xmm7,xmm7,xmm1
  3229. vpxor xmm7,xmm7,xmm2
  3230. vpaddd xmm14,xmm14,xmm5
  3231. vpaddd xmm14,xmm14,xmm7
  3232. vmovd xmm5,DWORD PTR[40+r8]
  3233. vmovd xmm0,DWORD PTR[40+r9]
  3234. vpinsrd xmm5,xmm5,DWORD PTR[40+r10],1
  3235. vpinsrd xmm0,xmm0,DWORD PTR[40+r11],1
  3236. vpunpckldq xmm5,xmm5,xmm0
  3237. vpshufb xmm5,xmm5,xmm6
  3238. vpsrld xmm7,xmm10,6
  3239. vpslld xmm2,xmm10,26
  3240. vmovdqu XMMWORD PTR[(160-128)+rax],xmm5
  3241. vpaddd xmm5,xmm5,xmm13
  3242. vpsrld xmm1,xmm10,11
  3243. vpxor xmm7,xmm7,xmm2
  3244. vpslld xmm2,xmm10,21
  3245. vpaddd xmm5,xmm5,XMMWORD PTR[((-64))+rbp]
  3246. vpxor xmm7,xmm7,xmm1
  3247. vpsrld xmm1,xmm10,25
  3248. vpxor xmm7,xmm7,xmm2
  3249. vpslld xmm2,xmm10,7
  3250. vpandn xmm0,xmm10,xmm12
  3251. vpand xmm3,xmm10,xmm11
  3252. vpxor xmm7,xmm7,xmm1
  3253. vpsrld xmm13,xmm14,2
  3254. vpxor xmm7,xmm7,xmm2
  3255. vpslld xmm1,xmm14,30
  3256. vpxor xmm0,xmm0,xmm3
  3257. vpxor xmm3,xmm15,xmm14
  3258. vpxor xmm13,xmm13,xmm1
  3259. vpaddd xmm5,xmm5,xmm7
  3260. vpsrld xmm1,xmm14,13
  3261. vpslld xmm2,xmm14,19
  3262. vpaddd xmm5,xmm5,xmm0
  3263. vpand xmm4,xmm4,xmm3
  3264. vpxor xmm7,xmm13,xmm1
  3265. vpsrld xmm1,xmm14,22
  3266. vpxor xmm7,xmm7,xmm2
  3267. vpslld xmm2,xmm14,10
  3268. vpxor xmm13,xmm15,xmm4
  3269. vpaddd xmm9,xmm9,xmm5
  3270. vpxor xmm7,xmm7,xmm1
  3271. vpxor xmm7,xmm7,xmm2
  3272. vpaddd xmm13,xmm13,xmm5
  3273. vpaddd xmm13,xmm13,xmm7
  3274. vmovd xmm5,DWORD PTR[44+r8]
  3275. vmovd xmm0,DWORD PTR[44+r9]
  3276. vpinsrd xmm5,xmm5,DWORD PTR[44+r10],1
  3277. vpinsrd xmm0,xmm0,DWORD PTR[44+r11],1
  3278. vpunpckldq xmm5,xmm5,xmm0
  3279. vpshufb xmm5,xmm5,xmm6
  3280. vpsrld xmm7,xmm9,6
  3281. vpslld xmm2,xmm9,26
  3282. vmovdqu XMMWORD PTR[(176-128)+rax],xmm5
  3283. vpaddd xmm5,xmm5,xmm12
  3284. vpsrld xmm1,xmm9,11
  3285. vpxor xmm7,xmm7,xmm2
  3286. vpslld xmm2,xmm9,21
  3287. vpaddd xmm5,xmm5,XMMWORD PTR[((-32))+rbp]
  3288. vpxor xmm7,xmm7,xmm1
  3289. vpsrld xmm1,xmm9,25
  3290. vpxor xmm7,xmm7,xmm2
  3291. vpslld xmm2,xmm9,7
  3292. vpandn xmm0,xmm9,xmm11
  3293. vpand xmm4,xmm9,xmm10
  3294. vpxor xmm7,xmm7,xmm1
  3295. vpsrld xmm12,xmm13,2
  3296. vpxor xmm7,xmm7,xmm2
  3297. vpslld xmm1,xmm13,30
  3298. vpxor xmm0,xmm0,xmm4
  3299. vpxor xmm4,xmm14,xmm13
  3300. vpxor xmm12,xmm12,xmm1
  3301. vpaddd xmm5,xmm5,xmm7
  3302. vpsrld xmm1,xmm13,13
  3303. vpslld xmm2,xmm13,19
  3304. vpaddd xmm5,xmm5,xmm0
  3305. vpand xmm3,xmm3,xmm4
  3306. vpxor xmm7,xmm12,xmm1
  3307. vpsrld xmm1,xmm13,22
  3308. vpxor xmm7,xmm7,xmm2
  3309. vpslld xmm2,xmm13,10
  3310. vpxor xmm12,xmm14,xmm3
  3311. vpaddd xmm8,xmm8,xmm5
  3312. vpxor xmm7,xmm7,xmm1
  3313. vpxor xmm7,xmm7,xmm2
  3314. vpaddd xmm12,xmm12,xmm5
  3315. vpaddd xmm12,xmm12,xmm7
  3316. vmovd xmm5,DWORD PTR[48+r8]
  3317. vmovd xmm0,DWORD PTR[48+r9]
  3318. vpinsrd xmm5,xmm5,DWORD PTR[48+r10],1
  3319. vpinsrd xmm0,xmm0,DWORD PTR[48+r11],1
  3320. vpunpckldq xmm5,xmm5,xmm0
  3321. vpshufb xmm5,xmm5,xmm6
  3322. vpsrld xmm7,xmm8,6
  3323. vpslld xmm2,xmm8,26
  3324. vmovdqu XMMWORD PTR[(192-128)+rax],xmm5
  3325. vpaddd xmm5,xmm5,xmm11
  3326. vpsrld xmm1,xmm8,11
  3327. vpxor xmm7,xmm7,xmm2
  3328. vpslld xmm2,xmm8,21
  3329. vpaddd xmm5,xmm5,XMMWORD PTR[rbp]
  3330. vpxor xmm7,xmm7,xmm1
  3331. vpsrld xmm1,xmm8,25
  3332. vpxor xmm7,xmm7,xmm2
  3333. vpslld xmm2,xmm8,7
  3334. vpandn xmm0,xmm8,xmm10
  3335. vpand xmm3,xmm8,xmm9
  3336. vpxor xmm7,xmm7,xmm1
  3337. vpsrld xmm11,xmm12,2
  3338. vpxor xmm7,xmm7,xmm2
  3339. vpslld xmm1,xmm12,30
  3340. vpxor xmm0,xmm0,xmm3
  3341. vpxor xmm3,xmm13,xmm12
  3342. vpxor xmm11,xmm11,xmm1
  3343. vpaddd xmm5,xmm5,xmm7
  3344. vpsrld xmm1,xmm12,13
  3345. vpslld xmm2,xmm12,19
  3346. vpaddd xmm5,xmm5,xmm0
  3347. vpand xmm4,xmm4,xmm3
  3348. vpxor xmm7,xmm11,xmm1
  3349. vpsrld xmm1,xmm12,22
  3350. vpxor xmm7,xmm7,xmm2
  3351. vpslld xmm2,xmm12,10
  3352. vpxor xmm11,xmm13,xmm4
  3353. vpaddd xmm15,xmm15,xmm5
  3354. vpxor xmm7,xmm7,xmm1
  3355. vpxor xmm7,xmm7,xmm2
  3356. vpaddd xmm11,xmm11,xmm5
  3357. vpaddd xmm11,xmm11,xmm7
  3358. vmovd xmm5,DWORD PTR[52+r8]
  3359. vmovd xmm0,DWORD PTR[52+r9]
  3360. vpinsrd xmm5,xmm5,DWORD PTR[52+r10],1
  3361. vpinsrd xmm0,xmm0,DWORD PTR[52+r11],1
  3362. vpunpckldq xmm5,xmm5,xmm0
  3363. vpshufb xmm5,xmm5,xmm6
  3364. vpsrld xmm7,xmm15,6
  3365. vpslld xmm2,xmm15,26
  3366. vmovdqu XMMWORD PTR[(208-128)+rax],xmm5
  3367. vpaddd xmm5,xmm5,xmm10
  3368. vpsrld xmm1,xmm15,11
  3369. vpxor xmm7,xmm7,xmm2
  3370. vpslld xmm2,xmm15,21
  3371. vpaddd xmm5,xmm5,XMMWORD PTR[32+rbp]
  3372. vpxor xmm7,xmm7,xmm1
  3373. vpsrld xmm1,xmm15,25
  3374. vpxor xmm7,xmm7,xmm2
  3375. vpslld xmm2,xmm15,7
  3376. vpandn xmm0,xmm15,xmm9
  3377. vpand xmm4,xmm15,xmm8
  3378. vpxor xmm7,xmm7,xmm1
  3379. vpsrld xmm10,xmm11,2
  3380. vpxor xmm7,xmm7,xmm2
  3381. vpslld xmm1,xmm11,30
  3382. vpxor xmm0,xmm0,xmm4
  3383. vpxor xmm4,xmm12,xmm11
  3384. vpxor xmm10,xmm10,xmm1
  3385. vpaddd xmm5,xmm5,xmm7
  3386. vpsrld xmm1,xmm11,13
  3387. vpslld xmm2,xmm11,19
  3388. vpaddd xmm5,xmm5,xmm0
  3389. vpand xmm3,xmm3,xmm4
  3390. vpxor xmm7,xmm10,xmm1
  3391. vpsrld xmm1,xmm11,22
  3392. vpxor xmm7,xmm7,xmm2
  3393. vpslld xmm2,xmm11,10
  3394. vpxor xmm10,xmm12,xmm3
  3395. vpaddd xmm14,xmm14,xmm5
  3396. vpxor xmm7,xmm7,xmm1
  3397. vpxor xmm7,xmm7,xmm2
  3398. vpaddd xmm10,xmm10,xmm5
  3399. vpaddd xmm10,xmm10,xmm7
  3400. vmovd xmm5,DWORD PTR[56+r8]
  3401. vmovd xmm0,DWORD PTR[56+r9]
  3402. vpinsrd xmm5,xmm5,DWORD PTR[56+r10],1
  3403. vpinsrd xmm0,xmm0,DWORD PTR[56+r11],1
  3404. vpunpckldq xmm5,xmm5,xmm0
  3405. vpshufb xmm5,xmm5,xmm6
  3406. vpsrld xmm7,xmm14,6
  3407. vpslld xmm2,xmm14,26
  3408. vmovdqu XMMWORD PTR[(224-128)+rax],xmm5
  3409. vpaddd xmm5,xmm5,xmm9
  3410. vpsrld xmm1,xmm14,11
  3411. vpxor xmm7,xmm7,xmm2
  3412. vpslld xmm2,xmm14,21
  3413. vpaddd xmm5,xmm5,XMMWORD PTR[64+rbp]
  3414. vpxor xmm7,xmm7,xmm1
  3415. vpsrld xmm1,xmm14,25
  3416. vpxor xmm7,xmm7,xmm2
  3417. vpslld xmm2,xmm14,7
  3418. vpandn xmm0,xmm14,xmm8
  3419. vpand xmm3,xmm14,xmm15
  3420. vpxor xmm7,xmm7,xmm1
  3421. vpsrld xmm9,xmm10,2
  3422. vpxor xmm7,xmm7,xmm2
  3423. vpslld xmm1,xmm10,30
  3424. vpxor xmm0,xmm0,xmm3
  3425. vpxor xmm3,xmm11,xmm10
  3426. vpxor xmm9,xmm9,xmm1
  3427. vpaddd xmm5,xmm5,xmm7
  3428. vpsrld xmm1,xmm10,13
  3429. vpslld xmm2,xmm10,19
  3430. vpaddd xmm5,xmm5,xmm0
  3431. vpand xmm4,xmm4,xmm3
  3432. vpxor xmm7,xmm9,xmm1
  3433. vpsrld xmm1,xmm10,22
  3434. vpxor xmm7,xmm7,xmm2
  3435. vpslld xmm2,xmm10,10
  3436. vpxor xmm9,xmm11,xmm4
  3437. vpaddd xmm13,xmm13,xmm5
  3438. vpxor xmm7,xmm7,xmm1
  3439. vpxor xmm7,xmm7,xmm2
  3440. vpaddd xmm9,xmm9,xmm5
  3441. vpaddd xmm9,xmm9,xmm7
  3442. vmovd xmm5,DWORD PTR[60+r8]
  3443. lea r8,QWORD PTR[64+r8]
  3444. vmovd xmm0,DWORD PTR[60+r9]
  3445. lea r9,QWORD PTR[64+r9]
  3446. vpinsrd xmm5,xmm5,DWORD PTR[60+r10],1
  3447. lea r10,QWORD PTR[64+r10]
  3448. vpinsrd xmm0,xmm0,DWORD PTR[60+r11],1
  3449. lea r11,QWORD PTR[64+r11]
  3450. vpunpckldq xmm5,xmm5,xmm0
  3451. vpshufb xmm5,xmm5,xmm6
  3452. vpsrld xmm7,xmm13,6
  3453. vpslld xmm2,xmm13,26
  3454. vmovdqu XMMWORD PTR[(240-128)+rax],xmm5
  3455. vpaddd xmm5,xmm5,xmm8
  3456. vpsrld xmm1,xmm13,11
  3457. vpxor xmm7,xmm7,xmm2
  3458. vpslld xmm2,xmm13,21
  3459. vpaddd xmm5,xmm5,XMMWORD PTR[96+rbp]
  3460. vpxor xmm7,xmm7,xmm1
  3461. vpsrld xmm1,xmm13,25
  3462. vpxor xmm7,xmm7,xmm2
  3463. prefetcht0 [63+r8]
  3464. vpslld xmm2,xmm13,7
  3465. vpandn xmm0,xmm13,xmm15
  3466. vpand xmm4,xmm13,xmm14
  3467. prefetcht0 [63+r9]
  3468. vpxor xmm7,xmm7,xmm1
  3469. vpsrld xmm8,xmm9,2
  3470. vpxor xmm7,xmm7,xmm2
  3471. prefetcht0 [63+r10]
  3472. vpslld xmm1,xmm9,30
  3473. vpxor xmm0,xmm0,xmm4
  3474. vpxor xmm4,xmm10,xmm9
  3475. prefetcht0 [63+r11]
  3476. vpxor xmm8,xmm8,xmm1
  3477. vpaddd xmm5,xmm5,xmm7
  3478. vpsrld xmm1,xmm9,13
  3479. vpslld xmm2,xmm9,19
  3480. vpaddd xmm5,xmm5,xmm0
  3481. vpand xmm3,xmm3,xmm4
  3482. vpxor xmm7,xmm8,xmm1
  3483. vpsrld xmm1,xmm9,22
  3484. vpxor xmm7,xmm7,xmm2
  3485. vpslld xmm2,xmm9,10
  3486. vpxor xmm8,xmm10,xmm3
  3487. vpaddd xmm12,xmm12,xmm5
  3488. vpxor xmm7,xmm7,xmm1
  3489. vpxor xmm7,xmm7,xmm2
  3490. vpaddd xmm8,xmm8,xmm5
  3491. vpaddd xmm8,xmm8,xmm7
  3492. add rbp,256
  3493. vmovdqu xmm5,XMMWORD PTR[((0-128))+rax]
  3494. mov ecx,3
  3495. jmp $L$oop_16_xx_avx
  3496. ALIGN 32
  ; ---------------------------------------------------------------------
  ; $L$oop_16_xx_avx -- 16 unrolled SHA-256 rounds with in-place message
  ; schedule expansion.  Entered with ecx = 3, so the body runs 3 times
  ; (48 rounds).  Every XMM register holds one 32-bit word for each of
  ; 4 independent lanes (all arithmetic is vpaddd/vpxor/... on dwords).
  ;
  ;   [rax-128 .. rax+127]  16-slot ring of schedule words X[0..15],
  ;                         16 bytes per slot
  ;   rbp                   round-constant pointer; one constant is read
  ;                         every 32 bytes, rbp advances 256 per 8 rounds
  ;   xmm8..xmm15           working variables a..h at loop entry; the
  ;                         roles rotate by one register per round and
  ;                         return to this assignment every 8 rounds
  ;   xmm5 / xmm6           alternate as "current X[i]" / "next X[i+1]"
  ;   xmm3 / xmm4           alternate carrying a^b into the next round,
  ;                         where it is b^c for the Maj computation
  ;   xmm0,xmm1,xmm2,xmm7   scratch
  ;
  ; Per round i (indices mod 16):
  ;   X[i] += sigma0(X[i+1]) + X[i+9] + sigma1(X[i+14])
  ;     sigma0(x) = ror7 ^ ror18 ^ shr3,  sigma1(x) = ror17 ^ ror19 ^ shr10
  ;   T1 = X[i] + h + K + Sigma1(e) + Ch(e,f,g)
  ;     Sigma1(e) = ror6 ^ ror11 ^ ror25,  Ch = (~e & g) ^ (e & f)
  ;   d += T1
  ;   h  = T1 + Sigma0(a) + Maj(a,b,c)      (this register is the next a)
  ;     Sigma0(a) = ror2 ^ ror13 ^ ror22,  Maj = b ^ ((a^b) & (b^c))
  ; Each rotate is a vpsrld n / vpslld (32-n) pair XORed into the sum.
  ; ---------------------------------------------------------------------
  3497. $L$oop_16_xx_avx::
  ; round 0: X[0] += s0(X[1]) + X[9] + s1(X[14]); a=xmm8 e=xmm12 h=xmm15; K at [rbp-128]
  3498. vmovdqu xmm6,XMMWORD PTR[((16-128))+rax]
  3499. vpaddd xmm5,xmm5,XMMWORD PTR[((144-128))+rax]
  3500. vpsrld xmm7,xmm6,3
  3501. vpsrld xmm1,xmm6,7
  3502. vpslld xmm2,xmm6,25
  3503. vpxor xmm7,xmm7,xmm1
  3504. vpsrld xmm1,xmm6,18
  3505. vpxor xmm7,xmm7,xmm2
  3506. vpslld xmm2,xmm6,14
  3507. vmovdqu xmm0,XMMWORD PTR[((224-128))+rax]
  3508. vpsrld xmm3,xmm0,10
  3509. vpxor xmm7,xmm7,xmm1
  3510. vpsrld xmm1,xmm0,17
  3511. vpxor xmm7,xmm7,xmm2
  3512. vpslld xmm2,xmm0,15
  3513. vpaddd xmm5,xmm5,xmm7
  3514. vpxor xmm7,xmm3,xmm1
  3515. vpsrld xmm1,xmm0,19
  3516. vpxor xmm7,xmm7,xmm2
  3517. vpslld xmm2,xmm0,13
  3518. vpxor xmm7,xmm7,xmm1
  3519. vpxor xmm7,xmm7,xmm2
  3520. vpaddd xmm5,xmm5,xmm7
  3521. vpsrld xmm7,xmm12,6
  3522. vpslld xmm2,xmm12,26
  3523. vmovdqu XMMWORD PTR[(0-128)+rax],xmm5
  3524. vpaddd xmm5,xmm5,xmm15
  3525. vpsrld xmm1,xmm12,11
  3526. vpxor xmm7,xmm7,xmm2
  3527. vpslld xmm2,xmm12,21
  3528. vpaddd xmm5,xmm5,XMMWORD PTR[((-128))+rbp]
  3529. vpxor xmm7,xmm7,xmm1
  3530. vpsrld xmm1,xmm12,25
  3531. vpxor xmm7,xmm7,xmm2
  3532. vpslld xmm2,xmm12,7
  3533. vpandn xmm0,xmm12,xmm14
  3534. vpand xmm3,xmm12,xmm13
  3535. vpxor xmm7,xmm7,xmm1
  3536. vpsrld xmm15,xmm8,2
  3537. vpxor xmm7,xmm7,xmm2
  3538. vpslld xmm1,xmm8,30
  3539. vpxor xmm0,xmm0,xmm3
  3540. vpxor xmm3,xmm9,xmm8
  3541. vpxor xmm15,xmm15,xmm1
  3542. vpaddd xmm5,xmm5,xmm7
  3543. vpsrld xmm1,xmm8,13
  3544. vpslld xmm2,xmm8,19
  3545. vpaddd xmm5,xmm5,xmm0
  3546. vpand xmm4,xmm4,xmm3
  3547. vpxor xmm7,xmm15,xmm1
  3548. vpsrld xmm1,xmm8,22
  3549. vpxor xmm7,xmm7,xmm2
  3550. vpslld xmm2,xmm8,10
  3551. vpxor xmm15,xmm9,xmm4
  3552. vpaddd xmm11,xmm11,xmm5
  3553. vpxor xmm7,xmm7,xmm1
  3554. vpxor xmm7,xmm7,xmm2
  3555. vpaddd xmm15,xmm15,xmm5
  3556. vpaddd xmm15,xmm15,xmm7
  ; round 1: X[1] += s0(X[2]) + X[10] + s1(X[15]); a=xmm15 e=xmm11 h=xmm14; K at [rbp-96]
  3557. vmovdqu xmm5,XMMWORD PTR[((32-128))+rax]
  3558. vpaddd xmm6,xmm6,XMMWORD PTR[((160-128))+rax]
  3559. vpsrld xmm7,xmm5,3
  3560. vpsrld xmm1,xmm5,7
  3561. vpslld xmm2,xmm5,25
  3562. vpxor xmm7,xmm7,xmm1
  3563. vpsrld xmm1,xmm5,18
  3564. vpxor xmm7,xmm7,xmm2
  3565. vpslld xmm2,xmm5,14
  3566. vmovdqu xmm0,XMMWORD PTR[((240-128))+rax]
  3567. vpsrld xmm4,xmm0,10
  3568. vpxor xmm7,xmm7,xmm1
  3569. vpsrld xmm1,xmm0,17
  3570. vpxor xmm7,xmm7,xmm2
  3571. vpslld xmm2,xmm0,15
  3572. vpaddd xmm6,xmm6,xmm7
  3573. vpxor xmm7,xmm4,xmm1
  3574. vpsrld xmm1,xmm0,19
  3575. vpxor xmm7,xmm7,xmm2
  3576. vpslld xmm2,xmm0,13
  3577. vpxor xmm7,xmm7,xmm1
  3578. vpxor xmm7,xmm7,xmm2
  3579. vpaddd xmm6,xmm6,xmm7
  3580. vpsrld xmm7,xmm11,6
  3581. vpslld xmm2,xmm11,26
  3582. vmovdqu XMMWORD PTR[(16-128)+rax],xmm6
  3583. vpaddd xmm6,xmm6,xmm14
  3584. vpsrld xmm1,xmm11,11
  3585. vpxor xmm7,xmm7,xmm2
  3586. vpslld xmm2,xmm11,21
  3587. vpaddd xmm6,xmm6,XMMWORD PTR[((-96))+rbp]
  3588. vpxor xmm7,xmm7,xmm1
  3589. vpsrld xmm1,xmm11,25
  3590. vpxor xmm7,xmm7,xmm2
  3591. vpslld xmm2,xmm11,7
  3592. vpandn xmm0,xmm11,xmm13
  3593. vpand xmm4,xmm11,xmm12
  3594. vpxor xmm7,xmm7,xmm1
  3595. vpsrld xmm14,xmm15,2
  3596. vpxor xmm7,xmm7,xmm2
  3597. vpslld xmm1,xmm15,30
  3598. vpxor xmm0,xmm0,xmm4
  3599. vpxor xmm4,xmm8,xmm15
  3600. vpxor xmm14,xmm14,xmm1
  3601. vpaddd xmm6,xmm6,xmm7
  3602. vpsrld xmm1,xmm15,13
  3603. vpslld xmm2,xmm15,19
  3604. vpaddd xmm6,xmm6,xmm0
  3605. vpand xmm3,xmm3,xmm4
  3606. vpxor xmm7,xmm14,xmm1
  3607. vpsrld xmm1,xmm15,22
  3608. vpxor xmm7,xmm7,xmm2
  3609. vpslld xmm2,xmm15,10
  3610. vpxor xmm14,xmm8,xmm3
  3611. vpaddd xmm10,xmm10,xmm6
  3612. vpxor xmm7,xmm7,xmm1
  3613. vpxor xmm7,xmm7,xmm2
  3614. vpaddd xmm14,xmm14,xmm6
  3615. vpaddd xmm14,xmm14,xmm7
  ; round 2: X[2] += s0(X[3]) + X[11] + s1(X[0]); a=xmm14 e=xmm10 h=xmm13; K at [rbp-64]
  3616. vmovdqu xmm6,XMMWORD PTR[((48-128))+rax]
  3617. vpaddd xmm5,xmm5,XMMWORD PTR[((176-128))+rax]
  3618. vpsrld xmm7,xmm6,3
  3619. vpsrld xmm1,xmm6,7
  3620. vpslld xmm2,xmm6,25
  3621. vpxor xmm7,xmm7,xmm1
  3622. vpsrld xmm1,xmm6,18
  3623. vpxor xmm7,xmm7,xmm2
  3624. vpslld xmm2,xmm6,14
  3625. vmovdqu xmm0,XMMWORD PTR[((0-128))+rax]
  3626. vpsrld xmm3,xmm0,10
  3627. vpxor xmm7,xmm7,xmm1
  3628. vpsrld xmm1,xmm0,17
  3629. vpxor xmm7,xmm7,xmm2
  3630. vpslld xmm2,xmm0,15
  3631. vpaddd xmm5,xmm5,xmm7
  3632. vpxor xmm7,xmm3,xmm1
  3633. vpsrld xmm1,xmm0,19
  3634. vpxor xmm7,xmm7,xmm2
  3635. vpslld xmm2,xmm0,13
  3636. vpxor xmm7,xmm7,xmm1
  3637. vpxor xmm7,xmm7,xmm2
  3638. vpaddd xmm5,xmm5,xmm7
  3639. vpsrld xmm7,xmm10,6
  3640. vpslld xmm2,xmm10,26
  3641. vmovdqu XMMWORD PTR[(32-128)+rax],xmm5
  3642. vpaddd xmm5,xmm5,xmm13
  3643. vpsrld xmm1,xmm10,11
  3644. vpxor xmm7,xmm7,xmm2
  3645. vpslld xmm2,xmm10,21
  3646. vpaddd xmm5,xmm5,XMMWORD PTR[((-64))+rbp]
  3647. vpxor xmm7,xmm7,xmm1
  3648. vpsrld xmm1,xmm10,25
  3649. vpxor xmm7,xmm7,xmm2
  3650. vpslld xmm2,xmm10,7
  3651. vpandn xmm0,xmm10,xmm12
  3652. vpand xmm3,xmm10,xmm11
  3653. vpxor xmm7,xmm7,xmm1
  3654. vpsrld xmm13,xmm14,2
  3655. vpxor xmm7,xmm7,xmm2
  3656. vpslld xmm1,xmm14,30
  3657. vpxor xmm0,xmm0,xmm3
  3658. vpxor xmm3,xmm15,xmm14
  3659. vpxor xmm13,xmm13,xmm1
  3660. vpaddd xmm5,xmm5,xmm7
  3661. vpsrld xmm1,xmm14,13
  3662. vpslld xmm2,xmm14,19
  3663. vpaddd xmm5,xmm5,xmm0
  3664. vpand xmm4,xmm4,xmm3
  3665. vpxor xmm7,xmm13,xmm1
  3666. vpsrld xmm1,xmm14,22
  3667. vpxor xmm7,xmm7,xmm2
  3668. vpslld xmm2,xmm14,10
  3669. vpxor xmm13,xmm15,xmm4
  3670. vpaddd xmm9,xmm9,xmm5
  3671. vpxor xmm7,xmm7,xmm1
  3672. vpxor xmm7,xmm7,xmm2
  3673. vpaddd xmm13,xmm13,xmm5
  3674. vpaddd xmm13,xmm13,xmm7
  ; round 3: X[3] += s0(X[4]) + X[12] + s1(X[1]); a=xmm13 e=xmm9 h=xmm12; K at [rbp-32]
  3675. vmovdqu xmm5,XMMWORD PTR[((64-128))+rax]
  3676. vpaddd xmm6,xmm6,XMMWORD PTR[((192-128))+rax]
  3677. vpsrld xmm7,xmm5,3
  3678. vpsrld xmm1,xmm5,7
  3679. vpslld xmm2,xmm5,25
  3680. vpxor xmm7,xmm7,xmm1
  3681. vpsrld xmm1,xmm5,18
  3682. vpxor xmm7,xmm7,xmm2
  3683. vpslld xmm2,xmm5,14
  3684. vmovdqu xmm0,XMMWORD PTR[((16-128))+rax]
  3685. vpsrld xmm4,xmm0,10
  3686. vpxor xmm7,xmm7,xmm1
  3687. vpsrld xmm1,xmm0,17
  3688. vpxor xmm7,xmm7,xmm2
  3689. vpslld xmm2,xmm0,15
  3690. vpaddd xmm6,xmm6,xmm7
  3691. vpxor xmm7,xmm4,xmm1
  3692. vpsrld xmm1,xmm0,19
  3693. vpxor xmm7,xmm7,xmm2
  3694. vpslld xmm2,xmm0,13
  3695. vpxor xmm7,xmm7,xmm1
  3696. vpxor xmm7,xmm7,xmm2
  3697. vpaddd xmm6,xmm6,xmm7
  3698. vpsrld xmm7,xmm9,6
  3699. vpslld xmm2,xmm9,26
  3700. vmovdqu XMMWORD PTR[(48-128)+rax],xmm6
  3701. vpaddd xmm6,xmm6,xmm12
  3702. vpsrld xmm1,xmm9,11
  3703. vpxor xmm7,xmm7,xmm2
  3704. vpslld xmm2,xmm9,21
  3705. vpaddd xmm6,xmm6,XMMWORD PTR[((-32))+rbp]
  3706. vpxor xmm7,xmm7,xmm1
  3707. vpsrld xmm1,xmm9,25
  3708. vpxor xmm7,xmm7,xmm2
  3709. vpslld xmm2,xmm9,7
  3710. vpandn xmm0,xmm9,xmm11
  3711. vpand xmm4,xmm9,xmm10
  3712. vpxor xmm7,xmm7,xmm1
  3713. vpsrld xmm12,xmm13,2
  3714. vpxor xmm7,xmm7,xmm2
  3715. vpslld xmm1,xmm13,30
  3716. vpxor xmm0,xmm0,xmm4
  3717. vpxor xmm4,xmm14,xmm13
  3718. vpxor xmm12,xmm12,xmm1
  3719. vpaddd xmm6,xmm6,xmm7
  3720. vpsrld xmm1,xmm13,13
  3721. vpslld xmm2,xmm13,19
  3722. vpaddd xmm6,xmm6,xmm0
  3723. vpand xmm3,xmm3,xmm4
  3724. vpxor xmm7,xmm12,xmm1
  3725. vpsrld xmm1,xmm13,22
  3726. vpxor xmm7,xmm7,xmm2
  3727. vpslld xmm2,xmm13,10
  3728. vpxor xmm12,xmm14,xmm3
  3729. vpaddd xmm8,xmm8,xmm6
  3730. vpxor xmm7,xmm7,xmm1
  3731. vpxor xmm7,xmm7,xmm2
  3732. vpaddd xmm12,xmm12,xmm6
  3733. vpaddd xmm12,xmm12,xmm7
  ; round 4: X[4] += s0(X[5]) + X[13] + s1(X[2]); a=xmm12 e=xmm8 h=xmm11; K at [rbp]
  3734. vmovdqu xmm6,XMMWORD PTR[((80-128))+rax]
  3735. vpaddd xmm5,xmm5,XMMWORD PTR[((208-128))+rax]
  3736. vpsrld xmm7,xmm6,3
  3737. vpsrld xmm1,xmm6,7
  3738. vpslld xmm2,xmm6,25
  3739. vpxor xmm7,xmm7,xmm1
  3740. vpsrld xmm1,xmm6,18
  3741. vpxor xmm7,xmm7,xmm2
  3742. vpslld xmm2,xmm6,14
  3743. vmovdqu xmm0,XMMWORD PTR[((32-128))+rax]
  3744. vpsrld xmm3,xmm0,10
  3745. vpxor xmm7,xmm7,xmm1
  3746. vpsrld xmm1,xmm0,17
  3747. vpxor xmm7,xmm7,xmm2
  3748. vpslld xmm2,xmm0,15
  3749. vpaddd xmm5,xmm5,xmm7
  3750. vpxor xmm7,xmm3,xmm1
  3751. vpsrld xmm1,xmm0,19
  3752. vpxor xmm7,xmm7,xmm2
  3753. vpslld xmm2,xmm0,13
  3754. vpxor xmm7,xmm7,xmm1
  3755. vpxor xmm7,xmm7,xmm2
  3756. vpaddd xmm5,xmm5,xmm7
  3757. vpsrld xmm7,xmm8,6
  3758. vpslld xmm2,xmm8,26
  3759. vmovdqu XMMWORD PTR[(64-128)+rax],xmm5
  3760. vpaddd xmm5,xmm5,xmm11
  3761. vpsrld xmm1,xmm8,11
  3762. vpxor xmm7,xmm7,xmm2
  3763. vpslld xmm2,xmm8,21
  3764. vpaddd xmm5,xmm5,XMMWORD PTR[rbp]
  3765. vpxor xmm7,xmm7,xmm1
  3766. vpsrld xmm1,xmm8,25
  3767. vpxor xmm7,xmm7,xmm2
  3768. vpslld xmm2,xmm8,7
  3769. vpandn xmm0,xmm8,xmm10
  3770. vpand xmm3,xmm8,xmm9
  3771. vpxor xmm7,xmm7,xmm1
  3772. vpsrld xmm11,xmm12,2
  3773. vpxor xmm7,xmm7,xmm2
  3774. vpslld xmm1,xmm12,30
  3775. vpxor xmm0,xmm0,xmm3
  3776. vpxor xmm3,xmm13,xmm12
  3777. vpxor xmm11,xmm11,xmm1
  3778. vpaddd xmm5,xmm5,xmm7
  3779. vpsrld xmm1,xmm12,13
  3780. vpslld xmm2,xmm12,19
  3781. vpaddd xmm5,xmm5,xmm0
  3782. vpand xmm4,xmm4,xmm3
  3783. vpxor xmm7,xmm11,xmm1
  3784. vpsrld xmm1,xmm12,22
  3785. vpxor xmm7,xmm7,xmm2
  3786. vpslld xmm2,xmm12,10
  3787. vpxor xmm11,xmm13,xmm4
  3788. vpaddd xmm15,xmm15,xmm5
  3789. vpxor xmm7,xmm7,xmm1
  3790. vpxor xmm7,xmm7,xmm2
  3791. vpaddd xmm11,xmm11,xmm5
  3792. vpaddd xmm11,xmm11,xmm7
  ; round 5: X[5] += s0(X[6]) + X[14] + s1(X[3]); a=xmm11 e=xmm15 h=xmm10; K at [rbp+32]
  3793. vmovdqu xmm5,XMMWORD PTR[((96-128))+rax]
  3794. vpaddd xmm6,xmm6,XMMWORD PTR[((224-128))+rax]
  3795. vpsrld xmm7,xmm5,3
  3796. vpsrld xmm1,xmm5,7
  3797. vpslld xmm2,xmm5,25
  3798. vpxor xmm7,xmm7,xmm1
  3799. vpsrld xmm1,xmm5,18
  3800. vpxor xmm7,xmm7,xmm2
  3801. vpslld xmm2,xmm5,14
  3802. vmovdqu xmm0,XMMWORD PTR[((48-128))+rax]
  3803. vpsrld xmm4,xmm0,10
  3804. vpxor xmm7,xmm7,xmm1
  3805. vpsrld xmm1,xmm0,17
  3806. vpxor xmm7,xmm7,xmm2
  3807. vpslld xmm2,xmm0,15
  3808. vpaddd xmm6,xmm6,xmm7
  3809. vpxor xmm7,xmm4,xmm1
  3810. vpsrld xmm1,xmm0,19
  3811. vpxor xmm7,xmm7,xmm2
  3812. vpslld xmm2,xmm0,13
  3813. vpxor xmm7,xmm7,xmm1
  3814. vpxor xmm7,xmm7,xmm2
  3815. vpaddd xmm6,xmm6,xmm7
  3816. vpsrld xmm7,xmm15,6
  3817. vpslld xmm2,xmm15,26
  3818. vmovdqu XMMWORD PTR[(80-128)+rax],xmm6
  3819. vpaddd xmm6,xmm6,xmm10
  3820. vpsrld xmm1,xmm15,11
  3821. vpxor xmm7,xmm7,xmm2
  3822. vpslld xmm2,xmm15,21
  3823. vpaddd xmm6,xmm6,XMMWORD PTR[32+rbp]
  3824. vpxor xmm7,xmm7,xmm1
  3825. vpsrld xmm1,xmm15,25
  3826. vpxor xmm7,xmm7,xmm2
  3827. vpslld xmm2,xmm15,7
  3828. vpandn xmm0,xmm15,xmm9
  3829. vpand xmm4,xmm15,xmm8
  3830. vpxor xmm7,xmm7,xmm1
  3831. vpsrld xmm10,xmm11,2
  3832. vpxor xmm7,xmm7,xmm2
  3833. vpslld xmm1,xmm11,30
  3834. vpxor xmm0,xmm0,xmm4
  3835. vpxor xmm4,xmm12,xmm11
  3836. vpxor xmm10,xmm10,xmm1
  3837. vpaddd xmm6,xmm6,xmm7
  3838. vpsrld xmm1,xmm11,13
  3839. vpslld xmm2,xmm11,19
  3840. vpaddd xmm6,xmm6,xmm0
  3841. vpand xmm3,xmm3,xmm4
  3842. vpxor xmm7,xmm10,xmm1
  3843. vpsrld xmm1,xmm11,22
  3844. vpxor xmm7,xmm7,xmm2
  3845. vpslld xmm2,xmm11,10
  3846. vpxor xmm10,xmm12,xmm3
  3847. vpaddd xmm14,xmm14,xmm6
  3848. vpxor xmm7,xmm7,xmm1
  3849. vpxor xmm7,xmm7,xmm2
  3850. vpaddd xmm10,xmm10,xmm6
  3851. vpaddd xmm10,xmm10,xmm7
  ; round 6: X[6] += s0(X[7]) + X[15] + s1(X[4]); a=xmm10 e=xmm14 h=xmm9; K at [rbp+64]
  3852. vmovdqu xmm6,XMMWORD PTR[((112-128))+rax]
  3853. vpaddd xmm5,xmm5,XMMWORD PTR[((240-128))+rax]
  3854. vpsrld xmm7,xmm6,3
  3855. vpsrld xmm1,xmm6,7
  3856. vpslld xmm2,xmm6,25
  3857. vpxor xmm7,xmm7,xmm1
  3858. vpsrld xmm1,xmm6,18
  3859. vpxor xmm7,xmm7,xmm2
  3860. vpslld xmm2,xmm6,14
  3861. vmovdqu xmm0,XMMWORD PTR[((64-128))+rax]
  3862. vpsrld xmm3,xmm0,10
  3863. vpxor xmm7,xmm7,xmm1
  3864. vpsrld xmm1,xmm0,17
  3865. vpxor xmm7,xmm7,xmm2
  3866. vpslld xmm2,xmm0,15
  3867. vpaddd xmm5,xmm5,xmm7
  3868. vpxor xmm7,xmm3,xmm1
  3869. vpsrld xmm1,xmm0,19
  3870. vpxor xmm7,xmm7,xmm2
  3871. vpslld xmm2,xmm0,13
  3872. vpxor xmm7,xmm7,xmm1
  3873. vpxor xmm7,xmm7,xmm2
  3874. vpaddd xmm5,xmm5,xmm7
  3875. vpsrld xmm7,xmm14,6
  3876. vpslld xmm2,xmm14,26
  3877. vmovdqu XMMWORD PTR[(96-128)+rax],xmm5
  3878. vpaddd xmm5,xmm5,xmm9
  3879. vpsrld xmm1,xmm14,11
  3880. vpxor xmm7,xmm7,xmm2
  3881. vpslld xmm2,xmm14,21
  3882. vpaddd xmm5,xmm5,XMMWORD PTR[64+rbp]
  3883. vpxor xmm7,xmm7,xmm1
  3884. vpsrld xmm1,xmm14,25
  3885. vpxor xmm7,xmm7,xmm2
  3886. vpslld xmm2,xmm14,7
  3887. vpandn xmm0,xmm14,xmm8
  3888. vpand xmm3,xmm14,xmm15
  3889. vpxor xmm7,xmm7,xmm1
  3890. vpsrld xmm9,xmm10,2
  3891. vpxor xmm7,xmm7,xmm2
  3892. vpslld xmm1,xmm10,30
  3893. vpxor xmm0,xmm0,xmm3
  3894. vpxor xmm3,xmm11,xmm10
  3895. vpxor xmm9,xmm9,xmm1
  3896. vpaddd xmm5,xmm5,xmm7
  3897. vpsrld xmm1,xmm10,13
  3898. vpslld xmm2,xmm10,19
  3899. vpaddd xmm5,xmm5,xmm0
  3900. vpand xmm4,xmm4,xmm3
  3901. vpxor xmm7,xmm9,xmm1
  3902. vpsrld xmm1,xmm10,22
  3903. vpxor xmm7,xmm7,xmm2
  3904. vpslld xmm2,xmm10,10
  3905. vpxor xmm9,xmm11,xmm4
  3906. vpaddd xmm13,xmm13,xmm5
  3907. vpxor xmm7,xmm7,xmm1
  3908. vpxor xmm7,xmm7,xmm2
  3909. vpaddd xmm9,xmm9,xmm5
  3910. vpaddd xmm9,xmm9,xmm7
  ; round 7: X[7] += s0(X[8]) + X[0] + s1(X[5]); a=xmm9 e=xmm13 h=xmm8; K at [rbp+96]
  3911. vmovdqu xmm5,XMMWORD PTR[((128-128))+rax]
  3912. vpaddd xmm6,xmm6,XMMWORD PTR[((0-128))+rax]
  3913. vpsrld xmm7,xmm5,3
  3914. vpsrld xmm1,xmm5,7
  3915. vpslld xmm2,xmm5,25
  3916. vpxor xmm7,xmm7,xmm1
  3917. vpsrld xmm1,xmm5,18
  3918. vpxor xmm7,xmm7,xmm2
  3919. vpslld xmm2,xmm5,14
  3920. vmovdqu xmm0,XMMWORD PTR[((80-128))+rax]
  3921. vpsrld xmm4,xmm0,10
  3922. vpxor xmm7,xmm7,xmm1
  3923. vpsrld xmm1,xmm0,17
  3924. vpxor xmm7,xmm7,xmm2
  3925. vpslld xmm2,xmm0,15
  3926. vpaddd xmm6,xmm6,xmm7
  3927. vpxor xmm7,xmm4,xmm1
  3928. vpsrld xmm1,xmm0,19
  3929. vpxor xmm7,xmm7,xmm2
  3930. vpslld xmm2,xmm0,13
  3931. vpxor xmm7,xmm7,xmm1
  3932. vpxor xmm7,xmm7,xmm2
  3933. vpaddd xmm6,xmm6,xmm7
  3934. vpsrld xmm7,xmm13,6
  3935. vpslld xmm2,xmm13,26
  3936. vmovdqu XMMWORD PTR[(112-128)+rax],xmm6
  3937. vpaddd xmm6,xmm6,xmm8
  3938. vpsrld xmm1,xmm13,11
  3939. vpxor xmm7,xmm7,xmm2
  3940. vpslld xmm2,xmm13,21
  3941. vpaddd xmm6,xmm6,XMMWORD PTR[96+rbp]
  3942. vpxor xmm7,xmm7,xmm1
  3943. vpsrld xmm1,xmm13,25
  3944. vpxor xmm7,xmm7,xmm2
  3945. vpslld xmm2,xmm13,7
  3946. vpandn xmm0,xmm13,xmm15
  3947. vpand xmm4,xmm13,xmm14
  3948. vpxor xmm7,xmm7,xmm1
  3949. vpsrld xmm8,xmm9,2
  3950. vpxor xmm7,xmm7,xmm2
  3951. vpslld xmm1,xmm9,30
  3952. vpxor xmm0,xmm0,xmm4
  3953. vpxor xmm4,xmm10,xmm9
  3954. vpxor xmm8,xmm8,xmm1
  3955. vpaddd xmm6,xmm6,xmm7
  3956. vpsrld xmm1,xmm9,13
  3957. vpslld xmm2,xmm9,19
  3958. vpaddd xmm6,xmm6,xmm0
  3959. vpand xmm3,xmm3,xmm4
  3960. vpxor xmm7,xmm8,xmm1
  3961. vpsrld xmm1,xmm9,22
  3962. vpxor xmm7,xmm7,xmm2
  3963. vpslld xmm2,xmm9,10
  3964. vpxor xmm8,xmm10,xmm3
  3965. vpaddd xmm12,xmm12,xmm6
  3966. vpxor xmm7,xmm7,xmm1
  3967. vpxor xmm7,xmm7,xmm2
  3968. vpaddd xmm8,xmm8,xmm6
  3969. vpaddd xmm8,xmm8,xmm7
  ; 8 rounds done: step rbp past the 8 constant slots just used (8 * 32 bytes)
  3970. add rbp,256
  ; round 8: X[8] += s0(X[9]) + X[1] + s1(X[6]); a=xmm8 e=xmm12 h=xmm15; K at [rbp-128]
  3971. vmovdqu xmm6,XMMWORD PTR[((144-128))+rax]
  3972. vpaddd xmm5,xmm5,XMMWORD PTR[((16-128))+rax]
  3973. vpsrld xmm7,xmm6,3
  3974. vpsrld xmm1,xmm6,7
  3975. vpslld xmm2,xmm6,25
  3976. vpxor xmm7,xmm7,xmm1
  3977. vpsrld xmm1,xmm6,18
  3978. vpxor xmm7,xmm7,xmm2
  3979. vpslld xmm2,xmm6,14
  3980. vmovdqu xmm0,XMMWORD PTR[((96-128))+rax]
  3981. vpsrld xmm3,xmm0,10
  3982. vpxor xmm7,xmm7,xmm1
  3983. vpsrld xmm1,xmm0,17
  3984. vpxor xmm7,xmm7,xmm2
  3985. vpslld xmm2,xmm0,15
  3986. vpaddd xmm5,xmm5,xmm7
  3987. vpxor xmm7,xmm3,xmm1
  3988. vpsrld xmm1,xmm0,19
  3989. vpxor xmm7,xmm7,xmm2
  3990. vpslld xmm2,xmm0,13
  3991. vpxor xmm7,xmm7,xmm1
  3992. vpxor xmm7,xmm7,xmm2
  3993. vpaddd xmm5,xmm5,xmm7
  3994. vpsrld xmm7,xmm12,6
  3995. vpslld xmm2,xmm12,26
  3996. vmovdqu XMMWORD PTR[(128-128)+rax],xmm5
  3997. vpaddd xmm5,xmm5,xmm15
  3998. vpsrld xmm1,xmm12,11
  3999. vpxor xmm7,xmm7,xmm2
  4000. vpslld xmm2,xmm12,21
  4001. vpaddd xmm5,xmm5,XMMWORD PTR[((-128))+rbp]
  4002. vpxor xmm7,xmm7,xmm1
  4003. vpsrld xmm1,xmm12,25
  4004. vpxor xmm7,xmm7,xmm2
  4005. vpslld xmm2,xmm12,7
  4006. vpandn xmm0,xmm12,xmm14
  4007. vpand xmm3,xmm12,xmm13
  4008. vpxor xmm7,xmm7,xmm1
  4009. vpsrld xmm15,xmm8,2
  4010. vpxor xmm7,xmm7,xmm2
  4011. vpslld xmm1,xmm8,30
  4012. vpxor xmm0,xmm0,xmm3
  4013. vpxor xmm3,xmm9,xmm8
  4014. vpxor xmm15,xmm15,xmm1
  4015. vpaddd xmm5,xmm5,xmm7
  4016. vpsrld xmm1,xmm8,13
  4017. vpslld xmm2,xmm8,19
  4018. vpaddd xmm5,xmm5,xmm0
  4019. vpand xmm4,xmm4,xmm3
  4020. vpxor xmm7,xmm15,xmm1
  4021. vpsrld xmm1,xmm8,22
  4022. vpxor xmm7,xmm7,xmm2
  4023. vpslld xmm2,xmm8,10
  4024. vpxor xmm15,xmm9,xmm4
  4025. vpaddd xmm11,xmm11,xmm5
  4026. vpxor xmm7,xmm7,xmm1
  4027. vpxor xmm7,xmm7,xmm2
  4028. vpaddd xmm15,xmm15,xmm5
  4029. vpaddd xmm15,xmm15,xmm7
  ; round 9: X[9] += s0(X[10]) + X[2] + s1(X[7]); a=xmm15 e=xmm11 h=xmm14; K at [rbp-96]
  4030. vmovdqu xmm5,XMMWORD PTR[((160-128))+rax]
  4031. vpaddd xmm6,xmm6,XMMWORD PTR[((32-128))+rax]
  4032. vpsrld xmm7,xmm5,3
  4033. vpsrld xmm1,xmm5,7
  4034. vpslld xmm2,xmm5,25
  4035. vpxor xmm7,xmm7,xmm1
  4036. vpsrld xmm1,xmm5,18
  4037. vpxor xmm7,xmm7,xmm2
  4038. vpslld xmm2,xmm5,14
  4039. vmovdqu xmm0,XMMWORD PTR[((112-128))+rax]
  4040. vpsrld xmm4,xmm0,10
  4041. vpxor xmm7,xmm7,xmm1
  4042. vpsrld xmm1,xmm0,17
  4043. vpxor xmm7,xmm7,xmm2
  4044. vpslld xmm2,xmm0,15
  4045. vpaddd xmm6,xmm6,xmm7
  4046. vpxor xmm7,xmm4,xmm1
  4047. vpsrld xmm1,xmm0,19
  4048. vpxor xmm7,xmm7,xmm2
  4049. vpslld xmm2,xmm0,13
  4050. vpxor xmm7,xmm7,xmm1
  4051. vpxor xmm7,xmm7,xmm2
  4052. vpaddd xmm6,xmm6,xmm7
  4053. vpsrld xmm7,xmm11,6
  4054. vpslld xmm2,xmm11,26
  4055. vmovdqu XMMWORD PTR[(144-128)+rax],xmm6
  4056. vpaddd xmm6,xmm6,xmm14
  4057. vpsrld xmm1,xmm11,11
  4058. vpxor xmm7,xmm7,xmm2
  4059. vpslld xmm2,xmm11,21
  4060. vpaddd xmm6,xmm6,XMMWORD PTR[((-96))+rbp]
  4061. vpxor xmm7,xmm7,xmm1
  4062. vpsrld xmm1,xmm11,25
  4063. vpxor xmm7,xmm7,xmm2
  4064. vpslld xmm2,xmm11,7
  4065. vpandn xmm0,xmm11,xmm13
  4066. vpand xmm4,xmm11,xmm12
  4067. vpxor xmm7,xmm7,xmm1
  4068. vpsrld xmm14,xmm15,2
  4069. vpxor xmm7,xmm7,xmm2
  4070. vpslld xmm1,xmm15,30
  4071. vpxor xmm0,xmm0,xmm4
  4072. vpxor xmm4,xmm8,xmm15
  4073. vpxor xmm14,xmm14,xmm1
  4074. vpaddd xmm6,xmm6,xmm7
  4075. vpsrld xmm1,xmm15,13
  4076. vpslld xmm2,xmm15,19
  4077. vpaddd xmm6,xmm6,xmm0
  4078. vpand xmm3,xmm3,xmm4
  4079. vpxor xmm7,xmm14,xmm1
  4080. vpsrld xmm1,xmm15,22
  4081. vpxor xmm7,xmm7,xmm2
  4082. vpslld xmm2,xmm15,10
  4083. vpxor xmm14,xmm8,xmm3
  4084. vpaddd xmm10,xmm10,xmm6
  4085. vpxor xmm7,xmm7,xmm1
  4086. vpxor xmm7,xmm7,xmm2
  4087. vpaddd xmm14,xmm14,xmm6
  4088. vpaddd xmm14,xmm14,xmm7
  ; round 10: X[10] += s0(X[11]) + X[3] + s1(X[8]); a=xmm14 e=xmm10 h=xmm13; K at [rbp-64]
  4089. vmovdqu xmm6,XMMWORD PTR[((176-128))+rax]
  4090. vpaddd xmm5,xmm5,XMMWORD PTR[((48-128))+rax]
  4091. vpsrld xmm7,xmm6,3
  4092. vpsrld xmm1,xmm6,7
  4093. vpslld xmm2,xmm6,25
  4094. vpxor xmm7,xmm7,xmm1
  4095. vpsrld xmm1,xmm6,18
  4096. vpxor xmm7,xmm7,xmm2
  4097. vpslld xmm2,xmm6,14
  4098. vmovdqu xmm0,XMMWORD PTR[((128-128))+rax]
  4099. vpsrld xmm3,xmm0,10
  4100. vpxor xmm7,xmm7,xmm1
  4101. vpsrld xmm1,xmm0,17
  4102. vpxor xmm7,xmm7,xmm2
  4103. vpslld xmm2,xmm0,15
  4104. vpaddd xmm5,xmm5,xmm7
  4105. vpxor xmm7,xmm3,xmm1
  4106. vpsrld xmm1,xmm0,19
  4107. vpxor xmm7,xmm7,xmm2
  4108. vpslld xmm2,xmm0,13
  4109. vpxor xmm7,xmm7,xmm1
  4110. vpxor xmm7,xmm7,xmm2
  4111. vpaddd xmm5,xmm5,xmm7
  4112. vpsrld xmm7,xmm10,6
  4113. vpslld xmm2,xmm10,26
  4114. vmovdqu XMMWORD PTR[(160-128)+rax],xmm5
  4115. vpaddd xmm5,xmm5,xmm13
  4116. vpsrld xmm1,xmm10,11
  4117. vpxor xmm7,xmm7,xmm2
  4118. vpslld xmm2,xmm10,21
  4119. vpaddd xmm5,xmm5,XMMWORD PTR[((-64))+rbp]
  4120. vpxor xmm7,xmm7,xmm1
  4121. vpsrld xmm1,xmm10,25
  4122. vpxor xmm7,xmm7,xmm2
  4123. vpslld xmm2,xmm10,7
  4124. vpandn xmm0,xmm10,xmm12
  4125. vpand xmm3,xmm10,xmm11
  4126. vpxor xmm7,xmm7,xmm1
  4127. vpsrld xmm13,xmm14,2
  4128. vpxor xmm7,xmm7,xmm2
  4129. vpslld xmm1,xmm14,30
  4130. vpxor xmm0,xmm0,xmm3
  4131. vpxor xmm3,xmm15,xmm14
  4132. vpxor xmm13,xmm13,xmm1
  4133. vpaddd xmm5,xmm5,xmm7
  4134. vpsrld xmm1,xmm14,13
  4135. vpslld xmm2,xmm14,19
  4136. vpaddd xmm5,xmm5,xmm0
  4137. vpand xmm4,xmm4,xmm3
  4138. vpxor xmm7,xmm13,xmm1
  4139. vpsrld xmm1,xmm14,22
  4140. vpxor xmm7,xmm7,xmm2
  4141. vpslld xmm2,xmm14,10
  4142. vpxor xmm13,xmm15,xmm4
  4143. vpaddd xmm9,xmm9,xmm5
  4144. vpxor xmm7,xmm7,xmm1
  4145. vpxor xmm7,xmm7,xmm2
  4146. vpaddd xmm13,xmm13,xmm5
  4147. vpaddd xmm13,xmm13,xmm7
  ; round 11: X[11] += s0(X[12]) + X[4] + s1(X[9]); a=xmm13 e=xmm9 h=xmm12; K at [rbp-32]
  4148. vmovdqu xmm5,XMMWORD PTR[((192-128))+rax]
  4149. vpaddd xmm6,xmm6,XMMWORD PTR[((64-128))+rax]
  4150. vpsrld xmm7,xmm5,3
  4151. vpsrld xmm1,xmm5,7
  4152. vpslld xmm2,xmm5,25
  4153. vpxor xmm7,xmm7,xmm1
  4154. vpsrld xmm1,xmm5,18
  4155. vpxor xmm7,xmm7,xmm2
  4156. vpslld xmm2,xmm5,14
  4157. vmovdqu xmm0,XMMWORD PTR[((144-128))+rax]
  4158. vpsrld xmm4,xmm0,10
  4159. vpxor xmm7,xmm7,xmm1
  4160. vpsrld xmm1,xmm0,17
  4161. vpxor xmm7,xmm7,xmm2
  4162. vpslld xmm2,xmm0,15
  4163. vpaddd xmm6,xmm6,xmm7
  4164. vpxor xmm7,xmm4,xmm1
  4165. vpsrld xmm1,xmm0,19
  4166. vpxor xmm7,xmm7,xmm2
  4167. vpslld xmm2,xmm0,13
  4168. vpxor xmm7,xmm7,xmm1
  4169. vpxor xmm7,xmm7,xmm2
  4170. vpaddd xmm6,xmm6,xmm7
  4171. vpsrld xmm7,xmm9,6
  4172. vpslld xmm2,xmm9,26
  4173. vmovdqu XMMWORD PTR[(176-128)+rax],xmm6
  4174. vpaddd xmm6,xmm6,xmm12
  4175. vpsrld xmm1,xmm9,11
  4176. vpxor xmm7,xmm7,xmm2
  4177. vpslld xmm2,xmm9,21
  4178. vpaddd xmm6,xmm6,XMMWORD PTR[((-32))+rbp]
  4179. vpxor xmm7,xmm7,xmm1
  4180. vpsrld xmm1,xmm9,25
  4181. vpxor xmm7,xmm7,xmm2
  4182. vpslld xmm2,xmm9,7
  4183. vpandn xmm0,xmm9,xmm11
  4184. vpand xmm4,xmm9,xmm10
  4185. vpxor xmm7,xmm7,xmm1
  4186. vpsrld xmm12,xmm13,2
  4187. vpxor xmm7,xmm7,xmm2
  4188. vpslld xmm1,xmm13,30
  4189. vpxor xmm0,xmm0,xmm4
  4190. vpxor xmm4,xmm14,xmm13
  4191. vpxor xmm12,xmm12,xmm1
  4192. vpaddd xmm6,xmm6,xmm7
  4193. vpsrld xmm1,xmm13,13
  4194. vpslld xmm2,xmm13,19
  4195. vpaddd xmm6,xmm6,xmm0
  4196. vpand xmm3,xmm3,xmm4
  4197. vpxor xmm7,xmm12,xmm1
  4198. vpsrld xmm1,xmm13,22
  4199. vpxor xmm7,xmm7,xmm2
  4200. vpslld xmm2,xmm13,10
  4201. vpxor xmm12,xmm14,xmm3
  4202. vpaddd xmm8,xmm8,xmm6
  4203. vpxor xmm7,xmm7,xmm1
  4204. vpxor xmm7,xmm7,xmm2
  4205. vpaddd xmm12,xmm12,xmm6
  4206. vpaddd xmm12,xmm12,xmm7
  ; round 12: X[12] += s0(X[13]) + X[5] + s1(X[10]); a=xmm12 e=xmm8 h=xmm11; K at [rbp]
  4207. vmovdqu xmm6,XMMWORD PTR[((208-128))+rax]
  4208. vpaddd xmm5,xmm5,XMMWORD PTR[((80-128))+rax]
  4209. vpsrld xmm7,xmm6,3
  4210. vpsrld xmm1,xmm6,7
  4211. vpslld xmm2,xmm6,25
  4212. vpxor xmm7,xmm7,xmm1
  4213. vpsrld xmm1,xmm6,18
  4214. vpxor xmm7,xmm7,xmm2
  4215. vpslld xmm2,xmm6,14
  4216. vmovdqu xmm0,XMMWORD PTR[((160-128))+rax]
  4217. vpsrld xmm3,xmm0,10
  4218. vpxor xmm7,xmm7,xmm1
  4219. vpsrld xmm1,xmm0,17
  4220. vpxor xmm7,xmm7,xmm2
  4221. vpslld xmm2,xmm0,15
  4222. vpaddd xmm5,xmm5,xmm7
  4223. vpxor xmm7,xmm3,xmm1
  4224. vpsrld xmm1,xmm0,19
  4225. vpxor xmm7,xmm7,xmm2
  4226. vpslld xmm2,xmm0,13
  4227. vpxor xmm7,xmm7,xmm1
  4228. vpxor xmm7,xmm7,xmm2
  4229. vpaddd xmm5,xmm5,xmm7
  4230. vpsrld xmm7,xmm8,6
  4231. vpslld xmm2,xmm8,26
  4232. vmovdqu XMMWORD PTR[(192-128)+rax],xmm5
  4233. vpaddd xmm5,xmm5,xmm11
  4234. vpsrld xmm1,xmm8,11
  4235. vpxor xmm7,xmm7,xmm2
  4236. vpslld xmm2,xmm8,21
  4237. vpaddd xmm5,xmm5,XMMWORD PTR[rbp]
  4238. vpxor xmm7,xmm7,xmm1
  4239. vpsrld xmm1,xmm8,25
  4240. vpxor xmm7,xmm7,xmm2
  4241. vpslld xmm2,xmm8,7
  4242. vpandn xmm0,xmm8,xmm10
  4243. vpand xmm3,xmm8,xmm9
  4244. vpxor xmm7,xmm7,xmm1
  4245. vpsrld xmm11,xmm12,2
  4246. vpxor xmm7,xmm7,xmm2
  4247. vpslld xmm1,xmm12,30
  4248. vpxor xmm0,xmm0,xmm3
  4249. vpxor xmm3,xmm13,xmm12
  4250. vpxor xmm11,xmm11,xmm1
  4251. vpaddd xmm5,xmm5,xmm7
  4252. vpsrld xmm1,xmm12,13
  4253. vpslld xmm2,xmm12,19
  4254. vpaddd xmm5,xmm5,xmm0
  4255. vpand xmm4,xmm4,xmm3
  4256. vpxor xmm7,xmm11,xmm1
  4257. vpsrld xmm1,xmm12,22
  4258. vpxor xmm7,xmm7,xmm2
  4259. vpslld xmm2,xmm12,10
  4260. vpxor xmm11,xmm13,xmm4
  4261. vpaddd xmm15,xmm15,xmm5
  4262. vpxor xmm7,xmm7,xmm1
  4263. vpxor xmm7,xmm7,xmm2
  4264. vpaddd xmm11,xmm11,xmm5
  4265. vpaddd xmm11,xmm11,xmm7
  ; round 13: X[13] += s0(X[14]) + X[6] + s1(X[11]); a=xmm11 e=xmm15 h=xmm10; K at [rbp+32]
  4266. vmovdqu xmm5,XMMWORD PTR[((224-128))+rax]
  4267. vpaddd xmm6,xmm6,XMMWORD PTR[((96-128))+rax]
  4268. vpsrld xmm7,xmm5,3
  4269. vpsrld xmm1,xmm5,7
  4270. vpslld xmm2,xmm5,25
  4271. vpxor xmm7,xmm7,xmm1
  4272. vpsrld xmm1,xmm5,18
  4273. vpxor xmm7,xmm7,xmm2
  4274. vpslld xmm2,xmm5,14
  4275. vmovdqu xmm0,XMMWORD PTR[((176-128))+rax]
  4276. vpsrld xmm4,xmm0,10
  4277. vpxor xmm7,xmm7,xmm1
  4278. vpsrld xmm1,xmm0,17
  4279. vpxor xmm7,xmm7,xmm2
  4280. vpslld xmm2,xmm0,15
  4281. vpaddd xmm6,xmm6,xmm7
  4282. vpxor xmm7,xmm4,xmm1
  4283. vpsrld xmm1,xmm0,19
  4284. vpxor xmm7,xmm7,xmm2
  4285. vpslld xmm2,xmm0,13
  4286. vpxor xmm7,xmm7,xmm1
  4287. vpxor xmm7,xmm7,xmm2
  4288. vpaddd xmm6,xmm6,xmm7
  4289. vpsrld xmm7,xmm15,6
  4290. vpslld xmm2,xmm15,26
  4291. vmovdqu XMMWORD PTR[(208-128)+rax],xmm6
  4292. vpaddd xmm6,xmm6,xmm10
  4293. vpsrld xmm1,xmm15,11
  4294. vpxor xmm7,xmm7,xmm2
  4295. vpslld xmm2,xmm15,21
  4296. vpaddd xmm6,xmm6,XMMWORD PTR[32+rbp]
  4297. vpxor xmm7,xmm7,xmm1
  4298. vpsrld xmm1,xmm15,25
  4299. vpxor xmm7,xmm7,xmm2
  4300. vpslld xmm2,xmm15,7
  4301. vpandn xmm0,xmm15,xmm9
  4302. vpand xmm4,xmm15,xmm8
  4303. vpxor xmm7,xmm7,xmm1
  4304. vpsrld xmm10,xmm11,2
  4305. vpxor xmm7,xmm7,xmm2
  4306. vpslld xmm1,xmm11,30
  4307. vpxor xmm0,xmm0,xmm4
  4308. vpxor xmm4,xmm12,xmm11
  4309. vpxor xmm10,xmm10,xmm1
  4310. vpaddd xmm6,xmm6,xmm7
  4311. vpsrld xmm1,xmm11,13
  4312. vpslld xmm2,xmm11,19
  4313. vpaddd xmm6,xmm6,xmm0
  4314. vpand xmm3,xmm3,xmm4
  4315. vpxor xmm7,xmm10,xmm1
  4316. vpsrld xmm1,xmm11,22
  4317. vpxor xmm7,xmm7,xmm2
  4318. vpslld xmm2,xmm11,10
  4319. vpxor xmm10,xmm12,xmm3
  4320. vpaddd xmm14,xmm14,xmm6
  4321. vpxor xmm7,xmm7,xmm1
  4322. vpxor xmm7,xmm7,xmm2
  4323. vpaddd xmm10,xmm10,xmm6
  4324. vpaddd xmm10,xmm10,xmm7
  ; round 14: X[14] += s0(X[15]) + X[7] + s1(X[12]); a=xmm10 e=xmm14 h=xmm9; K at [rbp+64]
  4325. vmovdqu xmm6,XMMWORD PTR[((240-128))+rax]
  4326. vpaddd xmm5,xmm5,XMMWORD PTR[((112-128))+rax]
  4327. vpsrld xmm7,xmm6,3
  4328. vpsrld xmm1,xmm6,7
  4329. vpslld xmm2,xmm6,25
  4330. vpxor xmm7,xmm7,xmm1
  4331. vpsrld xmm1,xmm6,18
  4332. vpxor xmm7,xmm7,xmm2
  4333. vpslld xmm2,xmm6,14
  4334. vmovdqu xmm0,XMMWORD PTR[((192-128))+rax]
  4335. vpsrld xmm3,xmm0,10
  4336. vpxor xmm7,xmm7,xmm1
  4337. vpsrld xmm1,xmm0,17
  4338. vpxor xmm7,xmm7,xmm2
  4339. vpslld xmm2,xmm0,15
  4340. vpaddd xmm5,xmm5,xmm7
  4341. vpxor xmm7,xmm3,xmm1
  4342. vpsrld xmm1,xmm0,19
  4343. vpxor xmm7,xmm7,xmm2
  4344. vpslld xmm2,xmm0,13
  4345. vpxor xmm7,xmm7,xmm1
  4346. vpxor xmm7,xmm7,xmm2
  4347. vpaddd xmm5,xmm5,xmm7
  4348. vpsrld xmm7,xmm14,6
  4349. vpslld xmm2,xmm14,26
  4350. vmovdqu XMMWORD PTR[(224-128)+rax],xmm5
  4351. vpaddd xmm5,xmm5,xmm9
  4352. vpsrld xmm1,xmm14,11
  4353. vpxor xmm7,xmm7,xmm2
  4354. vpslld xmm2,xmm14,21
  4355. vpaddd xmm5,xmm5,XMMWORD PTR[64+rbp]
  4356. vpxor xmm7,xmm7,xmm1
  4357. vpsrld xmm1,xmm14,25
  4358. vpxor xmm7,xmm7,xmm2
  4359. vpslld xmm2,xmm14,7
  4360. vpandn xmm0,xmm14,xmm8
  4361. vpand xmm3,xmm14,xmm15
  4362. vpxor xmm7,xmm7,xmm1
  4363. vpsrld xmm9,xmm10,2
  4364. vpxor xmm7,xmm7,xmm2
  4365. vpslld xmm1,xmm10,30
  4366. vpxor xmm0,xmm0,xmm3
  4367. vpxor xmm3,xmm11,xmm10
  4368. vpxor xmm9,xmm9,xmm1
  4369. vpaddd xmm5,xmm5,xmm7
  4370. vpsrld xmm1,xmm10,13
  4371. vpslld xmm2,xmm10,19
  4372. vpaddd xmm5,xmm5,xmm0
  4373. vpand xmm4,xmm4,xmm3
  4374. vpxor xmm7,xmm9,xmm1
  4375. vpsrld xmm1,xmm10,22
  4376. vpxor xmm7,xmm7,xmm2
  4377. vpslld xmm2,xmm10,10
  4378. vpxor xmm9,xmm11,xmm4
  4379. vpaddd xmm13,xmm13,xmm5
  4380. vpxor xmm7,xmm7,xmm1
  4381. vpxor xmm7,xmm7,xmm2
  4382. vpaddd xmm9,xmm9,xmm5
  4383. vpaddd xmm9,xmm9,xmm7
  ; round 15: X[15] += s0(X[0]) + X[8] + s1(X[13]); a=xmm9 e=xmm13 h=xmm8; K at [rbp+96]
  ; (xmm5 is reloaded with X[0] here, ready for round 0 of the next pass)
  4384. vmovdqu xmm5,XMMWORD PTR[((0-128))+rax]
  4385. vpaddd xmm6,xmm6,XMMWORD PTR[((128-128))+rax]
  4386. vpsrld xmm7,xmm5,3
  4387. vpsrld xmm1,xmm5,7
  4388. vpslld xmm2,xmm5,25
  4389. vpxor xmm7,xmm7,xmm1
  4390. vpsrld xmm1,xmm5,18
  4391. vpxor xmm7,xmm7,xmm2
  4392. vpslld xmm2,xmm5,14
  4393. vmovdqu xmm0,XMMWORD PTR[((208-128))+rax]
  4394. vpsrld xmm4,xmm0,10
  4395. vpxor xmm7,xmm7,xmm1
  4396. vpsrld xmm1,xmm0,17
  4397. vpxor xmm7,xmm7,xmm2
  4398. vpslld xmm2,xmm0,15
  4399. vpaddd xmm6,xmm6,xmm7
  4400. vpxor xmm7,xmm4,xmm1
  4401. vpsrld xmm1,xmm0,19
  4402. vpxor xmm7,xmm7,xmm2
  4403. vpslld xmm2,xmm0,13
  4404. vpxor xmm7,xmm7,xmm1
  4405. vpxor xmm7,xmm7,xmm2
  4406. vpaddd xmm6,xmm6,xmm7
  4407. vpsrld xmm7,xmm13,6
  4408. vpslld xmm2,xmm13,26
  4409. vmovdqu XMMWORD PTR[(240-128)+rax],xmm6
  4410. vpaddd xmm6,xmm6,xmm8
  4411. vpsrld xmm1,xmm13,11
  4412. vpxor xmm7,xmm7,xmm2
  4413. vpslld xmm2,xmm13,21
  4414. vpaddd xmm6,xmm6,XMMWORD PTR[96+rbp]
  4415. vpxor xmm7,xmm7,xmm1
  4416. vpsrld xmm1,xmm13,25
  4417. vpxor xmm7,xmm7,xmm2
  4418. vpslld xmm2,xmm13,7
  4419. vpandn xmm0,xmm13,xmm15
  4420. vpand xmm4,xmm13,xmm14
  4421. vpxor xmm7,xmm7,xmm1
  4422. vpsrld xmm8,xmm9,2
  4423. vpxor xmm7,xmm7,xmm2
  4424. vpslld xmm1,xmm9,30
  4425. vpxor xmm0,xmm0,xmm4
  4426. vpxor xmm4,xmm10,xmm9
  4427. vpxor xmm8,xmm8,xmm1
  4428. vpaddd xmm6,xmm6,xmm7
  4429. vpsrld xmm1,xmm9,13
  4430. vpslld xmm2,xmm9,19
  4431. vpaddd xmm6,xmm6,xmm0
  4432. vpand xmm3,xmm3,xmm4
  4433. vpxor xmm7,xmm8,xmm1
  4434. vpsrld xmm1,xmm9,22
  4435. vpxor xmm7,xmm7,xmm2
  4436. vpslld xmm2,xmm9,10
  4437. vpxor xmm8,xmm10,xmm3
  4438. vpaddd xmm12,xmm12,xmm6
  4439. vpxor xmm7,xmm7,xmm1
  4440. vpxor xmm7,xmm7,xmm2
  4441. vpaddd xmm8,xmm8,xmm6
  4442. vpaddd xmm8,xmm8,xmm7
  ; 16 rounds done: step rbp past the second group of 8 constants and
  ; repeat until ecx (pass counter, 3 on entry) reaches zero
  4443. add rbp,256
  4444. dec ecx
  4445. jnz $L$oop_16_xx_avx
  ; ---------------------------------------------------------------------
  ; End of one pass of $L$oop_avx: all rounds for the current block of
  ; every lane are done.  This tail
  ;   1. rewinds rbp to K256+128,
  ;   2. parks the input pointer of every lane whose counter is <= 1,
  ;   3. decrements the positive per-lane counters held at [rbx],
  ;   4. adds the working variables xmm8..xmm15 into the state rows at
  ;      [rdi-128 + 32*k] (k = 0..7), leaving lanes whose counter was
  ;      not positive unchanged,
  ;   5. closes the inner ($L$oop_avx) and outer ($L$oop_grande_avx) loops.
  ; NOTE(review): the four dwords at [rbx] look like per-lane remaining
  ; block counts initialised before this point -- confirm against the
  ; setup code at the top of $L$oop_grande_avx.
  ; ---------------------------------------------------------------------
  4446. mov ecx,1
  4447. lea rbp,QWORD PTR[((K256+128))]
  ; cmovge fires when 1 >= counter (signed): that lane's input pointer is
  ; replaced by rbp, presumably so later loads through it stay inside
  ; readable memory instead of running past the lane's input
  4448. cmp ecx,DWORD PTR[rbx]
  4449. cmovge r8,rbp
  4450. cmp ecx,DWORD PTR[4+rbx]
  4451. cmovge r9,rbp
  4452. cmp ecx,DWORD PTR[8+rbx]
  4453. cmovge r10,rbp
  4454. cmp ecx,DWORD PTR[12+rbx]
  4455. cmovge r11,rbp
  ; xmm7 = the four counters (aligned load: rbx must be 16-byte aligned)
  4456. vmovdqa xmm7,XMMWORD PTR[rbx]
  4457. vpxor xmm0,xmm0,xmm0
  4458. vmovdqa xmm6,xmm7
  ; xmm6 = per-lane mask: all-ones where counter > 0, zero otherwise
  4459. vpcmpgtd xmm6,xmm6,xmm0
  ; adding the mask (-1 or 0) decrements only the counters that were > 0
  4460. vpaddd xmm7,xmm7,xmm6
  ; Feed-forward: state[k] = (working[k] AND mask) + state[k].  A lane with
  ; a zero mask contributes 0, so its stored state is written back as-is.
  4461. vmovdqu xmm0,XMMWORD PTR[((0-128))+rdi]
  4462. vpand xmm8,xmm8,xmm6
  4463. vmovdqu xmm1,XMMWORD PTR[((32-128))+rdi]
  4464. vpand xmm9,xmm9,xmm6
  4465. vmovdqu xmm2,XMMWORD PTR[((64-128))+rdi]
  4466. vpand xmm10,xmm10,xmm6
  4467. vmovdqu xmm5,XMMWORD PTR[((96-128))+rdi]
  4468. vpand xmm11,xmm11,xmm6
  4469. vpaddd xmm8,xmm8,xmm0
  4470. vmovdqu xmm0,XMMWORD PTR[((128-128))+rdi]
  4471. vpand xmm12,xmm12,xmm6
  4472. vpaddd xmm9,xmm9,xmm1
  4473. vmovdqu xmm1,XMMWORD PTR[((160-128))+rdi]
  4474. vpand xmm13,xmm13,xmm6
  4475. vpaddd xmm10,xmm10,xmm2
  4476. vmovdqu xmm2,XMMWORD PTR[((192-128))+rdi]
  4477. vpand xmm14,xmm14,xmm6
  4478. vpaddd xmm11,xmm11,xmm5
  4479. vmovdqu xmm5,XMMWORD PTR[((224-128))+rdi]
  4480. vpand xmm15,xmm15,xmm6
  4481. vpaddd xmm12,xmm12,xmm0
  4482. vpaddd xmm13,xmm13,xmm1
  4483. vmovdqu XMMWORD PTR[(0-128)+rdi],xmm8
  4484. vpaddd xmm14,xmm14,xmm2
  4485. vmovdqu XMMWORD PTR[(32-128)+rdi],xmm9
  4486. vpaddd xmm15,xmm15,xmm5
  4487. vmovdqu XMMWORD PTR[(64-128)+rdi],xmm10
  4488. vmovdqu XMMWORD PTR[(96-128)+rdi],xmm11
  4489. vmovdqu XMMWORD PTR[(128-128)+rdi],xmm12
  4490. vmovdqu XMMWORD PTR[(160-128)+rdi],xmm13
  4491. vmovdqu XMMWORD PTR[(192-128)+rdi],xmm14
  4492. vmovdqu XMMWORD PTR[(224-128)+rdi],xmm15
  ; write the decremented counters back
  4493. vmovdqu XMMWORD PTR[rbx],xmm7
  ; xmm6 was clobbered by the mask; reload the $L$pbswap constant that the
  ; input-loading rounds pass to vpshufb
  4494. vmovdqu xmm6,XMMWORD PTR[$L$pbswap]
  ; inner loop: edx counts the remaining passes of $L$oop_avx
  4495. dec edx
  4496. jnz $L$oop_avx
  ; outer loop: reload the count kept at [280+rsp], advance rdi by 16 bytes
  ; and rsi by 64 bytes, and go round again while the count is non-zero.
  ; NOTE(review): 16 bytes looks like the next 4-lane column of the state
  ; rows and 64 bytes like four 16-byte input descriptors -- confirm.
  4497. mov edx,DWORD PTR[280+rsp]
  4498. lea rdi,QWORD PTR[16+rdi]
  4499. lea rsi,QWORD PTR[64+rsi]
  4500. dec edx
  4501. jnz $L$oop_grande_avx
  ; ---------------------------------------------------------------------
  ; $L$done_avx -- function epilogue for sha256_multi_block_avx (Win64).
  ; Reloads the frame base from [272+rsp] into rax, clears the upper YMM
  ; halves, restores xmm6..xmm15, rbp and rbx from their save slots below
  ; rax, and resets rsp to rax.
  ; NOTE(review): [272+rsp] is presumably the entry rsp stored by the
  ; prologue, which is outside this chunk -- confirm.
  ; ---------------------------------------------------------------------
  4502. $L$done_avx::
  4503. mov rax,QWORD PTR[272+rsp]
  ; leave AVX state clean before executing legacy-SSE movaps and returning
  4504. vzeroupper
  ; xmm6..xmm15 live in ten consecutive 16-byte slots at rax-184 .. rax-40
  4505. movaps xmm6,XMMWORD PTR[((-184))+rax]
  4506. movaps xmm7,XMMWORD PTR[((-168))+rax]
  4507. movaps xmm8,XMMWORD PTR[((-152))+rax]
  4508. movaps xmm9,XMMWORD PTR[((-136))+rax]
  4509. movaps xmm10,XMMWORD PTR[((-120))+rax]
  4510. movaps xmm11,XMMWORD PTR[((-104))+rax]
  4511. movaps xmm12,XMMWORD PTR[((-88))+rax]
  4512. movaps xmm13,XMMWORD PTR[((-72))+rax]
  4513. movaps xmm14,XMMWORD PTR[((-56))+rax]
  4514. movaps xmm15,XMMWORD PTR[((-40))+rax]
  ; integer callee-saved registers sit directly below rax
  4515. mov rbp,QWORD PTR[((-16))+rax]
  4516. mov rbx,QWORD PTR[((-8))+rax]
  4517. lea rsp,QWORD PTR[rax]
  4518. $L$epilogue_avx::
  ; rdi/rsi are callee-saved on Win64; reload them from the two slots just
  ; above the return address
  4519. mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue
  4520. mov rsi,QWORD PTR[16+rsp]
  ; bytes F3 C3 encode "rep ret"
  4521. DB 0F3h,0C3h ;repret
  4522. $L$SEH_end_sha256_multi_block_avx::
  4523. sha256_multi_block_avx ENDP
  4524. ALIGN 32
  4525. sha256_multi_block_avx2 PROC PRIVATE
  4526. mov QWORD PTR[8+rsp],rdi ;WIN64 prologue
  4527. mov QWORD PTR[16+rsp],rsi
  4528. mov rax,rsp
  4529. $L$SEH_begin_sha256_multi_block_avx2::
  4530. mov rdi,rcx
  4531. mov rsi,rdx
  4532. mov rdx,r8
  4533. _avx2_shortcut::
  4534. mov rax,rsp
  4535. push rbx
  4536. push rbp
  4537. push r12
  4538. push r13
  4539. push r14
  4540. push r15
  4541. lea rsp,QWORD PTR[((-168))+rsp]
  4542. movaps XMMWORD PTR[rsp],xmm6
  4543. movaps XMMWORD PTR[16+rsp],xmm7
  4544. movaps XMMWORD PTR[32+rsp],xmm8
  4545. movaps XMMWORD PTR[48+rsp],xmm9
  4546. movaps XMMWORD PTR[64+rsp],xmm10
  4547. movaps XMMWORD PTR[80+rsp],xmm11
  4548. movaps XMMWORD PTR[(-120)+rax],xmm12
  4549. movaps XMMWORD PTR[(-104)+rax],xmm13
  4550. movaps XMMWORD PTR[(-88)+rax],xmm14
  4551. movaps XMMWORD PTR[(-72)+rax],xmm15
  4552. sub rsp,576
  4553. and rsp,-256
  4554. mov QWORD PTR[544+rsp],rax
  4555. $L$body_avx2::
  4556. lea rbp,QWORD PTR[((K256+128))]
  4557. lea rdi,QWORD PTR[128+rdi]
  4558. $L$oop_grande_avx2::
  4559. mov DWORD PTR[552+rsp],edx
  4560. xor edx,edx
  4561. lea rbx,QWORD PTR[512+rsp]
  4562. mov r12,QWORD PTR[rsi]
  4563. mov ecx,DWORD PTR[8+rsi]
  4564. cmp ecx,edx
  4565. cmovg edx,ecx
  4566. test ecx,ecx
  4567. mov DWORD PTR[rbx],ecx
  4568. cmovle r12,rbp
  4569. mov r13,QWORD PTR[16+rsi]
  4570. mov ecx,DWORD PTR[24+rsi]
  4571. cmp ecx,edx
  4572. cmovg edx,ecx
  4573. test ecx,ecx
  4574. mov DWORD PTR[4+rbx],ecx
  4575. cmovle r13,rbp
  4576. mov r14,QWORD PTR[32+rsi]
  4577. mov ecx,DWORD PTR[40+rsi]
  4578. cmp ecx,edx
  4579. cmovg edx,ecx
  4580. test ecx,ecx
  4581. mov DWORD PTR[8+rbx],ecx
  4582. cmovle r14,rbp
  4583. mov r15,QWORD PTR[48+rsi]
  4584. mov ecx,DWORD PTR[56+rsi]
  4585. cmp ecx,edx
  4586. cmovg edx,ecx
  4587. test ecx,ecx
  4588. mov DWORD PTR[12+rbx],ecx
  4589. cmovle r15,rbp
  4590. mov r8,QWORD PTR[64+rsi]
  4591. mov ecx,DWORD PTR[72+rsi]
  4592. cmp ecx,edx
  4593. cmovg edx,ecx
  4594. test ecx,ecx
  4595. mov DWORD PTR[16+rbx],ecx
  4596. cmovle r8,rbp
  4597. mov r9,QWORD PTR[80+rsi]
  4598. mov ecx,DWORD PTR[88+rsi]
  4599. cmp ecx,edx
  4600. cmovg edx,ecx
  4601. test ecx,ecx
  4602. mov DWORD PTR[20+rbx],ecx
  4603. cmovle r9,rbp
  4604. mov r10,QWORD PTR[96+rsi]
  4605. mov ecx,DWORD PTR[104+rsi]
  4606. cmp ecx,edx
  4607. cmovg edx,ecx
  4608. test ecx,ecx
  4609. mov DWORD PTR[24+rbx],ecx
  4610. cmovle r10,rbp
  4611. mov r11,QWORD PTR[112+rsi]
  4612. mov ecx,DWORD PTR[120+rsi]
  4613. cmp ecx,edx
  4614. cmovg edx,ecx
  4615. test ecx,ecx
  4616. mov DWORD PTR[28+rbx],ecx
  4617. cmovle r11,rbp
  4618. vmovdqu ymm8,YMMWORD PTR[((0-128))+rdi]
  4619. lea rax,QWORD PTR[128+rsp]
  4620. vmovdqu ymm9,YMMWORD PTR[((32-128))+rdi]
  4621. lea rbx,QWORD PTR[((256+128))+rsp]
  4622. vmovdqu ymm10,YMMWORD PTR[((64-128))+rdi]
  4623. vmovdqu ymm11,YMMWORD PTR[((96-128))+rdi]
  4624. vmovdqu ymm12,YMMWORD PTR[((128-128))+rdi]
  4625. vmovdqu ymm13,YMMWORD PTR[((160-128))+rdi]
  4626. vmovdqu ymm14,YMMWORD PTR[((192-128))+rdi]
  4627. vmovdqu ymm15,YMMWORD PTR[((224-128))+rdi]
  4628. vmovdqu ymm6,YMMWORD PTR[$L$pbswap]
  4629. jmp $L$oop_avx2
  4630. ALIGN 32
  4631. $L$oop_avx2::
  4632. vpxor ymm4,ymm10,ymm9
  4633. vmovd xmm5,DWORD PTR[r12]
  4634. vmovd xmm0,DWORD PTR[r8]
  4635. vmovd xmm1,DWORD PTR[r13]
  4636. vmovd xmm2,DWORD PTR[r9]
  4637. vpinsrd xmm5,xmm5,DWORD PTR[r14],1
  4638. vpinsrd xmm0,xmm0,DWORD PTR[r10],1
  4639. vpinsrd xmm1,xmm1,DWORD PTR[r15],1
  4640. vpunpckldq ymm5,ymm5,ymm1
  4641. vpinsrd xmm2,xmm2,DWORD PTR[r11],1
  4642. vpunpckldq ymm0,ymm0,ymm2
  4643. vinserti128 ymm5,ymm5,xmm0,1
  4644. vpshufb ymm5,ymm5,ymm6
  4645. vpsrld ymm7,ymm12,6
  4646. vpslld ymm2,ymm12,26
  4647. vmovdqu YMMWORD PTR[(0-128)+rax],ymm5
  4648. vpaddd ymm5,ymm5,ymm15
  4649. vpsrld ymm1,ymm12,11
  4650. vpxor ymm7,ymm7,ymm2
  4651. vpslld ymm2,ymm12,21
  4652. vpaddd ymm5,ymm5,YMMWORD PTR[((-128))+rbp]
  4653. vpxor ymm7,ymm7,ymm1
  4654. vpsrld ymm1,ymm12,25
  4655. vpxor ymm7,ymm7,ymm2
  4656. vpslld ymm2,ymm12,7
  4657. vpandn ymm0,ymm12,ymm14
  4658. vpand ymm3,ymm12,ymm13
  4659. vpxor ymm7,ymm7,ymm1
  4660. vpsrld ymm15,ymm8,2
  4661. vpxor ymm7,ymm7,ymm2
  4662. vpslld ymm1,ymm8,30
  4663. vpxor ymm0,ymm0,ymm3
  4664. vpxor ymm3,ymm9,ymm8
  4665. vpxor ymm15,ymm15,ymm1
  4666. vpaddd ymm5,ymm5,ymm7
  4667. vpsrld ymm1,ymm8,13
  4668. vpslld ymm2,ymm8,19
  4669. vpaddd ymm5,ymm5,ymm0
  4670. vpand ymm4,ymm4,ymm3
  4671. vpxor ymm7,ymm15,ymm1
  4672. vpsrld ymm1,ymm8,22
  4673. vpxor ymm7,ymm7,ymm2
  4674. vpslld ymm2,ymm8,10
  4675. vpxor ymm15,ymm9,ymm4
  4676. vpaddd ymm11,ymm11,ymm5
  4677. vpxor ymm7,ymm7,ymm1
  4678. vpxor ymm7,ymm7,ymm2
  4679. vpaddd ymm15,ymm15,ymm5
  4680. vpaddd ymm15,ymm15,ymm7
  4681. vmovd xmm5,DWORD PTR[4+r12]
  4682. vmovd xmm0,DWORD PTR[4+r8]
  4683. vmovd xmm1,DWORD PTR[4+r13]
  4684. vmovd xmm2,DWORD PTR[4+r9]
  4685. vpinsrd xmm5,xmm5,DWORD PTR[4+r14],1
  4686. vpinsrd xmm0,xmm0,DWORD PTR[4+r10],1
  4687. vpinsrd xmm1,xmm1,DWORD PTR[4+r15],1
  4688. vpunpckldq ymm5,ymm5,ymm1
  4689. vpinsrd xmm2,xmm2,DWORD PTR[4+r11],1
  4690. vpunpckldq ymm0,ymm0,ymm2
  4691. vinserti128 ymm5,ymm5,xmm0,1
  4692. vpshufb ymm5,ymm5,ymm6
  4693. vpsrld ymm7,ymm11,6
  4694. vpslld ymm2,ymm11,26
  4695. vmovdqu YMMWORD PTR[(32-128)+rax],ymm5
  4696. vpaddd ymm5,ymm5,ymm14
  4697. vpsrld ymm1,ymm11,11
  4698. vpxor ymm7,ymm7,ymm2
  4699. vpslld ymm2,ymm11,21
  4700. vpaddd ymm5,ymm5,YMMWORD PTR[((-96))+rbp]
  4701. vpxor ymm7,ymm7,ymm1
  4702. vpsrld ymm1,ymm11,25
  4703. vpxor ymm7,ymm7,ymm2
  4704. vpslld ymm2,ymm11,7
  4705. vpandn ymm0,ymm11,ymm13
  4706. vpand ymm4,ymm11,ymm12
  4707. vpxor ymm7,ymm7,ymm1
  4708. vpsrld ymm14,ymm15,2
  4709. vpxor ymm7,ymm7,ymm2
  4710. vpslld ymm1,ymm15,30
  4711. vpxor ymm0,ymm0,ymm4
  4712. vpxor ymm4,ymm8,ymm15
  4713. vpxor ymm14,ymm14,ymm1
  4714. vpaddd ymm5,ymm5,ymm7
  4715. vpsrld ymm1,ymm15,13
  4716. vpslld ymm2,ymm15,19
  4717. vpaddd ymm5,ymm5,ymm0
  4718. vpand ymm3,ymm3,ymm4
  4719. vpxor ymm7,ymm14,ymm1
  4720. vpsrld ymm1,ymm15,22
  4721. vpxor ymm7,ymm7,ymm2
  4722. vpslld ymm2,ymm15,10
  4723. vpxor ymm14,ymm8,ymm3
  4724. vpaddd ymm10,ymm10,ymm5
  4725. vpxor ymm7,ymm7,ymm1
  4726. vpxor ymm7,ymm7,ymm2
  4727. vpaddd ymm14,ymm14,ymm5
  4728. vpaddd ymm14,ymm14,ymm7
  4729. vmovd xmm5,DWORD PTR[8+r12]
  4730. vmovd xmm0,DWORD PTR[8+r8]
  4731. vmovd xmm1,DWORD PTR[8+r13]
  4732. vmovd xmm2,DWORD PTR[8+r9]
  4733. vpinsrd xmm5,xmm5,DWORD PTR[8+r14],1
  4734. vpinsrd xmm0,xmm0,DWORD PTR[8+r10],1
  4735. vpinsrd xmm1,xmm1,DWORD PTR[8+r15],1
  4736. vpunpckldq ymm5,ymm5,ymm1
  4737. vpinsrd xmm2,xmm2,DWORD PTR[8+r11],1
  4738. vpunpckldq ymm0,ymm0,ymm2
  4739. vinserti128 ymm5,ymm5,xmm0,1
  4740. vpshufb ymm5,ymm5,ymm6
  4741. vpsrld ymm7,ymm10,6
  4742. vpslld ymm2,ymm10,26
  4743. vmovdqu YMMWORD PTR[(64-128)+rax],ymm5
  4744. vpaddd ymm5,ymm5,ymm13
  4745. vpsrld ymm1,ymm10,11
  4746. vpxor ymm7,ymm7,ymm2
  4747. vpslld ymm2,ymm10,21
  4748. vpaddd ymm5,ymm5,YMMWORD PTR[((-64))+rbp]
  4749. vpxor ymm7,ymm7,ymm1
  4750. vpsrld ymm1,ymm10,25
  4751. vpxor ymm7,ymm7,ymm2
  4752. vpslld ymm2,ymm10,7
  4753. vpandn ymm0,ymm10,ymm12
  4754. vpand ymm3,ymm10,ymm11
  4755. vpxor ymm7,ymm7,ymm1
  4756. vpsrld ymm13,ymm14,2
  4757. vpxor ymm7,ymm7,ymm2
  4758. vpslld ymm1,ymm14,30
  4759. vpxor ymm0,ymm0,ymm3
  4760. vpxor ymm3,ymm15,ymm14
  4761. vpxor ymm13,ymm13,ymm1
  4762. vpaddd ymm5,ymm5,ymm7
  4763. vpsrld ymm1,ymm14,13
  4764. vpslld ymm2,ymm14,19
  4765. vpaddd ymm5,ymm5,ymm0
  4766. vpand ymm4,ymm4,ymm3
  4767. vpxor ymm7,ymm13,ymm1
  4768. vpsrld ymm1,ymm14,22
  4769. vpxor ymm7,ymm7,ymm2
  4770. vpslld ymm2,ymm14,10
  4771. vpxor ymm13,ymm15,ymm4
  4772. vpaddd ymm9,ymm9,ymm5
  4773. vpxor ymm7,ymm7,ymm1
  4774. vpxor ymm7,ymm7,ymm2
  4775. vpaddd ymm13,ymm13,ymm5
  4776. vpaddd ymm13,ymm13,ymm7
  4777. vmovd xmm5,DWORD PTR[12+r12]
  4778. vmovd xmm0,DWORD PTR[12+r8]
  4779. vmovd xmm1,DWORD PTR[12+r13]
  4780. vmovd xmm2,DWORD PTR[12+r9]
  4781. vpinsrd xmm5,xmm5,DWORD PTR[12+r14],1
  4782. vpinsrd xmm0,xmm0,DWORD PTR[12+r10],1
  4783. vpinsrd xmm1,xmm1,DWORD PTR[12+r15],1
  4784. vpunpckldq ymm5,ymm5,ymm1
  4785. vpinsrd xmm2,xmm2,DWORD PTR[12+r11],1
  4786. vpunpckldq ymm0,ymm0,ymm2
  4787. vinserti128 ymm5,ymm5,xmm0,1
  4788. vpshufb ymm5,ymm5,ymm6
  4789. vpsrld ymm7,ymm9,6
  4790. vpslld ymm2,ymm9,26
  4791. vmovdqu YMMWORD PTR[(96-128)+rax],ymm5
  4792. vpaddd ymm5,ymm5,ymm12
  4793. vpsrld ymm1,ymm9,11
  4794. vpxor ymm7,ymm7,ymm2
  4795. vpslld ymm2,ymm9,21
  4796. vpaddd ymm5,ymm5,YMMWORD PTR[((-32))+rbp]
  4797. vpxor ymm7,ymm7,ymm1
  4798. vpsrld ymm1,ymm9,25
  4799. vpxor ymm7,ymm7,ymm2
  4800. vpslld ymm2,ymm9,7
  4801. vpandn ymm0,ymm9,ymm11
  4802. vpand ymm4,ymm9,ymm10
  4803. vpxor ymm7,ymm7,ymm1
  4804. vpsrld ymm12,ymm13,2
  4805. vpxor ymm7,ymm7,ymm2
  4806. vpslld ymm1,ymm13,30
  4807. vpxor ymm0,ymm0,ymm4
  4808. vpxor ymm4,ymm14,ymm13
  4809. vpxor ymm12,ymm12,ymm1
  4810. vpaddd ymm5,ymm5,ymm7
  4811. vpsrld ymm1,ymm13,13
  4812. vpslld ymm2,ymm13,19
  4813. vpaddd ymm5,ymm5,ymm0
  4814. vpand ymm3,ymm3,ymm4
  4815. vpxor ymm7,ymm12,ymm1
  4816. vpsrld ymm1,ymm13,22
  4817. vpxor ymm7,ymm7,ymm2
  4818. vpslld ymm2,ymm13,10
  4819. vpxor ymm12,ymm14,ymm3
  4820. vpaddd ymm8,ymm8,ymm5
  4821. vpxor ymm7,ymm7,ymm1
  4822. vpxor ymm7,ymm7,ymm2
  4823. vpaddd ymm12,ymm12,ymm5
  4824. vpaddd ymm12,ymm12,ymm7
  4825. vmovd xmm5,DWORD PTR[16+r12]
  4826. vmovd xmm0,DWORD PTR[16+r8]
  4827. vmovd xmm1,DWORD PTR[16+r13]
  4828. vmovd xmm2,DWORD PTR[16+r9]
  4829. vpinsrd xmm5,xmm5,DWORD PTR[16+r14],1
  4830. vpinsrd xmm0,xmm0,DWORD PTR[16+r10],1
  4831. vpinsrd xmm1,xmm1,DWORD PTR[16+r15],1
  4832. vpunpckldq ymm5,ymm5,ymm1
  4833. vpinsrd xmm2,xmm2,DWORD PTR[16+r11],1
  4834. vpunpckldq ymm0,ymm0,ymm2
  4835. vinserti128 ymm5,ymm5,xmm0,1
  4836. vpshufb ymm5,ymm5,ymm6
  4837. vpsrld ymm7,ymm8,6
  4838. vpslld ymm2,ymm8,26
  4839. vmovdqu YMMWORD PTR[(128-128)+rax],ymm5
  4840. vpaddd ymm5,ymm5,ymm11
  4841. vpsrld ymm1,ymm8,11
  4842. vpxor ymm7,ymm7,ymm2
  4843. vpslld ymm2,ymm8,21
  4844. vpaddd ymm5,ymm5,YMMWORD PTR[rbp]
  4845. vpxor ymm7,ymm7,ymm1
  4846. vpsrld ymm1,ymm8,25
  4847. vpxor ymm7,ymm7,ymm2
  4848. vpslld ymm2,ymm8,7
  4849. vpandn ymm0,ymm8,ymm10
  4850. vpand ymm3,ymm8,ymm9
  4851. vpxor ymm7,ymm7,ymm1
  4852. vpsrld ymm11,ymm12,2
  4853. vpxor ymm7,ymm7,ymm2
  4854. vpslld ymm1,ymm12,30
  4855. vpxor ymm0,ymm0,ymm3
  4856. vpxor ymm3,ymm13,ymm12
  4857. vpxor ymm11,ymm11,ymm1
  4858. vpaddd ymm5,ymm5,ymm7
  4859. vpsrld ymm1,ymm12,13
  4860. vpslld ymm2,ymm12,19
  4861. vpaddd ymm5,ymm5,ymm0
  4862. vpand ymm4,ymm4,ymm3
  4863. vpxor ymm7,ymm11,ymm1
  4864. vpsrld ymm1,ymm12,22
  4865. vpxor ymm7,ymm7,ymm2
  4866. vpslld ymm2,ymm12,10
  4867. vpxor ymm11,ymm13,ymm4
  4868. vpaddd ymm15,ymm15,ymm5
  4869. vpxor ymm7,ymm7,ymm1
  4870. vpxor ymm7,ymm7,ymm2
  4871. vpaddd ymm11,ymm11,ymm5
  4872. vpaddd ymm11,ymm11,ymm7
  4873. vmovd xmm5,DWORD PTR[20+r12]
  4874. vmovd xmm0,DWORD PTR[20+r8]
  4875. vmovd xmm1,DWORD PTR[20+r13]
  4876. vmovd xmm2,DWORD PTR[20+r9]
  4877. vpinsrd xmm5,xmm5,DWORD PTR[20+r14],1
  4878. vpinsrd xmm0,xmm0,DWORD PTR[20+r10],1
  4879. vpinsrd xmm1,xmm1,DWORD PTR[20+r15],1
  4880. vpunpckldq ymm5,ymm5,ymm1
  4881. vpinsrd xmm2,xmm2,DWORD PTR[20+r11],1
  4882. vpunpckldq ymm0,ymm0,ymm2
  4883. vinserti128 ymm5,ymm5,xmm0,1
  4884. vpshufb ymm5,ymm5,ymm6
  4885. vpsrld ymm7,ymm15,6
  4886. vpslld ymm2,ymm15,26
  4887. vmovdqu YMMWORD PTR[(160-128)+rax],ymm5
  4888. vpaddd ymm5,ymm5,ymm10
  4889. vpsrld ymm1,ymm15,11
  4890. vpxor ymm7,ymm7,ymm2
  4891. vpslld ymm2,ymm15,21
  4892. vpaddd ymm5,ymm5,YMMWORD PTR[32+rbp]
  4893. vpxor ymm7,ymm7,ymm1
  4894. vpsrld ymm1,ymm15,25
  4895. vpxor ymm7,ymm7,ymm2
  4896. vpslld ymm2,ymm15,7
  4897. vpandn ymm0,ymm15,ymm9
  4898. vpand ymm4,ymm15,ymm8
  4899. vpxor ymm7,ymm7,ymm1
  4900. vpsrld ymm10,ymm11,2
  4901. vpxor ymm7,ymm7,ymm2
  4902. vpslld ymm1,ymm11,30
  4903. vpxor ymm0,ymm0,ymm4
  4904. vpxor ymm4,ymm12,ymm11
  4905. vpxor ymm10,ymm10,ymm1
  4906. vpaddd ymm5,ymm5,ymm7
  4907. vpsrld ymm1,ymm11,13
  4908. vpslld ymm2,ymm11,19
  4909. vpaddd ymm5,ymm5,ymm0
  4910. vpand ymm3,ymm3,ymm4
  4911. vpxor ymm7,ymm10,ymm1
  4912. vpsrld ymm1,ymm11,22
  4913. vpxor ymm7,ymm7,ymm2
  4914. vpslld ymm2,ymm11,10
  4915. vpxor ymm10,ymm12,ymm3
  4916. vpaddd ymm14,ymm14,ymm5
  4917. vpxor ymm7,ymm7,ymm1
  4918. vpxor ymm7,ymm7,ymm2
  4919. vpaddd ymm10,ymm10,ymm5
  4920. vpaddd ymm10,ymm10,ymm7
  4921. vmovd xmm5,DWORD PTR[24+r12]
  4922. vmovd xmm0,DWORD PTR[24+r8]
  4923. vmovd xmm1,DWORD PTR[24+r13]
  4924. vmovd xmm2,DWORD PTR[24+r9]
  4925. vpinsrd xmm5,xmm5,DWORD PTR[24+r14],1
  4926. vpinsrd xmm0,xmm0,DWORD PTR[24+r10],1
  4927. vpinsrd xmm1,xmm1,DWORD PTR[24+r15],1
  4928. vpunpckldq ymm5,ymm5,ymm1
  4929. vpinsrd xmm2,xmm2,DWORD PTR[24+r11],1
  4930. vpunpckldq ymm0,ymm0,ymm2
  4931. vinserti128 ymm5,ymm5,xmm0,1
  4932. vpshufb ymm5,ymm5,ymm6
  4933. vpsrld ymm7,ymm14,6
  4934. vpslld ymm2,ymm14,26
  4935. vmovdqu YMMWORD PTR[(192-128)+rax],ymm5
  4936. vpaddd ymm5,ymm5,ymm9
  4937. vpsrld ymm1,ymm14,11
  4938. vpxor ymm7,ymm7,ymm2
  4939. vpslld ymm2,ymm14,21
  4940. vpaddd ymm5,ymm5,YMMWORD PTR[64+rbp]
  4941. vpxor ymm7,ymm7,ymm1
  4942. vpsrld ymm1,ymm14,25
  4943. vpxor ymm7,ymm7,ymm2
  4944. vpslld ymm2,ymm14,7
  4945. vpandn ymm0,ymm14,ymm8
  4946. vpand ymm3,ymm14,ymm15
  4947. vpxor ymm7,ymm7,ymm1
  4948. vpsrld ymm9,ymm10,2
  4949. vpxor ymm7,ymm7,ymm2
  4950. vpslld ymm1,ymm10,30
  4951. vpxor ymm0,ymm0,ymm3
  4952. vpxor ymm3,ymm11,ymm10
  4953. vpxor ymm9,ymm9,ymm1
  4954. vpaddd ymm5,ymm5,ymm7
  4955. vpsrld ymm1,ymm10,13
  4956. vpslld ymm2,ymm10,19
  4957. vpaddd ymm5,ymm5,ymm0
  4958. vpand ymm4,ymm4,ymm3
  4959. vpxor ymm7,ymm9,ymm1
  4960. vpsrld ymm1,ymm10,22
  4961. vpxor ymm7,ymm7,ymm2
  4962. vpslld ymm2,ymm10,10
  4963. vpxor ymm9,ymm11,ymm4
  4964. vpaddd ymm13,ymm13,ymm5
  4965. vpxor ymm7,ymm7,ymm1
  4966. vpxor ymm7,ymm7,ymm2
  4967. vpaddd ymm9,ymm9,ymm5
  4968. vpaddd ymm9,ymm9,ymm7
  4969. vmovd xmm5,DWORD PTR[28+r12]
  4970. vmovd xmm0,DWORD PTR[28+r8]
  4971. vmovd xmm1,DWORD PTR[28+r13]
  4972. vmovd xmm2,DWORD PTR[28+r9]
  4973. vpinsrd xmm5,xmm5,DWORD PTR[28+r14],1
  4974. vpinsrd xmm0,xmm0,DWORD PTR[28+r10],1
  4975. vpinsrd xmm1,xmm1,DWORD PTR[28+r15],1
  4976. vpunpckldq ymm5,ymm5,ymm1
  4977. vpinsrd xmm2,xmm2,DWORD PTR[28+r11],1
  4978. vpunpckldq ymm0,ymm0,ymm2
  4979. vinserti128 ymm5,ymm5,xmm0,1
  4980. vpshufb ymm5,ymm5,ymm6
  4981. vpsrld ymm7,ymm13,6
  4982. vpslld ymm2,ymm13,26
  4983. vmovdqu YMMWORD PTR[(224-128)+rax],ymm5
  4984. vpaddd ymm5,ymm5,ymm8
  4985. vpsrld ymm1,ymm13,11
  4986. vpxor ymm7,ymm7,ymm2
  4987. vpslld ymm2,ymm13,21
  4988. vpaddd ymm5,ymm5,YMMWORD PTR[96+rbp]
  4989. vpxor ymm7,ymm7,ymm1
  4990. vpsrld ymm1,ymm13,25
  4991. vpxor ymm7,ymm7,ymm2
  4992. vpslld ymm2,ymm13,7
  4993. vpandn ymm0,ymm13,ymm15
  4994. vpand ymm4,ymm13,ymm14
  4995. vpxor ymm7,ymm7,ymm1
  4996. vpsrld ymm8,ymm9,2
  4997. vpxor ymm7,ymm7,ymm2
  4998. vpslld ymm1,ymm9,30
  4999. vpxor ymm0,ymm0,ymm4
  5000. vpxor ymm4,ymm10,ymm9
  5001. vpxor ymm8,ymm8,ymm1
  5002. vpaddd ymm5,ymm5,ymm7
  5003. vpsrld ymm1,ymm9,13
  5004. vpslld ymm2,ymm9,19
  5005. vpaddd ymm5,ymm5,ymm0
  5006. vpand ymm3,ymm3,ymm4
  5007. vpxor ymm7,ymm8,ymm1
  5008. vpsrld ymm1,ymm9,22
  5009. vpxor ymm7,ymm7,ymm2
  5010. vpslld ymm2,ymm9,10
  5011. vpxor ymm8,ymm10,ymm3
  5012. vpaddd ymm12,ymm12,ymm5
  5013. vpxor ymm7,ymm7,ymm1
  5014. vpxor ymm7,ymm7,ymm2
  5015. vpaddd ymm8,ymm8,ymm5
  5016. vpaddd ymm8,ymm8,ymm7
  5017. add rbp,256
  5018. vmovd xmm5,DWORD PTR[32+r12]
  5019. vmovd xmm0,DWORD PTR[32+r8]
  5020. vmovd xmm1,DWORD PTR[32+r13]
  5021. vmovd xmm2,DWORD PTR[32+r9]
  5022. vpinsrd xmm5,xmm5,DWORD PTR[32+r14],1
  5023. vpinsrd xmm0,xmm0,DWORD PTR[32+r10],1
  5024. vpinsrd xmm1,xmm1,DWORD PTR[32+r15],1
  5025. vpunpckldq ymm5,ymm5,ymm1
  5026. vpinsrd xmm2,xmm2,DWORD PTR[32+r11],1
  5027. vpunpckldq ymm0,ymm0,ymm2
  5028. vinserti128 ymm5,ymm5,xmm0,1
  5029. vpshufb ymm5,ymm5,ymm6
  5030. vpsrld ymm7,ymm12,6
  5031. vpslld ymm2,ymm12,26
  5032. vmovdqu YMMWORD PTR[(256-256-128)+rbx],ymm5
  5033. vpaddd ymm5,ymm5,ymm15
  5034. vpsrld ymm1,ymm12,11
  5035. vpxor ymm7,ymm7,ymm2
  5036. vpslld ymm2,ymm12,21
  5037. vpaddd ymm5,ymm5,YMMWORD PTR[((-128))+rbp]
  5038. vpxor ymm7,ymm7,ymm1
  5039. vpsrld ymm1,ymm12,25
  5040. vpxor ymm7,ymm7,ymm2
  5041. vpslld ymm2,ymm12,7
  5042. vpandn ymm0,ymm12,ymm14
  5043. vpand ymm3,ymm12,ymm13
  5044. vpxor ymm7,ymm7,ymm1
  5045. vpsrld ymm15,ymm8,2
  5046. vpxor ymm7,ymm7,ymm2
  5047. vpslld ymm1,ymm8,30
  5048. vpxor ymm0,ymm0,ymm3
  5049. vpxor ymm3,ymm9,ymm8
  5050. vpxor ymm15,ymm15,ymm1
  5051. vpaddd ymm5,ymm5,ymm7
  5052. vpsrld ymm1,ymm8,13
  5053. vpslld ymm2,ymm8,19
  5054. vpaddd ymm5,ymm5,ymm0
  5055. vpand ymm4,ymm4,ymm3
  5056. vpxor ymm7,ymm15,ymm1
  5057. vpsrld ymm1,ymm8,22
  5058. vpxor ymm7,ymm7,ymm2
  5059. vpslld ymm2,ymm8,10
  5060. vpxor ymm15,ymm9,ymm4
  5061. vpaddd ymm11,ymm11,ymm5
  5062. vpxor ymm7,ymm7,ymm1
  5063. vpxor ymm7,ymm7,ymm2
  5064. vpaddd ymm15,ymm15,ymm5
  5065. vpaddd ymm15,ymm15,ymm7
  5066. vmovd xmm5,DWORD PTR[36+r12]
  5067. vmovd xmm0,DWORD PTR[36+r8]
  5068. vmovd xmm1,DWORD PTR[36+r13]
  5069. vmovd xmm2,DWORD PTR[36+r9]
  5070. vpinsrd xmm5,xmm5,DWORD PTR[36+r14],1
  5071. vpinsrd xmm0,xmm0,DWORD PTR[36+r10],1
  5072. vpinsrd xmm1,xmm1,DWORD PTR[36+r15],1
  5073. vpunpckldq ymm5,ymm5,ymm1
  5074. vpinsrd xmm2,xmm2,DWORD PTR[36+r11],1
  5075. vpunpckldq ymm0,ymm0,ymm2
  5076. vinserti128 ymm5,ymm5,xmm0,1
  5077. vpshufb ymm5,ymm5,ymm6
  5078. vpsrld ymm7,ymm11,6
  5079. vpslld ymm2,ymm11,26
  5080. vmovdqu YMMWORD PTR[(288-256-128)+rbx],ymm5
  5081. vpaddd ymm5,ymm5,ymm14
  5082. vpsrld ymm1,ymm11,11
  5083. vpxor ymm7,ymm7,ymm2
  5084. vpslld ymm2,ymm11,21
  5085. vpaddd ymm5,ymm5,YMMWORD PTR[((-96))+rbp]
  5086. vpxor ymm7,ymm7,ymm1
  5087. vpsrld ymm1,ymm11,25
  5088. vpxor ymm7,ymm7,ymm2
  5089. vpslld ymm2,ymm11,7
  5090. vpandn ymm0,ymm11,ymm13
  5091. vpand ymm4,ymm11,ymm12
  5092. vpxor ymm7,ymm7,ymm1
  5093. vpsrld ymm14,ymm15,2
  5094. vpxor ymm7,ymm7,ymm2
  5095. vpslld ymm1,ymm15,30
  5096. vpxor ymm0,ymm0,ymm4
  5097. vpxor ymm4,ymm8,ymm15
  5098. vpxor ymm14,ymm14,ymm1
  5099. vpaddd ymm5,ymm5,ymm7
  5100. vpsrld ymm1,ymm15,13
  5101. vpslld ymm2,ymm15,19
  5102. vpaddd ymm5,ymm5,ymm0
  5103. vpand ymm3,ymm3,ymm4
  5104. vpxor ymm7,ymm14,ymm1
  5105. vpsrld ymm1,ymm15,22
  5106. vpxor ymm7,ymm7,ymm2
  5107. vpslld ymm2,ymm15,10
  5108. vpxor ymm14,ymm8,ymm3
  5109. vpaddd ymm10,ymm10,ymm5
  5110. vpxor ymm7,ymm7,ymm1
  5111. vpxor ymm7,ymm7,ymm2
  5112. vpaddd ymm14,ymm14,ymm5
  5113. vpaddd ymm14,ymm14,ymm7
  5114. vmovd xmm5,DWORD PTR[40+r12]
  5115. vmovd xmm0,DWORD PTR[40+r8]
  5116. vmovd xmm1,DWORD PTR[40+r13]
  5117. vmovd xmm2,DWORD PTR[40+r9]
  5118. vpinsrd xmm5,xmm5,DWORD PTR[40+r14],1
  5119. vpinsrd xmm0,xmm0,DWORD PTR[40+r10],1
  5120. vpinsrd xmm1,xmm1,DWORD PTR[40+r15],1
  5121. vpunpckldq ymm5,ymm5,ymm1
  5122. vpinsrd xmm2,xmm2,DWORD PTR[40+r11],1
  5123. vpunpckldq ymm0,ymm0,ymm2
  5124. vinserti128 ymm5,ymm5,xmm0,1
  5125. vpshufb ymm5,ymm5,ymm6
  5126. vpsrld ymm7,ymm10,6
  5127. vpslld ymm2,ymm10,26
  5128. vmovdqu YMMWORD PTR[(320-256-128)+rbx],ymm5
  5129. vpaddd ymm5,ymm5,ymm13
  5130. vpsrld ymm1,ymm10,11
  5131. vpxor ymm7,ymm7,ymm2
  5132. vpslld ymm2,ymm10,21
  5133. vpaddd ymm5,ymm5,YMMWORD PTR[((-64))+rbp]
  5134. vpxor ymm7,ymm7,ymm1
  5135. vpsrld ymm1,ymm10,25
  5136. vpxor ymm7,ymm7,ymm2
  5137. vpslld ymm2,ymm10,7
  5138. vpandn ymm0,ymm10,ymm12
  5139. vpand ymm3,ymm10,ymm11
  5140. vpxor ymm7,ymm7,ymm1
  5141. vpsrld ymm13,ymm14,2
  5142. vpxor ymm7,ymm7,ymm2
  5143. vpslld ymm1,ymm14,30
  5144. vpxor ymm0,ymm0,ymm3
  5145. vpxor ymm3,ymm15,ymm14
  5146. vpxor ymm13,ymm13,ymm1
  5147. vpaddd ymm5,ymm5,ymm7
  5148. vpsrld ymm1,ymm14,13
  5149. vpslld ymm2,ymm14,19
  5150. vpaddd ymm5,ymm5,ymm0
  5151. vpand ymm4,ymm4,ymm3
  5152. vpxor ymm7,ymm13,ymm1
  5153. vpsrld ymm1,ymm14,22
  5154. vpxor ymm7,ymm7,ymm2
  5155. vpslld ymm2,ymm14,10
  5156. vpxor ymm13,ymm15,ymm4
  5157. vpaddd ymm9,ymm9,ymm5
  5158. vpxor ymm7,ymm7,ymm1
  5159. vpxor ymm7,ymm7,ymm2
  5160. vpaddd ymm13,ymm13,ymm5
  5161. vpaddd ymm13,ymm13,ymm7
  5162. vmovd xmm5,DWORD PTR[44+r12]
  5163. vmovd xmm0,DWORD PTR[44+r8]
  5164. vmovd xmm1,DWORD PTR[44+r13]
  5165. vmovd xmm2,DWORD PTR[44+r9]
  5166. vpinsrd xmm5,xmm5,DWORD PTR[44+r14],1
  5167. vpinsrd xmm0,xmm0,DWORD PTR[44+r10],1
  5168. vpinsrd xmm1,xmm1,DWORD PTR[44+r15],1
  5169. vpunpckldq ymm5,ymm5,ymm1
  5170. vpinsrd xmm2,xmm2,DWORD PTR[44+r11],1
  5171. vpunpckldq ymm0,ymm0,ymm2
  5172. vinserti128 ymm5,ymm5,xmm0,1
  5173. vpshufb ymm5,ymm5,ymm6
  5174. vpsrld ymm7,ymm9,6
  5175. vpslld ymm2,ymm9,26
  5176. vmovdqu YMMWORD PTR[(352-256-128)+rbx],ymm5
  5177. vpaddd ymm5,ymm5,ymm12
  5178. vpsrld ymm1,ymm9,11
  5179. vpxor ymm7,ymm7,ymm2
  5180. vpslld ymm2,ymm9,21
  5181. vpaddd ymm5,ymm5,YMMWORD PTR[((-32))+rbp]
  5182. vpxor ymm7,ymm7,ymm1
  5183. vpsrld ymm1,ymm9,25
  5184. vpxor ymm7,ymm7,ymm2
  5185. vpslld ymm2,ymm9,7
  5186. vpandn ymm0,ymm9,ymm11
  5187. vpand ymm4,ymm9,ymm10
  5188. vpxor ymm7,ymm7,ymm1
  5189. vpsrld ymm12,ymm13,2
  5190. vpxor ymm7,ymm7,ymm2
  5191. vpslld ymm1,ymm13,30
  5192. vpxor ymm0,ymm0,ymm4
  5193. vpxor ymm4,ymm14,ymm13
  5194. vpxor ymm12,ymm12,ymm1
  5195. vpaddd ymm5,ymm5,ymm7
  5196. vpsrld ymm1,ymm13,13
  5197. vpslld ymm2,ymm13,19
  5198. vpaddd ymm5,ymm5,ymm0
  5199. vpand ymm3,ymm3,ymm4
  5200. vpxor ymm7,ymm12,ymm1
  5201. vpsrld ymm1,ymm13,22
  5202. vpxor ymm7,ymm7,ymm2
  5203. vpslld ymm2,ymm13,10
  5204. vpxor ymm12,ymm14,ymm3
  5205. vpaddd ymm8,ymm8,ymm5
  5206. vpxor ymm7,ymm7,ymm1
  5207. vpxor ymm7,ymm7,ymm2
  5208. vpaddd ymm12,ymm12,ymm5
  5209. vpaddd ymm12,ymm12,ymm7
  5210. vmovd xmm5,DWORD PTR[48+r12]
  5211. vmovd xmm0,DWORD PTR[48+r8]
  5212. vmovd xmm1,DWORD PTR[48+r13]
  5213. vmovd xmm2,DWORD PTR[48+r9]
  5214. vpinsrd xmm5,xmm5,DWORD PTR[48+r14],1
  5215. vpinsrd xmm0,xmm0,DWORD PTR[48+r10],1
  5216. vpinsrd xmm1,xmm1,DWORD PTR[48+r15],1
  5217. vpunpckldq ymm5,ymm5,ymm1
  5218. vpinsrd xmm2,xmm2,DWORD PTR[48+r11],1
  5219. vpunpckldq ymm0,ymm0,ymm2
  5220. vinserti128 ymm5,ymm5,xmm0,1
  5221. vpshufb ymm5,ymm5,ymm6
  5222. vpsrld ymm7,ymm8,6
  5223. vpslld ymm2,ymm8,26
  5224. vmovdqu YMMWORD PTR[(384-256-128)+rbx],ymm5
  5225. vpaddd ymm5,ymm5,ymm11
  5226. vpsrld ymm1,ymm8,11
  5227. vpxor ymm7,ymm7,ymm2
  5228. vpslld ymm2,ymm8,21
  5229. vpaddd ymm5,ymm5,YMMWORD PTR[rbp]
  5230. vpxor ymm7,ymm7,ymm1
  5231. vpsrld ymm1,ymm8,25
  5232. vpxor ymm7,ymm7,ymm2
  5233. vpslld ymm2,ymm8,7
  5234. vpandn ymm0,ymm8,ymm10
  5235. vpand ymm3,ymm8,ymm9
  5236. vpxor ymm7,ymm7,ymm1
  5237. vpsrld ymm11,ymm12,2
  5238. vpxor ymm7,ymm7,ymm2
  5239. vpslld ymm1,ymm12,30
  5240. vpxor ymm0,ymm0,ymm3
  5241. vpxor ymm3,ymm13,ymm12
  5242. vpxor ymm11,ymm11,ymm1
  5243. vpaddd ymm5,ymm5,ymm7
  5244. vpsrld ymm1,ymm12,13
  5245. vpslld ymm2,ymm12,19
  5246. vpaddd ymm5,ymm5,ymm0
  5247. vpand ymm4,ymm4,ymm3
  5248. vpxor ymm7,ymm11,ymm1
  5249. vpsrld ymm1,ymm12,22
  5250. vpxor ymm7,ymm7,ymm2
  5251. vpslld ymm2,ymm12,10
  5252. vpxor ymm11,ymm13,ymm4
  5253. vpaddd ymm15,ymm15,ymm5
  5254. vpxor ymm7,ymm7,ymm1
  5255. vpxor ymm7,ymm7,ymm2
  5256. vpaddd ymm11,ymm11,ymm5
  5257. vpaddd ymm11,ymm11,ymm7
  5258. vmovd xmm5,DWORD PTR[52+r12]
  5259. vmovd xmm0,DWORD PTR[52+r8]
  5260. vmovd xmm1,DWORD PTR[52+r13]
  5261. vmovd xmm2,DWORD PTR[52+r9]
  5262. vpinsrd xmm5,xmm5,DWORD PTR[52+r14],1
  5263. vpinsrd xmm0,xmm0,DWORD PTR[52+r10],1
  5264. vpinsrd xmm1,xmm1,DWORD PTR[52+r15],1
  5265. vpunpckldq ymm5,ymm5,ymm1
  5266. vpinsrd xmm2,xmm2,DWORD PTR[52+r11],1
  5267. vpunpckldq ymm0,ymm0,ymm2
  5268. vinserti128 ymm5,ymm5,xmm0,1
  5269. vpshufb ymm5,ymm5,ymm6
  5270. vpsrld ymm7,ymm15,6
  5271. vpslld ymm2,ymm15,26
  5272. vmovdqu YMMWORD PTR[(416-256-128)+rbx],ymm5
  5273. vpaddd ymm5,ymm5,ymm10
  5274. vpsrld ymm1,ymm15,11
  5275. vpxor ymm7,ymm7,ymm2
  5276. vpslld ymm2,ymm15,21
  5277. vpaddd ymm5,ymm5,YMMWORD PTR[32+rbp]
  5278. vpxor ymm7,ymm7,ymm1
  5279. vpsrld ymm1,ymm15,25
  5280. vpxor ymm7,ymm7,ymm2
  5281. vpslld ymm2,ymm15,7
  5282. vpandn ymm0,ymm15,ymm9
  5283. vpand ymm4,ymm15,ymm8
  5284. vpxor ymm7,ymm7,ymm1
  5285. vpsrld ymm10,ymm11,2
  5286. vpxor ymm7,ymm7,ymm2
  5287. vpslld ymm1,ymm11,30
  5288. vpxor ymm0,ymm0,ymm4
  5289. vpxor ymm4,ymm12,ymm11
  5290. vpxor ymm10,ymm10,ymm1
  5291. vpaddd ymm5,ymm5,ymm7
  5292. vpsrld ymm1,ymm11,13
  5293. vpslld ymm2,ymm11,19
  5294. vpaddd ymm5,ymm5,ymm0
  5295. vpand ymm3,ymm3,ymm4
  5296. vpxor ymm7,ymm10,ymm1
  5297. vpsrld ymm1,ymm11,22
  5298. vpxor ymm7,ymm7,ymm2
  5299. vpslld ymm2,ymm11,10
  5300. vpxor ymm10,ymm12,ymm3
  5301. vpaddd ymm14,ymm14,ymm5
  5302. vpxor ymm7,ymm7,ymm1
  5303. vpxor ymm7,ymm7,ymm2
  5304. vpaddd ymm10,ymm10,ymm5
  5305. vpaddd ymm10,ymm10,ymm7
  5306. vmovd xmm5,DWORD PTR[56+r12]
  5307. vmovd xmm0,DWORD PTR[56+r8]
  5308. vmovd xmm1,DWORD PTR[56+r13]
  5309. vmovd xmm2,DWORD PTR[56+r9]
  5310. vpinsrd xmm5,xmm5,DWORD PTR[56+r14],1
  5311. vpinsrd xmm0,xmm0,DWORD PTR[56+r10],1
  5312. vpinsrd xmm1,xmm1,DWORD PTR[56+r15],1
  5313. vpunpckldq ymm5,ymm5,ymm1
  5314. vpinsrd xmm2,xmm2,DWORD PTR[56+r11],1
  5315. vpunpckldq ymm0,ymm0,ymm2
  5316. vinserti128 ymm5,ymm5,xmm0,1
  5317. vpshufb ymm5,ymm5,ymm6
  5318. vpsrld ymm7,ymm14,6
  5319. vpslld ymm2,ymm14,26
  5320. vmovdqu YMMWORD PTR[(448-256-128)+rbx],ymm5
  5321. vpaddd ymm5,ymm5,ymm9
  5322. vpsrld ymm1,ymm14,11
  5323. vpxor ymm7,ymm7,ymm2
  5324. vpslld ymm2,ymm14,21
  5325. vpaddd ymm5,ymm5,YMMWORD PTR[64+rbp]
  5326. vpxor ymm7,ymm7,ymm1
  5327. vpsrld ymm1,ymm14,25
  5328. vpxor ymm7,ymm7,ymm2
  5329. vpslld ymm2,ymm14,7
  5330. vpandn ymm0,ymm14,ymm8
  5331. vpand ymm3,ymm14,ymm15
  5332. vpxor ymm7,ymm7,ymm1
  5333. vpsrld ymm9,ymm10,2
  5334. vpxor ymm7,ymm7,ymm2
  5335. vpslld ymm1,ymm10,30
  5336. vpxor ymm0,ymm0,ymm3
  5337. vpxor ymm3,ymm11,ymm10
  5338. vpxor ymm9,ymm9,ymm1
  5339. vpaddd ymm5,ymm5,ymm7
  5340. vpsrld ymm1,ymm10,13
  5341. vpslld ymm2,ymm10,19
  5342. vpaddd ymm5,ymm5,ymm0
  5343. vpand ymm4,ymm4,ymm3
  5344. vpxor ymm7,ymm9,ymm1
  5345. vpsrld ymm1,ymm10,22
  5346. vpxor ymm7,ymm7,ymm2
  5347. vpslld ymm2,ymm10,10
  5348. vpxor ymm9,ymm11,ymm4
  5349. vpaddd ymm13,ymm13,ymm5
  5350. vpxor ymm7,ymm7,ymm1
  5351. vpxor ymm7,ymm7,ymm2
  5352. vpaddd ymm9,ymm9,ymm5
  5353. vpaddd ymm9,ymm9,ymm7
  5354. vmovd xmm5,DWORD PTR[60+r12]
  5355. lea r12,QWORD PTR[64+r12]
  5356. vmovd xmm0,DWORD PTR[60+r8]
  5357. lea r8,QWORD PTR[64+r8]
  5358. vmovd xmm1,DWORD PTR[60+r13]
  5359. lea r13,QWORD PTR[64+r13]
  5360. vmovd xmm2,DWORD PTR[60+r9]
  5361. lea r9,QWORD PTR[64+r9]
  5362. vpinsrd xmm5,xmm5,DWORD PTR[60+r14],1
  5363. lea r14,QWORD PTR[64+r14]
  5364. vpinsrd xmm0,xmm0,DWORD PTR[60+r10],1
  5365. lea r10,QWORD PTR[64+r10]
  5366. vpinsrd xmm1,xmm1,DWORD PTR[60+r15],1
  5367. lea r15,QWORD PTR[64+r15]
  5368. vpunpckldq ymm5,ymm5,ymm1
  5369. vpinsrd xmm2,xmm2,DWORD PTR[60+r11],1
  5370. lea r11,QWORD PTR[64+r11]
  5371. vpunpckldq ymm0,ymm0,ymm2
  5372. vinserti128 ymm5,ymm5,xmm0,1
  5373. vpshufb ymm5,ymm5,ymm6
  5374. vpsrld ymm7,ymm13,6
  5375. vpslld ymm2,ymm13,26
  5376. vmovdqu YMMWORD PTR[(480-256-128)+rbx],ymm5
  5377. vpaddd ymm5,ymm5,ymm8
  5378. vpsrld ymm1,ymm13,11
  5379. vpxor ymm7,ymm7,ymm2
  5380. vpslld ymm2,ymm13,21
  5381. vpaddd ymm5,ymm5,YMMWORD PTR[96+rbp]
  5382. vpxor ymm7,ymm7,ymm1
  5383. vpsrld ymm1,ymm13,25
  5384. vpxor ymm7,ymm7,ymm2
  5385. prefetcht0 [63+r12]
  5386. vpslld ymm2,ymm13,7
  5387. vpandn ymm0,ymm13,ymm15
  5388. vpand ymm4,ymm13,ymm14
  5389. prefetcht0 [63+r13]
  5390. vpxor ymm7,ymm7,ymm1
  5391. vpsrld ymm8,ymm9,2
  5392. vpxor ymm7,ymm7,ymm2
  5393. prefetcht0 [63+r14]
  5394. vpslld ymm1,ymm9,30
  5395. vpxor ymm0,ymm0,ymm4
  5396. vpxor ymm4,ymm10,ymm9
  5397. prefetcht0 [63+r15]
  5398. vpxor ymm8,ymm8,ymm1
  5399. vpaddd ymm5,ymm5,ymm7
  5400. vpsrld ymm1,ymm9,13
  5401. prefetcht0 [63+r8]
  5402. vpslld ymm2,ymm9,19
  5403. vpaddd ymm5,ymm5,ymm0
  5404. vpand ymm3,ymm3,ymm4
  5405. prefetcht0 [63+r9]
  5406. vpxor ymm7,ymm8,ymm1
  5407. vpsrld ymm1,ymm9,22
  5408. vpxor ymm7,ymm7,ymm2
  5409. prefetcht0 [63+r10]
  5410. vpslld ymm2,ymm9,10
  5411. vpxor ymm8,ymm10,ymm3
  5412. vpaddd ymm12,ymm12,ymm5
  5413. prefetcht0 [63+r11]
  5414. vpxor ymm7,ymm7,ymm1
  5415. vpxor ymm7,ymm7,ymm2
  5416. vpaddd ymm8,ymm8,ymm5
  5417. vpaddd ymm8,ymm8,ymm7
  5418. add rbp,256
  5419. vmovdqu ymm5,YMMWORD PTR[((0-128))+rax]
  5420. mov ecx,3
  5421. jmp $L$oop_16_xx_avx2
  5422. ALIGN 32
  5423. $L$oop_16_xx_avx2::
  5424. vmovdqu ymm6,YMMWORD PTR[((32-128))+rax]
  5425. vpaddd ymm5,ymm5,YMMWORD PTR[((288-256-128))+rbx]
  5426. vpsrld ymm7,ymm6,3
  5427. vpsrld ymm1,ymm6,7
  5428. vpslld ymm2,ymm6,25
  5429. vpxor ymm7,ymm7,ymm1
  5430. vpsrld ymm1,ymm6,18
  5431. vpxor ymm7,ymm7,ymm2
  5432. vpslld ymm2,ymm6,14
  5433. vmovdqu ymm0,YMMWORD PTR[((448-256-128))+rbx]
  5434. vpsrld ymm3,ymm0,10
  5435. vpxor ymm7,ymm7,ymm1
  5436. vpsrld ymm1,ymm0,17
  5437. vpxor ymm7,ymm7,ymm2
  5438. vpslld ymm2,ymm0,15
  5439. vpaddd ymm5,ymm5,ymm7
  5440. vpxor ymm7,ymm3,ymm1
  5441. vpsrld ymm1,ymm0,19
  5442. vpxor ymm7,ymm7,ymm2
  5443. vpslld ymm2,ymm0,13
  5444. vpxor ymm7,ymm7,ymm1
  5445. vpxor ymm7,ymm7,ymm2
  5446. vpaddd ymm5,ymm5,ymm7
  5447. vpsrld ymm7,ymm12,6
  5448. vpslld ymm2,ymm12,26
  5449. vmovdqu YMMWORD PTR[(0-128)+rax],ymm5
  5450. vpaddd ymm5,ymm5,ymm15
  5451. vpsrld ymm1,ymm12,11
  5452. vpxor ymm7,ymm7,ymm2
  5453. vpslld ymm2,ymm12,21
  5454. vpaddd ymm5,ymm5,YMMWORD PTR[((-128))+rbp]
  5455. vpxor ymm7,ymm7,ymm1
  5456. vpsrld ymm1,ymm12,25
  5457. vpxor ymm7,ymm7,ymm2
  5458. vpslld ymm2,ymm12,7
  5459. vpandn ymm0,ymm12,ymm14
  5460. vpand ymm3,ymm12,ymm13
  5461. vpxor ymm7,ymm7,ymm1
  5462. vpsrld ymm15,ymm8,2
  5463. vpxor ymm7,ymm7,ymm2
  5464. vpslld ymm1,ymm8,30
  5465. vpxor ymm0,ymm0,ymm3
  5466. vpxor ymm3,ymm9,ymm8
  5467. vpxor ymm15,ymm15,ymm1
  5468. vpaddd ymm5,ymm5,ymm7
  5469. vpsrld ymm1,ymm8,13
  5470. vpslld ymm2,ymm8,19
  5471. vpaddd ymm5,ymm5,ymm0
  5472. vpand ymm4,ymm4,ymm3
  5473. vpxor ymm7,ymm15,ymm1
  5474. vpsrld ymm1,ymm8,22
  5475. vpxor ymm7,ymm7,ymm2
  5476. vpslld ymm2,ymm8,10
  5477. vpxor ymm15,ymm9,ymm4
  5478. vpaddd ymm11,ymm11,ymm5
  5479. vpxor ymm7,ymm7,ymm1
  5480. vpxor ymm7,ymm7,ymm2
  5481. vpaddd ymm15,ymm15,ymm5
  5482. vpaddd ymm15,ymm15,ymm7
  5483. vmovdqu ymm5,YMMWORD PTR[((64-128))+rax]
  5484. vpaddd ymm6,ymm6,YMMWORD PTR[((320-256-128))+rbx]
  5485. vpsrld ymm7,ymm5,3
  5486. vpsrld ymm1,ymm5,7
  5487. vpslld ymm2,ymm5,25
  5488. vpxor ymm7,ymm7,ymm1
  5489. vpsrld ymm1,ymm5,18
  5490. vpxor ymm7,ymm7,ymm2
  5491. vpslld ymm2,ymm5,14
  5492. vmovdqu ymm0,YMMWORD PTR[((480-256-128))+rbx]
  5493. vpsrld ymm4,ymm0,10
  5494. vpxor ymm7,ymm7,ymm1
  5495. vpsrld ymm1,ymm0,17
  5496. vpxor ymm7,ymm7,ymm2
  5497. vpslld ymm2,ymm0,15
  5498. vpaddd ymm6,ymm6,ymm7
  5499. vpxor ymm7,ymm4,ymm1
  5500. vpsrld ymm1,ymm0,19
  5501. vpxor ymm7,ymm7,ymm2
  5502. vpslld ymm2,ymm0,13
  5503. vpxor ymm7,ymm7,ymm1
  5504. vpxor ymm7,ymm7,ymm2
  5505. vpaddd ymm6,ymm6,ymm7
  5506. vpsrld ymm7,ymm11,6
  5507. vpslld ymm2,ymm11,26
  5508. vmovdqu YMMWORD PTR[(32-128)+rax],ymm6
  5509. vpaddd ymm6,ymm6,ymm14
  5510. vpsrld ymm1,ymm11,11
  5511. vpxor ymm7,ymm7,ymm2
  5512. vpslld ymm2,ymm11,21
  5513. vpaddd ymm6,ymm6,YMMWORD PTR[((-96))+rbp]
  5514. vpxor ymm7,ymm7,ymm1
  5515. vpsrld ymm1,ymm11,25
  5516. vpxor ymm7,ymm7,ymm2
  5517. vpslld ymm2,ymm11,7
  5518. vpandn ymm0,ymm11,ymm13
  5519. vpand ymm4,ymm11,ymm12
  5520. vpxor ymm7,ymm7,ymm1
  5521. vpsrld ymm14,ymm15,2
  5522. vpxor ymm7,ymm7,ymm2
  5523. vpslld ymm1,ymm15,30
  5524. vpxor ymm0,ymm0,ymm4
  5525. vpxor ymm4,ymm8,ymm15
  5526. vpxor ymm14,ymm14,ymm1
  5527. vpaddd ymm6,ymm6,ymm7
  5528. vpsrld ymm1,ymm15,13
  5529. vpslld ymm2,ymm15,19
  5530. vpaddd ymm6,ymm6,ymm0
  5531. vpand ymm3,ymm3,ymm4
  5532. vpxor ymm7,ymm14,ymm1
  5533. vpsrld ymm1,ymm15,22
  5534. vpxor ymm7,ymm7,ymm2
  5535. vpslld ymm2,ymm15,10
  5536. vpxor ymm14,ymm8,ymm3
  5537. vpaddd ymm10,ymm10,ymm6
  5538. vpxor ymm7,ymm7,ymm1
  5539. vpxor ymm7,ymm7,ymm2
  5540. vpaddd ymm14,ymm14,ymm6
  5541. vpaddd ymm14,ymm14,ymm7
  5542. vmovdqu ymm6,YMMWORD PTR[((96-128))+rax]
  5543. vpaddd ymm5,ymm5,YMMWORD PTR[((352-256-128))+rbx]
  5544. vpsrld ymm7,ymm6,3
  5545. vpsrld ymm1,ymm6,7
  5546. vpslld ymm2,ymm6,25
  5547. vpxor ymm7,ymm7,ymm1
  5548. vpsrld ymm1,ymm6,18
  5549. vpxor ymm7,ymm7,ymm2
  5550. vpslld ymm2,ymm6,14
  5551. vmovdqu ymm0,YMMWORD PTR[((0-128))+rax]
  5552. vpsrld ymm3,ymm0,10
  5553. vpxor ymm7,ymm7,ymm1
  5554. vpsrld ymm1,ymm0,17
  5555. vpxor ymm7,ymm7,ymm2
  5556. vpslld ymm2,ymm0,15
  5557. vpaddd ymm5,ymm5,ymm7
  5558. vpxor ymm7,ymm3,ymm1
  5559. vpsrld ymm1,ymm0,19
  5560. vpxor ymm7,ymm7,ymm2
  5561. vpslld ymm2,ymm0,13
  5562. vpxor ymm7,ymm7,ymm1
  5563. vpxor ymm7,ymm7,ymm2
  5564. vpaddd ymm5,ymm5,ymm7
  5565. vpsrld ymm7,ymm10,6
  5566. vpslld ymm2,ymm10,26
  5567. vmovdqu YMMWORD PTR[(64-128)+rax],ymm5
  5568. vpaddd ymm5,ymm5,ymm13
  5569. vpsrld ymm1,ymm10,11
  5570. vpxor ymm7,ymm7,ymm2
  5571. vpslld ymm2,ymm10,21
  5572. vpaddd ymm5,ymm5,YMMWORD PTR[((-64))+rbp]
  5573. vpxor ymm7,ymm7,ymm1
  5574. vpsrld ymm1,ymm10,25
  5575. vpxor ymm7,ymm7,ymm2
  5576. vpslld ymm2,ymm10,7
  5577. vpandn ymm0,ymm10,ymm12
  5578. vpand ymm3,ymm10,ymm11
  5579. vpxor ymm7,ymm7,ymm1
  5580. vpsrld ymm13,ymm14,2
  5581. vpxor ymm7,ymm7,ymm2
  5582. vpslld ymm1,ymm14,30
  5583. vpxor ymm0,ymm0,ymm3
  5584. vpxor ymm3,ymm15,ymm14
  5585. vpxor ymm13,ymm13,ymm1
  5586. vpaddd ymm5,ymm5,ymm7
  5587. vpsrld ymm1,ymm14,13
  5588. vpslld ymm2,ymm14,19
  5589. vpaddd ymm5,ymm5,ymm0
  5590. vpand ymm4,ymm4,ymm3
  5591. vpxor ymm7,ymm13,ymm1
  5592. vpsrld ymm1,ymm14,22
  5593. vpxor ymm7,ymm7,ymm2
  5594. vpslld ymm2,ymm14,10
  5595. vpxor ymm13,ymm15,ymm4
  5596. vpaddd ymm9,ymm9,ymm5
  5597. vpxor ymm7,ymm7,ymm1
  5598. vpxor ymm7,ymm7,ymm2
  5599. vpaddd ymm13,ymm13,ymm5
  5600. vpaddd ymm13,ymm13,ymm7
  5601. vmovdqu ymm5,YMMWORD PTR[((128-128))+rax]
  5602. vpaddd ymm6,ymm6,YMMWORD PTR[((384-256-128))+rbx]
  5603. vpsrld ymm7,ymm5,3
  5604. vpsrld ymm1,ymm5,7
  5605. vpslld ymm2,ymm5,25
  5606. vpxor ymm7,ymm7,ymm1
  5607. vpsrld ymm1,ymm5,18
  5608. vpxor ymm7,ymm7,ymm2
  5609. vpslld ymm2,ymm5,14
  5610. vmovdqu ymm0,YMMWORD PTR[((32-128))+rax]
  5611. vpsrld ymm4,ymm0,10
  5612. vpxor ymm7,ymm7,ymm1
  5613. vpsrld ymm1,ymm0,17
  5614. vpxor ymm7,ymm7,ymm2
  5615. vpslld ymm2,ymm0,15
  5616. vpaddd ymm6,ymm6,ymm7
  5617. vpxor ymm7,ymm4,ymm1
  5618. vpsrld ymm1,ymm0,19
  5619. vpxor ymm7,ymm7,ymm2
  5620. vpslld ymm2,ymm0,13
  5621. vpxor ymm7,ymm7,ymm1
  5622. vpxor ymm7,ymm7,ymm2
  5623. vpaddd ymm6,ymm6,ymm7
  5624. vpsrld ymm7,ymm9,6
  5625. vpslld ymm2,ymm9,26
  5626. vmovdqu YMMWORD PTR[(96-128)+rax],ymm6
  5627. vpaddd ymm6,ymm6,ymm12
  5628. vpsrld ymm1,ymm9,11
  5629. vpxor ymm7,ymm7,ymm2
  5630. vpslld ymm2,ymm9,21
  5631. vpaddd ymm6,ymm6,YMMWORD PTR[((-32))+rbp]
  5632. vpxor ymm7,ymm7,ymm1
  5633. vpsrld ymm1,ymm9,25
  5634. vpxor ymm7,ymm7,ymm2
  5635. vpslld ymm2,ymm9,7
  5636. vpandn ymm0,ymm9,ymm11
  5637. vpand ymm4,ymm9,ymm10
  5638. vpxor ymm7,ymm7,ymm1
  5639. vpsrld ymm12,ymm13,2
  5640. vpxor ymm7,ymm7,ymm2
  5641. vpslld ymm1,ymm13,30
  5642. vpxor ymm0,ymm0,ymm4
  5643. vpxor ymm4,ymm14,ymm13
  5644. vpxor ymm12,ymm12,ymm1
  5645. vpaddd ymm6,ymm6,ymm7
  5646. vpsrld ymm1,ymm13,13
  5647. vpslld ymm2,ymm13,19
  5648. vpaddd ymm6,ymm6,ymm0
  5649. vpand ymm3,ymm3,ymm4
  5650. vpxor ymm7,ymm12,ymm1
  5651. vpsrld ymm1,ymm13,22
  5652. vpxor ymm7,ymm7,ymm2
  5653. vpslld ymm2,ymm13,10
  5654. vpxor ymm12,ymm14,ymm3
  5655. vpaddd ymm8,ymm8,ymm6
  5656. vpxor ymm7,ymm7,ymm1
  5657. vpxor ymm7,ymm7,ymm2
  5658. vpaddd ymm12,ymm12,ymm6
  5659. vpaddd ymm12,ymm12,ymm7
  5660. vmovdqu ymm6,YMMWORD PTR[((160-128))+rax]
  5661. vpaddd ymm5,ymm5,YMMWORD PTR[((416-256-128))+rbx]
  5662. vpsrld ymm7,ymm6,3
  5663. vpsrld ymm1,ymm6,7
  5664. vpslld ymm2,ymm6,25
  5665. vpxor ymm7,ymm7,ymm1
  5666. vpsrld ymm1,ymm6,18
  5667. vpxor ymm7,ymm7,ymm2
  5668. vpslld ymm2,ymm6,14
  5669. vmovdqu ymm0,YMMWORD PTR[((64-128))+rax]
  5670. vpsrld ymm3,ymm0,10
  5671. vpxor ymm7,ymm7,ymm1
  5672. vpsrld ymm1,ymm0,17
  5673. vpxor ymm7,ymm7,ymm2
  5674. vpslld ymm2,ymm0,15
  5675. vpaddd ymm5,ymm5,ymm7
  5676. vpxor ymm7,ymm3,ymm1
  5677. vpsrld ymm1,ymm0,19
  5678. vpxor ymm7,ymm7,ymm2
  5679. vpslld ymm2,ymm0,13
  5680. vpxor ymm7,ymm7,ymm1
  5681. vpxor ymm7,ymm7,ymm2
  5682. vpaddd ymm5,ymm5,ymm7
  5683. vpsrld ymm7,ymm8,6
  5684. vpslld ymm2,ymm8,26
  5685. vmovdqu YMMWORD PTR[(128-128)+rax],ymm5
  5686. vpaddd ymm5,ymm5,ymm11
  5687. vpsrld ymm1,ymm8,11
  5688. vpxor ymm7,ymm7,ymm2
  5689. vpslld ymm2,ymm8,21
  5690. vpaddd ymm5,ymm5,YMMWORD PTR[rbp]
  5691. vpxor ymm7,ymm7,ymm1
  5692. vpsrld ymm1,ymm8,25
  5693. vpxor ymm7,ymm7,ymm2
  5694. vpslld ymm2,ymm8,7
  5695. vpandn ymm0,ymm8,ymm10
  5696. vpand ymm3,ymm8,ymm9
  5697. vpxor ymm7,ymm7,ymm1
  5698. vpsrld ymm11,ymm12,2
  5699. vpxor ymm7,ymm7,ymm2
  5700. vpslld ymm1,ymm12,30
  5701. vpxor ymm0,ymm0,ymm3
  5702. vpxor ymm3,ymm13,ymm12
  5703. vpxor ymm11,ymm11,ymm1
  5704. vpaddd ymm5,ymm5,ymm7
  5705. vpsrld ymm1,ymm12,13
  5706. vpslld ymm2,ymm12,19
  5707. vpaddd ymm5,ymm5,ymm0
  5708. vpand ymm4,ymm4,ymm3
  5709. vpxor ymm7,ymm11,ymm1
  5710. vpsrld ymm1,ymm12,22
  5711. vpxor ymm7,ymm7,ymm2
  5712. vpslld ymm2,ymm12,10
  5713. vpxor ymm11,ymm13,ymm4
  5714. vpaddd ymm15,ymm15,ymm5
  5715. vpxor ymm7,ymm7,ymm1
  5716. vpxor ymm7,ymm7,ymm2
  5717. vpaddd ymm11,ymm11,ymm5
  5718. vpaddd ymm11,ymm11,ymm7
  5719. vmovdqu ymm5,YMMWORD PTR[((192-128))+rax]
  5720. vpaddd ymm6,ymm6,YMMWORD PTR[((448-256-128))+rbx]
  5721. vpsrld ymm7,ymm5,3
  5722. vpsrld ymm1,ymm5,7
  5723. vpslld ymm2,ymm5,25
  5724. vpxor ymm7,ymm7,ymm1
  5725. vpsrld ymm1,ymm5,18
  5726. vpxor ymm7,ymm7,ymm2
  5727. vpslld ymm2,ymm5,14
  5728. vmovdqu ymm0,YMMWORD PTR[((96-128))+rax]
  5729. vpsrld ymm4,ymm0,10
  5730. vpxor ymm7,ymm7,ymm1
  5731. vpsrld ymm1,ymm0,17
  5732. vpxor ymm7,ymm7,ymm2
  5733. vpslld ymm2,ymm0,15
  5734. vpaddd ymm6,ymm6,ymm7
  5735. vpxor ymm7,ymm4,ymm1
  5736. vpsrld ymm1,ymm0,19
  5737. vpxor ymm7,ymm7,ymm2
  5738. vpslld ymm2,ymm0,13
  5739. vpxor ymm7,ymm7,ymm1
  5740. vpxor ymm7,ymm7,ymm2
  5741. vpaddd ymm6,ymm6,ymm7
  5742. vpsrld ymm7,ymm15,6
  5743. vpslld ymm2,ymm15,26
  5744. vmovdqu YMMWORD PTR[(160-128)+rax],ymm6
  5745. vpaddd ymm6,ymm6,ymm10
  5746. vpsrld ymm1,ymm15,11
  5747. vpxor ymm7,ymm7,ymm2
  5748. vpslld ymm2,ymm15,21
  5749. vpaddd ymm6,ymm6,YMMWORD PTR[32+rbp]
  5750. vpxor ymm7,ymm7,ymm1
  5751. vpsrld ymm1,ymm15,25
  5752. vpxor ymm7,ymm7,ymm2
  5753. vpslld ymm2,ymm15,7
  5754. vpandn ymm0,ymm15,ymm9
  5755. vpand ymm4,ymm15,ymm8
  5756. vpxor ymm7,ymm7,ymm1
  5757. vpsrld ymm10,ymm11,2
  5758. vpxor ymm7,ymm7,ymm2
  5759. vpslld ymm1,ymm11,30
  5760. vpxor ymm0,ymm0,ymm4
  5761. vpxor ymm4,ymm12,ymm11
  5762. vpxor ymm10,ymm10,ymm1
  5763. vpaddd ymm6,ymm6,ymm7
  5764. vpsrld ymm1,ymm11,13
  5765. vpslld ymm2,ymm11,19
  5766. vpaddd ymm6,ymm6,ymm0
  5767. vpand ymm3,ymm3,ymm4
  5768. vpxor ymm7,ymm10,ymm1
  5769. vpsrld ymm1,ymm11,22
  5770. vpxor ymm7,ymm7,ymm2
  5771. vpslld ymm2,ymm11,10
  5772. vpxor ymm10,ymm12,ymm3
  5773. vpaddd ymm14,ymm14,ymm6
  5774. vpxor ymm7,ymm7,ymm1
  5775. vpxor ymm7,ymm7,ymm2
  5776. vpaddd ymm10,ymm10,ymm6
  5777. vpaddd ymm10,ymm10,ymm7
  5778. vmovdqu ymm6,YMMWORD PTR[((224-128))+rax]
  5779. vpaddd ymm5,ymm5,YMMWORD PTR[((480-256-128))+rbx]
  5780. vpsrld ymm7,ymm6,3
  5781. vpsrld ymm1,ymm6,7
  5782. vpslld ymm2,ymm6,25
  5783. vpxor ymm7,ymm7,ymm1
  5784. vpsrld ymm1,ymm6,18
  5785. vpxor ymm7,ymm7,ymm2
  5786. vpslld ymm2,ymm6,14
  5787. vmovdqu ymm0,YMMWORD PTR[((128-128))+rax]
  5788. vpsrld ymm3,ymm0,10
  5789. vpxor ymm7,ymm7,ymm1
  5790. vpsrld ymm1,ymm0,17
  5791. vpxor ymm7,ymm7,ymm2
  5792. vpslld ymm2,ymm0,15
  5793. vpaddd ymm5,ymm5,ymm7
  5794. vpxor ymm7,ymm3,ymm1
  5795. vpsrld ymm1,ymm0,19
  5796. vpxor ymm7,ymm7,ymm2
  5797. vpslld ymm2,ymm0,13
  5798. vpxor ymm7,ymm7,ymm1
  5799. vpxor ymm7,ymm7,ymm2
  5800. vpaddd ymm5,ymm5,ymm7
  5801. vpsrld ymm7,ymm14,6
  5802. vpslld ymm2,ymm14,26
  5803. vmovdqu YMMWORD PTR[(192-128)+rax],ymm5
  5804. vpaddd ymm5,ymm5,ymm9
  5805. vpsrld ymm1,ymm14,11
  5806. vpxor ymm7,ymm7,ymm2
  5807. vpslld ymm2,ymm14,21
  5808. vpaddd ymm5,ymm5,YMMWORD PTR[64+rbp]
  5809. vpxor ymm7,ymm7,ymm1
  5810. vpsrld ymm1,ymm14,25
  5811. vpxor ymm7,ymm7,ymm2
  5812. vpslld ymm2,ymm14,7
  5813. vpandn ymm0,ymm14,ymm8
  5814. vpand ymm3,ymm14,ymm15
  5815. vpxor ymm7,ymm7,ymm1
  5816. vpsrld ymm9,ymm10,2
  5817. vpxor ymm7,ymm7,ymm2
  5818. vpslld ymm1,ymm10,30
  5819. vpxor ymm0,ymm0,ymm3
  5820. vpxor ymm3,ymm11,ymm10
  5821. vpxor ymm9,ymm9,ymm1
  5822. vpaddd ymm5,ymm5,ymm7
  5823. vpsrld ymm1,ymm10,13
  5824. vpslld ymm2,ymm10,19
  5825. vpaddd ymm5,ymm5,ymm0
  5826. vpand ymm4,ymm4,ymm3
  5827. vpxor ymm7,ymm9,ymm1
  5828. vpsrld ymm1,ymm10,22
  5829. vpxor ymm7,ymm7,ymm2
  5830. vpslld ymm2,ymm10,10
  5831. vpxor ymm9,ymm11,ymm4
  5832. vpaddd ymm13,ymm13,ymm5
  5833. vpxor ymm7,ymm7,ymm1
  5834. vpxor ymm7,ymm7,ymm2
  5835. vpaddd ymm9,ymm9,ymm5
  5836. vpaddd ymm9,ymm9,ymm7
  5837. vmovdqu ymm5,YMMWORD PTR[((256-256-128))+rbx]
  5838. vpaddd ymm6,ymm6,YMMWORD PTR[((0-128))+rax]
  5839. vpsrld ymm7,ymm5,3
  5840. vpsrld ymm1,ymm5,7
  5841. vpslld ymm2,ymm5,25
  5842. vpxor ymm7,ymm7,ymm1
  5843. vpsrld ymm1,ymm5,18
  5844. vpxor ymm7,ymm7,ymm2
  5845. vpslld ymm2,ymm5,14
  5846. vmovdqu ymm0,YMMWORD PTR[((160-128))+rax]
  5847. vpsrld ymm4,ymm0,10
  5848. vpxor ymm7,ymm7,ymm1
  5849. vpsrld ymm1,ymm0,17
  5850. vpxor ymm7,ymm7,ymm2
  5851. vpslld ymm2,ymm0,15
  5852. vpaddd ymm6,ymm6,ymm7
  5853. vpxor ymm7,ymm4,ymm1
  5854. vpsrld ymm1,ymm0,19
  5855. vpxor ymm7,ymm7,ymm2
  5856. vpslld ymm2,ymm0,13
  5857. vpxor ymm7,ymm7,ymm1
  5858. vpxor ymm7,ymm7,ymm2
  5859. vpaddd ymm6,ymm6,ymm7
  5860. vpsrld ymm7,ymm13,6
  5861. vpslld ymm2,ymm13,26
  5862. vmovdqu YMMWORD PTR[(224-128)+rax],ymm6
  5863. vpaddd ymm6,ymm6,ymm8
  5864. vpsrld ymm1,ymm13,11
  5865. vpxor ymm7,ymm7,ymm2
  5866. vpslld ymm2,ymm13,21
  5867. vpaddd ymm6,ymm6,YMMWORD PTR[96+rbp]
  5868. vpxor ymm7,ymm7,ymm1
  5869. vpsrld ymm1,ymm13,25
  5870. vpxor ymm7,ymm7,ymm2
  5871. vpslld ymm2,ymm13,7
  5872. vpandn ymm0,ymm13,ymm15
  5873. vpand ymm4,ymm13,ymm14
  5874. vpxor ymm7,ymm7,ymm1
  5875. vpsrld ymm8,ymm9,2
  5876. vpxor ymm7,ymm7,ymm2
  5877. vpslld ymm1,ymm9,30
  5878. vpxor ymm0,ymm0,ymm4
  5879. vpxor ymm4,ymm10,ymm9
  5880. vpxor ymm8,ymm8,ymm1
  5881. vpaddd ymm6,ymm6,ymm7
  5882. vpsrld ymm1,ymm9,13
  5883. vpslld ymm2,ymm9,19
  5884. vpaddd ymm6,ymm6,ymm0
  5885. vpand ymm3,ymm3,ymm4
  5886. vpxor ymm7,ymm8,ymm1
  5887. vpsrld ymm1,ymm9,22
  5888. vpxor ymm7,ymm7,ymm2
  5889. vpslld ymm2,ymm9,10
  5890. vpxor ymm8,ymm10,ymm3
  5891. vpaddd ymm12,ymm12,ymm6
  5892. vpxor ymm7,ymm7,ymm1
  5893. vpxor ymm7,ymm7,ymm2
  5894. vpaddd ymm8,ymm8,ymm6
  5895. vpaddd ymm8,ymm8,ymm7
  5896. add rbp,256
  5897. vmovdqu ymm6,YMMWORD PTR[((288-256-128))+rbx]
  5898. vpaddd ymm5,ymm5,YMMWORD PTR[((32-128))+rax]
  5899. vpsrld ymm7,ymm6,3
  5900. vpsrld ymm1,ymm6,7
  5901. vpslld ymm2,ymm6,25
  5902. vpxor ymm7,ymm7,ymm1
  5903. vpsrld ymm1,ymm6,18
  5904. vpxor ymm7,ymm7,ymm2
  5905. vpslld ymm2,ymm6,14
  5906. vmovdqu ymm0,YMMWORD PTR[((192-128))+rax]
  5907. vpsrld ymm3,ymm0,10
  5908. vpxor ymm7,ymm7,ymm1
  5909. vpsrld ymm1,ymm0,17
  5910. vpxor ymm7,ymm7,ymm2
  5911. vpslld ymm2,ymm0,15
  5912. vpaddd ymm5,ymm5,ymm7
  5913. vpxor ymm7,ymm3,ymm1
  5914. vpsrld ymm1,ymm0,19
  5915. vpxor ymm7,ymm7,ymm2
  5916. vpslld ymm2,ymm0,13
  5917. vpxor ymm7,ymm7,ymm1
  5918. vpxor ymm7,ymm7,ymm2
  5919. vpaddd ymm5,ymm5,ymm7
  5920. vpsrld ymm7,ymm12,6
  5921. vpslld ymm2,ymm12,26
  5922. vmovdqu YMMWORD PTR[(256-256-128)+rbx],ymm5
  5923. vpaddd ymm5,ymm5,ymm15
  5924. vpsrld ymm1,ymm12,11
  5925. vpxor ymm7,ymm7,ymm2
  5926. vpslld ymm2,ymm12,21
  5927. vpaddd ymm5,ymm5,YMMWORD PTR[((-128))+rbp]
  5928. vpxor ymm7,ymm7,ymm1
  5929. vpsrld ymm1,ymm12,25
  5930. vpxor ymm7,ymm7,ymm2
  5931. vpslld ymm2,ymm12,7
  5932. vpandn ymm0,ymm12,ymm14
  5933. vpand ymm3,ymm12,ymm13
  5934. vpxor ymm7,ymm7,ymm1
  5935. vpsrld ymm15,ymm8,2
  5936. vpxor ymm7,ymm7,ymm2
  5937. vpslld ymm1,ymm8,30
  5938. vpxor ymm0,ymm0,ymm3
  5939. vpxor ymm3,ymm9,ymm8
  5940. vpxor ymm15,ymm15,ymm1
  5941. vpaddd ymm5,ymm5,ymm7
  5942. vpsrld ymm1,ymm8,13
  5943. vpslld ymm2,ymm8,19
  5944. vpaddd ymm5,ymm5,ymm0
  5945. vpand ymm4,ymm4,ymm3
  5946. vpxor ymm7,ymm15,ymm1
  5947. vpsrld ymm1,ymm8,22
  5948. vpxor ymm7,ymm7,ymm2
  5949. vpslld ymm2,ymm8,10
  5950. vpxor ymm15,ymm9,ymm4
  5951. vpaddd ymm11,ymm11,ymm5
  5952. vpxor ymm7,ymm7,ymm1
  5953. vpxor ymm7,ymm7,ymm2
  5954. vpaddd ymm15,ymm15,ymm5
  5955. vpaddd ymm15,ymm15,ymm7
  5956. vmovdqu ymm5,YMMWORD PTR[((320-256-128))+rbx]
  5957. vpaddd ymm6,ymm6,YMMWORD PTR[((64-128))+rax]
  5958. vpsrld ymm7,ymm5,3
  5959. vpsrld ymm1,ymm5,7
  5960. vpslld ymm2,ymm5,25
  5961. vpxor ymm7,ymm7,ymm1
  5962. vpsrld ymm1,ymm5,18
  5963. vpxor ymm7,ymm7,ymm2
  5964. vpslld ymm2,ymm5,14
  5965. vmovdqu ymm0,YMMWORD PTR[((224-128))+rax]
  5966. vpsrld ymm4,ymm0,10
  5967. vpxor ymm7,ymm7,ymm1
  5968. vpsrld ymm1,ymm0,17
  5969. vpxor ymm7,ymm7,ymm2
  5970. vpslld ymm2,ymm0,15
  5971. vpaddd ymm6,ymm6,ymm7
  5972. vpxor ymm7,ymm4,ymm1
  5973. vpsrld ymm1,ymm0,19
  5974. vpxor ymm7,ymm7,ymm2
  5975. vpslld ymm2,ymm0,13
  5976. vpxor ymm7,ymm7,ymm1
  5977. vpxor ymm7,ymm7,ymm2
  5978. vpaddd ymm6,ymm6,ymm7
  5979. vpsrld ymm7,ymm11,6
  5980. vpslld ymm2,ymm11,26
  5981. vmovdqu YMMWORD PTR[(288-256-128)+rbx],ymm6
  5982. vpaddd ymm6,ymm6,ymm14
  5983. vpsrld ymm1,ymm11,11
  5984. vpxor ymm7,ymm7,ymm2
  5985. vpslld ymm2,ymm11,21
  5986. vpaddd ymm6,ymm6,YMMWORD PTR[((-96))+rbp]
  5987. vpxor ymm7,ymm7,ymm1
  5988. vpsrld ymm1,ymm11,25
  5989. vpxor ymm7,ymm7,ymm2
  5990. vpslld ymm2,ymm11,7
  5991. vpandn ymm0,ymm11,ymm13
  5992. vpand ymm4,ymm11,ymm12
  5993. vpxor ymm7,ymm7,ymm1
  5994. vpsrld ymm14,ymm15,2
  5995. vpxor ymm7,ymm7,ymm2
  5996. vpslld ymm1,ymm15,30
  5997. vpxor ymm0,ymm0,ymm4
  5998. vpxor ymm4,ymm8,ymm15
  5999. vpxor ymm14,ymm14,ymm1
  6000. vpaddd ymm6,ymm6,ymm7
  6001. vpsrld ymm1,ymm15,13
  6002. vpslld ymm2,ymm15,19
  6003. vpaddd ymm6,ymm6,ymm0
  6004. vpand ymm3,ymm3,ymm4
  6005. vpxor ymm7,ymm14,ymm1
  6006. vpsrld ymm1,ymm15,22
  6007. vpxor ymm7,ymm7,ymm2
  6008. vpslld ymm2,ymm15,10
  6009. vpxor ymm14,ymm8,ymm3
  6010. vpaddd ymm10,ymm10,ymm6
  6011. vpxor ymm7,ymm7,ymm1
  6012. vpxor ymm7,ymm7,ymm2
  6013. vpaddd ymm14,ymm14,ymm6
  6014. vpaddd ymm14,ymm14,ymm7
  6015. vmovdqu ymm6,YMMWORD PTR[((352-256-128))+rbx]
  6016. vpaddd ymm5,ymm5,YMMWORD PTR[((96-128))+rax]
  6017. vpsrld ymm7,ymm6,3
  6018. vpsrld ymm1,ymm6,7
  6019. vpslld ymm2,ymm6,25
  6020. vpxor ymm7,ymm7,ymm1
  6021. vpsrld ymm1,ymm6,18
  6022. vpxor ymm7,ymm7,ymm2
  6023. vpslld ymm2,ymm6,14
  6024. vmovdqu ymm0,YMMWORD PTR[((256-256-128))+rbx]
  6025. vpsrld ymm3,ymm0,10
  6026. vpxor ymm7,ymm7,ymm1
  6027. vpsrld ymm1,ymm0,17
  6028. vpxor ymm7,ymm7,ymm2
  6029. vpslld ymm2,ymm0,15
  6030. vpaddd ymm5,ymm5,ymm7
  6031. vpxor ymm7,ymm3,ymm1
  6032. vpsrld ymm1,ymm0,19
  6033. vpxor ymm7,ymm7,ymm2
  6034. vpslld ymm2,ymm0,13
  6035. vpxor ymm7,ymm7,ymm1
  6036. vpxor ymm7,ymm7,ymm2
  6037. vpaddd ymm5,ymm5,ymm7
  6038. vpsrld ymm7,ymm10,6
  6039. vpslld ymm2,ymm10,26
  6040. vmovdqu YMMWORD PTR[(320-256-128)+rbx],ymm5
  6041. vpaddd ymm5,ymm5,ymm13
  6042. vpsrld ymm1,ymm10,11
  6043. vpxor ymm7,ymm7,ymm2
  6044. vpslld ymm2,ymm10,21
  6045. vpaddd ymm5,ymm5,YMMWORD PTR[((-64))+rbp]
  6046. vpxor ymm7,ymm7,ymm1
  6047. vpsrld ymm1,ymm10,25
  6048. vpxor ymm7,ymm7,ymm2
  6049. vpslld ymm2,ymm10,7
  6050. vpandn ymm0,ymm10,ymm12
  6051. vpand ymm3,ymm10,ymm11
  6052. vpxor ymm7,ymm7,ymm1
  6053. vpsrld ymm13,ymm14,2
  6054. vpxor ymm7,ymm7,ymm2
  6055. vpslld ymm1,ymm14,30
  6056. vpxor ymm0,ymm0,ymm3
  6057. vpxor ymm3,ymm15,ymm14
  6058. vpxor ymm13,ymm13,ymm1
  6059. vpaddd ymm5,ymm5,ymm7
  6060. vpsrld ymm1,ymm14,13
  6061. vpslld ymm2,ymm14,19
  6062. vpaddd ymm5,ymm5,ymm0
  6063. vpand ymm4,ymm4,ymm3
  6064. vpxor ymm7,ymm13,ymm1
  6065. vpsrld ymm1,ymm14,22
  6066. vpxor ymm7,ymm7,ymm2
  6067. vpslld ymm2,ymm14,10
  6068. vpxor ymm13,ymm15,ymm4
  6069. vpaddd ymm9,ymm9,ymm5
  6070. vpxor ymm7,ymm7,ymm1
  6071. vpxor ymm7,ymm7,ymm2
  6072. vpaddd ymm13,ymm13,ymm5
  6073. vpaddd ymm13,ymm13,ymm7
  6074. vmovdqu ymm5,YMMWORD PTR[((384-256-128))+rbx]
  6075. vpaddd ymm6,ymm6,YMMWORD PTR[((128-128))+rax]
  6076. vpsrld ymm7,ymm5,3
  6077. vpsrld ymm1,ymm5,7
  6078. vpslld ymm2,ymm5,25
  6079. vpxor ymm7,ymm7,ymm1
  6080. vpsrld ymm1,ymm5,18
  6081. vpxor ymm7,ymm7,ymm2
  6082. vpslld ymm2,ymm5,14
  6083. vmovdqu ymm0,YMMWORD PTR[((288-256-128))+rbx]
  6084. vpsrld ymm4,ymm0,10
  6085. vpxor ymm7,ymm7,ymm1
  6086. vpsrld ymm1,ymm0,17
  6087. vpxor ymm7,ymm7,ymm2
  6088. vpslld ymm2,ymm0,15
  6089. vpaddd ymm6,ymm6,ymm7
  6090. vpxor ymm7,ymm4,ymm1
  6091. vpsrld ymm1,ymm0,19
  6092. vpxor ymm7,ymm7,ymm2
  6093. vpslld ymm2,ymm0,13
  6094. vpxor ymm7,ymm7,ymm1
  6095. vpxor ymm7,ymm7,ymm2
  6096. vpaddd ymm6,ymm6,ymm7
  6097. vpsrld ymm7,ymm9,6
  6098. vpslld ymm2,ymm9,26
  6099. vmovdqu YMMWORD PTR[(352-256-128)+rbx],ymm6
  6100. vpaddd ymm6,ymm6,ymm12
  6101. vpsrld ymm1,ymm9,11
  6102. vpxor ymm7,ymm7,ymm2
  6103. vpslld ymm2,ymm9,21
  6104. vpaddd ymm6,ymm6,YMMWORD PTR[((-32))+rbp]
  6105. vpxor ymm7,ymm7,ymm1
  6106. vpsrld ymm1,ymm9,25
  6107. vpxor ymm7,ymm7,ymm2
  6108. vpslld ymm2,ymm9,7
  6109. vpandn ymm0,ymm9,ymm11
  6110. vpand ymm4,ymm9,ymm10
  6111. vpxor ymm7,ymm7,ymm1
  6112. vpsrld ymm12,ymm13,2
  6113. vpxor ymm7,ymm7,ymm2
  6114. vpslld ymm1,ymm13,30
  6115. vpxor ymm0,ymm0,ymm4
  6116. vpxor ymm4,ymm14,ymm13
  6117. vpxor ymm12,ymm12,ymm1
  6118. vpaddd ymm6,ymm6,ymm7
  6119. vpsrld ymm1,ymm13,13
  6120. vpslld ymm2,ymm13,19
  6121. vpaddd ymm6,ymm6,ymm0
  6122. vpand ymm3,ymm3,ymm4
  6123. vpxor ymm7,ymm12,ymm1
  6124. vpsrld ymm1,ymm13,22
  6125. vpxor ymm7,ymm7,ymm2
  6126. vpslld ymm2,ymm13,10
  6127. vpxor ymm12,ymm14,ymm3
  6128. vpaddd ymm8,ymm8,ymm6
  6129. vpxor ymm7,ymm7,ymm1
  6130. vpxor ymm7,ymm7,ymm2
  6131. vpaddd ymm12,ymm12,ymm6
  6132. vpaddd ymm12,ymm12,ymm7
  6133. vmovdqu ymm6,YMMWORD PTR[((416-256-128))+rbx]
  6134. vpaddd ymm5,ymm5,YMMWORD PTR[((160-128))+rax]
  6135. vpsrld ymm7,ymm6,3
  6136. vpsrld ymm1,ymm6,7
  6137. vpslld ymm2,ymm6,25
  6138. vpxor ymm7,ymm7,ymm1
  6139. vpsrld ymm1,ymm6,18
  6140. vpxor ymm7,ymm7,ymm2
  6141. vpslld ymm2,ymm6,14
  6142. vmovdqu ymm0,YMMWORD PTR[((320-256-128))+rbx]
  6143. vpsrld ymm3,ymm0,10
  6144. vpxor ymm7,ymm7,ymm1
  6145. vpsrld ymm1,ymm0,17
  6146. vpxor ymm7,ymm7,ymm2
  6147. vpslld ymm2,ymm0,15
  6148. vpaddd ymm5,ymm5,ymm7
  6149. vpxor ymm7,ymm3,ymm1
  6150. vpsrld ymm1,ymm0,19
  6151. vpxor ymm7,ymm7,ymm2
  6152. vpslld ymm2,ymm0,13
  6153. vpxor ymm7,ymm7,ymm1
  6154. vpxor ymm7,ymm7,ymm2
  6155. vpaddd ymm5,ymm5,ymm7
  6156. vpsrld ymm7,ymm8,6
  6157. vpslld ymm2,ymm8,26
  6158. vmovdqu YMMWORD PTR[(384-256-128)+rbx],ymm5
  6159. vpaddd ymm5,ymm5,ymm11
  6160. vpsrld ymm1,ymm8,11
  6161. vpxor ymm7,ymm7,ymm2
  6162. vpslld ymm2,ymm8,21
  6163. vpaddd ymm5,ymm5,YMMWORD PTR[rbp]
  6164. vpxor ymm7,ymm7,ymm1
  6165. vpsrld ymm1,ymm8,25
  6166. vpxor ymm7,ymm7,ymm2
  6167. vpslld ymm2,ymm8,7
  6168. vpandn ymm0,ymm8,ymm10
  6169. vpand ymm3,ymm8,ymm9
  6170. vpxor ymm7,ymm7,ymm1
  6171. vpsrld ymm11,ymm12,2
  6172. vpxor ymm7,ymm7,ymm2
  6173. vpslld ymm1,ymm12,30
  6174. vpxor ymm0,ymm0,ymm3
  6175. vpxor ymm3,ymm13,ymm12
  6176. vpxor ymm11,ymm11,ymm1
  6177. vpaddd ymm5,ymm5,ymm7
  6178. vpsrld ymm1,ymm12,13
  6179. vpslld ymm2,ymm12,19
  6180. vpaddd ymm5,ymm5,ymm0
  6181. vpand ymm4,ymm4,ymm3
  6182. vpxor ymm7,ymm11,ymm1
  6183. vpsrld ymm1,ymm12,22
  6184. vpxor ymm7,ymm7,ymm2
  6185. vpslld ymm2,ymm12,10
  6186. vpxor ymm11,ymm13,ymm4
  6187. vpaddd ymm15,ymm15,ymm5
  6188. vpxor ymm7,ymm7,ymm1
  6189. vpxor ymm7,ymm7,ymm2
  6190. vpaddd ymm11,ymm11,ymm5
  6191. vpaddd ymm11,ymm11,ymm7
  6192. vmovdqu ymm5,YMMWORD PTR[((448-256-128))+rbx]
  6193. vpaddd ymm6,ymm6,YMMWORD PTR[((192-128))+rax]
  6194. vpsrld ymm7,ymm5,3
  6195. vpsrld ymm1,ymm5,7
  6196. vpslld ymm2,ymm5,25
  6197. vpxor ymm7,ymm7,ymm1
  6198. vpsrld ymm1,ymm5,18
  6199. vpxor ymm7,ymm7,ymm2
  6200. vpslld ymm2,ymm5,14
  6201. vmovdqu ymm0,YMMWORD PTR[((352-256-128))+rbx]
  6202. vpsrld ymm4,ymm0,10
  6203. vpxor ymm7,ymm7,ymm1
  6204. vpsrld ymm1,ymm0,17
  6205. vpxor ymm7,ymm7,ymm2
  6206. vpslld ymm2,ymm0,15
  6207. vpaddd ymm6,ymm6,ymm7
  6208. vpxor ymm7,ymm4,ymm1
  6209. vpsrld ymm1,ymm0,19
  6210. vpxor ymm7,ymm7,ymm2
  6211. vpslld ymm2,ymm0,13
  6212. vpxor ymm7,ymm7,ymm1
  6213. vpxor ymm7,ymm7,ymm2
  6214. vpaddd ymm6,ymm6,ymm7
  6215. vpsrld ymm7,ymm15,6
  6216. vpslld ymm2,ymm15,26
  6217. vmovdqu YMMWORD PTR[(416-256-128)+rbx],ymm6
  6218. vpaddd ymm6,ymm6,ymm10
  6219. vpsrld ymm1,ymm15,11
  6220. vpxor ymm7,ymm7,ymm2
  6221. vpslld ymm2,ymm15,21
  6222. vpaddd ymm6,ymm6,YMMWORD PTR[32+rbp]
  6223. vpxor ymm7,ymm7,ymm1
  6224. vpsrld ymm1,ymm15,25
  6225. vpxor ymm7,ymm7,ymm2
  6226. vpslld ymm2,ymm15,7
  6227. vpandn ymm0,ymm15,ymm9
  6228. vpand ymm4,ymm15,ymm8
  6229. vpxor ymm7,ymm7,ymm1
  6230. vpsrld ymm10,ymm11,2
  6231. vpxor ymm7,ymm7,ymm2
  6232. vpslld ymm1,ymm11,30
  6233. vpxor ymm0,ymm0,ymm4
  6234. vpxor ymm4,ymm12,ymm11
  6235. vpxor ymm10,ymm10,ymm1
  6236. vpaddd ymm6,ymm6,ymm7
  6237. vpsrld ymm1,ymm11,13
  6238. vpslld ymm2,ymm11,19
  6239. vpaddd ymm6,ymm6,ymm0
  6240. vpand ymm3,ymm3,ymm4
  6241. vpxor ymm7,ymm10,ymm1
  6242. vpsrld ymm1,ymm11,22
  6243. vpxor ymm7,ymm7,ymm2
  6244. vpslld ymm2,ymm11,10
  6245. vpxor ymm10,ymm12,ymm3
  6246. vpaddd ymm14,ymm14,ymm6
  6247. vpxor ymm7,ymm7,ymm1
  6248. vpxor ymm7,ymm7,ymm2
  6249. vpaddd ymm10,ymm10,ymm6
  6250. vpaddd ymm10,ymm10,ymm7
  6251. vmovdqu ymm6,YMMWORD PTR[((480-256-128))+rbx]
  6252. vpaddd ymm5,ymm5,YMMWORD PTR[((224-128))+rax]
  6253. vpsrld ymm7,ymm6,3
  6254. vpsrld ymm1,ymm6,7
  6255. vpslld ymm2,ymm6,25
  6256. vpxor ymm7,ymm7,ymm1
  6257. vpsrld ymm1,ymm6,18
  6258. vpxor ymm7,ymm7,ymm2
  6259. vpslld ymm2,ymm6,14
  6260. vmovdqu ymm0,YMMWORD PTR[((384-256-128))+rbx]
  6261. vpsrld ymm3,ymm0,10
  6262. vpxor ymm7,ymm7,ymm1
  6263. vpsrld ymm1,ymm0,17
  6264. vpxor ymm7,ymm7,ymm2
  6265. vpslld ymm2,ymm0,15
  6266. vpaddd ymm5,ymm5,ymm7
  6267. vpxor ymm7,ymm3,ymm1
  6268. vpsrld ymm1,ymm0,19
  6269. vpxor ymm7,ymm7,ymm2
  6270. vpslld ymm2,ymm0,13
  6271. vpxor ymm7,ymm7,ymm1
  6272. vpxor ymm7,ymm7,ymm2
  6273. vpaddd ymm5,ymm5,ymm7
  6274. vpsrld ymm7,ymm14,6
  6275. vpslld ymm2,ymm14,26
  6276. vmovdqu YMMWORD PTR[(448-256-128)+rbx],ymm5
  6277. vpaddd ymm5,ymm5,ymm9
  6278. vpsrld ymm1,ymm14,11
  6279. vpxor ymm7,ymm7,ymm2
  6280. vpslld ymm2,ymm14,21
  6281. vpaddd ymm5,ymm5,YMMWORD PTR[64+rbp]
  6282. vpxor ymm7,ymm7,ymm1
  6283. vpsrld ymm1,ymm14,25
  6284. vpxor ymm7,ymm7,ymm2
  6285. vpslld ymm2,ymm14,7
  6286. vpandn ymm0,ymm14,ymm8
  6287. vpand ymm3,ymm14,ymm15
  6288. vpxor ymm7,ymm7,ymm1
  6289. vpsrld ymm9,ymm10,2
  6290. vpxor ymm7,ymm7,ymm2
  6291. vpslld ymm1,ymm10,30
  6292. vpxor ymm0,ymm0,ymm3
  6293. vpxor ymm3,ymm11,ymm10
  6294. vpxor ymm9,ymm9,ymm1
  6295. vpaddd ymm5,ymm5,ymm7
  6296. vpsrld ymm1,ymm10,13
  6297. vpslld ymm2,ymm10,19
  6298. vpaddd ymm5,ymm5,ymm0
  6299. vpand ymm4,ymm4,ymm3
  6300. vpxor ymm7,ymm9,ymm1
  6301. vpsrld ymm1,ymm10,22
  6302. vpxor ymm7,ymm7,ymm2
  6303. vpslld ymm2,ymm10,10
  6304. vpxor ymm9,ymm11,ymm4
  6305. vpaddd ymm13,ymm13,ymm5
  6306. vpxor ymm7,ymm7,ymm1
  6307. vpxor ymm7,ymm7,ymm2
  6308. vpaddd ymm9,ymm9,ymm5
  6309. vpaddd ymm9,ymm9,ymm7
  6310. vmovdqu ymm5,YMMWORD PTR[((0-128))+rax]
  6311. vpaddd ymm6,ymm6,YMMWORD PTR[((256-256-128))+rbx]
  6312. vpsrld ymm7,ymm5,3
  6313. vpsrld ymm1,ymm5,7
  6314. vpslld ymm2,ymm5,25
  6315. vpxor ymm7,ymm7,ymm1
  6316. vpsrld ymm1,ymm5,18
  6317. vpxor ymm7,ymm7,ymm2
  6318. vpslld ymm2,ymm5,14
  6319. vmovdqu ymm0,YMMWORD PTR[((416-256-128))+rbx]
  6320. vpsrld ymm4,ymm0,10
  6321. vpxor ymm7,ymm7,ymm1
  6322. vpsrld ymm1,ymm0,17
  6323. vpxor ymm7,ymm7,ymm2
  6324. vpslld ymm2,ymm0,15
  6325. vpaddd ymm6,ymm6,ymm7
  6326. vpxor ymm7,ymm4,ymm1
  6327. vpsrld ymm1,ymm0,19
  6328. vpxor ymm7,ymm7,ymm2
  6329. vpslld ymm2,ymm0,13
  6330. vpxor ymm7,ymm7,ymm1
  6331. vpxor ymm7,ymm7,ymm2
  6332. vpaddd ymm6,ymm6,ymm7
  6333. vpsrld ymm7,ymm13,6
  6334. vpslld ymm2,ymm13,26
  6335. vmovdqu YMMWORD PTR[(480-256-128)+rbx],ymm6
  6336. vpaddd ymm6,ymm6,ymm8
  6337. vpsrld ymm1,ymm13,11
  6338. vpxor ymm7,ymm7,ymm2
  6339. vpslld ymm2,ymm13,21
  6340. vpaddd ymm6,ymm6,YMMWORD PTR[96+rbp]
  6341. vpxor ymm7,ymm7,ymm1
  6342. vpsrld ymm1,ymm13,25
  6343. vpxor ymm7,ymm7,ymm2
  6344. vpslld ymm2,ymm13,7
  6345. vpandn ymm0,ymm13,ymm15
  6346. vpand ymm4,ymm13,ymm14
  6347. vpxor ymm7,ymm7,ymm1
  6348. vpsrld ymm8,ymm9,2
  6349. vpxor ymm7,ymm7,ymm2
  6350. vpslld ymm1,ymm9,30
  6351. vpxor ymm0,ymm0,ymm4
  6352. vpxor ymm4,ymm10,ymm9
  6353. vpxor ymm8,ymm8,ymm1
  6354. vpaddd ymm6,ymm6,ymm7
  6355. vpsrld ymm1,ymm9,13
  6356. vpslld ymm2,ymm9,19
  6357. vpaddd ymm6,ymm6,ymm0
  6358. vpand ymm3,ymm3,ymm4
  6359. vpxor ymm7,ymm8,ymm1
  6360. vpsrld ymm1,ymm9,22
  6361. vpxor ymm7,ymm7,ymm2
  6362. vpslld ymm2,ymm9,10
  6363. vpxor ymm8,ymm10,ymm3
  6364. vpaddd ymm12,ymm12,ymm6
  6365. vpxor ymm7,ymm7,ymm1
  6366. vpxor ymm7,ymm7,ymm2
  6367. vpaddd ymm8,ymm8,ymm6
  6368. vpaddd ymm8,ymm8,ymm7
  6369. add rbp,256
  6370. dec ecx
  6371. jnz $L$oop_16_xx_avx2
  ; Tail of one pass of the outer loop, reached once the inner round loop falls through.
  ; Steps performed here:
  ;   1. point rbx at the eight dwords at [512+rsp] and rewind rbp to K256+128;
  ;   2. for every dword that is <= 1 (signed), replace the matching pointer register with rbp;
  ;   3. build a per-dword mask "dword > 0" and use it to subtract 1 from those dwords;
  ;   4. AND ymm8..ymm15 with the mask and add them into the eight 32-byte vectors
  ;      at [rdi-128 .. rdi+96];
  ;   5. store the dwords back, restore rbx/ymm6, and repeat while --edx != 0.
  ; NOTE(review): the dwords look like per-lane remaining-block counters and
  ; r12-r15/r8-r11 like per-lane input pointers; both are set up outside this view -- confirm.
  6372. mov ecx,1 ; constant 1 used by the eight compares below
  6373. lea rbx,QWORD PTR[512+rsp] ; rbx -> eight consecutive dwords on the stack
  6374. lea rbp,QWORD PTR[((K256+128))] ; rewind rbp; round code addresses constants as [-128..+96+rbp]
  6375. cmp ecx,DWORD PTR[rbx] ; signed compare: 1 vs dword 0
  6376. cmovge r12,rbp ; dword 0 <= 1 -> r12 = K256+128
  6377. cmp ecx,DWORD PTR[4+rbx]
  6378. cmovge r13,rbp ; same test for dword 1
  6379. cmp ecx,DWORD PTR[8+rbx]
  6380. cmovge r14,rbp ; dword 2
  6381. cmp ecx,DWORD PTR[12+rbx]
  6382. cmovge r15,rbp ; dword 3
  6383. cmp ecx,DWORD PTR[16+rbx]
  6384. cmovge r8,rbp ; dword 4
  6385. cmp ecx,DWORD PTR[20+rbx]
  6386. cmovge r9,rbp ; dword 5
  6387. cmp ecx,DWORD PTR[24+rbx]
  6388. cmovge r10,rbp ; dword 6
  6389. cmovge_placeholder_removed
  6390. cmovge r11,rbp ; dword 7
  6391. vmovdqa ymm7,YMMWORD PTR[rbx] ; aligned load of all eight dwords
  6392. vpxor ymm0,ymm0,ymm0 ; ymm0 = 0
  6393. vmovdqa ymm6,ymm7
  6394. vpcmpgtd ymm6,ymm6,ymm0 ; ymm6 = per-dword mask: all-ones where dword > 0, else 0
  6395. vpaddd ymm7,ymm7,ymm6 ; adding -1 where the mask is set: positive dwords decrement, others unchanged
  6396. vmovdqu ymm0,YMMWORD PTR[((0-128))+rdi]
  6397. vpand ymm8,ymm8,ymm6 ; zero the dwords of ymm8 whose mask is 0
  6398. vmovdqu ymm1,YMMWORD PTR[((32-128))+rdi]
  6399. vpand ymm9,ymm9,ymm6
  6400. vmovdqu ymm2,YMMWORD PTR[((64-128))+rdi]
  6401. vpand ymm10,ymm10,ymm6
  6402. vmovdqu ymm5,YMMWORD PTR[((96-128))+rdi]
  6403. vpand ymm11,ymm11,ymm6
  6404. vpaddd ymm8,ymm8,ymm0 ; ymm8 = value at [rdi-128] + masked ymm8
  6405. vmovdqu ymm0,YMMWORD PTR[((128-128))+rdi]
  6406. vpand ymm12,ymm12,ymm6
  6407. vpaddd ymm9,ymm9,ymm1
  6408. vmovdqu ymm1,YMMWORD PTR[((160-128))+rdi]
  6409. vpand ymm13,ymm13,ymm6
  6410. vpaddd ymm10,ymm10,ymm2
  6411. vmovdqu ymm2,YMMWORD PTR[((192-128))+rdi]
  6412. vpand ymm14,ymm14,ymm6
  6413. vpaddd ymm11,ymm11,ymm5
  6414. vmovdqu ymm5,YMMWORD PTR[((224-128))+rdi]
  6415. vpand ymm15,ymm15,ymm6
  6416. vpaddd ymm12,ymm12,ymm0
  6417. vpaddd ymm13,ymm13,ymm1
  6418. vmovdqu YMMWORD PTR[(0-128)+rdi],ymm8 ; write the eight sums back to the same rdi-relative slots
  6419. vpaddd ymm14,ymm14,ymm2
  6420. vmovdqu YMMWORD PTR[(32-128)+rdi],ymm9
  6421. vpaddd ymm15,ymm15,ymm5
  6422. vmovdqu YMMWORD PTR[(64-128)+rdi],ymm10
  6423. vmovdqu YMMWORD PTR[(96-128)+rdi],ymm11
  6424. vmovdqu YMMWORD PTR[(128-128)+rdi],ymm12
  6425. vmovdqu YMMWORD PTR[(160-128)+rdi],ymm13
  6426. vmovdqu YMMWORD PTR[(192-128)+rdi],ymm14
  6427. vmovdqu YMMWORD PTR[(224-128)+rdi],ymm15
  6428. vmovdqu YMMWORD PTR[rbx],ymm7 ; store the updated dwords back to [512+rsp]
  6429. lea rbx,QWORD PTR[((256+128))+rsp] ; restore the bias assumed by the round code's [(N-256-128)+rbx] operands
  6430. vmovdqu ymm6,YMMWORD PTR[$L$pbswap] ; reload ymm6 (used as the mask above); presumably a byte-swap shuffle mask -- confirm
  6431. dec edx
  6432. jnz $L$oop_avx2 ; another pass while edx != 0
  ; Exit path: reloads a base pointer from [544+rsp], restores xmm6-xmm15 and
  ; r15/r14/r13/r12/rbp/rbx from negative offsets off that pointer, makes it the new rsp,
  ; then reloads rdi/rsi and returns.
  ; NOTE(review): the matching saves (prologue) are outside this view; the offsets below
  ; must mirror them -- presumably [544+rsp] holds the stack pointer captured at entry.
  6433. $L$done_avx2::
  6434. mov rax,QWORD PTR[544+rsp] ; rax = base pointer for every restore below
  6435. vzeroupper ; clear upper YMM halves before the legacy-SSE movaps that follow
  6436. movaps xmm6,XMMWORD PTR[((-216))+rax] ; xmm6-xmm15 are callee-saved under the Microsoft x64 ABI
  6437. movaps xmm7,XMMWORD PTR[((-200))+rax]
  6438. movaps xmm8,XMMWORD PTR[((-184))+rax]
  6439. movaps xmm9,XMMWORD PTR[((-168))+rax]
  6440. movaps xmm10,XMMWORD PTR[((-152))+rax]
  6441. movaps xmm11,XMMWORD PTR[((-136))+rax]
  6442. movaps xmm12,XMMWORD PTR[((-120))+rax]
  6443. movaps xmm13,XMMWORD PTR[((-104))+rax]
  6444. movaps xmm14,XMMWORD PTR[((-88))+rax]
  6445. movaps xmm15,XMMWORD PTR[((-72))+rax]
  6446. mov r15,QWORD PTR[((-48))+rax] ; callee-saved general-purpose registers
  6447. mov r14,QWORD PTR[((-40))+rax]
  6448. mov r13,QWORD PTR[((-32))+rax]
  6449. mov r12,QWORD PTR[((-24))+rax]
  6450. mov rbp,QWORD PTR[((-16))+rax]
  6451. mov rbx,QWORD PTR[((-8))+rax]
  6452. lea rsp,QWORD PTR[rax] ; rsp = rax; the frame addressed via [..+rsp] above is released
  ; From here rsp is the restored value; rdi/rsi are reloaded from the two slots just above it.
  6453. $L$epilogue_avx2::
  6454. mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue
  6455. mov rsi,QWORD PTR[16+rsp]
  6456. DB 0F3h,0C3h ;repret
  6457. $L$SEH_end_sha256_multi_block_avx2::
  6458. sha256_multi_block_avx2 ENDP
  6459. ALIGN 256
  6460. K256::
  6461. DD 1116352408,1116352408,1116352408,1116352408
  6462. DD 1116352408,1116352408,1116352408,1116352408
  6463. DD 1899447441,1899447441,1899447441,1899447441
  6464. DD 1899447441,1899447441,1899447441,1899447441
  6465. DD 3049323471,3049323471,3049323471,3049323471
  6466. DD 3049323471,3049323471,3049323471,3049323471
  6467. DD 3921009573,3921009573,3921009573,3921009573
  6468. DD 3921009573,3921009573,3921009573,3921009573
  6469. DD 961987163,961987163,961987163,961987163
  6470. DD 961987163,961987163,961987163,961987163
  6471. DD 1508970993,1508970993,1508970993,1508970993
  6472. DD 1508970993,1508970993,1508970993,1508970993
  6473. DD 2453635748,2453635748,2453635748,2453635748
  6474. DD 2453635748,2453635748,2453635748,2453635748
  6475. DD 2870763221,2870763221,2870763221,2870763221
  6476. DD 2870763221,2870763221,2870763221,2870763221
  6477. DD 3624381080,3624381080,3624381080,3624381080
  6478. DD 3624381080,3624381080,3624381080,3624381080
  6479. DD 310598401,310598401,310598401,310598401
  6480. DD 310598401,310598401,310598401,310598401
  6481. DD 607225278,607225278,607225278,607225278
  6482. DD 607225278,607225278,607225278,607225278
  6483. DD 1426881987,1426881987,1426881987,1426881987
  6484. DD 1426881987,1426881987,1426881987,1426881987
  6485. DD 1925078388,1925078388,1925078388,1925078388
  6486. DD 1925078388,1925078388,1925078388,1925078388
  6487. DD 2162078206,2162078206,2162078206,2162078206
  6488. DD 2162078206,2162078206,2162078206,2162078206
  6489. DD 2614888103,2614888103,2614888103,2614888103
  6490. DD 2614888103,2614888103,2614888103,2614888103
  6491. DD 3248222580,3248222580,3248222580,3248222580
  6492. DD 3248222580,3248222580,3248222580,3248222580
  6493. DD 3835390401,3835390401,3835390401,3835390401
  6494. DD 3835390401,3835390401,3835390401,3835390401
  6495. DD 4022224774,4022224774,4022224774,4022224774
  6496. DD 4022224774,4022224774,4022224774,4022224774
  6497. DD 264347078,264347078,264347078,264347078
  6498. DD 264347078,264347078,264347078,264347078
  6499. DD 604807628,604807628,604807628,604807628
  6500. DD 604807628,604807628,604807628,604807628
  6501. DD 770255983,770255983,770255983,770255983
  6502. DD 770255983,770255983,770255983,770255983
  6503. DD 1249150122,1249150122,1249150122,1249150122
  6504. DD 1249150122,1249150122,1249150122,1249150122
  6505. DD 1555081692,1555081692,1555081692,1555081692
  6506. DD 1555081692,1555081692,1555081692,1555081692
  6507. DD 1996064986,1996064986,1996064986,1996064986
  6508. DD 1996064986,1996064986,1996064986,1996064986
  6509. DD 2554220882,2554220882,2554220882,2554220882
  6510. DD 2554220882,2554220882,2554220882,2554220882
  6511. DD 2821834349,2821834349,2821834349,2821834349
  6512. DD 2821834349,2821834349,2821834349,2821834349
  6513. DD 2952996808,2952996808,2952996808,2952996808
  6514. DD 2952996808,2952996808,2952996808,2952996808
  6515. DD 3210313671,3210313671,3210313671,3210313671
  6516. DD 3210313671,3210313671,3210313671,3210313671
  6517. DD 3336571891,3336571891,3336571891,3336571891
  6518. DD 3336571891,3336571891,3336571891,3336571891
  6519. DD 3584528711,3584528711,3584528711,3584528711
  6520. DD 3584528711,3584528711,3584528711,3584528711
  6521. DD 113926993,113926993,113926993,113926993
  6522. DD 113926993,113926993,113926993,113926993
  6523. DD 338241895,338241895,338241895,338241895
  6524. DD 338241895,338241895,338241895,338241895
  6525. DD 666307205,666307205,666307205,666307205
  6526. DD 666307205,666307205,666307205,666307205
  6527. DD 773529912,773529912,773529912,773529912
  6528. DD 773529912,773529912,773529912,773529912
  6529. DD 1294757372,1294757372,1294757372,1294757372
  6530. DD 1294757372,1294757372,1294757372,1294757372
  6531. DD 1396182291,1396182291,1396182291,1396182291
  6532. DD 1396182291,1396182291,1396182291,1396182291
  6533. DD 1695183700,1695183700,1695183700,1695183700
  6534. DD 1695183700,1695183700,1695183700,1695183700
  6535. DD 1986661051,1986661051,1986661051,1986661051
  6536. DD 1986661051,1986661051,1986661051,1986661051
  6537. DD 2177026350,2177026350,2177026350,2177026350
  6538. DD 2177026350,2177026350,2177026350,2177026350
  6539. DD 2456956037,2456956037,2456956037,2456956037
  6540. DD 2456956037,2456956037,2456956037,2456956037
  6541. DD 2730485921,2730485921,2730485921,2730485921
  6542. DD 2730485921,2730485921,2730485921,2730485921
  6543. DD 2820302411,2820302411,2820302411,2820302411
  6544. DD 2820302411,2820302411,2820302411,2820302411
  6545. DD 3259730800,3259730800,3259730800,3259730800
  6546. DD 3259730800,3259730800,3259730800,3259730800
  6547. DD 3345764771,3345764771,3345764771,3345764771
  6548. DD 3345764771,3345764771,3345764771,3345764771
  6549. DD 3516065817,3516065817,3516065817,3516065817
  6550. DD 3516065817,3516065817,3516065817,3516065817
  6551. DD 3600352804,3600352804,3600352804,3600352804
  6552. DD 3600352804,3600352804,3600352804,3600352804
  6553. DD 4094571909,4094571909,4094571909,4094571909
  6554. DD 4094571909,4094571909,4094571909,4094571909
  6555. DD 275423344,275423344,275423344,275423344
  6556. DD 275423344,275423344,275423344,275423344
  6557. DD 430227734,430227734,430227734,430227734
  6558. DD 430227734,430227734,430227734,430227734
  6559. DD 506948616,506948616,506948616,506948616
  6560. DD 506948616,506948616,506948616,506948616
  6561. DD 659060556,659060556,659060556,659060556
  6562. DD 659060556,659060556,659060556,659060556
  6563. DD 883997877,883997877,883997877,883997877
  6564. DD 883997877,883997877,883997877,883997877
  6565. DD 958139571,958139571,958139571,958139571
  6566. DD 958139571,958139571,958139571,958139571
  6567. DD 1322822218,1322822218,1322822218,1322822218
  6568. DD 1322822218,1322822218,1322822218,1322822218
  6569. DD 1537002063,1537002063,1537002063,1537002063
  6570. DD 1537002063,1537002063,1537002063,1537002063
  6571. DD 1747873779,1747873779,1747873779,1747873779
  6572. DD 1747873779,1747873779,1747873779,1747873779
  6573. DD 1955562222,1955562222,1955562222,1955562222
  6574. DD 1955562222,1955562222,1955562222,1955562222
  6575. DD 2024104815,2024104815,2024104815,2024104815
  6576. DD 2024104815,2024104815,2024104815,2024104815
  6577. DD 2227730452,2227730452,2227730452,2227730452
  6578. DD 2227730452,2227730452,2227730452,2227730452
  6579. DD 2361852424,2361852424,2361852424,2361852424
  6580. DD 2361852424,2361852424,2361852424,2361852424
  6581. DD 2428436474,2428436474,2428436474,2428436474
  6582. DD 2428436474,2428436474,2428436474,2428436474
  6583. DD 2756734187,2756734187,2756734187,2756734187
  6584. DD 2756734187,2756734187,2756734187,2756734187
  6585. DD 3204031479,3204031479,3204031479,3204031479
  6586. DD 3204031479,3204031479,3204031479,3204031479
  6587. DD 3329325298,3329325298,3329325298,3329325298
  6588. DD 3329325298,3329325298,3329325298,3329325298
  ; 32-byte constant: the same 16-byte pattern stored twice.  In memory
  ; (little-endian) each dword is the byte sequence 3,2,1,0 / 7,6,5,4 / ...
  ; i.e. as byte indices it reverses the byte order inside every 32-bit lane.
  ; NOTE(review): presumably consumed as a pshufb/vpshufb control mask to
  ; byte-swap message words; the users are outside this part of the file.
  6589. $L$pbswap::
  6590. DD 000010203h,004050607h,008090a0bh,00c0d0e0fh
  6591. DD 000010203h,004050607h,008090a0bh,00c0d0e0fh
  ; The 64 SHA-256 round constants K[0..63] (FIPS 180-4), stored once each as
  ; consecutive dwords: 16 rows x 4 constants.  Unlike the table that precedes
  ; $L$pbswap, the values here are not replicated across lanes.
  ; NOTE(review): the name suggests this copy serves the SHA-extension code
  ; path; the code that references it is outside this part of the file.
  6592. K256_shaext::
  6593. DD 0428a2f98h,071374491h,0b5c0fbcfh,0e9b5dba5h
  6594. DD 03956c25bh,059f111f1h,0923f82a4h,0ab1c5ed5h
  6595. DD 0d807aa98h,012835b01h,0243185beh,0550c7dc3h
  6596. DD 072be5d74h,080deb1feh,09bdc06a7h,0c19bf174h
  6597. DD 0e49b69c1h,0efbe4786h,00fc19dc6h,0240ca1cch
  6598. DD 02de92c6fh,04a7484aah,05cb0a9dch,076f988dah
  6599. DD 0983e5152h,0a831c66dh,0b00327c8h,0bf597fc7h
  6600. DD 0c6e00bf3h,0d5a79147h,006ca6351h,014292967h
  6601. DD 027b70a85h,02e1b2138h,04d2c6dfch,053380d13h
  6602. DD 0650a7354h,0766a0abbh,081c2c92eh,092722c85h
  6603. DD 0a2bfe8a1h,0a81a664bh,0c24b8b70h,0c76c51a3h
  6604. DD 0d192e819h,0d6990624h,0f40e3585h,0106aa070h
  6605. DD 019a4c116h,01e376c08h,02748774ch,034b0bcb5h
  6606. DD 0391c0cb3h,04ed8aa4ah,05b9cca4fh,0682e6ff3h
  6607. DD 0748f82eeh,078a5636fh,084c87814h,08cc70208h
  6608. DD 090befffah,0a4506cebh,0bef9a3f7h,0c67178f2h
  ; NUL-terminated ASCII identification string embedded in the object:
  ; "SHA256 multi-block transform for x86_64, CRYPTOGAMS by <appro@openssl.org>"
  6609. DB 83,72,65,50,53,54,32,109,117,108,116,105,45,98,108,111
  6610. DB 99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114
  6611. DB 32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71
  6612. DB 65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112
  6613. DB 101,110,115,115,108,46,111,114,103,62,0
  6614. EXTERN __imp_RtlVirtualUnwind:NEAR
  6615. ALIGN 16
  ;-----------------------------------------------------------------------
  ; se_handler -- Win64 language-specific exception handler registered in
  ; .xdata for sha256_multi_block, sha256_multi_block_shaext and
  ; sha256_multi_block_avx.
  ;
  ; ABI:  Microsoft x64.
  ; In:   r8 = CONTEXT* of the interrupted function
  ;       r9 = DISPATCHER_CONTEXT*
  ;       (rcx / rdx are not read by this routine)
  ; Out:  eax = 1 (ExceptionContinueSearch)
  ;
  ; HandlerData (taken from .xdata) is two image-relative addresses:
  ;   [0] = end-of-prologue label, [1] = epilogue label.
  ; If the faulting Rip lies in [body, epilogue) the handler recovers rbx,
  ; rbp and 160 bytes of XMM state from the interrupted frame and patches
  ; them into the CONTEXT.  In every case it then restores Rsp/Rsi/Rdi in the
  ; CONTEXT, copies the CONTEXT to disp->ContextRecord and calls
  ; RtlVirtualUnwind to step to the caller's frame.
  ;
  ; Numeric displacements follow the Win64 CONTEXT and DISPATCHER_CONTEXT
  ; layouts.  $L$in_prologue is a public (::) label because avx2_handler
  ; jumps to it as well.
  ;-----------------------------------------------------------------------
  6616. se_handler PROC PRIVATE
  6617. push rsi ; preserve all non-volatile GPRs and flags
  6618. push rdi
  6619. push rbx
  6620. push rbp
  6621. push r12
  6622. push r13
  6623. push r14
  6624. push r15
  6625. pushfq
  6626. sub rsp,64 ; 32 bytes shadow space + 4 stack arguments for the call below
  6627. mov rax,QWORD PTR[120+r8] ; rax = context->Rax
  6628. mov rbx,QWORD PTR[248+r8] ; rbx = context->Rip
  6629. mov rsi,QWORD PTR[8+r9] ; rsi = disp->ImageBase
  6630. mov r11,QWORD PTR[56+r9] ; r11 = disp->HandlerData
  6631. mov r10d,DWORD PTR[r11] ; HandlerData[0] (image-relative)
  6632. lea r10,QWORD PTR[r10*1+rsi] ; absolute address of the end-of-prologue label
  6633. cmp rbx,r10
  6634. jb $L$in_prologue ; Rip before body label: take the path that uses context->Rax
  6635. mov rax,QWORD PTR[152+r8] ; rax = context->Rsp
  6636. mov r10d,DWORD PTR[4+r11] ; HandlerData[1] (image-relative)
  6637. lea r10,QWORD PTR[r10*1+rsi] ; absolute address of the epilogue label
  6638. cmp rbx,r10
  6639. jae $L$in_prologue ; Rip at/after epilogue label: context->Rsp is used as-is
  6640. mov rax,QWORD PTR[272+rax] ; qword stored at Rsp+272; presumably the entry-time rsp saved by the prologue (not in this chunk)
  6641. mov rbx,QWORD PTR[((-8))+rax] ; saved rbx
  6642. mov rbp,QWORD PTR[((-16))+rax] ; saved rbp
  6643. mov QWORD PTR[144+r8],rbx ; context->Rbx
  6644. mov QWORD PTR[160+r8],rbp ; context->Rbp
  6645. lea rsi,QWORD PTR[((-24-160))+rax] ; source: 160-byte save area below the two pushed GPRs
  6646. lea rdi,QWORD PTR[512+r8] ; destination: &context->Xmm6
  6647. mov ecx,20 ; 20 qwords = 160 bytes = ten XMM registers
  6648. DD 0a548f3fch ; bytes FC F3 48 A5 = cld ; rep movsq
  6649. $L$in_prologue::
  6650. mov rdi,QWORD PTR[8+rax] ; rdi/rsi values stored at 8 and 16 above the frame pointer in rax
  6651. mov rsi,QWORD PTR[16+rax]
  6652. mov QWORD PTR[152+r8],rax ; context->Rsp
  6653. mov QWORD PTR[168+r8],rsi ; context->Rsi
  6654. mov QWORD PTR[176+r8],rdi ; context->Rdi
  6655. mov rdi,QWORD PTR[40+r9] ; destination: disp->ContextRecord
  6656. mov rsi,r8 ; source: the patched CONTEXT
  6657. mov ecx,154 ; 154 qwords = 1232 bytes = sizeof(CONTEXT)
  6658. DD 0a548f3fch ; bytes FC F3 48 A5 = cld ; rep movsq
  6659. mov rsi,r9 ; rsi = disp (r8/r9 are about to become call arguments)
  6660. xor rcx,rcx ; arg1: HandlerType = 0 (UNW_FLAG_NHANDLER)
  6661. mov rdx,QWORD PTR[8+rsi] ; arg2: disp->ImageBase
  6662. mov r8,QWORD PTR[rsi] ; arg3: disp->ControlPc
  6663. mov r9,QWORD PTR[16+rsi] ; arg4: disp->FunctionEntry
  6664. mov r10,QWORD PTR[40+rsi] ; disp->ContextRecord
  6665. lea r11,QWORD PTR[56+rsi] ; &disp->HandlerData
  6666. lea r12,QWORD PTR[24+rsi] ; &disp->EstablisherFrame
  6667. mov QWORD PTR[32+rsp],r10 ; arg5
  6668. mov QWORD PTR[40+rsp],r11 ; arg6
  6669. mov QWORD PTR[48+rsp],r12 ; arg7
  6670. mov QWORD PTR[56+rsp],rcx ; arg8 = NULL
  6671. call QWORD PTR[__imp_RtlVirtualUnwind]
  6672. mov eax,1 ; return ExceptionContinueSearch
  6673. add rsp,64
  6674. popfq
  6675. pop r15
  6676. pop r14
  6677. pop r13
  6678. pop r12
  6679. pop rbp
  6680. pop rbx
  6681. pop rdi
  6682. pop rsi
  6683. DB 0F3h,0C3h ;repret
  6684. se_handler ENDP
  6685. ALIGN 16
  ;-----------------------------------------------------------------------
  ; avx2_handler -- Win64 language-specific exception handler registered in
  ; .xdata for sha256_multi_block_avx2.
  ;
  ; ABI:  Microsoft x64.
  ; In:   r8 = CONTEXT* of the interrupted function
  ;       r9 = DISPATCHER_CONTEXT*
  ; Out:  eax = 1 (set by the shared tail at $L$in_prologue in se_handler)
  ;
  ; Same scheme as se_handler, but when Rip lies in [body, epilogue) it
  ; recovers six GPRs (rbx, rbp, r12-r15) plus 160 bytes of XMM state, then
  ; jumps into se_handler's $L$in_prologue tail, which finishes the CONTEXT
  ; fix-up, calls RtlVirtualUnwind and pops the registers pushed here.  The
  ; push sequence below must therefore stay identical to se_handler's.
  ;
  ; Numeric displacements follow the Win64 CONTEXT and DISPATCHER_CONTEXT
  ; layouts.
  ;-----------------------------------------------------------------------
  6686. avx2_handler PROC PRIVATE
  6687. push rsi ; preserve all non-volatile GPRs and flags (popped in se_handler's tail)
  6688. push rdi
  6689. push rbx
  6690. push rbp
  6691. push r12
  6692. push r13
  6693. push r14
  6694. push r15
  6695. pushfq
  6696. sub rsp,64 ; 32 bytes shadow space + 4 stack arguments for RtlVirtualUnwind
  6697. mov rax,QWORD PTR[120+r8] ; rax = context->Rax
  6698. mov rbx,QWORD PTR[248+r8] ; rbx = context->Rip
  6699. mov rsi,QWORD PTR[8+r9] ; rsi = disp->ImageBase
  6700. mov r11,QWORD PTR[56+r9] ; r11 = disp->HandlerData
  6701. mov r10d,DWORD PTR[r11] ; HandlerData[0] (image-relative)
  6702. lea r10,QWORD PTR[r10*1+rsi] ; absolute address of the end-of-prologue label
  6703. cmp rbx,r10
  6704. jb $L$in_prologue ; Rip before body label
  6705. mov rax,QWORD PTR[152+r8] ; rax = context->Rsp
  6706. mov r10d,DWORD PTR[4+r11] ; HandlerData[1] (image-relative)
  6707. lea r10,QWORD PTR[r10*1+rsi] ; absolute address of the epilogue label
  6708. cmp rbx,r10
  6709. jae $L$in_prologue ; Rip at/after epilogue label
  ; Fetch the saved stack pointer from the interrupted frame (base = the
  ; context->Rsp just loaded into rax), mirroring se_handler's [272+rax].
  ; This used to read [544+r8], i.e. a slot inside the CONTEXT record's own
  ; XMM save area (32 bytes past the 512+r8 destination written below), which
  ; is not a stack pointer and left the Rsp load above unused.
  ; NOTE(review): 544 is kept as the frame offset; confirm it against the
  ; sha256_multi_block_avx2 prologue, which is outside this part of the file.
  6710. mov rax,QWORD PTR[544+rax]
  6711. mov rbx,QWORD PTR[((-8))+rax] ; saved rbx
  6712. mov rbp,QWORD PTR[((-16))+rax] ; saved rbp
  6713. mov r12,QWORD PTR[((-24))+rax] ; saved r12
  6714. mov r13,QWORD PTR[((-32))+rax] ; saved r13
  6715. mov r14,QWORD PTR[((-40))+rax] ; saved r14
  6716. mov r15,QWORD PTR[((-48))+rax] ; saved r15
  6717. mov QWORD PTR[144+r8],rbx ; context->Rbx
  6718. mov QWORD PTR[160+r8],rbp ; context->Rbp
  6719. mov QWORD PTR[216+r8],r12 ; context->R12
  6720. mov QWORD PTR[224+r8],r13 ; context->R13
  6721. mov QWORD PTR[232+r8],r14 ; context->R14
  6722. mov QWORD PTR[240+r8],r15 ; context->R15
  6723. lea rsi,QWORD PTR[((-56-160))+rax] ; source: 160-byte save area below the six pushed GPRs (plus 8 bytes of padding)
  6724. lea rdi,QWORD PTR[512+r8] ; destination: &context->Xmm6
  6725. mov ecx,20 ; 20 qwords = 160 bytes = ten XMM registers
  6726. DD 0a548f3fch ; bytes FC F3 48 A5 = cld ; rep movsq
  6727. jmp $L$in_prologue ; shared tail in se_handler (restores Rsp/Rsi/Rdi, unwinds, returns)
  6728. avx2_handler ENDP
  6729. .text$ ENDS
  ; Function table for Win64 structured exception handling.  Each group of
  ; three image-relative dwords is one entry: function start, function end,
  ; and the address of that function's unwind-info record in .xdata.
  6730. .pdata SEGMENT READONLY ALIGN(4)
  6731. ALIGN 4
  6732. DD imagerel $L$SEH_begin_sha256_multi_block ; entry 1: sha256_multi_block
  6733. DD imagerel $L$SEH_end_sha256_multi_block
  6734. DD imagerel $L$SEH_info_sha256_multi_block
  6735. DD imagerel $L$SEH_begin_sha256_multi_block_shaext ; entry 2: sha256_multi_block_shaext
  6736. DD imagerel $L$SEH_end_sha256_multi_block_shaext
  6737. DD imagerel $L$SEH_info_sha256_multi_block_shaext
  6738. DD imagerel $L$SEH_begin_sha256_multi_block_avx ; entry 3: sha256_multi_block_avx
  6739. DD imagerel $L$SEH_end_sha256_multi_block_avx
  6740. DD imagerel $L$SEH_info_sha256_multi_block_avx
  6741. DD imagerel $L$SEH_begin_sha256_multi_block_avx2 ; entry 4: sha256_multi_block_avx2
  6742. DD imagerel $L$SEH_end_sha256_multi_block_avx2
  6743. DD imagerel $L$SEH_info_sha256_multi_block_avx2
  6744. .pdata ENDS
  ; Unwind-info records referenced from .pdata.  Every record has the same
  ; shape:
  ;   DB 9,0,0,0   header: byte 9 = version 1 | (flags 1 << 3), i.e. an
  ;                exception handler is present; prologue size, unwind-code
  ;                count and frame register are all zero
  ;   DD handler   image-relative address of the language-specific handler
  ;   DD body,epi  handler data: the two labels the handler reads as
  ;                HandlerData[0] and HandlerData[1]
  6745. .xdata SEGMENT READONLY ALIGN(8)
  6746. ALIGN 8
  6747. $L$SEH_info_sha256_multi_block::
  6748. DB 9,0,0,0
  6749. DD imagerel se_handler
  6750. DD imagerel $L$body,imagerel $L$epilogue
  6751. $L$SEH_info_sha256_multi_block_shaext::
  6752. DB 9,0,0,0
  6753. DD imagerel se_handler
  6754. DD imagerel $L$body_shaext,imagerel $L$epilogue_shaext
  6755. $L$SEH_info_sha256_multi_block_avx::
  6756. DB 9,0,0,0
  6757. DD imagerel se_handler
  6758. DD imagerel $L$body_avx,imagerel $L$epilogue_avx
  6759. $L$SEH_info_sha256_multi_block_avx2:: ; the only record that uses avx2_handler
  6760. DB 9,0,0,0
  6761. DD imagerel avx2_handler
  6762. DD imagerel $L$body_avx2,imagerel $L$epilogue_avx2
  6763. .xdata ENDS
  6764. END