//=- LoongArchISelLowering.cpp - LoongArch DAG Lowering Implementation ---===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the interfaces that LoongArch uses to lower LLVM code into
// a selection DAG.
//
//===----------------------------------------------------------------------===//

#include "LoongArchISelLowering.h"
#include "LoongArch.h"
#include "LoongArchMachineFunctionInfo.h"
#include "LoongArchRegisterInfo.h"
#include "LoongArchSubtarget.h"
#include "LoongArchTargetMachine.h"
#include "MCTargetDesc/LoongArchBaseInfo.h"
#include "MCTargetDesc/LoongArchMCTargetDesc.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/RuntimeLibcalls.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/IntrinsicsLoongArch.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/MathExtras.h"

using namespace llvm;

#define DEBUG_TYPE "loongarch-isel-lowering"

STATISTIC(NumTailCalls, "Number of tail calls");

static cl::opt<bool> ZeroDivCheck(
    "loongarch-check-zero-division", cl::Hidden,
    cl::desc("Trap on integer division by zero."),
    cl::init(false));

LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
                                                 const LoongArchSubtarget &STI)
    : TargetLowering(TM), Subtarget(STI) {

  MVT GRLenVT = Subtarget.getGRLenVT();

  // Set up the register classes.
  addRegisterClass(GRLenVT, &LoongArch::GPRRegClass);
  if (Subtarget.hasBasicF())
    addRegisterClass(MVT::f32, &LoongArch::FPR32RegClass);
  if (Subtarget.hasBasicD())
    addRegisterClass(MVT::f64, &LoongArch::FPR64RegClass);

  setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, GRLenVT,
                   MVT::i1, Promote);

  // TODO: add necessary setOperationAction calls later.
  setOperationAction(ISD::SHL_PARTS, GRLenVT, Custom);
  setOperationAction(ISD::SRA_PARTS, GRLenVT, Custom);
  setOperationAction(ISD::SRL_PARTS, GRLenVT, Custom);
  setOperationAction(ISD::FP_TO_SINT, GRLenVT, Custom);
  setOperationAction(ISD::ROTL, GRLenVT, Expand);
  setOperationAction(ISD::CTPOP, GRLenVT, Expand);
  setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal);
  setOperationAction(ISD::TRAP, MVT::Other, Legal);
  setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
  setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);

  setOperationAction({ISD::GlobalAddress, ISD::BlockAddress, ISD::ConstantPool,
                      ISD::JumpTable},
                     GRLenVT, Custom);

  setOperationAction(ISD::GlobalTLSAddress, GRLenVT, Custom);

  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);

  setOperationAction(ISD::EH_DWARF_CFA, MVT::i32, Custom);
  if (Subtarget.is64Bit())
    setOperationAction(ISD::EH_DWARF_CFA, MVT::i64, Custom);

  setOperationAction(ISD::DYNAMIC_STACKALLOC, GRLenVT, Expand);
  setOperationAction({ISD::STACKSAVE, ISD::STACKRESTORE}, MVT::Other, Expand);
  setOperationAction(ISD::VASTART, MVT::Other, Custom);
  setOperationAction({ISD::VAARG, ISD::VACOPY, ISD::VAEND}, MVT::Other, Expand);

  if (Subtarget.is64Bit()) {
    setOperationAction(ISD::SHL, MVT::i32, Custom);
    setOperationAction(ISD::SRA, MVT::i32, Custom);
    setOperationAction(ISD::SRL, MVT::i32, Custom);
    setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
    setOperationAction(ISD::BITCAST, MVT::i32, Custom);
    setOperationAction(ISD::ROTR, MVT::i32, Custom);
    setOperationAction(ISD::ROTL, MVT::i32, Custom);
    setOperationAction(ISD::CTTZ, MVT::i32, Custom);
    setOperationAction(ISD::CTLZ, MVT::i32, Custom);
    setOperationAction(ISD::INTRINSIC_VOID, MVT::i32, Custom);
    setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i32, Custom);
    setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
    setOperationAction(ISD::READ_REGISTER, MVT::i32, Custom);
    setOperationAction(ISD::WRITE_REGISTER, MVT::i32, Custom);
    if (Subtarget.hasBasicF() && !Subtarget.hasBasicD())
      setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
    if (Subtarget.hasBasicF())
      setOperationAction(ISD::FRINT, MVT::f32, Legal);
    if (Subtarget.hasBasicD())
      setOperationAction(ISD::FRINT, MVT::f64, Legal);
  }

  // LA32 does not have REVB.2W and REVB.D due to the 64-bit operands, and
  // the narrower REVB.W does not exist. But LA32 does have REVB.2H, so i16
  // and i32 could still be byte-swapped relatively cheaply.
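  // As an illustration: REVB.2H swaps the bytes within each 16-bit half
  // ([b3 b2 b1 b0] -> [b2 b3 b0 b1]), so a full i32 byte swap only needs one
  // additional 16-bit rotate afterwards ([b2 b3 b0 b1] -> [b0 b1 b2 b3]); the
  // LA32 selection patterns themselves live in LoongArchInstrInfo (see the
  // BSWAP case in ReplaceNodeResults below).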
  setOperationAction(ISD::BSWAP, MVT::i16, Custom);
  if (Subtarget.is64Bit()) {
    setOperationAction(ISD::BSWAP, MVT::i32, Custom);
  }

  // Expand bitreverse.i16 with native-width bitrev and shift for now, before
  // we get to know which of sll and revb.2h is faster.
  setOperationAction(ISD::BITREVERSE, MVT::i8, Custom);
  if (Subtarget.is64Bit()) {
    setOperationAction(ISD::BITREVERSE, MVT::i32, Custom);
    setOperationAction(ISD::BITREVERSE, MVT::i64, Legal);
  } else {
    setOperationAction(ISD::BITREVERSE, MVT::i32, Legal);
    setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i64, Custom);
    setOperationAction(ISD::READ_REGISTER, MVT::i64, Custom);
    setOperationAction(ISD::WRITE_REGISTER, MVT::i64, Custom);
    setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
    setOperationAction(ISD::INTRINSIC_VOID, MVT::i64, Custom);
  }

  static const ISD::CondCode FPCCToExpand[] = {
      ISD::SETOGT, ISD::SETOGE, ISD::SETUGT, ISD::SETUGE,
      ISD::SETGE,  ISD::SETNE,  ISD::SETGT};

  if (Subtarget.hasBasicF()) {
    setCondCodeAction(FPCCToExpand, MVT::f32, Expand);
    setOperationAction(ISD::SELECT_CC, MVT::f32, Expand);
    setOperationAction(ISD::BR_CC, MVT::f32, Expand);
    setOperationAction(ISD::FMA, MVT::f32, Legal);
    setOperationAction(ISD::FMINNUM_IEEE, MVT::f32, Legal);
    setOperationAction(ISD::FMAXNUM_IEEE, MVT::f32, Legal);
    setOperationAction(ISD::STRICT_FSETCCS, MVT::f32, Legal);
    setOperationAction(ISD::STRICT_FSETCC, MVT::f32, Legal);
    setOperationAction(ISD::FSIN, MVT::f32, Expand);
    setOperationAction(ISD::FCOS, MVT::f32, Expand);
    setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
    setOperationAction(ISD::FPOW, MVT::f32, Expand);
    setOperationAction(ISD::FREM, MVT::f32, Expand);
  }
  if (Subtarget.hasBasicD()) {
    setCondCodeAction(FPCCToExpand, MVT::f64, Expand);
    setOperationAction(ISD::SELECT_CC, MVT::f64, Expand);
    setOperationAction(ISD::BR_CC, MVT::f64, Expand);
    setOperationAction(ISD::STRICT_FSETCCS, MVT::f64, Legal);
    setOperationAction(ISD::STRICT_FSETCC, MVT::f64, Legal);
    setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
    setOperationAction(ISD::FMA, MVT::f64, Legal);
    setOperationAction(ISD::FMINNUM_IEEE, MVT::f64, Legal);
    setOperationAction(ISD::FMAXNUM_IEEE, MVT::f64, Legal);
    setOperationAction(ISD::FSIN, MVT::f64, Expand);
    setOperationAction(ISD::FCOS, MVT::f64, Expand);
    setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
    setOperationAction(ISD::FPOW, MVT::f64, Expand);
    setOperationAction(ISD::FREM, MVT::f64, Expand);
    setTruncStoreAction(MVT::f64, MVT::f32, Expand);
  }

  setOperationAction(ISD::BR_JT, MVT::Other, Expand);
  setOperationAction(ISD::BR_CC, GRLenVT, Expand);
  setOperationAction(ISD::SELECT_CC, GRLenVT, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
  setOperationAction({ISD::SMUL_LOHI, ISD::UMUL_LOHI}, GRLenVT, Expand);
  if (!Subtarget.is64Bit())
    setLibcallName(RTLIB::MUL_I128, nullptr);

  setOperationAction(ISD::FP_TO_UINT, GRLenVT, Custom);
  setOperationAction(ISD::UINT_TO_FP, GRLenVT, Expand);
  if (Subtarget.is64Bit() && Subtarget.hasBasicF() && !Subtarget.hasBasicD()) {
    setOperationAction(ISD::SINT_TO_FP, GRLenVT, Custom);
    setOperationAction(ISD::UINT_TO_FP, GRLenVT, Custom);
  }

  // Compute derived properties from the register classes.
  computeRegisterProperties(STI.getRegisterInfo());
  setStackPointerRegisterToSaveRestore(LoongArch::R3);
  setBooleanContents(ZeroOrOneBooleanContent);
  setMaxAtomicSizeInBitsSupported(Subtarget.getGRLen());
  setMinCmpXchgSizeInBits(32);

  // Function alignments.
  const Align FunctionAlignment(4);
  setMinFunctionAlignment(FunctionAlignment);

  setTargetDAGCombine(ISD::AND);
  setTargetDAGCombine(ISD::OR);
  setTargetDAGCombine(ISD::SRL);
}
bool LoongArchTargetLowering::isOffsetFoldingLegal(
    const GlobalAddressSDNode *GA) const {
  // In order to maximise the opportunity for common subexpression elimination,
  // keep a separate ADD node for the global address offset instead of folding
  // it in the global address node. Later peephole optimisations may choose to
  // fold it back in when profitable.
  return false;
}

SDValue LoongArchTargetLowering::LowerOperation(SDValue Op,
                                                SelectionDAG &DAG) const {
  switch (Op.getOpcode()) {
  case ISD::EH_DWARF_CFA:
    return lowerEH_DWARF_CFA(Op, DAG);
  case ISD::GlobalAddress:
    return lowerGlobalAddress(Op, DAG);
  case ISD::GlobalTLSAddress:
    return lowerGlobalTLSAddress(Op, DAG);
  case ISD::INTRINSIC_WO_CHAIN:
    return lowerINTRINSIC_WO_CHAIN(Op, DAG);
  case ISD::INTRINSIC_W_CHAIN:
    return lowerINTRINSIC_W_CHAIN(Op, DAG);
  case ISD::INTRINSIC_VOID:
    return lowerINTRINSIC_VOID(Op, DAG);
  case ISD::BlockAddress:
    return lowerBlockAddress(Op, DAG);
  case ISD::JumpTable:
    return lowerJumpTable(Op, DAG);
  case ISD::SHL_PARTS:
    return lowerShiftLeftParts(Op, DAG);
  case ISD::SRA_PARTS:
    return lowerShiftRightParts(Op, DAG, true);
  case ISD::SRL_PARTS:
    return lowerShiftRightParts(Op, DAG, false);
  case ISD::ConstantPool:
    return lowerConstantPool(Op, DAG);
  case ISD::FP_TO_SINT:
    return lowerFP_TO_SINT(Op, DAG);
  case ISD::BITCAST:
    return lowerBITCAST(Op, DAG);
  case ISD::UINT_TO_FP:
    return lowerUINT_TO_FP(Op, DAG);
  case ISD::SINT_TO_FP:
    return lowerSINT_TO_FP(Op, DAG);
  case ISD::VASTART:
    return lowerVASTART(Op, DAG);
  case ISD::FRAMEADDR:
    return lowerFRAMEADDR(Op, DAG);
  case ISD::RETURNADDR:
    return lowerRETURNADDR(Op, DAG);
  case ISD::WRITE_REGISTER:
    return lowerWRITE_REGISTER(Op, DAG);
  }
  return SDValue();
}
SDValue LoongArchTargetLowering::lowerWRITE_REGISTER(SDValue Op,
                                                     SelectionDAG &DAG) const {
  if (Subtarget.is64Bit() && Op.getOperand(2).getValueType() == MVT::i32) {
    DAG.getContext()->emitError(
        "On LA64, only 64-bit registers can be written.");
    return Op.getOperand(0);
  }

  if (!Subtarget.is64Bit() && Op.getOperand(2).getValueType() == MVT::i64) {
    DAG.getContext()->emitError(
        "On LA32, only 32-bit registers can be written.");
    return Op.getOperand(0);
  }

  return Op;
}

SDValue LoongArchTargetLowering::lowerFRAMEADDR(SDValue Op,
                                                SelectionDAG &DAG) const {
  if (!isa<ConstantSDNode>(Op.getOperand(0))) {
    DAG.getContext()->emitError("argument to '__builtin_frame_address' must "
                                "be a constant integer");
    return SDValue();
  }

  MachineFunction &MF = DAG.getMachineFunction();
  MF.getFrameInfo().setFrameAddressIsTaken(true);
  Register FrameReg = Subtarget.getRegisterInfo()->getFrameRegister(MF);
  EVT VT = Op.getValueType();
  SDLoc DL(Op);
  SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), DL, FrameReg, VT);
  unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
  int GRLenInBytes = Subtarget.getGRLen() / 8;

  while (Depth--) {
    int Offset = -(GRLenInBytes * 2);
    SDValue Ptr = DAG.getNode(ISD::ADD, DL, VT, FrameAddr,
                              DAG.getIntPtrConstant(Offset, DL));
    FrameAddr =
        DAG.getLoad(VT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo());
  }
  return FrameAddr;
}

SDValue LoongArchTargetLowering::lowerRETURNADDR(SDValue Op,
                                                 SelectionDAG &DAG) const {
  if (verifyReturnAddressArgumentIsConstant(Op, DAG))
    return SDValue();

  // Currently only support lowering return address for current frame.
  if (cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue() != 0) {
    DAG.getContext()->emitError(
        "return address can only be determined for the current frame");
    return SDValue();
  }

  MachineFunction &MF = DAG.getMachineFunction();
  MF.getFrameInfo().setReturnAddressIsTaken(true);
  MVT GRLenVT = Subtarget.getGRLenVT();

  // Return the value of the return address register, marking it an implicit
  // live-in.
  Register Reg = MF.addLiveIn(Subtarget.getRegisterInfo()->getRARegister(),
                              getRegClassFor(GRLenVT));
  return DAG.getCopyFromReg(DAG.getEntryNode(), SDLoc(Op), Reg, GRLenVT);
}

SDValue LoongArchTargetLowering::lowerEH_DWARF_CFA(SDValue Op,
                                                   SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  auto Size = Subtarget.getGRLen() / 8;
  auto FI = MF.getFrameInfo().CreateFixedObject(Size, 0, false);
  return DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
}
SDValue LoongArchTargetLowering::lowerVASTART(SDValue Op,
                                              SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  auto *FuncInfo = MF.getInfo<LoongArchMachineFunctionInfo>();

  SDLoc DL(Op);
  SDValue FI = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
                                 getPointerTy(MF.getDataLayout()));

  // vastart just stores the address of the VarArgsFrameIndex slot into the
  // memory location argument.
  const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
  return DAG.getStore(Op.getOperand(0), DL, FI, Op.getOperand(1),
                      MachinePointerInfo(SV));
}

SDValue LoongArchTargetLowering::lowerUINT_TO_FP(SDValue Op,
                                                 SelectionDAG &DAG) const {
  assert(Subtarget.is64Bit() && Subtarget.hasBasicF() &&
         !Subtarget.hasBasicD() && "unexpected target features");

  SDLoc DL(Op);
  SDValue Op0 = Op.getOperand(0);
  if (Op0->getOpcode() == ISD::AND) {
    auto *C = dyn_cast<ConstantSDNode>(Op0.getOperand(1));
    if (C && C->getZExtValue() < UINT64_C(0xFFFFFFFF))
      return Op;
  }

  if (Op0->getOpcode() == LoongArchISD::BSTRPICK &&
      Op0.getConstantOperandVal(1) < UINT64_C(0x1F) &&
      Op0.getConstantOperandVal(2) == UINT64_C(0))
    return Op;

  if (Op0.getOpcode() == ISD::AssertZext &&
      dyn_cast<VTSDNode>(Op0.getOperand(1))->getVT().bitsLT(MVT::i32))
    return Op;

  EVT OpVT = Op0.getValueType();
  EVT RetVT = Op.getValueType();
  RTLIB::Libcall LC = RTLIB::getUINTTOFP(OpVT, RetVT);
  MakeLibCallOptions CallOptions;
  CallOptions.setTypeListBeforeSoften(OpVT, RetVT, true);
  SDValue Chain = SDValue();
  SDValue Result;
  std::tie(Result, Chain) =
      makeLibCall(DAG, LC, Op.getValueType(), Op0, CallOptions, DL, Chain);
  return Result;
}

SDValue LoongArchTargetLowering::lowerSINT_TO_FP(SDValue Op,
                                                 SelectionDAG &DAG) const {
  assert(Subtarget.is64Bit() && Subtarget.hasBasicF() &&
         !Subtarget.hasBasicD() && "unexpected target features");

  SDLoc DL(Op);
  SDValue Op0 = Op.getOperand(0);
  if ((Op0.getOpcode() == ISD::AssertSext ||
       Op0.getOpcode() == ISD::SIGN_EXTEND_INREG) &&
      dyn_cast<VTSDNode>(Op0.getOperand(1))->getVT().bitsLE(MVT::i32))
    return Op;

  EVT OpVT = Op0.getValueType();
  EVT RetVT = Op.getValueType();
  RTLIB::Libcall LC = RTLIB::getSINTTOFP(OpVT, RetVT);
  MakeLibCallOptions CallOptions;
  CallOptions.setTypeListBeforeSoften(OpVT, RetVT, true);
  SDValue Chain = SDValue();
  SDValue Result;
  std::tie(Result, Chain) =
      makeLibCall(DAG, LC, Op.getValueType(), Op0, CallOptions, DL, Chain);
  return Result;
}

SDValue LoongArchTargetLowering::lowerBITCAST(SDValue Op,
                                              SelectionDAG &DAG) const {
  SDLoc DL(Op);
  SDValue Op0 = Op.getOperand(0);

  if (Op.getValueType() == MVT::f32 && Op0.getValueType() == MVT::i32 &&
      Subtarget.is64Bit() && Subtarget.hasBasicF()) {
    SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
    return DAG.getNode(LoongArchISD::MOVGR2FR_W_LA64, DL, MVT::f32, NewOp0);
  }
  return Op;
}

SDValue LoongArchTargetLowering::lowerFP_TO_SINT(SDValue Op,
                                                 SelectionDAG &DAG) const {
  SDLoc DL(Op);

  if (Op.getValueSizeInBits() > 32 && Subtarget.hasBasicF() &&
      !Subtarget.hasBasicD()) {
    SDValue Dst =
        DAG.getNode(LoongArchISD::FTINT, DL, MVT::f32, Op.getOperand(0));
    return DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Dst);
  }

  EVT FPTy = EVT::getFloatingPointVT(Op.getValueSizeInBits());
  SDValue Trunc = DAG.getNode(LoongArchISD::FTINT, DL, FPTy, Op.getOperand(0));
  return DAG.getNode(ISD::BITCAST, DL, Op.getValueType(), Trunc);
}
static SDValue getTargetNode(GlobalAddressSDNode *N, SDLoc DL, EVT Ty,
                             SelectionDAG &DAG, unsigned Flags) {
  return DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, Flags);
}

static SDValue getTargetNode(BlockAddressSDNode *N, SDLoc DL, EVT Ty,
                             SelectionDAG &DAG, unsigned Flags) {
  return DAG.getTargetBlockAddress(N->getBlockAddress(), Ty, N->getOffset(),
                                   Flags);
}

static SDValue getTargetNode(ConstantPoolSDNode *N, SDLoc DL, EVT Ty,
                             SelectionDAG &DAG, unsigned Flags) {
  return DAG.getTargetConstantPool(N->getConstVal(), Ty, N->getAlign(),
                                   N->getOffset(), Flags);
}

static SDValue getTargetNode(JumpTableSDNode *N, SDLoc DL, EVT Ty,
                             SelectionDAG &DAG, unsigned Flags) {
  return DAG.getTargetJumpTable(N->getIndex(), Ty, Flags);
}

template <class NodeTy>
SDValue LoongArchTargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG,
                                         bool IsLocal) const {
  SDLoc DL(N);
  EVT Ty = getPointerTy(DAG.getDataLayout());
  SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);

  // TODO: Check CodeModel.
  if (IsLocal)
    // This generates the pattern (PseudoLA_PCREL sym), which expands to
    // (addi.w/d (pcalau12i %pc_hi20(sym)) %pc_lo12(sym)).
    return SDValue(DAG.getMachineNode(LoongArch::PseudoLA_PCREL, DL, Ty, Addr),
                   0);

  // This generates the pattern (PseudoLA_GOT sym), which expands to (ld.w/d
  // (pcalau12i %got_pc_hi20(sym)) %got_pc_lo12(sym)).
  return SDValue(DAG.getMachineNode(LoongArch::PseudoLA_GOT, DL, Ty, Addr), 0);
}

SDValue LoongArchTargetLowering::lowerBlockAddress(SDValue Op,
                                                   SelectionDAG &DAG) const {
  return getAddr(cast<BlockAddressSDNode>(Op), DAG);
}

SDValue LoongArchTargetLowering::lowerJumpTable(SDValue Op,
                                                SelectionDAG &DAG) const {
  return getAddr(cast<JumpTableSDNode>(Op), DAG);
}

SDValue LoongArchTargetLowering::lowerConstantPool(SDValue Op,
                                                   SelectionDAG &DAG) const {
  return getAddr(cast<ConstantPoolSDNode>(Op), DAG);
}

SDValue LoongArchTargetLowering::lowerGlobalAddress(SDValue Op,
                                                    SelectionDAG &DAG) const {
  GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
  assert(N->getOffset() == 0 && "unexpected offset in global node");
  return getAddr(N, DAG, N->getGlobal()->isDSOLocal());
}
SDValue LoongArchTargetLowering::getStaticTLSAddr(GlobalAddressSDNode *N,
                                                  SelectionDAG &DAG,
                                                  unsigned Opc) const {
  SDLoc DL(N);
  EVT Ty = getPointerTy(DAG.getDataLayout());
  MVT GRLenVT = Subtarget.getGRLenVT();

  SDValue Addr = DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, 0);
  SDValue Offset = SDValue(DAG.getMachineNode(Opc, DL, Ty, Addr), 0);

  // Add the thread pointer.
  return DAG.getNode(ISD::ADD, DL, Ty, Offset,
                     DAG.getRegister(LoongArch::R2, GRLenVT));
}

SDValue LoongArchTargetLowering::getDynamicTLSAddr(GlobalAddressSDNode *N,
                                                   SelectionDAG &DAG,
                                                   unsigned Opc) const {
  SDLoc DL(N);
  EVT Ty = getPointerTy(DAG.getDataLayout());
  IntegerType *CallTy = Type::getIntNTy(*DAG.getContext(), Ty.getSizeInBits());

  // Use a PC-relative addressing mode to access the dynamic GOT address.
  SDValue Addr = DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, 0);
  SDValue Load = SDValue(DAG.getMachineNode(Opc, DL, Ty, Addr), 0);

  // Prepare argument list to generate call.
  ArgListTy Args;
  ArgListEntry Entry;
  Entry.Node = Load;
  Entry.Ty = CallTy;
  Args.push_back(Entry);

  // Setup call to __tls_get_addr.
  TargetLowering::CallLoweringInfo CLI(DAG);
  CLI.setDebugLoc(DL)
      .setChain(DAG.getEntryNode())
      .setLibCallee(CallingConv::C, CallTy,
                    DAG.getExternalSymbol("__tls_get_addr", Ty),
                    std::move(Args));
  return LowerCallTo(CLI).first;
}

SDValue
LoongArchTargetLowering::lowerGlobalTLSAddress(SDValue Op,
                                               SelectionDAG &DAG) const {
  if (DAG.getMachineFunction().getFunction().getCallingConv() ==
      CallingConv::GHC)
    report_fatal_error("In GHC calling convention TLS is not supported");

  GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
  assert(N->getOffset() == 0 && "unexpected offset in global node");

  SDValue Addr;
  switch (getTargetMachine().getTLSModel(N->getGlobal())) {
  case TLSModel::GeneralDynamic:
    // In this model, application code calls the dynamic linker function
    // __tls_get_addr to locate TLS offsets into the dynamic thread vector at
    // runtime.
    Addr = getDynamicTLSAddr(N, DAG, LoongArch::PseudoLA_TLS_GD);
    break;
  case TLSModel::LocalDynamic:
    // Same as GeneralDynamic, except for assembly modifiers and relocation
    // records.
    Addr = getDynamicTLSAddr(N, DAG, LoongArch::PseudoLA_TLS_LD);
    break;
  case TLSModel::InitialExec:
    // This model uses the GOT to resolve TLS offsets.
    Addr = getStaticTLSAddr(N, DAG, LoongArch::PseudoLA_TLS_IE);
    break;
  case TLSModel::LocalExec:
    // This model is used when static linking as the TLS offsets are resolved
    // during program linking.
    Addr = getStaticTLSAddr(N, DAG, LoongArch::PseudoLA_TLS_LE);
    break;
  }

  return Addr;
}
SDValue
LoongArchTargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
                                                 SelectionDAG &DAG) const {
  switch (Op.getConstantOperandVal(0)) {
  default:
    return SDValue(); // Don't custom lower most intrinsics.
  case Intrinsic::thread_pointer: {
    EVT PtrVT = getPointerTy(DAG.getDataLayout());
    return DAG.getRegister(LoongArch::R2, PtrVT);
  }
  }
}

// Helper function that emits error message for intrinsics with chain.
static SDValue emitIntrinsicWithChainErrorMessage(SDValue Op,
                                                  StringRef ErrorMsg,
                                                  SelectionDAG &DAG) {
  DAG.getContext()->emitError("argument to '" + Op->getOperationName(0) + "' " +
                              ErrorMsg);
  return DAG.getMergeValues({DAG.getUNDEF(Op.getValueType()), Op.getOperand(0)},
                            SDLoc(Op));
}
SDValue
LoongArchTargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op,
                                                SelectionDAG &DAG) const {
  SDLoc DL(Op);
  MVT GRLenVT = Subtarget.getGRLenVT();
  SDValue Op0 = Op.getOperand(0);
  std::string Name = Op->getOperationName(0);
  const StringRef ErrorMsgOOR = "out of range";

  switch (Op.getConstantOperandVal(1)) {
  default:
    return Op;
  case Intrinsic::loongarch_crc_w_b_w:
  case Intrinsic::loongarch_crc_w_h_w:
  case Intrinsic::loongarch_crc_w_w_w:
  case Intrinsic::loongarch_crc_w_d_w:
  case Intrinsic::loongarch_crcc_w_b_w:
  case Intrinsic::loongarch_crcc_w_h_w:
  case Intrinsic::loongarch_crcc_w_w_w:
  case Intrinsic::loongarch_crcc_w_d_w: {
    DAG.getContext()->emitError(Name + " requires target: loongarch64");
    return DAG.getMergeValues({DAG.getUNDEF(Op.getValueType()), Op0}, DL);
  }
  case Intrinsic::loongarch_csrrd_w:
  case Intrinsic::loongarch_csrrd_d: {
    unsigned Imm = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue();
    if (!isUInt<14>(Imm))
      return emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG);
    return DAG.getMergeValues(
        {DAG.getNode(LoongArchISD::CSRRD, DL, GRLenVT, Op0,
                     DAG.getConstant(Imm, DL, GRLenVT)),
         Op0},
        DL);
  }
  case Intrinsic::loongarch_csrwr_w:
  case Intrinsic::loongarch_csrwr_d: {
    unsigned Imm = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
    if (!isUInt<14>(Imm))
      return emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG);
    return DAG.getMergeValues(
        {DAG.getNode(LoongArchISD::CSRWR, DL, GRLenVT, Op0, Op.getOperand(2),
                     DAG.getConstant(Imm, DL, GRLenVT)),
         Op0},
        DL);
  }
  case Intrinsic::loongarch_csrxchg_w:
  case Intrinsic::loongarch_csrxchg_d: {
    unsigned Imm = cast<ConstantSDNode>(Op.getOperand(4))->getZExtValue();
    if (!isUInt<14>(Imm))
      return emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG);
    return DAG.getMergeValues(
        {DAG.getNode(LoongArchISD::CSRXCHG, DL, GRLenVT, Op0, Op.getOperand(2),
                     Op.getOperand(3), DAG.getConstant(Imm, DL, GRLenVT)),
         Op0},
        DL);
  }
  case Intrinsic::loongarch_iocsrrd_d: {
    if (Subtarget.is64Bit())
      return DAG.getMergeValues(
          {DAG.getNode(
               LoongArchISD::IOCSRRD_D, DL, GRLenVT, Op0,
               DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(2))),
           Op0},
          DL);
    else {
      DAG.getContext()->emitError(
          "llvm.loongarch.iocsrrd.d requires target: loongarch64");
      return DAG.getMergeValues({DAG.getUNDEF(Op.getValueType()), Op0}, DL);
    }
  }
#define IOCSRRD_CASE(NAME, NODE)                                              \
  case Intrinsic::loongarch_##NAME: {                                         \
    return DAG.getMergeValues(                                                \
        {DAG.getNode(LoongArchISD::NODE, DL, GRLenVT, Op0, Op.getOperand(2)), \
         Op0},                                                                \
        DL);                                                                  \
  }
    IOCSRRD_CASE(iocsrrd_b, IOCSRRD_B);
    IOCSRRD_CASE(iocsrrd_h, IOCSRRD_H);
    IOCSRRD_CASE(iocsrrd_w, IOCSRRD_W);
#undef IOCSRRD_CASE
  case Intrinsic::loongarch_cpucfg: {
    return DAG.getMergeValues(
        {DAG.getNode(LoongArchISD::CPUCFG, DL, GRLenVT, Op0, Op.getOperand(2)),
         Op0},
        DL);
  }
  case Intrinsic::loongarch_lddir_d: {
    unsigned Imm = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
    if (!isUInt<8>(Imm)) {
      DAG.getContext()->emitError("argument to '" + Op->getOperationName(0) +
                                  "' out of range");
      return DAG.getMergeValues({DAG.getUNDEF(Op.getValueType()), Op0}, DL);
    }
    return Op;
  }
  case Intrinsic::loongarch_movfcsr2gr: {
    if (!Subtarget.hasBasicF()) {
      DAG.getContext()->emitError(
          "llvm.loongarch.movfcsr2gr expects basic f target feature");
      return DAG.getMergeValues(
          {DAG.getUNDEF(Op.getValueType()), Op.getOperand(0)}, SDLoc(Op));
    }
    unsigned Imm = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue();
    if (!isUInt<2>(Imm)) {
      DAG.getContext()->emitError("argument to '" + Op->getOperationName(0) +
                                  "' " + ErrorMsgOOR);
      return DAG.getMergeValues(
          {DAG.getUNDEF(Op.getValueType()), Op.getOperand(0)}, SDLoc(Op));
    }
    return DAG.getMergeValues(
        {DAG.getNode(LoongArchISD::MOVFCSR2GR, DL, Op.getValueType(),
                     DAG.getConstant(Imm, DL, GRLenVT)),
         Op.getOperand(0)},
        DL);
  }
  }
}
// Helper function that emits error message for intrinsics with void return
// value.
static SDValue emitIntrinsicErrorMessage(SDValue Op, StringRef ErrorMsg,
                                         SelectionDAG &DAG) {
  DAG.getContext()->emitError("argument to '" + Op->getOperationName(0) + "' " +
                              ErrorMsg);
  return Op.getOperand(0);
}

SDValue LoongArchTargetLowering::lowerINTRINSIC_VOID(SDValue Op,
                                                     SelectionDAG &DAG) const {
  SDLoc DL(Op);
  MVT GRLenVT = Subtarget.getGRLenVT();
  SDValue Op0 = Op.getOperand(0);
  uint64_t IntrinsicEnum = Op.getConstantOperandVal(1);
  SDValue Op2 = Op.getOperand(2);
  const StringRef ErrorMsgOOR = "out of range";

  switch (IntrinsicEnum) {
  default:
    // TODO: Add more Intrinsics.
    return SDValue();
  case Intrinsic::loongarch_cacop_d:
  case Intrinsic::loongarch_cacop_w: {
    if (IntrinsicEnum == Intrinsic::loongarch_cacop_d && !Subtarget.is64Bit()) {
      DAG.getContext()->emitError(
          "llvm.loongarch.cacop.d requires target: loongarch64");
      return Op.getOperand(0);
    }
    if (IntrinsicEnum == Intrinsic::loongarch_cacop_w && Subtarget.is64Bit()) {
      DAG.getContext()->emitError(
          "llvm.loongarch.cacop.w requires target: loongarch32");
      return Op.getOperand(0);
    }
    // call void @llvm.loongarch.cacop.[d/w](uimm5, rj, simm12)
    unsigned Imm1 = cast<ConstantSDNode>(Op2)->getZExtValue();
    if (!isUInt<5>(Imm1))
      return emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG);
    SDValue Op4 = Op.getOperand(4);
    int Imm2 = cast<ConstantSDNode>(Op4)->getSExtValue();
    if (!isInt<12>(Imm2))
      return emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG);
    return Op;
  }
  case Intrinsic::loongarch_dbar: {
    unsigned Imm = cast<ConstantSDNode>(Op2)->getZExtValue();
    if (!isUInt<15>(Imm))
      return emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG);
    return DAG.getNode(LoongArchISD::DBAR, DL, MVT::Other, Op0,
                       DAG.getConstant(Imm, DL, GRLenVT));
  }
  case Intrinsic::loongarch_ibar: {
    unsigned Imm = cast<ConstantSDNode>(Op2)->getZExtValue();
    if (!isUInt<15>(Imm))
      return emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG);
    return DAG.getNode(LoongArchISD::IBAR, DL, MVT::Other, Op0,
                       DAG.getConstant(Imm, DL, GRLenVT));
  }
  case Intrinsic::loongarch_break: {
    unsigned Imm = cast<ConstantSDNode>(Op2)->getZExtValue();
    if (!isUInt<15>(Imm))
      return emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG);
    return DAG.getNode(LoongArchISD::BREAK, DL, MVT::Other, Op0,
                       DAG.getConstant(Imm, DL, GRLenVT));
  }
  case Intrinsic::loongarch_movgr2fcsr: {
    if (!Subtarget.hasBasicF()) {
      DAG.getContext()->emitError(
          "llvm.loongarch.movgr2fcsr expects basic f target feature");
      return Op0;
    }
    unsigned Imm = cast<ConstantSDNode>(Op2)->getZExtValue();
    if (!isUInt<2>(Imm))
      return emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG);
    return DAG.getNode(
        LoongArchISD::MOVGR2FCSR, DL, MVT::Other, Op0,
        DAG.getConstant(Imm, DL, GRLenVT),
        DAG.getNode(ISD::ANY_EXTEND, DL, GRLenVT, Op.getOperand(3)));
  }
  case Intrinsic::loongarch_syscall: {
    unsigned Imm = cast<ConstantSDNode>(Op2)->getZExtValue();
    if (!isUInt<15>(Imm))
      return emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG);
    return DAG.getNode(LoongArchISD::SYSCALL, DL, MVT::Other, Op0,
                       DAG.getConstant(Imm, DL, GRLenVT));
  }
#define IOCSRWR_CASE(NAME, NODE)                                              \
  case Intrinsic::loongarch_##NAME: {                                         \
    SDValue Op3 = Op.getOperand(3);                                           \
    if (Subtarget.is64Bit())                                                  \
      return DAG.getNode(LoongArchISD::NODE, DL, MVT::Other, Op0,             \
                         DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2),     \
                         DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op3));    \
    else                                                                      \
      return DAG.getNode(LoongArchISD::NODE, DL, MVT::Other, Op0, Op2, Op3);  \
  }
    IOCSRWR_CASE(iocsrwr_b, IOCSRWR_B);
    IOCSRWR_CASE(iocsrwr_h, IOCSRWR_H);
    IOCSRWR_CASE(iocsrwr_w, IOCSRWR_W);
#undef IOCSRWR_CASE
  case Intrinsic::loongarch_iocsrwr_d: {
    if (Subtarget.is64Bit())
      return DAG.getNode(
          LoongArchISD::IOCSRWR_D, DL, MVT::Other, Op0, Op2,
          DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(3)));
    else {
      DAG.getContext()->emitError(
          "llvm.loongarch.iocsrwr.d requires target: loongarch64");
      return Op.getOperand(0);
    }
  }
#define ASRT_LE_GT_CASE(NAME)                                                 \
  case Intrinsic::loongarch_##NAME: {                                         \
    if (!Subtarget.is64Bit()) {                                               \
      DAG.getContext()->emitError(Op->getOperationName(0) +                   \
                                  " requires target: loongarch64");           \
      return Op.getOperand(0);                                                \
    }                                                                         \
    return Op;                                                                \
  }
    ASRT_LE_GT_CASE(asrtle_d)
    ASRT_LE_GT_CASE(asrtgt_d)
#undef ASRT_LE_GT_CASE
  case Intrinsic::loongarch_ldpte_d: {
    unsigned Imm = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
    if (!isUInt<8>(Imm))
      return emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG);
    if (!Subtarget.is64Bit()) {
      DAG.getContext()->emitError(Op->getOperationName(0) +
                                  " requires target: loongarch64");
      return Op.getOperand(0);
    }
    return Op;
  }
  }
}
SDValue LoongArchTargetLowering::lowerShiftLeftParts(SDValue Op,
                                                     SelectionDAG &DAG) const {
  SDLoc DL(Op);
  SDValue Lo = Op.getOperand(0);
  SDValue Hi = Op.getOperand(1);
  SDValue Shamt = Op.getOperand(2);
  EVT VT = Lo.getValueType();

  // if Shamt-GRLen < 0: // Shamt < GRLen
  //   Lo = Lo << Shamt
  //   Hi = (Hi << Shamt) | ((Lo >>u 1) >>u (GRLen-1 ^ Shamt))
  // else:
  //   Lo = 0
  //   Hi = Lo << (Shamt-GRLen)
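  // Worked example (illustrative, assuming GRLen == 32): shifting a 64-bit
  // value left by Shamt = 40 takes the 'else' branch, giving Lo = 0 and
  // Hi = Lo << 8, i.e. the low word moves up into the high word. For
  // Shamt = 4 the 'if' branch gives Lo = Lo << 4 and
  // Hi = (Hi << 4) | (Lo >> 28); splitting the right shift into
  // (Lo >>u 1) >>u (31 ^ Shamt) keeps every shift amount below GRLen, so the
  // expansion stays well-defined even when Shamt == 0.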
  SDValue Zero = DAG.getConstant(0, DL, VT);
  SDValue One = DAG.getConstant(1, DL, VT);
  SDValue MinusGRLen = DAG.getConstant(-(int)Subtarget.getGRLen(), DL, VT);
  SDValue GRLenMinus1 = DAG.getConstant(Subtarget.getGRLen() - 1, DL, VT);
  SDValue ShamtMinusGRLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusGRLen);
  SDValue GRLenMinus1Shamt = DAG.getNode(ISD::XOR, DL, VT, Shamt, GRLenMinus1);

  SDValue LoTrue = DAG.getNode(ISD::SHL, DL, VT, Lo, Shamt);
  SDValue ShiftRight1Lo = DAG.getNode(ISD::SRL, DL, VT, Lo, One);
  SDValue ShiftRightLo =
      DAG.getNode(ISD::SRL, DL, VT, ShiftRight1Lo, GRLenMinus1Shamt);
  SDValue ShiftLeftHi = DAG.getNode(ISD::SHL, DL, VT, Hi, Shamt);
  SDValue HiTrue = DAG.getNode(ISD::OR, DL, VT, ShiftLeftHi, ShiftRightLo);
  SDValue HiFalse = DAG.getNode(ISD::SHL, DL, VT, Lo, ShamtMinusGRLen);

  SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusGRLen, Zero, ISD::SETLT);

  Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, Zero);
  Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);

  SDValue Parts[2] = {Lo, Hi};
  return DAG.getMergeValues(Parts, DL);
}
SDValue LoongArchTargetLowering::lowerShiftRightParts(SDValue Op,
                                                      SelectionDAG &DAG,
                                                      bool IsSRA) const {
  SDLoc DL(Op);
  SDValue Lo = Op.getOperand(0);
  SDValue Hi = Op.getOperand(1);
  SDValue Shamt = Op.getOperand(2);
  EVT VT = Lo.getValueType();

  // SRA expansion:
  //   if Shamt-GRLen < 0: // Shamt < GRLen
  //     Lo = (Lo >>u Shamt) | ((Hi << 1) << (ShAmt ^ GRLen-1))
  //     Hi = Hi >>s Shamt
  //   else:
  //     Lo = Hi >>s (Shamt-GRLen);
  //     Hi = Hi >>s (GRLen-1)
  //
  // SRL expansion:
  //   if Shamt-GRLen < 0: // Shamt < GRLen
  //     Lo = (Lo >>u Shamt) | ((Hi << 1) << (ShAmt ^ GRLen-1))
  //     Hi = Hi >>u Shamt
  //   else:
  //     Lo = Hi >>u (Shamt-GRLen);
  //     Hi = 0;
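  // Worked example (illustrative, assuming GRLen == 32): an SRL of a 64-bit
  // value by Shamt = 40 takes the 'else' branch, giving Lo = Hi >>u 8 and
  // Hi = 0. For an SRA with Shamt >= GRLen, Hi = Hi >>s (GRLen-1) replicates
  // the sign bit, which matches what a full-width arithmetic shift would
  // leave in the high word.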
  unsigned ShiftRightOp = IsSRA ? ISD::SRA : ISD::SRL;
  SDValue Zero = DAG.getConstant(0, DL, VT);
  SDValue One = DAG.getConstant(1, DL, VT);
  SDValue MinusGRLen = DAG.getConstant(-(int)Subtarget.getGRLen(), DL, VT);
  SDValue GRLenMinus1 = DAG.getConstant(Subtarget.getGRLen() - 1, DL, VT);
  SDValue ShamtMinusGRLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusGRLen);
  SDValue GRLenMinus1Shamt = DAG.getNode(ISD::XOR, DL, VT, Shamt, GRLenMinus1);

  SDValue ShiftRightLo = DAG.getNode(ISD::SRL, DL, VT, Lo, Shamt);
  SDValue ShiftLeftHi1 = DAG.getNode(ISD::SHL, DL, VT, Hi, One);
  SDValue ShiftLeftHi =
      DAG.getNode(ISD::SHL, DL, VT, ShiftLeftHi1, GRLenMinus1Shamt);
  SDValue LoTrue = DAG.getNode(ISD::OR, DL, VT, ShiftRightLo, ShiftLeftHi);
  SDValue HiTrue = DAG.getNode(ShiftRightOp, DL, VT, Hi, Shamt);
  SDValue LoFalse = DAG.getNode(ShiftRightOp, DL, VT, Hi, ShamtMinusGRLen);
  SDValue HiFalse =
      IsSRA ? DAG.getNode(ISD::SRA, DL, VT, Hi, GRLenMinus1) : Zero;

  SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusGRLen, Zero, ISD::SETLT);

  Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, LoFalse);
  Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);

  SDValue Parts[2] = {Lo, Hi};
  return DAG.getMergeValues(Parts, DL);
}
// Returns the opcode of the target-specific SDNode that implements the 32-bit
// form of the given Opcode.
static LoongArchISD::NodeType getLoongArchWOpcode(unsigned Opcode) {
  switch (Opcode) {
  default:
    llvm_unreachable("Unexpected opcode");
  case ISD::SHL:
    return LoongArchISD::SLL_W;
  case ISD::SRA:
    return LoongArchISD::SRA_W;
  case ISD::SRL:
    return LoongArchISD::SRL_W;
  case ISD::ROTR:
    return LoongArchISD::ROTR_W;
  case ISD::ROTL:
    return LoongArchISD::ROTL_W;
  case ISD::CTTZ:
    return LoongArchISD::CTZ_W;
  case ISD::CTLZ:
    return LoongArchISD::CLZ_W;
  }
}
// Converts the given i8/i16/i32 operation to a target-specific SelectionDAG
// node. Because i8/i16/i32 isn't a legal type for LA64, these operations would
// otherwise be promoted to i64, making it difficult to select the
// SLL_W/.../*W instructions later on, because the fact that the operation was
// originally of type i8/i16/i32 is lost.
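// For example (illustrative): an i32 (shl x, y) on LA64 is rewritten as
// (truncate (SLL_W (any_extend x), (any_extend y))), so SLL_W can still be
// selected even though the surrounding DAG now works on i64 values.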
static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG, int NumOp,
                                   unsigned ExtOpc = ISD::ANY_EXTEND) {
  SDLoc DL(N);
  LoongArchISD::NodeType WOpcode = getLoongArchWOpcode(N->getOpcode());
  SDValue NewOp0, NewRes;

  switch (NumOp) {
  default:
    llvm_unreachable("Unexpected NumOp");
  case 1: {
    NewOp0 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(0));
    NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0);
    break;
  }
  case 2: {
    NewOp0 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(0));
    SDValue NewOp1 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(1));
    NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0, NewOp1);
    break;
  }
  // TODO: Handle more NumOp.
  }

  // ReplaceNodeResults requires we maintain the same type for the return
  // value.
  return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewRes);
}
void LoongArchTargetLowering::ReplaceNodeResults(
    SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const {
  SDLoc DL(N);
  EVT VT = N->getValueType(0);
  switch (N->getOpcode()) {
  default:
    llvm_unreachable("Don't know how to legalize this operation");
  case ISD::SHL:
  case ISD::SRA:
  case ISD::SRL:
  case ISD::ROTR:
    assert(VT == MVT::i32 && Subtarget.is64Bit() &&
           "Unexpected custom legalisation");
    if (N->getOperand(1).getOpcode() != ISD::Constant) {
      Results.push_back(customLegalizeToWOp(N, DAG, 2));
      break;
    }
    break;
  case ISD::ROTL:
    ConstantSDNode *CN;
    if ((CN = dyn_cast<ConstantSDNode>(N->getOperand(1)))) {
      Results.push_back(customLegalizeToWOp(N, DAG, 2));
      break;
    }
    break;
  case ISD::FP_TO_SINT: {
    assert(VT == MVT::i32 && Subtarget.is64Bit() &&
           "Unexpected custom legalisation");
    SDValue Src = N->getOperand(0);
    EVT FVT = EVT::getFloatingPointVT(N->getValueSizeInBits(0));
    if (getTypeAction(*DAG.getContext(), Src.getValueType()) !=
        TargetLowering::TypeSoftenFloat) {
      SDValue Dst = DAG.getNode(LoongArchISD::FTINT, DL, FVT, Src);
      Results.push_back(DAG.getNode(ISD::BITCAST, DL, VT, Dst));
      return;
    }
    // If the FP type needs to be softened, emit a library call using the 'si'
    // version. If we left it to default legalization we'd end up with 'di'.
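    // For instance (illustrative): with a softened f32 source this picks the
    // float -> i32 libcall (__fixsfsi) rather than the float -> i64 one
    // (__fixsfdi) that default legalization would reach after promoting the
    // i32 result.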
    RTLIB::Libcall LC;
    LC = RTLIB::getFPTOSINT(Src.getValueType(), VT);
    MakeLibCallOptions CallOptions;
    EVT OpVT = Src.getValueType();
    CallOptions.setTypeListBeforeSoften(OpVT, VT, true);
    SDValue Chain = SDValue();
    SDValue Result;
    std::tie(Result, Chain) =
        makeLibCall(DAG, LC, VT, Src, CallOptions, DL, Chain);
    Results.push_back(Result);
    break;
  }
  case ISD::BITCAST: {
    SDValue Src = N->getOperand(0);
    EVT SrcVT = Src.getValueType();
    if (VT == MVT::i32 && SrcVT == MVT::f32 && Subtarget.is64Bit() &&
        Subtarget.hasBasicF()) {
      SDValue Dst =
          DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Src);
      Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Dst));
    }
    break;
  }
  case ISD::FP_TO_UINT: {
    assert(VT == MVT::i32 && Subtarget.is64Bit() &&
           "Unexpected custom legalisation");
    auto &TLI = DAG.getTargetLoweringInfo();
    SDValue Tmp1, Tmp2;
    TLI.expandFP_TO_UINT(N, Tmp1, Tmp2, DAG);
    Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Tmp1));
    break;
  }
  case ISD::BSWAP: {
    SDValue Src = N->getOperand(0);
    assert((VT == MVT::i16 || VT == MVT::i32) &&
           "Unexpected custom legalization");
    MVT GRLenVT = Subtarget.getGRLenVT();
    SDValue NewSrc = DAG.getNode(ISD::ANY_EXTEND, DL, GRLenVT, Src);
    SDValue Tmp;
    switch (VT.getSizeInBits()) {
    default:
      llvm_unreachable("Unexpected operand width");
    case 16:
      Tmp = DAG.getNode(LoongArchISD::REVB_2H, DL, GRLenVT, NewSrc);
      break;
    case 32:
      // Only LA64 will get to here due to the size mismatch between VT and
      // GRLenVT, LA32 lowering is directly defined in LoongArchInstrInfo.
      Tmp = DAG.getNode(LoongArchISD::REVB_2W, DL, GRLenVT, NewSrc);
      break;
    }
    Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, Tmp));
    break;
  }
  case ISD::BITREVERSE: {
    SDValue Src = N->getOperand(0);
    assert((VT == MVT::i8 || (VT == MVT::i32 && Subtarget.is64Bit())) &&
           "Unexpected custom legalization");
    MVT GRLenVT = Subtarget.getGRLenVT();
    SDValue NewSrc = DAG.getNode(ISD::ANY_EXTEND, DL, GRLenVT, Src);
    SDValue Tmp;
    switch (VT.getSizeInBits()) {
    default:
      llvm_unreachable("Unexpected operand width");
    case 8:
      Tmp = DAG.getNode(LoongArchISD::BITREV_4B, DL, GRLenVT, NewSrc);
      break;
    case 32:
      Tmp = DAG.getNode(LoongArchISD::BITREV_W, DL, GRLenVT, NewSrc);
      break;
    }
    Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, Tmp));
    break;
  }
  case ISD::CTLZ:
  case ISD::CTTZ: {
    assert(VT == MVT::i32 && Subtarget.is64Bit() &&
           "Unexpected custom legalisation");
    Results.push_back(customLegalizeToWOp(N, DAG, 1));
    break;
  }
  case ISD::INTRINSIC_W_CHAIN: {
    SDValue Op0 = N->getOperand(0);
    EVT VT = N->getValueType(0);
    uint64_t Op1 = N->getConstantOperandVal(1);
    MVT GRLenVT = Subtarget.getGRLenVT();
    if (Op1 == Intrinsic::loongarch_movfcsr2gr) {
      if (!Subtarget.hasBasicF()) {
        DAG.getContext()->emitError(
            "llvm.loongarch.movfcsr2gr expects basic f target feature");
        Results.push_back(DAG.getMergeValues(
            {DAG.getUNDEF(N->getValueType(0)), N->getOperand(0)}, SDLoc(N)));
        Results.push_back(N->getOperand(0));
        return;
      }
      unsigned Imm = cast<ConstantSDNode>(N->getOperand(2))->getZExtValue();
      if (!isUInt<2>(Imm)) {
        DAG.getContext()->emitError("argument to '" + N->getOperationName(0) +
                                    "' " + "out of range");
        Results.push_back(DAG.getMergeValues(
            {DAG.getUNDEF(N->getValueType(0)), N->getOperand(0)}, SDLoc(N)));
        Results.push_back(N->getOperand(0));
        return;
      }
      Results.push_back(
          DAG.getNode(ISD::TRUNCATE, DL, VT,
                      DAG.getNode(LoongArchISD::MOVFCSR2GR, SDLoc(N), MVT::i64,
                                  DAG.getConstant(Imm, DL, GRLenVT))));
      Results.push_back(N->getOperand(0));
      return;
    }
  1053. SDValue Op2 = N->getOperand(2);
  1054. std::string Name = N->getOperationName(0);
  1055. switch (Op1) {
  1056. default:
  1057. llvm_unreachable("Unexpected Intrinsic.");
  1058. #define CRC_CASE_EXT_BINARYOP(NAME, NODE) \
  1059. case Intrinsic::loongarch_##NAME: { \
  1060. Results.push_back(DAG.getNode( \
  1061. ISD::TRUNCATE, DL, VT, \
  1062. DAG.getNode( \
  1063. LoongArchISD::NODE, DL, MVT::i64, \
  1064. DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2), \
  1065. DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3))))); \
  1066. Results.push_back(N->getOperand(0)); \
  1067. break; \
  1068. }
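// Each CRC case any-extends both data operands to i64, emits the target CRC
// node, truncates the result back to the original type, and returns the
// incoming chain unchanged.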
  1069. CRC_CASE_EXT_BINARYOP(crc_w_b_w, CRC_W_B_W)
  1070. CRC_CASE_EXT_BINARYOP(crc_w_h_w, CRC_W_H_W)
  1071. CRC_CASE_EXT_BINARYOP(crc_w_w_w, CRC_W_W_W)
  1072. CRC_CASE_EXT_BINARYOP(crcc_w_b_w, CRCC_W_B_W)
  1073. CRC_CASE_EXT_BINARYOP(crcc_w_h_w, CRCC_W_H_W)
  1074. CRC_CASE_EXT_BINARYOP(crcc_w_w_w, CRCC_W_W_W)
  1075. #undef CRC_CASE_EXT_BINARYOP
  1076. #define CRC_CASE_EXT_UNARYOP(NAME, NODE) \
  1077. case Intrinsic::loongarch_##NAME: { \
  1078. Results.push_back( \
  1079. DAG.getNode(ISD::TRUNCATE, DL, VT, \
  1080. DAG.getNode(LoongArchISD::NODE, DL, MVT::i64, Op2, \
  1081. DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, \
  1082. N->getOperand(3))))); \
  1083. Results.push_back(N->getOperand(0)); \
  1084. break; \
  1085. }
  1086. CRC_CASE_EXT_UNARYOP(crc_w_d_w, CRC_W_D_W)
  1087. CRC_CASE_EXT_UNARYOP(crcc_w_d_w, CRCC_W_D_W)
  1088. #undef CRC_CASE_EXT_UNARYOP
  1089. #define CSR_CASE(ID) \
  1090. case Intrinsic::loongarch_##ID: { \
  1091. if (!Subtarget.is64Bit()) { \
  1092. DAG.getContext()->emitError(Name + " requires target: loongarch64"); \
  1093. Results.push_back(DAG.getUNDEF(VT)); \
  1094. Results.push_back(N->getOperand(0)); \
  1095. } \
  1096. break; \
  1097. }
  1098. CSR_CASE(csrrd_d);
  1099. CSR_CASE(csrwr_d);
  1100. CSR_CASE(csrxchg_d);
  1101. CSR_CASE(iocsrrd_d);
  1102. #undef CSR_CASE
  1103. case Intrinsic::loongarch_csrrd_w: {
  1104. unsigned Imm = cast<ConstantSDNode>(Op2)->getZExtValue();
  1105. if (!isUInt<14>(Imm)) {
  1106. DAG.getContext()->emitError("argument to '" + Name + "' out of range");
  1107. Results.push_back(DAG.getUNDEF(VT));
  1108. Results.push_back(N->getOperand(0));
  1109. break;
  1110. }
  1111. Results.push_back(
  1112. DAG.getNode(ISD::TRUNCATE, DL, VT,
  1113. DAG.getNode(LoongArchISD::CSRRD, DL, GRLenVT, Op0,
  1114. DAG.getConstant(Imm, DL, GRLenVT))));
  1115. Results.push_back(N->getOperand(0));
  1116. break;
  1117. }
  1118. case Intrinsic::loongarch_csrwr_w: {
  1119. unsigned Imm = cast<ConstantSDNode>(N->getOperand(3))->getZExtValue();
  1120. if (!isUInt<14>(Imm)) {
  1121. DAG.getContext()->emitError("argument to '" + Name + "' out of range");
  1122. Results.push_back(DAG.getUNDEF(VT));
  1123. Results.push_back(N->getOperand(0));
  1124. break;
  1125. }
  1126. Results.push_back(DAG.getNode(
  1127. ISD::TRUNCATE, DL, VT,
  1128. DAG.getNode(LoongArchISD::CSRWR, DL, GRLenVT, Op0,
  1129. DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2),
  1130. DAG.getConstant(Imm, DL, GRLenVT))));
  1131. Results.push_back(N->getOperand(0));
  1132. break;
  1133. }
  1134. case Intrinsic::loongarch_csrxchg_w: {
  1135. unsigned Imm = cast<ConstantSDNode>(N->getOperand(4))->getZExtValue();
  1136. if (!isUInt<14>(Imm)) {
  1137. DAG.getContext()->emitError("argument to '" + Name + "' out of range");
  1138. Results.push_back(DAG.getUNDEF(VT));
  1139. Results.push_back(N->getOperand(0));
  1140. break;
  1141. }
  1142. Results.push_back(DAG.getNode(
  1143. ISD::TRUNCATE, DL, VT,
  1144. DAG.getNode(
  1145. LoongArchISD::CSRXCHG, DL, GRLenVT, Op0,
  1146. DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2),
  1147. DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3)),
  1148. DAG.getConstant(Imm, DL, GRLenVT))));
  1149. Results.push_back(N->getOperand(0));
  1150. break;
  1151. }
  1152. #define IOCSRRD_CASE(NAME, NODE) \
  1153. case Intrinsic::loongarch_##NAME: { \
  1154. Results.push_back(DAG.getNode( \
  1155. ISD::TRUNCATE, DL, N->getValueType(0), \
  1156. DAG.getNode(LoongArchISD::NODE, DL, MVT::i64, Op0, \
  1157. DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2)))); \
  1158. Results.push_back(N->getOperand(0)); \
  1159. break; \
  1160. }
  1161. IOCSRRD_CASE(iocsrrd_b, IOCSRRD_B);
  1162. IOCSRRD_CASE(iocsrrd_h, IOCSRRD_H);
  1163. IOCSRRD_CASE(iocsrrd_w, IOCSRRD_W);
  1164. #undef IOCSRRD_CASE
  1165. case Intrinsic::loongarch_cpucfg: {
  1166. Results.push_back(DAG.getNode(
  1167. ISD::TRUNCATE, DL, VT,
  1168. DAG.getNode(LoongArchISD::CPUCFG, DL, GRLenVT, Op0,
  1169. DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2))));
  1170. Results.push_back(Op0);
  1171. break;
  1172. }
  1173. case Intrinsic::loongarch_lddir_d: {
  1174. if (!Subtarget.is64Bit()) {
  1175. DAG.getContext()->emitError(N->getOperationName(0) +
  1176. " requires target: loongarch64");
  1177. Results.push_back(DAG.getUNDEF(VT));
  1178. Results.push_back(Op0);
  1179. break;
  1180. }
  1181. break;
  1182. }
  1183. }
  1184. break;
  1185. }
  1186. case ISD::READ_REGISTER: {
  1187. if (Subtarget.is64Bit())
  1188. DAG.getContext()->emitError(
  1189. "On LA64, only 64-bit registers can be read.");
  1190. else
  1191. DAG.getContext()->emitError(
  1192. "On LA32, only 32-bit registers can be read.");
  1193. Results.push_back(DAG.getUNDEF(VT));
  1194. Results.push_back(N->getOperand(0));
  1195. break;
  1196. }
  1197. }
  1198. }
  1199. static SDValue performANDCombine(SDNode *N, SelectionDAG &DAG,
  1200. TargetLowering::DAGCombinerInfo &DCI,
  1201. const LoongArchSubtarget &Subtarget) {
  1202. if (DCI.isBeforeLegalizeOps())
  1203. return SDValue();
  1204. SDValue FirstOperand = N->getOperand(0);
  1205. SDValue SecondOperand = N->getOperand(1);
  1206. unsigned FirstOperandOpc = FirstOperand.getOpcode();
  1207. EVT ValTy = N->getValueType(0);
  1208. SDLoc DL(N);
  1209. uint64_t lsb, msb;
  1210. unsigned SMIdx, SMLen;
  1211. ConstantSDNode *CN;
  1212. SDValue NewOperand;
  1213. MVT GRLenVT = Subtarget.getGRLenVT();
  1214. // Op's second operand must be a shifted mask.
  1215. if (!(CN = dyn_cast<ConstantSDNode>(SecondOperand)) ||
  1216. !isShiftedMask_64(CN->getZExtValue(), SMIdx, SMLen))
  1217. return SDValue();
  1218. if (FirstOperandOpc == ISD::SRA || FirstOperandOpc == ISD::SRL) {
  1219. // Pattern match BSTRPICK.
  1220. // $dst = and ((sra or srl) $src , lsb), (2**len - 1)
  1221. // => BSTRPICK $dst, $src, msb, lsb
  1222. // where msb = lsb + len - 1
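// e.g. $dst = and (srl $src, 4), 0xff  =>  BSTRPICK $dst, $src, 11, 4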
  1223. // The second operand of the shift must be an immediate.
  1224. if (!(CN = dyn_cast<ConstantSDNode>(FirstOperand.getOperand(1))))
  1225. return SDValue();
  1226. lsb = CN->getZExtValue();
  1227. // Return if the shifted mask does not start at bit 0 or the sum of its
  1228. // length and lsb exceeds the word's size.
  1229. if (SMIdx != 0 || lsb + SMLen > ValTy.getSizeInBits())
  1230. return SDValue();
  1231. NewOperand = FirstOperand.getOperand(0);
  1232. } else {
  1233. // Pattern match BSTRPICK.
1234. // $dst = and $src, (2**len - 1), if len > 12
  1235. // => BSTRPICK $dst, $src, msb, lsb
  1236. // where lsb = 0 and msb = len - 1
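// e.g. $dst = and $src, 0xffff  =>  BSTRPICK $dst, $src, 15, 0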
  1237. // If the mask is <= 0xfff, andi can be used instead.
  1238. if (CN->getZExtValue() <= 0xfff)
  1239. return SDValue();
  1240. // Return if the mask doesn't start at position 0.
  1241. if (SMIdx)
  1242. return SDValue();
  1243. lsb = 0;
  1244. NewOperand = FirstOperand;
  1245. }
  1246. msb = lsb + SMLen - 1;
  1247. return DAG.getNode(LoongArchISD::BSTRPICK, DL, ValTy, NewOperand,
  1248. DAG.getConstant(msb, DL, GRLenVT),
  1249. DAG.getConstant(lsb, DL, GRLenVT));
  1250. }
  1251. static SDValue performSRLCombine(SDNode *N, SelectionDAG &DAG,
  1252. TargetLowering::DAGCombinerInfo &DCI,
  1253. const LoongArchSubtarget &Subtarget) {
  1254. if (DCI.isBeforeLegalizeOps())
  1255. return SDValue();
  1256. // $dst = srl (and $src, Mask), Shamt
  1257. // =>
  1258. // BSTRPICK $dst, $src, MaskIdx+MaskLen-1, Shamt
  1259. // when Mask is a shifted mask, and MaskIdx <= Shamt <= MaskIdx+MaskLen-1
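// e.g. $dst = srl (and $src, 0xff0), 4  =>  BSTRPICK $dst, $src, 11, 4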
  1260. //
  1261. SDValue FirstOperand = N->getOperand(0);
  1262. ConstantSDNode *CN;
  1263. EVT ValTy = N->getValueType(0);
  1264. SDLoc DL(N);
  1265. MVT GRLenVT = Subtarget.getGRLenVT();
  1266. unsigned MaskIdx, MaskLen;
  1267. uint64_t Shamt;
  1268. // The first operand must be an AND and the second operand of the AND must be
  1269. // a shifted mask.
  1270. if (FirstOperand.getOpcode() != ISD::AND ||
  1271. !(CN = dyn_cast<ConstantSDNode>(FirstOperand.getOperand(1))) ||
  1272. !isShiftedMask_64(CN->getZExtValue(), MaskIdx, MaskLen))
  1273. return SDValue();
  1274. // The second operand (shift amount) must be an immediate.
  1275. if (!(CN = dyn_cast<ConstantSDNode>(N->getOperand(1))))
  1276. return SDValue();
  1277. Shamt = CN->getZExtValue();
  1278. if (MaskIdx <= Shamt && Shamt <= MaskIdx + MaskLen - 1)
  1279. return DAG.getNode(LoongArchISD::BSTRPICK, DL, ValTy,
  1280. FirstOperand->getOperand(0),
  1281. DAG.getConstant(MaskIdx + MaskLen - 1, DL, GRLenVT),
  1282. DAG.getConstant(Shamt, DL, GRLenVT));
  1283. return SDValue();
  1284. }
  1285. static SDValue performORCombine(SDNode *N, SelectionDAG &DAG,
  1286. TargetLowering::DAGCombinerInfo &DCI,
  1287. const LoongArchSubtarget &Subtarget) {
  1288. MVT GRLenVT = Subtarget.getGRLenVT();
  1289. EVT ValTy = N->getValueType(0);
  1290. SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
  1291. ConstantSDNode *CN0, *CN1;
  1292. SDLoc DL(N);
  1293. unsigned ValBits = ValTy.getSizeInBits();
  1294. unsigned MaskIdx0, MaskLen0, MaskIdx1, MaskLen1;
  1295. unsigned Shamt;
  1296. bool SwapAndRetried = false;
  1297. if (DCI.isBeforeLegalizeOps())
  1298. return SDValue();
  1299. if (ValBits != 32 && ValBits != 64)
  1300. return SDValue();
  1301. Retry:
  1302. // 1st pattern to match BSTRINS:
  1303. // R = or (and X, mask0), (and (shl Y, lsb), mask1)
  1304. // where mask1 = (2**size - 1) << lsb, mask0 = ~mask1
  1305. // =>
  1306. // R = BSTRINS X, Y, msb, lsb (where msb = lsb + size - 1)
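// e.g. R = or (and X, 0xffff00ff), (and (shl Y, 8), 0xff00)
//      => BSTRINS X, Y, 15, 8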
  1307. if (N0.getOpcode() == ISD::AND &&
  1308. (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
  1309. isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
  1310. N1.getOpcode() == ISD::AND && N1.getOperand(0).getOpcode() == ISD::SHL &&
  1311. (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
  1312. isShiftedMask_64(CN1->getZExtValue(), MaskIdx1, MaskLen1) &&
  1313. MaskIdx0 == MaskIdx1 && MaskLen0 == MaskLen1 &&
  1314. (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
  1315. (Shamt = CN1->getZExtValue()) == MaskIdx0 &&
  1316. (MaskIdx0 + MaskLen0 <= ValBits)) {
  1317. LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 1\n");
  1318. return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
  1319. N1.getOperand(0).getOperand(0),
  1320. DAG.getConstant((MaskIdx0 + MaskLen0 - 1), DL, GRLenVT),
  1321. DAG.getConstant(MaskIdx0, DL, GRLenVT));
  1322. }
  1323. // 2nd pattern to match BSTRINS:
  1324. // R = or (and X, mask0), (shl (and Y, mask1), lsb)
  1325. // where mask1 = (2**size - 1), mask0 = ~(mask1 << lsb)
  1326. // =>
  1327. // R = BSTRINS X, Y, msb, lsb (where msb = lsb + size - 1)
  1328. if (N0.getOpcode() == ISD::AND &&
  1329. (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
  1330. isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
  1331. N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::AND &&
  1332. (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
  1333. (Shamt = CN1->getZExtValue()) == MaskIdx0 &&
  1334. (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
  1335. isShiftedMask_64(CN1->getZExtValue(), MaskIdx1, MaskLen1) &&
  1336. MaskLen0 == MaskLen1 && MaskIdx1 == 0 &&
  1337. (MaskIdx0 + MaskLen0 <= ValBits)) {
  1338. LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 2\n");
  1339. return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
  1340. N1.getOperand(0).getOperand(0),
  1341. DAG.getConstant((MaskIdx0 + MaskLen0 - 1), DL, GRLenVT),
  1342. DAG.getConstant(MaskIdx0, DL, GRLenVT));
  1343. }
  1344. // 3rd pattern to match BSTRINS:
  1345. // R = or (and X, mask0), (and Y, mask1)
  1346. // where ~mask0 = (2**size - 1) << lsb, mask0 & mask1 = 0
  1347. // =>
  1348. // R = BSTRINS X, (shr (and Y, mask1), lsb), msb, lsb
  1349. // where msb = lsb + size - 1
  1350. if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::AND &&
  1351. (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
  1352. isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
  1353. (MaskIdx0 + MaskLen0 <= 64) &&
  1354. (CN1 = dyn_cast<ConstantSDNode>(N1->getOperand(1))) &&
  1355. (CN1->getSExtValue() & CN0->getSExtValue()) == 0) {
  1356. LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 3\n");
  1357. return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
  1358. DAG.getNode(ISD::SRL, DL, N1->getValueType(0), N1,
  1359. DAG.getConstant(MaskIdx0, DL, GRLenVT)),
  1360. DAG.getConstant(ValBits == 32
  1361. ? (MaskIdx0 + (MaskLen0 & 31) - 1)
  1362. : (MaskIdx0 + MaskLen0 - 1),
  1363. DL, GRLenVT),
  1364. DAG.getConstant(MaskIdx0, DL, GRLenVT));
  1365. }
  1366. // 4th pattern to match BSTRINS:
  1367. // R = or (and X, mask), (shl Y, shamt)
  1368. // where mask = (2**shamt - 1)
  1369. // =>
  1370. // R = BSTRINS X, Y, ValBits - 1, shamt
  1371. // where ValBits = 32 or 64
  1372. if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::SHL &&
  1373. (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
  1374. isShiftedMask_64(CN0->getZExtValue(), MaskIdx0, MaskLen0) &&
  1375. MaskIdx0 == 0 && (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
  1376. (Shamt = CN1->getZExtValue()) == MaskLen0 &&
  1377. (MaskIdx0 + MaskLen0 <= ValBits)) {
  1378. LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 4\n");
  1379. return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
  1380. N1.getOperand(0),
  1381. DAG.getConstant((ValBits - 1), DL, GRLenVT),
  1382. DAG.getConstant(Shamt, DL, GRLenVT));
  1383. }
  1384. // 5th pattern to match BSTRINS:
  1385. // R = or (and X, mask), const
  1386. // where ~mask = (2**size - 1) << lsb, mask & const = 0
  1387. // =>
  1388. // R = BSTRINS X, (const >> lsb), msb, lsb
  1389. // where msb = lsb + size - 1
  1390. if (N0.getOpcode() == ISD::AND &&
  1391. (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
  1392. isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
  1393. (CN1 = dyn_cast<ConstantSDNode>(N1)) &&
  1394. (CN1->getSExtValue() & CN0->getSExtValue()) == 0) {
  1395. LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 5\n");
  1396. return DAG.getNode(
  1397. LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
  1398. DAG.getConstant(CN1->getSExtValue() >> MaskIdx0, DL, ValTy),
  1399. DAG.getConstant((MaskIdx0 + MaskLen0 - 1), DL, GRLenVT),
  1400. DAG.getConstant(MaskIdx0, DL, GRLenVT));
  1401. }
  1402. // 6th pattern.
  1403. // a = b | ((c & mask) << shamt), where all positions in b to be overwritten
  1404. // by the incoming bits are known to be zero.
  1405. // =>
  1406. // a = BSTRINS b, c, shamt + MaskLen - 1, shamt
  1407. //
1408. // Note that the 1st pattern is a special case of the 6th, i.e. the 6th
  1409. // pattern is more common than the 1st. So we put the 1st before the 6th in
  1410. // order to match as many nodes as possible.
  1411. ConstantSDNode *CNMask, *CNShamt;
  1412. unsigned MaskIdx, MaskLen;
  1413. if (N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::AND &&
  1414. (CNMask = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
  1415. isShiftedMask_64(CNMask->getZExtValue(), MaskIdx, MaskLen) &&
  1416. MaskIdx == 0 && (CNShamt = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
  1417. CNShamt->getZExtValue() + MaskLen <= ValBits) {
  1418. Shamt = CNShamt->getZExtValue();
  1419. APInt ShMask(ValBits, CNMask->getZExtValue() << Shamt);
  1420. if (ShMask.isSubsetOf(DAG.computeKnownBits(N0).Zero)) {
  1421. LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 6\n");
  1422. return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0,
  1423. N1.getOperand(0).getOperand(0),
  1424. DAG.getConstant(Shamt + MaskLen - 1, DL, GRLenVT),
  1425. DAG.getConstant(Shamt, DL, GRLenVT));
  1426. }
  1427. }
  1428. // 7th pattern.
  1429. // a = b | ((c << shamt) & shifted_mask), where all positions in b to be
  1430. // overwritten by the incoming bits are known to be zero.
  1431. // =>
  1432. // a = BSTRINS b, c, MaskIdx + MaskLen - 1, MaskIdx
  1433. //
  1434. // Similarly, the 7th pattern is more common than the 2nd. So we put the 2nd
  1435. // before the 7th in order to match as many nodes as possible.
  1436. if (N1.getOpcode() == ISD::AND &&
  1437. (CNMask = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
  1438. isShiftedMask_64(CNMask->getZExtValue(), MaskIdx, MaskLen) &&
  1439. N1.getOperand(0).getOpcode() == ISD::SHL &&
  1440. (CNShamt = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
  1441. CNShamt->getZExtValue() == MaskIdx) {
  1442. APInt ShMask(ValBits, CNMask->getZExtValue());
  1443. if (ShMask.isSubsetOf(DAG.computeKnownBits(N0).Zero)) {
  1444. LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 7\n");
  1445. return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0,
  1446. N1.getOperand(0).getOperand(0),
  1447. DAG.getConstant(MaskIdx + MaskLen - 1, DL, GRLenVT),
  1448. DAG.getConstant(MaskIdx, DL, GRLenVT));
  1449. }
  1450. }
  1451. // (or a, b) and (or b, a) are equivalent, so swap the operands and retry.
  1452. if (!SwapAndRetried) {
  1453. std::swap(N0, N1);
  1454. SwapAndRetried = true;
  1455. goto Retry;
  1456. }
  1457. SwapAndRetried = false;
  1458. Retry2:
  1459. // 8th pattern.
  1460. // a = b | (c & shifted_mask), where all positions in b to be overwritten by
  1461. // the incoming bits are known to be zero.
  1462. // =>
  1463. // a = BSTRINS b, c >> MaskIdx, MaskIdx + MaskLen - 1, MaskIdx
  1464. //
  1465. // Similarly, the 8th pattern is more common than the 4th and 5th patterns. So
1466. // we put it here in order to match as many nodes as possible or generate
1467. // fewer instructions.
  1468. if (N1.getOpcode() == ISD::AND &&
  1469. (CNMask = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
  1470. isShiftedMask_64(CNMask->getZExtValue(), MaskIdx, MaskLen)) {
  1471. APInt ShMask(ValBits, CNMask->getZExtValue());
  1472. if (ShMask.isSubsetOf(DAG.computeKnownBits(N0).Zero)) {
  1473. LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 8\n");
  1474. return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0,
  1475. DAG.getNode(ISD::SRL, DL, N1->getValueType(0),
  1476. N1->getOperand(0),
  1477. DAG.getConstant(MaskIdx, DL, GRLenVT)),
  1478. DAG.getConstant(MaskIdx + MaskLen - 1, DL, GRLenVT),
  1479. DAG.getConstant(MaskIdx, DL, GRLenVT));
  1480. }
  1481. }
  1482. // Swap N0/N1 and retry.
  1483. if (!SwapAndRetried) {
  1484. std::swap(N0, N1);
  1485. SwapAndRetried = true;
  1486. goto Retry2;
  1487. }
  1488. return SDValue();
  1489. }
  1490. // Combine (loongarch_bitrev_w (loongarch_revb_2w X)) to loongarch_bitrev_4b.
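// Byte-swapping the 32-bit word and then reversing all of its bits is
// equivalent to reversing the bits within each byte, which is exactly what
// bitrev.4b does.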
  1491. static SDValue performBITREV_WCombine(SDNode *N, SelectionDAG &DAG,
  1492. TargetLowering::DAGCombinerInfo &DCI,
  1493. const LoongArchSubtarget &Subtarget) {
  1494. if (DCI.isBeforeLegalizeOps())
  1495. return SDValue();
  1496. SDValue Src = N->getOperand(0);
  1497. if (Src.getOpcode() != LoongArchISD::REVB_2W)
  1498. return SDValue();
  1499. return DAG.getNode(LoongArchISD::BITREV_4B, SDLoc(N), N->getValueType(0),
  1500. Src.getOperand(0));
  1501. }
  1502. SDValue LoongArchTargetLowering::PerformDAGCombine(SDNode *N,
  1503. DAGCombinerInfo &DCI) const {
  1504. SelectionDAG &DAG = DCI.DAG;
  1505. switch (N->getOpcode()) {
  1506. default:
  1507. break;
  1508. case ISD::AND:
  1509. return performANDCombine(N, DAG, DCI, Subtarget);
  1510. case ISD::OR:
  1511. return performORCombine(N, DAG, DCI, Subtarget);
  1512. case ISD::SRL:
  1513. return performSRLCombine(N, DAG, DCI, Subtarget);
  1514. case LoongArchISD::BITREV_W:
  1515. return performBITREV_WCombine(N, DAG, DCI, Subtarget);
  1516. }
  1517. return SDValue();
  1518. }
  1519. static MachineBasicBlock *insertDivByZeroTrap(MachineInstr &MI,
  1520. MachineBasicBlock *MBB) {
  1521. if (!ZeroDivCheck)
  1522. return MBB;
  1523. // Build instructions:
  1524. // MBB:
1525. // div (or mod) $dst, $dividend, $divisor
  1526. // bnez $divisor, SinkMBB
  1527. // BreakMBB:
  1528. // break 7 // BRK_DIVZERO
  1529. // SinkMBB:
  1530. // fallthrough
  1531. const BasicBlock *LLVM_BB = MBB->getBasicBlock();
  1532. MachineFunction::iterator It = ++MBB->getIterator();
  1533. MachineFunction *MF = MBB->getParent();
  1534. auto BreakMBB = MF->CreateMachineBasicBlock(LLVM_BB);
  1535. auto SinkMBB = MF->CreateMachineBasicBlock(LLVM_BB);
  1536. MF->insert(It, BreakMBB);
  1537. MF->insert(It, SinkMBB);
  1538. // Transfer the remainder of MBB and its successor edges to SinkMBB.
  1539. SinkMBB->splice(SinkMBB->end(), MBB, std::next(MI.getIterator()), MBB->end());
  1540. SinkMBB->transferSuccessorsAndUpdatePHIs(MBB);
  1541. const TargetInstrInfo &TII = *MF->getSubtarget().getInstrInfo();
  1542. DebugLoc DL = MI.getDebugLoc();
  1543. MachineOperand &Divisor = MI.getOperand(2);
  1544. Register DivisorReg = Divisor.getReg();
  1545. // MBB:
  1546. BuildMI(MBB, DL, TII.get(LoongArch::BNEZ))
  1547. .addReg(DivisorReg, getKillRegState(Divisor.isKill()))
  1548. .addMBB(SinkMBB);
  1549. MBB->addSuccessor(BreakMBB);
  1550. MBB->addSuccessor(SinkMBB);
  1551. // BreakMBB:
  1552. // See linux header file arch/loongarch/include/uapi/asm/break.h for the
  1553. // definition of BRK_DIVZERO.
  1554. BuildMI(BreakMBB, DL, TII.get(LoongArch::BREAK)).addImm(7 /*BRK_DIVZERO*/);
  1555. BreakMBB->addSuccessor(SinkMBB);
  1556. // Clear Divisor's kill flag.
  1557. Divisor.setIsKill(false);
  1558. return SinkMBB;
  1559. }
  1560. MachineBasicBlock *LoongArchTargetLowering::EmitInstrWithCustomInserter(
  1561. MachineInstr &MI, MachineBasicBlock *BB) const {
  1562. const TargetInstrInfo *TII = Subtarget.getInstrInfo();
  1563. DebugLoc DL = MI.getDebugLoc();
  1564. switch (MI.getOpcode()) {
  1565. default:
  1566. llvm_unreachable("Unexpected instr type to insert");
  1567. case LoongArch::DIV_W:
  1568. case LoongArch::DIV_WU:
  1569. case LoongArch::MOD_W:
  1570. case LoongArch::MOD_WU:
  1571. case LoongArch::DIV_D:
  1572. case LoongArch::DIV_DU:
  1573. case LoongArch::MOD_D:
  1574. case LoongArch::MOD_DU:
  1575. return insertDivByZeroTrap(MI, BB);
  1576. break;
  1577. case LoongArch::WRFCSR: {
  1578. BuildMI(*BB, MI, DL, TII->get(LoongArch::MOVGR2FCSR),
  1579. LoongArch::FCSR0 + MI.getOperand(0).getImm())
  1580. .addReg(MI.getOperand(1).getReg());
  1581. MI.eraseFromParent();
  1582. return BB;
  1583. }
  1584. case LoongArch::RDFCSR: {
  1585. MachineInstr *ReadFCSR =
  1586. BuildMI(*BB, MI, DL, TII->get(LoongArch::MOVFCSR2GR),
  1587. MI.getOperand(0).getReg())
  1588. .addReg(LoongArch::FCSR0 + MI.getOperand(1).getImm());
  1589. ReadFCSR->getOperand(1).setIsUndef();
  1590. MI.eraseFromParent();
  1591. return BB;
  1592. }
  1593. }
  1594. }
  1595. const char *LoongArchTargetLowering::getTargetNodeName(unsigned Opcode) const {
  1596. switch ((LoongArchISD::NodeType)Opcode) {
  1597. case LoongArchISD::FIRST_NUMBER:
  1598. break;
  1599. #define NODE_NAME_CASE(node) \
  1600. case LoongArchISD::node: \
  1601. return "LoongArchISD::" #node;
  1602. // TODO: Add more target-dependent nodes later.
  1603. NODE_NAME_CASE(CALL)
  1604. NODE_NAME_CASE(RET)
  1605. NODE_NAME_CASE(TAIL)
  1606. NODE_NAME_CASE(SLL_W)
  1607. NODE_NAME_CASE(SRA_W)
  1608. NODE_NAME_CASE(SRL_W)
  1609. NODE_NAME_CASE(BSTRINS)
  1610. NODE_NAME_CASE(BSTRPICK)
  1611. NODE_NAME_CASE(MOVGR2FR_W_LA64)
  1612. NODE_NAME_CASE(MOVFR2GR_S_LA64)
  1613. NODE_NAME_CASE(FTINT)
  1614. NODE_NAME_CASE(REVB_2H)
  1615. NODE_NAME_CASE(REVB_2W)
  1616. NODE_NAME_CASE(BITREV_4B)
  1617. NODE_NAME_CASE(BITREV_W)
  1618. NODE_NAME_CASE(ROTR_W)
  1619. NODE_NAME_CASE(ROTL_W)
  1620. NODE_NAME_CASE(CLZ_W)
  1621. NODE_NAME_CASE(CTZ_W)
  1622. NODE_NAME_CASE(DBAR)
  1623. NODE_NAME_CASE(IBAR)
  1624. NODE_NAME_CASE(BREAK)
  1625. NODE_NAME_CASE(SYSCALL)
  1626. NODE_NAME_CASE(CRC_W_B_W)
  1627. NODE_NAME_CASE(CRC_W_H_W)
  1628. NODE_NAME_CASE(CRC_W_W_W)
  1629. NODE_NAME_CASE(CRC_W_D_W)
  1630. NODE_NAME_CASE(CRCC_W_B_W)
  1631. NODE_NAME_CASE(CRCC_W_H_W)
  1632. NODE_NAME_CASE(CRCC_W_W_W)
  1633. NODE_NAME_CASE(CRCC_W_D_W)
  1634. NODE_NAME_CASE(CSRRD)
  1635. NODE_NAME_CASE(CSRWR)
  1636. NODE_NAME_CASE(CSRXCHG)
  1637. NODE_NAME_CASE(IOCSRRD_B)
  1638. NODE_NAME_CASE(IOCSRRD_H)
  1639. NODE_NAME_CASE(IOCSRRD_W)
  1640. NODE_NAME_CASE(IOCSRRD_D)
  1641. NODE_NAME_CASE(IOCSRWR_B)
  1642. NODE_NAME_CASE(IOCSRWR_H)
  1643. NODE_NAME_CASE(IOCSRWR_W)
  1644. NODE_NAME_CASE(IOCSRWR_D)
  1645. NODE_NAME_CASE(CPUCFG)
  1646. NODE_NAME_CASE(MOVGR2FCSR)
  1647. NODE_NAME_CASE(MOVFCSR2GR)
  1648. NODE_NAME_CASE(CACOP_D)
  1649. NODE_NAME_CASE(CACOP_W)
  1650. }
  1651. #undef NODE_NAME_CASE
  1652. return nullptr;
  1653. }
  1654. //===----------------------------------------------------------------------===//
  1655. // Calling Convention Implementation
  1656. //===----------------------------------------------------------------------===//
1657. // Eight general-purpose registers a0-a7 are used for passing integer
1658. // arguments, with a0-a1 reused to return values. Generally, the GPRs are used
1659. // to pass fixed-point arguments, and floating-point arguments when no FPR is
1660. // available or when using the soft-float ABI.
  1661. const MCPhysReg ArgGPRs[] = {LoongArch::R4, LoongArch::R5, LoongArch::R6,
  1662. LoongArch::R7, LoongArch::R8, LoongArch::R9,
  1663. LoongArch::R10, LoongArch::R11};
1664. // Eight floating-point registers fa0-fa7 are used for passing floating-point
  1665. // arguments, and fa0-fa1 are also used to return values.
  1666. const MCPhysReg ArgFPR32s[] = {LoongArch::F0, LoongArch::F1, LoongArch::F2,
  1667. LoongArch::F3, LoongArch::F4, LoongArch::F5,
  1668. LoongArch::F6, LoongArch::F7};
  1669. // FPR32 and FPR64 alias each other.
  1670. const MCPhysReg ArgFPR64s[] = {
  1671. LoongArch::F0_64, LoongArch::F1_64, LoongArch::F2_64, LoongArch::F3_64,
  1672. LoongArch::F4_64, LoongArch::F5_64, LoongArch::F6_64, LoongArch::F7_64};
  1673. // Pass a 2*GRLen argument that has been split into two GRLen values through
  1674. // registers or the stack as necessary.
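// For example, on LA32 an i64 argument is split into two i32 halves: if a GPR
// is free, the first half goes there and the second half takes the next GPR
// or a stack slot; otherwise both halves are passed on the stack.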
  1675. static bool CC_LoongArchAssign2GRLen(unsigned GRLen, CCState &State,
  1676. CCValAssign VA1, ISD::ArgFlagsTy ArgFlags1,
  1677. unsigned ValNo2, MVT ValVT2, MVT LocVT2,
  1678. ISD::ArgFlagsTy ArgFlags2) {
  1679. unsigned GRLenInBytes = GRLen / 8;
  1680. if (Register Reg = State.AllocateReg(ArgGPRs)) {
  1681. // At least one half can be passed via register.
  1682. State.addLoc(CCValAssign::getReg(VA1.getValNo(), VA1.getValVT(), Reg,
  1683. VA1.getLocVT(), CCValAssign::Full));
  1684. } else {
  1685. // Both halves must be passed on the stack, with proper alignment.
  1686. Align StackAlign =
  1687. std::max(Align(GRLenInBytes), ArgFlags1.getNonZeroOrigAlign());
  1688. State.addLoc(
  1689. CCValAssign::getMem(VA1.getValNo(), VA1.getValVT(),
  1690. State.AllocateStack(GRLenInBytes, StackAlign),
  1691. VA1.getLocVT(), CCValAssign::Full));
  1692. State.addLoc(CCValAssign::getMem(
  1693. ValNo2, ValVT2, State.AllocateStack(GRLenInBytes, Align(GRLenInBytes)),
  1694. LocVT2, CCValAssign::Full));
  1695. return false;
  1696. }
  1697. if (Register Reg = State.AllocateReg(ArgGPRs)) {
  1698. // The second half can also be passed via register.
  1699. State.addLoc(
  1700. CCValAssign::getReg(ValNo2, ValVT2, Reg, LocVT2, CCValAssign::Full));
  1701. } else {
  1702. // The second half is passed via the stack, without additional alignment.
  1703. State.addLoc(CCValAssign::getMem(
  1704. ValNo2, ValVT2, State.AllocateStack(GRLenInBytes, Align(GRLenInBytes)),
  1705. LocVT2, CCValAssign::Full));
  1706. }
  1707. return false;
  1708. }
  1709. // Implements the LoongArch calling convention. Returns true upon failure.
  1710. static bool CC_LoongArch(const DataLayout &DL, LoongArchABI::ABI ABI,
  1711. unsigned ValNo, MVT ValVT,
  1712. CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
  1713. CCState &State, bool IsFixed, bool IsRet,
  1714. Type *OrigTy) {
  1715. unsigned GRLen = DL.getLargestLegalIntTypeSizeInBits();
1716. assert((GRLen == 32 || GRLen == 64) && "Unsupported GRLen");
  1717. MVT GRLenVT = GRLen == 32 ? MVT::i32 : MVT::i64;
  1718. MVT LocVT = ValVT;
  1719. // Any return value split into more than two values can't be returned
  1720. // directly.
  1721. if (IsRet && ValNo > 1)
  1722. return true;
1723. // A GPR is used for a floating-point value passed as a variadic argument or when no FPR is available.
  1724. bool UseGPRForFloat = true;
  1725. switch (ABI) {
  1726. default:
  1727. llvm_unreachable("Unexpected ABI");
  1728. case LoongArchABI::ABI_ILP32S:
  1729. case LoongArchABI::ABI_LP64S:
  1730. case LoongArchABI::ABI_ILP32F:
  1731. case LoongArchABI::ABI_LP64F:
  1732. report_fatal_error("Unimplemented ABI");
  1733. break;
  1734. case LoongArchABI::ABI_ILP32D:
  1735. case LoongArchABI::ABI_LP64D:
  1736. UseGPRForFloat = !IsFixed;
  1737. break;
  1738. }
  1739. // FPR32 and FPR64 alias each other.
  1740. if (State.getFirstUnallocated(ArgFPR32s) == std::size(ArgFPR32s))
  1741. UseGPRForFloat = true;
  1742. if (UseGPRForFloat && ValVT == MVT::f32) {
  1743. LocVT = GRLenVT;
  1744. LocInfo = CCValAssign::BCvt;
  1745. } else if (UseGPRForFloat && GRLen == 64 && ValVT == MVT::f64) {
  1746. LocVT = MVT::i64;
  1747. LocInfo = CCValAssign::BCvt;
  1748. } else if (UseGPRForFloat && GRLen == 32 && ValVT == MVT::f64) {
  1749. // TODO: Handle passing f64 on LA32 with D feature.
  1750. report_fatal_error("Passing f64 with GPR on LA32 is undefined");
  1751. }
  1752. // If this is a variadic argument, the LoongArch calling convention requires
  1753. // that it is assigned an 'even' or 'aligned' register if it has (2*GRLen)/8
  1754. // byte alignment. An aligned register should be used regardless of whether
  1755. // the original argument was split during legalisation or not. The argument
  1756. // will not be passed by registers if the original type is larger than
  1757. // 2*GRLen, so the register alignment rule does not apply.
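// e.g. if a0 is already taken, a 2*GRLen-aligned variadic argument skips a1
// and is passed in the a2/a3 pair.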
  1758. unsigned TwoGRLenInBytes = (2 * GRLen) / 8;
  1759. if (!IsFixed && ArgFlags.getNonZeroOrigAlign() == TwoGRLenInBytes &&
  1760. DL.getTypeAllocSize(OrigTy) == TwoGRLenInBytes) {
  1761. unsigned RegIdx = State.getFirstUnallocated(ArgGPRs);
  1762. // Skip 'odd' register if necessary.
  1763. if (RegIdx != std::size(ArgGPRs) && RegIdx % 2 == 1)
  1764. State.AllocateReg(ArgGPRs);
  1765. }
  1766. SmallVectorImpl<CCValAssign> &PendingLocs = State.getPendingLocs();
  1767. SmallVectorImpl<ISD::ArgFlagsTy> &PendingArgFlags =
  1768. State.getPendingArgFlags();
  1769. assert(PendingLocs.size() == PendingArgFlags.size() &&
  1770. "PendingLocs and PendingArgFlags out of sync");
  1771. // Split arguments might be passed indirectly, so keep track of the pending
  1772. // values.
  1773. if (ValVT.isScalarInteger() && (ArgFlags.isSplit() || !PendingLocs.empty())) {
  1774. LocVT = GRLenVT;
  1775. LocInfo = CCValAssign::Indirect;
  1776. PendingLocs.push_back(
  1777. CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo));
  1778. PendingArgFlags.push_back(ArgFlags);
  1779. if (!ArgFlags.isSplitEnd()) {
  1780. return false;
  1781. }
  1782. }
  1783. // If the split argument only had two elements, it should be passed directly
  1784. // in registers or on the stack.
  1785. if (ValVT.isScalarInteger() && ArgFlags.isSplitEnd() &&
  1786. PendingLocs.size() <= 2) {
  1787. assert(PendingLocs.size() == 2 && "Unexpected PendingLocs.size()");
  1788. // Apply the normal calling convention rules to the first half of the
  1789. // split argument.
  1790. CCValAssign VA = PendingLocs[0];
  1791. ISD::ArgFlagsTy AF = PendingArgFlags[0];
  1792. PendingLocs.clear();
  1793. PendingArgFlags.clear();
  1794. return CC_LoongArchAssign2GRLen(GRLen, State, VA, AF, ValNo, ValVT, LocVT,
  1795. ArgFlags);
  1796. }
  1797. // Allocate to a register if possible, or else a stack slot.
  1798. Register Reg;
  1799. unsigned StoreSizeBytes = GRLen / 8;
  1800. Align StackAlign = Align(GRLen / 8);
  1801. if (ValVT == MVT::f32 && !UseGPRForFloat)
  1802. Reg = State.AllocateReg(ArgFPR32s);
  1803. else if (ValVT == MVT::f64 && !UseGPRForFloat)
  1804. Reg = State.AllocateReg(ArgFPR64s);
  1805. else
  1806. Reg = State.AllocateReg(ArgGPRs);
  1807. unsigned StackOffset =
  1808. Reg ? 0 : State.AllocateStack(StoreSizeBytes, StackAlign);
  1809. // If we reach this point and PendingLocs is non-empty, we must be at the
  1810. // end of a split argument that must be passed indirectly.
  1811. if (!PendingLocs.empty()) {
  1812. assert(ArgFlags.isSplitEnd() && "Expected ArgFlags.isSplitEnd()");
  1813. assert(PendingLocs.size() > 2 && "Unexpected PendingLocs.size()");
  1814. for (auto &It : PendingLocs) {
  1815. if (Reg)
  1816. It.convertToReg(Reg);
  1817. else
  1818. It.convertToMem(StackOffset);
  1819. State.addLoc(It);
  1820. }
  1821. PendingLocs.clear();
  1822. PendingArgFlags.clear();
  1823. return false;
  1824. }
  1825. assert((!UseGPRForFloat || LocVT == GRLenVT) &&
  1826. "Expected an GRLenVT at this stage");
  1827. if (Reg) {
  1828. State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
  1829. return false;
  1830. }
  1831. // When a floating-point value is passed on the stack, no bit-cast is needed.
  1832. if (ValVT.isFloatingPoint()) {
  1833. LocVT = ValVT;
  1834. LocInfo = CCValAssign::Full;
  1835. }
  1836. State.addLoc(CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
  1837. return false;
  1838. }
  1839. void LoongArchTargetLowering::analyzeInputArgs(
  1840. MachineFunction &MF, CCState &CCInfo,
  1841. const SmallVectorImpl<ISD::InputArg> &Ins, bool IsRet,
  1842. LoongArchCCAssignFn Fn) const {
  1843. FunctionType *FType = MF.getFunction().getFunctionType();
  1844. for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
  1845. MVT ArgVT = Ins[i].VT;
  1846. Type *ArgTy = nullptr;
  1847. if (IsRet)
  1848. ArgTy = FType->getReturnType();
  1849. else if (Ins[i].isOrigArg())
  1850. ArgTy = FType->getParamType(Ins[i].getOrigArgIndex());
  1851. LoongArchABI::ABI ABI =
  1852. MF.getSubtarget<LoongArchSubtarget>().getTargetABI();
  1853. if (Fn(MF.getDataLayout(), ABI, i, ArgVT, CCValAssign::Full, Ins[i].Flags,
  1854. CCInfo, /*IsFixed=*/true, IsRet, ArgTy)) {
  1855. LLVM_DEBUG(dbgs() << "InputArg #" << i << " has unhandled type "
  1856. << EVT(ArgVT).getEVTString() << '\n');
  1857. llvm_unreachable("");
  1858. }
  1859. }
  1860. }
  1861. void LoongArchTargetLowering::analyzeOutputArgs(
  1862. MachineFunction &MF, CCState &CCInfo,
  1863. const SmallVectorImpl<ISD::OutputArg> &Outs, bool IsRet,
  1864. CallLoweringInfo *CLI, LoongArchCCAssignFn Fn) const {
  1865. for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
  1866. MVT ArgVT = Outs[i].VT;
  1867. Type *OrigTy = CLI ? CLI->getArgs()[Outs[i].OrigArgIndex].Ty : nullptr;
  1868. LoongArchABI::ABI ABI =
  1869. MF.getSubtarget<LoongArchSubtarget>().getTargetABI();
  1870. if (Fn(MF.getDataLayout(), ABI, i, ArgVT, CCValAssign::Full, Outs[i].Flags,
  1871. CCInfo, Outs[i].IsFixed, IsRet, OrigTy)) {
  1872. LLVM_DEBUG(dbgs() << "OutputArg #" << i << " has unhandled type "
  1873. << EVT(ArgVT).getEVTString() << "\n");
  1874. llvm_unreachable("");
  1875. }
  1876. }
  1877. }
  1878. // Convert Val to a ValVT. Should not be called for CCValAssign::Indirect
  1879. // values.
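// e.g. an f32 value that was passed or returned in a 64-bit GPR (BCvt) is
// moved back into an FPR with movgr2fr.w on LA64.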
  1880. static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val,
  1881. const CCValAssign &VA, const SDLoc &DL) {
  1882. switch (VA.getLocInfo()) {
  1883. default:
  1884. llvm_unreachable("Unexpected CCValAssign::LocInfo");
  1885. case CCValAssign::Full:
  1886. case CCValAssign::Indirect:
  1887. break;
  1888. case CCValAssign::BCvt:
  1889. if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32)
  1890. Val = DAG.getNode(LoongArchISD::MOVGR2FR_W_LA64, DL, MVT::f32, Val);
  1891. else
  1892. Val = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Val);
  1893. break;
  1894. }
  1895. return Val;
  1896. }
  1897. static SDValue unpackFromRegLoc(SelectionDAG &DAG, SDValue Chain,
  1898. const CCValAssign &VA, const SDLoc &DL,
  1899. const LoongArchTargetLowering &TLI) {
  1900. MachineFunction &MF = DAG.getMachineFunction();
  1901. MachineRegisterInfo &RegInfo = MF.getRegInfo();
  1902. EVT LocVT = VA.getLocVT();
  1903. SDValue Val;
  1904. const TargetRegisterClass *RC = TLI.getRegClassFor(LocVT.getSimpleVT());
  1905. Register VReg = RegInfo.createVirtualRegister(RC);
  1906. RegInfo.addLiveIn(VA.getLocReg(), VReg);
  1907. Val = DAG.getCopyFromReg(Chain, DL, VReg, LocVT);
  1908. return convertLocVTToValVT(DAG, Val, VA, DL);
  1909. }
  1910. // The caller is responsible for loading the full value if the argument is
  1911. // passed with CCValAssign::Indirect.
  1912. static SDValue unpackFromMemLoc(SelectionDAG &DAG, SDValue Chain,
  1913. const CCValAssign &VA, const SDLoc &DL) {
  1914. MachineFunction &MF = DAG.getMachineFunction();
  1915. MachineFrameInfo &MFI = MF.getFrameInfo();
  1916. EVT ValVT = VA.getValVT();
  1917. int FI = MFI.CreateFixedObject(ValVT.getStoreSize(), VA.getLocMemOffset(),
  1918. /*IsImmutable=*/true);
  1919. SDValue FIN = DAG.getFrameIndex(
  1920. FI, MVT::getIntegerVT(DAG.getDataLayout().getPointerSizeInBits(0)));
  1921. ISD::LoadExtType ExtType;
  1922. switch (VA.getLocInfo()) {
  1923. default:
  1924. llvm_unreachable("Unexpected CCValAssign::LocInfo");
  1925. case CCValAssign::Full:
  1926. case CCValAssign::Indirect:
  1927. case CCValAssign::BCvt:
  1928. ExtType = ISD::NON_EXTLOAD;
  1929. break;
  1930. }
  1931. return DAG.getExtLoad(
  1932. ExtType, DL, VA.getLocVT(), Chain, FIN,
  1933. MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), ValVT);
  1934. }
  1935. static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val,
  1936. const CCValAssign &VA, const SDLoc &DL) {
  1937. EVT LocVT = VA.getLocVT();
  1938. switch (VA.getLocInfo()) {
  1939. default:
  1940. llvm_unreachable("Unexpected CCValAssign::LocInfo");
  1941. case CCValAssign::Full:
  1942. break;
  1943. case CCValAssign::BCvt:
  1944. if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32)
  1945. Val = DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Val);
  1946. else
  1947. Val = DAG.getNode(ISD::BITCAST, DL, LocVT, Val);
  1948. break;
  1949. }
  1950. return Val;
  1951. }
  1952. static bool CC_LoongArch_GHC(unsigned ValNo, MVT ValVT, MVT LocVT,
  1953. CCValAssign::LocInfo LocInfo,
  1954. ISD::ArgFlagsTy ArgFlags, CCState &State) {
  1955. if (LocVT == MVT::i32 || LocVT == MVT::i64) {
  1956. // Pass in STG registers: Base, Sp, Hp, R1, R2, R3, R4, R5, SpLim
  1957. // s0 s1 s2 s3 s4 s5 s6 s7 s8
  1958. static const MCPhysReg GPRList[] = {
  1959. LoongArch::R23, LoongArch::R24, LoongArch::R25, LoongArch::R26, LoongArch::R27,
  1960. LoongArch::R28, LoongArch::R29, LoongArch::R30, LoongArch::R31};
  1961. if (unsigned Reg = State.AllocateReg(GPRList)) {
  1962. State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
  1963. return false;
  1964. }
  1965. }
  1966. if (LocVT == MVT::f32) {
  1967. // Pass in STG registers: F1, F2, F3, F4
  1968. // fs0,fs1,fs2,fs3
  1969. static const MCPhysReg FPR32List[] = {LoongArch::F24, LoongArch::F25,
  1970. LoongArch::F26, LoongArch::F27};
  1971. if (unsigned Reg = State.AllocateReg(FPR32List)) {
  1972. State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
  1973. return false;
  1974. }
  1975. }
  1976. if (LocVT == MVT::f64) {
  1977. // Pass in STG registers: D1, D2, D3, D4
  1978. // fs4,fs5,fs6,fs7
  1979. static const MCPhysReg FPR64List[] = {LoongArch::F28_64, LoongArch::F29_64,
  1980. LoongArch::F30_64, LoongArch::F31_64};
  1981. if (unsigned Reg = State.AllocateReg(FPR64List)) {
  1982. State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
  1983. return false;
  1984. }
  1985. }
  1986. report_fatal_error("No registers left in GHC calling convention");
  1987. return true;
  1988. }
  1989. // Transform physical registers into virtual registers.
  1990. SDValue LoongArchTargetLowering::LowerFormalArguments(
  1991. SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
  1992. const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
  1993. SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
  1994. MachineFunction &MF = DAG.getMachineFunction();
  1995. switch (CallConv) {
  1996. default:
  1997. llvm_unreachable("Unsupported calling convention");
  1998. case CallingConv::C:
  1999. case CallingConv::Fast:
  2000. break;
  2001. case CallingConv::GHC:
  2002. if (!MF.getSubtarget().getFeatureBits()[LoongArch::FeatureBasicF] ||
  2003. !MF.getSubtarget().getFeatureBits()[LoongArch::FeatureBasicD])
  2004. report_fatal_error(
  2005. "GHC calling convention requires the F and D extensions");
  2006. }
  2007. EVT PtrVT = getPointerTy(DAG.getDataLayout());
  2008. MVT GRLenVT = Subtarget.getGRLenVT();
  2009. unsigned GRLenInBytes = Subtarget.getGRLen() / 8;
2010. // Used with varargs to accumulate store chains.
  2011. std::vector<SDValue> OutChains;
  2012. // Assign locations to all of the incoming arguments.
  2013. SmallVector<CCValAssign> ArgLocs;
  2014. CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
  2015. if (CallConv == CallingConv::GHC)
  2016. CCInfo.AnalyzeFormalArguments(Ins, CC_LoongArch_GHC);
  2017. else
  2018. analyzeInputArgs(MF, CCInfo, Ins, /*IsRet=*/false, CC_LoongArch);
  2019. for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
  2020. CCValAssign &VA = ArgLocs[i];
  2021. SDValue ArgValue;
  2022. if (VA.isRegLoc())
  2023. ArgValue = unpackFromRegLoc(DAG, Chain, VA, DL, *this);
  2024. else
  2025. ArgValue = unpackFromMemLoc(DAG, Chain, VA, DL);
  2026. if (VA.getLocInfo() == CCValAssign::Indirect) {
  2027. // If the original argument was split and passed by reference, we need to
  2028. // load all parts of it here (using the same address).
  2029. InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, ArgValue,
  2030. MachinePointerInfo()));
  2031. unsigned ArgIndex = Ins[i].OrigArgIndex;
  2032. unsigned ArgPartOffset = Ins[i].PartOffset;
  2033. assert(ArgPartOffset == 0);
  2034. while (i + 1 != e && Ins[i + 1].OrigArgIndex == ArgIndex) {
  2035. CCValAssign &PartVA = ArgLocs[i + 1];
  2036. unsigned PartOffset = Ins[i + 1].PartOffset - ArgPartOffset;
  2037. SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL);
  2038. SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, ArgValue, Offset);
  2039. InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain, Address,
  2040. MachinePointerInfo()));
  2041. ++i;
  2042. }
  2043. continue;
  2044. }
  2045. InVals.push_back(ArgValue);
  2046. }
  2047. if (IsVarArg) {
  2048. ArrayRef<MCPhysReg> ArgRegs = ArrayRef(ArgGPRs);
  2049. unsigned Idx = CCInfo.getFirstUnallocated(ArgRegs);
  2050. const TargetRegisterClass *RC = &LoongArch::GPRRegClass;
  2051. MachineFrameInfo &MFI = MF.getFrameInfo();
  2052. MachineRegisterInfo &RegInfo = MF.getRegInfo();
  2053. auto *LoongArchFI = MF.getInfo<LoongArchMachineFunctionInfo>();
  2054. // Offset of the first variable argument from stack pointer, and size of
  2055. // the vararg save area. For now, the varargs save area is either zero or
  2056. // large enough to hold a0-a7.
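// For example, if the fixed arguments consumed a0-a2, registers a3-a7 are
// spilled to the save area (plus one pad slot, since five is odd, so the area
// stays 2*GRLen-aligned).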
  2057. int VaArgOffset, VarArgsSaveSize;
  2058. // If all registers are allocated, then all varargs must be passed on the
  2059. // stack and we don't need to save any argregs.
  2060. if (ArgRegs.size() == Idx) {
  2061. VaArgOffset = CCInfo.getNextStackOffset();
  2062. VarArgsSaveSize = 0;
  2063. } else {
  2064. VarArgsSaveSize = GRLenInBytes * (ArgRegs.size() - Idx);
  2065. VaArgOffset = -VarArgsSaveSize;
  2066. }
  2067. // Record the frame index of the first variable argument
2068. // which is needed by VASTART.
  2069. int FI = MFI.CreateFixedObject(GRLenInBytes, VaArgOffset, true);
  2070. LoongArchFI->setVarArgsFrameIndex(FI);
  2071. // If saving an odd number of registers then create an extra stack slot to
  2072. // ensure that the frame pointer is 2*GRLen-aligned, which in turn ensures
2073. // offsets to even-numbered registers remain 2*GRLen-aligned.
  2074. if (Idx % 2) {
  2075. MFI.CreateFixedObject(GRLenInBytes, VaArgOffset - (int)GRLenInBytes,
  2076. true);
  2077. VarArgsSaveSize += GRLenInBytes;
  2078. }
  2079. // Copy the integer registers that may have been used for passing varargs
  2080. // to the vararg save area.
  2081. for (unsigned I = Idx; I < ArgRegs.size();
  2082. ++I, VaArgOffset += GRLenInBytes) {
  2083. const Register Reg = RegInfo.createVirtualRegister(RC);
  2084. RegInfo.addLiveIn(ArgRegs[I], Reg);
  2085. SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, GRLenVT);
  2086. FI = MFI.CreateFixedObject(GRLenInBytes, VaArgOffset, true);
  2087. SDValue PtrOff = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
  2088. SDValue Store = DAG.getStore(Chain, DL, ArgValue, PtrOff,
  2089. MachinePointerInfo::getFixedStack(MF, FI));
  2090. cast<StoreSDNode>(Store.getNode())
  2091. ->getMemOperand()
  2092. ->setValue((Value *)nullptr);
  2093. OutChains.push_back(Store);
  2094. }
  2095. LoongArchFI->setVarArgsSaveSize(VarArgsSaveSize);
  2096. }
  2097. // All stores are grouped in one node to allow the matching between
  2098. // the size of Ins and InVals. This only happens for vararg functions.
  2099. if (!OutChains.empty()) {
  2100. OutChains.push_back(Chain);
  2101. Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
  2102. }
  2103. return Chain;
  2104. }
  2105. bool LoongArchTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
  2106. return CI->isTailCall();
  2107. }
  2108. // Check whether the call is eligible for tail call optimization.
  2109. bool LoongArchTargetLowering::isEligibleForTailCallOptimization(
  2110. CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF,
  2111. const SmallVectorImpl<CCValAssign> &ArgLocs) const {
  2112. auto CalleeCC = CLI.CallConv;
  2113. auto &Outs = CLI.Outs;
  2114. auto &Caller = MF.getFunction();
  2115. auto CallerCC = Caller.getCallingConv();
  2116. // Do not tail call opt if the stack is used to pass parameters.
  2117. if (CCInfo.getNextStackOffset() != 0)
  2118. return false;
  2119. // Do not tail call opt if any parameters need to be passed indirectly.
  2120. for (auto &VA : ArgLocs)
  2121. if (VA.getLocInfo() == CCValAssign::Indirect)
  2122. return false;
  2123. // Do not tail call opt if either caller or callee uses struct return
  2124. // semantics.
  2125. auto IsCallerStructRet = Caller.hasStructRetAttr();
  2126. auto IsCalleeStructRet = Outs.empty() ? false : Outs[0].Flags.isSRet();
  2127. if (IsCallerStructRet || IsCalleeStructRet)
  2128. return false;
  2129. // Do not tail call opt if either the callee or caller has a byval argument.
  2130. for (auto &Arg : Outs)
  2131. if (Arg.Flags.isByVal())
  2132. return false;
  2133. // The callee has to preserve all registers the caller needs to preserve.
  2134. const LoongArchRegisterInfo *TRI = Subtarget.getRegisterInfo();
  2135. const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
  2136. if (CalleeCC != CallerCC) {
  2137. const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
  2138. if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
  2139. return false;
  2140. }
  2141. return true;
  2142. }
  2143. static Align getPrefTypeAlign(EVT VT, SelectionDAG &DAG) {
  2144. return DAG.getDataLayout().getPrefTypeAlign(
  2145. VT.getTypeForEVT(*DAG.getContext()));
  2146. }
  2147. // Lower a call to a callseq_start + CALL + callseq_end chain, and add input
  2148. // and output parameter nodes.
  2149. SDValue
  2150. LoongArchTargetLowering::LowerCall(CallLoweringInfo &CLI,
  2151. SmallVectorImpl<SDValue> &InVals) const {
  2152. SelectionDAG &DAG = CLI.DAG;
  2153. SDLoc &DL = CLI.DL;
  2154. SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
  2155. SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
  2156. SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
  2157. SDValue Chain = CLI.Chain;
  2158. SDValue Callee = CLI.Callee;
  2159. CallingConv::ID CallConv = CLI.CallConv;
  2160. bool IsVarArg = CLI.IsVarArg;
  2161. EVT PtrVT = getPointerTy(DAG.getDataLayout());
  2162. MVT GRLenVT = Subtarget.getGRLenVT();
  2163. bool &IsTailCall = CLI.IsTailCall;
  2164. MachineFunction &MF = DAG.getMachineFunction();
  2165. // Analyze the operands of the call, assigning locations to each operand.
  2166. SmallVector<CCValAssign> ArgLocs;
  2167. CCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
  2168. if (CallConv == CallingConv::GHC)
  2169. ArgCCInfo.AnalyzeCallOperands(Outs, CC_LoongArch_GHC);
  2170. else
  2171. analyzeOutputArgs(MF, ArgCCInfo, Outs, /*IsRet=*/false, &CLI, CC_LoongArch);
  2172. // Check if it's really possible to do a tail call.
  2173. if (IsTailCall)
  2174. IsTailCall = isEligibleForTailCallOptimization(ArgCCInfo, CLI, MF, ArgLocs);
  2175. if (IsTailCall)
  2176. ++NumTailCalls;
  2177. else if (CLI.CB && CLI.CB->isMustTailCall())
  2178. report_fatal_error("failed to perform tail call elimination on a call "
  2179. "site marked musttail");
  2180. // Get a count of how many bytes are to be pushed on the stack.
  2181. unsigned NumBytes = ArgCCInfo.getNextStackOffset();
  2182. // Create local copies for byval args.
  2183. SmallVector<SDValue> ByValArgs;
  2184. for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
  2185. ISD::ArgFlagsTy Flags = Outs[i].Flags;
  2186. if (!Flags.isByVal())
  2187. continue;
  2188. SDValue Arg = OutVals[i];
  2189. unsigned Size = Flags.getByValSize();
  2190. Align Alignment = Flags.getNonZeroByValAlign();
  2191. int FI =
  2192. MF.getFrameInfo().CreateStackObject(Size, Alignment, /*isSS=*/false);
  2193. SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
  2194. SDValue SizeNode = DAG.getConstant(Size, DL, GRLenVT);
  2195. Chain = DAG.getMemcpy(Chain, DL, FIPtr, Arg, SizeNode, Alignment,
  2196. /*IsVolatile=*/false,
  2197. /*AlwaysInline=*/false, /*isTailCall=*/IsTailCall,
  2198. MachinePointerInfo(), MachinePointerInfo());
  2199. ByValArgs.push_back(FIPtr);
  2200. }
  2201. if (!IsTailCall)
  2202. Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, CLI.DL);
  2203. // Copy argument values to their designated locations.
  2204. SmallVector<std::pair<Register, SDValue>> RegsToPass;
  2205. SmallVector<SDValue> MemOpChains;
  2206. SDValue StackPtr;
  2207. for (unsigned i = 0, j = 0, e = ArgLocs.size(); i != e; ++i) {
  2208. CCValAssign &VA = ArgLocs[i];
  2209. SDValue ArgValue = OutVals[i];
  2210. ISD::ArgFlagsTy Flags = Outs[i].Flags;
  2211. // Promote the value if needed.
  2212. // For now, only handle fully promoted and indirect arguments.
  2213. if (VA.getLocInfo() == CCValAssign::Indirect) {
  2214. // Store the argument in a stack slot and pass its address.
  2215. Align StackAlign =
  2216. std::max(getPrefTypeAlign(Outs[i].ArgVT, DAG),
  2217. getPrefTypeAlign(ArgValue.getValueType(), DAG));
  2218. TypeSize StoredSize = ArgValue.getValueType().getStoreSize();
  2219. // If the original argument was split and passed by reference, we need to
  2220. // store the required parts of it here (and pass just one address).
  2221. unsigned ArgIndex = Outs[i].OrigArgIndex;
  2222. unsigned ArgPartOffset = Outs[i].PartOffset;
  2223. assert(ArgPartOffset == 0);
  2224. // Calculate the total size to store. We don't have access to what we're
  2225. // actually storing other than performing the loop and collecting the
  2226. // info.
  2227. SmallVector<std::pair<SDValue, SDValue>> Parts;
  2228. while (i + 1 != e && Outs[i + 1].OrigArgIndex == ArgIndex) {
  2229. SDValue PartValue = OutVals[i + 1];
  2230. unsigned PartOffset = Outs[i + 1].PartOffset - ArgPartOffset;
  2231. SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL);
  2232. EVT PartVT = PartValue.getValueType();
  2233. StoredSize += PartVT.getStoreSize();
  2234. StackAlign = std::max(StackAlign, getPrefTypeAlign(PartVT, DAG));
  2235. Parts.push_back(std::make_pair(PartValue, Offset));
  2236. ++i;
  2237. }
  2238. SDValue SpillSlot = DAG.CreateStackTemporary(StoredSize, StackAlign);
  2239. int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
  2240. MemOpChains.push_back(
  2241. DAG.getStore(Chain, DL, ArgValue, SpillSlot,
  2242. MachinePointerInfo::getFixedStack(MF, FI)));
  2243. for (const auto &Part : Parts) {
  2244. SDValue PartValue = Part.first;
  2245. SDValue PartOffset = Part.second;
  2246. SDValue Address =
  2247. DAG.getNode(ISD::ADD, DL, PtrVT, SpillSlot, PartOffset);
  2248. MemOpChains.push_back(
  2249. DAG.getStore(Chain, DL, PartValue, Address,
  2250. MachinePointerInfo::getFixedStack(MF, FI)));
  2251. }
  2252. ArgValue = SpillSlot;
  2253. } else {
  2254. ArgValue = convertValVTToLocVT(DAG, ArgValue, VA, DL);
  2255. }
  2256. // Use local copy if it is a byval arg.
  2257. if (Flags.isByVal())
  2258. ArgValue = ByValArgs[j++];
  2259. if (VA.isRegLoc()) {
  2260. // Queue up the argument copies and emit them at the end.
  2261. RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue));
  2262. } else {
  2263. assert(VA.isMemLoc() && "Argument not register or memory");
  2264. assert(!IsTailCall && "Tail call not allowed if stack is used "
  2265. "for passing parameters");
  2266. // Work out the address of the stack slot.
  2267. if (!StackPtr.getNode())
  2268. StackPtr = DAG.getCopyFromReg(Chain, DL, LoongArch::R3, PtrVT);
  2269. SDValue Address =
  2270. DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
  2271. DAG.getIntPtrConstant(VA.getLocMemOffset(), DL));
  2272. // Emit the store.
  2273. MemOpChains.push_back(
  2274. DAG.getStore(Chain, DL, ArgValue, Address, MachinePointerInfo()));
  2275. }
  2276. }
  2277. // Join the stores, which are independent of one another.
  2278. if (!MemOpChains.empty())
  2279. Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);
  2280. SDValue Glue;
  2281. // Build a sequence of copy-to-reg nodes, chained and glued together.
  2282. for (auto &Reg : RegsToPass) {
  2283. Chain = DAG.getCopyToReg(Chain, DL, Reg.first, Reg.second, Glue);
  2284. Glue = Chain.getValue(1);
  2285. }
  2286. // If the callee is a GlobalAddress/ExternalSymbol node, turn it into a
  2287. // TargetGlobalAddress/TargetExternalSymbol node so that legalize won't
2288. // split it, and so the direct call can be matched by PseudoCALL.
  2289. if (GlobalAddressSDNode *S = dyn_cast<GlobalAddressSDNode>(Callee)) {
  2290. const GlobalValue *GV = S->getGlobal();
  2291. unsigned OpFlags =
  2292. getTargetMachine().shouldAssumeDSOLocal(*GV->getParent(), GV)
  2293. ? LoongArchII::MO_CALL
  2294. : LoongArchII::MO_CALL_PLT;
  2295. Callee = DAG.getTargetGlobalAddress(S->getGlobal(), DL, PtrVT, 0, OpFlags);
  2296. } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
  2297. unsigned OpFlags = getTargetMachine().shouldAssumeDSOLocal(
  2298. *MF.getFunction().getParent(), nullptr)
  2299. ? LoongArchII::MO_CALL
  2300. : LoongArchII::MO_CALL_PLT;
  2301. Callee = DAG.getTargetExternalSymbol(S->getSymbol(), PtrVT, OpFlags);
  2302. }
  // The first call operand is the chain and the second is the target address.
  SmallVector<SDValue> Ops;
  Ops.push_back(Chain);
  Ops.push_back(Callee);

  // Add argument registers to the end of the list so that they are
  // known live into the call.
  for (auto &Reg : RegsToPass)
    Ops.push_back(DAG.getRegister(Reg.first, Reg.second.getValueType()));

  if (!IsTailCall) {
    // Add a register mask operand representing the call-preserved registers.
    const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
    const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv);
    assert(Mask && "Missing call preserved mask for calling convention");
    Ops.push_back(DAG.getRegisterMask(Mask));
  }

  // Glue the call to the argument copies, if any.
  if (Glue.getNode())
    Ops.push_back(Glue);

  // Emit the call.
  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);

  if (IsTailCall) {
    MF.getFrameInfo().setHasTailCall();
    return DAG.getNode(LoongArchISD::TAIL, DL, NodeTys, Ops);
  }

  Chain = DAG.getNode(LoongArchISD::CALL, DL, NodeTys, Ops);
  DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
  Glue = Chain.getValue(1);

  // Mark the end of the call, which is glued to the call itself.
  Chain = DAG.getCALLSEQ_END(Chain, NumBytes, 0, Glue, DL);
  Glue = Chain.getValue(1);

  // Assign locations to each value returned by this call.
  SmallVector<CCValAssign> RVLocs;
  CCState RetCCInfo(CallConv, IsVarArg, MF, RVLocs, *DAG.getContext());
  analyzeInputArgs(MF, RetCCInfo, Ins, /*IsRet=*/true, CC_LoongArch);

  // Copy all of the result registers out of their specified physreg.
  for (auto &VA : RVLocs) {
    // Copy the value out.
    SDValue RetValue =
        DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), VA.getLocVT(), Glue);
    // Glue the RetValue to the end of the call sequence.
    Chain = RetValue.getValue(1);
    Glue = RetValue.getValue(2);

    RetValue = convertLocVTToValVT(DAG, RetValue, VA, DL);
    InVals.push_back(RetValue);
  }

  return Chain;
}

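// Check whether all return values described in Outs can be lowered to
// registers under this calling convention; returning false makes the common
// SelectionDAG code demote the return value to an sret argument instead.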
bool LoongArchTargetLowering::CanLowerReturn(
    CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg,
    const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const {
  SmallVector<CCValAssign> RVLocs;
  CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context);

  for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
    LoongArchABI::ABI ABI =
        MF.getSubtarget<LoongArchSubtarget>().getTargetABI();
    if (CC_LoongArch(MF.getDataLayout(), ABI, i, Outs[i].VT, CCValAssign::Full,
                     Outs[i].Flags, CCInfo, /*IsFixed=*/true, /*IsRet=*/true,
                     nullptr))
      return false;
  }
  return true;
}

SDValue LoongArchTargetLowering::LowerReturn(
    SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
    const SmallVectorImpl<ISD::OutputArg> &Outs,
    const SmallVectorImpl<SDValue> &OutVals, const SDLoc &DL,
    SelectionDAG &DAG) const {
  // Stores the assignment of the return value to a location.
  SmallVector<CCValAssign> RVLocs;

  // Info about the registers and stack slot.
  CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs,
                 *DAG.getContext());

  analyzeOutputArgs(DAG.getMachineFunction(), CCInfo, Outs, /*IsRet=*/true,
                    nullptr, CC_LoongArch);
  if (CallConv == CallingConv::GHC && !RVLocs.empty())
    report_fatal_error("GHC functions return void only");
  SDValue Glue;
  SmallVector<SDValue, 4> RetOps(1, Chain);

  // Copy the result values into the output registers.
  for (unsigned i = 0, e = RVLocs.size(); i < e; ++i) {
    CCValAssign &VA = RVLocs[i];
    assert(VA.isRegLoc() && "Can only return in registers!");

    // Handle a 'normal' return.
    SDValue Val = convertValVTToLocVT(DAG, OutVals[i], VA, DL);
    Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Val, Glue);

    // Guarantee that all emitted copies are stuck together.
    Glue = Chain.getValue(1);
    RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
  }

  RetOps[0] = Chain; // Update chain.

  // Add the glue node if we have it.
  if (Glue.getNode())
    RetOps.push_back(Glue);

  return DAG.getNode(LoongArchISD::RET, DL, MVT::Other, RetOps);
}

bool LoongArchTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
                                           bool ForCodeSize) const {
  // TODO: Maybe need more checks here after vector extension is supported.
  if (VT == MVT::f32 && !Subtarget.hasBasicF())
    return false;
  if (VT == MVT::f64 && !Subtarget.hasBasicD())
    return false;
  return (Imm.isZero() || Imm.isExactlyValue(+1.0));
}

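// Bit-counting operations are cheap to speculate on LoongArch since cttz and
// ctlz lower to single instructions (ctz.[wd]/clz.[wd]), so no guarding branch
// is worth keeping around.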
bool LoongArchTargetLowering::isCheapToSpeculateCttz(Type *) const {
  return true;
}

bool LoongArchTargetLowering::isCheapToSpeculateCtlz(Type *) const {
  return true;
}

bool LoongArchTargetLowering::shouldInsertFencesForAtomic(
    const Instruction *I) const {
  if (!Subtarget.is64Bit())
    return isa<LoadInst>(I) || isa<StoreInst>(I);

  if (isa<LoadInst>(I))
    return true;

  // On LA64, atomic store operations with IntegerBitWidth of 32 and 64 do not
  // require fences because we can use amswap_db.[w/d].
  if (isa<StoreInst>(I)) {
    unsigned Size = I->getOperand(0)->getType()->getIntegerBitWidth();
    return (Size == 8 || Size == 16);
  }

  return false;
}

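// Scalar comparisons produce their result in a general-purpose register, so
// use the pointer-sized integer type; vector comparisons keep the vector
// shape but switch to integer elements of the same width.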
EVT LoongArchTargetLowering::getSetCCResultType(const DataLayout &DL,
                                                LLVMContext &Context,
                                                EVT VT) const {
  if (!VT.isVector())
    return getPointerTy(DL);
  return VT.changeVectorElementTypeToInteger();
}

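// LoongArch provides an andn instruction, so (and x, (not y)) with a
// non-constant y is expected to fold into a single operation; the constant
// case is left to the generic combines, presumably because not-of-constant is
// itself just another constant.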
bool LoongArchTargetLowering::hasAndNot(SDValue Y) const {
  // TODO: Support vectors.
  return Y.getValueType().isScalarInteger() && !isa<ConstantSDNode>(Y);
}

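// Describe the memory behavior of the recognized target memory intrinsics for
// the optimizer: the masked atomicrmw intrinsics below read and write a
// naturally aligned 32-bit word and are marked volatile.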
bool LoongArchTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
                                                 const CallInst &I,
                                                 MachineFunction &MF,
                                                 unsigned Intrinsic) const {
  switch (Intrinsic) {
  default:
    return false;
  case Intrinsic::loongarch_masked_atomicrmw_xchg_i32:
  case Intrinsic::loongarch_masked_atomicrmw_add_i32:
  case Intrinsic::loongarch_masked_atomicrmw_sub_i32:
  case Intrinsic::loongarch_masked_atomicrmw_nand_i32:
    Info.opc = ISD::INTRINSIC_W_CHAIN;
    Info.memVT = MVT::i32;
    Info.ptrVal = I.getArgOperand(0);
    Info.offset = 0;
    Info.align = Align(4);
    Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore |
                 MachineMemOperand::MOVolatile;
    return true;
    // TODO: Add more Intrinsics later.
  }
}

TargetLowering::AtomicExpansionKind
LoongArchTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
  // TODO: Add more AtomicRMWInst that needs to be extended.

  // Since floating-point operation requires a non-trivial set of data
  // operations, use CmpXChg to expand.
  if (AI->isFloatingPointOperation() ||
      AI->getOperation() == AtomicRMWInst::UIncWrap ||
      AI->getOperation() == AtomicRMWInst::UDecWrap)
    return AtomicExpansionKind::CmpXChg;

  unsigned Size = AI->getType()->getPrimitiveSizeInBits();
  if (Size == 8 || Size == 16)
    return AtomicExpansionKind::MaskedIntrinsic;
  return AtomicExpansionKind::None;
}

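// Map an atomicrmw binary operation to the corresponding GRLen-wide masked
// intrinsic used by the sub-word expansion selected above.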
static Intrinsic::ID
getIntrinsicForMaskedAtomicRMWBinOp(unsigned GRLen,
                                    AtomicRMWInst::BinOp BinOp) {
  if (GRLen == 64) {
    switch (BinOp) {
    default:
      llvm_unreachable("Unexpected AtomicRMW BinOp");
    case AtomicRMWInst::Xchg:
      return Intrinsic::loongarch_masked_atomicrmw_xchg_i64;
    case AtomicRMWInst::Add:
      return Intrinsic::loongarch_masked_atomicrmw_add_i64;
    case AtomicRMWInst::Sub:
      return Intrinsic::loongarch_masked_atomicrmw_sub_i64;
    case AtomicRMWInst::Nand:
      return Intrinsic::loongarch_masked_atomicrmw_nand_i64;
    case AtomicRMWInst::UMax:
      return Intrinsic::loongarch_masked_atomicrmw_umax_i64;
    case AtomicRMWInst::UMin:
      return Intrinsic::loongarch_masked_atomicrmw_umin_i64;
    case AtomicRMWInst::Max:
      return Intrinsic::loongarch_masked_atomicrmw_max_i64;
    case AtomicRMWInst::Min:
      return Intrinsic::loongarch_masked_atomicrmw_min_i64;
      // TODO: support other AtomicRMWInst.
    }
  }

  if (GRLen == 32) {
    switch (BinOp) {
    default:
      llvm_unreachable("Unexpected AtomicRMW BinOp");
    case AtomicRMWInst::Xchg:
      return Intrinsic::loongarch_masked_atomicrmw_xchg_i32;
    case AtomicRMWInst::Add:
      return Intrinsic::loongarch_masked_atomicrmw_add_i32;
    case AtomicRMWInst::Sub:
      return Intrinsic::loongarch_masked_atomicrmw_sub_i32;
    case AtomicRMWInst::Nand:
      return Intrinsic::loongarch_masked_atomicrmw_nand_i32;
      // TODO: support other AtomicRMWInst.
    }
  }

  llvm_unreachable("Unexpected GRLen\n");
}

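// Sub-word (8/16-bit) cmpxchg is expanded to the masked word-sized intrinsic,
// since the ll/sc primitives only operate on 32/64-bit quantities.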
TargetLowering::AtomicExpansionKind
LoongArchTargetLowering::shouldExpandAtomicCmpXchgInIR(
    AtomicCmpXchgInst *CI) const {
  unsigned Size = CI->getCompareOperand()->getType()->getPrimitiveSizeInBits();
  if (Size == 8 || Size == 16)
    return AtomicExpansionKind::MaskedIntrinsic;
  return AtomicExpansionKind::None;
}

Value *LoongArchTargetLowering::emitMaskedAtomicCmpXchgIntrinsic(
    IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr,
    Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const {
  Value *Ordering =
      Builder.getIntN(Subtarget.getGRLen(), static_cast<uint64_t>(Ord));

  // TODO: Support cmpxchg on LA32.
  Intrinsic::ID CmpXchgIntrID = Intrinsic::loongarch_masked_cmpxchg_i64;
  CmpVal = Builder.CreateSExt(CmpVal, Builder.getInt64Ty());
  NewVal = Builder.CreateSExt(NewVal, Builder.getInt64Ty());
  Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
  Type *Tys[] = {AlignedAddr->getType()};
  Function *MaskedCmpXchg =
      Intrinsic::getDeclaration(CI->getModule(), CmpXchgIntrID, Tys);
  Value *Result = Builder.CreateCall(
      MaskedCmpXchg, {AlignedAddr, CmpVal, NewVal, Mask, Ordering});
  Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
  return Result;
}

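// Emit a call to the masked atomicrmw intrinsic selected by
// getIntrinsicForMaskedAtomicRMWBinOp. On LA64 the incoming i32 operands are
// sign-extended to i64 before the call and the result is truncated back to
// i32 afterwards.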
Value *LoongArchTargetLowering::emitMaskedAtomicRMWIntrinsic(
    IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr,
    Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const {
  unsigned GRLen = Subtarget.getGRLen();
  Value *Ordering =
      Builder.getIntN(GRLen, static_cast<uint64_t>(AI->getOrdering()));
  Type *Tys[] = {AlignedAddr->getType()};
  Function *LlwOpScwLoop = Intrinsic::getDeclaration(
      AI->getModule(),
      getIntrinsicForMaskedAtomicRMWBinOp(GRLen, AI->getOperation()), Tys);

  if (GRLen == 64) {
    Incr = Builder.CreateSExt(Incr, Builder.getInt64Ty());
    Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
    ShiftAmt = Builder.CreateSExt(ShiftAmt, Builder.getInt64Ty());
  }

  Value *Result;

  // Must pass the shift amount needed to sign extend the loaded value prior
  // to performing a signed comparison for min/max. ShiftAmt is the number of
  // bits to shift the value into position. Pass GRLen-ShiftAmt-ValWidth, which
  // is the number of bits to left+right shift the value in order to
  // sign-extend.
  if (AI->getOperation() == AtomicRMWInst::Min ||
      AI->getOperation() == AtomicRMWInst::Max) {
    const DataLayout &DL = AI->getModule()->getDataLayout();
    unsigned ValWidth =
        DL.getTypeStoreSizeInBits(AI->getValOperand()->getType());
    Value *SextShamt =
        Builder.CreateSub(Builder.getIntN(GRLen, GRLen - ValWidth), ShiftAmt);
    Result = Builder.CreateCall(LlwOpScwLoop,
                                {AlignedAddr, Incr, Mask, SextShamt, Ordering});
  } else {
    Result =
        Builder.CreateCall(LlwOpScwLoop, {AlignedAddr, Incr, Mask, Ordering});
  }

  if (GRLen == 64)
    Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
  return Result;
}

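// Fused multiply-add is preferred for f32/f64: LoongArch provides
// fmadd.s/fmadd.d, so a single fused instruction beats separate fmul + fadd.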
bool LoongArchTargetLowering::isFMAFasterThanFMulAndFAdd(
    const MachineFunction &MF, EVT VT) const {
  VT = VT.getScalarType();

  if (!VT.isSimple())
    return false;

  switch (VT.getSimpleVT().SimpleTy) {
  case MVT::f32:
  case MVT::f64:
    return true;
  default:
    break;
  }

  return false;
}

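// Registers used to pass the exception pointer and selector to the landing
// pad: $a0 (R4) and $a1 (R5).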
Register LoongArchTargetLowering::getExceptionPointerRegister(
    const Constant *PersonalityFn) const {
  return LoongArch::R4;
}

Register LoongArchTargetLowering::getExceptionSelectorRegister(
    const Constant *PersonalityFn) const {
  return LoongArch::R5;
}

//===----------------------------------------------------------------------===//
// LoongArch Inline Assembly Support
//===----------------------------------------------------------------------===//

LoongArchTargetLowering::ConstraintType
LoongArchTargetLowering::getConstraintType(StringRef Constraint) const {
  // LoongArch specific constraints in GCC: config/loongarch/constraints.md
  //
  // 'f':  A floating-point register (if available).
  // 'k':  A memory operand whose address is formed by a base register and
  //       (optionally scaled) index register.
  // 'l':  A signed 16-bit constant.
  // 'm':  A memory operand whose address is formed by a base register and
  //       offset that is suitable for use in instructions with the same
  //       addressing mode as st.w and ld.w.
  // 'I':  A signed 12-bit constant (for arithmetic instructions).
  // 'J':  Integer zero.
  // 'K':  An unsigned 12-bit constant (for logic instructions).
  // "ZB": An address that is held in a general-purpose register. The offset
  //       is zero.
  // "ZC": A memory operand whose address is formed by a base register and
  //       offset that is suitable for use in instructions with the same
  //       addressing mode as ll.w and sc.w.
  if (Constraint.size() == 1) {
    switch (Constraint[0]) {
    default:
      break;
    case 'f':
      return C_RegisterClass;
    case 'l':
    case 'I':
    case 'J':
    case 'K':
      return C_Immediate;
    case 'k':
      return C_Memory;
    }
  }

  if (Constraint == "ZC" || Constraint == "ZB")
    return C_Memory;

  // 'm' is handled here.
  return TargetLowering::getConstraintType(Constraint);
}

unsigned LoongArchTargetLowering::getInlineAsmMemConstraint(
    StringRef ConstraintCode) const {
  return StringSwitch<unsigned>(ConstraintCode)
      .Case("k", InlineAsm::Constraint_k)
      .Case("ZB", InlineAsm::Constraint_ZB)
      .Case("ZC", InlineAsm::Constraint_ZC)
      .Default(TargetLowering::getInlineAsmMemConstraint(ConstraintCode));
}

std::pair<unsigned, const TargetRegisterClass *>
LoongArchTargetLowering::getRegForInlineAsmConstraint(
    const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const {
  // First, see if this is a constraint that directly corresponds to a
  // LoongArch register class.
  if (Constraint.size() == 1) {
    switch (Constraint[0]) {
    case 'r':
      // TODO: Support fixed vectors up to GRLen?
      if (VT.isVector())
        break;
      return std::make_pair(0U, &LoongArch::GPRRegClass);
    case 'f':
      if (Subtarget.hasBasicF() && VT == MVT::f32)
        return std::make_pair(0U, &LoongArch::FPR32RegClass);
      if (Subtarget.hasBasicD() && VT == MVT::f64)
        return std::make_pair(0U, &LoongArch::FPR64RegClass);
      break;
    default:
      break;
    }
  }

  // TargetLowering::getRegForInlineAsmConstraint uses the name of the TableGen
  // record (e.g. the "R0" in `def R0`) to choose registers for InlineAsm
  // constraints while the official register name is prefixed with a '$'. So we
  // clip the '$' from the original constraint string (e.g. {$r0} to {r0}.)
  // before it is parsed. And TargetLowering::getRegForInlineAsmConstraint is
  // case insensitive, so no need to convert the constraint to upper case here.
  //
  // For now, no need to support ABI names (e.g. `$a0`) as clang will correctly
  // decode the usage of register name aliases into their official names. And
  // AFAIK, the not yet upstreamed `rustc` for LoongArch will always use
  // official register names.
  if (Constraint.startswith("{$r") || Constraint.startswith("{$f")) {
    bool IsFP = Constraint[2] == 'f';
    std::pair<StringRef, StringRef> Temp = Constraint.split('$');
    std::pair<unsigned, const TargetRegisterClass *> R;
    R = TargetLowering::getRegForInlineAsmConstraint(
        TRI, join_items("", Temp.first, Temp.second), VT);
    // Match those names to the widest floating point register type available.
    if (IsFP) {
      unsigned RegNo = R.first;
      if (LoongArch::F0 <= RegNo && RegNo <= LoongArch::F31) {
        if (Subtarget.hasBasicD() && (VT == MVT::f64 || VT == MVT::Other)) {
          unsigned DReg = RegNo - LoongArch::F0 + LoongArch::F0_64;
          return std::make_pair(DReg, &LoongArch::FPR64RegClass);
        }
      }
    }
    return R;
  }

  return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
}

void LoongArchTargetLowering::LowerAsmOperandForConstraint(
    SDValue Op, std::string &Constraint, std::vector<SDValue> &Ops,
    SelectionDAG &DAG) const {
  // Currently only support length 1 constraints.
  if (Constraint.length() == 1) {
    switch (Constraint[0]) {
    case 'l':
      // Validate & create a 16-bit signed immediate operand.
      if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
        uint64_t CVal = C->getSExtValue();
        if (isInt<16>(CVal))
          Ops.push_back(
              DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getGRLenVT()));
      }
      return;
    case 'I':
      // Validate & create a 12-bit signed immediate operand.
      if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
        uint64_t CVal = C->getSExtValue();
        if (isInt<12>(CVal))
          Ops.push_back(
              DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getGRLenVT()));
      }
      return;
    case 'J':
      // Validate & create an integer zero operand.
      if (auto *C = dyn_cast<ConstantSDNode>(Op))
        if (C->getZExtValue() == 0)
          Ops.push_back(
              DAG.getTargetConstant(0, SDLoc(Op), Subtarget.getGRLenVT()));
      return;
    case 'K':
      // Validate & create a 12-bit unsigned immediate operand.
      if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
        uint64_t CVal = C->getZExtValue();
        if (isUInt<12>(CVal))
          Ops.push_back(
              DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getGRLenVT()));
      }
      return;
    default:
      break;
    }
  }
  TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
}

#define GET_REGISTER_MATCHER
#include "LoongArchGenAsmMatcher.inc"

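// Resolve a register name used for named-register access
// (llvm.read_register / llvm.write_register). The leading '$' of the official
// name is stripped before matching, and only reserved registers are accepted
// so the value cannot be clobbered by register allocation.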
Register
LoongArchTargetLowering::getRegisterByName(const char *RegName, LLT VT,
                                           const MachineFunction &MF) const {
  std::pair<StringRef, StringRef> Name = StringRef(RegName).split('$');
  std::string NewRegName = Name.second.str();
  Register Reg = MatchRegisterAltName(NewRegName);
  if (Reg == LoongArch::NoRegister)
    Reg = MatchRegisterName(NewRegName);
  if (Reg == LoongArch::NoRegister)
    report_fatal_error(
        Twine("Invalid register name \"" + StringRef(RegName) + "\"."));
  BitVector ReservedRegs = Subtarget.getRegisterInfo()->getReservedRegs(MF);
  if (!ReservedRegs.test(Reg))
    report_fatal_error(Twine("Trying to obtain non-reserved register \"" +
                             StringRef(RegName) + "\"."));
  return Reg;
}

bool LoongArchTargetLowering::decomposeMulByConstant(LLVMContext &Context,
                                                     EVT VT, SDValue C) const {
  // TODO: Support vectors.
  if (!VT.isScalarInteger())
    return false;

  // Omit the optimization if the data size exceeds GRLen.
  if (VT.getSizeInBits() > Subtarget.getGRLen())
    return false;

  // Break MUL into (SLLI + ADD/SUB) or ALSL.
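  // A constant Imm qualifies when one of the following holds, so the multiply
  // can be rewritten around a single shift:
  //   Imm = 2^k + 1  ->  x * Imm = (x << k) + x
  //   Imm = 2^k - 1  ->  x * Imm = (x << k) - x
  //   Imm = 1 - 2^k  ->  x * Imm = x - (x << k)
  //   Imm = -1 - 2^k ->  x * Imm = -((x << k) + x)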
  if (auto *ConstNode = dyn_cast<ConstantSDNode>(C.getNode())) {
    const APInt &Imm = ConstNode->getAPIntValue();
    if ((Imm + 1).isPowerOf2() || (Imm - 1).isPowerOf2() ||
        (1 - Imm).isPowerOf2() || (-1 - Imm).isPowerOf2())
      return true;
  }

  return false;
}