//===- ARMFastISel.cpp - ARM FastISel implementation ----------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the ARM-specific support for the FastISel class. Some
// of the target-specific code is generated by tablegen in the file
// ARMGenFastISel.inc, which is #included here.
//
//===----------------------------------------------------------------------===//

#include "ARM.h"
#include "ARMBaseInstrInfo.h"
#include "ARMBaseRegisterInfo.h"
#include "ARMCallingConv.h"
#include "ARMConstantPoolValue.h"
#include "ARMISelLowering.h"
#include "ARMMachineFunctionInfo.h"
#include "ARMSubtarget.h"
#include "MCTargetDesc/ARMAddressingModes.h"
#include "MCTargetDesc/ARMBaseInfo.h"
#include "Utils/ARMBaseInfo.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/FastISel.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RuntimeLibcalls.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
#include <cassert>
#include <cstdint>
#include <utility>

using namespace llvm;

namespace {

// All possible address modes, plus some.
struct Address {
  enum {
    RegBase,
    FrameIndexBase
  } BaseType = RegBase;

  union {
    unsigned Reg;
    int FI;
  } Base;

  int Offset = 0;

  // Innocuous defaults for our address.
  Address() {
    Base.Reg = 0;
  }
};
class ARMFastISel final : public FastISel {
  /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can
  /// make the right decision when generating code for different targets.
  const ARMSubtarget *Subtarget;
  Module &M;
  const TargetMachine &TM;
  const TargetInstrInfo &TII;
  const TargetLowering &TLI;
  ARMFunctionInfo *AFI;

  // Convenience variables to avoid some queries.
  bool isThumb2;
  LLVMContext *Context;

public:
  explicit ARMFastISel(FunctionLoweringInfo &funcInfo,
                       const TargetLibraryInfo *libInfo)
      : FastISel(funcInfo, libInfo),
        Subtarget(&funcInfo.MF->getSubtarget<ARMSubtarget>()),
        M(const_cast<Module &>(*funcInfo.Fn->getParent())),
        TM(funcInfo.MF->getTarget()), TII(*Subtarget->getInstrInfo()),
        TLI(*Subtarget->getTargetLowering()) {
    AFI = funcInfo.MF->getInfo<ARMFunctionInfo>();
    isThumb2 = AFI->isThumbFunction();
    Context = &funcInfo.Fn->getContext();
  }

private:
  // Code from FastISel.cpp.
  unsigned fastEmitInst_r(unsigned MachineInstOpcode,
                          const TargetRegisterClass *RC, unsigned Op0);
  unsigned fastEmitInst_rr(unsigned MachineInstOpcode,
                           const TargetRegisterClass *RC,
                           unsigned Op0, unsigned Op1);
  unsigned fastEmitInst_ri(unsigned MachineInstOpcode,
                           const TargetRegisterClass *RC,
                           unsigned Op0, uint64_t Imm);
  unsigned fastEmitInst_i(unsigned MachineInstOpcode,
                          const TargetRegisterClass *RC,
                          uint64_t Imm);

  // Backend specific FastISel code.
  bool fastSelectInstruction(const Instruction *I) override;
  unsigned fastMaterializeConstant(const Constant *C) override;
  unsigned fastMaterializeAlloca(const AllocaInst *AI) override;
  bool tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,
                           const LoadInst *LI) override;
  bool fastLowerArguments() override;

#include "ARMGenFastISel.inc"

  // Instruction selection routines.
  bool SelectLoad(const Instruction *I);
  bool SelectStore(const Instruction *I);
  bool SelectBranch(const Instruction *I);
  bool SelectIndirectBr(const Instruction *I);
  bool SelectCmp(const Instruction *I);
  bool SelectFPExt(const Instruction *I);
  bool SelectFPTrunc(const Instruction *I);
  bool SelectBinaryIntOp(const Instruction *I, unsigned ISDOpcode);
  bool SelectBinaryFPOp(const Instruction *I, unsigned ISDOpcode);
  bool SelectIToFP(const Instruction *I, bool isSigned);
  bool SelectFPToI(const Instruction *I, bool isSigned);
  bool SelectDiv(const Instruction *I, bool isSigned);
  bool SelectRem(const Instruction *I, bool isSigned);
  bool SelectCall(const Instruction *I, const char *IntrMemName);
  bool SelectIntrinsicCall(const IntrinsicInst &I);
  bool SelectSelect(const Instruction *I);
  bool SelectRet(const Instruction *I);
  bool SelectTrunc(const Instruction *I);
  bool SelectIntExt(const Instruction *I);
  bool SelectShift(const Instruction *I, ARM_AM::ShiftOpc ShiftTy);

  // Utility routines.
  bool isPositionIndependent() const;
  bool isTypeLegal(Type *Ty, MVT &VT);
  bool isLoadTypeLegal(Type *Ty, MVT &VT);
  bool ARMEmitCmp(const Value *Src1Value, const Value *Src2Value,
                  bool isZExt);
  bool ARMEmitLoad(MVT VT, Register &ResultReg, Address &Addr,
                   MaybeAlign Alignment = std::nullopt, bool isZExt = true,
                   bool allocReg = true);
  bool ARMEmitStore(MVT VT, unsigned SrcReg, Address &Addr,
                    MaybeAlign Alignment = std::nullopt);
  bool ARMComputeAddress(const Value *Obj, Address &Addr);
  void ARMSimplifyAddress(Address &Addr, MVT VT, bool useAM3);
  bool ARMIsMemCpySmall(uint64_t Len);
  bool ARMTryEmitSmallMemCpy(Address Dest, Address Src, uint64_t Len,
                             MaybeAlign Alignment);
  unsigned ARMEmitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, bool isZExt);
  unsigned ARMMaterializeFP(const ConstantFP *CFP, MVT VT);
  unsigned ARMMaterializeInt(const Constant *C, MVT VT);
  unsigned ARMMaterializeGV(const GlobalValue *GV, MVT VT);
  unsigned ARMMoveToFPReg(MVT VT, unsigned SrcReg);
  unsigned ARMMoveToIntReg(MVT VT, unsigned SrcReg);
  unsigned ARMSelectCallOp(bool UseReg);
  unsigned ARMLowerPICELF(const GlobalValue *GV, MVT VT);

  const TargetLowering *getTargetLowering() { return &TLI; }

  // Call handling routines.
  CCAssignFn *CCAssignFnForCall(CallingConv::ID CC,
                                bool Return,
                                bool isVarArg);
  bool ProcessCallArgs(SmallVectorImpl<Value*> &Args,
                       SmallVectorImpl<Register> &ArgRegs,
                       SmallVectorImpl<MVT> &ArgVTs,
                       SmallVectorImpl<ISD::ArgFlagsTy> &ArgFlags,
                       SmallVectorImpl<Register> &RegArgs,
                       CallingConv::ID CC,
                       unsigned &NumBytes,
                       bool isVarArg);
  unsigned getLibcallReg(const Twine &Name);
  bool FinishCall(MVT RetVT, SmallVectorImpl<Register> &UsedRegs,
                  const Instruction *I, CallingConv::ID CC,
                  unsigned &NumBytes, bool isVarArg);
  bool ARMEmitLibcall(const Instruction *I, RTLIB::Libcall Call);

  // OptionalDef handling routines.
  bool isARMNEONPred(const MachineInstr *MI);
  bool DefinesOptionalPredicate(MachineInstr *MI, bool *CPSR);
  const MachineInstrBuilder &AddOptionalDefs(const MachineInstrBuilder &MIB);
  void AddLoadStoreOperands(MVT VT, Address &Addr,
                            const MachineInstrBuilder &MIB,
                            MachineMemOperand::Flags Flags, bool useAM3);
};

} // end anonymous namespace
// DefinesOptionalPredicate - This is different from DefinesPredicate in that
// we don't care about implicit defs here, just places we'll need to add a
// default CCReg argument. Sets CPSR if we're setting CPSR instead of CCR.
bool ARMFastISel::DefinesOptionalPredicate(MachineInstr *MI, bool *CPSR) {
  if (!MI->hasOptionalDef())
    return false;

  // Look to see if our OptionalDef is defining CPSR or CCR.
  for (const MachineOperand &MO : MI->operands()) {
    if (!MO.isReg() || !MO.isDef()) continue;
    if (MO.getReg() == ARM::CPSR)
      *CPSR = true;
  }
  return true;
}
bool ARMFastISel::isARMNEONPred(const MachineInstr *MI) {
  const MCInstrDesc &MCID = MI->getDesc();

  // If this is not a NEON instruction, or we're in a Thumb2 function,
  // predication is handled via isPredicable.
  if ((MCID.TSFlags & ARMII::DomainMask) != ARMII::DomainNEON ||
      AFI->isThumb2Function())
    return MI->isPredicable();

  for (const MCOperandInfo &opInfo : MCID.operands())
    if (opInfo.isPredicate())
      return true;

  return false;
}
// If the machine instruction is predicable, go ahead and add the predicate
// operands; if it needs default CC operands, add those.
// TODO: If we want to support thumb1 then we'll need to deal with optional
// CPSR defs that need to be added before the remaining operands. See s_cc_out
// for a description of why.
const MachineInstrBuilder &
ARMFastISel::AddOptionalDefs(const MachineInstrBuilder &MIB) {
  MachineInstr *MI = &*MIB;

  // Do we use a predicate? or...
  // Are we NEON in ARM mode and have a predicate operand? If so, we know
  // we're not predicable but add it anyway.
  if (isARMNEONPred(MI))
    MIB.add(predOps(ARMCC::AL));

  // Do we optionally set a predicate?  Preds is size > 0 iff the predicate
  // defines CPSR. All other OptionalDefines in ARM are the CCR register.
  bool CPSR = false;
  if (DefinesOptionalPredicate(MI, &CPSR))
    MIB.add(CPSR ? t1CondCodeOp() : condCodeOp());
  return MIB;
}
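// These fastEmitInst_* helpers mirror the generic emitters in FastISel.cpp,
// but route every instruction through AddOptionalDefs so that the ARM
// predicate and optional CC-setting operands get appended. When the target
// instruction has no explicit definition, the result is copied out of the
// implicitly defined physical register into ResultReg instead.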
unsigned ARMFastISel::fastEmitInst_r(unsigned MachineInstOpcode,
                                     const TargetRegisterClass *RC,
                                     unsigned Op0) {
  Register ResultReg = createResultReg(RC);
  const MCInstrDesc &II = TII.get(MachineInstOpcode);

  // Make sure the input operand is sufficiently constrained to be legal
  // for this instruction.
  Op0 = constrainOperandRegClass(II, Op0, 1);
  if (II.getNumDefs() >= 1) {
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II,
                            ResultReg).addReg(Op0));
  } else {
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II)
                        .addReg(Op0));
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
                            TII.get(TargetOpcode::COPY), ResultReg)
                        .addReg(II.implicit_defs()[0]));
  }
  return ResultReg;
}

unsigned ARMFastISel::fastEmitInst_rr(unsigned MachineInstOpcode,
                                      const TargetRegisterClass *RC,
                                      unsigned Op0, unsigned Op1) {
  Register ResultReg = createResultReg(RC);
  const MCInstrDesc &II = TII.get(MachineInstOpcode);

  // Make sure the input operands are sufficiently constrained to be legal
  // for this instruction.
  Op0 = constrainOperandRegClass(II, Op0, 1);
  Op1 = constrainOperandRegClass(II, Op1, 2);

  if (II.getNumDefs() >= 1) {
    AddOptionalDefs(
        BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II, ResultReg)
            .addReg(Op0)
            .addReg(Op1));
  } else {
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II)
                        .addReg(Op0)
                        .addReg(Op1));
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
                            TII.get(TargetOpcode::COPY), ResultReg)
                        .addReg(II.implicit_defs()[0]));
  }
  return ResultReg;
}

unsigned ARMFastISel::fastEmitInst_ri(unsigned MachineInstOpcode,
                                      const TargetRegisterClass *RC,
                                      unsigned Op0, uint64_t Imm) {
  Register ResultReg = createResultReg(RC);
  const MCInstrDesc &II = TII.get(MachineInstOpcode);

  // Make sure the input operand is sufficiently constrained to be legal
  // for this instruction.
  Op0 = constrainOperandRegClass(II, Op0, 1);
  if (II.getNumDefs() >= 1) {
    AddOptionalDefs(
        BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II, ResultReg)
            .addReg(Op0)
            .addImm(Imm));
  } else {
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II)
                        .addReg(Op0)
                        .addImm(Imm));
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
                            TII.get(TargetOpcode::COPY), ResultReg)
                        .addReg(II.implicit_defs()[0]));
  }
  return ResultReg;
}

unsigned ARMFastISel::fastEmitInst_i(unsigned MachineInstOpcode,
                                     const TargetRegisterClass *RC,
                                     uint64_t Imm) {
  Register ResultReg = createResultReg(RC);
  const MCInstrDesc &II = TII.get(MachineInstOpcode);

  if (II.getNumDefs() >= 1) {
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II,
                            ResultReg).addImm(Imm));
  } else {
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II)
                        .addImm(Imm));
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
                            TII.get(TargetOpcode::COPY), ResultReg)
                        .addReg(II.implicit_defs()[0]));
  }
  return ResultReg;
}
// TODO: Don't worry about 64-bit now, but when this is fixed remove the
// checks from the various callers.
unsigned ARMFastISel::ARMMoveToFPReg(MVT VT, unsigned SrcReg) {
  if (VT == MVT::f64) return 0;

  Register MoveReg = createResultReg(TLI.getRegClassFor(VT));
  AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
                          TII.get(ARM::VMOVSR), MoveReg)
                      .addReg(SrcReg));
  return MoveReg;
}

unsigned ARMFastISel::ARMMoveToIntReg(MVT VT, unsigned SrcReg) {
  if (VT == MVT::i64) return 0;

  Register MoveReg = createResultReg(TLI.getRegClassFor(VT));
  AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
                          TII.get(ARM::VMOVRS), MoveReg)
                      .addReg(SrcReg));
  return MoveReg;
}
// For double width floating point we need to materialize two constants
// (the high and the low) into integer registers then use a move to get
// the combined constant into an FP reg.
unsigned ARMFastISel::ARMMaterializeFP(const ConstantFP *CFP, MVT VT) {
  const APFloat Val = CFP->getValueAPF();
  bool is64bit = VT == MVT::f64;

  // This checks to see if we can use VFP3 instructions to materialize
  // a constant, otherwise we have to go through the constant pool.
  if (TLI.isFPImmLegal(Val, VT)) {
    int Imm;
    unsigned Opc;
    if (is64bit) {
      Imm = ARM_AM::getFP64Imm(Val);
      Opc = ARM::FCONSTD;
    } else {
      Imm = ARM_AM::getFP32Imm(Val);
      Opc = ARM::FCONSTS;
    }
    Register DestReg = createResultReg(TLI.getRegClassFor(VT));
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
                            TII.get(Opc), DestReg).addImm(Imm));
    return DestReg;
  }

  // Require VFP2 for loading fp constants.
  if (!Subtarget->hasVFP2Base()) return false;

  // MachineConstantPool wants an explicit alignment.
  Align Alignment = DL.getPrefTypeAlign(CFP->getType());
  unsigned Idx = MCP.getConstantPoolIndex(cast<Constant>(CFP), Alignment);
  Register DestReg = createResultReg(TLI.getRegClassFor(VT));
  unsigned Opc = is64bit ? ARM::VLDRD : ARM::VLDRS;

  // The extra reg is for addrmode5.
  AddOptionalDefs(
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc), DestReg)
          .addConstantPoolIndex(Idx)
          .addReg(0));
  return DestReg;
}
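// Materialize an integer constant, preferring single-instruction encodings
// (MOVi16, MVN of an encodable immediate, or a movt-based fastEmit) before
// falling back to a 32-bit constant-pool load.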
unsigned ARMFastISel::ARMMaterializeInt(const Constant *C, MVT VT) {
  if (VT != MVT::i32 && VT != MVT::i16 && VT != MVT::i8 && VT != MVT::i1)
    return 0;

  // If we can do this in a single instruction without a constant pool entry
  // do so now.
  const ConstantInt *CI = cast<ConstantInt>(C);
  if (Subtarget->hasV6T2Ops() && isUInt<16>(CI->getZExtValue())) {
    unsigned Opc = isThumb2 ? ARM::t2MOVi16 : ARM::MOVi16;
    const TargetRegisterClass *RC = isThumb2 ? &ARM::rGPRRegClass :
      &ARM::GPRRegClass;
    Register ImmReg = createResultReg(RC);
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
                            TII.get(Opc), ImmReg)
                        .addImm(CI->getZExtValue()));
    return ImmReg;
  }

  // Use MVN to emit negative constants.
  if (VT == MVT::i32 && Subtarget->hasV6T2Ops() && CI->isNegative()) {
    unsigned Imm = (unsigned)~(CI->getSExtValue());
    bool UseImm = isThumb2 ? (ARM_AM::getT2SOImmVal(Imm) != -1) :
      (ARM_AM::getSOImmVal(Imm) != -1);
    if (UseImm) {
      unsigned Opc = isThumb2 ? ARM::t2MVNi : ARM::MVNi;
      const TargetRegisterClass *RC = isThumb2 ? &ARM::rGPRRegClass :
        &ARM::GPRRegClass;
      Register ImmReg = createResultReg(RC);
      AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
                              TII.get(Opc), ImmReg)
                          .addImm(Imm));
      return ImmReg;
    }
  }

  unsigned ResultReg = 0;
  if (Subtarget->useMovt())
    ResultReg = fastEmit_i(VT, VT, ISD::Constant, CI->getZExtValue());

  if (ResultReg)
    return ResultReg;

  // Load from constant pool. For now 32-bit only.
  if (VT != MVT::i32)
    return 0;

  // MachineConstantPool wants an explicit alignment.
  Align Alignment = DL.getPrefTypeAlign(C->getType());
  unsigned Idx = MCP.getConstantPoolIndex(C, Alignment);
  ResultReg = createResultReg(TLI.getRegClassFor(VT));
  if (isThumb2)
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
                            TII.get(ARM::t2LDRpci), ResultReg)
                        .addConstantPoolIndex(Idx));
  else {
    // The extra immediate is for addrmode2.
    ResultReg = constrainOperandRegClass(TII.get(ARM::LDRcp), ResultReg, 0);
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
                            TII.get(ARM::LDRcp), ResultReg)
                        .addConstantPoolIndex(Idx)
                        .addImm(0));
  }
  return ResultReg;
}
bool ARMFastISel::isPositionIndependent() const {
  return TLI.isPositionIndependent();
}
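// Materialize the address of a global. Uses movw/movt when the subtarget
// allows it, otherwise goes through a constant-pool entry (with a PIC
// add/load when position independent), and finishes with an extra load for
// indirect (GOT or non-lazy-pointer) references.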
unsigned ARMFastISel::ARMMaterializeGV(const GlobalValue *GV, MVT VT) {
  // For now 32-bit only.
  if (VT != MVT::i32 || GV->isThreadLocal()) return 0;

  // ROPI/RWPI not currently supported.
  if (Subtarget->isROPI() || Subtarget->isRWPI())
    return 0;

  bool IsIndirect = Subtarget->isGVIndirectSymbol(GV);
  const TargetRegisterClass *RC = isThumb2 ? &ARM::rGPRRegClass
                                           : &ARM::GPRRegClass;
  Register DestReg = createResultReg(RC);

  // FastISel TLS support on non-MachO is broken, punt to SelectionDAG.
  const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV);
  bool IsThreadLocal = GVar && GVar->isThreadLocal();
  if (!Subtarget->isTargetMachO() && IsThreadLocal) return 0;

  bool IsPositionIndependent = isPositionIndependent();
  // Use movw+movt when possible; it avoids constant pool entries.
  // Non-darwin targets only support static movt relocations in FastISel.
  if (Subtarget->useMovt() &&
      (Subtarget->isTargetMachO() || !IsPositionIndependent)) {
    unsigned Opc;
    unsigned char TF = 0;
    if (Subtarget->isTargetMachO())
      TF = ARMII::MO_NONLAZY;

    if (IsPositionIndependent)
      Opc = isThumb2 ? ARM::t2MOV_ga_pcrel : ARM::MOV_ga_pcrel;
    else
      Opc = isThumb2 ? ARM::t2MOVi32imm : ARM::MOVi32imm;
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
                            TII.get(Opc), DestReg).addGlobalAddress(GV, 0, TF));
  } else {
    // MachineConstantPool wants an explicit alignment.
    Align Alignment = DL.getPrefTypeAlign(GV->getType());

    if (Subtarget->isTargetELF() && IsPositionIndependent)
      return ARMLowerPICELF(GV, VT);

    // Grab index.
    unsigned PCAdj = IsPositionIndependent ? (Subtarget->isThumb() ? 4 : 8) : 0;
    unsigned Id = AFI->createPICLabelUId();
    ARMConstantPoolValue *CPV = ARMConstantPoolConstant::Create(GV, Id,
                                                                ARMCP::CPValue,
                                                                PCAdj);
    unsigned Idx = MCP.getConstantPoolIndex(CPV, Alignment);

    // Load value.
    MachineInstrBuilder MIB;
    if (isThumb2) {
      unsigned Opc = IsPositionIndependent ? ARM::t2LDRpci_pic : ARM::t2LDRpci;
      MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc),
                    DestReg).addConstantPoolIndex(Idx);
      if (IsPositionIndependent)
        MIB.addImm(Id);
      AddOptionalDefs(MIB);
    } else {
      // The extra immediate is for addrmode2.
      DestReg = constrainOperandRegClass(TII.get(ARM::LDRcp), DestReg, 0);
      MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
                    TII.get(ARM::LDRcp), DestReg)
                .addConstantPoolIndex(Idx)
                .addImm(0);
      AddOptionalDefs(MIB);

      if (IsPositionIndependent) {
        unsigned Opc = IsIndirect ? ARM::PICLDR : ARM::PICADD;
        Register NewDestReg = createResultReg(TLI.getRegClassFor(VT));

        MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt,
                                          MIMD, TII.get(Opc), NewDestReg)
                                      .addReg(DestReg)
                                      .addImm(Id);
        AddOptionalDefs(MIB);
        return NewDestReg;
      }
    }
  }

  if ((Subtarget->isTargetELF() && Subtarget->isGVInGOT(GV)) ||
      (Subtarget->isTargetMachO() && IsIndirect)) {
    MachineInstrBuilder MIB;
    Register NewDestReg = createResultReg(TLI.getRegClassFor(VT));
    if (isThumb2)
      MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
                    TII.get(ARM::t2LDRi12), NewDestReg)
                .addReg(DestReg)
                .addImm(0);
    else
      MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
                    TII.get(ARM::LDRi12), NewDestReg)
                .addReg(DestReg)
                .addImm(0);
    DestReg = NewDestReg;
    AddOptionalDefs(MIB);
  }

  return DestReg;
}
unsigned ARMFastISel::fastMaterializeConstant(const Constant *C) {
  EVT CEVT = TLI.getValueType(DL, C->getType(), true);

  // Only handle simple types.
  if (!CEVT.isSimple()) return 0;
  MVT VT = CEVT.getSimpleVT();

  if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C))
    return ARMMaterializeFP(CFP, VT);
  else if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
    return ARMMaterializeGV(GV, VT);
  else if (isa<ConstantInt>(C))
    return ARMMaterializeInt(C, VT);

  return 0;
}
// TODO: unsigned ARMFastISel::TargetMaterializeFloatZero(const ConstantFP *CF);

unsigned ARMFastISel::fastMaterializeAlloca(const AllocaInst *AI) {
  // Don't handle dynamic allocas.
  if (!FuncInfo.StaticAllocaMap.count(AI)) return 0;

  MVT VT;
  if (!isLoadTypeLegal(AI->getType(), VT)) return 0;

  DenseMap<const AllocaInst*, int>::iterator SI =
    FuncInfo.StaticAllocaMap.find(AI);

  // This will get lowered later into the correct offsets and registers
  // via rewriteXFrameIndex.
  if (SI != FuncInfo.StaticAllocaMap.end()) {
    unsigned Opc = isThumb2 ? ARM::t2ADDri : ARM::ADDri;
    const TargetRegisterClass* RC = TLI.getRegClassFor(VT);
    Register ResultReg = createResultReg(RC);
    ResultReg = constrainOperandRegClass(TII.get(Opc), ResultReg, 0);

    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
                            TII.get(Opc), ResultReg)
                        .addFrameIndex(SI->second)
                        .addImm(0));
    return ResultReg;
  }

  return 0;
}
bool ARMFastISel::isTypeLegal(Type *Ty, MVT &VT) {
  EVT evt = TLI.getValueType(DL, Ty, true);

  // Only handle simple types.
  if (evt == MVT::Other || !evt.isSimple()) return false;
  VT = evt.getSimpleVT();

  // Handle all legal types, i.e. a register that will directly hold this
  // value.
  return TLI.isTypeLegal(VT);
}

bool ARMFastISel::isLoadTypeLegal(Type *Ty, MVT &VT) {
  if (isTypeLegal(Ty, VT)) return true;

  // If this is a type that can be sign or zero-extended to a basic operation
  // go ahead and accept it now.
  if (VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16)
    return true;

  return false;
}
// Computes the address to get to an object.
bool ARMFastISel::ARMComputeAddress(const Value *Obj, Address &Addr) {
  // Some boilerplate from the X86 FastISel.
  const User *U = nullptr;
  unsigned Opcode = Instruction::UserOp1;
  if (const Instruction *I = dyn_cast<Instruction>(Obj)) {
    // Don't walk into other basic blocks unless the object is an alloca from
    // another block, otherwise it may not have a virtual register assigned.
    if (FuncInfo.StaticAllocaMap.count(static_cast<const AllocaInst *>(Obj)) ||
        FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
      Opcode = I->getOpcode();
      U = I;
    }
  } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(Obj)) {
    Opcode = C->getOpcode();
    U = C;
  }

  if (PointerType *Ty = dyn_cast<PointerType>(Obj->getType()))
    if (Ty->getAddressSpace() > 255)
      // Fast instruction selection doesn't support the special
      // address spaces.
      return false;

  switch (Opcode) {
    default:
      break;
    case Instruction::BitCast:
      // Look through bitcasts.
      return ARMComputeAddress(U->getOperand(0), Addr);
    case Instruction::IntToPtr:
      // Look past no-op inttoptrs.
      if (TLI.getValueType(DL, U->getOperand(0)->getType()) ==
          TLI.getPointerTy(DL))
        return ARMComputeAddress(U->getOperand(0), Addr);
      break;
    case Instruction::PtrToInt:
      // Look past no-op ptrtoints.
      if (TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL))
        return ARMComputeAddress(U->getOperand(0), Addr);
      break;
    case Instruction::GetElementPtr: {
      Address SavedAddr = Addr;
      int TmpOffset = Addr.Offset;

      // Iterate through the GEP folding the constants into offsets where
      // we can.
      gep_type_iterator GTI = gep_type_begin(U);
      for (User::const_op_iterator i = U->op_begin() + 1, e = U->op_end();
           i != e; ++i, ++GTI) {
        const Value *Op = *i;
        if (StructType *STy = GTI.getStructTypeOrNull()) {
          const StructLayout *SL = DL.getStructLayout(STy);
          unsigned Idx = cast<ConstantInt>(Op)->getZExtValue();
          TmpOffset += SL->getElementOffset(Idx);
        } else {
          uint64_t S = DL.getTypeAllocSize(GTI.getIndexedType());
          while (true) {
            if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
              // Constant-offset addressing.
              TmpOffset += CI->getSExtValue() * S;
              break;
            }
            if (canFoldAddIntoGEP(U, Op)) {
              // A compatible add with a constant operand. Fold the constant.
              ConstantInt *CI =
                cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1));
              TmpOffset += CI->getSExtValue() * S;
              // Iterate on the other operand.
              Op = cast<AddOperator>(Op)->getOperand(0);
              continue;
            }
            // Unsupported
            goto unsupported_gep;
          }
        }
      }

      // Try to grab the base operand now.
      Addr.Offset = TmpOffset;
      if (ARMComputeAddress(U->getOperand(0), Addr)) return true;

      // We failed, restore everything and try the other options.
      Addr = SavedAddr;

      unsupported_gep:
      break;
    }
    case Instruction::Alloca: {
      const AllocaInst *AI = cast<AllocaInst>(Obj);
      DenseMap<const AllocaInst*, int>::iterator SI =
        FuncInfo.StaticAllocaMap.find(AI);
      if (SI != FuncInfo.StaticAllocaMap.end()) {
        Addr.BaseType = Address::FrameIndexBase;
        Addr.Base.FI = SI->second;
        return true;
      }
      break;
    }
  }

  // Try to get this in a register if nothing else has worked.
  if (Addr.Base.Reg == 0) Addr.Base.Reg = getRegForValue(Obj);
  return Addr.Base.Reg != 0;
}
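// Fold Addr.Offset into the addressing mode where the load/store encoding
// allows it; otherwise materialize the frame index or reg+offset computation
// into a register and clear the offset.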
void ARMFastISel::ARMSimplifyAddress(Address &Addr, MVT VT, bool useAM3) {
  bool needsLowering = false;
  switch (VT.SimpleTy) {
    default: llvm_unreachable("Unhandled load/store type!");
    case MVT::i1:
    case MVT::i8:
    case MVT::i16:
    case MVT::i32:
      if (!useAM3) {
        // Integer loads/stores handle 12-bit offsets.
        needsLowering = ((Addr.Offset & 0xfff) != Addr.Offset);
        // Handle negative offsets.
        if (needsLowering && isThumb2)
          needsLowering = !(Subtarget->hasV6T2Ops() && Addr.Offset < 0 &&
                            Addr.Offset > -256);
      } else {
        // ARM halfword load/stores and signed byte loads use +/-imm8 offsets.
        needsLowering = (Addr.Offset > 255 || Addr.Offset < -255);
      }
      break;
    case MVT::f32:
    case MVT::f64:
      // Floating point operands handle 8-bit offsets.
      needsLowering = ((Addr.Offset & 0xff) != Addr.Offset);
      break;
  }

  // If this is a stack pointer and the offset needs to be simplified then
  // put the alloca address into a register, set the base type back to
  // register and continue. This should almost never happen.
  if (needsLowering && Addr.BaseType == Address::FrameIndexBase) {
    const TargetRegisterClass *RC = isThumb2 ? &ARM::tGPRRegClass
                                             : &ARM::GPRRegClass;
    Register ResultReg = createResultReg(RC);
    unsigned Opc = isThumb2 ? ARM::t2ADDri : ARM::ADDri;
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
                            TII.get(Opc), ResultReg)
                        .addFrameIndex(Addr.Base.FI)
                        .addImm(0));
    Addr.Base.Reg = ResultReg;
    Addr.BaseType = Address::RegBase;
  }

  // Since the offset is too large for the load/store instruction
  // get the reg+offset into a register.
  if (needsLowering) {
    Addr.Base.Reg = fastEmit_ri_(MVT::i32, ISD::ADD, Addr.Base.Reg,
                                 Addr.Offset, MVT::i32);
    Addr.Offset = 0;
  }
}
void ARMFastISel::AddLoadStoreOperands(MVT VT, Address &Addr,
                                       const MachineInstrBuilder &MIB,
                                       MachineMemOperand::Flags Flags,
                                       bool useAM3) {
  // addrmode5 expects the offset already divided by 4; the selection DAG
  // does that division (and multiplies it back later), so do it here as well.
  if (VT.SimpleTy == MVT::f32 || VT.SimpleTy == MVT::f64)
    Addr.Offset /= 4;

  // Frame base works a bit differently. Handle it separately.
  if (Addr.BaseType == Address::FrameIndexBase) {
    int FI = Addr.Base.FI;
    int Offset = Addr.Offset;
    MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
        MachinePointerInfo::getFixedStack(*FuncInfo.MF, FI, Offset), Flags,
        MFI.getObjectSize(FI), MFI.getObjectAlign(FI));
    // Now add the rest of the operands.
    MIB.addFrameIndex(FI);

    // ARM halfword load/stores and signed byte loads need an additional
    // operand.
    if (useAM3) {
      int Imm = (Addr.Offset < 0) ? (0x100 | -Addr.Offset) : Addr.Offset;
      MIB.addReg(0);
      MIB.addImm(Imm);
    } else {
      MIB.addImm(Addr.Offset);
    }
    MIB.addMemOperand(MMO);
  } else {
    // Now add the rest of the operands.
    MIB.addReg(Addr.Base.Reg);

    // ARM halfword load/stores and signed byte loads need an additional
    // operand.
    if (useAM3) {
      int Imm = (Addr.Offset < 0) ? (0x100 | -Addr.Offset) : Addr.Offset;
      MIB.addReg(0);
      MIB.addImm(Imm);
    } else {
      MIB.addImm(Addr.Offset);
    }
  }
  AddOptionalDefs(MIB);
}
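// Emit a load of type VT from Addr into ResultReg, picking the ARM or Thumb2
// opcode and addressing mode for the type and offset. Unaligned f32 loads are
// done as an i32 load followed by a VMOVSR into the FP register.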
bool ARMFastISel::ARMEmitLoad(MVT VT, Register &ResultReg, Address &Addr,
                              MaybeAlign Alignment, bool isZExt,
                              bool allocReg) {
  unsigned Opc;
  bool useAM3 = false;
  bool needVMOV = false;
  const TargetRegisterClass *RC;
  switch (VT.SimpleTy) {
    // This is mostly going to be Neon/vector support.
    default: return false;
    case MVT::i1:
    case MVT::i8:
      if (isThumb2) {
        if (Addr.Offset < 0 && Addr.Offset > -256 && Subtarget->hasV6T2Ops())
          Opc = isZExt ? ARM::t2LDRBi8 : ARM::t2LDRSBi8;
        else
          Opc = isZExt ? ARM::t2LDRBi12 : ARM::t2LDRSBi12;
      } else {
        if (isZExt) {
          Opc = ARM::LDRBi12;
        } else {
          Opc = ARM::LDRSB;
          useAM3 = true;
        }
      }
      RC = isThumb2 ? &ARM::rGPRRegClass : &ARM::GPRnopcRegClass;
      break;
    case MVT::i16:
      if (Alignment && *Alignment < Align(2) &&
          !Subtarget->allowsUnalignedMem())
        return false;

      if (isThumb2) {
        if (Addr.Offset < 0 && Addr.Offset > -256 && Subtarget->hasV6T2Ops())
          Opc = isZExt ? ARM::t2LDRHi8 : ARM::t2LDRSHi8;
        else
          Opc = isZExt ? ARM::t2LDRHi12 : ARM::t2LDRSHi12;
      } else {
        Opc = isZExt ? ARM::LDRH : ARM::LDRSH;
        useAM3 = true;
      }
      RC = isThumb2 ? &ARM::rGPRRegClass : &ARM::GPRnopcRegClass;
      break;
    case MVT::i32:
      if (Alignment && *Alignment < Align(4) &&
          !Subtarget->allowsUnalignedMem())
        return false;

      if (isThumb2) {
        if (Addr.Offset < 0 && Addr.Offset > -256 && Subtarget->hasV6T2Ops())
          Opc = ARM::t2LDRi8;
        else
          Opc = ARM::t2LDRi12;
      } else {
        Opc = ARM::LDRi12;
      }
      RC = isThumb2 ? &ARM::rGPRRegClass : &ARM::GPRnopcRegClass;
      break;
    case MVT::f32:
      if (!Subtarget->hasVFP2Base()) return false;
      // Unaligned loads need special handling. Floats require word-alignment.
      if (Alignment && *Alignment < Align(4)) {
        needVMOV = true;
        VT = MVT::i32;
        Opc = isThumb2 ? ARM::t2LDRi12 : ARM::LDRi12;
        RC = isThumb2 ? &ARM::rGPRRegClass : &ARM::GPRnopcRegClass;
      } else {
        Opc = ARM::VLDRS;
        RC = TLI.getRegClassFor(VT);
      }
      break;
    case MVT::f64:
      // Can load and store double precision even without FeatureFP64
      if (!Subtarget->hasVFP2Base()) return false;
      // FIXME: Unaligned loads need special handling. Doublewords require
      // word-alignment.
      if (Alignment && *Alignment < Align(4))
        return false;

      Opc = ARM::VLDRD;
      RC = TLI.getRegClassFor(VT);
      break;
  }
  // Simplify this down to something we can handle.
  ARMSimplifyAddress(Addr, VT, useAM3);

  // Create the base instruction, then add the operands.
  if (allocReg)
    ResultReg = createResultReg(RC);
  assert(ResultReg > 255 && "Expected an allocated virtual register.");
  MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
                                    TII.get(Opc), ResultReg);
  AddLoadStoreOperands(VT, Addr, MIB, MachineMemOperand::MOLoad, useAM3);

  // If we had an unaligned load of a float we've converted it to a regular
  // load. Now we must move from the GPR to the FP register.
  if (needVMOV) {
    Register MoveReg = createResultReg(TLI.getRegClassFor(MVT::f32));
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
                            TII.get(ARM::VMOVSR), MoveReg)
                        .addReg(ResultReg));
    ResultReg = MoveReg;
  }
  return true;
}
bool ARMFastISel::SelectLoad(const Instruction *I) {
  // Atomic loads need special handling.
  if (cast<LoadInst>(I)->isAtomic())
    return false;

  const Value *SV = I->getOperand(0);
  if (TLI.supportSwiftError()) {
    // Swifterror values can come from either a function parameter with
    // swifterror attribute or an alloca with swifterror attribute.
    if (const Argument *Arg = dyn_cast<Argument>(SV)) {
      if (Arg->hasSwiftErrorAttr())
        return false;
    }

    if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(SV)) {
      if (Alloca->isSwiftError())
        return false;
    }
  }

  // Verify we have a legal type before going any further.
  MVT VT;
  if (!isLoadTypeLegal(I->getType(), VT))
    return false;

  // See if we can handle this address.
  Address Addr;
  if (!ARMComputeAddress(I->getOperand(0), Addr)) return false;

  Register ResultReg;
  if (!ARMEmitLoad(VT, ResultReg, Addr, cast<LoadInst>(I)->getAlign()))
    return false;
  updateValueMap(I, ResultReg);
  return true;
}
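// Emit a store of SrcReg (of type VT) to Addr. i1 values are masked down to
// a single bit first, and unaligned f32 stores are moved to a GPR with
// VMOVRS and stored as i32.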
bool ARMFastISel::ARMEmitStore(MVT VT, unsigned SrcReg, Address &Addr,
                               MaybeAlign Alignment) {
  unsigned StrOpc;
  bool useAM3 = false;
  switch (VT.SimpleTy) {
    // This is mostly going to be Neon/vector support.
    default: return false;
    case MVT::i1: {
      Register Res = createResultReg(isThumb2 ? &ARM::tGPRRegClass
                                              : &ARM::GPRRegClass);
      unsigned Opc = isThumb2 ? ARM::t2ANDri : ARM::ANDri;
      SrcReg = constrainOperandRegClass(TII.get(Opc), SrcReg, 1);
      AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
                              TII.get(Opc), Res)
                          .addReg(SrcReg).addImm(1));
      SrcReg = Res;
      [[fallthrough]];
    }
    case MVT::i8:
      if (isThumb2) {
        if (Addr.Offset < 0 && Addr.Offset > -256 && Subtarget->hasV6T2Ops())
          StrOpc = ARM::t2STRBi8;
        else
          StrOpc = ARM::t2STRBi12;
      } else {
        StrOpc = ARM::STRBi12;
      }
      break;
    case MVT::i16:
      if (Alignment && *Alignment < Align(2) &&
          !Subtarget->allowsUnalignedMem())
        return false;

      if (isThumb2) {
        if (Addr.Offset < 0 && Addr.Offset > -256 && Subtarget->hasV6T2Ops())
          StrOpc = ARM::t2STRHi8;
        else
          StrOpc = ARM::t2STRHi12;
      } else {
        StrOpc = ARM::STRH;
        useAM3 = true;
      }
      break;
    case MVT::i32:
      if (Alignment && *Alignment < Align(4) &&
          !Subtarget->allowsUnalignedMem())
        return false;

      if (isThumb2) {
        if (Addr.Offset < 0 && Addr.Offset > -256 && Subtarget->hasV6T2Ops())
          StrOpc = ARM::t2STRi8;
        else
          StrOpc = ARM::t2STRi12;
      } else {
        StrOpc = ARM::STRi12;
      }
      break;
    case MVT::f32:
      if (!Subtarget->hasVFP2Base()) return false;
      // Unaligned stores need special handling. Floats require word-alignment.
      if (Alignment && *Alignment < Align(4)) {
        Register MoveReg = createResultReg(TLI.getRegClassFor(MVT::i32));
        AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
                                TII.get(ARM::VMOVRS), MoveReg)
                            .addReg(SrcReg));
        SrcReg = MoveReg;
        VT = MVT::i32;
        StrOpc = isThumb2 ? ARM::t2STRi12 : ARM::STRi12;
      } else {
        StrOpc = ARM::VSTRS;
      }
      break;
    case MVT::f64:
      // Can load and store double precision even without FeatureFP64
      if (!Subtarget->hasVFP2Base()) return false;
      // FIXME: Unaligned stores need special handling. Doublewords require
      // word-alignment.
      if (Alignment && *Alignment < Align(4))
        return false;

      StrOpc = ARM::VSTRD;
      break;
  }
  // Simplify this down to something we can handle.
  ARMSimplifyAddress(Addr, VT, useAM3);

  // Create the base instruction, then add the operands.
  SrcReg = constrainOperandRegClass(TII.get(StrOpc), SrcReg, 0);
  MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
                                    TII.get(StrOpc))
                                .addReg(SrcReg);
  AddLoadStoreOperands(VT, Addr, MIB, MachineMemOperand::MOStore, useAM3);
  return true;
}
bool ARMFastISel::SelectStore(const Instruction *I) {
  Value *Op0 = I->getOperand(0);
  unsigned SrcReg = 0;

  // Atomic stores need special handling.
  if (cast<StoreInst>(I)->isAtomic())
    return false;

  const Value *PtrV = I->getOperand(1);
  if (TLI.supportSwiftError()) {
    // Swifterror values can come from either a function parameter with
    // swifterror attribute or an alloca with swifterror attribute.
    if (const Argument *Arg = dyn_cast<Argument>(PtrV)) {
      if (Arg->hasSwiftErrorAttr())
        return false;
    }

    if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(PtrV)) {
      if (Alloca->isSwiftError())
        return false;
    }
  }

  // Verify we have a legal type before going any further.
  MVT VT;
  if (!isLoadTypeLegal(I->getOperand(0)->getType(), VT))
    return false;

  // Get the value to be stored into a register.
  SrcReg = getRegForValue(Op0);
  if (SrcReg == 0) return false;

  // See if we can handle this address.
  Address Addr;
  if (!ARMComputeAddress(I->getOperand(1), Addr))
    return false;

  if (!ARMEmitStore(VT, SrcReg, Addr, cast<StoreInst>(I)->getAlign()))
    return false;
  return true;
}
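// Map an IR comparison predicate onto the ARM condition code used for the
// subsequent conditional instruction. ARMCC::AL is returned for predicates
// that would need more than one compare and acts as the "unhandled" value.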
static ARMCC::CondCodes getComparePred(CmpInst::Predicate Pred) {
  switch (Pred) {
    // Needs two compares...
    case CmpInst::FCMP_ONE:
    case CmpInst::FCMP_UEQ:
    default:
      // AL is our "false" for now. The other two need more compares.
      return ARMCC::AL;
    case CmpInst::ICMP_EQ:
    case CmpInst::FCMP_OEQ:
      return ARMCC::EQ;
    case CmpInst::ICMP_SGT:
    case CmpInst::FCMP_OGT:
      return ARMCC::GT;
    case CmpInst::ICMP_SGE:
    case CmpInst::FCMP_OGE:
      return ARMCC::GE;
    case CmpInst::ICMP_UGT:
    case CmpInst::FCMP_UGT:
      return ARMCC::HI;
    case CmpInst::FCMP_OLT:
      return ARMCC::MI;
    case CmpInst::ICMP_ULE:
    case CmpInst::FCMP_OLE:
      return ARMCC::LS;
    case CmpInst::FCMP_ORD:
      return ARMCC::VC;
    case CmpInst::FCMP_UNO:
      return ARMCC::VS;
    case CmpInst::FCMP_UGE:
      return ARMCC::PL;
    case CmpInst::ICMP_SLT:
    case CmpInst::FCMP_ULT:
      return ARMCC::LT;
    case CmpInst::ICMP_SLE:
    case CmpInst::FCMP_ULE:
      return ARMCC::LE;
    case CmpInst::FCMP_UNE:
    case CmpInst::ICMP_NE:
      return ARMCC::NE;
    case CmpInst::ICMP_UGE:
      return ARMCC::HS;
    case CmpInst::ICMP_ULT:
      return ARMCC::LO;
  }
}
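// Lower a conditional or unconditional branch. A single-use compare or trunc
// in the same block is folded into the branch; otherwise the materialized
// i1 condition value is tested with TST and branched on NE/EQ.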
  1092. bool ARMFastISel::SelectBranch(const Instruction *I) {
  1093. const BranchInst *BI = cast<BranchInst>(I);
  1094. MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
  1095. MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)];
  1096. // Simple branch support.
  1097. // If we can, avoid recomputing the compare - redoing it could lead to wonky
  1098. // behavior.
  1099. if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) {
  1100. if (CI->hasOneUse() && (CI->getParent() == I->getParent())) {
  1101. // Get the compare predicate.
  1102. // Try to take advantage of fallthrough opportunities.
  1103. CmpInst::Predicate Predicate = CI->getPredicate();
  1104. if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
  1105. std::swap(TBB, FBB);
  1106. Predicate = CmpInst::getInversePredicate(Predicate);
  1107. }
  1108. ARMCC::CondCodes ARMPred = getComparePred(Predicate);
  1109. // We may not handle every CC for now.
  1110. if (ARMPred == ARMCC::AL) return false;
  1111. // Emit the compare.
  1112. if (!ARMEmitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned()))
  1113. return false;
  1114. unsigned BrOpc = isThumb2 ? ARM::t2Bcc : ARM::Bcc;
  1115. BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(BrOpc))
  1116. .addMBB(TBB).addImm(ARMPred).addReg(ARM::CPSR);
  1117. finishCondBranch(BI->getParent(), TBB, FBB);
  1118. return true;
  1119. }
  1120. } else if (TruncInst *TI = dyn_cast<TruncInst>(BI->getCondition())) {
  1121. MVT SourceVT;
  1122. if (TI->hasOneUse() && TI->getParent() == I->getParent() &&
  1123. (isLoadTypeLegal(TI->getOperand(0)->getType(), SourceVT))) {
  1124. unsigned TstOpc = isThumb2 ? ARM::t2TSTri : ARM::TSTri;
  1125. Register OpReg = getRegForValue(TI->getOperand(0));
  1126. OpReg = constrainOperandRegClass(TII.get(TstOpc), OpReg, 0);
  1127. AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
  1128. TII.get(TstOpc))
  1129. .addReg(OpReg).addImm(1));
  1130. unsigned CCMode = ARMCC::NE;
  1131. if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
  1132. std::swap(TBB, FBB);
  1133. CCMode = ARMCC::EQ;
  1134. }
  1135. unsigned BrOpc = isThumb2 ? ARM::t2Bcc : ARM::Bcc;
  1136. BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(BrOpc))
  1137. .addMBB(TBB).addImm(CCMode).addReg(ARM::CPSR);
  1138. finishCondBranch(BI->getParent(), TBB, FBB);
  1139. return true;
  1140. }
  1141. } else if (const ConstantInt *CI =
  1142. dyn_cast<ConstantInt>(BI->getCondition())) {
  1143. uint64_t Imm = CI->getZExtValue();
  1144. MachineBasicBlock *Target = (Imm == 0) ? FBB : TBB;
  1145. fastEmitBranch(Target, MIMD.getDL());
  1146. return true;
  1147. }
  1148. Register CmpReg = getRegForValue(BI->getCondition());
  1149. if (CmpReg == 0) return false;
  1150. // We've been divorced from our compare! Our block was split, and
1151. // now our compare lives in a predecessor block. We mustn't
  1152. // re-compare here, as the children of the compare aren't guaranteed
  1153. // live across the block boundary (we *could* check for this).
  1154. // Regardless, the compare has been done in the predecessor block,
  1155. // and it left a value for us in a virtual register. Ergo, we test
  1156. // the one-bit value left in the virtual register.
  1157. unsigned TstOpc = isThumb2 ? ARM::t2TSTri : ARM::TSTri;
  1158. CmpReg = constrainOperandRegClass(TII.get(TstOpc), CmpReg, 0);
  1159. AddOptionalDefs(
  1160. BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(TstOpc))
  1161. .addReg(CmpReg)
  1162. .addImm(1));
  1163. unsigned CCMode = ARMCC::NE;
  1164. if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
  1165. std::swap(TBB, FBB);
  1166. CCMode = ARMCC::EQ;
  1167. }
  1168. unsigned BrOpc = isThumb2 ? ARM::t2Bcc : ARM::Bcc;
  1169. BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(BrOpc))
  1170. .addMBB(TBB).addImm(CCMode).addReg(ARM::CPSR);
  1171. finishCondBranch(BI->getParent(), TBB, FBB);
  1172. return true;
  1173. }
  1174. bool ARMFastISel::SelectIndirectBr(const Instruction *I) {
  1175. Register AddrReg = getRegForValue(I->getOperand(0));
  1176. if (AddrReg == 0) return false;
  1177. unsigned Opc = isThumb2 ? ARM::tBRIND : ARM::BX;
  1178. assert(isThumb2 || Subtarget->hasV4TOps());
  1179. AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
  1180. TII.get(Opc)).addReg(AddrReg));
  1181. const IndirectBrInst *IB = cast<IndirectBrInst>(I);
  1182. for (const BasicBlock *SuccBB : IB->successors())
  1183. FuncInfo.MBB->addSuccessor(FuncInfo.MBBMap[SuccBB]);
  1184. return true;
  1185. }
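// Emit a compare of Src1Value and Src2Value, leaving the result in the flags.
// Small constants are folded into CMP/CMN (or the implicit 0.0 of VCMPZ*),
// i1/i8/i16 operands are extended to i32 first, and for floating point the
// FPSCR flags are copied to CPSR with FMSTAT so branches can predicate on them.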
  1186. bool ARMFastISel::ARMEmitCmp(const Value *Src1Value, const Value *Src2Value,
  1187. bool isZExt) {
  1188. Type *Ty = Src1Value->getType();
  1189. EVT SrcEVT = TLI.getValueType(DL, Ty, true);
  1190. if (!SrcEVT.isSimple()) return false;
  1191. MVT SrcVT = SrcEVT.getSimpleVT();
  1192. if (Ty->isFloatTy() && !Subtarget->hasVFP2Base())
  1193. return false;
  1194. if (Ty->isDoubleTy() && (!Subtarget->hasVFP2Base() || !Subtarget->hasFP64()))
  1195. return false;
  1196. // Check to see if the 2nd operand is a constant that we can encode directly
  1197. // in the compare.
  1198. int Imm = 0;
  1199. bool UseImm = false;
  1200. bool isNegativeImm = false;
  1201. // FIXME: At -O0 we don't have anything that canonicalizes operand order.
  1202. // Thus, Src1Value may be a ConstantInt, but we're missing it.
  1203. if (const ConstantInt *ConstInt = dyn_cast<ConstantInt>(Src2Value)) {
  1204. if (SrcVT == MVT::i32 || SrcVT == MVT::i16 || SrcVT == MVT::i8 ||
  1205. SrcVT == MVT::i1) {
  1206. const APInt &CIVal = ConstInt->getValue();
  1207. Imm = (isZExt) ? (int)CIVal.getZExtValue() : (int)CIVal.getSExtValue();
  1208. // For INT_MIN/LONG_MIN (i.e., 0x80000000) we need to use a cmp, rather
1209. // than a cmn, because there is no way to represent 2147483648 as a
  1210. // signed 32-bit int.
  1211. if (Imm < 0 && Imm != (int)0x80000000) {
  1212. isNegativeImm = true;
  1213. Imm = -Imm;
  1214. }
  1215. UseImm = isThumb2 ? (ARM_AM::getT2SOImmVal(Imm) != -1) :
  1216. (ARM_AM::getSOImmVal(Imm) != -1);
  1217. }
  1218. } else if (const ConstantFP *ConstFP = dyn_cast<ConstantFP>(Src2Value)) {
  1219. if (SrcVT == MVT::f32 || SrcVT == MVT::f64)
  1220. if (ConstFP->isZero() && !ConstFP->isNegative())
  1221. UseImm = true;
  1222. }
  1223. unsigned CmpOpc;
  1224. bool isICmp = true;
  1225. bool needsExt = false;
  1226. switch (SrcVT.SimpleTy) {
  1227. default: return false;
  1228. // TODO: Verify compares.
  1229. case MVT::f32:
  1230. isICmp = false;
  1231. CmpOpc = UseImm ? ARM::VCMPZS : ARM::VCMPS;
  1232. break;
  1233. case MVT::f64:
  1234. isICmp = false;
  1235. CmpOpc = UseImm ? ARM::VCMPZD : ARM::VCMPD;
  1236. break;
  1237. case MVT::i1:
  1238. case MVT::i8:
  1239. case MVT::i16:
  1240. needsExt = true;
  1241. [[fallthrough]];
  1242. case MVT::i32:
  1243. if (isThumb2) {
  1244. if (!UseImm)
  1245. CmpOpc = ARM::t2CMPrr;
  1246. else
  1247. CmpOpc = isNegativeImm ? ARM::t2CMNri : ARM::t2CMPri;
  1248. } else {
  1249. if (!UseImm)
  1250. CmpOpc = ARM::CMPrr;
  1251. else
  1252. CmpOpc = isNegativeImm ? ARM::CMNri : ARM::CMPri;
  1253. }
  1254. break;
  1255. }
  1256. Register SrcReg1 = getRegForValue(Src1Value);
  1257. if (SrcReg1 == 0) return false;
  1258. unsigned SrcReg2 = 0;
  1259. if (!UseImm) {
  1260. SrcReg2 = getRegForValue(Src2Value);
  1261. if (SrcReg2 == 0) return false;
  1262. }
  1263. // We have i1, i8, or i16, we need to either zero extend or sign extend.
  1264. if (needsExt) {
  1265. SrcReg1 = ARMEmitIntExt(SrcVT, SrcReg1, MVT::i32, isZExt);
  1266. if (SrcReg1 == 0) return false;
  1267. if (!UseImm) {
  1268. SrcReg2 = ARMEmitIntExt(SrcVT, SrcReg2, MVT::i32, isZExt);
  1269. if (SrcReg2 == 0) return false;
  1270. }
  1271. }
  1272. const MCInstrDesc &II = TII.get(CmpOpc);
  1273. SrcReg1 = constrainOperandRegClass(II, SrcReg1, 0);
  1274. if (!UseImm) {
  1275. SrcReg2 = constrainOperandRegClass(II, SrcReg2, 1);
  1276. AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II)
  1277. .addReg(SrcReg1).addReg(SrcReg2));
  1278. } else {
  1279. MachineInstrBuilder MIB;
  1280. MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II)
  1281. .addReg(SrcReg1);
  1282. // Only add immediate for icmp as the immediate for fcmp is an implicit 0.0.
  1283. if (isICmp)
  1284. MIB.addImm(Imm);
  1285. AddOptionalDefs(MIB);
  1286. }
  1287. // For floating point we need to move the result to a comparison register
  1288. // that we can then use for branches.
  1289. if (Ty->isFloatTy() || Ty->isDoubleTy())
  1290. AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
  1291. TII.get(ARM::FMSTAT)));
  1292. return true;
  1293. }
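// Lower an icmp/fcmp whose result is used as a value: emit the compare, then
// materialize 0 and conditionally overwrite it with 1 via a predicated MOVCCi,
// producing roughly "mov rD, #0; cmp ...; mov<cc> rD, #1".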
  1294. bool ARMFastISel::SelectCmp(const Instruction *I) {
  1295. const CmpInst *CI = cast<CmpInst>(I);
  1296. // Get the compare predicate.
  1297. ARMCC::CondCodes ARMPred = getComparePred(CI->getPredicate());
  1298. // We may not handle every CC for now.
  1299. if (ARMPred == ARMCC::AL) return false;
  1300. // Emit the compare.
  1301. if (!ARMEmitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned()))
  1302. return false;
  1303. // Now set a register based on the comparison. Explicitly set the predicates
  1304. // here.
  1305. unsigned MovCCOpc = isThumb2 ? ARM::t2MOVCCi : ARM::MOVCCi;
  1306. const TargetRegisterClass *RC = isThumb2 ? &ARM::rGPRRegClass
  1307. : &ARM::GPRRegClass;
  1308. Register DestReg = createResultReg(RC);
  1309. Constant *Zero = ConstantInt::get(Type::getInt32Ty(*Context), 0);
  1310. unsigned ZeroReg = fastMaterializeConstant(Zero);
  1311. // ARMEmitCmp emits a FMSTAT when necessary, so it's always safe to use CPSR.
  1312. BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(MovCCOpc), DestReg)
  1313. .addReg(ZeroReg).addImm(1)
  1314. .addImm(ARMPred).addReg(ARM::CPSR);
  1315. updateValueMap(I, DestReg);
  1316. return true;
  1317. }
  1318. bool ARMFastISel::SelectFPExt(const Instruction *I) {
  1319. // Make sure we have VFP and that we're extending float to double.
  1320. if (!Subtarget->hasVFP2Base() || !Subtarget->hasFP64()) return false;
  1321. Value *V = I->getOperand(0);
  1322. if (!I->getType()->isDoubleTy() ||
  1323. !V->getType()->isFloatTy()) return false;
  1324. Register Op = getRegForValue(V);
  1325. if (Op == 0) return false;
  1326. Register Result = createResultReg(&ARM::DPRRegClass);
  1327. AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
  1328. TII.get(ARM::VCVTDS), Result)
  1329. .addReg(Op));
  1330. updateValueMap(I, Result);
  1331. return true;
  1332. }
  1333. bool ARMFastISel::SelectFPTrunc(const Instruction *I) {
  1334. // Make sure we have VFP and that we're truncating double to float.
  1335. if (!Subtarget->hasVFP2Base() || !Subtarget->hasFP64()) return false;
  1336. Value *V = I->getOperand(0);
  1337. if (!(I->getType()->isFloatTy() &&
  1338. V->getType()->isDoubleTy())) return false;
  1339. Register Op = getRegForValue(V);
  1340. if (Op == 0) return false;
  1341. Register Result = createResultReg(&ARM::SPRRegClass);
  1342. AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
  1343. TII.get(ARM::VCVTSD), Result)
  1344. .addReg(Op));
  1345. updateValueMap(I, Result);
  1346. return true;
  1347. }
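// Convert a signed or unsigned integer to float/double: sub-word sources are
// extended to i32, the value is transferred to an FP register (via
// ARMMoveToFPReg), and VSITO*/VUITO* then performs the conversion.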
  1348. bool ARMFastISel::SelectIToFP(const Instruction *I, bool isSigned) {
  1349. // Make sure we have VFP.
  1350. if (!Subtarget->hasVFP2Base()) return false;
  1351. MVT DstVT;
  1352. Type *Ty = I->getType();
  1353. if (!isTypeLegal(Ty, DstVT))
  1354. return false;
  1355. Value *Src = I->getOperand(0);
  1356. EVT SrcEVT = TLI.getValueType(DL, Src->getType(), true);
  1357. if (!SrcEVT.isSimple())
  1358. return false;
  1359. MVT SrcVT = SrcEVT.getSimpleVT();
  1360. if (SrcVT != MVT::i32 && SrcVT != MVT::i16 && SrcVT != MVT::i8)
  1361. return false;
  1362. Register SrcReg = getRegForValue(Src);
  1363. if (SrcReg == 0) return false;
  1364. // Handle sign-extension.
  1365. if (SrcVT == MVT::i16 || SrcVT == MVT::i8) {
  1366. SrcReg = ARMEmitIntExt(SrcVT, SrcReg, MVT::i32,
  1367. /*isZExt*/!isSigned);
  1368. if (SrcReg == 0) return false;
  1369. }
1370. // The conversion routine works on fp-reg to fp-reg; since the operand above
1371. // was an integer, move it to the fp registers if possible.
  1372. unsigned FP = ARMMoveToFPReg(MVT::f32, SrcReg);
  1373. if (FP == 0) return false;
  1374. unsigned Opc;
  1375. if (Ty->isFloatTy()) Opc = isSigned ? ARM::VSITOS : ARM::VUITOS;
  1376. else if (Ty->isDoubleTy() && Subtarget->hasFP64())
  1377. Opc = isSigned ? ARM::VSITOD : ARM::VUITOD;
  1378. else return false;
  1379. Register ResultReg = createResultReg(TLI.getRegClassFor(DstVT));
  1380. AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
  1381. TII.get(Opc), ResultReg).addReg(FP));
  1382. updateValueMap(I, ResultReg);
  1383. return true;
  1384. }
  1385. bool ARMFastISel::SelectFPToI(const Instruction *I, bool isSigned) {
  1386. // Make sure we have VFP.
  1387. if (!Subtarget->hasVFP2Base()) return false;
  1388. MVT DstVT;
  1389. Type *RetTy = I->getType();
  1390. if (!isTypeLegal(RetTy, DstVT))
  1391. return false;
  1392. Register Op = getRegForValue(I->getOperand(0));
  1393. if (Op == 0) return false;
  1394. unsigned Opc;
  1395. Type *OpTy = I->getOperand(0)->getType();
  1396. if (OpTy->isFloatTy()) Opc = isSigned ? ARM::VTOSIZS : ARM::VTOUIZS;
  1397. else if (OpTy->isDoubleTy() && Subtarget->hasFP64())
  1398. Opc = isSigned ? ARM::VTOSIZD : ARM::VTOUIZD;
  1399. else return false;
  1400. // f64->s32/u32 or f32->s32/u32 both need an intermediate f32 reg.
  1401. Register ResultReg = createResultReg(TLI.getRegClassFor(MVT::f32));
  1402. AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
  1403. TII.get(Opc), ResultReg).addReg(Op));
  1404. // This result needs to be in an integer register, but the conversion only
  1405. // takes place in fp-regs.
  1406. unsigned IntReg = ARMMoveToIntReg(DstVT, ResultReg);
  1407. if (IntReg == 0) return false;
  1408. updateValueMap(I, IntReg);
  1409. return true;
  1410. }
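// Lower a select of i32 values: test bit 0 of the condition with TST #1, then
// use a predicated MOVCC (or MVNCC for a negative constant) to pick between
// the two inputs; a constant operand is used as an immediate when it fits a
// modified-immediate encoding.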
  1411. bool ARMFastISel::SelectSelect(const Instruction *I) {
  1412. MVT VT;
  1413. if (!isTypeLegal(I->getType(), VT))
  1414. return false;
  1415. // Things need to be register sized for register moves.
  1416. if (VT != MVT::i32) return false;
  1417. Register CondReg = getRegForValue(I->getOperand(0));
  1418. if (CondReg == 0) return false;
  1419. Register Op1Reg = getRegForValue(I->getOperand(1));
  1420. if (Op1Reg == 0) return false;
  1421. // Check to see if we can use an immediate in the conditional move.
  1422. int Imm = 0;
  1423. bool UseImm = false;
  1424. bool isNegativeImm = false;
  1425. if (const ConstantInt *ConstInt = dyn_cast<ConstantInt>(I->getOperand(2))) {
  1426. assert(VT == MVT::i32 && "Expecting an i32.");
  1427. Imm = (int)ConstInt->getValue().getZExtValue();
  1428. if (Imm < 0) {
  1429. isNegativeImm = true;
  1430. Imm = ~Imm;
  1431. }
  1432. UseImm = isThumb2 ? (ARM_AM::getT2SOImmVal(Imm) != -1) :
  1433. (ARM_AM::getSOImmVal(Imm) != -1);
  1434. }
  1435. unsigned Op2Reg = 0;
  1436. if (!UseImm) {
  1437. Op2Reg = getRegForValue(I->getOperand(2));
  1438. if (Op2Reg == 0) return false;
  1439. }
  1440. unsigned TstOpc = isThumb2 ? ARM::t2TSTri : ARM::TSTri;
  1441. CondReg = constrainOperandRegClass(TII.get(TstOpc), CondReg, 0);
  1442. AddOptionalDefs(
  1443. BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(TstOpc))
  1444. .addReg(CondReg)
  1445. .addImm(1));
  1446. unsigned MovCCOpc;
  1447. const TargetRegisterClass *RC;
  1448. if (!UseImm) {
  1449. RC = isThumb2 ? &ARM::tGPRRegClass : &ARM::GPRRegClass;
  1450. MovCCOpc = isThumb2 ? ARM::t2MOVCCr : ARM::MOVCCr;
  1451. } else {
  1452. RC = isThumb2 ? &ARM::rGPRRegClass : &ARM::GPRRegClass;
  1453. if (!isNegativeImm)
  1454. MovCCOpc = isThumb2 ? ARM::t2MOVCCi : ARM::MOVCCi;
  1455. else
  1456. MovCCOpc = isThumb2 ? ARM::t2MVNCCi : ARM::MVNCCi;
  1457. }
  1458. Register ResultReg = createResultReg(RC);
  1459. if (!UseImm) {
  1460. Op2Reg = constrainOperandRegClass(TII.get(MovCCOpc), Op2Reg, 1);
  1461. Op1Reg = constrainOperandRegClass(TII.get(MovCCOpc), Op1Reg, 2);
  1462. BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(MovCCOpc),
  1463. ResultReg)
  1464. .addReg(Op2Reg)
  1465. .addReg(Op1Reg)
  1466. .addImm(ARMCC::NE)
  1467. .addReg(ARM::CPSR);
  1468. } else {
  1469. Op1Reg = constrainOperandRegClass(TII.get(MovCCOpc), Op1Reg, 1);
  1470. BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(MovCCOpc),
  1471. ResultReg)
  1472. .addReg(Op1Reg)
  1473. .addImm(Imm)
  1474. .addImm(ARMCC::EQ)
  1475. .addReg(ARM::CPSR);
  1476. }
  1477. updateValueMap(I, ResultReg);
  1478. return true;
  1479. }
  1480. bool ARMFastISel::SelectDiv(const Instruction *I, bool isSigned) {
  1481. MVT VT;
  1482. Type *Ty = I->getType();
  1483. if (!isTypeLegal(Ty, VT))
  1484. return false;
  1485. // If we have integer div support we should have selected this automagically.
1486. // In case we have a real miss, go ahead and return false and we'll pick
  1487. // it up later.
  1488. if (Subtarget->hasDivideInThumbMode())
  1489. return false;
  1490. // Otherwise emit a libcall.
  1491. RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
  1492. if (VT == MVT::i8)
  1493. LC = isSigned ? RTLIB::SDIV_I8 : RTLIB::UDIV_I8;
  1494. else if (VT == MVT::i16)
  1495. LC = isSigned ? RTLIB::SDIV_I16 : RTLIB::UDIV_I16;
  1496. else if (VT == MVT::i32)
  1497. LC = isSigned ? RTLIB::SDIV_I32 : RTLIB::UDIV_I32;
  1498. else if (VT == MVT::i64)
  1499. LC = isSigned ? RTLIB::SDIV_I64 : RTLIB::UDIV_I64;
  1500. else if (VT == MVT::i128)
  1501. LC = isSigned ? RTLIB::SDIV_I128 : RTLIB::UDIV_I128;
  1502. assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported SDIV!");
  1503. return ARMEmitLibcall(I, LC);
  1504. }
  1505. bool ARMFastISel::SelectRem(const Instruction *I, bool isSigned) {
  1506. MVT VT;
  1507. Type *Ty = I->getType();
  1508. if (!isTypeLegal(Ty, VT))
  1509. return false;
  1510. // Many ABIs do not provide a libcall for standalone remainder, so we need to
  1511. // use divrem (see the RTABI 4.3.1). Since FastISel can't handle non-double
  1512. // multi-reg returns, we'll have to bail out.
  1513. if (!TLI.hasStandaloneRem(VT)) {
  1514. return false;
  1515. }
  1516. RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
  1517. if (VT == MVT::i8)
  1518. LC = isSigned ? RTLIB::SREM_I8 : RTLIB::UREM_I8;
  1519. else if (VT == MVT::i16)
  1520. LC = isSigned ? RTLIB::SREM_I16 : RTLIB::UREM_I16;
  1521. else if (VT == MVT::i32)
  1522. LC = isSigned ? RTLIB::SREM_I32 : RTLIB::UREM_I32;
  1523. else if (VT == MVT::i64)
  1524. LC = isSigned ? RTLIB::SREM_I64 : RTLIB::UREM_I64;
  1525. else if (VT == MVT::i128)
  1526. LC = isSigned ? RTLIB::SREM_I128 : RTLIB::UREM_I128;
  1527. assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported SREM!");
  1528. return ARMEmitLibcall(I, LC);
  1529. }
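// Handle add/or/sub on the promotable types (i1/i8/i16) that the target
// independent selector rejected. The operation is simply performed in 32-bit
// registers; bits above the original width are treated as undefined.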
  1530. bool ARMFastISel::SelectBinaryIntOp(const Instruction *I, unsigned ISDOpcode) {
  1531. EVT DestVT = TLI.getValueType(DL, I->getType(), true);
  1532. // We can get here in the case when we have a binary operation on a non-legal
  1533. // type and the target independent selector doesn't know how to handle it.
  1534. if (DestVT != MVT::i16 && DestVT != MVT::i8 && DestVT != MVT::i1)
  1535. return false;
  1536. unsigned Opc;
  1537. switch (ISDOpcode) {
  1538. default: return false;
  1539. case ISD::ADD:
  1540. Opc = isThumb2 ? ARM::t2ADDrr : ARM::ADDrr;
  1541. break;
  1542. case ISD::OR:
  1543. Opc = isThumb2 ? ARM::t2ORRrr : ARM::ORRrr;
  1544. break;
  1545. case ISD::SUB:
  1546. Opc = isThumb2 ? ARM::t2SUBrr : ARM::SUBrr;
  1547. break;
  1548. }
  1549. Register SrcReg1 = getRegForValue(I->getOperand(0));
  1550. if (SrcReg1 == 0) return false;
  1551. // TODO: Often the 2nd operand is an immediate, which can be encoded directly
1552. // in the instruction, rather than materializing the value in a register.
  1553. Register SrcReg2 = getRegForValue(I->getOperand(1));
  1554. if (SrcReg2 == 0) return false;
  1555. Register ResultReg = createResultReg(&ARM::GPRnopcRegClass);
  1556. SrcReg1 = constrainOperandRegClass(TII.get(Opc), SrcReg1, 1);
  1557. SrcReg2 = constrainOperandRegClass(TII.get(Opc), SrcReg2, 2);
  1558. AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
  1559. TII.get(Opc), ResultReg)
  1560. .addReg(SrcReg1).addReg(SrcReg2));
  1561. updateValueMap(I, ResultReg);
  1562. return true;
  1563. }
  1564. bool ARMFastISel::SelectBinaryFPOp(const Instruction *I, unsigned ISDOpcode) {
  1565. EVT FPVT = TLI.getValueType(DL, I->getType(), true);
  1566. if (!FPVT.isSimple()) return false;
  1567. MVT VT = FPVT.getSimpleVT();
  1568. // FIXME: Support vector types where possible.
  1569. if (VT.isVector())
  1570. return false;
  1571. // We can get here in the case when we want to use NEON for our fp
  1572. // operations, but can't figure out how to. Just use the vfp instructions
  1573. // if we have them.
  1574. // FIXME: It'd be nice to use NEON instructions.
  1575. Type *Ty = I->getType();
  1576. if (Ty->isFloatTy() && !Subtarget->hasVFP2Base())
  1577. return false;
  1578. if (Ty->isDoubleTy() && (!Subtarget->hasVFP2Base() || !Subtarget->hasFP64()))
  1579. return false;
  1580. unsigned Opc;
  1581. bool is64bit = VT == MVT::f64 || VT == MVT::i64;
  1582. switch (ISDOpcode) {
  1583. default: return false;
  1584. case ISD::FADD:
  1585. Opc = is64bit ? ARM::VADDD : ARM::VADDS;
  1586. break;
  1587. case ISD::FSUB:
  1588. Opc = is64bit ? ARM::VSUBD : ARM::VSUBS;
  1589. break;
  1590. case ISD::FMUL:
  1591. Opc = is64bit ? ARM::VMULD : ARM::VMULS;
  1592. break;
  1593. }
  1594. Register Op1 = getRegForValue(I->getOperand(0));
  1595. if (Op1 == 0) return false;
  1596. Register Op2 = getRegForValue(I->getOperand(1));
  1597. if (Op2 == 0) return false;
  1598. Register ResultReg = createResultReg(TLI.getRegClassFor(VT.SimpleTy));
  1599. AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
  1600. TII.get(Opc), ResultReg)
  1601. .addReg(Op1).addReg(Op2));
  1602. updateValueMap(I, ResultReg);
  1603. return true;
  1604. }
  1605. // Call Handling Code
  1606. // This is largely taken directly from CCAssignFnForNode
  1607. // TODO: We may not support all of this.
  1608. CCAssignFn *ARMFastISel::CCAssignFnForCall(CallingConv::ID CC,
  1609. bool Return,
  1610. bool isVarArg) {
  1611. switch (CC) {
  1612. default:
  1613. report_fatal_error("Unsupported calling convention");
  1614. case CallingConv::Fast:
  1615. if (Subtarget->hasVFP2Base() && !isVarArg) {
  1616. if (!Subtarget->isAAPCS_ABI())
  1617. return (Return ? RetFastCC_ARM_APCS : FastCC_ARM_APCS);
  1618. // For AAPCS ABI targets, just use VFP variant of the calling convention.
  1619. return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP);
  1620. }
  1621. [[fallthrough]];
  1622. case CallingConv::C:
  1623. case CallingConv::CXX_FAST_TLS:
  1624. // Use target triple & subtarget features to do actual dispatch.
  1625. if (Subtarget->isAAPCS_ABI()) {
  1626. if (Subtarget->hasVFP2Base() &&
  1627. TM.Options.FloatABIType == FloatABI::Hard && !isVarArg)
  1628. return (Return ? RetCC_ARM_AAPCS_VFP: CC_ARM_AAPCS_VFP);
  1629. else
  1630. return (Return ? RetCC_ARM_AAPCS: CC_ARM_AAPCS);
  1631. } else {
  1632. return (Return ? RetCC_ARM_APCS: CC_ARM_APCS);
  1633. }
  1634. case CallingConv::ARM_AAPCS_VFP:
  1635. case CallingConv::Swift:
  1636. case CallingConv::SwiftTail:
  1637. if (!isVarArg)
  1638. return (Return ? RetCC_ARM_AAPCS_VFP: CC_ARM_AAPCS_VFP);
1639. // Fall through to the soft-float variant; variadic functions don't
1640. // use the hard floating point ABI.
  1641. [[fallthrough]];
  1642. case CallingConv::ARM_AAPCS:
  1643. return (Return ? RetCC_ARM_AAPCS: CC_ARM_AAPCS);
  1644. case CallingConv::ARM_APCS:
  1645. return (Return ? RetCC_ARM_APCS: CC_ARM_APCS);
  1646. case CallingConv::GHC:
  1647. if (Return)
  1648. report_fatal_error("Can't return in GHC call convention");
  1649. else
  1650. return CC_ARM_APCS_GHC;
  1651. case CallingConv::CFGuard_Check:
  1652. return (Return ? RetCC_ARM_AAPCS : CC_ARM_Win32_CFGuard_Check);
  1653. }
  1654. }
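// Assign outgoing call arguments to registers or stack slots and emit the
// argument setup code. A first pass only checks that every assignment is
// something we can handle, so we can bail out before emitting anything; then
// CALLSEQ_START is issued and each argument is extended/bitcast as needed,
// copied into its physreg (f64 pairs via VMOVRRD), or stored at its
// SP-relative offset.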
  1655. bool ARMFastISel::ProcessCallArgs(SmallVectorImpl<Value*> &Args,
  1656. SmallVectorImpl<Register> &ArgRegs,
  1657. SmallVectorImpl<MVT> &ArgVTs,
  1658. SmallVectorImpl<ISD::ArgFlagsTy> &ArgFlags,
  1659. SmallVectorImpl<Register> &RegArgs,
  1660. CallingConv::ID CC,
  1661. unsigned &NumBytes,
  1662. bool isVarArg) {
  1663. SmallVector<CCValAssign, 16> ArgLocs;
  1664. CCState CCInfo(CC, isVarArg, *FuncInfo.MF, ArgLocs, *Context);
  1665. CCInfo.AnalyzeCallOperands(ArgVTs, ArgFlags,
  1666. CCAssignFnForCall(CC, false, isVarArg));
  1667. // Check that we can handle all of the arguments. If we can't, then bail out
  1668. // now before we add code to the MBB.
  1669. for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
  1670. CCValAssign &VA = ArgLocs[i];
  1671. MVT ArgVT = ArgVTs[VA.getValNo()];
  1672. // We don't handle NEON/vector parameters yet.
  1673. if (ArgVT.isVector() || ArgVT.getSizeInBits() > 64)
  1674. return false;
  1675. // Now copy/store arg to correct locations.
  1676. if (VA.isRegLoc() && !VA.needsCustom()) {
  1677. continue;
  1678. } else if (VA.needsCustom()) {
  1679. // TODO: We need custom lowering for vector (v2f64) args.
  1680. if (VA.getLocVT() != MVT::f64 ||
  1681. // TODO: Only handle register args for now.
  1682. !VA.isRegLoc() || !ArgLocs[++i].isRegLoc())
  1683. return false;
  1684. } else {
  1685. switch (ArgVT.SimpleTy) {
  1686. default:
  1687. return false;
  1688. case MVT::i1:
  1689. case MVT::i8:
  1690. case MVT::i16:
  1691. case MVT::i32:
  1692. break;
  1693. case MVT::f32:
  1694. if (!Subtarget->hasVFP2Base())
  1695. return false;
  1696. break;
  1697. case MVT::f64:
  1698. if (!Subtarget->hasVFP2Base())
  1699. return false;
  1700. break;
  1701. }
  1702. }
  1703. }
1704. // At this point, we are able to handle the call's arguments in fast isel.
  1705. // Get a count of how many bytes are to be pushed on the stack.
  1706. NumBytes = CCInfo.getNextStackOffset();
  1707. // Issue CALLSEQ_START
  1708. unsigned AdjStackDown = TII.getCallFrameSetupOpcode();
  1709. AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
  1710. TII.get(AdjStackDown))
  1711. .addImm(NumBytes).addImm(0));
  1712. // Process the args.
  1713. for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
  1714. CCValAssign &VA = ArgLocs[i];
  1715. const Value *ArgVal = Args[VA.getValNo()];
  1716. Register Arg = ArgRegs[VA.getValNo()];
  1717. MVT ArgVT = ArgVTs[VA.getValNo()];
  1718. assert((!ArgVT.isVector() && ArgVT.getSizeInBits() <= 64) &&
  1719. "We don't handle NEON/vector parameters yet.");
  1720. // Handle arg promotion, etc.
  1721. switch (VA.getLocInfo()) {
  1722. case CCValAssign::Full: break;
  1723. case CCValAssign::SExt: {
  1724. MVT DestVT = VA.getLocVT();
  1725. Arg = ARMEmitIntExt(ArgVT, Arg, DestVT, /*isZExt*/false);
  1726. assert(Arg != 0 && "Failed to emit a sext");
  1727. ArgVT = DestVT;
  1728. break;
  1729. }
  1730. case CCValAssign::AExt:
  1731. // Intentional fall-through. Handle AExt and ZExt.
  1732. case CCValAssign::ZExt: {
  1733. MVT DestVT = VA.getLocVT();
  1734. Arg = ARMEmitIntExt(ArgVT, Arg, DestVT, /*isZExt*/true);
  1735. assert(Arg != 0 && "Failed to emit a zext");
  1736. ArgVT = DestVT;
  1737. break;
  1738. }
  1739. case CCValAssign::BCvt: {
  1740. unsigned BC = fastEmit_r(ArgVT, VA.getLocVT(), ISD::BITCAST, Arg);
  1741. assert(BC != 0 && "Failed to emit a bitcast!");
  1742. Arg = BC;
  1743. ArgVT = VA.getLocVT();
  1744. break;
  1745. }
  1746. default: llvm_unreachable("Unknown arg promotion!");
  1747. }
  1748. // Now copy/store arg to correct locations.
  1749. if (VA.isRegLoc() && !VA.needsCustom()) {
  1750. BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
  1751. TII.get(TargetOpcode::COPY), VA.getLocReg()).addReg(Arg);
  1752. RegArgs.push_back(VA.getLocReg());
  1753. } else if (VA.needsCustom()) {
  1754. // TODO: We need custom lowering for vector (v2f64) args.
  1755. assert(VA.getLocVT() == MVT::f64 &&
  1756. "Custom lowering for v2f64 args not available");
  1757. // FIXME: ArgLocs[++i] may extend beyond ArgLocs.size()
  1758. CCValAssign &NextVA = ArgLocs[++i];
  1759. assert(VA.isRegLoc() && NextVA.isRegLoc() &&
  1760. "We only handle register args!");
  1761. AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
  1762. TII.get(ARM::VMOVRRD), VA.getLocReg())
  1763. .addReg(NextVA.getLocReg(), RegState::Define)
  1764. .addReg(Arg));
  1765. RegArgs.push_back(VA.getLocReg());
  1766. RegArgs.push_back(NextVA.getLocReg());
  1767. } else {
  1768. assert(VA.isMemLoc());
  1769. // Need to store on the stack.
  1770. // Don't emit stores for undef values.
  1771. if (isa<UndefValue>(ArgVal))
  1772. continue;
  1773. Address Addr;
  1774. Addr.BaseType = Address::RegBase;
  1775. Addr.Base.Reg = ARM::SP;
  1776. Addr.Offset = VA.getLocMemOffset();
  1777. bool EmitRet = ARMEmitStore(ArgVT, Arg, Addr); (void)EmitRet;
  1778. assert(EmitRet && "Could not emit a store for argument!");
  1779. }
  1780. }
  1781. return true;
  1782. }
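// Emit CALLSEQ_END and copy the call's return value, if any, out of the
// physical return registers: an f64 returned in a GPR pair is reassembled
// with VMOVDRR, everything else is a single COPY (small integers come back
// widened to i32).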
  1783. bool ARMFastISel::FinishCall(MVT RetVT, SmallVectorImpl<Register> &UsedRegs,
  1784. const Instruction *I, CallingConv::ID CC,
  1785. unsigned &NumBytes, bool isVarArg) {
  1786. // Issue CALLSEQ_END
  1787. unsigned AdjStackUp = TII.getCallFrameDestroyOpcode();
  1788. AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
  1789. TII.get(AdjStackUp))
  1790. .addImm(NumBytes).addImm(-1ULL));
  1791. // Now the return value.
  1792. if (RetVT != MVT::isVoid) {
  1793. SmallVector<CCValAssign, 16> RVLocs;
  1794. CCState CCInfo(CC, isVarArg, *FuncInfo.MF, RVLocs, *Context);
  1795. CCInfo.AnalyzeCallResult(RetVT, CCAssignFnForCall(CC, true, isVarArg));
  1796. // Copy all of the result registers out of their specified physreg.
  1797. if (RVLocs.size() == 2 && RetVT == MVT::f64) {
  1798. // For this move we copy into two registers and then move into the
  1799. // double fp reg we want.
  1800. MVT DestVT = RVLocs[0].getValVT();
  1801. const TargetRegisterClass* DstRC = TLI.getRegClassFor(DestVT);
  1802. Register ResultReg = createResultReg(DstRC);
  1803. AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
  1804. TII.get(ARM::VMOVDRR), ResultReg)
  1805. .addReg(RVLocs[0].getLocReg())
  1806. .addReg(RVLocs[1].getLocReg()));
  1807. UsedRegs.push_back(RVLocs[0].getLocReg());
  1808. UsedRegs.push_back(RVLocs[1].getLocReg());
  1809. // Finally update the result.
  1810. updateValueMap(I, ResultReg);
  1811. } else {
  1812. assert(RVLocs.size() == 1 &&"Can't handle non-double multi-reg retvals!");
  1813. MVT CopyVT = RVLocs[0].getValVT();
  1814. // Special handling for extended integers.
  1815. if (RetVT == MVT::i1 || RetVT == MVT::i8 || RetVT == MVT::i16)
  1816. CopyVT = MVT::i32;
  1817. const TargetRegisterClass* DstRC = TLI.getRegClassFor(CopyVT);
  1818. Register ResultReg = createResultReg(DstRC);
  1819. BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
  1820. TII.get(TargetOpcode::COPY),
  1821. ResultReg).addReg(RVLocs[0].getLocReg());
  1822. UsedRegs.push_back(RVLocs[0].getLocReg());
  1823. // Finally update the result.
  1824. updateValueMap(I, ResultReg);
  1825. }
  1826. }
  1827. return true;
  1828. }
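// Lower a return: analyze where the return value must live, extend i1/i8/i16
// results to i32 when signext/zeroext is requested, copy the value into the
// ABI return register, and emit the subtarget's return instruction
// (tBXNS_RET for CMSE non-secure entry functions).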
  1829. bool ARMFastISel::SelectRet(const Instruction *I) {
  1830. const ReturnInst *Ret = cast<ReturnInst>(I);
  1831. const Function &F = *I->getParent()->getParent();
  1832. const bool IsCmseNSEntry = F.hasFnAttribute("cmse_nonsecure_entry");
  1833. if (!FuncInfo.CanLowerReturn)
  1834. return false;
  1835. if (TLI.supportSwiftError() &&
  1836. F.getAttributes().hasAttrSomewhere(Attribute::SwiftError))
  1837. return false;
  1838. if (TLI.supportSplitCSR(FuncInfo.MF))
  1839. return false;
  1840. // Build a list of return value registers.
  1841. SmallVector<unsigned, 4> RetRegs;
  1842. CallingConv::ID CC = F.getCallingConv();
  1843. if (Ret->getNumOperands() > 0) {
  1844. SmallVector<ISD::OutputArg, 4> Outs;
  1845. GetReturnInfo(CC, F.getReturnType(), F.getAttributes(), Outs, TLI, DL);
  1846. // Analyze operands of the call, assigning locations to each operand.
  1847. SmallVector<CCValAssign, 16> ValLocs;
  1848. CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, ValLocs, I->getContext());
  1849. CCInfo.AnalyzeReturn(Outs, CCAssignFnForCall(CC, true /* is Ret */,
  1850. F.isVarArg()));
  1851. const Value *RV = Ret->getOperand(0);
  1852. Register Reg = getRegForValue(RV);
  1853. if (Reg == 0)
  1854. return false;
  1855. // Only handle a single return value for now.
  1856. if (ValLocs.size() != 1)
  1857. return false;
  1858. CCValAssign &VA = ValLocs[0];
  1859. // Don't bother handling odd stuff for now.
  1860. if (VA.getLocInfo() != CCValAssign::Full)
  1861. return false;
  1862. // Only handle register returns for now.
  1863. if (!VA.isRegLoc())
  1864. return false;
  1865. unsigned SrcReg = Reg + VA.getValNo();
  1866. EVT RVEVT = TLI.getValueType(DL, RV->getType());
  1867. if (!RVEVT.isSimple()) return false;
  1868. MVT RVVT = RVEVT.getSimpleVT();
  1869. MVT DestVT = VA.getValVT();
  1870. // Special handling for extended integers.
  1871. if (RVVT != DestVT) {
  1872. if (RVVT != MVT::i1 && RVVT != MVT::i8 && RVVT != MVT::i16)
  1873. return false;
  1874. assert(DestVT == MVT::i32 && "ARM should always ext to i32");
  1875. // Perform extension if flagged as either zext or sext. Otherwise, do
  1876. // nothing.
  1877. if (Outs[0].Flags.isZExt() || Outs[0].Flags.isSExt()) {
  1878. SrcReg = ARMEmitIntExt(RVVT, SrcReg, DestVT, Outs[0].Flags.isZExt());
  1879. if (SrcReg == 0) return false;
  1880. }
  1881. }
  1882. // Make the copy.
  1883. Register DstReg = VA.getLocReg();
  1884. const TargetRegisterClass* SrcRC = MRI.getRegClass(SrcReg);
  1885. // Avoid a cross-class copy. This is very unlikely.
  1886. if (!SrcRC->contains(DstReg))
  1887. return false;
  1888. BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
  1889. TII.get(TargetOpcode::COPY), DstReg).addReg(SrcReg);
  1890. // Add register to return instruction.
  1891. RetRegs.push_back(VA.getLocReg());
  1892. }
  1893. unsigned RetOpc;
  1894. if (IsCmseNSEntry)
  1895. if (isThumb2)
  1896. RetOpc = ARM::tBXNS_RET;
  1897. else
  1898. llvm_unreachable("CMSE not valid for non-Thumb targets");
  1899. else
  1900. RetOpc = Subtarget->getReturnOpcode();
  1901. MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
  1902. TII.get(RetOpc));
  1903. AddOptionalDefs(MIB);
  1904. for (unsigned R : RetRegs)
  1905. MIB.addReg(R, RegState::Implicit);
  1906. return true;
  1907. }
  1908. unsigned ARMFastISel::ARMSelectCallOp(bool UseReg) {
  1909. if (UseReg)
  1910. return isThumb2 ? gettBLXrOpcode(*MF) : getBLXOpcode(*MF);
  1911. else
  1912. return isThumb2 ? ARM::tBL : ARM::BL;
  1913. }
  1914. unsigned ARMFastISel::getLibcallReg(const Twine &Name) {
  1915. // Manually compute the global's type to avoid building it when unnecessary.
  1916. Type *GVTy = Type::getInt32PtrTy(*Context, /*AS=*/0);
  1917. EVT LCREVT = TLI.getValueType(DL, GVTy);
  1918. if (!LCREVT.isSimple()) return 0;
  1919. GlobalValue *GV = M.getNamedGlobal(Name.str());
  1920. if (!GV)
  1921. GV = new GlobalVariable(M, Type::getInt32Ty(*Context), false,
  1922. GlobalValue::ExternalLinkage, nullptr, Name);
  1923. return ARMMaterializeGV(GV, LCREVT.getSimpleVT());
  1924. }
  1925. // A quick function that will emit a call for a named libcall in F with the
  1926. // vector of passed arguments for the Instruction in I. We can assume that we
  1927. // can emit a call for any libcall we can produce. This is an abridged version
  1928. // of the full call infrastructure since we won't need to worry about things
  1929. // like computed function pointers or strange arguments at call sites.
  1930. // TODO: Try to unify this and the normal call bits for ARM, then try to unify
  1931. // with X86.
  1932. bool ARMFastISel::ARMEmitLibcall(const Instruction *I, RTLIB::Libcall Call) {
  1933. CallingConv::ID CC = TLI.getLibcallCallingConv(Call);
  1934. // Handle *simple* calls for now.
  1935. Type *RetTy = I->getType();
  1936. MVT RetVT;
  1937. if (RetTy->isVoidTy())
  1938. RetVT = MVT::isVoid;
  1939. else if (!isTypeLegal(RetTy, RetVT))
  1940. return false;
  1941. // Can't handle non-double multi-reg retvals.
  1942. if (RetVT != MVT::isVoid && RetVT != MVT::i32) {
  1943. SmallVector<CCValAssign, 16> RVLocs;
  1944. CCState CCInfo(CC, false, *FuncInfo.MF, RVLocs, *Context);
  1945. CCInfo.AnalyzeCallResult(RetVT, CCAssignFnForCall(CC, true, false));
  1946. if (RVLocs.size() >= 2 && RetVT != MVT::f64)
  1947. return false;
  1948. }
  1949. // Set up the argument vectors.
  1950. SmallVector<Value*, 8> Args;
  1951. SmallVector<Register, 8> ArgRegs;
  1952. SmallVector<MVT, 8> ArgVTs;
  1953. SmallVector<ISD::ArgFlagsTy, 8> ArgFlags;
  1954. Args.reserve(I->getNumOperands());
  1955. ArgRegs.reserve(I->getNumOperands());
  1956. ArgVTs.reserve(I->getNumOperands());
  1957. ArgFlags.reserve(I->getNumOperands());
  1958. for (Value *Op : I->operands()) {
  1959. Register Arg = getRegForValue(Op);
  1960. if (Arg == 0) return false;
  1961. Type *ArgTy = Op->getType();
  1962. MVT ArgVT;
  1963. if (!isTypeLegal(ArgTy, ArgVT)) return false;
  1964. ISD::ArgFlagsTy Flags;
  1965. Flags.setOrigAlign(DL.getABITypeAlign(ArgTy));
  1966. Args.push_back(Op);
  1967. ArgRegs.push_back(Arg);
  1968. ArgVTs.push_back(ArgVT);
  1969. ArgFlags.push_back(Flags);
  1970. }
  1971. // Handle the arguments now that we've gotten them.
  1972. SmallVector<Register, 4> RegArgs;
  1973. unsigned NumBytes;
  1974. if (!ProcessCallArgs(Args, ArgRegs, ArgVTs, ArgFlags,
  1975. RegArgs, CC, NumBytes, false))
  1976. return false;
  1977. Register CalleeReg;
  1978. if (Subtarget->genLongCalls()) {
  1979. CalleeReg = getLibcallReg(TLI.getLibcallName(Call));
  1980. if (CalleeReg == 0) return false;
  1981. }
  1982. // Issue the call.
  1983. unsigned CallOpc = ARMSelectCallOp(Subtarget->genLongCalls());
  1984. MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt,
  1985. MIMD, TII.get(CallOpc));
  1986. // BL / BLX don't take a predicate, but tBL / tBLX do.
  1987. if (isThumb2)
  1988. MIB.add(predOps(ARMCC::AL));
  1989. if (Subtarget->genLongCalls()) {
  1990. CalleeReg =
  1991. constrainOperandRegClass(TII.get(CallOpc), CalleeReg, isThumb2 ? 2 : 0);
  1992. MIB.addReg(CalleeReg);
  1993. } else
  1994. MIB.addExternalSymbol(TLI.getLibcallName(Call));
  1995. // Add implicit physical register uses to the call.
  1996. for (Register R : RegArgs)
  1997. MIB.addReg(R, RegState::Implicit);
  1998. // Add a register mask with the call-preserved registers.
  1999. // Proper defs for return values will be added by setPhysRegsDeadExcept().
  2000. MIB.addRegMask(TRI.getCallPreservedMask(*FuncInfo.MF, CC));
  2001. // Finish off the call including any return values.
  2002. SmallVector<Register, 4> UsedRegs;
  2003. if (!FinishCall(RetVT, UsedRegs, I, CC, NumBytes, false)) return false;
  2004. // Set all unused physreg defs as dead.
  2005. static_cast<MachineInstr *>(MIB)->setPhysRegsDeadExcept(UsedRegs, TRI);
  2006. return true;
  2007. }
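// Lower a direct or indirect call. This is also reused for the memcpy,
// memmove and memset libcalls, in which case IntrMemName names the callee and
// the intrinsic's trailing isvolatile argument is dropped. Only "simple"
// signatures are handled; arguments with byval, sret, swifterror and similar
// attributes fall back to SelectionDAG.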
  2008. bool ARMFastISel::SelectCall(const Instruction *I,
  2009. const char *IntrMemName = nullptr) {
  2010. const CallInst *CI = cast<CallInst>(I);
  2011. const Value *Callee = CI->getCalledOperand();
  2012. // Can't handle inline asm.
  2013. if (isa<InlineAsm>(Callee)) return false;
  2014. // Allow SelectionDAG isel to handle tail calls.
  2015. if (CI->isTailCall()) return false;
  2016. // Check the calling convention.
  2017. CallingConv::ID CC = CI->getCallingConv();
  2018. // TODO: Avoid some calling conventions?
  2019. FunctionType *FTy = CI->getFunctionType();
  2020. bool isVarArg = FTy->isVarArg();
  2021. // Handle *simple* calls for now.
  2022. Type *RetTy = I->getType();
  2023. MVT RetVT;
  2024. if (RetTy->isVoidTy())
  2025. RetVT = MVT::isVoid;
  2026. else if (!isTypeLegal(RetTy, RetVT) && RetVT != MVT::i16 &&
  2027. RetVT != MVT::i8 && RetVT != MVT::i1)
  2028. return false;
  2029. // Can't handle non-double multi-reg retvals.
  2030. if (RetVT != MVT::isVoid && RetVT != MVT::i1 && RetVT != MVT::i8 &&
  2031. RetVT != MVT::i16 && RetVT != MVT::i32) {
  2032. SmallVector<CCValAssign, 16> RVLocs;
  2033. CCState CCInfo(CC, isVarArg, *FuncInfo.MF, RVLocs, *Context);
  2034. CCInfo.AnalyzeCallResult(RetVT, CCAssignFnForCall(CC, true, isVarArg));
  2035. if (RVLocs.size() >= 2 && RetVT != MVT::f64)
  2036. return false;
  2037. }
  2038. // Set up the argument vectors.
  2039. SmallVector<Value*, 8> Args;
  2040. SmallVector<Register, 8> ArgRegs;
  2041. SmallVector<MVT, 8> ArgVTs;
  2042. SmallVector<ISD::ArgFlagsTy, 8> ArgFlags;
  2043. unsigned arg_size = CI->arg_size();
  2044. Args.reserve(arg_size);
  2045. ArgRegs.reserve(arg_size);
  2046. ArgVTs.reserve(arg_size);
  2047. ArgFlags.reserve(arg_size);
  2048. for (auto ArgI = CI->arg_begin(), ArgE = CI->arg_end(); ArgI != ArgE; ++ArgI) {
  2049. // If we're lowering a memory intrinsic instead of a regular call, skip the
  2050. // last argument, which shouldn't be passed to the underlying function.
  2051. if (IntrMemName && ArgE - ArgI <= 1)
  2052. break;
  2053. ISD::ArgFlagsTy Flags;
  2054. unsigned ArgIdx = ArgI - CI->arg_begin();
  2055. if (CI->paramHasAttr(ArgIdx, Attribute::SExt))
  2056. Flags.setSExt();
  2057. if (CI->paramHasAttr(ArgIdx, Attribute::ZExt))
  2058. Flags.setZExt();
  2059. // FIXME: Only handle *easy* calls for now.
  2060. if (CI->paramHasAttr(ArgIdx, Attribute::InReg) ||
  2061. CI->paramHasAttr(ArgIdx, Attribute::StructRet) ||
  2062. CI->paramHasAttr(ArgIdx, Attribute::SwiftSelf) ||
  2063. CI->paramHasAttr(ArgIdx, Attribute::SwiftError) ||
  2064. CI->paramHasAttr(ArgIdx, Attribute::Nest) ||
  2065. CI->paramHasAttr(ArgIdx, Attribute::ByVal))
  2066. return false;
  2067. Type *ArgTy = (*ArgI)->getType();
  2068. MVT ArgVT;
  2069. if (!isTypeLegal(ArgTy, ArgVT) && ArgVT != MVT::i16 && ArgVT != MVT::i8 &&
  2070. ArgVT != MVT::i1)
  2071. return false;
  2072. Register Arg = getRegForValue(*ArgI);
  2073. if (!Arg.isValid())
  2074. return false;
  2075. Flags.setOrigAlign(DL.getABITypeAlign(ArgTy));
  2076. Args.push_back(*ArgI);
  2077. ArgRegs.push_back(Arg);
  2078. ArgVTs.push_back(ArgVT);
  2079. ArgFlags.push_back(Flags);
  2080. }
  2081. // Handle the arguments now that we've gotten them.
  2082. SmallVector<Register, 4> RegArgs;
  2083. unsigned NumBytes;
  2084. if (!ProcessCallArgs(Args, ArgRegs, ArgVTs, ArgFlags,
  2085. RegArgs, CC, NumBytes, isVarArg))
  2086. return false;
  2087. bool UseReg = false;
  2088. const GlobalValue *GV = dyn_cast<GlobalValue>(Callee);
  2089. if (!GV || Subtarget->genLongCalls()) UseReg = true;
  2090. Register CalleeReg;
  2091. if (UseReg) {
  2092. if (IntrMemName)
  2093. CalleeReg = getLibcallReg(IntrMemName);
  2094. else
  2095. CalleeReg = getRegForValue(Callee);
  2096. if (CalleeReg == 0) return false;
  2097. }
  2098. // Issue the call.
  2099. unsigned CallOpc = ARMSelectCallOp(UseReg);
  2100. MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt,
  2101. MIMD, TII.get(CallOpc));
  2102. // ARM calls don't take a predicate, but tBL / tBLX do.
2103. if (isThumb2)
  2104. MIB.add(predOps(ARMCC::AL));
  2105. if (UseReg) {
  2106. CalleeReg =
  2107. constrainOperandRegClass(TII.get(CallOpc), CalleeReg, isThumb2 ? 2 : 0);
  2108. MIB.addReg(CalleeReg);
  2109. } else if (!IntrMemName)
  2110. MIB.addGlobalAddress(GV, 0, 0);
  2111. else
  2112. MIB.addExternalSymbol(IntrMemName, 0);
  2113. // Add implicit physical register uses to the call.
  2114. for (Register R : RegArgs)
  2115. MIB.addReg(R, RegState::Implicit);
  2116. // Add a register mask with the call-preserved registers.
  2117. // Proper defs for return values will be added by setPhysRegsDeadExcept().
  2118. MIB.addRegMask(TRI.getCallPreservedMask(*FuncInfo.MF, CC));
  2119. // Finish off the call including any return values.
  2120. SmallVector<Register, 4> UsedRegs;
  2121. if (!FinishCall(RetVT, UsedRegs, I, CC, NumBytes, isVarArg))
  2122. return false;
  2123. // Set all unused physreg defs as dead.
  2124. static_cast<MachineInstr *>(MIB)->setPhysRegsDeadExcept(UsedRegs, TRI);
  2125. return true;
  2126. }
  2127. bool ARMFastISel::ARMIsMemCpySmall(uint64_t Len) {
  2128. return Len <= 16;
  2129. }
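// Expand a small memcpy inline as a sequence of load/store pairs, using the
// widest chunk (i32/i16/i8) that the remaining length and the known alignment
// allow on each iteration.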
  2130. bool ARMFastISel::ARMTryEmitSmallMemCpy(Address Dest, Address Src, uint64_t Len,
  2131. MaybeAlign Alignment) {
  2132. // Make sure we don't bloat code by inlining very large memcpy's.
  2133. if (!ARMIsMemCpySmall(Len))
  2134. return false;
  2135. while (Len) {
  2136. MVT VT;
  2137. if (!Alignment || *Alignment >= 4) {
  2138. if (Len >= 4)
  2139. VT = MVT::i32;
  2140. else if (Len >= 2)
  2141. VT = MVT::i16;
  2142. else {
  2143. assert(Len == 1 && "Expected a length of 1!");
  2144. VT = MVT::i8;
  2145. }
  2146. } else {
  2147. assert(Alignment && "Alignment is set in this branch");
  2148. // Bound based on alignment.
  2149. if (Len >= 2 && *Alignment == 2)
  2150. VT = MVT::i16;
  2151. else {
  2152. VT = MVT::i8;
  2153. }
  2154. }
  2155. bool RV;
  2156. Register ResultReg;
  2157. RV = ARMEmitLoad(VT, ResultReg, Src);
  2158. assert(RV && "Should be able to handle this load.");
  2159. RV = ARMEmitStore(VT, ResultReg, Dest);
  2160. assert(RV && "Should be able to handle this store.");
  2161. (void)RV;
  2162. unsigned Size = VT.getSizeInBits()/8;
  2163. Len -= Size;
  2164. Dest.Offset += Size;
  2165. Src.Offset += Size;
  2166. }
  2167. return true;
  2168. }
  2169. bool ARMFastISel::SelectIntrinsicCall(const IntrinsicInst &I) {
  2170. // FIXME: Handle more intrinsics.
  2171. switch (I.getIntrinsicID()) {
  2172. default: return false;
  2173. case Intrinsic::frameaddress: {
  2174. MachineFrameInfo &MFI = FuncInfo.MF->getFrameInfo();
  2175. MFI.setFrameAddressIsTaken(true);
  2176. unsigned LdrOpc = isThumb2 ? ARM::t2LDRi12 : ARM::LDRi12;
  2177. const TargetRegisterClass *RC = isThumb2 ? &ARM::tGPRRegClass
  2178. : &ARM::GPRRegClass;
  2179. const ARMBaseRegisterInfo *RegInfo =
  2180. static_cast<const ARMBaseRegisterInfo *>(Subtarget->getRegisterInfo());
  2181. Register FramePtr = RegInfo->getFrameRegister(*(FuncInfo.MF));
  2182. unsigned SrcReg = FramePtr;
  2183. // Recursively load frame address
  2184. // ldr r0 [fp]
  2185. // ldr r0 [r0]
  2186. // ldr r0 [r0]
  2187. // ...
  2188. unsigned DestReg;
  2189. unsigned Depth = cast<ConstantInt>(I.getOperand(0))->getZExtValue();
  2190. while (Depth--) {
  2191. DestReg = createResultReg(RC);
  2192. AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
  2193. TII.get(LdrOpc), DestReg)
  2194. .addReg(SrcReg).addImm(0));
  2195. SrcReg = DestReg;
  2196. }
  2197. updateValueMap(&I, SrcReg);
  2198. return true;
  2199. }
  2200. case Intrinsic::memcpy:
  2201. case Intrinsic::memmove: {
  2202. const MemTransferInst &MTI = cast<MemTransferInst>(I);
  2203. // Don't handle volatile.
  2204. if (MTI.isVolatile())
  2205. return false;
  2206. // Disable inlining for memmove before calls to ComputeAddress. Otherwise,
  2207. // we would emit dead code because we don't currently handle memmoves.
  2208. bool isMemCpy = (I.getIntrinsicID() == Intrinsic::memcpy);
  2209. if (isa<ConstantInt>(MTI.getLength()) && isMemCpy) {
  2210. // Small memcpy's are common enough that we want to do them without a call
  2211. // if possible.
  2212. uint64_t Len = cast<ConstantInt>(MTI.getLength())->getZExtValue();
  2213. if (ARMIsMemCpySmall(Len)) {
  2214. Address Dest, Src;
  2215. if (!ARMComputeAddress(MTI.getRawDest(), Dest) ||
  2216. !ARMComputeAddress(MTI.getRawSource(), Src))
  2217. return false;
  2218. MaybeAlign Alignment;
  2219. if (MTI.getDestAlign() || MTI.getSourceAlign())
  2220. Alignment = std::min(MTI.getDestAlign().valueOrOne(),
  2221. MTI.getSourceAlign().valueOrOne());
  2222. if (ARMTryEmitSmallMemCpy(Dest, Src, Len, Alignment))
  2223. return true;
  2224. }
  2225. }
  2226. if (!MTI.getLength()->getType()->isIntegerTy(32))
  2227. return false;
  2228. if (MTI.getSourceAddressSpace() > 255 || MTI.getDestAddressSpace() > 255)
  2229. return false;
  2230. const char *IntrMemName = isa<MemCpyInst>(I) ? "memcpy" : "memmove";
  2231. return SelectCall(&I, IntrMemName);
  2232. }
  2233. case Intrinsic::memset: {
  2234. const MemSetInst &MSI = cast<MemSetInst>(I);
  2235. // Don't handle volatile.
  2236. if (MSI.isVolatile())
  2237. return false;
  2238. if (!MSI.getLength()->getType()->isIntegerTy(32))
  2239. return false;
  2240. if (MSI.getDestAddressSpace() > 255)
  2241. return false;
  2242. return SelectCall(&I, "memset");
  2243. }
  2244. case Intrinsic::trap: {
  2245. unsigned Opcode;
  2246. if (Subtarget->isThumb())
  2247. Opcode = ARM::tTRAP;
  2248. else
  2249. Opcode = Subtarget->useNaClTrap() ? ARM::TRAPNaCl : ARM::TRAP;
  2250. BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opcode));
  2251. return true;
  2252. }
  2253. }
  2254. }
  2255. bool ARMFastISel::SelectTrunc(const Instruction *I) {
  2256. // The high bits for a type smaller than the register size are assumed to be
  2257. // undefined.
  2258. Value *Op = I->getOperand(0);
  2259. EVT SrcVT, DestVT;
  2260. SrcVT = TLI.getValueType(DL, Op->getType(), true);
  2261. DestVT = TLI.getValueType(DL, I->getType(), true);
  2262. if (SrcVT != MVT::i32 && SrcVT != MVT::i16 && SrcVT != MVT::i8)
  2263. return false;
  2264. if (DestVT != MVT::i16 && DestVT != MVT::i8 && DestVT != MVT::i1)
  2265. return false;
  2266. Register SrcReg = getRegForValue(Op);
  2267. if (!SrcReg) return false;
  2268. // Because the high bits are undefined, a truncate doesn't generate
  2269. // any code.
  2270. updateValueMap(I, SrcReg);
  2271. return true;
  2272. }
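// Emit a zero- or sign-extension of SrcReg from i1/i8/i16 up to DestVT. The
// tables below choose, per (source width, ARM vs Thumb, V6 availability,
// sext vs zext), either a single instruction (SXTB/SXTH/UXTH or AND #mask)
// or a shift-left / shift-right pair when no single-instruction form exists.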
  2273. unsigned ARMFastISel::ARMEmitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT,
  2274. bool isZExt) {
  2275. if (DestVT != MVT::i32 && DestVT != MVT::i16 && DestVT != MVT::i8)
  2276. return 0;
  2277. if (SrcVT != MVT::i16 && SrcVT != MVT::i8 && SrcVT != MVT::i1)
  2278. return 0;
  2279. // Table of which combinations can be emitted as a single instruction,
  2280. // and which will require two.
  2281. static const uint8_t isSingleInstrTbl[3][2][2][2] = {
  2282. // ARM Thumb
  2283. // !hasV6Ops hasV6Ops !hasV6Ops hasV6Ops
  2284. // ext: s z s z s z s z
  2285. /* 1 */ { { { 0, 1 }, { 0, 1 } }, { { 0, 0 }, { 0, 1 } } },
  2286. /* 8 */ { { { 0, 1 }, { 1, 1 } }, { { 0, 0 }, { 1, 1 } } },
  2287. /* 16 */ { { { 0, 0 }, { 1, 1 } }, { { 0, 0 }, { 1, 1 } } }
  2288. };
2289. // Target register constraints:
2290. // - For ARM, the target can never be PC.
2291. // - For 16-bit Thumb, targets are restricted to the lower 8 registers.
2292. // - For 32-bit Thumb, targets are restricted to non-SP and non-PC.
  2293. static const TargetRegisterClass *RCTbl[2][2] = {
  2294. // Instructions: Two Single
  2295. /* ARM */ { &ARM::GPRnopcRegClass, &ARM::GPRnopcRegClass },
  2296. /* Thumb */ { &ARM::tGPRRegClass, &ARM::rGPRRegClass }
  2297. };
  2298. // Table governing the instruction(s) to be emitted.
  2299. static const struct InstructionTable {
  2300. uint32_t Opc : 16;
  2301. uint32_t hasS : 1; // Some instructions have an S bit, always set it to 0.
  2302. uint32_t Shift : 7; // For shift operand addressing mode, used by MOVsi.
  2303. uint32_t Imm : 8; // All instructions have either a shift or a mask.
  2304. } IT[2][2][3][2] = {
  2305. { // Two instructions (first is left shift, second is in this table).
  2306. { // ARM Opc S Shift Imm
  2307. /* 1 bit sext */ { { ARM::MOVsi , 1, ARM_AM::asr , 31 },
  2308. /* 1 bit zext */ { ARM::MOVsi , 1, ARM_AM::lsr , 31 } },
  2309. /* 8 bit sext */ { { ARM::MOVsi , 1, ARM_AM::asr , 24 },
  2310. /* 8 bit zext */ { ARM::MOVsi , 1, ARM_AM::lsr , 24 } },
  2311. /* 16 bit sext */ { { ARM::MOVsi , 1, ARM_AM::asr , 16 },
  2312. /* 16 bit zext */ { ARM::MOVsi , 1, ARM_AM::lsr , 16 } }
  2313. },
  2314. { // Thumb Opc S Shift Imm
  2315. /* 1 bit sext */ { { ARM::tASRri , 0, ARM_AM::no_shift, 31 },
  2316. /* 1 bit zext */ { ARM::tLSRri , 0, ARM_AM::no_shift, 31 } },
  2317. /* 8 bit sext */ { { ARM::tASRri , 0, ARM_AM::no_shift, 24 },
  2318. /* 8 bit zext */ { ARM::tLSRri , 0, ARM_AM::no_shift, 24 } },
  2319. /* 16 bit sext */ { { ARM::tASRri , 0, ARM_AM::no_shift, 16 },
  2320. /* 16 bit zext */ { ARM::tLSRri , 0, ARM_AM::no_shift, 16 } }
  2321. }
  2322. },
  2323. { // Single instruction.
  2324. { // ARM Opc S Shift Imm
  2325. /* 1 bit sext */ { { ARM::KILL , 0, ARM_AM::no_shift, 0 },
  2326. /* 1 bit zext */ { ARM::ANDri , 1, ARM_AM::no_shift, 1 } },
  2327. /* 8 bit sext */ { { ARM::SXTB , 0, ARM_AM::no_shift, 0 },
  2328. /* 8 bit zext */ { ARM::ANDri , 1, ARM_AM::no_shift, 255 } },
  2329. /* 16 bit sext */ { { ARM::SXTH , 0, ARM_AM::no_shift, 0 },
  2330. /* 16 bit zext */ { ARM::UXTH , 0, ARM_AM::no_shift, 0 } }
  2331. },
  2332. { // Thumb Opc S Shift Imm
  2333. /* 1 bit sext */ { { ARM::KILL , 0, ARM_AM::no_shift, 0 },
  2334. /* 1 bit zext */ { ARM::t2ANDri, 1, ARM_AM::no_shift, 1 } },
  2335. /* 8 bit sext */ { { ARM::t2SXTB , 0, ARM_AM::no_shift, 0 },
  2336. /* 8 bit zext */ { ARM::t2ANDri, 1, ARM_AM::no_shift, 255 } },
  2337. /* 16 bit sext */ { { ARM::t2SXTH , 0, ARM_AM::no_shift, 0 },
  2338. /* 16 bit zext */ { ARM::t2UXTH , 0, ARM_AM::no_shift, 0 } }
  2339. }
  2340. }
  2341. };
  2342. unsigned SrcBits = SrcVT.getSizeInBits();
  2343. unsigned DestBits = DestVT.getSizeInBits();
  2344. (void) DestBits;
  2345. assert((SrcBits < DestBits) && "can only extend to larger types");
  2346. assert((DestBits == 32 || DestBits == 16 || DestBits == 8) &&
  2347. "other sizes unimplemented");
  2348. assert((SrcBits == 16 || SrcBits == 8 || SrcBits == 1) &&
  2349. "other sizes unimplemented");
  2350. bool hasV6Ops = Subtarget->hasV6Ops();
  2351. unsigned Bitness = SrcBits / 8; // {1,8,16}=>{0,1,2}
  2352. assert((Bitness < 3) && "sanity-check table bounds");
  2353. bool isSingleInstr = isSingleInstrTbl[Bitness][isThumb2][hasV6Ops][isZExt];
  2354. const TargetRegisterClass *RC = RCTbl[isThumb2][isSingleInstr];
  2355. const InstructionTable *ITP = &IT[isSingleInstr][isThumb2][Bitness][isZExt];
  2356. unsigned Opc = ITP->Opc;
  2357. assert(ARM::KILL != Opc && "Invalid table entry");
  2358. unsigned hasS = ITP->hasS;
  2359. ARM_AM::ShiftOpc Shift = (ARM_AM::ShiftOpc) ITP->Shift;
  2360. assert(((Shift == ARM_AM::no_shift) == (Opc != ARM::MOVsi)) &&
  2361. "only MOVsi has shift operand addressing mode");
  2362. unsigned Imm = ITP->Imm;
  2363. // 16-bit Thumb instructions always set CPSR (unless they're in an IT block).
  2364. bool setsCPSR = &ARM::tGPRRegClass == RC;
  2365. unsigned LSLOpc = isThumb2 ? ARM::tLSLri : ARM::MOVsi;
  2366. unsigned ResultReg;
  2367. // MOVsi encodes shift and immediate in shift operand addressing mode.
  2368. // The following condition has the same value when emitting two
  2369. // instruction sequences: both are shifts.
  2370. bool ImmIsSO = (Shift != ARM_AM::no_shift);
  2371. // Either one or two instructions are emitted.
  2372. // They're always of the form:
  2373. // dst = in OP imm
  2374. // CPSR is set only by 16-bit Thumb instructions.
  2375. // Predicate, if any, is AL.
  2376. // S bit, if available, is always 0.
2377. // When two are emitted, the first's result feeds the second's input;
2378. // that intermediate value is then dead.
  2379. unsigned NumInstrsEmitted = isSingleInstr ? 1 : 2;
  2380. for (unsigned Instr = 0; Instr != NumInstrsEmitted; ++Instr) {
  2381. ResultReg = createResultReg(RC);
  2382. bool isLsl = (0 == Instr) && !isSingleInstr;
  2383. unsigned Opcode = isLsl ? LSLOpc : Opc;
  2384. ARM_AM::ShiftOpc ShiftAM = isLsl ? ARM_AM::lsl : Shift;
  2385. unsigned ImmEnc = ImmIsSO ? ARM_AM::getSORegOpc(ShiftAM, Imm) : Imm;
  2386. bool isKill = 1 == Instr;
  2387. MachineInstrBuilder MIB = BuildMI(
  2388. *FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opcode), ResultReg);
  2389. if (setsCPSR)
  2390. MIB.addReg(ARM::CPSR, RegState::Define);
  2391. SrcReg = constrainOperandRegClass(TII.get(Opcode), SrcReg, 1 + setsCPSR);
  2392. MIB.addReg(SrcReg, isKill * RegState::Kill)
  2393. .addImm(ImmEnc)
  2394. .add(predOps(ARMCC::AL));
  2395. if (hasS)
  2396. MIB.add(condCodeOp());
  2397. // Second instruction consumes the first's result.
  2398. SrcReg = ResultReg;
  2399. }
  2400. return ResultReg;
  2401. }
  2402. bool ARMFastISel::SelectIntExt(const Instruction *I) {
  2403. // On ARM, in general, integer casts don't involve legal types; this code
  2404. // handles promotable integers.
  2405. Type *DestTy = I->getType();
  2406. Value *Src = I->getOperand(0);
  2407. Type *SrcTy = Src->getType();
  2408. bool isZExt = isa<ZExtInst>(I);
  2409. Register SrcReg = getRegForValue(Src);
  2410. if (!SrcReg) return false;
  2411. EVT SrcEVT, DestEVT;
  2412. SrcEVT = TLI.getValueType(DL, SrcTy, true);
  2413. DestEVT = TLI.getValueType(DL, DestTy, true);
  2414. if (!SrcEVT.isSimple()) return false;
  2415. if (!DestEVT.isSimple()) return false;
  2416. MVT SrcVT = SrcEVT.getSimpleVT();
  2417. MVT DestVT = DestEVT.getSimpleVT();
  2418. unsigned ResultReg = ARMEmitIntExt(SrcVT, SrcReg, DestVT, isZExt);
  2419. if (ResultReg == 0) return false;
  2420. updateValueMap(I, ResultReg);
  2421. return true;
  2422. }
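// Lower shl/lshr/ashr in ARM mode using MOVsi (constant shift amount) or
// MOVsr (shift amount in a register); Thumb2 shifts are left to the
// target-independent selector or SelectionDAG.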
  2423. bool ARMFastISel::SelectShift(const Instruction *I,
  2424. ARM_AM::ShiftOpc ShiftTy) {
2425. // Thumb2 mode is handled by the target-independent selector
2426. // or SelectionDAG ISel.
  2427. if (isThumb2)
  2428. return false;
  2429. // Only handle i32 now.
  2430. EVT DestVT = TLI.getValueType(DL, I->getType(), true);
  2431. if (DestVT != MVT::i32)
  2432. return false;
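
  // Default to a register-shifted MOV (MOVsr); switch to the immediate-shifted
  // form (MOVsi) below when the shift amount is a usable constant.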
  unsigned Opc = ARM::MOVsr;
  unsigned ShiftImm;
  Value *Src2Value = I->getOperand(1);
  if (const ConstantInt *CI = dyn_cast<ConstantInt>(Src2Value)) {
    ShiftImm = CI->getZExtValue();

    // Fall back to SelectionDAG isel if the shift amount is zero or at least
    // the bit width of the value type.
    if (ShiftImm == 0 || ShiftImm >= 32)
      return false;

    Opc = ARM::MOVsi;
  }

  Value *Src1Value = I->getOperand(0);
  Register Reg1 = getRegForValue(Src1Value);
  if (Reg1 == 0) return false;

  unsigned Reg2 = 0;
  if (Opc == ARM::MOVsr) {
    Reg2 = getRegForValue(Src2Value);
    if (Reg2 == 0) return false;
  }

  Register ResultReg = createResultReg(&ARM::GPRnopcRegClass);
  if (ResultReg == 0) return false;

  MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
                                    TII.get(Opc), ResultReg)
                                .addReg(Reg1);

  if (Opc == ARM::MOVsi)
    MIB.addImm(ARM_AM::getSORegOpc(ShiftTy, ShiftImm));
  else if (Opc == ARM::MOVsr) {
    MIB.addReg(Reg2);
    MIB.addImm(ARM_AM::getSORegOpc(ShiftTy, 0));
  }

  AddOptionalDefs(MIB);
  updateValueMap(I, ResultReg);
  return true;
}

// TODO: SoftFP support.
bool ARMFastISel::fastSelectInstruction(const Instruction *I) {
  switch (I->getOpcode()) {
    case Instruction::Load:
      return SelectLoad(I);
    case Instruction::Store:
      return SelectStore(I);
    case Instruction::Br:
      return SelectBranch(I);
    case Instruction::IndirectBr:
      return SelectIndirectBr(I);
    case Instruction::ICmp:
    case Instruction::FCmp:
      return SelectCmp(I);
    case Instruction::FPExt:
      return SelectFPExt(I);
    case Instruction::FPTrunc:
      return SelectFPTrunc(I);
    case Instruction::SIToFP:
      return SelectIToFP(I, /*isSigned*/ true);
    case Instruction::UIToFP:
      return SelectIToFP(I, /*isSigned*/ false);
    case Instruction::FPToSI:
      return SelectFPToI(I, /*isSigned*/ true);
    case Instruction::FPToUI:
      return SelectFPToI(I, /*isSigned*/ false);
    case Instruction::Add:
      return SelectBinaryIntOp(I, ISD::ADD);
    case Instruction::Or:
      return SelectBinaryIntOp(I, ISD::OR);
    case Instruction::Sub:
      return SelectBinaryIntOp(I, ISD::SUB);
    case Instruction::FAdd:
      return SelectBinaryFPOp(I, ISD::FADD);
    case Instruction::FSub:
      return SelectBinaryFPOp(I, ISD::FSUB);
    case Instruction::FMul:
      return SelectBinaryFPOp(I, ISD::FMUL);
    case Instruction::SDiv:
      return SelectDiv(I, /*isSigned*/ true);
    case Instruction::UDiv:
      return SelectDiv(I, /*isSigned*/ false);
    case Instruction::SRem:
      return SelectRem(I, /*isSigned*/ true);
    case Instruction::URem:
      return SelectRem(I, /*isSigned*/ false);
    case Instruction::Call:
      if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I))
        return SelectIntrinsicCall(*II);
      return SelectCall(I);
    case Instruction::Select:
      return SelectSelect(I);
    case Instruction::Ret:
      return SelectRet(I);
    case Instruction::Trunc:
      return SelectTrunc(I);
    case Instruction::ZExt:
    case Instruction::SExt:
      return SelectIntExt(I);
    case Instruction::Shl:
      return SelectShift(I, ARM_AM::lsl);
    case Instruction::LShr:
      return SelectShift(I, ARM_AM::lsr);
    case Instruction::AShr:
      return SelectShift(I, ARM_AM::asr);
    default: break;
  }
  return false;
}

// This table describes sign- and zero-extend instructions which can be
// folded into a preceding load. All of these extends have an immediate
// (sometimes a mask and sometimes a shift) that's applied after
// extension.
static const struct FoldableLoadExtendsStruct {
  uint16_t Opc[2];  // ARM, Thumb.
  uint8_t ExpectedImm;
  uint8_t isZExt     : 1;
  uint8_t ExpectedVT : 7;
} FoldableLoadExtends[] = {
  { { ARM::SXTH,  ARM::t2SXTH  },   0, 0, MVT::i16 },
  { { ARM::UXTH,  ARM::t2UXTH  },   0, 1, MVT::i16 },
  { { ARM::ANDri, ARM::t2ANDri }, 255, 1, MVT::i8  },
  { { ARM::SXTB,  ARM::t2SXTB  },   0, 0, MVT::i8  },
  { { ARM::UXTB,  ARM::t2UXTB  },   0, 1, MVT::i8  }
};
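
// For instance, the ANDri entry with mask 255 matches an "and" that is
// equivalent to a zero-extending byte load, so the extend can be folded away
// by re-emitting the load as a zero-extending LDRB.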

/// The specified machine instr operand is a vreg, and that
/// vreg is being provided by the specified load instruction. If possible,
/// try to fold the load as an operand to the instruction, returning true if
/// successful.
bool ARMFastISel::tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,
                                      const LoadInst *LI) {
  // Verify we have a legal type before going any further.
  MVT VT;
  if (!isLoadTypeLegal(LI->getType(), VT))
    return false;

  // Combine load followed by zero- or sign-extend.
  // ldrb r1, [r0]       ldrb r1, [r0]
  // uxtb r2, r1     =>
  // mov  r3, r2         mov  r3, r1
  if (MI->getNumOperands() < 3 || !MI->getOperand(2).isImm())
    return false;
  const uint64_t Imm = MI->getOperand(2).getImm();

  bool Found = false;
  bool isZExt;
  for (const FoldableLoadExtendsStruct &FLE : FoldableLoadExtends) {
    if (FLE.Opc[isThumb2] == MI->getOpcode() &&
        (uint64_t)FLE.ExpectedImm == Imm &&
        MVT((MVT::SimpleValueType)FLE.ExpectedVT) == VT) {
      Found = true;
      isZExt = FLE.isZExt;
    }
  }
  if (!Found) return false;

  // See if we can handle this address.
  Address Addr;
  if (!ARMComputeAddress(LI->getOperand(0), Addr)) return false;

  Register ResultReg = MI->getOperand(0).getReg();
  if (!ARMEmitLoad(VT, ResultReg, Addr, LI->getAlign(), isZExt, false))
    return false;
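  // The extending load now defines the extend's result register directly, so
  // the original extend instruction is dead and can be erased.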
  MachineBasicBlock::iterator I(MI);
  removeDeadCode(I, std::next(I));
  return true;
}

unsigned ARMFastISel::ARMLowerPICELF(const GlobalValue *GV, MVT VT) {
  bool UseGOT_PREL = !TM.shouldAssumeDSOLocal(*GV->getParent(), GV);
  LLVMContext *Context = &MF->getFunction().getContext();
  unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
  unsigned PCAdj = Subtarget->isThumb() ? 4 : 8;
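  // A PC read yields the address of the current instruction plus 8 in ARM
  // state and plus 4 in Thumb state, so the constant-pool entry bakes that
  // offset into the PC-relative value.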
  ARMConstantPoolValue *CPV = ARMConstantPoolConstant::Create(
      GV, ARMPCLabelIndex, ARMCP::CPValue, PCAdj,
      UseGOT_PREL ? ARMCP::GOT_PREL : ARMCP::no_modifier,
      /*AddCurrentAddress=*/UseGOT_PREL);

  Align ConstAlign =
      MF->getDataLayout().getPrefTypeAlign(Type::getInt32PtrTy(*Context));
  unsigned Idx = MF->getConstantPool()->getConstantPoolIndex(CPV, ConstAlign);
  MachineMemOperand *CPMMO =
      MF->getMachineMemOperand(MachinePointerInfo::getConstantPool(*MF),
                               MachineMemOperand::MOLoad, 4, Align(4));

  Register TempReg = MF->getRegInfo().createVirtualRegister(&ARM::rGPRRegClass);
  unsigned Opc = isThumb2 ? ARM::t2LDRpci : ARM::LDRcp;
  MachineInstrBuilder MIB =
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc), TempReg)
          .addConstantPoolIndex(Idx)
          .addMemOperand(CPMMO);
  if (Opc == ARM::LDRcp)
    MIB.addImm(0);
  MIB.add(predOps(ARMCC::AL));

  // Fix the address by adding pc.
  Register DestReg = createResultReg(TLI.getRegClassFor(VT));
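  // In ARM state, PICLDR both adds pc and loads through the resulting GOT_PREL
  // address in one pseudo, while PICADD only performs the add. tPICADD only
  // performs the add, so in Thumb state the extra GOT load is emitted
  // separately below.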
  Opc = Subtarget->isThumb() ? ARM::tPICADD
        : UseGOT_PREL        ? ARM::PICLDR
                             : ARM::PICADD;
  DestReg = constrainOperandRegClass(TII.get(Opc), DestReg, 0);
  MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc), DestReg)
            .addReg(TempReg)
            .addImm(ARMPCLabelIndex);

  if (!Subtarget->isThumb())
    MIB.add(predOps(ARMCC::AL));

  if (UseGOT_PREL && Subtarget->isThumb()) {
    Register NewDestReg = createResultReg(TLI.getRegClassFor(VT));
    MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
                  TII.get(ARM::t2LDRi12), NewDestReg)
              .addReg(DestReg)
              .addImm(0);
    DestReg = NewDestReg;
    AddOptionalDefs(MIB);
  }
  return DestReg;
}

bool ARMFastISel::fastLowerArguments() {
  if (!FuncInfo.CanLowerReturn)
    return false;

  const Function *F = FuncInfo.Fn;
  if (F->isVarArg())
    return false;

  CallingConv::ID CC = F->getCallingConv();
  switch (CC) {
    default:
      return false;
    case CallingConv::Fast:
    case CallingConv::C:
    case CallingConv::ARM_AAPCS_VFP:
    case CallingConv::ARM_AAPCS:
    case CallingConv::ARM_APCS:
    case CallingConv::Swift:
    case CallingConv::SwiftTail:
      break;
  }

  // Only handle simple cases, i.e. up to 4 i8/i16/i32 scalar arguments
  // which are passed in r0 - r3.
  for (const Argument &Arg : F->args()) {
    if (Arg.getArgNo() >= 4)
      return false;

    if (Arg.hasAttribute(Attribute::InReg) ||
        Arg.hasAttribute(Attribute::StructRet) ||
        Arg.hasAttribute(Attribute::SwiftSelf) ||
        Arg.hasAttribute(Attribute::SwiftError) ||
        Arg.hasAttribute(Attribute::ByVal))
      return false;

    Type *ArgTy = Arg.getType();
    if (ArgTy->isStructTy() || ArgTy->isArrayTy() || ArgTy->isVectorTy())
      return false;

    EVT ArgVT = TLI.getValueType(DL, ArgTy);
    if (!ArgVT.isSimple()) return false;
    switch (ArgVT.getSimpleVT().SimpleTy) {
      case MVT::i8:
      case MVT::i16:
      case MVT::i32:
        break;
      default:
        return false;
    }
  }

  static const MCPhysReg GPRArgRegs[] = {
    ARM::R0, ARM::R1, ARM::R2, ARM::R3
  };

  const TargetRegisterClass *RC = &ARM::rGPRRegClass;
  for (const Argument &Arg : F->args()) {
    unsigned ArgNo = Arg.getArgNo();
    unsigned SrcReg = GPRArgRegs[ArgNo];
    Register DstReg = FuncInfo.MF->addLiveIn(SrcReg, RC);
    // FIXME: Unfortunately it's necessary to emit a copy from the livein copy.
    // Without this, EmitLiveInCopies may eliminate the livein if its only
    // use is a bitcast (which isn't turned into an instruction).
    Register ResultReg = createResultReg(RC);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
            TII.get(TargetOpcode::COPY), ResultReg)
        .addReg(DstReg, getKillRegState(true));
    updateValueMap(&Arg, ResultReg);
  }
  return true;
}

namespace llvm {

  FastISel *ARM::createFastISel(FunctionLoweringInfo &funcInfo,
                                const TargetLibraryInfo *libInfo) {
    if (funcInfo.MF->getSubtarget<ARMSubtarget>().useFastISel())
      return new ARMFastISel(funcInfo, libInfo);

    return nullptr;
  }

} // end namespace llvm