  1. //===- ARMFastISel.cpp - ARM FastISel implementation ----------------------===//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. //
  9. // This file defines the ARM-specific support for the FastISel class. Some
  10. // of the target-specific code is generated by tablegen in the file
  11. // ARMGenFastISel.inc, which is #included here.
  12. //
  13. //===----------------------------------------------------------------------===//
  14. #include "ARM.h"
  15. #include "ARMBaseInstrInfo.h"
  16. #include "ARMBaseRegisterInfo.h"
  17. #include "ARMCallingConv.h"
  18. #include "ARMConstantPoolValue.h"
  19. #include "ARMISelLowering.h"
  20. #include "ARMMachineFunctionInfo.h"
  21. #include "ARMSubtarget.h"
  22. #include "MCTargetDesc/ARMAddressingModes.h"
  23. #include "MCTargetDesc/ARMBaseInfo.h"
  24. #include "Utils/ARMBaseInfo.h"
  25. #include "llvm/ADT/APFloat.h"
  26. #include "llvm/ADT/APInt.h"
  27. #include "llvm/ADT/DenseMap.h"
  28. #include "llvm/ADT/SmallVector.h"
  29. #include "llvm/CodeGen/CallingConvLower.h"
  30. #include "llvm/CodeGen/FastISel.h"
  31. #include "llvm/CodeGen/FunctionLoweringInfo.h"
  32. #include "llvm/CodeGen/ISDOpcodes.h"
  33. #include "llvm/CodeGen/MachineBasicBlock.h"
  34. #include "llvm/CodeGen/MachineConstantPool.h"
  35. #include "llvm/CodeGen/MachineFrameInfo.h"
  36. #include "llvm/CodeGen/MachineFunction.h"
  37. #include "llvm/CodeGen/MachineInstr.h"
  38. #include "llvm/CodeGen/MachineInstrBuilder.h"
  39. #include "llvm/CodeGen/MachineMemOperand.h"
  40. #include "llvm/CodeGen/MachineOperand.h"
  41. #include "llvm/CodeGen/MachineRegisterInfo.h"
  42. #include "llvm/CodeGen/RuntimeLibcalls.h"
  43. #include "llvm/CodeGen/TargetInstrInfo.h"
  44. #include "llvm/CodeGen/TargetLowering.h"
  45. #include "llvm/CodeGen/TargetOpcodes.h"
  46. #include "llvm/CodeGen/TargetRegisterInfo.h"
  47. #include "llvm/CodeGen/ValueTypes.h"
  48. #include "llvm/IR/Argument.h"
  49. #include "llvm/IR/Attributes.h"
  50. #include "llvm/IR/CallingConv.h"
  51. #include "llvm/IR/Constant.h"
  52. #include "llvm/IR/Constants.h"
  53. #include "llvm/IR/DataLayout.h"
  54. #include "llvm/IR/DerivedTypes.h"
  55. #include "llvm/IR/Function.h"
  56. #include "llvm/IR/GetElementPtrTypeIterator.h"
  57. #include "llvm/IR/GlobalValue.h"
  58. #include "llvm/IR/GlobalVariable.h"
  59. #include "llvm/IR/InstrTypes.h"
  60. #include "llvm/IR/Instruction.h"
  61. #include "llvm/IR/Instructions.h"
  62. #include "llvm/IR/IntrinsicInst.h"
  63. #include "llvm/IR/Intrinsics.h"
  64. #include "llvm/IR/Module.h"
  65. #include "llvm/IR/Operator.h"
  66. #include "llvm/IR/Type.h"
  67. #include "llvm/IR/User.h"
  68. #include "llvm/IR/Value.h"
  69. #include "llvm/MC/MCInstrDesc.h"
  70. #include "llvm/MC/MCRegisterInfo.h"
  71. #include "llvm/Support/Casting.h"
  72. #include "llvm/Support/Compiler.h"
  73. #include "llvm/Support/ErrorHandling.h"
  74. #include "llvm/Support/MachineValueType.h"
  75. #include "llvm/Support/MathExtras.h"
  76. #include "llvm/Target/TargetMachine.h"
  77. #include "llvm/Target/TargetOptions.h"
  78. #include <cassert>
  79. #include <cstdint>
  80. #include <utility>
  81. using namespace llvm;
  82. namespace {
  83. // All possible address modes, plus some.
  84. struct Address {
  85. enum {
  86. RegBase,
  87. FrameIndexBase
  88. } BaseType = RegBase;
  89. union {
  90. unsigned Reg;
  91. int FI;
  92. } Base;
  93. int Offset = 0;
  94. // Innocuous defaults for our address.
  95. Address() {
  96. Base.Reg = 0;
  97. }
  98. };
  99. class ARMFastISel final : public FastISel {
  100. /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can
  101. /// make the right decision when generating code for different targets.
  102. const ARMSubtarget *Subtarget;
  103. Module &M;
  104. const TargetMachine &TM;
  105. const TargetInstrInfo &TII;
  106. const TargetLowering &TLI;
  107. ARMFunctionInfo *AFI;
  108. // Convenience variables to avoid some queries.
  109. bool isThumb2;
  110. LLVMContext *Context;
  111. public:
  112. explicit ARMFastISel(FunctionLoweringInfo &funcInfo,
  113. const TargetLibraryInfo *libInfo)
  114. : FastISel(funcInfo, libInfo),
  115. Subtarget(
  116. &static_cast<const ARMSubtarget &>(funcInfo.MF->getSubtarget())),
  117. M(const_cast<Module &>(*funcInfo.Fn->getParent())),
  118. TM(funcInfo.MF->getTarget()), TII(*Subtarget->getInstrInfo()),
  119. TLI(*Subtarget->getTargetLowering()) {
  120. AFI = funcInfo.MF->getInfo<ARMFunctionInfo>();
  121. isThumb2 = AFI->isThumbFunction();
  122. Context = &funcInfo.Fn->getContext();
  123. }
  124. private:
  125. // Code from FastISel.cpp.
  126. unsigned fastEmitInst_r(unsigned MachineInstOpcode,
  127. const TargetRegisterClass *RC, unsigned Op0);
  128. unsigned fastEmitInst_rr(unsigned MachineInstOpcode,
  129. const TargetRegisterClass *RC,
  130. unsigned Op0, unsigned Op1);
  131. unsigned fastEmitInst_ri(unsigned MachineInstOpcode,
  132. const TargetRegisterClass *RC,
  133. unsigned Op0, uint64_t Imm);
  134. unsigned fastEmitInst_i(unsigned MachineInstOpcode,
  135. const TargetRegisterClass *RC,
  136. uint64_t Imm);
  137. // Backend-specific FastISel code.
  138. bool fastSelectInstruction(const Instruction *I) override;
  139. unsigned fastMaterializeConstant(const Constant *C) override;
  140. unsigned fastMaterializeAlloca(const AllocaInst *AI) override;
  141. bool tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,
  142. const LoadInst *LI) override;
  143. bool fastLowerArguments() override;
  144. #include "ARMGenFastISel.inc"
  145. // Instruction selection routines.
  146. bool SelectLoad(const Instruction *I);
  147. bool SelectStore(const Instruction *I);
  148. bool SelectBranch(const Instruction *I);
  149. bool SelectIndirectBr(const Instruction *I);
  150. bool SelectCmp(const Instruction *I);
  151. bool SelectFPExt(const Instruction *I);
  152. bool SelectFPTrunc(const Instruction *I);
  153. bool SelectBinaryIntOp(const Instruction *I, unsigned ISDOpcode);
  154. bool SelectBinaryFPOp(const Instruction *I, unsigned ISDOpcode);
  155. bool SelectIToFP(const Instruction *I, bool isSigned);
  156. bool SelectFPToI(const Instruction *I, bool isSigned);
  157. bool SelectDiv(const Instruction *I, bool isSigned);
  158. bool SelectRem(const Instruction *I, bool isSigned);
  159. bool SelectCall(const Instruction *I, const char *IntrMemName);
  160. bool SelectIntrinsicCall(const IntrinsicInst &I);
  161. bool SelectSelect(const Instruction *I);
  162. bool SelectRet(const Instruction *I);
  163. bool SelectTrunc(const Instruction *I);
  164. bool SelectIntExt(const Instruction *I);
  165. bool SelectShift(const Instruction *I, ARM_AM::ShiftOpc ShiftTy);
  166. // Utility routines.
  167. bool isPositionIndependent() const;
  168. bool isTypeLegal(Type *Ty, MVT &VT);
  169. bool isLoadTypeLegal(Type *Ty, MVT &VT);
  170. bool ARMEmitCmp(const Value *Src1Value, const Value *Src2Value,
  171. bool isZExt);
  172. bool ARMEmitLoad(MVT VT, Register &ResultReg, Address &Addr,
  173. unsigned Alignment = 0, bool isZExt = true,
  174. bool allocReg = true);
  175. bool ARMEmitStore(MVT VT, unsigned SrcReg, Address &Addr,
  176. unsigned Alignment = 0);
  177. bool ARMComputeAddress(const Value *Obj, Address &Addr);
  178. void ARMSimplifyAddress(Address &Addr, MVT VT, bool useAM3);
  179. bool ARMIsMemCpySmall(uint64_t Len);
  180. bool ARMTryEmitSmallMemCpy(Address Dest, Address Src, uint64_t Len,
  181. unsigned Alignment);
  182. unsigned ARMEmitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, bool isZExt);
  183. unsigned ARMMaterializeFP(const ConstantFP *CFP, MVT VT);
  184. unsigned ARMMaterializeInt(const Constant *C, MVT VT);
  185. unsigned ARMMaterializeGV(const GlobalValue *GV, MVT VT);
  186. unsigned ARMMoveToFPReg(MVT VT, unsigned SrcReg);
  187. unsigned ARMMoveToIntReg(MVT VT, unsigned SrcReg);
  188. unsigned ARMSelectCallOp(bool UseReg);
  189. unsigned ARMLowerPICELF(const GlobalValue *GV, MVT VT);
  190. const TargetLowering *getTargetLowering() { return &TLI; }
  191. // Call handling routines.
  192. CCAssignFn *CCAssignFnForCall(CallingConv::ID CC,
  193. bool Return,
  194. bool isVarArg);
  195. bool ProcessCallArgs(SmallVectorImpl<Value*> &Args,
  196. SmallVectorImpl<Register> &ArgRegs,
  197. SmallVectorImpl<MVT> &ArgVTs,
  198. SmallVectorImpl<ISD::ArgFlagsTy> &ArgFlags,
  199. SmallVectorImpl<Register> &RegArgs,
  200. CallingConv::ID CC,
  201. unsigned &NumBytes,
  202. bool isVarArg);
  203. unsigned getLibcallReg(const Twine &Name);
  204. bool FinishCall(MVT RetVT, SmallVectorImpl<Register> &UsedRegs,
  205. const Instruction *I, CallingConv::ID CC,
  206. unsigned &NumBytes, bool isVarArg);
  207. bool ARMEmitLibcall(const Instruction *I, RTLIB::Libcall Call);
  208. // OptionalDef handling routines.
  209. bool isARMNEONPred(const MachineInstr *MI);
  210. bool DefinesOptionalPredicate(MachineInstr *MI, bool *CPSR);
  211. const MachineInstrBuilder &AddOptionalDefs(const MachineInstrBuilder &MIB);
  212. void AddLoadStoreOperands(MVT VT, Address &Addr,
  213. const MachineInstrBuilder &MIB,
  214. MachineMemOperand::Flags Flags, bool useAM3);
  215. };
  216. } // end anonymous namespace
  217. // DefinesOptionalPredicate - This is different from DefinesPredicate in that
  218. // we don't care about implicit defs here, just places we'll need to add a
  219. // default CCReg argument. Sets CPSR if we're setting CPSR instead of CCR.
  220. bool ARMFastISel::DefinesOptionalPredicate(MachineInstr *MI, bool *CPSR) {
  221. if (!MI->hasOptionalDef())
  222. return false;
  223. // Look to see if our OptionalDef is defining CPSR or CCR.
  224. for (const MachineOperand &MO : MI->operands()) {
  225. if (!MO.isReg() || !MO.isDef()) continue;
  226. if (MO.getReg() == ARM::CPSR)
  227. *CPSR = true;
  228. }
  229. return true;
  230. }
  231. bool ARMFastISel::isARMNEONPred(const MachineInstr *MI) {
  232. const MCInstrDesc &MCID = MI->getDesc();
  234. // If this is a Thumb2 function or a non-NEON instruction, defer to isPredicable.
  234. if ((MCID.TSFlags & ARMII::DomainMask) != ARMII::DomainNEON ||
  235. AFI->isThumb2Function())
  236. return MI->isPredicable();
  237. for (const MCOperandInfo &opInfo : MCID.operands())
  238. if (opInfo.isPredicate())
  239. return true;
  240. return false;
  241. }
  242. // If the machine instruction is predicable, go ahead and add the predicate
  243. // operands; if it needs default CC operands, add those.
  244. // TODO: If we want to support thumb1 then we'll need to deal with optional
  245. // CPSR defs that need to be added before the remaining operands. See s_cc_out
  246. // for a description of why.
  247. const MachineInstrBuilder &
  248. ARMFastISel::AddOptionalDefs(const MachineInstrBuilder &MIB) {
  249. MachineInstr *MI = &*MIB;
  250. // Do we use a predicate? Or are we a NEON instruction in ARM mode with a
  251. // predicate operand? In that case the instruction isn't predicable, but we
  252. // add the predicate operand anyway.
  253. if (isARMNEONPred(MI))
  254. MIB.add(predOps(ARMCC::AL));
  255. // Do we optionally set a predicate? CPSR is set iff the optional def is
  256. // CPSR; all other optional defs in ARM are the CCR register.
  257. bool CPSR = false;
  258. if (DefinesOptionalPredicate(MI, &CPSR))
  259. MIB.add(CPSR ? t1CondCodeOp() : condCodeOp());
  260. return MIB;
  261. }
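// The fastEmitInst_* helpers below mirror the generic FastISel emitters, but
// they constrain the input registers to the classes the opcode requires and
// route every instruction through AddOptionalDefs so the predicate and
// optional CC operands that ARM instructions expect are always present.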
  262. unsigned ARMFastISel::fastEmitInst_r(unsigned MachineInstOpcode,
  263. const TargetRegisterClass *RC,
  264. unsigned Op0) {
  265. Register ResultReg = createResultReg(RC);
  266. const MCInstrDesc &II = TII.get(MachineInstOpcode);
  267. // Make sure the input operand is sufficiently constrained to be legal
  268. // for this instruction.
  269. Op0 = constrainOperandRegClass(II, Op0, 1);
  270. if (II.getNumDefs() >= 1) {
  271. AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II,
  272. ResultReg).addReg(Op0));
  273. } else {
  274. AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
  275. .addReg(Op0));
  276. AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
  277. TII.get(TargetOpcode::COPY), ResultReg)
  278. .addReg(II.ImplicitDefs[0]));
  279. }
  280. return ResultReg;
  281. }
  282. unsigned ARMFastISel::fastEmitInst_rr(unsigned MachineInstOpcode,
  283. const TargetRegisterClass *RC,
  284. unsigned Op0, unsigned Op1) {
  285. Register ResultReg = createResultReg(RC);
  286. const MCInstrDesc &II = TII.get(MachineInstOpcode);
  287. // Make sure the input operands are sufficiently constrained to be legal
  288. // for this instruction.
  289. Op0 = constrainOperandRegClass(II, Op0, 1);
  290. Op1 = constrainOperandRegClass(II, Op1, 2);
  291. if (II.getNumDefs() >= 1) {
  292. AddOptionalDefs(
  293. BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
  294. .addReg(Op0)
  295. .addReg(Op1));
  296. } else {
  297. AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
  298. .addReg(Op0)
  299. .addReg(Op1));
  300. AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
  301. TII.get(TargetOpcode::COPY), ResultReg)
  302. .addReg(II.ImplicitDefs[0]));
  303. }
  304. return ResultReg;
  305. }
  306. unsigned ARMFastISel::fastEmitInst_ri(unsigned MachineInstOpcode,
  307. const TargetRegisterClass *RC,
  308. unsigned Op0, uint64_t Imm) {
  309. Register ResultReg = createResultReg(RC);
  310. const MCInstrDesc &II = TII.get(MachineInstOpcode);
  311. // Make sure the input operand is sufficiently constrained to be legal
  312. // for this instruction.
  313. Op0 = constrainOperandRegClass(II, Op0, 1);
  314. if (II.getNumDefs() >= 1) {
  315. AddOptionalDefs(
  316. BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
  317. .addReg(Op0)
  318. .addImm(Imm));
  319. } else {
  320. AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
  321. .addReg(Op0)
  322. .addImm(Imm));
  323. AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
  324. TII.get(TargetOpcode::COPY), ResultReg)
  325. .addReg(II.ImplicitDefs[0]));
  326. }
  327. return ResultReg;
  328. }
  329. unsigned ARMFastISel::fastEmitInst_i(unsigned MachineInstOpcode,
  330. const TargetRegisterClass *RC,
  331. uint64_t Imm) {
  332. Register ResultReg = createResultReg(RC);
  333. const MCInstrDesc &II = TII.get(MachineInstOpcode);
  334. if (II.getNumDefs() >= 1) {
  335. AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II,
  336. ResultReg).addImm(Imm));
  337. } else {
  338. AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
  339. .addImm(Imm));
  340. AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
  341. TII.get(TargetOpcode::COPY), ResultReg)
  342. .addReg(II.ImplicitDefs[0]));
  343. }
  344. return ResultReg;
  345. }
  346. // TODO: Don't worry about 64-bit now, but when this is fixed remove the
  347. // checks from the various callers.
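// ARMMoveToFPReg / ARMMoveToIntReg move a 32-bit value between a core register
// and a single-precision VFP register (VMOVSR / VMOVRS); 64-bit values are
// rejected, per the TODO above.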
  348. unsigned ARMFastISel::ARMMoveToFPReg(MVT VT, unsigned SrcReg) {
  349. if (VT == MVT::f64) return 0;
  350. Register MoveReg = createResultReg(TLI.getRegClassFor(VT));
  351. AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
  352. TII.get(ARM::VMOVSR), MoveReg)
  353. .addReg(SrcReg));
  354. return MoveReg;
  355. }
  356. unsigned ARMFastISel::ARMMoveToIntReg(MVT VT, unsigned SrcReg) {
  357. if (VT == MVT::i64) return 0;
  358. Register MoveReg = createResultReg(TLI.getRegClassFor(VT));
  359. AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
  360. TII.get(ARM::VMOVRS), MoveReg)
  361. .addReg(SrcReg));
  362. return MoveReg;
  363. }
  364. // Materialize a floating-point constant either directly, via a VFP3
  365. // immediate move when the value is encodable, or by loading it from the
  366. // constant pool.
  367. unsigned ARMFastISel::ARMMaterializeFP(const ConstantFP *CFP, MVT VT) {
  368. const APFloat Val = CFP->getValueAPF();
  369. bool is64bit = VT == MVT::f64;
  370. // This checks to see if we can use VFP3 instructions to materialize
  371. // a constant, otherwise we have to go through the constant pool.
  372. if (TLI.isFPImmLegal(Val, VT)) {
  373. int Imm;
  374. unsigned Opc;
  375. if (is64bit) {
  376. Imm = ARM_AM::getFP64Imm(Val);
  377. Opc = ARM::FCONSTD;
  378. } else {
  379. Imm = ARM_AM::getFP32Imm(Val);
  380. Opc = ARM::FCONSTS;
  381. }
  382. Register DestReg = createResultReg(TLI.getRegClassFor(VT));
  383. AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
  384. TII.get(Opc), DestReg).addImm(Imm));
  385. return DestReg;
  386. }
  387. // Require VFP2 for loading fp constants.
  388. if (!Subtarget->hasVFP2Base()) return 0;
  389. // MachineConstantPool wants an explicit alignment.
  390. Align Alignment = DL.getPrefTypeAlign(CFP->getType());
  391. unsigned Idx = MCP.getConstantPoolIndex(cast<Constant>(CFP), Alignment);
  392. Register DestReg = createResultReg(TLI.getRegClassFor(VT));
  393. unsigned Opc = is64bit ? ARM::VLDRD : ARM::VLDRS;
  394. // The extra reg is for addrmode5.
  395. AddOptionalDefs(
  396. BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), DestReg)
  397. .addConstantPoolIndex(Idx)
  398. .addReg(0));
  399. return DestReg;
  400. }
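// ARMMaterializeInt - Materialize an integer constant into a register,
// preferring a single MOVi16 or MVN when the immediate can be encoded and
// falling back to a constant-pool load for other 32-bit values.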
  401. unsigned ARMFastISel::ARMMaterializeInt(const Constant *C, MVT VT) {
  402. if (VT != MVT::i32 && VT != MVT::i16 && VT != MVT::i8 && VT != MVT::i1)
  403. return 0;
  404. // If we can do this in a single instruction without a constant pool entry
  405. // do so now.
  406. const ConstantInt *CI = cast<ConstantInt>(C);
  407. if (Subtarget->hasV6T2Ops() && isUInt<16>(CI->getZExtValue())) {
  408. unsigned Opc = isThumb2 ? ARM::t2MOVi16 : ARM::MOVi16;
  409. const TargetRegisterClass *RC = isThumb2 ? &ARM::rGPRRegClass :
  410. &ARM::GPRRegClass;
  411. Register ImmReg = createResultReg(RC);
  412. AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
  413. TII.get(Opc), ImmReg)
  414. .addImm(CI->getZExtValue()));
  415. return ImmReg;
  416. }
  417. // Use MVN to emit negative constants.
  418. if (VT == MVT::i32 && Subtarget->hasV6T2Ops() && CI->isNegative()) {
  419. unsigned Imm = (unsigned)~(CI->getSExtValue());
  420. bool UseImm = isThumb2 ? (ARM_AM::getT2SOImmVal(Imm) != -1) :
  421. (ARM_AM::getSOImmVal(Imm) != -1);
  422. if (UseImm) {
  423. unsigned Opc = isThumb2 ? ARM::t2MVNi : ARM::MVNi;
  424. const TargetRegisterClass *RC = isThumb2 ? &ARM::rGPRRegClass :
  425. &ARM::GPRRegClass;
  426. Register ImmReg = createResultReg(RC);
  427. AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
  428. TII.get(Opc), ImmReg)
  429. .addImm(Imm));
  430. return ImmReg;
  431. }
  432. }
  433. unsigned ResultReg = 0;
  434. if (Subtarget->useMovt())
  435. ResultReg = fastEmit_i(VT, VT, ISD::Constant, CI->getZExtValue());
  436. if (ResultReg)
  437. return ResultReg;
  438. // Load from constant pool. For now 32-bit only.
  439. if (VT != MVT::i32)
  440. return 0;
  441. // MachineConstantPool wants an explicit alignment.
  442. Align Alignment = DL.getPrefTypeAlign(C->getType());
  443. unsigned Idx = MCP.getConstantPoolIndex(C, Alignment);
  444. ResultReg = createResultReg(TLI.getRegClassFor(VT));
  445. if (isThumb2)
  446. AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
  447. TII.get(ARM::t2LDRpci), ResultReg)
  448. .addConstantPoolIndex(Idx));
  449. else {
  450. // The extra immediate is for addrmode2.
  451. ResultReg = constrainOperandRegClass(TII.get(ARM::LDRcp), ResultReg, 0);
  452. AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
  453. TII.get(ARM::LDRcp), ResultReg)
  454. .addConstantPoolIndex(Idx)
  455. .addImm(0));
  456. }
  457. return ResultReg;
  458. }
  459. bool ARMFastISel::isPositionIndependent() const {
  460. return TLI.isPositionIndependent();
  461. }
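// ARMMaterializeGV - Materialize the address of a global value. Uses movw/movt
// where allowed, otherwise loads the address from the constant pool (with a
// PIC adjustment when needed), and adds one more load for GOT, indirect-symbol,
// and long-call cases.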
  462. unsigned ARMFastISel::ARMMaterializeGV(const GlobalValue *GV, MVT VT) {
  463. // For now 32-bit only.
  464. if (VT != MVT::i32 || GV->isThreadLocal()) return 0;
  465. // ROPI/RWPI not currently supported.
  466. if (Subtarget->isROPI() || Subtarget->isRWPI())
  467. return 0;
  468. bool IsIndirect = Subtarget->isGVIndirectSymbol(GV);
  469. const TargetRegisterClass *RC = isThumb2 ? &ARM::rGPRRegClass
  470. : &ARM::GPRRegClass;
  471. Register DestReg = createResultReg(RC);
  472. // FastISel TLS support on non-MachO is broken, punt to SelectionDAG.
  473. const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV);
  474. bool IsThreadLocal = GVar && GVar->isThreadLocal();
  475. if (!Subtarget->isTargetMachO() && IsThreadLocal) return 0;
  476. bool IsPositionIndependent = isPositionIndependent();
  477. // Use movw+movt when possible; it avoids constant pool entries.
  478. // Non-Darwin targets only support static movt relocations in FastISel.
  479. if (Subtarget->useMovt() &&
  480. (Subtarget->isTargetMachO() || !IsPositionIndependent)) {
  481. unsigned Opc;
  482. unsigned char TF = 0;
  483. if (Subtarget->isTargetMachO())
  484. TF = ARMII::MO_NONLAZY;
  485. if (IsPositionIndependent)
  486. Opc = isThumb2 ? ARM::t2MOV_ga_pcrel : ARM::MOV_ga_pcrel;
  487. else
  488. Opc = isThumb2 ? ARM::t2MOVi32imm : ARM::MOVi32imm;
  489. AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
  490. TII.get(Opc), DestReg).addGlobalAddress(GV, 0, TF));
  491. } else {
  492. // MachineConstantPool wants an explicit alignment.
  493. Align Alignment = DL.getPrefTypeAlign(GV->getType());
  494. if (Subtarget->isTargetELF() && IsPositionIndependent)
  495. return ARMLowerPICELF(GV, VT);
  496. // Grab index.
  497. unsigned PCAdj = IsPositionIndependent ? (Subtarget->isThumb() ? 4 : 8) : 0;
  498. unsigned Id = AFI->createPICLabelUId();
  499. ARMConstantPoolValue *CPV = ARMConstantPoolConstant::Create(GV, Id,
  500. ARMCP::CPValue,
  501. PCAdj);
  502. unsigned Idx = MCP.getConstantPoolIndex(CPV, Alignment);
  503. // Load value.
  504. MachineInstrBuilder MIB;
  505. if (isThumb2) {
  506. unsigned Opc = IsPositionIndependent ? ARM::t2LDRpci_pic : ARM::t2LDRpci;
  507. MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc),
  508. DestReg).addConstantPoolIndex(Idx);
  509. if (IsPositionIndependent)
  510. MIB.addImm(Id);
  511. AddOptionalDefs(MIB);
  512. } else {
  513. // The extra immediate is for addrmode2.
  514. DestReg = constrainOperandRegClass(TII.get(ARM::LDRcp), DestReg, 0);
  515. MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
  516. TII.get(ARM::LDRcp), DestReg)
  517. .addConstantPoolIndex(Idx)
  518. .addImm(0);
  519. AddOptionalDefs(MIB);
  520. if (IsPositionIndependent) {
  521. unsigned Opc = IsIndirect ? ARM::PICLDR : ARM::PICADD;
  522. Register NewDestReg = createResultReg(TLI.getRegClassFor(VT));
  523. MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt,
  524. DbgLoc, TII.get(Opc), NewDestReg)
  525. .addReg(DestReg)
  526. .addImm(Id);
  527. AddOptionalDefs(MIB);
  528. return NewDestReg;
  529. }
  530. }
  531. }
  532. if ((Subtarget->isTargetELF() && Subtarget->isGVInGOT(GV)) ||
  533. (Subtarget->isTargetMachO() && IsIndirect) ||
  534. Subtarget->genLongCalls()) {
  535. MachineInstrBuilder MIB;
  536. Register NewDestReg = createResultReg(TLI.getRegClassFor(VT));
  537. if (isThumb2)
  538. MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
  539. TII.get(ARM::t2LDRi12), NewDestReg)
  540. .addReg(DestReg)
  541. .addImm(0);
  542. else
  543. MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
  544. TII.get(ARM::LDRi12), NewDestReg)
  545. .addReg(DestReg)
  546. .addImm(0);
  547. DestReg = NewDestReg;
  548. AddOptionalDefs(MIB);
  549. }
  550. return DestReg;
  551. }
  552. unsigned ARMFastISel::fastMaterializeConstant(const Constant *C) {
  553. EVT CEVT = TLI.getValueType(DL, C->getType(), true);
  554. // Only handle simple types.
  555. if (!CEVT.isSimple()) return 0;
  556. MVT VT = CEVT.getSimpleVT();
  557. if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C))
  558. return ARMMaterializeFP(CFP, VT);
  559. else if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
  560. return ARMMaterializeGV(GV, VT);
  561. else if (isa<ConstantInt>(C))
  562. return ARMMaterializeInt(C, VT);
  563. return 0;
  564. }
  565. // TODO: unsigned ARMFastISel::TargetMaterializeFloatZero(const ConstantFP *CF);
  566. unsigned ARMFastISel::fastMaterializeAlloca(const AllocaInst *AI) {
  567. // Don't handle dynamic allocas.
  568. if (!FuncInfo.StaticAllocaMap.count(AI)) return 0;
  569. MVT VT;
  570. if (!isLoadTypeLegal(AI->getType(), VT)) return 0;
  571. DenseMap<const AllocaInst*, int>::iterator SI =
  572. FuncInfo.StaticAllocaMap.find(AI);
  573. // This will get lowered later into the correct offsets and registers
  574. // via rewriteXFrameIndex.
  575. if (SI != FuncInfo.StaticAllocaMap.end()) {
  576. unsigned Opc = isThumb2 ? ARM::t2ADDri : ARM::ADDri;
  577. const TargetRegisterClass* RC = TLI.getRegClassFor(VT);
  578. Register ResultReg = createResultReg(RC);
  579. ResultReg = constrainOperandRegClass(TII.get(Opc), ResultReg, 0);
  580. AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
  581. TII.get(Opc), ResultReg)
  582. .addFrameIndex(SI->second)
  583. .addImm(0));
  584. return ResultReg;
  585. }
  586. return 0;
  587. }
  588. bool ARMFastISel::isTypeLegal(Type *Ty, MVT &VT) {
  589. EVT evt = TLI.getValueType(DL, Ty, true);
  590. // Only handle simple types.
  591. if (evt == MVT::Other || !evt.isSimple()) return false;
  592. VT = evt.getSimpleVT();
  593. // Handle all legal types, i.e. a register that will directly hold this
  594. // value.
  595. return TLI.isTypeLegal(VT);
  596. }
  597. bool ARMFastISel::isLoadTypeLegal(Type *Ty, MVT &VT) {
  598. if (isTypeLegal(Ty, VT)) return true;
  599. // If this is a type that can be sign- or zero-extended to a basic operation,
  600. // go ahead and accept it now.
  601. if (VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16)
  602. return true;
  603. return false;
  604. }
  605. // Computes the address to get to an object.
  606. bool ARMFastISel::ARMComputeAddress(const Value *Obj, Address &Addr) {
  607. // Some boilerplate from the X86 FastISel.
  608. const User *U = nullptr;
  609. unsigned Opcode = Instruction::UserOp1;
  610. if (const Instruction *I = dyn_cast<Instruction>(Obj)) {
  611. // Don't walk into other basic blocks unless the object is an alloca from
  612. // another block, otherwise it may not have a virtual register assigned.
  613. if (FuncInfo.StaticAllocaMap.count(static_cast<const AllocaInst *>(Obj)) ||
  614. FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
  615. Opcode = I->getOpcode();
  616. U = I;
  617. }
  618. } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(Obj)) {
  619. Opcode = C->getOpcode();
  620. U = C;
  621. }
  622. if (PointerType *Ty = dyn_cast<PointerType>(Obj->getType()))
  623. if (Ty->getAddressSpace() > 255)
  624. // Fast instruction selection doesn't support the special
  625. // address spaces.
  626. return false;
  627. switch (Opcode) {
  628. default:
  629. break;
  630. case Instruction::BitCast:
  631. // Look through bitcasts.
  632. return ARMComputeAddress(U->getOperand(0), Addr);
  633. case Instruction::IntToPtr:
  634. // Look past no-op inttoptrs.
  635. if (TLI.getValueType(DL, U->getOperand(0)->getType()) ==
  636. TLI.getPointerTy(DL))
  637. return ARMComputeAddress(U->getOperand(0), Addr);
  638. break;
  639. case Instruction::PtrToInt:
  640. // Look past no-op ptrtoints.
  641. if (TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL))
  642. return ARMComputeAddress(U->getOperand(0), Addr);
  643. break;
  644. case Instruction::GetElementPtr: {
  645. Address SavedAddr = Addr;
  646. int TmpOffset = Addr.Offset;
  647. // Iterate through the GEP folding the constants into offsets where
  648. // we can.
  649. gep_type_iterator GTI = gep_type_begin(U);
  650. for (User::const_op_iterator i = U->op_begin() + 1, e = U->op_end();
  651. i != e; ++i, ++GTI) {
  652. const Value *Op = *i;
  653. if (StructType *STy = GTI.getStructTypeOrNull()) {
  654. const StructLayout *SL = DL.getStructLayout(STy);
  655. unsigned Idx = cast<ConstantInt>(Op)->getZExtValue();
  656. TmpOffset += SL->getElementOffset(Idx);
  657. } else {
  658. uint64_t S = DL.getTypeAllocSize(GTI.getIndexedType());
  659. while (true) {
  660. if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
  661. // Constant-offset addressing.
  662. TmpOffset += CI->getSExtValue() * S;
  663. break;
  664. }
  665. if (canFoldAddIntoGEP(U, Op)) {
  666. // A compatible add with a constant operand. Fold the constant.
  667. ConstantInt *CI =
  668. cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1));
  669. TmpOffset += CI->getSExtValue() * S;
  670. // Iterate on the other operand.
  671. Op = cast<AddOperator>(Op)->getOperand(0);
  672. continue;
  673. }
  674. // Unsupported
  675. goto unsupported_gep;
  676. }
  677. }
  678. }
  679. // Try to grab the base operand now.
  680. Addr.Offset = TmpOffset;
  681. if (ARMComputeAddress(U->getOperand(0), Addr)) return true;
  682. // We failed, restore everything and try the other options.
  683. Addr = SavedAddr;
  684. unsupported_gep:
  685. break;
  686. }
  687. case Instruction::Alloca: {
  688. const AllocaInst *AI = cast<AllocaInst>(Obj);
  689. DenseMap<const AllocaInst*, int>::iterator SI =
  690. FuncInfo.StaticAllocaMap.find(AI);
  691. if (SI != FuncInfo.StaticAllocaMap.end()) {
  692. Addr.BaseType = Address::FrameIndexBase;
  693. Addr.Base.FI = SI->second;
  694. return true;
  695. }
  696. break;
  697. }
  698. }
  699. // Try to get this in a register if nothing else has worked.
  700. if (Addr.Base.Reg == 0) Addr.Base.Reg = getRegForValue(Obj);
  701. return Addr.Base.Reg != 0;
  702. }
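// ARMSimplifyAddress - Fold the computed address into a form the chosen
// load/store can encode: rewrite a frame-index base into a register and fold
// an out-of-range offset with an explicit ADD when the encoding can't take it
// directly.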
  703. void ARMFastISel::ARMSimplifyAddress(Address &Addr, MVT VT, bool useAM3) {
  704. bool needsLowering = false;
  705. switch (VT.SimpleTy) {
  706. default: llvm_unreachable("Unhandled load/store type!");
  707. case MVT::i1:
  708. case MVT::i8:
  709. case MVT::i16:
  710. case MVT::i32:
  711. if (!useAM3) {
  712. // Integer loads/stores handle 12-bit offsets.
  713. needsLowering = ((Addr.Offset & 0xfff) != Addr.Offset);
  714. // Handle negative offsets.
  715. if (needsLowering && isThumb2)
  716. needsLowering = !(Subtarget->hasV6T2Ops() && Addr.Offset < 0 &&
  717. Addr.Offset > -256);
  718. } else {
  719. // ARM halfword load/stores and signed byte loads use +/-imm8 offsets.
  720. needsLowering = (Addr.Offset > 255 || Addr.Offset < -255);
  721. }
  722. break;
  723. case MVT::f32:
  724. case MVT::f64:
  725. // Floating point operands handle 8-bit offsets.
  726. needsLowering = ((Addr.Offset & 0xff) != Addr.Offset);
  727. break;
  728. }
  729. // If this is a stack pointer and the offset needs to be simplified then
  730. // put the alloca address into a register, set the base type back to
  731. // register and continue. This should almost never happen.
  732. if (needsLowering && Addr.BaseType == Address::FrameIndexBase) {
  733. const TargetRegisterClass *RC = isThumb2 ? &ARM::tGPRRegClass
  734. : &ARM::GPRRegClass;
  735. Register ResultReg = createResultReg(RC);
  736. unsigned Opc = isThumb2 ? ARM::t2ADDri : ARM::ADDri;
  737. AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
  738. TII.get(Opc), ResultReg)
  739. .addFrameIndex(Addr.Base.FI)
  740. .addImm(0));
  741. Addr.Base.Reg = ResultReg;
  742. Addr.BaseType = Address::RegBase;
  743. }
  744. // Since the offset is too large for the load/store instruction
  745. // get the reg+offset into a register.
  746. if (needsLowering) {
  747. Addr.Base.Reg = fastEmit_ri_(MVT::i32, ISD::ADD, Addr.Base.Reg,
  748. Addr.Offset, MVT::i32);
  749. Addr.Offset = 0;
  750. }
  751. }
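// AddLoadStoreOperands - Append the base, offset, and memory operands for a
// load or store, using the addrmode3 register+imm8 form when useAM3 is set.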
  752. void ARMFastISel::AddLoadStoreOperands(MVT VT, Address &Addr,
  753. const MachineInstrBuilder &MIB,
  754. MachineMemOperand::Flags Flags,
  755. bool useAM3) {
  756. // addrmode5 output depends on the SelectionDAG addressing code dividing the
  757. // offset by 4 and multiplying it back later. Do the same here.
  758. if (VT.SimpleTy == MVT::f32 || VT.SimpleTy == MVT::f64)
  759. Addr.Offset /= 4;
  760. // Frame base works a bit differently. Handle it separately.
  761. if (Addr.BaseType == Address::FrameIndexBase) {
  762. int FI = Addr.Base.FI;
  763. int Offset = Addr.Offset;
  764. MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
  765. MachinePointerInfo::getFixedStack(*FuncInfo.MF, FI, Offset), Flags,
  766. MFI.getObjectSize(FI), MFI.getObjectAlign(FI));
  767. // Now add the rest of the operands.
  768. MIB.addFrameIndex(FI);
  769. // ARM halfword load/stores and signed byte loads need an additional
  770. // operand.
  771. if (useAM3) {
  772. int Imm = (Addr.Offset < 0) ? (0x100 | -Addr.Offset) : Addr.Offset;
  773. MIB.addReg(0);
  774. MIB.addImm(Imm);
  775. } else {
  776. MIB.addImm(Addr.Offset);
  777. }
  778. MIB.addMemOperand(MMO);
  779. } else {
  780. // Now add the rest of the operands.
  781. MIB.addReg(Addr.Base.Reg);
  782. // ARM halfword load/stores and signed byte loads need an additional
  783. // operand.
  784. if (useAM3) {
  785. int Imm = (Addr.Offset < 0) ? (0x100 | -Addr.Offset) : Addr.Offset;
  786. MIB.addReg(0);
  787. MIB.addImm(Imm);
  788. } else {
  789. MIB.addImm(Addr.Offset);
  790. }
  791. }
  792. AddOptionalDefs(MIB);
  793. }
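// ARMEmitLoad - Emit a load of VT from Addr into ResultReg, choosing the ARM or
// Thumb2 opcode that matches the type, extension kind, and offset range.
// Unaligned f32 loads go through a core register and a VMOVSR.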
  794. bool ARMFastISel::ARMEmitLoad(MVT VT, Register &ResultReg, Address &Addr,
  795. unsigned Alignment, bool isZExt, bool allocReg) {
  796. unsigned Opc;
  797. bool useAM3 = false;
  798. bool needVMOV = false;
  799. const TargetRegisterClass *RC;
  800. switch (VT.SimpleTy) {
  801. // This is mostly going to be Neon/vector support.
  802. default: return false;
  803. case MVT::i1:
  804. case MVT::i8:
  805. if (isThumb2) {
  806. if (Addr.Offset < 0 && Addr.Offset > -256 && Subtarget->hasV6T2Ops())
  807. Opc = isZExt ? ARM::t2LDRBi8 : ARM::t2LDRSBi8;
  808. else
  809. Opc = isZExt ? ARM::t2LDRBi12 : ARM::t2LDRSBi12;
  810. } else {
  811. if (isZExt) {
  812. Opc = ARM::LDRBi12;
  813. } else {
  814. Opc = ARM::LDRSB;
  815. useAM3 = true;
  816. }
  817. }
  818. RC = isThumb2 ? &ARM::rGPRRegClass : &ARM::GPRnopcRegClass;
  819. break;
  820. case MVT::i16:
  821. if (Alignment && Alignment < 2 && !Subtarget->allowsUnalignedMem())
  822. return false;
  823. if (isThumb2) {
  824. if (Addr.Offset < 0 && Addr.Offset > -256 && Subtarget->hasV6T2Ops())
  825. Opc = isZExt ? ARM::t2LDRHi8 : ARM::t2LDRSHi8;
  826. else
  827. Opc = isZExt ? ARM::t2LDRHi12 : ARM::t2LDRSHi12;
  828. } else {
  829. Opc = isZExt ? ARM::LDRH : ARM::LDRSH;
  830. useAM3 = true;
  831. }
  832. RC = isThumb2 ? &ARM::rGPRRegClass : &ARM::GPRnopcRegClass;
  833. break;
  834. case MVT::i32:
  835. if (Alignment && Alignment < 4 && !Subtarget->allowsUnalignedMem())
  836. return false;
  837. if (isThumb2) {
  838. if (Addr.Offset < 0 && Addr.Offset > -256 && Subtarget->hasV6T2Ops())
  839. Opc = ARM::t2LDRi8;
  840. else
  841. Opc = ARM::t2LDRi12;
  842. } else {
  843. Opc = ARM::LDRi12;
  844. }
  845. RC = isThumb2 ? &ARM::rGPRRegClass : &ARM::GPRnopcRegClass;
  846. break;
  847. case MVT::f32:
  848. if (!Subtarget->hasVFP2Base()) return false;
  849. // Unaligned loads need special handling. Floats require word-alignment.
  850. if (Alignment && Alignment < 4) {
  851. needVMOV = true;
  852. VT = MVT::i32;
  853. Opc = isThumb2 ? ARM::t2LDRi12 : ARM::LDRi12;
  854. RC = isThumb2 ? &ARM::rGPRRegClass : &ARM::GPRnopcRegClass;
  855. } else {
  856. Opc = ARM::VLDRS;
  857. RC = TLI.getRegClassFor(VT);
  858. }
  859. break;
  860. case MVT::f64:
  861. // Can load and store double precision even without FeatureFP64
  862. if (!Subtarget->hasVFP2Base()) return false;
  863. // FIXME: Unaligned loads need special handling. Doublewords require
  864. // word-alignment.
  865. if (Alignment && Alignment < 4)
  866. return false;
  867. Opc = ARM::VLDRD;
  868. RC = TLI.getRegClassFor(VT);
  869. break;
  870. }
  871. // Simplify this down to something we can handle.
  872. ARMSimplifyAddress(Addr, VT, useAM3);
  873. // Create the base instruction, then add the operands.
  874. if (allocReg)
  875. ResultReg = createResultReg(RC);
  876. assert(ResultReg > 255 && "Expected an allocated virtual register.");
  877. MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
  878. TII.get(Opc), ResultReg);
  879. AddLoadStoreOperands(VT, Addr, MIB, MachineMemOperand::MOLoad, useAM3);
  880. // If we had an unaligned load of a float we've converted it to a regular
  881. // load. Now we must move from the GPR to the FP register.
  882. if (needVMOV) {
  883. Register MoveReg = createResultReg(TLI.getRegClassFor(MVT::f32));
  884. AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
  885. TII.get(ARM::VMOVSR), MoveReg)
  886. .addReg(ResultReg));
  887. ResultReg = MoveReg;
  888. }
  889. return true;
  890. }
  891. bool ARMFastISel::SelectLoad(const Instruction *I) {
  892. // Atomic loads need special handling.
  893. if (cast<LoadInst>(I)->isAtomic())
  894. return false;
  895. const Value *SV = I->getOperand(0);
  896. if (TLI.supportSwiftError()) {
  897. // Swifterror values can come from either a function parameter with
  898. // swifterror attribute or an alloca with swifterror attribute.
  899. if (const Argument *Arg = dyn_cast<Argument>(SV)) {
  900. if (Arg->hasSwiftErrorAttr())
  901. return false;
  902. }
  903. if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(SV)) {
  904. if (Alloca->isSwiftError())
  905. return false;
  906. }
  907. }
  908. // Verify we have a legal type before going any further.
  909. MVT VT;
  910. if (!isLoadTypeLegal(I->getType(), VT))
  911. return false;
  912. // See if we can handle this address.
  913. Address Addr;
  914. if (!ARMComputeAddress(I->getOperand(0), Addr)) return false;
  915. Register ResultReg;
  916. if (!ARMEmitLoad(VT, ResultReg, Addr, cast<LoadInst>(I)->getAlignment()))
  917. return false;
  918. updateValueMap(I, ResultReg);
  919. return true;
  920. }
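// ARMEmitStore - Emit a store of SrcReg to Addr. i1 values are masked with
// AND #1 first, and unaligned f32 stores are moved to a core register with
// VMOVRS and stored as i32.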
  921. bool ARMFastISel::ARMEmitStore(MVT VT, unsigned SrcReg, Address &Addr,
  922. unsigned Alignment) {
  923. unsigned StrOpc;
  924. bool useAM3 = false;
  925. switch (VT.SimpleTy) {
  926. // This is mostly going to be Neon/vector support.
  927. default: return false;
  928. case MVT::i1: {
  929. Register Res = createResultReg(isThumb2 ? &ARM::tGPRRegClass
  930. : &ARM::GPRRegClass);
  931. unsigned Opc = isThumb2 ? ARM::t2ANDri : ARM::ANDri;
  932. SrcReg = constrainOperandRegClass(TII.get(Opc), SrcReg, 1);
  933. AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
  934. TII.get(Opc), Res)
  935. .addReg(SrcReg).addImm(1));
  936. SrcReg = Res;
  937. LLVM_FALLTHROUGH;
  938. }
  939. case MVT::i8:
  940. if (isThumb2) {
  941. if (Addr.Offset < 0 && Addr.Offset > -256 && Subtarget->hasV6T2Ops())
  942. StrOpc = ARM::t2STRBi8;
  943. else
  944. StrOpc = ARM::t2STRBi12;
  945. } else {
  946. StrOpc = ARM::STRBi12;
  947. }
  948. break;
  949. case MVT::i16:
  950. if (Alignment && Alignment < 2 && !Subtarget->allowsUnalignedMem())
  951. return false;
  952. if (isThumb2) {
  953. if (Addr.Offset < 0 && Addr.Offset > -256 && Subtarget->hasV6T2Ops())
  954. StrOpc = ARM::t2STRHi8;
  955. else
  956. StrOpc = ARM::t2STRHi12;
  957. } else {
  958. StrOpc = ARM::STRH;
  959. useAM3 = true;
  960. }
  961. break;
  962. case MVT::i32:
  963. if (Alignment && Alignment < 4 && !Subtarget->allowsUnalignedMem())
  964. return false;
  965. if (isThumb2) {
  966. if (Addr.Offset < 0 && Addr.Offset > -256 && Subtarget->hasV6T2Ops())
  967. StrOpc = ARM::t2STRi8;
  968. else
  969. StrOpc = ARM::t2STRi12;
  970. } else {
  971. StrOpc = ARM::STRi12;
  972. }
  973. break;
  974. case MVT::f32:
  975. if (!Subtarget->hasVFP2Base()) return false;
  976. // Unaligned stores need special handling. Floats require word-alignment.
  977. if (Alignment && Alignment < 4) {
  978. Register MoveReg = createResultReg(TLI.getRegClassFor(MVT::i32));
  979. AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
  980. TII.get(ARM::VMOVRS), MoveReg)
  981. .addReg(SrcReg));
  982. SrcReg = MoveReg;
  983. VT = MVT::i32;
  984. StrOpc = isThumb2 ? ARM::t2STRi12 : ARM::STRi12;
  985. } else {
  986. StrOpc = ARM::VSTRS;
  987. }
  988. break;
  989. case MVT::f64:
  990. // Can load and store double precision even without FeatureFP64
  991. if (!Subtarget->hasVFP2Base()) return false;
  992. // FIXME: Unaligned stores need special handling. Doublewords require
  993. // word-alignment.
  994. if (Alignment && Alignment < 4)
  995. return false;
  996. StrOpc = ARM::VSTRD;
  997. break;
  998. }
  999. // Simplify this down to something we can handle.
  1000. ARMSimplifyAddress(Addr, VT, useAM3);
  1001. // Create the base instruction, then add the operands.
  1002. SrcReg = constrainOperandRegClass(TII.get(StrOpc), SrcReg, 0);
  1003. MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
  1004. TII.get(StrOpc))
  1005. .addReg(SrcReg);
  1006. AddLoadStoreOperands(VT, Addr, MIB, MachineMemOperand::MOStore, useAM3);
  1007. return true;
  1008. }
  1009. bool ARMFastISel::SelectStore(const Instruction *I) {
  1010. Value *Op0 = I->getOperand(0);
  1011. unsigned SrcReg = 0;
  1012. // Atomic stores need special handling.
  1013. if (cast<StoreInst>(I)->isAtomic())
  1014. return false;
  1015. const Value *PtrV = I->getOperand(1);
  1016. if (TLI.supportSwiftError()) {
  1017. // Swifterror values can come from either a function parameter with
  1018. // swifterror attribute or an alloca with swifterror attribute.
  1019. if (const Argument *Arg = dyn_cast<Argument>(PtrV)) {
  1020. if (Arg->hasSwiftErrorAttr())
  1021. return false;
  1022. }
  1023. if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(PtrV)) {
  1024. if (Alloca->isSwiftError())
  1025. return false;
  1026. }
  1027. }
  1028. // Verify we have a legal type before going any further.
  1029. MVT VT;
  1030. if (!isLoadTypeLegal(I->getOperand(0)->getType(), VT))
  1031. return false;
  1032. // Get the value to be stored into a register.
  1033. SrcReg = getRegForValue(Op0);
  1034. if (SrcReg == 0) return false;
  1035. // See if we can handle this address.
  1036. Address Addr;
  1037. if (!ARMComputeAddress(I->getOperand(1), Addr))
  1038. return false;
  1039. if (!ARMEmitStore(VT, SrcReg, Addr, cast<StoreInst>(I)->getAlignment()))
  1040. return false;
  1041. return true;
  1042. }
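// getComparePred - Map an IR comparison predicate onto the ARM condition code
// used by the conditional instructions emitted below. ARMCC::AL is returned
// for predicates that would need two compares and is treated as "unhandled".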
  1043. static ARMCC::CondCodes getComparePred(CmpInst::Predicate Pred) {
  1044. switch (Pred) {
  1045. // Needs two compares...
  1046. case CmpInst::FCMP_ONE:
  1047. case CmpInst::FCMP_UEQ:
  1048. default:
  1049. // AL is our "false" for now. The other two need more compares.
  1050. return ARMCC::AL;
  1051. case CmpInst::ICMP_EQ:
  1052. case CmpInst::FCMP_OEQ:
  1053. return ARMCC::EQ;
  1054. case CmpInst::ICMP_SGT:
  1055. case CmpInst::FCMP_OGT:
  1056. return ARMCC::GT;
  1057. case CmpInst::ICMP_SGE:
  1058. case CmpInst::FCMP_OGE:
  1059. return ARMCC::GE;
  1060. case CmpInst::ICMP_UGT:
  1061. case CmpInst::FCMP_UGT:
  1062. return ARMCC::HI;
  1063. case CmpInst::FCMP_OLT:
  1064. return ARMCC::MI;
  1065. case CmpInst::ICMP_ULE:
  1066. case CmpInst::FCMP_OLE:
  1067. return ARMCC::LS;
  1068. case CmpInst::FCMP_ORD:
  1069. return ARMCC::VC;
  1070. case CmpInst::FCMP_UNO:
  1071. return ARMCC::VS;
  1072. case CmpInst::FCMP_UGE:
  1073. return ARMCC::PL;
  1074. case CmpInst::ICMP_SLT:
  1075. case CmpInst::FCMP_ULT:
  1076. return ARMCC::LT;
  1077. case CmpInst::ICMP_SLE:
  1078. case CmpInst::FCMP_ULE:
  1079. return ARMCC::LE;
  1080. case CmpInst::FCMP_UNE:
  1081. case CmpInst::ICMP_NE:
  1082. return ARMCC::NE;
  1083. case CmpInst::ICMP_UGE:
  1084. return ARMCC::HS;
  1085. case CmpInst::ICMP_ULT:
  1086. return ARMCC::LO;
  1087. }
  1088. }
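// SelectBranch - Lower a branch. Reuses a compare or trunc that feeds the
// branch when it lives in the same block; otherwise the one-bit condition
// value is tested with TST #1 and a conditional Bcc is emitted.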
  1089. bool ARMFastISel::SelectBranch(const Instruction *I) {
  1090. const BranchInst *BI = cast<BranchInst>(I);
  1091. MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
  1092. MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)];
  1093. // Simple branch support.
  1094. // If we can, avoid recomputing the compare - redoing it could lead to wonky
  1095. // behavior.
  1096. if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) {
  1097. if (CI->hasOneUse() && (CI->getParent() == I->getParent())) {
  1098. // Get the compare predicate.
  1099. // Try to take advantage of fallthrough opportunities.
  1100. CmpInst::Predicate Predicate = CI->getPredicate();
  1101. if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
  1102. std::swap(TBB, FBB);
  1103. Predicate = CmpInst::getInversePredicate(Predicate);
  1104. }
  1105. ARMCC::CondCodes ARMPred = getComparePred(Predicate);
  1106. // We may not handle every CC for now.
  1107. if (ARMPred == ARMCC::AL) return false;
  1108. // Emit the compare.
  1109. if (!ARMEmitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned()))
  1110. return false;
  1111. unsigned BrOpc = isThumb2 ? ARM::t2Bcc : ARM::Bcc;
  1112. BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(BrOpc))
  1113. .addMBB(TBB).addImm(ARMPred).addReg(ARM::CPSR);
  1114. finishCondBranch(BI->getParent(), TBB, FBB);
  1115. return true;
  1116. }
  1117. } else if (TruncInst *TI = dyn_cast<TruncInst>(BI->getCondition())) {
  1118. MVT SourceVT;
  1119. if (TI->hasOneUse() && TI->getParent() == I->getParent() &&
  1120. (isLoadTypeLegal(TI->getOperand(0)->getType(), SourceVT))) {
  1121. unsigned TstOpc = isThumb2 ? ARM::t2TSTri : ARM::TSTri;
  1122. Register OpReg = getRegForValue(TI->getOperand(0));
  1123. OpReg = constrainOperandRegClass(TII.get(TstOpc), OpReg, 0);
  1124. AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
  1125. TII.get(TstOpc))
  1126. .addReg(OpReg).addImm(1));
  1127. unsigned CCMode = ARMCC::NE;
  1128. if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
  1129. std::swap(TBB, FBB);
  1130. CCMode = ARMCC::EQ;
  1131. }
  1132. unsigned BrOpc = isThumb2 ? ARM::t2Bcc : ARM::Bcc;
  1133. BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(BrOpc))
  1134. .addMBB(TBB).addImm(CCMode).addReg(ARM::CPSR);
  1135. finishCondBranch(BI->getParent(), TBB, FBB);
  1136. return true;
  1137. }
  1138. } else if (const ConstantInt *CI =
  1139. dyn_cast<ConstantInt>(BI->getCondition())) {
  1140. uint64_t Imm = CI->getZExtValue();
  1141. MachineBasicBlock *Target = (Imm == 0) ? FBB : TBB;
  1142. fastEmitBranch(Target, DbgLoc);
  1143. return true;
  1144. }
  1145. Register CmpReg = getRegForValue(BI->getCondition());
  1146. if (CmpReg == 0) return false;
  1147. // We've been divorced from our compare! Our block was split, and
  1148. // now our compare lives in a predecessor block. We mustn't
  1149. // re-compare here, as the children of the compare aren't guaranteed
  1150. // live across the block boundary (we *could* check for this).
  1151. // Regardless, the compare has been done in the predecessor block,
  1152. // and it left a value for us in a virtual register. Ergo, we test
  1153. // the one-bit value left in the virtual register.
  1154. unsigned TstOpc = isThumb2 ? ARM::t2TSTri : ARM::TSTri;
  1155. CmpReg = constrainOperandRegClass(TII.get(TstOpc), CmpReg, 0);
  1156. AddOptionalDefs(
  1157. BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TstOpc))
  1158. .addReg(CmpReg)
  1159. .addImm(1));
  1160. unsigned CCMode = ARMCC::NE;
  1161. if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
  1162. std::swap(TBB, FBB);
  1163. CCMode = ARMCC::EQ;
  1164. }
  1165. unsigned BrOpc = isThumb2 ? ARM::t2Bcc : ARM::Bcc;
  1166. BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(BrOpc))
  1167. .addMBB(TBB).addImm(CCMode).addReg(ARM::CPSR);
  1168. finishCondBranch(BI->getParent(), TBB, FBB);
  1169. return true;
  1170. }
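// Lower an indirect branch as a BX/tBRIND through the address register, and
// record every listed destination as a successor of the current block.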
  1171. bool ARMFastISel::SelectIndirectBr(const Instruction *I) {
  1172. Register AddrReg = getRegForValue(I->getOperand(0));
  1173. if (AddrReg == 0) return false;
  1174. unsigned Opc = isThumb2 ? ARM::tBRIND : ARM::BX;
  1175. assert(isThumb2 || Subtarget->hasV4TOps());
  1176. AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
  1177. TII.get(Opc)).addReg(AddrReg));
  1178. const IndirectBrInst *IB = cast<IndirectBrInst>(I);
  1179. for (const BasicBlock *SuccBB : IB->successors())
  1180. FuncInfo.MBB->addSuccessor(FuncInfo.MBBMap[SuccBB]);
  1181. return true;
  1182. }
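// Emit a compare of Src1Value against Src2Value, leaving the result in CPSR
// (via FMSTAT for VFP compares). Narrow integer operands are extended to i32
// first, and encodable immediates are folded into a CMP (or CMN when negated).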
  1183. bool ARMFastISel::ARMEmitCmp(const Value *Src1Value, const Value *Src2Value,
  1184. bool isZExt) {
  1185. Type *Ty = Src1Value->getType();
  1186. EVT SrcEVT = TLI.getValueType(DL, Ty, true);
  1187. if (!SrcEVT.isSimple()) return false;
  1188. MVT SrcVT = SrcEVT.getSimpleVT();
  1189. if (Ty->isFloatTy() && !Subtarget->hasVFP2Base())
  1190. return false;
  1191. if (Ty->isDoubleTy() && (!Subtarget->hasVFP2Base() || !Subtarget->hasFP64()))
  1192. return false;
  1193. // Check to see if the 2nd operand is a constant that we can encode directly
  1194. // in the compare.
  1195. int Imm = 0;
  1196. bool UseImm = false;
  1197. bool isNegativeImm = false;
  1198. // FIXME: At -O0 we don't have anything that canonicalizes operand order.
  1199. // Thus, Src1Value may be a ConstantInt, but we're missing it.
  1200. if (const ConstantInt *ConstInt = dyn_cast<ConstantInt>(Src2Value)) {
  1201. if (SrcVT == MVT::i32 || SrcVT == MVT::i16 || SrcVT == MVT::i8 ||
  1202. SrcVT == MVT::i1) {
  1203. const APInt &CIVal = ConstInt->getValue();
  1204. Imm = (isZExt) ? (int)CIVal.getZExtValue() : (int)CIVal.getSExtValue();
  1205. // For INT_MIN/LONG_MIN (i.e., 0x80000000) we need to use a cmp, rather
  1206. // than a cmn, because there is no way to represent 2147483648 as a
  1207. // signed 32-bit int.
  1208. if (Imm < 0 && Imm != (int)0x80000000) {
  1209. isNegativeImm = true;
  1210. Imm = -Imm;
  1211. }
  1212. UseImm = isThumb2 ? (ARM_AM::getT2SOImmVal(Imm) != -1) :
  1213. (ARM_AM::getSOImmVal(Imm) != -1);
  1214. }
  1215. } else if (const ConstantFP *ConstFP = dyn_cast<ConstantFP>(Src2Value)) {
  1216. if (SrcVT == MVT::f32 || SrcVT == MVT::f64)
  1217. if (ConstFP->isZero() && !ConstFP->isNegative())
  1218. UseImm = true;
  1219. }
  1220. unsigned CmpOpc;
  1221. bool isICmp = true;
  1222. bool needsExt = false;
  1223. switch (SrcVT.SimpleTy) {
  1224. default: return false;
  1225. // TODO: Verify compares.
  1226. case MVT::f32:
  1227. isICmp = false;
  1228. CmpOpc = UseImm ? ARM::VCMPZS : ARM::VCMPS;
  1229. break;
  1230. case MVT::f64:
  1231. isICmp = false;
  1232. CmpOpc = UseImm ? ARM::VCMPZD : ARM::VCMPD;
  1233. break;
  1234. case MVT::i1:
  1235. case MVT::i8:
  1236. case MVT::i16:
  1237. needsExt = true;
  1238. LLVM_FALLTHROUGH;
  1239. case MVT::i32:
  1240. if (isThumb2) {
  1241. if (!UseImm)
  1242. CmpOpc = ARM::t2CMPrr;
  1243. else
  1244. CmpOpc = isNegativeImm ? ARM::t2CMNri : ARM::t2CMPri;
  1245. } else {
  1246. if (!UseImm)
  1247. CmpOpc = ARM::CMPrr;
  1248. else
  1249. CmpOpc = isNegativeImm ? ARM::CMNri : ARM::CMPri;
  1250. }
  1251. break;
  1252. }
  1253. Register SrcReg1 = getRegForValue(Src1Value);
  1254. if (SrcReg1 == 0) return false;
  1255. unsigned SrcReg2 = 0;
  1256. if (!UseImm) {
  1257. SrcReg2 = getRegForValue(Src2Value);
  1258. if (SrcReg2 == 0) return false;
  1259. }
  1260. // We have i1, i8, or i16; we need to either zero extend or sign extend.
  1261. if (needsExt) {
  1262. SrcReg1 = ARMEmitIntExt(SrcVT, SrcReg1, MVT::i32, isZExt);
  1263. if (SrcReg1 == 0) return false;
  1264. if (!UseImm) {
  1265. SrcReg2 = ARMEmitIntExt(SrcVT, SrcReg2, MVT::i32, isZExt);
  1266. if (SrcReg2 == 0) return false;
  1267. }
  1268. }
  1269. const MCInstrDesc &II = TII.get(CmpOpc);
  1270. SrcReg1 = constrainOperandRegClass(II, SrcReg1, 0);
  1271. if (!UseImm) {
  1272. SrcReg2 = constrainOperandRegClass(II, SrcReg2, 1);
  1273. AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
  1274. .addReg(SrcReg1).addReg(SrcReg2));
  1275. } else {
  1276. MachineInstrBuilder MIB;
  1277. MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
  1278. .addReg(SrcReg1);
  1279. // Only add immediate for icmp as the immediate for fcmp is an implicit 0.0.
  1280. if (isICmp)
  1281. MIB.addImm(Imm);
  1282. AddOptionalDefs(MIB);
  1283. }
  1284. // For floating point we need to move the result to a comparison register
  1285. // that we can then use for branches.
  1286. if (Ty->isFloatTy() || Ty->isDoubleTy())
  1287. AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
  1288. TII.get(ARM::FMSTAT)));
  1289. return true;
  1290. }
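// Lower an icmp/fcmp whose result is used as a value: emit the compare, then
// materialize 0 or 1 into a register with a predicated move.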
  1291. bool ARMFastISel::SelectCmp(const Instruction *I) {
  1292. const CmpInst *CI = cast<CmpInst>(I);
  1293. // Get the compare predicate.
  1294. ARMCC::CondCodes ARMPred = getComparePred(CI->getPredicate());
  1295. // We may not handle every CC for now.
  1296. if (ARMPred == ARMCC::AL) return false;
  1297. // Emit the compare.
  1298. if (!ARMEmitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned()))
  1299. return false;
  1300. // Now set a register based on the comparison. Explicitly set the predicates
  1301. // here.
  1302. unsigned MovCCOpc = isThumb2 ? ARM::t2MOVCCi : ARM::MOVCCi;
  1303. const TargetRegisterClass *RC = isThumb2 ? &ARM::rGPRRegClass
  1304. : &ARM::GPRRegClass;
  1305. Register DestReg = createResultReg(RC);
  1306. Constant *Zero = ConstantInt::get(Type::getInt32Ty(*Context), 0);
  1307. unsigned ZeroReg = fastMaterializeConstant(Zero);
  1308. // ARMEmitCmp emits a FMSTAT when necessary, so it's always safe to use CPSR.
  1309. BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(MovCCOpc), DestReg)
  1310. .addReg(ZeroReg).addImm(1)
  1311. .addImm(ARMPred).addReg(ARM::CPSR);
  1312. updateValueMap(I, DestReg);
  1313. return true;
  1314. }
  1315. bool ARMFastISel::SelectFPExt(const Instruction *I) {
  1316. // Make sure we have VFP and that we're extending float to double.
  1317. if (!Subtarget->hasVFP2Base() || !Subtarget->hasFP64()) return false;
  1318. Value *V = I->getOperand(0);
  1319. if (!I->getType()->isDoubleTy() ||
  1320. !V->getType()->isFloatTy()) return false;
  1321. Register Op = getRegForValue(V);
  1322. if (Op == 0) return false;
  1323. Register Result = createResultReg(&ARM::DPRRegClass);
  1324. AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
  1325. TII.get(ARM::VCVTDS), Result)
  1326. .addReg(Op));
  1327. updateValueMap(I, Result);
  1328. return true;
  1329. }
  1330. bool ARMFastISel::SelectFPTrunc(const Instruction *I) {
  1331. // Make sure we have VFP and that we're truncating double to float.
  1332. if (!Subtarget->hasVFP2Base() || !Subtarget->hasFP64()) return false;
  1333. Value *V = I->getOperand(0);
  1334. if (!(I->getType()->isFloatTy() &&
  1335. V->getType()->isDoubleTy())) return false;
  1336. Register Op = getRegForValue(V);
  1337. if (Op == 0) return false;
  1338. Register Result = createResultReg(&ARM::SPRRegClass);
  1339. AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
  1340. TII.get(ARM::VCVTSD), Result)
  1341. .addReg(Op));
  1342. updateValueMap(I, Result);
  1343. return true;
  1344. }
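// Lower sitofp/uitofp: extend i8/i16 sources to i32, move the integer into an
// FP register, then convert with VSITO*/VUITO*.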
  1345. bool ARMFastISel::SelectIToFP(const Instruction *I, bool isSigned) {
  1346. // Make sure we have VFP.
  1347. if (!Subtarget->hasVFP2Base()) return false;
  1348. MVT DstVT;
  1349. Type *Ty = I->getType();
  1350. if (!isTypeLegal(Ty, DstVT))
  1351. return false;
  1352. Value *Src = I->getOperand(0);
  1353. EVT SrcEVT = TLI.getValueType(DL, Src->getType(), true);
  1354. if (!SrcEVT.isSimple())
  1355. return false;
  1356. MVT SrcVT = SrcEVT.getSimpleVT();
  1357. if (SrcVT != MVT::i32 && SrcVT != MVT::i16 && SrcVT != MVT::i8)
  1358. return false;
  1359. Register SrcReg = getRegForValue(Src);
  1360. if (SrcReg == 0) return false;
  1361. // Handle sign-extension.
  1362. if (SrcVT == MVT::i16 || SrcVT == MVT::i8) {
  1363. SrcReg = ARMEmitIntExt(SrcVT, SrcReg, MVT::i32,
  1364. /*isZExt*/!isSigned);
  1365. if (SrcReg == 0) return false;
  1366. }
  1367. // The conversion routine works on fp-reg to fp-reg and the operand above
  1368. // was an integer, move it to the fp registers if possible.
  1369. unsigned FP = ARMMoveToFPReg(MVT::f32, SrcReg);
  1370. if (FP == 0) return false;
  1371. unsigned Opc;
  1372. if (Ty->isFloatTy()) Opc = isSigned ? ARM::VSITOS : ARM::VUITOS;
  1373. else if (Ty->isDoubleTy() && Subtarget->hasFP64())
  1374. Opc = isSigned ? ARM::VSITOD : ARM::VUITOD;
  1375. else return false;
  1376. Register ResultReg = createResultReg(TLI.getRegClassFor(DstVT));
  1377. AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
  1378. TII.get(Opc), ResultReg).addReg(FP));
  1379. updateValueMap(I, ResultReg);
  1380. return true;
  1381. }
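// Lower fptosi/fptoui with VTOSIZ*/VTOUIZ*; the conversion result lives in an
// FP register and is moved back into an integer register afterwards.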
  1382. bool ARMFastISel::SelectFPToI(const Instruction *I, bool isSigned) {
  1383. // Make sure we have VFP.
  1384. if (!Subtarget->hasVFP2Base()) return false;
  1385. MVT DstVT;
  1386. Type *RetTy = I->getType();
  1387. if (!isTypeLegal(RetTy, DstVT))
  1388. return false;
  1389. Register Op = getRegForValue(I->getOperand(0));
  1390. if (Op == 0) return false;
  1391. unsigned Opc;
  1392. Type *OpTy = I->getOperand(0)->getType();
  1393. if (OpTy->isFloatTy()) Opc = isSigned ? ARM::VTOSIZS : ARM::VTOUIZS;
  1394. else if (OpTy->isDoubleTy() && Subtarget->hasFP64())
  1395. Opc = isSigned ? ARM::VTOSIZD : ARM::VTOUIZD;
  1396. else return false;
  1397. // f64->s32/u32 or f32->s32/u32 both need an intermediate f32 reg.
  1398. Register ResultReg = createResultReg(TLI.getRegClassFor(MVT::f32));
  1399. AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
  1400. TII.get(Opc), ResultReg).addReg(Op));
  1401. // This result needs to be in an integer register, but the conversion only
  1402. // takes place in fp-regs.
  1403. unsigned IntReg = ARMMoveToIntReg(DstVT, ResultReg);
  1404. if (IntReg == 0) return false;
  1405. updateValueMap(I, IntReg);
  1406. return true;
  1407. }
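// Lower an i32 select as a TST of the condition followed by a predicated move,
// folding the false operand into a MOVCCi/MVNCCi immediate when it is encodable.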
  1408. bool ARMFastISel::SelectSelect(const Instruction *I) {
  1409. MVT VT;
  1410. if (!isTypeLegal(I->getType(), VT))
  1411. return false;
  1412. // Things need to be register sized for register moves.
  1413. if (VT != MVT::i32) return false;
  1414. Register CondReg = getRegForValue(I->getOperand(0));
  1415. if (CondReg == 0) return false;
  1416. Register Op1Reg = getRegForValue(I->getOperand(1));
  1417. if (Op1Reg == 0) return false;
  1418. // Check to see if we can use an immediate in the conditional move.
  1419. int Imm = 0;
  1420. bool UseImm = false;
  1421. bool isNegativeImm = false;
  1422. if (const ConstantInt *ConstInt = dyn_cast<ConstantInt>(I->getOperand(2))) {
  1423. assert(VT == MVT::i32 && "Expecting an i32.");
  1424. Imm = (int)ConstInt->getValue().getZExtValue();
  1425. if (Imm < 0) {
  1426. isNegativeImm = true;
  1427. Imm = ~Imm;
  1428. }
  1429. UseImm = isThumb2 ? (ARM_AM::getT2SOImmVal(Imm) != -1) :
  1430. (ARM_AM::getSOImmVal(Imm) != -1);
  1431. }
  1432. unsigned Op2Reg = 0;
  1433. if (!UseImm) {
  1434. Op2Reg = getRegForValue(I->getOperand(2));
  1435. if (Op2Reg == 0) return false;
  1436. }
  1437. unsigned TstOpc = isThumb2 ? ARM::t2TSTri : ARM::TSTri;
  1438. CondReg = constrainOperandRegClass(TII.get(TstOpc), CondReg, 0);
  1439. AddOptionalDefs(
  1440. BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TstOpc))
  1441. .addReg(CondReg)
  1442. .addImm(1));
  1443. unsigned MovCCOpc;
  1444. const TargetRegisterClass *RC;
  1445. if (!UseImm) {
  1446. RC = isThumb2 ? &ARM::tGPRRegClass : &ARM::GPRRegClass;
  1447. MovCCOpc = isThumb2 ? ARM::t2MOVCCr : ARM::MOVCCr;
  1448. } else {
  1449. RC = isThumb2 ? &ARM::rGPRRegClass : &ARM::GPRRegClass;
  1450. if (!isNegativeImm)
  1451. MovCCOpc = isThumb2 ? ARM::t2MOVCCi : ARM::MOVCCi;
  1452. else
  1453. MovCCOpc = isThumb2 ? ARM::t2MVNCCi : ARM::MVNCCi;
  1454. }
  1455. Register ResultReg = createResultReg(RC);
  1456. if (!UseImm) {
  1457. Op2Reg = constrainOperandRegClass(TII.get(MovCCOpc), Op2Reg, 1);
  1458. Op1Reg = constrainOperandRegClass(TII.get(MovCCOpc), Op1Reg, 2);
  1459. BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(MovCCOpc),
  1460. ResultReg)
  1461. .addReg(Op2Reg)
  1462. .addReg(Op1Reg)
  1463. .addImm(ARMCC::NE)
  1464. .addReg(ARM::CPSR);
  1465. } else {
  1466. Op1Reg = constrainOperandRegClass(TII.get(MovCCOpc), Op1Reg, 1);
  1467. BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(MovCCOpc),
  1468. ResultReg)
  1469. .addReg(Op1Reg)
  1470. .addImm(Imm)
  1471. .addImm(ARMCC::EQ)
  1472. .addReg(ARM::CPSR);
  1473. }
  1474. updateValueMap(I, ResultReg);
  1475. return true;
  1476. }
  1477. bool ARMFastISel::SelectDiv(const Instruction *I, bool isSigned) {
  1478. MVT VT;
  1479. Type *Ty = I->getType();
  1480. if (!isTypeLegal(Ty, VT))
  1481. return false;
  1482. // If we have integer div support we should have selected this automagically.
  1483. // In case we have a real miss, go ahead and return false and we'll pick
  1484. // it up later.
  1485. if (Subtarget->hasDivideInThumbMode())
  1486. return false;
  1487. // Otherwise emit a libcall.
  1488. RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
  1489. if (VT == MVT::i8)
  1490. LC = isSigned ? RTLIB::SDIV_I8 : RTLIB::UDIV_I8;
  1491. else if (VT == MVT::i16)
  1492. LC = isSigned ? RTLIB::SDIV_I16 : RTLIB::UDIV_I16;
  1493. else if (VT == MVT::i32)
  1494. LC = isSigned ? RTLIB::SDIV_I32 : RTLIB::UDIV_I32;
  1495. else if (VT == MVT::i64)
  1496. LC = isSigned ? RTLIB::SDIV_I64 : RTLIB::UDIV_I64;
  1497. else if (VT == MVT::i128)
  1498. LC = isSigned ? RTLIB::SDIV_I128 : RTLIB::UDIV_I128;
  1499. assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported SDIV!");
  1500. return ARMEmitLibcall(I, LC);
  1501. }
  1502. bool ARMFastISel::SelectRem(const Instruction *I, bool isSigned) {
  1503. MVT VT;
  1504. Type *Ty = I->getType();
  1505. if (!isTypeLegal(Ty, VT))
  1506. return false;
  1507. // Many ABIs do not provide a libcall for standalone remainder, so we need to
  1508. // use divrem (see the RTABI 4.3.1). Since FastISel can't handle non-double
  1509. // multi-reg returns, we'll have to bail out.
  1510. if (!TLI.hasStandaloneRem(VT)) {
  1511. return false;
  1512. }
  1513. RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
  1514. if (VT == MVT::i8)
  1515. LC = isSigned ? RTLIB::SREM_I8 : RTLIB::UREM_I8;
  1516. else if (VT == MVT::i16)
  1517. LC = isSigned ? RTLIB::SREM_I16 : RTLIB::UREM_I16;
  1518. else if (VT == MVT::i32)
  1519. LC = isSigned ? RTLIB::SREM_I32 : RTLIB::UREM_I32;
  1520. else if (VT == MVT::i64)
  1521. LC = isSigned ? RTLIB::SREM_I64 : RTLIB::UREM_I64;
  1522. else if (VT == MVT::i128)
  1523. LC = isSigned ? RTLIB::SREM_I128 : RTLIB::UREM_I128;
  1524. assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported SREM!");
  1525. return ARMEmitLibcall(I, LC);
  1526. }
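// Handle add/or/sub on the narrow integer types (i1/i8/i16) the generic
// selector rejected, using the 32-bit register-register forms.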
  1527. bool ARMFastISel::SelectBinaryIntOp(const Instruction *I, unsigned ISDOpcode) {
  1528. EVT DestVT = TLI.getValueType(DL, I->getType(), true);
  1529. // We can get here in the case when we have a binary operation on a non-legal
  1530. // type and the target independent selector doesn't know how to handle it.
  1531. if (DestVT != MVT::i16 && DestVT != MVT::i8 && DestVT != MVT::i1)
  1532. return false;
  1533. unsigned Opc;
  1534. switch (ISDOpcode) {
  1535. default: return false;
  1536. case ISD::ADD:
  1537. Opc = isThumb2 ? ARM::t2ADDrr : ARM::ADDrr;
  1538. break;
  1539. case ISD::OR:
  1540. Opc = isThumb2 ? ARM::t2ORRrr : ARM::ORRrr;
  1541. break;
  1542. case ISD::SUB:
  1543. Opc = isThumb2 ? ARM::t2SUBrr : ARM::SUBrr;
  1544. break;
  1545. }
  1546. Register SrcReg1 = getRegForValue(I->getOperand(0));
  1547. if (SrcReg1 == 0) return false;
  1548. // TODO: Often the 2nd operand is an immediate, which can be encoded directly
  1549. // in the instruction, rather than materializing the value in a register.
  1550. Register SrcReg2 = getRegForValue(I->getOperand(1));
  1551. if (SrcReg2 == 0) return false;
  1552. Register ResultReg = createResultReg(&ARM::GPRnopcRegClass);
  1553. SrcReg1 = constrainOperandRegClass(TII.get(Opc), SrcReg1, 1);
  1554. SrcReg2 = constrainOperandRegClass(TII.get(Opc), SrcReg2, 2);
  1555. AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
  1556. TII.get(Opc), ResultReg)
  1557. .addReg(SrcReg1).addReg(SrcReg2));
  1558. updateValueMap(I, ResultReg);
  1559. return true;
  1560. }
  1561. bool ARMFastISel::SelectBinaryFPOp(const Instruction *I, unsigned ISDOpcode) {
  1562. EVT FPVT = TLI.getValueType(DL, I->getType(), true);
  1563. if (!FPVT.isSimple()) return false;
  1564. MVT VT = FPVT.getSimpleVT();
  1565. // FIXME: Support vector types where possible.
  1566. if (VT.isVector())
  1567. return false;
  1568. // We can get here in the case when we want to use NEON for our fp
  1569. // operations, but can't figure out how to. Just use the vfp instructions
  1570. // if we have them.
  1571. // FIXME: It'd be nice to use NEON instructions.
  1572. Type *Ty = I->getType();
  1573. if (Ty->isFloatTy() && !Subtarget->hasVFP2Base())
  1574. return false;
  1575. if (Ty->isDoubleTy() && (!Subtarget->hasVFP2Base() || !Subtarget->hasFP64()))
  1576. return false;
  1577. unsigned Opc;
  1578. bool is64bit = VT == MVT::f64 || VT == MVT::i64;
  1579. switch (ISDOpcode) {
  1580. default: return false;
  1581. case ISD::FADD:
  1582. Opc = is64bit ? ARM::VADDD : ARM::VADDS;
  1583. break;
  1584. case ISD::FSUB:
  1585. Opc = is64bit ? ARM::VSUBD : ARM::VSUBS;
  1586. break;
  1587. case ISD::FMUL:
  1588. Opc = is64bit ? ARM::VMULD : ARM::VMULS;
  1589. break;
  1590. }
  1591. Register Op1 = getRegForValue(I->getOperand(0));
  1592. if (Op1 == 0) return false;
  1593. Register Op2 = getRegForValue(I->getOperand(1));
  1594. if (Op2 == 0) return false;
  1595. Register ResultReg = createResultReg(TLI.getRegClassFor(VT.SimpleTy));
  1596. AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
  1597. TII.get(Opc), ResultReg)
  1598. .addReg(Op1).addReg(Op2));
  1599. updateValueMap(I, ResultReg);
  1600. return true;
  1601. }
  1602. // Call Handling Code
  1603. // This is largely taken directly from CCAssignFnForNode
  1604. // TODO: We may not support all of this.
  1605. CCAssignFn *ARMFastISel::CCAssignFnForCall(CallingConv::ID CC,
  1606. bool Return,
  1607. bool isVarArg) {
  1608. switch (CC) {
  1609. default:
  1610. report_fatal_error("Unsupported calling convention");
  1611. case CallingConv::Fast:
  1612. if (Subtarget->hasVFP2Base() && !isVarArg) {
  1613. if (!Subtarget->isAAPCS_ABI())
  1614. return (Return ? RetFastCC_ARM_APCS : FastCC_ARM_APCS);
  1615. // For AAPCS ABI targets, just use VFP variant of the calling convention.
  1616. return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP);
  1617. }
  1618. LLVM_FALLTHROUGH;
  1619. case CallingConv::C:
  1620. case CallingConv::CXX_FAST_TLS:
  1621. // Use target triple & subtarget features to do actual dispatch.
  1622. if (Subtarget->isAAPCS_ABI()) {
  1623. if (Subtarget->hasVFP2Base() &&
  1624. TM.Options.FloatABIType == FloatABI::Hard && !isVarArg)
  1625. return (Return ? RetCC_ARM_AAPCS_VFP: CC_ARM_AAPCS_VFP);
  1626. else
  1627. return (Return ? RetCC_ARM_AAPCS: CC_ARM_AAPCS);
  1628. } else {
  1629. return (Return ? RetCC_ARM_APCS: CC_ARM_APCS);
  1630. }
  1631. case CallingConv::ARM_AAPCS_VFP:
  1632. case CallingConv::Swift:
  1633. case CallingConv::SwiftTail:
  1634. if (!isVarArg)
  1635. return (Return ? RetCC_ARM_AAPCS_VFP: CC_ARM_AAPCS_VFP);
  1636. // Fall through to the soft float variant; variadic functions don't
  1637. // use the hard floating point ABI.
  1638. LLVM_FALLTHROUGH;
  1639. case CallingConv::ARM_AAPCS:
  1640. return (Return ? RetCC_ARM_AAPCS: CC_ARM_AAPCS);
  1641. case CallingConv::ARM_APCS:
  1642. return (Return ? RetCC_ARM_APCS: CC_ARM_APCS);
  1643. case CallingConv::GHC:
  1644. if (Return)
  1645. report_fatal_error("Can't return in GHC call convention");
  1646. else
  1647. return CC_ARM_APCS_GHC;
  1648. case CallingConv::CFGuard_Check:
  1649. return (Return ? RetCC_ARM_AAPCS : CC_ARM_Win32_CFGuard_Check);
  1650. }
  1651. }
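// Assign locations to the call's arguments, bail out on anything we can't
// handle, emit CALLSEQ_START, and copy/extend each argument into its assigned
// register or stack slot. RegArgs collects the argument registers and NumBytes
// the stack space consumed.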
  1652. bool ARMFastISel::ProcessCallArgs(SmallVectorImpl<Value*> &Args,
  1653. SmallVectorImpl<Register> &ArgRegs,
  1654. SmallVectorImpl<MVT> &ArgVTs,
  1655. SmallVectorImpl<ISD::ArgFlagsTy> &ArgFlags,
  1656. SmallVectorImpl<Register> &RegArgs,
  1657. CallingConv::ID CC,
  1658. unsigned &NumBytes,
  1659. bool isVarArg) {
  1660. SmallVector<CCValAssign, 16> ArgLocs;
  1661. CCState CCInfo(CC, isVarArg, *FuncInfo.MF, ArgLocs, *Context);
  1662. CCInfo.AnalyzeCallOperands(ArgVTs, ArgFlags,
  1663. CCAssignFnForCall(CC, false, isVarArg));
  1664. // Check that we can handle all of the arguments. If we can't, then bail out
  1665. // now before we add code to the MBB.
  1666. for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
  1667. CCValAssign &VA = ArgLocs[i];
  1668. MVT ArgVT = ArgVTs[VA.getValNo()];
  1669. // We don't handle NEON/vector parameters yet.
  1670. if (ArgVT.isVector() || ArgVT.getSizeInBits() > 64)
  1671. return false;
  1672. // Now copy/store arg to correct locations.
  1673. if (VA.isRegLoc() && !VA.needsCustom()) {
  1674. continue;
  1675. } else if (VA.needsCustom()) {
  1676. // TODO: We need custom lowering for vector (v2f64) args.
  1677. if (VA.getLocVT() != MVT::f64 ||
  1678. // TODO: Only handle register args for now.
  1679. !VA.isRegLoc() || !ArgLocs[++i].isRegLoc())
  1680. return false;
  1681. } else {
  1682. switch (ArgVT.SimpleTy) {
  1683. default:
  1684. return false;
  1685. case MVT::i1:
  1686. case MVT::i8:
  1687. case MVT::i16:
  1688. case MVT::i32:
  1689. break;
  1690. case MVT::f32:
  1691. if (!Subtarget->hasVFP2Base())
  1692. return false;
  1693. break;
  1694. case MVT::f64:
  1695. if (!Subtarget->hasVFP2Base())
  1696. return false;
  1697. break;
  1698. }
  1699. }
  1700. }
  1701. // At this point, we are able to handle the call's arguments in fast isel.
  1702. // Get a count of how many bytes are to be pushed on the stack.
  1703. NumBytes = CCInfo.getNextStackOffset();
  1704. // Issue CALLSEQ_START
  1705. unsigned AdjStackDown = TII.getCallFrameSetupOpcode();
  1706. AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
  1707. TII.get(AdjStackDown))
  1708. .addImm(NumBytes).addImm(0));
  1709. // Process the args.
  1710. for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
  1711. CCValAssign &VA = ArgLocs[i];
  1712. const Value *ArgVal = Args[VA.getValNo()];
  1713. Register Arg = ArgRegs[VA.getValNo()];
  1714. MVT ArgVT = ArgVTs[VA.getValNo()];
  1715. assert((!ArgVT.isVector() && ArgVT.getSizeInBits() <= 64) &&
  1716. "We don't handle NEON/vector parameters yet.");
  1717. // Handle arg promotion, etc.
  1718. switch (VA.getLocInfo()) {
  1719. case CCValAssign::Full: break;
  1720. case CCValAssign::SExt: {
  1721. MVT DestVT = VA.getLocVT();
  1722. Arg = ARMEmitIntExt(ArgVT, Arg, DestVT, /*isZExt*/false);
  1723. assert(Arg != 0 && "Failed to emit a sext");
  1724. ArgVT = DestVT;
  1725. break;
  1726. }
  1727. case CCValAssign::AExt:
  1728. // Intentional fall-through. Handle AExt and ZExt.
  1729. case CCValAssign::ZExt: {
  1730. MVT DestVT = VA.getLocVT();
  1731. Arg = ARMEmitIntExt(ArgVT, Arg, DestVT, /*isZExt*/true);
  1732. assert(Arg != 0 && "Failed to emit a zext");
  1733. ArgVT = DestVT;
  1734. break;
  1735. }
  1736. case CCValAssign::BCvt: {
  1737. unsigned BC = fastEmit_r(ArgVT, VA.getLocVT(), ISD::BITCAST, Arg);
  1738. assert(BC != 0 && "Failed to emit a bitcast!");
  1739. Arg = BC;
  1740. ArgVT = VA.getLocVT();
  1741. break;
  1742. }
  1743. default: llvm_unreachable("Unknown arg promotion!");
  1744. }
  1745. // Now copy/store arg to correct locations.
  1746. if (VA.isRegLoc() && !VA.needsCustom()) {
  1747. BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
  1748. TII.get(TargetOpcode::COPY), VA.getLocReg()).addReg(Arg);
  1749. RegArgs.push_back(VA.getLocReg());
  1750. } else if (VA.needsCustom()) {
  1751. // TODO: We need custom lowering for vector (v2f64) args.
  1752. assert(VA.getLocVT() == MVT::f64 &&
  1753. "Custom lowering for v2f64 args not available");
  1754. // FIXME: ArgLocs[++i] may extend beyond ArgLocs.size()
  1755. CCValAssign &NextVA = ArgLocs[++i];
  1756. assert(VA.isRegLoc() && NextVA.isRegLoc() &&
  1757. "We only handle register args!");
  1758. AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
  1759. TII.get(ARM::VMOVRRD), VA.getLocReg())
  1760. .addReg(NextVA.getLocReg(), RegState::Define)
  1761. .addReg(Arg));
  1762. RegArgs.push_back(VA.getLocReg());
  1763. RegArgs.push_back(NextVA.getLocReg());
  1764. } else {
  1765. assert(VA.isMemLoc());
  1766. // Need to store on the stack.
  1767. // Don't emit stores for undef values.
  1768. if (isa<UndefValue>(ArgVal))
  1769. continue;
  1770. Address Addr;
  1771. Addr.BaseType = Address::RegBase;
  1772. Addr.Base.Reg = ARM::SP;
  1773. Addr.Offset = VA.getLocMemOffset();
  1774. bool EmitRet = ARMEmitStore(ArgVT, Arg, Addr); (void)EmitRet;
  1775. assert(EmitRet && "Could not emit a store for argument!");
  1776. }
  1777. }
  1778. return true;
  1779. }
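// Emit CALLSEQ_END and copy the call result out of its physical register(s);
// an f64 returned in two GPRs is reassembled with VMOVDRR.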
  1780. bool ARMFastISel::FinishCall(MVT RetVT, SmallVectorImpl<Register> &UsedRegs,
  1781. const Instruction *I, CallingConv::ID CC,
  1782. unsigned &NumBytes, bool isVarArg) {
  1783. // Issue CALLSEQ_END
  1784. unsigned AdjStackUp = TII.getCallFrameDestroyOpcode();
  1785. AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
  1786. TII.get(AdjStackUp))
  1787. .addImm(NumBytes).addImm(-1ULL));
  1788. // Now the return value.
  1789. if (RetVT != MVT::isVoid) {
  1790. SmallVector<CCValAssign, 16> RVLocs;
  1791. CCState CCInfo(CC, isVarArg, *FuncInfo.MF, RVLocs, *Context);
  1792. CCInfo.AnalyzeCallResult(RetVT, CCAssignFnForCall(CC, true, isVarArg));
  1793. // Copy all of the result registers out of their specified physreg.
  1794. if (RVLocs.size() == 2 && RetVT == MVT::f64) {
  1795. // For this move we copy into two registers and then move into the
  1796. // double fp reg we want.
  1797. MVT DestVT = RVLocs[0].getValVT();
  1798. const TargetRegisterClass* DstRC = TLI.getRegClassFor(DestVT);
  1799. Register ResultReg = createResultReg(DstRC);
  1800. AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
  1801. TII.get(ARM::VMOVDRR), ResultReg)
  1802. .addReg(RVLocs[0].getLocReg())
  1803. .addReg(RVLocs[1].getLocReg()));
  1804. UsedRegs.push_back(RVLocs[0].getLocReg());
  1805. UsedRegs.push_back(RVLocs[1].getLocReg());
  1806. // Finally update the result.
  1807. updateValueMap(I, ResultReg);
  1808. } else {
  1809. assert(RVLocs.size() == 1 &&"Can't handle non-double multi-reg retvals!");
  1810. MVT CopyVT = RVLocs[0].getValVT();
  1811. // Special handling for extended integers.
  1812. if (RetVT == MVT::i1 || RetVT == MVT::i8 || RetVT == MVT::i16)
  1813. CopyVT = MVT::i32;
  1814. const TargetRegisterClass* DstRC = TLI.getRegClassFor(CopyVT);
  1815. Register ResultReg = createResultReg(DstRC);
  1816. BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
  1817. TII.get(TargetOpcode::COPY),
  1818. ResultReg).addReg(RVLocs[0].getLocReg());
  1819. UsedRegs.push_back(RVLocs[0].getLocReg());
  1820. // Finally update the result.
  1821. updateValueMap(I, ResultReg);
  1822. }
  1823. }
  1824. return true;
  1825. }
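// Lower a return: place the (single, register-located) return value in its
// expected physical register, extending narrow integers to i32 when flagged,
// then emit the subtarget's return instruction (tBXNS_RET for CMSE non-secure
// entry functions).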
  1826. bool ARMFastISel::SelectRet(const Instruction *I) {
  1827. const ReturnInst *Ret = cast<ReturnInst>(I);
  1828. const Function &F = *I->getParent()->getParent();
  1829. const bool IsCmseNSEntry = F.hasFnAttribute("cmse_nonsecure_entry");
  1830. if (!FuncInfo.CanLowerReturn)
  1831. return false;
  1832. if (TLI.supportSwiftError() &&
  1833. F.getAttributes().hasAttrSomewhere(Attribute::SwiftError))
  1834. return false;
  1835. if (TLI.supportSplitCSR(FuncInfo.MF))
  1836. return false;
  1837. // Build a list of return value registers.
  1838. SmallVector<unsigned, 4> RetRegs;
  1839. CallingConv::ID CC = F.getCallingConv();
  1840. if (Ret->getNumOperands() > 0) {
  1841. SmallVector<ISD::OutputArg, 4> Outs;
  1842. GetReturnInfo(CC, F.getReturnType(), F.getAttributes(), Outs, TLI, DL);
  1843. // Analyze operands of the call, assigning locations to each operand.
  1844. SmallVector<CCValAssign, 16> ValLocs;
  1845. CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, ValLocs, I->getContext());
  1846. CCInfo.AnalyzeReturn(Outs, CCAssignFnForCall(CC, true /* is Ret */,
  1847. F.isVarArg()));
  1848. const Value *RV = Ret->getOperand(0);
  1849. Register Reg = getRegForValue(RV);
  1850. if (Reg == 0)
  1851. return false;
  1852. // Only handle a single return value for now.
  1853. if (ValLocs.size() != 1)
  1854. return false;
  1855. CCValAssign &VA = ValLocs[0];
  1856. // Don't bother handling odd stuff for now.
  1857. if (VA.getLocInfo() != CCValAssign::Full)
  1858. return false;
  1859. // Only handle register returns for now.
  1860. if (!VA.isRegLoc())
  1861. return false;
  1862. unsigned SrcReg = Reg + VA.getValNo();
  1863. EVT RVEVT = TLI.getValueType(DL, RV->getType());
  1864. if (!RVEVT.isSimple()) return false;
  1865. MVT RVVT = RVEVT.getSimpleVT();
  1866. MVT DestVT = VA.getValVT();
  1867. // Special handling for extended integers.
  1868. if (RVVT != DestVT) {
  1869. if (RVVT != MVT::i1 && RVVT != MVT::i8 && RVVT != MVT::i16)
  1870. return false;
  1871. assert(DestVT == MVT::i32 && "ARM should always ext to i32");
  1872. // Perform extension if flagged as either zext or sext. Otherwise, do
  1873. // nothing.
  1874. if (Outs[0].Flags.isZExt() || Outs[0].Flags.isSExt()) {
  1875. SrcReg = ARMEmitIntExt(RVVT, SrcReg, DestVT, Outs[0].Flags.isZExt());
  1876. if (SrcReg == 0) return false;
  1877. }
  1878. }
  1879. // Make the copy.
  1880. Register DstReg = VA.getLocReg();
  1881. const TargetRegisterClass* SrcRC = MRI.getRegClass(SrcReg);
  1882. // Avoid a cross-class copy. This is very unlikely.
  1883. if (!SrcRC->contains(DstReg))
  1884. return false;
  1885. BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
  1886. TII.get(TargetOpcode::COPY), DstReg).addReg(SrcReg);
  1887. // Add register to return instruction.
  1888. RetRegs.push_back(VA.getLocReg());
  1889. }
  1890. unsigned RetOpc;
  1891. if (IsCmseNSEntry)
  1892. if (isThumb2)
  1893. RetOpc = ARM::tBXNS_RET;
  1894. else
  1895. llvm_unreachable("CMSE not valid for non-Thumb targets");
  1896. else
  1897. RetOpc = Subtarget->getReturnOpcode();
  1898. MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
  1899. TII.get(RetOpc));
  1900. AddOptionalDefs(MIB);
  1901. for (unsigned R : RetRegs)
  1902. MIB.addReg(R, RegState::Implicit);
  1903. return true;
  1904. }
  1905. unsigned ARMFastISel::ARMSelectCallOp(bool UseReg) {
  1906. if (UseReg)
  1907. return isThumb2 ? gettBLXrOpcode(*MF) : getBLXOpcode(*MF);
  1908. else
  1909. return isThumb2 ? ARM::tBL : ARM::BL;
  1910. }
  1911. unsigned ARMFastISel::getLibcallReg(const Twine &Name) {
  1912. // Manually compute the global's type to avoid building it when unnecessary.
  1913. Type *GVTy = Type::getInt32PtrTy(*Context, /*AS=*/0);
  1914. EVT LCREVT = TLI.getValueType(DL, GVTy);
  1915. if (!LCREVT.isSimple()) return 0;
  1916. GlobalValue *GV = M.getNamedGlobal(Name.str());
  1917. if (!GV)
  1918. GV = new GlobalVariable(M, Type::getInt32Ty(*Context), false,
  1919. GlobalValue::ExternalLinkage, nullptr, Name);
  1920. return ARMMaterializeGV(GV, LCREVT.getSimpleVT());
  1921. }
  1922. // A quick helper that emits a call to the named libcall, passing the operands
  1923. // of the Instruction I as arguments. We can assume that we
  1924. // can emit a call for any libcall we can produce. This is an abridged version
  1925. // of the full call infrastructure since we won't need to worry about things
  1926. // like computed function pointers or strange arguments at call sites.
  1927. // TODO: Try to unify this and the normal call bits for ARM, then try to unify
  1928. // with X86.
  1929. bool ARMFastISel::ARMEmitLibcall(const Instruction *I, RTLIB::Libcall Call) {
  1930. CallingConv::ID CC = TLI.getLibcallCallingConv(Call);
  1931. // Handle *simple* calls for now.
  1932. Type *RetTy = I->getType();
  1933. MVT RetVT;
  1934. if (RetTy->isVoidTy())
  1935. RetVT = MVT::isVoid;
  1936. else if (!isTypeLegal(RetTy, RetVT))
  1937. return false;
  1938. // Can't handle non-double multi-reg retvals.
  1939. if (RetVT != MVT::isVoid && RetVT != MVT::i32) {
  1940. SmallVector<CCValAssign, 16> RVLocs;
  1941. CCState CCInfo(CC, false, *FuncInfo.MF, RVLocs, *Context);
  1942. CCInfo.AnalyzeCallResult(RetVT, CCAssignFnForCall(CC, true, false));
  1943. if (RVLocs.size() >= 2 && RetVT != MVT::f64)
  1944. return false;
  1945. }
  1946. // Set up the argument vectors.
  1947. SmallVector<Value*, 8> Args;
  1948. SmallVector<Register, 8> ArgRegs;
  1949. SmallVector<MVT, 8> ArgVTs;
  1950. SmallVector<ISD::ArgFlagsTy, 8> ArgFlags;
  1951. Args.reserve(I->getNumOperands());
  1952. ArgRegs.reserve(I->getNumOperands());
  1953. ArgVTs.reserve(I->getNumOperands());
  1954. ArgFlags.reserve(I->getNumOperands());
  1955. for (Value *Op : I->operands()) {
  1956. Register Arg = getRegForValue(Op);
  1957. if (Arg == 0) return false;
  1958. Type *ArgTy = Op->getType();
  1959. MVT ArgVT;
  1960. if (!isTypeLegal(ArgTy, ArgVT)) return false;
  1961. ISD::ArgFlagsTy Flags;
  1962. Flags.setOrigAlign(DL.getABITypeAlign(ArgTy));
  1963. Args.push_back(Op);
  1964. ArgRegs.push_back(Arg);
  1965. ArgVTs.push_back(ArgVT);
  1966. ArgFlags.push_back(Flags);
  1967. }
  1968. // Handle the arguments now that we've gotten them.
  1969. SmallVector<Register, 4> RegArgs;
  1970. unsigned NumBytes;
  1971. if (!ProcessCallArgs(Args, ArgRegs, ArgVTs, ArgFlags,
  1972. RegArgs, CC, NumBytes, false))
  1973. return false;
  1974. Register CalleeReg;
  1975. if (Subtarget->genLongCalls()) {
  1976. CalleeReg = getLibcallReg(TLI.getLibcallName(Call));
  1977. if (CalleeReg == 0) return false;
  1978. }
  1979. // Issue the call.
  1980. unsigned CallOpc = ARMSelectCallOp(Subtarget->genLongCalls());
  1981. MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt,
  1982. DbgLoc, TII.get(CallOpc));
  1983. // BL / BLX don't take a predicate, but tBL / tBLX do.
  1984. if (isThumb2)
  1985. MIB.add(predOps(ARMCC::AL));
  1986. if (Subtarget->genLongCalls()) {
  1987. CalleeReg =
  1988. constrainOperandRegClass(TII.get(CallOpc), CalleeReg, isThumb2 ? 2 : 0);
  1989. MIB.addReg(CalleeReg);
  1990. } else
  1991. MIB.addExternalSymbol(TLI.getLibcallName(Call));
  1992. // Add implicit physical register uses to the call.
  1993. for (Register R : RegArgs)
  1994. MIB.addReg(R, RegState::Implicit);
  1995. // Add a register mask with the call-preserved registers.
  1996. // Proper defs for return values will be added by setPhysRegsDeadExcept().
  1997. MIB.addRegMask(TRI.getCallPreservedMask(*FuncInfo.MF, CC));
  1998. // Finish off the call including any return values.
  1999. SmallVector<Register, 4> UsedRegs;
  2000. if (!FinishCall(RetVT, UsedRegs, I, CC, NumBytes, false)) return false;
  2001. // Set all unused physreg defs as dead.
  2002. static_cast<MachineInstr *>(MIB)->setPhysRegsDeadExcept(UsedRegs, TRI);
  2003. return true;
  2004. }
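// Lower a call that isn't a recognized intrinsic. Inline asm, tail calls,
// unsupported argument attributes and multi-register non-f64 returns are
// rejected; IntrMemName is set when this is reached from a memory intrinsic.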
  2005. bool ARMFastISel::SelectCall(const Instruction *I,
  2006. const char *IntrMemName = nullptr) {
  2007. const CallInst *CI = cast<CallInst>(I);
  2008. const Value *Callee = CI->getCalledOperand();
  2009. // Can't handle inline asm.
  2010. if (isa<InlineAsm>(Callee)) return false;
  2011. // Allow SelectionDAG isel to handle tail calls.
  2012. if (CI->isTailCall()) return false;
  2013. // Check the calling convention.
  2014. CallingConv::ID CC = CI->getCallingConv();
  2015. // TODO: Avoid some calling conventions?
  2016. FunctionType *FTy = CI->getFunctionType();
  2017. bool isVarArg = FTy->isVarArg();
  2018. // Handle *simple* calls for now.
  2019. Type *RetTy = I->getType();
  2020. MVT RetVT;
  2021. if (RetTy->isVoidTy())
  2022. RetVT = MVT::isVoid;
  2023. else if (!isTypeLegal(RetTy, RetVT) && RetVT != MVT::i16 &&
  2024. RetVT != MVT::i8 && RetVT != MVT::i1)
  2025. return false;
  2026. // Can't handle non-double multi-reg retvals.
  2027. if (RetVT != MVT::isVoid && RetVT != MVT::i1 && RetVT != MVT::i8 &&
  2028. RetVT != MVT::i16 && RetVT != MVT::i32) {
  2029. SmallVector<CCValAssign, 16> RVLocs;
  2030. CCState CCInfo(CC, isVarArg, *FuncInfo.MF, RVLocs, *Context);
  2031. CCInfo.AnalyzeCallResult(RetVT, CCAssignFnForCall(CC, true, isVarArg));
  2032. if (RVLocs.size() >= 2 && RetVT != MVT::f64)
  2033. return false;
  2034. }
  2035. // Set up the argument vectors.
  2036. SmallVector<Value*, 8> Args;
  2037. SmallVector<Register, 8> ArgRegs;
  2038. SmallVector<MVT, 8> ArgVTs;
  2039. SmallVector<ISD::ArgFlagsTy, 8> ArgFlags;
  2040. unsigned arg_size = CI->arg_size();
  2041. Args.reserve(arg_size);
  2042. ArgRegs.reserve(arg_size);
  2043. ArgVTs.reserve(arg_size);
  2044. ArgFlags.reserve(arg_size);
  2045. for (auto ArgI = CI->arg_begin(), ArgE = CI->arg_end(); ArgI != ArgE; ++ArgI) {
  2046. // If we're lowering a memory intrinsic instead of a regular call, skip the
  2047. // last argument, which shouldn't be passed to the underlying function.
  2048. if (IntrMemName && ArgE - ArgI <= 1)
  2049. break;
  2050. ISD::ArgFlagsTy Flags;
  2051. unsigned ArgIdx = ArgI - CI->arg_begin();
  2052. if (CI->paramHasAttr(ArgIdx, Attribute::SExt))
  2053. Flags.setSExt();
  2054. if (CI->paramHasAttr(ArgIdx, Attribute::ZExt))
  2055. Flags.setZExt();
  2056. // FIXME: Only handle *easy* calls for now.
  2057. if (CI->paramHasAttr(ArgIdx, Attribute::InReg) ||
  2058. CI->paramHasAttr(ArgIdx, Attribute::StructRet) ||
  2059. CI->paramHasAttr(ArgIdx, Attribute::SwiftSelf) ||
  2060. CI->paramHasAttr(ArgIdx, Attribute::SwiftError) ||
  2061. CI->paramHasAttr(ArgIdx, Attribute::Nest) ||
  2062. CI->paramHasAttr(ArgIdx, Attribute::ByVal))
  2063. return false;
  2064. Type *ArgTy = (*ArgI)->getType();
  2065. MVT ArgVT;
  2066. if (!isTypeLegal(ArgTy, ArgVT) && ArgVT != MVT::i16 && ArgVT != MVT::i8 &&
  2067. ArgVT != MVT::i1)
  2068. return false;
  2069. Register Arg = getRegForValue(*ArgI);
  2070. if (!Arg.isValid())
  2071. return false;
  2072. Flags.setOrigAlign(DL.getABITypeAlign(ArgTy));
  2073. Args.push_back(*ArgI);
  2074. ArgRegs.push_back(Arg);
  2075. ArgVTs.push_back(ArgVT);
  2076. ArgFlags.push_back(Flags);
  2077. }
  2078. // Handle the arguments now that we've gotten them.
  2079. SmallVector<Register, 4> RegArgs;
  2080. unsigned NumBytes;
  2081. if (!ProcessCallArgs(Args, ArgRegs, ArgVTs, ArgFlags,
  2082. RegArgs, CC, NumBytes, isVarArg))
  2083. return false;
  2084. bool UseReg = false;
  2085. const GlobalValue *GV = dyn_cast<GlobalValue>(Callee);
  2086. if (!GV || Subtarget->genLongCalls()) UseReg = true;
  2087. Register CalleeReg;
  2088. if (UseReg) {
  2089. if (IntrMemName)
  2090. CalleeReg = getLibcallReg(IntrMemName);
  2091. else
  2092. CalleeReg = getRegForValue(Callee);
  2093. if (CalleeReg == 0) return false;
  2094. }
  2095. // Issue the call.
  2096. unsigned CallOpc = ARMSelectCallOp(UseReg);
  2097. MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt,
  2098. DbgLoc, TII.get(CallOpc));
  2099. // ARM calls don't take a predicate, but tBL / tBLX do.
  2100. if (isThumb2)
  2101. MIB.add(predOps(ARMCC::AL));
  2102. if (UseReg) {
  2103. CalleeReg =
  2104. constrainOperandRegClass(TII.get(CallOpc), CalleeReg, isThumb2 ? 2 : 0);
  2105. MIB.addReg(CalleeReg);
  2106. } else if (!IntrMemName)
  2107. MIB.addGlobalAddress(GV, 0, 0);
  2108. else
  2109. MIB.addExternalSymbol(IntrMemName, 0);
  2110. // Add implicit physical register uses to the call.
  2111. for (Register R : RegArgs)
  2112. MIB.addReg(R, RegState::Implicit);
  2113. // Add a register mask with the call-preserved registers.
  2114. // Proper defs for return values will be added by setPhysRegsDeadExcept().
  2115. MIB.addRegMask(TRI.getCallPreservedMask(*FuncInfo.MF, CC));
  2116. // Finish off the call including any return values.
  2117. SmallVector<Register, 4> UsedRegs;
  2118. if (!FinishCall(RetVT, UsedRegs, I, CC, NumBytes, isVarArg))
  2119. return false;
  2120. // Set all unused physreg defs as dead.
  2121. static_cast<MachineInstr *>(MIB)->setPhysRegsDeadExcept(UsedRegs, TRI);
  2122. return true;
  2123. }
  2124. bool ARMFastISel::ARMIsMemCpySmall(uint64_t Len) {
  2125. return Len <= 16;
  2126. }
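// Expand a small constant-length memcpy inline as a sequence of loads and
// stores, using the widest access the remaining length and alignment allow.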
  2127. bool ARMFastISel::ARMTryEmitSmallMemCpy(Address Dest, Address Src,
  2128. uint64_t Len, unsigned Alignment) {
  2129. // Make sure we don't bloat code by inlining very large memcpy's.
  2130. if (!ARMIsMemCpySmall(Len))
  2131. return false;
  2132. while (Len) {
  2133. MVT VT;
  2134. if (!Alignment || Alignment >= 4) {
  2135. if (Len >= 4)
  2136. VT = MVT::i32;
  2137. else if (Len >= 2)
  2138. VT = MVT::i16;
  2139. else {
  2140. assert(Len == 1 && "Expected a length of 1!");
  2141. VT = MVT::i8;
  2142. }
  2143. } else {
  2144. // Bound based on alignment.
  2145. if (Len >= 2 && Alignment == 2)
  2146. VT = MVT::i16;
  2147. else {
  2148. VT = MVT::i8;
  2149. }
  2150. }
  2151. bool RV;
  2152. Register ResultReg;
  2153. RV = ARMEmitLoad(VT, ResultReg, Src);
  2154. assert(RV && "Should be able to handle this load.");
  2155. RV = ARMEmitStore(VT, ResultReg, Dest);
  2156. assert(RV && "Should be able to handle this store.");
  2157. (void)RV;
  2158. unsigned Size = VT.getSizeInBits()/8;
  2159. Len -= Size;
  2160. Dest.Offset += Size;
  2161. Src.Offset += Size;
  2162. }
  2163. return true;
  2164. }
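// Lower the few intrinsics handled directly: frameaddress, memcpy/memmove
// (inlined when small, otherwise lowered to a libcall), memset, and trap.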
  2165. bool ARMFastISel::SelectIntrinsicCall(const IntrinsicInst &I) {
  2166. // FIXME: Handle more intrinsics.
  2167. switch (I.getIntrinsicID()) {
  2168. default: return false;
  2169. case Intrinsic::frameaddress: {
  2170. MachineFrameInfo &MFI = FuncInfo.MF->getFrameInfo();
  2171. MFI.setFrameAddressIsTaken(true);
  2172. unsigned LdrOpc = isThumb2 ? ARM::t2LDRi12 : ARM::LDRi12;
  2173. const TargetRegisterClass *RC = isThumb2 ? &ARM::tGPRRegClass
  2174. : &ARM::GPRRegClass;
  2175. const ARMBaseRegisterInfo *RegInfo =
  2176. static_cast<const ARMBaseRegisterInfo *>(Subtarget->getRegisterInfo());
  2177. Register FramePtr = RegInfo->getFrameRegister(*(FuncInfo.MF));
  2178. unsigned SrcReg = FramePtr;
  2179. // Recursively load frame address
  2180. // ldr r0 [fp]
  2181. // ldr r0 [r0]
  2182. // ldr r0 [r0]
  2183. // ...
  2184. unsigned DestReg;
  2185. unsigned Depth = cast<ConstantInt>(I.getOperand(0))->getZExtValue();
  2186. while (Depth--) {
  2187. DestReg = createResultReg(RC);
  2188. AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
  2189. TII.get(LdrOpc), DestReg)
  2190. .addReg(SrcReg).addImm(0));
  2191. SrcReg = DestReg;
  2192. }
  2193. updateValueMap(&I, SrcReg);
  2194. return true;
  2195. }
  2196. case Intrinsic::memcpy:
  2197. case Intrinsic::memmove: {
  2198. const MemTransferInst &MTI = cast<MemTransferInst>(I);
  2199. // Don't handle volatile.
  2200. if (MTI.isVolatile())
  2201. return false;
  2202. // Disable inlining for memmove before calls to ComputeAddress. Otherwise,
  2203. // we would emit dead code because we don't currently handle memmoves.
  2204. bool isMemCpy = (I.getIntrinsicID() == Intrinsic::memcpy);
  2205. if (isa<ConstantInt>(MTI.getLength()) && isMemCpy) {
  2206. // Small memcpy's are common enough that we want to do them without a call
  2207. // if possible.
  2208. uint64_t Len = cast<ConstantInt>(MTI.getLength())->getZExtValue();
  2209. if (ARMIsMemCpySmall(Len)) {
  2210. Address Dest, Src;
  2211. if (!ARMComputeAddress(MTI.getRawDest(), Dest) ||
  2212. !ARMComputeAddress(MTI.getRawSource(), Src))
  2213. return false;
  2214. unsigned Alignment = MinAlign(MTI.getDestAlignment(),
  2215. MTI.getSourceAlignment());
  2216. if (ARMTryEmitSmallMemCpy(Dest, Src, Len, Alignment))
  2217. return true;
  2218. }
  2219. }
  2220. if (!MTI.getLength()->getType()->isIntegerTy(32))
  2221. return false;
  2222. if (MTI.getSourceAddressSpace() > 255 || MTI.getDestAddressSpace() > 255)
  2223. return false;
  2224. const char *IntrMemName = isa<MemCpyInst>(I) ? "memcpy" : "memmove";
  2225. return SelectCall(&I, IntrMemName);
  2226. }
  2227. case Intrinsic::memset: {
  2228. const MemSetInst &MSI = cast<MemSetInst>(I);
  2229. // Don't handle volatile.
  2230. if (MSI.isVolatile())
  2231. return false;
  2232. if (!MSI.getLength()->getType()->isIntegerTy(32))
  2233. return false;
  2234. if (MSI.getDestAddressSpace() > 255)
  2235. return false;
  2236. return SelectCall(&I, "memset");
  2237. }
  2238. case Intrinsic::trap: {
  2239. unsigned Opcode;
  2240. if (Subtarget->isThumb())
  2241. Opcode = ARM::tTRAP;
  2242. else
  2243. Opcode = Subtarget->useNaClTrap() ? ARM::TRAPNaCl : ARM::TRAP;
  2244. BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opcode));
  2245. return true;
  2246. }
  2247. }
  2248. }
  2249. bool ARMFastISel::SelectTrunc(const Instruction *I) {
  2250. // The high bits for a type smaller than the register size are assumed to be
  2251. // undefined.
  2252. Value *Op = I->getOperand(0);
  2253. EVT SrcVT, DestVT;
  2254. SrcVT = TLI.getValueType(DL, Op->getType(), true);
  2255. DestVT = TLI.getValueType(DL, I->getType(), true);
  2256. if (SrcVT != MVT::i32 && SrcVT != MVT::i16 && SrcVT != MVT::i8)
  2257. return false;
  2258. if (DestVT != MVT::i16 && DestVT != MVT::i8 && DestVT != MVT::i1)
  2259. return false;
  2260. Register SrcReg = getRegForValue(Op);
  2261. if (!SrcReg) return false;
  2262. // Because the high bits are undefined, a truncate doesn't generate
  2263. // any code.
  2264. updateValueMap(I, SrcReg);
  2265. return true;
  2266. }
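// Emit a zero or sign extension of an i1/i8/i16 value, using either a single
// extend/AND instruction or a shift-left/shift-right pair depending on the
// subtarget, as chosen by the tables below.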
  2267. unsigned ARMFastISel::ARMEmitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT,
  2268. bool isZExt) {
  2269. if (DestVT != MVT::i32 && DestVT != MVT::i16 && DestVT != MVT::i8)
  2270. return 0;
  2271. if (SrcVT != MVT::i16 && SrcVT != MVT::i8 && SrcVT != MVT::i1)
  2272. return 0;
  2273. // Table of which combinations can be emitted as a single instruction,
  2274. // and which will require two.
  2275. static const uint8_t isSingleInstrTbl[3][2][2][2] = {
  2276. // ARM Thumb
  2277. // !hasV6Ops hasV6Ops !hasV6Ops hasV6Ops
  2278. // ext: s z s z s z s z
  2279. /* 1 */ { { { 0, 1 }, { 0, 1 } }, { { 0, 0 }, { 0, 1 } } },
  2280. /* 8 */ { { { 0, 1 }, { 1, 1 } }, { { 0, 0 }, { 1, 1 } } },
  2281. /* 16 */ { { { 0, 0 }, { 1, 1 } }, { { 0, 0 }, { 1, 1 } } }
  2282. };
  2283. // Target registers for:
  2284. // - For ARM can never be PC.
  2285. // - For 16-bit Thumb are restricted to lower 8 registers.
  2286. // - For 32-bit Thumb are restricted to non-SP and non-PC.
  2287. static const TargetRegisterClass *RCTbl[2][2] = {
  2288. // Instructions: Two Single
  2289. /* ARM */ { &ARM::GPRnopcRegClass, &ARM::GPRnopcRegClass },
  2290. /* Thumb */ { &ARM::tGPRRegClass, &ARM::rGPRRegClass }
  2291. };
  2292. // Table governing the instruction(s) to be emitted.
  2293. static const struct InstructionTable {
  2294. uint32_t Opc : 16;
  2295. uint32_t hasS : 1; // Some instructions have an S bit, always set it to 0.
  2296. uint32_t Shift : 7; // For shift operand addressing mode, used by MOVsi.
  2297. uint32_t Imm : 8; // All instructions have either a shift or a mask.
  2298. } IT[2][2][3][2] = {
  2299. { // Two instructions (first is left shift, second is in this table).
  2300. { // ARM Opc S Shift Imm
  2301. /* 1 bit sext */ { { ARM::MOVsi , 1, ARM_AM::asr , 31 },
  2302. /* 1 bit zext */ { ARM::MOVsi , 1, ARM_AM::lsr , 31 } },
  2303. /* 8 bit sext */ { { ARM::MOVsi , 1, ARM_AM::asr , 24 },
  2304. /* 8 bit zext */ { ARM::MOVsi , 1, ARM_AM::lsr , 24 } },
  2305. /* 16 bit sext */ { { ARM::MOVsi , 1, ARM_AM::asr , 16 },
  2306. /* 16 bit zext */ { ARM::MOVsi , 1, ARM_AM::lsr , 16 } }
  2307. },
  2308. { // Thumb Opc S Shift Imm
  2309. /* 1 bit sext */ { { ARM::tASRri , 0, ARM_AM::no_shift, 31 },
  2310. /* 1 bit zext */ { ARM::tLSRri , 0, ARM_AM::no_shift, 31 } },
  2311. /* 8 bit sext */ { { ARM::tASRri , 0, ARM_AM::no_shift, 24 },
  2312. /* 8 bit zext */ { ARM::tLSRri , 0, ARM_AM::no_shift, 24 } },
  2313. /* 16 bit sext */ { { ARM::tASRri , 0, ARM_AM::no_shift, 16 },
  2314. /* 16 bit zext */ { ARM::tLSRri , 0, ARM_AM::no_shift, 16 } }
  2315. }
  2316. },
  2317. { // Single instruction.
  2318. { // ARM Opc S Shift Imm
  2319. /* 1 bit sext */ { { ARM::KILL , 0, ARM_AM::no_shift, 0 },
  2320. /* 1 bit zext */ { ARM::ANDri , 1, ARM_AM::no_shift, 1 } },
  2321. /* 8 bit sext */ { { ARM::SXTB , 0, ARM_AM::no_shift, 0 },
  2322. /* 8 bit zext */ { ARM::ANDri , 1, ARM_AM::no_shift, 255 } },
  2323. /* 16 bit sext */ { { ARM::SXTH , 0, ARM_AM::no_shift, 0 },
  2324. /* 16 bit zext */ { ARM::UXTH , 0, ARM_AM::no_shift, 0 } }
  2325. },
  2326. { // Thumb Opc S Shift Imm
  2327. /* 1 bit sext */ { { ARM::KILL , 0, ARM_AM::no_shift, 0 },
  2328. /* 1 bit zext */ { ARM::t2ANDri, 1, ARM_AM::no_shift, 1 } },
  2329. /* 8 bit sext */ { { ARM::t2SXTB , 0, ARM_AM::no_shift, 0 },
  2330. /* 8 bit zext */ { ARM::t2ANDri, 1, ARM_AM::no_shift, 255 } },
  2331. /* 16 bit sext */ { { ARM::t2SXTH , 0, ARM_AM::no_shift, 0 },
  2332. /* 16 bit zext */ { ARM::t2UXTH , 0, ARM_AM::no_shift, 0 } }
  2333. }
  2334. }
  2335. };
  2336. unsigned SrcBits = SrcVT.getSizeInBits();
  2337. unsigned DestBits = DestVT.getSizeInBits();
  2338. (void) DestBits;
  2339. assert((SrcBits < DestBits) && "can only extend to larger types");
  2340. assert((DestBits == 32 || DestBits == 16 || DestBits == 8) &&
  2341. "other sizes unimplemented");
  2342. assert((SrcBits == 16 || SrcBits == 8 || SrcBits == 1) &&
  2343. "other sizes unimplemented");
  2344. bool hasV6Ops = Subtarget->hasV6Ops();
  2345. unsigned Bitness = SrcBits / 8; // {1,8,16}=>{0,1,2}
  2346. assert((Bitness < 3) && "sanity-check table bounds");
  2347. bool isSingleInstr = isSingleInstrTbl[Bitness][isThumb2][hasV6Ops][isZExt];
  2348. const TargetRegisterClass *RC = RCTbl[isThumb2][isSingleInstr];
  2349. const InstructionTable *ITP = &IT[isSingleInstr][isThumb2][Bitness][isZExt];
  2350. unsigned Opc = ITP->Opc;
  2351. assert(ARM::KILL != Opc && "Invalid table entry");
  2352. unsigned hasS = ITP->hasS;
  2353. ARM_AM::ShiftOpc Shift = (ARM_AM::ShiftOpc) ITP->Shift;
  2354. assert(((Shift == ARM_AM::no_shift) == (Opc != ARM::MOVsi)) &&
  2355. "only MOVsi has shift operand addressing mode");
  2356. unsigned Imm = ITP->Imm;
  2357. // 16-bit Thumb instructions always set CPSR (unless they're in an IT block).
  2358. bool setsCPSR = &ARM::tGPRRegClass == RC;
  2359. unsigned LSLOpc = isThumb2 ? ARM::tLSLri : ARM::MOVsi;
  2360. unsigned ResultReg;
  2361. // MOVsi encodes shift and immediate in shift operand addressing mode.
  2362. // When the two-instruction sequence is emitted, both instructions are shifts,
  2363. // so the following condition has the same value for each.
  2364. bool ImmIsSO = (Shift != ARM_AM::no_shift);
  2365. // Either one or two instructions are emitted.
  2366. // They're always of the form:
  2367. // dst = in OP imm
  2368. // CPSR is set only by 16-bit Thumb instructions.
  2369. // Predicate, if any, is AL.
  2370. // S bit, if available, is always 0.
  2371. // When two are emitted, the first's result feeds the second's input,
  2372. // and that intermediate value is then dead.
  2373. unsigned NumInstrsEmitted = isSingleInstr ? 1 : 2;
  2374. for (unsigned Instr = 0; Instr != NumInstrsEmitted; ++Instr) {
  2375. ResultReg = createResultReg(RC);
  2376. bool isLsl = (0 == Instr) && !isSingleInstr;
  2377. unsigned Opcode = isLsl ? LSLOpc : Opc;
  2378. ARM_AM::ShiftOpc ShiftAM = isLsl ? ARM_AM::lsl : Shift;
  2379. unsigned ImmEnc = ImmIsSO ? ARM_AM::getSORegOpc(ShiftAM, Imm) : Imm;
  2380. bool isKill = 1 == Instr;
  2381. MachineInstrBuilder MIB = BuildMI(
  2382. *FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opcode), ResultReg);
  2383. if (setsCPSR)
  2384. MIB.addReg(ARM::CPSR, RegState::Define);
  2385. SrcReg = constrainOperandRegClass(TII.get(Opcode), SrcReg, 1 + setsCPSR);
  2386. MIB.addReg(SrcReg, isKill * RegState::Kill)
  2387. .addImm(ImmEnc)
  2388. .add(predOps(ARMCC::AL));
  2389. if (hasS)
  2390. MIB.add(condCodeOp());
  2391. // Second instruction consumes the first's result.
  2392. SrcReg = ResultReg;
  2393. }
  2394. return ResultReg;
  2395. }
  2396. bool ARMFastISel::SelectIntExt(const Instruction *I) {
  2397. // On ARM, in general, integer casts don't involve legal types; this code
  2398. // handles promotable integers.
  2399. Type *DestTy = I->getType();
  2400. Value *Src = I->getOperand(0);
  2401. Type *SrcTy = Src->getType();
  2402. bool isZExt = isa<ZExtInst>(I);
  2403. Register SrcReg = getRegForValue(Src);
  2404. if (!SrcReg) return false;
  2405. EVT SrcEVT, DestEVT;
  2406. SrcEVT = TLI.getValueType(DL, SrcTy, true);
  2407. DestEVT = TLI.getValueType(DL, DestTy, true);
  2408. if (!SrcEVT.isSimple()) return false;
  2409. if (!DestEVT.isSimple()) return false;
  2410. MVT SrcVT = SrcEVT.getSimpleVT();
  2411. MVT DestVT = DestEVT.getSimpleVT();
  2412. unsigned ResultReg = ARMEmitIntExt(SrcVT, SrcReg, DestVT, isZExt);
  2413. if (ResultReg == 0) return false;
  2414. updateValueMap(I, ResultReg);
  2415. return true;
  2416. }
  2417. bool ARMFastISel::SelectShift(const Instruction *I,
  2418. ARM_AM::ShiftOpc ShiftTy) {
  2419. // Thumb2 mode is handled by the target-independent selector
  2420. // or SelectionDAG ISel.
  2421. if (isThumb2)
  2422. return false;
  2423. // Only handle i32 now.
  2424. EVT DestVT = TLI.getValueType(DL, I->getType(), true);
  2425. if (DestVT != MVT::i32)
  2426. return false;
  2427. unsigned Opc = ARM::MOVsr;
  2428. unsigned ShiftImm;
  2429. Value *Src2Value = I->getOperand(1);
  2430. if (const ConstantInt *CI = dyn_cast<ConstantInt>(Src2Value)) {
  2431. ShiftImm = CI->getZExtValue();
  2432. // Fall back to selection DAG isel if the shift amount
  2433. // is zero or greater than the width of the value type.
  2434. if (ShiftImm == 0 || ShiftImm >=32)
  2435. return false;
  2436. Opc = ARM::MOVsi;
  2437. }
  2438. Value *Src1Value = I->getOperand(0);
  2439. Register Reg1 = getRegForValue(Src1Value);
  2440. if (Reg1 == 0) return false;
  2441. unsigned Reg2 = 0;
  2442. if (Opc == ARM::MOVsr) {
  2443. Reg2 = getRegForValue(Src2Value);
  2444. if (Reg2 == 0) return false;
  2445. }
  2446. Register ResultReg = createResultReg(&ARM::GPRnopcRegClass);
  2447. if(ResultReg == 0) return false;
  2448. MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
  2449. TII.get(Opc), ResultReg)
  2450. .addReg(Reg1);
  2451. if (Opc == ARM::MOVsi)
  2452. MIB.addImm(ARM_AM::getSORegOpc(ShiftTy, ShiftImm));
  2453. else if (Opc == ARM::MOVsr) {
  2454. MIB.addReg(Reg2);
  2455. MIB.addImm(ARM_AM::getSORegOpc(ShiftTy, 0));
  2456. }
  2457. AddOptionalDefs(MIB);
  2458. updateValueMap(I, ResultReg);
  2459. return true;
  2460. }
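// Top-level dispatch for the FastISel framework: map each IR opcode to the
// corresponding Select* helper; returning false falls back to SelectionDAG.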
// TODO: SoftFP support.
bool ARMFastISel::fastSelectInstruction(const Instruction *I) {
  switch (I->getOpcode()) {
    case Instruction::Load:
      return SelectLoad(I);
    case Instruction::Store:
      return SelectStore(I);
    case Instruction::Br:
      return SelectBranch(I);
    case Instruction::IndirectBr:
      return SelectIndirectBr(I);
    case Instruction::ICmp:
    case Instruction::FCmp:
      return SelectCmp(I);
    case Instruction::FPExt:
      return SelectFPExt(I);
    case Instruction::FPTrunc:
      return SelectFPTrunc(I);
    case Instruction::SIToFP:
      return SelectIToFP(I, /*isSigned*/ true);
    case Instruction::UIToFP:
      return SelectIToFP(I, /*isSigned*/ false);
    case Instruction::FPToSI:
      return SelectFPToI(I, /*isSigned*/ true);
    case Instruction::FPToUI:
      return SelectFPToI(I, /*isSigned*/ false);
    case Instruction::Add:
      return SelectBinaryIntOp(I, ISD::ADD);
    case Instruction::Or:
      return SelectBinaryIntOp(I, ISD::OR);
    case Instruction::Sub:
      return SelectBinaryIntOp(I, ISD::SUB);
    case Instruction::FAdd:
      return SelectBinaryFPOp(I, ISD::FADD);
    case Instruction::FSub:
      return SelectBinaryFPOp(I, ISD::FSUB);
    case Instruction::FMul:
      return SelectBinaryFPOp(I, ISD::FMUL);
    case Instruction::SDiv:
      return SelectDiv(I, /*isSigned*/ true);
    case Instruction::UDiv:
      return SelectDiv(I, /*isSigned*/ false);
    case Instruction::SRem:
      return SelectRem(I, /*isSigned*/ true);
    case Instruction::URem:
      return SelectRem(I, /*isSigned*/ false);
    case Instruction::Call:
      if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I))
        return SelectIntrinsicCall(*II);
      return SelectCall(I);
    case Instruction::Select:
      return SelectSelect(I);
    case Instruction::Ret:
      return SelectRet(I);
    case Instruction::Trunc:
      return SelectTrunc(I);
    case Instruction::ZExt:
    case Instruction::SExt:
      return SelectIntExt(I);
    case Instruction::Shl:
      return SelectShift(I, ARM_AM::lsl);
    case Instruction::LShr:
      return SelectShift(I, ARM_AM::lsr);
    case Instruction::AShr:
      return SelectShift(I, ARM_AM::asr);
    default: break;
  }
  return false;
}

// This table describes sign- and zero-extend instructions which can be
// folded into a preceding load. All of these extends have an immediate
// (sometimes a mask and sometimes a shift) that's applied after
// extension.
static const struct FoldableLoadExtendsStruct {
  uint16_t Opc[2];  // ARM, Thumb.
  uint8_t ExpectedImm;
  uint8_t isZExt     : 1;
  uint8_t ExpectedVT : 7;
} FoldableLoadExtends[] = {
  { { ARM::SXTH,  ARM::t2SXTH  },   0, 0, MVT::i16 },
  { { ARM::UXTH,  ARM::t2UXTH  },   0, 1, MVT::i16 },
  { { ARM::ANDri, ARM::t2ANDri }, 255, 1, MVT::i8 },
  { { ARM::SXTB,  ARM::t2SXTB  },   0, 0, MVT::i8 },
  { { ARM::UXTB,  ARM::t2UXTB  },   0, 1, MVT::i8 }
};

/// The specified machine instr operand is a vreg, and that
/// vreg is being provided by the specified load instruction. If possible,
/// try to fold the load as an operand to the instruction, returning true if
/// successful.
bool ARMFastISel::tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,
                                      const LoadInst *LI) {
  // Verify we have a legal type before going any further.
  MVT VT;
  if (!isLoadTypeLegal(LI->getType(), VT))
    return false;

  // Combine load followed by zero- or sign-extend.
  // ldrb r1, [r0]       ldrb r1, [r0]
  // uxtb r2, r1     =>
  // mov  r3, r2         mov  r3, r1
  if (MI->getNumOperands() < 3 || !MI->getOperand(2).isImm())
    return false;

  const uint64_t Imm = MI->getOperand(2).getImm();

  bool Found = false;
  bool isZExt;
  for (const FoldableLoadExtendsStruct &FLE : FoldableLoadExtends) {
    if (FLE.Opc[isThumb2] == MI->getOpcode() &&
        (uint64_t)FLE.ExpectedImm == Imm &&
        MVT((MVT::SimpleValueType)FLE.ExpectedVT) == VT) {
      Found = true;
      isZExt = FLE.isZExt;
    }
  }
  if (!Found) return false;

  // See if we can handle this address.
  Address Addr;
  if (!ARMComputeAddress(LI->getOperand(0), Addr)) return false;

  Register ResultReg = MI->getOperand(0).getReg();
  if (!ARMEmitLoad(VT, ResultReg, Addr, LI->getAlignment(), isZExt, false))
    return false;
  MachineBasicBlock::iterator I(MI);
  removeDeadCode(I, std::next(I));
  return true;
}

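// Materialize the address of a global for PIC ELF: load a constant-pool entry
// (a GOT_PREL reference when the global is not known to be DSO-local), then
// combine it with the PC (PICADD/PICLDR/tPICADD), loading through the GOT
// slot when GOT_PREL is used.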
unsigned ARMFastISel::ARMLowerPICELF(const GlobalValue *GV, MVT VT) {
  bool UseGOT_PREL = !TM.shouldAssumeDSOLocal(*GV->getParent(), GV);

  LLVMContext *Context = &MF->getFunction().getContext();
  unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
  unsigned PCAdj = Subtarget->isThumb() ? 4 : 8;
  ARMConstantPoolValue *CPV = ARMConstantPoolConstant::Create(
      GV, ARMPCLabelIndex, ARMCP::CPValue, PCAdj,
      UseGOT_PREL ? ARMCP::GOT_PREL : ARMCP::no_modifier,
      /*AddCurrentAddress=*/UseGOT_PREL);

  Align ConstAlign =
      MF->getDataLayout().getPrefTypeAlign(Type::getInt32PtrTy(*Context));
  unsigned Idx = MF->getConstantPool()->getConstantPoolIndex(CPV, ConstAlign);
  MachineMemOperand *CPMMO =
      MF->getMachineMemOperand(MachinePointerInfo::getConstantPool(*MF),
                               MachineMemOperand::MOLoad, 4, Align(4));

  Register TempReg = MF->getRegInfo().createVirtualRegister(&ARM::rGPRRegClass);
  unsigned Opc = isThumb2 ? ARM::t2LDRpci : ARM::LDRcp;
  MachineInstrBuilder MIB =
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), TempReg)
          .addConstantPoolIndex(Idx)
          .addMemOperand(CPMMO);
  if (Opc == ARM::LDRcp)
    MIB.addImm(0);
  MIB.add(predOps(ARMCC::AL));

  // Fix the address by adding pc.
  Register DestReg = createResultReg(TLI.getRegClassFor(VT));
  Opc = Subtarget->isThumb() ? ARM::tPICADD : UseGOT_PREL ? ARM::PICLDR
                                                          : ARM::PICADD;
  DestReg = constrainOperandRegClass(TII.get(Opc), DestReg, 0);
  MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), DestReg)
            .addReg(TempReg)
            .addImm(ARMPCLabelIndex);

  if (!Subtarget->isThumb())
    MIB.add(predOps(ARMCC::AL));

  if (UseGOT_PREL && Subtarget->isThumb()) {
    Register NewDestReg = createResultReg(TLI.getRegClassFor(VT));
    MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
                  TII.get(ARM::t2LDRi12), NewDestReg)
              .addReg(DestReg)
              .addImm(0);
    DestReg = NewDestReg;
    AddOptionalDefs(MIB);
  }
  return DestReg;
}

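// Fast-path lowering of formal arguments: only up to four i8/i16/i32 scalar
// arguments passed in r0-r3 are handled; anything else returns false so the
// generic argument lowering takes over.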
bool ARMFastISel::fastLowerArguments() {
  if (!FuncInfo.CanLowerReturn)
    return false;

  const Function *F = FuncInfo.Fn;
  if (F->isVarArg())
    return false;

  CallingConv::ID CC = F->getCallingConv();
  switch (CC) {
  default:
    return false;
  case CallingConv::Fast:
  case CallingConv::C:
  case CallingConv::ARM_AAPCS_VFP:
  case CallingConv::ARM_AAPCS:
  case CallingConv::ARM_APCS:
  case CallingConv::Swift:
  case CallingConv::SwiftTail:
    break;
  }

  // Only handle simple cases, i.e. up to 4 i8/i16/i32 scalar arguments
  // which are passed in r0 - r3.
  for (const Argument &Arg : F->args()) {
    if (Arg.getArgNo() >= 4)
      return false;

    if (Arg.hasAttribute(Attribute::InReg) ||
        Arg.hasAttribute(Attribute::StructRet) ||
        Arg.hasAttribute(Attribute::SwiftSelf) ||
        Arg.hasAttribute(Attribute::SwiftError) ||
        Arg.hasAttribute(Attribute::ByVal))
      return false;

    Type *ArgTy = Arg.getType();
    if (ArgTy->isStructTy() || ArgTy->isArrayTy() || ArgTy->isVectorTy())
      return false;

    EVT ArgVT = TLI.getValueType(DL, ArgTy);
    if (!ArgVT.isSimple()) return false;

    switch (ArgVT.getSimpleVT().SimpleTy) {
    case MVT::i8:
    case MVT::i16:
    case MVT::i32:
      break;
    default:
      return false;
    }
  }

  static const MCPhysReg GPRArgRegs[] = {
    ARM::R0, ARM::R1, ARM::R2, ARM::R3
  };

  const TargetRegisterClass *RC = &ARM::rGPRRegClass;
  for (const Argument &Arg : F->args()) {
    unsigned ArgNo = Arg.getArgNo();
    unsigned SrcReg = GPRArgRegs[ArgNo];
    Register DstReg = FuncInfo.MF->addLiveIn(SrcReg, RC);
    // FIXME: Unfortunately it's necessary to emit a copy from the livein copy.
    // Without this, EmitLiveInCopies may eliminate the livein if its only
    // use is a bitcast (which isn't turned into an instruction).
    Register ResultReg = createResultReg(RC);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
            TII.get(TargetOpcode::COPY),
            ResultReg).addReg(DstReg, getKillRegState(true));
    updateValueMap(&Arg, ResultReg);
  }

  return true;
}

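// Factory entry point: create an ARMFastISel instance when the subtarget
// enables FastISel; otherwise return nullptr so the caller falls back to
// SelectionDAG.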
namespace llvm {

  FastISel *ARM::createFastISel(FunctionLoweringInfo &funcInfo,
                                const TargetLibraryInfo *libInfo) {
    if (funcInfo.MF->getSubtarget<ARMSubtarget>().useFastISel())
      return new ARMFastISel(funcInfo, libInfo);

    return nullptr;
  }

} // end namespace llvm