PPCFastISel.cpp 86 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482
  1. //===-- PPCFastISel.cpp - PowerPC FastISel implementation -----------------===//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. //
  9. // This file defines the PowerPC-specific support for the FastISel class. Some
  10. // of the target-specific code is generated by tablegen in the file
  11. // PPCGenFastISel.inc, which is #included here.
  12. //
  13. //===----------------------------------------------------------------------===//
  14. #include "MCTargetDesc/PPCPredicates.h"
  15. #include "PPC.h"
  16. #include "PPCCCState.h"
  17. #include "PPCCallingConv.h"
  18. #include "PPCISelLowering.h"
  19. #include "PPCMachineFunctionInfo.h"
  20. #include "PPCSubtarget.h"
  21. #include "PPCTargetMachine.h"
  22. #include "llvm/ADT/Optional.h"
  23. #include "llvm/CodeGen/CallingConvLower.h"
  24. #include "llvm/CodeGen/FastISel.h"
  25. #include "llvm/CodeGen/FunctionLoweringInfo.h"
  26. #include "llvm/CodeGen/MachineConstantPool.h"
  27. #include "llvm/CodeGen/MachineFrameInfo.h"
  28. #include "llvm/CodeGen/MachineInstrBuilder.h"
  29. #include "llvm/CodeGen/MachineRegisterInfo.h"
  30. #include "llvm/CodeGen/TargetLowering.h"
  31. #include "llvm/IR/CallingConv.h"
  32. #include "llvm/IR/GetElementPtrTypeIterator.h"
  33. #include "llvm/IR/GlobalAlias.h"
  34. #include "llvm/IR/GlobalVariable.h"
  35. #include "llvm/IR/IntrinsicInst.h"
  36. #include "llvm/IR/Operator.h"
  37. #include "llvm/Support/Debug.h"
  38. #include "llvm/Target/TargetMachine.h"
  39. //===----------------------------------------------------------------------===//
  40. //
  41. // TBD:
  42. // fastLowerArguments: Handle simple cases.
  43. // PPCMaterializeGV: Handle TLS.
  44. // SelectCall: Handle function pointers.
  45. // SelectCall: Handle multi-register return values.
  46. // SelectCall: Optimize away nops for local calls.
  47. // processCallArgs: Handle bit-converted arguments.
  48. // finishCall: Handle multi-register return values.
  49. // PPCComputeAddress: Handle parameter references as FrameIndex's.
  50. // PPCEmitCmp: Handle immediate as operand 1.
  51. // SelectCall: Handle small byval arguments.
  52. // SelectIntrinsicCall: Implement.
  53. // SelectSelect: Implement.
  54. // Consider factoring isTypeLegal into the base class.
  55. // Implement switches and jump tables.
  56. //
  57. //===----------------------------------------------------------------------===//
  58. using namespace llvm;
  59. #define DEBUG_TYPE "ppcfastisel"
  60. namespace {
  61. typedef struct Address {
  62. enum {
  63. RegBase,
  64. FrameIndexBase
  65. } BaseType;
  66. union {
  67. unsigned Reg;
  68. int FI;
  69. } Base;
  70. long Offset;
  71. // Innocuous defaults for our address.
  72. Address()
  73. : BaseType(RegBase), Offset(0) {
  74. Base.Reg = 0;
  75. }
  76. } Address;
// Fast instruction selector for PowerPC. Translates LLVM IR directly to
// MachineInstrs, falling back to SelectionDAG for anything it cannot handle.
class PPCFastISel final : public FastISel {

  const TargetMachine &TM;      // Owning target machine.
  const PPCSubtarget *Subtarget;
  PPCFunctionInfo *PPCFuncInfo; // PPC-specific per-function state.
  const TargetInstrInfo &TII;
  const TargetLowering &TLI;
  LLVMContext *Context;

public:
  explicit PPCFastISel(FunctionLoweringInfo &FuncInfo,
                       const TargetLibraryInfo *LibInfo)
      : FastISel(FuncInfo, LibInfo), TM(FuncInfo.MF->getTarget()),
        Subtarget(&FuncInfo.MF->getSubtarget<PPCSubtarget>()),
        PPCFuncInfo(FuncInfo.MF->getInfo<PPCFunctionInfo>()),
        TII(*Subtarget->getInstrInfo()), TLI(*Subtarget->getTargetLowering()),
        Context(&FuncInfo.Fn->getContext()) {}

  // Backend specific FastISel code.
private:
  bool fastSelectInstruction(const Instruction *I) override;
  unsigned fastMaterializeConstant(const Constant *C) override;
  unsigned fastMaterializeAlloca(const AllocaInst *AI) override;
  bool tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,
                           const LoadInst *LI) override;
  bool fastLowerArguments() override;
  unsigned fastEmit_i(MVT Ty, MVT RetTy, unsigned Opc, uint64_t Imm) override;
  unsigned fastEmitInst_ri(unsigned MachineInstOpcode,
                           const TargetRegisterClass *RC,
                           unsigned Op0, bool Op0IsKill,
                           uint64_t Imm);
  unsigned fastEmitInst_r(unsigned MachineInstOpcode,
                          const TargetRegisterClass *RC,
                          unsigned Op0, bool Op0IsKill);
  unsigned fastEmitInst_rr(unsigned MachineInstOpcode,
                           const TargetRegisterClass *RC,
                           unsigned Op0, bool Op0IsKill,
                           unsigned Op1, bool Op1IsKill);
  bool fastLowerCall(CallLoweringInfo &CLI) override;

  // Instruction selection routines.
private:
  bool SelectLoad(const Instruction *I);
  bool SelectStore(const Instruction *I);
  bool SelectBranch(const Instruction *I);
  bool SelectIndirectBr(const Instruction *I);
  bool SelectFPExt(const Instruction *I);
  bool SelectFPTrunc(const Instruction *I);
  bool SelectIToFP(const Instruction *I, bool IsSigned);
  bool SelectFPToI(const Instruction *I, bool IsSigned);
  bool SelectBinaryIntOp(const Instruction *I, unsigned ISDOpcode);
  bool SelectRet(const Instruction *I);
  bool SelectTrunc(const Instruction *I);
  bool SelectIntExt(const Instruction *I);

  // Utility routines.
private:
  bool isTypeLegal(Type *Ty, MVT &VT);
  bool isLoadTypeLegal(Type *Ty, MVT &VT);
  bool isValueAvailable(const Value *V) const;
  bool isVSFRCRegClass(const TargetRegisterClass *RC) const {
    return RC->getID() == PPC::VSFRCRegClassID;
  }
  bool isVSSRCRegClass(const TargetRegisterClass *RC) const {
    return RC->getID() == PPC::VSSRCRegClassID;
  }
  // Emit a COPY of SrcReg into a fresh register of class ToRC and return
  // the new register.
  unsigned copyRegToRegClass(const TargetRegisterClass *ToRC,
                             unsigned SrcReg, unsigned Flag = 0,
                             unsigned SubReg = 0) {
    unsigned TmpReg = createResultReg(ToRC);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
            TII.get(TargetOpcode::COPY), TmpReg).addReg(SrcReg, Flag, SubReg);
    return TmpReg;
  }
  bool PPCEmitCmp(const Value *Src1Value, const Value *Src2Value,
                  bool isZExt, unsigned DestReg,
                  const PPC::Predicate Pred);
  bool PPCEmitLoad(MVT VT, Register &ResultReg, Address &Addr,
                   const TargetRegisterClass *RC, bool IsZExt = true,
                   unsigned FP64LoadOpc = PPC::LFD);
  bool PPCEmitStore(MVT VT, unsigned SrcReg, Address &Addr);
  bool PPCComputeAddress(const Value *Obj, Address &Addr);
  void PPCSimplifyAddress(Address &Addr, bool &UseOffset,
                          unsigned &IndexReg);
  bool PPCEmitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT,
                     unsigned DestReg, bool IsZExt);
  unsigned PPCMaterializeFP(const ConstantFP *CFP, MVT VT);
  unsigned PPCMaterializeGV(const GlobalValue *GV, MVT VT);
  unsigned PPCMaterializeInt(const ConstantInt *CI, MVT VT,
                             bool UseSExt = true);
  unsigned PPCMaterialize32BitInt(int64_t Imm,
                                  const TargetRegisterClass *RC);
  unsigned PPCMaterialize64BitInt(int64_t Imm,
                                  const TargetRegisterClass *RC);
  unsigned PPCMoveToIntReg(const Instruction *I, MVT VT,
                           unsigned SrcReg, bool IsSigned);
  unsigned PPCMoveToFPReg(MVT VT, unsigned SrcReg, bool IsSigned);

  // Call handling routines.
private:
  bool processCallArgs(SmallVectorImpl<Value*> &Args,
                       SmallVectorImpl<unsigned> &ArgRegs,
                       SmallVectorImpl<MVT> &ArgVTs,
                       SmallVectorImpl<ISD::ArgFlagsTy> &ArgFlags,
                       SmallVectorImpl<unsigned> &RegArgs,
                       CallingConv::ID CC,
                       unsigned &NumBytes,
                       bool IsVarArg);
  bool finishCall(MVT RetVT, CallLoweringInfo &CLI, unsigned &NumBytes);

private:
  // Table-generated pattern-matching helpers.
  #include "PPCGenFastISel.inc"

};
  183. } // end anonymous namespace
  184. static Optional<PPC::Predicate> getComparePred(CmpInst::Predicate Pred) {
  185. switch (Pred) {
  186. // These are not representable with any single compare.
  187. case CmpInst::FCMP_FALSE:
  188. case CmpInst::FCMP_TRUE:
  189. // Major concern about the following 6 cases is NaN result. The comparison
  190. // result consists of 4 bits, indicating lt, eq, gt and un (unordered),
  191. // only one of which will be set. The result is generated by fcmpu
  192. // instruction. However, bc instruction only inspects one of the first 3
  193. // bits, so when un is set, bc instruction may jump to an undesired
  194. // place.
  195. //
  196. // More specifically, if we expect an unordered comparison and un is set, we
  197. // expect to always go to true branch; in such case UEQ, UGT and ULT still
  198. // give false, which are undesired; but UNE, UGE, ULE happen to give true,
  199. // since they are tested by inspecting !eq, !lt, !gt, respectively.
  200. //
  201. // Similarly, for ordered comparison, when un is set, we always expect the
  202. // result to be false. In such case OGT, OLT and OEQ is good, since they are
  203. // actually testing GT, LT, and EQ respectively, which are false. OGE, OLE
  204. // and ONE are tested through !lt, !gt and !eq, and these are true.
  205. case CmpInst::FCMP_UEQ:
  206. case CmpInst::FCMP_UGT:
  207. case CmpInst::FCMP_ULT:
  208. case CmpInst::FCMP_OGE:
  209. case CmpInst::FCMP_OLE:
  210. case CmpInst::FCMP_ONE:
  211. default:
  212. return Optional<PPC::Predicate>();
  213. case CmpInst::FCMP_OEQ:
  214. case CmpInst::ICMP_EQ:
  215. return PPC::PRED_EQ;
  216. case CmpInst::FCMP_OGT:
  217. case CmpInst::ICMP_UGT:
  218. case CmpInst::ICMP_SGT:
  219. return PPC::PRED_GT;
  220. case CmpInst::FCMP_UGE:
  221. case CmpInst::ICMP_UGE:
  222. case CmpInst::ICMP_SGE:
  223. return PPC::PRED_GE;
  224. case CmpInst::FCMP_OLT:
  225. case CmpInst::ICMP_ULT:
  226. case CmpInst::ICMP_SLT:
  227. return PPC::PRED_LT;
  228. case CmpInst::FCMP_ULE:
  229. case CmpInst::ICMP_ULE:
  230. case CmpInst::ICMP_SLE:
  231. return PPC::PRED_LE;
  232. case CmpInst::FCMP_UNE:
  233. case CmpInst::ICMP_NE:
  234. return PPC::PRED_NE;
  235. case CmpInst::FCMP_ORD:
  236. return PPC::PRED_NU;
  237. case CmpInst::FCMP_UNO:
  238. return PPC::PRED_UN;
  239. }
  240. }
  241. // Determine whether the type Ty is simple enough to be handled by
  242. // fast-isel, and return its equivalent machine type in VT.
  243. // FIXME: Copied directly from ARM -- factor into base class?
  244. bool PPCFastISel::isTypeLegal(Type *Ty, MVT &VT) {
  245. EVT Evt = TLI.getValueType(DL, Ty, true);
  246. // Only handle simple types.
  247. if (Evt == MVT::Other || !Evt.isSimple()) return false;
  248. VT = Evt.getSimpleVT();
  249. // Handle all legal types, i.e. a register that will directly hold this
  250. // value.
  251. return TLI.isTypeLegal(VT);
  252. }
  253. // Determine whether the type Ty is simple enough to be handled by
  254. // fast-isel as a load target, and return its equivalent machine type in VT.
  255. bool PPCFastISel::isLoadTypeLegal(Type *Ty, MVT &VT) {
  256. if (isTypeLegal(Ty, VT)) return true;
  257. // If this is a type than can be sign or zero-extended to a basic operation
  258. // go ahead and accept it now.
  259. if (VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32) {
  260. return true;
  261. }
  262. return false;
  263. }
  264. bool PPCFastISel::isValueAvailable(const Value *V) const {
  265. if (!isa<Instruction>(V))
  266. return true;
  267. const auto *I = cast<Instruction>(V);
  268. return FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB;
  269. }
// Given a value Obj, create an Address object Addr that represents its
// address. Return false if we can't handle it. Walks through bitcasts,
// no-op int<->ptr casts, GEPs with foldable constant offsets, and static
// allocas; anything else falls back to materializing Obj in a register.
bool PPCFastISel::PPCComputeAddress(const Value *Obj, Address &Addr) {
  const User *U = nullptr;
  // UserOp1 acts as a "no opcode" sentinel so the switch falls to default.
  unsigned Opcode = Instruction::UserOp1;
  if (const Instruction *I = dyn_cast<Instruction>(Obj)) {
    // Don't walk into other basic blocks unless the object is an alloca from
    // another block, otherwise it may not have a virtual register assigned.
    // (The static_cast is only used as a map key; it is never dereferenced,
    // so it is safe even when Obj is not actually an AllocaInst.)
    if (FuncInfo.StaticAllocaMap.count(static_cast<const AllocaInst *>(Obj)) ||
        FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
      Opcode = I->getOpcode();
      U = I;
    }
  } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(Obj)) {
    Opcode = C->getOpcode();
    U = C;
  }

  switch (Opcode) {
    default:
      break;
    case Instruction::BitCast:
      // Look through bitcasts.
      return PPCComputeAddress(U->getOperand(0), Addr);
    case Instruction::IntToPtr:
      // Look past no-op inttoptrs.
      if (TLI.getValueType(DL, U->getOperand(0)->getType()) ==
          TLI.getPointerTy(DL))
        return PPCComputeAddress(U->getOperand(0), Addr);
      break;
    case Instruction::PtrToInt:
      // Look past no-op ptrtoints.
      if (TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL))
        return PPCComputeAddress(U->getOperand(0), Addr);
      break;
    case Instruction::GetElementPtr: {
      // Save so we can roll back if the base can't be computed.
      Address SavedAddr = Addr;
      long TmpOffset = Addr.Offset;

      // Iterate through the GEP folding the constants into offsets where
      // we can.
      gep_type_iterator GTI = gep_type_begin(U);
      for (User::const_op_iterator II = U->op_begin() + 1, IE = U->op_end();
           II != IE; ++II, ++GTI) {
        const Value *Op = *II;
        if (StructType *STy = GTI.getStructTypeOrNull()) {
          // Struct index: add the field's fixed byte offset.
          const StructLayout *SL = DL.getStructLayout(STy);
          unsigned Idx = cast<ConstantInt>(Op)->getZExtValue();
          TmpOffset += SL->getElementOffset(Idx);
        } else {
          // Array/pointer index: scale the index by the element size.
          uint64_t S = DL.getTypeAllocSize(GTI.getIndexedType());
          for (;;) {
            if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
              // Constant-offset addressing.
              TmpOffset += CI->getSExtValue() * S;
              break;
            }
            if (canFoldAddIntoGEP(U, Op)) {
              // A compatible add with a constant operand. Fold the constant.
              ConstantInt *CI =
                cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1));
              TmpOffset += CI->getSExtValue() * S;
              // Iterate on the other operand.
              Op = cast<AddOperator>(Op)->getOperand(0);
              continue;
            }
            // Unsupported
            goto unsupported_gep;
          }
        }
      }

      // Try to grab the base operand now.
      Addr.Offset = TmpOffset;
      if (PPCComputeAddress(U->getOperand(0), Addr)) return true;

      // We failed, restore everything and try the other options.
      Addr = SavedAddr;

    unsupported_gep:
      break;
    }
    case Instruction::Alloca: {
      // A static alloca can be addressed directly via its frame index.
      const AllocaInst *AI = cast<AllocaInst>(Obj);
      DenseMap<const AllocaInst*, int>::iterator SI =
        FuncInfo.StaticAllocaMap.find(AI);
      if (SI != FuncInfo.StaticAllocaMap.end()) {
        Addr.BaseType = Address::FrameIndexBase;
        Addr.Base.FI = SI->second;
        return true;
      }
      break;
    }
  }

  // FIXME: References to parameters fall through to the behavior
  // below. They should be able to reference a frame index since
  // they are stored to the stack, so we can get "ld rx, offset(r1)"
  // instead of "addi ry, r1, offset / ld rx, 0(ry)". Obj will
  // just contain the parameter. Try to handle this with a FI.

  // Try to get this in a register if nothing else has worked.
  if (Addr.Base.Reg == 0)
    Addr.Base.Reg = getRegForValue(Obj);

  // Prevent assignment of base register to X0, which is inappropriate
  // for loads and stores alike.
  if (Addr.Base.Reg != 0)
    MRI.setRegClass(Addr.Base.Reg, &PPC::G8RC_and_G8RC_NOX0RegClass);

  return Addr.Base.Reg != 0;
}
  373. // Fix up some addresses that can't be used directly. For example, if
  374. // an offset won't fit in an instruction field, we may need to move it
  375. // into an index register.
  376. void PPCFastISel::PPCSimplifyAddress(Address &Addr, bool &UseOffset,
  377. unsigned &IndexReg) {
  378. // Check whether the offset fits in the instruction field.
  379. if (!isInt<16>(Addr.Offset))
  380. UseOffset = false;
  381. // If this is a stack pointer and the offset needs to be simplified then
  382. // put the alloca address into a register, set the base type back to
  383. // register and continue. This should almost never happen.
  384. if (!UseOffset && Addr.BaseType == Address::FrameIndexBase) {
  385. unsigned ResultReg = createResultReg(&PPC::G8RC_and_G8RC_NOX0RegClass);
  386. BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::ADDI8),
  387. ResultReg).addFrameIndex(Addr.Base.FI).addImm(0);
  388. Addr.Base.Reg = ResultReg;
  389. Addr.BaseType = Address::RegBase;
  390. }
  391. if (!UseOffset) {
  392. IntegerType *OffsetTy = Type::getInt64Ty(*Context);
  393. const ConstantInt *Offset =
  394. ConstantInt::getSigned(OffsetTy, (int64_t)(Addr.Offset));
  395. IndexReg = PPCMaterializeInt(Offset, MVT::i64);
  396. assert(IndexReg && "Unexpected error in PPCMaterializeInt!");
  397. }
  398. }
  399. // Emit a load instruction if possible, returning true if we succeeded,
  400. // otherwise false. See commentary below for how the register class of
  401. // the load is determined.
  402. bool PPCFastISel::PPCEmitLoad(MVT VT, Register &ResultReg, Address &Addr,
  403. const TargetRegisterClass *RC,
  404. bool IsZExt, unsigned FP64LoadOpc) {
  405. unsigned Opc;
  406. bool UseOffset = true;
  407. bool HasSPE = Subtarget->hasSPE();
  408. // If ResultReg is given, it determines the register class of the load.
  409. // Otherwise, RC is the register class to use. If the result of the
  410. // load isn't anticipated in this block, both may be zero, in which
  411. // case we must make a conservative guess. In particular, don't assign
  412. // R0 or X0 to the result register, as the result may be used in a load,
  413. // store, add-immediate, or isel that won't permit this. (Though
  414. // perhaps the spill and reload of live-exit values would handle this?)
  415. const TargetRegisterClass *UseRC =
  416. (ResultReg ? MRI.getRegClass(ResultReg) :
  417. (RC ? RC :
  418. (VT == MVT::f64 ? (HasSPE ? &PPC::SPERCRegClass : &PPC::F8RCRegClass) :
  419. (VT == MVT::f32 ? (HasSPE ? &PPC::GPRCRegClass : &PPC::F4RCRegClass) :
  420. (VT == MVT::i64 ? &PPC::G8RC_and_G8RC_NOX0RegClass :
  421. &PPC::GPRC_and_GPRC_NOR0RegClass)))));
  422. bool Is32BitInt = UseRC->hasSuperClassEq(&PPC::GPRCRegClass);
  423. switch (VT.SimpleTy) {
  424. default: // e.g., vector types not handled
  425. return false;
  426. case MVT::i8:
  427. Opc = Is32BitInt ? PPC::LBZ : PPC::LBZ8;
  428. break;
  429. case MVT::i16:
  430. Opc = (IsZExt ? (Is32BitInt ? PPC::LHZ : PPC::LHZ8)
  431. : (Is32BitInt ? PPC::LHA : PPC::LHA8));
  432. break;
  433. case MVT::i32:
  434. Opc = (IsZExt ? (Is32BitInt ? PPC::LWZ : PPC::LWZ8)
  435. : (Is32BitInt ? PPC::LWA_32 : PPC::LWA));
  436. if ((Opc == PPC::LWA || Opc == PPC::LWA_32) && ((Addr.Offset & 3) != 0))
  437. UseOffset = false;
  438. break;
  439. case MVT::i64:
  440. Opc = PPC::LD;
  441. assert(UseRC->hasSuperClassEq(&PPC::G8RCRegClass) &&
  442. "64-bit load with 32-bit target??");
  443. UseOffset = ((Addr.Offset & 3) == 0);
  444. break;
  445. case MVT::f32:
  446. Opc = Subtarget->hasSPE() ? PPC::SPELWZ : PPC::LFS;
  447. break;
  448. case MVT::f64:
  449. Opc = FP64LoadOpc;
  450. break;
  451. }
  452. // If necessary, materialize the offset into a register and use
  453. // the indexed form. Also handle stack pointers with special needs.
  454. unsigned IndexReg = 0;
  455. PPCSimplifyAddress(Addr, UseOffset, IndexReg);
  456. // If this is a potential VSX load with an offset of 0, a VSX indexed load can
  457. // be used.
  458. bool IsVSSRC = isVSSRCRegClass(UseRC);
  459. bool IsVSFRC = isVSFRCRegClass(UseRC);
  460. bool Is32VSXLoad = IsVSSRC && Opc == PPC::LFS;
  461. bool Is64VSXLoad = IsVSFRC && Opc == PPC::LFD;
  462. if ((Is32VSXLoad || Is64VSXLoad) &&
  463. (Addr.BaseType != Address::FrameIndexBase) && UseOffset &&
  464. (Addr.Offset == 0)) {
  465. UseOffset = false;
  466. }
  467. if (ResultReg == 0)
  468. ResultReg = createResultReg(UseRC);
  469. // Note: If we still have a frame index here, we know the offset is
  470. // in range, as otherwise PPCSimplifyAddress would have converted it
  471. // into a RegBase.
  472. if (Addr.BaseType == Address::FrameIndexBase) {
  473. // VSX only provides an indexed load.
  474. if (Is32VSXLoad || Is64VSXLoad) return false;
  475. MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
  476. MachinePointerInfo::getFixedStack(*FuncInfo.MF, Addr.Base.FI,
  477. Addr.Offset),
  478. MachineMemOperand::MOLoad, MFI.getObjectSize(Addr.Base.FI),
  479. MFI.getObjectAlign(Addr.Base.FI));
  480. BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
  481. .addImm(Addr.Offset).addFrameIndex(Addr.Base.FI).addMemOperand(MMO);
  482. // Base reg with offset in range.
  483. } else if (UseOffset) {
  484. // VSX only provides an indexed load.
  485. if (Is32VSXLoad || Is64VSXLoad) return false;
  486. BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
  487. .addImm(Addr.Offset).addReg(Addr.Base.Reg);
  488. // Indexed form.
  489. } else {
  490. // Get the RR opcode corresponding to the RI one. FIXME: It would be
  491. // preferable to use the ImmToIdxMap from PPCRegisterInfo.cpp, but it
  492. // is hard to get at.
  493. switch (Opc) {
  494. default: llvm_unreachable("Unexpected opcode!");
  495. case PPC::LBZ: Opc = PPC::LBZX; break;
  496. case PPC::LBZ8: Opc = PPC::LBZX8; break;
  497. case PPC::LHZ: Opc = PPC::LHZX; break;
  498. case PPC::LHZ8: Opc = PPC::LHZX8; break;
  499. case PPC::LHA: Opc = PPC::LHAX; break;
  500. case PPC::LHA8: Opc = PPC::LHAX8; break;
  501. case PPC::LWZ: Opc = PPC::LWZX; break;
  502. case PPC::LWZ8: Opc = PPC::LWZX8; break;
  503. case PPC::LWA: Opc = PPC::LWAX; break;
  504. case PPC::LWA_32: Opc = PPC::LWAX_32; break;
  505. case PPC::LD: Opc = PPC::LDX; break;
  506. case PPC::LFS: Opc = IsVSSRC ? PPC::LXSSPX : PPC::LFSX; break;
  507. case PPC::LFD: Opc = IsVSFRC ? PPC::LXSDX : PPC::LFDX; break;
  508. case PPC::EVLDD: Opc = PPC::EVLDDX; break;
  509. case PPC::SPELWZ: Opc = PPC::SPELWZX; break;
  510. }
  511. auto MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc),
  512. ResultReg);
  513. // If we have an index register defined we use it in the store inst,
  514. // otherwise we use X0 as base as it makes the vector instructions to
  515. // use zero in the computation of the effective address regardless the
  516. // content of the register.
  517. if (IndexReg)
  518. MIB.addReg(Addr.Base.Reg).addReg(IndexReg);
  519. else
  520. MIB.addReg(PPC::ZERO8).addReg(Addr.Base.Reg);
  521. }
  522. return true;
  523. }
  524. // Attempt to fast-select a load instruction.
  525. bool PPCFastISel::SelectLoad(const Instruction *I) {
  526. // FIXME: No atomic loads are supported.
  527. if (cast<LoadInst>(I)->isAtomic())
  528. return false;
  529. // Verify we have a legal type before going any further.
  530. MVT VT;
  531. if (!isLoadTypeLegal(I->getType(), VT))
  532. return false;
  533. // See if we can handle this address.
  534. Address Addr;
  535. if (!PPCComputeAddress(I->getOperand(0), Addr))
  536. return false;
  537. // Look at the currently assigned register for this instruction
  538. // to determine the required register class. This is necessary
  539. // to constrain RA from using R0/X0 when this is not legal.
  540. unsigned AssignedReg = FuncInfo.ValueMap[I];
  541. const TargetRegisterClass *RC =
  542. AssignedReg ? MRI.getRegClass(AssignedReg) : nullptr;
  543. Register ResultReg = 0;
  544. if (!PPCEmitLoad(VT, ResultReg, Addr, RC, true,
  545. Subtarget->hasSPE() ? PPC::EVLDD : PPC::LFD))
  546. return false;
  547. updateValueMap(I, ResultReg);
  548. return true;
  549. }
// Emit a store instruction to store SrcReg at Addr.  Returns false if the
// (type, address, subtarget) combination cannot be handled by fast-isel.
bool PPCFastISel::PPCEmitStore(MVT VT, unsigned SrcReg, Address &Addr) {
  assert(SrcReg && "Nothing to store!");
  unsigned Opc;
  bool UseOffset = true;

  // The source register's class tells us whether an integer value lives in a
  // 32-bit GPR or a 64-bit G8 register, which selects the opcode variant.
  const TargetRegisterClass *RC = MRI.getRegClass(SrcReg);
  bool Is32BitInt = RC->hasSuperClassEq(&PPC::GPRCRegClass);

  // Select the D-form (register + 16-bit displacement) opcode for this type.
  switch (VT.SimpleTy) {
    default: // e.g., vector types not handled
      return false;
    case MVT::i8:
      Opc = Is32BitInt ? PPC::STB : PPC::STB8;
      break;
    case MVT::i16:
      Opc = Is32BitInt ? PPC::STH : PPC::STH8;
      break;
    case MVT::i32:
      assert(Is32BitInt && "Not GPRC for i32??");
      Opc = PPC::STW;
      break;
    case MVT::i64:
      Opc = PPC::STD;
      // STD is a DS-form instruction whose displacement must be a multiple
      // of 4; otherwise fall back to the indexed (register+register) form.
      UseOffset = ((Addr.Offset & 3) == 0);
      break;
    case MVT::f32:
      Opc = Subtarget->hasSPE() ? PPC::SPESTW : PPC::STFS;
      break;
    case MVT::f64:
      Opc = Subtarget->hasSPE() ? PPC::EVSTDD : PPC::STFD;
      break;
  }

  // If necessary, materialize the offset into a register and use
  // the indexed form.  Also handle stack pointers with special needs.
  unsigned IndexReg = 0;
  PPCSimplifyAddress(Addr, UseOffset, IndexReg);

  // If this is a potential VSX store with an offset of 0, a VSX indexed store
  // can be used.
  bool IsVSSRC = isVSSRCRegClass(RC);
  bool IsVSFRC = isVSFRCRegClass(RC);
  bool Is32VSXStore = IsVSSRC && Opc == PPC::STFS;
  bool Is64VSXStore = IsVSFRC && Opc == PPC::STFD;
  if ((Is32VSXStore || Is64VSXStore) &&
      (Addr.BaseType != Address::FrameIndexBase) && UseOffset &&
      (Addr.Offset == 0)) {
    UseOffset = false;
  }

  // Note: If we still have a frame index here, we know the offset is
  // in range, as otherwise PPCSimplifyAddress would have converted it
  // into a RegBase.
  if (Addr.BaseType == Address::FrameIndexBase) {
    // VSX only provides an indexed store.
    if (Is32VSXStore || Is64VSXStore) return false;

    // Describe the access with a machine memory operand on the fixed stack
    // slot, using the slot's recorded size and alignment.
    MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
        MachinePointerInfo::getFixedStack(*FuncInfo.MF, Addr.Base.FI,
                                          Addr.Offset),
        MachineMemOperand::MOStore, MFI.getObjectSize(Addr.Base.FI),
        MFI.getObjectAlign(Addr.Base.FI));

    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc))
        .addReg(SrcReg)
        .addImm(Addr.Offset)
        .addFrameIndex(Addr.Base.FI)
        .addMemOperand(MMO);

  // Base reg with offset in range.
  } else if (UseOffset) {
    // VSX only provides an indexed store.
    if (Is32VSXStore || Is64VSXStore)
      return false;

    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc))
        .addReg(SrcReg).addImm(Addr.Offset).addReg(Addr.Base.Reg);

  // Indexed form.
  } else {
    // Get the RR opcode corresponding to the RI one.  FIXME: It would be
    // preferable to use the ImmToIdxMap from PPCRegisterInfo.cpp, but it
    // is hard to get at.
    switch (Opc) {
      default:          llvm_unreachable("Unexpected opcode!");
      case PPC::STB:    Opc = PPC::STBX;  break;
      case PPC::STH :   Opc = PPC::STHX;  break;
      case PPC::STW :   Opc = PPC::STWX;  break;
      case PPC::STB8:   Opc = PPC::STBX8; break;
      case PPC::STH8:   Opc = PPC::STHX8; break;
      case PPC::STW8:   Opc = PPC::STWX8; break;
      case PPC::STD:    Opc = PPC::STDX;  break;
      case PPC::STFS:   Opc = IsVSSRC ? PPC::STXSSPX : PPC::STFSX; break;
      case PPC::STFD:   Opc = IsVSFRC ? PPC::STXSDX : PPC::STFDX; break;
      case PPC::EVSTDD: Opc = PPC::EVSTDDX; break;
      case PPC::SPESTW: Opc = PPC::SPESTWX; break;
    }

    auto MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc))
        .addReg(SrcReg);

    // If we have an index register defined we use it in the store inst,
    // otherwise we use X0 as base as it makes the vector instructions to
    // use zero in the computation of the effective address regardless the
    // content of the register.
    if (IndexReg)
      MIB.addReg(Addr.Base.Reg).addReg(IndexReg);
    else
      MIB.addReg(PPC::ZERO8).addReg(Addr.Base.Reg);
  }

  return true;
}
  651. // Attempt to fast-select a store instruction.
  652. bool PPCFastISel::SelectStore(const Instruction *I) {
  653. Value *Op0 = I->getOperand(0);
  654. unsigned SrcReg = 0;
  655. // FIXME: No atomics loads are supported.
  656. if (cast<StoreInst>(I)->isAtomic())
  657. return false;
  658. // Verify we have a legal type before going any further.
  659. MVT VT;
  660. if (!isLoadTypeLegal(Op0->getType(), VT))
  661. return false;
  662. // Get the value to be stored into a register.
  663. SrcReg = getRegForValue(Op0);
  664. if (SrcReg == 0)
  665. return false;
  666. // See if we can handle this address.
  667. Address Addr;
  668. if (!PPCComputeAddress(I->getOperand(1), Addr))
  669. return false;
  670. if (!PPCEmitStore(VT, SrcReg, Addr))
  671. return false;
  672. return true;
  673. }
  674. // Attempt to fast-select a branch instruction.
  675. bool PPCFastISel::SelectBranch(const Instruction *I) {
  676. const BranchInst *BI = cast<BranchInst>(I);
  677. MachineBasicBlock *BrBB = FuncInfo.MBB;
  678. MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
  679. MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)];
  680. // For now, just try the simplest case where it's fed by a compare.
  681. if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) {
  682. if (isValueAvailable(CI)) {
  683. Optional<PPC::Predicate> OptPPCPred = getComparePred(CI->getPredicate());
  684. if (!OptPPCPred)
  685. return false;
  686. PPC::Predicate PPCPred = OptPPCPred.getValue();
  687. // Take advantage of fall-through opportunities.
  688. if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
  689. std::swap(TBB, FBB);
  690. PPCPred = PPC::InvertPredicate(PPCPred);
  691. }
  692. unsigned CondReg = createResultReg(&PPC::CRRCRegClass);
  693. if (!PPCEmitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned(),
  694. CondReg, PPCPred))
  695. return false;
  696. BuildMI(*BrBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::BCC))
  697. .addImm(Subtarget->hasSPE() ? PPC::PRED_SPE : PPCPred)
  698. .addReg(CondReg)
  699. .addMBB(TBB);
  700. finishCondBranch(BI->getParent(), TBB, FBB);
  701. return true;
  702. }
  703. } else if (const ConstantInt *CI =
  704. dyn_cast<ConstantInt>(BI->getCondition())) {
  705. uint64_t Imm = CI->getZExtValue();
  706. MachineBasicBlock *Target = (Imm == 0) ? FBB : TBB;
  707. fastEmitBranch(Target, DbgLoc);
  708. return true;
  709. }
  710. // FIXME: ARM looks for a case where the block containing the compare
  711. // has been split from the block containing the branch. If this happens,
  712. // there is a vreg available containing the result of the compare. I'm
  713. // not sure we can do much, as we've lost the predicate information with
  714. // the compare instruction -- we have a 4-bit CR but don't know which bit
  715. // to test here.
  716. return false;
  717. }
// Attempt to emit a compare of the two source values.  Signed and unsigned
// comparisons are supported.  Return false if we can't handle it.
// IsZExt selects the unsigned compare forms (the branch selector passes
// CI->isUnsigned() here); the result is written to the CR register DestReg.
bool PPCFastISel::PPCEmitCmp(const Value *SrcValue1, const Value *SrcValue2,
                             bool IsZExt, unsigned DestReg,
                             const PPC::Predicate Pred) {
  Type *Ty = SrcValue1->getType();
  EVT SrcEVT = TLI.getValueType(DL, Ty, true);
  if (!SrcEVT.isSimple())
    return false;
  MVT SrcVT = SrcEVT.getSimpleVT();

  // When CR bits are in use, i1 values live in condition registers already;
  // leave those comparisons to the DAG selector.
  if (SrcVT == MVT::i1 && Subtarget->useCRBits())
    return false;

  // See if operand 2 is an immediate encodeable in the compare.
  // FIXME: Operands are not in canonical order at -O0, so an immediate
  // operand in position 1 is a lost opportunity for now.  We are
  // similar to ARM in this regard.
  long Imm = 0;
  bool UseImm = false;
  const bool HasSPE = Subtarget->hasSPE();

  // Only 16-bit integer constants can be represented in compares for
  // PowerPC.  Others will be materialized into a register.
  if (const ConstantInt *ConstInt = dyn_cast<ConstantInt>(SrcValue2)) {
    if (SrcVT == MVT::i64 || SrcVT == MVT::i32 || SrcVT == MVT::i16 ||
        SrcVT == MVT::i8 || SrcVT == MVT::i1) {
      const APInt &CIVal = ConstInt->getValue();
      Imm = (IsZExt) ? (long)CIVal.getZExtValue() : (long)CIVal.getSExtValue();
      if ((IsZExt && isUInt<16>(Imm)) || (!IsZExt && isInt<16>(Imm)))
        UseImm = true;
    }
  }

  unsigned SrcReg1 = getRegForValue(SrcValue1);
  if (SrcReg1 == 0)
    return false;

  // Only materialize the second operand when it isn't folded as an immediate.
  unsigned SrcReg2 = 0;
  if (!UseImm) {
    SrcReg2 = getRegForValue(SrcValue2);
    if (SrcReg2 == 0)
      return false;
  }

  unsigned CmpOpc;
  bool NeedsExt = false;

  auto RC1 = MRI.getRegClass(SrcReg1);
  auto RC2 = SrcReg2 != 0 ? MRI.getRegClass(SrcReg2) : nullptr;

  switch (SrcVT.SimpleTy) {
    default: return false;
    case MVT::f32:
      if (HasSPE) {
        // SPE provides one compare instruction per predicate; only EQ, LT,
        // and GT have direct forms, so other predicates are rejected.
        switch (Pred) {
          default: return false;
          case PPC::PRED_EQ:
            CmpOpc = PPC::EFSCMPEQ;
            break;
          case PPC::PRED_LT:
            CmpOpc = PPC::EFSCMPLT;
            break;
          case PPC::PRED_GT:
            CmpOpc = PPC::EFSCMPGT;
            break;
        }
      } else {
        CmpOpc = PPC::FCMPUS;
        // FCMPUS wants F4RC operands; copy out of the VSX single class
        // when an operand was selected into it.
        if (isVSSRCRegClass(RC1))
          SrcReg1 = copyRegToRegClass(&PPC::F4RCRegClass, SrcReg1);
        if (RC2 && isVSSRCRegClass(RC2))
          SrcReg2 = copyRegToRegClass(&PPC::F4RCRegClass, SrcReg2);
      }
      break;
    case MVT::f64:
      if (HasSPE) {
        // Same predicate restriction as the f32 SPE case above.
        switch (Pred) {
          default: return false;
          case PPC::PRED_EQ:
            CmpOpc = PPC::EFDCMPEQ;
            break;
          case PPC::PRED_LT:
            CmpOpc = PPC::EFDCMPLT;
            break;
          case PPC::PRED_GT:
            CmpOpc = PPC::EFDCMPGT;
            break;
        }
      } else if (isVSFRCRegClass(RC1) || (RC2 && isVSFRCRegClass(RC2))) {
        // Use the VSX compare if either operand is already in a VSX class.
        CmpOpc = PPC::XSCMPUDP;
      } else {
        CmpOpc = PPC::FCMPUD;
      }
      break;
    case MVT::i1:
    case MVT::i8:
    case MVT::i16:
      // Sub-word operands are widened to i32 (below) before comparing.
      NeedsExt = true;
      LLVM_FALLTHROUGH;
    case MVT::i32:
      if (!UseImm)
        CmpOpc = IsZExt ? PPC::CMPLW : PPC::CMPW;
      else
        CmpOpc = IsZExt ? PPC::CMPLWI : PPC::CMPWI;
      break;
    case MVT::i64:
      if (!UseImm)
        CmpOpc = IsZExt ? PPC::CMPLD : PPC::CMPD;
      else
        CmpOpc = IsZExt ? PPC::CMPLDI : PPC::CMPDI;
      break;
  }

  // Widen sub-word operands; the extension kind matches the compare kind.
  if (NeedsExt) {
    unsigned ExtReg = createResultReg(&PPC::GPRCRegClass);
    if (!PPCEmitIntExt(SrcVT, SrcReg1, MVT::i32, ExtReg, IsZExt))
      return false;
    SrcReg1 = ExtReg;

    if (!UseImm) {
      unsigned ExtReg = createResultReg(&PPC::GPRCRegClass);
      if (!PPCEmitIntExt(SrcVT, SrcReg2, MVT::i32, ExtReg, IsZExt))
        return false;
      SrcReg2 = ExtReg;
    }
  }

  if (!UseImm)
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CmpOpc), DestReg)
      .addReg(SrcReg1).addReg(SrcReg2);
  else
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CmpOpc), DestReg)
      .addReg(SrcReg1).addImm(Imm);

  return true;
}
  843. // Attempt to fast-select a floating-point extend instruction.
  844. bool PPCFastISel::SelectFPExt(const Instruction *I) {
  845. Value *Src = I->getOperand(0);
  846. EVT SrcVT = TLI.getValueType(DL, Src->getType(), true);
  847. EVT DestVT = TLI.getValueType(DL, I->getType(), true);
  848. if (SrcVT != MVT::f32 || DestVT != MVT::f64)
  849. return false;
  850. unsigned SrcReg = getRegForValue(Src);
  851. if (!SrcReg)
  852. return false;
  853. // No code is generated for a FP extend.
  854. updateValueMap(I, SrcReg);
  855. return true;
  856. }
  857. // Attempt to fast-select a floating-point truncate instruction.
  858. bool PPCFastISel::SelectFPTrunc(const Instruction *I) {
  859. Value *Src = I->getOperand(0);
  860. EVT SrcVT = TLI.getValueType(DL, Src->getType(), true);
  861. EVT DestVT = TLI.getValueType(DL, I->getType(), true);
  862. if (SrcVT != MVT::f64 || DestVT != MVT::f32)
  863. return false;
  864. unsigned SrcReg = getRegForValue(Src);
  865. if (!SrcReg)
  866. return false;
  867. // Round the result to single precision.
  868. unsigned DestReg;
  869. auto RC = MRI.getRegClass(SrcReg);
  870. if (Subtarget->hasSPE()) {
  871. DestReg = createResultReg(&PPC::GPRCRegClass);
  872. BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
  873. TII.get(PPC::EFSCFD), DestReg)
  874. .addReg(SrcReg);
  875. } else if (isVSFRCRegClass(RC)) {
  876. DestReg = createResultReg(&PPC::VSSRCRegClass);
  877. BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
  878. TII.get(PPC::XSRSP), DestReg)
  879. .addReg(SrcReg);
  880. } else {
  881. DestReg = createResultReg(&PPC::F4RCRegClass);
  882. BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
  883. TII.get(PPC::FRSP), DestReg)
  884. .addReg(SrcReg);
  885. }
  886. updateValueMap(I, DestReg);
  887. return true;
  888. }
// Move an i32 or i64 value in a GPR to an f64 value in an FPR.
// Returns the FPR virtual register, or 0 on failure.
// FIXME: When direct register moves are implemented (see PowerISA 2.07),
// those should be used instead of moving via a stack slot when the
// subtarget permits.
// FIXME: The code here is sloppy for the 4-byte case.  Can use a 4-byte
// stack slot and 4-byte store/load sequence.  Or just sext the 4-byte
// case to 8 bytes which produces tighter code but wastes stack space.
unsigned PPCFastISel::PPCMoveToFPReg(MVT SrcVT, unsigned SrcReg,
                                     bool IsSigned) {
  // If necessary, extend 32-bit int to 64-bit.
  if (SrcVT == MVT::i32) {
    unsigned TmpReg = createResultReg(&PPC::G8RCRegClass);
    if (!PPCEmitIntExt(MVT::i32, SrcReg, MVT::i64, TmpReg, !IsSigned))
      return 0;
    // Note: SrcVT deliberately stays i32 here; the load-opcode choice
    // below still depends on the *original* width.
    SrcReg = TmpReg;
  }

  // Get a stack slot 8 bytes wide, aligned on an 8-byte boundary.
  Address Addr;
  Addr.BaseType = Address::FrameIndexBase;
  Addr.Base.FI = MFI.CreateStackObject(8, Align(8), false);

  // Store the value from the GPR.
  if (!PPCEmitStore(MVT::i64, SrcReg, Addr))
    return 0;

  // Load the integer value into an FPR.  The kind of load used depends
  // on a number of conditions.
  unsigned LoadOpc = PPC::LFD;

  if (SrcVT == MVT::i32) {
    if (!IsSigned) {
      LoadOpc = PPC::LFIWZX;
      // On big-endian targets the low 32 bits of the slot sit at offset 4.
      Addr.Offset = (Subtarget->isLittleEndian()) ? 0 : 4;
    } else if (Subtarget->hasLFIWAX()) {
      LoadOpc = PPC::LFIWAX;
      Addr.Offset = (Subtarget->isLittleEndian()) ? 0 : 4;
    }
  }

  const TargetRegisterClass *RC = &PPC::F8RCRegClass;
  Register ResultReg = 0;
  if (!PPCEmitLoad(MVT::f64, ResultReg, Addr, RC, !IsSigned, LoadOpc))
    return 0;

  return ResultReg;
}
// Attempt to fast-select an integer-to-floating-point conversion.
// IsSigned distinguishes sitofp from uitofp.
// FIXME: Once fast-isel has better support for VSX, conversions using
// direct moves should be implemented.
bool PPCFastISel::SelectIToFP(const Instruction *I, bool IsSigned) {
  MVT DstVT;
  Type *DstTy = I->getType();
  if (!isTypeLegal(DstTy, DstVT))
    return false;

  if (DstVT != MVT::f32 && DstVT != MVT::f64)
    return false;

  Value *Src = I->getOperand(0);
  EVT SrcEVT = TLI.getValueType(DL, Src->getType(), true);
  if (!SrcEVT.isSimple())
    return false;

  MVT SrcVT = SrcEVT.getSimpleVT();

  if (SrcVT != MVT::i8  && SrcVT != MVT::i16 &&
      SrcVT != MVT::i32 && SrcVT != MVT::i64)
    return false;

  unsigned SrcReg = getRegForValue(Src);
  if (SrcReg == 0)
    return false;

  // Shortcut for SPE.  Doesn't need to store/load, since it's all in the GPRs
  if (Subtarget->hasSPE()) {
    unsigned Opc;
    if (DstVT == MVT::f32)
      Opc = IsSigned ? PPC::EFSCFSI : PPC::EFSCFUI;
    else
      Opc = IsSigned ? PPC::EFDCFSI : PPC::EFDCFUI;

    unsigned DestReg = createResultReg(&PPC::SPERCRegClass);
    // Generate the convert.
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), DestReg)
      .addReg(SrcReg);
    updateValueMap(I, DestReg);
    return true;
  }

  // We can only lower an unsigned convert if we have the newer
  // floating-point conversion operations.
  if (!IsSigned && !Subtarget->hasFPCVT())
    return false;

  // FIXME: For now we require the newer floating-point conversion operations
  // (which are present only on P7 and A2 server models) when converting
  // to single-precision float.  Otherwise we have to generate a lot of
  // fiddly code to avoid double rounding.  If necessary, the fiddly code
  // can be found in PPCTargetLowering::LowerINT_TO_FP().
  if (DstVT == MVT::f32 && !Subtarget->hasFPCVT())
    return false;

  // Extend the input if necessary.
  if (SrcVT == MVT::i8 || SrcVT == MVT::i16) {
    unsigned TmpReg = createResultReg(&PPC::G8RCRegClass);
    if (!PPCEmitIntExt(SrcVT, SrcReg, MVT::i64, TmpReg, !IsSigned))
      return false;
    SrcVT = MVT::i64;
    SrcReg = TmpReg;
  }

  // Move the integer value to an FPR (via a stack slot round-trip).
  unsigned FPReg = PPCMoveToFPReg(SrcVT, SrcReg, IsSigned);
  if (FPReg == 0)
    return false;

  // Determine the opcode for the conversion.
  const TargetRegisterClass *RC = &PPC::F8RCRegClass;
  unsigned DestReg = createResultReg(RC);
  unsigned Opc;

  if (DstVT == MVT::f32)
    Opc = IsSigned ? PPC::FCFIDS : PPC::FCFIDUS;
  else
    Opc = IsSigned ? PPC::FCFID : PPC::FCFIDU;

  // Generate the convert.
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), DestReg)
    .addReg(FPReg);

  updateValueMap(I, DestReg);
  return true;
}
// Move the floating-point value in SrcReg into an integer destination
// register, and return the register (or zero if we can't handle it).
// FIXME: When direct register moves are implemented (see PowerISA 2.07),
// those should be used instead of moving via a stack slot when the
// subtarget permits.
unsigned PPCFastISel::PPCMoveToIntReg(const Instruction *I, MVT VT,
                                      unsigned SrcReg, bool IsSigned) {
  // Get a stack slot 8 bytes wide, aligned on an 8-byte boundary.
  // Note that if have STFIWX available, we could use a 4-byte stack
  // slot for i32, but this being fast-isel we'll just go with the
  // easiest code gen possible.
  Address Addr;
  Addr.BaseType = Address::FrameIndexBase;
  Addr.Base.FI = MFI.CreateStackObject(8, Align(8), false);

  // Store the value from the FPR.
  if (!PPCEmitStore(MVT::f64, SrcReg, Addr))
    return 0;

  // Reload it into a GPR.  If we want an i32 on big endian, modify the
  // address to have a 4-byte offset so we load from the right place.
  if (VT == MVT::i32)
    Addr.Offset = (Subtarget->isLittleEndian()) ? 0 : 4;

  // Look at the currently assigned register for this instruction
  // to determine the required register class.
  unsigned AssignedReg = FuncInfo.ValueMap[I];
  const TargetRegisterClass *RC =
    AssignedReg ? MRI.getRegClass(AssignedReg) : nullptr;

  Register ResultReg = 0;
  if (!PPCEmitLoad(VT, ResultReg, Addr, RC, !IsSigned))
    return 0;

  return ResultReg;
}
// Attempt to fast-select a floating-point-to-integer conversion.
// IsSigned distinguishes fptosi from fptoui.
// FIXME: Once fast-isel has better support for VSX, conversions using
// direct moves should be implemented.
bool PPCFastISel::SelectFPToI(const Instruction *I, bool IsSigned) {
  MVT DstVT, SrcVT;
  Type *DstTy = I->getType();
  if (!isTypeLegal(DstTy, DstVT))
    return false;

  if (DstVT != MVT::i32 && DstVT != MVT::i64)
    return false;

  // If we don't have FCTIDUZ, or SPE, and we need it, punt to SelectionDAG.
  if (DstVT == MVT::i64 && !IsSigned && !Subtarget->hasFPCVT() &&
      !Subtarget->hasSPE())
    return false;

  Value *Src = I->getOperand(0);
  Type *SrcTy = Src->getType();
  if (!isTypeLegal(SrcTy, SrcVT))
    return false;

  if (SrcVT != MVT::f32 && SrcVT != MVT::f64)
    return false;

  unsigned SrcReg = getRegForValue(Src);
  if (SrcReg == 0)
    return false;

  // Convert f32 to f64 or convert VSSRC to VSFRC if necessary.  This is just a
  // meaningless copy to get the register class right.
  const TargetRegisterClass *InRC = MRI.getRegClass(SrcReg);
  if (InRC == &PPC::F4RCRegClass)
    SrcReg = copyRegToRegClass(&PPC::F8RCRegClass, SrcReg);
  else if (InRC == &PPC::VSSRCRegClass)
    SrcReg = copyRegToRegClass(&PPC::VSFRCRegClass, SrcReg);

  // Determine the opcode for the conversion, which takes place
  // entirely within FPRs or VSRs.
  unsigned DestReg;
  unsigned Opc;
  auto RC = MRI.getRegClass(SrcReg);

  if (Subtarget->hasSPE()) {
    DestReg = createResultReg(&PPC::GPRCRegClass);
    if (IsSigned)
      Opc = InRC == &PPC::GPRCRegClass ? PPC::EFSCTSIZ : PPC::EFDCTSIZ;
    else
      Opc = InRC == &PPC::GPRCRegClass ? PPC::EFSCTUIZ : PPC::EFDCTUIZ;
  } else if (isVSFRCRegClass(RC)) {
    DestReg = createResultReg(&PPC::VSFRCRegClass);
    if (DstVT == MVT::i32)
      Opc = IsSigned ? PPC::XSCVDPSXWS : PPC::XSCVDPUXWS;
    else
      Opc = IsSigned ? PPC::XSCVDPSXDS : PPC::XSCVDPUXDS;
  } else {
    DestReg = createResultReg(&PPC::F8RCRegClass);
    if (DstVT == MVT::i32)
      if (IsSigned)
        Opc = PPC::FCTIWZ;
      else
        // Without FPCVT there is no FCTIWUZ; FCTIDZ is used instead —
        // presumably the signed 64-bit conversion covers the u32 range and
        // only the low word is consumed downstream.  NOTE(review): confirm.
        Opc = Subtarget->hasFPCVT() ? PPC::FCTIWUZ : PPC::FCTIDZ;
    else
      Opc = IsSigned ? PPC::FCTIDZ : PPC::FCTIDUZ;
  }

  // Generate the convert.
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), DestReg)
    .addReg(SrcReg);

  // Now move the integer value from a float register to an integer register.
  // For SPE the conversion already produced a GPR, so no move is needed.
  unsigned IntReg = Subtarget->hasSPE()
                        ? DestReg
                        : PPCMoveToIntReg(I, DstVT, DestReg, IsSigned);

  if (IntReg == 0)
    return false;

  updateValueMap(I, IntReg);
  return true;
}
// Attempt to fast-select a binary integer operation that isn't already
// handled automatically.  Only ADD/OR/SUB on the non-legal i8/i16 types
// are handled here (see the ISDOpcode switch below).
bool PPCFastISel::SelectBinaryIntOp(const Instruction *I, unsigned ISDOpcode) {
  EVT DestVT = TLI.getValueType(DL, I->getType(), true);

  // We can get here in the case when we have a binary operation on a non-legal
  // type and the target independent selector doesn't know how to handle it.
  if (DestVT != MVT::i16 && DestVT != MVT::i8)
    return false;

  // Look at the currently assigned register for this instruction
  // to determine the required register class.  If there is no register,
  // make a conservative choice (don't assign R0).
  unsigned AssignedReg = FuncInfo.ValueMap[I];
  const TargetRegisterClass *RC =
    (AssignedReg ? MRI.getRegClass(AssignedReg) :
     &PPC::GPRC_and_GPRC_NOR0RegClass);
  bool IsGPRC = RC->hasSuperClassEq(&PPC::GPRCRegClass);

  // Pick the 32-bit or 64-bit register-register opcode per the class above.
  unsigned Opc;
  switch (ISDOpcode) {
    default: return false;
    case ISD::ADD:
      Opc = IsGPRC ? PPC::ADD4 : PPC::ADD8;
      break;
    case ISD::OR:
      Opc = IsGPRC ? PPC::OR : PPC::OR8;
      break;
    case ISD::SUB:
      Opc = IsGPRC ? PPC::SUBF : PPC::SUBF8;
      break;
  }

  unsigned ResultReg = createResultReg(RC ? RC : &PPC::G8RCRegClass);
  unsigned SrcReg1 = getRegForValue(I->getOperand(0));
  if (SrcReg1 == 0) return false;

  // Handle case of small immediate operand.
  if (const ConstantInt *ConstInt = dyn_cast<ConstantInt>(I->getOperand(1))) {
    const APInt &CIVal = ConstInt->getValue();
    int Imm = (int)CIVal.getSExtValue();
    bool UseImm = true;
    if (isInt<16>(Imm)) {
      // Switch to the immediate form.  ADDI treats R0/X0 as the constant
      // zero, so the first source must be constrained away from it.
      switch (Opc) {
        default:
          llvm_unreachable("Missing case!");
        case PPC::ADD4:
          Opc = PPC::ADDI;
          MRI.setRegClass(SrcReg1, &PPC::GPRC_and_GPRC_NOR0RegClass);
          break;
        case PPC::ADD8:
          Opc = PPC::ADDI8;
          MRI.setRegClass(SrcReg1, &PPC::G8RC_and_G8RC_NOX0RegClass);
          break;
        case PPC::OR:
          Opc = PPC::ORI;
          break;
        case PPC::OR8:
          Opc = PPC::ORI8;
          break;
        case PPC::SUBF:
          // x - imm is rewritten as x + (-imm); -(-32768) is not a valid
          // 16-bit immediate, so fall back to the register form there.
          if (Imm == -32768)
            UseImm = false;
          else {
            Opc = PPC::ADDI;
            MRI.setRegClass(SrcReg1, &PPC::GPRC_and_GPRC_NOR0RegClass);
            Imm = -Imm;
          }
          break;
        case PPC::SUBF8:
          if (Imm == -32768)
            UseImm = false;
          else {
            Opc = PPC::ADDI8;
            MRI.setRegClass(SrcReg1, &PPC::G8RC_and_G8RC_NOX0RegClass);
            Imm = -Imm;
          }
          break;
      }

      if (UseImm) {
        BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc),
                ResultReg)
            .addReg(SrcReg1)
            .addImm(Imm);
        updateValueMap(I, ResultReg);
        return true;
      }
    }
  }

  // Reg-reg case.
  unsigned SrcReg2 = getRegForValue(I->getOperand(1));
  if (SrcReg2 == 0) return false;

  // Reverse operands for subtract-from.
  if (ISDOpcode == ISD::SUB)
    std::swap(SrcReg1, SrcReg2);

  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
    .addReg(SrcReg1).addReg(SrcReg2);
  updateValueMap(I, ResultReg);
  return true;
}
// Handle arguments to a call that we're attempting to fast-select.
// Computes register assignments for the arguments, emits the CALLSEQ_START
// and the copies into the argument registers, and reports the stack size
// in NumBytes.  Return false if the arguments are too complex for us at
// the moment.
bool PPCFastISel::processCallArgs(SmallVectorImpl<Value*> &Args,
                                  SmallVectorImpl<unsigned> &ArgRegs,
                                  SmallVectorImpl<MVT> &ArgVTs,
                                  SmallVectorImpl<ISD::ArgFlagsTy> &ArgFlags,
                                  SmallVectorImpl<unsigned> &RegArgs,
                                  CallingConv::ID CC,
                                  unsigned &NumBytes,
                                  bool IsVarArg) {
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CC, IsVarArg, *FuncInfo.MF, ArgLocs, *Context);

  // Reserve space for the linkage area on the stack.
  unsigned LinkageSize = Subtarget->getFrameLowering()->getLinkageSize();
  CCInfo.AllocateStack(LinkageSize, Align(8));

  CCInfo.AnalyzeCallOperands(ArgVTs, ArgFlags, CC_PPC64_ELF_FIS);

  // Bail out if we can't handle any of the arguments.
  for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
    CCValAssign &VA = ArgLocs[I];
    MVT ArgVT = ArgVTs[VA.getValNo()];

    // Skip vector arguments for now, as well as long double and
    // uint128_t, and anything that isn't passed in a register.
    if (ArgVT.isVector() || ArgVT.getSizeInBits() > 64 || ArgVT == MVT::i1 ||
        !VA.isRegLoc() || VA.needsCustom())
      return false;

    // Skip bit-converted arguments for now.
    if (VA.getLocInfo() == CCValAssign::BCvt)
      return false;
  }

  // Get a count of how many bytes are to be pushed onto the stack.
  NumBytes = CCInfo.getNextStackOffset();

  // The prolog code of the callee may store up to 8 GPR argument registers to
  // the stack, allowing va_start to index over them in memory if its varargs.
  // Because we cannot tell if this is needed on the caller side, we have to
  // conservatively assume that it is needed.  As such, make sure we have at
  // least enough stack space for the caller to store the 8 GPRs.
  // FIXME: On ELFv2, it may be unnecessary to allocate the parameter area.
  NumBytes = std::max(NumBytes, LinkageSize + 64);

  // Issue CALLSEQ_START.
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
          TII.get(TII.getCallFrameSetupOpcode()))
    .addImm(NumBytes).addImm(0);

  // Prepare to assign register arguments.  Every argument uses up a
  // GPR protocol register even if it's passed in a floating-point
  // register (unless we're using the fast calling convention).
  // NOTE: the ++ on these relies on X3..X10 / F1..F13 having consecutive
  // register numbers.
  unsigned NextGPR = PPC::X3;
  unsigned NextFPR = PPC::F1;

  // Process arguments.
  for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
    CCValAssign &VA = ArgLocs[I];
    unsigned Arg = ArgRegs[VA.getValNo()];
    MVT ArgVT = ArgVTs[VA.getValNo()];

    // Handle argument promotion and bitcasts.
    switch (VA.getLocInfo()) {
      default:
        llvm_unreachable("Unknown loc info!");
      case CCValAssign::Full:
        break;
      case CCValAssign::SExt: {
        MVT DestVT = VA.getLocVT();
        const TargetRegisterClass *RC =
          (DestVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
        unsigned TmpReg = createResultReg(RC);
        if (!PPCEmitIntExt(ArgVT, Arg, DestVT, TmpReg, /*IsZExt*/false))
          llvm_unreachable("Failed to emit a sext!");
        ArgVT = DestVT;
        Arg = TmpReg;
        break;
      }
      case CCValAssign::AExt:
      case CCValAssign::ZExt: {
        MVT DestVT = VA.getLocVT();
        const TargetRegisterClass *RC =
          (DestVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
        unsigned TmpReg = createResultReg(RC);
        if (!PPCEmitIntExt(ArgVT, Arg, DestVT, TmpReg, /*IsZExt*/true))
          llvm_unreachable("Failed to emit a zext!");
        ArgVT = DestVT;
        Arg = TmpReg;
        break;
      }
      case CCValAssign::BCvt: {
        // FIXME: Not yet handled.
        llvm_unreachable("Should have bailed before getting here!");
        break;
      }
    }

    // Copy this argument to the appropriate register.
    unsigned ArgReg;
    if (ArgVT == MVT::f32 || ArgVT == MVT::f64) {
      ArgReg = NextFPR++;
      // A float argument still consumes a GPR slot in the ABI protocol,
      // except under the fast calling convention.
      if (CC != CallingConv::Fast)
        ++NextGPR;
    } else
      ArgReg = NextGPR++;

    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
            TII.get(TargetOpcode::COPY), ArgReg).addReg(Arg);
    RegArgs.push_back(ArgReg);
  }

  return true;
}
  1298. // For a call that we've determined we can fast-select, finish the
  1299. // call sequence and generate a copy to obtain the return value (if any).
  1300. bool PPCFastISel::finishCall(MVT RetVT, CallLoweringInfo &CLI, unsigned &NumBytes) {
  1301. CallingConv::ID CC = CLI.CallConv;
  1302. // Issue CallSEQ_END.
  1303. BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
  1304. TII.get(TII.getCallFrameDestroyOpcode()))
  1305. .addImm(NumBytes).addImm(0);
  1306. // Next, generate a copy to obtain the return value.
  1307. // FIXME: No multi-register return values yet, though I don't foresee
  1308. // any real difficulties there.
  1309. if (RetVT != MVT::isVoid) {
  1310. SmallVector<CCValAssign, 16> RVLocs;
  1311. CCState CCInfo(CC, false, *FuncInfo.MF, RVLocs, *Context);
  1312. CCInfo.AnalyzeCallResult(RetVT, RetCC_PPC64_ELF_FIS);
  1313. CCValAssign &VA = RVLocs[0];
  1314. assert(RVLocs.size() == 1 && "No support for multi-reg return values!");
  1315. assert(VA.isRegLoc() && "Can only return in registers!");
  1316. MVT DestVT = VA.getValVT();
  1317. MVT CopyVT = DestVT;
  1318. // Ints smaller than a register still arrive in a full 64-bit
  1319. // register, so make sure we recognize this.
  1320. if (RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32)
  1321. CopyVT = MVT::i64;
  1322. unsigned SourcePhysReg = VA.getLocReg();
  1323. unsigned ResultReg = 0;
  1324. if (RetVT == CopyVT) {
  1325. const TargetRegisterClass *CpyRC = TLI.getRegClassFor(CopyVT);
  1326. ResultReg = copyRegToRegClass(CpyRC, SourcePhysReg);
  1327. // If necessary, round the floating result to single precision.
  1328. } else if (CopyVT == MVT::f64) {
  1329. ResultReg = createResultReg(TLI.getRegClassFor(RetVT));
  1330. BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::FRSP),
  1331. ResultReg).addReg(SourcePhysReg);
  1332. // If only the low half of a general register is needed, generate
  1333. // a GPRC copy instead of a G8RC copy. (EXTRACT_SUBREG can't be
  1334. // used along the fast-isel path (not lowered), and downstream logic
  1335. // also doesn't like a direct subreg copy on a physical reg.)
  1336. } else if (RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32) {
  1337. // Convert physical register from G8RC to GPRC.
  1338. SourcePhysReg -= PPC::X0 - PPC::R0;
  1339. ResultReg = copyRegToRegClass(&PPC::GPRCRegClass, SourcePhysReg);
  1340. }
  1341. assert(ResultReg && "ResultReg unset!");
  1342. CLI.InRegs.push_back(SourcePhysReg);
  1343. CLI.ResultReg = ResultReg;
  1344. CLI.NumResultRegs = 1;
  1345. }
  1346. return true;
  1347. }
// Attempt to fast-select a call instruction, lowering arguments and
// emitting the call directly without going through SelectionDAG.
// Returns false (falling back to SDAG) for any unsupported case: tail
// calls, varargs, PC-relative calls, unusual return/argument types,
// more than 8 arguments, or indirect calls (except patchpoints).
bool PPCFastISel::fastLowerCall(CallLoweringInfo &CLI) {
  CallingConv::ID CC = CLI.CallConv;
  bool IsTailCall = CLI.IsTailCall;
  bool IsVarArg = CLI.IsVarArg;
  const Value *Callee = CLI.Callee;
  const MCSymbol *Symbol = CLI.Symbol;

  if (!Callee && !Symbol)
    return false;

  // Allow SelectionDAG isel to handle tail calls.
  if (IsTailCall)
    return false;

  // Let SDISel handle vararg functions.
  if (IsVarArg)
    return false;

  // If this is a PC-Rel function, let SDISel handle the call.
  if (Subtarget->isUsingPCRelativeCalls())
    return false;

  // Handle simple calls for now, with legal return types and
  // those that can be extended. Note: this relies on isTypeLegal
  // populating RetVT even when it returns false, so that the i8/i16
  // promotion cases can be accepted here.
  Type *RetTy = CLI.RetTy;
  MVT RetVT;
  if (RetTy->isVoidTy())
    RetVT = MVT::isVoid;
  else if (!isTypeLegal(RetTy, RetVT) && RetVT != MVT::i16 &&
           RetVT != MVT::i8)
    return false;
  else if (RetVT == MVT::i1 && Subtarget->useCRBits())
    // We can't handle boolean returns when CR bits are in use.
    return false;

  // FIXME: No multi-register return values yet.
  if (RetVT != MVT::isVoid && RetVT != MVT::i8 && RetVT != MVT::i16 &&
      RetVT != MVT::i32 && RetVT != MVT::i64 && RetVT != MVT::f32 &&
      RetVT != MVT::f64) {
    SmallVector<CCValAssign, 16> RVLocs;
    CCState CCInfo(CC, IsVarArg, *FuncInfo.MF, RVLocs, *Context);
    CCInfo.AnalyzeCallResult(RetVT, RetCC_PPC64_ELF_FIS);
    if (RVLocs.size() > 1)
      return false;
  }

  // Bail early if more than 8 arguments, as we only currently
  // handle arguments passed in registers.
  unsigned NumArgs = CLI.OutVals.size();
  if (NumArgs > 8)
    return false;

  // Set up the argument vectors.
  SmallVector<Value*, 8> Args;
  SmallVector<unsigned, 8> ArgRegs;
  SmallVector<MVT, 8> ArgVTs;
  SmallVector<ISD::ArgFlagsTy, 8> ArgFlags;

  Args.reserve(NumArgs);
  ArgRegs.reserve(NumArgs);
  ArgVTs.reserve(NumArgs);
  ArgFlags.reserve(NumArgs);

  for (unsigned i = 0, ie = NumArgs; i != ie; ++i) {
    // Only handle easy calls for now. It would be reasonably easy
    // to handle <= 8-byte structures passed ByVal in registers, but we
    // have to ensure they are right-justified in the register.
    ISD::ArgFlagsTy Flags = CLI.OutFlags[i];
    if (Flags.isInReg() || Flags.isSRet() || Flags.isNest() || Flags.isByVal())
      return false;

    Value *ArgValue = CLI.OutVals[i];
    Type *ArgTy = ArgValue->getType();
    MVT ArgVT;
    if (!isTypeLegal(ArgTy, ArgVT) && ArgVT != MVT::i16 && ArgVT != MVT::i8)
      return false;

    // FIXME: FastISel cannot handle non-simple types yet, including 128-bit FP
    // types, which is passed through vector register. Skip these types and
    // fallback to default SelectionDAG based selection.
    if (ArgVT.isVector() || ArgVT == MVT::f128)
      return false;

    unsigned Arg = getRegForValue(ArgValue);
    if (Arg == 0)
      return false;

    Args.push_back(ArgValue);
    ArgRegs.push_back(Arg);
    ArgVTs.push_back(ArgVT);
    ArgFlags.push_back(Flags);
  }

  // Process the arguments: copies argument values into the ABI-assigned
  // physical registers and emits CALLSEQ_START.
  SmallVector<unsigned, 8> RegArgs;
  unsigned NumBytes;

  if (!processCallArgs(Args, ArgRegs, ArgVTs, ArgFlags,
                       RegArgs, CC, NumBytes, IsVarArg))
    return false;

  MachineInstrBuilder MIB;
  // FIXME: No handling for function pointers yet. This requires
  // implementing the function descriptor (OPD) setup.
  const GlobalValue *GV = dyn_cast<GlobalValue>(Callee);
  if (!GV) {
    // patchpoints are a special case; they always dispatch to a pointer value.
    // However, we don't actually want to generate the indirect call sequence
    // here (that will be generated, as necessary, during asm printing), and
    // the call we generate here will be erased by FastISel::selectPatchpoint,
    // so don't try very hard...
    if (CLI.IsPatchPoint)
      MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::NOP));
    else
      return false;
  } else {
    // Build direct call with NOP for TOC restore.
    // FIXME: We can and should optimize away the NOP for local calls.
    MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
                  TII.get(PPC::BL8_NOP));
    // Add callee.
    MIB.addGlobalAddress(GV);
  }

  // Add implicit physical register uses to the call so the argument
  // registers remain live across it.
  for (unsigned II = 0, IE = RegArgs.size(); II != IE; ++II)
    MIB.addReg(RegArgs[II], RegState::Implicit);

  // Direct calls, in both the ELF V1 and V2 ABIs, need the TOC register live
  // into the call.
  PPCFuncInfo->setUsesTOCBasePtr();
  MIB.addReg(PPC::X2, RegState::Implicit);

  // Add a register mask with the call-preserved registers. Proper
  // defs for return values will be added by setPhysRegsDeadExcept().
  MIB.addRegMask(TRI.getCallPreservedMask(*FuncInfo.MF, CC));

  CLI.Call = MIB;

  // Finish off the call including any return values.
  return finishCall(RetVT, CLI, NumBytes);
}
// Attempt to fast-select a return instruction. Emits copies of the
// return value(s) into the ABI-assigned registers followed by BLR8;
// returns false to fall back to SDAG for unsupported cases.
bool PPCFastISel::SelectRet(const Instruction *I) {
  if (!FuncInfo.CanLowerReturn)
    return false;

  const ReturnInst *Ret = cast<ReturnInst>(I);
  const Function &F = *I->getParent()->getParent();

  // Build a list of return value registers.
  SmallVector<unsigned, 4> RetRegs;
  CallingConv::ID CC = F.getCallingConv();

  if (Ret->getNumOperands() > 0) {
    SmallVector<ISD::OutputArg, 4> Outs;
    GetReturnInfo(CC, F.getReturnType(), F.getAttributes(), Outs, TLI, DL);

    // Analyze operands of the call, assigning locations to each operand.
    SmallVector<CCValAssign, 16> ValLocs;
    CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, ValLocs, *Context);
    CCInfo.AnalyzeReturn(Outs, RetCC_PPC64_ELF_FIS);
    const Value *RV = Ret->getOperand(0);

    // FIXME: Only one output register for now.
    if (ValLocs.size() > 1)
      return false;

    // Special case for returning a constant integer of any size - materialize
    // the constant as an i64 and copy it to the return register.
    if (const ConstantInt *CI = dyn_cast<ConstantInt>(RV)) {
      CCValAssign &VA = ValLocs[0];

      Register RetReg = VA.getLocReg();
      // We still need to worry about properly extending the sign. For example,
      // we could have only a single bit or a constant that needs zero
      // extension rather than sign extension. Make sure we pass the return
      // value extension property to integer materialization.
      unsigned SrcReg =
          PPCMaterializeInt(CI, MVT::i64, VA.getLocInfo() != CCValAssign::ZExt);
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
            TII.get(TargetOpcode::COPY), RetReg).addReg(SrcReg);

      RetRegs.push_back(RetReg);

    } else {
      unsigned Reg = getRegForValue(RV);

      if (Reg == 0)
        return false;

      // Copy the result values into the output registers.
      for (unsigned i = 0; i < ValLocs.size(); ++i) {

        CCValAssign &VA = ValLocs[i];
        assert(VA.isRegLoc() && "Can only return in registers!");
        RetRegs.push_back(VA.getLocReg());
        // NOTE: multi-part values occupy consecutive virtual registers,
        // indexed here by value number — presumably matching how
        // getRegForValue allocated them; verify if extending this.
        unsigned SrcReg = Reg + VA.getValNo();

        EVT RVEVT = TLI.getValueType(DL, RV->getType());
        if (!RVEVT.isSimple())
          return false;
        MVT RVVT = RVEVT.getSimpleVT();
        MVT DestVT = VA.getLocVT();

        // Only small integers may differ from the assigned location type;
        // anything else is unsupported here.
        if (RVVT != DestVT && RVVT != MVT::i8 &&
            RVVT != MVT::i16 && RVVT != MVT::i32)
          return false;

        // Widen small integers to the location type with the extension
        // kind the calling convention assigned.
        if (RVVT != DestVT) {
          switch (VA.getLocInfo()) {
            default:
              llvm_unreachable("Unknown loc info!");
            case CCValAssign::Full:
              llvm_unreachable("Full value assign but types don't match?");
            case CCValAssign::AExt:
            case CCValAssign::ZExt: {
              const TargetRegisterClass *RC =
                (DestVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
              unsigned TmpReg = createResultReg(RC);
              if (!PPCEmitIntExt(RVVT, SrcReg, DestVT, TmpReg, true))
                return false;
              SrcReg = TmpReg;
              break;
            }
            case CCValAssign::SExt: {
              const TargetRegisterClass *RC =
                (DestVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
              unsigned TmpReg = createResultReg(RC);
              if (!PPCEmitIntExt(RVVT, SrcReg, DestVT, TmpReg, false))
                return false;
              SrcReg = TmpReg;
              break;
            }
          }
        }

        BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
                TII.get(TargetOpcode::COPY), RetRegs[i])
          .addReg(SrcReg);
      }
    }
  }

  // Emit the return; the return-value registers are added as implicit
  // uses so they stay live up to the BLR8.
  MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
                                    TII.get(PPC::BLR8));

  for (unsigned i = 0, e = RetRegs.size(); i != e; ++i)
    MIB.addReg(RetRegs[i], RegState::Implicit);

  return true;
}
  1559. // Attempt to emit an integer extend of SrcReg into DestReg. Both
  1560. // signed and zero extensions are supported. Return false if we
  1561. // can't handle it.
  1562. bool PPCFastISel::PPCEmitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT,
  1563. unsigned DestReg, bool IsZExt) {
  1564. if (DestVT != MVT::i32 && DestVT != MVT::i64)
  1565. return false;
  1566. if (SrcVT != MVT::i8 && SrcVT != MVT::i16 && SrcVT != MVT::i32)
  1567. return false;
  1568. // Signed extensions use EXTSB, EXTSH, EXTSW.
  1569. if (!IsZExt) {
  1570. unsigned Opc;
  1571. if (SrcVT == MVT::i8)
  1572. Opc = (DestVT == MVT::i32) ? PPC::EXTSB : PPC::EXTSB8_32_64;
  1573. else if (SrcVT == MVT::i16)
  1574. Opc = (DestVT == MVT::i32) ? PPC::EXTSH : PPC::EXTSH8_32_64;
  1575. else {
  1576. assert(DestVT == MVT::i64 && "Signed extend from i32 to i32??");
  1577. Opc = PPC::EXTSW_32_64;
  1578. }
  1579. BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), DestReg)
  1580. .addReg(SrcReg);
  1581. // Unsigned 32-bit extensions use RLWINM.
  1582. } else if (DestVT == MVT::i32) {
  1583. unsigned MB;
  1584. if (SrcVT == MVT::i8)
  1585. MB = 24;
  1586. else {
  1587. assert(SrcVT == MVT::i16 && "Unsigned extend from i32 to i32??");
  1588. MB = 16;
  1589. }
  1590. BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::RLWINM),
  1591. DestReg)
  1592. .addReg(SrcReg).addImm(/*SH=*/0).addImm(MB).addImm(/*ME=*/31);
  1593. // Unsigned 64-bit extensions use RLDICL (with a 32-bit source).
  1594. } else {
  1595. unsigned MB;
  1596. if (SrcVT == MVT::i8)
  1597. MB = 56;
  1598. else if (SrcVT == MVT::i16)
  1599. MB = 48;
  1600. else
  1601. MB = 32;
  1602. BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
  1603. TII.get(PPC::RLDICL_32_64), DestReg)
  1604. .addReg(SrcReg).addImm(/*SH=*/0).addImm(MB);
  1605. }
  1606. return true;
  1607. }
  1608. // Attempt to fast-select an indirect branch instruction.
  1609. bool PPCFastISel::SelectIndirectBr(const Instruction *I) {
  1610. unsigned AddrReg = getRegForValue(I->getOperand(0));
  1611. if (AddrReg == 0)
  1612. return false;
  1613. BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::MTCTR8))
  1614. .addReg(AddrReg);
  1615. BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::BCTR8));
  1616. const IndirectBrInst *IB = cast<IndirectBrInst>(I);
  1617. for (const BasicBlock *SuccBB : IB->successors())
  1618. FuncInfo.MBB->addSuccessor(FuncInfo.MBBMap[SuccBB]);
  1619. return true;
  1620. }
  1621. // Attempt to fast-select an integer truncate instruction.
  1622. bool PPCFastISel::SelectTrunc(const Instruction *I) {
  1623. Value *Src = I->getOperand(0);
  1624. EVT SrcVT = TLI.getValueType(DL, Src->getType(), true);
  1625. EVT DestVT = TLI.getValueType(DL, I->getType(), true);
  1626. if (SrcVT != MVT::i64 && SrcVT != MVT::i32 && SrcVT != MVT::i16)
  1627. return false;
  1628. if (DestVT != MVT::i32 && DestVT != MVT::i16 && DestVT != MVT::i8)
  1629. return false;
  1630. unsigned SrcReg = getRegForValue(Src);
  1631. if (!SrcReg)
  1632. return false;
  1633. // The only interesting case is when we need to switch register classes.
  1634. if (SrcVT == MVT::i64)
  1635. SrcReg = copyRegToRegClass(&PPC::GPRCRegClass, SrcReg, 0, PPC::sub_32);
  1636. updateValueMap(I, SrcReg);
  1637. return true;
  1638. }
  1639. // Attempt to fast-select an integer extend instruction.
  1640. bool PPCFastISel::SelectIntExt(const Instruction *I) {
  1641. Type *DestTy = I->getType();
  1642. Value *Src = I->getOperand(0);
  1643. Type *SrcTy = Src->getType();
  1644. bool IsZExt = isa<ZExtInst>(I);
  1645. unsigned SrcReg = getRegForValue(Src);
  1646. if (!SrcReg) return false;
  1647. EVT SrcEVT, DestEVT;
  1648. SrcEVT = TLI.getValueType(DL, SrcTy, true);
  1649. DestEVT = TLI.getValueType(DL, DestTy, true);
  1650. if (!SrcEVT.isSimple())
  1651. return false;
  1652. if (!DestEVT.isSimple())
  1653. return false;
  1654. MVT SrcVT = SrcEVT.getSimpleVT();
  1655. MVT DestVT = DestEVT.getSimpleVT();
  1656. // If we know the register class needed for the result of this
  1657. // instruction, use it. Otherwise pick the register class of the
  1658. // correct size that does not contain X0/R0, since we don't know
  1659. // whether downstream uses permit that assignment.
  1660. unsigned AssignedReg = FuncInfo.ValueMap[I];
  1661. const TargetRegisterClass *RC =
  1662. (AssignedReg ? MRI.getRegClass(AssignedReg) :
  1663. (DestVT == MVT::i64 ? &PPC::G8RC_and_G8RC_NOX0RegClass :
  1664. &PPC::GPRC_and_GPRC_NOR0RegClass));
  1665. unsigned ResultReg = createResultReg(RC);
  1666. if (!PPCEmitIntExt(SrcVT, SrcReg, DestVT, ResultReg, IsZExt))
  1667. return false;
  1668. updateValueMap(I, ResultReg);
  1669. return true;
  1670. }
  1671. // Attempt to fast-select an instruction that wasn't handled by
  1672. // the table-generated machinery.
  1673. bool PPCFastISel::fastSelectInstruction(const Instruction *I) {
  1674. switch (I->getOpcode()) {
  1675. case Instruction::Load:
  1676. return SelectLoad(I);
  1677. case Instruction::Store:
  1678. return SelectStore(I);
  1679. case Instruction::Br:
  1680. return SelectBranch(I);
  1681. case Instruction::IndirectBr:
  1682. return SelectIndirectBr(I);
  1683. case Instruction::FPExt:
  1684. return SelectFPExt(I);
  1685. case Instruction::FPTrunc:
  1686. return SelectFPTrunc(I);
  1687. case Instruction::SIToFP:
  1688. return SelectIToFP(I, /*IsSigned*/ true);
  1689. case Instruction::UIToFP:
  1690. return SelectIToFP(I, /*IsSigned*/ false);
  1691. case Instruction::FPToSI:
  1692. return SelectFPToI(I, /*IsSigned*/ true);
  1693. case Instruction::FPToUI:
  1694. return SelectFPToI(I, /*IsSigned*/ false);
  1695. case Instruction::Add:
  1696. return SelectBinaryIntOp(I, ISD::ADD);
  1697. case Instruction::Or:
  1698. return SelectBinaryIntOp(I, ISD::OR);
  1699. case Instruction::Sub:
  1700. return SelectBinaryIntOp(I, ISD::SUB);
  1701. case Instruction::Call:
  1702. // On AIX, call lowering uses the DAG-ISEL path currently so that the
  1703. // callee of the direct function call instruction will be mapped to the
  1704. // symbol for the function's entry point, which is distinct from the
  1705. // function descriptor symbol. The latter is the symbol whose XCOFF symbol
  1706. // name is the C-linkage name of the source level function.
  1707. if (TM.getTargetTriple().isOSAIX())
  1708. break;
  1709. return selectCall(I);
  1710. case Instruction::Ret:
  1711. return SelectRet(I);
  1712. case Instruction::Trunc:
  1713. return SelectTrunc(I);
  1714. case Instruction::ZExt:
  1715. case Instruction::SExt:
  1716. return SelectIntExt(I);
  1717. // Here add other flavors of Instruction::XXX that automated
  1718. // cases don't catch. For example, switches are terminators
  1719. // that aren't yet handled.
  1720. default:
  1721. break;
  1722. }
  1723. return false;
  1724. }
  1725. // Materialize a floating-point constant into a register, and return
  1726. // the register number (or zero if we failed to handle it).
  1727. unsigned PPCFastISel::PPCMaterializeFP(const ConstantFP *CFP, MVT VT) {
  1728. // If this is a PC-Rel function, let SDISel handle constant pool.
  1729. if (Subtarget->isUsingPCRelativeCalls())
  1730. return false;
  1731. // No plans to handle long double here.
  1732. if (VT != MVT::f32 && VT != MVT::f64)
  1733. return 0;
  1734. // All FP constants are loaded from the constant pool.
  1735. Align Alignment = DL.getPrefTypeAlign(CFP->getType());
  1736. unsigned Idx = MCP.getConstantPoolIndex(cast<Constant>(CFP), Alignment);
  1737. const bool HasSPE = Subtarget->hasSPE();
  1738. const TargetRegisterClass *RC;
  1739. if (HasSPE)
  1740. RC = ((VT == MVT::f32) ? &PPC::GPRCRegClass : &PPC::SPERCRegClass);
  1741. else
  1742. RC = ((VT == MVT::f32) ? &PPC::F4RCRegClass : &PPC::F8RCRegClass);
  1743. unsigned DestReg = createResultReg(RC);
  1744. CodeModel::Model CModel = TM.getCodeModel();
  1745. MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
  1746. MachinePointerInfo::getConstantPool(*FuncInfo.MF),
  1747. MachineMemOperand::MOLoad, (VT == MVT::f32) ? 4 : 8, Alignment);
  1748. unsigned Opc;
  1749. if (HasSPE)
  1750. Opc = ((VT == MVT::f32) ? PPC::SPELWZ : PPC::EVLDD);
  1751. else
  1752. Opc = ((VT == MVT::f32) ? PPC::LFS : PPC::LFD);
  1753. unsigned TmpReg = createResultReg(&PPC::G8RC_and_G8RC_NOX0RegClass);
  1754. PPCFuncInfo->setUsesTOCBasePtr();
  1755. // For small code model, generate a LF[SD](0, LDtocCPT(Idx, X2)).
  1756. if (CModel == CodeModel::Small) {
  1757. BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::LDtocCPT),
  1758. TmpReg)
  1759. .addConstantPoolIndex(Idx).addReg(PPC::X2);
  1760. BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), DestReg)
  1761. .addImm(0).addReg(TmpReg).addMemOperand(MMO);
  1762. } else {
  1763. // Otherwise we generate LF[SD](Idx[lo], ADDIStocHA8(X2, Idx)).
  1764. BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::ADDIStocHA8),
  1765. TmpReg).addReg(PPC::X2).addConstantPoolIndex(Idx);
  1766. // But for large code model, we must generate a LDtocL followed
  1767. // by the LF[SD].
  1768. if (CModel == CodeModel::Large) {
  1769. unsigned TmpReg2 = createResultReg(&PPC::G8RC_and_G8RC_NOX0RegClass);
  1770. BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::LDtocL),
  1771. TmpReg2).addConstantPoolIndex(Idx).addReg(TmpReg);
  1772. BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), DestReg)
  1773. .addImm(0)
  1774. .addReg(TmpReg2);
  1775. } else
  1776. BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), DestReg)
  1777. .addConstantPoolIndex(Idx, 0, PPCII::MO_TOC_LO)
  1778. .addReg(TmpReg)
  1779. .addMemOperand(MMO);
  1780. }
  1781. return DestReg;
  1782. }
  1783. // Materialize the address of a global value into a register, and return
  1784. // the register number (or zero if we failed to handle it).
  1785. unsigned PPCFastISel::PPCMaterializeGV(const GlobalValue *GV, MVT VT) {
  1786. // If this is a PC-Rel function, let SDISel handle GV materialization.
  1787. if (Subtarget->isUsingPCRelativeCalls())
  1788. return false;
  1789. assert(VT == MVT::i64 && "Non-address!");
  1790. const TargetRegisterClass *RC = &PPC::G8RC_and_G8RC_NOX0RegClass;
  1791. unsigned DestReg = createResultReg(RC);
  1792. // Global values may be plain old object addresses, TLS object
  1793. // addresses, constant pool entries, or jump tables. How we generate
  1794. // code for these may depend on small, medium, or large code model.
  1795. CodeModel::Model CModel = TM.getCodeModel();
  1796. // FIXME: Jump tables are not yet required because fast-isel doesn't
  1797. // handle switches; if that changes, we need them as well. For now,
  1798. // what follows assumes everything's a generic (or TLS) global address.
  1799. // FIXME: We don't yet handle the complexity of TLS.
  1800. if (GV->isThreadLocal())
  1801. return 0;
  1802. PPCFuncInfo->setUsesTOCBasePtr();
  1803. // For small code model, generate a simple TOC load.
  1804. if (CModel == CodeModel::Small)
  1805. BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::LDtoc),
  1806. DestReg)
  1807. .addGlobalAddress(GV)
  1808. .addReg(PPC::X2);
  1809. else {
  1810. // If the address is an externally defined symbol, a symbol with common
  1811. // or externally available linkage, a non-local function address, or a
  1812. // jump table address (not yet needed), or if we are generating code
  1813. // for large code model, we generate:
  1814. // LDtocL(GV, ADDIStocHA8(%x2, GV))
  1815. // Otherwise we generate:
  1816. // ADDItocL(ADDIStocHA8(%x2, GV), GV)
  1817. // Either way, start with the ADDIStocHA8:
  1818. unsigned HighPartReg = createResultReg(RC);
  1819. BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::ADDIStocHA8),
  1820. HighPartReg).addReg(PPC::X2).addGlobalAddress(GV);
  1821. if (Subtarget->isGVIndirectSymbol(GV)) {
  1822. BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::LDtocL),
  1823. DestReg).addGlobalAddress(GV).addReg(HighPartReg);
  1824. } else {
  1825. // Otherwise generate the ADDItocL.
  1826. BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::ADDItocL),
  1827. DestReg).addReg(HighPartReg).addGlobalAddress(GV);
  1828. }
  1829. }
  1830. return DestReg;
  1831. }
  1832. // Materialize a 32-bit integer constant into a register, and return
  1833. // the register number (or zero if we failed to handle it).
  1834. unsigned PPCFastISel::PPCMaterialize32BitInt(int64_t Imm,
  1835. const TargetRegisterClass *RC) {
  1836. unsigned Lo = Imm & 0xFFFF;
  1837. unsigned Hi = (Imm >> 16) & 0xFFFF;
  1838. unsigned ResultReg = createResultReg(RC);
  1839. bool IsGPRC = RC->hasSuperClassEq(&PPC::GPRCRegClass);
  1840. if (isInt<16>(Imm))
  1841. BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
  1842. TII.get(IsGPRC ? PPC::LI : PPC::LI8), ResultReg)
  1843. .addImm(Imm);
  1844. else if (Lo) {
  1845. // Both Lo and Hi have nonzero bits.
  1846. unsigned TmpReg = createResultReg(RC);
  1847. BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
  1848. TII.get(IsGPRC ? PPC::LIS : PPC::LIS8), TmpReg)
  1849. .addImm(Hi);
  1850. BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
  1851. TII.get(IsGPRC ? PPC::ORI : PPC::ORI8), ResultReg)
  1852. .addReg(TmpReg).addImm(Lo);
  1853. } else
  1854. // Just Hi bits.
  1855. BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
  1856. TII.get(IsGPRC ? PPC::LIS : PPC::LIS8), ResultReg)
  1857. .addImm(Hi);
  1858. return ResultReg;
  1859. }
// Materialize a 64-bit integer constant into a register, and return
// the register number (or zero if we failed to handle it).
// Strategy: reduce to a 32-bit materialization when possible (either
// directly, or by shifting out trailing zeros), otherwise build the
// high 32 bits, shift them into place with RLDICR, and OR in the low
// 32 bits halfword by halfword.
unsigned PPCFastISel::PPCMaterialize64BitInt(int64_t Imm,
                                             const TargetRegisterClass *RC) {
  unsigned Remainder = 0;
  unsigned Shift = 0;

  // If the value doesn't fit in 32 bits, see if we can shift it
  // so that it fits in 32 bits.
  if (!isInt<32>(Imm)) {
    Shift = countTrailingZeros<uint64_t>(Imm);
    int64_t ImmSh = static_cast<uint64_t>(Imm) >> Shift;

    if (isInt<32>(ImmSh))
      Imm = ImmSh;
    else {
      // No luck with trailing zeros: keep the low 32 bits (truncated
      // into Remainder) for later ORing and materialize the high half.
      Remainder = Imm;
      Shift = 32;
      Imm >>= 32;
    }
  }

  // Handle the high-order 32 bits (if shifted) or the whole 32 bits
  // (if not shifted).
  unsigned TmpReg1 = PPCMaterialize32BitInt(Imm, RC);
  if (!Shift)
    return TmpReg1;

  // If upper 32 bits were not zero, we've built them and need to shift
  // them into place. RLDICR rotates left by Shift and clears bits below
  // bit (63 - Shift), i.e. a logical shift left by Shift.
  unsigned TmpReg2;
  if (Imm) {
    TmpReg2 = createResultReg(RC);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::RLDICR),
            TmpReg2).addReg(TmpReg1).addImm(Shift).addImm(63 - Shift);
  } else
    TmpReg2 = TmpReg1;

  // OR in the high halfword of the remainder, if nonzero.
  unsigned TmpReg3, Hi, Lo;
  if ((Hi = (Remainder >> 16) & 0xFFFF)) {
    TmpReg3 = createResultReg(RC);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::ORIS8),
            TmpReg3).addReg(TmpReg2).addImm(Hi);
  } else
    TmpReg3 = TmpReg2;

  // OR in the low halfword of the remainder, if nonzero.
  if ((Lo = Remainder & 0xFFFF)) {
    unsigned ResultReg = createResultReg(RC);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::ORI8),
            ResultReg).addReg(TmpReg3).addImm(Lo);
    return ResultReg;
  }

  return TmpReg3;
}
  1908. // Materialize an integer constant into a register, and return
  1909. // the register number (or zero if we failed to handle it).
  1910. unsigned PPCFastISel::PPCMaterializeInt(const ConstantInt *CI, MVT VT,
  1911. bool UseSExt) {
  1912. // If we're using CR bit registers for i1 values, handle that as a special
  1913. // case first.
  1914. if (VT == MVT::i1 && Subtarget->useCRBits()) {
  1915. unsigned ImmReg = createResultReg(&PPC::CRBITRCRegClass);
  1916. BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
  1917. TII.get(CI->isZero() ? PPC::CRUNSET : PPC::CRSET), ImmReg);
  1918. return ImmReg;
  1919. }
  1920. if (VT != MVT::i64 && VT != MVT::i32 && VT != MVT::i16 && VT != MVT::i8 &&
  1921. VT != MVT::i1)
  1922. return 0;
  1923. const TargetRegisterClass *RC =
  1924. ((VT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass);
  1925. int64_t Imm = UseSExt ? CI->getSExtValue() : CI->getZExtValue();
  1926. // If the constant is in range, use a load-immediate.
  1927. // Since LI will sign extend the constant we need to make sure that for
  1928. // our zeroext constants that the sign extended constant fits into 16-bits -
  1929. // a range of 0..0x7fff.
  1930. if (isInt<16>(Imm)) {
  1931. unsigned Opc = (VT == MVT::i64) ? PPC::LI8 : PPC::LI;
  1932. unsigned ImmReg = createResultReg(RC);
  1933. BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ImmReg)
  1934. .addImm(Imm);
  1935. return ImmReg;
  1936. }
  1937. // Construct the constant piecewise.
  1938. if (VT == MVT::i64)
  1939. return PPCMaterialize64BitInt(Imm, RC);
  1940. else if (VT == MVT::i32)
  1941. return PPCMaterialize32BitInt(Imm, RC);
  1942. return 0;
  1943. }
  1944. // Materialize a constant into a register, and return the register
  1945. // number (or zero if we failed to handle it).
  1946. unsigned PPCFastISel::fastMaterializeConstant(const Constant *C) {
  1947. EVT CEVT = TLI.getValueType(DL, C->getType(), true);
  1948. // Only handle simple types.
  1949. if (!CEVT.isSimple()) return 0;
  1950. MVT VT = CEVT.getSimpleVT();
  1951. if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C))
  1952. return PPCMaterializeFP(CFP, VT);
  1953. else if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
  1954. return PPCMaterializeGV(GV, VT);
  1955. else if (const ConstantInt *CI = dyn_cast<ConstantInt>(C))
  1956. // Note that the code in FunctionLoweringInfo::ComputePHILiveOutRegInfo
  1957. // assumes that constant PHI operands will be zero extended, and failure to
  1958. // match that assumption will cause problems if we sign extend here but
  1959. // some user of a PHI is in a block for which we fall back to full SDAG
  1960. // instruction selection.
  1961. return PPCMaterializeInt(CI, VT, false);
  1962. return 0;
  1963. }
  1964. // Materialize the address created by an alloca into a register, and
  1965. // return the register number (or zero if we failed to handle it).
  1966. unsigned PPCFastISel::fastMaterializeAlloca(const AllocaInst *AI) {
  1967. // Don't handle dynamic allocas.
  1968. if (!FuncInfo.StaticAllocaMap.count(AI)) return 0;
  1969. MVT VT;
  1970. if (!isLoadTypeLegal(AI->getType(), VT)) return 0;
  1971. DenseMap<const AllocaInst*, int>::iterator SI =
  1972. FuncInfo.StaticAllocaMap.find(AI);
  1973. if (SI != FuncInfo.StaticAllocaMap.end()) {
  1974. unsigned ResultReg = createResultReg(&PPC::G8RC_and_G8RC_NOX0RegClass);
  1975. BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::ADDI8),
  1976. ResultReg).addFrameIndex(SI->second).addImm(0);
  1977. return ResultReg;
  1978. }
  1979. return 0;
  1980. }
// Fold loads into extends when possible: if MI is a (zero- or sign-)
// extend whose operand OpNo comes from load LI, re-emit the load in its
// already-extended form directly into MI's destination register and
// delete MI.
// FIXME: We can have multiple redundant extend/trunc instructions
// following a load. The folding only picks up one. Extend this
// to check subsequent instructions for the same pattern and remove
// them. Thus ResultReg should be the def reg for the last redundant
// instruction in a chain, and all intervening instructions can be
// removed from parent. Change test/CodeGen/PowerPC/fast-isel-fold.ll
// to add ELF64-NOT: rldicl to the appropriate tests when this works.
bool PPCFastISel::tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,
                                      const LoadInst *LI) {
  // Verify we have a legal type before going any further.
  MVT VT;
  if (!isLoadTypeLegal(LI->getType(), VT))
    return false;

  // Combine load followed by zero- or sign-extend. Each case checks
  // that the extend is in fact redundant given the loaded width.
  bool IsZExt = false;
  switch(MI->getOpcode()) {
    default:
      return false;

    case PPC::RLDICL:
    case PPC::RLDICL_32_64: {
      // 64-bit rotate-and-clear: acts as a zero extension when the mask
      // start (MB) keeps at least as many low bits as the load is wide.
      IsZExt = true;
      unsigned MB = MI->getOperand(3).getImm();
      if ((VT == MVT::i8 && MB <= 56) ||
          (VT == MVT::i16 && MB <= 48) ||
          (VT == MVT::i32 && MB <= 32))
        break;
      return false;
    }

    case PPC::RLWINM:
    case PPC::RLWINM8: {
      // 32-bit rotate-and-mask: same reasoning for i8/i16 sources.
      IsZExt = true;
      unsigned MB = MI->getOperand(3).getImm();
      if ((VT == MVT::i8 && MB <= 24) ||
          (VT == MVT::i16 && MB <= 16))
        break;
      return false;
    }

    case PPC::EXTSB:
    case PPC::EXTSB8:
    case PPC::EXTSB8_32_64:
      /* There is no sign-extending load-byte instruction. */
      return false;

    case PPC::EXTSH:
    case PPC::EXTSH8:
    case PPC::EXTSH8_32_64: {
      if (VT != MVT::i16 && VT != MVT::i8)
        return false;
      break;
    }

    case PPC::EXTSW:
    case PPC::EXTSW_32:
    case PPC::EXTSW_32_64: {
      if (VT != MVT::i32 && VT != MVT::i16 && VT != MVT::i8)
        return false;
      break;
    }
  }

  // See if we can handle this address.
  Address Addr;
  if (!PPCComputeAddress(LI->getOperand(0), Addr))
    return false;

  // Re-emit the load directly into the extend's destination register,
  // requesting the extended form, then delete the now-dead extend.
  Register ResultReg = MI->getOperand(0).getReg();

  if (!PPCEmitLoad(VT, ResultReg, Addr, nullptr, IsZExt,
                   Subtarget->hasSPE() ? PPC::EVLDD : PPC::LFD))
    return false;

  MachineBasicBlock::iterator I(MI);
  removeDeadCode(I, std::next(I));
  return true;
}
  2051. // Attempt to lower call arguments in a faster way than done by
  2052. // the selection DAG code.
  2053. bool PPCFastISel::fastLowerArguments() {
  2054. // Defer to normal argument lowering for now. It's reasonably
  2055. // efficient. Consider doing something like ARM to handle the
  2056. // case where all args fit in registers, no varargs, no float
  2057. // or vector args.
  2058. return false;
  2059. }
  2060. // Handle materializing integer constants into a register. This is not
  2061. // automatically generated for PowerPC, so must be explicitly created here.
  2062. unsigned PPCFastISel::fastEmit_i(MVT Ty, MVT VT, unsigned Opc, uint64_t Imm) {
  2063. if (Opc != ISD::Constant)
  2064. return 0;
  2065. // If we're using CR bit registers for i1 values, handle that as a special
  2066. // case first.
  2067. if (VT == MVT::i1 && Subtarget->useCRBits()) {
  2068. unsigned ImmReg = createResultReg(&PPC::CRBITRCRegClass);
  2069. BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
  2070. TII.get(Imm == 0 ? PPC::CRUNSET : PPC::CRSET), ImmReg);
  2071. return ImmReg;
  2072. }
  2073. if (VT != MVT::i64 && VT != MVT::i32 && VT != MVT::i16 && VT != MVT::i8 &&
  2074. VT != MVT::i1)
  2075. return 0;
  2076. const TargetRegisterClass *RC = ((VT == MVT::i64) ? &PPC::G8RCRegClass :
  2077. &PPC::GPRCRegClass);
  2078. if (VT == MVT::i64)
  2079. return PPCMaterialize64BitInt(Imm, RC);
  2080. else
  2081. return PPCMaterialize32BitInt(Imm, RC);
  2082. }
  2083. // Override for ADDI and ADDI8 to set the correct register class
  2084. // on RHS operand 0. The automatic infrastructure naively assumes
  2085. // GPRC for i32 and G8RC for i64; the concept of "no R0" is lost
  2086. // for these cases. At the moment, none of the other automatically
  2087. // generated RI instructions require special treatment. However, once
  2088. // SelectSelect is implemented, "isel" requires similar handling.
  2089. //
  2090. // Also be conservative about the output register class. Avoid
  2091. // assigning R0 or X0 to the output register for GPRC and G8RC
  2092. // register classes, as any such result could be used in ADDI, etc.,
  2093. // where those regs have another meaning.
  2094. unsigned PPCFastISel::fastEmitInst_ri(unsigned MachineInstOpcode,
  2095. const TargetRegisterClass *RC,
  2096. unsigned Op0, bool Op0IsKill,
  2097. uint64_t Imm) {
  2098. if (MachineInstOpcode == PPC::ADDI)
  2099. MRI.setRegClass(Op0, &PPC::GPRC_and_GPRC_NOR0RegClass);
  2100. else if (MachineInstOpcode == PPC::ADDI8)
  2101. MRI.setRegClass(Op0, &PPC::G8RC_and_G8RC_NOX0RegClass);
  2102. const TargetRegisterClass *UseRC =
  2103. (RC == &PPC::GPRCRegClass ? &PPC::GPRC_and_GPRC_NOR0RegClass :
  2104. (RC == &PPC::G8RCRegClass ? &PPC::G8RC_and_G8RC_NOX0RegClass : RC));
  2105. return FastISel::fastEmitInst_ri(MachineInstOpcode, UseRC,
  2106. Op0, Op0IsKill, Imm);
  2107. }
  2108. // Override for instructions with one register operand to avoid use of
  2109. // R0/X0. The automatic infrastructure isn't aware of the context so
  2110. // we must be conservative.
  2111. unsigned PPCFastISel::fastEmitInst_r(unsigned MachineInstOpcode,
  2112. const TargetRegisterClass* RC,
  2113. unsigned Op0, bool Op0IsKill) {
  2114. const TargetRegisterClass *UseRC =
  2115. (RC == &PPC::GPRCRegClass ? &PPC::GPRC_and_GPRC_NOR0RegClass :
  2116. (RC == &PPC::G8RCRegClass ? &PPC::G8RC_and_G8RC_NOX0RegClass : RC));
  2117. return FastISel::fastEmitInst_r(MachineInstOpcode, UseRC, Op0, Op0IsKill);
  2118. }
  2119. // Override for instructions with two register operands to avoid use
  2120. // of R0/X0. The automatic infrastructure isn't aware of the context
  2121. // so we must be conservative.
  2122. unsigned PPCFastISel::fastEmitInst_rr(unsigned MachineInstOpcode,
  2123. const TargetRegisterClass* RC,
  2124. unsigned Op0, bool Op0IsKill,
  2125. unsigned Op1, bool Op1IsKill) {
  2126. const TargetRegisterClass *UseRC =
  2127. (RC == &PPC::GPRCRegClass ? &PPC::GPRC_and_GPRC_NOR0RegClass :
  2128. (RC == &PPC::G8RCRegClass ? &PPC::G8RC_and_G8RC_NOX0RegClass : RC));
  2129. return FastISel::fastEmitInst_rr(MachineInstOpcode, UseRC, Op0, Op0IsKill,
  2130. Op1, Op1IsKill);
  2131. }
  2132. namespace llvm {
  2133. // Create the fast instruction selector for PowerPC64 ELF.
  2134. FastISel *PPC::createFastISel(FunctionLoweringInfo &FuncInfo,
  2135. const TargetLibraryInfo *LibInfo) {
  2136. // Only available on 64-bit ELF for now.
  2137. const PPCSubtarget &Subtarget = FuncInfo.MF->getSubtarget<PPCSubtarget>();
  2138. if (Subtarget.is64BitELFABI())
  2139. return new PPCFastISel(FuncInfo, LibInfo);
  2140. return nullptr;
  2141. }
  2142. }