//===--- AArch64CallLowering.cpp - Call lowering --------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// \file
/// This file implements the lowering of LLVM calls to machine code calls for
/// GlobalISel.
///
//===----------------------------------------------------------------------===//

#include "AArch64CallLowering.h"
#include "AArch64ISelLowering.h"
#include "AArch64MachineFunctionInfo.h"
#include "AArch64Subtarget.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/LowLevelType.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/MachineValueType.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <iterator>

#define DEBUG_TYPE "aarch64-call-lowering"

using namespace llvm;

AArch64CallLowering::AArch64CallLowering(const AArch64TargetLowering &TLI)
    : CallLowering(&TLI) {}

static void applyStackPassedSmallTypeDAGHack(EVT OrigVT, MVT &ValVT,
                                             MVT &LocVT) {
  // If ValVT is i1/i8/i16, we should set LocVT to i8/i8/i16. This is a legacy
  // hack because the DAG calls the assignment function with pre-legalized
  // register typed values, not the raw type.
  //
  // This hack is not applied to return values which are not passed on the
  // stack.
  if (OrigVT == MVT::i1 || OrigVT == MVT::i8)
    ValVT = LocVT = MVT::i8;
  else if (OrigVT == MVT::i16)
    ValVT = LocVT = MVT::i16;
}

// Account for i1/i8/i16 stack passed value hack
static LLT getStackValueStoreTypeHack(const CCValAssign &VA) {
  const MVT ValVT = VA.getValVT();
  return (ValVT == MVT::i8 || ValVT == MVT::i16) ? LLT(ValVT)
                                                 : LLT(VA.getLocVT());
}

namespace {
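
/// Assigner for incoming values (formal arguments and call results).
/// Unconditionally applies the small-type DAG compatibility hack before
/// deferring to the generic IncomingValueAssigner.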
struct AArch64IncomingValueAssigner
    : public CallLowering::IncomingValueAssigner {
  AArch64IncomingValueAssigner(CCAssignFn *AssignFn_,
                               CCAssignFn *AssignFnVarArg_)
      : IncomingValueAssigner(AssignFn_, AssignFnVarArg_) {}

  bool assignArg(unsigned ValNo, EVT OrigVT, MVT ValVT, MVT LocVT,
                 CCValAssign::LocInfo LocInfo,
                 const CallLowering::ArgInfo &Info, ISD::ArgFlagsTy Flags,
                 CCState &State) override {
    applyStackPassedSmallTypeDAGHack(OrigVT, ValVT, LocVT);
    return IncomingValueAssigner::assignArg(ValNo, OrigVT, ValVT, LocVT,
                                            LocInfo, Info, Flags, State);
  }
};
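
/// Assigner for outgoing values (call arguments and return values). Selects
/// between the fixed and vararg assignment functions and applies the
/// small-type DAG compatibility hack to non-vararg, non-return values.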
struct AArch64OutgoingValueAssigner
    : public CallLowering::OutgoingValueAssigner {
  const AArch64Subtarget &Subtarget;

  /// Track if this is used for a return instead of function argument
  /// passing. We apply a hack to i1/i8/i16 stack passed values, but do not use
  /// stack passed returns for them and cannot apply the type adjustment.
  bool IsReturn;

  AArch64OutgoingValueAssigner(CCAssignFn *AssignFn_,
                               CCAssignFn *AssignFnVarArg_,
                               const AArch64Subtarget &Subtarget_,
                               bool IsReturn)
      : OutgoingValueAssigner(AssignFn_, AssignFnVarArg_),
        Subtarget(Subtarget_), IsReturn(IsReturn) {}

  bool assignArg(unsigned ValNo, EVT OrigVT, MVT ValVT, MVT LocVT,
                 CCValAssign::LocInfo LocInfo,
                 const CallLowering::ArgInfo &Info, ISD::ArgFlagsTy Flags,
                 CCState &State) override {
    bool IsCalleeWin = Subtarget.isCallingConvWin64(State.getCallingConv());
    bool UseVarArgsCCForFixed = IsCalleeWin && State.isVarArg();

    if (!State.isVarArg() && !UseVarArgsCCForFixed && !IsReturn)
      applyStackPassedSmallTypeDAGHack(OrigVT, ValVT, LocVT);

    bool Res;
    if (Info.IsFixed && !UseVarArgsCCForFixed)
      Res = AssignFn(ValNo, ValVT, LocVT, LocInfo, Flags, State);
    else
      Res = AssignFnVarArg(ValNo, ValVT, LocVT, LocInfo, Flags, State);

    StackOffset = State.getNextStackOffset();
    return Res;
  }
};
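
/// Common handler for incoming values. Loads stack-passed values from fixed
/// stack objects (accounting for the i8/i16 compatibility hack) and records
/// used physical registers via markPhysRegUsed().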
struct IncomingArgHandler : public CallLowering::IncomingValueHandler {
  IncomingArgHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI)
      : IncomingValueHandler(MIRBuilder, MRI) {}

  Register getStackAddress(uint64_t Size, int64_t Offset,
                           MachinePointerInfo &MPO,
                           ISD::ArgFlagsTy Flags) override {
    auto &MFI = MIRBuilder.getMF().getFrameInfo();

    // Byval is assumed to be writable memory, but other stack passed arguments
    // are not.
    const bool IsImmutable = !Flags.isByVal();

    int FI = MFI.CreateFixedObject(Size, Offset, IsImmutable);
    MPO = MachinePointerInfo::getFixedStack(MIRBuilder.getMF(), FI);
    auto AddrReg = MIRBuilder.buildFrameIndex(LLT::pointer(0, 64), FI);
    return AddrReg.getReg(0);
  }

  LLT getStackValueStoreType(const DataLayout &DL, const CCValAssign &VA,
                             ISD::ArgFlagsTy Flags) const override {
    // For pointers, we just need to fixup the integer types reported in the
    // CCValAssign.
    if (Flags.isPointer())
      return CallLowering::ValueHandler::getStackValueStoreType(DL, VA, Flags);
    return getStackValueStoreTypeHack(VA);
  }

  void assignValueToReg(Register ValVReg, Register PhysReg,
                        CCValAssign VA) override {
    markPhysRegUsed(PhysReg);
    IncomingValueHandler::assignValueToReg(ValVReg, PhysReg, VA);
  }

  void assignValueToAddress(Register ValVReg, Register Addr, LLT MemTy,
                            MachinePointerInfo &MPO, CCValAssign &VA) override {
    MachineFunction &MF = MIRBuilder.getMF();

    LLT ValTy(VA.getValVT());
    LLT LocTy(VA.getLocVT());

    // Fixup the types for the DAG compatibility hack.
    if (VA.getValVT() == MVT::i8 || VA.getValVT() == MVT::i16)
      std::swap(ValTy, LocTy);
    else {
      // The calling code knows if this is a pointer or not, we're only touching
      // the LocTy for the i8/i16 hack.
      assert(LocTy.getSizeInBits() == MemTy.getSizeInBits());
      LocTy = MemTy;
    }

    auto MMO = MF.getMachineMemOperand(
        MPO, MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant, LocTy,
        inferAlignFromPtrInfo(MF, MPO));

    switch (VA.getLocInfo()) {
    case CCValAssign::LocInfo::ZExt:
      MIRBuilder.buildLoadInstr(TargetOpcode::G_ZEXTLOAD, ValVReg, Addr, *MMO);
      return;
    case CCValAssign::LocInfo::SExt:
      MIRBuilder.buildLoadInstr(TargetOpcode::G_SEXTLOAD, ValVReg, Addr, *MMO);
      return;
    default:
      MIRBuilder.buildLoad(ValVReg, Addr, *MMO);
      return;
    }
  }

  /// How the physical register gets marked varies between formal
  /// parameters (it's a basic-block live-in), and a call instruction
  /// (it's an implicit-def of the BL).
  virtual void markPhysRegUsed(MCRegister PhysReg) = 0;
};
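
/// Incoming value handler for formal arguments: incoming physical registers
/// become live-ins of the function and of its entry block.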
struct FormalArgHandler : public IncomingArgHandler {
  FormalArgHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI)
      : IncomingArgHandler(MIRBuilder, MRI) {}

  void markPhysRegUsed(MCRegister PhysReg) override {
    MIRBuilder.getMRI()->addLiveIn(PhysReg);
    MIRBuilder.getMBB().addLiveIn(PhysReg);
  }
};
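
/// Incoming value handler for call return values: result registers are added
/// as implicit defs of the call instruction.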
struct CallReturnHandler : public IncomingArgHandler {
  CallReturnHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
                    MachineInstrBuilder MIB)
      : IncomingArgHandler(MIRBuilder, MRI), MIB(MIB) {}

  void markPhysRegUsed(MCRegister PhysReg) override {
    MIB.addDef(PhysReg, RegState::Implicit);
  }

  MachineInstrBuilder MIB;
};

/// A special return arg handler for "returned" attribute arg calls.
struct ReturnedArgCallReturnHandler : public CallReturnHandler {
  ReturnedArgCallReturnHandler(MachineIRBuilder &MIRBuilder,
                               MachineRegisterInfo &MRI,
                               MachineInstrBuilder MIB)
      : CallReturnHandler(MIRBuilder, MRI, MIB) {}

  void markPhysRegUsed(MCRegister PhysReg) override {}
};
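
/// Handler for outgoing values (call arguments and return values): copies
/// register-passed values into their physical registers and stores
/// stack-passed values relative to SP, or into fixed stack slots for tail
/// calls.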
struct OutgoingArgHandler : public CallLowering::OutgoingValueHandler {
  OutgoingArgHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
                     MachineInstrBuilder MIB, bool IsTailCall = false,
                     int FPDiff = 0)
      : OutgoingValueHandler(MIRBuilder, MRI), MIB(MIB), IsTailCall(IsTailCall),
        FPDiff(FPDiff),
        Subtarget(MIRBuilder.getMF().getSubtarget<AArch64Subtarget>()) {}

  Register getStackAddress(uint64_t Size, int64_t Offset,
                           MachinePointerInfo &MPO,
                           ISD::ArgFlagsTy Flags) override {
    MachineFunction &MF = MIRBuilder.getMF();
    LLT p0 = LLT::pointer(0, 64);
    LLT s64 = LLT::scalar(64);

    if (IsTailCall) {
      assert(!Flags.isByVal() && "byval unhandled with tail calls");

      Offset += FPDiff;
      int FI = MF.getFrameInfo().CreateFixedObject(Size, Offset, true);
      auto FIReg = MIRBuilder.buildFrameIndex(p0, FI);
      MPO = MachinePointerInfo::getFixedStack(MF, FI);
      return FIReg.getReg(0);
    }

    if (!SPReg)
      SPReg = MIRBuilder.buildCopy(p0, Register(AArch64::SP)).getReg(0);

    auto OffsetReg = MIRBuilder.buildConstant(s64, Offset);
    auto AddrReg = MIRBuilder.buildPtrAdd(p0, SPReg, OffsetReg);

    MPO = MachinePointerInfo::getStack(MF, Offset);
    return AddrReg.getReg(0);
  }

  /// We need to fixup the reported store size for certain value types because
  /// we invert the interpretation of ValVT and LocVT in certain cases. This is
  /// for compatibility with the DAG call lowering implementation, which we're
  /// currently building on top of.
  LLT getStackValueStoreType(const DataLayout &DL, const CCValAssign &VA,
                             ISD::ArgFlagsTy Flags) const override {
    if (Flags.isPointer())
      return CallLowering::ValueHandler::getStackValueStoreType(DL, VA, Flags);
    return getStackValueStoreTypeHack(VA);
  }

  void assignValueToReg(Register ValVReg, Register PhysReg,
                        CCValAssign VA) override {
    MIB.addUse(PhysReg, RegState::Implicit);
    Register ExtReg = extendRegister(ValVReg, VA);
    MIRBuilder.buildCopy(PhysReg, ExtReg);
  }

  void assignValueToAddress(Register ValVReg, Register Addr, LLT MemTy,
                            MachinePointerInfo &MPO, CCValAssign &VA) override {
    MachineFunction &MF = MIRBuilder.getMF();
    auto MMO = MF.getMachineMemOperand(MPO, MachineMemOperand::MOStore, MemTy,
                                       inferAlignFromPtrInfo(MF, MPO));
    MIRBuilder.buildStore(ValVReg, Addr, *MMO);
  }

  void assignValueToAddress(const CallLowering::ArgInfo &Arg, unsigned RegIndex,
                            Register Addr, LLT MemTy, MachinePointerInfo &MPO,
                            CCValAssign &VA) override {
    unsigned MaxSize = MemTy.getSizeInBytes() * 8;
    // For varargs, we always want to extend them to 8 bytes, in which case
    // we disable setting a max.
    if (!Arg.IsFixed)
      MaxSize = 0;

    Register ValVReg = Arg.Regs[RegIndex];
    if (VA.getLocInfo() != CCValAssign::LocInfo::FPExt) {
      MVT LocVT = VA.getLocVT();
      MVT ValVT = VA.getValVT();

      if (VA.getValVT() == MVT::i8 || VA.getValVT() == MVT::i16) {
        std::swap(ValVT, LocVT);
        MemTy = LLT(VA.getValVT());
      }

      ValVReg = extendRegister(ValVReg, VA, MaxSize);
    } else {
      // The store does not cover the full allocated stack slot.
      MemTy = LLT(VA.getValVT());
    }

    assignValueToAddress(ValVReg, Addr, MemTy, MPO, VA);
  }

  MachineInstrBuilder MIB;

  bool IsTailCall;

  /// For tail calls, the byte offset of the call's argument area from the
  /// callee's. Unused elsewhere.
  int FPDiff;

  // Cache the SP register vreg if we need it more than once in this call site.
  Register SPReg;

  const AArch64Subtarget &Subtarget;
};
} // namespace
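
/// Return true if the given calling convention pops its own stack arguments
/// (fastcc with -tailcallopt, tailcc, and swifttailcc).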
static bool doesCalleeRestoreStack(CallingConv::ID CallConv, bool TailCallOpt) {
  return (CallConv == CallingConv::Fast && TailCallOpt) ||
         CallConv == CallingConv::Tail || CallConv == CallingConv::SwiftTail;
}
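
/// Lower the return of \p Val held in \p VRegs: emit the RET_ReallyLR and the
/// copies/extensions needed to place the value in the physical return
/// registers required by the calling convention.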
bool AArch64CallLowering::lowerReturn(MachineIRBuilder &MIRBuilder,
                                      const Value *Val,
                                      ArrayRef<Register> VRegs,
                                      FunctionLoweringInfo &FLI,
                                      Register SwiftErrorVReg) const {
  auto MIB = MIRBuilder.buildInstrNoInsert(AArch64::RET_ReallyLR);
  assert(((Val && !VRegs.empty()) || (!Val && VRegs.empty())) &&
         "Return value without a vreg");

  bool Success = true;
  if (!VRegs.empty()) {
    MachineFunction &MF = MIRBuilder.getMF();
    const Function &F = MF.getFunction();
    const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();

    MachineRegisterInfo &MRI = MF.getRegInfo();
    const AArch64TargetLowering &TLI = *getTLI<AArch64TargetLowering>();
    CCAssignFn *AssignFn = TLI.CCAssignFnForReturn(F.getCallingConv());
    auto &DL = F.getParent()->getDataLayout();
    LLVMContext &Ctx = Val->getType()->getContext();

    SmallVector<EVT, 4> SplitEVTs;
    ComputeValueVTs(TLI, DL, Val->getType(), SplitEVTs);
    assert(VRegs.size() == SplitEVTs.size() &&
           "For each split Type there should be exactly one VReg.");

    SmallVector<ArgInfo, 8> SplitArgs;
    CallingConv::ID CC = F.getCallingConv();

    for (unsigned i = 0; i < SplitEVTs.size(); ++i) {
      Register CurVReg = VRegs[i];
      ArgInfo CurArgInfo = ArgInfo{CurVReg, SplitEVTs[i].getTypeForEVT(Ctx), 0};
      setArgFlags(CurArgInfo, AttributeList::ReturnIndex, DL, F);

      // i1 is a special case because SDAG i1 true is naturally zero extended
      // when widened using ANYEXT. We need to do it explicitly here.
      if (MRI.getType(CurVReg).getSizeInBits() == 1) {
        CurVReg = MIRBuilder.buildZExt(LLT::scalar(8), CurVReg).getReg(0);
      } else if (TLI.getNumRegistersForCallingConv(Ctx, CC, SplitEVTs[i]) ==
                 1) {
        // Some types will need extending as specified by the CC.
        MVT NewVT = TLI.getRegisterTypeForCallingConv(Ctx, CC, SplitEVTs[i]);
        if (EVT(NewVT) != SplitEVTs[i]) {
          unsigned ExtendOp = TargetOpcode::G_ANYEXT;
          if (F.getAttributes().hasRetAttr(Attribute::SExt))
            ExtendOp = TargetOpcode::G_SEXT;
          else if (F.getAttributes().hasRetAttr(Attribute::ZExt))
            ExtendOp = TargetOpcode::G_ZEXT;

          LLT NewLLT(NewVT);
          LLT OldLLT(MVT::getVT(CurArgInfo.Ty));
          CurArgInfo.Ty = EVT(NewVT).getTypeForEVT(Ctx);
          // Instead of an extend, we might have a vector type which needs
          // padding with more elements, e.g. <2 x half> -> <4 x half>.
          if (NewVT.isVector()) {
            if (OldLLT.isVector()) {
              if (NewLLT.getNumElements() > OldLLT.getNumElements()) {
                // We don't handle VA types which are not exactly twice the
                // size, but can easily be done in future.
                if (NewLLT.getNumElements() != OldLLT.getNumElements() * 2) {
                  LLVM_DEBUG(dbgs() << "Outgoing vector ret has too many elts");
                  return false;
                }
                auto Undef = MIRBuilder.buildUndef({OldLLT});
                CurVReg =
                    MIRBuilder.buildMerge({NewLLT}, {CurVReg, Undef}).getReg(0);
              } else {
                // Just do a vector extend.
                CurVReg = MIRBuilder.buildInstr(ExtendOp, {NewLLT}, {CurVReg})
                              .getReg(0);
              }
            } else if (NewLLT.getNumElements() == 2) {
              // We need to pad a <1 x S> type to <2 x S>. Since we don't have
              // <1 x S> vector types in GISel we use a build_vector instead
              // of a vector merge/concat.
              auto Undef = MIRBuilder.buildUndef({OldLLT});
              CurVReg =
                  MIRBuilder
                      .buildBuildVector({NewLLT}, {CurVReg, Undef.getReg(0)})
                      .getReg(0);
            } else {
              LLVM_DEBUG(dbgs() << "Could not handle ret ty\n");
              return false;
            }
          } else {
            // If the split EVT was a <1 x T> vector, and NewVT is T, then we
            // don't have to do anything since we don't distinguish between the
            // two.
            if (NewLLT != MRI.getType(CurVReg)) {
              // A scalar extend.
              CurVReg = MIRBuilder.buildInstr(ExtendOp, {NewLLT}, {CurVReg})
                            .getReg(0);
            }
          }
        }
      }
      if (CurVReg != CurArgInfo.Regs[0]) {
        CurArgInfo.Regs[0] = CurVReg;
        // Reset the arg flags after modifying CurVReg.
        setArgFlags(CurArgInfo, AttributeList::ReturnIndex, DL, F);
      }
      splitToValueTypes(CurArgInfo, SplitArgs, DL, CC);
    }

    AArch64OutgoingValueAssigner Assigner(AssignFn, AssignFn, Subtarget,
                                          /*IsReturn*/ true);
    OutgoingArgHandler Handler(MIRBuilder, MRI, MIB);
    Success = determineAndHandleAssignments(Handler, Assigner, SplitArgs,
                                            MIRBuilder, CC, F.isVarArg());
  }

  if (SwiftErrorVReg) {
    MIB.addUse(AArch64::X21, RegState::Implicit);
    MIRBuilder.buildCopy(AArch64::X21, SwiftErrorVReg);
  }

  MIRBuilder.insertInstr(MIB);
  return Success;
}

/// Helper function to compute forwarded registers for musttail calls. Computes
/// the forwarded registers, sets MBB liveness, and emits COPY instructions that
/// can be used to save + restore registers later.
static void handleMustTailForwardedRegisters(MachineIRBuilder &MIRBuilder,
                                             CCAssignFn *AssignFn) {
  MachineBasicBlock &MBB = MIRBuilder.getMBB();
  MachineFunction &MF = MIRBuilder.getMF();
  MachineFrameInfo &MFI = MF.getFrameInfo();

  if (!MFI.hasMustTailInVarArgFunc())
    return;

  AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
  const Function &F = MF.getFunction();
  assert(F.isVarArg() && "Expected F to be vararg?");

  // Compute the set of forwarded registers. The rest are scratch.
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(F.getCallingConv(), /*IsVarArg=*/true, MF, ArgLocs,
                 F.getContext());
  SmallVector<MVT, 2> RegParmTypes;
  RegParmTypes.push_back(MVT::i64);
  RegParmTypes.push_back(MVT::f128);

  // Later on, we can use this vector to restore the registers if necessary.
  SmallVectorImpl<ForwardedRegister> &Forwards =
      FuncInfo->getForwardedMustTailRegParms();
  CCInfo.analyzeMustTailForwardedRegisters(Forwards, RegParmTypes, AssignFn);

  // Conservatively forward X8, since it might be used for an aggregate
  // return.
  if (!CCInfo.isAllocated(AArch64::X8)) {
    Register X8VReg = MF.addLiveIn(AArch64::X8, &AArch64::GPR64RegClass);
    Forwards.push_back(ForwardedRegister(X8VReg, AArch64::X8, MVT::i64));
  }

  // Add the forwards to the MachineBasicBlock and MachineFunction.
  for (const auto &F : Forwards) {
    MBB.addLiveIn(F.PReg);
    MIRBuilder.buildCopy(Register(F.VReg), Register(F.PReg));
  }
}
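
/// Decide whether to bail out of GlobalISel for this function. We currently
/// fall back to SelectionDAG for scalable vector arguments or returns, and for
/// subtargets without NEON or FPARMv8.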
bool AArch64CallLowering::fallBackToDAGISel(const MachineFunction &MF) const {
  auto &F = MF.getFunction();
  if (isa<ScalableVectorType>(F.getReturnType()))
    return true;
  if (llvm::any_of(F.args(), [](const Argument &A) {
        return isa<ScalableVectorType>(A.getType());
      }))
    return true;
  const auto &ST = MF.getSubtarget<AArch64Subtarget>();
  if (!ST.hasNEON() || !ST.hasFPARMv8()) {
    LLVM_DEBUG(dbgs() << "Falling back to SDAG because we don't support no-NEON\n");
    return true;
  }
  return false;
}
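
/// Lower the incoming (formal) arguments of \p F into \p VRegs, including the
/// i1-as-i8 zero-extension hint, vararg stack bookkeeping, and musttail
/// register forwarding.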
bool AArch64CallLowering::lowerFormalArguments(
    MachineIRBuilder &MIRBuilder, const Function &F,
    ArrayRef<ArrayRef<Register>> VRegs, FunctionLoweringInfo &FLI) const {
  MachineFunction &MF = MIRBuilder.getMF();
  MachineBasicBlock &MBB = MIRBuilder.getMBB();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  auto &DL = F.getParent()->getDataLayout();

  SmallVector<ArgInfo, 8> SplitArgs;
  SmallVector<std::pair<Register, Register>> BoolArgs;
  unsigned i = 0;
  for (auto &Arg : F.args()) {
    if (DL.getTypeStoreSize(Arg.getType()).isZero())
      continue;

    ArgInfo OrigArg{VRegs[i], Arg, i};
    setArgFlags(OrigArg, i + AttributeList::FirstArgIndex, DL, F);

    // i1 arguments are zero-extended to i8 by the caller. Emit a
    // hint to reflect this.
    if (OrigArg.Ty->isIntegerTy(1)) {
      assert(OrigArg.Regs.size() == 1 &&
             MRI.getType(OrigArg.Regs[0]).getSizeInBits() == 1 &&
             "Unexpected registers used for i1 arg");

      if (!OrigArg.Flags[0].isZExt()) {
        // Lower i1 argument as i8, and insert AssertZExt + Trunc later.
        Register OrigReg = OrigArg.Regs[0];
        Register WideReg = MRI.createGenericVirtualRegister(LLT::scalar(8));
        OrigArg.Regs[0] = WideReg;
        BoolArgs.push_back({OrigReg, WideReg});
      }
    }

    if (Arg.hasAttribute(Attribute::SwiftAsync))
      MF.getInfo<AArch64FunctionInfo>()->setHasSwiftAsyncContext(true);

    splitToValueTypes(OrigArg, SplitArgs, DL, F.getCallingConv());
    ++i;
  }

  if (!MBB.empty())
    MIRBuilder.setInstr(*MBB.begin());

  const AArch64TargetLowering &TLI = *getTLI<AArch64TargetLowering>();
  CCAssignFn *AssignFn =
      TLI.CCAssignFnForCall(F.getCallingConv(), /*IsVarArg=*/false);

  AArch64IncomingValueAssigner Assigner(AssignFn, AssignFn);
  FormalArgHandler Handler(MIRBuilder, MRI);
  if (!determineAndHandleAssignments(Handler, Assigner, SplitArgs, MIRBuilder,
                                     F.getCallingConv(), F.isVarArg()))
    return false;

  if (!BoolArgs.empty()) {
    for (auto &KV : BoolArgs) {
      Register OrigReg = KV.first;
      Register WideReg = KV.second;
      LLT WideTy = MRI.getType(WideReg);
      assert(MRI.getType(OrigReg).getScalarSizeInBits() == 1 &&
             "Unexpected bit size of a bool arg");
      MIRBuilder.buildTrunc(
          OrigReg, MIRBuilder.buildAssertZExt(WideTy, WideReg, 1).getReg(0));
    }
  }

  AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
  uint64_t StackOffset = Assigner.StackOffset;
  if (F.isVarArg()) {
    auto &Subtarget = MF.getSubtarget<AArch64Subtarget>();
    if (!Subtarget.isTargetDarwin()) {
      // FIXME: we need to reimplement saveVarArgsRegisters from
      // AArch64ISelLowering.
      return false;
    }

    // We currently pass all varargs at 8-byte alignment, or 4 in ILP32.
    StackOffset =
        alignTo(Assigner.StackOffset, Subtarget.isTargetILP32() ? 4 : 8);

    auto &MFI = MIRBuilder.getMF().getFrameInfo();
    FuncInfo->setVarArgsStackIndex(MFI.CreateFixedObject(4, StackOffset, true));
  }

  if (doesCalleeRestoreStack(F.getCallingConv(),
                             MF.getTarget().Options.GuaranteedTailCallOpt)) {
    // We have a non-standard ABI, so why not make full use of the stack that
    // we're going to pop? It must be aligned to 16 B in any case.
    StackOffset = alignTo(StackOffset, 16);

    // If we're expected to restore the stack (e.g. fastcc), then we'll be
    // adding a multiple of 16.
    FuncInfo->setArgumentStackToRestore(StackOffset);

    // Our own callers will guarantee that the space is free by giving an
    // aligned value to CALLSEQ_START.
  }

  // When we tail call, we need to check if the callee's arguments
  // will fit on the caller's stack. So, whenever we lower formal arguments,
  // we should keep track of this information, since we might lower a tail call
  // in this function later.
  FuncInfo->setBytesInStackArgArea(StackOffset);

  auto &Subtarget = MF.getSubtarget<AArch64Subtarget>();
  if (Subtarget.hasCustomCallingConv())
    Subtarget.getRegisterInfo()->UpdateCustomCalleeSavedRegs(MF);

  handleMustTailForwardedRegisters(MIRBuilder, AssignFn);

  // Move back to the end of the basic block.
  MIRBuilder.setMBB(MBB);

  return true;
}

/// Return true if the calling convention is one that we can guarantee TCO for.
static bool canGuaranteeTCO(CallingConv::ID CC, bool GuaranteeTailCalls) {
  return (CC == CallingConv::Fast && GuaranteeTailCalls) ||
         CC == CallingConv::Tail || CC == CallingConv::SwiftTail;
}

/// Return true if we might ever do TCO for calls with this calling convention.
static bool mayTailCallThisCC(CallingConv::ID CC) {
  switch (CC) {
  case CallingConv::C:
  case CallingConv::PreserveMost:
  case CallingConv::Swift:
  case CallingConv::SwiftTail:
  case CallingConv::Tail:
  case CallingConv::Fast:
    return true;
  default:
    return false;
  }
}

/// Returns a pair containing the fixed CCAssignFn and the vararg CCAssignFn for
/// CC.
static std::pair<CCAssignFn *, CCAssignFn *>
getAssignFnsForCC(CallingConv::ID CC, const AArch64TargetLowering &TLI) {
  return {TLI.CCAssignFnForCall(CC, false), TLI.CCAssignFnForCall(CC, true)};
}
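
/// Returns true if the caller's and callee's calling conventions are
/// compatible for a tail call: the call's results are assigned to the same
/// locations under both conventions and both preserve the same registers.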
bool AArch64CallLowering::doCallerAndCalleePassArgsTheSameWay(
    CallLoweringInfo &Info, MachineFunction &MF,
    SmallVectorImpl<ArgInfo> &InArgs) const {
  const Function &CallerF = MF.getFunction();
  CallingConv::ID CalleeCC = Info.CallConv;
  CallingConv::ID CallerCC = CallerF.getCallingConv();

  // If the calling conventions match, then everything must be the same.
  if (CalleeCC == CallerCC)
    return true;

  // Check if the caller and callee will handle arguments in the same way.
  const AArch64TargetLowering &TLI = *getTLI<AArch64TargetLowering>();
  CCAssignFn *CalleeAssignFnFixed;
  CCAssignFn *CalleeAssignFnVarArg;
  std::tie(CalleeAssignFnFixed, CalleeAssignFnVarArg) =
      getAssignFnsForCC(CalleeCC, TLI);

  CCAssignFn *CallerAssignFnFixed;
  CCAssignFn *CallerAssignFnVarArg;
  std::tie(CallerAssignFnFixed, CallerAssignFnVarArg) =
      getAssignFnsForCC(CallerCC, TLI);

  AArch64IncomingValueAssigner CalleeAssigner(CalleeAssignFnFixed,
                                              CalleeAssignFnVarArg);
  AArch64IncomingValueAssigner CallerAssigner(CallerAssignFnFixed,
                                              CallerAssignFnVarArg);

  if (!resultsCompatible(Info, MF, InArgs, CalleeAssigner, CallerAssigner))
    return false;

  // Make sure that the caller and callee preserve all of the same registers.
  auto TRI = MF.getSubtarget<AArch64Subtarget>().getRegisterInfo();
  const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
  const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
  if (MF.getSubtarget<AArch64Subtarget>().hasCustomCallingConv()) {
    TRI->UpdateCustomCallPreservedMask(MF, &CallerPreserved);
    TRI->UpdateCustomCallPreservedMask(MF, &CalleePreserved);
  }

  return TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved);
}
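
/// Check that the outgoing arguments of a prospective tail call can be
/// assigned without growing the caller's stack argument area, and that any
/// arguments passed in callee-saved registers match.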
bool AArch64CallLowering::areCalleeOutgoingArgsTailCallable(
    CallLoweringInfo &Info, MachineFunction &MF,
    SmallVectorImpl<ArgInfo> &OutArgs) const {
  // If there are no outgoing arguments, then we are done.
  if (OutArgs.empty())
    return true;

  const Function &CallerF = MF.getFunction();
  LLVMContext &Ctx = CallerF.getContext();
  CallingConv::ID CalleeCC = Info.CallConv;
  CallingConv::ID CallerCC = CallerF.getCallingConv();
  const AArch64TargetLowering &TLI = *getTLI<AArch64TargetLowering>();
  const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();

  CCAssignFn *AssignFnFixed;
  CCAssignFn *AssignFnVarArg;
  std::tie(AssignFnFixed, AssignFnVarArg) = getAssignFnsForCC(CalleeCC, TLI);

  // We have outgoing arguments. Make sure that we can tail call with them.
  SmallVector<CCValAssign, 16> OutLocs;
  CCState OutInfo(CalleeCC, false, MF, OutLocs, Ctx);

  AArch64OutgoingValueAssigner CalleeAssigner(AssignFnFixed, AssignFnVarArg,
                                              Subtarget, /*IsReturn*/ false);
  if (!determineAssignments(CalleeAssigner, OutArgs, OutInfo)) {
    LLVM_DEBUG(dbgs() << "... Could not analyze call operands.\n");
    return false;
  }

  // Make sure that they can fit on the caller's stack.
  const AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
  if (OutInfo.getNextStackOffset() > FuncInfo->getBytesInStackArgArea()) {
    LLVM_DEBUG(dbgs() << "... Cannot fit call operands on caller's stack.\n");
    return false;
  }

  // Verify that the parameters in callee-saved registers match.
  // TODO: Port this over to CallLowering as general code once swiftself is
  // supported.
  auto TRI = MF.getSubtarget<AArch64Subtarget>().getRegisterInfo();
  const uint32_t *CallerPreservedMask = TRI->getCallPreservedMask(MF, CallerCC);
  MachineRegisterInfo &MRI = MF.getRegInfo();

  if (Info.IsVarArg) {
    // Be conservative and disallow variadic memory operands to match SDAG's
    // behaviour.
    // FIXME: If the caller's calling convention is C, then we can
    // potentially use its argument area. However, for cases like fastcc,
    // we can't do anything.
    for (unsigned i = 0; i < OutLocs.size(); ++i) {
      auto &ArgLoc = OutLocs[i];
      if (ArgLoc.isRegLoc())
        continue;

      LLVM_DEBUG(
          dbgs()
          << "... Cannot tail call vararg function with stack arguments\n");
      return false;
    }
  }

  return parametersInCSRMatch(MRI, CallerPreservedMask, OutLocs, OutArgs);
}
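
/// Decide whether this call can be lowered as a tail call. Checks the calling
/// convention, byval/inreg/swifterror restrictions, externally defined weak
/// callees, and whether the incoming and outgoing arguments are compatible
/// between caller and callee.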
bool AArch64CallLowering::isEligibleForTailCallOptimization(
    MachineIRBuilder &MIRBuilder, CallLoweringInfo &Info,
    SmallVectorImpl<ArgInfo> &InArgs,
    SmallVectorImpl<ArgInfo> &OutArgs) const {

  // Must pass all target-independent checks in order to tail call optimize.
  if (!Info.IsTailCall)
    return false;

  CallingConv::ID CalleeCC = Info.CallConv;
  MachineFunction &MF = MIRBuilder.getMF();
  const Function &CallerF = MF.getFunction();

  LLVM_DEBUG(dbgs() << "Attempting to lower call as tail call\n");

  if (Info.SwiftErrorVReg) {
    // TODO: We should handle this.
    // Note that this is also handled by the check for no outgoing arguments.
    // Proactively disabling this though, because the swifterror handling in
    // lowerCall inserts a COPY *after* the location of the call.
    LLVM_DEBUG(dbgs() << "... Cannot handle tail calls with swifterror yet.\n");
    return false;
  }

  if (!mayTailCallThisCC(CalleeCC)) {
    LLVM_DEBUG(dbgs() << "... Calling convention cannot be tail called.\n");
    return false;
  }

  // Byval parameters hand the function a pointer directly into the stack area
  // we want to reuse during a tail call. Working around this *is* possible (see
  // X86).
  //
  // FIXME: In AArch64ISelLowering, this isn't worked around. Can/should we try
  // it?
  //
  // On Windows, "inreg" attributes signify non-aggregate indirect returns.
  // In this case, it is necessary to save/restore X0 in the callee. Tail
  // call opt interferes with this. So we disable tail call opt when the
  // caller has an argument with "inreg" attribute.
  //
  // FIXME: Check whether the callee also has an "inreg" argument.
  //
  // When the caller has a swifterror argument, we don't want to tail call
  // because we would have to move into the swifterror register before the
  // tail call.
  if (any_of(CallerF.args(), [](const Argument &A) {
        return A.hasByValAttr() || A.hasInRegAttr() || A.hasSwiftErrorAttr();
      })) {
    LLVM_DEBUG(dbgs() << "... Cannot tail call from callers with byval, "
                         "inreg, or swifterror arguments\n");
    return false;
  }

  // Externally-defined functions with weak linkage should not be
  // tail-called on AArch64 when the OS does not support dynamic
  // pre-emption of symbols, as the AAELF spec requires normal calls
  // to undefined weak functions to be replaced with a NOP or jump to the
  // next instruction. The behaviour of branch instructions in this
  // situation (as used for tail calls) is implementation-defined, so we
  // cannot rely on the linker replacing the tail call with a return.
  if (Info.Callee.isGlobal()) {
    const GlobalValue *GV = Info.Callee.getGlobal();
    const Triple &TT = MF.getTarget().getTargetTriple();
    if (GV->hasExternalWeakLinkage() &&
        (!TT.isOSWindows() || TT.isOSBinFormatELF() ||
         TT.isOSBinFormatMachO())) {
      LLVM_DEBUG(dbgs() << "... Cannot tail call externally-defined function "
                           "with weak linkage for this OS.\n");
      return false;
    }
  }

  // If we have -tailcallopt, then we're done.
  if (canGuaranteeTCO(CalleeCC, MF.getTarget().Options.GuaranteedTailCallOpt))
    return CalleeCC == CallerF.getCallingConv();

  // We don't have -tailcallopt, so we're allowed to change the ABI (sibcall).
  // Try to find cases where we can do that.

  // I want anyone implementing a new calling convention to think long and hard
  // about this assert.
  assert((!Info.IsVarArg || CalleeCC == CallingConv::C) &&
         "Unexpected variadic calling convention");

  // Verify that the incoming and outgoing arguments from the callee are
  // safe to tail call.
  if (!doCallerAndCalleePassArgsTheSameWay(Info, MF, InArgs)) {
    LLVM_DEBUG(
        dbgs()
        << "... Caller and callee have incompatible calling conventions.\n");
    return false;
  }

  if (!areCalleeOutgoingArgsTailCallable(Info, MF, OutArgs))
    return false;

  LLVM_DEBUG(
      dbgs() << "... Call is eligible for tail call optimization.\n");
  return true;
}
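
/// Pick the machine opcode for a (possibly indirect, possibly tail) call from
/// this function, taking branch target enforcement into account for indirect
/// tail calls.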
static unsigned getCallOpcode(const MachineFunction &CallerF, bool IsIndirect,
                              bool IsTailCall) {
  if (!IsTailCall)
    return IsIndirect ? getBLRCallOpcode(CallerF) : (unsigned)AArch64::BL;

  if (!IsIndirect)
    return AArch64::TCRETURNdi;

  // When BTI is enabled, we need to use TCRETURNriBTI to make sure that we use
  // x16 or x17.
  if (CallerF.getInfo<AArch64FunctionInfo>()->branchTargetEnforcement())
    return AArch64::TCRETURNriBTI;

  return AArch64::TCRETURNri;
}
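
/// Select the register mask to attach to the call. For calls whose first
/// argument carries the "returned" attribute, prefer the X0-preserving mask
/// when the target provides one; otherwise drop the attribute and fall back
/// to the plain call-preserved mask.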
static const uint32_t *
getMaskForArgs(SmallVectorImpl<AArch64CallLowering::ArgInfo> &OutArgs,
               AArch64CallLowering::CallLoweringInfo &Info,
               const AArch64RegisterInfo &TRI, MachineFunction &MF) {
  const uint32_t *Mask;
  if (!OutArgs.empty() && OutArgs[0].Flags[0].isReturned()) {
    // For 'this' returns, use the X0-preserving mask if applicable
    Mask = TRI.getThisReturnPreservedMask(MF, Info.CallConv);
    if (!Mask) {
      OutArgs[0].Flags[0].setReturned(false);
      Mask = TRI.getCallPreservedMask(MF, Info.CallConv);
    }
  } else {
    Mask = TRI.getCallPreservedMask(MF, Info.CallConv);
  }
  return Mask;
}
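
/// Emit a tail call (either a sibcall or a -tailcallopt style tail call):
/// build the TCRETURN* instruction, marshal the outgoing arguments into the
/// reserved argument area, and forward any musttail registers.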
bool AArch64CallLowering::lowerTailCall(
    MachineIRBuilder &MIRBuilder, CallLoweringInfo &Info,
    SmallVectorImpl<ArgInfo> &OutArgs) const {
  MachineFunction &MF = MIRBuilder.getMF();
  const Function &F = MF.getFunction();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  const AArch64TargetLowering &TLI = *getTLI<AArch64TargetLowering>();
  AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();

  // True when we're tail calling, but without -tailcallopt.
  bool IsSibCall = !MF.getTarget().Options.GuaranteedTailCallOpt &&
                   Info.CallConv != CallingConv::Tail &&
                   Info.CallConv != CallingConv::SwiftTail;

  // TODO: Right now, regbankselect doesn't know how to handle the rtcGPR64
  // register class. Until we can do that, we should fall back here.
  if (MF.getInfo<AArch64FunctionInfo>()->branchTargetEnforcement()) {
    LLVM_DEBUG(
        dbgs() << "Cannot lower indirect tail calls with BTI enabled yet.\n");
    return false;
  }

  // Find out which ABI gets to decide where things go.
  CallingConv::ID CalleeCC = Info.CallConv;
  CCAssignFn *AssignFnFixed;
  CCAssignFn *AssignFnVarArg;
  std::tie(AssignFnFixed, AssignFnVarArg) = getAssignFnsForCC(CalleeCC, TLI);

  MachineInstrBuilder CallSeqStart;
  if (!IsSibCall)
    CallSeqStart = MIRBuilder.buildInstr(AArch64::ADJCALLSTACKDOWN);

  unsigned Opc = getCallOpcode(MF, Info.Callee.isReg(), true);
  auto MIB = MIRBuilder.buildInstrNoInsert(Opc);
  MIB.add(Info.Callee);

  // Byte offset for the tail call. When we are sibcalling, this will always
  // be 0.
  MIB.addImm(0);

  // Tell the call which registers are clobbered.
  const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
  auto TRI = Subtarget.getRegisterInfo();
  const uint32_t *Mask = TRI->getCallPreservedMask(MF, CalleeCC);
  if (Subtarget.hasCustomCallingConv())
    TRI->UpdateCustomCallPreservedMask(MF, &Mask);
  MIB.addRegMask(Mask);

  if (TRI->isAnyArgRegReserved(MF))
    TRI->emitReservedArgRegCallError(MF);

  // FPDiff is the byte offset of the call's argument area from the callee's.
  // Stores to callee stack arguments will be placed in FixedStackSlots offset
  // by this amount for a tail call. In a sibling call it must be 0 because the
  // caller will deallocate the entire stack and the callee still expects its
  // arguments to begin at SP+0.
  int FPDiff = 0;

  // This will be 0 for sibcalls, potentially nonzero for tail calls produced
  // by -tailcallopt. For sibcalls, the memory operands for the call are
  // already available in the caller's incoming argument space.
  unsigned NumBytes = 0;
  if (!IsSibCall) {
    // We aren't sibcalling, so we need to compute FPDiff. We need to do this
    // before handling assignments, because FPDiff must be known for memory
    // arguments.
    unsigned NumReusableBytes = FuncInfo->getBytesInStackArgArea();
    SmallVector<CCValAssign, 16> OutLocs;
    CCState OutInfo(CalleeCC, false, MF, OutLocs, F.getContext());

    AArch64OutgoingValueAssigner CalleeAssigner(AssignFnFixed, AssignFnVarArg,
                                                Subtarget, /*IsReturn*/ false);
    if (!determineAssignments(CalleeAssigner, OutArgs, OutInfo))
      return false;

    // The callee will pop the argument stack as a tail call. Thus, we must
    // keep it 16-byte aligned.
    NumBytes = alignTo(OutInfo.getNextStackOffset(), 16);

    // FPDiff will be negative if this tail call requires more space than we
    // would automatically have in our incoming argument space. Positive if we
    // actually shrink the stack.
    FPDiff = NumReusableBytes - NumBytes;

    // Update the required reserved area if this is the tail call requiring the
    // most argument stack space.
    if (FPDiff < 0 && FuncInfo->getTailCallReservedStack() < (unsigned)-FPDiff)
      FuncInfo->setTailCallReservedStack(-FPDiff);

    // The stack pointer must be 16-byte aligned at all times it's used for a
    // memory operation, which in practice means at *all* times and in
    // particular across call boundaries. Therefore our own arguments started at
    // a 16-byte aligned SP and the delta applied for the tail call should
    // satisfy the same constraint.
    assert(FPDiff % 16 == 0 && "unaligned stack on tail call");
  }

  const auto &Forwards = FuncInfo->getForwardedMustTailRegParms();

  AArch64OutgoingValueAssigner Assigner(AssignFnFixed, AssignFnVarArg,
                                        Subtarget, /*IsReturn*/ false);

  // Do the actual argument marshalling.
  OutgoingArgHandler Handler(MIRBuilder, MRI, MIB,
                             /*IsTailCall*/ true, FPDiff);
  if (!determineAndHandleAssignments(Handler, Assigner, OutArgs, MIRBuilder,
                                     CalleeCC, Info.IsVarArg))
    return false;

  Mask = getMaskForArgs(OutArgs, Info, *TRI, MF);

  if (Info.IsVarArg && Info.IsMustTailCall) {
    // Now we know what's being passed to the function. Add uses to the call for
    // the forwarded registers that we *aren't* passing as parameters. This will
    // preserve the copies we build earlier.
    for (const auto &F : Forwards) {
      Register ForwardedReg = F.PReg;
      // If the register is already passed, or aliases a register which is
      // already being passed, then skip it.
      if (any_of(MIB->uses(), [&ForwardedReg, &TRI](const MachineOperand &Use) {
            if (!Use.isReg())
              return false;
            return TRI->regsOverlap(Use.getReg(), ForwardedReg);
          }))
        continue;

      // We aren't passing it already, so we should add it to the call.
      MIRBuilder.buildCopy(ForwardedReg, Register(F.VReg));
      MIB.addReg(ForwardedReg, RegState::Implicit);
    }
  }

  // If we have -tailcallopt, we need to adjust the stack. We'll do the call
  // sequence start and end here.
  if (!IsSibCall) {
    MIB->getOperand(1).setImm(FPDiff);
    CallSeqStart.addImm(0).addImm(0);
    // End the call sequence *before* emitting the call. Normally, we would
    // tidy the frame up after the call. However, here, we've laid out the
    // parameters so that when SP is reset, they will be in the correct
    // location.
    MIRBuilder.buildInstr(AArch64::ADJCALLSTACKUP).addImm(0).addImm(0);
  }

  // Now we can add the actual call instruction to the correct basic block.
  MIRBuilder.insertInstr(MIB);

  // If Callee is a reg, since it is used by a target specific instruction,
  // it must have a register class matching the constraint of that instruction.
  if (Info.Callee.isReg())
    constrainOperandRegClass(MF, *TRI, MRI, *MF.getSubtarget().getInstrInfo(),
                             *MF.getSubtarget().getRegBankInfo(), *MIB,
                             MIB->getDesc(), Info.Callee, 0);

  MF.getFrameInfo().setHasTailCall();
  Info.LoweredTailCall = true;
  return true;
}
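
/// Lower a call. Tries the tail-call path first; otherwise emits the
/// call-sequence markers and the BL/BLR, marshals the outgoing arguments, and
/// copies the return value (and swifterror, if any) back into virtual
/// registers.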
bool AArch64CallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
                                    CallLoweringInfo &Info) const {
  MachineFunction &MF = MIRBuilder.getMF();
  const Function &F = MF.getFunction();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  auto &DL = F.getParent()->getDataLayout();
  const AArch64TargetLowering &TLI = *getTLI<AArch64TargetLowering>();

  SmallVector<ArgInfo, 8> OutArgs;
  for (auto &OrigArg : Info.OrigArgs) {
    splitToValueTypes(OrigArg, OutArgs, DL, Info.CallConv);
    // AAPCS requires that we zero-extend i1 to 8 bits by the caller.
    if (OrigArg.Ty->isIntegerTy(1)) {
      ArgInfo &OutArg = OutArgs.back();
      assert(OutArg.Regs.size() == 1 &&
             MRI.getType(OutArg.Regs[0]).getSizeInBits() == 1 &&
             "Unexpected registers used for i1 arg");

      // We cannot use a ZExt ArgInfo flag here, because it will
      // zero-extend the argument to i32 instead of just i8.
      OutArg.Regs[0] =
          MIRBuilder.buildZExt(LLT::scalar(8), OutArg.Regs[0]).getReg(0);
      LLVMContext &Ctx = MF.getFunction().getContext();
      OutArg.Ty = Type::getInt8Ty(Ctx);
    }
  }

  SmallVector<ArgInfo, 8> InArgs;
  if (!Info.OrigRet.Ty->isVoidTy())
    splitToValueTypes(Info.OrigRet, InArgs, DL, Info.CallConv);

  // If we can lower as a tail call, do that instead.
  bool CanTailCallOpt =
      isEligibleForTailCallOptimization(MIRBuilder, Info, InArgs, OutArgs);

  // We must emit a tail call if we have musttail.
  if (Info.IsMustTailCall && !CanTailCallOpt) {
    // There are types of incoming/outgoing arguments we can't handle yet, so
    // it doesn't make sense to actually die here like in ISelLowering. Instead,
    // fall back to SelectionDAG and let it try to handle this.
    LLVM_DEBUG(dbgs() << "Failed to lower musttail call as tail call\n");
    return false;
  }

  Info.IsTailCall = CanTailCallOpt;
  if (CanTailCallOpt)
    return lowerTailCall(MIRBuilder, Info, OutArgs);

  // Find out which ABI gets to decide where things go.
  CCAssignFn *AssignFnFixed;
  CCAssignFn *AssignFnVarArg;
  std::tie(AssignFnFixed, AssignFnVarArg) =
      getAssignFnsForCC(Info.CallConv, TLI);

  MachineInstrBuilder CallSeqStart;
  CallSeqStart = MIRBuilder.buildInstr(AArch64::ADJCALLSTACKDOWN);

  // Create a temporarily-floating call instruction so we can add the implicit
  // uses of arg registers.
  const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
  unsigned Opc = 0;
  // A call to a returns twice function like setjmp must be followed by a bti
  // instruction.
  if (Info.CB && Info.CB->getAttributes().hasFnAttr(Attribute::ReturnsTwice) &&
      !Subtarget.noBTIAtReturnTwice() &&
      MF.getInfo<AArch64FunctionInfo>()->branchTargetEnforcement())
    Opc = AArch64::BLR_BTI;
  else
    Opc = getCallOpcode(MF, Info.Callee.isReg(), false);

  auto MIB = MIRBuilder.buildInstrNoInsert(Opc);
  MIB.add(Info.Callee);

  // Tell the call which registers are clobbered.
  const uint32_t *Mask;
  const auto *TRI = Subtarget.getRegisterInfo();

  AArch64OutgoingValueAssigner Assigner(AssignFnFixed, AssignFnVarArg,
                                        Subtarget, /*IsReturn*/ false);
  // Do the actual argument marshalling.
  OutgoingArgHandler Handler(MIRBuilder, MRI, MIB, /*IsTailCall=*/false);
  if (!determineAndHandleAssignments(Handler, Assigner, OutArgs, MIRBuilder,
                                     Info.CallConv, Info.IsVarArg))
    return false;

  Mask = getMaskForArgs(OutArgs, Info, *TRI, MF);

  if (MF.getSubtarget<AArch64Subtarget>().hasCustomCallingConv())
    TRI->UpdateCustomCallPreservedMask(MF, &Mask);
  MIB.addRegMask(Mask);

  if (TRI->isAnyArgRegReserved(MF))
    TRI->emitReservedArgRegCallError(MF);

  // Now we can add the actual call instruction to the correct basic block.
  MIRBuilder.insertInstr(MIB);

  // If Callee is a reg, since it is used by a target specific
  // instruction, it must have a register class matching the
  // constraint of that instruction.
  if (Info.Callee.isReg())
    constrainOperandRegClass(MF, *TRI, MRI, *Subtarget.getInstrInfo(),
                             *Subtarget.getRegBankInfo(), *MIB, MIB->getDesc(),
                             Info.Callee, 0);

  // Finally we can copy the returned value back into its virtual-register. In
  // symmetry with the arguments, the physical register must be an
  // implicit-define of the call instruction.
  if (!Info.OrigRet.Ty->isVoidTy()) {
    CCAssignFn *RetAssignFn = TLI.CCAssignFnForReturn(Info.CallConv);
    CallReturnHandler Handler(MIRBuilder, MRI, MIB);
    bool UsingReturnedArg =
        !OutArgs.empty() && OutArgs[0].Flags[0].isReturned();

    AArch64OutgoingValueAssigner Assigner(RetAssignFn, RetAssignFn, Subtarget,
                                          /*IsReturn*/ false);
    ReturnedArgCallReturnHandler ReturnedArgHandler(MIRBuilder, MRI, MIB);
    if (!determineAndHandleAssignments(
            UsingReturnedArg ? ReturnedArgHandler : Handler, Assigner, InArgs,
            MIRBuilder, Info.CallConv, Info.IsVarArg,
            UsingReturnedArg ? makeArrayRef(OutArgs[0].Regs) : None))
      return false;
  }

  if (Info.SwiftErrorVReg) {
    MIB.addDef(AArch64::X21, RegState::Implicit);
    MIRBuilder.buildCopy(Info.SwiftErrorVReg, Register(AArch64::X21));
  }

  uint64_t CalleePopBytes =
      doesCalleeRestoreStack(Info.CallConv,
                             MF.getTarget().Options.GuaranteedTailCallOpt)
          ? alignTo(Assigner.StackOffset, 16)
          : 0;

  CallSeqStart.addImm(Assigner.StackOffset).addImm(0);
  MIRBuilder.buildInstr(AArch64::ADJCALLSTACKUP)
      .addImm(Assigner.StackOffset)
      .addImm(CalleePopBytes);

  return true;
}
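
/// Only 64-bit (pointer-sized) values are eligible for the "returned"
/// argument optimization on AArch64.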
bool AArch64CallLowering::isTypeIsValidForThisReturn(EVT Ty) const {
  return Ty.getSizeInBits() == 64;
}