// AArch64CallingConvention.cpp
  1. //=== AArch64CallingConvention.cpp - AArch64 CC impl ------------*- C++ -*-===//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. //
  9. // This file contains the table-generated and custom routines for the AArch64
  10. // Calling Convention.
  11. //
  12. //===----------------------------------------------------------------------===//
  13. #include "AArch64CallingConvention.h"
  14. #include "AArch64.h"
  15. #include "AArch64InstrInfo.h"
  16. #include "AArch64Subtarget.h"
  17. #include "llvm/CodeGen/CallingConvLower.h"
  18. #include "llvm/CodeGen/TargetInstrInfo.h"
  19. #include "llvm/IR/CallingConv.h"
  20. using namespace llvm;
  21. static const MCPhysReg XRegList[] = {AArch64::X0, AArch64::X1, AArch64::X2,
  22. AArch64::X3, AArch64::X4, AArch64::X5,
  23. AArch64::X6, AArch64::X7};
  24. static const MCPhysReg HRegList[] = {AArch64::H0, AArch64::H1, AArch64::H2,
  25. AArch64::H3, AArch64::H4, AArch64::H5,
  26. AArch64::H6, AArch64::H7};
  27. static const MCPhysReg SRegList[] = {AArch64::S0, AArch64::S1, AArch64::S2,
  28. AArch64::S3, AArch64::S4, AArch64::S5,
  29. AArch64::S6, AArch64::S7};
  30. static const MCPhysReg DRegList[] = {AArch64::D0, AArch64::D1, AArch64::D2,
  31. AArch64::D3, AArch64::D4, AArch64::D5,
  32. AArch64::D6, AArch64::D7};
  33. static const MCPhysReg QRegList[] = {AArch64::Q0, AArch64::Q1, AArch64::Q2,
  34. AArch64::Q3, AArch64::Q4, AArch64::Q5,
  35. AArch64::Q6, AArch64::Q7};
  36. static const MCPhysReg ZRegList[] = {AArch64::Z0, AArch64::Z1, AArch64::Z2,
  37. AArch64::Z3, AArch64::Z4, AArch64::Z5,
  38. AArch64::Z6, AArch64::Z7};
  39. static bool finishStackBlock(SmallVectorImpl<CCValAssign> &PendingMembers,
  40. MVT LocVT, ISD::ArgFlagsTy &ArgFlags,
  41. CCState &State, Align SlotAlign) {
  42. if (LocVT.isScalableVector()) {
  43. const AArch64Subtarget &Subtarget = static_cast<const AArch64Subtarget &>(
  44. State.getMachineFunction().getSubtarget());
  45. const AArch64TargetLowering *TLI = Subtarget.getTargetLowering();
  46. // We are about to reinvoke the CCAssignFn auto-generated handler. If we
  47. // don't unset these flags we will get stuck in an infinite loop forever
  48. // invoking the custom handler.
  49. ArgFlags.setInConsecutiveRegs(false);
  50. ArgFlags.setInConsecutiveRegsLast(false);
  51. // The calling convention for passing SVE tuples states that in the event
  52. // we cannot allocate enough registers for the tuple we should still leave
  53. // any remaining registers unallocated. However, when we call the
  54. // CCAssignFn again we want it to behave as if all remaining registers are
  55. // allocated. This will force the code to pass the tuple indirectly in
  56. // accordance with the PCS.
  57. bool RegsAllocated[8];
  58. for (int I = 0; I < 8; I++) {
  59. RegsAllocated[I] = State.isAllocated(ZRegList[I]);
  60. State.AllocateReg(ZRegList[I]);
  61. }
  62. auto &It = PendingMembers[0];
  63. CCAssignFn *AssignFn =
  64. TLI->CCAssignFnForCall(State.getCallingConv(), /*IsVarArg=*/false);
  65. if (AssignFn(It.getValNo(), It.getValVT(), It.getValVT(), CCValAssign::Full,
  66. ArgFlags, State))
  67. llvm_unreachable("Call operand has unhandled type");
  68. // Return the flags to how they were before.
  69. ArgFlags.setInConsecutiveRegs(true);
  70. ArgFlags.setInConsecutiveRegsLast(true);
  71. // Return the register state back to how it was before, leaving any
  72. // unallocated registers available for other smaller types.
  73. for (int I = 0; I < 8; I++)
  74. if (!RegsAllocated[I])
  75. State.DeallocateReg(ZRegList[I]);
  76. // All pending members have now been allocated
  77. PendingMembers.clear();
  78. return true;
  79. }
  80. unsigned Size = LocVT.getSizeInBits() / 8;
  81. for (auto &It : PendingMembers) {
  82. It.convertToMem(State.AllocateStack(Size, SlotAlign));
  83. State.addLoc(It);
  84. SlotAlign = Align(1);
  85. }
  86. // All pending members have now been allocated
  87. PendingMembers.clear();
  88. return true;
  89. }
  90. /// The Darwin variadic PCS places anonymous arguments in 8-byte stack slots. An
  91. /// [N x Ty] type must still be contiguous in memory though.
  92. static bool CC_AArch64_Custom_Stack_Block(
  93. unsigned &ValNo, MVT &ValVT, MVT &LocVT, CCValAssign::LocInfo &LocInfo,
  94. ISD::ArgFlagsTy &ArgFlags, CCState &State) {
  95. SmallVectorImpl<CCValAssign> &PendingMembers = State.getPendingLocs();
  96. // Add the argument to the list to be allocated once we know the size of the
  97. // block.
  98. PendingMembers.push_back(
  99. CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo));
  100. if (!ArgFlags.isInConsecutiveRegsLast())
  101. return true;
  102. return finishStackBlock(PendingMembers, LocVT, ArgFlags, State, Align(8));
  103. }
  104. /// Given an [N x Ty] block, it should be passed in a consecutive sequence of
  105. /// registers. If no such sequence is available, mark the rest of the registers
  106. /// of that type as used and place the argument on the stack.
  107. static bool CC_AArch64_Custom_Block(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
  108. CCValAssign::LocInfo &LocInfo,
  109. ISD::ArgFlagsTy &ArgFlags, CCState &State) {
  110. const AArch64Subtarget &Subtarget = static_cast<const AArch64Subtarget &>(
  111. State.getMachineFunction().getSubtarget());
  112. bool IsDarwinILP32 = Subtarget.isTargetILP32() && Subtarget.isTargetMachO();
  113. // Try to allocate a contiguous block of registers, each of the correct
  114. // size to hold one member.
  115. ArrayRef<MCPhysReg> RegList;
  116. if (LocVT.SimpleTy == MVT::i64 || (IsDarwinILP32 && LocVT.SimpleTy == MVT::i32))
  117. RegList = XRegList;
  118. else if (LocVT.SimpleTy == MVT::f16)
  119. RegList = HRegList;
  120. else if (LocVT.SimpleTy == MVT::f32 || LocVT.is32BitVector())
  121. RegList = SRegList;
  122. else if (LocVT.SimpleTy == MVT::f64 || LocVT.is64BitVector())
  123. RegList = DRegList;
  124. else if (LocVT.SimpleTy == MVT::f128 || LocVT.is128BitVector())
  125. RegList = QRegList;
  126. else if (LocVT.isScalableVector())
  127. RegList = ZRegList;
  128. else {
  129. // Not an array we want to split up after all.
  130. return false;
  131. }
  132. SmallVectorImpl<CCValAssign> &PendingMembers = State.getPendingLocs();
  133. // Add the argument to the list to be allocated once we know the size of the
  134. // block.
  135. PendingMembers.push_back(
  136. CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo));
  137. if (!ArgFlags.isInConsecutiveRegsLast())
  138. return true;
  139. // [N x i32] arguments get packed into x-registers on Darwin's arm64_32
  140. // because that's how the armv7k Clang front-end emits small structs.
  141. unsigned EltsPerReg = (IsDarwinILP32 && LocVT.SimpleTy == MVT::i32) ? 2 : 1;
  142. unsigned RegResult = State.AllocateRegBlock(
  143. RegList, alignTo(PendingMembers.size(), EltsPerReg) / EltsPerReg);
  144. if (RegResult && EltsPerReg == 1) {
  145. for (auto &It : PendingMembers) {
  146. It.convertToReg(RegResult);
  147. State.addLoc(It);
  148. ++RegResult;
  149. }
  150. PendingMembers.clear();
  151. return true;
  152. } else if (RegResult) {
  153. assert(EltsPerReg == 2 && "unexpected ABI");
  154. bool UseHigh = false;
  155. CCValAssign::LocInfo Info;
  156. for (auto &It : PendingMembers) {
  157. Info = UseHigh ? CCValAssign::AExtUpper : CCValAssign::ZExt;
  158. State.addLoc(CCValAssign::getReg(It.getValNo(), MVT::i32, RegResult,
  159. MVT::i64, Info));
  160. UseHigh = !UseHigh;
  161. if (!UseHigh)
  162. ++RegResult;
  163. }
  164. PendingMembers.clear();
  165. return true;
  166. }
  167. if (!LocVT.isScalableVector()) {
  168. // Mark all regs in the class as unavailable
  169. for (auto Reg : RegList)
  170. State.AllocateReg(Reg);
  171. }
  172. const Align StackAlign =
  173. State.getMachineFunction().getDataLayout().getStackAlignment();
  174. const Align MemAlign = ArgFlags.getNonZeroMemAlign();
  175. Align SlotAlign = std::min(MemAlign, StackAlign);
  176. if (!Subtarget.isTargetDarwin())
  177. SlotAlign = std::max(SlotAlign, Align(8));
  178. return finishStackBlock(PendingMembers, LocVT, ArgFlags, State, SlotAlign);
  179. }
  180. // TableGen provides definitions of the calling convention analysis entry
  181. // points.
  182. #include "AArch64GenCallingConv.inc"