X86CallingConv.cpp 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344
  1. //=== X86CallingConv.cpp - X86 Custom Calling Convention Impl -*- C++ -*-===//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. //
  9. // This file contains the implementation of custom routines for the X86
  10. // Calling Convention that aren't done by tablegen.
  11. //
  12. //===----------------------------------------------------------------------===//
  13. #include "X86CallingConv.h"
  14. #include "X86Subtarget.h"
  15. #include "llvm/ADT/SmallVector.h"
  16. #include "llvm/CodeGen/CallingConvLower.h"
  17. #include "llvm/IR/CallingConv.h"
  18. using namespace llvm;
  19. /// When regcall calling convention compiled to 32 bit arch, special treatment
  20. /// is required for 64 bit masks.
  21. /// The value should be assigned to two GPRs.
  22. /// \return true if registers were allocated and false otherwise.
  23. static bool CC_X86_32_RegCall_Assign2Regs(unsigned &ValNo, MVT &ValVT,
  24. MVT &LocVT,
  25. CCValAssign::LocInfo &LocInfo,
  26. ISD::ArgFlagsTy &ArgFlags,
  27. CCState &State) {
  28. // List of GPR registers that are available to store values in regcall
  29. // calling convention.
  30. static const MCPhysReg RegList[] = {X86::EAX, X86::ECX, X86::EDX, X86::EDI,
  31. X86::ESI};
  32. // The vector will save all the available registers for allocation.
  33. SmallVector<unsigned, 5> AvailableRegs;
  34. // searching for the available registers.
  35. for (auto Reg : RegList) {
  36. if (!State.isAllocated(Reg))
  37. AvailableRegs.push_back(Reg);
  38. }
  39. const size_t RequiredGprsUponSplit = 2;
  40. if (AvailableRegs.size() < RequiredGprsUponSplit)
  41. return false; // Not enough free registers - continue the search.
  42. // Allocating the available registers.
  43. for (unsigned I = 0; I < RequiredGprsUponSplit; I++) {
  44. // Marking the register as located.
  45. unsigned Reg = State.AllocateReg(AvailableRegs[I]);
  46. // Since we previously made sure that 2 registers are available
  47. // we expect that a real register number will be returned.
  48. assert(Reg && "Expecting a register will be available");
  49. // Assign the value to the allocated register
  50. State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
  51. }
  52. // Successful in allocating registers - stop scanning next rules.
  53. return true;
  54. }
  55. static ArrayRef<MCPhysReg> CC_X86_VectorCallGetSSEs(const MVT &ValVT) {
  56. if (ValVT.is512BitVector()) {
  57. static const MCPhysReg RegListZMM[] = {X86::ZMM0, X86::ZMM1, X86::ZMM2,
  58. X86::ZMM3, X86::ZMM4, X86::ZMM5};
  59. return makeArrayRef(std::begin(RegListZMM), std::end(RegListZMM));
  60. }
  61. if (ValVT.is256BitVector()) {
  62. static const MCPhysReg RegListYMM[] = {X86::YMM0, X86::YMM1, X86::YMM2,
  63. X86::YMM3, X86::YMM4, X86::YMM5};
  64. return makeArrayRef(std::begin(RegListYMM), std::end(RegListYMM));
  65. }
  66. static const MCPhysReg RegListXMM[] = {X86::XMM0, X86::XMM1, X86::XMM2,
  67. X86::XMM3, X86::XMM4, X86::XMM5};
  68. return makeArrayRef(std::begin(RegListXMM), std::end(RegListXMM));
  69. }
  70. static ArrayRef<MCPhysReg> CC_X86_64_VectorCallGetGPRs() {
  71. static const MCPhysReg RegListGPR[] = {X86::RCX, X86::RDX, X86::R8, X86::R9};
  72. return makeArrayRef(std::begin(RegListGPR), std::end(RegListGPR));
  73. }
  74. static bool CC_X86_VectorCallAssignRegister(unsigned &ValNo, MVT &ValVT,
  75. MVT &LocVT,
  76. CCValAssign::LocInfo &LocInfo,
  77. ISD::ArgFlagsTy &ArgFlags,
  78. CCState &State) {
  79. ArrayRef<MCPhysReg> RegList = CC_X86_VectorCallGetSSEs(ValVT);
  80. bool Is64bit = static_cast<const X86Subtarget &>(
  81. State.getMachineFunction().getSubtarget())
  82. .is64Bit();
  83. for (auto Reg : RegList) {
  84. // If the register is not marked as allocated - assign to it.
  85. if (!State.isAllocated(Reg)) {
  86. unsigned AssigedReg = State.AllocateReg(Reg);
  87. assert(AssigedReg == Reg && "Expecting a valid register allocation");
  88. State.addLoc(
  89. CCValAssign::getReg(ValNo, ValVT, AssigedReg, LocVT, LocInfo));
  90. return true;
  91. }
  92. // If the register is marked as shadow allocated - assign to it.
  93. if (Is64bit && State.IsShadowAllocatedReg(Reg)) {
  94. State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
  95. return true;
  96. }
  97. }
  98. llvm_unreachable("Clang should ensure that hva marked vectors will have "
  99. "an available register.");
  100. return false;
  101. }
  102. /// Vectorcall calling convention has special handling for vector types or
  103. /// HVA for 64 bit arch.
  104. /// For HVAs shadow registers might be allocated on the first pass
  105. /// and actual XMM registers are allocated on the second pass.
  106. /// For vector types, actual XMM registers are allocated on the first pass.
  107. /// \return true if registers were allocated and false otherwise.
  108. static bool CC_X86_64_VectorCall(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
  109. CCValAssign::LocInfo &LocInfo,
  110. ISD::ArgFlagsTy &ArgFlags, CCState &State) {
  111. // On the second pass, go through the HVAs only.
  112. if (ArgFlags.isSecArgPass()) {
  113. if (ArgFlags.isHva())
  114. return CC_X86_VectorCallAssignRegister(ValNo, ValVT, LocVT, LocInfo,
  115. ArgFlags, State);
  116. return true;
  117. }
  118. // Process only vector types as defined by vectorcall spec:
  119. // "A vector type is either a floating-point type, for example,
  120. // a float or double, or an SIMD vector type, for example, __m128 or __m256".
  121. if (!(ValVT.isFloatingPoint() ||
  122. (ValVT.isVector() && ValVT.getSizeInBits() >= 128))) {
  123. // If R9 was already assigned it means that we are after the fourth element
  124. // and because this is not an HVA / Vector type, we need to allocate
  125. // shadow XMM register.
  126. if (State.isAllocated(X86::R9)) {
  127. // Assign shadow XMM register.
  128. (void)State.AllocateReg(CC_X86_VectorCallGetSSEs(ValVT));
  129. }
  130. return false;
  131. }
  132. if (!ArgFlags.isHva() || ArgFlags.isHvaStart()) {
  133. // Assign shadow GPR register.
  134. (void)State.AllocateReg(CC_X86_64_VectorCallGetGPRs());
  135. // Assign XMM register - (shadow for HVA and non-shadow for non HVA).
  136. if (unsigned Reg = State.AllocateReg(CC_X86_VectorCallGetSSEs(ValVT))) {
  137. // In Vectorcall Calling convention, additional shadow stack can be
  138. // created on top of the basic 32 bytes of win64.
  139. // It can happen if the fifth or sixth argument is vector type or HVA.
  140. // At that case for each argument a shadow stack of 8 bytes is allocated.
  141. const TargetRegisterInfo *TRI =
  142. State.getMachineFunction().getSubtarget().getRegisterInfo();
  143. if (TRI->regsOverlap(Reg, X86::XMM4) ||
  144. TRI->regsOverlap(Reg, X86::XMM5))
  145. State.AllocateStack(8, Align(8));
  146. if (!ArgFlags.isHva()) {
  147. State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
  148. return true; // Allocated a register - Stop the search.
  149. }
  150. }
  151. }
  152. // If this is an HVA - Stop the search,
  153. // otherwise continue the search.
  154. return ArgFlags.isHva();
  155. }
  156. /// Vectorcall calling convention has special handling for vector types or
  157. /// HVA for 32 bit arch.
  158. /// For HVAs actual XMM registers are allocated on the second pass.
  159. /// For vector types, actual XMM registers are allocated on the first pass.
  160. /// \return true if registers were allocated and false otherwise.
  161. static bool CC_X86_32_VectorCall(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
  162. CCValAssign::LocInfo &LocInfo,
  163. ISD::ArgFlagsTy &ArgFlags, CCState &State) {
  164. // On the second pass, go through the HVAs only.
  165. if (ArgFlags.isSecArgPass()) {
  166. if (ArgFlags.isHva())
  167. return CC_X86_VectorCallAssignRegister(ValNo, ValVT, LocVT, LocInfo,
  168. ArgFlags, State);
  169. return true;
  170. }
  171. // Process only vector types as defined by vectorcall spec:
  172. // "A vector type is either a floating point type, for example,
  173. // a float or double, or an SIMD vector type, for example, __m128 or __m256".
  174. if (!(ValVT.isFloatingPoint() ||
  175. (ValVT.isVector() && ValVT.getSizeInBits() >= 128))) {
  176. return false;
  177. }
  178. if (ArgFlags.isHva())
  179. return true; // If this is an HVA - Stop the search.
  180. // Assign XMM register.
  181. if (unsigned Reg = State.AllocateReg(CC_X86_VectorCallGetSSEs(ValVT))) {
  182. State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
  183. return true;
  184. }
  185. // In case we did not find an available XMM register for a vector -
  186. // pass it indirectly.
  187. // It is similar to CCPassIndirect, with the addition of inreg.
  188. if (!ValVT.isFloatingPoint()) {
  189. LocVT = MVT::i32;
  190. LocInfo = CCValAssign::Indirect;
  191. ArgFlags.setInReg();
  192. }
  193. return false; // No register was assigned - Continue the search.
  194. }
  195. static bool CC_X86_AnyReg_Error(unsigned &, MVT &, MVT &,
  196. CCValAssign::LocInfo &, ISD::ArgFlagsTy &,
  197. CCState &) {
  198. llvm_unreachable("The AnyReg calling convention is only supported by the "
  199. "stackmap and patchpoint intrinsics.");
  200. // gracefully fallback to X86 C calling convention on Release builds.
  201. return false;
  202. }
  203. static bool CC_X86_32_MCUInReg(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
  204. CCValAssign::LocInfo &LocInfo,
  205. ISD::ArgFlagsTy &ArgFlags, CCState &State) {
  206. // This is similar to CCAssignToReg<[EAX, EDX, ECX]>, but makes sure
  207. // not to split i64 and double between a register and stack
  208. static const MCPhysReg RegList[] = {X86::EAX, X86::EDX, X86::ECX};
  209. static const unsigned NumRegs = sizeof(RegList) / sizeof(RegList[0]);
  210. SmallVectorImpl<CCValAssign> &PendingMembers = State.getPendingLocs();
  211. // If this is the first part of an double/i64/i128, or if we're already
  212. // in the middle of a split, add to the pending list. If this is not
  213. // the end of the split, return, otherwise go on to process the pending
  214. // list
  215. if (ArgFlags.isSplit() || !PendingMembers.empty()) {
  216. PendingMembers.push_back(
  217. CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo));
  218. if (!ArgFlags.isSplitEnd())
  219. return true;
  220. }
  221. // If there are no pending members, we are not in the middle of a split,
  222. // so do the usual inreg stuff.
  223. if (PendingMembers.empty()) {
  224. if (unsigned Reg = State.AllocateReg(RegList)) {
  225. State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
  226. return true;
  227. }
  228. return false;
  229. }
  230. assert(ArgFlags.isSplitEnd());
  231. // We now have the entire original argument in PendingMembers, so decide
  232. // whether to use registers or the stack.
  233. // Per the MCU ABI:
  234. // a) To use registers, we need to have enough of them free to contain
  235. // the entire argument.
  236. // b) We never want to use more than 2 registers for a single argument.
  237. unsigned FirstFree = State.getFirstUnallocated(RegList);
  238. bool UseRegs = PendingMembers.size() <= std::min(2U, NumRegs - FirstFree);
  239. for (auto &It : PendingMembers) {
  240. if (UseRegs)
  241. It.convertToReg(State.AllocateReg(RegList[FirstFree++]));
  242. else
  243. It.convertToMem(State.AllocateStack(4, Align(4)));
  244. State.addLoc(It);
  245. }
  246. PendingMembers.clear();
  247. return true;
  248. }
  249. /// X86 interrupt handlers can only take one or two stack arguments, but if
  250. /// there are two arguments, they are in the opposite order from the standard
  251. /// convention. Therefore, we have to look at the argument count up front before
  252. /// allocating stack for each argument.
  253. static bool CC_X86_Intr(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
  254. CCValAssign::LocInfo &LocInfo,
  255. ISD::ArgFlagsTy &ArgFlags, CCState &State) {
  256. const MachineFunction &MF = State.getMachineFunction();
  257. size_t ArgCount = State.getMachineFunction().getFunction().arg_size();
  258. bool Is64Bit = static_cast<const X86Subtarget &>(MF.getSubtarget()).is64Bit();
  259. unsigned SlotSize = Is64Bit ? 8 : 4;
  260. unsigned Offset;
  261. if (ArgCount == 1 && ValNo == 0) {
  262. // If we have one argument, the argument is five stack slots big, at fixed
  263. // offset zero.
  264. Offset = State.AllocateStack(5 * SlotSize, Align(4));
  265. } else if (ArgCount == 2 && ValNo == 0) {
  266. // If we have two arguments, the stack slot is *after* the error code
  267. // argument. Pretend it doesn't consume stack space, and account for it when
  268. // we assign the second argument.
  269. Offset = SlotSize;
  270. } else if (ArgCount == 2 && ValNo == 1) {
  271. // If this is the second of two arguments, it must be the error code. It
  272. // appears first on the stack, and is then followed by the five slot
  273. // interrupt struct.
  274. Offset = 0;
  275. (void)State.AllocateStack(6 * SlotSize, Align(4));
  276. } else {
  277. report_fatal_error("unsupported x86 interrupt prototype");
  278. }
  279. // FIXME: This should be accounted for in
  280. // X86FrameLowering::getFrameIndexReference, not here.
  281. if (Is64Bit && ArgCount == 2)
  282. Offset += SlotSize;
  283. State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
  284. return true;
  285. }
  286. static bool CC_X86_64_Pointer(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
  287. CCValAssign::LocInfo &LocInfo,
  288. ISD::ArgFlagsTy &ArgFlags, CCState &State) {
  289. if (LocVT != MVT::i64) {
  290. LocVT = MVT::i64;
  291. LocInfo = CCValAssign::ZExt;
  292. }
  293. return false;
  294. }
  295. // Provides entry points of CC_X86 and RetCC_X86.
  296. #include "X86GenCallingConv.inc"