ARMCallingConv.cpp 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316
  1. //=== ARMCallingConv.cpp - ARM Custom CC Routines ---------------*- C++ -*-===//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. //
  9. // This file contains the custom routines for the ARM Calling Convention that
  10. // aren't done by tablegen, and includes the table generated implementations.
  11. //
  12. //===----------------------------------------------------------------------===//
  13. #include "ARM.h"
  14. #include "ARMCallingConv.h"
  15. #include "ARMSubtarget.h"
  16. #include "ARMRegisterInfo.h"
  17. using namespace llvm;
  18. // APCS f64 is in register pairs, possibly split to stack
  19. static bool f64AssignAPCS(unsigned ValNo, MVT ValVT, MVT LocVT,
  20. CCValAssign::LocInfo LocInfo,
  21. CCState &State, bool CanFail) {
  22. static const MCPhysReg RegList[] = { ARM::R0, ARM::R1, ARM::R2, ARM::R3 };
  23. // Try to get the first register.
  24. if (unsigned Reg = State.AllocateReg(RegList))
  25. State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
  26. else {
  27. // For the 2nd half of a v2f64, do not fail.
  28. if (CanFail)
  29. return false;
  30. // Put the whole thing on the stack.
  31. State.addLoc(CCValAssign::getCustomMem(
  32. ValNo, ValVT, State.AllocateStack(8, Align(4)), LocVT, LocInfo));
  33. return true;
  34. }
  35. // Try to get the second register.
  36. if (unsigned Reg = State.AllocateReg(RegList))
  37. State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
  38. else
  39. State.addLoc(CCValAssign::getCustomMem(
  40. ValNo, ValVT, State.AllocateStack(4, Align(4)), LocVT, LocInfo));
  41. return true;
  42. }
  43. static bool CC_ARM_APCS_Custom_f64(unsigned ValNo, MVT ValVT, MVT LocVT,
  44. CCValAssign::LocInfo LocInfo,
  45. ISD::ArgFlagsTy ArgFlags,
  46. CCState &State) {
  47. if (!f64AssignAPCS(ValNo, ValVT, LocVT, LocInfo, State, true))
  48. return false;
  49. if (LocVT == MVT::v2f64 &&
  50. !f64AssignAPCS(ValNo, ValVT, LocVT, LocInfo, State, false))
  51. return false;
  52. return true; // we handled it
  53. }
  54. // AAPCS f64 is in aligned register pairs
  55. static bool f64AssignAAPCS(unsigned ValNo, MVT ValVT, MVT LocVT,
  56. CCValAssign::LocInfo LocInfo,
  57. CCState &State, bool CanFail) {
  58. static const MCPhysReg HiRegList[] = { ARM::R0, ARM::R2 };
  59. static const MCPhysReg LoRegList[] = { ARM::R1, ARM::R3 };
  60. static const MCPhysReg ShadowRegList[] = { ARM::R0, ARM::R1 };
  61. static const MCPhysReg GPRArgRegs[] = { ARM::R0, ARM::R1, ARM::R2, ARM::R3 };
  62. unsigned Reg = State.AllocateReg(HiRegList, ShadowRegList);
  63. if (Reg == 0) {
  64. // If we had R3 unallocated only, now we still must to waste it.
  65. Reg = State.AllocateReg(GPRArgRegs);
  66. assert((!Reg || Reg == ARM::R3) && "Wrong GPRs usage for f64");
  67. // For the 2nd half of a v2f64, do not just fail.
  68. if (CanFail)
  69. return false;
  70. // Put the whole thing on the stack.
  71. State.addLoc(CCValAssign::getCustomMem(
  72. ValNo, ValVT, State.AllocateStack(8, Align(8)), LocVT, LocInfo));
  73. return true;
  74. }
  75. unsigned i;
  76. for (i = 0; i < 2; ++i)
  77. if (HiRegList[i] == Reg)
  78. break;
  79. unsigned T = State.AllocateReg(LoRegList[i]);
  80. (void)T;
  81. assert(T == LoRegList[i] && "Could not allocate register");
  82. State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
  83. State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, LoRegList[i],
  84. LocVT, LocInfo));
  85. return true;
  86. }
  87. static bool CC_ARM_AAPCS_Custom_f64(unsigned ValNo, MVT ValVT, MVT LocVT,
  88. CCValAssign::LocInfo LocInfo,
  89. ISD::ArgFlagsTy ArgFlags,
  90. CCState &State) {
  91. if (!f64AssignAAPCS(ValNo, ValVT, LocVT, LocInfo, State, true))
  92. return false;
  93. if (LocVT == MVT::v2f64 &&
  94. !f64AssignAAPCS(ValNo, ValVT, LocVT, LocInfo, State, false))
  95. return false;
  96. return true; // we handled it
  97. }
  98. static bool f64RetAssign(unsigned ValNo, MVT ValVT, MVT LocVT,
  99. CCValAssign::LocInfo LocInfo, CCState &State) {
  100. static const MCPhysReg HiRegList[] = { ARM::R0, ARM::R2 };
  101. static const MCPhysReg LoRegList[] = { ARM::R1, ARM::R3 };
  102. unsigned Reg = State.AllocateReg(HiRegList, LoRegList);
  103. if (Reg == 0)
  104. return false; // we didn't handle it
  105. unsigned i;
  106. for (i = 0; i < 2; ++i)
  107. if (HiRegList[i] == Reg)
  108. break;
  109. State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
  110. State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, LoRegList[i],
  111. LocVT, LocInfo));
  112. return true;
  113. }
  114. static bool RetCC_ARM_APCS_Custom_f64(unsigned ValNo, MVT ValVT, MVT LocVT,
  115. CCValAssign::LocInfo LocInfo,
  116. ISD::ArgFlagsTy ArgFlags,
  117. CCState &State) {
  118. if (!f64RetAssign(ValNo, ValVT, LocVT, LocInfo, State))
  119. return false;
  120. if (LocVT == MVT::v2f64 && !f64RetAssign(ValNo, ValVT, LocVT, LocInfo, State))
  121. return false;
  122. return true; // we handled it
  123. }
  124. static bool RetCC_ARM_AAPCS_Custom_f64(unsigned ValNo, MVT ValVT, MVT LocVT,
  125. CCValAssign::LocInfo LocInfo,
  126. ISD::ArgFlagsTy ArgFlags,
  127. CCState &State) {
  128. return RetCC_ARM_APCS_Custom_f64(ValNo, ValVT, LocVT, LocInfo, ArgFlags,
  129. State);
  130. }
  131. static const MCPhysReg RRegList[] = { ARM::R0, ARM::R1, ARM::R2, ARM::R3 };
  132. static const MCPhysReg SRegList[] = { ARM::S0, ARM::S1, ARM::S2, ARM::S3,
  133. ARM::S4, ARM::S5, ARM::S6, ARM::S7,
  134. ARM::S8, ARM::S9, ARM::S10, ARM::S11,
  135. ARM::S12, ARM::S13, ARM::S14, ARM::S15 };
  136. static const MCPhysReg DRegList[] = { ARM::D0, ARM::D1, ARM::D2, ARM::D3,
  137. ARM::D4, ARM::D5, ARM::D6, ARM::D7 };
  138. static const MCPhysReg QRegList[] = { ARM::Q0, ARM::Q1, ARM::Q2, ARM::Q3 };
  139. // Allocate part of an AAPCS HFA or HVA. We assume that each member of the HA
  140. // has InConsecutiveRegs set, and that the last member also has
  141. // InConsecutiveRegsLast set. We must process all members of the HA before
  142. // we can allocate it, as we need to know the total number of registers that
  143. // will be needed in order to (attempt to) allocate a contiguous block.
  144. static bool CC_ARM_AAPCS_Custom_Aggregate(unsigned ValNo, MVT ValVT,
  145. MVT LocVT,
  146. CCValAssign::LocInfo LocInfo,
  147. ISD::ArgFlagsTy ArgFlags,
  148. CCState &State) {
  149. SmallVectorImpl<CCValAssign> &PendingMembers = State.getPendingLocs();
  150. // AAPCS HFAs must have 1-4 elements, all of the same type
  151. if (PendingMembers.size() > 0)
  152. assert(PendingMembers[0].getLocVT() == LocVT);
  153. // Add the argument to the list to be allocated once we know the size of the
  154. // aggregate. Store the type's required alignment as extra info for later: in
  155. // the [N x i64] case all trace has been removed by the time we actually get
  156. // to do allocation.
  157. PendingMembers.push_back(CCValAssign::getPending(
  158. ValNo, ValVT, LocVT, LocInfo, ArgFlags.getNonZeroOrigAlign().value()));
  159. if (!ArgFlags.isInConsecutiveRegsLast())
  160. return true;
  161. // Try to allocate a contiguous block of registers, each of the correct
  162. // size to hold one member.
  163. auto &DL = State.getMachineFunction().getDataLayout();
  164. const Align StackAlign = DL.getStackAlignment();
  165. const Align FirstMemberAlign(PendingMembers[0].getExtraInfo());
  166. Align Alignment = std::min(FirstMemberAlign, StackAlign);
  167. ArrayRef<MCPhysReg> RegList;
  168. switch (LocVT.SimpleTy) {
  169. case MVT::i32: {
  170. RegList = RRegList;
  171. unsigned RegIdx = State.getFirstUnallocated(RegList);
  172. // First consume all registers that would give an unaligned object. Whether
  173. // we go on stack or in regs, no-one will be using them in future.
  174. unsigned RegAlign = alignTo(Alignment.value(), 4) / 4;
  175. while (RegIdx % RegAlign != 0 && RegIdx < RegList.size())
  176. State.AllocateReg(RegList[RegIdx++]);
  177. break;
  178. }
  179. case MVT::f16:
  180. case MVT::bf16:
  181. case MVT::f32:
  182. RegList = SRegList;
  183. break;
  184. case MVT::v4f16:
  185. case MVT::v4bf16:
  186. case MVT::f64:
  187. RegList = DRegList;
  188. break;
  189. case MVT::v8f16:
  190. case MVT::v8bf16:
  191. case MVT::v2f64:
  192. RegList = QRegList;
  193. break;
  194. default:
  195. llvm_unreachable("Unexpected member type for block aggregate");
  196. break;
  197. }
  198. unsigned RegResult = State.AllocateRegBlock(RegList, PendingMembers.size());
  199. if (RegResult) {
  200. for (CCValAssign &PendingMember : PendingMembers) {
  201. PendingMember.convertToReg(RegResult);
  202. State.addLoc(PendingMember);
  203. ++RegResult;
  204. }
  205. PendingMembers.clear();
  206. return true;
  207. }
  208. // Register allocation failed, we'll be needing the stack
  209. unsigned Size = LocVT.getSizeInBits() / 8;
  210. if (LocVT == MVT::i32 && State.getNextStackOffset() == 0) {
  211. // If nothing else has used the stack until this point, a non-HFA aggregate
  212. // can be split between regs and stack.
  213. unsigned RegIdx = State.getFirstUnallocated(RegList);
  214. for (auto &It : PendingMembers) {
  215. if (RegIdx >= RegList.size())
  216. It.convertToMem(State.AllocateStack(Size, Align(Size)));
  217. else
  218. It.convertToReg(State.AllocateReg(RegList[RegIdx++]));
  219. State.addLoc(It);
  220. }
  221. PendingMembers.clear();
  222. return true;
  223. }
  224. if (LocVT != MVT::i32)
  225. RegList = SRegList;
  226. // Mark all regs as unavailable (AAPCS rule C.2.vfp for VFP, C.6 for core)
  227. for (auto Reg : RegList)
  228. State.AllocateReg(Reg);
  229. // Clamp the alignment between 4 and 8.
  230. if (State.getMachineFunction().getSubtarget<ARMSubtarget>().isTargetAEABI())
  231. Alignment = ArgFlags.getNonZeroMemAlign() <= 4 ? Align(4) : Align(8);
  232. // After the first item has been allocated, the rest are packed as tightly as
  233. // possible. (E.g. an incoming i64 would have starting Align of 8, but we'll
  234. // be allocating a bunch of i32 slots).
  235. for (auto &It : PendingMembers) {
  236. It.convertToMem(State.AllocateStack(Size, Alignment));
  237. State.addLoc(It);
  238. Alignment = Align(1);
  239. }
  240. // All pending members have now been allocated
  241. PendingMembers.clear();
  242. // This will be allocated by the last member of the aggregate
  243. return true;
  244. }
  245. static bool CustomAssignInRegList(unsigned ValNo, MVT ValVT, MVT LocVT,
  246. CCValAssign::LocInfo LocInfo, CCState &State,
  247. ArrayRef<MCPhysReg> RegList) {
  248. unsigned Reg = State.AllocateReg(RegList);
  249. if (Reg) {
  250. State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
  251. return true;
  252. }
  253. return false;
  254. }
  255. static bool CC_ARM_AAPCS_Custom_f16(unsigned ValNo, MVT ValVT, MVT LocVT,
  256. CCValAssign::LocInfo LocInfo,
  257. ISD::ArgFlagsTy ArgFlags, CCState &State) {
  258. // f16 arguments are extended to i32 and assigned to a register in [r0, r3]
  259. return CustomAssignInRegList(ValNo, ValVT, MVT::i32, LocInfo, State,
  260. RRegList);
  261. }
  262. static bool CC_ARM_AAPCS_VFP_Custom_f16(unsigned ValNo, MVT ValVT, MVT LocVT,
  263. CCValAssign::LocInfo LocInfo,
  264. ISD::ArgFlagsTy ArgFlags,
  265. CCState &State) {
  266. // f16 arguments are extended to f32 and assigned to a register in [s0, s15]
  267. return CustomAssignInRegList(ValNo, ValVT, MVT::f32, LocInfo, State,
  268. SRegList);
  269. }
  270. // Include the table generated calling convention implementations.
  271. #include "ARMGenCallingConv.inc"