  1. //=== AArch64PostLegalizerLowering.cpp --------------------------*- C++ -*-===//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. ///
  9. /// \file
  10. /// Post-legalization lowering for instructions.
  11. ///
  12. /// This is used to offload pattern matching from the selector.
  13. ///
  14. /// For example, this combiner will notice that a G_SHUFFLE_VECTOR is actually
  15. /// a G_ZIP, G_UZP, etc.
  16. ///
  17. /// General optimization combines should be handled by either the
  18. /// AArch64PostLegalizerCombiner or the AArch64PreLegalizerCombiner.
  19. ///
  20. //===----------------------------------------------------------------------===//
  21. #include "AArch64GlobalISelUtils.h"
  22. #include "AArch64Subtarget.h"
  23. #include "AArch64TargetMachine.h"
  24. #include "GISel/AArch64LegalizerInfo.h"
  25. #include "MCTargetDesc/AArch64MCTargetDesc.h"
  26. #include "TargetInfo/AArch64TargetInfo.h"
  27. #include "Utils/AArch64BaseInfo.h"
  28. #include "llvm/CodeGen/GlobalISel/Combiner.h"
  29. #include "llvm/CodeGen/GlobalISel/CombinerHelper.h"
  30. #include "llvm/CodeGen/GlobalISel/CombinerInfo.h"
  31. #include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
  32. #include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
  33. #include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
  34. #include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
  35. #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
  36. #include "llvm/CodeGen/GlobalISel/Utils.h"
  37. #include "llvm/CodeGen/MachineFunctionPass.h"
  38. #include "llvm/CodeGen/MachineInstrBuilder.h"
  39. #include "llvm/CodeGen/MachineRegisterInfo.h"
  40. #include "llvm/CodeGen/TargetOpcodes.h"
  41. #include "llvm/CodeGen/TargetPassConfig.h"
  42. #include "llvm/IR/InstrTypes.h"
  43. #include "llvm/InitializePasses.h"
  44. #include "llvm/Support/Debug.h"
  45. #include "llvm/Support/ErrorHandling.h"
  46. #include <optional>
  47. #define DEBUG_TYPE "aarch64-postlegalizer-lowering"
  48. using namespace llvm;
  49. using namespace MIPatternMatch;
  50. using namespace AArch64GISelUtils;
  51. /// Represents a pseudo instruction which replaces a G_SHUFFLE_VECTOR.
  52. ///
  53. /// Used for matching target-supported shuffles before codegen.
  54. struct ShuffleVectorPseudo {
  55. unsigned Opc; ///< Opcode for the instruction. (E.g. G_ZIP1)
  56. Register Dst; ///< Destination register.
  57. SmallVector<SrcOp, 2> SrcOps; ///< Source registers.
  58. ShuffleVectorPseudo(unsigned Opc, Register Dst,
  59. std::initializer_list<SrcOp> SrcOps)
  60. : Opc(Opc), Dst(Dst), SrcOps(SrcOps) {}
  61. ShuffleVectorPseudo() = default;
  62. };
  63. /// Check if a vector shuffle corresponds to a REV instruction with the
  64. /// specified blocksize.
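/// E.g. for <8 x s8> and BlockSize 64, the mask <7, 6, 5, 4, 3, 2, 1, 0>
/// reverses the bytes within the single 64-bit block (a REV64).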
  65. static bool isREVMask(ArrayRef<int> M, unsigned EltSize, unsigned NumElts,
  66. unsigned BlockSize) {
  67. assert((BlockSize == 16 || BlockSize == 32 || BlockSize == 64) &&
  68. "Only possible block sizes for REV are: 16, 32, 64");
  69. assert(EltSize != 64 && "EltSize cannot be 64 for REV mask.");
  70. unsigned BlockElts = M[0] + 1;
  71. // If the first shuffle index is UNDEF, be optimistic.
  72. if (M[0] < 0)
  73. BlockElts = BlockSize / EltSize;
  74. if (BlockSize <= EltSize || BlockSize != BlockElts * EltSize)
  75. return false;
  76. for (unsigned i = 0; i < NumElts; ++i) {
  77. // Ignore undef indices.
  78. if (M[i] < 0)
  79. continue;
  80. if (static_cast<unsigned>(M[i]) !=
  81. (i - i % BlockElts) + (BlockElts - 1 - i % BlockElts))
  82. return false;
  83. }
  84. return true;
  85. }
  86. /// Determines if \p M is a shuffle vector mask for a TRN of \p NumElts.
  87. /// Whether or not G_TRN1 or G_TRN2 should be used is stored in \p WhichResult.
  88. static bool isTRNMask(ArrayRef<int> M, unsigned NumElts,
  89. unsigned &WhichResult) {
  90. if (NumElts % 2 != 0)
  91. return false;
  92. WhichResult = (M[0] == 0 ? 0 : 1);
  93. for (unsigned i = 0; i < NumElts; i += 2) {
  94. if ((M[i] >= 0 && static_cast<unsigned>(M[i]) != i + WhichResult) ||
  95. (M[i + 1] >= 0 &&
  96. static_cast<unsigned>(M[i + 1]) != i + NumElts + WhichResult))
  97. return false;
  98. }
  99. return true;
  100. }
  101. /// Check if a G_EXT instruction can handle a shuffle mask \p M when the vector
  102. /// sources of the shuffle are different.
  103. static std::optional<std::pair<bool, uint64_t>> getExtMask(ArrayRef<int> M,
  104. unsigned NumElts) {
  105. // Look for the first non-undef element.
  106. auto FirstRealElt = find_if(M, [](int Elt) { return Elt >= 0; });
  107. if (FirstRealElt == M.end())
  108. return std::nullopt;
  109. // Use APInt to handle overflow when calculating expected element.
  110. unsigned MaskBits = APInt(32, NumElts * 2).logBase2();
  111. APInt ExpectedElt = APInt(MaskBits, *FirstRealElt + 1);
  112. // The following shuffle indices must be the successive elements after the
  113. // first real element.
  114. if (any_of(
  115. make_range(std::next(FirstRealElt), M.end()),
  116. [&ExpectedElt](int Elt) { return Elt != ExpectedElt++ && Elt >= 0; }))
  117. return std::nullopt;
  118. // The index of an EXT is the first element if it is not UNDEF.
  119. // Watch out for the beginning UNDEFs. The EXT index should be the expected
  120. // value of the first element. E.g.
  121. // <-1, -1, 3, ...> is treated as <1, 2, 3, ...>.
  122. // <-1, -1, 0, 1, ...> is treated as <2*NumElts-2, 2*NumElts-1, 0, 1, ...>.
  123. // ExpectedElt is the last mask index plus 1.
  124. uint64_t Imm = ExpectedElt.getZExtValue();
  125. bool ReverseExt = false;
  126. // There are two different cases that require reversing the input vectors.
  127. // For example, for vector <4 x i32> we have the following cases,
  128. // Case 1: shufflevector(<4 x i32>,<4 x i32>,<-1, -1, -1, 0>)
  129. // Case 2: shufflevector(<4 x i32>,<4 x i32>,<-1, -1, 7, 0>)
  130. // For both cases, we finally use mask <5, 6, 7, 0>, which requires
  131. // reversing the two input vectors.
  132. if (Imm < NumElts)
  133. ReverseExt = true;
  134. else
  135. Imm -= NumElts;
  136. return std::make_pair(ReverseExt, Imm);
  137. }
  138. /// Determines if \p M is a shuffle vector mask for a UZP of \p NumElts.
  139. /// Whether or not G_UZP1 or G_UZP2 should be used is stored in \p WhichResult.
  140. static bool isUZPMask(ArrayRef<int> M, unsigned NumElts,
  141. unsigned &WhichResult) {
  142. WhichResult = (M[0] == 0 ? 0 : 1);
  143. for (unsigned i = 0; i != NumElts; ++i) {
  144. // Skip undef indices.
  145. if (M[i] < 0)
  146. continue;
  147. if (static_cast<unsigned>(M[i]) != 2 * i + WhichResult)
  148. return false;
  149. }
  150. return true;
  151. }
  152. /// \return true if \p M is a zip mask for a shuffle vector of \p NumElts.
  153. /// Whether or not G_ZIP1 or G_ZIP2 should be used is stored in \p WhichResult.
  154. static bool isZipMask(ArrayRef<int> M, unsigned NumElts,
  155. unsigned &WhichResult) {
  156. if (NumElts % 2 != 0)
  157. return false;
  158. // 0 means use ZIP1, 1 means use ZIP2.
  159. WhichResult = (M[0] == 0 ? 0 : 1);
  160. unsigned Idx = WhichResult * NumElts / 2;
  161. for (unsigned i = 0; i != NumElts; i += 2) {
  162. if ((M[i] >= 0 && static_cast<unsigned>(M[i]) != Idx) ||
  163. (M[i + 1] >= 0 && static_cast<unsigned>(M[i + 1]) != Idx + NumElts))
  164. return false;
  165. Idx += 1;
  166. }
  167. return true;
  168. }
  169. /// Helper function for matchINS.
  170. ///
  171. /// \returns a value when \p M is an ins mask for \p NumInputElements.
  172. ///
  173. /// First element of the returned pair is true when the produced
  174. /// G_INSERT_VECTOR_ELT destination should be the LHS of the G_SHUFFLE_VECTOR.
  175. ///
  176. /// Second element is the destination lane for the G_INSERT_VECTOR_ELT.
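/// E.g. with 4 input elements, the mask <0, 5, 2, 3> agrees with the LHS
/// everywhere except lane 1, so this returns (true, 1): keep the LHS and
/// insert element 1 of the RHS into its lane 1.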
  177. static std::optional<std::pair<bool, int>> isINSMask(ArrayRef<int> M,
  178. int NumInputElements) {
  179. if (M.size() != static_cast<size_t>(NumInputElements))
  180. return std::nullopt;
  181. int NumLHSMatch = 0, NumRHSMatch = 0;
  182. int LastLHSMismatch = -1, LastRHSMismatch = -1;
  183. for (int Idx = 0; Idx < NumInputElements; ++Idx) {
  184. if (M[Idx] == -1) {
  185. ++NumLHSMatch;
  186. ++NumRHSMatch;
  187. continue;
  188. }
  189. M[Idx] == Idx ? ++NumLHSMatch : LastLHSMismatch = Idx;
  190. M[Idx] == Idx + NumInputElements ? ++NumRHSMatch : LastRHSMismatch = Idx;
  191. }
  192. const int NumNeededToMatch = NumInputElements - 1;
  193. if (NumLHSMatch == NumNeededToMatch)
  194. return std::make_pair(true, LastLHSMismatch);
  195. if (NumRHSMatch == NumNeededToMatch)
  196. return std::make_pair(false, LastRHSMismatch);
  197. return std::nullopt;
  198. }
  199. /// \return true if a G_SHUFFLE_VECTOR instruction \p MI can be replaced with a
  200. /// G_REV instruction. The appropriate G_REV pseudo is stored in \p MatchInfo.
  201. static bool matchREV(MachineInstr &MI, MachineRegisterInfo &MRI,
  202. ShuffleVectorPseudo &MatchInfo) {
  203. assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
  204. ArrayRef<int> ShuffleMask = MI.getOperand(3).getShuffleMask();
  205. Register Dst = MI.getOperand(0).getReg();
  206. Register Src = MI.getOperand(1).getReg();
  207. LLT Ty = MRI.getType(Dst);
  208. unsigned EltSize = Ty.getScalarSizeInBits();
  209. // Element size for a rev cannot be 64.
  210. if (EltSize == 64)
  211. return false;
  212. unsigned NumElts = Ty.getNumElements();
  213. // Try to produce G_REV64
  214. if (isREVMask(ShuffleMask, EltSize, NumElts, 64)) {
  215. MatchInfo = ShuffleVectorPseudo(AArch64::G_REV64, Dst, {Src});
  216. return true;
  217. }
  218. // TODO: Produce G_REV32 and G_REV16 once we have proper legalization support.
  219. // This should be identical to above, but with a constant 32 and constant
  220. // 16.
  221. return false;
  222. }
  223. /// \return true if a G_SHUFFLE_VECTOR instruction \p MI can be replaced with
  224. /// a G_TRN1 or G_TRN2 instruction.
  225. static bool matchTRN(MachineInstr &MI, MachineRegisterInfo &MRI,
  226. ShuffleVectorPseudo &MatchInfo) {
  227. assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
  228. unsigned WhichResult;
  229. ArrayRef<int> ShuffleMask = MI.getOperand(3).getShuffleMask();
  230. Register Dst = MI.getOperand(0).getReg();
  231. unsigned NumElts = MRI.getType(Dst).getNumElements();
  232. if (!isTRNMask(ShuffleMask, NumElts, WhichResult))
  233. return false;
  234. unsigned Opc = (WhichResult == 0) ? AArch64::G_TRN1 : AArch64::G_TRN2;
  235. Register V1 = MI.getOperand(1).getReg();
  236. Register V2 = MI.getOperand(2).getReg();
  237. MatchInfo = ShuffleVectorPseudo(Opc, Dst, {V1, V2});
  238. return true;
  239. }
  240. /// \return true if a G_SHUFFLE_VECTOR instruction \p MI can be replaced with
  241. /// a G_UZP1 or G_UZP2 instruction.
  242. ///
  243. /// \param [in] MI - The shuffle vector instruction.
  244. /// \param [out] MatchInfo - Either G_UZP1 or G_UZP2 on success.
  245. static bool matchUZP(MachineInstr &MI, MachineRegisterInfo &MRI,
  246. ShuffleVectorPseudo &MatchInfo) {
  247. assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
  248. unsigned WhichResult;
  249. ArrayRef<int> ShuffleMask = MI.getOperand(3).getShuffleMask();
  250. Register Dst = MI.getOperand(0).getReg();
  251. unsigned NumElts = MRI.getType(Dst).getNumElements();
  252. if (!isUZPMask(ShuffleMask, NumElts, WhichResult))
  253. return false;
  254. unsigned Opc = (WhichResult == 0) ? AArch64::G_UZP1 : AArch64::G_UZP2;
  255. Register V1 = MI.getOperand(1).getReg();
  256. Register V2 = MI.getOperand(2).getReg();
  257. MatchInfo = ShuffleVectorPseudo(Opc, Dst, {V1, V2});
  258. return true;
  259. }
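/// \return true if a G_SHUFFLE_VECTOR instruction \p MI can be replaced with
/// a G_ZIP1 or G_ZIP2 instruction.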
  260. static bool matchZip(MachineInstr &MI, MachineRegisterInfo &MRI,
  261. ShuffleVectorPseudo &MatchInfo) {
  262. assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
  263. unsigned WhichResult;
  264. ArrayRef<int> ShuffleMask = MI.getOperand(3).getShuffleMask();
  265. Register Dst = MI.getOperand(0).getReg();
  266. unsigned NumElts = MRI.getType(Dst).getNumElements();
  267. if (!isZipMask(ShuffleMask, NumElts, WhichResult))
  268. return false;
  269. unsigned Opc = (WhichResult == 0) ? AArch64::G_ZIP1 : AArch64::G_ZIP2;
  270. Register V1 = MI.getOperand(1).getReg();
  271. Register V2 = MI.getOperand(2).getReg();
  272. MatchInfo = ShuffleVectorPseudo(Opc, Dst, {V1, V2});
  273. return true;
  274. }
  275. /// Helper function for matchDup.
  276. static bool matchDupFromInsertVectorElt(int Lane, MachineInstr &MI,
  277. MachineRegisterInfo &MRI,
  278. ShuffleVectorPseudo &MatchInfo) {
  279. if (Lane != 0)
  280. return false;
  281. // Try to match a vector splat operation into a dup instruction.
  282. // We're looking for this pattern:
  283. //
  284. // %scalar:gpr(s64) = COPY $x0
  285. // %undef:fpr(<2 x s64>) = G_IMPLICIT_DEF
  286. // %cst0:gpr(s32) = G_CONSTANT i32 0
  287. // %zerovec:fpr(<2 x s32>) = G_BUILD_VECTOR %cst0(s32), %cst0(s32)
  288. // %ins:fpr(<2 x s64>) = G_INSERT_VECTOR_ELT %undef, %scalar(s64), %cst0(s32)
  289. // %splat:fpr(<2 x s64>) = G_SHUFFLE_VECTOR %ins(<2 x s64>), %undef, %zerovec(<2 x s32>)
  290. //
  291. // ...into:
  292. // %splat = G_DUP %scalar
  293. // Begin matching the insert.
  294. auto *InsMI = getOpcodeDef(TargetOpcode::G_INSERT_VECTOR_ELT,
  295. MI.getOperand(1).getReg(), MRI);
  296. if (!InsMI)
  297. return false;
  298. // Match the undef vector operand.
  299. if (!getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, InsMI->getOperand(1).getReg(),
  300. MRI))
  301. return false;
  302. // Match the index constant 0.
  303. if (!mi_match(InsMI->getOperand(3).getReg(), MRI, m_ZeroInt()))
  304. return false;
  305. MatchInfo = ShuffleVectorPseudo(AArch64::G_DUP, MI.getOperand(0).getReg(),
  306. {InsMI->getOperand(2).getReg()});
  307. return true;
  308. }
  309. /// Helper function for matchDup.
  310. static bool matchDupFromBuildVector(int Lane, MachineInstr &MI,
  311. MachineRegisterInfo &MRI,
  312. ShuffleVectorPseudo &MatchInfo) {
  313. assert(Lane >= 0 && "Expected positive lane?");
  314. // Test if the LHS is a BUILD_VECTOR. If it is, then we can just reference the
  315. // lane's definition directly.
  316. auto *BuildVecMI = getOpcodeDef(TargetOpcode::G_BUILD_VECTOR,
  317. MI.getOperand(1).getReg(), MRI);
  318. if (!BuildVecMI)
  319. return false;
  320. Register Reg = BuildVecMI->getOperand(Lane + 1).getReg();
  321. MatchInfo =
  322. ShuffleVectorPseudo(AArch64::G_DUP, MI.getOperand(0).getReg(), {Reg});
  323. return true;
  324. }
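/// \return true if a G_SHUFFLE_VECTOR instruction \p MI is a splat that can be
/// replaced with a G_DUP of the splatted value.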
  325. static bool matchDup(MachineInstr &MI, MachineRegisterInfo &MRI,
  326. ShuffleVectorPseudo &MatchInfo) {
  327. assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
  328. auto MaybeLane = getSplatIndex(MI);
  329. if (!MaybeLane)
  330. return false;
  331. int Lane = *MaybeLane;
  332. // If the splat lane is undef, any lane works; use lane 0 so we can still try to emit a plain G_DUP.
  333. if (Lane < 0)
  334. Lane = 0;
  335. if (matchDupFromInsertVectorElt(Lane, MI, MRI, MatchInfo))
  336. return true;
  337. if (matchDupFromBuildVector(Lane, MI, MRI, MatchInfo))
  338. return true;
  339. return false;
  340. }
  341. // Check if an EXT instruction can handle the shuffle mask when the vector
  342. // sources of the shuffle are the same.
  343. static bool isSingletonExtMask(ArrayRef<int> M, LLT Ty) {
  344. unsigned NumElts = Ty.getNumElements();
  345. // Assume that the first shuffle index is not UNDEF. Fail if it is.
  346. if (M[0] < 0)
  347. return false;
  348. // If this is a VEXT shuffle, the immediate value is the index of the first
  349. // element. The other shuffle indices must be the successive elements after
  350. // the first one.
  351. unsigned ExpectedElt = M[0];
  352. for (unsigned I = 1; I < NumElts; ++I) {
  353. // Increment the expected index. If it wraps around, just follow it
  354. // back to index zero and keep going.
  355. ++ExpectedElt;
  356. if (ExpectedElt == NumElts)
  357. ExpectedElt = 0;
  358. if (M[I] < 0)
  359. continue; // Ignore UNDEF indices.
  360. if (ExpectedElt != static_cast<unsigned>(M[I]))
  361. return false;
  362. }
  363. return true;
  364. }
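/// \return true if a G_SHUFFLE_VECTOR instruction \p MI can be replaced with a
/// G_EXT instruction, which extracts a contiguous run of elements from the
/// concatenation of its two sources starting at an immediate byte offset.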
  365. static bool matchEXT(MachineInstr &MI, MachineRegisterInfo &MRI,
  366. ShuffleVectorPseudo &MatchInfo) {
  367. assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
  368. Register Dst = MI.getOperand(0).getReg();
  369. LLT DstTy = MRI.getType(Dst);
  370. Register V1 = MI.getOperand(1).getReg();
  371. Register V2 = MI.getOperand(2).getReg();
  372. auto Mask = MI.getOperand(3).getShuffleMask();
  373. uint64_t Imm;
  374. auto ExtInfo = getExtMask(Mask, DstTy.getNumElements());
  375. uint64_t ExtFactor = MRI.getType(V1).getScalarSizeInBits() / 8;
  376. if (!ExtInfo) {
  377. if (!getOpcodeDef<GImplicitDef>(V2, MRI) ||
  378. !isSingletonExtMask(Mask, DstTy))
  379. return false;
  380. Imm = Mask[0] * ExtFactor;
  381. MatchInfo = ShuffleVectorPseudo(AArch64::G_EXT, Dst, {V1, V1, Imm});
  382. return true;
  383. }
  384. bool ReverseExt;
  385. std::tie(ReverseExt, Imm) = *ExtInfo;
  386. if (ReverseExt)
  387. std::swap(V1, V2);
  388. Imm *= ExtFactor;
  389. MatchInfo = ShuffleVectorPseudo(AArch64::G_EXT, Dst, {V1, V2, Imm});
  390. return true;
  391. }
  392. /// Replace a G_SHUFFLE_VECTOR instruction \p MI with the pseudo described by
  393. /// \p MatchInfo, which supplies the opcode, destination, and source operands.
  394. static bool applyShuffleVectorPseudo(MachineInstr &MI,
  395. ShuffleVectorPseudo &MatchInfo) {
  396. MachineIRBuilder MIRBuilder(MI);
  397. MIRBuilder.buildInstr(MatchInfo.Opc, {MatchInfo.Dst}, MatchInfo.SrcOps);
  398. MI.eraseFromParent();
  399. return true;
  400. }
  401. /// Replace a G_SHUFFLE_VECTOR instruction with G_EXT.
  402. /// Special-cased because the constant operand must be emitted as a G_CONSTANT
  403. /// for the imported tablegen patterns to work.
  404. static bool applyEXT(MachineInstr &MI, ShuffleVectorPseudo &MatchInfo) {
  405. MachineIRBuilder MIRBuilder(MI);
  406. // Tablegen patterns expect an i32 G_CONSTANT as the final op.
  407. auto Cst =
  408. MIRBuilder.buildConstant(LLT::scalar(32), MatchInfo.SrcOps[2].getImm());
  409. MIRBuilder.buildInstr(MatchInfo.Opc, {MatchInfo.Dst},
  410. {MatchInfo.SrcOps[0], MatchInfo.SrcOps[1], Cst});
  411. MI.eraseFromParent();
  412. return true;
  413. }
  414. /// Match a G_SHUFFLE_VECTOR with a mask which corresponds to a
  415. /// G_INSERT_VECTOR_ELT and G_EXTRACT_VECTOR_ELT pair.
  416. ///
  417. /// e.g.
  418. /// %shuf = G_SHUFFLE_VECTOR %left, %right, shufflemask(0, 0)
  419. ///
  420. /// Can be represented as
  421. ///
  422. /// %extract = G_EXTRACT_VECTOR_ELT %left, 0
  423. /// %ins = G_INSERT_VECTOR_ELT %left, %extract, 1
  424. ///
  425. static bool matchINS(MachineInstr &MI, MachineRegisterInfo &MRI,
  426. std::tuple<Register, int, Register, int> &MatchInfo) {
  427. assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
  428. ArrayRef<int> ShuffleMask = MI.getOperand(3).getShuffleMask();
  429. Register Dst = MI.getOperand(0).getReg();
  430. int NumElts = MRI.getType(Dst).getNumElements();
  431. auto DstIsLeftAndDstLane = isINSMask(ShuffleMask, NumElts);
  432. if (!DstIsLeftAndDstLane)
  433. return false;
  434. bool DstIsLeft;
  435. int DstLane;
  436. std::tie(DstIsLeft, DstLane) = *DstIsLeftAndDstLane;
  437. Register Left = MI.getOperand(1).getReg();
  438. Register Right = MI.getOperand(2).getReg();
  439. Register DstVec = DstIsLeft ? Left : Right;
  440. Register SrcVec = Left;
  441. int SrcLane = ShuffleMask[DstLane];
  442. if (SrcLane >= NumElts) {
  443. SrcVec = Right;
  444. SrcLane -= NumElts;
  445. }
  446. MatchInfo = std::make_tuple(DstVec, DstLane, SrcVec, SrcLane);
  447. return true;
  448. }
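/// Lower a G_SHUFFLE_VECTOR matched by matchINS into a G_EXTRACT_VECTOR_ELT /
/// G_INSERT_VECTOR_ELT pair, using the vectors and lanes recorded in \p MatchInfo.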
  449. static bool applyINS(MachineInstr &MI, MachineRegisterInfo &MRI,
  450. MachineIRBuilder &Builder,
  451. std::tuple<Register, int, Register, int> &MatchInfo) {
  452. Builder.setInstrAndDebugLoc(MI);
  453. Register Dst = MI.getOperand(0).getReg();
  454. auto ScalarTy = MRI.getType(Dst).getElementType();
  455. Register DstVec, SrcVec;
  456. int DstLane, SrcLane;
  457. std::tie(DstVec, DstLane, SrcVec, SrcLane) = MatchInfo;
  458. auto SrcCst = Builder.buildConstant(LLT::scalar(64), SrcLane);
  459. auto Extract = Builder.buildExtractVectorElement(ScalarTy, SrcVec, SrcCst);
  460. auto DstCst = Builder.buildConstant(LLT::scalar(64), DstLane);
  461. Builder.buildInsertVectorElement(Dst, DstVec, Extract, DstCst);
  462. MI.eraseFromParent();
  463. return true;
  464. }
  465. /// isVShiftRImm - Check if \p Reg is a constant splat vector whose scalar value
  466. /// is valid as the immediate operand of a vector shift right operation. The
  467. /// value must be in the range: 1 <= Value <= ElementBits for a right shift.
  468. static bool isVShiftRImm(Register Reg, MachineRegisterInfo &MRI, LLT Ty,
  469. int64_t &Cnt) {
  470. assert(Ty.isVector() && "vector shift count is not a vector type");
  471. MachineInstr *MI = MRI.getVRegDef(Reg);
  472. auto Cst = getAArch64VectorSplatScalar(*MI, MRI);
  473. if (!Cst)
  474. return false;
  475. Cnt = *Cst;
  476. int64_t ElementBits = Ty.getScalarSizeInBits();
  477. return Cnt >= 1 && Cnt <= ElementBits;
  478. }
  479. /// Match a vector G_ASHR or G_LSHR with a valid immediate shift.
  480. static bool matchVAshrLshrImm(MachineInstr &MI, MachineRegisterInfo &MRI,
  481. int64_t &Imm) {
  482. assert(MI.getOpcode() == TargetOpcode::G_ASHR ||
  483. MI.getOpcode() == TargetOpcode::G_LSHR);
  484. LLT Ty = MRI.getType(MI.getOperand(1).getReg());
  485. if (!Ty.isVector())
  486. return false;
  487. return isVShiftRImm(MI.getOperand(2).getReg(), MRI, Ty, Imm);
  488. }
  489. static bool applyVAshrLshrImm(MachineInstr &MI, MachineRegisterInfo &MRI,
  490. int64_t &Imm) {
  491. unsigned Opc = MI.getOpcode();
  492. assert(Opc == TargetOpcode::G_ASHR || Opc == TargetOpcode::G_LSHR);
  493. unsigned NewOpc =
  494. Opc == TargetOpcode::G_ASHR ? AArch64::G_VASHR : AArch64::G_VLSHR;
  495. MachineIRBuilder MIB(MI);
  496. auto ImmDef = MIB.buildConstant(LLT::scalar(32), Imm);
  497. MIB.buildInstr(NewOpc, {MI.getOperand(0)}, {MI.getOperand(1), ImmDef});
  498. MI.eraseFromParent();
  499. return true;
  500. }
  501. /// Determine if it is possible to modify the \p RHS and predicate \p P of a
  502. /// G_ICMP instruction such that the right-hand side is an arithmetic immediate.
  503. ///
  504. /// \returns A pair containing the updated immediate and predicate which may
  505. /// be used to optimize the instruction.
  506. ///
  507. /// \note This assumes that the comparison has been legalized.
  508. std::optional<std::pair<uint64_t, CmpInst::Predicate>>
  509. tryAdjustICmpImmAndPred(Register RHS, CmpInst::Predicate P,
  510. const MachineRegisterInfo &MRI) {
  511. const auto &Ty = MRI.getType(RHS);
  512. if (Ty.isVector())
  513. return std::nullopt;
  514. unsigned Size = Ty.getSizeInBits();
  515. assert((Size == 32 || Size == 64) && "Expected 32 or 64 bit compare only?");
  516. // If the RHS is not a constant, or the RHS is already a valid arithmetic
  517. // immediate, then there is nothing to change.
  518. auto ValAndVReg = getIConstantVRegValWithLookThrough(RHS, MRI);
  519. if (!ValAndVReg)
  520. return std::nullopt;
  521. uint64_t C = ValAndVReg->Value.getZExtValue();
  522. if (isLegalArithImmed(C))
  523. return std::nullopt;
  524. // We have a non-arithmetic immediate. Check if adjusting the immediate and
  525. // adjusting the predicate will result in a legal arithmetic immediate.
  526. switch (P) {
  527. default:
  528. return std::nullopt;
  529. case CmpInst::ICMP_SLT:
  530. case CmpInst::ICMP_SGE:
  531. // Check for
  532. //
  533. // x slt c => x sle c - 1
  534. // x sge c => x sgt c - 1
  535. //
  536. // When c is not the smallest possible negative number.
  537. if ((Size == 64 && static_cast<int64_t>(C) == INT64_MIN) ||
  538. (Size == 32 && static_cast<int32_t>(C) == INT32_MIN))
  539. return std::nullopt;
  540. P = (P == CmpInst::ICMP_SLT) ? CmpInst::ICMP_SLE : CmpInst::ICMP_SGT;
  541. C -= 1;
  542. break;
  543. case CmpInst::ICMP_ULT:
  544. case CmpInst::ICMP_UGE:
  545. // Check for
  546. //
  547. // x ult c => x ule c - 1
  548. // x uge c => x ugt c - 1
  549. //
  550. // When c is not zero.
  551. if (C == 0)
  552. return std::nullopt;
  553. P = (P == CmpInst::ICMP_ULT) ? CmpInst::ICMP_ULE : CmpInst::ICMP_UGT;
  554. C -= 1;
  555. break;
  556. case CmpInst::ICMP_SLE:
  557. case CmpInst::ICMP_SGT:
  558. // Check for
  559. //
  560. // x sle c => x slt c + 1
  561. // x sgt c => x sge c + 1
  562. //
  563. // When c is not the largest possible signed integer.
  564. if ((Size == 32 && static_cast<int32_t>(C) == INT32_MAX) ||
  565. (Size == 64 && static_cast<int64_t>(C) == INT64_MAX))
  566. return std::nullopt;
  567. P = (P == CmpInst::ICMP_SLE) ? CmpInst::ICMP_SLT : CmpInst::ICMP_SGE;
  568. C += 1;
  569. break;
  570. case CmpInst::ICMP_ULE:
  571. case CmpInst::ICMP_UGT:
  572. // Check for
  573. //
  574. // x ule c => x ult c + 1
  575. // x ugt c => x uge c + 1
  576. //
  577. // When c is not the largest possible unsigned integer.
  578. if ((Size == 32 && static_cast<uint32_t>(C) == UINT32_MAX) ||
  579. (Size == 64 && C == UINT64_MAX))
  580. return std::nullopt;
  581. P = (P == CmpInst::ICMP_ULE) ? CmpInst::ICMP_ULT : CmpInst::ICMP_UGE;
  582. C += 1;
  583. break;
  584. }
  585. // Check if the new constant is valid, and return the updated constant and
  586. // predicate if it is.
  587. if (Size == 32)
  588. C = static_cast<uint32_t>(C);
  589. if (!isLegalArithImmed(C))
  590. return std::nullopt;
  591. return {{C, P}};
  592. }
  593. /// Determine whether or not it is possible to update the RHS and predicate of
  594. /// a G_ICMP instruction such that the RHS will be selected as an arithmetic
  595. /// immediate.
  596. ///
  597. /// \p MI - The G_ICMP instruction
  598. /// \p MatchInfo - The new RHS immediate and predicate on success
  599. ///
  600. /// See tryAdjustICmpImmAndPred for valid transformations.
  601. bool matchAdjustICmpImmAndPred(
  602. MachineInstr &MI, const MachineRegisterInfo &MRI,
  603. std::pair<uint64_t, CmpInst::Predicate> &MatchInfo) {
  604. assert(MI.getOpcode() == TargetOpcode::G_ICMP);
  605. Register RHS = MI.getOperand(3).getReg();
  606. auto Pred = static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
  607. if (auto MaybeNewImmAndPred = tryAdjustICmpImmAndPred(RHS, Pred, MRI)) {
  608. MatchInfo = *MaybeNewImmAndPred;
  609. return true;
  610. }
  611. return false;
  612. }
  613. bool applyAdjustICmpImmAndPred(
  614. MachineInstr &MI, std::pair<uint64_t, CmpInst::Predicate> &MatchInfo,
  615. MachineIRBuilder &MIB, GISelChangeObserver &Observer) {
  616. MIB.setInstrAndDebugLoc(MI);
  617. MachineOperand &RHS = MI.getOperand(3);
  618. MachineRegisterInfo &MRI = *MIB.getMRI();
  619. auto Cst = MIB.buildConstant(MRI.cloneVirtualRegister(RHS.getReg()),
  620. MatchInfo.first);
  621. Observer.changingInstr(MI);
  622. RHS.setReg(Cst->getOperand(0).getReg());
  623. MI.getOperand(1).setPredicate(MatchInfo.second);
  624. Observer.changedInstr(MI);
  625. return true;
  626. }
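/// \return true if a G_SHUFFLE_VECTOR \p MI which splats a lane of its first
/// source can be replaced with a G_DUPLANE* instruction. The opcode and lane
/// index are stored in \p MatchInfo.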
  627. bool matchDupLane(MachineInstr &MI, MachineRegisterInfo &MRI,
  628. std::pair<unsigned, int> &MatchInfo) {
  629. assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
  630. Register Src1Reg = MI.getOperand(1).getReg();
  631. const LLT SrcTy = MRI.getType(Src1Reg);
  632. const LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
  633. auto LaneIdx = getSplatIndex(MI);
  634. if (!LaneIdx)
  635. return false;
  636. // The lane idx should be within the first source vector.
  637. if (*LaneIdx >= SrcTy.getNumElements())
  638. return false;
  639. if (DstTy != SrcTy)
  640. return false;
  641. LLT ScalarTy = SrcTy.getElementType();
  642. unsigned ScalarSize = ScalarTy.getSizeInBits();
  643. unsigned Opc = 0;
  644. switch (SrcTy.getNumElements()) {
  645. case 2:
  646. if (ScalarSize == 64)
  647. Opc = AArch64::G_DUPLANE64;
  648. else if (ScalarSize == 32)
  649. Opc = AArch64::G_DUPLANE32;
  650. break;
  651. case 4:
  652. if (ScalarSize == 32)
  653. Opc = AArch64::G_DUPLANE32;
  654. break;
  655. case 8:
  656. if (ScalarSize == 16)
  657. Opc = AArch64::G_DUPLANE16;
  658. break;
  659. case 16:
  660. if (ScalarSize == 8)
  661. Opc = AArch64::G_DUPLANE8;
  662. break;
  663. default:
  664. break;
  665. }
  666. if (!Opc)
  667. return false;
  668. MatchInfo.first = Opc;
  669. MatchInfo.second = *LaneIdx;
  670. return true;
  671. }
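/// Lower the matched G_SHUFFLE_VECTOR to a G_DUPLANE*. A <2 x s32> source is
/// first widened to <4 x s32> with a G_CONCAT_VECTORS against undef so that
/// G_DUPLANE32 can be used.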
  672. bool applyDupLane(MachineInstr &MI, MachineRegisterInfo &MRI,
  673. MachineIRBuilder &B, std::pair<unsigned, int> &MatchInfo) {
  674. assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
  675. Register Src1Reg = MI.getOperand(1).getReg();
  676. const LLT SrcTy = MRI.getType(Src1Reg);
  677. B.setInstrAndDebugLoc(MI);
  678. auto Lane = B.buildConstant(LLT::scalar(64), MatchInfo.second);
  679. Register DupSrc = MI.getOperand(1).getReg();
  680. // For types like <2 x s32>, we can use G_DUPLANE32, with a <4 x s32> source.
  681. // To do this, we can use a G_CONCAT_VECTORS to do the widening.
  682. if (SrcTy == LLT::fixed_vector(2, LLT::scalar(32))) {
  683. assert(MRI.getType(MI.getOperand(0).getReg()).getNumElements() == 2 &&
  684. "Unexpected dest elements");
  685. auto Undef = B.buildUndef(SrcTy);
  686. DupSrc = B.buildConcatVectors(
  687. SrcTy.changeElementCount(ElementCount::getFixed(4)),
  688. {Src1Reg, Undef.getReg(0)})
  689. .getReg(0);
  690. }
  691. B.buildInstr(MatchInfo.first, {MI.getOperand(0).getReg()}, {DupSrc, Lane});
  692. MI.eraseFromParent();
  693. return true;
  694. }
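/// \return true if a splat G_BUILD_VECTOR \p MI should be lowered to G_DUP.
/// Splats of the constants 0 and -1 are left alone so that the immAllZerosV and
/// immAllOnesV selection patterns can still match them.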
  695. static bool matchBuildVectorToDup(MachineInstr &MI, MachineRegisterInfo &MRI) {
  696. assert(MI.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
  697. auto Splat = getAArch64VectorSplat(MI, MRI);
  698. if (!Splat)
  699. return false;
  700. if (Splat->isReg())
  701. return true;
  702. // Later, during selection, we'll try to match imported patterns using
  703. // immAllOnesV and immAllZerosV. These require G_BUILD_VECTOR. Don't lower
  704. // G_BUILD_VECTORs which could match those patterns.
  705. int64_t Cst = Splat->getCst();
  706. return (Cst != 0 && Cst != -1);
  707. }
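/// Lower the matched splat G_BUILD_VECTOR to a G_DUP of its first element.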
  708. static bool applyBuildVectorToDup(MachineInstr &MI, MachineRegisterInfo &MRI,
  709. MachineIRBuilder &B) {
  710. B.setInstrAndDebugLoc(MI);
  711. B.buildInstr(AArch64::G_DUP, {MI.getOperand(0).getReg()},
  712. {MI.getOperand(1).getReg()});
  713. MI.eraseFromParent();
  714. return true;
  715. }
  716. /// \returns how many instructions would be saved by folding a G_ICMP's shift
  717. /// and/or extension operations.
  718. static unsigned getCmpOperandFoldingProfit(Register CmpOp,
  719. const MachineRegisterInfo &MRI) {
  720. // No instructions to save if there's more than one use or no uses.
  721. if (!MRI.hasOneNonDBGUse(CmpOp))
  722. return 0;
  723. // FIXME: This is duplicated with the selector. (See: selectShiftedRegister)
  724. auto IsSupportedExtend = [&](const MachineInstr &MI) {
  725. if (MI.getOpcode() == TargetOpcode::G_SEXT_INREG)
  726. return true;
  727. if (MI.getOpcode() != TargetOpcode::G_AND)
  728. return false;
  729. auto ValAndVReg =
  730. getIConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);
  731. if (!ValAndVReg)
  732. return false;
  733. uint64_t Mask = ValAndVReg->Value.getZExtValue();
  734. return (Mask == 0xFF || Mask == 0xFFFF || Mask == 0xFFFFFFFF);
  735. };
  736. MachineInstr *Def = getDefIgnoringCopies(CmpOp, MRI);
  737. if (IsSupportedExtend(*Def))
  738. return 1;
  739. unsigned Opc = Def->getOpcode();
  740. if (Opc != TargetOpcode::G_SHL && Opc != TargetOpcode::G_ASHR &&
  741. Opc != TargetOpcode::G_LSHR)
  742. return 0;
  743. auto MaybeShiftAmt =
  744. getIConstantVRegValWithLookThrough(Def->getOperand(2).getReg(), MRI);
  745. if (!MaybeShiftAmt)
  746. return 0;
  747. uint64_t ShiftAmt = MaybeShiftAmt->Value.getZExtValue();
  748. MachineInstr *ShiftLHS =
  749. getDefIgnoringCopies(Def->getOperand(1).getReg(), MRI);
  750. // Check if we can fold an extend and a shift.
  751. // FIXME: This is duplicated with the selector. (See:
  752. // selectArithExtendedRegister)
  753. if (IsSupportedExtend(*ShiftLHS))
  754. return (ShiftAmt <= 4) ? 2 : 1;
  755. LLT Ty = MRI.getType(Def->getOperand(0).getReg());
  756. if (Ty.isVector())
  757. return 0;
  758. unsigned ShiftSize = Ty.getSizeInBits();
  759. if ((ShiftSize == 32 && ShiftAmt <= 31) ||
  760. (ShiftSize == 64 && ShiftAmt <= 63))
  761. return 1;
  762. return 0;
  763. }
  764. /// \returns true if it would be profitable to swap the LHS and RHS of a G_ICMP
  765. /// instruction \p MI.
  766. static bool trySwapICmpOperands(MachineInstr &MI,
  767. const MachineRegisterInfo &MRI) {
  768. assert(MI.getOpcode() == TargetOpcode::G_ICMP);
  769. // Swap the operands if it would introduce a profitable folding opportunity.
  770. // (e.g. a shift + extend).
  771. //
  772. // For example:
  773. // lsl w13, w11, #1
  774. // cmp w13, w12
  775. // can be turned into:
  776. // cmp w12, w11, lsl #1
  777. // Don't swap if there's a constant on the RHS, because we know we can fold
  778. // that.
  779. Register RHS = MI.getOperand(3).getReg();
  780. auto RHSCst = getIConstantVRegValWithLookThrough(RHS, MRI);
  781. if (RHSCst && isLegalArithImmed(RHSCst->Value.getSExtValue()))
  782. return false;
  783. Register LHS = MI.getOperand(2).getReg();
  784. auto Pred = static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
  785. auto GetRegForProfit = [&](Register Reg) {
  786. MachineInstr *Def = getDefIgnoringCopies(Reg, MRI);
  787. return isCMN(Def, Pred, MRI) ? Def->getOperand(2).getReg() : Reg;
  788. };
  789. // Don't have a constant on the RHS. If we swap the LHS and RHS of the
  790. // compare, would we be able to fold more instructions?
  791. Register TheLHS = GetRegForProfit(LHS);
  792. Register TheRHS = GetRegForProfit(RHS);
  793. // If the LHS is more likely to give us a folding opportunity, then swap the
  794. // LHS and RHS.
  795. return (getCmpOperandFoldingProfit(TheLHS, MRI) >
  796. getCmpOperandFoldingProfit(TheRHS, MRI));
  797. }
  798. static bool applySwapICmpOperands(MachineInstr &MI,
  799. GISelChangeObserver &Observer) {
  800. auto Pred = static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
  801. Register LHS = MI.getOperand(2).getReg();
  802. Register RHS = MI.getOperand(3).getReg();
  803. Observer.changingInstr(MI);
  804. MI.getOperand(1).setPredicate(CmpInst::getSwappedPredicate(Pred));
  805. MI.getOperand(2).setReg(RHS);
  806. MI.getOperand(3).setReg(LHS);
  807. Observer.changedInstr(MI);
  808. return true;
  809. }
  810. /// \returns a function which builds a vector floating point compare instruction
  811. /// for a condition code \p CC.
  812. /// \param [in] IsZero - True if the comparison is against 0.
  813. /// \param [in] NoNans - True if the target has NoNansFPMath.
  814. static std::function<Register(MachineIRBuilder &)>
  815. getVectorFCMP(AArch64CC::CondCode CC, Register LHS, Register RHS, bool IsZero,
  816. bool NoNans, MachineRegisterInfo &MRI) {
  817. LLT DstTy = MRI.getType(LHS);
  818. assert(DstTy.isVector() && "Expected vector types only?");
  819. assert(DstTy == MRI.getType(RHS) && "Src and Dst types must match!");
  820. switch (CC) {
  821. default:
  822. llvm_unreachable("Unexpected condition code!");
  823. case AArch64CC::NE:
  824. return [LHS, RHS, IsZero, DstTy](MachineIRBuilder &MIB) {
  825. auto FCmp = IsZero
  826. ? MIB.buildInstr(AArch64::G_FCMEQZ, {DstTy}, {LHS})
  827. : MIB.buildInstr(AArch64::G_FCMEQ, {DstTy}, {LHS, RHS});
  828. return MIB.buildNot(DstTy, FCmp).getReg(0);
  829. };
  830. case AArch64CC::EQ:
  831. return [LHS, RHS, IsZero, DstTy](MachineIRBuilder &MIB) {
  832. return IsZero
  833. ? MIB.buildInstr(AArch64::G_FCMEQZ, {DstTy}, {LHS}).getReg(0)
  834. : MIB.buildInstr(AArch64::G_FCMEQ, {DstTy}, {LHS, RHS})
  835. .getReg(0);
  836. };
  837. case AArch64CC::GE:
  838. return [LHS, RHS, IsZero, DstTy](MachineIRBuilder &MIB) {
  839. return IsZero
  840. ? MIB.buildInstr(AArch64::G_FCMGEZ, {DstTy}, {LHS}).getReg(0)
  841. : MIB.buildInstr(AArch64::G_FCMGE, {DstTy}, {LHS, RHS})
  842. .getReg(0);
  843. };
  844. case AArch64CC::GT:
  845. return [LHS, RHS, IsZero, DstTy](MachineIRBuilder &MIB) {
  846. return IsZero
  847. ? MIB.buildInstr(AArch64::G_FCMGTZ, {DstTy}, {LHS}).getReg(0)
  848. : MIB.buildInstr(AArch64::G_FCMGT, {DstTy}, {LHS, RHS})
  849. .getReg(0);
  850. };
  851. case AArch64CC::LS:
  852. return [LHS, RHS, IsZero, DstTy](MachineIRBuilder &MIB) {
  853. return IsZero
  854. ? MIB.buildInstr(AArch64::G_FCMLEZ, {DstTy}, {LHS}).getReg(0)
  855. : MIB.buildInstr(AArch64::G_FCMGE, {DstTy}, {RHS, LHS})
  856. .getReg(0);
  857. };
  858. case AArch64CC::MI:
  859. return [LHS, RHS, IsZero, DstTy](MachineIRBuilder &MIB) {
  860. return IsZero
  861. ? MIB.buildInstr(AArch64::G_FCMLTZ, {DstTy}, {LHS}).getReg(0)
  862. : MIB.buildInstr(AArch64::G_FCMGT, {DstTy}, {RHS, LHS})
  863. .getReg(0);
  864. };
  865. }
  866. }
  867. /// Try to lower a vector G_FCMP \p MI into an AArch64-specific pseudo.
  868. static bool lowerVectorFCMP(MachineInstr &MI, MachineRegisterInfo &MRI,
  869. MachineIRBuilder &MIB) {
  870. assert(MI.getOpcode() == TargetOpcode::G_FCMP);
  871. const auto &ST = MI.getMF()->getSubtarget<AArch64Subtarget>();
  872. Register Dst = MI.getOperand(0).getReg();
  873. LLT DstTy = MRI.getType(Dst);
  874. if (!DstTy.isVector() || !ST.hasNEON())
  875. return false;
  876. const auto Pred =
  877. static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
  878. Register LHS = MI.getOperand(2).getReg();
  879. // TODO: Handle v4s16 case.
  880. unsigned EltSize = MRI.getType(LHS).getScalarSizeInBits();
  881. if (EltSize != 32 && EltSize != 64)
  882. return false;
  883. Register RHS = MI.getOperand(3).getReg();
  884. auto Splat = getAArch64VectorSplat(*MRI.getVRegDef(RHS), MRI);
  885. // Compares against 0 have special target-specific pseudos.
  886. bool IsZero = Splat && Splat->isCst() && Splat->getCst() == 0;
  887. bool Invert = false;
  888. AArch64CC::CondCode CC, CC2 = AArch64CC::AL;
  889. if (Pred == CmpInst::Predicate::FCMP_ORD && IsZero) {
  890. // The special case "fcmp ord %a, 0" is the canonical check that LHS isn't
  891. // NaN, so equivalent to a == a and doesn't need the two comparisons an
  892. // "ord" normally would.
  893. RHS = LHS;
  894. IsZero = false;
  895. CC = AArch64CC::EQ;
  896. } else
  897. changeVectorFCMPPredToAArch64CC(Pred, CC, CC2, Invert);
  898. bool NoNans = ST.getTargetLowering()->getTargetMachine().Options.NoNaNsFPMath;
  899. // Instead of having an apply function, just build here to simplify things.
  900. MIB.setInstrAndDebugLoc(MI);
  901. auto Cmp = getVectorFCMP(CC, LHS, RHS, IsZero, NoNans, MRI);
  902. Register CmpRes;
  903. if (CC2 == AArch64CC::AL)
  904. CmpRes = Cmp(MIB);
  905. else {
  906. auto Cmp2 = getVectorFCMP(CC2, LHS, RHS, IsZero, NoNans, MRI);
  907. auto Cmp2Dst = Cmp2(MIB);
  908. auto Cmp1Dst = Cmp(MIB);
  909. CmpRes = MIB.buildOr(DstTy, Cmp1Dst, Cmp2Dst).getReg(0);
  910. }
  911. if (Invert)
  912. CmpRes = MIB.buildNot(DstTy, CmpRes).getReg(0);
  913. MRI.replaceRegWith(Dst, CmpRes);
  914. MI.eraseFromParent();
  915. return true;
  916. }
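/// Match a G_STORE of a G_TRUNC so that it can be selected as a truncating
/// store of the wider source value (at most 64 bits), returned in \p SrcReg.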
  917. static bool matchFormTruncstore(MachineInstr &MI, MachineRegisterInfo &MRI,
  918. Register &SrcReg) {
  919. assert(MI.getOpcode() == TargetOpcode::G_STORE);
  920. Register DstReg = MI.getOperand(0).getReg();
  921. if (MRI.getType(DstReg).isVector())
  922. return false;
  923. // Match a store of a truncate.
  924. if (!mi_match(DstReg, MRI, m_GTrunc(m_Reg(SrcReg))))
  925. return false;
  926. // Only form truncstores for value types of max 64b.
  927. return MRI.getType(SrcReg).getSizeInBits() <= 64;
  928. }
  929. static bool applyFormTruncstore(MachineInstr &MI, MachineRegisterInfo &MRI,
  930. MachineIRBuilder &B,
  931. GISelChangeObserver &Observer,
  932. Register &SrcReg) {
  933. assert(MI.getOpcode() == TargetOpcode::G_STORE);
  934. Observer.changingInstr(MI);
  935. MI.getOperand(0).setReg(SrcReg);
  936. Observer.changedInstr(MI);
  937. return true;
  938. }
  939. // Lower vector G_SEXT_INREG back to shifts for selection. We allowed them to
  940. // form in the first place for combine opportunities, so any remaining ones
  941. // at this stage need to be lowered back.
  942. static bool matchVectorSextInReg(MachineInstr &MI, MachineRegisterInfo &MRI) {
  943. assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
  944. Register DstReg = MI.getOperand(0).getReg();
  945. LLT DstTy = MRI.getType(DstReg);
  946. return DstTy.isVector();
  947. }
  948. static void applyVectorSextInReg(MachineInstr &MI, MachineRegisterInfo &MRI,
  949. MachineIRBuilder &B,
  950. GISelChangeObserver &Observer) {
  951. assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
  952. B.setInstrAndDebugLoc(MI);
  953. LegalizerHelper Helper(*MI.getMF(), Observer, B);
  954. Helper.lower(MI, 0, /* Unused hint type */ LLT());
  955. }
  956. #define AARCH64POSTLEGALIZERLOWERINGHELPER_GENCOMBINERHELPER_DEPS
  957. #include "AArch64GenPostLegalizeGILowering.inc"
  958. #undef AARCH64POSTLEGALIZERLOWERINGHELPER_GENCOMBINERHELPER_DEPS
  959. namespace {
  960. #define AARCH64POSTLEGALIZERLOWERINGHELPER_GENCOMBINERHELPER_H
  961. #include "AArch64GenPostLegalizeGILowering.inc"
  962. #undef AARCH64POSTLEGALIZERLOWERINGHELPER_GENCOMBINERHELPER_H
  963. class AArch64PostLegalizerLoweringInfo : public CombinerInfo {
  964. public:
  965. AArch64GenPostLegalizerLoweringHelperRuleConfig GeneratedRuleCfg;
  966. AArch64PostLegalizerLoweringInfo(bool OptSize, bool MinSize)
  967. : CombinerInfo(/*AllowIllegalOps*/ true, /*ShouldLegalizeIllegal*/ false,
  968. /*LegalizerInfo*/ nullptr, /*OptEnabled = */ true, OptSize,
  969. MinSize) {
  970. if (!GeneratedRuleCfg.parseCommandLineOption())
  971. report_fatal_error("Invalid rule identifier");
  972. }
  973. bool combine(GISelChangeObserver &Observer, MachineInstr &MI,
  974. MachineIRBuilder &B) const override;
  975. };
  976. bool AArch64PostLegalizerLoweringInfo::combine(GISelChangeObserver &Observer,
  977. MachineInstr &MI,
  978. MachineIRBuilder &B) const {
  979. CombinerHelper Helper(Observer, B, /* IsPreLegalize*/ false);
  980. AArch64GenPostLegalizerLoweringHelper Generated(GeneratedRuleCfg);
  981. return Generated.tryCombineAll(Observer, MI, B, Helper);
  982. }
  983. #define AARCH64POSTLEGALIZERLOWERINGHELPER_GENCOMBINERHELPER_CPP
  984. #include "AArch64GenPostLegalizeGILowering.inc"
  985. #undef AARCH64POSTLEGALIZERLOWERINGHELPER_GENCOMBINERHELPER_CPP
  986. class AArch64PostLegalizerLowering : public MachineFunctionPass {
  987. public:
  988. static char ID;
  989. AArch64PostLegalizerLowering();
  990. StringRef getPassName() const override {
  991. return "AArch64PostLegalizerLowering";
  992. }
  993. bool runOnMachineFunction(MachineFunction &MF) override;
  994. void getAnalysisUsage(AnalysisUsage &AU) const override;
  995. };
  996. } // end anonymous namespace
  997. void AArch64PostLegalizerLowering::getAnalysisUsage(AnalysisUsage &AU) const {
  998. AU.addRequired<TargetPassConfig>();
  999. AU.setPreservesCFG();
  1000. getSelectionDAGFallbackAnalysisUsage(AU);
  1001. MachineFunctionPass::getAnalysisUsage(AU);
  1002. }
  1003. AArch64PostLegalizerLowering::AArch64PostLegalizerLowering()
  1004. : MachineFunctionPass(ID) {
  1005. initializeAArch64PostLegalizerLoweringPass(*PassRegistry::getPassRegistry());
  1006. }
  1007. bool AArch64PostLegalizerLowering::runOnMachineFunction(MachineFunction &MF) {
  1008. if (MF.getProperties().hasProperty(
  1009. MachineFunctionProperties::Property::FailedISel))
  1010. return false;
  1011. assert(MF.getProperties().hasProperty(
  1012. MachineFunctionProperties::Property::Legalized) &&
  1013. "Expected a legalized function?");
  1014. auto *TPC = &getAnalysis<TargetPassConfig>();
  1015. const Function &F = MF.getFunction();
  1016. AArch64PostLegalizerLoweringInfo PCInfo(F.hasOptSize(), F.hasMinSize());
  1017. Combiner C(PCInfo, TPC);
  1018. return C.combineMachineInstrs(MF, /*CSEInfo*/ nullptr);
  1019. }
  1020. char AArch64PostLegalizerLowering::ID = 0;
  1021. INITIALIZE_PASS_BEGIN(AArch64PostLegalizerLowering, DEBUG_TYPE,
  1022. "Lower AArch64 MachineInstrs after legalization", false,
  1023. false)
  1024. INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
  1025. INITIALIZE_PASS_END(AArch64PostLegalizerLowering, DEBUG_TYPE,
  1026. "Lower AArch64 MachineInstrs after legalization", false,
  1027. false)
  1028. namespace llvm {
  1029. FunctionPass *createAArch64PostLegalizerLowering() {
  1030. return new AArch64PostLegalizerLowering();
  1031. }
  1032. } // end namespace llvm