PPCVSXFMAMutate.cpp 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394
  1. //===--------------- PPCVSXFMAMutate.cpp - VSX FMA Mutation ---------------===//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. //
  9. // This pass mutates the form of VSX FMA instructions to avoid unnecessary
  10. // copies.
  11. //
  12. //===----------------------------------------------------------------------===//
  13. #include "MCTargetDesc/PPCPredicates.h"
  14. #include "PPC.h"
  15. #include "PPCInstrBuilder.h"
  16. #include "PPCInstrInfo.h"
  17. #include "PPCMachineFunctionInfo.h"
  18. #include "PPCTargetMachine.h"
  19. #include "llvm/ADT/STLExtras.h"
  20. #include "llvm/ADT/Statistic.h"
  21. #include "llvm/CodeGen/LiveIntervals.h"
  22. #include "llvm/CodeGen/MachineDominators.h"
  23. #include "llvm/CodeGen/MachineFrameInfo.h"
  24. #include "llvm/CodeGen/MachineFunctionPass.h"
  25. #include "llvm/CodeGen/MachineInstrBuilder.h"
  26. #include "llvm/CodeGen/MachineMemOperand.h"
  27. #include "llvm/CodeGen/MachineRegisterInfo.h"
  28. #include "llvm/CodeGen/PseudoSourceValue.h"
  29. #include "llvm/CodeGen/ScheduleDAG.h"
  30. #include "llvm/CodeGen/SlotIndexes.h"
  31. #include "llvm/InitializePasses.h"
  32. #include "llvm/MC/MCAsmInfo.h"
  33. #include "llvm/MC/TargetRegistry.h"
  34. #include "llvm/Support/CommandLine.h"
  35. #include "llvm/Support/Debug.h"
  36. #include "llvm/Support/ErrorHandling.h"
  37. #include "llvm/Support/raw_ostream.h"
  38. using namespace llvm;
  39. // Temporarily disable FMA mutation by default, since it doesn't handle
  40. // cross-basic-block intervals well.
  41. // See: http://lists.llvm.org/pipermail/llvm-dev/2016-February/095669.html
  42. // http://reviews.llvm.org/D17087
  43. static cl::opt<bool> DisableVSXFMAMutate(
  44. "disable-ppc-vsx-fma-mutation",
  45. cl::desc("Disable VSX FMA instruction mutation"), cl::init(true),
  46. cl::Hidden);
  47. #define DEBUG_TYPE "ppc-vsx-fma-mutate"
  48. namespace llvm { namespace PPC {
  49. int getAltVSXFMAOpcode(uint16_t Opcode);
  50. } }
  51. namespace {
  52. // PPCVSXFMAMutate pass - For copies between VSX registers and non-VSX registers
  53. // (Altivec and scalar floating-point registers), we need to transform the
  54. // copies into subregister copies with other restrictions.
  55. struct PPCVSXFMAMutate : public MachineFunctionPass {
  56. static char ID;
  57. PPCVSXFMAMutate() : MachineFunctionPass(ID) {
  58. initializePPCVSXFMAMutatePass(*PassRegistry::getPassRegistry());
  59. }
  60. LiveIntervals *LIS;
  61. const PPCInstrInfo *TII;
  62. protected:
  63. bool processBlock(MachineBasicBlock &MBB) {
  64. bool Changed = false;
  65. MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
  66. const TargetRegisterInfo *TRI = &TII->getRegisterInfo();
  67. for (MachineBasicBlock::iterator I = MBB.begin(), IE = MBB.end();
  68. I != IE; ++I) {
  69. MachineInstr &MI = *I;
  70. // The default (A-type) VSX FMA form kills the addend (it is taken from
  71. // the target register, which is then updated to reflect the result of
  72. // the FMA). If the instruction, however, kills one of the registers
  73. // used for the product, then we can use the M-form instruction (which
  74. // will take that value from the to-be-defined register).
  75. int AltOpc = PPC::getAltVSXFMAOpcode(MI.getOpcode());
  76. if (AltOpc == -1)
  77. continue;
  78. // This pass is run after register coalescing, and so we're looking for
  79. // a situation like this:
  80. // ...
  81. // %5 = COPY %9; VSLRC:%5,%9
  82. // %5<def,tied1> = XSMADDADP %5<tied0>, %17, %16,
  83. // implicit %rm; VSLRC:%5,%17,%16
  84. // ...
  85. // %9<def,tied1> = XSMADDADP %9<tied0>, %17, %19,
  86. // implicit %rm; VSLRC:%9,%17,%19
  87. // ...
  88. // Where we can eliminate the copy by changing from the A-type to the
  89. // M-type instruction. Specifically, for this example, this means:
  90. // %5<def,tied1> = XSMADDADP %5<tied0>, %17, %16,
  91. // implicit %rm; VSLRC:%5,%17,%16
  92. // is replaced by:
  93. // %16<def,tied1> = XSMADDMDP %16<tied0>, %18, %9,
  94. // implicit %rm; VSLRC:%16,%18,%9
  95. // and we remove: %5 = COPY %9; VSLRC:%5,%9
  96. SlotIndex FMAIdx = LIS->getInstructionIndex(MI);
  97. VNInfo *AddendValNo =
  98. LIS->getInterval(MI.getOperand(1).getReg()).Query(FMAIdx).valueIn();
  99. // This can be null if the register is undef.
  100. if (!AddendValNo)
  101. continue;
  102. MachineInstr *AddendMI = LIS->getInstructionFromIndex(AddendValNo->def);
  103. // The addend and this instruction must be in the same block.
  104. if (!AddendMI || AddendMI->getParent() != MI.getParent())
  105. continue;
  106. // The addend must be a full copy within the same register class.
  107. if (!AddendMI->isFullCopy())
  108. continue;
  109. Register AddendSrcReg = AddendMI->getOperand(1).getReg();
  110. if (Register::isVirtualRegister(AddendSrcReg)) {
  111. if (MRI.getRegClass(AddendMI->getOperand(0).getReg()) !=
  112. MRI.getRegClass(AddendSrcReg))
  113. continue;
  114. } else {
  115. // If AddendSrcReg is a physical register, make sure the destination
  116. // register class contains it.
  117. if (!MRI.getRegClass(AddendMI->getOperand(0).getReg())
  118. ->contains(AddendSrcReg))
  119. continue;
  120. }
  121. // In theory, there could be other uses of the addend copy before this
  122. // fma. We could deal with this, but that would require additional
  123. // logic below and I suspect it will not occur in any relevant
  124. // situations. Additionally, check whether the copy source is killed
  125. // prior to the fma. In order to replace the addend here with the
  126. // source of the copy, it must still be live here. We can't use
  127. // interval testing for a physical register, so as long as we're
  128. // walking the MIs we may as well test liveness here.
  129. //
  130. // FIXME: There is a case that occurs in practice, like this:
  131. // %9 = COPY %f1; VSSRC:%9
  132. // ...
  133. // %6 = COPY %9; VSSRC:%6,%9
  134. // %7 = COPY %9; VSSRC:%7,%9
  135. // %9<def,tied1> = XSMADDASP %9<tied0>, %1, %4; VSSRC:
  136. // %6<def,tied1> = XSMADDASP %6<tied0>, %1, %2; VSSRC:
  137. // %7<def,tied1> = XSMADDASP %7<tied0>, %1, %3; VSSRC:
  138. // which prevents an otherwise-profitable transformation.
  139. bool OtherUsers = false, KillsAddendSrc = false;
  140. for (auto J = std::prev(I), JE = MachineBasicBlock::iterator(AddendMI);
  141. J != JE; --J) {
  142. if (J->readsVirtualRegister(AddendMI->getOperand(0).getReg())) {
  143. OtherUsers = true;
  144. break;
  145. }
  146. if (J->modifiesRegister(AddendSrcReg, TRI) ||
  147. J->killsRegister(AddendSrcReg, TRI)) {
  148. KillsAddendSrc = true;
  149. break;
  150. }
  151. }
  152. if (OtherUsers || KillsAddendSrc)
  153. continue;
  154. // The transformation doesn't work well with things like:
  155. // %5 = A-form-op %5, %11, %5;
  156. // unless %11 is also a kill, so skip when it is not,
  157. // and check operand 3 to see it is also a kill to handle the case:
  158. // %5 = A-form-op %5, %5, %11;
  159. // where %5 and %11 are both kills. This case would be skipped
  160. // otherwise.
  161. Register OldFMAReg = MI.getOperand(0).getReg();
  162. // Find one of the product operands that is killed by this instruction.
  163. unsigned KilledProdOp = 0, OtherProdOp = 0;
  164. Register Reg2 = MI.getOperand(2).getReg();
  165. Register Reg3 = MI.getOperand(3).getReg();
  166. if (LIS->getInterval(Reg2).Query(FMAIdx).isKill()
  167. && Reg2 != OldFMAReg) {
  168. KilledProdOp = 2;
  169. OtherProdOp = 3;
  170. } else if (LIS->getInterval(Reg3).Query(FMAIdx).isKill()
  171. && Reg3 != OldFMAReg) {
  172. KilledProdOp = 3;
  173. OtherProdOp = 2;
  174. }
  175. // If there are no usable killed product operands, then this
  176. // transformation is likely not profitable.
  177. if (!KilledProdOp)
  178. continue;
  179. // If the addend copy is used only by this MI, then the addend source
  180. // register is likely not live here. This could be fixed (based on the
  181. // legality checks above, the live range for the addend source register
  182. // could be extended), but it seems likely that such a trivial copy can
  183. // be coalesced away later, and thus is not worth the effort.
  184. if (Register::isVirtualRegister(AddendSrcReg) &&
  185. !LIS->getInterval(AddendSrcReg).liveAt(FMAIdx))
  186. continue;
  187. // Transform: (O2 * O3) + O1 -> (O2 * O1) + O3.
  188. Register KilledProdReg = MI.getOperand(KilledProdOp).getReg();
  189. Register OtherProdReg = MI.getOperand(OtherProdOp).getReg();
  190. unsigned AddSubReg = AddendMI->getOperand(1).getSubReg();
  191. unsigned KilledProdSubReg = MI.getOperand(KilledProdOp).getSubReg();
  192. unsigned OtherProdSubReg = MI.getOperand(OtherProdOp).getSubReg();
  193. bool AddRegKill = AddendMI->getOperand(1).isKill();
  194. bool KilledProdRegKill = MI.getOperand(KilledProdOp).isKill();
  195. bool OtherProdRegKill = MI.getOperand(OtherProdOp).isKill();
  196. bool AddRegUndef = AddendMI->getOperand(1).isUndef();
  197. bool KilledProdRegUndef = MI.getOperand(KilledProdOp).isUndef();
  198. bool OtherProdRegUndef = MI.getOperand(OtherProdOp).isUndef();
  199. // If there isn't a class that fits, we can't perform the transform.
  200. // This is needed for correctness with a mixture of VSX and Altivec
  201. // instructions to make sure that a low VSX register is not assigned to
  202. // the Altivec instruction.
  203. if (!MRI.constrainRegClass(KilledProdReg,
  204. MRI.getRegClass(OldFMAReg)))
  205. continue;
  206. assert(OldFMAReg == AddendMI->getOperand(0).getReg() &&
  207. "Addend copy not tied to old FMA output!");
  208. LLVM_DEBUG(dbgs() << "VSX FMA Mutation:\n " << MI);
  209. MI.getOperand(0).setReg(KilledProdReg);
  210. MI.getOperand(1).setReg(KilledProdReg);
  211. MI.getOperand(3).setReg(AddendSrcReg);
  212. MI.getOperand(0).setSubReg(KilledProdSubReg);
  213. MI.getOperand(1).setSubReg(KilledProdSubReg);
  214. MI.getOperand(3).setSubReg(AddSubReg);
  215. MI.getOperand(1).setIsKill(KilledProdRegKill);
  216. MI.getOperand(3).setIsKill(AddRegKill);
  217. MI.getOperand(1).setIsUndef(KilledProdRegUndef);
  218. MI.getOperand(3).setIsUndef(AddRegUndef);
  219. MI.setDesc(TII->get(AltOpc));
  220. // If the addend is also a multiplicand, replace it with the addend
  221. // source in both places.
  222. if (OtherProdReg == AddendMI->getOperand(0).getReg()) {
  223. MI.getOperand(2).setReg(AddendSrcReg);
  224. MI.getOperand(2).setSubReg(AddSubReg);
  225. MI.getOperand(2).setIsKill(AddRegKill);
  226. MI.getOperand(2).setIsUndef(AddRegUndef);
  227. } else {
  228. MI.getOperand(2).setReg(OtherProdReg);
  229. MI.getOperand(2).setSubReg(OtherProdSubReg);
  230. MI.getOperand(2).setIsKill(OtherProdRegKill);
  231. MI.getOperand(2).setIsUndef(OtherProdRegUndef);
  232. }
  233. LLVM_DEBUG(dbgs() << " -> " << MI);
  234. // The killed product operand was killed here, so we can reuse it now
  235. // for the result of the fma.
  236. LiveInterval &FMAInt = LIS->getInterval(OldFMAReg);
  237. VNInfo *FMAValNo = FMAInt.getVNInfoAt(FMAIdx.getRegSlot());
  238. for (auto UI = MRI.reg_nodbg_begin(OldFMAReg), UE = MRI.reg_nodbg_end();
  239. UI != UE;) {
  240. MachineOperand &UseMO = *UI;
  241. MachineInstr *UseMI = UseMO.getParent();
  242. ++UI;
  243. // Don't replace the result register of the copy we're about to erase.
  244. if (UseMI == AddendMI)
  245. continue;
  246. UseMO.substVirtReg(KilledProdReg, KilledProdSubReg, *TRI);
  247. }
  248. // Extend the live intervals of the killed product operand to hold the
  249. // fma result.
  250. LiveInterval &NewFMAInt = LIS->getInterval(KilledProdReg);
  251. for (auto &AI : FMAInt) {
  252. // Don't add the segment that corresponds to the original copy.
  253. if (AI.valno == AddendValNo)
  254. continue;
  255. VNInfo *NewFMAValNo =
  256. NewFMAInt.getNextValue(AI.start, LIS->getVNInfoAllocator());
  257. NewFMAInt.addSegment(
  258. LiveInterval::Segment(AI.start, AI.end, NewFMAValNo));
  259. }
  260. LLVM_DEBUG(dbgs() << " extended: " << NewFMAInt << '\n');
  261. // Extend the live interval of the addend source (it might end at the
  262. // copy to be removed, or somewhere in between there and here). This
  263. // is necessary only if it is a physical register.
  264. if (!AddendSrcReg.isVirtual())
  265. for (MCRegUnitIterator Units(AddendSrcReg.asMCReg(), TRI);
  266. Units.isValid(); ++Units) {
  267. unsigned Unit = *Units;
  268. LiveRange &AddendSrcRange = LIS->getRegUnit(Unit);
  269. AddendSrcRange.extendInBlock(LIS->getMBBStartIdx(&MBB),
  270. FMAIdx.getRegSlot());
  271. LLVM_DEBUG(dbgs() << " extended: " << AddendSrcRange << '\n');
  272. }
  273. FMAInt.removeValNo(FMAValNo);
  274. LLVM_DEBUG(dbgs() << " trimmed: " << FMAInt << '\n');
  275. // Remove the (now unused) copy.
  276. LLVM_DEBUG(dbgs() << " removing: " << *AddendMI << '\n');
  277. LIS->RemoveMachineInstrFromMaps(*AddendMI);
  278. AddendMI->eraseFromParent();
  279. Changed = true;
  280. }
  281. return Changed;
  282. }
  283. public:
  284. bool runOnMachineFunction(MachineFunction &MF) override {
  285. if (skipFunction(MF.getFunction()))
  286. return false;
  287. // If we don't have VSX then go ahead and return without doing
  288. // anything.
  289. const PPCSubtarget &STI = MF.getSubtarget<PPCSubtarget>();
  290. if (!STI.hasVSX())
  291. return false;
  292. LIS = &getAnalysis<LiveIntervals>();
  293. TII = STI.getInstrInfo();
  294. bool Changed = false;
  295. if (DisableVSXFMAMutate)
  296. return Changed;
  297. for (MachineBasicBlock &B : llvm::make_early_inc_range(MF))
  298. if (processBlock(B))
  299. Changed = true;
  300. return Changed;
  301. }
  302. void getAnalysisUsage(AnalysisUsage &AU) const override {
  303. AU.addRequired<LiveIntervals>();
  304. AU.addPreserved<LiveIntervals>();
  305. AU.addRequired<SlotIndexes>();
  306. AU.addPreserved<SlotIndexes>();
  307. AU.addRequired<MachineDominatorTree>();
  308. AU.addPreserved<MachineDominatorTree>();
  309. MachineFunctionPass::getAnalysisUsage(AU);
  310. }
  311. };
  312. }
  313. INITIALIZE_PASS_BEGIN(PPCVSXFMAMutate, DEBUG_TYPE,
  314. "PowerPC VSX FMA Mutation", false, false)
  315. INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
  316. INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
  317. INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
  318. INITIALIZE_PASS_END(PPCVSXFMAMutate, DEBUG_TYPE,
  319. "PowerPC VSX FMA Mutation", false, false)
  320. char &llvm::PPCVSXFMAMutateID = PPCVSXFMAMutate::ID;
  321. char PPCVSXFMAMutate::ID = 0;
  322. FunctionPass *llvm::createPPCVSXFMAMutatePass() {
  323. return new PPCVSXFMAMutate();
  324. }