MIRCanonicalizerPass.cpp 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423
  1. //===-------------- MIRCanonicalizer.cpp - MIR Canonicalizer --------------===//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. //
  9. // The purpose of this pass is to employ a canonical code transformation so
  10. // that code compiled with slightly different IR passes can be diffed more
  11. // effectively than otherwise. This is done by renaming vregs in a given
  12. // LiveRange in a canonical way. This pass also does a pseudo-scheduling to
  13. // move defs closer to their use inorder to reduce diffs caused by slightly
  14. // different schedules.
  15. //
  16. // Basic Usage:
  17. //
  18. // llc -o - -run-pass mir-canonicalizer example.mir
  19. //
  20. // Reorders instructions canonically.
  21. // Renames virtual register operands canonically.
  22. // Strips certain MIR artifacts (optionally).
  23. //
  24. //===----------------------------------------------------------------------===//
  25. #include "MIRVRegNamerUtils.h"
  26. #include "llvm/ADT/PostOrderIterator.h"
  27. #include "llvm/ADT/STLExtras.h"
  28. #include "llvm/CodeGen/MachineFunctionPass.h"
  29. #include "llvm/CodeGen/MachineRegisterInfo.h"
  30. #include "llvm/InitializePasses.h"
  31. #include "llvm/Pass.h"
  32. #include "llvm/Support/Debug.h"
  33. #include "llvm/Support/raw_ostream.h"
  34. using namespace llvm;
  35. #define DEBUG_TYPE "mir-canonicalizer"
  36. static cl::opt<unsigned>
  37. CanonicalizeFunctionNumber("canon-nth-function", cl::Hidden, cl::init(~0u),
  38. cl::value_desc("N"),
  39. cl::desc("Function number to canonicalize."));
  40. namespace {
  41. class MIRCanonicalizer : public MachineFunctionPass {
  42. public:
  43. static char ID;
  44. MIRCanonicalizer() : MachineFunctionPass(ID) {}
  45. StringRef getPassName() const override {
  46. return "Rename register operands in a canonical ordering.";
  47. }
  48. void getAnalysisUsage(AnalysisUsage &AU) const override {
  49. AU.setPreservesCFG();
  50. MachineFunctionPass::getAnalysisUsage(AU);
  51. }
  52. bool runOnMachineFunction(MachineFunction &MF) override;
  53. };
  54. } // end anonymous namespace
  55. char MIRCanonicalizer::ID;
  56. char &llvm::MIRCanonicalizerID = MIRCanonicalizer::ID;
  57. INITIALIZE_PASS_BEGIN(MIRCanonicalizer, "mir-canonicalizer",
  58. "Rename Register Operands Canonically", false, false)
  59. INITIALIZE_PASS_END(MIRCanonicalizer, "mir-canonicalizer",
  60. "Rename Register Operands Canonically", false, false)
  61. static std::vector<MachineBasicBlock *> GetRPOList(MachineFunction &MF) {
  62. if (MF.empty())
  63. return {};
  64. ReversePostOrderTraversal<MachineBasicBlock *> RPOT(&*MF.begin());
  65. std::vector<MachineBasicBlock *> RPOList;
  66. append_range(RPOList, RPOT);
  67. return RPOList;
  68. }
  69. static bool
  70. rescheduleLexographically(std::vector<MachineInstr *> instructions,
  71. MachineBasicBlock *MBB,
  72. std::function<MachineBasicBlock::iterator()> getPos) {
  73. bool Changed = false;
  74. using StringInstrPair = std::pair<std::string, MachineInstr *>;
  75. std::vector<StringInstrPair> StringInstrMap;
  76. for (auto *II : instructions) {
  77. std::string S;
  78. raw_string_ostream OS(S);
  79. II->print(OS);
  80. OS.flush();
  81. // Trim the assignment, or start from the beginning in the case of a store.
  82. const size_t i = S.find('=');
  83. StringInstrMap.push_back({(i == std::string::npos) ? S : S.substr(i), II});
  84. }
  85. llvm::sort(StringInstrMap, llvm::less_first());
  86. for (auto &II : StringInstrMap) {
  87. LLVM_DEBUG({
  88. dbgs() << "Splicing ";
  89. II.second->dump();
  90. dbgs() << " right before: ";
  91. getPos()->dump();
  92. });
  93. Changed = true;
  94. MBB->splice(getPos(), MBB, II.second);
  95. }
  96. return Changed;
  97. }
  98. static bool rescheduleCanonically(unsigned &PseudoIdempotentInstCount,
  99. MachineBasicBlock *MBB) {
  100. bool Changed = false;
  101. // Calculates the distance of MI from the beginning of its parent BB.
  102. auto getInstrIdx = [](const MachineInstr &MI) {
  103. unsigned i = 0;
  104. for (const auto &CurMI : *MI.getParent()) {
  105. if (&CurMI == &MI)
  106. return i;
  107. i++;
  108. }
  109. return ~0U;
  110. };
  111. // Pre-Populate vector of instructions to reschedule so that we don't
  112. // clobber the iterator.
  113. std::vector<MachineInstr *> Instructions;
  114. for (auto &MI : *MBB) {
  115. Instructions.push_back(&MI);
  116. }
  117. std::map<MachineInstr *, std::vector<MachineInstr *>> MultiUsers;
  118. std::map<unsigned, MachineInstr *> MultiUserLookup;
  119. unsigned UseToBringDefCloserToCount = 0;
  120. std::vector<MachineInstr *> PseudoIdempotentInstructions;
  121. std::vector<unsigned> PhysRegDefs;
  122. for (auto *II : Instructions) {
  123. for (unsigned i = 1; i < II->getNumOperands(); i++) {
  124. MachineOperand &MO = II->getOperand(i);
  125. if (!MO.isReg())
  126. continue;
  127. if (MO.getReg().isVirtual())
  128. continue;
  129. if (!MO.isDef())
  130. continue;
  131. PhysRegDefs.push_back(MO.getReg());
  132. }
  133. }
  134. for (auto *II : Instructions) {
  135. if (II->getNumOperands() == 0)
  136. continue;
  137. if (II->mayLoadOrStore())
  138. continue;
  139. MachineOperand &MO = II->getOperand(0);
  140. if (!MO.isReg() || !MO.getReg().isVirtual())
  141. continue;
  142. if (!MO.isDef())
  143. continue;
  144. bool IsPseudoIdempotent = true;
  145. for (unsigned i = 1; i < II->getNumOperands(); i++) {
  146. if (II->getOperand(i).isImm()) {
  147. continue;
  148. }
  149. if (II->getOperand(i).isReg()) {
  150. if (!II->getOperand(i).getReg().isVirtual())
  151. if (!llvm::is_contained(PhysRegDefs, II->getOperand(i).getReg())) {
  152. continue;
  153. }
  154. }
  155. IsPseudoIdempotent = false;
  156. break;
  157. }
  158. if (IsPseudoIdempotent) {
  159. PseudoIdempotentInstructions.push_back(II);
  160. continue;
  161. }
  162. LLVM_DEBUG(dbgs() << "Operand " << 0 << " of "; II->dump(); MO.dump(););
  163. MachineInstr *Def = II;
  164. unsigned Distance = ~0U;
  165. MachineInstr *UseToBringDefCloserTo = nullptr;
  166. MachineRegisterInfo *MRI = &MBB->getParent()->getRegInfo();
  167. for (auto &UO : MRI->use_nodbg_operands(MO.getReg())) {
  168. MachineInstr *UseInst = UO.getParent();
  169. const unsigned DefLoc = getInstrIdx(*Def);
  170. const unsigned UseLoc = getInstrIdx(*UseInst);
  171. const unsigned Delta = (UseLoc - DefLoc);
  172. if (UseInst->getParent() != Def->getParent())
  173. continue;
  174. if (DefLoc >= UseLoc)
  175. continue;
  176. if (Delta < Distance) {
  177. Distance = Delta;
  178. UseToBringDefCloserTo = UseInst;
  179. MultiUserLookup[UseToBringDefCloserToCount++] = UseToBringDefCloserTo;
  180. }
  181. }
  182. const auto BBE = MBB->instr_end();
  183. MachineBasicBlock::iterator DefI = BBE;
  184. MachineBasicBlock::iterator UseI = BBE;
  185. for (auto BBI = MBB->instr_begin(); BBI != BBE; ++BBI) {
  186. if (DefI != BBE && UseI != BBE)
  187. break;
  188. if (&*BBI == Def) {
  189. DefI = BBI;
  190. continue;
  191. }
  192. if (&*BBI == UseToBringDefCloserTo) {
  193. UseI = BBI;
  194. continue;
  195. }
  196. }
  197. if (DefI == BBE || UseI == BBE)
  198. continue;
  199. LLVM_DEBUG({
  200. dbgs() << "Splicing ";
  201. DefI->dump();
  202. dbgs() << " right before: ";
  203. UseI->dump();
  204. });
  205. MultiUsers[UseToBringDefCloserTo].push_back(Def);
  206. Changed = true;
  207. MBB->splice(UseI, MBB, DefI);
  208. }
  209. // Sort the defs for users of multiple defs lexographically.
  210. for (const auto &E : MultiUserLookup) {
  211. auto UseI = llvm::find_if(MBB->instrs(), [&](MachineInstr &MI) -> bool {
  212. return &MI == E.second;
  213. });
  214. if (UseI == MBB->instr_end())
  215. continue;
  216. LLVM_DEBUG(
  217. dbgs() << "Rescheduling Multi-Use Instructions Lexographically.";);
  218. Changed |= rescheduleLexographically(
  219. MultiUsers[E.second], MBB,
  220. [&]() -> MachineBasicBlock::iterator { return UseI; });
  221. }
  222. PseudoIdempotentInstCount = PseudoIdempotentInstructions.size();
  223. LLVM_DEBUG(
  224. dbgs() << "Rescheduling Idempotent Instructions Lexographically.";);
  225. Changed |= rescheduleLexographically(
  226. PseudoIdempotentInstructions, MBB,
  227. [&]() -> MachineBasicBlock::iterator { return MBB->begin(); });
  228. return Changed;
  229. }
  230. static bool propagateLocalCopies(MachineBasicBlock *MBB) {
  231. bool Changed = false;
  232. MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
  233. std::vector<MachineInstr *> Copies;
  234. for (MachineInstr &MI : MBB->instrs()) {
  235. if (MI.isCopy())
  236. Copies.push_back(&MI);
  237. }
  238. for (MachineInstr *MI : Copies) {
  239. if (!MI->getOperand(0).isReg())
  240. continue;
  241. if (!MI->getOperand(1).isReg())
  242. continue;
  243. const Register Dst = MI->getOperand(0).getReg();
  244. const Register Src = MI->getOperand(1).getReg();
  245. if (!Dst.isVirtual())
  246. continue;
  247. if (!Src.isVirtual())
  248. continue;
  249. // Not folding COPY instructions if regbankselect has not set the RCs.
  250. // Why are we only considering Register Classes? Because the verifier
  251. // sometimes gets upset if the register classes don't match even if the
  252. // types do. A future patch might add COPY folding for matching types in
  253. // pre-registerbankselect code.
  254. if (!MRI.getRegClassOrNull(Dst))
  255. continue;
  256. if (MRI.getRegClass(Dst) != MRI.getRegClass(Src))
  257. continue;
  258. std::vector<MachineOperand *> Uses;
  259. for (MachineOperand &MO : MRI.use_operands(Dst))
  260. Uses.push_back(&MO);
  261. for (auto *MO : Uses)
  262. MO->setReg(Src);
  263. Changed = true;
  264. MI->eraseFromParent();
  265. }
  266. return Changed;
  267. }
  268. static bool doDefKillClear(MachineBasicBlock *MBB) {
  269. bool Changed = false;
  270. for (auto &MI : *MBB) {
  271. for (auto &MO : MI.operands()) {
  272. if (!MO.isReg())
  273. continue;
  274. if (!MO.isDef() && MO.isKill()) {
  275. Changed = true;
  276. MO.setIsKill(false);
  277. }
  278. if (MO.isDef() && MO.isDead()) {
  279. Changed = true;
  280. MO.setIsDead(false);
  281. }
  282. }
  283. }
  284. return Changed;
  285. }
  286. static bool runOnBasicBlock(MachineBasicBlock *MBB,
  287. unsigned BasicBlockNum, VRegRenamer &Renamer) {
  288. LLVM_DEBUG({
  289. dbgs() << "\n\n NEW BASIC BLOCK: " << MBB->getName() << " \n\n";
  290. dbgs() << "\n\n================================================\n\n";
  291. });
  292. bool Changed = false;
  293. LLVM_DEBUG(dbgs() << "\n\n NEW BASIC BLOCK: " << MBB->getName() << "\n\n";);
  294. LLVM_DEBUG(dbgs() << "MBB Before Canonical Copy Propagation:\n";
  295. MBB->dump(););
  296. Changed |= propagateLocalCopies(MBB);
  297. LLVM_DEBUG(dbgs() << "MBB After Canonical Copy Propagation:\n"; MBB->dump(););
  298. LLVM_DEBUG(dbgs() << "MBB Before Scheduling:\n"; MBB->dump(););
  299. unsigned IdempotentInstCount = 0;
  300. Changed |= rescheduleCanonically(IdempotentInstCount, MBB);
  301. LLVM_DEBUG(dbgs() << "MBB After Scheduling:\n"; MBB->dump(););
  302. Changed |= Renamer.renameVRegs(MBB, BasicBlockNum);
  303. // TODO: Consider dropping this. Dropping kill defs is probably not
  304. // semantically sound.
  305. Changed |= doDefKillClear(MBB);
  306. LLVM_DEBUG(dbgs() << "Updated MachineBasicBlock:\n"; MBB->dump();
  307. dbgs() << "\n";);
  308. LLVM_DEBUG(
  309. dbgs() << "\n\n================================================\n\n");
  310. return Changed;
  311. }
  312. bool MIRCanonicalizer::runOnMachineFunction(MachineFunction &MF) {
  313. static unsigned functionNum = 0;
  314. if (CanonicalizeFunctionNumber != ~0U) {
  315. if (CanonicalizeFunctionNumber != functionNum++)
  316. return false;
  317. LLVM_DEBUG(dbgs() << "\n Canonicalizing Function " << MF.getName()
  318. << "\n";);
  319. }
  320. // we need a valid vreg to create a vreg type for skipping all those
  321. // stray vreg numbers so reach alignment/canonical vreg values.
  322. std::vector<MachineBasicBlock *> RPOList = GetRPOList(MF);
  323. LLVM_DEBUG(
  324. dbgs() << "\n\n NEW MACHINE FUNCTION: " << MF.getName() << " \n\n";
  325. dbgs() << "\n\n================================================\n\n";
  326. dbgs() << "Total Basic Blocks: " << RPOList.size() << "\n";
  327. for (auto MBB
  328. : RPOList) { dbgs() << MBB->getName() << "\n"; } dbgs()
  329. << "\n\n================================================\n\n";);
  330. unsigned BBNum = 0;
  331. bool Changed = false;
  332. MachineRegisterInfo &MRI = MF.getRegInfo();
  333. VRegRenamer Renamer(MRI);
  334. for (auto *MBB : RPOList)
  335. Changed |= runOnBasicBlock(MBB, BBNum++, Renamer);
  336. return Changed;
  337. }