ExecutionDomainFix.cpp 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470
  1. //===- ExecutionDomainFix.cpp - Fix execution domain issues ----*- C++ -*--===//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. #include "llvm/CodeGen/ExecutionDomainFix.h"
  9. #include "llvm/CodeGen/MachineRegisterInfo.h"
  10. #include "llvm/CodeGen/TargetInstrInfo.h"
  11. #include "llvm/Support/Debug.h"
  12. using namespace llvm;
  13. #define DEBUG_TYPE "execution-deps-fix"
  14. iterator_range<SmallVectorImpl<int>::const_iterator>
  15. ExecutionDomainFix::regIndices(unsigned Reg) const {
  16. assert(Reg < AliasMap.size() && "Invalid register");
  17. const auto &Entry = AliasMap[Reg];
  18. return make_range(Entry.begin(), Entry.end());
  19. }
  20. DomainValue *ExecutionDomainFix::alloc(int domain) {
  21. DomainValue *dv = Avail.empty() ? new (Allocator.Allocate()) DomainValue
  22. : Avail.pop_back_val();
  23. if (domain >= 0)
  24. dv->addDomain(domain);
  25. assert(dv->Refs == 0 && "Reference count wasn't cleared");
  26. assert(!dv->Next && "Chained DomainValue shouldn't have been recycled");
  27. return dv;
  28. }
  29. void ExecutionDomainFix::release(DomainValue *DV) {
  30. while (DV) {
  31. assert(DV->Refs && "Bad DomainValue");
  32. if (--DV->Refs)
  33. return;
  34. // There are no more DV references. Collapse any contained instructions.
  35. if (DV->AvailableDomains && !DV->isCollapsed())
  36. collapse(DV, DV->getFirstDomain());
  37. DomainValue *Next = DV->Next;
  38. DV->clear();
  39. Avail.push_back(DV);
  40. // Also release the next DomainValue in the chain.
  41. DV = Next;
  42. }
  43. }
  44. DomainValue *ExecutionDomainFix::resolve(DomainValue *&DVRef) {
  45. DomainValue *DV = DVRef;
  46. if (!DV || !DV->Next)
  47. return DV;
  48. // DV has a chain. Find the end.
  49. do
  50. DV = DV->Next;
  51. while (DV->Next);
  52. // Update DVRef to point to DV.
  53. retain(DV);
  54. release(DVRef);
  55. DVRef = DV;
  56. return DV;
  57. }
  58. void ExecutionDomainFix::setLiveReg(int rx, DomainValue *dv) {
  59. assert(unsigned(rx) < NumRegs && "Invalid index");
  60. assert(!LiveRegs.empty() && "Must enter basic block first.");
  61. if (LiveRegs[rx] == dv)
  62. return;
  63. if (LiveRegs[rx])
  64. release(LiveRegs[rx]);
  65. LiveRegs[rx] = retain(dv);
  66. }
  67. void ExecutionDomainFix::kill(int rx) {
  68. assert(unsigned(rx) < NumRegs && "Invalid index");
  69. assert(!LiveRegs.empty() && "Must enter basic block first.");
  70. if (!LiveRegs[rx])
  71. return;
  72. release(LiveRegs[rx]);
  73. LiveRegs[rx] = nullptr;
  74. }
  75. void ExecutionDomainFix::force(int rx, unsigned domain) {
  76. assert(unsigned(rx) < NumRegs && "Invalid index");
  77. assert(!LiveRegs.empty() && "Must enter basic block first.");
  78. if (DomainValue *dv = LiveRegs[rx]) {
  79. if (dv->isCollapsed())
  80. dv->addDomain(domain);
  81. else if (dv->hasDomain(domain))
  82. collapse(dv, domain);
  83. else {
  84. // This is an incompatible open DomainValue. Collapse it to whatever and
  85. // force the new value into domain. This costs a domain crossing.
  86. collapse(dv, dv->getFirstDomain());
  87. assert(LiveRegs[rx] && "Not live after collapse?");
  88. LiveRegs[rx]->addDomain(domain);
  89. }
  90. } else {
  91. // Set up basic collapsed DomainValue.
  92. setLiveReg(rx, alloc(domain));
  93. }
  94. }
  95. void ExecutionDomainFix::collapse(DomainValue *dv, unsigned domain) {
  96. assert(dv->hasDomain(domain) && "Cannot collapse");
  97. // Collapse all the instructions.
  98. while (!dv->Instrs.empty())
  99. TII->setExecutionDomain(*dv->Instrs.pop_back_val(), domain);
  100. dv->setSingleDomain(domain);
  101. // If there are multiple users, give them new, unique DomainValues.
  102. if (!LiveRegs.empty() && dv->Refs > 1)
  103. for (unsigned rx = 0; rx != NumRegs; ++rx)
  104. if (LiveRegs[rx] == dv)
  105. setLiveReg(rx, alloc(domain));
  106. }
  107. bool ExecutionDomainFix::merge(DomainValue *A, DomainValue *B) {
  108. assert(!A->isCollapsed() && "Cannot merge into collapsed");
  109. assert(!B->isCollapsed() && "Cannot merge from collapsed");
  110. if (A == B)
  111. return true;
  112. // Restrict to the domains that A and B have in common.
  113. unsigned common = A->getCommonDomains(B->AvailableDomains);
  114. if (!common)
  115. return false;
  116. A->AvailableDomains = common;
  117. A->Instrs.append(B->Instrs.begin(), B->Instrs.end());
  118. // Clear the old DomainValue so we won't try to swizzle instructions twice.
  119. B->clear();
  120. // All uses of B are referred to A.
  121. B->Next = retain(A);
  122. for (unsigned rx = 0; rx != NumRegs; ++rx) {
  123. assert(!LiveRegs.empty() && "no space allocated for live registers");
  124. if (LiveRegs[rx] == B)
  125. setLiveReg(rx, A);
  126. }
  127. return true;
  128. }
  129. void ExecutionDomainFix::enterBasicBlock(
  130. const LoopTraversal::TraversedMBBInfo &TraversedMBB) {
  131. MachineBasicBlock *MBB = TraversedMBB.MBB;
  132. // Set up LiveRegs to represent registers entering MBB.
  133. // Set default domain values to 'no domain' (nullptr)
  134. if (LiveRegs.empty())
  135. LiveRegs.assign(NumRegs, nullptr);
  136. // This is the entry block.
  137. if (MBB->pred_empty()) {
  138. LLVM_DEBUG(dbgs() << printMBBReference(*MBB) << ": entry\n");
  139. return;
  140. }
  141. // Try to coalesce live-out registers from predecessors.
  142. for (MachineBasicBlock *pred : MBB->predecessors()) {
  143. assert(unsigned(pred->getNumber()) < MBBOutRegsInfos.size() &&
  144. "Should have pre-allocated MBBInfos for all MBBs");
  145. LiveRegsDVInfo &Incoming = MBBOutRegsInfos[pred->getNumber()];
  146. // Incoming is null if this is a backedge from a BB
  147. // we haven't processed yet
  148. if (Incoming.empty())
  149. continue;
  150. for (unsigned rx = 0; rx != NumRegs; ++rx) {
  151. DomainValue *pdv = resolve(Incoming[rx]);
  152. if (!pdv)
  153. continue;
  154. if (!LiveRegs[rx]) {
  155. setLiveReg(rx, pdv);
  156. continue;
  157. }
  158. // We have a live DomainValue from more than one predecessor.
  159. if (LiveRegs[rx]->isCollapsed()) {
  160. // We are already collapsed, but predecessor is not. Force it.
  161. unsigned Domain = LiveRegs[rx]->getFirstDomain();
  162. if (!pdv->isCollapsed() && pdv->hasDomain(Domain))
  163. collapse(pdv, Domain);
  164. continue;
  165. }
  166. // Currently open, merge in predecessor.
  167. if (!pdv->isCollapsed())
  168. merge(LiveRegs[rx], pdv);
  169. else
  170. force(rx, pdv->getFirstDomain());
  171. }
  172. }
  173. LLVM_DEBUG(dbgs() << printMBBReference(*MBB)
  174. << (!TraversedMBB.IsDone ? ": incomplete\n"
  175. : ": all preds known\n"));
  176. }
  177. void ExecutionDomainFix::leaveBasicBlock(
  178. const LoopTraversal::TraversedMBBInfo &TraversedMBB) {
  179. assert(!LiveRegs.empty() && "Must enter basic block first.");
  180. unsigned MBBNumber = TraversedMBB.MBB->getNumber();
  181. assert(MBBNumber < MBBOutRegsInfos.size() &&
  182. "Unexpected basic block number.");
  183. // Save register clearances at end of MBB - used by enterBasicBlock().
  184. for (DomainValue *OldLiveReg : MBBOutRegsInfos[MBBNumber]) {
  185. release(OldLiveReg);
  186. }
  187. MBBOutRegsInfos[MBBNumber] = LiveRegs;
  188. LiveRegs.clear();
  189. }
  190. bool ExecutionDomainFix::visitInstr(MachineInstr *MI) {
  191. // Update instructions with explicit execution domains.
  192. std::pair<uint16_t, uint16_t> DomP = TII->getExecutionDomain(*MI);
  193. if (DomP.first) {
  194. if (DomP.second)
  195. visitSoftInstr(MI, DomP.second);
  196. else
  197. visitHardInstr(MI, DomP.first);
  198. }
  199. return !DomP.first;
  200. }
  201. void ExecutionDomainFix::processDefs(MachineInstr *MI, bool Kill) {
  202. assert(!MI->isDebugInstr() && "Won't process debug values");
  203. const MCInstrDesc &MCID = MI->getDesc();
  204. for (unsigned i = 0,
  205. e = MI->isVariadic() ? MI->getNumOperands() : MCID.getNumDefs();
  206. i != e; ++i) {
  207. MachineOperand &MO = MI->getOperand(i);
  208. if (!MO.isReg())
  209. continue;
  210. if (MO.isUse())
  211. continue;
  212. for (int rx : regIndices(MO.getReg())) {
  213. // This instruction explicitly defines rx.
  214. LLVM_DEBUG(dbgs() << printReg(RC->getRegister(rx), TRI) << ":\t" << *MI);
  215. // Kill off domains redefined by generic instructions.
  216. if (Kill)
  217. kill(rx);
  218. }
  219. }
  220. }
  221. void ExecutionDomainFix::visitHardInstr(MachineInstr *mi, unsigned domain) {
  222. // Collapse all uses.
  223. for (unsigned i = mi->getDesc().getNumDefs(),
  224. e = mi->getDesc().getNumOperands();
  225. i != e; ++i) {
  226. MachineOperand &mo = mi->getOperand(i);
  227. if (!mo.isReg())
  228. continue;
  229. for (int rx : regIndices(mo.getReg())) {
  230. force(rx, domain);
  231. }
  232. }
  233. // Kill all defs and force them.
  234. for (unsigned i = 0, e = mi->getDesc().getNumDefs(); i != e; ++i) {
  235. MachineOperand &mo = mi->getOperand(i);
  236. if (!mo.isReg())
  237. continue;
  238. for (int rx : regIndices(mo.getReg())) {
  239. kill(rx);
  240. force(rx, domain);
  241. }
  242. }
  243. }
  244. void ExecutionDomainFix::visitSoftInstr(MachineInstr *mi, unsigned mask) {
  245. // Bitmask of available domains for this instruction after taking collapsed
  246. // operands into account.
  247. unsigned available = mask;
  248. // Scan the explicit use operands for incoming domains.
  249. SmallVector<int, 4> used;
  250. if (!LiveRegs.empty())
  251. for (unsigned i = mi->getDesc().getNumDefs(),
  252. e = mi->getDesc().getNumOperands();
  253. i != e; ++i) {
  254. MachineOperand &mo = mi->getOperand(i);
  255. if (!mo.isReg())
  256. continue;
  257. for (int rx : regIndices(mo.getReg())) {
  258. DomainValue *dv = LiveRegs[rx];
  259. if (dv == nullptr)
  260. continue;
  261. // Bitmask of domains that dv and available have in common.
  262. unsigned common = dv->getCommonDomains(available);
  263. // Is it possible to use this collapsed register for free?
  264. if (dv->isCollapsed()) {
  265. // Restrict available domains to the ones in common with the operand.
  266. // If there are no common domains, we must pay the cross-domain
  267. // penalty for this operand.
  268. if (common)
  269. available = common;
  270. } else if (common)
  271. // Open DomainValue is compatible, save it for merging.
  272. used.push_back(rx);
  273. else
  274. // Open DomainValue is not compatible with instruction. It is useless
  275. // now.
  276. kill(rx);
  277. }
  278. }
  279. // If the collapsed operands force a single domain, propagate the collapse.
  280. if (isPowerOf2_32(available)) {
  281. unsigned domain = countTrailingZeros(available);
  282. TII->setExecutionDomain(*mi, domain);
  283. visitHardInstr(mi, domain);
  284. return;
  285. }
  286. // Kill off any remaining uses that don't match available, and build a list of
  287. // incoming DomainValues that we want to merge.
  288. SmallVector<int, 4> Regs;
  289. for (int rx : used) {
  290. assert(!LiveRegs.empty() && "no space allocated for live registers");
  291. DomainValue *&LR = LiveRegs[rx];
  292. // This useless DomainValue could have been missed above.
  293. if (!LR->getCommonDomains(available)) {
  294. kill(rx);
  295. continue;
  296. }
  297. // Sorted insertion.
  298. // Enables giving priority to the latest domains during merging.
  299. const int Def = RDA->getReachingDef(mi, RC->getRegister(rx));
  300. auto I = partition_point(Regs, [&](int I) {
  301. return RDA->getReachingDef(mi, RC->getRegister(I)) <= Def;
  302. });
  303. Regs.insert(I, rx);
  304. }
  305. // doms are now sorted in order of appearance. Try to merge them all, giving
  306. // priority to the latest ones.
  307. DomainValue *dv = nullptr;
  308. while (!Regs.empty()) {
  309. if (!dv) {
  310. dv = LiveRegs[Regs.pop_back_val()];
  311. // Force the first dv to match the current instruction.
  312. dv->AvailableDomains = dv->getCommonDomains(available);
  313. assert(dv->AvailableDomains && "Domain should have been filtered");
  314. continue;
  315. }
  316. DomainValue *Latest = LiveRegs[Regs.pop_back_val()];
  317. // Skip already merged values.
  318. if (Latest == dv || Latest->Next)
  319. continue;
  320. if (merge(dv, Latest))
  321. continue;
  322. // If latest didn't merge, it is useless now. Kill all registers using it.
  323. for (int i : used) {
  324. assert(!LiveRegs.empty() && "no space allocated for live registers");
  325. if (LiveRegs[i] == Latest)
  326. kill(i);
  327. }
  328. }
  329. // dv is the DomainValue we are going to use for this instruction.
  330. if (!dv) {
  331. dv = alloc();
  332. dv->AvailableDomains = available;
  333. }
  334. dv->Instrs.push_back(mi);
  335. // Finally set all defs and non-collapsed uses to dv. We must iterate through
  336. // all the operators, including imp-def ones.
  337. for (const MachineOperand &mo : mi->operands()) {
  338. if (!mo.isReg())
  339. continue;
  340. for (int rx : regIndices(mo.getReg())) {
  341. if (!LiveRegs[rx] || (mo.isDef() && LiveRegs[rx] != dv)) {
  342. kill(rx);
  343. setLiveReg(rx, dv);
  344. }
  345. }
  346. }
  347. }
  348. void ExecutionDomainFix::processBasicBlock(
  349. const LoopTraversal::TraversedMBBInfo &TraversedMBB) {
  350. enterBasicBlock(TraversedMBB);
  351. // If this block is not done, it makes little sense to make any decisions
  352. // based on clearance information. We need to make a second pass anyway,
  353. // and by then we'll have better information, so we can avoid doing the work
  354. // to try and break dependencies now.
  355. for (MachineInstr &MI : *TraversedMBB.MBB) {
  356. if (!MI.isDebugInstr()) {
  357. bool Kill = false;
  358. if (TraversedMBB.PrimaryPass)
  359. Kill = visitInstr(&MI);
  360. processDefs(&MI, Kill);
  361. }
  362. }
  363. leaveBasicBlock(TraversedMBB);
  364. }
  365. bool ExecutionDomainFix::runOnMachineFunction(MachineFunction &mf) {
  366. if (skipFunction(mf.getFunction()))
  367. return false;
  368. MF = &mf;
  369. TII = MF->getSubtarget().getInstrInfo();
  370. TRI = MF->getSubtarget().getRegisterInfo();
  371. LiveRegs.clear();
  372. assert(NumRegs == RC->getNumRegs() && "Bad regclass");
  373. LLVM_DEBUG(dbgs() << "********** FIX EXECUTION DOMAIN: "
  374. << TRI->getRegClassName(RC) << " **********\n");
  375. // If no relevant registers are used in the function, we can skip it
  376. // completely.
  377. bool anyregs = false;
  378. const MachineRegisterInfo &MRI = mf.getRegInfo();
  379. for (unsigned Reg : *RC) {
  380. if (MRI.isPhysRegUsed(Reg)) {
  381. anyregs = true;
  382. break;
  383. }
  384. }
  385. if (!anyregs)
  386. return false;
  387. RDA = &getAnalysis<ReachingDefAnalysis>();
  388. // Initialize the AliasMap on the first use.
  389. if (AliasMap.empty()) {
  390. // Given a PhysReg, AliasMap[PhysReg] returns a list of indices into RC and
  391. // therefore the LiveRegs array.
  392. AliasMap.resize(TRI->getNumRegs());
  393. for (unsigned i = 0, e = RC->getNumRegs(); i != e; ++i)
  394. for (MCRegAliasIterator AI(RC->getRegister(i), TRI, true); AI.isValid();
  395. ++AI)
  396. AliasMap[*AI].push_back(i);
  397. }
  398. // Initialize the MBBOutRegsInfos
  399. MBBOutRegsInfos.resize(mf.getNumBlockIDs());
  400. // Traverse the basic blocks.
  401. LoopTraversal Traversal;
  402. LoopTraversal::TraversalOrder TraversedMBBOrder = Traversal.traverse(mf);
  403. for (const LoopTraversal::TraversedMBBInfo &TraversedMBB : TraversedMBBOrder)
  404. processBasicBlock(TraversedMBB);
  405. for (const LiveRegsDVInfo &OutLiveRegs : MBBOutRegsInfos)
  406. for (DomainValue *OutLiveReg : OutLiveRegs)
  407. if (OutLiveReg)
  408. release(OutLiveReg);
  409. MBBOutRegsInfos.clear();
  410. Avail.clear();
  411. Allocator.DestroyAll();
  412. return false;
  413. }