//===- PPCMachineScheduler.cpp - MI Scheduler for PowerPC -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "PPCMachineScheduler.h"

#include "MCTargetDesc/PPCMCTargetDesc.h"

using namespace llvm;

static cl::opt<bool> DisableAddiLoadHeuristic(
    "disable-ppc-sched-addi-load",
    cl::desc("Disable scheduling addi instruction before "
             "load for ppc"),
    cl::Hidden);

static cl::opt<bool> EnableAddiHeuristic(
    "ppc-postra-bias-addi",
    cl::desc("Enable scheduling addi instruction as early "
             "as possible post ra"),
    cl::Hidden, cl::init(true));
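
// Both flags above are ordinary cl::opt options, so they can be flipped on
// the command line of tools that run this scheduler; for example (invocation
// details are illustrative and may vary with the build and target setup):
//
//   llc -mtriple=powerpc64le-unknown-linux-gnu -disable-ppc-sched-addi-load ...
//   llc -mtriple=powerpc64le-unknown-linux-gnu -ppc-postra-bias-addi=false ...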

// Return true if the candidate is an ADDI/ADDI8 instruction.
static bool isADDIInstr(const GenericScheduler::SchedCandidate &Cand) {
  return Cand.SU->getInstr()->getOpcode() == PPC::ADDI ||
         Cand.SU->getInstr()->getOpcode() == PPC::ADDI8;
}

bool PPCPreRASchedStrategy::biasAddiLoadCandidate(SchedCandidate &Cand,
                                                  SchedCandidate &TryCand,
                                                  SchedBoundary &Zone) const {
  if (DisableAddiLoadHeuristic)
    return false;

  // Prefer the pick that results in the ADDI being placed before the load in
  // the final program order, whether scheduling top-down or bottom-up.
  SchedCandidate &FirstCand = Zone.isTop() ? TryCand : Cand;
  SchedCandidate &SecondCand = Zone.isTop() ? Cand : TryCand;
  if (isADDIInstr(FirstCand) && SecondCand.SU->getInstr()->mayLoad()) {
    TryCand.Reason = Stall;
    return true;
  }
  if (FirstCand.SU->getInstr()->mayLoad() && isADDIInstr(SecondCand)) {
    TryCand.Reason = NoCand;
    return true;
  }

  return false;
}
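
// Illustrative sketch of the pre-RA bias above (the virtual-register
// sequence is hypothetical, not taken from a real test case): before RA the
// addi and the load below carry no dependency,
//
//   %1:gprc = LWZ 0, %0
//   %2:gprc = ADDI %3, 8
//
// but register allocation may later assign registers that create a true
// dependency between the load and the addi. Placing the addi ahead of the
// load in the scheduled order, as biasAddiLoadCandidate prefers, keeps the
// cheap addi from being stalled behind the load latency if that happens.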

bool PPCPreRASchedStrategy::tryCandidate(SchedCandidate &Cand,
                                         SchedCandidate &TryCand,
                                         SchedBoundary *Zone) const {
  // From GenericScheduler::tryCandidate

  // Initialize the candidate if needed.
  if (!Cand.isValid()) {
    TryCand.Reason = NodeOrder;
    return true;
  }

  // Bias PhysReg Defs and copies towards their uses and definitions,
  // respectively.
  if (tryGreater(biasPhysReg(TryCand.SU, TryCand.AtTop),
                 biasPhysReg(Cand.SU, Cand.AtTop), TryCand, Cand, PhysReg))
    return TryCand.Reason != NoCand;

  // Avoid exceeding the target's limit.
  if (DAG->isTrackingPressure() &&
      tryPressure(TryCand.RPDelta.Excess, Cand.RPDelta.Excess, TryCand, Cand,
                  RegExcess, TRI, DAG->MF))
    return TryCand.Reason != NoCand;

  // Avoid increasing the max critical pressure in the scheduled region.
  if (DAG->isTrackingPressure() &&
      tryPressure(TryCand.RPDelta.CriticalMax, Cand.RPDelta.CriticalMax,
                  TryCand, Cand, RegCritical, TRI, DAG->MF))
    return TryCand.Reason != NoCand;

  // We only compare a subset of features when comparing nodes between the
  // Top and Bottom boundary. Some properties are simply incomparable; in many
  // other instances we should only override the other boundary if something
  // is a clear good pick on one boundary. Skip heuristics that are more
  // "tie-breaking" in nature.
  bool SameBoundary = Zone != nullptr;
  if (SameBoundary) {
    // For loops that are acyclic path limited, aggressively schedule for
    // latency. Within a single cycle, whenever CurrMOps > 0, allow normal
    // heuristics to take precedence.
    if (Rem.IsAcyclicLatencyLimited && !Zone->getCurrMOps() &&
        tryLatency(TryCand, Cand, *Zone))
      return TryCand.Reason != NoCand;

    // Prioritize instructions that read unbuffered resources by stall cycles.
    if (tryLess(Zone->getLatencyStallCycles(TryCand.SU),
                Zone->getLatencyStallCycles(Cand.SU), TryCand, Cand, Stall))
      return TryCand.Reason != NoCand;
  }

  // Keep clustered nodes together to encourage downstream peephole
  // optimizations which may reduce resource requirements.
  //
  // This is a best effort to set things up for a post-RA pass. Optimizations
  // like generating loads of multiple registers should ideally be done within
  // the scheduler pass by combining the loads during DAG postprocessing.
  const SUnit *CandNextClusterSU =
      Cand.AtTop ? DAG->getNextClusterSucc() : DAG->getNextClusterPred();
  const SUnit *TryCandNextClusterSU =
      TryCand.AtTop ? DAG->getNextClusterSucc() : DAG->getNextClusterPred();
  if (tryGreater(TryCand.SU == TryCandNextClusterSU,
                 Cand.SU == CandNextClusterSU, TryCand, Cand, Cluster))
    return TryCand.Reason != NoCand;

  if (SameBoundary) {
    // Weak edges are for clustering and other constraints.
    if (tryLess(getWeakLeft(TryCand.SU, TryCand.AtTop),
                getWeakLeft(Cand.SU, Cand.AtTop), TryCand, Cand, Weak))
      return TryCand.Reason != NoCand;
  }

  // Avoid increasing the max pressure of the entire region.
  if (DAG->isTrackingPressure() &&
      tryPressure(TryCand.RPDelta.CurrentMax, Cand.RPDelta.CurrentMax, TryCand,
                  Cand, RegMax, TRI, DAG->MF))
    return TryCand.Reason != NoCand;

  if (SameBoundary) {
    // Avoid critical resource consumption and balance the schedule.
    TryCand.initResourceDelta(DAG, SchedModel);
    if (tryLess(TryCand.ResDelta.CritResources, Cand.ResDelta.CritResources,
                TryCand, Cand, ResourceReduce))
      return TryCand.Reason != NoCand;
    if (tryGreater(TryCand.ResDelta.DemandedResources,
                   Cand.ResDelta.DemandedResources, TryCand, Cand,
                   ResourceDemand))
      return TryCand.Reason != NoCand;

    // Avoid serializing long latency dependence chains.
    // For acyclic path limited loops, latency was already checked above.
    if (!RegionPolicy.DisableLatencyHeuristic && TryCand.Policy.ReduceLatency &&
        !Rem.IsAcyclicLatencyLimited && tryLatency(TryCand, Cand, *Zone))
      return TryCand.Reason != NoCand;

    // Fall through to original instruction order.
    if ((Zone->isTop() && TryCand.SU->NodeNum < Cand.SU->NodeNum) ||
        (!Zone->isTop() && TryCand.SU->NodeNum > Cand.SU->NodeNum)) {
      TryCand.Reason = NodeOrder;
    }
  }

  // GenericScheduler::tryCandidate end

  // Apply the PowerPC-specific heuristic only when TryCand isn't selected, or
  // is selected only by node order.
  if (TryCand.Reason != NodeOrder && TryCand.Reason != NoCand)
    return true;

  // There are some benefits to scheduling the ADDI before the load to hide
  // the latency, as RA may create a true dependency between the load and the
  // addi.
  if (SameBoundary) {
    if (biasAddiLoadCandidate(Cand, TryCand, *Zone))
      return TryCand.Reason != NoCand;
  }

  return TryCand.Reason != NoCand;
}

// Post-RA: prefer an ADDI over a non-ADDI candidate so that the (typically
// induction-variable) update is issued as early as possible.
bool PPCPostRASchedStrategy::biasAddiCandidate(SchedCandidate &Cand,
                                               SchedCandidate &TryCand) const {
  if (!EnableAddiHeuristic)
    return false;

  if (isADDIInstr(TryCand) && !isADDIInstr(Cand)) {
    TryCand.Reason = Stall;
    return true;
  }
  return false;
}

bool PPCPostRASchedStrategy::tryCandidate(SchedCandidate &Cand,
                                          SchedCandidate &TryCand) {
  // From PostGenericScheduler::tryCandidate

  // Initialize the candidate if needed.
  if (!Cand.isValid()) {
    TryCand.Reason = NodeOrder;
    return true;
  }

  // Prioritize instructions that read unbuffered resources by stall cycles.
  if (tryLess(Top.getLatencyStallCycles(TryCand.SU),
              Top.getLatencyStallCycles(Cand.SU), TryCand, Cand, Stall))
    return TryCand.Reason != NoCand;

  // Keep clustered nodes together.
  if (tryGreater(TryCand.SU == DAG->getNextClusterSucc(),
                 Cand.SU == DAG->getNextClusterSucc(), TryCand, Cand, Cluster))
    return TryCand.Reason != NoCand;

  // Avoid critical resource consumption and balance the schedule.
  if (tryLess(TryCand.ResDelta.CritResources, Cand.ResDelta.CritResources,
              TryCand, Cand, ResourceReduce))
    return TryCand.Reason != NoCand;
  if (tryGreater(TryCand.ResDelta.DemandedResources,
                 Cand.ResDelta.DemandedResources, TryCand, Cand,
                 ResourceDemand))
    return TryCand.Reason != NoCand;

  // Avoid serializing long latency dependence chains.
  if (Cand.Policy.ReduceLatency && tryLatency(TryCand, Cand, Top))
    return TryCand.Reason != NoCand;

  // Fall through to original instruction order.
  if (TryCand.SU->NodeNum < Cand.SU->NodeNum)
    TryCand.Reason = NodeOrder;

  // PostGenericScheduler::tryCandidate end

  // Apply the PowerPC post-RA-specific heuristic only when TryCand isn't
  // selected, or is selected only by node order.
  if (TryCand.Reason != NodeOrder && TryCand.Reason != NoCand)
    return true;

  // There are some benefits to scheduling the ADDI as early as possible
  // post-RA to avoid being stalled by vector instructions which take up all
  // the hardware units. ADDI is also usually used to post-increment the loop
  // induction variable, which matters for performance.
  if (biasAddiCandidate(Cand, TryCand))
    return TryCand.Reason != NoCand;

  return TryCand.Reason != NoCand;
}
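
// Illustrative sketch of the post-RA bias above (hypothetical loop body, not
// taken from a real test case): when a long-latency vector op and the
// induction-variable update are both ready,
//
//   xvmaddadp 0, 1, 2
//   addi      3, 3, 16
//
// biasAddiCandidate prefers issuing the addi first so the next iteration's
// address computation is not queued up behind busy vector units.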

void PPCPostRASchedStrategy::enterMBB(MachineBasicBlock *MBB) {
  // Custom PPC PostRA specific behavior here.
  PostGenericScheduler::enterMBB(MBB);
}

void PPCPostRASchedStrategy::leaveMBB() {
  // Custom PPC PostRA specific behavior here.
  PostGenericScheduler::leaveMBB();
}

void PPCPostRASchedStrategy::initialize(ScheduleDAGMI *Dag) {
  // Custom PPC PostRA specific initialization here.
  PostGenericScheduler::initialize(Dag);
}

SUnit *PPCPostRASchedStrategy::pickNode(bool &IsTopNode) {
  // Custom PPC PostRA specific scheduling here.
  return PostGenericScheduler::pickNode(IsTopNode);
}