// VLIWMachineScheduler.cpp (34 KB)
//===- VLIWMachineScheduler.cpp - VLIW-Focused Scheduling Pass ------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// MachineScheduler schedules machine instructions after phi elimination. It
// preserves LiveIntervals so it can be invoked before register allocation.
//
//===----------------------------------------------------------------------===//
  13. #include "llvm/CodeGen/VLIWMachineScheduler.h"
  14. #include "llvm/ADT/SmallVector.h"
  15. #include "llvm/CodeGen/DFAPacketizer.h"
  16. #include "llvm/CodeGen/MachineBasicBlock.h"
  17. #include "llvm/CodeGen/MachineFunction.h"
  18. #include "llvm/CodeGen/MachineInstr.h"
  19. #include "llvm/CodeGen/MachineLoopInfo.h"
  20. #include "llvm/CodeGen/RegisterClassInfo.h"
  21. #include "llvm/CodeGen/RegisterPressure.h"
  22. #include "llvm/CodeGen/ScheduleDAG.h"
  23. #include "llvm/CodeGen/ScheduleHazardRecognizer.h"
  24. #include "llvm/CodeGen/TargetInstrInfo.h"
  25. #include "llvm/CodeGen/TargetOpcodes.h"
  26. #include "llvm/CodeGen/TargetRegisterInfo.h"
  27. #include "llvm/CodeGen/TargetSchedule.h"
  28. #include "llvm/CodeGen/TargetSubtargetInfo.h"
  29. #include "llvm/IR/Function.h"
  30. #include "llvm/Support/CommandLine.h"
  31. #include "llvm/Support/Debug.h"
  32. #include "llvm/Support/raw_ostream.h"
  33. #include <algorithm>
  34. #include <cassert>
  35. #include <iomanip>
  36. #include <limits>
  37. #include <memory>
  38. #include <sstream>
  39. using namespace llvm;
#define DEBUG_TYPE "machine-scheduler"

// Skip the register-pressure heuristics in SchedulingCost entirely.
static cl::opt<bool> IgnoreBBRegPressure("ignore-bb-reg-pressure", cl::Hidden,
                                         cl::ZeroOrMore, cl::init(false));

// Tie-breaking preference between otherwise-equal candidates.
static cl::opt<bool> UseNewerCandidate("use-newer-candidate", cl::Hidden,
                                       cl::ZeroOrMore, cl::init(true));

// Verbosity of the scheduler's debug traces; >1 enables the very detailed
// ready-queue dump (readyQueueVerboseDump).
static cl::opt<unsigned> SchedDebugVerboseLevel("misched-verbose-level",
                                                cl::Hidden, cl::ZeroOrMore,
                                                cl::init(1));

// Check if the scheduler should penalize instructions that are available too
// early due to a zero-latency dependence.
static cl::opt<bool> CheckEarlyAvail("check-early-avail", cl::Hidden,
                                     cl::ZeroOrMore, cl::init(true));

// This value is used to determine if a register class is a high pressure set.
// We compute the maximum number of registers needed and divided by the total
// available. Then, we compare the result to this value.
static cl::opt<float> RPThreshold("vliw-misched-reg-pressure", cl::Hidden,
                                  cl::init(0.75f),
                                  cl::desc("High register pressure threhold."));
  58. VLIWResourceModel::VLIWResourceModel(const TargetSubtargetInfo &STI,
  59. const TargetSchedModel *SM)
  60. : TII(STI.getInstrInfo()), SchedModel(SM) {
  61. ResourcesModel = createPacketizer(STI);
  62. // This hard requirement could be relaxed,
  63. // but for now do not let it proceed.
  64. assert(ResourcesModel && "Unimplemented CreateTargetScheduleState.");
  65. Packet.reserve(SchedModel->getIssueWidth());
  66. Packet.clear();
  67. ResourcesModel->clearResources();
  68. }
/// Forget the current packet contents and clear the DFA reservation state so
/// a new instruction group can begin.
void VLIWResourceModel::reset() {
  Packet.clear();
  ResourcesModel->clearResources();
}
// The model owns the packetizer it created in the constructor.
VLIWResourceModel::~VLIWResourceModel() { delete ResourcesModel; }
  74. /// Return true if there is a dependence between SUd and SUu.
  75. bool VLIWResourceModel::hasDependence(const SUnit *SUd, const SUnit *SUu) {
  76. if (SUd->Succs.size() == 0)
  77. return false;
  78. for (const auto &S : SUd->Succs) {
  79. // Since we do not add pseudos to packets, might as well
  80. // ignore order dependencies.
  81. if (S.isCtrl())
  82. continue;
  83. if (S.getSUnit() == SUu && S.getLatency() > 0)
  84. return true;
  85. }
  86. return false;
  87. }
  88. /// Check if scheduling of this SU is possible
  89. /// in the current packet.
  90. /// It is _not_ precise (statefull), it is more like
  91. /// another heuristic. Many corner cases are figured
  92. /// empirically.
  93. bool VLIWResourceModel::isResourceAvailable(SUnit *SU, bool IsTop) {
  94. if (!SU || !SU->getInstr())
  95. return false;
  96. // First see if the pipeline could receive this instruction
  97. // in the current cycle.
  98. switch (SU->getInstr()->getOpcode()) {
  99. default:
  100. if (!ResourcesModel->canReserveResources(*SU->getInstr()))
  101. return false;
  102. break;
  103. case TargetOpcode::EXTRACT_SUBREG:
  104. case TargetOpcode::INSERT_SUBREG:
  105. case TargetOpcode::SUBREG_TO_REG:
  106. case TargetOpcode::REG_SEQUENCE:
  107. case TargetOpcode::IMPLICIT_DEF:
  108. case TargetOpcode::COPY:
  109. case TargetOpcode::INLINEASM:
  110. case TargetOpcode::INLINEASM_BR:
  111. break;
  112. }
  113. // Now see if there are no other dependencies to instructions already
  114. // in the packet.
  115. if (IsTop) {
  116. for (unsigned i = 0, e = Packet.size(); i != e; ++i)
  117. if (hasDependence(Packet[i], SU))
  118. return false;
  119. } else {
  120. for (unsigned i = 0, e = Packet.size(); i != e; ++i)
  121. if (hasDependence(SU, Packet[i]))
  122. return false;
  123. }
  124. return true;
  125. }
  126. /// Keep track of available resources.
  127. bool VLIWResourceModel::reserveResources(SUnit *SU, bool IsTop) {
  128. bool startNewCycle = false;
  129. // Artificially reset state.
  130. if (!SU) {
  131. reset();
  132. TotalPackets++;
  133. return false;
  134. }
  135. // If this SU does not fit in the packet or the packet is now full
  136. // start a new one.
  137. if (!isResourceAvailable(SU, IsTop) ||
  138. Packet.size() >= SchedModel->getIssueWidth()) {
  139. reset();
  140. TotalPackets++;
  141. startNewCycle = true;
  142. }
  143. switch (SU->getInstr()->getOpcode()) {
  144. default:
  145. ResourcesModel->reserveResources(*SU->getInstr());
  146. break;
  147. case TargetOpcode::EXTRACT_SUBREG:
  148. case TargetOpcode::INSERT_SUBREG:
  149. case TargetOpcode::SUBREG_TO_REG:
  150. case TargetOpcode::REG_SEQUENCE:
  151. case TargetOpcode::IMPLICIT_DEF:
  152. case TargetOpcode::KILL:
  153. case TargetOpcode::CFI_INSTRUCTION:
  154. case TargetOpcode::EH_LABEL:
  155. case TargetOpcode::COPY:
  156. case TargetOpcode::INLINEASM:
  157. case TargetOpcode::INLINEASM_BR:
  158. break;
  159. }
  160. Packet.push_back(SU);
  161. #ifndef NDEBUG
  162. LLVM_DEBUG(dbgs() << "Packet[" << TotalPackets << "]:\n");
  163. for (unsigned i = 0, e = Packet.size(); i != e; ++i) {
  164. LLVM_DEBUG(dbgs() << "\t[" << i << "] SU(");
  165. LLVM_DEBUG(dbgs() << Packet[i]->NodeNum << ")\t");
  166. LLVM_DEBUG(Packet[i]->getInstr()->dump());
  167. }
  168. #endif
  169. return startNewCycle;
  170. }
  171. DFAPacketizer *
  172. VLIWResourceModel::createPacketizer(const TargetSubtargetInfo &STI) const {
  173. return STI.getInstrInfo()->CreateTargetScheduleState(STI);
  174. }
/// schedule - Called back from MachineScheduler::runOnMachineFunction
/// after setting up the current scheduling region. [RegionBegin, RegionEnd)
/// only includes instructions that have DAG nodes, not scheduling boundaries.
void VLIWMachineScheduler::schedule() {
  LLVM_DEBUG(dbgs() << "********** MI Converging Scheduling VLIW "
                    << printMBBReference(*BB) << " " << BB->getName()
                    << " in_func " << BB->getParent()->getName()
                    << " at loop depth " << MLI->getLoopDepth(BB) << " \n");

  // Build the region's scheduling DAG with register-pressure tracking.
  buildDAGWithRegPressure();

  Topo.InitDAGTopologicalSorting();

  // Postprocess the DAG to add platform-specific artificial dependencies.
  postprocessDAG();

  SmallVector<SUnit *, 8> TopRoots, BotRoots;
  findRootsAndBiasEdges(TopRoots, BotRoots);

  // Initialize the strategy before modifying the DAG.
  SchedImpl->initialize(this);

  // Report the maximum height and depth found in the DAG (debug builds only).
  LLVM_DEBUG({
    unsigned maxH = 0;
    for (const SUnit &SU : SUnits)
      if (SU.getHeight() > maxH)
        maxH = SU.getHeight();
    dbgs() << "Max Height " << maxH << "\n";
  });
  LLVM_DEBUG({
    unsigned maxD = 0;
    for (const SUnit &SU : SUnits)
      if (SU.getDepth() > maxD)
        maxD = SU.getDepth();
    dbgs() << "Max Depth " << maxD << "\n";
  });
  LLVM_DEBUG(dump());
  if (ViewMISchedDAGs)
    viewGraph();

  initQueues(TopRoots, BotRoots);

  // Main loop: the strategy picks the next node and the direction (top-down
  // or bottom-up); the DAG and queues are updated after each placement.
  bool IsTopNode = false;
  while (true) {
    LLVM_DEBUG(
        dbgs() << "** VLIWMachineScheduler::schedule picking next node\n");
    SUnit *SU = SchedImpl->pickNode(IsTopNode);
    if (!SU)
      break;

    if (!checkSchedLimit())
      break;

    scheduleMI(SU, IsTopNode);

    // Notify the scheduling strategy after updating the DAG.
    SchedImpl->schedNode(SU, IsTopNode);

    updateQueues(SU, IsTopNode);
  }
  // Both zone boundaries must meet once everything is scheduled.
  assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone.");

  placeDebugValues();

  LLVM_DEBUG({
    dbgs() << "*** Final schedule for "
           << printMBBReference(*begin()->getParent()) << " ***\n";
    dumpSchedule();
    dbgs() << '\n';
  });
}
/// Set up per-region state: the two zone boundaries, their hazard
/// recognizers and resource models, and the high-pressure set table.
void ConvergingVLIWScheduler::initialize(ScheduleDAGMI *dag) {
  DAG = static_cast<VLIWMachineScheduler *>(dag);
  SchedModel = DAG->getSchedModel();

  Top.init(DAG, SchedModel);
  Bot.init(DAG, SchedModel);

  // Initialize the HazardRecognizers. If itineraries don't exist, are empty, or
  // are disabled, then these HazardRecs will be disabled.
  const InstrItineraryData *Itin = DAG->getSchedModel()->getInstrItineraries();
  const TargetSubtargetInfo &STI = DAG->MF.getSubtarget();
  const TargetInstrInfo *TII = STI.getInstrInfo();
  // Delete the recognizers/models left over from a previous region before
  // creating fresh ones for this region.
  delete Top.HazardRec;
  delete Bot.HazardRec;
  Top.HazardRec = TII->CreateTargetMIHazardRecognizer(Itin, DAG);
  Bot.HazardRec = TII->CreateTargetMIHazardRecognizer(Itin, DAG);

  delete Top.ResourceModel;
  delete Bot.ResourceModel;
  Top.ResourceModel = createVLIWResourceModel(STI, DAG->getSchedModel());
  Bot.ResourceModel = createVLIWResourceModel(STI, DAG->getSchedModel());

  // Mark each pressure set whose maximum pressure in this region exceeds
  // RPThreshold of the set's limit; SchedulingCost consults this table.
  const std::vector<unsigned> &MaxPressure =
      DAG->getRegPressure().MaxSetPressure;
  HighPressureSets.assign(MaxPressure.size(), false);
  for (unsigned i = 0, e = MaxPressure.size(); i < e; ++i) {
    unsigned Limit = DAG->getRegClassInfo()->getRegPressureSetLimit(i);
    HighPressureSets[i] =
        ((float)MaxPressure[i] > ((float)Limit * RPThreshold));
  }

  assert((!ForceTopDown || !ForceBottomUp) &&
         "-misched-topdown incompatible with -misched-bottomup");
}
/// Factory hook so subclasses can supply a specialized resource model.
VLIWResourceModel *ConvergingVLIWScheduler::createVLIWResourceModel(
    const TargetSubtargetInfo &STI, const TargetSchedModel *SchedModel) const {
  return new VLIWResourceModel(STI, SchedModel);
}
  265. void ConvergingVLIWScheduler::releaseTopNode(SUnit *SU) {
  266. for (const SDep &PI : SU->Preds) {
  267. unsigned PredReadyCycle = PI.getSUnit()->TopReadyCycle;
  268. unsigned MinLatency = PI.getLatency();
  269. #ifndef NDEBUG
  270. Top.MaxMinLatency = std::max(MinLatency, Top.MaxMinLatency);
  271. #endif
  272. if (SU->TopReadyCycle < PredReadyCycle + MinLatency)
  273. SU->TopReadyCycle = PredReadyCycle + MinLatency;
  274. }
  275. if (!SU->isScheduled)
  276. Top.releaseNode(SU, SU->TopReadyCycle);
  277. }
  278. void ConvergingVLIWScheduler::releaseBottomNode(SUnit *SU) {
  279. assert(SU->getInstr() && "Scheduled SUnit must have instr");
  280. for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); I != E;
  281. ++I) {
  282. unsigned SuccReadyCycle = I->getSUnit()->BotReadyCycle;
  283. unsigned MinLatency = I->getLatency();
  284. #ifndef NDEBUG
  285. Bot.MaxMinLatency = std::max(MinLatency, Bot.MaxMinLatency);
  286. #endif
  287. if (SU->BotReadyCycle < SuccReadyCycle + MinLatency)
  288. SU->BotReadyCycle = SuccReadyCycle + MinLatency;
  289. }
  290. if (!SU->isScheduled)
  291. Bot.releaseNode(SU, SU->BotReadyCycle);
  292. }
// Each boundary owns the resource model and hazard recognizer that
// initialize() installed into it.
ConvergingVLIWScheduler::VLIWSchedBoundary::~VLIWSchedBoundary() {
  delete ResourceModel;
  delete HazardRec;
}
  297. /// Does this SU have a hazard within the current instruction group.
  298. ///
  299. /// The scheduler supports two modes of hazard recognition. The first is the
  300. /// ScheduleHazardRecognizer API. It is a fully general hazard recognizer that
  301. /// supports highly complicated in-order reservation tables
  302. /// (ScoreboardHazardRecognizer) and arbitrary target-specific logic.
  303. ///
  304. /// The second is a streamlined mechanism that checks for hazards based on
  305. /// simple counters that the scheduler itself maintains. It explicitly checks
  306. /// for instruction dispatch limitations, including the number of micro-ops that
  307. /// can dispatch per cycle.
  308. ///
  309. /// TODO: Also check whether the SU must start a new group.
  310. bool ConvergingVLIWScheduler::VLIWSchedBoundary::checkHazard(SUnit *SU) {
  311. if (HazardRec->isEnabled())
  312. return HazardRec->getHazardType(SU) != ScheduleHazardRecognizer::NoHazard;
  313. unsigned uops = SchedModel->getNumMicroOps(SU->getInstr());
  314. if (IssueCount + uops > SchedModel->getIssueWidth())
  315. return true;
  316. return false;
  317. }
  318. void ConvergingVLIWScheduler::VLIWSchedBoundary::releaseNode(
  319. SUnit *SU, unsigned ReadyCycle) {
  320. if (ReadyCycle < MinReadyCycle)
  321. MinReadyCycle = ReadyCycle;
  322. // Check for interlocks first. For the purpose of other heuristics, an
  323. // instruction that cannot issue appears as if it's not in the ReadyQueue.
  324. if (ReadyCycle > CurrCycle || checkHazard(SU))
  325. Pending.push(SU);
  326. else
  327. Available.push(SU);
  328. }
/// Move the boundary of scheduled code by one cycle.
void ConvergingVLIWScheduler::VLIWSchedBoundary::bumpCycle() {
  // Carry any issue-width oversubscription forward into the next cycle.
  unsigned Width = SchedModel->getIssueWidth();
  IssueCount = (IssueCount <= Width) ? 0 : IssueCount - Width;

  assert(MinReadyCycle < std::numeric_limits<unsigned>::max() &&
         "MinReadyCycle uninitialized");
  // Jump ahead to the earliest cycle at which anything can become ready.
  unsigned NextCycle = std::max(CurrCycle + 1, MinReadyCycle);

  if (!HazardRec->isEnabled()) {
    // Bypass HazardRec virtual calls.
    CurrCycle = NextCycle;
  } else {
    // Bypass getHazardType calls in case of long latency.
    for (; CurrCycle != NextCycle; ++CurrCycle) {
      // Advance or recede depending on the scheduling direction.
      if (isTop())
        HazardRec->AdvanceCycle();
      else
        HazardRec->RecedeCycle();
    }
  }
  // The pending queue must be rescanned now that the cycle moved.
  CheckPending = true;

  LLVM_DEBUG(dbgs() << "*** Next cycle " << Available.getName() << " cycle "
                    << CurrCycle << '\n');
}
/// Move the boundary of scheduled code by one SUnit.
void ConvergingVLIWScheduler::VLIWSchedBoundary::bumpNode(SUnit *SU) {
  bool startNewCycle = false;

  // Update the reservation table.
  if (HazardRec->isEnabled()) {
    if (!isTop() && SU->isCall) {
      // Calls are scheduled with their preceding instructions. For bottom-up
      // scheduling, clear the pipeline state before emitting.
      HazardRec->Reset();
    }
    HazardRec->EmitInstruction(SU);
  }

  // Update DFA model. reserveResources() reports whether placing SU closed
  // the current packet and therefore requires a new cycle.
  startNewCycle = ResourceModel->reserveResources(SU, isTop());

  // Check the instruction group dispatch limit.
  // TODO: Check if this SU must end a dispatch group.
  IssueCount += SchedModel->getNumMicroOps(SU->getInstr());
  if (startNewCycle) {
    LLVM_DEBUG(dbgs() << "*** Max instrs at cycle " << CurrCycle << '\n');
    bumpCycle();
  } else
    LLVM_DEBUG(dbgs() << "*** IssueCount " << IssueCount << " at cycle "
                      << CurrCycle << '\n');
}
/// Release pending ready nodes in to the available queue. This makes them
/// visible to heuristics.
void ConvergingVLIWScheduler::VLIWSchedBoundary::releasePending() {
  // If the available queue is empty, it is safe to reset MinReadyCycle.
  if (Available.empty())
    MinReadyCycle = std::numeric_limits<unsigned>::max();

  // Check to see if any of the pending instructions are ready to issue. If
  // so, add them to the available queue.
  for (unsigned i = 0, e = Pending.size(); i != e; ++i) {
    SUnit *SU = *(Pending.begin() + i);
    unsigned ReadyCycle = isTop() ? SU->TopReadyCycle : SU->BotReadyCycle;

    if (ReadyCycle < MinReadyCycle)
      MinReadyCycle = ReadyCycle;

    // Not ready in the current cycle: leave it pending.
    if (ReadyCycle > CurrCycle)
      continue;

    // Ready but blocked by a hazard: leave it pending.
    if (checkHazard(SU))
      continue;

    Available.push(SU);
    // Removal compacts the queue, so step the index and bound back to
    // revisit the slot that now holds a different element.
    Pending.remove(Pending.begin() + i);
    --i;
    --e;
  }
  CheckPending = false;
}
  400. /// Remove SU from the ready set for this boundary.
  401. void ConvergingVLIWScheduler::VLIWSchedBoundary::removeReady(SUnit *SU) {
  402. if (Available.isInQueue(SU))
  403. Available.remove(Available.find(SU));
  404. else {
  405. assert(Pending.isInQueue(SU) && "bad ready count");
  406. Pending.remove(Pending.find(SU));
  407. }
  408. }
/// If this queue only has one ready candidate, return it. As a side effect,
/// advance the cycle until at least one node is ready. If multiple instructions
/// are ready, return NULL.
SUnit *ConvergingVLIWScheduler::VLIWSchedBoundary::pickOnlyChoice() {
  if (CheckPending)
    releasePending();

  // Burn a cycle when nothing is available, or when the single available node
  // cannot actually issue (no resources, or unresolved weak edges) while
  // other work is still pending.
  auto AdvanceCycle = [this]() {
    if (Available.empty())
      return true;
    if (Available.size() == 1 && Pending.size() > 0)
      return !ResourceModel->isResourceAvailable(*Available.begin(), isTop()) ||
             getWeakLeft(*Available.begin(), isTop()) != 0;
    return false;
  };
  for (unsigned i = 0; AdvanceCycle(); ++i) {
    // A bounded number of idle cycles must eventually release something.
    assert(i <= (HazardRec->getMaxLookAhead() + MaxMinLatency) &&
           "permanent hazard");
    (void)i;
    // A null SU artificially resets the resource model for the idle cycle.
    ResourceModel->reserveResources(nullptr, isTop());
    bumpCycle();
    releasePending();
  }

  if (Available.size() == 1)
    return *Available.begin();
  return nullptr;
}
#ifndef NDEBUG
/// Print one candidate decision: label, queue name, the pressure change (if
/// valid), the computed cost, and the node itself.
void ConvergingVLIWScheduler::traceCandidate(const char *Label,
                                             const ReadyQueue &Q, SUnit *SU,
                                             int Cost, PressureChange P) {
  dbgs() << Label << " " << Q.getName() << " ";
  if (P.isValid())
    dbgs() << DAG->TRI->getRegPressureSetName(P.getPSet()) << ":"
           << P.getUnitInc() << " ";
  else
    dbgs() << " ";
  dbgs() << "cost(" << Cost << ")\t";
  DAG->dumpNode(*SU);
}

// Very detailed queue dump, to be used with higher verbosity levels.
void ConvergingVLIWScheduler::readyQueueVerboseDump(
    const RegPressureTracker &RPTracker, SchedCandidate &Candidate,
    ReadyQueue &Q) {
  // getMaxPressureDelta temporarily modifies the tracker, hence the
  // const_cast.
  RegPressureTracker &TempTracker = const_cast<RegPressureTracker &>(RPTracker);

  dbgs() << ">>> " << Q.getName() << "\n";
  for (ReadyQueue::iterator I = Q.begin(), E = Q.end(); I != E; ++I) {
    RegPressureDelta RPDelta;
    TempTracker.getMaxPressureDelta((*I)->getInstr(), RPDelta,
                                    DAG->getRegionCriticalPSets(),
                                    DAG->getRegPressure().MaxSetPressure);
    std::stringstream dbgstr;
    dbgstr << "SU(" << std::setw(3) << (*I)->NodeNum << ")";
    dbgs() << dbgstr.str();
    // SchedulingCost(verbose=true) prints its own trace for this node.
    SchedulingCost(Q, *I, Candidate, RPDelta, true);
    dbgs() << "\t";
    (*I)->getInstr()->dump();
  }
  dbgs() << "\n";
}
#endif
  469. /// isSingleUnscheduledPred - If SU2 is the only unscheduled predecessor
  470. /// of SU, return true (we may have duplicates)
  471. static inline bool isSingleUnscheduledPred(SUnit *SU, SUnit *SU2) {
  472. if (SU->NumPredsLeft == 0)
  473. return false;
  474. for (auto &Pred : SU->Preds) {
  475. // We found an available, but not scheduled, predecessor.
  476. if (!Pred.getSUnit()->isScheduled && (Pred.getSUnit() != SU2))
  477. return false;
  478. }
  479. return true;
  480. }
  481. /// isSingleUnscheduledSucc - If SU2 is the only unscheduled successor
  482. /// of SU, return true (we may have duplicates)
  483. static inline bool isSingleUnscheduledSucc(SUnit *SU, SUnit *SU2) {
  484. if (SU->NumSuccsLeft == 0)
  485. return false;
  486. for (auto &Succ : SU->Succs) {
  487. // We found an available, but not scheduled, successor.
  488. if (!Succ.getSUnit()->isScheduled && (Succ.getSUnit() != SU2))
  489. return false;
  490. }
  491. return true;
  492. }
  493. /// Check if the instruction changes the register pressure of a register in the
  494. /// high pressure set. The function returns a negative value if the pressure
  495. /// decreases and a positive value is the pressure increases. If the instruction
  496. /// doesn't use a high pressure register or doesn't change the register
  497. /// pressure, then return 0.
  498. int ConvergingVLIWScheduler::pressureChange(const SUnit *SU, bool isBotUp) {
  499. PressureDiff &PD = DAG->getPressureDiff(SU);
  500. for (auto &P : PD) {
  501. if (!P.isValid())
  502. continue;
  503. // The pressure differences are computed bottom-up, so the comparision for
  504. // an increase is positive in the bottom direction, but negative in the
  505. // top-down direction.
  506. if (HighPressureSets[P.getPSet()])
  507. return (isBotUp ? P.getUnitInc() : -P.getUnitInc());
  508. }
  509. return 0;
  510. }
  511. /// Single point to compute overall scheduling cost.
  512. /// TODO: More heuristics will be used soon.
  513. int ConvergingVLIWScheduler::SchedulingCost(ReadyQueue &Q, SUnit *SU,
  514. SchedCandidate &Candidate,
  515. RegPressureDelta &Delta,
  516. bool verbose) {
  517. // Initial trivial priority.
  518. int ResCount = 1;
  519. // Do not waste time on a node that is already scheduled.
  520. if (!SU || SU->isScheduled)
  521. return ResCount;
  522. LLVM_DEBUG(if (verbose) dbgs()
  523. << ((Q.getID() == TopQID) ? "(top|" : "(bot|"));
  524. // Forced priority is high.
  525. if (SU->isScheduleHigh) {
  526. ResCount += PriorityOne;
  527. LLVM_DEBUG(dbgs() << "H|");
  528. }
  529. unsigned IsAvailableAmt = 0;
  530. // Critical path first.
  531. if (Q.getID() == TopQID) {
  532. if (Top.isLatencyBound(SU)) {
  533. LLVM_DEBUG(if (verbose) dbgs() << "LB|");
  534. ResCount += (SU->getHeight() * ScaleTwo);
  535. }
  536. LLVM_DEBUG(if (verbose) {
  537. std::stringstream dbgstr;
  538. dbgstr << "h" << std::setw(3) << SU->getHeight() << "|";
  539. dbgs() << dbgstr.str();
  540. });
  541. // If resources are available for it, multiply the
  542. // chance of scheduling.
  543. if (Top.ResourceModel->isResourceAvailable(SU, true)) {
  544. IsAvailableAmt = (PriorityTwo + PriorityThree);
  545. ResCount += IsAvailableAmt;
  546. LLVM_DEBUG(if (verbose) dbgs() << "A|");
  547. } else
  548. LLVM_DEBUG(if (verbose) dbgs() << " |");
  549. } else {
  550. if (Bot.isLatencyBound(SU)) {
  551. LLVM_DEBUG(if (verbose) dbgs() << "LB|");
  552. ResCount += (SU->getDepth() * ScaleTwo);
  553. }
  554. LLVM_DEBUG(if (verbose) {
  555. std::stringstream dbgstr;
  556. dbgstr << "d" << std::setw(3) << SU->getDepth() << "|";
  557. dbgs() << dbgstr.str();
  558. });
  559. // If resources are available for it, multiply the
  560. // chance of scheduling.
  561. if (Bot.ResourceModel->isResourceAvailable(SU, false)) {
  562. IsAvailableAmt = (PriorityTwo + PriorityThree);
  563. ResCount += IsAvailableAmt;
  564. LLVM_DEBUG(if (verbose) dbgs() << "A|");
  565. } else
  566. LLVM_DEBUG(if (verbose) dbgs() << " |");
  567. }
  568. unsigned NumNodesBlocking = 0;
  569. if (Q.getID() == TopQID) {
  570. // How many SUs does it block from scheduling?
  571. // Look at all of the successors of this node.
  572. // Count the number of nodes that
  573. // this node is the sole unscheduled node for.
  574. if (Top.isLatencyBound(SU))
  575. for (const SDep &SI : SU->Succs)
  576. if (isSingleUnscheduledPred(SI.getSUnit(), SU))
  577. ++NumNodesBlocking;
  578. } else {
  579. // How many unscheduled predecessors block this node?
  580. if (Bot.isLatencyBound(SU))
  581. for (const SDep &PI : SU->Preds)
  582. if (isSingleUnscheduledSucc(PI.getSUnit(), SU))
  583. ++NumNodesBlocking;
  584. }
  585. ResCount += (NumNodesBlocking * ScaleTwo);
  586. LLVM_DEBUG(if (verbose) {
  587. std::stringstream dbgstr;
  588. dbgstr << "blk " << std::setw(2) << NumNodesBlocking << ")|";
  589. dbgs() << dbgstr.str();
  590. });
  591. // Factor in reg pressure as a heuristic.
  592. if (!IgnoreBBRegPressure) {
  593. // Decrease priority by the amount that register pressure exceeds the limit.
  594. ResCount -= (Delta.Excess.getUnitInc() * PriorityOne);
  595. // Decrease priority if register pressure exceeds the limit.
  596. ResCount -= (Delta.CriticalMax.getUnitInc() * PriorityOne);
  597. // Decrease priority slightly if register pressure would increase over the
  598. // current maximum.
  599. ResCount -= (Delta.CurrentMax.getUnitInc() * PriorityTwo);
  600. // If there are register pressure issues, then we remove the value added for
  601. // the instruction being available. The rationale is that we really don't
  602. // want to schedule an instruction that causes a spill.
  603. if (IsAvailableAmt && pressureChange(SU, Q.getID() != TopQID) > 0 &&
  604. (Delta.Excess.getUnitInc() || Delta.CriticalMax.getUnitInc() ||
  605. Delta.CurrentMax.getUnitInc()))
  606. ResCount -= IsAvailableAmt;
  607. LLVM_DEBUG(if (verbose) {
  608. dbgs() << "RP " << Delta.Excess.getUnitInc() << "/"
  609. << Delta.CriticalMax.getUnitInc() << "/"
  610. << Delta.CurrentMax.getUnitInc() << ")|";
  611. });
  612. }
  613. // Give preference to a zero latency instruction if the dependent
  614. // instruction is in the current packet.
  615. if (Q.getID() == TopQID && getWeakLeft(SU, true) == 0) {
  616. for (const SDep &PI : SU->Preds) {
  617. if (!PI.getSUnit()->getInstr()->isPseudo() && PI.isAssignedRegDep() &&
  618. PI.getLatency() == 0 &&
  619. Top.ResourceModel->isInPacket(PI.getSUnit())) {
  620. ResCount += PriorityThree;
  621. LLVM_DEBUG(if (verbose) dbgs() << "Z|");
  622. }
  623. }
  624. } else if (Q.getID() == BotQID && getWeakLeft(SU, false) == 0) {
  625. for (const SDep &SI : SU->Succs) {
  626. if (!SI.getSUnit()->getInstr()->isPseudo() && SI.isAssignedRegDep() &&
  627. SI.getLatency() == 0 &&
  628. Bot.ResourceModel->isInPacket(SI.getSUnit())) {
  629. ResCount += PriorityThree;
  630. LLVM_DEBUG(if (verbose) dbgs() << "Z|");
  631. }
  632. }
  633. }
  634. // If the instruction has a non-zero latency dependence with an instruction in
  635. // the current packet, then it should not be scheduled yet. The case occurs
  636. // when the dependent instruction is scheduled in a new packet, so the
  637. // scheduler updates the current cycle and pending instructions become
  638. // available.
  639. if (CheckEarlyAvail) {
  640. if (Q.getID() == TopQID) {
  641. for (const auto &PI : SU->Preds) {
  642. if (PI.getLatency() > 0 &&
  643. Top.ResourceModel->isInPacket(PI.getSUnit())) {
  644. ResCount -= PriorityOne;
  645. LLVM_DEBUG(if (verbose) dbgs() << "D|");
  646. }
  647. }
  648. } else {
  649. for (const auto &SI : SU->Succs) {
  650. if (SI.getLatency() > 0 &&
  651. Bot.ResourceModel->isInPacket(SI.getSUnit())) {
  652. ResCount -= PriorityOne;
  653. LLVM_DEBUG(if (verbose) dbgs() << "D|");
  654. }
  655. }
  656. }
  657. }
  658. LLVM_DEBUG(if (verbose) {
  659. std::stringstream dbgstr;
  660. dbgstr << "Total " << std::setw(4) << ResCount << ")";
  661. dbgs() << dbgstr.str();
  662. });
  663. return ResCount;
  664. }
/// Pick the best candidate from the top queue.
///
/// Walks every SUnit in the zone's Available queue, scores it with
/// SchedulingCost(), and keeps the best one in \p Candidate. Returns how the
/// winning candidate was chosen (NoCand if the queue produced nothing).
///
/// TODO: getMaxPressureDelta results can be mostly cached for each SUnit during
/// DAG building. To adjust for the current scheduling location we need to
/// maintain the number of vreg uses remaining to be top-scheduled.
ConvergingVLIWScheduler::CandResult
ConvergingVLIWScheduler::pickNodeFromQueue(VLIWSchedBoundary &Zone,
                                           const RegPressureTracker &RPTracker,
                                           SchedCandidate &Candidate) {
  ReadyQueue &Q = Zone.Available;
  LLVM_DEBUG(if (SchedDebugVerboseLevel > 1)
                 readyQueueVerboseDump(RPTracker, Candidate, Q);
             else Q.dump(););

  // getMaxPressureDelta temporarily modifies the tracker.
  RegPressureTracker &TempTracker = const_cast<RegPressureTracker &>(RPTracker);

  // BestSU remains NULL if no top candidates beat the best existing candidate.
  CandResult FoundCandidate = NoCand;
  for (ReadyQueue::iterator I = Q.begin(), E = Q.end(); I != E; ++I) {
    // Pressure delta this instruction would cause, relative to the region's
    // critical pressure sets.
    RegPressureDelta RPDelta;
    TempTracker.getMaxPressureDelta((*I)->getInstr(), RPDelta,
                                    DAG->getRegionCriticalPSets(),
                                    DAG->getRegPressure().MaxSetPressure);

    int CurrentCost = SchedulingCost(Q, *I, Candidate, RPDelta, false);

    // Initialize the candidate if needed.
    if (!Candidate.SU) {
      LLVM_DEBUG(traceCandidate("DCAND", Q, *I, CurrentCost));
      Candidate.SU = *I;
      Candidate.RPDelta = RPDelta;
      Candidate.SCost = CurrentCost;
      FoundCandidate = NodeOrder;
      continue;
    }

    // Choose node order for negative cost candidates. There is no good
    // candidate in this case.
    // Top-down prefers the lower NodeNum (earlier in source order); bottom-up
    // prefers the higher one.
    if (CurrentCost < 0 && Candidate.SCost < 0) {
      if ((Q.getID() == TopQID && (*I)->NodeNum < Candidate.SU->NodeNum) ||
          (Q.getID() == BotQID && (*I)->NodeNum > Candidate.SU->NodeNum)) {
        LLVM_DEBUG(traceCandidate("NCAND", Q, *I, CurrentCost));
        Candidate.SU = *I;
        Candidate.RPDelta = RPDelta;
        Candidate.SCost = CurrentCost;
        FoundCandidate = NodeOrder;
      }
      continue;
    }

    // Best cost.
    if (CurrentCost > Candidate.SCost) {
      LLVM_DEBUG(traceCandidate("CCAND", Q, *I, CurrentCost));
      Candidate.SU = *I;
      Candidate.RPDelta = RPDelta;
      Candidate.SCost = CurrentCost;
      FoundCandidate = BestCost;
      continue;
    }

    // Choose an instruction that does not depend on an artificial edge.
    // Fewer weak (artificial) edges remaining wins this tie-breaker.
    unsigned CurrWeak = getWeakLeft(*I, (Q.getID() == TopQID));
    unsigned CandWeak = getWeakLeft(Candidate.SU, (Q.getID() == TopQID));
    if (CurrWeak != CandWeak) {
      if (CurrWeak < CandWeak) {
        LLVM_DEBUG(traceCandidate("WCAND", Q, *I, CurrentCost));
        Candidate.SU = *I;
        Candidate.RPDelta = RPDelta;
        Candidate.SCost = CurrentCost;
        FoundCandidate = Weak;
      }
      continue;
    }

    // On a cost tie in a latency-bound zone, prefer the node with more edges
    // in the scheduling direction: unblocking more successors (top-down) or
    // predecessors (bottom-up) tends to expose more parallelism.
    if (CurrentCost == Candidate.SCost && Zone.isLatencyBound(*I)) {
      unsigned CurrSize, CandSize;
      if (Q.getID() == TopQID) {
        CurrSize = (*I)->Succs.size();
        CandSize = Candidate.SU->Succs.size();
      } else {
        CurrSize = (*I)->Preds.size();
        CandSize = Candidate.SU->Preds.size();
      }
      if (CurrSize > CandSize) {
        LLVM_DEBUG(traceCandidate("SPCAND", Q, *I, CurrentCost));
        Candidate.SU = *I;
        Candidate.RPDelta = RPDelta;
        Candidate.SCost = CurrentCost;
        FoundCandidate = BestCost;
      }
      // Keep the old candidate if it's a better candidate. That is, don't use
      // the subsequent tie breaker.
      if (CurrSize != CandSize)
        continue;
    }

    // Tie breaker.
    // To avoid scheduling indeterminism, we need a tie breaker
    // for the case when cost is identical for two nodes.
    if (UseNewerCandidate && CurrentCost == Candidate.SCost) {
      if ((Q.getID() == TopQID && (*I)->NodeNum < Candidate.SU->NodeNum) ||
          (Q.getID() == BotQID && (*I)->NodeNum > Candidate.SU->NodeNum)) {
        LLVM_DEBUG(traceCandidate("TCAND", Q, *I, CurrentCost));
        Candidate.SU = *I;
        Candidate.RPDelta = RPDelta;
        Candidate.SCost = CurrentCost;
        FoundCandidate = NodeOrder;
        continue;
      }
    }

    // Fall through to original instruction order.
    // Only consider node order if Candidate was chosen from this Q.
    if (FoundCandidate == NoCand)
      continue;
  }
  return FoundCandidate;
}
  774. /// Pick the best candidate node from either the top or bottom queue.
  775. SUnit *ConvergingVLIWScheduler::pickNodeBidrectional(bool &IsTopNode) {
  776. // Schedule as far as possible in the direction of no choice. This is most
  777. // efficient, but also provides the best heuristics for CriticalPSets.
  778. if (SUnit *SU = Bot.pickOnlyChoice()) {
  779. LLVM_DEBUG(dbgs() << "Picked only Bottom\n");
  780. IsTopNode = false;
  781. return SU;
  782. }
  783. if (SUnit *SU = Top.pickOnlyChoice()) {
  784. LLVM_DEBUG(dbgs() << "Picked only Top\n");
  785. IsTopNode = true;
  786. return SU;
  787. }
  788. SchedCandidate BotCand;
  789. // Prefer bottom scheduling when heuristics are silent.
  790. CandResult BotResult =
  791. pickNodeFromQueue(Bot, DAG->getBotRPTracker(), BotCand);
  792. assert(BotResult != NoCand && "failed to find the first candidate");
  793. // If either Q has a single candidate that provides the least increase in
  794. // Excess pressure, we can immediately schedule from that Q.
  795. //
  796. // RegionCriticalPSets summarizes the pressure within the scheduled region and
  797. // affects picking from either Q. If scheduling in one direction must
  798. // increase pressure for one of the excess PSets, then schedule in that
  799. // direction first to provide more freedom in the other direction.
  800. if (BotResult == SingleExcess || BotResult == SingleCritical) {
  801. LLVM_DEBUG(dbgs() << "Prefered Bottom Node\n");
  802. IsTopNode = false;
  803. return BotCand.SU;
  804. }
  805. // Check if the top Q has a better candidate.
  806. SchedCandidate TopCand;
  807. CandResult TopResult =
  808. pickNodeFromQueue(Top, DAG->getTopRPTracker(), TopCand);
  809. assert(TopResult != NoCand && "failed to find the first candidate");
  810. if (TopResult == SingleExcess || TopResult == SingleCritical) {
  811. LLVM_DEBUG(dbgs() << "Prefered Top Node\n");
  812. IsTopNode = true;
  813. return TopCand.SU;
  814. }
  815. // If either Q has a single candidate that minimizes pressure above the
  816. // original region's pressure pick it.
  817. if (BotResult == SingleMax) {
  818. LLVM_DEBUG(dbgs() << "Prefered Bottom Node SingleMax\n");
  819. IsTopNode = false;
  820. return BotCand.SU;
  821. }
  822. if (TopResult == SingleMax) {
  823. LLVM_DEBUG(dbgs() << "Prefered Top Node SingleMax\n");
  824. IsTopNode = true;
  825. return TopCand.SU;
  826. }
  827. if (TopCand.SCost > BotCand.SCost) {
  828. LLVM_DEBUG(dbgs() << "Prefered Top Node Cost\n");
  829. IsTopNode = true;
  830. return TopCand.SU;
  831. }
  832. // Otherwise prefer the bottom candidate in node order.
  833. LLVM_DEBUG(dbgs() << "Prefered Bottom in Node order\n");
  834. IsTopNode = false;
  835. return BotCand.SU;
  836. }
  837. /// Pick the best node to balance the schedule. Implements MachineSchedStrategy.
  838. SUnit *ConvergingVLIWScheduler::pickNode(bool &IsTopNode) {
  839. if (DAG->top() == DAG->bottom()) {
  840. assert(Top.Available.empty() && Top.Pending.empty() &&
  841. Bot.Available.empty() && Bot.Pending.empty() && "ReadyQ garbage");
  842. return nullptr;
  843. }
  844. SUnit *SU;
  845. if (ForceTopDown) {
  846. SU = Top.pickOnlyChoice();
  847. if (!SU) {
  848. SchedCandidate TopCand;
  849. CandResult TopResult =
  850. pickNodeFromQueue(Top, DAG->getTopRPTracker(), TopCand);
  851. assert(TopResult != NoCand && "failed to find the first candidate");
  852. (void)TopResult;
  853. SU = TopCand.SU;
  854. }
  855. IsTopNode = true;
  856. } else if (ForceBottomUp) {
  857. SU = Bot.pickOnlyChoice();
  858. if (!SU) {
  859. SchedCandidate BotCand;
  860. CandResult BotResult =
  861. pickNodeFromQueue(Bot, DAG->getBotRPTracker(), BotCand);
  862. assert(BotResult != NoCand && "failed to find the first candidate");
  863. (void)BotResult;
  864. SU = BotCand.SU;
  865. }
  866. IsTopNode = false;
  867. } else {
  868. SU = pickNodeBidrectional(IsTopNode);
  869. }
  870. if (SU->isTopReady())
  871. Top.removeReady(SU);
  872. if (SU->isBottomReady())
  873. Bot.removeReady(SU);
  874. LLVM_DEBUG(dbgs() << "*** " << (IsTopNode ? "Top" : "Bottom")
  875. << " Scheduling instruction in cycle "
  876. << (IsTopNode ? Top.CurrCycle : Bot.CurrCycle) << " ("
  877. << reportPackets() << ")\n";
  878. DAG->dumpNode(*SU));
  879. return SU;
  880. }
  881. /// Update the scheduler's state after scheduling a node. This is the same node
  882. /// that was just returned by pickNode(). However, VLIWMachineScheduler needs
  883. /// to update it's state based on the current cycle before MachineSchedStrategy
  884. /// does.
  885. void ConvergingVLIWScheduler::schedNode(SUnit *SU, bool IsTopNode) {
  886. if (IsTopNode) {
  887. Top.bumpNode(SU);
  888. SU->TopReadyCycle = Top.CurrCycle;
  889. } else {
  890. Bot.bumpNode(SU);
  891. SU->BotReadyCycle = Bot.CurrCycle;
  892. }
  893. }