  1. //===- VLIWMachineScheduler.cpp - VLIW-Focused Scheduling Pass ------------===//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. //
  9. // MachineScheduler schedules machine instructions after phi elimination. It
  10. // preserves LiveIntervals so it can be invoked before register allocation.
  11. //
  12. //===----------------------------------------------------------------------===//
  13. #include "llvm/CodeGen/VLIWMachineScheduler.h"
  14. #include "llvm/ADT/SmallVector.h"
  15. #include "llvm/CodeGen/DFAPacketizer.h"
  16. #include "llvm/CodeGen/MachineBasicBlock.h"
  17. #include "llvm/CodeGen/MachineFunction.h"
  18. #include "llvm/CodeGen/MachineInstr.h"
  19. #include "llvm/CodeGen/MachineLoopInfo.h"
  20. #include "llvm/CodeGen/RegisterClassInfo.h"
  21. #include "llvm/CodeGen/RegisterPressure.h"
  22. #include "llvm/CodeGen/ScheduleDAG.h"
  23. #include "llvm/CodeGen/ScheduleHazardRecognizer.h"
  24. #include "llvm/CodeGen/TargetInstrInfo.h"
  25. #include "llvm/CodeGen/TargetOpcodes.h"
  26. #include "llvm/CodeGen/TargetRegisterInfo.h"
  27. #include "llvm/CodeGen/TargetSchedule.h"
  28. #include "llvm/CodeGen/TargetSubtargetInfo.h"
  29. #include "llvm/Support/CommandLine.h"
  30. #include "llvm/Support/Debug.h"
  31. #include "llvm/Support/raw_ostream.h"
  32. #include <algorithm>
  33. #include <cassert>
  34. #include <iomanip>
  35. #include <limits>
  36. #include <memory>
  37. #include <sstream>
  38. using namespace llvm;
  39. #define DEBUG_TYPE "machine-scheduler"
  40. static cl::opt<bool> IgnoreBBRegPressure("ignore-bb-reg-pressure", cl::Hidden,
  41. cl::init(false));
  42. static cl::opt<bool> UseNewerCandidate("use-newer-candidate", cl::Hidden,
  43. cl::init(true));
  44. static cl::opt<unsigned> SchedDebugVerboseLevel("misched-verbose-level",
  45. cl::Hidden, cl::init(1));
  46. // Check if the scheduler should penalize instructions that are available to
  47. // early due to a zero-latency dependence.
  48. static cl::opt<bool> CheckEarlyAvail("check-early-avail", cl::Hidden,
  49. cl::init(true));
  50. // This value is used to determine if a register class is a high pressure set.
  51. // We compute the maximum number of registers needed and divided by the total
  52. // available. Then, we compare the result to this value.
  53. static cl::opt<float> RPThreshold("vliw-misched-reg-pressure", cl::Hidden,
  54. cl::init(0.75f),
  55. cl::desc("High register pressure threhold."));
/// Construct the resource model around a target-created DFA packetizer.
/// \p STI supplies the TargetInstrInfo and the packetizer factory;
/// \p SM supplies the machine's issue width.
VLIWResourceModel::VLIWResourceModel(const TargetSubtargetInfo &STI,
                                     const TargetSchedModel *SM)
    : TII(STI.getInstrInfo()), SchedModel(SM) {
  ResourcesModel = createPacketizer(STI);

  // This hard requirement could be relaxed,
  // but for now do not let it proceed.
  assert(ResourcesModel && "Unimplemented CreateTargetScheduleState.");

  // Pre-size the packet buffer to the issue width so pushes during
  // scheduling never reallocate; start from an empty packet and a clean DFA.
  Packet.reserve(SchedModel->getIssueWidth());
  Packet.clear();
  ResourcesModel->clearResources();
}
/// Discard the in-progress packet and clear all reserved DFA resources,
/// returning the model to its start-of-cycle state.
void VLIWResourceModel::reset() {
  Packet.clear();
  ResourcesModel->clearResources();
}
  71. VLIWResourceModel::~VLIWResourceModel() { delete ResourcesModel; }
  72. /// Return true if there is a dependence between SUd and SUu.
  73. bool VLIWResourceModel::hasDependence(const SUnit *SUd, const SUnit *SUu) {
  74. if (SUd->Succs.size() == 0)
  75. return false;
  76. for (const auto &S : SUd->Succs) {
  77. // Since we do not add pseudos to packets, might as well
  78. // ignore order dependencies.
  79. if (S.isCtrl())
  80. continue;
  81. if (S.getSUnit() == SUu && S.getLatency() > 0)
  82. return true;
  83. }
  84. return false;
  85. }
  86. /// Check if scheduling of this SU is possible
  87. /// in the current packet.
  88. /// It is _not_ precise (statefull), it is more like
  89. /// another heuristic. Many corner cases are figured
  90. /// empirically.
  91. bool VLIWResourceModel::isResourceAvailable(SUnit *SU, bool IsTop) {
  92. if (!SU || !SU->getInstr())
  93. return false;
  94. // First see if the pipeline could receive this instruction
  95. // in the current cycle.
  96. switch (SU->getInstr()->getOpcode()) {
  97. default:
  98. if (!ResourcesModel->canReserveResources(*SU->getInstr()))
  99. return false;
  100. break;
  101. case TargetOpcode::EXTRACT_SUBREG:
  102. case TargetOpcode::INSERT_SUBREG:
  103. case TargetOpcode::SUBREG_TO_REG:
  104. case TargetOpcode::REG_SEQUENCE:
  105. case TargetOpcode::IMPLICIT_DEF:
  106. case TargetOpcode::COPY:
  107. case TargetOpcode::INLINEASM:
  108. case TargetOpcode::INLINEASM_BR:
  109. break;
  110. }
  111. // Now see if there are no other dependencies to instructions already
  112. // in the packet.
  113. if (IsTop) {
  114. for (unsigned i = 0, e = Packet.size(); i != e; ++i)
  115. if (hasDependence(Packet[i], SU))
  116. return false;
  117. } else {
  118. for (unsigned i = 0, e = Packet.size(); i != e; ++i)
  119. if (hasDependence(SU, Packet[i]))
  120. return false;
  121. }
  122. return true;
  123. }
  124. /// Keep track of available resources.
  125. bool VLIWResourceModel::reserveResources(SUnit *SU, bool IsTop) {
  126. bool startNewCycle = false;
  127. // Artificially reset state.
  128. if (!SU) {
  129. reset();
  130. TotalPackets++;
  131. return false;
  132. }
  133. // If this SU does not fit in the packet or the packet is now full
  134. // start a new one.
  135. if (!isResourceAvailable(SU, IsTop) ||
  136. Packet.size() >= SchedModel->getIssueWidth()) {
  137. reset();
  138. TotalPackets++;
  139. startNewCycle = true;
  140. }
  141. switch (SU->getInstr()->getOpcode()) {
  142. default:
  143. ResourcesModel->reserveResources(*SU->getInstr());
  144. break;
  145. case TargetOpcode::EXTRACT_SUBREG:
  146. case TargetOpcode::INSERT_SUBREG:
  147. case TargetOpcode::SUBREG_TO_REG:
  148. case TargetOpcode::REG_SEQUENCE:
  149. case TargetOpcode::IMPLICIT_DEF:
  150. case TargetOpcode::KILL:
  151. case TargetOpcode::CFI_INSTRUCTION:
  152. case TargetOpcode::EH_LABEL:
  153. case TargetOpcode::COPY:
  154. case TargetOpcode::INLINEASM:
  155. case TargetOpcode::INLINEASM_BR:
  156. break;
  157. }
  158. Packet.push_back(SU);
  159. #ifndef NDEBUG
  160. LLVM_DEBUG(dbgs() << "Packet[" << TotalPackets << "]:\n");
  161. for (unsigned i = 0, e = Packet.size(); i != e; ++i) {
  162. LLVM_DEBUG(dbgs() << "\t[" << i << "] SU(");
  163. LLVM_DEBUG(dbgs() << Packet[i]->NodeNum << ")\t");
  164. LLVM_DEBUG(Packet[i]->getInstr()->dump());
  165. }
  166. #endif
  167. return startNewCycle;
  168. }
  169. DFAPacketizer *
  170. VLIWResourceModel::createPacketizer(const TargetSubtargetInfo &STI) const {
  171. return STI.getInstrInfo()->CreateTargetScheduleState(STI);
  172. }
/// schedule - Called back from MachineScheduler::runOnMachineFunction
/// after setting up the current scheduling region. [RegionBegin, RegionEnd)
/// only includes instructions that have DAG nodes, not scheduling boundaries.
void VLIWMachineScheduler::schedule() {
  LLVM_DEBUG(dbgs() << "********** MI Converging Scheduling VLIW "
                    << printMBBReference(*BB) << " " << BB->getName()
                    << " in_func " << BB->getParent()->getName()
                    << " at loop depth " << MLI->getLoopDepth(BB) << " \n");

  // Build the DAG for the region with register-pressure tracking enabled.
  buildDAGWithRegPressure();

  Topo.InitDAGTopologicalSorting();

  // Postprocess the DAG to add platform-specific artificial dependencies.
  postprocessDAG();

  SmallVector<SUnit *, 8> TopRoots, BotRoots;
  findRootsAndBiasEdges(TopRoots, BotRoots);

  // Initialize the strategy before modifying the DAG.
  SchedImpl->initialize(this);

  // Trace the DAG's maximum height and depth (bounds on the critical path).
  LLVM_DEBUG({
    unsigned maxH = 0;
    for (const SUnit &SU : SUnits)
      if (SU.getHeight() > maxH)
        maxH = SU.getHeight();
    dbgs() << "Max Height " << maxH << "\n";
  });
  LLVM_DEBUG({
    unsigned maxD = 0;
    for (const SUnit &SU : SUnits)
      if (SU.getDepth() > maxD)
        maxD = SU.getDepth();
    dbgs() << "Max Depth " << maxD << "\n";
  });
  LLVM_DEBUG(dump());
  if (ViewMISchedDAGs)
    viewGraph();

  initQueues(TopRoots, BotRoots);

  // Main loop: let the strategy pick nodes (from either end, as indicated by
  // IsTopNode) until no candidate remains or the debug limit is hit.
  bool IsTopNode = false;
  while (true) {
    LLVM_DEBUG(
        dbgs() << "** VLIWMachineScheduler::schedule picking next node\n");
    SUnit *SU = SchedImpl->pickNode(IsTopNode);
    if (!SU)
      break;
    if (!checkSchedLimit())
      break;

    scheduleMI(SU, IsTopNode);

    // Notify the scheduling strategy after updating the DAG.
    SchedImpl->schedNode(SU, IsTopNode);

    updateQueues(SU, IsTopNode);
  }
  // The two zones must have met in the middle: nothing left unscheduled.
  assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone.");

  placeDebugValues();

  LLVM_DEBUG({
    dbgs() << "*** Final schedule for "
           << printMBBReference(*begin()->getParent()) << " ***\n";
    dumpSchedule();
    dbgs() << '\n';
  });
}
/// Bind the strategy to a new region's DAG: (re)create hazard recognizers
/// and resource models for both zones and precompute which pressure sets
/// count as "high pressure" for the cost heuristics.
void ConvergingVLIWScheduler::initialize(ScheduleDAGMI *dag) {
  DAG = static_cast<VLIWMachineScheduler *>(dag);
  SchedModel = DAG->getSchedModel();

  Top.init(DAG, SchedModel);
  Bot.init(DAG, SchedModel);

  // Initialize the HazardRecognizers. If itineraries don't exist, are empty, or
  // are disabled, then these HazardRecs will be disabled.
  const InstrItineraryData *Itin = DAG->getSchedModel()->getInstrItineraries();
  const TargetSubtargetInfo &STI = DAG->MF.getSubtarget();
  const TargetInstrInfo *TII = STI.getInstrInfo();
  // This is called once per region; free the previous region's objects
  // before installing fresh ones.
  delete Top.HazardRec;
  delete Bot.HazardRec;
  Top.HazardRec = TII->CreateTargetMIHazardRecognizer(Itin, DAG);
  Bot.HazardRec = TII->CreateTargetMIHazardRecognizer(Itin, DAG);

  delete Top.ResourceModel;
  delete Bot.ResourceModel;
  Top.ResourceModel = createVLIWResourceModel(STI, DAG->getSchedModel());
  Bot.ResourceModel = createVLIWResourceModel(STI, DAG->getSchedModel());

  // A set is "high pressure" when the region's max pressure exceeds
  // RPThreshold (default 0.75) of that set's limit.
  const std::vector<unsigned> &MaxPressure =
      DAG->getRegPressure().MaxSetPressure;
  HighPressureSets.assign(MaxPressure.size(), false);
  for (unsigned i = 0, e = MaxPressure.size(); i < e; ++i) {
    unsigned Limit = DAG->getRegClassInfo()->getRegPressureSetLimit(i);
    HighPressureSets[i] =
        ((float)MaxPressure[i] > ((float)Limit * RPThreshold));
  }

  assert((!ForceTopDown || !ForceBottomUp) &&
         "-misched-topdown incompatible with -misched-bottomup");
}
  259. VLIWResourceModel *ConvergingVLIWScheduler::createVLIWResourceModel(
  260. const TargetSubtargetInfo &STI, const TargetSchedModel *SchedModel) const {
  261. return new VLIWResourceModel(STI, SchedModel);
  262. }
  263. void ConvergingVLIWScheduler::releaseTopNode(SUnit *SU) {
  264. for (const SDep &PI : SU->Preds) {
  265. unsigned PredReadyCycle = PI.getSUnit()->TopReadyCycle;
  266. unsigned MinLatency = PI.getLatency();
  267. #ifndef NDEBUG
  268. Top.MaxMinLatency = std::max(MinLatency, Top.MaxMinLatency);
  269. #endif
  270. if (SU->TopReadyCycle < PredReadyCycle + MinLatency)
  271. SU->TopReadyCycle = PredReadyCycle + MinLatency;
  272. }
  273. if (!SU->isScheduled)
  274. Top.releaseNode(SU, SU->TopReadyCycle);
  275. }
  276. void ConvergingVLIWScheduler::releaseBottomNode(SUnit *SU) {
  277. assert(SU->getInstr() && "Scheduled SUnit must have instr");
  278. for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); I != E;
  279. ++I) {
  280. unsigned SuccReadyCycle = I->getSUnit()->BotReadyCycle;
  281. unsigned MinLatency = I->getLatency();
  282. #ifndef NDEBUG
  283. Bot.MaxMinLatency = std::max(MinLatency, Bot.MaxMinLatency);
  284. #endif
  285. if (SU->BotReadyCycle < SuccReadyCycle + MinLatency)
  286. SU->BotReadyCycle = SuccReadyCycle + MinLatency;
  287. }
  288. if (!SU->isScheduled)
  289. Bot.releaseNode(SU, SU->BotReadyCycle);
  290. }
// The boundary owns the resource model and hazard recognizer installed by
// ConvergingVLIWScheduler::initialize().
ConvergingVLIWScheduler::VLIWSchedBoundary::~VLIWSchedBoundary() {
  delete ResourceModel;
  delete HazardRec;
}
  295. /// Does this SU have a hazard within the current instruction group.
  296. ///
  297. /// The scheduler supports two modes of hazard recognition. The first is the
  298. /// ScheduleHazardRecognizer API. It is a fully general hazard recognizer that
  299. /// supports highly complicated in-order reservation tables
  300. /// (ScoreboardHazardRecognizer) and arbitrary target-specific logic.
  301. ///
  302. /// The second is a streamlined mechanism that checks for hazards based on
  303. /// simple counters that the scheduler itself maintains. It explicitly checks
  304. /// for instruction dispatch limitations, including the number of micro-ops that
  305. /// can dispatch per cycle.
  306. ///
  307. /// TODO: Also check whether the SU must start a new group.
  308. bool ConvergingVLIWScheduler::VLIWSchedBoundary::checkHazard(SUnit *SU) {
  309. if (HazardRec->isEnabled())
  310. return HazardRec->getHazardType(SU) != ScheduleHazardRecognizer::NoHazard;
  311. unsigned uops = SchedModel->getNumMicroOps(SU->getInstr());
  312. if (IssueCount + uops > SchedModel->getIssueWidth())
  313. return true;
  314. return false;
  315. }
  316. void ConvergingVLIWScheduler::VLIWSchedBoundary::releaseNode(
  317. SUnit *SU, unsigned ReadyCycle) {
  318. if (ReadyCycle < MinReadyCycle)
  319. MinReadyCycle = ReadyCycle;
  320. // Check for interlocks first. For the purpose of other heuristics, an
  321. // instruction that cannot issue appears as if it's not in the ReadyQueue.
  322. if (ReadyCycle > CurrCycle || checkHazard(SU))
  323. Pending.push(SU);
  324. else
  325. Available.push(SU);
  326. }
  327. /// Move the boundary of scheduled code by one cycle.
  328. void ConvergingVLIWScheduler::VLIWSchedBoundary::bumpCycle() {
  329. unsigned Width = SchedModel->getIssueWidth();
  330. IssueCount = (IssueCount <= Width) ? 0 : IssueCount - Width;
  331. assert(MinReadyCycle < std::numeric_limits<unsigned>::max() &&
  332. "MinReadyCycle uninitialized");
  333. unsigned NextCycle = std::max(CurrCycle + 1, MinReadyCycle);
  334. if (!HazardRec->isEnabled()) {
  335. // Bypass HazardRec virtual calls.
  336. CurrCycle = NextCycle;
  337. } else {
  338. // Bypass getHazardType calls in case of long latency.
  339. for (; CurrCycle != NextCycle; ++CurrCycle) {
  340. if (isTop())
  341. HazardRec->AdvanceCycle();
  342. else
  343. HazardRec->RecedeCycle();
  344. }
  345. }
  346. CheckPending = true;
  347. LLVM_DEBUG(dbgs() << "*** Next cycle " << Available.getName() << " cycle "
  348. << CurrCycle << '\n');
  349. }
/// Move the boundary of scheduled code by one SUnit.
void ConvergingVLIWScheduler::VLIWSchedBoundary::bumpNode(SUnit *SU) {
  bool startNewCycle = false;

  // Update the reservation table.
  if (HazardRec->isEnabled()) {
    if (!isTop() && SU->isCall) {
      // Calls are scheduled with their preceding instructions. For bottom-up
      // scheduling, clear the pipeline state before emitting.
      HazardRec->Reset();
    }
    HazardRec->EmitInstruction(SU);
  }

  // Update DFA model. reserveResources() returns true when SU did not fit
  // the current packet, in which case the packet was reset and the cycle
  // must be advanced below to stay in sync.
  startNewCycle = ResourceModel->reserveResources(SU, isTop());

  // Check the instruction group dispatch limit.
  // TODO: Check if this SU must end a dispatch group.
  IssueCount += SchedModel->getNumMicroOps(SU->getInstr());
  if (startNewCycle) {
    LLVM_DEBUG(dbgs() << "*** Max instrs at cycle " << CurrCycle << '\n');
    bumpCycle();
  } else
    LLVM_DEBUG(dbgs() << "*** IssueCount " << IssueCount << " at cycle "
                      << CurrCycle << '\n');
}
/// Release pending ready nodes in to the available queue. This makes them
/// visible to heuristics.
void ConvergingVLIWScheduler::VLIWSchedBoundary::releasePending() {
  // If the available queue is empty, it is safe to reset MinReadyCycle.
  if (Available.empty())
    MinReadyCycle = std::numeric_limits<unsigned>::max();

  // Check to see if any of the pending instructions are ready to issue. If
  // so, add them to the available queue.
  for (unsigned i = 0, e = Pending.size(); i != e; ++i) {
    SUnit *SU = *(Pending.begin() + i);
    unsigned ReadyCycle = isTop() ? SU->TopReadyCycle : SU->BotReadyCycle;

    // Track the earliest ready cycle among the nodes that remain pending.
    if (ReadyCycle < MinReadyCycle)
      MinReadyCycle = ReadyCycle;

    if (ReadyCycle > CurrCycle)
      continue;

    if (checkHazard(SU))
      continue;

    Available.push(SU);
    // Compensate for the in-place removal at index i: step the index back so
    // the element now occupying slot i is visited, and shrink the bound.
    Pending.remove(Pending.begin() + i);
    --i;
    --e;
  }
  CheckPending = false;
}
  398. /// Remove SU from the ready set for this boundary.
  399. void ConvergingVLIWScheduler::VLIWSchedBoundary::removeReady(SUnit *SU) {
  400. if (Available.isInQueue(SU))
  401. Available.remove(Available.find(SU));
  402. else {
  403. assert(Pending.isInQueue(SU) && "bad ready count");
  404. Pending.remove(Pending.find(SU));
  405. }
  406. }
/// If this queue only has one ready candidate, return it. As a side effect,
/// advance the cycle until at least one node is ready. If multiple instructions
/// are ready, return NULL.
SUnit *ConvergingVLIWScheduler::VLIWSchedBoundary::pickOnlyChoice() {
  if (CheckPending)
    releasePending();

  // Burn a cycle when nothing is available, or when the single available
  // node either does not fit the current packet or still has weak
  // (artificial) edges outstanding while other work is pending.
  auto AdvanceCycle = [this]() {
    if (Available.empty())
      return true;
    if (Available.size() == 1 && Pending.size() > 0)
      return !ResourceModel->isResourceAvailable(*Available.begin(), isTop()) ||
             getWeakLeft(*Available.begin(), isTop()) != 0;
    return false;
  };
  for (unsigned i = 0; AdvanceCycle(); ++i) {
    // Advancing past the hazard lookahead plus the largest edge latency
    // without progress would mean the hazard can never clear.
    assert(i <= (HazardRec->getMaxLookAhead() + MaxMinLatency) &&
           "permanent hazard");
    (void)i;
    // A null SU resets the packet state before the cycle is bumped.
    ResourceModel->reserveResources(nullptr, isTop());
    bumpCycle();
    releasePending();
  }

  if (Available.size() == 1)
    return *Available.begin();
  return nullptr;
}
  433. #ifndef NDEBUG
/// Print a one-line candidate summary: label, queue name, the pressure
/// change (when valid), the computed cost, and the DAG node itself.
void ConvergingVLIWScheduler::traceCandidate(const char *Label,
                                             const ReadyQueue &Q, SUnit *SU,
                                             int Cost, PressureChange P) {
  dbgs() << Label << " " << Q.getName() << " ";
  if (P.isValid())
    dbgs() << DAG->TRI->getRegPressureSetName(P.getPSet()) << ":"
           << P.getUnitInc() << " ";
  else
    dbgs() << " ";
  dbgs() << "cost(" << Cost << ")\t";
  DAG->dumpNode(*SU);
}
  446. // Very detailed queue dump, to be used with higher verbosity levels.
  447. void ConvergingVLIWScheduler::readyQueueVerboseDump(
  448. const RegPressureTracker &RPTracker, SchedCandidate &Candidate,
  449. ReadyQueue &Q) {
  450. RegPressureTracker &TempTracker = const_cast<RegPressureTracker &>(RPTracker);
  451. dbgs() << ">>> " << Q.getName() << "\n";
  452. for (ReadyQueue::iterator I = Q.begin(), E = Q.end(); I != E; ++I) {
  453. RegPressureDelta RPDelta;
  454. TempTracker.getMaxPressureDelta((*I)->getInstr(), RPDelta,
  455. DAG->getRegionCriticalPSets(),
  456. DAG->getRegPressure().MaxSetPressure);
  457. std::stringstream dbgstr;
  458. dbgstr << "SU(" << std::setw(3) << (*I)->NodeNum << ")";
  459. dbgs() << dbgstr.str();
  460. SchedulingCost(Q, *I, Candidate, RPDelta, true);
  461. dbgs() << "\t";
  462. (*I)->getInstr()->dump();
  463. }
  464. dbgs() << "\n";
  465. }
  466. #endif
  467. /// isSingleUnscheduledPred - If SU2 is the only unscheduled predecessor
  468. /// of SU, return true (we may have duplicates)
  469. static inline bool isSingleUnscheduledPred(SUnit *SU, SUnit *SU2) {
  470. if (SU->NumPredsLeft == 0)
  471. return false;
  472. for (auto &Pred : SU->Preds) {
  473. // We found an available, but not scheduled, predecessor.
  474. if (!Pred.getSUnit()->isScheduled && (Pred.getSUnit() != SU2))
  475. return false;
  476. }
  477. return true;
  478. }
  479. /// isSingleUnscheduledSucc - If SU2 is the only unscheduled successor
  480. /// of SU, return true (we may have duplicates)
  481. static inline bool isSingleUnscheduledSucc(SUnit *SU, SUnit *SU2) {
  482. if (SU->NumSuccsLeft == 0)
  483. return false;
  484. for (auto &Succ : SU->Succs) {
  485. // We found an available, but not scheduled, successor.
  486. if (!Succ.getSUnit()->isScheduled && (Succ.getSUnit() != SU2))
  487. return false;
  488. }
  489. return true;
  490. }
  491. /// Check if the instruction changes the register pressure of a register in the
  492. /// high pressure set. The function returns a negative value if the pressure
  493. /// decreases and a positive value is the pressure increases. If the instruction
  494. /// doesn't use a high pressure register or doesn't change the register
  495. /// pressure, then return 0.
  496. int ConvergingVLIWScheduler::pressureChange(const SUnit *SU, bool isBotUp) {
  497. PressureDiff &PD = DAG->getPressureDiff(SU);
  498. for (const auto &P : PD) {
  499. if (!P.isValid())
  500. continue;
  501. // The pressure differences are computed bottom-up, so the comparison for
  502. // an increase is positive in the bottom direction, but negative in the
  503. // top-down direction.
  504. if (HighPressureSets[P.getPSet()])
  505. return (isBotUp ? P.getUnitInc() : -P.getUnitInc());
  506. }
  507. return 0;
  508. }
/// Single point to compute overall scheduling cost.
/// TODO: More heuristics will be used soon.
///
/// Returns a heuristic priority for scheduling \p SU out of queue \p Q;
/// larger is better. \p Delta carries the caller-computed register-pressure
/// impact, and \p verbose enables the per-factor trace output.
int ConvergingVLIWScheduler::SchedulingCost(ReadyQueue &Q, SUnit *SU,
                                            SchedCandidate &Candidate,
                                            RegPressureDelta &Delta,
                                            bool verbose) {
  // Initial trivial priority.
  int ResCount = 1;

  // Do not waste time on a node that is already scheduled.
  if (!SU || SU->isScheduled)
    return ResCount;

  LLVM_DEBUG(if (verbose) dbgs()
             << ((Q.getID() == TopQID) ? "(top|" : "(bot|"));
  // Forced priority is high.
  // NOTE(review): unlike the other trace prints in this function, this one
  // is not gated on 'verbose'.
  if (SU->isScheduleHigh) {
    ResCount += PriorityOne;
    LLVM_DEBUG(dbgs() << "H|");
  }

  unsigned IsAvailableAmt = 0;
  // Critical path first: when the zone is latency bound, weight the node by
  // its height (top-down) or depth (bottom-up).
  if (Q.getID() == TopQID) {
    if (Top.isLatencyBound(SU)) {
      LLVM_DEBUG(if (verbose) dbgs() << "LB|");
      ResCount += (SU->getHeight() * ScaleTwo);
    }

    LLVM_DEBUG(if (verbose) {
      std::stringstream dbgstr;
      dbgstr << "h" << std::setw(3) << SU->getHeight() << "|";
      dbgs() << dbgstr.str();
    });

    // If resources are available for it, multiply the
    // chance of scheduling.
    if (Top.ResourceModel->isResourceAvailable(SU, true)) {
      IsAvailableAmt = (PriorityTwo + PriorityThree);
      ResCount += IsAvailableAmt;
      LLVM_DEBUG(if (verbose) dbgs() << "A|");
    } else
      LLVM_DEBUG(if (verbose) dbgs() << " |");
  } else {
    if (Bot.isLatencyBound(SU)) {
      LLVM_DEBUG(if (verbose) dbgs() << "LB|");
      ResCount += (SU->getDepth() * ScaleTwo);
    }

    LLVM_DEBUG(if (verbose) {
      std::stringstream dbgstr;
      dbgstr << "d" << std::setw(3) << SU->getDepth() << "|";
      dbgs() << dbgstr.str();
    });

    // If resources are available for it, multiply the
    // chance of scheduling.
    if (Bot.ResourceModel->isResourceAvailable(SU, false)) {
      IsAvailableAmt = (PriorityTwo + PriorityThree);
      ResCount += IsAvailableAmt;
      LLVM_DEBUG(if (verbose) dbgs() << "A|");
    } else
      LLVM_DEBUG(if (verbose) dbgs() << " |");
  }

  unsigned NumNodesBlocking = 0;
  if (Q.getID() == TopQID) {
    // How many SUs does it block from scheduling?
    // Look at all of the successors of this node.
    // Count the number of nodes that
    // this node is the sole unscheduled node for.
    if (Top.isLatencyBound(SU))
      for (const SDep &SI : SU->Succs)
        if (isSingleUnscheduledPred(SI.getSUnit(), SU))
          ++NumNodesBlocking;
  } else {
    // How many unscheduled predecessors block this node?
    if (Bot.isLatencyBound(SU))
      for (const SDep &PI : SU->Preds)
        if (isSingleUnscheduledSucc(PI.getSUnit(), SU))
          ++NumNodesBlocking;
  }
  ResCount += (NumNodesBlocking * ScaleTwo);

  LLVM_DEBUG(if (verbose) {
    std::stringstream dbgstr;
    dbgstr << "blk " << std::setw(2) << NumNodesBlocking << ")|";
    dbgs() << dbgstr.str();
  });

  // Factor in reg pressure as a heuristic.
  if (!IgnoreBBRegPressure) {
    // Decrease priority by the amount that register pressure exceeds the limit.
    ResCount -= (Delta.Excess.getUnitInc() * PriorityOne);
    // Decrease priority if register pressure exceeds the limit.
    ResCount -= (Delta.CriticalMax.getUnitInc() * PriorityOne);
    // Decrease priority slightly if register pressure would increase over the
    // current maximum.
    ResCount -= (Delta.CurrentMax.getUnitInc() * PriorityTwo);
    // If there are register pressure issues, then we remove the value added for
    // the instruction being available. The rationale is that we really don't
    // want to schedule an instruction that causes a spill.
    if (IsAvailableAmt && pressureChange(SU, Q.getID() != TopQID) > 0 &&
        (Delta.Excess.getUnitInc() || Delta.CriticalMax.getUnitInc() ||
         Delta.CurrentMax.getUnitInc()))
      ResCount -= IsAvailableAmt;
    LLVM_DEBUG(if (verbose) {
      dbgs() << "RP " << Delta.Excess.getUnitInc() << "/"
             << Delta.CriticalMax.getUnitInc() << "/"
             << Delta.CurrentMax.getUnitInc() << ")|";
    });
  }

  // Give preference to a zero latency instruction if the dependent
  // instruction is in the current packet.
  if (Q.getID() == TopQID && getWeakLeft(SU, true) == 0) {
    for (const SDep &PI : SU->Preds) {
      if (!PI.getSUnit()->getInstr()->isPseudo() && PI.isAssignedRegDep() &&
          PI.getLatency() == 0 &&
          Top.ResourceModel->isInPacket(PI.getSUnit())) {
        ResCount += PriorityThree;
        LLVM_DEBUG(if (verbose) dbgs() << "Z|");
      }
    }
  } else if (Q.getID() == BotQID && getWeakLeft(SU, false) == 0) {
    for (const SDep &SI : SU->Succs) {
      if (!SI.getSUnit()->getInstr()->isPseudo() && SI.isAssignedRegDep() &&
          SI.getLatency() == 0 &&
          Bot.ResourceModel->isInPacket(SI.getSUnit())) {
        ResCount += PriorityThree;
        LLVM_DEBUG(if (verbose) dbgs() << "Z|");
      }
    }
  }

  // If the instruction has a non-zero latency dependence with an instruction in
  // the current packet, then it should not be scheduled yet. The case occurs
  // when the dependent instruction is scheduled in a new packet, so the
  // scheduler updates the current cycle and pending instructions become
  // available.
  if (CheckEarlyAvail) {
    if (Q.getID() == TopQID) {
      for (const auto &PI : SU->Preds) {
        if (PI.getLatency() > 0 &&
            Top.ResourceModel->isInPacket(PI.getSUnit())) {
          ResCount -= PriorityOne;
          LLVM_DEBUG(if (verbose) dbgs() << "D|");
        }
      }
    } else {
      for (const auto &SI : SU->Succs) {
        if (SI.getLatency() > 0 &&
            Bot.ResourceModel->isInPacket(SI.getSUnit())) {
          ResCount -= PriorityOne;
          LLVM_DEBUG(if (verbose) dbgs() << "D|");
        }
      }
    }
  }

  LLVM_DEBUG(if (verbose) {
    std::stringstream dbgstr;
    dbgstr << "Total " << std::setw(4) << ResCount << ")";
    dbgs() << dbgstr.str();
  });

  return ResCount;
}
/// Pick the best candidate from the top queue.
///
/// TODO: getMaxPressureDelta results can be mostly cached for each SUnit during
/// DAG building. To adjust for the current scheduling location we need to
/// maintain the number of vreg uses remaining to be top-scheduled.
///
/// Scans every SUnit in \p Zone's Available queue, scores each with
/// SchedulingCost(), and records the winner in \p Candidate. The return value
/// says *why* the final candidate won (best cost, weak-edge preference, node
/// order, ...), which pickNodeBidrectional uses to arbitrate between the top
/// and bottom zones. Returns NoCand only if the queue is empty.
ConvergingVLIWScheduler::CandResult
ConvergingVLIWScheduler::pickNodeFromQueue(VLIWSchedBoundary &Zone,
                                           const RegPressureTracker &RPTracker,
                                           SchedCandidate &Candidate) {
  ReadyQueue &Q = Zone.Available;
  LLVM_DEBUG(if (SchedDebugVerboseLevel > 1)
                 readyQueueVerboseDump(RPTracker, Candidate, Q);
             else Q.dump(););

  // getMaxPressureDelta temporarily modifies the tracker.
  RegPressureTracker &TempTracker = const_cast<RegPressureTracker &>(RPTracker);

  // BestSU remains NULL if no top candidates beat the best existing candidate.
  CandResult FoundCandidate = NoCand;
  for (ReadyQueue::iterator I = Q.begin(), E = Q.end(); I != E; ++I) {
    RegPressureDelta RPDelta;
    // Ask the tracker how scheduling this instruction would change register
    // pressure relative to the region's critical pressure sets.
    TempTracker.getMaxPressureDelta((*I)->getInstr(), RPDelta,
                                    DAG->getRegionCriticalPSets(),
                                    DAG->getRegPressure().MaxSetPressure);

    int CurrentCost = SchedulingCost(Q, *I, Candidate, RPDelta, false);

    // Initialize the candidate if needed.
    if (!Candidate.SU) {
      LLVM_DEBUG(traceCandidate("DCAND", Q, *I, CurrentCost));
      Candidate.SU = *I;
      Candidate.RPDelta = RPDelta;
      Candidate.SCost = CurrentCost;
      FoundCandidate = NodeOrder;
      continue;
    }

    // Choose node order for negative cost candidates. There is no good
    // candidate in this case. Top-down prefers the lower NodeNum, bottom-up
    // the higher one, so both directions fall back to source order.
    if (CurrentCost < 0 && Candidate.SCost < 0) {
      if ((Q.getID() == TopQID && (*I)->NodeNum < Candidate.SU->NodeNum) ||
          (Q.getID() == BotQID && (*I)->NodeNum > Candidate.SU->NodeNum)) {
        LLVM_DEBUG(traceCandidate("NCAND", Q, *I, CurrentCost));
        Candidate.SU = *I;
        Candidate.RPDelta = RPDelta;
        Candidate.SCost = CurrentCost;
        FoundCandidate = NodeOrder;
      }
      continue;
    }

    // Best cost.
    if (CurrentCost > Candidate.SCost) {
      LLVM_DEBUG(traceCandidate("CCAND", Q, *I, CurrentCost));
      Candidate.SU = *I;
      Candidate.RPDelta = RPDelta;
      Candidate.SCost = CurrentCost;
      FoundCandidate = BestCost;
      continue;
    }

    // Choose an instruction that does not depend on an artificial edge.
    // Fewer remaining weak edges wins.
    unsigned CurrWeak = getWeakLeft(*I, (Q.getID() == TopQID));
    unsigned CandWeak = getWeakLeft(Candidate.SU, (Q.getID() == TopQID));
    if (CurrWeak != CandWeak) {
      if (CurrWeak < CandWeak) {
        LLVM_DEBUG(traceCandidate("WCAND", Q, *I, CurrentCost));
        Candidate.SU = *I;
        Candidate.RPDelta = RPDelta;
        Candidate.SCost = CurrentCost;
        FoundCandidate = Weak;
      }
      continue;
    }

    // On a cost tie in a latency-bound zone, prefer the node with more edges
    // toward the unscheduled side (more successors when scheduling top-down,
    // more predecessors bottom-up): scheduling it unblocks more work.
    if (CurrentCost == Candidate.SCost && Zone.isLatencyBound(*I)) {
      unsigned CurrSize, CandSize;
      if (Q.getID() == TopQID) {
        CurrSize = (*I)->Succs.size();
        CandSize = Candidate.SU->Succs.size();
      } else {
        CurrSize = (*I)->Preds.size();
        CandSize = Candidate.SU->Preds.size();
      }
      if (CurrSize > CandSize) {
        LLVM_DEBUG(traceCandidate("SPCAND", Q, *I, CurrentCost));
        Candidate.SU = *I;
        Candidate.RPDelta = RPDelta;
        Candidate.SCost = CurrentCost;
        FoundCandidate = BestCost;
      }
      // Keep the old candidate if it's a better candidate. That is, don't use
      // the subsequent tie breaker.
      if (CurrSize != CandSize)
        continue;
    }

    // Tie breaker.
    // To avoid scheduling indeterminism, we need a tie breaker
    // for the case when cost is identical for two nodes.
    if (UseNewerCandidate && CurrentCost == Candidate.SCost) {
      if ((Q.getID() == TopQID && (*I)->NodeNum < Candidate.SU->NodeNum) ||
          (Q.getID() == BotQID && (*I)->NodeNum > Candidate.SU->NodeNum)) {
        LLVM_DEBUG(traceCandidate("TCAND", Q, *I, CurrentCost));
        Candidate.SU = *I;
        Candidate.RPDelta = RPDelta;
        Candidate.SCost = CurrentCost;
        FoundCandidate = NodeOrder;
        continue;
      }
    }

    // Fall through to original instruction order.
    // Only consider node order if Candidate was chosen from this Q.
    if (FoundCandidate == NoCand)
      continue;
  }
  return FoundCandidate;
}
  772. /// Pick the best candidate node from either the top or bottom queue.
  773. SUnit *ConvergingVLIWScheduler::pickNodeBidrectional(bool &IsTopNode) {
  774. // Schedule as far as possible in the direction of no choice. This is most
  775. // efficient, but also provides the best heuristics for CriticalPSets.
  776. if (SUnit *SU = Bot.pickOnlyChoice()) {
  777. LLVM_DEBUG(dbgs() << "Picked only Bottom\n");
  778. IsTopNode = false;
  779. return SU;
  780. }
  781. if (SUnit *SU = Top.pickOnlyChoice()) {
  782. LLVM_DEBUG(dbgs() << "Picked only Top\n");
  783. IsTopNode = true;
  784. return SU;
  785. }
  786. SchedCandidate BotCand;
  787. // Prefer bottom scheduling when heuristics are silent.
  788. CandResult BotResult =
  789. pickNodeFromQueue(Bot, DAG->getBotRPTracker(), BotCand);
  790. assert(BotResult != NoCand && "failed to find the first candidate");
  791. // If either Q has a single candidate that provides the least increase in
  792. // Excess pressure, we can immediately schedule from that Q.
  793. //
  794. // RegionCriticalPSets summarizes the pressure within the scheduled region and
  795. // affects picking from either Q. If scheduling in one direction must
  796. // increase pressure for one of the excess PSets, then schedule in that
  797. // direction first to provide more freedom in the other direction.
  798. if (BotResult == SingleExcess || BotResult == SingleCritical) {
  799. LLVM_DEBUG(dbgs() << "Prefered Bottom Node\n");
  800. IsTopNode = false;
  801. return BotCand.SU;
  802. }
  803. // Check if the top Q has a better candidate.
  804. SchedCandidate TopCand;
  805. CandResult TopResult =
  806. pickNodeFromQueue(Top, DAG->getTopRPTracker(), TopCand);
  807. assert(TopResult != NoCand && "failed to find the first candidate");
  808. if (TopResult == SingleExcess || TopResult == SingleCritical) {
  809. LLVM_DEBUG(dbgs() << "Prefered Top Node\n");
  810. IsTopNode = true;
  811. return TopCand.SU;
  812. }
  813. // If either Q has a single candidate that minimizes pressure above the
  814. // original region's pressure pick it.
  815. if (BotResult == SingleMax) {
  816. LLVM_DEBUG(dbgs() << "Prefered Bottom Node SingleMax\n");
  817. IsTopNode = false;
  818. return BotCand.SU;
  819. }
  820. if (TopResult == SingleMax) {
  821. LLVM_DEBUG(dbgs() << "Prefered Top Node SingleMax\n");
  822. IsTopNode = true;
  823. return TopCand.SU;
  824. }
  825. if (TopCand.SCost > BotCand.SCost) {
  826. LLVM_DEBUG(dbgs() << "Prefered Top Node Cost\n");
  827. IsTopNode = true;
  828. return TopCand.SU;
  829. }
  830. // Otherwise prefer the bottom candidate in node order.
  831. LLVM_DEBUG(dbgs() << "Prefered Bottom in Node order\n");
  832. IsTopNode = false;
  833. return BotCand.SU;
  834. }
  835. /// Pick the best node to balance the schedule. Implements MachineSchedStrategy.
  836. SUnit *ConvergingVLIWScheduler::pickNode(bool &IsTopNode) {
  837. if (DAG->top() == DAG->bottom()) {
  838. assert(Top.Available.empty() && Top.Pending.empty() &&
  839. Bot.Available.empty() && Bot.Pending.empty() && "ReadyQ garbage");
  840. return nullptr;
  841. }
  842. SUnit *SU;
  843. if (ForceTopDown) {
  844. SU = Top.pickOnlyChoice();
  845. if (!SU) {
  846. SchedCandidate TopCand;
  847. CandResult TopResult =
  848. pickNodeFromQueue(Top, DAG->getTopRPTracker(), TopCand);
  849. assert(TopResult != NoCand && "failed to find the first candidate");
  850. (void)TopResult;
  851. SU = TopCand.SU;
  852. }
  853. IsTopNode = true;
  854. } else if (ForceBottomUp) {
  855. SU = Bot.pickOnlyChoice();
  856. if (!SU) {
  857. SchedCandidate BotCand;
  858. CandResult BotResult =
  859. pickNodeFromQueue(Bot, DAG->getBotRPTracker(), BotCand);
  860. assert(BotResult != NoCand && "failed to find the first candidate");
  861. (void)BotResult;
  862. SU = BotCand.SU;
  863. }
  864. IsTopNode = false;
  865. } else {
  866. SU = pickNodeBidrectional(IsTopNode);
  867. }
  868. if (SU->isTopReady())
  869. Top.removeReady(SU);
  870. if (SU->isBottomReady())
  871. Bot.removeReady(SU);
  872. LLVM_DEBUG(dbgs() << "*** " << (IsTopNode ? "Top" : "Bottom")
  873. << " Scheduling instruction in cycle "
  874. << (IsTopNode ? Top.CurrCycle : Bot.CurrCycle) << " ("
  875. << reportPackets() << ")\n";
  876. DAG->dumpNode(*SU));
  877. return SU;
  878. }
  879. /// Update the scheduler's state after scheduling a node. This is the same node
  880. /// that was just returned by pickNode(). However, VLIWMachineScheduler needs
  881. /// to update it's state based on the current cycle before MachineSchedStrategy
  882. /// does.
  883. void ConvergingVLIWScheduler::schedNode(SUnit *SU, bool IsTopNode) {
  884. if (IsTopNode) {
  885. Top.bumpNode(SU);
  886. SU->TopReadyCycle = Top.CurrCycle;
  887. } else {
  888. Bot.bumpNode(SU);
  889. SU->BotReadyCycle = Bot.CurrCycle;
  890. }
  891. }