MachinePipeliner.h 23 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668
  1. #pragma once
  2. #ifdef __GNUC__
  3. #pragma GCC diagnostic push
  4. #pragma GCC diagnostic ignored "-Wunused-parameter"
  5. #endif
  6. //===- MachinePipeliner.h - Machine Software Pipeliner Pass -------------===//
  7. //
  8. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  9. // See https://llvm.org/LICENSE.txt for license information.
  10. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  11. //
  12. //===----------------------------------------------------------------------===//
  13. //
  14. // An implementation of the Swing Modulo Scheduling (SMS) software pipeliner.
  15. //
  16. // Software pipelining (SWP) is an instruction scheduling technique for loops
  17. // that overlaps loop iterations and exploits ILP via a compiler transformation.
  18. //
  19. // Swing Modulo Scheduling is an implementation of software pipelining
  20. // that generates schedules that are near optimal in terms of initiation
  21. // interval, register requirements, and stage count. See the papers:
  22. //
  23. // "Swing Modulo Scheduling: A Lifetime-Sensitive Approach", by J. Llosa,
  24. // A. Gonzalez, E. Ayguade, and M. Valero. In PACT '96 Proceedings of the 1996
  25. // Conference on Parallel Architectures and Compilation Techniques.
  26. //
  27. // "Lifetime-Sensitive Modulo Scheduling in a Production Environment", by J.
  28. // Llosa, E. Ayguade, A. Gonzalez, M. Valero, and J. Eckhardt. In IEEE
  29. // Transactions on Computers, Vol. 50, No. 3, 2001.
  30. //
  31. // "An Implementation of Swing Modulo Scheduling With Extensions for
  32. // Superblocks", by T. Lattner, Master's Thesis, University of Illinois at
  33. // Urbana-Champaign, 2005.
  34. //
  35. //
  36. // The SMS algorithm consists of three main steps after computing the minimal
  37. // initiation interval (MII).
  38. // 1) Analyze the dependence graph and compute information about each
  39. // instruction in the graph.
  40. // 2) Order the nodes (instructions) by priority based upon the heuristics
  41. // described in the algorithm.
  42. // 3) Attempt to schedule the nodes in the specified order using the MII.
  43. //
  44. //===----------------------------------------------------------------------===//
  45. #ifndef LLVM_CODEGEN_MACHINEPIPELINER_H
  46. #define LLVM_CODEGEN_MACHINEPIPELINER_H
  47. #include "llvm/ADT/SetVector.h"
  48. #include "llvm/CodeGen/DFAPacketizer.h"
  49. #include "llvm/CodeGen/MachineDominators.h"
  50. #include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h"
  51. #include "llvm/CodeGen/RegisterClassInfo.h"
  52. #include "llvm/CodeGen/ScheduleDAGInstrs.h"
  53. #include "llvm/CodeGen/ScheduleDAGMutation.h"
  54. #include "llvm/CodeGen/TargetInstrInfo.h"
  55. #include "llvm/InitializePasses.h"
  56. #include <deque>
  57. namespace llvm {
  58. class AAResults;
  59. class NodeSet;
  60. class SMSchedule;
  61. extern cl::opt<bool> SwpEnableCopyToPhi;
  62. extern cl::opt<int> SwpForceIssueWidth;
  63. /// The main class in the implementation of the target independent
  64. /// software pipeliner pass.
  65. class MachinePipeliner : public MachineFunctionPass {
  66. public:
  67. MachineFunction *MF = nullptr;
  68. MachineOptimizationRemarkEmitter *ORE = nullptr;
  69. const MachineLoopInfo *MLI = nullptr;
  70. const MachineDominatorTree *MDT = nullptr;
  71. const InstrItineraryData *InstrItins;
  72. const TargetInstrInfo *TII = nullptr;
  73. RegisterClassInfo RegClassInfo;
  74. bool disabledByPragma = false;
  75. unsigned II_setByPragma = 0;
  76. #ifndef NDEBUG
  77. static int NumTries;
  78. #endif
  79. /// Cache the target analysis information about the loop.
  80. struct LoopInfo {
  81. MachineBasicBlock *TBB = nullptr;
  82. MachineBasicBlock *FBB = nullptr;
  83. SmallVector<MachineOperand, 4> BrCond;
  84. MachineInstr *LoopInductionVar = nullptr;
  85. MachineInstr *LoopCompare = nullptr;
  86. std::unique_ptr<TargetInstrInfo::PipelinerLoopInfo> LoopPipelinerInfo =
  87. nullptr;
  88. };
  89. LoopInfo LI;
  90. static char ID;
  91. MachinePipeliner() : MachineFunctionPass(ID) {
  92. initializeMachinePipelinerPass(*PassRegistry::getPassRegistry());
  93. }
  94. bool runOnMachineFunction(MachineFunction &MF) override;
  95. void getAnalysisUsage(AnalysisUsage &AU) const override;
  96. private:
  97. void preprocessPhiNodes(MachineBasicBlock &B);
  98. bool canPipelineLoop(MachineLoop &L);
  99. bool scheduleLoop(MachineLoop &L);
  100. bool swingModuloScheduler(MachineLoop &L);
  101. void setPragmaPipelineOptions(MachineLoop &L);
  102. };
  103. /// This class builds the dependence graph for the instructions in a loop,
  104. /// and attempts to schedule the instructions using the SMS algorithm.
  105. class SwingSchedulerDAG : public ScheduleDAGInstrs {
  106. MachinePipeliner &Pass;
  107. /// The minimum initiation interval between iterations for this schedule.
  108. unsigned MII = 0;
  109. /// The maximum initiation interval between iterations for this schedule.
  110. unsigned MAX_II = 0;
  111. /// Set to true if a valid pipelined schedule is found for the loop.
  112. bool Scheduled = false;
  113. MachineLoop &Loop;
  114. LiveIntervals &LIS;
  115. const RegisterClassInfo &RegClassInfo;
  116. unsigned II_setByPragma = 0;
  117. TargetInstrInfo::PipelinerLoopInfo *LoopPipelinerInfo = nullptr;
  118. /// A toplogical ordering of the SUnits, which is needed for changing
  119. /// dependences and iterating over the SUnits.
  120. ScheduleDAGTopologicalSort Topo;
  121. struct NodeInfo {
  122. int ASAP = 0;
  123. int ALAP = 0;
  124. int ZeroLatencyDepth = 0;
  125. int ZeroLatencyHeight = 0;
  126. NodeInfo() = default;
  127. };
  128. /// Computed properties for each node in the graph.
  129. std::vector<NodeInfo> ScheduleInfo;
  130. enum OrderKind { BottomUp = 0, TopDown = 1 };
  131. /// Computed node ordering for scheduling.
  132. SetVector<SUnit *> NodeOrder;
  133. using NodeSetType = SmallVector<NodeSet, 8>;
  134. using ValueMapTy = DenseMap<unsigned, unsigned>;
  135. using MBBVectorTy = SmallVectorImpl<MachineBasicBlock *>;
  136. using InstrMapTy = DenseMap<MachineInstr *, MachineInstr *>;
  137. /// Instructions to change when emitting the final schedule.
  138. DenseMap<SUnit *, std::pair<unsigned, int64_t>> InstrChanges;
  139. /// We may create a new instruction, so remember it because it
  140. /// must be deleted when the pass is finished.
  141. DenseMap<MachineInstr*, MachineInstr *> NewMIs;
  142. /// Ordered list of DAG postprocessing steps.
  143. std::vector<std::unique_ptr<ScheduleDAGMutation>> Mutations;
  144. /// Helper class to implement Johnson's circuit finding algorithm.
  145. class Circuits {
  146. std::vector<SUnit> &SUnits;
  147. SetVector<SUnit *> Stack;
  148. BitVector Blocked;
  149. SmallVector<SmallPtrSet<SUnit *, 4>, 10> B;
  150. SmallVector<SmallVector<int, 4>, 16> AdjK;
  151. // Node to Index from ScheduleDAGTopologicalSort
  152. std::vector<int> *Node2Idx;
  153. unsigned NumPaths;
  154. static unsigned MaxPaths;
  155. public:
  156. Circuits(std::vector<SUnit> &SUs, ScheduleDAGTopologicalSort &Topo)
  157. : SUnits(SUs), Blocked(SUs.size()), B(SUs.size()), AdjK(SUs.size()) {
  158. Node2Idx = new std::vector<int>(SUs.size());
  159. unsigned Idx = 0;
  160. for (const auto &NodeNum : Topo)
  161. Node2Idx->at(NodeNum) = Idx++;
  162. }
  163. ~Circuits() { delete Node2Idx; }
  164. /// Reset the data structures used in the circuit algorithm.
  165. void reset() {
  166. Stack.clear();
  167. Blocked.reset();
  168. B.assign(SUnits.size(), SmallPtrSet<SUnit *, 4>());
  169. NumPaths = 0;
  170. }
  171. void createAdjacencyStructure(SwingSchedulerDAG *DAG);
  172. bool circuit(int V, int S, NodeSetType &NodeSets, bool HasBackedge = false);
  173. void unblock(int U);
  174. };
  175. struct CopyToPhiMutation : public ScheduleDAGMutation {
  176. void apply(ScheduleDAGInstrs *DAG) override;
  177. };
  178. public:
  179. SwingSchedulerDAG(MachinePipeliner &P, MachineLoop &L, LiveIntervals &lis,
  180. const RegisterClassInfo &rci, unsigned II,
  181. TargetInstrInfo::PipelinerLoopInfo *PLI)
  182. : ScheduleDAGInstrs(*P.MF, P.MLI, false), Pass(P), Loop(L), LIS(lis),
  183. RegClassInfo(rci), II_setByPragma(II), LoopPipelinerInfo(PLI),
  184. Topo(SUnits, &ExitSU) {
  185. P.MF->getSubtarget().getSMSMutations(Mutations);
  186. if (SwpEnableCopyToPhi)
  187. Mutations.push_back(std::make_unique<CopyToPhiMutation>());
  188. }
  189. void schedule() override;
  190. void finishBlock() override;
  191. /// Return true if the loop kernel has been scheduled.
  192. bool hasNewSchedule() { return Scheduled; }
  193. /// Return the earliest time an instruction may be scheduled.
  194. int getASAP(SUnit *Node) { return ScheduleInfo[Node->NodeNum].ASAP; }
  195. /// Return the latest time an instruction my be scheduled.
  196. int getALAP(SUnit *Node) { return ScheduleInfo[Node->NodeNum].ALAP; }
  197. /// The mobility function, which the number of slots in which
  198. /// an instruction may be scheduled.
  199. int getMOV(SUnit *Node) { return getALAP(Node) - getASAP(Node); }
  200. /// The depth, in the dependence graph, for a node.
  201. unsigned getDepth(SUnit *Node) { return Node->getDepth(); }
  202. /// The maximum unweighted length of a path from an arbitrary node to the
  203. /// given node in which each edge has latency 0
  204. int getZeroLatencyDepth(SUnit *Node) {
  205. return ScheduleInfo[Node->NodeNum].ZeroLatencyDepth;
  206. }
  207. /// The height, in the dependence graph, for a node.
  208. unsigned getHeight(SUnit *Node) { return Node->getHeight(); }
  209. /// The maximum unweighted length of a path from the given node to an
  210. /// arbitrary node in which each edge has latency 0
  211. int getZeroLatencyHeight(SUnit *Node) {
  212. return ScheduleInfo[Node->NodeNum].ZeroLatencyHeight;
  213. }
  214. /// Return true if the dependence is a back-edge in the data dependence graph.
  215. /// Since the DAG doesn't contain cycles, we represent a cycle in the graph
  216. /// using an anti dependence from a Phi to an instruction.
  217. bool isBackedge(SUnit *Source, const SDep &Dep) {
  218. if (Dep.getKind() != SDep::Anti)
  219. return false;
  220. return Source->getInstr()->isPHI() || Dep.getSUnit()->getInstr()->isPHI();
  221. }
  222. bool isLoopCarriedDep(SUnit *Source, const SDep &Dep, bool isSucc = true);
  223. /// The distance function, which indicates that operation V of iteration I
  224. /// depends on operations U of iteration I-distance.
  225. unsigned getDistance(SUnit *U, SUnit *V, const SDep &Dep) {
  226. // Instructions that feed a Phi have a distance of 1. Computing larger
  227. // values for arrays requires data dependence information.
  228. if (V->getInstr()->isPHI() && Dep.getKind() == SDep::Anti)
  229. return 1;
  230. return 0;
  231. }
  232. void applyInstrChange(MachineInstr *MI, SMSchedule &Schedule);
  233. void fixupRegisterOverlaps(std::deque<SUnit *> &Instrs);
  234. /// Return the new base register that was stored away for the changed
  235. /// instruction.
  236. unsigned getInstrBaseReg(SUnit *SU) {
  237. DenseMap<SUnit *, std::pair<unsigned, int64_t>>::iterator It =
  238. InstrChanges.find(SU);
  239. if (It != InstrChanges.end())
  240. return It->second.first;
  241. return 0;
  242. }
  243. void addMutation(std::unique_ptr<ScheduleDAGMutation> Mutation) {
  244. Mutations.push_back(std::move(Mutation));
  245. }
  246. static bool classof(const ScheduleDAGInstrs *DAG) { return true; }
  247. private:
  248. void addLoopCarriedDependences(AAResults *AA);
  249. void updatePhiDependences();
  250. void changeDependences();
  251. unsigned calculateResMII();
  252. unsigned calculateRecMII(NodeSetType &RecNodeSets);
  253. void findCircuits(NodeSetType &NodeSets);
  254. void fuseRecs(NodeSetType &NodeSets);
  255. void removeDuplicateNodes(NodeSetType &NodeSets);
  256. void computeNodeFunctions(NodeSetType &NodeSets);
  257. void registerPressureFilter(NodeSetType &NodeSets);
  258. void colocateNodeSets(NodeSetType &NodeSets);
  259. void checkNodeSets(NodeSetType &NodeSets);
  260. void groupRemainingNodes(NodeSetType &NodeSets);
  261. void addConnectedNodes(SUnit *SU, NodeSet &NewSet,
  262. SetVector<SUnit *> &NodesAdded);
  263. void computeNodeOrder(NodeSetType &NodeSets);
  264. void checkValidNodeOrder(const NodeSetType &Circuits) const;
  265. bool schedulePipeline(SMSchedule &Schedule);
  266. bool computeDelta(MachineInstr &MI, unsigned &Delta);
  267. MachineInstr *findDefInLoop(Register Reg);
  268. bool canUseLastOffsetValue(MachineInstr *MI, unsigned &BasePos,
  269. unsigned &OffsetPos, unsigned &NewBase,
  270. int64_t &NewOffset);
  271. void postprocessDAG();
  272. /// Set the Minimum Initiation Interval for this schedule attempt.
  273. void setMII(unsigned ResMII, unsigned RecMII);
  274. /// Set the Maximum Initiation Interval for this schedule attempt.
  275. void setMAX_II();
  276. };
  277. /// A NodeSet contains a set of SUnit DAG nodes with additional information
  278. /// that assigns a priority to the set.
  279. class NodeSet {
  280. SetVector<SUnit *> Nodes;
  281. bool HasRecurrence = false;
  282. unsigned RecMII = 0;
  283. int MaxMOV = 0;
  284. unsigned MaxDepth = 0;
  285. unsigned Colocate = 0;
  286. SUnit *ExceedPressure = nullptr;
  287. unsigned Latency = 0;
  288. public:
  289. using iterator = SetVector<SUnit *>::const_iterator;
  290. NodeSet() = default;
  291. NodeSet(iterator S, iterator E) : Nodes(S, E), HasRecurrence(true) {
  292. Latency = 0;
  293. for (const SUnit *Node : Nodes) {
  294. DenseMap<SUnit *, unsigned> SuccSUnitLatency;
  295. for (const SDep &Succ : Node->Succs) {
  296. auto SuccSUnit = Succ.getSUnit();
  297. if (!Nodes.count(SuccSUnit))
  298. continue;
  299. unsigned CurLatency = Succ.getLatency();
  300. unsigned MaxLatency = 0;
  301. if (SuccSUnitLatency.count(SuccSUnit))
  302. MaxLatency = SuccSUnitLatency[SuccSUnit];
  303. if (CurLatency > MaxLatency)
  304. SuccSUnitLatency[SuccSUnit] = CurLatency;
  305. }
  306. for (auto SUnitLatency : SuccSUnitLatency)
  307. Latency += SUnitLatency.second;
  308. }
  309. }
  310. bool insert(SUnit *SU) { return Nodes.insert(SU); }
  311. void insert(iterator S, iterator E) { Nodes.insert(S, E); }
  312. template <typename UnaryPredicate> bool remove_if(UnaryPredicate P) {
  313. return Nodes.remove_if(P);
  314. }
  315. unsigned count(SUnit *SU) const { return Nodes.count(SU); }
  316. bool hasRecurrence() { return HasRecurrence; };
  317. unsigned size() const { return Nodes.size(); }
  318. bool empty() const { return Nodes.empty(); }
  319. SUnit *getNode(unsigned i) const { return Nodes[i]; };
  320. void setRecMII(unsigned mii) { RecMII = mii; };
  321. void setColocate(unsigned c) { Colocate = c; };
  322. void setExceedPressure(SUnit *SU) { ExceedPressure = SU; }
  323. bool isExceedSU(SUnit *SU) { return ExceedPressure == SU; }
  324. int compareRecMII(NodeSet &RHS) { return RecMII - RHS.RecMII; }
  325. int getRecMII() { return RecMII; }
  326. /// Summarize node functions for the entire node set.
  327. void computeNodeSetInfo(SwingSchedulerDAG *SSD) {
  328. for (SUnit *SU : *this) {
  329. MaxMOV = std::max(MaxMOV, SSD->getMOV(SU));
  330. MaxDepth = std::max(MaxDepth, SSD->getDepth(SU));
  331. }
  332. }
  333. unsigned getLatency() { return Latency; }
  334. unsigned getMaxDepth() { return MaxDepth; }
  335. void clear() {
  336. Nodes.clear();
  337. RecMII = 0;
  338. HasRecurrence = false;
  339. MaxMOV = 0;
  340. MaxDepth = 0;
  341. Colocate = 0;
  342. ExceedPressure = nullptr;
  343. }
  344. operator SetVector<SUnit *> &() { return Nodes; }
  345. /// Sort the node sets by importance. First, rank them by recurrence MII,
  346. /// then by mobility (least mobile done first), and finally by depth.
  347. /// Each node set may contain a colocate value which is used as the first
  348. /// tie breaker, if it's set.
  349. bool operator>(const NodeSet &RHS) const {
  350. if (RecMII == RHS.RecMII) {
  351. if (Colocate != 0 && RHS.Colocate != 0 && Colocate != RHS.Colocate)
  352. return Colocate < RHS.Colocate;
  353. if (MaxMOV == RHS.MaxMOV)
  354. return MaxDepth > RHS.MaxDepth;
  355. return MaxMOV < RHS.MaxMOV;
  356. }
  357. return RecMII > RHS.RecMII;
  358. }
  359. bool operator==(const NodeSet &RHS) const {
  360. return RecMII == RHS.RecMII && MaxMOV == RHS.MaxMOV &&
  361. MaxDepth == RHS.MaxDepth;
  362. }
  363. bool operator!=(const NodeSet &RHS) const { return !operator==(RHS); }
  364. iterator begin() { return Nodes.begin(); }
  365. iterator end() { return Nodes.end(); }
  366. void print(raw_ostream &os) const;
  367. #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  368. LLVM_DUMP_METHOD void dump() const;
  369. #endif
  370. };
  // Inline capacity for the per-resource SmallVectors below. 16 was chosen
  // from the number of ProcResource kinds across the existing subtargets, so
  // that those vectors rarely have to grow.
  static constexpr int DefaultProcResSize = 16;
  374. class ResourceManager {
  375. private:
  376. const MCSubtargetInfo *STI;
  377. const MCSchedModel &SM;
  378. const TargetSubtargetInfo *ST;
  379. const TargetInstrInfo *TII;
  380. SwingSchedulerDAG *DAG;
  381. const bool UseDFA;
  382. /// DFA resources for each slot
  383. llvm::SmallVector<std::unique_ptr<DFAPacketizer>> DFAResources;
  384. /// Modulo Reservation Table. When a resource with ID R is consumed in cycle
  385. /// C, it is counted in MRT[C mod II][R]. (Used when UseDFA == F)
  386. llvm::SmallVector<llvm::SmallVector<uint64_t, DefaultProcResSize>> MRT;
  387. /// The number of scheduled micro operations for each slot. Micro operations
  388. /// are assumed to be scheduled one per cycle, starting with the cycle in
  389. /// which the instruction is scheduled.
  390. llvm::SmallVector<int> NumScheduledMops;
  391. /// Each processor resource is associated with a so-called processor resource
  392. /// mask. This vector allows to correlate processor resource IDs with
  393. /// processor resource masks. There is exactly one element per each processor
  394. /// resource declared by the scheduling model.
  395. llvm::SmallVector<uint64_t, DefaultProcResSize> ProcResourceMasks;
  396. int InitiationInterval;
  397. /// The number of micro operations that can be scheduled at a cycle.
  398. int IssueWidth;
  399. int calculateResMIIDFA() const;
  400. /// Check if MRT is overbooked
  401. bool isOverbooked() const;
  402. /// Reserve resources on MRT
  403. void reserveResources(const MCSchedClassDesc *SCDesc, int Cycle);
  404. /// Unreserve resources on MRT
  405. void unreserveResources(const MCSchedClassDesc *SCDesc, int Cycle);
  406. /// Return M satisfying Dividend = Divisor * X + M, 0 < M < Divisor.
  407. /// The slot on MRT to reserve a resource for the cycle C is positiveModulo(C,
  408. /// II).
  409. int positiveModulo(int Dividend, int Divisor) const {
  410. assert(Divisor > 0);
  411. int R = Dividend % Divisor;
  412. if (R < 0)
  413. R += Divisor;
  414. return R;
  415. }
  416. #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  417. LLVM_DUMP_METHOD void dumpMRT() const;
  418. #endif
  419. public:
  420. ResourceManager(const TargetSubtargetInfo *ST, SwingSchedulerDAG *DAG)
  421. : STI(ST), SM(ST->getSchedModel()), ST(ST), TII(ST->getInstrInfo()),
  422. DAG(DAG), UseDFA(ST->useDFAforSMS()),
  423. ProcResourceMasks(SM.getNumProcResourceKinds(), 0),
  424. IssueWidth(SM.IssueWidth) {
  425. initProcResourceVectors(SM, ProcResourceMasks);
  426. if (IssueWidth <= 0)
  427. // If IssueWidth is not specified, set a sufficiently large value
  428. IssueWidth = 100;
  429. if (SwpForceIssueWidth > 0)
  430. IssueWidth = SwpForceIssueWidth;
  431. }
  432. void initProcResourceVectors(const MCSchedModel &SM,
  433. SmallVectorImpl<uint64_t> &Masks);
  434. /// Check if the resources occupied by a machine instruction are available
  435. /// in the current state.
  436. bool canReserveResources(SUnit &SU, int Cycle);
  437. /// Reserve the resources occupied by a machine instruction and change the
  438. /// current state to reflect that change.
  439. void reserveResources(SUnit &SU, int Cycle);
  440. int calculateResMII() const;
  441. /// Initialize resources with the initiation interval II.
  442. void init(int II);
  443. };
  444. /// This class represents the scheduled code. The main data structure is a
  445. /// map from scheduled cycle to instructions. During scheduling, the
  446. /// data structure explicitly represents all stages/iterations. When
  447. /// the algorithm finshes, the schedule is collapsed into a single stage,
  448. /// which represents instructions from different loop iterations.
  449. ///
  450. /// The SMS algorithm allows negative values for cycles, so the first cycle
  451. /// in the schedule is the smallest cycle value.
  452. class SMSchedule {
  453. private:
  454. /// Map from execution cycle to instructions.
  455. DenseMap<int, std::deque<SUnit *>> ScheduledInstrs;
  456. /// Map from instruction to execution cycle.
  457. std::map<SUnit *, int> InstrToCycle;
  458. /// Keep track of the first cycle value in the schedule. It starts
  459. /// as zero, but the algorithm allows negative values.
  460. int FirstCycle = 0;
  461. /// Keep track of the last cycle value in the schedule.
  462. int LastCycle = 0;
  463. /// The initiation interval (II) for the schedule.
  464. int InitiationInterval = 0;
  465. /// Target machine information.
  466. const TargetSubtargetInfo &ST;
  467. /// Virtual register information.
  468. MachineRegisterInfo &MRI;
  469. ResourceManager ProcItinResources;
  470. public:
  471. SMSchedule(MachineFunction *mf, SwingSchedulerDAG *DAG)
  472. : ST(mf->getSubtarget()), MRI(mf->getRegInfo()),
  473. ProcItinResources(&ST, DAG) {}
  474. void reset() {
  475. ScheduledInstrs.clear();
  476. InstrToCycle.clear();
  477. FirstCycle = 0;
  478. LastCycle = 0;
  479. InitiationInterval = 0;
  480. }
  481. /// Set the initiation interval for this schedule.
  482. void setInitiationInterval(int ii) {
  483. InitiationInterval = ii;
  484. ProcItinResources.init(ii);
  485. }
  486. /// Return the initiation interval for this schedule.
  487. int getInitiationInterval() const { return InitiationInterval; }
  488. /// Return the first cycle in the completed schedule. This
  489. /// can be a negative value.
  490. int getFirstCycle() const { return FirstCycle; }
  491. /// Return the last cycle in the finalized schedule.
  492. int getFinalCycle() const { return FirstCycle + InitiationInterval - 1; }
  493. /// Return the cycle of the earliest scheduled instruction in the dependence
  494. /// chain.
  495. int earliestCycleInChain(const SDep &Dep);
  496. /// Return the cycle of the latest scheduled instruction in the dependence
  497. /// chain.
  498. int latestCycleInChain(const SDep &Dep);
  499. void computeStart(SUnit *SU, int *MaxEarlyStart, int *MinLateStart,
  500. int *MinEnd, int *MaxStart, int II, SwingSchedulerDAG *DAG);
  501. bool insert(SUnit *SU, int StartCycle, int EndCycle, int II);
  502. /// Iterators for the cycle to instruction map.
  503. using sched_iterator = DenseMap<int, std::deque<SUnit *>>::iterator;
  504. using const_sched_iterator =
  505. DenseMap<int, std::deque<SUnit *>>::const_iterator;
  506. /// Return true if the instruction is scheduled at the specified stage.
  507. bool isScheduledAtStage(SUnit *SU, unsigned StageNum) {
  508. return (stageScheduled(SU) == (int)StageNum);
  509. }
  510. /// Return the stage for a scheduled instruction. Return -1 if
  511. /// the instruction has not been scheduled.
  512. int stageScheduled(SUnit *SU) const {
  513. std::map<SUnit *, int>::const_iterator it = InstrToCycle.find(SU);
  514. if (it == InstrToCycle.end())
  515. return -1;
  516. return (it->second - FirstCycle) / InitiationInterval;
  517. }
  518. /// Return the cycle for a scheduled instruction. This function normalizes
  519. /// the first cycle to be 0.
  520. unsigned cycleScheduled(SUnit *SU) const {
  521. std::map<SUnit *, int>::const_iterator it = InstrToCycle.find(SU);
  522. assert(it != InstrToCycle.end() && "Instruction hasn't been scheduled.");
  523. return (it->second - FirstCycle) % InitiationInterval;
  524. }
  525. /// Return the maximum stage count needed for this schedule.
  526. unsigned getMaxStageCount() {
  527. return (LastCycle - FirstCycle) / InitiationInterval;
  528. }
  529. /// Return the instructions that are scheduled at the specified cycle.
  530. std::deque<SUnit *> &getInstructions(int cycle) {
  531. return ScheduledInstrs[cycle];
  532. }
  533. SmallSet<SUnit *, 8>
  534. computeUnpipelineableNodes(SwingSchedulerDAG *SSD,
  535. TargetInstrInfo::PipelinerLoopInfo *PLI);
  536. bool
  537. normalizeNonPipelinedInstructions(SwingSchedulerDAG *SSD,
  538. TargetInstrInfo::PipelinerLoopInfo *PLI);
  539. bool isValidSchedule(SwingSchedulerDAG *SSD);
  540. void finalizeSchedule(SwingSchedulerDAG *SSD);
  541. void orderDependence(SwingSchedulerDAG *SSD, SUnit *SU,
  542. std::deque<SUnit *> &Insts);
  543. bool isLoopCarried(SwingSchedulerDAG *SSD, MachineInstr &Phi);
  544. bool isLoopCarriedDefOfUse(SwingSchedulerDAG *SSD, MachineInstr *Def,
  545. MachineOperand &MO);
  546. void print(raw_ostream &os) const;
  547. void dump() const;
  548. };
  549. } // end namespace llvm
  550. #endif // LLVM_CODEGEN_MACHINEPIPELINER_H
  551. #ifdef __GNUC__
  552. #pragma GCC diagnostic pop
  553. #endif