ScheduleDAGInstrs.h 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408
  1. #pragma once
  2. #ifdef __GNUC__
  3. #pragma GCC diagnostic push
  4. #pragma GCC diagnostic ignored "-Wunused-parameter"
  5. #endif
  6. //===- ScheduleDAGInstrs.h - MachineInstr Scheduling ------------*- C++ -*-===//
  7. //
  8. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  9. // See https://llvm.org/LICENSE.txt for license information.
  10. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  11. //
  12. //===----------------------------------------------------------------------===//
  13. //
  14. /// \file Implements the ScheduleDAGInstrs class, which implements scheduling
  15. /// for a MachineInstr-based dependency graph.
  16. //
  17. //===----------------------------------------------------------------------===//
  18. #ifndef LLVM_CODEGEN_SCHEDULEDAGINSTRS_H
  19. #define LLVM_CODEGEN_SCHEDULEDAGINSTRS_H
  20. #include "llvm/ADT/DenseMap.h"
  21. #include "llvm/ADT/PointerIntPair.h"
  22. #include "llvm/ADT/SmallVector.h"
  23. #include "llvm/ADT/SparseMultiSet.h"
  24. #include "llvm/ADT/SparseSet.h"
  25. #include "llvm/ADT/identity.h"
  26. #include "llvm/CodeGen/LivePhysRegs.h"
  27. #include "llvm/CodeGen/MachineBasicBlock.h"
  28. #include "llvm/CodeGen/ScheduleDAG.h"
  29. #include "llvm/CodeGen/TargetRegisterInfo.h"
  30. #include "llvm/CodeGen/TargetSchedule.h"
  31. #include "llvm/MC/LaneBitmask.h"
  32. #include <cassert>
  33. #include <cstdint>
  34. #include <list>
  35. #include <string>
  36. #include <utility>
  37. #include <vector>
  38. namespace llvm {
  39. class AAResults;
  40. class LiveIntervals;
  41. class MachineFrameInfo;
  42. class MachineFunction;
  43. class MachineInstr;
  44. class MachineLoopInfo;
  45. class MachineOperand;
  46. struct MCSchedClassDesc;
  47. class PressureDiffs;
  48. class PseudoSourceValue;
  49. class RegPressureTracker;
  50. class UndefValue;
  51. class Value;
  52. /// An individual mapping from virtual register number to SUnit.
  53. struct VReg2SUnit {
  54. unsigned VirtReg;
  55. LaneBitmask LaneMask;
  56. SUnit *SU;
  57. VReg2SUnit(unsigned VReg, LaneBitmask LaneMask, SUnit *SU)
  58. : VirtReg(VReg), LaneMask(LaneMask), SU(SU) {}
  59. unsigned getSparseSetIndex() const {
  60. return Register::virtReg2Index(VirtReg);
  61. }
  62. };
  63. /// Mapping from virtual register to SUnit including an operand index.
  64. struct VReg2SUnitOperIdx : public VReg2SUnit {
  65. unsigned OperandIndex;
  66. VReg2SUnitOperIdx(unsigned VReg, LaneBitmask LaneMask,
  67. unsigned OperandIndex, SUnit *SU)
  68. : VReg2SUnit(VReg, LaneMask, SU), OperandIndex(OperandIndex) {}
  69. };
  70. /// Record a physical register access.
  71. /// For non-data-dependent uses, OpIdx == -1.
  72. struct PhysRegSUOper {
  73. SUnit *SU;
  74. int OpIdx;
  75. unsigned Reg;
  76. PhysRegSUOper(SUnit *su, int op, unsigned R): SU(su), OpIdx(op), Reg(R) {}
  77. unsigned getSparseSetIndex() const { return Reg; }
  78. };
  79. /// Use a SparseMultiSet to track physical registers. Storage is only
  80. /// allocated once for the pass. It can be cleared in constant time and reused
  81. /// without any frees.
  82. using Reg2SUnitsMap =
  83. SparseMultiSet<PhysRegSUOper, identity<unsigned>, uint16_t>;
  84. /// Use SparseSet as a SparseMap by relying on the fact that it never
  85. /// compares ValueT's, only unsigned keys. This allows the set to be cleared
  86. /// between scheduling regions in constant time as long as ValueT does not
  87. /// require a destructor.
  88. using VReg2SUnitMap = SparseSet<VReg2SUnit, VirtReg2IndexFunctor>;
  89. /// Track local uses of virtual registers. These uses are gathered by the DAG
  90. /// builder and may be consulted by the scheduler to avoid iterating an entire
  91. /// vreg use list.
  92. using VReg2SUnitMultiMap = SparseMultiSet<VReg2SUnit, VirtReg2IndexFunctor>;
  93. using VReg2SUnitOperIdxMultiMap =
  94. SparseMultiSet<VReg2SUnitOperIdx, VirtReg2IndexFunctor>;
  95. using ValueType = PointerUnion<const Value *, const PseudoSourceValue *>;
  96. struct UnderlyingObject : PointerIntPair<ValueType, 1, bool> {
  97. UnderlyingObject(ValueType V, bool MayAlias)
  98. : PointerIntPair<ValueType, 1, bool>(V, MayAlias) {}
  99. ValueType getValue() const { return getPointer(); }
  100. bool mayAlias() const { return getInt(); }
  101. };
  102. using UnderlyingObjectsVector = SmallVector<UnderlyingObject, 4>;
  103. /// A ScheduleDAG for scheduling lists of MachineInstr.
  104. class ScheduleDAGInstrs : public ScheduleDAG {
  105. protected:
  106. const MachineLoopInfo *MLI;
  107. const MachineFrameInfo &MFI;
  108. /// TargetSchedModel provides an interface to the machine model.
  109. TargetSchedModel SchedModel;
  110. /// True if the DAG builder should remove kill flags (in preparation for
  111. /// rescheduling).
  112. bool RemoveKillFlags;
  113. /// The standard DAG builder does not normally include terminators as DAG
  114. /// nodes because it does not create the necessary dependencies to prevent
  115. /// reordering. A specialized scheduler can override
  116. /// TargetInstrInfo::isSchedulingBoundary then enable this flag to indicate
  117. /// it has taken responsibility for scheduling the terminator correctly.
  118. bool CanHandleTerminators = false;
  119. /// Whether lane masks should get tracked.
  120. bool TrackLaneMasks = false;
  121. // State specific to the current scheduling region.
  122. // ------------------------------------------------
  123. /// The block in which to insert instructions
  124. MachineBasicBlock *BB;
  125. /// The beginning of the range to be scheduled.
  126. MachineBasicBlock::iterator RegionBegin;
  127. /// The end of the range to be scheduled.
  128. MachineBasicBlock::iterator RegionEnd;
  129. /// Instructions in this region (distance(RegionBegin, RegionEnd)).
  130. unsigned NumRegionInstrs;
  131. /// After calling BuildSchedGraph, each machine instruction in the current
  132. /// scheduling region is mapped to an SUnit.
  133. DenseMap<MachineInstr*, SUnit*> MISUnitMap;
  134. // State internal to DAG building.
  135. // -------------------------------
  136. /// Defs, Uses - Remember where defs and uses of each register are as we
  137. /// iterate upward through the instructions. This is allocated here instead
  138. /// of inside BuildSchedGraph to avoid the need for it to be initialized and
  139. /// destructed for each block.
  140. Reg2SUnitsMap Defs;
  141. Reg2SUnitsMap Uses;
  142. /// Tracks the last instruction(s) in this region defining each virtual
  143. /// register. There may be multiple current definitions for a register with
  144. /// disjunct lanemasks.
  145. VReg2SUnitMultiMap CurrentVRegDefs;
  146. /// Tracks the last instructions in this region using each virtual register.
  147. VReg2SUnitOperIdxMultiMap CurrentVRegUses;
  148. AAResults *AAForDep = nullptr;
  149. /// Remember a generic side-effecting instruction as we proceed.
  150. /// No other SU ever gets scheduled around it (except in the special
  151. /// case of a huge region that gets reduced).
  152. SUnit *BarrierChain = nullptr;
  153. public:
  154. /// A list of SUnits, used in Value2SUsMap, during DAG construction.
  155. /// Note: to gain speed it might be worth investigating an optimized
  156. /// implementation of this data structure, such as a singly linked list
  157. /// with a memory pool (SmallVector was tried but slow and SparseSet is not
  158. /// applicable).
  159. using SUList = std::list<SUnit *>;
  160. protected:
  161. /// A map from ValueType to SUList, used during DAG construction, as
  162. /// a means of remembering which SUs depend on which memory locations.
  163. class Value2SUsMap;
  164. /// Reduces maps in FIFO order, by N SUs. This is better than turning
  165. /// every Nth memory SU into BarrierChain in buildSchedGraph(), since
  166. /// it avoids unnecessary edges between seen SUs above the new BarrierChain,
  167. /// and those below it.
  168. void reduceHugeMemNodeMaps(Value2SUsMap &stores,
  169. Value2SUsMap &loads, unsigned N);
  170. /// Adds a chain edge between SUa and SUb, but only if both
  171. /// AAResults and Target fail to deny the dependency.
  172. void addChainDependency(SUnit *SUa, SUnit *SUb,
  173. unsigned Latency = 0);
  174. /// Adds dependencies as needed from all SUs in list to SU.
  175. void addChainDependencies(SUnit *SU, SUList &SUs, unsigned Latency) {
  176. for (SUnit *Entry : SUs)
  177. addChainDependency(SU, Entry, Latency);
  178. }
  179. /// Adds dependencies as needed from all SUs in map, to SU.
  180. void addChainDependencies(SUnit *SU, Value2SUsMap &Val2SUsMap);
  181. /// Adds dependencies as needed to SU, from all SUs mapped to V.
  182. void addChainDependencies(SUnit *SU, Value2SUsMap &Val2SUsMap,
  183. ValueType V);
  184. /// Adds barrier chain edges from all SUs in map, and then clear the map.
  185. /// This is equivalent to insertBarrierChain(), but optimized for the common
  186. /// case where the new BarrierChain (a global memory object) has a higher
  187. /// NodeNum than all SUs in map. It is assumed BarrierChain has been set
  188. /// before calling this.
  189. void addBarrierChain(Value2SUsMap &map);
  190. /// Inserts a barrier chain in a huge region, far below current SU.
  191. /// Adds barrier chain edges from all SUs in map with higher NodeNums than
  192. /// this new BarrierChain, and remove them from map. It is assumed
  193. /// BarrierChain has been set before calling this.
  194. void insertBarrierChain(Value2SUsMap &map);
  195. /// For an unanalyzable memory access, this Value is used in maps.
  196. UndefValue *UnknownValue;
  197. /// Topo - A topological ordering for SUnits which permits fast IsReachable
  198. /// and similar queries.
  199. ScheduleDAGTopologicalSort Topo;
  200. using DbgValueVector =
  201. std::vector<std::pair<MachineInstr *, MachineInstr *>>;
  202. /// Remember instruction that precedes DBG_VALUE.
  203. /// These are generated by buildSchedGraph but persist so they can be
  204. /// referenced when emitting the final schedule.
  205. DbgValueVector DbgValues;
  206. MachineInstr *FirstDbgValue = nullptr;
  207. /// Set of live physical registers for updating kill flags.
  208. LivePhysRegs LiveRegs;
  209. public:
  210. explicit ScheduleDAGInstrs(MachineFunction &mf,
  211. const MachineLoopInfo *mli,
  212. bool RemoveKillFlags = false);
  213. ~ScheduleDAGInstrs() override = default;
  214. /// Gets the machine model for instruction scheduling.
  215. const TargetSchedModel *getSchedModel() const { return &SchedModel; }
  216. /// Resolves and cache a resolved scheduling class for an SUnit.
  217. const MCSchedClassDesc *getSchedClass(SUnit *SU) const {
  218. if (!SU->SchedClass && SchedModel.hasInstrSchedModel())
  219. SU->SchedClass = SchedModel.resolveSchedClass(SU->getInstr());
  220. return SU->SchedClass;
  221. }
  222. /// IsReachable - Checks if SU is reachable from TargetSU.
  223. bool IsReachable(SUnit *SU, SUnit *TargetSU) {
  224. return Topo.IsReachable(SU, TargetSU);
  225. }
  226. /// Returns an iterator to the top of the current scheduling region.
  227. MachineBasicBlock::iterator begin() const { return RegionBegin; }
  228. /// Returns an iterator to the bottom of the current scheduling region.
  229. MachineBasicBlock::iterator end() const { return RegionEnd; }
  230. /// Creates a new SUnit and return a ptr to it.
  231. SUnit *newSUnit(MachineInstr *MI);
  232. /// Returns an existing SUnit for this MI, or nullptr.
  233. SUnit *getSUnit(MachineInstr *MI) const;
  234. /// If this method returns true, handling of the scheduling regions
  235. /// themselves (in case of a scheduling boundary in MBB) will be done
  236. /// beginning with the topmost region of MBB.
  237. virtual bool doMBBSchedRegionsTopDown() const { return false; }
  238. /// Prepares to perform scheduling in the given block.
  239. virtual void startBlock(MachineBasicBlock *BB);
  240. /// Cleans up after scheduling in the given block.
  241. virtual void finishBlock();
  242. /// Initialize the DAG and common scheduler state for a new
  243. /// scheduling region. This does not actually create the DAG, only clears
  244. /// it. The scheduling driver may call BuildSchedGraph multiple times per
  245. /// scheduling region.
  246. virtual void enterRegion(MachineBasicBlock *bb,
  247. MachineBasicBlock::iterator begin,
  248. MachineBasicBlock::iterator end,
  249. unsigned regioninstrs);
  250. /// Called when the scheduler has finished scheduling the current region.
  251. virtual void exitRegion();
  252. /// Builds SUnits for the current region.
  253. /// If \p RPTracker is non-null, compute register pressure as a side effect.
  254. /// The DAG builder is an efficient place to do it because it already visits
  255. /// operands.
  256. void buildSchedGraph(AAResults *AA,
  257. RegPressureTracker *RPTracker = nullptr,
  258. PressureDiffs *PDiffs = nullptr,
  259. LiveIntervals *LIS = nullptr,
  260. bool TrackLaneMasks = false);
  261. /// Adds dependencies from instructions in the current list of
  262. /// instructions being scheduled to scheduling barrier. We want to make sure
  263. /// instructions which define registers that are either used by the
  264. /// terminator or are live-out are properly scheduled. This is especially
  265. /// important when the definition latency of the return value(s) are too
  266. /// high to be hidden by the branch or when the liveout registers used by
  267. /// instructions in the fallthrough block.
  268. void addSchedBarrierDeps();
  269. /// Orders nodes according to selected style.
  270. ///
  271. /// Typically, a scheduling algorithm will implement schedule() without
  272. /// overriding enterRegion() or exitRegion().
  273. virtual void schedule() = 0;
  274. /// Allow targets to perform final scheduling actions at the level of the
  275. /// whole MachineFunction. By default does nothing.
  276. virtual void finalizeSchedule() {}
  277. void dumpNode(const SUnit &SU) const override;
  278. void dump() const override;
  279. /// Returns a label for a DAG node that points to an instruction.
  280. std::string getGraphNodeLabel(const SUnit *SU) const override;
  281. /// Returns a label for the region of code covered by the DAG.
  282. std::string getDAGName() const override;
  283. /// Fixes register kill flags that scheduling has made invalid.
  284. void fixupKills(MachineBasicBlock &MBB);
  285. /// True if an edge can be added from PredSU to SuccSU without creating
  286. /// a cycle.
  287. bool canAddEdge(SUnit *SuccSU, SUnit *PredSU);
  288. /// Add a DAG edge to the given SU with the given predecessor
  289. /// dependence data.
  290. ///
  291. /// \returns true if the edge may be added without creating a cycle OR if an
  292. /// equivalent edge already existed (false indicates failure).
  293. bool addEdge(SUnit *SuccSU, const SDep &PredDep);
  294. protected:
  295. void initSUnits();
  296. void addPhysRegDataDeps(SUnit *SU, unsigned OperIdx);
  297. void addPhysRegDeps(SUnit *SU, unsigned OperIdx);
  298. void addVRegDefDeps(SUnit *SU, unsigned OperIdx);
  299. void addVRegUseDeps(SUnit *SU, unsigned OperIdx);
  300. /// Returns a mask for which lanes get read/written by the given (register)
  301. /// machine operand.
  302. LaneBitmask getLaneMaskForMO(const MachineOperand &MO) const;
  303. /// Returns true if the def register in \p MO has no uses.
  304. bool deadDefHasNoUse(const MachineOperand &MO);
  305. };
  306. /// Creates a new SUnit and return a ptr to it.
  307. inline SUnit *ScheduleDAGInstrs::newSUnit(MachineInstr *MI) {
  308. #ifndef NDEBUG
  309. const SUnit *Addr = SUnits.empty() ? nullptr : &SUnits[0];
  310. #endif
  311. SUnits.emplace_back(MI, (unsigned)SUnits.size());
  312. assert((Addr == nullptr || Addr == &SUnits[0]) &&
  313. "SUnits std::vector reallocated on the fly!");
  314. return &SUnits.back();
  315. }
  316. /// Returns an existing SUnit for this MI, or nullptr.
  317. inline SUnit *ScheduleDAGInstrs::getSUnit(MachineInstr *MI) const {
  318. return MISUnitMap.lookup(MI);
  319. }
  320. } // end namespace llvm
  321. #endif // LLVM_CODEGEN_SCHEDULEDAGINSTRS_H
  322. #ifdef __GNUC__
  323. #pragma GCC diagnostic pop
  324. #endif