MachineScheduler.cpp
  1. //===- MachineScheduler.cpp - Machine Instruction Scheduler ---------------===//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. //
  9. // MachineScheduler schedules machine instructions after phi elimination. It
  10. // preserves LiveIntervals so it can be invoked before register allocation.
  11. //
  12. //===----------------------------------------------------------------------===//
  13. #include "llvm/CodeGen/MachineScheduler.h"
  14. #include "llvm/ADT/ArrayRef.h"
  15. #include "llvm/ADT/BitVector.h"
  16. #include "llvm/ADT/DenseMap.h"
  17. #include "llvm/ADT/PriorityQueue.h"
  18. #include "llvm/ADT/STLExtras.h"
  19. #include "llvm/ADT/SmallVector.h"
  20. #include "llvm/ADT/Statistic.h"
  21. #include "llvm/ADT/iterator_range.h"
  22. #include "llvm/Analysis/AliasAnalysis.h"
  23. #include "llvm/CodeGen/LiveInterval.h"
  24. #include "llvm/CodeGen/LiveIntervals.h"
  25. #include "llvm/CodeGen/MachineBasicBlock.h"
  26. #include "llvm/CodeGen/MachineDominators.h"
  27. #include "llvm/CodeGen/MachineFunction.h"
  28. #include "llvm/CodeGen/MachineFunctionPass.h"
  29. #include "llvm/CodeGen/MachineInstr.h"
  30. #include "llvm/CodeGen/MachineLoopInfo.h"
  31. #include "llvm/CodeGen/MachineOperand.h"
  32. #include "llvm/CodeGen/MachinePassRegistry.h"
  33. #include "llvm/CodeGen/MachineRegisterInfo.h"
  34. #include "llvm/CodeGen/Passes.h"
  35. #include "llvm/CodeGen/RegisterClassInfo.h"
  36. #include "llvm/CodeGen/RegisterPressure.h"
  37. #include "llvm/CodeGen/ScheduleDAG.h"
  38. #include "llvm/CodeGen/ScheduleDAGInstrs.h"
  39. #include "llvm/CodeGen/ScheduleDAGMutation.h"
  40. #include "llvm/CodeGen/ScheduleDFS.h"
  41. #include "llvm/CodeGen/ScheduleHazardRecognizer.h"
  42. #include "llvm/CodeGen/SlotIndexes.h"
  43. #include "llvm/CodeGen/TargetFrameLowering.h"
  44. #include "llvm/CodeGen/TargetInstrInfo.h"
  45. #include "llvm/CodeGen/TargetLowering.h"
  46. #include "llvm/CodeGen/TargetPassConfig.h"
  47. #include "llvm/CodeGen/TargetRegisterInfo.h"
  48. #include "llvm/CodeGen/TargetSchedule.h"
  49. #include "llvm/CodeGen/TargetSubtargetInfo.h"
  50. #include "llvm/Config/llvm-config.h"
  51. #include "llvm/InitializePasses.h"
  52. #include "llvm/MC/LaneBitmask.h"
  53. #include "llvm/Pass.h"
  54. #include "llvm/Support/CommandLine.h"
  55. #include "llvm/Support/Compiler.h"
  56. #include "llvm/Support/Debug.h"
  57. #include "llvm/Support/ErrorHandling.h"
  58. #include "llvm/Support/GraphWriter.h"
  59. #include "llvm/Support/MachineValueType.h"
  60. #include "llvm/Support/raw_ostream.h"
  61. #include <algorithm>
  62. #include <cassert>
  63. #include <cstdint>
  64. #include <iterator>
  65. #include <limits>
  66. #include <memory>
  67. #include <string>
  68. #include <tuple>
  69. #include <utility>
  70. #include <vector>
  71. using namespace llvm;
  72. #define DEBUG_TYPE "machine-scheduler"
  73. STATISTIC(NumClustered, "Number of load/store pairs clustered");
  74. namespace llvm {
  75. cl::opt<bool> ForceTopDown("misched-topdown", cl::Hidden,
  76. cl::desc("Force top-down list scheduling"));
  77. cl::opt<bool> ForceBottomUp("misched-bottomup", cl::Hidden,
  78. cl::desc("Force bottom-up list scheduling"));
  79. cl::opt<bool>
  80. DumpCriticalPathLength("misched-dcpl", cl::Hidden,
  81. cl::desc("Print critical path length to stdout"));
  82. cl::opt<bool> VerifyScheduling(
  83. "verify-misched", cl::Hidden,
  84. cl::desc("Verify machine instrs before and after machine scheduling"));
  85. #ifndef NDEBUG
  86. cl::opt<bool> ViewMISchedDAGs(
  87. "view-misched-dags", cl::Hidden,
  88. cl::desc("Pop up a window to show MISched dags after they are processed"));
  89. #else
  90. const bool ViewMISchedDAGs = false;
  91. #endif // NDEBUG
  92. } // end namespace llvm
  93. #ifndef NDEBUG
  94. /// In some situations a few uninteresting nodes depend on nearly all other
  95. /// nodes in the graph; provide a cutoff to hide them.
  96. static cl::opt<unsigned> ViewMISchedCutoff("view-misched-cutoff", cl::Hidden,
  97. cl::desc("Hide nodes with more predecessors/successors than the cutoff"));
  98. static cl::opt<unsigned> MISchedCutoff("misched-cutoff", cl::Hidden,
  99. cl::desc("Stop scheduling after N instructions"), cl::init(~0U));
  100. static cl::opt<std::string> SchedOnlyFunc("misched-only-func", cl::Hidden,
  101. cl::desc("Only schedule this function"));
  102. static cl::opt<unsigned> SchedOnlyBlock("misched-only-block", cl::Hidden,
  103. cl::desc("Only schedule this MBB#"));
  104. static cl::opt<bool> PrintDAGs("misched-print-dags", cl::Hidden,
  105. cl::desc("Print schedule DAGs"));
  106. #else
  107. static const bool PrintDAGs = false;
  108. #endif // NDEBUG
  109. /// Avoid quadratic complexity in unusually large basic blocks by limiting the
  110. /// size of the ready lists.
  111. static cl::opt<unsigned> ReadyListLimit("misched-limit", cl::Hidden,
  112. cl::desc("Limit ready list to N instructions"), cl::init(256));
  113. static cl::opt<bool> EnableRegPressure("misched-regpressure", cl::Hidden,
  114. cl::desc("Enable register pressure scheduling."), cl::init(true));
  115. static cl::opt<bool> EnableCyclicPath("misched-cyclicpath", cl::Hidden,
  116. cl::desc("Enable cyclic critical path analysis."), cl::init(true));
  117. static cl::opt<bool> EnableMemOpCluster("misched-cluster", cl::Hidden,
  118. cl::desc("Enable memop clustering."),
  119. cl::init(true));
  120. static cl::opt<bool>
  121. ForceFastCluster("force-fast-cluster", cl::Hidden,
  122. cl::desc("Switch to the fast cluster algorithm, at the cost "
  123. "of losing some fusion opportunities"),
  124. cl::init(false));
  125. static cl::opt<unsigned>
  126. FastClusterThreshold("fast-cluster-threshold", cl::Hidden,
  127. cl::desc("The threshold for fast cluster"),
  128. cl::init(1000));
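// Illustrative usage sketch: the cl::opts above are driven from the llc
// command line (or via clang -mllvm). The flag spellings come from the
// declarations above; "foo.ll" and "main" are placeholders, and options
// declared under #ifndef NDEBUG are only available in assert-enabled builds.
//
//   llc foo.ll -verify-misched -misched-print-dags -misched-only-func=main
//   llc foo.ll -misched-topdown -misched-limit=64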
  129. // DAG subtrees must have at least this many nodes.
  130. static const unsigned MinSubtreeSize = 8;
  131. // Pin the vtables to this file.
  132. void MachineSchedStrategy::anchor() {}
  133. void ScheduleDAGMutation::anchor() {}
  134. //===----------------------------------------------------------------------===//
  135. // Machine Instruction Scheduling Pass and Registry
  136. //===----------------------------------------------------------------------===//
  137. MachineSchedContext::MachineSchedContext() {
  138. RegClassInfo = new RegisterClassInfo();
  139. }
  140. MachineSchedContext::~MachineSchedContext() {
  141. delete RegClassInfo;
  142. }
  143. namespace {
  144. /// Base class for a machine scheduler class that can run at any point.
  145. class MachineSchedulerBase : public MachineSchedContext,
  146. public MachineFunctionPass {
  147. public:
  148. MachineSchedulerBase(char &ID): MachineFunctionPass(ID) {}
  149. void print(raw_ostream &O, const Module* = nullptr) const override;
  150. protected:
  151. void scheduleRegions(ScheduleDAGInstrs &Scheduler, bool FixKillFlags);
  152. };
  153. /// MachineScheduler runs after coalescing and before register allocation.
  154. class MachineScheduler : public MachineSchedulerBase {
  155. public:
  156. MachineScheduler();
  157. void getAnalysisUsage(AnalysisUsage &AU) const override;
  158. bool runOnMachineFunction(MachineFunction&) override;
  159. static char ID; // Class identification, replacement for typeinfo
  160. protected:
  161. ScheduleDAGInstrs *createMachineScheduler();
  162. };
  163. /// PostMachineScheduler runs shortly before code emission.
  164. class PostMachineScheduler : public MachineSchedulerBase {
  165. public:
  166. PostMachineScheduler();
  167. void getAnalysisUsage(AnalysisUsage &AU) const override;
  168. bool runOnMachineFunction(MachineFunction&) override;
  169. static char ID; // Class identification, replacement for typeinfo
  170. protected:
  171. ScheduleDAGInstrs *createPostMachineScheduler();
  172. };
  173. } // end anonymous namespace
  174. char MachineScheduler::ID = 0;
  175. char &llvm::MachineSchedulerID = MachineScheduler::ID;
  176. INITIALIZE_PASS_BEGIN(MachineScheduler, DEBUG_TYPE,
  177. "Machine Instruction Scheduler", false, false)
  178. INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
  179. INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
  180. INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
  181. INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
  182. INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
  183. INITIALIZE_PASS_END(MachineScheduler, DEBUG_TYPE,
  184. "Machine Instruction Scheduler", false, false)
  185. MachineScheduler::MachineScheduler() : MachineSchedulerBase(ID) {
  186. initializeMachineSchedulerPass(*PassRegistry::getPassRegistry());
  187. }
  188. void MachineScheduler::getAnalysisUsage(AnalysisUsage &AU) const {
  189. AU.setPreservesCFG();
  190. AU.addRequired<MachineDominatorTree>();
  191. AU.addRequired<MachineLoopInfo>();
  192. AU.addRequired<AAResultsWrapperPass>();
  193. AU.addRequired<TargetPassConfig>();
  194. AU.addRequired<SlotIndexes>();
  195. AU.addPreserved<SlotIndexes>();
  196. AU.addRequired<LiveIntervals>();
  197. AU.addPreserved<LiveIntervals>();
  198. MachineFunctionPass::getAnalysisUsage(AU);
  199. }
  200. char PostMachineScheduler::ID = 0;
  201. char &llvm::PostMachineSchedulerID = PostMachineScheduler::ID;
  202. INITIALIZE_PASS_BEGIN(PostMachineScheduler, "postmisched",
  203. "PostRA Machine Instruction Scheduler", false, false)
  204. INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
  205. INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
  206. INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
  207. INITIALIZE_PASS_END(PostMachineScheduler, "postmisched",
  208. "PostRA Machine Instruction Scheduler", false, false)
  209. PostMachineScheduler::PostMachineScheduler() : MachineSchedulerBase(ID) {
  210. initializePostMachineSchedulerPass(*PassRegistry::getPassRegistry());
  211. }
  212. void PostMachineScheduler::getAnalysisUsage(AnalysisUsage &AU) const {
  213. AU.setPreservesCFG();
  214. AU.addRequired<MachineDominatorTree>();
  215. AU.addRequired<MachineLoopInfo>();
  216. AU.addRequired<AAResultsWrapperPass>();
  217. AU.addRequired<TargetPassConfig>();
  218. MachineFunctionPass::getAnalysisUsage(AU);
  219. }
  220. MachinePassRegistry<MachineSchedRegistry::ScheduleDAGCtor>
  221. MachineSchedRegistry::Registry;
  222. /// A dummy default scheduler factory indicates whether the scheduler
  223. /// is overridden on the command line.
  224. static ScheduleDAGInstrs *useDefaultMachineSched(MachineSchedContext *C) {
  225. return nullptr;
  226. }
  227. /// MachineSchedOpt allows command line selection of the scheduler.
  228. static cl::opt<MachineSchedRegistry::ScheduleDAGCtor, false,
  229. RegisterPassParser<MachineSchedRegistry>>
  230. MachineSchedOpt("misched",
  231. cl::init(&useDefaultMachineSched), cl::Hidden,
  232. cl::desc("Machine instruction scheduler to use"));
  233. static MachineSchedRegistry
  234. DefaultSchedRegistry("default", "Use the target's default scheduler choice.",
  235. useDefaultMachineSched);
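// Illustrative sketch of how a target or plugin hooks into this registry so
// its scheduler becomes selectable with -misched=<name>. The factory and the
// name below ("my-sched", createMySchedDAG) are hypothetical; the constructor
// call mirrors the ScheduleDAGMILive/GenericScheduler pairing used by
// createGenericSchedLive().
//
//   static ScheduleDAGInstrs *createMySchedDAG(MachineSchedContext *C) {
//     return new ScheduleDAGMILive(C, std::make_unique<GenericScheduler>(C));
//   }
//   static MachineSchedRegistry
//   MySchedRegistry("my-sched", "Description of my scheduler.",
//                   createMySchedDAG);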
  236. static cl::opt<bool> EnableMachineSched(
  237. "enable-misched",
  238. cl::desc("Enable the machine instruction scheduling pass."), cl::init(true),
  239. cl::Hidden);
  240. static cl::opt<bool> EnablePostRAMachineSched(
  241. "enable-post-misched",
  242. cl::desc("Enable the post-ra machine instruction scheduling pass."),
  243. cl::init(true), cl::Hidden);
  244. /// Decrement this iterator until reaching the top or a non-debug instr.
  245. static MachineBasicBlock::const_iterator
  246. priorNonDebug(MachineBasicBlock::const_iterator I,
  247. MachineBasicBlock::const_iterator Beg) {
  248. assert(I != Beg && "reached the top of the region, cannot decrement");
  249. while (--I != Beg) {
  250. if (!I->isDebugOrPseudoInstr())
  251. break;
  252. }
  253. return I;
  254. }
  255. /// Non-const version.
  256. static MachineBasicBlock::iterator
  257. priorNonDebug(MachineBasicBlock::iterator I,
  258. MachineBasicBlock::const_iterator Beg) {
  259. return priorNonDebug(MachineBasicBlock::const_iterator(I), Beg)
  260. .getNonConstIterator();
  261. }
  262. /// If this iterator is a debug value, increment until reaching the End or a
  263. /// non-debug instruction.
  264. static MachineBasicBlock::const_iterator
  265. nextIfDebug(MachineBasicBlock::const_iterator I,
  266. MachineBasicBlock::const_iterator End) {
  267. for(; I != End; ++I) {
  268. if (!I->isDebugOrPseudoInstr())
  269. break;
  270. }
  271. return I;
  272. }
  273. /// Non-const version.
  274. static MachineBasicBlock::iterator
  275. nextIfDebug(MachineBasicBlock::iterator I,
  276. MachineBasicBlock::const_iterator End) {
  277. return nextIfDebug(MachineBasicBlock::const_iterator(I), End)
  278. .getNonConstIterator();
  279. }
  280. /// Instantiate a ScheduleDAGInstrs that will be owned by the caller.
  281. ScheduleDAGInstrs *MachineScheduler::createMachineScheduler() {
  282. // Select the scheduler, or set the default.
  283. MachineSchedRegistry::ScheduleDAGCtor Ctor = MachineSchedOpt;
  284. if (Ctor != useDefaultMachineSched)
  285. return Ctor(this);
  286. // Get the default scheduler set by the target for this function.
  287. ScheduleDAGInstrs *Scheduler = PassConfig->createMachineScheduler(this);
  288. if (Scheduler)
  289. return Scheduler;
  290. // Default to GenericScheduler.
  291. return createGenericSchedLive(this);
  292. }
  293. /// Instantiate a ScheduleDAGInstrs for PostRA scheduling that will be owned by
  294. /// the caller. We don't have a command line option to override the postRA
  295. /// scheduler. The Target must configure it.
  296. ScheduleDAGInstrs *PostMachineScheduler::createPostMachineScheduler() {
  297. // Get the postRA scheduler set by the target for this function.
  298. ScheduleDAGInstrs *Scheduler = PassConfig->createPostMachineScheduler(this);
  299. if (Scheduler)
  300. return Scheduler;
  301. // Default to the generic post-RA scheduler (PostGenericScheduler).
  302. return createGenericSchedPostRA(this);
  303. }
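// Illustrative sketch of the target-side hook referenced above: a target's
// TargetPassConfig subclass (MyTargetPassConfig is hypothetical) overrides
// createPostMachineScheduler(); returning a ScheduleDAGMI driven by
// PostGenericScheduler mirrors what createGenericSchedPostRA() provides.
//
//   ScheduleDAGInstrs *
//   MyTargetPassConfig::createPostMachineScheduler(MachineSchedContext *C) const {
//     return new ScheduleDAGMI(C, std::make_unique<PostGenericScheduler>(C),
//                              /*RemoveKillFlags=*/true);
//   }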
  304. /// Top-level MachineScheduler pass driver.
  305. ///
  306. /// Visit blocks in function order. Divide each block into scheduling regions
  307. /// and visit them bottom-up. Visiting regions bottom-up is not required, but is
  308. /// consistent with the DAG builder, which traverses the interior of the
  309. /// scheduling regions bottom-up.
  310. ///
  311. /// This design avoids exposing scheduling boundaries to the DAG builder,
  312. /// simplifying the DAG builder's support for "special" target instructions.
  313. /// At the same time the design allows target schedulers to operate across
  314. /// scheduling boundaries, for example to bundle the boundary instructions
  315. /// without reordering them. This creates complexity, because the target
  316. /// scheduler must update the RegionBegin and RegionEnd positions cached by
  317. /// ScheduleDAGInstrs whenever adding or removing instructions. A much simpler
  318. /// design would be to split blocks at scheduling boundaries, but LLVM has a
  319. /// general bias against block splitting purely for implementation simplicity.
  320. bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) {
  321. if (skipFunction(mf.getFunction()))
  322. return false;
  323. if (EnableMachineSched.getNumOccurrences()) {
  324. if (!EnableMachineSched)
  325. return false;
  326. } else if (!mf.getSubtarget().enableMachineScheduler())
  327. return false;
  328. LLVM_DEBUG(dbgs() << "Before MISched:\n"; mf.print(dbgs()));
  329. // Initialize the context of the pass.
  330. MF = &mf;
  331. MLI = &getAnalysis<MachineLoopInfo>();
  332. MDT = &getAnalysis<MachineDominatorTree>();
  333. PassConfig = &getAnalysis<TargetPassConfig>();
  334. AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
  335. LIS = &getAnalysis<LiveIntervals>();
  336. if (VerifyScheduling) {
  337. LLVM_DEBUG(LIS->dump());
  338. MF->verify(this, "Before machine scheduling.");
  339. }
  340. RegClassInfo->runOnMachineFunction(*MF);
  341. // Instantiate the selected scheduler for this target, function, and
  342. // optimization level.
  343. std::unique_ptr<ScheduleDAGInstrs> Scheduler(createMachineScheduler());
  344. scheduleRegions(*Scheduler, false);
  345. LLVM_DEBUG(LIS->dump());
  346. if (VerifyScheduling)
  347. MF->verify(this, "After machine scheduling.");
  348. return true;
  349. }
  350. bool PostMachineScheduler::runOnMachineFunction(MachineFunction &mf) {
  351. if (skipFunction(mf.getFunction()))
  352. return false;
  353. if (EnablePostRAMachineSched.getNumOccurrences()) {
  354. if (!EnablePostRAMachineSched)
  355. return false;
  356. } else if (!mf.getSubtarget().enablePostRAMachineScheduler()) {
  357. LLVM_DEBUG(dbgs() << "Subtarget disables post-MI-sched.\n");
  358. return false;
  359. }
  360. LLVM_DEBUG(dbgs() << "Before post-MI-sched:\n"; mf.print(dbgs()));
  361. // Initialize the context of the pass.
  362. MF = &mf;
  363. MLI = &getAnalysis<MachineLoopInfo>();
  364. PassConfig = &getAnalysis<TargetPassConfig>();
  365. AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
  366. if (VerifyScheduling)
  367. MF->verify(this, "Before post machine scheduling.");
  368. // Instantiate the selected scheduler for this target, function, and
  369. // optimization level.
  370. std::unique_ptr<ScheduleDAGInstrs> Scheduler(createPostMachineScheduler());
  371. scheduleRegions(*Scheduler, true);
  372. if (VerifyScheduling)
  373. MF->verify(this, "After post machine scheduling.");
  374. return true;
  375. }
  376. /// Return true if the given instruction should not be included in a scheduling
  377. /// region.
  378. ///
  379. /// MachineScheduler does not currently support scheduling across calls. To
  380. /// handle calls, the DAG builder needs to be modified to create register
  381. /// anti/output dependencies on the registers clobbered by the call's regmask
  382. /// operand. In PreRA scheduling, the stack pointer adjustment already prevents
  383. scheduling across calls. In PostRA scheduling, we need the isCall check to enforce
  384. /// the boundary, but there would be no benefit to postRA scheduling across
  385. /// calls this late anyway.
  386. static bool isSchedBoundary(MachineBasicBlock::iterator MI,
  387. MachineBasicBlock *MBB,
  388. MachineFunction *MF,
  389. const TargetInstrInfo *TII) {
  390. return MI->isCall() || TII->isSchedulingBoundary(*MI, MBB, *MF);
  391. }
  392. /// A region of an MBB for scheduling.
  393. namespace {
  394. struct SchedRegion {
  395. /// RegionBegin is the first instruction in the scheduling region, and
  396. /// RegionEnd is either MBB->end() or the scheduling boundary after the
  397. /// last instruction in the scheduling region. These iterators cannot refer
  398. /// to instructions outside of the identified scheduling region because
  399. /// those may be reordered before scheduling this region.
  400. MachineBasicBlock::iterator RegionBegin;
  401. MachineBasicBlock::iterator RegionEnd;
  402. unsigned NumRegionInstrs;
  403. SchedRegion(MachineBasicBlock::iterator B, MachineBasicBlock::iterator E,
  404. unsigned N) :
  405. RegionBegin(B), RegionEnd(E), NumRegionInstrs(N) {}
  406. };
  407. } // end anonymous namespace
  408. using MBBRegionsVector = SmallVector<SchedRegion, 16>;
  409. static void
  410. getSchedRegions(MachineBasicBlock *MBB,
  411. MBBRegionsVector &Regions,
  412. bool RegionsTopDown) {
  413. MachineFunction *MF = MBB->getParent();
  414. const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
  415. MachineBasicBlock::iterator I = nullptr;
  416. for(MachineBasicBlock::iterator RegionEnd = MBB->end();
  417. RegionEnd != MBB->begin(); RegionEnd = I) {
  418. // Avoid decrementing RegionEnd for blocks with no terminator.
  419. if (RegionEnd != MBB->end() ||
  420. isSchedBoundary(&*std::prev(RegionEnd), &*MBB, MF, TII)) {
  421. --RegionEnd;
  422. }
  423. // The next region starts above the previous region. Look backward in the
  424. // instruction stream until we find the nearest boundary.
  425. unsigned NumRegionInstrs = 0;
  426. I = RegionEnd;
  427. for (;I != MBB->begin(); --I) {
  428. MachineInstr &MI = *std::prev(I);
  429. if (isSchedBoundary(&MI, &*MBB, MF, TII))
  430. break;
  431. if (!MI.isDebugOrPseudoInstr()) {
  432. // MBB::size() uses instr_iterator to count. Here we need a bundle to
  433. // count as a single instruction.
  434. ++NumRegionInstrs;
  435. }
  436. }
  437. // It's possible we found a scheduling region that only has debug
  438. // instructions. Don't bother scheduling these.
  439. if (NumRegionInstrs != 0)
  440. Regions.push_back(SchedRegion(I, RegionEnd, NumRegionInstrs));
  441. }
  442. if (RegionsTopDown)
  443. std::reverse(Regions.begin(), Regions.end());
  444. }
  445. /// Main driver for both MachineScheduler and PostMachineScheduler.
  446. void MachineSchedulerBase::scheduleRegions(ScheduleDAGInstrs &Scheduler,
  447. bool FixKillFlags) {
  448. // Visit all machine basic blocks.
  449. //
  450. // TODO: Visit blocks in global postorder or postorder within the bottom-up
  451. // loop tree. Then we can optionally compute global RegPressure.
  452. for (MachineFunction::iterator MBB = MF->begin(), MBBEnd = MF->end();
  453. MBB != MBBEnd; ++MBB) {
  454. Scheduler.startBlock(&*MBB);
  455. #ifndef NDEBUG
  456. if (SchedOnlyFunc.getNumOccurrences() && SchedOnlyFunc != MF->getName())
  457. continue;
  458. if (SchedOnlyBlock.getNumOccurrences()
  459. && (int)SchedOnlyBlock != MBB->getNumber())
  460. continue;
  461. #endif
  462. // Break the block into scheduling regions [I, RegionEnd). RegionEnd
  463. // points to the scheduling boundary at the bottom of the region. The DAG
  464. // does not include RegionEnd, but the region does (i.e. the next
  465. // RegionEnd is above the previous RegionBegin). If the current block has
  466. // no terminator then RegionEnd == MBB->end() for the bottom region.
  467. //
  468. // All the regions of MBB are first found and stored in MBBRegions; they are
  469. // then processed top-down within MBB if doMBBSchedRegionsTopDown() is true.
  470. //
  471. // The Scheduler may insert instructions during either schedule() or
  472. // exitRegion(), even for empty regions. So the local iterators 'I' and
  473. // 'RegionEnd' are invalid across these calls. Instructions must not be
  474. // added to other regions than the current one without updating MBBRegions.
  475. MBBRegionsVector MBBRegions;
  476. getSchedRegions(&*MBB, MBBRegions, Scheduler.doMBBSchedRegionsTopDown());
  477. for (const SchedRegion &R : MBBRegions) {
  478. MachineBasicBlock::iterator I = R.RegionBegin;
  479. MachineBasicBlock::iterator RegionEnd = R.RegionEnd;
  480. unsigned NumRegionInstrs = R.NumRegionInstrs;
  481. // Notify the scheduler of the region, even if we may skip scheduling
  482. // it. Perhaps it still needs to be bundled.
  483. Scheduler.enterRegion(&*MBB, I, RegionEnd, NumRegionInstrs);
  484. // Skip empty scheduling regions (0 or 1 schedulable instructions).
  485. if (I == RegionEnd || I == std::prev(RegionEnd)) {
  486. // Close the current region. Bundle the terminator if needed.
  487. // This invalidates 'RegionEnd' and 'I'.
  488. Scheduler.exitRegion();
  489. continue;
  490. }
  491. LLVM_DEBUG(dbgs() << "********** MI Scheduling **********\n");
  492. LLVM_DEBUG(dbgs() << MF->getName() << ":" << printMBBReference(*MBB)
  493. << " " << MBB->getName() << "\n From: " << *I
  494. << " To: ";
  495. if (RegionEnd != MBB->end()) dbgs() << *RegionEnd;
  496. else dbgs() << "End\n";
  497. dbgs() << " RegionInstrs: " << NumRegionInstrs << '\n');
  498. if (DumpCriticalPathLength) {
  499. errs() << MF->getName();
  500. errs() << ":%bb. " << MBB->getNumber();
  501. errs() << " " << MBB->getName() << " \n";
  502. }
  503. // Schedule a region: possibly reorder instructions.
  504. // This invalidates the original region iterators.
  505. Scheduler.schedule();
  506. // Close the current region.
  507. Scheduler.exitRegion();
  508. }
  509. Scheduler.finishBlock();
  510. // FIXME: Ideally, no further passes should rely on kill flags. However,
  511. // thumb2 size reduction is currently an exception, so the PostMIScheduler
  512. // needs to do this.
  513. if (FixKillFlags)
  514. Scheduler.fixupKills(*MBB);
  515. }
  516. Scheduler.finalizeSchedule();
  517. }
  518. void MachineSchedulerBase::print(raw_ostream &O, const Module* m) const {
  519. // unimplemented
  520. }
  521. #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  522. LLVM_DUMP_METHOD void ReadyQueue::dump() const {
  523. dbgs() << "Queue " << Name << ": ";
  524. for (const SUnit *SU : Queue)
  525. dbgs() << SU->NodeNum << " ";
  526. dbgs() << "\n";
  527. }
  528. #endif
  529. //===----------------------------------------------------------------------===//
  530. // ScheduleDAGMI - Basic machine instruction scheduling. This is
  531. // independent of PreRA/PostRA scheduling and involves no extra book-keeping for
  532. // virtual registers.
  533. //===----------------------------------------------------------------------===//
  534. // Provide a vtable anchor.
  535. ScheduleDAGMI::~ScheduleDAGMI() = default;
  536. /// ReleaseSucc - Decrement the NumPredsLeft count of a successor. When
  537. /// NumPredsLeft reaches zero, release the successor node.
  538. ///
  539. /// FIXME: Adjust SuccSU height based on MinLatency.
  540. void ScheduleDAGMI::releaseSucc(SUnit *SU, SDep *SuccEdge) {
  541. SUnit *SuccSU = SuccEdge->getSUnit();
  542. if (SuccEdge->isWeak()) {
  543. --SuccSU->WeakPredsLeft;
  544. if (SuccEdge->isCluster())
  545. NextClusterSucc = SuccSU;
  546. return;
  547. }
  548. #ifndef NDEBUG
  549. if (SuccSU->NumPredsLeft == 0) {
  550. dbgs() << "*** Scheduling failed! ***\n";
  551. dumpNode(*SuccSU);
  552. dbgs() << " has been released too many times!\n";
  553. llvm_unreachable(nullptr);
  554. }
  555. #endif
  556. // SU->TopReadyCycle was set to CurrCycle when it was scheduled. However,
  557. // CurrCycle may have advanced since then.
  558. if (SuccSU->TopReadyCycle < SU->TopReadyCycle + SuccEdge->getLatency())
  559. SuccSU->TopReadyCycle = SU->TopReadyCycle + SuccEdge->getLatency();
  560. --SuccSU->NumPredsLeft;
  561. if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU)
  562. SchedImpl->releaseTopNode(SuccSU);
  563. }
  564. /// releaseSuccessors - Call releaseSucc on each of SU's successors.
  565. void ScheduleDAGMI::releaseSuccessors(SUnit *SU) {
  566. for (SDep &Succ : SU->Succs)
  567. releaseSucc(SU, &Succ);
  568. }
  569. /// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. When
  570. /// NumSuccsLeft reaches zero, release the predecessor node.
  571. ///
  572. /// FIXME: Adjust PredSU height based on MinLatency.
  573. void ScheduleDAGMI::releasePred(SUnit *SU, SDep *PredEdge) {
  574. SUnit *PredSU = PredEdge->getSUnit();
  575. if (PredEdge->isWeak()) {
  576. --PredSU->WeakSuccsLeft;
  577. if (PredEdge->isCluster())
  578. NextClusterPred = PredSU;
  579. return;
  580. }
  581. #ifndef NDEBUG
  582. if (PredSU->NumSuccsLeft == 0) {
  583. dbgs() << "*** Scheduling failed! ***\n";
  584. dumpNode(*PredSU);
  585. dbgs() << " has been released too many times!\n";
  586. llvm_unreachable(nullptr);
  587. }
  588. #endif
  589. // SU->BotReadyCycle was set to CurrCycle when it was scheduled. However,
  590. // CurrCycle may have advanced since then.
  591. if (PredSU->BotReadyCycle < SU->BotReadyCycle + PredEdge->getLatency())
  592. PredSU->BotReadyCycle = SU->BotReadyCycle + PredEdge->getLatency();
  593. --PredSU->NumSuccsLeft;
  594. if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU)
  595. SchedImpl->releaseBottomNode(PredSU);
  596. }
  597. /// releasePredecessors - Call releasePred on each of SU's predecessors.
  598. void ScheduleDAGMI::releasePredecessors(SUnit *SU) {
  599. for (SDep &Pred : SU->Preds)
  600. releasePred(SU, &Pred);
  601. }
  602. void ScheduleDAGMI::startBlock(MachineBasicBlock *bb) {
  603. ScheduleDAGInstrs::startBlock(bb);
  604. SchedImpl->enterMBB(bb);
  605. }
  606. void ScheduleDAGMI::finishBlock() {
  607. SchedImpl->leaveMBB();
  608. ScheduleDAGInstrs::finishBlock();
  609. }
  610. /// enterRegion - Called back from MachineScheduler::runOnMachineFunction after
  611. /// crossing a scheduling boundary. [begin, end) includes all instructions in
  612. /// the region, including the boundary itself and single-instruction regions
  613. /// that don't get scheduled.
  614. void ScheduleDAGMI::enterRegion(MachineBasicBlock *bb,
  615. MachineBasicBlock::iterator begin,
  616. MachineBasicBlock::iterator end,
  617. unsigned regioninstrs)
  618. {
  619. ScheduleDAGInstrs::enterRegion(bb, begin, end, regioninstrs);
  620. SchedImpl->initPolicy(begin, end, regioninstrs);
  621. }
  622. /// This is normally called from the main scheduler loop but may also be invoked
  623. /// by the scheduling strategy to perform additional code motion.
  624. void ScheduleDAGMI::moveInstruction(
  625. MachineInstr *MI, MachineBasicBlock::iterator InsertPos) {
  626. // Advance RegionBegin if the first instruction moves down.
  627. if (&*RegionBegin == MI)
  628. ++RegionBegin;
  629. // Update the instruction stream.
  630. BB->splice(InsertPos, BB, MI);
  631. // Update LiveIntervals
  632. if (LIS)
  633. LIS->handleMove(*MI, /*UpdateFlags=*/true);
  634. // Recede RegionBegin if an instruction moves above the first.
  635. if (RegionBegin == InsertPos)
  636. RegionBegin = MI;
  637. }
  638. bool ScheduleDAGMI::checkSchedLimit() {
  639. #ifndef NDEBUG
  640. if (NumInstrsScheduled == MISchedCutoff && MISchedCutoff != ~0U) {
  641. CurrentTop = CurrentBottom;
  642. return false;
  643. }
  644. ++NumInstrsScheduled;
  645. #endif
  646. return true;
  647. }
  648. /// Per-region scheduling driver, called back from
  649. /// MachineScheduler::runOnMachineFunction. This is a simplified driver that
  650. /// does not consider liveness or register pressure. It is useful for PostRA
  651. /// scheduling and potentially other custom schedulers.
  652. void ScheduleDAGMI::schedule() {
  653. LLVM_DEBUG(dbgs() << "ScheduleDAGMI::schedule starting\n");
  654. LLVM_DEBUG(SchedImpl->dumpPolicy());
  655. // Build the DAG.
  656. buildSchedGraph(AA);
  657. postprocessDAG();
  658. SmallVector<SUnit*, 8> TopRoots, BotRoots;
  659. findRootsAndBiasEdges(TopRoots, BotRoots);
  660. LLVM_DEBUG(dump());
  661. if (PrintDAGs) dump();
  662. if (ViewMISchedDAGs) viewGraph();
  663. // Initialize the strategy before modifying the DAG.
  664. // This may initialize a DFSResult to be used for queue priority.
  665. SchedImpl->initialize(this);
  666. // Initialize ready queues now that the DAG and priority data are finalized.
  667. initQueues(TopRoots, BotRoots);
  668. bool IsTopNode = false;
  669. while (true) {
  670. LLVM_DEBUG(dbgs() << "** ScheduleDAGMI::schedule picking next node\n");
  671. SUnit *SU = SchedImpl->pickNode(IsTopNode);
  672. if (!SU) break;
  673. assert(!SU->isScheduled && "Node already scheduled");
  674. if (!checkSchedLimit())
  675. break;
  676. MachineInstr *MI = SU->getInstr();
  677. if (IsTopNode) {
  678. assert(SU->isTopReady() && "node still has unscheduled dependencies");
  679. if (&*CurrentTop == MI)
  680. CurrentTop = nextIfDebug(++CurrentTop, CurrentBottom);
  681. else
  682. moveInstruction(MI, CurrentTop);
  683. } else {
  684. assert(SU->isBottomReady() && "node still has unscheduled dependencies");
  685. MachineBasicBlock::iterator priorII =
  686. priorNonDebug(CurrentBottom, CurrentTop);
  687. if (&*priorII == MI)
  688. CurrentBottom = priorII;
  689. else {
  690. if (&*CurrentTop == MI)
  691. CurrentTop = nextIfDebug(++CurrentTop, priorII);
  692. moveInstruction(MI, CurrentBottom);
  693. CurrentBottom = MI;
  694. }
  695. }
  696. // Notify the scheduling strategy before updating the DAG.
  697. // This sets the scheduled node's ReadyCycle to CurrCycle. When updateQueues
  698. // runs, it can then use the accurate ReadyCycle time to determine whether
  699. // newly released nodes can move to the readyQ.
  700. SchedImpl->schedNode(SU, IsTopNode);
  701. updateQueues(SU, IsTopNode);
  702. }
  703. assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone.");
  704. placeDebugValues();
  705. LLVM_DEBUG({
  706. dbgs() << "*** Final schedule for "
  707. << printMBBReference(*begin()->getParent()) << " ***\n";
  708. dumpSchedule();
  709. dbgs() << '\n';
  710. });
  711. }
  712. /// Apply each ScheduleDAGMutation step in order.
  713. void ScheduleDAGMI::postprocessDAG() {
  714. for (auto &m : Mutations)
  715. m->apply(this);
  716. }
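// Illustrative sketch of a mutation consumed by postprocessDAG(): targets
// subclass ScheduleDAGMutation and register it before scheduling. The class
// name MyClusterMutation is hypothetical; apply() may add artificial or
// cluster edges via ScheduleDAGInstrs::addEdge().
//
//   struct MyClusterMutation : ScheduleDAGMutation {
//     void apply(ScheduleDAGInstrs *DAG) override {
//       if (DAG->SUnits.size() < 2)
//         return;
//       // Keep SU(0) and SU(1) adjacent by adding a weak cluster edge.
//       DAG->addEdge(&DAG->SUnits[1], SDep(&DAG->SUnits[0], SDep::Cluster));
//     }
//   };
//
// A target or strategy then attaches it with
// ScheduleDAGMI::addMutation(std::make_unique<MyClusterMutation>()).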
  717. void ScheduleDAGMI::
  718. findRootsAndBiasEdges(SmallVectorImpl<SUnit*> &TopRoots,
  719. SmallVectorImpl<SUnit*> &BotRoots) {
  720. for (SUnit &SU : SUnits) {
  721. assert(!SU.isBoundaryNode() && "Boundary node should not be in SUnits");
  722. // Order predecessors so DFSResult follows the critical path.
  723. SU.biasCriticalPath();
  724. // A SUnit is ready to top schedule if it has no predecessors.
  725. if (!SU.NumPredsLeft)
  726. TopRoots.push_back(&SU);
  727. // A SUnit is ready to bottom schedule if it has no successors.
  728. if (!SU.NumSuccsLeft)
  729. BotRoots.push_back(&SU);
  730. }
  731. ExitSU.biasCriticalPath();
  732. }
  733. /// Identify DAG roots and set up scheduler queues.
  734. void ScheduleDAGMI::initQueues(ArrayRef<SUnit*> TopRoots,
  735. ArrayRef<SUnit*> BotRoots) {
  736. NextClusterSucc = nullptr;
  737. NextClusterPred = nullptr;
  738. // Release all DAG roots for scheduling, not including EntrySU/ExitSU.
  739. //
  740. // Nodes with unreleased weak edges can still be roots.
  741. // Release top roots in forward order.
  742. for (SUnit *SU : TopRoots)
  743. SchedImpl->releaseTopNode(SU);
  744. // Release bottom roots in reverse order so the higher priority nodes appear
  745. // first. This is more natural and slightly more efficient.
  746. for (SmallVectorImpl<SUnit*>::const_reverse_iterator
  747. I = BotRoots.rbegin(), E = BotRoots.rend(); I != E; ++I) {
  748. SchedImpl->releaseBottomNode(*I);
  749. }
  750. releaseSuccessors(&EntrySU);
  751. releasePredecessors(&ExitSU);
  752. SchedImpl->registerRoots();
  753. // Advance past initial DebugValues.
  754. CurrentTop = nextIfDebug(RegionBegin, RegionEnd);
  755. CurrentBottom = RegionEnd;
  756. }
  757. /// Update scheduler queues after scheduling an instruction.
  758. void ScheduleDAGMI::updateQueues(SUnit *SU, bool IsTopNode) {
  759. // Release dependent instructions for scheduling.
  760. if (IsTopNode)
  761. releaseSuccessors(SU);
  762. else
  763. releasePredecessors(SU);
  764. SU->isScheduled = true;
  765. }
  766. /// Reinsert any remaining debug_values, just like the PostRA scheduler.
  767. void ScheduleDAGMI::placeDebugValues() {
  768. // If first instruction was a DBG_VALUE then put it back.
  769. if (FirstDbgValue) {
  770. BB->splice(RegionBegin, BB, FirstDbgValue);
  771. RegionBegin = FirstDbgValue;
  772. }
  773. for (std::vector<std::pair<MachineInstr *, MachineInstr *>>::iterator
  774. DI = DbgValues.end(), DE = DbgValues.begin(); DI != DE; --DI) {
  775. std::pair<MachineInstr *, MachineInstr *> P = *std::prev(DI);
  776. MachineInstr *DbgValue = P.first;
  777. MachineBasicBlock::iterator OrigPrevMI = P.second;
  778. if (&*RegionBegin == DbgValue)
  779. ++RegionBegin;
  780. BB->splice(++OrigPrevMI, BB, DbgValue);
  781. if (OrigPrevMI == std::prev(RegionEnd))
  782. RegionEnd = DbgValue;
  783. }
  784. DbgValues.clear();
  785. FirstDbgValue = nullptr;
  786. }
  787. #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  788. LLVM_DUMP_METHOD void ScheduleDAGMI::dumpSchedule() const {
  789. for (MachineInstr &MI : *this) {
  790. if (SUnit *SU = getSUnit(&MI))
  791. dumpNode(*SU);
  792. else
  793. dbgs() << "Missing SUnit\n";
  794. }
  795. }
  796. #endif
  797. //===----------------------------------------------------------------------===//
  798. // ScheduleDAGMILive - Base class for MachineInstr scheduling with LiveIntervals
  799. // preservation.
  800. //===----------------------------------------------------------------------===//
  801. ScheduleDAGMILive::~ScheduleDAGMILive() {
  802. delete DFSResult;
  803. }
  804. void ScheduleDAGMILive::collectVRegUses(SUnit &SU) {
  805. const MachineInstr &MI = *SU.getInstr();
  806. for (const MachineOperand &MO : MI.operands()) {
  807. if (!MO.isReg())
  808. continue;
  809. if (!MO.readsReg())
  810. continue;
  811. if (TrackLaneMasks && !MO.isUse())
  812. continue;
  813. Register Reg = MO.getReg();
  814. if (!Register::isVirtualRegister(Reg))
  815. continue;
  816. // Ignore re-defs.
  817. if (TrackLaneMasks) {
  818. bool FoundDef = false;
  819. for (const MachineOperand &MO2 : MI.operands()) {
  820. if (MO2.isReg() && MO2.isDef() && MO2.getReg() == Reg && !MO2.isDead()) {
  821. FoundDef = true;
  822. break;
  823. }
  824. }
  825. if (FoundDef)
  826. continue;
  827. }
  828. // Record this local VReg use.
  829. VReg2SUnitMultiMap::iterator UI = VRegUses.find(Reg);
  830. for (; UI != VRegUses.end(); ++UI) {
  831. if (UI->SU == &SU)
  832. break;
  833. }
  834. if (UI == VRegUses.end())
  835. VRegUses.insert(VReg2SUnit(Reg, LaneBitmask::getNone(), &SU));
  836. }
  837. }
  838. /// enterRegion - Called back from MachineScheduler::runOnMachineFunction after
  839. /// crossing a scheduling boundary. [begin, end) includes all instructions in
  840. /// the region, including the boundary itself and single-instruction regions
  841. /// that don't get scheduled.
  842. void ScheduleDAGMILive::enterRegion(MachineBasicBlock *bb,
  843. MachineBasicBlock::iterator begin,
  844. MachineBasicBlock::iterator end,
  845. unsigned regioninstrs)
  846. {
  847. // ScheduleDAGMI initializes SchedImpl's per-region policy.
  848. ScheduleDAGMI::enterRegion(bb, begin, end, regioninstrs);
  849. // For convenience remember the end of the liveness region.
  850. LiveRegionEnd = (RegionEnd == bb->end()) ? RegionEnd : std::next(RegionEnd);
  851. SUPressureDiffs.clear();
  852. ShouldTrackPressure = SchedImpl->shouldTrackPressure();
  853. ShouldTrackLaneMasks = SchedImpl->shouldTrackLaneMasks();
  854. assert((!ShouldTrackLaneMasks || ShouldTrackPressure) &&
  855. "ShouldTrackLaneMasks requires ShouldTrackPressure");
  856. }
  857. // Set up the register pressure trackers for the top scheduled and bottom
  858. // scheduled regions.
  859. void ScheduleDAGMILive::initRegPressure() {
  860. VRegUses.clear();
  861. VRegUses.setUniverse(MRI.getNumVirtRegs());
  862. for (SUnit &SU : SUnits)
  863. collectVRegUses(SU);
  864. TopRPTracker.init(&MF, RegClassInfo, LIS, BB, RegionBegin,
  865. ShouldTrackLaneMasks, false);
  866. BotRPTracker.init(&MF, RegClassInfo, LIS, BB, LiveRegionEnd,
  867. ShouldTrackLaneMasks, false);
  868. // Close the RPTracker to finalize live ins.
  869. RPTracker.closeRegion();
  870. LLVM_DEBUG(RPTracker.dump());
  871. // Initialize the live ins and live outs.
  872. TopRPTracker.addLiveRegs(RPTracker.getPressure().LiveInRegs);
  873. BotRPTracker.addLiveRegs(RPTracker.getPressure().LiveOutRegs);
  874. // Close one end of the tracker so we can call
  875. // getMaxUpward/DownwardPressureDelta before advancing across any
  876. // instructions. This converts currently live regs into live ins/outs.
  877. TopRPTracker.closeTop();
  878. BotRPTracker.closeBottom();
  879. BotRPTracker.initLiveThru(RPTracker);
  880. if (!BotRPTracker.getLiveThru().empty()) {
  881. TopRPTracker.initLiveThru(BotRPTracker.getLiveThru());
  882. LLVM_DEBUG(dbgs() << "Live Thru: ";
  883. dumpRegSetPressure(BotRPTracker.getLiveThru(), TRI));
  884. }
  885. // For each live out vreg reduce the pressure change associated with other
  886. // uses of the same vreg below the live-out reaching def.
  887. updatePressureDiffs(RPTracker.getPressure().LiveOutRegs);
  888. // Account for liveness generated by the region boundary.
  889. if (LiveRegionEnd != RegionEnd) {
  890. SmallVector<RegisterMaskPair, 8> LiveUses;
  891. BotRPTracker.recede(&LiveUses);
  892. updatePressureDiffs(LiveUses);
  893. }
  894. LLVM_DEBUG(dbgs() << "Top Pressure:\n";
  895. dumpRegSetPressure(TopRPTracker.getRegSetPressureAtPos(), TRI);
  896. dbgs() << "Bottom Pressure:\n";
  897. dumpRegSetPressure(BotRPTracker.getRegSetPressureAtPos(), TRI););
  898. assert((BotRPTracker.getPos() == RegionEnd ||
  899. (RegionEnd->isDebugInstr() &&
  900. BotRPTracker.getPos() == priorNonDebug(RegionEnd, RegionBegin))) &&
  901. "Can't find the region bottom");
  902. // Cache the list of excess pressure sets in this region. This will also track
  903. // the max pressure in the scheduled code for these sets.
  904. RegionCriticalPSets.clear();
  905. const std::vector<unsigned> &RegionPressure =
  906. RPTracker.getPressure().MaxSetPressure;
  907. for (unsigned i = 0, e = RegionPressure.size(); i < e; ++i) {
  908. unsigned Limit = RegClassInfo->getRegPressureSetLimit(i);
  909. if (RegionPressure[i] > Limit) {
  910. LLVM_DEBUG(dbgs() << TRI->getRegPressureSetName(i) << " Limit " << Limit
  911. << " Actual " << RegionPressure[i] << "\n");
  912. RegionCriticalPSets.push_back(PressureChange(i));
  913. }
  914. }
  915. LLVM_DEBUG(dbgs() << "Excess PSets: ";
  916. for (const PressureChange &RCPS
  917. : RegionCriticalPSets) dbgs()
  918. << TRI->getRegPressureSetName(RCPS.getPSet()) << " ";
  919. dbgs() << "\n");
  920. }
  921. void ScheduleDAGMILive::
  922. updateScheduledPressure(const SUnit *SU,
  923. const std::vector<unsigned> &NewMaxPressure) {
  924. const PressureDiff &PDiff = getPressureDiff(SU);
  925. unsigned CritIdx = 0, CritEnd = RegionCriticalPSets.size();
  926. for (const PressureChange &PC : PDiff) {
  927. if (!PC.isValid())
  928. break;
  929. unsigned ID = PC.getPSet();
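// RegionCriticalPSets was populated in ascending pressure set order, so
// CritIdx only ever needs to advance forward to find a potential match.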
  930. while (CritIdx != CritEnd && RegionCriticalPSets[CritIdx].getPSet() < ID)
  931. ++CritIdx;
  932. if (CritIdx != CritEnd && RegionCriticalPSets[CritIdx].getPSet() == ID) {
  933. if ((int)NewMaxPressure[ID] > RegionCriticalPSets[CritIdx].getUnitInc()
  934. && NewMaxPressure[ID] <= (unsigned)std::numeric_limits<int16_t>::max())
  935. RegionCriticalPSets[CritIdx].setUnitInc(NewMaxPressure[ID]);
  936. }
  937. unsigned Limit = RegClassInfo->getRegPressureSetLimit(ID);
  938. if (NewMaxPressure[ID] >= Limit - 2) {
  939. LLVM_DEBUG(dbgs() << " " << TRI->getRegPressureSetName(ID) << ": "
  940. << NewMaxPressure[ID]
  941. << ((NewMaxPressure[ID] > Limit) ? " > " : " <= ")
  942. << Limit << "(+ " << BotRPTracker.getLiveThru()[ID]
  943. << " livethru)\n");
  944. }
  945. }
  946. }
  947. /// Update the PressureDiff array for liveness after scheduling this
  948. /// instruction.
  949. void ScheduleDAGMILive::updatePressureDiffs(
  950. ArrayRef<RegisterMaskPair> LiveUses) {
  951. for (const RegisterMaskPair &P : LiveUses) {
  952. Register Reg = P.RegUnit;
  953. /// FIXME: Currently assuming single-use physregs.
  954. if (!Register::isVirtualRegister(Reg))
  955. continue;
  956. if (ShouldTrackLaneMasks) {
  957. // If the register has just become live then other uses won't change
  958. // this fact anymore => decrement pressure.
  959. // If the register has just become dead then other uses make it come
  960. // back to life => increment pressure.
  961. bool Decrement = P.LaneMask.any();
  962. for (const VReg2SUnit &V2SU
  963. : make_range(VRegUses.find(Reg), VRegUses.end())) {
  964. SUnit &SU = *V2SU.SU;
  965. if (SU.isScheduled || &SU == &ExitSU)
  966. continue;
  967. PressureDiff &PDiff = getPressureDiff(&SU);
  968. PDiff.addPressureChange(Reg, Decrement, &MRI);
  969. LLVM_DEBUG(dbgs() << " UpdateRegP: SU(" << SU.NodeNum << ") "
  970. << printReg(Reg, TRI) << ':'
  971. << PrintLaneMask(P.LaneMask) << ' ' << *SU.getInstr();
  972. dbgs() << " to "; PDiff.dump(*TRI););
  973. }
  974. } else {
  975. assert(P.LaneMask.any());
  976. LLVM_DEBUG(dbgs() << " LiveReg: " << printVRegOrUnit(Reg, TRI) << "\n");
  977. // This may be called before CurrentBottom has been initialized. However,
  978. // BotRPTracker must have a valid position. We want the value live into the
  979. // instruction or live out of the block, so ask for the previous
  980. // instruction's live-out.
  981. const LiveInterval &LI = LIS->getInterval(Reg);
  982. VNInfo *VNI;
  983. MachineBasicBlock::const_iterator I =
  984. nextIfDebug(BotRPTracker.getPos(), BB->end());
  985. if (I == BB->end())
  986. VNI = LI.getVNInfoBefore(LIS->getMBBEndIdx(BB));
  987. else {
  988. LiveQueryResult LRQ = LI.Query(LIS->getInstructionIndex(*I));
  989. VNI = LRQ.valueIn();
  990. }
  991. // RegisterPressureTracker guarantees that readsReg is true for LiveUses.
  992. assert(VNI && "No live value at use.");
  993. for (const VReg2SUnit &V2SU
  994. : make_range(VRegUses.find(Reg), VRegUses.end())) {
  995. SUnit *SU = V2SU.SU;
  996. // If this use comes before the reaching def, it cannot be a last use,
  997. // so decrease its pressure change.
  998. if (!SU->isScheduled && SU != &ExitSU) {
  999. LiveQueryResult LRQ =
  1000. LI.Query(LIS->getInstructionIndex(*SU->getInstr()));
  1001. if (LRQ.valueIn() == VNI) {
  1002. PressureDiff &PDiff = getPressureDiff(SU);
  1003. PDiff.addPressureChange(Reg, true, &MRI);
  1004. LLVM_DEBUG(dbgs() << " UpdateRegP: SU(" << SU->NodeNum << ") "
  1005. << *SU->getInstr();
  1006. dbgs() << " to "; PDiff.dump(*TRI););
  1007. }
  1008. }
  1009. }
  1010. }
  1011. }
  1012. }
  1013. void ScheduleDAGMILive::dump() const {
  1014. #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  1015. if (EntrySU.getInstr() != nullptr)
  1016. dumpNodeAll(EntrySU);
  1017. for (const SUnit &SU : SUnits) {
  1018. dumpNodeAll(SU);
  1019. if (ShouldTrackPressure) {
  1020. dbgs() << " Pressure Diff : ";
  1021. getPressureDiff(&SU).dump(*TRI);
  1022. }
  1023. dbgs() << " Single Issue : ";
  1024. if (SchedModel.mustBeginGroup(SU.getInstr()) &&
  1025. SchedModel.mustEndGroup(SU.getInstr()))
  1026. dbgs() << "true;";
  1027. else
  1028. dbgs() << "false;";
  1029. dbgs() << '\n';
  1030. }
  1031. if (ExitSU.getInstr() != nullptr)
  1032. dumpNodeAll(ExitSU);
  1033. #endif
  1034. }
  1035. /// schedule - Called back from MachineScheduler::runOnMachineFunction
  1036. /// after setting up the current scheduling region. [RegionBegin, RegionEnd)
  1037. /// only includes instructions that have DAG nodes, not scheduling boundaries.
  1038. ///
  1039. /// This is a skeletal driver, with all the functionality pushed into helpers,
  1040. /// so that it can be easily extended by experimental schedulers. Generally,
  1041. /// implementing MachineSchedStrategy should be sufficient to implement a new
  1042. /// scheduling algorithm. However, if a scheduler further subclasses
  1043. /// ScheduleDAGMILive then it will want to override this virtual method in order
  1044. /// to update any specialized state.
  1045. void ScheduleDAGMILive::schedule() {
  1046. LLVM_DEBUG(dbgs() << "ScheduleDAGMILive::schedule starting\n");
  1047. LLVM_DEBUG(SchedImpl->dumpPolicy());
  1048. buildDAGWithRegPressure();
  1049. postprocessDAG();
  1050. SmallVector<SUnit*, 8> TopRoots, BotRoots;
  1051. findRootsAndBiasEdges(TopRoots, BotRoots);
  1052. // Initialize the strategy before modifying the DAG.
  1053. // This may initialize a DFSResult to be used for queue priority.
  1054. SchedImpl->initialize(this);
  1055. LLVM_DEBUG(dump());
  1056. if (PrintDAGs) dump();
  1057. if (ViewMISchedDAGs) viewGraph();
  1058. // Initialize ready queues now that the DAG and priority data are finalized.
  1059. initQueues(TopRoots, BotRoots);
  1060. bool IsTopNode = false;
  1061. while (true) {
  1062. LLVM_DEBUG(dbgs() << "** ScheduleDAGMILive::schedule picking next node\n");
  1063. SUnit *SU = SchedImpl->pickNode(IsTopNode);
  1064. if (!SU) break;
  1065. assert(!SU->isScheduled && "Node already scheduled");
  1066. if (!checkSchedLimit())
  1067. break;
  1068. scheduleMI(SU, IsTopNode);
  1069. if (DFSResult) {
  1070. unsigned SubtreeID = DFSResult->getSubtreeID(SU);
  1071. if (!ScheduledTrees.test(SubtreeID)) {
  1072. ScheduledTrees.set(SubtreeID);
  1073. DFSResult->scheduleTree(SubtreeID);
  1074. SchedImpl->scheduleTree(SubtreeID);
  1075. }
  1076. }
  1077. // Notify the scheduling strategy after updating the DAG.
  1078. SchedImpl->schedNode(SU, IsTopNode);
  1079. updateQueues(SU, IsTopNode);
  1080. }
  1081. assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone.");
  1082. placeDebugValues();
  1083. LLVM_DEBUG({
  1084. dbgs() << "*** Final schedule for "
  1085. << printMBBReference(*begin()->getParent()) << " ***\n";
  1086. dumpSchedule();
  1087. dbgs() << '\n';
  1088. });
  1089. }
/// Build the DAG and set up three register pressure trackers.
  1091. void ScheduleDAGMILive::buildDAGWithRegPressure() {
  1092. if (!ShouldTrackPressure) {
  1093. RPTracker.reset();
  1094. RegionCriticalPSets.clear();
  1095. buildSchedGraph(AA);
  1096. return;
  1097. }
  1098. // Initialize the register pressure tracker used by buildSchedGraph.
  1099. RPTracker.init(&MF, RegClassInfo, LIS, BB, LiveRegionEnd,
  1100. ShouldTrackLaneMasks, /*TrackUntiedDefs=*/true);
// Account for liveness generated by the region boundary.
  1102. if (LiveRegionEnd != RegionEnd)
  1103. RPTracker.recede();
  1104. // Build the DAG, and compute current register pressure.
  1105. buildSchedGraph(AA, &RPTracker, &SUPressureDiffs, LIS, ShouldTrackLaneMasks);
  1106. // Initialize top/bottom trackers after computing region pressure.
  1107. initRegPressure();
  1108. }
  1109. void ScheduleDAGMILive::computeDFSResult() {
  1110. if (!DFSResult)
  1111. DFSResult = new SchedDFSResult(/*BottomU*/true, MinSubtreeSize);
  1112. DFSResult->clear();
  1113. ScheduledTrees.clear();
  1114. DFSResult->resize(SUnits.size());
  1115. DFSResult->compute(SUnits);
  1116. ScheduledTrees.resize(DFSResult->getNumSubtrees());
  1117. }
  1118. /// Compute the max cyclic critical path through the DAG. The scheduling DAG
  1119. /// only provides the critical path for single block loops. To handle loops that
  1120. /// span blocks, we could use the vreg path latencies provided by
  1121. /// MachineTraceMetrics instead. However, MachineTraceMetrics is not currently
  1122. /// available for use in the scheduler.
  1123. ///
  1124. /// The cyclic path estimation identifies a def-use pair that crosses the back
  1125. /// edge and considers the depth and height of the nodes. For example, consider
  1126. /// the following instruction sequence where each instruction has unit latency
  1127. /// and defines an eponymous virtual register:
  1128. ///
  1129. /// a->b(a,c)->c(b)->d(c)->exit
  1130. ///
/// The cyclic critical path is two cycles: b->c->b
  1132. /// The acyclic critical path is four cycles: a->b->c->d->exit
  1133. /// LiveOutHeight = height(c) = len(c->d->exit) = 2
  1134. /// LiveOutDepth = depth(c) + 1 = len(a->b->c) + 1 = 3
  1135. /// LiveInHeight = height(b) + 1 = len(b->c->d->exit) + 1 = 4
  1136. /// LiveInDepth = depth(b) = len(a->b) = 1
  1137. ///
  1138. /// LiveOutDepth - LiveInDepth = 3 - 1 = 2
  1139. /// LiveInHeight - LiveOutHeight = 4 - 2 = 2
  1140. /// CyclicCriticalPath = min(2, 2) = 2
  1141. ///
  1142. /// This could be relevant to PostRA scheduling, but is currently implemented
  1143. /// assuming LiveIntervals.
  1144. unsigned ScheduleDAGMILive::computeCyclicCriticalPath() {
// This only applies to single-block loops.
  1146. if (!BB->isSuccessor(BB))
  1147. return 0;
  1148. unsigned MaxCyclicLatency = 0;
  1149. // Visit each live out vreg def to find def/use pairs that cross iterations.
  1150. for (const RegisterMaskPair &P : RPTracker.getPressure().LiveOutRegs) {
  1151. Register Reg = P.RegUnit;
  1152. if (!Register::isVirtualRegister(Reg))
  1153. continue;
  1154. const LiveInterval &LI = LIS->getInterval(Reg);
  1155. const VNInfo *DefVNI = LI.getVNInfoBefore(LIS->getMBBEndIdx(BB));
  1156. if (!DefVNI)
  1157. continue;
  1158. MachineInstr *DefMI = LIS->getInstructionFromIndex(DefVNI->def);
  1159. const SUnit *DefSU = getSUnit(DefMI);
  1160. if (!DefSU)
  1161. continue;
  1162. unsigned LiveOutHeight = DefSU->getHeight();
  1163. unsigned LiveOutDepth = DefSU->getDepth() + DefSU->Latency;
  1164. // Visit all local users of the vreg def.
  1165. for (const VReg2SUnit &V2SU
  1166. : make_range(VRegUses.find(Reg), VRegUses.end())) {
  1167. SUnit *SU = V2SU.SU;
  1168. if (SU == &ExitSU)
  1169. continue;
  1170. // Only consider uses of the phi.
  1171. LiveQueryResult LRQ = LI.Query(LIS->getInstructionIndex(*SU->getInstr()));
  1172. if (!LRQ.valueIn()->isPHIDef())
  1173. continue;
  1174. // Assume that a path spanning two iterations is a cycle, which could
  1175. // overestimate in strange cases. This allows cyclic latency to be
  1176. // estimated as the minimum slack of the vreg's depth or height.
  1177. unsigned CyclicLatency = 0;
  1178. if (LiveOutDepth > SU->getDepth())
  1179. CyclicLatency = LiveOutDepth - SU->getDepth();
  1180. unsigned LiveInHeight = SU->getHeight() + DefSU->Latency;
  1181. if (LiveInHeight > LiveOutHeight) {
  1182. if (LiveInHeight - LiveOutHeight < CyclicLatency)
  1183. CyclicLatency = LiveInHeight - LiveOutHeight;
  1184. } else
  1185. CyclicLatency = 0;
  1186. LLVM_DEBUG(dbgs() << "Cyclic Path: SU(" << DefSU->NodeNum << ") -> SU("
  1187. << SU->NodeNum << ") = " << CyclicLatency << "c\n");
  1188. if (CyclicLatency > MaxCyclicLatency)
  1189. MaxCyclicLatency = CyclicLatency;
  1190. }
  1191. }
  1192. LLVM_DEBUG(dbgs() << "Cyclic Critical Path: " << MaxCyclicLatency << "c\n");
  1193. return MaxCyclicLatency;
  1194. }
/// Release ExitSU predecessors and set up scheduler queues. Re-position
  1196. /// the Top RP tracker in case the region beginning has changed.
  1197. void ScheduleDAGMILive::initQueues(ArrayRef<SUnit*> TopRoots,
  1198. ArrayRef<SUnit*> BotRoots) {
  1199. ScheduleDAGMI::initQueues(TopRoots, BotRoots);
  1200. if (ShouldTrackPressure) {
  1201. assert(TopRPTracker.getPos() == RegionBegin && "bad initial Top tracker");
  1202. TopRPTracker.setPos(CurrentTop);
  1203. }
  1204. }
  1205. /// Move an instruction and update register pressure.
  1206. void ScheduleDAGMILive::scheduleMI(SUnit *SU, bool IsTopNode) {
  1207. // Move the instruction to its new location in the instruction stream.
  1208. MachineInstr *MI = SU->getInstr();
  1209. if (IsTopNode) {
  1210. assert(SU->isTopReady() && "node still has unscheduled dependencies");
  1211. if (&*CurrentTop == MI)
  1212. CurrentTop = nextIfDebug(++CurrentTop, CurrentBottom);
  1213. else {
  1214. moveInstruction(MI, CurrentTop);
  1215. TopRPTracker.setPos(MI);
  1216. }
  1217. if (ShouldTrackPressure) {
  1218. // Update top scheduled pressure.
  1219. RegisterOperands RegOpers;
  1220. RegOpers.collect(*MI, *TRI, MRI, ShouldTrackLaneMasks, false);
  1221. if (ShouldTrackLaneMasks) {
  1222. // Adjust liveness and add missing dead+read-undef flags.
  1223. SlotIndex SlotIdx = LIS->getInstructionIndex(*MI).getRegSlot();
  1224. RegOpers.adjustLaneLiveness(*LIS, MRI, SlotIdx, MI);
  1225. } else {
  1226. // Adjust for missing dead-def flags.
  1227. RegOpers.detectDeadDefs(*MI, *LIS);
  1228. }
  1229. TopRPTracker.advance(RegOpers);
  1230. assert(TopRPTracker.getPos() == CurrentTop && "out of sync");
  1231. LLVM_DEBUG(dbgs() << "Top Pressure:\n"; dumpRegSetPressure(
  1232. TopRPTracker.getRegSetPressureAtPos(), TRI););
  1233. updateScheduledPressure(SU, TopRPTracker.getPressure().MaxSetPressure);
  1234. }
  1235. } else {
  1236. assert(SU->isBottomReady() && "node still has unscheduled dependencies");
  1237. MachineBasicBlock::iterator priorII =
  1238. priorNonDebug(CurrentBottom, CurrentTop);
  1239. if (&*priorII == MI)
  1240. CurrentBottom = priorII;
  1241. else {
  1242. if (&*CurrentTop == MI) {
  1243. CurrentTop = nextIfDebug(++CurrentTop, priorII);
  1244. TopRPTracker.setPos(CurrentTop);
  1245. }
  1246. moveInstruction(MI, CurrentBottom);
  1247. CurrentBottom = MI;
  1248. BotRPTracker.setPos(CurrentBottom);
  1249. }
  1250. if (ShouldTrackPressure) {
  1251. RegisterOperands RegOpers;
  1252. RegOpers.collect(*MI, *TRI, MRI, ShouldTrackLaneMasks, false);
  1253. if (ShouldTrackLaneMasks) {
  1254. // Adjust liveness and add missing dead+read-undef flags.
  1255. SlotIndex SlotIdx = LIS->getInstructionIndex(*MI).getRegSlot();
  1256. RegOpers.adjustLaneLiveness(*LIS, MRI, SlotIdx, MI);
  1257. } else {
  1258. // Adjust for missing dead-def flags.
  1259. RegOpers.detectDeadDefs(*MI, *LIS);
  1260. }
  1261. if (BotRPTracker.getPos() != CurrentBottom)
  1262. BotRPTracker.recedeSkipDebugValues();
  1263. SmallVector<RegisterMaskPair, 8> LiveUses;
  1264. BotRPTracker.recede(RegOpers, &LiveUses);
  1265. assert(BotRPTracker.getPos() == CurrentBottom && "out of sync");
  1266. LLVM_DEBUG(dbgs() << "Bottom Pressure:\n"; dumpRegSetPressure(
  1267. BotRPTracker.getRegSetPressureAtPos(), TRI););
  1268. updateScheduledPressure(SU, BotRPTracker.getPressure().MaxSetPressure);
  1269. updatePressureDiffs(LiveUses);
  1270. }
  1271. }
  1272. }
  1273. //===----------------------------------------------------------------------===//
  1274. // BaseMemOpClusterMutation - DAG post-processing to cluster loads or stores.
  1275. //===----------------------------------------------------------------------===//
  1276. namespace {
  1277. /// Post-process the DAG to create cluster edges between neighboring
  1278. /// loads or between neighboring stores.
  1279. class BaseMemOpClusterMutation : public ScheduleDAGMutation {
  1280. struct MemOpInfo {
  1281. SUnit *SU;
  1282. SmallVector<const MachineOperand *, 4> BaseOps;
  1283. int64_t Offset;
  1284. unsigned Width;
  1285. MemOpInfo(SUnit *SU, ArrayRef<const MachineOperand *> BaseOps,
  1286. int64_t Offset, unsigned Width)
  1287. : SU(SU), BaseOps(BaseOps.begin(), BaseOps.end()), Offset(Offset),
  1288. Width(Width) {}
  1289. static bool Compare(const MachineOperand *const &A,
  1290. const MachineOperand *const &B) {
  1291. if (A->getType() != B->getType())
  1292. return A->getType() < B->getType();
  1293. if (A->isReg())
  1294. return A->getReg() < B->getReg();
  1295. if (A->isFI()) {
  1296. const MachineFunction &MF = *A->getParent()->getParent()->getParent();
  1297. const TargetFrameLowering &TFI = *MF.getSubtarget().getFrameLowering();
  1298. bool StackGrowsDown = TFI.getStackGrowthDirection() ==
  1299. TargetFrameLowering::StackGrowsDown;
  1300. return StackGrowsDown ? A->getIndex() > B->getIndex()
  1301. : A->getIndex() < B->getIndex();
  1302. }
  1303. llvm_unreachable("MemOpClusterMutation only supports register or frame "
  1304. "index bases.");
  1305. }
  1306. bool operator<(const MemOpInfo &RHS) const {
  1307. // FIXME: Don't compare everything twice. Maybe use C++20 three way
  1308. // comparison instead when it's available.
  1309. if (std::lexicographical_compare(BaseOps.begin(), BaseOps.end(),
  1310. RHS.BaseOps.begin(), RHS.BaseOps.end(),
  1311. Compare))
  1312. return true;
  1313. if (std::lexicographical_compare(RHS.BaseOps.begin(), RHS.BaseOps.end(),
  1314. BaseOps.begin(), BaseOps.end(), Compare))
  1315. return false;
  1316. if (Offset != RHS.Offset)
  1317. return Offset < RHS.Offset;
  1318. return SU->NodeNum < RHS.SU->NodeNum;
  1319. }
  1320. };
  1321. const TargetInstrInfo *TII;
  1322. const TargetRegisterInfo *TRI;
  1323. bool IsLoad;
  1324. public:
  1325. BaseMemOpClusterMutation(const TargetInstrInfo *tii,
  1326. const TargetRegisterInfo *tri, bool IsLoad)
  1327. : TII(tii), TRI(tri), IsLoad(IsLoad) {}
  1328. void apply(ScheduleDAGInstrs *DAGInstrs) override;
  1329. protected:
  1330. void clusterNeighboringMemOps(ArrayRef<MemOpInfo> MemOps, bool FastCluster,
  1331. ScheduleDAGInstrs *DAG);
  1332. void collectMemOpRecords(std::vector<SUnit> &SUnits,
  1333. SmallVectorImpl<MemOpInfo> &MemOpRecords);
  1334. bool groupMemOps(ArrayRef<MemOpInfo> MemOps, ScheduleDAGInstrs *DAG,
  1335. DenseMap<unsigned, SmallVector<MemOpInfo, 32>> &Groups);
  1336. };
  1337. class StoreClusterMutation : public BaseMemOpClusterMutation {
  1338. public:
  1339. StoreClusterMutation(const TargetInstrInfo *tii,
  1340. const TargetRegisterInfo *tri)
  1341. : BaseMemOpClusterMutation(tii, tri, false) {}
  1342. };
  1343. class LoadClusterMutation : public BaseMemOpClusterMutation {
  1344. public:
  1345. LoadClusterMutation(const TargetInstrInfo *tii, const TargetRegisterInfo *tri)
  1346. : BaseMemOpClusterMutation(tii, tri, true) {}
  1347. };
  1348. } // end anonymous namespace
  1349. namespace llvm {
  1350. std::unique_ptr<ScheduleDAGMutation>
  1351. createLoadClusterDAGMutation(const TargetInstrInfo *TII,
  1352. const TargetRegisterInfo *TRI) {
  1353. return EnableMemOpCluster ? std::make_unique<LoadClusterMutation>(TII, TRI)
  1354. : nullptr;
  1355. }
  1356. std::unique_ptr<ScheduleDAGMutation>
  1357. createStoreClusterDAGMutation(const TargetInstrInfo *TII,
  1358. const TargetRegisterInfo *TRI) {
  1359. return EnableMemOpCluster ? std::make_unique<StoreClusterMutation>(TII, TRI)
  1360. : nullptr;
  1361. }
  1362. } // end namespace llvm
// Sort all the loads/stores first. Then, for each load/store, check the
// following loads/stores one by one until the first non-dependent one is
// reached, and call the target hook to see if they can be clustered.
// If FastCluster is enabled, we assume that all the loads/stores have been
// preprocessed and no longer have dependencies on each other.
  1368. void BaseMemOpClusterMutation::clusterNeighboringMemOps(
  1369. ArrayRef<MemOpInfo> MemOpRecords, bool FastCluster,
  1370. ScheduleDAGInstrs *DAG) {
  1371. // Keep track of the current cluster length and bytes for each SUnit.
  1372. DenseMap<unsigned, std::pair<unsigned, unsigned>> SUnit2ClusterInfo;
// At this point, the `MemOpRecords` array must hold at least two mem ops. Try
// to cluster the mem ops collected within the `MemOpRecords` array.
  1375. for (unsigned Idx = 0, End = MemOpRecords.size(); Idx < (End - 1); ++Idx) {
// The decision to cluster mem ops is taken based on target-dependent logic.
auto MemOpa = MemOpRecords[Idx];
// Seek the next load/store with which to form a cluster.
  1379. unsigned NextIdx = Idx + 1;
  1380. for (; NextIdx < End; ++NextIdx)
// Skip if MemOpb has already been clustered or has a dependency on MemOpa.
  1383. if (!SUnit2ClusterInfo.count(MemOpRecords[NextIdx].SU->NodeNum) &&
  1384. (FastCluster ||
  1385. (!DAG->IsReachable(MemOpRecords[NextIdx].SU, MemOpa.SU) &&
  1386. !DAG->IsReachable(MemOpa.SU, MemOpRecords[NextIdx].SU))))
  1387. break;
  1388. if (NextIdx == End)
  1389. continue;
  1390. auto MemOpb = MemOpRecords[NextIdx];
  1391. unsigned ClusterLength = 2;
  1392. unsigned CurrentClusterBytes = MemOpa.Width + MemOpb.Width;
  1393. if (SUnit2ClusterInfo.count(MemOpa.SU->NodeNum)) {
  1394. ClusterLength = SUnit2ClusterInfo[MemOpa.SU->NodeNum].first + 1;
  1395. CurrentClusterBytes =
  1396. SUnit2ClusterInfo[MemOpa.SU->NodeNum].second + MemOpb.Width;
  1397. }
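// For example, if MemOpa already belongs to a cluster of length 2 totalling
// 8 bytes, adding a 4-byte MemOpb gives ClusterLength = 3 and
// CurrentClusterBytes = 12.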
  1398. if (!TII->shouldClusterMemOps(MemOpa.BaseOps, MemOpb.BaseOps, ClusterLength,
  1399. CurrentClusterBytes))
  1400. continue;
  1401. SUnit *SUa = MemOpa.SU;
  1402. SUnit *SUb = MemOpb.SU;
  1403. if (SUa->NodeNum > SUb->NodeNum)
  1404. std::swap(SUa, SUb);
  1405. // FIXME: Is this check really required?
  1406. if (!DAG->addEdge(SUb, SDep(SUa, SDep::Cluster)))
  1407. continue;
  1408. LLVM_DEBUG(dbgs() << "Cluster ld/st SU(" << SUa->NodeNum << ") - SU("
  1409. << SUb->NodeNum << ")\n");
  1410. ++NumClustered;
  1411. if (IsLoad) {
  1412. // Copy successor edges from SUa to SUb. Interleaving computation
  1413. // dependent on SUa can prevent load combining due to register reuse.
  1414. // Predecessor edges do not need to be copied from SUb to SUa since
  1415. // nearby loads should have effectively the same inputs.
  1416. for (const SDep &Succ : SUa->Succs) {
  1417. if (Succ.getSUnit() == SUb)
  1418. continue;
  1419. LLVM_DEBUG(dbgs() << " Copy Succ SU(" << Succ.getSUnit()->NodeNum
  1420. << ")\n");
  1421. DAG->addEdge(Succ.getSUnit(), SDep(SUb, SDep::Artificial));
  1422. }
  1423. } else {
// Copy predecessor edges from SUb to SUa so that the SUnits SUb depends on
// cannot be scheduled in between SUb and SUa. Successor edges do not need
// to be copied from SUa to SUb since nothing depends on stores.
// Note that we don't need to worry about memory dependencies here: mem ops
// are never clustered if they have a memory dependency on each other.
  1430. for (const SDep &Pred : SUb->Preds) {
  1431. if (Pred.getSUnit() == SUa)
  1432. continue;
  1433. LLVM_DEBUG(dbgs() << " Copy Pred SU(" << Pred.getSUnit()->NodeNum
  1434. << ")\n");
  1435. DAG->addEdge(SUa, SDep(Pred.getSUnit(), SDep::Artificial));
  1436. }
  1437. }
  1438. SUnit2ClusterInfo[MemOpb.SU->NodeNum] = {ClusterLength,
  1439. CurrentClusterBytes};
  1440. LLVM_DEBUG(dbgs() << " Curr cluster length: " << ClusterLength
  1441. << ", Curr cluster bytes: " << CurrentClusterBytes
  1442. << "\n");
  1443. }
  1444. }
  1445. void BaseMemOpClusterMutation::collectMemOpRecords(
  1446. std::vector<SUnit> &SUnits, SmallVectorImpl<MemOpInfo> &MemOpRecords) {
  1447. for (auto &SU : SUnits) {
  1448. if ((IsLoad && !SU.getInstr()->mayLoad()) ||
  1449. (!IsLoad && !SU.getInstr()->mayStore()))
  1450. continue;
  1451. const MachineInstr &MI = *SU.getInstr();
  1452. SmallVector<const MachineOperand *, 4> BaseOps;
  1453. int64_t Offset;
  1454. bool OffsetIsScalable;
  1455. unsigned Width;
  1456. if (TII->getMemOperandsWithOffsetWidth(MI, BaseOps, Offset,
  1457. OffsetIsScalable, Width, TRI)) {
  1458. MemOpRecords.push_back(MemOpInfo(&SU, BaseOps, Offset, Width));
  1459. LLVM_DEBUG(dbgs() << "Num BaseOps: " << BaseOps.size() << ", Offset: "
  1460. << Offset << ", OffsetIsScalable: " << OffsetIsScalable
  1461. << ", Width: " << Width << "\n");
  1462. }
  1463. #ifndef NDEBUG
  1464. for (auto *Op : BaseOps)
  1465. assert(Op);
  1466. #endif
  1467. }
  1468. }
  1469. bool BaseMemOpClusterMutation::groupMemOps(
  1470. ArrayRef<MemOpInfo> MemOps, ScheduleDAGInstrs *DAG,
  1471. DenseMap<unsigned, SmallVector<MemOpInfo, 32>> &Groups) {
  1472. bool FastCluster =
  1473. ForceFastCluster ||
  1474. MemOps.size() * DAG->SUnits.size() / 1000 > FastClusterThreshold;
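// For example, 100 mem ops in a DAG of 500 SUnits gives 100 * 500 / 1000 = 50,
// so fast clustering is used whenever 50 exceeds FastClusterThreshold.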
  1475. for (const auto &MemOp : MemOps) {
  1476. unsigned ChainPredID = DAG->SUnits.size();
  1477. if (FastCluster) {
  1478. for (const SDep &Pred : MemOp.SU->Preds) {
// We only want to cluster mem ops that have the same ctrl (non-data) pred,
// so that they have no ctrl dependency on each other. But for store instrs,
// we can still cluster them if the pred is a load instr.
  1482. if ((Pred.isCtrl() &&
  1483. (IsLoad ||
  1484. (Pred.getSUnit() && Pred.getSUnit()->getInstr()->mayStore()))) &&
  1485. !Pred.isArtificial()) {
  1486. ChainPredID = Pred.getSUnit()->NodeNum;
  1487. break;
  1488. }
  1489. }
  1490. } else
  1491. ChainPredID = 0;
  1492. Groups[ChainPredID].push_back(MemOp);
  1493. }
  1494. return FastCluster;
  1495. }
  1496. /// Callback from DAG postProcessing to create cluster edges for loads/stores.
  1497. void BaseMemOpClusterMutation::apply(ScheduleDAGInstrs *DAG) {
  1498. // Collect all the clusterable loads/stores
  1499. SmallVector<MemOpInfo, 32> MemOpRecords;
  1500. collectMemOpRecords(DAG->SUnits, MemOpRecords);
  1501. if (MemOpRecords.size() < 2)
  1502. return;
// If the DAG is too complex, use a heuristic to put loads/stores without
// dependencies into the same group, to avoid a compile-time blow-up. Note
// that some fusion pairs may be lost as a result.
  1506. DenseMap<unsigned, SmallVector<MemOpInfo, 32>> Groups;
  1507. bool FastCluster = groupMemOps(MemOpRecords, DAG, Groups);
  1508. for (auto &Group : Groups) {
// Sort the loads/stores so that clustering can stop as early as possible.
llvm::sort(Group.second);
// Try to cluster all the neighboring loads/stores.
  1513. clusterNeighboringMemOps(Group.second, FastCluster, DAG);
  1514. }
  1515. }
  1516. //===----------------------------------------------------------------------===//
  1517. // CopyConstrain - DAG post-processing to encourage copy elimination.
  1518. //===----------------------------------------------------------------------===//
  1519. namespace {
  1520. /// Post-process the DAG to create weak edges from all uses of a copy to
  1521. /// the one use that defines the copy's source vreg, most likely an induction
  1522. /// variable increment.
  1523. class CopyConstrain : public ScheduleDAGMutation {
  1524. // Transient state.
  1525. SlotIndex RegionBeginIdx;
  1526. // RegionEndIdx is the slot index of the last non-debug instruction in the
  1527. // scheduling region. So we may have RegionBeginIdx == RegionEndIdx.
  1528. SlotIndex RegionEndIdx;
  1529. public:
  1530. CopyConstrain(const TargetInstrInfo *, const TargetRegisterInfo *) {}
  1531. void apply(ScheduleDAGInstrs *DAGInstrs) override;
  1532. protected:
  1533. void constrainLocalCopy(SUnit *CopySU, ScheduleDAGMILive *DAG);
  1534. };
  1535. } // end anonymous namespace
  1536. namespace llvm {
  1537. std::unique_ptr<ScheduleDAGMutation>
  1538. createCopyConstrainDAGMutation(const TargetInstrInfo *TII,
  1539. const TargetRegisterInfo *TRI) {
  1540. return std::make_unique<CopyConstrain>(TII, TRI);
  1541. }
  1542. } // end namespace llvm
  1543. /// constrainLocalCopy handles two possibilities:
  1544. /// 1) Local src:
  1545. /// I0: = dst
  1546. /// I1: src = ...
  1547. /// I2: = dst
  1548. /// I3: dst = src (copy)
  1549. /// (create pred->succ edges I0->I1, I2->I1)
  1550. ///
  1551. /// 2) Local copy:
  1552. /// I0: dst = src (copy)
  1553. /// I1: = dst
  1554. /// I2: src = ...
  1555. /// I3: = dst
  1556. /// (create pred->succ edges I1->I2, I3->I2)
  1557. ///
  1558. /// Although the MachineScheduler is currently constrained to single blocks,
  1559. /// this algorithm should handle extended blocks. An EBB is a set of
  1560. /// contiguously numbered blocks such that the previous block in the EBB is
  1561. /// always the single predecessor.
  1562. void CopyConstrain::constrainLocalCopy(SUnit *CopySU, ScheduleDAGMILive *DAG) {
  1563. LiveIntervals *LIS = DAG->getLIS();
  1564. MachineInstr *Copy = CopySU->getInstr();
  1565. // Check for pure vreg copies.
  1566. const MachineOperand &SrcOp = Copy->getOperand(1);
  1567. Register SrcReg = SrcOp.getReg();
  1568. if (!Register::isVirtualRegister(SrcReg) || !SrcOp.readsReg())
  1569. return;
  1570. const MachineOperand &DstOp = Copy->getOperand(0);
  1571. Register DstReg = DstOp.getReg();
  1572. if (!Register::isVirtualRegister(DstReg) || DstOp.isDead())
  1573. return;
  1574. // Check if either the dest or source is local. If it's live across a back
  1575. // edge, it's not local. Note that if both vregs are live across the back
// edge, we cannot successfully constrain the copy without cyclic scheduling.
  1577. // If both the copy's source and dest are local live intervals, then we
  1578. // should treat the dest as the global for the purpose of adding
  1579. // constraints. This adds edges from source's other uses to the copy.
  1580. unsigned LocalReg = SrcReg;
  1581. unsigned GlobalReg = DstReg;
  1582. LiveInterval *LocalLI = &LIS->getInterval(LocalReg);
  1583. if (!LocalLI->isLocal(RegionBeginIdx, RegionEndIdx)) {
  1584. LocalReg = DstReg;
  1585. GlobalReg = SrcReg;
  1586. LocalLI = &LIS->getInterval(LocalReg);
  1587. if (!LocalLI->isLocal(RegionBeginIdx, RegionEndIdx))
  1588. return;
  1589. }
  1590. LiveInterval *GlobalLI = &LIS->getInterval(GlobalReg);
  1591. // Find the global segment after the start of the local LI.
  1592. LiveInterval::iterator GlobalSegment = GlobalLI->find(LocalLI->beginIndex());
  1593. // If GlobalLI does not overlap LocalLI->start, then a copy directly feeds a
  1594. // local live range. We could create edges from other global uses to the local
  1595. // start, but the coalescer should have already eliminated these cases, so
  1596. // don't bother dealing with it.
  1597. if (GlobalSegment == GlobalLI->end())
  1598. return;
  1599. // If GlobalSegment is killed at the LocalLI->start, the call to find()
  1600. // returned the next global segment. But if GlobalSegment overlaps with
  1601. // LocalLI->start, then advance to the next segment. If a hole in GlobalLI
  1602. // exists in LocalLI's vicinity, GlobalSegment will be the end of the hole.
  1603. if (GlobalSegment->contains(LocalLI->beginIndex()))
  1604. ++GlobalSegment;
  1605. if (GlobalSegment == GlobalLI->end())
  1606. return;
  1607. // Check if GlobalLI contains a hole in the vicinity of LocalLI.
  1608. if (GlobalSegment != GlobalLI->begin()) {
  1609. // Two address defs have no hole.
  1610. if (SlotIndex::isSameInstr(std::prev(GlobalSegment)->end,
  1611. GlobalSegment->start)) {
  1612. return;
  1613. }
// If the prior global segment may be defined by the same two-address
// instruction that also defines LocalLI, then we can't make a hole here.
  1616. if (SlotIndex::isSameInstr(std::prev(GlobalSegment)->start,
  1617. LocalLI->beginIndex())) {
  1618. return;
  1619. }
  1620. // If GlobalLI has a prior segment, it must be live into the EBB. Otherwise
  1621. // it would be a disconnected component in the live range.
  1622. assert(std::prev(GlobalSegment)->start < LocalLI->beginIndex() &&
  1623. "Disconnected LRG within the scheduling region.");
  1624. }
  1625. MachineInstr *GlobalDef = LIS->getInstructionFromIndex(GlobalSegment->start);
  1626. if (!GlobalDef)
  1627. return;
  1628. SUnit *GlobalSU = DAG->getSUnit(GlobalDef);
  1629. if (!GlobalSU)
  1630. return;
  1631. // GlobalDef is the bottom of the GlobalLI hole. Open the hole by
  1632. // constraining the uses of the last local def to precede GlobalDef.
  1633. SmallVector<SUnit*,8> LocalUses;
  1634. const VNInfo *LastLocalVN = LocalLI->getVNInfoBefore(LocalLI->endIndex());
  1635. MachineInstr *LastLocalDef = LIS->getInstructionFromIndex(LastLocalVN->def);
  1636. SUnit *LastLocalSU = DAG->getSUnit(LastLocalDef);
  1637. for (const SDep &Succ : LastLocalSU->Succs) {
  1638. if (Succ.getKind() != SDep::Data || Succ.getReg() != LocalReg)
  1639. continue;
  1640. if (Succ.getSUnit() == GlobalSU)
  1641. continue;
  1642. if (!DAG->canAddEdge(GlobalSU, Succ.getSUnit()))
  1643. return;
  1644. LocalUses.push_back(Succ.getSUnit());
  1645. }
  1646. // Open the top of the GlobalLI hole by constraining any earlier global uses
  1647. // to precede the start of LocalLI.
  1648. SmallVector<SUnit*,8> GlobalUses;
  1649. MachineInstr *FirstLocalDef =
  1650. LIS->getInstructionFromIndex(LocalLI->beginIndex());
  1651. SUnit *FirstLocalSU = DAG->getSUnit(FirstLocalDef);
  1652. for (const SDep &Pred : GlobalSU->Preds) {
  1653. if (Pred.getKind() != SDep::Anti || Pred.getReg() != GlobalReg)
  1654. continue;
  1655. if (Pred.getSUnit() == FirstLocalSU)
  1656. continue;
  1657. if (!DAG->canAddEdge(FirstLocalSU, Pred.getSUnit()))
  1658. return;
  1659. GlobalUses.push_back(Pred.getSUnit());
  1660. }
  1661. LLVM_DEBUG(dbgs() << "Constraining copy SU(" << CopySU->NodeNum << ")\n");
  1662. // Add the weak edges.
  1663. for (SUnit *LU : LocalUses) {
  1664. LLVM_DEBUG(dbgs() << " Local use SU(" << LU->NodeNum << ") -> SU("
  1665. << GlobalSU->NodeNum << ")\n");
  1666. DAG->addEdge(GlobalSU, SDep(LU, SDep::Weak));
  1667. }
  1668. for (SUnit *GU : GlobalUses) {
  1669. LLVM_DEBUG(dbgs() << " Global use SU(" << GU->NodeNum << ") -> SU("
  1670. << FirstLocalSU->NodeNum << ")\n");
  1671. DAG->addEdge(FirstLocalSU, SDep(GU, SDep::Weak));
  1672. }
  1673. }
  1674. /// Callback from DAG postProcessing to create weak edges to encourage
  1675. /// copy elimination.
  1676. void CopyConstrain::apply(ScheduleDAGInstrs *DAGInstrs) {
  1677. ScheduleDAGMI *DAG = static_cast<ScheduleDAGMI*>(DAGInstrs);
  1678. assert(DAG->hasVRegLiveness() && "Expect VRegs with LiveIntervals");
  1679. MachineBasicBlock::iterator FirstPos = nextIfDebug(DAG->begin(), DAG->end());
  1680. if (FirstPos == DAG->end())
  1681. return;
  1682. RegionBeginIdx = DAG->getLIS()->getInstructionIndex(*FirstPos);
  1683. RegionEndIdx = DAG->getLIS()->getInstructionIndex(
  1684. *priorNonDebug(DAG->end(), DAG->begin()));
  1685. for (SUnit &SU : DAG->SUnits) {
  1686. if (!SU.getInstr()->isCopy())
  1687. continue;
  1688. constrainLocalCopy(&SU, static_cast<ScheduleDAGMILive*>(DAG));
  1689. }
  1690. }
  1691. //===----------------------------------------------------------------------===//
  1692. // MachineSchedStrategy helpers used by GenericScheduler, GenericPostScheduler
  1693. // and possibly other custom schedulers.
  1694. //===----------------------------------------------------------------------===//
  1695. static const unsigned InvalidCycle = ~0U;
  1696. SchedBoundary::~SchedBoundary() { delete HazardRec; }
  1697. /// Given a Count of resource usage and a Latency value, return true if a
  1698. /// SchedBoundary becomes resource limited.
  1699. /// If we are checking after scheduling a node, we should return true when
  1700. /// we just reach the resource limit.
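/// For example, with LFactor = 2, Latency = 5 and Count = 12, ResCntFactor is
/// 12 - 5*2 = 2; after scheduling a node this counts as resource limited
/// (2 >= 2), but before scheduling it does not (2 > 2 is false).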
  1701. static bool checkResourceLimit(unsigned LFactor, unsigned Count,
  1702. unsigned Latency, bool AfterSchedNode) {
  1703. int ResCntFactor = (int)(Count - (Latency * LFactor));
  1704. if (AfterSchedNode)
  1705. return ResCntFactor >= (int)LFactor;
  1706. else
  1707. return ResCntFactor > (int)LFactor;
  1708. }
  1709. void SchedBoundary::reset() {
  1710. // A new HazardRec is created for each DAG and owned by SchedBoundary.
  1711. // Destroying and reconstructing it is very expensive though. So keep
  1712. // invalid, placeholder HazardRecs.
  1713. if (HazardRec && HazardRec->isEnabled()) {
  1714. delete HazardRec;
  1715. HazardRec = nullptr;
  1716. }
  1717. Available.clear();
  1718. Pending.clear();
  1719. CheckPending = false;
  1720. CurrCycle = 0;
  1721. CurrMOps = 0;
  1722. MinReadyCycle = std::numeric_limits<unsigned>::max();
  1723. ExpectedLatency = 0;
  1724. DependentLatency = 0;
  1725. RetiredMOps = 0;
  1726. MaxExecutedResCount = 0;
  1727. ZoneCritResIdx = 0;
  1728. IsResourceLimited = false;
  1729. ReservedCycles.clear();
  1730. ReservedCyclesIndex.clear();
  1731. ResourceGroupSubUnitMasks.clear();
  1732. #ifndef NDEBUG
  1733. // Track the maximum number of stall cycles that could arise either from the
  1734. // latency of a DAG edge or the number of cycles that a processor resource is
  1735. // reserved (SchedBoundary::ReservedCycles).
  1736. MaxObservedStall = 0;
  1737. #endif
  1738. // Reserve a zero-count for invalid CritResIdx.
  1739. ExecutedResCounts.resize(1);
  1740. assert(!ExecutedResCounts[0] && "nonzero count for bad resource");
  1741. }
  1742. void SchedRemainder::
  1743. init(ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel) {
  1744. reset();
  1745. if (!SchedModel->hasInstrSchedModel())
  1746. return;
  1747. RemainingCounts.resize(SchedModel->getNumProcResourceKinds());
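// Sum the remaining micro-ops and per-resource cycles over all DAG nodes,
// scaled by the model's factors so counts for different resources are
// comparable.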
  1748. for (SUnit &SU : DAG->SUnits) {
  1749. const MCSchedClassDesc *SC = DAG->getSchedClass(&SU);
  1750. RemIssueCount += SchedModel->getNumMicroOps(SU.getInstr(), SC)
  1751. * SchedModel->getMicroOpFactor();
  1752. for (TargetSchedModel::ProcResIter
  1753. PI = SchedModel->getWriteProcResBegin(SC),
  1754. PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
  1755. unsigned PIdx = PI->ProcResourceIdx;
  1756. unsigned Factor = SchedModel->getResourceFactor(PIdx);
  1757. RemainingCounts[PIdx] += (Factor * PI->Cycles);
  1758. }
  1759. }
  1760. }
  1761. void SchedBoundary::
  1762. init(ScheduleDAGMI *dag, const TargetSchedModel *smodel, SchedRemainder *rem) {
  1763. reset();
  1764. DAG = dag;
  1765. SchedModel = smodel;
  1766. Rem = rem;
  1767. if (SchedModel->hasInstrSchedModel()) {
  1768. unsigned ResourceCount = SchedModel->getNumProcResourceKinds();
  1769. ReservedCyclesIndex.resize(ResourceCount);
  1770. ExecutedResCounts.resize(ResourceCount);
  1771. ResourceGroupSubUnitMasks.resize(ResourceCount, APInt(ResourceCount, 0));
  1772. unsigned NumUnits = 0;
  1773. for (unsigned i = 0; i < ResourceCount; ++i) {
  1774. ReservedCyclesIndex[i] = NumUnits;
  1775. NumUnits += SchedModel->getProcResource(i)->NumUnits;
  1776. if (isUnbufferedGroup(i)) {
  1777. auto SubUnits = SchedModel->getProcResource(i)->SubUnitsIdxBegin;
  1778. for (unsigned U = 0, UE = SchedModel->getProcResource(i)->NumUnits;
  1779. U != UE; ++U)
  1780. ResourceGroupSubUnitMasks[i].setBit(SubUnits[U]);
  1781. }
  1782. }
  1783. ReservedCycles.resize(NumUnits, InvalidCycle);
  1784. }
  1785. }
  1786. /// Compute the stall cycles based on this SUnit's ready time. Heuristics treat
  1787. /// these "soft stalls" differently than the hard stall cycles based on CPU
  1788. /// resources and computed by checkHazard(). A fully in-order model
  1789. /// (MicroOpBufferSize==0) will not make use of this since instructions are not
  1790. /// available for scheduling until they are ready. However, a weaker in-order
  1791. /// model may use this for heuristics. For example, if a processor has in-order
  1792. /// behavior when reading certain resources, this may come into play.
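/// For example, if an unbuffered SU's ready cycle is 7 while this zone is only
/// at cycle 5, the heuristics see a soft stall of 2 cycles; buffered SUs
/// always report 0.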
  1793. unsigned SchedBoundary::getLatencyStallCycles(SUnit *SU) {
  1794. if (!SU->isUnbuffered)
  1795. return 0;
  1796. unsigned ReadyCycle = (isTop() ? SU->TopReadyCycle : SU->BotReadyCycle);
  1797. if (ReadyCycle > CurrCycle)
  1798. return ReadyCycle - CurrCycle;
  1799. return 0;
  1800. }
  1801. /// Compute the next cycle at which the given processor resource unit
  1802. /// can be scheduled.
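/// For example, if the unit's ReservedCycles entry is 10, the result is 10 for
/// a top-down zone, and 10 + Cycles (e.g. 13 for a 3-cycle operation) for a
/// bottom-up zone.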
  1803. unsigned SchedBoundary::getNextResourceCycleByInstance(unsigned InstanceIdx,
  1804. unsigned Cycles) {
  1805. unsigned NextUnreserved = ReservedCycles[InstanceIdx];
  1806. // If this resource has never been used, always return cycle zero.
  1807. if (NextUnreserved == InvalidCycle)
  1808. return 0;
  1809. // For bottom-up scheduling add the cycles needed for the current operation.
  1810. if (!isTop())
  1811. NextUnreserved += Cycles;
  1812. return NextUnreserved;
  1813. }
  1814. /// Compute the next cycle at which the given processor resource can be
  1815. /// scheduled. Returns the next cycle and the index of the processor resource
  1816. /// instance in the reserved cycles vector.
  1817. std::pair<unsigned, unsigned>
  1818. SchedBoundary::getNextResourceCycle(const MCSchedClassDesc *SC, unsigned PIdx,
  1819. unsigned Cycles) {
  1820. unsigned MinNextUnreserved = InvalidCycle;
  1821. unsigned InstanceIdx = 0;
  1822. unsigned StartIndex = ReservedCyclesIndex[PIdx];
  1823. unsigned NumberOfInstances = SchedModel->getProcResource(PIdx)->NumUnits;
  1824. assert(NumberOfInstances > 0 &&
  1825. "Cannot have zero instances of a ProcResource");
  1826. if (isUnbufferedGroup(PIdx)) {
  1827. // If any subunits are used by the instruction, report that the resource
  1828. // group is available at 0, effectively removing the group record from
  1829. // hazarding and basing the hazarding decisions on the subunit records.
  1830. // Otherwise, choose the first available instance from among the subunits.
  1831. // Specifications which assign cycles to both the subunits and the group or
  1832. // which use an unbuffered group with buffered subunits will appear to
  1833. // schedule strangely. In the first case, the additional cycles for the
  1834. // group will be ignored. In the second, the group will be ignored
  1835. // entirely.
  1836. for (const MCWriteProcResEntry &PE :
  1837. make_range(SchedModel->getWriteProcResBegin(SC),
  1838. SchedModel->getWriteProcResEnd(SC)))
  1839. if (ResourceGroupSubUnitMasks[PIdx][PE.ProcResourceIdx])
  1840. return std::make_pair(0u, StartIndex);
  1841. auto SubUnits = SchedModel->getProcResource(PIdx)->SubUnitsIdxBegin;
  1842. for (unsigned I = 0, End = NumberOfInstances; I < End; ++I) {
  1843. unsigned NextUnreserved, NextInstanceIdx;
  1844. std::tie(NextUnreserved, NextInstanceIdx) =
  1845. getNextResourceCycle(SC, SubUnits[I], Cycles);
  1846. if (MinNextUnreserved > NextUnreserved) {
  1847. InstanceIdx = NextInstanceIdx;
  1848. MinNextUnreserved = NextUnreserved;
  1849. }
  1850. }
  1851. return std::make_pair(MinNextUnreserved, InstanceIdx);
  1852. }
  1853. for (unsigned I = StartIndex, End = StartIndex + NumberOfInstances; I < End;
  1854. ++I) {
  1855. unsigned NextUnreserved = getNextResourceCycleByInstance(I, Cycles);
  1856. if (MinNextUnreserved > NextUnreserved) {
  1857. InstanceIdx = I;
  1858. MinNextUnreserved = NextUnreserved;
  1859. }
  1860. }
  1861. return std::make_pair(MinNextUnreserved, InstanceIdx);
  1862. }
  1863. /// Does this SU have a hazard within the current instruction group.
  1864. ///
  1865. /// The scheduler supports two modes of hazard recognition. The first is the
  1866. /// ScheduleHazardRecognizer API. It is a fully general hazard recognizer that
  1867. /// supports highly complicated in-order reservation tables
  1868. /// (ScoreboardHazardRecognizer) and arbitrary target-specific logic.
  1869. ///
  1870. /// The second is a streamlined mechanism that checks for hazards based on
  1871. /// simple counters that the scheduler itself maintains. It explicitly checks
  1872. /// for instruction dispatch limitations, including the number of micro-ops that
  1873. /// can dispatch per cycle.
  1874. ///
  1875. /// TODO: Also check whether the SU must start a new group.
  1876. bool SchedBoundary::checkHazard(SUnit *SU) {
  1877. if (HazardRec->isEnabled()
  1878. && HazardRec->getHazardType(SU) != ScheduleHazardRecognizer::NoHazard) {
  1879. return true;
  1880. }
  1881. unsigned uops = SchedModel->getNumMicroOps(SU->getInstr());
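// For example, with an issue width of 4, CurrMOps == 3 and an SU that needs
// 2 micro-ops: 3 + 2 > 4, so issuing it now would exceed the current group
// and it is treated as a hazard.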
  1882. if ((CurrMOps > 0) && (CurrMOps + uops > SchedModel->getIssueWidth())) {
  1883. LLVM_DEBUG(dbgs() << " SU(" << SU->NodeNum << ") uops="
  1884. << SchedModel->getNumMicroOps(SU->getInstr()) << '\n');
  1885. return true;
  1886. }
  1887. if (CurrMOps > 0 &&
  1888. ((isTop() && SchedModel->mustBeginGroup(SU->getInstr())) ||
  1889. (!isTop() && SchedModel->mustEndGroup(SU->getInstr())))) {
  1890. LLVM_DEBUG(dbgs() << " hazard: SU(" << SU->NodeNum << ") must "
  1891. << (isTop() ? "begin" : "end") << " group\n");
  1892. return true;
  1893. }
  1894. if (SchedModel->hasInstrSchedModel() && SU->hasReservedResource) {
  1895. const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
  1896. for (const MCWriteProcResEntry &PE :
  1897. make_range(SchedModel->getWriteProcResBegin(SC),
  1898. SchedModel->getWriteProcResEnd(SC))) {
  1899. unsigned ResIdx = PE.ProcResourceIdx;
  1900. unsigned Cycles = PE.Cycles;
  1901. unsigned NRCycle, InstanceIdx;
  1902. std::tie(NRCycle, InstanceIdx) = getNextResourceCycle(SC, ResIdx, Cycles);
  1903. if (NRCycle > CurrCycle) {
  1904. #ifndef NDEBUG
  1905. MaxObservedStall = std::max(Cycles, MaxObservedStall);
  1906. #endif
  1907. LLVM_DEBUG(dbgs() << " SU(" << SU->NodeNum << ") "
  1908. << SchedModel->getResourceName(ResIdx)
  1909. << '[' << InstanceIdx - ReservedCyclesIndex[ResIdx] << ']'
  1910. << "=" << NRCycle << "c\n");
  1911. return true;
  1912. }
  1913. }
  1914. }
  1915. return false;
  1916. }
  1917. // Find the unscheduled node in ReadySUs with the highest latency.
  1918. unsigned SchedBoundary::
  1919. findMaxLatency(ArrayRef<SUnit*> ReadySUs) {
  1920. SUnit *LateSU = nullptr;
  1921. unsigned RemLatency = 0;
  1922. for (SUnit *SU : ReadySUs) {
  1923. unsigned L = getUnscheduledLatency(SU);
  1924. if (L > RemLatency) {
  1925. RemLatency = L;
  1926. LateSU = SU;
  1927. }
  1928. }
  1929. if (LateSU) {
  1930. LLVM_DEBUG(dbgs() << Available.getName() << " RemLatency SU("
  1931. << LateSU->NodeNum << ") " << RemLatency << "c\n");
  1932. }
  1933. return RemLatency;
  1934. }
// Count resources in this zone and the remaining unscheduled
// instructions. Return the max count, scaled. Set OtherCritIdx to the critical
// resource index, or zero if the zone is issue limited.
  1938. unsigned SchedBoundary::
  1939. getOtherResourceCount(unsigned &OtherCritIdx) {
  1940. OtherCritIdx = 0;
  1941. if (!SchedModel->hasInstrSchedModel())
  1942. return 0;
  1943. unsigned OtherCritCount = Rem->RemIssueCount
  1944. + (RetiredMOps * SchedModel->getMicroOpFactor());
  1945. LLVM_DEBUG(dbgs() << " " << Available.getName() << " + Remain MOps: "
  1946. << OtherCritCount / SchedModel->getMicroOpFactor() << '\n');
  1947. for (unsigned PIdx = 1, PEnd = SchedModel->getNumProcResourceKinds();
  1948. PIdx != PEnd; ++PIdx) {
  1949. unsigned OtherCount = getResourceCount(PIdx) + Rem->RemainingCounts[PIdx];
  1950. if (OtherCount > OtherCritCount) {
  1951. OtherCritCount = OtherCount;
  1952. OtherCritIdx = PIdx;
  1953. }
  1954. }
  1955. if (OtherCritIdx) {
  1956. LLVM_DEBUG(
  1957. dbgs() << " " << Available.getName() << " + Remain CritRes: "
  1958. << OtherCritCount / SchedModel->getResourceFactor(OtherCritIdx)
  1959. << " " << SchedModel->getResourceName(OtherCritIdx) << "\n");
  1960. }
  1961. return OtherCritCount;
  1962. }
  1963. void SchedBoundary::releaseNode(SUnit *SU, unsigned ReadyCycle, bool InPQueue,
  1964. unsigned Idx) {
  1965. assert(SU->getInstr() && "Scheduled SUnit must have instr");
  1966. #ifndef NDEBUG
// ReadyCycle has been bumped up to CurrCycle when this node was
  1968. // scheduled, but CurrCycle may have been eagerly advanced immediately after
  1969. // scheduling, so may now be greater than ReadyCycle.
  1970. if (ReadyCycle > CurrCycle)
  1971. MaxObservedStall = std::max(ReadyCycle - CurrCycle, MaxObservedStall);
  1972. #endif
  1973. if (ReadyCycle < MinReadyCycle)
  1974. MinReadyCycle = ReadyCycle;
  1975. // Check for interlocks first. For the purpose of other heuristics, an
  1976. // instruction that cannot issue appears as if it's not in the ReadyQueue.
  1977. bool IsBuffered = SchedModel->getMicroOpBufferSize() != 0;
  1978. bool HazardDetected = (!IsBuffered && ReadyCycle > CurrCycle) ||
  1979. checkHazard(SU) || (Available.size() >= ReadyListLimit);
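// A node stays in the Pending queue if an in-order zone (MicroOpBufferSize
// == 0) has not reached its ready cycle, if it would trigger a hazard, or if
// the Available queue has already reached ReadyListLimit.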
  1980. if (!HazardDetected) {
  1981. Available.push(SU);
  1982. if (InPQueue)
  1983. Pending.remove(Pending.begin() + Idx);
  1984. return;
  1985. }
  1986. if (!InPQueue)
  1987. Pending.push(SU);
  1988. }
  1989. /// Move the boundary of scheduled code by one cycle.
  1990. void SchedBoundary::bumpCycle(unsigned NextCycle) {
  1991. if (SchedModel->getMicroOpBufferSize() == 0) {
  1992. assert(MinReadyCycle < std::numeric_limits<unsigned>::max() &&
  1993. "MinReadyCycle uninitialized");
  1994. if (MinReadyCycle > NextCycle)
  1995. NextCycle = MinReadyCycle;
  1996. }
  1997. // Update the current micro-ops, which will issue in the next cycle.
  1998. unsigned DecMOps = SchedModel->getIssueWidth() * (NextCycle - CurrCycle);
  1999. CurrMOps = (CurrMOps <= DecMOps) ? 0 : CurrMOps - DecMOps;
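// For example, with an issue width of 4 and an advance of 2 cycles, up to 8
// micro-ops' worth is drained from CurrMOps (never going below zero).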
  2000. // Decrement DependentLatency based on the next cycle.
  2001. if ((NextCycle - CurrCycle) > DependentLatency)
  2002. DependentLatency = 0;
  2003. else
  2004. DependentLatency -= (NextCycle - CurrCycle);
  2005. if (!HazardRec->isEnabled()) {
  2006. // Bypass HazardRec virtual calls.
  2007. CurrCycle = NextCycle;
  2008. } else {
  2009. // Bypass getHazardType calls in case of long latency.
  2010. for (; CurrCycle != NextCycle; ++CurrCycle) {
  2011. if (isTop())
  2012. HazardRec->AdvanceCycle();
  2013. else
  2014. HazardRec->RecedeCycle();
  2015. }
  2016. }
  2017. CheckPending = true;
  2018. IsResourceLimited =
  2019. checkResourceLimit(SchedModel->getLatencyFactor(), getCriticalCount(),
  2020. getScheduledLatency(), true);
  2021. LLVM_DEBUG(dbgs() << "Cycle: " << CurrCycle << ' ' << Available.getName()
  2022. << '\n');
  2023. }
  2024. void SchedBoundary::incExecutedResources(unsigned PIdx, unsigned Count) {
  2025. ExecutedResCounts[PIdx] += Count;
  2026. if (ExecutedResCounts[PIdx] > MaxExecutedResCount)
  2027. MaxExecutedResCount = ExecutedResCounts[PIdx];
  2028. }
  2029. /// Add the given processor resource to this scheduled zone.
  2030. ///
  2031. /// \param Cycles indicates the number of consecutive (non-pipelined) cycles
  2032. /// during which this resource is consumed.
  2033. ///
  2034. /// \return the next cycle at which the instruction may execute without
  2035. /// oversubscribing resources.
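/// For example, a resource with factor 3 consumed for 2 cycles adds
/// 3 * 2 = 6 units to its executed count for this zone.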
  2036. unsigned SchedBoundary::countResource(const MCSchedClassDesc *SC, unsigned PIdx,
  2037. unsigned Cycles, unsigned NextCycle) {
  2038. unsigned Factor = SchedModel->getResourceFactor(PIdx);
  2039. unsigned Count = Factor * Cycles;
  2040. LLVM_DEBUG(dbgs() << " " << SchedModel->getResourceName(PIdx) << " +"
  2041. << Cycles << "x" << Factor << "u\n");
// Update the executed resource counts.
  2043. incExecutedResources(PIdx, Count);
  2044. assert(Rem->RemainingCounts[PIdx] >= Count && "resource double counted");
  2045. Rem->RemainingCounts[PIdx] -= Count;
  2046. // Check if this resource exceeds the current critical resource. If so, it
  2047. // becomes the critical resource.
  2048. if (ZoneCritResIdx != PIdx && (getResourceCount(PIdx) > getCriticalCount())) {
  2049. ZoneCritResIdx = PIdx;
  2050. LLVM_DEBUG(dbgs() << " *** Critical resource "
  2051. << SchedModel->getResourceName(PIdx) << ": "
  2052. << getResourceCount(PIdx) / SchedModel->getLatencyFactor()
  2053. << "c\n");
  2054. }
  2055. // For reserved resources, record the highest cycle using the resource.
  2056. unsigned NextAvailable, InstanceIdx;
  2057. std::tie(NextAvailable, InstanceIdx) = getNextResourceCycle(SC, PIdx, Cycles);
  2058. if (NextAvailable > CurrCycle) {
  2059. LLVM_DEBUG(dbgs() << " Resource conflict: "
  2060. << SchedModel->getResourceName(PIdx)
  2061. << '[' << InstanceIdx - ReservedCyclesIndex[PIdx] << ']'
  2062. << " reserved until @" << NextAvailable << "\n");
  2063. }
  2064. return NextAvailable;
  2065. }
  2066. /// Move the boundary of scheduled code by one SUnit.
  2067. void SchedBoundary::bumpNode(SUnit *SU) {
  2068. // Update the reservation table.
  2069. if (HazardRec->isEnabled()) {
  2070. if (!isTop() && SU->isCall) {
  2071. // Calls are scheduled with their preceding instructions. For bottom-up
  2072. // scheduling, clear the pipeline state before emitting.
  2073. HazardRec->Reset();
  2074. }
  2075. HazardRec->EmitInstruction(SU);
  2076. // Scheduling an instruction may have made pending instructions available.
  2077. CheckPending = true;
  2078. }
  2079. // checkHazard should prevent scheduling multiple instructions per cycle that
  2080. // exceed the issue width.
  2081. const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
  2082. unsigned IncMOps = SchedModel->getNumMicroOps(SU->getInstr());
  2083. assert(
  2084. (CurrMOps == 0 || (CurrMOps + IncMOps) <= SchedModel->getIssueWidth()) &&
  2085. "Cannot schedule this instruction's MicroOps in the current cycle.");
  2086. unsigned ReadyCycle = (isTop() ? SU->TopReadyCycle : SU->BotReadyCycle);
  2087. LLVM_DEBUG(dbgs() << " Ready @" << ReadyCycle << "c\n");
  2088. unsigned NextCycle = CurrCycle;
  2089. switch (SchedModel->getMicroOpBufferSize()) {
  2090. case 0:
  2091. assert(ReadyCycle <= CurrCycle && "Broken PendingQueue");
  2092. break;
  2093. case 1:
  2094. if (ReadyCycle > NextCycle) {
  2095. NextCycle = ReadyCycle;
  2096. LLVM_DEBUG(dbgs() << " *** Stall until: " << ReadyCycle << "\n");
  2097. }
  2098. break;
  2099. default:
  2100. // We don't currently model the OOO reorder buffer, so consider all
  2101. // scheduled MOps to be "retired". We do loosely model in-order resource
  2102. // latency. If this instruction uses an in-order resource, account for any
  2103. // likely stall cycles.
  2104. if (SU->isUnbuffered && ReadyCycle > NextCycle)
  2105. NextCycle = ReadyCycle;
  2106. break;
  2107. }
  2108. RetiredMOps += IncMOps;
  2109. // Update resource counts and critical resource.
  2110. if (SchedModel->hasInstrSchedModel()) {
  2111. unsigned DecRemIssue = IncMOps * SchedModel->getMicroOpFactor();
  2112. assert(Rem->RemIssueCount >= DecRemIssue && "MOps double counted");
  2113. Rem->RemIssueCount -= DecRemIssue;
  2114. if (ZoneCritResIdx) {
  2115. // Scale scheduled micro-ops for comparing with the critical resource.
  2116. unsigned ScaledMOps =
  2117. RetiredMOps * SchedModel->getMicroOpFactor();
  2118. // If scaled micro-ops are now more than the previous critical resource by
  2119. // a full cycle, then micro-ops issue becomes critical.
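// For example, with a micro-op factor of 2 and a latency factor of 2,
// RetiredMOps == 10 scales to 20; if the critical resource count is 17,
// 20 - 17 = 3 >= 2, so micro-op issue takes over as the limiting factor.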
  2120. if ((int)(ScaledMOps - getResourceCount(ZoneCritResIdx))
  2121. >= (int)SchedModel->getLatencyFactor()) {
  2122. ZoneCritResIdx = 0;
  2123. LLVM_DEBUG(dbgs() << " *** Critical resource NumMicroOps: "
  2124. << ScaledMOps / SchedModel->getLatencyFactor()
  2125. << "c\n");
  2126. }
  2127. }
  2128. for (TargetSchedModel::ProcResIter
  2129. PI = SchedModel->getWriteProcResBegin(SC),
  2130. PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
  2131. unsigned RCycle =
  2132. countResource(SC, PI->ProcResourceIdx, PI->Cycles, NextCycle);
  2133. if (RCycle > NextCycle)
  2134. NextCycle = RCycle;
  2135. }
  2136. if (SU->hasReservedResource) {
// For reserved resources, record the highest cycle using the resource.
// For top-down scheduling, this is the cycle in which we schedule this
// instruction plus the number of cycles the operation reserves the
// resource. For bottom-up, it is simply the instruction's cycle.
  2141. for (TargetSchedModel::ProcResIter
  2142. PI = SchedModel->getWriteProcResBegin(SC),
  2143. PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
  2144. unsigned PIdx = PI->ProcResourceIdx;
  2145. if (SchedModel->getProcResource(PIdx)->BufferSize == 0) {
  2146. unsigned ReservedUntil, InstanceIdx;
  2147. std::tie(ReservedUntil, InstanceIdx) =
  2148. getNextResourceCycle(SC, PIdx, 0);
  2149. if (isTop()) {
  2150. ReservedCycles[InstanceIdx] =
  2151. std::max(ReservedUntil, NextCycle + PI->Cycles);
  2152. } else
  2153. ReservedCycles[InstanceIdx] = NextCycle;
  2154. }
  2155. }
  2156. }
  2157. }
  2158. // Update ExpectedLatency and DependentLatency.
  2159. unsigned &TopLatency = isTop() ? ExpectedLatency : DependentLatency;
  2160. unsigned &BotLatency = isTop() ? DependentLatency : ExpectedLatency;
  2161. if (SU->getDepth() > TopLatency) {
  2162. TopLatency = SU->getDepth();
  2163. LLVM_DEBUG(dbgs() << " " << Available.getName() << " TopLatency SU("
  2164. << SU->NodeNum << ") " << TopLatency << "c\n");
  2165. }
  2166. if (SU->getHeight() > BotLatency) {
  2167. BotLatency = SU->getHeight();
  2168. LLVM_DEBUG(dbgs() << " " << Available.getName() << " BotLatency SU("
  2169. << SU->NodeNum << ") " << BotLatency << "c\n");
  2170. }
  2171. // If we stall for any reason, bump the cycle.
  2172. if (NextCycle > CurrCycle)
  2173. bumpCycle(NextCycle);
  2174. else
  2175. // After updating ZoneCritResIdx and ExpectedLatency, check if we're
  2176. // resource limited. If a stall occurred, bumpCycle does this.
  2177. IsResourceLimited =
  2178. checkResourceLimit(SchedModel->getLatencyFactor(), getCriticalCount(),
  2179. getScheduledLatency(), true);
  2180. // Update CurrMOps after calling bumpCycle to handle stalls, since bumpCycle
  2181. // resets CurrMOps. Loop to handle instructions with more MOps than issue in
  2182. // one cycle. Since we commonly reach the max MOps here, opportunistically
  2183. // bump the cycle to avoid uselessly checking everything in the readyQ.
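// Illustrative example (hypothetical numbers, not taken from any model): with
// an issue width of 4 and CurrMOps currently 3, issuing a 2-micro-op
// instruction raises CurrMOps to 5; the loop below then advances the cycle,
// and bumpCycle() subtracts one issue group, leaving CurrMOps == 1.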
  2184. CurrMOps += IncMOps;
  2185. // Bump the cycle count for issue group constraints.
// This must be done after NextCycle has been adjusted for all other stalls.
// Calling bumpCycle(X) will reduce CurrMOps by one issue group and set
// CurrCycle to X.
  2189. if ((isTop() && SchedModel->mustEndGroup(SU->getInstr())) ||
  2190. (!isTop() && SchedModel->mustBeginGroup(SU->getInstr()))) {
  2191. LLVM_DEBUG(dbgs() << " Bump cycle to " << (isTop() ? "end" : "begin")
  2192. << " group\n");
  2193. bumpCycle(++NextCycle);
  2194. }
  2195. while (CurrMOps >= SchedModel->getIssueWidth()) {
  2196. LLVM_DEBUG(dbgs() << " *** Max MOps " << CurrMOps << " at cycle "
  2197. << CurrCycle << '\n');
  2198. bumpCycle(++NextCycle);
  2199. }
  2200. LLVM_DEBUG(dumpScheduledState());
  2201. }
/// Release pending ready nodes into the available queue. This makes them
  2203. /// visible to heuristics.
  2204. void SchedBoundary::releasePending() {
  2205. // If the available queue is empty, it is safe to reset MinReadyCycle.
  2206. if (Available.empty())
  2207. MinReadyCycle = std::numeric_limits<unsigned>::max();
  2208. // Check to see if any of the pending instructions are ready to issue. If
  2209. // so, add them to the available queue.
  2210. for (unsigned I = 0, E = Pending.size(); I < E; ++I) {
  2211. SUnit *SU = *(Pending.begin() + I);
  2212. unsigned ReadyCycle = isTop() ? SU->TopReadyCycle : SU->BotReadyCycle;
  2213. if (ReadyCycle < MinReadyCycle)
  2214. MinReadyCycle = ReadyCycle;
  2215. if (Available.size() >= ReadyListLimit)
  2216. break;
  2217. releaseNode(SU, ReadyCycle, true, I);
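// releaseNode() may remove SU from Pending (it was passed InPQueue/I above);
// if the queue shrank, back up so the element that slid into slot I is not
// skipped.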
  2218. if (E != Pending.size()) {
  2219. --I;
  2220. --E;
  2221. }
  2222. }
  2223. CheckPending = false;
  2224. }
  2225. /// Remove SU from the ready set for this boundary.
  2226. void SchedBoundary::removeReady(SUnit *SU) {
  2227. if (Available.isInQueue(SU))
  2228. Available.remove(Available.find(SU));
  2229. else {
  2230. assert(Pending.isInQueue(SU) && "bad ready count");
  2231. Pending.remove(Pending.find(SU));
  2232. }
  2233. }
  2234. /// If this queue only has one ready candidate, return it. As a side effect,
  2235. /// defer any nodes that now hit a hazard, and advance the cycle until at least
  2236. /// one node is ready. If multiple instructions are ready, return NULL.
  2237. SUnit *SchedBoundary::pickOnlyChoice() {
  2238. if (CheckPending)
  2239. releasePending();
  2240. // Defer any ready instrs that now have a hazard.
  2241. for (ReadyQueue::iterator I = Available.begin(); I != Available.end();) {
  2242. if (checkHazard(*I)) {
  2243. Pending.push(*I);
  2244. I = Available.remove(I);
  2245. continue;
  2246. }
  2247. ++I;
  2248. }
  2249. for (unsigned i = 0; Available.empty(); ++i) {
  2250. // FIXME: Re-enable assert once PR20057 is resolved.
  2251. // assert(i <= (HazardRec->getMaxLookAhead() + MaxObservedStall) &&
  2252. // "permanent hazard");
  2253. (void)i;
  2254. bumpCycle(CurrCycle + 1);
  2255. releasePending();
  2256. }
  2257. LLVM_DEBUG(Pending.dump());
  2258. LLVM_DEBUG(Available.dump());
  2259. if (Available.size() == 1)
  2260. return *Available.begin();
  2261. return nullptr;
  2262. }
  2263. #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  2264. // This is useful information to dump after bumpNode.
  2265. // Note that the Queue contents are more useful before pickNodeFromQueue.
  2266. LLVM_DUMP_METHOD void SchedBoundary::dumpScheduledState() const {
  2267. unsigned ResFactor;
  2268. unsigned ResCount;
  2269. if (ZoneCritResIdx) {
  2270. ResFactor = SchedModel->getResourceFactor(ZoneCritResIdx);
  2271. ResCount = getResourceCount(ZoneCritResIdx);
  2272. } else {
  2273. ResFactor = SchedModel->getMicroOpFactor();
  2274. ResCount = RetiredMOps * ResFactor;
  2275. }
  2276. unsigned LFactor = SchedModel->getLatencyFactor();
  2277. dbgs() << Available.getName() << " @" << CurrCycle << "c\n"
  2278. << " Retired: " << RetiredMOps;
  2279. dbgs() << "\n Executed: " << getExecutedCount() / LFactor << "c";
  2280. dbgs() << "\n Critical: " << ResCount / LFactor << "c, "
  2281. << ResCount / ResFactor << " "
  2282. << SchedModel->getResourceName(ZoneCritResIdx)
  2283. << "\n ExpectedLatency: " << ExpectedLatency << "c\n"
  2284. << (IsResourceLimited ? " - Resource" : " - Latency")
  2285. << " limited.\n";
  2286. }
  2287. #endif
  2288. //===----------------------------------------------------------------------===//
  2289. // GenericScheduler - Generic implementation of MachineSchedStrategy.
  2290. //===----------------------------------------------------------------------===//
  2291. void GenericSchedulerBase::SchedCandidate::
  2292. initResourceDelta(const ScheduleDAGMI *DAG,
  2293. const TargetSchedModel *SchedModel) {
  2294. if (!Policy.ReduceResIdx && !Policy.DemandResIdx)
  2295. return;
  2296. const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
  2297. for (TargetSchedModel::ProcResIter
  2298. PI = SchedModel->getWriteProcResBegin(SC),
  2299. PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
  2300. if (PI->ProcResourceIdx == Policy.ReduceResIdx)
  2301. ResDelta.CritResources += PI->Cycles;
  2302. if (PI->ProcResourceIdx == Policy.DemandResIdx)
  2303. ResDelta.DemandedResources += PI->Cycles;
  2304. }
  2305. }
  2306. /// Compute remaining latency. We need this both to determine whether the
  2307. /// overall schedule has become latency-limited and whether the instructions
  2308. /// outside this zone are resource or latency limited.
  2309. ///
  2310. /// The "dependent" latency is updated incrementally during scheduling as the
  2311. /// max height/depth of scheduled nodes minus the cycles since it was
  2312. /// scheduled:
  2313. /// DLat = max (N.depth - (CurrCycle - N.ReadyCycle) for N in Zone
  2314. ///
  2315. /// The "independent" latency is the max ready queue depth:
  2316. /// ILat = max N.depth for N in Available|Pending
  2317. ///
  2318. /// RemainingLatency is the greater of independent and dependent latency.
  2319. ///
  2320. /// These computations are expensive, especially in DAGs with many edges, so
  2321. /// only do them if necessary.
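///
/// For illustration (hypothetical numbers): if the zone's DependentLatency is
/// 7 cycles and the deepest node still sitting in Available or Pending has a
/// 9-cycle path, RemainingLatency is 9.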
  2322. static unsigned computeRemLatency(SchedBoundary &CurrZone) {
  2323. unsigned RemLatency = CurrZone.getDependentLatency();
  2324. RemLatency = std::max(RemLatency,
  2325. CurrZone.findMaxLatency(CurrZone.Available.elements()));
  2326. RemLatency = std::max(RemLatency,
  2327. CurrZone.findMaxLatency(CurrZone.Pending.elements()));
  2328. return RemLatency;
  2329. }
/// Returns true if the current cycle plus remaining latency is greater than
  2331. /// the critical path in the scheduling region.
  2332. bool GenericSchedulerBase::shouldReduceLatency(const CandPolicy &Policy,
  2333. SchedBoundary &CurrZone,
  2334. bool ComputeRemLatency,
  2335. unsigned &RemLatency) const {
  2336. // The current cycle is already greater than the critical path, so we are
  2337. // already latency limited and don't need to compute the remaining latency.
  2338. if (CurrZone.getCurrCycle() > Rem.CriticalPath)
  2339. return true;
  2340. // If we haven't scheduled anything yet, then we aren't latency limited.
  2341. if (CurrZone.getCurrCycle() == 0)
  2342. return false;
  2343. if (ComputeRemLatency)
  2344. RemLatency = computeRemLatency(CurrZone);
  2345. return RemLatency + CurrZone.getCurrCycle() > Rem.CriticalPath;
  2346. }
/// Set the CandPolicy for the given scheduling zone, based on the current
/// resources and latencies inside and outside the zone.
  2349. void GenericSchedulerBase::setPolicy(CandPolicy &Policy, bool IsPostRA,
  2350. SchedBoundary &CurrZone,
  2351. SchedBoundary *OtherZone) {
  2352. // Apply preemptive heuristics based on the total latency and resources
  2353. // inside and outside this zone. Potential stalls should be considered before
  2354. // following this policy.
  2355. // Compute the critical resource outside the zone.
  2356. unsigned OtherCritIdx = 0;
  2357. unsigned OtherCount =
  2358. OtherZone ? OtherZone->getOtherResourceCount(OtherCritIdx) : 0;
  2359. bool OtherResLimited = false;
  2360. unsigned RemLatency = 0;
  2361. bool RemLatencyComputed = false;
  2362. if (SchedModel->hasInstrSchedModel() && OtherCount != 0) {
  2363. RemLatency = computeRemLatency(CurrZone);
  2364. RemLatencyComputed = true;
  2365. OtherResLimited = checkResourceLimit(SchedModel->getLatencyFactor(),
  2366. OtherCount, RemLatency, false);
  2367. }
  2368. // Schedule aggressively for latency in PostRA mode. We don't check for
  2369. // acyclic latency during PostRA, and highly out-of-order processors will
  2370. // skip PostRA scheduling.
  2371. if (!OtherResLimited &&
  2372. (IsPostRA || shouldReduceLatency(Policy, CurrZone, !RemLatencyComputed,
  2373. RemLatency))) {
  2374. Policy.ReduceLatency |= true;
  2375. LLVM_DEBUG(dbgs() << " " << CurrZone.Available.getName()
  2376. << " RemainingLatency " << RemLatency << " + "
  2377. << CurrZone.getCurrCycle() << "c > CritPath "
  2378. << Rem.CriticalPath << "\n");
  2379. }
  2380. // If the same resource is limiting inside and outside the zone, do nothing.
  2381. if (CurrZone.getZoneCritResIdx() == OtherCritIdx)
  2382. return;
  2383. LLVM_DEBUG(if (CurrZone.isResourceLimited()) {
  2384. dbgs() << " " << CurrZone.Available.getName() << " ResourceLimited: "
  2385. << SchedModel->getResourceName(CurrZone.getZoneCritResIdx()) << "\n";
  2386. } if (OtherResLimited) dbgs()
  2387. << " RemainingLimit: "
  2388. << SchedModel->getResourceName(OtherCritIdx) << "\n";
  2389. if (!CurrZone.isResourceLimited() && !OtherResLimited) dbgs()
  2390. << " Latency limited both directions.\n");
  2391. if (CurrZone.isResourceLimited() && !Policy.ReduceResIdx)
  2392. Policy.ReduceResIdx = CurrZone.getZoneCritResIdx();
  2393. if (OtherResLimited)
  2394. Policy.DemandResIdx = OtherCritIdx;
  2395. }
  2396. #ifndef NDEBUG
  2397. const char *GenericSchedulerBase::getReasonStr(
  2398. GenericSchedulerBase::CandReason Reason) {
  2399. switch (Reason) {
  2400. case NoCand: return "NOCAND ";
  2401. case Only1: return "ONLY1 ";
  2402. case PhysReg: return "PHYS-REG ";
  2403. case RegExcess: return "REG-EXCESS";
  2404. case RegCritical: return "REG-CRIT ";
  2405. case Stall: return "STALL ";
  2406. case Cluster: return "CLUSTER ";
  2407. case Weak: return "WEAK ";
  2408. case RegMax: return "REG-MAX ";
  2409. case ResourceReduce: return "RES-REDUCE";
  2410. case ResourceDemand: return "RES-DEMAND";
  2411. case TopDepthReduce: return "TOP-DEPTH ";
  2412. case TopPathReduce: return "TOP-PATH ";
  2413. case BotHeightReduce:return "BOT-HEIGHT";
  2414. case BotPathReduce: return "BOT-PATH ";
  2415. case NextDefUse: return "DEF-USE ";
  2416. case NodeOrder: return "ORDER ";
}
  2418. llvm_unreachable("Unknown reason!");
  2419. }
  2420. void GenericSchedulerBase::traceCandidate(const SchedCandidate &Cand) {
  2421. PressureChange P;
  2422. unsigned ResIdx = 0;
  2423. unsigned Latency = 0;
  2424. switch (Cand.Reason) {
  2425. default:
  2426. break;
  2427. case RegExcess:
  2428. P = Cand.RPDelta.Excess;
  2429. break;
  2430. case RegCritical:
  2431. P = Cand.RPDelta.CriticalMax;
  2432. break;
  2433. case RegMax:
  2434. P = Cand.RPDelta.CurrentMax;
  2435. break;
  2436. case ResourceReduce:
  2437. ResIdx = Cand.Policy.ReduceResIdx;
  2438. break;
  2439. case ResourceDemand:
  2440. ResIdx = Cand.Policy.DemandResIdx;
  2441. break;
  2442. case TopDepthReduce:
  2443. Latency = Cand.SU->getDepth();
  2444. break;
  2445. case TopPathReduce:
  2446. Latency = Cand.SU->getHeight();
  2447. break;
  2448. case BotHeightReduce:
  2449. Latency = Cand.SU->getHeight();
  2450. break;
  2451. case BotPathReduce:
  2452. Latency = Cand.SU->getDepth();
  2453. break;
  2454. }
  2455. dbgs() << " Cand SU(" << Cand.SU->NodeNum << ") " << getReasonStr(Cand.Reason);
  2456. if (P.isValid())
  2457. dbgs() << " " << TRI->getRegPressureSetName(P.getPSet())
  2458. << ":" << P.getUnitInc() << " ";
  2459. else
  2460. dbgs() << " ";
  2461. if (ResIdx)
  2462. dbgs() << " " << SchedModel->getProcResource(ResIdx)->Name << " ";
  2463. else
  2464. dbgs() << " ";
  2465. if (Latency)
  2466. dbgs() << " " << Latency << " cycles ";
  2467. else
  2468. dbgs() << " ";
  2469. dbgs() << '\n';
  2470. }
  2471. #endif
  2472. namespace llvm {
  2473. /// Return true if this heuristic determines order.
/// TODO: Consider refactoring the return type of these functions to an integer
/// or enum, as we may need to differentiate whether TryCand is better than Cand.
  2476. bool tryLess(int TryVal, int CandVal,
  2477. GenericSchedulerBase::SchedCandidate &TryCand,
  2478. GenericSchedulerBase::SchedCandidate &Cand,
  2479. GenericSchedulerBase::CandReason Reason) {
  2480. if (TryVal < CandVal) {
  2481. TryCand.Reason = Reason;
  2482. return true;
  2483. }
  2484. if (TryVal > CandVal) {
  2485. if (Cand.Reason > Reason)
  2486. Cand.Reason = Reason;
  2487. return true;
  2488. }
  2489. return false;
  2490. }
  2491. bool tryGreater(int TryVal, int CandVal,
  2492. GenericSchedulerBase::SchedCandidate &TryCand,
  2493. GenericSchedulerBase::SchedCandidate &Cand,
  2494. GenericSchedulerBase::CandReason Reason) {
  2495. if (TryVal > CandVal) {
  2496. TryCand.Reason = Reason;
  2497. return true;
  2498. }
  2499. if (TryVal < CandVal) {
  2500. if (Cand.Reason > Reason)
  2501. Cand.Reason = Reason;
  2502. return true;
  2503. }
  2504. return false;
  2505. }
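// Note on the convention used by tryLess/tryGreater (and by the helpers built
// on them): they return true whenever the heuristic is decisive in either
// direction. TryCand.Reason is set only when TryCand wins; when Cand wins,
// Cand.Reason is merely lowered to the stronger (numerically smaller) reason.
// Callers therefore test "TryCand.Reason != NoCand" to see whether TryCand
// actually won.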
  2506. bool tryLatency(GenericSchedulerBase::SchedCandidate &TryCand,
  2507. GenericSchedulerBase::SchedCandidate &Cand,
  2508. SchedBoundary &Zone) {
  2509. if (Zone.isTop()) {
  2510. // Prefer the candidate with the lesser depth, but only if one of them has
  2511. // depth greater than the total latency scheduled so far, otherwise either
  2512. // of them could be scheduled now with no stall.
  2513. if (std::max(TryCand.SU->getDepth(), Cand.SU->getDepth()) >
  2514. Zone.getScheduledLatency()) {
  2515. if (tryLess(TryCand.SU->getDepth(), Cand.SU->getDepth(),
  2516. TryCand, Cand, GenericSchedulerBase::TopDepthReduce))
  2517. return true;
  2518. }
  2519. if (tryGreater(TryCand.SU->getHeight(), Cand.SU->getHeight(),
  2520. TryCand, Cand, GenericSchedulerBase::TopPathReduce))
  2521. return true;
  2522. } else {
  2523. // Prefer the candidate with the lesser height, but only if one of them has
  2524. // height greater than the total latency scheduled so far, otherwise either
  2525. // of them could be scheduled now with no stall.
  2526. if (std::max(TryCand.SU->getHeight(), Cand.SU->getHeight()) >
  2527. Zone.getScheduledLatency()) {
  2528. if (tryLess(TryCand.SU->getHeight(), Cand.SU->getHeight(),
  2529. TryCand, Cand, GenericSchedulerBase::BotHeightReduce))
  2530. return true;
  2531. }
  2532. if (tryGreater(TryCand.SU->getDepth(), Cand.SU->getDepth(),
  2533. TryCand, Cand, GenericSchedulerBase::BotPathReduce))
  2534. return true;
  2535. }
  2536. return false;
  2537. }
  2538. } // end namespace llvm
  2539. static void tracePick(GenericSchedulerBase::CandReason Reason, bool IsTop) {
  2540. LLVM_DEBUG(dbgs() << "Pick " << (IsTop ? "Top " : "Bot ")
  2541. << GenericSchedulerBase::getReasonStr(Reason) << '\n');
  2542. }
  2543. static void tracePick(const GenericSchedulerBase::SchedCandidate &Cand) {
  2544. tracePick(Cand.Reason, Cand.AtTop);
  2545. }
  2546. void GenericScheduler::initialize(ScheduleDAGMI *dag) {
  2547. assert(dag->hasVRegLiveness() &&
  2548. "(PreRA)GenericScheduler needs vreg liveness");
  2549. DAG = static_cast<ScheduleDAGMILive*>(dag);
  2550. SchedModel = DAG->getSchedModel();
  2551. TRI = DAG->TRI;
  2552. if (RegionPolicy.ComputeDFSResult)
  2553. DAG->computeDFSResult();
  2554. Rem.init(DAG, SchedModel);
  2555. Top.init(DAG, SchedModel, &Rem);
  2556. Bot.init(DAG, SchedModel, &Rem);
  2557. // Initialize resource counts.
  2558. // Initialize the HazardRecognizers. If itineraries don't exist, are empty, or
  2559. // are disabled, then these HazardRecs will be disabled.
  2560. const InstrItineraryData *Itin = SchedModel->getInstrItineraries();
  2561. if (!Top.HazardRec) {
  2562. Top.HazardRec =
  2563. DAG->MF.getSubtarget().getInstrInfo()->CreateTargetMIHazardRecognizer(
  2564. Itin, DAG);
  2565. }
  2566. if (!Bot.HazardRec) {
  2567. Bot.HazardRec =
  2568. DAG->MF.getSubtarget().getInstrInfo()->CreateTargetMIHazardRecognizer(
  2569. Itin, DAG);
  2570. }
  2571. TopCand.SU = nullptr;
  2572. BotCand.SU = nullptr;
  2573. }
  2574. /// Initialize the per-region scheduling policy.
  2575. void GenericScheduler::initPolicy(MachineBasicBlock::iterator Begin,
  2576. MachineBasicBlock::iterator End,
  2577. unsigned NumRegionInstrs) {
  2578. const MachineFunction &MF = *Begin->getMF();
  2579. const TargetLowering *TLI = MF.getSubtarget().getTargetLowering();
  2580. // Avoid setting up the register pressure tracker for small regions to save
  2581. // compile time. As a rough heuristic, only track pressure when the number of
  2582. // schedulable instructions exceeds half the integer register file.
  2583. RegionPolicy.ShouldTrackPressure = true;
  2584. for (unsigned VT = MVT::i32; VT > (unsigned)MVT::i1; --VT) {
  2585. MVT::SimpleValueType LegalIntVT = (MVT::SimpleValueType)VT;
  2586. if (TLI->isTypeLegal(LegalIntVT)) {
  2587. unsigned NIntRegs = Context->RegClassInfo->getNumAllocatableRegs(
  2588. TLI->getRegClassFor(LegalIntVT));
  2589. RegionPolicy.ShouldTrackPressure = NumRegionInstrs > (NIntRegs / 2);
  2590. }
  2591. }
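// Note on the loop above: VT counts down from i32, and each legal integer type
// overwrites the previous decision, so the final value is effectively based on
// the smallest legal integer type wider than i1.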
  2592. // For generic targets, we default to bottom-up, because it's simpler and more
  2593. // compile-time optimizations have been implemented in that direction.
  2594. RegionPolicy.OnlyBottomUp = true;
  2595. // Allow the subtarget to override default policy.
  2596. MF.getSubtarget().overrideSchedPolicy(RegionPolicy, NumRegionInstrs);
  2597. // After subtarget overrides, apply command line options.
  2598. if (!EnableRegPressure) {
  2599. RegionPolicy.ShouldTrackPressure = false;
  2600. RegionPolicy.ShouldTrackLaneMasks = false;
  2601. }
  2602. // Check -misched-topdown/bottomup can force or unforce scheduling direction.
  2603. // e.g. -misched-bottomup=false allows scheduling in both directions.
  2604. assert((!ForceTopDown || !ForceBottomUp) &&
  2605. "-misched-topdown incompatible with -misched-bottomup");
  2606. if (ForceBottomUp.getNumOccurrences() > 0) {
  2607. RegionPolicy.OnlyBottomUp = ForceBottomUp;
  2608. if (RegionPolicy.OnlyBottomUp)
  2609. RegionPolicy.OnlyTopDown = false;
  2610. }
  2611. if (ForceTopDown.getNumOccurrences() > 0) {
  2612. RegionPolicy.OnlyTopDown = ForceTopDown;
  2613. if (RegionPolicy.OnlyTopDown)
  2614. RegionPolicy.OnlyBottomUp = false;
  2615. }
  2616. }
  2617. void GenericScheduler::dumpPolicy() const {
  2618. // Cannot completely remove virtual function even in release mode.
  2619. #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  2620. dbgs() << "GenericScheduler RegionPolicy: "
  2621. << " ShouldTrackPressure=" << RegionPolicy.ShouldTrackPressure
  2622. << " OnlyTopDown=" << RegionPolicy.OnlyTopDown
  2623. << " OnlyBottomUp=" << RegionPolicy.OnlyBottomUp
  2624. << "\n";
  2625. #endif
  2626. }
  2627. /// Set IsAcyclicLatencyLimited if the acyclic path is longer than the cyclic
  2628. /// critical path by more cycles than it takes to drain the instruction buffer.
/// We estimate an upper bound on in-flight instructions as:
  2630. ///
  2631. /// CyclesPerIteration = max( CyclicPath, Loop-Resource-Height )
  2632. /// InFlightIterations = AcyclicPath / CyclesPerIteration
  2633. /// InFlightResources = InFlightIterations * LoopResources
  2634. ///
  2635. /// TODO: Check execution resources in addition to IssueCount.
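///
/// Worked example (hypothetical numbers, assuming a latency factor and
/// micro-op factor of 1): with CyclicCritPath = 10, RemIssueCount = 20, and
/// CriticalPath = 60, IterCount = max(10, 20) = 20, AcyclicCount = 60, and
/// InFlightCount = ceil(60 * 20 / 20) = 60 micro-ops. With a 32-entry micro-op
/// buffer that exceeds BufferLimit, so the loop is acyclic-latency limited.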
  2636. void GenericScheduler::checkAcyclicLatency() {
  2637. if (Rem.CyclicCritPath == 0 || Rem.CyclicCritPath >= Rem.CriticalPath)
  2638. return;
  2639. // Scaled number of cycles per loop iteration.
  2640. unsigned IterCount =
  2641. std::max(Rem.CyclicCritPath * SchedModel->getLatencyFactor(),
  2642. Rem.RemIssueCount);
  2643. // Scaled acyclic critical path.
  2644. unsigned AcyclicCount = Rem.CriticalPath * SchedModel->getLatencyFactor();
  2645. // InFlightCount = (AcyclicPath / IterCycles) * InstrPerLoop
  2646. unsigned InFlightCount =
  2647. (AcyclicCount * Rem.RemIssueCount + IterCount-1) / IterCount;
  2648. unsigned BufferLimit =
  2649. SchedModel->getMicroOpBufferSize() * SchedModel->getMicroOpFactor();
  2650. Rem.IsAcyclicLatencyLimited = InFlightCount > BufferLimit;
  2651. LLVM_DEBUG(
  2652. dbgs() << "IssueCycles="
  2653. << Rem.RemIssueCount / SchedModel->getLatencyFactor() << "c "
  2654. << "IterCycles=" << IterCount / SchedModel->getLatencyFactor()
  2655. << "c NumIters=" << (AcyclicCount + IterCount - 1) / IterCount
  2656. << " InFlight=" << InFlightCount / SchedModel->getMicroOpFactor()
  2657. << "m BufferLim=" << SchedModel->getMicroOpBufferSize() << "m\n";
  2658. if (Rem.IsAcyclicLatencyLimited) dbgs() << " ACYCLIC LATENCY LIMIT\n");
  2659. }
  2660. void GenericScheduler::registerRoots() {
  2661. Rem.CriticalPath = DAG->ExitSU.getDepth();
  2662. // Some roots may not feed into ExitSU. Check all of them in case.
  2663. for (const SUnit *SU : Bot.Available) {
  2664. if (SU->getDepth() > Rem.CriticalPath)
  2665. Rem.CriticalPath = SU->getDepth();
  2666. }
  2667. LLVM_DEBUG(dbgs() << "Critical Path(GS-RR ): " << Rem.CriticalPath << '\n');
  2668. if (DumpCriticalPathLength) {
  2669. errs() << "Critical Path(GS-RR ): " << Rem.CriticalPath << " \n";
  2670. }
  2671. if (EnableCyclicPath && SchedModel->getMicroOpBufferSize() > 0) {
  2672. Rem.CyclicCritPath = DAG->computeCyclicCriticalPath();
  2673. checkAcyclicLatency();
  2674. }
  2675. }
  2676. namespace llvm {
  2677. bool tryPressure(const PressureChange &TryP,
  2678. const PressureChange &CandP,
  2679. GenericSchedulerBase::SchedCandidate &TryCand,
  2680. GenericSchedulerBase::SchedCandidate &Cand,
  2681. GenericSchedulerBase::CandReason Reason,
  2682. const TargetRegisterInfo *TRI,
  2683. const MachineFunction &MF) {
  2684. // If one candidate decreases and the other increases, go with it.
  2685. // Invalid candidates have UnitInc==0.
  2686. if (tryGreater(TryP.getUnitInc() < 0, CandP.getUnitInc() < 0, TryCand, Cand,
  2687. Reason)) {
  2688. return true;
  2689. }
  2690. // Do not compare the magnitude of pressure changes between top and bottom
  2691. // boundary.
  2692. if (Cand.AtTop != TryCand.AtTop)
  2693. return false;
  2694. // If both candidates affect the same set in the same boundary, go with the
  2695. // smallest increase.
  2696. unsigned TryPSet = TryP.getPSetOrMax();
  2697. unsigned CandPSet = CandP.getPSetOrMax();
  2698. if (TryPSet == CandPSet) {
  2699. return tryLess(TryP.getUnitInc(), CandP.getUnitInc(), TryCand, Cand,
  2700. Reason);
  2701. }
  2702. int TryRank = TryP.isValid() ? TRI->getRegPressureSetScore(MF, TryPSet) :
  2703. std::numeric_limits<int>::max();
  2704. int CandRank = CandP.isValid() ? TRI->getRegPressureSetScore(MF, CandPSet) :
  2705. std::numeric_limits<int>::max();
  2706. // If the candidates are decreasing pressure, reverse priority.
  2707. if (TryP.getUnitInc() < 0)
  2708. std::swap(TryRank, CandRank);
  2709. return tryGreater(TryRank, CandRank, TryCand, Cand, Reason);
  2710. }
  2711. unsigned getWeakLeft(const SUnit *SU, bool isTop) {
  2712. return (isTop) ? SU->WeakPredsLeft : SU->WeakSuccsLeft;
  2713. }
  2714. /// Minimize physical register live ranges. Regalloc wants them adjacent to
  2715. /// their physreg def/use.
  2716. ///
  2717. /// FIXME: This is an unnecessary check on the critical path. Most are root/leaf
  2718. /// copies which can be prescheduled. The rest (e.g. x86 MUL) could be bundled
  2719. /// with the operation that produces or consumes the physreg. We'll do this when
  2720. /// regalloc has support for parallel copies.
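///
/// Return-value convention (as used by tryGreater on the results below): a
/// positive value biases toward picking SU now at this boundary, a negative
/// value biases toward deferring it, and zero expresses no preference.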
  2721. int biasPhysReg(const SUnit *SU, bool isTop) {
  2722. const MachineInstr *MI = SU->getInstr();
  2723. if (MI->isCopy()) {
  2724. unsigned ScheduledOper = isTop ? 1 : 0;
  2725. unsigned UnscheduledOper = isTop ? 0 : 1;
// If we have already scheduled the physreg producer/consumer, immediately
// schedule the copy.
  2728. if (Register::isPhysicalRegister(MI->getOperand(ScheduledOper).getReg()))
  2729. return 1;
  2730. // If the physreg is at the boundary, defer it. Otherwise schedule it
  2731. // immediately to free the dependent. We can hoist the copy later.
  2732. bool AtBoundary = isTop ? !SU->NumSuccsLeft : !SU->NumPredsLeft;
  2733. if (Register::isPhysicalRegister(MI->getOperand(UnscheduledOper).getReg()))
  2734. return AtBoundary ? -1 : 1;
  2735. }
  2736. if (MI->isMoveImmediate()) {
  2737. // If we have a move immediate and all successors have been assigned, bias
  2738. // towards scheduling this later. Make sure all register defs are to
  2739. // physical registers.
  2740. bool DoBias = true;
  2741. for (const MachineOperand &Op : MI->defs()) {
  2742. if (Op.isReg() && !Register::isPhysicalRegister(Op.getReg())) {
  2743. DoBias = false;
  2744. break;
  2745. }
  2746. }
  2747. if (DoBias)
  2748. return isTop ? -1 : 1;
  2749. }
  2750. return 0;
  2751. }
  2752. } // end namespace llvm
  2753. void GenericScheduler::initCandidate(SchedCandidate &Cand, SUnit *SU,
  2754. bool AtTop,
  2755. const RegPressureTracker &RPTracker,
  2756. RegPressureTracker &TempTracker) {
  2757. Cand.SU = SU;
  2758. Cand.AtTop = AtTop;
  2759. if (DAG->isTrackingPressure()) {
  2760. if (AtTop) {
  2761. TempTracker.getMaxDownwardPressureDelta(
  2762. Cand.SU->getInstr(),
  2763. Cand.RPDelta,
  2764. DAG->getRegionCriticalPSets(),
  2765. DAG->getRegPressure().MaxSetPressure);
  2766. } else {
  2767. if (VerifyScheduling) {
  2768. TempTracker.getMaxUpwardPressureDelta(
  2769. Cand.SU->getInstr(),
  2770. &DAG->getPressureDiff(Cand.SU),
  2771. Cand.RPDelta,
  2772. DAG->getRegionCriticalPSets(),
  2773. DAG->getRegPressure().MaxSetPressure);
  2774. } else {
  2775. RPTracker.getUpwardPressureDelta(
  2776. Cand.SU->getInstr(),
  2777. DAG->getPressureDiff(Cand.SU),
  2778. Cand.RPDelta,
  2779. DAG->getRegionCriticalPSets(),
  2780. DAG->getRegPressure().MaxSetPressure);
  2781. }
  2782. }
  2783. }
  2784. LLVM_DEBUG(if (Cand.RPDelta.Excess.isValid()) dbgs()
  2785. << " Try SU(" << Cand.SU->NodeNum << ") "
  2786. << TRI->getRegPressureSetName(Cand.RPDelta.Excess.getPSet()) << ":"
  2787. << Cand.RPDelta.Excess.getUnitInc() << "\n");
  2788. }
  2789. /// Apply a set of heuristics to a new candidate. Heuristics are currently
  2790. /// hierarchical. This may be more efficient than a graduated cost model because
  2791. /// we don't need to evaluate all aspects of the model for each node in the
  2792. /// queue. But it's really done to make the heuristics easier to debug and
  2793. /// statistically analyze.
  2794. ///
  2795. /// \param Cand provides the policy and current best candidate.
  2796. /// \param TryCand refers to the next SUnit candidate, otherwise uninitialized.
  2797. /// \param Zone describes the scheduled zone that we are extending, or nullptr
  2798. /// if Cand is from a different zone than TryCand.
  2799. /// \return \c true if TryCand is better than Cand (Reason is NOT NoCand)
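///
/// The heuristics below are tried in order: physreg bias, excess pressure,
/// critical pressure, then (within one boundary) acyclic-latency and stall
/// heuristics, clustering, weak edges, region max pressure, resource
/// reduction/demand, latency, and finally original source order.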
  2800. bool GenericScheduler::tryCandidate(SchedCandidate &Cand,
  2801. SchedCandidate &TryCand,
  2802. SchedBoundary *Zone) const {
  2803. // Initialize the candidate if needed.
  2804. if (!Cand.isValid()) {
  2805. TryCand.Reason = NodeOrder;
  2806. return true;
  2807. }
// Bias physreg defs and copies toward their uses and defs, respectively.
  2809. if (tryGreater(biasPhysReg(TryCand.SU, TryCand.AtTop),
  2810. biasPhysReg(Cand.SU, Cand.AtTop), TryCand, Cand, PhysReg))
  2811. return TryCand.Reason != NoCand;
  2812. // Avoid exceeding the target's limit.
  2813. if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.Excess,
  2814. Cand.RPDelta.Excess,
  2815. TryCand, Cand, RegExcess, TRI,
  2816. DAG->MF))
  2817. return TryCand.Reason != NoCand;
  2818. // Avoid increasing the max critical pressure in the scheduled region.
  2819. if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.CriticalMax,
  2820. Cand.RPDelta.CriticalMax,
  2821. TryCand, Cand, RegCritical, TRI,
  2822. DAG->MF))
  2823. return TryCand.Reason != NoCand;
  2824. // We only compare a subset of features when comparing nodes between
// Top and Bottom boundary. Some properties are simply incomparable; in many
  2826. // other instances we should only override the other boundary if something
  2827. // is a clear good pick on one boundary. Skip heuristics that are more
  2828. // "tie-breaking" in nature.
  2829. bool SameBoundary = Zone != nullptr;
  2830. if (SameBoundary) {
  2831. // For loops that are acyclic path limited, aggressively schedule for
// latency. Within a single cycle, whenever CurrMOps > 0, allow normal
  2833. // heuristics to take precedence.
  2834. if (Rem.IsAcyclicLatencyLimited && !Zone->getCurrMOps() &&
  2835. tryLatency(TryCand, Cand, *Zone))
  2836. return TryCand.Reason != NoCand;
  2837. // Prioritize instructions that read unbuffered resources by stall cycles.
  2838. if (tryLess(Zone->getLatencyStallCycles(TryCand.SU),
  2839. Zone->getLatencyStallCycles(Cand.SU), TryCand, Cand, Stall))
  2840. return TryCand.Reason != NoCand;
  2841. }
  2842. // Keep clustered nodes together to encourage downstream peephole
  2843. // optimizations which may reduce resource requirements.
  2844. //
  2845. // This is a best effort to set things up for a post-RA pass. Optimizations
  2846. // like generating loads of multiple registers should ideally be done within
  2847. // the scheduler pass by combining the loads during DAG postprocessing.
  2848. const SUnit *CandNextClusterSU =
  2849. Cand.AtTop ? DAG->getNextClusterSucc() : DAG->getNextClusterPred();
  2850. const SUnit *TryCandNextClusterSU =
  2851. TryCand.AtTop ? DAG->getNextClusterSucc() : DAG->getNextClusterPred();
  2852. if (tryGreater(TryCand.SU == TryCandNextClusterSU,
  2853. Cand.SU == CandNextClusterSU,
  2854. TryCand, Cand, Cluster))
  2855. return TryCand.Reason != NoCand;
  2856. if (SameBoundary) {
  2857. // Weak edges are for clustering and other constraints.
  2858. if (tryLess(getWeakLeft(TryCand.SU, TryCand.AtTop),
  2859. getWeakLeft(Cand.SU, Cand.AtTop),
  2860. TryCand, Cand, Weak))
  2861. return TryCand.Reason != NoCand;
  2862. }
  2863. // Avoid increasing the max pressure of the entire region.
  2864. if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.CurrentMax,
  2865. Cand.RPDelta.CurrentMax,
  2866. TryCand, Cand, RegMax, TRI,
  2867. DAG->MF))
  2868. return TryCand.Reason != NoCand;
  2869. if (SameBoundary) {
  2870. // Avoid critical resource consumption and balance the schedule.
  2871. TryCand.initResourceDelta(DAG, SchedModel);
  2872. if (tryLess(TryCand.ResDelta.CritResources, Cand.ResDelta.CritResources,
  2873. TryCand, Cand, ResourceReduce))
  2874. return TryCand.Reason != NoCand;
  2875. if (tryGreater(TryCand.ResDelta.DemandedResources,
  2876. Cand.ResDelta.DemandedResources,
  2877. TryCand, Cand, ResourceDemand))
  2878. return TryCand.Reason != NoCand;
  2879. // Avoid serializing long latency dependence chains.
  2880. // For acyclic path limited loops, latency was already checked above.
  2881. if (!RegionPolicy.DisableLatencyHeuristic && TryCand.Policy.ReduceLatency &&
  2882. !Rem.IsAcyclicLatencyLimited && tryLatency(TryCand, Cand, *Zone))
  2883. return TryCand.Reason != NoCand;
  2884. // Fall through to original instruction order.
  2885. if ((Zone->isTop() && TryCand.SU->NodeNum < Cand.SU->NodeNum)
  2886. || (!Zone->isTop() && TryCand.SU->NodeNum > Cand.SU->NodeNum)) {
  2887. TryCand.Reason = NodeOrder;
  2888. return true;
  2889. }
  2890. }
  2891. return false;
  2892. }
  2893. /// Pick the best candidate from the queue.
  2894. ///
  2895. /// TODO: getMaxPressureDelta results can be mostly cached for each SUnit during
  2896. /// DAG building. To adjust for the current scheduling location we need to
  2897. /// maintain the number of vreg uses remaining to be top-scheduled.
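///
/// Note that TryCand is only compared against Cand with a SchedBoundary when
/// both candidates come from the same boundary; otherwise tryCandidate is
/// invoked with a null zone so that only boundary-independent heuristics apply.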
  2898. void GenericScheduler::pickNodeFromQueue(SchedBoundary &Zone,
  2899. const CandPolicy &ZonePolicy,
  2900. const RegPressureTracker &RPTracker,
  2901. SchedCandidate &Cand) {
  2902. // getMaxPressureDelta temporarily modifies the tracker.
  2903. RegPressureTracker &TempTracker = const_cast<RegPressureTracker&>(RPTracker);
  2904. ReadyQueue &Q = Zone.Available;
  2905. for (SUnit *SU : Q) {
  2906. SchedCandidate TryCand(ZonePolicy);
  2907. initCandidate(TryCand, SU, Zone.isTop(), RPTracker, TempTracker);
  2908. // Pass SchedBoundary only when comparing nodes from the same boundary.
  2909. SchedBoundary *ZoneArg = Cand.AtTop == TryCand.AtTop ? &Zone : nullptr;
  2910. if (tryCandidate(Cand, TryCand, ZoneArg)) {
  2911. // Initialize resource delta if needed in case future heuristics query it.
  2912. if (TryCand.ResDelta == SchedResourceDelta())
  2913. TryCand.initResourceDelta(DAG, SchedModel);
  2914. Cand.setBest(TryCand);
  2915. LLVM_DEBUG(traceCandidate(Cand));
  2916. }
  2917. }
  2918. }
  2919. /// Pick the best candidate node from either the top or bottom queue.
  2920. SUnit *GenericScheduler::pickNodeBidirectional(bool &IsTopNode) {
  2921. // Schedule as far as possible in the direction of no choice. This is most
  2922. // efficient, but also provides the best heuristics for CriticalPSets.
  2923. if (SUnit *SU = Bot.pickOnlyChoice()) {
  2924. IsTopNode = false;
  2925. tracePick(Only1, false);
  2926. return SU;
  2927. }
  2928. if (SUnit *SU = Top.pickOnlyChoice()) {
  2929. IsTopNode = true;
  2930. tracePick(Only1, true);
  2931. return SU;
  2932. }
  2933. // Set the bottom-up policy based on the state of the current bottom zone and
  2934. // the instructions outside the zone, including the top zone.
  2935. CandPolicy BotPolicy;
  2936. setPolicy(BotPolicy, /*IsPostRA=*/false, Bot, &Top);
  2937. // Set the top-down policy based on the state of the current top zone and
  2938. // the instructions outside the zone, including the bottom zone.
  2939. CandPolicy TopPolicy;
  2940. setPolicy(TopPolicy, /*IsPostRA=*/false, Top, &Bot);
  2941. // See if BotCand is still valid (because we previously scheduled from Top).
  2942. LLVM_DEBUG(dbgs() << "Picking from Bot:\n");
  2943. if (!BotCand.isValid() || BotCand.SU->isScheduled ||
  2944. BotCand.Policy != BotPolicy) {
  2945. BotCand.reset(CandPolicy());
  2946. pickNodeFromQueue(Bot, BotPolicy, DAG->getBotRPTracker(), BotCand);
  2947. assert(BotCand.Reason != NoCand && "failed to find the first candidate");
  2948. } else {
  2949. LLVM_DEBUG(traceCandidate(BotCand));
  2950. #ifndef NDEBUG
  2951. if (VerifyScheduling) {
  2952. SchedCandidate TCand;
  2953. TCand.reset(CandPolicy());
  2954. pickNodeFromQueue(Bot, BotPolicy, DAG->getBotRPTracker(), TCand);
  2955. assert(TCand.SU == BotCand.SU &&
  2956. "Last pick result should correspond to re-picking right now");
  2957. }
  2958. #endif
  2959. }
  2960. // Check if the top Q has a better candidate.
  2961. LLVM_DEBUG(dbgs() << "Picking from Top:\n");
  2962. if (!TopCand.isValid() || TopCand.SU->isScheduled ||
  2963. TopCand.Policy != TopPolicy) {
  2964. TopCand.reset(CandPolicy());
  2965. pickNodeFromQueue(Top, TopPolicy, DAG->getTopRPTracker(), TopCand);
  2966. assert(TopCand.Reason != NoCand && "failed to find the first candidate");
  2967. } else {
  2968. LLVM_DEBUG(traceCandidate(TopCand));
  2969. #ifndef NDEBUG
  2970. if (VerifyScheduling) {
  2971. SchedCandidate TCand;
  2972. TCand.reset(CandPolicy());
  2973. pickNodeFromQueue(Top, TopPolicy, DAG->getTopRPTracker(), TCand);
  2974. assert(TCand.SU == TopCand.SU &&
  2975. "Last pick result should correspond to re-picking right now");
  2976. }
  2977. #endif
  2978. }
  2979. // Pick best from BotCand and TopCand.
  2980. assert(BotCand.isValid());
  2981. assert(TopCand.isValid());
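// Ties favor the bottom zone: TopCand only replaces the bottom candidate if
// tryCandidate declares it strictly better.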
  2982. SchedCandidate Cand = BotCand;
  2983. TopCand.Reason = NoCand;
  2984. if (tryCandidate(Cand, TopCand, nullptr)) {
  2985. Cand.setBest(TopCand);
  2986. LLVM_DEBUG(traceCandidate(Cand));
  2987. }
  2988. IsTopNode = Cand.AtTop;
  2989. tracePick(Cand);
  2990. return Cand.SU;
  2991. }
  2992. /// Pick the best node to balance the schedule. Implements MachineSchedStrategy.
  2993. SUnit *GenericScheduler::pickNode(bool &IsTopNode) {
  2994. if (DAG->top() == DAG->bottom()) {
  2995. assert(Top.Available.empty() && Top.Pending.empty() &&
  2996. Bot.Available.empty() && Bot.Pending.empty() && "ReadyQ garbage");
  2997. return nullptr;
  2998. }
  2999. SUnit *SU;
  3000. do {
  3001. if (RegionPolicy.OnlyTopDown) {
  3002. SU = Top.pickOnlyChoice();
  3003. if (!SU) {
  3004. CandPolicy NoPolicy;
  3005. TopCand.reset(NoPolicy);
  3006. pickNodeFromQueue(Top, NoPolicy, DAG->getTopRPTracker(), TopCand);
  3007. assert(TopCand.Reason != NoCand && "failed to find a candidate");
  3008. tracePick(TopCand);
  3009. SU = TopCand.SU;
  3010. }
  3011. IsTopNode = true;
  3012. } else if (RegionPolicy.OnlyBottomUp) {
  3013. SU = Bot.pickOnlyChoice();
  3014. if (!SU) {
  3015. CandPolicy NoPolicy;
  3016. BotCand.reset(NoPolicy);
  3017. pickNodeFromQueue(Bot, NoPolicy, DAG->getBotRPTracker(), BotCand);
  3018. assert(BotCand.Reason != NoCand && "failed to find a candidate");
  3019. tracePick(BotCand);
  3020. SU = BotCand.SU;
  3021. }
  3022. IsTopNode = false;
  3023. } else {
  3024. SU = pickNodeBidirectional(IsTopNode);
  3025. }
  3026. } while (SU->isScheduled);
  3027. if (SU->isTopReady())
  3028. Top.removeReady(SU);
  3029. if (SU->isBottomReady())
  3030. Bot.removeReady(SU);
  3031. LLVM_DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") "
  3032. << *SU->getInstr());
  3033. return SU;
  3034. }
  3035. void GenericScheduler::reschedulePhysReg(SUnit *SU, bool isTop) {
  3036. MachineBasicBlock::iterator InsertPos = SU->getInstr();
  3037. if (!isTop)
  3038. ++InsertPos;
  3039. SmallVectorImpl<SDep> &Deps = isTop ? SU->Preds : SU->Succs;
  3040. // Find already scheduled copies with a single physreg dependence and move
  3041. // them just above the scheduled instruction.
  3042. for (SDep &Dep : Deps) {
  3043. if (Dep.getKind() != SDep::Data ||
  3044. !Register::isPhysicalRegister(Dep.getReg()))
  3045. continue;
  3046. SUnit *DepSU = Dep.getSUnit();
  3047. if (isTop ? DepSU->Succs.size() > 1 : DepSU->Preds.size() > 1)
  3048. continue;
  3049. MachineInstr *Copy = DepSU->getInstr();
  3050. if (!Copy->isCopy() && !Copy->isMoveImmediate())
  3051. continue;
  3052. LLVM_DEBUG(dbgs() << " Rescheduling physreg copy ";
  3053. DAG->dumpNode(*Dep.getSUnit()));
  3054. DAG->moveInstruction(Copy, InsertPos);
  3055. }
  3056. }
  3057. /// Update the scheduler's state after scheduling a node. This is the same node
  3058. /// that was just returned by pickNode(). However, ScheduleDAGMILive needs to
/// update its state based on the current cycle before MachineSchedStrategy
  3060. /// does.
  3061. ///
  3062. /// FIXME: Eventually, we may bundle physreg copies rather than rescheduling
  3063. /// them here. See comments in biasPhysReg.
  3064. void GenericScheduler::schedNode(SUnit *SU, bool IsTopNode) {
  3065. if (IsTopNode) {
  3066. SU->TopReadyCycle = std::max(SU->TopReadyCycle, Top.getCurrCycle());
  3067. Top.bumpNode(SU);
  3068. if (SU->hasPhysRegUses)
  3069. reschedulePhysReg(SU, true);
  3070. } else {
  3071. SU->BotReadyCycle = std::max(SU->BotReadyCycle, Bot.getCurrCycle());
  3072. Bot.bumpNode(SU);
  3073. if (SU->hasPhysRegDefs)
  3074. reschedulePhysReg(SU, false);
  3075. }
  3076. }
  3077. /// Create the standard converging machine scheduler. This will be used as the
  3078. /// default scheduler if the target does not set a default.
  3079. ScheduleDAGMILive *llvm::createGenericSchedLive(MachineSchedContext *C) {
  3080. ScheduleDAGMILive *DAG =
  3081. new ScheduleDAGMILive(C, std::make_unique<GenericScheduler>(C));
  3082. // Register DAG post-processors.
  3083. //
  3084. // FIXME: extend the mutation API to allow earlier mutations to instantiate
  3085. // data and pass it to later mutations. Have a single mutation that gathers
  3086. // the interesting nodes in one pass.
  3087. DAG->addMutation(createCopyConstrainDAGMutation(DAG->TII, DAG->TRI));
  3088. return DAG;
  3089. }
  3090. static ScheduleDAGInstrs *createConvergingSched(MachineSchedContext *C) {
  3091. return createGenericSchedLive(C);
  3092. }
  3093. static MachineSchedRegistry
  3094. GenericSchedRegistry("converge", "Standard converging scheduler.",
  3095. createConvergingSched);
  3096. //===----------------------------------------------------------------------===//
  3097. // PostGenericScheduler - Generic PostRA implementation of MachineSchedStrategy.
  3098. //===----------------------------------------------------------------------===//
  3099. void PostGenericScheduler::initialize(ScheduleDAGMI *Dag) {
  3100. DAG = Dag;
  3101. SchedModel = DAG->getSchedModel();
  3102. TRI = DAG->TRI;
  3103. Rem.init(DAG, SchedModel);
  3104. Top.init(DAG, SchedModel, &Rem);
  3105. BotRoots.clear();
  3106. // Initialize the HazardRecognizers. If itineraries don't exist, are empty,
  3107. // or are disabled, then these HazardRecs will be disabled.
  3108. const InstrItineraryData *Itin = SchedModel->getInstrItineraries();
  3109. if (!Top.HazardRec) {
  3110. Top.HazardRec =
  3111. DAG->MF.getSubtarget().getInstrInfo()->CreateTargetMIHazardRecognizer(
  3112. Itin, DAG);
  3113. }
  3114. }
  3115. void PostGenericScheduler::registerRoots() {
  3116. Rem.CriticalPath = DAG->ExitSU.getDepth();
  3117. // Some roots may not feed into ExitSU. Check all of them in case.
  3118. for (const SUnit *SU : BotRoots) {
  3119. if (SU->getDepth() > Rem.CriticalPath)
  3120. Rem.CriticalPath = SU->getDepth();
  3121. }
  3122. LLVM_DEBUG(dbgs() << "Critical Path: (PGS-RR) " << Rem.CriticalPath << '\n');
  3123. if (DumpCriticalPathLength) {
  3124. errs() << "Critical Path(PGS-RR ): " << Rem.CriticalPath << " \n";
  3125. }
  3126. }
  3127. /// Apply a set of heuristics to a new candidate for PostRA scheduling.
  3128. ///
  3129. /// \param Cand provides the policy and current best candidate.
  3130. /// \param TryCand refers to the next SUnit candidate, otherwise uninitialized.
  3131. /// \return \c true if TryCand is better than Cand (Reason is NOT NoCand)
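///
/// Unlike the pre-RA GenericScheduler::tryCandidate, no register-pressure
/// heuristics are applied here; candidates are ranked only by stalls,
/// clustering, resource balance, latency, and original source order.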
  3132. bool PostGenericScheduler::tryCandidate(SchedCandidate &Cand,
  3133. SchedCandidate &TryCand) {
  3134. // Initialize the candidate if needed.
  3135. if (!Cand.isValid()) {
  3136. TryCand.Reason = NodeOrder;
  3137. return true;
  3138. }
  3139. // Prioritize instructions that read unbuffered resources by stall cycles.
  3140. if (tryLess(Top.getLatencyStallCycles(TryCand.SU),
  3141. Top.getLatencyStallCycles(Cand.SU), TryCand, Cand, Stall))
  3142. return TryCand.Reason != NoCand;
  3143. // Keep clustered nodes together.
  3144. if (tryGreater(TryCand.SU == DAG->getNextClusterSucc(),
  3145. Cand.SU == DAG->getNextClusterSucc(),
  3146. TryCand, Cand, Cluster))
  3147. return TryCand.Reason != NoCand;
  3148. // Avoid critical resource consumption and balance the schedule.
  3149. if (tryLess(TryCand.ResDelta.CritResources, Cand.ResDelta.CritResources,
  3150. TryCand, Cand, ResourceReduce))
  3151. return TryCand.Reason != NoCand;
  3152. if (tryGreater(TryCand.ResDelta.DemandedResources,
  3153. Cand.ResDelta.DemandedResources,
  3154. TryCand, Cand, ResourceDemand))
  3155. return TryCand.Reason != NoCand;
  3156. // Avoid serializing long latency dependence chains.
  3157. if (Cand.Policy.ReduceLatency && tryLatency(TryCand, Cand, Top)) {
  3158. return TryCand.Reason != NoCand;
  3159. }
  3160. // Fall through to original instruction order.
  3161. if (TryCand.SU->NodeNum < Cand.SU->NodeNum) {
  3162. TryCand.Reason = NodeOrder;
  3163. return true;
  3164. }
  3165. return false;
  3166. }
  3167. void PostGenericScheduler::pickNodeFromQueue(SchedCandidate &Cand) {
  3168. ReadyQueue &Q = Top.Available;
  3169. for (SUnit *SU : Q) {
  3170. SchedCandidate TryCand(Cand.Policy);
  3171. TryCand.SU = SU;
  3172. TryCand.AtTop = true;
  3173. TryCand.initResourceDelta(DAG, SchedModel);
  3174. if (tryCandidate(Cand, TryCand)) {
  3175. Cand.setBest(TryCand);
  3176. LLVM_DEBUG(traceCandidate(Cand));
  3177. }
  3178. }
  3179. }
  3180. /// Pick the next node to schedule.
  3181. SUnit *PostGenericScheduler::pickNode(bool &IsTopNode) {
  3182. if (DAG->top() == DAG->bottom()) {
  3183. assert(Top.Available.empty() && Top.Pending.empty() && "ReadyQ garbage");
  3184. return nullptr;
  3185. }
  3186. SUnit *SU;
  3187. do {
  3188. SU = Top.pickOnlyChoice();
  3189. if (SU) {
  3190. tracePick(Only1, true);
  3191. } else {
  3192. CandPolicy NoPolicy;
  3193. SchedCandidate TopCand(NoPolicy);
  3194. // Set the top-down policy based on the state of the current top zone and
  3195. // the instructions outside the zone, including the bottom zone.
  3196. setPolicy(TopCand.Policy, /*IsPostRA=*/true, Top, nullptr);
  3197. pickNodeFromQueue(TopCand);
  3198. assert(TopCand.Reason != NoCand && "failed to find a candidate");
  3199. tracePick(TopCand);
  3200. SU = TopCand.SU;
  3201. }
  3202. } while (SU->isScheduled);
  3203. IsTopNode = true;
  3204. Top.removeReady(SU);
  3205. LLVM_DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") "
  3206. << *SU->getInstr());
  3207. return SU;
  3208. }
  3209. /// Called after ScheduleDAGMI has scheduled an instruction and updated
  3210. /// scheduled/remaining flags in the DAG nodes.
  3211. void PostGenericScheduler::schedNode(SUnit *SU, bool IsTopNode) {
  3212. SU->TopReadyCycle = std::max(SU->TopReadyCycle, Top.getCurrCycle());
  3213. Top.bumpNode(SU);
  3214. }
  3215. ScheduleDAGMI *llvm::createGenericSchedPostRA(MachineSchedContext *C) {
  3216. return new ScheduleDAGMI(C, std::make_unique<PostGenericScheduler>(C),
  3217. /*RemoveKillFlags=*/true);
  3218. }
  3219. //===----------------------------------------------------------------------===//
  3220. // ILP Scheduler. Currently for experimental analysis of heuristics.
  3221. //===----------------------------------------------------------------------===//
  3222. namespace {
  3223. /// Order nodes by the ILP metric.
  3224. struct ILPOrder {
  3225. const SchedDFSResult *DFSResult = nullptr;
  3226. const BitVector *ScheduledTrees = nullptr;
  3227. bool MaximizeILP;
  3228. ILPOrder(bool MaxILP) : MaximizeILP(MaxILP) {}
  3229. /// Apply a less-than relation on node priority.
  3230. ///
  3231. /// (Return true if A comes after B in the Q.)
  3232. bool operator()(const SUnit *A, const SUnit *B) const {
  3233. unsigned SchedTreeA = DFSResult->getSubtreeID(A);
  3234. unsigned SchedTreeB = DFSResult->getSubtreeID(B);
  3235. if (SchedTreeA != SchedTreeB) {
  3236. // Unscheduled trees have lower priority.
  3237. if (ScheduledTrees->test(SchedTreeA) != ScheduledTrees->test(SchedTreeB))
  3238. return ScheduledTrees->test(SchedTreeB);
// Trees with shallower connections have lower priority.
  3240. if (DFSResult->getSubtreeLevel(SchedTreeA)
  3241. != DFSResult->getSubtreeLevel(SchedTreeB)) {
  3242. return DFSResult->getSubtreeLevel(SchedTreeA)
  3243. < DFSResult->getSubtreeLevel(SchedTreeB);
  3244. }
  3245. }
  3246. if (MaximizeILP)
  3247. return DFSResult->getILP(A) < DFSResult->getILP(B);
  3248. else
  3249. return DFSResult->getILP(A) > DFSResult->getILP(B);
  3250. }
  3251. };
  3252. /// Schedule based on the ILP metric.
  3253. class ILPScheduler : public MachineSchedStrategy {
  3254. ScheduleDAGMILive *DAG = nullptr;
  3255. ILPOrder Cmp;
  3256. std::vector<SUnit*> ReadyQ;
  3257. public:
  3258. ILPScheduler(bool MaximizeILP) : Cmp(MaximizeILP) {}
  3259. void initialize(ScheduleDAGMI *dag) override {
  3260. assert(dag->hasVRegLiveness() && "ILPScheduler needs vreg liveness");
  3261. DAG = static_cast<ScheduleDAGMILive*>(dag);
  3262. DAG->computeDFSResult();
  3263. Cmp.DFSResult = DAG->getDFSResult();
  3264. Cmp.ScheduledTrees = &DAG->getScheduledTrees();
  3265. ReadyQ.clear();
  3266. }
  3267. void registerRoots() override {
  3268. // Restore the heap in ReadyQ with the updated DFS results.
  3269. std::make_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
  3270. }
  3271. /// Implement MachineSchedStrategy interface.
  3272. /// -----------------------------------------
  3273. /// Callback to select the highest priority node from the ready Q.
  3274. SUnit *pickNode(bool &IsTopNode) override {
  3275. if (ReadyQ.empty()) return nullptr;
  3276. std::pop_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
  3277. SUnit *SU = ReadyQ.back();
  3278. ReadyQ.pop_back();
  3279. IsTopNode = false;
  3280. LLVM_DEBUG(dbgs() << "Pick node "
  3281. << "SU(" << SU->NodeNum << ") "
  3282. << " ILP: " << DAG->getDFSResult()->getILP(SU)
  3283. << " Tree: " << DAG->getDFSResult()->getSubtreeID(SU)
  3284. << " @"
  3285. << DAG->getDFSResult()->getSubtreeLevel(
  3286. DAG->getDFSResult()->getSubtreeID(SU))
  3287. << '\n'
  3288. << "Scheduling " << *SU->getInstr());
  3289. return SU;
  3290. }
  3291. /// Scheduler callback to notify that a new subtree is scheduled.
  3292. void scheduleTree(unsigned SubtreeID) override {
  3293. std::make_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
  3294. }
  3295. /// Callback after a node is scheduled. Mark a newly scheduled tree, notify
  3296. /// DFSResults, and resort the priority Q.
  3297. void schedNode(SUnit *SU, bool IsTopNode) override {
  3298. assert(!IsTopNode && "SchedDFSResult needs bottom-up");
  3299. }
  3300. void releaseTopNode(SUnit *) override { /*only called for top roots*/ }
  3301. void releaseBottomNode(SUnit *SU) override {
  3302. ReadyQ.push_back(SU);
  3303. std::push_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
  3304. }
  3305. };
  3306. } // end anonymous namespace
  3307. static ScheduleDAGInstrs *createILPMaxScheduler(MachineSchedContext *C) {
  3308. return new ScheduleDAGMILive(C, std::make_unique<ILPScheduler>(true));
  3309. }
  3310. static ScheduleDAGInstrs *createILPMinScheduler(MachineSchedContext *C) {
  3311. return new ScheduleDAGMILive(C, std::make_unique<ILPScheduler>(false));
  3312. }
  3313. static MachineSchedRegistry ILPMaxRegistry(
  3314. "ilpmax", "Schedule bottom-up for max ILP", createILPMaxScheduler);
  3315. static MachineSchedRegistry ILPMinRegistry(
  3316. "ilpmin", "Schedule bottom-up for min ILP", createILPMinScheduler);
  3317. //===----------------------------------------------------------------------===//
  3318. // Machine Instruction Shuffler for Correctness Testing
  3319. //===----------------------------------------------------------------------===//
  3320. #ifndef NDEBUG
  3321. namespace {
  3322. /// Apply a less-than relation on the node order, which corresponds to the
  3323. /// instruction order prior to scheduling. IsReverse implements greater-than.
  3324. template<bool IsReverse>
  3325. struct SUnitOrder {
  3326. bool operator()(SUnit *A, SUnit *B) const {
  3327. if (IsReverse)
  3328. return A->NodeNum > B->NodeNum;
  3329. else
  3330. return A->NodeNum < B->NodeNum;
  3331. }
  3332. };
  3333. /// Reorder instructions as much as possible.
  3334. class InstructionShuffler : public MachineSchedStrategy {
  3335. bool IsAlternating;
  3336. bool IsTopDown;
  3337. // Using a less-than relation (SUnitOrder<false>) for the TopQ priority
// gives nodes with a higher number higher priority, causing the latest
  3339. // instructions to be scheduled first.
  3340. PriorityQueue<SUnit*, std::vector<SUnit*>, SUnitOrder<false>>
  3341. TopQ;
  3342. // When scheduling bottom-up, use greater-than as the queue priority.
  3343. PriorityQueue<SUnit*, std::vector<SUnit*>, SUnitOrder<true>>
  3344. BottomQ;
  3345. public:
  3346. InstructionShuffler(bool alternate, bool topdown)
  3347. : IsAlternating(alternate), IsTopDown(topdown) {}
  3348. void initialize(ScheduleDAGMI*) override {
  3349. TopQ.clear();
  3350. BottomQ.clear();
  3351. }
  3352. /// Implement MachineSchedStrategy interface.
  3353. /// -----------------------------------------
  3354. SUnit *pickNode(bool &IsTopNode) override {
  3355. SUnit *SU;
  3356. if (IsTopDown) {
  3357. do {
  3358. if (TopQ.empty()) return nullptr;
  3359. SU = TopQ.top();
  3360. TopQ.pop();
  3361. } while (SU->isScheduled);
  3362. IsTopNode = true;
  3363. } else {
  3364. do {
  3365. if (BottomQ.empty()) return nullptr;
  3366. SU = BottomQ.top();
  3367. BottomQ.pop();
  3368. } while (SU->isScheduled);
  3369. IsTopNode = false;
  3370. }
  3371. if (IsAlternating)
  3372. IsTopDown = !IsTopDown;
  3373. return SU;
  3374. }
  3375. void schedNode(SUnit *SU, bool IsTopNode) override {}
  3376. void releaseTopNode(SUnit *SU) override {
  3377. TopQ.push(SU);
  3378. }
  3379. void releaseBottomNode(SUnit *SU) override {
  3380. BottomQ.push(SU);
  3381. }
  3382. };
  3383. } // end anonymous namespace
  3384. static ScheduleDAGInstrs *createInstructionShuffler(MachineSchedContext *C) {
  3385. bool Alternate = !ForceTopDown && !ForceBottomUp;
  3386. bool TopDown = !ForceBottomUp;
  3387. assert((TopDown || !ForceTopDown) &&
  3388. "-misched-topdown incompatible with -misched-bottomup");
  3389. return new ScheduleDAGMILive(
  3390. C, std::make_unique<InstructionShuffler>(Alternate, TopDown));
  3391. }
  3392. static MachineSchedRegistry ShufflerRegistry(
  3393. "shuffle", "Shuffle machine instructions alternating directions",
  3394. createInstructionShuffler);
  3395. #endif // !NDEBUG
  3396. //===----------------------------------------------------------------------===//
  3397. // GraphWriter support for ScheduleDAGMILive.
  3398. //===----------------------------------------------------------------------===//
  3399. #ifndef NDEBUG
  3400. namespace llvm {
  3401. template<> struct GraphTraits<
  3402. ScheduleDAGMI*> : public GraphTraits<ScheduleDAG*> {};
  3403. template<>
  3404. struct DOTGraphTraits<ScheduleDAGMI*> : public DefaultDOTGraphTraits {
  3405. DOTGraphTraits(bool isSimple = false) : DefaultDOTGraphTraits(isSimple) {}
  3406. static std::string getGraphName(const ScheduleDAG *G) {
  3407. return std::string(G->MF.getName());
  3408. }
  3409. static bool renderGraphFromBottomUp() {
  3410. return true;
  3411. }
  3412. static bool isNodeHidden(const SUnit *Node, const ScheduleDAG *G) {
  3413. if (ViewMISchedCutoff == 0)
  3414. return false;
  3415. return (Node->Preds.size() > ViewMISchedCutoff
  3416. || Node->Succs.size() > ViewMISchedCutoff);
  3417. }
  3418. /// If you want to override the dot attributes printed for a particular
  3419. /// edge, override this method.
  3420. static std::string getEdgeAttributes(const SUnit *Node,
  3421. SUnitIterator EI,
  3422. const ScheduleDAG *Graph) {
  3423. if (EI.isArtificialDep())
  3424. return "color=cyan,style=dashed";
  3425. if (EI.isCtrlDep())
  3426. return "color=blue,style=dashed";
  3427. return "";
  3428. }
  3429. static std::string getNodeLabel(const SUnit *SU, const ScheduleDAG *G) {
  3430. std::string Str;
  3431. raw_string_ostream SS(Str);
  3432. const ScheduleDAGMI *DAG = static_cast<const ScheduleDAGMI*>(G);
  3433. const SchedDFSResult *DFS = DAG->hasVRegLiveness() ?
  3434. static_cast<const ScheduleDAGMILive*>(G)->getDFSResult() : nullptr;
  3435. SS << "SU:" << SU->NodeNum;
  3436. if (DFS)
  3437. SS << " I:" << DFS->getNumInstrs(SU);
  3438. return SS.str();
  3439. }
  3440. static std::string getNodeDescription(const SUnit *SU, const ScheduleDAG *G) {
  3441. return G->getGraphNodeLabel(SU);
  3442. }
  3443. static std::string getNodeAttributes(const SUnit *N, const ScheduleDAG *G) {
  3444. std::string Str("shape=Mrecord");
  3445. const ScheduleDAGMI *DAG = static_cast<const ScheduleDAGMI*>(G);
  3446. const SchedDFSResult *DFS = DAG->hasVRegLiveness() ?
  3447. static_cast<const ScheduleDAGMILive*>(G)->getDFSResult() : nullptr;
  3448. if (DFS) {
  3449. Str += ",style=filled,fillcolor=\"#";
  3450. Str += DOT::getColorString(DFS->getSubtreeID(N));
  3451. Str += '"';
  3452. }
  3453. return Str;
  3454. }
  3455. };
  3456. } // end namespace llvm
  3457. #endif // NDEBUG
  3458. /// viewGraph - Pop up a ghostview window with the reachable parts of the DAG
  3459. /// rendered using 'dot'.
  3460. void ScheduleDAGMI::viewGraph(const Twine &Name, const Twine &Title) {
  3461. #ifndef NDEBUG
  3462. ViewGraph(this, Name, false, Title);
  3463. #else
  3464. errs() << "ScheduleDAGMI::viewGraph is only available in debug builds on "
  3465. << "systems with Graphviz or gv!\n";
  3466. #endif // NDEBUG
  3467. }
  3468. /// Out-of-line implementation with no arguments is handy for gdb.
  3469. void ScheduleDAGMI::viewGraph() {
  3470. viewGraph(getDAGName(), "Scheduling-Units Graph for " + getDAGName());
  3471. }