PassBuilderPipelines.cpp 83 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
717781779178017811782178317841785178617871788178917901791179217931794179517961797179817991800180118021803180418051806180718081809181018111812181318141815181618171818181918201821182218231824182518261827182818291830183118321833183418351836183718381839184018411842184318441845184618471848184918501851185218531854185518561857185818591860186118621863186418651866186718681869187018711872187318741875187618771878187918801881188218831884188518861887188818891890189118921893189418951896189718981899190019011902190319041905190619071908190919101911191219131914191519161917191819191920192119221923192419251926192719281929193019311932193319341935193619371938193919401941194219431944194519461947194819491950195119521953195419551956195719581959196019611962196319641965196619671968196919701971197219731974197519761977197819791980198119821983198419851986198719881989199019911992199319941995199619971998
  1. //===- Construction of pass pipelines -------------------------------------===//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. /// \file
  9. ///
  10. /// This file provides the implementation of the PassBuilder based on our
  11. /// static pass registry as well as related functionality. It also provides
  12. /// helpers to aid in analyzing, debugging, and testing passes and pass
  13. /// pipelines.
  14. ///
  15. //===----------------------------------------------------------------------===//
  16. #include "llvm/Analysis/AliasAnalysis.h"
  17. #include "llvm/Analysis/BasicAliasAnalysis.h"
  18. #include "llvm/Analysis/CGSCCPassManager.h"
  19. #include "llvm/Analysis/GlobalsModRef.h"
  20. #include "llvm/Analysis/InlineAdvisor.h"
  21. #include "llvm/Analysis/OptimizationRemarkEmitter.h"
  22. #include "llvm/Analysis/ProfileSummaryInfo.h"
  23. #include "llvm/Analysis/ScopedNoAliasAA.h"
  24. #include "llvm/Analysis/TypeBasedAliasAnalysis.h"
  25. #include "llvm/IR/PassManager.h"
  26. #include "llvm/Passes/OptimizationLevel.h"
  27. #include "llvm/Passes/PassBuilder.h"
  28. #include "llvm/Support/CommandLine.h"
  29. #include "llvm/Support/ErrorHandling.h"
  30. #include "llvm/Support/PGOOptions.h"
  31. #include "llvm/Target/TargetMachine.h"
  32. #include "llvm/Transforms/AggressiveInstCombine/AggressiveInstCombine.h"
  33. #include "llvm/Transforms/Coroutines/CoroCleanup.h"
  34. #include "llvm/Transforms/Coroutines/CoroConditionalWrapper.h"
  35. #include "llvm/Transforms/Coroutines/CoroEarly.h"
  36. #include "llvm/Transforms/Coroutines/CoroElide.h"
  37. #include "llvm/Transforms/Coroutines/CoroSplit.h"
  38. #include "llvm/Transforms/IPO/AlwaysInliner.h"
  39. #include "llvm/Transforms/IPO/Annotation2Metadata.h"
  40. #include "llvm/Transforms/IPO/ArgumentPromotion.h"
  41. #include "llvm/Transforms/IPO/Attributor.h"
  42. #include "llvm/Transforms/IPO/CalledValuePropagation.h"
  43. #include "llvm/Transforms/IPO/ConstantMerge.h"
  44. #include "llvm/Transforms/IPO/CrossDSOCFI.h"
  45. #include "llvm/Transforms/IPO/DeadArgumentElimination.h"
  46. #include "llvm/Transforms/IPO/ElimAvailExtern.h"
  47. #include "llvm/Transforms/IPO/ForceFunctionAttrs.h"
  48. #include "llvm/Transforms/IPO/FunctionAttrs.h"
  49. #include "llvm/Transforms/IPO/GlobalDCE.h"
  50. #include "llvm/Transforms/IPO/GlobalOpt.h"
  51. #include "llvm/Transforms/IPO/GlobalSplit.h"
  52. #include "llvm/Transforms/IPO/HotColdSplitting.h"
  53. #include "llvm/Transforms/IPO/IROutliner.h"
  54. #include "llvm/Transforms/IPO/InferFunctionAttrs.h"
  55. #include "llvm/Transforms/IPO/Inliner.h"
  56. #include "llvm/Transforms/IPO/LowerTypeTests.h"
  57. #include "llvm/Transforms/IPO/MergeFunctions.h"
  58. #include "llvm/Transforms/IPO/ModuleInliner.h"
  59. #include "llvm/Transforms/IPO/OpenMPOpt.h"
  60. #include "llvm/Transforms/IPO/PartialInlining.h"
  61. #include "llvm/Transforms/IPO/SCCP.h"
  62. #include "llvm/Transforms/IPO/SampleProfile.h"
  63. #include "llvm/Transforms/IPO/SampleProfileProbe.h"
  64. #include "llvm/Transforms/IPO/SyntheticCountsPropagation.h"
  65. #include "llvm/Transforms/IPO/WholeProgramDevirt.h"
  66. #include "llvm/Transforms/InstCombine/InstCombine.h"
  67. #include "llvm/Transforms/Instrumentation/CGProfile.h"
  68. #include "llvm/Transforms/Instrumentation/ControlHeightReduction.h"
  69. #include "llvm/Transforms/Instrumentation/InstrOrderFile.h"
  70. #include "llvm/Transforms/Instrumentation/InstrProfiling.h"
  71. #include "llvm/Transforms/Instrumentation/MemProfiler.h"
  72. #include "llvm/Transforms/Instrumentation/PGOInstrumentation.h"
  73. #include "llvm/Transforms/Scalar/ADCE.h"
  74. #include "llvm/Transforms/Scalar/AlignmentFromAssumptions.h"
  75. #include "llvm/Transforms/Scalar/AnnotationRemarks.h"
  76. #include "llvm/Transforms/Scalar/BDCE.h"
  77. #include "llvm/Transforms/Scalar/CallSiteSplitting.h"
  78. #include "llvm/Transforms/Scalar/ConstraintElimination.h"
  79. #include "llvm/Transforms/Scalar/CorrelatedValuePropagation.h"
  80. #include "llvm/Transforms/Scalar/DFAJumpThreading.h"
  81. #include "llvm/Transforms/Scalar/DeadStoreElimination.h"
  82. #include "llvm/Transforms/Scalar/DivRemPairs.h"
  83. #include "llvm/Transforms/Scalar/EarlyCSE.h"
  84. #include "llvm/Transforms/Scalar/Float2Int.h"
  85. #include "llvm/Transforms/Scalar/GVN.h"
  86. #include "llvm/Transforms/Scalar/IndVarSimplify.h"
  87. #include "llvm/Transforms/Scalar/InstSimplifyPass.h"
  88. #include "llvm/Transforms/Scalar/JumpThreading.h"
  89. #include "llvm/Transforms/Scalar/LICM.h"
  90. #include "llvm/Transforms/Scalar/LoopDeletion.h"
  91. #include "llvm/Transforms/Scalar/LoopDistribute.h"
  92. #include "llvm/Transforms/Scalar/LoopFlatten.h"
  93. #include "llvm/Transforms/Scalar/LoopIdiomRecognize.h"
  94. #include "llvm/Transforms/Scalar/LoopInstSimplify.h"
  95. #include "llvm/Transforms/Scalar/LoopInterchange.h"
  96. #include "llvm/Transforms/Scalar/LoopLoadElimination.h"
  97. #include "llvm/Transforms/Scalar/LoopPassManager.h"
  98. #include "llvm/Transforms/Scalar/LoopRotation.h"
  99. #include "llvm/Transforms/Scalar/LoopSimplifyCFG.h"
  100. #include "llvm/Transforms/Scalar/LoopSink.h"
  101. #include "llvm/Transforms/Scalar/LoopUnrollAndJamPass.h"
  102. #include "llvm/Transforms/Scalar/LoopUnrollPass.h"
  103. #include "llvm/Transforms/Scalar/LowerConstantIntrinsics.h"
  104. #include "llvm/Transforms/Scalar/LowerExpectIntrinsic.h"
  105. #include "llvm/Transforms/Scalar/LowerMatrixIntrinsics.h"
  106. #include "llvm/Transforms/Scalar/MemCpyOptimizer.h"
  107. #include "llvm/Transforms/Scalar/MergedLoadStoreMotion.h"
  108. #include "llvm/Transforms/Scalar/NewGVN.h"
  109. #include "llvm/Transforms/Scalar/Reassociate.h"
  110. #include "llvm/Transforms/Scalar/SCCP.h"
  111. #include "llvm/Transforms/Scalar/SROA.h"
  112. #include "llvm/Transforms/Scalar/SimpleLoopUnswitch.h"
  113. #include "llvm/Transforms/Scalar/SimplifyCFG.h"
  114. #include "llvm/Transforms/Scalar/SpeculativeExecution.h"
  115. #include "llvm/Transforms/Scalar/TailRecursionElimination.h"
  116. #include "llvm/Transforms/Scalar/WarnMissedTransforms.h"
  117. #include "llvm/Transforms/Utils/AddDiscriminators.h"
  118. #include "llvm/Transforms/Utils/AssumeBundleBuilder.h"
  119. #include "llvm/Transforms/Utils/CanonicalizeAliases.h"
  120. #include "llvm/Transforms/Utils/InjectTLIMappings.h"
  121. #include "llvm/Transforms/Utils/LibCallsShrinkWrap.h"
  122. #include "llvm/Transforms/Utils/Mem2Reg.h"
  123. #include "llvm/Transforms/Utils/NameAnonGlobals.h"
  124. #include "llvm/Transforms/Utils/RelLookupTableConverter.h"
  125. #include "llvm/Transforms/Utils/SimplifyCFGOptions.h"
  126. #include "llvm/Transforms/Vectorize/LoopVectorize.h"
  127. #include "llvm/Transforms/Vectorize/SLPVectorizer.h"
  128. #include "llvm/Transforms/Vectorize/VectorCombine.h"
  129. using namespace llvm;
  130. static cl::opt<InliningAdvisorMode> UseInlineAdvisor(
  131. "enable-ml-inliner", cl::init(InliningAdvisorMode::Default), cl::Hidden,
  132. cl::desc("Enable ML policy for inliner. Currently trained for -Oz only"),
  133. cl::values(clEnumValN(InliningAdvisorMode::Default, "default",
  134. "Heuristics-based inliner version"),
  135. clEnumValN(InliningAdvisorMode::Development, "development",
  136. "Use development mode (runtime-loadable model)"),
  137. clEnumValN(InliningAdvisorMode::Release, "release",
  138. "Use release mode (AOT-compiled model)")));
  139. static cl::opt<bool> EnableSyntheticCounts(
  140. "enable-npm-synthetic-counts", cl::Hidden,
  141. cl::desc("Run synthetic function entry count generation "
  142. "pass"));
  143. /// Flag to enable inline deferral during PGO.
  144. static cl::opt<bool>
  145. EnablePGOInlineDeferral("enable-npm-pgo-inline-deferral", cl::init(true),
  146. cl::Hidden,
  147. cl::desc("Enable inline deferral during PGO"));
  148. static cl::opt<bool> EnableMemProfiler("enable-mem-prof", cl::Hidden,
  149. cl::desc("Enable memory profiler"));
  150. static cl::opt<bool> EnableModuleInliner("enable-module-inliner",
  151. cl::init(false), cl::Hidden,
  152. cl::desc("Enable module inliner"));
  153. static cl::opt<bool> PerformMandatoryInliningsFirst(
  154. "mandatory-inlining-first", cl::init(true), cl::Hidden,
  155. cl::desc("Perform mandatory inlinings module-wide, before performing "
  156. "inlining"));
  157. static cl::opt<bool> EnableO3NonTrivialUnswitching(
  158. "enable-npm-O3-nontrivial-unswitch", cl::init(true), cl::Hidden,
  159. cl::desc("Enable non-trivial loop unswitching for -O3"));
  160. static cl::opt<bool> EnableEagerlyInvalidateAnalyses(
  161. "eagerly-invalidate-analyses", cl::init(true), cl::Hidden,
  162. cl::desc("Eagerly invalidate more analyses in default pipelines"));
  163. static cl::opt<bool> EnableNoRerunSimplificationPipeline(
  164. "enable-no-rerun-simplification-pipeline", cl::init(true), cl::Hidden,
  165. cl::desc(
  166. "Prevent running the simplification pipeline on a function more "
  167. "than once in the case that SCC mutations cause a function to be "
  168. "visited multiple times as long as the function has not been changed"));
  169. static cl::opt<bool> EnableMergeFunctions(
  170. "enable-merge-functions", cl::init(false), cl::Hidden,
  171. cl::desc("Enable function merging as part of the optimization pipeline"));
  172. static cl::opt<bool> EnablePostPGOLoopRotation(
  173. "enable-post-pgo-loop-rotation", cl::init(true), cl::Hidden,
  174. cl::desc("Run the loop rotation transformation after PGO instrumentation"));
  175. static cl::opt<bool> EnableGlobalAnalyses(
  176. "enable-global-analyses", cl::init(true), cl::Hidden,
  177. cl::desc("Enable inter-procedural analyses"));
  178. static cl::opt<bool>
  179. RunPartialInlining("enable-partial-inlining", cl::init(false), cl::Hidden,
  180. cl::desc("Run Partial inlinining pass"));
  181. static cl::opt<bool> ExtraVectorizerPasses(
  182. "extra-vectorizer-passes", cl::init(false), cl::Hidden,
  183. cl::desc("Run cleanup optimization passes after vectorization"));
  184. static cl::opt<bool> RunNewGVN("enable-newgvn", cl::init(false), cl::Hidden,
  185. cl::desc("Run the NewGVN pass"));
  186. static cl::opt<bool> EnableLoopInterchange(
  187. "enable-loopinterchange", cl::init(false), cl::Hidden,
  188. cl::desc("Enable the experimental LoopInterchange Pass"));
  189. static cl::opt<bool> EnableUnrollAndJam("enable-unroll-and-jam",
  190. cl::init(false), cl::Hidden,
  191. cl::desc("Enable Unroll And Jam Pass"));
  192. static cl::opt<bool> EnableLoopFlatten("enable-loop-flatten", cl::init(false),
  193. cl::Hidden,
  194. cl::desc("Enable the LoopFlatten Pass"));
  195. static cl::opt<bool>
  196. EnableDFAJumpThreading("enable-dfa-jump-thread",
  197. cl::desc("Enable DFA jump threading"),
  198. cl::init(false), cl::Hidden);
  199. static cl::opt<bool>
  200. EnableHotColdSplit("hot-cold-split",
  201. cl::desc("Enable hot-cold splitting pass"));
  202. static cl::opt<bool> EnableIROutliner("ir-outliner", cl::init(false),
  203. cl::Hidden,
  204. cl::desc("Enable ir outliner pass"));
  205. static cl::opt<bool>
  206. DisablePreInliner("disable-preinline", cl::init(false), cl::Hidden,
  207. cl::desc("Disable pre-instrumentation inliner"));
  208. static cl::opt<int> PreInlineThreshold(
  209. "preinline-threshold", cl::Hidden, cl::init(75),
  210. cl::desc("Control the amount of inlining in pre-instrumentation inliner "
  211. "(default = 75)"));
  212. static cl::opt<bool>
  213. EnableGVNHoist("enable-gvn-hoist",
  214. cl::desc("Enable the GVN hoisting pass (default = off)"));
  215. static cl::opt<bool>
  216. EnableGVNSink("enable-gvn-sink",
  217. cl::desc("Enable the GVN sinking pass (default = off)"));
  218. // This option is used in simplifying testing SampleFDO optimizations for
  219. // profile loading.
  220. static cl::opt<bool>
  221. EnableCHR("enable-chr", cl::init(true), cl::Hidden,
  222. cl::desc("Enable control height reduction optimization (CHR)"));
  223. static cl::opt<bool> FlattenedProfileUsed(
  224. "flattened-profile-used", cl::init(false), cl::Hidden,
  225. cl::desc("Indicate the sample profile being used is flattened, i.e., "
  226. "no inline hierachy exists in the profile"));
  227. static cl::opt<bool> EnableOrderFileInstrumentation(
  228. "enable-order-file-instrumentation", cl::init(false), cl::Hidden,
  229. cl::desc("Enable order file instrumentation (default = off)"));
  230. static cl::opt<bool>
  231. EnableMatrix("enable-matrix", cl::init(false), cl::Hidden,
  232. cl::desc("Enable lowering of the matrix intrinsics"));
  233. static cl::opt<bool> EnableConstraintElimination(
  234. "enable-constraint-elimination", cl::init(false), cl::Hidden,
  235. cl::desc(
  236. "Enable pass to eliminate conditions based on linear constraints"));
  237. static cl::opt<AttributorRunOption> AttributorRun(
  238. "attributor-enable", cl::Hidden, cl::init(AttributorRunOption::NONE),
  239. cl::desc("Enable the attributor inter-procedural deduction pass"),
  240. cl::values(clEnumValN(AttributorRunOption::ALL, "all",
  241. "enable all attributor runs"),
  242. clEnumValN(AttributorRunOption::MODULE, "module",
  243. "enable module-wide attributor runs"),
  244. clEnumValN(AttributorRunOption::CGSCC, "cgscc",
  245. "enable call graph SCC attributor runs"),
  246. clEnumValN(AttributorRunOption::NONE, "none",
  247. "disable attributor runs")));
  248. PipelineTuningOptions::PipelineTuningOptions() {
  249. LoopInterleaving = true;
  250. LoopVectorization = true;
  251. SLPVectorization = false;
  252. LoopUnrolling = true;
  253. ForgetAllSCEVInLoopUnroll = ForgetSCEVInLoopUnroll;
  254. LicmMssaOptCap = SetLicmMssaOptCap;
  255. LicmMssaNoAccForPromotionCap = SetLicmMssaNoAccForPromotionCap;
  256. CallGraphProfile = true;
  257. MergeFunctions = EnableMergeFunctions;
  258. InlinerThreshold = -1;
  259. EagerlyInvalidateAnalyses = EnableEagerlyInvalidateAnalyses;
  260. }
  261. namespace llvm {
  262. extern cl::opt<unsigned> MaxDevirtIterations;
  263. extern cl::opt<bool> EnableKnowledgeRetention;
  264. } // namespace llvm
  265. void PassBuilder::invokePeepholeEPCallbacks(FunctionPassManager &FPM,
  266. OptimizationLevel Level) {
  267. for (auto &C : PeepholeEPCallbacks)
  268. C(FPM, Level);
  269. }
  270. // Helper to add AnnotationRemarksPass.
  271. static void addAnnotationRemarksPass(ModulePassManager &MPM) {
  272. MPM.addPass(createModuleToFunctionPassAdaptor(AnnotationRemarksPass()));
  273. }
  274. // Helper to check if the current compilation phase is preparing for LTO
  275. static bool isLTOPreLink(ThinOrFullLTOPhase Phase) {
  276. return Phase == ThinOrFullLTOPhase::ThinLTOPreLink ||
  277. Phase == ThinOrFullLTOPhase::FullLTOPreLink;
  278. }
  279. // TODO: Investigate the cost/benefit of tail call elimination on debugging.
  280. FunctionPassManager
  281. PassBuilder::buildO1FunctionSimplificationPipeline(OptimizationLevel Level,
  282. ThinOrFullLTOPhase Phase) {
  283. FunctionPassManager FPM;
  284. // Form SSA out of local memory accesses after breaking apart aggregates into
  285. // scalars.
  286. FPM.addPass(SROAPass(SROAOptions::ModifyCFG));
  287. // Catch trivial redundancies
  288. FPM.addPass(EarlyCSEPass(true /* Enable mem-ssa. */));
  289. // Hoisting of scalars and load expressions.
  290. FPM.addPass(
  291. SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
  292. FPM.addPass(InstCombinePass());
  293. FPM.addPass(LibCallsShrinkWrapPass());
  294. invokePeepholeEPCallbacks(FPM, Level);
  295. FPM.addPass(
  296. SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
  297. // Form canonically associated expression trees, and simplify the trees using
  298. // basic mathematical properties. For example, this will form (nearly)
  299. // minimal multiplication trees.
  300. FPM.addPass(ReassociatePass());
  301. // Add the primary loop simplification pipeline.
  302. // FIXME: Currently this is split into two loop pass pipelines because we run
  303. // some function passes in between them. These can and should be removed
  304. // and/or replaced by scheduling the loop pass equivalents in the correct
  305. // positions. But those equivalent passes aren't powerful enough yet.
  306. // Specifically, `SimplifyCFGPass` and `InstCombinePass` are currently still
  307. // used. We have `LoopSimplifyCFGPass` which isn't yet powerful enough yet to
  308. // fully replace `SimplifyCFGPass`, and the closest to the other we have is
  309. // `LoopInstSimplify`.
  310. LoopPassManager LPM1, LPM2;
  311. // Simplify the loop body. We do this initially to clean up after other loop
  312. // passes run, either when iterating on a loop or on inner loops with
  313. // implications on the outer loop.
  314. LPM1.addPass(LoopInstSimplifyPass());
  315. LPM1.addPass(LoopSimplifyCFGPass());
  316. // Try to remove as much code from the loop header as possible,
  317. // to reduce amount of IR that will have to be duplicated. However,
  318. // do not perform speculative hoisting the first time as LICM
  319. // will destroy metadata that may not need to be destroyed if run
  320. // after loop rotation.
  321. // TODO: Investigate promotion cap for O1.
  322. LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
  323. /*AllowSpeculation=*/false));
  324. LPM1.addPass(LoopRotatePass(/* Disable header duplication */ true,
  325. isLTOPreLink(Phase)));
  326. // TODO: Investigate promotion cap for O1.
  327. LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
  328. /*AllowSpeculation=*/true));
  329. LPM1.addPass(SimpleLoopUnswitchPass());
  330. if (EnableLoopFlatten)
  331. LPM1.addPass(LoopFlattenPass());
  332. LPM2.addPass(LoopIdiomRecognizePass());
  333. LPM2.addPass(IndVarSimplifyPass());
  334. for (auto &C : LateLoopOptimizationsEPCallbacks)
  335. C(LPM2, Level);
  336. LPM2.addPass(LoopDeletionPass());
  337. if (EnableLoopInterchange)
  338. LPM2.addPass(LoopInterchangePass());
  339. // Do not enable unrolling in PreLinkThinLTO phase during sample PGO
  340. // because it changes IR to makes profile annotation in back compile
  341. // inaccurate. The normal unroller doesn't pay attention to forced full unroll
  342. // attributes so we need to make sure and allow the full unroll pass to pay
  343. // attention to it.
  344. if (Phase != ThinOrFullLTOPhase::ThinLTOPreLink || !PGOOpt ||
  345. PGOOpt->Action != PGOOptions::SampleUse)
  346. LPM2.addPass(LoopFullUnrollPass(Level.getSpeedupLevel(),
  347. /* OnlyWhenForced= */ !PTO.LoopUnrolling,
  348. PTO.ForgetAllSCEVInLoopUnroll));
  349. for (auto &C : LoopOptimizerEndEPCallbacks)
  350. C(LPM2, Level);
  351. // We provide the opt remark emitter pass for LICM to use. We only need to do
  352. // this once as it is immutable.
  353. FPM.addPass(
  354. RequireAnalysisPass<OptimizationRemarkEmitterAnalysis, Function>());
  355. FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM1),
  356. /*UseMemorySSA=*/true,
  357. /*UseBlockFrequencyInfo=*/true));
  358. FPM.addPass(
  359. SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
  360. FPM.addPass(InstCombinePass());
  361. // The loop passes in LPM2 (LoopFullUnrollPass) do not preserve MemorySSA.
  362. // *All* loop passes must preserve it, in order to be able to use it.
  363. FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM2),
  364. /*UseMemorySSA=*/false,
  365. /*UseBlockFrequencyInfo=*/false));
  366. // Delete small array after loop unroll.
  367. FPM.addPass(SROAPass(SROAOptions::ModifyCFG));
  368. // Specially optimize memory movement as it doesn't look like dataflow in SSA.
  369. FPM.addPass(MemCpyOptPass());
  370. // Sparse conditional constant propagation.
  371. // FIXME: It isn't clear why we do this *after* loop passes rather than
  372. // before...
  373. FPM.addPass(SCCPPass());
  374. // Delete dead bit computations (instcombine runs after to fold away the dead
  375. // computations, and then ADCE will run later to exploit any new DCE
  376. // opportunities that creates).
  377. FPM.addPass(BDCEPass());
  378. // Run instcombine after redundancy and dead bit elimination to exploit
  379. // opportunities opened up by them.
  380. FPM.addPass(InstCombinePass());
  381. invokePeepholeEPCallbacks(FPM, Level);
  382. FPM.addPass(CoroElidePass());
  383. for (auto &C : ScalarOptimizerLateEPCallbacks)
  384. C(FPM, Level);
  385. // Finally, do an expensive DCE pass to catch all the dead code exposed by
  386. // the simplifications and basic cleanup after all the simplifications.
  387. // TODO: Investigate if this is too expensive.
  388. FPM.addPass(ADCEPass());
  389. FPM.addPass(
  390. SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
  391. FPM.addPass(InstCombinePass());
  392. invokePeepholeEPCallbacks(FPM, Level);
  393. return FPM;
  394. }
  395. FunctionPassManager
  396. PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level,
  397. ThinOrFullLTOPhase Phase) {
  398. assert(Level != OptimizationLevel::O0 && "Must request optimizations!");
  399. // The O1 pipeline has a separate pipeline creation function to simplify
  400. // construction readability.
  401. if (Level.getSpeedupLevel() == 1)
  402. return buildO1FunctionSimplificationPipeline(Level, Phase);
  403. FunctionPassManager FPM;
  404. // Form SSA out of local memory accesses after breaking apart aggregates into
  405. // scalars.
  406. FPM.addPass(SROAPass(SROAOptions::ModifyCFG));
  407. // Catch trivial redundancies
  408. FPM.addPass(EarlyCSEPass(true /* Enable mem-ssa. */));
  409. if (EnableKnowledgeRetention)
  410. FPM.addPass(AssumeSimplifyPass());
  411. // Hoisting of scalars and load expressions.
  412. if (EnableGVNHoist)
  413. FPM.addPass(GVNHoistPass());
  414. // Global value numbering based sinking.
  415. if (EnableGVNSink) {
  416. FPM.addPass(GVNSinkPass());
  417. FPM.addPass(
  418. SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
  419. }
  420. // Speculative execution if the target has divergent branches; otherwise nop.
  421. FPM.addPass(SpeculativeExecutionPass(/* OnlyIfDivergentTarget =*/true));
  422. // Optimize based on known information about branches, and cleanup afterward.
  423. FPM.addPass(JumpThreadingPass());
  424. FPM.addPass(CorrelatedValuePropagationPass());
  425. FPM.addPass(
  426. SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
  427. FPM.addPass(InstCombinePass());
  428. if (Level == OptimizationLevel::O3)
  429. FPM.addPass(AggressiveInstCombinePass());
  430. if (EnableConstraintElimination)
  431. FPM.addPass(ConstraintEliminationPass());
  432. if (!Level.isOptimizingForSize())
  433. FPM.addPass(LibCallsShrinkWrapPass());
  434. invokePeepholeEPCallbacks(FPM, Level);
  435. // For PGO use pipeline, try to optimize memory intrinsics such as memcpy
  436. // using the size value profile. Don't perform this when optimizing for size.
  437. if (PGOOpt && PGOOpt->Action == PGOOptions::IRUse &&
  438. !Level.isOptimizingForSize())
  439. FPM.addPass(PGOMemOPSizeOpt());
  440. FPM.addPass(TailCallElimPass());
  441. FPM.addPass(
  442. SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
  443. // Form canonically associated expression trees, and simplify the trees using
  444. // basic mathematical properties. For example, this will form (nearly)
  445. // minimal multiplication trees.
  446. FPM.addPass(ReassociatePass());
  447. // Add the primary loop simplification pipeline.
  448. // FIXME: Currently this is split into two loop pass pipelines because we run
  449. // some function passes in between them. These can and should be removed
  450. // and/or replaced by scheduling the loop pass equivalents in the correct
  451. // positions. But those equivalent passes aren't powerful enough yet.
  452. // Specifically, `SimplifyCFGPass` and `InstCombinePass` are currently still
  453. // used. We have `LoopSimplifyCFGPass` which isn't yet powerful enough yet to
  454. // fully replace `SimplifyCFGPass`, and the closest to the other we have is
  455. // `LoopInstSimplify`.
  456. LoopPassManager LPM1, LPM2;
  457. // Simplify the loop body. We do this initially to clean up after other loop
  458. // passes run, either when iterating on a loop or on inner loops with
  459. // implications on the outer loop.
  460. LPM1.addPass(LoopInstSimplifyPass());
  461. LPM1.addPass(LoopSimplifyCFGPass());
  462. // Try to remove as much code from the loop header as possible,
  463. // to reduce amount of IR that will have to be duplicated. However,
  464. // do not perform speculative hoisting the first time as LICM
  465. // will destroy metadata that may not need to be destroyed if run
  466. // after loop rotation.
  467. // TODO: Investigate promotion cap for O1.
  468. LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
  469. /*AllowSpeculation=*/false));
  470. // Disable header duplication in loop rotation at -Oz.
  471. LPM1.addPass(
  472. LoopRotatePass(Level != OptimizationLevel::Oz, isLTOPreLink(Phase)));
  473. // TODO: Investigate promotion cap for O1.
  474. LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
  475. /*AllowSpeculation=*/true));
  476. LPM1.addPass(
  477. SimpleLoopUnswitchPass(/* NonTrivial */ Level == OptimizationLevel::O3 &&
  478. EnableO3NonTrivialUnswitching));
  479. if (EnableLoopFlatten)
  480. LPM1.addPass(LoopFlattenPass());
  481. LPM2.addPass(LoopIdiomRecognizePass());
  482. LPM2.addPass(IndVarSimplifyPass());
  483. for (auto &C : LateLoopOptimizationsEPCallbacks)
  484. C(LPM2, Level);
  485. LPM2.addPass(LoopDeletionPass());
  486. if (EnableLoopInterchange)
  487. LPM2.addPass(LoopInterchangePass());
  488. // Do not enable unrolling in PreLinkThinLTO phase during sample PGO
  489. // because it changes the IR, which makes profile annotation in back compile
  490. // inaccurate. The normal unroller doesn't pay attention to forced full unroll
  491. // attributes so we need to make sure and allow the full unroll pass to pay
  492. // attention to it.
  493. if (Phase != ThinOrFullLTOPhase::ThinLTOPreLink || !PGOOpt ||
  494. PGOOpt->Action != PGOOptions::SampleUse)
  495. LPM2.addPass(LoopFullUnrollPass(Level.getSpeedupLevel(),
  496. /* OnlyWhenForced= */ !PTO.LoopUnrolling,
  497. PTO.ForgetAllSCEVInLoopUnroll));
  498. for (auto &C : LoopOptimizerEndEPCallbacks)
  499. C(LPM2, Level);
  500. // We provide the opt remark emitter pass for LICM to use. We only need to do
  501. // this once as it is immutable.
  502. FPM.addPass(
  503. RequireAnalysisPass<OptimizationRemarkEmitterAnalysis, Function>());
  504. FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM1),
  505. /*UseMemorySSA=*/true,
  506. /*UseBlockFrequencyInfo=*/true));
  507. FPM.addPass(
  508. SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
  509. FPM.addPass(InstCombinePass());
  510. // The loop passes in LPM2 (LoopIdiomRecognizePass, IndVarSimplifyPass,
  511. // LoopDeletionPass and LoopFullUnrollPass) do not preserve MemorySSA.
  512. // *All* loop passes must preserve it, in order to be able to use it.
  513. FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM2),
  514. /*UseMemorySSA=*/false,
  515. /*UseBlockFrequencyInfo=*/false));
  516. // Delete small array after loop unroll.
  517. FPM.addPass(SROAPass(SROAOptions::ModifyCFG));
  518. // Try vectorization/scalarization transforms that are both improvements
  519. // themselves and can allow further folds with GVN and InstCombine.
  520. FPM.addPass(VectorCombinePass(/*TryEarlyFoldsOnly=*/true));
  521. // Eliminate redundancies.
  522. FPM.addPass(MergedLoadStoreMotionPass());
  523. if (RunNewGVN)
  524. FPM.addPass(NewGVNPass());
  525. else
  526. FPM.addPass(GVNPass());
  527. // Sparse conditional constant propagation.
  528. // FIXME: It isn't clear why we do this *after* loop passes rather than
  529. // before...
  530. FPM.addPass(SCCPPass());
  531. // Delete dead bit computations (instcombine runs after to fold away the dead
  532. // computations, and then ADCE will run later to exploit any new DCE
  533. // opportunities that creates).
  534. FPM.addPass(BDCEPass());
  535. // Run instcombine after redundancy and dead bit elimination to exploit
  536. // opportunities opened up by them.
  537. FPM.addPass(InstCombinePass());
  538. invokePeepholeEPCallbacks(FPM, Level);
  539. // Re-consider control flow based optimizations after redundancy elimination,
  540. // redo DCE, etc.
  541. if (EnableDFAJumpThreading && Level.getSizeLevel() == 0)
  542. FPM.addPass(DFAJumpThreadingPass());
  543. FPM.addPass(JumpThreadingPass());
  544. FPM.addPass(CorrelatedValuePropagationPass());
  545. // Finally, do an expensive DCE pass to catch all the dead code exposed by
  546. // the simplifications and basic cleanup after all the simplifications.
  547. // TODO: Investigate if this is too expensive.
  548. FPM.addPass(ADCEPass());
  549. // Specially optimize memory movement as it doesn't look like dataflow in SSA.
  550. FPM.addPass(MemCpyOptPass());
  551. FPM.addPass(DSEPass());
  552. FPM.addPass(createFunctionToLoopPassAdaptor(
  553. LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
  554. /*AllowSpeculation=*/true),
  555. /*UseMemorySSA=*/true, /*UseBlockFrequencyInfo=*/true));
  556. FPM.addPass(CoroElidePass());
  557. for (auto &C : ScalarOptimizerLateEPCallbacks)
  558. C(FPM, Level);
  559. FPM.addPass(SimplifyCFGPass(SimplifyCFGOptions()
  560. .convertSwitchRangeToICmp(true)
  561. .hoistCommonInsts(true)
  562. .sinkCommonInsts(true)));
  563. FPM.addPass(InstCombinePass());
  564. invokePeepholeEPCallbacks(FPM, Level);
  565. // Don't add CHR pass for CSIRInstr build in PostLink as the profile
  566. // is still the same as the PreLink compilation.
  567. if (EnableCHR && Level == OptimizationLevel::O3 && PGOOpt &&
  568. ((PGOOpt->Action == PGOOptions::IRUse &&
  569. (Phase != ThinOrFullLTOPhase::ThinLTOPostLink ||
  570. PGOOpt->CSAction != PGOOptions::CSIRInstr)) ||
  571. PGOOpt->Action == PGOOptions::SampleUse))
  572. FPM.addPass(ControlHeightReductionPass());
  573. return FPM;
  574. }
  575. void PassBuilder::addRequiredLTOPreLinkPasses(ModulePassManager &MPM) {
  576. MPM.addPass(CanonicalizeAliasesPass());
  577. MPM.addPass(NameAnonGlobalPass());
  578. }
  579. void PassBuilder::addPGOInstrPasses(ModulePassManager &MPM,
  580. OptimizationLevel Level, bool RunProfileGen,
  581. bool IsCS, std::string ProfileFile,
  582. std::string ProfileRemappingFile,
  583. ThinOrFullLTOPhase LTOPhase) {
  584. assert(Level != OptimizationLevel::O0 && "Not expecting O0 here!");
  585. if (!IsCS && !DisablePreInliner) {
  586. InlineParams IP;
  587. IP.DefaultThreshold = PreInlineThreshold;
  588. // FIXME: The hint threshold has the same value used by the regular inliner
  589. // when not optimzing for size. This should probably be lowered after
  590. // performance testing.
  591. // FIXME: this comment is cargo culted from the old pass manager, revisit).
  592. IP.HintThreshold = Level.isOptimizingForSize() ? PreInlineThreshold : 325;
  593. ModuleInlinerWrapperPass MIWP(
  594. IP, /* MandatoryFirst */ true,
  595. InlineContext{LTOPhase, InlinePass::EarlyInliner});
  596. CGSCCPassManager &CGPipeline = MIWP.getPM();
  597. FunctionPassManager FPM;
  598. FPM.addPass(SROAPass(SROAOptions::ModifyCFG));
  599. FPM.addPass(EarlyCSEPass()); // Catch trivial redundancies.
  600. FPM.addPass(SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(
  601. true))); // Merge & remove basic blocks.
  602. FPM.addPass(InstCombinePass()); // Combine silly sequences.
  603. invokePeepholeEPCallbacks(FPM, Level);
  604. CGPipeline.addPass(createCGSCCToFunctionPassAdaptor(
  605. std::move(FPM), PTO.EagerlyInvalidateAnalyses));
  606. MPM.addPass(std::move(MIWP));
  607. // Delete anything that is now dead to make sure that we don't instrument
  608. // dead code. Instrumentation can end up keeping dead code around and
  609. // dramatically increase code size.
  610. MPM.addPass(GlobalDCEPass());
  611. }
  612. if (!RunProfileGen) {
  613. assert(!ProfileFile.empty() && "Profile use expecting a profile file!");
  614. MPM.addPass(PGOInstrumentationUse(ProfileFile, ProfileRemappingFile, IsCS));
  615. // Cache ProfileSummaryAnalysis once to avoid the potential need to insert
  616. // RequireAnalysisPass for PSI before subsequent non-module passes.
  617. MPM.addPass(RequireAnalysisPass<ProfileSummaryAnalysis, Module>());
  618. return;
  619. }
  620. // Perform PGO instrumentation.
  621. MPM.addPass(PGOInstrumentationGen(IsCS));
  622. if (EnablePostPGOLoopRotation) {
  623. // Disable header duplication in loop rotation at -Oz.
  624. MPM.addPass(createModuleToFunctionPassAdaptor(
  625. createFunctionToLoopPassAdaptor(
  626. LoopRotatePass(Level != OptimizationLevel::Oz),
  627. /*UseMemorySSA=*/false,
  628. /*UseBlockFrequencyInfo=*/false),
  629. PTO.EagerlyInvalidateAnalyses));
  630. }
  631. // Add the profile lowering pass.
  632. InstrProfOptions Options;
  633. if (!ProfileFile.empty())
  634. Options.InstrProfileOutput = ProfileFile;
  635. // Do counter promotion at Level greater than O0.
  636. Options.DoCounterPromotion = true;
  637. Options.UseBFIInPromotion = IsCS;
  638. MPM.addPass(InstrProfiling(Options, IsCS));
  639. }
  640. void PassBuilder::addPGOInstrPassesForO0(ModulePassManager &MPM,
  641. bool RunProfileGen, bool IsCS,
  642. std::string ProfileFile,
  643. std::string ProfileRemappingFile) {
  644. if (!RunProfileGen) {
  645. assert(!ProfileFile.empty() && "Profile use expecting a profile file!");
  646. MPM.addPass(PGOInstrumentationUse(ProfileFile, ProfileRemappingFile, IsCS));
  647. // Cache ProfileSummaryAnalysis once to avoid the potential need to insert
  648. // RequireAnalysisPass for PSI before subsequent non-module passes.
  649. MPM.addPass(RequireAnalysisPass<ProfileSummaryAnalysis, Module>());
  650. return;
  651. }
  652. // Perform PGO instrumentation.
  653. MPM.addPass(PGOInstrumentationGen(IsCS));
  654. // Add the profile lowering pass.
  655. InstrProfOptions Options;
  656. if (!ProfileFile.empty())
  657. Options.InstrProfileOutput = ProfileFile;
  658. // Do not do counter promotion at O0.
  659. Options.DoCounterPromotion = false;
  660. Options.UseBFIInPromotion = IsCS;
  661. MPM.addPass(InstrProfiling(Options, IsCS));
  662. }
  663. static InlineParams getInlineParamsFromOptLevel(OptimizationLevel Level) {
  664. return getInlineParams(Level.getSpeedupLevel(), Level.getSizeLevel());
  665. }
  666. ModuleInlinerWrapperPass
  667. PassBuilder::buildInlinerPipeline(OptimizationLevel Level,
  668. ThinOrFullLTOPhase Phase) {
  669. InlineParams IP;
  670. if (PTO.InlinerThreshold == -1)
  671. IP = getInlineParamsFromOptLevel(Level);
  672. else
  673. IP = getInlineParams(PTO.InlinerThreshold);
  674. // For PreLinkThinLTO + SamplePGO, set hot-caller threshold to 0 to
  675. // disable hot callsite inline (as much as possible [1]) because it makes
  676. // profile annotation in the backend inaccurate.
  677. //
  678. // [1] Note the cost of a function could be below zero due to erased
  679. // prologue / epilogue.
  680. if (Phase == ThinOrFullLTOPhase::ThinLTOPreLink && PGOOpt &&
  681. PGOOpt->Action == PGOOptions::SampleUse)
  682. IP.HotCallSiteThreshold = 0;
  683. if (PGOOpt)
  684. IP.EnableDeferral = EnablePGOInlineDeferral;
  685. ModuleInlinerWrapperPass MIWP(IP, PerformMandatoryInliningsFirst,
  686. InlineContext{Phase, InlinePass::CGSCCInliner},
  687. UseInlineAdvisor, MaxDevirtIterations);
  688. // Require the GlobalsAA analysis for the module so we can query it within
  689. // the CGSCC pipeline.
  690. MIWP.addModulePass(RequireAnalysisPass<GlobalsAA, Module>());
  691. // Invalidate AAManager so it can be recreated and pick up the newly available
  692. // GlobalsAA.
  693. MIWP.addModulePass(
  694. createModuleToFunctionPassAdaptor(InvalidateAnalysisPass<AAManager>()));
  695. // Require the ProfileSummaryAnalysis for the module so we can query it within
  696. // the inliner pass.
  697. MIWP.addModulePass(RequireAnalysisPass<ProfileSummaryAnalysis, Module>());
  698. // Now begin the main postorder CGSCC pipeline.
  699. // FIXME: The current CGSCC pipeline has its origins in the legacy pass
  700. // manager and trying to emulate its precise behavior. Much of this doesn't
  701. // make a lot of sense and we should revisit the core CGSCC structure.
  702. CGSCCPassManager &MainCGPipeline = MIWP.getPM();
  703. // Note: historically, the PruneEH pass was run first to deduce nounwind and
  704. // generally clean up exception handling overhead. It isn't clear this is
  705. // valuable as the inliner doesn't currently care whether it is inlining an
  706. // invoke or a call.
  707. if (AttributorRun & AttributorRunOption::CGSCC)
  708. MainCGPipeline.addPass(AttributorCGSCCPass());
  709. // Now deduce any function attributes based in the current code.
  710. MainCGPipeline.addPass(PostOrderFunctionAttrsPass());
  711. // When at O3 add argument promotion to the pass pipeline.
  712. // FIXME: It isn't at all clear why this should be limited to O3.
  713. if (Level == OptimizationLevel::O3)
  714. MainCGPipeline.addPass(ArgumentPromotionPass());
  715. // Try to perform OpenMP specific optimizations. This is a (quick!) no-op if
  716. // there are no OpenMP runtime calls present in the module.
  717. if (Level == OptimizationLevel::O2 || Level == OptimizationLevel::O3)
  718. MainCGPipeline.addPass(OpenMPOptCGSCCPass());
  719. for (auto &C : CGSCCOptimizerLateEPCallbacks)
  720. C(MainCGPipeline, Level);
  721. // Lastly, add the core function simplification pipeline nested inside the
  722. // CGSCC walk.
  723. MainCGPipeline.addPass(createCGSCCToFunctionPassAdaptor(
  724. buildFunctionSimplificationPipeline(Level, Phase),
  725. PTO.EagerlyInvalidateAnalyses, EnableNoRerunSimplificationPipeline));
  726. MainCGPipeline.addPass(CoroSplitPass(Level != OptimizationLevel::O0));
  727. if (EnableNoRerunSimplificationPipeline)
  728. MIWP.addLateModulePass(createModuleToFunctionPassAdaptor(
  729. InvalidateAnalysisPass<ShouldNotRunFunctionPassesAnalysis>()));
  730. return MIWP;
  731. }
  732. ModulePassManager
  733. PassBuilder::buildModuleInlinerPipeline(OptimizationLevel Level,
  734. ThinOrFullLTOPhase Phase) {
  735. ModulePassManager MPM;
  736. InlineParams IP = getInlineParamsFromOptLevel(Level);
  737. // For PreLinkThinLTO + SamplePGO, set hot-caller threshold to 0 to
  738. // disable hot callsite inline (as much as possible [1]) because it makes
  739. // profile annotation in the backend inaccurate.
  740. //
  741. // [1] Note the cost of a function could be below zero due to erased
  742. // prologue / epilogue.
  743. if (Phase == ThinOrFullLTOPhase::ThinLTOPreLink && PGOOpt &&
  744. PGOOpt->Action == PGOOptions::SampleUse)
  745. IP.HotCallSiteThreshold = 0;
  746. if (PGOOpt)
  747. IP.EnableDeferral = EnablePGOInlineDeferral;
  748. // The inline deferral logic is used to avoid losing some
  749. // inlining chance in future. It is helpful in SCC inliner, in which
  750. // inlining is processed in bottom-up order.
  751. // While in module inliner, the inlining order is a priority-based order
  752. // by default. The inline deferral is unnecessary there. So we disable the
  753. // inline deferral logic in module inliner.
  754. IP.EnableDeferral = false;
  755. MPM.addPass(ModuleInlinerPass(IP, UseInlineAdvisor, Phase));
  756. MPM.addPass(createModuleToFunctionPassAdaptor(
  757. buildFunctionSimplificationPipeline(Level, Phase),
  758. PTO.EagerlyInvalidateAnalyses));
  759. MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(
  760. CoroSplitPass(Level != OptimizationLevel::O0)));
  761. return MPM;
  762. }
  763. ModulePassManager
  764. PassBuilder::buildModuleSimplificationPipeline(OptimizationLevel Level,
  765. ThinOrFullLTOPhase Phase) {
  766. ModulePassManager MPM;
  767. // Place pseudo probe instrumentation as the first pass of the pipeline to
  768. // minimize the impact of optimization changes.
  769. if (PGOOpt && PGOOpt->PseudoProbeForProfiling &&
  770. Phase != ThinOrFullLTOPhase::ThinLTOPostLink)
  771. MPM.addPass(SampleProfileProbePass(TM));
  772. bool HasSampleProfile = PGOOpt && (PGOOpt->Action == PGOOptions::SampleUse);
  773. // In ThinLTO mode, when flattened profile is used, all the available
  774. // profile information will be annotated in PreLink phase so there is
  775. // no need to load the profile again in PostLink.
  776. bool LoadSampleProfile =
  777. HasSampleProfile &&
  778. !(FlattenedProfileUsed && Phase == ThinOrFullLTOPhase::ThinLTOPostLink);
  779. // During the ThinLTO backend phase we perform early indirect call promotion
  780. // here, before globalopt. Otherwise imported available_externally functions
  781. // look unreferenced and are removed. If we are going to load the sample
  782. // profile then defer until later.
  783. // TODO: See if we can move later and consolidate with the location where
  784. // we perform ICP when we are loading a sample profile.
  785. // TODO: We pass HasSampleProfile (whether there was a sample profile file
  786. // passed to the compile) to the SamplePGO flag of ICP. This is used to
  787. // determine whether the new direct calls are annotated with prof metadata.
  788. // Ideally this should be determined from whether the IR is annotated with
  789. // sample profile, and not whether the a sample profile was provided on the
  790. // command line. E.g. for flattened profiles where we will not be reloading
  791. // the sample profile in the ThinLTO backend, we ideally shouldn't have to
  792. // provide the sample profile file.
  793. if (Phase == ThinOrFullLTOPhase::ThinLTOPostLink && !LoadSampleProfile)
  794. MPM.addPass(PGOIndirectCallPromotion(true /* InLTO */, HasSampleProfile));
  795. // Do basic inference of function attributes from known properties of system
  796. // libraries and other oracles.
  797. MPM.addPass(InferFunctionAttrsPass());
  798. MPM.addPass(CoroEarlyPass());
  799. // Create an early function pass manager to cleanup the output of the
  800. // frontend.
  801. FunctionPassManager EarlyFPM;
  802. // Lower llvm.expect to metadata before attempting transforms.
  803. // Compare/branch metadata may alter the behavior of passes like SimplifyCFG.
  804. EarlyFPM.addPass(LowerExpectIntrinsicPass());
  805. EarlyFPM.addPass(SimplifyCFGPass());
  806. EarlyFPM.addPass(SROAPass(SROAOptions::ModifyCFG));
  807. EarlyFPM.addPass(EarlyCSEPass());
  808. if (Level == OptimizationLevel::O3)
  809. EarlyFPM.addPass(CallSiteSplittingPass());
  810. // In SamplePGO ThinLTO backend, we need instcombine before profile annotation
  811. // to convert bitcast to direct calls so that they can be inlined during the
  812. // profile annotation prepration step.
  813. // More details about SamplePGO design can be found in:
  814. // https://research.google.com/pubs/pub45290.html
  815. // FIXME: revisit how SampleProfileLoad/Inliner/ICP is structured.
  816. if (LoadSampleProfile)
  817. EarlyFPM.addPass(InstCombinePass());
  818. MPM.addPass(createModuleToFunctionPassAdaptor(std::move(EarlyFPM),
  819. PTO.EagerlyInvalidateAnalyses));
  820. if (LoadSampleProfile) {
  821. // Annotate sample profile right after early FPM to ensure freshness of
  822. // the debug info.
  823. MPM.addPass(SampleProfileLoaderPass(PGOOpt->ProfileFile,
  824. PGOOpt->ProfileRemappingFile, Phase));
  825. // Cache ProfileSummaryAnalysis once to avoid the potential need to insert
  826. // RequireAnalysisPass for PSI before subsequent non-module passes.
  827. MPM.addPass(RequireAnalysisPass<ProfileSummaryAnalysis, Module>());
  828. // Do not invoke ICP in the LTOPrelink phase as it makes it hard
  829. // for the profile annotation to be accurate in the LTO backend.
  830. if (Phase != ThinOrFullLTOPhase::ThinLTOPreLink &&
  831. Phase != ThinOrFullLTOPhase::FullLTOPreLink)
  832. // We perform early indirect call promotion here, before globalopt.
  833. // This is important for the ThinLTO backend phase because otherwise
  834. // imported available_externally functions look unreferenced and are
  835. // removed.
  836. MPM.addPass(
  837. PGOIndirectCallPromotion(true /* IsInLTO */, true /* SamplePGO */));
  838. }
  839. // Try to perform OpenMP specific optimizations on the module. This is a
  840. // (quick!) no-op if there are no OpenMP runtime calls present in the module.
  841. if (Level != OptimizationLevel::O0)
  842. MPM.addPass(OpenMPOptPass());
  843. if (AttributorRun & AttributorRunOption::MODULE)
  844. MPM.addPass(AttributorPass());
  845. // Lower type metadata and the type.test intrinsic in the ThinLTO
  846. // post link pipeline after ICP. This is to enable usage of the type
  847. // tests in ICP sequences.
  848. if (Phase == ThinOrFullLTOPhase::ThinLTOPostLink)
  849. MPM.addPass(LowerTypeTestsPass(nullptr, nullptr, true));
  850. for (auto &C : PipelineEarlySimplificationEPCallbacks)
  851. C(MPM, Level);
  852. // Interprocedural constant propagation now that basic cleanup has occurred
  853. // and prior to optimizing globals.
  854. // FIXME: This position in the pipeline hasn't been carefully considered in
  855. // years, it should be re-analyzed.
  856. MPM.addPass(IPSCCPPass(IPSCCPOptions(/*AllowFuncSpec=*/
  857. Level != OptimizationLevel::Os &&
  858. Level != OptimizationLevel::Oz)));
  859. // Attach metadata to indirect call sites indicating the set of functions
  860. // they may target at run-time. This should follow IPSCCP.
  861. MPM.addPass(CalledValuePropagationPass());
  862. // Optimize globals to try and fold them into constants.
  863. MPM.addPass(GlobalOptPass());
  864. // Promote any localized globals to SSA registers.
  865. // FIXME: Should this instead by a run of SROA?
  866. // FIXME: We should probably run instcombine and simplifycfg afterward to
  867. // delete control flows that are dead once globals have been folded to
  868. // constants.
  869. MPM.addPass(createModuleToFunctionPassAdaptor(PromotePass()));
  870. // Create a small function pass pipeline to cleanup after all the global
  871. // optimizations.
  872. FunctionPassManager GlobalCleanupPM;
  873. GlobalCleanupPM.addPass(InstCombinePass());
  874. invokePeepholeEPCallbacks(GlobalCleanupPM, Level);
  875. GlobalCleanupPM.addPass(
  876. SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
  877. MPM.addPass(createModuleToFunctionPassAdaptor(std::move(GlobalCleanupPM),
  878. PTO.EagerlyInvalidateAnalyses));
  879. // Add all the requested passes for instrumentation PGO, if requested.
  880. if (PGOOpt && Phase != ThinOrFullLTOPhase::ThinLTOPostLink &&
  881. (PGOOpt->Action == PGOOptions::IRInstr ||
  882. PGOOpt->Action == PGOOptions::IRUse)) {
  883. addPGOInstrPasses(MPM, Level,
  884. /* RunProfileGen */ PGOOpt->Action == PGOOptions::IRInstr,
  885. /* IsCS */ false, PGOOpt->ProfileFile,
  886. PGOOpt->ProfileRemappingFile, Phase);
  887. MPM.addPass(PGOIndirectCallPromotion(false, false));
  888. }
  889. if (PGOOpt && Phase != ThinOrFullLTOPhase::ThinLTOPostLink &&
  890. PGOOpt->CSAction == PGOOptions::CSIRInstr)
  891. MPM.addPass(PGOInstrumentationGenCreateVar(PGOOpt->CSProfileGenFile));
  892. // Synthesize function entry counts for non-PGO compilation.
  893. if (EnableSyntheticCounts && !PGOOpt)
  894. MPM.addPass(SyntheticCountsPropagation());
  895. if (EnableModuleInliner)
  896. MPM.addPass(buildModuleInlinerPipeline(Level, Phase));
  897. else
  898. MPM.addPass(buildInlinerPipeline(Level, Phase));
  899. // Remove any dead arguments exposed by cleanups, constant folding globals,
  900. // and argument promotion.
  901. MPM.addPass(DeadArgumentEliminationPass());
  902. MPM.addPass(CoroCleanupPass());
  903. if (EnableMemProfiler && Phase != ThinOrFullLTOPhase::ThinLTOPreLink) {
  904. MPM.addPass(createModuleToFunctionPassAdaptor(MemProfilerPass()));
  905. MPM.addPass(ModuleMemProfilerPass());
  906. }
  907. return MPM;
  908. }
  909. /// TODO: Should LTO cause any differences to this set of passes?
  910. void PassBuilder::addVectorPasses(OptimizationLevel Level,
  911. FunctionPassManager &FPM, bool IsFullLTO) {
  912. FPM.addPass(LoopVectorizePass(
  913. LoopVectorizeOptions(!PTO.LoopInterleaving, !PTO.LoopVectorization)));
  914. if (IsFullLTO) {
  915. // The vectorizer may have significantly shortened a loop body; unroll
  916. // again. Unroll small loops to hide loop backedge latency and saturate any
  917. // parallel execution resources of an out-of-order processor. We also then
  918. // need to clean up redundancies and loop invariant code.
  919. // FIXME: It would be really good to use a loop-integrated instruction
  920. // combiner for cleanup here so that the unrolling and LICM can be pipelined
  921. // across the loop nests.
  922. // We do UnrollAndJam in a separate LPM to ensure it happens before unroll
  923. if (EnableUnrollAndJam && PTO.LoopUnrolling)
  924. FPM.addPass(createFunctionToLoopPassAdaptor(
  925. LoopUnrollAndJamPass(Level.getSpeedupLevel())));
  926. FPM.addPass(LoopUnrollPass(LoopUnrollOptions(
  927. Level.getSpeedupLevel(), /*OnlyWhenForced=*/!PTO.LoopUnrolling,
  928. PTO.ForgetAllSCEVInLoopUnroll)));
  929. FPM.addPass(WarnMissedTransformationsPass());
  930. // Now that we are done with loop unrolling, be it either by LoopVectorizer,
  931. // or LoopUnroll passes, some variable-offset GEP's into alloca's could have
  932. // become constant-offset, thus enabling SROA and alloca promotion. Do so.
  933. // NOTE: we are very late in the pipeline, and we don't have any LICM
  934. // or SimplifyCFG passes scheduled after us, that would cleanup
  935. // the CFG mess this may created if allowed to modify CFG, so forbid that.
  936. FPM.addPass(SROAPass(SROAOptions::PreserveCFG));
  937. }
  938. if (!IsFullLTO) {
  939. // Eliminate loads by forwarding stores from the previous iteration to loads
  940. // of the current iteration.
  941. FPM.addPass(LoopLoadEliminationPass());
  942. }
  943. // Cleanup after the loop optimization passes.
  944. FPM.addPass(InstCombinePass());
  945. if (Level.getSpeedupLevel() > 1 && ExtraVectorizerPasses) {
  946. ExtraVectorPassManager ExtraPasses;
  947. // At higher optimization levels, try to clean up any runtime overlap and
  948. // alignment checks inserted by the vectorizer. We want to track correlated
  949. // runtime checks for two inner loops in the same outer loop, fold any
  950. // common computations, hoist loop-invariant aspects out of any outer loop,
  951. // and unswitch the runtime checks if possible. Once hoisted, we may have
  952. // dead (or speculatable) control flows or more combining opportunities.
  953. ExtraPasses.addPass(EarlyCSEPass());
  954. ExtraPasses.addPass(CorrelatedValuePropagationPass());
  955. ExtraPasses.addPass(InstCombinePass());
  956. LoopPassManager LPM;
  957. LPM.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
  958. /*AllowSpeculation=*/true));
  959. LPM.addPass(SimpleLoopUnswitchPass(/* NonTrivial */ Level ==
  960. OptimizationLevel::O3));
  961. ExtraPasses.addPass(
  962. RequireAnalysisPass<OptimizationRemarkEmitterAnalysis, Function>());
  963. ExtraPasses.addPass(
  964. createFunctionToLoopPassAdaptor(std::move(LPM), /*UseMemorySSA=*/true,
  965. /*UseBlockFrequencyInfo=*/true));
  966. ExtraPasses.addPass(
  967. SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
  968. ExtraPasses.addPass(InstCombinePass());
  969. FPM.addPass(std::move(ExtraPasses));
  970. }
  971. // Now that we've formed fast to execute loop structures, we do further
  972. // optimizations. These are run afterward as they might block doing complex
  973. // analyses and transforms such as what are needed for loop vectorization.
  974. // Cleanup after loop vectorization, etc. Simplification passes like CVP and
  975. // GVN, loop transforms, and others have already run, so it's now better to
  976. // convert to more optimized IR using more aggressive simplify CFG options.
  977. // The extra sinking transform can create larger basic blocks, so do this
  978. // before SLP vectorization.
  979. FPM.addPass(SimplifyCFGPass(SimplifyCFGOptions()
  980. .forwardSwitchCondToPhi(true)
  981. .convertSwitchRangeToICmp(true)
  982. .convertSwitchToLookupTable(true)
  983. .needCanonicalLoops(false)
  984. .hoistCommonInsts(true)
  985. .sinkCommonInsts(true)));
  986. if (IsFullLTO) {
  987. FPM.addPass(SCCPPass());
  988. FPM.addPass(InstCombinePass());
  989. FPM.addPass(BDCEPass());
  990. }
  991. // Optimize parallel scalar instruction chains into SIMD instructions.
  992. if (PTO.SLPVectorization) {
  993. FPM.addPass(SLPVectorizerPass());
  994. if (Level.getSpeedupLevel() > 1 && ExtraVectorizerPasses) {
  995. FPM.addPass(EarlyCSEPass());
  996. }
  997. }
  998. // Enhance/cleanup vector code.
  999. FPM.addPass(VectorCombinePass());
  1000. if (!IsFullLTO) {
  1001. FPM.addPass(InstCombinePass());
  1002. // Unroll small loops to hide loop backedge latency and saturate any
  1003. // parallel execution resources of an out-of-order processor. We also then
  1004. // need to clean up redundancies and loop invariant code.
  1005. // FIXME: It would be really good to use a loop-integrated instruction
  1006. // combiner for cleanup here so that the unrolling and LICM can be pipelined
  1007. // across the loop nests.
  1008. // We do UnrollAndJam in a separate LPM to ensure it happens before unroll
  1009. if (EnableUnrollAndJam && PTO.LoopUnrolling) {
  1010. FPM.addPass(createFunctionToLoopPassAdaptor(
  1011. LoopUnrollAndJamPass(Level.getSpeedupLevel())));
  1012. }
  1013. FPM.addPass(LoopUnrollPass(LoopUnrollOptions(
  1014. Level.getSpeedupLevel(), /*OnlyWhenForced=*/!PTO.LoopUnrolling,
  1015. PTO.ForgetAllSCEVInLoopUnroll)));
  1016. FPM.addPass(WarnMissedTransformationsPass());
  1017. // Now that we are done with loop unrolling, be it either by LoopVectorizer,
  1018. // or LoopUnroll passes, some variable-offset GEP's into alloca's could have
  1019. // become constant-offset, thus enabling SROA and alloca promotion. Do so.
  1020. // NOTE: we are very late in the pipeline, and we don't have any LICM
  1021. // or SimplifyCFG passes scheduled after us, that would cleanup
  1022. // the CFG mess this may created if allowed to modify CFG, so forbid that.
  1023. FPM.addPass(SROAPass(SROAOptions::PreserveCFG));
  1024. FPM.addPass(InstCombinePass());
  1025. FPM.addPass(
  1026. RequireAnalysisPass<OptimizationRemarkEmitterAnalysis, Function>());
  1027. FPM.addPass(createFunctionToLoopPassAdaptor(
  1028. LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
  1029. /*AllowSpeculation=*/true),
  1030. /*UseMemorySSA=*/true, /*UseBlockFrequencyInfo=*/true));
  1031. }
  1032. // Now that we've vectorized and unrolled loops, we may have more refined
  1033. // alignment information, try to re-derive it here.
  1034. FPM.addPass(AlignmentFromAssumptionsPass());
  1035. if (IsFullLTO)
  1036. FPM.addPass(InstCombinePass());
  1037. }
  1038. ModulePassManager
  1039. PassBuilder::buildModuleOptimizationPipeline(OptimizationLevel Level,
  1040. ThinOrFullLTOPhase LTOPhase) {
  1041. const bool LTOPreLink = (LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink ||
  1042. LTOPhase == ThinOrFullLTOPhase::FullLTOPreLink);
  1043. ModulePassManager MPM;
  1044. // Optimize globals now that the module is fully simplified.
  1045. MPM.addPass(GlobalOptPass());
  1046. MPM.addPass(GlobalDCEPass());
  1047. // Run partial inlining pass to partially inline functions that have
  1048. // large bodies.
  1049. if (RunPartialInlining)
  1050. MPM.addPass(PartialInlinerPass());
  1051. // Remove avail extern fns and globals definitions since we aren't compiling
  1052. // an object file for later LTO. For LTO we want to preserve these so they
  1053. // are eligible for inlining at link-time. Note if they are unreferenced they
  1054. // will be removed by GlobalDCE later, so this only impacts referenced
  1055. // available externally globals. Eventually they will be suppressed during
  1056. // codegen, but eliminating here enables more opportunity for GlobalDCE as it
  1057. // may make globals referenced by available external functions dead and saves
  1058. // running remaining passes on the eliminated functions. These should be
  1059. // preserved during prelinking for link-time inlining decisions.
  1060. if (!LTOPreLink)
  1061. MPM.addPass(EliminateAvailableExternallyPass());
  1062. if (EnableOrderFileInstrumentation)
  1063. MPM.addPass(InstrOrderFilePass());
  1064. // Do RPO function attribute inference across the module to forward-propagate
  1065. // attributes where applicable.
  1066. // FIXME: Is this really an optimization rather than a canonicalization?
  1067. MPM.addPass(ReversePostOrderFunctionAttrsPass());
  1068. // Do a post inline PGO instrumentation and use pass. This is a context
  1069. // sensitive PGO pass. We don't want to do this in LTOPreLink phrase as
  1070. // cross-module inline has not been done yet. The context sensitive
  1071. // instrumentation is after all the inlines are done.
  1072. if (!LTOPreLink && PGOOpt) {
  1073. if (PGOOpt->CSAction == PGOOptions::CSIRInstr)
  1074. addPGOInstrPasses(MPM, Level, /* RunProfileGen */ true,
  1075. /* IsCS */ true, PGOOpt->CSProfileGenFile,
  1076. PGOOpt->ProfileRemappingFile, LTOPhase);
  1077. else if (PGOOpt->CSAction == PGOOptions::CSIRUse)
  1078. addPGOInstrPasses(MPM, Level, /* RunProfileGen */ false,
  1079. /* IsCS */ true, PGOOpt->ProfileFile,
  1080. PGOOpt->ProfileRemappingFile, LTOPhase);
  1081. }
  1082. // Re-compute GlobalsAA here prior to function passes. This is particularly
  1083. // useful as the above will have inlined, DCE'ed, and function-attr
  1084. // propagated everything. We should at this point have a reasonably minimal
  1085. // and richly annotated call graph. By computing aliasing and mod/ref
  1086. // information for all local globals here, the late loop passes and notably
  1087. // the vectorizer will be able to use them to help recognize vectorizable
  1088. // memory operations.
  1089. MPM.addPass(RecomputeGlobalsAAPass());
  1090. for (auto &C : OptimizerEarlyEPCallbacks)
  1091. C(MPM, Level);
  1092. FunctionPassManager OptimizePM;
  1093. OptimizePM.addPass(Float2IntPass());
  1094. OptimizePM.addPass(LowerConstantIntrinsicsPass());
  1095. if (EnableMatrix) {
  1096. OptimizePM.addPass(LowerMatrixIntrinsicsPass());
  1097. OptimizePM.addPass(EarlyCSEPass());
  1098. }
  1099. // FIXME: We need to run some loop optimizations to re-rotate loops after
  1100. // simplifycfg and others undo their rotation.
  1101. // Optimize the loop execution. These passes operate on entire loop nests
  1102. // rather than on each loop in an inside-out manner, and so they are actually
  1103. // function passes.
  1104. for (auto &C : VectorizerStartEPCallbacks)
  1105. C(OptimizePM, Level);
  1106. LoopPassManager LPM;
  1107. // First rotate loops that may have been un-rotated by prior passes.
  1108. // Disable header duplication at -Oz.
  1109. LPM.addPass(LoopRotatePass(Level != OptimizationLevel::Oz, LTOPreLink));
  1110. // Some loops may have become dead by now. Try to delete them.
  1111. // FIXME: see discussion in https://reviews.llvm.org/D112851,
  1112. // this may need to be revisited once we run GVN before loop deletion
  1113. // in the simplification pipeline.
  1114. LPM.addPass(LoopDeletionPass());
  1115. OptimizePM.addPass(createFunctionToLoopPassAdaptor(
  1116. std::move(LPM), /*UseMemorySSA=*/false, /*UseBlockFrequencyInfo=*/false));
  1117. // Distribute loops to allow partial vectorization. I.e. isolate dependences
  1118. // into separate loop that would otherwise inhibit vectorization. This is
  1119. // currently only performed for loops marked with the metadata
  1120. // llvm.loop.distribute=true or when -enable-loop-distribute is specified.
  1121. OptimizePM.addPass(LoopDistributePass());
  1122. // Populates the VFABI attribute with the scalar-to-vector mappings
  1123. // from the TargetLibraryInfo.
  1124. OptimizePM.addPass(InjectTLIMappings());
  1125. addVectorPasses(Level, OptimizePM, /* IsFullLTO */ false);
  1126. // LoopSink pass sinks instructions hoisted by LICM, which serves as a
  1127. // canonicalization pass that enables other optimizations. As a result,
  1128. // LoopSink pass needs to be a very late IR pass to avoid undoing LICM
  1129. // result too early.
  1130. OptimizePM.addPass(LoopSinkPass());
  1131. // And finally clean up LCSSA form before generating code.
  1132. OptimizePM.addPass(InstSimplifyPass());
  1133. // This hoists/decomposes div/rem ops. It should run after other sink/hoist
  1134. // passes to avoid re-sinking, but before SimplifyCFG because it can allow
  1135. // flattening of blocks.
  1136. OptimizePM.addPass(DivRemPairsPass());
  1137. // Try to annotate calls that were created during optimization.
  1138. OptimizePM.addPass(TailCallElimPass());
  1139. // LoopSink (and other loop passes since the last simplifyCFG) might have
  1140. // resulted in single-entry-single-exit or empty blocks. Clean up the CFG.
  1141. OptimizePM.addPass(
  1142. SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
  1143. // Add the core optimizing pipeline.
  1144. MPM.addPass(createModuleToFunctionPassAdaptor(std::move(OptimizePM),
  1145. PTO.EagerlyInvalidateAnalyses));
  1146. for (auto &C : OptimizerLastEPCallbacks)
  1147. C(MPM, Level);
  1148. // Split out cold code. Splitting is done late to avoid hiding context from
  1149. // other optimizations and inadvertently regressing performance. The tradeoff
  1150. // is that this has a higher code size cost than splitting early.
  1151. if (EnableHotColdSplit && !LTOPreLink)
  1152. MPM.addPass(HotColdSplittingPass());
  1153. // Search the code for similar regions of code. If enough similar regions can
  1154. // be found where extracting the regions into their own function will decrease
  1155. // the size of the program, we extract the regions, a deduplicate the
  1156. // structurally similar regions.
  1157. if (EnableIROutliner)
  1158. MPM.addPass(IROutlinerPass());
  1159. // Merge functions if requested.
  1160. if (PTO.MergeFunctions)
  1161. MPM.addPass(MergeFunctionsPass());
  1162. // Now we need to do some global optimization transforms.
  1163. // FIXME: It would seem like these should come first in the optimization
  1164. // pipeline and maybe be the bottom of the canonicalization pipeline? Weird
  1165. // ordering here.
  1166. MPM.addPass(GlobalDCEPass());
  1167. MPM.addPass(ConstantMergePass());
  1168. if (PTO.CallGraphProfile && !LTOPreLink)
  1169. MPM.addPass(CGProfilePass());
  1170. // TODO: Relative look table converter pass caused an issue when full lto is
  1171. // enabled. See https://reviews.llvm.org/D94355 for more details.
  1172. // Until the issue fixed, disable this pass during pre-linking phase.
  1173. if (!LTOPreLink)
  1174. MPM.addPass(RelLookupTableConverterPass());
  1175. return MPM;
  1176. }
  1177. ModulePassManager
  1178. PassBuilder::buildPerModuleDefaultPipeline(OptimizationLevel Level,
  1179. bool LTOPreLink) {
  1180. assert(Level != OptimizationLevel::O0 &&
  1181. "Must request optimizations for the default pipeline!");
  1182. ModulePassManager MPM;
  1183. // Convert @llvm.global.annotations to !annotation metadata.
  1184. MPM.addPass(Annotation2MetadataPass());
  1185. // Force any function attributes we want the rest of the pipeline to observe.
  1186. MPM.addPass(ForceFunctionAttrsPass());
  1187. // Apply module pipeline start EP callback.
  1188. for (auto &C : PipelineStartEPCallbacks)
  1189. C(MPM, Level);
  1190. if (PGOOpt && PGOOpt->DebugInfoForProfiling)
  1191. MPM.addPass(createModuleToFunctionPassAdaptor(AddDiscriminatorsPass()));
  1192. const ThinOrFullLTOPhase LTOPhase = LTOPreLink
  1193. ? ThinOrFullLTOPhase::FullLTOPreLink
  1194. : ThinOrFullLTOPhase::None;
  1195. // Add the core simplification pipeline.
  1196. MPM.addPass(buildModuleSimplificationPipeline(Level, LTOPhase));
  1197. // Now add the optimization pipeline.
  1198. MPM.addPass(buildModuleOptimizationPipeline(Level, LTOPhase));
  1199. if (PGOOpt && PGOOpt->PseudoProbeForProfiling &&
  1200. PGOOpt->Action == PGOOptions::SampleUse)
  1201. MPM.addPass(PseudoProbeUpdatePass());
  1202. // Emit annotation remarks.
  1203. addAnnotationRemarksPass(MPM);
  1204. if (LTOPreLink)
  1205. addRequiredLTOPreLinkPasses(MPM);
  1206. return MPM;
  1207. }
  1208. ModulePassManager
  1209. PassBuilder::buildThinLTOPreLinkDefaultPipeline(OptimizationLevel Level) {
  1210. assert(Level != OptimizationLevel::O0 &&
  1211. "Must request optimizations for the default pipeline!");
  1212. ModulePassManager MPM;
  1213. // Convert @llvm.global.annotations to !annotation metadata.
  1214. MPM.addPass(Annotation2MetadataPass());
  1215. // Force any function attributes we want the rest of the pipeline to observe.
  1216. MPM.addPass(ForceFunctionAttrsPass());
  1217. if (PGOOpt && PGOOpt->DebugInfoForProfiling)
  1218. MPM.addPass(createModuleToFunctionPassAdaptor(AddDiscriminatorsPass()));
  1219. // Apply module pipeline start EP callback.
  1220. for (auto &C : PipelineStartEPCallbacks)
  1221. C(MPM, Level);
  1222. // If we are planning to perform ThinLTO later, we don't bloat the code with
  1223. // unrolling/vectorization/... now. Just simplify the module as much as we
  1224. // can.
  1225. MPM.addPass(buildModuleSimplificationPipeline(
  1226. Level, ThinOrFullLTOPhase::ThinLTOPreLink));
  1227. // Run partial inlining pass to partially inline functions that have
  1228. // large bodies.
  1229. // FIXME: It isn't clear whether this is really the right place to run this
  1230. // in ThinLTO. Because there is another canonicalization and simplification
  1231. // phase that will run after the thin link, running this here ends up with
  1232. // less information than will be available later and it may grow functions in
  1233. // ways that aren't beneficial.
  1234. if (RunPartialInlining)
  1235. MPM.addPass(PartialInlinerPass());
  1236. // Reduce the size of the IR as much as possible.
  1237. MPM.addPass(GlobalOptPass());
  1238. if (PGOOpt && PGOOpt->PseudoProbeForProfiling &&
  1239. PGOOpt->Action == PGOOptions::SampleUse)
  1240. MPM.addPass(PseudoProbeUpdatePass());
  1241. // Handle Optimizer{Early,Last}EPCallbacks added by clang on PreLink. Actual
  1242. // optimization is going to be done in PostLink stage, but clang can't add
  1243. // callbacks there in case of in-process ThinLTO called by linker.
  1244. for (auto &C : OptimizerEarlyEPCallbacks)
  1245. C(MPM, Level);
  1246. for (auto &C : OptimizerLastEPCallbacks)
  1247. C(MPM, Level);
  1248. // Emit annotation remarks.
  1249. addAnnotationRemarksPass(MPM);
  1250. addRequiredLTOPreLinkPasses(MPM);
  1251. return MPM;
  1252. }
  1253. ModulePassManager PassBuilder::buildThinLTODefaultPipeline(
  1254. OptimizationLevel Level, const ModuleSummaryIndex *ImportSummary) {
  1255. ModulePassManager MPM;
  1256. // Convert @llvm.global.annotations to !annotation metadata.
  1257. MPM.addPass(Annotation2MetadataPass());
  1258. if (ImportSummary) {
  1259. // These passes import type identifier resolutions for whole-program
  1260. // devirtualization and CFI. They must run early because other passes may
  1261. // disturb the specific instruction patterns that these passes look for,
  1262. // creating dependencies on resolutions that may not appear in the summary.
  1263. //
  1264. // For example, GVN may transform the pattern assume(type.test) appearing in
  1265. // two basic blocks into assume(phi(type.test, type.test)), which would
  1266. // transform a dependency on a WPD resolution into a dependency on a type
  1267. // identifier resolution for CFI.
  1268. //
  1269. // Also, WPD has access to more precise information than ICP and can
  1270. // devirtualize more effectively, so it should operate on the IR first.
  1271. //
  1272. // The WPD and LowerTypeTest passes need to run at -O0 to lower type
  1273. // metadata and intrinsics.
  1274. MPM.addPass(WholeProgramDevirtPass(nullptr, ImportSummary));
  1275. MPM.addPass(LowerTypeTestsPass(nullptr, ImportSummary));
  1276. }
  1277. if (Level == OptimizationLevel::O0) {
  1278. // Run a second time to clean up any type tests left behind by WPD for use
  1279. // in ICP.
  1280. MPM.addPass(LowerTypeTestsPass(nullptr, nullptr, true));
  1281. // Drop available_externally and unreferenced globals. This is necessary
  1282. // with ThinLTO in order to avoid leaving undefined references to dead
  1283. // globals in the object file.
  1284. MPM.addPass(EliminateAvailableExternallyPass());
  1285. MPM.addPass(GlobalDCEPass());
  1286. return MPM;
  1287. }
  1288. // Force any function attributes we want the rest of the pipeline to observe.
  1289. MPM.addPass(ForceFunctionAttrsPass());
  1290. // Add the core simplification pipeline.
  1291. MPM.addPass(buildModuleSimplificationPipeline(
  1292. Level, ThinOrFullLTOPhase::ThinLTOPostLink));
  1293. // Now add the optimization pipeline.
  1294. MPM.addPass(buildModuleOptimizationPipeline(
  1295. Level, ThinOrFullLTOPhase::ThinLTOPostLink));
  1296. // Emit annotation remarks.
  1297. addAnnotationRemarksPass(MPM);
  1298. return MPM;
  1299. }
  1300. ModulePassManager
  1301. PassBuilder::buildLTOPreLinkDefaultPipeline(OptimizationLevel Level) {
  1302. assert(Level != OptimizationLevel::O0 &&
  1303. "Must request optimizations for the default pipeline!");
  1304. // FIXME: We should use a customized pre-link pipeline!
  1305. return buildPerModuleDefaultPipeline(Level,
  1306. /* LTOPreLink */ true);
  1307. }
  1308. ModulePassManager
  1309. PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level,
  1310. ModuleSummaryIndex *ExportSummary) {
  1311. ModulePassManager MPM;
  1312. // Convert @llvm.global.annotations to !annotation metadata.
  1313. MPM.addPass(Annotation2MetadataPass());
  1314. for (auto &C : FullLinkTimeOptimizationEarlyEPCallbacks)
  1315. C(MPM, Level);
  1316. // Create a function that performs CFI checks for cross-DSO calls with targets
  1317. // in the current module.
  1318. MPM.addPass(CrossDSOCFIPass());
  1319. if (Level == OptimizationLevel::O0) {
  1320. // The WPD and LowerTypeTest passes need to run at -O0 to lower type
  1321. // metadata and intrinsics.
  1322. MPM.addPass(WholeProgramDevirtPass(ExportSummary, nullptr));
  1323. MPM.addPass(LowerTypeTestsPass(ExportSummary, nullptr));
  1324. // Run a second time to clean up any type tests left behind by WPD for use
  1325. // in ICP.
  1326. MPM.addPass(LowerTypeTestsPass(nullptr, nullptr, true));
  1327. for (auto &C : FullLinkTimeOptimizationLastEPCallbacks)
  1328. C(MPM, Level);
  1329. // Emit annotation remarks.
  1330. addAnnotationRemarksPass(MPM);
  1331. return MPM;
  1332. }
  1333. if (PGOOpt && PGOOpt->Action == PGOOptions::SampleUse) {
  1334. // Load sample profile before running the LTO optimization pipeline.
  1335. MPM.addPass(SampleProfileLoaderPass(PGOOpt->ProfileFile,
  1336. PGOOpt->ProfileRemappingFile,
  1337. ThinOrFullLTOPhase::FullLTOPostLink));
  1338. // Cache ProfileSummaryAnalysis once to avoid the potential need to insert
  1339. // RequireAnalysisPass for PSI before subsequent non-module passes.
  1340. MPM.addPass(RequireAnalysisPass<ProfileSummaryAnalysis, Module>());
  1341. }
  1342. // Try to run OpenMP optimizations, quick no-op if no OpenMP metadata present.
  1343. MPM.addPass(OpenMPOptPass(ThinOrFullLTOPhase::FullLTOPostLink));
  1344. // Remove unused virtual tables to improve the quality of code generated by
  1345. // whole-program devirtualization and bitset lowering.
  1346. MPM.addPass(GlobalDCEPass());
  1347. // Force any function attributes we want the rest of the pipeline to observe.
  1348. MPM.addPass(ForceFunctionAttrsPass());
  1349. // Do basic inference of function attributes from known properties of system
  1350. // libraries and other oracles.
  1351. MPM.addPass(InferFunctionAttrsPass());
  1352. if (Level.getSpeedupLevel() > 1) {
  1353. MPM.addPass(createModuleToFunctionPassAdaptor(
  1354. CallSiteSplittingPass(), PTO.EagerlyInvalidateAnalyses));
  1355. // Indirect call promotion. This should promote all the targets that are
  1356. // left by the earlier promotion pass that promotes intra-module targets.
  1357. // This two-step promotion is to save the compile time. For LTO, it should
  1358. // produce the same result as if we only do promotion here.
  1359. MPM.addPass(PGOIndirectCallPromotion(
  1360. true /* InLTO */, PGOOpt && PGOOpt->Action == PGOOptions::SampleUse));
  1361. // Propagate constants at call sites into the functions they call. This
  1362. // opens opportunities for globalopt (and inlining) by substituting function
  1363. // pointers passed as arguments to direct uses of functions.
  1364. MPM.addPass(IPSCCPPass(IPSCCPOptions(/*AllowFuncSpec=*/
  1365. Level != OptimizationLevel::Os &&
  1366. Level != OptimizationLevel::Oz)));
  1367. // Attach metadata to indirect call sites indicating the set of functions
  1368. // they may target at run-time. This should follow IPSCCP.
  1369. MPM.addPass(CalledValuePropagationPass());
  1370. }
  1371. // Now deduce any function attributes based in the current code.
  1372. MPM.addPass(
  1373. createModuleToPostOrderCGSCCPassAdaptor(PostOrderFunctionAttrsPass()));
  1374. // Do RPO function attribute inference across the module to forward-propagate
  1375. // attributes where applicable.
  1376. // FIXME: Is this really an optimization rather than a canonicalization?
  1377. MPM.addPass(ReversePostOrderFunctionAttrsPass());
  1378. // Use in-range annotations on GEP indices to split globals where beneficial.
  1379. MPM.addPass(GlobalSplitPass());
  1380. // Run whole program optimization of virtual call when the list of callees
  1381. // is fixed.
  1382. MPM.addPass(WholeProgramDevirtPass(ExportSummary, nullptr));
  1383. // Stop here at -O1.
  1384. if (Level == OptimizationLevel::O1) {
  1385. // The LowerTypeTestsPass needs to run to lower type metadata and the
  1386. // type.test intrinsics. The pass does nothing if CFI is disabled.
  1387. MPM.addPass(LowerTypeTestsPass(ExportSummary, nullptr));
  1388. // Run a second time to clean up any type tests left behind by WPD for use
  1389. // in ICP (which is performed earlier than this in the regular LTO
  1390. // pipeline).
  1391. MPM.addPass(LowerTypeTestsPass(nullptr, nullptr, true));
  1392. for (auto &C : FullLinkTimeOptimizationLastEPCallbacks)
  1393. C(MPM, Level);
  1394. // Emit annotation remarks.
  1395. addAnnotationRemarksPass(MPM);
  1396. return MPM;
  1397. }
  1398. // Optimize globals to try and fold them into constants.
  1399. MPM.addPass(GlobalOptPass());
  1400. // Promote any localized globals to SSA registers.
  1401. MPM.addPass(createModuleToFunctionPassAdaptor(PromotePass()));
  1402. // Linking modules together can lead to duplicate global constant, only
  1403. // keep one copy of each constant.
  1404. MPM.addPass(ConstantMergePass());
  1405. // Reduce the code after globalopt and ipsccp. Both can open up significant
  1406. // simplification opportunities, and both can propagate functions through
  1407. // function pointers. When this happens, we often have to resolve varargs
  1408. // calls, etc, so let instcombine do this.
  1409. FunctionPassManager PeepholeFPM;
  1410. PeepholeFPM.addPass(InstCombinePass());
  1411. if (Level == OptimizationLevel::O3)
  1412. PeepholeFPM.addPass(AggressiveInstCombinePass());
  1413. invokePeepholeEPCallbacks(PeepholeFPM, Level);
  1414. MPM.addPass(createModuleToFunctionPassAdaptor(std::move(PeepholeFPM),
  1415. PTO.EagerlyInvalidateAnalyses));
  1416. // Note: historically, the PruneEH pass was run first to deduce nounwind and
  1417. // generally clean up exception handling overhead. It isn't clear this is
  1418. // valuable as the inliner doesn't currently care whether it is inlining an
  1419. // invoke or a call.
  1420. // Run the inliner now.
  1421. MPM.addPass(ModuleInlinerWrapperPass(
  1422. getInlineParamsFromOptLevel(Level),
  1423. /* MandatoryFirst */ true,
  1424. InlineContext{ThinOrFullLTOPhase::FullLTOPostLink,
  1425. InlinePass::CGSCCInliner}));
  1426. // Optimize globals again after we ran the inliner.
  1427. MPM.addPass(GlobalOptPass());
  1428. // Run the OpenMPOpt pass again after global optimizations.
  1429. MPM.addPass(OpenMPOptPass(ThinOrFullLTOPhase::FullLTOPostLink));
  1430. // Garbage collect dead functions.
  1431. MPM.addPass(GlobalDCEPass());
  1432. // If we didn't decide to inline a function, check to see if we can
  1433. // transform it to pass arguments by value instead of by reference.
  1434. MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(ArgumentPromotionPass()));
  1435. // Remove unused arguments from functions.
  1436. MPM.addPass(DeadArgumentEliminationPass());
  1437. FunctionPassManager FPM;
  1438. // The IPO Passes may leave cruft around. Clean up after them.
  1439. FPM.addPass(InstCombinePass());
  1440. invokePeepholeEPCallbacks(FPM, Level);
  1441. if (EnableConstraintElimination)
  1442. FPM.addPass(ConstraintEliminationPass());
  1443. FPM.addPass(JumpThreadingPass());
  1444. // Do a post inline PGO instrumentation and use pass. This is a context
  1445. // sensitive PGO pass.
  1446. if (PGOOpt) {
  1447. if (PGOOpt->CSAction == PGOOptions::CSIRInstr)
  1448. addPGOInstrPasses(MPM, Level, /* RunProfileGen */ true,
  1449. /* IsCS */ true, PGOOpt->CSProfileGenFile,
  1450. PGOOpt->ProfileRemappingFile,
  1451. ThinOrFullLTOPhase::FullLTOPostLink);
  1452. else if (PGOOpt->CSAction == PGOOptions::CSIRUse)
  1453. addPGOInstrPasses(MPM, Level, /* RunProfileGen */ false,
  1454. /* IsCS */ true, PGOOpt->ProfileFile,
  1455. PGOOpt->ProfileRemappingFile,
  1456. ThinOrFullLTOPhase::FullLTOPostLink);
  1457. }
  1458. // Break up allocas
  1459. FPM.addPass(SROAPass(SROAOptions::ModifyCFG));
  1460. // LTO provides additional opportunities for tailcall elimination due to
  1461. // link-time inlining, and visibility of nocapture attribute.
  1462. FPM.addPass(TailCallElimPass());
  1463. // Run a few AA driver optimizations here and now to cleanup the code.
  1464. MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM),
  1465. PTO.EagerlyInvalidateAnalyses));
  1466. MPM.addPass(
  1467. createModuleToPostOrderCGSCCPassAdaptor(PostOrderFunctionAttrsPass()));
  1468. // Require the GlobalsAA analysis for the module so we can query it within
  1469. // MainFPM.
  1470. MPM.addPass(RequireAnalysisPass<GlobalsAA, Module>());
  1471. // Invalidate AAManager so it can be recreated and pick up the newly available
  1472. // GlobalsAA.
  1473. MPM.addPass(
  1474. createModuleToFunctionPassAdaptor(InvalidateAnalysisPass<AAManager>()));
  1475. FunctionPassManager MainFPM;
  1476. MainFPM.addPass(createFunctionToLoopPassAdaptor(
  1477. LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
  1478. /*AllowSpeculation=*/true),
  1479. /*USeMemorySSA=*/true, /*UseBlockFrequencyInfo=*/true));
  1480. if (RunNewGVN)
  1481. MainFPM.addPass(NewGVNPass());
  1482. else
  1483. MainFPM.addPass(GVNPass());
  1484. // Remove dead memcpy()'s.
  1485. MainFPM.addPass(MemCpyOptPass());
  1486. // Nuke dead stores.
  1487. MainFPM.addPass(DSEPass());
  1488. MainFPM.addPass(MergedLoadStoreMotionPass());
  1489. LoopPassManager LPM;
  1490. if (EnableLoopFlatten && Level.getSpeedupLevel() > 1)
  1491. LPM.addPass(LoopFlattenPass());
  1492. LPM.addPass(IndVarSimplifyPass());
  1493. LPM.addPass(LoopDeletionPass());
  1494. // FIXME: Add loop interchange.
  1495. // Unroll small loops and perform peeling.
  1496. LPM.addPass(LoopFullUnrollPass(Level.getSpeedupLevel(),
  1497. /* OnlyWhenForced= */ !PTO.LoopUnrolling,
  1498. PTO.ForgetAllSCEVInLoopUnroll));
  1499. // The loop passes in LPM (LoopFullUnrollPass) do not preserve MemorySSA.
  1500. // *All* loop passes must preserve it, in order to be able to use it.
  1501. MainFPM.addPass(createFunctionToLoopPassAdaptor(
  1502. std::move(LPM), /*UseMemorySSA=*/false, /*UseBlockFrequencyInfo=*/true));
  1503. MainFPM.addPass(LoopDistributePass());
  1504. addVectorPasses(Level, MainFPM, /* IsFullLTO */ true);
  1505. // Run the OpenMPOpt CGSCC pass again late.
  1506. MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(
  1507. OpenMPOptCGSCCPass(ThinOrFullLTOPhase::FullLTOPostLink)));
  1508. invokePeepholeEPCallbacks(MainFPM, Level);
  1509. MainFPM.addPass(JumpThreadingPass());
  1510. MPM.addPass(createModuleToFunctionPassAdaptor(std::move(MainFPM),
  1511. PTO.EagerlyInvalidateAnalyses));
  1512. // Lower type metadata and the type.test intrinsic. This pass supports
  1513. // clang's control flow integrity mechanisms (-fsanitize=cfi*) and needs
  1514. // to be run at link time if CFI is enabled. This pass does nothing if
  1515. // CFI is disabled.
  1516. MPM.addPass(LowerTypeTestsPass(ExportSummary, nullptr));
  1517. // Run a second time to clean up any type tests left behind by WPD for use
  1518. // in ICP (which is performed earlier than this in the regular LTO pipeline).
  1519. MPM.addPass(LowerTypeTestsPass(nullptr, nullptr, true));
  1520. // Enable splitting late in the FullLTO post-link pipeline.
  1521. if (EnableHotColdSplit)
  1522. MPM.addPass(HotColdSplittingPass());
  1523. // Add late LTO optimization passes.
  1524. // Delete basic blocks, which optimization passes may have killed.
  1525. MPM.addPass(createModuleToFunctionPassAdaptor(SimplifyCFGPass(
  1526. SimplifyCFGOptions().convertSwitchRangeToICmp(true).hoistCommonInsts(
  1527. true))));
  1528. // Drop bodies of available eternally objects to improve GlobalDCE.
  1529. MPM.addPass(EliminateAvailableExternallyPass());
  1530. // Now that we have optimized the program, discard unreachable functions.
  1531. MPM.addPass(GlobalDCEPass());
  1532. if (PTO.MergeFunctions)
  1533. MPM.addPass(MergeFunctionsPass());
  1534. if (PTO.CallGraphProfile)
  1535. MPM.addPass(CGProfilePass());
  1536. for (auto &C : FullLinkTimeOptimizationLastEPCallbacks)
  1537. C(MPM, Level);
  1538. // Emit annotation remarks.
  1539. addAnnotationRemarksPass(MPM);
  1540. return MPM;
  1541. }
  1542. ModulePassManager PassBuilder::buildO0DefaultPipeline(OptimizationLevel Level,
  1543. bool LTOPreLink) {
  1544. assert(Level == OptimizationLevel::O0 &&
  1545. "buildO0DefaultPipeline should only be used with O0");
  1546. ModulePassManager MPM;
  1547. // Perform pseudo probe instrumentation in O0 mode. This is for the
  1548. // consistency between different build modes. For example, a LTO build can be
  1549. // mixed with an O0 prelink and an O2 postlink. Loading a sample profile in
  1550. // the postlink will require pseudo probe instrumentation in the prelink.
  1551. if (PGOOpt && PGOOpt->PseudoProbeForProfiling)
  1552. MPM.addPass(SampleProfileProbePass(TM));
  1553. if (PGOOpt && (PGOOpt->Action == PGOOptions::IRInstr ||
  1554. PGOOpt->Action == PGOOptions::IRUse))
  1555. addPGOInstrPassesForO0(
  1556. MPM,
  1557. /* RunProfileGen */ (PGOOpt->Action == PGOOptions::IRInstr),
  1558. /* IsCS */ false, PGOOpt->ProfileFile, PGOOpt->ProfileRemappingFile);
  1559. for (auto &C : PipelineStartEPCallbacks)
  1560. C(MPM, Level);
  1561. if (PGOOpt && PGOOpt->DebugInfoForProfiling)
  1562. MPM.addPass(createModuleToFunctionPassAdaptor(AddDiscriminatorsPass()));
  1563. for (auto &C : PipelineEarlySimplificationEPCallbacks)
  1564. C(MPM, Level);
  1565. // Build a minimal pipeline based on the semantics required by LLVM,
  1566. // which is just that always inlining occurs. Further, disable generating
  1567. // lifetime intrinsics to avoid enabling further optimizations during
  1568. // code generation.
  1569. MPM.addPass(AlwaysInlinerPass(
  1570. /*InsertLifetimeIntrinsics=*/false));
  1571. if (PTO.MergeFunctions)
  1572. MPM.addPass(MergeFunctionsPass());
  1573. if (EnableMatrix)
  1574. MPM.addPass(
  1575. createModuleToFunctionPassAdaptor(LowerMatrixIntrinsicsPass(true)));
  1576. if (!CGSCCOptimizerLateEPCallbacks.empty()) {
  1577. CGSCCPassManager CGPM;
  1578. for (auto &C : CGSCCOptimizerLateEPCallbacks)
  1579. C(CGPM, Level);
  1580. if (!CGPM.isEmpty())
  1581. MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(std::move(CGPM)));
  1582. }
  1583. if (!LateLoopOptimizationsEPCallbacks.empty()) {
  1584. LoopPassManager LPM;
  1585. for (auto &C : LateLoopOptimizationsEPCallbacks)
  1586. C(LPM, Level);
  1587. if (!LPM.isEmpty()) {
  1588. MPM.addPass(createModuleToFunctionPassAdaptor(
  1589. createFunctionToLoopPassAdaptor(std::move(LPM))));
  1590. }
  1591. }
  1592. if (!LoopOptimizerEndEPCallbacks.empty()) {
  1593. LoopPassManager LPM;
  1594. for (auto &C : LoopOptimizerEndEPCallbacks)
  1595. C(LPM, Level);
  1596. if (!LPM.isEmpty()) {
  1597. MPM.addPass(createModuleToFunctionPassAdaptor(
  1598. createFunctionToLoopPassAdaptor(std::move(LPM))));
  1599. }
  1600. }
  1601. if (!ScalarOptimizerLateEPCallbacks.empty()) {
  1602. FunctionPassManager FPM;
  1603. for (auto &C : ScalarOptimizerLateEPCallbacks)
  1604. C(FPM, Level);
  1605. if (!FPM.isEmpty())
  1606. MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
  1607. }
  1608. for (auto &C : OptimizerEarlyEPCallbacks)
  1609. C(MPM, Level);
  1610. if (!VectorizerStartEPCallbacks.empty()) {
  1611. FunctionPassManager FPM;
  1612. for (auto &C : VectorizerStartEPCallbacks)
  1613. C(FPM, Level);
  1614. if (!FPM.isEmpty())
  1615. MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
  1616. }
  1617. ModulePassManager CoroPM;
  1618. CoroPM.addPass(CoroEarlyPass());
  1619. CGSCCPassManager CGPM;
  1620. CGPM.addPass(CoroSplitPass());
  1621. CoroPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(std::move(CGPM)));
  1622. CoroPM.addPass(CoroCleanupPass());
  1623. CoroPM.addPass(GlobalDCEPass());
  1624. MPM.addPass(CoroConditionalWrapper(std::move(CoroPM)));
  1625. for (auto &C : OptimizerLastEPCallbacks)
  1626. C(MPM, Level);
  1627. if (LTOPreLink)
  1628. addRequiredLTOPreLinkPasses(MPM);
  1629. MPM.addPass(createModuleToFunctionPassAdaptor(AnnotationRemarksPass()));
  1630. return MPM;
  1631. }
  1632. AAManager PassBuilder::buildDefaultAAPipeline() {
  1633. AAManager AA;
  1634. // The order in which these are registered determines their priority when
  1635. // being queried.
  1636. // First we register the basic alias analysis that provides the majority of
  1637. // per-function local AA logic. This is a stateless, on-demand local set of
  1638. // AA techniques.
  1639. AA.registerFunctionAnalysis<BasicAA>();
  1640. // Next we query fast, specialized alias analyses that wrap IR-embedded
  1641. // information about aliasing.
  1642. AA.registerFunctionAnalysis<ScopedNoAliasAA>();
  1643. AA.registerFunctionAnalysis<TypeBasedAA>();
  1644. // Add support for querying global aliasing information when available.
  1645. // Because the `AAManager` is a function analysis and `GlobalsAA` is a module
  1646. // analysis, all that the `AAManager` can do is query for any *cached*
  1647. // results from `GlobalsAA` through a readonly proxy.
  1648. if (EnableGlobalAnalyses)
  1649. AA.registerModuleAnalysis<GlobalsAA>();
  1650. // Add target-specific alias analyses.
  1651. if (TM)
  1652. TM->registerDefaultAliasAnalyses(AA);
  1653. return AA;
  1654. }