12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
7177817791780178117821783178417851786178717881789179017911792179317941795179617971798179918001801180218031804180518061807180818091810181118121813181418151816181718181819182018211822182318241825182618271828182918301831183218331834183518361837183818391840184118421843184418451846184718481849185018511852185318541855 |
- //===- Construction of pass pipelines -------------------------------------===//
- //
- // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
- // See https://llvm.org/LICENSE.txt for license information.
- // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
- //
- //===----------------------------------------------------------------------===//
- /// \file
- ///
- /// This file provides the implementation of the PassBuilder based on our
- /// static pass registry as well as related functionality. It also provides
- /// helpers to aid in analyzing, debugging, and testing passes and pass
- /// pipelines.
- ///
- //===----------------------------------------------------------------------===//
- #include "llvm/Analysis/AliasAnalysis.h"
- #include "llvm/Analysis/BasicAliasAnalysis.h"
- #include "llvm/Analysis/CGSCCPassManager.h"
- #include "llvm/Analysis/GlobalsModRef.h"
- #include "llvm/Analysis/InlineAdvisor.h"
- #include "llvm/Analysis/OptimizationRemarkEmitter.h"
- #include "llvm/Analysis/ProfileSummaryInfo.h"
- #include "llvm/Analysis/ScopedNoAliasAA.h"
- #include "llvm/Analysis/TypeBasedAliasAnalysis.h"
- #include "llvm/IR/PassManager.h"
- #include "llvm/Passes/OptimizationLevel.h"
- #include "llvm/Passes/PassBuilder.h"
- #include "llvm/Support/CommandLine.h"
- #include "llvm/Support/ErrorHandling.h"
- #include "llvm/Support/PGOOptions.h"
- #include "llvm/Target/TargetMachine.h"
- #include "llvm/Transforms/AggressiveInstCombine/AggressiveInstCombine.h"
- #include "llvm/Transforms/Coroutines/CoroCleanup.h"
- #include "llvm/Transforms/Coroutines/CoroEarly.h"
- #include "llvm/Transforms/Coroutines/CoroElide.h"
- #include "llvm/Transforms/Coroutines/CoroSplit.h"
- #include "llvm/Transforms/IPO/AlwaysInliner.h"
- #include "llvm/Transforms/IPO/Annotation2Metadata.h"
- #include "llvm/Transforms/IPO/ArgumentPromotion.h"
- #include "llvm/Transforms/IPO/Attributor.h"
- #include "llvm/Transforms/IPO/CalledValuePropagation.h"
- #include "llvm/Transforms/IPO/ConstantMerge.h"
- #include "llvm/Transforms/IPO/CrossDSOCFI.h"
- #include "llvm/Transforms/IPO/DeadArgumentElimination.h"
- #include "llvm/Transforms/IPO/ElimAvailExtern.h"
- #include "llvm/Transforms/IPO/ForceFunctionAttrs.h"
- #include "llvm/Transforms/IPO/FunctionAttrs.h"
- #include "llvm/Transforms/IPO/GlobalDCE.h"
- #include "llvm/Transforms/IPO/GlobalOpt.h"
- #include "llvm/Transforms/IPO/GlobalSplit.h"
- #include "llvm/Transforms/IPO/HotColdSplitting.h"
- #include "llvm/Transforms/IPO/IROutliner.h"
- #include "llvm/Transforms/IPO/InferFunctionAttrs.h"
- #include "llvm/Transforms/IPO/Inliner.h"
- #include "llvm/Transforms/IPO/LowerTypeTests.h"
- #include "llvm/Transforms/IPO/MergeFunctions.h"
- #include "llvm/Transforms/IPO/ModuleInliner.h"
- #include "llvm/Transforms/IPO/OpenMPOpt.h"
- #include "llvm/Transforms/IPO/PartialInlining.h"
- #include "llvm/Transforms/IPO/SCCP.h"
- #include "llvm/Transforms/IPO/SampleProfile.h"
- #include "llvm/Transforms/IPO/SampleProfileProbe.h"
- #include "llvm/Transforms/IPO/SyntheticCountsPropagation.h"
- #include "llvm/Transforms/IPO/WholeProgramDevirt.h"
- #include "llvm/Transforms/InstCombine/InstCombine.h"
- #include "llvm/Transforms/Instrumentation/CGProfile.h"
- #include "llvm/Transforms/Instrumentation/ControlHeightReduction.h"
- #include "llvm/Transforms/Instrumentation/InstrOrderFile.h"
- #include "llvm/Transforms/Instrumentation/InstrProfiling.h"
- #include "llvm/Transforms/Instrumentation/MemProfiler.h"
- #include "llvm/Transforms/Instrumentation/PGOInstrumentation.h"
- #include "llvm/Transforms/Scalar/ADCE.h"
- #include "llvm/Transforms/Scalar/AlignmentFromAssumptions.h"
- #include "llvm/Transforms/Scalar/AnnotationRemarks.h"
- #include "llvm/Transforms/Scalar/BDCE.h"
- #include "llvm/Transforms/Scalar/CallSiteSplitting.h"
- #include "llvm/Transforms/Scalar/ConstraintElimination.h"
- #include "llvm/Transforms/Scalar/CorrelatedValuePropagation.h"
- #include "llvm/Transforms/Scalar/DFAJumpThreading.h"
- #include "llvm/Transforms/Scalar/DeadStoreElimination.h"
- #include "llvm/Transforms/Scalar/DivRemPairs.h"
- #include "llvm/Transforms/Scalar/EarlyCSE.h"
- #include "llvm/Transforms/Scalar/Float2Int.h"
- #include "llvm/Transforms/Scalar/GVN.h"
- #include "llvm/Transforms/Scalar/IndVarSimplify.h"
- #include "llvm/Transforms/Scalar/InstSimplifyPass.h"
- #include "llvm/Transforms/Scalar/JumpThreading.h"
- #include "llvm/Transforms/Scalar/LICM.h"
- #include "llvm/Transforms/Scalar/LoopDeletion.h"
- #include "llvm/Transforms/Scalar/LoopDistribute.h"
- #include "llvm/Transforms/Scalar/LoopFlatten.h"
- #include "llvm/Transforms/Scalar/LoopIdiomRecognize.h"
- #include "llvm/Transforms/Scalar/LoopInstSimplify.h"
- #include "llvm/Transforms/Scalar/LoopInterchange.h"
- #include "llvm/Transforms/Scalar/LoopLoadElimination.h"
- #include "llvm/Transforms/Scalar/LoopPassManager.h"
- #include "llvm/Transforms/Scalar/LoopRotation.h"
- #include "llvm/Transforms/Scalar/LoopSimplifyCFG.h"
- #include "llvm/Transforms/Scalar/LoopSink.h"
- #include "llvm/Transforms/Scalar/LoopUnrollAndJamPass.h"
- #include "llvm/Transforms/Scalar/LoopUnrollPass.h"
- #include "llvm/Transforms/Scalar/LowerConstantIntrinsics.h"
- #include "llvm/Transforms/Scalar/LowerExpectIntrinsic.h"
- #include "llvm/Transforms/Scalar/LowerMatrixIntrinsics.h"
- #include "llvm/Transforms/Scalar/MemCpyOptimizer.h"
- #include "llvm/Transforms/Scalar/MergedLoadStoreMotion.h"
- #include "llvm/Transforms/Scalar/NewGVN.h"
- #include "llvm/Transforms/Scalar/Reassociate.h"
- #include "llvm/Transforms/Scalar/SCCP.h"
- #include "llvm/Transforms/Scalar/SROA.h"
- #include "llvm/Transforms/Scalar/SimpleLoopUnswitch.h"
- #include "llvm/Transforms/Scalar/SimplifyCFG.h"
- #include "llvm/Transforms/Scalar/SpeculativeExecution.h"
- #include "llvm/Transforms/Scalar/TailRecursionElimination.h"
- #include "llvm/Transforms/Scalar/WarnMissedTransforms.h"
- #include "llvm/Transforms/Utils/AddDiscriminators.h"
- #include "llvm/Transforms/Utils/AssumeBundleBuilder.h"
- #include "llvm/Transforms/Utils/CanonicalizeAliases.h"
- #include "llvm/Transforms/Utils/InjectTLIMappings.h"
- #include "llvm/Transforms/Utils/LibCallsShrinkWrap.h"
- #include "llvm/Transforms/Utils/Mem2Reg.h"
- #include "llvm/Transforms/Utils/NameAnonGlobals.h"
- #include "llvm/Transforms/Utils/RelLookupTableConverter.h"
- #include "llvm/Transforms/Utils/SimplifyCFGOptions.h"
- #include "llvm/Transforms/Vectorize/LoopVectorize.h"
- #include "llvm/Transforms/Vectorize/SLPVectorizer.h"
- #include "llvm/Transforms/Vectorize/VectorCombine.h"
- using namespace llvm;
- static cl::opt<InliningAdvisorMode> UseInlineAdvisor(
- "enable-ml-inliner", cl::init(InliningAdvisorMode::Default), cl::Hidden,
- cl::desc("Enable ML policy for inliner. Currently trained for -Oz only"),
- cl::values(clEnumValN(InliningAdvisorMode::Default, "default",
- "Heuristics-based inliner version."),
- clEnumValN(InliningAdvisorMode::Development, "development",
- "Use development mode (runtime-loadable model)."),
- clEnumValN(InliningAdvisorMode::Release, "release",
- "Use release mode (AOT-compiled model).")));
- static cl::opt<bool> EnableSyntheticCounts(
- "enable-npm-synthetic-counts", cl::init(false), cl::Hidden, cl::ZeroOrMore,
- cl::desc("Run synthetic function entry count generation "
- "pass"));
- /// Flag to enable inline deferral during PGO.
- static cl::opt<bool>
- EnablePGOInlineDeferral("enable-npm-pgo-inline-deferral", cl::init(true),
- cl::Hidden,
- cl::desc("Enable inline deferral during PGO"));
- static cl::opt<bool> EnableMemProfiler("enable-mem-prof", cl::init(false),
- cl::Hidden, cl::ZeroOrMore,
- cl::desc("Enable memory profiler"));
- static cl::opt<bool> EnableModuleInliner("enable-module-inliner",
- cl::init(false), cl::Hidden,
- cl::desc("Enable module inliner"));
- static cl::opt<bool> PerformMandatoryInliningsFirst(
- "mandatory-inlining-first", cl::init(true), cl::Hidden, cl::ZeroOrMore,
- cl::desc("Perform mandatory inlinings module-wide, before performing "
- "inlining."));
- static cl::opt<bool> EnableO3NonTrivialUnswitching(
- "enable-npm-O3-nontrivial-unswitch", cl::init(true), cl::Hidden,
- cl::ZeroOrMore, cl::desc("Enable non-trivial loop unswitching for -O3"));
- static cl::opt<bool> EnableEagerlyInvalidateAnalyses(
- "eagerly-invalidate-analyses", cl::init(true), cl::Hidden,
- cl::desc("Eagerly invalidate more analyses in default pipelines"));
- static cl::opt<bool> EnableNoRerunSimplificationPipeline(
- "enable-no-rerun-simplification-pipeline", cl::init(false), cl::Hidden,
- cl::desc(
- "Prevent running the simplification pipeline on a function more "
- "than once in the case that SCC mutations cause a function to be "
- "visited multiple times as long as the function has not been changed"));
- static cl::opt<bool> EnableMergeFunctions(
- "enable-merge-functions", cl::init(false), cl::Hidden,
- cl::desc("Enable function merging as part of the optimization pipeline"));
- PipelineTuningOptions::PipelineTuningOptions() {
- LoopInterleaving = true;
- LoopVectorization = true;
- SLPVectorization = false;
- LoopUnrolling = true;
- ForgetAllSCEVInLoopUnroll = ForgetSCEVInLoopUnroll;
- LicmMssaOptCap = SetLicmMssaOptCap;
- LicmMssaNoAccForPromotionCap = SetLicmMssaNoAccForPromotionCap;
- CallGraphProfile = true;
- MergeFunctions = EnableMergeFunctions;
- EagerlyInvalidateAnalyses = EnableEagerlyInvalidateAnalyses;
- }
- namespace llvm {
- extern cl::opt<unsigned> MaxDevirtIterations;
- extern cl::opt<bool> EnableConstraintElimination;
- extern cl::opt<bool> EnableFunctionSpecialization;
- extern cl::opt<bool> EnableGVNHoist;
- extern cl::opt<bool> EnableGVNSink;
- extern cl::opt<bool> EnableHotColdSplit;
- extern cl::opt<bool> EnableIROutliner;
- extern cl::opt<bool> EnableOrderFileInstrumentation;
- extern cl::opt<bool> EnableCHR;
- extern cl::opt<bool> EnableLoopInterchange;
- extern cl::opt<bool> EnableUnrollAndJam;
- extern cl::opt<bool> EnableLoopFlatten;
- extern cl::opt<bool> EnableDFAJumpThreading;
- extern cl::opt<bool> RunNewGVN;
- extern cl::opt<bool> RunPartialInlining;
- extern cl::opt<bool> ExtraVectorizerPasses;
- extern cl::opt<bool> FlattenedProfileUsed;
- extern cl::opt<AttributorRunOption> AttributorRun;
- extern cl::opt<bool> EnableKnowledgeRetention;
- extern cl::opt<bool> EnableMatrix;
- extern cl::opt<bool> DisablePreInliner;
- extern cl::opt<int> PreInlineThreshold;
- } // namespace llvm
- void PassBuilder::invokePeepholeEPCallbacks(FunctionPassManager &FPM,
- OptimizationLevel Level) {
- for (auto &C : PeepholeEPCallbacks)
- C(FPM, Level);
- }
- // Helper to add AnnotationRemarksPass.
- static void addAnnotationRemarksPass(ModulePassManager &MPM) {
- FunctionPassManager FPM;
- FPM.addPass(AnnotationRemarksPass());
- MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
- }
- // Helper to check if the current compilation phase is preparing for LTO
- static bool isLTOPreLink(ThinOrFullLTOPhase Phase) {
- return Phase == ThinOrFullLTOPhase::ThinLTOPreLink ||
- Phase == ThinOrFullLTOPhase::FullLTOPreLink;
- }
- // TODO: Investigate the cost/benefit of tail call elimination on debugging.
- FunctionPassManager
- PassBuilder::buildO1FunctionSimplificationPipeline(OptimizationLevel Level,
- ThinOrFullLTOPhase Phase) {
- FunctionPassManager FPM;
- // Form SSA out of local memory accesses after breaking apart aggregates into
- // scalars.
- FPM.addPass(SROAPass());
- // Catch trivial redundancies
- FPM.addPass(EarlyCSEPass(true /* Enable mem-ssa. */));
- // Hoisting of scalars and load expressions.
- FPM.addPass(
- SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
- FPM.addPass(InstCombinePass());
- FPM.addPass(LibCallsShrinkWrapPass());
- invokePeepholeEPCallbacks(FPM, Level);
- FPM.addPass(
- SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
- // Form canonically associated expression trees, and simplify the trees using
- // basic mathematical properties. For example, this will form (nearly)
- // minimal multiplication trees.
- FPM.addPass(ReassociatePass());
- // Add the primary loop simplification pipeline.
- // FIXME: Currently this is split into two loop pass pipelines because we run
- // some function passes in between them. These can and should be removed
- // and/or replaced by scheduling the loop pass equivalents in the correct
- // positions. But those equivalent passes aren't powerful enough yet.
- // Specifically, `SimplifyCFGPass` and `InstCombinePass` are currently still
- // used. We have `LoopSimplifyCFGPass` which isn't yet powerful enough yet to
- // fully replace `SimplifyCFGPass`, and the closest to the other we have is
- // `LoopInstSimplify`.
- LoopPassManager LPM1, LPM2;
- // Simplify the loop body. We do this initially to clean up after other loop
- // passes run, either when iterating on a loop or on inner loops with
- // implications on the outer loop.
- LPM1.addPass(LoopInstSimplifyPass());
- LPM1.addPass(LoopSimplifyCFGPass());
- // Try to remove as much code from the loop header as possible,
- // to reduce amount of IR that will have to be duplicated. However,
- // do not perform speculative hoisting the first time as LICM
- // will destroy metadata that may not need to be destroyed if run
- // after loop rotation.
- // TODO: Investigate promotion cap for O1.
- LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
- /*AllowSpeculation=*/false));
- LPM1.addPass(LoopRotatePass(/* Disable header duplication */ true,
- isLTOPreLink(Phase)));
- // TODO: Investigate promotion cap for O1.
- LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
- /*AllowSpeculation=*/true));
- LPM1.addPass(SimpleLoopUnswitchPass());
- if (EnableLoopFlatten)
- LPM1.addPass(LoopFlattenPass());
- LPM2.addPass(LoopIdiomRecognizePass());
- LPM2.addPass(IndVarSimplifyPass());
- for (auto &C : LateLoopOptimizationsEPCallbacks)
- C(LPM2, Level);
- LPM2.addPass(LoopDeletionPass());
- if (EnableLoopInterchange)
- LPM2.addPass(LoopInterchangePass());
- // Do not enable unrolling in PreLinkThinLTO phase during sample PGO
- // because it changes IR to makes profile annotation in back compile
- // inaccurate. The normal unroller doesn't pay attention to forced full unroll
- // attributes so we need to make sure and allow the full unroll pass to pay
- // attention to it.
- if (Phase != ThinOrFullLTOPhase::ThinLTOPreLink || !PGOOpt ||
- PGOOpt->Action != PGOOptions::SampleUse)
- LPM2.addPass(LoopFullUnrollPass(Level.getSpeedupLevel(),
- /* OnlyWhenForced= */ !PTO.LoopUnrolling,
- PTO.ForgetAllSCEVInLoopUnroll));
- for (auto &C : LoopOptimizerEndEPCallbacks)
- C(LPM2, Level);
- // We provide the opt remark emitter pass for LICM to use. We only need to do
- // this once as it is immutable.
- FPM.addPass(
- RequireAnalysisPass<OptimizationRemarkEmitterAnalysis, Function>());
- FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM1),
- /*UseMemorySSA=*/true,
- /*UseBlockFrequencyInfo=*/true));
- FPM.addPass(
- SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
- FPM.addPass(InstCombinePass());
- // The loop passes in LPM2 (LoopFullUnrollPass) do not preserve MemorySSA.
- // *All* loop passes must preserve it, in order to be able to use it.
- FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM2),
- /*UseMemorySSA=*/false,
- /*UseBlockFrequencyInfo=*/false));
- // Delete small array after loop unroll.
- FPM.addPass(SROAPass());
- // Specially optimize memory movement as it doesn't look like dataflow in SSA.
- FPM.addPass(MemCpyOptPass());
- // Sparse conditional constant propagation.
- // FIXME: It isn't clear why we do this *after* loop passes rather than
- // before...
- FPM.addPass(SCCPPass());
- // Delete dead bit computations (instcombine runs after to fold away the dead
- // computations, and then ADCE will run later to exploit any new DCE
- // opportunities that creates).
- FPM.addPass(BDCEPass());
- // Run instcombine after redundancy and dead bit elimination to exploit
- // opportunities opened up by them.
- FPM.addPass(InstCombinePass());
- invokePeepholeEPCallbacks(FPM, Level);
- FPM.addPass(CoroElidePass());
- for (auto &C : ScalarOptimizerLateEPCallbacks)
- C(FPM, Level);
- // Finally, do an expensive DCE pass to catch all the dead code exposed by
- // the simplifications and basic cleanup after all the simplifications.
- // TODO: Investigate if this is too expensive.
- FPM.addPass(ADCEPass());
- FPM.addPass(
- SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
- FPM.addPass(InstCombinePass());
- invokePeepholeEPCallbacks(FPM, Level);
- return FPM;
- }
- FunctionPassManager
- PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level,
- ThinOrFullLTOPhase Phase) {
- assert(Level != OptimizationLevel::O0 && "Must request optimizations!");
- // The O1 pipeline has a separate pipeline creation function to simplify
- // construction readability.
- if (Level.getSpeedupLevel() == 1)
- return buildO1FunctionSimplificationPipeline(Level, Phase);
- FunctionPassManager FPM;
- // Form SSA out of local memory accesses after breaking apart aggregates into
- // scalars.
- FPM.addPass(SROAPass());
- // Catch trivial redundancies
- FPM.addPass(EarlyCSEPass(true /* Enable mem-ssa. */));
- if (EnableKnowledgeRetention)
- FPM.addPass(AssumeSimplifyPass());
- // Hoisting of scalars and load expressions.
- if (EnableGVNHoist)
- FPM.addPass(GVNHoistPass());
- // Global value numbering based sinking.
- if (EnableGVNSink) {
- FPM.addPass(GVNSinkPass());
- FPM.addPass(
- SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
- }
- if (EnableConstraintElimination)
- FPM.addPass(ConstraintEliminationPass());
- // Speculative execution if the target has divergent branches; otherwise nop.
- FPM.addPass(SpeculativeExecutionPass(/* OnlyIfDivergentTarget =*/true));
- // Optimize based on known information about branches, and cleanup afterward.
- FPM.addPass(JumpThreadingPass());
- FPM.addPass(CorrelatedValuePropagationPass());
- FPM.addPass(
- SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
- FPM.addPass(InstCombinePass());
- if (Level == OptimizationLevel::O3)
- FPM.addPass(AggressiveInstCombinePass());
- if (!Level.isOptimizingForSize())
- FPM.addPass(LibCallsShrinkWrapPass());
- invokePeepholeEPCallbacks(FPM, Level);
- // For PGO use pipeline, try to optimize memory intrinsics such as memcpy
- // using the size value profile. Don't perform this when optimizing for size.
- if (PGOOpt && PGOOpt->Action == PGOOptions::IRUse &&
- !Level.isOptimizingForSize())
- FPM.addPass(PGOMemOPSizeOpt());
- FPM.addPass(TailCallElimPass());
- FPM.addPass(
- SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
- // Form canonically associated expression trees, and simplify the trees using
- // basic mathematical properties. For example, this will form (nearly)
- // minimal multiplication trees.
- FPM.addPass(ReassociatePass());
- // Add the primary loop simplification pipeline.
- // FIXME: Currently this is split into two loop pass pipelines because we run
- // some function passes in between them. These can and should be removed
- // and/or replaced by scheduling the loop pass equivalents in the correct
- // positions. But those equivalent passes aren't powerful enough yet.
- // Specifically, `SimplifyCFGPass` and `InstCombinePass` are currently still
- // used. We have `LoopSimplifyCFGPass` which isn't yet powerful enough yet to
- // fully replace `SimplifyCFGPass`, and the closest to the other we have is
- // `LoopInstSimplify`.
- LoopPassManager LPM1, LPM2;
- // Simplify the loop body. We do this initially to clean up after other loop
- // passes run, either when iterating on a loop or on inner loops with
- // implications on the outer loop.
- LPM1.addPass(LoopInstSimplifyPass());
- LPM1.addPass(LoopSimplifyCFGPass());
- // Try to remove as much code from the loop header as possible,
- // to reduce amount of IR that will have to be duplicated. However,
- // do not perform speculative hoisting the first time as LICM
- // will destroy metadata that may not need to be destroyed if run
- // after loop rotation.
- // TODO: Investigate promotion cap for O1.
- LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
- /*AllowSpeculation=*/false));
- // Disable header duplication in loop rotation at -Oz.
- LPM1.addPass(
- LoopRotatePass(Level != OptimizationLevel::Oz, isLTOPreLink(Phase)));
- // TODO: Investigate promotion cap for O1.
- LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
- /*AllowSpeculation=*/true));
- LPM1.addPass(
- SimpleLoopUnswitchPass(/* NonTrivial */ Level == OptimizationLevel::O3 &&
- EnableO3NonTrivialUnswitching));
- if (EnableLoopFlatten)
- LPM1.addPass(LoopFlattenPass());
- LPM2.addPass(LoopIdiomRecognizePass());
- LPM2.addPass(IndVarSimplifyPass());
- for (auto &C : LateLoopOptimizationsEPCallbacks)
- C(LPM2, Level);
- LPM2.addPass(LoopDeletionPass());
- if (EnableLoopInterchange)
- LPM2.addPass(LoopInterchangePass());
- // Do not enable unrolling in PreLinkThinLTO phase during sample PGO
- // because it changes IR to makes profile annotation in back compile
- // inaccurate. The normal unroller doesn't pay attention to forced full unroll
- // attributes so we need to make sure and allow the full unroll pass to pay
- // attention to it.
- if (Phase != ThinOrFullLTOPhase::ThinLTOPreLink || !PGOOpt ||
- PGOOpt->Action != PGOOptions::SampleUse)
- LPM2.addPass(LoopFullUnrollPass(Level.getSpeedupLevel(),
- /* OnlyWhenForced= */ !PTO.LoopUnrolling,
- PTO.ForgetAllSCEVInLoopUnroll));
- for (auto &C : LoopOptimizerEndEPCallbacks)
- C(LPM2, Level);
- // We provide the opt remark emitter pass for LICM to use. We only need to do
- // this once as it is immutable.
- FPM.addPass(
- RequireAnalysisPass<OptimizationRemarkEmitterAnalysis, Function>());
- FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM1),
- /*UseMemorySSA=*/true,
- /*UseBlockFrequencyInfo=*/true));
- FPM.addPass(
- SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
- FPM.addPass(InstCombinePass());
- // The loop passes in LPM2 (LoopIdiomRecognizePass, IndVarSimplifyPass,
- // LoopDeletionPass and LoopFullUnrollPass) do not preserve MemorySSA.
- // *All* loop passes must preserve it, in order to be able to use it.
- FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM2),
- /*UseMemorySSA=*/false,
- /*UseBlockFrequencyInfo=*/false));
- // Delete small array after loop unroll.
- FPM.addPass(SROAPass());
- // The matrix extension can introduce large vector operations early, which can
- // benefit from running vector-combine early on.
- if (EnableMatrix)
- FPM.addPass(VectorCombinePass(/*ScalarizationOnly=*/true));
- // Eliminate redundancies.
- FPM.addPass(MergedLoadStoreMotionPass());
- if (RunNewGVN)
- FPM.addPass(NewGVNPass());
- else
- FPM.addPass(GVNPass());
- // Sparse conditional constant propagation.
- // FIXME: It isn't clear why we do this *after* loop passes rather than
- // before...
- FPM.addPass(SCCPPass());
- // Delete dead bit computations (instcombine runs after to fold away the dead
- // computations, and then ADCE will run later to exploit any new DCE
- // opportunities that creates).
- FPM.addPass(BDCEPass());
- // Run instcombine after redundancy and dead bit elimination to exploit
- // opportunities opened up by them.
- FPM.addPass(InstCombinePass());
- invokePeepholeEPCallbacks(FPM, Level);
- // Re-consider control flow based optimizations after redundancy elimination,
- // redo DCE, etc.
- if (EnableDFAJumpThreading && Level.getSizeLevel() == 0)
- FPM.addPass(DFAJumpThreadingPass());
- FPM.addPass(JumpThreadingPass());
- FPM.addPass(CorrelatedValuePropagationPass());
- // Finally, do an expensive DCE pass to catch all the dead code exposed by
- // the simplifications and basic cleanup after all the simplifications.
- // TODO: Investigate if this is too expensive.
- FPM.addPass(ADCEPass());
- // Specially optimize memory movement as it doesn't look like dataflow in SSA.
- FPM.addPass(MemCpyOptPass());
- FPM.addPass(DSEPass());
- FPM.addPass(createFunctionToLoopPassAdaptor(
- LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
- /*AllowSpeculation=*/true),
- /*UseMemorySSA=*/true, /*UseBlockFrequencyInfo=*/true));
- FPM.addPass(CoroElidePass());
- for (auto &C : ScalarOptimizerLateEPCallbacks)
- C(FPM, Level);
- FPM.addPass(SimplifyCFGPass(SimplifyCFGOptions()
- .convertSwitchRangeToICmp(true)
- .hoistCommonInsts(true)
- .sinkCommonInsts(true)));
- FPM.addPass(InstCombinePass());
- invokePeepholeEPCallbacks(FPM, Level);
- if (EnableCHR && Level == OptimizationLevel::O3 && PGOOpt &&
- (PGOOpt->Action == PGOOptions::IRUse ||
- PGOOpt->Action == PGOOptions::SampleUse))
- FPM.addPass(ControlHeightReductionPass());
- return FPM;
- }
- void PassBuilder::addRequiredLTOPreLinkPasses(ModulePassManager &MPM) {
- MPM.addPass(CanonicalizeAliasesPass());
- MPM.addPass(NameAnonGlobalPass());
- }
- void PassBuilder::addPGOInstrPasses(ModulePassManager &MPM,
- OptimizationLevel Level, bool RunProfileGen,
- bool IsCS, std::string ProfileFile,
- std::string ProfileRemappingFile) { // Adds instrumentation-based PGO passes to MPM: profile use when !RunProfileGen, otherwise counter generation followed by profile lowering.
- assert(Level != OptimizationLevel::O0 && "Not expecting O0 here!");
- if (!IsCS && !DisablePreInliner) { // The pre-inliner stage below is skipped when IsCS is set or DisablePreInliner is on.
- InlineParams IP;
- IP.DefaultThreshold = PreInlineThreshold;
- // FIXME: The hint threshold has the same value used by the regular inliner
- // when not optimizing for size. This should probably be lowered after
- // performance testing.
- // FIXME: this comment is cargo culted from the old pass manager, revisit.
- IP.HintThreshold = Level.isOptimizingForSize() ? PreInlineThreshold : 325;
- ModuleInlinerWrapperPass MIWP(IP);
- CGSCCPassManager &CGPipeline = MIWP.getPM();
- FunctionPassManager FPM; // Function cleanup nested inside the pre-inliner's CGSCC pipeline.
- FPM.addPass(SROAPass());
- FPM.addPass(EarlyCSEPass()); // Catch trivial redundancies.
- FPM.addPass(SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(
- true))); // Merge & remove basic blocks.
- FPM.addPass(InstCombinePass()); // Combine silly sequences.
- invokePeepholeEPCallbacks(FPM, Level);
- CGPipeline.addPass(createCGSCCToFunctionPassAdaptor(
- std::move(FPM), PTO.EagerlyInvalidateAnalyses));
- MPM.addPass(std::move(MIWP));
- // Delete anything that is now dead to make sure that we don't instrument
- // dead code. Instrumentation can end up keeping dead code around and
- // dramatically increase code size.
- MPM.addPass(GlobalDCEPass());
- }
- if (!RunProfileGen) { // Profile-use path: annotate from ProfileFile and return without adding any instrumentation.
- assert(!ProfileFile.empty() && "Profile use expecting a profile file!");
- MPM.addPass(PGOInstrumentationUse(ProfileFile, ProfileRemappingFile, IsCS));
- // Cache ProfileSummaryAnalysis once to avoid the potential need to insert
- // RequireAnalysisPass for PSI before subsequent non-module passes.
- MPM.addPass(RequireAnalysisPass<ProfileSummaryAnalysis, Module>());
- return;
- }
- // Perform PGO instrumentation (reached only when RunProfileGen is true).
- MPM.addPass(PGOInstrumentationGen(IsCS));
- FunctionPassManager FPM; // Separate manager from the pre-inliner one above; holds only the loop rotation below.
- // Disable header duplication in loop rotation at -Oz.
- FPM.addPass(createFunctionToLoopPassAdaptor(
- LoopRotatePass(Level != OptimizationLevel::Oz), /*UseMemorySSA=*/false,
- /*UseBlockFrequencyInfo=*/false));
- MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM),
- PTO.EagerlyInvalidateAnalyses));
- // Add the profile lowering pass.
- InstrProfOptions Options;
- if (!ProfileFile.empty()) // On this path ProfileFile is used as the instrumentation output setting, not as an input profile.
- Options.InstrProfileOutput = ProfileFile;
- // Do counter promotion at Level greater than O0 (O0 is rejected by the assert at the top).
- Options.DoCounterPromotion = true;
- Options.UseBFIInPromotion = IsCS;
- MPM.addPass(InstrProfiling(Options, IsCS));
- }
- /// O0 flavour of the instrumentation-PGO setup. Depending on \p RunProfileGen
- /// this either instruments the module (counter generation followed by
- /// profile lowering, with counter promotion switched off) or consumes an
- /// existing profile from \p ProfileFile.
- void PassBuilder::addPGOInstrPassesForO0(ModulePassManager &MPM,
-                                          bool RunProfileGen, bool IsCS,
-                                          std::string ProfileFile,
-                                          std::string ProfileRemappingFile) {
-   if (RunProfileGen) {
-     // Instrumentation build: insert the counters first.
-     MPM.addPass(PGOInstrumentationGen(IsCS));
-     // Then configure and schedule the lowering of the profile intrinsics.
-     InstrProfOptions LoweringOpts;
-     if (!ProfileFile.empty()) {
-       LoweringOpts.InstrProfileOutput = ProfileFile;
-     }
-     // Counter promotion is never done at O0.
-     LoweringOpts.DoCounterPromotion = false;
-     LoweringOpts.UseBFIInPromotion = IsCS;
-     MPM.addPass(InstrProfiling(LoweringOpts, IsCS));
-     return;
-   }
-   // Profile-use build: a profile file is mandatory.
-   assert(!ProfileFile.empty() && "Profile use expecting a profile file!");
-   MPM.addPass(PGOInstrumentationUse(ProfileFile, ProfileRemappingFile, IsCS));
-   // Compute ProfileSummaryAnalysis once here so later non-module passes do
-   // not each need a RequireAnalysisPass for it.
-   MPM.addPass(RequireAnalysisPass<ProfileSummaryAnalysis, Module>());
- }
- /// Derives the inliner parameters for \p Level from its speedup and size
- /// components.
- static InlineParams getInlineParamsFromOptLevel(OptimizationLevel Level) {
-   const auto SpeedupLevel = Level.getSpeedupLevel();
-   const auto SizeLevel = Level.getSizeLevel();
-   return getInlineParams(SpeedupLevel, SizeLevel);
- }
- ModuleInlinerWrapperPass
- PassBuilder::buildInlinerPipeline(OptimizationLevel Level,
- ThinOrFullLTOPhase Phase) { // Builds the inliner wrapper pass: module-level prerequisites first, then the CGSCC pipeline with the function simplification pipeline nested inside it.
- InlineParams IP = getInlineParamsFromOptLevel(Level);
- if (Phase == ThinOrFullLTOPhase::ThinLTOPreLink && PGOOpt &&
- PGOOpt->Action == PGOOptions::SampleUse)
- IP.HotCallSiteThreshold = 0; // Applied only for sample-profile use in the ThinLTO pre-link phase.
- if (PGOOpt)
- IP.EnableDeferral = EnablePGOInlineDeferral;
- ModuleInlinerWrapperPass MIWP(IP, PerformMandatoryInliningsFirst,
- UseInlineAdvisor, MaxDevirtIterations);
- // Require the GlobalsAA analysis for the module so we can query it within
- // the CGSCC pipeline.
- MIWP.addModulePass(RequireAnalysisPass<GlobalsAA, Module>());
- // Invalidate AAManager so it can be recreated and pick up the newly available
- // GlobalsAA.
- MIWP.addModulePass(
- createModuleToFunctionPassAdaptor(InvalidateAnalysisPass<AAManager>()));
- // Require the ProfileSummaryAnalysis for the module so we can query it within
- // the inliner pass.
- MIWP.addModulePass(RequireAnalysisPass<ProfileSummaryAnalysis, Module>());
- // Now begin the main postorder CGSCC pipeline.
- // FIXME: The current CGSCC pipeline has its origins in the legacy pass
- // manager and trying to emulate its precise behavior. Much of this doesn't
- // make a lot of sense and we should revisit the core CGSCC structure.
- CGSCCPassManager &MainCGPipeline = MIWP.getPM();
- // Note: historically, the PruneEH pass was run first to deduce nounwind and
- // generally clean up exception handling overhead. It isn't clear this is
- // valuable as the inliner doesn't currently care whether it is inlining an
- // invoke or a call.
- if (AttributorRun & AttributorRunOption::CGSCC)
- MainCGPipeline.addPass(AttributorCGSCCPass());
- // Now deduce any function attributes based on the current code.
- MainCGPipeline.addPass(PostOrderFunctionAttrsPass());
- // When at O3 add argument promotion to the pass pipeline.
- // FIXME: It isn't at all clear why this should be limited to O3.
- if (Level == OptimizationLevel::O3)
- MainCGPipeline.addPass(ArgumentPromotionPass());
- // Try to perform OpenMP specific optimizations. This is a (quick!) no-op if
- // there are no OpenMP runtime calls present in the module.
- if (Level == OptimizationLevel::O2 || Level == OptimizationLevel::O3)
- MainCGPipeline.addPass(OpenMPOptCGSCCPass());
- for (auto &C : CGSCCOptimizerLateEPCallbacks)
- C(MainCGPipeline, Level);
- // Lastly, add the core function simplification pipeline nested inside the
- // CGSCC walk.
- MainCGPipeline.addPass(createCGSCCToFunctionPassAdaptor(
- buildFunctionSimplificationPipeline(Level, Phase),
- PTO.EagerlyInvalidateAnalyses, EnableNoRerunSimplificationPipeline));
- MainCGPipeline.addPass(CoroSplitPass(Level != OptimizationLevel::O0));
- if (EnableNoRerunSimplificationPipeline) // Schedules a late module pass that invalidates ShouldNotRunFunctionPassesAnalysis on functions.
- MIWP.addLateModulePass(createModuleToFunctionPassAdaptor(
- InvalidateAnalysisPass<ShouldNotRunFunctionPassesAnalysis>()));
- return MIWP;
- }
- /// Builds the module-inliner variant of the inlining pipeline: the module
- /// inliner itself, then the function simplification pipeline over the
- /// module's functions, then coroutine splitting through a post-order CGSCC
- /// adaptor.
- ///
- /// \param Level optimization level used for the inline parameters and the
- ///              nested simplification pipeline.
- /// \param Phase LTO phase; forwarded to the simplification pipeline and used
- ///              to adjust the hot call-site threshold.
- /// \returns the assembled module pass manager.
- ModulePassManager
- PassBuilder::buildModuleInlinerPipeline(OptimizationLevel Level,
-                                         ThinOrFullLTOPhase Phase) {
-   ModulePassManager MPM;
-   InlineParams IP = getInlineParamsFromOptLevel(Level);
-   if (Phase == ThinOrFullLTOPhase::ThinLTOPreLink && PGOOpt &&
-       PGOOpt->Action == PGOOptions::SampleUse)
-     IP.HotCallSiteThreshold = 0;
-   // The inline deferral logic is used to avoid losing some
-   // inlining chance in future. It is helpful in SCC inliner, in which
-   // inlining is processed in bottom-up order.
-   // While in module inliner, the inlining order is a priority-based order
-   // by default. The inline deferral is unnecessary there. So we disable the
-   // inline deferral logic in module inliner.
-   //
-   // This assignment is unconditional, so EnablePGOInlineDeferral is
-   // intentionally not consulted here: any value stored from it would be
-   // overwritten before being read.
-   IP.EnableDeferral = false;
-   MPM.addPass(ModuleInlinerPass(IP, UseInlineAdvisor));
-   MPM.addPass(createModuleToFunctionPassAdaptor(
-       buildFunctionSimplificationPipeline(Level, Phase),
-       PTO.EagerlyInvalidateAnalyses));
-   MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(
-       CoroSplitPass(Level != OptimizationLevel::O0)));
-   return MPM;
- }
- ModulePassManager
- PassBuilder::buildModuleSimplificationPipeline(OptimizationLevel Level,
- ThinOrFullLTOPhase Phase) { // Builds the module simplification pipeline: early function cleanup, optional sample-profile loading, interprocedural passes, optional instrumentation PGO, then the inliner pipeline.
- ModulePassManager MPM;
- // Place pseudo probe instrumentation as the first pass of the pipeline to
- // minimize the impact of optimization changes.
- if (PGOOpt && PGOOpt->PseudoProbeForProfiling &&
- Phase != ThinOrFullLTOPhase::ThinLTOPostLink)
- MPM.addPass(SampleProfileProbePass(TM));
- bool HasSampleProfile = PGOOpt && (PGOOpt->Action == PGOOptions::SampleUse);
- // In ThinLTO mode, when flattened profile is used, all the available
- // profile information will be annotated in PreLink phase so there is
- // no need to load the profile again in PostLink.
- bool LoadSampleProfile =
- HasSampleProfile &&
- !(FlattenedProfileUsed && Phase == ThinOrFullLTOPhase::ThinLTOPostLink);
- // During the ThinLTO backend phase we perform early indirect call promotion
- // here, before globalopt. Otherwise imported available_externally functions
- // look unreferenced and are removed. If we are going to load the sample
- // profile then defer until later.
- // TODO: See if we can move later and consolidate with the location where
- // we perform ICP when we are loading a sample profile.
- // TODO: We pass HasSampleProfile (whether there was a sample profile file
- // passed to the compile) to the SamplePGO flag of ICP. This is used to
- // determine whether the new direct calls are annotated with prof metadata.
- // Ideally this should be determined from whether the IR is annotated with
- // sample profile, and not whether a sample profile was provided on the
- // command line. E.g. for flattened profiles where we will not be reloading
- // the sample profile in the ThinLTO backend, we ideally shouldn't have to
- // provide the sample profile file.
- if (Phase == ThinOrFullLTOPhase::ThinLTOPostLink && !LoadSampleProfile)
- MPM.addPass(PGOIndirectCallPromotion(true /* InLTO */, HasSampleProfile));
- // Do basic inference of function attributes from known properties of system
- // libraries and other oracles.
- MPM.addPass(InferFunctionAttrsPass());
- // Create an early function pass manager to cleanup the output of the
- // frontend.
- FunctionPassManager EarlyFPM;
- // Lower llvm.expect to metadata before attempting transforms.
- // Compare/branch metadata may alter the behavior of passes like SimplifyCFG.
- EarlyFPM.addPass(LowerExpectIntrinsicPass());
- EarlyFPM.addPass(SimplifyCFGPass());
- EarlyFPM.addPass(SROAPass());
- EarlyFPM.addPass(EarlyCSEPass());
- EarlyFPM.addPass(CoroEarlyPass());
- if (Level == OptimizationLevel::O3)
- EarlyFPM.addPass(CallSiteSplittingPass());
- // In SamplePGO ThinLTO backend, we need instcombine before profile annotation
- // to convert bitcast to direct calls so that they can be inlined during the
- // profile annotation preparation step.
- // More details about SamplePGO design can be found in:
- // https://research.google.com/pubs/pub45290.html
- // FIXME: revisit how SampleProfileLoad/Inliner/ICP is structured.
- if (LoadSampleProfile)
- EarlyFPM.addPass(InstCombinePass());
- MPM.addPass(createModuleToFunctionPassAdaptor(std::move(EarlyFPM),
- PTO.EagerlyInvalidateAnalyses));
- if (LoadSampleProfile) {
- // Annotate sample profile right after early FPM to ensure freshness of
- // the debug info.
- MPM.addPass(SampleProfileLoaderPass(PGOOpt->ProfileFile,
- PGOOpt->ProfileRemappingFile, Phase));
- // Cache ProfileSummaryAnalysis once to avoid the potential need to insert
- // RequireAnalysisPass for PSI before subsequent non-module passes.
- MPM.addPass(RequireAnalysisPass<ProfileSummaryAnalysis, Module>());
- // Do not invoke ICP in the LTOPrelink phase as it makes it hard
- // for the profile annotation to be accurate in the LTO backend.
- if (Phase != ThinOrFullLTOPhase::ThinLTOPreLink &&
- Phase != ThinOrFullLTOPhase::FullLTOPreLink)
- // We perform early indirect call promotion here, before globalopt.
- // This is important for the ThinLTO backend phase because otherwise
- // imported available_externally functions look unreferenced and are
- // removed.
- MPM.addPass(
- PGOIndirectCallPromotion(true /* IsInLTO */, true /* SamplePGO */));
- }
- // Try to perform OpenMP specific optimizations on the module. This is a
- // (quick!) no-op if there are no OpenMP runtime calls present in the module.
- if (Level != OptimizationLevel::O0)
- MPM.addPass(OpenMPOptPass());
- if (AttributorRun & AttributorRunOption::MODULE)
- MPM.addPass(AttributorPass());
- // Lower type metadata and the type.test intrinsic in the ThinLTO
- // post link pipeline after ICP. This is to enable usage of the type
- // tests in ICP sequences.
- if (Phase == ThinOrFullLTOPhase::ThinLTOPostLink)
- MPM.addPass(LowerTypeTestsPass(nullptr, nullptr, true));
- for (auto &C : PipelineEarlySimplificationEPCallbacks)
- C(MPM, Level);
- // Specialize functions with IPSCCP.
- if (EnableFunctionSpecialization && Level == OptimizationLevel::O3)
- MPM.addPass(FunctionSpecializationPass());
- // Interprocedural constant propagation now that basic cleanup has occurred
- // and prior to optimizing globals.
- // FIXME: This position in the pipeline hasn't been carefully considered in
- // years, it should be re-analyzed.
- MPM.addPass(IPSCCPPass());
- // Attach metadata to indirect call sites indicating the set of functions
- // they may target at run-time. This should follow IPSCCP.
- MPM.addPass(CalledValuePropagationPass());
- // Optimize globals to try and fold them into constants.
- MPM.addPass(GlobalOptPass());
- // Promote any localized globals to SSA registers.
- // FIXME: Should this instead be a run of SROA?
- // FIXME: We should probably run instcombine and simplifycfg afterward to
- // delete control flows that are dead once globals have been folded to
- // constants.
- MPM.addPass(createModuleToFunctionPassAdaptor(PromotePass()));
- // Remove any dead arguments exposed by cleanups and constant folding
- // globals.
- MPM.addPass(DeadArgumentEliminationPass());
- // Create a small function pass pipeline to cleanup after all the global
- // optimizations.
- FunctionPassManager GlobalCleanupPM;
- GlobalCleanupPM.addPass(InstCombinePass());
- invokePeepholeEPCallbacks(GlobalCleanupPM, Level);
- GlobalCleanupPM.addPass(
- SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
- MPM.addPass(createModuleToFunctionPassAdaptor(std::move(GlobalCleanupPM),
- PTO.EagerlyInvalidateAnalyses));
- // Add all the requested passes for instrumentation PGO, if requested.
- if (PGOOpt && Phase != ThinOrFullLTOPhase::ThinLTOPostLink &&
- (PGOOpt->Action == PGOOptions::IRInstr ||
- PGOOpt->Action == PGOOptions::IRUse)) {
- addPGOInstrPasses(MPM, Level,
- /* RunProfileGen */ PGOOpt->Action == PGOOptions::IRInstr,
- /* IsCS */ false, PGOOpt->ProfileFile,
- PGOOpt->ProfileRemappingFile);
- MPM.addPass(PGOIndirectCallPromotion(false, false));
- }
- if (PGOOpt && Phase != ThinOrFullLTOPhase::ThinLTOPostLink &&
- PGOOpt->CSAction == PGOOptions::CSIRInstr)
- MPM.addPass(PGOInstrumentationGenCreateVar(PGOOpt->CSProfileGenFile));
- // Synthesize function entry counts for non-PGO compilation.
- if (EnableSyntheticCounts && !PGOOpt)
- MPM.addPass(SyntheticCountsPropagation());
- if (EnableModuleInliner) // Exactly one of the two inliner pipelines is added.
- MPM.addPass(buildModuleInlinerPipeline(Level, Phase));
- else
- MPM.addPass(buildInlinerPipeline(Level, Phase));
- if (EnableMemProfiler && Phase != ThinOrFullLTOPhase::ThinLTOPreLink) {
- MPM.addPass(createModuleToFunctionPassAdaptor(MemProfilerPass()));
- MPM.addPass(ModuleMemProfilerPass());
- }
- return MPM;
- }
- /// Appends loop vectorization, SLP vectorization, unrolling and the associated cleanup passes to FPM; IsFullLTO selects where unrolling runs and which cleanups are added. TODO: Should LTO cause any differences to this set of passes?
- void PassBuilder::addVectorPasses(OptimizationLevel Level,
- FunctionPassManager &FPM, bool IsFullLTO) {
- FPM.addPass(LoopVectorizePass(
- LoopVectorizeOptions(!PTO.LoopInterleaving, !PTO.LoopVectorization))); // NOTE(review): both arguments are the negated PTO flags, presumably "only when forced" switches; confirm against LoopVectorizeOptions.
- if (IsFullLTO) {
- // The vectorizer may have significantly shortened a loop body; unroll
- // again. Unroll small loops to hide loop backedge latency and saturate any
- // parallel execution resources of an out-of-order processor. We also then
- // need to clean up redundancies and loop invariant code.
- // FIXME: It would be really good to use a loop-integrated instruction
- // combiner for cleanup here so that the unrolling and LICM can be pipelined
- // across the loop nests.
- // We do UnrollAndJam in a separate LPM to ensure it happens before unroll
- if (EnableUnrollAndJam && PTO.LoopUnrolling)
- FPM.addPass(createFunctionToLoopPassAdaptor(
- LoopUnrollAndJamPass(Level.getSpeedupLevel())));
- FPM.addPass(LoopUnrollPass(LoopUnrollOptions(
- Level.getSpeedupLevel(), /*OnlyWhenForced=*/!PTO.LoopUnrolling,
- PTO.ForgetAllSCEVInLoopUnroll)));
- FPM.addPass(WarnMissedTransformationsPass());
- }
- if (!IsFullLTO) {
- // Eliminate loads by forwarding stores from the previous iteration to loads
- // of the current iteration.
- FPM.addPass(LoopLoadEliminationPass());
- }
- // Cleanup after the loop optimization passes.
- FPM.addPass(InstCombinePass());
- if (Level.getSpeedupLevel() > 1 && ExtraVectorizerPasses) {
- ExtraVectorPassManager ExtraPasses;
- // At higher optimization levels, try to clean up any runtime overlap and
- // alignment checks inserted by the vectorizer. We want to track correlated
- // runtime checks for two inner loops in the same outer loop, fold any
- // common computations, hoist loop-invariant aspects out of any outer loop,
- // and unswitch the runtime checks if possible. Once hoisted, we may have
- // dead (or speculatable) control flows or more combining opportunities.
- ExtraPasses.addPass(EarlyCSEPass());
- ExtraPasses.addPass(CorrelatedValuePropagationPass());
- ExtraPasses.addPass(InstCombinePass());
- LoopPassManager LPM;
- LPM.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
- /*AllowSpeculation=*/true));
- LPM.addPass(SimpleLoopUnswitchPass(/* NonTrivial */ Level ==
- OptimizationLevel::O3));
- ExtraPasses.addPass(
- RequireAnalysisPass<OptimizationRemarkEmitterAnalysis, Function>());
- ExtraPasses.addPass(
- createFunctionToLoopPassAdaptor(std::move(LPM), /*UseMemorySSA=*/true,
- /*UseBlockFrequencyInfo=*/true));
- ExtraPasses.addPass(
- SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
- ExtraPasses.addPass(InstCombinePass());
- FPM.addPass(std::move(ExtraPasses));
- }
- // Now that we've formed fast to execute loop structures, we do further
- // optimizations. These are run afterward as they might block doing complex
- // analyses and transforms such as what are needed for loop vectorization.
- // Cleanup after loop vectorization, etc. Simplification passes like CVP and
- // GVN, loop transforms, and others have already run, so it's now better to
- // convert to more optimized IR using more aggressive simplify CFG options.
- // The extra sinking transform can create larger basic blocks, so do this
- // before SLP vectorization.
- FPM.addPass(SimplifyCFGPass(SimplifyCFGOptions()
- .forwardSwitchCondToPhi(true)
- .convertSwitchRangeToICmp(true)
- .convertSwitchToLookupTable(true)
- .needCanonicalLoops(false)
- .hoistCommonInsts(true)
- .sinkCommonInsts(true)));
- if (IsFullLTO) {
- FPM.addPass(SCCPPass());
- FPM.addPass(InstCombinePass());
- FPM.addPass(BDCEPass());
- }
- // Optimize parallel scalar instruction chains into SIMD instructions.
- if (PTO.SLPVectorization) {
- FPM.addPass(SLPVectorizerPass());
- if (Level.getSpeedupLevel() > 1 && ExtraVectorizerPasses) {
- FPM.addPass(EarlyCSEPass());
- }
- }
- // Enhance/cleanup vector code.
- FPM.addPass(VectorCombinePass());
- if (!IsFullLTO) {
- FPM.addPass(InstCombinePass());
- // Unroll small loops to hide loop backedge latency and saturate any
- // parallel execution resources of an out-of-order processor. We also then
- // need to clean up redundancies and loop invariant code.
- // FIXME: It would be really good to use a loop-integrated instruction
- // combiner for cleanup here so that the unrolling and LICM can be pipelined
- // across the loop nests.
- // We do UnrollAndJam in a separate LPM to ensure it happens before unroll
- if (EnableUnrollAndJam && PTO.LoopUnrolling) {
- FPM.addPass(createFunctionToLoopPassAdaptor(
- LoopUnrollAndJamPass(Level.getSpeedupLevel())));
- }
- FPM.addPass(LoopUnrollPass(LoopUnrollOptions(
- Level.getSpeedupLevel(), /*OnlyWhenForced=*/!PTO.LoopUnrolling,
- PTO.ForgetAllSCEVInLoopUnroll)));
- FPM.addPass(WarnMissedTransformationsPass());
- FPM.addPass(InstCombinePass());
- FPM.addPass(
- RequireAnalysisPass<OptimizationRemarkEmitterAnalysis, Function>());
- FPM.addPass(createFunctionToLoopPassAdaptor(
- LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
- /*AllowSpeculation=*/true),
- /*UseMemorySSA=*/true, /*UseBlockFrequencyInfo=*/true));
- }
- // Now that we've vectorized and unrolled loops, we may have more refined
- // alignment information, try to re-derive it here.
- FPM.addPass(AlignmentFromAssumptionsPass());
- if (IsFullLTO)
- FPM.addPass(InstCombinePass());
- }
- ModulePassManager
- PassBuilder::buildModuleOptimizationPipeline(OptimizationLevel Level,
- bool LTOPreLink) { // Builds the module optimization pipeline: global cleanup, optional context-sensitive PGO, the function-level loop/vector pipeline, then late module passes. LTOPreLink disables several of the steps below.
- ModulePassManager MPM;
- // Optimize globals now that the module is fully simplified.
- MPM.addPass(GlobalOptPass());
- MPM.addPass(GlobalDCEPass());
- // Run partial inlining pass to partially inline functions that have
- // large bodies.
- if (RunPartialInlining)
- MPM.addPass(PartialInlinerPass());
- // Remove avail extern fns and globals definitions since we aren't compiling
- // an object file for later LTO. For LTO we want to preserve these so they
- // are eligible for inlining at link-time. Note if they are unreferenced they
- // will be removed by GlobalDCE later, so this only impacts referenced
- // available externally globals. Eventually they will be suppressed during
- // codegen, but eliminating here enables more opportunity for GlobalDCE as it
- // may make globals referenced by available external functions dead and saves
- // running remaining passes on the eliminated functions. These should be
- // preserved during prelinking for link-time inlining decisions.
- if (!LTOPreLink)
- MPM.addPass(EliminateAvailableExternallyPass());
- if (EnableOrderFileInstrumentation)
- MPM.addPass(InstrOrderFilePass());
- // Do RPO function attribute inference across the module to forward-propagate
- // attributes where applicable.
- // FIXME: Is this really an optimization rather than a canonicalization?
- MPM.addPass(ReversePostOrderFunctionAttrsPass());
- // Do a post inline PGO instrumentation and use pass. This is a context
- // sensitive PGO pass. We don't want to do this in LTOPreLink phase as
- // cross-module inline has not been done yet. The context sensitive
- // instrumentation is after all the inlines are done.
- if (!LTOPreLink && PGOOpt) {
- if (PGOOpt->CSAction == PGOOptions::CSIRInstr)
- addPGOInstrPasses(MPM, Level, /* RunProfileGen */ true,
- /* IsCS */ true, PGOOpt->CSProfileGenFile,
- PGOOpt->ProfileRemappingFile);
- else if (PGOOpt->CSAction == PGOOptions::CSIRUse)
- addPGOInstrPasses(MPM, Level, /* RunProfileGen */ false,
- /* IsCS */ true, PGOOpt->ProfileFile,
- PGOOpt->ProfileRemappingFile);
- }
- // Re-require GlobalsAA here prior to function passes. This is particularly
- // useful as the above will have inlined, DCE'ed, and function-attr
- // propagated everything. We should at this point have a reasonably minimal
- // and richly annotated call graph. By computing aliasing and mod/ref
- // information for all local globals here, the late loop passes and notably
- // the vectorizer will be able to use them to help recognize vectorizable
- // memory operations.
- MPM.addPass(RequireAnalysisPass<GlobalsAA, Module>());
- FunctionPassManager OptimizePM;
- OptimizePM.addPass(Float2IntPass());
- OptimizePM.addPass(LowerConstantIntrinsicsPass());
- if (EnableMatrix) {
- OptimizePM.addPass(LowerMatrixIntrinsicsPass());
- OptimizePM.addPass(EarlyCSEPass());
- }
- // FIXME: We need to run some loop optimizations to re-rotate loops after
- // simplifycfg and others undo their rotation.
- // Optimize the loop execution. These passes operate on entire loop nests
- // rather than on each loop in an inside-out manner, and so they are actually
- // function passes.
- for (auto &C : VectorizerStartEPCallbacks)
- C(OptimizePM, Level);
- LoopPassManager LPM;
- // First rotate loops that may have been un-rotated by prior passes.
- // Disable header duplication at -Oz.
- LPM.addPass(LoopRotatePass(Level != OptimizationLevel::Oz, LTOPreLink));
- // Some loops may have become dead by now. Try to delete them.
- // FIXME: see discussion in https://reviews.llvm.org/D112851,
- // this may need to be revisited once we run GVN before loop deletion
- // in the simplification pipeline.
- LPM.addPass(LoopDeletionPass());
- OptimizePM.addPass(createFunctionToLoopPassAdaptor(
- std::move(LPM), /*UseMemorySSA=*/false, /*UseBlockFrequencyInfo=*/false));
- // Distribute loops to allow partial vectorization. I.e. isolate dependences
- // into separate loop that would otherwise inhibit vectorization. This is
- // currently only performed for loops marked with the metadata
- // llvm.loop.distribute=true or when -enable-loop-distribute is specified.
- OptimizePM.addPass(LoopDistributePass());
- // Populates the VFABI attribute with the scalar-to-vector mappings
- // from the TargetLibraryInfo.
- OptimizePM.addPass(InjectTLIMappings());
- addVectorPasses(Level, OptimizePM, /* IsFullLTO */ false);
- // LoopSink pass sinks instructions hoisted by LICM, which serves as a
- // canonicalization pass that enables other optimizations. As a result,
- // LoopSink pass needs to be a very late IR pass to avoid undoing LICM
- // result too early.
- OptimizePM.addPass(LoopSinkPass());
- // And finally clean up LCSSA form before generating code.
- OptimizePM.addPass(InstSimplifyPass());
- // This hoists/decomposes div/rem ops. It should run after other sink/hoist
- // passes to avoid re-sinking, but before SimplifyCFG because it can allow
- // flattening of blocks.
- OptimizePM.addPass(DivRemPairsPass());
- // LoopSink (and other loop passes since the last simplifyCFG) might have
- // resulted in single-entry-single-exit or empty blocks. Clean up the CFG.
- OptimizePM.addPass(
- SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
- OptimizePM.addPass(CoroCleanupPass());
- // Add the core optimizing pipeline.
- MPM.addPass(createModuleToFunctionPassAdaptor(std::move(OptimizePM),
- PTO.EagerlyInvalidateAnalyses));
- for (auto &C : OptimizerLastEPCallbacks)
- C(MPM, Level);
- // Split out cold code. Splitting is done late to avoid hiding context from
- // other optimizations and inadvertently regressing performance. The tradeoff
- // is that this has a higher code size cost than splitting early.
- if (EnableHotColdSplit && !LTOPreLink)
- MPM.addPass(HotColdSplittingPass());
- // Search the code for similar regions of code. If enough similar regions can
- // be found where extracting the regions into their own function will decrease
- // the size of the program, we extract the regions, and deduplicate the
- // structurally similar regions.
- if (EnableIROutliner)
- MPM.addPass(IROutlinerPass());
- // Merge functions if requested.
- if (PTO.MergeFunctions)
- MPM.addPass(MergeFunctionsPass());
- if (PTO.CallGraphProfile)
- MPM.addPass(CGProfilePass());
- // Now we need to do some global optimization transforms.
- // FIXME: It would seem like these should come first in the optimization
- // pipeline and maybe be the bottom of the canonicalization pipeline? Weird
- // ordering here.
- MPM.addPass(GlobalDCEPass());
- MPM.addPass(ConstantMergePass());
- // TODO: Relative lookup table converter pass caused an issue when full lto is
- // enabled. See https://reviews.llvm.org/D94355 for more details.
- // Until the issue is fixed, disable this pass during pre-linking phase.
- if (!LTOPreLink)
- MPM.addPass(RelLookupTableConverterPass());
- return MPM;
- }
- /// Assembles the default per-module pipeline for a non-O0 \p Level: setup
- /// passes and pipeline-start callbacks, the module simplification pipeline,
- /// the module optimization pipeline, and the trailing bookkeeping passes.
- /// When \p LTOPreLink is set, the simplification pipeline is built for the
- /// full-LTO pre-link phase and the required LTO pre-link passes are appended.
- ModulePassManager
- PassBuilder::buildPerModuleDefaultPipeline(OptimizationLevel Level,
-                                            bool LTOPreLink) {
-   assert(Level != OptimizationLevel::O0 &&
-          "Must request optimizations for the default pipeline!");
-   // Phase handed to the simplification pipeline; depends only on LTOPreLink.
-   const ThinOrFullLTOPhase SimplificationPhase =
-       LTOPreLink ? ThinOrFullLTOPhase::FullLTOPreLink
-                  : ThinOrFullLTOPhase::None;
-   ModulePassManager MPM;
-   // @llvm.global.annotations -> !annotation metadata.
-   MPM.addPass(Annotation2MetadataPass());
-   // Forced function attributes must be in place before everything else.
-   MPM.addPass(ForceFunctionAttrsPass());
-   // Pipeline-start extension point.
-   for (auto &StartCallback : PipelineStartEPCallbacks) {
-     StartCallback(MPM, Level);
-   }
-   if (PGOOpt && PGOOpt->DebugInfoForProfiling) {
-     MPM.addPass(createModuleToFunctionPassAdaptor(AddDiscriminatorsPass()));
-   }
-   // Core simplification, followed by optimization.
-   MPM.addPass(buildModuleSimplificationPipeline(Level, SimplificationPhase));
-   MPM.addPass(buildModuleOptimizationPipeline(Level, LTOPreLink));
-   if (PGOOpt) {
-     if (PGOOpt->PseudoProbeForProfiling &&
-         PGOOpt->Action == PGOOptions::SampleUse) {
-       MPM.addPass(PseudoProbeUpdatePass());
-     }
-   }
-   // Annotation remarks are emitted after all optimization.
-   addAnnotationRemarksPass(MPM);
-   if (LTOPreLink) {
-     addRequiredLTOPreLinkPasses(MPM);
-   }
-   return MPM;
- }
- ModulePassManager
- PassBuilder::buildThinLTOPreLinkDefaultPipeline(OptimizationLevel Level) { // Builds the ThinLTO pre-link pipeline: simplification only (no module optimization pipeline), followed by cleanup and the required LTO pre-link passes.
- assert(Level != OptimizationLevel::O0 &&
- "Must request optimizations for the default pipeline!");
- ModulePassManager MPM;
- // Convert @llvm.global.annotations to !annotation metadata.
- MPM.addPass(Annotation2MetadataPass());
- // Force any function attributes we want the rest of the pipeline to observe.
- MPM.addPass(ForceFunctionAttrsPass());
- if (PGOOpt && PGOOpt->DebugInfoForProfiling)
- MPM.addPass(createModuleToFunctionPassAdaptor(AddDiscriminatorsPass()));
- // Apply module pipeline start EP callback.
- for (auto &C : PipelineStartEPCallbacks)
- C(MPM, Level);
- // If we are planning to perform ThinLTO later, we don't bloat the code with
- // unrolling/vectorization/... now. Just simplify the module as much as we
- // can.
- MPM.addPass(buildModuleSimplificationPipeline(
- Level, ThinOrFullLTOPhase::ThinLTOPreLink));
- // Run partial inlining pass to partially inline functions that have
- // large bodies.
- // FIXME: It isn't clear whether this is really the right place to run this
- // in ThinLTO. Because there is another canonicalization and simplification
- // phase that will run after the thin link, running this here ends up with
- // less information than will be available later and it may grow functions in
- // ways that aren't beneficial.
- if (RunPartialInlining)
- MPM.addPass(PartialInlinerPass());
- // Reduce the size of the IR as much as possible.
- MPM.addPass(GlobalOptPass());
- // Module simplification splits coroutines, but does not fully clean up
- // coroutine intrinsics. To ensure ThinLTO optimization passes don't trip up
- // on these, we schedule the cleanup here.
- MPM.addPass(createModuleToFunctionPassAdaptor(CoroCleanupPass()));
- if (PGOOpt && PGOOpt->PseudoProbeForProfiling &&
- PGOOpt->Action == PGOOptions::SampleUse)
- MPM.addPass(PseudoProbeUpdatePass());
- // Handle OptimizerLastEPCallbacks added by clang on PreLink. Actual
- // optimization is going to be done in PostLink stage, but clang can't
- // add callbacks there in case of in-process ThinLTO called by linker.
- for (auto &C : OptimizerLastEPCallbacks)
- C(MPM, Level);
- // Emit annotation remarks.
- addAnnotationRemarksPass(MPM);
- addRequiredLTOPreLinkPasses(MPM);
- return MPM;
- }
- ModulePassManager PassBuilder::buildThinLTODefaultPipeline(
- OptimizationLevel Level, const ModuleSummaryIndex *ImportSummary) { // Builds the ThinLTO post-link pipeline; ImportSummary may be null, in which case the summary-driven WPD/LowerTypeTests passes are not added.
- ModulePassManager MPM;
- // Convert @llvm.global.annotations to !annotation metadata.
- MPM.addPass(Annotation2MetadataPass());
- if (ImportSummary) {
- // These passes import type identifier resolutions for whole-program
- // devirtualization and CFI. They must run early because other passes may
- // disturb the specific instruction patterns that these passes look for,
- // creating dependencies on resolutions that may not appear in the summary.
- //
- // For example, GVN may transform the pattern assume(type.test) appearing in
- // two basic blocks into assume(phi(type.test, type.test)), which would
- // transform a dependency on a WPD resolution into a dependency on a type
- // identifier resolution for CFI.
- //
- // Also, WPD has access to more precise information than ICP and can
- // devirtualize more effectively, so it should operate on the IR first.
- //
- // The WPD and LowerTypeTest passes need to run at -O0 to lower type
- // metadata and intrinsics.
- MPM.addPass(WholeProgramDevirtPass(nullptr, ImportSummary));
- MPM.addPass(LowerTypeTestsPass(nullptr, ImportSummary));
- }
- if (Level == OptimizationLevel::O0) { // O0 takes this short path and returns before any simplification or optimization pipeline is added.
- // Run a second time to clean up any type tests left behind by WPD for use
- // in ICP.
- MPM.addPass(LowerTypeTestsPass(nullptr, nullptr, true));
- // Drop available_externally and unreferenced globals. This is necessary
- // with ThinLTO in order to avoid leaving undefined references to dead
- // globals in the object file.
- MPM.addPass(EliminateAvailableExternallyPass());
- MPM.addPass(GlobalDCEPass());
- return MPM;
- }
- // Force any function attributes we want the rest of the pipeline to observe.
- MPM.addPass(ForceFunctionAttrsPass());
- // Add the core simplification pipeline.
- MPM.addPass(buildModuleSimplificationPipeline(
- Level, ThinOrFullLTOPhase::ThinLTOPostLink));
- // Now add the optimization pipeline.
- MPM.addPass(buildModuleOptimizationPipeline(Level)); // Single-argument call: relies on a default for LTOPreLink declared outside this view (presumably false); confirm in the header.
- // Emit annotation remarks.
- addAnnotationRemarksPass(MPM);
- return MPM;
- }
- ModulePassManager
- PassBuilder::buildLTOPreLinkDefaultPipeline(OptimizationLevel Level) {
- // Only optimizing levels are accepted here; O0 trips the assertion.
- assert(Level != OptimizationLevel::O0 &&
- "Must request optimizations for the default pipeline!");
- // FIXME: A customized pre-link pipeline should be used instead of simply
- // delegating to the per-module default pipeline.
- const bool IsLTOPreLink = true;
- return buildPerModuleDefaultPipeline(Level, IsLTOPreLink);
- }
- ModulePassManager
- PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level,
- ModuleSummaryIndex *ExportSummary) {
- ModulePassManager MPM; // The module pipeline assembled below; returned by value.
- // Convert @llvm.global.annotations to !annotation metadata.
- MPM.addPass(Annotation2MetadataPass());
- // Create a function that performs CFI checks for cross-DSO calls with targets
- // in the current module.
- MPM.addPass(CrossDSOCFIPass());
- if (Level == OptimizationLevel::O0) {
- // The WPD and LowerTypeTest passes need to run at -O0 to lower type
- // metadata and intrinsics.
- MPM.addPass(WholeProgramDevirtPass(ExportSummary, nullptr));
- MPM.addPass(LowerTypeTestsPass(ExportSummary, nullptr));
- // Run a second time to clean up any type tests left behind by WPD for use
- // in ICP.
- MPM.addPass(LowerTypeTestsPass(nullptr, nullptr, true));
- // Emit annotation remarks.
- addAnnotationRemarksPass(MPM);
- return MPM; // -O0: none of the passes below are scheduled.
- }
- if (PGOOpt && PGOOpt->Action == PGOOptions::SampleUse) {
- // Load sample profile before running the LTO optimization pipeline.
- MPM.addPass(SampleProfileLoaderPass(PGOOpt->ProfileFile,
- PGOOpt->ProfileRemappingFile,
- ThinOrFullLTOPhase::FullLTOPostLink));
- // Cache ProfileSummaryAnalysis once to avoid the potential need to insert
- // RequireAnalysisPass for PSI before subsequent non-module passes.
- MPM.addPass(RequireAnalysisPass<ProfileSummaryAnalysis, Module>());
- }
- // Try to run OpenMP optimizations, quick no-op if no OpenMP metadata present.
- MPM.addPass(OpenMPOptPass());
- // Remove unused virtual tables to improve the quality of code generated by
- // whole-program devirtualization and bitset lowering.
- MPM.addPass(GlobalDCEPass());
- // Force any function attributes we want the rest of the pipeline to observe.
- MPM.addPass(ForceFunctionAttrsPass());
- // Do basic inference of function attributes from known properties of system
- // libraries and other oracles.
- MPM.addPass(InferFunctionAttrsPass());
- if (Level.getSpeedupLevel() > 1) {
- FunctionPassManager EarlyFPM;
- EarlyFPM.addPass(CallSiteSplittingPass());
- MPM.addPass(createModuleToFunctionPassAdaptor(
- std::move(EarlyFPM), PTO.EagerlyInvalidateAnalyses));
- // Indirect call promotion. This should promote all the targets that are
- // left by the earlier promotion pass that promotes intra-module targets.
- // This two-step promotion is to save the compile time. For LTO, it should
- // produce the same result as if we only do promotion here.
- MPM.addPass(PGOIndirectCallPromotion(
- true /* InLTO */, PGOOpt && PGOOpt->Action == PGOOptions::SampleUse));
- if (EnableFunctionSpecialization && Level == OptimizationLevel::O3)
- MPM.addPass(FunctionSpecializationPass());
- // Propagate constants at call sites into the functions they call. This
- // opens opportunities for globalopt (and inlining) by substituting function
- // pointers passed as arguments to direct uses of functions.
- MPM.addPass(IPSCCPPass());
- // Attach metadata to indirect call sites indicating the set of functions
- // they may target at run-time. This should follow IPSCCP.
- MPM.addPass(CalledValuePropagationPass());
- }
- // Now deduce any function attributes based on the current code.
- MPM.addPass(
- createModuleToPostOrderCGSCCPassAdaptor(PostOrderFunctionAttrsPass()));
- // Do RPO function attribute inference across the module to forward-propagate
- // attributes where applicable.
- // FIXME: Is this really an optimization rather than a canonicalization?
- MPM.addPass(ReversePostOrderFunctionAttrsPass());
- // Use in-range annotations on GEP indices to split globals where beneficial.
- MPM.addPass(GlobalSplitPass());
- // Run whole program optimization of virtual call when the list of callees
- // is fixed.
- MPM.addPass(WholeProgramDevirtPass(ExportSummary, nullptr));
- // Stop here at -O1.
- if (Level == OptimizationLevel::O1) {
- // The LowerTypeTestsPass needs to run to lower type metadata and the
- // type.test intrinsics. The pass does nothing if CFI is disabled.
- MPM.addPass(LowerTypeTestsPass(ExportSummary, nullptr));
- // Run a second time to clean up any type tests left behind by WPD for use
- // in ICP (which is performed earlier than this in the regular LTO
- // pipeline).
- MPM.addPass(LowerTypeTestsPass(nullptr, nullptr, true));
- // Emit annotation remarks.
- addAnnotationRemarksPass(MPM);
- return MPM; // -O1: none of the passes below are scheduled.
- }
- // Optimize globals to try and fold them into constants.
- MPM.addPass(GlobalOptPass());
- // Promote any localized globals to SSA registers.
- MPM.addPass(createModuleToFunctionPassAdaptor(PromotePass()));
- // Linking modules together can lead to duplicate global constants; keep
- // only one copy of each constant.
- MPM.addPass(ConstantMergePass());
- // Remove unused arguments from functions.
- MPM.addPass(DeadArgumentEliminationPass());
- // Reduce the code after globalopt and ipsccp. Both can open up significant
- // simplification opportunities, and both can propagate functions through
- // function pointers. When this happens, we often have to resolve varargs
- // calls, etc, so let instcombine do this.
- FunctionPassManager PeepholeFPM;
- PeepholeFPM.addPass(InstCombinePass());
- if (Level == OptimizationLevel::O3)
- PeepholeFPM.addPass(AggressiveInstCombinePass());
- invokePeepholeEPCallbacks(PeepholeFPM, Level);
- MPM.addPass(createModuleToFunctionPassAdaptor(std::move(PeepholeFPM),
- PTO.EagerlyInvalidateAnalyses));
- // Note: historically, the PruneEH pass was run first to deduce nounwind and
- // generally clean up exception handling overhead. It isn't clear this is
- // valuable as the inliner doesn't currently care whether it is inlining an
- // invoke or a call.
- // Run the inliner now.
- MPM.addPass(ModuleInlinerWrapperPass(getInlineParamsFromOptLevel(Level)));
- // Optimize globals again after we ran the inliner.
- MPM.addPass(GlobalOptPass());
- // Garbage collect dead functions.
- MPM.addPass(GlobalDCEPass());
- // If we didn't decide to inline a function, check to see if we can
- // transform it to pass arguments by value instead of by reference.
- MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(ArgumentPromotionPass()));
- FunctionPassManager FPM;
- // The IPO Passes may leave cruft around. Clean up after them.
- FPM.addPass(InstCombinePass());
- invokePeepholeEPCallbacks(FPM, Level);
- FPM.addPass(JumpThreadingPass(/*InsertFreezeWhenUnfoldingSelect*/ true));
- // Do a post-inline PGO instrumentation or profile-use step, chosen by
- // CSAction. This is the context-sensitive PGO step; note it targets MPM.
- if (PGOOpt) {
- if (PGOOpt->CSAction == PGOOptions::CSIRInstr)
- addPGOInstrPasses(MPM, Level, /* RunProfileGen */ true, // Goes on MPM, ahead of the FPM adaptor below.
- /* IsCS */ true, PGOOpt->CSProfileGenFile,
- PGOOpt->ProfileRemappingFile);
- else if (PGOOpt->CSAction == PGOOptions::CSIRUse)
- addPGOInstrPasses(MPM, Level, /* RunProfileGen */ false, // Goes on MPM, ahead of the FPM adaptor below.
- /* IsCS */ true, PGOOpt->ProfileFile,
- PGOOpt->ProfileRemappingFile);
- }
- // Break up allocas.
- FPM.addPass(SROAPass());
- // LTO provides additional opportunities for tailcall elimination due to
- // link-time inlining, and visibility of nocapture attribute.
- FPM.addPass(TailCallElimPass());
- // Run a few AA driver optimizations here and now to clean up the code.
- MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM),
- PTO.EagerlyInvalidateAnalyses));
- MPM.addPass(
- createModuleToPostOrderCGSCCPassAdaptor(PostOrderFunctionAttrsPass()));
- // Require the GlobalsAA analysis for the module so we can query it within
- // MainFPM.
- MPM.addPass(RequireAnalysisPass<GlobalsAA, Module>());
- // Invalidate AAManager so it can be recreated and pick up the newly available
- // GlobalsAA.
- MPM.addPass(
- createModuleToFunctionPassAdaptor(InvalidateAnalysisPass<AAManager>()));
- FunctionPassManager MainFPM;
- MainFPM.addPass(createFunctionToLoopPassAdaptor(
- LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
- /*AllowSpeculation=*/true),
- /*UseMemorySSA=*/true, /*UseBlockFrequencyInfo=*/true));
- if (RunNewGVN)
- MainFPM.addPass(NewGVNPass());
- else
- MainFPM.addPass(GVNPass());
- // Remove dead memcpy()'s.
- MainFPM.addPass(MemCpyOptPass());
- // Nuke dead stores.
- MainFPM.addPass(DSEPass());
- MainFPM.addPass(MergedLoadStoreMotionPass());
- if (EnableConstraintElimination)
- MainFPM.addPass(ConstraintEliminationPass());
- LoopPassManager LPM;
- if (EnableLoopFlatten && Level.getSpeedupLevel() > 1)
- LPM.addPass(LoopFlattenPass());
- LPM.addPass(IndVarSimplifyPass());
- LPM.addPass(LoopDeletionPass());
- // FIXME: Add loop interchange.
- // Unroll small loops and perform peeling.
- LPM.addPass(LoopFullUnrollPass(Level.getSpeedupLevel(),
- /* OnlyWhenForced= */ !PTO.LoopUnrolling,
- PTO.ForgetAllSCEVInLoopUnroll));
- // The loop passes in LPM (LoopFullUnrollPass) do not preserve MemorySSA.
- // *All* loop passes must preserve it, in order to be able to use it.
- MainFPM.addPass(createFunctionToLoopPassAdaptor(
- std::move(LPM), /*UseMemorySSA=*/false, /*UseBlockFrequencyInfo=*/true));
- MainFPM.addPass(LoopDistributePass());
- addVectorPasses(Level, MainFPM, /* IsFullLTO */ true);
- // Run the OpenMPOpt CGSCC pass again late. It is added to MPM directly.
- MPM.addPass(
- createModuleToPostOrderCGSCCPassAdaptor(OpenMPOptCGSCCPass())); // Lands ahead of the MainFPM adaptor added below.
- invokePeepholeEPCallbacks(MainFPM, Level);
- MainFPM.addPass(JumpThreadingPass(/*InsertFreezeWhenUnfoldingSelect*/ true));
- MPM.addPass(createModuleToFunctionPassAdaptor(std::move(MainFPM),
- PTO.EagerlyInvalidateAnalyses));
- // Lower type metadata and the type.test intrinsic. This pass supports
- // clang's control flow integrity mechanisms (-fsanitize=cfi*) and needs
- // to be run at link time if CFI is enabled. This pass does nothing if
- // CFI is disabled.
- MPM.addPass(LowerTypeTestsPass(ExportSummary, nullptr));
- // Run a second time to clean up any type tests left behind by WPD for use
- // in ICP (which is performed earlier than this in the regular LTO pipeline).
- MPM.addPass(LowerTypeTestsPass(nullptr, nullptr, true));
- // Enable splitting late in the FullLTO post-link pipeline. This is done in
- // the same stage in the old pass manager (\ref addLateLTOOptimizationPasses).
- if (EnableHotColdSplit)
- MPM.addPass(HotColdSplittingPass());
- // Add late LTO optimization passes.
- // Delete basic blocks, which optimization passes may have killed.
- MPM.addPass(createModuleToFunctionPassAdaptor(SimplifyCFGPass(
- SimplifyCFGOptions().convertSwitchRangeToICmp(true).hoistCommonInsts(
- true))));
- // Drop bodies of available_externally objects to improve GlobalDCE.
- MPM.addPass(EliminateAvailableExternallyPass());
- // Now that we have optimized the program, discard unreachable functions.
- MPM.addPass(GlobalDCEPass());
- if (PTO.MergeFunctions)
- MPM.addPass(MergeFunctionsPass());
- // Emit annotation remarks.
- addAnnotationRemarksPass(MPM);
- return MPM;
- }
- ModulePassManager PassBuilder::buildO0DefaultPipeline(OptimizationLevel Level,
- bool LTOPreLink) {
- assert(Level == OptimizationLevel::O0 &&
- "buildO0DefaultPipeline should only be used with O0");
- // Assembles the O0 module pipeline and returns it by value.
- ModulePassManager Pipeline;
- // Pseudo probe instrumentation is done in O0 mode too, to keep different
- // build modes consistent. For instance, an LTO build may combine an O0
- // prelink with an O2 postlink; loading a sample profile in the postlink
- // then requires pseudo probe instrumentation in the prelink.
- if (PGOOpt && PGOOpt->PseudoProbeForProfiling) {
- Pipeline.addPass(SampleProfileProbePass(TM));
- }
- // IR-level PGO for the IRInstr and IRUse actions; RunProfileGen is set only
- // for IRInstr.
- if (PGOOpt) {
- const bool GenerateProfile = PGOOpt->Action == PGOOptions::IRInstr;
- if (GenerateProfile || PGOOpt->Action == PGOOptions::IRUse) {
- addPGOInstrPassesForO0(Pipeline, /* RunProfileGen */ GenerateProfile,
- /* IsCS */ false, PGOOpt->ProfileFile,
- PGOOpt->ProfileRemappingFile);
- }
- }
- for (auto &Callback : PipelineStartEPCallbacks) {
- Callback(Pipeline, Level);
- }
- if (PGOOpt && PGOOpt->DebugInfoForProfiling) {
- Pipeline.addPass(createModuleToFunctionPassAdaptor(AddDiscriminatorsPass()));
- }
- for (auto &Callback : PipelineEarlySimplificationEPCallbacks) {
- Callback(Pipeline, Level);
- }
- // LLVM's required semantics only demand that always-inlining happens, so the
- // pipeline stays minimal. Lifetime intrinsics are not generated, to avoid
- // enabling further optimizations during code generation.
- Pipeline.addPass(AlwaysInlinerPass(
- /*InsertLifetimeIntrinsics=*/false));
- if (PTO.MergeFunctions) {
- Pipeline.addPass(MergeFunctionsPass());
- }
- if (EnableMatrix) {
- Pipeline.addPass(
- createModuleToFunctionPassAdaptor(LowerMatrixIntrinsicsPass(true)));
- }
- // For each extension point below, the registered callbacks fill a scratch
- // pass manager, which is scheduled only if something was actually added.
- if (!CGSCCOptimizerLateEPCallbacks.empty()) {
- CGSCCPassManager LateCGPM;
- for (auto &Callback : CGSCCOptimizerLateEPCallbacks) {
- Callback(LateCGPM, Level);
- }
- if (!LateCGPM.isEmpty()) {
- Pipeline.addPass(
- createModuleToPostOrderCGSCCPassAdaptor(std::move(LateCGPM)));
- }
- }
- if (!LateLoopOptimizationsEPCallbacks.empty()) {
- LoopPassManager LateLoopPM;
- for (auto &Callback : LateLoopOptimizationsEPCallbacks) {
- Callback(LateLoopPM, Level);
- }
- if (!LateLoopPM.isEmpty()) {
- Pipeline.addPass(createModuleToFunctionPassAdaptor(
- createFunctionToLoopPassAdaptor(std::move(LateLoopPM))));
- }
- }
- if (!LoopOptimizerEndEPCallbacks.empty()) {
- LoopPassManager LoopEndPM;
- for (auto &Callback : LoopOptimizerEndEPCallbacks) {
- Callback(LoopEndPM, Level);
- }
- if (!LoopEndPM.isEmpty()) {
- Pipeline.addPass(createModuleToFunctionPassAdaptor(
- createFunctionToLoopPassAdaptor(std::move(LoopEndPM))));
- }
- }
- if (!ScalarOptimizerLateEPCallbacks.empty()) {
- FunctionPassManager ScalarLateFPM;
- for (auto &Callback : ScalarOptimizerLateEPCallbacks) {
- Callback(ScalarLateFPM, Level);
- }
- if (!ScalarLateFPM.isEmpty()) {
- Pipeline.addPass(
- createModuleToFunctionPassAdaptor(std::move(ScalarLateFPM)));
- }
- }
- if (!VectorizerStartEPCallbacks.empty()) {
- FunctionPassManager VectorizerStartFPM;
- for (auto &Callback : VectorizerStartEPCallbacks) {
- Callback(VectorizerStartFPM, Level);
- }
- if (!VectorizerStartFPM.isEmpty()) {
- Pipeline.addPass(
- createModuleToFunctionPassAdaptor(std::move(VectorizerStartFPM)));
- }
- }
- // Coroutine passes: early, split (on the call graph), then cleanup.
- Pipeline.addPass(createModuleToFunctionPassAdaptor(CoroEarlyPass()));
- CGSCCPassManager CoroCGPM;
- CoroCGPM.addPass(CoroSplitPass());
- Pipeline.addPass(createModuleToPostOrderCGSCCPassAdaptor(std::move(CoroCGPM)));
- Pipeline.addPass(createModuleToFunctionPassAdaptor(CoroCleanupPass()));
- for (auto &Callback : OptimizerLastEPCallbacks) {
- Callback(Pipeline, Level);
- }
- if (LTOPreLink) {
- addRequiredLTOPreLinkPasses(Pipeline);
- }
- Pipeline.addPass(createModuleToFunctionPassAdaptor(AnnotationRemarksPass()));
- return Pipeline;
- }
- AAManager PassBuilder::buildDefaultAAPipeline() {
- // Assembles the default alias-analysis stack. Registration order fixes the
- // priority with which the individual analyses are queried.
- AAManager Manager;
- // Basic alias analysis goes first; it supplies the bulk of the per-function
- // local AA logic and is a stateless, on-demand local set of AA techniques.
- Manager.registerFunctionAnalysis<BasicAA>();
- // After that come the fast, specialized analyses that wrap aliasing
- // information embedded in the IR.
- Manager.registerFunctionAnalysis<ScopedNoAliasAA>();
- Manager.registerFunctionAnalysis<TypeBasedAA>();
- // Allow querying global aliasing information when it is available. Since
- // `AAManager` is a function analysis while `GlobalsAA` is a module analysis,
- // the `AAManager` can only look up *cached* `GlobalsAA` results through a
- // readonly proxy.
- Manager.registerModuleAnalysis<GlobalsAA>();
- // Finally, let the target (if there is one) add its own alias analyses.
- if (TM) {
- TM->registerDefaultAliasAnalyses(Manager);
- }
- return Manager;
- }
|