ControlHeightReduction.cpp 80 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007017027037047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417141814191420142114221423142414251426142714281429143014311432143314341435143614371438143914401441144214431444144514461447144814491450145114521453145414551456145714581459146014611462146314641465146614671468146914701471147214731474147514761477147814791480148114821483148414851486148714881489149014911492149314941495149614971498149915001501150215031504150515061507150815091510151115121513151415151516151715181519152015211522152315241525152615271528152915301531153215331534153515361537153815391540154115421543154415451546154715481549155015511552155315541555155615571558155915601561156215631564156515661567156815691570157115721573157415751576157715781579158015811582158315841585158615871588158915901591159215931594159515961597159815991600160116021603160416051606160716081609161016111612161316141615161616171618161916201621162216231624162516261627162816291630163116321633163416351636163716381639164016411642164316441645164616471648164916501651165216531654165516561657165816591660166116621663166416651666166716681669167016711672167316741675167616771678167916801681168216831684168516861687168816891690169116921693169416951696169716981699170017011702170317041705170617071708170917101711171217131714171517161717171817191720172117221723172417251726172717281729173017311732173317341735173617371738173917401741174217431744174517461747174817491750175117521753175417551756175717581759176017611762176317641765176617671768176917701771177217731774177517761777177817791780178117821783178417851786178717881789179017911792179317941795179617971798179918001801180218031804180518061807180818091810181118121813181418151816181718181819182018211822182318241825182618271828182918301831183218331834183518361837183818391840184118421843184418451846184718481849185018511852185318541855185618571858185918601861186218631864186518661867186818691870187118721873187418751876187718781879188018811882188318841885188618871888188918901891189218931894189518961897189818991900190119021903190419051906190719081909191019111912191319141915191619171918191919201921192219231924192519261927192819291930193119321933193419351936193719381939194019411942194319441945194619471948194919501951195219531954195519561957195819591960196119621963196419651966196719681969197019711972197319741975197619771978197919801981198219831984198519861987198819891990199119921993199419951996199719981999200020012002200320042005200620072008200920102011201220132014201520162017201820192020202120222023202420252026202720282029203020312032203320342035203620372038203920402041204220432044204520462047204820492050205120522053205420552056205720582059206020612062206320642065206620672068206920702071207220732074207520762077207820792080208120822083208420852086208720882089209020912092209320942095209620972098209921002101210221032104210521062107
  1. //===-- ControlHeightReduction.cpp - Control Height Reduction -------------===//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. //
  9. // This pass merges conditional blocks of code and reduces the number of
  10. // conditional branches in the hot paths based on profiles.
  11. //
  12. //===----------------------------------------------------------------------===//
  13. #include "llvm/Transforms/Instrumentation/ControlHeightReduction.h"
  14. #include "llvm/ADT/DenseMap.h"
  15. #include "llvm/ADT/DenseSet.h"
  16. #include "llvm/ADT/SmallVector.h"
  17. #include "llvm/ADT/StringSet.h"
  18. #include "llvm/Analysis/BlockFrequencyInfo.h"
  19. #include "llvm/Analysis/GlobalsModRef.h"
  20. #include "llvm/Analysis/OptimizationRemarkEmitter.h"
  21. #include "llvm/Analysis/ProfileSummaryInfo.h"
  22. #include "llvm/Analysis/RegionInfo.h"
  23. #include "llvm/Analysis/RegionIterator.h"
  24. #include "llvm/Analysis/ValueTracking.h"
  25. #include "llvm/IR/CFG.h"
  26. #include "llvm/IR/Dominators.h"
  27. #include "llvm/IR/IRBuilder.h"
  28. #include "llvm/IR/MDBuilder.h"
  29. #include "llvm/IR/PassManager.h"
  30. #include "llvm/InitializePasses.h"
  31. #include "llvm/Support/BranchProbability.h"
  32. #include "llvm/Support/CommandLine.h"
  33. #include "llvm/Support/MemoryBuffer.h"
  34. #include "llvm/Transforms/Utils.h"
  35. #include "llvm/Transforms/Utils/BasicBlockUtils.h"
  36. #include "llvm/Transforms/Utils/Cloning.h"
  37. #include "llvm/Transforms/Utils/ValueMapper.h"
  38. #include <set>
  39. #include <sstream>
  40. using namespace llvm;
  41. #define DEBUG_TYPE "chr"
  42. #define CHR_DEBUG(X) LLVM_DEBUG(X)
  43. static cl::opt<bool> ForceCHR("force-chr", cl::init(false), cl::Hidden,
  44. cl::desc("Apply CHR for all functions"));
  45. static cl::opt<double> CHRBiasThreshold(
  46. "chr-bias-threshold", cl::init(0.99), cl::Hidden,
  47. cl::desc("CHR considers a branch bias greater than this ratio as biased"));
  48. static cl::opt<unsigned> CHRMergeThreshold(
  49. "chr-merge-threshold", cl::init(2), cl::Hidden,
  50. cl::desc("CHR merges a group of N branches/selects where N >= this value"));
  51. static cl::opt<std::string> CHRModuleList(
  52. "chr-module-list", cl::init(""), cl::Hidden,
  53. cl::desc("Specify file to retrieve the list of modules to apply CHR to"));
  54. static cl::opt<std::string> CHRFunctionList(
  55. "chr-function-list", cl::init(""), cl::Hidden,
  56. cl::desc("Specify file to retrieve the list of functions to apply CHR to"));
  57. static StringSet<> CHRModules;
  58. static StringSet<> CHRFunctions;
  59. static void parseCHRFilterFiles() {
  60. if (!CHRModuleList.empty()) {
  61. auto FileOrErr = MemoryBuffer::getFile(CHRModuleList);
  62. if (!FileOrErr) {
  63. errs() << "Error: Couldn't read the chr-module-list file " << CHRModuleList << "\n";
  64. std::exit(1);
  65. }
  66. StringRef Buf = FileOrErr->get()->getBuffer();
  67. SmallVector<StringRef, 0> Lines;
  68. Buf.split(Lines, '\n');
  69. for (StringRef Line : Lines) {
  70. Line = Line.trim();
  71. if (!Line.empty())
  72. CHRModules.insert(Line);
  73. }
  74. }
  75. if (!CHRFunctionList.empty()) {
  76. auto FileOrErr = MemoryBuffer::getFile(CHRFunctionList);
  77. if (!FileOrErr) {
  78. errs() << "Error: Couldn't read the chr-function-list file " << CHRFunctionList << "\n";
  79. std::exit(1);
  80. }
  81. StringRef Buf = FileOrErr->get()->getBuffer();
  82. SmallVector<StringRef, 0> Lines;
  83. Buf.split(Lines, '\n');
  84. for (StringRef Line : Lines) {
  85. Line = Line.trim();
  86. if (!Line.empty())
  87. CHRFunctions.insert(Line);
  88. }
  89. }
  90. }
  91. namespace {
  92. class ControlHeightReductionLegacyPass : public FunctionPass {
  93. public:
  94. static char ID;
  95. ControlHeightReductionLegacyPass() : FunctionPass(ID) {
  96. initializeControlHeightReductionLegacyPassPass(
  97. *PassRegistry::getPassRegistry());
  98. parseCHRFilterFiles();
  99. }
  100. bool runOnFunction(Function &F) override;
  101. void getAnalysisUsage(AnalysisUsage &AU) const override {
  102. AU.addRequired<BlockFrequencyInfoWrapperPass>();
  103. AU.addRequired<DominatorTreeWrapperPass>();
  104. AU.addRequired<ProfileSummaryInfoWrapperPass>();
  105. AU.addRequired<RegionInfoPass>();
  106. AU.addPreserved<GlobalsAAWrapperPass>();
  107. }
  108. };
  109. } // end anonymous namespace
  110. char ControlHeightReductionLegacyPass::ID = 0;
  111. INITIALIZE_PASS_BEGIN(ControlHeightReductionLegacyPass,
  112. "chr",
  113. "Reduce control height in the hot paths",
  114. false, false)
  115. INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass)
  116. INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
  117. INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
  118. INITIALIZE_PASS_DEPENDENCY(RegionInfoPass)
  119. INITIALIZE_PASS_END(ControlHeightReductionLegacyPass,
  120. "chr",
  121. "Reduce control height in the hot paths",
  122. false, false)
  123. FunctionPass *llvm::createControlHeightReductionLegacyPass() {
  124. return new ControlHeightReductionLegacyPass();
  125. }
  126. namespace {
  127. struct CHRStats {
  128. CHRStats() : NumBranches(0), NumBranchesDelta(0),
  129. WeightedNumBranchesDelta(0) {}
  130. void print(raw_ostream &OS) const {
  131. OS << "CHRStats: NumBranches " << NumBranches
  132. << " NumBranchesDelta " << NumBranchesDelta
  133. << " WeightedNumBranchesDelta " << WeightedNumBranchesDelta;
  134. }
  135. uint64_t NumBranches; // The original number of conditional branches /
  136. // selects
  137. uint64_t NumBranchesDelta; // The decrease of the number of conditional
  138. // branches / selects in the hot paths due to CHR.
  139. uint64_t WeightedNumBranchesDelta; // NumBranchesDelta weighted by the profile
  140. // count at the scope entry.
  141. };
  142. // RegInfo - some properties of a Region.
  143. struct RegInfo {
  144. RegInfo() : R(nullptr), HasBranch(false) {}
  145. RegInfo(Region *RegionIn) : R(RegionIn), HasBranch(false) {}
  146. Region *R;
  147. bool HasBranch;
  148. SmallVector<SelectInst *, 8> Selects;
  149. };
  150. typedef DenseMap<Region *, DenseSet<Instruction *>> HoistStopMapTy;
  151. // CHRScope - a sequence of regions to CHR together. It corresponds to a
  152. // sequence of conditional blocks. It can have subscopes which correspond to
  153. // nested conditional blocks. Nested CHRScopes form a tree.
  154. class CHRScope {
  155. public:
  156. CHRScope(RegInfo RI) : BranchInsertPoint(nullptr) {
  157. assert(RI.R && "Null RegionIn");
  158. RegInfos.push_back(RI);
  159. }
  160. Region *getParentRegion() {
  161. assert(RegInfos.size() > 0 && "Empty CHRScope");
  162. Region *Parent = RegInfos[0].R->getParent();
  163. assert(Parent && "Unexpected to call this on the top-level region");
  164. return Parent;
  165. }
  166. BasicBlock *getEntryBlock() {
  167. assert(RegInfos.size() > 0 && "Empty CHRScope");
  168. return RegInfos.front().R->getEntry();
  169. }
  170. BasicBlock *getExitBlock() {
  171. assert(RegInfos.size() > 0 && "Empty CHRScope");
  172. return RegInfos.back().R->getExit();
  173. }
  174. bool appendable(CHRScope *Next) {
  175. // The next scope is appendable only if this scope is directly connected to
  176. // it (which implies it post-dominates this scope) and this scope dominates
  177. // it (no edge to the next scope outside this scope).
  178. BasicBlock *NextEntry = Next->getEntryBlock();
  179. if (getExitBlock() != NextEntry)
  180. // Not directly connected.
  181. return false;
  182. Region *LastRegion = RegInfos.back().R;
  183. for (BasicBlock *Pred : predecessors(NextEntry))
  184. if (!LastRegion->contains(Pred))
  185. // There's an edge going into the entry of the next scope from outside
  186. // of this scope.
  187. return false;
  188. return true;
  189. }
  190. void append(CHRScope *Next) {
  191. assert(RegInfos.size() > 0 && "Empty CHRScope");
  192. assert(Next->RegInfos.size() > 0 && "Empty CHRScope");
  193. assert(getParentRegion() == Next->getParentRegion() &&
  194. "Must be siblings");
  195. assert(getExitBlock() == Next->getEntryBlock() &&
  196. "Must be adjacent");
  197. RegInfos.append(Next->RegInfos.begin(), Next->RegInfos.end());
  198. Subs.append(Next->Subs.begin(), Next->Subs.end());
  199. }
  200. void addSub(CHRScope *SubIn) {
  201. #ifndef NDEBUG
  202. bool IsChild = false;
  203. for (RegInfo &RI : RegInfos)
  204. if (RI.R == SubIn->getParentRegion()) {
  205. IsChild = true;
  206. break;
  207. }
  208. assert(IsChild && "Must be a child");
  209. #endif
  210. Subs.push_back(SubIn);
  211. }
  212. // Split this scope at the boundary region into two, which will belong to the
  213. // tail and returns the tail.
  214. CHRScope *split(Region *Boundary) {
  215. assert(Boundary && "Boundary null");
  216. assert(RegInfos.begin()->R != Boundary &&
  217. "Can't be split at beginning");
  218. auto BoundaryIt = llvm::find_if(
  219. RegInfos, [&Boundary](const RegInfo &RI) { return Boundary == RI.R; });
  220. if (BoundaryIt == RegInfos.end())
  221. return nullptr;
  222. ArrayRef<RegInfo> TailRegInfos(BoundaryIt, RegInfos.end());
  223. DenseSet<Region *> TailRegionSet;
  224. for (const RegInfo &RI : TailRegInfos)
  225. TailRegionSet.insert(RI.R);
  226. auto TailIt =
  227. std::stable_partition(Subs.begin(), Subs.end(), [&](CHRScope *Sub) {
  228. assert(Sub && "null Sub");
  229. Region *Parent = Sub->getParentRegion();
  230. if (TailRegionSet.count(Parent))
  231. return false;
  232. assert(llvm::any_of(
  233. RegInfos,
  234. [&Parent](const RegInfo &RI) { return Parent == RI.R; }) &&
  235. "Must be in head");
  236. return true;
  237. });
  238. ArrayRef<CHRScope *> TailSubs(TailIt, Subs.end());
  239. assert(HoistStopMap.empty() && "MapHoistStops must be empty");
  240. auto *Scope = new CHRScope(TailRegInfos, TailSubs);
  241. RegInfos.erase(BoundaryIt, RegInfos.end());
  242. Subs.erase(TailIt, Subs.end());
  243. return Scope;
  244. }
  245. bool contains(Instruction *I) const {
  246. BasicBlock *Parent = I->getParent();
  247. for (const RegInfo &RI : RegInfos)
  248. if (RI.R->contains(Parent))
  249. return true;
  250. return false;
  251. }
  252. void print(raw_ostream &OS) const;
  253. SmallVector<RegInfo, 8> RegInfos; // Regions that belong to this scope
  254. SmallVector<CHRScope *, 8> Subs; // Subscopes.
  255. // The instruction at which to insert the CHR conditional branch (and hoist
  256. // the dependent condition values).
  257. Instruction *BranchInsertPoint;
  258. // True-biased and false-biased regions (conditional blocks),
  259. // respectively. Used only for the outermost scope and includes regions in
  260. // subscopes. The rest are unbiased.
  261. DenseSet<Region *> TrueBiasedRegions;
  262. DenseSet<Region *> FalseBiasedRegions;
  263. // Among the biased regions, the regions that get CHRed.
  264. SmallVector<RegInfo, 8> CHRRegions;
  265. // True-biased and false-biased selects, respectively. Used only for the
  266. // outermost scope and includes ones in subscopes.
  267. DenseSet<SelectInst *> TrueBiasedSelects;
  268. DenseSet<SelectInst *> FalseBiasedSelects;
  269. // Map from one of the above regions to the instructions to stop
  270. // hoisting instructions at through use-def chains.
  271. HoistStopMapTy HoistStopMap;
  272. private:
  273. CHRScope(ArrayRef<RegInfo> RegInfosIn, ArrayRef<CHRScope *> SubsIn)
  274. : RegInfos(RegInfosIn.begin(), RegInfosIn.end()),
  275. Subs(SubsIn.begin(), SubsIn.end()), BranchInsertPoint(nullptr) {}
  276. };
  277. class CHR {
  278. public:
  279. CHR(Function &Fin, BlockFrequencyInfo &BFIin, DominatorTree &DTin,
  280. ProfileSummaryInfo &PSIin, RegionInfo &RIin,
  281. OptimizationRemarkEmitter &OREin)
  282. : F(Fin), BFI(BFIin), DT(DTin), PSI(PSIin), RI(RIin), ORE(OREin) {}
  283. ~CHR() {
  284. for (CHRScope *Scope : Scopes) {
  285. delete Scope;
  286. }
  287. }
  288. bool run();
  289. private:
  290. // See the comments in CHR::run() for the high level flow of the algorithm and
  291. // what the following functions do.
  292. void findScopes(SmallVectorImpl<CHRScope *> &Output) {
  293. Region *R = RI.getTopLevelRegion();
  294. if (CHRScope *Scope = findScopes(R, nullptr, nullptr, Output)) {
  295. Output.push_back(Scope);
  296. }
  297. }
  298. CHRScope *findScopes(Region *R, Region *NextRegion, Region *ParentRegion,
  299. SmallVectorImpl<CHRScope *> &Scopes);
  300. CHRScope *findScope(Region *R);
  301. void checkScopeHoistable(CHRScope *Scope);
  302. void splitScopes(SmallVectorImpl<CHRScope *> &Input,
  303. SmallVectorImpl<CHRScope *> &Output);
  304. SmallVector<CHRScope *, 8> splitScope(CHRScope *Scope,
  305. CHRScope *Outer,
  306. DenseSet<Value *> *OuterConditionValues,
  307. Instruction *OuterInsertPoint,
  308. SmallVectorImpl<CHRScope *> &Output,
  309. DenseSet<Instruction *> &Unhoistables);
  310. void classifyBiasedScopes(SmallVectorImpl<CHRScope *> &Scopes);
  311. void classifyBiasedScopes(CHRScope *Scope, CHRScope *OutermostScope);
  312. void filterScopes(SmallVectorImpl<CHRScope *> &Input,
  313. SmallVectorImpl<CHRScope *> &Output);
  314. void setCHRRegions(SmallVectorImpl<CHRScope *> &Input,
  315. SmallVectorImpl<CHRScope *> &Output);
  316. void setCHRRegions(CHRScope *Scope, CHRScope *OutermostScope);
  317. void sortScopes(SmallVectorImpl<CHRScope *> &Input,
  318. SmallVectorImpl<CHRScope *> &Output);
  319. void transformScopes(SmallVectorImpl<CHRScope *> &CHRScopes);
  320. void transformScopes(CHRScope *Scope, DenseSet<PHINode *> &TrivialPHIs);
  321. void cloneScopeBlocks(CHRScope *Scope,
  322. BasicBlock *PreEntryBlock,
  323. BasicBlock *ExitBlock,
  324. Region *LastRegion,
  325. ValueToValueMapTy &VMap);
  326. BranchInst *createMergedBranch(BasicBlock *PreEntryBlock,
  327. BasicBlock *EntryBlock,
  328. BasicBlock *NewEntryBlock,
  329. ValueToValueMapTy &VMap);
  330. void fixupBranchesAndSelects(CHRScope *Scope,
  331. BasicBlock *PreEntryBlock,
  332. BranchInst *MergedBR,
  333. uint64_t ProfileCount);
  334. void fixupBranch(Region *R,
  335. CHRScope *Scope,
  336. IRBuilder<> &IRB,
  337. Value *&MergedCondition, BranchProbability &CHRBranchBias);
  338. void fixupSelect(SelectInst* SI,
  339. CHRScope *Scope,
  340. IRBuilder<> &IRB,
  341. Value *&MergedCondition, BranchProbability &CHRBranchBias);
  342. void addToMergedCondition(bool IsTrueBiased, Value *Cond,
  343. Instruction *BranchOrSelect,
  344. CHRScope *Scope,
  345. IRBuilder<> &IRB,
  346. Value *&MergedCondition);
  347. Function &F;
  348. BlockFrequencyInfo &BFI;
  349. DominatorTree &DT;
  350. ProfileSummaryInfo &PSI;
  351. RegionInfo &RI;
  352. OptimizationRemarkEmitter &ORE;
  353. CHRStats Stats;
  354. // All the true-biased regions in the function
  355. DenseSet<Region *> TrueBiasedRegionsGlobal;
  356. // All the false-biased regions in the function
  357. DenseSet<Region *> FalseBiasedRegionsGlobal;
  358. // All the true-biased selects in the function
  359. DenseSet<SelectInst *> TrueBiasedSelectsGlobal;
  360. // All the false-biased selects in the function
  361. DenseSet<SelectInst *> FalseBiasedSelectsGlobal;
  362. // A map from biased regions to their branch bias
  363. DenseMap<Region *, BranchProbability> BranchBiasMap;
  364. // A map from biased selects to their branch bias
  365. DenseMap<SelectInst *, BranchProbability> SelectBiasMap;
  366. // All the scopes.
  367. DenseSet<CHRScope *> Scopes;
  368. };
  369. } // end anonymous namespace
  370. static inline
  371. raw_ostream LLVM_ATTRIBUTE_UNUSED &operator<<(raw_ostream &OS,
  372. const CHRStats &Stats) {
  373. Stats.print(OS);
  374. return OS;
  375. }
  376. static inline
  377. raw_ostream &operator<<(raw_ostream &OS, const CHRScope &Scope) {
  378. Scope.print(OS);
  379. return OS;
  380. }
  381. static bool shouldApply(Function &F, ProfileSummaryInfo& PSI) {
  382. if (ForceCHR)
  383. return true;
  384. if (!CHRModuleList.empty() || !CHRFunctionList.empty()) {
  385. if (CHRModules.count(F.getParent()->getName()))
  386. return true;
  387. return CHRFunctions.count(F.getName());
  388. }
  389. assert(PSI.hasProfileSummary() && "Empty PSI?");
  390. return PSI.isFunctionEntryHot(&F);
  391. }
  392. static void LLVM_ATTRIBUTE_UNUSED dumpIR(Function &F, const char *Label,
  393. CHRStats *Stats) {
  394. StringRef FuncName = F.getName();
  395. StringRef ModuleName = F.getParent()->getName();
  396. (void)(FuncName); // Unused in release build.
  397. (void)(ModuleName); // Unused in release build.
  398. CHR_DEBUG(dbgs() << "CHR IR dump " << Label << " " << ModuleName << " "
  399. << FuncName);
  400. if (Stats)
  401. CHR_DEBUG(dbgs() << " " << *Stats);
  402. CHR_DEBUG(dbgs() << "\n");
  403. CHR_DEBUG(F.dump());
  404. }
  405. void CHRScope::print(raw_ostream &OS) const {
  406. assert(RegInfos.size() > 0 && "Empty CHRScope");
  407. OS << "CHRScope[";
  408. OS << RegInfos.size() << ", Regions[";
  409. for (const RegInfo &RI : RegInfos) {
  410. OS << RI.R->getNameStr();
  411. if (RI.HasBranch)
  412. OS << " B";
  413. if (RI.Selects.size() > 0)
  414. OS << " S" << RI.Selects.size();
  415. OS << ", ";
  416. }
  417. if (RegInfos[0].R->getParent()) {
  418. OS << "], Parent " << RegInfos[0].R->getParent()->getNameStr();
  419. } else {
  420. // top level region
  421. OS << "]";
  422. }
  423. OS << ", Subs[";
  424. for (CHRScope *Sub : Subs) {
  425. OS << *Sub << ", ";
  426. }
  427. OS << "]]";
  428. }
  429. // Return true if the given instruction type can be hoisted by CHR.
  430. static bool isHoistableInstructionType(Instruction *I) {
  431. return isa<BinaryOperator>(I) || isa<CastInst>(I) || isa<SelectInst>(I) ||
  432. isa<GetElementPtrInst>(I) || isa<CmpInst>(I) ||
  433. isa<InsertElementInst>(I) || isa<ExtractElementInst>(I) ||
  434. isa<ShuffleVectorInst>(I) || isa<ExtractValueInst>(I) ||
  435. isa<InsertValueInst>(I);
  436. }
  437. // Return true if the given instruction can be hoisted by CHR.
  438. static bool isHoistable(Instruction *I, DominatorTree &DT) {
  439. if (!isHoistableInstructionType(I))
  440. return false;
  441. return isSafeToSpeculativelyExecute(I, nullptr, &DT);
  442. }
  443. // Recursively traverse the use-def chains of the given value and return a set
  444. // of the unhoistable base values defined within the scope (excluding the
  445. // first-region entry block) or the (hoistable or unhoistable) base values that
  446. // are defined outside (including the first-region entry block) of the
  447. // scope. The returned set doesn't include constants.
  448. static const std::set<Value *> &
  449. getBaseValues(Value *V, DominatorTree &DT,
  450. DenseMap<Value *, std::set<Value *>> &Visited) {
  451. auto It = Visited.find(V);
  452. if (It != Visited.end()) {
  453. return It->second;
  454. }
  455. std::set<Value *> Result;
  456. if (auto *I = dyn_cast<Instruction>(V)) {
  457. // We don't stop at a block that's not in the Scope because we would miss
  458. // some instructions that are based on the same base values if we stop
  459. // there.
  460. if (!isHoistable(I, DT)) {
  461. Result.insert(I);
  462. return Visited.insert(std::make_pair(V, std::move(Result))).first->second;
  463. }
  464. // I is hoistable above the Scope.
  465. for (Value *Op : I->operands()) {
  466. const std::set<Value *> &OpResult = getBaseValues(Op, DT, Visited);
  467. Result.insert(OpResult.begin(), OpResult.end());
  468. }
  469. return Visited.insert(std::make_pair(V, std::move(Result))).first->second;
  470. }
  471. if (isa<Argument>(V)) {
  472. Result.insert(V);
  473. }
  474. // We don't include others like constants because those won't lead to any
  475. // chance of folding of conditions (eg two bit checks merged into one check)
  476. // after CHR.
  477. return Visited.insert(std::make_pair(V, std::move(Result))).first->second;
  478. }
  479. // Return true if V is already hoisted or can be hoisted (along with its
  480. // operands) above the insert point. When it returns true and HoistStops is
  481. // non-null, the instructions to stop hoisting at through the use-def chains are
  482. // inserted into HoistStops.
  483. static bool
  484. checkHoistValue(Value *V, Instruction *InsertPoint, DominatorTree &DT,
  485. DenseSet<Instruction *> &Unhoistables,
  486. DenseSet<Instruction *> *HoistStops,
  487. DenseMap<Instruction *, bool> &Visited) {
  488. assert(InsertPoint && "Null InsertPoint");
  489. if (auto *I = dyn_cast<Instruction>(V)) {
  490. auto It = Visited.find(I);
  491. if (It != Visited.end()) {
  492. return It->second;
  493. }
  494. assert(DT.getNode(I->getParent()) && "DT must contain I's parent block");
  495. assert(DT.getNode(InsertPoint->getParent()) && "DT must contain Destination");
  496. if (Unhoistables.count(I)) {
  497. // Don't hoist if they are not to be hoisted.
  498. Visited[I] = false;
  499. return false;
  500. }
  501. if (DT.dominates(I, InsertPoint)) {
  502. // We are already above the insert point. Stop here.
  503. if (HoistStops)
  504. HoistStops->insert(I);
  505. Visited[I] = true;
  506. return true;
  507. }
  508. // We aren't not above the insert point, check if we can hoist it above the
  509. // insert point.
  510. if (isHoistable(I, DT)) {
  511. // Check operands first.
  512. DenseSet<Instruction *> OpsHoistStops;
  513. bool AllOpsHoisted = true;
  514. for (Value *Op : I->operands()) {
  515. if (!checkHoistValue(Op, InsertPoint, DT, Unhoistables, &OpsHoistStops,
  516. Visited)) {
  517. AllOpsHoisted = false;
  518. break;
  519. }
  520. }
  521. if (AllOpsHoisted) {
  522. CHR_DEBUG(dbgs() << "checkHoistValue " << *I << "\n");
  523. if (HoistStops)
  524. HoistStops->insert(OpsHoistStops.begin(), OpsHoistStops.end());
  525. Visited[I] = true;
  526. return true;
  527. }
  528. }
  529. Visited[I] = false;
  530. return false;
  531. }
  532. // Non-instructions are considered hoistable.
  533. return true;
  534. }
  535. // Returns true and sets the true probability and false probability of an
  536. // MD_prof metadata if it's well-formed.
  537. static bool checkMDProf(MDNode *MD, BranchProbability &TrueProb,
  538. BranchProbability &FalseProb) {
  539. if (!MD) return false;
  540. MDString *MDName = cast<MDString>(MD->getOperand(0));
  541. if (MDName->getString() != "branch_weights" ||
  542. MD->getNumOperands() != 3)
  543. return false;
  544. ConstantInt *TrueWeight = mdconst::extract<ConstantInt>(MD->getOperand(1));
  545. ConstantInt *FalseWeight = mdconst::extract<ConstantInt>(MD->getOperand(2));
  546. if (!TrueWeight || !FalseWeight)
  547. return false;
  548. uint64_t TrueWt = TrueWeight->getValue().getZExtValue();
  549. uint64_t FalseWt = FalseWeight->getValue().getZExtValue();
  550. uint64_t SumWt = TrueWt + FalseWt;
  551. assert(SumWt >= TrueWt && SumWt >= FalseWt &&
  552. "Overflow calculating branch probabilities.");
  553. // Guard against 0-to-0 branch weights to avoid a division-by-zero crash.
  554. if (SumWt == 0)
  555. return false;
  556. TrueProb = BranchProbability::getBranchProbability(TrueWt, SumWt);
  557. FalseProb = BranchProbability::getBranchProbability(FalseWt, SumWt);
  558. return true;
  559. }
  560. static BranchProbability getCHRBiasThreshold() {
  561. return BranchProbability::getBranchProbability(
  562. static_cast<uint64_t>(CHRBiasThreshold * 1000000), 1000000);
  563. }
  564. // A helper for CheckBiasedBranch and CheckBiasedSelect. If TrueProb >=
  565. // CHRBiasThreshold, put Key into TrueSet and return true. If FalseProb >=
  566. // CHRBiasThreshold, put Key into FalseSet and return true. Otherwise, return
  567. // false.
  568. template <typename K, typename S, typename M>
  569. static bool checkBias(K *Key, BranchProbability TrueProb,
  570. BranchProbability FalseProb, S &TrueSet, S &FalseSet,
  571. M &BiasMap) {
  572. BranchProbability Threshold = getCHRBiasThreshold();
  573. if (TrueProb >= Threshold) {
  574. TrueSet.insert(Key);
  575. BiasMap[Key] = TrueProb;
  576. return true;
  577. } else if (FalseProb >= Threshold) {
  578. FalseSet.insert(Key);
  579. BiasMap[Key] = FalseProb;
  580. return true;
  581. }
  582. return false;
  583. }
  584. // Returns true and insert a region into the right biased set and the map if the
  585. // branch of the region is biased.
  586. static bool checkBiasedBranch(BranchInst *BI, Region *R,
  587. DenseSet<Region *> &TrueBiasedRegionsGlobal,
  588. DenseSet<Region *> &FalseBiasedRegionsGlobal,
  589. DenseMap<Region *, BranchProbability> &BranchBiasMap) {
  590. if (!BI->isConditional())
  591. return false;
  592. BranchProbability ThenProb, ElseProb;
  593. if (!checkMDProf(BI->getMetadata(LLVMContext::MD_prof),
  594. ThenProb, ElseProb))
  595. return false;
  596. BasicBlock *IfThen = BI->getSuccessor(0);
  597. BasicBlock *IfElse = BI->getSuccessor(1);
  598. assert((IfThen == R->getExit() || IfElse == R->getExit()) &&
  599. IfThen != IfElse &&
  600. "Invariant from findScopes");
  601. if (IfThen == R->getExit()) {
  602. // Swap them so that IfThen/ThenProb means going into the conditional code
  603. // and IfElse/ElseProb means skipping it.
  604. std::swap(IfThen, IfElse);
  605. std::swap(ThenProb, ElseProb);
  606. }
  607. CHR_DEBUG(dbgs() << "BI " << *BI << " ");
  608. CHR_DEBUG(dbgs() << "ThenProb " << ThenProb << " ");
  609. CHR_DEBUG(dbgs() << "ElseProb " << ElseProb << "\n");
  610. return checkBias(R, ThenProb, ElseProb,
  611. TrueBiasedRegionsGlobal, FalseBiasedRegionsGlobal,
  612. BranchBiasMap);
  613. }
  614. // Returns true and insert a select into the right biased set and the map if the
  615. // select is biased.
  616. static bool checkBiasedSelect(
  617. SelectInst *SI, Region *R,
  618. DenseSet<SelectInst *> &TrueBiasedSelectsGlobal,
  619. DenseSet<SelectInst *> &FalseBiasedSelectsGlobal,
  620. DenseMap<SelectInst *, BranchProbability> &SelectBiasMap) {
  621. BranchProbability TrueProb, FalseProb;
  622. if (!checkMDProf(SI->getMetadata(LLVMContext::MD_prof),
  623. TrueProb, FalseProb))
  624. return false;
  625. CHR_DEBUG(dbgs() << "SI " << *SI << " ");
  626. CHR_DEBUG(dbgs() << "TrueProb " << TrueProb << " ");
  627. CHR_DEBUG(dbgs() << "FalseProb " << FalseProb << "\n");
  628. return checkBias(SI, TrueProb, FalseProb,
  629. TrueBiasedSelectsGlobal, FalseBiasedSelectsGlobal,
  630. SelectBiasMap);
  631. }
  632. // Returns the instruction at which to hoist the dependent condition values and
  633. // insert the CHR branch for a region. This is the terminator branch in the
  634. // entry block or the first select in the entry block, if any.
  635. static Instruction* getBranchInsertPoint(RegInfo &RI) {
  636. Region *R = RI.R;
  637. BasicBlock *EntryBB = R->getEntry();
  638. // The hoist point is by default the terminator of the entry block, which is
  639. // the same as the branch instruction if RI.HasBranch is true.
  640. Instruction *HoistPoint = EntryBB->getTerminator();
  641. for (SelectInst *SI : RI.Selects) {
  642. if (SI->getParent() == EntryBB) {
  643. // Pick the first select in Selects in the entry block. Note Selects is
  644. // sorted in the instruction order within a block (asserted below).
  645. HoistPoint = SI;
  646. break;
  647. }
  648. }
  649. assert(HoistPoint && "Null HoistPoint");
  650. #ifndef NDEBUG
  651. // Check that HoistPoint is the first one in Selects in the entry block,
  652. // if any.
  653. DenseSet<Instruction *> EntryBlockSelectSet;
  654. for (SelectInst *SI : RI.Selects) {
  655. if (SI->getParent() == EntryBB) {
  656. EntryBlockSelectSet.insert(SI);
  657. }
  658. }
  659. for (Instruction &I : *EntryBB) {
  660. if (EntryBlockSelectSet.contains(&I)) {
  661. assert(&I == HoistPoint &&
  662. "HoistPoint must be the first one in Selects");
  663. break;
  664. }
  665. }
  666. #endif
  667. return HoistPoint;
  668. }
  669. // Find a CHR scope in the given region.
  670. CHRScope * CHR::findScope(Region *R) {
  671. CHRScope *Result = nullptr;
  672. BasicBlock *Entry = R->getEntry();
  673. BasicBlock *Exit = R->getExit(); // null if top level.
  674. assert(Entry && "Entry must not be null");
  675. assert((Exit == nullptr) == (R->isTopLevelRegion()) &&
  676. "Only top level region has a null exit");
  677. if (Entry)
  678. CHR_DEBUG(dbgs() << "Entry " << Entry->getName() << "\n");
  679. else
  680. CHR_DEBUG(dbgs() << "Entry null\n");
  681. if (Exit)
  682. CHR_DEBUG(dbgs() << "Exit " << Exit->getName() << "\n");
  683. else
  684. CHR_DEBUG(dbgs() << "Exit null\n");
  685. // Exclude cases where Entry is part of a subregion (hence it doesn't belong
  686. // to this region).
  687. bool EntryInSubregion = RI.getRegionFor(Entry) != R;
  688. if (EntryInSubregion)
  689. return nullptr;
  690. // Exclude loops
  691. for (BasicBlock *Pred : predecessors(Entry))
  692. if (R->contains(Pred))
  693. return nullptr;
  694. // If any of the basic blocks have address taken, we must skip this region
  695. // because we cannot clone basic blocks that have address taken.
  696. for (BasicBlock *BB : R->blocks())
  697. if (BB->hasAddressTaken())
  698. return nullptr;
  699. if (Exit) {
  700. // Try to find an if-then block (check if R is an if-then).
  701. // if (cond) {
  702. // ...
  703. // }
  704. auto *BI = dyn_cast<BranchInst>(Entry->getTerminator());
  705. if (BI)
  706. CHR_DEBUG(dbgs() << "BI.isConditional " << BI->isConditional() << "\n");
  707. else
  708. CHR_DEBUG(dbgs() << "BI null\n");
  709. if (BI && BI->isConditional()) {
  710. BasicBlock *S0 = BI->getSuccessor(0);
  711. BasicBlock *S1 = BI->getSuccessor(1);
  712. CHR_DEBUG(dbgs() << "S0 " << S0->getName() << "\n");
  713. CHR_DEBUG(dbgs() << "S1 " << S1->getName() << "\n");
  714. if (S0 != S1 && (S0 == Exit || S1 == Exit)) {
  715. RegInfo RI(R);
  716. RI.HasBranch = checkBiasedBranch(
  717. BI, R, TrueBiasedRegionsGlobal, FalseBiasedRegionsGlobal,
  718. BranchBiasMap);
  719. Result = new CHRScope(RI);
  720. Scopes.insert(Result);
  721. CHR_DEBUG(dbgs() << "Found a region with a branch\n");
  722. ++Stats.NumBranches;
  723. if (!RI.HasBranch) {
  724. ORE.emit([&]() {
  725. return OptimizationRemarkMissed(DEBUG_TYPE, "BranchNotBiased", BI)
  726. << "Branch not biased";
  727. });
  728. }
  729. }
  730. }
  731. }
  732. {
  733. // Try to look for selects in the direct child blocks (as opposed to in
  734. // subregions) of R.
  735. // ...
  736. // if (..) { // Some subregion
  737. // ...
  738. // }
  739. // if (..) { // Some subregion
  740. // ...
  741. // }
  742. // ...
  743. // a = cond ? b : c;
  744. // ...
  745. SmallVector<SelectInst *, 8> Selects;
  746. for (RegionNode *E : R->elements()) {
  747. if (E->isSubRegion())
  748. continue;
  749. // This returns the basic block of E if E is a direct child of R (not a
  750. // subregion.)
  751. BasicBlock *BB = E->getEntry();
  752. // Need to push in the order to make it easier to find the first Select
  753. // later.
  754. for (Instruction &I : *BB) {
  755. if (auto *SI = dyn_cast<SelectInst>(&I)) {
  756. Selects.push_back(SI);
  757. ++Stats.NumBranches;
  758. }
  759. }
  760. }
  761. if (Selects.size() > 0) {
  762. auto AddSelects = [&](RegInfo &RI) {
  763. for (auto *SI : Selects)
  764. if (checkBiasedSelect(SI, RI.R,
  765. TrueBiasedSelectsGlobal,
  766. FalseBiasedSelectsGlobal,
  767. SelectBiasMap))
  768. RI.Selects.push_back(SI);
  769. else
  770. ORE.emit([&]() {
  771. return OptimizationRemarkMissed(DEBUG_TYPE, "SelectNotBiased", SI)
  772. << "Select not biased";
  773. });
  774. };
  775. if (!Result) {
  776. CHR_DEBUG(dbgs() << "Found a select-only region\n");
  777. RegInfo RI(R);
  778. AddSelects(RI);
  779. Result = new CHRScope(RI);
  780. Scopes.insert(Result);
  781. } else {
  782. CHR_DEBUG(dbgs() << "Found select(s) in a region with a branch\n");
  783. AddSelects(Result->RegInfos[0]);
  784. }
  785. }
  786. }
  787. if (Result) {
  788. checkScopeHoistable(Result);
  789. }
  790. return Result;
  791. }
  792. // Check that any of the branch and the selects in the region could be
  793. // hoisted above the the CHR branch insert point (the most dominating of
  794. // them, either the branch (at the end of the first block) or the first
  795. // select in the first block). If the branch can't be hoisted, drop the
  796. // selects in the first blocks.
  797. //
  798. // For example, for the following scope/region with selects, we want to insert
  799. // the merged branch right before the first select in the first/entry block by
  800. // hoisting c1, c2, c3, and c4.
  801. //
  802. // // Branch insert point here.
  803. // a = c1 ? b : c; // Select 1
  804. // d = c2 ? e : f; // Select 2
  805. // if (c3) { // Branch
  806. // ...
  807. // c4 = foo() // A call.
  808. // g = c4 ? h : i; // Select 3
  809. // }
  810. //
  811. // But suppose we can't hoist c4 because it's dependent on the preceding
  812. // call. Then, we drop Select 3. Furthermore, if we can't hoist c2, we also drop
  813. // Select 2. If we can't hoist c3, we drop Selects 1 & 2.
  814. void CHR::checkScopeHoistable(CHRScope *Scope) {
  815. RegInfo &RI = Scope->RegInfos[0];
  816. Region *R = RI.R;
  817. BasicBlock *EntryBB = R->getEntry();
  818. auto *Branch = RI.HasBranch ?
  819. cast<BranchInst>(EntryBB->getTerminator()) : nullptr;
  820. SmallVector<SelectInst *, 8> &Selects = RI.Selects;
  821. if (RI.HasBranch || !Selects.empty()) {
  822. Instruction *InsertPoint = getBranchInsertPoint(RI);
  823. CHR_DEBUG(dbgs() << "InsertPoint " << *InsertPoint << "\n");
  824. // Avoid a data dependence from a select or a branch to a(nother)
  825. // select. Note no instruction can't data-depend on a branch (a branch
  826. // instruction doesn't produce a value).
  827. DenseSet<Instruction *> Unhoistables;
  828. // Initialize Unhoistables with the selects.
  829. for (SelectInst *SI : Selects) {
  830. Unhoistables.insert(SI);
  831. }
  832. // Remove Selects that can't be hoisted.
  833. for (auto it = Selects.begin(); it != Selects.end(); ) {
  834. SelectInst *SI = *it;
  835. if (SI == InsertPoint) {
  836. ++it;
  837. continue;
  838. }
  839. DenseMap<Instruction *, bool> Visited;
  840. bool IsHoistable = checkHoistValue(SI->getCondition(), InsertPoint,
  841. DT, Unhoistables, nullptr, Visited);
  842. if (!IsHoistable) {
  843. CHR_DEBUG(dbgs() << "Dropping select " << *SI << "\n");
  844. ORE.emit([&]() {
  845. return OptimizationRemarkMissed(DEBUG_TYPE,
  846. "DropUnhoistableSelect", SI)
  847. << "Dropped unhoistable select";
  848. });
  849. it = Selects.erase(it);
  850. // Since we are dropping the select here, we also drop it from
  851. // Unhoistables.
  852. Unhoistables.erase(SI);
  853. } else
  854. ++it;
  855. }
  856. // Update InsertPoint after potentially removing selects.
  857. InsertPoint = getBranchInsertPoint(RI);
  858. CHR_DEBUG(dbgs() << "InsertPoint " << *InsertPoint << "\n");
  859. if (RI.HasBranch && InsertPoint != Branch) {
  860. DenseMap<Instruction *, bool> Visited;
  861. bool IsHoistable = checkHoistValue(Branch->getCondition(), InsertPoint,
  862. DT, Unhoistables, nullptr, Visited);
  863. if (!IsHoistable) {
  864. // If the branch isn't hoistable, drop the selects in the entry
  865. // block, preferring the branch, which makes the branch the hoist
  866. // point.
  867. assert(InsertPoint != Branch && "Branch must not be the hoist point");
  868. CHR_DEBUG(dbgs() << "Dropping selects in entry block \n");
  869. CHR_DEBUG(
  870. for (SelectInst *SI : Selects) {
  871. dbgs() << "SI " << *SI << "\n";
  872. });
  873. for (SelectInst *SI : Selects) {
  874. ORE.emit([&]() {
  875. return OptimizationRemarkMissed(DEBUG_TYPE,
  876. "DropSelectUnhoistableBranch", SI)
  877. << "Dropped select due to unhoistable branch";
  878. });
  879. }
  880. llvm::erase_if(Selects, [EntryBB](SelectInst *SI) {
  881. return SI->getParent() == EntryBB;
  882. });
  883. Unhoistables.clear();
  884. InsertPoint = Branch;
  885. }
  886. }
  887. CHR_DEBUG(dbgs() << "InsertPoint " << *InsertPoint << "\n");
  888. #ifndef NDEBUG
  889. if (RI.HasBranch) {
  890. assert(!DT.dominates(Branch, InsertPoint) &&
  891. "Branch can't be already above the hoist point");
  892. DenseMap<Instruction *, bool> Visited;
  893. assert(checkHoistValue(Branch->getCondition(), InsertPoint,
  894. DT, Unhoistables, nullptr, Visited) &&
  895. "checkHoistValue for branch");
  896. }
  897. for (auto *SI : Selects) {
  898. assert(!DT.dominates(SI, InsertPoint) &&
  899. "SI can't be already above the hoist point");
  900. DenseMap<Instruction *, bool> Visited;
  901. assert(checkHoistValue(SI->getCondition(), InsertPoint, DT,
  902. Unhoistables, nullptr, Visited) &&
  903. "checkHoistValue for selects");
  904. }
  905. CHR_DEBUG(dbgs() << "Result\n");
  906. if (RI.HasBranch) {
  907. CHR_DEBUG(dbgs() << "BI " << *Branch << "\n");
  908. }
  909. for (auto *SI : Selects) {
  910. CHR_DEBUG(dbgs() << "SI " << *SI << "\n");
  911. }
  912. #endif
  913. }
  914. }
  915. // Traverse the region tree, find all nested scopes and merge them if possible.
  916. CHRScope * CHR::findScopes(Region *R, Region *NextRegion, Region *ParentRegion,
  917. SmallVectorImpl<CHRScope *> &Scopes) {
  918. CHR_DEBUG(dbgs() << "findScopes " << R->getNameStr() << "\n");
  919. CHRScope *Result = findScope(R);
  920. // Visit subscopes.
  921. CHRScope *ConsecutiveSubscope = nullptr;
  922. SmallVector<CHRScope *, 8> Subscopes;
  923. for (auto It = R->begin(); It != R->end(); ++It) {
  924. const std::unique_ptr<Region> &SubR = *It;
  925. auto NextIt = std::next(It);
  926. Region *NextSubR = NextIt != R->end() ? NextIt->get() : nullptr;
  927. CHR_DEBUG(dbgs() << "Looking at subregion " << SubR.get()->getNameStr()
  928. << "\n");
  929. CHRScope *SubCHRScope = findScopes(SubR.get(), NextSubR, R, Scopes);
  930. if (SubCHRScope) {
  931. CHR_DEBUG(dbgs() << "Subregion Scope " << *SubCHRScope << "\n");
  932. } else {
  933. CHR_DEBUG(dbgs() << "Subregion Scope null\n");
  934. }
  935. if (SubCHRScope) {
  936. if (!ConsecutiveSubscope)
  937. ConsecutiveSubscope = SubCHRScope;
  938. else if (!ConsecutiveSubscope->appendable(SubCHRScope)) {
  939. Subscopes.push_back(ConsecutiveSubscope);
  940. ConsecutiveSubscope = SubCHRScope;
  941. } else
  942. ConsecutiveSubscope->append(SubCHRScope);
  943. } else {
  944. if (ConsecutiveSubscope) {
  945. Subscopes.push_back(ConsecutiveSubscope);
  946. }
  947. ConsecutiveSubscope = nullptr;
  948. }
  949. }
  950. if (ConsecutiveSubscope) {
  951. Subscopes.push_back(ConsecutiveSubscope);
  952. }
  953. for (CHRScope *Sub : Subscopes) {
  954. if (Result) {
  955. // Combine it with the parent.
  956. Result->addSub(Sub);
  957. } else {
  958. // Push Subscopes as they won't be combined with the parent.
  959. Scopes.push_back(Sub);
  960. }
  961. }
  962. return Result;
  963. }
  964. static DenseSet<Value *> getCHRConditionValuesForRegion(RegInfo &RI) {
  965. DenseSet<Value *> ConditionValues;
  966. if (RI.HasBranch) {
  967. auto *BI = cast<BranchInst>(RI.R->getEntry()->getTerminator());
  968. ConditionValues.insert(BI->getCondition());
  969. }
  970. for (SelectInst *SI : RI.Selects) {
  971. ConditionValues.insert(SI->getCondition());
  972. }
  973. return ConditionValues;
  974. }
  975. // Determine whether to split a scope depending on the sets of the branch
  976. // condition values of the previous region and the current region. We split
  977. // (return true) it if 1) the condition values of the inner/lower scope can't be
  978. // hoisted up to the outer/upper scope, or 2) the two sets of the condition
  979. // values have an empty intersection (because the combined branch conditions
  980. // won't probably lead to a simpler combined condition).
  981. static bool shouldSplit(Instruction *InsertPoint,
  982. DenseSet<Value *> &PrevConditionValues,
  983. DenseSet<Value *> &ConditionValues,
  984. DominatorTree &DT,
  985. DenseSet<Instruction *> &Unhoistables) {
  986. assert(InsertPoint && "Null InsertPoint");
  987. CHR_DEBUG(
  988. dbgs() << "shouldSplit " << *InsertPoint << " PrevConditionValues ";
  989. for (Value *V : PrevConditionValues) {
  990. dbgs() << *V << ", ";
  991. }
  992. dbgs() << " ConditionValues ";
  993. for (Value *V : ConditionValues) {
  994. dbgs() << *V << ", ";
  995. }
  996. dbgs() << "\n");
  997. // If any of Bases isn't hoistable to the hoist point, split.
  998. for (Value *V : ConditionValues) {
  999. DenseMap<Instruction *, bool> Visited;
  1000. if (!checkHoistValue(V, InsertPoint, DT, Unhoistables, nullptr, Visited)) {
  1001. CHR_DEBUG(dbgs() << "Split. checkHoistValue false " << *V << "\n");
  1002. return true; // Not hoistable, split.
  1003. }
  1004. }
  1005. // If PrevConditionValues or ConditionValues is empty, don't split to avoid
  1006. // unnecessary splits at scopes with no branch/selects. If
  1007. // PrevConditionValues and ConditionValues don't intersect at all, split.
  1008. if (!PrevConditionValues.empty() && !ConditionValues.empty()) {
  1009. // Use std::set as DenseSet doesn't work with set_intersection.
  1010. std::set<Value *> PrevBases, Bases;
  1011. DenseMap<Value *, std::set<Value *>> Visited;
  1012. for (Value *V : PrevConditionValues) {
  1013. const std::set<Value *> &BaseValues = getBaseValues(V, DT, Visited);
  1014. PrevBases.insert(BaseValues.begin(), BaseValues.end());
  1015. }
  1016. for (Value *V : ConditionValues) {
  1017. const std::set<Value *> &BaseValues = getBaseValues(V, DT, Visited);
  1018. Bases.insert(BaseValues.begin(), BaseValues.end());
  1019. }
  1020. CHR_DEBUG(
  1021. dbgs() << "PrevBases ";
  1022. for (Value *V : PrevBases) {
  1023. dbgs() << *V << ", ";
  1024. }
  1025. dbgs() << " Bases ";
  1026. for (Value *V : Bases) {
  1027. dbgs() << *V << ", ";
  1028. }
  1029. dbgs() << "\n");
  1030. std::vector<Value *> Intersection;
  1031. std::set_intersection(PrevBases.begin(), PrevBases.end(), Bases.begin(),
  1032. Bases.end(), std::back_inserter(Intersection));
  1033. if (Intersection.empty()) {
  1034. // Empty intersection, split.
  1035. CHR_DEBUG(dbgs() << "Split. Intersection empty\n");
  1036. return true;
  1037. }
  1038. }
  1039. CHR_DEBUG(dbgs() << "No split\n");
  1040. return false; // Don't split.
  1041. }
  1042. static void getSelectsInScope(CHRScope *Scope,
  1043. DenseSet<Instruction *> &Output) {
  1044. for (RegInfo &RI : Scope->RegInfos)
  1045. for (SelectInst *SI : RI.Selects)
  1046. Output.insert(SI);
  1047. for (CHRScope *Sub : Scope->Subs)
  1048. getSelectsInScope(Sub, Output);
  1049. }
  1050. void CHR::splitScopes(SmallVectorImpl<CHRScope *> &Input,
  1051. SmallVectorImpl<CHRScope *> &Output) {
  1052. for (CHRScope *Scope : Input) {
  1053. assert(!Scope->BranchInsertPoint &&
  1054. "BranchInsertPoint must not be set");
  1055. DenseSet<Instruction *> Unhoistables;
  1056. getSelectsInScope(Scope, Unhoistables);
  1057. splitScope(Scope, nullptr, nullptr, nullptr, Output, Unhoistables);
  1058. }
  1059. #ifndef NDEBUG
  1060. for (CHRScope *Scope : Output) {
  1061. assert(Scope->BranchInsertPoint && "BranchInsertPoint must be set");
  1062. }
  1063. #endif
  1064. }
  1065. SmallVector<CHRScope *, 8> CHR::splitScope(
  1066. CHRScope *Scope,
  1067. CHRScope *Outer,
  1068. DenseSet<Value *> *OuterConditionValues,
  1069. Instruction *OuterInsertPoint,
  1070. SmallVectorImpl<CHRScope *> &Output,
  1071. DenseSet<Instruction *> &Unhoistables) {
  1072. if (Outer) {
  1073. assert(OuterConditionValues && "Null OuterConditionValues");
  1074. assert(OuterInsertPoint && "Null OuterInsertPoint");
  1075. }
  1076. bool PrevSplitFromOuter = true;
  1077. DenseSet<Value *> PrevConditionValues;
  1078. Instruction *PrevInsertPoint = nullptr;
  1079. SmallVector<CHRScope *, 8> Splits;
  1080. SmallVector<bool, 8> SplitsSplitFromOuter;
  1081. SmallVector<DenseSet<Value *>, 8> SplitsConditionValues;
  1082. SmallVector<Instruction *, 8> SplitsInsertPoints;
  1083. SmallVector<RegInfo, 8> RegInfos(Scope->RegInfos); // Copy
  1084. for (RegInfo &RI : RegInfos) {
  1085. Instruction *InsertPoint = getBranchInsertPoint(RI);
  1086. DenseSet<Value *> ConditionValues = getCHRConditionValuesForRegion(RI);
  1087. CHR_DEBUG(
  1088. dbgs() << "ConditionValues ";
  1089. for (Value *V : ConditionValues) {
  1090. dbgs() << *V << ", ";
  1091. }
  1092. dbgs() << "\n");
  1093. if (RI.R == RegInfos[0].R) {
  1094. // First iteration. Check to see if we should split from the outer.
  1095. if (Outer) {
  1096. CHR_DEBUG(dbgs() << "Outer " << *Outer << "\n");
  1097. CHR_DEBUG(dbgs() << "Should split from outer at "
  1098. << RI.R->getNameStr() << "\n");
  1099. if (shouldSplit(OuterInsertPoint, *OuterConditionValues,
  1100. ConditionValues, DT, Unhoistables)) {
  1101. PrevConditionValues = ConditionValues;
  1102. PrevInsertPoint = InsertPoint;
  1103. ORE.emit([&]() {
  1104. return OptimizationRemarkMissed(DEBUG_TYPE,
  1105. "SplitScopeFromOuter",
  1106. RI.R->getEntry()->getTerminator())
  1107. << "Split scope from outer due to unhoistable branch/select "
  1108. << "and/or lack of common condition values";
  1109. });
  1110. } else {
  1111. // Not splitting from the outer. Use the outer bases and insert
  1112. // point. Union the bases.
  1113. PrevSplitFromOuter = false;
  1114. PrevConditionValues = *OuterConditionValues;
  1115. PrevConditionValues.insert(ConditionValues.begin(),
  1116. ConditionValues.end());
  1117. PrevInsertPoint = OuterInsertPoint;
  1118. }
  1119. } else {
  1120. CHR_DEBUG(dbgs() << "Outer null\n");
  1121. PrevConditionValues = ConditionValues;
  1122. PrevInsertPoint = InsertPoint;
  1123. }
  1124. } else {
  1125. CHR_DEBUG(dbgs() << "Should split from prev at "
  1126. << RI.R->getNameStr() << "\n");
  1127. if (shouldSplit(PrevInsertPoint, PrevConditionValues, ConditionValues,
  1128. DT, Unhoistables)) {
  1129. CHRScope *Tail = Scope->split(RI.R);
  1130. Scopes.insert(Tail);
  1131. Splits.push_back(Scope);
  1132. SplitsSplitFromOuter.push_back(PrevSplitFromOuter);
  1133. SplitsConditionValues.push_back(PrevConditionValues);
  1134. SplitsInsertPoints.push_back(PrevInsertPoint);
  1135. Scope = Tail;
  1136. PrevConditionValues = ConditionValues;
  1137. PrevInsertPoint = InsertPoint;
  1138. PrevSplitFromOuter = true;
  1139. ORE.emit([&]() {
  1140. return OptimizationRemarkMissed(DEBUG_TYPE,
  1141. "SplitScopeFromPrev",
  1142. RI.R->getEntry()->getTerminator())
  1143. << "Split scope from previous due to unhoistable branch/select "
  1144. << "and/or lack of common condition values";
  1145. });
  1146. } else {
  1147. // Not splitting. Union the bases. Keep the hoist point.
  1148. PrevConditionValues.insert(ConditionValues.begin(), ConditionValues.end());
  1149. }
  1150. }
  1151. }
  1152. Splits.push_back(Scope);
  1153. SplitsSplitFromOuter.push_back(PrevSplitFromOuter);
  1154. SplitsConditionValues.push_back(PrevConditionValues);
  1155. assert(PrevInsertPoint && "Null PrevInsertPoint");
  1156. SplitsInsertPoints.push_back(PrevInsertPoint);
  1157. assert(Splits.size() == SplitsConditionValues.size() &&
  1158. Splits.size() == SplitsSplitFromOuter.size() &&
  1159. Splits.size() == SplitsInsertPoints.size() && "Mismatching sizes");
  1160. for (size_t I = 0; I < Splits.size(); ++I) {
  1161. CHRScope *Split = Splits[I];
  1162. DenseSet<Value *> &SplitConditionValues = SplitsConditionValues[I];
  1163. Instruction *SplitInsertPoint = SplitsInsertPoints[I];
  1164. SmallVector<CHRScope *, 8> NewSubs;
  1165. DenseSet<Instruction *> SplitUnhoistables;
  1166. getSelectsInScope(Split, SplitUnhoistables);
  1167. for (CHRScope *Sub : Split->Subs) {
  1168. SmallVector<CHRScope *, 8> SubSplits = splitScope(
  1169. Sub, Split, &SplitConditionValues, SplitInsertPoint, Output,
  1170. SplitUnhoistables);
  1171. llvm::append_range(NewSubs, SubSplits);
  1172. }
  1173. Split->Subs = NewSubs;
  1174. }
  1175. SmallVector<CHRScope *, 8> Result;
  1176. for (size_t I = 0; I < Splits.size(); ++I) {
  1177. CHRScope *Split = Splits[I];
  1178. if (SplitsSplitFromOuter[I]) {
  1179. // Split from the outer.
  1180. Output.push_back(Split);
  1181. Split->BranchInsertPoint = SplitsInsertPoints[I];
  1182. CHR_DEBUG(dbgs() << "BranchInsertPoint " << *SplitsInsertPoints[I]
  1183. << "\n");
  1184. } else {
  1185. // Connected to the outer.
  1186. Result.push_back(Split);
  1187. }
  1188. }
  1189. if (!Outer)
  1190. assert(Result.empty() &&
  1191. "If no outer (top-level), must return no nested ones");
  1192. return Result;
  1193. }
  1194. void CHR::classifyBiasedScopes(SmallVectorImpl<CHRScope *> &Scopes) {
  1195. for (CHRScope *Scope : Scopes) {
  1196. assert(Scope->TrueBiasedRegions.empty() && Scope->FalseBiasedRegions.empty() && "Empty");
  1197. classifyBiasedScopes(Scope, Scope);
  1198. CHR_DEBUG(
  1199. dbgs() << "classifyBiasedScopes " << *Scope << "\n";
  1200. dbgs() << "TrueBiasedRegions ";
  1201. for (Region *R : Scope->TrueBiasedRegions) {
  1202. dbgs() << R->getNameStr() << ", ";
  1203. }
  1204. dbgs() << "\n";
  1205. dbgs() << "FalseBiasedRegions ";
  1206. for (Region *R : Scope->FalseBiasedRegions) {
  1207. dbgs() << R->getNameStr() << ", ";
  1208. }
  1209. dbgs() << "\n";
  1210. dbgs() << "TrueBiasedSelects ";
  1211. for (SelectInst *SI : Scope->TrueBiasedSelects) {
  1212. dbgs() << *SI << ", ";
  1213. }
  1214. dbgs() << "\n";
  1215. dbgs() << "FalseBiasedSelects ";
  1216. for (SelectInst *SI : Scope->FalseBiasedSelects) {
  1217. dbgs() << *SI << ", ";
  1218. }
  1219. dbgs() << "\n";);
  1220. }
  1221. }
  1222. void CHR::classifyBiasedScopes(CHRScope *Scope, CHRScope *OutermostScope) {
  1223. for (RegInfo &RI : Scope->RegInfos) {
  1224. if (RI.HasBranch) {
  1225. Region *R = RI.R;
  1226. if (TrueBiasedRegionsGlobal.contains(R))
  1227. OutermostScope->TrueBiasedRegions.insert(R);
  1228. else if (FalseBiasedRegionsGlobal.contains(R))
  1229. OutermostScope->FalseBiasedRegions.insert(R);
  1230. else
  1231. llvm_unreachable("Must be biased");
  1232. }
  1233. for (SelectInst *SI : RI.Selects) {
  1234. if (TrueBiasedSelectsGlobal.contains(SI))
  1235. OutermostScope->TrueBiasedSelects.insert(SI);
  1236. else if (FalseBiasedSelectsGlobal.contains(SI))
  1237. OutermostScope->FalseBiasedSelects.insert(SI);
  1238. else
  1239. llvm_unreachable("Must be biased");
  1240. }
  1241. }
  1242. for (CHRScope *Sub : Scope->Subs) {
  1243. classifyBiasedScopes(Sub, OutermostScope);
  1244. }
  1245. }
  1246. static bool hasAtLeastTwoBiasedBranches(CHRScope *Scope) {
  1247. unsigned NumBiased = Scope->TrueBiasedRegions.size() +
  1248. Scope->FalseBiasedRegions.size() +
  1249. Scope->TrueBiasedSelects.size() +
  1250. Scope->FalseBiasedSelects.size();
  1251. return NumBiased >= CHRMergeThreshold;
  1252. }
  1253. void CHR::filterScopes(SmallVectorImpl<CHRScope *> &Input,
  1254. SmallVectorImpl<CHRScope *> &Output) {
  1255. for (CHRScope *Scope : Input) {
  1256. // Filter out the ones with only one region and no subs.
  1257. if (!hasAtLeastTwoBiasedBranches(Scope)) {
  1258. CHR_DEBUG(dbgs() << "Filtered out by biased branches truthy-regions "
  1259. << Scope->TrueBiasedRegions.size()
  1260. << " falsy-regions " << Scope->FalseBiasedRegions.size()
  1261. << " true-selects " << Scope->TrueBiasedSelects.size()
  1262. << " false-selects " << Scope->FalseBiasedSelects.size() << "\n");
  1263. ORE.emit([&]() {
  1264. return OptimizationRemarkMissed(
  1265. DEBUG_TYPE,
  1266. "DropScopeWithOneBranchOrSelect",
  1267. Scope->RegInfos[0].R->getEntry()->getTerminator())
  1268. << "Drop scope with < "
  1269. << ore::NV("CHRMergeThreshold", CHRMergeThreshold)
  1270. << " biased branch(es) or select(s)";
  1271. });
  1272. continue;
  1273. }
  1274. Output.push_back(Scope);
  1275. }
  1276. }
  1277. void CHR::setCHRRegions(SmallVectorImpl<CHRScope *> &Input,
  1278. SmallVectorImpl<CHRScope *> &Output) {
  1279. for (CHRScope *Scope : Input) {
  1280. assert(Scope->HoistStopMap.empty() && Scope->CHRRegions.empty() &&
  1281. "Empty");
  1282. setCHRRegions(Scope, Scope);
  1283. Output.push_back(Scope);
  1284. CHR_DEBUG(
  1285. dbgs() << "setCHRRegions HoistStopMap " << *Scope << "\n";
  1286. for (auto pair : Scope->HoistStopMap) {
  1287. Region *R = pair.first;
  1288. dbgs() << "Region " << R->getNameStr() << "\n";
  1289. for (Instruction *I : pair.second) {
  1290. dbgs() << "HoistStop " << *I << "\n";
  1291. }
  1292. }
  1293. dbgs() << "CHRRegions" << "\n";
  1294. for (RegInfo &RI : Scope->CHRRegions) {
  1295. dbgs() << RI.R->getNameStr() << "\n";
  1296. });
  1297. }
  1298. }
  1299. void CHR::setCHRRegions(CHRScope *Scope, CHRScope *OutermostScope) {
  1300. DenseSet<Instruction *> Unhoistables;
  1301. // Put the biased selects in Unhoistables because they should stay where they
  1302. // are and constant-folded after CHR (in case one biased select or a branch
  1303. // can depend on another biased select.)
  1304. for (RegInfo &RI : Scope->RegInfos) {
  1305. for (SelectInst *SI : RI.Selects) {
  1306. Unhoistables.insert(SI);
  1307. }
  1308. }
  1309. Instruction *InsertPoint = OutermostScope->BranchInsertPoint;
  1310. for (RegInfo &RI : Scope->RegInfos) {
  1311. Region *R = RI.R;
  1312. DenseSet<Instruction *> HoistStops;
  1313. bool IsHoisted = false;
  1314. if (RI.HasBranch) {
  1315. assert((OutermostScope->TrueBiasedRegions.contains(R) ||
  1316. OutermostScope->FalseBiasedRegions.contains(R)) &&
  1317. "Must be truthy or falsy");
  1318. auto *BI = cast<BranchInst>(R->getEntry()->getTerminator());
  1319. // Note checkHoistValue fills in HoistStops.
  1320. DenseMap<Instruction *, bool> Visited;
  1321. bool IsHoistable = checkHoistValue(BI->getCondition(), InsertPoint, DT,
  1322. Unhoistables, &HoistStops, Visited);
  1323. assert(IsHoistable && "Must be hoistable");
  1324. (void)(IsHoistable); // Unused in release build
  1325. IsHoisted = true;
  1326. }
  1327. for (SelectInst *SI : RI.Selects) {
  1328. assert((OutermostScope->TrueBiasedSelects.contains(SI) ||
  1329. OutermostScope->FalseBiasedSelects.contains(SI)) &&
  1330. "Must be true or false biased");
  1331. // Note checkHoistValue fills in HoistStops.
  1332. DenseMap<Instruction *, bool> Visited;
  1333. bool IsHoistable = checkHoistValue(SI->getCondition(), InsertPoint, DT,
  1334. Unhoistables, &HoistStops, Visited);
  1335. assert(IsHoistable && "Must be hoistable");
  1336. (void)(IsHoistable); // Unused in release build
  1337. IsHoisted = true;
  1338. }
  1339. if (IsHoisted) {
  1340. OutermostScope->CHRRegions.push_back(RI);
  1341. OutermostScope->HoistStopMap[R] = HoistStops;
  1342. }
  1343. }
  1344. for (CHRScope *Sub : Scope->Subs)
  1345. setCHRRegions(Sub, OutermostScope);
  1346. }
  1347. static bool CHRScopeSorter(CHRScope *Scope1, CHRScope *Scope2) {
  1348. return Scope1->RegInfos[0].R->getDepth() < Scope2->RegInfos[0].R->getDepth();
  1349. }
  1350. void CHR::sortScopes(SmallVectorImpl<CHRScope *> &Input,
  1351. SmallVectorImpl<CHRScope *> &Output) {
  1352. Output.resize(Input.size());
  1353. llvm::copy(Input, Output.begin());
  1354. llvm::stable_sort(Output, CHRScopeSorter);
  1355. }
  1356. // Return true if V is already hoisted or was hoisted (along with its operands)
  1357. // to the insert point.
  1358. static void hoistValue(Value *V, Instruction *HoistPoint, Region *R,
  1359. HoistStopMapTy &HoistStopMap,
  1360. DenseSet<Instruction *> &HoistedSet,
  1361. DenseSet<PHINode *> &TrivialPHIs,
  1362. DominatorTree &DT) {
  1363. auto IT = HoistStopMap.find(R);
  1364. assert(IT != HoistStopMap.end() && "Region must be in hoist stop map");
  1365. DenseSet<Instruction *> &HoistStops = IT->second;
  1366. if (auto *I = dyn_cast<Instruction>(V)) {
  1367. if (I == HoistPoint)
  1368. return;
  1369. if (HoistStops.count(I))
  1370. return;
  1371. if (auto *PN = dyn_cast<PHINode>(I))
  1372. if (TrivialPHIs.count(PN))
  1373. // The trivial phi inserted by the previous CHR scope could replace a
  1374. // non-phi in HoistStops. Note that since this phi is at the exit of a
  1375. // previous CHR scope, which dominates this scope, it's safe to stop
  1376. // hoisting there.
  1377. return;
  1378. if (HoistedSet.count(I))
  1379. // Already hoisted, return.
  1380. return;
  1381. assert(isHoistableInstructionType(I) && "Unhoistable instruction type");
  1382. assert(DT.getNode(I->getParent()) && "DT must contain I's block");
  1383. assert(DT.getNode(HoistPoint->getParent()) &&
  1384. "DT must contain HoistPoint block");
  1385. if (DT.dominates(I, HoistPoint))
  1386. // We are already above the hoist point. Stop here. This may be necessary
  1387. // when multiple scopes would independently hoist the same
  1388. // instruction. Since an outer (dominating) scope would hoist it to its
  1389. // entry before an inner (dominated) scope would to its entry, the inner
  1390. // scope may see the instruction already hoisted, in which case it
  1391. // potentially wrong for the inner scope to hoist it and could cause bad
  1392. // IR (non-dominating def), but safe to skip hoisting it instead because
  1393. // it's already in a block that dominates the inner scope.
  1394. return;
  1395. for (Value *Op : I->operands()) {
  1396. hoistValue(Op, HoistPoint, R, HoistStopMap, HoistedSet, TrivialPHIs, DT);
  1397. }
  1398. I->moveBefore(HoistPoint);
  1399. HoistedSet.insert(I);
  1400. CHR_DEBUG(dbgs() << "hoistValue " << *I << "\n");
  1401. }
  1402. }
  1403. // Hoist the dependent condition values of the branches and the selects in the
  1404. // scope to the insert point.
  1405. static void hoistScopeConditions(CHRScope *Scope, Instruction *HoistPoint,
  1406. DenseSet<PHINode *> &TrivialPHIs,
  1407. DominatorTree &DT) {
  1408. DenseSet<Instruction *> HoistedSet;
  1409. for (const RegInfo &RI : Scope->CHRRegions) {
  1410. Region *R = RI.R;
  1411. bool IsTrueBiased = Scope->TrueBiasedRegions.count(R);
  1412. bool IsFalseBiased = Scope->FalseBiasedRegions.count(R);
  1413. if (RI.HasBranch && (IsTrueBiased || IsFalseBiased)) {
  1414. auto *BI = cast<BranchInst>(R->getEntry()->getTerminator());
  1415. hoistValue(BI->getCondition(), HoistPoint, R, Scope->HoistStopMap,
  1416. HoistedSet, TrivialPHIs, DT);
  1417. }
  1418. for (SelectInst *SI : RI.Selects) {
  1419. bool IsTrueBiased = Scope->TrueBiasedSelects.count(SI);
  1420. bool IsFalseBiased = Scope->FalseBiasedSelects.count(SI);
  1421. if (!(IsTrueBiased || IsFalseBiased))
  1422. continue;
  1423. hoistValue(SI->getCondition(), HoistPoint, R, Scope->HoistStopMap,
  1424. HoistedSet, TrivialPHIs, DT);
  1425. }
  1426. }
  1427. }
  1428. // Negate the predicate if an ICmp if it's used only by branches or selects by
  1429. // swapping the operands of the branches or the selects. Returns true if success.
  1430. static bool negateICmpIfUsedByBranchOrSelectOnly(ICmpInst *ICmp,
  1431. Instruction *ExcludedUser,
  1432. CHRScope *Scope) {
  1433. for (User *U : ICmp->users()) {
  1434. if (U == ExcludedUser)
  1435. continue;
  1436. if (isa<BranchInst>(U) && cast<BranchInst>(U)->isConditional())
  1437. continue;
  1438. if (isa<SelectInst>(U) && cast<SelectInst>(U)->getCondition() == ICmp)
  1439. continue;
  1440. return false;
  1441. }
  1442. for (User *U : ICmp->users()) {
  1443. if (U == ExcludedUser)
  1444. continue;
  1445. if (auto *BI = dyn_cast<BranchInst>(U)) {
  1446. assert(BI->isConditional() && "Must be conditional");
  1447. BI->swapSuccessors();
  1448. // Don't need to swap this in terms of
  1449. // TrueBiasedRegions/FalseBiasedRegions because true-based/false-based
  1450. // mean whehter the branch is likely go into the if-then rather than
  1451. // successor0/successor1 and because we can tell which edge is the then or
  1452. // the else one by comparing the destination to the region exit block.
  1453. continue;
  1454. }
  1455. if (auto *SI = dyn_cast<SelectInst>(U)) {
  1456. // Swap operands
  1457. SI->swapValues();
  1458. SI->swapProfMetadata();
  1459. if (Scope->TrueBiasedSelects.count(SI)) {
  1460. assert(!Scope->FalseBiasedSelects.contains(SI) &&
  1461. "Must not be already in");
  1462. Scope->FalseBiasedSelects.insert(SI);
  1463. } else if (Scope->FalseBiasedSelects.count(SI)) {
  1464. assert(!Scope->TrueBiasedSelects.contains(SI) &&
  1465. "Must not be already in");
  1466. Scope->TrueBiasedSelects.insert(SI);
  1467. }
  1468. continue;
  1469. }
  1470. llvm_unreachable("Must be a branch or a select");
  1471. }
  1472. ICmp->setPredicate(CmpInst::getInversePredicate(ICmp->getPredicate()));
  1473. return true;
  1474. }
  1475. // A helper for transformScopes. Insert a trivial phi at the scope exit block
  1476. // for a value that's defined in the scope but used outside it (meaning it's
  1477. // alive at the exit block).
  1478. static void insertTrivialPHIs(CHRScope *Scope,
  1479. BasicBlock *EntryBlock, BasicBlock *ExitBlock,
  1480. DenseSet<PHINode *> &TrivialPHIs) {
  1481. SmallSetVector<BasicBlock *, 8> BlocksInScope;
  1482. for (RegInfo &RI : Scope->RegInfos) {
  1483. for (BasicBlock *BB : RI.R->blocks()) { // This includes the blocks in the
  1484. // sub-Scopes.
  1485. BlocksInScope.insert(BB);
  1486. }
  1487. }
  1488. CHR_DEBUG({
  1489. dbgs() << "Inserting redundant phis\n";
  1490. for (BasicBlock *BB : BlocksInScope)
  1491. dbgs() << "BlockInScope " << BB->getName() << "\n";
  1492. });
  1493. for (BasicBlock *BB : BlocksInScope) {
  1494. for (Instruction &I : *BB) {
  1495. SmallVector<Instruction *, 8> Users;
  1496. for (User *U : I.users()) {
  1497. if (auto *UI = dyn_cast<Instruction>(U)) {
  1498. if (!BlocksInScope.contains(UI->getParent()) &&
  1499. // Unless there's already a phi for I at the exit block.
  1500. !(isa<PHINode>(UI) && UI->getParent() == ExitBlock)) {
  1501. CHR_DEBUG(dbgs() << "V " << I << "\n");
  1502. CHR_DEBUG(dbgs() << "Used outside scope by user " << *UI << "\n");
  1503. Users.push_back(UI);
  1504. } else if (UI->getParent() == EntryBlock && isa<PHINode>(UI)) {
  1505. // There's a loop backedge from a block that's dominated by this
  1506. // scope to the entry block.
  1507. CHR_DEBUG(dbgs() << "V " << I << "\n");
  1508. CHR_DEBUG(dbgs()
  1509. << "Used at entry block (for a back edge) by a phi user "
  1510. << *UI << "\n");
  1511. Users.push_back(UI);
  1512. }
  1513. }
  1514. }
  1515. if (Users.size() > 0) {
  1516. // Insert a trivial phi for I (phi [&I, P0], [&I, P1], ...) at
  1517. // ExitBlock. Replace I with the new phi in UI unless UI is another
  1518. // phi at ExitBlock.
  1519. PHINode *PN = PHINode::Create(I.getType(), pred_size(ExitBlock), "",
  1520. &ExitBlock->front());
  1521. for (BasicBlock *Pred : predecessors(ExitBlock)) {
  1522. PN->addIncoming(&I, Pred);
  1523. }
  1524. TrivialPHIs.insert(PN);
  1525. CHR_DEBUG(dbgs() << "Insert phi " << *PN << "\n");
  1526. for (Instruction *UI : Users) {
  1527. for (unsigned J = 0, NumOps = UI->getNumOperands(); J < NumOps; ++J) {
  1528. if (UI->getOperand(J) == &I) {
  1529. UI->setOperand(J, PN);
  1530. }
  1531. }
  1532. CHR_DEBUG(dbgs() << "Updated user " << *UI << "\n");
  1533. }
  1534. }
  1535. }
  1536. }
  1537. }
  1538. // Assert that all the CHR regions of the scope have a biased branch or select.
  1539. static void LLVM_ATTRIBUTE_UNUSED
  1540. assertCHRRegionsHaveBiasedBranchOrSelect(CHRScope *Scope) {
  1541. #ifndef NDEBUG
  1542. auto HasBiasedBranchOrSelect = [](RegInfo &RI, CHRScope *Scope) {
  1543. if (Scope->TrueBiasedRegions.count(RI.R) ||
  1544. Scope->FalseBiasedRegions.count(RI.R))
  1545. return true;
  1546. for (SelectInst *SI : RI.Selects)
  1547. if (Scope->TrueBiasedSelects.count(SI) ||
  1548. Scope->FalseBiasedSelects.count(SI))
  1549. return true;
  1550. return false;
  1551. };
  1552. for (RegInfo &RI : Scope->CHRRegions) {
  1553. assert(HasBiasedBranchOrSelect(RI, Scope) &&
  1554. "Must have biased branch or select");
  1555. }
  1556. #endif
  1557. }
  1558. // Assert that all the condition values of the biased branches and selects have
  1559. // been hoisted to the pre-entry block or outside of the scope.
  1560. static void LLVM_ATTRIBUTE_UNUSED assertBranchOrSelectConditionHoisted(
  1561. CHRScope *Scope, BasicBlock *PreEntryBlock) {
  1562. CHR_DEBUG(dbgs() << "Biased regions condition values \n");
  1563. for (RegInfo &RI : Scope->CHRRegions) {
  1564. Region *R = RI.R;
  1565. bool IsTrueBiased = Scope->TrueBiasedRegions.count(R);
  1566. bool IsFalseBiased = Scope->FalseBiasedRegions.count(R);
  1567. if (RI.HasBranch && (IsTrueBiased || IsFalseBiased)) {
  1568. auto *BI = cast<BranchInst>(R->getEntry()->getTerminator());
  1569. Value *V = BI->getCondition();
  1570. CHR_DEBUG(dbgs() << *V << "\n");
  1571. if (auto *I = dyn_cast<Instruction>(V)) {
  1572. (void)(I); // Unused in release build.
  1573. assert((I->getParent() == PreEntryBlock ||
  1574. !Scope->contains(I)) &&
  1575. "Must have been hoisted to PreEntryBlock or outside the scope");
  1576. }
  1577. }
  1578. for (SelectInst *SI : RI.Selects) {
  1579. bool IsTrueBiased = Scope->TrueBiasedSelects.count(SI);
  1580. bool IsFalseBiased = Scope->FalseBiasedSelects.count(SI);
  1581. if (!(IsTrueBiased || IsFalseBiased))
  1582. continue;
  1583. Value *V = SI->getCondition();
  1584. CHR_DEBUG(dbgs() << *V << "\n");
  1585. if (auto *I = dyn_cast<Instruction>(V)) {
  1586. (void)(I); // Unused in release build.
  1587. assert((I->getParent() == PreEntryBlock ||
  1588. !Scope->contains(I)) &&
  1589. "Must have been hoisted to PreEntryBlock or outside the scope");
  1590. }
  1591. }
  1592. }
  1593. }
  1594. void CHR::transformScopes(CHRScope *Scope, DenseSet<PHINode *> &TrivialPHIs) {
  1595. CHR_DEBUG(dbgs() << "transformScopes " << *Scope << "\n");
  1596. assert(Scope->RegInfos.size() >= 1 && "Should have at least one Region");
  1597. Region *FirstRegion = Scope->RegInfos[0].R;
  1598. BasicBlock *EntryBlock = FirstRegion->getEntry();
  1599. Region *LastRegion = Scope->RegInfos[Scope->RegInfos.size() - 1].R;
  1600. BasicBlock *ExitBlock = LastRegion->getExit();
  1601. Optional<uint64_t> ProfileCount = BFI.getBlockProfileCount(EntryBlock);
  1602. if (ExitBlock) {
  1603. // Insert a trivial phi at the exit block (where the CHR hot path and the
  1604. // cold path merges) for a value that's defined in the scope but used
  1605. // outside it (meaning it's alive at the exit block). We will add the
  1606. // incoming values for the CHR cold paths to it below. Without this, we'd
  1607. // miss updating phi's for such values unless there happens to already be a
  1608. // phi for that value there.
  1609. insertTrivialPHIs(Scope, EntryBlock, ExitBlock, TrivialPHIs);
  1610. }
  1611. // Split the entry block of the first region. The new block becomes the new
  1612. // entry block of the first region. The old entry block becomes the block to
  1613. // insert the CHR branch into. Note DT gets updated. Since DT gets updated
  1614. // through the split, we update the entry of the first region after the split,
  1615. // and Region only points to the entry and the exit blocks, rather than
  1616. // keeping everything in a list or set, the blocks membership and the
  1617. // entry/exit blocks of the region are still valid after the split.
  1618. CHR_DEBUG(dbgs() << "Splitting entry block " << EntryBlock->getName()
  1619. << " at " << *Scope->BranchInsertPoint << "\n");
  1620. BasicBlock *NewEntryBlock =
  1621. SplitBlock(EntryBlock, Scope->BranchInsertPoint, &DT);
  1622. assert(NewEntryBlock->getSinglePredecessor() == EntryBlock &&
  1623. "NewEntryBlock's only pred must be EntryBlock");
  1624. FirstRegion->replaceEntryRecursive(NewEntryBlock);
  1625. BasicBlock *PreEntryBlock = EntryBlock;
  1626. ValueToValueMapTy VMap;
  1627. // Clone the blocks in the scope (excluding the PreEntryBlock) to split into a
  1628. // hot path (originals) and a cold path (clones) and update the PHIs at the
  1629. // exit block.
  1630. cloneScopeBlocks(Scope, PreEntryBlock, ExitBlock, LastRegion, VMap);
  1631. // Replace the old (placeholder) branch with the new (merged) conditional
  1632. // branch.
  1633. BranchInst *MergedBr = createMergedBranch(PreEntryBlock, EntryBlock,
  1634. NewEntryBlock, VMap);
  1635. #ifndef NDEBUG
  1636. assertCHRRegionsHaveBiasedBranchOrSelect(Scope);
  1637. #endif
  1638. // Hoist the conditional values of the branches/selects.
  1639. hoistScopeConditions(Scope, PreEntryBlock->getTerminator(), TrivialPHIs, DT);
  1640. #ifndef NDEBUG
  1641. assertBranchOrSelectConditionHoisted(Scope, PreEntryBlock);
  1642. #endif
  1643. // Create the combined branch condition and constant-fold the branches/selects
  1644. // in the hot path.
  1645. fixupBranchesAndSelects(Scope, PreEntryBlock, MergedBr,
  1646. ProfileCount.getValueOr(0));
  1647. }
  1648. // A helper for transformScopes. Clone the blocks in the scope (excluding the
  1649. // PreEntryBlock) to split into a hot path and a cold path and update the PHIs
  1650. // at the exit block.
  1651. void CHR::cloneScopeBlocks(CHRScope *Scope,
  1652. BasicBlock *PreEntryBlock,
  1653. BasicBlock *ExitBlock,
  1654. Region *LastRegion,
  1655. ValueToValueMapTy &VMap) {
  1656. // Clone all the blocks. The original blocks will be the hot-path
  1657. // CHR-optimized code and the cloned blocks will be the original unoptimized
  1658. // code. This is so that the block pointers from the
  1659. // CHRScope/Region/RegionInfo can stay valid in pointing to the hot-path code
  1660. // which CHR should apply to.
  1661. SmallVector<BasicBlock*, 8> NewBlocks;
  1662. for (RegInfo &RI : Scope->RegInfos)
  1663. for (BasicBlock *BB : RI.R->blocks()) { // This includes the blocks in the
  1664. // sub-Scopes.
  1665. assert(BB != PreEntryBlock && "Don't copy the preetntry block");
  1666. BasicBlock *NewBB = CloneBasicBlock(BB, VMap, ".nonchr", &F);
  1667. NewBlocks.push_back(NewBB);
  1668. VMap[BB] = NewBB;
  1669. }
  1670. // Place the cloned blocks right after the original blocks (right before the
  1671. // exit block of.)
  1672. if (ExitBlock)
  1673. F.getBasicBlockList().splice(ExitBlock->getIterator(),
  1674. F.getBasicBlockList(),
  1675. NewBlocks[0]->getIterator(), F.end());
  1676. // Update the cloned blocks/instructions to refer to themselves.
  1677. for (unsigned i = 0, e = NewBlocks.size(); i != e; ++i)
  1678. for (Instruction &I : *NewBlocks[i])
  1679. RemapInstruction(&I, VMap,
  1680. RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);
  1681. // Add the cloned blocks to the PHIs of the exit blocks. ExitBlock is null for
  1682. // the top-level region but we don't need to add PHIs. The trivial PHIs
  1683. // inserted above will be updated here.
  1684. if (ExitBlock)
  1685. for (PHINode &PN : ExitBlock->phis())
  1686. for (unsigned I = 0, NumOps = PN.getNumIncomingValues(); I < NumOps;
  1687. ++I) {
  1688. BasicBlock *Pred = PN.getIncomingBlock(I);
  1689. if (LastRegion->contains(Pred)) {
  1690. Value *V = PN.getIncomingValue(I);
  1691. auto It = VMap.find(V);
  1692. if (It != VMap.end()) V = It->second;
  1693. assert(VMap.find(Pred) != VMap.end() && "Pred must have been cloned");
  1694. PN.addIncoming(V, cast<BasicBlock>(VMap[Pred]));
  1695. }
  1696. }
  1697. }
  1698. // A helper for transformScope. Replace the old (placeholder) branch with the
  1699. // new (merged) conditional branch.
  1700. BranchInst *CHR::createMergedBranch(BasicBlock *PreEntryBlock,
  1701. BasicBlock *EntryBlock,
  1702. BasicBlock *NewEntryBlock,
  1703. ValueToValueMapTy &VMap) {
  1704. BranchInst *OldBR = cast<BranchInst>(PreEntryBlock->getTerminator());
  1705. assert(OldBR->isUnconditional() && OldBR->getSuccessor(0) == NewEntryBlock &&
  1706. "SplitBlock did not work correctly!");
  1707. assert(NewEntryBlock->getSinglePredecessor() == EntryBlock &&
  1708. "NewEntryBlock's only pred must be EntryBlock");
  1709. assert(VMap.find(NewEntryBlock) != VMap.end() &&
  1710. "NewEntryBlock must have been copied");
  1711. OldBR->dropAllReferences();
  1712. OldBR->eraseFromParent();
  1713. // The true predicate is a placeholder. It will be replaced later in
  1714. // fixupBranchesAndSelects().
  1715. BranchInst *NewBR = BranchInst::Create(NewEntryBlock,
  1716. cast<BasicBlock>(VMap[NewEntryBlock]),
  1717. ConstantInt::getTrue(F.getContext()));
  1718. PreEntryBlock->getInstList().push_back(NewBR);
  1719. assert(NewEntryBlock->getSinglePredecessor() == EntryBlock &&
  1720. "NewEntryBlock's only pred must be EntryBlock");
  1721. return NewBR;
  1722. }
  1723. // A helper for transformScopes. Create the combined branch condition and
  1724. // constant-fold the branches/selects in the hot path.
  1725. void CHR::fixupBranchesAndSelects(CHRScope *Scope,
  1726. BasicBlock *PreEntryBlock,
  1727. BranchInst *MergedBR,
  1728. uint64_t ProfileCount) {
  1729. Value *MergedCondition = ConstantInt::getTrue(F.getContext());
  1730. BranchProbability CHRBranchBias(1, 1);
  1731. uint64_t NumCHRedBranches = 0;
  1732. IRBuilder<> IRB(PreEntryBlock->getTerminator());
  1733. for (RegInfo &RI : Scope->CHRRegions) {
  1734. Region *R = RI.R;
  1735. if (RI.HasBranch) {
  1736. fixupBranch(R, Scope, IRB, MergedCondition, CHRBranchBias);
  1737. ++NumCHRedBranches;
  1738. }
  1739. for (SelectInst *SI : RI.Selects) {
  1740. fixupSelect(SI, Scope, IRB, MergedCondition, CHRBranchBias);
  1741. ++NumCHRedBranches;
  1742. }
  1743. }
  1744. Stats.NumBranchesDelta += NumCHRedBranches - 1;
  1745. Stats.WeightedNumBranchesDelta += (NumCHRedBranches - 1) * ProfileCount;
  1746. ORE.emit([&]() {
  1747. return OptimizationRemark(DEBUG_TYPE,
  1748. "CHR",
  1749. // Refer to the hot (original) path
  1750. MergedBR->getSuccessor(0)->getTerminator())
  1751. << "Merged " << ore::NV("NumCHRedBranches", NumCHRedBranches)
  1752. << " branches or selects";
  1753. });
  1754. MergedBR->setCondition(MergedCondition);
  1755. uint32_t Weights[] = {
  1756. static_cast<uint32_t>(CHRBranchBias.scale(1000)),
  1757. static_cast<uint32_t>(CHRBranchBias.getCompl().scale(1000)),
  1758. };
  1759. MDBuilder MDB(F.getContext());
  1760. MergedBR->setMetadata(LLVMContext::MD_prof, MDB.createBranchWeights(Weights));
  1761. CHR_DEBUG(dbgs() << "CHR branch bias " << Weights[0] << ":" << Weights[1]
  1762. << "\n");
  1763. }
  1764. // A helper for fixupBranchesAndSelects. Add to the combined branch condition
  1765. // and constant-fold a branch in the hot path.
  1766. void CHR::fixupBranch(Region *R, CHRScope *Scope,
  1767. IRBuilder<> &IRB,
  1768. Value *&MergedCondition,
  1769. BranchProbability &CHRBranchBias) {
  1770. bool IsTrueBiased = Scope->TrueBiasedRegions.count(R);
  1771. assert((IsTrueBiased || Scope->FalseBiasedRegions.count(R)) &&
  1772. "Must be truthy or falsy");
  1773. auto *BI = cast<BranchInst>(R->getEntry()->getTerminator());
  1774. assert(BranchBiasMap.find(R) != BranchBiasMap.end() &&
  1775. "Must be in the bias map");
  1776. BranchProbability Bias = BranchBiasMap[R];
  1777. assert(Bias >= getCHRBiasThreshold() && "Must be highly biased");
  1778. // Take the min.
  1779. if (CHRBranchBias > Bias)
  1780. CHRBranchBias = Bias;
  1781. BasicBlock *IfThen = BI->getSuccessor(1);
  1782. BasicBlock *IfElse = BI->getSuccessor(0);
  1783. BasicBlock *RegionExitBlock = R->getExit();
  1784. assert(RegionExitBlock && "Null ExitBlock");
  1785. assert((IfThen == RegionExitBlock || IfElse == RegionExitBlock) &&
  1786. IfThen != IfElse && "Invariant from findScopes");
  1787. if (IfThen == RegionExitBlock) {
  1788. // Swap them so that IfThen means going into it and IfElse means skipping
  1789. // it.
  1790. std::swap(IfThen, IfElse);
  1791. }
  1792. CHR_DEBUG(dbgs() << "IfThen " << IfThen->getName()
  1793. << " IfElse " << IfElse->getName() << "\n");
  1794. Value *Cond = BI->getCondition();
  1795. BasicBlock *HotTarget = IsTrueBiased ? IfThen : IfElse;
  1796. bool ConditionTrue = HotTarget == BI->getSuccessor(0);
  1797. addToMergedCondition(ConditionTrue, Cond, BI, Scope, IRB,
  1798. MergedCondition);
  1799. // Constant-fold the branch at ClonedEntryBlock.
  1800. assert(ConditionTrue == (HotTarget == BI->getSuccessor(0)) &&
  1801. "The successor shouldn't change");
  1802. Value *NewCondition = ConditionTrue ?
  1803. ConstantInt::getTrue(F.getContext()) :
  1804. ConstantInt::getFalse(F.getContext());
  1805. BI->setCondition(NewCondition);
  1806. }
  1807. // A helper for fixupBranchesAndSelects. Add to the combined branch condition
  1808. // and constant-fold a select in the hot path.
  1809. void CHR::fixupSelect(SelectInst *SI, CHRScope *Scope,
  1810. IRBuilder<> &IRB,
  1811. Value *&MergedCondition,
  1812. BranchProbability &CHRBranchBias) {
  1813. bool IsTrueBiased = Scope->TrueBiasedSelects.count(SI);
  1814. assert((IsTrueBiased ||
  1815. Scope->FalseBiasedSelects.count(SI)) && "Must be biased");
  1816. assert(SelectBiasMap.find(SI) != SelectBiasMap.end() &&
  1817. "Must be in the bias map");
  1818. BranchProbability Bias = SelectBiasMap[SI];
  1819. assert(Bias >= getCHRBiasThreshold() && "Must be highly biased");
  1820. // Take the min.
  1821. if (CHRBranchBias > Bias)
  1822. CHRBranchBias = Bias;
  1823. Value *Cond = SI->getCondition();
  1824. addToMergedCondition(IsTrueBiased, Cond, SI, Scope, IRB,
  1825. MergedCondition);
  1826. Value *NewCondition = IsTrueBiased ?
  1827. ConstantInt::getTrue(F.getContext()) :
  1828. ConstantInt::getFalse(F.getContext());
  1829. SI->setCondition(NewCondition);
  1830. }
  1831. // A helper for fixupBranch/fixupSelect. Add a branch condition to the merged
  1832. // condition.
  1833. void CHR::addToMergedCondition(bool IsTrueBiased, Value *Cond,
  1834. Instruction *BranchOrSelect,
  1835. CHRScope *Scope,
  1836. IRBuilder<> &IRB,
  1837. Value *&MergedCondition) {
  1838. if (IsTrueBiased) {
  1839. MergedCondition = IRB.CreateAnd(MergedCondition, Cond);
  1840. } else {
  1841. // If Cond is an icmp and all users of V except for BranchOrSelect is a
  1842. // branch, negate the icmp predicate and swap the branch targets and avoid
  1843. // inserting an Xor to negate Cond.
  1844. bool Done = false;
  1845. if (auto *ICmp = dyn_cast<ICmpInst>(Cond))
  1846. if (negateICmpIfUsedByBranchOrSelectOnly(ICmp, BranchOrSelect, Scope)) {
  1847. MergedCondition = IRB.CreateAnd(MergedCondition, Cond);
  1848. Done = true;
  1849. }
  1850. if (!Done) {
  1851. Value *Negate = IRB.CreateXor(
  1852. ConstantInt::getTrue(F.getContext()), Cond);
  1853. MergedCondition = IRB.CreateAnd(MergedCondition, Negate);
  1854. }
  1855. }
  1856. }
  1857. void CHR::transformScopes(SmallVectorImpl<CHRScope *> &CHRScopes) {
  1858. unsigned I = 0;
  1859. DenseSet<PHINode *> TrivialPHIs;
  1860. for (CHRScope *Scope : CHRScopes) {
  1861. transformScopes(Scope, TrivialPHIs);
  1862. CHR_DEBUG(
  1863. std::ostringstream oss;
  1864. oss << " after transformScopes " << I++;
  1865. dumpIR(F, oss.str().c_str(), nullptr));
  1866. (void)I;
  1867. }
  1868. }
  1869. static void LLVM_ATTRIBUTE_UNUSED
  1870. dumpScopes(SmallVectorImpl<CHRScope *> &Scopes, const char *Label) {
  1871. dbgs() << Label << " " << Scopes.size() << "\n";
  1872. for (CHRScope *Scope : Scopes) {
  1873. dbgs() << *Scope << "\n";
  1874. }
  1875. }
  1876. bool CHR::run() {
  1877. if (!shouldApply(F, PSI))
  1878. return false;
  1879. CHR_DEBUG(dumpIR(F, "before", nullptr));
  1880. bool Changed = false;
  1881. {
  1882. CHR_DEBUG(
  1883. dbgs() << "RegionInfo:\n";
  1884. RI.print(dbgs()));
  1885. // Recursively traverse the region tree and find regions that have biased
  1886. // branches and/or selects and create scopes.
  1887. SmallVector<CHRScope *, 8> AllScopes;
  1888. findScopes(AllScopes);
  1889. CHR_DEBUG(dumpScopes(AllScopes, "All scopes"));
  1890. // Split the scopes if 1) the conditiona values of the biased
  1891. // branches/selects of the inner/lower scope can't be hoisted up to the
  1892. // outermost/uppermost scope entry, or 2) the condition values of the biased
  1893. // branches/selects in a scope (including subscopes) don't share at least
  1894. // one common value.
  1895. SmallVector<CHRScope *, 8> SplitScopes;
  1896. splitScopes(AllScopes, SplitScopes);
  1897. CHR_DEBUG(dumpScopes(SplitScopes, "Split scopes"));
  1898. // After splitting, set the biased regions and selects of a scope (a tree
  1899. // root) that include those of the subscopes.
  1900. classifyBiasedScopes(SplitScopes);
  1901. CHR_DEBUG(dbgs() << "Set per-scope bias " << SplitScopes.size() << "\n");
  1902. // Filter out the scopes that has only one biased region or select (CHR
  1903. // isn't useful in such a case).
  1904. SmallVector<CHRScope *, 8> FilteredScopes;
  1905. filterScopes(SplitScopes, FilteredScopes);
  1906. CHR_DEBUG(dumpScopes(FilteredScopes, "Filtered scopes"));
  1907. // Set the regions to be CHR'ed and their hoist stops for each scope.
  1908. SmallVector<CHRScope *, 8> SetScopes;
  1909. setCHRRegions(FilteredScopes, SetScopes);
  1910. CHR_DEBUG(dumpScopes(SetScopes, "Set CHR regions"));
  1911. // Sort CHRScopes by the depth so that outer CHRScopes comes before inner
  1912. // ones. We need to apply CHR from outer to inner so that we apply CHR only
  1913. // to the hot path, rather than both hot and cold paths.
  1914. SmallVector<CHRScope *, 8> SortedScopes;
  1915. sortScopes(SetScopes, SortedScopes);
  1916. CHR_DEBUG(dumpScopes(SortedScopes, "Sorted scopes"));
  1917. CHR_DEBUG(
  1918. dbgs() << "RegionInfo:\n";
  1919. RI.print(dbgs()));
  1920. // Apply the CHR transformation.
  1921. if (!SortedScopes.empty()) {
  1922. transformScopes(SortedScopes);
  1923. Changed = true;
  1924. }
  1925. }
  1926. if (Changed) {
  1927. CHR_DEBUG(dumpIR(F, "after", &Stats));
  1928. ORE.emit([&]() {
  1929. return OptimizationRemark(DEBUG_TYPE, "Stats", &F)
  1930. << ore::NV("Function", &F) << " "
  1931. << "Reduced the number of branches in hot paths by "
  1932. << ore::NV("NumBranchesDelta", Stats.NumBranchesDelta)
  1933. << " (static) and "
  1934. << ore::NV("WeightedNumBranchesDelta", Stats.WeightedNumBranchesDelta)
  1935. << " (weighted by PGO count)";
  1936. });
  1937. }
  1938. return Changed;
  1939. }
  1940. bool ControlHeightReductionLegacyPass::runOnFunction(Function &F) {
  1941. BlockFrequencyInfo &BFI =
  1942. getAnalysis<BlockFrequencyInfoWrapperPass>().getBFI();
  1943. DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
  1944. ProfileSummaryInfo &PSI =
  1945. getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
  1946. RegionInfo &RI = getAnalysis<RegionInfoPass>().getRegionInfo();
  1947. std::unique_ptr<OptimizationRemarkEmitter> OwnedORE =
  1948. std::make_unique<OptimizationRemarkEmitter>(&F);
  1949. return CHR(F, BFI, DT, PSI, RI, *OwnedORE.get()).run();
  1950. }
  1951. namespace llvm {
  1952. ControlHeightReductionPass::ControlHeightReductionPass() {
  1953. parseCHRFilterFiles();
  1954. }
  1955. PreservedAnalyses ControlHeightReductionPass::run(
  1956. Function &F,
  1957. FunctionAnalysisManager &FAM) {
  1958. auto &BFI = FAM.getResult<BlockFrequencyAnalysis>(F);
  1959. auto &DT = FAM.getResult<DominatorTreeAnalysis>(F);
  1960. auto &MAMProxy = FAM.getResult<ModuleAnalysisManagerFunctionProxy>(F);
  1961. auto &PSI = *MAMProxy.getCachedResult<ProfileSummaryAnalysis>(*F.getParent());
  1962. auto &RI = FAM.getResult<RegionInfoAnalysis>(F);
  1963. auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(F);
  1964. bool Changed = CHR(F, BFI, DT, PSI, RI, ORE).run();
  1965. if (!Changed)
  1966. return PreservedAnalyses::all();
  1967. return PreservedAnalyses::none();
  1968. }
  1969. } // namespace llvm