ControlHeightReduction.cpp 79 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007017027037047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417141814191420142114221423142414251426142714281429143014311432143314341435143614371438143914401441144214431444144514461447144814491450145114521453145414551456145714581459146014611462146314641465146614671468146914701471147214731474147514761477147814791480148114821483148414851486148714881489149014911492149314941495149614971498149915001501150215031504150515061507150815091510151115121513151415151516151715181519152015211522152315241525152615271528152915301531153215331534153515361537153815391540154115421543154415451546154715481549155015511552155315541555155615571558155915601561156215631564156515661567156815691570157115721573157415751576157715781579158015811582158315841585158615871588158915901591159215931594159515961597159815991600160116021603160416051606160716081609161016111612161316141615161616171618161916201621162216231624162516261627162816291630163116321633163416351636163716381639164016411642164316441645164616471648164916501651165216531654165516561657165816591660166116621663166416651666166716681669167016711672167316741675167616771678167916801681168216831684168516861687168816891690169116921693169416951696169716981699170017011702170317041705170617071708170917101711171217131714171517161717171817191720172117221723172417251726172717281729173017311732173317341735173617371738173917401741174217431744174517461747174817491750175117521753175417551756175717581759176017611762176317641765176617671768176917701771177217731774177517761777177817791780178117821783178417851786178717881789179017911792179317941795179617971798179918001801180218031804180518061807180818091810181118121813181418151816181718181819182018211822182318241825182618271828182918301831183218331834183518361837183818391840184118421843184418451846184718481849185018511852185318541855185618571858185918601861186218631864186518661867186818691870187118721873187418751876187718781879188018811882188318841885188618871888188918901891189218931894189518961897189818991900190119021903190419051906190719081909191019111912191319141915191619171918191919201921192219231924192519261927192819291930193119321933193419351936193719381939194019411942194319441945194619471948194919501951195219531954195519561957195819591960196119621963196419651966196719681969197019711972197319741975197619771978197919801981198219831984198519861987198819891990199119921993199419951996199719981999200020012002200320042005200620072008200920102011201220132014201520162017201820192020202120222023202420252026202720282029203020312032203320342035203620372038203920402041204220432044204520462047204820492050205120522053205420552056205720582059206020612062206320642065206620672068206920702071207220732074207520762077207820792080208120822083208420852086208720882089209020912092209320942095
  1. //===-- ControlHeightReduction.cpp - Control Height Reduction -------------===//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. //
  9. // This pass merges conditional blocks of code and reduces the number of
  10. // conditional branches in the hot paths based on profiles.
  11. //
  12. //===----------------------------------------------------------------------===//
  13. #include "llvm/Transforms/Instrumentation/ControlHeightReduction.h"
  14. #include "llvm/ADT/DenseMap.h"
  15. #include "llvm/ADT/DenseSet.h"
  16. #include "llvm/ADT/SmallVector.h"
  17. #include "llvm/ADT/StringSet.h"
  18. #include "llvm/Analysis/BlockFrequencyInfo.h"
  19. #include "llvm/Analysis/GlobalsModRef.h"
  20. #include "llvm/Analysis/OptimizationRemarkEmitter.h"
  21. #include "llvm/Analysis/ProfileSummaryInfo.h"
  22. #include "llvm/Analysis/RegionInfo.h"
  23. #include "llvm/Analysis/RegionIterator.h"
  24. #include "llvm/Analysis/ValueTracking.h"
  25. #include "llvm/IR/CFG.h"
  26. #include "llvm/IR/Dominators.h"
  27. #include "llvm/IR/IRBuilder.h"
  28. #include "llvm/IR/IntrinsicInst.h"
  29. #include "llvm/IR/MDBuilder.h"
  30. #include "llvm/IR/PassManager.h"
  31. #include "llvm/IR/ProfDataUtils.h"
  32. #include "llvm/InitializePasses.h"
  33. #include "llvm/Support/BranchProbability.h"
  34. #include "llvm/Support/CommandLine.h"
  35. #include "llvm/Support/MemoryBuffer.h"
  36. #include "llvm/Transforms/Utils.h"
  37. #include "llvm/Transforms/Utils/BasicBlockUtils.h"
  38. #include "llvm/Transforms/Utils/Cloning.h"
  39. #include "llvm/Transforms/Utils/ValueMapper.h"
  40. #include <optional>
  41. #include <set>
  42. #include <sstream>
  43. using namespace llvm;
  44. #define DEBUG_TYPE "chr"
  45. #define CHR_DEBUG(X) LLVM_DEBUG(X)
  46. static cl::opt<bool> DisableCHR("disable-chr", cl::init(false), cl::Hidden,
  47. cl::desc("Disable CHR for all functions"));
  48. static cl::opt<bool> ForceCHR("force-chr", cl::init(false), cl::Hidden,
  49. cl::desc("Apply CHR for all functions"));
  50. static cl::opt<double> CHRBiasThreshold(
  51. "chr-bias-threshold", cl::init(0.99), cl::Hidden,
  52. cl::desc("CHR considers a branch bias greater than this ratio as biased"));
  53. static cl::opt<unsigned> CHRMergeThreshold(
  54. "chr-merge-threshold", cl::init(2), cl::Hidden,
  55. cl::desc("CHR merges a group of N branches/selects where N >= this value"));
  56. static cl::opt<std::string> CHRModuleList(
  57. "chr-module-list", cl::init(""), cl::Hidden,
  58. cl::desc("Specify file to retrieve the list of modules to apply CHR to"));
  59. static cl::opt<std::string> CHRFunctionList(
  60. "chr-function-list", cl::init(""), cl::Hidden,
  61. cl::desc("Specify file to retrieve the list of functions to apply CHR to"));
  62. static cl::opt<unsigned> CHRDupThreshsold(
  63. "chr-dup-threshold", cl::init(3), cl::Hidden,
  64. cl::desc("Max number of duplications by CHR for a region"));
  65. static StringSet<> CHRModules;
  66. static StringSet<> CHRFunctions;
  67. static void parseCHRFilterFiles() {
  68. if (!CHRModuleList.empty()) {
  69. auto FileOrErr = MemoryBuffer::getFile(CHRModuleList);
  70. if (!FileOrErr) {
  71. errs() << "Error: Couldn't read the chr-module-list file " << CHRModuleList << "\n";
  72. std::exit(1);
  73. }
  74. StringRef Buf = FileOrErr->get()->getBuffer();
  75. SmallVector<StringRef, 0> Lines;
  76. Buf.split(Lines, '\n');
  77. for (StringRef Line : Lines) {
  78. Line = Line.trim();
  79. if (!Line.empty())
  80. CHRModules.insert(Line);
  81. }
  82. }
  83. if (!CHRFunctionList.empty()) {
  84. auto FileOrErr = MemoryBuffer::getFile(CHRFunctionList);
  85. if (!FileOrErr) {
  86. errs() << "Error: Couldn't read the chr-function-list file " << CHRFunctionList << "\n";
  87. std::exit(1);
  88. }
  89. StringRef Buf = FileOrErr->get()->getBuffer();
  90. SmallVector<StringRef, 0> Lines;
  91. Buf.split(Lines, '\n');
  92. for (StringRef Line : Lines) {
  93. Line = Line.trim();
  94. if (!Line.empty())
  95. CHRFunctions.insert(Line);
  96. }
  97. }
  98. }
  99. namespace {
  100. struct CHRStats {
  101. CHRStats() = default;
  102. void print(raw_ostream &OS) const {
  103. OS << "CHRStats: NumBranches " << NumBranches
  104. << " NumBranchesDelta " << NumBranchesDelta
  105. << " WeightedNumBranchesDelta " << WeightedNumBranchesDelta;
  106. }
  107. // The original number of conditional branches / selects
  108. uint64_t NumBranches = 0;
  109. // The decrease of the number of conditional branches / selects in the hot
  110. // paths due to CHR.
  111. uint64_t NumBranchesDelta = 0;
  112. // NumBranchesDelta weighted by the profile count at the scope entry.
  113. uint64_t WeightedNumBranchesDelta = 0;
  114. };
  115. // RegInfo - some properties of a Region.
  116. struct RegInfo {
  117. RegInfo() = default;
  118. RegInfo(Region *RegionIn) : R(RegionIn) {}
  119. Region *R = nullptr;
  120. bool HasBranch = false;
  121. SmallVector<SelectInst *, 8> Selects;
  122. };
  123. typedef DenseMap<Region *, DenseSet<Instruction *>> HoistStopMapTy;
  124. // CHRScope - a sequence of regions to CHR together. It corresponds to a
  125. // sequence of conditional blocks. It can have subscopes which correspond to
  126. // nested conditional blocks. Nested CHRScopes form a tree.
  127. class CHRScope {
  128. public:
  129. CHRScope(RegInfo RI) : BranchInsertPoint(nullptr) {
  130. assert(RI.R && "Null RegionIn");
  131. RegInfos.push_back(RI);
  132. }
  133. Region *getParentRegion() {
  134. assert(RegInfos.size() > 0 && "Empty CHRScope");
  135. Region *Parent = RegInfos[0].R->getParent();
  136. assert(Parent && "Unexpected to call this on the top-level region");
  137. return Parent;
  138. }
  139. BasicBlock *getEntryBlock() {
  140. assert(RegInfos.size() > 0 && "Empty CHRScope");
  141. return RegInfos.front().R->getEntry();
  142. }
  143. BasicBlock *getExitBlock() {
  144. assert(RegInfos.size() > 0 && "Empty CHRScope");
  145. return RegInfos.back().R->getExit();
  146. }
  147. bool appendable(CHRScope *Next) {
  148. // The next scope is appendable only if this scope is directly connected to
  149. // it (which implies it post-dominates this scope) and this scope dominates
  150. // it (no edge to the next scope outside this scope).
  151. BasicBlock *NextEntry = Next->getEntryBlock();
  152. if (getExitBlock() != NextEntry)
  153. // Not directly connected.
  154. return false;
  155. Region *LastRegion = RegInfos.back().R;
  156. for (BasicBlock *Pred : predecessors(NextEntry))
  157. if (!LastRegion->contains(Pred))
  158. // There's an edge going into the entry of the next scope from outside
  159. // of this scope.
  160. return false;
  161. return true;
  162. }
  163. void append(CHRScope *Next) {
  164. assert(RegInfos.size() > 0 && "Empty CHRScope");
  165. assert(Next->RegInfos.size() > 0 && "Empty CHRScope");
  166. assert(getParentRegion() == Next->getParentRegion() &&
  167. "Must be siblings");
  168. assert(getExitBlock() == Next->getEntryBlock() &&
  169. "Must be adjacent");
  170. RegInfos.append(Next->RegInfos.begin(), Next->RegInfos.end());
  171. Subs.append(Next->Subs.begin(), Next->Subs.end());
  172. }
  173. void addSub(CHRScope *SubIn) {
  174. #ifndef NDEBUG
  175. bool IsChild = false;
  176. for (RegInfo &RI : RegInfos)
  177. if (RI.R == SubIn->getParentRegion()) {
  178. IsChild = true;
  179. break;
  180. }
  181. assert(IsChild && "Must be a child");
  182. #endif
  183. Subs.push_back(SubIn);
  184. }
  185. // Split this scope at the boundary region into two, which will belong to the
  186. // tail and returns the tail.
  187. CHRScope *split(Region *Boundary) {
  188. assert(Boundary && "Boundary null");
  189. assert(RegInfos.begin()->R != Boundary &&
  190. "Can't be split at beginning");
  191. auto BoundaryIt = llvm::find_if(
  192. RegInfos, [&Boundary](const RegInfo &RI) { return Boundary == RI.R; });
  193. if (BoundaryIt == RegInfos.end())
  194. return nullptr;
  195. ArrayRef<RegInfo> TailRegInfos(BoundaryIt, RegInfos.end());
  196. DenseSet<Region *> TailRegionSet;
  197. for (const RegInfo &RI : TailRegInfos)
  198. TailRegionSet.insert(RI.R);
  199. auto TailIt =
  200. std::stable_partition(Subs.begin(), Subs.end(), [&](CHRScope *Sub) {
  201. assert(Sub && "null Sub");
  202. Region *Parent = Sub->getParentRegion();
  203. if (TailRegionSet.count(Parent))
  204. return false;
  205. assert(llvm::any_of(
  206. RegInfos,
  207. [&Parent](const RegInfo &RI) { return Parent == RI.R; }) &&
  208. "Must be in head");
  209. return true;
  210. });
  211. ArrayRef<CHRScope *> TailSubs(TailIt, Subs.end());
  212. assert(HoistStopMap.empty() && "MapHoistStops must be empty");
  213. auto *Scope = new CHRScope(TailRegInfos, TailSubs);
  214. RegInfos.erase(BoundaryIt, RegInfos.end());
  215. Subs.erase(TailIt, Subs.end());
  216. return Scope;
  217. }
  218. bool contains(Instruction *I) const {
  219. BasicBlock *Parent = I->getParent();
  220. for (const RegInfo &RI : RegInfos)
  221. if (RI.R->contains(Parent))
  222. return true;
  223. return false;
  224. }
  225. void print(raw_ostream &OS) const;
  226. SmallVector<RegInfo, 8> RegInfos; // Regions that belong to this scope
  227. SmallVector<CHRScope *, 8> Subs; // Subscopes.
  228. // The instruction at which to insert the CHR conditional branch (and hoist
  229. // the dependent condition values).
  230. Instruction *BranchInsertPoint;
  231. // True-biased and false-biased regions (conditional blocks),
  232. // respectively. Used only for the outermost scope and includes regions in
  233. // subscopes. The rest are unbiased.
  234. DenseSet<Region *> TrueBiasedRegions;
  235. DenseSet<Region *> FalseBiasedRegions;
  236. // Among the biased regions, the regions that get CHRed.
  237. SmallVector<RegInfo, 8> CHRRegions;
  238. // True-biased and false-biased selects, respectively. Used only for the
  239. // outermost scope and includes ones in subscopes.
  240. DenseSet<SelectInst *> TrueBiasedSelects;
  241. DenseSet<SelectInst *> FalseBiasedSelects;
  242. // Map from one of the above regions to the instructions to stop
  243. // hoisting instructions at through use-def chains.
  244. HoistStopMapTy HoistStopMap;
  245. private:
  246. CHRScope(ArrayRef<RegInfo> RegInfosIn, ArrayRef<CHRScope *> SubsIn)
  247. : RegInfos(RegInfosIn.begin(), RegInfosIn.end()),
  248. Subs(SubsIn.begin(), SubsIn.end()), BranchInsertPoint(nullptr) {}
  249. };
  250. class CHR {
  251. public:
  252. CHR(Function &Fin, BlockFrequencyInfo &BFIin, DominatorTree &DTin,
  253. ProfileSummaryInfo &PSIin, RegionInfo &RIin,
  254. OptimizationRemarkEmitter &OREin)
  255. : F(Fin), BFI(BFIin), DT(DTin), PSI(PSIin), RI(RIin), ORE(OREin) {}
  256. ~CHR() {
  257. for (CHRScope *Scope : Scopes) {
  258. delete Scope;
  259. }
  260. }
  261. bool run();
  262. private:
  263. // See the comments in CHR::run() for the high level flow of the algorithm and
  264. // what the following functions do.
  265. void findScopes(SmallVectorImpl<CHRScope *> &Output) {
  266. Region *R = RI.getTopLevelRegion();
  267. if (CHRScope *Scope = findScopes(R, nullptr, nullptr, Output)) {
  268. Output.push_back(Scope);
  269. }
  270. }
  271. CHRScope *findScopes(Region *R, Region *NextRegion, Region *ParentRegion,
  272. SmallVectorImpl<CHRScope *> &Scopes);
  273. CHRScope *findScope(Region *R);
  274. void checkScopeHoistable(CHRScope *Scope);
  275. void splitScopes(SmallVectorImpl<CHRScope *> &Input,
  276. SmallVectorImpl<CHRScope *> &Output);
  277. SmallVector<CHRScope *, 8> splitScope(CHRScope *Scope,
  278. CHRScope *Outer,
  279. DenseSet<Value *> *OuterConditionValues,
  280. Instruction *OuterInsertPoint,
  281. SmallVectorImpl<CHRScope *> &Output,
  282. DenseSet<Instruction *> &Unhoistables);
  283. void classifyBiasedScopes(SmallVectorImpl<CHRScope *> &Scopes);
  284. void classifyBiasedScopes(CHRScope *Scope, CHRScope *OutermostScope);
  285. void filterScopes(SmallVectorImpl<CHRScope *> &Input,
  286. SmallVectorImpl<CHRScope *> &Output);
  287. void setCHRRegions(SmallVectorImpl<CHRScope *> &Input,
  288. SmallVectorImpl<CHRScope *> &Output);
  289. void setCHRRegions(CHRScope *Scope, CHRScope *OutermostScope);
  290. void sortScopes(SmallVectorImpl<CHRScope *> &Input,
  291. SmallVectorImpl<CHRScope *> &Output);
  292. void transformScopes(SmallVectorImpl<CHRScope *> &CHRScopes);
  293. void transformScopes(CHRScope *Scope, DenseSet<PHINode *> &TrivialPHIs);
  294. void cloneScopeBlocks(CHRScope *Scope,
  295. BasicBlock *PreEntryBlock,
  296. BasicBlock *ExitBlock,
  297. Region *LastRegion,
  298. ValueToValueMapTy &VMap);
  299. BranchInst *createMergedBranch(BasicBlock *PreEntryBlock,
  300. BasicBlock *EntryBlock,
  301. BasicBlock *NewEntryBlock,
  302. ValueToValueMapTy &VMap);
  303. void fixupBranchesAndSelects(CHRScope *Scope, BasicBlock *PreEntryBlock,
  304. BranchInst *MergedBR, uint64_t ProfileCount);
  305. void fixupBranch(Region *R, CHRScope *Scope, IRBuilder<> &IRB,
  306. Value *&MergedCondition, BranchProbability &CHRBranchBias);
  307. void fixupSelect(SelectInst *SI, CHRScope *Scope, IRBuilder<> &IRB,
  308. Value *&MergedCondition, BranchProbability &CHRBranchBias);
  309. void addToMergedCondition(bool IsTrueBiased, Value *Cond,
  310. Instruction *BranchOrSelect, CHRScope *Scope,
  311. IRBuilder<> &IRB, Value *&MergedCondition);
  312. unsigned getRegionDuplicationCount(const Region *R) {
  313. unsigned Count = 0;
  314. // Find out how many times region R is cloned. Note that if the parent
  315. // of R is cloned, R is also cloned, but R's clone count is not updated
  316. // from the clone of the parent. We need to accumlate all the counts
  317. // from the ancestors to get the clone count.
  318. while (R) {
  319. Count += DuplicationCount[R];
  320. R = R->getParent();
  321. }
  322. return Count;
  323. }
  324. Function &F;
  325. BlockFrequencyInfo &BFI;
  326. DominatorTree &DT;
  327. ProfileSummaryInfo &PSI;
  328. RegionInfo &RI;
  329. OptimizationRemarkEmitter &ORE;
  330. CHRStats Stats;
  331. // All the true-biased regions in the function
  332. DenseSet<Region *> TrueBiasedRegionsGlobal;
  333. // All the false-biased regions in the function
  334. DenseSet<Region *> FalseBiasedRegionsGlobal;
  335. // All the true-biased selects in the function
  336. DenseSet<SelectInst *> TrueBiasedSelectsGlobal;
  337. // All the false-biased selects in the function
  338. DenseSet<SelectInst *> FalseBiasedSelectsGlobal;
  339. // A map from biased regions to their branch bias
  340. DenseMap<Region *, BranchProbability> BranchBiasMap;
  341. // A map from biased selects to their branch bias
  342. DenseMap<SelectInst *, BranchProbability> SelectBiasMap;
  343. // All the scopes.
  344. DenseSet<CHRScope *> Scopes;
  345. // This maps records how many times this region is cloned.
  346. DenseMap<const Region *, unsigned> DuplicationCount;
  347. };
  348. } // end anonymous namespace
  349. static inline
  350. raw_ostream LLVM_ATTRIBUTE_UNUSED &operator<<(raw_ostream &OS,
  351. const CHRStats &Stats) {
  352. Stats.print(OS);
  353. return OS;
  354. }
  355. static inline
  356. raw_ostream &operator<<(raw_ostream &OS, const CHRScope &Scope) {
  357. Scope.print(OS);
  358. return OS;
  359. }
  360. static bool shouldApply(Function &F, ProfileSummaryInfo &PSI) {
  361. if (DisableCHR)
  362. return false;
  363. if (ForceCHR)
  364. return true;
  365. if (!CHRModuleList.empty() || !CHRFunctionList.empty()) {
  366. if (CHRModules.count(F.getParent()->getName()))
  367. return true;
  368. return CHRFunctions.count(F.getName());
  369. }
  370. return PSI.isFunctionEntryHot(&F);
  371. }
  372. static void LLVM_ATTRIBUTE_UNUSED dumpIR(Function &F, const char *Label,
  373. CHRStats *Stats) {
  374. StringRef FuncName = F.getName();
  375. StringRef ModuleName = F.getParent()->getName();
  376. (void)(FuncName); // Unused in release build.
  377. (void)(ModuleName); // Unused in release build.
  378. CHR_DEBUG(dbgs() << "CHR IR dump " << Label << " " << ModuleName << " "
  379. << FuncName);
  380. if (Stats)
  381. CHR_DEBUG(dbgs() << " " << *Stats);
  382. CHR_DEBUG(dbgs() << "\n");
  383. CHR_DEBUG(F.dump());
  384. }
  385. void CHRScope::print(raw_ostream &OS) const {
  386. assert(RegInfos.size() > 0 && "Empty CHRScope");
  387. OS << "CHRScope[";
  388. OS << RegInfos.size() << ", Regions[";
  389. for (const RegInfo &RI : RegInfos) {
  390. OS << RI.R->getNameStr();
  391. if (RI.HasBranch)
  392. OS << " B";
  393. if (RI.Selects.size() > 0)
  394. OS << " S" << RI.Selects.size();
  395. OS << ", ";
  396. }
  397. if (RegInfos[0].R->getParent()) {
  398. OS << "], Parent " << RegInfos[0].R->getParent()->getNameStr();
  399. } else {
  400. // top level region
  401. OS << "]";
  402. }
  403. OS << ", Subs[";
  404. for (CHRScope *Sub : Subs) {
  405. OS << *Sub << ", ";
  406. }
  407. OS << "]]";
  408. }
  409. // Return true if the given instruction type can be hoisted by CHR.
  410. static bool isHoistableInstructionType(Instruction *I) {
  411. return isa<BinaryOperator>(I) || isa<CastInst>(I) || isa<SelectInst>(I) ||
  412. isa<GetElementPtrInst>(I) || isa<CmpInst>(I) ||
  413. isa<InsertElementInst>(I) || isa<ExtractElementInst>(I) ||
  414. isa<ShuffleVectorInst>(I) || isa<ExtractValueInst>(I) ||
  415. isa<InsertValueInst>(I);
  416. }
  417. // Return true if the given instruction can be hoisted by CHR.
  418. static bool isHoistable(Instruction *I, DominatorTree &DT) {
  419. if (!isHoistableInstructionType(I))
  420. return false;
  421. return isSafeToSpeculativelyExecute(I, nullptr, nullptr, &DT);
  422. }
  423. // Recursively traverse the use-def chains of the given value and return a set
  424. // of the unhoistable base values defined within the scope (excluding the
  425. // first-region entry block) or the (hoistable or unhoistable) base values that
  426. // are defined outside (including the first-region entry block) of the
  427. // scope. The returned set doesn't include constants.
  428. static const std::set<Value *> &
  429. getBaseValues(Value *V, DominatorTree &DT,
  430. DenseMap<Value *, std::set<Value *>> &Visited) {
  431. auto It = Visited.find(V);
  432. if (It != Visited.end()) {
  433. return It->second;
  434. }
  435. std::set<Value *> Result;
  436. if (auto *I = dyn_cast<Instruction>(V)) {
  437. // We don't stop at a block that's not in the Scope because we would miss
  438. // some instructions that are based on the same base values if we stop
  439. // there.
  440. if (!isHoistable(I, DT)) {
  441. Result.insert(I);
  442. return Visited.insert(std::make_pair(V, std::move(Result))).first->second;
  443. }
  444. // I is hoistable above the Scope.
  445. for (Value *Op : I->operands()) {
  446. const std::set<Value *> &OpResult = getBaseValues(Op, DT, Visited);
  447. Result.insert(OpResult.begin(), OpResult.end());
  448. }
  449. return Visited.insert(std::make_pair(V, std::move(Result))).first->second;
  450. }
  451. if (isa<Argument>(V)) {
  452. Result.insert(V);
  453. }
  454. // We don't include others like constants because those won't lead to any
  455. // chance of folding of conditions (eg two bit checks merged into one check)
  456. // after CHR.
  457. return Visited.insert(std::make_pair(V, std::move(Result))).first->second;
  458. }
  459. // Return true if V is already hoisted or can be hoisted (along with its
  460. // operands) above the insert point. When it returns true and HoistStops is
  461. // non-null, the instructions to stop hoisting at through the use-def chains are
  462. // inserted into HoistStops.
  463. static bool
  464. checkHoistValue(Value *V, Instruction *InsertPoint, DominatorTree &DT,
  465. DenseSet<Instruction *> &Unhoistables,
  466. DenseSet<Instruction *> *HoistStops,
  467. DenseMap<Instruction *, bool> &Visited) {
  468. assert(InsertPoint && "Null InsertPoint");
  469. if (auto *I = dyn_cast<Instruction>(V)) {
  470. auto It = Visited.find(I);
  471. if (It != Visited.end()) {
  472. return It->second;
  473. }
  474. assert(DT.getNode(I->getParent()) && "DT must contain I's parent block");
  475. assert(DT.getNode(InsertPoint->getParent()) && "DT must contain Destination");
  476. if (Unhoistables.count(I)) {
  477. // Don't hoist if they are not to be hoisted.
  478. Visited[I] = false;
  479. return false;
  480. }
  481. if (DT.dominates(I, InsertPoint)) {
  482. // We are already above the insert point. Stop here.
  483. if (HoistStops)
  484. HoistStops->insert(I);
  485. Visited[I] = true;
  486. return true;
  487. }
  488. // We aren't not above the insert point, check if we can hoist it above the
  489. // insert point.
  490. if (isHoistable(I, DT)) {
  491. // Check operands first.
  492. DenseSet<Instruction *> OpsHoistStops;
  493. bool AllOpsHoisted = true;
  494. for (Value *Op : I->operands()) {
  495. if (!checkHoistValue(Op, InsertPoint, DT, Unhoistables, &OpsHoistStops,
  496. Visited)) {
  497. AllOpsHoisted = false;
  498. break;
  499. }
  500. }
  501. if (AllOpsHoisted) {
  502. CHR_DEBUG(dbgs() << "checkHoistValue " << *I << "\n");
  503. if (HoistStops)
  504. HoistStops->insert(OpsHoistStops.begin(), OpsHoistStops.end());
  505. Visited[I] = true;
  506. return true;
  507. }
  508. }
  509. Visited[I] = false;
  510. return false;
  511. }
  512. // Non-instructions are considered hoistable.
  513. return true;
  514. }
  515. // Constructs the true and false branch probabilities if the the instruction has
  516. // valid branch weights. Returns true when this was successful, false otherwise.
  517. static bool extractBranchProbabilities(Instruction *I,
  518. BranchProbability &TrueProb,
  519. BranchProbability &FalseProb) {
  520. uint64_t TrueWeight;
  521. uint64_t FalseWeight;
  522. if (!extractBranchWeights(*I, TrueWeight, FalseWeight))
  523. return false;
  524. uint64_t SumWeight = TrueWeight + FalseWeight;
  525. assert(SumWeight >= TrueWeight && SumWeight >= FalseWeight &&
  526. "Overflow calculating branch probabilities.");
  527. // Guard against 0-to-0 branch weights to avoid a division-by-zero crash.
  528. if (SumWeight == 0)
  529. return false;
  530. TrueProb = BranchProbability::getBranchProbability(TrueWeight, SumWeight);
  531. FalseProb = BranchProbability::getBranchProbability(FalseWeight, SumWeight);
  532. return true;
  533. }
  534. static BranchProbability getCHRBiasThreshold() {
  535. return BranchProbability::getBranchProbability(
  536. static_cast<uint64_t>(CHRBiasThreshold * 1000000), 1000000);
  537. }
  538. // A helper for CheckBiasedBranch and CheckBiasedSelect. If TrueProb >=
  539. // CHRBiasThreshold, put Key into TrueSet and return true. If FalseProb >=
  540. // CHRBiasThreshold, put Key into FalseSet and return true. Otherwise, return
  541. // false.
  542. template <typename K, typename S, typename M>
  543. static bool checkBias(K *Key, BranchProbability TrueProb,
  544. BranchProbability FalseProb, S &TrueSet, S &FalseSet,
  545. M &BiasMap) {
  546. BranchProbability Threshold = getCHRBiasThreshold();
  547. if (TrueProb >= Threshold) {
  548. TrueSet.insert(Key);
  549. BiasMap[Key] = TrueProb;
  550. return true;
  551. } else if (FalseProb >= Threshold) {
  552. FalseSet.insert(Key);
  553. BiasMap[Key] = FalseProb;
  554. return true;
  555. }
  556. return false;
  557. }
  558. // Returns true and insert a region into the right biased set and the map if the
  559. // branch of the region is biased.
  560. static bool checkBiasedBranch(BranchInst *BI, Region *R,
  561. DenseSet<Region *> &TrueBiasedRegionsGlobal,
  562. DenseSet<Region *> &FalseBiasedRegionsGlobal,
  563. DenseMap<Region *, BranchProbability> &BranchBiasMap) {
  564. if (!BI->isConditional())
  565. return false;
  566. BranchProbability ThenProb, ElseProb;
  567. if (!extractBranchProbabilities(BI, ThenProb, ElseProb))
  568. return false;
  569. BasicBlock *IfThen = BI->getSuccessor(0);
  570. BasicBlock *IfElse = BI->getSuccessor(1);
  571. assert((IfThen == R->getExit() || IfElse == R->getExit()) &&
  572. IfThen != IfElse &&
  573. "Invariant from findScopes");
  574. if (IfThen == R->getExit()) {
  575. // Swap them so that IfThen/ThenProb means going into the conditional code
  576. // and IfElse/ElseProb means skipping it.
  577. std::swap(IfThen, IfElse);
  578. std::swap(ThenProb, ElseProb);
  579. }
  580. CHR_DEBUG(dbgs() << "BI " << *BI << " ");
  581. CHR_DEBUG(dbgs() << "ThenProb " << ThenProb << " ");
  582. CHR_DEBUG(dbgs() << "ElseProb " << ElseProb << "\n");
  583. return checkBias(R, ThenProb, ElseProb,
  584. TrueBiasedRegionsGlobal, FalseBiasedRegionsGlobal,
  585. BranchBiasMap);
  586. }
  587. // Returns true and insert a select into the right biased set and the map if the
  588. // select is biased.
  589. static bool checkBiasedSelect(
  590. SelectInst *SI, Region *R,
  591. DenseSet<SelectInst *> &TrueBiasedSelectsGlobal,
  592. DenseSet<SelectInst *> &FalseBiasedSelectsGlobal,
  593. DenseMap<SelectInst *, BranchProbability> &SelectBiasMap) {
  594. BranchProbability TrueProb, FalseProb;
  595. if (!extractBranchProbabilities(SI, TrueProb, FalseProb))
  596. return false;
  597. CHR_DEBUG(dbgs() << "SI " << *SI << " ");
  598. CHR_DEBUG(dbgs() << "TrueProb " << TrueProb << " ");
  599. CHR_DEBUG(dbgs() << "FalseProb " << FalseProb << "\n");
  600. return checkBias(SI, TrueProb, FalseProb,
  601. TrueBiasedSelectsGlobal, FalseBiasedSelectsGlobal,
  602. SelectBiasMap);
  603. }
  604. // Returns the instruction at which to hoist the dependent condition values and
  605. // insert the CHR branch for a region. This is the terminator branch in the
  606. // entry block or the first select in the entry block, if any.
  607. static Instruction* getBranchInsertPoint(RegInfo &RI) {
  608. Region *R = RI.R;
  609. BasicBlock *EntryBB = R->getEntry();
  610. // The hoist point is by default the terminator of the entry block, which is
  611. // the same as the branch instruction if RI.HasBranch is true.
  612. Instruction *HoistPoint = EntryBB->getTerminator();
  613. for (SelectInst *SI : RI.Selects) {
  614. if (SI->getParent() == EntryBB) {
  615. // Pick the first select in Selects in the entry block. Note Selects is
  616. // sorted in the instruction order within a block (asserted below).
  617. HoistPoint = SI;
  618. break;
  619. }
  620. }
  621. assert(HoistPoint && "Null HoistPoint");
  622. #ifndef NDEBUG
  623. // Check that HoistPoint is the first one in Selects in the entry block,
  624. // if any.
  625. DenseSet<Instruction *> EntryBlockSelectSet;
  626. for (SelectInst *SI : RI.Selects) {
  627. if (SI->getParent() == EntryBB) {
  628. EntryBlockSelectSet.insert(SI);
  629. }
  630. }
  631. for (Instruction &I : *EntryBB) {
  632. if (EntryBlockSelectSet.contains(&I)) {
  633. assert(&I == HoistPoint &&
  634. "HoistPoint must be the first one in Selects");
  635. break;
  636. }
  637. }
  638. #endif
  639. return HoistPoint;
  640. }
  641. // Find a CHR scope in the given region.
  642. CHRScope * CHR::findScope(Region *R) {
  643. CHRScope *Result = nullptr;
  644. BasicBlock *Entry = R->getEntry();
  645. BasicBlock *Exit = R->getExit(); // null if top level.
  646. assert(Entry && "Entry must not be null");
  647. assert((Exit == nullptr) == (R->isTopLevelRegion()) &&
  648. "Only top level region has a null exit");
  649. if (Entry)
  650. CHR_DEBUG(dbgs() << "Entry " << Entry->getName() << "\n");
  651. else
  652. CHR_DEBUG(dbgs() << "Entry null\n");
  653. if (Exit)
  654. CHR_DEBUG(dbgs() << "Exit " << Exit->getName() << "\n");
  655. else
  656. CHR_DEBUG(dbgs() << "Exit null\n");
  657. // Exclude cases where Entry is part of a subregion (hence it doesn't belong
  658. // to this region).
  659. bool EntryInSubregion = RI.getRegionFor(Entry) != R;
  660. if (EntryInSubregion)
  661. return nullptr;
  662. // Exclude loops
  663. for (BasicBlock *Pred : predecessors(Entry))
  664. if (R->contains(Pred))
  665. return nullptr;
  666. // If any of the basic blocks have address taken, we must skip this region
  667. // because we cannot clone basic blocks that have address taken.
  668. for (BasicBlock *BB : R->blocks()) {
  669. if (BB->hasAddressTaken())
  670. return nullptr;
  671. // If we encounter llvm.coro.id, skip this region because if the basic block
  672. // is cloned, we end up inserting a token type PHI node to the block with
  673. // llvm.coro.begin.
  674. // FIXME: This could lead to less optimal codegen, because the region is
  675. // excluded, it can prevent CHR from merging adjacent regions into bigger
  676. // scope and hoisting more branches.
  677. for (Instruction &I : *BB)
  678. if (auto *II = dyn_cast<IntrinsicInst>(&I))
  679. if (II->getIntrinsicID() == Intrinsic::coro_id)
  680. return nullptr;
  681. }
  682. if (Exit) {
  683. // Try to find an if-then block (check if R is an if-then).
  684. // if (cond) {
  685. // ...
  686. // }
  687. auto *BI = dyn_cast<BranchInst>(Entry->getTerminator());
  688. if (BI)
  689. CHR_DEBUG(dbgs() << "BI.isConditional " << BI->isConditional() << "\n");
  690. else
  691. CHR_DEBUG(dbgs() << "BI null\n");
  692. if (BI && BI->isConditional()) {
  693. BasicBlock *S0 = BI->getSuccessor(0);
  694. BasicBlock *S1 = BI->getSuccessor(1);
  695. CHR_DEBUG(dbgs() << "S0 " << S0->getName() << "\n");
  696. CHR_DEBUG(dbgs() << "S1 " << S1->getName() << "\n");
  697. if (S0 != S1 && (S0 == Exit || S1 == Exit)) {
  698. RegInfo RI(R);
  699. RI.HasBranch = checkBiasedBranch(
  700. BI, R, TrueBiasedRegionsGlobal, FalseBiasedRegionsGlobal,
  701. BranchBiasMap);
  702. Result = new CHRScope(RI);
  703. Scopes.insert(Result);
  704. CHR_DEBUG(dbgs() << "Found a region with a branch\n");
  705. ++Stats.NumBranches;
  706. if (!RI.HasBranch) {
  707. ORE.emit([&]() {
  708. return OptimizationRemarkMissed(DEBUG_TYPE, "BranchNotBiased", BI)
  709. << "Branch not biased";
  710. });
  711. }
  712. }
  713. }
  714. }
  715. {
  716. // Try to look for selects in the direct child blocks (as opposed to in
  717. // subregions) of R.
  718. // ...
  719. // if (..) { // Some subregion
  720. // ...
  721. // }
  722. // if (..) { // Some subregion
  723. // ...
  724. // }
  725. // ...
  726. // a = cond ? b : c;
  727. // ...
  728. SmallVector<SelectInst *, 8> Selects;
  729. for (RegionNode *E : R->elements()) {
  730. if (E->isSubRegion())
  731. continue;
  732. // This returns the basic block of E if E is a direct child of R (not a
  733. // subregion.)
  734. BasicBlock *BB = E->getEntry();
  735. // Need to push in the order to make it easier to find the first Select
  736. // later.
  737. for (Instruction &I : *BB) {
  738. if (auto *SI = dyn_cast<SelectInst>(&I)) {
  739. Selects.push_back(SI);
  740. ++Stats.NumBranches;
  741. }
  742. }
  743. }
  744. if (Selects.size() > 0) {
  745. auto AddSelects = [&](RegInfo &RI) {
  746. for (auto *SI : Selects)
  747. if (checkBiasedSelect(SI, RI.R,
  748. TrueBiasedSelectsGlobal,
  749. FalseBiasedSelectsGlobal,
  750. SelectBiasMap))
  751. RI.Selects.push_back(SI);
  752. else
  753. ORE.emit([&]() {
  754. return OptimizationRemarkMissed(DEBUG_TYPE, "SelectNotBiased", SI)
  755. << "Select not biased";
  756. });
  757. };
  758. if (!Result) {
  759. CHR_DEBUG(dbgs() << "Found a select-only region\n");
  760. RegInfo RI(R);
  761. AddSelects(RI);
  762. Result = new CHRScope(RI);
  763. Scopes.insert(Result);
  764. } else {
  765. CHR_DEBUG(dbgs() << "Found select(s) in a region with a branch\n");
  766. AddSelects(Result->RegInfos[0]);
  767. }
  768. }
  769. }
  770. if (Result) {
  771. checkScopeHoistable(Result);
  772. }
  773. return Result;
  774. }
  775. // Check that any of the branch and the selects in the region could be
  776. // hoisted above the the CHR branch insert point (the most dominating of
  777. // them, either the branch (at the end of the first block) or the first
  778. // select in the first block). If the branch can't be hoisted, drop the
  779. // selects in the first blocks.
  780. //
  781. // For example, for the following scope/region with selects, we want to insert
  782. // the merged branch right before the first select in the first/entry block by
  783. // hoisting c1, c2, c3, and c4.
  784. //
  785. // // Branch insert point here.
  786. // a = c1 ? b : c; // Select 1
  787. // d = c2 ? e : f; // Select 2
  788. // if (c3) { // Branch
  789. // ...
  790. // c4 = foo() // A call.
  791. // g = c4 ? h : i; // Select 3
  792. // }
  793. //
  794. // But suppose we can't hoist c4 because it's dependent on the preceding
  795. // call. Then, we drop Select 3. Furthermore, if we can't hoist c2, we also drop
  796. // Select 2. If we can't hoist c3, we drop Selects 1 & 2.
  797. void CHR::checkScopeHoistable(CHRScope *Scope) {
  798. RegInfo &RI = Scope->RegInfos[0];
  799. Region *R = RI.R;
  800. BasicBlock *EntryBB = R->getEntry();
  801. auto *Branch = RI.HasBranch ?
  802. cast<BranchInst>(EntryBB->getTerminator()) : nullptr;
  803. SmallVector<SelectInst *, 8> &Selects = RI.Selects;
  804. if (RI.HasBranch || !Selects.empty()) {
  805. Instruction *InsertPoint = getBranchInsertPoint(RI);
  806. CHR_DEBUG(dbgs() << "InsertPoint " << *InsertPoint << "\n");
  807. // Avoid a data dependence from a select or a branch to a(nother)
  808. // select. Note no instruction can't data-depend on a branch (a branch
  809. // instruction doesn't produce a value).
  810. DenseSet<Instruction *> Unhoistables;
  811. // Initialize Unhoistables with the selects.
  812. for (SelectInst *SI : Selects) {
  813. Unhoistables.insert(SI);
  814. }
  815. // Remove Selects that can't be hoisted.
  816. for (auto it = Selects.begin(); it != Selects.end(); ) {
  817. SelectInst *SI = *it;
  818. if (SI == InsertPoint) {
  819. ++it;
  820. continue;
  821. }
  822. DenseMap<Instruction *, bool> Visited;
  823. bool IsHoistable = checkHoistValue(SI->getCondition(), InsertPoint,
  824. DT, Unhoistables, nullptr, Visited);
  825. if (!IsHoistable) {
  826. CHR_DEBUG(dbgs() << "Dropping select " << *SI << "\n");
  827. ORE.emit([&]() {
  828. return OptimizationRemarkMissed(DEBUG_TYPE,
  829. "DropUnhoistableSelect", SI)
  830. << "Dropped unhoistable select";
  831. });
  832. it = Selects.erase(it);
  833. // Since we are dropping the select here, we also drop it from
  834. // Unhoistables.
  835. Unhoistables.erase(SI);
  836. } else
  837. ++it;
  838. }
  839. // Update InsertPoint after potentially removing selects.
  840. InsertPoint = getBranchInsertPoint(RI);
  841. CHR_DEBUG(dbgs() << "InsertPoint " << *InsertPoint << "\n");
  842. if (RI.HasBranch && InsertPoint != Branch) {
  843. DenseMap<Instruction *, bool> Visited;
  844. bool IsHoistable = checkHoistValue(Branch->getCondition(), InsertPoint,
  845. DT, Unhoistables, nullptr, Visited);
  846. if (!IsHoistable) {
  847. // If the branch isn't hoistable, drop the selects in the entry
  848. // block, preferring the branch, which makes the branch the hoist
  849. // point.
  850. assert(InsertPoint != Branch && "Branch must not be the hoist point");
  851. CHR_DEBUG(dbgs() << "Dropping selects in entry block \n");
  852. CHR_DEBUG(
  853. for (SelectInst *SI : Selects) {
  854. dbgs() << "SI " << *SI << "\n";
  855. });
  856. for (SelectInst *SI : Selects) {
  857. ORE.emit([&]() {
  858. return OptimizationRemarkMissed(DEBUG_TYPE,
  859. "DropSelectUnhoistableBranch", SI)
  860. << "Dropped select due to unhoistable branch";
  861. });
  862. }
  863. llvm::erase_if(Selects, [EntryBB](SelectInst *SI) {
  864. return SI->getParent() == EntryBB;
  865. });
  866. Unhoistables.clear();
  867. InsertPoint = Branch;
  868. }
  869. }
  870. CHR_DEBUG(dbgs() << "InsertPoint " << *InsertPoint << "\n");
  871. #ifndef NDEBUG
  872. if (RI.HasBranch) {
  873. assert(!DT.dominates(Branch, InsertPoint) &&
  874. "Branch can't be already above the hoist point");
  875. DenseMap<Instruction *, bool> Visited;
  876. assert(checkHoistValue(Branch->getCondition(), InsertPoint,
  877. DT, Unhoistables, nullptr, Visited) &&
  878. "checkHoistValue for branch");
  879. }
  880. for (auto *SI : Selects) {
  881. assert(!DT.dominates(SI, InsertPoint) &&
  882. "SI can't be already above the hoist point");
  883. DenseMap<Instruction *, bool> Visited;
  884. assert(checkHoistValue(SI->getCondition(), InsertPoint, DT,
  885. Unhoistables, nullptr, Visited) &&
  886. "checkHoistValue for selects");
  887. }
  888. CHR_DEBUG(dbgs() << "Result\n");
  889. if (RI.HasBranch) {
  890. CHR_DEBUG(dbgs() << "BI " << *Branch << "\n");
  891. }
  892. for (auto *SI : Selects) {
  893. CHR_DEBUG(dbgs() << "SI " << *SI << "\n");
  894. }
  895. #endif
  896. }
  897. }
  898. // Traverse the region tree, find all nested scopes and merge them if possible.
  899. CHRScope * CHR::findScopes(Region *R, Region *NextRegion, Region *ParentRegion,
  900. SmallVectorImpl<CHRScope *> &Scopes) {
  901. CHR_DEBUG(dbgs() << "findScopes " << R->getNameStr() << "\n");
  902. CHRScope *Result = findScope(R);
  903. // Visit subscopes.
  904. CHRScope *ConsecutiveSubscope = nullptr;
  905. SmallVector<CHRScope *, 8> Subscopes;
  906. for (auto It = R->begin(); It != R->end(); ++It) {
  907. const std::unique_ptr<Region> &SubR = *It;
  908. auto NextIt = std::next(It);
  909. Region *NextSubR = NextIt != R->end() ? NextIt->get() : nullptr;
  910. CHR_DEBUG(dbgs() << "Looking at subregion " << SubR.get()->getNameStr()
  911. << "\n");
  912. CHRScope *SubCHRScope = findScopes(SubR.get(), NextSubR, R, Scopes);
  913. if (SubCHRScope) {
  914. CHR_DEBUG(dbgs() << "Subregion Scope " << *SubCHRScope << "\n");
  915. } else {
  916. CHR_DEBUG(dbgs() << "Subregion Scope null\n");
  917. }
  918. if (SubCHRScope) {
  919. if (!ConsecutiveSubscope)
  920. ConsecutiveSubscope = SubCHRScope;
  921. else if (!ConsecutiveSubscope->appendable(SubCHRScope)) {
  922. Subscopes.push_back(ConsecutiveSubscope);
  923. ConsecutiveSubscope = SubCHRScope;
  924. } else
  925. ConsecutiveSubscope->append(SubCHRScope);
  926. } else {
  927. if (ConsecutiveSubscope) {
  928. Subscopes.push_back(ConsecutiveSubscope);
  929. }
  930. ConsecutiveSubscope = nullptr;
  931. }
  932. }
  933. if (ConsecutiveSubscope) {
  934. Subscopes.push_back(ConsecutiveSubscope);
  935. }
  936. for (CHRScope *Sub : Subscopes) {
  937. if (Result) {
  938. // Combine it with the parent.
  939. Result->addSub(Sub);
  940. } else {
  941. // Push Subscopes as they won't be combined with the parent.
  942. Scopes.push_back(Sub);
  943. }
  944. }
  945. return Result;
  946. }
  947. static DenseSet<Value *> getCHRConditionValuesForRegion(RegInfo &RI) {
  948. DenseSet<Value *> ConditionValues;
  949. if (RI.HasBranch) {
  950. auto *BI = cast<BranchInst>(RI.R->getEntry()->getTerminator());
  951. ConditionValues.insert(BI->getCondition());
  952. }
  953. for (SelectInst *SI : RI.Selects) {
  954. ConditionValues.insert(SI->getCondition());
  955. }
  956. return ConditionValues;
  957. }
  958. // Determine whether to split a scope depending on the sets of the branch
  959. // condition values of the previous region and the current region. We split
  960. // (return true) it if 1) the condition values of the inner/lower scope can't be
  961. // hoisted up to the outer/upper scope, or 2) the two sets of the condition
  962. // values have an empty intersection (because the combined branch conditions
  963. // won't probably lead to a simpler combined condition).
  964. static bool shouldSplit(Instruction *InsertPoint,
  965. DenseSet<Value *> &PrevConditionValues,
  966. DenseSet<Value *> &ConditionValues,
  967. DominatorTree &DT,
  968. DenseSet<Instruction *> &Unhoistables) {
  969. assert(InsertPoint && "Null InsertPoint");
  970. CHR_DEBUG(
  971. dbgs() << "shouldSplit " << *InsertPoint << " PrevConditionValues ";
  972. for (Value *V : PrevConditionValues) {
  973. dbgs() << *V << ", ";
  974. }
  975. dbgs() << " ConditionValues ";
  976. for (Value *V : ConditionValues) {
  977. dbgs() << *V << ", ";
  978. }
  979. dbgs() << "\n");
  980. // If any of Bases isn't hoistable to the hoist point, split.
  981. for (Value *V : ConditionValues) {
  982. DenseMap<Instruction *, bool> Visited;
  983. if (!checkHoistValue(V, InsertPoint, DT, Unhoistables, nullptr, Visited)) {
  984. CHR_DEBUG(dbgs() << "Split. checkHoistValue false " << *V << "\n");
  985. return true; // Not hoistable, split.
  986. }
  987. }
  988. // If PrevConditionValues or ConditionValues is empty, don't split to avoid
  989. // unnecessary splits at scopes with no branch/selects. If
  990. // PrevConditionValues and ConditionValues don't intersect at all, split.
  991. if (!PrevConditionValues.empty() && !ConditionValues.empty()) {
  992. // Use std::set as DenseSet doesn't work with set_intersection.
  993. std::set<Value *> PrevBases, Bases;
  994. DenseMap<Value *, std::set<Value *>> Visited;
  995. for (Value *V : PrevConditionValues) {
  996. const std::set<Value *> &BaseValues = getBaseValues(V, DT, Visited);
  997. PrevBases.insert(BaseValues.begin(), BaseValues.end());
  998. }
  999. for (Value *V : ConditionValues) {
  1000. const std::set<Value *> &BaseValues = getBaseValues(V, DT, Visited);
  1001. Bases.insert(BaseValues.begin(), BaseValues.end());
  1002. }
  1003. CHR_DEBUG(
  1004. dbgs() << "PrevBases ";
  1005. for (Value *V : PrevBases) {
  1006. dbgs() << *V << ", ";
  1007. }
  1008. dbgs() << " Bases ";
  1009. for (Value *V : Bases) {
  1010. dbgs() << *V << ", ";
  1011. }
  1012. dbgs() << "\n");
  1013. std::vector<Value *> Intersection;
  1014. std::set_intersection(PrevBases.begin(), PrevBases.end(), Bases.begin(),
  1015. Bases.end(), std::back_inserter(Intersection));
  1016. if (Intersection.empty()) {
  1017. // Empty intersection, split.
  1018. CHR_DEBUG(dbgs() << "Split. Intersection empty\n");
  1019. return true;
  1020. }
  1021. }
  1022. CHR_DEBUG(dbgs() << "No split\n");
  1023. return false; // Don't split.
  1024. }
  1025. static void getSelectsInScope(CHRScope *Scope,
  1026. DenseSet<Instruction *> &Output) {
  1027. for (RegInfo &RI : Scope->RegInfos)
  1028. for (SelectInst *SI : RI.Selects)
  1029. Output.insert(SI);
  1030. for (CHRScope *Sub : Scope->Subs)
  1031. getSelectsInScope(Sub, Output);
  1032. }
  1033. void CHR::splitScopes(SmallVectorImpl<CHRScope *> &Input,
  1034. SmallVectorImpl<CHRScope *> &Output) {
  1035. for (CHRScope *Scope : Input) {
  1036. assert(!Scope->BranchInsertPoint &&
  1037. "BranchInsertPoint must not be set");
  1038. DenseSet<Instruction *> Unhoistables;
  1039. getSelectsInScope(Scope, Unhoistables);
  1040. splitScope(Scope, nullptr, nullptr, nullptr, Output, Unhoistables);
  1041. }
  1042. #ifndef NDEBUG
  1043. for (CHRScope *Scope : Output) {
  1044. assert(Scope->BranchInsertPoint && "BranchInsertPoint must be set");
  1045. }
  1046. #endif
  1047. }
  1048. SmallVector<CHRScope *, 8> CHR::splitScope(
  1049. CHRScope *Scope,
  1050. CHRScope *Outer,
  1051. DenseSet<Value *> *OuterConditionValues,
  1052. Instruction *OuterInsertPoint,
  1053. SmallVectorImpl<CHRScope *> &Output,
  1054. DenseSet<Instruction *> &Unhoistables) {
  1055. if (Outer) {
  1056. assert(OuterConditionValues && "Null OuterConditionValues");
  1057. assert(OuterInsertPoint && "Null OuterInsertPoint");
  1058. }
  1059. bool PrevSplitFromOuter = true;
  1060. DenseSet<Value *> PrevConditionValues;
  1061. Instruction *PrevInsertPoint = nullptr;
  1062. SmallVector<CHRScope *, 8> Splits;
  1063. SmallVector<bool, 8> SplitsSplitFromOuter;
  1064. SmallVector<DenseSet<Value *>, 8> SplitsConditionValues;
  1065. SmallVector<Instruction *, 8> SplitsInsertPoints;
  1066. SmallVector<RegInfo, 8> RegInfos(Scope->RegInfos); // Copy
  1067. for (RegInfo &RI : RegInfos) {
  1068. Instruction *InsertPoint = getBranchInsertPoint(RI);
  1069. DenseSet<Value *> ConditionValues = getCHRConditionValuesForRegion(RI);
  1070. CHR_DEBUG(
  1071. dbgs() << "ConditionValues ";
  1072. for (Value *V : ConditionValues) {
  1073. dbgs() << *V << ", ";
  1074. }
  1075. dbgs() << "\n");
  1076. if (RI.R == RegInfos[0].R) {
  1077. // First iteration. Check to see if we should split from the outer.
  1078. if (Outer) {
  1079. CHR_DEBUG(dbgs() << "Outer " << *Outer << "\n");
  1080. CHR_DEBUG(dbgs() << "Should split from outer at "
  1081. << RI.R->getNameStr() << "\n");
  1082. if (shouldSplit(OuterInsertPoint, *OuterConditionValues,
  1083. ConditionValues, DT, Unhoistables)) {
  1084. PrevConditionValues = ConditionValues;
  1085. PrevInsertPoint = InsertPoint;
  1086. ORE.emit([&]() {
  1087. return OptimizationRemarkMissed(DEBUG_TYPE,
  1088. "SplitScopeFromOuter",
  1089. RI.R->getEntry()->getTerminator())
  1090. << "Split scope from outer due to unhoistable branch/select "
  1091. << "and/or lack of common condition values";
  1092. });
  1093. } else {
  1094. // Not splitting from the outer. Use the outer bases and insert
  1095. // point. Union the bases.
  1096. PrevSplitFromOuter = false;
  1097. PrevConditionValues = *OuterConditionValues;
  1098. PrevConditionValues.insert(ConditionValues.begin(),
  1099. ConditionValues.end());
  1100. PrevInsertPoint = OuterInsertPoint;
  1101. }
  1102. } else {
  1103. CHR_DEBUG(dbgs() << "Outer null\n");
  1104. PrevConditionValues = ConditionValues;
  1105. PrevInsertPoint = InsertPoint;
  1106. }
  1107. } else {
  1108. CHR_DEBUG(dbgs() << "Should split from prev at "
  1109. << RI.R->getNameStr() << "\n");
  1110. if (shouldSplit(PrevInsertPoint, PrevConditionValues, ConditionValues,
  1111. DT, Unhoistables)) {
  1112. CHRScope *Tail = Scope->split(RI.R);
  1113. Scopes.insert(Tail);
  1114. Splits.push_back(Scope);
  1115. SplitsSplitFromOuter.push_back(PrevSplitFromOuter);
  1116. SplitsConditionValues.push_back(PrevConditionValues);
  1117. SplitsInsertPoints.push_back(PrevInsertPoint);
  1118. Scope = Tail;
  1119. PrevConditionValues = ConditionValues;
  1120. PrevInsertPoint = InsertPoint;
  1121. PrevSplitFromOuter = true;
  1122. ORE.emit([&]() {
  1123. return OptimizationRemarkMissed(DEBUG_TYPE,
  1124. "SplitScopeFromPrev",
  1125. RI.R->getEntry()->getTerminator())
  1126. << "Split scope from previous due to unhoistable branch/select "
  1127. << "and/or lack of common condition values";
  1128. });
  1129. } else {
  1130. // Not splitting. Union the bases. Keep the hoist point.
  1131. PrevConditionValues.insert(ConditionValues.begin(), ConditionValues.end());
  1132. }
  1133. }
  1134. }
  1135. Splits.push_back(Scope);
  1136. SplitsSplitFromOuter.push_back(PrevSplitFromOuter);
  1137. SplitsConditionValues.push_back(PrevConditionValues);
  1138. assert(PrevInsertPoint && "Null PrevInsertPoint");
  1139. SplitsInsertPoints.push_back(PrevInsertPoint);
  1140. assert(Splits.size() == SplitsConditionValues.size() &&
  1141. Splits.size() == SplitsSplitFromOuter.size() &&
  1142. Splits.size() == SplitsInsertPoints.size() && "Mismatching sizes");
  1143. for (size_t I = 0; I < Splits.size(); ++I) {
  1144. CHRScope *Split = Splits[I];
  1145. DenseSet<Value *> &SplitConditionValues = SplitsConditionValues[I];
  1146. Instruction *SplitInsertPoint = SplitsInsertPoints[I];
  1147. SmallVector<CHRScope *, 8> NewSubs;
  1148. DenseSet<Instruction *> SplitUnhoistables;
  1149. getSelectsInScope(Split, SplitUnhoistables);
  1150. for (CHRScope *Sub : Split->Subs) {
  1151. SmallVector<CHRScope *, 8> SubSplits = splitScope(
  1152. Sub, Split, &SplitConditionValues, SplitInsertPoint, Output,
  1153. SplitUnhoistables);
  1154. llvm::append_range(NewSubs, SubSplits);
  1155. }
  1156. Split->Subs = NewSubs;
  1157. }
  1158. SmallVector<CHRScope *, 8> Result;
  1159. for (size_t I = 0; I < Splits.size(); ++I) {
  1160. CHRScope *Split = Splits[I];
  1161. if (SplitsSplitFromOuter[I]) {
  1162. // Split from the outer.
  1163. Output.push_back(Split);
  1164. Split->BranchInsertPoint = SplitsInsertPoints[I];
  1165. CHR_DEBUG(dbgs() << "BranchInsertPoint " << *SplitsInsertPoints[I]
  1166. << "\n");
  1167. } else {
  1168. // Connected to the outer.
  1169. Result.push_back(Split);
  1170. }
  1171. }
  1172. if (!Outer)
  1173. assert(Result.empty() &&
  1174. "If no outer (top-level), must return no nested ones");
  1175. return Result;
  1176. }
  1177. void CHR::classifyBiasedScopes(SmallVectorImpl<CHRScope *> &Scopes) {
  1178. for (CHRScope *Scope : Scopes) {
  1179. assert(Scope->TrueBiasedRegions.empty() && Scope->FalseBiasedRegions.empty() && "Empty");
  1180. classifyBiasedScopes(Scope, Scope);
  1181. CHR_DEBUG(
  1182. dbgs() << "classifyBiasedScopes " << *Scope << "\n";
  1183. dbgs() << "TrueBiasedRegions ";
  1184. for (Region *R : Scope->TrueBiasedRegions) {
  1185. dbgs() << R->getNameStr() << ", ";
  1186. }
  1187. dbgs() << "\n";
  1188. dbgs() << "FalseBiasedRegions ";
  1189. for (Region *R : Scope->FalseBiasedRegions) {
  1190. dbgs() << R->getNameStr() << ", ";
  1191. }
  1192. dbgs() << "\n";
  1193. dbgs() << "TrueBiasedSelects ";
  1194. for (SelectInst *SI : Scope->TrueBiasedSelects) {
  1195. dbgs() << *SI << ", ";
  1196. }
  1197. dbgs() << "\n";
  1198. dbgs() << "FalseBiasedSelects ";
  1199. for (SelectInst *SI : Scope->FalseBiasedSelects) {
  1200. dbgs() << *SI << ", ";
  1201. }
  1202. dbgs() << "\n";);
  1203. }
  1204. }
  1205. void CHR::classifyBiasedScopes(CHRScope *Scope, CHRScope *OutermostScope) {
  1206. for (RegInfo &RI : Scope->RegInfos) {
  1207. if (RI.HasBranch) {
  1208. Region *R = RI.R;
  1209. if (TrueBiasedRegionsGlobal.contains(R))
  1210. OutermostScope->TrueBiasedRegions.insert(R);
  1211. else if (FalseBiasedRegionsGlobal.contains(R))
  1212. OutermostScope->FalseBiasedRegions.insert(R);
  1213. else
  1214. llvm_unreachable("Must be biased");
  1215. }
  1216. for (SelectInst *SI : RI.Selects) {
  1217. if (TrueBiasedSelectsGlobal.contains(SI))
  1218. OutermostScope->TrueBiasedSelects.insert(SI);
  1219. else if (FalseBiasedSelectsGlobal.contains(SI))
  1220. OutermostScope->FalseBiasedSelects.insert(SI);
  1221. else
  1222. llvm_unreachable("Must be biased");
  1223. }
  1224. }
  1225. for (CHRScope *Sub : Scope->Subs) {
  1226. classifyBiasedScopes(Sub, OutermostScope);
  1227. }
  1228. }
  1229. static bool hasAtLeastTwoBiasedBranches(CHRScope *Scope) {
  1230. unsigned NumBiased = Scope->TrueBiasedRegions.size() +
  1231. Scope->FalseBiasedRegions.size() +
  1232. Scope->TrueBiasedSelects.size() +
  1233. Scope->FalseBiasedSelects.size();
  1234. return NumBiased >= CHRMergeThreshold;
  1235. }
  1236. void CHR::filterScopes(SmallVectorImpl<CHRScope *> &Input,
  1237. SmallVectorImpl<CHRScope *> &Output) {
  1238. for (CHRScope *Scope : Input) {
  1239. // Filter out the ones with only one region and no subs.
  1240. if (!hasAtLeastTwoBiasedBranches(Scope)) {
  1241. CHR_DEBUG(dbgs() << "Filtered out by biased branches truthy-regions "
  1242. << Scope->TrueBiasedRegions.size()
  1243. << " falsy-regions " << Scope->FalseBiasedRegions.size()
  1244. << " true-selects " << Scope->TrueBiasedSelects.size()
  1245. << " false-selects " << Scope->FalseBiasedSelects.size() << "\n");
  1246. ORE.emit([&]() {
  1247. return OptimizationRemarkMissed(
  1248. DEBUG_TYPE,
  1249. "DropScopeWithOneBranchOrSelect",
  1250. Scope->RegInfos[0].R->getEntry()->getTerminator())
  1251. << "Drop scope with < "
  1252. << ore::NV("CHRMergeThreshold", CHRMergeThreshold)
  1253. << " biased branch(es) or select(s)";
  1254. });
  1255. continue;
  1256. }
  1257. Output.push_back(Scope);
  1258. }
  1259. }
  1260. void CHR::setCHRRegions(SmallVectorImpl<CHRScope *> &Input,
  1261. SmallVectorImpl<CHRScope *> &Output) {
  1262. for (CHRScope *Scope : Input) {
  1263. assert(Scope->HoistStopMap.empty() && Scope->CHRRegions.empty() &&
  1264. "Empty");
  1265. setCHRRegions(Scope, Scope);
  1266. Output.push_back(Scope);
  1267. CHR_DEBUG(
  1268. dbgs() << "setCHRRegions HoistStopMap " << *Scope << "\n";
  1269. for (auto pair : Scope->HoistStopMap) {
  1270. Region *R = pair.first;
  1271. dbgs() << "Region " << R->getNameStr() << "\n";
  1272. for (Instruction *I : pair.second) {
  1273. dbgs() << "HoistStop " << *I << "\n";
  1274. }
  1275. }
  1276. dbgs() << "CHRRegions" << "\n";
  1277. for (RegInfo &RI : Scope->CHRRegions) {
  1278. dbgs() << RI.R->getNameStr() << "\n";
  1279. });
  1280. }
  1281. }
  1282. void CHR::setCHRRegions(CHRScope *Scope, CHRScope *OutermostScope) {
  1283. DenseSet<Instruction *> Unhoistables;
  1284. // Put the biased selects in Unhoistables because they should stay where they
  1285. // are and constant-folded after CHR (in case one biased select or a branch
  1286. // can depend on another biased select.)
  1287. for (RegInfo &RI : Scope->RegInfos) {
  1288. for (SelectInst *SI : RI.Selects) {
  1289. Unhoistables.insert(SI);
  1290. }
  1291. }
  1292. Instruction *InsertPoint = OutermostScope->BranchInsertPoint;
  1293. for (RegInfo &RI : Scope->RegInfos) {
  1294. Region *R = RI.R;
  1295. DenseSet<Instruction *> HoistStops;
  1296. bool IsHoisted = false;
  1297. if (RI.HasBranch) {
  1298. assert((OutermostScope->TrueBiasedRegions.contains(R) ||
  1299. OutermostScope->FalseBiasedRegions.contains(R)) &&
  1300. "Must be truthy or falsy");
  1301. auto *BI = cast<BranchInst>(R->getEntry()->getTerminator());
  1302. // Note checkHoistValue fills in HoistStops.
  1303. DenseMap<Instruction *, bool> Visited;
  1304. bool IsHoistable = checkHoistValue(BI->getCondition(), InsertPoint, DT,
  1305. Unhoistables, &HoistStops, Visited);
  1306. assert(IsHoistable && "Must be hoistable");
  1307. (void)(IsHoistable); // Unused in release build
  1308. IsHoisted = true;
  1309. }
  1310. for (SelectInst *SI : RI.Selects) {
  1311. assert((OutermostScope->TrueBiasedSelects.contains(SI) ||
  1312. OutermostScope->FalseBiasedSelects.contains(SI)) &&
  1313. "Must be true or false biased");
  1314. // Note checkHoistValue fills in HoistStops.
  1315. DenseMap<Instruction *, bool> Visited;
  1316. bool IsHoistable = checkHoistValue(SI->getCondition(), InsertPoint, DT,
  1317. Unhoistables, &HoistStops, Visited);
  1318. assert(IsHoistable && "Must be hoistable");
  1319. (void)(IsHoistable); // Unused in release build
  1320. IsHoisted = true;
  1321. }
  1322. if (IsHoisted) {
  1323. OutermostScope->CHRRegions.push_back(RI);
  1324. OutermostScope->HoistStopMap[R] = HoistStops;
  1325. }
  1326. }
  1327. for (CHRScope *Sub : Scope->Subs)
  1328. setCHRRegions(Sub, OutermostScope);
  1329. }
  1330. static bool CHRScopeSorter(CHRScope *Scope1, CHRScope *Scope2) {
  1331. return Scope1->RegInfos[0].R->getDepth() < Scope2->RegInfos[0].R->getDepth();
  1332. }
  1333. void CHR::sortScopes(SmallVectorImpl<CHRScope *> &Input,
  1334. SmallVectorImpl<CHRScope *> &Output) {
  1335. Output.resize(Input.size());
  1336. llvm::copy(Input, Output.begin());
  1337. llvm::stable_sort(Output, CHRScopeSorter);
  1338. }
  1339. // Return true if V is already hoisted or was hoisted (along with its operands)
  1340. // to the insert point.
  1341. static void hoistValue(Value *V, Instruction *HoistPoint, Region *R,
  1342. HoistStopMapTy &HoistStopMap,
  1343. DenseSet<Instruction *> &HoistedSet,
  1344. DenseSet<PHINode *> &TrivialPHIs,
  1345. DominatorTree &DT) {
  1346. auto IT = HoistStopMap.find(R);
  1347. assert(IT != HoistStopMap.end() && "Region must be in hoist stop map");
  1348. DenseSet<Instruction *> &HoistStops = IT->second;
  1349. if (auto *I = dyn_cast<Instruction>(V)) {
  1350. if (I == HoistPoint)
  1351. return;
  1352. if (HoistStops.count(I))
  1353. return;
  1354. if (auto *PN = dyn_cast<PHINode>(I))
  1355. if (TrivialPHIs.count(PN))
  1356. // The trivial phi inserted by the previous CHR scope could replace a
  1357. // non-phi in HoistStops. Note that since this phi is at the exit of a
  1358. // previous CHR scope, which dominates this scope, it's safe to stop
  1359. // hoisting there.
  1360. return;
  1361. if (HoistedSet.count(I))
  1362. // Already hoisted, return.
  1363. return;
  1364. assert(isHoistableInstructionType(I) && "Unhoistable instruction type");
  1365. assert(DT.getNode(I->getParent()) && "DT must contain I's block");
  1366. assert(DT.getNode(HoistPoint->getParent()) &&
  1367. "DT must contain HoistPoint block");
  1368. if (DT.dominates(I, HoistPoint))
  1369. // We are already above the hoist point. Stop here. This may be necessary
  1370. // when multiple scopes would independently hoist the same
  1371. // instruction. Since an outer (dominating) scope would hoist it to its
  1372. // entry before an inner (dominated) scope would to its entry, the inner
  1373. // scope may see the instruction already hoisted, in which case it
  1374. // potentially wrong for the inner scope to hoist it and could cause bad
  1375. // IR (non-dominating def), but safe to skip hoisting it instead because
  1376. // it's already in a block that dominates the inner scope.
  1377. return;
  1378. for (Value *Op : I->operands()) {
  1379. hoistValue(Op, HoistPoint, R, HoistStopMap, HoistedSet, TrivialPHIs, DT);
  1380. }
  1381. I->moveBefore(HoistPoint);
  1382. HoistedSet.insert(I);
  1383. CHR_DEBUG(dbgs() << "hoistValue " << *I << "\n");
  1384. }
  1385. }
  1386. // Hoist the dependent condition values of the branches and the selects in the
  1387. // scope to the insert point.
  1388. static void hoistScopeConditions(CHRScope *Scope, Instruction *HoistPoint,
  1389. DenseSet<PHINode *> &TrivialPHIs,
  1390. DominatorTree &DT) {
  1391. DenseSet<Instruction *> HoistedSet;
  1392. for (const RegInfo &RI : Scope->CHRRegions) {
  1393. Region *R = RI.R;
  1394. bool IsTrueBiased = Scope->TrueBiasedRegions.count(R);
  1395. bool IsFalseBiased = Scope->FalseBiasedRegions.count(R);
  1396. if (RI.HasBranch && (IsTrueBiased || IsFalseBiased)) {
  1397. auto *BI = cast<BranchInst>(R->getEntry()->getTerminator());
  1398. hoistValue(BI->getCondition(), HoistPoint, R, Scope->HoistStopMap,
  1399. HoistedSet, TrivialPHIs, DT);
  1400. }
  1401. for (SelectInst *SI : RI.Selects) {
  1402. bool IsTrueBiased = Scope->TrueBiasedSelects.count(SI);
  1403. bool IsFalseBiased = Scope->FalseBiasedSelects.count(SI);
  1404. if (!(IsTrueBiased || IsFalseBiased))
  1405. continue;
  1406. hoistValue(SI->getCondition(), HoistPoint, R, Scope->HoistStopMap,
  1407. HoistedSet, TrivialPHIs, DT);
  1408. }
  1409. }
  1410. }
  1411. // Negate the predicate if an ICmp if it's used only by branches or selects by
  1412. // swapping the operands of the branches or the selects. Returns true if success.
  1413. static bool negateICmpIfUsedByBranchOrSelectOnly(ICmpInst *ICmp,
  1414. Instruction *ExcludedUser,
  1415. CHRScope *Scope) {
  1416. for (User *U : ICmp->users()) {
  1417. if (U == ExcludedUser)
  1418. continue;
  1419. if (isa<BranchInst>(U) && cast<BranchInst>(U)->isConditional())
  1420. continue;
  1421. if (isa<SelectInst>(U) && cast<SelectInst>(U)->getCondition() == ICmp)
  1422. continue;
  1423. return false;
  1424. }
  1425. for (User *U : ICmp->users()) {
  1426. if (U == ExcludedUser)
  1427. continue;
  1428. if (auto *BI = dyn_cast<BranchInst>(U)) {
  1429. assert(BI->isConditional() && "Must be conditional");
  1430. BI->swapSuccessors();
  1431. // Don't need to swap this in terms of
  1432. // TrueBiasedRegions/FalseBiasedRegions because true-based/false-based
  1433. // mean whehter the branch is likely go into the if-then rather than
  1434. // successor0/successor1 and because we can tell which edge is the then or
  1435. // the else one by comparing the destination to the region exit block.
  1436. continue;
  1437. }
  1438. if (auto *SI = dyn_cast<SelectInst>(U)) {
  1439. // Swap operands
  1440. SI->swapValues();
  1441. SI->swapProfMetadata();
  1442. if (Scope->TrueBiasedSelects.count(SI)) {
  1443. assert(!Scope->FalseBiasedSelects.contains(SI) &&
  1444. "Must not be already in");
  1445. Scope->FalseBiasedSelects.insert(SI);
  1446. } else if (Scope->FalseBiasedSelects.count(SI)) {
  1447. assert(!Scope->TrueBiasedSelects.contains(SI) &&
  1448. "Must not be already in");
  1449. Scope->TrueBiasedSelects.insert(SI);
  1450. }
  1451. continue;
  1452. }
  1453. llvm_unreachable("Must be a branch or a select");
  1454. }
  1455. ICmp->setPredicate(CmpInst::getInversePredicate(ICmp->getPredicate()));
  1456. return true;
  1457. }
  1458. // A helper for transformScopes. Insert a trivial phi at the scope exit block
  1459. // for a value that's defined in the scope but used outside it (meaning it's
  1460. // alive at the exit block).
  1461. static void insertTrivialPHIs(CHRScope *Scope,
  1462. BasicBlock *EntryBlock, BasicBlock *ExitBlock,
  1463. DenseSet<PHINode *> &TrivialPHIs) {
  1464. SmallSetVector<BasicBlock *, 8> BlocksInScope;
  1465. for (RegInfo &RI : Scope->RegInfos) {
  1466. for (BasicBlock *BB : RI.R->blocks()) { // This includes the blocks in the
  1467. // sub-Scopes.
  1468. BlocksInScope.insert(BB);
  1469. }
  1470. }
  1471. CHR_DEBUG({
  1472. dbgs() << "Inserting redundant phis\n";
  1473. for (BasicBlock *BB : BlocksInScope)
  1474. dbgs() << "BlockInScope " << BB->getName() << "\n";
  1475. });
  1476. for (BasicBlock *BB : BlocksInScope) {
  1477. for (Instruction &I : *BB) {
  1478. SmallVector<Instruction *, 8> Users;
  1479. for (User *U : I.users()) {
  1480. if (auto *UI = dyn_cast<Instruction>(U)) {
  1481. if (!BlocksInScope.contains(UI->getParent()) &&
  1482. // Unless there's already a phi for I at the exit block.
  1483. !(isa<PHINode>(UI) && UI->getParent() == ExitBlock)) {
  1484. CHR_DEBUG(dbgs() << "V " << I << "\n");
  1485. CHR_DEBUG(dbgs() << "Used outside scope by user " << *UI << "\n");
  1486. Users.push_back(UI);
  1487. } else if (UI->getParent() == EntryBlock && isa<PHINode>(UI)) {
  1488. // There's a loop backedge from a block that's dominated by this
  1489. // scope to the entry block.
  1490. CHR_DEBUG(dbgs() << "V " << I << "\n");
  1491. CHR_DEBUG(dbgs()
  1492. << "Used at entry block (for a back edge) by a phi user "
  1493. << *UI << "\n");
  1494. Users.push_back(UI);
  1495. }
  1496. }
  1497. }
  1498. if (Users.size() > 0) {
  1499. // Insert a trivial phi for I (phi [&I, P0], [&I, P1], ...) at
  1500. // ExitBlock. Replace I with the new phi in UI unless UI is another
  1501. // phi at ExitBlock.
  1502. PHINode *PN = PHINode::Create(I.getType(), pred_size(ExitBlock), "",
  1503. &ExitBlock->front());
  1504. for (BasicBlock *Pred : predecessors(ExitBlock)) {
  1505. PN->addIncoming(&I, Pred);
  1506. }
  1507. TrivialPHIs.insert(PN);
  1508. CHR_DEBUG(dbgs() << "Insert phi " << *PN << "\n");
  1509. for (Instruction *UI : Users) {
  1510. for (unsigned J = 0, NumOps = UI->getNumOperands(); J < NumOps; ++J) {
  1511. if (UI->getOperand(J) == &I) {
  1512. UI->setOperand(J, PN);
  1513. }
  1514. }
  1515. CHR_DEBUG(dbgs() << "Updated user " << *UI << "\n");
  1516. }
  1517. }
  1518. }
  1519. }
  1520. }
  1521. // Assert that all the CHR regions of the scope have a biased branch or select.
  1522. static void LLVM_ATTRIBUTE_UNUSED
  1523. assertCHRRegionsHaveBiasedBranchOrSelect(CHRScope *Scope) {
  1524. #ifndef NDEBUG
  1525. auto HasBiasedBranchOrSelect = [](RegInfo &RI, CHRScope *Scope) {
  1526. if (Scope->TrueBiasedRegions.count(RI.R) ||
  1527. Scope->FalseBiasedRegions.count(RI.R))
  1528. return true;
  1529. for (SelectInst *SI : RI.Selects)
  1530. if (Scope->TrueBiasedSelects.count(SI) ||
  1531. Scope->FalseBiasedSelects.count(SI))
  1532. return true;
  1533. return false;
  1534. };
  1535. for (RegInfo &RI : Scope->CHRRegions) {
  1536. assert(HasBiasedBranchOrSelect(RI, Scope) &&
  1537. "Must have biased branch or select");
  1538. }
  1539. #endif
  1540. }
  1541. // Assert that all the condition values of the biased branches and selects have
  1542. // been hoisted to the pre-entry block or outside of the scope.
  1543. static void LLVM_ATTRIBUTE_UNUSED assertBranchOrSelectConditionHoisted(
  1544. CHRScope *Scope, BasicBlock *PreEntryBlock) {
  1545. CHR_DEBUG(dbgs() << "Biased regions condition values \n");
  1546. for (RegInfo &RI : Scope->CHRRegions) {
  1547. Region *R = RI.R;
  1548. bool IsTrueBiased = Scope->TrueBiasedRegions.count(R);
  1549. bool IsFalseBiased = Scope->FalseBiasedRegions.count(R);
  1550. if (RI.HasBranch && (IsTrueBiased || IsFalseBiased)) {
  1551. auto *BI = cast<BranchInst>(R->getEntry()->getTerminator());
  1552. Value *V = BI->getCondition();
  1553. CHR_DEBUG(dbgs() << *V << "\n");
  1554. if (auto *I = dyn_cast<Instruction>(V)) {
  1555. (void)(I); // Unused in release build.
  1556. assert((I->getParent() == PreEntryBlock ||
  1557. !Scope->contains(I)) &&
  1558. "Must have been hoisted to PreEntryBlock or outside the scope");
  1559. }
  1560. }
  1561. for (SelectInst *SI : RI.Selects) {
  1562. bool IsTrueBiased = Scope->TrueBiasedSelects.count(SI);
  1563. bool IsFalseBiased = Scope->FalseBiasedSelects.count(SI);
  1564. if (!(IsTrueBiased || IsFalseBiased))
  1565. continue;
  1566. Value *V = SI->getCondition();
  1567. CHR_DEBUG(dbgs() << *V << "\n");
  1568. if (auto *I = dyn_cast<Instruction>(V)) {
  1569. (void)(I); // Unused in release build.
  1570. assert((I->getParent() == PreEntryBlock ||
  1571. !Scope->contains(I)) &&
  1572. "Must have been hoisted to PreEntryBlock or outside the scope");
  1573. }
  1574. }
  1575. }
  1576. }
  1577. void CHR::transformScopes(CHRScope *Scope, DenseSet<PHINode *> &TrivialPHIs) {
  1578. CHR_DEBUG(dbgs() << "transformScopes " << *Scope << "\n");
  1579. assert(Scope->RegInfos.size() >= 1 && "Should have at least one Region");
  1580. for (RegInfo &RI : Scope->RegInfos) {
  1581. const Region *R = RI.R;
  1582. unsigned Duplication = getRegionDuplicationCount(R);
  1583. CHR_DEBUG(dbgs() << "Dup count for R=" << R << " is " << Duplication
  1584. << "\n");
  1585. if (Duplication >= CHRDupThreshsold) {
  1586. CHR_DEBUG(dbgs() << "Reached the dup threshold of " << Duplication
  1587. << " for this region");
  1588. ORE.emit([&]() {
  1589. return OptimizationRemarkMissed(DEBUG_TYPE, "DupThresholdReached",
  1590. R->getEntry()->getTerminator())
  1591. << "Reached the duplication threshold for the region";
  1592. });
  1593. return;
  1594. }
  1595. }
  1596. for (RegInfo &RI : Scope->RegInfos) {
  1597. DuplicationCount[RI.R]++;
  1598. }
  1599. Region *FirstRegion = Scope->RegInfos[0].R;
  1600. BasicBlock *EntryBlock = FirstRegion->getEntry();
  1601. Region *LastRegion = Scope->RegInfos[Scope->RegInfos.size() - 1].R;
  1602. BasicBlock *ExitBlock = LastRegion->getExit();
  1603. std::optional<uint64_t> ProfileCount = BFI.getBlockProfileCount(EntryBlock);
  1604. if (ExitBlock) {
  1605. // Insert a trivial phi at the exit block (where the CHR hot path and the
  1606. // cold path merges) for a value that's defined in the scope but used
  1607. // outside it (meaning it's alive at the exit block). We will add the
  1608. // incoming values for the CHR cold paths to it below. Without this, we'd
  1609. // miss updating phi's for such values unless there happens to already be a
  1610. // phi for that value there.
  1611. insertTrivialPHIs(Scope, EntryBlock, ExitBlock, TrivialPHIs);
  1612. }
  1613. // Split the entry block of the first region. The new block becomes the new
  1614. // entry block of the first region. The old entry block becomes the block to
  1615. // insert the CHR branch into. Note DT gets updated. Since DT gets updated
  1616. // through the split, we update the entry of the first region after the split,
  1617. // and Region only points to the entry and the exit blocks, rather than
  1618. // keeping everything in a list or set, the blocks membership and the
  1619. // entry/exit blocks of the region are still valid after the split.
  1620. CHR_DEBUG(dbgs() << "Splitting entry block " << EntryBlock->getName()
  1621. << " at " << *Scope->BranchInsertPoint << "\n");
  1622. BasicBlock *NewEntryBlock =
  1623. SplitBlock(EntryBlock, Scope->BranchInsertPoint, &DT);
  1624. assert(NewEntryBlock->getSinglePredecessor() == EntryBlock &&
  1625. "NewEntryBlock's only pred must be EntryBlock");
  1626. FirstRegion->replaceEntryRecursive(NewEntryBlock);
  1627. BasicBlock *PreEntryBlock = EntryBlock;
  1628. ValueToValueMapTy VMap;
  1629. // Clone the blocks in the scope (excluding the PreEntryBlock) to split into a
  1630. // hot path (originals) and a cold path (clones) and update the PHIs at the
  1631. // exit block.
  1632. cloneScopeBlocks(Scope, PreEntryBlock, ExitBlock, LastRegion, VMap);
  1633. // Replace the old (placeholder) branch with the new (merged) conditional
  1634. // branch.
  1635. BranchInst *MergedBr = createMergedBranch(PreEntryBlock, EntryBlock,
  1636. NewEntryBlock, VMap);
  1637. #ifndef NDEBUG
  1638. assertCHRRegionsHaveBiasedBranchOrSelect(Scope);
  1639. #endif
  1640. // Hoist the conditional values of the branches/selects.
  1641. hoistScopeConditions(Scope, PreEntryBlock->getTerminator(), TrivialPHIs, DT);
  1642. #ifndef NDEBUG
  1643. assertBranchOrSelectConditionHoisted(Scope, PreEntryBlock);
  1644. #endif
  1645. // Create the combined branch condition and constant-fold the branches/selects
  1646. // in the hot path.
  1647. fixupBranchesAndSelects(Scope, PreEntryBlock, MergedBr,
  1648. ProfileCount.value_or(0));
  1649. }
  1650. // A helper for transformScopes. Clone the blocks in the scope (excluding the
  1651. // PreEntryBlock) to split into a hot path and a cold path and update the PHIs
  1652. // at the exit block.
  1653. void CHR::cloneScopeBlocks(CHRScope *Scope,
  1654. BasicBlock *PreEntryBlock,
  1655. BasicBlock *ExitBlock,
  1656. Region *LastRegion,
  1657. ValueToValueMapTy &VMap) {
  1658. // Clone all the blocks. The original blocks will be the hot-path
  1659. // CHR-optimized code and the cloned blocks will be the original unoptimized
  1660. // code. This is so that the block pointers from the
  1661. // CHRScope/Region/RegionInfo can stay valid in pointing to the hot-path code
  1662. // which CHR should apply to.
  1663. SmallVector<BasicBlock*, 8> NewBlocks;
  1664. for (RegInfo &RI : Scope->RegInfos)
  1665. for (BasicBlock *BB : RI.R->blocks()) { // This includes the blocks in the
  1666. // sub-Scopes.
  1667. assert(BB != PreEntryBlock && "Don't copy the preetntry block");
  1668. BasicBlock *NewBB = CloneBasicBlock(BB, VMap, ".nonchr", &F);
  1669. NewBlocks.push_back(NewBB);
  1670. VMap[BB] = NewBB;
  1671. }
  1672. // Place the cloned blocks right after the original blocks (right before the
  1673. // exit block of.)
  1674. if (ExitBlock)
  1675. F.splice(ExitBlock->getIterator(), &F, NewBlocks[0]->getIterator(),
  1676. F.end());
  1677. // Update the cloned blocks/instructions to refer to themselves.
  1678. for (BasicBlock *NewBB : NewBlocks)
  1679. for (Instruction &I : *NewBB)
  1680. RemapInstruction(&I, VMap,
  1681. RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);
  1682. // Add the cloned blocks to the PHIs of the exit blocks. ExitBlock is null for
  1683. // the top-level region but we don't need to add PHIs. The trivial PHIs
  1684. // inserted above will be updated here.
  1685. if (ExitBlock)
  1686. for (PHINode &PN : ExitBlock->phis())
  1687. for (unsigned I = 0, NumOps = PN.getNumIncomingValues(); I < NumOps;
  1688. ++I) {
  1689. BasicBlock *Pred = PN.getIncomingBlock(I);
  1690. if (LastRegion->contains(Pred)) {
  1691. Value *V = PN.getIncomingValue(I);
  1692. auto It = VMap.find(V);
  1693. if (It != VMap.end()) V = It->second;
  1694. assert(VMap.find(Pred) != VMap.end() && "Pred must have been cloned");
  1695. PN.addIncoming(V, cast<BasicBlock>(VMap[Pred]));
  1696. }
  1697. }
  1698. }
  1699. // A helper for transformScope. Replace the old (placeholder) branch with the
  1700. // new (merged) conditional branch.
  1701. BranchInst *CHR::createMergedBranch(BasicBlock *PreEntryBlock,
  1702. BasicBlock *EntryBlock,
  1703. BasicBlock *NewEntryBlock,
  1704. ValueToValueMapTy &VMap) {
  1705. BranchInst *OldBR = cast<BranchInst>(PreEntryBlock->getTerminator());
  1706. assert(OldBR->isUnconditional() && OldBR->getSuccessor(0) == NewEntryBlock &&
  1707. "SplitBlock did not work correctly!");
  1708. assert(NewEntryBlock->getSinglePredecessor() == EntryBlock &&
  1709. "NewEntryBlock's only pred must be EntryBlock");
  1710. assert(VMap.find(NewEntryBlock) != VMap.end() &&
  1711. "NewEntryBlock must have been copied");
  1712. OldBR->dropAllReferences();
  1713. OldBR->eraseFromParent();
  1714. // The true predicate is a placeholder. It will be replaced later in
  1715. // fixupBranchesAndSelects().
  1716. BranchInst *NewBR = BranchInst::Create(NewEntryBlock,
  1717. cast<BasicBlock>(VMap[NewEntryBlock]),
  1718. ConstantInt::getTrue(F.getContext()));
  1719. NewBR->insertInto(PreEntryBlock, PreEntryBlock->end());
  1720. assert(NewEntryBlock->getSinglePredecessor() == EntryBlock &&
  1721. "NewEntryBlock's only pred must be EntryBlock");
  1722. return NewBR;
  1723. }
  1724. // A helper for transformScopes. Create the combined branch condition and
  1725. // constant-fold the branches/selects in the hot path.
  1726. void CHR::fixupBranchesAndSelects(CHRScope *Scope,
  1727. BasicBlock *PreEntryBlock,
  1728. BranchInst *MergedBR,
  1729. uint64_t ProfileCount) {
  1730. Value *MergedCondition = ConstantInt::getTrue(F.getContext());
  1731. BranchProbability CHRBranchBias(1, 1);
  1732. uint64_t NumCHRedBranches = 0;
  1733. IRBuilder<> IRB(PreEntryBlock->getTerminator());
  1734. for (RegInfo &RI : Scope->CHRRegions) {
  1735. Region *R = RI.R;
  1736. if (RI.HasBranch) {
  1737. fixupBranch(R, Scope, IRB, MergedCondition, CHRBranchBias);
  1738. ++NumCHRedBranches;
  1739. }
  1740. for (SelectInst *SI : RI.Selects) {
  1741. fixupSelect(SI, Scope, IRB, MergedCondition, CHRBranchBias);
  1742. ++NumCHRedBranches;
  1743. }
  1744. }
  1745. Stats.NumBranchesDelta += NumCHRedBranches - 1;
  1746. Stats.WeightedNumBranchesDelta += (NumCHRedBranches - 1) * ProfileCount;
  1747. ORE.emit([&]() {
  1748. return OptimizationRemark(DEBUG_TYPE,
  1749. "CHR",
  1750. // Refer to the hot (original) path
  1751. MergedBR->getSuccessor(0)->getTerminator())
  1752. << "Merged " << ore::NV("NumCHRedBranches", NumCHRedBranches)
  1753. << " branches or selects";
  1754. });
  1755. MergedBR->setCondition(MergedCondition);
  1756. uint32_t Weights[] = {
  1757. static_cast<uint32_t>(CHRBranchBias.scale(1000)),
  1758. static_cast<uint32_t>(CHRBranchBias.getCompl().scale(1000)),
  1759. };
  1760. MDBuilder MDB(F.getContext());
  1761. MergedBR->setMetadata(LLVMContext::MD_prof, MDB.createBranchWeights(Weights));
  1762. CHR_DEBUG(dbgs() << "CHR branch bias " << Weights[0] << ":" << Weights[1]
  1763. << "\n");
  1764. }
  1765. // A helper for fixupBranchesAndSelects. Add to the combined branch condition
  1766. // and constant-fold a branch in the hot path.
  1767. void CHR::fixupBranch(Region *R, CHRScope *Scope,
  1768. IRBuilder<> &IRB,
  1769. Value *&MergedCondition,
  1770. BranchProbability &CHRBranchBias) {
  1771. bool IsTrueBiased = Scope->TrueBiasedRegions.count(R);
  1772. assert((IsTrueBiased || Scope->FalseBiasedRegions.count(R)) &&
  1773. "Must be truthy or falsy");
  1774. auto *BI = cast<BranchInst>(R->getEntry()->getTerminator());
  1775. assert(BranchBiasMap.find(R) != BranchBiasMap.end() &&
  1776. "Must be in the bias map");
  1777. BranchProbability Bias = BranchBiasMap[R];
  1778. assert(Bias >= getCHRBiasThreshold() && "Must be highly biased");
  1779. // Take the min.
  1780. if (CHRBranchBias > Bias)
  1781. CHRBranchBias = Bias;
  1782. BasicBlock *IfThen = BI->getSuccessor(1);
  1783. BasicBlock *IfElse = BI->getSuccessor(0);
  1784. BasicBlock *RegionExitBlock = R->getExit();
  1785. assert(RegionExitBlock && "Null ExitBlock");
  1786. assert((IfThen == RegionExitBlock || IfElse == RegionExitBlock) &&
  1787. IfThen != IfElse && "Invariant from findScopes");
  1788. if (IfThen == RegionExitBlock) {
  1789. // Swap them so that IfThen means going into it and IfElse means skipping
  1790. // it.
  1791. std::swap(IfThen, IfElse);
  1792. }
  1793. CHR_DEBUG(dbgs() << "IfThen " << IfThen->getName()
  1794. << " IfElse " << IfElse->getName() << "\n");
  1795. Value *Cond = BI->getCondition();
  1796. BasicBlock *HotTarget = IsTrueBiased ? IfThen : IfElse;
  1797. bool ConditionTrue = HotTarget == BI->getSuccessor(0);
  1798. addToMergedCondition(ConditionTrue, Cond, BI, Scope, IRB,
  1799. MergedCondition);
  1800. // Constant-fold the branch at ClonedEntryBlock.
  1801. assert(ConditionTrue == (HotTarget == BI->getSuccessor(0)) &&
  1802. "The successor shouldn't change");
  1803. Value *NewCondition = ConditionTrue ?
  1804. ConstantInt::getTrue(F.getContext()) :
  1805. ConstantInt::getFalse(F.getContext());
  1806. BI->setCondition(NewCondition);
  1807. }
  1808. // A helper for fixupBranchesAndSelects. Add to the combined branch condition
  1809. // and constant-fold a select in the hot path.
  1810. void CHR::fixupSelect(SelectInst *SI, CHRScope *Scope,
  1811. IRBuilder<> &IRB,
  1812. Value *&MergedCondition,
  1813. BranchProbability &CHRBranchBias) {
  1814. bool IsTrueBiased = Scope->TrueBiasedSelects.count(SI);
  1815. assert((IsTrueBiased ||
  1816. Scope->FalseBiasedSelects.count(SI)) && "Must be biased");
  1817. assert(SelectBiasMap.find(SI) != SelectBiasMap.end() &&
  1818. "Must be in the bias map");
  1819. BranchProbability Bias = SelectBiasMap[SI];
  1820. assert(Bias >= getCHRBiasThreshold() && "Must be highly biased");
  1821. // Take the min.
  1822. if (CHRBranchBias > Bias)
  1823. CHRBranchBias = Bias;
  1824. Value *Cond = SI->getCondition();
  1825. addToMergedCondition(IsTrueBiased, Cond, SI, Scope, IRB,
  1826. MergedCondition);
  1827. Value *NewCondition = IsTrueBiased ?
  1828. ConstantInt::getTrue(F.getContext()) :
  1829. ConstantInt::getFalse(F.getContext());
  1830. SI->setCondition(NewCondition);
  1831. }
  1832. // A helper for fixupBranch/fixupSelect. Add a branch condition to the merged
  1833. // condition.
  1834. void CHR::addToMergedCondition(bool IsTrueBiased, Value *Cond,
  1835. Instruction *BranchOrSelect, CHRScope *Scope,
  1836. IRBuilder<> &IRB, Value *&MergedCondition) {
  1837. if (!IsTrueBiased) {
  1838. // If Cond is an icmp and all users of V except for BranchOrSelect is a
  1839. // branch, negate the icmp predicate and swap the branch targets and avoid
  1840. // inserting an Xor to negate Cond.
  1841. auto *ICmp = dyn_cast<ICmpInst>(Cond);
  1842. if (!ICmp ||
  1843. !negateICmpIfUsedByBranchOrSelectOnly(ICmp, BranchOrSelect, Scope))
  1844. Cond = IRB.CreateXor(ConstantInt::getTrue(F.getContext()), Cond);
  1845. }
  1846. // Select conditions can be poison, while branching on poison is immediate
  1847. // undefined behavior. As such, we need to freeze potentially poisonous
  1848. // conditions derived from selects.
  1849. if (isa<SelectInst>(BranchOrSelect) &&
  1850. !isGuaranteedNotToBeUndefOrPoison(Cond))
  1851. Cond = IRB.CreateFreeze(Cond);
  1852. // Use logical and to avoid propagating poison from later conditions.
  1853. MergedCondition = IRB.CreateLogicalAnd(MergedCondition, Cond);
  1854. }
  1855. void CHR::transformScopes(SmallVectorImpl<CHRScope *> &CHRScopes) {
  1856. unsigned I = 0;
  1857. DenseSet<PHINode *> TrivialPHIs;
  1858. for (CHRScope *Scope : CHRScopes) {
  1859. transformScopes(Scope, TrivialPHIs);
  1860. CHR_DEBUG(
  1861. std::ostringstream oss;
  1862. oss << " after transformScopes " << I++;
  1863. dumpIR(F, oss.str().c_str(), nullptr));
  1864. (void)I;
  1865. }
  1866. }
  1867. static void LLVM_ATTRIBUTE_UNUSED
  1868. dumpScopes(SmallVectorImpl<CHRScope *> &Scopes, const char *Label) {
  1869. dbgs() << Label << " " << Scopes.size() << "\n";
  1870. for (CHRScope *Scope : Scopes) {
  1871. dbgs() << *Scope << "\n";
  1872. }
  1873. }
  1874. bool CHR::run() {
  1875. if (!shouldApply(F, PSI))
  1876. return false;
  1877. CHR_DEBUG(dumpIR(F, "before", nullptr));
  1878. bool Changed = false;
  1879. {
  1880. CHR_DEBUG(
  1881. dbgs() << "RegionInfo:\n";
  1882. RI.print(dbgs()));
  1883. // Recursively traverse the region tree and find regions that have biased
  1884. // branches and/or selects and create scopes.
  1885. SmallVector<CHRScope *, 8> AllScopes;
  1886. findScopes(AllScopes);
  1887. CHR_DEBUG(dumpScopes(AllScopes, "All scopes"));
  1888. // Split the scopes if 1) the conditional values of the biased
  1889. // branches/selects of the inner/lower scope can't be hoisted up to the
  1890. // outermost/uppermost scope entry, or 2) the condition values of the biased
  1891. // branches/selects in a scope (including subscopes) don't share at least
  1892. // one common value.
  1893. SmallVector<CHRScope *, 8> SplitScopes;
  1894. splitScopes(AllScopes, SplitScopes);
  1895. CHR_DEBUG(dumpScopes(SplitScopes, "Split scopes"));
  1896. // After splitting, set the biased regions and selects of a scope (a tree
  1897. // root) that include those of the subscopes.
  1898. classifyBiasedScopes(SplitScopes);
  1899. CHR_DEBUG(dbgs() << "Set per-scope bias " << SplitScopes.size() << "\n");
  1900. // Filter out the scopes that has only one biased region or select (CHR
  1901. // isn't useful in such a case).
  1902. SmallVector<CHRScope *, 8> FilteredScopes;
  1903. filterScopes(SplitScopes, FilteredScopes);
  1904. CHR_DEBUG(dumpScopes(FilteredScopes, "Filtered scopes"));
  1905. // Set the regions to be CHR'ed and their hoist stops for each scope.
  1906. SmallVector<CHRScope *, 8> SetScopes;
  1907. setCHRRegions(FilteredScopes, SetScopes);
  1908. CHR_DEBUG(dumpScopes(SetScopes, "Set CHR regions"));
  1909. // Sort CHRScopes by the depth so that outer CHRScopes comes before inner
  1910. // ones. We need to apply CHR from outer to inner so that we apply CHR only
  1911. // to the hot path, rather than both hot and cold paths.
  1912. SmallVector<CHRScope *, 8> SortedScopes;
  1913. sortScopes(SetScopes, SortedScopes);
  1914. CHR_DEBUG(dumpScopes(SortedScopes, "Sorted scopes"));
  1915. CHR_DEBUG(
  1916. dbgs() << "RegionInfo:\n";
  1917. RI.print(dbgs()));
  1918. // Apply the CHR transformation.
  1919. if (!SortedScopes.empty()) {
  1920. transformScopes(SortedScopes);
  1921. Changed = true;
  1922. }
  1923. }
  1924. if (Changed) {
  1925. CHR_DEBUG(dumpIR(F, "after", &Stats));
  1926. ORE.emit([&]() {
  1927. return OptimizationRemark(DEBUG_TYPE, "Stats", &F)
  1928. << ore::NV("Function", &F) << " "
  1929. << "Reduced the number of branches in hot paths by "
  1930. << ore::NV("NumBranchesDelta", Stats.NumBranchesDelta)
  1931. << " (static) and "
  1932. << ore::NV("WeightedNumBranchesDelta", Stats.WeightedNumBranchesDelta)
  1933. << " (weighted by PGO count)";
  1934. });
  1935. }
  1936. return Changed;
  1937. }
  1938. namespace llvm {
  1939. ControlHeightReductionPass::ControlHeightReductionPass() {
  1940. parseCHRFilterFiles();
  1941. }
  1942. PreservedAnalyses ControlHeightReductionPass::run(
  1943. Function &F,
  1944. FunctionAnalysisManager &FAM) {
  1945. auto &BFI = FAM.getResult<BlockFrequencyAnalysis>(F);
  1946. auto &DT = FAM.getResult<DominatorTreeAnalysis>(F);
  1947. auto &MAMProxy = FAM.getResult<ModuleAnalysisManagerFunctionProxy>(F);
  1948. auto &PSI = *MAMProxy.getCachedResult<ProfileSummaryAnalysis>(*F.getParent());
  1949. auto &RI = FAM.getResult<RegionInfoAnalysis>(F);
  1950. auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(F);
  1951. bool Changed = CHR(F, BFI, DT, PSI, RI, ORE).run();
  1952. if (!Changed)
  1953. return PreservedAnalyses::all();
  1954. return PreservedAnalyses::none();
  1955. }
  1956. } // namespace llvm