//===- WholeProgramDevirt.cpp - Whole program virtual call optimization ---===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This pass implements whole program optimization of virtual calls in cases
// where we know (via !type metadata) that the list of callees is fixed. This
// includes the following (see the example below):
// - Single implementation devirtualization: if a virtual call has a single
//   possible callee, replace all calls with a direct call to that callee.
// - Virtual constant propagation: if the virtual function's return type is an
//   integer <=64 bits and all possible callees are readnone, for each class
//   and each list of constant arguments: evaluate the function, store the
//   return value alongside the virtual table, and rewrite each virtual call
//   as a load from the virtual table.
// - Uniform return value optimization: if the conditions for virtual constant
//   propagation hold and each function returns the same constant value,
//   replace each virtual call with that constant.
// - Unique return value optimization for i1 return values: if the conditions
//   for virtual constant propagation hold and a single vtable's function
//   returns 0, or a single vtable's function returns 1, replace each virtual
//   call with a comparison of the vptr against that vtable's address.
//
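// For illustration, consider this hypothetical C++ hierarchy (invented for
// the example; it is not part of the pass):
//
//   struct A { virtual int f() = 0; };
//   struct B final : A { int f() override { return 2; } };
//
//   int g(A *a) { return a->f(); } // indirect call through A's vtable
//
// If B::f is the only implementation of A::f anywhere in the program, the
// call in g is rewritten to a direct call to B::f (single implementation
// devirtualization). If instead all implementations are readnone and return
// the same constant, say 2, the call is replaced with the constant 2
// (uniform return value optimization).
//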
// This pass is intended to be used during the regular and thin LTO pipelines:
//
// During regular LTO, the pass determines the best optimization for each
// virtual call and applies the resolutions directly to virtual calls that are
// eligible for virtual call optimization (i.e. calls that use either of the
// llvm.assume(llvm.type.test) or llvm.type.checked.load intrinsics).
//
// During hybrid Regular/ThinLTO, the pass operates in two phases:
// - Export phase: this is run during the thin link over a single merged module
//   that contains all vtables with !type metadata that participate in the
//   link. The pass computes a resolution for each virtual call and stores it
//   in the type identifier summary.
// - Import phase: this is run during the thin backends over the individual
//   modules. The pass applies the resolutions previously computed during the
//   export phase to each eligible virtual call.
//
// During ThinLTO, the pass operates in two phases:
// - Export phase: this is run during the thin link over the index which
//   contains a summary of all vtables with !type metadata that participate in
//   the link. It computes a resolution for each virtual call and stores it in
//   the type identifier summary. Only single implementation devirtualization
//   is supported.
// - Import phase: (same as with hybrid case above).
//
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/IPO/WholeProgramDevirt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseMapInfo.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Triple.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/BasicAliasAnalysis.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/TypeMetadataUtils.h"
#include "llvm/Bitcode/BitcodeReader.h"
#include "llvm/Bitcode/BitcodeWriter.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalAlias.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/ModuleSummaryIndexYAML.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/PassRegistry.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Errc.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/GlobPattern.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Transforms/IPO.h"
#include "llvm/Transforms/IPO/FunctionAttrs.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Evaluator.h"
#include <algorithm>
#include <cstddef>
#include <map>
#include <set>
#include <string>

using namespace llvm;
using namespace wholeprogramdevirt;

#define DEBUG_TYPE "wholeprogramdevirt"
static cl::opt<PassSummaryAction> ClSummaryAction(
    "wholeprogramdevirt-summary-action",
    cl::desc("What to do with the summary when running this pass"),
    cl::values(clEnumValN(PassSummaryAction::None, "none", "Do nothing"),
               clEnumValN(PassSummaryAction::Import, "import",
                          "Import typeid resolutions from summary and globals"),
               clEnumValN(PassSummaryAction::Export, "export",
                          "Export typeid resolutions to summary and globals")),
    cl::Hidden);

static cl::opt<std::string> ClReadSummary(
    "wholeprogramdevirt-read-summary",
    cl::desc(
        "Read summary from given bitcode or YAML file before running pass"),
    cl::Hidden);

static cl::opt<std::string> ClWriteSummary(
    "wholeprogramdevirt-write-summary",
    cl::desc("Write summary to given bitcode or YAML file after running pass. "
             "Output file format is deduced from extension: *.bc means writing "
             "bitcode, otherwise YAML"),
    cl::Hidden);

static cl::opt<unsigned>
    ClThreshold("wholeprogramdevirt-branch-funnel-threshold", cl::Hidden,
                cl::init(10), cl::ZeroOrMore,
                cl::desc("Maximum number of call targets per "
                         "call site to enable branch funnels"));

static cl::opt<bool>
    PrintSummaryDevirt("wholeprogramdevirt-print-index-based", cl::Hidden,
                       cl::init(false), cl::ZeroOrMore,
                       cl::desc("Print index-based devirtualization messages"));

/// Provide a way to force enable whole program visibility in tests.
/// This is needed to support legacy tests that don't contain
/// !vcall_visibility metadata (the mere presence of type tests
/// previously implied hidden visibility).
static cl::opt<bool>
    WholeProgramVisibility("whole-program-visibility", cl::init(false),
                           cl::Hidden, cl::ZeroOrMore,
                           cl::desc("Enable whole program visibility"));

/// Provide a way to force disable whole program visibility for debugging or
/// workarounds, when enabled via the linker.
static cl::opt<bool> DisableWholeProgramVisibility(
    "disable-whole-program-visibility", cl::init(false), cl::Hidden,
    cl::ZeroOrMore,
    cl::desc("Disable whole program visibility (overrides enabling options)"));

/// Provide a way to prevent certain functions from being devirtualized.
static cl::list<std::string>
    SkipFunctionNames("wholeprogramdevirt-skip",
                      cl::desc("Prevent function(s) from being devirtualized"),
                      cl::Hidden, cl::ZeroOrMore, cl::CommaSeparated);

/// Mechanism to add runtime checking of devirtualization decisions, trapping on
/// any that are not correct. Useful for debugging undefined behavior leading to
/// failures with WPD.
static cl::opt<bool>
    CheckDevirt("wholeprogramdevirt-check", cl::init(false), cl::Hidden,
                cl::ZeroOrMore,
                cl::desc("Add code to trap on incorrect devirtualizations"));
namespace {
struct PatternList {
  std::vector<GlobPattern> Patterns;
  template <class T> void init(const T &StringList) {
    for (const auto &S : StringList)
      if (Expected<GlobPattern> Pat = GlobPattern::create(S))
        Patterns.push_back(std::move(*Pat));
      else
        // Skip invalid patterns, consuming the error so the unchecked
        // Expected does not assert in debug builds.
        consumeError(Pat.takeError());
  }
  bool match(StringRef S) {
    for (const GlobPattern &P : Patterns)
      if (P.match(S))
        return true;
    return false;
  }
};
} // namespace
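// As a usage sketch (the mangled names are hypothetical): running the pass
// with
//   -wholeprogramdevirt-skip=_ZN3Foo1fEv,_ZN3Bar*
// builds a PatternList whose match() returns true for the exact name
// _ZN3Foo1fEv and for any name beginning with _ZN3Bar, preventing those
// targets from being devirtualized.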
// Find the minimum offset that we may store a value of size Size bits at. If
// IsAfter is set, look for an offset after the object, otherwise look for an
// offset before the object.
uint64_t
wholeprogramdevirt::findLowestOffset(ArrayRef<VirtualCallTarget> Targets,
                                     bool IsAfter, uint64_t Size) {
  // Find a minimum offset taking into account only vtable sizes.
  uint64_t MinByte = 0;
  for (const VirtualCallTarget &Target : Targets) {
    if (IsAfter)
      MinByte = std::max(MinByte, Target.minAfterBytes());
    else
      MinByte = std::max(MinByte, Target.minBeforeBytes());
  }

  // Build a vector of arrays of bytes covering, for each target, a slice of
  // the used region (see AccumBitVector::BytesUsed in
  // llvm/Transforms/IPO/WholeProgramDevirt.h) starting at MinByte.
  // Effectively, this aligns the used regions to start at MinByte.
  //
  // In this example, A, B and C are vtables, # is a byte already allocated for
  // a virtual function pointer, AAAA... (etc.) are the used regions for the
  // vtables and Offset(X) is the value computed for the Offset variable below
  // for X.
  //
  //                    Offset(A)
  //                    |       |
  //                            |MinByte
  // A: ################AAAAAAAA|AAAAAAAA
  // B: ########BBBBBBBBBBBBBBBB|BBBB
  // C: ########################|CCCCCCCCCCCCCCCC
  //            |   Offset(B)   |
  //
  // This code produces the slices of A, B and C that appear after the divider
  // at MinByte.
  std::vector<ArrayRef<uint8_t>> Used;
  for (const VirtualCallTarget &Target : Targets) {
    ArrayRef<uint8_t> VTUsed = IsAfter ? Target.TM->Bits->After.BytesUsed
                                       : Target.TM->Bits->Before.BytesUsed;
    uint64_t Offset = IsAfter ? MinByte - Target.minAfterBytes()
                              : MinByte - Target.minBeforeBytes();

    // Disregard used regions that are smaller than Offset. These are
    // effectively all-free regions that do not need to be checked.
    if (VTUsed.size() > Offset)
      Used.push_back(VTUsed.slice(Offset));
  }

  if (Size == 1) {
    // Find a free bit in each member of Used.
    for (unsigned I = 0;; ++I) {
      uint8_t BitsUsed = 0;
      for (auto &&B : Used)
        if (I < B.size())
          BitsUsed |= B[I];
      if (BitsUsed != 0xff)
        return (MinByte + I) * 8 +
               countTrailingZeros(uint8_t(~BitsUsed), ZB_Undefined);
    }
  } else {
    // Find a free (Size/8) byte region in each member of Used.
    // FIXME: see if alignment helps.
    for (unsigned I = 0;; ++I) {
      for (auto &&B : Used) {
        unsigned Byte = 0;
        while ((I + Byte) < B.size() && Byte < (Size / 8)) {
          if (B[I + Byte])
            goto NextI;
          ++Byte;
        }
      }
      return (MinByte + I) * 8;
    NextI:;
    }
  }
}
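// A worked sketch of the Size == 1 search above (values invented): suppose
// MinByte is 0 and Used holds the slices {0xff, 0x01} and {0xff}. At I = 0,
// BitsUsed is 0xff, so no bit is free. At I = 1 only the first slice
// contributes, BitsUsed is 0x01, and the function returns
// (0 + 1) * 8 + countTrailingZeros(0xfe) = 9, i.e. bit 1 of byte 1.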
void wholeprogramdevirt::setBeforeReturnValues(
    MutableArrayRef<VirtualCallTarget> Targets, uint64_t AllocBefore,
    unsigned BitWidth, int64_t &OffsetByte, uint64_t &OffsetBit) {
  if (BitWidth == 1)
    OffsetByte = -(AllocBefore / 8 + 1);
  else
    OffsetByte = -((AllocBefore + 7) / 8 + (BitWidth + 7) / 8);
  OffsetBit = AllocBefore % 8;

  for (VirtualCallTarget &Target : Targets) {
    if (BitWidth == 1)
      Target.setBeforeBit(AllocBefore);
    else
      Target.setBeforeBytes(AllocBefore, (BitWidth + 7) / 8);
  }
}

void wholeprogramdevirt::setAfterReturnValues(
    MutableArrayRef<VirtualCallTarget> Targets, uint64_t AllocAfter,
    unsigned BitWidth, int64_t &OffsetByte, uint64_t &OffsetBit) {
  if (BitWidth == 1)
    OffsetByte = AllocAfter / 8;
  else
    OffsetByte = (AllocAfter + 7) / 8;
  OffsetBit = AllocAfter % 8;

  for (VirtualCallTarget &Target : Targets) {
    if (BitWidth == 1)
      Target.setAfterBit(AllocAfter);
    else
      Target.setAfterBytes(AllocAfter, (BitWidth + 7) / 8);
  }
}
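// A worked sketch of the offset math above (values invented): for a one-bit
// return value allocated at AllocAfter = 12 bits past the end of the vtable,
// setAfterReturnValues yields OffsetByte = 12 / 8 = 1 and
// OffsetBit = 12 % 8 = 4, i.e. the value is loaded from bit 4 of the byte one
// past the vtable.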
VirtualCallTarget::VirtualCallTarget(Function *Fn, const TypeMemberInfo *TM)
    : Fn(Fn), TM(TM),
      IsBigEndian(Fn->getParent()->getDataLayout().isBigEndian()),
      WasDevirt(false) {}
namespace {

// A slot in a set of virtual tables. The TypeID identifies the set of virtual
// tables, and the ByteOffset is the offset in bytes from the address point to
// the virtual function pointer.
struct VTableSlot {
  Metadata *TypeID;
  uint64_t ByteOffset;
};

} // end anonymous namespace

namespace llvm {

template <> struct DenseMapInfo<VTableSlot> {
  static VTableSlot getEmptyKey() {
    return {DenseMapInfo<Metadata *>::getEmptyKey(),
            DenseMapInfo<uint64_t>::getEmptyKey()};
  }
  static VTableSlot getTombstoneKey() {
    return {DenseMapInfo<Metadata *>::getTombstoneKey(),
            DenseMapInfo<uint64_t>::getTombstoneKey()};
  }
  static unsigned getHashValue(const VTableSlot &I) {
    return DenseMapInfo<Metadata *>::getHashValue(I.TypeID) ^
           DenseMapInfo<uint64_t>::getHashValue(I.ByteOffset);
  }
  static bool isEqual(const VTableSlot &LHS, const VTableSlot &RHS) {
    return LHS.TypeID == RHS.TypeID && LHS.ByteOffset == RHS.ByteOffset;
  }
};

template <> struct DenseMapInfo<VTableSlotSummary> {
  static VTableSlotSummary getEmptyKey() {
    return {DenseMapInfo<StringRef>::getEmptyKey(),
            DenseMapInfo<uint64_t>::getEmptyKey()};
  }
  static VTableSlotSummary getTombstoneKey() {
    return {DenseMapInfo<StringRef>::getTombstoneKey(),
            DenseMapInfo<uint64_t>::getTombstoneKey()};
  }
  static unsigned getHashValue(const VTableSlotSummary &I) {
    return DenseMapInfo<StringRef>::getHashValue(I.TypeID) ^
           DenseMapInfo<uint64_t>::getHashValue(I.ByteOffset);
  }
  static bool isEqual(const VTableSlotSummary &LHS,
                      const VTableSlotSummary &RHS) {
    return LHS.TypeID == RHS.TypeID && LHS.ByteOffset == RHS.ByteOffset;
  }
};

} // end namespace llvm
namespace {

// Returns true if the function must be unreachable based on ValueInfo.
//
// In particular, identifies a function as unreachable when both of the
// following conditions hold:
// 1) All summaries are live.
// 2) All function summaries indicate it's unreachable.
bool mustBeUnreachableFunction(ValueInfo TheFnVI) {
  if ((!TheFnVI) || TheFnVI.getSummaryList().empty()) {
    // Returns false if ValueInfo is absent, or the summary list is empty
    // (e.g., function declarations).
    return false;
  }

  for (auto &Summary : TheFnVI.getSummaryList()) {
    // Conservatively returns false if any non-live functions are seen.
    // In general either all summaries should be live or all should be dead.
    if (!Summary->isLive())
      return false;
    if (auto *FS = dyn_cast<FunctionSummary>(Summary.get())) {
      if (!FS->fflags().MustBeUnreachable)
        return false;
    }
    // Do nothing if a non-function has the same GUID (which is rare).
    // This is correct since non-function summaries are not relevant.
  }
  // All function summaries are live and all of them agree that the function is
  // unreachable.
  return true;
}
// A virtual call site. VTable is the loaded virtual table pointer, and CB is
// the indirect virtual call.
struct VirtualCallSite {
  Value *VTable = nullptr;
  CallBase &CB;

  // If non-null, this field points to the associated unsafe use count stored
  // in the DevirtModule::NumUnsafeUsesForTypeTest map below. See the
  // description of that field for details.
  unsigned *NumUnsafeUses = nullptr;

  void
  emitRemark(const StringRef OptName, const StringRef TargetName,
             function_ref<OptimizationRemarkEmitter &(Function *)> OREGetter) {
    Function *F = CB.getCaller();
    DebugLoc DLoc = CB.getDebugLoc();
    BasicBlock *Block = CB.getParent();

    using namespace ore;
    OREGetter(F).emit(OptimizationRemark(DEBUG_TYPE, OptName, DLoc, Block)
                      << NV("Optimization", OptName)
                      << ": devirtualized a call to "
                      << NV("FunctionName", TargetName));
  }

  void replaceAndErase(
      const StringRef OptName, const StringRef TargetName, bool RemarksEnabled,
      function_ref<OptimizationRemarkEmitter &(Function *)> OREGetter,
      Value *New) {
    if (RemarksEnabled)
      emitRemark(OptName, TargetName, OREGetter);
    CB.replaceAllUsesWith(New);
    if (auto *II = dyn_cast<InvokeInst>(&CB)) {
      BranchInst::Create(II->getNormalDest(), &CB);
      II->getUnwindDest()->removePredecessor(II->getParent());
    }
    CB.eraseFromParent();

    // This use is no longer unsafe.
    if (NumUnsafeUses)
      --*NumUnsafeUses;
  }
};
// Call site information collected for a specific VTableSlot and possibly a
// list of constant integer arguments. The grouping by arguments is handled by
// the VTableSlotInfo class.
struct CallSiteInfo {
  /// The set of call sites for this slot. Used during regular LTO and the
  /// import phase of ThinLTO (as well as the export phase of ThinLTO for any
  /// call sites that appear in the merged module itself); in each of these
  /// cases we are directly operating on the call sites at the IR level.
  std::vector<VirtualCallSite> CallSites;

  /// Whether all call sites represented by this CallSiteInfo, including those
  /// in summaries, have been devirtualized. This starts off as true because a
  /// default constructed CallSiteInfo represents no call sites.
  bool AllCallSitesDevirted = true;

  // These fields are used during the export phase of ThinLTO and reflect
  // information collected from function summaries.

  /// Whether any function summary contains an llvm.assume(llvm.type.test) for
  /// this slot.
  bool SummaryHasTypeTestAssumeUsers = false;

  /// CFI-specific: a vector containing the list of function summaries that use
  /// the llvm.type.checked.load intrinsic and therefore will require
  /// resolutions for llvm.type.test in order to implement CFI checks if
  /// devirtualization was unsuccessful. If devirtualization was successful,
  /// the pass will clear this vector by calling markDevirt(). If at the end of
  /// the pass the vector is non-empty, we will need to add a use of
  /// llvm.type.test to each of the function summaries in the vector.
  std::vector<FunctionSummary *> SummaryTypeCheckedLoadUsers;
  std::vector<FunctionSummary *> SummaryTypeTestAssumeUsers;

  bool isExported() const {
    return SummaryHasTypeTestAssumeUsers ||
           !SummaryTypeCheckedLoadUsers.empty();
  }

  void addSummaryTypeCheckedLoadUser(FunctionSummary *FS) {
    SummaryTypeCheckedLoadUsers.push_back(FS);
    AllCallSitesDevirted = false;
  }

  void addSummaryTypeTestAssumeUser(FunctionSummary *FS) {
    SummaryTypeTestAssumeUsers.push_back(FS);
    SummaryHasTypeTestAssumeUsers = true;
    AllCallSitesDevirted = false;
  }

  void markDevirt() {
    AllCallSitesDevirted = true;

    // As explained in the comment for SummaryTypeCheckedLoadUsers.
    SummaryTypeCheckedLoadUsers.clear();
  }
};
// Call site information collected for a specific VTableSlot.
struct VTableSlotInfo {
  // The set of call sites which do not have all constant integer arguments
  // (excluding "this").
  CallSiteInfo CSInfo;

  // The set of call sites with all constant integer arguments (excluding
  // "this"), grouped by argument list.
  std::map<std::vector<uint64_t>, CallSiteInfo> ConstCSInfo;

  void addCallSite(Value *VTable, CallBase &CB, unsigned *NumUnsafeUses);

private:
  CallSiteInfo &findCallSiteInfo(CallBase &CB);
};

CallSiteInfo &VTableSlotInfo::findCallSiteInfo(CallBase &CB) {
  std::vector<uint64_t> Args;
  auto *CBType = dyn_cast<IntegerType>(CB.getType());
  if (!CBType || CBType->getBitWidth() > 64 || CB.arg_empty())
    return CSInfo;
  for (auto &&Arg : drop_begin(CB.args())) {
    auto *CI = dyn_cast<ConstantInt>(Arg);
    if (!CI || CI->getBitWidth() > 64)
      return CSInfo;
    Args.push_back(CI->getZExtValue());
  }
  return ConstCSInfo[Args];
}
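// For example (a hypothetical IR sketch): a virtual call such as
//   %r = call i32 %fptr(ptr %obj, i32 1, i32 2)
// has all-constant integer arguments after "this", so it is grouped under
// ConstCSInfo[{1, 2}]; the same call with a non-constant i32 argument, or
// with a non-integer or >64-bit return type, falls back to the generic
// CSInfo.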
void VTableSlotInfo::addCallSite(Value *VTable, CallBase &CB,
                                 unsigned *NumUnsafeUses) {
  auto &CSI = findCallSiteInfo(CB);
  CSI.AllCallSitesDevirted = false;
  CSI.CallSites.push_back({VTable, CB, NumUnsafeUses});
}
struct DevirtModule {
  Module &M;
  function_ref<AAResults &(Function &)> AARGetter;
  function_ref<DominatorTree &(Function &)> LookupDomTree;

  ModuleSummaryIndex *ExportSummary;
  const ModuleSummaryIndex *ImportSummary;

  IntegerType *Int8Ty;
  PointerType *Int8PtrTy;
  IntegerType *Int32Ty;
  IntegerType *Int64Ty;
  IntegerType *IntPtrTy;
  /// Sizeless array type, used for imported vtables. This provides a signal
  /// to analyzers that these imports may alias, as they do for example
  /// when multiple unique return values occur in the same vtable.
  ArrayType *Int8Arr0Ty;

  bool RemarksEnabled;
  function_ref<OptimizationRemarkEmitter &(Function *)> OREGetter;

  MapVector<VTableSlot, VTableSlotInfo> CallSlots;

  // Calls that have already been optimized. We may add a call to multiple
  // VTableSlotInfos if vtable loads are coalesced and need to make sure not to
  // optimize a call more than once.
  SmallPtrSet<CallBase *, 8> OptimizedCalls;

  // This map keeps track of the number of "unsafe" uses of a loaded function
  // pointer. The key is the associated llvm.type.test intrinsic call generated
  // by this pass. An unsafe use is one that calls the loaded function pointer
  // directly. Every time we eliminate an unsafe use (for example, by
  // devirtualizing it or by applying virtual constant propagation), we
  // decrement the value stored in this map. If a value reaches zero, we can
  // eliminate the type check by RAUWing the associated llvm.type.test call
  // with true.
  std::map<CallInst *, unsigned> NumUnsafeUsesForTypeTest;
  PatternList FunctionsToSkip;

  DevirtModule(Module &M, function_ref<AAResults &(Function &)> AARGetter,
               function_ref<OptimizationRemarkEmitter &(Function *)> OREGetter,
               function_ref<DominatorTree &(Function &)> LookupDomTree,
               ModuleSummaryIndex *ExportSummary,
               const ModuleSummaryIndex *ImportSummary)
      : M(M), AARGetter(AARGetter), LookupDomTree(LookupDomTree),
        ExportSummary(ExportSummary), ImportSummary(ImportSummary),
        Int8Ty(Type::getInt8Ty(M.getContext())),
        Int8PtrTy(Type::getInt8PtrTy(M.getContext())),
        Int32Ty(Type::getInt32Ty(M.getContext())),
        Int64Ty(Type::getInt64Ty(M.getContext())),
        IntPtrTy(M.getDataLayout().getIntPtrType(M.getContext(), 0)),
        Int8Arr0Ty(ArrayType::get(Type::getInt8Ty(M.getContext()), 0)),
        RemarksEnabled(areRemarksEnabled()), OREGetter(OREGetter) {
    assert(!(ExportSummary && ImportSummary));
    FunctionsToSkip.init(SkipFunctionNames);
  }

  bool areRemarksEnabled();

  void
  scanTypeTestUsers(Function *TypeTestFunc,
                    DenseMap<Metadata *, std::set<TypeMemberInfo>> &TypeIdMap);
  void scanTypeCheckedLoadUsers(Function *TypeCheckedLoadFunc);

  void buildTypeIdentifierMap(
      std::vector<VTableBits> &Bits,
      DenseMap<Metadata *, std::set<TypeMemberInfo>> &TypeIdMap);

  bool
  tryFindVirtualCallTargets(std::vector<VirtualCallTarget> &TargetsForSlot,
                            const std::set<TypeMemberInfo> &TypeMemberInfos,
                            uint64_t ByteOffset,
                            ModuleSummaryIndex *ExportSummary);

  void applySingleImplDevirt(VTableSlotInfo &SlotInfo, Constant *TheFn,
                             bool &IsExported);
  bool trySingleImplDevirt(ModuleSummaryIndex *ExportSummary,
                           MutableArrayRef<VirtualCallTarget> TargetsForSlot,
                           VTableSlotInfo &SlotInfo,
                           WholeProgramDevirtResolution *Res);

  void applyICallBranchFunnel(VTableSlotInfo &SlotInfo, Constant *JT,
                              bool &IsExported);
  void tryICallBranchFunnel(MutableArrayRef<VirtualCallTarget> TargetsForSlot,
                            VTableSlotInfo &SlotInfo,
                            WholeProgramDevirtResolution *Res, VTableSlot Slot);

  bool tryEvaluateFunctionsWithArgs(
      MutableArrayRef<VirtualCallTarget> TargetsForSlot,
      ArrayRef<uint64_t> Args);

  void applyUniformRetValOpt(CallSiteInfo &CSInfo, StringRef FnName,
                             uint64_t TheRetVal);
  bool tryUniformRetValOpt(MutableArrayRef<VirtualCallTarget> TargetsForSlot,
                           CallSiteInfo &CSInfo,
                           WholeProgramDevirtResolution::ByArg *Res);

  // Returns the global symbol name that is used to export information about
  // the given vtable slot and list of arguments.
  std::string getGlobalName(VTableSlot Slot, ArrayRef<uint64_t> Args,
                            StringRef Name);

  bool shouldExportConstantsAsAbsoluteSymbols();

  // This function is called during the export phase to create a symbol
  // definition containing information about the given vtable slot and list of
  // arguments.
  void exportGlobal(VTableSlot Slot, ArrayRef<uint64_t> Args, StringRef Name,
                    Constant *C);
  void exportConstant(VTableSlot Slot, ArrayRef<uint64_t> Args, StringRef Name,
                      uint32_t Const, uint32_t &Storage);

  // This function is called during the import phase to create a reference to
  // the symbol definition created during the export phase.
  Constant *importGlobal(VTableSlot Slot, ArrayRef<uint64_t> Args,
                         StringRef Name);
  Constant *importConstant(VTableSlot Slot, ArrayRef<uint64_t> Args,
                           StringRef Name, IntegerType *IntTy,
                           uint32_t Storage);

  Constant *getMemberAddr(const TypeMemberInfo *M);

  void applyUniqueRetValOpt(CallSiteInfo &CSInfo, StringRef FnName, bool IsOne,
                            Constant *UniqueMemberAddr);
  bool tryUniqueRetValOpt(unsigned BitWidth,
                          MutableArrayRef<VirtualCallTarget> TargetsForSlot,
                          CallSiteInfo &CSInfo,
                          WholeProgramDevirtResolution::ByArg *Res,
                          VTableSlot Slot, ArrayRef<uint64_t> Args);

  void applyVirtualConstProp(CallSiteInfo &CSInfo, StringRef FnName,
                             Constant *Byte, Constant *Bit);
  bool tryVirtualConstProp(MutableArrayRef<VirtualCallTarget> TargetsForSlot,
                           VTableSlotInfo &SlotInfo,
                           WholeProgramDevirtResolution *Res, VTableSlot Slot);

  void rebuildGlobal(VTableBits &B);

  // Apply the summary resolution for Slot to all virtual calls in SlotInfo.
  void importResolution(VTableSlot Slot, VTableSlotInfo &SlotInfo);

  // If we were able to eliminate all unsafe uses for a type checked load,
  // eliminate the associated type tests by replacing them with true.
  void removeRedundantTypeTests();

  bool run();

  // Look up the corresponding ValueInfo entry of `TheFn` in `ExportSummary`.
  //
  // Caller guarantees that `ExportSummary` is not nullptr.
  static ValueInfo lookUpFunctionValueInfo(Function *TheFn,
                                           ModuleSummaryIndex *ExportSummary);

  // Returns true if the function definition must be unreachable.
  //
  // Note if this helper function returns true, `F` is guaranteed
  // to be unreachable; if it returns false, `F` might still
  // be unreachable but not covered by this helper function.
  //
  // Implementation-wise, if the function definition is present, the IR is
  // analyzed; if not, the function flags are looked up in ExportSummary as a
  // fallback.
  static bool mustBeUnreachableFunction(Function *const F,
                                        ModuleSummaryIndex *ExportSummary);

  // Lower the module using the action and summary passed as command line
  // arguments. For testing purposes only.
  static bool
  runForTesting(Module &M, function_ref<AAResults &(Function &)> AARGetter,
                function_ref<OptimizationRemarkEmitter &(Function *)> OREGetter,
                function_ref<DominatorTree &(Function &)> LookupDomTree);
};
struct DevirtIndex {
  ModuleSummaryIndex &ExportSummary;

  // The set in which to record GUIDs exported from their module by
  // devirtualization, used by client to ensure they are not internalized.
  std::set<GlobalValue::GUID> &ExportedGUIDs;

  // A map in which to record the information necessary to locate the WPD
  // resolution for local targets in case they are exported by cross module
  // importing.
  std::map<ValueInfo, std::vector<VTableSlotSummary>> &LocalWPDTargetsMap;

  MapVector<VTableSlotSummary, VTableSlotInfo> CallSlots;

  PatternList FunctionsToSkip;

  DevirtIndex(
      ModuleSummaryIndex &ExportSummary,
      std::set<GlobalValue::GUID> &ExportedGUIDs,
      std::map<ValueInfo, std::vector<VTableSlotSummary>> &LocalWPDTargetsMap)
      : ExportSummary(ExportSummary), ExportedGUIDs(ExportedGUIDs),
        LocalWPDTargetsMap(LocalWPDTargetsMap) {
    FunctionsToSkip.init(SkipFunctionNames);
  }

  bool tryFindVirtualCallTargets(std::vector<ValueInfo> &TargetsForSlot,
                                 const TypeIdCompatibleVtableInfo TIdInfo,
                                 uint64_t ByteOffset);

  bool trySingleImplDevirt(MutableArrayRef<ValueInfo> TargetsForSlot,
                           VTableSlotSummary &SlotSummary,
                           VTableSlotInfo &SlotInfo,
                           WholeProgramDevirtResolution *Res,
                           std::set<ValueInfo> &DevirtTargets);

  void run();
};
struct WholeProgramDevirt : public ModulePass {
  static char ID;

  bool UseCommandLine = false;

  ModuleSummaryIndex *ExportSummary = nullptr;
  const ModuleSummaryIndex *ImportSummary = nullptr;

  WholeProgramDevirt() : ModulePass(ID), UseCommandLine(true) {
    initializeWholeProgramDevirtPass(*PassRegistry::getPassRegistry());
  }

  WholeProgramDevirt(ModuleSummaryIndex *ExportSummary,
                     const ModuleSummaryIndex *ImportSummary)
      : ModulePass(ID), ExportSummary(ExportSummary),
        ImportSummary(ImportSummary) {
    initializeWholeProgramDevirtPass(*PassRegistry::getPassRegistry());
  }

  bool runOnModule(Module &M) override {
    if (skipModule(M))
      return false;

    // In the new pass manager, we can request the optimization
    // remark emitter pass on a per-function basis, which the
    // OREGetter will do for us.
    // In the old pass manager, this is harder, so we just build
    // an optimization remark emitter on the fly, when we need it.
    std::unique_ptr<OptimizationRemarkEmitter> ORE;
    auto OREGetter = [&](Function *F) -> OptimizationRemarkEmitter & {
      ORE = std::make_unique<OptimizationRemarkEmitter>(F);
      return *ORE;
    };

    auto LookupDomTree = [this](Function &F) -> DominatorTree & {
      return this->getAnalysis<DominatorTreeWrapperPass>(F).getDomTree();
    };

    if (UseCommandLine)
      return DevirtModule::runForTesting(M, LegacyAARGetter(*this), OREGetter,
                                         LookupDomTree);

    return DevirtModule(M, LegacyAARGetter(*this), OREGetter, LookupDomTree,
                        ExportSummary, ImportSummary)
        .run();
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<AssumptionCacheTracker>();
    AU.addRequired<TargetLibraryInfoWrapperPass>();
    AU.addRequired<DominatorTreeWrapperPass>();
  }
};

} // end anonymous namespace
INITIALIZE_PASS_BEGIN(WholeProgramDevirt, "wholeprogramdevirt",
                      "Whole program devirtualization", false, false)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_END(WholeProgramDevirt, "wholeprogramdevirt",
                    "Whole program devirtualization", false, false)

char WholeProgramDevirt::ID = 0;

ModulePass *
llvm::createWholeProgramDevirtPass(ModuleSummaryIndex *ExportSummary,
                                   const ModuleSummaryIndex *ImportSummary) {
  return new WholeProgramDevirt(ExportSummary, ImportSummary);
}

PreservedAnalyses WholeProgramDevirtPass::run(Module &M,
                                              ModuleAnalysisManager &AM) {
  auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
  auto AARGetter = [&](Function &F) -> AAResults & {
    return FAM.getResult<AAManager>(F);
  };
  auto OREGetter = [&](Function *F) -> OptimizationRemarkEmitter & {
    return FAM.getResult<OptimizationRemarkEmitterAnalysis>(*F);
  };
  auto LookupDomTree = [&FAM](Function &F) -> DominatorTree & {
    return FAM.getResult<DominatorTreeAnalysis>(F);
  };
  if (UseCommandLine) {
    if (DevirtModule::runForTesting(M, AARGetter, OREGetter, LookupDomTree))
      return PreservedAnalyses::all();
    return PreservedAnalyses::none();
  }
  if (!DevirtModule(M, AARGetter, OREGetter, LookupDomTree, ExportSummary,
                    ImportSummary)
           .run())
    return PreservedAnalyses::all();
  return PreservedAnalyses::none();
}
// Enable whole program visibility if enabled by client (e.g. linker) or
// internal option, and not force disabled.
static bool hasWholeProgramVisibility(bool WholeProgramVisibilityEnabledInLTO) {
  return (WholeProgramVisibilityEnabledInLTO || WholeProgramVisibility) &&
         !DisableWholeProgramVisibility;
}

namespace llvm {

/// If whole program visibility asserted, then upgrade all public vcall
/// visibility metadata on vtable definitions to linkage unit visibility in
/// Module IR (for regular or hybrid LTO).
void updateVCallVisibilityInModule(
    Module &M, bool WholeProgramVisibilityEnabledInLTO,
    const DenseSet<GlobalValue::GUID> &DynamicExportSymbols) {
  if (!hasWholeProgramVisibility(WholeProgramVisibilityEnabledInLTO))
    return;
  for (GlobalVariable &GV : M.globals())
    // Add linkage unit visibility to any variable with type metadata, which
    // are the vtable definitions. We won't have an existing vcall_visibility
    // metadata on vtable definitions with public visibility.
    if (GV.hasMetadata(LLVMContext::MD_type) &&
        GV.getVCallVisibility() == GlobalObject::VCallVisibilityPublic &&
        // Don't upgrade the visibility for symbols exported to the dynamic
        // linker, as we have no information on their eventual use.
        !DynamicExportSymbols.count(GV.getGUID()))
      GV.setVCallVisibilityMetadata(GlobalObject::VCallVisibilityLinkageUnit);
}

/// If whole program visibility asserted, then upgrade all public vcall
/// visibility metadata on vtable definition summaries to linkage unit
/// visibility in Module summary index (for ThinLTO).
void updateVCallVisibilityInIndex(
    ModuleSummaryIndex &Index, bool WholeProgramVisibilityEnabledInLTO,
    const DenseSet<GlobalValue::GUID> &DynamicExportSymbols) {
  if (!hasWholeProgramVisibility(WholeProgramVisibilityEnabledInLTO))
    return;
  for (auto &P : Index) {
    for (auto &S : P.second.SummaryList) {
      auto *GVar = dyn_cast<GlobalVarSummary>(S.get());
      if (!GVar ||
          GVar->getVCallVisibility() != GlobalObject::VCallVisibilityPublic ||
          // Don't upgrade the visibility for symbols exported to the dynamic
          // linker, as we have no information on their eventual use.
          DynamicExportSymbols.count(P.first))
        continue;
      GVar->setVCallVisibility(GlobalObject::VCallVisibilityLinkageUnit);
    }
  }
}

void runWholeProgramDevirtOnIndex(
    ModuleSummaryIndex &Summary, std::set<GlobalValue::GUID> &ExportedGUIDs,
    std::map<ValueInfo, std::vector<VTableSlotSummary>> &LocalWPDTargetsMap) {
  DevirtIndex(Summary, ExportedGUIDs, LocalWPDTargetsMap).run();
}

void updateIndexWPDForExports(
    ModuleSummaryIndex &Summary,
    function_ref<bool(StringRef, ValueInfo)> isExported,
    std::map<ValueInfo, std::vector<VTableSlotSummary>> &LocalWPDTargetsMap) {
  for (auto &T : LocalWPDTargetsMap) {
    auto &VI = T.first;
    // This was enforced earlier during trySingleImplDevirt.
    assert(VI.getSummaryList().size() == 1 &&
           "Devirt of local target has more than one copy");
    auto &S = VI.getSummaryList()[0];
    if (!isExported(S->modulePath(), VI))
      continue;

    // It's been exported by a cross module import.
    for (auto &SlotSummary : T.second) {
      auto *TIdSum = Summary.getTypeIdSummary(SlotSummary.TypeID);
      assert(TIdSum);
      auto WPDRes = TIdSum->WPDRes.find(SlotSummary.ByteOffset);
      assert(WPDRes != TIdSum->WPDRes.end());
      WPDRes->second.SingleImplName = ModuleSummaryIndex::getGlobalNameForLocal(
          WPDRes->second.SingleImplName,
          Summary.getModuleHash(S->modulePath()));
    }
  }
}

} // end namespace llvm
static Error checkCombinedSummaryForTesting(ModuleSummaryIndex *Summary) {
  // Check that summary index contains regular LTO module when performing
  // export to prevent occasional use of index from pure ThinLTO compilation
  // (-fno-split-lto-module). This kind of summary index is passed to
  // DevirtIndex::run, not to DevirtModule::run used by opt/runForTesting.
  const auto &ModPaths = Summary->modulePaths();
  if (ClSummaryAction != PassSummaryAction::Import &&
      ModPaths.find(ModuleSummaryIndex::getRegularLTOModuleName()) ==
          ModPaths.end())
    return createStringError(
        errc::invalid_argument,
        "combined summary should contain Regular LTO module");
  return ErrorSuccess();
}

bool DevirtModule::runForTesting(
    Module &M, function_ref<AAResults &(Function &)> AARGetter,
    function_ref<OptimizationRemarkEmitter &(Function *)> OREGetter,
    function_ref<DominatorTree &(Function &)> LookupDomTree) {
  std::unique_ptr<ModuleSummaryIndex> Summary =
      std::make_unique<ModuleSummaryIndex>(/*HaveGVs=*/false);

  // Handle the command-line summary arguments. This code is for testing
  // purposes only, so we handle errors directly.
  if (!ClReadSummary.empty()) {
    ExitOnError ExitOnErr("-wholeprogramdevirt-read-summary: " + ClReadSummary +
                          ": ");
    auto ReadSummaryFile =
        ExitOnErr(errorOrToExpected(MemoryBuffer::getFile(ClReadSummary)));
    if (Expected<std::unique_ptr<ModuleSummaryIndex>> SummaryOrErr =
            getModuleSummaryIndex(*ReadSummaryFile)) {
      Summary = std::move(*SummaryOrErr);
      ExitOnErr(checkCombinedSummaryForTesting(Summary.get()));
    } else {
      // Try YAML if we've failed with bitcode.
      consumeError(SummaryOrErr.takeError());
      yaml::Input In(ReadSummaryFile->getBuffer());
      In >> *Summary;
      ExitOnErr(errorCodeToError(In.error()));
    }
  }

  bool Changed =
      DevirtModule(M, AARGetter, OREGetter, LookupDomTree,
                   ClSummaryAction == PassSummaryAction::Export ? Summary.get()
                                                                : nullptr,
                   ClSummaryAction == PassSummaryAction::Import ? Summary.get()
                                                                : nullptr)
          .run();

  if (!ClWriteSummary.empty()) {
    ExitOnError ExitOnErr(
        "-wholeprogramdevirt-write-summary: " + ClWriteSummary + ": ");
    std::error_code EC;
    if (StringRef(ClWriteSummary).endswith(".bc")) {
      raw_fd_ostream OS(ClWriteSummary, EC, sys::fs::OF_None);
      ExitOnErr(errorCodeToError(EC));
      writeIndexToFile(*Summary, OS);
    } else {
      raw_fd_ostream OS(ClWriteSummary, EC, sys::fs::OF_TextWithCRLF);
      ExitOnErr(errorCodeToError(EC));
      yaml::Output Out(OS);
      Out << *Summary;
    }
  }

  return Changed;
}
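// A usage sketch for the testing path above (file names are hypothetical):
//
//   opt -wholeprogramdevirt -whole-program-visibility \
//       -wholeprogramdevirt-summary-action=export \
//       -wholeprogramdevirt-read-summary=in.yaml \
//       -wholeprogramdevirt-write-summary=out.yaml module.ll
//
// reads a summary (trying bitcode first, then falling back to YAML), runs
// the pass in export mode, and writes the updated summary back out; a .bc
// extension on the output selects bitcode, anything else YAML.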
void DevirtModule::buildTypeIdentifierMap(
    std::vector<VTableBits> &Bits,
    DenseMap<Metadata *, std::set<TypeMemberInfo>> &TypeIdMap) {
  DenseMap<GlobalVariable *, VTableBits *> GVToBits;
  Bits.reserve(M.getGlobalList().size());
  SmallVector<MDNode *, 2> Types;
  for (GlobalVariable &GV : M.globals()) {
    Types.clear();
    GV.getMetadata(LLVMContext::MD_type, Types);
    if (GV.isDeclaration() || Types.empty())
      continue;

    VTableBits *&BitsPtr = GVToBits[&GV];
    if (!BitsPtr) {
      Bits.emplace_back();
      Bits.back().GV = &GV;
      Bits.back().ObjectSize =
          M.getDataLayout().getTypeAllocSize(GV.getInitializer()->getType());
      BitsPtr = &Bits.back();
    }

    for (MDNode *Type : Types) {
      auto TypeID = Type->getOperand(1).get();
      uint64_t Offset =
          cast<ConstantInt>(
              cast<ConstantAsMetadata>(Type->getOperand(0))->getValue())
              ->getZExtValue();
      TypeIdMap[TypeID].insert({BitsPtr, Offset});
    }
  }
}
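// For example (a hypothetical IR sketch): a vtable definition carrying type
// metadata such as
//
//   @_ZTV1B = constant ... , !type !0
//   !0 = !{i64 16, !"_ZTS1A"}
//
// yields a TypeIdMap entry keyed on the !"_ZTS1A" metadata string whose
// TypeMemberInfo records the vtable's VTableBits and the offset 16 (the
// address point within the vtable).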
bool DevirtModule::tryFindVirtualCallTargets(
    std::vector<VirtualCallTarget> &TargetsForSlot,
    const std::set<TypeMemberInfo> &TypeMemberInfos, uint64_t ByteOffset,
    ModuleSummaryIndex *ExportSummary) {
  for (const TypeMemberInfo &TM : TypeMemberInfos) {
    if (!TM.Bits->GV->isConstant())
      return false;

    // We cannot perform whole program devirtualization analysis on a vtable
    // with public LTO visibility.
    if (TM.Bits->GV->getVCallVisibility() ==
        GlobalObject::VCallVisibilityPublic)
      return false;

    Constant *Ptr = getPointerAtOffset(TM.Bits->GV->getInitializer(),
                                       TM.Offset + ByteOffset, M);
    if (!Ptr)
      return false;

    auto Fn = dyn_cast<Function>(Ptr->stripPointerCasts());
    if (!Fn)
      return false;

    if (FunctionsToSkip.match(Fn->getName()))
      return false;

    // We can disregard __cxa_pure_virtual as a possible call target, as
    // calls to pure virtuals are UB.
    if (Fn->getName() == "__cxa_pure_virtual")
      continue;

    // We can disregard unreachable functions as possible call targets, as
    // unreachable functions shouldn't be called.
    if (mustBeUnreachableFunction(Fn, ExportSummary))
      continue;

    TargetsForSlot.push_back({Fn, &TM});
  }

  // Give up if we couldn't find any targets.
  return !TargetsForSlot.empty();
}
bool DevirtIndex::tryFindVirtualCallTargets(
    std::vector<ValueInfo> &TargetsForSlot,
    const TypeIdCompatibleVtableInfo TIdInfo, uint64_t ByteOffset) {
  for (const TypeIdOffsetVtableInfo &P : TIdInfo) {
    // Find a representative copy of the vtable initializer.
    // We can have multiple available_externally, linkonce_odr and weak_odr
    // vtable initializers. We can also have multiple external vtable
    // initializers in the case of comdats, which we cannot check here.
    // The linker should give an error in this case.
    //
    // Also, handle the case of same-named local vtables with the same path
    // and therefore the same GUID. This can happen if there isn't enough
    // distinguishing path when compiling the source file. In that case we
    // conservatively return false early.
    const GlobalVarSummary *VS = nullptr;
    bool LocalFound = false;
    for (auto &S : P.VTableVI.getSummaryList()) {
      if (GlobalValue::isLocalLinkage(S->linkage())) {
        if (LocalFound)
          return false;
        LocalFound = true;
      }
      auto *CurVS = cast<GlobalVarSummary>(S->getBaseObject());
      if (!CurVS->vTableFuncs().empty() ||
          // Previously clang did not attach the necessary type metadata to
          // available_externally vtables, in which case there would not
          // be any vtable functions listed in the summary and we need
          // to treat this case conservatively (in case the bitcode is old).
          // However, we will also not have any vtable functions in the
          // case of a pure virtual base class. In that case we do want
          // to set VS to avoid treating it conservatively.
          !GlobalValue::isAvailableExternallyLinkage(S->linkage())) {
        VS = CurVS;
        // We cannot perform whole program devirtualization analysis on a
        // vtable with public LTO visibility.
        if (VS->getVCallVisibility() == GlobalObject::VCallVisibilityPublic)
          return false;
      }
    }
    // There will be no VS if all copies are available_externally having no
    // type metadata. In that case we can't safely perform WPD.
    if (!VS)
      return false;
    if (!VS->isLive())
      continue;
    for (auto VTP : VS->vTableFuncs()) {
      if (VTP.VTableOffset != P.AddressPointOffset + ByteOffset)
        continue;

      if (mustBeUnreachableFunction(VTP.FuncVI))
        continue;

      TargetsForSlot.push_back(VTP.FuncVI);
    }
  }

  // Give up if we couldn't find any targets.
  return !TargetsForSlot.empty();
}
void DevirtModule::applySingleImplDevirt(VTableSlotInfo &SlotInfo,
                                         Constant *TheFn, bool &IsExported) {
  // Don't devirtualize function if we're told to skip it
  // in -wholeprogramdevirt-skip.
  if (FunctionsToSkip.match(TheFn->stripPointerCasts()->getName()))
    return;
  auto Apply = [&](CallSiteInfo &CSInfo) {
    for (auto &&VCallSite : CSInfo.CallSites) {
      if (!OptimizedCalls.insert(&VCallSite.CB).second)
        continue;

      if (RemarksEnabled)
        VCallSite.emitRemark("single-impl",
                             TheFn->stripPointerCasts()->getName(), OREGetter);
      auto &CB = VCallSite.CB;
      assert(!CB.getCalledFunction() && "devirtualizing direct call?");
      IRBuilder<> Builder(&CB);
      Value *Callee =
          Builder.CreateBitCast(TheFn, CB.getCalledOperand()->getType());

      // If checking is enabled, add support to compare the virtual function
      // pointer to the devirtualized target. In case of a mismatch, perform a
      // debug trap.
      if (CheckDevirt) {
        auto *Cond = Builder.CreateICmpNE(CB.getCalledOperand(), Callee);
        Instruction *ThenTerm =
            SplitBlockAndInsertIfThen(Cond, &CB, /*Unreachable=*/false);
        Builder.SetInsertPoint(ThenTerm);
        Function *TrapFn = Intrinsic::getDeclaration(&M, Intrinsic::debugtrap);
        auto *CallTrap = Builder.CreateCall(TrapFn);
        CallTrap->setDebugLoc(CB.getDebugLoc());
      }

      // Devirtualize.
      CB.setCalledOperand(Callee);

      // This use is no longer unsafe.
      if (VCallSite.NumUnsafeUses)
        --*VCallSite.NumUnsafeUses;
    }
    if (CSInfo.isExported())
      IsExported = true;
    CSInfo.markDevirt();
  };
  Apply(SlotInfo.CSInfo);
  for (auto &P : SlotInfo.ConstCSInfo)
    Apply(P.second);
}
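// A sketch of the -wholeprogramdevirt-check instrumentation above, in
// hypothetical IR: the guarded call site becomes roughly
//
//   %ne = icmp ne ptr %vfn, @TheTarget
//   br i1 %ne, label %trap, label %cont
// trap:
//   call void @llvm.debugtrap()
//   br label %cont
// cont:
//   call ... @TheTarget(...)
//
// so an incorrect devirtualization (e.g. caused by UB that violated the
// whole-program assumptions) traps at runtime instead of silently calling
// the wrong target.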
static bool AddCalls(VTableSlotInfo &SlotInfo, const ValueInfo &Callee) {
  // We can't add calls if we haven't seen a definition.
  if (Callee.getSummaryList().empty())
    return false;

  // Insert calls into the summary index so that the devirtualized targets
  // are eligible for import.
  // FIXME: Annotate type tests with hotness. For now, mark these as hot
  // to better ensure we have the opportunity to inline them.
  bool IsExported = false;
  auto &S = Callee.getSummaryList()[0];
  CalleeInfo CI(CalleeInfo::HotnessType::Hot, /* RelBF = */ 0);
  auto AddCalls = [&](CallSiteInfo &CSInfo) {
    for (auto *FS : CSInfo.SummaryTypeCheckedLoadUsers) {
      FS->addCall({Callee, CI});
      IsExported |= S->modulePath() != FS->modulePath();
    }
    for (auto *FS : CSInfo.SummaryTypeTestAssumeUsers) {
      FS->addCall({Callee, CI});
      IsExported |= S->modulePath() != FS->modulePath();
    }
  };
  AddCalls(SlotInfo.CSInfo);
  for (auto &P : SlotInfo.ConstCSInfo)
    AddCalls(P.second);
  return IsExported;
}

bool DevirtModule::trySingleImplDevirt(
    ModuleSummaryIndex *ExportSummary,
    MutableArrayRef<VirtualCallTarget> TargetsForSlot, VTableSlotInfo &SlotInfo,
    WholeProgramDevirtResolution *Res) {
  // See if the program contains a single implementation of this virtual
  // function.
  Function *TheFn = TargetsForSlot[0].Fn;
  for (auto &&Target : TargetsForSlot)
    if (TheFn != Target.Fn)
      return false;

  // If so, update each call site to call that implementation directly.
  if (RemarksEnabled)
    TargetsForSlot[0].WasDevirt = true;

  bool IsExported = false;
  applySingleImplDevirt(SlotInfo, TheFn, IsExported);
  if (!IsExported)
    return false;

  // If the only implementation has local linkage, we must promote to external
  // to make it visible to thin LTO objects. We can only get here during the
  // ThinLTO export phase.
  if (TheFn->hasLocalLinkage()) {
    std::string NewName = (TheFn->getName() + ".llvm.merged").str();

    // Since we are renaming the function, any comdats with the same name must
    // also be renamed. This is required when targeting COFF, as the comdat
    // name must match one of the names of the symbols in the comdat.
    if (Comdat *C = TheFn->getComdat()) {
      if (C->getName() == TheFn->getName()) {
        Comdat *NewC = M.getOrInsertComdat(NewName);
        NewC->setSelectionKind(C->getSelectionKind());
        for (GlobalObject &GO : M.global_objects())
          if (GO.getComdat() == C)
            GO.setComdat(NewC);
      }
    }

    TheFn->setLinkage(GlobalValue::ExternalLinkage);
    TheFn->setVisibility(GlobalValue::HiddenVisibility);
    TheFn->setName(NewName);
  }
  if (ValueInfo TheFnVI = ExportSummary->getValueInfo(TheFn->getGUID()))
    // Any needed promotion of 'TheFn' has already been done during
    // LTO unit split, so we can ignore return value of AddCalls.
    AddCalls(SlotInfo, TheFnVI);

  Res->TheKind = WholeProgramDevirtResolution::SingleImpl;
  Res->SingleImplName = std::string(TheFn->getName());

  return true;
}
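
// For instance, a local-linkage implementation named _ZN1A1fEv that had to be
// exported above would be renamed to _ZN1A1fEv.llvm.merged and given external
// linkage with hidden visibility (the mangled name here is only an example).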

bool DevirtIndex::trySingleImplDevirt(MutableArrayRef<ValueInfo> TargetsForSlot,
                                      VTableSlotSummary &SlotSummary,
                                      VTableSlotInfo &SlotInfo,
                                      WholeProgramDevirtResolution *Res,
                                      std::set<ValueInfo> &DevirtTargets) {
  // See if the program contains a single implementation of this virtual
  // function.
  auto TheFn = TargetsForSlot[0];
  for (auto &&Target : TargetsForSlot)
    if (TheFn != Target)
      return false;

  // Don't devirtualize if we don't have a target definition.
  auto Size = TheFn.getSummaryList().size();
  if (!Size)
    return false;

  // Don't devirtualize the function if we're told to skip it
  // in -wholeprogramdevirt-skip.
  if (FunctionsToSkip.match(TheFn.name()))
    return false;

  // If the summary list contains multiple summaries where at least one is
  // a local, give up, as we won't know which (possibly promoted) name to use.
  for (auto &S : TheFn.getSummaryList())
    if (GlobalValue::isLocalLinkage(S->linkage()) && Size > 1)
      return false;

  // Collect functions devirtualized at least for one call site for stats.
  if (PrintSummaryDevirt)
    DevirtTargets.insert(TheFn);

  auto &S = TheFn.getSummaryList()[0];
  bool IsExported = AddCalls(SlotInfo, TheFn);
  if (IsExported)
    ExportedGUIDs.insert(TheFn.getGUID());

  // Record in summary for use in devirtualization during the ThinLTO import
  // step.
  Res->TheKind = WholeProgramDevirtResolution::SingleImpl;
  if (GlobalValue::isLocalLinkage(S->linkage())) {
    if (IsExported)
      // If the target is a local function and we are exporting it by
      // devirtualizing a call in another module, we need to record the
      // promoted name.
      Res->SingleImplName = ModuleSummaryIndex::getGlobalNameForLocal(
          TheFn.name(), ExportSummary.getModuleHash(S->modulePath()));
    else {
      LocalWPDTargetsMap[TheFn].push_back(SlotSummary);
      Res->SingleImplName = std::string(TheFn.name());
    }
  } else
    Res->SingleImplName = std::string(TheFn.name());

  // The name will be empty if this thin link was driven off of a serialized
  // combined index (e.g. llvm-lto). However, WPD is not supported/invoked for
  // the legacy LTO API anyway.
  assert(!Res->SingleImplName.empty());

  return true;
}

void DevirtModule::tryICallBranchFunnel(
    MutableArrayRef<VirtualCallTarget> TargetsForSlot, VTableSlotInfo &SlotInfo,
    WholeProgramDevirtResolution *Res, VTableSlot Slot) {
  Triple T(M.getTargetTriple());
  if (T.getArch() != Triple::x86_64)
    return;

  if (TargetsForSlot.size() > ClThreshold)
    return;

  bool HasNonDevirt = !SlotInfo.CSInfo.AllCallSitesDevirted;
  if (!HasNonDevirt)
    for (auto &P : SlotInfo.ConstCSInfo)
      if (!P.second.AllCallSitesDevirted) {
        HasNonDevirt = true;
        break;
      }

  if (!HasNonDevirt)
    return;

  FunctionType *FT =
      FunctionType::get(Type::getVoidTy(M.getContext()), {Int8PtrTy}, true);
  Function *JT;
  if (isa<MDString>(Slot.TypeID)) {
    JT = Function::Create(FT, Function::ExternalLinkage,
                          M.getDataLayout().getProgramAddressSpace(),
                          getGlobalName(Slot, {}, "branch_funnel"), &M);
    JT->setVisibility(GlobalValue::HiddenVisibility);
  } else {
    JT = Function::Create(FT, Function::InternalLinkage,
                          M.getDataLayout().getProgramAddressSpace(),
                          "branch_funnel", &M);
  }
  JT->addParamAttr(0, Attribute::Nest);

  std::vector<Value *> JTArgs;
  JTArgs.push_back(JT->arg_begin());
  for (auto &T : TargetsForSlot) {
    JTArgs.push_back(getMemberAddr(T.TM));
    JTArgs.push_back(T.Fn);
  }

  BasicBlock *BB = BasicBlock::Create(M.getContext(), "", JT, nullptr);
  Function *Intr =
      Intrinsic::getDeclaration(&M, llvm::Intrinsic::icall_branch_funnel, {});

  auto *CI = CallInst::Create(Intr, JTArgs, "", BB);
  CI->setTailCallKind(CallInst::TCK_MustTail);
  ReturnInst::Create(M.getContext(), nullptr, BB);

  bool IsExported = false;
  applyICallBranchFunnel(SlotInfo, JT, IsExported);
  if (IsExported)
    Res->TheKind = WholeProgramDevirtResolution::BranchFunnel;
}
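
// The thunk built above looks roughly like the following (an illustrative
// sketch; the symbol names are hypothetical):
//   define hidden void @__typeid_..._branch_funnel(i8* nest %vtable, ...) {
//     musttail call void (...) @llvm.icall.branch.funnel(
//         i8* %vtable,
//         i8* getelementptr (i8, i8* bitcast (@vt1 to i8*), i64 16),
//         void ()* @impl1,
//         ...)
//     ret void
//   }
// Callers pass the vtable address in the nest parameter, and codegen lowers
// the intrinsic to a compare-and-branch tree over the vtable address that
// tail-calls the matching implementation.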

void DevirtModule::applyICallBranchFunnel(VTableSlotInfo &SlotInfo,
                                          Constant *JT, bool &IsExported) {
  auto Apply = [&](CallSiteInfo &CSInfo) {
    if (CSInfo.isExported())
      IsExported = true;
    if (CSInfo.AllCallSitesDevirted)
      return;
    for (auto &&VCallSite : CSInfo.CallSites) {
      CallBase &CB = VCallSite.CB;

      // Jump tables are only profitable if the retpoline mitigation is
      // enabled.
      Attribute FSAttr = CB.getCaller()->getFnAttribute("target-features");
      if (!FSAttr.isValid() ||
          !FSAttr.getValueAsString().contains("+retpoline"))
        continue;

      if (RemarksEnabled)
        VCallSite.emitRemark("branch-funnel",
                             JT->stripPointerCasts()->getName(), OREGetter);

      // Pass the address of the vtable in the nest register, which is r10 on
      // x86_64.
      std::vector<Type *> NewArgs;
      NewArgs.push_back(Int8PtrTy);
      append_range(NewArgs, CB.getFunctionType()->params());
      FunctionType *NewFT =
          FunctionType::get(CB.getFunctionType()->getReturnType(), NewArgs,
                            CB.getFunctionType()->isVarArg());
      PointerType *NewFTPtr = PointerType::getUnqual(NewFT);

      IRBuilder<> IRB(&CB);
      std::vector<Value *> Args;
      Args.push_back(IRB.CreateBitCast(VCallSite.VTable, Int8PtrTy));
      llvm::append_range(Args, CB.args());

      CallBase *NewCS = nullptr;
      if (isa<CallInst>(CB))
        NewCS = IRB.CreateCall(NewFT, IRB.CreateBitCast(JT, NewFTPtr), Args);
      else
        NewCS = IRB.CreateInvoke(NewFT, IRB.CreateBitCast(JT, NewFTPtr),
                                 cast<InvokeInst>(CB).getNormalDest(),
                                 cast<InvokeInst>(CB).getUnwindDest(), Args);
      NewCS->setCallingConv(CB.getCallingConv());

      AttributeList Attrs = CB.getAttributes();
      std::vector<AttributeSet> NewArgAttrs;
      NewArgAttrs.push_back(AttributeSet::get(
          M.getContext(), ArrayRef<Attribute>{Attribute::get(
                              M.getContext(), Attribute::Nest)}));
      for (unsigned I = 0; I + 2 < Attrs.getNumAttrSets(); ++I)
        NewArgAttrs.push_back(Attrs.getParamAttrs(I));
      NewCS->setAttributes(
          AttributeList::get(M.getContext(), Attrs.getFnAttrs(),
                             Attrs.getRetAttrs(), NewArgAttrs));

      CB.replaceAllUsesWith(NewCS);
      CB.eraseFromParent();

      // This use is no longer unsafe.
      if (VCallSite.NumUnsafeUses)
        --*VCallSite.NumUnsafeUses;
    }
    // Don't mark as devirtualized because there may be callers compiled
    // without retpoline mitigation, which would mean that they are lowered to
    // llvm.type.test and therefore require an llvm.type.test resolution for
    // the type identifier.
  };
  Apply(SlotInfo.CSInfo);
  for (auto &P : SlotInfo.ConstCSInfo)
    Apply(P.second);
}

bool DevirtModule::tryEvaluateFunctionsWithArgs(
    MutableArrayRef<VirtualCallTarget> TargetsForSlot,
    ArrayRef<uint64_t> Args) {
  // Evaluate each function and store the result in each target's RetVal
  // field.
  for (VirtualCallTarget &Target : TargetsForSlot) {
    if (Target.Fn->arg_size() != Args.size() + 1)
      return false;

    Evaluator Eval(M.getDataLayout(), nullptr);
    SmallVector<Constant *, 2> EvalArgs;
    EvalArgs.push_back(
        Constant::getNullValue(Target.Fn->getFunctionType()->getParamType(0)));
    for (unsigned I = 0; I != Args.size(); ++I) {
      auto *ArgTy = dyn_cast<IntegerType>(
          Target.Fn->getFunctionType()->getParamType(I + 1));
      if (!ArgTy)
        return false;
      EvalArgs.push_back(ConstantInt::get(ArgTy, Args[I]));
    }

    Constant *RetVal;
    if (!Eval.EvaluateFunction(Target.Fn, RetVal, EvalArgs) ||
        !isa<ConstantInt>(RetVal))
      return false;
    Target.RetVal = cast<ConstantInt>(RetVal)->getZExtValue();
  }
  return true;
}
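
// As a sketch of what qualifies: a virtual method whose result depends only
// on constant call arguments, e.g. (hypothetical source code)
//   struct A { virtual unsigned tag(unsigned K) const { return K + 1; } };
// can be evaluated here once per constant K seen at a call site, yielding a
// known RetVal for each target. 'this' is passed as null, which is safe only
// because the caller has already verified the targets never use it.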

void DevirtModule::applyUniformRetValOpt(CallSiteInfo &CSInfo, StringRef FnName,
                                         uint64_t TheRetVal) {
  for (auto Call : CSInfo.CallSites) {
    if (!OptimizedCalls.insert(&Call.CB).second)
      continue;
    Call.replaceAndErase(
        "uniform-ret-val", FnName, RemarksEnabled, OREGetter,
        ConstantInt::get(cast<IntegerType>(Call.CB.getType()), TheRetVal));
  }
  CSInfo.markDevirt();
}
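
// E.g. if every implementation returns 1 for the given constant arguments, a
// virtual call such as `%r = call i32 %fptr(ptr %obj, i32 5)` is replaced
// outright with the constant `i32 1` and the call is erased (a sketch only;
// the IR names are hypothetical).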

bool DevirtModule::tryUniformRetValOpt(
    MutableArrayRef<VirtualCallTarget> TargetsForSlot, CallSiteInfo &CSInfo,
    WholeProgramDevirtResolution::ByArg *Res) {
  // Uniform return value optimization. If all functions return the same
  // constant, replace all calls with that constant.
  uint64_t TheRetVal = TargetsForSlot[0].RetVal;
  for (const VirtualCallTarget &Target : TargetsForSlot)
    if (Target.RetVal != TheRetVal)
      return false;

  if (CSInfo.isExported()) {
    Res->TheKind = WholeProgramDevirtResolution::ByArg::UniformRetVal;
    Res->Info = TheRetVal;
  }

  applyUniformRetValOpt(CSInfo, TargetsForSlot[0].Fn->getName(), TheRetVal);
  if (RemarksEnabled)
    for (auto &&Target : TargetsForSlot)
      Target.WasDevirt = true;
  return true;
}

std::string DevirtModule::getGlobalName(VTableSlot Slot,
                                        ArrayRef<uint64_t> Args,
                                        StringRef Name) {
  std::string FullName = "__typeid_";
  raw_string_ostream OS(FullName);
  OS << cast<MDString>(Slot.TypeID)->getString() << '_' << Slot.ByteOffset;
  for (uint64_t Arg : Args)
    OS << '_' << Arg;
  OS << '_' << Name;
  return OS.str();
}
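
// For example (with a hypothetical type id), the slot {"_ZTS1A", 0} with the
// constant argument 5 and the suffix "byte" produces the symbol name
// "__typeid__ZTS1A_0_5_byte".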

bool DevirtModule::shouldExportConstantsAsAbsoluteSymbols() {
  Triple T(M.getTargetTriple());
  return T.isX86() && T.getObjectFormat() == Triple::ELF;
}

void DevirtModule::exportGlobal(VTableSlot Slot, ArrayRef<uint64_t> Args,
                                StringRef Name, Constant *C) {
  GlobalAlias *GA = GlobalAlias::create(Int8Ty, 0, GlobalValue::ExternalLinkage,
                                        getGlobalName(Slot, Args, Name), C, &M);
  GA->setVisibility(GlobalValue::HiddenVisibility);
}

void DevirtModule::exportConstant(VTableSlot Slot, ArrayRef<uint64_t> Args,
                                  StringRef Name, uint32_t Const,
                                  uint32_t &Storage) {
  if (shouldExportConstantsAsAbsoluteSymbols()) {
    exportGlobal(
        Slot, Args, Name,
        ConstantExpr::getIntToPtr(ConstantInt::get(Int32Ty, Const), Int8PtrTy));
    return;
  }

  Storage = Const;
}

Constant *DevirtModule::importGlobal(VTableSlot Slot, ArrayRef<uint64_t> Args,
                                     StringRef Name) {
  Constant *C =
      M.getOrInsertGlobal(getGlobalName(Slot, Args, Name), Int8Arr0Ty);
  auto *GV = dyn_cast<GlobalVariable>(C);
  if (GV)
    GV->setVisibility(GlobalValue::HiddenVisibility);
  return C;
}

Constant *DevirtModule::importConstant(VTableSlot Slot, ArrayRef<uint64_t> Args,
                                       StringRef Name, IntegerType *IntTy,
                                       uint32_t Storage) {
  if (!shouldExportConstantsAsAbsoluteSymbols())
    return ConstantInt::get(IntTy, Storage);

  Constant *C = importGlobal(Slot, Args, Name);
  auto *GV = cast<GlobalVariable>(C->stripPointerCasts());
  C = ConstantExpr::getPtrToInt(C, IntTy);

  // We only need to set metadata if the global is newly created, in which
  // case it would not have hidden visibility.
  if (GV->hasMetadata(LLVMContext::MD_absolute_symbol))
    return C;

  auto SetAbsRange = [&](uint64_t Min, uint64_t Max) {
    auto *MinC = ConstantAsMetadata::get(ConstantInt::get(IntPtrTy, Min));
    auto *MaxC = ConstantAsMetadata::get(ConstantInt::get(IntPtrTy, Max));
    GV->setMetadata(LLVMContext::MD_absolute_symbol,
                    MDNode::get(M.getContext(), {MinC, MaxC}));
  };
  unsigned AbsWidth = IntTy->getBitWidth();
  if (AbsWidth == IntPtrTy->getBitWidth())
    SetAbsRange(~0ull, ~0ull); // Full set.
  else
    SetAbsRange(0, 1ull << AbsWidth);
  return C;
}

void DevirtModule::applyUniqueRetValOpt(CallSiteInfo &CSInfo, StringRef FnName,
                                        bool IsOne,
                                        Constant *UniqueMemberAddr) {
  for (auto &&Call : CSInfo.CallSites) {
    if (!OptimizedCalls.insert(&Call.CB).second)
      continue;
    IRBuilder<> B(&Call.CB);
    Value *Cmp =
        B.CreateICmp(IsOne ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_NE, Call.VTable,
                     B.CreateBitCast(UniqueMemberAddr, Call.VTable->getType()));
    Cmp = B.CreateZExt(Cmp, Call.CB.getType());
    Call.replaceAndErase("unique-ret-val", FnName, RemarksEnabled, OREGetter,
                         Cmp);
  }
  CSInfo.markDevirt();
}
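
// In other words, when exactly one implementation returns true (IsOne), a
// boolean virtual call collapses to a vtable-address comparison, roughly
// (a sketch with hypothetical names):
//   %is = icmp eq ptr %vtable, @__typeid_..._unique_member
//   %r  = zext i1 %is to iN   ; iN is the call's return type
// so no call remains at all.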

Constant *DevirtModule::getMemberAddr(const TypeMemberInfo *M) {
  Constant *C = ConstantExpr::getBitCast(M->Bits->GV, Int8PtrTy);
  return ConstantExpr::getGetElementPtr(Int8Ty, C,
                                        ConstantInt::get(Int64Ty, M->Offset));
}

bool DevirtModule::tryUniqueRetValOpt(
    unsigned BitWidth, MutableArrayRef<VirtualCallTarget> TargetsForSlot,
    CallSiteInfo &CSInfo, WholeProgramDevirtResolution::ByArg *Res,
    VTableSlot Slot, ArrayRef<uint64_t> Args) {
  // IsOne controls whether we look for a 0 or a 1.
  auto tryUniqueRetValOptFor = [&](bool IsOne) {
    const TypeMemberInfo *UniqueMember = nullptr;
    for (const VirtualCallTarget &Target : TargetsForSlot) {
      if (Target.RetVal == (IsOne ? 1 : 0)) {
        if (UniqueMember)
          return false;
        UniqueMember = Target.TM;
      }
    }

    // We should have found a unique member or bailed out by now. We already
    // checked for a uniform return value in tryUniformRetValOpt.
    assert(UniqueMember);

    Constant *UniqueMemberAddr = getMemberAddr(UniqueMember);
    if (CSInfo.isExported()) {
      Res->TheKind = WholeProgramDevirtResolution::ByArg::UniqueRetVal;
      Res->Info = IsOne;

      exportGlobal(Slot, Args, "unique_member", UniqueMemberAddr);
    }

    // Replace each call with the comparison.
    applyUniqueRetValOpt(CSInfo, TargetsForSlot[0].Fn->getName(), IsOne,
                         UniqueMemberAddr);

    // Update devirtualization statistics for targets.
    if (RemarksEnabled)
      for (auto &&Target : TargetsForSlot)
        Target.WasDevirt = true;

    return true;
  };

  if (BitWidth == 1) {
    if (tryUniqueRetValOptFor(true))
      return true;
    if (tryUniqueRetValOptFor(false))
      return true;
  }
  return false;
}

void DevirtModule::applyVirtualConstProp(CallSiteInfo &CSInfo, StringRef FnName,
                                         Constant *Byte, Constant *Bit) {
  for (auto Call : CSInfo.CallSites) {
    if (!OptimizedCalls.insert(&Call.CB).second)
      continue;
    auto *RetType = cast<IntegerType>(Call.CB.getType());
    IRBuilder<> B(&Call.CB);
    Value *Addr =
        B.CreateGEP(Int8Ty, B.CreateBitCast(Call.VTable, Int8PtrTy), Byte);
    if (RetType->getBitWidth() == 1) {
      Value *Bits = B.CreateLoad(Int8Ty, Addr);
      Value *BitsAndBit = B.CreateAnd(Bits, Bit);
      auto IsBitSet = B.CreateICmpNE(BitsAndBit, ConstantInt::get(Int8Ty, 0));
      Call.replaceAndErase("virtual-const-prop-1-bit", FnName, RemarksEnabled,
                           OREGetter, IsBitSet);
    } else {
      Value *ValAddr = B.CreateBitCast(Addr, RetType->getPointerTo());
      Value *Val = B.CreateLoad(RetType, ValAddr);
      Call.replaceAndErase("virtual-const-prop", FnName, RemarksEnabled,
                           OREGetter, Val);
    }
  }
  CSInfo.markDevirt();
}
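
// For a return width greater than 1 the rewrite above is roughly (a sketch
// with hypothetical names; OffsetByte may be negative, addressing bytes
// stored before the vtable):
//   %p = getelementptr i8, ptr %vtable, i32 OffsetByte
//   %r = load iN, ptr %p      ; replaces `%r = call iN %fptr(...)`
// For i1 returns, a whole byte is loaded and tested against the bit mask
// instead.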

bool DevirtModule::tryVirtualConstProp(
    MutableArrayRef<VirtualCallTarget> TargetsForSlot, VTableSlotInfo &SlotInfo,
    WholeProgramDevirtResolution *Res, VTableSlot Slot) {
  // This only works if the function returns an integer.
  auto RetType = dyn_cast<IntegerType>(TargetsForSlot[0].Fn->getReturnType());
  if (!RetType)
    return false;
  unsigned BitWidth = RetType->getBitWidth();
  if (BitWidth > 64)
    return false;

  // Make sure that each function is defined, does not access memory, takes at
  // least one argument, does not use its first argument (which we assume is
  // 'this'), and has the same return type.
  //
  // Note that we test whether this copy of the function is readnone, rather
  // than testing function attributes, which must hold for any copy of the
  // function, even a less optimized version substituted at link time. This is
  // sound because the virtual constant propagation optimizations effectively
  // inline all implementations of the virtual function into each call site,
  // rather than using function attributes to perform local optimization.
  for (VirtualCallTarget &Target : TargetsForSlot) {
    if (Target.Fn->isDeclaration() ||
        computeFunctionBodyMemoryAccess(*Target.Fn, AARGetter(*Target.Fn)) !=
            MAK_ReadNone ||
        Target.Fn->arg_empty() || !Target.Fn->arg_begin()->use_empty() ||
        Target.Fn->getReturnType() != RetType)
      return false;
  }

  for (auto &&CSByConstantArg : SlotInfo.ConstCSInfo) {
    if (!tryEvaluateFunctionsWithArgs(TargetsForSlot, CSByConstantArg.first))
      continue;

    WholeProgramDevirtResolution::ByArg *ResByArg = nullptr;
    if (Res)
      ResByArg = &Res->ResByArg[CSByConstantArg.first];

    if (tryUniformRetValOpt(TargetsForSlot, CSByConstantArg.second, ResByArg))
      continue;

    if (tryUniqueRetValOpt(BitWidth, TargetsForSlot, CSByConstantArg.second,
                           ResByArg, Slot, CSByConstantArg.first))
      continue;

    // Find an allocation offset in bits in all vtables associated with the
    // type.
    uint64_t AllocBefore =
        findLowestOffset(TargetsForSlot, /*IsAfter=*/false, BitWidth);
    uint64_t AllocAfter =
        findLowestOffset(TargetsForSlot, /*IsAfter=*/true, BitWidth);

    // Calculate the total amount of padding needed to store a value at both
    // ends of the object.
    uint64_t TotalPaddingBefore = 0, TotalPaddingAfter = 0;
    for (auto &&Target : TargetsForSlot) {
      TotalPaddingBefore += std::max<int64_t>(
          (AllocBefore + 7) / 8 - Target.allocatedBeforeBytes() - 1, 0);
      TotalPaddingAfter += std::max<int64_t>(
          (AllocAfter + 7) / 8 - Target.allocatedAfterBytes() - 1, 0);
    }

    // If the amount of padding is too large, give up.
    // FIXME: do something smarter here.
    if (std::min(TotalPaddingBefore, TotalPaddingAfter) > 128)
      continue;

    // Calculate the offset to the value as a (possibly negative) byte offset
    // and (if applicable) a bit offset, and store the values in the targets.
    int64_t OffsetByte;
    uint64_t OffsetBit;
    if (TotalPaddingBefore <= TotalPaddingAfter)
      setBeforeReturnValues(TargetsForSlot, AllocBefore, BitWidth, OffsetByte,
                            OffsetBit);
    else
      setAfterReturnValues(TargetsForSlot, AllocAfter, BitWidth, OffsetByte,
                           OffsetBit);

    if (RemarksEnabled)
      for (auto &&Target : TargetsForSlot)
        Target.WasDevirt = true;

    if (CSByConstantArg.second.isExported()) {
      ResByArg->TheKind = WholeProgramDevirtResolution::ByArg::VirtualConstProp;
      exportConstant(Slot, CSByConstantArg.first, "byte", OffsetByte,
                     ResByArg->Byte);
      exportConstant(Slot, CSByConstantArg.first, "bit", 1ULL << OffsetBit,
                     ResByArg->Bit);
    }

    // Rewrite each call to a load from OffsetByte/OffsetBit.
    Constant *ByteConst = ConstantInt::get(Int32Ty, OffsetByte);
    Constant *BitConst = ConstantInt::get(Int8Ty, 1ULL << OffsetBit);
    applyVirtualConstProp(CSByConstantArg.second,
                          TargetsForSlot[0].Fn->getName(), ByteConst, BitConst);
  }
  return true;
}
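
// rebuildGlobal below materializes the bytes reserved by virtual constant
// propagation. Conceptually (a sketch, not literal pass output), a vtable @vt
// becomes
//   @vt.0 = private constant { [B x i8], <original type>, [A x i8] }
//             { <before bytes>, <original initializer>, <after bytes> }
//   @vt   = alias <original type>, getelementptr(..., @vt.0, i32 0, i32 1)
// so existing references keep pointing at the original initializer while the
// evaluated return values live in the padding around it.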

void DevirtModule::rebuildGlobal(VTableBits &B) {
  if (B.Before.Bytes.empty() && B.After.Bytes.empty())
    return;

  // Align the before byte array to the global's minimum alignment so that we
  // don't break any alignment requirements on the global.
  Align Alignment = M.getDataLayout().getValueOrABITypeAlignment(
      B.GV->getAlign(), B.GV->getValueType());
  B.Before.Bytes.resize(alignTo(B.Before.Bytes.size(), Alignment));

  // Before was stored in reverse order; flip it now.
  for (size_t I = 0, Size = B.Before.Bytes.size(); I != Size / 2; ++I)
    std::swap(B.Before.Bytes[I], B.Before.Bytes[Size - 1 - I]);

  // Build an anonymous global containing the before bytes, followed by the
  // original initializer, followed by the after bytes.
  auto NewInit = ConstantStruct::getAnon(
      {ConstantDataArray::get(M.getContext(), B.Before.Bytes),
       B.GV->getInitializer(),
       ConstantDataArray::get(M.getContext(), B.After.Bytes)});
  auto NewGV =
      new GlobalVariable(M, NewInit->getType(), B.GV->isConstant(),
                         GlobalVariable::PrivateLinkage, NewInit, "", B.GV);
  NewGV->setSection(B.GV->getSection());
  NewGV->setComdat(B.GV->getComdat());
  NewGV->setAlignment(B.GV->getAlign());

  // Copy the original vtable's metadata to the anonymous global, adjusting
  // offsets as required.
  NewGV->copyMetadata(B.GV, B.Before.Bytes.size());

  // Build an alias named after the original global, pointing at the second
  // element (the original initializer).
  auto Alias = GlobalAlias::create(
      B.GV->getInitializer()->getType(), 0, B.GV->getLinkage(), "",
      ConstantExpr::getGetElementPtr(
          NewInit->getType(), NewGV,
          ArrayRef<Constant *>{ConstantInt::get(Int32Ty, 0),
                               ConstantInt::get(Int32Ty, 1)}),
      &M);
  Alias->setVisibility(B.GV->getVisibility());
  Alias->takeName(B.GV);

  B.GV->replaceAllUsesWith(Alias);
  B.GV->eraseFromParent();
}

bool DevirtModule::areRemarksEnabled() {
  const auto &FL = M.getFunctionList();
  for (const Function &Fn : FL) {
    const auto &BBL = Fn.getBasicBlockList();
    if (BBL.empty())
      continue;
    auto DI = OptimizationRemark(DEBUG_TYPE, "", DebugLoc(), &BBL.front());
    return DI.isEnabled();
  }
  return false;
}
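
// scanTypeTestUsers looks for the pattern emitted for a virtual call under
// whole-program vtable optimization, roughly (an illustrative sketch):
//   %vtable = load ptr, ptr %obj
//   %p = call i1 @llvm.type.test(ptr %vtable, metadata !"_ZTS1A")
//   call void @llvm.assume(i1 %p)
//   ... a function pointer is loaded from %vtable and called indirectly ...
// The (type id, offset) of each such call identifies the vtable slot and is
// used as the CallSlots key.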

void DevirtModule::scanTypeTestUsers(
    Function *TypeTestFunc,
    DenseMap<Metadata *, std::set<TypeMemberInfo>> &TypeIdMap) {
  // Find all virtual calls via a virtual table pointer %p under an assumption
  // of the form llvm.assume(llvm.type.test(%p, %md)). This indicates that %p
  // points to a member of the type identifier %md. Group calls by (type ID,
  // offset) pair (effectively the identity of the virtual function) and store
  // to CallSlots.
  for (Use &U : llvm::make_early_inc_range(TypeTestFunc->uses())) {
    auto *CI = dyn_cast<CallInst>(U.getUser());
    if (!CI)
      continue;

    // Search for virtual calls based on %p and add them to DevirtCalls.
    SmallVector<DevirtCallSite, 1> DevirtCalls;
    SmallVector<CallInst *, 1> Assumes;
    auto &DT = LookupDomTree(*CI->getFunction());
    findDevirtualizableCallsForTypeTest(DevirtCalls, Assumes, CI, DT);

    Metadata *TypeId =
        cast<MetadataAsValue>(CI->getArgOperand(1))->getMetadata();
    // If we found any, add them to CallSlots.
    if (!Assumes.empty()) {
      Value *Ptr = CI->getArgOperand(0)->stripPointerCasts();
      for (DevirtCallSite Call : DevirtCalls)
        CallSlots[{TypeId, Call.Offset}].addCallSite(Ptr, Call.CB, nullptr);
    }

    auto RemoveTypeTestAssumes = [&]() {
      // We no longer need the assumes or the type test.
      for (auto Assume : Assumes)
        Assume->eraseFromParent();
      // We can't use RecursivelyDeleteTriviallyDeadInstructions here because
      // we may use the vtable argument later.
      if (CI->use_empty())
        CI->eraseFromParent();
    };

    // At this point we could remove all type test assume sequences, as they
    // were originally inserted for WPD. However, we can keep these in the
    // code stream for later analysis (e.g. to help drive more efficient ICP
    // sequences). They will eventually be removed by a second LowerTypeTests
    // invocation that cleans them up. In order to do this correctly, the
    // first LowerTypeTests invocation needs to know that they have "Unknown"
    // type test resolution, so that they aren't treated as Unsat and lowered
    // to False, which would break any uses of the assumes. Below we remove
    // any type test assumes that will not be treated as Unknown by LTT.

    // The type test assumes will be treated by LTT as Unsat if the type id is
    // not used on a global (in which case it has no entry in the TypeIdMap).
    if (!TypeIdMap.count(TypeId))
      RemoveTypeTestAssumes();

    // For ThinLTO importing, we need to remove the type test assumes if this
    // is an MDString type id without a corresponding TypeIdSummary. Any
    // non-MDString type ids are ignored and treated as Unknown by LTT, so
    // their type test assumes can be kept. If the MDString type id is missing
    // a TypeIdSummary (e.g. because there was no use on a vcall, preventing
    // the exporting phase of WPD from analyzing it), then it would be treated
    // as Unsat by LTT and we need to remove its type test assumes here. If
    // not used on a vcall we don't need them for later optimization use in
    // any case.
    else if (ImportSummary && isa<MDString>(TypeId)) {
      const TypeIdSummary *TidSummary =
          ImportSummary->getTypeIdSummary(cast<MDString>(TypeId)->getString());
      if (!TidSummary)
        RemoveTypeTestAssumes();
      else
        // If one was created it should not be Unsat, because if we reached
        // here the type id was used on a global.
        assert(TidSummary->TTRes.TheKind != TypeTestResolution::Unsat);
    }
  }
}
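
// scanTypeCheckedLoadUsers handles the CFI-style encoding, where the virtual
// call is expressed through llvm.type.checked.load, roughly (a sketch):
//   %pair = call {ptr, i1} @llvm.type.checked.load(ptr %vtable, i32 %offset,
//                                                  metadata !"_ZTS1A")
//   %fptr = extractvalue {ptr, i1} %pair, 0
//   %ok   = extractvalue {ptr, i1} %pair, 1
// Each intrinsic call is expanded below into an explicit vtable load plus a
// plain llvm.type.test, which later phases can then fold away.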

void DevirtModule::scanTypeCheckedLoadUsers(Function *TypeCheckedLoadFunc) {
  Function *TypeTestFunc = Intrinsic::getDeclaration(&M, Intrinsic::type_test);

  for (Use &U : llvm::make_early_inc_range(TypeCheckedLoadFunc->uses())) {
    auto *CI = dyn_cast<CallInst>(U.getUser());
    if (!CI)
      continue;

    Value *Ptr = CI->getArgOperand(0);
    Value *Offset = CI->getArgOperand(1);
    Value *TypeIdValue = CI->getArgOperand(2);
    Metadata *TypeId = cast<MetadataAsValue>(TypeIdValue)->getMetadata();

    SmallVector<DevirtCallSite, 1> DevirtCalls;
    SmallVector<Instruction *, 1> LoadedPtrs;
    SmallVector<Instruction *, 1> Preds;
    bool HasNonCallUses = false;
    auto &DT = LookupDomTree(*CI->getFunction());
    findDevirtualizableCallsForTypeCheckedLoad(DevirtCalls, LoadedPtrs, Preds,
                                               HasNonCallUses, CI, DT);

    // Start by generating "pessimistic" code that explicitly loads the
    // function pointer from the vtable and performs the type check. If
    // possible, we will eliminate the load and the type check later.

    // If possible, only generate the load at the point where it is used.
    // This helps avoid unnecessary spills.
    IRBuilder<> LoadB(
        (LoadedPtrs.size() == 1 && !HasNonCallUses) ? LoadedPtrs[0] : CI);
    Value *GEP = LoadB.CreateGEP(Int8Ty, Ptr, Offset);
    Value *GEPPtr = LoadB.CreateBitCast(GEP, PointerType::getUnqual(Int8PtrTy));
    Value *LoadedValue = LoadB.CreateLoad(Int8PtrTy, GEPPtr);

    for (Instruction *LoadedPtr : LoadedPtrs) {
      LoadedPtr->replaceAllUsesWith(LoadedValue);
      LoadedPtr->eraseFromParent();
    }

    // Likewise for the type test.
    IRBuilder<> CallB((Preds.size() == 1 && !HasNonCallUses) ? Preds[0] : CI);
    CallInst *TypeTestCall = CallB.CreateCall(TypeTestFunc, {Ptr, TypeIdValue});

    for (Instruction *Pred : Preds) {
      Pred->replaceAllUsesWith(TypeTestCall);
      Pred->eraseFromParent();
    }

    // We have already erased any extractvalue instructions that refer to the
    // intrinsic call, but the intrinsic may have other non-extractvalue uses
    // (although this is unlikely). In that case, explicitly build a pair and
    // RAUW it.
    if (!CI->use_empty()) {
      Value *Pair = UndefValue::get(CI->getType());
      IRBuilder<> B(CI);
      Pair = B.CreateInsertValue(Pair, LoadedValue, {0});
      Pair = B.CreateInsertValue(Pair, TypeTestCall, {1});
      CI->replaceAllUsesWith(Pair);
    }

    // The number of unsafe uses is initially the number of uses.
    auto &NumUnsafeUses = NumUnsafeUsesForTypeTest[TypeTestCall];
    NumUnsafeUses = DevirtCalls.size();

    // If the function pointer has a non-call user, we cannot eliminate the
    // type check, as one of those users may eventually call the pointer.
    // Increment the unsafe use count to make sure it cannot reach zero.
    if (HasNonCallUses)
      ++NumUnsafeUses;
    for (DevirtCallSite Call : DevirtCalls) {
      CallSlots[{TypeId, Call.Offset}].addCallSite(Ptr, Call.CB,
                                                   &NumUnsafeUses);
    }

    CI->eraseFromParent();
  }
}

void DevirtModule::importResolution(VTableSlot Slot, VTableSlotInfo &SlotInfo) {
  auto *TypeId = dyn_cast<MDString>(Slot.TypeID);
  if (!TypeId)
    return;
  const TypeIdSummary *TidSummary =
      ImportSummary->getTypeIdSummary(TypeId->getString());
  if (!TidSummary)
    return;
  auto ResI = TidSummary->WPDRes.find(Slot.ByteOffset);
  if (ResI == TidSummary->WPDRes.end())
    return;
  const WholeProgramDevirtResolution &Res = ResI->second;

  if (Res.TheKind == WholeProgramDevirtResolution::SingleImpl) {
    assert(!Res.SingleImplName.empty());
    // The type of the function in the declaration is irrelevant because every
    // call site will cast it to the correct type.
    Constant *SingleImpl =
        cast<Constant>(M.getOrInsertFunction(Res.SingleImplName,
                                             Type::getVoidTy(M.getContext()))
                           .getCallee());

    // This is the import phase so we should not be exporting anything.
    bool IsExported = false;
    applySingleImplDevirt(SlotInfo, SingleImpl, IsExported);
    assert(!IsExported);
  }

  for (auto &CSByConstantArg : SlotInfo.ConstCSInfo) {
    auto I = Res.ResByArg.find(CSByConstantArg.first);
    if (I == Res.ResByArg.end())
      continue;
    auto &ResByArg = I->second;
    // FIXME: We should figure out what to do about the "function name"
    // argument to the apply* functions, as the function names are unavailable
    // during the importing phase. For now we just pass the empty string. This
    // does not impact correctness because the function names are just used
    // for remarks.
    switch (ResByArg.TheKind) {
    case WholeProgramDevirtResolution::ByArg::UniformRetVal:
      applyUniformRetValOpt(CSByConstantArg.second, "", ResByArg.Info);
      break;
    case WholeProgramDevirtResolution::ByArg::UniqueRetVal: {
      Constant *UniqueMemberAddr =
          importGlobal(Slot, CSByConstantArg.first, "unique_member");
      applyUniqueRetValOpt(CSByConstantArg.second, "", ResByArg.Info,
                           UniqueMemberAddr);
      break;
    }
    case WholeProgramDevirtResolution::ByArg::VirtualConstProp: {
      Constant *Byte = importConstant(Slot, CSByConstantArg.first, "byte",
                                      Int32Ty, ResByArg.Byte);
      Constant *Bit = importConstant(Slot, CSByConstantArg.first, "bit", Int8Ty,
                                     ResByArg.Bit);
      applyVirtualConstProp(CSByConstantArg.second, "", Byte, Bit);
      break;
    }
    default:
      break;
    }
  }

  if (Res.TheKind == WholeProgramDevirtResolution::BranchFunnel) {
    // The type of the function is irrelevant, because it's bitcast at calls
    // anyhow.
    Constant *JT = cast<Constant>(
        M.getOrInsertFunction(getGlobalName(Slot, {}, "branch_funnel"),
                              Type::getVoidTy(M.getContext()))
            .getCallee());
    bool IsExported = false;
    applyICallBranchFunnel(SlotInfo, JT, IsExported);
    assert(!IsExported);
  }
}

void DevirtModule::removeRedundantTypeTests() {
  auto True = ConstantInt::getTrue(M.getContext());
  for (auto &&U : NumUnsafeUsesForTypeTest) {
    if (U.second == 0) {
      U.first->replaceAllUsesWith(True);
      U.first->eraseFromParent();
    }
  }
}
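
// That is, once every devirtualizable use of a type test produced by
// scanTypeCheckedLoadUsers has been rewritten, the leftover
//   %ok = call i1 @llvm.type.test(ptr %vtable, metadata !"...")
// (a sketch; the names are hypothetical) has no unsafe users left, so it is
// folded to `true` and erased instead of being lowered to a real check.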

ValueInfo
DevirtModule::lookUpFunctionValueInfo(Function *TheFn,
                                      ModuleSummaryIndex *ExportSummary) {
  assert((ExportSummary != nullptr) &&
         "Caller guarantees ExportSummary is not nullptr");

  const auto TheFnGUID = TheFn->getGUID();
  const auto TheFnGUIDWithExportedName = GlobalValue::getGUID(TheFn->getName());
  // Look up ValueInfo with the GUID in the current linkage.
  ValueInfo TheFnVI = ExportSummary->getValueInfo(TheFnGUID);
  // If no entry is found and the GUID is different from the GUID computed
  // using the exported name, look up ValueInfo with the exported name
  // unconditionally. This is a fallback.
  //
  // The reason to have a fallback:
  // 1. LTO could enable global value internalization via
  //    `enable-lto-internalization`.
  // 2. The GUID in ExportSummary is computed using the exported name.
  if ((!TheFnVI) && (TheFnGUID != TheFnGUIDWithExportedName)) {
    TheFnVI = ExportSummary->getValueInfo(TheFnGUIDWithExportedName);
  }
  return TheFnVI;
}

bool DevirtModule::mustBeUnreachableFunction(
    Function *const F, ModuleSummaryIndex *ExportSummary) {
  // First, learn unreachability by analyzing function IR.
  if (!F->isDeclaration()) {
    // A function must be unreachable if its entry block ends with an
    // 'unreachable'.
    return isa<UnreachableInst>(F->getEntryBlock().getTerminator());
  }
  // Learn unreachability from ExportSummary if ExportSummary is present.
  return ExportSummary &&
         ::mustBeUnreachableFunction(
             DevirtModule::lookUpFunctionValueInfo(F, ExportSummary));
}

bool DevirtModule::run() {
  // If only some of the modules were split, we cannot correctly perform
  // this transformation. We already checked for the presence of type tests
  // with partially split modules during the thin link, and would have emitted
  // an error if any were found, so here we can simply return.
  if ((ExportSummary && ExportSummary->partiallySplitLTOUnits()) ||
      (ImportSummary && ImportSummary->partiallySplitLTOUnits()))
    return false;

  Function *TypeTestFunc =
      M.getFunction(Intrinsic::getName(Intrinsic::type_test));
  Function *TypeCheckedLoadFunc =
      M.getFunction(Intrinsic::getName(Intrinsic::type_checked_load));
  Function *AssumeFunc = M.getFunction(Intrinsic::getName(Intrinsic::assume));

  // Normally if there are no users of the devirtualization intrinsics in the
  // module, this pass has nothing to do. But if we are exporting, we also need
  // to handle any users that appear only in the function summaries.
  if (!ExportSummary &&
      (!TypeTestFunc || TypeTestFunc->use_empty() || !AssumeFunc ||
       AssumeFunc->use_empty()) &&
      (!TypeCheckedLoadFunc || TypeCheckedLoadFunc->use_empty()))
    return false;

  // Rebuild type metadata into a map for easy lookup.
  std::vector<VTableBits> Bits;
  DenseMap<Metadata *, std::set<TypeMemberInfo>> TypeIdMap;
  buildTypeIdentifierMap(Bits, TypeIdMap);

  if (TypeTestFunc && AssumeFunc)
    scanTypeTestUsers(TypeTestFunc, TypeIdMap);

  if (TypeCheckedLoadFunc)
    scanTypeCheckedLoadUsers(TypeCheckedLoadFunc);

  if (ImportSummary) {
    for (auto &S : CallSlots)
      importResolution(S.first, S.second);

    removeRedundantTypeTests();

    // We have lowered or deleted the type intrinsics, so we will no
    // longer have enough information to reason about the liveness of virtual
    // function pointers in GlobalDCE.
    for (GlobalVariable &GV : M.globals())
      GV.eraseMetadata(LLVMContext::MD_vcall_visibility);

    // The rest of the code is only necessary when exporting or during regular
    // LTO, so we are done.
    return true;
  }

  if (TypeIdMap.empty())
    return true;

  // Collect information from summary about which calls to try to devirtualize.
  if (ExportSummary) {
    DenseMap<GlobalValue::GUID, TinyPtrVector<Metadata *>> MetadataByGUID;
    for (auto &P : TypeIdMap) {
      if (auto *TypeId = dyn_cast<MDString>(P.first))
        MetadataByGUID[GlobalValue::getGUID(TypeId->getString())].push_back(
            TypeId);
    }

    for (auto &P : *ExportSummary) {
      for (auto &S : P.second.SummaryList) {
        auto *FS = dyn_cast<FunctionSummary>(S.get());
        if (!FS)
          continue;
        // FIXME: Only add live functions.
        for (FunctionSummary::VFuncId VF : FS->type_test_assume_vcalls()) {
          for (Metadata *MD : MetadataByGUID[VF.GUID]) {
            CallSlots[{MD, VF.Offset}].CSInfo.addSummaryTypeTestAssumeUser(FS);
          }
        }
        for (FunctionSummary::VFuncId VF : FS->type_checked_load_vcalls()) {
          for (Metadata *MD : MetadataByGUID[VF.GUID]) {
            CallSlots[{MD, VF.Offset}].CSInfo.addSummaryTypeCheckedLoadUser(FS);
          }
        }
        for (const FunctionSummary::ConstVCall &VC :
             FS->type_test_assume_const_vcalls()) {
          for (Metadata *MD : MetadataByGUID[VC.VFunc.GUID]) {
            CallSlots[{MD, VC.VFunc.Offset}]
                .ConstCSInfo[VC.Args]
                .addSummaryTypeTestAssumeUser(FS);
          }
        }
        for (const FunctionSummary::ConstVCall &VC :
             FS->type_checked_load_const_vcalls()) {
          for (Metadata *MD : MetadataByGUID[VC.VFunc.GUID]) {
            CallSlots[{MD, VC.VFunc.Offset}]
                .ConstCSInfo[VC.Args]
                .addSummaryTypeCheckedLoadUser(FS);
          }
        }
      }
    }
  }

  // For each (type, offset) pair:
  bool DidVirtualConstProp = false;
  std::map<std::string, Function *> DevirtTargets;
  for (auto &S : CallSlots) {
    // Search each of the members of the type identifier for the virtual
    // function implementation at offset S.first.ByteOffset, and add to
    // TargetsForSlot.
    std::vector<VirtualCallTarget> TargetsForSlot;
    WholeProgramDevirtResolution *Res = nullptr;
    const std::set<TypeMemberInfo> &TypeMemberInfos = TypeIdMap[S.first.TypeID];
    if (ExportSummary && isa<MDString>(S.first.TypeID) &&
        TypeMemberInfos.size())
      // For any type id used on a global's type metadata, create the type id
      // summary resolution regardless of whether we can devirtualize, so that
      // lower type tests knows the type id is not Unsat. If it was not used on
      // a global's type metadata, the TypeIdMap entry set will be empty, and
      // we don't want to create an entry (with the default Unknown type
      // resolution), which can prevent detection of the Unsat.
      Res = &ExportSummary
                 ->getOrInsertTypeIdSummary(
                     cast<MDString>(S.first.TypeID)->getString())
                 .WPDRes[S.first.ByteOffset];
    if (tryFindVirtualCallTargets(TargetsForSlot, TypeMemberInfos,
                                  S.first.ByteOffset, ExportSummary)) {
      if (!trySingleImplDevirt(ExportSummary, TargetsForSlot, S.second, Res)) {
        DidVirtualConstProp |=
            tryVirtualConstProp(TargetsForSlot, S.second, Res, S.first);

        tryICallBranchFunnel(TargetsForSlot, S.second, Res, S.first);
      }

      // Collect functions devirtualized at least for one call site for stats.
      if (RemarksEnabled)
        for (const auto &T : TargetsForSlot)
          if (T.WasDevirt)
            DevirtTargets[std::string(T.Fn->getName())] = T.Fn;
    }

    // CFI-specific: if we are exporting and any llvm.type.checked.load
    // intrinsics were *not* devirtualized, we need to add the resulting
    // llvm.type.test intrinsics to the function summaries so that the
    // LowerTypeTests pass will export them.
    if (ExportSummary && isa<MDString>(S.first.TypeID)) {
      auto GUID =
          GlobalValue::getGUID(cast<MDString>(S.first.TypeID)->getString());
      for (auto FS : S.second.CSInfo.SummaryTypeCheckedLoadUsers)
        FS->addTypeTest(GUID);
      for (auto &CCS : S.second.ConstCSInfo)
        for (auto FS : CCS.second.SummaryTypeCheckedLoadUsers)
          FS->addTypeTest(GUID);
    }
  }

  if (RemarksEnabled) {
    // Generate remarks for each devirtualized function.
    for (const auto &DT : DevirtTargets) {
      Function *F = DT.second;
      using namespace ore;
      OREGetter(F).emit(OptimizationRemark(DEBUG_TYPE, "Devirtualized", F)
                        << "devirtualized "
                        << NV("FunctionName", DT.first));
    }
  }

  removeRedundantTypeTests();

  // Rebuild each global we touched as part of virtual constant propagation to
  // include the before and after bytes.
  if (DidVirtualConstProp)
    for (VTableBits &B : Bits)
      rebuildGlobal(B);

  // We have lowered or deleted the type intrinsics, so we will no
  // longer have enough information to reason about the liveness of virtual
  // function pointers in GlobalDCE.
  for (GlobalVariable &GV : M.globals())
    GV.eraseMetadata(LLVMContext::MD_vcall_visibility);

  return true;
}

void DevirtIndex::run() {
  if (ExportSummary.typeIdCompatibleVtableMap().empty())
    return;

  DenseMap<GlobalValue::GUID, std::vector<StringRef>> NameByGUID;
  for (auto &P : ExportSummary.typeIdCompatibleVtableMap()) {
    NameByGUID[GlobalValue::getGUID(P.first)].push_back(P.first);
  }

  // Collect information from summary about which calls to try to devirtualize.
  for (auto &P : ExportSummary) {
    for (auto &S : P.second.SummaryList) {
      auto *FS = dyn_cast<FunctionSummary>(S.get());
      if (!FS)
        continue;
      // FIXME: Only add live functions.
      for (FunctionSummary::VFuncId VF : FS->type_test_assume_vcalls()) {
        for (StringRef Name : NameByGUID[VF.GUID]) {
          CallSlots[{Name, VF.Offset}].CSInfo.addSummaryTypeTestAssumeUser(FS);
        }
      }
      for (FunctionSummary::VFuncId VF : FS->type_checked_load_vcalls()) {
        for (StringRef Name : NameByGUID[VF.GUID]) {
          CallSlots[{Name, VF.Offset}].CSInfo.addSummaryTypeCheckedLoadUser(FS);
        }
      }
      for (const FunctionSummary::ConstVCall &VC :
           FS->type_test_assume_const_vcalls()) {
        for (StringRef Name : NameByGUID[VC.VFunc.GUID]) {
          CallSlots[{Name, VC.VFunc.Offset}]
              .ConstCSInfo[VC.Args]
              .addSummaryTypeTestAssumeUser(FS);
        }
      }
      for (const FunctionSummary::ConstVCall &VC :
           FS->type_checked_load_const_vcalls()) {
        for (StringRef Name : NameByGUID[VC.VFunc.GUID]) {
          CallSlots[{Name, VC.VFunc.Offset}]
              .ConstCSInfo[VC.Args]
              .addSummaryTypeCheckedLoadUser(FS);
        }
      }
    }
  }

  std::set<ValueInfo> DevirtTargets;
  // For each (type, offset) pair:
  for (auto &S : CallSlots) {
    // Search each of the members of the type identifier for the virtual
    // function implementation at offset S.first.ByteOffset, and add to
    // TargetsForSlot.
    std::vector<ValueInfo> TargetsForSlot;
    auto TidSummary =
        ExportSummary.getTypeIdCompatibleVtableSummary(S.first.TypeID);
    assert(TidSummary);
    // Create the type id summary resolution regardless of whether we can
    // devirtualize, so that lower type tests knows the type id is used on
    // a global and not Unsat.
    WholeProgramDevirtResolution *Res =
        &ExportSummary.getOrInsertTypeIdSummary(S.first.TypeID)
             .WPDRes[S.first.ByteOffset];
    if (tryFindVirtualCallTargets(TargetsForSlot, *TidSummary,
                                  S.first.ByteOffset)) {
      if (!trySingleImplDevirt(TargetsForSlot, S.first, S.second, Res,
                               DevirtTargets))
        continue;
    }
  }

  // Optionally have the thin link print a message for each devirtualized
  // function.
  if (PrintSummaryDevirt)
    for (const auto &DT : DevirtTargets)
      errs() << "Devirtualized call to " << DT << "\n";
}