WholeProgramDevirt.cpp
//===- WholeProgramDevirt.cpp - Whole program virtual call optimization ---===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This pass implements whole program optimization of virtual calls in cases
// where we know (via !type metadata) that the list of callees is fixed. This
// includes the following:
// - Single implementation devirtualization: if a virtual call has a single
//   possible callee, replace all calls with a direct call to that callee.
// - Virtual constant propagation: if the virtual function's return type is an
//   integer <=64 bits and all possible callees are readnone, for each class
//   and each list of constant arguments: evaluate the function, store the
//   return value alongside the virtual table, and rewrite each virtual call
//   as a load from the virtual table.
// - Uniform return value optimization: if the conditions for virtual constant
//   propagation hold and each function returns the same constant value,
//   replace each virtual call with that constant.
// - Unique return value optimization for i1 return values: if the conditions
//   for virtual constant propagation hold and a single vtable's function
//   returns 0, or a single vtable's function returns 1, replace each virtual
//   call with a comparison of the vptr against that vtable's address.
//
// This pass is intended to be used during the regular and thin LTO pipelines:
//
// During regular LTO, the pass determines the best optimization for each
// virtual call and applies the resolutions directly to virtual calls that are
// eligible for virtual call optimization (i.e. calls that use either of the
// llvm.assume(llvm.type.test) or llvm.type.checked.load intrinsics).
//
// During hybrid Regular/ThinLTO, the pass operates in two phases:
// - Export phase: this is run during the thin link over a single merged module
//   that contains all vtables with !type metadata that participate in the
//   link. The pass computes a resolution for each virtual call and stores it
//   in the type identifier summary.
// - Import phase: this is run during the thin backends over the individual
//   modules. The pass applies the resolutions previously computed during the
//   export phase to each eligible virtual call.
//
// During ThinLTO, the pass operates in two phases:
// - Export phase: this is run during the thin link over the index which
//   contains a summary of all vtables with !type metadata that participate in
//   the link. It computes a resolution for each virtual call and stores it in
//   the type identifier summary. Only single implementation devirtualization
//   is supported.
// - Import phase: (same as with hybrid case above).
//
//===----------------------------------------------------------------------===//
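//
// Illustrative sketch of the first case above (types and names are examples,
// not taken from this file): a C++ call p->f() through
//   struct A { virtual int f(); };
// lowers to a vptr load guarded by llvm.assume(llvm.type.test(...)); if A::f
// is the only compatible implementation visible at LTO time, this pass
// rewrites the indirect call into a direct call to A::f.
//
//===----------------------------------------------------------------------===//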
#include "llvm/Transforms/IPO/WholeProgramDevirt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseMapInfo.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Triple.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/BasicAliasAnalysis.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/TypeMetadataUtils.h"
#include "llvm/Bitcode/BitcodeReader.h"
#include "llvm/Bitcode/BitcodeWriter.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalAlias.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/ModuleSummaryIndexYAML.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/PassRegistry.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Errc.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/GlobPattern.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Transforms/IPO.h"
#include "llvm/Transforms/IPO/FunctionAttrs.h"
#include "llvm/Transforms/Utils/Evaluator.h"
#include <algorithm>
#include <cstddef>
#include <map>
#include <set>
#include <string>

using namespace llvm;
using namespace wholeprogramdevirt;

#define DEBUG_TYPE "wholeprogramdevirt"

static cl::opt<PassSummaryAction> ClSummaryAction(
    "wholeprogramdevirt-summary-action",
    cl::desc("What to do with the summary when running this pass"),
    cl::values(clEnumValN(PassSummaryAction::None, "none", "Do nothing"),
               clEnumValN(PassSummaryAction::Import, "import",
                          "Import typeid resolutions from summary and globals"),
               clEnumValN(PassSummaryAction::Export, "export",
                          "Export typeid resolutions to summary and globals")),
    cl::Hidden);

static cl::opt<std::string> ClReadSummary(
    "wholeprogramdevirt-read-summary",
    cl::desc(
        "Read summary from given bitcode or YAML file before running pass"),
    cl::Hidden);

static cl::opt<std::string> ClWriteSummary(
    "wholeprogramdevirt-write-summary",
    cl::desc("Write summary to given bitcode or YAML file after running pass. "
             "Output file format is deduced from extension: *.bc means writing "
             "bitcode, otherwise YAML"),
    cl::Hidden);

static cl::opt<unsigned>
    ClThreshold("wholeprogramdevirt-branch-funnel-threshold", cl::Hidden,
                cl::init(10), cl::ZeroOrMore,
                cl::desc("Maximum number of call targets per "
                         "call site to enable branch funnels"));

static cl::opt<bool>
    PrintSummaryDevirt("wholeprogramdevirt-print-index-based", cl::Hidden,
                       cl::init(false), cl::ZeroOrMore,
                       cl::desc("Print index-based devirtualization messages"));
/// Provide a way to force enable whole program visibility in tests.
/// This is needed to support legacy tests that don't contain
/// !vcall_visibility metadata (the mere presence of type tests
/// previously implied hidden visibility).
cl::opt<bool>
    WholeProgramVisibility("whole-program-visibility", cl::init(false),
                           cl::Hidden, cl::ZeroOrMore,
                           cl::desc("Enable whole program visibility"));

/// Provide a way to force disable whole program visibility for debugging or
/// workarounds, when it has been enabled via the linker.
cl::opt<bool> DisableWholeProgramVisibility(
    "disable-whole-program-visibility", cl::init(false), cl::Hidden,
    cl::ZeroOrMore,
    cl::desc("Disable whole program visibility (overrides enabling options)"));

/// Provide a way to prevent certain functions from being devirtualized.
cl::list<std::string>
    SkipFunctionNames("wholeprogramdevirt-skip",
                      cl::desc("Prevent function(s) from being devirtualized"),
                      cl::Hidden, cl::ZeroOrMore, cl::CommaSeparated);
namespace {
struct PatternList {
  std::vector<GlobPattern> Patterns;
  template <class T> void init(const T &StringList) {
    for (const auto &S : StringList)
      if (Expected<GlobPattern> Pat = GlobPattern::create(S))
        Patterns.push_back(std::move(*Pat));
  }
  bool match(StringRef S) {
    for (const GlobPattern &P : Patterns)
      if (P.match(S))
        return true;
    return false;
  }
};
} // namespace
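
// PatternList backs the FunctionsToSkip members below, initialized from
// -wholeprogramdevirt-skip. For example (pattern is illustrative), running
// with -wholeprogramdevirt-skip=foo,bar* prevents "foo" and any "bar"-prefixed
// target from being devirtualized.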
// Find the minimum offset that we may store a value of size Size bits at. If
// IsAfter is set, look for an offset after the object, otherwise look for an
// offset before the object.
uint64_t
wholeprogramdevirt::findLowestOffset(ArrayRef<VirtualCallTarget> Targets,
                                     bool IsAfter, uint64_t Size) {
  // Find a minimum offset taking into account only vtable sizes.
  uint64_t MinByte = 0;
  for (const VirtualCallTarget &Target : Targets) {
    if (IsAfter)
      MinByte = std::max(MinByte, Target.minAfterBytes());
    else
      MinByte = std::max(MinByte, Target.minBeforeBytes());
  }

  // Build a vector of arrays of bytes covering, for each target, a slice of
  // the used region (see AccumBitVector::BytesUsed in
  // llvm/Transforms/IPO/WholeProgramDevirt.h) starting at MinByte.
  // Effectively, this aligns the used regions to start at MinByte.
  //
  // In this example, A, B and C are vtables, # is a byte already allocated for
  // a virtual function pointer, AAAA... (etc.) are the used regions for the
  // vtables and Offset(X) is the value computed for the Offset variable below
  // for X.
  //
  //                    Offset(A)
  //                    |       |
  //                            |MinByte
  // A: ################AAAAAAAA|AAAAAAAA
  // B: ########BBBBBBBBBBBBBBBB|BBBB
  // C: ########################|CCCCCCCCCCCCCCCC
  //            |   Offset(B)   |
  //
  // This code produces the slices of A, B and C that appear after the divider
  // at MinByte.
  std::vector<ArrayRef<uint8_t>> Used;
  for (const VirtualCallTarget &Target : Targets) {
    ArrayRef<uint8_t> VTUsed = IsAfter ? Target.TM->Bits->After.BytesUsed
                                       : Target.TM->Bits->Before.BytesUsed;
    uint64_t Offset = IsAfter ? MinByte - Target.minAfterBytes()
                              : MinByte - Target.minBeforeBytes();

    // Disregard used regions that are smaller than Offset. These are
    // effectively all-free regions that do not need to be checked.
    if (VTUsed.size() > Offset)
      Used.push_back(VTUsed.slice(Offset));
  }

  if (Size == 1) {
    // Find a free bit in each member of Used.
    for (unsigned I = 0;; ++I) {
      uint8_t BitsUsed = 0;
      for (auto &&B : Used)
        if (I < B.size())
          BitsUsed |= B[I];
      if (BitsUsed != 0xff)
        return (MinByte + I) * 8 +
               countTrailingZeros(uint8_t(~BitsUsed), ZB_Undefined);
    }
  } else {
    // Find a free (Size/8) byte region in each member of Used.
    // FIXME: see if alignment helps.
    for (unsigned I = 0;; ++I) {
      for (auto &&B : Used) {
        unsigned Byte = 0;
        while ((I + Byte) < B.size() && Byte < (Size / 8)) {
          if (B[I + Byte])
            goto NextI;
          ++Byte;
        }
      }
      return (MinByte + I) * 8;
    NextI:;
    }
  }
}
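
// Worked example for the Size == 1 case above: if at byte I the accumulated
// BitsUsed is 0x07 (the low three bits taken in some vtable), then ~BitsUsed
// is 0xf8, whose lowest set bit is bit 3, so the function returns the bit
// offset (MinByte + I) * 8 + 3.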
void wholeprogramdevirt::setBeforeReturnValues(
    MutableArrayRef<VirtualCallTarget> Targets, uint64_t AllocBefore,
    unsigned BitWidth, int64_t &OffsetByte, uint64_t &OffsetBit) {
  if (BitWidth == 1)
    OffsetByte = -(AllocBefore / 8 + 1);
  else
    OffsetByte = -((AllocBefore + 7) / 8 + (BitWidth + 7) / 8);
  OffsetBit = AllocBefore % 8;

  for (VirtualCallTarget &Target : Targets) {
    if (BitWidth == 1)
      Target.setBeforeBit(AllocBefore);
    else
      Target.setBeforeBytes(AllocBefore, (BitWidth + 7) / 8);
  }
}

void wholeprogramdevirt::setAfterReturnValues(
    MutableArrayRef<VirtualCallTarget> Targets, uint64_t AllocAfter,
    unsigned BitWidth, int64_t &OffsetByte, uint64_t &OffsetBit) {
  if (BitWidth == 1)
    OffsetByte = AllocAfter / 8;
  else
    OffsetByte = (AllocAfter + 7) / 8;
  OffsetBit = AllocAfter % 8;

  for (VirtualCallTarget &Target : Targets) {
    if (BitWidth == 1)
      Target.setAfterBit(AllocAfter);
    else
      Target.setAfterBytes(AllocAfter, (BitWidth + 7) / 8);
  }
}
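
// Example of the offset encoding above (illustrative numbers): for a 1-bit
// return value allocated after the vtable at AllocAfter = 3, OffsetByte is
// AllocAfter / 8 == 0 and OffsetBit is AllocAfter % 8 == 3, i.e. bit 3 of the
// first byte past the end of the vtable object.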
VirtualCallTarget::VirtualCallTarget(Function *Fn, const TypeMemberInfo *TM)
    : Fn(Fn), TM(TM),
      IsBigEndian(Fn->getParent()->getDataLayout().isBigEndian()),
      WasDevirt(false) {}

namespace {

// A slot in a set of virtual tables. The TypeID identifies the set of virtual
// tables, and the ByteOffset is the offset in bytes from the address point to
// the virtual function pointer.
struct VTableSlot {
  Metadata *TypeID;
  uint64_t ByteOffset;
};

} // end anonymous namespace

namespace llvm {

template <> struct DenseMapInfo<VTableSlot> {
  static VTableSlot getEmptyKey() {
    return {DenseMapInfo<Metadata *>::getEmptyKey(),
            DenseMapInfo<uint64_t>::getEmptyKey()};
  }
  static VTableSlot getTombstoneKey() {
    return {DenseMapInfo<Metadata *>::getTombstoneKey(),
            DenseMapInfo<uint64_t>::getTombstoneKey()};
  }
  static unsigned getHashValue(const VTableSlot &I) {
    return DenseMapInfo<Metadata *>::getHashValue(I.TypeID) ^
           DenseMapInfo<uint64_t>::getHashValue(I.ByteOffset);
  }
  static bool isEqual(const VTableSlot &LHS, const VTableSlot &RHS) {
    return LHS.TypeID == RHS.TypeID && LHS.ByteOffset == RHS.ByteOffset;
  }
};

template <> struct DenseMapInfo<VTableSlotSummary> {
  static VTableSlotSummary getEmptyKey() {
    return {DenseMapInfo<StringRef>::getEmptyKey(),
            DenseMapInfo<uint64_t>::getEmptyKey()};
  }
  static VTableSlotSummary getTombstoneKey() {
    return {DenseMapInfo<StringRef>::getTombstoneKey(),
            DenseMapInfo<uint64_t>::getTombstoneKey()};
  }
  static unsigned getHashValue(const VTableSlotSummary &I) {
    return DenseMapInfo<StringRef>::getHashValue(I.TypeID) ^
           DenseMapInfo<uint64_t>::getHashValue(I.ByteOffset);
  }
  static bool isEqual(const VTableSlotSummary &LHS,
                      const VTableSlotSummary &RHS) {
    return LHS.TypeID == RHS.TypeID && LHS.ByteOffset == RHS.ByteOffset;
  }
};

} // end namespace llvm
namespace {

// A virtual call site. VTable is the loaded virtual table pointer, and CS is
// the indirect virtual call.
struct VirtualCallSite {
  Value *VTable = nullptr;
  CallBase &CB;

  // If non-null, this field points to the associated unsafe use count stored
  // in the DevirtModule::NumUnsafeUsesForTypeTest map below. See the
  // description of that field for details.
  unsigned *NumUnsafeUses = nullptr;

  void
  emitRemark(const StringRef OptName, const StringRef TargetName,
             function_ref<OptimizationRemarkEmitter &(Function *)> OREGetter) {
    Function *F = CB.getCaller();
    DebugLoc DLoc = CB.getDebugLoc();
    BasicBlock *Block = CB.getParent();

    using namespace ore;
    OREGetter(F).emit(OptimizationRemark(DEBUG_TYPE, OptName, DLoc, Block)
                      << NV("Optimization", OptName)
                      << ": devirtualized a call to "
                      << NV("FunctionName", TargetName));
  }

  void replaceAndErase(
      const StringRef OptName, const StringRef TargetName, bool RemarksEnabled,
      function_ref<OptimizationRemarkEmitter &(Function *)> OREGetter,
      Value *New) {
    if (RemarksEnabled)
      emitRemark(OptName, TargetName, OREGetter);
    CB.replaceAllUsesWith(New);
    if (auto *II = dyn_cast<InvokeInst>(&CB)) {
      BranchInst::Create(II->getNormalDest(), &CB);
      II->getUnwindDest()->removePredecessor(II->getParent());
    }
    CB.eraseFromParent();
    // This use is no longer unsafe.
    if (NumUnsafeUses)
      --*NumUnsafeUses;
  }
};

// Call site information collected for a specific VTableSlot and possibly a
// list of constant integer arguments. The grouping by arguments is handled by
// the VTableSlotInfo class.
struct CallSiteInfo {
  /// The set of call sites for this slot. Used during regular LTO and the
  /// import phase of ThinLTO (as well as the export phase of ThinLTO for any
  /// call sites that appear in the merged module itself); in each of these
  /// cases we are directly operating on the call sites at the IR level.
  std::vector<VirtualCallSite> CallSites;

  /// Whether all call sites represented by this CallSiteInfo, including those
  /// in summaries, have been devirtualized. This starts off as true because a
  /// default constructed CallSiteInfo represents no call sites.
  bool AllCallSitesDevirted = true;

  // These fields are used during the export phase of ThinLTO and reflect
  // information collected from function summaries.

  /// Whether any function summary contains an llvm.assume(llvm.type.test) for
  /// this slot.
  bool SummaryHasTypeTestAssumeUsers = false;

  /// CFI-specific: a vector containing the list of function summaries that use
  /// the llvm.type.checked.load intrinsic and therefore will require
  /// resolutions for llvm.type.test in order to implement CFI checks if
  /// devirtualization was unsuccessful. If devirtualization was successful,
  /// the pass will clear this vector by calling markDevirt(). If at the end of
  /// the pass the vector is non-empty, we will need to add a use of
  /// llvm.type.test to each of the function summaries in the vector.
  std::vector<FunctionSummary *> SummaryTypeCheckedLoadUsers;
  std::vector<FunctionSummary *> SummaryTypeTestAssumeUsers;

  bool isExported() const {
    return SummaryHasTypeTestAssumeUsers ||
           !SummaryTypeCheckedLoadUsers.empty();
  }

  void addSummaryTypeCheckedLoadUser(FunctionSummary *FS) {
    SummaryTypeCheckedLoadUsers.push_back(FS);
    AllCallSitesDevirted = false;
  }

  void addSummaryTypeTestAssumeUser(FunctionSummary *FS) {
    SummaryTypeTestAssumeUsers.push_back(FS);
    SummaryHasTypeTestAssumeUsers = true;
    AllCallSitesDevirted = false;
  }

  void markDevirt() {
    AllCallSitesDevirted = true;

    // As explained in the comment for SummaryTypeCheckedLoadUsers.
    SummaryTypeCheckedLoadUsers.clear();
  }
};

// Call site information collected for a specific VTableSlot.
struct VTableSlotInfo {
  // The set of call sites which do not have all constant integer arguments
  // (excluding "this").
  CallSiteInfo CSInfo;

  // The set of call sites with all constant integer arguments (excluding
  // "this"), grouped by argument list.
  std::map<std::vector<uint64_t>, CallSiteInfo> ConstCSInfo;

  void addCallSite(Value *VTable, CallBase &CB, unsigned *NumUnsafeUses);

private:
  CallSiteInfo &findCallSiteInfo(CallBase &CB);
};

CallSiteInfo &VTableSlotInfo::findCallSiteInfo(CallBase &CB) {
  std::vector<uint64_t> Args;
  auto *CBType = dyn_cast<IntegerType>(CB.getType());
  if (!CBType || CBType->getBitWidth() > 64 || CB.arg_empty())
    return CSInfo;
  for (auto &&Arg : drop_begin(CB.args())) {
    auto *CI = dyn_cast<ConstantInt>(Arg);
    if (!CI || CI->getBitWidth() > 64)
      return CSInfo;
    Args.push_back(CI->getZExtValue());
  }
  return ConstCSInfo[Args];
}
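
// For example (illustrative call): p->f(1, 2) returning an integer <= 64 bits
// has its "this" argument dropped by drop_begin and, with both remaining
// arguments constant, is grouped under ConstCSInfo[{1, 2}]; a call like
// p->f(x, 2) with a non-constant argument falls back to the
// argument-insensitive CSInfo.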
void VTableSlotInfo::addCallSite(Value *VTable, CallBase &CB,
                                 unsigned *NumUnsafeUses) {
  auto &CSI = findCallSiteInfo(CB);
  CSI.AllCallSitesDevirted = false;
  CSI.CallSites.push_back({VTable, CB, NumUnsafeUses});
}

struct DevirtModule {
  Module &M;
  function_ref<AAResults &(Function &)> AARGetter;
  function_ref<DominatorTree &(Function &)> LookupDomTree;

  ModuleSummaryIndex *ExportSummary;
  const ModuleSummaryIndex *ImportSummary;

  IntegerType *Int8Ty;
  PointerType *Int8PtrTy;
  IntegerType *Int32Ty;
  IntegerType *Int64Ty;
  IntegerType *IntPtrTy;
  /// Sizeless array type, used for imported vtables. This provides a signal
  /// to analyzers that these imports may alias, as they do for example
  /// when multiple unique return values occur in the same vtable.
  ArrayType *Int8Arr0Ty;

  bool RemarksEnabled;
  function_ref<OptimizationRemarkEmitter &(Function *)> OREGetter;

  MapVector<VTableSlot, VTableSlotInfo> CallSlots;

  // This map keeps track of the number of "unsafe" uses of a loaded function
  // pointer. The key is the associated llvm.type.test intrinsic call generated
  // by this pass. An unsafe use is one that calls the loaded function pointer
  // directly. Every time we eliminate an unsafe use (for example, by
  // devirtualizing it or by applying virtual constant propagation), we
  // decrement the value stored in this map. If a value reaches zero, we can
  // eliminate the type check by RAUWing the associated llvm.type.test call
  // with true.
  std::map<CallInst *, unsigned> NumUnsafeUsesForTypeTest;

  PatternList FunctionsToSkip;

  DevirtModule(Module &M, function_ref<AAResults &(Function &)> AARGetter,
               function_ref<OptimizationRemarkEmitter &(Function *)> OREGetter,
               function_ref<DominatorTree &(Function &)> LookupDomTree,
               ModuleSummaryIndex *ExportSummary,
               const ModuleSummaryIndex *ImportSummary)
      : M(M), AARGetter(AARGetter), LookupDomTree(LookupDomTree),
        ExportSummary(ExportSummary), ImportSummary(ImportSummary),
        Int8Ty(Type::getInt8Ty(M.getContext())),
        Int8PtrTy(Type::getInt8PtrTy(M.getContext())),
        Int32Ty(Type::getInt32Ty(M.getContext())),
        Int64Ty(Type::getInt64Ty(M.getContext())),
        IntPtrTy(M.getDataLayout().getIntPtrType(M.getContext(), 0)),
        Int8Arr0Ty(ArrayType::get(Type::getInt8Ty(M.getContext()), 0)),
        RemarksEnabled(areRemarksEnabled()), OREGetter(OREGetter) {
    assert(!(ExportSummary && ImportSummary));
    FunctionsToSkip.init(SkipFunctionNames);
  }

  bool areRemarksEnabled();

  void
  scanTypeTestUsers(Function *TypeTestFunc,
                    DenseMap<Metadata *, std::set<TypeMemberInfo>> &TypeIdMap);
  void scanTypeCheckedLoadUsers(Function *TypeCheckedLoadFunc);

  void buildTypeIdentifierMap(
      std::vector<VTableBits> &Bits,
      DenseMap<Metadata *, std::set<TypeMemberInfo>> &TypeIdMap);
  bool
  tryFindVirtualCallTargets(std::vector<VirtualCallTarget> &TargetsForSlot,
                            const std::set<TypeMemberInfo> &TypeMemberInfos,
                            uint64_t ByteOffset);

  void applySingleImplDevirt(VTableSlotInfo &SlotInfo, Constant *TheFn,
                             bool &IsExported);

  bool trySingleImplDevirt(ModuleSummaryIndex *ExportSummary,
                           MutableArrayRef<VirtualCallTarget> TargetsForSlot,
                           VTableSlotInfo &SlotInfo,
                           WholeProgramDevirtResolution *Res);

  void applyICallBranchFunnel(VTableSlotInfo &SlotInfo, Constant *JT,
                              bool &IsExported);
  void tryICallBranchFunnel(MutableArrayRef<VirtualCallTarget> TargetsForSlot,
                            VTableSlotInfo &SlotInfo,
                            WholeProgramDevirtResolution *Res, VTableSlot Slot);

  bool tryEvaluateFunctionsWithArgs(
      MutableArrayRef<VirtualCallTarget> TargetsForSlot,
      ArrayRef<uint64_t> Args);

  void applyUniformRetValOpt(CallSiteInfo &CSInfo, StringRef FnName,
                             uint64_t TheRetVal);
  bool tryUniformRetValOpt(MutableArrayRef<VirtualCallTarget> TargetsForSlot,
                           CallSiteInfo &CSInfo,
                           WholeProgramDevirtResolution::ByArg *Res);

  // Returns the global symbol name that is used to export information about
  // the given vtable slot and list of arguments.
  std::string getGlobalName(VTableSlot Slot, ArrayRef<uint64_t> Args,
                            StringRef Name);

  bool shouldExportConstantsAsAbsoluteSymbols();

  // This function is called during the export phase to create a symbol
  // definition containing information about the given vtable slot and list of
  // arguments.
  void exportGlobal(VTableSlot Slot, ArrayRef<uint64_t> Args, StringRef Name,
                    Constant *C);
  void exportConstant(VTableSlot Slot, ArrayRef<uint64_t> Args, StringRef Name,
                      uint32_t Const, uint32_t &Storage);

  // This function is called during the import phase to create a reference to
  // the symbol definition created during the export phase.
  Constant *importGlobal(VTableSlot Slot, ArrayRef<uint64_t> Args,
                         StringRef Name);
  Constant *importConstant(VTableSlot Slot, ArrayRef<uint64_t> Args,
                           StringRef Name, IntegerType *IntTy,
                           uint32_t Storage);

  Constant *getMemberAddr(const TypeMemberInfo *M);

  void applyUniqueRetValOpt(CallSiteInfo &CSInfo, StringRef FnName, bool IsOne,
                            Constant *UniqueMemberAddr);
  bool tryUniqueRetValOpt(unsigned BitWidth,
                          MutableArrayRef<VirtualCallTarget> TargetsForSlot,
                          CallSiteInfo &CSInfo,
                          WholeProgramDevirtResolution::ByArg *Res,
                          VTableSlot Slot, ArrayRef<uint64_t> Args);

  void applyVirtualConstProp(CallSiteInfo &CSInfo, StringRef FnName,
                             Constant *Byte, Constant *Bit);
  bool tryVirtualConstProp(MutableArrayRef<VirtualCallTarget> TargetsForSlot,
                           VTableSlotInfo &SlotInfo,
                           WholeProgramDevirtResolution *Res, VTableSlot Slot);

  void rebuildGlobal(VTableBits &B);

  // Apply the summary resolution for Slot to all virtual calls in SlotInfo.
  void importResolution(VTableSlot Slot, VTableSlotInfo &SlotInfo);

  // If we were able to eliminate all unsafe uses for a type checked load,
  // eliminate the associated type tests by replacing them with true.
  void removeRedundantTypeTests();

  bool run();

  // Lower the module using the action and summary passed as command line
  // arguments. For testing purposes only.
  static bool
  runForTesting(Module &M, function_ref<AAResults &(Function &)> AARGetter,
                function_ref<OptimizationRemarkEmitter &(Function *)> OREGetter,
                function_ref<DominatorTree &(Function &)> LookupDomTree);
};
struct DevirtIndex {
  ModuleSummaryIndex &ExportSummary;
  // The set in which to record GUIDs exported from their module by
  // devirtualization, used by client to ensure they are not internalized.
  std::set<GlobalValue::GUID> &ExportedGUIDs;
  // A map in which to record the information necessary to locate the WPD
  // resolution for local targets in case they are exported by cross module
  // importing.
  std::map<ValueInfo, std::vector<VTableSlotSummary>> &LocalWPDTargetsMap;

  MapVector<VTableSlotSummary, VTableSlotInfo> CallSlots;

  PatternList FunctionsToSkip;

  DevirtIndex(
      ModuleSummaryIndex &ExportSummary,
      std::set<GlobalValue::GUID> &ExportedGUIDs,
      std::map<ValueInfo, std::vector<VTableSlotSummary>> &LocalWPDTargetsMap)
      : ExportSummary(ExportSummary), ExportedGUIDs(ExportedGUIDs),
        LocalWPDTargetsMap(LocalWPDTargetsMap) {
    FunctionsToSkip.init(SkipFunctionNames);
  }

  bool tryFindVirtualCallTargets(std::vector<ValueInfo> &TargetsForSlot,
                                 const TypeIdCompatibleVtableInfo TIdInfo,
                                 uint64_t ByteOffset);

  bool trySingleImplDevirt(MutableArrayRef<ValueInfo> TargetsForSlot,
                           VTableSlotSummary &SlotSummary,
                           VTableSlotInfo &SlotInfo,
                           WholeProgramDevirtResolution *Res,
                           std::set<ValueInfo> &DevirtTargets);

  void run();
};

struct WholeProgramDevirt : public ModulePass {
  static char ID;

  bool UseCommandLine = false;

  ModuleSummaryIndex *ExportSummary = nullptr;
  const ModuleSummaryIndex *ImportSummary = nullptr;

  WholeProgramDevirt() : ModulePass(ID), UseCommandLine(true) {
    initializeWholeProgramDevirtPass(*PassRegistry::getPassRegistry());
  }

  WholeProgramDevirt(ModuleSummaryIndex *ExportSummary,
                     const ModuleSummaryIndex *ImportSummary)
      : ModulePass(ID), ExportSummary(ExportSummary),
        ImportSummary(ImportSummary) {
    initializeWholeProgramDevirtPass(*PassRegistry::getPassRegistry());
  }

  bool runOnModule(Module &M) override {
    if (skipModule(M))
      return false;

    // In the new pass manager, we can request the optimization
    // remark emitter pass on a per-function-basis, which the
    // OREGetter will do for us.
    // In the old pass manager, this is harder, so we just build
    // an optimization remark emitter on the fly, when we need it.
    std::unique_ptr<OptimizationRemarkEmitter> ORE;
    auto OREGetter = [&](Function *F) -> OptimizationRemarkEmitter & {
      ORE = std::make_unique<OptimizationRemarkEmitter>(F);
      return *ORE;
    };

    auto LookupDomTree = [this](Function &F) -> DominatorTree & {
      return this->getAnalysis<DominatorTreeWrapperPass>(F).getDomTree();
    };

    if (UseCommandLine)
      return DevirtModule::runForTesting(M, LegacyAARGetter(*this), OREGetter,
                                         LookupDomTree);

    return DevirtModule(M, LegacyAARGetter(*this), OREGetter, LookupDomTree,
                        ExportSummary, ImportSummary)
        .run();
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<AssumptionCacheTracker>();
    AU.addRequired<TargetLibraryInfoWrapperPass>();
    AU.addRequired<DominatorTreeWrapperPass>();
  }
};

} // end anonymous namespace
INITIALIZE_PASS_BEGIN(WholeProgramDevirt, "wholeprogramdevirt",
                      "Whole program devirtualization", false, false)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_END(WholeProgramDevirt, "wholeprogramdevirt",
                    "Whole program devirtualization", false, false)
char WholeProgramDevirt::ID = 0;

ModulePass *
llvm::createWholeProgramDevirtPass(ModuleSummaryIndex *ExportSummary,
                                   const ModuleSummaryIndex *ImportSummary) {
  return new WholeProgramDevirt(ExportSummary, ImportSummary);
}

PreservedAnalyses WholeProgramDevirtPass::run(Module &M,
                                              ModuleAnalysisManager &AM) {
  auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
  auto AARGetter = [&](Function &F) -> AAResults & {
    return FAM.getResult<AAManager>(F);
  };
  auto OREGetter = [&](Function *F) -> OptimizationRemarkEmitter & {
    return FAM.getResult<OptimizationRemarkEmitterAnalysis>(*F);
  };
  auto LookupDomTree = [&FAM](Function &F) -> DominatorTree & {
    return FAM.getResult<DominatorTreeAnalysis>(F);
  };
  if (UseCommandLine) {
    if (DevirtModule::runForTesting(M, AARGetter, OREGetter, LookupDomTree))
      return PreservedAnalyses::all();
    return PreservedAnalyses::none();
  }
  if (!DevirtModule(M, AARGetter, OREGetter, LookupDomTree, ExportSummary,
                    ImportSummary)
           .run())
    return PreservedAnalyses::all();
  return PreservedAnalyses::none();
}
// Enable whole program visibility if enabled by client (e.g. linker) or
// internal option, and not force disabled.
static bool hasWholeProgramVisibility(bool WholeProgramVisibilityEnabledInLTO) {
  return (WholeProgramVisibilityEnabledInLTO || WholeProgramVisibility) &&
         !DisableWholeProgramVisibility;
}

namespace llvm {

/// If whole program visibility asserted, then upgrade all public vcall
/// visibility metadata on vtable definitions to linkage unit visibility in
/// Module IR (for regular or hybrid LTO).
void updateVCallVisibilityInModule(Module &M,
                                   bool WholeProgramVisibilityEnabledInLTO) {
  if (!hasWholeProgramVisibility(WholeProgramVisibilityEnabledInLTO))
    return;
  for (GlobalVariable &GV : M.globals())
    // Add linkage unit visibility to any variable with type metadata, which
    // are the vtable definitions. We won't have an existing vcall_visibility
    // metadata on vtable definitions with public visibility.
    if (GV.hasMetadata(LLVMContext::MD_type) &&
        GV.getVCallVisibility() == GlobalObject::VCallVisibilityPublic)
      GV.setVCallVisibilityMetadata(GlobalObject::VCallVisibilityLinkageUnit);
}

/// If whole program visibility asserted, then upgrade all public vcall
/// visibility metadata on vtable definition summaries to linkage unit
/// visibility in Module summary index (for ThinLTO).
void updateVCallVisibilityInIndex(ModuleSummaryIndex &Index,
                                  bool WholeProgramVisibilityEnabledInLTO) {
  if (!hasWholeProgramVisibility(WholeProgramVisibilityEnabledInLTO))
    return;
  for (auto &P : Index) {
    for (auto &S : P.second.SummaryList) {
      auto *GVar = dyn_cast<GlobalVarSummary>(S.get());
      if (!GVar || GVar->vTableFuncs().empty() ||
          GVar->getVCallVisibility() != GlobalObject::VCallVisibilityPublic)
        continue;
      GVar->setVCallVisibility(GlobalObject::VCallVisibilityLinkageUnit);
    }
  }
}

void runWholeProgramDevirtOnIndex(
    ModuleSummaryIndex &Summary, std::set<GlobalValue::GUID> &ExportedGUIDs,
    std::map<ValueInfo, std::vector<VTableSlotSummary>> &LocalWPDTargetsMap) {
  DevirtIndex(Summary, ExportedGUIDs, LocalWPDTargetsMap).run();
}

void updateIndexWPDForExports(
    ModuleSummaryIndex &Summary,
    function_ref<bool(StringRef, ValueInfo)> isExported,
    std::map<ValueInfo, std::vector<VTableSlotSummary>> &LocalWPDTargetsMap) {
  for (auto &T : LocalWPDTargetsMap) {
    auto &VI = T.first;
    // This was enforced earlier during trySingleImplDevirt.
    assert(VI.getSummaryList().size() == 1 &&
           "Devirt of local target has more than one copy");
    auto &S = VI.getSummaryList()[0];
    if (!isExported(S->modulePath(), VI))
      continue;

    // It's been exported by a cross module import.
    for (auto &SlotSummary : T.second) {
      auto *TIdSum = Summary.getTypeIdSummary(SlotSummary.TypeID);
      assert(TIdSum);
      auto WPDRes = TIdSum->WPDRes.find(SlotSummary.ByteOffset);
      assert(WPDRes != TIdSum->WPDRes.end());
      WPDRes->second.SingleImplName = ModuleSummaryIndex::getGlobalNameForLocal(
          WPDRes->second.SingleImplName,
          Summary.getModuleHash(S->modulePath()));
    }
  }
}

} // end namespace llvm
static Error checkCombinedSummaryForTesting(ModuleSummaryIndex *Summary) {
  // Check that summary index contains regular LTO module when performing
  // export to prevent occasional use of index from pure ThinLTO compilation
  // (-fno-split-lto-module). This kind of summary index is passed to
  // DevirtIndex::run, not to DevirtModule::run used by opt/runForTesting.
  const auto &ModPaths = Summary->modulePaths();
  if (ClSummaryAction != PassSummaryAction::Import &&
      ModPaths.find(ModuleSummaryIndex::getRegularLTOModuleName()) ==
          ModPaths.end())
    return createStringError(
        errc::invalid_argument,
        "combined summary should contain Regular LTO module");
  return ErrorSuccess();
}

bool DevirtModule::runForTesting(
    Module &M, function_ref<AAResults &(Function &)> AARGetter,
    function_ref<OptimizationRemarkEmitter &(Function *)> OREGetter,
    function_ref<DominatorTree &(Function &)> LookupDomTree) {
  std::unique_ptr<ModuleSummaryIndex> Summary =
      std::make_unique<ModuleSummaryIndex>(/*HaveGVs=*/false);

  // Handle the command-line summary arguments. This code is for testing
  // purposes only, so we handle errors directly.
  if (!ClReadSummary.empty()) {
    ExitOnError ExitOnErr("-wholeprogramdevirt-read-summary: " + ClReadSummary +
                          ": ");
    auto ReadSummaryFile =
        ExitOnErr(errorOrToExpected(MemoryBuffer::getFile(ClReadSummary)));
    if (Expected<std::unique_ptr<ModuleSummaryIndex>> SummaryOrErr =
            getModuleSummaryIndex(*ReadSummaryFile)) {
      Summary = std::move(*SummaryOrErr);
      ExitOnErr(checkCombinedSummaryForTesting(Summary.get()));
    } else {
      // Try YAML if we've failed with bitcode.
      consumeError(SummaryOrErr.takeError());
      yaml::Input In(ReadSummaryFile->getBuffer());
      In >> *Summary;
      ExitOnErr(errorCodeToError(In.error()));
    }
  }

  bool Changed =
      DevirtModule(M, AARGetter, OREGetter, LookupDomTree,
                   ClSummaryAction == PassSummaryAction::Export ? Summary.get()
                                                                : nullptr,
                   ClSummaryAction == PassSummaryAction::Import ? Summary.get()
                                                                : nullptr)
          .run();

  if (!ClWriteSummary.empty()) {
    ExitOnError ExitOnErr(
        "-wholeprogramdevirt-write-summary: " + ClWriteSummary + ": ");
    std::error_code EC;
    if (StringRef(ClWriteSummary).endswith(".bc")) {
      raw_fd_ostream OS(ClWriteSummary, EC, sys::fs::OF_None);
      ExitOnErr(errorCodeToError(EC));
      WriteIndexToFile(*Summary, OS);
    } else {
      raw_fd_ostream OS(ClWriteSummary, EC, sys::fs::OF_Text);
      ExitOnErr(errorCodeToError(EC));
      yaml::Output Out(OS);
      Out << *Summary;
    }
  }

  return Changed;
}
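
// A typical testing invocation of the above (illustrative; the flags are the
// cl::opts defined at the top of this file, the file names are placeholders):
//
//   opt -wholeprogramdevirt -wholeprogramdevirt-summary-action=export \
//       -wholeprogramdevirt-read-summary=summary.yaml \
//       -wholeprogramdevirt-write-summary=out.yaml module.ll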
void DevirtModule::buildTypeIdentifierMap(
    std::vector<VTableBits> &Bits,
    DenseMap<Metadata *, std::set<TypeMemberInfo>> &TypeIdMap) {
  DenseMap<GlobalVariable *, VTableBits *> GVToBits;
  Bits.reserve(M.getGlobalList().size());
  SmallVector<MDNode *, 2> Types;
  for (GlobalVariable &GV : M.globals()) {
    Types.clear();
    GV.getMetadata(LLVMContext::MD_type, Types);
    if (GV.isDeclaration() || Types.empty())
      continue;

    VTableBits *&BitsPtr = GVToBits[&GV];
    if (!BitsPtr) {
      Bits.emplace_back();
      Bits.back().GV = &GV;
      Bits.back().ObjectSize =
          M.getDataLayout().getTypeAllocSize(GV.getInitializer()->getType());
      BitsPtr = &Bits.back();
    }

    for (MDNode *Type : Types) {
      auto TypeID = Type->getOperand(1).get();
      uint64_t Offset =
          cast<ConstantInt>(
              cast<ConstantAsMetadata>(Type->getOperand(0))->getValue())
              ->getZExtValue();
      TypeIdMap[TypeID].insert({BitsPtr, Offset});
    }
  }
}
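
// For example (identifier is illustrative): a vtable global annotated with
//   !type !{i64 16, !"_ZTS1A"}
// contributes the member {BitsPtr-for-that-GV, /*Offset=*/16} to
// TypeIdMap[!"_ZTS1A"], since operand 0 of the !type node is the offset and
// operand 1 is the type identifier.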
bool DevirtModule::tryFindVirtualCallTargets(
    std::vector<VirtualCallTarget> &TargetsForSlot,
    const std::set<TypeMemberInfo> &TypeMemberInfos, uint64_t ByteOffset) {
  for (const TypeMemberInfo &TM : TypeMemberInfos) {
    if (!TM.Bits->GV->isConstant())
      return false;

    // We cannot perform whole program devirtualization analysis on a vtable
    // with public LTO visibility.
    if (TM.Bits->GV->getVCallVisibility() ==
        GlobalObject::VCallVisibilityPublic)
      return false;

    Constant *Ptr = getPointerAtOffset(TM.Bits->GV->getInitializer(),
                                       TM.Offset + ByteOffset, M);
    if (!Ptr)
      return false;

    auto Fn = dyn_cast<Function>(Ptr->stripPointerCasts());
    if (!Fn)
      return false;

    if (FunctionsToSkip.match(Fn->getName()))
      return false;

    // We can disregard __cxa_pure_virtual as a possible call target, as
    // calls to pure virtuals are UB.
    if (Fn->getName() == "__cxa_pure_virtual")
      continue;

    TargetsForSlot.push_back({Fn, &TM});
  }

  // Give up if we couldn't find any targets.
  return !TargetsForSlot.empty();
}

bool DevirtIndex::tryFindVirtualCallTargets(
    std::vector<ValueInfo> &TargetsForSlot,
    const TypeIdCompatibleVtableInfo TIdInfo, uint64_t ByteOffset) {
  for (const TypeIdOffsetVtableInfo &P : TIdInfo) {
    // Find the first non-available_externally linkage vtable initializer.
    // We can have multiple available_externally, linkonce_odr and weak_odr
    // vtable initializers, however we want to skip available_externally as
    // they do not have type metadata attached, and therefore the summary will
    // not contain any vtable functions. We can also have multiple external
    // vtable initializers in the case of comdats, which we cannot check here.
    // The linker should give an error in this case.
    //
    // Also, handle the case of same-named local vtables with the same path
    // and therefore the same GUID. This can happen if there isn't enough
    // distinguishing path when compiling the source file. In that case we
    // conservatively return false early.
    const GlobalVarSummary *VS = nullptr;
    bool LocalFound = false;
    for (auto &S : P.VTableVI.getSummaryList()) {
      if (GlobalValue::isLocalLinkage(S->linkage())) {
        if (LocalFound)
          return false;
        LocalFound = true;
      }
      if (!GlobalValue::isAvailableExternallyLinkage(S->linkage())) {
        VS = cast<GlobalVarSummary>(S->getBaseObject());
        // We cannot perform whole program devirtualization analysis on a
        // vtable with public LTO visibility.
        if (VS->getVCallVisibility() == GlobalObject::VCallVisibilityPublic)
          return false;
      }
    }
    if (!VS->isLive())
      continue;
    for (auto VTP : VS->vTableFuncs()) {
      if (VTP.VTableOffset != P.AddressPointOffset + ByteOffset)
        continue;

      TargetsForSlot.push_back(VTP.FuncVI);
    }
  }

  // Give up if we couldn't find any targets.
  return !TargetsForSlot.empty();
}
void DevirtModule::applySingleImplDevirt(VTableSlotInfo &SlotInfo,
                                         Constant *TheFn, bool &IsExported) {
  // Don't devirtualize function if we're told to skip it
  // in -wholeprogramdevirt-skip.
  if (FunctionsToSkip.match(TheFn->stripPointerCasts()->getName()))
    return;
  auto Apply = [&](CallSiteInfo &CSInfo) {
    for (auto &&VCallSite : CSInfo.CallSites) {
      if (RemarksEnabled)
        VCallSite.emitRemark("single-impl",
                             TheFn->stripPointerCasts()->getName(), OREGetter);
      VCallSite.CB.setCalledOperand(ConstantExpr::getBitCast(
          TheFn, VCallSite.CB.getCalledOperand()->getType()));
      // This use is no longer unsafe.
      if (VCallSite.NumUnsafeUses)
        --*VCallSite.NumUnsafeUses;
    }
    if (CSInfo.isExported())
      IsExported = true;
    CSInfo.markDevirt();
  };
  Apply(SlotInfo.CSInfo);
  for (auto &P : SlotInfo.ConstCSInfo)
    Apply(P.second);
}

static bool AddCalls(VTableSlotInfo &SlotInfo, const ValueInfo &Callee) {
  // We can't add calls if we haven't seen a definition
  if (Callee.getSummaryList().empty())
    return false;

  // Insert calls into the summary index so that the devirtualized targets
  // are eligible for import.
  // FIXME: Annotate type tests with hotness. For now, mark these as hot
  // to better ensure we have the opportunity to inline them.
  bool IsExported = false;
  auto &S = Callee.getSummaryList()[0];
  CalleeInfo CI(CalleeInfo::HotnessType::Hot, /* RelBF = */ 0);
  auto AddCalls = [&](CallSiteInfo &CSInfo) {
    for (auto *FS : CSInfo.SummaryTypeCheckedLoadUsers) {
      FS->addCall({Callee, CI});
      IsExported |= S->modulePath() != FS->modulePath();
    }
    for (auto *FS : CSInfo.SummaryTypeTestAssumeUsers) {
      FS->addCall({Callee, CI});
      IsExported |= S->modulePath() != FS->modulePath();
    }
  };
  AddCalls(SlotInfo.CSInfo);
  for (auto &P : SlotInfo.ConstCSInfo)
    AddCalls(P.second);
  return IsExported;
}
bool DevirtModule::trySingleImplDevirt(
    ModuleSummaryIndex *ExportSummary,
    MutableArrayRef<VirtualCallTarget> TargetsForSlot, VTableSlotInfo &SlotInfo,
    WholeProgramDevirtResolution *Res) {
  // See if the program contains a single implementation of this virtual
  // function.
  Function *TheFn = TargetsForSlot[0].Fn;
  for (auto &&Target : TargetsForSlot)
    if (TheFn != Target.Fn)
      return false;

  // If so, update each call site to call that implementation directly.
  if (RemarksEnabled)
    TargetsForSlot[0].WasDevirt = true;

  bool IsExported = false;
  applySingleImplDevirt(SlotInfo, TheFn, IsExported);
  if (!IsExported)
    return false;

  // If the only implementation has local linkage, we must promote to external
  // to make it visible to thin LTO objects. We can only get here during the
  // ThinLTO export phase.
  if (TheFn->hasLocalLinkage()) {
    std::string NewName = (TheFn->getName() + "$merged").str();

    // Since we are renaming the function, any comdats with the same name must
    // also be renamed. This is required when targeting COFF, as the comdat
    // name must match one of the names of the symbols in the comdat.
    if (Comdat *C = TheFn->getComdat()) {
      if (C->getName() == TheFn->getName()) {
        Comdat *NewC = M.getOrInsertComdat(NewName);
        NewC->setSelectionKind(C->getSelectionKind());
        for (GlobalObject &GO : M.global_objects())
          if (GO.getComdat() == C)
            GO.setComdat(NewC);
      }
    }

    TheFn->setLinkage(GlobalValue::ExternalLinkage);
    TheFn->setVisibility(GlobalValue::HiddenVisibility);
    TheFn->setName(NewName);
  }
  if (ValueInfo TheFnVI = ExportSummary->getValueInfo(TheFn->getGUID()))
    // Any needed promotion of 'TheFn' has already been done during
    // LTO unit split, so we can ignore return value of AddCalls.
    AddCalls(SlotInfo, TheFnVI);

  Res->TheKind = WholeProgramDevirtResolution::SingleImpl;
  Res->SingleImplName = std::string(TheFn->getName());

  return true;
}
bool DevirtIndex::trySingleImplDevirt(MutableArrayRef<ValueInfo> TargetsForSlot,
                                      VTableSlotSummary &SlotSummary,
                                      VTableSlotInfo &SlotInfo,
                                      WholeProgramDevirtResolution *Res,
                                      std::set<ValueInfo> &DevirtTargets) {
  // See if the program contains a single implementation of this virtual
  // function.
  auto TheFn = TargetsForSlot[0];
  for (auto &&Target : TargetsForSlot)
    if (TheFn != Target)
      return false;

  // Don't devirtualize if we don't have target definition.
  auto Size = TheFn.getSummaryList().size();
  if (!Size)
    return false;

  // Don't devirtualize function if we're told to skip it
  // in -wholeprogramdevirt-skip.
  if (FunctionsToSkip.match(TheFn.name()))
    return false;

  // If the summary list contains multiple summaries where at least one is
  // a local, give up, as we won't know which (possibly promoted) name to use.
  for (auto &S : TheFn.getSummaryList())
    if (GlobalValue::isLocalLinkage(S->linkage()) && Size > 1)
      return false;

  // Collect functions devirtualized at least for one call site for stats.
  if (PrintSummaryDevirt)
    DevirtTargets.insert(TheFn);

  auto &S = TheFn.getSummaryList()[0];
  bool IsExported = AddCalls(SlotInfo, TheFn);
  if (IsExported)
    ExportedGUIDs.insert(TheFn.getGUID());

  // Record in summary for use in devirtualization during the ThinLTO import
  // step.
  Res->TheKind = WholeProgramDevirtResolution::SingleImpl;
  if (GlobalValue::isLocalLinkage(S->linkage())) {
    if (IsExported)
      // If target is a local function and we are exporting it by
      // devirtualizing a call in another module, we need to record the
      // promoted name.
      Res->SingleImplName = ModuleSummaryIndex::getGlobalNameForLocal(
          TheFn.name(), ExportSummary.getModuleHash(S->modulePath()));
    else {
      LocalWPDTargetsMap[TheFn].push_back(SlotSummary);
      Res->SingleImplName = std::string(TheFn.name());
    }
  } else
    Res->SingleImplName = std::string(TheFn.name());

  // Name will be empty if this thin link is driven off of a serialized
  // combined index (e.g. llvm-lto). However, WPD is not supported/invoked for
  // the legacy LTO API anyway.
  assert(!Res->SingleImplName.empty());

  return true;
}
void DevirtModule::tryICallBranchFunnel(
    MutableArrayRef<VirtualCallTarget> TargetsForSlot, VTableSlotInfo &SlotInfo,
    WholeProgramDevirtResolution *Res, VTableSlot Slot) {
  Triple T(M.getTargetTriple());
  if (T.getArch() != Triple::x86_64)
    return;

  if (TargetsForSlot.size() > ClThreshold)
    return;

  bool HasNonDevirt = !SlotInfo.CSInfo.AllCallSitesDevirted;
  if (!HasNonDevirt)
    for (auto &P : SlotInfo.ConstCSInfo)
      if (!P.second.AllCallSitesDevirted) {
        HasNonDevirt = true;
        break;
      }

  if (!HasNonDevirt)
    return;

  FunctionType *FT =
      FunctionType::get(Type::getVoidTy(M.getContext()), {Int8PtrTy}, true);
  Function *JT;
  if (isa<MDString>(Slot.TypeID)) {
    JT = Function::Create(FT, Function::ExternalLinkage,
                          M.getDataLayout().getProgramAddressSpace(),
                          getGlobalName(Slot, {}, "branch_funnel"), &M);
    JT->setVisibility(GlobalValue::HiddenVisibility);
  } else {
    JT = Function::Create(FT, Function::InternalLinkage,
                          M.getDataLayout().getProgramAddressSpace(),
                          "branch_funnel", &M);
  }
  JT->addAttribute(1, Attribute::Nest);

  std::vector<Value *> JTArgs;
  JTArgs.push_back(JT->arg_begin());
  for (auto &T : TargetsForSlot) {
    JTArgs.push_back(getMemberAddr(T.TM));
    JTArgs.push_back(T.Fn);
  }

  BasicBlock *BB = BasicBlock::Create(M.getContext(), "", JT, nullptr);
  Function *Intr =
      Intrinsic::getDeclaration(&M, llvm::Intrinsic::icall_branch_funnel, {});

  auto *CI = CallInst::Create(Intr, JTArgs, "", BB);
  CI->setTailCallKind(CallInst::TCK_MustTail);
  ReturnInst::Create(M.getContext(), nullptr, BB);

  bool IsExported = false;
  applyICallBranchFunnel(SlotInfo, JT, IsExported);
  if (IsExported)
    Res->TheKind = WholeProgramDevirtResolution::BranchFunnel;
}
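
// Rewrites qualifying virtual call sites to go through the funnel, passing the
// vtable address as a new leading 'nest' argument. As a rough sketch (types
// elided, names hypothetical), a call site
//   %r = call i32 %vfn(%class* %obj, i32 %a)
// becomes
//   %r = call i32 @..._branch_funnel(i8* nest %vtable, %class* %obj, i32 %a)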
void DevirtModule::applyICallBranchFunnel(VTableSlotInfo &SlotInfo,
                                          Constant *JT, bool &IsExported) {
  auto Apply = [&](CallSiteInfo &CSInfo) {
    if (CSInfo.isExported())
      IsExported = true;
    if (CSInfo.AllCallSitesDevirted)
      return;
    for (auto &&VCallSite : CSInfo.CallSites) {
      CallBase &CB = VCallSite.CB;

      // Jump tables are only profitable if the retpoline mitigation is
      // enabled.
      Attribute FSAttr = CB.getCaller()->getFnAttribute("target-features");
      if (!FSAttr.isValid() ||
          !FSAttr.getValueAsString().contains("+retpoline"))
        continue;

      if (RemarksEnabled)
        VCallSite.emitRemark("branch-funnel",
                             JT->stripPointerCasts()->getName(), OREGetter);

      // Pass the address of the vtable in the nest register, which is r10 on
      // x86_64.
      std::vector<Type *> NewArgs;
      NewArgs.push_back(Int8PtrTy);
      append_range(NewArgs, CB.getFunctionType()->params());
      FunctionType *NewFT =
          FunctionType::get(CB.getFunctionType()->getReturnType(), NewArgs,
                            CB.getFunctionType()->isVarArg());
      PointerType *NewFTPtr = PointerType::getUnqual(NewFT);

      IRBuilder<> IRB(&CB);
      std::vector<Value *> Args;
      Args.push_back(IRB.CreateBitCast(VCallSite.VTable, Int8PtrTy));
      llvm::append_range(Args, CB.args());

      CallBase *NewCS = nullptr;
      if (isa<CallInst>(CB))
        NewCS = IRB.CreateCall(NewFT, IRB.CreateBitCast(JT, NewFTPtr), Args);
      else
        NewCS = IRB.CreateInvoke(NewFT, IRB.CreateBitCast(JT, NewFTPtr),
                                 cast<InvokeInst>(CB).getNormalDest(),
                                 cast<InvokeInst>(CB).getUnwindDest(), Args);
      NewCS->setCallingConv(CB.getCallingConv());

      AttributeList Attrs = CB.getAttributes();
      std::vector<AttributeSet> NewArgAttrs;
      NewArgAttrs.push_back(AttributeSet::get(
          M.getContext(), ArrayRef<Attribute>{Attribute::get(
                              M.getContext(), Attribute::Nest)}));
      for (unsigned I = 0; I + 2 < Attrs.getNumAttrSets(); ++I)
        NewArgAttrs.push_back(Attrs.getParamAttributes(I));
      NewCS->setAttributes(
          AttributeList::get(M.getContext(), Attrs.getFnAttributes(),
                             Attrs.getRetAttributes(), NewArgAttrs));

      CB.replaceAllUsesWith(NewCS);
      CB.eraseFromParent();

      // This use is no longer unsafe.
      if (VCallSite.NumUnsafeUses)
        --*VCallSite.NumUnsafeUses;
    }
    // Don't mark as devirtualized because there may be callers compiled
    // without retpoline mitigation, which would mean that they are lowered to
    // llvm.type.test and therefore require an llvm.type.test resolution for
    // the type identifier.
  };
  Apply(SlotInfo.CSInfo);
  for (auto &P : SlotInfo.ConstCSInfo)
    Apply(P.second);
}
bool DevirtModule::tryEvaluateFunctionsWithArgs(
    MutableArrayRef<VirtualCallTarget> TargetsForSlot,
    ArrayRef<uint64_t> Args) {
  // Evaluate each function and store the result in each target's RetVal
  // field.
  for (VirtualCallTarget &Target : TargetsForSlot) {
    if (Target.Fn->arg_size() != Args.size() + 1)
      return false;

    Evaluator Eval(M.getDataLayout(), nullptr);
    SmallVector<Constant *, 2> EvalArgs;
    EvalArgs.push_back(
        Constant::getNullValue(Target.Fn->getFunctionType()->getParamType(0)));
    for (unsigned I = 0; I != Args.size(); ++I) {
      auto *ArgTy = dyn_cast<IntegerType>(
          Target.Fn->getFunctionType()->getParamType(I + 1));
      if (!ArgTy)
        return false;
      EvalArgs.push_back(ConstantInt::get(ArgTy, Args[I]));
    }

    Constant *RetVal;
    if (!Eval.EvaluateFunction(Target.Fn, RetVal, EvalArgs) ||
        !isa<ConstantInt>(RetVal))
      return false;
    Target.RetVal = cast<ConstantInt>(RetVal)->getZExtValue();
  }
  return true;
}
void DevirtModule::applyUniformRetValOpt(CallSiteInfo &CSInfo,
                                         StringRef FnName,
                                         uint64_t TheRetVal) {
  for (auto Call : CSInfo.CallSites)
    Call.replaceAndErase(
        "uniform-ret-val", FnName, RemarksEnabled, OREGetter,
        ConstantInt::get(cast<IntegerType>(Call.CB.getType()), TheRetVal));
  CSInfo.markDevirt();
}
bool DevirtModule::tryUniformRetValOpt(
    MutableArrayRef<VirtualCallTarget> TargetsForSlot, CallSiteInfo &CSInfo,
    WholeProgramDevirtResolution::ByArg *Res) {
  // Uniform return value optimization. If all functions return the same
  // constant, replace all calls with that constant.
  uint64_t TheRetVal = TargetsForSlot[0].RetVal;
  for (const VirtualCallTarget &Target : TargetsForSlot)
    if (Target.RetVal != TheRetVal)
      return false;

  if (CSInfo.isExported()) {
    Res->TheKind = WholeProgramDevirtResolution::ByArg::UniformRetVal;
    Res->Info = TheRetVal;
  }

  applyUniformRetValOpt(CSInfo, TargetsForSlot[0].Fn->getName(), TheRetVal);
  if (RemarksEnabled)
    for (auto &&Target : TargetsForSlot)
      Target.WasDevirt = true;
  return true;
}
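
// For example (hypothetical type id): a slot with type id "_ZTS1A" at byte
// offset 8, constant arguments {1, 2} and symbol name "byte" yields
// "__typeid__ZTS1A_8_1_2_byte".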
std::string DevirtModule::getGlobalName(VTableSlot Slot,
                                        ArrayRef<uint64_t> Args,
                                        StringRef Name) {
  std::string FullName = "__typeid_";
  raw_string_ostream OS(FullName);
  OS << cast<MDString>(Slot.TypeID)->getString() << '_' << Slot.ByteOffset;
  for (uint64_t Arg : Args)
    OS << '_' << Arg;
  OS << '_' << Name;
  return OS.str();
}
bool DevirtModule::shouldExportConstantsAsAbsoluteSymbols() {
  Triple T(M.getTargetTriple());
  return T.isX86() && T.getObjectFormat() == Triple::ELF;
}
void DevirtModule::exportGlobal(VTableSlot Slot, ArrayRef<uint64_t> Args,
                                StringRef Name, Constant *C) {
  GlobalAlias *GA = GlobalAlias::create(Int8Ty, 0, GlobalValue::ExternalLinkage,
                                        getGlobalName(Slot, Args, Name), C, &M);
  GA->setVisibility(GlobalValue::HiddenVisibility);
}
void DevirtModule::exportConstant(VTableSlot Slot, ArrayRef<uint64_t> Args,
                                  StringRef Name, uint32_t Const,
                                  uint32_t &Storage) {
  if (shouldExportConstantsAsAbsoluteSymbols()) {
    exportGlobal(
        Slot, Args, Name,
        ConstantExpr::getIntToPtr(ConstantInt::get(Int32Ty, Const), Int8PtrTy));
    return;
  }

  Storage = Const;
}
Constant *DevirtModule::importGlobal(VTableSlot Slot, ArrayRef<uint64_t> Args,
                                     StringRef Name) {
  Constant *C =
      M.getOrInsertGlobal(getGlobalName(Slot, Args, Name), Int8Arr0Ty);
  auto *GV = dyn_cast<GlobalVariable>(C);
  if (GV)
    GV->setVisibility(GlobalValue::HiddenVisibility);
  return C;
}
Constant *DevirtModule::importConstant(VTableSlot Slot, ArrayRef<uint64_t> Args,
                                       StringRef Name, IntegerType *IntTy,
                                       uint32_t Storage) {
  if (!shouldExportConstantsAsAbsoluteSymbols())
    return ConstantInt::get(IntTy, Storage);

  Constant *C = importGlobal(Slot, Args, Name);
  auto *GV = cast<GlobalVariable>(C->stripPointerCasts());
  C = ConstantExpr::getPtrToInt(C, IntTy);

  // We only need to set metadata if the global is newly created, in which
  // case it would not have hidden visibility.
  if (GV->hasMetadata(LLVMContext::MD_absolute_symbol))
    return C;

  auto SetAbsRange = [&](uint64_t Min, uint64_t Max) {
    auto *MinC = ConstantAsMetadata::get(ConstantInt::get(IntPtrTy, Min));
    auto *MaxC = ConstantAsMetadata::get(ConstantInt::get(IntPtrTy, Max));
    GV->setMetadata(LLVMContext::MD_absolute_symbol,
                    MDNode::get(M.getContext(), {MinC, MaxC}));
  };
  unsigned AbsWidth = IntTy->getBitWidth();
  if (AbsWidth == IntPtrTy->getBitWidth())
    SetAbsRange(~0ull, ~0ull); // Full set.
  else
    SetAbsRange(0, 1ull << AbsWidth);
  return C;
}
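
// Note on the ranges above: for a constant narrower than a pointer the
// !absolute_symbol range is the half-open interval [0, 2^BitWidth), e.g.
// [0, 256) for an i8; when the constant is pointer-width, the degenerate
// (~0ull, ~0ull) pair encodes the full set, mirroring !range metadata
// conventions.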
void DevirtModule::applyUniqueRetValOpt(CallSiteInfo &CSInfo, StringRef FnName,
                                        bool IsOne,
                                        Constant *UniqueMemberAddr) {
  for (auto &&Call : CSInfo.CallSites) {
    IRBuilder<> B(&Call.CB);
    Value *Cmp =
        B.CreateICmp(IsOne ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_NE,
                     Call.VTable,
                     B.CreateBitCast(UniqueMemberAddr, Call.VTable->getType()));
    Cmp = B.CreateZExt(Cmp, Call.CB.getType());
    Call.replaceAndErase("unique-ret-val", FnName, RemarksEnabled, OREGetter,
                         Cmp);
  }
  CSInfo.markDevirt();
}
Constant *DevirtModule::getMemberAddr(const TypeMemberInfo *M) {
  Constant *C = ConstantExpr::getBitCast(M->Bits->GV, Int8PtrTy);
  return ConstantExpr::getGetElementPtr(Int8Ty, C,
                                        ConstantInt::get(Int64Ty, M->Offset));
}
bool DevirtModule::tryUniqueRetValOpt(
    unsigned BitWidth, MutableArrayRef<VirtualCallTarget> TargetsForSlot,
    CallSiteInfo &CSInfo, WholeProgramDevirtResolution::ByArg *Res,
    VTableSlot Slot, ArrayRef<uint64_t> Args) {
  // IsOne controls whether we look for a 0 or a 1.
  auto tryUniqueRetValOptFor = [&](bool IsOne) {
    const TypeMemberInfo *UniqueMember = nullptr;
    for (const VirtualCallTarget &Target : TargetsForSlot) {
      if (Target.RetVal == (IsOne ? 1 : 0)) {
        if (UniqueMember)
          return false;
        UniqueMember = Target.TM;
      }
    }

    // We should have found a unique member or bailed out by now. We already
    // checked for a uniform return value in tryUniformRetValOpt.
    assert(UniqueMember);

    Constant *UniqueMemberAddr = getMemberAddr(UniqueMember);
    if (CSInfo.isExported()) {
      Res->TheKind = WholeProgramDevirtResolution::ByArg::UniqueRetVal;
      Res->Info = IsOne;

      exportGlobal(Slot, Args, "unique_member", UniqueMemberAddr);
    }

    // Replace each call with the comparison.
    applyUniqueRetValOpt(CSInfo, TargetsForSlot[0].Fn->getName(), IsOne,
                         UniqueMemberAddr);

    // Update devirtualization statistics for targets.
    if (RemarksEnabled)
      for (auto &&Target : TargetsForSlot)
        Target.WasDevirt = true;

    return true;
  };

  if (BitWidth == 1) {
    if (tryUniqueRetValOptFor(true))
      return true;
    if (tryUniqueRetValOptFor(false))
      return true;
  }
  return false;
}
void DevirtModule::applyVirtualConstProp(CallSiteInfo &CSInfo, StringRef FnName,
                                         Constant *Byte, Constant *Bit) {
  for (auto Call : CSInfo.CallSites) {
    auto *RetType = cast<IntegerType>(Call.CB.getType());
    IRBuilder<> B(&Call.CB);
    Value *Addr =
        B.CreateGEP(Int8Ty, B.CreateBitCast(Call.VTable, Int8PtrTy), Byte);
    if (RetType->getBitWidth() == 1) {
      Value *Bits = B.CreateLoad(Int8Ty, Addr);
      Value *BitsAndBit = B.CreateAnd(Bits, Bit);
      auto IsBitSet = B.CreateICmpNE(BitsAndBit, ConstantInt::get(Int8Ty, 0));
      Call.replaceAndErase("virtual-const-prop-1-bit", FnName, RemarksEnabled,
                           OREGetter, IsBitSet);
    } else {
      Value *ValAddr = B.CreateBitCast(Addr, RetType->getPointerTo());
      Value *Val = B.CreateLoad(RetType, ValAddr);
      Call.replaceAndErase("virtual-const-prop", FnName, RemarksEnabled,
                           OREGetter, Val);
    }
  }
  CSInfo.markDevirt();
}
bool DevirtModule::tryVirtualConstProp(
    MutableArrayRef<VirtualCallTarget> TargetsForSlot, VTableSlotInfo &SlotInfo,
    WholeProgramDevirtResolution *Res, VTableSlot Slot) {
  // This only works if the function returns an integer.
  auto RetType = dyn_cast<IntegerType>(TargetsForSlot[0].Fn->getReturnType());
  if (!RetType)
    return false;
  unsigned BitWidth = RetType->getBitWidth();
  if (BitWidth > 64)
    return false;

  // Make sure that each function is defined, does not access memory, takes at
  // least one argument, does not use its first argument (which we assume is
  // 'this'), and has the same return type.
  //
  // Note that we test whether this copy of the function is readnone, rather
  // than testing function attributes, which must hold for any copy of the
  // function, even a less optimized version substituted at link time. This is
  // sound because the virtual constant propagation optimizations effectively
  // inline all implementations of the virtual function into each call site,
  // rather than using function attributes to perform local optimization.
  for (VirtualCallTarget &Target : TargetsForSlot) {
    if (Target.Fn->isDeclaration() ||
        computeFunctionBodyMemoryAccess(*Target.Fn, AARGetter(*Target.Fn)) !=
            MAK_ReadNone ||
        Target.Fn->arg_empty() || !Target.Fn->arg_begin()->use_empty() ||
        Target.Fn->getReturnType() != RetType)
      return false;
  }

  for (auto &&CSByConstantArg : SlotInfo.ConstCSInfo) {
    if (!tryEvaluateFunctionsWithArgs(TargetsForSlot, CSByConstantArg.first))
      continue;

    WholeProgramDevirtResolution::ByArg *ResByArg = nullptr;
    if (Res)
      ResByArg = &Res->ResByArg[CSByConstantArg.first];

    if (tryUniformRetValOpt(TargetsForSlot, CSByConstantArg.second, ResByArg))
      continue;

    if (tryUniqueRetValOpt(BitWidth, TargetsForSlot, CSByConstantArg.second,
                           ResByArg, Slot, CSByConstantArg.first))
      continue;

    // Find an allocation offset in bits in all vtables associated with the
    // type.
    uint64_t AllocBefore =
        findLowestOffset(TargetsForSlot, /*IsAfter=*/false, BitWidth);
    uint64_t AllocAfter =
        findLowestOffset(TargetsForSlot, /*IsAfter=*/true, BitWidth);

    // Calculate the total amount of padding needed to store a value at both
    // ends of the object.
    uint64_t TotalPaddingBefore = 0, TotalPaddingAfter = 0;
    for (auto &&Target : TargetsForSlot) {
      TotalPaddingBefore += std::max<int64_t>(
          (AllocBefore + 7) / 8 - Target.allocatedBeforeBytes() - 1, 0);
      TotalPaddingAfter += std::max<int64_t>(
          (AllocAfter + 7) / 8 - Target.allocatedAfterBytes() - 1, 0);
    }

    // If the amount of padding is too large, give up.
    // FIXME: do something smarter here.
    if (std::min(TotalPaddingBefore, TotalPaddingAfter) > 128)
      continue;

    // Calculate the offset to the value as a (possibly negative) byte offset
    // and (if applicable) a bit offset, and store the values in the targets.
    int64_t OffsetByte;
    uint64_t OffsetBit;
    if (TotalPaddingBefore <= TotalPaddingAfter)
      setBeforeReturnValues(TargetsForSlot, AllocBefore, BitWidth, OffsetByte,
                            OffsetBit);
    else
      setAfterReturnValues(TargetsForSlot, AllocAfter, BitWidth, OffsetByte,
                           OffsetBit);

    if (RemarksEnabled)
      for (auto &&Target : TargetsForSlot)
        Target.WasDevirt = true;

    if (CSByConstantArg.second.isExported()) {
      ResByArg->TheKind = WholeProgramDevirtResolution::ByArg::VirtualConstProp;
      exportConstant(Slot, CSByConstantArg.first, "byte", OffsetByte,
                     ResByArg->Byte);
      exportConstant(Slot, CSByConstantArg.first, "bit", 1ULL << OffsetBit,
                     ResByArg->Bit);
    }

    // Rewrite each call to a load from OffsetByte/OffsetBit.
    Constant *ByteConst = ConstantInt::get(Int32Ty, OffsetByte);
    Constant *BitConst = ConstantInt::get(Int8Ty, 1ULL << OffsetBit);
    applyVirtualConstProp(CSByConstantArg.second,
                          TargetsForSlot[0].Fn->getName(), ByteConst, BitConst);
  }
  return true;
}
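
// After virtual constant propagation, the evaluated return values have been
// recorded in byte arrays reserved before and/or after each vtable.
// rebuildGlobal splices those bytes back into the IR. Conceptually the result
// is (illustrative, names hypothetical):
//
//   @0 = private constant { [n x i8], <vtable type>, [m x i8] }
//            { <before bytes>, <original initializer>, <after bytes> }
//   @vtable = alias <vtable type>, getelementptr({...}, @0, i32 0, i32 1)
//
// so existing references keep seeing the original vtable layout while the new
// constant bytes live at fixed negative or positive offsets around it.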
void DevirtModule::rebuildGlobal(VTableBits &B) {
  if (B.Before.Bytes.empty() && B.After.Bytes.empty())
    return;

  // Align the before byte array to the global's minimum alignment so that we
  // don't break any alignment requirements on the global.
  Align Alignment = M.getDataLayout().getValueOrABITypeAlignment(
      B.GV->getAlign(), B.GV->getValueType());
  B.Before.Bytes.resize(alignTo(B.Before.Bytes.size(), Alignment));

  // Before was stored in reverse order; flip it now.
  for (size_t I = 0, Size = B.Before.Bytes.size(); I != Size / 2; ++I)
    std::swap(B.Before.Bytes[I], B.Before.Bytes[Size - 1 - I]);

  // Build an anonymous global containing the before bytes, followed by the
  // original initializer, followed by the after bytes.
  auto NewInit = ConstantStruct::getAnon(
      {ConstantDataArray::get(M.getContext(), B.Before.Bytes),
       B.GV->getInitializer(),
       ConstantDataArray::get(M.getContext(), B.After.Bytes)});
  auto NewGV =
      new GlobalVariable(M, NewInit->getType(), B.GV->isConstant(),
                         GlobalVariable::PrivateLinkage, NewInit, "", B.GV);
  NewGV->setSection(B.GV->getSection());
  NewGV->setComdat(B.GV->getComdat());
  NewGV->setAlignment(MaybeAlign(B.GV->getAlignment()));

  // Copy the original vtable's metadata to the anonymous global, adjusting
  // offsets as required.
  NewGV->copyMetadata(B.GV, B.Before.Bytes.size());

  // Build an alias named after the original global, pointing at the second
  // element (the original initializer).
  auto Alias = GlobalAlias::create(
      B.GV->getInitializer()->getType(), 0, B.GV->getLinkage(), "",
      ConstantExpr::getGetElementPtr(
          NewInit->getType(), NewGV,
          ArrayRef<Constant *>{ConstantInt::get(Int32Ty, 0),
                               ConstantInt::get(Int32Ty, 1)}),
      &M);
  Alias->setVisibility(B.GV->getVisibility());
  Alias->takeName(B.GV);

  B.GV->replaceAllUsesWith(Alias);
  B.GV->eraseFromParent();
}
bool DevirtModule::areRemarksEnabled() {
  const auto &FL = M.getFunctionList();
  for (const Function &Fn : FL) {
    const auto &BBL = Fn.getBasicBlockList();
    if (BBL.empty())
      continue;
    auto DI = OptimizationRemark(DEBUG_TYPE, "", DebugLoc(), &BBL.front());
    return DI.isEnabled();
  }
  return false;
}
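
// The pattern matched below is the one the frontend emits for a virtual call
// guarded by a type test (illustrative IR, names hypothetical):
//   %vtable = load i8*, i8** %obj
//   %p = call i1 @llvm.type.test(i8* %vtable, metadata !"_ZTS1A")
//   call void @llvm.assume(i1 %p)
//   ; ...load of a function pointer from %vtable, then an indirect call...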
void DevirtModule::scanTypeTestUsers(
    Function *TypeTestFunc,
    DenseMap<Metadata *, std::set<TypeMemberInfo>> &TypeIdMap) {
  // Find all virtual calls via a virtual table pointer %p under an assumption
  // of the form llvm.assume(llvm.type.test(%p, %md)). This indicates that %p
  // points to a member of the type identifier %md. Group calls by (type ID,
  // offset) pair (effectively the identity of the virtual function) and store
  // to CallSlots.
  for (auto I = TypeTestFunc->use_begin(), E = TypeTestFunc->use_end();
       I != E;) {
    auto CI = dyn_cast<CallInst>(I->getUser());
    ++I;
    if (!CI)
      continue;

    // Search for virtual calls based on %p and add them to DevirtCalls.
    SmallVector<DevirtCallSite, 1> DevirtCalls;
    SmallVector<CallInst *, 1> Assumes;
    auto &DT = LookupDomTree(*CI->getFunction());
    findDevirtualizableCallsForTypeTest(DevirtCalls, Assumes, CI, DT);

    Metadata *TypeId =
        cast<MetadataAsValue>(CI->getArgOperand(1))->getMetadata();
    // If we found any, add them to CallSlots.
    if (!Assumes.empty()) {
      Value *Ptr = CI->getArgOperand(0)->stripPointerCasts();
      for (DevirtCallSite Call : DevirtCalls)
        CallSlots[{TypeId, Call.Offset}].addCallSite(Ptr, Call.CB, nullptr);
    }

    auto RemoveTypeTestAssumes = [&]() {
      // We no longer need the assumes or the type test.
      for (auto Assume : Assumes)
        Assume->eraseFromParent();
      // We can't use RecursivelyDeleteTriviallyDeadInstructions here because
      // we may use the vtable argument later.
      if (CI->use_empty())
        CI->eraseFromParent();
    };

    // At this point we could remove all type test assume sequences, as they
    // were originally inserted for WPD. However, we can keep these in the
    // code stream for later analysis (e.g. to help drive more efficient ICP
    // sequences). They will eventually be removed by a second LowerTypeTests
    // invocation that cleans them up. In order to do this correctly, the
    // first LowerTypeTests invocation needs to know that they have "Unknown"
    // type test resolution, so that they aren't treated as Unsat and lowered
    // to False, which will break any uses on assumes. Below we remove any
    // type test assumes that will not be treated as Unknown by LTT.

    // The type test assumes will be treated by LTT as Unsat if the type id is
    // not used on a global (in which case it has no entry in the TypeIdMap).
    if (!TypeIdMap.count(TypeId))
      RemoveTypeTestAssumes();

    // For ThinLTO importing, we need to remove the type test assumes if this
    // is an MDString type id without a corresponding TypeIdSummary. Any
    // non-MDString type ids are ignored and treated as Unknown by LTT, so
    // their type test assumes can be kept. If the MDString type id is missing
    // a TypeIdSummary (e.g. because there was no use on a vcall, preventing
    // the exporting phase of WPD from analyzing it), then it would be treated
    // as Unsat by LTT and we need to remove its type test assumes here. If
    // not used on a vcall we don't need them for later optimization use in
    // any case.
    else if (ImportSummary && isa<MDString>(TypeId)) {
      const TypeIdSummary *TidSummary =
          ImportSummary->getTypeIdSummary(cast<MDString>(TypeId)->getString());
      if (!TidSummary)
        RemoveTypeTestAssumes();
      else
        // If one was created it should not be Unsat, because if we reached
        // here the type id was used on a global.
        assert(TidSummary->TTRes.TheKind != TypeTestResolution::Unsat);
    }
  }
}
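
// llvm.type.checked.load returns an {i8*, i1} pair: the loaded function
// pointer and the type test result. The "pessimistic" expansion below is
// roughly (illustrative IR):
//   %pair = call {i8*, i1} @llvm.type.checked.load(i8* %vt, i32 8, !"_ZTS1A")
// becomes
//   %fptr = load i8*, i8** <gep i8, i8* %vt, i32 8>
//   %ok   = call i1 @llvm.type.test(i8* %vt, metadata !"_ZTS1A")
// with the extractvalue users of %pair rewired to %fptr and %ok.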
void DevirtModule::scanTypeCheckedLoadUsers(Function *TypeCheckedLoadFunc) {
  Function *TypeTestFunc = Intrinsic::getDeclaration(&M, Intrinsic::type_test);

  for (auto I = TypeCheckedLoadFunc->use_begin(),
            E = TypeCheckedLoadFunc->use_end();
       I != E;) {
    auto CI = dyn_cast<CallInst>(I->getUser());
    ++I;
    if (!CI)
      continue;

    Value *Ptr = CI->getArgOperand(0);
    Value *Offset = CI->getArgOperand(1);
    Value *TypeIdValue = CI->getArgOperand(2);
    Metadata *TypeId = cast<MetadataAsValue>(TypeIdValue)->getMetadata();

    SmallVector<DevirtCallSite, 1> DevirtCalls;
    SmallVector<Instruction *, 1> LoadedPtrs;
    SmallVector<Instruction *, 1> Preds;
    bool HasNonCallUses = false;
    auto &DT = LookupDomTree(*CI->getFunction());
    findDevirtualizableCallsForTypeCheckedLoad(DevirtCalls, LoadedPtrs, Preds,
                                               HasNonCallUses, CI, DT);

    // Start by generating "pessimistic" code that explicitly loads the
    // function pointer from the vtable and performs the type check. If
    // possible, we will eliminate the load and the type check later.

    // If possible, only generate the load at the point where it is used.
    // This helps avoid unnecessary spills.
    IRBuilder<> LoadB(
        (LoadedPtrs.size() == 1 && !HasNonCallUses) ? LoadedPtrs[0] : CI);
    Value *GEP = LoadB.CreateGEP(Int8Ty, Ptr, Offset);
    Value *GEPPtr = LoadB.CreateBitCast(GEP, PointerType::getUnqual(Int8PtrTy));
    Value *LoadedValue = LoadB.CreateLoad(Int8PtrTy, GEPPtr);

    for (Instruction *LoadedPtr : LoadedPtrs) {
      LoadedPtr->replaceAllUsesWith(LoadedValue);
      LoadedPtr->eraseFromParent();
    }

    // Likewise for the type test.
    IRBuilder<> CallB((Preds.size() == 1 && !HasNonCallUses) ? Preds[0] : CI);
    CallInst *TypeTestCall = CallB.CreateCall(TypeTestFunc, {Ptr, TypeIdValue});

    for (Instruction *Pred : Preds) {
      Pred->replaceAllUsesWith(TypeTestCall);
      Pred->eraseFromParent();
    }

    // We have already erased any extractvalue instructions that refer to the
    // intrinsic call, but the intrinsic may have other non-extractvalue uses
    // (although this is unlikely). In that case, explicitly build a pair and
    // RAUW it.
    if (!CI->use_empty()) {
      Value *Pair = UndefValue::get(CI->getType());
      IRBuilder<> B(CI);
      Pair = B.CreateInsertValue(Pair, LoadedValue, {0});
      Pair = B.CreateInsertValue(Pair, TypeTestCall, {1});
      CI->replaceAllUsesWith(Pair);
    }

    // The number of unsafe uses is initially the number of uses.
    auto &NumUnsafeUses = NumUnsafeUsesForTypeTest[TypeTestCall];
    NumUnsafeUses = DevirtCalls.size();

    // If the function pointer has a non-call user, we cannot eliminate the
    // type check, as one of those users may eventually call the pointer.
    // Increment the unsafe use count to make sure it cannot reach zero.
    if (HasNonCallUses)
      ++NumUnsafeUses;
    for (DevirtCallSite Call : DevirtCalls) {
      CallSlots[{TypeId, Call.Offset}].addCallSite(Ptr, Call.CB,
                                                   &NumUnsafeUses);
    }

    CI->eraseFromParent();
  }
}
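
// Import phase (ThinLTO backends): look up the WPD resolution that the thin
// link recorded for this slot in the summary and apply the corresponding
// transformation locally; nothing new is exported here.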
void DevirtModule::importResolution(VTableSlot Slot, VTableSlotInfo &SlotInfo) {
  auto *TypeId = dyn_cast<MDString>(Slot.TypeID);
  if (!TypeId)
    return;
  const TypeIdSummary *TidSummary =
      ImportSummary->getTypeIdSummary(TypeId->getString());
  if (!TidSummary)
    return;
  auto ResI = TidSummary->WPDRes.find(Slot.ByteOffset);
  if (ResI == TidSummary->WPDRes.end())
    return;
  const WholeProgramDevirtResolution &Res = ResI->second;

  if (Res.TheKind == WholeProgramDevirtResolution::SingleImpl) {
    assert(!Res.SingleImplName.empty());
    // The type of the function in the declaration is irrelevant because every
    // call site will cast it to the correct type.
    Constant *SingleImpl =
        cast<Constant>(M.getOrInsertFunction(Res.SingleImplName,
                                             Type::getVoidTy(M.getContext()))
                           .getCallee());

    // This is the import phase so we should not be exporting anything.
    bool IsExported = false;
    applySingleImplDevirt(SlotInfo, SingleImpl, IsExported);
    assert(!IsExported);
  }

  for (auto &CSByConstantArg : SlotInfo.ConstCSInfo) {
    auto I = Res.ResByArg.find(CSByConstantArg.first);
    if (I == Res.ResByArg.end())
      continue;
    auto &ResByArg = I->second;
    // FIXME: We should figure out what to do about the "function name"
    // argument to the apply* functions, as the function names are unavailable
    // during the importing phase. For now we just pass the empty string. This
    // does not impact correctness because the function names are just used
    // for remarks.
    switch (ResByArg.TheKind) {
    case WholeProgramDevirtResolution::ByArg::UniformRetVal:
      applyUniformRetValOpt(CSByConstantArg.second, "", ResByArg.Info);
      break;
    case WholeProgramDevirtResolution::ByArg::UniqueRetVal: {
      Constant *UniqueMemberAddr =
          importGlobal(Slot, CSByConstantArg.first, "unique_member");
      applyUniqueRetValOpt(CSByConstantArg.second, "", ResByArg.Info,
                           UniqueMemberAddr);
      break;
    }
    case WholeProgramDevirtResolution::ByArg::VirtualConstProp: {
      Constant *Byte = importConstant(Slot, CSByConstantArg.first, "byte",
                                      Int32Ty, ResByArg.Byte);
      Constant *Bit = importConstant(Slot, CSByConstantArg.first, "bit", Int8Ty,
                                     ResByArg.Bit);
      applyVirtualConstProp(CSByConstantArg.second, "", Byte, Bit);
      break;
    }
    default:
      break;
    }
  }

  if (Res.TheKind == WholeProgramDevirtResolution::BranchFunnel) {
    // The type of the function is irrelevant, because it's bitcast at calls
    // anyhow.
    Constant *JT = cast<Constant>(
        M.getOrInsertFunction(getGlobalName(Slot, {}, "branch_funnel"),
                              Type::getVoidTy(M.getContext()))
            .getCallee());
    bool IsExported = false;
    applyICallBranchFunnel(SlotInfo, JT, IsExported);
    assert(!IsExported);
  }
}
void DevirtModule::removeRedundantTypeTests() {
  auto True = ConstantInt::getTrue(M.getContext());
  for (auto &&U : NumUnsafeUsesForTypeTest) {
    if (U.second == 0) {
      U.first->replaceAllUsesWith(True);
      U.first->eraseFromParent();
    }
  }
}
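
// Module-level driver. In rough order: scan llvm.type.test and
// llvm.type.checked.load users to populate CallSlots; if importing, apply the
// summary resolutions and stop; otherwise try, per slot, single-implementation
// devirtualization, then virtual constant propagation and branch funnels;
// finally drop the now-redundant type tests and rebuild any vtables that had
// constant bytes attached.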
bool DevirtModule::run() {
  // If only some of the modules were split, we cannot correctly perform
  // this transformation. We already checked for the presence of type tests
  // with partially split modules during the thin link, and would have emitted
  // an error if any were found, so here we can simply return.
  if ((ExportSummary && ExportSummary->partiallySplitLTOUnits()) ||
      (ImportSummary && ImportSummary->partiallySplitLTOUnits()))
    return false;

  Function *TypeTestFunc =
      M.getFunction(Intrinsic::getName(Intrinsic::type_test));
  Function *TypeCheckedLoadFunc =
      M.getFunction(Intrinsic::getName(Intrinsic::type_checked_load));
  Function *AssumeFunc = M.getFunction(Intrinsic::getName(Intrinsic::assume));

  // Normally if there are no users of the devirtualization intrinsics in the
  // module, this pass has nothing to do. But if we are exporting, we also need
  // to handle any users that appear only in the function summaries.
  if (!ExportSummary &&
      (!TypeTestFunc || TypeTestFunc->use_empty() || !AssumeFunc ||
       AssumeFunc->use_empty()) &&
      (!TypeCheckedLoadFunc || TypeCheckedLoadFunc->use_empty()))
    return false;

  // Rebuild type metadata into a map for easy lookup.
  std::vector<VTableBits> Bits;
  DenseMap<Metadata *, std::set<TypeMemberInfo>> TypeIdMap;
  buildTypeIdentifierMap(Bits, TypeIdMap);

  if (TypeTestFunc && AssumeFunc)
    scanTypeTestUsers(TypeTestFunc, TypeIdMap);

  if (TypeCheckedLoadFunc)
    scanTypeCheckedLoadUsers(TypeCheckedLoadFunc);

  if (ImportSummary) {
    for (auto &S : CallSlots)
      importResolution(S.first, S.second);

    removeRedundantTypeTests();

    // We have lowered or deleted the type intrinsics, so we will no longer
    // have enough information to reason about the liveness of virtual
    // function pointers in GlobalDCE.
    for (GlobalVariable &GV : M.globals())
      GV.eraseMetadata(LLVMContext::MD_vcall_visibility);

    // The rest of the code is only necessary when exporting or during regular
    // LTO, so we are done.
    return true;
  }

  if (TypeIdMap.empty())
    return true;

  // Collect information from summary about which calls to try to devirtualize.
  if (ExportSummary) {
    DenseMap<GlobalValue::GUID, TinyPtrVector<Metadata *>> MetadataByGUID;
    for (auto &P : TypeIdMap) {
      if (auto *TypeId = dyn_cast<MDString>(P.first))
        MetadataByGUID[GlobalValue::getGUID(TypeId->getString())].push_back(
            TypeId);
    }

    for (auto &P : *ExportSummary) {
      for (auto &S : P.second.SummaryList) {
        auto *FS = dyn_cast<FunctionSummary>(S.get());
        if (!FS)
          continue;
        // FIXME: Only add live functions.
        for (FunctionSummary::VFuncId VF : FS->type_test_assume_vcalls()) {
          for (Metadata *MD : MetadataByGUID[VF.GUID]) {
            CallSlots[{MD, VF.Offset}].CSInfo.addSummaryTypeTestAssumeUser(FS);
          }
        }
        for (FunctionSummary::VFuncId VF : FS->type_checked_load_vcalls()) {
          for (Metadata *MD : MetadataByGUID[VF.GUID]) {
            CallSlots[{MD, VF.Offset}].CSInfo.addSummaryTypeCheckedLoadUser(FS);
          }
        }
        for (const FunctionSummary::ConstVCall &VC :
             FS->type_test_assume_const_vcalls()) {
          for (Metadata *MD : MetadataByGUID[VC.VFunc.GUID]) {
            CallSlots[{MD, VC.VFunc.Offset}]
                .ConstCSInfo[VC.Args]
                .addSummaryTypeTestAssumeUser(FS);
          }
        }
        for (const FunctionSummary::ConstVCall &VC :
             FS->type_checked_load_const_vcalls()) {
          for (Metadata *MD : MetadataByGUID[VC.VFunc.GUID]) {
            CallSlots[{MD, VC.VFunc.Offset}]
                .ConstCSInfo[VC.Args]
                .addSummaryTypeCheckedLoadUser(FS);
          }
        }
      }
    }
  }

  // For each (type, offset) pair:
  bool DidVirtualConstProp = false;
  std::map<std::string, Function *> DevirtTargets;
  for (auto &S : CallSlots) {
    // Search each of the members of the type identifier for the virtual
    // function implementation at offset S.first.ByteOffset, and add to
    // TargetsForSlot.
    std::vector<VirtualCallTarget> TargetsForSlot;
    WholeProgramDevirtResolution *Res = nullptr;
    const std::set<TypeMemberInfo> &TypeMemberInfos = TypeIdMap[S.first.TypeID];
    if (ExportSummary && isa<MDString>(S.first.TypeID) &&
        TypeMemberInfos.size())
      // For any type id used on a global's type metadata, create the type id
      // summary resolution regardless of whether we can devirtualize, so that
      // lower type tests knows the type id is not Unsat. If it was not used on
      // a global's type metadata, the TypeIdMap entry set will be empty, and
      // we don't want to create an entry (with the default Unknown type
      // resolution), which can prevent detection of the Unsat.
      Res = &ExportSummary
                 ->getOrInsertTypeIdSummary(
                     cast<MDString>(S.first.TypeID)->getString())
                 .WPDRes[S.first.ByteOffset];
    if (tryFindVirtualCallTargets(TargetsForSlot, TypeMemberInfos,
                                  S.first.ByteOffset)) {
      if (!trySingleImplDevirt(ExportSummary, TargetsForSlot, S.second, Res)) {
        DidVirtualConstProp |=
            tryVirtualConstProp(TargetsForSlot, S.second, Res, S.first);

        tryICallBranchFunnel(TargetsForSlot, S.second, Res, S.first);
      }

      // Collect functions devirtualized at least for one call site for stats.
      if (RemarksEnabled)
        for (const auto &T : TargetsForSlot)
          if (T.WasDevirt)
            DevirtTargets[std::string(T.Fn->getName())] = T.Fn;
    }

    // CFI-specific: if we are exporting and any llvm.type.checked.load
    // intrinsics were *not* devirtualized, we need to add the resulting
    // llvm.type.test intrinsics to the function summaries so that the
    // LowerTypeTests pass will export them.
    if (ExportSummary && isa<MDString>(S.first.TypeID)) {
      auto GUID =
          GlobalValue::getGUID(cast<MDString>(S.first.TypeID)->getString());
      for (auto FS : S.second.CSInfo.SummaryTypeCheckedLoadUsers)
        FS->addTypeTest(GUID);
      for (auto &CCS : S.second.ConstCSInfo)
        for (auto FS : CCS.second.SummaryTypeCheckedLoadUsers)
          FS->addTypeTest(GUID);
    }
  }

  if (RemarksEnabled) {
    // Generate remarks for each devirtualized function.
    for (const auto &DT : DevirtTargets) {
      Function *F = DT.second;
      using namespace ore;
      OREGetter(F).emit(OptimizationRemark(DEBUG_TYPE, "Devirtualized", F)
                        << "devirtualized "
                        << NV("FunctionName", DT.first));
    }
  }

  removeRedundantTypeTests();

  // Rebuild each global we touched as part of virtual constant propagation to
  // include the before and after bytes.
  if (DidVirtualConstProp)
    for (VTableBits &B : Bits)
      rebuildGlobal(B);

  // We have lowered or deleted the type intrinsics, so we will no longer
  // have enough information to reason about the liveness of virtual
  // function pointers in GlobalDCE.
  for (GlobalVariable &GV : M.globals())
    GV.eraseMetadata(LLVMContext::MD_vcall_visibility);

  return true;
}
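
// Index-only driver used during the thin link: it sees only summaries, so the
// sole transformation it can record is single-implementation
// devirtualization, which the importing backends apply later via
// importResolution.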
void DevirtIndex::run() {
  if (ExportSummary.typeIdCompatibleVtableMap().empty())
    return;

  DenseMap<GlobalValue::GUID, std::vector<StringRef>> NameByGUID;
  for (auto &P : ExportSummary.typeIdCompatibleVtableMap()) {
    NameByGUID[GlobalValue::getGUID(P.first)].push_back(P.first);
  }

  // Collect information from summary about which calls to try to devirtualize.
  for (auto &P : ExportSummary) {
    for (auto &S : P.second.SummaryList) {
      auto *FS = dyn_cast<FunctionSummary>(S.get());
      if (!FS)
        continue;
      // FIXME: Only add live functions.
      for (FunctionSummary::VFuncId VF : FS->type_test_assume_vcalls()) {
        for (StringRef Name : NameByGUID[VF.GUID]) {
          CallSlots[{Name, VF.Offset}].CSInfo.addSummaryTypeTestAssumeUser(FS);
        }
      }
      for (FunctionSummary::VFuncId VF : FS->type_checked_load_vcalls()) {
        for (StringRef Name : NameByGUID[VF.GUID]) {
          CallSlots[{Name, VF.Offset}].CSInfo.addSummaryTypeCheckedLoadUser(FS);
        }
      }
      for (const FunctionSummary::ConstVCall &VC :
           FS->type_test_assume_const_vcalls()) {
        for (StringRef Name : NameByGUID[VC.VFunc.GUID]) {
          CallSlots[{Name, VC.VFunc.Offset}]
              .ConstCSInfo[VC.Args]
              .addSummaryTypeTestAssumeUser(FS);
        }
      }
      for (const FunctionSummary::ConstVCall &VC :
           FS->type_checked_load_const_vcalls()) {
        for (StringRef Name : NameByGUID[VC.VFunc.GUID]) {
          CallSlots[{Name, VC.VFunc.Offset}]
              .ConstCSInfo[VC.Args]
              .addSummaryTypeCheckedLoadUser(FS);
        }
      }
    }
  }

  std::set<ValueInfo> DevirtTargets;
  // For each (type, offset) pair:
  for (auto &S : CallSlots) {
    // Search each of the members of the type identifier for the virtual
    // function implementation at offset S.first.ByteOffset, and add to
    // TargetsForSlot.
    std::vector<ValueInfo> TargetsForSlot;
    auto TidSummary =
        ExportSummary.getTypeIdCompatibleVtableSummary(S.first.TypeID);
    assert(TidSummary);
    // Create the type id summary resolution regardless of whether we can
    // devirtualize, so that lower type tests knows the type id is used on
    // a global and not Unsat.
    WholeProgramDevirtResolution *Res =
        &ExportSummary.getOrInsertTypeIdSummary(S.first.TypeID)
             .WPDRes[S.first.ByteOffset];
    if (tryFindVirtualCallTargets(TargetsForSlot, *TidSummary,
                                  S.first.ByteOffset)) {
      if (!trySingleImplDevirt(TargetsForSlot, S.first, S.second, Res,
                               DevirtTargets))
        continue;
    }
  }

  // Optionally have the thin link print a message for each devirtualized
  // function.
  if (PrintSummaryDevirt)
    for (const auto &DT : DevirtTargets)
      errs() << "Devirtualized call to " << DT << "\n";
}