PerfMonitor.cpp 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297
  1. //===------ PerfMonitor.cpp - Generate a run-time performance monitor. -======//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. //
  9. //===----------------------------------------------------------------------===//
  10. #include "polly/CodeGen/PerfMonitor.h"
  11. #include "polly/CodeGen/RuntimeDebugBuilder.h"
  12. #include "polly/ScopInfo.h"
  13. #include "llvm/ADT/Triple.h"
  14. #include "llvm/ADT/Twine.h"
  15. #include "llvm/IR/IntrinsicsX86.h"
  16. using namespace llvm;
  17. using namespace polly;
  18. Function *PerfMonitor::getAtExit() {
  19. const char *Name = "atexit";
  20. Function *F = M->getFunction(Name);
  21. if (!F) {
  22. GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage;
  23. FunctionType *Ty = FunctionType::get(Builder.getInt32Ty(),
  24. {Builder.getInt8PtrTy()}, false);
  25. F = Function::Create(Ty, Linkage, Name, M);
  26. }
  27. return F;
  28. }
  29. void PerfMonitor::addToGlobalConstructors(Function *Fn) {
  30. const char *Name = "llvm.global_ctors";
  31. GlobalVariable *GV = M->getGlobalVariable(Name);
  32. std::vector<Constant *> V;
  33. if (GV) {
  34. Constant *Array = GV->getInitializer();
  35. for (Value *X : Array->operand_values())
  36. V.push_back(cast<Constant>(X));
  37. GV->eraseFromParent();
  38. }
  39. StructType *ST = StructType::get(Builder.getInt32Ty(), Fn->getType(),
  40. Builder.getInt8PtrTy());
  41. V.push_back(
  42. ConstantStruct::get(ST, Builder.getInt32(10), Fn,
  43. ConstantPointerNull::get(Builder.getInt8PtrTy())));
  44. ArrayType *Ty = ArrayType::get(ST, V.size());
  45. GV = new GlobalVariable(*M, Ty, true, GlobalValue::AppendingLinkage,
  46. ConstantArray::get(Ty, V), Name, nullptr,
  47. GlobalVariable::NotThreadLocal);
  48. }
  49. Function *PerfMonitor::getRDTSCP() {
  50. return Intrinsic::getDeclaration(M, Intrinsic::x86_rdtscp);
  51. }
  52. PerfMonitor::PerfMonitor(const Scop &S, Module *M)
  53. : M(M), Builder(M->getContext()), S(S) {
  54. if (Triple(M->getTargetTriple()).getArch() == llvm::Triple::x86_64)
  55. Supported = true;
  56. else
  57. Supported = false;
  58. }
  59. static void TryRegisterGlobal(Module *M, const char *Name,
  60. Constant *InitialValue, Value **Location) {
  61. *Location = M->getGlobalVariable(Name);
  62. if (!*Location)
  63. *Location = new GlobalVariable(
  64. *M, InitialValue->getType(), true, GlobalValue::WeakAnyLinkage,
  65. InitialValue, Name, nullptr, GlobalVariable::InitialExecTLSModel);
  66. }
  67. // Generate a unique name that is usable as a LLVM name for a scop to name its
  68. // performance counter.
  69. static std::string GetScopUniqueVarname(const Scop &S) {
  70. std::string EntryString, ExitString;
  71. std::tie(EntryString, ExitString) = S.getEntryExitStr();
  72. return (Twine("__polly_perf_in_") + S.getFunction().getName() + "_from__" +
  73. EntryString + "__to__" + ExitString)
  74. .str();
  75. }
  76. void PerfMonitor::addScopCounter() {
  77. const std::string varname = GetScopUniqueVarname(S);
  78. TryRegisterGlobal(M, (varname + "_cycles").c_str(), Builder.getInt64(0),
  79. &CyclesInCurrentScopPtr);
  80. TryRegisterGlobal(M, (varname + "_trip_count").c_str(), Builder.getInt64(0),
  81. &TripCountForCurrentScopPtr);
  82. }
  83. void PerfMonitor::addGlobalVariables() {
  84. TryRegisterGlobal(M, "__polly_perf_cycles_total_start", Builder.getInt64(0),
  85. &CyclesTotalStartPtr);
  86. TryRegisterGlobal(M, "__polly_perf_initialized", Builder.getInt1(0),
  87. &AlreadyInitializedPtr);
  88. TryRegisterGlobal(M, "__polly_perf_cycles_in_scops", Builder.getInt64(0),
  89. &CyclesInScopsPtr);
  90. TryRegisterGlobal(M, "__polly_perf_cycles_in_scop_start", Builder.getInt64(0),
  91. &CyclesInScopStartPtr);
  92. }
  93. static const char *InitFunctionName = "__polly_perf_init";
  94. static const char *FinalReportingFunctionName = "__polly_perf_final";
  95. static BasicBlock *FinalStartBB = nullptr;
  96. static ReturnInst *ReturnFromFinal = nullptr;
  97. Function *PerfMonitor::insertFinalReporting() {
  98. // Create new function.
  99. GlobalValue::LinkageTypes Linkage = Function::WeakODRLinkage;
  100. FunctionType *Ty = FunctionType::get(Builder.getVoidTy(), {}, false);
  101. Function *ExitFn =
  102. Function::Create(Ty, Linkage, FinalReportingFunctionName, M);
  103. FinalStartBB = BasicBlock::Create(M->getContext(), "start", ExitFn);
  104. Builder.SetInsertPoint(FinalStartBB);
  105. if (!Supported) {
  106. RuntimeDebugBuilder::createCPUPrinter(
  107. Builder, "Polly runtime information generation not supported\n");
  108. Builder.CreateRetVoid();
  109. return ExitFn;
  110. }
  111. // Measure current cycles and compute final timings.
  112. Function *RDTSCPFn = getRDTSCP();
  113. Value *CurrentCycles =
  114. Builder.CreateExtractValue(Builder.CreateCall(RDTSCPFn), {0});
  115. Value *CyclesStart = Builder.CreateLoad(CyclesTotalStartPtr, true);
  116. Value *CyclesTotal = Builder.CreateSub(CurrentCycles, CyclesStart);
  117. Value *CyclesInScops = Builder.CreateLoad(CyclesInScopsPtr, true);
  118. // Print the runtime information.
  119. RuntimeDebugBuilder::createCPUPrinter(Builder, "Polly runtime information\n");
  120. RuntimeDebugBuilder::createCPUPrinter(Builder, "-------------------------\n");
  121. RuntimeDebugBuilder::createCPUPrinter(Builder, "Total: ", CyclesTotal, "\n");
  122. RuntimeDebugBuilder::createCPUPrinter(Builder, "Scops: ", CyclesInScops,
  123. "\n");
  124. // Print the preamble for per-scop information.
  125. RuntimeDebugBuilder::createCPUPrinter(Builder, "\n");
  126. RuntimeDebugBuilder::createCPUPrinter(Builder, "Per SCoP information\n");
  127. RuntimeDebugBuilder::createCPUPrinter(Builder, "--------------------\n");
  128. RuntimeDebugBuilder::createCPUPrinter(
  129. Builder, "scop function, "
  130. "entry block name, exit block name, total time, trip count\n");
  131. ReturnFromFinal = Builder.CreateRetVoid();
  132. return ExitFn;
  133. }
  134. void PerfMonitor::AppendScopReporting() {
  135. if (!Supported)
  136. return;
  137. assert(FinalStartBB && "Expected FinalStartBB to be initialized by "
  138. "PerfMonitor::insertFinalReporting.");
  139. assert(ReturnFromFinal && "Expected ReturnFromFinal to be initialized by "
  140. "PerfMonitor::insertFinalReporting.");
  141. Builder.SetInsertPoint(FinalStartBB);
  142. ReturnFromFinal->eraseFromParent();
  143. Value *CyclesInCurrentScop =
  144. Builder.CreateLoad(this->CyclesInCurrentScopPtr, true);
  145. Value *TripCountForCurrentScop =
  146. Builder.CreateLoad(this->TripCountForCurrentScopPtr, true);
  147. std::string EntryName, ExitName;
  148. std::tie(EntryName, ExitName) = S.getEntryExitStr();
  149. // print in CSV for easy parsing with other tools.
  150. RuntimeDebugBuilder::createCPUPrinter(
  151. Builder, S.getFunction().getName(), ", ", EntryName, ", ", ExitName, ", ",
  152. CyclesInCurrentScop, ", ", TripCountForCurrentScop, "\n");
  153. ReturnFromFinal = Builder.CreateRetVoid();
  154. }
  155. static Function *FinalReporting = nullptr;
  156. void PerfMonitor::initialize() {
  157. addGlobalVariables();
  158. addScopCounter();
  159. // Ensure that we only add the final reporting function once.
  160. // On later invocations, append to the reporting function.
  161. if (!FinalReporting) {
  162. FinalReporting = insertFinalReporting();
  163. Function *InitFn = insertInitFunction(FinalReporting);
  164. addToGlobalConstructors(InitFn);
  165. }
  166. AppendScopReporting();
  167. }
  168. Function *PerfMonitor::insertInitFunction(Function *FinalReporting) {
  169. // Insert function definition and BBs.
  170. GlobalValue::LinkageTypes Linkage = Function::WeakODRLinkage;
  171. FunctionType *Ty = FunctionType::get(Builder.getVoidTy(), {}, false);
  172. Function *InitFn = Function::Create(Ty, Linkage, InitFunctionName, M);
  173. BasicBlock *Start = BasicBlock::Create(M->getContext(), "start", InitFn);
  174. BasicBlock *EarlyReturn =
  175. BasicBlock::Create(M->getContext(), "earlyreturn", InitFn);
  176. BasicBlock *InitBB = BasicBlock::Create(M->getContext(), "initbb", InitFn);
  177. Builder.SetInsertPoint(Start);
  178. // Check if this function was already run. If yes, return.
  179. //
  180. // In case profiling has been enabled in multiple translation units, the
  181. // initializer function will be added to the global constructors list of
  182. // each translation unit. When merging translation units, the global
  183. // constructor lists are just appended, such that the initializer will appear
  184. // multiple times. To avoid initializations being run multiple times (and
  185. // especially to avoid that atExitFn is called more than once), we bail
  186. // out if the initializer is run more than once.
  187. Value *HasRunBefore = Builder.CreateLoad(AlreadyInitializedPtr);
  188. Builder.CreateCondBr(HasRunBefore, EarlyReturn, InitBB);
  189. Builder.SetInsertPoint(EarlyReturn);
  190. Builder.CreateRetVoid();
  191. // Keep track that this function has been run once.
  192. Builder.SetInsertPoint(InitBB);
  193. Value *True = Builder.getInt1(true);
  194. Builder.CreateStore(True, AlreadyInitializedPtr);
  195. // Register the final reporting function with atexit().
  196. Value *FinalReportingPtr =
  197. Builder.CreatePointerCast(FinalReporting, Builder.getInt8PtrTy());
  198. Function *AtExitFn = getAtExit();
  199. Builder.CreateCall(AtExitFn, {FinalReportingPtr});
  200. if (Supported) {
  201. // Read the currently cycle counter and store the result for later.
  202. Function *RDTSCPFn = getRDTSCP();
  203. Value *CurrentCycles =
  204. Builder.CreateExtractValue(Builder.CreateCall(RDTSCPFn), {0});
  205. Builder.CreateStore(CurrentCycles, CyclesTotalStartPtr, true);
  206. }
  207. Builder.CreateRetVoid();
  208. return InitFn;
  209. }
  210. void PerfMonitor::insertRegionStart(Instruction *InsertBefore) {
  211. if (!Supported)
  212. return;
  213. Builder.SetInsertPoint(InsertBefore);
  214. Function *RDTSCPFn = getRDTSCP();
  215. Value *CurrentCycles =
  216. Builder.CreateExtractValue(Builder.CreateCall(RDTSCPFn), {0});
  217. Builder.CreateStore(CurrentCycles, CyclesInScopStartPtr, true);
  218. }
  219. void PerfMonitor::insertRegionEnd(Instruction *InsertBefore) {
  220. if (!Supported)
  221. return;
  222. Builder.SetInsertPoint(InsertBefore);
  223. Function *RDTSCPFn = getRDTSCP();
  224. LoadInst *CyclesStart = Builder.CreateLoad(CyclesInScopStartPtr, true);
  225. Value *CurrentCycles =
  226. Builder.CreateExtractValue(Builder.CreateCall(RDTSCPFn), {0});
  227. Value *CyclesInScop = Builder.CreateSub(CurrentCycles, CyclesStart);
  228. Value *CyclesInScops = Builder.CreateLoad(CyclesInScopsPtr, true);
  229. CyclesInScops = Builder.CreateAdd(CyclesInScops, CyclesInScop);
  230. Builder.CreateStore(CyclesInScops, CyclesInScopsPtr, true);
  231. Value *CyclesInCurrentScop = Builder.CreateLoad(CyclesInCurrentScopPtr, true);
  232. CyclesInCurrentScop = Builder.CreateAdd(CyclesInCurrentScop, CyclesInScop);
  233. Builder.CreateStore(CyclesInCurrentScop, CyclesInCurrentScopPtr, true);
  234. Value *TripCountForCurrentScop =
  235. Builder.CreateLoad(TripCountForCurrentScopPtr, true);
  236. TripCountForCurrentScop =
  237. Builder.CreateAdd(TripCountForCurrentScop, Builder.getInt64(1));
  238. Builder.CreateStore(TripCountForCurrentScop, TripCountForCurrentScopPtr,
  239. true);
  240. }