PerfMonitor.cpp 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303
  1. //===------ PerfMonitor.cpp - Generate a run-time performance monitor. -======//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. //
  9. //===----------------------------------------------------------------------===//
  10. #include "polly/CodeGen/PerfMonitor.h"
  11. #include "polly/CodeGen/RuntimeDebugBuilder.h"
  12. #include "polly/ScopInfo.h"
  13. #include "llvm/ADT/Triple.h"
  14. #include "llvm/ADT/Twine.h"
  15. #include "llvm/IR/IntrinsicsX86.h"
  16. using namespace llvm;
  17. using namespace polly;
  18. Function *PerfMonitor::getAtExit() {
  19. const char *Name = "atexit";
  20. Function *F = M->getFunction(Name);
  21. if (!F) {
  22. GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage;
  23. FunctionType *Ty = FunctionType::get(Builder.getInt32Ty(),
  24. {Builder.getInt8PtrTy()}, false);
  25. F = Function::Create(Ty, Linkage, Name, M);
  26. }
  27. return F;
  28. }
  29. void PerfMonitor::addToGlobalConstructors(Function *Fn) {
  30. const char *Name = "llvm.global_ctors";
  31. GlobalVariable *GV = M->getGlobalVariable(Name);
  32. std::vector<Constant *> V;
  33. if (GV) {
  34. Constant *Array = GV->getInitializer();
  35. for (Value *X : Array->operand_values())
  36. V.push_back(cast<Constant>(X));
  37. GV->eraseFromParent();
  38. }
  39. StructType *ST = StructType::get(Builder.getInt32Ty(), Fn->getType(),
  40. Builder.getInt8PtrTy());
  41. V.push_back(
  42. ConstantStruct::get(ST, Builder.getInt32(10), Fn,
  43. ConstantPointerNull::get(Builder.getInt8PtrTy())));
  44. ArrayType *Ty = ArrayType::get(ST, V.size());
  45. GV = new GlobalVariable(*M, Ty, true, GlobalValue::AppendingLinkage,
  46. ConstantArray::get(Ty, V), Name, nullptr,
  47. GlobalVariable::NotThreadLocal);
  48. }
  49. Function *PerfMonitor::getRDTSCP() {
  50. return Intrinsic::getDeclaration(M, Intrinsic::x86_rdtscp);
  51. }
  52. PerfMonitor::PerfMonitor(const Scop &S, Module *M)
  53. : M(M), Builder(M->getContext()), S(S) {
  54. if (Triple(M->getTargetTriple()).getArch() == llvm::Triple::x86_64)
  55. Supported = true;
  56. else
  57. Supported = false;
  58. }
  59. static void TryRegisterGlobal(Module *M, const char *Name,
  60. Constant *InitialValue, Value **Location) {
  61. *Location = M->getGlobalVariable(Name);
  62. if (!*Location)
  63. *Location = new GlobalVariable(
  64. *M, InitialValue->getType(), true, GlobalValue::WeakAnyLinkage,
  65. InitialValue, Name, nullptr, GlobalVariable::InitialExecTLSModel);
  66. }
  67. // Generate a unique name that is usable as a LLVM name for a scop to name its
  68. // performance counter.
  69. static std::string GetScopUniqueVarname(const Scop &S) {
  70. std::string EntryString, ExitString;
  71. std::tie(EntryString, ExitString) = S.getEntryExitStr();
  72. return (Twine("__polly_perf_in_") + S.getFunction().getName() + "_from__" +
  73. EntryString + "__to__" + ExitString)
  74. .str();
  75. }
  76. void PerfMonitor::addScopCounter() {
  77. const std::string varname = GetScopUniqueVarname(S);
  78. TryRegisterGlobal(M, (varname + "_cycles").c_str(), Builder.getInt64(0),
  79. &CyclesInCurrentScopPtr);
  80. TryRegisterGlobal(M, (varname + "_trip_count").c_str(), Builder.getInt64(0),
  81. &TripCountForCurrentScopPtr);
  82. }
  83. void PerfMonitor::addGlobalVariables() {
  84. TryRegisterGlobal(M, "__polly_perf_cycles_total_start", Builder.getInt64(0),
  85. &CyclesTotalStartPtr);
  86. TryRegisterGlobal(M, "__polly_perf_initialized", Builder.getInt1(false),
  87. &AlreadyInitializedPtr);
  88. TryRegisterGlobal(M, "__polly_perf_cycles_in_scops", Builder.getInt64(0),
  89. &CyclesInScopsPtr);
  90. TryRegisterGlobal(M, "__polly_perf_cycles_in_scop_start", Builder.getInt64(0),
  91. &CyclesInScopStartPtr);
  92. }
  93. static const char *InitFunctionName = "__polly_perf_init";
  94. static const char *FinalReportingFunctionName = "__polly_perf_final";
  95. static BasicBlock *FinalStartBB = nullptr;
  96. static ReturnInst *ReturnFromFinal = nullptr;
  97. Function *PerfMonitor::insertFinalReporting() {
  98. // Create new function.
  99. GlobalValue::LinkageTypes Linkage = Function::WeakODRLinkage;
  100. FunctionType *Ty = FunctionType::get(Builder.getVoidTy(), {}, false);
  101. Function *ExitFn =
  102. Function::Create(Ty, Linkage, FinalReportingFunctionName, M);
  103. FinalStartBB = BasicBlock::Create(M->getContext(), "start", ExitFn);
  104. Builder.SetInsertPoint(FinalStartBB);
  105. if (!Supported) {
  106. RuntimeDebugBuilder::createCPUPrinter(
  107. Builder, "Polly runtime information generation not supported\n");
  108. Builder.CreateRetVoid();
  109. return ExitFn;
  110. }
  111. // Measure current cycles and compute final timings.
  112. Function *RDTSCPFn = getRDTSCP();
  113. Type *Int64Ty = Builder.getInt64Ty();
  114. Value *CurrentCycles =
  115. Builder.CreateExtractValue(Builder.CreateCall(RDTSCPFn), {0});
  116. Value *CyclesStart = Builder.CreateLoad(Int64Ty, CyclesTotalStartPtr, true);
  117. Value *CyclesTotal = Builder.CreateSub(CurrentCycles, CyclesStart);
  118. Value *CyclesInScops = Builder.CreateLoad(Int64Ty, CyclesInScopsPtr, true);
  119. // Print the runtime information.
  120. RuntimeDebugBuilder::createCPUPrinter(Builder, "Polly runtime information\n");
  121. RuntimeDebugBuilder::createCPUPrinter(Builder, "-------------------------\n");
  122. RuntimeDebugBuilder::createCPUPrinter(Builder, "Total: ", CyclesTotal, "\n");
  123. RuntimeDebugBuilder::createCPUPrinter(Builder, "Scops: ", CyclesInScops,
  124. "\n");
  125. // Print the preamble for per-scop information.
  126. RuntimeDebugBuilder::createCPUPrinter(Builder, "\n");
  127. RuntimeDebugBuilder::createCPUPrinter(Builder, "Per SCoP information\n");
  128. RuntimeDebugBuilder::createCPUPrinter(Builder, "--------------------\n");
  129. RuntimeDebugBuilder::createCPUPrinter(
  130. Builder, "scop function, "
  131. "entry block name, exit block name, total time, trip count\n");
  132. ReturnFromFinal = Builder.CreateRetVoid();
  133. return ExitFn;
  134. }
  135. void PerfMonitor::AppendScopReporting() {
  136. if (!Supported)
  137. return;
  138. assert(FinalStartBB && "Expected FinalStartBB to be initialized by "
  139. "PerfMonitor::insertFinalReporting.");
  140. assert(ReturnFromFinal && "Expected ReturnFromFinal to be initialized by "
  141. "PerfMonitor::insertFinalReporting.");
  142. Builder.SetInsertPoint(FinalStartBB);
  143. ReturnFromFinal->eraseFromParent();
  144. Type *Int64Ty = Builder.getInt64Ty();
  145. Value *CyclesInCurrentScop =
  146. Builder.CreateLoad(Int64Ty, this->CyclesInCurrentScopPtr, true);
  147. Value *TripCountForCurrentScop =
  148. Builder.CreateLoad(Int64Ty, this->TripCountForCurrentScopPtr, true);
  149. std::string EntryName, ExitName;
  150. std::tie(EntryName, ExitName) = S.getEntryExitStr();
  151. // print in CSV for easy parsing with other tools.
  152. RuntimeDebugBuilder::createCPUPrinter(
  153. Builder, S.getFunction().getName(), ", ", EntryName, ", ", ExitName, ", ",
  154. CyclesInCurrentScop, ", ", TripCountForCurrentScop, "\n");
  155. ReturnFromFinal = Builder.CreateRetVoid();
  156. }
  157. static Function *FinalReporting = nullptr;
  158. void PerfMonitor::initialize() {
  159. addGlobalVariables();
  160. addScopCounter();
  161. // Ensure that we only add the final reporting function once.
  162. // On later invocations, append to the reporting function.
  163. if (!FinalReporting) {
  164. FinalReporting = insertFinalReporting();
  165. Function *InitFn = insertInitFunction(FinalReporting);
  166. addToGlobalConstructors(InitFn);
  167. }
  168. AppendScopReporting();
  169. }
  170. Function *PerfMonitor::insertInitFunction(Function *FinalReporting) {
  171. // Insert function definition and BBs.
  172. GlobalValue::LinkageTypes Linkage = Function::WeakODRLinkage;
  173. FunctionType *Ty = FunctionType::get(Builder.getVoidTy(), {}, false);
  174. Function *InitFn = Function::Create(Ty, Linkage, InitFunctionName, M);
  175. BasicBlock *Start = BasicBlock::Create(M->getContext(), "start", InitFn);
  176. BasicBlock *EarlyReturn =
  177. BasicBlock::Create(M->getContext(), "earlyreturn", InitFn);
  178. BasicBlock *InitBB = BasicBlock::Create(M->getContext(), "initbb", InitFn);
  179. Builder.SetInsertPoint(Start);
  180. // Check if this function was already run. If yes, return.
  181. //
  182. // In case profiling has been enabled in multiple translation units, the
  183. // initializer function will be added to the global constructors list of
  184. // each translation unit. When merging translation units, the global
  185. // constructor lists are just appended, such that the initializer will appear
  186. // multiple times. To avoid initializations being run multiple times (and
  187. // especially to avoid that atExitFn is called more than once), we bail
  188. // out if the initializer is run more than once.
  189. Value *HasRunBefore =
  190. Builder.CreateLoad(Builder.getInt1Ty(), AlreadyInitializedPtr);
  191. Builder.CreateCondBr(HasRunBefore, EarlyReturn, InitBB);
  192. Builder.SetInsertPoint(EarlyReturn);
  193. Builder.CreateRetVoid();
  194. // Keep track that this function has been run once.
  195. Builder.SetInsertPoint(InitBB);
  196. Value *True = Builder.getInt1(true);
  197. Builder.CreateStore(True, AlreadyInitializedPtr);
  198. // Register the final reporting function with atexit().
  199. Value *FinalReportingPtr =
  200. Builder.CreatePointerCast(FinalReporting, Builder.getInt8PtrTy());
  201. Function *AtExitFn = getAtExit();
  202. Builder.CreateCall(AtExitFn, {FinalReportingPtr});
  203. if (Supported) {
  204. // Read the currently cycle counter and store the result for later.
  205. Function *RDTSCPFn = getRDTSCP();
  206. Value *CurrentCycles =
  207. Builder.CreateExtractValue(Builder.CreateCall(RDTSCPFn), {0});
  208. Builder.CreateStore(CurrentCycles, CyclesTotalStartPtr, true);
  209. }
  210. Builder.CreateRetVoid();
  211. return InitFn;
  212. }
  213. void PerfMonitor::insertRegionStart(Instruction *InsertBefore) {
  214. if (!Supported)
  215. return;
  216. Builder.SetInsertPoint(InsertBefore);
  217. Function *RDTSCPFn = getRDTSCP();
  218. Value *CurrentCycles =
  219. Builder.CreateExtractValue(Builder.CreateCall(RDTSCPFn), {0});
  220. Builder.CreateStore(CurrentCycles, CyclesInScopStartPtr, true);
  221. }
  222. void PerfMonitor::insertRegionEnd(Instruction *InsertBefore) {
  223. if (!Supported)
  224. return;
  225. Builder.SetInsertPoint(InsertBefore);
  226. Function *RDTSCPFn = getRDTSCP();
  227. Type *Int64Ty = Builder.getInt64Ty();
  228. LoadInst *CyclesStart =
  229. Builder.CreateLoad(Int64Ty, CyclesInScopStartPtr, true);
  230. Value *CurrentCycles =
  231. Builder.CreateExtractValue(Builder.CreateCall(RDTSCPFn), {0});
  232. Value *CyclesInScop = Builder.CreateSub(CurrentCycles, CyclesStart);
  233. Value *CyclesInScops = Builder.CreateLoad(Int64Ty, CyclesInScopsPtr, true);
  234. CyclesInScops = Builder.CreateAdd(CyclesInScops, CyclesInScop);
  235. Builder.CreateStore(CyclesInScops, CyclesInScopsPtr, true);
  236. Value *CyclesInCurrentScop =
  237. Builder.CreateLoad(Int64Ty, CyclesInCurrentScopPtr, true);
  238. CyclesInCurrentScop = Builder.CreateAdd(CyclesInCurrentScop, CyclesInScop);
  239. Builder.CreateStore(CyclesInCurrentScop, CyclesInCurrentScopPtr, true);
  240. Value *TripCountForCurrentScop =
  241. Builder.CreateLoad(Int64Ty, TripCountForCurrentScopPtr, true);
  242. TripCountForCurrentScop =
  243. Builder.CreateAdd(TripCountForCurrentScop, Builder.getInt64(1));
  244. Builder.CreateStore(TripCountForCurrentScop, TripCountForCurrentScopPtr,
  245. true);
  246. }