TimeProfiler.cpp 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330
  1. //===-- TimeProfiler.cpp - Hierarchical Time Profiler ---------------------===//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. //
  9. // This file implements hierarchical time profiler.
  10. //
  11. //===----------------------------------------------------------------------===//
  12. #include "llvm/Support/TimeProfiler.h"
  13. #include "llvm/ADT/STLFunctionalExtras.h"
  14. #include "llvm/ADT/StringMap.h"
  15. #include "llvm/Support/JSON.h"
  16. #include "llvm/Support/ManagedStatic.h"
  17. #include "llvm/Support/Path.h"
  18. #include "llvm/Support/Process.h"
  19. #include "llvm/Support/Threading.h"
  20. #include <algorithm>
  21. #include <cassert>
  22. #include <chrono>
  23. #include <mutex>
  24. #include <string>
  25. #include <vector>
  26. using namespace std::chrono;
  27. using namespace llvm;
  28. static std::mutex Mu;
  29. // List of all instances
  30. static ManagedStatic<std::vector<TimeTraceProfiler *>>
  31. ThreadTimeTraceProfilerInstances; // GUARDED_BY(Mu)
  32. // Per Thread instance
  33. static LLVM_THREAD_LOCAL TimeTraceProfiler *TimeTraceProfilerInstance = nullptr;
  34. TimeTraceProfiler *llvm::getTimeTraceProfilerInstance() {
  35. return TimeTraceProfilerInstance;
  36. }
  37. typedef duration<steady_clock::rep, steady_clock::period> DurationType;
  38. typedef time_point<steady_clock> TimePointType;
  39. typedef std::pair<size_t, DurationType> CountAndDurationType;
  40. typedef std::pair<std::string, CountAndDurationType>
  41. NameAndCountAndDurationType;
  42. namespace {
  43. struct Entry {
  44. const TimePointType Start;
  45. TimePointType End;
  46. const std::string Name;
  47. const std::string Detail;
  48. Entry(TimePointType &&S, TimePointType &&E, std::string &&N, std::string &&Dt)
  49. : Start(std::move(S)), End(std::move(E)), Name(std::move(N)),
  50. Detail(std::move(Dt)) {}
  51. // Calculate timings for FlameGraph. Cast time points to microsecond precision
  52. // rather than casting duration. This avoid truncation issues causing inner
  53. // scopes overruning outer scopes.
  54. steady_clock::rep getFlameGraphStartUs(TimePointType StartTime) const {
  55. return (time_point_cast<microseconds>(Start) -
  56. time_point_cast<microseconds>(StartTime))
  57. .count();
  58. }
  59. steady_clock::rep getFlameGraphDurUs() const {
  60. return (time_point_cast<microseconds>(End) -
  61. time_point_cast<microseconds>(Start))
  62. .count();
  63. }
  64. };
  65. } // namespace
  66. struct llvm::TimeTraceProfiler {
  67. TimeTraceProfiler(unsigned TimeTraceGranularity = 0, StringRef ProcName = "")
  68. : BeginningOfTime(system_clock::now()), StartTime(steady_clock::now()),
  69. ProcName(ProcName), Pid(sys::Process::getProcessId()),
  70. Tid(llvm::get_threadid()), TimeTraceGranularity(TimeTraceGranularity) {
  71. llvm::get_thread_name(ThreadName);
  72. }
  73. void begin(std::string Name, llvm::function_ref<std::string()> Detail) {
  74. Stack.emplace_back(steady_clock::now(), TimePointType(), std::move(Name),
  75. Detail());
  76. }
  77. void end() {
  78. assert(!Stack.empty() && "Must call begin() first");
  79. Entry &E = Stack.back();
  80. E.End = steady_clock::now();
  81. // Check that end times monotonically increase.
  82. assert((Entries.empty() ||
  83. (E.getFlameGraphStartUs(StartTime) + E.getFlameGraphDurUs() >=
  84. Entries.back().getFlameGraphStartUs(StartTime) +
  85. Entries.back().getFlameGraphDurUs())) &&
  86. "TimeProfiler scope ended earlier than previous scope");
  87. // Calculate duration at full precision for overall counts.
  88. DurationType Duration = E.End - E.Start;
  89. // Only include sections longer or equal to TimeTraceGranularity msec.
  90. if (duration_cast<microseconds>(Duration).count() >= TimeTraceGranularity)
  91. Entries.emplace_back(E);
  92. // Track total time taken by each "name", but only the topmost levels of
  93. // them; e.g. if there's a template instantiation that instantiates other
  94. // templates from within, we only want to add the topmost one. "topmost"
  95. // happens to be the ones that don't have any currently open entries above
  96. // itself.
  97. if (llvm::none_of(llvm::drop_begin(llvm::reverse(Stack)),
  98. [&](const Entry &Val) { return Val.Name == E.Name; })) {
  99. auto &CountAndTotal = CountAndTotalPerName[E.Name];
  100. CountAndTotal.first++;
  101. CountAndTotal.second += Duration;
  102. }
  103. Stack.pop_back();
  104. }
  105. // Write events from this TimeTraceProfilerInstance and
  106. // ThreadTimeTraceProfilerInstances.
  107. void write(raw_pwrite_stream &OS) {
  108. // Acquire Mutex as reading ThreadTimeTraceProfilerInstances.
  109. std::lock_guard<std::mutex> Lock(Mu);
  110. assert(Stack.empty() &&
  111. "All profiler sections should be ended when calling write");
  112. assert(llvm::all_of(*ThreadTimeTraceProfilerInstances,
  113. [](const auto &TTP) { return TTP->Stack.empty(); }) &&
  114. "All profiler sections should be ended when calling write");
  115. json::OStream J(OS);
  116. J.objectBegin();
  117. J.attributeBegin("traceEvents");
  118. J.arrayBegin();
  119. // Emit all events for the main flame graph.
  120. auto writeEvent = [&](const auto &E, uint64_t Tid) {
  121. auto StartUs = E.getFlameGraphStartUs(StartTime);
  122. auto DurUs = E.getFlameGraphDurUs();
  123. J.object([&] {
  124. J.attribute("pid", Pid);
  125. J.attribute("tid", int64_t(Tid));
  126. J.attribute("ph", "X");
  127. J.attribute("ts", StartUs);
  128. J.attribute("dur", DurUs);
  129. J.attribute("name", E.Name);
  130. if (!E.Detail.empty()) {
  131. J.attributeObject("args", [&] { J.attribute("detail", E.Detail); });
  132. }
  133. });
  134. };
  135. for (const Entry &E : Entries)
  136. writeEvent(E, this->Tid);
  137. for (const TimeTraceProfiler *TTP : *ThreadTimeTraceProfilerInstances)
  138. for (const Entry &E : TTP->Entries)
  139. writeEvent(E, TTP->Tid);
  140. // Emit totals by section name as additional "thread" events, sorted from
  141. // longest one.
  142. // Find highest used thread id.
  143. uint64_t MaxTid = this->Tid;
  144. for (const TimeTraceProfiler *TTP : *ThreadTimeTraceProfilerInstances)
  145. MaxTid = std::max(MaxTid, TTP->Tid);
  146. // Combine all CountAndTotalPerName from threads into one.
  147. StringMap<CountAndDurationType> AllCountAndTotalPerName;
  148. auto combineStat = [&](const auto &Stat) {
  149. StringRef Key = Stat.getKey();
  150. auto Value = Stat.getValue();
  151. auto &CountAndTotal = AllCountAndTotalPerName[Key];
  152. CountAndTotal.first += Value.first;
  153. CountAndTotal.second += Value.second;
  154. };
  155. for (const auto &Stat : CountAndTotalPerName)
  156. combineStat(Stat);
  157. for (const TimeTraceProfiler *TTP : *ThreadTimeTraceProfilerInstances)
  158. for (const auto &Stat : TTP->CountAndTotalPerName)
  159. combineStat(Stat);
  160. std::vector<NameAndCountAndDurationType> SortedTotals;
  161. SortedTotals.reserve(AllCountAndTotalPerName.size());
  162. for (const auto &Total : AllCountAndTotalPerName)
  163. SortedTotals.emplace_back(std::string(Total.getKey()), Total.getValue());
  164. llvm::sort(SortedTotals, [](const NameAndCountAndDurationType &A,
  165. const NameAndCountAndDurationType &B) {
  166. return A.second.second > B.second.second;
  167. });
  168. // Report totals on separate threads of tracing file.
  169. uint64_t TotalTid = MaxTid + 1;
  170. for (const NameAndCountAndDurationType &Total : SortedTotals) {
  171. auto DurUs = duration_cast<microseconds>(Total.second.second).count();
  172. auto Count = AllCountAndTotalPerName[Total.first].first;
  173. J.object([&] {
  174. J.attribute("pid", Pid);
  175. J.attribute("tid", int64_t(TotalTid));
  176. J.attribute("ph", "X");
  177. J.attribute("ts", 0);
  178. J.attribute("dur", DurUs);
  179. J.attribute("name", "Total " + Total.first);
  180. J.attributeObject("args", [&] {
  181. J.attribute("count", int64_t(Count));
  182. J.attribute("avg ms", int64_t(DurUs / Count / 1000));
  183. });
  184. });
  185. ++TotalTid;
  186. }
  187. auto writeMetadataEvent = [&](const char *Name, uint64_t Tid,
  188. StringRef arg) {
  189. J.object([&] {
  190. J.attribute("cat", "");
  191. J.attribute("pid", Pid);
  192. J.attribute("tid", int64_t(Tid));
  193. J.attribute("ts", 0);
  194. J.attribute("ph", "M");
  195. J.attribute("name", Name);
  196. J.attributeObject("args", [&] { J.attribute("name", arg); });
  197. });
  198. };
  199. writeMetadataEvent("process_name", Tid, ProcName);
  200. writeMetadataEvent("thread_name", Tid, ThreadName);
  201. for (const TimeTraceProfiler *TTP : *ThreadTimeTraceProfilerInstances)
  202. writeMetadataEvent("thread_name", TTP->Tid, TTP->ThreadName);
  203. J.arrayEnd();
  204. J.attributeEnd();
  205. // Emit the absolute time when this TimeProfiler started.
  206. // This can be used to combine the profiling data from
  207. // multiple processes and preserve actual time intervals.
  208. J.attribute("beginningOfTime",
  209. time_point_cast<microseconds>(BeginningOfTime)
  210. .time_since_epoch()
  211. .count());
  212. J.objectEnd();
  213. }
  214. SmallVector<Entry, 16> Stack;
  215. SmallVector<Entry, 128> Entries;
  216. StringMap<CountAndDurationType> CountAndTotalPerName;
  217. const time_point<system_clock> BeginningOfTime;
  218. const TimePointType StartTime;
  219. const std::string ProcName;
  220. const sys::Process::Pid Pid;
  221. SmallString<0> ThreadName;
  222. const uint64_t Tid;
  223. // Minimum time granularity (in microseconds)
  224. const unsigned TimeTraceGranularity;
  225. };
  226. void llvm::timeTraceProfilerInitialize(unsigned TimeTraceGranularity,
  227. StringRef ProcName) {
  228. assert(TimeTraceProfilerInstance == nullptr &&
  229. "Profiler should not be initialized");
  230. TimeTraceProfilerInstance = new TimeTraceProfiler(
  231. TimeTraceGranularity, llvm::sys::path::filename(ProcName));
  232. }
  233. // Removes all TimeTraceProfilerInstances.
  234. // Called from main thread.
  235. void llvm::timeTraceProfilerCleanup() {
  236. delete TimeTraceProfilerInstance;
  237. TimeTraceProfilerInstance = nullptr;
  238. std::lock_guard<std::mutex> Lock(Mu);
  239. for (auto *TTP : *ThreadTimeTraceProfilerInstances)
  240. delete TTP;
  241. ThreadTimeTraceProfilerInstances->clear();
  242. }
  243. // Finish TimeTraceProfilerInstance on a worker thread.
  244. // This doesn't remove the instance, just moves the pointer to global vector.
  245. void llvm::timeTraceProfilerFinishThread() {
  246. std::lock_guard<std::mutex> Lock(Mu);
  247. ThreadTimeTraceProfilerInstances->push_back(TimeTraceProfilerInstance);
  248. TimeTraceProfilerInstance = nullptr;
  249. }
  250. void llvm::timeTraceProfilerWrite(raw_pwrite_stream &OS) {
  251. assert(TimeTraceProfilerInstance != nullptr &&
  252. "Profiler object can't be null");
  253. TimeTraceProfilerInstance->write(OS);
  254. }
  255. Error llvm::timeTraceProfilerWrite(StringRef PreferredFileName,
  256. StringRef FallbackFileName) {
  257. assert(TimeTraceProfilerInstance != nullptr &&
  258. "Profiler object can't be null");
  259. std::string Path = PreferredFileName.str();
  260. if (Path.empty()) {
  261. Path = FallbackFileName == "-" ? "out" : FallbackFileName.str();
  262. Path += ".time-trace";
  263. }
  264. std::error_code EC;
  265. raw_fd_ostream OS(Path, EC, sys::fs::OF_TextWithCRLF);
  266. if (EC)
  267. return createStringError(EC, "Could not open " + Path);
  268. timeTraceProfilerWrite(OS);
  269. return Error::success();
  270. }
  271. void llvm::timeTraceProfilerBegin(StringRef Name, StringRef Detail) {
  272. if (TimeTraceProfilerInstance != nullptr)
  273. TimeTraceProfilerInstance->begin(std::string(Name),
  274. [&]() { return std::string(Detail); });
  275. }
  276. void llvm::timeTraceProfilerBegin(StringRef Name,
  277. llvm::function_ref<std::string()> Detail) {
  278. if (TimeTraceProfilerInstance != nullptr)
  279. TimeTraceProfilerInstance->begin(std::string(Name), Detail);
  280. }
  281. void llvm::timeTraceProfilerEnd() {
  282. if (TimeTraceProfilerInstance != nullptr)
  283. TimeTraceProfilerInstance->end();
  284. }