TimeProfiler.cpp 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359
  1. //===-- TimeProfiler.cpp - Hierarchical Time Profiler ---------------------===//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. //
  9. // This file implements hierarchical time profiler.
  10. //
  11. //===----------------------------------------------------------------------===//
  12. #include "llvm/Support/TimeProfiler.h"
  13. #include "llvm/ADT/STLFunctionalExtras.h"
  14. #include "llvm/ADT/StringMap.h"
  15. #include "llvm/Support/JSON.h"
  16. #include "llvm/Support/Path.h"
  17. #include "llvm/Support/Process.h"
  18. #include "llvm/Support/Threading.h"
  19. #include <algorithm>
  20. #include <cassert>
  21. #include <chrono>
  22. #include <mutex>
  23. #include <string>
  24. #include <vector>
  25. using namespace llvm;
  26. namespace {
  27. using std::chrono::duration;
  28. using std::chrono::duration_cast;
  29. using std::chrono::microseconds;
  30. using std::chrono::steady_clock;
  31. using std::chrono::system_clock;
  32. using std::chrono::time_point;
  33. using std::chrono::time_point_cast;
  34. struct TimeTraceProfilerInstances {
  35. std::mutex Lock;
  36. std::vector<TimeTraceProfiler *> List;
  37. };
  38. TimeTraceProfilerInstances &getTimeTraceProfilerInstances() {
  39. static TimeTraceProfilerInstances Instances;
  40. return Instances;
  41. }
  42. } // anonymous namespace
  43. // Per Thread instance
  44. static LLVM_THREAD_LOCAL TimeTraceProfiler *TimeTraceProfilerInstance = nullptr;
  45. TimeTraceProfiler *llvm::getTimeTraceProfilerInstance() {
  46. return TimeTraceProfilerInstance;
  47. }
  48. namespace {
  49. using ClockType = steady_clock;
  50. using TimePointType = time_point<ClockType>;
  51. using DurationType = duration<ClockType::rep, ClockType::period>;
  52. using CountAndDurationType = std::pair<size_t, DurationType>;
  53. using NameAndCountAndDurationType =
  54. std::pair<std::string, CountAndDurationType>;
  55. /// Represents an open or completed time section entry to be captured.
  56. struct TimeTraceProfilerEntry {
  57. const TimePointType Start;
  58. TimePointType End;
  59. const std::string Name;
  60. const std::string Detail;
  61. TimeTraceProfilerEntry(TimePointType &&S, TimePointType &&E, std::string &&N,
  62. std::string &&Dt)
  63. : Start(std::move(S)), End(std::move(E)), Name(std::move(N)),
  64. Detail(std::move(Dt)) {}
  65. // Calculate timings for FlameGraph. Cast time points to microsecond precision
  66. // rather than casting duration. This avoids truncation issues causing inner
  67. // scopes overruning outer scopes.
  68. ClockType::rep getFlameGraphStartUs(TimePointType StartTime) const {
  69. return (time_point_cast<microseconds>(Start) -
  70. time_point_cast<microseconds>(StartTime))
  71. .count();
  72. }
  73. ClockType::rep getFlameGraphDurUs() const {
  74. return (time_point_cast<microseconds>(End) -
  75. time_point_cast<microseconds>(Start))
  76. .count();
  77. }
  78. };
  79. } // anonymous namespace
  80. struct llvm::TimeTraceProfiler {
  81. TimeTraceProfiler(unsigned TimeTraceGranularity = 0, StringRef ProcName = "")
  82. : BeginningOfTime(system_clock::now()), StartTime(ClockType::now()),
  83. ProcName(ProcName), Pid(sys::Process::getProcessId()),
  84. Tid(llvm::get_threadid()), TimeTraceGranularity(TimeTraceGranularity) {
  85. llvm::get_thread_name(ThreadName);
  86. }
  87. void begin(std::string Name, llvm::function_ref<std::string()> Detail) {
  88. Stack.emplace_back(ClockType::now(), TimePointType(), std::move(Name),
  89. Detail());
  90. }
  91. void end() {
  92. assert(!Stack.empty() && "Must call begin() first");
  93. TimeTraceProfilerEntry &E = Stack.back();
  94. E.End = ClockType::now();
  95. // Check that end times monotonically increase.
  96. assert((Entries.empty() ||
  97. (E.getFlameGraphStartUs(StartTime) + E.getFlameGraphDurUs() >=
  98. Entries.back().getFlameGraphStartUs(StartTime) +
  99. Entries.back().getFlameGraphDurUs())) &&
  100. "TimeProfiler scope ended earlier than previous scope");
  101. // Calculate duration at full precision for overall counts.
  102. DurationType Duration = E.End - E.Start;
  103. // Only include sections longer or equal to TimeTraceGranularity msec.
  104. if (duration_cast<microseconds>(Duration).count() >= TimeTraceGranularity)
  105. Entries.emplace_back(E);
  106. // Track total time taken by each "name", but only the topmost levels of
  107. // them; e.g. if there's a template instantiation that instantiates other
  108. // templates from within, we only want to add the topmost one. "topmost"
  109. // happens to be the ones that don't have any currently open entries above
  110. // itself.
  111. if (llvm::none_of(llvm::drop_begin(llvm::reverse(Stack)),
  112. [&](const TimeTraceProfilerEntry &Val) {
  113. return Val.Name == E.Name;
  114. })) {
  115. auto &CountAndTotal = CountAndTotalPerName[E.Name];
  116. CountAndTotal.first++;
  117. CountAndTotal.second += Duration;
  118. }
  119. Stack.pop_back();
  120. }
  121. // Write events from this TimeTraceProfilerInstance and
  122. // ThreadTimeTraceProfilerInstances.
  123. void write(raw_pwrite_stream &OS) {
  124. // Acquire Mutex as reading ThreadTimeTraceProfilerInstances.
  125. auto &Instances = getTimeTraceProfilerInstances();
  126. std::lock_guard<std::mutex> Lock(Instances.Lock);
  127. assert(Stack.empty() &&
  128. "All profiler sections should be ended when calling write");
  129. assert(llvm::all_of(Instances.List,
  130. [](const auto &TTP) { return TTP->Stack.empty(); }) &&
  131. "All profiler sections should be ended when calling write");
  132. json::OStream J(OS);
  133. J.objectBegin();
  134. J.attributeBegin("traceEvents");
  135. J.arrayBegin();
  136. // Emit all events for the main flame graph.
  137. auto writeEvent = [&](const auto &E, uint64_t Tid) {
  138. auto StartUs = E.getFlameGraphStartUs(StartTime);
  139. auto DurUs = E.getFlameGraphDurUs();
  140. J.object([&] {
  141. J.attribute("pid", Pid);
  142. J.attribute("tid", int64_t(Tid));
  143. J.attribute("ph", "X");
  144. J.attribute("ts", StartUs);
  145. J.attribute("dur", DurUs);
  146. J.attribute("name", E.Name);
  147. if (!E.Detail.empty()) {
  148. J.attributeObject("args", [&] { J.attribute("detail", E.Detail); });
  149. }
  150. });
  151. };
  152. for (const TimeTraceProfilerEntry &E : Entries)
  153. writeEvent(E, this->Tid);
  154. for (const TimeTraceProfiler *TTP : Instances.List)
  155. for (const TimeTraceProfilerEntry &E : TTP->Entries)
  156. writeEvent(E, TTP->Tid);
  157. // Emit totals by section name as additional "thread" events, sorted from
  158. // longest one.
  159. // Find highest used thread id.
  160. uint64_t MaxTid = this->Tid;
  161. for (const TimeTraceProfiler *TTP : Instances.List)
  162. MaxTid = std::max(MaxTid, TTP->Tid);
  163. // Combine all CountAndTotalPerName from threads into one.
  164. StringMap<CountAndDurationType> AllCountAndTotalPerName;
  165. auto combineStat = [&](const auto &Stat) {
  166. StringRef Key = Stat.getKey();
  167. auto Value = Stat.getValue();
  168. auto &CountAndTotal = AllCountAndTotalPerName[Key];
  169. CountAndTotal.first += Value.first;
  170. CountAndTotal.second += Value.second;
  171. };
  172. for (const auto &Stat : CountAndTotalPerName)
  173. combineStat(Stat);
  174. for (const TimeTraceProfiler *TTP : Instances.List)
  175. for (const auto &Stat : TTP->CountAndTotalPerName)
  176. combineStat(Stat);
  177. std::vector<NameAndCountAndDurationType> SortedTotals;
  178. SortedTotals.reserve(AllCountAndTotalPerName.size());
  179. for (const auto &Total : AllCountAndTotalPerName)
  180. SortedTotals.emplace_back(std::string(Total.getKey()), Total.getValue());
  181. llvm::sort(SortedTotals, [](const NameAndCountAndDurationType &A,
  182. const NameAndCountAndDurationType &B) {
  183. return A.second.second > B.second.second;
  184. });
  185. // Report totals on separate threads of tracing file.
  186. uint64_t TotalTid = MaxTid + 1;
  187. for (const NameAndCountAndDurationType &Total : SortedTotals) {
  188. auto DurUs = duration_cast<microseconds>(Total.second.second).count();
  189. auto Count = AllCountAndTotalPerName[Total.first].first;
  190. J.object([&] {
  191. J.attribute("pid", Pid);
  192. J.attribute("tid", int64_t(TotalTid));
  193. J.attribute("ph", "X");
  194. J.attribute("ts", 0);
  195. J.attribute("dur", DurUs);
  196. J.attribute("name", "Total " + Total.first);
  197. J.attributeObject("args", [&] {
  198. J.attribute("count", int64_t(Count));
  199. J.attribute("avg ms", int64_t(DurUs / Count / 1000));
  200. });
  201. });
  202. ++TotalTid;
  203. }
  204. auto writeMetadataEvent = [&](const char *Name, uint64_t Tid,
  205. StringRef arg) {
  206. J.object([&] {
  207. J.attribute("cat", "");
  208. J.attribute("pid", Pid);
  209. J.attribute("tid", int64_t(Tid));
  210. J.attribute("ts", 0);
  211. J.attribute("ph", "M");
  212. J.attribute("name", Name);
  213. J.attributeObject("args", [&] { J.attribute("name", arg); });
  214. });
  215. };
  216. writeMetadataEvent("process_name", Tid, ProcName);
  217. writeMetadataEvent("thread_name", Tid, ThreadName);
  218. for (const TimeTraceProfiler *TTP : Instances.List)
  219. writeMetadataEvent("thread_name", TTP->Tid, TTP->ThreadName);
  220. J.arrayEnd();
  221. J.attributeEnd();
  222. // Emit the absolute time when this TimeProfiler started.
  223. // This can be used to combine the profiling data from
  224. // multiple processes and preserve actual time intervals.
  225. J.attribute("beginningOfTime",
  226. time_point_cast<microseconds>(BeginningOfTime)
  227. .time_since_epoch()
  228. .count());
  229. J.objectEnd();
  230. }
  231. SmallVector<TimeTraceProfilerEntry, 16> Stack;
  232. SmallVector<TimeTraceProfilerEntry, 128> Entries;
  233. StringMap<CountAndDurationType> CountAndTotalPerName;
  234. // System clock time when the session was begun.
  235. const time_point<system_clock> BeginningOfTime;
  236. // Profiling clock time when the session was begun.
  237. const TimePointType StartTime;
  238. const std::string ProcName;
  239. const sys::Process::Pid Pid;
  240. SmallString<0> ThreadName;
  241. const uint64_t Tid;
  242. // Minimum time granularity (in microseconds)
  243. const unsigned TimeTraceGranularity;
  244. };
  245. void llvm::timeTraceProfilerInitialize(unsigned TimeTraceGranularity,
  246. StringRef ProcName) {
  247. assert(TimeTraceProfilerInstance == nullptr &&
  248. "Profiler should not be initialized");
  249. TimeTraceProfilerInstance = new TimeTraceProfiler(
  250. TimeTraceGranularity, llvm::sys::path::filename(ProcName));
  251. }
  252. // Removes all TimeTraceProfilerInstances.
  253. // Called from main thread.
  254. void llvm::timeTraceProfilerCleanup() {
  255. delete TimeTraceProfilerInstance;
  256. TimeTraceProfilerInstance = nullptr;
  257. auto &Instances = getTimeTraceProfilerInstances();
  258. std::lock_guard<std::mutex> Lock(Instances.Lock);
  259. for (auto *TTP : Instances.List)
  260. delete TTP;
  261. Instances.List.clear();
  262. }
  263. // Finish TimeTraceProfilerInstance on a worker thread.
  264. // This doesn't remove the instance, just moves the pointer to global vector.
  265. void llvm::timeTraceProfilerFinishThread() {
  266. auto &Instances = getTimeTraceProfilerInstances();
  267. std::lock_guard<std::mutex> Lock(Instances.Lock);
  268. Instances.List.push_back(TimeTraceProfilerInstance);
  269. TimeTraceProfilerInstance = nullptr;
  270. }
  271. void llvm::timeTraceProfilerWrite(raw_pwrite_stream &OS) {
  272. assert(TimeTraceProfilerInstance != nullptr &&
  273. "Profiler object can't be null");
  274. TimeTraceProfilerInstance->write(OS);
  275. }
  276. Error llvm::timeTraceProfilerWrite(StringRef PreferredFileName,
  277. StringRef FallbackFileName) {
  278. assert(TimeTraceProfilerInstance != nullptr &&
  279. "Profiler object can't be null");
  280. std::string Path = PreferredFileName.str();
  281. if (Path.empty()) {
  282. Path = FallbackFileName == "-" ? "out" : FallbackFileName.str();
  283. Path += ".time-trace";
  284. }
  285. std::error_code EC;
  286. raw_fd_ostream OS(Path, EC, sys::fs::OF_TextWithCRLF);
  287. if (EC)
  288. return createStringError(EC, "Could not open " + Path);
  289. timeTraceProfilerWrite(OS);
  290. return Error::success();
  291. }
  292. void llvm::timeTraceProfilerBegin(StringRef Name, StringRef Detail) {
  293. if (TimeTraceProfilerInstance != nullptr)
  294. TimeTraceProfilerInstance->begin(std::string(Name),
  295. [&]() { return std::string(Detail); });
  296. }
  297. void llvm::timeTraceProfilerBegin(StringRef Name,
  298. llvm::function_ref<std::string()> Detail) {
  299. if (TimeTraceProfilerInstance != nullptr)
  300. TimeTraceProfilerInstance->begin(std::string(Name), Detail);
  301. }
  302. void llvm::timeTraceProfilerEnd() {
  303. if (TimeTraceProfilerInstance != nullptr)
  304. TimeTraceProfilerInstance->end();
  305. }