X86Counter.cpp

//===-- X86Counter.cpp ------------------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "X86Counter.h"

#if defined(__linux__) && defined(HAVE_LIBPFM) &&                             \
    defined(LIBPFM_HAS_FIELD_CYCLES)

// FIXME: Use appropriate wrappers for poll.h and mman.h
// to support Windows and remove this linux-only guard.

#include "llvm/Support/Endian.h"
#include "llvm/Support/Errc.h"
#include "llvm/Support/Error.h"

#include <perfmon/perf_event.h>
#include <perfmon/pfmlib.h>
#include <perfmon/pfmlib_perf_event.h>

#include <atomic>
#include <chrono>
#include <cstddef>
#include <cstdint>
#include <limits>
#include <memory>
#include <vector>

#include <poll.h>
#include <sys/mman.h>
#include <unistd.h>

namespace llvm {
namespace exegesis {

// Number of entries in the LBR.
static constexpr int kLbrEntries = 16;
static constexpr size_t kBufferPages = 8;
static const size_t kDataBufferSize = kBufferPages * getpagesize();
// First page is reserved for perf_event_mmap_page. Data buffer starts on
// the next page, so we allocate one more page.
static const size_t kMappedBufferSize = (kBufferPages + 1) * getpagesize();

// Waits for the LBR perf events.
static int pollLbrPerfEvent(const int FileDescriptor) {
  struct pollfd PollFd;
  PollFd.fd = FileDescriptor;
  PollFd.events = POLLIN;
  PollFd.revents = 0;
  return poll(&PollFd, 1 /* num of fds */, 10000 /* timeout in ms */);
}

// Copies the data buffer into Buf, given the pointer to the mmapped buffer.
static void copyDataBuffer(void *MMappedBuffer, char *Buf, uint64_t Tail,
                           size_t DataSize) {
  // First page is reserved for perf_event_mmap_page. Data buffer starts on
  // the next page.
  char *Start = reinterpret_cast<char *>(MMappedBuffer) + getpagesize();
  // The LBR buffer is a cyclic buffer; we copy its contents out into a flat
  // buffer.
  uint64_t Offset = Tail % kDataBufferSize;
  size_t CopySize = kDataBufferSize - Offset;
  memcpy(Buf, Start + Offset, CopySize);
  if (CopySize >= DataSize)
    return;
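  // The records wrapped around the end of the cyclic buffer; copy the rest
  // from the start of the data pages.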
  memcpy(Buf + CopySize, Start, Offset);
}

// Parses the given data buffer for samples and fills CycleArray with the
// cycle counts of the branch entries that fall within the [From, To) range.
static llvm::Error parseDataBuffer(const char *DataBuf, size_t DataSize,
                                   const void *From, const void *To,
                                   llvm::SmallVector<int64_t, 4> *CycleArray) {
  const char *DataPtr = DataBuf;
  while (DataPtr < DataBuf + DataSize) {
    struct perf_event_header Header;
    memcpy(&Header, DataPtr, sizeof(struct perf_event_header));
    if (Header.type != PERF_RECORD_SAMPLE) {
      // Ignores non-sample records.
      DataPtr += Header.size;
      continue;
    }
    DataPtr += sizeof(Header);
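    // With sample_type == PERF_SAMPLE_BRANCH_STACK, the sample body is a u64
    // entry count followed by that many perf_branch_entry records.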
    uint64_t Count = llvm::support::endian::read64(DataPtr, support::native);
    DataPtr += sizeof(Count);

    struct perf_branch_entry Entry;
    memcpy(&Entry, DataPtr, sizeof(struct perf_branch_entry));
    // Read the perf_branch_entry array.
    for (uint64_t i = 0; i < Count; ++i) {
      const uint64_t BlockStart = From == nullptr
                                      ? std::numeric_limits<uint64_t>::min()
                                      : reinterpret_cast<uint64_t>(From);
      const uint64_t BlockEnd = To == nullptr
                                    ? std::numeric_limits<uint64_t>::max()
                                    : reinterpret_cast<uint64_t>(To);

      if (BlockStart <= Entry.from && BlockEnd >= Entry.to)
        CycleArray->push_back(Entry.cycles);

      if (i == Count - 1)
        // We've reached the last entry.
        return llvm::Error::success();

      // Advance to the next entry.
      DataPtr += sizeof(Entry);
      memcpy(&Entry, DataPtr, sizeof(struct perf_branch_entry));
    }
  }
  return llvm::make_error<llvm::StringError>("Unable to parse data buffer.",
                                             llvm::errc::io_error);
}

X86LbrPerfEvent::X86LbrPerfEvent(unsigned SamplingPeriod) {
  assert(SamplingPeriod > 0 && "SamplingPeriod must be positive");
  EventString = "BR_INST_RETIRED.NEAR_TAKEN";
  Attr = new perf_event_attr();
  Attr->size = sizeof(*Attr);
  Attr->type = PERF_TYPE_RAW;
  // FIXME This is SKL's encoding. Not sure if it'll change.
  Attr->config = 0x20c4; // BR_INST_RETIRED.NEAR_TAKEN
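  // Ask the kernel to record the LBR branch stack with each overflow sample.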
  Attr->sample_type = PERF_SAMPLE_BRANCH_STACK;
  // Don't need to specify "USER" because we've already excluded HV and Kernel.
  Attr->branch_sample_type = PERF_SAMPLE_BRANCH_ANY;
  Attr->sample_period = SamplingPeriod;
  Attr->wakeup_events = 1; // We need this even when using ioctl REFRESH.
  Attr->disabled = 1;
  Attr->exclude_kernel = 1;
  Attr->exclude_hv = 1;
  Attr->read_format = PERF_FORMAT_GROUP;

  FullQualifiedEventString = EventString;
}

X86LbrCounter::X86LbrCounter(pfm::PerfEvent &&NewEvent)
    : Counter(std::move(NewEvent)) {
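  // Map the perf ring buffer: one metadata page (perf_event_mmap_page)
  // followed by kBufferPages data pages that the kernel fills with records.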
  MMappedBuffer = mmap(nullptr, kMappedBufferSize, PROT_READ | PROT_WRITE,
                       MAP_SHARED, FileDescriptor, 0);
  if (MMappedBuffer == MAP_FAILED)
    llvm::errs() << "Failed to mmap buffer.";
}

X86LbrCounter::~X86LbrCounter() {
  if (0 != munmap(MMappedBuffer, kMappedBufferSize))
    llvm::errs() << "Failed to munmap buffer.";
}

void X86LbrCounter::start() {
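  // PERF_EVENT_IOC_REFRESH enables the (initially disabled) event and keeps
  // it enabled until the given number of overflow wakeups have been delivered.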
  ioctl(FileDescriptor, PERF_EVENT_IOC_REFRESH, 1024 /* kMaxPollsPerFd */);
}

llvm::Error X86LbrCounter::checkLbrSupport() {
  // Do a sample read and check if the results contain non-zero values.

  X86LbrCounter counter(X86LbrPerfEvent(123));
  counter.start();

  // Prevent the compiler from unrolling the loop and from getting rid of all
  // the branches. We need at least 16 iterations.
  int Sum = 0;
  int V = 1;

  volatile int *P = &V;
  auto TimeLimit =
      std::chrono::high_resolution_clock::now() + std::chrono::microseconds(5);
  for (int I = 0;
       I < kLbrEntries || std::chrono::high_resolution_clock::now() < TimeLimit;
       ++I) {
    Sum += *P;
  }

  counter.stop();
  (void)Sum;
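
  // Read back with no address filter: null bounds keep every branch entry.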
  auto ResultOrError = counter.doReadCounter(nullptr, nullptr);
  if (ResultOrError)
    if (!ResultOrError.get().empty())
      // If there is at least one non-zero entry, then LBR is supported.
      for (const int64_t &Value : ResultOrError.get())
        if (Value != 0)
          return Error::success();

  return llvm::make_error<llvm::StringError>(
      "LBR format with cycles is not supported on the host.",
      llvm::errc::not_supported);
}

llvm::Expected<llvm::SmallVector<int64_t, 4>>
X86LbrCounter::readOrError(StringRef FunctionBytes) const {
  // Disable the event before reading.
  ioctl(FileDescriptor, PERF_EVENT_IOC_DISABLE, 0);

  // Find the boundary of the function so that we can filter the LBRs and
  // keep only the relevant records.
  if (FunctionBytes.empty())
    return llvm::make_error<llvm::StringError>("Empty function bytes",
                                               llvm::errc::invalid_argument);
  const void *From = reinterpret_cast<const void *>(FunctionBytes.data());
  const void *To = reinterpret_cast<const void *>(FunctionBytes.data() +
                                                  FunctionBytes.size());
  return doReadCounter(From, To);
}

llvm::Expected<llvm::SmallVector<int64_t, 4>>
X86LbrCounter::doReadCounter(const void *From, const void *To) const {
  // The max number of time-outs/retries before we give up.
  static constexpr int kMaxTimeouts = 160;

  // Parses the LBR buffer and fills CycleArray with the sequence of cycle
  // counts from the buffer.
  llvm::SmallVector<int64_t, 4> CycleArray;
  auto DataBuf = std::make_unique<char[]>(kDataBufferSize);
  int NumTimeouts = 0;
  int PollResult = 0;

  while (PollResult <= 0) {
    PollResult = pollLbrPerfEvent(FileDescriptor);
    if (PollResult > 0)
      break;
    if (PollResult == -1)
      return llvm::make_error<llvm::StringError>("Cannot poll LBR perf event.",
                                                 llvm::errc::io_error);
    if (NumTimeouts++ >= kMaxTimeouts)
      return llvm::make_error<llvm::StringError>(
          "LBR polling still timed out after max number of attempts.",
          llvm::errc::device_or_resource_busy);
  }
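
  // data_head marks how far the kernel has written into the ring buffer and
  // data_tail how far user space has read; both live in the metadata page at
  // the start of the mapping.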
  struct perf_event_mmap_page Page;
  memcpy(&Page, MMappedBuffer, sizeof(struct perf_event_mmap_page));

  const uint64_t DataTail = Page.data_tail;
  const uint64_t DataHead = Page.data_head;
  // We're supposed to use a barrier after reading data_head.
  std::atomic_thread_fence(std::memory_order_acq_rel);
  const size_t DataSize = DataHead - DataTail;

  if (DataSize > kDataBufferSize)
    return llvm::make_error<llvm::StringError>(
        "DataSize larger than buffer size.", llvm::errc::invalid_argument);

  copyDataBuffer(MMappedBuffer, DataBuf.get(), DataTail, DataSize);
  llvm::Error error =
      parseDataBuffer(DataBuf.get(), DataSize, From, To, &CycleArray);
  if (!error)
    return CycleArray;
  return std::move(error);
}

} // namespace exegesis
} // namespace llvm

#endif // defined(__linux__) && defined(HAVE_LIBPFM) &&
       // defined(LIBPFM_HAS_FIELD_CYCLES)