123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261 |
- //===-- X86Counter.cpp ------------------------------------------*- C++ -*-===//
- //
- // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
- // See https://llvm.org/LICENSE.txt for license information.
- // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
- //
- //===----------------------------------------------------------------------===//
- #include "X86Counter.h"
- #if defined(__linux__) && defined(HAVE_LIBPFM) && \
- defined(LIBPFM_HAS_FIELD_CYCLES)
- // FIXME: Use appropriate wrappers for poll.h and mman.h
- // to support Windows and remove this linux-only guard.
- #include "llvm/Support/Endian.h"
- #include "llvm/Support/Errc.h"
- #error #include <perfmon/perf_event.h>
- #error #include <perfmon/pfmlib.h>
- #error #include <perfmon/pfmlib_perf_event.h>
- #include <atomic>
- #include <chrono>
- #include <cstddef>
- #include <cstdint>
- #include <limits>
- #include <memory>
- #include <vector>
- #include <poll.h>
- #include <sys/mman.h>
- #include <unistd.h>
- namespace llvm {
- namespace exegesis {
// Number of entries in the LBR.
static constexpr int kLbrEntries = 16;
// Number of pages that make up the data area of the perf buffer.
static constexpr size_t kBufferPages = 8;
// Size in bytes of the data area. Computed at start-up because the page size
// is only known at run time.
static const size_t kDataBufferSize = getpagesize() * kBufferPages;
// Size in bytes of the whole mapping: the data area plus one leading page,
// which is reserved for perf_event_mmap_page.
static const size_t kMappedBufferSize = getpagesize() * (1 + kBufferPages);
// Waits for the LBR perf event behind FileDescriptor to become readable.
// Gives up after a fixed 10 second timeout and returns poll()'s result
// unchanged.
static int pollLbrPerfEvent(const int FileDescriptor) {
  constexpr int kTimeoutMs = 10000;
  constexpr nfds_t kNumFds = 1;
  // Value-initialization zeroes revents.
  struct pollfd Request = {};
  Request.fd = FileDescriptor;
  Request.events = POLLIN;
  return poll(&Request, kNumFds, kTimeoutMs);
}
- // Copies the data-buffer into Buf, given the pointer to MMapped.
- static void copyDataBuffer(void *MMappedBuffer, char *Buf, uint64_t Tail,
- size_t DataSize) {
- // First page is reserved for perf_event_mmap_page. Data buffer starts on
- // the next page.
- char *Start = reinterpret_cast<char *>(MMappedBuffer) + getpagesize();
- // The LBR buffer is a cyclic buffer, we copy data to another buffer.
- uint64_t Offset = Tail % kDataBufferSize;
- size_t CopySize = kDataBufferSize - Offset;
- memcpy(Buf, Start + Offset, CopySize);
- if (CopySize >= DataSize)
- return;
- memcpy(Buf + CopySize, Start, Offset);
- return;
- }
- // Parses the given data-buffer for stats and fill the CycleArray.
- // If data has been extracted successfully, also modifies the code to jump
- // out the benchmark loop.
- static llvm::Error parseDataBuffer(const char *DataBuf, size_t DataSize,
- const void *From, const void *To,
- llvm::SmallVector<int64_t, 4> *CycleArray) {
- const char *DataPtr = DataBuf;
- while (DataPtr < DataBuf + DataSize) {
- struct perf_event_header Header;
- memcpy(&Header, DataPtr, sizeof(struct perf_event_header));
- if (Header.type != PERF_RECORD_SAMPLE) {
- // Ignores non-sample records.
- DataPtr += Header.size;
- continue;
- }
- DataPtr += sizeof(Header);
- uint64_t Count = llvm::support::endian::read64(DataPtr, support::native);
- DataPtr += sizeof(Count);
- struct perf_branch_entry Entry;
- memcpy(&Entry, DataPtr, sizeof(struct perf_branch_entry));
- // Read the perf_branch_entry array.
- for (uint64_t i = 0; i < Count; ++i) {
- const uint64_t BlockStart = From == nullptr
- ? std::numeric_limits<uint64_t>::min()
- : reinterpret_cast<uint64_t>(From);
- const uint64_t BlockEnd = To == nullptr
- ? std::numeric_limits<uint64_t>::max()
- : reinterpret_cast<uint64_t>(To);
- if (BlockStart <= Entry.from && BlockEnd >= Entry.to)
- CycleArray->push_back(Entry.cycles);
- if (i == Count - 1)
- // We've reached the last entry.
- return llvm::Error::success();
- // Advance to next entry
- DataPtr += sizeof(Entry);
- memcpy(&Entry, DataPtr, sizeof(struct perf_branch_entry));
- }
- }
- return llvm::make_error<llvm::StringError>("Unable to parse databuffer.",
- llvm::errc::io_error);
- }
- X86LbrPerfEvent::X86LbrPerfEvent(unsigned SamplingPeriod) {
- assert(SamplingPeriod > 0 && "SamplingPeriod must be positive");
- EventString = "BR_INST_RETIRED.NEAR_TAKEN";
- Attr = new perf_event_attr();
- Attr->size = sizeof(*Attr);
- Attr->type = PERF_TYPE_RAW;
- // FIXME This is SKL's encoding. Not sure if it'll change.
- Attr->config = 0x20c4; // BR_INST_RETIRED.NEAR_TAKEN
- Attr->sample_type = PERF_SAMPLE_BRANCH_STACK;
- // Don't need to specify "USER" because we've already excluded HV and Kernel.
- Attr->branch_sample_type = PERF_SAMPLE_BRANCH_ANY;
- Attr->sample_period = SamplingPeriod;
- Attr->wakeup_events = 1; // We need this even when using ioctl REFRESH.
- Attr->disabled = 1;
- Attr->exclude_kernel = 1;
- Attr->exclude_hv = 1;
- Attr->read_format = PERF_FORMAT_GROUP;
- FullQualifiedEventString = EventString;
- }
- X86LbrCounter::X86LbrCounter(pfm::PerfEvent &&NewEvent)
- : Counter(std::move(NewEvent)) {
- MMappedBuffer = mmap(nullptr, kMappedBufferSize, PROT_READ | PROT_WRITE,
- MAP_SHARED, FileDescriptor, 0);
- if (MMappedBuffer == MAP_FAILED)
- llvm::errs() << "Failed to mmap buffer.";
- }
- X86LbrCounter::~X86LbrCounter() {
- if (0 != munmap(MMappedBuffer, kMappedBufferSize))
- llvm::errs() << "Failed to munmap buffer.";
- }
- void X86LbrCounter::start() {
- ioctl(FileDescriptor, PERF_EVENT_IOC_REFRESH, 1024 /* kMaxPollsPerFd */);
- }
- llvm::Error X86LbrCounter::checkLbrSupport() {
- // Do a sample read and check if the results contain non-zero values.
- X86LbrCounter counter(X86LbrPerfEvent(123));
- counter.start();
- // Prevent the compiler from unrolling the loop and get rid of all the
- // branches. We need at least 16 iterations.
- int Sum = 0;
- int V = 1;
- volatile int *P = &V;
- auto TimeLimit =
- std::chrono::high_resolution_clock::now() + std::chrono::microseconds(5);
- for (int I = 0;
- I < kLbrEntries || std::chrono::high_resolution_clock::now() < TimeLimit;
- ++I) {
- Sum += *P;
- }
- counter.stop();
- (void)Sum;
- auto ResultOrError = counter.doReadCounter(nullptr, nullptr);
- if (ResultOrError)
- if (!ResultOrError.get().empty())
- // If there is at least one non-zero entry, then LBR is supported.
- for (const int64_t &Value : ResultOrError.get())
- if (Value != 0)
- return Error::success();
- return llvm::make_error<llvm::StringError>(
- "LBR format with cycles is not suppported on the host.",
- llvm::errc::not_supported);
- }
- llvm::Expected<llvm::SmallVector<int64_t, 4>>
- X86LbrCounter::readOrError(StringRef FunctionBytes) const {
- // Disable the event before reading
- ioctl(FileDescriptor, PERF_EVENT_IOC_DISABLE, 0);
- // Find the boundary of the function so that we could filter the LBRs
- // to keep only the relevant records.
- if (FunctionBytes.empty())
- return llvm::make_error<llvm::StringError>("Empty function bytes",
- llvm::errc::invalid_argument);
- const void *From = reinterpret_cast<const void *>(FunctionBytes.data());
- const void *To = reinterpret_cast<const void *>(FunctionBytes.data() +
- FunctionBytes.size());
- return doReadCounter(From, To);
- }
- llvm::Expected<llvm::SmallVector<int64_t, 4>>
- X86LbrCounter::doReadCounter(const void *From, const void *To) const {
- // The max number of time-outs/retries before we give up.
- static constexpr int kMaxTimeouts = 160;
- // Parses the LBR buffer and fills CycleArray with the sequence of cycle
- // counts from the buffer.
- llvm::SmallVector<int64_t, 4> CycleArray;
- auto DataBuf = std::make_unique<char[]>(kDataBufferSize);
- int NumTimeouts = 0;
- int PollResult = 0;
- while (PollResult <= 0) {
- PollResult = pollLbrPerfEvent(FileDescriptor);
- if (PollResult > 0)
- break;
- if (PollResult == -1)
- return llvm::make_error<llvm::StringError>("Cannot poll LBR perf event.",
- llvm::errc::io_error);
- if (NumTimeouts++ >= kMaxTimeouts)
- return llvm::make_error<llvm::StringError>(
- "LBR polling still timed out after max number of attempts.",
- llvm::errc::device_or_resource_busy);
- }
- struct perf_event_mmap_page Page;
- memcpy(&Page, MMappedBuffer, sizeof(struct perf_event_mmap_page));
- const uint64_t DataTail = Page.data_tail;
- const uint64_t DataHead = Page.data_head;
- // We're supposed to use a barrier after reading data_head.
- std::atomic_thread_fence(std::memory_order_acq_rel);
- const size_t DataSize = DataHead - DataTail;
- if (DataSize > kDataBufferSize)
- return llvm::make_error<llvm::StringError>(
- "DataSize larger than buffer size.", llvm::errc::invalid_argument);
- copyDataBuffer(MMappedBuffer, DataBuf.get(), DataTail, DataSize);
- llvm::Error error =
- parseDataBuffer(DataBuf.get(), DataSize, From, To, &CycleArray);
- if (!error)
- return CycleArray;
- return std::move(error);
- }
- } // namespace exegesis
- } // namespace llvm
- #endif // defined(__linux__) && defined(HAVE_LIBPFM) &&
- // defined(LIBPFM_HAS_FIELD_CYCLES)
|