// Copyright 2021 Google Inc. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
- #include "perf_counters.h"
- #include <cstring>
- #include <memory>
- #include <vector>
- #if defined HAVE_LIBPFM
- #error #include "perfmon/pfmlib.h"
- #error #include "perfmon/pfmlib_perf_event.h"
- #endif
namespace benchmark {
namespace internal {
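
// Out-of-line definition: before C++17, a static constexpr data member that is
// ODR-used still needs a namespace-scope definition like this one.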
constexpr size_t PerfCounterValues::kMaxCounters;

#if defined HAVE_LIBPFM
size_t PerfCounterValues::Read(const std::vector<int>& leaders) {
  // Create a pointer for multiple reads
  const size_t bufsize = values_.size() * sizeof(values_[0]);
  char* ptr = reinterpret_cast<char*>(values_.data());
  size_t size = bufsize;
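  // With attr.read_format == PERF_FORMAT_GROUP (and no other read_format
  // flags), each ::read() on a group leader returns
  //   struct { uint64_t nr; uint64_t values[nr]; }
  // i.e. a leading count followed by one value per counter in the group.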
  for (int lead : leaders) {
    auto read_bytes = ::read(lead, ptr, size);
    if (read_bytes >= ssize_t(sizeof(uint64_t))) {
      // Actual data bytes are all bytes minus the leading count field
      std::size_t data_bytes = read_bytes - sizeof(uint64_t);
      // This should be very cheap since it's in hot cache
      std::memmove(ptr, ptr + sizeof(uint64_t), data_bytes);
      // Increment our counters
      ptr += data_bytes;
      size -= data_bytes;
    } else {
      int err = errno;
      GetErrorLogInstance() << "Error reading lead " << lead << " errno:" << err
                            << " " << ::strerror(err) << "\n";
      return 0;
    }
  }
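  // Report how many counter values were actually read across all groups.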
  return (bufsize - size) / sizeof(uint64_t);
}

const bool PerfCounters::kSupported = true;

// Initializes libpfm only on the first call. Returns whether that single
// initialization was successful.
bool PerfCounters::Initialize() {
  // Function-scope static gets initialized only once on first call.
  static const bool success = []() {
    return pfm_initialize() == PFM_SUCCESS;
  }();
  return success;
}

bool PerfCounters::IsCounterSupported(const std::string& name) {
  Initialize();
  perf_event_attr_t attr;
  std::memset(&attr, 0, sizeof(attr));
  pfm_perf_encode_arg_t arg;
  std::memset(&arg, 0, sizeof(arg));
  arg.attr = &attr;
  const int mode = PFM_PLM3;  // user mode only
  int ret = pfm_get_os_event_encoding(name.c_str(), mode, PFM_OS_PERF_EVENT_EXT,
                                      &arg);
  return (ret == PFM_SUCCESS);
}

PerfCounters PerfCounters::Create(
    const std::vector<std::string>& counter_names) {
  if (!counter_names.empty()) {
    Initialize();
  }

  // Valid counters will populate these arrays but we start empty
  std::vector<std::string> valid_names;
  std::vector<int> counter_ids;
  std::vector<int> leader_ids;

  // Reserve space for the maximum possible number of counters
  valid_names.reserve(counter_names.size());
  counter_ids.reserve(counter_names.size());

  const int kCounterMode = PFM_PLM3;  // user mode only

  // Group leads will be assigned on demand. The idea is that once we cannot
  // create a counter descriptor, the reason is that this group has maxed out,
  // so we reset group_id to -1 and retry, giving the algorithm a chance to
  // create a new group leader to hold the next set of counters.
  int group_id = -1;

  // Loop through all performance counters
  for (size_t i = 0; i < counter_names.size(); ++i) {
    // We are about to push into the valid names vector; check that we have
    // not already reached the maximum.
    if (valid_names.size() == PerfCounterValues::kMaxCounters) {
      // Log a message if we maxed out and stop adding
      GetErrorLogInstance()
          << counter_names.size() << " counters were requested. The maximum is "
          << PerfCounterValues::kMaxCounters << " and " << valid_names.size()
          << " were already added. All remaining counters will be ignored\n";
      // Stop the loop and return what we have already
      break;
    }

    // Check if this name is empty
    const auto& name = counter_names[i];
    if (name.empty()) {
      GetErrorLogInstance()
          << "A performance counter name was the empty string\n";
      continue;
    }

    // Here "first" means first in its group, i.e. the group leader
    const bool is_first = (group_id < 0);

    // This struct will be populated by libpfm from the counter string
    // and then fed into the syscall perf_event_open
    struct perf_event_attr attr {};
    attr.size = sizeof(attr);

    // This is the input struct to libpfm.
    pfm_perf_encode_arg_t arg{};
    arg.attr = &attr;
    const int pfm_get = pfm_get_os_event_encoding(name.c_str(), kCounterMode,
                                                  PFM_OS_PERF_EVENT, &arg);
    if (pfm_get != PFM_SUCCESS) {
      GetErrorLogInstance()
          << "Unknown performance counter name: " << name << "\n";
      continue;
    }

    // We then proceed to populate the remaining fields in our attribute struct.
    // Note: the man page for perf_event_open suggests inherit = true and
    // read_format = PERF_FORMAT_GROUP don't work together, but that's not the
    // case.
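    // Only the group leader is created disabled and pinned. The members are
    // created enabled, but a group only counts while its leader is scheduled,
    // so the single PERF_EVENT_IOC_ENABLE per leader issued below starts the
    // whole group; pinning asks the kernel to keep the group on the PMU.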
    attr.disabled = is_first;
    attr.inherit = true;
    attr.pinned = is_first;
    attr.exclude_kernel = true;
    attr.exclude_user = false;
    attr.exclude_hv = true;

    // Read all counters in a group in one read.
    attr.read_format = PERF_FORMAT_GROUP;

    int id = -1;
    while (id < 0) {
      static constexpr size_t kNrOfSyscallRetries = 5;
      // Retry the syscall as it is often interrupted (b/64774091).
      for (size_t num_retries = 0; num_retries < kNrOfSyscallRetries;
           ++num_retries) {
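        // perf_event_open(attr, pid, cpu, group_fd, flags): pid == 0 measures
        // the calling process/thread, cpu == -1 means "any CPU", group_fd is
        // the current leader's descriptor (or -1 to start a new group) and
        // flags == 0.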
        id = perf_event_open(&attr, 0, -1, group_id, 0);
        if (id >= 0 || errno != EINTR) {
          break;
        }
      }
      if (id < 0) {
        // If the file descriptor is negative we might have reached a limit
        // in the current group. Set group_id to -1 and retry.
        if (group_id >= 0) {
          // Create a new group
          group_id = -1;
        } else {
          // At this point we have already retried with a new group id and
          // failed, so we give up.
          break;
        }
      }
    }

    // We failed to get a new file descriptor. We might have reached a hard
    // hardware limit that cannot be resolved even with group multiplexing.
    if (id < 0) {
      GetErrorLogInstance() << "***WARNING*** Failed to get a file descriptor "
                               "for performance counter "
                            << name << ". Ignoring\n";
      // We give up on this counter but try to keep going
      // as the others would be fine
      continue;
    }
    if (group_id < 0) {
      // This is a leader; store it and make it the current group id
      leader_ids.push_back(id);
      group_id = id;
    }

    // This is a valid counter, add it to our descriptor's list
    counter_ids.push_back(id);
    valid_names.push_back(name);
  }

  // Loop through all group leaders, activating them.
  // There is another option of starting ALL counters in a process but
  // that would be too far-reaching an intrusion. If the user is using PMCs
  // by themselves then this would have a side effect on them. It is
  // friendlier to loop through all groups individually.
  for (int lead : leader_ids) {
    if (ioctl(lead, PERF_EVENT_IOC_ENABLE) != 0) {
      // This should never happen but if it does, we give up on the
      // entire batch as recovery would be a mess.
      GetErrorLogInstance() << "***WARNING*** Failed to start counters. "
                               "Clearing out all counters.\n";
      // Close all performance counters
      for (int id : counter_ids) {
        ::close(id);
      }
      // Return an empty object so our internal state is still good and
      // the process can continue normally without impact
      return NoCounters();
    }
  }

  return PerfCounters(std::move(valid_names), std::move(counter_ids),
                      std::move(leader_ids));
}
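
// Example usage (sketch; "CYCLES" and "INSTRUCTIONS" are generic libpfm event
// names whose availability depends on the host PMU):
//   PerfCounters counters = PerfCounters::Create({"CYCLES", "INSTRUCTIONS"});
// Any counter that cannot be opened is logged and skipped; if enabling the
// groups fails, an empty NoCounters() object is returned instead.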

void PerfCounters::CloseCounters() const {
  if (counter_ids_.empty()) {
    return;
  }
  for (int lead : leader_ids_) {
    ioctl(lead, PERF_EVENT_IOC_DISABLE);
  }
  for (int fd : counter_ids_) {
    close(fd);
  }
}
#else  // defined HAVE_LIBPFM

size_t PerfCounterValues::Read(const std::vector<int>&) { return 0; }

const bool PerfCounters::kSupported = false;

bool PerfCounters::Initialize() { return false; }

bool PerfCounters::IsCounterSupported(const std::string&) { return false; }

PerfCounters PerfCounters::Create(
    const std::vector<std::string>& counter_names) {
  if (!counter_names.empty()) {
    GetErrorLogInstance() << "Performance counters not supported.";
  }
  return NoCounters();
}

void PerfCounters::CloseCounters() const {}
#endif  // defined HAVE_LIBPFM
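
// PerfCountersMeasurement pairs the created counters with two snapshot
// buffers (start/end), each sized to the number of requested counter names,
// so a measurement interval amounts to one read at the start and one at the
// end of the interval.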
PerfCountersMeasurement::PerfCountersMeasurement(
    const std::vector<std::string>& counter_names)
    : start_values_(counter_names.size()), end_values_(counter_names.size()) {
  counters_ = PerfCounters::Create(counter_names);
}

PerfCounters& PerfCounters::operator=(PerfCounters&& other) noexcept {
  if (this != &other) {
    CloseCounters();
    counter_ids_ = std::move(other.counter_ids_);
    leader_ids_ = std::move(other.leader_ids_);
    counter_names_ = std::move(other.counter_names_);
  }
  return *this;
}

}  // namespace internal
}  // namespace benchmark