perf_counters.cc 9.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282
  1. // Copyright 2021 Google Inc. All rights reserved.
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
#include "perf_counters.h"

#include <cerrno>
#include <cstring>
#include <memory>
#include <string>
#include <utility>
#include <vector>

#if defined HAVE_LIBPFM
#include <sys/ioctl.h>
#include <unistd.h>

#include "perfmon/pfmlib.h"
#include "perfmon/pfmlib_perf_event.h"
#endif
  22. namespace benchmark {
  23. namespace internal {
  24. constexpr size_t PerfCounterValues::kMaxCounters;
  25. #if defined HAVE_LIBPFM
  26. size_t PerfCounterValues::Read(const std::vector<int>& leaders) {
  27. // Create a pointer for multiple reads
  28. const size_t bufsize = values_.size() * sizeof(values_[0]);
  29. char* ptr = reinterpret_cast<char*>(values_.data());
  30. size_t size = bufsize;
  31. for (int lead : leaders) {
  32. auto read_bytes = ::read(lead, ptr, size);
  33. if (read_bytes >= ssize_t(sizeof(uint64_t))) {
  34. // Actual data bytes are all bytes minus initial padding
  35. std::size_t data_bytes = read_bytes - sizeof(uint64_t);
  36. // This should be very cheap since it's in hot cache
  37. std::memmove(ptr, ptr + sizeof(uint64_t), data_bytes);
  38. // Increment our counters
  39. ptr += data_bytes;
  40. size -= data_bytes;
  41. } else {
  42. int err = errno;
  43. GetErrorLogInstance() << "Error reading lead " << lead << " errno:" << err
  44. << " " << ::strerror(err) << "\n";
  45. return 0;
  46. }
  47. }
  48. return (bufsize - size) / sizeof(uint64_t);
  49. }
  50. const bool PerfCounters::kSupported = true;
  51. // Initializes libpfm only on the first call. Returns whether that single
  52. // initialization was successful.
  53. bool PerfCounters::Initialize() {
  54. // Function-scope static gets initialized only once on first call.
  55. static const bool success = []() {
  56. return pfm_initialize() == PFM_SUCCESS;
  57. }();
  58. return success;
  59. }
  60. bool PerfCounters::IsCounterSupported(const std::string& name) {
  61. Initialize();
  62. perf_event_attr_t attr;
  63. std::memset(&attr, 0, sizeof(attr));
  64. pfm_perf_encode_arg_t arg;
  65. std::memset(&arg, 0, sizeof(arg));
  66. arg.attr = &attr;
  67. const int mode = PFM_PLM3; // user mode only
  68. int ret = pfm_get_os_event_encoding(name.c_str(), mode, PFM_OS_PERF_EVENT_EXT,
  69. &arg);
  70. return (ret == PFM_SUCCESS);
  71. }
  72. PerfCounters PerfCounters::Create(
  73. const std::vector<std::string>& counter_names) {
  74. if (!counter_names.empty()) {
  75. Initialize();
  76. }
  77. // Valid counters will populate these arrays but we start empty
  78. std::vector<std::string> valid_names;
  79. std::vector<int> counter_ids;
  80. std::vector<int> leader_ids;
  81. // Resize to the maximum possible
  82. valid_names.reserve(counter_names.size());
  83. counter_ids.reserve(counter_names.size());
  84. const int kCounterMode = PFM_PLM3; // user mode only
  85. // Group leads will be assigned on demand. The idea is that once we cannot
  86. // create a counter descriptor, the reason is that this group has maxed out
  87. // so we set the group_id again to -1 and retry - giving the algorithm a
  88. // chance to create a new group leader to hold the next set of counters.
  89. int group_id = -1;
  90. // Loop through all performance counters
  91. for (size_t i = 0; i < counter_names.size(); ++i) {
  92. // we are about to push into the valid names vector
  93. // check if we did not reach the maximum
  94. if (valid_names.size() == PerfCounterValues::kMaxCounters) {
  95. // Log a message if we maxed out and stop adding
  96. GetErrorLogInstance()
  97. << counter_names.size() << " counters were requested. The maximum is "
  98. << PerfCounterValues::kMaxCounters << " and " << valid_names.size()
  99. << " were already added. All remaining counters will be ignored\n";
  100. // stop the loop and return what we have already
  101. break;
  102. }
  103. // Check if this name is empty
  104. const auto& name = counter_names[i];
  105. if (name.empty()) {
  106. GetErrorLogInstance()
  107. << "A performance counter name was the empty string\n";
  108. continue;
  109. }
  110. // Here first means first in group, ie the group leader
  111. const bool is_first = (group_id < 0);
  112. // This struct will be populated by libpfm from the counter string
  113. // and then fed into the syscall perf_event_open
  114. struct perf_event_attr attr {};
  115. attr.size = sizeof(attr);
  116. // This is the input struct to libpfm.
  117. pfm_perf_encode_arg_t arg{};
  118. arg.attr = &attr;
  119. const int pfm_get = pfm_get_os_event_encoding(name.c_str(), kCounterMode,
  120. PFM_OS_PERF_EVENT, &arg);
  121. if (pfm_get != PFM_SUCCESS) {
  122. GetErrorLogInstance()
  123. << "Unknown performance counter name: " << name << "\n";
  124. continue;
  125. }
  126. // We then proceed to populate the remaining fields in our attribute struct
  127. // Note: the man page for perf_event_create suggests inherit = true and
  128. // read_format = PERF_FORMAT_GROUP don't work together, but that's not the
  129. // case.
  130. attr.disabled = is_first;
  131. attr.inherit = true;
  132. attr.pinned = is_first;
  133. attr.exclude_kernel = true;
  134. attr.exclude_user = false;
  135. attr.exclude_hv = true;
  136. // Read all counters in a group in one read.
  137. attr.read_format = PERF_FORMAT_GROUP;
  138. int id = -1;
  139. while (id < 0) {
  140. static constexpr size_t kNrOfSyscallRetries = 5;
  141. // Retry syscall as it was interrupted often (b/64774091).
  142. for (size_t num_retries = 0; num_retries < kNrOfSyscallRetries;
  143. ++num_retries) {
  144. id = perf_event_open(&attr, 0, -1, group_id, 0);
  145. if (id >= 0 || errno != EINTR) {
  146. break;
  147. }
  148. }
  149. if (id < 0) {
  150. // If the file descriptor is negative we might have reached a limit
  151. // in the current group. Set the group_id to -1 and retry
  152. if (group_id >= 0) {
  153. // Create a new group
  154. group_id = -1;
  155. } else {
  156. // At this point we have already retried to set a new group id and
  157. // failed. We then give up.
  158. break;
  159. }
  160. }
  161. }
  162. // We failed to get a new file descriptor. We might have reached a hard
  163. // hardware limit that cannot be resolved even with group multiplexing
  164. if (id < 0) {
  165. GetErrorLogInstance() << "***WARNING** Failed to get a file descriptor "
  166. "for performance counter "
  167. << name << ". Ignoring\n";
  168. // We give up on this counter but try to keep going
  169. // as the others would be fine
  170. continue;
  171. }
  172. if (group_id < 0) {
  173. // This is a leader, store and assign it to the current file descriptor
  174. leader_ids.push_back(id);
  175. group_id = id;
  176. }
  177. // This is a valid counter, add it to our descriptor's list
  178. counter_ids.push_back(id);
  179. valid_names.push_back(name);
  180. }
  181. // Loop through all group leaders activating them
  182. // There is another option of starting ALL counters in a process but
  183. // that would be far reaching an intrusion. If the user is using PMCs
  184. // by themselves then this would have a side effect on them. It is
  185. // friendlier to loop through all groups individually.
  186. for (int lead : leader_ids) {
  187. if (ioctl(lead, PERF_EVENT_IOC_ENABLE) != 0) {
  188. // This should never happen but if it does, we give up on the
  189. // entire batch as recovery would be a mess.
  190. GetErrorLogInstance() << "***WARNING*** Failed to start counters. "
  191. "Claring out all counters.\n";
  192. // Close all peformance counters
  193. for (int id : counter_ids) {
  194. ::close(id);
  195. }
  196. // Return an empty object so our internal state is still good and
  197. // the process can continue normally without impact
  198. return NoCounters();
  199. }
  200. }
  201. return PerfCounters(std::move(valid_names), std::move(counter_ids),
  202. std::move(leader_ids));
  203. }
  204. void PerfCounters::CloseCounters() const {
  205. if (counter_ids_.empty()) {
  206. return;
  207. }
  208. for (int lead : leader_ids_) {
  209. ioctl(lead, PERF_EVENT_IOC_DISABLE);
  210. }
  211. for (int fd : counter_ids_) {
  212. close(fd);
  213. }
  214. }
  215. #else // defined HAVE_LIBPFM
  216. size_t PerfCounterValues::Read(const std::vector<int>&) { return 0; }
  217. const bool PerfCounters::kSupported = false;
  218. bool PerfCounters::Initialize() { return false; }
  219. bool PerfCounters::IsCounterSupported(const std::string&) { return false; }
  220. PerfCounters PerfCounters::Create(
  221. const std::vector<std::string>& counter_names) {
  222. if (!counter_names.empty()) {
  223. GetErrorLogInstance() << "Performance counters not supported.";
  224. }
  225. return NoCounters();
  226. }
  227. void PerfCounters::CloseCounters() const {}
  228. #endif // defined HAVE_LIBPFM
  229. PerfCountersMeasurement::PerfCountersMeasurement(
  230. const std::vector<std::string>& counter_names)
  231. : start_values_(counter_names.size()), end_values_(counter_names.size()) {
  232. counters_ = PerfCounters::Create(counter_names);
  233. }
  234. PerfCounters& PerfCounters::operator=(PerfCounters&& other) noexcept {
  235. if (this != &other) {
  236. CloseCounters();
  237. counter_ids_ = std::move(other.counter_ids_);
  238. leader_ids_ = std::move(other.leader_ids_);
  239. counter_names_ = std::move(other.counter_names_);
  240. }
  241. return *this;
  242. }
  243. } // namespace internal
  244. } // namespace benchmark