Clustering.h 5.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169
  1. //===-- Clustering.h --------------------------------------------*- C++ -*-===//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. ///
  9. /// \file
  10. /// Utilities to compute benchmark result clusters.
  11. ///
  12. //===----------------------------------------------------------------------===//
  13. #ifndef LLVM_TOOLS_LLVM_EXEGESIS_CLUSTERING_H
  14. #define LLVM_TOOLS_LLVM_EXEGESIS_CLUSTERING_H
#include "BenchmarkResult.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/Optional.h"
#include "llvm/Support/Error.h"
#include <cassert>
#include <cstddef>
#include <limits>
#include <vector>
  20. namespace llvm {
  21. namespace exegesis {
  22. class InstructionBenchmarkClustering {
  23. public:
  // Clustering strategy requested from create().
  enum ModeE {
    Dbscan, // Density-based clustering; see clusterizeDbScan().
    Naive,  // See clusterizeNaive().
  };
  25. // Clusters `Points` using DBSCAN with the given parameters. See the cc file
  26. // for more explanations on the algorithm.
  27. static Expected<InstructionBenchmarkClustering>
  28. create(const std::vector<InstructionBenchmark> &Points, ModeE Mode,
  29. size_t DbscanMinPts, double AnalysisClusteringEpsilon,
  30. Optional<unsigned> NumOpcodes = None);
  // Label attached to a benchmark point. It is either the index of a valid
  // cluster or one of three sentinels: noise, error, or undefined (the state of
  // a default-constructed id).
  //
  // The label is packed into a single size_t: the low bits hold the id and one
  // bit records whether the cluster is unstable. The sentinels occupy the
  // values just above kMaxValid, so every valid id must be <= kMaxValid.
  class ClusterId {
  public:
    // A default-constructed id is undefined (not yet labelled).
    ClusterId() : Id_(kUndef), IsUnstable_(false) {}

    static ClusterId noise() { return ClusterId(kNoise); }
    static ClusterId error() { return ClusterId(kError); }

    // Creates the label of valid cluster number `Id`.
    // Precondition: Id <= kMaxValid. Larger values would either alias one of
    // the sentinels or be silently truncated by the bit-field, so reject them
    // in asserts-enabled builds.
    static ClusterId makeValid(size_t Id, bool IsUnstable = false) {
      assert(Id <= kMaxValid && "cluster id out of the valid range");
      return ClusterId(Id, IsUnstable);
    }
    static ClusterId makeValidUnstable(size_t Id) {
      return makeValid(Id, /*IsUnstable=*/true);
    }

    // Compare id's, ignoring the 'unstability' bit.
    bool operator==(const ClusterId &O) const { return Id_ == O.Id_; }
    bool operator<(const ClusterId &O) const { return Id_ < O.Id_; }

    // True for real clusters, false for the noise/error/undefined sentinels.
    bool isValid() const { return Id_ <= kMaxValid; }
    bool isUnstable() const { return IsUnstable_; }
    bool isNoise() const { return Id_ == kNoise; }
    bool isError() const { return Id_ == kError; }
    bool isUndef() const { return Id_ == kUndef; }

    // Returns the cluster index.
    // Precondition: isValid().
    size_t getId() const {
      assert(isValid());
      return Id_;
    }

  private:
    // Unchecked constructor; also used to build the sentinels.
    ClusterId(size_t Id, bool IsUnstable = false)
        : Id_(Id), IsUnstable_(IsUnstable) {}

    // Largest valid id. The three sentinels follow it and still fit in the
    // (digits - 1)-bit Id_ field.
    static constexpr const size_t kMaxValid =
        (std::numeric_limits<size_t>::max() >> 1) - 4;
    static constexpr const size_t kNoise = kMaxValid + 1;
    static constexpr const size_t kError = kMaxValid + 2;
    static constexpr const size_t kUndef = kMaxValid + 3;

    size_t Id_ : (std::numeric_limits<size_t>::digits - 1);
    size_t IsUnstable_ : 1;
  };
  static_assert(sizeof(ClusterId) == sizeof(size_t), "should be a bit field.");
  67. struct Cluster {
  68. Cluster() = delete;
  69. explicit Cluster(const ClusterId &Id) : Id(Id) {}
  70. const ClusterId Id;
  71. // Indices of benchmarks within the cluster.
  72. std::vector<int> PointIndices;
  73. };
  74. ClusterId getClusterIdForPoint(size_t P) const {
  75. return ClusterIdForPoint_[P];
  76. }
  77. const std::vector<InstructionBenchmark> &getPoints() const { return Points_; }
  78. const Cluster &getCluster(ClusterId Id) const {
  79. assert(!Id.isUndef() && "unlabeled cluster");
  80. if (Id.isNoise()) {
  81. return NoiseCluster_;
  82. }
  83. if (Id.isError()) {
  84. return ErrorCluster_;
  85. }
  86. return Clusters_[Id.getId()];
  87. }
  88. const std::vector<Cluster> &getValidClusters() const { return Clusters_; }
  89. // Returns true if the given point is within a distance Epsilon of each other.
  90. bool isNeighbour(const std::vector<BenchmarkMeasure> &P,
  91. const std::vector<BenchmarkMeasure> &Q,
  92. const double EpsilonSquared_) const {
  93. double DistanceSquared = 0.0;
  94. for (size_t I = 0, E = P.size(); I < E; ++I) {
  95. const auto Diff = P[I].PerInstructionValue - Q[I].PerInstructionValue;
  96. DistanceSquared += Diff * Diff;
  97. }
  98. return DistanceSquared <= EpsilonSquared_;
  99. }
  100. private:
  101. InstructionBenchmarkClustering(
  102. const std::vector<InstructionBenchmark> &Points,
  103. double AnalysisClusteringEpsilonSquared);
  104. Error validateAndSetup();
  105. void clusterizeDbScan(size_t MinPts);
  106. void clusterizeNaive(unsigned NumOpcodes);
  107. // Stabilization is only needed if dbscan was used to clusterize.
  108. void stabilize(unsigned NumOpcodes);
  109. void rangeQuery(size_t Q, std::vector<size_t> &Scratchpad) const;
  110. bool areAllNeighbours(ArrayRef<size_t> Pts) const;
  111. const std::vector<InstructionBenchmark> &Points_;
  112. const double AnalysisClusteringEpsilonSquared_;
  113. int NumDimensions_ = 0;
  114. // ClusterForPoint_[P] is the cluster id for Points[P].
  115. std::vector<ClusterId> ClusterIdForPoint_;
  116. std::vector<Cluster> Clusters_;
  117. Cluster NoiseCluster_;
  118. Cluster ErrorCluster_;
  119. };
  120. class SchedClassClusterCentroid {
  121. public:
  122. const std::vector<PerInstructionStats> &getStats() const {
  123. return Representative;
  124. }
  125. std::vector<BenchmarkMeasure> getAsPoint() const;
  126. void addPoint(ArrayRef<BenchmarkMeasure> Point);
  127. bool validate(InstructionBenchmark::ModeE Mode) const;
  128. private:
  129. // Measurement stats for the points in the SchedClassCluster.
  130. std::vector<PerInstructionStats> Representative;
  131. };
  132. } // namespace exegesis
  133. } // namespace llvm
  134. #endif // LLVM_TOOLS_LLVM_EXEGESIS_CLUSTERING_H