Clustering.h 5.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170
  1. //===-- Clustering.h --------------------------------------------*- C++ -*-===//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. ///
  9. /// \file
  10. /// Utilities to compute benchmark result clusters.
  11. ///
  12. //===----------------------------------------------------------------------===//
  13. #ifndef LLVM_TOOLS_LLVM_EXEGESIS_CLUSTERING_H
  14. #define LLVM_TOOLS_LLVM_EXEGESIS_CLUSTERING_H
  #include "BenchmarkResult.h"
  #include "llvm/Support/Error.h"
  #include <cassert>
  #include <cstddef>
  #include <limits>
  #include <vector>
  19. namespace llvm {
  20. namespace exegesis {
  21. class InstructionBenchmarkClustering {
  22. public:
  23. enum ModeE { Dbscan, Naive };
  24. // Clusters `Points` using DBSCAN with the given parameters. See the cc file
  25. // for more explanations on the algorithm.
  26. static Expected<InstructionBenchmarkClustering>
  27. create(const std::vector<InstructionBenchmark> &Points, ModeE Mode,
  28. size_t DbscanMinPts, double AnalysisClusteringEpsilon,
  29. const MCSubtargetInfo *SubtargetInfo = nullptr,
  30. const MCInstrInfo *InstrInfo = nullptr);
  31. class ClusterId {
  32. public:
  33. static ClusterId noise() { return ClusterId(kNoise); }
  34. static ClusterId error() { return ClusterId(kError); }
  35. static ClusterId makeValid(size_t Id, bool IsUnstable = false) {
  36. return ClusterId(Id, IsUnstable);
  37. }
  38. static ClusterId makeValidUnstable(size_t Id) {
  39. return makeValid(Id, /*IsUnstable=*/true);
  40. }
  41. ClusterId() : Id_(kUndef), IsUnstable_(false) {}
  42. // Compare id's, ignoring the 'unstability' bit.
  43. bool operator==(const ClusterId &O) const { return Id_ == O.Id_; }
  44. bool operator<(const ClusterId &O) const { return Id_ < O.Id_; }
  45. bool isValid() const { return Id_ <= kMaxValid; }
  46. bool isUnstable() const { return IsUnstable_; }
  47. bool isNoise() const { return Id_ == kNoise; }
  48. bool isError() const { return Id_ == kError; }
  49. bool isUndef() const { return Id_ == kUndef; }
  50. // Precondition: isValid().
  51. size_t getId() const {
  52. assert(isValid());
  53. return Id_;
  54. }
  55. private:
  56. ClusterId(size_t Id, bool IsUnstable = false)
  57. : Id_(Id), IsUnstable_(IsUnstable) {}
  58. static constexpr const size_t kMaxValid =
  59. (std::numeric_limits<size_t>::max() >> 1) - 4;
  60. static constexpr const size_t kNoise = kMaxValid + 1;
  61. static constexpr const size_t kError = kMaxValid + 2;
  62. static constexpr const size_t kUndef = kMaxValid + 3;
  63. size_t Id_ : (std::numeric_limits<size_t>::digits - 1);
  64. size_t IsUnstable_ : 1;
  65. };
  66. static_assert(sizeof(ClusterId) == sizeof(size_t), "should be a bit field.");
  67. struct Cluster {
  68. Cluster() = delete;
  69. explicit Cluster(const ClusterId &Id) : Id(Id) {}
  70. const ClusterId Id;
  71. // Indices of benchmarks within the cluster.
  72. std::vector<int> PointIndices;
  73. };
  74. ClusterId getClusterIdForPoint(size_t P) const {
  75. return ClusterIdForPoint_[P];
  76. }
  77. const std::vector<InstructionBenchmark> &getPoints() const { return Points_; }
  78. const Cluster &getCluster(ClusterId Id) const {
  79. assert(!Id.isUndef() && "unlabeled cluster");
  80. if (Id.isNoise()) {
  81. return NoiseCluster_;
  82. }
  83. if (Id.isError()) {
  84. return ErrorCluster_;
  85. }
  86. return Clusters_[Id.getId()];
  87. }
  88. const std::vector<Cluster> &getValidClusters() const { return Clusters_; }
  89. // Returns true if the given point is within a distance Epsilon of each other.
  90. bool isNeighbour(const std::vector<BenchmarkMeasure> &P,
  91. const std::vector<BenchmarkMeasure> &Q,
  92. const double EpsilonSquared_) const {
  93. double DistanceSquared = 0.0;
  94. for (size_t I = 0, E = P.size(); I < E; ++I) {
  95. const auto Diff = P[I].PerInstructionValue - Q[I].PerInstructionValue;
  96. DistanceSquared += Diff * Diff;
  97. }
  98. return DistanceSquared <= EpsilonSquared_;
  99. }
  100. private:
  101. InstructionBenchmarkClustering(
  102. const std::vector<InstructionBenchmark> &Points,
  103. double AnalysisClusteringEpsilonSquared);
  104. Error validateAndSetup();
  105. void clusterizeDbScan(size_t MinPts);
  106. void clusterizeNaive(const MCSubtargetInfo &SubtargetInfo,
  107. const MCInstrInfo &InstrInfo);
  108. // Stabilization is only needed if dbscan was used to clusterize.
  109. void stabilize(unsigned NumOpcodes);
  110. void rangeQuery(size_t Q, std::vector<size_t> &Scratchpad) const;
  111. bool areAllNeighbours(ArrayRef<size_t> Pts) const;
  112. const std::vector<InstructionBenchmark> &Points_;
  113. const double AnalysisClusteringEpsilonSquared_;
  114. int NumDimensions_ = 0;
  115. // ClusterForPoint_[P] is the cluster id for Points[P].
  116. std::vector<ClusterId> ClusterIdForPoint_;
  117. std::vector<Cluster> Clusters_;
  118. Cluster NoiseCluster_;
  119. Cluster ErrorCluster_;
  120. };
  121. class SchedClassClusterCentroid {
  122. public:
  123. const std::vector<PerInstructionStats> &getStats() const {
  124. return Representative;
  125. }
  126. std::vector<BenchmarkMeasure> getAsPoint() const;
  127. void addPoint(ArrayRef<BenchmarkMeasure> Point);
  128. bool validate(InstructionBenchmark::ModeE Mode) const;
  129. private:
  130. // Measurement stats for the points in the SchedClassCluster.
  131. std::vector<PerInstructionStats> Representative;
  132. };
  133. } // namespace exegesis
  134. } // namespace llvm
  135. #endif // LLVM_TOOLS_LLVM_EXEGESIS_CLUSTERING_H