//===-- Clustering.h --------------------------------------------*- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// /// /// \file /// Utilities to compute benchmark result clusters. /// //===----------------------------------------------------------------------===// #ifndef LLVM_TOOLS_LLVM_EXEGESIS_CLUSTERING_H #define LLVM_TOOLS_LLVM_EXEGESIS_CLUSTERING_H #include "BenchmarkResult.h" #include "llvm/Support/Error.h" #include #include namespace llvm { namespace exegesis { class InstructionBenchmarkClustering { public: enum ModeE { Dbscan, Naive }; // Clusters `Points` using DBSCAN with the given parameters. See the cc file // for more explanations on the algorithm. static Expected create(const std::vector &Points, ModeE Mode, size_t DbscanMinPts, double AnalysisClusteringEpsilon, const MCSubtargetInfo *SubtargetInfo = nullptr, const MCInstrInfo *InstrInfo = nullptr); class ClusterId { public: static ClusterId noise() { return ClusterId(kNoise); } static ClusterId error() { return ClusterId(kError); } static ClusterId makeValid(size_t Id, bool IsUnstable = false) { return ClusterId(Id, IsUnstable); } static ClusterId makeValidUnstable(size_t Id) { return makeValid(Id, /*IsUnstable=*/true); } ClusterId() : Id_(kUndef), IsUnstable_(false) {} // Compare id's, ignoring the 'unstability' bit. bool operator==(const ClusterId &O) const { return Id_ == O.Id_; } bool operator<(const ClusterId &O) const { return Id_ < O.Id_; } bool isValid() const { return Id_ <= kMaxValid; } bool isUnstable() const { return IsUnstable_; } bool isNoise() const { return Id_ == kNoise; } bool isError() const { return Id_ == kError; } bool isUndef() const { return Id_ == kUndef; } // Precondition: isValid(). size_t getId() const { assert(isValid()); return Id_; } private: ClusterId(size_t Id, bool IsUnstable = false) : Id_(Id), IsUnstable_(IsUnstable) {} static constexpr const size_t kMaxValid = (std::numeric_limits::max() >> 1) - 4; static constexpr const size_t kNoise = kMaxValid + 1; static constexpr const size_t kError = kMaxValid + 2; static constexpr const size_t kUndef = kMaxValid + 3; size_t Id_ : (std::numeric_limits::digits - 1); size_t IsUnstable_ : 1; }; static_assert(sizeof(ClusterId) == sizeof(size_t), "should be a bit field."); struct Cluster { Cluster() = delete; explicit Cluster(const ClusterId &Id) : Id(Id) {} const ClusterId Id; // Indices of benchmarks within the cluster. std::vector PointIndices; }; ClusterId getClusterIdForPoint(size_t P) const { return ClusterIdForPoint_[P]; } const std::vector &getPoints() const { return Points_; } const Cluster &getCluster(ClusterId Id) const { assert(!Id.isUndef() && "unlabeled cluster"); if (Id.isNoise()) { return NoiseCluster_; } if (Id.isError()) { return ErrorCluster_; } return Clusters_[Id.getId()]; } const std::vector &getValidClusters() const { return Clusters_; } // Returns true if the given point is within a distance Epsilon of each other. bool isNeighbour(const std::vector &P, const std::vector &Q, const double EpsilonSquared_) const { double DistanceSquared = 0.0; for (size_t I = 0, E = P.size(); I < E; ++I) { const auto Diff = P[I].PerInstructionValue - Q[I].PerInstructionValue; DistanceSquared += Diff * Diff; } return DistanceSquared <= EpsilonSquared_; } private: InstructionBenchmarkClustering( const std::vector &Points, double AnalysisClusteringEpsilonSquared); Error validateAndSetup(); void clusterizeDbScan(size_t MinPts); void clusterizeNaive(const MCSubtargetInfo &SubtargetInfo, const MCInstrInfo &InstrInfo); // Stabilization is only needed if dbscan was used to clusterize. void stabilize(unsigned NumOpcodes); void rangeQuery(size_t Q, std::vector &Scratchpad) const; bool areAllNeighbours(ArrayRef Pts) const; const std::vector &Points_; const double AnalysisClusteringEpsilonSquared_; int NumDimensions_ = 0; // ClusterForPoint_[P] is the cluster id for Points[P]. std::vector ClusterIdForPoint_; std::vector Clusters_; Cluster NoiseCluster_; Cluster ErrorCluster_; }; class SchedClassClusterCentroid { public: const std::vector &getStats() const { return Representative; } std::vector getAsPoint() const; void addPoint(ArrayRef Point); bool validate(InstructionBenchmark::ModeE Mode) const; private: // Measurement stats for the points in the SchedClassCluster. std::vector Representative; }; } // namespace exegesis } // namespace llvm #endif // LLVM_TOOLS_LLVM_EXEGESIS_CLUSTERING_H