1 //===-- Clustering.h --------------------------------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 /// 9 /// \file 10 /// Utilities to compute benchmark result clusters. 11 /// 12 //===----------------------------------------------------------------------===// 13 14 #ifndef LLVM_TOOLS_LLVM_EXEGESIS_CLUSTERING_H 15 #define LLVM_TOOLS_LLVM_EXEGESIS_CLUSTERING_H 16 17 #include "BenchmarkResult.h" 18 #include "llvm/ADT/Optional.h" 19 #include "llvm/Support/Error.h" 20 #include <limits> 21 #include <vector> 22 23 namespace llvm { 24 namespace exegesis { 25 26 class InstructionBenchmarkClustering { 27 public: 28 enum ModeE { Dbscan, Naive }; 29 30 // Clusters `Points` using DBSCAN with the given parameters. See the cc file 31 // for more explanations on the algorithm. 32 static Expected<InstructionBenchmarkClustering> 33 create(const std::vector<InstructionBenchmark> &Points, ModeE Mode, 34 size_t DbscanMinPts, double AnalysisClusteringEpsilon, 35 Optional<unsigned> NumOpcodes = None); 36 37 class ClusterId { 38 public: noise()39 static ClusterId noise() { return ClusterId(kNoise); } error()40 static ClusterId error() { return ClusterId(kError); } 41 static ClusterId makeValid(size_t Id, bool IsUnstable = false) { 42 return ClusterId(Id, IsUnstable); 43 } makeValidUnstable(size_t Id)44 static ClusterId makeValidUnstable(size_t Id) { 45 return makeValid(Id, /*IsUnstable=*/true); 46 } 47 ClusterId()48 ClusterId() : Id_(kUndef), IsUnstable_(false) {} 49 50 // Compare id's, ignoring the 'unstability' bit. 51 bool operator==(const ClusterId &O) const { return Id_ == O.Id_; } 52 bool operator<(const ClusterId &O) const { return Id_ < O.Id_; } 53 isValid()54 bool isValid() const { return Id_ <= kMaxValid; } isUnstable()55 bool isUnstable() const { return IsUnstable_; } isNoise()56 bool isNoise() const { return Id_ == kNoise; } isError()57 bool isError() const { return Id_ == kError; } isUndef()58 bool isUndef() const { return Id_ == kUndef; } 59 60 // Precondition: isValid(). getId()61 size_t getId() const { 62 assert(isValid()); 63 return Id_; 64 } 65 66 private: 67 ClusterId(size_t Id, bool IsUnstable = false) Id_(Id)68 : Id_(Id), IsUnstable_(IsUnstable) {} 69 70 static constexpr const size_t kMaxValid = 71 (std::numeric_limits<size_t>::max() >> 1) - 4; 72 static constexpr const size_t kNoise = kMaxValid + 1; 73 static constexpr const size_t kError = kMaxValid + 2; 74 static constexpr const size_t kUndef = kMaxValid + 3; 75 76 size_t Id_ : (std::numeric_limits<size_t>::digits - 1); 77 size_t IsUnstable_ : 1; 78 }; 79 static_assert(sizeof(ClusterId) == sizeof(size_t), "should be a bit field."); 80 81 struct Cluster { 82 Cluster() = delete; ClusterCluster83 explicit Cluster(const ClusterId &Id) : Id(Id) {} 84 85 const ClusterId Id; 86 // Indices of benchmarks within the cluster. 87 std::vector<int> PointIndices; 88 }; 89 getClusterIdForPoint(size_t P)90 ClusterId getClusterIdForPoint(size_t P) const { 91 return ClusterIdForPoint_[P]; 92 } 93 getPoints()94 const std::vector<InstructionBenchmark> &getPoints() const { return Points_; } 95 getCluster(ClusterId Id)96 const Cluster &getCluster(ClusterId Id) const { 97 assert(!Id.isUndef() && "unlabeled cluster"); 98 if (Id.isNoise()) { 99 return NoiseCluster_; 100 } 101 if (Id.isError()) { 102 return ErrorCluster_; 103 } 104 return Clusters_[Id.getId()]; 105 } 106 getValidClusters()107 const std::vector<Cluster> &getValidClusters() const { return Clusters_; } 108 109 // Returns true if the given point is within a distance Epsilon of each other. isNeighbour(const std::vector<BenchmarkMeasure> & P,const std::vector<BenchmarkMeasure> & Q,const double EpsilonSquared_)110 bool isNeighbour(const std::vector<BenchmarkMeasure> &P, 111 const std::vector<BenchmarkMeasure> &Q, 112 const double EpsilonSquared_) const { 113 double DistanceSquared = 0.0; 114 for (size_t I = 0, E = P.size(); I < E; ++I) { 115 const auto Diff = P[I].PerInstructionValue - Q[I].PerInstructionValue; 116 DistanceSquared += Diff * Diff; 117 } 118 return DistanceSquared <= EpsilonSquared_; 119 } 120 121 private: 122 InstructionBenchmarkClustering( 123 const std::vector<InstructionBenchmark> &Points, 124 double AnalysisClusteringEpsilonSquared); 125 126 Error validateAndSetup(); 127 128 void clusterizeDbScan(size_t MinPts); 129 void clusterizeNaive(unsigned NumOpcodes); 130 131 // Stabilization is only needed if dbscan was used to clusterize. 132 void stabilize(unsigned NumOpcodes); 133 134 void rangeQuery(size_t Q, std::vector<size_t> &Scratchpad) const; 135 136 bool areAllNeighbours(ArrayRef<size_t> Pts) const; 137 138 const std::vector<InstructionBenchmark> &Points_; 139 const double AnalysisClusteringEpsilonSquared_; 140 141 int NumDimensions_ = 0; 142 // ClusterForPoint_[P] is the cluster id for Points[P]. 143 std::vector<ClusterId> ClusterIdForPoint_; 144 std::vector<Cluster> Clusters_; 145 Cluster NoiseCluster_; 146 Cluster ErrorCluster_; 147 }; 148 149 class SchedClassClusterCentroid { 150 public: getStats()151 const std::vector<PerInstructionStats> &getStats() const { 152 return Representative; 153 } 154 155 std::vector<BenchmarkMeasure> getAsPoint() const; 156 157 void addPoint(ArrayRef<BenchmarkMeasure> Point); 158 159 bool validate(InstructionBenchmark::ModeE Mode) const; 160 161 private: 162 // Measurement stats for the points in the SchedClassCluster. 163 std::vector<PerInstructionStats> Representative; 164 }; 165 166 } // namespace exegesis 167 } // namespace llvm 168 169 #endif // LLVM_TOOLS_LLVM_EXEGESIS_CLUSTERING_H 170