1 //===-- Clustering.h --------------------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 /// Utilities to compute benchmark result clusters.
11 ///
12 //===----------------------------------------------------------------------===//
13 
14 #ifndef LLVM_TOOLS_LLVM_EXEGESIS_CLUSTERING_H
15 #define LLVM_TOOLS_LLVM_EXEGESIS_CLUSTERING_H
16 
17 #include "BenchmarkResult.h"
18 #include "llvm/ADT/Optional.h"
19 #include "llvm/Support/Error.h"
20 #include <limits>
21 #include <vector>
22 
23 namespace llvm {
24 namespace exegesis {
25 
26 class InstructionBenchmarkClustering {
27 public:
28   enum ModeE { Dbscan, Naive };
29 
30   // Clusters `Points` using DBSCAN with the given parameters. See the cc file
31   // for more explanations on the algorithm.
32   static Expected<InstructionBenchmarkClustering>
33   create(const std::vector<InstructionBenchmark> &Points, ModeE Mode,
34          size_t DbscanMinPts, double AnalysisClusteringEpsilon,
35          Optional<unsigned> NumOpcodes = None);
36 
37   class ClusterId {
38   public:
noise()39     static ClusterId noise() { return ClusterId(kNoise); }
error()40     static ClusterId error() { return ClusterId(kError); }
41     static ClusterId makeValid(size_t Id, bool IsUnstable = false) {
42       return ClusterId(Id, IsUnstable);
43     }
makeValidUnstable(size_t Id)44     static ClusterId makeValidUnstable(size_t Id) {
45       return makeValid(Id, /*IsUnstable=*/true);
46     }
47 
ClusterId()48     ClusterId() : Id_(kUndef), IsUnstable_(false) {}
49 
50     // Compare id's, ignoring the 'unstability' bit.
51     bool operator==(const ClusterId &O) const { return Id_ == O.Id_; }
52     bool operator<(const ClusterId &O) const { return Id_ < O.Id_; }
53 
isValid()54     bool isValid() const { return Id_ <= kMaxValid; }
isUnstable()55     bool isUnstable() const { return IsUnstable_; }
isNoise()56     bool isNoise() const { return Id_ == kNoise; }
isError()57     bool isError() const { return Id_ == kError; }
isUndef()58     bool isUndef() const { return Id_ == kUndef; }
59 
60     // Precondition: isValid().
getId()61     size_t getId() const {
62       assert(isValid());
63       return Id_;
64     }
65 
66   private:
67     ClusterId(size_t Id, bool IsUnstable = false)
Id_(Id)68         : Id_(Id), IsUnstable_(IsUnstable) {}
69 
70     static constexpr const size_t kMaxValid =
71         (std::numeric_limits<size_t>::max() >> 1) - 4;
72     static constexpr const size_t kNoise = kMaxValid + 1;
73     static constexpr const size_t kError = kMaxValid + 2;
74     static constexpr const size_t kUndef = kMaxValid + 3;
75 
76     size_t Id_ : (std::numeric_limits<size_t>::digits - 1);
77     size_t IsUnstable_ : 1;
78   };
79   static_assert(sizeof(ClusterId) == sizeof(size_t), "should be a bit field.");
80 
81   struct Cluster {
82     Cluster() = delete;
ClusterCluster83     explicit Cluster(const ClusterId &Id) : Id(Id) {}
84 
85     const ClusterId Id;
86     // Indices of benchmarks within the cluster.
87     std::vector<int> PointIndices;
88   };
89 
getClusterIdForPoint(size_t P)90   ClusterId getClusterIdForPoint(size_t P) const {
91     return ClusterIdForPoint_[P];
92   }
93 
getPoints()94   const std::vector<InstructionBenchmark> &getPoints() const { return Points_; }
95 
getCluster(ClusterId Id)96   const Cluster &getCluster(ClusterId Id) const {
97     assert(!Id.isUndef() && "unlabeled cluster");
98     if (Id.isNoise()) {
99       return NoiseCluster_;
100     }
101     if (Id.isError()) {
102       return ErrorCluster_;
103     }
104     return Clusters_[Id.getId()];
105   }
106 
getValidClusters()107   const std::vector<Cluster> &getValidClusters() const { return Clusters_; }
108 
109   // Returns true if the given point is within a distance Epsilon of each other.
isNeighbour(const std::vector<BenchmarkMeasure> & P,const std::vector<BenchmarkMeasure> & Q,const double EpsilonSquared_)110   bool isNeighbour(const std::vector<BenchmarkMeasure> &P,
111                    const std::vector<BenchmarkMeasure> &Q,
112                    const double EpsilonSquared_) const {
113     double DistanceSquared = 0.0;
114     for (size_t I = 0, E = P.size(); I < E; ++I) {
115       const auto Diff = P[I].PerInstructionValue - Q[I].PerInstructionValue;
116       DistanceSquared += Diff * Diff;
117     }
118     return DistanceSquared <= EpsilonSquared_;
119   }
120 
121 private:
122   InstructionBenchmarkClustering(
123       const std::vector<InstructionBenchmark> &Points,
124       double AnalysisClusteringEpsilonSquared);
125 
126   Error validateAndSetup();
127 
128   void clusterizeDbScan(size_t MinPts);
129   void clusterizeNaive(unsigned NumOpcodes);
130 
131   // Stabilization is only needed if dbscan was used to clusterize.
132   void stabilize(unsigned NumOpcodes);
133 
134   void rangeQuery(size_t Q, std::vector<size_t> &Scratchpad) const;
135 
136   bool areAllNeighbours(ArrayRef<size_t> Pts) const;
137 
138   const std::vector<InstructionBenchmark> &Points_;
139   const double AnalysisClusteringEpsilonSquared_;
140 
141   int NumDimensions_ = 0;
142   // ClusterForPoint_[P] is the cluster id for Points[P].
143   std::vector<ClusterId> ClusterIdForPoint_;
144   std::vector<Cluster> Clusters_;
145   Cluster NoiseCluster_;
146   Cluster ErrorCluster_;
147 };
148 
149 class SchedClassClusterCentroid {
150 public:
getStats()151   const std::vector<PerInstructionStats> &getStats() const {
152     return Representative;
153   }
154 
155   std::vector<BenchmarkMeasure> getAsPoint() const;
156 
157   void addPoint(ArrayRef<BenchmarkMeasure> Point);
158 
159   bool validate(InstructionBenchmark::ModeE Mode) const;
160 
161 private:
162   // Measurement stats for the points in the SchedClassCluster.
163   std::vector<PerInstructionStats> Representative;
164 };
165 
166 } // namespace exegesis
167 } // namespace llvm
168 
169 #endif // LLVM_TOOLS_LLVM_EXEGESIS_CLUSTERING_H
170