1 //===-- Analysis.cpp --------------------------------------------*- C++ -*-===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 
10 #include "Analysis.h"
11 #include "BenchmarkResult.h"
12 #include "llvm/ADT/STLExtras.h"
13 #include "llvm/MC/MCAsmInfo.h"
14 #include "llvm/Support/FormatVariadic.h"
15 #include <unordered_set>
16 #include <vector>
17 
18 namespace exegesis {
19 
20 static const char kCsvSep = ',';
21 
22 namespace {
23 
24 enum EscapeTag { kEscapeCsv, kEscapeHtml, kEscapeHtmlString };
25 
26 template <EscapeTag Tag>
27 void writeEscaped(llvm::raw_ostream &OS, const llvm::StringRef S);
28 
29 template <>
30 void writeEscaped<kEscapeCsv>(llvm::raw_ostream &OS, const llvm::StringRef S) {
31   if (std::find(S.begin(), S.end(), kCsvSep) == S.end()) {
32     OS << S;
33   } else {
34     // Needs escaping.
35     OS << '"';
36     for (const char C : S) {
37       if (C == '"')
38         OS << "\"\"";
39       else
40         OS << C;
41     }
42     OS << '"';
43   }
44 }
45 
46 template <>
47 void writeEscaped<kEscapeHtml>(llvm::raw_ostream &OS, const llvm::StringRef S) {
48   for (const char C : S) {
49     if (C == '<')
50       OS << "&lt;";
51     else if (C == '>')
52       OS << "&gt;";
53     else if (C == '&')
54       OS << "&amp;";
55     else
56       OS << C;
57   }
58 }
59 
60 template <>
61 void writeEscaped<kEscapeHtmlString>(llvm::raw_ostream &OS,
62                                      const llvm::StringRef S) {
63   for (const char C : S) {
64     if (C == '"')
65       OS << "\\\"";
66     else
67       OS << C;
68   }
69 }
70 
71 } // namespace
72 
73 template <EscapeTag Tag>
74 static void
75 writeClusterId(llvm::raw_ostream &OS,
76                const InstructionBenchmarkClustering::ClusterId &CID) {
77   if (CID.isNoise())
78     writeEscaped<Tag>(OS, "[noise]");
79   else if (CID.isError())
80     writeEscaped<Tag>(OS, "[error]");
81   else
82     OS << CID.getId();
83 }
84 
85 template <EscapeTag Tag>
86 static void writeMeasurementValue(llvm::raw_ostream &OS, const double Value) {
87   writeEscaped<Tag>(OS, llvm::formatv("{0:F}", Value).str());
88 }
89 
90 template <typename EscapeTag, EscapeTag Tag>
91 void Analysis::writeSnippet(llvm::raw_ostream &OS,
92                             llvm::ArrayRef<uint8_t> Bytes,
93                             const char *Separator) const {
94   llvm::SmallVector<std::string, 3> Lines;
95   // Parse the asm snippet and print it.
96   while (!Bytes.empty()) {
97     llvm::MCInst MI;
98     uint64_t MISize = 0;
99     if (!Disasm_->getInstruction(MI, MISize, Bytes, 0, llvm::nulls(),
100                                  llvm::nulls())) {
101       writeEscaped<Tag>(OS, llvm::join(Lines, Separator));
102       writeEscaped<Tag>(OS, Separator);
103       writeEscaped<Tag>(OS, "[error decoding asm snippet]");
104       return;
105     }
106     Lines.emplace_back();
107     std::string &Line = Lines.back();
108     llvm::raw_string_ostream OSS(Line);
109     InstPrinter_->printInst(&MI, OSS, "", *SubtargetInfo_);
110     Bytes = Bytes.drop_front(MISize);
111     OSS.flush();
112     Line = llvm::StringRef(Line).trim().str();
113   }
114   writeEscaped<Tag>(OS, llvm::join(Lines, Separator));
115 }
116 
117 // Prints a row representing an instruction, along with scheduling info and
118 // point coordinates (measurements).
119 void Analysis::printInstructionRowCsv(const size_t PointId,
120                                       llvm::raw_ostream &OS) const {
121   const InstructionBenchmark &Point = Clustering_.getPoints()[PointId];
122   writeClusterId<kEscapeCsv>(OS, Clustering_.getClusterIdForPoint(PointId));
123   OS << kCsvSep;
124   writeSnippet<EscapeTag, kEscapeCsv>(OS, Point.AssembledSnippet, "; ");
125   OS << kCsvSep;
126   writeEscaped<kEscapeCsv>(OS, Point.Key.Config);
127   OS << kCsvSep;
128   assert(!Point.Key.Instructions.empty());
129   // FIXME: Resolve variant classes.
130   const unsigned SchedClassId =
131       InstrInfo_->get(Point.Key.Instructions[0].getOpcode()).getSchedClass();
132 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
133   const auto &SchedModel = SubtargetInfo_->getSchedModel();
134   const llvm::MCSchedClassDesc *const SCDesc =
135       SchedModel.getSchedClassDesc(SchedClassId);
136   writeEscaped<kEscapeCsv>(OS, SCDesc->Name);
137 #else
138   OS << SchedClassId;
139 #endif
140   for (const auto &Measurement : Point.Measurements) {
141     OS << kCsvSep;
142     writeMeasurementValue<kEscapeCsv>(OS, Measurement.Value);
143   }
144   OS << "\n";
145 }
146 
147 Analysis::Analysis(const llvm::Target &Target,
148                    const InstructionBenchmarkClustering &Clustering)
149     : Clustering_(Clustering) {
150   if (Clustering.getPoints().empty())
151     return;
152 
153   const InstructionBenchmark &FirstPoint = Clustering.getPoints().front();
154   InstrInfo_.reset(Target.createMCInstrInfo());
155   RegInfo_.reset(Target.createMCRegInfo(FirstPoint.LLVMTriple));
156   AsmInfo_.reset(Target.createMCAsmInfo(*RegInfo_, FirstPoint.LLVMTriple));
157   SubtargetInfo_.reset(Target.createMCSubtargetInfo(FirstPoint.LLVMTriple,
158                                                     FirstPoint.CpuName, ""));
159   InstPrinter_.reset(Target.createMCInstPrinter(
160       llvm::Triple(FirstPoint.LLVMTriple), 0 /*default variant*/, *AsmInfo_,
161       *InstrInfo_, *RegInfo_));
162 
163   Context_ = llvm::make_unique<llvm::MCContext>(AsmInfo_.get(), RegInfo_.get(),
164                                                 &ObjectFileInfo_);
165   Disasm_.reset(Target.createMCDisassembler(*SubtargetInfo_, *Context_));
166   assert(Disasm_ && "cannot create MCDisassembler. missing call to "
167                     "InitializeXXXTargetDisassembler ?");
168 }
169 
170 template <>
171 llvm::Error
172 Analysis::run<Analysis::PrintClusters>(llvm::raw_ostream &OS) const {
173   if (Clustering_.getPoints().empty())
174     return llvm::Error::success();
175 
176   // Write the header.
177   OS << "cluster_id" << kCsvSep << "opcode_name" << kCsvSep << "config"
178      << kCsvSep << "sched_class";
179   for (const auto &Measurement : Clustering_.getPoints().front().Measurements) {
180     OS << kCsvSep;
181     writeEscaped<kEscapeCsv>(OS, Measurement.Key);
182   }
183   OS << "\n";
184 
185   // Write the points.
186   const auto &Clusters = Clustering_.getValidClusters();
187   for (size_t I = 0, E = Clusters.size(); I < E; ++I) {
188     for (const size_t PointId : Clusters[I].PointIndices) {
189       printInstructionRowCsv(PointId, OS);
190     }
191     OS << "\n\n";
192   }
193   return llvm::Error::success();
194 }
195 
196 std::unordered_map<unsigned, std::vector<size_t>>
197 Analysis::makePointsPerSchedClass() const {
198   std::unordered_map<unsigned, std::vector<size_t>> PointsPerSchedClass;
199   const auto &Points = Clustering_.getPoints();
200   for (size_t PointId = 0, E = Points.size(); PointId < E; ++PointId) {
201     const InstructionBenchmark &Point = Points[PointId];
202     if (!Point.Error.empty())
203       continue;
204     assert(!Point.Key.Instructions.empty());
205     const auto Opcode = Point.Key.Instructions[0].getOpcode();
206     // FIXME: Resolve variant classes.
207     PointsPerSchedClass[InstrInfo_->get(Opcode).getSchedClass()].push_back(
208         PointId);
209   }
210   return PointsPerSchedClass;
211 }
212 
213 // Uops repeat the same opcode over again. Just show this opcode and show the
214 // whole snippet only on hover.
215 static void writeUopsSnippetHtml(llvm::raw_ostream &OS,
216                                  const std::vector<llvm::MCInst> &Instructions,
217                                  const llvm::MCInstrInfo &InstrInfo) {
218   if (Instructions.empty())
219     return;
220   writeEscaped<kEscapeHtml>(OS, InstrInfo.getName(Instructions[0].getOpcode()));
221   if (Instructions.size() > 1)
222     OS << " (x" << Instructions.size() << ")";
223 }
224 
225 // Latency tries to find a serial path. Just show the opcode path and show the
226 // whole snippet only on hover.
227 static void
228 writeLatencySnippetHtml(llvm::raw_ostream &OS,
229                         const std::vector<llvm::MCInst> &Instructions,
230                         const llvm::MCInstrInfo &InstrInfo) {
231   bool First = true;
232   for (const llvm::MCInst &Instr : Instructions) {
233     if (First)
234       First = false;
235     else
236       OS << " &rarr; ";
237     writeEscaped<kEscapeHtml>(OS, InstrInfo.getName(Instr.getOpcode()));
238   }
239 }
240 
241 void Analysis::printSchedClassClustersHtml(
242     const std::vector<SchedClassCluster> &Clusters, const SchedClass &SC,
243     llvm::raw_ostream &OS) const {
244   const auto &Points = Clustering_.getPoints();
245   OS << "<table class=\"sched-class-clusters\">";
246   OS << "<tr><th>ClusterId</th><th>Opcode/Config</th>";
247   assert(!Clusters.empty());
248   for (const auto &Measurement :
249        Points[Clusters[0].getPointIds()[0]].Measurements) {
250     OS << "<th>";
251     if (Measurement.DebugString.empty())
252       writeEscaped<kEscapeHtml>(OS, Measurement.Key);
253     else
254       writeEscaped<kEscapeHtml>(OS, Measurement.DebugString);
255     OS << "</th>";
256   }
257   OS << "</tr>";
258   for (const SchedClassCluster &Cluster : Clusters) {
259     OS << "<tr class=\""
260        << (Cluster.measurementsMatch(*SubtargetInfo_, SC, Clustering_)
261                ? "good-cluster"
262                : "bad-cluster")
263        << "\"><td>";
264     writeClusterId<kEscapeHtml>(OS, Cluster.id());
265     OS << "</td><td><ul>";
266     for (const size_t PointId : Cluster.getPointIds()) {
267       const auto &Point = Points[PointId];
268       OS << "<li><span class=\"mono\" title=\"";
269       writeSnippet<EscapeTag, kEscapeHtmlString>(OS, Point.AssembledSnippet,
270                                                  "\n");
271       OS << "\">";
272       switch (Point.Mode) {
273       case InstructionBenchmark::Latency:
274         writeLatencySnippetHtml(OS, Point.Key.Instructions, *InstrInfo_);
275         break;
276       case InstructionBenchmark::Uops:
277         writeUopsSnippetHtml(OS, Point.Key.Instructions, *InstrInfo_);
278         break;
279       default:
280         llvm_unreachable("invalid mode");
281       }
282       OS << "</span> <span class=\"mono\">";
283       writeEscaped<kEscapeHtml>(OS, Point.Key.Config);
284       OS << "</span></li>";
285     }
286     OS << "</ul></td>";
287     for (const auto &Stats : Cluster.getRepresentative()) {
288       OS << "<td class=\"measurement\">";
289       writeMeasurementValue<kEscapeHtml>(OS, Stats.avg());
290       OS << "<br><span class=\"minmax\">[";
291       writeMeasurementValue<kEscapeHtml>(OS, Stats.min());
292       OS << ";";
293       writeMeasurementValue<kEscapeHtml>(OS, Stats.max());
294       OS << "]</span></td>";
295     }
296     OS << "</tr>";
297   }
298   OS << "</table>";
299 }
300 
301 // Return the non-redundant list of WriteProcRes used by the given sched class.
302 // The scheduling model for LLVM is such that each instruction has a certain
303 // number of uops which consume resources which are described by WriteProcRes
304 // entries. Each entry describe how many cycles are spent on a specific ProcRes
305 // kind.
306 // For example, an instruction might have 3 uOps, one dispatching on P0
307 // (ProcResIdx=1) and two on P06 (ProcResIdx = 7).
308 // Note that LLVM additionally denormalizes resource consumption to include
309 // usage of super resources by subresources. So in practice if there exists a
310 // P016 (ProcResIdx=10), then the cycles consumed by P0 are also consumed by
311 // P06 (ProcResIdx = 7) and P016 (ProcResIdx = 10), and the resources consumed
312 // by P06 are also consumed by P016. In the figure below, parenthesized cycles
313 // denote implied usage of superresources by subresources:
314 //            P0      P06    P016
315 //     uOp1    1      (1)     (1)
316 //     uOp2            1      (1)
317 //     uOp3            1      (1)
318 //     =============================
319 //             1       3       3
320 // Eventually we end up with three entries for the WriteProcRes of the
321 // instruction:
322 //    {ProcResIdx=1,  Cycles=1}  // P0
323 //    {ProcResIdx=7,  Cycles=3}  // P06
324 //    {ProcResIdx=10, Cycles=3}  // P016
325 //
326 // Note that in this case, P016 does not contribute any cycles, so it would
327 // be removed by this function.
328 // FIXME: Move this to MCSubtargetInfo and use it in llvm-mca.
329 static llvm::SmallVector<llvm::MCWriteProcResEntry, 8>
330 getNonRedundantWriteProcRes(const llvm::MCSchedClassDesc &SCDesc,
331                             const llvm::MCSubtargetInfo &STI) {
332   llvm::SmallVector<llvm::MCWriteProcResEntry, 8> Result;
333   const auto &SM = STI.getSchedModel();
334   const unsigned NumProcRes = SM.getNumProcResourceKinds();
335 
336   // This assumes that the ProcResDescs are sorted in topological order, which
337   // is guaranteed by the tablegen backend.
338   llvm::SmallVector<float, 32> ProcResUnitUsage(NumProcRes);
339   for (const auto *WPR = STI.getWriteProcResBegin(&SCDesc),
340                   *const WPREnd = STI.getWriteProcResEnd(&SCDesc);
341        WPR != WPREnd; ++WPR) {
342     const llvm::MCProcResourceDesc *const ProcResDesc =
343         SM.getProcResource(WPR->ProcResourceIdx);
344     if (ProcResDesc->SubUnitsIdxBegin == nullptr) {
345       // This is a ProcResUnit.
346       Result.push_back({WPR->ProcResourceIdx, WPR->Cycles});
347       ProcResUnitUsage[WPR->ProcResourceIdx] += WPR->Cycles;
348     } else {
349       // This is a ProcResGroup. First see if it contributes any cycles or if
350       // it has cycles just from subunits.
351       float RemainingCycles = WPR->Cycles;
352       for (const auto *SubResIdx = ProcResDesc->SubUnitsIdxBegin;
353            SubResIdx != ProcResDesc->SubUnitsIdxBegin + ProcResDesc->NumUnits;
354            ++SubResIdx) {
355         RemainingCycles -= ProcResUnitUsage[*SubResIdx];
356       }
357       if (RemainingCycles < 0.01f) {
358         // The ProcResGroup contributes no cycles of its own.
359         continue;
360       }
361       // The ProcResGroup contributes `RemainingCycles` cycles of its own.
362       Result.push_back({WPR->ProcResourceIdx,
363                         static_cast<uint16_t>(std::round(RemainingCycles))});
364       // Spread the remaining cycles over all subunits.
365       for (const auto *SubResIdx = ProcResDesc->SubUnitsIdxBegin;
366            SubResIdx != ProcResDesc->SubUnitsIdxBegin + ProcResDesc->NumUnits;
367            ++SubResIdx) {
368         ProcResUnitUsage[*SubResIdx] += RemainingCycles / ProcResDesc->NumUnits;
369       }
370     }
371   }
372   return Result;
373 }
374 
375 Analysis::SchedClass::SchedClass(const llvm::MCSchedClassDesc &SD,
376                                  const llvm::MCSubtargetInfo &STI)
377     : SCDesc(&SD),
378       NonRedundantWriteProcRes(getNonRedundantWriteProcRes(SD, STI)),
379       IdealizedProcResPressure(computeIdealizedProcResPressure(
380           STI.getSchedModel(), NonRedundantWriteProcRes)) {}
381 
382 void Analysis::SchedClassCluster::addPoint(
383     size_t PointId, const InstructionBenchmarkClustering &Clustering) {
384   PointIds.push_back(PointId);
385   const auto &Point = Clustering.getPoints()[PointId];
386   if (ClusterId.isUndef()) {
387     ClusterId = Clustering.getClusterIdForPoint(PointId);
388     Representative.resize(Point.Measurements.size());
389   }
390   for (size_t I = 0, E = Point.Measurements.size(); I < E; ++I) {
391     Representative[I].push(Point.Measurements[I]);
392   }
393   assert(ClusterId == Clustering.getClusterIdForPoint(PointId));
394 }
395 
396 bool Analysis::SchedClassCluster::measurementsMatch(
397     const llvm::MCSubtargetInfo &STI, const SchedClass &SC,
398     const InstructionBenchmarkClustering &Clustering) const {
399   const size_t NumMeasurements = Representative.size();
400   std::vector<BenchmarkMeasure> ClusterCenterPoint(NumMeasurements);
401   std::vector<BenchmarkMeasure> SchedClassPoint(NumMeasurements);
402   // Latency case.
403   assert(!Clustering.getPoints().empty());
404   const InstructionBenchmark::ModeE Mode = Clustering.getPoints()[0].Mode;
405   if (Mode == InstructionBenchmark::Latency) {
406     if (NumMeasurements != 1) {
407       llvm::errs()
408           << "invalid number of measurements in latency mode: expected 1, got "
409           << NumMeasurements << "\n";
410       return false;
411     }
412     // Find the latency.
413     SchedClassPoint[0].Value = 0.0;
414     for (unsigned I = 0; I < SC.SCDesc->NumWriteLatencyEntries; ++I) {
415       const llvm::MCWriteLatencyEntry *const WLE =
416           STI.getWriteLatencyEntry(SC.SCDesc, I);
417       SchedClassPoint[0].Value =
418           std::max<double>(SchedClassPoint[0].Value, WLE->Cycles);
419     }
420     ClusterCenterPoint[0].Value = Representative[0].avg();
421   } else if (Mode == InstructionBenchmark::Uops) {
422     for (int I = 0, E = Representative.size(); I < E; ++I) {
423       // Find the pressure on ProcResIdx `Key`.
424       uint16_t ProcResIdx = 0;
425       if (!llvm::to_integer(Representative[I].key(), ProcResIdx, 10)) {
426         llvm::errs() << "expected ProcResIdx key, got "
427                      << Representative[I].key() << "\n";
428         return false;
429       }
430       const auto ProcResPressureIt =
431           std::find_if(SC.IdealizedProcResPressure.begin(),
432                        SC.IdealizedProcResPressure.end(),
433                        [ProcResIdx](const std::pair<uint16_t, float> &WPR) {
434                          return WPR.first == ProcResIdx;
435                        });
436       SchedClassPoint[I].Value =
437           ProcResPressureIt == SC.IdealizedProcResPressure.end()
438               ? 0.0
439               : ProcResPressureIt->second;
440       ClusterCenterPoint[I].Value = Representative[I].avg();
441     }
442   } else {
443     llvm::errs() << "unimplemented measurement matching for mode " << Mode
444                  << "\n";
445     return false;
446   }
447   return Clustering.isNeighbour(ClusterCenterPoint, SchedClassPoint);
448 }
449 
450 void Analysis::printSchedClassDescHtml(const SchedClass &SC,
451                                        llvm::raw_ostream &OS) const {
452   OS << "<table class=\"sched-class-desc\">";
453   OS << "<tr><th>Valid</th><th>Variant</th><th>uOps</th><th>Latency</"
454         "th><th>WriteProcRes</th><th title=\"This is the idealized unit "
455         "resource (port) pressure assuming ideal distribution\">Idealized "
456         "Resource Pressure</th></tr>";
457   if (SC.SCDesc->isValid()) {
458     const auto &SM = SubtargetInfo_->getSchedModel();
459     OS << "<tr><td>&#10004;</td>";
460     OS << "<td>" << (SC.SCDesc->isVariant() ? "&#10004;" : "&#10005;")
461        << "</td>";
462     OS << "<td>" << SC.SCDesc->NumMicroOps << "</td>";
463     // Latencies.
464     OS << "<td><ul>";
465     for (int I = 0, E = SC.SCDesc->NumWriteLatencyEntries; I < E; ++I) {
466       const auto *const Entry =
467           SubtargetInfo_->getWriteLatencyEntry(SC.SCDesc, I);
468       OS << "<li>" << Entry->Cycles;
469       if (SC.SCDesc->NumWriteLatencyEntries > 1) {
470         // Dismabiguate if more than 1 latency.
471         OS << " (WriteResourceID " << Entry->WriteResourceID << ")";
472       }
473       OS << "</li>";
474     }
475     OS << "</ul></td>";
476     // WriteProcRes.
477     OS << "<td><ul>";
478     for (const auto &WPR : SC.NonRedundantWriteProcRes) {
479       OS << "<li><span class=\"mono\">";
480       writeEscaped<kEscapeHtml>(OS,
481                                 SM.getProcResource(WPR.ProcResourceIdx)->Name);
482       OS << "</span>: " << WPR.Cycles << "</li>";
483     }
484     OS << "</ul></td>";
485     // Idealized port pressure.
486     OS << "<td><ul>";
487     for (const auto &Pressure : SC.IdealizedProcResPressure) {
488       OS << "<li><span class=\"mono\">";
489       writeEscaped<kEscapeHtml>(OS, SubtargetInfo_->getSchedModel()
490                                         .getProcResource(Pressure.first)
491                                         ->Name);
492       OS << "</span>: ";
493       writeMeasurementValue<kEscapeHtml>(OS, Pressure.second);
494       OS << "</li>";
495     }
496     OS << "</ul></td>";
497     OS << "</tr>";
498   } else {
499     OS << "<tr><td>&#10005;</td><td></td><td></td></tr>";
500   }
501   OS << "</table>";
502 }
503 
504 static constexpr const char kHtmlHead[] = R"(
505 <head>
506 <title>llvm-exegesis Analysis Results</title>
507 <style>
508 body {
509   font-family: sans-serif
510 }
511 span.sched-class-name {
512   font-weight: bold;
513   font-family: monospace;
514 }
515 span.opcode {
516   font-family: monospace;
517 }
518 span.config {
519   font-family: monospace;
520 }
521 div.inconsistency {
522   margin-top: 50px;
523 }
524 table {
525   margin-left: 50px;
526   border-collapse: collapse;
527 }
528 table, table tr,td,th {
529   border: 1px solid #444;
530 }
531 table ul {
532   padding-left: 0px;
533   margin: 0px;
534   list-style-type: none;
535 }
536 table.sched-class-clusters td {
537   padding-left: 10px;
538   padding-right: 10px;
539   padding-top: 10px;
540   padding-bottom: 10px;
541 }
542 table.sched-class-desc td {
543   padding-left: 10px;
544   padding-right: 10px;
545   padding-top: 2px;
546   padding-bottom: 2px;
547 }
548 span.mono {
549   font-family: monospace;
550 }
551 td.measurement {
552   text-align: center;
553 }
554 tr.good-cluster td.measurement {
555   color: #292
556 }
557 tr.bad-cluster td.measurement {
558   color: #922
559 }
560 tr.good-cluster td.measurement span.minmax {
561   color: #888;
562 }
563 tr.bad-cluster td.measurement span.minmax {
564   color: #888;
565 }
566 </style>
567 </head>
568 )";
569 
570 template <>
571 llvm::Error Analysis::run<Analysis::PrintSchedClassInconsistencies>(
572     llvm::raw_ostream &OS) const {
573   const auto &FirstPoint = Clustering_.getPoints()[0];
574   // Print the header.
575   OS << "<!DOCTYPE html><html>" << kHtmlHead << "<body>";
576   OS << "<h1><span class=\"mono\">llvm-exegesis</span> Analysis Results</h1>";
577   OS << "<h3>Triple: <span class=\"mono\">";
578   writeEscaped<kEscapeHtml>(OS, FirstPoint.LLVMTriple);
579   OS << "</span></h3><h3>Cpu: <span class=\"mono\">";
580   writeEscaped<kEscapeHtml>(OS, FirstPoint.CpuName);
581   OS << "</span></h3>";
582 
583   for (const auto &SchedClassAndPoints : makePointsPerSchedClass()) {
584     const auto SchedClassId = SchedClassAndPoints.first;
585     const std::vector<size_t> &SchedClassPoints = SchedClassAndPoints.second;
586     const auto &SchedModel = SubtargetInfo_->getSchedModel();
587     const llvm::MCSchedClassDesc *const SCDesc =
588         SchedModel.getSchedClassDesc(SchedClassId);
589     if (!SCDesc)
590       continue;
591     const SchedClass SC(*SCDesc, *SubtargetInfo_);
592 
593     // Bucket sched class points into sched class clusters.
594     std::vector<SchedClassCluster> SchedClassClusters;
595     for (const size_t PointId : SchedClassPoints) {
596       const auto &ClusterId = Clustering_.getClusterIdForPoint(PointId);
597       if (!ClusterId.isValid())
598         continue; // Ignore noise and errors. FIXME: take noise into account ?
599       auto SchedClassClusterIt =
600           std::find_if(SchedClassClusters.begin(), SchedClassClusters.end(),
601                        [ClusterId](const SchedClassCluster &C) {
602                          return C.id() == ClusterId;
603                        });
604       if (SchedClassClusterIt == SchedClassClusters.end()) {
605         SchedClassClusters.emplace_back();
606         SchedClassClusterIt = std::prev(SchedClassClusters.end());
607       }
608       SchedClassClusterIt->addPoint(PointId, Clustering_);
609     }
610 
611     // Print any scheduling class that has at least one cluster that does not
612     // match the checked-in data.
613     if (std::all_of(SchedClassClusters.begin(), SchedClassClusters.end(),
614                     [this, &SC](const SchedClassCluster &C) {
615                       return C.measurementsMatch(*SubtargetInfo_, SC,
616                                                  Clustering_);
617                     }))
618       continue; // Nothing weird.
619 
620     OS << "<div class=\"inconsistency\"><p>Sched Class <span "
621           "class=\"sched-class-name\">";
622 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
623     writeEscaped<kEscapeHtml>(OS, SCDesc->Name);
624 #else
625     OS << SchedClassId;
626 #endif
627     OS << "</span> contains instructions whose performance characteristics do"
628           " not match that of LLVM:</p>";
629     printSchedClassClustersHtml(SchedClassClusters, SC, OS);
630     OS << "<p>llvm SchedModel data:</p>";
631     printSchedClassDescHtml(SC, OS);
632     OS << "</div>";
633   }
634 
635   OS << "</body></html>";
636   return llvm::Error::success();
637 }
638 
639 // Distributes a pressure budget as evenly as possible on the provided subunits
640 // given the already existing port pressure distribution.
641 //
642 // The algorithm is as follows: while there is remaining pressure to
643 // distribute, find the subunits with minimal pressure, and distribute
644 // remaining pressure equally up to the pressure of the unit with
645 // second-to-minimal pressure.
646 // For example, let's assume we want to distribute 2*P1256
647 // (Subunits = [P1,P2,P5,P6]), and the starting DensePressure is:
648 //     DensePressure =        P0   P1   P2   P3   P4   P5   P6   P7
649 //                           0.1  0.3  0.2  0.0  0.0  0.5  0.5  0.5
650 //     RemainingPressure = 2.0
651 // We sort the subunits by pressure:
652 //     Subunits = [(P2,p=0.2), (P1,p=0.3), (P5,p=0.5), (P6, p=0.5)]
653 // We'll first start by the subunits with minimal pressure, which are at
654 // the beginning of the sorted array. In this example there is one (P2).
655 // The subunit with second-to-minimal pressure is the next one in the
656 // array (P1). So we distribute 0.1 pressure to P2, and remove 0.1 cycles
657 // from the budget.
658 //     Subunits = [(P2,p=0.3), (P1,p=0.3), (P5,p=0.5), (P5,p=0.5)]
659 //     RemainingPressure = 1.9
660 // We repeat this process: distribute 0.2 pressure on each of the minimal
661 // P2 and P1, decrease budget by 2*0.2:
662 //     Subunits = [(P2,p=0.5), (P1,p=0.5), (P5,p=0.5), (P5,p=0.5)]
663 //     RemainingPressure = 1.5
664 // There are no second-to-minimal subunits so we just share the remaining
665 // budget (1.5 cycles) equally:
666 //     Subunits = [(P2,p=0.875), (P1,p=0.875), (P5,p=0.875), (P5,p=0.875)]
667 //     RemainingPressure = 0.0
668 // We stop as there is no remaining budget to distribute.
669 void distributePressure(float RemainingPressure,
670                         llvm::SmallVector<uint16_t, 32> Subunits,
671                         llvm::SmallVector<float, 32> &DensePressure) {
672   // Find the number of subunits with minimal pressure (they are at the
673   // front).
674   llvm::sort(Subunits.begin(), Subunits.end(),
675              [&DensePressure](const uint16_t A, const uint16_t B) {
676                return DensePressure[A] < DensePressure[B];
677              });
678   const auto getPressureForSubunit = [&DensePressure,
679                                       &Subunits](size_t I) -> float & {
680     return DensePressure[Subunits[I]];
681   };
682   size_t NumMinimalSU = 1;
683   while (NumMinimalSU < Subunits.size() &&
684          getPressureForSubunit(NumMinimalSU) == getPressureForSubunit(0)) {
685     ++NumMinimalSU;
686   }
687   while (RemainingPressure > 0.0f) {
688     if (NumMinimalSU == Subunits.size()) {
689       // All units are minimal, just distribute evenly and be done.
690       for (size_t I = 0; I < NumMinimalSU; ++I) {
691         getPressureForSubunit(I) += RemainingPressure / NumMinimalSU;
692       }
693       return;
694     }
695     // Distribute the remaining pressure equally.
696     const float MinimalPressure = getPressureForSubunit(NumMinimalSU - 1);
697     const float SecondToMinimalPressure = getPressureForSubunit(NumMinimalSU);
698     assert(MinimalPressure < SecondToMinimalPressure);
699     const float Increment = SecondToMinimalPressure - MinimalPressure;
700     if (RemainingPressure <= NumMinimalSU * Increment) {
701       // There is not enough remaining pressure.
702       for (size_t I = 0; I < NumMinimalSU; ++I) {
703         getPressureForSubunit(I) += RemainingPressure / NumMinimalSU;
704       }
705       return;
706     }
707     // Bump all minimal pressure subunits to `SecondToMinimalPressure`.
708     for (size_t I = 0; I < NumMinimalSU; ++I) {
709       getPressureForSubunit(I) = SecondToMinimalPressure;
710       RemainingPressure -= SecondToMinimalPressure;
711     }
712     while (NumMinimalSU < Subunits.size() &&
713            getPressureForSubunit(NumMinimalSU) == SecondToMinimalPressure) {
714       ++NumMinimalSU;
715     }
716   }
717 }
718 
719 std::vector<std::pair<uint16_t, float>> computeIdealizedProcResPressure(
720     const llvm::MCSchedModel &SM,
721     llvm::SmallVector<llvm::MCWriteProcResEntry, 8> WPRS) {
722   // DensePressure[I] is the port pressure for Proc Resource I.
723   llvm::SmallVector<float, 32> DensePressure(SM.getNumProcResourceKinds());
724   llvm::sort(WPRS.begin(), WPRS.end(),
725              [](const llvm::MCWriteProcResEntry &A,
726                 const llvm::MCWriteProcResEntry &B) {
727                return A.ProcResourceIdx < B.ProcResourceIdx;
728              });
729   for (const llvm::MCWriteProcResEntry &WPR : WPRS) {
730     // Get units for the entry.
731     const llvm::MCProcResourceDesc *const ProcResDesc =
732         SM.getProcResource(WPR.ProcResourceIdx);
733     if (ProcResDesc->SubUnitsIdxBegin == nullptr) {
734       // This is a ProcResUnit.
735       DensePressure[WPR.ProcResourceIdx] += WPR.Cycles;
736     } else {
737       // This is a ProcResGroup.
738       llvm::SmallVector<uint16_t, 32> Subunits(ProcResDesc->SubUnitsIdxBegin,
739                                                ProcResDesc->SubUnitsIdxBegin +
740                                                    ProcResDesc->NumUnits);
741       distributePressure(WPR.Cycles, Subunits, DensePressure);
742     }
743   }
744   // Turn dense pressure into sparse pressure by removing zero entries.
745   std::vector<std::pair<uint16_t, float>> Pressure;
746   for (unsigned I = 0, E = SM.getNumProcResourceKinds(); I < E; ++I) {
747     if (DensePressure[I] > 0.0f)
748       Pressure.emplace_back(I, DensePressure[I]);
749   }
750   return Pressure;
751 }
752 
753 } // namespace exegesis
754