//===-- Latency.cpp ---------------------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "Latency.h"

#include "Assembler.h"
#include "BenchmarkRunner.h"
#include "MCInstrDescView.h"
#include "PerfHelper.h"
#include "Target.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstBuilder.h"
#include "llvm/Support/FormatVariadic.h"

namespace llvm {
namespace exegesis {

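// Associates a set of ExecutionMode bits with a human-readable description of
// the serialization strategy they represent.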
struct ExecutionClass {
  ExecutionMode Mask;
  const char *Description;
} static const kExecutionClasses[] = {
    {ExecutionMode::ALWAYS_SERIAL_IMPLICIT_REGS_ALIAS |
         ExecutionMode::ALWAYS_SERIAL_TIED_REGS_ALIAS,
     "Repeating a single implicitly serial instruction"},
    {ExecutionMode::SERIAL_VIA_EXPLICIT_REGS,
     "Repeating a single explicitly serial instruction"},
    {ExecutionMode::SERIAL_VIA_MEMORY_INSTR |
         ExecutionMode::SERIAL_VIA_NON_MEMORY_INSTR,
     "Repeating two instructions"},
};

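// Upper bound on the number of aliasing instructions considered when looking
// for a partner instruction to build a two-instruction dependency chain.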
static constexpr size_t kMaxAliasingInstructions = 10;

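// Returns up to MaxAliasingInstructions randomly picked instructions (other
// than Instr itself, and excluding instructions with memory operands) whose
// registers can alias with Instr's registers without using ForbiddenRegisters.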
static std::vector<const Instruction *>
computeAliasingInstructions(const LLVMState &State, const Instruction *Instr,
                            size_t MaxAliasingInstructions,
                            const BitVector &ForbiddenRegisters) {
  // Randomly iterate the set of instructions.
  std::vector<unsigned> Opcodes;
  Opcodes.resize(State.getInstrInfo().getNumOpcodes());
  std::iota(Opcodes.begin(), Opcodes.end(), 0U);
  std::shuffle(Opcodes.begin(), Opcodes.end(), randomGenerator());

  std::vector<const Instruction *> AliasingInstructions;
  for (const unsigned OtherOpcode : Opcodes) {
    if (OtherOpcode == Instr->Description.getOpcode())
      continue;
    const Instruction &OtherInstr = State.getIC().getInstr(OtherOpcode);
    if (OtherInstr.hasMemoryOperands())
      continue;
    if (Instr->hasAliasingRegistersThrough(OtherInstr, ForbiddenRegisters))
      AliasingInstructions.push_back(&OtherInstr);
    if (AliasingInstructions.size() >= MaxAliasingInstructions)
      break;
  }
  return AliasingInstructions;
}

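// Computes the set of serialization strategies (execution modes) that are
// applicable to Instr, given the set of ForbiddenRegisters.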
static ExecutionMode getExecutionModes(const Instruction &Instr,
                                       const BitVector &ForbiddenRegisters) {
  ExecutionMode EM = ExecutionMode::UNKNOWN;
  if (Instr.hasAliasingImplicitRegisters())
    EM |= ExecutionMode::ALWAYS_SERIAL_IMPLICIT_REGS_ALIAS;
  if (Instr.hasTiedRegisters())
    EM |= ExecutionMode::ALWAYS_SERIAL_TIED_REGS_ALIAS;
  if (Instr.hasMemoryOperands())
    EM |= ExecutionMode::SERIAL_VIA_MEMORY_INSTR;
  else {
    if (Instr.hasAliasingRegisters(ForbiddenRegisters))
      EM |= ExecutionMode::SERIAL_VIA_EXPLICIT_REGS;
    if (Instr.hasOneUseOrOneDef())
      EM |= ExecutionMode::SERIAL_VIA_NON_MEMORY_INSTR;
  }
  return EM;
}

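// Appends to CodeTemplates the template(s) that implement the serialization
// strategy selected by ExecutionModeBit for Instr.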
static void appendCodeTemplates(const LLVMState &State,
                                const Instruction *Instr,
                                const BitVector &ForbiddenRegisters,
                                ExecutionMode ExecutionModeBit,
                                StringRef ExecutionClassDescription,
                                std::vector<CodeTemplate> &CodeTemplates) {
  assert(isEnumValue(ExecutionModeBit) && "Bit must be a power of two");
  switch (ExecutionModeBit) {
  case ExecutionMode::ALWAYS_SERIAL_IMPLICIT_REGS_ALIAS:
    // Nothing to do, the instruction is always serial.
    LLVM_FALLTHROUGH;
  case ExecutionMode::ALWAYS_SERIAL_TIED_REGS_ALIAS: {
    // Picking any value for the tied variable makes the instruction serial.
    CodeTemplate CT;
    CT.Execution = ExecutionModeBit;
    CT.Info = ExecutionClassDescription;
    CT.Instructions.push_back(Instr);
    CodeTemplates.push_back(std::move(CT));
    return;
  }
  case ExecutionMode::SERIAL_VIA_MEMORY_INSTR: {
    // Select a back-to-back memory instruction.
    // TODO: Implement me.
    return;
  }
  case ExecutionMode::SERIAL_VIA_EXPLICIT_REGS: {
    // Make the execution of this instruction serial by selecting one def
    // register to alias with one use register.
    const AliasingConfigurations SelfAliasing(*Instr, *Instr);
    assert(!SelfAliasing.empty() && !SelfAliasing.hasImplicitAliasing() &&
           "Instr must alias itself explicitly");
    InstructionTemplate IT(Instr);
    // This is a self-aliasing instruction, so defs and uses come from the same
    // instance, hence IT is passed twice in the following call.
    setRandomAliasing(SelfAliasing, IT, IT);
    CodeTemplate CT;
    CT.Execution = ExecutionModeBit;
    CT.Info = ExecutionClassDescription;
    CT.Instructions.push_back(std::move(IT));
    CodeTemplates.push_back(std::move(CT));
    return;
  }
  case ExecutionMode::SERIAL_VIA_NON_MEMORY_INSTR: {
    // Select a back-to-back non-memory instruction.
    for (const auto *OtherInstr : computeAliasingInstructions(
             State, Instr, kMaxAliasingInstructions, ForbiddenRegisters)) {
      const AliasingConfigurations Forward(*Instr, *OtherInstr);
      const AliasingConfigurations Back(*OtherInstr, *Instr);
      InstructionTemplate ThisIT(Instr);
      InstructionTemplate OtherIT(OtherInstr);
      if (!Forward.hasImplicitAliasing())
        setRandomAliasing(Forward, ThisIT, OtherIT);
      if (!Back.hasImplicitAliasing())
        setRandomAliasing(Back, OtherIT, ThisIT);
      CodeTemplate CT;
      CT.Execution = ExecutionModeBit;
      CT.Info = ExecutionClassDescription;
      CT.Instructions.push_back(std::move(ThisIT));
      CT.Instructions.push_back(std::move(OtherIT));
      CodeTemplates.push_back(std::move(CT));
    }
    return;
  }
  default:
    llvm_unreachable("Unhandled enum value");
  }
}

LatencySnippetGenerator::~LatencySnippetGenerator() = default;

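// Generates code templates by trying each execution class in order and keeping
// the templates produced by the first class that yields at least one result.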
Expected<std::vector<CodeTemplate>>
LatencySnippetGenerator::generateCodeTemplates(
    const Instruction &Instr, const BitVector &ForbiddenRegisters) const {
  std::vector<CodeTemplate> Results;
  const ExecutionMode EM = getExecutionModes(Instr, ForbiddenRegisters);
  for (const auto EC : kExecutionClasses) {
    for (const auto ExecutionModeBit : getExecutionModeBits(EM & EC.Mask))
      appendCodeTemplates(State, &Instr, ForbiddenRegisters, ExecutionModeBit,
                          EC.Description, Results);
    if (!Results.empty())
      break;
  }
  if (Results.empty())
    return make_error<Failure>(
        "No strategy found to make the execution serial");
  return std::move(Results);
}
171 
LatencyBenchmarkRunner(const LLVMState & State,InstructionBenchmark::ModeE Mode)172 LatencyBenchmarkRunner::LatencyBenchmarkRunner(const LLVMState &State,
173                                                InstructionBenchmark::ModeE Mode)
174     : BenchmarkRunner(State, Mode) {
175   assert((Mode == InstructionBenchmark::Latency ||
176           Mode == InstructionBenchmark::InverseThroughput) &&
177          "invalid mode");
178 }
179 
180 LatencyBenchmarkRunner::~LatencyBenchmarkRunner() = default;
181 
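// Runs the snippet several times and reports the minimum observed cycle count
// as either latency or inverse throughput, depending on the benchmark mode.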
Expected<std::vector<BenchmarkMeasure>> LatencyBenchmarkRunner::runMeasurements(
    const FunctionExecutor &Executor) const {
  // Cycle measurements include some overhead from the kernel. Repeat the
  // measurement several times and take the minimum value.
  constexpr const int NumMeasurements = 30;
  int64_t MinValue = std::numeric_limits<int64_t>::max();
  const char *CounterName = State.getPfmCounters().CycleCounter;
  for (size_t I = 0; I < NumMeasurements; ++I) {
    auto ExpectedCounterValue = Executor.runAndMeasure(CounterName);
    if (!ExpectedCounterValue)
      return ExpectedCounterValue.takeError();
    if (*ExpectedCounterValue < MinValue)
      MinValue = *ExpectedCounterValue;
  }
  std::vector<BenchmarkMeasure> Result;
  switch (Mode) {
  case InstructionBenchmark::Latency:
    Result = {BenchmarkMeasure::Create("latency", MinValue)};
    break;
  case InstructionBenchmark::InverseThroughput:
    Result = {BenchmarkMeasure::Create("inverse_throughput", MinValue)};
    break;
  default:
    break;
  }
  return std::move(Result);
}

} // namespace exegesis
} // namespace llvm