1 //===-- Latency.cpp ---------------------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "Latency.h"
10 
11 #include "Assembler.h"
12 #include "BenchmarkRunner.h"
13 #include "MCInstrDescView.h"
14 #include "PerfHelper.h"
15 #include "Target.h"
16 #include "llvm/ADT/STLExtras.h"
17 #include "llvm/MC/MCInst.h"
18 #include "llvm/MC/MCInstBuilder.h"
19 #include "llvm/Support/FormatVariadic.h"
20 
21 namespace llvm {
22 namespace exegesis {
23 
24 struct ExecutionClass {
25   ExecutionMode Mask;
26   const char *Description;
27 } static const kExecutionClasses[] = {
28     {ExecutionMode::ALWAYS_SERIAL_IMPLICIT_REGS_ALIAS |
29          ExecutionMode::ALWAYS_SERIAL_TIED_REGS_ALIAS,
30      "Repeating a single implicitly serial instruction"},
31     {ExecutionMode::SERIAL_VIA_EXPLICIT_REGS,
32      "Repeating a single explicitly serial instruction"},
33     {ExecutionMode::SERIAL_VIA_MEMORY_INSTR |
34          ExecutionMode::SERIAL_VIA_NON_MEMORY_INSTR,
35      "Repeating two instructions"},
36 };
37 
38 static constexpr size_t kMaxAliasingInstructions = 10;
39 
40 static std::vector<Instruction>
computeAliasingInstructions(const LLVMState & State,const Instruction & Instr,size_t MaxAliasingInstructions)41 computeAliasingInstructions(const LLVMState &State, const Instruction &Instr,
42                             size_t MaxAliasingInstructions) {
43   // Randomly iterate the set of instructions.
44   std::vector<unsigned> Opcodes;
45   Opcodes.resize(State.getInstrInfo().getNumOpcodes());
46   std::iota(Opcodes.begin(), Opcodes.end(), 0U);
47   std::shuffle(Opcodes.begin(), Opcodes.end(), randomGenerator());
48 
49   std::vector<Instruction> AliasingInstructions;
50   for (const unsigned OtherOpcode : Opcodes) {
51     if (OtherOpcode == Instr.Description->getOpcode())
52       continue;
53     const Instruction &OtherInstr = State.getIC().getInstr(OtherOpcode);
54     if (OtherInstr.hasMemoryOperands())
55       continue;
56     if (Instr.hasAliasingRegistersThrough(OtherInstr))
57       AliasingInstructions.push_back(std::move(OtherInstr));
58     if (AliasingInstructions.size() >= MaxAliasingInstructions)
59       break;
60   }
61   return AliasingInstructions;
62 }
63 
getExecutionModes(const Instruction & Instr)64 static ExecutionMode getExecutionModes(const Instruction &Instr) {
65   ExecutionMode EM = ExecutionMode::UNKNOWN;
66   if (Instr.hasAliasingImplicitRegisters())
67     EM |= ExecutionMode::ALWAYS_SERIAL_IMPLICIT_REGS_ALIAS;
68   if (Instr.hasTiedRegisters())
69     EM |= ExecutionMode::ALWAYS_SERIAL_TIED_REGS_ALIAS;
70   if (Instr.hasMemoryOperands())
71     EM |= ExecutionMode::SERIAL_VIA_MEMORY_INSTR;
72   else {
73     if (Instr.hasAliasingRegisters())
74       EM |= ExecutionMode::SERIAL_VIA_EXPLICIT_REGS;
75     if (Instr.hasOneUseOrOneDef())
76       EM |= ExecutionMode::SERIAL_VIA_NON_MEMORY_INSTR;
77   }
78   return EM;
79 }
80 
appendCodeTemplates(const LLVMState & State,const Instruction & Instr,ExecutionMode ExecutionModeBit,llvm::StringRef ExecutionClassDescription,std::vector<CodeTemplate> & CodeTemplates)81 static void appendCodeTemplates(const LLVMState &State,
82                                 const Instruction &Instr,
83                                 ExecutionMode ExecutionModeBit,
84                                 llvm::StringRef ExecutionClassDescription,
85                                 std::vector<CodeTemplate> &CodeTemplates) {
86   assert(isEnumValue(ExecutionModeBit) && "Bit must be a power of two");
87   switch (ExecutionModeBit) {
88   case ExecutionMode::ALWAYS_SERIAL_IMPLICIT_REGS_ALIAS:
89     // Nothing to do, the instruction is always serial.
90     LLVM_FALLTHROUGH;
91   case ExecutionMode::ALWAYS_SERIAL_TIED_REGS_ALIAS: {
92     // Picking whatever value for the tied variable will make the instruction
93     // serial.
94     CodeTemplate CT;
95     CT.Execution = ExecutionModeBit;
96     CT.Info = ExecutionClassDescription;
97     CT.Instructions.push_back(Instr);
98     CodeTemplates.push_back(std::move(CT));
99     return;
100   }
101   case ExecutionMode::SERIAL_VIA_MEMORY_INSTR: {
102     // Select back-to-back memory instruction.
103     // TODO: Implement me.
104     return;
105   }
106   case ExecutionMode::SERIAL_VIA_EXPLICIT_REGS: {
107     // Making the execution of this instruction serial by selecting one def
108     // register to alias with one use register.
109     const AliasingConfigurations SelfAliasing(Instr, Instr);
110     assert(!SelfAliasing.empty() && !SelfAliasing.hasImplicitAliasing() &&
111            "Instr must alias itself explicitly");
112     InstructionTemplate IT(Instr);
113     // This is a self aliasing instruction so defs and uses are from the same
114     // instance, hence twice IT in the following call.
115     setRandomAliasing(SelfAliasing, IT, IT);
116     CodeTemplate CT;
117     CT.Execution = ExecutionModeBit;
118     CT.Info = ExecutionClassDescription;
119     CT.Instructions.push_back(std::move(IT));
120     CodeTemplates.push_back(std::move(CT));
121     return;
122   }
123   case ExecutionMode::SERIAL_VIA_NON_MEMORY_INSTR: {
124     // Select back-to-back non-memory instruction.
125     for (const auto OtherInstr :
126          computeAliasingInstructions(State, Instr, kMaxAliasingInstructions)) {
127       const AliasingConfigurations Forward(Instr, OtherInstr);
128       const AliasingConfigurations Back(OtherInstr, Instr);
129       InstructionTemplate ThisIT(Instr);
130       InstructionTemplate OtherIT(OtherInstr);
131       if (!Forward.hasImplicitAliasing())
132         setRandomAliasing(Forward, ThisIT, OtherIT);
133       if (!Back.hasImplicitAliasing())
134         setRandomAliasing(Back, OtherIT, ThisIT);
135       CodeTemplate CT;
136       CT.Execution = ExecutionModeBit;
137       CT.Info = ExecutionClassDescription;
138       CT.Instructions.push_back(std::move(ThisIT));
139       CT.Instructions.push_back(std::move(OtherIT));
140       CodeTemplates.push_back(std::move(CT));
141     }
142     return;
143   }
144   default:
145     llvm_unreachable("Unhandled enum value");
146   }
147 }
148 
149 LatencySnippetGenerator::~LatencySnippetGenerator() = default;
150 
151 llvm::Expected<std::vector<CodeTemplate>>
generateCodeTemplates(const Instruction & Instr) const152 LatencySnippetGenerator::generateCodeTemplates(const Instruction &Instr) const {
153   std::vector<CodeTemplate> Results;
154   const ExecutionMode EM = getExecutionModes(Instr);
155   for (const auto EC : kExecutionClasses) {
156     for (const auto ExecutionModeBit : getExecutionModeBits(EM & EC.Mask))
157       appendCodeTemplates(State, Instr, ExecutionModeBit, EC.Description,
158                           Results);
159     if (!Results.empty())
160       break;
161   }
162   if (Results.empty())
163     return llvm::make_error<BenchmarkFailure>(
164         "No strategy found to make the execution serial");
165   return std::move(Results);
166 }
167 
LatencyBenchmarkRunner(const LLVMState & State,InstructionBenchmark::ModeE Mode)168 LatencyBenchmarkRunner::LatencyBenchmarkRunner(const LLVMState &State,
169                                                InstructionBenchmark::ModeE Mode)
170     : BenchmarkRunner(State, Mode) {
171   assert((Mode == InstructionBenchmark::Latency ||
172           Mode == InstructionBenchmark::InverseThroughput) &&
173          "invalid mode");
174 }
175 
176 LatencyBenchmarkRunner::~LatencyBenchmarkRunner() = default;
177 
178 llvm::Expected<std::vector<BenchmarkMeasure>>
runMeasurements(const FunctionExecutor & Executor) const179 LatencyBenchmarkRunner::runMeasurements(
180     const FunctionExecutor &Executor) const {
181   // Cycle measurements include some overhead from the kernel. Repeat the
182   // measure several times and take the minimum value.
183   constexpr const int NumMeasurements = 30;
184   int64_t MinValue = std::numeric_limits<int64_t>::max();
185   const char *CounterName = State.getPfmCounters().CycleCounter;
186   if (!CounterName)
187     llvm::report_fatal_error("sched model does not define a cycle counter");
188   for (size_t I = 0; I < NumMeasurements; ++I) {
189     auto ExpectedCounterValue = Executor.runAndMeasure(CounterName);
190     if (!ExpectedCounterValue)
191       return ExpectedCounterValue.takeError();
192     if (*ExpectedCounterValue < MinValue)
193       MinValue = *ExpectedCounterValue;
194   }
195   std::vector<BenchmarkMeasure> Result;
196   switch (Mode) {
197   case InstructionBenchmark::Latency:
198     Result = {BenchmarkMeasure::Create("latency", MinValue)};
199     break;
200   case InstructionBenchmark::InverseThroughput:
201     Result = {BenchmarkMeasure::Create("inverse_throughput", MinValue)};
202     break;
203   default:
204     break;
205   }
206   return std::move(Result);
207 }
208 
209 } // namespace exegesis
210 } // namespace llvm
211