1 //===-- Latency.cpp ---------------------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8
9 #include "Latency.h"
10
11 #include "Assembler.h"
12 #include "BenchmarkRunner.h"
13 #include "MCInstrDescView.h"
14 #include "PerfHelper.h"
15 #include "Target.h"
16 #include "llvm/ADT/STLExtras.h"
17 #include "llvm/MC/MCInst.h"
18 #include "llvm/MC/MCInstBuilder.h"
19 #include "llvm/Support/FormatVariadic.h"
20
21 namespace llvm {
22 namespace exegesis {
23
24 struct ExecutionClass {
25 ExecutionMode Mask;
26 const char *Description;
27 } static const kExecutionClasses[] = {
28 {ExecutionMode::ALWAYS_SERIAL_IMPLICIT_REGS_ALIAS |
29 ExecutionMode::ALWAYS_SERIAL_TIED_REGS_ALIAS,
30 "Repeating a single implicitly serial instruction"},
31 {ExecutionMode::SERIAL_VIA_EXPLICIT_REGS,
32 "Repeating a single explicitly serial instruction"},
33 {ExecutionMode::SERIAL_VIA_MEMORY_INSTR |
34 ExecutionMode::SERIAL_VIA_NON_MEMORY_INSTR,
35 "Repeating two instructions"},
36 };
37
38 static constexpr size_t kMaxAliasingInstructions = 10;
39
40 static std::vector<Instruction>
computeAliasingInstructions(const LLVMState & State,const Instruction & Instr,size_t MaxAliasingInstructions)41 computeAliasingInstructions(const LLVMState &State, const Instruction &Instr,
42 size_t MaxAliasingInstructions) {
43 // Randomly iterate the set of instructions.
44 std::vector<unsigned> Opcodes;
45 Opcodes.resize(State.getInstrInfo().getNumOpcodes());
46 std::iota(Opcodes.begin(), Opcodes.end(), 0U);
47 std::shuffle(Opcodes.begin(), Opcodes.end(), randomGenerator());
48
49 std::vector<Instruction> AliasingInstructions;
50 for (const unsigned OtherOpcode : Opcodes) {
51 if (OtherOpcode == Instr.Description->getOpcode())
52 continue;
53 const Instruction &OtherInstr = State.getIC().getInstr(OtherOpcode);
54 if (OtherInstr.hasMemoryOperands())
55 continue;
56 if (Instr.hasAliasingRegistersThrough(OtherInstr))
57 AliasingInstructions.push_back(std::move(OtherInstr));
58 if (AliasingInstructions.size() >= MaxAliasingInstructions)
59 break;
60 }
61 return AliasingInstructions;
62 }
63
getExecutionModes(const Instruction & Instr)64 static ExecutionMode getExecutionModes(const Instruction &Instr) {
65 ExecutionMode EM = ExecutionMode::UNKNOWN;
66 if (Instr.hasAliasingImplicitRegisters())
67 EM |= ExecutionMode::ALWAYS_SERIAL_IMPLICIT_REGS_ALIAS;
68 if (Instr.hasTiedRegisters())
69 EM |= ExecutionMode::ALWAYS_SERIAL_TIED_REGS_ALIAS;
70 if (Instr.hasMemoryOperands())
71 EM |= ExecutionMode::SERIAL_VIA_MEMORY_INSTR;
72 else {
73 if (Instr.hasAliasingRegisters())
74 EM |= ExecutionMode::SERIAL_VIA_EXPLICIT_REGS;
75 if (Instr.hasOneUseOrOneDef())
76 EM |= ExecutionMode::SERIAL_VIA_NON_MEMORY_INSTR;
77 }
78 return EM;
79 }
80
appendCodeTemplates(const LLVMState & State,const Instruction & Instr,ExecutionMode ExecutionModeBit,llvm::StringRef ExecutionClassDescription,std::vector<CodeTemplate> & CodeTemplates)81 static void appendCodeTemplates(const LLVMState &State,
82 const Instruction &Instr,
83 ExecutionMode ExecutionModeBit,
84 llvm::StringRef ExecutionClassDescription,
85 std::vector<CodeTemplate> &CodeTemplates) {
86 assert(isEnumValue(ExecutionModeBit) && "Bit must be a power of two");
87 switch (ExecutionModeBit) {
88 case ExecutionMode::ALWAYS_SERIAL_IMPLICIT_REGS_ALIAS:
89 // Nothing to do, the instruction is always serial.
90 LLVM_FALLTHROUGH;
91 case ExecutionMode::ALWAYS_SERIAL_TIED_REGS_ALIAS: {
92 // Picking whatever value for the tied variable will make the instruction
93 // serial.
94 CodeTemplate CT;
95 CT.Execution = ExecutionModeBit;
96 CT.Info = ExecutionClassDescription;
97 CT.Instructions.push_back(Instr);
98 CodeTemplates.push_back(std::move(CT));
99 return;
100 }
101 case ExecutionMode::SERIAL_VIA_MEMORY_INSTR: {
102 // Select back-to-back memory instruction.
103 // TODO: Implement me.
104 return;
105 }
106 case ExecutionMode::SERIAL_VIA_EXPLICIT_REGS: {
107 // Making the execution of this instruction serial by selecting one def
108 // register to alias with one use register.
109 const AliasingConfigurations SelfAliasing(Instr, Instr);
110 assert(!SelfAliasing.empty() && !SelfAliasing.hasImplicitAliasing() &&
111 "Instr must alias itself explicitly");
112 InstructionTemplate IT(Instr);
113 // This is a self aliasing instruction so defs and uses are from the same
114 // instance, hence twice IT in the following call.
115 setRandomAliasing(SelfAliasing, IT, IT);
116 CodeTemplate CT;
117 CT.Execution = ExecutionModeBit;
118 CT.Info = ExecutionClassDescription;
119 CT.Instructions.push_back(std::move(IT));
120 CodeTemplates.push_back(std::move(CT));
121 return;
122 }
123 case ExecutionMode::SERIAL_VIA_NON_MEMORY_INSTR: {
124 // Select back-to-back non-memory instruction.
125 for (const auto OtherInstr :
126 computeAliasingInstructions(State, Instr, kMaxAliasingInstructions)) {
127 const AliasingConfigurations Forward(Instr, OtherInstr);
128 const AliasingConfigurations Back(OtherInstr, Instr);
129 InstructionTemplate ThisIT(Instr);
130 InstructionTemplate OtherIT(OtherInstr);
131 if (!Forward.hasImplicitAliasing())
132 setRandomAliasing(Forward, ThisIT, OtherIT);
133 if (!Back.hasImplicitAliasing())
134 setRandomAliasing(Back, OtherIT, ThisIT);
135 CodeTemplate CT;
136 CT.Execution = ExecutionModeBit;
137 CT.Info = ExecutionClassDescription;
138 CT.Instructions.push_back(std::move(ThisIT));
139 CT.Instructions.push_back(std::move(OtherIT));
140 CodeTemplates.push_back(std::move(CT));
141 }
142 return;
143 }
144 default:
145 llvm_unreachable("Unhandled enum value");
146 }
147 }
148
149 LatencySnippetGenerator::~LatencySnippetGenerator() = default;
150
151 llvm::Expected<std::vector<CodeTemplate>>
generateCodeTemplates(const Instruction & Instr) const152 LatencySnippetGenerator::generateCodeTemplates(const Instruction &Instr) const {
153 std::vector<CodeTemplate> Results;
154 const ExecutionMode EM = getExecutionModes(Instr);
155 for (const auto EC : kExecutionClasses) {
156 for (const auto ExecutionModeBit : getExecutionModeBits(EM & EC.Mask))
157 appendCodeTemplates(State, Instr, ExecutionModeBit, EC.Description,
158 Results);
159 if (!Results.empty())
160 break;
161 }
162 if (Results.empty())
163 return llvm::make_error<BenchmarkFailure>(
164 "No strategy found to make the execution serial");
165 return std::move(Results);
166 }
167
LatencyBenchmarkRunner(const LLVMState & State,InstructionBenchmark::ModeE Mode)168 LatencyBenchmarkRunner::LatencyBenchmarkRunner(const LLVMState &State,
169 InstructionBenchmark::ModeE Mode)
170 : BenchmarkRunner(State, Mode) {
171 assert((Mode == InstructionBenchmark::Latency ||
172 Mode == InstructionBenchmark::InverseThroughput) &&
173 "invalid mode");
174 }
175
176 LatencyBenchmarkRunner::~LatencyBenchmarkRunner() = default;
177
178 llvm::Expected<std::vector<BenchmarkMeasure>>
runMeasurements(const FunctionExecutor & Executor) const179 LatencyBenchmarkRunner::runMeasurements(
180 const FunctionExecutor &Executor) const {
181 // Cycle measurements include some overhead from the kernel. Repeat the
182 // measure several times and take the minimum value.
183 constexpr const int NumMeasurements = 30;
184 int64_t MinValue = std::numeric_limits<int64_t>::max();
185 const char *CounterName = State.getPfmCounters().CycleCounter;
186 if (!CounterName)
187 llvm::report_fatal_error("sched model does not define a cycle counter");
188 for (size_t I = 0; I < NumMeasurements; ++I) {
189 auto ExpectedCounterValue = Executor.runAndMeasure(CounterName);
190 if (!ExpectedCounterValue)
191 return ExpectedCounterValue.takeError();
192 if (*ExpectedCounterValue < MinValue)
193 MinValue = *ExpectedCounterValue;
194 }
195 std::vector<BenchmarkMeasure> Result;
196 switch (Mode) {
197 case InstructionBenchmark::Latency:
198 Result = {BenchmarkMeasure::Create("latency", MinValue)};
199 break;
200 case InstructionBenchmark::InverseThroughput:
201 Result = {BenchmarkMeasure::Create("inverse_throughput", MinValue)};
202 break;
203 default:
204 break;
205 }
206 return std::move(Result);
207 }
208
209 } // namespace exegesis
210 } // namespace llvm
211