1 use crate::cdsl::ast::{Apply, Expr, Literal, VarPool};
2 use crate::cdsl::encodings::{Encoding, EncodingBuilder};
3 use crate::cdsl::instructions::{
4 Bindable, BoundInstruction, InstSpec, InstructionPredicateNode, InstructionPredicateRegistry,
5 };
6 use crate::cdsl::recipes::{EncodingRecipeNumber, Recipes};
7 use crate::cdsl::settings::SettingGroup;
8
9 use crate::shared::types::Bool::B1;
10 use crate::shared::types::Float::{F32, F64};
11 use crate::shared::types::Int::{I16, I32, I64, I8};
12 use crate::shared::types::Reference::{R32, R64};
13 use crate::shared::Definitions as SharedDefinitions;
14
15 use super::recipes::RecipeGroup;
16
/// Encoding lists built for the RISC-V target, kept separately per CPU mode.
///
/// `define` populates an instance through `add32`/`add64` and returns it.
pub(crate) struct PerCpuModeEncodings<'defs> {
    /// Instruction predicate registry handed to `EncodingBuilder::build` for every encoding
    /// that is added.
    pub inst_pred_reg: InstructionPredicateRegistry,
    /// Encodings registered through `add32` (the 32-bit architecture).
    pub enc32: Vec<Encoding>,
    /// Encodings registered through `add64` (the 64-bit architecture).
    pub enc64: Vec<Encoding>,
    /// Recipe table passed to `EncodingBuilder::build`; borrowed for `'defs`.
    recipes: &'defs Recipes,
}
23
24 impl<'defs> PerCpuModeEncodings<'defs> {
new(recipes: &'defs Recipes) -> Self25 fn new(recipes: &'defs Recipes) -> Self {
26 Self {
27 inst_pred_reg: InstructionPredicateRegistry::new(),
28 enc32: Vec::new(),
29 enc64: Vec::new(),
30 recipes,
31 }
32 }
enc( &self, inst: impl Into<InstSpec>, recipe: EncodingRecipeNumber, bits: u16, ) -> EncodingBuilder33 fn enc(
34 &self,
35 inst: impl Into<InstSpec>,
36 recipe: EncodingRecipeNumber,
37 bits: u16,
38 ) -> EncodingBuilder {
39 EncodingBuilder::new(inst.into(), recipe, bits)
40 }
add32(&mut self, encoding: EncodingBuilder)41 fn add32(&mut self, encoding: EncodingBuilder) {
42 self.enc32
43 .push(encoding.build(self.recipes, &mut self.inst_pred_reg));
44 }
add64(&mut self, encoding: EncodingBuilder)45 fn add64(&mut self, encoding: EncodingBuilder) {
46 self.enc64
47 .push(encoding.build(self.recipes, &mut self.inst_pred_reg));
48 }
49 }
50
// The low 7 bits of a RISC-V instruction are the base opcode. All 32-bit instructions have 11 as
// the two low bits, with bits 6:2 determining the base opcode.
//
// Encbits for the 32-bit recipes are opcode[6:2] | (funct3 << 5) | ...
// The functions below compute the encbits for each base opcode.
56
/// Encbits for a load: opcode[6:2] is all zeroes, so only `funct3` (placed at bits 7:5)
/// contributes.
///
/// # Panics
///
/// Panics if `funct3` does not fit in 3 bits.
fn load_bits(funct3: u16) -> u16 {
    assert!(funct3 <= 0b111);
    let funct3_field = funct3 << 5;
    funct3_field
}
61
/// Encbits for a store: opcode[6:2] = `0b01000`, with `funct3` placed at bits 7:5.
///
/// # Panics
///
/// Panics if `funct3` does not fit in 3 bits.
fn store_bits(funct3: u16) -> u16 {
    assert!(funct3 <= 0b111);
    let opcode: u16 = 0b01000;
    let funct3_field = funct3 << 5;
    opcode | funct3_field
}
66
/// Encbits for a conditional branch: opcode[6:2] = `0b11000`, with `funct3` placed at
/// bits 7:5.
///
/// # Panics
///
/// Panics if `funct3` does not fit in 3 bits.
fn branch_bits(funct3: u16) -> u16 {
    assert!(funct3 <= 0b111);
    let opcode: u16 = 0b11000;
    let funct3_field = funct3 << 5;
    opcode | funct3_field
}
71
/// Encbits for `jalr`: opcode[6:2] = `0b11001`, with the funct3 field left as zero.
///
/// An earlier version took a 3-bit `funct3` argument and returned
/// `0b11001 | (funct3 << 5)`; the argument has since been dropped.
fn jalr_bits() -> u16 {
    const JALR_OPCODE: u16 = 0b11001;
    JALR_OPCODE
}
77
/// Encbits for `jal`: opcode[6:2] = `0b11011`; no funct fields are used.
fn jal_bits() -> u16 {
    const JAL_OPCODE: u16 = 0b11011;
    JAL_OPCODE
}
81
/// Encbits for a register-immediate ALU op: opcode[6:2] = `0b00100`, `funct3` at bits 7:5
/// and `funct7` at bits 14:8.
///
/// Callers pass `funct7 = 0` for plain immediates and a non-zero value only for the shift
/// immediates.
///
/// # Panics
///
/// Panics if `funct3` does not fit in 3 bits or `funct7` does not fit in 7 bits. The
/// `funct7` check mirrors `op_bits`; without it an over-wide value would be shifted past
/// bit 15 and silently truncated.
fn opimm_bits(funct3: u16, funct7: u16) -> u16 {
    assert!(funct3 <= 0b111);
    assert!(funct7 <= 0b111_1111);
    0b00100 | (funct3 << 5) | (funct7 << 8)
}
86
/// Encbits for a 32-bit register-immediate ALU op on RV64: opcode[6:2] = `0b00110`,
/// `funct3` at bits 7:5 and `funct7` at bits 14:8.
///
/// # Panics
///
/// Panics if `funct3` does not fit in 3 bits or `funct7` does not fit in 7 bits. The
/// `funct7` check mirrors `op32_bits`; without it an over-wide value would be shifted past
/// bit 15 and silently truncated.
fn opimm32_bits(funct3: u16, funct7: u16) -> u16 {
    assert!(funct3 <= 0b111);
    assert!(funct7 <= 0b111_1111);
    0b00110 | (funct3 << 5) | (funct7 << 8)
}
91
/// Encbits for a register-register ALU op: opcode[6:2] = `0b01100`, `funct3` at bits 7:5
/// and `funct7` at bits 14:8.
///
/// # Panics
///
/// Panics if `funct3` does not fit in 3 bits or `funct7` does not fit in 7 bits.
fn op_bits(funct3: u16, funct7: u16) -> u16 {
    assert!(funct3 <= 0b111);
    assert!(funct7 <= 0b111_1111);
    let opcode: u16 = 0b01100;
    let funct3_field = funct3 << 5;
    let funct7_field = funct7 << 8;
    opcode | funct3_field | funct7_field
}
97
/// Encbits for a 32-bit register-register ALU op on RV64: opcode[6:2] = `0b01110`,
/// `funct3` at bits 7:5 and `funct7` at bits 14:8.
///
/// # Panics
///
/// Panics if `funct3` does not fit in 3 bits or `funct7` does not fit in 7 bits.
fn op32_bits(funct3: u16, funct7: u16) -> u16 {
    assert!(funct3 <= 0b111);
    assert!(funct7 <= 0b111_1111);
    let opcode: u16 = 0b01110;
    let funct3_field = funct3 << 5;
    let funct7_field = funct7 << 8;
    opcode | funct3_field | funct7_field
}
103
/// Encbits for `lui`: opcode[6:2] = `0b01101`; no funct fields are used.
fn lui_bits() -> u16 {
    const LUI_OPCODE: u16 = 0b01101;
    LUI_OPCODE
}
107
/// Builds the RISC-V encoding lists for the 32-bit and 64-bit CPU modes.
///
/// The function looks up the shared instructions and the RISC-V recipes by name, then
/// registers one encoding per (bound instruction, recipe, encbits) combination through
/// `PerCpuModeEncodings::add32` and/or `PerCpuModeEncodings::add64`.
///
/// * `shared_defs` - supplies the shared instruction set (`instructions`) and the `intcc`
///   immediate used to build condition-code predicates.
/// * `isa_settings` - supplies the `use_m` ISA predicate that gates the multiply encodings.
/// * `recipes` - supplies the recipe numbers (`by_name`) and the recipe table (`recipes`)
///   the encodings are built against.
///
/// Returns the populated `PerCpuModeEncodings`.
///
/// NOTE(review): the `by_name` lookups presumably panic on an unknown name, and the order of
/// registration is presumably significant to consumers of `enc32`/`enc64` — confirm before
/// reordering anything here.
pub(crate) fn define<'defs>(
    shared_defs: &'defs SharedDefinitions,
    isa_settings: &SettingGroup,
    recipes: &'defs RecipeGroup,
) -> PerCpuModeEncodings<'defs> {
    // Instructions shorthands.
    let shared = &shared_defs.instructions;

    let band = shared.by_name("band");
    let band_imm = shared.by_name("band_imm");
    let bor = shared.by_name("bor");
    let bor_imm = shared.by_name("bor_imm");
    let br_icmp = shared.by_name("br_icmp");
    let brz = shared.by_name("brz");
    let brnz = shared.by_name("brnz");
    let bxor = shared.by_name("bxor");
    let bxor_imm = shared.by_name("bxor_imm");
    let call = shared.by_name("call");
    let call_indirect = shared.by_name("call_indirect");
    let copy = shared.by_name("copy");
    let copy_nop = shared.by_name("copy_nop");
    let copy_to_ssa = shared.by_name("copy_to_ssa");
    let fill = shared.by_name("fill");
    let fill_nop = shared.by_name("fill_nop");
    let iadd = shared.by_name("iadd");
    let iadd_imm = shared.by_name("iadd_imm");
    let iconst = shared.by_name("iconst");
    let icmp = shared.by_name("icmp");
    let icmp_imm = shared.by_name("icmp_imm");
    let imul = shared.by_name("imul");
    let ishl = shared.by_name("ishl");
    let ishl_imm = shared.by_name("ishl_imm");
    let isub = shared.by_name("isub");
    let jump = shared.by_name("jump");
    let regmove = shared.by_name("regmove");
    let spill = shared.by_name("spill");
    let sshr = shared.by_name("sshr");
    let sshr_imm = shared.by_name("sshr_imm");
    let ushr = shared.by_name("ushr");
    let ushr_imm = shared.by_name("ushr_imm");
    let return_ = shared.by_name("return");

    // Recipes shorthands, prefixed with r_.
    let r_copytossa = recipes.by_name("copytossa");
    let r_fillnull = recipes.by_name("fillnull");
    let r_icall = recipes.by_name("Icall");
    let r_icopy = recipes.by_name("Icopy");
    let r_ii = recipes.by_name("Ii");
    let r_iicmp = recipes.by_name("Iicmp");
    let r_iret = recipes.by_name("Iret");
    let r_irmov = recipes.by_name("Irmov");
    let r_iz = recipes.by_name("Iz");
    let r_gp_sp = recipes.by_name("GPsp");
    let r_gp_fi = recipes.by_name("GPfi");
    let r_r = recipes.by_name("R");
    let r_ricmp = recipes.by_name("Ricmp");
    let r_rshamt = recipes.by_name("Rshamt");
    let r_sb = recipes.by_name("SB");
    let r_sb_zero = recipes.by_name("SBzero");
    let r_stacknull = recipes.by_name("stacknull");
    let r_u = recipes.by_name("U");
    let r_uj = recipes.by_name("UJ");
    let r_uj_call = recipes.by_name("UJcall");

    // Predicates shorthands.
    let use_m = isa_settings.predicate_by_name("use_m");

    // Definitions.
    let mut e = PerCpuModeEncodings::new(&recipes.recipes);

    // Basic arithmetic binary instructions are encoded in an R-type instruction.
    // Each tuple is (instruction, optional immediate form, funct3, funct7); `isub` has no
    // immediate form in this table.
    for &(inst, inst_imm, f3, f7) in &[
        (iadd, Some(iadd_imm), 0b000, 0b000_0000),
        (isub, None, 0b000, 0b010_0000),
        (bxor, Some(bxor_imm), 0b100, 0b000_0000),
        (bor, Some(bor_imm), 0b110, 0b000_0000),
        (band, Some(band_imm), 0b111, 0b000_0000),
    ] {
        e.add32(e.enc(inst.bind(I32), r_r, op_bits(f3, f7)));
        e.add64(e.enc(inst.bind(I64), r_r, op_bits(f3, f7)));

        // Immediate versions for add/xor/or/and.
        if let Some(inst_imm) = inst_imm {
            e.add32(e.enc(inst_imm.bind(I32), r_ii, opimm_bits(f3, 0)));
            e.add64(e.enc(inst_imm.bind(I64), r_ii, opimm_bits(f3, 0)));
        }
    }

    // 32-bit ops in RV64.
    e.add64(e.enc(iadd.bind(I32), r_r, op32_bits(0b000, 0b000_0000)));
    e.add64(e.enc(isub.bind(I32), r_r, op32_bits(0b000, 0b010_0000)));
    // There are no andiw/oriw/xoriw variations.
    e.add64(e.enc(iadd_imm.bind(I32), r_ii, opimm32_bits(0b000, 0)));

    // Use iadd_imm with %x0 to materialize constants.
    e.add32(e.enc(iconst.bind(I32), r_iz, opimm_bits(0b0, 0)));
    e.add64(e.enc(iconst.bind(I32), r_iz, opimm_bits(0b0, 0)));
    e.add64(e.enc(iconst.bind(I64), r_iz, opimm_bits(0b0, 0)));

    // Dynamic shifts have the same masking semantics as the clif base instructions.
    // Each tuple is (register-amount instruction, immediate-amount instruction, funct3, funct7).
    for &(inst, inst_imm, f3, f7) in &[
        (ishl, ishl_imm, 0b1, 0b0),
        (ushr, ushr_imm, 0b101, 0b0),
        (sshr, sshr_imm, 0b101, 0b10_0000),
    ] {
        e.add32(e.enc(inst.bind(I32).bind(I32), r_r, op_bits(f3, f7)));
        e.add64(e.enc(inst.bind(I64).bind(I64), r_r, op_bits(f3, f7)));
        e.add64(e.enc(inst.bind(I32).bind(I32), r_r, op32_bits(f3, f7)));
        // Allow i32 shift amounts in 64-bit shifts.
        e.add64(e.enc(inst.bind(I64).bind(I32), r_r, op_bits(f3, f7)));
        e.add64(e.enc(inst.bind(I32).bind(I64), r_r, op32_bits(f3, f7)));

        // Immediate shifts.
        e.add32(e.enc(inst_imm.bind(I32), r_rshamt, opimm_bits(f3, f7)));
        e.add64(e.enc(inst_imm.bind(I64), r_rshamt, opimm_bits(f3, f7)));
        e.add64(e.enc(inst_imm.bind(I32), r_rshamt, opimm32_bits(f3, f7)));
    }

    // Signed and unsigned integer 'less than'. There are no 'w' variants for comparing 32-bit
    // numbers in RV64.
    {
        let mut var_pool = VarPool::new();

        // Helper that creates an instruction predicate for an instruction in the icmp family.
        // Every call creates fresh `x`/`y` variables in `var_pool` and pins the condition code
        // to the `intcc` enumerator named by `intcc_field`.
        let mut icmp_instp = |bound_inst: &BoundInstruction,
                              intcc_field: &'static str|
         -> InstructionPredicateNode {
            let x = var_pool.create("x");
            let y = var_pool.create("y");
            let cc = Literal::enumerator_for(&shared_defs.imm.intcc, intcc_field);
            Apply::new(
                bound_inst.clone().into(),
                vec![Expr::Literal(cc), Expr::Var(x), Expr::Var(y)],
            )
            .inst_predicate(&var_pool)
            .unwrap()
        };

        let icmp_i32 = icmp.bind(I32);
        let icmp_i64 = icmp.bind(I64);
        e.add32(
            e.enc(icmp_i32.clone(), r_ricmp, op_bits(0b010, 0b000_0000))
                .inst_predicate(icmp_instp(&icmp_i32, "slt")),
        );
        e.add64(
            e.enc(icmp_i64.clone(), r_ricmp, op_bits(0b010, 0b000_0000))
                .inst_predicate(icmp_instp(&icmp_i64, "slt")),
        );

        e.add32(
            e.enc(icmp_i32.clone(), r_ricmp, op_bits(0b011, 0b000_0000))
                .inst_predicate(icmp_instp(&icmp_i32, "ult")),
        );
        e.add64(
            e.enc(icmp_i64.clone(), r_ricmp, op_bits(0b011, 0b000_0000))
                .inst_predicate(icmp_instp(&icmp_i64, "ult")),
        );

        // Immediate variants.
        // These bindings shadow the register-register ones above with `icmp_imm` versions.
        let icmp_i32 = icmp_imm.bind(I32);
        let icmp_i64 = icmp_imm.bind(I64);
        e.add32(
            e.enc(icmp_i32.clone(), r_iicmp, opimm_bits(0b010, 0))
                .inst_predicate(icmp_instp(&icmp_i32, "slt")),
        );
        e.add64(
            e.enc(icmp_i64.clone(), r_iicmp, opimm_bits(0b010, 0))
                .inst_predicate(icmp_instp(&icmp_i64, "slt")),
        );

        e.add32(
            e.enc(icmp_i32.clone(), r_iicmp, opimm_bits(0b011, 0))
                .inst_predicate(icmp_instp(&icmp_i32, "ult")),
        );
        e.add64(
            e.enc(icmp_i64.clone(), r_iicmp, opimm_bits(0b011, 0))
                .inst_predicate(icmp_instp(&icmp_i64, "ult")),
        );
    }

    // Integer constants with the low 12 bits clear are materialized by lui.
    e.add32(e.enc(iconst.bind(I32), r_u, lui_bits()));
    e.add64(e.enc(iconst.bind(I32), r_u, lui_bits()));
    e.add64(e.enc(iconst.bind(I64), r_u, lui_bits()));

    // "M" Standard Extension for Integer Multiplication and Division.
    // Gated by the `use_m` flag.
    e.add32(
        e.enc(imul.bind(I32), r_r, op_bits(0b000, 0b0000_0001))
            .isa_predicate(use_m),
    );
    e.add64(
        e.enc(imul.bind(I64), r_r, op_bits(0b000, 0b0000_0001))
            .isa_predicate(use_m),
    );
    e.add64(
        e.enc(imul.bind(I32), r_r, op32_bits(0b000, 0b0000_0001))
            .isa_predicate(use_m),
    );

    // Control flow.

    // Unconditional branches.
    e.add32(e.enc(jump, r_uj, jal_bits()));
    e.add64(e.enc(jump, r_uj, jal_bits()));
    e.add32(e.enc(call, r_uj_call, jal_bits()));
    e.add64(e.enc(call, r_uj_call, jal_bits()));

    // Conditional branches.
    {
        let mut var_pool = VarPool::new();

        // Helper that creates an instruction predicate for an instruction in the icmp family.
        // Same shape as the icmp helper above, with the extra `dest` and `args` operands that
        // `br_icmp` takes.
        let mut br_icmp_instp = |bound_inst: &BoundInstruction,
                                 intcc_field: &'static str|
         -> InstructionPredicateNode {
            let x = var_pool.create("x");
            let y = var_pool.create("y");
            let dest = var_pool.create("dest");
            let args = var_pool.create("args");
            let cc = Literal::enumerator_for(&shared_defs.imm.intcc, intcc_field);
            Apply::new(
                bound_inst.clone().into(),
                vec![
                    Expr::Literal(cc),
                    Expr::Var(x),
                    Expr::Var(y),
                    Expr::Var(dest),
                    Expr::Var(args),
                ],
            )
            .inst_predicate(&var_pool)
            .unwrap()
        };

        let br_icmp_i32 = br_icmp.bind(I32);
        let br_icmp_i64 = br_icmp.bind(I64);
        // Each tuple pairs an `intcc` condition name with the branch funct3 it is encoded as.
        for &(cond, f3) in &[
            ("eq", 0b000),
            ("ne", 0b001),
            ("slt", 0b100),
            ("sge", 0b101),
            ("ult", 0b110),
            ("uge", 0b111),
        ] {
            e.add32(
                e.enc(br_icmp_i32.clone(), r_sb, branch_bits(f3))
                    .inst_predicate(br_icmp_instp(&br_icmp_i32, cond)),
            );
            e.add64(
                e.enc(br_icmp_i64.clone(), r_sb, branch_bits(f3))
                    .inst_predicate(br_icmp_instp(&br_icmp_i64, cond)),
            );
        }
    }

    // Branches on zero / non-zero reuse the `eq` / `ne` funct3 values with the SBzero recipe.
    for &(inst, f3) in &[(brz, 0b000), (brnz, 0b001)] {
        e.add32(e.enc(inst.bind(I32), r_sb_zero, branch_bits(f3)));
        e.add64(e.enc(inst.bind(I64), r_sb_zero, branch_bits(f3)));
        e.add32(e.enc(inst.bind(B1), r_sb_zero, branch_bits(f3)));
        e.add64(e.enc(inst.bind(B1), r_sb_zero, branch_bits(f3)));
    }

    // Returns are a special case of jalr_bits using %x1 to hold the return address.
    // The return address is provided by a special-purpose `link` return value that
    // is added by legalize_signature().
    e.add32(e.enc(return_, r_iret, jalr_bits()));
    e.add64(e.enc(return_, r_iret, jalr_bits()));
    e.add32(e.enc(call_indirect.bind(I32), r_icall, jalr_bits()));
    e.add64(e.enc(call_indirect.bind(I64), r_icall, jalr_bits()));

    // Spill and fill.
    // funct3 is 0b010 for the I32 forms and 0b011 for the I64 forms.
    e.add32(e.enc(spill.bind(I32), r_gp_sp, store_bits(0b010)));
    e.add64(e.enc(spill.bind(I32), r_gp_sp, store_bits(0b010)));
    e.add64(e.enc(spill.bind(I64), r_gp_sp, store_bits(0b011)));
    e.add32(e.enc(fill.bind(I32), r_gp_fi, load_bits(0b010)));
    e.add64(e.enc(fill.bind(I32), r_gp_fi, load_bits(0b010)));
    e.add64(e.enc(fill.bind(I64), r_gp_fi, load_bits(0b011)));

    // No-op fills, created by late-stage redundant-fill removal.
    for &ty in &[I64, I32] {
        e.add64(e.enc(fill_nop.bind(ty), r_fillnull, 0));
        e.add32(e.enc(fill_nop.bind(ty), r_fillnull, 0));
    }
    e.add64(e.enc(fill_nop.bind(B1), r_fillnull, 0));
    e.add32(e.enc(fill_nop.bind(B1), r_fillnull, 0));

    // Register copies.
    e.add32(e.enc(copy.bind(I32), r_icopy, opimm_bits(0b000, 0)));
    e.add64(e.enc(copy.bind(I64), r_icopy, opimm_bits(0b000, 0)));
    e.add64(e.enc(copy.bind(I32), r_icopy, opimm32_bits(0b000, 0)));

    e.add32(e.enc(regmove.bind(I32), r_irmov, opimm_bits(0b000, 0)));
    e.add64(e.enc(regmove.bind(I64), r_irmov, opimm_bits(0b000, 0)));
    e.add64(e.enc(regmove.bind(I32), r_irmov, opimm32_bits(0b000, 0)));

    e.add32(e.enc(copy.bind(B1), r_icopy, opimm_bits(0b000, 0)));
    e.add64(e.enc(copy.bind(B1), r_icopy, opimm_bits(0b000, 0)));
    e.add32(e.enc(regmove.bind(B1), r_irmov, opimm_bits(0b000, 0)));
    e.add64(e.enc(regmove.bind(B1), r_irmov, opimm_bits(0b000, 0)));

    // Stack-slot-to-the-same-stack-slot copy, which is guaranteed to turn
    // into a no-op.
    // The same encoding is generated for both the 64- and 32-bit architectures.
    for &ty in &[I64, I32, I16, I8] {
        e.add32(e.enc(copy_nop.bind(ty), r_stacknull, 0));
        e.add64(e.enc(copy_nop.bind(ty), r_stacknull, 0));
    }
    for &ty in &[F64, F32] {
        e.add32(e.enc(copy_nop.bind(ty), r_stacknull, 0));
        e.add64(e.enc(copy_nop.bind(ty), r_stacknull, 0));
    }

    // Copy-to-SSA
    e.add32(e.enc(copy_to_ssa.bind(I32), r_copytossa, opimm_bits(0b000, 0)));
    e.add64(e.enc(copy_to_ssa.bind(I64), r_copytossa, opimm_bits(0b000, 0)));
    e.add64(e.enc(copy_to_ssa.bind(I32), r_copytossa, opimm32_bits(0b000, 0)));
    e.add32(e.enc(copy_to_ssa.bind(B1), r_copytossa, opimm_bits(0b000, 0)));
    e.add64(e.enc(copy_to_ssa.bind(B1), r_copytossa, opimm_bits(0b000, 0)));
    e.add32(e.enc(copy_to_ssa.bind(R32), r_copytossa, opimm_bits(0b000, 0)));
    e.add64(e.enc(copy_to_ssa.bind(R64), r_copytossa, opimm_bits(0b000, 0)));

    e
}
432