1 use crate::cdsl::ast::{Apply, Expr, Literal, VarPool};
2 use crate::cdsl::encodings::{Encoding, EncodingBuilder};
3 use crate::cdsl::instructions::{
4     Bindable, BoundInstruction, InstSpec, InstructionPredicateNode, InstructionPredicateRegistry,
5 };
6 use crate::cdsl::recipes::{EncodingRecipeNumber, Recipes};
7 use crate::cdsl::settings::SettingGroup;
8 
9 use crate::shared::types::Bool::B1;
10 use crate::shared::types::Float::{F32, F64};
11 use crate::shared::types::Int::{I16, I32, I64, I8};
12 use crate::shared::types::Reference::{R32, R64};
13 use crate::shared::Definitions as SharedDefinitions;
14 
15 use super::recipes::RecipeGroup;
16 
17 pub(crate) struct PerCpuModeEncodings<'defs> {
18     pub inst_pred_reg: InstructionPredicateRegistry,
19     pub enc32: Vec<Encoding>,
20     pub enc64: Vec<Encoding>,
21     recipes: &'defs Recipes,
22 }
23 
24 impl<'defs> PerCpuModeEncodings<'defs> {
new(recipes: &'defs Recipes) -> Self25     fn new(recipes: &'defs Recipes) -> Self {
26         Self {
27             inst_pred_reg: InstructionPredicateRegistry::new(),
28             enc32: Vec::new(),
29             enc64: Vec::new(),
30             recipes,
31         }
32     }
enc( &self, inst: impl Into<InstSpec>, recipe: EncodingRecipeNumber, bits: u16, ) -> EncodingBuilder33     fn enc(
34         &self,
35         inst: impl Into<InstSpec>,
36         recipe: EncodingRecipeNumber,
37         bits: u16,
38     ) -> EncodingBuilder {
39         EncodingBuilder::new(inst.into(), recipe, bits)
40     }
add32(&mut self, encoding: EncodingBuilder)41     fn add32(&mut self, encoding: EncodingBuilder) {
42         self.enc32
43             .push(encoding.build(self.recipes, &mut self.inst_pred_reg));
44     }
add64(&mut self, encoding: EncodingBuilder)45     fn add64(&mut self, encoding: EncodingBuilder) {
46         self.enc64
47             .push(encoding.build(self.recipes, &mut self.inst_pred_reg));
48     }
49 }
50 
// The low 7 bits of a RISC-V instruction are the base opcode. All 32-bit instructions have 11 as
// the two low bits, with bits 6:2 determining the base opcode.
53 //
54 // Encbits for the 32-bit recipes are opcode[6:2] | (funct3 << 5) | ...
55 // The functions below encode the encbits.
56 
/// Encbits for load recipes: opcode bits `0b00000` with `funct3` placed at bit 5.
///
/// # Panics
///
/// Panics if `funct3` does not fit in 3 bits.
fn load_bits(funct3: u16) -> u16 {
    assert!(funct3 <= 0b111);
    const OPCODE: u16 = 0b00000;
    let funct3_field = funct3 << 5;
    OPCODE | funct3_field
}
61 
/// Encbits for store recipes: opcode bits `0b01000` with `funct3` placed at bit 5.
///
/// # Panics
///
/// Panics if `funct3` does not fit in 3 bits.
fn store_bits(funct3: u16) -> u16 {
    assert!(funct3 <= 0b111);
    const OPCODE: u16 = 0b01000;
    let funct3_field = funct3 << 5;
    OPCODE | funct3_field
}
66 
/// Encbits for branch recipes: opcode bits `0b11000` with `funct3` placed at bit 5.
///
/// # Panics
///
/// Panics if `funct3` does not fit in 3 bits.
fn branch_bits(funct3: u16) -> u16 {
    assert!(funct3 <= 0b111);
    const OPCODE: u16 = 0b11000;
    let funct3_field = funct3 << 5;
    OPCODE | funct3_field
}
71 
/// Encbits for the `jalr`-based recipes: the opcode bits `0b11001` and nothing else.
///
/// An earlier version took a 3-bit `funct3` argument and OR-ed `funct3 << 5` into the result;
/// the current version takes no argument.
fn jalr_bits() -> u16 {
    const OPCODE: u16 = 0b11001;
    OPCODE
}
77 
/// Encbits for the `jal`-based recipes: the opcode bits `0b11011`.
fn jal_bits() -> u16 {
    const OPCODE: u16 = 0b11011;
    OPCODE
}
81 
/// Encbits for register-immediate recipes: opcode bits `0b00100`, `funct3` at bit 5 and
/// `funct7` at bit 8.
///
/// # Panics
///
/// Panics if `funct3` does not fit in 3 bits or `funct7` does not fit in 7 bits.
fn opimm_bits(funct3: u16, funct7: u16) -> u16 {
    assert!(funct3 <= 0b111);
    // Same range check as `op_bits`: a wider value would spill into bit 15 or be shifted out
    // of the `u16` without any diagnostic.
    assert!(funct7 <= 0b111_1111);
    0b00100 | (funct3 << 5) | (funct7 << 8)
}
86 
/// Encbits for the 32-bit register-immediate recipes: opcode bits `0b00110`, `funct3` at
/// bit 5 and `funct7` at bit 8.
///
/// # Panics
///
/// Panics if `funct3` does not fit in 3 bits or `funct7` does not fit in 7 bits.
fn opimm32_bits(funct3: u16, funct7: u16) -> u16 {
    assert!(funct3 <= 0b111);
    // Same range check as `op32_bits`: a wider value would spill into bit 15 or be shifted
    // out of the `u16` without any diagnostic.
    assert!(funct7 <= 0b111_1111);
    0b00110 | (funct3 << 5) | (funct7 << 8)
}
91 
/// Encbits for register-register recipes: opcode bits `0b01100`, `funct3` at bit 5 and
/// `funct7` at bit 8.
///
/// # Panics
///
/// Panics if `funct3` does not fit in 3 bits or `funct7` does not fit in 7 bits.
fn op_bits(funct3: u16, funct7: u16) -> u16 {
    assert!(funct3 <= 0b111);
    assert!(funct7 <= 0b111_1111);
    const OPCODE: u16 = 0b01100;
    let funct_fields = (funct3 << 5) | (funct7 << 8);
    OPCODE | funct_fields
}
97 
/// Encbits for the 32-bit register-register recipes: opcode bits `0b01110`, `funct3` at
/// bit 5 and `funct7` at bit 8.
///
/// # Panics
///
/// Panics if `funct3` does not fit in 3 bits or `funct7` does not fit in 7 bits.
fn op32_bits(funct3: u16, funct7: u16) -> u16 {
    assert!(funct3 <= 0b111);
    assert!(funct7 <= 0b111_1111);
    const OPCODE: u16 = 0b01110;
    let funct_fields = (funct3 << 5) | (funct7 << 8);
    OPCODE | funct_fields
}
103 
/// Encbits for the `lui` recipe: the opcode bits `0b01101`.
fn lui_bits() -> u16 {
    const OPCODE: u16 = 0b01101;
    OPCODE
}
107 
define<'defs>( shared_defs: &'defs SharedDefinitions, isa_settings: &SettingGroup, recipes: &'defs RecipeGroup, ) -> PerCpuModeEncodings<'defs>108 pub(crate) fn define<'defs>(
109     shared_defs: &'defs SharedDefinitions,
110     isa_settings: &SettingGroup,
111     recipes: &'defs RecipeGroup,
112 ) -> PerCpuModeEncodings<'defs> {
113     // Instructions shorthands.
114     let shared = &shared_defs.instructions;
115 
116     let band = shared.by_name("band");
117     let band_imm = shared.by_name("band_imm");
118     let bor = shared.by_name("bor");
119     let bor_imm = shared.by_name("bor_imm");
120     let br_icmp = shared.by_name("br_icmp");
121     let brz = shared.by_name("brz");
122     let brnz = shared.by_name("brnz");
123     let bxor = shared.by_name("bxor");
124     let bxor_imm = shared.by_name("bxor_imm");
125     let call = shared.by_name("call");
126     let call_indirect = shared.by_name("call_indirect");
127     let copy = shared.by_name("copy");
128     let copy_nop = shared.by_name("copy_nop");
129     let copy_to_ssa = shared.by_name("copy_to_ssa");
130     let fill = shared.by_name("fill");
131     let fill_nop = shared.by_name("fill_nop");
132     let iadd = shared.by_name("iadd");
133     let iadd_imm = shared.by_name("iadd_imm");
134     let iconst = shared.by_name("iconst");
135     let icmp = shared.by_name("icmp");
136     let icmp_imm = shared.by_name("icmp_imm");
137     let imul = shared.by_name("imul");
138     let ishl = shared.by_name("ishl");
139     let ishl_imm = shared.by_name("ishl_imm");
140     let isub = shared.by_name("isub");
141     let jump = shared.by_name("jump");
142     let regmove = shared.by_name("regmove");
143     let spill = shared.by_name("spill");
144     let sshr = shared.by_name("sshr");
145     let sshr_imm = shared.by_name("sshr_imm");
146     let ushr = shared.by_name("ushr");
147     let ushr_imm = shared.by_name("ushr_imm");
148     let return_ = shared.by_name("return");
149 
150     // Recipes shorthands, prefixed with r_.
151     let r_copytossa = recipes.by_name("copytossa");
152     let r_fillnull = recipes.by_name("fillnull");
153     let r_icall = recipes.by_name("Icall");
154     let r_icopy = recipes.by_name("Icopy");
155     let r_ii = recipes.by_name("Ii");
156     let r_iicmp = recipes.by_name("Iicmp");
157     let r_iret = recipes.by_name("Iret");
158     let r_irmov = recipes.by_name("Irmov");
159     let r_iz = recipes.by_name("Iz");
160     let r_gp_sp = recipes.by_name("GPsp");
161     let r_gp_fi = recipes.by_name("GPfi");
162     let r_r = recipes.by_name("R");
163     let r_ricmp = recipes.by_name("Ricmp");
164     let r_rshamt = recipes.by_name("Rshamt");
165     let r_sb = recipes.by_name("SB");
166     let r_sb_zero = recipes.by_name("SBzero");
167     let r_stacknull = recipes.by_name("stacknull");
168     let r_u = recipes.by_name("U");
169     let r_uj = recipes.by_name("UJ");
170     let r_uj_call = recipes.by_name("UJcall");
171 
172     // Predicates shorthands.
173     let use_m = isa_settings.predicate_by_name("use_m");
174 
175     // Definitions.
176     let mut e = PerCpuModeEncodings::new(&recipes.recipes);
177 
178     // Basic arithmetic binary instructions are encoded in an R-type instruction.
179     for &(inst, inst_imm, f3, f7) in &[
180         (iadd, Some(iadd_imm), 0b000, 0b000_0000),
181         (isub, None, 0b000, 0b010_0000),
182         (bxor, Some(bxor_imm), 0b100, 0b000_0000),
183         (bor, Some(bor_imm), 0b110, 0b000_0000),
184         (band, Some(band_imm), 0b111, 0b000_0000),
185     ] {
186         e.add32(e.enc(inst.bind(I32), r_r, op_bits(f3, f7)));
187         e.add64(e.enc(inst.bind(I64), r_r, op_bits(f3, f7)));
188 
189         // Immediate versions for add/xor/or/and.
190         if let Some(inst_imm) = inst_imm {
191             e.add32(e.enc(inst_imm.bind(I32), r_ii, opimm_bits(f3, 0)));
192             e.add64(e.enc(inst_imm.bind(I64), r_ii, opimm_bits(f3, 0)));
193         }
194     }
195 
196     // 32-bit ops in RV64.
197     e.add64(e.enc(iadd.bind(I32), r_r, op32_bits(0b000, 0b000_0000)));
198     e.add64(e.enc(isub.bind(I32), r_r, op32_bits(0b000, 0b010_0000)));
199     // There are no andiw/oriw/xoriw variations.
200     e.add64(e.enc(iadd_imm.bind(I32), r_ii, opimm32_bits(0b000, 0)));
201 
202     // Use iadd_imm with %x0 to materialize constants.
203     e.add32(e.enc(iconst.bind(I32), r_iz, opimm_bits(0b0, 0)));
204     e.add64(e.enc(iconst.bind(I32), r_iz, opimm_bits(0b0, 0)));
205     e.add64(e.enc(iconst.bind(I64), r_iz, opimm_bits(0b0, 0)));
206 
207     // Dynamic shifts have the same masking semantics as the clif base instructions.
208     for &(inst, inst_imm, f3, f7) in &[
209         (ishl, ishl_imm, 0b1, 0b0),
210         (ushr, ushr_imm, 0b101, 0b0),
211         (sshr, sshr_imm, 0b101, 0b10_0000),
212     ] {
213         e.add32(e.enc(inst.bind(I32).bind(I32), r_r, op_bits(f3, f7)));
214         e.add64(e.enc(inst.bind(I64).bind(I64), r_r, op_bits(f3, f7)));
215         e.add64(e.enc(inst.bind(I32).bind(I32), r_r, op32_bits(f3, f7)));
216         // Allow i32 shift amounts in 64-bit shifts.
217         e.add64(e.enc(inst.bind(I64).bind(I32), r_r, op_bits(f3, f7)));
218         e.add64(e.enc(inst.bind(I32).bind(I64), r_r, op32_bits(f3, f7)));
219 
220         // Immediate shifts.
221         e.add32(e.enc(inst_imm.bind(I32), r_rshamt, opimm_bits(f3, f7)));
222         e.add64(e.enc(inst_imm.bind(I64), r_rshamt, opimm_bits(f3, f7)));
223         e.add64(e.enc(inst_imm.bind(I32), r_rshamt, opimm32_bits(f3, f7)));
224     }
225 
226     // Signed and unsigned integer 'less than'. There are no 'w' variants for comparing 32-bit
227     // numbers in RV64.
228     {
229         let mut var_pool = VarPool::new();
230 
231         // Helper that creates an instruction predicate for an instruction in the icmp family.
232         let mut icmp_instp = |bound_inst: &BoundInstruction,
233                               intcc_field: &'static str|
234          -> InstructionPredicateNode {
235             let x = var_pool.create("x");
236             let y = var_pool.create("y");
237             let cc = Literal::enumerator_for(&shared_defs.imm.intcc, intcc_field);
238             Apply::new(
239                 bound_inst.clone().into(),
240                 vec![Expr::Literal(cc), Expr::Var(x), Expr::Var(y)],
241             )
242             .inst_predicate(&var_pool)
243             .unwrap()
244         };
245 
246         let icmp_i32 = icmp.bind(I32);
247         let icmp_i64 = icmp.bind(I64);
248         e.add32(
249             e.enc(icmp_i32.clone(), r_ricmp, op_bits(0b010, 0b000_0000))
250                 .inst_predicate(icmp_instp(&icmp_i32, "slt")),
251         );
252         e.add64(
253             e.enc(icmp_i64.clone(), r_ricmp, op_bits(0b010, 0b000_0000))
254                 .inst_predicate(icmp_instp(&icmp_i64, "slt")),
255         );
256 
257         e.add32(
258             e.enc(icmp_i32.clone(), r_ricmp, op_bits(0b011, 0b000_0000))
259                 .inst_predicate(icmp_instp(&icmp_i32, "ult")),
260         );
261         e.add64(
262             e.enc(icmp_i64.clone(), r_ricmp, op_bits(0b011, 0b000_0000))
263                 .inst_predicate(icmp_instp(&icmp_i64, "ult")),
264         );
265 
266         // Immediate variants.
267         let icmp_i32 = icmp_imm.bind(I32);
268         let icmp_i64 = icmp_imm.bind(I64);
269         e.add32(
270             e.enc(icmp_i32.clone(), r_iicmp, opimm_bits(0b010, 0))
271                 .inst_predicate(icmp_instp(&icmp_i32, "slt")),
272         );
273         e.add64(
274             e.enc(icmp_i64.clone(), r_iicmp, opimm_bits(0b010, 0))
275                 .inst_predicate(icmp_instp(&icmp_i64, "slt")),
276         );
277 
278         e.add32(
279             e.enc(icmp_i32.clone(), r_iicmp, opimm_bits(0b011, 0))
280                 .inst_predicate(icmp_instp(&icmp_i32, "ult")),
281         );
282         e.add64(
283             e.enc(icmp_i64.clone(), r_iicmp, opimm_bits(0b011, 0))
284                 .inst_predicate(icmp_instp(&icmp_i64, "ult")),
285         );
286     }
287 
288     // Integer constants with the low 12 bits clear are materialized by lui.
289     e.add32(e.enc(iconst.bind(I32), r_u, lui_bits()));
290     e.add64(e.enc(iconst.bind(I32), r_u, lui_bits()));
291     e.add64(e.enc(iconst.bind(I64), r_u, lui_bits()));
292 
293     // "M" Standard Extension for Integer Multiplication and Division.
294     // Gated by the `use_m` flag.
295     e.add32(
296         e.enc(imul.bind(I32), r_r, op_bits(0b000, 0b0000_0001))
297             .isa_predicate(use_m),
298     );
299     e.add64(
300         e.enc(imul.bind(I64), r_r, op_bits(0b000, 0b0000_0001))
301             .isa_predicate(use_m),
302     );
303     e.add64(
304         e.enc(imul.bind(I32), r_r, op32_bits(0b000, 0b0000_0001))
305             .isa_predicate(use_m),
306     );
307 
308     // Control flow.
309 
310     // Unconditional branches.
311     e.add32(e.enc(jump, r_uj, jal_bits()));
312     e.add64(e.enc(jump, r_uj, jal_bits()));
313     e.add32(e.enc(call, r_uj_call, jal_bits()));
314     e.add64(e.enc(call, r_uj_call, jal_bits()));
315 
316     // Conditional branches.
317     {
318         let mut var_pool = VarPool::new();
319 
320         // Helper that creates an instruction predicate for an instruction in the icmp family.
321         let mut br_icmp_instp = |bound_inst: &BoundInstruction,
322                                  intcc_field: &'static str|
323          -> InstructionPredicateNode {
324             let x = var_pool.create("x");
325             let y = var_pool.create("y");
326             let dest = var_pool.create("dest");
327             let args = var_pool.create("args");
328             let cc = Literal::enumerator_for(&shared_defs.imm.intcc, intcc_field);
329             Apply::new(
330                 bound_inst.clone().into(),
331                 vec![
332                     Expr::Literal(cc),
333                     Expr::Var(x),
334                     Expr::Var(y),
335                     Expr::Var(dest),
336                     Expr::Var(args),
337                 ],
338             )
339             .inst_predicate(&var_pool)
340             .unwrap()
341         };
342 
343         let br_icmp_i32 = br_icmp.bind(I32);
344         let br_icmp_i64 = br_icmp.bind(I64);
345         for &(cond, f3) in &[
346             ("eq", 0b000),
347             ("ne", 0b001),
348             ("slt", 0b100),
349             ("sge", 0b101),
350             ("ult", 0b110),
351             ("uge", 0b111),
352         ] {
353             e.add32(
354                 e.enc(br_icmp_i32.clone(), r_sb, branch_bits(f3))
355                     .inst_predicate(br_icmp_instp(&br_icmp_i32, cond)),
356             );
357             e.add64(
358                 e.enc(br_icmp_i64.clone(), r_sb, branch_bits(f3))
359                     .inst_predicate(br_icmp_instp(&br_icmp_i64, cond)),
360             );
361         }
362     }
363 
364     for &(inst, f3) in &[(brz, 0b000), (brnz, 0b001)] {
365         e.add32(e.enc(inst.bind(I32), r_sb_zero, branch_bits(f3)));
366         e.add64(e.enc(inst.bind(I64), r_sb_zero, branch_bits(f3)));
367         e.add32(e.enc(inst.bind(B1), r_sb_zero, branch_bits(f3)));
368         e.add64(e.enc(inst.bind(B1), r_sb_zero, branch_bits(f3)));
369     }
370 
371     // Returns are a special case of jalr_bits using %x1 to hold the return address.
372     // The return address is provided by a special-purpose `link` return value that
373     // is added by legalize_signature().
374     e.add32(e.enc(return_, r_iret, jalr_bits()));
375     e.add64(e.enc(return_, r_iret, jalr_bits()));
376     e.add32(e.enc(call_indirect.bind(I32), r_icall, jalr_bits()));
377     e.add64(e.enc(call_indirect.bind(I64), r_icall, jalr_bits()));
378 
379     // Spill and fill.
380     e.add32(e.enc(spill.bind(I32), r_gp_sp, store_bits(0b010)));
381     e.add64(e.enc(spill.bind(I32), r_gp_sp, store_bits(0b010)));
382     e.add64(e.enc(spill.bind(I64), r_gp_sp, store_bits(0b011)));
383     e.add32(e.enc(fill.bind(I32), r_gp_fi, load_bits(0b010)));
384     e.add64(e.enc(fill.bind(I32), r_gp_fi, load_bits(0b010)));
385     e.add64(e.enc(fill.bind(I64), r_gp_fi, load_bits(0b011)));
386 
387     // No-op fills, created by late-stage redundant-fill removal.
388     for &ty in &[I64, I32] {
389         e.add64(e.enc(fill_nop.bind(ty), r_fillnull, 0));
390         e.add32(e.enc(fill_nop.bind(ty), r_fillnull, 0));
391     }
392     e.add64(e.enc(fill_nop.bind(B1), r_fillnull, 0));
393     e.add32(e.enc(fill_nop.bind(B1), r_fillnull, 0));
394 
395     // Register copies.
396     e.add32(e.enc(copy.bind(I32), r_icopy, opimm_bits(0b000, 0)));
397     e.add64(e.enc(copy.bind(I64), r_icopy, opimm_bits(0b000, 0)));
398     e.add64(e.enc(copy.bind(I32), r_icopy, opimm32_bits(0b000, 0)));
399 
400     e.add32(e.enc(regmove.bind(I32), r_irmov, opimm_bits(0b000, 0)));
401     e.add64(e.enc(regmove.bind(I64), r_irmov, opimm_bits(0b000, 0)));
402     e.add64(e.enc(regmove.bind(I32), r_irmov, opimm32_bits(0b000, 0)));
403 
404     e.add32(e.enc(copy.bind(B1), r_icopy, opimm_bits(0b000, 0)));
405     e.add64(e.enc(copy.bind(B1), r_icopy, opimm_bits(0b000, 0)));
406     e.add32(e.enc(regmove.bind(B1), r_irmov, opimm_bits(0b000, 0)));
407     e.add64(e.enc(regmove.bind(B1), r_irmov, opimm_bits(0b000, 0)));
408 
409     // Stack-slot-to-the-same-stack-slot copy, which is guaranteed to turn
410     // into a no-op.
411     // The same encoding is generated for both the 64- and 32-bit architectures.
412     for &ty in &[I64, I32, I16, I8] {
413         e.add32(e.enc(copy_nop.bind(ty), r_stacknull, 0));
414         e.add64(e.enc(copy_nop.bind(ty), r_stacknull, 0));
415     }
416     for &ty in &[F64, F32] {
417         e.add32(e.enc(copy_nop.bind(ty), r_stacknull, 0));
418         e.add64(e.enc(copy_nop.bind(ty), r_stacknull, 0));
419     }
420 
421     // Copy-to-SSA
422     e.add32(e.enc(copy_to_ssa.bind(I32), r_copytossa, opimm_bits(0b000, 0)));
423     e.add64(e.enc(copy_to_ssa.bind(I64), r_copytossa, opimm_bits(0b000, 0)));
424     e.add64(e.enc(copy_to_ssa.bind(I32), r_copytossa, opimm32_bits(0b000, 0)));
425     e.add32(e.enc(copy_to_ssa.bind(B1), r_copytossa, opimm_bits(0b000, 0)));
426     e.add64(e.enc(copy_to_ssa.bind(B1), r_copytossa, opimm_bits(0b000, 0)));
427     e.add32(e.enc(copy_to_ssa.bind(R32), r_copytossa, opimm_bits(0b000, 0)));
428     e.add64(e.enc(copy_to_ssa.bind(R64), r_copytossa, opimm_bits(0b000, 0)));
429 
430     e
431 }
432