#![allow(non_snake_case)]

use cranelift_codegen_shared::condcodes::IntCC;
use std::collections::HashMap;

use crate::cdsl::encodings::{Encoding, EncodingBuilder};
use crate::cdsl::instructions::{
    vector, Bindable, Immediate, InstSpec, Instruction, InstructionGroup, InstructionPredicate,
    InstructionPredicateNode, InstructionPredicateRegistry,
};
use crate::cdsl::recipes::{EncodingRecipe, EncodingRecipeNumber, Recipes};
use crate::cdsl::settings::{SettingGroup, SettingPredicateNumber};
use crate::cdsl::types::{LaneType, ValueType};
use crate::shared::types::Bool::{B1, B16, B32, B64, B8};
use crate::shared::types::Float::{F32, F64};
use crate::shared::types::Int::{I16, I32, I64, I8};
use crate::shared::types::Reference::{R32, R64};
use crate::shared::Definitions as SharedDefinitions;

use crate::isa::x86::opcodes::*;

use super::recipes::{RecipeGroup, Template};
use crate::cdsl::instructions::BindParameter::Any;

pub(crate) struct PerCpuModeEncodings {
    pub enc32: Vec<Encoding>,
    pub enc64: Vec<Encoding>,
    pub recipes: Recipes,
    recipes_by_name: HashMap<String, EncodingRecipeNumber>,
    pub inst_pred_reg: InstructionPredicateRegistry,
}

impl PerCpuModeEncodings {
    fn new() -> Self {
        Self {
            enc32: Vec::new(),
            enc64: Vec::new(),
            recipes: Recipes::new(),
            recipes_by_name: HashMap::new(),
            inst_pred_reg: InstructionPredicateRegistry::new(),
        }
    }

    fn add_recipe(&mut self, recipe: EncodingRecipe) -> EncodingRecipeNumber {
        if let Some(found_index) = self.recipes_by_name.get(&recipe.name) {
            assert!(
                self.recipes[*found_index] == recipe,
                "trying to insert different recipes with the same name ({})",
                recipe.name
            );
            *found_index
        } else {
            let recipe_name = recipe.name.clone();
            let index = self.recipes.push(recipe);
            self.recipes_by_name.insert(recipe_name, index);
            index
        }
    }

    fn make_encoding<T>(
        &mut self,
        inst: InstSpec,
        template: Template,
        builder_closure: T,
    ) -> Encoding
    where
        T: FnOnce(EncodingBuilder) -> EncodingBuilder,
    {
        let (recipe, bits) = template.build();
        let recipe_number = self.add_recipe(recipe);
        let builder = EncodingBuilder::new(inst, recipe_number, bits);
        builder_closure(builder).build(&self.recipes, &mut self.inst_pred_reg)
    }

    fn enc32_func<T>(&mut self, inst: impl Into<InstSpec>, template: Template, builder_closure: T)
    where
        T: FnOnce(EncodingBuilder) -> EncodingBuilder,
    {
        let encoding = self.make_encoding(inst.into(), template, builder_closure);
        self.enc32.push(encoding);
    }
    fn enc32(&mut self, inst: impl Into<InstSpec>, template: Template) {
        self.enc32_func(inst, template, |x| x);
    }
    fn enc32_isap(
        &mut self,
        inst: impl Into<InstSpec>,
        template: Template,
        isap: SettingPredicateNumber,
    ) {
        self.enc32_func(inst, template, |encoding| encoding.isa_predicate(isap));
    }
    fn enc32_instp(
        &mut self,
        inst: impl Into<InstSpec>,
        template: Template,
        instp: InstructionPredicateNode,
    ) {
        self.enc32_func(inst, template, |encoding| encoding.inst_predicate(instp));
    }
    fn enc32_rec(&mut self, inst: impl Into<InstSpec>, recipe: &EncodingRecipe, bits: u16) {
        let recipe_number = self.add_recipe(recipe.clone());
        let builder = EncodingBuilder::new(inst.into(), recipe_number, bits);
        let encoding = builder.build(&self.recipes, &mut self.inst_pred_reg);
        self.enc32.push(encoding);
    }

    fn enc64_func<T>(&mut self, inst: impl Into<InstSpec>, template: Template, builder_closure: T)
    where
        T: FnOnce(EncodingBuilder) -> EncodingBuilder,
    {
        let encoding = self.make_encoding(inst.into(), template, builder_closure);
        self.enc64.push(encoding);
    }
    fn enc64(&mut self, inst: impl Into<InstSpec>, template: Template) {
        self.enc64_func(inst, template, |x| x);
    }
    fn enc64_isap(
        &mut self,
        inst: impl Into<InstSpec>,
        template: Template,
        isap: SettingPredicateNumber,
    ) {
        self.enc64_func(inst, template, |encoding| encoding.isa_predicate(isap));
    }
    fn enc64_instp(
        &mut self,
        inst: impl Into<InstSpec>,
        template: Template,
        instp: InstructionPredicateNode,
    ) {
        self.enc64_func(inst, template, |encoding| encoding.inst_predicate(instp));
    }
    fn enc64_rec(&mut self, inst: impl Into<InstSpec>, recipe: &EncodingRecipe, bits: u16) {
        let recipe_number = self.add_recipe(recipe.clone());
        let builder = EncodingBuilder::new(inst.into(), recipe_number, bits);
        let encoding = builder.build(&self.recipes, &mut self.inst_pred_reg);
        self.enc64.push(encoding);
    }

    /// Adds I32/I64 encodings as appropriate for a typed instruction.
    /// The REX prefix is always inferred at runtime.
    ///
    /// Add encodings for `inst.i32` to X86_32.
    /// Add encodings for `inst.i32` to X86_64 with optional, inferred REX.
    /// Add encodings for `inst.i64` to X86_64 with a REX.W prefix.
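    ///
    /// For example (a call that appears later in this file):
    ///
    /// ```ignore
    /// e.enc_i32_i64(copy, rec_umr.opcodes(&MOV_STORE));
    /// ```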
    fn enc_i32_i64(&mut self, inst: impl Into<InstSpec>, template: Template) {
        let inst: InstSpec = inst.into();

        // I32 on x86: no REX prefix.
        self.enc32(inst.bind(I32), template.infer_rex());

        // I32 on x86_64: REX.W unset; REX.RXB determined at runtime from registers.
        self.enc64(inst.bind(I32), template.infer_rex());

        // I64 on x86_64: REX.W set; REX.RXB determined at runtime from registers.
        self.enc64(inst.bind(I64), template.rex().w());
    }

    /// Adds I32/I64 encodings as appropriate for a typed instruction.
    /// All REX-prefix variants are emitted explicitly, not inferred.
    ///
    /// Add encodings for `inst.i32` to X86_32.
    /// Add encodings for `inst.i32` to X86_64 with and without REX.
    /// Add encodings for `inst.i64` to X86_64 with a REX.W prefix.
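    ///
    /// For example (as used for spills later in this file):
    ///
    /// ```ignore
    /// e.enc_i32_i64_explicit_rex(spill, rec_spillSib32.opcodes(&MOV_STORE));
    /// ```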
    fn enc_i32_i64_explicit_rex(&mut self, inst: impl Into<InstSpec>, template: Template) {
        let inst: InstSpec = inst.into();
        self.enc32(inst.bind(I32), template.nonrex());

        // REX-less encoding must come after REX encoding so we don't use it by default.
        // Otherwise reg-alloc would never use r8 and up.
        self.enc64(inst.bind(I32), template.rex());
        self.enc64(inst.bind(I32), template.nonrex());
        self.enc64(inst.bind(I64), template.rex().w());
    }

    /// Adds B32/B64 encodings as appropriate for a typed instruction.
    /// The REX prefix is always inferred at runtime.
    ///
    /// Adds encoding for `inst.b32` to X86_32.
    /// Adds encoding for `inst.b32` to X86_64 with optional, inferred REX.
    /// Adds encoding for `inst.b64` to X86_64 with a REX.W prefix.
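    ///
    /// A sketch of the call shape (the instruction/template pairing here is
    /// illustrative only, not an encoding this file actually defines):
    ///
    /// ```ignore
    /// e.enc_b32_b64(copy, rec_umr.opcodes(&MOV_STORE));
    /// ```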
    fn enc_b32_b64(&mut self, inst: impl Into<InstSpec>, template: Template) {
        let inst: InstSpec = inst.into();

        // B32 on x86: no REX prefix.
        self.enc32(inst.bind(B32), template.infer_rex());

        // B32 on x86_64: REX.W unset; REX.RXB determined at runtime from registers.
        self.enc64(inst.bind(B32), template.infer_rex());

        // B64 on x86_64: REX.W set; REX.RXB determined at runtime from registers.
        self.enc64(inst.bind(B64), template.rex().w());
    }

    /// Add encodings for `inst.i32` to X86_32.
    /// Add encodings for `inst.i32` to X86_64 with a REX prefix.
    /// Add encodings for `inst.i64` to X86_64 with a REX.W prefix.
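    ///
    /// For example (as used for `copy_to_ssa` later in this file):
    ///
    /// ```ignore
    /// e.enc_i32_i64_rex_only(copy_to_ssa, rec_umr_reg_to_ssa.opcodes(&MOV_STORE));
    /// ```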
    fn enc_i32_i64_rex_only(&mut self, inst: impl Into<InstSpec>, template: Template) {
        let inst: InstSpec = inst.into();
        self.enc32(inst.bind(I32), template.nonrex());
        self.enc64(inst.bind(I32), template.rex());
        self.enc64(inst.bind(I64), template.rex().w());
    }

    /// Add encodings for `inst.i32` to X86_32.
    /// Add encodings for `inst.i32` to X86_64 with and without REX.
    /// Add encodings for `inst.i64` to X86_64 with a REX.W prefix.
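    ///
    /// For example, later in this file (inside a loop over load recipes):
    ///
    /// ```ignore
    /// e.enc_i32_i64_instp(
    ///     load_complex,
    ///     recipe.opcodes(&MOV_LOAD),
    ///     is_load_complex_length_two.clone(),
    /// );
    /// ```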
    fn enc_i32_i64_instp(
        &mut self,
        inst: &Instruction,
        template: Template,
        instp: InstructionPredicateNode,
    ) {
        self.enc32_func(inst.bind(I32), template.nonrex(), |builder| {
            builder.inst_predicate(instp.clone())
        });

        // REX-less encoding must come after REX encoding so we don't use it by default. Otherwise
        // reg-alloc would never use r8 and up.
        self.enc64_func(inst.bind(I32), template.rex(), |builder| {
            builder.inst_predicate(instp.clone())
        });
        self.enc64_func(inst.bind(I32), template.nonrex(), |builder| {
            builder.inst_predicate(instp.clone())
        });
        self.enc64_func(inst.bind(I64), template.rex().w(), |builder| {
            builder.inst_predicate(instp)
        });
    }

    /// Add encodings for `inst.r32` to X86_32.
    /// Add encodings for `inst.r64` to X86_64 with a REX.W prefix.
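    ///
    /// For example (reference-typed copies later in this file):
    ///
    /// ```ignore
    /// e.enc_r32_r64_rex_only(copy, rec_umr.opcodes(&MOV_STORE));
    /// ```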
    fn enc_r32_r64_rex_only(&mut self, inst: impl Into<InstSpec>, template: Template) {
        let inst: InstSpec = inst.into();
        self.enc32(inst.bind(R32), template.nonrex());
        self.enc64(inst.bind(R64), template.rex().w());
    }

    fn enc_r32_r64_ld_st(&mut self, inst: &Instruction, w_bit: bool, template: Template) {
        self.enc32(inst.clone().bind(R32).bind(Any), template.clone());

        // REX-less encoding must come after REX encoding so we don't use it by
        // default. Otherwise reg-alloc would never use r8 and up.
        self.enc64(inst.clone().bind(R32).bind(Any), template.clone().rex());
        self.enc64(inst.clone().bind(R32).bind(Any), template.clone());

        if w_bit {
            self.enc64(inst.clone().bind(R64).bind(Any), template.rex().w());
        } else {
            self.enc64(inst.clone().bind(R64).bind(Any), template.clone().rex());
            self.enc64(inst.clone().bind(R64).bind(Any), template);
        }
    }

    /// Add encodings for `inst` to X86_64 with and without a REX prefix.
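    ///
    /// For example (as used for pushes later in this file):
    ///
    /// ```ignore
    /// e.enc_x86_64(x86_push.bind(I64), rec_pushq.opcodes(&PUSH_REG));
    /// ```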
    fn enc_x86_64(&mut self, inst: impl Into<InstSpec> + Clone, template: Template) {
        // See above comment about the ordering of rex vs non-rex encodings.
        self.enc64(inst.clone(), template.rex());
        self.enc64(inst, template);
    }

    /// Add encodings for `inst` to X86_64 with and without a REX prefix.
    fn enc_x86_64_instp(
        &mut self,
        inst: impl Clone + Into<InstSpec>,
        template: Template,
        instp: InstructionPredicateNode,
    ) {
        // See above comment about the ordering of rex vs non-rex encodings.
        self.enc64_func(inst.clone(), template.rex(), |builder| {
            builder.inst_predicate(instp.clone())
        });
        self.enc64_func(inst, template, |builder| builder.inst_predicate(instp));
    }
    fn enc_x86_64_isap(
        &mut self,
        inst: impl Clone + Into<InstSpec>,
        template: Template,
        isap: SettingPredicateNumber,
    ) {
        // See above comment about the ordering of rex vs non-rex encodings.
        self.enc64_isap(inst.clone(), template.rex(), isap);
        self.enc64_isap(inst, template, isap);
    }

    /// Add all three encodings for `inst`:
    /// - X86_32
    /// - X86_64 with and without the REX prefix.
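    ///
    /// For example (as used for `copy.b1` later in this file):
    ///
    /// ```ignore
    /// e.enc_both(copy.bind(B1), rec_umr.opcodes(&MOV_STORE));
    /// ```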
    fn enc_both(&mut self, inst: impl Clone + Into<InstSpec>, template: Template) {
        self.enc32(inst.clone(), template.clone());
        self.enc_x86_64(inst, template);
    }
    fn enc_both_isap(
        &mut self,
        inst: impl Clone + Into<InstSpec>,
        template: Template,
        isap: SettingPredicateNumber,
    ) {
        self.enc32_isap(inst.clone(), template.clone(), isap);
        self.enc_x86_64_isap(inst, template, isap);
    }
    fn enc_both_instp(
        &mut self,
        inst: impl Clone + Into<InstSpec>,
        template: Template,
        instp: InstructionPredicateNode,
    ) {
        self.enc32_instp(inst.clone(), template.clone(), instp.clone());
        self.enc_x86_64_instp(inst, template, instp);
    }

    /// Add two encodings for `inst`:
    /// - X86_32, with no REX prefix, since REX is not valid in 32-bit mode.
    /// - X86_64, with the REX prefix inferred dynamically.
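    ///
    /// A sketch of the call shape (the arguments are illustrative only):
    ///
    /// ```ignore
    /// e.enc_both_inferred(copy.bind(I32), rec_umr.opcodes(&MOV_STORE));
    /// ```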
    fn enc_both_inferred(&mut self, inst: impl Clone + Into<InstSpec>, template: Template) {
        self.enc32(inst.clone(), template.clone());
        self.enc64(inst, template.infer_rex());
    }
    fn enc_both_inferred_maybe_isap(
        &mut self,
        inst: impl Clone + Into<InstSpec>,
        template: Template,
        isap: Option<SettingPredicateNumber>,
    ) {
        self.enc32_maybe_isap(inst.clone(), template.clone(), isap);
        self.enc64_maybe_isap(inst, template.infer_rex(), isap);
    }

    /// Add two encodings for `inst`:
    /// - X86_32
    /// - X86_64 with the REX prefix.
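    ///
    /// For example (as used for `copy_to_ssa.b1` later in this file):
    ///
    /// ```ignore
    /// e.enc_both_rex_only(copy_to_ssa.bind(B1), rec_umr_reg_to_ssa.opcodes(&MOV_STORE));
    /// ```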
    fn enc_both_rex_only(&mut self, inst: impl Clone + Into<InstSpec>, template: Template) {
        self.enc32(inst.clone(), template.clone());
        self.enc64(inst, template.rex());
    }

    /// Add encodings for `inst.i32` to X86_32.
    /// Add encodings for `inst.i32` to X86_64 with and without REX.
    /// Add encodings for `inst.i64` to X86_64 with a REX prefix, using the `w_bit`
    /// argument to determine whether or not to set the REX.W bit.
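    ///
    /// For example, later in this file (inside a loop over store recipes):
    ///
    /// ```ignore
    /// e.enc_i32_i64_ld_st(store, true, recipe.opcodes(&MOV_STORE));
    /// ```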
    fn enc_i32_i64_ld_st(&mut self, inst: &Instruction, w_bit: bool, template: Template) {
        self.enc32(inst.clone().bind(I32).bind(Any), template.clone());

        // REX-less encoding must come after REX encoding so we don't use it by
        // default. Otherwise reg-alloc would never use r8 and up.
        self.enc64(inst.clone().bind(I32).bind(Any), template.clone().rex());
        self.enc64(inst.clone().bind(I32).bind(Any), template.clone());

        if w_bit {
            self.enc64(inst.clone().bind(I64).bind(Any), template.rex().w());
        } else {
            self.enc64(inst.clone().bind(I64).bind(Any), template.clone().rex());
            self.enc64(inst.clone().bind(I64).bind(Any), template);
        }
    }

    /// Add the same encoding/recipe pairing to both X86_32 and X86_64.
    fn enc_32_64_rec(
        &mut self,
        inst: impl Clone + Into<InstSpec>,
        recipe: &EncodingRecipe,
        bits: u16,
    ) {
        self.enc32_rec(inst.clone(), recipe, bits);
        self.enc64_rec(inst, recipe, bits);
    }

    /// Add the same encoding to both X86_32 and X86_64; assumes configuration (e.g. REX,
    /// operand binding) has already happened.
    fn enc_32_64_func<T>(
        &mut self,
        inst: impl Clone + Into<InstSpec>,
        template: Template,
        builder_closure: T,
    ) where
        T: FnOnce(EncodingBuilder) -> EncodingBuilder,
    {
        let encoding = self.make_encoding(inst.into(), template, builder_closure);
        self.enc32.push(encoding.clone());
        self.enc64.push(encoding);
    }

    /// Add the same encoding to both X86_32 and X86_64; assumes configuration (e.g. REX, operand
    /// binding) has already happened.
    fn enc_32_64_maybe_isap(
        &mut self,
        inst: impl Clone + Into<InstSpec>,
        template: Template,
        isap: Option<SettingPredicateNumber>,
    ) {
        self.enc32_maybe_isap(inst.clone(), template.clone(), isap);
        self.enc64_maybe_isap(inst, template, isap);
    }

    fn enc32_maybe_isap(
        &mut self,
        inst: impl Into<InstSpec>,
        template: Template,
        isap: Option<SettingPredicateNumber>,
    ) {
        match isap {
            None => self.enc32(inst, template),
            Some(isap) => self.enc32_isap(inst, template, isap),
        }
    }

    fn enc64_maybe_isap(
        &mut self,
        inst: impl Into<InstSpec>,
        template: Template,
        isap: Option<SettingPredicateNumber>,
    ) {
        match isap {
            None => self.enc64(inst, template),
            Some(isap) => self.enc64_isap(inst, template, isap),
        }
    }
}

// Definitions.

#[inline(never)]
fn define_moves(e: &mut PerCpuModeEncodings, shared_defs: &SharedDefinitions, r: &RecipeGroup) {
    let shared = &shared_defs.instructions;
    let formats = &shared_defs.formats;

    // Shorthands for instructions.
    let bconst = shared.by_name("bconst");
    let bint = shared.by_name("bint");
    let copy = shared.by_name("copy");
    let copy_special = shared.by_name("copy_special");
    let copy_to_ssa = shared.by_name("copy_to_ssa");
    let get_pinned_reg = shared.by_name("get_pinned_reg");
    let iconst = shared.by_name("iconst");
    let ireduce = shared.by_name("ireduce");
    let regmove = shared.by_name("regmove");
    let sextend = shared.by_name("sextend");
    let set_pinned_reg = shared.by_name("set_pinned_reg");
    let uextend = shared.by_name("uextend");

    // Shorthands for recipes.
    let rec_copysp = r.template("copysp");
    let rec_furm_reg_to_ssa = r.template("furm_reg_to_ssa");
    let rec_get_pinned_reg = r.recipe("get_pinned_reg");
    let rec_null = r.recipe("null");
    let rec_pu_id = r.template("pu_id");
    let rec_pu_id_bool = r.template("pu_id_bool");
    let rec_pu_iq = r.template("pu_iq");
    let rec_rmov = r.template("rmov");
    let rec_set_pinned_reg = r.template("set_pinned_reg");
    let rec_u_id = r.template("u_id");
    let rec_u_id_z = r.template("u_id_z");
    let rec_umr = r.template("umr");
    let rec_umr_reg_to_ssa = r.template("umr_reg_to_ssa");
    let rec_urm_noflags = r.template("urm_noflags");
    let rec_urm_noflags_abcd = r.template("urm_noflags_abcd");

    // The pinned register holds a value entirely controlled by the user, so reading it
    // (`get_pinned_reg`) generates no code.
    e.enc64_rec(get_pinned_reg.bind(I64), rec_get_pinned_reg, 0);
    e.enc_x86_64(
        set_pinned_reg.bind(I64),
        rec_set_pinned_reg.opcodes(&MOV_STORE).rex().w(),
    );

    e.enc_i32_i64(copy, rec_umr.opcodes(&MOV_STORE));
    e.enc_r32_r64_rex_only(copy, rec_umr.opcodes(&MOV_STORE));
    e.enc_both(copy.bind(B1), rec_umr.opcodes(&MOV_STORE));
    e.enc_both(copy.bind(I8), rec_umr.opcodes(&MOV_STORE));
    e.enc_both(copy.bind(I16), rec_umr.opcodes(&MOV_STORE));

    // TODO For x86-64, only define REX forms for now, since we can't describe the
    // special regunit immediate operands with the current constraint language.
    for &ty in &[I8, I16, I32] {
        e.enc32(regmove.bind(ty), rec_rmov.opcodes(&MOV_STORE));
        e.enc64(regmove.bind(ty), rec_rmov.opcodes(&MOV_STORE).rex());
    }
    for &ty in &[B8, B16, B32] {
        e.enc32(regmove.bind(ty), rec_rmov.opcodes(&MOV_STORE));
        e.enc64(regmove.bind(ty), rec_rmov.opcodes(&MOV_STORE).rex());
    }
    e.enc64(regmove.bind(I64), rec_rmov.opcodes(&MOV_STORE).rex().w());
    e.enc_both(regmove.bind(B1), rec_rmov.opcodes(&MOV_STORE));
    e.enc_both(regmove.bind(I8), rec_rmov.opcodes(&MOV_STORE));
    e.enc32(regmove.bind(R32), rec_rmov.opcodes(&MOV_STORE));
    e.enc64(regmove.bind(R32), rec_rmov.opcodes(&MOV_STORE).rex());
    e.enc64(regmove.bind(R64), rec_rmov.opcodes(&MOV_STORE).rex().w());

    // Immediate constants.
    e.enc32(iconst.bind(I32), rec_pu_id.opcodes(&MOV_IMM));

    e.enc64(iconst.bind(I32), rec_pu_id.rex().opcodes(&MOV_IMM));
    e.enc64(iconst.bind(I32), rec_pu_id.opcodes(&MOV_IMM));

    // The 32-bit immediate movl also zero-extends to 64 bits.
    let is_unsigned_int32 =
        InstructionPredicate::new_is_unsigned_int(&*formats.unary_imm, "imm", 32, 0);

    e.enc64_func(
        iconst.bind(I64),
        rec_pu_id.opcodes(&MOV_IMM).rex(),
        |encoding| encoding.inst_predicate(is_unsigned_int32.clone()),
    );
    e.enc64_func(iconst.bind(I64), rec_pu_id.opcodes(&MOV_IMM), |encoding| {
        encoding.inst_predicate(is_unsigned_int32)
    });

    // Sign-extended 32-bit immediate.
    e.enc64(
        iconst.bind(I64),
        rec_u_id.rex().opcodes(&MOV_IMM_SIGNEXTEND).rrr(0).w(),
    );

    // Finally, the MOV_IMM opcode takes an 8-byte immediate with a REX.W prefix.
    e.enc64(iconst.bind(I64), rec_pu_iq.opcodes(&MOV_IMM).rex().w());

    // Bool constants (uses MOV).
    for &ty in &[B1, B8, B16, B32] {
        e.enc_both(bconst.bind(ty), rec_pu_id_bool.opcodes(&MOV_IMM));
    }
    e.enc64(bconst.bind(B64), rec_pu_id_bool.opcodes(&MOV_IMM).rex());

    let is_zero_int = InstructionPredicate::new_is_zero_int(&formats.unary_imm, "imm");
    e.enc_both_instp(
        iconst.bind(I8),
        rec_u_id_z.opcodes(&XORB),
        is_zero_int.clone(),
    );

    // You may expect i16 encodings to carry an 0x66 prefix on the opcode, indicating that the
    // encoding operates on 16-bit operands (e.g., "xor %ax, %ax"). Cranelift currently does not
    // know that it can drop the 0x66 prefix and clear the upper half of a 32-bit register in
    // these scenarios, so we explicitly select a wider but permissible opcode.
    //
    // This effectively formalizes the i16->i32 widening that Cranelift performs when there isn't
    // an appropriate i16 encoding available.
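    //
    // Concretely, the `iconst.i16 0` encoding below uses the 32-bit XOR opcode, so it is emitted
    // as, e.g., "xor %eax, %eax" rather than "xor %ax, %ax" (the register shown is illustrative;
    // the actual one is chosen by register allocation).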
    e.enc_both_instp(
        iconst.bind(I16),
        rec_u_id_z.opcodes(&XOR),
        is_zero_int.clone(),
    );
    e.enc_both_instp(
        iconst.bind(I32),
        rec_u_id_z.opcodes(&XOR),
        is_zero_int.clone(),
    );
    e.enc_x86_64_instp(iconst.bind(I64), rec_u_id_z.opcodes(&XOR), is_zero_int);

    // Numerical conversions.

    // Reducing an integer is a no-op.
    e.enc32_rec(ireduce.bind(I8).bind(I16), rec_null, 0);
    e.enc32_rec(ireduce.bind(I8).bind(I32), rec_null, 0);
    e.enc32_rec(ireduce.bind(I16).bind(I32), rec_null, 0);

    e.enc64_rec(ireduce.bind(I8).bind(I16), rec_null, 0);
    e.enc64_rec(ireduce.bind(I8).bind(I32), rec_null, 0);
    e.enc64_rec(ireduce.bind(I16).bind(I32), rec_null, 0);
    e.enc64_rec(ireduce.bind(I8).bind(I64), rec_null, 0);
    e.enc64_rec(ireduce.bind(I16).bind(I64), rec_null, 0);
    e.enc64_rec(ireduce.bind(I32).bind(I64), rec_null, 0);

    // TODO: Add encodings for cbw, cwde, cdqe, which are sign-extending
    // instructions for %al/%ax/%eax to %ax/%eax/%rax.

    // movsbl
    e.enc32(
        sextend.bind(I32).bind(I8),
        rec_urm_noflags_abcd.opcodes(&MOVSX_BYTE),
    );
    e.enc64(
        sextend.bind(I32).bind(I8),
        rec_urm_noflags.opcodes(&MOVSX_BYTE).rex(),
    );
    e.enc64(
        sextend.bind(I32).bind(I8),
        rec_urm_noflags_abcd.opcodes(&MOVSX_BYTE),
    );

    // movswl
    e.enc32(
        sextend.bind(I32).bind(I16),
        rec_urm_noflags.opcodes(&MOVSX_WORD),
    );
    e.enc64(
        sextend.bind(I32).bind(I16),
        rec_urm_noflags.opcodes(&MOVSX_WORD).rex(),
    );
    e.enc64(
        sextend.bind(I32).bind(I16),
        rec_urm_noflags.opcodes(&MOVSX_WORD),
    );

    // movsbq
    e.enc64(
        sextend.bind(I64).bind(I8),
        rec_urm_noflags.opcodes(&MOVSX_BYTE).rex().w(),
    );

    // movswq
    e.enc64(
        sextend.bind(I64).bind(I16),
        rec_urm_noflags.opcodes(&MOVSX_WORD).rex().w(),
    );

    // movslq
    e.enc64(
        sextend.bind(I64).bind(I32),
        rec_urm_noflags.opcodes(&MOVSXD).rex().w(),
    );

    // movzbl
    e.enc32(
        uextend.bind(I32).bind(I8),
        rec_urm_noflags_abcd.opcodes(&MOVZX_BYTE),
    );
    e.enc64(
        uextend.bind(I32).bind(I8),
        rec_urm_noflags.opcodes(&MOVZX_BYTE).rex(),
    );
    e.enc64(
        uextend.bind(I32).bind(I8),
        rec_urm_noflags_abcd.opcodes(&MOVZX_BYTE),
    );

    // movzwl
    e.enc32(
        uextend.bind(I32).bind(I16),
        rec_urm_noflags.opcodes(&MOVZX_WORD),
    );
    e.enc64(
        uextend.bind(I32).bind(I16),
        rec_urm_noflags.opcodes(&MOVZX_WORD).rex(),
    );
    e.enc64(
        uextend.bind(I32).bind(I16),
        rec_urm_noflags.opcodes(&MOVZX_WORD),
    );

    // movzbq, encoded as movzbl because it's equivalent and shorter.
    e.enc64(
        uextend.bind(I64).bind(I8),
        rec_urm_noflags.opcodes(&MOVZX_BYTE).rex(),
    );
    e.enc64(
        uextend.bind(I64).bind(I8),
        rec_urm_noflags_abcd.opcodes(&MOVZX_BYTE),
    );

    // movzwq, encoded as movzwl because it's equivalent and shorter.
    e.enc64(
        uextend.bind(I64).bind(I16),
        rec_urm_noflags.opcodes(&MOVZX_WORD).rex(),
    );
    e.enc64(
        uextend.bind(I64).bind(I16),
        rec_urm_noflags.opcodes(&MOVZX_WORD),
    );

    // A 32-bit register copy clears the high 32 bits.
    e.enc64(
        uextend.bind(I64).bind(I32),
        rec_umr.opcodes(&MOV_STORE).rex(),
    );
    e.enc64(uextend.bind(I64).bind(I32), rec_umr.opcodes(&MOV_STORE));

    // Convert bool to int.
    //
    // This assumes that b1 is represented as an 8-bit low register with the value 0
    // or 1.
    //
    // Encode movzbq as movzbl, because it's equivalent and shorter.
    for &to in &[I8, I16, I32, I64] {
        for &from in &[B1, B8] {
            e.enc64(
                bint.bind(to).bind(from),
                rec_urm_noflags.opcodes(&MOVZX_BYTE).rex(),
            );
            e.enc64(
                bint.bind(to).bind(from),
                rec_urm_noflags_abcd.opcodes(&MOVZX_BYTE),
            );
            if to != I64 {
                e.enc32(
                    bint.bind(to).bind(from),
                    rec_urm_noflags_abcd.opcodes(&MOVZX_BYTE),
                );
            }
        }
    }

    // Copy special.
    // For x86-64, only define REX forms for now, since we can't describe the
    // special regunit immediate operands with the current constraint language.
    e.enc64(copy_special, rec_copysp.opcodes(&MOV_STORE).rex().w());
    e.enc32(copy_special, rec_copysp.opcodes(&MOV_STORE));

    // Copy to SSA. These have to be done with special _rex_only encoders, because the standard
    // machinery for deciding whether a REX.{RXB} prefix is needed doesn't take into account
    // the source register, which is specified directly in the instruction.
    e.enc_i32_i64_rex_only(copy_to_ssa, rec_umr_reg_to_ssa.opcodes(&MOV_STORE));
    e.enc_r32_r64_rex_only(copy_to_ssa, rec_umr_reg_to_ssa.opcodes(&MOV_STORE));
    e.enc_both_rex_only(copy_to_ssa.bind(B1), rec_umr_reg_to_ssa.opcodes(&MOV_STORE));
    e.enc_both_rex_only(copy_to_ssa.bind(I8), rec_umr_reg_to_ssa.opcodes(&MOV_STORE));
    e.enc_both_rex_only(
        copy_to_ssa.bind(I16),
        rec_umr_reg_to_ssa.opcodes(&MOV_STORE),
    );
    e.enc_both_rex_only(
        copy_to_ssa.bind(F64),
        rec_furm_reg_to_ssa.opcodes(&MOVSD_LOAD),
    );
    e.enc_both_rex_only(
        copy_to_ssa.bind(F32),
        rec_furm_reg_to_ssa.opcodes(&MOVSS_LOAD),
    );
}

#[inline(never)]
fn define_memory(
    e: &mut PerCpuModeEncodings,
    shared_defs: &SharedDefinitions,
    x86: &InstructionGroup,
    r: &RecipeGroup,
) {
    let shared = &shared_defs.instructions;
    let formats = &shared_defs.formats;

    // Shorthands for instructions.
    let adjust_sp_down = shared.by_name("adjust_sp_down");
    let adjust_sp_down_imm = shared.by_name("adjust_sp_down_imm");
    let adjust_sp_up_imm = shared.by_name("adjust_sp_up_imm");
    let copy_nop = shared.by_name("copy_nop");
    let fill = shared.by_name("fill");
    let fill_nop = shared.by_name("fill_nop");
    let istore16 = shared.by_name("istore16");
    let istore16_complex = shared.by_name("istore16_complex");
    let istore32 = shared.by_name("istore32");
    let istore32_complex = shared.by_name("istore32_complex");
    let istore8 = shared.by_name("istore8");
    let istore8_complex = shared.by_name("istore8_complex");
    let load = shared.by_name("load");
    let load_complex = shared.by_name("load_complex");
    let regfill = shared.by_name("regfill");
    let regspill = shared.by_name("regspill");
    let sload16 = shared.by_name("sload16");
    let sload16_complex = shared.by_name("sload16_complex");
    let sload32 = shared.by_name("sload32");
    let sload32_complex = shared.by_name("sload32_complex");
    let sload8 = shared.by_name("sload8");
    let sload8_complex = shared.by_name("sload8_complex");
    let spill = shared.by_name("spill");
    let store = shared.by_name("store");
    let store_complex = shared.by_name("store_complex");
    let uload16 = shared.by_name("uload16");
    let uload16_complex = shared.by_name("uload16_complex");
    let uload32 = shared.by_name("uload32");
    let uload32_complex = shared.by_name("uload32_complex");
    let uload8 = shared.by_name("uload8");
    let uload8_complex = shared.by_name("uload8_complex");
    let x86_pop = x86.by_name("x86_pop");
    let x86_push = x86.by_name("x86_push");

    // Shorthands for recipes.
    let rec_adjustsp = r.template("adjustsp");
    let rec_adjustsp_ib = r.template("adjustsp_ib");
    let rec_adjustsp_id = r.template("adjustsp_id");
    let rec_ffillnull = r.recipe("ffillnull");
    let rec_fillnull = r.recipe("fillnull");
    let rec_fillSib32 = r.template("fillSib32");
    let rec_ld = r.template("ld");
    let rec_ldDisp32 = r.template("ldDisp32");
    let rec_ldDisp8 = r.template("ldDisp8");
    let rec_ldWithIndex = r.template("ldWithIndex");
    let rec_ldWithIndexDisp32 = r.template("ldWithIndexDisp32");
    let rec_ldWithIndexDisp8 = r.template("ldWithIndexDisp8");
    let rec_popq = r.template("popq");
    let rec_pushq = r.template("pushq");
    let rec_regfill32 = r.template("regfill32");
    let rec_regspill32 = r.template("regspill32");
    let rec_spillSib32 = r.template("spillSib32");
    let rec_st = r.template("st");
    let rec_stacknull = r.recipe("stacknull");
    let rec_stDisp32 = r.template("stDisp32");
    let rec_stDisp32_abcd = r.template("stDisp32_abcd");
    let rec_stDisp8 = r.template("stDisp8");
    let rec_stDisp8_abcd = r.template("stDisp8_abcd");
    let rec_stWithIndex = r.template("stWithIndex");
    let rec_stWithIndexDisp32 = r.template("stWithIndexDisp32");
    let rec_stWithIndexDisp32_abcd = r.template("stWithIndexDisp32_abcd");
    let rec_stWithIndexDisp8 = r.template("stWithIndexDisp8");
    let rec_stWithIndexDisp8_abcd = r.template("stWithIndexDisp8_abcd");
    let rec_stWithIndex_abcd = r.template("stWithIndex_abcd");
    let rec_st_abcd = r.template("st_abcd");

    // Loads and stores.
    let is_load_complex_length_two =
        InstructionPredicate::new_length_equals(&*formats.load_complex, 2);

    for recipe in &[rec_ldWithIndex, rec_ldWithIndexDisp8, rec_ldWithIndexDisp32] {
        e.enc_i32_i64_instp(
            load_complex,
            recipe.opcodes(&MOV_LOAD),
            is_load_complex_length_two.clone(),
        );
        e.enc_x86_64_instp(
            uload32_complex,
            recipe.opcodes(&MOV_LOAD),
            is_load_complex_length_two.clone(),
        );

        e.enc64_instp(
            sload32_complex,
            recipe.opcodes(&MOVSXD).rex().w(),
            is_load_complex_length_two.clone(),
        );

        e.enc_i32_i64_instp(
            uload16_complex,
            recipe.opcodes(&MOVZX_WORD),
            is_load_complex_length_two.clone(),
        );
        e.enc_i32_i64_instp(
            sload16_complex,
            recipe.opcodes(&MOVSX_WORD),
            is_load_complex_length_two.clone(),
        );

        e.enc_i32_i64_instp(
            uload8_complex,
            recipe.opcodes(&MOVZX_BYTE),
            is_load_complex_length_two.clone(),
        );

        e.enc_i32_i64_instp(
            sload8_complex,
            recipe.opcodes(&MOVSX_BYTE),
            is_load_complex_length_two.clone(),
        );
    }

    let is_store_complex_length_three =
        InstructionPredicate::new_length_equals(&*formats.store_complex, 3);

    for recipe in &[rec_stWithIndex, rec_stWithIndexDisp8, rec_stWithIndexDisp32] {
        e.enc_i32_i64_instp(
            store_complex,
            recipe.opcodes(&MOV_STORE),
            is_store_complex_length_three.clone(),
        );
        e.enc_x86_64_instp(
            istore32_complex,
            recipe.opcodes(&MOV_STORE),
            is_store_complex_length_three.clone(),
        );
        e.enc_both_instp(
            istore16_complex.bind(I32),
            recipe.opcodes(&MOV_STORE_16),
            is_store_complex_length_three.clone(),
        );
        e.enc_x86_64_instp(
            istore16_complex.bind(I64),
            recipe.opcodes(&MOV_STORE_16),
            is_store_complex_length_three.clone(),
        );
    }

    for recipe in &[
        rec_stWithIndex_abcd,
        rec_stWithIndexDisp8_abcd,
        rec_stWithIndexDisp32_abcd,
    ] {
        e.enc_both_instp(
            istore8_complex.bind(I32),
            recipe.opcodes(&MOV_BYTE_STORE),
            is_store_complex_length_three.clone(),
        );
        e.enc_x86_64_instp(
            istore8_complex.bind(I64),
            recipe.opcodes(&MOV_BYTE_STORE),
            is_store_complex_length_three.clone(),
        );
    }

    for recipe in &[rec_st, rec_stDisp8, rec_stDisp32] {
        e.enc_i32_i64_ld_st(store, true, recipe.opcodes(&MOV_STORE));
        e.enc_r32_r64_ld_st(store, true, recipe.opcodes(&MOV_STORE));
        e.enc_x86_64(istore32.bind(I64).bind(Any), recipe.opcodes(&MOV_STORE));
        e.enc_i32_i64_ld_st(istore16, false, recipe.opcodes(&MOV_STORE_16));
    }

    // Byte stores are more complicated because the set of registers they can address
    // depends on the presence of a REX prefix. The st*_abcd recipes fall back to
    // the corresponding st* recipes when a REX prefix is applied.

    for recipe in &[rec_st_abcd, rec_stDisp8_abcd, rec_stDisp32_abcd] {
        e.enc_both(istore8.bind(I32).bind(Any), recipe.opcodes(&MOV_BYTE_STORE));
        e.enc_x86_64(istore8.bind(I64).bind(Any), recipe.opcodes(&MOV_BYTE_STORE));
    }

    e.enc_i32_i64_explicit_rex(spill, rec_spillSib32.opcodes(&MOV_STORE));
    e.enc_i32_i64_explicit_rex(regspill, rec_regspill32.opcodes(&MOV_STORE));
    e.enc_r32_r64_rex_only(spill, rec_spillSib32.opcodes(&MOV_STORE));
    e.enc_r32_r64_rex_only(regspill, rec_regspill32.opcodes(&MOV_STORE));

    // Use a 32-bit write for spilling `b1`, `i8` and `i16` to avoid
    // constraining the permitted registers.
    // See MIN_SPILL_SLOT_SIZE, which makes this safe.

    e.enc_both(spill.bind(B1), rec_spillSib32.opcodes(&MOV_STORE));
    e.enc_both(regspill.bind(B1), rec_regspill32.opcodes(&MOV_STORE));
    for &ty in &[I8, I16] {
        e.enc_both(spill.bind(ty), rec_spillSib32.opcodes(&MOV_STORE));
        e.enc_both(regspill.bind(ty), rec_regspill32.opcodes(&MOV_STORE));
    }

    for recipe in &[rec_ld, rec_ldDisp8, rec_ldDisp32] {
        e.enc_i32_i64_ld_st(load, true, recipe.opcodes(&MOV_LOAD));
        e.enc_r32_r64_ld_st(load, true, recipe.opcodes(&MOV_LOAD));
        e.enc_x86_64(uload32.bind(I64), recipe.opcodes(&MOV_LOAD));
        e.enc64(sload32.bind(I64), recipe.opcodes(&MOVSXD).rex().w());
        e.enc_i32_i64_ld_st(uload16, true, recipe.opcodes(&MOVZX_WORD));
        e.enc_i32_i64_ld_st(sload16, true, recipe.opcodes(&MOVSX_WORD));
        e.enc_i32_i64_ld_st(uload8, true, recipe.opcodes(&MOVZX_BYTE));
        e.enc_i32_i64_ld_st(sload8, true, recipe.opcodes(&MOVSX_BYTE));
    }

    e.enc_i32_i64_explicit_rex(fill, rec_fillSib32.opcodes(&MOV_LOAD));
    e.enc_i32_i64_explicit_rex(regfill, rec_regfill32.opcodes(&MOV_LOAD));
    e.enc_r32_r64_rex_only(fill, rec_fillSib32.opcodes(&MOV_LOAD));
    e.enc_r32_r64_rex_only(regfill, rec_regfill32.opcodes(&MOV_LOAD));

    // No-op fills, created by late-stage redundant-fill removal.
    for &ty in &[I64, I32, I16, I8] {
        e.enc64_rec(fill_nop.bind(ty), rec_fillnull, 0);
        e.enc32_rec(fill_nop.bind(ty), rec_fillnull, 0);
    }
    e.enc64_rec(fill_nop.bind(B1), rec_fillnull, 0);
    e.enc32_rec(fill_nop.bind(B1), rec_fillnull, 0);
    for &ty in &[F64, F32] {
        e.enc64_rec(fill_nop.bind(ty), rec_ffillnull, 0);
        e.enc32_rec(fill_nop.bind(ty), rec_ffillnull, 0);
    }

    // Load 32 bits from `b1`, `i8` and `i16` spill slots. See `spill.b1` above.

    e.enc_both(fill.bind(B1), rec_fillSib32.opcodes(&MOV_LOAD));
    e.enc_both(regfill.bind(B1), rec_regfill32.opcodes(&MOV_LOAD));
    for &ty in &[I8, I16] {
        e.enc_both(fill.bind(ty), rec_fillSib32.opcodes(&MOV_LOAD));
        e.enc_both(regfill.bind(ty), rec_regfill32.opcodes(&MOV_LOAD));
    }

    // Push and pop.
    e.enc32(x86_push.bind(I32), rec_pushq.opcodes(&PUSH_REG));
    e.enc_x86_64(x86_push.bind(I64), rec_pushq.opcodes(&PUSH_REG));

    e.enc32(x86_pop.bind(I32), rec_popq.opcodes(&POP_REG));
    e.enc_x86_64(x86_pop.bind(I64), rec_popq.opcodes(&POP_REG));

    // A stack-slot-to-the-same-stack-slot copy is guaranteed to turn into a no-op.
    // The same encoding is generated for both the 64- and 32-bit architectures.
    for &ty in &[I64, I32, I16, I8] {
        e.enc64_rec(copy_nop.bind(ty), rec_stacknull, 0);
        e.enc32_rec(copy_nop.bind(ty), rec_stacknull, 0);
    }
    for &ty in &[F64, F32] {
        e.enc64_rec(copy_nop.bind(ty), rec_stacknull, 0);
        e.enc32_rec(copy_nop.bind(ty), rec_stacknull, 0);
    }

    // Adjust SP down by a dynamic value (or up, with a negative operand).
    e.enc32(adjust_sp_down.bind(I32), rec_adjustsp.opcodes(&SUB));
    e.enc64(
        adjust_sp_down.bind(I64),
        rec_adjustsp.opcodes(&SUB).rex().w(),
    );

    // Adjust SP up by an immediate (or down, with a negative immediate).
    e.enc32(adjust_sp_up_imm, rec_adjustsp_ib.opcodes(&CMP_IMM8));
    e.enc32(adjust_sp_up_imm, rec_adjustsp_id.opcodes(&CMP_IMM));
    e.enc64(
        adjust_sp_up_imm,
        rec_adjustsp_ib.opcodes(&CMP_IMM8).rex().w(),
    );
    e.enc64(
        adjust_sp_up_imm,
        rec_adjustsp_id.opcodes(&CMP_IMM).rex().w(),
    );

    // Adjust SP down by an immediate (or up, with a negative immediate).
    e.enc32(
        adjust_sp_down_imm,
        rec_adjustsp_ib.opcodes(&CMP_IMM8).rrr(5),
    );
    e.enc32(adjust_sp_down_imm, rec_adjustsp_id.opcodes(&CMP_IMM).rrr(5));
    e.enc64(
        adjust_sp_down_imm,
        rec_adjustsp_ib.opcodes(&CMP_IMM8).rrr(5).rex().w(),
    );
    e.enc64(
        adjust_sp_down_imm,
        rec_adjustsp_id.opcodes(&CMP_IMM).rrr(5).rex().w(),
    );
}

#[inline(never)]
fn define_fpu_moves(e: &mut PerCpuModeEncodings, shared_defs: &SharedDefinitions, r: &RecipeGroup) {
    let shared = &shared_defs.instructions;

    // Shorthands for instructions.
    let bitcast = shared.by_name("bitcast");
    let copy = shared.by_name("copy");
    let regmove = shared.by_name("regmove");

    // Shorthands for recipes.
    let rec_frmov = r.template("frmov");
    let rec_frurm = r.template("frurm");
    let rec_furm = r.template("furm");
    let rec_rfumr = r.template("rfumr");

    // Floating-point moves.
    // movd
    e.enc_both(
        bitcast.bind(F32).bind(I32),
        rec_frurm.opcodes(&MOVD_LOAD_XMM),
    );
    e.enc_both(
        bitcast.bind(I32).bind(F32),
        rec_rfumr.opcodes(&MOVD_STORE_XMM),
    );

    // movq
    e.enc64(
        bitcast.bind(F64).bind(I64),
        rec_frurm.opcodes(&MOVD_LOAD_XMM).rex().w(),
    );
    e.enc64(
        bitcast.bind(I64).bind(F64),
        rec_rfumr.opcodes(&MOVD_STORE_XMM).rex().w(),
    );

    // movaps
    e.enc_both(copy.bind(F32), rec_furm.opcodes(&MOVAPS_LOAD));
    e.enc_both(copy.bind(F64), rec_furm.opcodes(&MOVAPS_LOAD));

    // TODO For x86-64, only define REX forms for now, since we can't describe the special
    // regunit immediate operands with the current constraint language.
    e.enc32(regmove.bind(F32), rec_frmov.opcodes(&MOVAPS_LOAD));
    e.enc64(regmove.bind(F32), rec_frmov.opcodes(&MOVAPS_LOAD).rex());
    e.enc32(regmove.bind(F64), rec_frmov.opcodes(&MOVAPS_LOAD));
    e.enc64(regmove.bind(F64), rec_frmov.opcodes(&MOVAPS_LOAD).rex());
}

#[inline(never)]
fn define_fpu_memory(
    e: &mut PerCpuModeEncodings,
    shared_defs: &SharedDefinitions,
    r: &RecipeGroup,
) {
    let shared = &shared_defs.instructions;

    // Shorthands for instructions.
    let fill = shared.by_name("fill");
    let load = shared.by_name("load");
    let load_complex = shared.by_name("load_complex");
    let regfill = shared.by_name("regfill");
    let regspill = shared.by_name("regspill");
    let spill = shared.by_name("spill");
    let store = shared.by_name("store");
    let store_complex = shared.by_name("store_complex");

    // Shorthands for recipes.
    let rec_ffillSib32 = r.template("ffillSib32");
    let rec_fld = r.template("fld");
    let rec_fldDisp32 = r.template("fldDisp32");
    let rec_fldDisp8 = r.template("fldDisp8");
    let rec_fldWithIndex = r.template("fldWithIndex");
    let rec_fldWithIndexDisp32 = r.template("fldWithIndexDisp32");
    let rec_fldWithIndexDisp8 = r.template("fldWithIndexDisp8");
    let rec_fregfill32 = r.template("fregfill32");
    let rec_fregspill32 = r.template("fregspill32");
    let rec_fspillSib32 = r.template("fspillSib32");
    let rec_fst = r.template("fst");
    let rec_fstDisp32 = r.template("fstDisp32");
    let rec_fstDisp8 = r.template("fstDisp8");
    let rec_fstWithIndex = r.template("fstWithIndex");
    let rec_fstWithIndexDisp32 = r.template("fstWithIndexDisp32");
    let rec_fstWithIndexDisp8 = r.template("fstWithIndexDisp8");

    // Float loads and stores.
    e.enc_both(load.bind(F32).bind(Any), rec_fld.opcodes(&MOVSS_LOAD));
    e.enc_both(load.bind(F32).bind(Any), rec_fldDisp8.opcodes(&MOVSS_LOAD));
    e.enc_both(load.bind(F32).bind(Any), rec_fldDisp32.opcodes(&MOVSS_LOAD));

    e.enc_both(
        load_complex.bind(F32),
        rec_fldWithIndex.opcodes(&MOVSS_LOAD),
    );
    e.enc_both(
        load_complex.bind(F32),
        rec_fldWithIndexDisp8.opcodes(&MOVSS_LOAD),
    );
    e.enc_both(
        load_complex.bind(F32),
        rec_fldWithIndexDisp32.opcodes(&MOVSS_LOAD),
    );

    e.enc_both(load.bind(F64).bind(Any), rec_fld.opcodes(&MOVSD_LOAD));
    e.enc_both(load.bind(F64).bind(Any), rec_fldDisp8.opcodes(&MOVSD_LOAD));
    e.enc_both(load.bind(F64).bind(Any), rec_fldDisp32.opcodes(&MOVSD_LOAD));

    e.enc_both(
        load_complex.bind(F64),
        rec_fldWithIndex.opcodes(&MOVSD_LOAD),
    );
    e.enc_both(
        load_complex.bind(F64),
        rec_fldWithIndexDisp8.opcodes(&MOVSD_LOAD),
    );
    e.enc_both(
        load_complex.bind(F64),
        rec_fldWithIndexDisp32.opcodes(&MOVSD_LOAD),
    );

    e.enc_both(store.bind(F32).bind(Any), rec_fst.opcodes(&MOVSS_STORE));
    e.enc_both(
        store.bind(F32).bind(Any),
        rec_fstDisp8.opcodes(&MOVSS_STORE),
    );
    e.enc_both(
        store.bind(F32).bind(Any),
        rec_fstDisp32.opcodes(&MOVSS_STORE),
    );

    e.enc_both(
        store_complex.bind(F32),
        rec_fstWithIndex.opcodes(&MOVSS_STORE),
    );
    e.enc_both(
        store_complex.bind(F32),
        rec_fstWithIndexDisp8.opcodes(&MOVSS_STORE),
    );
    e.enc_both(
        store_complex.bind(F32),
        rec_fstWithIndexDisp32.opcodes(&MOVSS_STORE),
    );

    e.enc_both(store.bind(F64).bind(Any), rec_fst.opcodes(&MOVSD_STORE));
    e.enc_both(
        store.bind(F64).bind(Any),
        rec_fstDisp8.opcodes(&MOVSD_STORE),
    );
    e.enc_both(
        store.bind(F64).bind(Any),
        rec_fstDisp32.opcodes(&MOVSD_STORE),
    );

    e.enc_both(
        store_complex.bind(F64),
        rec_fstWithIndex.opcodes(&MOVSD_STORE),
    );
    e.enc_both(
        store_complex.bind(F64),
        rec_fstWithIndexDisp8.opcodes(&MOVSD_STORE),
    );
    e.enc_both(
        store_complex.bind(F64),
        rec_fstWithIndexDisp32.opcodes(&MOVSD_STORE),
    );

    e.enc_both(fill.bind(F32), rec_ffillSib32.opcodes(&MOVSS_LOAD));
    e.enc_both(regfill.bind(F32), rec_fregfill32.opcodes(&MOVSS_LOAD));
    e.enc_both(fill.bind(F64), rec_ffillSib32.opcodes(&MOVSD_LOAD));
    e.enc_both(regfill.bind(F64), rec_fregfill32.opcodes(&MOVSD_LOAD));

    e.enc_both(spill.bind(F32), rec_fspillSib32.opcodes(&MOVSS_STORE));
    e.enc_both(regspill.bind(F32), rec_fregspill32.opcodes(&MOVSS_STORE));
    e.enc_both(spill.bind(F64), rec_fspillSib32.opcodes(&MOVSD_STORE));
    e.enc_both(regspill.bind(F64), rec_fregspill32.opcodes(&MOVSD_STORE));
}

#[inline(never)]
fn define_fpu_ops(
    e: &mut PerCpuModeEncodings,
    shared_defs: &SharedDefinitions,
    settings: &SettingGroup,
    x86: &InstructionGroup,
    r: &RecipeGroup,
) {
    let shared = &shared_defs.instructions;
    let formats = &shared_defs.formats;

    // Shorthands for instructions.
    let ceil = shared.by_name("ceil");
    let f32const = shared.by_name("f32const");
    let f64const = shared.by_name("f64const");
    let fadd = shared.by_name("fadd");
    let fcmp = shared.by_name("fcmp");
    let fcvt_from_sint = shared.by_name("fcvt_from_sint");
    let fdemote = shared.by_name("fdemote");
    let fdiv = shared.by_name("fdiv");
    let ffcmp = shared.by_name("ffcmp");
    let floor = shared.by_name("floor");
    let fmul = shared.by_name("fmul");
    let fpromote = shared.by_name("fpromote");
    let fsub = shared.by_name("fsub");
    let nearest = shared.by_name("nearest");
    let sqrt = shared.by_name("sqrt");
    let trunc = shared.by_name("trunc");
    let x86_cvtt2si = x86.by_name("x86_cvtt2si");
    let x86_fmax = x86.by_name("x86_fmax");
    let x86_fmin = x86.by_name("x86_fmin");

    // Shorthands for recipes.
    let rec_f32imm_z = r.template("f32imm_z");
    let rec_f64imm_z = r.template("f64imm_z");
    let rec_fa = r.template("fa");
    let rec_fcmp = r.template("fcmp");
    let rec_fcscc = r.template("fcscc");
    let rec_frurm = r.template("frurm");
    let rec_furm = r.template("furm");
    let rec_furmi_rnd = r.template("furmi_rnd");
    let rec_rfurm = r.template("rfurm");

    // Predicate shorthands.
    let use_sse41 = settings.predicate_by_name("use_sse41");

    // Floating-point constants equal to 0.0 can be encoded using either `xorps` or `xorpd`, for
    // 32-bit and 64-bit floats respectively.
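    // For example, "xorps %xmm0, %xmm0" zeroes all 128 bits of the register, which reads back
    // as +0.0 in either lane width (the register shown is illustrative; register allocation
    // picks the real one).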
1236     let is_zero_32_bit_float =
1237         InstructionPredicate::new_is_zero_32bit_float(&*formats.unary_ieee32, "imm");
1238     e.enc32_instp(
1239         f32const,
1240         rec_f32imm_z.opcodes(&XORPS),
1241         is_zero_32_bit_float.clone(),
1242     );
1243 
1244     let is_zero_64_bit_float =
1245         InstructionPredicate::new_is_zero_64bit_float(&*formats.unary_ieee64, "imm");
1246     e.enc32_instp(
1247         f64const,
1248         rec_f64imm_z.opcodes(&XORPD),
1249         is_zero_64_bit_float.clone(),
1250     );
1251 
1252     e.enc_x86_64_instp(f32const, rec_f32imm_z.opcodes(&XORPS), is_zero_32_bit_float);
1253     e.enc_x86_64_instp(f64const, rec_f64imm_z.opcodes(&XORPD), is_zero_64_bit_float);

    // cvtsi2ss
    e.enc_i32_i64(fcvt_from_sint.bind(F32), rec_frurm.opcodes(&CVTSI2SS));

    // cvtsi2sd
    e.enc_i32_i64(fcvt_from_sint.bind(F64), rec_frurm.opcodes(&CVTSI2SD));

    // cvtss2sd
    e.enc_both(fpromote.bind(F64).bind(F32), rec_furm.opcodes(&CVTSS2SD));

    // cvtsd2ss
    e.enc_both(fdemote.bind(F32).bind(F64), rec_furm.opcodes(&CVTSD2SS));

    // cvttss2si
    e.enc_both(
        x86_cvtt2si.bind(I32).bind(F32),
        rec_rfurm.opcodes(&CVTTSS2SI),
    );
    e.enc64(
        x86_cvtt2si.bind(I64).bind(F32),
        rec_rfurm.opcodes(&CVTTSS2SI).rex().w(),
    );

    // cvttsd2si
    e.enc_both(
        x86_cvtt2si.bind(I32).bind(F64),
        rec_rfurm.opcodes(&CVTTSD2SI),
    );
    e.enc64(
        x86_cvtt2si.bind(I64).bind(F64),
        rec_rfurm.opcodes(&CVTTSD2SI).rex().w(),
    );

    // Exact square roots.
    e.enc_both(sqrt.bind(F32), rec_furm.opcodes(&SQRTSS));
    e.enc_both(sqrt.bind(F64), rec_furm.opcodes(&SQRTSD));

    // Rounding. The recipe looks at the opcode to pick an immediate.
    for inst in &[nearest, floor, ceil, trunc] {
        e.enc_both_isap(inst.bind(F32), rec_furmi_rnd.opcodes(&ROUNDSS), use_sse41);
        e.enc_both_isap(inst.bind(F64), rec_furmi_rnd.opcodes(&ROUNDSD), use_sse41);
    }
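    // ROUNDSS/ROUNDSD take an imm8 rounding mode: 0 = nearest, 1 = down (floor),
    // 2 = up (ceil), 3 = toward zero (trunc).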

    // Binary arithmetic ops.
    e.enc_both(fadd.bind(F32), rec_fa.opcodes(&ADDSS));
    e.enc_both(fadd.bind(F64), rec_fa.opcodes(&ADDSD));

    e.enc_both(fsub.bind(F32), rec_fa.opcodes(&SUBSS));
    e.enc_both(fsub.bind(F64), rec_fa.opcodes(&SUBSD));

    e.enc_both(fmul.bind(F32), rec_fa.opcodes(&MULSS));
    e.enc_both(fmul.bind(F64), rec_fa.opcodes(&MULSD));

    e.enc_both(fdiv.bind(F32), rec_fa.opcodes(&DIVSS));
    e.enc_both(fdiv.bind(F64), rec_fa.opcodes(&DIVSD));

    e.enc_both(x86_fmin.bind(F32), rec_fa.opcodes(&MINSS));
    e.enc_both(x86_fmin.bind(F64), rec_fa.opcodes(&MINSD));

    e.enc_both(x86_fmax.bind(F32), rec_fa.opcodes(&MAXSS));
    e.enc_both(x86_fmax.bind(F64), rec_fa.opcodes(&MAXSD));

    // Comparisons.
    //
    // This only covers the condition codes in `supported_floatccs`; the rest are
    // handled by legalization patterns.
    e.enc_both(fcmp.bind(F32), rec_fcscc.opcodes(&UCOMISS));
    e.enc_both(fcmp.bind(F64), rec_fcscc.opcodes(&UCOMISD));
    e.enc_both(ffcmp.bind(F32), rec_fcmp.opcodes(&UCOMISS));
    e.enc_both(ffcmp.bind(F64), rec_fcmp.opcodes(&UCOMISD));
}

#[inline(never)]
fn define_alu(
    e: &mut PerCpuModeEncodings,
    shared_defs: &SharedDefinitions,
    settings: &SettingGroup,
    x86: &InstructionGroup,
    r: &RecipeGroup,
) {
    let shared = &shared_defs.instructions;

    // Shorthands for instructions.
    let clz = shared.by_name("clz");
    let ctz = shared.by_name("ctz");
    let icmp = shared.by_name("icmp");
    let icmp_imm = shared.by_name("icmp_imm");
    let ifcmp = shared.by_name("ifcmp");
    let ifcmp_imm = shared.by_name("ifcmp_imm");
    let ifcmp_sp = shared.by_name("ifcmp_sp");
    let ishl = shared.by_name("ishl");
    let ishl_imm = shared.by_name("ishl_imm");
    let popcnt = shared.by_name("popcnt");
    let rotl = shared.by_name("rotl");
    let rotl_imm = shared.by_name("rotl_imm");
    let rotr = shared.by_name("rotr");
    let rotr_imm = shared.by_name("rotr_imm");
    let selectif = shared.by_name("selectif");
    let sshr = shared.by_name("sshr");
    let sshr_imm = shared.by_name("sshr_imm");
    let trueff = shared.by_name("trueff");
    let trueif = shared.by_name("trueif");
    let ushr = shared.by_name("ushr");
    let ushr_imm = shared.by_name("ushr_imm");
    let x86_bsf = x86.by_name("x86_bsf");
    let x86_bsr = x86.by_name("x86_bsr");

    // Shorthands for recipes.
    let rec_bsf_and_bsr = r.template("bsf_and_bsr");
    let rec_cmov = r.template("cmov");
    let rec_icscc = r.template("icscc");
    let rec_icscc_ib = r.template("icscc_ib");
    let rec_icscc_id = r.template("icscc_id");
    let rec_rcmp = r.template("rcmp");
    let rec_rcmp_ib = r.template("rcmp_ib");
    let rec_rcmp_id = r.template("rcmp_id");
    let rec_rcmp_sp = r.template("rcmp_sp");
    let rec_rc = r.template("rc");
    let rec_setf_abcd = r.template("setf_abcd");
    let rec_seti_abcd = r.template("seti_abcd");
    let rec_urm = r.template("urm");

    // Predicates shorthands.
    let use_popcnt = settings.predicate_by_name("use_popcnt");
    let use_lzcnt = settings.predicate_by_name("use_lzcnt");
    let use_bmi1 = settings.predicate_by_name("use_bmi1");

    let band = shared.by_name("band");
    let band_imm = shared.by_name("band_imm");
    let band_not = shared.by_name("band_not");
    let bnot = shared.by_name("bnot");
    let bor = shared.by_name("bor");
    let bor_imm = shared.by_name("bor_imm");
    let bxor = shared.by_name("bxor");
    let bxor_imm = shared.by_name("bxor_imm");
    let iadd = shared.by_name("iadd");
    let iadd_ifcarry = shared.by_name("iadd_ifcarry");
    let iadd_ifcin = shared.by_name("iadd_ifcin");
    let iadd_ifcout = shared.by_name("iadd_ifcout");
    let iadd_imm = shared.by_name("iadd_imm");
    let imul = shared.by_name("imul");
    let isub = shared.by_name("isub");
    let isub_ifbin = shared.by_name("isub_ifbin");
    let isub_ifborrow = shared.by_name("isub_ifborrow");
    let isub_ifbout = shared.by_name("isub_ifbout");
    let x86_sdivmodx = x86.by_name("x86_sdivmodx");
    let x86_smulx = x86.by_name("x86_smulx");
    let x86_udivmodx = x86.by_name("x86_udivmodx");
    let x86_umulx = x86.by_name("x86_umulx");

    let rec_div = r.template("div");
    let rec_fa = r.template("fa");
    let rec_fax = r.template("fax");
    let rec_mulx = r.template("mulx");
    let rec_r_ib = r.template("r_ib");
    let rec_r_id = r.template("r_id");
    let rec_rin = r.template("rin");
    let rec_rio = r.template("rio");
    let rec_rout = r.template("rout");
    let rec_rr = r.template("rr");
    let rec_rrx = r.template("rrx");
    let rec_ur = r.template("ur");

    e.enc_i32_i64(iadd, rec_rr.opcodes(&ADD));
    e.enc_i32_i64(iadd_ifcout, rec_rout.opcodes(&ADD));
    e.enc_i32_i64(iadd_ifcin, rec_rin.opcodes(&ADC));
    e.enc_i32_i64(iadd_ifcarry, rec_rio.opcodes(&ADC));
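    // Immediate forms: both an 8-bit sign-extended and a full 32-bit immediate encoding are
    // provided; the `*_ib` recipes only apply when the immediate fits in a signed byte.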
    e.enc_i32_i64(iadd_imm, rec_r_ib.opcodes(&ADD_IMM8_SIGN_EXTEND).rrr(0));
    e.enc_i32_i64(iadd_imm, rec_r_id.opcodes(&ADD_IMM).rrr(0));

    e.enc_i32_i64(isub, rec_rr.opcodes(&SUB));
    e.enc_i32_i64(isub_ifbout, rec_rout.opcodes(&SUB));
    e.enc_i32_i64(isub_ifbin, rec_rin.opcodes(&SBB));
    e.enc_i32_i64(isub_ifborrow, rec_rio.opcodes(&SBB));

    e.enc_i32_i64(band, rec_rr.opcodes(&AND));
    e.enc_b32_b64(band, rec_rr.opcodes(&AND));

    // TODO: band_imm.i64 with an unsigned 32-bit immediate can be encoded as band_imm.i32. Can
    // even use the single-byte immediate for 0xffff_ffXX masks.

    e.enc_i32_i64(band_imm, rec_r_ib.opcodes(&AND_IMM8_SIGN_EXTEND).rrr(4));
    e.enc_i32_i64(band_imm, rec_r_id.opcodes(&AND_IMM).rrr(4));

    e.enc_i32_i64(bor, rec_rr.opcodes(&OR));
    e.enc_b32_b64(bor, rec_rr.opcodes(&OR));
    e.enc_i32_i64(bor_imm, rec_r_ib.opcodes(&OR_IMM8_SIGN_EXTEND).rrr(1));
    e.enc_i32_i64(bor_imm, rec_r_id.opcodes(&OR_IMM).rrr(1));

    e.enc_i32_i64(bxor, rec_rr.opcodes(&XOR));
    e.enc_b32_b64(bxor, rec_rr.opcodes(&XOR));
    e.enc_i32_i64(bxor_imm, rec_r_ib.opcodes(&XOR_IMM8_SIGN_EXTEND).rrr(6));
    e.enc_i32_i64(bxor_imm, rec_r_id.opcodes(&XOR_IMM).rrr(6));

    // x86 has a bitwise not instruction NOT.
    e.enc_i32_i64(bnot, rec_ur.opcodes(&NOT).rrr(2));
    e.enc_b32_b64(bnot, rec_ur.opcodes(&NOT).rrr(2));

    // Also add `b1` encodings for the logic instructions.
    // TODO: Should this be done with 8-bit instructions? It would improve partial register
    // dependencies.
    e.enc_both(band.bind(B1), rec_rr.opcodes(&AND));
    e.enc_both(bor.bind(B1), rec_rr.opcodes(&OR));
    e.enc_both(bxor.bind(B1), rec_rr.opcodes(&XOR));

    e.enc_i32_i64(imul, rec_rrx.opcodes(&IMUL));
    e.enc_i32_i64(x86_sdivmodx, rec_div.opcodes(&IDIV).rrr(7));
    e.enc_i32_i64(x86_udivmodx, rec_div.opcodes(&DIV).rrr(6));

    e.enc_i32_i64(x86_smulx, rec_mulx.opcodes(&IMUL_RDX_RAX).rrr(5));
    e.enc_i32_i64(x86_umulx, rec_mulx.opcodes(&MUL).rrr(4));
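    // The one-operand MUL/IMUL forms used above write their double-width product to RDX:RAX;
    // that is how x86_smulx/x86_umulx expose the high half of the result.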

    // Binary bitwise ops.
    //
    // The F64 version is intentionally encoded using the single-precision opcode:
    // the operation is identical and the encoding is one byte shorter.
    e.enc_both(band.bind(F32), rec_fa.opcodes(&ANDPS));
    e.enc_both(band.bind(F64), rec_fa.opcodes(&ANDPS));

    e.enc_both(bor.bind(F32), rec_fa.opcodes(&ORPS));
    e.enc_both(bor.bind(F64), rec_fa.opcodes(&ORPS));

    e.enc_both(bxor.bind(F32), rec_fa.opcodes(&XORPS));
    e.enc_both(bxor.bind(F64), rec_fa.opcodes(&XORPS));

    // The `andnps(x,y)` instruction computes `~x&y`, while `band_not(x,y)` is `x&~y`.
    e.enc_both(band_not.bind(F32), rec_fax.opcodes(&ANDNPS));
    e.enc_both(band_not.bind(F64), rec_fax.opcodes(&ANDNPS));

    // Shifts and rotates.
    // Note that the dynamic shift amount is only masked by 5 or 6 bits; the 8-bit
    // and 16-bit shifts would need explicit masking.

    for &(inst, rrr) in &[(rotl, 0), (rotr, 1), (ishl, 4), (ushr, 5), (sshr, 7)] {
        // Cannot use enc_i32_i64 for this pattern because the instructions must also
        // bind the shift-amount type to `Any`.
        e.enc32(
            inst.bind(I32).bind(Any),
            rec_rc.opcodes(&ROTATE_CL).rrr(rrr),
        );
        e.enc64(
            inst.bind(I64).bind(Any),
            rec_rc.opcodes(&ROTATE_CL).rrr(rrr).rex().w(),
        );
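        // Both a REX and a non-REX encoding are listed for I32 below: the REX form can
        // address r8-r15, while the non-REX form is one byte shorter when a low register
        // is assigned.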
        e.enc64(
            inst.bind(I32).bind(Any),
            rec_rc.opcodes(&ROTATE_CL).rrr(rrr).rex(),
        );
        e.enc64(
            inst.bind(I32).bind(Any),
            rec_rc.opcodes(&ROTATE_CL).rrr(rrr),
        );
    }

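    // A single shift-group opcode serves rol, ror, shl, shr, and sar; the ModR/M reg field
    // set by `.rrr(..)` picks the operation: /0 rol, /1 ror, /4 shl, /5 shr, /7 sar.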
    e.enc_i32_i64(rotl_imm, rec_r_ib.opcodes(&ROTATE_IMM8).rrr(0));
    e.enc_i32_i64(rotr_imm, rec_r_ib.opcodes(&ROTATE_IMM8).rrr(1));
    e.enc_i32_i64(ishl_imm, rec_r_ib.opcodes(&ROTATE_IMM8).rrr(4));
    e.enc_i32_i64(ushr_imm, rec_r_ib.opcodes(&ROTATE_IMM8).rrr(5));
    e.enc_i32_i64(sshr_imm, rec_r_ib.opcodes(&ROTATE_IMM8).rrr(7));

    // Population count.
    e.enc32_isap(popcnt.bind(I32), rec_urm.opcodes(&POPCNT), use_popcnt);
    e.enc64_isap(
        popcnt.bind(I64),
        rec_urm.opcodes(&POPCNT).rex().w(),
        use_popcnt,
    );
    e.enc64_isap(popcnt.bind(I32), rec_urm.opcodes(&POPCNT).rex(), use_popcnt);
    e.enc64_isap(popcnt.bind(I32), rec_urm.opcodes(&POPCNT), use_popcnt);

    // Count leading zero bits.
    e.enc32_isap(clz.bind(I32), rec_urm.opcodes(&LZCNT), use_lzcnt);
    e.enc64_isap(clz.bind(I64), rec_urm.opcodes(&LZCNT).rex().w(), use_lzcnt);
    e.enc64_isap(clz.bind(I32), rec_urm.opcodes(&LZCNT).rex(), use_lzcnt);
    e.enc64_isap(clz.bind(I32), rec_urm.opcodes(&LZCNT), use_lzcnt);

    // Count trailing zero bits.
    e.enc32_isap(ctz.bind(I32), rec_urm.opcodes(&TZCNT), use_bmi1);
    e.enc64_isap(ctz.bind(I64), rec_urm.opcodes(&TZCNT).rex().w(), use_bmi1);
    e.enc64_isap(ctz.bind(I32), rec_urm.opcodes(&TZCNT).rex(), use_bmi1);
    e.enc64_isap(ctz.bind(I32), rec_urm.opcodes(&TZCNT), use_bmi1);

    // Bit scan forward and reverse.
    e.enc_i32_i64(x86_bsf, rec_bsf_and_bsr.opcodes(&BIT_SCAN_FORWARD));
    e.enc_i32_i64(x86_bsr, rec_bsf_and_bsr.opcodes(&BIT_SCAN_REVERSE));

    // Comparisons
    e.enc_i32_i64(icmp, rec_icscc.opcodes(&CMP_REG));
    e.enc_i32_i64(icmp_imm, rec_icscc_ib.opcodes(&CMP_IMM8).rrr(7));
    e.enc_i32_i64(icmp_imm, rec_icscc_id.opcodes(&CMP_IMM).rrr(7));
    e.enc_i32_i64(ifcmp, rec_rcmp.opcodes(&CMP_REG));
    e.enc_i32_i64(ifcmp_imm, rec_rcmp_ib.opcodes(&CMP_IMM8).rrr(7));
    e.enc_i32_i64(ifcmp_imm, rec_rcmp_id.opcodes(&CMP_IMM).rrr(7));
    // TODO: We could special-case ifcmp_imm(x, 0) to TEST(x, x).

    e.enc32(ifcmp_sp.bind(I32), rec_rcmp_sp.opcodes(&CMP_REG));
    e.enc64(ifcmp_sp.bind(I64), rec_rcmp_sp.opcodes(&CMP_REG).rex().w());

    // Convert flags to bool.
    // This encodes `b1` as an 8-bit low register with the value 0 or 1.
    e.enc_both(trueif, rec_seti_abcd.opcodes(&SET_BYTE_IF_OVERFLOW));
    e.enc_both(trueff, rec_setf_abcd.opcodes(&SET_BYTE_IF_OVERFLOW));
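    // SETcc encodes its condition in the second opcode byte (0x0f 0x90 plus the condition
    // code); the setCC-style recipes fold the bound condition into the SETO base given here.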

    // Conditional move (a.k.a. integer select).
    e.enc_i32_i64(selectif, rec_cmov.opcodes(&CMOV_OVERFLOW));
}

#[inline(never)]
#[allow(clippy::cognitive_complexity)]
fn define_simd(
    e: &mut PerCpuModeEncodings,
    shared_defs: &SharedDefinitions,
    settings: &SettingGroup,
    x86: &InstructionGroup,
    r: &RecipeGroup,
) {
    let shared = &shared_defs.instructions;
    let formats = &shared_defs.formats;

    // Shorthands for instructions.
    let avg_round = shared.by_name("avg_round");
    let bitcast = shared.by_name("bitcast");
    let bor = shared.by_name("bor");
    let bxor = shared.by_name("bxor");
    let copy = shared.by_name("copy");
    let copy_nop = shared.by_name("copy_nop");
    let copy_to_ssa = shared.by_name("copy_to_ssa");
    let fadd = shared.by_name("fadd");
    let fcmp = shared.by_name("fcmp");
    let fcvt_from_sint = shared.by_name("fcvt_from_sint");
    let fdiv = shared.by_name("fdiv");
    let fill = shared.by_name("fill");
    let fill_nop = shared.by_name("fill_nop");
    let fmax = shared.by_name("fmax");
    let fmin = shared.by_name("fmin");
    let fmul = shared.by_name("fmul");
    let fsub = shared.by_name("fsub");
    let iadd = shared.by_name("iadd");
    let icmp = shared.by_name("icmp");
    let imul = shared.by_name("imul");
    let ishl_imm = shared.by_name("ishl_imm");
    let load = shared.by_name("load");
    let load_complex = shared.by_name("load_complex");
    let raw_bitcast = shared.by_name("raw_bitcast");
    let regfill = shared.by_name("regfill");
    let regmove = shared.by_name("regmove");
    let regspill = shared.by_name("regspill");
    let sadd_sat = shared.by_name("sadd_sat");
    let scalar_to_vector = shared.by_name("scalar_to_vector");
    let sload8x8 = shared.by_name("sload8x8");
    let sload16x4 = shared.by_name("sload16x4");
    let sload32x2 = shared.by_name("sload32x2");
    let spill = shared.by_name("spill");
    let sqrt = shared.by_name("sqrt");
    let sshr_imm = shared.by_name("sshr_imm");
    let ssub_sat = shared.by_name("ssub_sat");
    let store = shared.by_name("store");
    let store_complex = shared.by_name("store_complex");
    let uadd_sat = shared.by_name("uadd_sat");
    let uload8x8 = shared.by_name("uload8x8");
    let uload16x4 = shared.by_name("uload16x4");
    let uload32x2 = shared.by_name("uload32x2");
    let ushr_imm = shared.by_name("ushr_imm");
    let usub_sat = shared.by_name("usub_sat");
    let vconst = shared.by_name("vconst");
    let x86_insertps = x86.by_name("x86_insertps");
    let x86_movlhps = x86.by_name("x86_movlhps");
    let x86_movsd = x86.by_name("x86_movsd");
    let x86_packss = x86.by_name("x86_packss");
    let x86_pextr = x86.by_name("x86_pextr");
    let x86_pinsr = x86.by_name("x86_pinsr");
    let x86_pmaxs = x86.by_name("x86_pmaxs");
    let x86_pmaxu = x86.by_name("x86_pmaxu");
    let x86_pmins = x86.by_name("x86_pmins");
    let x86_pminu = x86.by_name("x86_pminu");
    let x86_pshufb = x86.by_name("x86_pshufb");
    let x86_pshufd = x86.by_name("x86_pshufd");
    let x86_psll = x86.by_name("x86_psll");
    let x86_psra = x86.by_name("x86_psra");
    let x86_psrl = x86.by_name("x86_psrl");
    let x86_ptest = x86.by_name("x86_ptest");
    let x86_punpckh = x86.by_name("x86_punpckh");
    let x86_punpckl = x86.by_name("x86_punpckl");

    // Shorthands for recipes.
    let rec_evex_reg_vvvv_rm_128 = r.template("evex_reg_vvvv_rm_128");
    let rec_f_ib = r.template("f_ib");
    let rec_fa = r.template("fa");
    let rec_fa_ib = r.template("fa_ib");
    let rec_fax = r.template("fax");
    let rec_fcmp = r.template("fcmp");
    let rec_ffillSib32 = r.template("ffillSib32");
    let rec_ffillnull = r.recipe("ffillnull");
    let rec_fld = r.template("fld");
    let rec_fldDisp32 = r.template("fldDisp32");
    let rec_fldDisp8 = r.template("fldDisp8");
    let rec_fldWithIndex = r.template("fldWithIndex");
    let rec_fldWithIndexDisp32 = r.template("fldWithIndexDisp32");
    let rec_fldWithIndexDisp8 = r.template("fldWithIndexDisp8");
    let rec_fregfill32 = r.template("fregfill32");
    let rec_fregspill32 = r.template("fregspill32");
    let rec_frmov = r.template("frmov");
    let rec_frurm = r.template("frurm");
    let rec_fspillSib32 = r.template("fspillSib32");
    let rec_fst = r.template("fst");
    let rec_fstDisp32 = r.template("fstDisp32");
    let rec_fstDisp8 = r.template("fstDisp8");
    let rec_fstWithIndex = r.template("fstWithIndex");
    let rec_fstWithIndexDisp32 = r.template("fstWithIndexDisp32");
    let rec_fstWithIndexDisp8 = r.template("fstWithIndexDisp8");
    let rec_furm = r.template("furm");
    let rec_furm_reg_to_ssa = r.template("furm_reg_to_ssa");
    let rec_icscc_fpr = r.template("icscc_fpr");
    let rec_null_fpr = r.recipe("null_fpr");
    let rec_pfcmp = r.template("pfcmp");
    let rec_r_ib_unsigned_fpr = r.template("r_ib_unsigned_fpr");
    let rec_r_ib_unsigned_gpr = r.template("r_ib_unsigned_gpr");
    let rec_r_ib_unsigned_r = r.template("r_ib_unsigned_r");
    let rec_stacknull = r.recipe("stacknull");
    let rec_vconst = r.template("vconst");
    let rec_vconst_optimized = r.template("vconst_optimized");

    // Predicates shorthands.
    settings.predicate_by_name("all_ones_funcaddrs_and_not_is_pic");
    settings.predicate_by_name("not_all_ones_funcaddrs_and_not_is_pic");
    let use_ssse3_simd = settings.predicate_by_name("use_ssse3_simd");
    let use_sse41_simd = settings.predicate_by_name("use_sse41_simd");
    let use_sse42_simd = settings.predicate_by_name("use_sse42_simd");
    let use_avx512dq_simd = settings.predicate_by_name("use_avx512dq_simd");

    // SIMD vector size: eventually multiple vector sizes may be supported but for now only
    // SSE-sized vectors are available.
    let sse_vector_size: u64 = 128;

    // SIMD splat: before x86 can use vector data, it must be moved to XMM registers; see
    // legalize.rs for how this is done; once there, x86_pshuf* (below) is used for broadcasting the
    // value across the register.

    let allowed_simd_type = |t: &LaneType| t.lane_bits() >= 8 && t.lane_bits() < 128;

    // PSHUFB, 8-bit shuffle using two XMM registers.
    for ty in ValueType::all_lane_types().filter(allowed_simd_type) {
        let instruction = x86_pshufb.bind(vector(ty, sse_vector_size));
        let template = rec_fa.opcodes(&PSHUFB);
        e.enc_both_inferred_maybe_isap(instruction.clone(), template.clone(), Some(use_ssse3_simd));
    }

    // PSHUFD, 32-bit shuffle using one XMM register and a u8 immediate.
    for ty in ValueType::all_lane_types().filter(|t| t.lane_bits() == 32) {
        let instruction = x86_pshufd.bind(vector(ty, sse_vector_size));
        let template = rec_r_ib_unsigned_fpr.opcodes(&PSHUFD);
        e.enc_both_inferred(instruction, template);
    }

    // SIMD scalar_to_vector; this uses MOV to copy the scalar value to an XMM register; according
    // to the Intel manual: "When the destination operand is an XMM register, the source operand is
    // written to the low doubleword of the register and the register is zero-extended to 128 bits."
    for ty in ValueType::all_lane_types().filter(allowed_simd_type) {
        let instruction = scalar_to_vector.bind(vector(ty, sse_vector_size));
        if ty.is_float() {
            // No need to move floats--they already live in XMM registers.
            e.enc_32_64_rec(instruction, rec_null_fpr, 0);
        } else {
            let template = rec_frurm.opcodes(&MOVD_LOAD_XMM);
            if ty.lane_bits() < 64 {
                e.enc_both_inferred(instruction, template);
            } else {
                // No 32-bit encodings for 64-bit widths.
                assert_eq!(ty.lane_bits(), 64);
                e.enc64(instruction, template.rex().w());
            }
        }
    }

    // SIMD insertlane
    for ty in ValueType::all_lane_types().filter(allowed_simd_type) {
        let (opcode, isap): (&[_], _) = match ty.lane_bits() {
            8 => (&PINSRB, Some(use_sse41_simd)),
            16 => (&PINSRW, None),
            32 | 64 => (&PINSR, Some(use_sse41_simd)),
            _ => panic!("invalid size for SIMD insertlane"),
        };

        let instruction = x86_pinsr.bind(vector(ty, sse_vector_size));
        let template = rec_r_ib_unsigned_r.opcodes(opcode);
        if ty.lane_bits() < 64 {
            e.enc_both_inferred_maybe_isap(instruction, template, isap);
        } else {
            // The 64-bit widths have REX/W encodings and are only available on x86_64.
            e.enc64_maybe_isap(instruction, template.rex().w(), isap);
        }
    }

    // For legalizing insertlane with floats, INSERTPS from SSE4.1.
    {
        let instruction = x86_insertps.bind(vector(F32, sse_vector_size));
        let template = rec_fa_ib.opcodes(&INSERTPS);
        e.enc_both_inferred_maybe_isap(instruction, template, Some(use_sse41_simd));
    }

    // For legalizing insertlane with floats, MOVSD from SSE2.
    {
        let instruction = x86_movsd.bind(vector(F64, sse_vector_size));
        let template = rec_fa.opcodes(&MOVSD_LOAD);
        e.enc_both_inferred(instruction, template); // from SSE2
    }

    // For legalizing insertlane with floats, MOVLHPS from SSE.
    {
        let instruction = x86_movlhps.bind(vector(F64, sse_vector_size));
        let template = rec_fa.opcodes(&MOVLHPS);
        e.enc_both_inferred(instruction, template); // from SSE
    }

    // SIMD extractlane
    for ty in ValueType::all_lane_types().filter(allowed_simd_type) {
        let opcode = match ty.lane_bits() {
            8 => &PEXTRB,
            16 => &PEXTRW,
            32 | 64 => &PEXTR,
            _ => panic!("invalid size for SIMD extractlane"),
        };

        let instruction = x86_pextr.bind(vector(ty, sse_vector_size));
        let template = rec_r_ib_unsigned_gpr.opcodes(opcode);
        if ty.lane_bits() < 64 {
            e.enc_both_inferred_maybe_isap(instruction, template, Some(use_sse41_simd));
        } else {
            // The 64-bit widths have REX/W encodings and are only available on x86_64.
            e.enc64_maybe_isap(instruction, template.rex().w(), Some(use_sse41_simd));
        }
    }

    // SIMD packing/unpacking
    for ty in ValueType::all_lane_types().filter(allowed_simd_type) {
        let (high, low) = match ty.lane_bits() {
            8 => (&PUNPCKHBW, &PUNPCKLBW),
            16 => (&PUNPCKHWD, &PUNPCKLWD),
            32 => (&PUNPCKHDQ, &PUNPCKLDQ),
            64 => (&PUNPCKHQDQ, &PUNPCKLQDQ),
            _ => panic!("invalid size for SIMD packing/unpacking"),
        };

        e.enc_both_inferred(
            x86_punpckh.bind(vector(ty, sse_vector_size)),
            rec_fa.opcodes(high),
        );
        e.enc_both_inferred(
            x86_punpckl.bind(vector(ty, sse_vector_size)),
            rec_fa.opcodes(low),
        );
    }
    for (ty, opcodes) in &[(I16, &PACKSSWB), (I32, &PACKSSDW)] {
        let x86_packss = x86_packss.bind(vector(*ty, sse_vector_size));
        e.enc_both_inferred(x86_packss, rec_fa.opcodes(*opcodes));
    }

    // SIMD bitcast all 128-bit vectors to each other (for legalizing splat.x16x8).
    for from_type in ValueType::all_lane_types().filter(allowed_simd_type) {
        for to_type in
            ValueType::all_lane_types().filter(|t| allowed_simd_type(t) && *t != from_type)
        {
            let instruction = raw_bitcast
                .bind(vector(to_type, sse_vector_size))
                .bind(vector(from_type, sse_vector_size));
            e.enc_32_64_rec(instruction, rec_null_fpr, 0);
        }
    }

    // SIMD raw bitcast floats to vector (and back); assumes that floats are already stored in an
    // XMM register.
    for float_type in &[F32, F64] {
        for lane_type in ValueType::all_lane_types().filter(allowed_simd_type) {
            e.enc_32_64_rec(
                raw_bitcast
                    .bind(vector(lane_type, sse_vector_size))
                    .bind(*float_type),
                rec_null_fpr,
                0,
            );
            e.enc_32_64_rec(
                raw_bitcast
                    .bind(*float_type)
                    .bind(vector(lane_type, sse_vector_size)),
                rec_null_fpr,
                0,
            );
        }
    }

    // SIMD conversions
    {
        let fcvt_from_sint_32 = fcvt_from_sint
            .bind(vector(F32, sse_vector_size))
            .bind(vector(I32, sse_vector_size));
        e.enc_both(fcvt_from_sint_32, rec_furm.opcodes(&CVTDQ2PS));
    }

    // SIMD vconst for special cases (all zeroes, all ones).
    // This must be encoded prior to the MOVUPS implementation (below) so that the compiler
    // sees this encoding first.
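    // PXOR of a register with itself yields all zeroes, and PCMPEQB of a register with
    // itself yields all ones, so neither constant needs a load from memory.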
    for ty in ValueType::all_lane_types().filter(allowed_simd_type) {
        let instruction = vconst.bind(vector(ty, sse_vector_size));

        let is_zero_128bit =
            InstructionPredicate::new_is_all_zeroes(&*formats.unary_const, "constant_handle");
        let template = rec_vconst_optimized.opcodes(&PXOR).infer_rex();
        e.enc_32_64_func(instruction.clone(), template, |builder| {
            builder.inst_predicate(is_zero_128bit)
        });

        let is_ones_128bit =
            InstructionPredicate::new_is_all_ones(&*formats.unary_const, "constant_handle");
        let template = rec_vconst_optimized.opcodes(&PCMPEQB).infer_rex();
        e.enc_32_64_func(instruction, template, |builder| {
            builder.inst_predicate(is_ones_128bit)
        });
    }

    // SIMD vconst using MOVUPS.
    // TODO: it would be ideal if this eventually became the more efficient MOVAPS, but we would
    // have to guarantee that the constants are aligned when emitted, and there is currently no
    // mechanism for that. Alternatively, constants could be loaded into XMM registers using a
    // sequence like MOVQ + MOVHPD + MOVQ + MOVLPD (this allows the constants to be immediates
    // instead of stored in memory), but some performance measurements are needed.
    for ty in ValueType::all_lane_types().filter(allowed_simd_type) {
        let instruction = vconst.bind(vector(ty, sse_vector_size));
        let template = rec_vconst.opcodes(&MOVUPS_LOAD);
        e.enc_both_inferred(instruction, template); // from SSE
    }

    // SIMD register movement: store, load, spill, fill, regmove, etc. All of these use encodings of
    // MOVUPS and MOVAPS from SSE (TODO ideally all of these would either use MOVAPS when we have
    // alignment or type-specific encodings, see https://github.com/bytecodealliance/wasmtime/issues/1124).
    // Also, it would be ideal to infer REX prefixes for all of these instructions but for the
    // time being only instructions with common recipes have `infer_rex()` support.
    for ty in ValueType::all_lane_types().filter(allowed_simd_type) {
        // Store
        let bound_store = store.bind(vector(ty, sse_vector_size)).bind(Any);
        e.enc_both_inferred(bound_store.clone(), rec_fst.opcodes(&MOVUPS_STORE));
        e.enc_both_inferred(bound_store.clone(), rec_fstDisp8.opcodes(&MOVUPS_STORE));
        e.enc_both_inferred(bound_store, rec_fstDisp32.opcodes(&MOVUPS_STORE));

        // Store complex
        let bound_store_complex = store_complex.bind(vector(ty, sse_vector_size));
        e.enc_both(
            bound_store_complex.clone(),
            rec_fstWithIndex.opcodes(&MOVUPS_STORE),
        );
        e.enc_both(
            bound_store_complex.clone(),
            rec_fstWithIndexDisp8.opcodes(&MOVUPS_STORE),
        );
        e.enc_both(
            bound_store_complex,
            rec_fstWithIndexDisp32.opcodes(&MOVUPS_STORE),
        );

        // Load
        let bound_load = load.bind(vector(ty, sse_vector_size)).bind(Any);
        e.enc_both_inferred(bound_load.clone(), rec_fld.opcodes(&MOVUPS_LOAD));
        e.enc_both_inferred(bound_load.clone(), rec_fldDisp8.opcodes(&MOVUPS_LOAD));
        e.enc_both_inferred(bound_load, rec_fldDisp32.opcodes(&MOVUPS_LOAD));

        // Load complex
        let bound_load_complex = load_complex.bind(vector(ty, sse_vector_size));
        e.enc_both(
            bound_load_complex.clone(),
            rec_fldWithIndex.opcodes(&MOVUPS_LOAD),
        );
        e.enc_both(
            bound_load_complex.clone(),
            rec_fldWithIndexDisp8.opcodes(&MOVUPS_LOAD),
        );
        e.enc_both(
            bound_load_complex,
            rec_fldWithIndexDisp32.opcodes(&MOVUPS_LOAD),
        );

        // Spill
        let bound_spill = spill.bind(vector(ty, sse_vector_size));
        e.enc_both(bound_spill, rec_fspillSib32.opcodes(&MOVUPS_STORE));
        let bound_regspill = regspill.bind(vector(ty, sse_vector_size));
        e.enc_both(bound_regspill, rec_fregspill32.opcodes(&MOVUPS_STORE));

        // Fill
        let bound_fill = fill.bind(vector(ty, sse_vector_size));
        e.enc_both(bound_fill, rec_ffillSib32.opcodes(&MOVUPS_LOAD));
        let bound_regfill = regfill.bind(vector(ty, sse_vector_size));
        e.enc_both(bound_regfill, rec_fregfill32.opcodes(&MOVUPS_LOAD));
        let bound_fill_nop = fill_nop.bind(vector(ty, sse_vector_size));
        e.enc_32_64_rec(bound_fill_nop, rec_ffillnull, 0);

        // Regmove
        let bound_regmove = regmove.bind(vector(ty, sse_vector_size));
        e.enc_both(bound_regmove, rec_frmov.opcodes(&MOVAPS_LOAD));

        // Copy
        let bound_copy = copy.bind(vector(ty, sse_vector_size));
        e.enc_both(bound_copy, rec_furm.opcodes(&MOVAPS_LOAD));
        let bound_copy_to_ssa = copy_to_ssa.bind(vector(ty, sse_vector_size));
        e.enc_both(bound_copy_to_ssa, rec_furm_reg_to_ssa.opcodes(&MOVAPS_LOAD));
        let bound_copy_nop = copy_nop.bind(vector(ty, sse_vector_size));
        e.enc_32_64_rec(bound_copy_nop, rec_stacknull, 0);
    }

    // SIMD load extend
    for (inst, opcodes) in &[
        (uload8x8, &PMOVZXBW),
        (uload16x4, &PMOVZXWD),
        (uload32x2, &PMOVZXDQ),
        (sload8x8, &PMOVSXBW),
        (sload16x4, &PMOVSXWD),
        (sload32x2, &PMOVSXDQ),
    ] {
        let isap = Some(use_sse41_simd);
        for recipe in &[rec_fld, rec_fldDisp8, rec_fldDisp32] {
            let inst = *inst;
            let template = recipe.opcodes(*opcodes);
            e.enc_both_inferred_maybe_isap(inst.clone().bind(I32), template.clone(), isap);
            e.enc64_maybe_isap(inst.bind(I64), template.infer_rex(), isap);
        }
    }

    // SIMD integer addition
    for (ty, opcodes) in &[(I8, &PADDB), (I16, &PADDW), (I32, &PADDD), (I64, &PADDQ)] {
        let iadd = iadd.bind(vector(*ty, sse_vector_size));
        e.enc_both_inferred(iadd, rec_fa.opcodes(*opcodes));
    }

    // SIMD integer saturating addition
    e.enc_both_inferred(
        sadd_sat.bind(vector(I8, sse_vector_size)),
        rec_fa.opcodes(&PADDSB),
    );
    e.enc_both_inferred(
        sadd_sat.bind(vector(I16, sse_vector_size)),
        rec_fa.opcodes(&PADDSW),
    );
    e.enc_both_inferred(
        uadd_sat.bind(vector(I8, sse_vector_size)),
        rec_fa.opcodes(&PADDUSB),
    );
    e.enc_both_inferred(
        uadd_sat.bind(vector(I16, sse_vector_size)),
        rec_fa.opcodes(&PADDUSW),
    );

    // SIMD integer subtraction
    let isub = shared.by_name("isub");
    for (ty, opcodes) in &[(I8, &PSUBB), (I16, &PSUBW), (I32, &PSUBD), (I64, &PSUBQ)] {
        let isub = isub.bind(vector(*ty, sse_vector_size));
        e.enc_both_inferred(isub, rec_fa.opcodes(*opcodes));
    }

    // SIMD integer saturating subtraction
    e.enc_both_inferred(
        ssub_sat.bind(vector(I8, sse_vector_size)),
        rec_fa.opcodes(&PSUBSB),
    );
    e.enc_both_inferred(
        ssub_sat.bind(vector(I16, sse_vector_size)),
        rec_fa.opcodes(&PSUBSW),
    );
    e.enc_both_inferred(
        usub_sat.bind(vector(I8, sse_vector_size)),
        rec_fa.opcodes(&PSUBUSB),
    );
    e.enc_both_inferred(
        usub_sat.bind(vector(I16, sse_vector_size)),
        rec_fa.opcodes(&PSUBUSW),
    );

    // SIMD integer multiplication: the base x86 ISA has no instructions for multiplying I8x16
    // or I64x2, and these are (at the time of writing) not necessary for WASM SIMD; for I64x2,
    // an AVX512 encoding is provided below.
    for (ty, opcodes, isap) in &[
        (I16, &PMULLW[..], None),
        (I32, &PMULLD[..], Some(use_sse41_simd)),
    ] {
        let imul = imul.bind(vector(*ty, sse_vector_size));
        e.enc_both_inferred_maybe_isap(imul, rec_fa.opcodes(opcodes), *isap);
    }

    // SIMD integer multiplication for I64x2 using AVX512.
    {
        let imul = imul.bind(vector(I64, sse_vector_size));
        e.enc_32_64_maybe_isap(
            imul,
            rec_evex_reg_vvvv_rm_128.opcodes(&PMULLQ).w(),
            Some(use_avx512dq_simd), // TODO need an OR predicate to join with AVX512VL
        );
    }

    // SIMD integer average with rounding.
    for (ty, opcodes) in &[(I8, &PAVGB[..]), (I16, &PAVGW[..])] {
        let avgr = avg_round.bind(vector(*ty, sse_vector_size));
        e.enc_both_inferred(avgr, rec_fa.opcodes(opcodes));
    }

    // SIMD logical operations
    let band = shared.by_name("band");
    let band_not = shared.by_name("band_not");
    for ty in ValueType::all_lane_types().filter(allowed_simd_type) {
        // and
        let band = band.bind(vector(ty, sse_vector_size));
        e.enc_both_inferred(band, rec_fa.opcodes(&PAND));

        // and not (note flipped recipe operands to match band_not order)
        let band_not = band_not.bind(vector(ty, sse_vector_size));
        e.enc_both_inferred(band_not, rec_fax.opcodes(&PANDN));

        // or
        let bor = bor.bind(vector(ty, sse_vector_size));
        e.enc_both_inferred(bor, rec_fa.opcodes(&POR));

        // xor
        let bxor = bxor.bind(vector(ty, sse_vector_size));
        e.enc_both_inferred(bxor, rec_fa.opcodes(&PXOR));

        // ptest
        let x86_ptest = x86_ptest.bind(vector(ty, sse_vector_size));
        e.enc_both_inferred_maybe_isap(x86_ptest, rec_fcmp.opcodes(&PTEST), Some(use_sse41_simd));
    }

    // SIMD bitcast from I32/I64 to the low bits of a vector (e.g. I64x2); this register movement
    // allows SIMD shifts to be legalized more easily. TODO ideally this would be typed as an
    // I128x1 but restrictions on the type builder prevent this; the general idea here is that
    // the upper bits are all zeroed and do not form parts of any separate lane. See
    // https://github.com/bytecodealliance/wasmtime/issues/1140.
    e.enc_both_inferred(
        bitcast.bind(vector(I64, sse_vector_size)).bind(I32),
        rec_frurm.opcodes(&MOVD_LOAD_XMM),
    );
    e.enc64(
        bitcast.bind(vector(I64, sse_vector_size)).bind(I64),
        rec_frurm.opcodes(&MOVD_LOAD_XMM).rex().w(),
    );

    // SIMD shift left
    for (ty, opcodes) in &[(I16, &PSLLW), (I32, &PSLLD), (I64, &PSLLQ)] {
        let x86_psll = x86_psll.bind(vector(*ty, sse_vector_size));
        e.enc_both_inferred(x86_psll, rec_fa.opcodes(*opcodes));
    }

    // SIMD shift right (logical)
    for (ty, opcodes) in &[(I16, &PSRLW), (I32, &PSRLD), (I64, &PSRLQ)] {
        let x86_psrl = x86_psrl.bind(vector(*ty, sse_vector_size));
        e.enc_both_inferred(x86_psrl, rec_fa.opcodes(*opcodes));
    }

    // SIMD shift right (arithmetic)
    for (ty, opcodes) in &[(I16, &PSRAW), (I32, &PSRAD)] {
        let x86_psra = x86_psra.bind(vector(*ty, sse_vector_size));
        e.enc_both_inferred(x86_psra, rec_fa.opcodes(*opcodes));
    }

    // SIMD immediate shift
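    // A single opcode group per lane width encodes all three immediate shifts; the ModR/M reg
    // field picks the operation: /2 logical right, /4 arithmetic right, /6 left.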
    for (ty, opcodes) in &[(I16, &PS_W_IMM), (I32, &PS_D_IMM), (I64, &PS_Q_IMM)] {
        let ishl_imm = ishl_imm.bind(vector(*ty, sse_vector_size));
        e.enc_both_inferred(ishl_imm, rec_f_ib.opcodes(*opcodes).rrr(6));

        let ushr_imm = ushr_imm.bind(vector(*ty, sse_vector_size));
        e.enc_both_inferred(ushr_imm, rec_f_ib.opcodes(*opcodes).rrr(2));

        let sshr_imm = sshr_imm.bind(vector(*ty, sse_vector_size));
        e.enc_both_inferred(sshr_imm, rec_f_ib.opcodes(*opcodes).rrr(4));
    }

    // SIMD integer comparisons
    {
        use IntCC::*;
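        // Only equality and signed greater-than have native SSE encodings; the remaining
        // condition codes are presumably derived during legalization (e.g. by swapping
        // operands or inverting the result).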
        for (ty, cc, opcodes, isa_predicate) in &[
            (I8, Equal, &PCMPEQB[..], None),
            (I16, Equal, &PCMPEQW[..], None),
            (I32, Equal, &PCMPEQD[..], None),
            (I64, Equal, &PCMPEQQ[..], Some(use_sse41_simd)),
            (I8, SignedGreaterThan, &PCMPGTB[..], None),
            (I16, SignedGreaterThan, &PCMPGTW[..], None),
            (I32, SignedGreaterThan, &PCMPGTD[..], None),
            (I64, SignedGreaterThan, &PCMPGTQ, Some(use_sse42_simd)),
        ] {
            let instruction = icmp
                .bind(Immediate::IntCC(*cc))
                .bind(vector(*ty, sse_vector_size));
            let template = rec_icscc_fpr.opcodes(opcodes);
            e.enc_both_inferred_maybe_isap(instruction, template, *isa_predicate);
        }
    }

    // SIMD min/max
    for (ty, inst, opcodes, isa_predicate) in &[
        (I8, x86_pmaxs, &PMAXSB[..], Some(use_sse41_simd)),
        (I16, x86_pmaxs, &PMAXSW[..], None),
        (I32, x86_pmaxs, &PMAXSD[..], Some(use_sse41_simd)),
        (I8, x86_pmaxu, &PMAXUB[..], None),
        (I16, x86_pmaxu, &PMAXUW[..], Some(use_sse41_simd)),
        (I32, x86_pmaxu, &PMAXUD[..], Some(use_sse41_simd)),
        (I8, x86_pmins, &PMINSB[..], Some(use_sse41_simd)),
        (I16, x86_pmins, &PMINSW[..], None),
        (I32, x86_pmins, &PMINSD[..], Some(use_sse41_simd)),
        (I8, x86_pminu, &PMINUB[..], None),
        (I16, x86_pminu, &PMINUW[..], Some(use_sse41_simd)),
        (I32, x86_pminu, &PMINUD[..], Some(use_sse41_simd)),
    ] {
        let inst = inst.bind(vector(*ty, sse_vector_size));
        e.enc_both_inferred_maybe_isap(inst, rec_fa.opcodes(opcodes), *isa_predicate);
    }

    // SIMD float comparisons
    e.enc_both_inferred(
        fcmp.bind(vector(F32, sse_vector_size)),
        rec_pfcmp.opcodes(&CMPPS),
    );
    e.enc_both_inferred(
        fcmp.bind(vector(F64, sse_vector_size)),
        rec_pfcmp.opcodes(&CMPPD),
    );

    // SIMD float arithmetic
    for (ty, inst, opcodes) in &[
        (F32, fadd, &ADDPS[..]),
        (F64, fadd, &ADDPD[..]),
        (F32, fsub, &SUBPS[..]),
        (F64, fsub, &SUBPD[..]),
        (F32, fmul, &MULPS[..]),
        (F64, fmul, &MULPD[..]),
        (F32, fdiv, &DIVPS[..]),
        (F64, fdiv, &DIVPD[..]),
        (F32, fmin, &MINPS[..]),
        (F64, fmin, &MINPD[..]),
        (F32, fmax, &MAXPS[..]),
        (F64, fmax, &MAXPD[..]),
    ] {
        let inst = inst.bind(vector(*ty, sse_vector_size));
        e.enc_both_inferred(inst, rec_fa.opcodes(opcodes));
    }
    for (ty, inst, opcodes) in &[(F32, sqrt, &SQRTPS[..]), (F64, sqrt, &SQRTPD[..])] {
        let inst = inst.bind(vector(*ty, sse_vector_size));
        e.enc_both_inferred(inst, rec_furm.opcodes(opcodes));
    }
}

#[inline(never)]
fn define_entity_ref(
    e: &mut PerCpuModeEncodings,
    shared_defs: &SharedDefinitions,
    settings: &SettingGroup,
    r: &RecipeGroup,
) {
    let shared = &shared_defs.instructions;
    let formats = &shared_defs.formats;

    // Shorthands for instructions.
    let const_addr = shared.by_name("const_addr");
    let func_addr = shared.by_name("func_addr");
    let stack_addr = shared.by_name("stack_addr");
    let symbol_value = shared.by_name("symbol_value");

    // Shorthands for recipes.
    let rec_allones_fnaddr4 = r.template("allones_fnaddr4");
    let rec_allones_fnaddr8 = r.template("allones_fnaddr8");
    let rec_fnaddr4 = r.template("fnaddr4");
    let rec_fnaddr8 = r.template("fnaddr8");
    let rec_const_addr = r.template("const_addr");
    let rec_got_fnaddr8 = r.template("got_fnaddr8");
    let rec_got_gvaddr8 = r.template("got_gvaddr8");
    let rec_gvaddr4 = r.template("gvaddr4");
    let rec_gvaddr8 = r.template("gvaddr8");
    let rec_pcrel_fnaddr8 = r.template("pcrel_fnaddr8");
    let rec_pcrel_gvaddr8 = r.template("pcrel_gvaddr8");
    let rec_spaddr4_id = r.template("spaddr4_id");
    let rec_spaddr8_id = r.template("spaddr8_id");

    // Predicates shorthands.
    let all_ones_funcaddrs_and_not_is_pic =
        settings.predicate_by_name("all_ones_funcaddrs_and_not_is_pic");
    let is_pic = settings.predicate_by_name("is_pic");
    let not_all_ones_funcaddrs_and_not_is_pic =
        settings.predicate_by_name("not_all_ones_funcaddrs_and_not_is_pic");
    let not_is_pic = settings.predicate_by_name("not_is_pic");

    // Function addresses.

    // Non-PIC, ordinary (not-all-ones) funcaddresses.
    e.enc32_isap(
        func_addr.bind(I32),
        rec_fnaddr4.opcodes(&MOV_IMM),
        not_all_ones_funcaddrs_and_not_is_pic,
    );
    e.enc64_isap(
        func_addr.bind(I64),
        rec_fnaddr8.opcodes(&MOV_IMM).rex().w(),
        not_all_ones_funcaddrs_and_not_is_pic,
    );

    // Non-PIC, all-ones funcaddresses.
    e.enc32_isap(
        func_addr.bind(I32),
        rec_allones_fnaddr4.opcodes(&MOV_IMM),
        all_ones_funcaddrs_and_not_is_pic,
    );
    e.enc64_isap(
        func_addr.bind(I64),
        rec_allones_fnaddr8.opcodes(&MOV_IMM).rex().w(),
        all_ones_funcaddrs_and_not_is_pic,
    );

    // 64-bit, colocated, both PIC and non-PIC. Use the lea instruction's pc-relative field.
    let is_colocated_func =
        InstructionPredicate::new_is_colocated_func(&*formats.func_addr, "func_ref");
    e.enc64_instp(
        func_addr.bind(I64),
        rec_pcrel_fnaddr8.opcodes(&LEA).rex().w(),
        is_colocated_func,
    );

    // 64-bit, non-colocated, PIC.
    e.enc64_isap(
        func_addr.bind(I64),
        rec_got_fnaddr8.opcodes(&MOV_LOAD).rex().w(),
        is_pic,
    );

    // Global addresses.

    // Non-PIC.
    e.enc32_isap(
        symbol_value.bind(I32),
        rec_gvaddr4.opcodes(&MOV_IMM),
        not_is_pic,
    );
    e.enc64_isap(
        symbol_value.bind(I64),
        rec_gvaddr8.opcodes(&MOV_IMM).rex().w(),
        not_is_pic,
    );

    // PIC, colocated.
    e.enc64_func(
        symbol_value.bind(I64),
        rec_pcrel_gvaddr8.opcodes(&LEA).rex().w(),
        |encoding| {
            encoding
                .isa_predicate(is_pic)
                .inst_predicate(InstructionPredicate::new_is_colocated_data(formats))
        },
    );

    // PIC, non-colocated.
    e.enc64_isap(
        symbol_value.bind(I64),
        rec_got_gvaddr8.opcodes(&MOV_LOAD).rex().w(),
        is_pic,
    );

    // Stack addresses.
    //
    // TODO: Add encoding rules for stack_load and stack_store, so that they
    // don't get legalized to stack_addr + load/store.
    e.enc32(stack_addr.bind(I32), rec_spaddr4_id.opcodes(&LEA));
    e.enc64(stack_addr.bind(I64), rec_spaddr8_id.opcodes(&LEA).rex().w());

    // Constant addresses (PIC).
    e.enc64(const_addr.bind(I64), rec_const_addr.opcodes(&LEA).rex().w());
    e.enc32(const_addr.bind(I32), rec_const_addr.opcodes(&LEA));
}

/// Control flow opcodes.
#[inline(never)]
fn define_control_flow(
    e: &mut PerCpuModeEncodings,
    shared_defs: &SharedDefinitions,
    settings: &SettingGroup,
    r: &RecipeGroup,
) {
    let shared = &shared_defs.instructions;
    let formats = &shared_defs.formats;

    // Shorthands for instructions.
    let brff = shared.by_name("brff");
    let brif = shared.by_name("brif");
    let brnz = shared.by_name("brnz");
    let brz = shared.by_name("brz");
    let call = shared.by_name("call");
    let call_indirect = shared.by_name("call_indirect");
    let debugtrap = shared.by_name("debugtrap");
    let indirect_jump_table_br = shared.by_name("indirect_jump_table_br");
    let jump = shared.by_name("jump");
    let jump_table_base = shared.by_name("jump_table_base");
    let jump_table_entry = shared.by_name("jump_table_entry");
    let return_ = shared.by_name("return");
    let trap = shared.by_name("trap");
    let trapff = shared.by_name("trapff");
    let trapif = shared.by_name("trapif");
    let resumable_trap = shared.by_name("resumable_trap");

    // Shorthands for recipes.
    let rec_brfb = r.template("brfb");
    let rec_brfd = r.template("brfd");
    let rec_brib = r.template("brib");
    let rec_brid = r.template("brid");
    let rec_call_id = r.template("call_id");
    let rec_call_plt_id = r.template("call_plt_id");
    let rec_call_r = r.template("call_r");
    let rec_debugtrap = r.recipe("debugtrap");
    let rec_indirect_jmp = r.template("indirect_jmp");
    let rec_jmpb = r.template("jmpb");
    let rec_jmpd = r.template("jmpd");
    let rec_jt_base = r.template("jt_base");
    let rec_jt_entry = r.template("jt_entry");
    let rec_ret = r.template("ret");
    let rec_t8jccb_abcd = r.template("t8jccb_abcd");
    let rec_t8jccd_abcd = r.template("t8jccd_abcd");
    let rec_t8jccd_long = r.template("t8jccd_long");
    let rec_tjccb = r.template("tjccb");
    let rec_tjccd = r.template("tjccd");
    let rec_trap = r.template("trap");
    let rec_trapif = r.recipe("trapif");
    let rec_trapff = r.recipe("trapff");

    // Predicates shorthands.
    let is_pic = settings.predicate_by_name("is_pic");

    // Call/return

    // 32-bit, both PIC and non-PIC.
    e.enc32(call, rec_call_id.opcodes(&CALL_RELATIVE));

    // 64-bit, colocated, both PIC and non-PIC. Use the call instruction's pc-relative field.
    let is_colocated_func = InstructionPredicate::new_is_colocated_func(&*formats.call, "func_ref");
    e.enc64_instp(call, rec_call_id.opcodes(&CALL_RELATIVE), is_colocated_func);

    // 64-bit, non-colocated, PIC. There is no 64-bit non-colocated non-PIC version, since non-PIC
    // is currently using the large model, which requires calls be lowered to
    // func_addr+call_indirect.
    e.enc64_isap(call, rec_call_plt_id.opcodes(&CALL_RELATIVE), is_pic);

    e.enc32(
        call_indirect.bind(I32),
        rec_call_r.opcodes(&JUMP_ABSOLUTE).rrr(2),
    );
    e.enc64(
        call_indirect.bind(I64),
        rec_call_r.opcodes(&JUMP_ABSOLUTE).rrr(2).rex(),
    );
    e.enc64(
        call_indirect.bind(I64),
        rec_call_r.opcodes(&JUMP_ABSOLUTE).rrr(2),
    );

    e.enc32(return_, rec_ret.opcodes(&RET_NEAR));
    e.enc64(return_, rec_ret.opcodes(&RET_NEAR));

    // Branches.
    e.enc32(jump, rec_jmpb.opcodes(&JUMP_SHORT));
    e.enc64(jump, rec_jmpb.opcodes(&JUMP_SHORT));
    e.enc32(jump, rec_jmpd.opcodes(&JUMP_NEAR_RELATIVE));
    e.enc64(jump, rec_jmpd.opcodes(&JUMP_NEAR_RELATIVE));

    e.enc_both(brif, rec_brib.opcodes(&JUMP_SHORT_IF_OVERFLOW));
    e.enc_both(brif, rec_brid.opcodes(&JUMP_NEAR_IF_OVERFLOW));
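    // Short Jcc is 0x70 plus the condition code and near Jcc is 0x0f 0x80 plus it; the branch
    // recipes fold the bound condition into the overflow-based opcodes given here.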

    // Not all float condition codes are legal, see `supported_floatccs`.
    e.enc_both(brff, rec_brfb.opcodes(&JUMP_SHORT_IF_OVERFLOW));
    e.enc_both(brff, rec_brfd.opcodes(&JUMP_NEAR_IF_OVERFLOW));

    // Note that the tjccd opcode will be prefixed with 0x0f.
    e.enc_i32_i64_explicit_rex(brz, rec_tjccb.opcodes(&JUMP_SHORT_IF_EQUAL));
    e.enc_i32_i64_explicit_rex(brz, rec_tjccd.opcodes(&TEST_BYTE_REG));
    e.enc_i32_i64_explicit_rex(brnz, rec_tjccb.opcodes(&JUMP_SHORT_IF_NOT_EQUAL));
    e.enc_i32_i64_explicit_rex(brnz, rec_tjccd.opcodes(&TEST_REG));
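    // Under that 0x0f prefix, TEST_BYTE_REG (0x84) and TEST_REG (0x85) double as the near
    // JE/JNE opcodes: 0x0f 0x84 is JE rel32 and 0x0f 0x85 is JNE rel32.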

    // Branch on a b1 value in a register only looks at the low 8 bits. See also
    // bint encodings below.
    //
    // Start with the worst-case encoding for X86_32 only. The register allocator
    // can't handle a branch with an ABCD-constrained operand.
    e.enc32(brz.bind(B1), rec_t8jccd_long.opcodes(&TEST_BYTE_REG));
    e.enc32(brnz.bind(B1), rec_t8jccd_long.opcodes(&TEST_REG));

    e.enc_both(brz.bind(B1), rec_t8jccb_abcd.opcodes(&JUMP_SHORT_IF_EQUAL));
    e.enc_both(brz.bind(B1), rec_t8jccd_abcd.opcodes(&TEST_BYTE_REG));
    e.enc_both(
        brnz.bind(B1),
        rec_t8jccb_abcd.opcodes(&JUMP_SHORT_IF_NOT_EQUAL),
    );
    e.enc_both(brnz.bind(B1), rec_t8jccd_abcd.opcodes(&TEST_REG));

    // Jump tables.
    e.enc64(
        jump_table_entry.bind(I64),
        rec_jt_entry.opcodes(&MOVSXD).rex().w(),
    );
    e.enc32(jump_table_entry.bind(I32), rec_jt_entry.opcodes(&MOV_LOAD));
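    // On x86_64, MOVSXD loads the 32-bit table entry and sign-extends it to 64 bits in a
    // single instruction.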

    e.enc64(
        jump_table_base.bind(I64),
        rec_jt_base.opcodes(&LEA).rex().w(),
    );
    e.enc32(jump_table_base.bind(I32), rec_jt_base.opcodes(&LEA));

    e.enc_x86_64(
        indirect_jump_table_br.bind(I64),
        rec_indirect_jmp.opcodes(&JUMP_ABSOLUTE).rrr(4),
    );
    e.enc32(
        indirect_jump_table_br.bind(I32),
        rec_indirect_jmp.opcodes(&JUMP_ABSOLUTE).rrr(4),
    );

    // Trap as ud2
    e.enc32(trap, rec_trap.opcodes(&UNDEFINED2));
    e.enc64(trap, rec_trap.opcodes(&UNDEFINED2));
    e.enc32(resumable_trap, rec_trap.opcodes(&UNDEFINED2));
    e.enc64(resumable_trap, rec_trap.opcodes(&UNDEFINED2));

    // Debug trap as int3
    e.enc32_rec(debugtrap, rec_debugtrap, 0);
    e.enc64_rec(debugtrap, rec_debugtrap, 0);

    e.enc32_rec(trapif, rec_trapif, 0);
    e.enc64_rec(trapif, rec_trapif, 0);
    e.enc32_rec(trapff, rec_trapff, 0);
    e.enc64_rec(trapff, rec_trapff, 0);
}

/// Reference type instructions.
#[inline(never)]
fn define_reftypes(e: &mut PerCpuModeEncodings, shared_defs: &SharedDefinitions, r: &RecipeGroup) {
    let shared = &shared_defs.instructions;

    let is_null = shared.by_name("is_null");
    let is_invalid = shared.by_name("is_invalid");
    let null = shared.by_name("null");
    let safepoint = shared.by_name("safepoint");

    let rec_is_zero = r.template("is_zero");
    let rec_is_invalid = r.template("is_invalid");
    let rec_pu_id_ref = r.template("pu_id_ref");
    let rec_safepoint = r.recipe("safepoint");

    // Null references implemented as iconst 0.
    e.enc32(null.bind(R32), rec_pu_id_ref.opcodes(&MOV_IMM));

    e.enc64(null.bind(R64), rec_pu_id_ref.rex().opcodes(&MOV_IMM));
    e.enc64(null.bind(R64), rec_pu_id_ref.opcodes(&MOV_IMM));

    // is_null, implemented by testing whether the value is 0.
    e.enc_r32_r64_rex_only(is_null, rec_is_zero.opcodes(&TEST_REG));

    // is_invalid, implemented by testing whether the value is -1.
    e.enc_r32_r64_rex_only(is_invalid, rec_is_invalid.opcodes(&CMP_IMM8).rrr(7));

    // safepoint instruction calls sink, no actual encoding.
    e.enc32_rec(safepoint, rec_safepoint, 0);
    e.enc64_rec(safepoint, rec_safepoint, 0);
}

#[allow(clippy::cognitive_complexity)]
pub(crate) fn define(
    shared_defs: &SharedDefinitions,
    settings: &SettingGroup,
    x86: &InstructionGroup,
    r: &RecipeGroup,
) -> PerCpuModeEncodings {
    // Definitions.
    let mut e = PerCpuModeEncodings::new();

    define_moves(&mut e, shared_defs, r);
    define_memory(&mut e, shared_defs, x86, r);
    define_fpu_moves(&mut e, shared_defs, r);
    define_fpu_memory(&mut e, shared_defs, r);
    define_fpu_ops(&mut e, shared_defs, settings, x86, r);
    define_alu(&mut e, shared_defs, settings, x86, r);
    define_simd(&mut e, shared_defs, settings, x86, r);
    define_entity_ref(&mut e, shared_defs, settings, r);
    define_control_flow(&mut e, shared_defs, settings, r);
    define_reftypes(&mut e, shared_defs, r);

    let x86_elf_tls_get_addr = x86.by_name("x86_elf_tls_get_addr");
    let x86_macho_tls_get_addr = x86.by_name("x86_macho_tls_get_addr");

    let rec_elf_tls_get_addr = r.recipe("elf_tls_get_addr");
    let rec_macho_tls_get_addr = r.recipe("macho_tls_get_addr");

    e.enc64_rec(x86_elf_tls_get_addr, rec_elf_tls_get_addr, 0);
    e.enc64_rec(x86_macho_tls_get_addr, rec_macho_tls_get_addr, 0);

    e
}