1 //! Encoding recipes for x86/x86_64.
2 use std::rc::Rc;
3 
4 use cranelift_codegen_shared::isa::x86::EncodingBits;
5 
6 use crate::cdsl::ast::Literal;
7 use crate::cdsl::formats::InstructionFormat;
8 use crate::cdsl::instructions::InstructionPredicate;
9 use crate::cdsl::recipes::{
10     EncodingRecipe, EncodingRecipeBuilder, OperandConstraint, Register, Stack,
11 };
12 use crate::cdsl::regs::IsaRegs;
13 use crate::cdsl::settings::SettingGroup;
14 use crate::shared::Definitions as SharedDefinitions;
15 
16 use crate::isa::x86::opcodes;
17 
18 /// Helper data structure to create recipes and template recipes.
19 /// It contains all the recipes and recipe templates that might be used in the encodings crate of
20 /// this same directory.
21 pub(crate) struct RecipeGroup<'builder> {
22     /// Memoized registers description, to pass it to builders later.
23     regs: &'builder IsaRegs,
24 
25     /// All the recipes explicitly created in this file. This is different from the final set of
26     /// recipes, which is definitive only once encodings have generated new recipes on the fly.
27     recipes: Vec<EncodingRecipe>,
28 
29     /// All the recipe templates created in this file.
30     templates: Vec<Rc<Template<'builder>>>,
31 }
32 
33 impl<'builder> RecipeGroup<'builder> {
new(regs: &'builder IsaRegs) -> Self34     fn new(regs: &'builder IsaRegs) -> Self {
35         Self {
36             regs,
37             recipes: Vec::new(),
38             templates: Vec::new(),
39         }
40     }
add_recipe(&mut self, recipe: EncodingRecipeBuilder)41     fn add_recipe(&mut self, recipe: EncodingRecipeBuilder) {
42         self.recipes.push(recipe.build());
43     }
add_template_recipe(&mut self, recipe: EncodingRecipeBuilder) -> Rc<Template<'builder>>44     fn add_template_recipe(&mut self, recipe: EncodingRecipeBuilder) -> Rc<Template<'builder>> {
45         let template = Rc::new(Template::new(recipe, self.regs));
46         self.templates.push(template.clone());
47         template
48     }
add_template_inferred( &mut self, recipe: EncodingRecipeBuilder, infer_function: &'static str, ) -> Rc<Template<'builder>>49     fn add_template_inferred(
50         &mut self,
51         recipe: EncodingRecipeBuilder,
52         infer_function: &'static str,
53     ) -> Rc<Template<'builder>> {
54         let template =
55             Rc::new(Template::new(recipe, self.regs).inferred_rex_compute_size(infer_function));
56         self.templates.push(template.clone());
57         template
58     }
add_template(&mut self, template: Template<'builder>) -> Rc<Template<'builder>>59     fn add_template(&mut self, template: Template<'builder>) -> Rc<Template<'builder>> {
60         let template = Rc::new(template);
61         self.templates.push(template.clone());
62         template
63     }
recipe(&self, name: &str) -> &EncodingRecipe64     pub fn recipe(&self, name: &str) -> &EncodingRecipe {
65         self.recipes
66             .iter()
67             .find(|recipe| recipe.name == name)
68             .unwrap_or_else(|| panic!("unknown recipe name: {}. Try template?", name))
69     }
template(&self, name: &str) -> &Template70     pub fn template(&self, name: &str) -> &Template {
71         self.templates
72             .iter()
73             .find(|recipe| recipe.name() == name)
74             .unwrap_or_else(|| panic!("unknown template name: {}. Try recipe?", name))
75     }
76 }
77 
78 // Opcode representation.
79 //
80 // Cranelift requires each recipe to have a single encoding size in bytes, and x86 opcodes are
81 // variable length, so we use separate recipes for different styles of opcodes and prefixes. The
82 // opcode format is indicated by the recipe name prefix.
83 //
84 // The match case below does not include the REX prefix which goes after the mandatory prefix.
85 // VEX/XOP and EVEX prefixes are not yet supported. Encodings using any of these prefixes are
86 // represented by separate recipes.
87 //
88 // The encoding bits are:
89 //
90 // 0-7:   The opcode byte <op>.
91 // 8-9:   pp, mandatory prefix:
92 //        00 none (Op*)
93 //        01 66   (Mp*)
94 //        10 F3   (Mp*)
95 //        11 F2   (Mp*)
96 // 10-11: mm, opcode map:
97 //        00 <op>        (Op1/Mp1)
98 //        01 0F <op>     (Op2/Mp2)
99 //        10 0F 38 <op>  (Op3/Mp3)
100 //        11 0F 3A <op>  (Op3/Mp3)
101 // 12-14  rrr, opcode bits for the ModR/M byte for certain opcodes.
102 // 15:    REX.W bit (or VEX.W/E)
103 //
104 // There is some redundancy between bits 8-11 and the recipe names, but we have enough bits, and
105 // the pp+mm format is ready for supporting VEX prefixes.
106 //
// TODO Cranelift no longer requires each recipe to have a single, fixed encoding size, so this
// scheme could be simplified.
109 
110 /// Given a sequence of opcode bytes, compute the recipe name prefix and encoding bits.
decode_opcodes(op_bytes: &[u8], rrr: u16, w: u16) -> (&'static str, u16)111 fn decode_opcodes(op_bytes: &[u8], rrr: u16, w: u16) -> (&'static str, u16) {
112     let enc = EncodingBits::new(op_bytes, rrr, w);
113     (enc.prefix().recipe_name_prefix(), enc.bits())
114 }
115 
/// Substitutes every `{{PUT_OP}}` placeholder in a snippet of Rust code with the name of the
/// matching `put_*` function, i.e. `put_` followed by the lowercased `prefix`.
///
/// Returns `None` when no snippet is given.
fn replace_put_op(code: Option<String>, prefix: &str) -> Option<String> {
    let snippet = code?;
    let put_fn = format!("put_{}", prefix.to_lowercase());
    Some(snippet.replace("{{PUT_OP}}", &put_fn))
}
121 
122 /// Replaces constraints to a REX-prefixed register class by the equivalent non-REX register class.
replace_nonrex_constraints( regs: &IsaRegs, constraints: Vec<OperandConstraint>, ) -> Vec<OperandConstraint>123 fn replace_nonrex_constraints(
124     regs: &IsaRegs,
125     constraints: Vec<OperandConstraint>,
126 ) -> Vec<OperandConstraint> {
127     constraints
128         .into_iter()
129         .map(|constraint| match constraint {
130             OperandConstraint::RegClass(rc_index) => {
131                 let new_rc_index = if rc_index == regs.class_by_name("GPR") {
132                     regs.class_by_name("GPR8")
133                 } else if rc_index == regs.class_by_name("FPR") {
134                     regs.class_by_name("FPR8")
135                 } else {
136                     rc_index
137                 };
138                 OperandConstraint::RegClass(new_rc_index)
139             }
140             _ => constraint,
141         })
142         .collect()
143 }
144 
replace_evex_constraints( _: &IsaRegs, constraints: Vec<OperandConstraint>, ) -> Vec<OperandConstraint>145 fn replace_evex_constraints(
146     _: &IsaRegs,
147     constraints: Vec<OperandConstraint>,
148 ) -> Vec<OperandConstraint> {
149     constraints
150         .into_iter()
151         .map(|constraint| match constraint {
152             OperandConstraint::RegClass(rc_index) => {
153                 // FIXME(#1306) this should be able to upgrade the register class to FPR32 as in
154                 // `replace_nonrex_constraints` above, e.g. When FPR32 is re-added, add back in the
155                 // rc_index conversion to FPR32. In the meantime, this is effectively a no-op
156                 // conversion--the register class stays the same.
157                 OperandConstraint::RegClass(rc_index)
158             }
159             _ => constraint,
160         })
161         .collect()
162 }
163 
/// Specifies how the prefix (e.g. REX) is emitted by a Recipe.
///
/// The default is [`RecipePrefixKind::Unspecified`].
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub enum RecipePrefixKind {
    /// The REX emission behavior is not hardcoded for the Recipe
    /// and may be overridden when using the Template.
    Unspecified,

    /// The Recipe must hardcode the non-emission of the REX prefix.
    NeverEmitRex,

    /// The Recipe must hardcode the emission of the REX prefix.
    AlwaysEmitRex,

    /// The Recipe should infer the emission of the REX.RXB bits from registers,
    /// and the REX.W bit from the EncodingBits.
    ///
    /// Because such a Recipe has a non-constant instruction size, it must have
    /// a special `compute_size` handler for the inferrable-REX case.
    InferRex,

    /// The Recipe must hardcode the emission of an EVEX prefix.
    Evex,
}

impl Default for RecipePrefixKind {
    /// Returns `Unspecified`, leaving the prefix choice to the Template.
    fn default() -> Self {
        RecipePrefixKind::Unspecified
    }
}
193 
194 /// Previously called a TailRecipe in the Python meta language, this allows to create multiple
195 /// variants of a single base EncodingRecipe (rex prefix, specialized w/rrr bits, different
196 /// opcodes). It serves as a prototype of an EncodingRecipe, which is then used when actually creating
197 /// Encodings, in encodings.rs. This is an idiosyncrasy of the x86 meta-language, and could be
198 /// reconsidered later.
199 #[derive(Clone)]
200 pub(crate) struct Template<'builder> {
201     /// Description of registers, used in the build() method.
202     regs: &'builder IsaRegs,
203 
204     /// The recipe template, which is to be specialized (by copy).
205     recipe: EncodingRecipeBuilder,
206 
207     /// How is the REX prefix emitted?
208     rex_kind: RecipePrefixKind,
209 
210     /// Function for `compute_size()` when REX is inferrable.
211     inferred_rex_compute_size: Option<&'static str>,
212 
213     /// Other recipe to use when REX-prefixed.
214     when_prefixed: Option<Rc<Template<'builder>>>,
215 
216     // Parameters passed in the EncodingBits.
217     /// Value of the W bit (0 or 1), stored in the EncodingBits.
218     w_bit: u16,
219     /// Value of the RRR bits (between 0 and 0b111).
220     rrr_bits: u16,
221     /// Opcode bytes.
222     op_bytes: &'static [u8],
223 }
224 
225 impl<'builder> Template<'builder> {
new(recipe: EncodingRecipeBuilder, regs: &'builder IsaRegs) -> Self226     fn new(recipe: EncodingRecipeBuilder, regs: &'builder IsaRegs) -> Self {
227         Self {
228             regs,
229             recipe,
230             rex_kind: RecipePrefixKind::default(),
231             inferred_rex_compute_size: None,
232             when_prefixed: None,
233             w_bit: 0,
234             rrr_bits: 0,
235             op_bytes: &opcodes::EMPTY,
236         }
237     }
238 
name(&self) -> &str239     fn name(&self) -> &str {
240         &self.recipe.name
241     }
rex_kind(self, kind: RecipePrefixKind) -> Self242     fn rex_kind(self, kind: RecipePrefixKind) -> Self {
243         Self {
244             rex_kind: kind,
245             ..self
246         }
247     }
inferred_rex_compute_size(self, function: &'static str) -> Self248     fn inferred_rex_compute_size(self, function: &'static str) -> Self {
249         Self {
250             inferred_rex_compute_size: Some(function),
251             ..self
252         }
253     }
when_prefixed(self, template: Rc<Template<'builder>>) -> Self254     fn when_prefixed(self, template: Rc<Template<'builder>>) -> Self {
255         assert!(self.when_prefixed.is_none());
256         Self {
257             when_prefixed: Some(template),
258             ..self
259         }
260     }
261 
262     // Copy setters.
opcodes(&self, op_bytes: &'static [u8]) -> Self263     pub fn opcodes(&self, op_bytes: &'static [u8]) -> Self {
264         assert!(!op_bytes.is_empty());
265         let mut copy = self.clone();
266         copy.op_bytes = op_bytes;
267         copy
268     }
w(&self) -> Self269     pub fn w(&self) -> Self {
270         let mut copy = self.clone();
271         copy.w_bit = 1;
272         copy
273     }
rrr(&self, value: u16) -> Self274     pub fn rrr(&self, value: u16) -> Self {
275         assert!(value <= 0b111);
276         let mut copy = self.clone();
277         copy.rrr_bits = value;
278         copy
279     }
nonrex(&self) -> Self280     pub fn nonrex(&self) -> Self {
281         assert!(
282             self.rex_kind != RecipePrefixKind::AlwaysEmitRex,
283             "Template requires REX prefix."
284         );
285         let mut copy = self.clone();
286         copy.rex_kind = RecipePrefixKind::NeverEmitRex;
287         copy
288     }
rex(&self) -> Self289     pub fn rex(&self) -> Self {
290         assert!(
291             self.rex_kind != RecipePrefixKind::NeverEmitRex,
292             "Template requires no REX prefix."
293         );
294         if let Some(prefixed) = &self.when_prefixed {
295             let mut ret = prefixed.rex();
296             // Forward specialized parameters.
297             ret.op_bytes = self.op_bytes;
298             ret.w_bit = self.w_bit;
299             ret.rrr_bits = self.rrr_bits;
300             return ret;
301         }
302         let mut copy = self.clone();
303         copy.rex_kind = RecipePrefixKind::AlwaysEmitRex;
304         copy
305     }
infer_rex(&self) -> Self306     pub fn infer_rex(&self) -> Self {
307         assert!(
308             self.rex_kind != RecipePrefixKind::NeverEmitRex,
309             "Template requires no REX prefix."
310         );
311         assert!(
312             self.when_prefixed.is_none(),
313             "infer_rex used with when_prefixed()."
314         );
315         let mut copy = self.clone();
316         copy.rex_kind = RecipePrefixKind::InferRex;
317         copy
318     }
319 
build(mut self) -> (EncodingRecipe, u16)320     pub fn build(mut self) -> (EncodingRecipe, u16) {
321         let (opcode, bits) = decode_opcodes(&self.op_bytes, self.rrr_bits, self.w_bit);
322 
323         let (recipe_name, size_addendum) = match self.rex_kind {
324             RecipePrefixKind::Unspecified | RecipePrefixKind::NeverEmitRex => {
325                 // Ensure the operands are limited to non-REX constraints.
326                 let operands_in = self.recipe.operands_in.unwrap_or_default();
327                 self.recipe.operands_in = Some(replace_nonrex_constraints(self.regs, operands_in));
328                 let operands_out = self.recipe.operands_out.unwrap_or_default();
329                 self.recipe.operands_out =
330                     Some(replace_nonrex_constraints(self.regs, operands_out));
331 
332                 (opcode.into(), self.op_bytes.len() as u64)
333             }
334             RecipePrefixKind::AlwaysEmitRex => {
335                 ("Rex".to_string() + opcode, self.op_bytes.len() as u64 + 1)
336             }
337             RecipePrefixKind::InferRex => {
338                 assert_eq!(self.w_bit, 0, "A REX.W bit always requires a REX prefix; avoid using `infer_rex().w()` and use `rex().w()` instead.");
339                 // Hook up the right function for inferred compute_size().
340                 assert!(
341                     self.inferred_rex_compute_size.is_some(),
342                     "InferRex recipe '{}' needs an inferred_rex_compute_size function.",
343                     &self.recipe.name
344                 );
345                 self.recipe.compute_size = self.inferred_rex_compute_size;
346 
347                 ("DynRex".to_string() + opcode, self.op_bytes.len() as u64)
348             }
349             RecipePrefixKind::Evex => {
350                 // Allow the operands to expand limits to EVEX constraints.
351                 let operands_in = self.recipe.operands_in.unwrap_or_default();
352                 self.recipe.operands_in = Some(replace_evex_constraints(self.regs, operands_in));
353                 let operands_out = self.recipe.operands_out.unwrap_or_default();
354                 self.recipe.operands_out = Some(replace_evex_constraints(self.regs, operands_out));
355 
356                 ("Evex".to_string() + opcode, 4 + 1)
357             }
358         };
359 
360         self.recipe.base_size += size_addendum;
361 
362         // Branch ranges are relative to the end of the instruction.
363         // For InferRex, the range should be the minimum, assuming no REX.
364         if let Some(range) = self.recipe.branch_range.as_mut() {
365             range.inst_size += size_addendum;
366         }
367 
368         self.recipe.emit = replace_put_op(self.recipe.emit, &recipe_name);
369         self.recipe.name = recipe_name + &self.recipe.name;
370 
371         (self.recipe.build(), bits)
372     }
373 }
374 
375 /// Returns a predicate checking that the "cond" field of the instruction contains one of the
376 /// directly supported floating point condition codes.
supported_floatccs_predicate( supported_cc: &[Literal], format: &InstructionFormat, ) -> InstructionPredicate377 fn supported_floatccs_predicate(
378     supported_cc: &[Literal],
379     format: &InstructionFormat,
380 ) -> InstructionPredicate {
381     supported_cc
382         .iter()
383         .fold(InstructionPredicate::new(), |pred, literal| {
384             pred.or(InstructionPredicate::new_is_field_equal(
385                 format,
386                 "cond",
387                 literal.to_rust_code(),
388             ))
389         })
390 }
391 
392 /// Return an instruction predicate that checks if `iform.imm` is a valid `scale` for a SIB byte.
valid_scale(format: &InstructionFormat) -> InstructionPredicate393 fn valid_scale(format: &InstructionFormat) -> InstructionPredicate {
394     ["1", "2", "4", "8"]
395         .iter()
396         .fold(InstructionPredicate::new(), |pred, &literal| {
397             pred.or(InstructionPredicate::new_is_field_equal(
398                 format,
399                 "imm",
400                 literal.into(),
401             ))
402         })
403 }
404 
define<'shared>( shared_defs: &'shared SharedDefinitions, settings: &'shared SettingGroup, regs: &'shared IsaRegs, ) -> RecipeGroup<'shared>405 pub(crate) fn define<'shared>(
406     shared_defs: &'shared SharedDefinitions,
407     settings: &'shared SettingGroup,
408     regs: &'shared IsaRegs,
409 ) -> RecipeGroup<'shared> {
410     // The set of floating point condition codes that are directly supported.
411     // Other condition codes need to be reversed or expressed as two tests.
412     let floatcc = &shared_defs.imm.floatcc;
413     let supported_floatccs: Vec<Literal> = ["ord", "uno", "one", "ueq", "gt", "ge", "ult", "ule"]
414         .iter()
415         .map(|name| Literal::enumerator_for(floatcc, name))
416         .collect();
417 
418     // Register classes shorthands.
419     let abcd = regs.class_by_name("ABCD");
420     let gpr = regs.class_by_name("GPR");
421     let fpr = regs.class_by_name("FPR");
422     let flag = regs.class_by_name("FLAG");
423 
424     // Operand constraints shorthands.
425     let reg_rflags = Register::new(flag, regs.regunit_by_name(flag, "rflags"));
426     let reg_rax = Register::new(gpr, regs.regunit_by_name(gpr, "rax"));
427     let reg_rcx = Register::new(gpr, regs.regunit_by_name(gpr, "rcx"));
428     let reg_rdx = Register::new(gpr, regs.regunit_by_name(gpr, "rdx"));
429     let reg_r15 = Register::new(gpr, regs.regunit_by_name(gpr, "r15"));
430     let reg_xmm0 = Register::new(fpr, regs.regunit_by_name(fpr, "xmm0"));
431 
432     // Stack operand with a 32-bit signed displacement from either RBP or RSP.
433     let stack_gpr32 = Stack::new(gpr);
434     let stack_fpr32 = Stack::new(fpr);
435 
436     let formats = &shared_defs.formats;
437 
438     // Predicates shorthands.
439     let use_sse41 = settings.predicate_by_name("use_sse41");
440 
441     // Definitions.
442     let mut recipes = RecipeGroup::new(regs);
443 
444     // A null unary instruction that takes a GPR register. Can be used for identity copies and
445     // no-op conversions.
446     recipes.add_recipe(
447         EncodingRecipeBuilder::new("null", &formats.unary, 0)
448             .operands_in(vec![gpr])
449             .operands_out(vec![0])
450             .emit(""),
451     );
452     recipes.add_recipe(
453         EncodingRecipeBuilder::new("null_fpr", &formats.unary, 0)
454             .operands_in(vec![fpr])
455             .operands_out(vec![0])
456             .emit(""),
457     );
458     recipes.add_recipe(
459         EncodingRecipeBuilder::new("stacknull", &formats.unary, 0)
460             .operands_in(vec![stack_gpr32])
461             .operands_out(vec![stack_gpr32])
462             .emit(""),
463     );
464 
465     recipes.add_recipe(
466         EncodingRecipeBuilder::new("get_pinned_reg", &formats.nullary, 0)
467             .operands_out(vec![reg_r15])
468             .emit(""),
469     );
470     // umr with a fixed register output that's r15.
471     recipes.add_template_recipe(
472         EncodingRecipeBuilder::new("set_pinned_reg", &formats.unary, 1)
473             .operands_in(vec![gpr])
474             .clobbers_flags(false)
475             .emit(
476                 r#"
477                     let r15 = RU::r15.into();
478                     {{PUT_OP}}(bits, rex2(r15, in_reg0), sink);
479                     modrm_rr(r15, in_reg0, sink);
480                 "#,
481             ),
482     );
483 
484     // No-op fills, created by late-stage redundant-fill removal.
485     recipes.add_recipe(
486         EncodingRecipeBuilder::new("fillnull", &formats.unary, 0)
487             .operands_in(vec![stack_gpr32])
488             .operands_out(vec![gpr])
489             .clobbers_flags(false)
490             .emit(""),
491     );
492     recipes.add_recipe(
493         EncodingRecipeBuilder::new("ffillnull", &formats.unary, 0)
494             .operands_in(vec![stack_gpr32])
495             .operands_out(vec![fpr])
496             .clobbers_flags(false)
497             .emit(""),
498     );
499 
500     recipes.add_recipe(
501         EncodingRecipeBuilder::new("debugtrap", &formats.nullary, 1).emit("sink.put1(0xcc);"),
502     );
503 
504     // XX opcode, no ModR/M.
505     recipes.add_template_recipe(EncodingRecipeBuilder::new("trap", &formats.trap, 0).emit(
506         r#"
507             sink.trap(code, func.srclocs[inst]);
508             {{PUT_OP}}(bits, BASE_REX, sink);
509         "#,
510     ));
511 
512     // Macro: conditional jump over a ud2.
513     recipes.add_recipe(
514         EncodingRecipeBuilder::new("trapif", &formats.int_cond_trap, 4)
515             .operands_in(vec![reg_rflags])
516             .clobbers_flags(false)
517             .emit(
518                 r#"
519                     // Jump over a 2-byte ud2.
520                     sink.put1(0x70 | (icc2opc(cond.inverse()) as u8));
521                     sink.put1(2);
522                     // ud2.
523                     sink.trap(code, func.srclocs[inst]);
524                     sink.put1(0x0f);
525                     sink.put1(0x0b);
526                 "#,
527             ),
528     );
529 
530     recipes.add_recipe(
531         EncodingRecipeBuilder::new("trapff", &formats.float_cond_trap, 4)
532             .operands_in(vec![reg_rflags])
533             .clobbers_flags(false)
534             .inst_predicate(supported_floatccs_predicate(
535                 &supported_floatccs,
536                 &*formats.float_cond_trap,
537             ))
538             .emit(
539                 r#"
540                     // Jump over a 2-byte ud2.
541                     sink.put1(0x70 | (fcc2opc(cond.inverse()) as u8));
542                     sink.put1(2);
543                     // ud2.
544                     sink.trap(code, func.srclocs[inst]);
545                     sink.put1(0x0f);
546                     sink.put1(0x0b);
547                 "#,
548             ),
549     );
550 
551     // XX /r
552     recipes.add_template_inferred(
553         EncodingRecipeBuilder::new("rr", &formats.binary, 1)
554             .operands_in(vec![gpr, gpr])
555             .operands_out(vec![0])
556             .emit(
557                 r#"
558                         {{PUT_OP}}(bits, rex2(in_reg0, in_reg1), sink);
559                         modrm_rr(in_reg0, in_reg1, sink);
560                     "#,
561             ),
562         "size_with_inferred_rex_for_inreg0_inreg1",
563     );
564 
565     // XX /r with operands swapped. (RM form).
566     recipes.add_template_inferred(
567         EncodingRecipeBuilder::new("rrx", &formats.binary, 1)
568             .operands_in(vec![gpr, gpr])
569             .operands_out(vec![0])
570             .emit(
571                 r#"
572                         {{PUT_OP}}(bits, rex2(in_reg1, in_reg0), sink);
573                         modrm_rr(in_reg1, in_reg0, sink);
574                     "#,
575             ),
576         "size_with_inferred_rex_for_inreg0_inreg1",
577     );
578 
579     // XX /r with FPR ins and outs. A form.
580     recipes.add_template_inferred(
581         EncodingRecipeBuilder::new("fa", &formats.binary, 1)
582             .operands_in(vec![fpr, fpr])
583             .operands_out(vec![0])
584             .emit(
585                 r#"
586                     {{PUT_OP}}(bits, rex2(in_reg1, in_reg0), sink);
587                     modrm_rr(in_reg1, in_reg0, sink);
588                 "#,
589             ),
590         "size_with_inferred_rex_for_inreg0_inreg1",
591     );
592 
593     // XX /r with FPR ins and outs. A form with input operands swapped.
594     recipes.add_template_inferred(
595         EncodingRecipeBuilder::new("fax", &formats.binary, 1)
596             .operands_in(vec![fpr, fpr])
597             .operands_out(vec![1])
598             .emit(
599                 r#"
600                     {{PUT_OP}}(bits, rex2(in_reg0, in_reg1), sink);
601                     modrm_rr(in_reg0, in_reg1, sink);
602                 "#,
603             ),
604         // The operand order does not matter for calculating whether a REX prefix is needed.
605         "size_with_inferred_rex_for_inreg0_inreg1",
606     );
607 
608     // XX /r with FPR ins and outs. A form with a byte immediate.
609     {
610         recipes.add_template_inferred(
611             EncodingRecipeBuilder::new("fa_ib", &formats.ternary_imm8, 2)
612                 .operands_in(vec![fpr, fpr])
613                 .operands_out(vec![0])
614                 .inst_predicate(InstructionPredicate::new_is_unsigned_int(
615                     &*formats.ternary_imm8,
616                     "imm",
617                     8,
618                     0,
619                 ))
620                 .emit(
621                     r#"
622                     {{PUT_OP}}(bits, rex2(in_reg1, in_reg0), sink);
623                     modrm_rr(in_reg1, in_reg0, sink);
624                     let imm: i64 = imm.into();
625                     sink.put1(imm as u8);
626                 "#,
627                 ),
628             "size_with_inferred_rex_for_inreg0_inreg1",
629         );
630     }
631 
632     // XX /n for a unary operation with extension bits.
633     recipes.add_template(
634         Template::new(
635             EncodingRecipeBuilder::new("ur", &formats.unary, 1)
636                 .operands_in(vec![gpr])
637                 .operands_out(vec![0])
638                 .emit(
639                     r#"
640                         {{PUT_OP}}(bits, rex1(in_reg0), sink);
641                         modrm_r_bits(in_reg0, bits, sink);
642                     "#,
643                 ),
644             regs,
645         )
646         .inferred_rex_compute_size("size_with_inferred_rex_for_inreg0"),
647     );
648 
649     // XX /r, but for a unary operator with separate input/output register, like
650     // copies. MR form, preserving flags.
651     recipes.add_template(
652         Template::new(
653             EncodingRecipeBuilder::new("umr", &formats.unary, 1)
654                 .operands_in(vec![gpr])
655                 .operands_out(vec![gpr])
656                 .clobbers_flags(false)
657                 .emit(
658                     r#"
659                         {{PUT_OP}}(bits, rex2(out_reg0, in_reg0), sink);
660                         modrm_rr(out_reg0, in_reg0, sink);
661                     "#,
662                 ),
663             regs,
664         )
665         .inferred_rex_compute_size("size_with_inferred_rex_for_inreg0_outreg0"),
666     );
667 
668     // Same as umr, but with FPR -> GPR registers.
669     recipes.add_template_recipe(
670         EncodingRecipeBuilder::new("rfumr", &formats.unary, 1)
671             .operands_in(vec![fpr])
672             .operands_out(vec![gpr])
673             .clobbers_flags(false)
674             .emit(
675                 r#"
676                     {{PUT_OP}}(bits, rex2(out_reg0, in_reg0), sink);
677                     modrm_rr(out_reg0, in_reg0, sink);
678                 "#,
679             ),
680     );
681 
682     // Same as umr, but with the source register specified directly.
683     recipes.add_template_recipe(
684         EncodingRecipeBuilder::new("umr_reg_to_ssa", &formats.copy_to_ssa, 1)
685             // No operands_in to mention, because a source register is specified directly.
686             .operands_out(vec![gpr])
687             .clobbers_flags(false)
688             .emit(
689                 r#"
690                     {{PUT_OP}}(bits, rex2(out_reg0, src), sink);
691                     modrm_rr(out_reg0, src, sink);
692                 "#,
693             ),
694     );
695 
696     // XX /r, but for a unary operator with separate input/output register.
697     // RM form. Clobbers FLAGS.
698     recipes.add_template_recipe(
699         EncodingRecipeBuilder::new("urm", &formats.unary, 1)
700             .operands_in(vec![gpr])
701             .operands_out(vec![gpr])
702             .emit(
703                 r#"
704                     {{PUT_OP}}(bits, rex2(in_reg0, out_reg0), sink);
705                     modrm_rr(in_reg0, out_reg0, sink);
706                 "#,
707             ),
708     );
709 
710     // XX /r. Same as urm, but doesn't clobber FLAGS.
711     let urm_noflags = recipes.add_template_recipe(
712         EncodingRecipeBuilder::new("urm_noflags", &formats.unary, 1)
713             .operands_in(vec![gpr])
714             .operands_out(vec![gpr])
715             .clobbers_flags(false)
716             .emit(
717                 r#"
718                     {{PUT_OP}}(bits, rex2(in_reg0, out_reg0), sink);
719                     modrm_rr(in_reg0, out_reg0, sink);
720                 "#,
721             ),
722     );
723 
724     // XX /r. Same as urm_noflags, but input limited to ABCD.
725     recipes.add_template(
726         Template::new(
727             EncodingRecipeBuilder::new("urm_noflags_abcd", &formats.unary, 1)
728                 .operands_in(vec![abcd])
729                 .operands_out(vec![gpr])
730                 .clobbers_flags(false)
731                 .emit(
732                     r#"
733                     {{PUT_OP}}(bits, rex2(in_reg0, out_reg0), sink);
734                     modrm_rr(in_reg0, out_reg0, sink);
735                 "#,
736                 ),
737             regs,
738         )
739         .when_prefixed(urm_noflags),
740     );
741 
742     // XX /r, RM form, FPR -> FPR.
743     recipes.add_template_inferred(
744         EncodingRecipeBuilder::new("furm", &formats.unary, 1)
745             .operands_in(vec![fpr])
746             .operands_out(vec![fpr])
747             .clobbers_flags(false)
748             .emit(
749                 r#"
750                     {{PUT_OP}}(bits, rex2(in_reg0, out_reg0), sink);
751                     modrm_rr(in_reg0, out_reg0, sink);
752                 "#,
753             ),
754         "size_with_inferred_rex_for_inreg0_outreg0",
755     );
756 
757     // Same as furm, but with the source register specified directly.
758     recipes.add_template_recipe(
759         EncodingRecipeBuilder::new("furm_reg_to_ssa", &formats.copy_to_ssa, 1)
760             // No operands_in to mention, because a source register is specified directly.
761             .operands_out(vec![fpr])
762             .clobbers_flags(false)
763             .emit(
764                 r#"
765                     {{PUT_OP}}(bits, rex2(src, out_reg0), sink);
766                     modrm_rr(src, out_reg0, sink);
767                 "#,
768             ),
769     );
770 
771     // XX /r, RM form, GPR -> FPR.
772     recipes.add_template_inferred(
773         EncodingRecipeBuilder::new("frurm", &formats.unary, 1)
774             .operands_in(vec![gpr])
775             .operands_out(vec![fpr])
776             .clobbers_flags(false)
777             .emit(
778                 r#"
779                         {{PUT_OP}}(bits, rex2(in_reg0, out_reg0), sink);
780                         modrm_rr(in_reg0, out_reg0, sink);
781                     "#,
782             ),
783         "size_with_inferred_rex_for_inreg0_outreg0",
784     );
785 
786     // XX /r, RM form, FPR -> GPR.
787     recipes.add_template_recipe(
788         EncodingRecipeBuilder::new("rfurm", &formats.unary, 1)
789             .operands_in(vec![fpr])
790             .operands_out(vec![gpr])
791             .clobbers_flags(false)
792             .emit(
793                 r#"
794                     {{PUT_OP}}(bits, rex2(in_reg0, out_reg0), sink);
795                     modrm_rr(in_reg0, out_reg0, sink);
796                 "#,
797             ),
798     );
799 
800     // XX /r, RMI form for one of the roundXX SSE 4.1 instructions.
801     recipes.add_template_recipe(
802         EncodingRecipeBuilder::new("furmi_rnd", &formats.unary, 2)
803             .operands_in(vec![fpr])
804             .operands_out(vec![fpr])
805             .isa_predicate(use_sse41)
806             .emit(
807                 r#"
808                     {{PUT_OP}}(bits, rex2(in_reg0, out_reg0), sink);
809                     modrm_rr(in_reg0, out_reg0, sink);
810                     sink.put1(match opcode {
811                         Opcode::Nearest => 0b00,
812                         Opcode::Floor => 0b01,
813                         Opcode::Ceil => 0b10,
814                         Opcode::Trunc => 0b11,
815                         x => panic!("{} unexpected for furmi_rnd", opcode),
816                     });
817                 "#,
818             ),
819     );
820 
821     // XX /r, for regmove instructions.
822     recipes.add_template_recipe(
823         EncodingRecipeBuilder::new("rmov", &formats.reg_move, 1)
824             .operands_in(vec![gpr])
825             .clobbers_flags(false)
826             .emit(
827                 r#"
828                     {{PUT_OP}}(bits, rex2(dst, src), sink);
829                     modrm_rr(dst, src, sink);
830                 "#,
831             ),
832     );
833 
834     // XX /r, for regmove instructions (FPR version, RM encoded).
835     recipes.add_template_recipe(
836         EncodingRecipeBuilder::new("frmov", &formats.reg_move, 1)
837             .operands_in(vec![fpr])
838             .clobbers_flags(false)
839             .emit(
840                 r#"
841                     {{PUT_OP}}(bits, rex2(src, dst), sink);
842                     modrm_rr(src, dst, sink);
843                 "#,
844             ),
845     );
846 
847     // XX /n with one arg in %rcx, for shifts.
848     recipes.add_template_recipe(
849         EncodingRecipeBuilder::new("rc", &formats.binary, 1)
850             .operands_in(vec![
851                 OperandConstraint::RegClass(gpr),
852                 OperandConstraint::FixedReg(reg_rcx),
853             ])
854             .operands_out(vec![0])
855             .emit(
856                 r#"
857                     {{PUT_OP}}(bits, rex1(in_reg0), sink);
858                     modrm_r_bits(in_reg0, bits, sink);
859                 "#,
860             ),
861     );
862 
    // XX /n for division: inputs in %rax, %rdx, r. Outputs in %rax, %rdx.
    //
    // Only the third input (`in_reg2`) is encoded in the ModR/M byte; the other operands are
    // fixed registers, which is why the REX-inferring size function looks at `in_reg2` alone.
    recipes.add_template(
        Template::new(
            EncodingRecipeBuilder::new("div", &formats.ternary, 1)
                .operands_in(vec![
                    OperandConstraint::FixedReg(reg_rax),
                    OperandConstraint::FixedReg(reg_rdx),
                    OperandConstraint::RegClass(gpr),
                ])
                .operands_out(vec![reg_rax, reg_rdx])
                // The emitted code records an IntegerDivisionByZero trap site for this
                // instruction before writing the opcode bytes.
                .emit(
                    r#"
                        sink.trap(TrapCode::IntegerDivisionByZero, func.srclocs[inst]);
                        {{PUT_OP}}(bits, rex1(in_reg2), sink);
                        modrm_r_bits(in_reg2, bits, sink);
                    "#,
                ),
            regs,
        )
        .inferred_rex_compute_size("size_with_inferred_rex_for_inreg2"),
    );
884 
885     // XX /n for {s,u}mulx: inputs in %rax, r. Outputs in %rdx(hi):%rax(lo)
886     recipes.add_template(
887         Template::new(
888             EncodingRecipeBuilder::new("mulx", &formats.binary, 1)
889                 .operands_in(vec![
890                     OperandConstraint::FixedReg(reg_rax),
891                     OperandConstraint::RegClass(gpr),
892                 ])
893                 .operands_out(vec![
894                     OperandConstraint::FixedReg(reg_rax),
895                     OperandConstraint::FixedReg(reg_rdx),
896                 ])
897                 .emit(
898                     r#"
899                         {{PUT_OP}}(bits, rex1(in_reg1), sink);
900                         modrm_r_bits(in_reg1, bits, sink);
901                     "#,
902                 ),
903             regs,
904         )
905         .inferred_rex_compute_size("size_with_inferred_rex_for_inreg1"),
906     );
907 
908     // XX /r for BLEND* instructions
909     recipes.add_template_inferred(
910         EncodingRecipeBuilder::new("blend", &formats.ternary, 1)
911             .operands_in(vec![
912                 OperandConstraint::FixedReg(reg_xmm0),
913                 OperandConstraint::RegClass(fpr),
914                 OperandConstraint::RegClass(fpr),
915             ])
916             .operands_out(vec![2])
917             .emit(
918                 r#"
919                     {{PUT_OP}}(bits, rex2(in_reg1, in_reg2), sink);
920                     modrm_rr(in_reg1, in_reg2, sink);
921                 "#,
922             ),
923         "size_with_inferred_rex_for_inreg1_inreg2",
924     );
925 
926     // XX /n ib with 8-bit immediate sign-extended.
927     {
928         recipes.add_template_inferred(
929             EncodingRecipeBuilder::new("r_ib", &formats.binary_imm64, 2)
930                 .operands_in(vec![gpr])
931                 .operands_out(vec![0])
932                 .inst_predicate(InstructionPredicate::new_is_signed_int(
933                     &*formats.binary_imm64,
934                     "imm",
935                     8,
936                     0,
937                 ))
938                 .emit(
939                     r#"
940                             {{PUT_OP}}(bits, rex1(in_reg0), sink);
941                             modrm_r_bits(in_reg0, bits, sink);
942                             let imm: i64 = imm.into();
943                             sink.put1(imm as u8);
944                         "#,
945                 ),
946             "size_with_inferred_rex_for_inreg0",
947         );
948 
949         recipes.add_template_inferred(
950             EncodingRecipeBuilder::new("f_ib", &formats.binary_imm64, 2)
951                 .operands_in(vec![fpr])
952                 .operands_out(vec![0])
953                 .inst_predicate(InstructionPredicate::new_is_signed_int(
954                     &*formats.binary_imm64,
955                     "imm",
956                     8,
957                     0,
958                 ))
959                 .emit(
960                     r#"
961                         {{PUT_OP}}(bits, rex1(in_reg0), sink);
962                         modrm_r_bits(in_reg0, bits, sink);
963                         let imm: i64 = imm.into();
964                         sink.put1(imm as u8);
965                     "#,
966                 ),
967             "size_with_inferred_rex_for_inreg0",
968         );
969 
970         // XX /n id with 32-bit immediate sign-extended.
971         recipes.add_template(
972             Template::new(
973                 EncodingRecipeBuilder::new("r_id", &formats.binary_imm64, 5)
974                     .operands_in(vec![gpr])
975                     .operands_out(vec![0])
976                     .inst_predicate(InstructionPredicate::new_is_signed_int(
977                         &*formats.binary_imm64,
978                         "imm",
979                         32,
980                         0,
981                     ))
982                     .emit(
983                         r#"
984                             {{PUT_OP}}(bits, rex1(in_reg0), sink);
985                             modrm_r_bits(in_reg0, bits, sink);
986                             let imm: i64 = imm.into();
987                             sink.put4(imm as u32);
988                         "#,
989                     ),
990                 regs,
991             )
992             .inferred_rex_compute_size("size_with_inferred_rex_for_inreg0"),
993         );
994     }
995 
996     // XX /r ib with 8-bit unsigned immediate (e.g. for pshufd)
997     {
998         recipes.add_template_inferred(
999             EncodingRecipeBuilder::new("r_ib_unsigned_fpr", &formats.binary_imm8, 2)
1000                 .operands_in(vec![fpr])
1001                 .operands_out(vec![fpr])
1002                 .inst_predicate(InstructionPredicate::new_is_unsigned_int(
1003                     &*formats.binary_imm8,
1004                     "imm",
1005                     8,
1006                     0,
1007                 ))
1008                 .emit(
1009                     r#"
1010                     {{PUT_OP}}(bits, rex2(in_reg0, out_reg0), sink);
1011                     modrm_rr(in_reg0, out_reg0, sink);
1012                     let imm: i64 = imm.into();
1013                     sink.put1(imm as u8);
1014                 "#,
1015                 ),
1016             "size_with_inferred_rex_for_inreg0_outreg0",
1017         );
1018     }
1019 
1020     // XX /r ib with 8-bit unsigned immediate (e.g. for extractlane)
1021     {
1022         recipes.add_template_inferred(
1023             EncodingRecipeBuilder::new("r_ib_unsigned_gpr", &formats.binary_imm8, 2)
1024                 .operands_in(vec![fpr])
1025                 .operands_out(vec![gpr])
1026                 .inst_predicate(InstructionPredicate::new_is_unsigned_int(
1027                     &*formats.binary_imm8, "imm", 8, 0,
1028                 ))
1029                 .emit(
1030                     r#"
1031                     {{PUT_OP}}(bits, rex2(out_reg0, in_reg0), sink);
1032                     modrm_rr(out_reg0, in_reg0, sink); // note the flipped register in the ModR/M byte
1033                     let imm: i64 = imm.into();
1034                     sink.put1(imm as u8);
1035                 "#,
1036                 ), "size_with_inferred_rex_for_inreg0_outreg0"
1037         );
1038     }
1039 
1040     // XX /r ib with 8-bit unsigned immediate (e.g. for insertlane)
1041     {
1042         recipes.add_template_inferred(
1043             EncodingRecipeBuilder::new("r_ib_unsigned_r", &formats.ternary_imm8, 2)
1044                 .operands_in(vec![fpr, gpr])
1045                 .operands_out(vec![0])
1046                 .inst_predicate(InstructionPredicate::new_is_unsigned_int(
1047                     &*formats.ternary_imm8,
1048                     "imm",
1049                     8,
1050                     0,
1051                 ))
1052                 .emit(
1053                     r#"
1054                     {{PUT_OP}}(bits, rex2(in_reg1, in_reg0), sink);
1055                     modrm_rr(in_reg1, in_reg0, sink);
1056                     let imm: i64 = imm.into();
1057                     sink.put1(imm as u8);
1058                 "#,
1059                 ),
1060             "size_with_inferred_rex_for_inreg0_inreg1",
1061         );
1062     }
1063 
1064     {
1065         // XX /n id with 32-bit immediate sign-extended. UnaryImm version.
1066         recipes.add_template_recipe(
1067             EncodingRecipeBuilder::new("u_id", &formats.unary_imm, 5)
1068                 .operands_out(vec![gpr])
1069                 .inst_predicate(InstructionPredicate::new_is_signed_int(
1070                     &*formats.unary_imm,
1071                     "imm",
1072                     32,
1073                     0,
1074                 ))
1075                 .emit(
1076                     r#"
1077                         {{PUT_OP}}(bits, rex1(out_reg0), sink);
1078                         modrm_r_bits(out_reg0, bits, sink);
1079                         let imm: i64 = imm.into();
1080                         sink.put4(imm as u32);
1081                     "#,
1082                 ),
1083         );
1084     }
1085 
1086     // XX+rd id unary with 32-bit immediate. Note no recipe predicate.
1087     recipes.add_template_recipe(
1088         EncodingRecipeBuilder::new("pu_id", &formats.unary_imm, 4)
1089             .operands_out(vec![gpr])
1090             .emit(
1091                 r#"
1092                     // The destination register is encoded in the low bits of the opcode.
1093                     // No ModR/M.
1094                     {{PUT_OP}}(bits | (out_reg0 & 7), rex1(out_reg0), sink);
1095                     let imm: i64 = imm.into();
1096                     sink.put4(imm as u32);
1097                 "#,
1098             ),
1099     );
1100 
1101     // XX+rd id unary with bool immediate. Note no recipe predicate.
1102     recipes.add_template_recipe(
1103         EncodingRecipeBuilder::new("pu_id_bool", &formats.unary_bool, 4)
1104             .operands_out(vec![gpr])
1105             .emit(
1106                 r#"
1107                     // The destination register is encoded in the low bits of the opcode.
1108                     // No ModR/M.
1109                     {{PUT_OP}}(bits | (out_reg0 & 7), rex1(out_reg0), sink);
1110                     let imm: u32 = if imm { 1 } else { 0 };
1111                     sink.put4(imm);
1112                 "#,
1113             ),
1114     );
1115 
1116     // XX+rd id nullary with 0 as 32-bit immediate. Note no recipe predicate.
1117     recipes.add_template_recipe(
1118         EncodingRecipeBuilder::new("pu_id_ref", &formats.nullary, 4)
1119             .operands_out(vec![gpr])
1120             .emit(
1121                 r#"
1122                     // The destination register is encoded in the low bits of the opcode.
1123                     // No ModR/M.
1124                     {{PUT_OP}}(bits | (out_reg0 & 7), rex1(out_reg0), sink);
1125                     sink.put4(0);
1126                 "#,
1127             ),
1128     );
1129 
1130     // XX+rd iq unary with 64-bit immediate.
1131     recipes.add_template_recipe(
1132         EncodingRecipeBuilder::new("pu_iq", &formats.unary_imm, 8)
1133             .operands_out(vec![gpr])
1134             .emit(
1135                 r#"
1136                     {{PUT_OP}}(bits | (out_reg0 & 7), rex1(out_reg0), sink);
1137                     let imm: i64 = imm.into();
1138                     sink.put8(imm as u64);
1139                 "#,
1140             ),
1141     );
1142 
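    // XX /r unary with zero immediate. The output register is used for both ModR/M register
    // operands and no immediate bytes are emitted; the recipe itself carries no predicate.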
1144     recipes.add_template_recipe(
1145         EncodingRecipeBuilder::new("u_id_z", &formats.unary_imm, 1)
1146             .operands_out(vec![gpr])
1147             .emit(
1148                 r#"
1149                     {{PUT_OP}}(bits, rex2(out_reg0, out_reg0), sink);
1150                     modrm_rr(out_reg0, out_reg0, sink);
1151                 "#,
1152             ),
1153     );
1154 
    // XX /r unary with floating point 32-bit immediate equal to zero.
1156     {
1157         recipes.add_template_recipe(
1158             EncodingRecipeBuilder::new("f32imm_z", &formats.unary_ieee32, 1)
1159                 .operands_out(vec![fpr])
1160                 .inst_predicate(InstructionPredicate::new_is_zero_32bit_float(
1161                     &*formats.unary_ieee32,
1162                     "imm",
1163                 ))
1164                 .emit(
1165                     r#"
1166                         {{PUT_OP}}(bits, rex2(out_reg0, out_reg0), sink);
1167                         modrm_rr(out_reg0, out_reg0, sink);
1168                     "#,
1169                 ),
1170         );
1171     }
1172 
    // XX /r unary with floating point 64-bit immediate equal to zero.
1174     {
1175         recipes.add_template_recipe(
1176             EncodingRecipeBuilder::new("f64imm_z", &formats.unary_ieee64, 1)
1177                 .operands_out(vec![fpr])
1178                 .inst_predicate(InstructionPredicate::new_is_zero_64bit_float(
1179                     &*formats.unary_ieee64,
1180                     "imm",
1181                 ))
1182                 .emit(
1183                     r#"
1184                         {{PUT_OP}}(bits, rex2(out_reg0, out_reg0), sink);
1185                         modrm_rr(out_reg0, out_reg0, sink);
1186                     "#,
1187                 ),
1188         );
1189     }
1190 
    // XX+rd, for pushq: the input register is encoded in the low three bits of the opcode and
    // no ModR/M byte is emitted. A StackOverflow trap site is recorded for the instruction
    // before the opcode is written.
    recipes.add_template_recipe(
        EncodingRecipeBuilder::new("pushq", &formats.unary, 0)
            .operands_in(vec![gpr])
            .emit(
                r#"
                    sink.trap(TrapCode::StackOverflow, func.srclocs[inst]);
                    {{PUT_OP}}(bits | (in_reg0 & 7), rex1(in_reg0), sink);
                "#,
            ),
    );
1201 
    // XX+rd, for popq: the output register is encoded in the low three bits of the opcode and
    // no ModR/M byte is emitted.
    recipes.add_template_recipe(
        EncodingRecipeBuilder::new("popq", &formats.nullary, 0)
            .operands_out(vec![gpr])
            .emit(
                r#"
                    {{PUT_OP}}(bits | (out_reg0 & 7), rex1(out_reg0), sink);
                "#,
            ),
    );
1211 
    // XX /r, for copy_special instructions (explicit `src` and `dst` registers, no SSA operands).
1213     recipes.add_template_recipe(
1214         EncodingRecipeBuilder::new("copysp", &formats.copy_special, 1)
1215             .clobbers_flags(false)
1216             .emit(
1217                 r#"
1218                     {{PUT_OP}}(bits, rex2(dst, src), sink);
1219                     modrm_rr(dst, src, sink);
1220                 "#,
1221             ),
1222     );
1223 
    // XX /r with %rsp hard-coded as the first ModR/M operand and the input GPR as the second.
    recipes.add_template_recipe(
        EncodingRecipeBuilder::new("adjustsp", &formats.unary, 1)
            .operands_in(vec![gpr])
            .emit(
                r#"
                    {{PUT_OP}}(bits, rex2(RU::rsp.into(), in_reg0), sink);
                    modrm_rr(RU::rsp.into(), in_reg0, sink);
                "#,
            ),
    );
1234 
1235     {
1236         recipes.add_template_recipe(
1237             EncodingRecipeBuilder::new("adjustsp_ib", &formats.unary_imm, 2)
1238                 .inst_predicate(InstructionPredicate::new_is_signed_int(
1239                     &*formats.unary_imm,
1240                     "imm",
1241                     8,
1242                     0,
1243                 ))
1244                 .emit(
1245                     r#"
1246                         {{PUT_OP}}(bits, rex1(RU::rsp.into()), sink);
1247                         modrm_r_bits(RU::rsp.into(), bits, sink);
1248                         let imm: i64 = imm.into();
1249                         sink.put1(imm as u8);
1250                     "#,
1251                 ),
1252         );
1253 
1254         recipes.add_template_recipe(
1255             EncodingRecipeBuilder::new("adjustsp_id", &formats.unary_imm, 5)
1256                 .inst_predicate(InstructionPredicate::new_is_signed_int(
1257                     &*formats.unary_imm,
1258                     "imm",
1259                     32,
1260                     0,
1261                 ))
1262                 .emit(
1263                     r#"
1264                         {{PUT_OP}}(bits, rex1(RU::rsp.into()), sink);
1265                         modrm_r_bits(RU::rsp.into(), bits, sink);
1266                         let imm: i64 = imm.into();
1267                         sink.put4(imm as u32);
1268                     "#,
1269                 ),
1270         );
1271     }
1272 
1273     recipes.add_recipe(
1274         EncodingRecipeBuilder::new("dummy_sarg_t", &formats.nullary, 0)
1275             .operands_out(vec![Stack::new(gpr)])
1276             .emit(""),
1277     );
1278 
1279     // XX+rd id with Abs4 function relocation.
1280     recipes.add_template_recipe(
1281         EncodingRecipeBuilder::new("fnaddr4", &formats.func_addr, 4)
1282             .operands_out(vec![gpr])
1283             .emit(
1284                 r#"
1285                     {{PUT_OP}}(bits | (out_reg0 & 7), rex1(out_reg0), sink);
1286                     sink.reloc_external(func.srclocs[inst],
1287                                         Reloc::Abs4,
1288                                         &func.dfg.ext_funcs[func_ref].name,
1289                                         0);
1290                     sink.put4(0);
1291                 "#,
1292             ),
1293     );
1294 
1295     // XX+rd iq with Abs8 function relocation.
1296     recipes.add_template_recipe(
1297         EncodingRecipeBuilder::new("fnaddr8", &formats.func_addr, 8)
1298             .operands_out(vec![gpr])
1299             .emit(
1300                 r#"
1301                     {{PUT_OP}}(bits | (out_reg0 & 7), rex1(out_reg0), sink);
1302                     sink.reloc_external(func.srclocs[inst],
1303                                         Reloc::Abs8,
1304                                         &func.dfg.ext_funcs[func_ref].name,
1305                                         0);
1306                     sink.put8(0);
1307                 "#,
1308             ),
1309     );
1310 
1311     // Similar to fnaddr4, but writes !0 (this is used by BaldrMonkey).
1312     recipes.add_template_recipe(
1313         EncodingRecipeBuilder::new("allones_fnaddr4", &formats.func_addr, 4)
1314             .operands_out(vec![gpr])
1315             .emit(
1316                 r#"
1317                     {{PUT_OP}}(bits | (out_reg0 & 7), rex1(out_reg0), sink);
1318                     sink.reloc_external(func.srclocs[inst],
1319                                         Reloc::Abs4,
1320                                         &func.dfg.ext_funcs[func_ref].name,
1321                                         0);
1322                     // Write the immediate as `!0` for the benefit of BaldrMonkey.
1323                     sink.put4(!0);
1324                 "#,
1325             ),
1326     );
1327 
1328     // Similar to fnaddr8, but writes !0 (this is used by BaldrMonkey).
1329     recipes.add_template_recipe(
1330         EncodingRecipeBuilder::new("allones_fnaddr8", &formats.func_addr, 8)
1331             .operands_out(vec![gpr])
1332             .emit(
1333                 r#"
1334                     {{PUT_OP}}(bits | (out_reg0 & 7), rex1(out_reg0), sink);
1335                     sink.reloc_external(func.srclocs[inst],
1336                                         Reloc::Abs8,
1337                                         &func.dfg.ext_funcs[func_ref].name,
1338                                         0);
1339                     // Write the immediate as `!0` for the benefit of BaldrMonkey.
1340                     sink.put8(!0);
1341                 "#,
1342             ),
1343     );
1344 
    // XX /r with rip-relative ModR/M and X86PCRel4 function relocation. The 4-byte field after
    // the ModR/M byte is written as zero at the relocation site.
    recipes.add_template_recipe(
        EncodingRecipeBuilder::new("pcrel_fnaddr8", &formats.func_addr, 5)
            .operands_out(vec![gpr])
            // rex2 gets passed 0 for r/m register because the upper bit of
            // r/m doesn't get decoded when in rip-relative addressing mode.
            .emit(
                r#"
                    {{PUT_OP}}(bits, rex2(0, out_reg0), sink);
                    modrm_riprel(out_reg0, sink);
                    // The addend adjusts for the difference between the end of the
                    // instruction and the beginning of the immediate field.
                    sink.reloc_external(func.srclocs[inst],
                                        Reloc::X86PCRel4,
                                        &func.dfg.ext_funcs[func_ref].name,
                                        -4);
                    sink.put4(0);
                "#,
            ),
    );
1364 
    // XX /r with rip-relative ModR/M and X86GOTPCRel4 function relocation. The 4-byte field
    // after the ModR/M byte is written as zero at the relocation site.
    recipes.add_template_recipe(
        EncodingRecipeBuilder::new("got_fnaddr8", &formats.func_addr, 5)
            .operands_out(vec![gpr])
            // rex2 gets passed 0 for r/m register because the upper bit of
            // r/m doesn't get decoded when in rip-relative addressing mode.
            .emit(
                r#"
                    {{PUT_OP}}(bits, rex2(0, out_reg0), sink);
                    modrm_riprel(out_reg0, sink);
                    // The addend adjusts for the difference between the end of the
                    // instruction and the beginning of the immediate field.
                    sink.reloc_external(func.srclocs[inst],
                                        Reloc::X86GOTPCRel4,
                                        &func.dfg.ext_funcs[func_ref].name,
                                        -4);
                    sink.put4(0);
                "#,
            ),
    );
1384 
1385     // XX+rd id with Abs4 globalsym relocation.
1386     recipes.add_template_recipe(
1387         EncodingRecipeBuilder::new("gvaddr4", &formats.unary_global_value, 4)
1388             .operands_out(vec![gpr])
1389             .emit(
1390                 r#"
1391                     {{PUT_OP}}(bits | (out_reg0 & 7), rex1(out_reg0), sink);
1392                     sink.reloc_external(func.srclocs[inst],
1393                                         Reloc::Abs4,
1394                                         &func.global_values[global_value].symbol_name(),
1395                                         0);
1396                     sink.put4(0);
1397                 "#,
1398             ),
1399     );
1400 
1401     // XX+rd iq with Abs8 globalsym relocation.
1402     recipes.add_template_recipe(
1403         EncodingRecipeBuilder::new("gvaddr8", &formats.unary_global_value, 8)
1404             .operands_out(vec![gpr])
1405             .emit(
1406                 r#"
1407                     {{PUT_OP}}(bits | (out_reg0 & 7), rex1(out_reg0), sink);
1408                     sink.reloc_external(func.srclocs[inst],
1409                                         Reloc::Abs8,
1410                                         &func.global_values[global_value].symbol_name(),
1411                                         0);
1412                     sink.put8(0);
1413                 "#,
1414             ),
1415     );
1416 
1417     // XX+rd iq with PCRel4 globalsym relocation.
1418     recipes.add_template_recipe(
1419         EncodingRecipeBuilder::new("pcrel_gvaddr8", &formats.unary_global_value, 5)
1420             .operands_out(vec![gpr])
1421             .emit(
1422                 r#"
1423                     {{PUT_OP}}(bits, rex2(0, out_reg0), sink);
1424                     modrm_rm(5, out_reg0, sink);
1425                     // The addend adjusts for the difference between the end of the
1426                     // instruction and the beginning of the immediate field.
1427                     sink.reloc_external(func.srclocs[inst],
1428                                         Reloc::X86PCRel4,
1429                                         &func.global_values[global_value].symbol_name(),
1430                                         -4);
1431                     sink.put4(0);
1432                 "#,
1433             ),
1434     );
1435 
1436     // XX+rd iq with Abs8 globalsym relocation.
1437     recipes.add_template_recipe(
1438         EncodingRecipeBuilder::new("got_gvaddr8", &formats.unary_global_value, 5)
1439             .operands_out(vec![gpr])
1440             .emit(
1441                 r#"
1442                     {{PUT_OP}}(bits, rex2(0, out_reg0), sink);
1443                     modrm_rm(5, out_reg0, sink);
1444                     // The addend adjusts for the difference between the end of the
1445                     // instruction and the beginning of the immediate field.
1446                     sink.reloc_external(func.srclocs[inst],
1447                                         Reloc::X86GOTPCRel4,
1448                                         &func.global_values[global_value].symbol_name(),
1449                                         -4);
1450                     sink.put4(0);
1451                 "#,
1452             ),
1453     );
1454 
1455     // Stack addresses.
1456     //
1457     // TODO Alternative forms for 8-bit immediates, when applicable.
1458 
    // XX /r with a SIB byte (no index) and a 32-bit displacement, addressing relative to the
    // stack base register returned by `stk_base`.
    recipes.add_template_recipe(
        EncodingRecipeBuilder::new("spaddr_id", &formats.stack_load, 6)
            .operands_out(vec![gpr])
            // The displacement is the stack reference's offset plus the instruction's `offset`
            // immediate; `checked_add(..).unwrap()` makes an overflow of that sum panic.
            .emit(
                r#"
                    let sp = StackRef::sp(stack_slot, &func.stack_slots);
                    let base = stk_base(sp.base);
                    {{PUT_OP}}(bits, rex2(base, out_reg0), sink);
                    modrm_sib_disp32(out_reg0, sink);
                    sib_noindex(base, sink);
                    let imm : i32 = offset.into();
                    sink.put4(sp.offset.checked_add(imm).unwrap() as u32);
                "#,
            ),
    );
1474 
1475     // Constant addresses.
1476 
1477     recipes.add_template_recipe(
1478         EncodingRecipeBuilder::new("const_addr", &formats.unary_const, 5)
1479             .operands_out(vec![gpr])
1480             .clobbers_flags(false)
1481             .emit(
1482                 r#"
1483                     {{PUT_OP}}(bits, rex2(0, out_reg0), sink);
1484                     modrm_riprel(out_reg0, sink);
1485                     const_disp4(constant_handle, func, sink);
1486                 "#,
1487             ),
1488     );
1489 
1490     // Store recipes.
1491 
1492     {
1493         // Simple stores.
1494 
1495         // A predicate asking if the offset is zero.
1496         let has_no_offset =
1497             InstructionPredicate::new_is_field_equal(&*formats.store, "offset", "0".into());
1498 
        // XX /r register-indirect store with no offset.
        //
        // `in_reg0` is the stored value and `in_reg1` is the address register. The emitted code
        // records a HeapOutOfBounds trap site unless the memory flags say `notrap`, then picks
        // one of three ModR/M forms based on the address register:
        //   - a SIB byte with no index when `needs_sib_byte(in_reg1)`,
        //   - an 8-bit displacement of zero when `needs_offset(in_reg1)`,
        //   - a plain register-indirect ModR/M otherwise.
        // The size function accounts for the optional extra byte. The returned template is kept
        // in `st` because the `st_abcd` recipe below passes it to `when_prefixed`.
        let st = recipes.add_template_recipe(
            EncodingRecipeBuilder::new("st", &formats.store, 1)
                .operands_in(vec![gpr, gpr])
                .inst_predicate(has_no_offset.clone())
                .clobbers_flags(false)
                .compute_size("size_plus_maybe_sib_or_offset_for_inreg_1")
                .emit(
                    r#"
                        if !flags.notrap() {
                            sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]);
                        }
                        {{PUT_OP}}(bits, rex2(in_reg1, in_reg0), sink);
                        if needs_sib_byte(in_reg1) {
                            modrm_sib(in_reg0, sink);
                            sib_noindex(in_reg1, sink);
                        } else if needs_offset(in_reg1) {
                            modrm_disp8(in_reg1, in_reg0, sink);
                            sink.put1(0);
                        } else {
                            modrm_rm(in_reg1, in_reg0, sink);
                        }
                    "#,
                ),
        );
1524 
        // XX /r register-indirect store with no offset.
        // Only ABCD allowed for stored value. This is for byte stores with no REX.
        //
        // Uses the same emit sequence as `st`, but operand 0 is constrained to the `abcd`
        // register class. Unlike `st`, the `Template` is built by hand (with `regs`) so that
        // `when_prefixed(st)` can be attached before it is registered with `add_template`.
        // NOTE(review): presumably `when_prefixed` makes the `st` template the one to use once a
        // REX prefix is present -- confirm in `Template::when_prefixed`.
        recipes.add_template(
            Template::new(
                EncodingRecipeBuilder::new("st_abcd", &formats.store, 1)
                    .operands_in(vec![abcd, gpr])
                    .inst_predicate(has_no_offset.clone())
                    .clobbers_flags(false)
                    .compute_size("size_plus_maybe_sib_or_offset_for_inreg_1")
                    .emit(
                        r#"
                        if !flags.notrap() {
                            sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]);
                        }
                        {{PUT_OP}}(bits, rex2(in_reg1, in_reg0), sink);
                        if needs_sib_byte(in_reg1) {
                            modrm_sib(in_reg0, sink);
                            sib_noindex(in_reg1, sink);
                        } else if needs_offset(in_reg1) {
                            modrm_disp8(in_reg1, in_reg0, sink);
                            sink.put1(0);
                        } else {
                            modrm_rm(in_reg1, in_reg0, sink);
                        }
                    "#,
                    ),
                regs,
            )
            .when_prefixed(st),
        );
1555 
        // XX /r register-indirect store of FPR with no offset.
        //
        // Same emit sequence as `st`, but the stored value (`in_reg0`) is an FPR while the base
        // (`in_reg1`) stays a GPR. This is the last user of `has_no_offset`, so the predicate is
        // passed by value instead of being cloned.
        // Registered through `add_template_inferred`, whose second argument names an additional
        // size routine. NOTE(review): judging by its name that routine also accounts for a REX
        // prefix derived from `in_reg0`/`in_reg1` -- confirm where the routine is defined.
        recipes.add_template_inferred(
            EncodingRecipeBuilder::new("fst", &formats.store, 1)
                .operands_in(vec![fpr, gpr])
                .inst_predicate(has_no_offset)
                .clobbers_flags(false)
                .compute_size("size_plus_maybe_sib_or_offset_for_inreg_1")
                .emit(
                    r#"
                        if !flags.notrap() {
                            sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]);
                        }
                        {{PUT_OP}}(bits, rex2(in_reg1, in_reg0), sink);
                        if needs_sib_byte(in_reg1) {
                            modrm_sib(in_reg0, sink);
                            sib_noindex(in_reg1, sink);
                        } else if needs_offset(in_reg1) {
                            modrm_disp8(in_reg1, in_reg0, sink);
                            sink.put1(0);
                        } else {
                            modrm_rm(in_reg1, in_reg0, sink);
                        }
                    "#,
                ),
            "size_plus_maybe_sib_or_offset_inreg1_plus_rex_prefix_for_inreg0_inreg1",
        );
1582 
        // Predicate on the store's `offset` field, built with `new_is_signed_int(.., 8, 0)`.
        // NOTE(review): presumably "offset fits in a signed 8-bit integer, scale 0" -- confirm
        // the meaning of the last two arguments in `InstructionPredicate::new_is_signed_int`.
        // Shared (via `clone()`) by `stDisp8`, `stDisp8_abcd` and `fstDisp8`.
        let has_small_offset =
            InstructionPredicate::new_is_signed_int(&*formats.store, "offset", 8, 0);

        // XX /r register-indirect store with 8-bit offset.
        //
        // `in_reg0` is the stored value, `in_reg1` the base. After the optional trap record and
        // the opcode, the template emits either ModRM + SIB (when the base needs a SIB byte) or a
        // plain disp8 ModRM, and then always writes one displacement byte: the instruction's
        // `offset` converted to `i32` and truncated with `as u8`.
        // The returned template is kept in `st_disp8` because `stDisp8_abcd` links to it.
        let st_disp8 = recipes.add_template_recipe(
            EncodingRecipeBuilder::new("stDisp8", &formats.store, 2)
                .operands_in(vec![gpr, gpr])
                .inst_predicate(has_small_offset.clone())
                .clobbers_flags(false)
                .compute_size("size_plus_maybe_sib_for_inreg_1")
                .emit(
                    r#"
                        if !flags.notrap() {
                            sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]);
                        }
                        {{PUT_OP}}(bits, rex2(in_reg1, in_reg0), sink);
                        if needs_sib_byte(in_reg1) {
                            modrm_sib_disp8(in_reg0, sink);
                            sib_noindex(in_reg1, sink);
                        } else {
                            modrm_disp8(in_reg1, in_reg0, sink);
                        }
                        let offset: i32 = offset.into();
                        sink.put1(offset as u8);
                    "#,
                ),
        );
1610 
        // XX /r register-indirect store with 8-bit offset.
        // Only ABCD allowed for stored value. This is for byte stores with no REX.
        //
        // Same emit sequence as `stDisp8`, with operand 0 constrained to the `abcd` register
        // class. The `Template` is built by hand so `when_prefixed(st_disp8)` can be attached
        // before registration.
        recipes.add_template(
            Template::new(
                EncodingRecipeBuilder::new("stDisp8_abcd", &formats.store, 2)
                    .operands_in(vec![abcd, gpr])
                    .inst_predicate(has_small_offset.clone())
                    .clobbers_flags(false)
                    .compute_size("size_plus_maybe_sib_for_inreg_1")
                    .emit(
                        r#"
                        if !flags.notrap() {
                            sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]);
                        }
                        {{PUT_OP}}(bits, rex2(in_reg1, in_reg0), sink);
                        if needs_sib_byte(in_reg1) {
                            modrm_sib_disp8(in_reg0, sink);
                            sib_noindex(in_reg1, sink);
                        } else {
                            modrm_disp8(in_reg1, in_reg0, sink);
                        }
                        let offset: i32 = offset.into();
                        sink.put1(offset as u8);
                    "#,
                    ),
                regs,
            )
            .when_prefixed(st_disp8),
        );
1640 
        // XX /r register-indirect store with 8-bit offset of FPR.
        //
        // Same emit sequence as `stDisp8`, but the stored value (`in_reg0`) is an FPR. This is
        // the last user of `has_small_offset`, so the predicate is moved rather than cloned.
        // Registered through `add_template_inferred`, whose second argument names an additional
        // size routine (NOTE(review): presumably the REX-aware size computation -- confirm).
        recipes.add_template_inferred(
            EncodingRecipeBuilder::new("fstDisp8", &formats.store, 2)
                .operands_in(vec![fpr, gpr])
                .inst_predicate(has_small_offset)
                .clobbers_flags(false)
                .compute_size("size_plus_maybe_sib_for_inreg_1")
                .emit(
                    r#"
                        if !flags.notrap() {
                            sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]);
                        }
                        {{PUT_OP}}(bits, rex2(in_reg1, in_reg0), sink);
                        if needs_sib_byte(in_reg1) {
                            modrm_sib_disp8(in_reg0, sink);
                            sib_noindex(in_reg1, sink);
                        } else {
                            modrm_disp8(in_reg1, in_reg0, sink);
                        }
                        let offset: i32 = offset.into();
                        sink.put1(offset as u8);
                    "#,
                ),
            "size_plus_maybe_sib_inreg1_plus_rex_prefix_for_inreg0_inreg1",
        );
1666 
        // XX /r register-indirect store with 32-bit offset.
        //
        // `in_reg0` is the stored value, `in_reg1` the base. The template emits ModRM + SIB when
        // the base needs a SIB byte, a plain disp32 ModRM otherwise, and then always writes the
        // `offset` as four bytes (`put4(offset as u32)`).
        // No instruction predicate is attached here, so this recipe is not restricted by offset.
        // NOTE(review): `ldDisp32` and `stWithIndexDisp32` do attach a 32-bit signed-int offset
        // predicate; confirm the difference is intentional.
        // The returned template is kept in `st_disp32` because `stDisp32_abcd` links to it.
        let st_disp32 = recipes.add_template_recipe(
            EncodingRecipeBuilder::new("stDisp32", &formats.store, 5)
                .operands_in(vec![gpr, gpr])
                .clobbers_flags(false)
                .compute_size("size_plus_maybe_sib_for_inreg_1")
                .emit(
                    r#"
                        if !flags.notrap() {
                            sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]);
                        }
                        {{PUT_OP}}(bits, rex2(in_reg1, in_reg0), sink);
                        if needs_sib_byte(in_reg1) {
                            modrm_sib_disp32(in_reg0, sink);
                            sib_noindex(in_reg1, sink);
                        } else {
                            modrm_disp32(in_reg1, in_reg0, sink);
                        }
                        let offset: i32 = offset.into();
                        sink.put4(offset as u32);
                    "#,
                ),
        );
1690 
        // XX /r register-indirect store with 32-bit offset.
        // Only ABCD allowed for stored value. This is for byte stores with no REX.
        //
        // Same emit sequence as `stDisp32`, with operand 0 constrained to the `abcd` register
        // class and no offset predicate. The `Template` is built by hand so
        // `when_prefixed(st_disp32)` can be attached before registration.
        recipes.add_template(
            Template::new(
                EncodingRecipeBuilder::new("stDisp32_abcd", &formats.store, 5)
                    .operands_in(vec![abcd, gpr])
                    .clobbers_flags(false)
                    .compute_size("size_plus_maybe_sib_for_inreg_1")
                    .emit(
                        r#"
                        if !flags.notrap() {
                            sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]);
                        }
                        {{PUT_OP}}(bits, rex2(in_reg1, in_reg0), sink);
                        if needs_sib_byte(in_reg1) {
                            modrm_sib_disp32(in_reg0, sink);
                            sib_noindex(in_reg1, sink);
                        } else {
                            modrm_disp32(in_reg1, in_reg0, sink);
                        }
                        let offset: i32 = offset.into();
                        sink.put4(offset as u32);
                    "#,
                    ),
                regs,
            )
            .when_prefixed(st_disp32),
        );
1719 
        // XX /r register-indirect store with 32-bit offset of FPR.
        //
        // Same emit sequence as `stDisp32`, but the stored value (`in_reg0`) is an FPR; like
        // `stDisp32` it carries no offset predicate.
        // Registered through `add_template_inferred`, whose second argument names an additional
        // size routine (NOTE(review): presumably the REX-aware size computation -- confirm).
        recipes.add_template_inferred(
            EncodingRecipeBuilder::new("fstDisp32", &formats.store, 5)
                .operands_in(vec![fpr, gpr])
                .clobbers_flags(false)
                .compute_size("size_plus_maybe_sib_for_inreg_1")
                .emit(
                    r#"
                        if !flags.notrap() {
                            sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]);
                        }
                        {{PUT_OP}}(bits, rex2(in_reg1, in_reg0), sink);
                        if needs_sib_byte(in_reg1) {
                            modrm_sib_disp32(in_reg0, sink);
                            sib_noindex(in_reg1, sink);
                        } else {
                            modrm_disp32(in_reg1, in_reg0, sink);
                        }
                        let offset: i32 = offset.into();
                        sink.put4(offset as u32);
                    "#,
                ),
            "size_plus_maybe_sib_inreg1_plus_rex_prefix_for_inreg0_inreg1",
        );
1744     }
1745 
1746     {
1747         // Complex stores.
1748 
        // A predicate asking if the offset is zero. This one is built on the `store_complex`
        // format and is shared by `stWithIndex`, `stWithIndex_abcd` and `fstWithIndex`.
        let has_no_offset =
            InstructionPredicate::new_is_field_equal(&*formats.store_complex, "offset", "0".into());

        // XX /r register-indirect store with index and no offset.
        //
        // Takes three GPR inputs: `in_reg0` is the stored value, and `in_reg1` / `in_reg2` are
        // both handed to `sib(0, in_reg2, in_reg1, ..)` (NOTE(review): presumably scale 0, index
        // `in_reg2`, base `in_reg1` -- confirm against the `sib` helper). A SIB byte is emitted on
        // both paths; when `needs_offset(in_reg1)` holds, the disp8 form is used with an explicit
        // zero displacement byte.
        recipes.add_template_recipe(
            EncodingRecipeBuilder::new("stWithIndex", &formats.store_complex, 2)
                .operands_in(vec![gpr, gpr, gpr])
                .inst_predicate(has_no_offset.clone())
                .clobbers_flags(false)
                .compute_size("size_plus_maybe_offset_for_inreg_1")
                .emit(
                    r#"
                        if !flags.notrap() {
                            sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]);
                        }
                        {{PUT_OP}}(bits, rex3(in_reg1, in_reg0, in_reg2), sink);
                        // The else branch always inserts an SIB byte.
                        if needs_offset(in_reg1) {
                            modrm_sib_disp8(in_reg0, sink);
                            sib(0, in_reg2, in_reg1, sink);
                            sink.put1(0);
                        } else {
                            modrm_sib(in_reg0, sink);
                            sib(0, in_reg2, in_reg1, sink);
                        }
                    "#,
                ),
        );
1778 
        // XX /r register-indirect store with index and no offset.
        // Only ABCD allowed for stored value. This is for byte stores with no REX.
        //
        // Same emit sequence as `stWithIndex`, with operand 0 constrained to the `abcd` register
        // class. Unlike the simple-store `_abcd` recipes, this one is registered directly with
        // `add_template_recipe` and has no `when_prefixed` link.
        recipes.add_template_recipe(
            EncodingRecipeBuilder::new("stWithIndex_abcd", &formats.store_complex, 2)
                .operands_in(vec![abcd, gpr, gpr])
                .inst_predicate(has_no_offset.clone())
                .clobbers_flags(false)
                .compute_size("size_plus_maybe_offset_for_inreg_1")
                .emit(
                    r#"
                        if !flags.notrap() {
                            sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]);
                        }
                        {{PUT_OP}}(bits, rex3(in_reg1, in_reg0, in_reg2), sink);
                        // The else branch always inserts an SIB byte.
                        if needs_offset(in_reg1) {
                            modrm_sib_disp8(in_reg0, sink);
                            sib(0, in_reg2, in_reg1, sink);
                            sink.put1(0);
                        } else {
                            modrm_sib(in_reg0, sink);
                            sib(0, in_reg2, in_reg1, sink);
                        }
                    "#,
                ),
        );
1805 
        // XX /r register-indirect store with index and no offset of FPR.
        //
        // Same emit sequence as `stWithIndex`, but the stored value (`in_reg0`) is an FPR. This
        // is the last user of the complex-store `has_no_offset`, so the predicate is moved rather
        // than cloned.
        recipes.add_template_recipe(
            EncodingRecipeBuilder::new("fstWithIndex", &formats.store_complex, 2)
                .operands_in(vec![fpr, gpr, gpr])
                .inst_predicate(has_no_offset)
                .clobbers_flags(false)
                .compute_size("size_plus_maybe_offset_for_inreg_1")
                .emit(
                    r#"
                        if !flags.notrap() {
                            sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]);
                        }
                        {{PUT_OP}}(bits, rex3(in_reg1, in_reg0, in_reg2), sink);
                        // The else branch always inserts an SIB byte.
                        if needs_offset(in_reg1) {
                            modrm_sib_disp8(in_reg0, sink);
                            sib(0, in_reg2, in_reg1, sink);
                            sink.put1(0);
                        } else {
                            modrm_sib(in_reg0, sink);
                            sib(0, in_reg2, in_reg1, sink);
                        }
                    "#,
                ),
        );
1831 
        // Predicate on the complex store's `offset` field, built with
        // `new_is_signed_int(.., 8, 0)`. NOTE(review): presumably "offset fits in a signed 8-bit
        // integer" -- confirm in `InstructionPredicate::new_is_signed_int`.
        let has_small_offset =
            InstructionPredicate::new_is_signed_int(&*formats.store_complex, "offset", 8, 0);

        // XX /r register-indirect store with index and 8-bit offset.
        //
        // Three GPR inputs (`in_reg0` stored value; `in_reg1`, `in_reg2` passed to `sib`). The
        // encoding is unconditional: ModRM (SIB + disp8 form), SIB byte, then one displacement
        // byte taken from `offset` truncated with `as u8`. No `compute_size` override is set.
        recipes.add_template_recipe(
            EncodingRecipeBuilder::new("stWithIndexDisp8", &formats.store_complex, 3)
                .operands_in(vec![gpr, gpr, gpr])
                .inst_predicate(has_small_offset.clone())
                .clobbers_flags(false)
                .emit(
                    r#"
                        if !flags.notrap() {
                            sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]);
                        }
                        {{PUT_OP}}(bits, rex3(in_reg1, in_reg0, in_reg2), sink);
                        modrm_sib_disp8(in_reg0, sink);
                        sib(0, in_reg2, in_reg1, sink);
                        let offset: i32 = offset.into();
                        sink.put1(offset as u8);
                    "#,
                ),
        );
1854 
        // XX /r register-indirect store with index and 8-bit offset.
        // Only ABCD allowed for stored value. This is for byte stores with no REX.
        //
        // Same emit sequence as `stWithIndexDisp8`, with operand 0 constrained to the `abcd`
        // register class.
        recipes.add_template_recipe(
            EncodingRecipeBuilder::new("stWithIndexDisp8_abcd", &formats.store_complex, 3)
                .operands_in(vec![abcd, gpr, gpr])
                .inst_predicate(has_small_offset.clone())
                .clobbers_flags(false)
                .emit(
                    r#"
                        if !flags.notrap() {
                            sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]);
                        }
                        {{PUT_OP}}(bits, rex3(in_reg1, in_reg0, in_reg2), sink);
                        modrm_sib_disp8(in_reg0, sink);
                        sib(0, in_reg2, in_reg1, sink);
                        let offset: i32 = offset.into();
                        sink.put1(offset as u8);
                    "#,
                ),
        );
1875 
        // XX /r register-indirect store with index and 8-bit offset of FPR.
        //
        // Same emit sequence as `stWithIndexDisp8`, but the stored value (`in_reg0`) is an FPR.
        // Last user of the complex-store `has_small_offset`, so the predicate is moved.
        recipes.add_template_recipe(
            EncodingRecipeBuilder::new("fstWithIndexDisp8", &formats.store_complex, 3)
                .operands_in(vec![fpr, gpr, gpr])
                .inst_predicate(has_small_offset)
                .clobbers_flags(false)
                .emit(
                    r#"
                        if !flags.notrap() {
                            sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]);
                        }
                        {{PUT_OP}}(bits, rex3(in_reg1, in_reg0, in_reg2), sink);
                        modrm_sib_disp8(in_reg0, sink);
                        sib(0, in_reg2, in_reg1, sink);
                        let offset: i32 = offset.into();
                        sink.put1(offset as u8);
                    "#,
                ),
        );
1895 
        // Predicate on the complex store's `offset` field, built with
        // `new_is_signed_int(.., 32, 0)`. NOTE(review): presumably "offset fits in a signed
        // 32-bit integer" -- confirm in `InstructionPredicate::new_is_signed_int`.
        let has_big_offset =
            InstructionPredicate::new_is_signed_int(&*formats.store_complex, "offset", 32, 0);

        // XX /r register-indirect store with index and 32-bit offset.
        //
        // Three GPR inputs (`in_reg0` stored value; `in_reg1`, `in_reg2` passed to `sib`). The
        // encoding is unconditional: ModRM (SIB + disp32 form), SIB byte, then the `offset`
        // written as four bytes with `put4(offset as u32)`.
        recipes.add_template_recipe(
            EncodingRecipeBuilder::new("stWithIndexDisp32", &formats.store_complex, 6)
                .operands_in(vec![gpr, gpr, gpr])
                .inst_predicate(has_big_offset.clone())
                .clobbers_flags(false)
                .emit(
                    r#"
                        if !flags.notrap() {
                            sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]);
                        }
                        {{PUT_OP}}(bits, rex3(in_reg1, in_reg0, in_reg2), sink);
                        modrm_sib_disp32(in_reg0, sink);
                        sib(0, in_reg2, in_reg1, sink);
                        let offset: i32 = offset.into();
                        sink.put4(offset as u32);
                    "#,
                ),
        );
1918 
        // XX /r register-indirect store with index and 32-bit offset.
        // Only ABCD allowed for stored value. This is for byte stores with no REX.
        //
        // Same emit sequence as `stWithIndexDisp32`, with operand 0 constrained to the `abcd`
        // register class.
        recipes.add_template_recipe(
            EncodingRecipeBuilder::new("stWithIndexDisp32_abcd", &formats.store_complex, 6)
                .operands_in(vec![abcd, gpr, gpr])
                .inst_predicate(has_big_offset.clone())
                .clobbers_flags(false)
                .emit(
                    r#"
                        if !flags.notrap() {
                            sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]);
                        }
                        {{PUT_OP}}(bits, rex3(in_reg1, in_reg0, in_reg2), sink);
                        modrm_sib_disp32(in_reg0, sink);
                        sib(0, in_reg2, in_reg1, sink);
                        let offset: i32 = offset.into();
                        sink.put4(offset as u32);
                    "#,
                ),
        );
1939 
        // XX /r register-indirect store with index and 32-bit offset of FPR.
        //
        // Same emit sequence as `stWithIndexDisp32`, but the stored value (`in_reg0`) is an FPR.
        // Last user of the complex-store `has_big_offset`, so the predicate is moved.
        recipes.add_template_recipe(
            EncodingRecipeBuilder::new("fstWithIndexDisp32", &formats.store_complex, 6)
                .operands_in(vec![fpr, gpr, gpr])
                .inst_predicate(has_big_offset)
                .clobbers_flags(false)
                .emit(
                    r#"
                        if !flags.notrap() {
                            sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]);
                        }
                        {{PUT_OP}}(bits, rex3(in_reg1, in_reg0, in_reg2), sink);
                        modrm_sib_disp32(in_reg0, sink);
                        sib(0, in_reg2, in_reg1, sink);
                        let offset: i32 = offset.into();
                        sink.put4(offset as u32);
                    "#,
                ),
        );
1959     }
1960 
    // Unary spill with SIB and 32-bit displacement.
    //
    // Input is a GPR (`in_reg0`); the output operand is the stack location `stack_gpr32`
    // (`out_stk0`). The template always records a `StackOverflow` trap (there is no `notrap`
    // check here), derives the base register with `stk_base(out_stk0.base)`, and then emits
    // opcode, ModRM (SIB + disp32 form), a no-index SIB byte for that base, and the slot offset
    // as four bytes.
    recipes.add_template_recipe(
        EncodingRecipeBuilder::new("spillSib32", &formats.unary, 6)
            .operands_in(vec![gpr])
            .operands_out(vec![stack_gpr32])
            .clobbers_flags(false)
            .emit(
                r#"
                    sink.trap(TrapCode::StackOverflow, func.srclocs[inst]);
                    let base = stk_base(out_stk0.base);
                    {{PUT_OP}}(bits, rex2(base, in_reg0), sink);
                    modrm_sib_disp32(in_reg0, sink);
                    sib_noindex(base, sink);
                    sink.put4(out_stk0.offset as u32);
                "#,
            ),
    );
1978 
    // Like spillSib32, but targeting an FPR rather than a GPR.
    //
    // The input register class is `fpr` and the output stack operand is `stack_fpr32`; the emit
    // sequence (unconditional `StackOverflow` trap record, SIB + disp32 addressing off
    // `stk_base(out_stk0.base)`) is otherwise identical to `spillSib32`.
    recipes.add_template_recipe(
        EncodingRecipeBuilder::new("fspillSib32", &formats.unary, 6)
            .operands_in(vec![fpr])
            .operands_out(vec![stack_fpr32])
            .clobbers_flags(false)
            .emit(
                r#"
                    sink.trap(TrapCode::StackOverflow, func.srclocs[inst]);
                    let base = stk_base(out_stk0.base);
                    {{PUT_OP}}(bits, rex2(base, in_reg0), sink);
                    modrm_sib_disp32(in_reg0, sink);
                    sib_noindex(base, sink);
                    sink.put4(out_stk0.offset as u32);
                "#,
            ),
    );
1996 
    // Regspill using RSP-relative addressing.
    //
    // Built on the `reg_spill` format with a single GPR input and no output operand. The emit
    // template reads the format's `src` and `dst` fields: `dst` is turned into a stack reference
    // with `StackRef::sp(dst, &func.stack_slots)` (shadowing the original `dst`), and `src` is
    // stored at that reference's base + 32-bit offset using a no-index SIB byte. A
    // `StackOverflow` trap is always recorded first.
    recipes.add_template_recipe(
        EncodingRecipeBuilder::new("regspill32", &formats.reg_spill, 6)
            .operands_in(vec![gpr])
            .clobbers_flags(false)
            .emit(
                r#"
                    sink.trap(TrapCode::StackOverflow, func.srclocs[inst]);
                    let dst = StackRef::sp(dst, &func.stack_slots);
                    let base = stk_base(dst.base);
                    {{PUT_OP}}(bits, rex2(base, src), sink);
                    modrm_sib_disp32(src, sink);
                    sib_noindex(base, sink);
                    sink.put4(dst.offset as u32);
                "#,
            ),
    );
2014 
    // Like regspill32, but targeting an FPR rather than a GPR.
    //
    // Only the input register class differs (`fpr`); the emit sequence (unconditional
    // `StackOverflow` trap record, `StackRef::sp` lookup of `dst`, SIB + disp32 store of `src`)
    // is identical to `regspill32`.
    recipes.add_template_recipe(
        EncodingRecipeBuilder::new("fregspill32", &formats.reg_spill, 6)
            .operands_in(vec![fpr])
            .clobbers_flags(false)
            .emit(
                r#"
                    sink.trap(TrapCode::StackOverflow, func.srclocs[inst]);
                    let dst = StackRef::sp(dst, &func.stack_slots);
                    let base = stk_base(dst.base);
                    {{PUT_OP}}(bits, rex2(base, src), sink);
                    modrm_sib_disp32(src, sink);
                    sib_noindex(base, sink);
                    sink.put4(dst.offset as u32);
                "#,
            ),
    );
2032 
2033     // Load recipes.
2034 
2035     {
2036         // Simple loads.
2037 
        // A predicate asking if the offset is zero. This one is built on the `load` format and
        // is shared by `ld` and `fld`.
        let has_no_offset =
            InstructionPredicate::new_is_field_equal(&*formats.load, "offset", "0".into());

        // XX /r load with no offset.
        //
        // `in_reg0` is the address base (a GPR) and `out_reg0` the destination GPR. The emit
        // template:
        //   1. records a `HeapOutOfBounds` trap for the instruction unless `notrap` is set;
        //   2. emits the opcode through the `{{PUT_OP}}` placeholder, passing a `rex2` value;
        //   3. encodes the address as ModRM + SIB, ModRM + zero disp8, or plain ModRM, depending
        //      on what the base register requires.
        // NOTE(review): the SIB / disp8 branches presumably cover the RSP/R12 and RBP/R13 base
        // register cases of x86 addressing -- confirm against the binemit helpers.
        recipes.add_template_recipe(
            EncodingRecipeBuilder::new("ld", &formats.load, 1)
                .operands_in(vec![gpr])
                .operands_out(vec![gpr])
                .inst_predicate(has_no_offset.clone())
                .clobbers_flags(false)
                .compute_size("size_plus_maybe_sib_or_offset_for_inreg_0")
                .emit(
                    r#"
                        if !flags.notrap() {
                            sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]);
                        }
                        {{PUT_OP}}(bits, rex2(in_reg0, out_reg0), sink);
                        if needs_sib_byte(in_reg0) {
                            modrm_sib(out_reg0, sink);
                            sib_noindex(in_reg0, sink);
                        } else if needs_offset(in_reg0) {
                            modrm_disp8(in_reg0, out_reg0, sink);
                            sink.put1(0);
                        } else {
                            modrm_rm(in_reg0, out_reg0, sink);
                        }
                    "#,
                ),
        );
2068 
        // XX /r float load with no offset.
        //
        // Same emit sequence as `ld`, but the destination (`out_reg0`) is an FPR while the base
        // (`in_reg0`) stays a GPR. Last user of the load `has_no_offset`, so the predicate is
        // moved rather than cloned.
        // Registered through `add_template_inferred`, whose second argument names an additional
        // size routine (NOTE(review): presumably the REX-aware size computation -- confirm).
        recipes.add_template_inferred(
            EncodingRecipeBuilder::new("fld", &formats.load, 1)
                .operands_in(vec![gpr])
                .operands_out(vec![fpr])
                .inst_predicate(has_no_offset)
                .clobbers_flags(false)
                .compute_size("size_plus_maybe_sib_or_offset_for_inreg_0")
                .emit(
                    r#"
                        if !flags.notrap() {
                            sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]);
                        }
                        {{PUT_OP}}(bits, rex2(in_reg0, out_reg0), sink);
                        if needs_sib_byte(in_reg0) {
                            modrm_sib(out_reg0, sink);
                            sib_noindex(in_reg0, sink);
                        } else if needs_offset(in_reg0) {
                            modrm_disp8(in_reg0, out_reg0, sink);
                            sink.put1(0);
                        } else {
                            modrm_rm(in_reg0, out_reg0, sink);
                        }
                    "#,
                ),
            "size_plus_maybe_sib_or_offset_for_inreg_0_plus_rex_prefix_for_inreg0_outreg0",
        );
2096 
        // Predicate on the load's `offset` field, built with `new_is_signed_int(.., 8, 0)`.
        // NOTE(review): presumably "offset fits in a signed 8-bit integer" -- confirm in
        // `InstructionPredicate::new_is_signed_int`. Shared by `ldDisp8` and `fldDisp8`.
        let has_small_offset =
            InstructionPredicate::new_is_signed_int(&*formats.load, "offset", 8, 0);

        // XX /r load with 8-bit offset.
        //
        // `in_reg0` is the base, `out_reg0` the destination GPR. After the optional trap record
        // and the opcode, the template emits either ModRM + SIB (when the base needs a SIB byte)
        // or a plain disp8 ModRM, and then always writes one displacement byte: the `offset`
        // converted to `i32` and truncated with `as u8`.
        recipes.add_template_recipe(
            EncodingRecipeBuilder::new("ldDisp8", &formats.load, 2)
                .operands_in(vec![gpr])
                .operands_out(vec![gpr])
                .inst_predicate(has_small_offset.clone())
                .clobbers_flags(false)
                .compute_size("size_plus_maybe_sib_for_inreg_0")
                .emit(
                    r#"
                        if !flags.notrap() {
                            sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]);
                        }
                        {{PUT_OP}}(bits, rex2(in_reg0, out_reg0), sink);
                        if needs_sib_byte(in_reg0) {
                            modrm_sib_disp8(out_reg0, sink);
                            sib_noindex(in_reg0, sink);
                        } else {
                            modrm_disp8(in_reg0, out_reg0, sink);
                        }
                        let offset: i32 = offset.into();
                        sink.put1(offset as u8);
                    "#,
                ),
        );
2125 
        // XX /r float load with 8-bit offset.
        //
        // Same emit sequence as `ldDisp8`, but the destination (`out_reg0`) is an FPR. Last user
        // of the load `has_small_offset`, so the predicate is moved rather than cloned.
        // Registered through `add_template_inferred`, whose second argument names an additional
        // size routine (NOTE(review): presumably the REX-aware size computation -- confirm).
        recipes.add_template_inferred(
            EncodingRecipeBuilder::new("fldDisp8", &formats.load, 2)
                .operands_in(vec![gpr])
                .operands_out(vec![fpr])
                .inst_predicate(has_small_offset)
                .clobbers_flags(false)
                .compute_size("size_plus_maybe_sib_for_inreg_0")
                .emit(
                    r#"
                        if !flags.notrap() {
                            sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]);
                        }
                        {{PUT_OP}}(bits, rex2(in_reg0, out_reg0), sink);
                        if needs_sib_byte(in_reg0) {
                            modrm_sib_disp8(out_reg0, sink);
                            sib_noindex(in_reg0, sink);
                        } else {
                            modrm_disp8(in_reg0, out_reg0, sink);
                        }
                        let offset: i32 = offset.into();
                        sink.put1(offset as u8);
                    "#,
                ),
            "size_plus_maybe_sib_for_inreg_0_plus_rex_prefix_for_inreg0_outreg0",
        );
2152 
        // Predicate on the load's `offset` field, built with `new_is_signed_int(.., 32, 0)`.
        // NOTE(review): presumably "offset fits in a signed 32-bit integer" -- confirm in
        // `InstructionPredicate::new_is_signed_int`. Shared by `ldDisp32` and `fldDisp32`.
        let has_big_offset =
            InstructionPredicate::new_is_signed_int(&*formats.load, "offset", 32, 0);

        // XX /r load with 32-bit offset.
        //
        // `in_reg0` is the base, `out_reg0` the destination GPR. The template emits ModRM + SIB
        // when the base needs a SIB byte, a plain disp32 ModRM otherwise, and then always writes
        // the `offset` as four bytes (`put4(offset as u32)`).
        recipes.add_template_recipe(
            EncodingRecipeBuilder::new("ldDisp32", &formats.load, 5)
                .operands_in(vec![gpr])
                .operands_out(vec![gpr])
                .inst_predicate(has_big_offset.clone())
                .clobbers_flags(false)
                .compute_size("size_plus_maybe_sib_for_inreg_0")
                .emit(
                    r#"
                        if !flags.notrap() {
                            sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]);
                        }
                        {{PUT_OP}}(bits, rex2(in_reg0, out_reg0), sink);
                        if needs_sib_byte(in_reg0) {
                            modrm_sib_disp32(out_reg0, sink);
                            sib_noindex(in_reg0, sink);
                        } else {
                            modrm_disp32(in_reg0, out_reg0, sink);
                        }
                        let offset: i32 = offset.into();
                        sink.put4(offset as u32);
                    "#,
                ),
        );
2181 
        // XX /r float load with 32-bit offset.
        //
        // Same emit sequence as `ldDisp32`, but the destination (`out_reg0`) is an FPR. Last user
        // of the load `has_big_offset`, so the predicate is moved rather than cloned.
        // Registered through `add_template_inferred`, whose second argument names an additional
        // size routine (NOTE(review): presumably the REX-aware size computation -- confirm).
        recipes.add_template_inferred(
            EncodingRecipeBuilder::new("fldDisp32", &formats.load, 5)
                .operands_in(vec![gpr])
                .operands_out(vec![fpr])
                .inst_predicate(has_big_offset)
                .clobbers_flags(false)
                .compute_size("size_plus_maybe_sib_for_inreg_0")
                .emit(
                    r#"
                        if !flags.notrap() {
                            sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]);
                        }
                        {{PUT_OP}}(bits, rex2(in_reg0, out_reg0), sink);
                        if needs_sib_byte(in_reg0) {
                            modrm_sib_disp32(out_reg0, sink);
                            sib_noindex(in_reg0, sink);
                        } else {
                            modrm_disp32(in_reg0, out_reg0, sink);
                        }
                        let offset: i32 = offset.into();
                        sink.put4(offset as u32);
                    "#,
                ),
            "size_plus_maybe_sib_for_inreg_0_plus_rex_prefix_for_inreg0_outreg0",
        );
2208     }
2209 
2210     {
2211         // Complex loads.
2212 
        // A predicate asking if the offset is zero.
        let has_no_offset =
            InstructionPredicate::new_is_field_equal(&*formats.load_complex, "offset", "0".into());

        // XX /r load with index and no offset.
        //
        // Two GPR inputs, one GPR output. Both inputs are handed to `sib(0, in_reg1, in_reg0, ..)`.
        // NOTE(review): presumably `in_reg0` is the base, `in_reg1` the index and `0` the scale —
        // confirm against the `sib` helper.
        // When `needs_offset(in_reg0)` holds, the disp8 form is emitted with an explicit zero
        // displacement byte; otherwise the plain SIB form is used.
        recipes.add_template_recipe(
            EncodingRecipeBuilder::new("ldWithIndex", &formats.load_complex, 2)
                .operands_in(vec![gpr, gpr])
                .operands_out(vec![gpr])
                // Cloned because the float variant of this recipe reuses the same predicate.
                .inst_predicate(has_no_offset.clone())
                .clobbers_flags(false)
                .compute_size("size_plus_maybe_offset_for_inreg_0")
                .emit(
                    r#"
                        if !flags.notrap() {
                            sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]);
                        }
                        {{PUT_OP}}(bits, rex3(in_reg0, out_reg0, in_reg1), sink);
                        // The else branch always inserts an SIB byte.
                        if needs_offset(in_reg0) {
                            modrm_sib_disp8(out_reg0, sink);
                            sib(0, in_reg1, in_reg0, sink);
                            sink.put1(0);
                        } else {
                            modrm_sib(out_reg0, sink);
                            sib(0, in_reg1, in_reg0, sink);
                        }
                    "#,
                ),
        );
2243 
        // XX /r float load with index and no offset.
        //
        // Two GPR inputs, one FPR output, gated on the zero-offset predicate. The template emits
        // an optional `HeapOutOfBounds` trap, the opcode, and then either the disp8 form with a
        // zero displacement byte (when `needs_offset(in_reg0)`) or the plain SIB form.
        recipes.add_template_recipe(
            EncodingRecipeBuilder::new("fldWithIndex", &formats.load_complex, 2)
                .operands_in(vec![gpr, gpr])
                .operands_out(vec![fpr])
                // Last use of the predicate in this scope, so it is moved rather than cloned.
                .inst_predicate(has_no_offset)
                .clobbers_flags(false)
                .compute_size("size_plus_maybe_offset_for_inreg_0")
                .emit(
                    r#"
                        if !flags.notrap() {
                            sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]);
                        }
                        {{PUT_OP}}(bits, rex3(in_reg0, out_reg0, in_reg1), sink);
                        // The else branch always inserts an SIB byte.
                        if needs_offset(in_reg0) {
                            modrm_sib_disp8(out_reg0, sink);
                            sib(0, in_reg1, in_reg0, sink);
                            sink.put1(0);
                        } else {
                            modrm_sib(out_reg0, sink);
                            sib(0, in_reg1, in_reg0, sink);
                        }
                    "#,
                ),
        );
2270 
        // Predicate over the `offset` field of the complex-load format, built with a width
        // argument of 8.
        // NOTE(review): presumably "offset fits in a signed 8-bit value" — confirm against
        // `InstructionPredicate::new_is_signed_int`.
        let has_small_offset =
            InstructionPredicate::new_is_signed_int(&*formats.load_complex, "offset", 8, 0);

        // XX /r load with index and 8-bit offset.
        //
        // Two GPR inputs, one GPR output. The template always uses `modrm_sib_disp8` followed by
        // `sib(0, in_reg1, in_reg0, ..)` and writes the offset truncated to one byte
        // (`offset as u8`); the `has_small_offset` predicate is what restricts which offsets reach
        // this recipe.
        recipes.add_template_recipe(
            EncodingRecipeBuilder::new("ldWithIndexDisp8", &formats.load_complex, 3)
                .operands_in(vec![gpr, gpr])
                .operands_out(vec![gpr])
                // Cloned because the float variant of this recipe reuses the same predicate.
                .inst_predicate(has_small_offset.clone())
                .clobbers_flags(false)
                .emit(
                    r#"
                        if !flags.notrap() {
                            sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]);
                        }
                        {{PUT_OP}}(bits, rex3(in_reg0, out_reg0, in_reg1), sink);
                        modrm_sib_disp8(out_reg0, sink);
                        sib(0, in_reg1, in_reg0, sink);
                        let offset: i32 = offset.into();
                        sink.put1(offset as u8);
                    "#,
                ),
        );
2294 
        // XX /r float load with index and 8-bit offset.
        //
        // Two GPR inputs, one FPR output. The template always uses `modrm_sib_disp8` followed by
        // `sib(0, in_reg1, in_reg0, ..)` and writes the offset truncated to one byte
        // (`offset as u8`).
        recipes.add_template_recipe(
            EncodingRecipeBuilder::new("fldWithIndexDisp8", &formats.load_complex, 3)
                .operands_in(vec![gpr, gpr])
                .operands_out(vec![fpr])
                // Last use of the predicate in this scope, so it is moved rather than cloned.
                .inst_predicate(has_small_offset)
                .clobbers_flags(false)
                .emit(
                    r#"
                        if !flags.notrap() {
                            sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]);
                        }
                        {{PUT_OP}}(bits, rex3(in_reg0, out_reg0, in_reg1), sink);
                        modrm_sib_disp8(out_reg0, sink);
                        sib(0, in_reg1, in_reg0, sink);
                        let offset: i32 = offset.into();
                        sink.put1(offset as u8);
                    "#,
                ),
        );
2315 
        // Predicate over the `offset` field of the complex-load format, built with a width
        // argument of 32.
        // NOTE(review): presumably "offset fits in a signed 32-bit value" — confirm against
        // `InstructionPredicate::new_is_signed_int`.
        let has_big_offset =
            InstructionPredicate::new_is_signed_int(&*formats.load_complex, "offset", 32, 0);

        // XX /r load with index and 32-bit offset.
        //
        // Two GPR inputs, one GPR output. The template always uses `modrm_sib_disp32` followed by
        // `sib(0, in_reg1, in_reg0, ..)` and writes the offset with `put4`.
        recipes.add_template_recipe(
            EncodingRecipeBuilder::new("ldWithIndexDisp32", &formats.load_complex, 6)
                .operands_in(vec![gpr, gpr])
                .operands_out(vec![gpr])
                // Cloned because the float variant of this recipe reuses the same predicate.
                .inst_predicate(has_big_offset.clone())
                .clobbers_flags(false)
                .emit(
                    r#"
                        if !flags.notrap() {
                            sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]);
                        }
                        {{PUT_OP}}(bits, rex3(in_reg0, out_reg0, in_reg1), sink);
                        modrm_sib_disp32(out_reg0, sink);
                        sib(0, in_reg1, in_reg0, sink);
                        let offset: i32 = offset.into();
                        sink.put4(offset as u32);
                    "#,
                ),
        );
2339 
        // XX /r float load with index and 32-bit offset.
        //
        // Two GPR inputs, one FPR output. The template always uses `modrm_sib_disp32` followed by
        // `sib(0, in_reg1, in_reg0, ..)` and writes the offset with `put4`.
        recipes.add_template_recipe(
            EncodingRecipeBuilder::new("fldWithIndexDisp32", &formats.load_complex, 6)
                .operands_in(vec![gpr, gpr])
                .operands_out(vec![fpr])
                // Last use of the predicate in this scope, so it is moved rather than cloned.
                .inst_predicate(has_big_offset)
                .clobbers_flags(false)
                .emit(
                    r#"
                        if !flags.notrap() {
                            sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]);
                        }
                        {{PUT_OP}}(bits, rex3(in_reg0, out_reg0, in_reg1), sink);
                        modrm_sib_disp32(out_reg0, sink);
                        sib(0, in_reg1, in_reg0, sink);
                        let offset: i32 = offset.into();
                        sink.put4(offset as u32);
                    "#,
                ),
        );
2360     }
2361 
    // Unary fill with SIB and 32-bit displacement.
    //
    // Input is a stack operand (`stack_gpr32`), output is a GPR. The base register is obtained
    // from `stk_base(in_stk0.base)` and the stack operand's `offset` is written with `put4` after
    // the `modrm_sib_disp32` / `sib_noindex` pair.
    recipes.add_template_recipe(
        EncodingRecipeBuilder::new("fillSib32", &formats.unary, 6)
            .operands_in(vec![stack_gpr32])
            .operands_out(vec![gpr])
            .clobbers_flags(false)
            .emit(
                r#"
                    let base = stk_base(in_stk0.base);
                    {{PUT_OP}}(bits, rex2(base, out_reg0), sink);
                    modrm_sib_disp32(out_reg0, sink);
                    sib_noindex(base, sink);
                    sink.put4(in_stk0.offset as u32);
                "#,
            ),
    );
2378 
    // Like fillSib32, but targeting an FPR rather than a GPR.
    //
    // Input is a stack operand (`stack_fpr32`), output is an FPR; the emitted template text is
    // the same as the one used by `fillSib32`.
    recipes.add_template_recipe(
        EncodingRecipeBuilder::new("ffillSib32", &formats.unary, 6)
            .operands_in(vec![stack_fpr32])
            .operands_out(vec![fpr])
            .clobbers_flags(false)
            .emit(
                r#"
                    let base = stk_base(in_stk0.base);
                    {{PUT_OP}}(bits, rex2(base, out_reg0), sink);
                    modrm_sib_disp32(out_reg0, sink);
                    sib_noindex(base, sink);
                    sink.put4(in_stk0.offset as u32);
                "#,
            ),
    );
2395 
    // Regfill with RSP-relative 32-bit displacement.
    //
    // Uses the `reg_fill` format with a `stack_gpr32` input and no declared output operand.
    // The template builds the stack reference itself with `StackRef::sp(src, &func.stack_slots)`
    // and passes `dst` to `rex2` and `modrm_sib_disp32`; the reference's `offset` is written
    // with `put4`.
    recipes.add_template_recipe(
        EncodingRecipeBuilder::new("regfill32", &formats.reg_fill, 6)
            .operands_in(vec![stack_gpr32])
            .clobbers_flags(false)
            .emit(
                r#"
                    let src = StackRef::sp(src, &func.stack_slots);
                    let base = stk_base(src.base);
                    {{PUT_OP}}(bits, rex2(base, dst), sink);
                    modrm_sib_disp32(dst, sink);
                    sib_noindex(base, sink);
                    sink.put4(src.offset as u32);
                "#,
            ),
    );
2412 
    // Like regfill32, but targeting an FPR rather than a GPR.
    //
    // Only the input constraint differs (`stack_fpr32`); the emitted template text is the same
    // as the one used by `regfill32`.
    recipes.add_template_recipe(
        EncodingRecipeBuilder::new("fregfill32", &formats.reg_fill, 6)
            .operands_in(vec![stack_fpr32])
            .clobbers_flags(false)
            .emit(
                r#"
                    let src = StackRef::sp(src, &func.stack_slots);
                    let base = stk_base(src.base);
                    {{PUT_OP}}(bits, rex2(base, dst), sink);
                    modrm_sib_disp32(dst, sink);
                    sib_noindex(base, sink);
                    sink.put4(src.offset as u32);
                "#,
            ),
    );
2429 
2430     // Call/return.
2431 
    // Direct call (`call` format) with a 4-byte immediate filled in by relocation.
    //
    // The template records a `StackOverflow` trap, emits the opcode, registers an
    // `X86CallPCRel4` relocation against the callee's external name with addend -4, writes a
    // 4-byte zero placeholder, and finally records the call site.
    recipes.add_template_recipe(
        EncodingRecipeBuilder::new("call_id", &formats.call, 4).emit(
            r#"
            sink.trap(TrapCode::StackOverflow, func.srclocs[inst]);
            {{PUT_OP}}(bits, BASE_REX, sink);
            // The addend adjusts for the difference between the end of the
            // instruction and the beginning of the immediate field.
            sink.reloc_external(func.srclocs[inst],
                                Reloc::X86CallPCRel4,
                                &func.dfg.ext_funcs[func_ref].name,
                                -4);
            sink.put4(0);
            sink.add_call_site(opcode, func.srclocs[inst]);
        "#,
        ),
    );
2448 
    // Direct call (`call` format) using a PLT-relative relocation.
    //
    // Same emission sequence as the `call_id` recipe, except that the relocation kind is
    // `Reloc::X86CallPLTRel4`.
    recipes.add_template_recipe(
        EncodingRecipeBuilder::new("call_plt_id", &formats.call, 4).emit(
            r#"
            sink.trap(TrapCode::StackOverflow, func.srclocs[inst]);
            {{PUT_OP}}(bits, BASE_REX, sink);
            sink.reloc_external(func.srclocs[inst],
                                Reloc::X86CallPLTRel4,
                                &func.dfg.ext_funcs[func_ref].name,
                                -4);
            sink.put4(0);
            sink.add_call_site(opcode, func.srclocs[inst]);
        "#,
        ),
    );
2463 
    // Indirect call (`call_indirect` format) taking a single GPR input.
    //
    // The template records a `StackOverflow` trap, emits the opcode with `rex1(in_reg0)`, writes
    // the register via `modrm_r_bits(in_reg0, bits, ..)`, and records the call site.
    recipes.add_template_recipe(
        EncodingRecipeBuilder::new("call_r", &formats.call_indirect, 1)
            .operands_in(vec![gpr])
            .emit(
                r#"
                    sink.trap(TrapCode::StackOverflow, func.srclocs[inst]);
                    {{PUT_OP}}(bits, rex1(in_reg0), sink);
                    modrm_r_bits(in_reg0, bits, sink);
                    sink.add_call_site(opcode, func.srclocs[inst]);
                "#,
            ),
    );
2476 
    // Return (`multiary` format): the template emits only the opcode with `BASE_REX`; no operand
    // constraints are declared and nothing follows the opcode.
    recipes.add_template_recipe(
        EncodingRecipeBuilder::new("ret", &formats.multiary, 0)
            .emit("{{PUT_OP}}(bits, BASE_REX, sink);"),
    );
2481 
2482     // Branches.
2483 
    // Unconditional jump (`jump` format), short form: opcode followed by a displacement written
    // by `disp1`.
    // NOTE(review): `branch_range((1, 8))` is presumably (bytes between the displacement origin
    // and the instruction start, displacement width in bits) — confirm against `branch_range`.
    recipes.add_template_recipe(
        EncodingRecipeBuilder::new("jmpb", &formats.jump, 1)
            .branch_range((1, 8))
            .clobbers_flags(false)
            .emit(
                r#"
                    {{PUT_OP}}(bits, BASE_REX, sink);
                    disp1(destination, func, sink);
                "#,
            ),
    );
2495 
    // Unconditional jump (`jump` format), long form: opcode followed by a displacement written
    // by `disp4`, with `branch_range((4, 32))`.
    recipes.add_template_recipe(
        EncodingRecipeBuilder::new("jmpd", &formats.jump, 4)
            .branch_range((4, 32))
            .clobbers_flags(false)
            .emit(
                r#"
                    {{PUT_OP}}(bits, BASE_REX, sink);
                    disp4(destination, func, sink);
                "#,
            ),
    );
2507 
    // Conditional branch on an integer condition (`branch_int` format), short form.
    //
    // Takes `reg_rflags` as its only input. The condition code is folded into the opcode by
    // OR-ing `icc2opc(cond)` into `bits`; the displacement is written by `disp1`.
    recipes.add_template_recipe(
        EncodingRecipeBuilder::new("brib", &formats.branch_int, 1)
            .operands_in(vec![reg_rflags])
            .branch_range((1, 8))
            .clobbers_flags(false)
            .emit(
                r#"
                    {{PUT_OP}}(bits | icc2opc(cond), BASE_REX, sink);
                    disp1(destination, func, sink);
                "#,
            ),
    );
2520 
    // Conditional branch on an integer condition (`branch_int` format), long form.
    //
    // Takes `reg_rflags` as its only input. The condition code is folded into the opcode by
    // OR-ing `icc2opc(cond)` into `bits`; the displacement is written by `disp4`.
    recipes.add_template_recipe(
        EncodingRecipeBuilder::new("brid", &formats.branch_int, 4)
            .operands_in(vec![reg_rflags])
            .branch_range((4, 32))
            .clobbers_flags(false)
            .emit(
                r#"
                    {{PUT_OP}}(bits | icc2opc(cond), BASE_REX, sink);
                    disp4(destination, func, sink);
                "#,
            ),
    );
2533 
    // Conditional branch on a floating-point condition (`branch_float` format), short form.
    //
    // Takes `reg_rflags` as its only input and is restricted by the predicate built from
    // `supported_floatccs`. The condition code is folded into the opcode by OR-ing
    // `fcc2opc(cond)` into `bits`; the displacement is written by `disp1`.
    recipes.add_template_recipe(
        EncodingRecipeBuilder::new("brfb", &formats.branch_float, 1)
            .operands_in(vec![reg_rflags])
            .branch_range((1, 8))
            .clobbers_flags(false)
            .inst_predicate(supported_floatccs_predicate(
                &supported_floatccs,
                &*formats.branch_float,
            ))
            .emit(
                r#"
                    {{PUT_OP}}(bits | fcc2opc(cond), BASE_REX, sink);
                    disp1(destination, func, sink);
                "#,
            ),
    );
2550 
    // Conditional branch on a floating-point condition (`branch_float` format), long form.
    //
    // Takes `reg_rflags` as its only input and is restricted by the predicate built from
    // `supported_floatccs`. The condition code is folded into the opcode by OR-ing
    // `fcc2opc(cond)` into `bits`; the displacement is written by `disp4`.
    recipes.add_template_recipe(
        EncodingRecipeBuilder::new("brfd", &formats.branch_float, 4)
            .operands_in(vec![reg_rflags])
            .branch_range((4, 32))
            .clobbers_flags(false)
            .inst_predicate(supported_floatccs_predicate(
                &supported_floatccs,
                &*formats.branch_float,
            ))
            .emit(
                r#"
                    {{PUT_OP}}(bits | fcc2opc(cond), BASE_REX, sink);
                    disp4(destination, func, sink);
                "#,
            ),
    );
2567 
    // Indirect jump (`indirect_jump` format) through a single GPR input: opcode with
    // `rex1(in_reg0)` followed by `modrm_r_bits(in_reg0, bits, ..)`.
    recipes.add_template_recipe(
        EncodingRecipeBuilder::new("indirect_jmp", &formats.indirect_jump, 1)
            .operands_in(vec![gpr])
            .clobbers_flags(false)
            .emit(
                r#"
                    {{PUT_OP}}(bits, rex1(in_reg0), sink);
                    modrm_r_bits(in_reg0, bits, sink);
                "#,
            ),
    );
2579 
    // Jump table entry access (`branch_table_entry` format): two GPR inputs, one GPR output,
    // restricted by the `valid_scale` predicate.
    //
    // The first `sib` argument is derived from the immediate as `imm.trailing_zeros()`. Note the
    // operand order: `in_reg1` is the register tested by `needs_offset` and passed last to `sib`,
    // while `in_reg0` is passed as the middle argument. When `needs_offset(in_reg1)` holds, the
    // disp8 form with a zero displacement byte is emitted.
    recipes.add_template_recipe(
        EncodingRecipeBuilder::new("jt_entry", &formats.branch_table_entry, 2)
            .operands_in(vec![gpr, gpr])
            .operands_out(vec![gpr])
            .clobbers_flags(false)
            .inst_predicate(valid_scale(&*formats.branch_table_entry))
            .compute_size("size_plus_maybe_offset_for_inreg_1")
            .emit(
                r#"
                    {{PUT_OP}}(bits, rex3(in_reg1, out_reg0, in_reg0), sink);
                    if needs_offset(in_reg1) {
                        modrm_sib_disp8(out_reg0, sink);
                        sib(imm.trailing_zeros() as u8, in_reg0, in_reg1, sink);
                        sink.put1(0);
                    } else {
                        modrm_sib(out_reg0, sink);
                        sib(imm.trailing_zeros() as u8, in_reg0, in_reg1, sink);
                    }
                "#,
            ),
    );
2601 
    // Constant into an FPR (`unary_const` format), addressed through `modrm_riprel`.
    //
    // The template emits the opcode with `rex2(0, out_reg0)`, the RIP-relative ModR/M helper,
    // and then a displacement for `constant_handle` written by `const_disp4`. Registered via
    // `add_template_inferred` with `size_with_inferred_rex_for_outreg0` as the size function
    // name.
    recipes.add_template_inferred(
        EncodingRecipeBuilder::new("vconst", &formats.unary_const, 5)
            .operands_out(vec![fpr])
            .clobbers_flags(false)
            .emit(
                r#"
                    {{PUT_OP}}(bits, rex2(0, out_reg0), sink);
                    modrm_riprel(out_reg0, sink);
                    const_disp4(constant_handle, func, sink);
                "#,
            ),
        "size_with_inferred_rex_for_outreg0",
    );
2615 
    // Variant of the `unary_const` recipe that emits no constant data: the template is a
    // register-register form in which `out_reg0` is used for both operands of `rex2` and
    // `modrm_rr`.
    // NOTE(review): presumably used for constants an instruction can produce from the output
    // register alone — confirm against the encodings that select this recipe.
    recipes.add_template_inferred(
        EncodingRecipeBuilder::new("vconst_optimized", &formats.unary_const, 1)
            .operands_out(vec![fpr])
            .clobbers_flags(false)
            .emit(
                r#"
                    {{PUT_OP}}(bits, rex2(out_reg0, out_reg0), sink);
                    modrm_rr(out_reg0, out_reg0, sink);
                "#,
            ),
        "size_with_inferred_rex_for_outreg0",
    );
2628 
    // Jump table base (`branch_table_base` format) into a GPR, addressed through
    // `modrm_riprel`; the displacement for `table` is written by `jt_disp4`.
    recipes.add_template_recipe(
        EncodingRecipeBuilder::new("jt_base", &formats.branch_table_base, 5)
            .operands_out(vec![gpr])
            .clobbers_flags(false)
            .emit(
                r#"
                    {{PUT_OP}}(bits, rex2(0, out_reg0), sink);
                    modrm_riprel(out_reg0, sink);

                    // No reloc is needed here as the jump table is emitted directly after
                    // the function body.
                    jt_disp4(table, func, sink);
                "#,
            ),
    );
2644 
2645     // Test flags and set a register.
2646     //
2647     // These setCC instructions only set the low 8 bits, and they can only write ABCD registers
2648     // without a REX prefix.
2649     //
2650     // Other instruction encodings accepting `b1` inputs have the same constraints and only look at
2651     // the low 8 bits of the input register.
2652 
    // `seti`: produces a GPR from an integer condition (`int_cond` format), reading
    // `reg_rflags`. The opcode bits are OR-ed with `icc2opc(cond)`. The template is configured
    // with `RecipePrefixKind::AlwaysEmitRex`, and the handle returned here is later passed to
    // `when_prefixed` on the `seti_abcd` template.
    let seti = recipes.add_template(
        Template::new(
            EncodingRecipeBuilder::new("seti", &formats.int_cond, 1)
                .operands_in(vec![reg_rflags])
                .operands_out(vec![gpr])
                .clobbers_flags(false)
                .emit(
                    r#"
                    {{PUT_OP}}(bits | icc2opc(cond), rex1(out_reg0), sink);
                    modrm_r_bits(out_reg0, bits, sink);
                "#,
                ),
            regs,
        )
        .rex_kind(RecipePrefixKind::AlwaysEmitRex),
    );
2669 
    // `seti_abcd`: same emission template as `seti`, but the output is constrained to the
    // `abcd` register class.
    // NOTE(review): `when_prefixed(seti)` presumably makes `seti` the recipe used whenever a
    // REX-prefixed variant of this template is requested — confirm against
    // `Template::when_prefixed`.
    recipes.add_template(
        Template::new(
            EncodingRecipeBuilder::new("seti_abcd", &formats.int_cond, 1)
                .operands_in(vec![reg_rflags])
                .operands_out(vec![abcd])
                .clobbers_flags(false)
                .emit(
                    r#"
                    {{PUT_OP}}(bits | icc2opc(cond), rex1(out_reg0), sink);
                    modrm_r_bits(out_reg0, bits, sink);
                "#,
                ),
            regs,
        )
        .when_prefixed(seti),
    );
2686 
    // `setf`: produces a GPR from a floating-point condition (`float_cond` format), reading
    // `reg_rflags`. The opcode bits are OR-ed with `fcc2opc(cond)`. The template is configured
    // with `RecipePrefixKind::AlwaysEmitRex`, and the handle returned here is later passed to
    // `when_prefixed` on the `setf_abcd` template.
    let setf = recipes.add_template(
        Template::new(
            EncodingRecipeBuilder::new("setf", &formats.float_cond, 1)
                .operands_in(vec![reg_rflags])
                .operands_out(vec![gpr])
                .clobbers_flags(false)
                .emit(
                    r#"
                    {{PUT_OP}}(bits | fcc2opc(cond), rex1(out_reg0), sink);
                    modrm_r_bits(out_reg0, bits, sink);
                "#,
                ),
            regs,
        )
        .rex_kind(RecipePrefixKind::AlwaysEmitRex),
    );
2703 
    // `setf_abcd`: same emission template as `setf`, but the output is constrained to the
    // `abcd` register class.
    // NOTE(review): `when_prefixed(setf)` presumably makes `setf` the recipe used whenever a
    // REX-prefixed variant of this template is requested — confirm against
    // `Template::when_prefixed`.
    recipes.add_template(
        Template::new(
            EncodingRecipeBuilder::new("setf_abcd", &formats.float_cond, 1)
                .operands_in(vec![reg_rflags])
                .operands_out(vec![abcd])
                .clobbers_flags(false)
                .emit(
                    r#"
                    {{PUT_OP}}(bits | fcc2opc(cond), rex1(out_reg0), sink);
                    modrm_r_bits(out_reg0, bits, sink);
                "#,
                ),
            regs,
        )
        .when_prefixed(setf),
    );
2720 
    // Conditional move (a.k.a. integer select)
    // (maybe-REX.W) 0F 4x modrm(r,r)
    // 1 byte, modrm(r,r), is after the opcode
    //
    // Inputs are rflags (fixed register) followed by two GPRs. The condition code is folded
    // into the opcode by OR-ing `icc2opc(cond)` into `bits`; only `in_reg1` and `in_reg2` are
    // passed to `rex2` / `modrm_rr`.
    recipes.add_template(
        Template::new(
            EncodingRecipeBuilder::new("cmov", &formats.int_select, 1)
                .operands_in(vec![
                    OperandConstraint::FixedReg(reg_rflags),
                    OperandConstraint::RegClass(gpr),
                    OperandConstraint::RegClass(gpr),
                ])
                // NOTE(review): a bare integer constraint; presumably "output tied to input
                // operand 2" — confirm against the integer conversion accepted by `operands_out`.
                .operands_out(vec![2])
                .clobbers_flags(false)
                .emit(
                    r#"
                        {{PUT_OP}}(bits | icc2opc(cond), rex2(in_reg1, in_reg2), sink);
                        modrm_rr(in_reg1, in_reg2, sink);
                    "#,
                ),
            regs,
        )
        .inferred_rex_compute_size("size_with_inferred_rex_for_cmov"),
    );
2744 
    // Bit scan forwards and reverse
    //
    // One GPR input; two outputs: a GPR and rflags as a fixed-register output. The template is
    // a register-register form over `in_reg0` and `out_reg0`. Unlike most neighbouring recipes,
    // no `clobbers_flags(..)` call is made here.
    recipes.add_template(
        Template::new(
            EncodingRecipeBuilder::new("bsf_and_bsr", &formats.unary, 1)
                .operands_in(vec![gpr])
                .operands_out(vec![
                    OperandConstraint::RegClass(gpr),
                    OperandConstraint::FixedReg(reg_rflags),
                ])
                .emit(
                    r#"
                        {{PUT_OP}}(bits, rex2(in_reg0, out_reg0), sink);
                        modrm_rr(in_reg0, out_reg0, sink);
                    "#,
                ),
            regs,
        )
        .inferred_rex_compute_size("size_with_inferred_rex_for_inreg0_outreg0"),
    );
2764 
    // Arithmetic with flag I/O.
2766 
    // XX /r, MR form. Add two GPR registers and set carry flag.
    //
    // Two GPR inputs. The first output is tied to input 0 and rflags is declared as a second,
    // fixed-register output; the recipe is also marked as clobbering flags.
    recipes.add_template(
        Template::new(
            EncodingRecipeBuilder::new("rout", &formats.binary, 1)
                .operands_in(vec![gpr, gpr])
                .operands_out(vec![
                    OperandConstraint::TiedInput(0),
                    OperandConstraint::FixedReg(reg_rflags),
                ])
                .clobbers_flags(true)
                .emit(
                    r#"
                        {{PUT_OP}}(bits, rex2(in_reg0, in_reg1), sink);
                        modrm_rr(in_reg0, in_reg1, sink);
                    "#,
                ),
            regs,
        )
        .inferred_rex_compute_size("size_with_inferred_rex_for_inreg0_inreg1"),
    );
2787 
    // XX /r, MR form. Add two GPR registers and get carry flag.
    //
    // Inputs are two GPRs followed by rflags as a fixed register; rflags is an input only and
    // is not declared as an output. The recipe is marked as clobbering flags.
    recipes.add_template(
        Template::new(
            EncodingRecipeBuilder::new("rin", &formats.ternary, 1)
                .operands_in(vec![
                    OperandConstraint::RegClass(gpr),
                    OperandConstraint::RegClass(gpr),
                    OperandConstraint::FixedReg(reg_rflags),
                ])
                // NOTE(review): a bare integer constraint; presumably "output tied to input
                // operand 0" — confirm against the integer conversion accepted by `operands_out`.
                .operands_out(vec![0])
                .clobbers_flags(true)
                .emit(
                    r#"
                        {{PUT_OP}}(bits, rex2(in_reg0, in_reg1), sink);
                        modrm_rr(in_reg0, in_reg1, sink);
                    "#,
                ),
            regs,
        )
        .inferred_rex_compute_size("size_with_inferred_rex_for_inreg0_inreg1"),
    );
2809 
    // XX /r, MR form. Add two GPR registers with carry flag.
    //
    // Combines the flag input of `rin` with the flag output of `rout`: rflags appears as a
    // fixed-register input and as a fixed-register output, and the first output is tied to
    // input 0.
    recipes.add_template(
        Template::new(
            EncodingRecipeBuilder::new("rio", &formats.ternary, 1)
                .operands_in(vec![
                    OperandConstraint::RegClass(gpr),
                    OperandConstraint::RegClass(gpr),
                    OperandConstraint::FixedReg(reg_rflags),
                ])
                .operands_out(vec![
                    OperandConstraint::TiedInput(0),
                    OperandConstraint::FixedReg(reg_rflags),
                ])
                .clobbers_flags(true)
                .emit(
                    r#"
                        {{PUT_OP}}(bits, rex2(in_reg0, in_reg1), sink);
                        modrm_rr(in_reg0, in_reg1, sink);
                    "#,
                ),
            regs,
        )
        .inferred_rex_compute_size("size_with_inferred_rex_for_inreg0_inreg1"),
    );
2834 
2835     // Compare and set flags.
2836 
    // XX /r, MR form. Compare two GPR registers and set flags.
    //
    // Two GPR inputs; the only declared output is `reg_rflags`. The template is a
    // register-register form over `in_reg0` and `in_reg1`.
    recipes.add_template(
        Template::new(
            EncodingRecipeBuilder::new("rcmp", &formats.binary, 1)
                .operands_in(vec![gpr, gpr])
                .operands_out(vec![reg_rflags])
                .emit(
                    r#"
                        {{PUT_OP}}(bits, rex2(in_reg0, in_reg1), sink);
                        modrm_rr(in_reg0, in_reg1, sink);
                    "#,
                ),
            regs,
        )
        .inferred_rex_compute_size("size_with_inferred_rex_for_inreg0_inreg1"),
    );
2853 
    // Same as rcmp, but second operand is the stack pointer.
    //
    // Uses the `unary` format with a single GPR input; `RU::rsp` is hard-coded in the template
    // where `rcmp` passes its second input register.
    recipes.add_template_recipe(
        EncodingRecipeBuilder::new("rcmp_sp", &formats.unary, 1)
            .operands_in(vec![gpr])
            .operands_out(vec![reg_rflags])
            .emit(
                r#"
                    {{PUT_OP}}(bits, rex2(in_reg0, RU::rsp.into()), sink);
                    modrm_rr(in_reg0, RU::rsp.into(), sink);
                "#,
            ),
    );
2866 
    // XX /r, RM form. Compare two FPR registers and set flags.
    //
    // Two FPR inputs; the only declared output is `reg_rflags`. Note the operand order in the
    // template: `in_reg1` is passed before `in_reg0` to `rex2` / `modrm_rr`, the reverse of the
    // GPR `rcmp` recipe.
    recipes.add_template_inferred(
        EncodingRecipeBuilder::new("fcmp", &formats.binary, 1)
            .operands_in(vec![fpr, fpr])
            .operands_out(vec![reg_rflags])
            .emit(
                r#"
                    {{PUT_OP}}(bits, rex2(in_reg1, in_reg0), sink);
                    modrm_rr(in_reg1, in_reg0, sink);
                "#,
            ),
        "size_with_inferred_rex_for_inreg0_inreg1",
    );
2880 
2881     {
        // Predicate over the `imm` field of the `binary_imm64` format, built with a width
        // argument of 8.
        // NOTE(review): presumably "imm fits in a signed 8-bit value" — confirm against
        // `InstructionPredicate::new_is_signed_int`.
        let has_small_offset =
            InstructionPredicate::new_is_signed_int(&*formats.binary_imm64, "imm", 8, 0);

        // XX /n, MI form with imm8.
        //
        // One GPR input; the only declared output is `reg_rflags`. After the opcode and
        // `modrm_r_bits`, the immediate is written truncated to one byte (`imm as u8`); the
        // predicate above is what restricts which immediates reach this recipe.
        recipes.add_template(
            Template::new(
                EncodingRecipeBuilder::new("rcmp_ib", &formats.binary_imm64, 2)
                    .operands_in(vec![gpr])
                    .operands_out(vec![reg_rflags])
                    .inst_predicate(has_small_offset)
                    .emit(
                        r#"
                            {{PUT_OP}}(bits, rex1(in_reg0), sink);
                            modrm_r_bits(in_reg0, bits, sink);
                            let imm: i64 = imm.into();
                            sink.put1(imm as u8);
                        "#,
                    ),
                regs,
            )
            .inferred_rex_compute_size("size_with_inferred_rex_for_inreg0"),
        );
2904 
        // Predicate over the `imm` field of the `binary_imm64` format, built with a width
        // argument of 32.
        // NOTE(review): presumably "imm fits in a signed 32-bit value" — confirm against
        // `InstructionPredicate::new_is_signed_int`.
        let has_big_offset =
            InstructionPredicate::new_is_signed_int(&*formats.binary_imm64, "imm", 32, 0);

        // XX /n, MI form with imm32.
        //
        // One GPR input; the only declared output is `reg_rflags`. After the opcode and
        // `modrm_r_bits`, the immediate is written as four bytes (`imm as u32`).
        recipes.add_template(
            Template::new(
                EncodingRecipeBuilder::new("rcmp_id", &formats.binary_imm64, 5)
                    .operands_in(vec![gpr])
                    .operands_out(vec![reg_rflags])
                    .inst_predicate(has_big_offset)
                    .emit(
                        r#"
                            {{PUT_OP}}(bits, rex1(in_reg0), sink);
                            modrm_r_bits(in_reg0, bits, sink);
                            let imm: i64 = imm.into();
                            sink.put4(imm as u32);
                        "#,
                    ),
                regs,
            )
            .inferred_rex_compute_size("size_with_inferred_rex_for_inreg0"),
        );
2927     }
2928 
2929     // Test-and-branch.
2930     //
2931     // This recipe represents the macro fusion of a test and a conditional branch.
2932     // This serves two purposes:
2933     //
2934     // 1. Guarantee that the test and branch get scheduled next to each other so
2935     //    macro fusion is guaranteed to be possible.
2936     // 2. Hide the status flags from Cranelift which doesn't currently model flags.
2937     //
2938     // The encoding bits affect both the test and the branch instruction:
2939     //
2940     // Bits 0-7 are the Jcc opcode.
2941     // Bits 8-15 control the test instruction which always has opcode byte 0x85.
2942 
2943     recipes.add_template(
2944         Template::new(
2945             EncodingRecipeBuilder::new("tjccb", &formats.branch, 1 + 2)
2946                 .operands_in(vec![gpr])
2947                 .branch_range((3, 8))
2948                 .emit(
2949                     r#"
2950                         // test r, r.
2951                         {{PUT_OP}}((bits & 0xff00) | 0x85, rex2(in_reg0, in_reg0), sink);
2952                         modrm_rr(in_reg0, in_reg0, sink);
2953                         // Jcc instruction.
2954                         sink.put1(bits as u8);
2955                         disp1(destination, func, sink);
2956                     "#,
2957                 ),
2958             regs,
2959         )
2960         .inferred_rex_compute_size("size_with_inferred_rex_for_inreg0"),
2961     );
2962 
2963     recipes.add_template(
2964         Template::new(
2965             EncodingRecipeBuilder::new("tjccd", &formats.branch, 1 + 6)
2966                 .operands_in(vec![gpr])
2967                 .branch_range((7, 32))
2968                 .emit(
2969                     r#"
2970                         // test r, r.
2971                         {{PUT_OP}}((bits & 0xff00) | 0x85, rex2(in_reg0, in_reg0), sink);
2972                         modrm_rr(in_reg0, in_reg0, sink);
2973                         // Jcc instruction.
2974                         sink.put1(0x0f);
2975                         sink.put1(bits as u8);
2976                         disp4(destination, func, sink);
2977                     "#,
2978                 ),
2979             regs,
2980         )
2981         .inferred_rex_compute_size("size_with_inferred_rex_for_inreg0"),
2982     );
2983 
2984     // 8-bit test-and-branch.
2985 
2986     let t8jccb = recipes.add_template(
2987         Template::new(
2988             EncodingRecipeBuilder::new("t8jccb", &formats.branch, 1 + 2)
2989                 .operands_in(vec![gpr])
2990                 .branch_range((3, 8))
2991                 .emit(
2992                     r#"
2993                     // test8 r, r.
2994                     {{PUT_OP}}((bits & 0xff00) | 0x84, rex2(in_reg0, in_reg0), sink);
2995                     modrm_rr(in_reg0, in_reg0, sink);
2996                     // Jcc instruction.
2997                     sink.put1(bits as u8);
2998                     disp1(destination, func, sink);
2999                 "#,
3000                 ),
3001             regs,
3002         )
3003         .rex_kind(RecipePrefixKind::AlwaysEmitRex),
3004     );
3005 
3006     recipes.add_template(
3007         Template::new(
3008             EncodingRecipeBuilder::new("t8jccb_abcd", &formats.branch, 1 + 2)
3009                 .operands_in(vec![abcd])
3010                 .branch_range((3, 8))
3011                 .emit(
3012                     r#"
3013                     // test8 r, r.
3014                     {{PUT_OP}}((bits & 0xff00) | 0x84, rex2(in_reg0, in_reg0), sink);
3015                     modrm_rr(in_reg0, in_reg0, sink);
3016                     // Jcc instruction.
3017                     sink.put1(bits as u8);
3018                     disp1(destination, func, sink);
3019                 "#,
3020                 ),
3021             regs,
3022         )
3023         .when_prefixed(t8jccb),
3024     );
3025 
3026     let t8jccd = recipes.add_template(
3027         Template::new(
3028             EncodingRecipeBuilder::new("t8jccd", &formats.branch, 1 + 6)
3029                 .operands_in(vec![gpr])
3030                 .branch_range((7, 32))
3031                 .emit(
3032                     r#"
3033                     // test8 r, r.
3034                     {{PUT_OP}}((bits & 0xff00) | 0x84, rex2(in_reg0, in_reg0), sink);
3035                     modrm_rr(in_reg0, in_reg0, sink);
3036                     // Jcc instruction.
3037                     sink.put1(0x0f);
3038                     sink.put1(bits as u8);
3039                     disp4(destination, func, sink);
3040                 "#,
3041                 ),
3042             regs,
3043         )
3044         .rex_kind(RecipePrefixKind::AlwaysEmitRex),
3045     );
3046 
3047     recipes.add_template(
3048         Template::new(
3049             EncodingRecipeBuilder::new("t8jccd_abcd", &formats.branch, 1 + 6)
3050                 .operands_in(vec![abcd])
3051                 .branch_range((7, 32))
3052                 .emit(
3053                     r#"
3054                     // test8 r, r.
3055                     {{PUT_OP}}((bits & 0xff00) | 0x84, rex2(in_reg0, in_reg0), sink);
3056                     modrm_rr(in_reg0, in_reg0, sink);
3057                     // Jcc instruction.
3058                     sink.put1(0x0f);
3059                     sink.put1(bits as u8);
3060                     disp4(destination, func, sink);
3061                 "#,
3062                 ),
3063             regs,
3064         )
3065         .when_prefixed(t8jccd),
3066     );
3067 
    // Worst case test-and-branch recipe for brz.b1 and brnz.b1 in 32-bit mode.
    // The register allocator can't handle a branch instruction with constrained
    // operands like the t8jccd_abcd above. This variant can accept the b1 operand in
    // any register, but it is larger because it uses a 32-bit test instruction with
    // a 0xff immediate.
3073 
3074     recipes.add_template_recipe(
3075         EncodingRecipeBuilder::new("t8jccd_long", &formats.branch, 5 + 6)
3076             .operands_in(vec![gpr])
3077             .branch_range((11, 32))
3078             .emit(
3079                 r#"
3080                     // test32 r, 0xff.
3081                     {{PUT_OP}}((bits & 0xff00) | 0xf7, rex1(in_reg0), sink);
3082                     modrm_r_bits(in_reg0, bits, sink);
3083                     sink.put4(0xff);
3084                     // Jcc instruction.
3085                     sink.put1(0x0f);
3086                     sink.put1(bits as u8);
3087                     disp4(destination, func, sink);
3088                 "#,
3089             ),
3090     );
3091 
3092     // Comparison that produces a `b1` result in a GPR.
3093     //
3094     // This is a macro of a `cmp` instruction followed by a `setCC` instruction.
3095     //
3096     // TODO This is not a great solution because:
3097     //
3098     // - The cmp+setcc combination is not recognized by CPU's macro fusion.
3099     // - The 64-bit encoding has issues with REX prefixes. The `cmp` and `setCC`
3100     //   instructions may need a REX independently.
3101     // - Modeling CPU flags in the type system would be better.
3102     //
3103     // Since the `setCC` instructions only write an 8-bit register, we use that as
3104     // our `b1` representation: A `b1` value is represented as a GPR where the low 8
3105     // bits are known to be 0 or 1. The high bits are undefined.
3106     //
3107     // This bandaid macro doesn't support a REX prefix for the final `setCC`
3108     // instruction, so it is limited to the `ABCD` register class for booleans.
3109     // The omission of a `when_prefixed` alternative is deliberate here.
3110 
3111     recipes.add_template(
3112         Template::new(
3113             EncodingRecipeBuilder::new("icscc", &formats.int_compare, 1 + 3)
3114                 .operands_in(vec![gpr, gpr])
3115                 .operands_out(vec![abcd])
3116                 .emit(
3117                     r#"
3118                         // Comparison instruction.
3119                         {{PUT_OP}}(bits, rex2(in_reg0, in_reg1), sink);
3120                         modrm_rr(in_reg0, in_reg1, sink);
3121                         // `setCC` instruction, no REX.
3122                         let setcc = 0x90 | icc2opc(cond);
3123                         sink.put1(0x0f);
3124                         sink.put1(setcc as u8);
3125                         modrm_rr(out_reg0, 0, sink);
3126                     "#,
3127                 ),
3128             regs,
3129         )
3130         .inferred_rex_compute_size("size_with_inferred_rex_for_inreg0_inreg1"),
3131     );
3132 
3133     recipes.add_template_inferred(
3134         EncodingRecipeBuilder::new("icscc_fpr", &formats.int_compare, 1)
3135             .operands_in(vec![fpr, fpr])
3136             .operands_out(vec![0])
3137             .emit(
3138                 r#"
3139                     // Comparison instruction.
3140                     {{PUT_OP}}(bits, rex2(in_reg1, in_reg0), sink);
3141                     modrm_rr(in_reg1, in_reg0, sink);
3142                 "#,
3143             ),
3144         "size_with_inferred_rex_for_inreg0_inreg1",
3145     );
3146 
3147     {
3148         let is_small_imm =
3149             InstructionPredicate::new_is_signed_int(&*formats.int_compare_imm, "imm", 8, 0);
3150 
3151         recipes.add_template(
3152             Template::new(
3153                 EncodingRecipeBuilder::new("icscc_ib", &formats.int_compare_imm, 2 + 3)
3154                     .operands_in(vec![gpr])
3155                     .operands_out(vec![abcd])
3156                     .inst_predicate(is_small_imm)
3157                     .emit(
3158                         r#"
3159                             // Comparison instruction.
3160                             {{PUT_OP}}(bits, rex1(in_reg0), sink);
3161                             modrm_r_bits(in_reg0, bits, sink);
3162                             let imm: i64 = imm.into();
3163                             sink.put1(imm as u8);
3164                             // `setCC` instruction, no REX.
3165                             let setcc = 0x90 | icc2opc(cond);
3166                             sink.put1(0x0f);
3167                             sink.put1(setcc as u8);
3168                             modrm_rr(out_reg0, 0, sink);
3169                         "#,
3170                     ),
3171                 regs,
3172             )
3173             .inferred_rex_compute_size("size_with_inferred_rex_for_inreg0"),
3174         );
3175 
3176         let is_big_imm =
3177             InstructionPredicate::new_is_signed_int(&*formats.int_compare_imm, "imm", 32, 0);
3178 
3179         recipes.add_template(
3180             Template::new(
3181                 EncodingRecipeBuilder::new("icscc_id", &formats.int_compare_imm, 5 + 3)
3182                     .operands_in(vec![gpr])
3183                     .operands_out(vec![abcd])
3184                     .inst_predicate(is_big_imm)
3185                     .emit(
3186                         r#"
3187                             // Comparison instruction.
3188                             {{PUT_OP}}(bits, rex1(in_reg0), sink);
3189                             modrm_r_bits(in_reg0, bits, sink);
3190                             let imm: i64 = imm.into();
3191                             sink.put4(imm as u32);
3192                             // `setCC` instruction, no REX.
3193                             let setcc = 0x90 | icc2opc(cond);
3194                             sink.put1(0x0f);
3195                             sink.put1(setcc as u8);
3196                             modrm_rr(out_reg0, 0, sink);
3197                         "#,
3198                     ),
3199                 regs,
3200             )
3201             .inferred_rex_compute_size("size_with_inferred_rex_for_inreg0"),
3202         );
3203     }
3204 
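    // Floating point counterpart of the `icscc` recipes above: a comparison instruction
    // followed by a `setCC` instruction that produces a `b1` result in a GPR.
    //
    // An instruction predicate restricts the recipe to the supported FloatCompare
    // condition codes.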
3208     //
    // The ucomiss/ucomisd instructions set the FLAGS bits ZF/PF/CF like this:
3210     //
3211     //    ZPC OSA
3212     // UN 111 000
3213     // GT 000 000
3214     // LT 001 000
3215     // EQ 100 000
3216     //
3217     // Not all floating point condition codes are supported.
3218     // The omission of a `when_prefixed` alternative is deliberate here.
3219 
3220     recipes.add_template_recipe(
3221         EncodingRecipeBuilder::new("fcscc", &formats.float_compare, 1 + 3)
3222             .operands_in(vec![fpr, fpr])
3223             .operands_out(vec![abcd])
3224             .inst_predicate(supported_floatccs_predicate(
3225                 &supported_floatccs,
3226                 &*formats.float_compare,
3227             ))
3228             .emit(
3229                 r#"
3230                     // Comparison instruction.
3231                     {{PUT_OP}}(bits, rex2(in_reg1, in_reg0), sink);
3232                     modrm_rr(in_reg1, in_reg0, sink);
3233                     // `setCC` instruction, no REX.
3234                     use crate::ir::condcodes::FloatCC::*;
3235                     let setcc = match cond {
3236                         Ordered                    => 0x9b, // EQ|LT|GT => setnp (P=0)
3237                         Unordered                  => 0x9a, // UN       => setp  (P=1)
3238                         OrderedNotEqual            => 0x95, // LT|GT    => setne (Z=0),
3239                         UnorderedOrEqual           => 0x94, // UN|EQ    => sete  (Z=1)
3240                         GreaterThan                => 0x97, // GT       => seta  (C=0&Z=0)
3241                         GreaterThanOrEqual         => 0x93, // GT|EQ    => setae (C=0)
3242                         UnorderedOrLessThan        => 0x92, // UN|LT    => setb  (C=1)
3243                         UnorderedOrLessThanOrEqual => 0x96, // UN|LT|EQ => setbe (Z=1|C=1)
3244                         Equal |                       // EQ
3245                         NotEqual |                    // UN|LT|GT
3246                         LessThan |                    // LT
3247                         LessThanOrEqual |             // LT|EQ
3248                         UnorderedOrGreaterThan |      // UN|GT
3249                         UnorderedOrGreaterThanOrEqual // UN|GT|EQ
3250                         => panic!("{} not supported by fcscc", cond),
3251                     };
3252                     sink.put1(0x0f);
3253                     sink.put1(setcc);
3254                     modrm_rr(out_reg0, 0, sink);
3255                 "#,
3256             ),
3257     );
3258 
3259     {
3260         let supported_floatccs: Vec<Literal> = ["eq", "lt", "le", "uno", "ne", "uge", "ugt", "ord"]
3261             .iter()
3262             .map(|name| Literal::enumerator_for(floatcc, name))
3263             .collect();
3264         recipes.add_template_inferred(
3265             EncodingRecipeBuilder::new("pfcmp", &formats.float_compare, 2)
3266                 .operands_in(vec![fpr, fpr])
3267                 .operands_out(vec![0])
3268                 .inst_predicate(supported_floatccs_predicate(
3269                     &supported_floatccs[..],
3270                     &*formats.float_compare,
3271                 ))
3272                 .emit(
3273                     r#"
3274                     // Comparison instruction.
3275                     {{PUT_OP}}(bits, rex2(in_reg1, in_reg0), sink);
3276                     modrm_rr(in_reg1, in_reg0, sink);
3277                     // Add immediate byte indicating what type of comparison.
3278                     use crate::ir::condcodes::FloatCC::*;
3279                     let imm = match cond {
3280                         Equal                      => 0x00,
3281                         LessThan                   => 0x01,
3282                         LessThanOrEqual            => 0x02,
3283                         Unordered                  => 0x03,
3284                         NotEqual                   => 0x04,
3285                         UnorderedOrGreaterThanOrEqual => 0x05,
3286                         UnorderedOrGreaterThan => 0x06,
3287                         Ordered                    => 0x07,
3288                         _ => panic!("{} not supported by pfcmp", cond),
3289                     };
3290                     sink.put1(imm);
3291                 "#,
3292                 ),
3293             "size_with_inferred_rex_for_inreg0_inreg1",
3294         );
3295     }
3296 
3297     recipes.add_template_recipe(
3298         EncodingRecipeBuilder::new("is_zero", &formats.unary, 2 + 2)
3299             .operands_in(vec![gpr])
3300             .operands_out(vec![abcd])
3301             .emit(
3302                 r#"
3303                     // Test instruction.
3304                     {{PUT_OP}}(bits, rex2(in_reg0, in_reg0), sink);
3305                     modrm_rr(in_reg0, in_reg0, sink);
3306                     // Check ZF = 1 flag to see if register holds 0.
3307                     sink.put1(0x0f);
3308                     sink.put1(0x94);
3309                     modrm_rr(out_reg0, 0, sink);
3310                 "#,
3311             ),
3312     );
3313 
3314     recipes.add_template_recipe(
3315         EncodingRecipeBuilder::new("is_invalid", &formats.unary, 2 + 3)
3316             .operands_in(vec![gpr])
3317             .operands_out(vec![abcd])
3318             .emit(
3319                 r#"
3320                     // Comparison instruction.
3321                     {{PUT_OP}}(bits, rex1(in_reg0), sink);
3322                     modrm_r_bits(in_reg0, bits, sink);
3323                     sink.put1(0xff);
3324                     // `setCC` instruction, no REX.
3325                     use crate::ir::condcodes::IntCC::*;
3326                     let setcc = 0x90 | icc2opc(Equal);
3327                     sink.put1(0x0f);
3328                     sink.put1(setcc as u8);
3329                     modrm_rr(out_reg0, 0, sink);
3330                 "#,
3331             ),
3332     );
3333 
3334     recipes.add_recipe(
3335         EncodingRecipeBuilder::new("safepoint", &formats.multiary, 0).emit(
3336             r#"
3337                 sink.add_stack_map(args, func, isa);
3338             "#,
3339         ),
3340     );
3341 
3342     // Both `elf_tls_get_addr` and `macho_tls_get_addr` require all caller-saved registers to be spilled.
3343     // This is currently special cased in `regalloc/spilling.rs` in the `visit_inst` function.
3344 
3345     recipes.add_recipe(
3346         EncodingRecipeBuilder::new("elf_tls_get_addr", &formats.unary_global_value, 16)
3347             // FIXME Correct encoding for non rax registers
3348             .operands_out(vec![reg_rax])
3349             .emit(
3350                 r#"
3351                     // output %rax
3352                     // clobbers %rdi
3353 
3354                     // Those data16 prefixes are necessary to pad to 16 bytes.
3355 
3356                     // data16 lea gv@tlsgd(%rip),%rdi
3357                     sink.put1(0x66); // data16
3358                     sink.put1(0b01001000); // rex.w
3359                     const LEA: u8 = 0x8d;
3360                     sink.put1(LEA); // lea
3361                     modrm_riprel(0b111/*out_reg0*/, sink); // 0x3d
3362                     sink.reloc_external(func.srclocs[inst],
3363                                         Reloc::ElfX86_64TlsGd,
3364                                         &func.global_values[global_value].symbol_name(),
3365                                         -4);
3366                     sink.put4(0);
3367 
3368                     // data16 data16 callq __tls_get_addr-4
3369                     sink.put1(0x66); // data16
3370                     sink.put1(0x66); // data16
3371                     sink.put1(0b01001000); // rex.w
3372                     sink.put1(0xe8); // call
3373                     sink.reloc_external(func.srclocs[inst],
3374                                         Reloc::X86CallPLTRel4,
3375                                         &ExternalName::LibCall(LibCall::ElfTlsGetAddr),
3376                                         -4);
3377                     sink.put4(0);
3378                 "#,
3379             ),
3380     );
3381 
3382     recipes.add_recipe(
3383         EncodingRecipeBuilder::new("macho_tls_get_addr", &formats.unary_global_value, 9)
3384             // FIXME Correct encoding for non rax registers
3385             .operands_out(vec![reg_rax])
3386             .emit(
3387                 r#"
3388                     // output %rax
3389                     // clobbers %rdi
3390 
3391                     // movq gv@tlv(%rip), %rdi
3392                     sink.put1(0x48); // rex
3393                     sink.put1(0x8b); // mov
3394                     modrm_riprel(0b111/*out_reg0*/, sink); // 0x3d
3395                     sink.reloc_external(func.srclocs[inst],
3396                                         Reloc::MachOX86_64Tlv,
3397                                         &func.global_values[global_value].symbol_name(),
3398                                         -4);
3399                     sink.put4(0);
3400 
3401                     // callq *(%rdi)
3402                     sink.put1(0xff);
3403                     sink.put1(0x17);
3404                 "#,
3405             ),
3406     );
3407 
3408     recipes.add_template(
3409         Template::new(
3410         EncodingRecipeBuilder::new("evex_reg_vvvv_rm_128", &formats.binary, 1)
3411             .operands_in(vec![fpr, fpr])
3412             .operands_out(vec![fpr])
3413             .emit(
3414                 r#"
3415                 // instruction encoding operands: reg (op1, w), vvvv (op2, r), rm (op3, r)
3416                 // this maps to:                  out_reg0,     in_reg0,       in_reg1
3417                 let context = EvexContext::Other { length: EvexVectorLength::V128 };
3418                 let masking = EvexMasking::None;
3419                 put_evex(bits, out_reg0, in_reg0, in_reg1, context, masking, sink); // params: reg, vvvv, rm
3420                 modrm_rr(in_reg1, out_reg0, sink); // params: rm, reg
3421                 "#,
3422             ),
3423         regs).rex_kind(RecipePrefixKind::Evex)
3424     );
3425 
3426     recipes.add_template(
3427         Template::new(
3428             EncodingRecipeBuilder::new("evex_reg_rm_128", &formats.unary, 1)
3429                 .operands_in(vec![fpr])
3430                 .operands_out(vec![fpr])
3431                 .emit(
3432                     r#"
3433                 // instruction encoding operands: reg (op1, w), rm (op2, r)
3434                 // this maps to:                  out_reg0,     in_reg0
3435                 let context = EvexContext::Other { length: EvexVectorLength::V128 };
3436                 let masking = EvexMasking::None;
3437                 put_evex(bits, out_reg0, 0, in_reg0, context, masking, sink); // params: reg, vvvv, rm
3438                 modrm_rr(in_reg0, out_reg0, sink); // params: rm, reg
3439                 "#,
3440                 ),
3441             regs).rex_kind(RecipePrefixKind::Evex)
3442     );
3443 
3444     recipes
3445 }
3446