#![allow(non_snake_case)]

use cranelift_codegen_shared::condcodes::IntCC;
use std::collections::HashMap;

use crate::cdsl::encodings::{Encoding, EncodingBuilder};
use crate::cdsl::instructions::{
    vector, Bindable, Immediate, InstSpec, Instruction, InstructionGroup, InstructionPredicate,
    InstructionPredicateNode, InstructionPredicateRegistry,
};
use crate::cdsl::recipes::{EncodingRecipe, EncodingRecipeNumber, Recipes};
use crate::cdsl::settings::{SettingGroup, SettingPredicateNumber};
use crate::cdsl::types::{LaneType, ValueType};
use crate::shared::types::Bool::{B1, B16, B32, B64, B8};
use crate::shared::types::Float::{F32, F64};
use crate::shared::types::Int::{I16, I32, I64, I8};
use crate::shared::types::Reference::{R32, R64};
use crate::shared::Definitions as SharedDefinitions;

use crate::isa::x86::opcodes::*;

use super::recipes::{RecipeGroup, Template};
use crate::cdsl::instructions::BindParameter::Any;

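/// Encoding tables collected per CPU mode while defining the x86 ISA: one list
/// of encodings for 32-bit mode (`enc32`), one for 64-bit mode (`enc64`), plus
/// the interned recipes and instruction predicates those encodings reference.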
pub(crate) struct PerCpuModeEncodings {
    pub enc32: Vec<Encoding>,
    pub enc64: Vec<Encoding>,
    pub recipes: Recipes,
    recipes_by_name: HashMap<String, EncodingRecipeNumber>,
    pub inst_pred_reg: InstructionPredicateRegistry,
}

impl PerCpuModeEncodings {
    fn new() -> Self {
        Self {
            enc32: Vec::new(),
            enc64: Vec::new(),
            recipes: Recipes::new(),
            recipes_by_name: HashMap::new(),
            inst_pred_reg: InstructionPredicateRegistry::new(),
        }
    }
    fn add_recipe(&mut self, recipe: EncodingRecipe) -> EncodingRecipeNumber {
        if let Some(found_index) = self.recipes_by_name.get(&recipe.name) {
            assert!(
                self.recipes[*found_index] == recipe,
                "trying to insert different recipes with the same name ({})",
                recipe.name
            );
            *found_index
        } else {
            let recipe_name = recipe.name.clone();
            let index = self.recipes.push(recipe);
            self.recipes_by_name.insert(recipe_name, index);
            index
        }
    }

    fn make_encoding<T>(
        &mut self,
        inst: InstSpec,
        template: Template,
        builder_closure: T,
    ) -> Encoding
    where
        T: FnOnce(EncodingBuilder) -> EncodingBuilder,
    {
        let (recipe, bits) = template.build();
        let recipe_number = self.add_recipe(recipe);
        let builder = EncodingBuilder::new(inst, recipe_number, bits);
        builder_closure(builder).build(&self.recipes, &mut self.inst_pred_reg)
    }

    fn enc32_func<T>(&mut self, inst: impl Into<InstSpec>, template: Template, builder_closure: T)
    where
        T: FnOnce(EncodingBuilder) -> EncodingBuilder,
    {
        let encoding = self.make_encoding(inst.into(), template, builder_closure);
        self.enc32.push(encoding);
    }
    fn enc32(&mut self, inst: impl Into<InstSpec>, template: Template) {
        self.enc32_func(inst, template, |x| x);
    }
    fn enc32_isap(
        &mut self,
        inst: impl Into<InstSpec>,
        template: Template,
        isap: SettingPredicateNumber,
    ) {
        self.enc32_func(inst, template, |encoding| encoding.isa_predicate(isap));
    }
    fn enc32_instp(
        &mut self,
        inst: impl Into<InstSpec>,
        template: Template,
        instp: InstructionPredicateNode,
    ) {
        self.enc32_func(inst, template, |encoding| encoding.inst_predicate(instp));
    }
    fn enc32_rec(&mut self, inst: impl Into<InstSpec>, recipe: &EncodingRecipe, bits: u16) {
        let recipe_number = self.add_recipe(recipe.clone());
        let builder = EncodingBuilder::new(inst.into(), recipe_number, bits);
        let encoding = builder.build(&self.recipes, &mut self.inst_pred_reg);
        self.enc32.push(encoding);
    }

    fn enc64_func<T>(&mut self, inst: impl Into<InstSpec>, template: Template, builder_closure: T)
    where
        T: FnOnce(EncodingBuilder) -> EncodingBuilder,
    {
        let encoding = self.make_encoding(inst.into(), template, builder_closure);
        self.enc64.push(encoding);
    }
    fn enc64(&mut self, inst: impl Into<InstSpec>, template: Template) {
        self.enc64_func(inst, template, |x| x);
    }
    fn enc64_isap(
        &mut self,
        inst: impl Into<InstSpec>,
        template: Template,
        isap: SettingPredicateNumber,
    ) {
        self.enc64_func(inst, template, |encoding| encoding.isa_predicate(isap));
    }
    fn enc64_instp(
        &mut self,
        inst: impl Into<InstSpec>,
        template: Template,
        instp: InstructionPredicateNode,
    ) {
        self.enc64_func(inst, template, |encoding| encoding.inst_predicate(instp));
    }
    fn enc64_rec(&mut self, inst: impl Into<InstSpec>, recipe: &EncodingRecipe, bits: u16) {
        let recipe_number = self.add_recipe(recipe.clone());
        let builder = EncodingBuilder::new(inst.into(), recipe_number, bits);
        let encoding = builder.build(&self.recipes, &mut self.inst_pred_reg);
        self.enc64.push(encoding);
    }

    /// Adds I32/I64 encodings as appropriate for a typed instruction.
    /// The REX prefix is always inferred at runtime.
    ///
    /// Add encodings for `inst.i32` to X86_32.
    /// Add encodings for `inst.i32` to X86_64 with optional, inferred REX.
    /// Add encodings for `inst.i64` to X86_64 with a REX.W prefix.
    fn enc_i32_i64(&mut self, inst: impl Into<InstSpec>, template: Template) {
        let inst: InstSpec = inst.into();

        // I32 on x86: no REX prefix.
        self.enc32(inst.bind(I32), template.infer_rex());

        // I32 on x86_64: REX.W unset; REX.RXB determined at runtime from registers.
        self.enc64(inst.bind(I32), template.infer_rex());

        // I64 on x86_64: REX.W set; REX.RXB determined at runtime from registers.
        self.enc64(inst.bind(I64), template.rex().w());
    }

    /// Adds I32/I64 encodings as appropriate for a typed instruction.
    /// All variants of the REX prefix are emitted explicitly, not inferred.
    ///
    /// Add encodings for `inst.i32` to X86_32.
    /// Add encodings for `inst.i32` to X86_64 with and without REX.
    /// Add encodings for `inst.i64` to X86_64 with and without REX.
    fn enc_i32_i64_explicit_rex(&mut self, inst: impl Into<InstSpec>, template: Template) {
        let inst: InstSpec = inst.into();
        self.enc32(inst.bind(I32), template.nonrex());

        // The REX-less encoding must come after the REX encoding so we don't use it by
        // default. Otherwise reg-alloc would never use r8 and up.
        self.enc64(inst.bind(I32), template.rex());
        self.enc64(inst.bind(I32), template.nonrex());
        self.enc64(inst.bind(I64), template.rex().w());
    }

    /// Adds B32/B64 encodings as appropriate for a typed instruction.
    /// The REX prefix is always inferred at runtime.
    ///
    /// Adds encoding for `inst.b32` to X86_32.
    /// Adds encoding for `inst.b32` to X86_64 with optional, inferred REX.
    /// Adds encoding for `inst.b64` to X86_64 with a REX.W prefix.
    fn enc_b32_b64(&mut self, inst: impl Into<InstSpec>, template: Template) {
        let inst: InstSpec = inst.into();

        // B32 on x86: no REX prefix.
        self.enc32(inst.bind(B32), template.infer_rex());

        // B32 on x86_64: REX.W unset; REX.RXB determined at runtime from registers.
        self.enc64(inst.bind(B32), template.infer_rex());

        // B64 on x86_64: REX.W set; REX.RXB determined at runtime from registers.
        self.enc64(inst.bind(B64), template.rex().w());
    }

    /// Add encodings for `inst.i32` to X86_32.
    /// Add encodings for `inst.i32` to X86_64 with a REX prefix.
    /// Add encodings for `inst.i64` to X86_64 with a REX.W prefix.
    fn enc_i32_i64_rex_only(&mut self, inst: impl Into<InstSpec>, template: Template) {
        let inst: InstSpec = inst.into();
        self.enc32(inst.bind(I32), template.nonrex());
        self.enc64(inst.bind(I32), template.rex());
        self.enc64(inst.bind(I64), template.rex().w());
    }

    /// Add encodings for `inst.i32` to X86_32.
    /// Add encodings for `inst.i32` to X86_64 with and without REX.
    /// Add encodings for `inst.i64` to X86_64 with a REX.W prefix.
    fn enc_i32_i64_instp(
        &mut self,
        inst: &Instruction,
        template: Template,
        instp: InstructionPredicateNode,
    ) {
        self.enc32_func(inst.bind(I32), template.nonrex(), |builder| {
            builder.inst_predicate(instp.clone())
        });

        // The REX-less encoding must come after the REX encoding so we don't use it by
        // default. Otherwise reg-alloc would never use r8 and up.
        self.enc64_func(inst.bind(I32), template.rex(), |builder| {
            builder.inst_predicate(instp.clone())
        });
        self.enc64_func(inst.bind(I32), template.nonrex(), |builder| {
            builder.inst_predicate(instp.clone())
        });
        self.enc64_func(inst.bind(I64), template.rex().w(), |builder| {
            builder.inst_predicate(instp)
        });
    }

    /// Add encodings for `inst.r32` to X86_32.
    /// Add encodings for `inst.r64` to X86_64 with a REX.W prefix.
    fn enc_r32_r64_rex_only(&mut self, inst: impl Into<InstSpec>, template: Template) {
        let inst: InstSpec = inst.into();
        self.enc32(inst.bind(R32), template.nonrex());
        self.enc64(inst.bind(R64), template.rex().w());
    }

    fn enc_r32_r64_ld_st(&mut self, inst: &Instruction, w_bit: bool, template: Template) {
        self.enc32(inst.clone().bind(R32).bind(Any), template.clone());

        // The REX-less encoding must come after the REX encoding so we don't use it by
        // default. Otherwise reg-alloc would never use r8 and up.
        self.enc64(inst.clone().bind(R32).bind(Any), template.clone().rex());
        self.enc64(inst.clone().bind(R32).bind(Any), template.clone());

        if w_bit {
            self.enc64(inst.clone().bind(R64).bind(Any), template.rex().w());
        } else {
            self.enc64(inst.clone().bind(R64).bind(Any), template.clone().rex());
            self.enc64(inst.clone().bind(R64).bind(Any), template);
        }
    }

    /// Add encodings for `inst` to X86_64 with and without a REX prefix.
    fn enc_x86_64(&mut self, inst: impl Into<InstSpec> + Clone, template: Template) {
        // See the above comment about the ordering of REX vs. non-REX encodings.
        self.enc64(inst.clone(), template.rex());
        self.enc64(inst, template);
    }

    /// Add encodings for `inst` to X86_64 with and without a REX prefix.
    fn enc_x86_64_instp(
        &mut self,
        inst: impl Clone + Into<InstSpec>,
        template: Template,
        instp: InstructionPredicateNode,
    ) {
        // See the above comment about the ordering of REX vs. non-REX encodings.
        self.enc64_func(inst.clone(), template.rex(), |builder| {
            builder.inst_predicate(instp.clone())
        });
        self.enc64_func(inst, template, |builder| builder.inst_predicate(instp));
    }
    fn enc_x86_64_isap(
        &mut self,
        inst: impl Clone + Into<InstSpec>,
        template: Template,
        isap: SettingPredicateNumber,
    ) {
        // See the above comment about the ordering of REX vs. non-REX encodings.
        self.enc64_isap(inst.clone(), template.rex(), isap);
        self.enc64_isap(inst, template, isap);
    }

    /// Add all three encodings for `inst`:
    /// - X86_32
    /// - X86_64 with and without the REX prefix.
    fn enc_both(&mut self, inst: impl Clone + Into<InstSpec>, template: Template) {
        self.enc32(inst.clone(), template.clone());
        self.enc_x86_64(inst, template);
    }
    fn enc_both_isap(
        &mut self,
        inst: impl Clone + Into<InstSpec>,
        template: Template,
        isap: SettingPredicateNumber,
    ) {
        self.enc32_isap(inst.clone(), template.clone(), isap);
        self.enc_x86_64_isap(inst, template, isap);
    }
    fn enc_both_instp(
        &mut self,
        inst: impl Clone + Into<InstSpec>,
        template: Template,
        instp: InstructionPredicateNode,
    ) {
        self.enc32_instp(inst.clone(), template.clone(), instp.clone());
        self.enc_x86_64_instp(inst, template, instp);
    }

    /// Add two encodings for `inst`:
    /// - X86_32, no REX prefix, since it is not valid in 32-bit mode.
    /// - X86_64, with the REX prefix inferred dynamically.
    fn enc_both_inferred(&mut self, inst: impl Clone + Into<InstSpec>, template: Template) {
        self.enc32(inst.clone(), template.clone());
        self.enc64(inst, template.infer_rex());
    }
    fn enc_both_inferred_maybe_isap(
        &mut self,
        inst: impl Clone + Into<InstSpec>,
        template: Template,
        isap: Option<SettingPredicateNumber>,
    ) {
        self.enc32_maybe_isap(inst.clone(), template.clone(), isap);
        self.enc64_maybe_isap(inst, template.infer_rex(), isap);
    }

    /// Add two encodings for `inst`:
    /// - X86_32
    /// - X86_64 with the REX prefix.
    fn enc_both_rex_only(&mut self, inst: impl Clone + Into<InstSpec>, template: Template) {
        self.enc32(inst.clone(), template.clone());
        self.enc64(inst, template.rex());
    }

    /// Add encodings for `inst.i32` to X86_32.
    /// Add encodings for `inst.i32` to X86_64 with and without REX.
    /// Add encodings for `inst.i64` to X86_64 with a REX prefix, using the `w_bit`
    /// argument to determine whether or not to set the REX.W bit.
    fn enc_i32_i64_ld_st(&mut self, inst: &Instruction, w_bit: bool, template: Template) {
        self.enc32(inst.clone().bind(I32).bind(Any), template.clone());

        // The REX-less encoding must come after the REX encoding so we don't use it by
        // default. Otherwise reg-alloc would never use r8 and up.
        self.enc64(inst.clone().bind(I32).bind(Any), template.clone().rex());
        self.enc64(inst.clone().bind(I32).bind(Any), template.clone());

        if w_bit {
            self.enc64(inst.clone().bind(I64).bind(Any), template.rex().w());
        } else {
            self.enc64(inst.clone().bind(I64).bind(Any), template.clone().rex());
            self.enc64(inst.clone().bind(I64).bind(Any), template);
        }
    }

    /// Add the same encoding/recipe pairing to both X86_32 and X86_64.
    fn enc_32_64_rec(
        &mut self,
        inst: impl Clone + Into<InstSpec>,
        recipe: &EncodingRecipe,
        bits: u16,
    ) {
        self.enc32_rec(inst.clone(), recipe, bits);
        self.enc64_rec(inst, recipe, bits);
    }

    /// Add the same encoding to both X86_32 and X86_64; assumes configuration (e.g. REX,
    /// operand binding) has already happened.
    fn enc_32_64_func<T>(
        &mut self,
        inst: impl Clone + Into<InstSpec>,
        template: Template,
        builder_closure: T,
    ) where
        T: FnOnce(EncodingBuilder) -> EncodingBuilder,
    {
        let encoding = self.make_encoding(inst.into(), template, builder_closure);
        self.enc32.push(encoding.clone());
        self.enc64.push(encoding);
    }

    /// Add the same encoding to both X86_32 and X86_64; assumes configuration (e.g. REX, operand
    /// binding) has already happened.
    fn enc_32_64_maybe_isap(
        &mut self,
        inst: impl Clone + Into<InstSpec>,
        template: Template,
        isap: Option<SettingPredicateNumber>,
    ) {
        self.enc32_maybe_isap(inst.clone(), template.clone(), isap);
        self.enc64_maybe_isap(inst, template, isap);
    }

    fn enc32_maybe_isap(
        &mut self,
        inst: impl Into<InstSpec>,
        template: Template,
        isap: Option<SettingPredicateNumber>,
    ) {
        match isap {
            None => self.enc32(inst, template),
            Some(isap) => self.enc32_isap(inst, template, isap),
        }
    }

    fn enc64_maybe_isap(
        &mut self,
        inst: impl Into<InstSpec>,
        template: Template,
        isap: Option<SettingPredicateNumber>,
    ) {
        match isap {
            None => self.enc64(inst, template),
            Some(isap) => self.enc64_isap(inst, template, isap),
        }
    }
}

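// A rough sketch of how the helpers above compose (illustrative only): a single
// call like `e.enc_both(copy.bind(B1), rec_umr.opcodes(&MOV_STORE))` adds one
// `enc32` entry plus two `enc64` entries (REX first, then non-REX), so 64-bit
// mode prefers the REX form and register allocation keeps access to r8..r15.
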
// Definitions.

#[inline(never)]
fn define_moves(e: &mut PerCpuModeEncodings, shared_defs: &SharedDefinitions, r: &RecipeGroup) {
    let shared = &shared_defs.instructions;
    let formats = &shared_defs.formats;

    // Shorthands for instructions.
    let bconst = shared.by_name("bconst");
    let bint = shared.by_name("bint");
    let copy = shared.by_name("copy");
    let copy_special = shared.by_name("copy_special");
    let copy_to_ssa = shared.by_name("copy_to_ssa");
    let get_pinned_reg = shared.by_name("get_pinned_reg");
    let iconst = shared.by_name("iconst");
    let ireduce = shared.by_name("ireduce");
    let regmove = shared.by_name("regmove");
    let sextend = shared.by_name("sextend");
    let set_pinned_reg = shared.by_name("set_pinned_reg");
    let uextend = shared.by_name("uextend");

    // Shorthands for recipes.
    let rec_copysp = r.template("copysp");
    let rec_furm_reg_to_ssa = r.template("furm_reg_to_ssa");
    let rec_get_pinned_reg = r.recipe("get_pinned_reg");
    let rec_null = r.recipe("null");
    let rec_pu_id = r.template("pu_id");
    let rec_pu_id_bool = r.template("pu_id_bool");
    let rec_pu_iq = r.template("pu_iq");
    let rec_rmov = r.template("rmov");
    let rec_set_pinned_reg = r.template("set_pinned_reg");
    let rec_u_id = r.template("u_id");
    let rec_u_id_z = r.template("u_id_z");
    let rec_umr = r.template("umr");
    let rec_umr_reg_to_ssa = r.template("umr_reg_to_ssa");
    let rec_urm_noflags = r.template("urm_noflags");
    let rec_urm_noflags_abcd = r.template("urm_noflags_abcd");

    // The pinned register is fixed to a value entirely controlled by the user, so
    // reading it generates no code!
    e.enc64_rec(get_pinned_reg.bind(I64), rec_get_pinned_reg, 0);
    e.enc_x86_64(
        set_pinned_reg.bind(I64),
        rec_set_pinned_reg.opcodes(&MOV_STORE).rex().w(),
    );

    e.enc_i32_i64(copy, rec_umr.opcodes(&MOV_STORE));
    e.enc_r32_r64_rex_only(copy, rec_umr.opcodes(&MOV_STORE));
    e.enc_both(copy.bind(B1), rec_umr.opcodes(&MOV_STORE));
    e.enc_both(copy.bind(I8), rec_umr.opcodes(&MOV_STORE));
    e.enc_both(copy.bind(I16), rec_umr.opcodes(&MOV_STORE));

    // TODO For x86-64, only define REX forms for now, since we can't describe the
    // special regunit immediate operands with the current constraint language.
    for &ty in &[I8, I16, I32] {
        e.enc32(regmove.bind(ty), rec_rmov.opcodes(&MOV_STORE));
        e.enc64(regmove.bind(ty), rec_rmov.opcodes(&MOV_STORE).rex());
    }
    for &ty in &[B8, B16, B32] {
        e.enc32(regmove.bind(ty), rec_rmov.opcodes(&MOV_STORE));
        e.enc64(regmove.bind(ty), rec_rmov.opcodes(&MOV_STORE).rex());
    }
    e.enc64(regmove.bind(I64), rec_rmov.opcodes(&MOV_STORE).rex().w());
    e.enc_both(regmove.bind(B1), rec_rmov.opcodes(&MOV_STORE));
    e.enc_both(regmove.bind(I8), rec_rmov.opcodes(&MOV_STORE));
    e.enc32(regmove.bind(R32), rec_rmov.opcodes(&MOV_STORE));
    e.enc64(regmove.bind(R32), rec_rmov.opcodes(&MOV_STORE).rex());
    e.enc64(regmove.bind(R64), rec_rmov.opcodes(&MOV_STORE).rex().w());

    // Immediate constants.
    e.enc32(iconst.bind(I32), rec_pu_id.opcodes(&MOV_IMM));

    e.enc64(iconst.bind(I32), rec_pu_id.rex().opcodes(&MOV_IMM));
    e.enc64(iconst.bind(I32), rec_pu_id.opcodes(&MOV_IMM));

    // The 32-bit immediate movl also zero-extends to 64 bits.
    let is_unsigned_int32 =
        InstructionPredicate::new_is_unsigned_int(&*formats.unary_imm, "imm", 32, 0);

    e.enc64_func(
        iconst.bind(I64),
        rec_pu_id.opcodes(&MOV_IMM).rex(),
        |encoding| encoding.inst_predicate(is_unsigned_int32.clone()),
    );
    e.enc64_func(iconst.bind(I64), rec_pu_id.opcodes(&MOV_IMM), |encoding| {
        encoding.inst_predicate(is_unsigned_int32)
    });
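
    // For example, in 64-bit mode a 32-bit move such as `movl $42, %eax` writes the
    // full 64-bit register, leaving %rax == 42; this implicit zero-extension is why
    // the unsigned-32-bit predicate above is sufficient for `iconst.i64`.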

    // Sign-extended 32-bit immediate.
    e.enc64(
        iconst.bind(I64),
        rec_u_id.rex().opcodes(&MOV_IMM_SIGNEXTEND).rrr(0).w(),
    );

    // Finally, the MOV_IMM opcode takes an 8-byte immediate with a REX.W prefix.
    e.enc64(iconst.bind(I64), rec_pu_iq.opcodes(&MOV_IMM).rex().w());

    // Bool constants (uses MOV).
    for &ty in &[B1, B8, B16, B32] {
        e.enc_both(bconst.bind(ty), rec_pu_id_bool.opcodes(&MOV_IMM));
    }
    e.enc64(bconst.bind(B64), rec_pu_id_bool.opcodes(&MOV_IMM).rex());

    let is_zero_int = InstructionPredicate::new_is_zero_int(&formats.unary_imm, "imm");
    e.enc_both_instp(
        iconst.bind(I8),
        rec_u_id_z.opcodes(&XORB),
        is_zero_int.clone(),
    );

    // You may expect that i16 encodings would have a 0x66 prefix on the opcode to
    // indicate that the operation is on 16-bit operands (e.g., "xor %ax, %ax").
    // Cranelift currently does not know that it can drop the 0x66 prefix and clear
    // the upper half of a 32-bit register in these scenarios, so we explicitly
    // select a wider but permissible opcode.
    //
    // This effectively formalizes the i16->i32 widening that Cranelift performs
    // when there isn't an appropriate i16 encoding available.
    e.enc_both_instp(
        iconst.bind(I16),
        rec_u_id_z.opcodes(&XOR),
        is_zero_int.clone(),
    );
    e.enc_both_instp(
        iconst.bind(I32),
        rec_u_id_z.opcodes(&XOR),
        is_zero_int.clone(),
    );
    e.enc_x86_64_instp(iconst.bind(I64), rec_u_id_z.opcodes(&XOR), is_zero_int);
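
    // For example, `iconst.i64 0` matches `is_zero_int` and can be emitted as a
    // 32-bit `xor %eax, %eax`; the 32-bit write clears the upper half of %rax, so
    // neither REX.W nor a 64-bit immediate is needed.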

    // Numerical conversions.

    // Reducing an integer is a no-op.
    e.enc32_rec(ireduce.bind(I8).bind(I16), rec_null, 0);
    e.enc32_rec(ireduce.bind(I8).bind(I32), rec_null, 0);
    e.enc32_rec(ireduce.bind(I16).bind(I32), rec_null, 0);

    e.enc64_rec(ireduce.bind(I8).bind(I16), rec_null, 0);
    e.enc64_rec(ireduce.bind(I8).bind(I32), rec_null, 0);
    e.enc64_rec(ireduce.bind(I16).bind(I32), rec_null, 0);
    e.enc64_rec(ireduce.bind(I8).bind(I64), rec_null, 0);
    e.enc64_rec(ireduce.bind(I16).bind(I64), rec_null, 0);
    e.enc64_rec(ireduce.bind(I32).bind(I64), rec_null, 0);
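
    // For example, `ireduce.i8` of an `i32` value just reinterprets the low byte
    // of the value's register, so no instruction is emitted at all.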

    // TODO: Add encodings for cbw, cwde, cdqe, which are sign-extending
    // instructions for %al/%ax/%eax to %ax/%eax/%rax.

    // movsbl
    e.enc32(
        sextend.bind(I32).bind(I8),
        rec_urm_noflags_abcd.opcodes(&MOVSX_BYTE),
    );
    e.enc64(
        sextend.bind(I32).bind(I8),
        rec_urm_noflags.opcodes(&MOVSX_BYTE).rex(),
    );
    e.enc64(
        sextend.bind(I32).bind(I8),
        rec_urm_noflags_abcd.opcodes(&MOVSX_BYTE),
    );

    // movswl
    e.enc32(
        sextend.bind(I32).bind(I16),
        rec_urm_noflags.opcodes(&MOVSX_WORD),
    );
    e.enc64(
        sextend.bind(I32).bind(I16),
        rec_urm_noflags.opcodes(&MOVSX_WORD).rex(),
    );
    e.enc64(
        sextend.bind(I32).bind(I16),
        rec_urm_noflags.opcodes(&MOVSX_WORD),
    );

    // movsbq
    e.enc64(
        sextend.bind(I64).bind(I8),
        rec_urm_noflags.opcodes(&MOVSX_BYTE).rex().w(),
    );

    // movswq
    e.enc64(
        sextend.bind(I64).bind(I16),
        rec_urm_noflags.opcodes(&MOVSX_WORD).rex().w(),
    );

    // movslq
    e.enc64(
        sextend.bind(I64).bind(I32),
        rec_urm_noflags.opcodes(&MOVSXD).rex().w(),
    );

    // movzbl
    e.enc32(
        uextend.bind(I32).bind(I8),
        rec_urm_noflags_abcd.opcodes(&MOVZX_BYTE),
    );
    e.enc64(
        uextend.bind(I32).bind(I8),
        rec_urm_noflags.opcodes(&MOVZX_BYTE).rex(),
    );
    e.enc64(
        uextend.bind(I32).bind(I8),
        rec_urm_noflags_abcd.opcodes(&MOVZX_BYTE),
    );

    // movzwl
    e.enc32(
        uextend.bind(I32).bind(I16),
        rec_urm_noflags.opcodes(&MOVZX_WORD),
    );
    e.enc64(
        uextend.bind(I32).bind(I16),
        rec_urm_noflags.opcodes(&MOVZX_WORD).rex(),
    );
    e.enc64(
        uextend.bind(I32).bind(I16),
        rec_urm_noflags.opcodes(&MOVZX_WORD),
    );

    // movzbq, encoded as movzbl because it's equivalent and shorter.
    e.enc64(
        uextend.bind(I64).bind(I8),
        rec_urm_noflags.opcodes(&MOVZX_BYTE).rex(),
    );
    e.enc64(
        uextend.bind(I64).bind(I8),
        rec_urm_noflags_abcd.opcodes(&MOVZX_BYTE),
    );

    // movzwq, encoded as movzwl because it's equivalent and shorter.
    e.enc64(
        uextend.bind(I64).bind(I16),
        rec_urm_noflags.opcodes(&MOVZX_WORD).rex(),
    );
    e.enc64(
        uextend.bind(I64).bind(I16),
        rec_urm_noflags.opcodes(&MOVZX_WORD),
    );

    // A 32-bit register copy clears the high 32 bits.
    e.enc64(
        uextend.bind(I64).bind(I32),
        rec_umr.opcodes(&MOV_STORE).rex(),
    );
    e.enc64(uextend.bind(I64).bind(I32), rec_umr.opcodes(&MOV_STORE));
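    // For example, `mov %esi, %eax` (a 32-bit move) zeroes bits 63..32 of %rax,
    // so a plain register-to-register copy implements `uextend.i64.i32`.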

    // Convert bool to int.
    //
    // This assumes that b1 is represented as an 8-bit low register with the value 0
    // or 1.
    //
    // Encode movzbq as movzbl, because it's equivalent and shorter.
    for &to in &[I8, I16, I32, I64] {
        for &from in &[B1, B8] {
            e.enc64(
                bint.bind(to).bind(from),
                rec_urm_noflags.opcodes(&MOVZX_BYTE).rex(),
            );
            e.enc64(
                bint.bind(to).bind(from),
                rec_urm_noflags_abcd.opcodes(&MOVZX_BYTE),
            );
            if to != I64 {
                e.enc32(
                    bint.bind(to).bind(from),
                    rec_urm_noflags_abcd.opcodes(&MOVZX_BYTE),
                );
            }
        }
    }

    // Copy Special
    // For x86-64, only define REX forms for now, since we can't describe the
    // special regunit immediate operands with the current constraint language.
    e.enc64(copy_special, rec_copysp.opcodes(&MOV_STORE).rex().w());
    e.enc32(copy_special, rec_copysp.opcodes(&MOV_STORE));

    // Copy to SSA. These have to be done with special _rex_only encoders, because the standard
    // machinery for deciding whether a REX.{RXB} prefix is needed doesn't take into account
    // the source register, which is specified directly in the instruction.
    e.enc_i32_i64_rex_only(copy_to_ssa, rec_umr_reg_to_ssa.opcodes(&MOV_STORE));
    e.enc_r32_r64_rex_only(copy_to_ssa, rec_umr_reg_to_ssa.opcodes(&MOV_STORE));
    e.enc_both_rex_only(copy_to_ssa.bind(B1), rec_umr_reg_to_ssa.opcodes(&MOV_STORE));
    e.enc_both_rex_only(copy_to_ssa.bind(I8), rec_umr_reg_to_ssa.opcodes(&MOV_STORE));
    e.enc_both_rex_only(
        copy_to_ssa.bind(I16),
        rec_umr_reg_to_ssa.opcodes(&MOV_STORE),
    );
    e.enc_both_rex_only(
        copy_to_ssa.bind(F64),
        rec_furm_reg_to_ssa.opcodes(&MOVSD_LOAD),
    );
    e.enc_both_rex_only(
        copy_to_ssa.bind(F32),
        rec_furm_reg_to_ssa.opcodes(&MOVSS_LOAD),
    );
}

#[inline(never)]
fn define_memory(
    e: &mut PerCpuModeEncodings,
    shared_defs: &SharedDefinitions,
    x86: &InstructionGroup,
    r: &RecipeGroup,
) {
    let shared = &shared_defs.instructions;
    let formats = &shared_defs.formats;

    // Shorthands for instructions.
    let adjust_sp_down = shared.by_name("adjust_sp_down");
    let adjust_sp_down_imm = shared.by_name("adjust_sp_down_imm");
    let adjust_sp_up_imm = shared.by_name("adjust_sp_up_imm");
    let copy_nop = shared.by_name("copy_nop");
    let fill = shared.by_name("fill");
    let fill_nop = shared.by_name("fill_nop");
    let istore16 = shared.by_name("istore16");
    let istore16_complex = shared.by_name("istore16_complex");
    let istore32 = shared.by_name("istore32");
    let istore32_complex = shared.by_name("istore32_complex");
    let istore8 = shared.by_name("istore8");
    let istore8_complex = shared.by_name("istore8_complex");
    let load = shared.by_name("load");
    let load_complex = shared.by_name("load_complex");
    let regfill = shared.by_name("regfill");
    let regspill = shared.by_name("regspill");
    let sload16 = shared.by_name("sload16");
    let sload16_complex = shared.by_name("sload16_complex");
    let sload32 = shared.by_name("sload32");
    let sload32_complex = shared.by_name("sload32_complex");
    let sload8 = shared.by_name("sload8");
    let sload8_complex = shared.by_name("sload8_complex");
    let spill = shared.by_name("spill");
    let store = shared.by_name("store");
    let store_complex = shared.by_name("store_complex");
    let uload16 = shared.by_name("uload16");
    let uload16_complex = shared.by_name("uload16_complex");
    let uload32 = shared.by_name("uload32");
    let uload32_complex = shared.by_name("uload32_complex");
    let uload8 = shared.by_name("uload8");
    let uload8_complex = shared.by_name("uload8_complex");
    let x86_pop = x86.by_name("x86_pop");
    let x86_push = x86.by_name("x86_push");

    // Shorthands for recipes.
    let rec_adjustsp = r.template("adjustsp");
    let rec_adjustsp_ib = r.template("adjustsp_ib");
    let rec_adjustsp_id = r.template("adjustsp_id");
    let rec_ffillnull = r.recipe("ffillnull");
    let rec_fillnull = r.recipe("fillnull");
    let rec_fillSib32 = r.template("fillSib32");
    let rec_ld = r.template("ld");
    let rec_ldDisp32 = r.template("ldDisp32");
    let rec_ldDisp8 = r.template("ldDisp8");
    let rec_ldWithIndex = r.template("ldWithIndex");
    let rec_ldWithIndexDisp32 = r.template("ldWithIndexDisp32");
    let rec_ldWithIndexDisp8 = r.template("ldWithIndexDisp8");
    let rec_popq = r.template("popq");
    let rec_pushq = r.template("pushq");
    let rec_regfill32 = r.template("regfill32");
    let rec_regspill32 = r.template("regspill32");
    let rec_spillSib32 = r.template("spillSib32");
    let rec_st = r.template("st");
    let rec_stacknull = r.recipe("stacknull");
    let rec_stDisp32 = r.template("stDisp32");
    let rec_stDisp32_abcd = r.template("stDisp32_abcd");
    let rec_stDisp8 = r.template("stDisp8");
    let rec_stDisp8_abcd = r.template("stDisp8_abcd");
    let rec_stWithIndex = r.template("stWithIndex");
    let rec_stWithIndexDisp32 = r.template("stWithIndexDisp32");
    let rec_stWithIndexDisp32_abcd = r.template("stWithIndexDisp32_abcd");
    let rec_stWithIndexDisp8 = r.template("stWithIndexDisp8");
    let rec_stWithIndexDisp8_abcd = r.template("stWithIndexDisp8_abcd");
    let rec_stWithIndex_abcd = r.template("stWithIndex_abcd");
    let rec_st_abcd = r.template("st_abcd");

    // Loads and stores.
    let is_load_complex_length_two =
        InstructionPredicate::new_length_equals(&*formats.load_complex, 2);

    for recipe in &[rec_ldWithIndex, rec_ldWithIndexDisp8, rec_ldWithIndexDisp32] {
        e.enc_i32_i64_instp(
            load_complex,
            recipe.opcodes(&MOV_LOAD),
            is_load_complex_length_two.clone(),
        );
        e.enc_x86_64_instp(
            uload32_complex,
            recipe.opcodes(&MOV_LOAD),
            is_load_complex_length_two.clone(),
        );

        e.enc64_instp(
            sload32_complex,
            recipe.opcodes(&MOVSXD).rex().w(),
            is_load_complex_length_two.clone(),
        );

        e.enc_i32_i64_instp(
            uload16_complex,
            recipe.opcodes(&MOVZX_WORD),
            is_load_complex_length_two.clone(),
        );
        e.enc_i32_i64_instp(
            sload16_complex,
            recipe.opcodes(&MOVSX_WORD),
            is_load_complex_length_two.clone(),
        );

        e.enc_i32_i64_instp(
            uload8_complex,
            recipe.opcodes(&MOVZX_BYTE),
            is_load_complex_length_two.clone(),
        );

        e.enc_i32_i64_instp(
            sload8_complex,
            recipe.opcodes(&MOVSX_BYTE),
            is_load_complex_length_two.clone(),
        );
    }

    let is_store_complex_length_three =
        InstructionPredicate::new_length_equals(&*formats.store_complex, 3);

    for recipe in &[rec_stWithIndex, rec_stWithIndexDisp8, rec_stWithIndexDisp32] {
        e.enc_i32_i64_instp(
            store_complex,
            recipe.opcodes(&MOV_STORE),
            is_store_complex_length_three.clone(),
        );
        e.enc_x86_64_instp(
            istore32_complex,
            recipe.opcodes(&MOV_STORE),
            is_store_complex_length_three.clone(),
        );
        e.enc_both_instp(
            istore16_complex.bind(I32),
            recipe.opcodes(&MOV_STORE_16),
            is_store_complex_length_three.clone(),
        );
        e.enc_x86_64_instp(
            istore16_complex.bind(I64),
            recipe.opcodes(&MOV_STORE_16),
            is_store_complex_length_three.clone(),
        );
    }

    for recipe in &[
        rec_stWithIndex_abcd,
        rec_stWithIndexDisp8_abcd,
        rec_stWithIndexDisp32_abcd,
    ] {
        e.enc_both_instp(
            istore8_complex.bind(I32),
            recipe.opcodes(&MOV_BYTE_STORE),
            is_store_complex_length_three.clone(),
        );
        e.enc_x86_64_instp(
            istore8_complex.bind(I64),
            recipe.opcodes(&MOV_BYTE_STORE),
            is_store_complex_length_three.clone(),
        );
    }

    for recipe in &[rec_st, rec_stDisp8, rec_stDisp32] {
        e.enc_i32_i64_ld_st(store, true, recipe.opcodes(&MOV_STORE));
        e.enc_r32_r64_ld_st(store, true, recipe.opcodes(&MOV_STORE));
        e.enc_x86_64(istore32.bind(I64).bind(Any), recipe.opcodes(&MOV_STORE));
        e.enc_i32_i64_ld_st(istore16, false, recipe.opcodes(&MOV_STORE_16));
    }

    // Byte stores are more complicated because the set of registers they can address
    // depends on the presence of a REX prefix. The st*_abcd recipes fall back to the
    // corresponding st* recipes when a REX prefix is applied.
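    // Without REX, only %al, %bl, %cl and %dl are addressable as byte registers
    // (hence the "abcd" register class); with REX, %sil, %dil, %bpl, %spl and
    // r8b..r15b become available as well.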

    for recipe in &[rec_st_abcd, rec_stDisp8_abcd, rec_stDisp32_abcd] {
        e.enc_both(istore8.bind(I32).bind(Any), recipe.opcodes(&MOV_BYTE_STORE));
        e.enc_x86_64(istore8.bind(I64).bind(Any), recipe.opcodes(&MOV_BYTE_STORE));
    }

    e.enc_i32_i64_explicit_rex(spill, rec_spillSib32.opcodes(&MOV_STORE));
    e.enc_i32_i64_explicit_rex(regspill, rec_regspill32.opcodes(&MOV_STORE));
    e.enc_r32_r64_rex_only(spill, rec_spillSib32.opcodes(&MOV_STORE));
    e.enc_r32_r64_rex_only(regspill, rec_regspill32.opcodes(&MOV_STORE));

    // Use a 32-bit write for spilling `b1`, `i8` and `i16` to avoid
    // constraining the permitted registers.
    // See MIN_SPILL_SLOT_SIZE which makes this safe.

    e.enc_both(spill.bind(B1), rec_spillSib32.opcodes(&MOV_STORE));
    e.enc_both(regspill.bind(B1), rec_regspill32.opcodes(&MOV_STORE));
    for &ty in &[I8, I16] {
        e.enc_both(spill.bind(ty), rec_spillSib32.opcodes(&MOV_STORE));
        e.enc_both(regspill.bind(ty), rec_regspill32.opcodes(&MOV_STORE));
    }

    for recipe in &[rec_ld, rec_ldDisp8, rec_ldDisp32] {
        e.enc_i32_i64_ld_st(load, true, recipe.opcodes(&MOV_LOAD));
        e.enc_r32_r64_ld_st(load, true, recipe.opcodes(&MOV_LOAD));
        e.enc_x86_64(uload32.bind(I64), recipe.opcodes(&MOV_LOAD));
        e.enc64(sload32.bind(I64), recipe.opcodes(&MOVSXD).rex().w());
        e.enc_i32_i64_ld_st(uload16, true, recipe.opcodes(&MOVZX_WORD));
        e.enc_i32_i64_ld_st(sload16, true, recipe.opcodes(&MOVSX_WORD));
        e.enc_i32_i64_ld_st(uload8, true, recipe.opcodes(&MOVZX_BYTE));
        e.enc_i32_i64_ld_st(sload8, true, recipe.opcodes(&MOVSX_BYTE));
    }

    e.enc_i32_i64_explicit_rex(fill, rec_fillSib32.opcodes(&MOV_LOAD));
    e.enc_i32_i64_explicit_rex(regfill, rec_regfill32.opcodes(&MOV_LOAD));
    e.enc_r32_r64_rex_only(fill, rec_fillSib32.opcodes(&MOV_LOAD));
    e.enc_r32_r64_rex_only(regfill, rec_regfill32.opcodes(&MOV_LOAD));

    // No-op fills, created by late-stage redundant-fill removal.
    for &ty in &[I64, I32, I16, I8] {
        e.enc64_rec(fill_nop.bind(ty), rec_fillnull, 0);
        e.enc32_rec(fill_nop.bind(ty), rec_fillnull, 0);
    }
    e.enc64_rec(fill_nop.bind(B1), rec_fillnull, 0);
    e.enc32_rec(fill_nop.bind(B1), rec_fillnull, 0);
    for &ty in &[F64, F32] {
        e.enc64_rec(fill_nop.bind(ty), rec_ffillnull, 0);
        e.enc32_rec(fill_nop.bind(ty), rec_ffillnull, 0);
    }

    // Load 32 bits from `b1`, `i8` and `i16` spill slots. See `spill.b1` above.

    e.enc_both(fill.bind(B1), rec_fillSib32.opcodes(&MOV_LOAD));
    e.enc_both(regfill.bind(B1), rec_regfill32.opcodes(&MOV_LOAD));
    for &ty in &[I8, I16] {
        e.enc_both(fill.bind(ty), rec_fillSib32.opcodes(&MOV_LOAD));
        e.enc_both(regfill.bind(ty), rec_regfill32.opcodes(&MOV_LOAD));
    }

    // Push and Pop.
    e.enc32(x86_push.bind(I32), rec_pushq.opcodes(&PUSH_REG));
    e.enc_x86_64(x86_push.bind(I64), rec_pushq.opcodes(&PUSH_REG));

    e.enc32(x86_pop.bind(I32), rec_popq.opcodes(&POP_REG));
    e.enc_x86_64(x86_pop.bind(I64), rec_popq.opcodes(&POP_REG));

    // Stack-slot-to-the-same-stack-slot copy, which is guaranteed to turn
    // into a no-op.
    // The same encoding is generated for both the 64- and 32-bit architectures.
    for &ty in &[I64, I32, I16, I8] {
        e.enc64_rec(copy_nop.bind(ty), rec_stacknull, 0);
        e.enc32_rec(copy_nop.bind(ty), rec_stacknull, 0);
    }
    for &ty in &[F64, F32] {
        e.enc64_rec(copy_nop.bind(ty), rec_stacknull, 0);
        e.enc32_rec(copy_nop.bind(ty), rec_stacknull, 0);
    }

    // Adjust SP down by a dynamic value (or up, with a negative operand).
    e.enc32(adjust_sp_down.bind(I32), rec_adjustsp.opcodes(&SUB));
    e.enc64(
        adjust_sp_down.bind(I64),
        rec_adjustsp.opcodes(&SUB).rex().w(),
    );

    // Adjust SP up by an immediate (or down, with a negative immediate).
    e.enc32(adjust_sp_up_imm, rec_adjustsp_ib.opcodes(&CMP_IMM8));
    e.enc32(adjust_sp_up_imm, rec_adjustsp_id.opcodes(&CMP_IMM));
    e.enc64(
        adjust_sp_up_imm,
        rec_adjustsp_ib.opcodes(&CMP_IMM8).rex().w(),
    );
    e.enc64(
        adjust_sp_up_imm,
        rec_adjustsp_id.opcodes(&CMP_IMM).rex().w(),
    );
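
    // Note: CMP_IMM8/CMP_IMM name x86 group-1 opcodes here (presumably 0x83/0x81),
    // where the ModR/M reg field chosen via `rrr` selects the operation: the
    // default of 0 above yields ADD, while `rrr(5)` below selects SUB.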

    // Adjust SP down by an immediate (or up, with a negative immediate).
    e.enc32(
        adjust_sp_down_imm,
        rec_adjustsp_ib.opcodes(&CMP_IMM8).rrr(5),
    );
    e.enc32(adjust_sp_down_imm, rec_adjustsp_id.opcodes(&CMP_IMM).rrr(5));
    e.enc64(
        adjust_sp_down_imm,
        rec_adjustsp_ib.opcodes(&CMP_IMM8).rrr(5).rex().w(),
    );
    e.enc64(
        adjust_sp_down_imm,
        rec_adjustsp_id.opcodes(&CMP_IMM).rrr(5).rex().w(),
    );
}

#[inline(never)]
fn define_fpu_moves(e: &mut PerCpuModeEncodings, shared_defs: &SharedDefinitions, r: &RecipeGroup) {
    let shared = &shared_defs.instructions;

    // Shorthands for instructions.
    let bitcast = shared.by_name("bitcast");
    let copy = shared.by_name("copy");
    let regmove = shared.by_name("regmove");

    // Shorthands for recipes.
    let rec_frmov = r.template("frmov");
    let rec_frurm = r.template("frurm");
    let rec_furm = r.template("furm");
    let rec_rfumr = r.template("rfumr");

    // Floating-point moves.
    // movd
    e.enc_both(
        bitcast.bind(F32).bind(I32),
        rec_frurm.opcodes(&MOVD_LOAD_XMM),
    );
    e.enc_both(
        bitcast.bind(I32).bind(F32),
        rec_rfumr.opcodes(&MOVD_STORE_XMM),
    );

    // movq
    e.enc64(
        bitcast.bind(F64).bind(I64),
        rec_frurm.opcodes(&MOVD_LOAD_XMM).rex().w(),
    );
    e.enc64(
        bitcast.bind(I64).bind(F64),
        rec_rfumr.opcodes(&MOVD_STORE_XMM).rex().w(),
    );

    // movaps
    e.enc_both(copy.bind(F32), rec_furm.opcodes(&MOVAPS_LOAD));
    e.enc_both(copy.bind(F64), rec_furm.opcodes(&MOVAPS_LOAD));

    // TODO For x86-64, only define REX forms for now, since we can't describe the special regunit
    // immediate operands with the current constraint language.
    e.enc32(regmove.bind(F32), rec_frmov.opcodes(&MOVAPS_LOAD));
    e.enc64(regmove.bind(F32), rec_frmov.opcodes(&MOVAPS_LOAD).rex());

    // TODO For x86-64, only define REX forms for now, since we can't describe the special regunit
    // immediate operands with the current constraint language.
    e.enc32(regmove.bind(F64), rec_frmov.opcodes(&MOVAPS_LOAD));
    e.enc64(regmove.bind(F64), rec_frmov.opcodes(&MOVAPS_LOAD).rex());
}

#[inline(never)]
fn define_fpu_memory(
    e: &mut PerCpuModeEncodings,
    shared_defs: &SharedDefinitions,
    r: &RecipeGroup,
) {
    let shared = &shared_defs.instructions;

    // Shorthands for instructions.
    let fill = shared.by_name("fill");
    let load = shared.by_name("load");
    let load_complex = shared.by_name("load_complex");
    let regfill = shared.by_name("regfill");
    let regspill = shared.by_name("regspill");
    let spill = shared.by_name("spill");
    let store = shared.by_name("store");
    let store_complex = shared.by_name("store_complex");

    // Shorthands for recipes.
    let rec_ffillSib32 = r.template("ffillSib32");
    let rec_fld = r.template("fld");
    let rec_fldDisp32 = r.template("fldDisp32");
    let rec_fldDisp8 = r.template("fldDisp8");
    let rec_fldWithIndex = r.template("fldWithIndex");
    let rec_fldWithIndexDisp32 = r.template("fldWithIndexDisp32");
    let rec_fldWithIndexDisp8 = r.template("fldWithIndexDisp8");
    let rec_fregfill32 = r.template("fregfill32");
    let rec_fregspill32 = r.template("fregspill32");
    let rec_fspillSib32 = r.template("fspillSib32");
    let rec_fst = r.template("fst");
    let rec_fstDisp32 = r.template("fstDisp32");
    let rec_fstDisp8 = r.template("fstDisp8");
    let rec_fstWithIndex = r.template("fstWithIndex");
    let rec_fstWithIndexDisp32 = r.template("fstWithIndexDisp32");
    let rec_fstWithIndexDisp8 = r.template("fstWithIndexDisp8");

    // Float loads and stores.
    e.enc_both(load.bind(F32).bind(Any), rec_fld.opcodes(&MOVSS_LOAD));
    e.enc_both(load.bind(F32).bind(Any), rec_fldDisp8.opcodes(&MOVSS_LOAD));
    e.enc_both(load.bind(F32).bind(Any), rec_fldDisp32.opcodes(&MOVSS_LOAD));

    e.enc_both(
        load_complex.bind(F32),
        rec_fldWithIndex.opcodes(&MOVSS_LOAD),
    );
    e.enc_both(
        load_complex.bind(F32),
        rec_fldWithIndexDisp8.opcodes(&MOVSS_LOAD),
    );
    e.enc_both(
        load_complex.bind(F32),
        rec_fldWithIndexDisp32.opcodes(&MOVSS_LOAD),
    );

    e.enc_both(load.bind(F64).bind(Any), rec_fld.opcodes(&MOVSD_LOAD));
    e.enc_both(load.bind(F64).bind(Any), rec_fldDisp8.opcodes(&MOVSD_LOAD));
    e.enc_both(load.bind(F64).bind(Any), rec_fldDisp32.opcodes(&MOVSD_LOAD));

    e.enc_both(
        load_complex.bind(F64),
        rec_fldWithIndex.opcodes(&MOVSD_LOAD),
    );
    e.enc_both(
        load_complex.bind(F64),
        rec_fldWithIndexDisp8.opcodes(&MOVSD_LOAD),
    );
    e.enc_both(
        load_complex.bind(F64),
        rec_fldWithIndexDisp32.opcodes(&MOVSD_LOAD),
    );

    e.enc_both(store.bind(F32).bind(Any), rec_fst.opcodes(&MOVSS_STORE));
    e.enc_both(
        store.bind(F32).bind(Any),
        rec_fstDisp8.opcodes(&MOVSS_STORE),
    );
    e.enc_both(
        store.bind(F32).bind(Any),
        rec_fstDisp32.opcodes(&MOVSS_STORE),
    );

    e.enc_both(
        store_complex.bind(F32),
        rec_fstWithIndex.opcodes(&MOVSS_STORE),
    );
    e.enc_both(
        store_complex.bind(F32),
        rec_fstWithIndexDisp8.opcodes(&MOVSS_STORE),
    );
    e.enc_both(
        store_complex.bind(F32),
        rec_fstWithIndexDisp32.opcodes(&MOVSS_STORE),
    );

    e.enc_both(store.bind(F64).bind(Any), rec_fst.opcodes(&MOVSD_STORE));
    e.enc_both(
        store.bind(F64).bind(Any),
        rec_fstDisp8.opcodes(&MOVSD_STORE),
    );
    e.enc_both(
        store.bind(F64).bind(Any),
        rec_fstDisp32.opcodes(&MOVSD_STORE),
    );

    e.enc_both(
        store_complex.bind(F64),
        rec_fstWithIndex.opcodes(&MOVSD_STORE),
    );
    e.enc_both(
        store_complex.bind(F64),
        rec_fstWithIndexDisp8.opcodes(&MOVSD_STORE),
    );
    e.enc_both(
        store_complex.bind(F64),
        rec_fstWithIndexDisp32.opcodes(&MOVSD_STORE),
    );

    e.enc_both(fill.bind(F32), rec_ffillSib32.opcodes(&MOVSS_LOAD));
    e.enc_both(regfill.bind(F32), rec_fregfill32.opcodes(&MOVSS_LOAD));
    e.enc_both(fill.bind(F64), rec_ffillSib32.opcodes(&MOVSD_LOAD));
    e.enc_both(regfill.bind(F64), rec_fregfill32.opcodes(&MOVSD_LOAD));

    e.enc_both(spill.bind(F32), rec_fspillSib32.opcodes(&MOVSS_STORE));
    e.enc_both(regspill.bind(F32), rec_fregspill32.opcodes(&MOVSS_STORE));
    e.enc_both(spill.bind(F64), rec_fspillSib32.opcodes(&MOVSD_STORE));
    e.enc_both(regspill.bind(F64), rec_fregspill32.opcodes(&MOVSD_STORE));
}

#[inline(never)]
fn define_fpu_ops(
    e: &mut PerCpuModeEncodings,
    shared_defs: &SharedDefinitions,
    settings: &SettingGroup,
    x86: &InstructionGroup,
    r: &RecipeGroup,
) {
    let shared = &shared_defs.instructions;
    let formats = &shared_defs.formats;

    // Shorthands for instructions.
    let ceil = shared.by_name("ceil");
    let f32const = shared.by_name("f32const");
    let f64const = shared.by_name("f64const");
    let fadd = shared.by_name("fadd");
    let fcmp = shared.by_name("fcmp");
    let fcvt_from_sint = shared.by_name("fcvt_from_sint");
    let fdemote = shared.by_name("fdemote");
    let fdiv = shared.by_name("fdiv");
    let ffcmp = shared.by_name("ffcmp");
    let floor = shared.by_name("floor");
    let fmul = shared.by_name("fmul");
    let fpromote = shared.by_name("fpromote");
    let fsub = shared.by_name("fsub");
    let nearest = shared.by_name("nearest");
    let sqrt = shared.by_name("sqrt");
    let trunc = shared.by_name("trunc");
    let x86_cvtt2si = x86.by_name("x86_cvtt2si");
    let x86_fmax = x86.by_name("x86_fmax");
    let x86_fmin = x86.by_name("x86_fmin");

    // Shorthands for recipes.
    let rec_f32imm_z = r.template("f32imm_z");
    let rec_f64imm_z = r.template("f64imm_z");
    let rec_fa = r.template("fa");
    let rec_fcmp = r.template("fcmp");
    let rec_fcscc = r.template("fcscc");
    let rec_frurm = r.template("frurm");
    let rec_furm = r.template("furm");
    let rec_furmi_rnd = r.template("furmi_rnd");
    let rec_rfurm = r.template("rfurm");

    // Predicate shorthands.
    let use_sse41 = settings.predicate_by_name("use_sse41");

    // Floating-point constants equal to 0.0 can be encoded using either `xorps` or `xorpd`, for
    // 32-bit and 64-bit floats respectively.
    let is_zero_32_bit_float =
        InstructionPredicate::new_is_zero_32bit_float(&*formats.unary_ieee32, "imm");
    e.enc32_instp(
        f32const,
        rec_f32imm_z.opcodes(&XORPS),
        is_zero_32_bit_float.clone(),
    );

    let is_zero_64_bit_float =
        InstructionPredicate::new_is_zero_64bit_float(&*formats.unary_ieee64, "imm");
    e.enc32_instp(
        f64const,
        rec_f64imm_z.opcodes(&XORPD),
        is_zero_64_bit_float.clone(),
    );

    e.enc_x86_64_instp(f32const, rec_f32imm_z.opcodes(&XORPS), is_zero_32_bit_float);
    e.enc_x86_64_instp(f64const, rec_f64imm_z.opcodes(&XORPD), is_zero_64_bit_float);
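
    // For example, `f32const 0.0` is emitted as an `xorps` of the destination
    // register with itself, avoiding a constant-pool load entirely.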

    // cvtsi2ss
    e.enc_i32_i64(fcvt_from_sint.bind(F32), rec_frurm.opcodes(&CVTSI2SS));

    // cvtsi2sd
    e.enc_i32_i64(fcvt_from_sint.bind(F64), rec_frurm.opcodes(&CVTSI2SD));

    // cvtss2sd
    e.enc_both(fpromote.bind(F64).bind(F32), rec_furm.opcodes(&CVTSS2SD));

    // cvtsd2ss
    e.enc_both(fdemote.bind(F32).bind(F64), rec_furm.opcodes(&CVTSD2SS));

    // cvttss2si
    e.enc_both(
        x86_cvtt2si.bind(I32).bind(F32),
        rec_rfurm.opcodes(&CVTTSS2SI),
    );
    e.enc64(
        x86_cvtt2si.bind(I64).bind(F32),
        rec_rfurm.opcodes(&CVTTSS2SI).rex().w(),
    );

    // cvttsd2si
    e.enc_both(
        x86_cvtt2si.bind(I32).bind(F64),
        rec_rfurm.opcodes(&CVTTSD2SI),
    );
    e.enc64(
        x86_cvtt2si.bind(I64).bind(F64),
        rec_rfurm.opcodes(&CVTTSD2SI).rex().w(),
    );

    // Exact square roots.
    e.enc_both(sqrt.bind(F32), rec_furm.opcodes(&SQRTSS));
    e.enc_both(sqrt.bind(F64), rec_furm.opcodes(&SQRTSD));

    // Rounding. The recipe looks at the opcode to pick an immediate.
    for inst in &[nearest, floor, ceil, trunc] {
        e.enc_both_isap(inst.bind(F32), rec_furmi_rnd.opcodes(&ROUNDSS), use_sse41);
        e.enc_both_isap(inst.bind(F64), rec_furmi_rnd.opcodes(&ROUNDSD), use_sse41);
    }
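    // ROUNDSS/ROUNDSD take an 8-bit immediate selecting the rounding mode
    // (0 = nearest, 1 = floor, 2 = ceil, 3 = truncate), which is how a single
    // recipe can cover all four instructions above.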
1296
1297 // Binary arithmetic ops.
1298 e.enc_both(fadd.bind(F32), rec_fa.opcodes(&ADDSS));
1299 e.enc_both(fadd.bind(F64), rec_fa.opcodes(&ADDSD));
1300
1301 e.enc_both(fsub.bind(F32), rec_fa.opcodes(&SUBSS));
1302 e.enc_both(fsub.bind(F64), rec_fa.opcodes(&SUBSD));
1303
1304 e.enc_both(fmul.bind(F32), rec_fa.opcodes(&MULSS));
1305 e.enc_both(fmul.bind(F64), rec_fa.opcodes(&MULSD));
1306
1307 e.enc_both(fdiv.bind(F32), rec_fa.opcodes(&DIVSS));
1308 e.enc_both(fdiv.bind(F64), rec_fa.opcodes(&DIVSD));
1309
1310 e.enc_both(x86_fmin.bind(F32), rec_fa.opcodes(&MINSS));
1311 e.enc_both(x86_fmin.bind(F64), rec_fa.opcodes(&MINSD));
1312
1313 e.enc_both(x86_fmax.bind(F32), rec_fa.opcodes(&MAXSS));
1314 e.enc_both(x86_fmax.bind(F64), rec_fa.opcodes(&MAXSD));
1315
1316 // Comparisons.
1317 //
1318 // This only covers the condition codes in `supported_floatccs`, the rest are
1319 // handled by legalization patterns.
1320 e.enc_both(fcmp.bind(F32), rec_fcscc.opcodes(&UCOMISS));
1321 e.enc_both(fcmp.bind(F64), rec_fcscc.opcodes(&UCOMISD));
1322 e.enc_both(ffcmp.bind(F32), rec_fcmp.opcodes(&UCOMISS));
1323 e.enc_both(ffcmp.bind(F64), rec_fcmp.opcodes(&UCOMISD));
1324 }
1325
1326 #[inline(never)]
define_alu( e: &mut PerCpuModeEncodings, shared_defs: &SharedDefinitions, settings: &SettingGroup, x86: &InstructionGroup, r: &RecipeGroup, )1327 fn define_alu(
1328 e: &mut PerCpuModeEncodings,
1329 shared_defs: &SharedDefinitions,
1330 settings: &SettingGroup,
1331 x86: &InstructionGroup,
1332 r: &RecipeGroup,
1333 ) {
1334 let shared = &shared_defs.instructions;
1335
1336 // Shorthands for instructions.
1337 let clz = shared.by_name("clz");
1338 let ctz = shared.by_name("ctz");
1339 let icmp = shared.by_name("icmp");
1340 let icmp_imm = shared.by_name("icmp_imm");
1341 let ifcmp = shared.by_name("ifcmp");
1342 let ifcmp_imm = shared.by_name("ifcmp_imm");
1343 let ifcmp_sp = shared.by_name("ifcmp_sp");
1344 let ishl = shared.by_name("ishl");
1345 let ishl_imm = shared.by_name("ishl_imm");
1346 let popcnt = shared.by_name("popcnt");
1347 let rotl = shared.by_name("rotl");
1348 let rotl_imm = shared.by_name("rotl_imm");
1349 let rotr = shared.by_name("rotr");
1350 let rotr_imm = shared.by_name("rotr_imm");
1351 let selectif = shared.by_name("selectif");
1352 let sshr = shared.by_name("sshr");
1353 let sshr_imm = shared.by_name("sshr_imm");
1354 let trueff = shared.by_name("trueff");
1355 let trueif = shared.by_name("trueif");
1356 let ushr = shared.by_name("ushr");
1357 let ushr_imm = shared.by_name("ushr_imm");
1358 let x86_bsf = x86.by_name("x86_bsf");
1359 let x86_bsr = x86.by_name("x86_bsr");
1360
1361 // Shorthands for recipes.
1362 let rec_bsf_and_bsr = r.template("bsf_and_bsr");
1363 let rec_cmov = r.template("cmov");
1364 let rec_icscc = r.template("icscc");
1365 let rec_icscc_ib = r.template("icscc_ib");
1366 let rec_icscc_id = r.template("icscc_id");
1367 let rec_rcmp = r.template("rcmp");
1368 let rec_rcmp_ib = r.template("rcmp_ib");
1369 let rec_rcmp_id = r.template("rcmp_id");
1370 let rec_rcmp_sp = r.template("rcmp_sp");
1371 let rec_rc = r.template("rc");
1372 let rec_setf_abcd = r.template("setf_abcd");
1373 let rec_seti_abcd = r.template("seti_abcd");
1374 let rec_urm = r.template("urm");
1375
1376 // Predicates shorthands.
1377 let use_popcnt = settings.predicate_by_name("use_popcnt");
1378 let use_lzcnt = settings.predicate_by_name("use_lzcnt");
1379 let use_bmi1 = settings.predicate_by_name("use_bmi1");
1380
1381 let band = shared.by_name("band");
1382 let band_imm = shared.by_name("band_imm");
1383 let band_not = shared.by_name("band_not");
1384 let bnot = shared.by_name("bnot");
1385 let bor = shared.by_name("bor");
1386 let bor_imm = shared.by_name("bor_imm");
1387 let bxor = shared.by_name("bxor");
1388 let bxor_imm = shared.by_name("bxor_imm");
1389 let iadd = shared.by_name("iadd");
1390 let iadd_ifcarry = shared.by_name("iadd_ifcarry");
1391 let iadd_ifcin = shared.by_name("iadd_ifcin");
1392 let iadd_ifcout = shared.by_name("iadd_ifcout");
1393 let iadd_imm = shared.by_name("iadd_imm");
1394 let imul = shared.by_name("imul");
1395 let isub = shared.by_name("isub");
1396 let isub_ifbin = shared.by_name("isub_ifbin");
1397 let isub_ifborrow = shared.by_name("isub_ifborrow");
1398 let isub_ifbout = shared.by_name("isub_ifbout");
1399 let x86_sdivmodx = x86.by_name("x86_sdivmodx");
1400 let x86_smulx = x86.by_name("x86_smulx");
1401 let x86_udivmodx = x86.by_name("x86_udivmodx");
1402 let x86_umulx = x86.by_name("x86_umulx");
1403
1404 let rec_div = r.template("div");
1405 let rec_fa = r.template("fa");
1406 let rec_fax = r.template("fax");
1407 let rec_mulx = r.template("mulx");
1408 let rec_r_ib = r.template("r_ib");
1409 let rec_r_id = r.template("r_id");
1410 let rec_rin = r.template("rin");
1411 let rec_rio = r.template("rio");
1412 let rec_rout = r.template("rout");
1413 let rec_rr = r.template("rr");
1414 let rec_rrx = r.template("rrx");
1415 let rec_ur = r.template("ur");
1416
1417 e.enc_i32_i64(iadd, rec_rr.opcodes(&ADD));
1418 e.enc_i32_i64(iadd_ifcout, rec_rout.opcodes(&ADD));
1419 e.enc_i32_i64(iadd_ifcin, rec_rin.opcodes(&ADC));
1420 e.enc_i32_i64(iadd_ifcarry, rec_rio.opcodes(&ADC));
1421 e.enc_i32_i64(iadd_imm, rec_r_ib.opcodes(&ADD_IMM8_SIGN_EXTEND).rrr(0));
1422 e.enc_i32_i64(iadd_imm, rec_r_id.opcodes(&ADD_IMM).rrr(0));

    e.enc_i32_i64(isub, rec_rr.opcodes(&SUB));
    e.enc_i32_i64(isub_ifbout, rec_rout.opcodes(&SUB));
    e.enc_i32_i64(isub_ifbin, rec_rin.opcodes(&SBB));
    e.enc_i32_i64(isub_ifborrow, rec_rio.opcodes(&SBB));

    e.enc_i32_i64(band, rec_rr.opcodes(&AND));
    e.enc_b32_b64(band, rec_rr.opcodes(&AND));

    // TODO: band_imm.i64 with an unsigned 32-bit immediate can be encoded as band_imm.i32. Can
    // even use the single-byte immediate for 0xffff_ffXX masks.

    e.enc_i32_i64(band_imm, rec_r_ib.opcodes(&AND_IMM8_SIGN_EXTEND).rrr(4));
    e.enc_i32_i64(band_imm, rec_r_id.opcodes(&AND_IMM).rrr(4));

    e.enc_i32_i64(bor, rec_rr.opcodes(&OR));
    e.enc_b32_b64(bor, rec_rr.opcodes(&OR));
    e.enc_i32_i64(bor_imm, rec_r_ib.opcodes(&OR_IMM8_SIGN_EXTEND).rrr(1));
    e.enc_i32_i64(bor_imm, rec_r_id.opcodes(&OR_IMM).rrr(1));

    e.enc_i32_i64(bxor, rec_rr.opcodes(&XOR));
    e.enc_b32_b64(bxor, rec_rr.opcodes(&XOR));
    e.enc_i32_i64(bxor_imm, rec_r_ib.opcodes(&XOR_IMM8_SIGN_EXTEND).rrr(6));
    e.enc_i32_i64(bxor_imm, rec_r_id.opcodes(&XOR_IMM).rrr(6));

    // x86 has a dedicated bitwise NOT instruction.
    e.enc_i32_i64(bnot, rec_ur.opcodes(&NOT).rrr(2));
    e.enc_b32_b64(bnot, rec_ur.opcodes(&NOT).rrr(2));

    // Also add `b1` encodings for the logic instructions.
    // TODO: Should this be done with 8-bit instructions? It would improve partial register
    // dependencies.
    e.enc_both(band.bind(B1), rec_rr.opcodes(&AND));
    e.enc_both(bor.bind(B1), rec_rr.opcodes(&OR));
    e.enc_both(bxor.bind(B1), rec_rr.opcodes(&XOR));

    e.enc_i32_i64(imul, rec_rrx.opcodes(&IMUL));
    e.enc_i32_i64(x86_sdivmodx, rec_div.opcodes(&IDIV).rrr(7));
    e.enc_i32_i64(x86_udivmodx, rec_div.opcodes(&DIV).rrr(6));

    e.enc_i32_i64(x86_smulx, rec_mulx.opcodes(&IMUL_RDX_RAX).rrr(5));
    e.enc_i32_i64(x86_umulx, rec_mulx.opcodes(&MUL).rrr(4));
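
    // A note on the division/multiplication encodings above (editor's sketch,
    // assuming the usual x86 0xF7 opcode group, which the constant names
    // suggest): the ModR/M reg field selects the operation, /2 NOT, /4 MUL,
    // /5 IMUL (the widening RDX:RAX form), /6 DIV, /7 IDIV, hence the
    // `rrr(..)` values above and for bnot. The hardware div/widening-mul forms
    // implicitly read and write RAX/RDX, which is why these surface as the
    // special x86_sdivmodx/x86_udivmodx/x86_smulx/x86_umulx instructions
    // rather than as plain CLIF arithmetic.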

    // Binary bitwise ops.
    //
    // The F64 version is intentionally encoded using the single-precision opcode:
    // the operation is identical and the encoding is one byte shorter.
    e.enc_both(band.bind(F32), rec_fa.opcodes(&ANDPS));
    e.enc_both(band.bind(F64), rec_fa.opcodes(&ANDPS));

    e.enc_both(bor.bind(F32), rec_fa.opcodes(&ORPS));
    e.enc_both(bor.bind(F64), rec_fa.opcodes(&ORPS));

    e.enc_both(bxor.bind(F32), rec_fa.opcodes(&XORPS));
    e.enc_both(bxor.bind(F64), rec_fa.opcodes(&XORPS));

    // The `andnps(x, y)` instruction computes `~x & y`, while `band_not(x, y)` is
    // `x & ~y`, so the `fax` recipe flips the operands to match.
    e.enc_both(band_not.bind(F32), rec_fax.opcodes(&ANDNPS));
    e.enc_both(band_not.bind(F64), rec_fax.opcodes(&ANDNPS));

    // Shifts and rotates.
    // Note that the dynamic shift amount is only masked by 5 or 6 bits; the 8-bit
    // and 16-bit shifts would need explicit masking.
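    //
    // Concretely (an editor's note on hardware behavior): SHL/SHR/SAR mask the
    // CL count mod 32 (mod 64 with REX.W), which lines up with CLIF's
    // shift-amount semantics for i32/i64 but not for i8/i16. For example,
    // `ishl.i16 x, 17` should behave like a shift by 1, but the hardware would
    // shift by 17, so the narrow widths would need an explicit mask on the
    // count before the shift.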

    for &(inst, rrr) in &[(rotl, 0), (rotr, 1), (ishl, 4), (ushr, 5), (sshr, 7)] {
        // `enc_i32_i64` can't be used for this pattern because these
        // instructions also need the shift-amount type bound (to `Any`), so
        // the expansion is written out manually.
        e.enc32(
            inst.bind(I32).bind(Any),
            rec_rc.opcodes(&ROTATE_CL).rrr(rrr),
        );
        e.enc64(
            inst.bind(I64).bind(Any),
            rec_rc.opcodes(&ROTATE_CL).rrr(rrr).rex().w(),
        );
        e.enc64(
            inst.bind(I32).bind(Any),
            rec_rc.opcodes(&ROTATE_CL).rrr(rrr).rex(),
        );
        e.enc64(
            inst.bind(I32).bind(Any),
            rec_rc.opcodes(&ROTATE_CL).rrr(rrr),
        );
    }
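
    // The four encodings in the loop above mirror what `enc_i32_i64` would
    // produce: one X86_32 encoding, a REX.W form for i64 on X86_64, and both
    // REX and REX-less forms for i32 on X86_64. Listing the REX form before
    // the REX-less one appears deliberate throughout this file, since the
    // REX-less form cannot address r8..r15 and so should not be the default
    // pick (editor's reading of the pattern, not a statement from the file).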

    e.enc_i32_i64(rotl_imm, rec_r_ib.opcodes(&ROTATE_IMM8).rrr(0));
    e.enc_i32_i64(rotr_imm, rec_r_ib.opcodes(&ROTATE_IMM8).rrr(1));
    e.enc_i32_i64(ishl_imm, rec_r_ib.opcodes(&ROTATE_IMM8).rrr(4));
    e.enc_i32_i64(ushr_imm, rec_r_ib.opcodes(&ROTATE_IMM8).rrr(5));
    e.enc_i32_i64(sshr_imm, rec_r_ib.opcodes(&ROTATE_IMM8).rrr(7));
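
    // All five immediate forms share one base opcode (the x86 shift-group
    // opcode, judging by the shared ROTATE_IMM8 constant); the ModR/M reg
    // field picks the operation: /0 ROL, /1 ROR, /4 SHL, /5 SHR, /7 SAR,
    // matching the `rrr(..)` values above.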

    // Population count.
    e.enc32_isap(popcnt.bind(I32), rec_urm.opcodes(&POPCNT), use_popcnt);
    e.enc64_isap(
        popcnt.bind(I64),
        rec_urm.opcodes(&POPCNT).rex().w(),
        use_popcnt,
    );
    e.enc64_isap(popcnt.bind(I32), rec_urm.opcodes(&POPCNT).rex(), use_popcnt);
    e.enc64_isap(popcnt.bind(I32), rec_urm.opcodes(&POPCNT), use_popcnt);

    // Count leading zero bits.
    e.enc32_isap(clz.bind(I32), rec_urm.opcodes(&LZCNT), use_lzcnt);
    e.enc64_isap(clz.bind(I64), rec_urm.opcodes(&LZCNT).rex().w(), use_lzcnt);
    e.enc64_isap(clz.bind(I32), rec_urm.opcodes(&LZCNT).rex(), use_lzcnt);
    e.enc64_isap(clz.bind(I32), rec_urm.opcodes(&LZCNT), use_lzcnt);

    // Count trailing zero bits.
    e.enc32_isap(ctz.bind(I32), rec_urm.opcodes(&TZCNT), use_bmi1);
    e.enc64_isap(ctz.bind(I64), rec_urm.opcodes(&TZCNT).rex().w(), use_bmi1);
    e.enc64_isap(ctz.bind(I32), rec_urm.opcodes(&TZCNT).rex(), use_bmi1);
    e.enc64_isap(ctz.bind(I32), rec_urm.opcodes(&TZCNT), use_bmi1);

    // Bit scan forward and reverse.
    e.enc_i32_i64(x86_bsf, rec_bsf_and_bsr.opcodes(&BIT_SCAN_FORWARD));
    e.enc_i32_i64(x86_bsr, rec_bsf_and_bsr.opcodes(&BIT_SCAN_REVERSE));

    // Comparisons
    e.enc_i32_i64(icmp, rec_icscc.opcodes(&CMP_REG));
    e.enc_i32_i64(icmp_imm, rec_icscc_ib.opcodes(&CMP_IMM8).rrr(7));
    e.enc_i32_i64(icmp_imm, rec_icscc_id.opcodes(&CMP_IMM).rrr(7));
    e.enc_i32_i64(ifcmp, rec_rcmp.opcodes(&CMP_REG));
    e.enc_i32_i64(ifcmp_imm, rec_rcmp_ib.opcodes(&CMP_IMM8).rrr(7));
    e.enc_i32_i64(ifcmp_imm, rec_rcmp_id.opcodes(&CMP_IMM).rrr(7));
    // TODO: We could special-case ifcmp_imm(x, 0) to TEST(x, x).

    e.enc32(ifcmp_sp.bind(I32), rec_rcmp_sp.opcodes(&CMP_REG));
    e.enc64(ifcmp_sp.bind(I64), rec_rcmp_sp.opcodes(&CMP_REG).rex().w());

    // Convert flags to bool.
    // This encodes `b1` as an 8-bit low register with the value 0 or 1.
    e.enc_both(trueif, rec_seti_abcd.opcodes(&SET_BYTE_IF_OVERFLOW));
    e.enc_both(trueff, rec_setf_abcd.opcodes(&SET_BYTE_IF_OVERFLOW));

    // Conditional move (a.k.a. integer select).
    e.enc_i32_i64(selectif, rec_cmov.opcodes(&CMOV_OVERFLOW));
}

#[inline(never)]
#[allow(clippy::cognitive_complexity)]
fn define_simd(
    e: &mut PerCpuModeEncodings,
    shared_defs: &SharedDefinitions,
    settings: &SettingGroup,
    x86: &InstructionGroup,
    r: &RecipeGroup,
) {
    let shared = &shared_defs.instructions;
    let formats = &shared_defs.formats;

    // Shorthands for instructions.
    let avg_round = shared.by_name("avg_round");
    let bitcast = shared.by_name("bitcast");
    let bor = shared.by_name("bor");
    let bxor = shared.by_name("bxor");
    let copy = shared.by_name("copy");
    let copy_nop = shared.by_name("copy_nop");
    let copy_to_ssa = shared.by_name("copy_to_ssa");
    let fadd = shared.by_name("fadd");
    let fcmp = shared.by_name("fcmp");
    let fcvt_from_sint = shared.by_name("fcvt_from_sint");
    let fdiv = shared.by_name("fdiv");
    let fill = shared.by_name("fill");
    let fill_nop = shared.by_name("fill_nop");
    let fmax = shared.by_name("fmax");
    let fmin = shared.by_name("fmin");
    let fmul = shared.by_name("fmul");
    let fsub = shared.by_name("fsub");
    let iadd = shared.by_name("iadd");
    let icmp = shared.by_name("icmp");
    let imul = shared.by_name("imul");
    let ishl_imm = shared.by_name("ishl_imm");
    let load = shared.by_name("load");
    let load_complex = shared.by_name("load_complex");
    let raw_bitcast = shared.by_name("raw_bitcast");
    let regfill = shared.by_name("regfill");
    let regmove = shared.by_name("regmove");
    let regspill = shared.by_name("regspill");
    let sadd_sat = shared.by_name("sadd_sat");
    let scalar_to_vector = shared.by_name("scalar_to_vector");
    let sload8x8 = shared.by_name("sload8x8");
    let sload16x4 = shared.by_name("sload16x4");
    let sload32x2 = shared.by_name("sload32x2");
    let spill = shared.by_name("spill");
    let sqrt = shared.by_name("sqrt");
    let sshr_imm = shared.by_name("sshr_imm");
    let ssub_sat = shared.by_name("ssub_sat");
    let store = shared.by_name("store");
    let store_complex = shared.by_name("store_complex");
    let uadd_sat = shared.by_name("uadd_sat");
    let uload8x8 = shared.by_name("uload8x8");
    let uload16x4 = shared.by_name("uload16x4");
    let uload32x2 = shared.by_name("uload32x2");
    let ushr_imm = shared.by_name("ushr_imm");
    let usub_sat = shared.by_name("usub_sat");
    let vconst = shared.by_name("vconst");
    let x86_insertps = x86.by_name("x86_insertps");
    let x86_movlhps = x86.by_name("x86_movlhps");
    let x86_movsd = x86.by_name("x86_movsd");
    let x86_packss = x86.by_name("x86_packss");
    let x86_pextr = x86.by_name("x86_pextr");
    let x86_pinsr = x86.by_name("x86_pinsr");
    let x86_pmaxs = x86.by_name("x86_pmaxs");
    let x86_pmaxu = x86.by_name("x86_pmaxu");
    let x86_pmins = x86.by_name("x86_pmins");
    let x86_pminu = x86.by_name("x86_pminu");
    let x86_pshufb = x86.by_name("x86_pshufb");
    let x86_pshufd = x86.by_name("x86_pshufd");
    let x86_psll = x86.by_name("x86_psll");
    let x86_psra = x86.by_name("x86_psra");
    let x86_psrl = x86.by_name("x86_psrl");
    let x86_ptest = x86.by_name("x86_ptest");
    let x86_punpckh = x86.by_name("x86_punpckh");
    let x86_punpckl = x86.by_name("x86_punpckl");

    // Shorthands for recipes.
    let rec_evex_reg_vvvv_rm_128 = r.template("evex_reg_vvvv_rm_128");
    let rec_f_ib = r.template("f_ib");
    let rec_fa = r.template("fa");
    let rec_fa_ib = r.template("fa_ib");
    let rec_fax = r.template("fax");
    let rec_fcmp = r.template("fcmp");
    let rec_ffillSib32 = r.template("ffillSib32");
    let rec_ffillnull = r.recipe("ffillnull");
    let rec_fld = r.template("fld");
    let rec_fldDisp32 = r.template("fldDisp32");
    let rec_fldDisp8 = r.template("fldDisp8");
    let rec_fldWithIndex = r.template("fldWithIndex");
    let rec_fldWithIndexDisp32 = r.template("fldWithIndexDisp32");
    let rec_fldWithIndexDisp8 = r.template("fldWithIndexDisp8");
    let rec_fregfill32 = r.template("fregfill32");
    let rec_fregspill32 = r.template("fregspill32");
    let rec_frmov = r.template("frmov");
    let rec_frurm = r.template("frurm");
    let rec_fspillSib32 = r.template("fspillSib32");
    let rec_fst = r.template("fst");
    let rec_fstDisp32 = r.template("fstDisp32");
    let rec_fstDisp8 = r.template("fstDisp8");
    let rec_fstWithIndex = r.template("fstWithIndex");
    let rec_fstWithIndexDisp32 = r.template("fstWithIndexDisp32");
    let rec_fstWithIndexDisp8 = r.template("fstWithIndexDisp8");
    let rec_furm = r.template("furm");
    let rec_furm_reg_to_ssa = r.template("furm_reg_to_ssa");
    let rec_icscc_fpr = r.template("icscc_fpr");
    let rec_null_fpr = r.recipe("null_fpr");
    let rec_pfcmp = r.template("pfcmp");
    let rec_r_ib_unsigned_fpr = r.template("r_ib_unsigned_fpr");
    let rec_r_ib_unsigned_gpr = r.template("r_ib_unsigned_gpr");
    let rec_r_ib_unsigned_r = r.template("r_ib_unsigned_r");
    let rec_stacknull = r.recipe("stacknull");
    let rec_vconst = r.template("vconst");
    let rec_vconst_optimized = r.template("vconst_optimized");

    // Predicates shorthands.
    settings.predicate_by_name("all_ones_funcaddrs_and_not_is_pic");
    settings.predicate_by_name("not_all_ones_funcaddrs_and_not_is_pic");
    let use_ssse3_simd = settings.predicate_by_name("use_ssse3_simd");
    let use_sse41_simd = settings.predicate_by_name("use_sse41_simd");
    let use_sse42_simd = settings.predicate_by_name("use_sse42_simd");
    let use_avx512dq_simd = settings.predicate_by_name("use_avx512dq_simd");

    // SIMD vector size: eventually multiple vector sizes may be supported but for now only
    // SSE-sized vectors are available.
    let sse_vector_size: u64 = 128;

    // SIMD splat: before x86 can use vector data, it must be moved to XMM registers; see
    // legalize.rs for how this is done; once there, x86_pshuf* (below) is used for broadcasting the
    // value across the register.

    let allowed_simd_type = |t: &LaneType| t.lane_bits() >= 8 && t.lane_bits() < 128;

    // PSHUFB, 8-bit shuffle using two XMM registers.
    for ty in ValueType::all_lane_types().filter(allowed_simd_type) {
        let instruction = x86_pshufb.bind(vector(ty, sse_vector_size));
        let template = rec_fa.opcodes(&PSHUFB);
        e.enc_both_inferred_maybe_isap(instruction.clone(), template.clone(), Some(use_ssse3_simd));
    }
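
    // A sketch of how the splat legalization mentioned above can use this
    // encoding (illustrative only; the authoritative expansion lives in
    // legalize.rs): a `splat.i8x16 x` becomes roughly
    //
    //   v = scalar_to_vector.i8x16(x)  ; x in the low lane of an XMM register
    //   m = vconst.i8x16 0             ; an all-zeroes shuffle mask
    //   r = x86_pshufb(v, m)           ; PSHUFB with a zero mask selects
    //                                  ; byte 0 for every output lane
    //
    // since each PSHUFB output byte copies the source byte indexed by the
    // mask (or zero when the mask byte's high bit is set).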

    // PSHUFD, 32-bit shuffle using one XMM register and a u8 immediate.
    for ty in ValueType::all_lane_types().filter(|t| t.lane_bits() == 32) {
        let instruction = x86_pshufd.bind(vector(ty, sse_vector_size));
        let template = rec_r_ib_unsigned_fpr.opcodes(&PSHUFD);
        e.enc_both_inferred(instruction, template);
    }

    // SIMD scalar_to_vector; this uses MOV to copy the scalar value to an XMM register; according
    // to the Intel manual: "When the destination operand is an XMM register, the source operand is
    // written to the low doubleword of the register and the register is zero-extended to 128 bits."
    for ty in ValueType::all_lane_types().filter(allowed_simd_type) {
        let instruction = scalar_to_vector.bind(vector(ty, sse_vector_size));
        if ty.is_float() {
            // No need to move floats--they already live in XMM registers.
            e.enc_32_64_rec(instruction, rec_null_fpr, 0);
        } else {
            let template = rec_frurm.opcodes(&MOVD_LOAD_XMM);
            if ty.lane_bits() < 64 {
                e.enc_both_inferred(instruction, template);
            } else {
                // No 32-bit encodings for 64-bit widths.
                assert_eq!(ty.lane_bits(), 64);
                e.enc64(instruction, template.rex().w());
            }
        }
    }

    // SIMD insertlane
    for ty in ValueType::all_lane_types().filter(allowed_simd_type) {
        let (opcode, isap): (&[_], _) = match ty.lane_bits() {
            8 => (&PINSRB, Some(use_sse41_simd)),
            16 => (&PINSRW, None),
            32 | 64 => (&PINSR, Some(use_sse41_simd)),
            _ => panic!("invalid size for SIMD insertlane"),
        };

        let instruction = x86_pinsr.bind(vector(ty, sse_vector_size));
        let template = rec_r_ib_unsigned_r.opcodes(opcode);
        if ty.lane_bits() < 64 {
            e.enc_both_inferred_maybe_isap(instruction, template, isap);
        } else {
            // The 64-bit forms require a REX.W prefix and are only available on x86_64.
            e.enc64_maybe_isap(instruction, template.rex().w(), isap);
        }
    }
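
    // The ISA-extension predicates above track which extension introduced each
    // form: PINSRW is available from SSE2 (hence no predicate), while PINSRB
    // and the 32/64-bit PINSRD/PINSRQ forms arrived with SSE4.1 (hence
    // `use_sse41_simd`). The u8 immediate consumed by the `r_ib_unsigned_r`
    // recipe is the lane index, so e.g. an `x86_pinsr.i32x4` with immediate 2
    // corresponds to `pinsrd xmm, r/m32, 2` (editor's illustration, not code
    // from this file).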

    // For legalizing insertlane with floats, INSERTPS from SSE4.1.
    {
        let instruction = x86_insertps.bind(vector(F32, sse_vector_size));
        let template = rec_fa_ib.opcodes(&INSERTPS);
        e.enc_both_inferred_maybe_isap(instruction, template, Some(use_sse41_simd));
    }

    // For legalizing insertlane with floats, MOVSD from SSE2.
    {
        let instruction = x86_movsd.bind(vector(F64, sse_vector_size));
        let template = rec_fa.opcodes(&MOVSD_LOAD);
        e.enc_both_inferred(instruction, template); // from SSE2
    }

    // For legalizing insertlane with floats, MOVLHPS from SSE.
    {
        let instruction = x86_movlhps.bind(vector(F64, sse_vector_size));
        let template = rec_fa.opcodes(&MOVLHPS);
        e.enc_both_inferred(instruction, template); // from SSE
    }

    // SIMD extractlane
    for ty in ValueType::all_lane_types().filter(allowed_simd_type) {
        let opcode = match ty.lane_bits() {
            8 => &PEXTRB,
            16 => &PEXTRW,
            32 | 64 => &PEXTR,
            _ => panic!("invalid size for SIMD extractlane"),
        };

        let instruction = x86_pextr.bind(vector(ty, sse_vector_size));
        let template = rec_r_ib_unsigned_gpr.opcodes(opcode);
        if ty.lane_bits() < 64 {
            e.enc_both_inferred_maybe_isap(instruction, template, Some(use_sse41_simd));
        } else {
            // The 64-bit forms require a REX.W prefix and are only available on x86_64.
            e.enc64_maybe_isap(instruction, template.rex().w(), Some(use_sse41_simd));
        }
    }

    // SIMD packing/unpacking
    for ty in ValueType::all_lane_types().filter(allowed_simd_type) {
        let (high, low) = match ty.lane_bits() {
            8 => (&PUNPCKHBW, &PUNPCKLBW),
            16 => (&PUNPCKHWD, &PUNPCKLWD),
            32 => (&PUNPCKHDQ, &PUNPCKLDQ),
            64 => (&PUNPCKHQDQ, &PUNPCKLQDQ),
            _ => panic!("invalid size for SIMD packing/unpacking"),
        };

        e.enc_both_inferred(
            x86_punpckh.bind(vector(ty, sse_vector_size)),
            rec_fa.opcodes(high),
        );
        e.enc_both_inferred(
            x86_punpckl.bind(vector(ty, sse_vector_size)),
            rec_fa.opcodes(low),
        );
    }
    for (ty, opcodes) in &[(I16, &PACKSSWB), (I32, &PACKSSDW)] {
        let x86_packss = x86_packss.bind(vector(*ty, sse_vector_size));
        e.enc_both_inferred(x86_packss, rec_fa.opcodes(*opcodes));
    }

    // SIMD bitcast all 128-bit vectors to each other (for legalizing splat.x16x8).
    for from_type in ValueType::all_lane_types().filter(allowed_simd_type) {
        for to_type in
            ValueType::all_lane_types().filter(|t| allowed_simd_type(t) && *t != from_type)
        {
            let instruction = raw_bitcast
                .bind(vector(to_type, sse_vector_size))
                .bind(vector(from_type, sse_vector_size));
            e.enc_32_64_rec(instruction, rec_null_fpr, 0);
        }
    }

    // SIMD raw bitcast floats to vector (and back); assumes that floats are already stored in an
    // XMM register.
    for float_type in &[F32, F64] {
        for lane_type in ValueType::all_lane_types().filter(allowed_simd_type) {
            e.enc_32_64_rec(
                raw_bitcast
                    .bind(vector(lane_type, sse_vector_size))
                    .bind(*float_type),
                rec_null_fpr,
                0,
            );
            e.enc_32_64_rec(
                raw_bitcast
                    .bind(*float_type)
                    .bind(vector(lane_type, sse_vector_size)),
                rec_null_fpr,
                0,
            );
        }
    }

    // SIMD conversions
    {
        let fcvt_from_sint_32 = fcvt_from_sint
            .bind(vector(F32, sse_vector_size))
            .bind(vector(I32, sse_vector_size));
        e.enc_both(fcvt_from_sint_32, rec_furm.opcodes(&CVTDQ2PS));
    }

    // SIMD vconst for special cases (all zeroes, all ones).
    // These must be encoded before the general MOVUPS implementation (below) so that the
    // compiler sees these encodings first.
    for ty in ValueType::all_lane_types().filter(allowed_simd_type) {
        let instruction = vconst.bind(vector(ty, sse_vector_size));

        let is_zero_128bit =
            InstructionPredicate::new_is_all_zeroes(&*formats.unary_const, "constant_handle");
        let template = rec_vconst_optimized.opcodes(&PXOR).infer_rex();
        e.enc_32_64_func(instruction.clone(), template, |builder| {
            builder.inst_predicate(is_zero_128bit)
        });

        let is_ones_128bit =
            InstructionPredicate::new_is_all_ones(&*formats.unary_const, "constant_handle");
        let template = rec_vconst_optimized.opcodes(&PCMPEQB).infer_rex();
        e.enc_32_64_func(instruction, template, |builder| {
            builder.inst_predicate(is_ones_128bit)
        });
    }
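
    // What these optimized encodings emit (standard x86 idioms, noted here by
    // the editor for context): an all-zeroes constant becomes `pxor xmm, xmm`
    // (anything XORed with itself is zero), and an all-ones constant becomes
    // `pcmpeqb xmm, xmm` (every byte of a register compares equal to itself,
    // producing 0xFF in each lane), so neither case needs a memory load.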

    // SIMD vconst using MOVUPS.
    // TODO: ideally this would use the more efficient MOVAPS, but we would have to guarantee
    // that the constants are aligned when emitted, and there is currently no mechanism for that.
    // Alternatively, constants could be loaded into XMM registers using a sequence like
    // MOVQ + MOVHPD + MOVQ + MOVLPD (this allows the constants to be immediates instead of stored
    // in memory), but some performance measurements are needed.
    for ty in ValueType::all_lane_types().filter(allowed_simd_type) {
        let instruction = vconst.bind(vector(ty, sse_vector_size));
        let template = rec_vconst.opcodes(&MOVUPS_LOAD);
        e.enc_both_inferred(instruction, template); // from SSE
    }

    // SIMD register movement: store, load, spill, fill, regmove, etc. All of these use encodings of
    // MOVUPS and MOVAPS from SSE (TODO ideally all of these would either use MOVAPS when we have
    // alignment or type-specific encodings, see https://github.com/bytecodealliance/wasmtime/issues/1124).
    // Also, it would be ideal to infer REX prefixes for all of these instructions, but for the
    // time being only instructions with common recipes have `infer_rex()` support.
    for ty in ValueType::all_lane_types().filter(allowed_simd_type) {
        // Store
        let bound_store = store.bind(vector(ty, sse_vector_size)).bind(Any);
        e.enc_both_inferred(bound_store.clone(), rec_fst.opcodes(&MOVUPS_STORE));
        e.enc_both_inferred(bound_store.clone(), rec_fstDisp8.opcodes(&MOVUPS_STORE));
        e.enc_both_inferred(bound_store, rec_fstDisp32.opcodes(&MOVUPS_STORE));

        // Store complex
        let bound_store_complex = store_complex.bind(vector(ty, sse_vector_size));
        e.enc_both(
            bound_store_complex.clone(),
            rec_fstWithIndex.opcodes(&MOVUPS_STORE),
        );
        e.enc_both(
            bound_store_complex.clone(),
            rec_fstWithIndexDisp8.opcodes(&MOVUPS_STORE),
        );
        e.enc_both(
            bound_store_complex,
            rec_fstWithIndexDisp32.opcodes(&MOVUPS_STORE),
        );

        // Load
        let bound_load = load.bind(vector(ty, sse_vector_size)).bind(Any);
        e.enc_both_inferred(bound_load.clone(), rec_fld.opcodes(&MOVUPS_LOAD));
        e.enc_both_inferred(bound_load.clone(), rec_fldDisp8.opcodes(&MOVUPS_LOAD));
        e.enc_both_inferred(bound_load, rec_fldDisp32.opcodes(&MOVUPS_LOAD));

        // Load complex
        let bound_load_complex = load_complex.bind(vector(ty, sse_vector_size));
        e.enc_both(
            bound_load_complex.clone(),
            rec_fldWithIndex.opcodes(&MOVUPS_LOAD),
        );
        e.enc_both(
            bound_load_complex.clone(),
            rec_fldWithIndexDisp8.opcodes(&MOVUPS_LOAD),
        );
        e.enc_both(
            bound_load_complex,
            rec_fldWithIndexDisp32.opcodes(&MOVUPS_LOAD),
        );

        // Spill
        let bound_spill = spill.bind(vector(ty, sse_vector_size));
        e.enc_both(bound_spill, rec_fspillSib32.opcodes(&MOVUPS_STORE));
        let bound_regspill = regspill.bind(vector(ty, sse_vector_size));
        e.enc_both(bound_regspill, rec_fregspill32.opcodes(&MOVUPS_STORE));

        // Fill
        let bound_fill = fill.bind(vector(ty, sse_vector_size));
        e.enc_both(bound_fill, rec_ffillSib32.opcodes(&MOVUPS_LOAD));
        let bound_regfill = regfill.bind(vector(ty, sse_vector_size));
        e.enc_both(bound_regfill, rec_fregfill32.opcodes(&MOVUPS_LOAD));
        let bound_fill_nop = fill_nop.bind(vector(ty, sse_vector_size));
        e.enc_32_64_rec(bound_fill_nop, rec_ffillnull, 0);

        // Regmove
        let bound_regmove = regmove.bind(vector(ty, sse_vector_size));
        e.enc_both(bound_regmove, rec_frmov.opcodes(&MOVAPS_LOAD));

        // Copy
        let bound_copy = copy.bind(vector(ty, sse_vector_size));
        e.enc_both(bound_copy, rec_furm.opcodes(&MOVAPS_LOAD));
        let bound_copy_to_ssa = copy_to_ssa.bind(vector(ty, sse_vector_size));
        e.enc_both(bound_copy_to_ssa, rec_furm_reg_to_ssa.opcodes(&MOVAPS_LOAD));
        let bound_copy_nop = copy_nop.bind(vector(ty, sse_vector_size));
        e.enc_32_64_rec(bound_copy_nop, rec_stacknull, 0);
    }

    // SIMD load extend
    for (inst, opcodes) in &[
        (uload8x8, &PMOVZXBW),
        (uload16x4, &PMOVZXWD),
        (uload32x2, &PMOVZXDQ),
        (sload8x8, &PMOVSXBW),
        (sload16x4, &PMOVSXWD),
        (sload32x2, &PMOVSXDQ),
    ] {
        let isap = Some(use_sse41_simd);
        for recipe in &[rec_fld, rec_fldDisp8, rec_fldDisp32] {
            let inst = *inst;
            let template = recipe.opcodes(*opcodes);
            e.enc_both_inferred_maybe_isap(inst.clone().bind(I32), template.clone(), isap);
            e.enc64_maybe_isap(inst.bind(I64), template.infer_rex(), isap);
        }
    }
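
    // In the load-extend loop, the `bind(I32)`/`bind(I64)` calls bind the
    // address operand's type (the pointer width) rather than a lane type, so
    // each PMOVZX/PMOVSX form is registered for both 32-bit and 64-bit
    // addresses (editor's note, based on how the `load`-family instructions
    // are bound with a second type earlier in this function).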

    // SIMD integer addition
    for (ty, opcodes) in &[(I8, &PADDB), (I16, &PADDW), (I32, &PADDD), (I64, &PADDQ)] {
        let iadd = iadd.bind(vector(*ty, sse_vector_size));
        e.enc_both_inferred(iadd, rec_fa.opcodes(*opcodes));
    }

    // SIMD integer saturating addition
    e.enc_both_inferred(
        sadd_sat.bind(vector(I8, sse_vector_size)),
        rec_fa.opcodes(&PADDSB),
    );
    e.enc_both_inferred(
        sadd_sat.bind(vector(I16, sse_vector_size)),
        rec_fa.opcodes(&PADDSW),
    );
    e.enc_both_inferred(
        uadd_sat.bind(vector(I8, sse_vector_size)),
        rec_fa.opcodes(&PADDUSB),
    );
    e.enc_both_inferred(
        uadd_sat.bind(vector(I16, sse_vector_size)),
        rec_fa.opcodes(&PADDUSW),
    );

    // SIMD integer subtraction
    let isub = shared.by_name("isub");
    for (ty, opcodes) in &[(I8, &PSUBB), (I16, &PSUBW), (I32, &PSUBD), (I64, &PSUBQ)] {
        let isub = isub.bind(vector(*ty, sse_vector_size));
        e.enc_both_inferred(isub, rec_fa.opcodes(*opcodes));
    }

    // SIMD integer saturating subtraction
    e.enc_both_inferred(
        ssub_sat.bind(vector(I8, sse_vector_size)),
        rec_fa.opcodes(&PSUBSB),
    );
    e.enc_both_inferred(
        ssub_sat.bind(vector(I16, sse_vector_size)),
        rec_fa.opcodes(&PSUBSW),
    );
    e.enc_both_inferred(
        usub_sat.bind(vector(I8, sse_vector_size)),
        rec_fa.opcodes(&PSUBUSB),
    );
    e.enc_both_inferred(
        usub_sat.bind(vector(I16, sse_vector_size)),
        rec_fa.opcodes(&PSUBUSW),
    );

    // SIMD integer multiplication: the x86 ISA does not have instructions for multiplying I8x16
    // and I64x2, and these are (at the time of writing) not necessary for WASM SIMD.
    for (ty, opcodes, isap) in &[
        (I16, &PMULLW[..], None),
        (I32, &PMULLD[..], Some(use_sse41_simd)),
    ] {
        let imul = imul.bind(vector(*ty, sse_vector_size));
        e.enc_both_inferred_maybe_isap(imul, rec_fa.opcodes(opcodes), *isap);
    }

    // SIMD integer multiplication for I64x2 using AVX-512.
    {
        let imul = imul.bind(vector(I64, sse_vector_size));
        e.enc_32_64_maybe_isap(
            imul,
            rec_evex_reg_vvvv_rm_128.opcodes(&PMULLQ).w(),
            Some(use_avx512dq_simd), // TODO need an OR predicate to join with AVX512VL
        );
    }

    // SIMD integer average with rounding.
    for (ty, opcodes) in &[(I8, &PAVGB[..]), (I16, &PAVGW[..])] {
        let avgr = avg_round.bind(vector(*ty, sse_vector_size));
        e.enc_both_inferred(avgr, rec_fa.opcodes(opcodes));
    }

    // SIMD logical operations
    let band = shared.by_name("band");
    let band_not = shared.by_name("band_not");
    for ty in ValueType::all_lane_types().filter(allowed_simd_type) {
        // and
        let band = band.bind(vector(ty, sse_vector_size));
        e.enc_both_inferred(band, rec_fa.opcodes(&PAND));

        // and-not (note the flipped recipe operands to match band_not's order)
        let band_not = band_not.bind(vector(ty, sse_vector_size));
        e.enc_both_inferred(band_not, rec_fax.opcodes(&PANDN));

        // or
        let bor = bor.bind(vector(ty, sse_vector_size));
        e.enc_both_inferred(bor, rec_fa.opcodes(&POR));

        // xor
        let bxor = bxor.bind(vector(ty, sse_vector_size));
        e.enc_both_inferred(bxor, rec_fa.opcodes(&PXOR));

        // ptest
        let x86_ptest = x86_ptest.bind(vector(ty, sse_vector_size));
        e.enc_both_inferred_maybe_isap(x86_ptest, rec_fcmp.opcodes(&PTEST), Some(use_sse41_simd));
    }

    // SIMD bitcast from I32/I64 to the low bits of a vector (e.g. I64x2); this register movement
    // allows SIMD shifts to be legalized more easily. TODO ideally this would be typed as an
    // I128x1 but restrictions on the type builder prevent this; the general idea here is that
    // the upper bits are all zeroed and do not form parts of any separate lane. See
    // https://github.com/bytecodealliance/wasmtime/issues/1140.
    e.enc_both_inferred(
        bitcast.bind(vector(I64, sse_vector_size)).bind(I32),
        rec_frurm.opcodes(&MOVD_LOAD_XMM),
    );
    e.enc64(
        bitcast.bind(vector(I64, sse_vector_size)).bind(I64),
        rec_frurm.opcodes(&MOVD_LOAD_XMM).rex().w(),
    );

    // SIMD shift left
    for (ty, opcodes) in &[(I16, &PSLLW), (I32, &PSLLD), (I64, &PSLLQ)] {
        let x86_psll = x86_psll.bind(vector(*ty, sse_vector_size));
        e.enc_both_inferred(x86_psll, rec_fa.opcodes(*opcodes));
    }

    // SIMD shift right (logical)
    for (ty, opcodes) in &[(I16, &PSRLW), (I32, &PSRLD), (I64, &PSRLQ)] {
        let x86_psrl = x86_psrl.bind(vector(*ty, sse_vector_size));
        e.enc_both_inferred(x86_psrl, rec_fa.opcodes(*opcodes));
    }

    // SIMD shift right (arithmetic)
    for (ty, opcodes) in &[(I16, &PSRAW), (I32, &PSRAD)] {
        let x86_psra = x86_psra.bind(vector(*ty, sse_vector_size));
        e.enc_both_inferred(x86_psra, rec_fa.opcodes(*opcodes));
    }

    // SIMD immediate shift
    for (ty, opcodes) in &[(I16, &PS_W_IMM), (I32, &PS_D_IMM), (I64, &PS_Q_IMM)] {
        let ishl_imm = ishl_imm.bind(vector(*ty, sse_vector_size));
        e.enc_both_inferred(ishl_imm, rec_f_ib.opcodes(*opcodes).rrr(6));

        let ushr_imm = ushr_imm.bind(vector(*ty, sse_vector_size));
        e.enc_both_inferred(ushr_imm, rec_f_ib.opcodes(*opcodes).rrr(2));

        let sshr_imm = sshr_imm.bind(vector(*ty, sse_vector_size));
        e.enc_both_inferred(sshr_imm, rec_f_ib.opcodes(*opcodes).rrr(4));
    }
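
    // These immediate shifts share one opcode group per lane width (the x86
    // 0x71/0x72/0x73 groups, judging from the PS_{W,D,Q}_IMM names), with the
    // ModR/M reg field selecting the operation: /2 logical right shift, /4
    // arithmetic right shift, /6 left shift; hence the rrr(2)/rrr(4)/rrr(6)
    // values above.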

    // SIMD integer comparisons
    {
        use IntCC::*;
        for (ty, cc, opcodes, isa_predicate) in &[
            (I8, Equal, &PCMPEQB[..], None),
            (I16, Equal, &PCMPEQW[..], None),
            (I32, Equal, &PCMPEQD[..], None),
            (I64, Equal, &PCMPEQQ[..], Some(use_sse41_simd)),
            (I8, SignedGreaterThan, &PCMPGTB[..], None),
            (I16, SignedGreaterThan, &PCMPGTW[..], None),
            (I32, SignedGreaterThan, &PCMPGTD[..], None),
            (I64, SignedGreaterThan, &PCMPGTQ, Some(use_sse42_simd)),
        ] {
            let instruction = icmp
                .bind(Immediate::IntCC(*cc))
                .bind(vector(*ty, sse_vector_size));
            let template = rec_icscc_fpr.opcodes(opcodes);
            e.enc_both_inferred_maybe_isap(instruction, template, *isa_predicate);
        }
    }
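
    // Only Equal and SignedGreaterThan get direct encodings because those are
    // the only comparisons SSE provides natively (the PCMPEQ*/PCMPGT*
    // families); the remaining IntCC conditions are presumably obtained
    // during legalization by swapping operands and/or complementing the
    // result (editor's note; those expansions live outside this file).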

    // SIMD min/max
    for (ty, inst, opcodes, isa_predicate) in &[
        (I8, x86_pmaxs, &PMAXSB[..], Some(use_sse41_simd)),
        (I16, x86_pmaxs, &PMAXSW[..], None),
        (I32, x86_pmaxs, &PMAXSD[..], Some(use_sse41_simd)),
        (I8, x86_pmaxu, &PMAXUB[..], None),
        (I16, x86_pmaxu, &PMAXUW[..], Some(use_sse41_simd)),
        (I32, x86_pmaxu, &PMAXUD[..], Some(use_sse41_simd)),
        (I8, x86_pmins, &PMINSB[..], Some(use_sse41_simd)),
        (I16, x86_pmins, &PMINSW[..], None),
        (I32, x86_pmins, &PMINSD[..], Some(use_sse41_simd)),
        (I8, x86_pminu, &PMINUB[..], None),
        (I16, x86_pminu, &PMINUW[..], Some(use_sse41_simd)),
        (I32, x86_pminu, &PMINUD[..], Some(use_sse41_simd)),
    ] {
        let inst = inst.bind(vector(*ty, sse_vector_size));
        e.enc_both_inferred_maybe_isap(inst, rec_fa.opcodes(opcodes), *isa_predicate);
    }

    // SIMD float comparisons
    e.enc_both_inferred(
        fcmp.bind(vector(F32, sse_vector_size)),
        rec_pfcmp.opcodes(&CMPPS),
    );
    e.enc_both_inferred(
        fcmp.bind(vector(F64, sse_vector_size)),
        rec_pfcmp.opcodes(&CMPPD),
    );

    // SIMD float arithmetic
    for (ty, inst, opcodes) in &[
        (F32, fadd, &ADDPS[..]),
        (F64, fadd, &ADDPD[..]),
        (F32, fsub, &SUBPS[..]),
        (F64, fsub, &SUBPD[..]),
        (F32, fmul, &MULPS[..]),
        (F64, fmul, &MULPD[..]),
        (F32, fdiv, &DIVPS[..]),
        (F64, fdiv, &DIVPD[..]),
        (F32, fmin, &MINPS[..]),
        (F64, fmin, &MINPD[..]),
        (F32, fmax, &MAXPS[..]),
        (F64, fmax, &MAXPD[..]),
    ] {
        let inst = inst.bind(vector(*ty, sse_vector_size));
        e.enc_both_inferred(inst, rec_fa.opcodes(opcodes));
    }
    for (ty, inst, opcodes) in &[(F32, sqrt, &SQRTPS[..]), (F64, sqrt, &SQRTPD[..])] {
        let inst = inst.bind(vector(*ty, sse_vector_size));
        e.enc_both_inferred(inst, rec_furm.opcodes(opcodes));
    }
}

#[inline(never)]
fn define_entity_ref(
    e: &mut PerCpuModeEncodings,
    shared_defs: &SharedDefinitions,
    settings: &SettingGroup,
    r: &RecipeGroup,
) {
    let shared = &shared_defs.instructions;
    let formats = &shared_defs.formats;

    // Shorthands for instructions.
    let const_addr = shared.by_name("const_addr");
    let func_addr = shared.by_name("func_addr");
    let stack_addr = shared.by_name("stack_addr");
    let symbol_value = shared.by_name("symbol_value");

    // Shorthands for recipes.
    let rec_allones_fnaddr4 = r.template("allones_fnaddr4");
    let rec_allones_fnaddr8 = r.template("allones_fnaddr8");
    let rec_fnaddr4 = r.template("fnaddr4");
    let rec_fnaddr8 = r.template("fnaddr8");
    let rec_const_addr = r.template("const_addr");
    let rec_got_fnaddr8 = r.template("got_fnaddr8");
    let rec_got_gvaddr8 = r.template("got_gvaddr8");
    let rec_gvaddr4 = r.template("gvaddr4");
    let rec_gvaddr8 = r.template("gvaddr8");
    let rec_pcrel_fnaddr8 = r.template("pcrel_fnaddr8");
    let rec_pcrel_gvaddr8 = r.template("pcrel_gvaddr8");
    let rec_spaddr4_id = r.template("spaddr4_id");
    let rec_spaddr8_id = r.template("spaddr8_id");

    // Predicates shorthands.
    let all_ones_funcaddrs_and_not_is_pic =
        settings.predicate_by_name("all_ones_funcaddrs_and_not_is_pic");
    let is_pic = settings.predicate_by_name("is_pic");
    let not_all_ones_funcaddrs_and_not_is_pic =
        settings.predicate_by_name("not_all_ones_funcaddrs_and_not_is_pic");
    let not_is_pic = settings.predicate_by_name("not_is_pic");

    // Function addresses.

    // Non-PIC, all-zeros funcaddresses.
    e.enc32_isap(
        func_addr.bind(I32),
        rec_fnaddr4.opcodes(&MOV_IMM),
        not_all_ones_funcaddrs_and_not_is_pic,
    );
    e.enc64_isap(
        func_addr.bind(I64),
        rec_fnaddr8.opcodes(&MOV_IMM).rex().w(),
        not_all_ones_funcaddrs_and_not_is_pic,
    );

    // Non-PIC, all-ones funcaddresses.
    e.enc32_isap(
        func_addr.bind(I32),
        rec_allones_fnaddr4.opcodes(&MOV_IMM),
        all_ones_funcaddrs_and_not_is_pic,
    );
    e.enc64_isap(
        func_addr.bind(I64),
        rec_allones_fnaddr8.opcodes(&MOV_IMM).rex().w(),
        all_ones_funcaddrs_and_not_is_pic,
    );

    // 64-bit, colocated, both PIC and non-PIC. Use the lea instruction's pc-relative field.
    let is_colocated_func =
        InstructionPredicate::new_is_colocated_func(&*formats.func_addr, "func_ref");
    e.enc64_instp(
        func_addr.bind(I64),
        rec_pcrel_fnaddr8.opcodes(&LEA).rex().w(),
        is_colocated_func,
    );

    // 64-bit, non-colocated, PIC.
    e.enc64_isap(
        func_addr.bind(I64),
        rec_got_fnaddr8.opcodes(&MOV_LOAD).rex().w(),
        is_pic,
    );

    // Global addresses.

    // Non-PIC.
    e.enc32_isap(
        symbol_value.bind(I32),
        rec_gvaddr4.opcodes(&MOV_IMM),
        not_is_pic,
    );
    e.enc64_isap(
        symbol_value.bind(I64),
        rec_gvaddr8.opcodes(&MOV_IMM).rex().w(),
        not_is_pic,
    );

    // PIC, colocated.
    e.enc64_func(
        symbol_value.bind(I64),
        rec_pcrel_gvaddr8.opcodes(&LEA).rex().w(),
        |encoding| {
            encoding
                .isa_predicate(is_pic)
                .inst_predicate(InstructionPredicate::new_is_colocated_data(formats))
        },
    );

    // PIC, non-colocated.
    e.enc64_isap(
        symbol_value.bind(I64),
        rec_got_gvaddr8.opcodes(&MOV_LOAD).rex().w(),
        is_pic,
    );

    // Stack addresses.
    //
    // TODO: Add encoding rules for stack_load and stack_store, so that they
    // don't get legalized to stack_addr + load/store.
    e.enc32(stack_addr.bind(I32), rec_spaddr4_id.opcodes(&LEA));
    e.enc64(stack_addr.bind(I64), rec_spaddr8_id.opcodes(&LEA).rex().w());

    // Constant addresses (PIC).
    e.enc64(const_addr.bind(I64), rec_const_addr.opcodes(&LEA).rex().w());
    e.enc32(const_addr.bind(I32), rec_const_addr.opcodes(&LEA));
}

/// Control flow opcodes.
#[inline(never)]
fn define_control_flow(
    e: &mut PerCpuModeEncodings,
    shared_defs: &SharedDefinitions,
    settings: &SettingGroup,
    r: &RecipeGroup,
) {
    let shared = &shared_defs.instructions;
    let formats = &shared_defs.formats;

    // Shorthands for instructions.
    let brff = shared.by_name("brff");
    let brif = shared.by_name("brif");
    let brnz = shared.by_name("brnz");
    let brz = shared.by_name("brz");
    let call = shared.by_name("call");
    let call_indirect = shared.by_name("call_indirect");
    let debugtrap = shared.by_name("debugtrap");
    let indirect_jump_table_br = shared.by_name("indirect_jump_table_br");
    let jump = shared.by_name("jump");
    let jump_table_base = shared.by_name("jump_table_base");
    let jump_table_entry = shared.by_name("jump_table_entry");
    let return_ = shared.by_name("return");
    let trap = shared.by_name("trap");
    let trapff = shared.by_name("trapff");
    let trapif = shared.by_name("trapif");
    let resumable_trap = shared.by_name("resumable_trap");

    // Shorthands for recipes.
    let rec_brfb = r.template("brfb");
    let rec_brfd = r.template("brfd");
    let rec_brib = r.template("brib");
    let rec_brid = r.template("brid");
    let rec_call_id = r.template("call_id");
    let rec_call_plt_id = r.template("call_plt_id");
    let rec_call_r = r.template("call_r");
    let rec_debugtrap = r.recipe("debugtrap");
    let rec_indirect_jmp = r.template("indirect_jmp");
    let rec_jmpb = r.template("jmpb");
    let rec_jmpd = r.template("jmpd");
    let rec_jt_base = r.template("jt_base");
    let rec_jt_entry = r.template("jt_entry");
    let rec_ret = r.template("ret");
    let rec_t8jccb_abcd = r.template("t8jccb_abcd");
    let rec_t8jccd_abcd = r.template("t8jccd_abcd");
    let rec_t8jccd_long = r.template("t8jccd_long");
    let rec_tjccb = r.template("tjccb");
    let rec_tjccd = r.template("tjccd");
    let rec_trap = r.template("trap");
    let rec_trapif = r.recipe("trapif");
    let rec_trapff = r.recipe("trapff");

    // Predicates shorthands.
    let is_pic = settings.predicate_by_name("is_pic");

    // Call/return

    // 32-bit, both PIC and non-PIC.
    e.enc32(call, rec_call_id.opcodes(&CALL_RELATIVE));

    // 64-bit, colocated, both PIC and non-PIC. Use the call instruction's pc-relative field.
    let is_colocated_func = InstructionPredicate::new_is_colocated_func(&*formats.call, "func_ref");
    e.enc64_instp(call, rec_call_id.opcodes(&CALL_RELATIVE), is_colocated_func);

    // 64-bit, non-colocated, PIC. There is no 64-bit non-colocated non-PIC version, since non-PIC
    // is currently using the large model, which requires calls be lowered to
    // func_addr+call_indirect.
    e.enc64_isap(call, rec_call_plt_id.opcodes(&CALL_RELATIVE), is_pic);

    e.enc32(
        call_indirect.bind(I32),
        rec_call_r.opcodes(&JUMP_ABSOLUTE).rrr(2),
    );
    e.enc64(
        call_indirect.bind(I64),
        rec_call_r.opcodes(&JUMP_ABSOLUTE).rrr(2).rex(),
    );
    e.enc64(
        call_indirect.bind(I64),
        rec_call_r.opcodes(&JUMP_ABSOLUTE).rrr(2),
    );
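
    // The JUMP_ABSOLUTE constant presumably names the x86 0xFF opcode group,
    // in which the ModR/M reg field distinguishes the operations: /2 is an
    // indirect near CALL and /4 an indirect near JMP. That is why the same
    // constant appears here with rrr(2) and again for indirect_jump_table_br
    // below with rrr(4).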

    e.enc32(return_, rec_ret.opcodes(&RET_NEAR));
    e.enc64(return_, rec_ret.opcodes(&RET_NEAR));

    // Branches.
    e.enc32(jump, rec_jmpb.opcodes(&JUMP_SHORT));
    e.enc64(jump, rec_jmpb.opcodes(&JUMP_SHORT));
    e.enc32(jump, rec_jmpd.opcodes(&JUMP_NEAR_RELATIVE));
    e.enc64(jump, rec_jmpd.opcodes(&JUMP_NEAR_RELATIVE));

    e.enc_both(brif, rec_brib.opcodes(&JUMP_SHORT_IF_OVERFLOW));
    e.enc_both(brif, rec_brid.opcodes(&JUMP_NEAR_IF_OVERFLOW));

    // Not all float condition codes are legal, see `supported_floatccs`.
    e.enc_both(brff, rec_brfb.opcodes(&JUMP_SHORT_IF_OVERFLOW));
    e.enc_both(brff, rec_brfd.opcodes(&JUMP_NEAR_IF_OVERFLOW));

    // Note that the tjccd opcode will be prefixed with 0x0f.
    e.enc_i32_i64_explicit_rex(brz, rec_tjccb.opcodes(&JUMP_SHORT_IF_EQUAL));
    e.enc_i32_i64_explicit_rex(brz, rec_tjccd.opcodes(&TEST_BYTE_REG));
    e.enc_i32_i64_explicit_rex(brnz, rec_tjccb.opcodes(&JUMP_SHORT_IF_NOT_EQUAL));
    e.enc_i32_i64_explicit_rex(brnz, rec_tjccd.opcodes(&TEST_REG));
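
    // Why TEST_* constants appear in branch encodings (editor's note): those
    // constants presumably encode the single bytes 0x84/0x85, and once the
    // tjccd recipe adds the 0x0f prefix mentioned above, 0x0f 0x84 and
    // 0x0f 0x85 are exactly JE/JNE rel32, so the existing one-byte constants
    // are simply being reused for the near conditional jumps.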

    // Branch on a b1 value in a register only looks at the low 8 bits. See also
    // bint encodings below.
    //
    // Start with the worst-case encoding for X86_32 only. The register allocator
    // can't handle a branch with an ABCD-constrained operand.
    e.enc32(brz.bind(B1), rec_t8jccd_long.opcodes(&TEST_BYTE_REG));
    e.enc32(brnz.bind(B1), rec_t8jccd_long.opcodes(&TEST_REG));

    e.enc_both(brz.bind(B1), rec_t8jccb_abcd.opcodes(&JUMP_SHORT_IF_EQUAL));
    e.enc_both(brz.bind(B1), rec_t8jccd_abcd.opcodes(&TEST_BYTE_REG));
    e.enc_both(
        brnz.bind(B1),
        rec_t8jccb_abcd.opcodes(&JUMP_SHORT_IF_NOT_EQUAL),
    );
    e.enc_both(brnz.bind(B1), rec_t8jccd_abcd.opcodes(&TEST_REG));

    // Jump tables.
    e.enc64(
        jump_table_entry.bind(I64),
        rec_jt_entry.opcodes(&MOVSXD).rex().w(),
    );
    e.enc32(jump_table_entry.bind(I32), rec_jt_entry.opcodes(&MOV_LOAD));

    e.enc64(
        jump_table_base.bind(I64),
        rec_jt_base.opcodes(&LEA).rex().w(),
    );
    e.enc32(jump_table_base.bind(I32), rec_jt_base.opcodes(&LEA));

    e.enc_x86_64(
        indirect_jump_table_br.bind(I64),
        rec_indirect_jmp.opcodes(&JUMP_ABSOLUTE).rrr(4),
    );
    e.enc32(
        indirect_jump_table_br.bind(I32),
        rec_indirect_jmp.opcodes(&JUMP_ABSOLUTE).rrr(4),
    );

    // Trap as ud2
    e.enc32(trap, rec_trap.opcodes(&UNDEFINED2));
    e.enc64(trap, rec_trap.opcodes(&UNDEFINED2));
    e.enc32(resumable_trap, rec_trap.opcodes(&UNDEFINED2));
    e.enc64(resumable_trap, rec_trap.opcodes(&UNDEFINED2));

    // Debug trap as int3
    e.enc32_rec(debugtrap, rec_debugtrap, 0);
    e.enc64_rec(debugtrap, rec_debugtrap, 0);

    e.enc32_rec(trapif, rec_trapif, 0);
    e.enc64_rec(trapif, rec_trapif, 0);
    e.enc32_rec(trapff, rec_trapff, 0);
    e.enc64_rec(trapff, rec_trapff, 0);
}

/// Reference type instructions.
#[inline(never)]
fn define_reftypes(e: &mut PerCpuModeEncodings, shared_defs: &SharedDefinitions, r: &RecipeGroup) {
    let shared = &shared_defs.instructions;

    let is_null = shared.by_name("is_null");
    let is_invalid = shared.by_name("is_invalid");
    let null = shared.by_name("null");
    let safepoint = shared.by_name("safepoint");

    let rec_is_zero = r.template("is_zero");
    let rec_is_invalid = r.template("is_invalid");
    let rec_pu_id_ref = r.template("pu_id_ref");
    let rec_safepoint = r.recipe("safepoint");

    // Null references are implemented as iconst 0.
    e.enc32(null.bind(R32), rec_pu_id_ref.opcodes(&MOV_IMM));

    e.enc64(null.bind(R64), rec_pu_id_ref.rex().opcodes(&MOV_IMM));
    e.enc64(null.bind(R64), rec_pu_id_ref.opcodes(&MOV_IMM));

    // is_null, implemented by testing whether the value is 0.
    e.enc_r32_r64_rex_only(is_null, rec_is_zero.opcodes(&TEST_REG));

    // is_invalid, implemented by testing whether the value is -1.
    e.enc_r32_r64_rex_only(is_invalid, rec_is_invalid.opcodes(&CMP_IMM8).rrr(7));

    // The safepoint instruction only calls into the emission sink; it has no
    // actual encoding.
    e.enc32_rec(safepoint, rec_safepoint, 0);
    e.enc64_rec(safepoint, rec_safepoint, 0);
}

#[allow(clippy::cognitive_complexity)]
pub(crate) fn define(
    shared_defs: &SharedDefinitions,
    settings: &SettingGroup,
    x86: &InstructionGroup,
    r: &RecipeGroup,
) -> PerCpuModeEncodings {
    // Definitions.
    let mut e = PerCpuModeEncodings::new();

    define_moves(&mut e, shared_defs, r);
    define_memory(&mut e, shared_defs, x86, r);
    define_fpu_moves(&mut e, shared_defs, r);
    define_fpu_memory(&mut e, shared_defs, r);
    define_fpu_ops(&mut e, shared_defs, settings, x86, r);
    define_alu(&mut e, shared_defs, settings, x86, r);
    define_simd(&mut e, shared_defs, settings, x86, r);
    define_entity_ref(&mut e, shared_defs, settings, r);
    define_control_flow(&mut e, shared_defs, settings, r);
    define_reftypes(&mut e, shared_defs, r);

    let x86_elf_tls_get_addr = x86.by_name("x86_elf_tls_get_addr");
    let x86_macho_tls_get_addr = x86.by_name("x86_macho_tls_get_addr");

    let rec_elf_tls_get_addr = r.recipe("elf_tls_get_addr");
    let rec_macho_tls_get_addr = r.recipe("macho_tls_get_addr");

    e.enc64_rec(x86_elf_tls_get_addr, rec_elf_tls_get_addr, 0);
    e.enc64_rec(x86_macho_tls_get_addr, rec_macho_tls_get_addr, 0);

    e
}