1 //! A pre-legalization rewriting pass.
2 //!
3 //! This module provides early-stage optimizations. The optimizations found
4 //! should be useful for already well-optimized code. More general purpose
5 //! early-stage optimizations can be found in the preopt crate.
6 
7 use crate::cursor::{Cursor, FuncCursor};
8 use crate::divconst_magic_numbers::{magic_s32, magic_s64, magic_u32, magic_u64};
9 use crate::divconst_magic_numbers::{MS32, MS64, MU32, MU64};
10 use crate::flowgraph::ControlFlowGraph;
11 use crate::ir::{
12     condcodes::{CondCode, IntCC},
13     instructions::Opcode,
14     types::{I32, I64},
15     Block, DataFlowGraph, Function, Inst, InstBuilder, InstructionData, Type, Value,
16 };
17 use crate::isa::TargetIsa;
18 use crate::timing;
19 
20 #[inline]
21 /// Replaces the unique result of the instruction inst to an alias of the given value, and
22 /// replaces the instruction with a nop. Can be used only on instructions producing one unique
23 /// result, otherwise will assert.
replace_single_result_with_alias(dfg: &mut DataFlowGraph, inst: Inst, value: Value)24 fn replace_single_result_with_alias(dfg: &mut DataFlowGraph, inst: Inst, value: Value) {
25     // Replace the result value by an alias.
26     let results = dfg.detach_results(inst);
27     debug_assert!(results.len(&dfg.value_lists) == 1);
28     let result = results.get(0, &dfg.value_lists).unwrap();
29     dfg.change_to_alias(result, value);
30 
31     // Replace instruction by a nop.
32     dfg.replace(inst).nop();
33 }
34 
35 //----------------------------------------------------------------------
36 //
37 // Pattern-match helpers and transformation for div and rem by constants.
38 
39 // Simple math helpers
40 
41 /// if `x` is a power of two, or the negation thereof, return the power along
42 /// with a boolean that indicates whether `x` is negative. Else return None.
43 #[inline]
i32_is_power_of_two(x: i32) -> Option<(bool, u32)>44 fn i32_is_power_of_two(x: i32) -> Option<(bool, u32)> {
45     // We have to special-case this because abs(x) isn't representable.
46     if x == -0x8000_0000 {
47         return Some((true, 31));
48     }
49     let abs_x = i32::wrapping_abs(x) as u32;
50     if abs_x.is_power_of_two() {
51         return Some((x < 0, abs_x.trailing_zeros()));
52     }
53     None
54 }
55 
56 /// Same comments as for i32_is_power_of_two apply.
57 #[inline]
i64_is_power_of_two(x: i64) -> Option<(bool, u32)>58 fn i64_is_power_of_two(x: i64) -> Option<(bool, u32)> {
59     // We have to special-case this because abs(x) isn't representable.
60     if x == -0x8000_0000_0000_0000 {
61         return Some((true, 63));
62     }
63     let abs_x = i64::wrapping_abs(x) as u64;
64     if abs_x.is_power_of_two() {
65         return Some((x < 0, abs_x.trailing_zeros()));
66     }
67     None
68 }
69 
70 /// Representation of an instruction that can be replaced by a single division/remainder operation
71 /// between a left Value operand and a right immediate operand.
72 #[derive(Debug)]
73 enum DivRemByConstInfo {
74     DivU32(Value, u32),
75     DivU64(Value, u64),
76     DivS32(Value, i32),
77     DivS64(Value, i64),
78     RemU32(Value, u32),
79     RemU64(Value, u64),
80     RemS32(Value, i32),
81     RemS64(Value, i64),
82 }
83 
84 /// Possibly create a DivRemByConstInfo from the given components, by figuring out which, if any,
85 /// of the 8 cases apply, and also taking care to sanity-check the immediate.
package_up_divrem_info( value: Value, value_type: Type, imm_i64: i64, is_signed: bool, is_rem: bool, ) -> Option<DivRemByConstInfo>86 fn package_up_divrem_info(
87     value: Value,
88     value_type: Type,
89     imm_i64: i64,
90     is_signed: bool,
91     is_rem: bool,
92 ) -> Option<DivRemByConstInfo> {
93     let imm_u64 = imm_i64 as u64;
94 
95     match (is_signed, value_type) {
96         (false, I32) => {
97             if imm_u64 < 0x1_0000_0000 {
98                 if is_rem {
99                     Some(DivRemByConstInfo::RemU32(value, imm_u64 as u32))
100                 } else {
101                     Some(DivRemByConstInfo::DivU32(value, imm_u64 as u32))
102                 }
103             } else {
104                 None
105             }
106         }
107 
108         (false, I64) => {
109             // unsigned 64, no range constraint.
110             if is_rem {
111                 Some(DivRemByConstInfo::RemU64(value, imm_u64))
112             } else {
113                 Some(DivRemByConstInfo::DivU64(value, imm_u64))
114             }
115         }
116 
117         (true, I32) => {
118             if imm_u64 <= 0x7fff_ffff || imm_u64 >= 0xffff_ffff_8000_0000 {
119                 if is_rem {
120                     Some(DivRemByConstInfo::RemS32(value, imm_u64 as i32))
121                 } else {
122                     Some(DivRemByConstInfo::DivS32(value, imm_u64 as i32))
123                 }
124             } else {
125                 None
126             }
127         }
128 
129         (true, I64) => {
130             // signed 64, no range constraint.
131             if is_rem {
132                 Some(DivRemByConstInfo::RemS64(value, imm_u64 as i64))
133             } else {
134                 Some(DivRemByConstInfo::DivS64(value, imm_u64 as i64))
135             }
136         }
137 
138         _ => None,
139     }
140 }
141 
142 /// Examine `inst` to see if it is a div or rem by a constant, and if so return the operands,
143 /// signedness, operation size and div-vs-rem-ness in a handy bundle.
get_div_info(inst: Inst, dfg: &DataFlowGraph) -> Option<DivRemByConstInfo>144 fn get_div_info(inst: Inst, dfg: &DataFlowGraph) -> Option<DivRemByConstInfo> {
145     if let InstructionData::BinaryImm64 { opcode, arg, imm } = dfg[inst] {
146         let (is_signed, is_rem) = match opcode {
147             Opcode::UdivImm => (false, false),
148             Opcode::UremImm => (false, true),
149             Opcode::SdivImm => (true, false),
150             Opcode::SremImm => (true, true),
151             _ => return None,
152         };
153         return package_up_divrem_info(arg, dfg.value_type(arg), imm.into(), is_signed, is_rem);
154     }
155 
156     None
157 }
158 
159 /// Actually do the transformation given a bundle containing the relevant information.
160 /// `divrem_info` describes a div or rem by a constant, that `pos` currently points at, and `inst`
161 /// is the associated instruction.  `inst` is replaced by a sequence of other operations that
162 /// calculate the same result. Note that there are various `divrem_info` cases where we cannot do
163 /// any transformation, in which case `inst` is left unchanged.
do_divrem_transformation(divrem_info: &DivRemByConstInfo, pos: &mut FuncCursor, inst: Inst)164 fn do_divrem_transformation(divrem_info: &DivRemByConstInfo, pos: &mut FuncCursor, inst: Inst) {
165     let is_rem = match *divrem_info {
166         DivRemByConstInfo::DivU32(_, _)
167         | DivRemByConstInfo::DivU64(_, _)
168         | DivRemByConstInfo::DivS32(_, _)
169         | DivRemByConstInfo::DivS64(_, _) => false,
170         DivRemByConstInfo::RemU32(_, _)
171         | DivRemByConstInfo::RemU64(_, _)
172         | DivRemByConstInfo::RemS32(_, _)
173         | DivRemByConstInfo::RemS64(_, _) => true,
174     };
175 
176     match *divrem_info {
177         // -------------------- U32 --------------------
178 
179         // U32 div, rem by zero: ignore
180         DivRemByConstInfo::DivU32(_n1, 0) | DivRemByConstInfo::RemU32(_n1, 0) => {}
181 
182         // U32 div by 1: identity
183         // U32 rem by 1: zero
184         DivRemByConstInfo::DivU32(n1, 1) | DivRemByConstInfo::RemU32(n1, 1) => {
185             if is_rem {
186                 pos.func.dfg.replace(inst).iconst(I32, 0);
187             } else {
188                 replace_single_result_with_alias(&mut pos.func.dfg, inst, n1);
189             }
190         }
191 
192         // U32 div, rem by a power-of-2
193         DivRemByConstInfo::DivU32(n1, d) | DivRemByConstInfo::RemU32(n1, d)
194             if d.is_power_of_two() =>
195         {
196             debug_assert!(d >= 2);
197             // compute k where d == 2^k
198             let k = d.trailing_zeros();
199             debug_assert!(k >= 1 && k <= 31);
200             if is_rem {
201                 let mask = (1u64 << k) - 1;
202                 pos.func.dfg.replace(inst).band_imm(n1, mask as i64);
203             } else {
204                 pos.func.dfg.replace(inst).ushr_imm(n1, k as i64);
205             }
206         }
207 
208         // U32 div, rem by non-power-of-2
209         DivRemByConstInfo::DivU32(n1, d) | DivRemByConstInfo::RemU32(n1, d) => {
210             debug_assert!(d >= 3);
211             let MU32 {
212                 mul_by,
213                 do_add,
214                 shift_by,
215             } = magic_u32(d);
216             let qf; // final quotient
217             let q0 = pos.ins().iconst(I32, mul_by as i64);
218             let q1 = pos.ins().umulhi(n1, q0);
219             if do_add {
220                 debug_assert!(shift_by >= 1 && shift_by <= 32);
221                 let t1 = pos.ins().isub(n1, q1);
222                 let t2 = pos.ins().ushr_imm(t1, 1);
223                 let t3 = pos.ins().iadd(t2, q1);
224                 // I never found any case where shift_by == 1 here.
225                 // So there's no attempt to fold out a zero shift.
226                 debug_assert_ne!(shift_by, 1);
227                 qf = pos.ins().ushr_imm(t3, (shift_by - 1) as i64);
228             } else {
229                 debug_assert!(shift_by >= 0 && shift_by <= 31);
230                 // Whereas there are known cases here for shift_by == 0.
231                 if shift_by > 0 {
232                     qf = pos.ins().ushr_imm(q1, shift_by as i64);
233                 } else {
234                     qf = q1;
235                 }
236             }
237             // Now qf holds the final quotient. If necessary calculate the
238             // remainder instead.
239             if is_rem {
240                 let tt = pos.ins().imul_imm(qf, d as i64);
241                 pos.func.dfg.replace(inst).isub(n1, tt);
242             } else {
243                 replace_single_result_with_alias(&mut pos.func.dfg, inst, qf);
244             }
245         }
246 
247         // -------------------- U64 --------------------
248 
249         // U64 div, rem by zero: ignore
250         DivRemByConstInfo::DivU64(_n1, 0) | DivRemByConstInfo::RemU64(_n1, 0) => {}
251 
252         // U64 div by 1: identity
253         // U64 rem by 1: zero
254         DivRemByConstInfo::DivU64(n1, 1) | DivRemByConstInfo::RemU64(n1, 1) => {
255             if is_rem {
256                 pos.func.dfg.replace(inst).iconst(I64, 0);
257             } else {
258                 replace_single_result_with_alias(&mut pos.func.dfg, inst, n1);
259             }
260         }
261 
262         // U64 div, rem by a power-of-2
263         DivRemByConstInfo::DivU64(n1, d) | DivRemByConstInfo::RemU64(n1, d)
264             if d.is_power_of_two() =>
265         {
266             debug_assert!(d >= 2);
267             // compute k where d == 2^k
268             let k = d.trailing_zeros();
269             debug_assert!(k >= 1 && k <= 63);
270             if is_rem {
271                 let mask = (1u64 << k) - 1;
272                 pos.func.dfg.replace(inst).band_imm(n1, mask as i64);
273             } else {
274                 pos.func.dfg.replace(inst).ushr_imm(n1, k as i64);
275             }
276         }
277 
278         // U64 div, rem by non-power-of-2
279         DivRemByConstInfo::DivU64(n1, d) | DivRemByConstInfo::RemU64(n1, d) => {
280             debug_assert!(d >= 3);
281             let MU64 {
282                 mul_by,
283                 do_add,
284                 shift_by,
285             } = magic_u64(d);
286             let qf; // final quotient
287             let q0 = pos.ins().iconst(I64, mul_by as i64);
288             let q1 = pos.ins().umulhi(n1, q0);
289             if do_add {
290                 debug_assert!(shift_by >= 1 && shift_by <= 64);
291                 let t1 = pos.ins().isub(n1, q1);
292                 let t2 = pos.ins().ushr_imm(t1, 1);
293                 let t3 = pos.ins().iadd(t2, q1);
294                 // I never found any case where shift_by == 1 here.
295                 // So there's no attempt to fold out a zero shift.
296                 debug_assert_ne!(shift_by, 1);
297                 qf = pos.ins().ushr_imm(t3, (shift_by - 1) as i64);
298             } else {
299                 debug_assert!(shift_by >= 0 && shift_by <= 63);
300                 // Whereas there are known cases here for shift_by == 0.
301                 if shift_by > 0 {
302                     qf = pos.ins().ushr_imm(q1, shift_by as i64);
303                 } else {
304                     qf = q1;
305                 }
306             }
307             // Now qf holds the final quotient. If necessary calculate the
308             // remainder instead.
309             if is_rem {
310                 let tt = pos.ins().imul_imm(qf, d as i64);
311                 pos.func.dfg.replace(inst).isub(n1, tt);
312             } else {
313                 replace_single_result_with_alias(&mut pos.func.dfg, inst, qf);
314             }
315         }
316 
317         // -------------------- S32 --------------------
318 
319         // S32 div, rem by zero or -1: ignore
320         DivRemByConstInfo::DivS32(_n1, -1)
321         | DivRemByConstInfo::RemS32(_n1, -1)
322         | DivRemByConstInfo::DivS32(_n1, 0)
323         | DivRemByConstInfo::RemS32(_n1, 0) => {}
324 
325         // S32 div by 1: identity
326         // S32 rem by 1: zero
327         DivRemByConstInfo::DivS32(n1, 1) | DivRemByConstInfo::RemS32(n1, 1) => {
328             if is_rem {
329                 pos.func.dfg.replace(inst).iconst(I32, 0);
330             } else {
331                 replace_single_result_with_alias(&mut pos.func.dfg, inst, n1);
332             }
333         }
334 
335         DivRemByConstInfo::DivS32(n1, d) | DivRemByConstInfo::RemS32(n1, d) => {
336             if let Some((is_negative, k)) = i32_is_power_of_two(d) {
337                 // k can be 31 only in the case that d is -2^31.
338                 debug_assert!(k >= 1 && k <= 31);
339                 let t1 = if k - 1 == 0 {
340                     n1
341                 } else {
342                     pos.ins().sshr_imm(n1, (k - 1) as i64)
343                 };
344                 let t2 = pos.ins().ushr_imm(t1, (32 - k) as i64);
345                 let t3 = pos.ins().iadd(n1, t2);
346                 if is_rem {
347                     // S32 rem by a power-of-2
348                     let t4 = pos.ins().band_imm(t3, i32::wrapping_neg(1 << k) as i64);
349                     // Curiously, we don't care here what the sign of d is.
350                     pos.func.dfg.replace(inst).isub(n1, t4);
351                 } else {
352                     // S32 div by a power-of-2
353                     let t4 = pos.ins().sshr_imm(t3, k as i64);
354                     if is_negative {
355                         pos.func.dfg.replace(inst).irsub_imm(t4, 0);
356                     } else {
357                         replace_single_result_with_alias(&mut pos.func.dfg, inst, t4);
358                     }
359                 }
360             } else {
361                 // S32 div, rem by a non-power-of-2
362                 debug_assert!(d < -2 || d > 2);
363                 let MS32 { mul_by, shift_by } = magic_s32(d);
364                 let q0 = pos.ins().iconst(I32, mul_by as i64);
365                 let q1 = pos.ins().smulhi(n1, q0);
366                 let q2 = if d > 0 && mul_by < 0 {
367                     pos.ins().iadd(q1, n1)
368                 } else if d < 0 && mul_by > 0 {
369                     pos.ins().isub(q1, n1)
370                 } else {
371                     q1
372                 };
373                 debug_assert!(shift_by >= 0 && shift_by <= 31);
374                 let q3 = if shift_by == 0 {
375                     q2
376                 } else {
377                     pos.ins().sshr_imm(q2, shift_by as i64)
378                 };
379                 let t1 = pos.ins().ushr_imm(q3, 31);
380                 let qf = pos.ins().iadd(q3, t1);
381                 // Now qf holds the final quotient. If necessary calculate
382                 // the remainder instead.
383                 if is_rem {
384                     let tt = pos.ins().imul_imm(qf, d as i64);
385                     pos.func.dfg.replace(inst).isub(n1, tt);
386                 } else {
387                     replace_single_result_with_alias(&mut pos.func.dfg, inst, qf);
388                 }
389             }
390         }
391 
392         // -------------------- S64 --------------------
393 
394         // S64 div, rem by zero or -1: ignore
395         DivRemByConstInfo::DivS64(_n1, -1)
396         | DivRemByConstInfo::RemS64(_n1, -1)
397         | DivRemByConstInfo::DivS64(_n1, 0)
398         | DivRemByConstInfo::RemS64(_n1, 0) => {}
399 
400         // S64 div by 1: identity
401         // S64 rem by 1: zero
402         DivRemByConstInfo::DivS64(n1, 1) | DivRemByConstInfo::RemS64(n1, 1) => {
403             if is_rem {
404                 pos.func.dfg.replace(inst).iconst(I64, 0);
405             } else {
406                 replace_single_result_with_alias(&mut pos.func.dfg, inst, n1);
407             }
408         }
409 
410         DivRemByConstInfo::DivS64(n1, d) | DivRemByConstInfo::RemS64(n1, d) => {
411             if let Some((is_negative, k)) = i64_is_power_of_two(d) {
412                 // k can be 63 only in the case that d is -2^63.
413                 debug_assert!(k >= 1 && k <= 63);
414                 let t1 = if k - 1 == 0 {
415                     n1
416                 } else {
417                     pos.ins().sshr_imm(n1, (k - 1) as i64)
418                 };
419                 let t2 = pos.ins().ushr_imm(t1, (64 - k) as i64);
420                 let t3 = pos.ins().iadd(n1, t2);
421                 if is_rem {
422                     // S64 rem by a power-of-2
423                     let t4 = pos.ins().band_imm(t3, i64::wrapping_neg(1 << k));
424                     // Curiously, we don't care here what the sign of d is.
425                     pos.func.dfg.replace(inst).isub(n1, t4);
426                 } else {
427                     // S64 div by a power-of-2
428                     let t4 = pos.ins().sshr_imm(t3, k as i64);
429                     if is_negative {
430                         pos.func.dfg.replace(inst).irsub_imm(t4, 0);
431                     } else {
432                         replace_single_result_with_alias(&mut pos.func.dfg, inst, t4);
433                     }
434                 }
435             } else {
436                 // S64 div, rem by a non-power-of-2
437                 debug_assert!(d < -2 || d > 2);
438                 let MS64 { mul_by, shift_by } = magic_s64(d);
439                 let q0 = pos.ins().iconst(I64, mul_by);
440                 let q1 = pos.ins().smulhi(n1, q0);
441                 let q2 = if d > 0 && mul_by < 0 {
442                     pos.ins().iadd(q1, n1)
443                 } else if d < 0 && mul_by > 0 {
444                     pos.ins().isub(q1, n1)
445                 } else {
446                     q1
447                 };
448                 debug_assert!(shift_by >= 0 && shift_by <= 63);
449                 let q3 = if shift_by == 0 {
450                     q2
451                 } else {
452                     pos.ins().sshr_imm(q2, shift_by as i64)
453                 };
454                 let t1 = pos.ins().ushr_imm(q3, 63);
455                 let qf = pos.ins().iadd(q3, t1);
456                 // Now qf holds the final quotient. If necessary calculate
457                 // the remainder instead.
458                 if is_rem {
459                     let tt = pos.ins().imul_imm(qf, d);
460                     pos.func.dfg.replace(inst).isub(n1, tt);
461                 } else {
462                     replace_single_result_with_alias(&mut pos.func.dfg, inst, qf);
463                 }
464             }
465         }
466     }
467 }
468 
469 enum BranchOrderKind {
470     BrzToBrnz(Value),
471     BrnzToBrz(Value),
472     InvertIcmpCond(IntCC, Value, Value),
473 }
474 
475 /// Reorder branches to encourage fallthroughs.
476 ///
477 /// When a block ends with a conditional branch followed by an unconditional
478 /// branch, this will reorder them if one of them is branching to the next Block
479 /// layout-wise. The unconditional jump can then become a fallthrough.
branch_order(pos: &mut FuncCursor, cfg: &mut ControlFlowGraph, block: Block, inst: Inst)480 fn branch_order(pos: &mut FuncCursor, cfg: &mut ControlFlowGraph, block: Block, inst: Inst) {
481     let (term_inst, term_inst_args, term_dest, cond_inst, cond_inst_args, cond_dest, kind) =
482         match pos.func.dfg[inst] {
483             InstructionData::Jump {
484                 opcode: Opcode::Jump,
485                 destination,
486                 ref args,
487             } => {
488                 let next_block = if let Some(next_block) = pos.func.layout.next_block(block) {
489                     next_block
490                 } else {
491                     return;
492                 };
493 
494                 if destination == next_block {
495                     return;
496                 }
497 
498                 let prev_inst = if let Some(prev_inst) = pos.func.layout.prev_inst(inst) {
499                     prev_inst
500                 } else {
501                     return;
502                 };
503 
504                 let prev_inst_data = &pos.func.dfg[prev_inst];
505 
506                 if let Some(prev_dest) = prev_inst_data.branch_destination() {
507                     if prev_dest != next_block {
508                         return;
509                     }
510                 } else {
511                     return;
512                 }
513 
514                 match prev_inst_data {
515                     InstructionData::Branch {
516                         opcode,
517                         args: ref prev_args,
518                         destination: cond_dest,
519                     } => {
520                         let cond_arg = {
521                             let args = pos.func.dfg.inst_args(prev_inst);
522                             args[0]
523                         };
524 
525                         let kind = match opcode {
526                             Opcode::Brz => BranchOrderKind::BrzToBrnz(cond_arg),
527                             Opcode::Brnz => BranchOrderKind::BrnzToBrz(cond_arg),
528                             _ => panic!("unexpected opcode"),
529                         };
530 
531                         (
532                             inst,
533                             args.clone(),
534                             destination,
535                             prev_inst,
536                             prev_args.clone(),
537                             *cond_dest,
538                             kind,
539                         )
540                     }
541                     InstructionData::BranchIcmp {
542                         opcode: Opcode::BrIcmp,
543                         cond,
544                         destination: cond_dest,
545                         args: ref prev_args,
546                     } => {
547                         let (x_arg, y_arg) = {
548                             let args = pos.func.dfg.inst_args(prev_inst);
549                             (args[0], args[1])
550                         };
551 
552                         (
553                             inst,
554                             args.clone(),
555                             destination,
556                             prev_inst,
557                             prev_args.clone(),
558                             *cond_dest,
559                             BranchOrderKind::InvertIcmpCond(*cond, x_arg, y_arg),
560                         )
561                     }
562                     _ => return,
563                 }
564             }
565 
566             _ => return,
567         };
568 
569     let cond_args = cond_inst_args.as_slice(&pos.func.dfg.value_lists).to_vec();
570     let term_args = term_inst_args.as_slice(&pos.func.dfg.value_lists).to_vec();
571 
572     match kind {
573         BranchOrderKind::BrnzToBrz(cond_arg) => {
574             pos.func
575                 .dfg
576                 .replace(term_inst)
577                 .jump(cond_dest, &cond_args[1..]);
578             pos.func
579                 .dfg
580                 .replace(cond_inst)
581                 .brz(cond_arg, term_dest, &term_args);
582         }
583         BranchOrderKind::BrzToBrnz(cond_arg) => {
584             pos.func
585                 .dfg
586                 .replace(term_inst)
587                 .jump(cond_dest, &cond_args[1..]);
588             pos.func
589                 .dfg
590                 .replace(cond_inst)
591                 .brnz(cond_arg, term_dest, &term_args);
592         }
593         BranchOrderKind::InvertIcmpCond(cond, x_arg, y_arg) => {
594             pos.func
595                 .dfg
596                 .replace(term_inst)
597                 .jump(cond_dest, &cond_args[2..]);
598             pos.func.dfg.replace(cond_inst).br_icmp(
599                 cond.inverse(),
600                 x_arg,
601                 y_arg,
602                 term_dest,
603                 &term_args,
604             );
605         }
606     }
607 
608     cfg.recompute_block(pos.func, block);
609 }
610 
611 #[cfg(feature = "enable-peepmatic")]
612 mod simplify {
613     use super::*;
614     use crate::peepmatic::ValueOrInst;
615 
616     pub type PeepholeOptimizer<'a, 'b> =
617         peepmatic_runtime::optimizer::PeepholeOptimizer<'static, 'a, &'b dyn TargetIsa>;
618 
peephole_optimizer<'a, 'b>(isa: &'b dyn TargetIsa) -> PeepholeOptimizer<'a, 'b>619     pub fn peephole_optimizer<'a, 'b>(isa: &'b dyn TargetIsa) -> PeepholeOptimizer<'a, 'b> {
620         crate::peepmatic::preopt(isa)
621     }
622 
apply_all<'a, 'b>( optimizer: &mut PeepholeOptimizer<'a, 'b>, pos: &mut FuncCursor<'a>, inst: Inst, _native_word_width: u32, )623     pub fn apply_all<'a, 'b>(
624         optimizer: &mut PeepholeOptimizer<'a, 'b>,
625         pos: &mut FuncCursor<'a>,
626         inst: Inst,
627         _native_word_width: u32,
628     ) {
629         // After we apply one optimization, that might make another
630         // optimization applicable. Keep running the peephole optimizer
631         // until either:
632         //
633         // * No optimization applied, and therefore it doesn't make sense to
634         //   try again, because no optimization will apply again.
635         //
636         // * Or when we replaced an instruction with an alias to an existing
637         //   value, because we already ran the peephole optimizer over the
638         //   aliased value's instruction in an early part of the traversal
639         //   over the function.
640         while let Some(ValueOrInst::Inst(new_inst)) =
641             optimizer.apply_one(pos, ValueOrInst::Inst(inst))
642         {
643             // We transplanted a new instruction into the current
644             // instruction, so the "new" instruction is actually the same
645             // one, just with different data.
646             debug_assert_eq!(new_inst, inst);
647         }
648         debug_assert_eq!(pos.current_inst(), Some(inst));
649     }
650 }
651 
652 #[cfg(not(feature = "enable-peepmatic"))]
653 mod simplify {
654     use super::*;
655     use crate::ir::{
656         dfg::ValueDef,
657         immediates,
658         instructions::{Opcode, ValueList},
659         types::{B8, I16, I32, I8},
660     };
661     use std::marker::PhantomData;
662 
663     pub struct PeepholeOptimizer<'a, 'b> {
664         phantom: PhantomData<(&'a (), &'b ())>,
665     }
666 
peephole_optimizer<'a, 'b>(_: &dyn TargetIsa) -> PeepholeOptimizer<'a, 'b>667     pub fn peephole_optimizer<'a, 'b>(_: &dyn TargetIsa) -> PeepholeOptimizer<'a, 'b> {
668         PeepholeOptimizer {
669             phantom: PhantomData,
670         }
671     }
672 
apply_all<'a, 'b>( _optimizer: &mut PeepholeOptimizer<'a, 'b>, pos: &mut FuncCursor<'a>, inst: Inst, native_word_width: u32, )673     pub fn apply_all<'a, 'b>(
674         _optimizer: &mut PeepholeOptimizer<'a, 'b>,
675         pos: &mut FuncCursor<'a>,
676         inst: Inst,
677         native_word_width: u32,
678     ) {
679         simplify(pos, inst, native_word_width);
680         branch_opt(pos, inst);
681     }
682 
683     #[inline]
resolve_imm64_value(dfg: &DataFlowGraph, value: Value) -> Option<immediates::Imm64>684     fn resolve_imm64_value(dfg: &DataFlowGraph, value: Value) -> Option<immediates::Imm64> {
685         if let ValueDef::Result(candidate_inst, _) = dfg.value_def(value) {
686             if let InstructionData::UnaryImm {
687                 opcode: Opcode::Iconst,
688                 imm,
689             } = dfg[candidate_inst]
690             {
691                 return Some(imm);
692             }
693         }
694         None
695     }
696 
697     /// Try to transform [(x << N) >> N] into a (un)signed-extending move.
698     /// Returns true if the final instruction has been converted to such a move.
try_fold_extended_move( pos: &mut FuncCursor, inst: Inst, opcode: Opcode, arg: Value, imm: immediates::Imm64, ) -> bool699     fn try_fold_extended_move(
700         pos: &mut FuncCursor,
701         inst: Inst,
702         opcode: Opcode,
703         arg: Value,
704         imm: immediates::Imm64,
705     ) -> bool {
706         if let ValueDef::Result(arg_inst, _) = pos.func.dfg.value_def(arg) {
707             if let InstructionData::BinaryImm64 {
708                 opcode: Opcode::IshlImm,
709                 arg: prev_arg,
710                 imm: prev_imm,
711             } = &pos.func.dfg[arg_inst]
712             {
713                 if imm != *prev_imm {
714                     return false;
715                 }
716 
717                 let dest_ty = pos.func.dfg.ctrl_typevar(inst);
718                 if dest_ty != pos.func.dfg.ctrl_typevar(arg_inst) || !dest_ty.is_int() {
719                     return false;
720                 }
721 
722                 let imm_bits: i64 = imm.into();
723                 let ireduce_ty = match (dest_ty.lane_bits() as i64).wrapping_sub(imm_bits) {
724                     8 => I8,
725                     16 => I16,
726                     32 => I32,
727                     _ => return false,
728                 };
729                 let ireduce_ty = ireduce_ty.by(dest_ty.lane_count()).unwrap();
730 
731                 // This becomes a no-op, since ireduce_ty has a smaller lane width than
732                 // the argument type (also the destination type).
733                 let arg = *prev_arg;
734                 let narrower_arg = pos.ins().ireduce(ireduce_ty, arg);
735 
736                 if opcode == Opcode::UshrImm {
737                     pos.func.dfg.replace(inst).uextend(dest_ty, narrower_arg);
738                 } else {
739                     pos.func.dfg.replace(inst).sextend(dest_ty, narrower_arg);
740                 }
741                 return true;
742             }
743         }
744         false
745     }
746 
747     /// Apply basic simplifications.
748     ///
749     /// This folds constants with arithmetic to form `_imm` instructions, and other minor
750     /// simplifications.
751     ///
752     /// Doesn't apply some simplifications if the native word width (in bytes) is smaller than the
753     /// controlling type's width of the instruction. This would result in an illegal instruction that
754     /// would likely be expanded back into an instruction on smaller types with the same initial
755     /// opcode, creating unnecessary churn.
simplify(pos: &mut FuncCursor, inst: Inst, native_word_width: u32)756     fn simplify(pos: &mut FuncCursor, inst: Inst, native_word_width: u32) {
757         match pos.func.dfg[inst] {
758             InstructionData::Binary { opcode, args } => {
759                 if let Some(mut imm) = resolve_imm64_value(&pos.func.dfg, args[1]) {
760                     let new_opcode = match opcode {
761                         Opcode::Iadd => Opcode::IaddImm,
762                         Opcode::Imul => Opcode::ImulImm,
763                         Opcode::Sdiv => Opcode::SdivImm,
764                         Opcode::Udiv => Opcode::UdivImm,
765                         Opcode::Srem => Opcode::SremImm,
766                         Opcode::Urem => Opcode::UremImm,
767                         Opcode::Band => Opcode::BandImm,
768                         Opcode::Bor => Opcode::BorImm,
769                         Opcode::Bxor => Opcode::BxorImm,
770                         Opcode::Rotl => Opcode::RotlImm,
771                         Opcode::Rotr => Opcode::RotrImm,
772                         Opcode::Ishl => Opcode::IshlImm,
773                         Opcode::Ushr => Opcode::UshrImm,
774                         Opcode::Sshr => Opcode::SshrImm,
775                         Opcode::Isub => {
776                             imm = imm.wrapping_neg();
777                             Opcode::IaddImm
778                         }
779                         Opcode::Ifcmp => Opcode::IfcmpImm,
780                         _ => return,
781                     };
782                     let ty = pos.func.dfg.ctrl_typevar(inst);
783                     if ty.bytes() <= native_word_width {
784                         pos.func
785                             .dfg
786                             .replace(inst)
787                             .BinaryImm64(new_opcode, ty, imm, args[0]);
788 
789                         // Repeat for BinaryImm simplification.
790                         simplify(pos, inst, native_word_width);
791                     }
792                 } else if let Some(imm) = resolve_imm64_value(&pos.func.dfg, args[0]) {
793                     let new_opcode = match opcode {
794                         Opcode::Iadd => Opcode::IaddImm,
795                         Opcode::Imul => Opcode::ImulImm,
796                         Opcode::Band => Opcode::BandImm,
797                         Opcode::Bor => Opcode::BorImm,
798                         Opcode::Bxor => Opcode::BxorImm,
799                         Opcode::Isub => Opcode::IrsubImm,
800                         _ => return,
801                     };
802                     let ty = pos.func.dfg.ctrl_typevar(inst);
803                     if ty.bytes() <= native_word_width {
804                         pos.func
805                             .dfg
806                             .replace(inst)
807                             .BinaryImm64(new_opcode, ty, imm, args[1]);
808                     }
809                 }
810             }
811 
812             InstructionData::Unary { opcode, arg } => {
813                 if let Opcode::AdjustSpDown = opcode {
814                     if let Some(imm) = resolve_imm64_value(&pos.func.dfg, arg) {
815                         // Note this works for both positive and negative immediate values.
816                         pos.func.dfg.replace(inst).adjust_sp_down_imm(imm);
817                     }
818                 }
819             }
820 
821             InstructionData::BinaryImm64 { opcode, arg, imm } => {
822                 let ty = pos.func.dfg.ctrl_typevar(inst);
823 
824                 let mut arg = arg;
825                 let mut imm = imm;
826                 match opcode {
827                     Opcode::IaddImm
828                     | Opcode::ImulImm
829                     | Opcode::BorImm
830                     | Opcode::BandImm
831                     | Opcode::BxorImm => {
832                         // Fold binary_op(C2, binary_op(C1, x)) into binary_op(binary_op(C1, C2), x)
833                         if let ValueDef::Result(arg_inst, _) = pos.func.dfg.value_def(arg) {
834                             if let InstructionData::BinaryImm64 {
835                                 opcode: prev_opcode,
836                                 arg: prev_arg,
837                                 imm: prev_imm,
838                             } = &pos.func.dfg[arg_inst]
839                             {
840                                 if opcode == *prev_opcode
841                                     && ty == pos.func.dfg.ctrl_typevar(arg_inst)
842                                 {
843                                     let lhs: i64 = imm.into();
844                                     let rhs: i64 = (*prev_imm).into();
845                                     let new_imm = match opcode {
846                                         Opcode::BorImm => lhs | rhs,
847                                         Opcode::BandImm => lhs & rhs,
848                                         Opcode::BxorImm => lhs ^ rhs,
849                                         Opcode::IaddImm => lhs.wrapping_add(rhs),
850                                         Opcode::ImulImm => lhs.wrapping_mul(rhs),
851                                         _ => panic!("can't happen"),
852                                     };
853                                     let new_imm = immediates::Imm64::from(new_imm);
854                                     let new_arg = *prev_arg;
855                                     pos.func
856                                         .dfg
857                                         .replace(inst)
858                                         .BinaryImm64(opcode, ty, new_imm, new_arg);
859                                     imm = new_imm;
860                                     arg = new_arg;
861                                 }
862                             }
863                         }
864                     }
865 
866                     Opcode::UshrImm | Opcode::SshrImm => {
867                         if pos.func.dfg.ctrl_typevar(inst).bytes() <= native_word_width
868                             && try_fold_extended_move(pos, inst, opcode, arg, imm)
869                         {
870                             return;
871                         }
872                     }
873 
874                     _ => {}
875                 };
876 
877                 // Replace operations that are no-ops.
878                 match (opcode, imm.into()) {
879                     (Opcode::IaddImm, 0)
880                     | (Opcode::ImulImm, 1)
881                     | (Opcode::SdivImm, 1)
882                     | (Opcode::UdivImm, 1)
883                     | (Opcode::BorImm, 0)
884                     | (Opcode::BandImm, -1)
885                     | (Opcode::BxorImm, 0)
886                     | (Opcode::RotlImm, 0)
887                     | (Opcode::RotrImm, 0)
888                     | (Opcode::IshlImm, 0)
889                     | (Opcode::UshrImm, 0)
890                     | (Opcode::SshrImm, 0) => {
891                         // Alias the result value with the original argument.
892                         replace_single_result_with_alias(&mut pos.func.dfg, inst, arg);
893                     }
894                     (Opcode::ImulImm, 0) | (Opcode::BandImm, 0) => {
895                         // Replace by zero.
896                         pos.func.dfg.replace(inst).iconst(ty, 0);
897                     }
898                     (Opcode::BorImm, -1) => {
899                         // Replace by minus one.
900                         pos.func.dfg.replace(inst).iconst(ty, -1);
901                     }
902                     _ => {}
903                 }
904             }
905 
906             InstructionData::IntCompare { opcode, cond, args } => {
907                 debug_assert_eq!(opcode, Opcode::Icmp);
908                 if let Some(imm) = resolve_imm64_value(&pos.func.dfg, args[1]) {
909                     if pos.func.dfg.ctrl_typevar(inst).bytes() <= native_word_width {
910                         pos.func.dfg.replace(inst).icmp_imm(cond, args[0], imm);
911                     }
912                 }
913             }
914 
915             InstructionData::CondTrap { .. }
916             | InstructionData::Branch { .. }
917             | InstructionData::Ternary {
918                 opcode: Opcode::Select,
919                 ..
920             } => {
921                 // Fold away a redundant `bint`.
922                 let condition_def = {
923                     let args = pos.func.dfg.inst_args(inst);
924                     pos.func.dfg.value_def(args[0])
925                 };
926                 if let ValueDef::Result(def_inst, _) = condition_def {
927                     if let InstructionData::Unary {
928                         opcode: Opcode::Bint,
929                         arg: bool_val,
930                     } = pos.func.dfg[def_inst]
931                     {
932                         let args = pos.func.dfg.inst_args_mut(inst);
933                         args[0] = bool_val;
934                     }
935                 }
936             }
937 
938             InstructionData::Ternary {
939                 opcode: Opcode::Bitselect,
940                 args,
941             } => {
942                 let old_cond_type = pos.func.dfg.value_type(args[0]);
943                 if !old_cond_type.is_vector() {
944                     return;
945                 }
946 
947                 // Replace bitselect with vselect if each lane of controlling mask is either
948                 // all ones or all zeroes; on x86 bitselect is encoded using 3 instructions,
949                 // while vselect can be encoded using single BLEND instruction.
950                 if let ValueDef::Result(def_inst, _) = pos.func.dfg.value_def(args[0]) {
951                     let (cond_val, cond_type) = match pos.func.dfg[def_inst] {
952                         InstructionData::Unary {
953                             opcode: Opcode::RawBitcast,
954                             arg,
955                         } => {
956                             // If controlling mask is raw-bitcasted boolean vector then
957                             // we know each lane is either all zeroes or ones,
958                             // so we can use vselect instruction instead.
959                             let arg_type = pos.func.dfg.value_type(arg);
960                             if !arg_type.is_vector() || !arg_type.lane_type().is_bool() {
961                                 return;
962                             }
963                             (arg, arg_type)
964                         }
965                         InstructionData::UnaryConst {
966                             opcode: Opcode::Vconst,
967                             constant_handle,
968                         } => {
969                             // If each byte of controlling mask is 0x00 or 0xFF then
970                             // we will always bitcast our way to vselect(B8x16, I8x16, I8x16).
971                             // Bitselect operates at bit level, so the lane types don't matter.
972                             let const_data = pos.func.dfg.constants.get(constant_handle);
973                             if !const_data.iter().all(|&b| b == 0 || b == 0xFF) {
974                                 return;
975                             }
976                             let new_type = B8.by(old_cond_type.bytes() as u16).unwrap();
977                             (pos.ins().raw_bitcast(new_type, args[0]), new_type)
978                         }
979                         _ => return,
980                     };
981 
982                     let lane_type = Type::int(cond_type.lane_bits() as u16).unwrap();
983                     let arg_type = lane_type.by(cond_type.lane_count()).unwrap();
984                     let old_arg_type = pos.func.dfg.value_type(args[1]);
985 
986                     if arg_type != old_arg_type {
987                         // Operands types must match, we need to add bitcasts.
988                         let arg1 = pos.ins().raw_bitcast(arg_type, args[1]);
989                         let arg2 = pos.ins().raw_bitcast(arg_type, args[2]);
990                         let ret = pos.ins().vselect(cond_val, arg1, arg2);
991                         pos.func.dfg.replace(inst).raw_bitcast(old_arg_type, ret);
992                     } else {
993                         pos.func
994                             .dfg
995                             .replace(inst)
996                             .vselect(cond_val, args[1], args[2]);
997                     }
998                 }
999             }
1000 
1001             _ => {}
1002         }
1003     }
1004 
1005     struct BranchOptInfo {
1006         br_inst: Inst,
1007         cmp_arg: Value,
1008         args: ValueList,
1009         new_opcode: Opcode,
1010     }
1011 
1012     /// Fold comparisons into branch operations when possible.
1013     ///
1014     /// This matches against operations which compare against zero, then use the
1015     /// result in a `brz` or `brnz` branch. It folds those two operations into a
1016     /// single `brz` or `brnz`.
branch_opt(pos: &mut FuncCursor, inst: Inst)1017     fn branch_opt(pos: &mut FuncCursor, inst: Inst) {
1018         let mut info = if let InstructionData::Branch {
1019             opcode: br_opcode,
1020             args: ref br_args,
1021             ..
1022         } = pos.func.dfg[inst]
1023         {
1024             let first_arg = {
1025                 let args = pos.func.dfg.inst_args(inst);
1026                 args[0]
1027             };
1028 
1029             let icmp_inst =
1030                 if let ValueDef::Result(icmp_inst, _) = pos.func.dfg.value_def(first_arg) {
1031                     icmp_inst
1032                 } else {
1033                     return;
1034                 };
1035 
1036             if let InstructionData::IntCompareImm {
1037                 opcode: Opcode::IcmpImm,
1038                 arg: cmp_arg,
1039                 cond: cmp_cond,
1040                 imm: cmp_imm,
1041             } = pos.func.dfg[icmp_inst]
1042             {
1043                 let cmp_imm: i64 = cmp_imm.into();
1044                 if cmp_imm != 0 {
1045                     return;
1046                 }
1047 
1048                 // icmp_imm returns non-zero when the comparison is true. So, if
1049                 // we're branching on zero, we need to invert the condition.
1050                 let cond = match br_opcode {
1051                     Opcode::Brz => cmp_cond.inverse(),
1052                     Opcode::Brnz => cmp_cond,
1053                     _ => return,
1054                 };
1055 
1056                 let new_opcode = match cond {
1057                     IntCC::Equal => Opcode::Brz,
1058                     IntCC::NotEqual => Opcode::Brnz,
1059                     _ => return,
1060                 };
1061 
1062                 BranchOptInfo {
1063                     br_inst: inst,
1064                     cmp_arg,
1065                     args: br_args.clone(),
1066                     new_opcode,
1067                 }
1068             } else {
1069                 return;
1070             }
1071         } else {
1072             return;
1073         };
1074 
1075         info.args.as_mut_slice(&mut pos.func.dfg.value_lists)[0] = info.cmp_arg;
1076         if let InstructionData::Branch { ref mut opcode, .. } = pos.func.dfg[info.br_inst] {
1077             *opcode = info.new_opcode;
1078         } else {
1079             panic!();
1080         }
1081     }
1082 }
1083 
1084 /// The main pre-opt pass.
do_preopt(func: &mut Function, cfg: &mut ControlFlowGraph, isa: &dyn TargetIsa)1085 pub fn do_preopt(func: &mut Function, cfg: &mut ControlFlowGraph, isa: &dyn TargetIsa) {
1086     let _tt = timing::preopt();
1087 
1088     let mut pos = FuncCursor::new(func);
1089     let native_word_width = isa.pointer_bytes() as u32;
1090     let mut optimizer = simplify::peephole_optimizer(isa);
1091 
1092     while let Some(block) = pos.next_block() {
1093         while let Some(inst) = pos.next_inst() {
1094             simplify::apply_all(&mut optimizer, &mut pos, inst, native_word_width);
1095 
1096             // Try to transform divide-by-constant into simpler operations.
1097             if let Some(divrem_info) = get_div_info(inst, &pos.func.dfg) {
1098                 do_divrem_transformation(&divrem_info, &mut pos, inst);
1099                 continue;
1100             }
1101 
1102             branch_order(&mut pos, cfg, block, inst);
1103         }
1104     }
1105 }
1106