1 //! A pre-legalization rewriting pass.
2 //!
3 //! This module provides early-stage optimizations. The optimizations found
4 //! should be useful for already well-optimized code. More general purpose
5 //! early-stage optimizations can be found in the preopt crate.
6
7 use crate::cursor::{Cursor, FuncCursor};
8 use crate::divconst_magic_numbers::{magic_s32, magic_s64, magic_u32, magic_u64};
9 use crate::divconst_magic_numbers::{MS32, MS64, MU32, MU64};
10 use crate::flowgraph::ControlFlowGraph;
11 use crate::ir::{
12 condcodes::{CondCode, IntCC},
13 instructions::Opcode,
14 types::{I32, I64},
15 Block, DataFlowGraph, Function, Inst, InstBuilder, InstructionData, Type, Value,
16 };
17 use crate::isa::TargetIsa;
18 use crate::timing;
19
20 #[inline]
21 /// Replaces the unique result of the instruction inst to an alias of the given value, and
22 /// replaces the instruction with a nop. Can be used only on instructions producing one unique
23 /// result, otherwise will assert.
replace_single_result_with_alias(dfg: &mut DataFlowGraph, inst: Inst, value: Value)24 fn replace_single_result_with_alias(dfg: &mut DataFlowGraph, inst: Inst, value: Value) {
25 // Replace the result value by an alias.
26 let results = dfg.detach_results(inst);
27 debug_assert!(results.len(&dfg.value_lists) == 1);
28 let result = results.get(0, &dfg.value_lists).unwrap();
29 dfg.change_to_alias(result, value);
30
31 // Replace instruction by a nop.
32 dfg.replace(inst).nop();
33 }
34
35 //----------------------------------------------------------------------
36 //
37 // Pattern-match helpers and transformation for div and rem by constants.
38
39 // Simple math helpers
40
/// If `x` is a power of two, or the negation of one, return `(is_negative, k)`
/// where `|x| == 2^k`. Otherwise return `None`.
#[inline]
fn i32_is_power_of_two(x: i32) -> Option<(bool, u32)> {
    // i32::MIN has no representable absolute value, so handle it up front.
    if x == i32::MIN {
        return Some((true, 31));
    }
    let magnitude = x.wrapping_abs() as u32;
    if magnitude.is_power_of_two() {
        Some((x.is_negative(), magnitude.trailing_zeros()))
    } else {
        None
    }
}
55
/// 64-bit analogue of [`i32_is_power_of_two`]; same comments apply.
#[inline]
fn i64_is_power_of_two(x: i64) -> Option<(bool, u32)> {
    // i64::MIN has no representable absolute value, so handle it up front.
    if x == i64::MIN {
        return Some((true, 63));
    }
    let magnitude = x.wrapping_abs() as u64;
    if magnitude.is_power_of_two() {
        Some((x.is_negative(), magnitude.trailing_zeros()))
    } else {
        None
    }
}
69
/// Representation of an instruction that can be replaced by a single division/remainder operation
/// between a left Value operand and a right immediate operand.
#[derive(Debug)]
enum DivRemByConstInfo {
    /// Unsigned 32-bit division of the value by the constant.
    DivU32(Value, u32),
    /// Unsigned 64-bit division of the value by the constant.
    DivU64(Value, u64),
    /// Signed 32-bit division of the value by the constant.
    DivS32(Value, i32),
    /// Signed 64-bit division of the value by the constant.
    DivS64(Value, i64),
    /// Unsigned 32-bit remainder of the value by the constant.
    RemU32(Value, u32),
    /// Unsigned 64-bit remainder of the value by the constant.
    RemU64(Value, u64),
    /// Signed 32-bit remainder of the value by the constant.
    RemS32(Value, i32),
    /// Signed 64-bit remainder of the value by the constant.
    RemS64(Value, i64),
}
83
84 /// Possibly create a DivRemByConstInfo from the given components, by figuring out which, if any,
85 /// of the 8 cases apply, and also taking care to sanity-check the immediate.
package_up_divrem_info( value: Value, value_type: Type, imm_i64: i64, is_signed: bool, is_rem: bool, ) -> Option<DivRemByConstInfo>86 fn package_up_divrem_info(
87 value: Value,
88 value_type: Type,
89 imm_i64: i64,
90 is_signed: bool,
91 is_rem: bool,
92 ) -> Option<DivRemByConstInfo> {
93 let imm_u64 = imm_i64 as u64;
94
95 match (is_signed, value_type) {
96 (false, I32) => {
97 if imm_u64 < 0x1_0000_0000 {
98 if is_rem {
99 Some(DivRemByConstInfo::RemU32(value, imm_u64 as u32))
100 } else {
101 Some(DivRemByConstInfo::DivU32(value, imm_u64 as u32))
102 }
103 } else {
104 None
105 }
106 }
107
108 (false, I64) => {
109 // unsigned 64, no range constraint.
110 if is_rem {
111 Some(DivRemByConstInfo::RemU64(value, imm_u64))
112 } else {
113 Some(DivRemByConstInfo::DivU64(value, imm_u64))
114 }
115 }
116
117 (true, I32) => {
118 if imm_u64 <= 0x7fff_ffff || imm_u64 >= 0xffff_ffff_8000_0000 {
119 if is_rem {
120 Some(DivRemByConstInfo::RemS32(value, imm_u64 as i32))
121 } else {
122 Some(DivRemByConstInfo::DivS32(value, imm_u64 as i32))
123 }
124 } else {
125 None
126 }
127 }
128
129 (true, I64) => {
130 // signed 64, no range constraint.
131 if is_rem {
132 Some(DivRemByConstInfo::RemS64(value, imm_u64 as i64))
133 } else {
134 Some(DivRemByConstInfo::DivS64(value, imm_u64 as i64))
135 }
136 }
137
138 _ => None,
139 }
140 }
141
142 /// Examine `inst` to see if it is a div or rem by a constant, and if so return the operands,
143 /// signedness, operation size and div-vs-rem-ness in a handy bundle.
get_div_info(inst: Inst, dfg: &DataFlowGraph) -> Option<DivRemByConstInfo>144 fn get_div_info(inst: Inst, dfg: &DataFlowGraph) -> Option<DivRemByConstInfo> {
145 if let InstructionData::BinaryImm64 { opcode, arg, imm } = dfg[inst] {
146 let (is_signed, is_rem) = match opcode {
147 Opcode::UdivImm => (false, false),
148 Opcode::UremImm => (false, true),
149 Opcode::SdivImm => (true, false),
150 Opcode::SremImm => (true, true),
151 _ => return None,
152 };
153 return package_up_divrem_info(arg, dfg.value_type(arg), imm.into(), is_signed, is_rem);
154 }
155
156 None
157 }
158
/// Actually do the transformation given a bundle containing the relevant information.
/// `divrem_info` describes a div or rem by a constant, that `pos` currently points at, and `inst`
/// is the associated instruction. `inst` is replaced by a sequence of other operations that
/// calculate the same result. Note that there are various `divrem_info` cases where we cannot do
/// any transformation, in which case `inst` is left unchanged.
fn do_divrem_transformation(divrem_info: &DivRemByConstInfo, pos: &mut FuncCursor, inst: Inst) {
    // Div and rem share the pattern arms below, so note once up front which of
    // the two we are producing.
    let is_rem = match *divrem_info {
        DivRemByConstInfo::DivU32(_, _)
        | DivRemByConstInfo::DivU64(_, _)
        | DivRemByConstInfo::DivS32(_, _)
        | DivRemByConstInfo::DivS64(_, _) => false,
        DivRemByConstInfo::RemU32(_, _)
        | DivRemByConstInfo::RemU64(_, _)
        | DivRemByConstInfo::RemS32(_, _)
        | DivRemByConstInfo::RemS64(_, _) => true,
    };

    match *divrem_info {
        // -------------------- U32 --------------------

        // U32 div, rem by zero: ignore
        DivRemByConstInfo::DivU32(_n1, 0) | DivRemByConstInfo::RemU32(_n1, 0) => {}

        // U32 div by 1: identity
        // U32 rem by 1: zero
        DivRemByConstInfo::DivU32(n1, 1) | DivRemByConstInfo::RemU32(n1, 1) => {
            if is_rem {
                pos.func.dfg.replace(inst).iconst(I32, 0);
            } else {
                replace_single_result_with_alias(&mut pos.func.dfg, inst, n1);
            }
        }

        // U32 div, rem by a power-of-2
        DivRemByConstInfo::DivU32(n1, d) | DivRemByConstInfo::RemU32(n1, d)
            if d.is_power_of_two() =>
        {
            debug_assert!(d >= 2);
            // compute k where d == 2^k
            let k = d.trailing_zeros();
            debug_assert!(k >= 1 && k <= 31);
            if is_rem {
                // n % 2^k == n & (2^k - 1)
                let mask = (1u64 << k) - 1;
                pos.func.dfg.replace(inst).band_imm(n1, mask as i64);
            } else {
                // n / 2^k == n >> k (unsigned)
                pos.func.dfg.replace(inst).ushr_imm(n1, k as i64);
            }
        }

        // U32 div, rem by non-power-of-2: use the magic-number multiply
        // from divconst_magic_numbers instead of a hardware divide.
        DivRemByConstInfo::DivU32(n1, d) | DivRemByConstInfo::RemU32(n1, d) => {
            debug_assert!(d >= 3);
            let MU32 {
                mul_by,
                do_add,
                shift_by,
            } = magic_u32(d);
            let qf; // final quotient
            let q0 = pos.ins().iconst(I32, mul_by as i64);
            let q1 = pos.ins().umulhi(n1, q0);
            if do_add {
                debug_assert!(shift_by >= 1 && shift_by <= 32);
                let t1 = pos.ins().isub(n1, q1);
                let t2 = pos.ins().ushr_imm(t1, 1);
                let t3 = pos.ins().iadd(t2, q1);
                // I never found any case where shift_by == 1 here.
                // So there's no attempt to fold out a zero shift.
                debug_assert_ne!(shift_by, 1);
                qf = pos.ins().ushr_imm(t3, (shift_by - 1) as i64);
            } else {
                debug_assert!(shift_by >= 0 && shift_by <= 31);
                // Whereas there are known cases here for shift_by == 0.
                if shift_by > 0 {
                    qf = pos.ins().ushr_imm(q1, shift_by as i64);
                } else {
                    qf = q1;
                }
            }
            // Now qf holds the final quotient. If necessary calculate the
            // remainder instead, as n1 - qf * d.
            if is_rem {
                let tt = pos.ins().imul_imm(qf, d as i64);
                pos.func.dfg.replace(inst).isub(n1, tt);
            } else {
                replace_single_result_with_alias(&mut pos.func.dfg, inst, qf);
            }
        }

        // -------------------- U64 --------------------

        // U64 div, rem by zero: ignore
        DivRemByConstInfo::DivU64(_n1, 0) | DivRemByConstInfo::RemU64(_n1, 0) => {}

        // U64 div by 1: identity
        // U64 rem by 1: zero
        DivRemByConstInfo::DivU64(n1, 1) | DivRemByConstInfo::RemU64(n1, 1) => {
            if is_rem {
                pos.func.dfg.replace(inst).iconst(I64, 0);
            } else {
                replace_single_result_with_alias(&mut pos.func.dfg, inst, n1);
            }
        }

        // U64 div, rem by a power-of-2
        DivRemByConstInfo::DivU64(n1, d) | DivRemByConstInfo::RemU64(n1, d)
            if d.is_power_of_two() =>
        {
            debug_assert!(d >= 2);
            // compute k where d == 2^k
            let k = d.trailing_zeros();
            debug_assert!(k >= 1 && k <= 63);
            if is_rem {
                // n % 2^k == n & (2^k - 1)
                let mask = (1u64 << k) - 1;
                pos.func.dfg.replace(inst).band_imm(n1, mask as i64);
            } else {
                // n / 2^k == n >> k (unsigned)
                pos.func.dfg.replace(inst).ushr_imm(n1, k as i64);
            }
        }

        // U64 div, rem by non-power-of-2: same magic-number scheme as U32,
        // widened to 64 bits.
        DivRemByConstInfo::DivU64(n1, d) | DivRemByConstInfo::RemU64(n1, d) => {
            debug_assert!(d >= 3);
            let MU64 {
                mul_by,
                do_add,
                shift_by,
            } = magic_u64(d);
            let qf; // final quotient
            let q0 = pos.ins().iconst(I64, mul_by as i64);
            let q1 = pos.ins().umulhi(n1, q0);
            if do_add {
                debug_assert!(shift_by >= 1 && shift_by <= 64);
                let t1 = pos.ins().isub(n1, q1);
                let t2 = pos.ins().ushr_imm(t1, 1);
                let t3 = pos.ins().iadd(t2, q1);
                // I never found any case where shift_by == 1 here.
                // So there's no attempt to fold out a zero shift.
                debug_assert_ne!(shift_by, 1);
                qf = pos.ins().ushr_imm(t3, (shift_by - 1) as i64);
            } else {
                debug_assert!(shift_by >= 0 && shift_by <= 63);
                // Whereas there are known cases here for shift_by == 0.
                if shift_by > 0 {
                    qf = pos.ins().ushr_imm(q1, shift_by as i64);
                } else {
                    qf = q1;
                }
            }
            // Now qf holds the final quotient. If necessary calculate the
            // remainder instead, as n1 - qf * d.
            if is_rem {
                let tt = pos.ins().imul_imm(qf, d as i64);
                pos.func.dfg.replace(inst).isub(n1, tt);
            } else {
                replace_single_result_with_alias(&mut pos.func.dfg, inst, qf);
            }
        }

        // -------------------- S32 --------------------

        // S32 div, rem by zero or -1: ignore
        DivRemByConstInfo::DivS32(_n1, -1)
        | DivRemByConstInfo::RemS32(_n1, -1)
        | DivRemByConstInfo::DivS32(_n1, 0)
        | DivRemByConstInfo::RemS32(_n1, 0) => {}

        // S32 div by 1: identity
        // S32 rem by 1: zero
        DivRemByConstInfo::DivS32(n1, 1) | DivRemByConstInfo::RemS32(n1, 1) => {
            if is_rem {
                pos.func.dfg.replace(inst).iconst(I32, 0);
            } else {
                replace_single_result_with_alias(&mut pos.func.dfg, inst, n1);
            }
        }

        DivRemByConstInfo::DivS32(n1, d) | DivRemByConstInfo::RemS32(n1, d) => {
            if let Some((is_negative, k)) = i32_is_power_of_two(d) {
                // k can be 31 only in the case that d is -2^31.
                debug_assert!(k >= 1 && k <= 31);
                // t2 is (2^k - 1) when n1 is negative and 0 otherwise
                // (sign-extending shift right by k-1, then unsigned shift
                // right by 32-k); adding it to n1 biases the dividend so the
                // subsequent arithmetic shift rounds towards zero.
                let t1 = if k - 1 == 0 {
                    n1
                } else {
                    pos.ins().sshr_imm(n1, (k - 1) as i64)
                };
                let t2 = pos.ins().ushr_imm(t1, (32 - k) as i64);
                let t3 = pos.ins().iadd(n1, t2);
                if is_rem {
                    // S32 rem by a power-of-2
                    let t4 = pos.ins().band_imm(t3, i32::wrapping_neg(1 << k) as i64);
                    // Curiously, we don't care here what the sign of d is.
                    pos.func.dfg.replace(inst).isub(n1, t4);
                } else {
                    // S32 div by a power-of-2
                    let t4 = pos.ins().sshr_imm(t3, k as i64);
                    if is_negative {
                        // Dividing by a negative power of two: negate the
                        // quotient (0 - t4).
                        pos.func.dfg.replace(inst).irsub_imm(t4, 0);
                    } else {
                        replace_single_result_with_alias(&mut pos.func.dfg, inst, t4);
                    }
                }
            } else {
                // S32 div, rem by a non-power-of-2
                debug_assert!(d < -2 || d > 2);
                let MS32 { mul_by, shift_by } = magic_s32(d);
                let q0 = pos.ins().iconst(I32, mul_by as i64);
                let q1 = pos.ins().smulhi(n1, q0);
                // When d and mul_by have opposite signs, the magic-number
                // product needs a correction by +/- n1.
                let q2 = if d > 0 && mul_by < 0 {
                    pos.ins().iadd(q1, n1)
                } else if d < 0 && mul_by > 0 {
                    pos.ins().isub(q1, n1)
                } else {
                    q1
                };
                debug_assert!(shift_by >= 0 && shift_by <= 31);
                let q3 = if shift_by == 0 {
                    q2
                } else {
                    pos.ins().sshr_imm(q2, shift_by as i64)
                };
                // Add the sign bit of q3 to round towards zero.
                let t1 = pos.ins().ushr_imm(q3, 31);
                let qf = pos.ins().iadd(q3, t1);
                // Now qf holds the final quotient. If necessary calculate
                // the remainder instead, as n1 - qf * d.
                if is_rem {
                    let tt = pos.ins().imul_imm(qf, d as i64);
                    pos.func.dfg.replace(inst).isub(n1, tt);
                } else {
                    replace_single_result_with_alias(&mut pos.func.dfg, inst, qf);
                }
            }
        }

        // -------------------- S64 --------------------

        // S64 div, rem by zero or -1: ignore
        DivRemByConstInfo::DivS64(_n1, -1)
        | DivRemByConstInfo::RemS64(_n1, -1)
        | DivRemByConstInfo::DivS64(_n1, 0)
        | DivRemByConstInfo::RemS64(_n1, 0) => {}

        // S64 div by 1: identity
        // S64 rem by 1: zero
        DivRemByConstInfo::DivS64(n1, 1) | DivRemByConstInfo::RemS64(n1, 1) => {
            if is_rem {
                pos.func.dfg.replace(inst).iconst(I64, 0);
            } else {
                replace_single_result_with_alias(&mut pos.func.dfg, inst, n1);
            }
        }

        DivRemByConstInfo::DivS64(n1, d) | DivRemByConstInfo::RemS64(n1, d) => {
            if let Some((is_negative, k)) = i64_is_power_of_two(d) {
                // k can be 63 only in the case that d is -2^63.
                debug_assert!(k >= 1 && k <= 63);
                // Same rounding-bias trick as the S32 case, widened to 64
                // bits: t2 is (2^k - 1) for negative n1, 0 otherwise.
                let t1 = if k - 1 == 0 {
                    n1
                } else {
                    pos.ins().sshr_imm(n1, (k - 1) as i64)
                };
                let t2 = pos.ins().ushr_imm(t1, (64 - k) as i64);
                let t3 = pos.ins().iadd(n1, t2);
                if is_rem {
                    // S64 rem by a power-of-2
                    let t4 = pos.ins().band_imm(t3, i64::wrapping_neg(1 << k));
                    // Curiously, we don't care here what the sign of d is.
                    pos.func.dfg.replace(inst).isub(n1, t4);
                } else {
                    // S64 div by a power-of-2
                    let t4 = pos.ins().sshr_imm(t3, k as i64);
                    if is_negative {
                        // Dividing by a negative power of two: negate the
                        // quotient (0 - t4).
                        pos.func.dfg.replace(inst).irsub_imm(t4, 0);
                    } else {
                        replace_single_result_with_alias(&mut pos.func.dfg, inst, t4);
                    }
                }
            } else {
                // S64 div, rem by a non-power-of-2
                debug_assert!(d < -2 || d > 2);
                let MS64 { mul_by, shift_by } = magic_s64(d);
                let q0 = pos.ins().iconst(I64, mul_by);
                let q1 = pos.ins().smulhi(n1, q0);
                // When d and mul_by have opposite signs, the magic-number
                // product needs a correction by +/- n1.
                let q2 = if d > 0 && mul_by < 0 {
                    pos.ins().iadd(q1, n1)
                } else if d < 0 && mul_by > 0 {
                    pos.ins().isub(q1, n1)
                } else {
                    q1
                };
                debug_assert!(shift_by >= 0 && shift_by <= 63);
                let q3 = if shift_by == 0 {
                    q2
                } else {
                    pos.ins().sshr_imm(q2, shift_by as i64)
                };
                // Add the sign bit of q3 to round towards zero.
                let t1 = pos.ins().ushr_imm(q3, 63);
                let qf = pos.ins().iadd(q3, t1);
                // Now qf holds the final quotient. If necessary calculate
                // the remainder instead, as n1 - qf * d.
                if is_rem {
                    let tt = pos.ins().imul_imm(qf, d);
                    pos.func.dfg.replace(inst).isub(n1, tt);
                } else {
                    replace_single_result_with_alias(&mut pos.func.dfg, inst, qf);
                }
            }
        }
    }
}
468
/// Describes how `branch_order` should rewrite a conditional branch when it is
/// swapped with the unconditional jump that follows it.
enum BranchOrderKind {
    /// Rewrite a `brz` on the given value as a `brnz`.
    BrzToBrnz(Value),
    /// Rewrite a `brnz` on the given value as a `brz`.
    BrnzToBrz(Value),
    /// Rewrite a `br_icmp cond, x, y` with the inverse of `cond`.
    InvertIcmpCond(IntCC, Value, Value),
}
474
/// Reorder branches to encourage fallthroughs.
///
/// When a block ends with a conditional branch followed by an unconditional
/// branch, this will reorder them if one of them is branching to the next Block
/// layout-wise. The unconditional jump can then become a fallthrough.
fn branch_order(pos: &mut FuncCursor, cfg: &mut ControlFlowGraph, block: Block, inst: Inst) {
    // Pattern-match the terminator `inst` (which must be an unconditional
    // jump) together with the conditional branch immediately before it, and
    // decide which rewrite applies. Bail out as soon as the shape does not
    // match.
    let (term_inst, term_inst_args, term_dest, cond_inst, cond_inst_args, cond_dest, kind) =
        match pos.func.dfg[inst] {
            InstructionData::Jump {
                opcode: Opcode::Jump,
                destination,
                ref args,
            } => {
                // There must be a next block in the layout to fall into.
                let next_block = if let Some(next_block) = pos.func.layout.next_block(block) {
                    next_block
                } else {
                    return;
                };

                // If the jump already targets the next block, it can already
                // become a fallthrough; nothing to do.
                if destination == next_block {
                    return;
                }

                let prev_inst = if let Some(prev_inst) = pos.func.layout.prev_inst(inst) {
                    prev_inst
                } else {
                    return;
                };

                let prev_inst_data = &pos.func.dfg[prev_inst];

                // The preceding conditional branch must target the next
                // block, so that after the swap the jump targets it instead.
                if let Some(prev_dest) = prev_inst_data.branch_destination() {
                    if prev_dest != next_block {
                        return;
                    }
                } else {
                    return;
                }

                match prev_inst_data {
                    InstructionData::Branch {
                        opcode,
                        args: ref prev_args,
                        destination: cond_dest,
                    } => {
                        // First list entry is the tested value.
                        let cond_arg = {
                            let args = pos.func.dfg.inst_args(prev_inst);
                            args[0]
                        };

                        let kind = match opcode {
                            Opcode::Brz => BranchOrderKind::BrzToBrnz(cond_arg),
                            Opcode::Brnz => BranchOrderKind::BrnzToBrz(cond_arg),
                            _ => panic!("unexpected opcode"),
                        };

                        (
                            inst,
                            args.clone(),
                            destination,
                            prev_inst,
                            prev_args.clone(),
                            *cond_dest,
                            kind,
                        )
                    }
                    InstructionData::BranchIcmp {
                        opcode: Opcode::BrIcmp,
                        cond,
                        destination: cond_dest,
                        args: ref prev_args,
                    } => {
                        // First two list entries are the compared values.
                        let (x_arg, y_arg) = {
                            let args = pos.func.dfg.inst_args(prev_inst);
                            (args[0], args[1])
                        };

                        (
                            inst,
                            args.clone(),
                            destination,
                            prev_inst,
                            prev_args.clone(),
                            *cond_dest,
                            BranchOrderKind::InvertIcmpCond(*cond, x_arg, y_arg),
                        )
                    }
                    _ => return,
                }
            }

            _ => return,
        };

    // Copy both argument lists out so we can rebuild the two instructions
    // without holding borrows into the DFG.
    let cond_args = cond_inst_args.as_slice(&pos.func.dfg.value_lists).to_vec();
    let term_args = term_inst_args.as_slice(&pos.func.dfg.value_lists).to_vec();

    // Swap destinations and invert the condition. The conditional branch's
    // value list starts with the tested value(s), so the new jump takes
    // `cond_args[1..]` (or `[2..]` for br_icmp's two operands).
    match kind {
        BranchOrderKind::BrnzToBrz(cond_arg) => {
            pos.func
                .dfg
                .replace(term_inst)
                .jump(cond_dest, &cond_args[1..]);
            pos.func
                .dfg
                .replace(cond_inst)
                .brz(cond_arg, term_dest, &term_args);
        }
        BranchOrderKind::BrzToBrnz(cond_arg) => {
            pos.func
                .dfg
                .replace(term_inst)
                .jump(cond_dest, &cond_args[1..]);
            pos.func
                .dfg
                .replace(cond_inst)
                .brnz(cond_arg, term_dest, &term_args);
        }
        BranchOrderKind::InvertIcmpCond(cond, x_arg, y_arg) => {
            pos.func
                .dfg
                .replace(term_inst)
                .jump(cond_dest, &cond_args[2..]);
            pos.func.dfg.replace(cond_inst).br_icmp(
                cond.inverse(),
                x_arg,
                y_arg,
                term_dest,
                &term_args,
            );
        }
    }

    // The block's successors changed; keep the CFG in sync.
    cfg.recompute_block(pos.func, block);
}
610
#[cfg(feature = "enable-peepmatic")]
mod simplify {
    use super::*;
    use crate::peepmatic::ValueOrInst;

    /// Peephole optimizer backed by the peepmatic-generated matcher.
    pub type PeepholeOptimizer<'a, 'b> =
        peepmatic_runtime::optimizer::PeepholeOptimizer<'static, 'a, &'b dyn TargetIsa>;

    /// Construct the pre-opt peephole optimizer for the given ISA.
    pub fn peephole_optimizer<'a, 'b>(isa: &'b dyn TargetIsa) -> PeepholeOptimizer<'a, 'b> {
        crate::peepmatic::preopt(isa)
    }

    /// Apply peephole optimizations to `inst` until a fixed point is reached.
    /// `_native_word_width` is unused here; it exists to match the signature
    /// of the non-peepmatic implementation of this module.
    pub fn apply_all<'a, 'b>(
        optimizer: &mut PeepholeOptimizer<'a, 'b>,
        pos: &mut FuncCursor<'a>,
        inst: Inst,
        _native_word_width: u32,
    ) {
        // After we apply one optimization, that might make another
        // optimization applicable. Keep running the peephole optimizer
        // until either:
        //
        // * No optimization applied, and therefore it doesn't make sense to
        //   try again, because no optimization will apply again.
        //
        // * Or when we replaced an instruction with an alias to an existing
        //   value, because we already ran the peephole optimizer over the
        //   aliased value's instruction in an early part of the traversal
        //   over the function.
        while let Some(ValueOrInst::Inst(new_inst)) =
            optimizer.apply_one(pos, ValueOrInst::Inst(inst))
        {
            // We transplanted a new instruction into the current
            // instruction, so the "new" instruction is actually the same
            // one, just with different data.
            debug_assert_eq!(new_inst, inst);
        }
        debug_assert_eq!(pos.current_inst(), Some(inst));
    }
}
651
#[cfg(not(feature = "enable-peepmatic"))]
mod simplify {
    use super::*;
    use crate::ir::{
        dfg::ValueDef,
        immediates,
        instructions::{Opcode, ValueList},
        types::{B8, I16, I32, I8},
    };
    use std::marker::PhantomData;

    /// Stateless stand-in for the peepmatic optimizer; carries only the
    /// lifetimes needed to match the peepmatic-backed variant's signature.
    pub struct PeepholeOptimizer<'a, 'b> {
        phantom: PhantomData<(&'a (), &'b ())>,
    }

    /// Construct the (stateless) hand-written peephole optimizer.
    pub fn peephole_optimizer<'a, 'b>(_: &dyn TargetIsa) -> PeepholeOptimizer<'a, 'b> {
        PeepholeOptimizer {
            phantom: PhantomData,
        }
    }

    /// Apply all hand-written peephole optimizations to `inst`.
    pub fn apply_all<'a, 'b>(
        _optimizer: &mut PeepholeOptimizer<'a, 'b>,
        pos: &mut FuncCursor<'a>,
        inst: Inst,
        native_word_width: u32,
    ) {
        simplify(pos, inst, native_word_width);
        branch_opt(pos, inst);
    }

    /// If `value` is defined by an `iconst`, return its immediate.
    #[inline]
    fn resolve_imm64_value(dfg: &DataFlowGraph, value: Value) -> Option<immediates::Imm64> {
        if let ValueDef::Result(candidate_inst, _) = dfg.value_def(value) {
            if let InstructionData::UnaryImm {
                opcode: Opcode::Iconst,
                imm,
            } = dfg[candidate_inst]
            {
                return Some(imm);
            }
        }
        None
    }

    /// Try to transform [(x << N) >> N] into a (un)signed-extending move.
    /// Returns true if the final instruction has been converted to such a move.
    fn try_fold_extended_move(
        pos: &mut FuncCursor,
        inst: Inst,
        opcode: Opcode,
        arg: Value,
        imm: immediates::Imm64,
    ) -> bool {
        if let ValueDef::Result(arg_inst, _) = pos.func.dfg.value_def(arg) {
            if let InstructionData::BinaryImm64 {
                opcode: Opcode::IshlImm,
                arg: prev_arg,
                imm: prev_imm,
            } = &pos.func.dfg[arg_inst]
            {
                // Both shifts must move by the same amount.
                if imm != *prev_imm {
                    return false;
                }

                let dest_ty = pos.func.dfg.ctrl_typevar(inst);
                if dest_ty != pos.func.dfg.ctrl_typevar(arg_inst) || !dest_ty.is_int() {
                    return false;
                }

                // The shift amount determines how many low bits survive; only
                // 8/16/32 surviving bits map onto an ireduce lane type.
                let imm_bits: i64 = imm.into();
                let ireduce_ty = match (dest_ty.lane_bits() as i64).wrapping_sub(imm_bits) {
                    8 => I8,
                    16 => I16,
                    32 => I32,
                    _ => return false,
                };
                let ireduce_ty = ireduce_ty.by(dest_ty.lane_count()).unwrap();

                // This becomes a no-op, since ireduce_ty has a smaller lane width than
                // the argument type (also the destination type).
                let arg = *prev_arg;
                let narrower_arg = pos.ins().ireduce(ireduce_ty, arg);

                // ushr corresponds to a zero-extend, sshr to a sign-extend.
                if opcode == Opcode::UshrImm {
                    pos.func.dfg.replace(inst).uextend(dest_ty, narrower_arg);
                } else {
                    pos.func.dfg.replace(inst).sextend(dest_ty, narrower_arg);
                }
                return true;
            }
        }
        false
    }

    /// Apply basic simplifications.
    ///
    /// This folds constants with arithmetic to form `_imm` instructions, and other minor
    /// simplifications.
    ///
    /// Doesn't apply some simplifications if the native word width (in bytes) is smaller than the
    /// controlling type's width of the instruction. This would result in an illegal instruction that
    /// would likely be expanded back into an instruction on smaller types with the same initial
    /// opcode, creating unnecessary churn.
    fn simplify(pos: &mut FuncCursor, inst: Inst, native_word_width: u32) {
        match pos.func.dfg[inst] {
            InstructionData::Binary { opcode, args } => {
                // Constant RHS: fold into the immediate form of the opcode.
                if let Some(mut imm) = resolve_imm64_value(&pos.func.dfg, args[1]) {
                    let new_opcode = match opcode {
                        Opcode::Iadd => Opcode::IaddImm,
                        Opcode::Imul => Opcode::ImulImm,
                        Opcode::Sdiv => Opcode::SdivImm,
                        Opcode::Udiv => Opcode::UdivImm,
                        Opcode::Srem => Opcode::SremImm,
                        Opcode::Urem => Opcode::UremImm,
                        Opcode::Band => Opcode::BandImm,
                        Opcode::Bor => Opcode::BorImm,
                        Opcode::Bxor => Opcode::BxorImm,
                        Opcode::Rotl => Opcode::RotlImm,
                        Opcode::Rotr => Opcode::RotrImm,
                        Opcode::Ishl => Opcode::IshlImm,
                        Opcode::Ushr => Opcode::UshrImm,
                        Opcode::Sshr => Opcode::SshrImm,
                        Opcode::Isub => {
                            // x - C  ==>  x + (-C)
                            imm = imm.wrapping_neg();
                            Opcode::IaddImm
                        }
                        Opcode::Ifcmp => Opcode::IfcmpImm,
                        _ => return,
                    };
                    let ty = pos.func.dfg.ctrl_typevar(inst);
                    if ty.bytes() <= native_word_width {
                        pos.func
                            .dfg
                            .replace(inst)
                            .BinaryImm64(new_opcode, ty, imm, args[0]);

                        // Repeat for BinaryImm simplification.
                        simplify(pos, inst, native_word_width);
                    }
                } else if let Some(imm) = resolve_imm64_value(&pos.func.dfg, args[0]) {
                    // Constant LHS: only commutative ops (and Isub, which has
                    // a dedicated reversed-subtract immediate form) qualify.
                    let new_opcode = match opcode {
                        Opcode::Iadd => Opcode::IaddImm,
                        Opcode::Imul => Opcode::ImulImm,
                        Opcode::Band => Opcode::BandImm,
                        Opcode::Bor => Opcode::BorImm,
                        Opcode::Bxor => Opcode::BxorImm,
                        Opcode::Isub => Opcode::IrsubImm,
                        _ => return,
                    };
                    let ty = pos.func.dfg.ctrl_typevar(inst);
                    if ty.bytes() <= native_word_width {
                        pos.func
                            .dfg
                            .replace(inst)
                            .BinaryImm64(new_opcode, ty, imm, args[1]);
                    }
                }
            }

            InstructionData::Unary { opcode, arg } => {
                if let Opcode::AdjustSpDown = opcode {
                    if let Some(imm) = resolve_imm64_value(&pos.func.dfg, arg) {
                        // Note this works for both positive and negative immediate values.
                        pos.func.dfg.replace(inst).adjust_sp_down_imm(imm);
                    }
                }
            }

            InstructionData::BinaryImm64 { opcode, arg, imm } => {
                let ty = pos.func.dfg.ctrl_typevar(inst);

                let mut arg = arg;
                let mut imm = imm;
                match opcode {
                    Opcode::IaddImm
                    | Opcode::ImulImm
                    | Opcode::BorImm
                    | Opcode::BandImm
                    | Opcode::BxorImm => {
                        // Fold binary_op(C2, binary_op(C1, x)) into binary_op(binary_op(C1, C2), x)
                        if let ValueDef::Result(arg_inst, _) = pos.func.dfg.value_def(arg) {
                            if let InstructionData::BinaryImm64 {
                                opcode: prev_opcode,
                                arg: prev_arg,
                                imm: prev_imm,
                            } = &pos.func.dfg[arg_inst]
                            {
                                if opcode == *prev_opcode
                                    && ty == pos.func.dfg.ctrl_typevar(arg_inst)
                                {
                                    let lhs: i64 = imm.into();
                                    let rhs: i64 = (*prev_imm).into();
                                    let new_imm = match opcode {
                                        Opcode::BorImm => lhs | rhs,
                                        Opcode::BandImm => lhs & rhs,
                                        Opcode::BxorImm => lhs ^ rhs,
                                        Opcode::IaddImm => lhs.wrapping_add(rhs),
                                        Opcode::ImulImm => lhs.wrapping_mul(rhs),
                                        _ => panic!("can't happen"),
                                    };
                                    let new_imm = immediates::Imm64::from(new_imm);
                                    let new_arg = *prev_arg;
                                    pos.func
                                        .dfg
                                        .replace(inst)
                                        .BinaryImm64(opcode, ty, new_imm, new_arg);
                                    // Keep locals in sync with the rewritten
                                    // instruction for the no-op check below.
                                    imm = new_imm;
                                    arg = new_arg;
                                }
                            }
                        }
                    }

                    Opcode::UshrImm | Opcode::SshrImm => {
                        if pos.func.dfg.ctrl_typevar(inst).bytes() <= native_word_width
                            && try_fold_extended_move(pos, inst, opcode, arg, imm)
                        {
                            return;
                        }
                    }

                    _ => {}
                };

                // Replace operations that are no-ops.
                match (opcode, imm.into()) {
                    (Opcode::IaddImm, 0)
                    | (Opcode::ImulImm, 1)
                    | (Opcode::SdivImm, 1)
                    | (Opcode::UdivImm, 1)
                    | (Opcode::BorImm, 0)
                    | (Opcode::BandImm, -1)
                    | (Opcode::BxorImm, 0)
                    | (Opcode::RotlImm, 0)
                    | (Opcode::RotrImm, 0)
                    | (Opcode::IshlImm, 0)
                    | (Opcode::UshrImm, 0)
                    | (Opcode::SshrImm, 0) => {
                        // Alias the result value with the original argument.
                        replace_single_result_with_alias(&mut pos.func.dfg, inst, arg);
                    }
                    (Opcode::ImulImm, 0) | (Opcode::BandImm, 0) => {
                        // Replace by zero.
                        pos.func.dfg.replace(inst).iconst(ty, 0);
                    }
                    (Opcode::BorImm, -1) => {
                        // Replace by minus one.
                        pos.func.dfg.replace(inst).iconst(ty, -1);
                    }
                    _ => {}
                }
            }

            InstructionData::IntCompare { opcode, cond, args } => {
                debug_assert_eq!(opcode, Opcode::Icmp);
                // Constant RHS: fold into icmp_imm.
                if let Some(imm) = resolve_imm64_value(&pos.func.dfg, args[1]) {
                    if pos.func.dfg.ctrl_typevar(inst).bytes() <= native_word_width {
                        pos.func.dfg.replace(inst).icmp_imm(cond, args[0], imm);
                    }
                }
            }

            InstructionData::CondTrap { .. }
            | InstructionData::Branch { .. }
            | InstructionData::Ternary {
                opcode: Opcode::Select,
                ..
            } => {
                // Fold away a redundant `bint`.
                let condition_def = {
                    let args = pos.func.dfg.inst_args(inst);
                    pos.func.dfg.value_def(args[0])
                };
                if let ValueDef::Result(def_inst, _) = condition_def {
                    if let InstructionData::Unary {
                        opcode: Opcode::Bint,
                        arg: bool_val,
                    } = pos.func.dfg[def_inst]
                    {
                        // Use the boolean directly as the condition operand.
                        let args = pos.func.dfg.inst_args_mut(inst);
                        args[0] = bool_val;
                    }
                }
            }

            InstructionData::Ternary {
                opcode: Opcode::Bitselect,
                args,
            } => {
                let old_cond_type = pos.func.dfg.value_type(args[0]);
                if !old_cond_type.is_vector() {
                    return;
                }

                // Replace bitselect with vselect if each lane of controlling mask is either
                // all ones or all zeroes; on x86 bitselect is encoded using 3 instructions,
                // while vselect can be encoded using single BLEND instruction.
                if let ValueDef::Result(def_inst, _) = pos.func.dfg.value_def(args[0]) {
                    let (cond_val, cond_type) = match pos.func.dfg[def_inst] {
                        InstructionData::Unary {
                            opcode: Opcode::RawBitcast,
                            arg,
                        } => {
                            // If controlling mask is raw-bitcasted boolean vector then
                            // we know each lane is either all zeroes or ones,
                            // so we can use vselect instruction instead.
                            let arg_type = pos.func.dfg.value_type(arg);
                            if !arg_type.is_vector() || !arg_type.lane_type().is_bool() {
                                return;
                            }
                            (arg, arg_type)
                        }
                        InstructionData::UnaryConst {
                            opcode: Opcode::Vconst,
                            constant_handle,
                        } => {
                            // If each byte of controlling mask is 0x00 or 0xFF then
                            // we will always bitcast our way to vselect(B8x16, I8x16, I8x16).
                            // Bitselect operates at bit level, so the lane types don't matter.
                            let const_data = pos.func.dfg.constants.get(constant_handle);
                            if !const_data.iter().all(|&b| b == 0 || b == 0xFF) {
                                return;
                            }
                            let new_type = B8.by(old_cond_type.bytes() as u16).unwrap();
                            (pos.ins().raw_bitcast(new_type, args[0]), new_type)
                        }
                        _ => return,
                    };

                    let lane_type = Type::int(cond_type.lane_bits() as u16).unwrap();
                    let arg_type = lane_type.by(cond_type.lane_count()).unwrap();
                    let old_arg_type = pos.func.dfg.value_type(args[1]);

                    if arg_type != old_arg_type {
                        // Operands types must match, we need to add bitcasts.
                        let arg1 = pos.ins().raw_bitcast(arg_type, args[1]);
                        let arg2 = pos.ins().raw_bitcast(arg_type, args[2]);
                        let ret = pos.ins().vselect(cond_val, arg1, arg2);
                        pos.func.dfg.replace(inst).raw_bitcast(old_arg_type, ret);
                    } else {
                        pos.func
                            .dfg
                            .replace(inst)
                            .vselect(cond_val, args[1], args[2]);
                    }
                }
            }

            _ => {}
        }
    }

    /// Everything needed to rewrite a branch found by `branch_opt`.
    struct BranchOptInfo {
        /// The branch instruction to mutate.
        br_inst: Inst,
        /// The value the folded comparison tested; becomes the branch operand.
        cmp_arg: Value,
        /// The branch's value list; entry 0 is overwritten with `cmp_arg`.
        args: ValueList,
        /// `Brz` or `Brnz`, chosen after folding the comparison's condition.
        new_opcode: Opcode,
    }

    /// Fold comparisons into branch operations when possible.
    ///
    /// This matches against operations which compare against zero, then use the
    /// result in a `brz` or `brnz` branch. It folds those two operations into a
    /// single `brz` or `brnz`.
    fn branch_opt(pos: &mut FuncCursor, inst: Inst) {
        let mut info = if let InstructionData::Branch {
            opcode: br_opcode,
            args: ref br_args,
            ..
        } = pos.func.dfg[inst]
        {
            let first_arg = {
                let args = pos.func.dfg.inst_args(inst);
                args[0]
            };

            // The branch condition must come from an instruction result.
            let icmp_inst =
                if let ValueDef::Result(icmp_inst, _) = pos.func.dfg.value_def(first_arg) {
                    icmp_inst
                } else {
                    return;
                };

            if let InstructionData::IntCompareImm {
                opcode: Opcode::IcmpImm,
                arg: cmp_arg,
                cond: cmp_cond,
                imm: cmp_imm,
            } = pos.func.dfg[icmp_inst]
            {
                // Only comparisons against zero can be folded.
                let cmp_imm: i64 = cmp_imm.into();
                if cmp_imm != 0 {
                    return;
                }

                // icmp_imm returns non-zero when the comparison is true. So, if
                // we're branching on zero, we need to invert the condition.
                let cond = match br_opcode {
                    Opcode::Brz => cmp_cond.inverse(),
                    Opcode::Brnz => cmp_cond,
                    _ => return,
                };

                // Only (in)equality with zero maps back onto brz/brnz.
                let new_opcode = match cond {
                    IntCC::Equal => Opcode::Brz,
                    IntCC::NotEqual => Opcode::Brnz,
                    _ => return,
                };

                BranchOptInfo {
                    br_inst: inst,
                    cmp_arg,
                    args: br_args.clone(),
                    new_opcode,
                }
            } else {
                return;
            }
        } else {
            return;
        };

        // Branch directly on the compared value with the folded opcode.
        info.args.as_mut_slice(&mut pos.func.dfg.value_lists)[0] = info.cmp_arg;
        if let InstructionData::Branch { ref mut opcode, .. } = pos.func.dfg[info.br_inst] {
            *opcode = info.new_opcode;
        } else {
            panic!();
        }
    }
}
1083
1084 /// The main pre-opt pass.
do_preopt(func: &mut Function, cfg: &mut ControlFlowGraph, isa: &dyn TargetIsa)1085 pub fn do_preopt(func: &mut Function, cfg: &mut ControlFlowGraph, isa: &dyn TargetIsa) {
1086 let _tt = timing::preopt();
1087
1088 let mut pos = FuncCursor::new(func);
1089 let native_word_width = isa.pointer_bytes() as u32;
1090 let mut optimizer = simplify::peephole_optimizer(isa);
1091
1092 while let Some(block) = pos.next_block() {
1093 while let Some(inst) = pos.next_inst() {
1094 simplify::apply_all(&mut optimizer, &mut pos, inst, native_word_width);
1095
1096 // Try to transform divide-by-constant into simpler operations.
1097 if let Some(divrem_info) = get_div_info(inst, &pos.func.dfg) {
1098 do_divrem_transformation(&divrem_info, &mut pos, inst);
1099 continue;
1100 }
1101
1102 branch_order(&mut pos, cfg, block, inst);
1103 }
1104 }
1105 }
1106