1 //! A post-legalization rewriting pass.
2
3 #![allow(non_snake_case)]
4
5 use crate::cursor::{Cursor, EncCursor};
6 use crate::ir::condcodes::{CondCode, FloatCC, IntCC};
7 use crate::ir::dfg::ValueDef;
8 use crate::ir::immediates::{Imm64, Offset32};
9 use crate::ir::instructions::{Opcode, ValueList};
10 use crate::ir::{Block, Function, Inst, InstBuilder, InstructionData, MemFlags, Type, Value};
11 use crate::isa::TargetIsa;
12 use crate::timing;
13
14 /// Information collected about a compare+branch sequence.
15 struct CmpBrInfo {
16 /// The branch instruction.
17 br_inst: Inst,
18 /// The icmp, icmp_imm, or fcmp instruction.
19 cmp_inst: Inst,
20 /// The destination of the branch.
21 destination: Block,
22 /// The arguments of the branch.
23 args: ValueList,
24 /// The first argument to the comparison. The second is in the `kind` field.
25 cmp_arg: Value,
26 /// If the branch is `brz` rather than `brnz`, we need to invert the condition
27 /// before the branch.
28 invert_branch_cond: bool,
29 /// The kind of comparison, and the second argument.
30 kind: CmpBrKind,
31 }
32
33 enum CmpBrKind {
34 Icmp { cond: IntCC, arg: Value },
35 IcmpImm { cond: IntCC, imm: Imm64 },
36 Fcmp { cond: FloatCC, arg: Value },
37 }
38
39 /// Optimize comparisons to use flags values, to avoid materializing conditions
40 /// in integer registers.
41 ///
42 /// For example, optimize icmp/fcmp brz/brnz sequences into ifcmp/ffcmp brif/brff
43 /// sequences.
optimize_cpu_flags( pos: &mut EncCursor, inst: Inst, last_flags_clobber: Option<Inst>, isa: &dyn TargetIsa, )44 fn optimize_cpu_flags(
45 pos: &mut EncCursor,
46 inst: Inst,
47 last_flags_clobber: Option<Inst>,
48 isa: &dyn TargetIsa,
49 ) {
50 // Look for compare and branch patterns.
51 // This code could be considerably simplified with non-lexical lifetimes.
52 let info = match pos.func.dfg[inst] {
53 InstructionData::Branch {
54 opcode,
55 destination,
56 ref args,
57 } => {
58 let first_arg = args.first(&pos.func.dfg.value_lists).unwrap();
59 let invert_branch_cond = match opcode {
60 Opcode::Brz => true,
61 Opcode::Brnz => false,
62 _ => panic!(),
63 };
64 if let ValueDef::Result(cond_inst, _) = pos.func.dfg.value_def(first_arg) {
65 match pos.func.dfg[cond_inst] {
66 InstructionData::IntCompare {
67 cond,
68 args: cmp_args,
69 ..
70 } => CmpBrInfo {
71 br_inst: inst,
72 cmp_inst: cond_inst,
73 destination,
74 args: args.clone(),
75 cmp_arg: cmp_args[0],
76 invert_branch_cond,
77 kind: CmpBrKind::Icmp {
78 cond,
79 arg: cmp_args[1],
80 },
81 },
82 InstructionData::IntCompareImm {
83 cond,
84 arg: cmp_arg,
85 imm: cmp_imm,
86 ..
87 } => CmpBrInfo {
88 br_inst: inst,
89 cmp_inst: cond_inst,
90 destination,
91 args: args.clone(),
92 cmp_arg,
93 invert_branch_cond,
94 kind: CmpBrKind::IcmpImm { cond, imm: cmp_imm },
95 },
96 InstructionData::FloatCompare {
97 cond,
98 args: cmp_args,
99 ..
100 } => CmpBrInfo {
101 br_inst: inst,
102 cmp_inst: cond_inst,
103 destination,
104 args: args.clone(),
105 cmp_arg: cmp_args[0],
106 invert_branch_cond,
107 kind: CmpBrKind::Fcmp {
108 cond,
109 arg: cmp_args[1],
110 },
111 },
112 _ => return,
113 }
114 } else {
115 return;
116 }
117 }
118 // TODO: trapif, trueif, selectif, and their ff counterparts.
119 _ => return,
120 };
121
122 // If any instructions clobber the flags between the comparison and the branch,
123 // don't optimize them.
124 if last_flags_clobber != Some(info.cmp_inst) {
125 return;
126 }
127
128 // We found a compare+branch pattern. Transform it to use flags.
129 let args = info.args.as_slice(&pos.func.dfg.value_lists)[1..].to_vec();
130 pos.goto_inst(info.cmp_inst);
131 pos.use_srcloc(info.cmp_inst);
132 match info.kind {
133 CmpBrKind::Icmp { mut cond, arg } => {
134 let flags = pos.ins().ifcmp(info.cmp_arg, arg);
135 pos.func.dfg.replace(info.cmp_inst).trueif(cond, flags);
136 if info.invert_branch_cond {
137 cond = cond.inverse();
138 }
139 pos.func
140 .dfg
141 .replace(info.br_inst)
142 .brif(cond, flags, info.destination, &args);
143 }
144 CmpBrKind::IcmpImm { mut cond, imm } => {
145 let flags = pos.ins().ifcmp_imm(info.cmp_arg, imm);
146 pos.func.dfg.replace(info.cmp_inst).trueif(cond, flags);
147 if info.invert_branch_cond {
148 cond = cond.inverse();
149 }
150 pos.func
151 .dfg
152 .replace(info.br_inst)
153 .brif(cond, flags, info.destination, &args);
154 }
155 CmpBrKind::Fcmp { mut cond, arg } => {
156 let flags = pos.ins().ffcmp(info.cmp_arg, arg);
157 pos.func.dfg.replace(info.cmp_inst).trueff(cond, flags);
158 if info.invert_branch_cond {
159 cond = cond.inverse();
160 }
161 pos.func
162 .dfg
163 .replace(info.br_inst)
164 .brff(cond, flags, info.destination, &args);
165 }
166 }
167 let ok = pos.func.update_encoding(info.cmp_inst, isa).is_ok();
168 debug_assert!(ok);
169 let ok = pos.func.update_encoding(info.br_inst, isa).is_ok();
170 debug_assert!(ok);
171 }
172
173 struct MemOpInfo {
174 opcode: Opcode,
175 itype: Type,
176 arg: Value,
177 st_arg: Option<Value>,
178 flags: MemFlags,
179 offset: Offset32,
180 }
181
optimize_complex_addresses(pos: &mut EncCursor, inst: Inst, isa: &dyn TargetIsa)182 fn optimize_complex_addresses(pos: &mut EncCursor, inst: Inst, isa: &dyn TargetIsa) {
183 // Look for simple loads and stores we can optimize.
184 let info = match pos.func.dfg[inst] {
185 InstructionData::Load {
186 opcode,
187 arg,
188 flags,
189 offset,
190 } => MemOpInfo {
191 opcode,
192 itype: pos.func.dfg.ctrl_typevar(inst),
193 arg,
194 st_arg: None,
195 flags,
196 offset,
197 },
198 InstructionData::Store {
199 opcode,
200 args,
201 flags,
202 offset,
203 } => MemOpInfo {
204 opcode,
205 itype: pos.func.dfg.ctrl_typevar(inst),
206 arg: args[1],
207 st_arg: Some(args[0]),
208 flags,
209 offset,
210 },
211 _ => return,
212 };
213
214 // Examine the instruction that defines the address operand.
215 if let ValueDef::Result(result_inst, _) = pos.func.dfg.value_def(info.arg) {
216 match pos.func.dfg[result_inst] {
217 InstructionData::Binary {
218 opcode: Opcode::Iadd,
219 args,
220 } => match info.opcode {
221 // Operand is an iadd. Fold it into a memory address with a complex address mode.
222 Opcode::Load => {
223 pos.func.dfg.replace(inst).load_complex(
224 info.itype,
225 info.flags,
226 &args,
227 info.offset,
228 );
229 }
230 Opcode::Uload8 => {
231 pos.func.dfg.replace(inst).uload8_complex(
232 info.itype,
233 info.flags,
234 &args,
235 info.offset,
236 );
237 }
238 Opcode::Sload8 => {
239 pos.func.dfg.replace(inst).sload8_complex(
240 info.itype,
241 info.flags,
242 &args,
243 info.offset,
244 );
245 }
246 Opcode::Uload16 => {
247 pos.func.dfg.replace(inst).uload16_complex(
248 info.itype,
249 info.flags,
250 &args,
251 info.offset,
252 );
253 }
254 Opcode::Sload16 => {
255 pos.func.dfg.replace(inst).sload16_complex(
256 info.itype,
257 info.flags,
258 &args,
259 info.offset,
260 );
261 }
262 Opcode::Uload32 => {
263 pos.func
264 .dfg
265 .replace(inst)
266 .uload32_complex(info.flags, &args, info.offset);
267 }
268 Opcode::Sload32 => {
269 pos.func
270 .dfg
271 .replace(inst)
272 .sload32_complex(info.flags, &args, info.offset);
273 }
274 Opcode::Uload8x8 => {
275 pos.func
276 .dfg
277 .replace(inst)
278 .uload8x8_complex(info.flags, &args, info.offset);
279 }
280 Opcode::Sload8x8 => {
281 pos.func
282 .dfg
283 .replace(inst)
284 .sload8x8_complex(info.flags, &args, info.offset);
285 }
286 Opcode::Uload16x4 => {
287 pos.func
288 .dfg
289 .replace(inst)
290 .uload16x4_complex(info.flags, &args, info.offset);
291 }
292 Opcode::Sload16x4 => {
293 pos.func
294 .dfg
295 .replace(inst)
296 .sload16x4_complex(info.flags, &args, info.offset);
297 }
298 Opcode::Uload32x2 => {
299 pos.func
300 .dfg
301 .replace(inst)
302 .uload32x2_complex(info.flags, &args, info.offset);
303 }
304 Opcode::Sload32x2 => {
305 pos.func
306 .dfg
307 .replace(inst)
308 .sload32x2_complex(info.flags, &args, info.offset);
309 }
310 Opcode::Store => {
311 pos.func.dfg.replace(inst).store_complex(
312 info.flags,
313 info.st_arg.unwrap(),
314 &args,
315 info.offset,
316 );
317 }
318 Opcode::Istore8 => {
319 pos.func.dfg.replace(inst).istore8_complex(
320 info.flags,
321 info.st_arg.unwrap(),
322 &args,
323 info.offset,
324 );
325 }
326 Opcode::Istore16 => {
327 pos.func.dfg.replace(inst).istore16_complex(
328 info.flags,
329 info.st_arg.unwrap(),
330 &args,
331 info.offset,
332 );
333 }
334 Opcode::Istore32 => {
335 pos.func.dfg.replace(inst).istore32_complex(
336 info.flags,
337 info.st_arg.unwrap(),
338 &args,
339 info.offset,
340 );
341 }
342 _ => panic!("Unsupported load or store opcode"),
343 },
344 InstructionData::BinaryImm64 {
345 opcode: Opcode::IaddImm,
346 arg,
347 imm,
348 } => match pos.func.dfg[inst] {
349 // Operand is an iadd_imm. Fold the immediate into the offset if possible.
350 InstructionData::Load {
351 arg: ref mut load_arg,
352 ref mut offset,
353 ..
354 } => {
355 if let Some(imm) = offset.try_add_i64(imm.into()) {
356 *load_arg = arg;
357 *offset = imm;
358 } else {
359 // Overflow.
360 return;
361 }
362 }
363 InstructionData::Store {
364 args: ref mut store_args,
365 ref mut offset,
366 ..
367 } => {
368 if let Some(imm) = offset.try_add_i64(imm.into()) {
369 store_args[1] = arg;
370 *offset = imm;
371 } else {
372 // Overflow.
373 return;
374 }
375 }
376 _ => panic!(),
377 },
378 _ => {
379 // Address value is defined by some other kind of instruction.
380 return;
381 }
382 }
383 } else {
384 // Address value is not the result of an instruction.
385 return;
386 }
387
388 let ok = pos.func.update_encoding(inst, isa).is_ok();
389 debug_assert!(
390 ok,
391 "failed to update encoding for `{}`",
392 pos.func.dfg.display_inst(inst, isa)
393 );
394 }
395
396 //----------------------------------------------------------------------
397 //
398 // The main post-opt pass.
399
do_postopt(func: &mut Function, isa: &dyn TargetIsa)400 pub fn do_postopt(func: &mut Function, isa: &dyn TargetIsa) {
401 let _tt = timing::postopt();
402 let mut pos = EncCursor::new(func, isa);
403 let is_mach_backend = isa.get_mach_backend().is_some();
404 while let Some(_block) = pos.next_block() {
405 let mut last_flags_clobber = None;
406 while let Some(inst) = pos.next_inst() {
407 if !is_mach_backend && isa.uses_cpu_flags() {
408 // Optimize instructions to make use of flags.
409 optimize_cpu_flags(&mut pos, inst, last_flags_clobber, isa);
410
411 // Track the most recent seen instruction that clobbers the flags.
412 if let Some(constraints) = isa
413 .encoding_info()
414 .operand_constraints(pos.func.encodings[inst])
415 {
416 if constraints.clobbers_flags {
417 last_flags_clobber = Some(inst)
418 }
419 }
420 }
421
422 if isa.uses_complex_addresses() {
423 optimize_complex_addresses(&mut pos, inst, isa);
424 }
425 }
426 }
427 }
428