1 /* Optimization of PHI nodes by converting them into straightline code.
2 Copyright (C) 2004-2019 Free Software Foundation, Inc.
3
4 This file is part of GCC.
5
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 3, or (at your option) any
9 later version.
10
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
19
20 #include "config.h"
21 #include "system.h"
22 #include "coretypes.h"
23 #include "backend.h"
24 #include "insn-codes.h"
25 #include "rtl.h"
26 #include "tree.h"
27 #include "gimple.h"
28 #include "cfghooks.h"
29 #include "tree-pass.h"
30 #include "ssa.h"
31 #include "optabs-tree.h"
32 #include "insn-config.h"
33 #include "gimple-pretty-print.h"
34 #include "fold-const.h"
35 #include "stor-layout.h"
36 #include "cfganal.h"
37 #include "gimplify.h"
38 #include "gimple-iterator.h"
39 #include "gimplify-me.h"
40 #include "tree-cfg.h"
41 #include "tree-dfa.h"
42 #include "domwalk.h"
43 #include "cfgloop.h"
44 #include "tree-data-ref.h"
45 #include "tree-scalar-evolution.h"
46 #include "tree-inline.h"
47 #include "params.h"
48 #include "case-cfn-macros.h"
49
/* Forward declarations of the local transformation routines defined
   below.  tree_ssa_phiopt_worker is the driver; each *_replacement
   helper attempts one specific PHI-to-straightline rewrite.  */
static unsigned int tree_ssa_phiopt_worker (bool, bool, bool);
static bool two_value_replacement (basic_block, basic_block, edge, gphi *,
				   tree, tree);
static bool conditional_replacement (basic_block, basic_block,
				     edge, edge, gphi *, tree, tree);
static gphi *factor_out_conditional_conversion (edge, edge, gphi *, tree, tree,
						gimple *);
static int value_replacement (basic_block, basic_block,
			      edge, edge, gimple *, tree, tree);
static bool minmax_replacement (basic_block, basic_block,
				edge, edge, gimple *, tree, tree);
static bool abs_replacement (basic_block, basic_block,
			     edge, edge, gimple *, tree, tree);
static bool cond_removal_in_popcount_pattern (basic_block, basic_block,
					      edge, edge, gimple *, tree, tree);
static bool cond_store_replacement (basic_block, basic_block, edge, edge,
				    hash_set<tree> *);
static bool cond_if_else_store_replacement (basic_block, basic_block, basic_block);
static hash_set<tree> * get_non_trapping ();
static void replace_phi_edge_with_variable (basic_block, edge, gimple *, tree);
static void hoist_adjacent_loads (basic_block, basic_block,
				  basic_block, basic_block);
static bool gate_hoist_loads (void);
73
74 /* This pass tries to transform conditional stores into unconditional
75 ones, enabling further simplifications with the simpler then and else
76 blocks. In particular it replaces this:
77
78 bb0:
79 if (cond) goto bb2; else goto bb1;
80 bb1:
81 *p = RHS;
82 bb2:
83
84 with
85
86 bb0:
87 if (cond) goto bb1; else goto bb2;
88 bb1:
89 condtmp' = *p;
90 bb2:
91 condtmp = PHI <RHS, condtmp'>
92 *p = condtmp;
93
94 This transformation can only be done under several constraints,
95 documented below. It also replaces:
96
97 bb0:
98 if (cond) goto bb2; else goto bb1;
99 bb1:
100 *p = RHS1;
101 goto bb3;
102 bb2:
103 *p = RHS2;
104 bb3:
105
106 with
107
108 bb0:
109 if (cond) goto bb3; else goto bb1;
110 bb1:
111 bb3:
112 condtmp = PHI <RHS1, RHS2>
113 *p = condtmp; */
114
115 static unsigned int
tree_ssa_cs_elim(void)116 tree_ssa_cs_elim (void)
117 {
118 unsigned todo;
119 /* ??? We are not interested in loop related info, but the following
120 will create it, ICEing as we didn't init loops with pre-headers.
121 An interfacing issue of find_data_references_in_bb. */
122 loop_optimizer_init (LOOPS_NORMAL);
123 scev_initialize ();
124 todo = tree_ssa_phiopt_worker (true, false, false);
125 scev_finalize ();
126 loop_optimizer_finalize ();
127 return todo;
128 }
129
130 /* Return the singleton PHI in the SEQ of PHIs for edges E0 and E1. */
131
132 static gphi *
single_non_singleton_phi_for_edges(gimple_seq seq,edge e0,edge e1)133 single_non_singleton_phi_for_edges (gimple_seq seq, edge e0, edge e1)
134 {
135 gimple_stmt_iterator i;
136 gphi *phi = NULL;
137 if (gimple_seq_singleton_p (seq))
138 return as_a <gphi *> (gsi_stmt (gsi_start (seq)));
139 for (i = gsi_start (seq); !gsi_end_p (i); gsi_next (&i))
140 {
141 gphi *p = as_a <gphi *> (gsi_stmt (i));
142 /* If the PHI arguments are equal then we can skip this PHI. */
143 if (operand_equal_for_phi_arg_p (gimple_phi_arg_def (p, e0->dest_idx),
144 gimple_phi_arg_def (p, e1->dest_idx)))
145 continue;
146
147 /* If we already have a PHI that has the two edge arguments are
148 different, then return it is not a singleton for these PHIs. */
149 if (phi)
150 return NULL;
151
152 phi = p;
153 }
154 return phi;
155 }
156
/* The core routine of conditional store replacement and normal
   phi optimizations.  Both share much of the infrastructure in how
   to match applicable basic block patterns.  DO_STORE_ELIM is true
   when we want to do conditional store replacement, false otherwise.
   DO_HOIST_LOADS is true when we want to hoist adjacent loads out
   of diamond control flow patterns, false otherwise.  EARLY_P is
   true in the early (cheap) incarnation of the pass; only the
   inexpensive transformations are tried then.
   Returns a mask of TODO_* flags for the pass manager.  */
static unsigned int
tree_ssa_phiopt_worker (bool do_store_elim, bool do_hoist_loads, bool early_p)
{
  basic_block bb;
  basic_block *bb_order;
  unsigned n, i;
  bool cfgchanged = false;
  hash_set<tree> *nontrap = 0;

  if (do_store_elim)
    /* Calculate the set of non-trapping memory accesses.  */
    nontrap = get_non_trapping ();

  /* Search every basic block for COND_EXPR we may be able to optimize.

     We walk the blocks in order that guarantees that a block with
     a single predecessor is processed before the predecessor.
     This ensures that we collapse inner ifs before visiting the
     outer ones, and also that we do not try to visit a removed
     block.  */
  bb_order = single_pred_before_succ_order ();
  n = n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS;

  for (i = 0; i < n; i++)
    {
      gimple *cond_stmt;
      gphi *phi;
      basic_block bb1, bb2;
      edge e1, e2;
      tree arg0, arg1;

      bb = bb_order[i];

      cond_stmt = last_stmt (bb);
      /* Check to see if the last statement is a GIMPLE_COND.  */
      if (!cond_stmt
	  || gimple_code (cond_stmt) != GIMPLE_COND)
	continue;

      e1 = EDGE_SUCC (bb, 0);
      bb1 = e1->dest;
      e2 = EDGE_SUCC (bb, 1);
      bb2 = e2->dest;

      /* We cannot do the optimization on abnormal edges.  */
      if ((e1->flags & EDGE_ABNORMAL) != 0
	  || (e2->flags & EDGE_ABNORMAL) != 0)
	continue;

      /* Both successor blocks must exist and themselves have
	 successors, otherwise there is no join block to merge the
	 PHI values in.  */
      if (EDGE_COUNT (bb1->succs) == 0
	  || bb2 == NULL
	  || EDGE_COUNT (bb2->succs) == 0)
	continue;

      /* Find the bb which is the fall through to the other.  */
      if (EDGE_SUCC (bb1, 0)->dest == bb2)
	;
      else if (EDGE_SUCC (bb2, 0)->dest == bb1)
	{
	  /* Canonicalize: make BB1 the middle block that falls
	     through into the join block BB2.  */
	  std::swap (bb1, bb2);
	  std::swap (e1, e2);
	}
      else if (do_store_elim
	       && EDGE_SUCC (bb1, 0)->dest == EDGE_SUCC (bb2, 0)->dest)
	{
	  /* Diamond shape: both arms of the condition meet in BB3.  */
	  basic_block bb3 = EDGE_SUCC (bb1, 0)->dest;

	  if (!single_succ_p (bb1)
	      || (EDGE_SUCC (bb1, 0)->flags & EDGE_FALLTHRU) == 0
	      || !single_succ_p (bb2)
	      || (EDGE_SUCC (bb2, 0)->flags & EDGE_FALLTHRU) == 0
	      || EDGE_COUNT (bb3->preds) != 2)
	    continue;
	  if (cond_if_else_store_replacement (bb1, bb2, bb3))
	    cfgchanged = true;
	  continue;
	}
      else if (do_hoist_loads
	       && EDGE_SUCC (bb1, 0)->dest == EDGE_SUCC (bb2, 0)->dest)
	{
	  /* Diamond shape again, this time a candidate for hoisting
	     adjacent loads out of the two arms.  */
	  basic_block bb3 = EDGE_SUCC (bb1, 0)->dest;

	  if (!FLOAT_TYPE_P (TREE_TYPE (gimple_cond_lhs (cond_stmt)))
	      && single_succ_p (bb1)
	      && single_succ_p (bb2)
	      && single_pred_p (bb1)
	      && single_pred_p (bb2)
	      && EDGE_COUNT (bb->succs) == 2
	      && EDGE_COUNT (bb3->preds) == 2
	      /* If one edge or the other is dominant, a conditional move
		 is likely to perform worse than the well-predicted branch.  */
	      && !predictable_edge_p (EDGE_SUCC (bb, 0))
	      && !predictable_edge_p (EDGE_SUCC (bb, 1)))
	    hoist_adjacent_loads (bb, bb1, bb2, bb3);
	  continue;
	}
      else
	continue;

      e1 = EDGE_SUCC (bb1, 0);

      /* Make sure that bb1 is just a fall through.  */
      if (!single_succ_p (bb1)
	  || (e1->flags & EDGE_FALLTHRU) == 0)
	continue;

      /* Also make sure that bb1 only have one predecessor and that it
	 is bb.  */
      if (!single_pred_p (bb1)
	  || single_pred (bb1) != bb)
	continue;

      if (do_store_elim)
	{
	  /* bb1 is the middle block, bb2 the join block, bb the split block,
	     e1 the fallthrough edge from bb1 to bb2.  We can't do the
	     optimization if the join block has more than two predecessors.  */
	  if (EDGE_COUNT (bb2->preds) > 2)
	    continue;
	  if (cond_store_replacement (bb1, bb2, e1, e2, nontrap))
	    cfgchanged = true;
	}
      else
	{
	  gimple_seq phis = phi_nodes (bb2);
	  gimple_stmt_iterator gsi;
	  bool candorest = true;

	  /* Value replacement can work with more than one PHI
	     so try that first. */
	  if (!early_p)
	    for (gsi = gsi_start (phis); !gsi_end_p (gsi); gsi_next (&gsi))
	      {
		phi = as_a <gphi *> (gsi_stmt (gsi));
		arg0 = gimple_phi_arg_def (phi, e1->dest_idx);
		arg1 = gimple_phi_arg_def (phi, e2->dest_idx);
		/* A return value of 2 means the whole BB was optimized
		   away; stop looking at this block.  */
		if (value_replacement (bb, bb1, e1, e2, phi, arg0, arg1) == 2)
		  {
		    candorest = false;
		    cfgchanged = true;
		    break;
		  }
	      }

	  if (!candorest)
	    continue;

	  /* The remaining transforms handle a single PHI only.  */
	  phi = single_non_singleton_phi_for_edges (phis, e1, e2);
	  if (!phi)
	    continue;

	  arg0 = gimple_phi_arg_def (phi, e1->dest_idx);
	  arg1 = gimple_phi_arg_def (phi, e2->dest_idx);

	  /* Something is wrong if we cannot find the arguments in the PHI
	     node.  */
	  gcc_assert (arg0 != NULL_TREE && arg1 != NULL_TREE);

	  gphi *newphi = factor_out_conditional_conversion (e1, e2, phi,
							    arg0, arg1,
							    cond_stmt);
	  if (newphi != NULL)
	    {
	      phi = newphi;
	      /* factor_out_conditional_conversion may create a new PHI in
		 BB2 and eliminate an existing PHI in BB2.  Recompute values
		 that may be affected by that change.  */
	      arg0 = gimple_phi_arg_def (phi, e1->dest_idx);
	      arg1 = gimple_phi_arg_def (phi, e2->dest_idx);
	      gcc_assert (arg0 != NULL_TREE && arg1 != NULL_TREE);
	    }

	  /* Do the replacement of conditional if it can be done.
	     Transforms are tried in a fixed order; the first that
	     succeeds wins.  */
	  if (two_value_replacement (bb, bb1, e2, phi, arg0, arg1))
	    cfgchanged = true;
	  else if (!early_p
		   && conditional_replacement (bb, bb1, e1, e2, phi,
					       arg0, arg1))
	    cfgchanged = true;
	  else if (abs_replacement (bb, bb1, e1, e2, phi, arg0, arg1))
	    cfgchanged = true;
	  else if (!early_p
		   && cond_removal_in_popcount_pattern (bb, bb1, e1, e2,
							phi, arg0, arg1))
	    cfgchanged = true;
	  else if (minmax_replacement (bb, bb1, e1, e2, phi, arg0, arg1))
	    cfgchanged = true;
	}
    }

  free (bb_order);

  if (do_store_elim)
    delete nontrap;
  /* If the CFG has changed, we should cleanup the CFG.  */
  if (cfgchanged && do_store_elim)
    {
      /* In cond-store replacement we have added some loads on edges
	 and new VOPS (as we moved the store, and created a load).  */
      gsi_commit_edge_inserts ();
      return TODO_cleanup_cfg | TODO_update_ssa_only_virtuals;
    }
  else if (cfgchanged)
    return TODO_cleanup_cfg;
  return 0;
}
370
371 /* Replace PHI node element whose edge is E in block BB with variable NEW.
372 Remove the edge from COND_BLOCK which does not lead to BB (COND_BLOCK
373 is known to have two edges, one of which must reach BB). */
374
375 static void
replace_phi_edge_with_variable(basic_block cond_block,edge e,gimple * phi,tree new_tree)376 replace_phi_edge_with_variable (basic_block cond_block,
377 edge e, gimple *phi, tree new_tree)
378 {
379 basic_block bb = gimple_bb (phi);
380 basic_block block_to_remove;
381 gimple_stmt_iterator gsi;
382
383 /* Change the PHI argument to new. */
384 SET_USE (PHI_ARG_DEF_PTR (phi, e->dest_idx), new_tree);
385
386 /* Remove the empty basic block. */
387 if (EDGE_SUCC (cond_block, 0)->dest == bb)
388 {
389 EDGE_SUCC (cond_block, 0)->flags |= EDGE_FALLTHRU;
390 EDGE_SUCC (cond_block, 0)->flags &= ~(EDGE_TRUE_VALUE | EDGE_FALSE_VALUE);
391 EDGE_SUCC (cond_block, 0)->probability = profile_probability::always ();
392
393 block_to_remove = EDGE_SUCC (cond_block, 1)->dest;
394 }
395 else
396 {
397 EDGE_SUCC (cond_block, 1)->flags |= EDGE_FALLTHRU;
398 EDGE_SUCC (cond_block, 1)->flags
399 &= ~(EDGE_TRUE_VALUE | EDGE_FALSE_VALUE);
400 EDGE_SUCC (cond_block, 1)->probability = profile_probability::always ();
401
402 block_to_remove = EDGE_SUCC (cond_block, 0)->dest;
403 }
404 delete_basic_block (block_to_remove);
405
406 /* Eliminate the COND_EXPR at the end of COND_BLOCK. */
407 gsi = gsi_last_bb (cond_block);
408 gsi_remove (&gsi, true);
409
410 if (dump_file && (dump_flags & TDF_DETAILS))
411 fprintf (dump_file,
412 "COND_EXPR in block %d and PHI in block %d converted to straightline code.\n",
413 cond_block->index,
414 bb->index);
415 }
416
/* PR66726: Factor conversion out of COND_EXPR.  If the arguments of the PHI
   stmt are CONVERT_STMT, factor out the conversion and perform the conversion
   to the result of PHI stmt.  COND_STMT is the controlling predicate.
   Return the newly-created PHI, if any.

   I.e. turn  phi = PHI <(T) a, (T) b>  into
   newphi = PHI <a, b>; phi = (T) newphi, when both casts are the
   same conversion and each cast result has a single use.  */

static gphi *
factor_out_conditional_conversion (edge e0, edge e1, gphi *phi,
				   tree arg0, tree arg1, gimple *cond_stmt)
{
  gimple *arg0_def_stmt = NULL, *arg1_def_stmt = NULL, *new_stmt;
  tree new_arg0 = NULL_TREE, new_arg1 = NULL_TREE;
  tree temp, result;
  gphi *newphi;
  gimple_stmt_iterator gsi, gsi_for_def;
  location_t locus = gimple_location (phi);
  enum tree_code convert_code;

  /* Handle only PHI statements with two arguments.  TODO: If all
     other arguments to PHI are INTEGER_CST or if their defining
     statement have the same unary operation, we can handle more
     than two arguments too.  */
  if (gimple_phi_num_args (phi) != 2)
    return NULL;

  /* First canonicalize to simplify tests.  */
  if (TREE_CODE (arg0) != SSA_NAME)
    {
      std::swap (arg0, arg1);
      std::swap (e0, e1);
    }

  if (TREE_CODE (arg0) != SSA_NAME
      || (TREE_CODE (arg1) != SSA_NAME
	  && TREE_CODE (arg1) != INTEGER_CST))
    return NULL;

  /* Check if arg0 is an SSA_NAME and the stmt which defines arg0 is
     a conversion.  */
  arg0_def_stmt = SSA_NAME_DEF_STMT (arg0);
  if (!gimple_assign_cast_p (arg0_def_stmt))
    return NULL;

  /* Use the RHS as new_arg0.  */
  convert_code = gimple_assign_rhs_code (arg0_def_stmt);
  new_arg0 = gimple_assign_rhs1 (arg0_def_stmt);
  if (convert_code == VIEW_CONVERT_EXPR)
    {
      new_arg0 = TREE_OPERAND (new_arg0, 0);
      if (!is_gimple_reg_type (TREE_TYPE (new_arg0)))
	return NULL;
    }
  /* Values live across abnormal edges must not be moved around.  */
  if (TREE_CODE (new_arg0) == SSA_NAME
      && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (new_arg0))
    return NULL;

  if (TREE_CODE (arg1) == SSA_NAME)
    {
      /* Check if arg1 is an SSA_NAME and the stmt which defines arg1
	 is a conversion.  */
      arg1_def_stmt = SSA_NAME_DEF_STMT (arg1);
      if (!is_gimple_assign (arg1_def_stmt)
	  || gimple_assign_rhs_code (arg1_def_stmt) != convert_code)
	return NULL;

      /* Use the RHS as new_arg1.  */
      new_arg1 = gimple_assign_rhs1 (arg1_def_stmt);
      if (convert_code == VIEW_CONVERT_EXPR)
	new_arg1 = TREE_OPERAND (new_arg1, 0);
      if (TREE_CODE (new_arg1) == SSA_NAME
	  && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (new_arg1))
	return NULL;
    }
  else
    {
      /* If arg1 is an INTEGER_CST, fold it to new type.  */
      if (INTEGRAL_TYPE_P (TREE_TYPE (new_arg0))
	  && int_fits_type_p (arg1, TREE_TYPE (new_arg0)))
	{
	  /* NOTE(review): arg0_def_stmt was already verified to be a
	     cast above, so this re-check appears redundant.  */
	  if (gimple_assign_cast_p (arg0_def_stmt))
	    {
	      /* For the INTEGER_CST case, we are just moving the
		 conversion from one place to another, which can often
		 hurt as the conversion moves further away from the
		 statement that computes the value.  So, perform this
		 only if new_arg0 is an operand of COND_STMT, or
		 if arg0_def_stmt is the only non-debug stmt in
		 its basic block, because then it is possible this
		 could enable further optimizations (minmax replacement
		 etc.).  See PR71016.  */
	      if (new_arg0 != gimple_cond_lhs (cond_stmt)
		  && new_arg0 != gimple_cond_rhs (cond_stmt)
		  && gimple_bb (arg0_def_stmt) == e0->src)
		{
		  /* Require that arg0_def_stmt has no non-debug
		     statement before it ...  */
		  gsi = gsi_for_stmt (arg0_def_stmt);
		  gsi_prev_nondebug (&gsi);
		  if (!gsi_end_p (gsi))
		    return NULL;
		  /* ... nor after it, in its basic block.  */
		  gsi = gsi_for_stmt (arg0_def_stmt);
		  gsi_next_nondebug (&gsi);
		  if (!gsi_end_p (gsi))
		    return NULL;
		}
	      new_arg1 = fold_convert (TREE_TYPE (new_arg0), arg1);
	    }
	  else
	    return NULL;
	}
      else
	return NULL;
    }

  /* If arg0/arg1 have > 1 use, then this transformation actually increases
     the number of expressions evaluated at runtime.  */
  if (!has_single_use (arg0)
      || (arg1_def_stmt && !has_single_use (arg1)))
    return NULL;

  /* If types of new_arg0 and new_arg1 are different bailout.  */
  if (!types_compatible_p (TREE_TYPE (new_arg0), TREE_TYPE (new_arg1)))
    return NULL;

  /* Create a new PHI stmt.  */
  result = PHI_RESULT (phi);
  temp = make_ssa_name (TREE_TYPE (new_arg0), NULL);
  newphi = create_phi_node (temp, gimple_bb (phi));

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "PHI ");
      print_generic_expr (dump_file, gimple_phi_result (phi));
      fprintf (dump_file,
	       " changed to factor conversion out from COND_EXPR.\n");
      fprintf (dump_file, "New stmt with CAST that defines ");
      print_generic_expr (dump_file, result);
      fprintf (dump_file, ".\n");
    }

  /* Remove the old cast(s) that has single use.  */
  gsi_for_def = gsi_for_stmt (arg0_def_stmt);
  gsi_remove (&gsi_for_def, true);
  release_defs (arg0_def_stmt);

  if (arg1_def_stmt)
    {
      gsi_for_def = gsi_for_stmt (arg1_def_stmt);
      gsi_remove (&gsi_for_def, true);
      release_defs (arg1_def_stmt);
    }

  add_phi_arg (newphi, new_arg0, e0, locus);
  add_phi_arg (newphi, new_arg1, e1, locus);

  /* Create the conversion stmt and insert it.  */
  if (convert_code == VIEW_CONVERT_EXPR)
    {
      temp = fold_build1 (VIEW_CONVERT_EXPR, TREE_TYPE (result), temp);
      new_stmt = gimple_build_assign (result, temp);
    }
  else
    new_stmt = gimple_build_assign (result, convert_code, temp);
  gsi = gsi_after_labels (gimple_bb (phi));
  gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT);

  /* Remove the original PHI stmt.  */
  gsi = gsi_for_stmt (phi);
  gsi_remove (&gsi, true);
  return newphi;
}
585
/* Optimize
   # x_5 in range [cst1, cst2] where cst2 = cst1 + 1
   if (x_5 op cstN) # where op is == or != and N is 1 or 2
     goto bb3;
   else
     goto bb4;
   bb3:
   bb4:
   # r_6 = PHI<cst3(2), cst4(3)> # where cst3 == cst4 + 1 or cst4 == cst3 + 1

   to r_6 = x_5 + (min (cst3, cst4) - cst1) or
   r_6 = (min (cst3, cst4) + cst1) - x_5 depending on op, N and which
   of cst3 and cst4 is smaller.  */

static bool
two_value_replacement (basic_block cond_bb, basic_block middle_bb,
		       edge e1, gphi *phi, tree arg0, tree arg1)
{
  /* Only look for adjacent integer constants.  */
  if (!INTEGRAL_TYPE_P (TREE_TYPE (arg0))
      || !INTEGRAL_TYPE_P (TREE_TYPE (arg1))
      || TREE_CODE (arg0) != INTEGER_CST
      || TREE_CODE (arg1) != INTEGER_CST
      || (tree_int_cst_lt (arg0, arg1)
	  ? wi::to_widest (arg0) + 1 != wi::to_widest (arg1)
	  : wi::to_widest (arg1) + 1 != wi::to_widest (arg0)))
    return false;

  /* The middle block must have no effects of its own.  */
  if (!empty_block_p (middle_bb))
    return false;

  gimple *stmt = last_stmt (cond_bb);
  tree lhs = gimple_cond_lhs (stmt);
  tree rhs = gimple_cond_rhs (stmt);

  /* The condition must be SSA_NAME ==/!= INTEGER_CST on a non-bool
     integral type.  */
  if (TREE_CODE (lhs) != SSA_NAME
      || !INTEGRAL_TYPE_P (TREE_TYPE (lhs))
      || TREE_CODE (TREE_TYPE (lhs)) == BOOLEAN_TYPE
      || TREE_CODE (rhs) != INTEGER_CST)
    return false;

  switch (gimple_cond_code (stmt))
    {
    case EQ_EXPR:
    case NE_EXPR:
      break;
    default:
      return false;
    }

  /* LHS must be known to take exactly two adjacent values
     [min, min + 1], and RHS must be one of them.  */
  wide_int min, max;
  if (get_range_info (lhs, &min, &max) != VR_RANGE
      || min + 1 != max
      || (wi::to_wide (rhs) != min
	  && wi::to_wide (rhs) != max))
    return false;

  /* We need to know which is the true edge and which is the false
     edge so that we know when to invert the condition below.  */
  edge true_edge, false_edge;
  extract_true_false_edges_from_block (cond_bb, &true_edge, &false_edge);
  /* Canonicalize so ARG0 is the PHI value chosen when LHS == min;
     each of the three XORed conditions flips that association.  */
  if ((gimple_cond_code (stmt) == EQ_EXPR)
      ^ (wi::to_wide (rhs) == max)
      ^ (e1 == false_edge))
    std::swap (arg0, arg1);

  /* Pick the type in which to perform the arithmetic.  */
  tree type;
  if (TYPE_PRECISION (TREE_TYPE (lhs)) == TYPE_PRECISION (TREE_TYPE (arg0)))
    {
      /* Avoid performing the arithmetics in bool type which has different
	 semantics, otherwise prefer unsigned types from the two with
	 the same precision.  */
      if (TREE_CODE (TREE_TYPE (arg0)) == BOOLEAN_TYPE
	  || !TYPE_UNSIGNED (TREE_TYPE (arg0)))
	type = TREE_TYPE (lhs);
      else
	type = TREE_TYPE (arg0);
    }
  else if (TYPE_PRECISION (TREE_TYPE (lhs)) > TYPE_PRECISION (TREE_TYPE (arg0)))
    type = TREE_TYPE (lhs);
  else
    type = TREE_TYPE (arg0);

  min = wide_int::from (min, TYPE_PRECISION (type),
			TYPE_SIGN (TREE_TYPE (lhs)));
  wide_int a = wide_int::from (wi::to_wide (arg0), TYPE_PRECISION (type),
			       TYPE_SIGN (TREE_TYPE (arg0)));
  enum tree_code code;
  wi::overflow_type ovf;
  if (tree_int_cst_lt (arg0, arg1))
    {
      /* Result grows with LHS: r = lhs + (arg0 - min).  */
      code = PLUS_EXPR;
      a -= min;
      if (!TYPE_UNSIGNED (type))
	{
	  /* lhs is known to be in range [min, min+1] and we want to add a
	     to it.  Check if that operation can overflow for those 2 values
	     and if yes, force unsigned type.  */
	  wi::add (min + (wi::neg_p (a) ? 0 : 1), a, SIGNED, &ovf);
	  if (ovf)
	    type = unsigned_type_for (type);
	}
    }
  else
    {
      /* Result shrinks as LHS grows: r = (arg0 + min) - lhs.  */
      code = MINUS_EXPR;
      a += min;
      if (!TYPE_UNSIGNED (type))
	{
	  /* lhs is known to be in range [min, min+1] and we want to subtract
	     it from a.  Check if that operation can overflow for those 2
	     values and if yes, force unsigned type.  */
	  wi::sub (a, min + (wi::neg_p (min) ? 0 : 1), SIGNED, &ovf);
	  if (ovf)
	    type = unsigned_type_for (type);
	}
    }

  /* Emit the straight-line computation right before the condition.  */
  tree arg = wide_int_to_tree (type, a);
  gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
  if (!useless_type_conversion_p (type, TREE_TYPE (lhs)))
    lhs = gimplify_build1 (&gsi, NOP_EXPR, type, lhs);
  tree new_rhs;
  if (code == PLUS_EXPR)
    new_rhs = gimplify_build2 (&gsi, PLUS_EXPR, type, lhs, arg);
  else
    new_rhs = gimplify_build2 (&gsi, MINUS_EXPR, type, arg, lhs);
  if (!useless_type_conversion_p (TREE_TYPE (arg0), type))
    new_rhs = gimplify_build1 (&gsi, NOP_EXPR, TREE_TYPE (arg0), new_rhs);

  replace_phi_edge_with_variable (cond_bb, e1, phi, new_rhs);

  /* Note that we optimized this PHI.  */
  return true;
}
721
/* The function conditional_replacement does the main work of doing the
   conditional replacement.  Return true if the replacement is done.
   Otherwise return false.
   BB is the basic block where the replacement is going to be done on.  ARG0
   is argument 0 from PHI.  Likewise for ARG1.

   Turns  r = PHI <0, 1>  (or <0, -1>) guarded by COND into
   r = COND (possibly negated / converted) as straightline code.  */

static bool
conditional_replacement (basic_block cond_bb, basic_block middle_bb,
			 edge e0, edge e1, gphi *phi,
			 tree arg0, tree arg1)
{
  tree result;
  gimple *stmt;
  gassign *new_stmt;
  tree cond;
  gimple_stmt_iterator gsi;
  edge true_edge, false_edge;
  tree new_var, new_var2;
  bool neg;

  /* FIXME: Gimplification of complex type is too hard for now.  */
  /* We aren't prepared to handle vectors either (and it is a question
     if it would be worthwhile anyway).  */
  if (!(INTEGRAL_TYPE_P (TREE_TYPE (arg0))
	|| POINTER_TYPE_P (TREE_TYPE (arg0)))
      || !(INTEGRAL_TYPE_P (TREE_TYPE (arg1))
	   || POINTER_TYPE_P (TREE_TYPE (arg1))))
    return false;

  /* The PHI arguments have the constants 0 and 1, or 0 and -1, then
     convert it to the conditional.  NEG records the 0/-1 variant,
     which additionally requires a negation of the condition value.  */
  if ((integer_zerop (arg0) && integer_onep (arg1))
      || (integer_zerop (arg1) && integer_onep (arg0)))
    neg = false;
  else if ((integer_zerop (arg0) && integer_all_onesp (arg1))
	   || (integer_zerop (arg1) && integer_all_onesp (arg0)))
    neg = true;
  else
    return false;

  if (!empty_block_p (middle_bb))
    return false;

  /* At this point we know we have a GIMPLE_COND with two successors.
     One successor is BB, the other successor is an empty block which
     falls through into BB.

     There is a single PHI node at the join point (BB) and its arguments
     are constants (0, 1) or (0, -1).

     So, given the condition COND, and the two PHI arguments, we can
     rewrite this PHI into non-branching code:

       dest = (COND) or dest = COND'

     We use the condition as-is if the argument associated with the
     true edge has the value one or the argument associated with the
     false edge as the value zero.  Note that those conditions are not
     the same since only one of the outgoing edges from the GIMPLE_COND
     will directly reach BB and thus be associated with an argument.  */

  stmt = last_stmt (cond_bb);
  result = PHI_RESULT (phi);

  /* To handle special cases like floating point comparison, it is easier and
     less error-prone to build a tree and gimplify it on the fly though it is
     less efficient.  */
  cond = fold_build2_loc (gimple_location (stmt),
			  gimple_cond_code (stmt), boolean_type_node,
			  gimple_cond_lhs (stmt), gimple_cond_rhs (stmt));

  /* We need to know which is the true edge and which is the false
     edge so that we know when to invert the condition below.  */
  extract_true_false_edges_from_block (cond_bb, &true_edge, &false_edge);
  if ((e0 == true_edge && integer_zerop (arg0))
      || (e0 == false_edge && !integer_zerop (arg0))
      || (e1 == true_edge && integer_zerop (arg1))
      || (e1 == false_edge && !integer_zerop (arg1)))
    cond = fold_build1_loc (gimple_location (stmt),
			    TRUTH_NOT_EXPR, TREE_TYPE (cond), cond);

  if (neg)
    {
      /* For the 0/-1 case, convert the boolean to the PHI result type
	 and negate it (0 -> 0, 1 -> -1).  */
      cond = fold_convert_loc (gimple_location (stmt),
			       TREE_TYPE (result), cond);
      cond = fold_build1_loc (gimple_location (stmt),
			      NEGATE_EXPR, TREE_TYPE (cond), cond);
    }

  /* Insert our new statements at the end of conditional block before the
     COND_STMT.  */
  gsi = gsi_for_stmt (stmt);
  new_var = force_gimple_operand_gsi (&gsi, cond, true, NULL, true,
				      GSI_SAME_STMT);

  if (!useless_type_conversion_p (TREE_TYPE (result), TREE_TYPE (new_var)))
    {
      location_t locus_0, locus_1;

      new_var2 = make_ssa_name (TREE_TYPE (result));
      new_stmt = gimple_build_assign (new_var2, CONVERT_EXPR, new_var);
      gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT);
      new_var = new_var2;

      /* Set the locus to the first argument, unless it doesn't have one.  */
      locus_0 = gimple_phi_arg_location (phi, 0);
      locus_1 = gimple_phi_arg_location (phi, 1);
      if (locus_0 == UNKNOWN_LOCATION)
	locus_0 = locus_1;
      gimple_set_location (new_stmt, locus_0);
    }

  replace_phi_edge_with_variable (cond_bb, e1, phi, new_var);

  /* Note that we optimized this PHI.  */
  return true;
}
839
840 /* Update *ARG which is defined in STMT so that it contains the
841 computed value if that seems profitable. Return true if the
842 statement is made dead by that rewriting. */
843
844 static bool
jump_function_from_stmt(tree * arg,gimple * stmt)845 jump_function_from_stmt (tree *arg, gimple *stmt)
846 {
847 enum tree_code code = gimple_assign_rhs_code (stmt);
848 if (code == ADDR_EXPR)
849 {
850 /* For arg = &p->i transform it to p, if possible. */
851 tree rhs1 = gimple_assign_rhs1 (stmt);
852 poly_int64 offset;
853 tree tem = get_addr_base_and_unit_offset (TREE_OPERAND (rhs1, 0),
854 &offset);
855 if (tem
856 && TREE_CODE (tem) == MEM_REF
857 && known_eq (mem_ref_offset (tem) + offset, 0))
858 {
859 *arg = TREE_OPERAND (tem, 0);
860 return true;
861 }
862 }
863 /* TODO: Much like IPA-CP jump-functions we want to handle constant
864 additions symbolically here, and we'd need to update the comparison
865 code that compares the arg + cst tuples in our caller. For now the
866 code above exactly handles the VEC_BASE pattern from vec.h. */
867 return false;
868 }
869
870 /* RHS is a source argument in a BIT_AND_EXPR which feeds a conditional
871 of the form SSA_NAME NE 0.
872
873 If RHS is fed by a simple EQ_EXPR comparison of two values, see if
874 the two input values of the EQ_EXPR match arg0 and arg1.
875
876 If so update *code and return TRUE. Otherwise return FALSE. */
877
878 static bool
rhs_is_fed_for_value_replacement(const_tree arg0,const_tree arg1,enum tree_code * code,const_tree rhs)879 rhs_is_fed_for_value_replacement (const_tree arg0, const_tree arg1,
880 enum tree_code *code, const_tree rhs)
881 {
882 /* Obviously if RHS is not an SSA_NAME, we can't look at the defining
883 statement. */
884 if (TREE_CODE (rhs) == SSA_NAME)
885 {
886 gimple *def1 = SSA_NAME_DEF_STMT (rhs);
887
888 /* Verify the defining statement has an EQ_EXPR on the RHS. */
889 if (is_gimple_assign (def1) && gimple_assign_rhs_code (def1) == EQ_EXPR)
890 {
891 /* Finally verify the source operands of the EQ_EXPR are equal
892 to arg0 and arg1. */
893 tree op0 = gimple_assign_rhs1 (def1);
894 tree op1 = gimple_assign_rhs2 (def1);
895 if ((operand_equal_for_phi_arg_p (arg0, op0)
896 && operand_equal_for_phi_arg_p (arg1, op1))
897 || (operand_equal_for_phi_arg_p (arg0, op1)
898 && operand_equal_for_phi_arg_p (arg1, op0)))
899 {
900 /* We will perform the optimization. */
901 *code = gimple_assign_rhs_code (def1);
902 return true;
903 }
904 }
905 }
906 return false;
907 }
908
909 /* Return TRUE if arg0/arg1 are equal to the rhs/lhs or lhs/rhs of COND.
910
911 Also return TRUE if arg0/arg1 are equal to the source arguments of a
912 an EQ comparison feeding a BIT_AND_EXPR which feeds COND.
913
914 Return FALSE otherwise. */
915
916 static bool
operand_equal_for_value_replacement(const_tree arg0,const_tree arg1,enum tree_code * code,gimple * cond)917 operand_equal_for_value_replacement (const_tree arg0, const_tree arg1,
918 enum tree_code *code, gimple *cond)
919 {
920 gimple *def;
921 tree lhs = gimple_cond_lhs (cond);
922 tree rhs = gimple_cond_rhs (cond);
923
924 if ((operand_equal_for_phi_arg_p (arg0, lhs)
925 && operand_equal_for_phi_arg_p (arg1, rhs))
926 || (operand_equal_for_phi_arg_p (arg1, lhs)
927 && operand_equal_for_phi_arg_p (arg0, rhs)))
928 return true;
929
930 /* Now handle more complex case where we have an EQ comparison
931 which feeds a BIT_AND_EXPR which feeds COND.
932
933 First verify that COND is of the form SSA_NAME NE 0. */
934 if (*code != NE_EXPR || !integer_zerop (rhs)
935 || TREE_CODE (lhs) != SSA_NAME)
936 return false;
937
938 /* Now ensure that SSA_NAME is set by a BIT_AND_EXPR. */
939 def = SSA_NAME_DEF_STMT (lhs);
940 if (!is_gimple_assign (def) || gimple_assign_rhs_code (def) != BIT_AND_EXPR)
941 return false;
942
943 /* Now verify arg0/arg1 correspond to the source arguments of an
944 EQ comparison feeding the BIT_AND_EXPR. */
945
946 tree tmp = gimple_assign_rhs1 (def);
947 if (rhs_is_fed_for_value_replacement (arg0, arg1, code, tmp))
948 return true;
949
950 tmp = gimple_assign_rhs2 (def);
951 if (rhs_is_fed_for_value_replacement (arg0, arg1, code, tmp))
952 return true;
953
954 return false;
955 }
956
957 /* Returns true if ARG is a neutral element for operation CODE
958 on the RIGHT side. */
959
960 static bool
neutral_element_p(tree_code code,tree arg,bool right)961 neutral_element_p (tree_code code, tree arg, bool right)
962 {
963 switch (code)
964 {
965 case PLUS_EXPR:
966 case BIT_IOR_EXPR:
967 case BIT_XOR_EXPR:
968 return integer_zerop (arg);
969
970 case LROTATE_EXPR:
971 case RROTATE_EXPR:
972 case LSHIFT_EXPR:
973 case RSHIFT_EXPR:
974 case MINUS_EXPR:
975 case POINTER_PLUS_EXPR:
976 return right && integer_zerop (arg);
977
978 case MULT_EXPR:
979 return integer_onep (arg);
980
981 case TRUNC_DIV_EXPR:
982 case CEIL_DIV_EXPR:
983 case FLOOR_DIV_EXPR:
984 case ROUND_DIV_EXPR:
985 case EXACT_DIV_EXPR:
986 return right && integer_onep (arg);
987
988 case BIT_AND_EXPR:
989 return integer_all_onesp (arg);
990
991 default:
992 return false;
993 }
994 }
995
996 /* Returns true if ARG is an absorbing element for operation CODE. */
997
998 static bool
absorbing_element_p(tree_code code,tree arg,bool right,tree rval)999 absorbing_element_p (tree_code code, tree arg, bool right, tree rval)
1000 {
1001 switch (code)
1002 {
1003 case BIT_IOR_EXPR:
1004 return integer_all_onesp (arg);
1005
1006 case MULT_EXPR:
1007 case BIT_AND_EXPR:
1008 return integer_zerop (arg);
1009
1010 case LSHIFT_EXPR:
1011 case RSHIFT_EXPR:
1012 case LROTATE_EXPR:
1013 case RROTATE_EXPR:
1014 return !right && integer_zerop (arg);
1015
1016 case TRUNC_DIV_EXPR:
1017 case CEIL_DIV_EXPR:
1018 case FLOOR_DIV_EXPR:
1019 case ROUND_DIV_EXPR:
1020 case EXACT_DIV_EXPR:
1021 case TRUNC_MOD_EXPR:
1022 case CEIL_MOD_EXPR:
1023 case FLOOR_MOD_EXPR:
1024 case ROUND_MOD_EXPR:
1025 return (!right
1026 && integer_zerop (arg)
1027 && tree_single_nonzero_warnv_p (rval, NULL));
1028
1029 default:
1030 return false;
1031 }
1032 }
1033
/* The function value_replacement does the main work of doing the value
   replacement.  Return non-zero if the replacement is done.  Otherwise return
   0.  If we remove the middle basic block, return 2.
   BB is the basic block where the replacement is going to be done on.  ARG0
   is argument 0 from the PHI.  Likewise for ARG1.

   COND_BB ends in a GIMPLE_COND whose conditional successor MIDDLE_BB
   falls through into the PHI's block; E0 and E1 are the two incoming
   edges of that block.  */

static int
value_replacement (basic_block cond_bb, basic_block middle_bb,
		   edge e0, edge e1, gimple *phi,
		   tree arg0, tree arg1)
{
  gimple_stmt_iterator gsi;
  gimple *cond;
  edge true_edge, false_edge;
  enum tree_code code;
  /* True while MIDDLE_BB is seen to contain only statements that define
     the PHI arguments (plus GIMPLE_PREDICT/GIMPLE_NOP), in which case
     the whole block can be deleted.  */
  bool empty_or_with_defined_p = true;

  /* If the type says honor signed zeros we cannot do this
     optimization.  */
  if (HONOR_SIGNED_ZEROS (arg1))
    return 0;

  /* If there is a statement in MIDDLE_BB that defines one of the PHI
     arguments, then adjust arg0 or arg1.  */
  gsi = gsi_start_nondebug_after_labels_bb (middle_bb);
  while (!gsi_end_p (gsi))
    {
      gimple *stmt = gsi_stmt (gsi);
      tree lhs;
      /* Advance before inspecting STMT; the loop body never removes
	 statements, only classifies them.  */
      gsi_next_nondebug (&gsi);
      if (!is_gimple_assign (stmt))
	{
	  /* Predict/nop statements don't affect whether the block is
	     removable; anything else does.  */
	  if (gimple_code (stmt) != GIMPLE_PREDICT
	      && gimple_code (stmt) != GIMPLE_NOP)
	    empty_or_with_defined_p = false;
	  continue;
	}
      /* Now try to adjust arg0 or arg1 according to the computation
	 in the statement.  jump_function_from_stmt replaces the arg
	 with the equivalent value it has on this path when it
	 succeeds.  */
      lhs = gimple_assign_lhs (stmt);
      /* NOTE(review): the second clause clears empty_or_with_defined_p
	 even when the arg1 adjustment SUCCEEDS, which is asymmetric
	 with the arg0 handling — suspect "&& !(lhs == arg1 && ...)"
	 was intended; confirm against upstream history before
	 changing, as the current form is merely conservative.  */
      if (!(lhs == arg0
	    && jump_function_from_stmt (&arg0, stmt))
	  || (lhs == arg1
	      && jump_function_from_stmt (&arg1, stmt)))
	empty_or_with_defined_p = false;
    }

  cond = last_stmt (cond_bb);
  code = gimple_cond_code (cond);

  /* This transformation is only valid for equality comparisons.  */
  if (code != NE_EXPR && code != EQ_EXPR)
    return 0;

  /* We need to know which is the true edge and which is the false
     edge so that we know if have abs or negative abs.  */
  extract_true_false_edges_from_block (cond_bb, &true_edge, &false_edge);

  /* At this point we know we have a COND_EXPR with two successors.
     One successor is BB, the other successor is an empty block which
     falls through into BB.

     The condition for the COND_EXPR is known to be NE_EXPR or EQ_EXPR.

     There is a single PHI node at the join point (BB) with two arguments.

     We now need to verify that the two arguments in the PHI node match
     the two arguments to the equality comparison.  */

  if (operand_equal_for_value_replacement (arg0, arg1, &code, cond))
    {
      edge e;
      tree arg;

      /* For NE_EXPR, we want to build an assignment result = arg where
	 arg is the PHI argument associated with the true edge.  For
	 EQ_EXPR we want the PHI argument associated with the false edge.  */
      e = (code == NE_EXPR ? true_edge : false_edge);

      /* Unfortunately, E may not reach BB (it may instead have gone to
	 OTHER_BLOCK).  If that is the case, then we want the single outgoing
	 edge from OTHER_BLOCK which reaches BB and represents the desired
	 path from COND_BLOCK.  */
      if (e->dest == middle_bb)
	e = single_succ_edge (e->dest);

      /* Now we know the incoming edge to BB that has the argument for the
	 RHS of our new assignment statement.  */
      if (e0 == e)
	arg = arg0;
      else
	arg = arg1;

      /* If the middle basic block was empty or is defining the
	 PHI arguments and this is a single phi where the args are different
	 for the edges e0 and e1 then we can remove the middle basic block. */
      if (empty_or_with_defined_p
	  && single_non_singleton_phi_for_edges (phi_nodes (gimple_bb (phi)),
						 e0, e1) == phi)
	{
	  replace_phi_edge_with_variable (cond_bb, e1, phi, arg);
	  /* Note that we optimized this PHI.  */
	  return 2;
	}
      else
	{
	  /* Replace the PHI arguments with arg.  The condition becomes
	     dead; later cleanup passes remove it.  */
	  SET_PHI_ARG_DEF (phi, e0->dest_idx, arg);
	  SET_PHI_ARG_DEF (phi, e1->dest_idx, arg);
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "PHI ");
	      print_generic_expr (dump_file, gimple_phi_result (phi));
	      fprintf (dump_file, " reduced for COND_EXPR in block %d to ",
		       cond_bb->index);
	      print_generic_expr (dump_file, arg);
	      fprintf (dump_file, ".\n");
	    }
	  return 1;
	}

    }

  /* Now optimize (x != 0) ? x + y : y to just x + y.  */
  gsi = gsi_last_nondebug_bb (middle_bb);
  if (gsi_end_p (gsi))
    return 0;

  gimple *assign = gsi_stmt (gsi);
  if (!is_gimple_assign (assign)
      || gimple_assign_rhs_class (assign) != GIMPLE_BINARY_RHS
      || (!INTEGRAL_TYPE_P (TREE_TYPE (arg0))
	  && !POINTER_TYPE_P (TREE_TYPE (arg0))))
    return 0;

  /* Punt if there are (degenerate) PHIs in middle_bb, there should not be. */
  if (!gimple_seq_empty_p (phi_nodes (middle_bb)))
    return 0;

  /* Allow up to 2 cheap preparation statements that prepare argument
     for assign, e.g.:
      if (y_4 != 0)
	goto <bb 3>;
      else
	goto <bb 4>;
     <bb 3>:
      _1 = (int) y_4;
      iftmp.0_6 = x_5(D) r<< _1;
     <bb 4>:
      # iftmp.0_2 = PHI <iftmp.0_6(3), x_5(D)(2)>
     or:
      if (y_3(D) == 0)
	goto <bb 4>;
      else
	goto <bb 3>;
     <bb 3>:
      y_4 = y_3(D) & 31;
      _1 = (int) y_4;
      _6 = x_5(D) r<< _1;
     <bb 4>:
      # _2 = PHI <x_5(D)(2), _6(3)>  */
  gimple *prep_stmt[2] = { NULL, NULL };
  int prep_cnt;
  /* Walk backwards from just before ASSIGN, collecting a chain of at
     most two single-use conversion/simple-arith statements feeding it.  */
  for (prep_cnt = 0; ; prep_cnt++)
    {
      gsi_prev_nondebug (&gsi);
      if (gsi_end_p (gsi))
	break;

      gimple *g = gsi_stmt (gsi);
      if (gimple_code (g) == GIMPLE_LABEL)
	break;

      if (prep_cnt == 2 || !is_gimple_assign (g))
	return 0;

      tree lhs = gimple_assign_lhs (g);
      tree rhs1 = gimple_assign_rhs1 (g);
      use_operand_p use_p;
      gimple *use_stmt;
      /* Each prep statement must feed exactly the next one in the chain
	 (or ASSIGN itself for the statement adjacent to it).  */
      if (TREE_CODE (lhs) != SSA_NAME
	  || TREE_CODE (rhs1) != SSA_NAME
	  || !INTEGRAL_TYPE_P (TREE_TYPE (lhs))
	  || !INTEGRAL_TYPE_P (TREE_TYPE (rhs1))
	  || !single_imm_use (lhs, &use_p, &use_stmt)
	  || use_stmt != (prep_cnt ? prep_stmt[prep_cnt - 1] : assign))
	return 0;
      switch (gimple_assign_rhs_code (g))
	{
	CASE_CONVERT:
	  break;
	case PLUS_EXPR:
	case BIT_AND_EXPR:
	case BIT_IOR_EXPR:
	case BIT_XOR_EXPR:
	  if (TREE_CODE (gimple_assign_rhs2 (g)) != INTEGER_CST)
	    return 0;
	  break;
	default:
	  return 0;
	}
      prep_stmt[prep_cnt] = g;
    }

  /* Only transform if it removes the condition.  */
  if (!single_non_singleton_phi_for_edges (phi_nodes (gimple_bb (phi)), e0, e1))
    return 0;

  /* Size-wise, this is always profitable.  */
  if (optimize_bb_for_speed_p (cond_bb)
      /* The special case is useless if it has a low probability.  */
      && profile_status_for_fn (cfun) != PROFILE_ABSENT
      && EDGE_PRED (middle_bb, 0)->probability < profile_probability::even ()
      /* If assign is cheap, there is no point avoiding it.  */
      && estimate_num_insns_seq (bb_seq (middle_bb), &eni_time_weights)
	 >= 3 * estimate_num_insns (cond, &eni_time_weights))
    return 0;

  tree lhs = gimple_assign_lhs (assign);
  tree rhs1 = gimple_assign_rhs1 (assign);
  tree rhs2 = gimple_assign_rhs2 (assign);
  enum tree_code code_def = gimple_assign_rhs_code (assign);
  tree cond_lhs = gimple_cond_lhs (cond);
  tree cond_rhs = gimple_cond_rhs (cond);

  /* Propagate the cond_rhs constant through preparation stmts,
     make sure UB isn't invoked while doing that.  */
  for (int i = prep_cnt - 1; i >= 0; --i)
    {
      gimple *g = prep_stmt[i];
      tree grhs1 = gimple_assign_rhs1 (g);
      if (!operand_equal_for_phi_arg_p (cond_lhs, grhs1))
	return 0;
      cond_lhs = gimple_assign_lhs (g);
      cond_rhs = fold_convert (TREE_TYPE (grhs1), cond_rhs);
      if (TREE_CODE (cond_rhs) != INTEGER_CST
	  || TREE_OVERFLOW (cond_rhs))
	return 0;
      if (gimple_assign_rhs_class (g) == GIMPLE_BINARY_RHS)
	{
	  cond_rhs = int_const_binop (gimple_assign_rhs_code (g), cond_rhs,
				      gimple_assign_rhs2 (g));
	  if (TREE_OVERFLOW (cond_rhs))
	    return 0;
	}
      cond_rhs = fold_convert (TREE_TYPE (cond_lhs), cond_rhs);
      if (TREE_CODE (cond_rhs) != INTEGER_CST
	  || TREE_OVERFLOW (cond_rhs))
	return 0;
    }

  /* The replacement is valid when, on the edge that skips MIDDLE_BB,
     cond_lhs equals the propagated constant and that constant is a
     neutral (or absorbing) element of ASSIGN's operation, so ASSIGN
     computes the same value the PHI would have selected.  */
  if (((code == NE_EXPR && e1 == false_edge)
       || (code == EQ_EXPR && e1 == true_edge))
      && arg0 == lhs
      && ((arg1 == rhs1
	   && operand_equal_for_phi_arg_p (rhs2, cond_lhs)
	   && neutral_element_p (code_def, cond_rhs, true))
	  || (arg1 == rhs2
	      && operand_equal_for_phi_arg_p (rhs1, cond_lhs)
	      && neutral_element_p (code_def, cond_rhs, false))
	  || (operand_equal_for_phi_arg_p (arg1, cond_rhs)
	      && ((operand_equal_for_phi_arg_p (rhs2, cond_lhs)
		   && absorbing_element_p (code_def, cond_rhs, true, rhs2))
		  || (operand_equal_for_phi_arg_p (rhs1, cond_lhs)
		      && absorbing_element_p (code_def,
					      cond_rhs, false, rhs2))))))
    {
      gsi = gsi_for_stmt (cond);
      /* Moving ASSIGN might change VR of lhs, e.g. when moving u_6
	 def-stmt in:
	 if (n_5 != 0)
	   goto <bb 3>;
	 else
	   goto <bb 4>;

	 <bb 3>:
	 # RANGE [0, 4294967294]
	 u_6 = n_5 + 4294967295;

	 <bb 4>:
	 # u_3 = PHI <u_6(3), 4294967295(2)>  */
      reset_flow_sensitive_info (lhs);
      if (INTEGRAL_TYPE_P (TREE_TYPE (lhs)))
	{
	  /* If available, we can use VR of phi result at least.  */
	  tree phires = gimple_phi_result (phi);
	  struct range_info_def *phires_range_info
	    = SSA_NAME_RANGE_INFO (phires);
	  if (phires_range_info)
	    duplicate_ssa_name_range_info (lhs, SSA_NAME_RANGE_TYPE (phires),
					   phires_range_info);
	}
      gimple_stmt_iterator gsi_from;
      /* Move the preparation statements (innermost first, so order is
	 preserved) and then ASSIGN itself in front of the condition.  */
      for (int i = prep_cnt - 1; i >= 0; --i)
	{
	  tree plhs = gimple_assign_lhs (prep_stmt[i]);
	  reset_flow_sensitive_info (plhs);
	  gsi_from = gsi_for_stmt (prep_stmt[i]);
	  gsi_move_before (&gsi_from, &gsi);
	}
      gsi_from = gsi_for_stmt (assign);
      gsi_move_before (&gsi_from, &gsi);
      replace_phi_edge_with_variable (cond_bb, e1, phi, lhs);
      return 2;
    }

  return 0;
}
1342
1343 /* The function minmax_replacement does the main work of doing the minmax
1344 replacement. Return true if the replacement is done. Otherwise return
1345 false.
1346 BB is the basic block where the replacement is going to be done on. ARG0
1347 is argument 0 from the PHI. Likewise for ARG1. */
1348
1349 static bool
minmax_replacement(basic_block cond_bb,basic_block middle_bb,edge e0,edge e1,gimple * phi,tree arg0,tree arg1)1350 minmax_replacement (basic_block cond_bb, basic_block middle_bb,
1351 edge e0, edge e1, gimple *phi,
1352 tree arg0, tree arg1)
1353 {
1354 tree result, type, rhs;
1355 gcond *cond;
1356 gassign *new_stmt;
1357 edge true_edge, false_edge;
1358 enum tree_code cmp, minmax, ass_code;
1359 tree smaller, alt_smaller, larger, alt_larger, arg_true, arg_false;
1360 gimple_stmt_iterator gsi, gsi_from;
1361
1362 type = TREE_TYPE (PHI_RESULT (phi));
1363
1364 /* The optimization may be unsafe due to NaNs. */
1365 if (HONOR_NANS (type) || HONOR_SIGNED_ZEROS (type))
1366 return false;
1367
1368 cond = as_a <gcond *> (last_stmt (cond_bb));
1369 cmp = gimple_cond_code (cond);
1370 rhs = gimple_cond_rhs (cond);
1371
1372 /* Turn EQ/NE of extreme values to order comparisons. */
1373 if ((cmp == NE_EXPR || cmp == EQ_EXPR)
1374 && TREE_CODE (rhs) == INTEGER_CST
1375 && INTEGRAL_TYPE_P (TREE_TYPE (rhs)))
1376 {
1377 if (wi::eq_p (wi::to_wide (rhs), wi::min_value (TREE_TYPE (rhs))))
1378 {
1379 cmp = (cmp == EQ_EXPR) ? LT_EXPR : GE_EXPR;
1380 rhs = wide_int_to_tree (TREE_TYPE (rhs),
1381 wi::min_value (TREE_TYPE (rhs)) + 1);
1382 }
1383 else if (wi::eq_p (wi::to_wide (rhs), wi::max_value (TREE_TYPE (rhs))))
1384 {
1385 cmp = (cmp == EQ_EXPR) ? GT_EXPR : LE_EXPR;
1386 rhs = wide_int_to_tree (TREE_TYPE (rhs),
1387 wi::max_value (TREE_TYPE (rhs)) - 1);
1388 }
1389 }
1390
1391 /* This transformation is only valid for order comparisons. Record which
1392 operand is smaller/larger if the result of the comparison is true. */
1393 alt_smaller = NULL_TREE;
1394 alt_larger = NULL_TREE;
1395 if (cmp == LT_EXPR || cmp == LE_EXPR)
1396 {
1397 smaller = gimple_cond_lhs (cond);
1398 larger = rhs;
1399 /* If we have smaller < CST it is equivalent to smaller <= CST-1.
1400 Likewise smaller <= CST is equivalent to smaller < CST+1. */
1401 if (TREE_CODE (larger) == INTEGER_CST
1402 && INTEGRAL_TYPE_P (TREE_TYPE (larger)))
1403 {
1404 if (cmp == LT_EXPR)
1405 {
1406 wi::overflow_type overflow;
1407 wide_int alt = wi::sub (wi::to_wide (larger), 1,
1408 TYPE_SIGN (TREE_TYPE (larger)),
1409 &overflow);
1410 if (! overflow)
1411 alt_larger = wide_int_to_tree (TREE_TYPE (larger), alt);
1412 }
1413 else
1414 {
1415 wi::overflow_type overflow;
1416 wide_int alt = wi::add (wi::to_wide (larger), 1,
1417 TYPE_SIGN (TREE_TYPE (larger)),
1418 &overflow);
1419 if (! overflow)
1420 alt_larger = wide_int_to_tree (TREE_TYPE (larger), alt);
1421 }
1422 }
1423 }
1424 else if (cmp == GT_EXPR || cmp == GE_EXPR)
1425 {
1426 smaller = rhs;
1427 larger = gimple_cond_lhs (cond);
1428 /* If we have larger > CST it is equivalent to larger >= CST+1.
1429 Likewise larger >= CST is equivalent to larger > CST-1. */
1430 if (TREE_CODE (smaller) == INTEGER_CST
1431 && INTEGRAL_TYPE_P (TREE_TYPE (smaller)))
1432 {
1433 wi::overflow_type overflow;
1434 if (cmp == GT_EXPR)
1435 {
1436 wide_int alt = wi::add (wi::to_wide (smaller), 1,
1437 TYPE_SIGN (TREE_TYPE (smaller)),
1438 &overflow);
1439 if (! overflow)
1440 alt_smaller = wide_int_to_tree (TREE_TYPE (smaller), alt);
1441 }
1442 else
1443 {
1444 wide_int alt = wi::sub (wi::to_wide (smaller), 1,
1445 TYPE_SIGN (TREE_TYPE (smaller)),
1446 &overflow);
1447 if (! overflow)
1448 alt_smaller = wide_int_to_tree (TREE_TYPE (smaller), alt);
1449 }
1450 }
1451 }
1452 else
1453 return false;
1454
1455 /* We need to know which is the true edge and which is the false
1456 edge so that we know if have abs or negative abs. */
1457 extract_true_false_edges_from_block (cond_bb, &true_edge, &false_edge);
1458
1459 /* Forward the edges over the middle basic block. */
1460 if (true_edge->dest == middle_bb)
1461 true_edge = EDGE_SUCC (true_edge->dest, 0);
1462 if (false_edge->dest == middle_bb)
1463 false_edge = EDGE_SUCC (false_edge->dest, 0);
1464
1465 if (true_edge == e0)
1466 {
1467 gcc_assert (false_edge == e1);
1468 arg_true = arg0;
1469 arg_false = arg1;
1470 }
1471 else
1472 {
1473 gcc_assert (false_edge == e0);
1474 gcc_assert (true_edge == e1);
1475 arg_true = arg1;
1476 arg_false = arg0;
1477 }
1478
1479 if (empty_block_p (middle_bb))
1480 {
1481 if ((operand_equal_for_phi_arg_p (arg_true, smaller)
1482 || (alt_smaller
1483 && operand_equal_for_phi_arg_p (arg_true, alt_smaller)))
1484 && (operand_equal_for_phi_arg_p (arg_false, larger)
1485 || (alt_larger
1486 && operand_equal_for_phi_arg_p (arg_true, alt_larger))))
1487 {
1488 /* Case
1489
1490 if (smaller < larger)
1491 rslt = smaller;
1492 else
1493 rslt = larger; */
1494 minmax = MIN_EXPR;
1495 }
1496 else if ((operand_equal_for_phi_arg_p (arg_false, smaller)
1497 || (alt_smaller
1498 && operand_equal_for_phi_arg_p (arg_false, alt_smaller)))
1499 && (operand_equal_for_phi_arg_p (arg_true, larger)
1500 || (alt_larger
1501 && operand_equal_for_phi_arg_p (arg_true, alt_larger))))
1502 minmax = MAX_EXPR;
1503 else
1504 return false;
1505 }
1506 else
1507 {
1508 /* Recognize the following case, assuming d <= u:
1509
1510 if (a <= u)
1511 b = MAX (a, d);
1512 x = PHI <b, u>
1513
1514 This is equivalent to
1515
1516 b = MAX (a, d);
1517 x = MIN (b, u); */
1518
1519 gimple *assign = last_and_only_stmt (middle_bb);
1520 tree lhs, op0, op1, bound;
1521
1522 if (!assign
1523 || gimple_code (assign) != GIMPLE_ASSIGN)
1524 return false;
1525
1526 lhs = gimple_assign_lhs (assign);
1527 ass_code = gimple_assign_rhs_code (assign);
1528 if (ass_code != MAX_EXPR && ass_code != MIN_EXPR)
1529 return false;
1530 op0 = gimple_assign_rhs1 (assign);
1531 op1 = gimple_assign_rhs2 (assign);
1532
1533 if (true_edge->src == middle_bb)
1534 {
1535 /* We got here if the condition is true, i.e., SMALLER < LARGER. */
1536 if (!operand_equal_for_phi_arg_p (lhs, arg_true))
1537 return false;
1538
1539 if (operand_equal_for_phi_arg_p (arg_false, larger)
1540 || (alt_larger
1541 && operand_equal_for_phi_arg_p (arg_false, alt_larger)))
1542 {
1543 /* Case
1544
1545 if (smaller < larger)
1546 {
1547 r' = MAX_EXPR (smaller, bound)
1548 }
1549 r = PHI <r', larger> --> to be turned to MIN_EXPR. */
1550 if (ass_code != MAX_EXPR)
1551 return false;
1552
1553 minmax = MIN_EXPR;
1554 if (operand_equal_for_phi_arg_p (op0, smaller)
1555 || (alt_smaller
1556 && operand_equal_for_phi_arg_p (op0, alt_smaller)))
1557 bound = op1;
1558 else if (operand_equal_for_phi_arg_p (op1, smaller)
1559 || (alt_smaller
1560 && operand_equal_for_phi_arg_p (op1, alt_smaller)))
1561 bound = op0;
1562 else
1563 return false;
1564
1565 /* We need BOUND <= LARGER. */
1566 if (!integer_nonzerop (fold_build2 (LE_EXPR, boolean_type_node,
1567 bound, larger)))
1568 return false;
1569 }
1570 else if (operand_equal_for_phi_arg_p (arg_false, smaller)
1571 || (alt_smaller
1572 && operand_equal_for_phi_arg_p (arg_false, alt_smaller)))
1573 {
1574 /* Case
1575
1576 if (smaller < larger)
1577 {
1578 r' = MIN_EXPR (larger, bound)
1579 }
1580 r = PHI <r', smaller> --> to be turned to MAX_EXPR. */
1581 if (ass_code != MIN_EXPR)
1582 return false;
1583
1584 minmax = MAX_EXPR;
1585 if (operand_equal_for_phi_arg_p (op0, larger)
1586 || (alt_larger
1587 && operand_equal_for_phi_arg_p (op0, alt_larger)))
1588 bound = op1;
1589 else if (operand_equal_for_phi_arg_p (op1, larger)
1590 || (alt_larger
1591 && operand_equal_for_phi_arg_p (op1, alt_larger)))
1592 bound = op0;
1593 else
1594 return false;
1595
1596 /* We need BOUND >= SMALLER. */
1597 if (!integer_nonzerop (fold_build2 (GE_EXPR, boolean_type_node,
1598 bound, smaller)))
1599 return false;
1600 }
1601 else
1602 return false;
1603 }
1604 else
1605 {
1606 /* We got here if the condition is false, i.e., SMALLER > LARGER. */
1607 if (!operand_equal_for_phi_arg_p (lhs, arg_false))
1608 return false;
1609
1610 if (operand_equal_for_phi_arg_p (arg_true, larger)
1611 || (alt_larger
1612 && operand_equal_for_phi_arg_p (arg_true, alt_larger)))
1613 {
1614 /* Case
1615
1616 if (smaller > larger)
1617 {
1618 r' = MIN_EXPR (smaller, bound)
1619 }
1620 r = PHI <r', larger> --> to be turned to MAX_EXPR. */
1621 if (ass_code != MIN_EXPR)
1622 return false;
1623
1624 minmax = MAX_EXPR;
1625 if (operand_equal_for_phi_arg_p (op0, smaller)
1626 || (alt_smaller
1627 && operand_equal_for_phi_arg_p (op0, alt_smaller)))
1628 bound = op1;
1629 else if (operand_equal_for_phi_arg_p (op1, smaller)
1630 || (alt_smaller
1631 && operand_equal_for_phi_arg_p (op1, alt_smaller)))
1632 bound = op0;
1633 else
1634 return false;
1635
1636 /* We need BOUND >= LARGER. */
1637 if (!integer_nonzerop (fold_build2 (GE_EXPR, boolean_type_node,
1638 bound, larger)))
1639 return false;
1640 }
1641 else if (operand_equal_for_phi_arg_p (arg_true, smaller)
1642 || (alt_smaller
1643 && operand_equal_for_phi_arg_p (arg_true, alt_smaller)))
1644 {
1645 /* Case
1646
1647 if (smaller > larger)
1648 {
1649 r' = MAX_EXPR (larger, bound)
1650 }
1651 r = PHI <r', smaller> --> to be turned to MIN_EXPR. */
1652 if (ass_code != MAX_EXPR)
1653 return false;
1654
1655 minmax = MIN_EXPR;
1656 if (operand_equal_for_phi_arg_p (op0, larger))
1657 bound = op1;
1658 else if (operand_equal_for_phi_arg_p (op1, larger))
1659 bound = op0;
1660 else
1661 return false;
1662
1663 /* We need BOUND <= SMALLER. */
1664 if (!integer_nonzerop (fold_build2 (LE_EXPR, boolean_type_node,
1665 bound, smaller)))
1666 return false;
1667 }
1668 else
1669 return false;
1670 }
1671
1672 /* Move the statement from the middle block. */
1673 gsi = gsi_last_bb (cond_bb);
1674 gsi_from = gsi_last_nondebug_bb (middle_bb);
1675 reset_flow_sensitive_info (SINGLE_SSA_TREE_OPERAND (gsi_stmt (gsi_from),
1676 SSA_OP_DEF));
1677 gsi_move_before (&gsi_from, &gsi);
1678 }
1679
1680 /* Create an SSA var to hold the min/max result. If we're the only
1681 things setting the target PHI, then we can clone the PHI
1682 variable. Otherwise we must create a new one. */
1683 result = PHI_RESULT (phi);
1684 if (EDGE_COUNT (gimple_bb (phi)->preds) == 2)
1685 result = duplicate_ssa_name (result, NULL);
1686 else
1687 result = make_ssa_name (TREE_TYPE (result));
1688
1689 /* Emit the statement to compute min/max. */
1690 new_stmt = gimple_build_assign (result, minmax, arg0, arg1);
1691 gsi = gsi_last_bb (cond_bb);
1692 gsi_insert_before (&gsi, new_stmt, GSI_NEW_STMT);
1693
1694 replace_phi_edge_with_variable (cond_bb, e1, phi, result);
1695
1696 return true;
1697 }
1698
/* Convert

   <bb 2>
   if (b_4(D) != 0)
   goto <bb 3>
   else
   goto <bb 4>

   <bb 3>
   _2 = (unsigned long) b_4(D);
   _9 = __builtin_popcountl (_2);
   OR
   _9 = __builtin_popcountl (b_4(D));

   <bb 4>
   c_12 = PHI <0(2), _9(3)>

   Into
   <bb 2>
   _2 = (unsigned long) b_4(D);
   _9 = __builtin_popcountl (_2);
   OR
   _9 = __builtin_popcountl (b_4(D));

   <bb 4>
   c_12 = PHI <_9(2)>

   The transform is valid because popcount (0) == 0, so computing the
   popcount unconditionally yields the same value the PHI selects.  */

static bool
cond_removal_in_popcount_pattern (basic_block cond_bb, basic_block middle_bb,
				  edge e1, edge e2,
				  gimple *phi, tree arg0, tree arg1)
{
  gimple *cond;
  gimple_stmt_iterator gsi, gsi_from;
  gimple *popcount;
  gimple *cast = NULL;
  tree lhs, arg;

  /* Check that
   _2 = (unsigned long) b_4(D);
   _9 = __builtin_popcountl (_2);
   OR
   _9 = __builtin_popcountl (b_4(D));
   are the only stmts in the middle_bb.  */

  gsi = gsi_start_nondebug_after_labels_bb (middle_bb);
  if (gsi_end_p (gsi))
    return false;
  /* Tentatively treat the first statement as the cast; if it turns out
     to be the only statement, it is the popcount call instead.  */
  cast = gsi_stmt (gsi);
  gsi_next_nondebug (&gsi);
  if (!gsi_end_p (gsi))
    {
      popcount = gsi_stmt (gsi);
      gsi_next_nondebug (&gsi);
      /* More than two statements: not the pattern.  */
      if (!gsi_end_p (gsi))
	return false;
    }
  else
    {
      popcount = cast;
      cast = NULL;
    }

  /* Check that we have a popcount builtin.  */
  if (!is_gimple_call (popcount))
    return false;
  combined_fn cfn = gimple_call_combined_fn (popcount);
  switch (cfn)
    {
    CASE_CFN_POPCOUNT:
      break;
    default:
      return false;
    }

  arg = gimple_call_arg (popcount, 0);
  lhs = gimple_get_lhs (popcount);

  if (cast)
    {
      /* We have a cast stmt feeding popcount builtin.  */
      /* Check that we have a cast prior to that.  */
      if (gimple_code (cast) != GIMPLE_ASSIGN
	  || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (cast)))
	return false;
      /* Result of the cast stmt is the argument to the builtin.  */
      if (arg != gimple_assign_lhs (cast))
	return false;
      /* From here on ARG is the value tested by the condition.  */
      arg = gimple_assign_rhs1 (cast);
    }

  cond = last_stmt (cond_bb);

  /* Cond_bb has a check for b_4 [!=|==] 0 before calling the popcount
     builtin.  */
  if (gimple_code (cond) != GIMPLE_COND
      || (gimple_cond_code (cond) != NE_EXPR
	  && gimple_cond_code (cond) != EQ_EXPR)
      || !integer_zerop (gimple_cond_rhs (cond))
      || arg != gimple_cond_lhs (cond))
    return false;

  /* Canonicalize so that E2 is the edge coming from MIDDLE_BB (carrying
     the popcount result in ARG0) and E1 carries the zero.  */
  if ((e2->flags & EDGE_TRUE_VALUE
       && gimple_cond_code (cond) == NE_EXPR)
      || (e1->flags & EDGE_TRUE_VALUE
	  && gimple_cond_code (cond) == EQ_EXPR))
    {
      std::swap (arg0, arg1);
      std::swap (e1, e2);
    }

  /* Check PHI arguments.  */
  if (lhs != arg0 || !integer_zerop (arg1))
    return false;

  /* And insert the popcount builtin and cast stmt before the cond_bb.
     NOTE(review): no check here that the target expands popcount
     inline; if it becomes a libcall the speculated call may be costly —
     confirm the intended profitability policy.  */
  gsi = gsi_last_bb (cond_bb);
  if (cast)
    {
      gsi_from = gsi_for_stmt (cast);
      gsi_move_before (&gsi_from, &gsi);
      /* Ranges derived under the b_4 != 0 guard no longer hold.  */
      reset_flow_sensitive_info (gimple_get_lhs (cast));
    }
  gsi_from = gsi_for_stmt (popcount);
  gsi_move_before (&gsi_from, &gsi);
  reset_flow_sensitive_info (gimple_get_lhs (popcount));

  /* Now update the PHI and remove unneeded bbs.  */
  replace_phi_edge_with_variable (cond_bb, e2, phi, lhs);
  return true;
}
1832
1833 /* The function absolute_replacement does the main work of doing the absolute
1834 replacement. Return true if the replacement is done. Otherwise return
1835 false.
1836 bb is the basic block where the replacement is going to be done on. arg0
1837 is argument 0 from the phi. Likewise for arg1. */
1838
1839 static bool
abs_replacement(basic_block cond_bb,basic_block middle_bb,edge e0 ATTRIBUTE_UNUSED,edge e1,gimple * phi,tree arg0,tree arg1)1840 abs_replacement (basic_block cond_bb, basic_block middle_bb,
1841 edge e0 ATTRIBUTE_UNUSED, edge e1,
1842 gimple *phi, tree arg0, tree arg1)
1843 {
1844 tree result;
1845 gassign *new_stmt;
1846 gimple *cond;
1847 gimple_stmt_iterator gsi;
1848 edge true_edge, false_edge;
1849 gimple *assign;
1850 edge e;
1851 tree rhs, lhs;
1852 bool negate;
1853 enum tree_code cond_code;
1854
1855 /* If the type says honor signed zeros we cannot do this
1856 optimization. */
1857 if (HONOR_SIGNED_ZEROS (arg1))
1858 return false;
1859
1860 /* OTHER_BLOCK must have only one executable statement which must have the
1861 form arg0 = -arg1 or arg1 = -arg0. */
1862
1863 assign = last_and_only_stmt (middle_bb);
1864 /* If we did not find the proper negation assignment, then we cannot
1865 optimize. */
1866 if (assign == NULL)
1867 return false;
1868
1869 /* If we got here, then we have found the only executable statement
1870 in OTHER_BLOCK. If it is anything other than arg = -arg1 or
1871 arg1 = -arg0, then we cannot optimize. */
1872 if (gimple_code (assign) != GIMPLE_ASSIGN)
1873 return false;
1874
1875 lhs = gimple_assign_lhs (assign);
1876
1877 if (gimple_assign_rhs_code (assign) != NEGATE_EXPR)
1878 return false;
1879
1880 rhs = gimple_assign_rhs1 (assign);
1881
1882 /* The assignment has to be arg0 = -arg1 or arg1 = -arg0. */
1883 if (!(lhs == arg0 && rhs == arg1)
1884 && !(lhs == arg1 && rhs == arg0))
1885 return false;
1886
1887 cond = last_stmt (cond_bb);
1888 result = PHI_RESULT (phi);
1889
1890 /* Only relationals comparing arg[01] against zero are interesting. */
1891 cond_code = gimple_cond_code (cond);
1892 if (cond_code != GT_EXPR && cond_code != GE_EXPR
1893 && cond_code != LT_EXPR && cond_code != LE_EXPR)
1894 return false;
1895
1896 /* Make sure the conditional is arg[01] OP y. */
1897 if (gimple_cond_lhs (cond) != rhs)
1898 return false;
1899
1900 if (FLOAT_TYPE_P (TREE_TYPE (gimple_cond_rhs (cond)))
1901 ? real_zerop (gimple_cond_rhs (cond))
1902 : integer_zerop (gimple_cond_rhs (cond)))
1903 ;
1904 else
1905 return false;
1906
1907 /* We need to know which is the true edge and which is the false
1908 edge so that we know if have abs or negative abs. */
1909 extract_true_false_edges_from_block (cond_bb, &true_edge, &false_edge);
1910
1911 /* For GT_EXPR/GE_EXPR, if the true edge goes to OTHER_BLOCK, then we
1912 will need to negate the result. Similarly for LT_EXPR/LE_EXPR if
1913 the false edge goes to OTHER_BLOCK. */
1914 if (cond_code == GT_EXPR || cond_code == GE_EXPR)
1915 e = true_edge;
1916 else
1917 e = false_edge;
1918
1919 if (e->dest == middle_bb)
1920 negate = true;
1921 else
1922 negate = false;
1923
1924 /* If the code negates only iff positive then make sure to not
1925 introduce undefined behavior when negating or computing the absolute.
1926 ??? We could use range info if present to check for arg1 == INT_MIN. */
1927 if (negate
1928 && (ANY_INTEGRAL_TYPE_P (TREE_TYPE (arg1))
1929 && ! TYPE_OVERFLOW_WRAPS (TREE_TYPE (arg1))))
1930 return false;
1931
1932 result = duplicate_ssa_name (result, NULL);
1933
1934 if (negate)
1935 lhs = make_ssa_name (TREE_TYPE (result));
1936 else
1937 lhs = result;
1938
1939 /* Build the modify expression with abs expression. */
1940 new_stmt = gimple_build_assign (lhs, ABS_EXPR, rhs);
1941
1942 gsi = gsi_last_bb (cond_bb);
1943 gsi_insert_before (&gsi, new_stmt, GSI_NEW_STMT);
1944
1945 if (negate)
1946 {
1947 /* Get the right GSI. We want to insert after the recently
1948 added ABS_EXPR statement (which we know is the first statement
1949 in the block. */
1950 new_stmt = gimple_build_assign (result, NEGATE_EXPR, lhs);
1951
1952 gsi_insert_after (&gsi, new_stmt, GSI_NEW_STMT);
1953 }
1954
1955 replace_phi_edge_with_variable (cond_bb, e1, phi, result);
1956
1957 /* Note that we optimized this PHI. */
1958 return true;
1959 }
1960
1961 /* Auxiliary functions to determine the set of memory accesses which
1962 can't trap because they are preceded by accesses to the same memory
1963 portion. We do that for MEM_REFs, so we only need to track
1964 the SSA_NAME of the pointer indirectly referenced. The algorithm
1965 simply is a walk over all instructions in dominator order. When
   we see a MEM_REF we determine if we've already seen the same
1967 ref anywhere up to the root of the dominator tree. If we do the
1968 current access can't trap. If we don't see any dominating access
1969 the current access might trap, but might also make later accesses
1970 non-trapping, so we remember it. We need to be careful with loads
1971 or stores, for instance a load might not trap, while a store would,
1972 so if we see a dominating read access this doesn't mean that a later
1973 write access would not trap. Hence we also need to differentiate the
1974 type of access(es) seen.
1975
1976 ??? We currently are very conservative and assume that a load might
1977 trap even if a store doesn't (write-only memory). This probably is
1978 overly conservative. */
1979
/* A hash-table of SSA_NAMEs, and in which basic block a MEM_REF
   through it was seen, which would constitute a no-trap region for
   same accesses.  */
struct name_to_bb
{
  /* SSA_NAME_VERSION of the pointer the MEM_REF goes through.  */
  unsigned int ssa_name_ver;
  /* Value of nt_call_phase when this entry was recorded; entries with
     phase < nt_call_phase are considered invalid (see nt_call_phase).  */
  unsigned int phase;
  /* True if the recorded access was a store (LHS), false for a load.  */
  bool store;
  /* Constant byte offset and size of the access; accesses must match
     on both to be considered the same.  */
  HOST_WIDE_INT offset, size;
  /* Basic block in which the access was seen.  */
  basic_block bb;
};
1991
/* Hashtable helpers.  */

struct ssa_names_hasher : free_ptr_hash <name_to_bb>
{
  /* Hash an access descriptor.  */
  static inline hashval_t hash (const name_to_bb *);
  /* Compare two access descriptors for equality.  */
  static inline bool equal (const name_to_bb *, const name_to_bb *);
};
1999
2000 /* Used for quick clearing of the hash-table when we see calls.
2001 Hash entries with phase < nt_call_phase are invalid. */
2002 static unsigned int nt_call_phase;
2003
2004 /* The hash function. */
2005
2006 inline hashval_t
hash(const name_to_bb * n)2007 ssa_names_hasher::hash (const name_to_bb *n)
2008 {
2009 return n->ssa_name_ver ^ (((hashval_t) n->store) << 31)
2010 ^ (n->offset << 6) ^ (n->size << 3);
2011 }
2012
2013 /* The equality function of *P1 and *P2. */
2014
2015 inline bool
equal(const name_to_bb * n1,const name_to_bb * n2)2016 ssa_names_hasher::equal (const name_to_bb *n1, const name_to_bb *n2)
2017 {
2018 return n1->ssa_name_ver == n2->ssa_name_ver
2019 && n1->store == n2->store
2020 && n1->offset == n2->offset
2021 && n1->size == n2->size;
2022 }
2023
/* A dominator-order walker that collects, into the set passed to the
   constructor, MEM_REF trees which provably cannot trap because a
   dominating access to the same memory was already seen.  */

class nontrapping_dom_walker : public dom_walker
{
public:
  nontrapping_dom_walker (cdi_direction direction, hash_set<tree> *ps)
    : dom_walker (direction), m_nontrapping (ps), m_seen_ssa_names (128) {}

  virtual edge before_dom_children (basic_block);
  virtual void after_dom_children (basic_block);

private:

  /* We see the expression EXP in basic block BB.  If it's an interesting
     expression (a MEM_REF through an SSA_NAME) possibly insert the
     expression into the set NONTRAP or the hash table of seen expressions.
     STORE is true if this expression is on the LHS, otherwise it's on
     the RHS.  */
  void add_or_mark_expr (basic_block, tree, bool);

  /* The set of MEM_REFs found not to trap; owned by the caller.  */
  hash_set<tree> *m_nontrapping;

  /* The hash table for remembering what we've seen.  */
  hash_table<ssa_names_hasher> m_seen_ssa_names;
};
2047
2048 /* Called by walk_dominator_tree, when entering the block BB. */
2049 edge
before_dom_children(basic_block bb)2050 nontrapping_dom_walker::before_dom_children (basic_block bb)
2051 {
2052 edge e;
2053 edge_iterator ei;
2054 gimple_stmt_iterator gsi;
2055
2056 /* If we haven't seen all our predecessors, clear the hash-table. */
2057 FOR_EACH_EDGE (e, ei, bb->preds)
2058 if ((((size_t)e->src->aux) & 2) == 0)
2059 {
2060 nt_call_phase++;
2061 break;
2062 }
2063
2064 /* Mark this BB as being on the path to dominator root and as visited. */
2065 bb->aux = (void*)(1 | 2);
2066
2067 /* And walk the statements in order. */
2068 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2069 {
2070 gimple *stmt = gsi_stmt (gsi);
2071
2072 if ((gimple_code (stmt) == GIMPLE_ASM && gimple_vdef (stmt))
2073 || (is_gimple_call (stmt)
2074 && (!nonfreeing_call_p (stmt) || !nonbarrier_call_p (stmt))))
2075 nt_call_phase++;
2076 else if (gimple_assign_single_p (stmt) && !gimple_has_volatile_ops (stmt))
2077 {
2078 add_or_mark_expr (bb, gimple_assign_lhs (stmt), true);
2079 add_or_mark_expr (bb, gimple_assign_rhs1 (stmt), false);
2080 }
2081 }
2082 return NULL;
2083 }
2084
/* Called by walk_dominator_tree, when basic block BB is exited.  */
void
nontrapping_dom_walker::after_dom_children (basic_block bb)
{
  /* This BB isn't on the path to dominator root anymore: clear bit 1
     but keep bit 2 set so the block still counts as visited for the
     predecessor check in before_dom_children.  */
  bb->aux = (void*)2;
}
2092
/* We see the expression EXP in basic block BB.  If it's an interesting
   expression (a MEM_REF through an SSA_NAME) possibly insert the
   expression into the set NONTRAP or the hash table of seen expressions.
   STORE is true if this expression is on the LHS, otherwise it's on
   the RHS.  */
void
nontrapping_dom_walker::add_or_mark_expr (basic_block bb, tree exp, bool store)
{
  HOST_WIDE_INT size;

  /* Only track MEM_REFs based directly on an SSA_NAME with a constant
     offset and a known positive size.  */
  if (TREE_CODE (exp) == MEM_REF
      && TREE_CODE (TREE_OPERAND (exp, 0)) == SSA_NAME
      && tree_fits_shwi_p (TREE_OPERAND (exp, 1))
      && (size = int_size_in_bytes (TREE_TYPE (exp))) > 0)
    {
      tree name = TREE_OPERAND (exp, 0);
      struct name_to_bb map;
      name_to_bb **slot;
      struct name_to_bb *n2bb;
      basic_block found_bb = 0;

      /* Try to find the last seen MEM_REF through the same
         SSA_NAME, which can trap.  */
      map.ssa_name_ver = SSA_NAME_VERSION (name);
      map.phase = 0;
      map.bb = 0;
      map.store = store;
      map.offset = tree_to_shwi (TREE_OPERAND (exp, 1));
      map.size = size;

      slot = m_seen_ssa_names.find_slot (&map, INSERT);
      n2bb = *slot;
      /* Entries recorded before the last barrier (phase < nt_call_phase)
	 are stale and must be ignored.  */
      if (n2bb && n2bb->phase >= nt_call_phase)
	found_bb = n2bb->bb;

      /* If we've found a trapping MEM_REF, _and_ it dominates EXP
	 (it's in a basic block on the path from us to the dominator root)
	 then we can't trap.  */
      if (found_bb && (((size_t)found_bb->aux) & 1) == 1)
	{
	  m_nontrapping->add (exp);
	}
      else
	{
	  /* EXP might trap, so insert it into the hash table.  */
	  if (n2bb)
	    {
	      /* Refresh the stale or non-dominating entry in place.  */
	      n2bb->phase = nt_call_phase;
	      n2bb->bb = bb;
	    }
	  else
	    {
	      n2bb = XNEW (struct name_to_bb);
	      n2bb->ssa_name_ver = SSA_NAME_VERSION (name);
	      n2bb->phase = nt_call_phase;
	      n2bb->bb = bb;
	      n2bb->store = store;
	      n2bb->offset = map.offset;
	      n2bb->size = size;
	      *slot = n2bb;
	    }
	}
    }
}
2157
2158 /* This is the entry point of gathering non trapping memory accesses.
2159 It will do a dominator walk over the whole function, and it will
2160 make use of the bb->aux pointers. It returns a set of trees
2161 (the MEM_REFs itself) which can't trap. */
2162 static hash_set<tree> *
get_non_trapping(void)2163 get_non_trapping (void)
2164 {
2165 nt_call_phase = 0;
2166 hash_set<tree> *nontrap = new hash_set<tree>;
2167 /* We're going to do a dominator walk, so ensure that we have
2168 dominance information. */
2169 calculate_dominance_info (CDI_DOMINATORS);
2170
2171 nontrapping_dom_walker (CDI_DOMINATORS, nontrap)
2172 .walk (cfun->cfg->x_entry_block_ptr);
2173
2174 clear_aux_for_blocks ();
2175 return nontrap;
2176 }
2177
/* Do the main work of conditional store replacement.  We already know
   that the recognized pattern looks like so:

   split:
     if (cond) goto MIDDLE_BB; else goto JOIN_BB (edge E1)
   MIDDLE_BB:
     something
     fallthrough (edge E0)
   JOIN_BB:
     some more

   We check that MIDDLE_BB contains only one store, that that store
   doesn't trap (not via NOTRAP, but via checking if an access to the same
   memory location dominates us) and that the store has a "simple" RHS.

   Returns true if the store was sunk into JOIN_BB as a conditional
   select of old and new values.  */

static bool
cond_store_replacement (basic_block middle_bb, basic_block join_bb,
			edge e0, edge e1, hash_set<tree> *nontrap)
{
  gimple *assign = last_and_only_stmt (middle_bb);
  tree lhs, rhs, name, name2;
  gphi *newphi;
  gassign *new_stmt;
  gimple_stmt_iterator gsi;
  location_t locus;

  /* Check if middle_bb contains of only one store.  */
  if (!assign
      || !gimple_assign_single_p (assign)
      || gimple_has_volatile_ops (assign))
    return false;

  /* And no PHI nodes so all uses in the single stmt are also
     available where we insert to.  */
  if (!gimple_seq_empty_p (phi_nodes (middle_bb)))
    return false;

  locus = gimple_location (assign);
  lhs = gimple_assign_lhs (assign);
  rhs = gimple_assign_rhs1 (assign);
  /* Only handle stores through an SSA pointer to a register-type
     object: we need to be able to load the old value as a scalar.  */
  if (TREE_CODE (lhs) != MEM_REF
      || TREE_CODE (TREE_OPERAND (lhs, 0)) != SSA_NAME
      || !is_gimple_reg_type (TREE_TYPE (lhs)))
    return false;

  /* Prove that we can move the store down.  We could also check
     TREE_THIS_NOTRAP here, but in that case we also could move stores,
     whose value is not available readily, which we want to avoid.  */
  if (!nontrap->contains (lhs))
    return false;

  /* Now we've checked the constraints, so do the transformation:
     1) Remove the single store.  */
  gsi = gsi_for_stmt (assign);
  unlink_stmt_vdef (assign);
  gsi_remove (&gsi, true);
  release_defs (assign);

  /* Make both store and load use alias-set zero as we have to
     deal with the case of the store being a conditional change
     of the dynamic type.  */
  lhs = unshare_expr (lhs);
  tree *basep = &lhs;
  /* Strip handled components to reach the base memory reference.  */
  while (handled_component_p (*basep))
    basep = &TREE_OPERAND (*basep, 0);
  /* Rewriting the offset operand to ptr_type_node gives the reference
     alias-set zero.  */
  if (TREE_CODE (*basep) == MEM_REF
      || TREE_CODE (*basep) == TARGET_MEM_REF)
    TREE_OPERAND (*basep, 1)
      = fold_convert (ptr_type_node, TREE_OPERAND (*basep, 1));
  else
    *basep = build2 (MEM_REF, TREE_TYPE (*basep),
		     build_fold_addr_expr (*basep),
		     build_zero_cst (ptr_type_node));

  /* 2) Insert a load from the memory of the store to the temporary
        on the edge which did not contain the store.  */
  name = make_temp_ssa_name (TREE_TYPE (lhs), NULL, "cstore");
  new_stmt = gimple_build_assign (name, lhs);
  gimple_set_location (new_stmt, locus);
  gsi_insert_on_edge (e1, new_stmt);

  /* 3) Create a PHI node at the join block, with one argument
        holding the old RHS, and the other holding the temporary
        where we stored the old memory contents.  */
  name2 = make_temp_ssa_name (TREE_TYPE (lhs), NULL, "cstore");
  newphi = create_phi_node (name2, join_bb);
  add_phi_arg (newphi, rhs, e0, locus);
  add_phi_arg (newphi, name, e1, locus);

  lhs = unshare_expr (lhs);
  new_stmt = gimple_build_assign (lhs, PHI_RESULT (newphi));

  /* 4) Insert that PHI node.  */
  gsi = gsi_after_labels (join_bb);
  if (gsi_end_p (gsi))
    {
      gsi = gsi_last_bb (join_bb);
      gsi_insert_after (&gsi, new_stmt, GSI_NEW_STMT);
    }
  else
    gsi_insert_before (&gsi, new_stmt, GSI_NEW_STMT);

  return true;
}
2282
/* Do the main work of conditional store replacement.  THEN_ASSIGN and
   ELSE_ASSIGN are single stores to the same location in THEN_BB and
   ELSE_BB; both are replaced by a PHI of their RHSs in JOIN_BB followed
   by one unconditional store.  Returns true on success.  */

static bool
cond_if_else_store_replacement_1 (basic_block then_bb, basic_block else_bb,
				  basic_block join_bb, gimple *then_assign,
				  gimple *else_assign)
{
  tree lhs_base, lhs, then_rhs, else_rhs, name;
  location_t then_locus, else_locus;
  gimple_stmt_iterator gsi;
  gphi *newphi;
  gassign *new_stmt;

  /* Both statements must be plain, non-volatile, non-clobber stores.  */
  if (then_assign == NULL
      || !gimple_assign_single_p (then_assign)
      || gimple_clobber_p (then_assign)
      || gimple_has_volatile_ops (then_assign)
      || else_assign == NULL
      || !gimple_assign_single_p (else_assign)
      || gimple_clobber_p (else_assign)
      || gimple_has_volatile_ops (else_assign))
    return false;

  /* The two stores must write the same register-type location.  */
  lhs = gimple_assign_lhs (then_assign);
  if (!is_gimple_reg_type (TREE_TYPE (lhs))
      || !operand_equal_p (lhs, gimple_assign_lhs (else_assign), 0))
    return false;

  lhs_base = get_base_address (lhs);
  if (lhs_base == NULL_TREE
      || (!DECL_P (lhs_base) && TREE_CODE (lhs_base) != MEM_REF))
    return false;

  then_rhs = gimple_assign_rhs1 (then_assign);
  else_rhs = gimple_assign_rhs1 (else_assign);
  then_locus = gimple_location (then_assign);
  else_locus = gimple_location (else_assign);

  /* Now we've checked the constraints, so do the transformation:
     1) Remove the stores.  */
  gsi = gsi_for_stmt (then_assign);
  unlink_stmt_vdef (then_assign);
  gsi_remove (&gsi, true);
  release_defs (then_assign);

  gsi = gsi_for_stmt (else_assign);
  unlink_stmt_vdef (else_assign);
  gsi_remove (&gsi, true);
  release_defs (else_assign);

  /* 2) Create a PHI node at the join block, with one argument
        holding the old RHS, and the other holding the temporary
        where we stored the old memory contents.  */
  name = make_temp_ssa_name (TREE_TYPE (lhs), NULL, "cstore");
  newphi = create_phi_node (name, join_bb);
  add_phi_arg (newphi, then_rhs, EDGE_SUCC (then_bb, 0), then_locus);
  add_phi_arg (newphi, else_rhs, EDGE_SUCC (else_bb, 0), else_locus);

  new_stmt = gimple_build_assign (lhs, PHI_RESULT (newphi));

  /* 3) Insert that PHI node.  */
  gsi = gsi_after_labels (join_bb);
  if (gsi_end_p (gsi))
    {
      gsi = gsi_last_bb (join_bb);
      gsi_insert_after (&gsi, new_stmt, GSI_NEW_STMT);
    }
  else
    gsi_insert_before (&gsi, new_stmt, GSI_NEW_STMT);

  return true;
}
2355
2356 /* Return the single store in BB with VDEF or NULL if there are
2357 other stores in the BB or loads following the store. */
2358
2359 static gimple *
single_trailing_store_in_bb(basic_block bb,tree vdef)2360 single_trailing_store_in_bb (basic_block bb, tree vdef)
2361 {
2362 if (SSA_NAME_IS_DEFAULT_DEF (vdef))
2363 return NULL;
2364 gimple *store = SSA_NAME_DEF_STMT (vdef);
2365 if (gimple_bb (store) != bb
2366 || gimple_code (store) == GIMPLE_PHI)
2367 return NULL;
2368
2369 /* Verify there is no other store in this BB. */
2370 if (!SSA_NAME_IS_DEFAULT_DEF (gimple_vuse (store))
2371 && gimple_bb (SSA_NAME_DEF_STMT (gimple_vuse (store))) == bb
2372 && gimple_code (SSA_NAME_DEF_STMT (gimple_vuse (store))) != GIMPLE_PHI)
2373 return NULL;
2374
2375 /* Verify there is no load or store after the store. */
2376 use_operand_p use_p;
2377 imm_use_iterator imm_iter;
2378 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, gimple_vdef (store))
2379 if (USE_STMT (use_p) != store
2380 && gimple_bb (USE_STMT (use_p)) == bb)
2381 return NULL;
2382
2383 return store;
2384 }
2385
/* Conditional store replacement.  We already know
   that the recognized pattern looks like so:

   split:
     if (cond) goto THEN_BB; else goto ELSE_BB (edge E1)
   THEN_BB:
     ...
     X = Y;
     ...
     goto JOIN_BB;
   ELSE_BB:
     ...
     X = Z;
     ...
     fallthrough (edge E0)
   JOIN_BB:
     some more

   We check that it is safe to sink the store to JOIN_BB by verifying that
   there are no read-after-write or write-after-write dependencies in
   THEN_BB and ELSE_BB.  Returns true if any store pair was sunk.  */

static bool
cond_if_else_store_replacement (basic_block then_bb, basic_block else_bb,
                                basic_block join_bb)
{
  vec<data_reference_p> then_datarefs, else_datarefs;
  vec<ddr_p> then_ddrs, else_ddrs;
  gimple *then_store, *else_store;
  bool found, ok = false, res;
  struct data_dependence_relation *ddr;
  data_reference_p then_dr, else_dr;
  int i, j;
  tree then_lhs, else_lhs;
  basic_block blocks[3];

  /* Handle the case with single store in THEN_BB and ELSE_BB.  That is
     cheap enough to always handle as it allows us to elide dependence
     checking.  */
  gphi *vphi = NULL;
  /* Find the virtual PHI in JOIN_BB merging the memory states.  */
  for (gphi_iterator si = gsi_start_phis (join_bb); !gsi_end_p (si);
       gsi_next (&si))
    if (virtual_operand_p (gimple_phi_result (si.phi ())))
      {
	vphi = si.phi ();
	break;
      }
  if (!vphi)
    return false;
  tree then_vdef = PHI_ARG_DEF_FROM_EDGE (vphi, single_succ_edge (then_bb));
  tree else_vdef = PHI_ARG_DEF_FROM_EDGE (vphi, single_succ_edge (else_bb));
  gimple *then_assign = single_trailing_store_in_bb (then_bb, then_vdef);
  if (then_assign)
    {
      gimple *else_assign = single_trailing_store_in_bb (else_bb, else_vdef);
      if (else_assign)
	return cond_if_else_store_replacement_1 (then_bb, else_bb, join_bb,
						 then_assign, else_assign);
    }

  /* The general case below is gated on the store-sinking param.  */
  if (MAX_STORES_TO_SINK == 0)
    return false;

  /* Find data references.  */
  then_datarefs.create (1);
  else_datarefs.create (1);
  if ((find_data_references_in_bb (NULL, then_bb, &then_datarefs)
      == chrec_dont_know)
      || !then_datarefs.length ()
      || (find_data_references_in_bb (NULL, else_bb, &else_datarefs)
	  == chrec_dont_know)
      || !else_datarefs.length ())
    {
      free_data_refs (then_datarefs);
      free_data_refs (else_datarefs);
      return false;
    }

  /* Find pairs of stores with equal LHS.  */
  auto_vec<gimple *, 1> then_stores, else_stores;
  FOR_EACH_VEC_ELT (then_datarefs, i, then_dr)
    {
      if (DR_IS_READ (then_dr))
        continue;

      then_store = DR_STMT (then_dr);
      then_lhs = gimple_get_lhs (then_store);
      if (then_lhs == NULL_TREE)
	continue;
      found = false;

      FOR_EACH_VEC_ELT (else_datarefs, j, else_dr)
        {
          if (DR_IS_READ (else_dr))
            continue;

          else_store = DR_STMT (else_dr);
          else_lhs = gimple_get_lhs (else_store);
	  if (else_lhs == NULL_TREE)
	    continue;

          if (operand_equal_p (then_lhs, else_lhs, 0))
            {
              found = true;
              break;
            }
        }

      if (!found)
        continue;

      then_stores.safe_push (then_store);
      else_stores.safe_push (else_store);
    }

  /* No pairs of stores found, or too many to sink.  */
  if (!then_stores.length ()
      || then_stores.length () > (unsigned) MAX_STORES_TO_SINK)
    {
      free_data_refs (then_datarefs);
      free_data_refs (else_datarefs);
      return false;
    }

  /* Compute and check data dependencies in both basic blocks.  */
  then_ddrs.create (1);
  else_ddrs.create (1);
  if (!compute_all_dependences (then_datarefs, &then_ddrs,
				vNULL, false)
      || !compute_all_dependences (else_datarefs, &else_ddrs,
				   vNULL, false))
    {
      free_dependence_relations (then_ddrs);
      free_dependence_relations (else_ddrs);
      free_data_refs (then_datarefs);
      free_data_refs (else_datarefs);
      return false;
    }
  blocks[0] = then_bb;
  blocks[1] = else_bb;
  blocks[2] = join_bb;
  /* Give statements UIDs so dependence direction can be decided by
     comparing statement order below.  */
  renumber_gimple_stmt_uids_in_blocks (blocks, 3);

  /* Check that there are no read-after-write or write-after-write
     dependencies in THEN_BB.  */
  FOR_EACH_VEC_ELT (then_ddrs, i, ddr)
    {
      struct data_reference *dra = DDR_A (ddr);
      struct data_reference *drb = DDR_B (ddr);

      if (DDR_ARE_DEPENDENT (ddr) != chrec_known
          && ((DR_IS_READ (dra) && DR_IS_WRITE (drb)
               && gimple_uid (DR_STMT (dra)) > gimple_uid (DR_STMT (drb)))
              || (DR_IS_READ (drb) && DR_IS_WRITE (dra)
                  && gimple_uid (DR_STMT (drb)) > gimple_uid (DR_STMT (dra)))
              || (DR_IS_WRITE (dra) && DR_IS_WRITE (drb))))
        {
          free_dependence_relations (then_ddrs);
          free_dependence_relations (else_ddrs);
	  free_data_refs (then_datarefs);
	  free_data_refs (else_datarefs);
          return false;
        }
    }

  /* Check that there are no read-after-write or write-after-write
     dependencies in ELSE_BB.  */
  FOR_EACH_VEC_ELT (else_ddrs, i, ddr)
    {
      struct data_reference *dra = DDR_A (ddr);
      struct data_reference *drb = DDR_B (ddr);

      if (DDR_ARE_DEPENDENT (ddr) != chrec_known
          && ((DR_IS_READ (dra) && DR_IS_WRITE (drb)
               && gimple_uid (DR_STMT (dra)) > gimple_uid (DR_STMT (drb)))
              || (DR_IS_READ (drb) && DR_IS_WRITE (dra)
                  && gimple_uid (DR_STMT (drb)) > gimple_uid (DR_STMT (dra)))
              || (DR_IS_WRITE (dra) && DR_IS_WRITE (drb))))
        {
          free_dependence_relations (then_ddrs);
          free_dependence_relations (else_ddrs);
	  free_data_refs (then_datarefs);
	  free_data_refs (else_datarefs);
          return false;
        }
    }

  /* Sink stores with same LHS.  */
  FOR_EACH_VEC_ELT (then_stores, i, then_store)
    {
      else_store = else_stores[i];
      res = cond_if_else_store_replacement_1 (then_bb, else_bb, join_bb,
                                              then_store, else_store);
      ok = ok || res;
    }

  free_dependence_relations (then_ddrs);
  free_dependence_relations (else_ddrs);
  free_data_refs (then_datarefs);
  free_data_refs (else_datarefs);

  return ok;
}
2589
2590 /* Return TRUE if STMT has a VUSE whose corresponding VDEF is in BB. */
2591
2592 static bool
local_mem_dependence(gimple * stmt,basic_block bb)2593 local_mem_dependence (gimple *stmt, basic_block bb)
2594 {
2595 tree vuse = gimple_vuse (stmt);
2596 gimple *def;
2597
2598 if (!vuse)
2599 return false;
2600
2601 def = SSA_NAME_DEF_STMT (vuse);
2602 return (def && gimple_bb (def) == bb);
2603 }
2604
/* Given a "diamond" control-flow pattern where BB0 tests a condition,
   BB1 and BB2 are "then" and "else" blocks dependent on this test,
   and BB3 rejoins control flow following BB1 and BB2, look for
   opportunities to hoist loads as follows.  If BB3 contains a PHI of
   two loads, one each occurring in BB1 and BB2, and the loads are
   provably of adjacent fields in the same structure, then move both
   loads into BB0.  Of course this can only be done if there are no
   dependencies preventing such motion.

   One of the hoisted loads will always be speculative, so the
   transformation is currently conservative:

    - The fields must be strictly adjacent.
    - The two fields must occupy a single memory block that is
      guaranteed to not cross a page boundary.

    The last is difficult to prove, as such memory blocks should be
    aligned on the minimum of the stack alignment boundary and the
    alignment guaranteed by heap allocation interfaces.  Thus we rely
    on a parameter for the alignment value.

    Provided a good value is used for the last case, the first
    restriction could possibly be relaxed.  */

static void
hoist_adjacent_loads (basic_block bb0, basic_block bb1,
		      basic_block bb2, basic_block bb3)
{
  /* The cache-line size parameter bounds the span of the two loads.  */
  int param_align = PARAM_VALUE (PARAM_L1_CACHE_LINE_SIZE);
  unsigned param_align_bits = (unsigned) (param_align * BITS_PER_UNIT);
  gphi_iterator gsi;

  /* Walk the phis in bb3 looking for an opportunity.  We are looking
     for phis of two SSA names, one each of which is defined in bb1 and
     bb2.  */
  for (gsi = gsi_start_phis (bb3); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gphi *phi_stmt = gsi.phi ();
      gimple *def1, *def2;
      tree arg1, arg2, ref1, ref2, field1, field2;
      tree tree_offset1, tree_offset2, tree_size2, next;
      int offset1, offset2, size2;
      unsigned align1;
      gimple_stmt_iterator gsi2;
      basic_block bb_for_def1, bb_for_def2;

      if (gimple_phi_num_args (phi_stmt) != 2
	  || virtual_operand_p (gimple_phi_result (phi_stmt)))
	continue;

      arg1 = gimple_phi_arg_def (phi_stmt, 0);
      arg2 = gimple_phi_arg_def (phi_stmt, 1);

      if (TREE_CODE (arg1) != SSA_NAME
	  || TREE_CODE (arg2) != SSA_NAME
	  || SSA_NAME_IS_DEFAULT_DEF (arg1)
	  || SSA_NAME_IS_DEFAULT_DEF (arg2))
	continue;

      def1 = SSA_NAME_DEF_STMT (arg1);
      def2 = SSA_NAME_DEF_STMT (arg2);

      /* One definition in each of BB1 and BB2 (in either order).  */
      if ((gimple_bb (def1) != bb1 || gimple_bb (def2) != bb2)
	  && (gimple_bb (def2) != bb1 || gimple_bb (def1) != bb2))
	continue;

      /* Check the mode of the arguments to be sure a conditional move
	 can be generated for it.  */
      if (optab_handler (movcc_optab, TYPE_MODE (TREE_TYPE (arg1)))
	  == CODE_FOR_nothing)
	continue;

      /* Both statements must be assignments whose RHS is a COMPONENT_REF.  */
      if (!gimple_assign_single_p (def1)
	  || !gimple_assign_single_p (def2)
	  || gimple_has_volatile_ops (def1)
	  || gimple_has_volatile_ops (def2))
	continue;

      ref1 = gimple_assign_rhs1 (def1);
      ref2 = gimple_assign_rhs1 (def2);

      if (TREE_CODE (ref1) != COMPONENT_REF
	  || TREE_CODE (ref2) != COMPONENT_REF)
	continue;

      /* The zeroth operand of the two component references must be
	 identical.  It is not sufficient to compare get_base_address of
	 the two references, because this could allow for different
	 elements of the same array in the two trees.  It is not safe to
	 assume that the existence of one array element implies the
	 existence of a different one.  */
      if (!operand_equal_p (TREE_OPERAND (ref1, 0), TREE_OPERAND (ref2, 0), 0))
	continue;

      field1 = TREE_OPERAND (ref1, 1);
      field2 = TREE_OPERAND (ref2, 1);

      /* Check for field adjacency, and ensure field1 comes first.
	 Walk the field chain past non-FIELD_DECLs to the next field.  */
      for (next = DECL_CHAIN (field1);
	   next && TREE_CODE (next) != FIELD_DECL;
	   next = DECL_CHAIN (next))
	;

      if (next != field2)
	{
	  /* Maybe field2 precedes field1; check the other order.  */
	  for (next = DECL_CHAIN (field2);
	       next && TREE_CODE (next) != FIELD_DECL;
	       next = DECL_CHAIN (next))
	    ;

	  if (next != field1)
	    continue;

	  std::swap (field1, field2);
	  std::swap (def1, def2);
	}

      bb_for_def1 = gimple_bb (def1);
      bb_for_def2 = gimple_bb (def2);

      /* Check for proper alignment of the first field.  */
      tree_offset1 = bit_position (field1);
      tree_offset2 = bit_position (field2);
      tree_size2 = DECL_SIZE (field2);

      if (!tree_fits_uhwi_p (tree_offset1)
	  || !tree_fits_uhwi_p (tree_offset2)
	  || !tree_fits_uhwi_p (tree_size2))
	continue;

      offset1 = tree_to_uhwi (tree_offset1);
      offset2 = tree_to_uhwi (tree_offset2);
      size2 = tree_to_uhwi (tree_size2);
      align1 = DECL_ALIGN (field1) % param_align_bits;

      /* The first field must start on a byte boundary.  */
      if (offset1 % BITS_PER_UNIT != 0)
	continue;

      /* For profitability, the two field references should fit within
	 a single cache line.  */
      if (align1 + offset2 - offset1 + size2 > param_align_bits)
	continue;

      /* The two expressions cannot be dependent upon vdefs defined
	 in bb1/bb2.  */
      if (local_mem_dependence (def1, bb_for_def1)
	  || local_mem_dependence (def2, bb_for_def2))
	continue;

      /* The conditions are satisfied; hoist the loads from bb1 and bb2 into
	 bb0.  We hoist the first one first so that a cache miss is handled
         efficiently regardless of hardware cache-fill policy.  */
      gsi2 = gsi_for_stmt (def1);
      gsi_move_to_bb_end (&gsi2, bb0);
      gsi2 = gsi_for_stmt (def2);
      gsi_move_to_bb_end (&gsi2, bb0);

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file,
		   "\nHoisting adjacent loads from %d and %d into %d: \n",
		   bb_for_def1->index, bb_for_def2->index, bb0->index);
	  print_gimple_stmt (dump_file, def1, 0, TDF_VOPS|TDF_MEMSYMS);
	  print_gimple_stmt (dump_file, def2, 0, TDF_VOPS|TDF_MEMSYMS);
	}
    }
}
2773
2774 /* Determine whether we should attempt to hoist adjacent loads out of
2775 diamond patterns in pass_phiopt. Always hoist loads if
2776 -fhoist-adjacent-loads is specified and the target machine has
2777 both a conditional move instruction and a defined cache line size. */
2778
2779 static bool
gate_hoist_loads(void)2780 gate_hoist_loads (void)
2781 {
2782 return (flag_hoist_adjacent_loads == 1
2783 && PARAM_VALUE (PARAM_L1_CACHE_LINE_SIZE)
2784 && HAVE_conditional_move);
2785 }
2786
/* This pass tries to replace an if-then-else block with an
2788 assignment. We have four kinds of transformations. Some of these
2789 transformations are also performed by the ifcvt RTL optimizer.
2790
2791 Conditional Replacement
2792 -----------------------
2793
2794 This transformation, implemented in conditional_replacement,
2795 replaces
2796
2797 bb0:
2798 if (cond) goto bb2; else goto bb1;
2799 bb1:
2800 bb2:
2801 x = PHI <0 (bb1), 1 (bb0), ...>;
2802
2803 with
2804
2805 bb0:
2806 x' = cond;
2807 goto bb2;
2808 bb2:
2809 x = PHI <x' (bb0), ...>;
2810
2811 We remove bb1 as it becomes unreachable. This occurs often due to
2812 gimplification of conditionals.
2813
2814 Value Replacement
2815 -----------------
2816
2817 This transformation, implemented in value_replacement, replaces
2818
2819 bb0:
2820 if (a != b) goto bb2; else goto bb1;
2821 bb1:
2822 bb2:
2823 x = PHI <a (bb1), b (bb0), ...>;
2824
2825 with
2826
2827 bb0:
2828 bb2:
2829 x = PHI <b (bb0), ...>;
2830
2831 This opportunity can sometimes occur as a result of other
2832 optimizations.
2833
2834
2835 Another case caught by value replacement looks like this:
2836
2837 bb0:
2838 t1 = a == CONST;
2839 t2 = b > c;
2840 t3 = t1 & t2;
2841 if (t3 != 0) goto bb1; else goto bb2;
2842 bb1:
2843 bb2:
2844 x = PHI (CONST, a)
2845
2846 Gets replaced with:
2847 bb0:
2848 bb2:
2849 t1 = a == CONST;
2850 t2 = b > c;
2851 t3 = t1 & t2;
2852 x = a;
2853
2854 ABS Replacement
2855 ---------------
2856
2857 This transformation, implemented in abs_replacement, replaces
2858
2859 bb0:
2860 if (a >= 0) goto bb2; else goto bb1;
2861 bb1:
2862 x = -a;
2863 bb2:
2864 x = PHI <x (bb1), a (bb0), ...>;
2865
2866 with
2867
2868 bb0:
2869 x' = ABS_EXPR< a >;
2870 bb2:
2871 x = PHI <x' (bb0), ...>;
2872
2873 MIN/MAX Replacement
2874 -------------------
2875
   This transformation, implemented in minmax_replacement, replaces
2877
2878 bb0:
2879 if (a <= b) goto bb2; else goto bb1;
2880 bb1:
2881 bb2:
2882 x = PHI <b (bb1), a (bb0), ...>;
2883
2884 with
2885
2886 bb0:
2887 x' = MIN_EXPR (a, b)
2888 bb2:
2889 x = PHI <x' (bb0), ...>;
2890
2891 A similar transformation is done for MAX_EXPR.
2892
2893
2894 This pass also performs a fifth transformation of a slightly different
2895 flavor.
2896
2897 Factor conversion in COND_EXPR
2898 ------------------------------
2899
2900 This transformation factors the conversion out of COND_EXPR with
2901 factor_out_conditional_conversion.
2902
2903 For example:
2904 if (a <= CST) goto <bb 3>; else goto <bb 4>;
2905 <bb 3>:
2906 tmp = (int) a;
2907 <bb 4>:
2908 tmp = PHI <tmp, CST>
2909
2910 Into:
2911 if (a <= CST) goto <bb 3>; else goto <bb 4>;
2912 <bb 3>:
2913 <bb 4>:
2914 a = PHI <a, CST>
2915 tmp = (int) a;
2916
2917 Adjacent Load Hoisting
2918 ----------------------
2919
2920 This transformation replaces
2921
2922 bb0:
2923 if (...) goto bb2; else goto bb1;
2924 bb1:
2925 x1 = (<expr>).field1;
2926 goto bb3;
2927 bb2:
2928 x2 = (<expr>).field2;
2929 bb3:
2930 # x = PHI <x1, x2>;
2931
2932 with
2933
2934 bb0:
2935 x1 = (<expr>).field1;
2936 x2 = (<expr>).field2;
2937 if (...) goto bb2; else goto bb1;
2938 bb1:
2939 goto bb3;
2940 bb2:
2941 bb3:
2942 # x = PHI <x1, x2>;
2943
2944 The purpose of this transformation is to enable generation of conditional
2945 move instructions such as Intel CMOVE or PowerPC ISEL. Because one of
2946 the loads is speculative, the transformation is restricted to very
2947 specific cases to avoid introducing a page fault. We are looking for
2948 the common idiom:
2949
2950 if (...)
2951 x = y->left;
2952 else
2953 x = y->right;
2954
2955 where left and right are typically adjacent pointers in a tree structure. */
2956
2957 namespace {
2958
/* Metadata for the phiopt pass; see tree-pass.h for field meanings.  */
const pass_data pass_data_phiopt =
{
  GIMPLE_PASS, /* type */
  "phiopt", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_TREE_PHIOPT, /* tv_id */
  ( PROP_cfg | PROP_ssa ), /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};
2971
/* The PHI-optimization pass.  It is instantiated twice; pass parameter
   0 (set via set_pass_param) marks the early instance.  */
class pass_phiopt : public gimple_opt_pass
{
public:
  pass_phiopt (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_phiopt, ctxt), early_p (false)
  {}

  /* opt_pass methods: */
  opt_pass * clone () { return new pass_phiopt (m_ctxt); }
  /* Parameter 0 selects the early variant of the pass.  */
  void set_pass_param (unsigned n, bool param)
    {
      gcc_assert (n == 0);
      early_p = param;
    }
  virtual bool gate (function *) { return flag_ssa_phiopt; }
  virtual unsigned int execute (function *)
    {
      /* Only the late instance ever attempts adjacent-load hoisting.  */
      return tree_ssa_phiopt_worker (false,
				     !early_p ? gate_hoist_loads () : false,
				     early_p);
    }

private:
  /* True for the early instance of the pass.  */
  bool early_p;
}; // class pass_phiopt
2997
2998 } // anon namespace
2999
/* Create an instance of the phiopt pass for the pass manager.  */
gimple_opt_pass *
make_pass_phiopt (gcc::context *ctxt)
{
  return new pass_phiopt (ctxt);
}
3005
3006 namespace {
3007
/* Metadata for the conditional store elimination pass; see tree-pass.h
   for field meanings.  */
const pass_data pass_data_cselim =
{
  GIMPLE_PASS, /* type */
  "cselim", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_TREE_PHIOPT, /* tv_id */
  ( PROP_cfg | PROP_ssa ), /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};
3020
/* The conditional store elimination pass, gated on -ftree-cselim.  */
class pass_cselim : public gimple_opt_pass
{
public:
  pass_cselim (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_cselim, ctxt)
  {}

  /* opt_pass methods: */
  virtual bool gate (function *) { return flag_tree_cselim; }
  virtual unsigned int execute (function *) { return tree_ssa_cs_elim (); }

}; // class pass_cselim
3033
3034 } // anon namespace
3035
/* Create an instance of the cselim pass for the pass manager.  */
gimple_opt_pass *
make_pass_cselim (gcc::context *ctxt)
{
  return new pass_cselim (ctxt);
}
3041