1 /* Reassociation for trees.
2 Copyright (C) 2005-2018 Free Software Foundation, Inc.
3 Contributed by Daniel Berlin <dan@dberlin.org>
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
11
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
20
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "backend.h"
25 #include "target.h"
26 #include "rtl.h"
27 #include "tree.h"
28 #include "gimple.h"
29 #include "cfghooks.h"
30 #include "alloc-pool.h"
31 #include "tree-pass.h"
32 #include "memmodel.h"
33 #include "tm_p.h"
34 #include "ssa.h"
35 #include "optabs-tree.h"
36 #include "gimple-pretty-print.h"
37 #include "diagnostic-core.h"
38 #include "fold-const.h"
39 #include "stor-layout.h"
40 #include "cfganal.h"
41 #include "gimple-fold.h"
42 #include "tree-eh.h"
43 #include "gimple-iterator.h"
44 #include "gimplify-me.h"
45 #include "tree-cfg.h"
46 #include "tree-ssa-loop.h"
47 #include "flags.h"
48 #include "tree-ssa.h"
49 #include "langhooks.h"
50 #include "cfgloop.h"
51 #include "params.h"
52 #include "builtins.h"
53 #include "gimplify.h"
54 #include "case-cfn-macros.h"
55
56 /* This is a simple global reassociation pass. It is, in part, based
57 on the LLVM pass of the same name (They do some things more/less
58 than we do, in different orders, etc).
59
60 It consists of five steps:
61
62 1. Breaking up subtract operations into addition + negate, where
63 it would promote the reassociation of adds.
64
65 2. Left linearization of the expression trees, so that (A+B)+(C+D)
66 becomes (((A+B)+C)+D), which is easier for us to rewrite later.
67 During linearization, we place the operands of the binary
68 expressions into a vector of operand_entry_*
69
70 3. Optimization of the operand lists, eliminating things like a +
71 -a, a & a, etc.
72
73 3a. Combine repeated factors with the same occurrence counts
74 into a __builtin_powi call that will later be optimized into
75 an optimal number of multiplies.
76
77 4. Rewrite the expression trees we linearized and optimized so
78 they are in proper rank order.
79
80 5. Repropagate negates, as nothing else will clean it up ATM.
81
82 A bit of theory on #4, since nobody seems to write anything down
83 about why it makes sense to do it the way they do it:
84
85 We could do this much nicer theoretically, but don't (for reasons
86 explained after how to do it theoretically nice :P).
87
88 In order to promote the most redundancy elimination, you want
89 binary expressions whose operands are the same rank (or
90 preferably, the same value) exposed to the redundancy eliminator,
91 for possible elimination.
92
93 So the way to do this if we really cared, is to build the new op
94 tree from the leaves to the roots, merging as you go, and putting the
95 new op on the end of the worklist, until you are left with one
96 thing on the worklist.
97
98 IE if you have to rewrite the following set of operands (listed with
99 rank in parentheses), with opcode PLUS_EXPR:
100
101 a (1), b (1), c (1), d (2), e (2)
102
103
104 We start with our merge worklist empty, and the ops list with all of
105 those on it.
106
107 You want to first merge all leaves of the same rank, as much as
108 possible.
109
110 So first build a binary op of
111
112 mergetmp = a + b, and put "mergetmp" on the merge worklist.
113
114 Because there is no three operand form of PLUS_EXPR, c is not going to
115 be exposed to redundancy elimination as a rank 1 operand.
116
117 So you might as well throw it on the merge worklist (you could also
118 consider it to now be a rank two operand, and merge it with d and e,
119 but in this case, you then have evicted e from a binary op. So at
120 least in this situation, you can't win.)
121
122 Then build a binary op of d + e
123 mergetmp2 = d + e
124
125 and put mergetmp2 on the merge worklist.
126
127 so merge worklist = {mergetmp, c, mergetmp2}
128
129 Continue building binary ops of these operations until you have only
130 one operation left on the worklist.
131
132 So we have
133
134 build binary op
135 mergetmp3 = mergetmp + c
136
137 worklist = {mergetmp2, mergetmp3}
138
139 mergetmp4 = mergetmp2 + mergetmp3
140
141 worklist = {mergetmp4}
142
143 because we have one operation left, we can now just set the original
144 statement equal to the result of that operation.
145
146 This will at least expose a + b and d + e to redundancy elimination
147 as binary operations.
148
149 For extra points, you can reuse the old statements to build the
150 mergetmps, since you shouldn't run out.
151
152 So why don't we do this?
153
154 Because it's expensive, and rarely will help. Most trees we are
155 reassociating have 3 or less ops. If they have 2 ops, they already
156 will be written into a nice single binary op. If you have 3 ops, a
157 single simple check suffices to tell you whether the first two are of the
158 same rank. If so, you know to order it
159
160 mergetmp = op1 + op2
161 newstmt = mergetmp + op3
162
163 instead of
164 mergetmp = op2 + op3
165 newstmt = mergetmp + op1
166
167 If all three are of the same rank, you can't expose them all in a
168 single binary operator anyway, so the above is *still* the best you
169 can do.
170
171 Thus, this is what we do. When we have three ops left, we check to see
172 what order to put them in, and call it a day. As a nod to vector sum
173 reduction, we check if any of the ops are really a phi node that is a
174 destructive update for the associating op, and keep the destructive
175 update together for vector sum reduction recognition. */
176
/* Enable insertion of __builtin_powi calls during execute_reassoc.  See
   point 3a in the pass header comment.  */
static bool reassoc_insert_powi_p;

/* Statistics collected by the pass, reported in the dump file.  */
static struct
{
  /* Expression trees linearized (step 2 of the pass).  */
  int linearized;
  /* Constant operands folded together.  */
  int constants_eliminated;
  /* Operands removed through equivalences (a + -a, x & x, ...).  */
  int ops_eliminated;
  /* Expression trees rewritten into rank order.  */
  int rewritten;
  /* Repeated-factor opportunities seen (see add_repeat_to_ops_vec).  */
  int pows_encountered;
  /* __builtin_powi calls created.  */
  int pows_created;
} reassociate_stats;
191
/* Operator, rank pair.  One entry per operand of a linearized
   expression tree.  */
struct operand_entry
{
  /* Rank from get_rank; the primary sort key in sort_by_operand_rank.  */
  unsigned int rank;
  /* Unique ID used as the final qsort tie-breaker so sorting results
     are identical on different hosts.  */
  unsigned int id;
  /* The operand itself.  */
  tree op;
  /* Repeat count; 1 normally, the repeat factor for powi candidates
     (see add_repeat_to_ops_vec).  */
  unsigned int count;
  /* Statement that must be inserted before OP can be used, or NULL.  */
  gimple *stmt_to_insert;
};
201
/* Pool allocator for operand_entry structs; freed wholesale at the end
   of the pass.  */
static object_allocator<operand_entry> operand_entry_pool
  ("operand entry pool");

/* This is used to assign a unique ID to each struct operand_entry
   so that qsort results are identical on different hosts.  */
static unsigned int next_operand_entry_id;

/* Starting rank number for a given basic block, so that we can rank
   operations using unmovable instructions in that BB based on the bb
   depth.  */
static long *bb_rank;

/* Operand->rank hashtable, memoizing get_rank results.  */
static hash_map<tree, long> *operand_rank;

/* Vector of SSA_NAMEs on which after reassociate_bb is done with
   all basic blocks the CFG should be adjusted - basic blocks
   split right after that SSA_NAME's definition statement and before
   the only use, which must be a bit ior.  */
static vec<tree> reassoc_branch_fixups;

/* Forward decls.  */
static long get_rank (tree);
static bool reassoc_stmt_dominates_stmt_p (gimple *, gimple *);
226
/* Wrapper around gsi_remove, which adjusts gimple_uid of debug stmts
   possibly added by gsi_remove.  Returns the result of gsi_remove.  */

bool
reassoc_remove_stmt (gimple_stmt_iterator *gsi)
{
  gimple *stmt = gsi_stmt (*gsi);

  /* PHIs get no debug temps, and without debug bind stmts there is
     nothing to fix up; plain removal suffices.  */
  if (!MAY_HAVE_DEBUG_BIND_STMTS || gimple_code (stmt) == GIMPLE_PHI)
    return gsi_remove (gsi, true);

  /* Remember the position just before STMT so that after the removal
     we can walk over the debug stmts gsi_remove may have inserted in
     STMT's place.  */
  gimple_stmt_iterator prev = *gsi;
  gsi_prev (&prev);
  unsigned uid = gimple_uid (stmt);
  basic_block bb = gimple_bb (stmt);
  bool ret = gsi_remove (gsi, true);
  /* Step PREV forward to the first potentially-new stmt; if STMT was
     the first in the block, restart from the block head.  */
  if (!gsi_end_p (prev))
    gsi_next (&prev);
  else
    prev = gsi_start_bb (bb);
  gimple *end_stmt = gsi_stmt (*gsi);
  /* Any stmts between PREV and the removal point must be freshly added
     debug stmts with uid 0; give them the removed stmt's uid so
     uid-based ordering checks remain valid.  */
  while ((stmt = gsi_stmt (prev)) != end_stmt)
    {
      gcc_assert (stmt && is_gimple_debug (stmt) && gimple_uid (stmt) == 0);
      gimple_set_uid (stmt, uid);
      gsi_next (&prev);
    }
  return ret;
}
256
257 /* Bias amount for loop-carried phis. We want this to be larger than
258 the depth of any reassociation tree we can see, but not larger than
259 the rank difference between two blocks. */
260 #define PHI_LOOP_BIAS (1 << 15)
261
262 /* Rank assigned to a phi statement. If STMT is a loop-carried phi of
263 an innermost loop, and the phi has only a single use which is inside
264 the loop, then the rank is the block rank of the loop latch plus an
265 extra bias for the loop-carried dependence. This causes expressions
266 calculated into an accumulator variable to be independent for each
267 iteration of the loop. If STMT is some other phi, the rank is the
268 block rank of its containing block. */
269 static long
phi_rank(gimple * stmt)270 phi_rank (gimple *stmt)
271 {
272 basic_block bb = gimple_bb (stmt);
273 struct loop *father = bb->loop_father;
274 tree res;
275 unsigned i;
276 use_operand_p use;
277 gimple *use_stmt;
278
279 /* We only care about real loops (those with a latch). */
280 if (!father->latch)
281 return bb_rank[bb->index];
282
283 /* Interesting phis must be in headers of innermost loops. */
284 if (bb != father->header
285 || father->inner)
286 return bb_rank[bb->index];
287
288 /* Ignore virtual SSA_NAMEs. */
289 res = gimple_phi_result (stmt);
290 if (virtual_operand_p (res))
291 return bb_rank[bb->index];
292
293 /* The phi definition must have a single use, and that use must be
294 within the loop. Otherwise this isn't an accumulator pattern. */
295 if (!single_imm_use (res, &use, &use_stmt)
296 || gimple_bb (use_stmt)->loop_father != father)
297 return bb_rank[bb->index];
298
299 /* Look for phi arguments from within the loop. If found, bias this phi. */
300 for (i = 0; i < gimple_phi_num_args (stmt); i++)
301 {
302 tree arg = gimple_phi_arg_def (stmt, i);
303 if (TREE_CODE (arg) == SSA_NAME
304 && !SSA_NAME_IS_DEFAULT_DEF (arg))
305 {
306 gimple *def_stmt = SSA_NAME_DEF_STMT (arg);
307 if (gimple_bb (def_stmt)->loop_father == father)
308 return bb_rank[father->latch->index] + PHI_LOOP_BIAS;
309 }
310 }
311
312 /* Must be an uninteresting phi. */
313 return bb_rank[bb->index];
314 }
315
316 /* If EXP is an SSA_NAME defined by a PHI statement that represents a
317 loop-carried dependence of an innermost loop, return TRUE; else
318 return FALSE. */
319 static bool
loop_carried_phi(tree exp)320 loop_carried_phi (tree exp)
321 {
322 gimple *phi_stmt;
323 long block_rank;
324
325 if (TREE_CODE (exp) != SSA_NAME
326 || SSA_NAME_IS_DEFAULT_DEF (exp))
327 return false;
328
329 phi_stmt = SSA_NAME_DEF_STMT (exp);
330
331 if (gimple_code (SSA_NAME_DEF_STMT (exp)) != GIMPLE_PHI)
332 return false;
333
334 /* Non-loop-carried phis have block rank. Loop-carried phis have
335 an additional bias added in. If this phi doesn't have block rank,
336 it's biased and should not be propagated. */
337 block_rank = bb_rank[gimple_bb (phi_stmt)->index];
338
339 if (phi_rank (phi_stmt) != block_rank)
340 return true;
341
342 return false;
343 }
344
345 /* Return the maximum of RANK and the rank that should be propagated
346 from expression OP. For most operands, this is just the rank of OP.
347 For loop-carried phis, the value is zero to avoid undoing the bias
348 in favor of the phi. */
349 static long
propagate_rank(long rank,tree op)350 propagate_rank (long rank, tree op)
351 {
352 long op_rank;
353
354 if (loop_carried_phi (op))
355 return rank;
356
357 op_rank = get_rank (op);
358
359 return MAX (rank, op_rank);
360 }
361
362 /* Look up the operand rank structure for expression E. */
363
364 static inline long
find_operand_rank(tree e)365 find_operand_rank (tree e)
366 {
367 long *slot = operand_rank->get (e);
368 return slot ? *slot : -1;
369 }
370
371 /* Insert {E,RANK} into the operand rank hashtable. */
372
373 static inline void
insert_operand_rank(tree e,long rank)374 insert_operand_rank (tree e, long rank)
375 {
376 gcc_assert (rank > 0);
377 gcc_assert (!operand_rank->put (e, rank));
378 }
379
/* Given an expression E, return the rank of the expression.  */

static long
get_rank (tree e)
{
  /* SSA_NAME's have the rank of the expression they are the result
     of.
     For globals and uninitialized values, the rank is 0.
     For function arguments, use the pre-setup rank.
     For PHI nodes, stores, asm statements, etc, we use the rank of
     the BB.
     For simple operations, the rank is the maximum rank of any of
     its operands, or the bb_rank, whichever is less.
     I make no claims that this is optimal, however, it gives good
     results.  */

  /* We make an exception to the normal ranking system to break
     dependences of accumulator variables in loops.  Suppose we
     have a simple one-block loop containing:

       x_1 = phi(x_0, x_2)
       b = a + x_1
       c = b + d
       x_2 = c + e

     As shown, each iteration of the calculation into x is fully
     dependent upon the iteration before it.  We would prefer to
     see this in the form:

       x_1 = phi(x_0, x_2)
       b = a + d
       c = b + e
       x_2 = c + x_1

     If the loop is unrolled, the calculations of b and c from
     different iterations can be interleaved.

     To obtain this result during reassociation, we bias the rank
     of the phi definition x_1 upward, when it is recognized as an
     accumulator pattern.  The artificial rank causes it to be
     added last, providing the desired independence.  */

  if (TREE_CODE (e) == SSA_NAME)
    {
      ssa_op_iter iter;
      gimple *stmt;
      long rank;
      tree op;

      /* Default defs (parameters etc.) keep the rank pre-computed at
	 pass setup.  */
      if (SSA_NAME_IS_DEFAULT_DEF (e))
	return find_operand_rank (e);

      stmt = SSA_NAME_DEF_STMT (e);
      if (gimple_code (stmt) == GIMPLE_PHI)
	return phi_rank (stmt);

      /* Non-assignments (calls, asms, ...) get the rank of their
	 containing block.  */
      if (!is_gimple_assign (stmt))
	return bb_rank[gimple_bb (stmt)->index];

      /* If we already have a rank for this expression, use that.  */
      rank = find_operand_rank (e);
      if (rank != -1)
	return rank;

      /* Otherwise, find the maximum rank for the operands.  As an
	 exception, remove the bias from loop-carried phis when propagating
	 the rank so that dependent operations are not also biased.  */
      /* Simply walk over all SSA uses - this takes advantage of the
	 fact that non-SSA operands are is_gimple_min_invariant and
	 thus have rank 0.  */
      rank = 0;
      FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
	rank = propagate_rank (rank, op);

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "Rank for ");
	  print_generic_expr (dump_file, e);
	  fprintf (dump_file, " is %ld\n", (rank + 1));
	}

      /* Note the rank in the hashtable so we don't recompute it.  */
      insert_operand_rank (e, (rank + 1));
      return (rank + 1);
    }

  /* Constants, globals, etc., are rank 0 */
  return 0;
}
469
470
/* We want integer ones to end up last no matter what, since they are
   the ones we can do the most with.  The values are distinct powers of
   two so constant_type results compare and subtract cleanly in
   sort_by_operand_rank.  Parenthesized to avoid precedence surprises
   when the macros appear inside larger expressions.  */
#define INTEGER_CONST_TYPE (1 << 4)
#define FLOAT_ONE_CONST_TYPE (1 << 3)
#define FLOAT_CONST_TYPE (1 << 2)
#define OTHER_CONST_TYPE (1 << 1)
477
478 /* Classify an invariant tree into integer, float, or other, so that
479 we can sort them to be near other constants of the same type. */
480 static inline int
constant_type(tree t)481 constant_type (tree t)
482 {
483 if (INTEGRAL_TYPE_P (TREE_TYPE (t)))
484 return INTEGER_CONST_TYPE;
485 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (t)))
486 {
487 /* Sort -1.0 and 1.0 constants last, while in some cases
488 const_binop can't optimize some inexact operations, multiplication
489 by -1.0 or 1.0 can be always merged with others. */
490 if (real_onep (t) || real_minus_onep (t))
491 return FLOAT_ONE_CONST_TYPE;
492 return FLOAT_CONST_TYPE;
493 }
494 else
495 return OTHER_CONST_TYPE;
496 }
497
/* qsort comparison function to sort operand entries PA and PB by rank
   so that the sorted array is ordered by rank in decreasing order.  */
static int
sort_by_operand_rank (const void *pa, const void *pb)
{
  const operand_entry *oea = *(const operand_entry *const *)pa;
  const operand_entry *oeb = *(const operand_entry *const *)pb;

  /* Primary key: rank, decreasing.  */
  if (oeb->rank != oea->rank)
    return oeb->rank > oea->rank ? 1 : -1;

  /* It's nicer for optimize_expression if constants that are likely
     to fold when added/multiplied/whatever are put next to each
     other.  Since all constants have rank 0, order them by type.  */
  if (oea->rank == 0)
    {
      if (constant_type (oeb->op) != constant_type (oea->op))
	return constant_type (oea->op) - constant_type (oeb->op);
      else
	/* To make sorting result stable, we use unique IDs to determine
	   order.  */
	return oeb->id > oea->id ? 1 : -1;
    }

  /* Among equal non-zero ranks, non-SSA operands sort after
     SSA_NAMEs.  */
  if (TREE_CODE (oea->op) != SSA_NAME)
    {
      if (TREE_CODE (oeb->op) != SSA_NAME)
	return oeb->id > oea->id ? 1 : -1;
      else
	return 1;
    }
  else if (TREE_CODE (oeb->op) != SSA_NAME)
    return -1;

  /* Lastly, make sure the versions that are the same go next to each
     other.  */
  if (SSA_NAME_VERSION (oeb->op) != SSA_NAME_VERSION (oea->op))
    {
      /* As SSA_NAME_VERSION is assigned pretty randomly, because we reuse
	 versions of removed SSA_NAMEs, so if possible, prefer to sort
	 based on basic block and gimple_uid of the SSA_NAME_DEF_STMT.
	 See PR60418.  */
      gimple *stmta = SSA_NAME_DEF_STMT (oea->op);
      gimple *stmtb = SSA_NAME_DEF_STMT (oeb->op);
      basic_block bba = gimple_bb (stmta);
      basic_block bbb = gimple_bb (stmtb);
      if (bbb != bba)
	{
	  /* One of the SSA_NAMEs can be defined in oeN->stmt_to_insert
	     but the other might not.  */
	  if (!bba)
	    return 1;
	  if (!bbb)
	    return -1;
	  /* If neither is, compare bb_rank.  */
	  if (bb_rank[bbb->index] != bb_rank[bba->index])
	    return (bb_rank[bbb->index] >> 16) - (bb_rank[bba->index] >> 16);
	}

      /* Same block: prefer dominance order when it is decisive in
	 exactly one direction.  */
      bool da = reassoc_stmt_dominates_stmt_p (stmta, stmtb);
      bool db = reassoc_stmt_dominates_stmt_p (stmtb, stmta);
      if (da != db)
	return da ? 1 : -1;

      /* Otherwise fall back to SSA version numbers.  */
      return SSA_NAME_VERSION (oeb->op) > SSA_NAME_VERSION (oea->op) ? 1 : -1;
    }

  /* Identical SSA_NAMEs: stabilize via the unique IDs.  */
  return oeb->id > oea->id ? 1 : -1;
}
567
568 /* Add an operand entry to *OPS for the tree operand OP. */
569
570 static void
571 add_to_ops_vec (vec<operand_entry *> *ops, tree op, gimple *stmt_to_insert = NULL)
572 {
573 operand_entry *oe = operand_entry_pool.allocate ();
574
575 oe->op = op;
576 oe->rank = get_rank (op);
577 oe->id = next_operand_entry_id++;
578 oe->count = 1;
579 oe->stmt_to_insert = stmt_to_insert;
580 ops->safe_push (oe);
581 }
582
583 /* Add an operand entry to *OPS for the tree operand OP with repeat
584 count REPEAT. */
585
586 static void
add_repeat_to_ops_vec(vec<operand_entry * > * ops,tree op,HOST_WIDE_INT repeat)587 add_repeat_to_ops_vec (vec<operand_entry *> *ops, tree op,
588 HOST_WIDE_INT repeat)
589 {
590 operand_entry *oe = operand_entry_pool.allocate ();
591
592 oe->op = op;
593 oe->rank = get_rank (op);
594 oe->id = next_operand_entry_id++;
595 oe->count = repeat;
596 oe->stmt_to_insert = NULL;
597 ops->safe_push (oe);
598
599 reassociate_stats.pows_encountered++;
600 }
601
602 /* Return true if STMT is reassociable operation containing a binary
603 operation with tree code CODE, and is inside LOOP. */
604
605 static bool
is_reassociable_op(gimple * stmt,enum tree_code code,struct loop * loop)606 is_reassociable_op (gimple *stmt, enum tree_code code, struct loop *loop)
607 {
608 basic_block bb = gimple_bb (stmt);
609
610 if (gimple_bb (stmt) == NULL)
611 return false;
612
613 if (!flow_bb_inside_loop_p (loop, bb))
614 return false;
615
616 if (is_gimple_assign (stmt)
617 && gimple_assign_rhs_code (stmt) == code
618 && has_single_use (gimple_assign_lhs (stmt)))
619 {
620 tree rhs1 = gimple_assign_rhs1 (stmt);
621 tree rhs2 = gimple_assign_rhs1 (stmt);
622 if (TREE_CODE (rhs1) == SSA_NAME
623 && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (rhs1))
624 return false;
625 if (rhs2
626 && TREE_CODE (rhs2) == SSA_NAME
627 && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (rhs2))
628 return false;
629 return true;
630 }
631
632 return false;
633 }
634
635
636 /* Return true if STMT is a nop-conversion. */
637
638 static bool
gimple_nop_conversion_p(gimple * stmt)639 gimple_nop_conversion_p (gimple *stmt)
640 {
641 if (gassign *ass = dyn_cast <gassign *> (stmt))
642 {
643 if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (ass))
644 && tree_nop_conversion_p (TREE_TYPE (gimple_assign_lhs (ass)),
645 TREE_TYPE (gimple_assign_rhs1 (ass))))
646 return true;
647 }
648 return false;
649 }
650
651 /* Given NAME, if NAME is defined by a unary operation OPCODE, return the
652 operand of the negate operation. Otherwise, return NULL. */
653
654 static tree
get_unary_op(tree name,enum tree_code opcode)655 get_unary_op (tree name, enum tree_code opcode)
656 {
657 gimple *stmt = SSA_NAME_DEF_STMT (name);
658
659 /* Look through nop conversions (sign changes). */
660 if (gimple_nop_conversion_p (stmt)
661 && TREE_CODE (gimple_assign_rhs1 (stmt)) == SSA_NAME)
662 stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
663
664 if (!is_gimple_assign (stmt))
665 return NULL_TREE;
666
667 if (gimple_assign_rhs_code (stmt) == opcode)
668 return gimple_assign_rhs1 (stmt);
669 return NULL_TREE;
670 }
671
/* Return true if OP1 and OP2 have the same value if casted to either type.  */

static bool
ops_equal_values_p (tree op1, tree op2)
{
  if (op1 == op2)
    return true;

  /* Keep the original OP1 so that, after stripping a conversion from
     OP1, a later stripped OP2 can still be compared against it.  */
  tree orig_op1 = op1;
  if (TREE_CODE (op1) == SSA_NAME)
    {
      gimple *stmt = SSA_NAME_DEF_STMT (op1);
      /* Strip a value-preserving conversion from OP1.  */
      if (gimple_nop_conversion_p (stmt))
	{
	  op1 = gimple_assign_rhs1 (stmt);
	  if (op1 == op2)
	    return true;
	}
    }

  if (TREE_CODE (op2) == SSA_NAME)
    {
      gimple *stmt = SSA_NAME_DEF_STMT (op2);
      /* Strip a value-preserving conversion from OP2, then compare
	 against both the stripped and original OP1.  */
      if (gimple_nop_conversion_p (stmt))
	{
	  op2 = gimple_assign_rhs1 (stmt);
	  if (op1 == op2
	      || orig_op1 == op2)
	    return true;
	}
    }

  return false;
}
706
707
/* If CURR and LAST are a pair of ops that OPCODE allows us to
   eliminate through equivalences, do so, remove them from OPS, and
   return true.  Otherwise, return false.  CURR sits at index I in
   *OPS and LAST at I-1.  *ALL_DONE is set when the whole expression
   collapsed to a constant and no further processing is needed.  */

static bool
eliminate_duplicate_pair (enum tree_code opcode,
			  vec<operand_entry *> *ops,
			  bool *all_done,
			  unsigned int i,
			  operand_entry *curr,
			  operand_entry *last)
{

  /* If we have two of the same op, and the opcode is & |, min, or max,
     we can eliminate one of them.
     If we have two of the same op, and the opcode is ^, we can
     eliminate both of them.  */

  if (last && last->op == curr->op)
    {
      switch (opcode)
	{
	case MAX_EXPR:
	case MIN_EXPR:
	case BIT_IOR_EXPR:
	case BIT_AND_EXPR:
	  /* Idempotent operations: x OP x == x, so drop CURR.  */
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "Equivalence: ");
	      print_generic_expr (dump_file, curr->op);
	      fprintf (dump_file, " [&|minmax] ");
	      print_generic_expr (dump_file, last->op);
	      fprintf (dump_file, " -> ");
	      print_generic_stmt (dump_file, last->op);
	    }

	  ops->ordered_remove (i);
	  reassociate_stats.ops_eliminated ++;

	  return true;

	case BIT_XOR_EXPR:
	  /* x ^ x == 0: both operands disappear.  */
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "Equivalence: ");
	      print_generic_expr (dump_file, curr->op);
	      fprintf (dump_file, " ^ ");
	      print_generic_expr (dump_file, last->op);
	      fprintf (dump_file, " -> nothing\n");
	    }

	  reassociate_stats.ops_eliminated += 2;

	  if (ops->length () == 2)
	    {
	      /* The whole expression became zero; replace the operand
		 list with a single zero and tell the caller to stop.  */
	      ops->truncate (0);
	      add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (last->op)));
	      *all_done = true;
	    }
	  else
	    {
	      /* Remove LAST (at I-1) and CURR (which shifts to I-1
		 after the first removal).  */
	      ops->ordered_remove (i-1);
	      ops->ordered_remove (i-1);
	    }

	  return true;

	default:
	  break;
	}
    }
  return false;
}
781
/* NEGATE_EXPR operands seen in PLUS_EXPR chains, saved by
   eliminate_plus_minus_pair for later inspection in
   repropagate_negates.  */
static vec<tree> plus_negates;
783
/* If OPCODE is PLUS_EXPR, CURR->OP is a negate expression or a bitwise not
   expression, look in OPS for a corresponding positive operation to cancel
   it out.  If we find one, remove the other from OPS, replace
   OPS[CURRINDEX] with 0 or -1, respectively, and return true.  Otherwise,
   return false.  */

static bool
eliminate_plus_minus_pair (enum tree_code opcode,
			   vec<operand_entry *> *ops,
			   unsigned int currindex,
			   operand_entry *curr)
{
  tree negateop;
  tree notop;
  unsigned int i;
  operand_entry *oe;

  if (opcode != PLUS_EXPR || TREE_CODE (curr->op) != SSA_NAME)
    return false;

  /* CURR->OP may be a negate (-x + x -> 0) or a bitwise not
     (~x + x -> -1).  */
  negateop = get_unary_op (curr->op, NEGATE_EXPR);
  notop = get_unary_op (curr->op, BIT_NOT_EXPR);
  if (negateop == NULL_TREE && notop == NULL_TREE)
    return false;

  /* Any non-negated version will have a rank that is one less than
     the current rank.  So once we hit those ranks, if we don't find
     one, we can stop.  */

  for (i = currindex + 1;
       ops->iterate (i, &oe)
       && oe->rank >= curr->rank - 1 ;
       i++)
    {
      /* negateop + -negateop cancels to zero.  */
      if (negateop
	  && ops_equal_values_p (oe->op, negateop))
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "Equivalence: ");
	      print_generic_expr (dump_file, negateop);
	      fprintf (dump_file, " + -");
	      print_generic_expr (dump_file, oe->op);
	      fprintf (dump_file, " -> 0\n");
	    }

	  /* Remove the positive op, append a zero, and drop CURR.  */
	  ops->ordered_remove (i);
	  add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (oe->op)));
	  ops->ordered_remove (currindex);
	  reassociate_stats.ops_eliminated ++;

	  return true;
	}
      /* notop + ~notop sums to all-ones (-1).  */
      else if (notop
	       && ops_equal_values_p (oe->op, notop))
	{
	  tree op_type = TREE_TYPE (oe->op);

	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "Equivalence: ");
	      print_generic_expr (dump_file, notop);
	      fprintf (dump_file, " + ~");
	      print_generic_expr (dump_file, oe->op);
	      fprintf (dump_file, " -> -1\n");
	    }

	  ops->ordered_remove (i);
	  add_to_ops_vec (ops, build_all_ones_cst (op_type));
	  ops->ordered_remove (currindex);
	  reassociate_stats.ops_eliminated ++;

	  return true;
	}
    }

  /* If CURR->OP is a negate expr without nop conversion in a plus expr:
     save it for later inspection in repropagate_negates().  */
  if (negateop != NULL_TREE
      && gimple_assign_rhs_code (SSA_NAME_DEF_STMT (curr->op)) == NEGATE_EXPR)
    plus_negates.safe_push (curr->op);

  return false;
}
868
/* If OPCODE is BIT_IOR_EXPR, BIT_AND_EXPR, and, CURR->OP is really a
   bitwise not expression, look in OPS for a corresponding operand to
   cancel it out.  If we find one, remove the other from OPS, replace
   OPS[CURRINDEX] with 0, and return true.  Otherwise, return
   false.  */

static bool
eliminate_not_pairs (enum tree_code opcode,
		     vec<operand_entry *> *ops,
		     unsigned int currindex,
		     operand_entry *curr)
{
  tree notop;
  unsigned int i;
  operand_entry *oe;

  if ((opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
      || TREE_CODE (curr->op) != SSA_NAME)
    return false;

  notop = get_unary_op (curr->op, BIT_NOT_EXPR);
  if (notop == NULL_TREE)
    return false;

  /* Any non-not version will have a rank that is one less than
     the current rank.  So once we hit those ranks, if we don't find
     one, we can stop.  */

  for (i = currindex + 1;
       ops->iterate (i, &oe)
       && oe->rank >= curr->rank - 1;
       i++)
    {
      if (oe->op == notop)
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "Equivalence: ");
	      print_generic_expr (dump_file, notop);
	      if (opcode == BIT_AND_EXPR)
		fprintf (dump_file, " & ~");
	      else if (opcode == BIT_IOR_EXPR)
		fprintf (dump_file, " | ~");
	      print_generic_expr (dump_file, oe->op);
	      if (opcode == BIT_AND_EXPR)
		fprintf (dump_file, " -> 0\n");
	      else if (opcode == BIT_IOR_EXPR)
		fprintf (dump_file, " -> -1\n");
	    }

	  /* x & ~x == 0; x | ~x == all-ones.  Either way the constant
	     absorbs every other operand in the list.  */
	  if (opcode == BIT_AND_EXPR)
	    oe->op = build_zero_cst (TREE_TYPE (oe->op));
	  else if (opcode == BIT_IOR_EXPR)
	    oe->op = build_all_ones_cst (TREE_TYPE (oe->op));

	  reassociate_stats.ops_eliminated += ops->length () - 1;
	  ops->truncate (0);
	  ops->quick_push (oe);
	  return true;
	}
    }

  return false;
}
933
/* Use constant value that may be present in OPS to try to eliminate
   operands.  Note that this function is only really used when we've
   eliminated ops for other reasons, or merged constants.  Across
   single statements, fold already does all of this, plus more.  There
   is little point in duplicating logic, so I've only included the
   identities that I could ever construct testcases to trigger.  */

static void
eliminate_using_constants (enum tree_code opcode,
			   vec<operand_entry *> *ops)
{
  /* Constants sort to the end of the operand list, so only the last
     entry needs to be inspected.  */
  operand_entry *oelast = ops->last ();
  tree type = TREE_TYPE (oelast->op);

  if (oelast->rank == 0
      && (ANY_INTEGRAL_TYPE_P (type) || FLOAT_TYPE_P (type)))
    {
      switch (opcode)
	{
	case BIT_AND_EXPR:
	  /* x & 0 == 0: the zero absorbs everything else.  */
	  if (integer_zerop (oelast->op))
	    {
	      if (ops->length () != 1)
		{
		  if (dump_file && (dump_flags & TDF_DETAILS))
		    fprintf (dump_file, "Found & 0, removing all other ops\n");

		  reassociate_stats.ops_eliminated += ops->length () - 1;

		  ops->truncate (0);
		  ops->quick_push (oelast);
		  return;
		}
	    }
	  /* x & -1 == x: the all-ones constant is an identity.  */
	  else if (integer_all_onesp (oelast->op))
	    {
	      if (ops->length () != 1)
		{
		  if (dump_file && (dump_flags & TDF_DETAILS))
		    fprintf (dump_file, "Found & -1, removing\n");
		  ops->pop ();
		  reassociate_stats.ops_eliminated++;
		}
	    }
	  break;
	case BIT_IOR_EXPR:
	  /* x | -1 == -1: the all-ones constant absorbs.  */
	  if (integer_all_onesp (oelast->op))
	    {
	      if (ops->length () != 1)
		{
		  if (dump_file && (dump_flags & TDF_DETAILS))
		    fprintf (dump_file, "Found | -1, removing all other ops\n");

		  reassociate_stats.ops_eliminated += ops->length () - 1;

		  ops->truncate (0);
		  ops->quick_push (oelast);
		  return;
		}
	    }
	  /* x | 0 == x: zero is an identity.  */
	  else if (integer_zerop (oelast->op))
	    {
	      if (ops->length () != 1)
		{
		  if (dump_file && (dump_flags & TDF_DETAILS))
		    fprintf (dump_file, "Found | 0, removing\n");
		  ops->pop ();
		  reassociate_stats.ops_eliminated++;
		}
	    }
	  break;
	case MULT_EXPR:
	  /* x * 0 == 0, but for floats only when NaNs and signed
	     zeros cannot change the result.  */
	  if (integer_zerop (oelast->op)
	      || (FLOAT_TYPE_P (type)
		  && !HONOR_NANS (type)
		  && !HONOR_SIGNED_ZEROS (type)
		  && real_zerop (oelast->op)))
	    {
	      if (ops->length () != 1)
		{
		  if (dump_file && (dump_flags & TDF_DETAILS))
		    fprintf (dump_file, "Found * 0, removing all other ops\n");

		  reassociate_stats.ops_eliminated += ops->length () - 1;
		  ops->truncate (0);
		  ops->quick_push (oelast);
		  return;
		}
	    }
	  /* x * 1 == x; for floats only when signaling NaNs need not
	     be preserved.  */
	  else if (integer_onep (oelast->op)
		   || (FLOAT_TYPE_P (type)
		       && !HONOR_SNANS (type)
		       && real_onep (oelast->op)))
	    {
	      if (ops->length () != 1)
		{
		  if (dump_file && (dump_flags & TDF_DETAILS))
		    fprintf (dump_file, "Found * 1, removing\n");
		  ops->pop ();
		  reassociate_stats.ops_eliminated++;
		  return;
		}
	    }
	  break;
	case BIT_XOR_EXPR:
	case PLUS_EXPR:
	case MINUS_EXPR:
	  /* x OP 0 == x; for float addition/subtraction only when
	     adding zero is really a no-op for this type.  */
	  if (integer_zerop (oelast->op)
	      || (FLOAT_TYPE_P (type)
		  && (opcode == PLUS_EXPR || opcode == MINUS_EXPR)
		  && fold_real_zero_addition_p (type, oelast->op,
						opcode == MINUS_EXPR)))
	    {
	      if (ops->length () != 1)
		{
		  if (dump_file && (dump_flags & TDF_DETAILS))
		    fprintf (dump_file, "Found [|^+] 0, removing\n");
		  ops->pop ();
		  reassociate_stats.ops_eliminated++;
		  return;
		}
	    }
	  break;
	default:
	  break;
	}
    }
}
1062
1063
/* Forward declaration; defined later in this file.  Flattens the
   expression tree rooted at the given statement into an operand list.  */
static void linearize_expr_tree (vec<operand_entry *> *, gimple *,
				 bool, bool);
1066
/* Structure for tracking and counting operands.  */
struct oecount {
  /* Number of times the (OP, OECODE) pair occurred across the
     candidate chains (see undistribute_ops_list).  */
  unsigned int cnt;
  /* Unique id, used only to stabilize the qsort of oecounts.  */
  unsigned int id;
  /* The operation code (MULT_EXPR or RDIV_EXPR) of the chain entry.  */
  enum tree_code oecode;
  /* The operand being counted.  */
  tree op;
};
1074
1075
/* The heap for the oecount hashtable and the sorted list of operands.
   The hash table (see oecount_hasher) stores 1-based indices into this
   vector biased by 41, i.e. slot value == vector index + 42.  */
static vec<oecount> cvec;
1078
1079
/* Oecount hashtable helpers.  Table entries are ints that index into
   CVEC offset by 42, so that the values 0 and 1 stay available as the
   empty and deleted markers of int_hash <int, 0, 1>.  */

struct oecount_hasher : int_hash <int, 0, 1>
{
  static inline hashval_t hash (int);
  static inline bool equal (int, int);
};
1087
/* Hash function for oecount.  P is a biased CVEC index (index + 42,
   see oecount_hasher); hash the (operand, operation code) pair it
   refers to.  */

inline hashval_t
oecount_hasher::hash (int p)
{
  /* Undo the +42 bias to recover the CVEC element.  */
  const oecount *c = &cvec[p - 42];
  return htab_hash_pointer (c->op) ^ (hashval_t)c->oecode;
}
1096
/* Comparison function for oecount.  P1 and P2 are biased CVEC indices
   (index + 42); two entries are equal when they count the same
   (operand, operation code) pair.  */

inline bool
oecount_hasher::equal (int p1, int p2)
{
  const oecount *c1 = &cvec[p1 - 42];
  const oecount *c2 = &cvec[p2 - 42];
  return c1->oecode == c2->oecode && c1->op == c2->op;
}
1106
1107 /* Comparison function for qsort sorting oecount elements by count. */
1108
1109 static int
oecount_cmp(const void * p1,const void * p2)1110 oecount_cmp (const void *p1, const void *p2)
1111 {
1112 const oecount *c1 = (const oecount *)p1;
1113 const oecount *c2 = (const oecount *)p2;
1114 if (c1->cnt != c2->cnt)
1115 return c1->cnt > c2->cnt ? 1 : -1;
1116 else
1117 /* If counts are identical, use unique IDs to stabilize qsort. */
1118 return c1->id > c2->id ? 1 : -1;
1119 }
1120
/* Return TRUE iff STMT represents a builtin call that raises OP
   to some exponent.  */

static bool
stmt_is_power_of_op (gimple *stmt, tree op)
{
  if (!is_gimple_call (stmt))
    return false;

  /* Only the pow/powi builtin families qualify; their base is the
     first call argument.  */
  switch (gimple_call_combined_fn (stmt))
    {
    CASE_CFN_POW:
    CASE_CFN_POWI:
      return (operand_equal_p (gimple_call_arg (stmt, 0), op, 0));

    default:
      return false;
    }
}
1140
/* Given STMT which is a __builtin_pow* call, decrement its exponent
   in place and return the result.  Assumes that stmt_is_power_of_op
   was previously called for STMT and returned TRUE.  */

static HOST_WIDE_INT
decrement_power (gimple *stmt)
{
  REAL_VALUE_TYPE c, cint;
  HOST_WIDE_INT power;
  tree arg1;

  switch (gimple_call_combined_fn (stmt))
    {
    CASE_CFN_POW:
      /* pow carries a REAL_CST exponent: convert it to an integer,
	 subtract one and write the decremented value back as a real
	 constant into the call's second argument.  */
      arg1 = gimple_call_arg (stmt, 1);
      c = TREE_REAL_CST (arg1);
      power = real_to_integer (&c) - 1;
      real_from_integer (&cint, VOIDmode, power, SIGNED);
      gimple_call_set_arg (stmt, 1, build_real (TREE_TYPE (arg1), cint));
      return power;

    CASE_CFN_POWI:
      /* powi carries an INTEGER_CST exponent: rebuild it decremented.  */
      arg1 = gimple_call_arg (stmt, 1);
      power = TREE_INT_CST_LOW (arg1) - 1;
      gimple_call_set_arg (stmt, 1, build_int_cst (TREE_TYPE (arg1), power));
      return power;

    default:
      /* Guaranteed unreachable by the stmt_is_power_of_op precondition.  */
      gcc_unreachable ();
    }
}
1172
/* Replace the SSA name defined by STMT with a freshly created SSA name
   and redirect all uses of the old name to the new one.  Debug uses are
   instead redirected to a new DEBUG_EXPR_DECL bound to
   "new_lhs OPCODE OP" (the value the old name used to compute), so
   debug info stays accurate.  Returns the new SSA name.  */

static tree
make_new_ssa_for_def (gimple *stmt, enum tree_code opcode, tree op)
{
  gimple *use_stmt;
  use_operand_p use;
  imm_use_iterator iter;
  tree new_lhs, new_debug_lhs = NULL_TREE;
  tree lhs = gimple_get_lhs (stmt);

  new_lhs = make_ssa_name (TREE_TYPE (lhs));
  gimple_set_lhs (stmt, new_lhs);

  /* Also need to update GIMPLE_DEBUGs.  */
  FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs)
    {
      tree repl = new_lhs;
      if (is_gimple_debug (use_stmt))
	{
	  /* Create the debug decl and its binding lazily, at most once,
	     on the first debug use encountered.  */
	  if (new_debug_lhs == NULL_TREE)
	    {
	      new_debug_lhs = make_node (DEBUG_EXPR_DECL);
	      gdebug *def_temp
		= gimple_build_debug_bind (new_debug_lhs,
					   build2 (opcode, TREE_TYPE (lhs),
						   new_lhs, op),
					   stmt);
	      DECL_ARTIFICIAL (new_debug_lhs) = 1;
	      TREE_TYPE (new_debug_lhs) = TREE_TYPE (lhs);
	      SET_DECL_MODE (new_debug_lhs, TYPE_MODE (TREE_TYPE (lhs)));
	      /* Keep the same uid as STMT so the reassoc uid-based
		 dominance checks stay valid.  */
	      gimple_set_uid (def_temp, gimple_uid (stmt));
	      gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
	      gsi_insert_after (&gsi, def_temp, GSI_SAME_STMT);
	    }
	  repl = new_debug_lhs;
	}
      /* Rewrite every use of the old LHS in this statement.  */
      FOR_EACH_IMM_USE_ON_STMT (use, iter)
	SET_USE (use, repl);
      update_stmt (use_stmt);
    }
  return new_lhs;
}
1217
1218 /* Replace all SSAs defined in STMTS_TO_FIX and replace its
1219 uses with new SSAs. Also do this for the stmt that defines DEF
1220 if *DEF is not OP. */
1221
1222 static void
make_new_ssa_for_all_defs(tree * def,enum tree_code opcode,tree op,vec<gimple * > & stmts_to_fix)1223 make_new_ssa_for_all_defs (tree *def, enum tree_code opcode, tree op,
1224 vec<gimple *> &stmts_to_fix)
1225 {
1226 unsigned i;
1227 gimple *stmt;
1228
1229 if (*def != op
1230 && TREE_CODE (*def) == SSA_NAME
1231 && (stmt = SSA_NAME_DEF_STMT (*def))
1232 && gimple_code (stmt) != GIMPLE_NOP)
1233 *def = make_new_ssa_for_def (stmt, opcode, op);
1234
1235 FOR_EACH_VEC_ELT (stmts_to_fix, i, stmt)
1236 make_new_ssa_for_def (stmt, opcode, op);
1237 }
1238
/* Find the single immediate use of STMT's LHS, and replace it
   with OP.  Remove STMT.  If STMT's LHS is the same as *DEF,
   replace *DEF with OP as well.  STMT's LHS must have exactly one
   use (asserted below).  */

static void
propagate_op_to_single_use (tree op, gimple *stmt, tree *def)
{
  tree lhs;
  gimple *use_stmt;
  use_operand_p use;
  gimple_stmt_iterator gsi;

  /* STMT may be either a call (__builtin_pow*) or an assignment;
     fetch its LHS accordingly.  */
  if (is_gimple_call (stmt))
    lhs = gimple_call_lhs (stmt);
  else
    lhs = gimple_assign_lhs (stmt);

  gcc_assert (has_single_use (lhs));
  single_imm_use (lhs, &use, &use_stmt);
  if (lhs == *def)
    *def = op;
  SET_USE (use, op);
  /* For a non-SSA replacement (a constant) the operand caches need an
     explicit update; SSA replacements are handled by SET_USE itself.
     NOTE(review): that is the apparent intent of the guard — confirm.  */
  if (TREE_CODE (op) != SSA_NAME)
    update_stmt (use_stmt);
  /* STMT is now dead; unlink any virtual defs and remove it.  */
  gsi = gsi_for_stmt (stmt);
  unlink_stmt_vdef (stmt);
  reassoc_remove_stmt (&gsi);
  release_defs (stmt);
}
1268
/* Walks the linear chain with result *DEF searching for an operation
   with operand OP and code OPCODE removing that from the chain.  *DEF
   is updated if there is only one operand but no operation left.  */

static void
zero_one_operation (tree *def, enum tree_code opcode, tree op)
{
  tree orig_def = *def;
  gimple *stmt = SSA_NAME_DEF_STMT (*def);
  /* PR72835 - Record the stmt chain that has to be updated such that
     we don't use the same LHS when the values computed are different.  */
  auto_vec<gimple *, 64> stmts_to_fix;

  do
    {
      tree name;

      if (opcode == MULT_EXPR)
	{
	  /* A power builtin hides OP with a known multiplicity:
	     decrement the exponent instead of removing a statement.
	     If the exponent dropped to 1 the call itself becomes a
	     plain copy of OP and can be propagated away.  */
	  if (stmt_is_power_of_op (stmt, op))
	    {
	      if (decrement_power (stmt) == 1)
		{
		  if (stmts_to_fix.length () > 0)
		    stmts_to_fix.pop ();
		  propagate_op_to_single_use (op, stmt, def);
		}
	      break;
	    }
	  else if (gimple_assign_rhs_code (stmt) == NEGATE_EXPR)
	    {
	      /* x = -OP: removing OP leaves a factor of -1 behind.  */
	      if (gimple_assign_rhs1 (stmt) == op)
		{
		  tree cst = build_minus_one_cst (TREE_TYPE (op));
		  if (stmts_to_fix.length () > 0)
		    stmts_to_fix.pop ();
		  propagate_op_to_single_use (cst, stmt, def);
		  break;
		}
	      /* Removing a factor of -1 from x = -y turns the negate
		 into a plain copy.  */
	      else if (integer_minus_onep (op)
		       || real_minus_onep (op))
		{
		  gimple_assign_set_rhs_code
		    (stmt, TREE_CODE (gimple_assign_rhs1 (stmt)));
		  break;
		}
	    }
	}

      name = gimple_assign_rhs1 (stmt);

      /* If this is the operation we look for and one of the operands
	 is ours simply propagate the other operand into the stmts
	 single use.  */
      if (gimple_assign_rhs_code (stmt) == opcode
	  && (name == op
	      || gimple_assign_rhs2 (stmt) == op))
	{
	  if (name == op)
	    name = gimple_assign_rhs2 (stmt);
	  if (stmts_to_fix.length () > 0)
	    stmts_to_fix.pop ();
	  propagate_op_to_single_use (name, stmt, def);
	  break;
	}

      /* We might have a multiply of two __builtin_pow* calls, and
	 the operand might be hiding in the rightmost one.  Likewise
	 this can happen for a negate.  */
      if (opcode == MULT_EXPR
	  && gimple_assign_rhs_code (stmt) == opcode
	  && TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME
	  && has_single_use (gimple_assign_rhs2 (stmt)))
	{
	  gimple *stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
	  if (stmt_is_power_of_op (stmt2, op))
	    {
	      if (decrement_power (stmt2) == 1)
		propagate_op_to_single_use (op, stmt2, def);
	      else
		stmts_to_fix.safe_push (stmt2);
	      break;
	    }
	  else if (is_gimple_assign (stmt2)
		   && gimple_assign_rhs_code (stmt2) == NEGATE_EXPR)
	    {
	      if (gimple_assign_rhs1 (stmt2) == op)
		{
		  tree cst = build_minus_one_cst (TREE_TYPE (op));
		  propagate_op_to_single_use (cst, stmt2, def);
		  break;
		}
	      else if (integer_minus_onep (op)
		       || real_minus_onep (op))
		{
		  stmts_to_fix.safe_push (stmt2);
		  gimple_assign_set_rhs_code
		    (stmt2, TREE_CODE (gimple_assign_rhs1 (stmt2)));
		  break;
		}
	    }
	}

      /* Continue walking the chain.  The chain is linearized, so OP
	 must eventually be found via rhs1 links.  */
      gcc_assert (name != op
		  && TREE_CODE (name) == SSA_NAME);
      stmt = SSA_NAME_DEF_STMT (name);
      stmts_to_fix.safe_push (stmt);
    }
  while (1);

  /* Every statement whose computed value changed needs a fresh SSA
     name (PR72835); also re-SSA *DEF's definition if it was kept.  */
  if (stmts_to_fix.length () > 0 || *def == orig_def)
    make_new_ssa_for_all_defs (def, opcode, op, stmts_to_fix);
}
1383
/* Returns true if statement S1 dominates statement S2.  Like
   stmt_dominates_stmt_p, but uses stmt UIDs to optimize.  */

static bool
reassoc_stmt_dominates_stmt_p (gimple *s1, gimple *s2)
{
  basic_block bb1 = gimple_bb (s1), bb2 = gimple_bb (s2);

  /* If bb1 is NULL, it should be a GIMPLE_NOP def stmt of an (D)
     SSA_NAME.  Assume it lives at the beginning of function and
     thus dominates everything.  */
  if (!bb1 || s1 == s2)
    return true;

  /* If bb2 is NULL, it doesn't dominate any stmt with a bb.  */
  if (!bb2)
    return false;

  if (bb1 == bb2)
    {
      /* PHIs in the same basic block are assumed to be
	 executed all in parallel, if only one stmt is a PHI,
	 it dominates the other stmt in the same basic block.  */
      if (gimple_code (s1) == GIMPLE_PHI)
	return true;

      if (gimple_code (s2) == GIMPLE_PHI)
	return false;

      /* UIDs must have been assigned (nonzero) for this shortcut.  */
      gcc_assert (gimple_uid (s1) && gimple_uid (s2));

      if (gimple_uid (s1) < gimple_uid (s2))
	return true;

      if (gimple_uid (s1) > gimple_uid (s2))
	return false;

      /* Equal UIDs: statements inserted by this pass reuse the uid of
	 their insertion point, so walk forward from S1 through the run
	 of same-uid statements; S1 dominates S2 iff S2 appears in that
	 run after S1.  */
      gimple_stmt_iterator gsi = gsi_for_stmt (s1);
      unsigned int uid = gimple_uid (s1);
      for (gsi_next (&gsi); !gsi_end_p (gsi); gsi_next (&gsi))
	{
	  gimple *s = gsi_stmt (gsi);
	  if (gimple_uid (s) != uid)
	    break;
	  if (s == s2)
	    return true;
	}

      return false;
    }

  /* Different blocks: fall back to the dominator tree.  */
  return dominated_by_p (CDI_DOMINATORS, bb2, bb1);
}
1437
/* Insert STMT after INSERT_POINT.  STMT's uid is set from its
   neighborhood so reassoc_stmt_dominates_stmt_p keeps working.  */

static void
insert_stmt_after (gimple *stmt, gimple *insert_point)
{
  gimple_stmt_iterator gsi;
  basic_block bb;

  if (gimple_code (insert_point) == GIMPLE_PHI)
    /* After a PHI: insert at the start of the PHI's block (handled by
       the common code below).  */
    bb = gimple_bb (insert_point);
  else if (!stmt_ends_bb_p (insert_point))
    {
      /* Common case: insert textually right after INSERT_POINT,
	 sharing its uid.  */
      gsi = gsi_for_stmt (insert_point);
      gimple_set_uid (stmt, gimple_uid (insert_point));
      gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
      return;
    }
  else
    /* We assume INSERT_POINT is a SSA_NAME_DEF_STMT of some SSA_NAME,
       thus if it must end a basic block, it should be a call that can
       throw, or some assignment that can throw.  If it throws, the LHS
       of it will not be initialized though, so only valid places using
       the SSA_NAME should be dominated by the fallthru edge.  */
    bb = find_fallthru_edge (gimple_bb (insert_point)->succs)->dest;
  /* Reached only in the PHI and ends-bb cases: insert at the head of
     BB, after any labels, and derive a uid from the surrounding
     statements (1 if the block is empty).  */
  gsi = gsi_after_labels (bb);
  if (gsi_end_p (gsi))
    {
      gimple_stmt_iterator gsi2 = gsi_last_bb (bb);
      gimple_set_uid (stmt,
		      gsi_end_p (gsi2) ? 1 : gimple_uid (gsi_stmt (gsi2)));
    }
  else
    gimple_set_uid (stmt, gimple_uid (gsi_stmt (gsi)));
  gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
}
1473
/* Builds one statement performing OP1 OPCODE OP2 using TMPVAR for
   the result.  Places the statement after the definition of either
   OP1 or OP2 (whichever is defined later, so both operands are
   available).  Returns the new statement.  */

static gimple *
build_and_add_sum (tree type, tree op1, tree op2, enum tree_code opcode)
{
  gimple *op1def = NULL, *op2def = NULL;
  gimple_stmt_iterator gsi;
  tree op;
  gassign *sum;

  /* Create the addition statement.  */
  op = make_ssa_name (type);
  sum = gimple_build_assign (op, opcode, op1, op2);

  /* Find an insertion place and insert.  */
  if (TREE_CODE (op1) == SSA_NAME)
    op1def = SSA_NAME_DEF_STMT (op1);
  if (TREE_CODE (op2) == SSA_NAME)
    op2def = SSA_NAME_DEF_STMT (op2);
  if ((!op1def || gimple_nop_p (op1def))
      && (!op2def || gimple_nop_p (op2def)))
    {
      /* Both operands are constants or default definitions: insert at
	 the start of the function's first real block.  */
      gsi = gsi_after_labels (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
      if (gsi_end_p (gsi))
	{
	  gimple_stmt_iterator gsi2
	    = gsi_last_bb (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
	  gimple_set_uid (sum,
			  gsi_end_p (gsi2) ? 1 : gimple_uid (gsi_stmt (gsi2)));
	}
      else
	gimple_set_uid (sum, gimple_uid (gsi_stmt (gsi)));
      gsi_insert_before (&gsi, sum, GSI_NEW_STMT);
    }
  else
    {
      /* Insert after the dominated (later) of the two definitions so
	 both operands are available at the insertion point.  */
      gimple *insert_point;
      if ((!op1def || gimple_nop_p (op1def))
	   || (op2def && !gimple_nop_p (op2def)
	       && reassoc_stmt_dominates_stmt_p (op1def, op2def)))
	insert_point = op2def;
      else
	insert_point = op1def;
      insert_stmt_after (sum, insert_point);
    }
  update_stmt (sum);

  return sum;
}
1525
1526 /* Perform un-distribution of divisions and multiplications.
1527 A * X + B * X is transformed into (A + B) * X and A / X + B / X
1528 to (A + B) / X for real X.
1529
1530 The algorithm is organized as follows.
1531
1532 - First we walk the addition chain *OPS looking for summands that
1533 are defined by a multiplication or a real division. This results
1534 in the candidates bitmap with relevant indices into *OPS.
1535
1536 - Second we build the chains of multiplications or divisions for
1537 these candidates, counting the number of occurrences of (operand, code)
1538 pairs in all of the candidates chains.
1539
1540 - Third we sort the (operand, code) pairs by number of occurrence and
1541 process them starting with the pair with the most uses.
1542
1543 * For each such pair we walk the candidates again to build a
1544 second candidate bitmap noting all multiplication/division chains
1545 that have at least one occurrence of (operand, code).
1546
1547 * We build an alternate addition chain only covering these
1548 candidates with one (operand, code) operation removed from their
1549 multiplication/division chain.
1550
1551 * The first candidate gets replaced by the alternate addition chain
1552 multiplied/divided by the operand.
1553
1554 * All candidate chains get disabled for further processing and
1555 processing of (operand, code) pairs continues.
1556
1557 The alternate addition chains built are re-processed by the main
1558 reassociation algorithm which allows optimizing a * x * y + b * y * x
1559 to (a + b ) * x * y in one invocation of the reassociation pass. */
1560
/* Worker for the un-distribution transform described in the comment
   above: rewrites A*X + B*X into (A+B)*X (and similarly for RDIV) on
   the addition chain *OPS within LOOP.  Returns true if *OPS changed
   and should be re-optimized.  */

static bool
undistribute_ops_list (enum tree_code opcode,
		       vec<operand_entry *> *ops, struct loop *loop)
{
  unsigned int length = ops->length ();
  operand_entry *oe1;
  unsigned i, j;
  unsigned nr_candidates, nr_candidates2;
  sbitmap_iterator sbi0;
  vec<operand_entry *> *subops;
  bool changed = false;
  unsigned int next_oecount_id = 0;

  /* Only addition chains with at least two members are handled.  */
  if (length <= 1
      || opcode != PLUS_EXPR)
    return false;

  /* Build a list of candidates to process: chain members defined by a
     reassociable multiplication or real division.  */
  auto_sbitmap candidates (length);
  bitmap_clear (candidates);
  nr_candidates = 0;
  FOR_EACH_VEC_ELT (*ops, i, oe1)
    {
      enum tree_code dcode;
      gimple *oe1def;

      if (TREE_CODE (oe1->op) != SSA_NAME)
	continue;
      oe1def = SSA_NAME_DEF_STMT (oe1->op);
      if (!is_gimple_assign (oe1def))
	continue;
      dcode = gimple_assign_rhs_code (oe1def);
      if ((dcode != MULT_EXPR
	   && dcode != RDIV_EXPR)
	  || !is_reassociable_op (oe1def, dcode, loop))
	continue;

      bitmap_set_bit (candidates, i);
      nr_candidates++;
    }

  if (nr_candidates < 2)
    return false;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "searching for un-distribute opportunities ");
      print_generic_expr (dump_file,
			  (*ops)[bitmap_first_set_bit (candidates)]->op, 0);
      fprintf (dump_file, " %d\n", nr_candidates);
    }

  /* Build linearized sub-operand lists and the counting table.  */
  cvec.create (0);

  hash_table<oecount_hasher> ctable (15);

  /* ??? Macro arguments cannot have multi-argument template types in
     them.  This typedef is needed to workaround that limitation.  */
  typedef vec<operand_entry *> vec_operand_entry_t_heap;
  subops = XCNEWVEC (vec_operand_entry_t_heap, ops->length ());
  EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
    {
      gimple *oedef;
      enum tree_code oecode;
      unsigned j;

      oedef = SSA_NAME_DEF_STMT ((*ops)[i]->op);
      oecode = gimple_assign_rhs_code (oedef);
      linearize_expr_tree (&subops[i], oedef,
			   associative_tree_code (oecode), false);

      /* Count each (operand, code) pair, deduplicating via CTABLE.
	 Table values are cvec positions biased by 42 (the new entry's
	 1-based position + 41), matching oecount_hasher.  */
      FOR_EACH_VEC_ELT (subops[i], j, oe1)
	{
	  oecount c;
	  int *slot;
	  int idx;
	  c.oecode = oecode;
	  c.cnt = 1;
	  c.id = next_oecount_id++;
	  c.op = oe1->op;
	  cvec.safe_push (c);
	  idx = cvec.length () + 41;
	  slot = ctable.find_slot (idx, INSERT);
	  if (!*slot)
	    {
	      *slot = idx;
	    }
	  else
	    {
	      /* Pair already seen: drop the tentative entry and bump
		 the existing count.  */
	      cvec.pop ();
	      cvec[*slot - 42].cnt++;
	    }
	}
    }

  /* Sort the counting table.  */
  cvec.qsort (oecount_cmp);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      oecount *c;
      fprintf (dump_file, "Candidates:\n");
      FOR_EACH_VEC_ELT (cvec, j, c)
	{
	  fprintf (dump_file, "  %u %s: ", c->cnt,
		   c->oecode == MULT_EXPR
		   ? "*" : c->oecode == RDIV_EXPR ? "/" : "?");
	  print_generic_expr (dump_file, c->op);
	  fprintf (dump_file, "\n");
	}
    }

  /* Process the (operand, code) pairs in order of most occurrence.
     CVEC is sorted ascending, so work from its tail.  */
  auto_sbitmap candidates2 (length);
  while (!cvec.is_empty ())
    {
      oecount *c = &cvec.last ();
      if (c->cnt < 2)
	break;

      /* Now collect the operands in the outer chain that contain
	 the common operand in their inner chain.  */
      bitmap_clear (candidates2);
      nr_candidates2 = 0;
      EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
	{
	  gimple *oedef;
	  enum tree_code oecode;
	  unsigned j;
	  tree op = (*ops)[i]->op;

	  /* If we undistributed in this chain already this may be
	     a constant.  */
	  if (TREE_CODE (op) != SSA_NAME)
	    continue;

	  oedef = SSA_NAME_DEF_STMT (op);
	  oecode = gimple_assign_rhs_code (oedef);
	  if (oecode != c->oecode)
	    continue;

	  FOR_EACH_VEC_ELT (subops[i], j, oe1)
	    {
	      if (oe1->op == c->op)
		{
		  bitmap_set_bit (candidates2, i);
		  ++nr_candidates2;
		  break;
		}
	    }
	}

      if (nr_candidates2 >= 2)
	{
	  operand_entry *oe1, *oe2;
	  gimple *prod;
	  int first = bitmap_first_set_bit (candidates2);

	  /* Build the new addition chain.  */
	  oe1 = (*ops)[first];
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "Building (");
	      print_generic_expr (dump_file, oe1->op);
	    }
	  /* Strip one occurrence of the common factor from each
	     candidate chain and sum the stripped chains, zeroing out
	     the consumed entries so the addition stays intact.  */
	  zero_one_operation (&oe1->op, c->oecode, c->op);
	  EXECUTE_IF_SET_IN_BITMAP (candidates2, first+1, i, sbi0)
	    {
	      gimple *sum;
	      oe2 = (*ops)[i];
	      if (dump_file && (dump_flags & TDF_DETAILS))
		{
		  fprintf (dump_file, " + ");
		  print_generic_expr (dump_file, oe2->op);
		}
	      zero_one_operation (&oe2->op, c->oecode, c->op);
	      sum = build_and_add_sum (TREE_TYPE (oe1->op),
				       oe1->op, oe2->op, opcode);
	      oe2->op = build_zero_cst (TREE_TYPE (oe2->op));
	      oe2->rank = 0;
	      oe1->op = gimple_get_lhs (sum);
	    }

	  /* Apply the multiplication/division.  */
	  prod = build_and_add_sum (TREE_TYPE (oe1->op),
				    oe1->op, c->op, c->oecode);
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, ") %s ", c->oecode == MULT_EXPR ? "*" : "/");
	      print_generic_expr (dump_file, c->op);
	      fprintf (dump_file, "\n");
	    }

	  /* Record it in the addition chain and disable further
	     undistribution with this op.  */
	  oe1->op = gimple_assign_lhs (prod);
	  oe1->rank = get_rank (oe1->op);
	  subops[first].release ();

	  changed = true;
	}

      cvec.pop ();
    }

  for (i = 0; i < ops->length (); ++i)
    subops[i].release ();
  free (subops);
  cvec.release ();

  return changed;
}
1774
1775 /* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR and CURR is a comparison
1776 expression, examine the other OPS to see if any of them are comparisons
1777 of the same values, which we may be able to combine or eliminate.
1778 For example, we can rewrite (a < b) | (a == b) as (a <= b). */
1779
/* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR and CURR (at index
   CURRINDEX in *OPS) is a comparison, examine the later OPS entries
   for comparisons of the same values that can be combined with it,
   e.g. (a < b) | (a == b) becomes (a <= b).  Returns true (and
   updates *OPS) when a combination happened; the caller should then
   re-iterate.  */

static bool
eliminate_redundant_comparison (enum tree_code opcode,
				vec<operand_entry *> *ops,
				unsigned int currindex,
				operand_entry *curr)
{
  tree op1, op2;
  enum tree_code lcode, rcode;
  gimple *def1, *def2;
  int i;
  operand_entry *oe;

  if (opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
    return false;

  /* Check that CURR is a comparison.  */
  if (TREE_CODE (curr->op) != SSA_NAME)
    return false;
  def1 = SSA_NAME_DEF_STMT (curr->op);
  if (!is_gimple_assign (def1))
    return false;
  lcode = gimple_assign_rhs_code (def1);
  if (TREE_CODE_CLASS (lcode) != tcc_comparison)
    return false;
  op1 = gimple_assign_rhs1 (def1);
  op2 = gimple_assign_rhs2 (def1);

  /* Now look for a similar comparison in the remaining OPS.  */
  for (i = currindex + 1; ops->iterate (i, &oe); i++)
    {
      tree t;

      if (TREE_CODE (oe->op) != SSA_NAME)
	continue;
      def2 = SSA_NAME_DEF_STMT (oe->op);
      if (!is_gimple_assign (def2))
	continue;
      rcode = gimple_assign_rhs_code (def2);
      if (TREE_CODE_CLASS (rcode) != tcc_comparison)
	continue;

      /* If we got here, we have a match.  See if we can combine the
	 two comparisons.  */
      if (opcode == BIT_IOR_EXPR)
	t = maybe_fold_or_comparisons (lcode, op1, op2,
				       rcode, gimple_assign_rhs1 (def2),
				       gimple_assign_rhs2 (def2));
      else
	t = maybe_fold_and_comparisons (lcode, op1, op2,
					rcode, gimple_assign_rhs1 (def2),
					gimple_assign_rhs2 (def2));
      if (!t)
	continue;

      /* maybe_fold_and_comparisons and maybe_fold_or_comparisons
	 always give us a boolean_type_node value back.  If the original
	 BIT_AND_EXPR or BIT_IOR_EXPR was of a wider integer type,
	 we need to convert.  */
      if (!useless_type_conversion_p (TREE_TYPE (curr->op), TREE_TYPE (t)))
	t = fold_convert (TREE_TYPE (curr->op), t);

      /* Unless the combined result is a constant or identical to
	 CURR->op, it must be a comparison whose operands are simple
	 enough to re-emit as a GIMPLE assignment; otherwise skip.  */
      if (TREE_CODE (t) != INTEGER_CST
	  && !operand_equal_p (t, curr->op, 0))
	{
	  enum tree_code subcode;
	  tree newop1, newop2;
	  if (!COMPARISON_CLASS_P (t))
	    continue;
	  extract_ops_from_tree (t, &subcode, &newop1, &newop2);
	  STRIP_USELESS_TYPE_CONVERSION (newop1);
	  STRIP_USELESS_TYPE_CONVERSION (newop2);
	  if (!is_gimple_val (newop1) || !is_gimple_val (newop2))
	    continue;
	}

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "Equivalence: ");
	  print_generic_expr (dump_file, curr->op);
	  fprintf (dump_file, " %s ", op_symbol_code (opcode));
	  print_generic_expr (dump_file, oe->op);
	  fprintf (dump_file, " -> ");
	  print_generic_expr (dump_file, t);
	  fprintf (dump_file, "\n");
	}

      /* Now we can delete oe, as it has been subsumed by the new combined
         expression t.  */
      ops->ordered_remove (i);
      reassociate_stats.ops_eliminated ++;

      /* If t is the same as curr->op, we're done.  Otherwise we must
	 replace curr->op with t.  Special case is if we got a constant
	 back, in which case we add it to the end instead of in place of
	 the current entry.  */
      if (TREE_CODE (t) == INTEGER_CST)
	{
	  ops->ordered_remove (currindex);
	  add_to_ops_vec (ops, t);
	}
      else if (!operand_equal_p (t, curr->op, 0))
	{
	  gimple *sum;
	  enum tree_code subcode;
	  tree newop1;
	  tree newop2;
	  gcc_assert (COMPARISON_CLASS_P (t));
	  extract_ops_from_tree (t, &subcode, &newop1, &newop2);
	  STRIP_USELESS_TYPE_CONVERSION (newop1);
	  STRIP_USELESS_TYPE_CONVERSION (newop2);
	  gcc_checking_assert (is_gimple_val (newop1)
			       && is_gimple_val (newop2));
	  /* Materialize the combined comparison as a new statement and
	     let CURR refer to its result.  */
	  sum = build_and_add_sum (TREE_TYPE (t), newop1, newop2, subcode);
	  curr->op = gimple_get_lhs (sum);
	}
      return true;
    }

  return false;
}
1900
1901
/* Transform repeated addition of same values into multiply with
   constant: x + x + x becomes x * 3.  *OPS is assumed sorted so equal
   operands are adjacent.  For floats this is only done under
   -funsafe-math-optimizations.  Returns true if *OPS changed.  */
static bool
transform_add_to_multiply (vec<operand_entry *> *ops)
{
  operand_entry *oe;
  tree op = NULL_TREE;
  int j;
  int i, start = -1, end = 0, count = 0;
  auto_vec<std::pair <int, int> > indxs;
  bool changed = false;

  if (!INTEGRAL_TYPE_P (TREE_TYPE ((*ops)[0]->op))
      && (!SCALAR_FLOAT_TYPE_P (TREE_TYPE ((*ops)[0]->op))
	  || !flag_unsafe_math_optimizations))
    return false;

  /* Look for repeated operands.  Each maximal run of equal operands
     of length > 1 is recorded as a [start, end] index pair.  */
  FOR_EACH_VEC_ELT (*ops, i, oe)
    {
      if (start == -1)
	{
	  count = 1;
	  op = oe->op;
	  start = i;
	}
      else if (operand_equal_p (oe->op, op, 0))
	{
	  count++;
	  end = i;
	}
      else
	{
	  if (count > 1)
	    indxs.safe_push (std::make_pair (start, end));
	  count = 1;
	  op = oe->op;
	  start = i;
	}
    }

  /* Flush the final run.  */
  if (count > 1)
    indxs.safe_push (std::make_pair (start, end));

  /* Process runs back to front so earlier indices stay valid while
     entries are removed.  */
  for (j = indxs.length () - 1; j >= 0; --j)
    {
      /* Convert repeated operand addition to multiplication.  */
      start = indxs[j].first;
      end = indxs[j].second;
      op = (*ops)[start]->op;
      count = end - start + 1;
      for (i = end; i >= start; --i)
	ops->unordered_remove (i);
      tree tmp = make_ssa_name (TREE_TYPE (op));
      tree cst = build_int_cst (integer_type_node, count);
      gassign *mul_stmt
	= gimple_build_assign (tmp, MULT_EXPR,
			       op, fold_convert (TREE_TYPE (op), cst));
      /* Mark visited so the pass doesn't linearize the new multiply.
	 NOTE(review): that is the apparent purpose — confirm against
	 the visited handling elsewhere in this file.  */
      gimple_set_visited (mul_stmt, true);
      add_to_ops_vec (ops, tmp, mul_stmt);
      changed = true;
    }

  return changed;
}
1967
1968
1969 /* Perform various identities and other optimizations on the list of
1970 operand entries, stored in OPS. The tree code for the binary
1971 operation between all the operands is OPCODE. */
1972
1973 static void
optimize_ops_list(enum tree_code opcode,vec<operand_entry * > * ops)1974 optimize_ops_list (enum tree_code opcode,
1975 vec<operand_entry *> *ops)
1976 {
1977 unsigned int length = ops->length ();
1978 unsigned int i;
1979 operand_entry *oe;
1980 operand_entry *oelast = NULL;
1981 bool iterate = false;
1982
1983 if (length == 1)
1984 return;
1985
1986 oelast = ops->last ();
1987
1988 /* If the last two are constants, pop the constants off, merge them
1989 and try the next two. */
1990 if (oelast->rank == 0 && is_gimple_min_invariant (oelast->op))
1991 {
1992 operand_entry *oelm1 = (*ops)[length - 2];
1993
1994 if (oelm1->rank == 0
1995 && is_gimple_min_invariant (oelm1->op)
1996 && useless_type_conversion_p (TREE_TYPE (oelm1->op),
1997 TREE_TYPE (oelast->op)))
1998 {
1999 tree folded = fold_binary (opcode, TREE_TYPE (oelm1->op),
2000 oelm1->op, oelast->op);
2001
2002 if (folded && is_gimple_min_invariant (folded))
2003 {
2004 if (dump_file && (dump_flags & TDF_DETAILS))
2005 fprintf (dump_file, "Merging constants\n");
2006
2007 ops->pop ();
2008 ops->pop ();
2009
2010 add_to_ops_vec (ops, folded);
2011 reassociate_stats.constants_eliminated++;
2012
2013 optimize_ops_list (opcode, ops);
2014 return;
2015 }
2016 }
2017 }
2018
2019 eliminate_using_constants (opcode, ops);
2020 oelast = NULL;
2021
2022 for (i = 0; ops->iterate (i, &oe);)
2023 {
2024 bool done = false;
2025
2026 if (eliminate_not_pairs (opcode, ops, i, oe))
2027 return;
2028 if (eliminate_duplicate_pair (opcode, ops, &done, i, oe, oelast)
2029 || (!done && eliminate_plus_minus_pair (opcode, ops, i, oe))
2030 || (!done && eliminate_redundant_comparison (opcode, ops, i, oe)))
2031 {
2032 if (done)
2033 return;
2034 iterate = true;
2035 oelast = NULL;
2036 continue;
2037 }
2038 oelast = oe;
2039 i++;
2040 }
2041
2042 length = ops->length ();
2043 oelast = ops->last ();
2044
2045 if (iterate)
2046 optimize_ops_list (opcode, ops);
2047 }
2048
2049 /* The following functions are subroutines to optimize_range_tests and allow
2050 it to try to change a logical combination of comparisons into a range
2051 test.
2052
2053 For example, both
2054 X == 2 || X == 5 || X == 3 || X == 4
2055 and
2056 X >= 2 && X <= 5
2057 are converted to
2058 (unsigned) (X - 2) <= 3
2059
2060 For more information see comments above fold_test_range in fold-const.c,
2061 this implementation is for GIMPLE. */
2062
struct range_entry
{
  /* The expression (an SSA_NAME) being range-tested, or NULL_TREE.  */
  tree exp;
  /* Inclusive bounds of the range; NULL_TREE means unbounded on that
     side (see init_range_entry / make_range in fold-const.c).  */
  tree low;
  tree high;
  /* True if the test is for EXP inside [LOW, HIGH], false if it is
     for EXP outside of it.  */
  bool in_p;
  /* Whether deriving the range relied on undefined-overflow
     assumptions.  NOTE(review): hedged — confirm against the
     strict_overflow_p uses in init_range_entry.  */
  bool strict_overflow_p;
  /* Presumably the operand index and a link to a related entry used
     by optimize_range_tests — verify against that function.  */
  unsigned int idx, next;
};
2072
/* This is similar to make_range in fold-const.c, but on top of
   GIMPLE instead of trees.  If EXP is non-NULL, it should be
   an SSA_NAME and STMT argument is ignored, otherwise STMT
   argument should be a GIMPLE_COND.

   Initialize *R to the range equivalent of the condition, starting
   from "EXP != 0" (resp. the STMT condition) and refining it by
   walking the SSA definition chain through negations, conversions
   and comparisons.  On failure R->exp stays NULL_TREE.  */

static void
init_range_entry (struct range_entry *r, tree exp, gimple *stmt)
{
  int in_p;
  tree low, high;
  bool is_bool, strict_overflow_p;

  /* Start from an empty (failed) result.  */
  r->exp = NULL_TREE;
  r->in_p = false;
  r->strict_overflow_p = false;
  r->low = NULL_TREE;
  r->high = NULL_TREE;
  /* Only integral SSA_NAMEs (or a GIMPLE_COND via NULL EXP) are
     handled.  */
  if (exp != NULL_TREE
      && (TREE_CODE (exp) != SSA_NAME || !INTEGRAL_TYPE_P (TREE_TYPE (exp))))
    return;

  /* Start with simply saying "EXP != 0" and then look at the code of EXP
     and see if we can refine the range.  Some of the cases below may not
     happen, but it doesn't seem worth worrying about this.  We "continue"
     the outer loop when we've changed something; otherwise we "break"
     the switch, which will "break" the while.  */
  low = exp ? build_int_cst (TREE_TYPE (exp), 0) : boolean_false_node;
  high = low;
  in_p = 0;
  strict_overflow_p = false;
  is_bool = false;
  /* IS_BOOL tracks whether EXP can only take the values 0 and 1;
     only such expressions can be treated as range tests.  */
  if (exp == NULL_TREE)
    is_bool = true;
  else if (TYPE_PRECISION (TREE_TYPE (exp)) == 1)
    {
      if (TYPE_UNSIGNED (TREE_TYPE (exp)))
	is_bool = true;
      else
	return;
    }
  else if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE)
    is_bool = true;

  while (1)
    {
      enum tree_code code;
      tree arg0, arg1, exp_type;
      tree nexp;
      location_t loc;

      if (exp != NULL_TREE)
	{
	  /* Follow the SSA def chain; abnormal PHIs must not be
	     looked through.  */
	  if (TREE_CODE (exp) != SSA_NAME
	      || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp))
	    break;

	  stmt = SSA_NAME_DEF_STMT (exp);
	  if (!is_gimple_assign (stmt))
	    break;

	  code = gimple_assign_rhs_code (stmt);
	  arg0 = gimple_assign_rhs1 (stmt);
	  arg1 = gimple_assign_rhs2 (stmt);
	  exp_type = TREE_TYPE (exp);
	}
      else
	{
	  /* First iteration with NULL EXP: decompose the GIMPLE_COND.  */
	  code = gimple_cond_code (stmt);
	  arg0 = gimple_cond_lhs (stmt);
	  arg1 = gimple_cond_rhs (stmt);
	  exp_type = boolean_type_node;
	}

      if (TREE_CODE (arg0) != SSA_NAME)
	break;
      loc = gimple_location (stmt);
      switch (code)
	{
	case BIT_NOT_EXPR:
	  if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE
	      /* Ensure the range is either +[-,0], +[0,0],
		 -[-,0], -[0,0] or +[1,-], +[1,1], -[1,-] or
		 -[1,1].  If it is e.g. +[-,-] or -[-,-]
		 or similar expression of unconditional true or
		 false, it should not be negated.  */
	      && ((high && integer_zerop (high))
		  || (low && integer_onep (low))))
	    {
	      in_p = !in_p;
	      exp = arg0;
	      continue;
	    }
	  break;
	case SSA_NAME:
	  /* Plain copy: keep walking the chain.  */
	  exp = arg0;
	  continue;
	CASE_CONVERT:
	  if (is_bool)
	    {
	      if ((TYPE_PRECISION (exp_type) == 1
		   || TREE_CODE (exp_type) == BOOLEAN_TYPE)
		  && TYPE_PRECISION (TREE_TYPE (arg0)) > 1)
		return;
	    }
	  else if (TYPE_PRECISION (TREE_TYPE (arg0)) == 1)
	    {
	      if (TYPE_UNSIGNED (TREE_TYPE (arg0)))
		is_bool = true;
	      else
		return;
	    }
	  else if (TREE_CODE (TREE_TYPE (arg0)) == BOOLEAN_TYPE)
	    is_bool = true;
	  goto do_default;
	case EQ_EXPR:
	case NE_EXPR:
	case LT_EXPR:
	case LE_EXPR:
	case GE_EXPR:
	case GT_EXPR:
	  /* Comparison results are boolean by construction.  */
	  is_bool = true;
	  /* FALLTHRU */
	default:
	  if (!is_bool)
	    return;
	do_default:
	  /* Let the fold-const.c machinery refine the range one step.  */
	  nexp = make_range_step (loc, code, arg0, arg1, exp_type,
				  &low, &high, &in_p,
				  &strict_overflow_p);
	  if (nexp != NULL_TREE)
	    {
	      exp = nexp;
	      gcc_assert (TREE_CODE (exp) == SSA_NAME);
	      continue;
	    }
	  break;
	}
      break;
    }
  /* Only publish the result if EXP is known to be 0/1-valued.  */
  if (is_bool)
    {
      r->exp = exp;
      r->in_p = in_p;
      r->low = low;
      r->high = high;
      r->strict_overflow_p = strict_overflow_p;
    }
}
2221
2222 /* Comparison function for qsort. Sort entries
2223 without SSA_NAME exp first, then with SSA_NAMEs sorted
2224 by increasing SSA_NAME_VERSION, and for the same SSA_NAMEs
2225 by increasing ->low and if ->low is the same, by increasing
2226 ->high. ->low == NULL_TREE means minimum, ->high == NULL_TREE
2227 maximum. */
2228
2229 static int
range_entry_cmp(const void * a,const void * b)2230 range_entry_cmp (const void *a, const void *b)
2231 {
2232 const struct range_entry *p = (const struct range_entry *) a;
2233 const struct range_entry *q = (const struct range_entry *) b;
2234
2235 if (p->exp != NULL_TREE && TREE_CODE (p->exp) == SSA_NAME)
2236 {
2237 if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
2238 {
2239 /* Group range_entries for the same SSA_NAME together. */
2240 if (SSA_NAME_VERSION (p->exp) < SSA_NAME_VERSION (q->exp))
2241 return -1;
2242 else if (SSA_NAME_VERSION (p->exp) > SSA_NAME_VERSION (q->exp))
2243 return 1;
2244 /* If ->low is different, NULL low goes first, then by
2245 ascending low. */
2246 if (p->low != NULL_TREE)
2247 {
2248 if (q->low != NULL_TREE)
2249 {
2250 tree tem = fold_binary (LT_EXPR, boolean_type_node,
2251 p->low, q->low);
2252 if (tem && integer_onep (tem))
2253 return -1;
2254 tem = fold_binary (GT_EXPR, boolean_type_node,
2255 p->low, q->low);
2256 if (tem && integer_onep (tem))
2257 return 1;
2258 }
2259 else
2260 return 1;
2261 }
2262 else if (q->low != NULL_TREE)
2263 return -1;
2264 /* If ->high is different, NULL high goes last, before that by
2265 ascending high. */
2266 if (p->high != NULL_TREE)
2267 {
2268 if (q->high != NULL_TREE)
2269 {
2270 tree tem = fold_binary (LT_EXPR, boolean_type_node,
2271 p->high, q->high);
2272 if (tem && integer_onep (tem))
2273 return -1;
2274 tem = fold_binary (GT_EXPR, boolean_type_node,
2275 p->high, q->high);
2276 if (tem && integer_onep (tem))
2277 return 1;
2278 }
2279 else
2280 return -1;
2281 }
2282 else if (q->high != NULL_TREE)
2283 return 1;
2284 /* If both ranges are the same, sort below by ascending idx. */
2285 }
2286 else
2287 return 1;
2288 }
2289 else if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
2290 return -1;
2291
2292 if (p->idx < q->idx)
2293 return -1;
2294 else
2295 {
2296 gcc_checking_assert (p->idx > q->idx);
2297 return 1;
2298 }
2299 }
2300
2301 /* Helper function for update_range_test. Force EXPR into an SSA_NAME,
2302 insert needed statements BEFORE or after GSI. */
2303
2304 static tree
force_into_ssa_name(gimple_stmt_iterator * gsi,tree expr,bool before)2305 force_into_ssa_name (gimple_stmt_iterator *gsi, tree expr, bool before)
2306 {
2307 enum gsi_iterator_update m = before ? GSI_SAME_STMT : GSI_CONTINUE_LINKING;
2308 tree ret = force_gimple_operand_gsi (gsi, expr, true, NULL_TREE, before, m);
2309 if (TREE_CODE (ret) != SSA_NAME)
2310 {
2311 gimple *g = gimple_build_assign (make_ssa_name (TREE_TYPE (ret)), ret);
2312 if (before)
2313 gsi_insert_before (gsi, g, GSI_SAME_STMT);
2314 else
2315 gsi_insert_after (gsi, g, GSI_CONTINUE_LINKING);
2316 ret = gimple_assign_lhs (g);
2317 }
2318 return ret;
2319 }
2320
/* Helper routine of optimize_range_test.
   [EXP, IN_P, LOW, HIGH, STRICT_OVERFLOW_P] is a merged range for
   RANGE and OTHERRANGE through OTHERRANGE + COUNT - 1 ranges,
   OPCODE and OPS are arguments of optimize_range_tests.  If OTHERRANGE
   is NULL, OTHERRANGEP should not be and then OTHERRANGEP points to
   an array of COUNT pointers to other ranges.  Return
   true if the range merge has been successful.
   If OPCODE is ERROR_MARK, this is called from within
   maybe_optimize_range_tests and is performing inter-bb range optimization.
   In that case, whether an op is BIT_AND_EXPR or BIT_IOR_EXPR is found in
   oe->rank.

   On success, RANGE is updated to describe the merged range, RANGE's
   operand entry is replaced by the new comparison (emitted into the IL
   together with SEQ), and the other ranges' operand entries are replaced
   by neutral values so they fold away.  */

static bool
update_range_test (struct range_entry *range, struct range_entry *otherrange,
		   struct range_entry **otherrangep,
		   unsigned int count, enum tree_code opcode,
		   vec<operand_entry *> *ops, tree exp, gimple_seq seq,
		   bool in_p, tree low, tree high, bool strict_overflow_p)
{
  operand_entry *oe = (*ops)[range->idx];
  tree op = oe->op;
  /* A NULL op means the test came from a GIMPLE_COND in block oe->id.  */
  gimple *stmt = op ? SSA_NAME_DEF_STMT (op)
    : last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id));
  location_t loc = gimple_location (stmt);
  tree optype = op ? TREE_TYPE (op) : boolean_type_node;
  /* Build the merged range check as a GENERIC expression.  */
  tree tem = build_range_check (loc, optype, unshare_expr (exp),
				in_p, low, high);
  enum warn_strict_overflow_code wc = WARN_STRICT_OVERFLOW_COMPARISON;
  gimple_stmt_iterator gsi;
  unsigned int i, uid;

  if (tem == NULL_TREE)
    return false;

  /* If op is default def SSA_NAME, there is no place to insert the
     new comparison.  Give up, unless we can use OP itself as the
     range test.  */
  if (op && SSA_NAME_IS_DEFAULT_DEF (op))
    {
      if (op == range->exp
	  && ((TYPE_PRECISION (optype) == 1 && TYPE_UNSIGNED (optype))
	      || TREE_CODE (optype) == BOOLEAN_TYPE)
	  && (op == tem
	      || (TREE_CODE (tem) == EQ_EXPR
		  && TREE_OPERAND (tem, 0) == op
		  && integer_onep (TREE_OPERAND (tem, 1))))
	  && opcode != BIT_IOR_EXPR
	  && (opcode != ERROR_MARK || oe->rank != BIT_IOR_EXPR))
	{
	  /* The check is equivalent to OP itself; no insertion needed.  */
	  stmt = NULL;
	  tem = op;
	}
      else
	return false;
    }

  if (strict_overflow_p && issue_strict_overflow_warning (wc))
    warning_at (loc, OPT_Wstrict_overflow,
		"assuming signed overflow does not occur "
		"when simplifying range test");

  /* Dump the ranges being merged and the resulting expression.  */
  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      struct range_entry *r;
      fprintf (dump_file, "Optimizing range tests ");
      print_generic_expr (dump_file, range->exp);
      fprintf (dump_file, " %c[", range->in_p ? '+' : '-');
      print_generic_expr (dump_file, range->low);
      fprintf (dump_file, ", ");
      print_generic_expr (dump_file, range->high);
      fprintf (dump_file, "]");
      for (i = 0; i < count; i++)
	{
	  if (otherrange)
	    r = otherrange + i;
	  else
	    r = otherrangep[i];
	  if (r->exp
	      && r->exp != range->exp
	      && TREE_CODE (r->exp) == SSA_NAME)
	    {
	      fprintf (dump_file, " and ");
	      print_generic_expr (dump_file, r->exp);
	    }
	  else
	    fprintf (dump_file, " and");
	  fprintf (dump_file, " %c[", r->in_p ? '+' : '-');
	  print_generic_expr (dump_file, r->low);
	  fprintf (dump_file, ", ");
	  print_generic_expr (dump_file, r->high);
	  fprintf (dump_file, "]");
	}
      fprintf (dump_file, "\n into ");
      print_generic_expr (dump_file, tem);
      fprintf (dump_file, "\n");
    }

  /* For || (or inter-bb ||) the operands are negated range tests.  */
  if (opcode == BIT_IOR_EXPR
      || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
    tem = invert_truthvalue_loc (loc, tem);

  tem = fold_convert_loc (loc, optype, tem);
  if (stmt)
    {
      gsi = gsi_for_stmt (stmt);
      uid = gimple_uid (stmt);
    }
  else
    {
      gsi = gsi_none ();
      uid = 0;
    }
  if (stmt == NULL)
    gcc_checking_assert (tem == op);
  /* In rare cases range->exp can be equal to lhs of stmt.
     In that case we have to insert after the stmt rather than before
     it.  If stmt is a PHI, insert it at the start of the basic block.  */
  else if (op != range->exp)
    {
      gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT);
      tem = force_into_ssa_name (&gsi, tem, true);
      gsi_prev (&gsi);
    }
  else if (gimple_code (stmt) != GIMPLE_PHI)
    {
      gsi_insert_seq_after (&gsi, seq, GSI_CONTINUE_LINKING);
      tem = force_into_ssa_name (&gsi, tem, false);
    }
  else
    {
      gsi = gsi_after_labels (gimple_bb (stmt));
      if (!gsi_end_p (gsi))
	uid = gimple_uid (gsi_stmt (gsi));
      else
	{
	  gsi = gsi_start_bb (gimple_bb (stmt));
	  uid = 1;
	  while (!gsi_end_p (gsi))
	    {
	      uid = gimple_uid (gsi_stmt (gsi));
	      gsi_next (&gsi);
	    }
	}
      gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT);
      tem = force_into_ssa_name (&gsi, tem, true);
      if (gsi_end_p (gsi))
	gsi = gsi_last_bb (gimple_bb (stmt));
      else
	gsi_prev (&gsi);
    }
  /* Newly inserted statements have UID 0; walk backwards giving them
     the UID of the insertion point until a statement that already has
     a UID is reached, keeping reassoc's UID ordering consistent.  */
  for (; !gsi_end_p (gsi); gsi_prev (&gsi))
    if (gimple_uid (gsi_stmt (gsi)))
      break;
    else
      gimple_set_uid (gsi_stmt (gsi), uid);

  oe->op = tem;
  range->exp = exp;
  range->low = low;
  range->high = high;
  range->in_p = in_p;
  range->strict_overflow_p = false;

  for (i = 0; i < count; i++)
    {
      if (otherrange)
	range = otherrange + i;
      else
	range = otherrangep[i];
      oe = (*ops)[range->idx];
      /* Now change all the other range test immediate uses, so that
	 those tests will be optimized away.  */
      if (opcode == ERROR_MARK)
	{
	  /* Inter-bb case: replace with the neutral element of the
	     operation recorded in oe->rank (0 for ||, 1 for &&).  */
	  if (oe->op)
	    oe->op = build_int_cst (TREE_TYPE (oe->op),
				    oe->rank == BIT_IOR_EXPR ? 0 : 1);
	  else
	    oe->op = (oe->rank == BIT_IOR_EXPR
		      ? boolean_false_node : boolean_true_node);
	}
      else
	oe->op = error_mark_node;
      range->exp = NULL_TREE;
      range->low = NULL_TREE;
      range->high = NULL_TREE;
    }
  return true;
}
2510
2511 /* Optimize X == CST1 || X == CST2
2512 if popcount (CST1 ^ CST2) == 1 into
2513 (X & ~(CST1 ^ CST2)) == (CST1 & ~(CST1 ^ CST2)).
2514 Similarly for ranges. E.g.
2515 X != 2 && X != 3 && X != 10 && X != 11
2516 will be transformed by the previous optimization into
2517 !((X - 2U) <= 1U || (X - 10U) <= 1U)
2518 and this loop can transform that into
2519 !(((X & ~8) - 2U) <= 1U). */
2520
2521 static bool
optimize_range_tests_xor(enum tree_code opcode,tree type,tree lowi,tree lowj,tree highi,tree highj,vec<operand_entry * > * ops,struct range_entry * rangei,struct range_entry * rangej)2522 optimize_range_tests_xor (enum tree_code opcode, tree type,
2523 tree lowi, tree lowj, tree highi, tree highj,
2524 vec<operand_entry *> *ops,
2525 struct range_entry *rangei,
2526 struct range_entry *rangej)
2527 {
2528 tree lowxor, highxor, tem, exp;
2529 /* Check lowi ^ lowj == highi ^ highj and
2530 popcount (lowi ^ lowj) == 1. */
2531 lowxor = fold_binary (BIT_XOR_EXPR, type, lowi, lowj);
2532 if (lowxor == NULL_TREE || TREE_CODE (lowxor) != INTEGER_CST)
2533 return false;
2534 if (!integer_pow2p (lowxor))
2535 return false;
2536 highxor = fold_binary (BIT_XOR_EXPR, type, highi, highj);
2537 if (!tree_int_cst_equal (lowxor, highxor))
2538 return false;
2539
2540 tem = fold_build1 (BIT_NOT_EXPR, type, lowxor);
2541 exp = fold_build2 (BIT_AND_EXPR, type, rangei->exp, tem);
2542 lowj = fold_build2 (BIT_AND_EXPR, type, lowi, tem);
2543 highj = fold_build2 (BIT_AND_EXPR, type, highi, tem);
2544 if (update_range_test (rangei, rangej, NULL, 1, opcode, ops, exp,
2545 NULL, rangei->in_p, lowj, highj,
2546 rangei->strict_overflow_p
2547 || rangej->strict_overflow_p))
2548 return true;
2549 return false;
2550 }
2551
2552 /* Optimize X == CST1 || X == CST2
2553 if popcount (CST2 - CST1) == 1 into
2554 ((X - CST1) & ~(CST2 - CST1)) == 0.
2555 Similarly for ranges. E.g.
2556 X == 43 || X == 76 || X == 44 || X == 78 || X == 77 || X == 46
2557 || X == 75 || X == 45
2558 will be transformed by the previous optimization into
2559 (X - 43U) <= 3U || (X - 75U) <= 3U
2560 and this loop can transform that into
2561 ((X - 43U) & ~(75U - 43U)) <= 3U. */
2562 static bool
optimize_range_tests_diff(enum tree_code opcode,tree type,tree lowi,tree lowj,tree highi,tree highj,vec<operand_entry * > * ops,struct range_entry * rangei,struct range_entry * rangej)2563 optimize_range_tests_diff (enum tree_code opcode, tree type,
2564 tree lowi, tree lowj, tree highi, tree highj,
2565 vec<operand_entry *> *ops,
2566 struct range_entry *rangei,
2567 struct range_entry *rangej)
2568 {
2569 tree tem1, tem2, mask;
2570 /* Check highi - lowi == highj - lowj. */
2571 tem1 = fold_binary (MINUS_EXPR, type, highi, lowi);
2572 if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST)
2573 return false;
2574 tem2 = fold_binary (MINUS_EXPR, type, highj, lowj);
2575 if (!tree_int_cst_equal (tem1, tem2))
2576 return false;
2577 /* Check popcount (lowj - lowi) == 1. */
2578 tem1 = fold_binary (MINUS_EXPR, type, lowj, lowi);
2579 if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST)
2580 return false;
2581 if (!integer_pow2p (tem1))
2582 return false;
2583
2584 type = unsigned_type_for (type);
2585 tem1 = fold_convert (type, tem1);
2586 tem2 = fold_convert (type, tem2);
2587 lowi = fold_convert (type, lowi);
2588 mask = fold_build1 (BIT_NOT_EXPR, type, tem1);
2589 tem1 = fold_build2 (MINUS_EXPR, type,
2590 fold_convert (type, rangei->exp), lowi);
2591 tem1 = fold_build2 (BIT_AND_EXPR, type, tem1, mask);
2592 lowj = build_int_cst (type, 0);
2593 if (update_range_test (rangei, rangej, NULL, 1, opcode, ops, tem1,
2594 NULL, rangei->in_p, lowj, tem2,
2595 rangei->strict_overflow_p
2596 || rangej->strict_overflow_p))
2597 return true;
2598 return false;
2599 }
2600
2601 /* It does some common checks for function optimize_range_tests_xor and
2602 optimize_range_tests_diff.
2603 If OPTIMIZE_XOR is TRUE, it calls optimize_range_tests_xor.
2604 Else it calls optimize_range_tests_diff. */
2605
2606 static bool
optimize_range_tests_1(enum tree_code opcode,int first,int length,bool optimize_xor,vec<operand_entry * > * ops,struct range_entry * ranges)2607 optimize_range_tests_1 (enum tree_code opcode, int first, int length,
2608 bool optimize_xor, vec<operand_entry *> *ops,
2609 struct range_entry *ranges)
2610 {
2611 int i, j;
2612 bool any_changes = false;
2613 for (i = first; i < length; i++)
2614 {
2615 tree lowi, highi, lowj, highj, type, tem;
2616
2617 if (ranges[i].exp == NULL_TREE || ranges[i].in_p)
2618 continue;
2619 type = TREE_TYPE (ranges[i].exp);
2620 if (!INTEGRAL_TYPE_P (type))
2621 continue;
2622 lowi = ranges[i].low;
2623 if (lowi == NULL_TREE)
2624 lowi = TYPE_MIN_VALUE (type);
2625 highi = ranges[i].high;
2626 if (highi == NULL_TREE)
2627 continue;
2628 for (j = i + 1; j < length && j < i + 64; j++)
2629 {
2630 bool changes;
2631 if (ranges[i].exp != ranges[j].exp || ranges[j].in_p)
2632 continue;
2633 lowj = ranges[j].low;
2634 if (lowj == NULL_TREE)
2635 continue;
2636 highj = ranges[j].high;
2637 if (highj == NULL_TREE)
2638 highj = TYPE_MAX_VALUE (type);
2639 /* Check lowj > highi. */
2640 tem = fold_binary (GT_EXPR, boolean_type_node,
2641 lowj, highi);
2642 if (tem == NULL_TREE || !integer_onep (tem))
2643 continue;
2644 if (optimize_xor)
2645 changes = optimize_range_tests_xor (opcode, type, lowi, lowj,
2646 highi, highj, ops,
2647 ranges + i, ranges + j);
2648 else
2649 changes = optimize_range_tests_diff (opcode, type, lowi, lowj,
2650 highi, highj, ops,
2651 ranges + i, ranges + j);
2652 if (changes)
2653 {
2654 any_changes = true;
2655 break;
2656 }
2657 }
2658 }
2659 return any_changes;
2660 }
2661
/* Helper function of optimize_range_tests_to_bit_test.  Handle a single
   range, EXP, LOW, HIGH, compute bit mask of bits to test and return
   EXP on success, NULL otherwise.

   PREC is the word precision; MASK receives the bits [LOW, HIGH]
   relative to TOTALLOW.  If TOTALLOWP is non-NULL the lowest tested
   value may be stored there instead of being taken from TOTALLOW.  */

static tree
extract_bit_test_mask (tree exp, int prec, tree totallow, tree low, tree high,
		       wide_int *mask, tree *totallowp)
{
  tree tem = int_const_binop (MINUS_EXPR, high, low);
  /* The range width must be a non-negative constant smaller than
     PREC, otherwise it cannot be encoded in a single word mask.  */
  if (tem == NULL_TREE
      || TREE_CODE (tem) != INTEGER_CST
      || TREE_OVERFLOW (tem)
      || tree_int_cst_sgn (tem) == -1
      || compare_tree_int (tem, prec) != -1)
    return NULL_TREE;

  /* MAX consecutive low bits represent the values LOW .. HIGH.  */
  unsigned HOST_WIDE_INT max = tree_to_uhwi (tem) + 1;
  *mask = wi::shifted_mask (0, max, false, prec);
  if (TREE_CODE (exp) == BIT_AND_EXPR
      && TREE_CODE (TREE_OPERAND (exp, 1)) == INTEGER_CST)
    {
      /* EXP is X & CST.  If ~CST has a single bit set that still fits
	 below PREC, the range covers two runs of values; widen the
	 mask accordingly and strip the BIT_AND_EXPR.  */
      widest_int msk = wi::to_widest (TREE_OPERAND (exp, 1));
      msk = wi::zext (~msk, TYPE_PRECISION (TREE_TYPE (exp)));
      if (wi::popcount (msk) == 1
	  && wi::ltu_p (msk, prec - max))
	{
	  *mask |= wi::shifted_mask (msk.to_uhwi (), max, false, prec);
	  max += msk.to_uhwi ();
	  exp = TREE_OPERAND (exp, 0);
	  /* If EXP is now Y + CST2 with LOW == 0, fold the addend into
	     the bias of the mask instead.  */
	  if (integer_zerop (low)
	      && TREE_CODE (exp) == PLUS_EXPR
	      && TREE_CODE (TREE_OPERAND (exp, 1)) == INTEGER_CST)
	    {
	      tree ret = TREE_OPERAND (exp, 0);
	      STRIP_NOPS (ret);
	      widest_int bias
		= wi::neg (wi::sext (wi::to_widest (TREE_OPERAND (exp, 1)),
				     TYPE_PRECISION (TREE_TYPE (low))));
	      tree tbias = wide_int_to_tree (TREE_TYPE (ret), bias);
	      if (totallowp)
		{
		  *totallowp = tbias;
		  return ret;
		}
	      else if (!tree_int_cst_lt (totallow, tbias))
		return NULL_TREE;
	      /* Shift the mask by the offset of TBIAS from TOTALLOW,
		 provided it still fits into the word.  */
	      bias = wi::to_widest (tbias);
	      bias -= wi::to_widest (totallow);
	      if (bias >= 0 && bias < prec - max)
		{
		  *mask = wi::lshift (*mask, bias);
		  return ret;
		}
	    }
	}
    }
  if (totallowp)
    return exp;
  /* Without a TOTALLOWP out-parameter, LOW must not be below TOTALLOW
     and the offset LOW - TOTALLOW must keep the mask within PREC.  */
  if (!tree_int_cst_lt (totallow, low))
    return exp;
  tem = int_const_binop (MINUS_EXPR, low, totallow);
  if (tem == NULL_TREE
      || TREE_CODE (tem) != INTEGER_CST
      || TREE_OVERFLOW (tem)
      || compare_tree_int (tem, prec - max) == 1)
    return NULL_TREE;

  *mask = wi::lshift (*mask, wi::to_widest (tem));
  return exp;
}
2732
/* Attempt to optimize small range tests using bit test.
   E.g.
   X != 43 && X != 76 && X != 44 && X != 78 && X != 49
   && X != 77 && X != 46 && X != 75 && X != 45 && X != 82
   has been by earlier optimizations optimized into:
   ((X - 43U) & ~32U) > 3U && X != 49 && X != 82
   As all the 43 through 82 range is less than 64 numbers,
   for 64-bit word targets optimize that into:
   (X - 43U) > 40U && ((1 << (X - 43U)) & 0x8F0000004FULL) == 0  */

static bool
optimize_range_tests_to_bit_test (enum tree_code opcode, int first, int length,
				  vec<operand_entry *> *ops,
				  struct range_entry *ranges)
{
  int i, j;
  bool any_changes = false;
  int prec = GET_MODE_BITSIZE (word_mode);
  auto_vec<struct range_entry *, 64> candidates;

  for (i = first; i < length - 2; i++)
    {
      tree lowi, highi, lowj, highj, type;

      if (ranges[i].exp == NULL_TREE || ranges[i].in_p)
	continue;
      type = TREE_TYPE (ranges[i].exp);
      if (!INTEGRAL_TYPE_P (type))
	continue;
      lowi = ranges[i].low;
      if (lowi == NULL_TREE)
	lowi = TYPE_MIN_VALUE (type);
      highi = ranges[i].high;
      if (highi == NULL_TREE)
	continue;
      wide_int mask;
      /* Seed the bit mask from range I; LOWI is updated to the base
	 value all further masks are relative to.  */
      tree exp = extract_bit_test_mask (ranges[i].exp, prec, lowi, lowi,
					highi, &mask, &lowi);
      if (exp == NULL_TREE)
	continue;
      bool strict_overflow_p = ranges[i].strict_overflow_p;
      candidates.truncate (0);
      int end = MIN (i + 64, length);
      /* Collect further ranges on the same expression whose values fit
	 into the same word-sized mask.  */
      for (j = i + 1; j < end; j++)
	{
	  tree exp2;
	  if (ranges[j].exp == NULL_TREE || ranges[j].in_p)
	    continue;
	  if (ranges[j].exp == exp)
	    ;
	  else if (TREE_CODE (ranges[j].exp) == BIT_AND_EXPR)
	    {
	      /* Also accept EXP & CST and (EXP + CST2) & CST forms,
		 which extract_bit_test_mask knows how to fold into
		 the mask.  */
	      exp2 = TREE_OPERAND (ranges[j].exp, 0);
	      if (exp2 == exp)
		;
	      else if (TREE_CODE (exp2) == PLUS_EXPR)
		{
		  exp2 = TREE_OPERAND (exp2, 0);
		  STRIP_NOPS (exp2);
		  if (exp2 != exp)
		    continue;
		}
	      else
		continue;
	    }
	  else
	    continue;
	  lowj = ranges[j].low;
	  if (lowj == NULL_TREE)
	    continue;
	  highj = ranges[j].high;
	  if (highj == NULL_TREE)
	    highj = TYPE_MAX_VALUE (type);
	  wide_int mask2;
	  exp2 = extract_bit_test_mask (ranges[j].exp, prec, lowi, lowj,
					highj, &mask2, NULL);
	  if (exp2 != exp)
	    continue;
	  mask |= mask2;
	  strict_overflow_p |= ranges[j].strict_overflow_p;
	  candidates.safe_push (&ranges[j]);
	}

      /* If we need otherwise 3 or more comparisons, use a bit test.
	 (Range I itself plus at least two candidates.)  */
      if (candidates.length () >= 2)
	{
	  tree high = wide_int_to_tree (TREE_TYPE (lowi),
					wi::to_widest (lowi)
					+ prec - 1 - wi::clz (mask));
	  operand_entry *oe = (*ops)[ranges[i].idx];
	  tree op = oe->op;
	  gimple *stmt = op ? SSA_NAME_DEF_STMT (op)
	    : last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id));
	  location_t loc = gimple_location (stmt);
	  tree optype = op ? TREE_TYPE (op) : boolean_type_node;

	  /* See if it isn't cheaper to pretend the minimum value of the
	     range is 0, if maximum value is small enough.
	     We can avoid then subtraction of the minimum value, but the
	     mask constant could be perhaps more expensive.  */
	  if (compare_tree_int (lowi, 0) > 0
	      && compare_tree_int (high, prec) < 0)
	    {
	      int cost_diff;
	      HOST_WIDE_INT m = tree_to_uhwi (lowi);
	      rtx reg = gen_raw_REG (word_mode, 10000);
	      bool speed_p = optimize_bb_for_speed_p (gimple_bb (stmt));
	      /* Cost of the subtraction we would save ...  */
	      cost_diff = set_rtx_cost (gen_rtx_PLUS (word_mode, reg,
						      GEN_INT (-m)), speed_p);
	      /* ... versus the cost difference between the shifted and
		 unshifted mask constants.  */
	      rtx r = immed_wide_int_const (mask, word_mode);
	      cost_diff += set_src_cost (gen_rtx_AND (word_mode, reg, r),
					 word_mode, speed_p);
	      r = immed_wide_int_const (wi::lshift (mask, m), word_mode);
	      cost_diff -= set_src_cost (gen_rtx_AND (word_mode, reg, r),
					 word_mode, speed_p);
	      if (cost_diff > 0)
		{
		  mask = wi::lshift (mask, m);
		  lowi = build_zero_cst (TREE_TYPE (lowi));
		}
	    }

	  /* Build the guarding range check EXP in [LOWI, HIGH]; if it
	     degenerates to a bare value there is nothing to pair the
	     bit test with, so punt.  */
	  tree tem = build_range_check (loc, optype, unshare_expr (exp),
					false, lowi, high);
	  if (tem == NULL_TREE || is_gimple_val (tem))
	    continue;
	  tree etype = unsigned_type_for (TREE_TYPE (exp));
	  exp = fold_build2_loc (loc, MINUS_EXPR, etype,
				 fold_convert_loc (loc, etype, exp),
				 fold_convert_loc (loc, etype, lowi));
	  exp = fold_convert_loc (loc, integer_type_node, exp);
	  tree word_type = lang_hooks.types.type_for_mode (word_mode, 1);
	  /* Build ((1 << (EXP - LOWI)) & MASK) == 0.  */
	  exp = fold_build2_loc (loc, LSHIFT_EXPR, word_type,
				 build_int_cst (word_type, 1), exp);
	  exp = fold_build2_loc (loc, BIT_AND_EXPR, word_type, exp,
				 wide_int_to_tree (word_type, mask));
	  exp = fold_build2_loc (loc, EQ_EXPR, optype, exp,
				 build_zero_cst (word_type));
	  if (is_gimple_val (exp))
	    continue;

	  /* The shift might have undefined behavior if TEM is true,
	     but reassociate_bb isn't prepared to have basic blocks
	     split when it is running.  So, temporarily emit a code
	     with BIT_IOR_EXPR instead of &&, and fix it up in
	     branch_fixup.  */
	  gimple_seq seq;
	  tem = force_gimple_operand (tem, &seq, true, NULL_TREE);
	  gcc_assert (TREE_CODE (tem) == SSA_NAME);
	  gimple_set_visited (SSA_NAME_DEF_STMT (tem), true);
	  gimple_seq seq2;
	  exp = force_gimple_operand (exp, &seq2, true, NULL_TREE);
	  gimple_seq_add_seq_without_update (&seq, seq2);
	  gcc_assert (TREE_CODE (exp) == SSA_NAME);
	  gimple_set_visited (SSA_NAME_DEF_STMT (exp), true);
	  gimple *g = gimple_build_assign (make_ssa_name (optype),
					   BIT_IOR_EXPR, tem, exp);
	  gimple_set_location (g, loc);
	  gimple_seq_add_stmt_without_update (&seq, g);
	  exp = gimple_assign_lhs (g);
	  tree val = build_zero_cst (optype);
	  if (update_range_test (&ranges[i], NULL, candidates.address (),
				 candidates.length (), opcode, ops, exp,
				 seq, false, val, val, strict_overflow_p))
	    {
	      any_changes = true;
	      /* Remember TEM so branch_fixup can restore the && semantics
		 that guard the potentially-UB shift.  */
	      reassoc_branch_fixups.safe_push (tem);
	    }
	  else
	    gimple_seq_discard (seq);
	}
    }
  return any_changes;
}
2907
/* Optimize x != 0 && y != 0 && z != 0 into (x | y | z) != 0
   and similarly x != -1 && y != -1 && z != -1 into (x & y & z) != -1.  */

static bool
optimize_range_tests_cmp_bitwise (enum tree_code opcode, int first, int length,
				  vec<operand_entry *> *ops,
				  struct range_entry *ranges)
{
  int i;
  unsigned int b;
  bool any_changes = false;
  /* BUCKETS maps (precision * 2 + zero/all-ones flag) to the head of a
     chain of range indexes; CHAINS links entries in each bucket.  Both
     store 1-based indexes, 0 meaning end of chain.  */
  auto_vec<int, 128> buckets;
  auto_vec<int, 32> chains;
  auto_vec<struct range_entry *, 32> candidates;

  for (i = first; i < length; i++)
    {
      /* Only singleton ranges (low == high) testing an SSA_NAME of a
	 multi-bit non-boolean integral type are interesting.  */
      if (ranges[i].exp == NULL_TREE
	  || TREE_CODE (ranges[i].exp) != SSA_NAME
	  || !ranges[i].in_p
	  || TYPE_PRECISION (TREE_TYPE (ranges[i].exp)) <= 1
	  || TREE_CODE (TREE_TYPE (ranges[i].exp)) == BOOLEAN_TYPE
	  || ranges[i].low == NULL_TREE
	  || ranges[i].low != ranges[i].high)
	continue;

      /* The compared constant must be 0 (use |) or all-ones (use &).  */
      bool zero_p = integer_zerop (ranges[i].low);
      if (!zero_p && !integer_all_onesp (ranges[i].low))
	continue;

      b = TYPE_PRECISION (TREE_TYPE (ranges[i].exp)) * 2 + !zero_p;
      if (buckets.length () <= b)
	buckets.safe_grow_cleared (b + 1);
      if (chains.length () <= (unsigned) i)
	chains.safe_grow (i + 1);
      chains[i] = buckets[b];
      buckets[b] = i + 1;
    }

  FOR_EACH_VEC_ELT (buckets, b, i)
    if (i && chains[i - 1])
      {
	int j, k = i;
	/* Pick K as the chain entry whose definition dominates the
	   others; the combined statement is inserted at K's test.  */
	for (j = chains[i - 1]; j; j = chains[j - 1])
	  {
	    gimple *gk = SSA_NAME_DEF_STMT (ranges[k - 1].exp);
	    gimple *gj = SSA_NAME_DEF_STMT (ranges[j - 1].exp);
	    if (reassoc_stmt_dominates_stmt_p (gk, gj))
	      k = j;
	  }
	tree type1 = TREE_TYPE (ranges[k - 1].exp);
	tree type2 = NULL_TREE;
	bool strict_overflow_p = false;
	candidates.truncate (0);
	/* First collect entries compatible with TYPE1, or with one
	   common second type TYPE2 (entries of TYPE2 are combined
	   first and then converted to TYPE1).  */
	for (j = i; j; j = chains[j - 1])
	  {
	    tree type = TREE_TYPE (ranges[j - 1].exp);
	    strict_overflow_p |= ranges[j - 1].strict_overflow_p;
	    if (j == k
		|| useless_type_conversion_p (type1, type))
	      ;
	    else if (type2 == NULL_TREE
		     || useless_type_conversion_p (type2, type))
	      {
		if (type2 == NULL_TREE)
		  type2 = type;
		candidates.safe_push (&ranges[j - 1]);
	      }
	  }
	unsigned l = candidates.length ();
	/* Then append the TYPE1-compatible entries; L marks the
	   boundary between the TYPE2 and TYPE1 groups.  */
	for (j = i; j; j = chains[j - 1])
	  {
	    tree type = TREE_TYPE (ranges[j - 1].exp);
	    if (j == k)
	      continue;
	    if (useless_type_conversion_p (type1, type))
	      ;
	    else if (type2 == NULL_TREE
		     || useless_type_conversion_p (type2, type))
	      continue;
	    candidates.safe_push (&ranges[j - 1]);
	  }
	/* Emit the chain of BIT_AND_EXPR/BIT_IOR_EXPR statements
	   combining all the candidate expressions into OP.  */
	gimple_seq seq = NULL;
	tree op = NULL_TREE;
	unsigned int id;
	struct range_entry *r;
	candidates.safe_push (&ranges[k - 1]);
	FOR_EACH_VEC_ELT (candidates, id, r)
	  {
	    gimple *g;
	    if (id == 0)
	      {
		op = r->exp;
		continue;
	      }
	    /* Crossing from the TYPE2 group into the TYPE1 group:
	       convert the accumulated TYPE2 result to TYPE1.  */
	    if (id == l)
	      {
		g = gimple_build_assign (make_ssa_name (type1), NOP_EXPR, op);
		gimple_seq_add_stmt_without_update (&seq, g);
		op = gimple_assign_lhs (g);
	      }
	    tree type = TREE_TYPE (r->exp);
	    tree exp = r->exp;
	    if (id >= l && !useless_type_conversion_p (type1, type))
	      {
		g = gimple_build_assign (make_ssa_name (type1), NOP_EXPR, exp);
		gimple_seq_add_stmt_without_update (&seq, g);
		exp = gimple_assign_lhs (g);
	      }
	    /* Odd buckets compare against all-ones and use &, even
	       buckets compare against zero and use |.  */
	    g = gimple_build_assign (make_ssa_name (id >= l ? type1 : type2),
				     (b & 1) ? BIT_AND_EXPR : BIT_IOR_EXPR,
				     op, exp);
	    gimple_seq_add_stmt_without_update (&seq, g);
	    op = gimple_assign_lhs (g);
	  }
	candidates.pop ();
	if (update_range_test (&ranges[k - 1], NULL, candidates.address (),
			       candidates.length (), opcode, ops, op,
			       seq, true, ranges[k - 1].low,
			       ranges[k - 1].low, strict_overflow_p))
	  any_changes = true;
	else
	  gimple_seq_discard (seq);
      }

  return any_changes;
}
3035
3036 /* Attempt to optimize for signed a and b where b is known to be >= 0:
3037 a >= 0 && a < b into (unsigned) a < (unsigned) b
3038 a >= 0 && a <= b into (unsigned) a <= (unsigned) b */
3039
static bool
optimize_range_tests_var_bound (enum tree_code opcode, int first, int length,
				vec<operand_entry *> *ops,
				struct range_entry *ranges,
				basic_block first_bb)
{
  int i;
  bool any_changes = false;
  hash_map<tree, int> *map = NULL;

  /* First pass: collect all "EXP >= 0" tests on signed EXP (in_p range
     [0, +INF]) into MAP, keyed by the SSA_NAME being tested.  The index
     stored is the position in RANGES so the test can be located and
     eliminated later.  */
  for (i = first; i < length; i++)
    {
      if (ranges[i].exp == NULL_TREE
	  || TREE_CODE (ranges[i].exp) != SSA_NAME
	  || !ranges[i].in_p)
	continue;

      tree type = TREE_TYPE (ranges[i].exp);
      if (!INTEGRAL_TYPE_P (type)
	  || TYPE_UNSIGNED (type)
	  || ranges[i].low == NULL_TREE
	  || !integer_zerop (ranges[i].low)
	  || ranges[i].high != NULL_TREE)
	continue;
      /* EXP >= 0 here.  */
      if (map == NULL)
	map = new hash_map <tree, int>;
      map->put (ranges[i].exp, i);
    }

  if (map == NULL)
    return false;

  /* Second pass: look for comparisons A CMP B (range [0,0] or its
     single-bit negation) whose left operand has a recorded ">= 0" test,
     and rewrite them as unsigned comparisons, making the ">= 0" test
     redundant.  */
  for (i = 0; i < length; i++)
    {
      bool in_p = ranges[i].in_p;
      if (ranges[i].low == NULL_TREE
	  || ranges[i].high == NULL_TREE)
	continue;
      if (!integer_zerop (ranges[i].low)
	  || !integer_zerop (ranges[i].high))
	{
	  /* For a 1-bit unsigned EXP, the range [1,1] is just the
	     negation of [0,0]; fold that into IN_P instead.  */
	  if (ranges[i].exp
	      && TYPE_PRECISION (TREE_TYPE (ranges[i].exp)) == 1
	      && TYPE_UNSIGNED (TREE_TYPE (ranges[i].exp))
	      && integer_onep (ranges[i].low)
	      && integer_onep (ranges[i].high))
	    in_p = !in_p;
	  else
	    continue;
	}

      /* Fetch the comparison either from the defining assignment of
	 EXP, or from the GIMPLE_COND of the block recorded in the
	 operand entry (inter-bb case, EXP == NULL).  */
      gimple *stmt;
      tree_code ccode;
      tree rhs1, rhs2;
      if (ranges[i].exp)
	{
	  if (TREE_CODE (ranges[i].exp) != SSA_NAME)
	    continue;
	  stmt = SSA_NAME_DEF_STMT (ranges[i].exp);
	  if (!is_gimple_assign (stmt))
	    continue;
	  ccode = gimple_assign_rhs_code (stmt);
	  rhs1 = gimple_assign_rhs1 (stmt);
	  rhs2 = gimple_assign_rhs2 (stmt);
	}
      else
	{
	  operand_entry *oe = (*ops)[ranges[i].idx];
	  stmt = last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id));
	  if (gimple_code (stmt) != GIMPLE_COND)
	    continue;
	  ccode = gimple_cond_code (stmt);
	  rhs1 = gimple_cond_lhs (stmt);
	  rhs2 = gimple_cond_rhs (stmt);
	}

      if (TREE_CODE (rhs1) != SSA_NAME
	  || rhs2 == NULL_TREE
	  || TREE_CODE (rhs2) != SSA_NAME)
	continue;

      /* Only relational comparisons are interesting.  */
      switch (ccode)
	{
	case GT_EXPR:
	case GE_EXPR:
	case LT_EXPR:
	case LE_EXPR:
	  break;
	default:
	  continue;
	}
      if (in_p)
	ccode = invert_tree_comparison (ccode, false);
      /* Canonicalize to RHS1 LT/LE RHS2 by swapping operands for
	 GT/GE.  */
      switch (ccode)
	{
	case GT_EXPR:
	case GE_EXPR:
	  std::swap (rhs1, rhs2);
	  ccode = swap_tree_comparison (ccode);
	  break;
	case LT_EXPR:
	case LE_EXPR:
	  break;
	default:
	  gcc_unreachable ();
	}

      /* The left operand must have a recorded ">= 0" test.  */
      int *idx = map->get (rhs1);
      if (idx == NULL)
	continue;

      /* maybe_optimize_range_tests allows statements without side-effects
	 in the basic blocks as long as they are consumed in the same bb.
	 Make sure rhs2's def stmt is not among them, otherwise we can't
	 use safely get_nonzero_bits on it.  E.g. in:
	    # RANGE [-83, 1] NONZERO 173
	    # k_32 = PHI <k_47(13), k_12(9)>
	   ...
	    if (k_32 >= 0)
	      goto <bb 5>; [26.46%]
	    else
	      goto <bb 9>; [73.54%]

	   <bb 5> [local count: 140323371]:
	    # RANGE [0, 1] NONZERO 1
	    _5 = (int) k_32;
	    # RANGE [0, 4] NONZERO 4
	    _21 = _5 << 2;
	    # RANGE [0, 4] NONZERO 4
	    iftmp.0_44 = (char) _21;
	    if (k_32 < iftmp.0_44)
	      goto <bb 6>; [84.48%]
	    else
	      goto <bb 9>; [15.52%]
	 the ranges on _5/_21/iftmp.0_44 are flow sensitive, assume that
	 k_32 >= 0.  If we'd optimize k_32 >= 0 to true and k_32 < iftmp.0_44
	 to (unsigned) k_32 < (unsigned) iftmp.0_44, then we would execute
	 those stmts even for negative k_32 and the value ranges would be no
	 longer guaranteed and so the optimization would be invalid.  */
      if (opcode == ERROR_MARK)
	{
	  gimple *g = SSA_NAME_DEF_STMT (rhs2);
	  basic_block bb2 = gimple_bb (g);
	  if (bb2
	      && bb2 != first_bb
	      && dominated_by_p (CDI_DOMINATORS, bb2, first_bb))
	    {
	      /* As an exception, handle a few common cases.  */
	      if (gimple_assign_cast_p (g)
		  && INTEGRAL_TYPE_P (TREE_TYPE (gimple_assign_rhs1 (g)))
		  && TYPE_UNSIGNED (TREE_TYPE (gimple_assign_rhs1 (g)))
		  && (TYPE_PRECISION (TREE_TYPE (rhs2))
		      > TYPE_PRECISION (TREE_TYPE (gimple_assign_rhs1 (g)))))
		/* Zero-extension is always ok.  */ ;
	      else if (is_gimple_assign (g)
		       && gimple_assign_rhs_code (g) == BIT_AND_EXPR
		       && TREE_CODE (gimple_assign_rhs2 (g)) == INTEGER_CST
		       && !wi::neg_p (wi::to_wide (gimple_assign_rhs2 (g))))
		/* Masking with INTEGER_CST with MSB clear is always ok
		   too.  */ ;
	      else
		continue;
	    }
	}

      /* RHS2 must be provably non-negative for the unsigned comparison
	 to be equivalent.  */
      wide_int nz = get_nonzero_bits (rhs2);
      if (wi::neg_p (nz))
	continue;

      /* We have EXP < RHS2 or EXP <= RHS2 where EXP >= 0
	 and RHS2 is known to be RHS2 >= 0.  */
      tree utype = unsigned_type_for (TREE_TYPE (rhs1));

      enum warn_strict_overflow_code wc = WARN_STRICT_OVERFLOW_COMPARISON;
      if ((ranges[*idx].strict_overflow_p
	   || ranges[i].strict_overflow_p)
	  && issue_strict_overflow_warning (wc))
	warning_at (gimple_location (stmt), OPT_Wstrict_overflow,
		    "assuming signed overflow does not occur "
		    "when simplifying range test");

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  struct range_entry *r = &ranges[*idx];
	  fprintf (dump_file, "Optimizing range test ");
	  print_generic_expr (dump_file, r->exp);
	  fprintf (dump_file, " +[");
	  print_generic_expr (dump_file, r->low);
	  fprintf (dump_file, ", ");
	  print_generic_expr (dump_file, r->high);
	  fprintf (dump_file, "] and comparison ");
	  print_generic_expr (dump_file, rhs1);
	  fprintf (dump_file, " %s ", op_symbol_code (ccode));
	  print_generic_expr (dump_file, rhs2);
	  fprintf (dump_file, "\n into (");
	  print_generic_expr (dump_file, utype);
	  fprintf (dump_file, ") ");
	  print_generic_expr (dump_file, rhs1);
	  fprintf (dump_file, " %s (", op_symbol_code (ccode));
	  print_generic_expr (dump_file, utype);
	  fprintf (dump_file, ") ");
	  print_generic_expr (dump_file, rhs2);
	  fprintf (dump_file, "\n");
	}

      /* For |ed tests the comparison was inverted on entry; record the
	 inversion and invert back before emitting.  */
      operand_entry *oe = (*ops)[ranges[i].idx];
      ranges[i].in_p = 0;
      if (opcode == BIT_IOR_EXPR
	  || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
	{
	  ranges[i].in_p = 1;
	  ccode = invert_tree_comparison (ccode, false);
	}

      /* Emit (utype) rhs1 CCODE (utype) rhs2 before STMT, then rewrite
	 STMT (or replace the operand entry) to use the new unsigned
	 comparison.  */
      unsigned int uid = gimple_uid (stmt);
      gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
      gimple *g = gimple_build_assign (make_ssa_name (utype), NOP_EXPR, rhs1);
      gimple_set_uid (g, uid);
      rhs1 = gimple_assign_lhs (g);
      gsi_insert_before (&gsi, g, GSI_SAME_STMT);
      g = gimple_build_assign (make_ssa_name (utype), NOP_EXPR, rhs2);
      gimple_set_uid (g, uid);
      rhs2 = gimple_assign_lhs (g);
      gsi_insert_before (&gsi, g, GSI_SAME_STMT);
      if (tree_swap_operands_p (rhs1, rhs2))
	{
	  std::swap (rhs1, rhs2);
	  ccode = swap_tree_comparison (ccode);
	}
      if (gimple_code (stmt) == GIMPLE_COND)
	{
	  gcond *c = as_a <gcond *> (stmt);
	  gimple_cond_set_code (c, ccode);
	  gimple_cond_set_lhs (c, rhs1);
	  gimple_cond_set_rhs (c, rhs2);
	  update_stmt (stmt);
	}
      else
	{
	  tree ctype = oe->op ? TREE_TYPE (oe->op) : boolean_type_node;
	  if (!INTEGRAL_TYPE_P (ctype)
	      || (TREE_CODE (ctype) != BOOLEAN_TYPE
		  && TYPE_PRECISION (ctype) != 1))
	    ctype = boolean_type_node;
	  g = gimple_build_assign (make_ssa_name (ctype), ccode, rhs1, rhs2);
	  gimple_set_uid (g, uid);
	  gsi_insert_before (&gsi, g, GSI_SAME_STMT);
	  if (oe->op && ctype != TREE_TYPE (oe->op))
	    {
	      g = gimple_build_assign (make_ssa_name (TREE_TYPE (oe->op)),
				       NOP_EXPR, gimple_assign_lhs (g));
	      gimple_set_uid (g, uid);
	      gsi_insert_before (&gsi, g, GSI_SAME_STMT);
	    }
	  ranges[i].exp = gimple_assign_lhs (g);
	  oe->op = ranges[i].exp;
	  ranges[i].low = build_zero_cst (TREE_TYPE (ranges[i].exp));
	  ranges[i].high = ranges[i].low;
	}
      ranges[i].strict_overflow_p = false;
      oe = (*ops)[ranges[*idx].idx];
      /* Now change all the other range test immediate uses, so that
	 those tests will be optimized away.  */
      if (opcode == ERROR_MARK)
	{
	  /* Neutralize the now-redundant ">= 0" test: 0 for | chains,
	     1 for & chains (the identity element of the operation).  */
	  if (oe->op)
	    oe->op = build_int_cst (TREE_TYPE (oe->op),
				    oe->rank == BIT_IOR_EXPR ? 0 : 1);
	  else
	    oe->op = (oe->rank == BIT_IOR_EXPR
		      ? boolean_false_node : boolean_true_node);
	}
      else
	oe->op = error_mark_node;
      ranges[*idx].exp = NULL_TREE;
      ranges[*idx].low = NULL_TREE;
      ranges[*idx].high = NULL_TREE;
      any_changes = true;
    }

  delete map;
  return any_changes;
}
3324
3325 /* Optimize range tests, similarly how fold_range_test optimizes
3326 it on trees. The tree code for the binary
3327 operation between all the operands is OPCODE.
3328 If OPCODE is ERROR_MARK, optimize_range_tests is called from within
3329 maybe_optimize_range_tests for inter-bb range optimization.
3330 In that case if oe->op is NULL, oe->id is bb->index whose
3331 GIMPLE_COND is && or ||ed into the test, and oe->rank says
3332 the actual opcode.
3333 FIRST_BB is the first basic block if OPCODE is ERROR_MARK. */
3334
static bool
optimize_range_tests (enum tree_code opcode,
		      vec<operand_entry *> *ops, basic_block first_bb)
{
  unsigned int length = ops->length (), i, j, first;
  operand_entry *oe;
  struct range_entry *ranges;
  bool any_changes = false;

  if (length == 1)
    return false;

  /* Build a range_entry for each operand (or for the GIMPLE_COND of
     bb oe->id in the inter-bb, oe->op == NULL case).  */
  ranges = XNEWVEC (struct range_entry, length);
  for (i = 0; i < length; i++)
    {
      oe = (*ops)[i];
      ranges[i].idx = i;
      init_range_entry (ranges + i, oe->op,
			oe->op
			? NULL
			: last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id)));
      /* For | invert it now, we will invert it again before emitting
	 the optimized expression.  */
      if (opcode == BIT_IOR_EXPR
	  || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
	ranges[i].in_p = !ranges[i].in_p;
    }

  /* Sort so that entries with the same tested expression are adjacent,
     then skip past leading entries without an SSA_NAME exp (those can't
     participate in merging).  */
  qsort (ranges, length, sizeof (*ranges), range_entry_cmp);
  for (i = 0; i < length; i++)
    if (ranges[i].exp != NULL_TREE && TREE_CODE (ranges[i].exp) == SSA_NAME)
      break;

  /* Try to merge ranges.  */
  for (first = i; i < length; i++)
    {
      tree low = ranges[i].low;
      tree high = ranges[i].high;
      int in_p = ranges[i].in_p;
      bool strict_overflow_p = ranges[i].strict_overflow_p;
      int update_fail_count = 0;

      /* Greedily fold successive entries on the same expression into
	 one combined range.  */
      for (j = i + 1; j < length; j++)
	{
	  if (ranges[i].exp != ranges[j].exp)
	    break;
	  if (!merge_ranges (&in_p, &low, &high, in_p, low, high,
			     ranges[j].in_p, ranges[j].low, ranges[j].high))
	    break;
	  strict_overflow_p |= ranges[j].strict_overflow_p;
	}

      if (j == i + 1)
	continue;

      if (update_range_test (ranges + i, ranges + i + 1, NULL, j - i - 1,
			     opcode, ops, ranges[i].exp, NULL, in_p,
			     low, high, strict_overflow_p))
	{
	  i = j - 1;
	  any_changes = true;
	}
      /* Avoid quadratic complexity if all merge_ranges calls would succeed,
	 while update_range_test would fail.  */
      else if (update_fail_count == 64)
	i = j - 1;
      else
	++update_fail_count;
    }

  /* Then try the more specialized transformations on what is left.  */
  any_changes |= optimize_range_tests_1 (opcode, first, length, true,
					 ops, ranges);

  if (BRANCH_COST (optimize_function_for_speed_p (cfun), false) >= 2)
    any_changes |= optimize_range_tests_1 (opcode, first, length, false,
					   ops, ranges);
  if (lshift_cheap_p (optimize_function_for_speed_p (cfun)))
    any_changes |= optimize_range_tests_to_bit_test (opcode, first, length,
						     ops, ranges);
  any_changes |= optimize_range_tests_cmp_bitwise (opcode, first, length,
						   ops, ranges);
  any_changes |= optimize_range_tests_var_bound (opcode, first, length, ops,
						 ranges, first_bb);

  /* Compact OPS, dropping the entries the subroutines marked as consumed
     with error_mark_node.  In the ERROR_MARK (inter-bb) case the caller
     does this itself.  */
  if (any_changes && opcode != ERROR_MARK)
    {
      j = 0;
      FOR_EACH_VEC_ELT (*ops, i, oe)
	{
	  if (oe->op == error_mark_node)
	    continue;
	  else if (i != j)
	    (*ops)[j] = oe;
	  j++;
	}
      ops->truncate (j);
    }

  XDELETEVEC (ranges);
  return any_changes;
}
3436
3437 /* A subroutine of optimize_vec_cond_expr to extract and canonicalize
3438 the operands of the VEC_COND_EXPR. Returns ERROR_MARK on failure,
3439 otherwise the comparison code. */
3440
3441 static tree_code
ovce_extract_ops(tree var,gassign ** rets,bool * reti)3442 ovce_extract_ops (tree var, gassign **rets, bool *reti)
3443 {
3444 if (TREE_CODE (var) != SSA_NAME)
3445 return ERROR_MARK;
3446
3447 gassign *stmt = dyn_cast <gassign *> (SSA_NAME_DEF_STMT (var));
3448 if (stmt == NULL)
3449 return ERROR_MARK;
3450
3451 /* ??? If we start creating more COND_EXPR, we could perform
3452 this same optimization with them. For now, simplify. */
3453 if (gimple_assign_rhs_code (stmt) != VEC_COND_EXPR)
3454 return ERROR_MARK;
3455
3456 tree cond = gimple_assign_rhs1 (stmt);
3457 tree_code cmp = TREE_CODE (cond);
3458 if (TREE_CODE_CLASS (cmp) != tcc_comparison)
3459 return ERROR_MARK;
3460
3461 /* ??? For now, allow only canonical true and false result vectors.
3462 We could expand this to other constants should the need arise,
3463 but at the moment we don't create them. */
3464 tree t = gimple_assign_rhs2 (stmt);
3465 tree f = gimple_assign_rhs3 (stmt);
3466 bool inv;
3467 if (integer_all_onesp (t))
3468 inv = false;
3469 else if (integer_all_onesp (f))
3470 {
3471 cmp = invert_tree_comparison (cmp, false);
3472 inv = true;
3473 }
3474 else
3475 return ERROR_MARK;
3476 if (!integer_zerop (f))
3477 return ERROR_MARK;
3478
3479 /* Success! */
3480 if (rets)
3481 *rets = stmt;
3482 if (reti)
3483 *reti = inv;
3484 return cmp;
3485 }
3486
3487 /* Optimize the condition of VEC_COND_EXPRs which have been combined
3488 with OPCODE (either BIT_AND_EXPR or BIT_IOR_EXPR). */
3489
static bool
optimize_vec_cond_expr (tree_code opcode, vec<operand_entry *> *ops)
{
  unsigned int length = ops->length (), i, j;
  bool any_changes = false;

  if (length == 1)
    return false;

  /* For each pair of operands that are both VEC_COND_EXPRs with
     canonical -1/0 arms, try to fold their comparisons into one.  */
  for (i = 0; i < length; ++i)
    {
      tree elt0 = (*ops)[i]->op;

      gassign *stmt0;
      bool invert;
      tree_code cmp0 = ovce_extract_ops (elt0, &stmt0, &invert);
      if (cmp0 == ERROR_MARK)
	continue;

      for (j = i + 1; j < length; ++j)
	{
	  tree &elt1 = (*ops)[j]->op;

	  gassign *stmt1;
	  tree_code cmp1 = ovce_extract_ops (elt1, &stmt1, NULL);
	  if (cmp1 == ERROR_MARK)
	    continue;

	  tree cond0 = gimple_assign_rhs1 (stmt0);
	  tree x0 = TREE_OPERAND (cond0, 0);
	  tree y0 = TREE_OPERAND (cond0, 1);

	  tree cond1 = gimple_assign_rhs1 (stmt1);
	  tree x1 = TREE_OPERAND (cond1, 0);
	  tree y1 = TREE_OPERAND (cond1, 1);

	  /* Ask the folder whether (x0 cmp0 y0) OPCODE (x1 cmp1 y1)
	     simplifies to a single comparison.  */
	  tree comb;
	  if (opcode == BIT_AND_EXPR)
	    comb = maybe_fold_and_comparisons (cmp0, x0, y0, cmp1, x1, y1);
	  else if (opcode == BIT_IOR_EXPR)
	    comb = maybe_fold_or_comparisons (cmp0, x0, y0, cmp1, x1, y1);
	  else
	    gcc_unreachable ();
	  if (comb == NULL)
	    continue;

	  /* Success!  */
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "Transforming ");
	      print_generic_expr (dump_file, cond0);
	      fprintf (dump_file, " %c ", opcode == BIT_AND_EXPR ? '&' : '|');
	      print_generic_expr (dump_file, cond1);
	      fprintf (dump_file, " into ");
	      print_generic_expr (dump_file, comb);
	      fputc ('\n', dump_file);
	    }

	  /* Install the combined condition on STMT0; if its comparison
	     was inverted during extraction, swap the arms back.  Mark
	     the second operand consumed.  */
	  gimple_assign_set_rhs1 (stmt0, comb);
	  if (invert)
	    std::swap (*gimple_assign_rhs2_ptr (stmt0),
		       *gimple_assign_rhs3_ptr (stmt0));
	  update_stmt (stmt0);

	  elt1 = error_mark_node;
	  any_changes = true;
	}
    }

  /* Compact OPS, dropping the entries consumed above.  */
  if (any_changes)
    {
      operand_entry *oe;
      j = 0;
      FOR_EACH_VEC_ELT (*ops, i, oe)
	{
	  if (oe->op == error_mark_node)
	    continue;
	  else if (i != j)
	    (*ops)[j] = oe;
	  j++;
	}
      ops->truncate (j);
    }

  return any_changes;
}
3576
3577 /* Return true if STMT is a cast like:
3578 <bb N>:
3579 ...
3580 _123 = (int) _234;
3581
3582 <bb M>:
3583 # _345 = PHI <_123(N), 1(...), 1(...)>
3584 where _234 has bool type, _123 has single use and
3585 bb N has a single successor M. This is commonly used in
3586 the last block of a range test.
3587
   Also return true if STMT is tcc_compare like:
3589 <bb N>:
3590 ...
3591 _234 = a_2(D) == 2;
3592
3593 <bb M>:
3594 # _345 = PHI <_234(N), 1(...), 1(...)>
3595 _346 = (int) _345;
   where _234 has bool type, a single use and
3597 bb N has a single successor M. This is commonly used in
3598 the last block of a range test. */
3599
static bool
final_range_test_p (gimple *stmt)
{
  basic_block bb, rhs_bb, lhs_bb;
  edge e;
  tree lhs, rhs;
  use_operand_p use_p;
  gimple *use_stmt;

  /* STMT must be either a cast or a comparison assignment.  */
  if (!gimple_assign_cast_p (stmt)
      && (!is_gimple_assign (stmt)
	  || (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt))
	      != tcc_comparison)))
    return false;
  /* Its block must fall through on a single, non-complex edge.  */
  bb = gimple_bb (stmt);
  if (!single_succ_p (bb))
    return false;
  e = single_succ_edge (bb);
  if (e->flags & EDGE_COMPLEX)
    return false;

  lhs = gimple_assign_lhs (stmt);
  rhs = gimple_assign_rhs1 (stmt);
  /* Cast case: must be bool -> integral.  */
  if (gimple_assign_cast_p (stmt)
      && (!INTEGRAL_TYPE_P (TREE_TYPE (lhs))
	  || TREE_CODE (rhs) != SSA_NAME
	  || TREE_CODE (TREE_TYPE (rhs)) != BOOLEAN_TYPE))
    return false;

  /* Comparison case: result must be bool.  */
  if (!gimple_assign_cast_p (stmt)
      && (TREE_CODE (TREE_TYPE (lhs)) != BOOLEAN_TYPE))
    return false;

  /* Test whether lhs is consumed only by a PHI in the only successor bb.  */
  if (!single_imm_use (lhs, &use_p, &use_stmt))
    return false;

  if (gimple_code (use_stmt) != GIMPLE_PHI
      || gimple_bb (use_stmt) != e->dest)
    return false;

  /* And that the rhs is defined in the same loop.  */
  if (gimple_assign_cast_p (stmt))
    {
      if (TREE_CODE (rhs) != SSA_NAME
	  || !(rhs_bb = gimple_bb (SSA_NAME_DEF_STMT (rhs)))
	  || !flow_bb_inside_loop_p (loop_containing_stmt (stmt), rhs_bb))
	return false;
    }
  else
    {
      if (TREE_CODE (lhs) != SSA_NAME
	  || !(lhs_bb = gimple_bb (SSA_NAME_DEF_STMT (lhs)))
	  || !flow_bb_inside_loop_p (loop_containing_stmt (stmt), lhs_bb))
	return false;
    }

  return true;
}
3659
3660 /* Return true if BB is suitable basic block for inter-bb range test
3661 optimization. If BACKWARD is true, BB should be the only predecessor
3662 of TEST_BB, and *OTHER_BB is either NULL and filled by the routine,
3663 or compared with to find a common basic block to which all conditions
3664 branch to if true resp. false. If BACKWARD is false, TEST_BB should
3665 be the only predecessor of BB. */
3666
static bool
suitable_cond_bb (basic_block bb, basic_block test_bb, basic_block *other_bb,
		  bool backward)
{
  edge_iterator ei, ei2;
  edge e, e2;
  gimple *stmt;
  gphi_iterator gsi;
  bool other_edge_seen = false;
  bool is_cond;

  if (test_bb == bb)
    return false;
  /* Check last stmt first.  */
  stmt = last_stmt (bb);
  if (stmt == NULL
      || (gimple_code (stmt) != GIMPLE_COND
	  && (backward || !final_range_test_p (stmt)))
      || gimple_visited_p (stmt)
      || stmt_could_throw_p (stmt)
      || *other_bb == bb)
    return false;
  is_cond = gimple_code (stmt) == GIMPLE_COND;
  if (is_cond)
    {
      /* If last stmt is GIMPLE_COND, verify that one of the succ edges
	 goes to the next bb (if BACKWARD, it is TEST_BB), and the other
	 to *OTHER_BB (if not set yet, try to find it out).  */
      if (EDGE_COUNT (bb->succs) != 2)
	return false;
      FOR_EACH_EDGE (e, ei, bb->succs)
	{
	  if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
	    return false;
	  if (e->dest == test_bb)
	    {
	      if (backward)
		continue;
	      else
		return false;
	    }
	  if (e->dest == bb)
	    return false;
	  if (*other_bb == NULL)
	    {
	      /* Pick as *OTHER_BB a destination BB shares with
		 TEST_BB's successors.  */
	      FOR_EACH_EDGE (e2, ei2, test_bb->succs)
		if (!(e2->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
		  return false;
		else if (e->dest == e2->dest)
		  *other_bb = e->dest;
	      if (*other_bb == NULL)
		return false;
	    }
	  if (e->dest == *other_bb)
	    other_edge_seen = true;
	  else if (backward)
	    return false;
	}
      if (*other_bb == NULL || !other_edge_seen)
	return false;
    }
  else if (single_succ (bb) != *other_bb)
    return false;

  /* Now check all PHIs of *OTHER_BB.  */
  e = find_edge (bb, *other_bb);
  e2 = find_edge (test_bb, *other_bb);
  for (gsi = gsi_start_phis (e->dest); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gphi *phi = gsi.phi ();
      /* If both BB and TEST_BB end with GIMPLE_COND, all PHI arguments
	 corresponding to BB and TEST_BB predecessor must be the same.  */
      if (!operand_equal_p (gimple_phi_arg_def (phi, e->dest_idx),
			    gimple_phi_arg_def (phi, e2->dest_idx), 0))
	{
	  /* Otherwise, if one of the blocks doesn't end with GIMPLE_COND,
	     one of the PHIs should have the lhs of the last stmt in
	     that block as PHI arg and that PHI should have 0 or 1
	     corresponding to it in all other range test basic blocks
	     considered.  */
	  if (!is_cond)
	    {
	      if (gimple_phi_arg_def (phi, e->dest_idx)
		  == gimple_assign_lhs (stmt)
		  && (integer_zerop (gimple_phi_arg_def (phi, e2->dest_idx))
		      || integer_onep (gimple_phi_arg_def (phi,
							   e2->dest_idx))))
		continue;
	    }
	  else
	    {
	      gimple *test_last = last_stmt (test_bb);
	      if (gimple_code (test_last) != GIMPLE_COND
		  && gimple_phi_arg_def (phi, e2->dest_idx)
		     == gimple_assign_lhs (test_last)
		  && (integer_zerop (gimple_phi_arg_def (phi, e->dest_idx))
		      || integer_onep (gimple_phi_arg_def (phi, e->dest_idx))))
		continue;
	    }

	  return false;
	}
    }
  return true;
}
3772
3773 /* Return true if BB doesn't have side-effects that would disallow
3774 range test optimization, all SSA_NAMEs set in the bb are consumed
3775 in the bb and there are no PHIs. */
3776
static bool
no_side_effect_bb (basic_block bb)
{
  gimple_stmt_iterator gsi;
  gimple *last;

  if (!gimple_seq_empty_p (phi_nodes (bb)))
    return false;
  last = last_stmt (bb);
  /* Walk all stmts; success is only declared when the walk reaches
     LAST with every prior stmt being a side-effect-free assignment
     whose result is consumed within BB.  */
  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gimple *stmt = gsi_stmt (gsi);
      tree lhs;
      imm_use_iterator imm_iter;
      use_operand_p use_p;

      if (is_gimple_debug (stmt))
	continue;
      if (gimple_has_side_effects (stmt))
	return false;
      /* LAST itself (the GIMPLE_COND or final cast) is allowed to have
	 uses outside of BB.  */
      if (stmt == last)
	return true;
      if (!is_gimple_assign (stmt))
	return false;
      lhs = gimple_assign_lhs (stmt);
      if (TREE_CODE (lhs) != SSA_NAME)
	return false;
      if (gimple_assign_rhs_could_trap_p (stmt))
	return false;
      /* All non-debug uses of LHS must stay inside BB.  */
      FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
	{
	  gimple *use_stmt = USE_STMT (use_p);
	  if (is_gimple_debug (use_stmt))
	    continue;
	  if (gimple_bb (use_stmt) != bb)
	    return false;
	}
    }
  /* Not reached via LAST (e.g. empty bb), be conservative.  */
  return false;
}
3817
3818 /* If VAR is set by CODE (BIT_{AND,IOR}_EXPR) which is reassociable,
3819 return true and fill in *OPS recursively. */
3820
3821 static bool
get_ops(tree var,enum tree_code code,vec<operand_entry * > * ops,struct loop * loop)3822 get_ops (tree var, enum tree_code code, vec<operand_entry *> *ops,
3823 struct loop *loop)
3824 {
3825 gimple *stmt = SSA_NAME_DEF_STMT (var);
3826 tree rhs[2];
3827 int i;
3828
3829 if (!is_reassociable_op (stmt, code, loop))
3830 return false;
3831
3832 rhs[0] = gimple_assign_rhs1 (stmt);
3833 rhs[1] = gimple_assign_rhs2 (stmt);
3834 gimple_set_visited (stmt, true);
3835 for (i = 0; i < 2; i++)
3836 if (TREE_CODE (rhs[i]) == SSA_NAME
3837 && !get_ops (rhs[i], code, ops, loop)
3838 && has_single_use (rhs[i]))
3839 {
3840 operand_entry *oe = operand_entry_pool.allocate ();
3841
3842 oe->op = rhs[i];
3843 oe->rank = code;
3844 oe->id = 0;
3845 oe->count = 1;
3846 oe->stmt_to_insert = NULL;
3847 ops->safe_push (oe);
3848 }
3849 return true;
3850 }
3851
3852 /* Find the ops that were added by get_ops starting from VAR, see if
3853 they were changed during update_range_test and if yes, create new
3854 stmts. */
3855
3856 static tree
update_ops(tree var,enum tree_code code,vec<operand_entry * > ops,unsigned int * pidx,struct loop * loop)3857 update_ops (tree var, enum tree_code code, vec<operand_entry *> ops,
3858 unsigned int *pidx, struct loop *loop)
3859 {
3860 gimple *stmt = SSA_NAME_DEF_STMT (var);
3861 tree rhs[4];
3862 int i;
3863
3864 if (!is_reassociable_op (stmt, code, loop))
3865 return NULL;
3866
3867 rhs[0] = gimple_assign_rhs1 (stmt);
3868 rhs[1] = gimple_assign_rhs2 (stmt);
3869 rhs[2] = rhs[0];
3870 rhs[3] = rhs[1];
3871 for (i = 0; i < 2; i++)
3872 if (TREE_CODE (rhs[i]) == SSA_NAME)
3873 {
3874 rhs[2 + i] = update_ops (rhs[i], code, ops, pidx, loop);
3875 if (rhs[2 + i] == NULL_TREE)
3876 {
3877 if (has_single_use (rhs[i]))
3878 rhs[2 + i] = ops[(*pidx)++]->op;
3879 else
3880 rhs[2 + i] = rhs[i];
3881 }
3882 }
3883 if ((rhs[2] != rhs[0] || rhs[3] != rhs[1])
3884 && (rhs[2] != rhs[1] || rhs[3] != rhs[0]))
3885 {
3886 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
3887 var = make_ssa_name (TREE_TYPE (var));
3888 gassign *g = gimple_build_assign (var, gimple_assign_rhs_code (stmt),
3889 rhs[2], rhs[3]);
3890 gimple_set_uid (g, gimple_uid (stmt));
3891 gimple_set_visited (g, true);
3892 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3893 }
3894 return var;
3895 }
3896
3897 /* Structure to track the initial value passed to get_ops and
3898 the range in the ops vector for each basic block. */
3899
struct inter_bb_range_test_entry
{
  /* The value handed to get_ops for this bb, or NULL when the bb's
     GIMPLE_COND itself was pushed as the operand.  */
  tree op;
  /* Half-open-style bounds of this bb's entries in the ops vector:
     entries [first_idx, last_idx) belong to this bb (first_idx ==
     last_idx means no entries were pushed).  */
  unsigned int first_idx, last_idx;
};
3905
3906 /* Inter-bb range test optimization.
3907
3908 Returns TRUE if a gimple conditional is optimized to a true/false,
3909 otherwise return FALSE.
3910
3911 This indicates to the caller that it should run a CFG cleanup pass
3912 once reassociation is completed. */
3913
3914 static bool
maybe_optimize_range_tests(gimple * stmt)3915 maybe_optimize_range_tests (gimple *stmt)
3916 {
3917 basic_block first_bb = gimple_bb (stmt);
3918 basic_block last_bb = first_bb;
3919 basic_block other_bb = NULL;
3920 basic_block bb;
3921 edge_iterator ei;
3922 edge e;
3923 auto_vec<operand_entry *> ops;
3924 auto_vec<inter_bb_range_test_entry> bbinfo;
3925 bool any_changes = false;
3926 bool cfg_cleanup_needed = false;
3927
3928 /* Consider only basic blocks that end with GIMPLE_COND or
3929 a cast statement satisfying final_range_test_p. All
3930 but the last bb in the first_bb .. last_bb range
3931 should end with GIMPLE_COND. */
3932 if (gimple_code (stmt) == GIMPLE_COND)
3933 {
3934 if (EDGE_COUNT (first_bb->succs) != 2)
3935 return cfg_cleanup_needed;
3936 }
3937 else if (final_range_test_p (stmt))
3938 other_bb = single_succ (first_bb);
3939 else
3940 return cfg_cleanup_needed;
3941
3942 if (stmt_could_throw_p (stmt))
3943 return cfg_cleanup_needed;
3944
3945 /* As relative ordering of post-dominator sons isn't fixed,
3946 maybe_optimize_range_tests can be called first on any
3947 bb in the range we want to optimize. So, start searching
3948 backwards, if first_bb can be set to a predecessor. */
3949 while (single_pred_p (first_bb))
3950 {
3951 basic_block pred_bb = single_pred (first_bb);
3952 if (!suitable_cond_bb (pred_bb, first_bb, &other_bb, true))
3953 break;
3954 if (!no_side_effect_bb (first_bb))
3955 break;
3956 first_bb = pred_bb;
3957 }
3958 /* If first_bb is last_bb, other_bb hasn't been computed yet.
3959 Before starting forward search in last_bb successors, find
3960 out the other_bb. */
3961 if (first_bb == last_bb)
3962 {
3963 other_bb = NULL;
3964 /* As non-GIMPLE_COND last stmt always terminates the range,
3965 if forward search didn't discover anything, just give up. */
3966 if (gimple_code (stmt) != GIMPLE_COND)
3967 return cfg_cleanup_needed;
3968 /* Look at both successors. Either it ends with a GIMPLE_COND
3969 and satisfies suitable_cond_bb, or ends with a cast and
3970 other_bb is that cast's successor. */
3971 FOR_EACH_EDGE (e, ei, first_bb->succs)
3972 if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE))
3973 || e->dest == first_bb)
3974 return cfg_cleanup_needed;
3975 else if (single_pred_p (e->dest))
3976 {
3977 stmt = last_stmt (e->dest);
3978 if (stmt
3979 && gimple_code (stmt) == GIMPLE_COND
3980 && EDGE_COUNT (e->dest->succs) == 2)
3981 {
3982 if (suitable_cond_bb (first_bb, e->dest, &other_bb, true))
3983 break;
3984 else
3985 other_bb = NULL;
3986 }
3987 else if (stmt
3988 && final_range_test_p (stmt)
3989 && find_edge (first_bb, single_succ (e->dest)))
3990 {
3991 other_bb = single_succ (e->dest);
3992 if (other_bb == first_bb)
3993 other_bb = NULL;
3994 }
3995 }
3996 if (other_bb == NULL)
3997 return cfg_cleanup_needed;
3998 }
3999 /* Now do the forward search, moving last_bb to successor bbs
4000 that aren't other_bb. */
4001 while (EDGE_COUNT (last_bb->succs) == 2)
4002 {
4003 FOR_EACH_EDGE (e, ei, last_bb->succs)
4004 if (e->dest != other_bb)
4005 break;
4006 if (e == NULL)
4007 break;
4008 if (!single_pred_p (e->dest))
4009 break;
4010 if (!suitable_cond_bb (e->dest, last_bb, &other_bb, false))
4011 break;
4012 if (!no_side_effect_bb (e->dest))
4013 break;
4014 last_bb = e->dest;
4015 }
4016 if (first_bb == last_bb)
4017 return cfg_cleanup_needed;
4018 /* Here basic blocks first_bb through last_bb's predecessor
4019 end with GIMPLE_COND, all of them have one of the edges to
4020 other_bb and another to another block in the range,
4021 all blocks except first_bb don't have side-effects and
4022 last_bb ends with either GIMPLE_COND, or cast satisfying
4023 final_range_test_p. */
4024 for (bb = last_bb; ; bb = single_pred (bb))
4025 {
4026 enum tree_code code;
4027 tree lhs, rhs;
4028 inter_bb_range_test_entry bb_ent;
4029
4030 bb_ent.op = NULL_TREE;
4031 bb_ent.first_idx = ops.length ();
4032 bb_ent.last_idx = bb_ent.first_idx;
4033 e = find_edge (bb, other_bb);
4034 stmt = last_stmt (bb);
4035 gimple_set_visited (stmt, true);
4036 if (gimple_code (stmt) != GIMPLE_COND)
4037 {
4038 use_operand_p use_p;
4039 gimple *phi;
4040 edge e2;
4041 unsigned int d;
4042
4043 lhs = gimple_assign_lhs (stmt);
4044 rhs = gimple_assign_rhs1 (stmt);
4045 gcc_assert (bb == last_bb);
4046
4047 /* stmt is
4048 _123 = (int) _234;
4049 OR
4050 _234 = a_2(D) == 2;
4051
4052 followed by:
4053 <bb M>:
4054 # _345 = PHI <_123(N), 1(...), 1(...)>
4055
4056 or 0 instead of 1. If it is 0, the _234
4057 range test is anded together with all the
4058 other range tests, if it is 1, it is ored with
4059 them. */
4060 single_imm_use (lhs, &use_p, &phi);
4061 gcc_assert (gimple_code (phi) == GIMPLE_PHI);
4062 e2 = find_edge (first_bb, other_bb);
4063 d = e2->dest_idx;
4064 gcc_assert (gimple_phi_arg_def (phi, e->dest_idx) == lhs);
4065 if (integer_zerop (gimple_phi_arg_def (phi, d)))
4066 code = BIT_AND_EXPR;
4067 else
4068 {
4069 gcc_checking_assert (integer_onep (gimple_phi_arg_def (phi, d)));
4070 code = BIT_IOR_EXPR;
4071 }
4072
4073 /* If _234 SSA_NAME_DEF_STMT is
4074 _234 = _567 | _789;
4075 (or &, corresponding to 1/0 in the phi arguments,
4076 push into ops the individual range test arguments
4077 of the bitwise or resp. and, recursively. */
4078 if (TREE_CODE (rhs) == SSA_NAME
4079 && (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt))
4080 != tcc_comparison)
4081 && !get_ops (rhs, code, &ops,
4082 loop_containing_stmt (stmt))
4083 && has_single_use (rhs))
4084 {
4085 /* Otherwise, push the _234 range test itself. */
4086 operand_entry *oe = operand_entry_pool.allocate ();
4087
4088 oe->op = rhs;
4089 oe->rank = code;
4090 oe->id = 0;
4091 oe->count = 1;
4092 oe->stmt_to_insert = NULL;
4093 ops.safe_push (oe);
4094 bb_ent.last_idx++;
4095 bb_ent.op = rhs;
4096 }
4097 else if (is_gimple_assign (stmt)
4098 && (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt))
4099 == tcc_comparison)
4100 && !get_ops (lhs, code, &ops,
4101 loop_containing_stmt (stmt))
4102 && has_single_use (lhs))
4103 {
4104 operand_entry *oe = operand_entry_pool.allocate ();
4105 oe->op = lhs;
4106 oe->rank = code;
4107 oe->id = 0;
4108 oe->count = 1;
4109 ops.safe_push (oe);
4110 bb_ent.last_idx++;
4111 bb_ent.op = lhs;
4112 }
4113 else
4114 {
4115 bb_ent.last_idx = ops.length ();
4116 bb_ent.op = rhs;
4117 }
4118 bbinfo.safe_push (bb_ent);
4119 continue;
4120 }
4121 /* Otherwise stmt is GIMPLE_COND. */
4122 code = gimple_cond_code (stmt);
4123 lhs = gimple_cond_lhs (stmt);
4124 rhs = gimple_cond_rhs (stmt);
4125 if (TREE_CODE (lhs) == SSA_NAME
4126 && INTEGRAL_TYPE_P (TREE_TYPE (lhs))
4127 && ((code != EQ_EXPR && code != NE_EXPR)
4128 || rhs != boolean_false_node
4129 /* Either push into ops the individual bitwise
4130 or resp. and operands, depending on which
4131 edge is other_bb. */
4132 || !get_ops (lhs, (((e->flags & EDGE_TRUE_VALUE) == 0)
4133 ^ (code == EQ_EXPR))
4134 ? BIT_AND_EXPR : BIT_IOR_EXPR, &ops,
4135 loop_containing_stmt (stmt))))
4136 {
4137 /* Or push the GIMPLE_COND stmt itself. */
4138 operand_entry *oe = operand_entry_pool.allocate ();
4139
4140 oe->op = NULL;
4141 oe->rank = (e->flags & EDGE_TRUE_VALUE)
4142 ? BIT_IOR_EXPR : BIT_AND_EXPR;
4143 /* oe->op = NULL signs that there is no SSA_NAME
4144 for the range test, and oe->id instead is the
4145 basic block number, at which's end the GIMPLE_COND
4146 is. */
4147 oe->id = bb->index;
4148 oe->count = 1;
4149 oe->stmt_to_insert = NULL;
4150 ops.safe_push (oe);
4151 bb_ent.op = NULL;
4152 bb_ent.last_idx++;
4153 }
4154 else if (ops.length () > bb_ent.first_idx)
4155 {
4156 bb_ent.op = lhs;
4157 bb_ent.last_idx = ops.length ();
4158 }
4159 bbinfo.safe_push (bb_ent);
4160 if (bb == first_bb)
4161 break;
4162 }
4163 if (ops.length () > 1)
4164 any_changes = optimize_range_tests (ERROR_MARK, &ops, first_bb);
4165 if (any_changes)
4166 {
4167 unsigned int idx, max_idx = 0;
4168 /* update_ops relies on has_single_use predicates returning the
4169 same values as it did during get_ops earlier. Additionally it
4170 never removes statements, only adds new ones and it should walk
4171 from the single imm use and check the predicate already before
4172 making those changes.
4173 On the other side, the handling of GIMPLE_COND directly can turn
4174 previously multiply used SSA_NAMEs into single use SSA_NAMEs, so
4175 it needs to be done in a separate loop afterwards. */
4176 for (bb = last_bb, idx = 0; ; bb = single_pred (bb), idx++)
4177 {
4178 if (bbinfo[idx].first_idx < bbinfo[idx].last_idx
4179 && bbinfo[idx].op != NULL_TREE)
4180 {
4181 tree new_op;
4182
4183 max_idx = idx;
4184 stmt = last_stmt (bb);
4185 new_op = update_ops (bbinfo[idx].op,
4186 (enum tree_code)
4187 ops[bbinfo[idx].first_idx]->rank,
4188 ops, &bbinfo[idx].first_idx,
4189 loop_containing_stmt (stmt));
4190 if (new_op == NULL_TREE)
4191 {
4192 gcc_assert (bb == last_bb);
4193 new_op = ops[bbinfo[idx].first_idx++]->op;
4194 }
4195 if (bbinfo[idx].op != new_op)
4196 {
4197 imm_use_iterator iter;
4198 use_operand_p use_p;
4199 gimple *use_stmt, *cast_or_tcc_cmp_stmt = NULL;
4200
4201 FOR_EACH_IMM_USE_STMT (use_stmt, iter, bbinfo[idx].op)
4202 if (is_gimple_debug (use_stmt))
4203 continue;
4204 else if (gimple_code (use_stmt) == GIMPLE_COND
4205 || gimple_code (use_stmt) == GIMPLE_PHI)
4206 FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
4207 SET_USE (use_p, new_op);
4208 else if ((is_gimple_assign (use_stmt)
4209 && (TREE_CODE_CLASS
4210 (gimple_assign_rhs_code (use_stmt))
4211 == tcc_comparison)))
4212 cast_or_tcc_cmp_stmt = use_stmt;
4213 else if (gimple_assign_cast_p (use_stmt))
4214 cast_or_tcc_cmp_stmt = use_stmt;
4215 else
4216 gcc_unreachable ();
4217
4218 if (cast_or_tcc_cmp_stmt)
4219 {
4220 gcc_assert (bb == last_bb);
4221 tree lhs = gimple_assign_lhs (cast_or_tcc_cmp_stmt);
4222 tree new_lhs = make_ssa_name (TREE_TYPE (lhs));
4223 enum tree_code rhs_code
4224 = gimple_assign_cast_p (cast_or_tcc_cmp_stmt)
4225 ? gimple_assign_rhs_code (cast_or_tcc_cmp_stmt)
4226 : CONVERT_EXPR;
4227 gassign *g;
4228 if (is_gimple_min_invariant (new_op))
4229 {
4230 new_op = fold_convert (TREE_TYPE (lhs), new_op);
4231 g = gimple_build_assign (new_lhs, new_op);
4232 }
4233 else
4234 g = gimple_build_assign (new_lhs, rhs_code, new_op);
4235 gimple_stmt_iterator gsi
4236 = gsi_for_stmt (cast_or_tcc_cmp_stmt);
4237 gimple_set_uid (g, gimple_uid (cast_or_tcc_cmp_stmt));
4238 gimple_set_visited (g, true);
4239 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
4240 FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs)
4241 if (is_gimple_debug (use_stmt))
4242 continue;
4243 else if (gimple_code (use_stmt) == GIMPLE_COND
4244 || gimple_code (use_stmt) == GIMPLE_PHI)
4245 FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
4246 SET_USE (use_p, new_lhs);
4247 else
4248 gcc_unreachable ();
4249 }
4250 }
4251 }
4252 if (bb == first_bb)
4253 break;
4254 }
4255 for (bb = last_bb, idx = 0; ; bb = single_pred (bb), idx++)
4256 {
4257 if (bbinfo[idx].first_idx < bbinfo[idx].last_idx
4258 && bbinfo[idx].op == NULL_TREE
4259 && ops[bbinfo[idx].first_idx]->op != NULL_TREE)
4260 {
4261 gcond *cond_stmt = as_a <gcond *> (last_stmt (bb));
4262
4263 if (idx > max_idx)
4264 max_idx = idx;
4265
4266 /* If we collapse the conditional to a true/false
4267 condition, then bubble that knowledge up to our caller. */
4268 if (integer_zerop (ops[bbinfo[idx].first_idx]->op))
4269 {
4270 gimple_cond_make_false (cond_stmt);
4271 cfg_cleanup_needed = true;
4272 }
4273 else if (integer_onep (ops[bbinfo[idx].first_idx]->op))
4274 {
4275 gimple_cond_make_true (cond_stmt);
4276 cfg_cleanup_needed = true;
4277 }
4278 else
4279 {
4280 gimple_cond_set_code (cond_stmt, NE_EXPR);
4281 gimple_cond_set_lhs (cond_stmt,
4282 ops[bbinfo[idx].first_idx]->op);
4283 gimple_cond_set_rhs (cond_stmt, boolean_false_node);
4284 }
4285 update_stmt (cond_stmt);
4286 }
4287 if (bb == first_bb)
4288 break;
4289 }
4290
4291 /* The above changes could result in basic blocks after the first
4292 modified one, up to and including last_bb, to be executed even if
4293 they would not be in the original program. If the value ranges of
4294 assignment lhs' in those bbs were dependent on the conditions
4295 guarding those basic blocks which now can change, the VRs might
4296 be incorrect. As no_side_effect_bb should ensure those SSA_NAMEs
4297 are only used within the same bb, it should be not a big deal if
4298 we just reset all the VRs in those bbs. See PR68671. */
4299 for (bb = last_bb, idx = 0; idx < max_idx; bb = single_pred (bb), idx++)
4300 reset_flow_sensitive_info_in_bb (bb);
4301 }
4302 return cfg_cleanup_needed;
4303 }
4304
4305 /* Return true if OPERAND is defined by a PHI node which uses the LHS
4306 of STMT in it's operands. This is also known as a "destructive
4307 update" operation. */
4308
4309 static bool
is_phi_for_stmt(gimple * stmt,tree operand)4310 is_phi_for_stmt (gimple *stmt, tree operand)
4311 {
4312 gimple *def_stmt;
4313 gphi *def_phi;
4314 tree lhs;
4315 use_operand_p arg_p;
4316 ssa_op_iter i;
4317
4318 if (TREE_CODE (operand) != SSA_NAME)
4319 return false;
4320
4321 lhs = gimple_assign_lhs (stmt);
4322
4323 def_stmt = SSA_NAME_DEF_STMT (operand);
4324 def_phi = dyn_cast <gphi *> (def_stmt);
4325 if (!def_phi)
4326 return false;
4327
4328 FOR_EACH_PHI_ARG (arg_p, def_phi, i, SSA_OP_USE)
4329 if (lhs == USE_FROM_PTR (arg_p))
4330 return true;
4331 return false;
4332 }
4333
4334 /* Remove def stmt of VAR if VAR has zero uses and recurse
4335 on rhs1 operand if so. */
4336
4337 static void
remove_visited_stmt_chain(tree var)4338 remove_visited_stmt_chain (tree var)
4339 {
4340 gimple *stmt;
4341 gimple_stmt_iterator gsi;
4342
4343 while (1)
4344 {
4345 if (TREE_CODE (var) != SSA_NAME || !has_zero_uses (var))
4346 return;
4347 stmt = SSA_NAME_DEF_STMT (var);
4348 if (is_gimple_assign (stmt) && gimple_visited_p (stmt))
4349 {
4350 var = gimple_assign_rhs1 (stmt);
4351 gsi = gsi_for_stmt (stmt);
4352 reassoc_remove_stmt (&gsi);
4353 release_defs (stmt);
4354 }
4355 else
4356 return;
4357 }
4358 }
4359
4360 /* This function checks three consequtive operands in
4361 passed operands vector OPS starting from OPINDEX and
4362 swaps two operands if it is profitable for binary operation
4363 consuming OPINDEX + 1 abnd OPINDEX + 2 operands.
4364
4365 We pair ops with the same rank if possible.
4366
4367 The alternative we try is to see if STMT is a destructive
4368 update style statement, which is like:
4369 b = phi (a, ...)
4370 a = c + b;
4371 In that case, we want to use the destructive update form to
4372 expose the possible vectorizer sum reduction opportunity.
4373 In that case, the third operand will be the phi node. This
4374 check is not performed if STMT is null.
4375
4376 We could, of course, try to be better as noted above, and do a
4377 lot of work to try to find these opportunities in >3 operand
4378 cases, but it is unlikely to be worth it. */
4379
4380 static void
swap_ops_for_binary_stmt(vec<operand_entry * > ops,unsigned int opindex,gimple * stmt)4381 swap_ops_for_binary_stmt (vec<operand_entry *> ops,
4382 unsigned int opindex, gimple *stmt)
4383 {
4384 operand_entry *oe1, *oe2, *oe3;
4385
4386 oe1 = ops[opindex];
4387 oe2 = ops[opindex + 1];
4388 oe3 = ops[opindex + 2];
4389
4390 if ((oe1->rank == oe2->rank
4391 && oe2->rank != oe3->rank)
4392 || (stmt && is_phi_for_stmt (stmt, oe3->op)
4393 && !is_phi_for_stmt (stmt, oe1->op)
4394 && !is_phi_for_stmt (stmt, oe2->op)))
4395 std::swap (*oe1, *oe3);
4396 else if ((oe1->rank == oe3->rank
4397 && oe2->rank != oe3->rank)
4398 || (stmt && is_phi_for_stmt (stmt, oe2->op)
4399 && !is_phi_for_stmt (stmt, oe1->op)
4400 && !is_phi_for_stmt (stmt, oe3->op)))
4401 std::swap (*oe1, *oe2);
4402 }
4403
4404 /* If definition of RHS1 or RHS2 dominates STMT, return the later of those
4405 two definitions, otherwise return STMT. */
4406
4407 static inline gimple *
find_insert_point(gimple * stmt,tree rhs1,tree rhs2)4408 find_insert_point (gimple *stmt, tree rhs1, tree rhs2)
4409 {
4410 if (TREE_CODE (rhs1) == SSA_NAME
4411 && reassoc_stmt_dominates_stmt_p (stmt, SSA_NAME_DEF_STMT (rhs1)))
4412 stmt = SSA_NAME_DEF_STMT (rhs1);
4413 if (TREE_CODE (rhs2) == SSA_NAME
4414 && reassoc_stmt_dominates_stmt_p (stmt, SSA_NAME_DEF_STMT (rhs2)))
4415 stmt = SSA_NAME_DEF_STMT (rhs2);
4416 return stmt;
4417 }
4418
4419 /* If the stmt that defines operand has to be inserted, insert it
4420 before the use. */
4421 static void
insert_stmt_before_use(gimple * stmt,gimple * stmt_to_insert)4422 insert_stmt_before_use (gimple *stmt, gimple *stmt_to_insert)
4423 {
4424 gcc_assert (is_gimple_assign (stmt_to_insert));
4425 tree rhs1 = gimple_assign_rhs1 (stmt_to_insert);
4426 tree rhs2 = gimple_assign_rhs2 (stmt_to_insert);
4427 gimple *insert_point = find_insert_point (stmt, rhs1, rhs2);
4428 gimple_stmt_iterator gsi = gsi_for_stmt (insert_point);
4429 gimple_set_uid (stmt_to_insert, gimple_uid (insert_point));
4430
4431 /* If the insert point is not stmt, then insert_point would be
4432 the point where operand rhs1 or rhs2 is defined. In this case,
4433 stmt_to_insert has to be inserted afterwards. This would
4434 only happen when the stmt insertion point is flexible. */
4435 if (stmt == insert_point)
4436 gsi_insert_before (&gsi, stmt_to_insert, GSI_NEW_STMT);
4437 else
4438 insert_stmt_after (stmt_to_insert, insert_point);
4439 }
4440
4441
/* Recursively rewrite our linearized statements so that the operators
   match those in OPS[OPINDEX], putting the computation in rank
   order.  Return new lhs.
   CHANGED is true if we shouldn't reuse the lhs SSA_NAME both in
   the current stmt and during recursive invocations.
   NEXT_CHANGED is true if we shouldn't reuse the lhs SSA_NAME in
   recursive invocations.  */

static tree
rewrite_expr_tree (gimple *stmt, unsigned int opindex,
		   vec<operand_entry *> ops, bool changed, bool next_changed)
{
  tree rhs1 = gimple_assign_rhs1 (stmt);
  tree rhs2 = gimple_assign_rhs2 (stmt);
  tree lhs = gimple_assign_lhs (stmt);
  operand_entry *oe;

  /* The final recursion case for this function is that you have
     exactly two operations left.
     If we had exactly one op in the entire list to start with, we
     would have never called this function, and the tail recursion
     rewrites them one at a time.  */
  if (opindex + 2 == ops.length ())
    {
      operand_entry *oe1, *oe2;

      oe1 = ops[opindex];
      oe2 = ops[opindex + 1];

      /* If the statement already has exactly these two operands in
	 this order, there is nothing to do.  */
      if (rhs1 != oe1->op || rhs2 != oe2->op)
	{
	  gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
	  unsigned int uid = gimple_uid (stmt);

	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "Transforming ");
	      print_gimple_stmt (dump_file, stmt, 0);
	    }

	  /* If the stmt that defines operand has to be inserted, insert it
	     before the use.  */
	  if (oe1->stmt_to_insert)
	    insert_stmt_before_use (stmt, oe1->stmt_to_insert);
	  if (oe2->stmt_to_insert)
	    insert_stmt_before_use (stmt, oe2->stmt_to_insert);
	  /* Even when changed is false, reassociation could have e.g. removed
	     some redundant operations, so unless we are just swapping the
	     arguments or unless there is no change at all (then we just
	     return lhs), force creation of a new SSA_NAME.  */
	  if (changed || ((rhs1 != oe2->op || rhs2 != oe1->op) && opindex))
	    {
	      /* Build a fresh statement with a new lhs so that debug
		 stmts referencing the old lhs are adjusted properly,
		 and place it no earlier than both operand defs.  */
	      gimple *insert_point
		= find_insert_point (stmt, oe1->op, oe2->op);
	      lhs = make_ssa_name (TREE_TYPE (lhs));
	      stmt
		= gimple_build_assign (lhs, gimple_assign_rhs_code (stmt),
				       oe1->op, oe2->op);
	      gimple_set_uid (stmt, uid);
	      gimple_set_visited (stmt, true);
	      if (insert_point == gsi_stmt (gsi))
		gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
	      else
		insert_stmt_after (stmt, insert_point);
	    }
	  else
	    {
	      /* Just a swap of the two operands; update in place.  */
	      gcc_checking_assert (find_insert_point (stmt, oe1->op, oe2->op)
				   == stmt);
	      gimple_assign_set_rhs1 (stmt, oe1->op);
	      gimple_assign_set_rhs2 (stmt, oe2->op);
	      update_stmt (stmt);
	    }

	  /* The old rhs1 chain may now be dead; clean it up.  */
	  if (rhs1 != oe1->op && rhs1 != oe2->op)
	    remove_visited_stmt_chain (rhs1);

	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, " into ");
	      print_gimple_stmt (dump_file, stmt, 0);
	    }
	}
      return lhs;
    }

  /* If we hit here, we should have 3 or more ops left.  */
  gcc_assert (opindex + 2 < ops.length ());

  /* Rewrite the next operator.  */
  oe = ops[opindex];

  /* If the stmt that defines operand has to be inserted, insert it
     before the use.  */
  if (oe->stmt_to_insert)
    insert_stmt_before_use (stmt, oe->stmt_to_insert);

  /* Recurse on the LHS of the binary operator, which is guaranteed to
     be the non-leaf side.  */
  tree new_rhs1
    = rewrite_expr_tree (SSA_NAME_DEF_STMT (rhs1), opindex + 1, ops,
			 changed || oe->op != rhs2 || next_changed,
			 false);

  if (oe->op != rhs2 || new_rhs1 != rhs1)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "Transforming ");
	  print_gimple_stmt (dump_file, stmt, 0);
	}

      /* If changed is false, this is either opindex == 0
	 or all outer rhs2's were equal to corresponding oe->op,
	 and powi_result is NULL.
	 That means lhs is equivalent before and after reassociation.
	 Otherwise ensure the old lhs SSA_NAME is not reused and
	 create a new stmt as well, so that any debug stmts will be
	 properly adjusted.  */
      if (changed)
	{
	  gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
	  unsigned int uid = gimple_uid (stmt);
	  gimple *insert_point = find_insert_point (stmt, new_rhs1, oe->op);

	  lhs = make_ssa_name (TREE_TYPE (lhs));
	  stmt = gimple_build_assign (lhs, gimple_assign_rhs_code (stmt),
				      new_rhs1, oe->op);
	  gimple_set_uid (stmt, uid);
	  gimple_set_visited (stmt, true);
	  if (insert_point == gsi_stmt (gsi))
	    gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
	  else
	    insert_stmt_after (stmt, insert_point);
	}
      else
	{
	  gcc_checking_assert (find_insert_point (stmt, new_rhs1, oe->op)
			       == stmt);
	  gimple_assign_set_rhs1 (stmt, new_rhs1);
	  gimple_assign_set_rhs2 (stmt, oe->op);
	  update_stmt (stmt);
	}

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, " into ");
	  print_gimple_stmt (dump_file, stmt, 0);
	}
    }
  return lhs;
}
4594
4595 /* Find out how many cycles we need to compute statements chain.
4596 OPS_NUM holds number os statements in a chain. CPU_WIDTH is a
4597 maximum number of independent statements we may execute per cycle. */
4598
4599 static int
get_required_cycles(int ops_num,int cpu_width)4600 get_required_cycles (int ops_num, int cpu_width)
4601 {
4602 int res;
4603 int elog;
4604 unsigned int rest;
4605
4606 /* While we have more than 2 * cpu_width operands
4607 we may reduce number of operands by cpu_width
4608 per cycle. */
4609 res = ops_num / (2 * cpu_width);
4610
4611 /* Remained operands count may be reduced twice per cycle
4612 until we have only one operand. */
4613 rest = (unsigned)(ops_num - res * cpu_width);
4614 elog = exact_log2 (rest);
4615 if (elog >= 0)
4616 res += elog;
4617 else
4618 res += floor_log2 (rest) + 1;
4619
4620 return res;
4621 }
4622
4623 /* Returns an optimal number of registers to use for computation of
4624 given statements. */
4625
4626 static int
get_reassociation_width(int ops_num,enum tree_code opc,machine_mode mode)4627 get_reassociation_width (int ops_num, enum tree_code opc,
4628 machine_mode mode)
4629 {
4630 int param_width = PARAM_VALUE (PARAM_TREE_REASSOC_WIDTH);
4631 int width;
4632 int width_min;
4633 int cycles_best;
4634
4635 if (param_width > 0)
4636 width = param_width;
4637 else
4638 width = targetm.sched.reassociation_width (opc, mode);
4639
4640 if (width == 1)
4641 return width;
4642
4643 /* Get the minimal time required for sequence computation. */
4644 cycles_best = get_required_cycles (ops_num, width);
4645
4646 /* Check if we may use less width and still compute sequence for
4647 the same time. It will allow us to reduce registers usage.
4648 get_required_cycles is monotonically increasing with lower width
4649 so we can perform a binary search for the minimal width that still
4650 results in the optimal cycle count. */
4651 width_min = 1;
4652 while (width > width_min)
4653 {
4654 int width_mid = (width + width_min) / 2;
4655
4656 if (get_required_cycles (ops_num, width_mid) == cycles_best)
4657 width = width_mid;
4658 else if (width_min < width_mid)
4659 width_min = width_mid;
4660 else
4661 break;
4662 }
4663
4664 return width;
4665 }
4666
/* Recursively rewrite our linearized statements so that the operators
   match those in OPS[OPINDEX], putting the computation in rank
   order and trying to allow operations to be executed in
   parallel.  */

static void
rewrite_expr_tree_parallel (gassign *stmt, int width,
			    vec<operand_entry *> ops)
{
  enum tree_code opcode = gimple_assign_rhs_code (stmt);
  int op_num = ops.length ();
  gcc_assert (op_num > 0);
  int stmt_num = op_num - 1;
  gimple **stmts = XALLOCAVEC (gimple *, stmt_num);
  /* Index of the next unconsumed leaf operand, walking down.  */
  int op_index = op_num - 1;
  /* Index of the next already-emitted statement whose result can be
     reused as an operand.  */
  int stmt_index = 0;
  /* Non-zero marks the end of the range of emitted statements whose
     results are ready for consumption; zero means take leaves.  */
  int ready_stmts_end = 0;
  int i = 0;
  gimple *stmt1 = NULL, *stmt2 = NULL;
  tree last_rhs1 = gimple_assign_rhs1 (stmt);

  /* We start expression rewriting from the top statements.
     So, in this loop we create a full list of statements
     we will work with.  */
  stmts[stmt_num - 1] = stmt;
  for (i = stmt_num - 2; i >= 0; i--)
    stmts[i] = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmts[i+1]));

  for (i = 0; i < stmt_num; i++)
    {
      tree op1, op2;

      /* Determine whether we should use results of
	 already handled statements or not.  */
      if (ready_stmts_end == 0
	  && (i - stmt_index >= width || op_index < 1))
	ready_stmts_end = i;

      /* Now we choose operands for the next statement.  Non zero
	 value in ready_stmts_end means here that we should use
	 the result of already generated statements as new operand.  */
      if (ready_stmts_end > 0)
	{
	  op1 = gimple_assign_lhs (stmts[stmt_index++]);
	  if (ready_stmts_end > stmt_index)
	    op2 = gimple_assign_lhs (stmts[stmt_index++]);
	  else if (op_index >= 0)
	    {
	      /* Mix in one remaining leaf operand.  */
	      operand_entry *oe = ops[op_index--];
	      stmt2 = oe->stmt_to_insert;
	      op2 = oe->op;
	    }
	  else
	    {
	      gcc_assert (stmt_index < i);
	      op2 = gimple_assign_lhs (stmts[stmt_index++]);
	    }

	  if (stmt_index >= ready_stmts_end)
	    ready_stmts_end = 0;
	}
      else
	{
	  /* Consume two leaf operands, possibly swapping three
	     upcoming ones first for better pairing.  */
	  if (op_index > 1)
	    swap_ops_for_binary_stmt (ops, op_index - 2, NULL);
	  operand_entry *oe2 = ops[op_index--];
	  operand_entry *oe1 = ops[op_index--];
	  op2 = oe2->op;
	  stmt2 = oe2->stmt_to_insert;
	  op1 = oe1->op;
	  stmt1 = oe1->stmt_to_insert;
	}

      /* If we emit the last statement then we should put
	 operands into the last statement.  It will also
	 break the loop.  */
      if (op_index < 0 && stmt_index == i)
	i = stmt_num - 1;

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "Transforming ");
	  print_gimple_stmt (dump_file, stmts[i], 0);
	}

      /* If the stmt that defines operand has to be inserted, insert it
	 before the use.  */
      if (stmt1)
	insert_stmt_before_use (stmts[i], stmt1);
      if (stmt2)
	insert_stmt_before_use (stmts[i], stmt2);
      stmt1 = stmt2 = NULL;

      /* We keep original statement only for the last one.  All
	 others are recreated.  */
      if (i == stmt_num - 1)
	{
	  gimple_assign_set_rhs1 (stmts[i], op1);
	  gimple_assign_set_rhs2 (stmts[i], op2);
	  update_stmt (stmts[i]);
	}
      else
	{
	  stmts[i] = build_and_add_sum (TREE_TYPE (last_rhs1), op1, op2, opcode);
	}
      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, " into ");
	  print_gimple_stmt (dump_file, stmts[i], 0);
	}
    }

  /* Statements feeding the original rhs1 chain may now be dead.  */
  remove_visited_stmt_chain (last_rhs1);
}
4781
/* Transform STMT, which is really (A +B) + (C + D) into the left
   linear form, ((A+B)+C)+D.
   Recurse on D if necessary.  */

static void
linearize_expr (gimple *stmt)
{
  gimple_stmt_iterator gsi;
  gimple *binlhs = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
  gimple *binrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
  gimple *oldbinrhs = binrhs;
  enum tree_code rhscode = gimple_assign_rhs_code (stmt);
  gimple *newbinrhs = NULL;
  struct loop *loop = loop_containing_stmt (stmt);
  tree lhs = gimple_assign_lhs (stmt);

  gcc_assert (is_reassociable_op (binlhs, rhscode, loop)
	      && is_reassociable_op (binrhs, rhscode, loop));

  gsi = gsi_for_stmt (stmt);

  /* Rewrite lhs = (A + B) + (C + D) as
       tmp = (A + B) + C    (the new binrhs, inserted before STMT)
       lhs = tmp + D.  */
  gimple_assign_set_rhs2 (stmt, gimple_assign_rhs1 (binrhs));
  binrhs = gimple_build_assign (make_ssa_name (TREE_TYPE (lhs)),
				gimple_assign_rhs_code (binrhs),
				gimple_assign_lhs (binlhs),
				gimple_assign_rhs2 (binrhs));
  gimple_assign_set_rhs1 (stmt, gimple_assign_lhs (binrhs));
  gsi_insert_before (&gsi, binrhs, GSI_SAME_STMT);
  gimple_set_uid (binrhs, gimple_uid (stmt));

  if (TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME)
    newbinrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Linearized: ");
      print_gimple_stmt (dump_file, stmt, 0);
    }

  reassociate_stats.linearized++;
  update_stmt (stmt);

  /* The original (C + D) statement is dead now; remove it.  */
  gsi = gsi_for_stmt (oldbinrhs);
  reassoc_remove_stmt (&gsi);
  release_defs (oldbinrhs);

  gimple_set_visited (stmt, true);
  gimple_set_visited (binlhs, true);
  gimple_set_visited (binrhs, true);

  /* Tail recurse on the new rhs if it still needs reassociation.  */
  if (newbinrhs && is_reassociable_op (newbinrhs, rhscode, loop))
    /* ??? This should probably be linearize_expr (newbinrhs) but I don't
       want to change the algorithm while converting to tuples.  */
    linearize_expr (stmt);
}
4838
4839 /* If LHS has a single immediate use that is a GIMPLE_ASSIGN statement, return
4840 it. Otherwise, return NULL. */
4841
4842 static gimple *
get_single_immediate_use(tree lhs)4843 get_single_immediate_use (tree lhs)
4844 {
4845 use_operand_p immuse;
4846 gimple *immusestmt;
4847
4848 if (TREE_CODE (lhs) == SSA_NAME
4849 && single_imm_use (lhs, &immuse, &immusestmt)
4850 && is_gimple_assign (immusestmt))
4851 return immusestmt;
4852
4853 return NULL;
4854 }
4855
/* Recursively negate the value of TONEGATE, and return the SSA_NAME
   representing the negated value.  Insertions of any necessary
   instructions go before GSI.
   This function is recursive in that, if you hand it "a_5" as the
   value to negate, and a_5 is defined by "a_5 = b_3 + b_4", it will
   transform b_3 + b_4 into a_5 = -b_3 + -b_4.  */

static tree
negate_value (tree tonegate, gimple_stmt_iterator *gsip)
{
  gimple *negatedefstmt = NULL;
  tree resultofnegate;
  gimple_stmt_iterator gsi;
  unsigned int uid;

  /* If we are trying to negate a name, defined by an add, negate the
     add operands instead.  */
  if (TREE_CODE (tonegate) == SSA_NAME)
    negatedefstmt = SSA_NAME_DEF_STMT (tonegate);
  if (TREE_CODE (tonegate) == SSA_NAME
      && is_gimple_assign (negatedefstmt)
      && TREE_CODE (gimple_assign_lhs (negatedefstmt)) == SSA_NAME
      && has_single_use (gimple_assign_lhs (negatedefstmt))
      && gimple_assign_rhs_code (negatedefstmt) == PLUS_EXPR)
    {
      tree rhs1 = gimple_assign_rhs1 (negatedefstmt);
      tree rhs2 = gimple_assign_rhs2 (negatedefstmt);
      tree lhs = gimple_assign_lhs (negatedefstmt);
      gimple *g;

      /* Re-fetch the iterator after each recursive call, since the
	 recursion may have inserted statements and invalidated it.  */
      gsi = gsi_for_stmt (negatedefstmt);
      rhs1 = negate_value (rhs1, &gsi);

      gsi = gsi_for_stmt (negatedefstmt);
      rhs2 = negate_value (rhs2, &gsi);

      /* Build a fresh add of the two negated operands before the old
	 definition; the old stmt is left for later cleanup.  */
      gsi = gsi_for_stmt (negatedefstmt);
      lhs = make_ssa_name (TREE_TYPE (lhs));
      gimple_set_visited (negatedefstmt, true);
      g = gimple_build_assign (lhs, PLUS_EXPR, rhs1, rhs2);
      gimple_set_uid (g, gimple_uid (negatedefstmt));
      gsi_insert_before (&gsi, g, GSI_SAME_STMT);
      return lhs;
    }

  tonegate = fold_build1 (NEGATE_EXPR, TREE_TYPE (tonegate), tonegate);
  resultofnegate = force_gimple_operand_gsi (gsip, tonegate, true,
					     NULL_TREE, true, GSI_SAME_STMT);
  gsi = *gsip;
  uid = gimple_uid (gsi_stmt (gsi));
  /* force_gimple_operand_gsi may have inserted several statements with
     uid 0; walk backwards and give them the uid of the insertion
     point so later uid comparisons stay meaningful.  */
  for (gsi_prev (&gsi); !gsi_end_p (gsi); gsi_prev (&gsi))
    {
      gimple *stmt = gsi_stmt (gsi);
      if (gimple_uid (stmt) != 0)
	break;
      gimple_set_uid (stmt, uid);
    }
  return resultofnegate;
}
4915
4916 /* Return true if we should break up the subtract in STMT into an add
4917 with negate. This is true when we the subtract operands are really
4918 adds, or the subtract itself is used in an add expression. In
4919 either case, breaking up the subtract into an add with negate
4920 exposes the adds to reassociation. */
4921
4922 static bool
should_break_up_subtract(gimple * stmt)4923 should_break_up_subtract (gimple *stmt)
4924 {
4925 tree lhs = gimple_assign_lhs (stmt);
4926 tree binlhs = gimple_assign_rhs1 (stmt);
4927 tree binrhs = gimple_assign_rhs2 (stmt);
4928 gimple *immusestmt;
4929 struct loop *loop = loop_containing_stmt (stmt);
4930
4931 if (TREE_CODE (binlhs) == SSA_NAME
4932 && is_reassociable_op (SSA_NAME_DEF_STMT (binlhs), PLUS_EXPR, loop))
4933 return true;
4934
4935 if (TREE_CODE (binrhs) == SSA_NAME
4936 && is_reassociable_op (SSA_NAME_DEF_STMT (binrhs), PLUS_EXPR, loop))
4937 return true;
4938
4939 if (TREE_CODE (lhs) == SSA_NAME
4940 && (immusestmt = get_single_immediate_use (lhs))
4941 && is_gimple_assign (immusestmt)
4942 && (gimple_assign_rhs_code (immusestmt) == PLUS_EXPR
4943 || (gimple_assign_rhs_code (immusestmt) == MINUS_EXPR
4944 && gimple_assign_rhs1 (immusestmt) == lhs)
4945 || gimple_assign_rhs_code (immusestmt) == MULT_EXPR))
4946 return true;
4947 return false;
4948 }
4949
4950 /* Transform STMT from A - B into A + -B. */
4951
4952 static void
break_up_subtract(gimple * stmt,gimple_stmt_iterator * gsip)4953 break_up_subtract (gimple *stmt, gimple_stmt_iterator *gsip)
4954 {
4955 tree rhs1 = gimple_assign_rhs1 (stmt);
4956 tree rhs2 = gimple_assign_rhs2 (stmt);
4957
4958 if (dump_file && (dump_flags & TDF_DETAILS))
4959 {
4960 fprintf (dump_file, "Breaking up subtract ");
4961 print_gimple_stmt (dump_file, stmt, 0);
4962 }
4963
4964 rhs2 = negate_value (rhs2, gsip);
4965 gimple_assign_set_rhs_with_ops (gsip, PLUS_EXPR, rhs1, rhs2);
4966 update_stmt (stmt);
4967 }
4968
4969 /* Determine whether STMT is a builtin call that raises an SSA name
4970 to an integer power and has only one use. If so, and this is early
4971 reassociation and unsafe math optimizations are permitted, place
4972 the SSA name in *BASE and the exponent in *EXPONENT, and return TRUE.
4973 If any of these conditions does not hold, return FALSE. */
4974
4975 static bool
acceptable_pow_call(gcall * stmt,tree * base,HOST_WIDE_INT * exponent)4976 acceptable_pow_call (gcall *stmt, tree *base, HOST_WIDE_INT *exponent)
4977 {
4978 tree arg1;
4979 REAL_VALUE_TYPE c, cint;
4980
4981 switch (gimple_call_combined_fn (stmt))
4982 {
4983 CASE_CFN_POW:
4984 if (flag_errno_math)
4985 return false;
4986
4987 *base = gimple_call_arg (stmt, 0);
4988 arg1 = gimple_call_arg (stmt, 1);
4989
4990 if (TREE_CODE (arg1) != REAL_CST)
4991 return false;
4992
4993 c = TREE_REAL_CST (arg1);
4994
4995 if (REAL_EXP (&c) > HOST_BITS_PER_WIDE_INT)
4996 return false;
4997
4998 *exponent = real_to_integer (&c);
4999 real_from_integer (&cint, VOIDmode, *exponent, SIGNED);
5000 if (!real_identical (&c, &cint))
5001 return false;
5002
5003 break;
5004
5005 CASE_CFN_POWI:
5006 *base = gimple_call_arg (stmt, 0);
5007 arg1 = gimple_call_arg (stmt, 1);
5008
5009 if (!tree_fits_shwi_p (arg1))
5010 return false;
5011
5012 *exponent = tree_to_shwi (arg1);
5013 break;
5014
5015 default:
5016 return false;
5017 }
5018
5019 /* Expanding negative exponents is generally unproductive, so we don't
5020 complicate matters with those. Exponents of zero and one should
5021 have been handled by expression folding. */
5022 if (*exponent < 2 || TREE_CODE (*base) != SSA_NAME)
5023 return false;
5024
5025 return true;
5026 }
5027
5028 /* Try to derive and add operand entry for OP to *OPS. Return false if
5029 unsuccessful. */
5030
5031 static bool
try_special_add_to_ops(vec<operand_entry * > * ops,enum tree_code code,tree op,gimple * def_stmt)5032 try_special_add_to_ops (vec<operand_entry *> *ops,
5033 enum tree_code code,
5034 tree op, gimple* def_stmt)
5035 {
5036 tree base = NULL_TREE;
5037 HOST_WIDE_INT exponent = 0;
5038
5039 if (TREE_CODE (op) != SSA_NAME
5040 || ! has_single_use (op))
5041 return false;
5042
5043 if (code == MULT_EXPR
5044 && reassoc_insert_powi_p
5045 && flag_unsafe_math_optimizations
5046 && is_gimple_call (def_stmt)
5047 && acceptable_pow_call (as_a <gcall *> (def_stmt), &base, &exponent))
5048 {
5049 add_repeat_to_ops_vec (ops, base, exponent);
5050 gimple_set_visited (def_stmt, true);
5051 return true;
5052 }
5053 else if (code == MULT_EXPR
5054 && is_gimple_assign (def_stmt)
5055 && gimple_assign_rhs_code (def_stmt) == NEGATE_EXPR
5056 && !HONOR_SNANS (TREE_TYPE (op))
5057 && (!HONOR_SIGNED_ZEROS (TREE_TYPE (op))
5058 || !COMPLEX_FLOAT_TYPE_P (TREE_TYPE (op))))
5059 {
5060 tree rhs1 = gimple_assign_rhs1 (def_stmt);
5061 tree cst = build_minus_one_cst (TREE_TYPE (op));
5062 add_to_ops_vec (ops, rhs1);
5063 add_to_ops_vec (ops, cst);
5064 gimple_set_visited (def_stmt, true);
5065 return true;
5066 }
5067
5068 return false;
5069 }
5070
5071 /* Recursively linearize a binary expression that is the RHS of STMT.
5072 Place the operands of the expression tree in the vector named OPS. */
5073
5074 static void
linearize_expr_tree(vec<operand_entry * > * ops,gimple * stmt,bool is_associative,bool set_visited)5075 linearize_expr_tree (vec<operand_entry *> *ops, gimple *stmt,
5076 bool is_associative, bool set_visited)
5077 {
5078 tree binlhs = gimple_assign_rhs1 (stmt);
5079 tree binrhs = gimple_assign_rhs2 (stmt);
5080 gimple *binlhsdef = NULL, *binrhsdef = NULL;
5081 bool binlhsisreassoc = false;
5082 bool binrhsisreassoc = false;
5083 enum tree_code rhscode = gimple_assign_rhs_code (stmt);
5084 struct loop *loop = loop_containing_stmt (stmt);
5085
5086 if (set_visited)
5087 gimple_set_visited (stmt, true);
5088
5089 if (TREE_CODE (binlhs) == SSA_NAME)
5090 {
5091 binlhsdef = SSA_NAME_DEF_STMT (binlhs);
5092 binlhsisreassoc = (is_reassociable_op (binlhsdef, rhscode, loop)
5093 && !stmt_could_throw_p (binlhsdef));
5094 }
5095
5096 if (TREE_CODE (binrhs) == SSA_NAME)
5097 {
5098 binrhsdef = SSA_NAME_DEF_STMT (binrhs);
5099 binrhsisreassoc = (is_reassociable_op (binrhsdef, rhscode, loop)
5100 && !stmt_could_throw_p (binrhsdef));
5101 }
5102
5103 /* If the LHS is not reassociable, but the RHS is, we need to swap
5104 them. If neither is reassociable, there is nothing we can do, so
5105 just put them in the ops vector. If the LHS is reassociable,
5106 linearize it. If both are reassociable, then linearize the RHS
5107 and the LHS. */
5108
5109 if (!binlhsisreassoc)
5110 {
5111 /* If this is not a associative operation like division, give up. */
5112 if (!is_associative)
5113 {
5114 add_to_ops_vec (ops, binrhs);
5115 return;
5116 }
5117
5118 if (!binrhsisreassoc)
5119 {
5120 if (!try_special_add_to_ops (ops, rhscode, binrhs, binrhsdef))
5121 add_to_ops_vec (ops, binrhs);
5122
5123 if (!try_special_add_to_ops (ops, rhscode, binlhs, binlhsdef))
5124 add_to_ops_vec (ops, binlhs);
5125
5126 return;
5127 }
5128
5129 if (dump_file && (dump_flags & TDF_DETAILS))
5130 {
5131 fprintf (dump_file, "swapping operands of ");
5132 print_gimple_stmt (dump_file, stmt, 0);
5133 }
5134
5135 swap_ssa_operands (stmt,
5136 gimple_assign_rhs1_ptr (stmt),
5137 gimple_assign_rhs2_ptr (stmt));
5138 update_stmt (stmt);
5139
5140 if (dump_file && (dump_flags & TDF_DETAILS))
5141 {
5142 fprintf (dump_file, " is now ");
5143 print_gimple_stmt (dump_file, stmt, 0);
5144 }
5145
5146 /* We want to make it so the lhs is always the reassociative op,
5147 so swap. */
5148 std::swap (binlhs, binrhs);
5149 }
5150 else if (binrhsisreassoc)
5151 {
5152 linearize_expr (stmt);
5153 binlhs = gimple_assign_rhs1 (stmt);
5154 binrhs = gimple_assign_rhs2 (stmt);
5155 }
5156
5157 gcc_assert (TREE_CODE (binrhs) != SSA_NAME
5158 || !is_reassociable_op (SSA_NAME_DEF_STMT (binrhs),
5159 rhscode, loop));
5160 linearize_expr_tree (ops, SSA_NAME_DEF_STMT (binlhs),
5161 is_associative, set_visited);
5162
5163 if (!try_special_add_to_ops (ops, rhscode, binrhs, binrhsdef))
5164 add_to_ops_vec (ops, binrhs);
5165 }
5166
/* Repropagate the negates back into subtracts, since no other pass
   currently does it.  Walks the global PLUS_NEGATES vector, which holds
   the SSA results of NEGATE_EXPRs recorded earlier by the pass, and
   folds each negate into its single user where possible.  */

static void
repropagate_negates (void)
{
  unsigned int i = 0;
  tree negate;

  FOR_EACH_VEC_ELT (plus_negates, i, negate)
    {
      gimple *user = get_single_immediate_use (negate);

      /* Only a negate with exactly one use, feeding another assignment,
	 can be folded into that user.  */
      if (!user || !is_gimple_assign (user))
	continue;

      /* The negate operand can be either operand of a PLUS_EXPR
	 (it can be the LHS if the RHS is a constant for example).

	 Force the negate operand to the RHS of the PLUS_EXPR, then
	 transform the PLUS_EXPR into a MINUS_EXPR.  */
      if (gimple_assign_rhs_code (user) == PLUS_EXPR)
	{
	  /* If the negated operand appears on the LHS of the
	     PLUS_EXPR, exchange the operands of the PLUS_EXPR
	     to force the negated operand to the RHS of the PLUS_EXPR.  */
	  if (gimple_assign_rhs1 (user) == negate)
	    {
	      swap_ssa_operands (user,
				 gimple_assign_rhs1_ptr (user),
				 gimple_assign_rhs2_ptr (user));
	    }

	  /* Now transform the PLUS_EXPR into a MINUS_EXPR and replace
	     the RHS of the PLUS_EXPR with the operand of the NEGATE_EXPR.
	     I.e. "y = a + (-b)" becomes "y = a - b".  (The check is
	     needed because the swap above only happens when NEGATE was
	     rhs1; NEGATE may not appear at all after prior folding.)  */
	  if (gimple_assign_rhs2 (user) == negate)
	    {
	      tree rhs1 = gimple_assign_rhs1 (user);
	      tree rhs2 = gimple_assign_rhs1 (SSA_NAME_DEF_STMT (negate));
	      gimple_stmt_iterator gsi = gsi_for_stmt (user);
	      gimple_assign_set_rhs_with_ops (&gsi, MINUS_EXPR, rhs1, rhs2);
	      update_stmt (user);
	    }
	}
      else if (gimple_assign_rhs_code (user) == MINUS_EXPR)
	{
	  if (gimple_assign_rhs1 (user) == negate)
	    {
	      /* We have
		   x = -a
		   y = x - b
		 which we transform into
		   x = a + b
		   y = -x .
		 This pushes down the negate which we possibly can merge
		 into some other operation, hence insert it into the
		 plus_negates vector.  */
	      gimple *feed = SSA_NAME_DEF_STMT (negate);
	      tree a = gimple_assign_rhs1 (feed);
	      tree b = gimple_assign_rhs2 (user);
	      gimple_stmt_iterator gsi = gsi_for_stmt (feed);
	      gimple_stmt_iterator gsi2 = gsi_for_stmt (user);
	      tree x = make_ssa_name (TREE_TYPE (gimple_assign_lhs (feed)));
	      gimple *g = gimple_build_assign (x, PLUS_EXPR, a, b);
	      gsi_insert_before (&gsi2, g, GSI_SAME_STMT);
	      gimple_assign_set_rhs_with_ops (&gsi2, NEGATE_EXPR, x);
	      /* Re-fetch USER: gimple_assign_set_rhs_with_ops may have
		 replaced the statement object.  */
	      user = gsi_stmt (gsi2);
	      update_stmt (user);
	      /* The old negate feeding stmt is now dead; remove it and
		 release its SSA defs.  */
	      reassoc_remove_stmt (&gsi);
	      release_defs (feed);
	      plus_negates.safe_push (gimple_assign_lhs (user));
	    }
	  else
	    {
	      /* Transform "x = -a; y = b - x" into "y = b + a", getting
		 rid of one operation.  */
	      gimple *feed = SSA_NAME_DEF_STMT (negate);
	      tree a = gimple_assign_rhs1 (feed);
	      tree rhs1 = gimple_assign_rhs1 (user);
	      gimple_stmt_iterator gsi = gsi_for_stmt (user);
	      gimple_assign_set_rhs_with_ops (&gsi, PLUS_EXPR, rhs1, a);
	      update_stmt (gsi_stmt (gsi));
	    }
	}
    }
}
5253
5254 /* Returns true if OP is of a type for which we can do reassociation.
5255 That is for integral or non-saturating fixed-point types, and for
5256 floating point type when associative-math is enabled. */
5257
5258 static bool
can_reassociate_p(tree op)5259 can_reassociate_p (tree op)
5260 {
5261 tree type = TREE_TYPE (op);
5262 if (TREE_CODE (op) == SSA_NAME && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (op))
5263 return false;
5264 if ((ANY_INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_WRAPS (type))
5265 || NON_SAT_FIXED_POINT_TYPE_P (type)
5266 || (flag_associative_math && FLOAT_TYPE_P (type)))
5267 return true;
5268 return false;
5269 }
5270
5271 /* Break up subtract operations in block BB.
5272
5273 We do this top down because we don't know whether the subtract is
5274 part of a possible chain of reassociation except at the top.
5275
5276 IE given
5277 d = f + g
5278 c = a + e
5279 b = c - d
5280 q = b - r
5281 k = t - q
5282
5283 we want to break up k = t - q, but we won't until we've transformed q
5284 = b - r, which won't be broken up until we transform b = c - d.
5285
5286 En passant, clear the GIMPLE visited flag on every statement
5287 and set UIDs within each basic block. */
5288
5289 static void
break_up_subtract_bb(basic_block bb)5290 break_up_subtract_bb (basic_block bb)
5291 {
5292 gimple_stmt_iterator gsi;
5293 basic_block son;
5294 unsigned int uid = 1;
5295
5296 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
5297 {
5298 gimple *stmt = gsi_stmt (gsi);
5299 gimple_set_visited (stmt, false);
5300 gimple_set_uid (stmt, uid++);
5301
5302 if (!is_gimple_assign (stmt)
5303 || !can_reassociate_p (gimple_assign_lhs (stmt)))
5304 continue;
5305
5306 /* Look for simple gimple subtract operations. */
5307 if (gimple_assign_rhs_code (stmt) == MINUS_EXPR)
5308 {
5309 if (!can_reassociate_p (gimple_assign_rhs1 (stmt))
5310 || !can_reassociate_p (gimple_assign_rhs2 (stmt)))
5311 continue;
5312
5313 /* Check for a subtract used only in an addition. If this
5314 is the case, transform it into add of a negate for better
5315 reassociation. IE transform C = A-B into C = A + -B if C
5316 is only used in an addition. */
5317 if (should_break_up_subtract (stmt))
5318 break_up_subtract (stmt, &gsi);
5319 }
5320 else if (gimple_assign_rhs_code (stmt) == NEGATE_EXPR
5321 && can_reassociate_p (gimple_assign_rhs1 (stmt)))
5322 plus_negates.safe_push (gimple_assign_lhs (stmt));
5323 }
5324 for (son = first_dom_son (CDI_DOMINATORS, bb);
5325 son;
5326 son = next_dom_son (CDI_DOMINATORS, son))
5327 break_up_subtract_bb (son);
5328 }
5329
/* Used for repeated factor analysis.  */
struct repeat_factor
{
  /* An SSA name that occurs in a multiply chain.  */
  tree factor;

  /* Cached rank of the factor.  */
  unsigned rank;

  /* Number of occurrences of the factor in the chain.  */
  HOST_WIDE_INT count;

  /* An SSA name representing the product of this factor and
     all factors appearing later in the repeated factor vector.  */
  tree repr;
};


/* Scratch vector of repeated factors; created and released within
   attempt_builtin_powi.  */
static vec<repeat_factor> repeat_factor_vec;
5349
5350 /* Used for sorting the repeat factor vector. Sort primarily by
5351 ascending occurrence count, secondarily by descending rank. */
5352
5353 static int
compare_repeat_factors(const void * x1,const void * x2)5354 compare_repeat_factors (const void *x1, const void *x2)
5355 {
5356 const repeat_factor *rf1 = (const repeat_factor *) x1;
5357 const repeat_factor *rf2 = (const repeat_factor *) x2;
5358
5359 if (rf1->count != rf2->count)
5360 return rf1->count - rf2->count;
5361
5362 return rf2->rank - rf1->rank;
5363 }
5364
/* Look for repeated operands in OPS in the multiply tree rooted at
   STMT.  Replace them with an optimal sequence of multiplies and powi
   builtin calls, and remove the used operands from OPS.  Return an
   SSA name representing the value of the replacement sequence, or
   NULL_TREE if no powi opportunity was found.  */

static tree
attempt_builtin_powi (gimple *stmt, vec<operand_entry *> *ops)
{
  unsigned i, j, vec_len;
  int ii;
  operand_entry *oe;
  repeat_factor *rf1, *rf2;
  repeat_factor rfnew;
  tree result = NULL_TREE;
  tree target_ssa, iter_result;
  tree type = TREE_TYPE (gimple_get_lhs (stmt));
  tree powi_fndecl = mathfn_built_in (type, BUILT_IN_POWI);
  gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
  gimple *mul_stmt, *pow_stmt;

  /* Nothing to do if BUILT_IN_POWI doesn't exist for this type and
     target.  */
  if (!powi_fndecl)
    return NULL_TREE;

  /* Allocate the repeated factor vector.  */
  repeat_factor_vec.create (10);

  /* Scan the OPS vector for all SSA names in the product and build
     up a vector of occurrence counts for each factor.  */
  FOR_EACH_VEC_ELT (*ops, i, oe)
    {
      if (TREE_CODE (oe->op) == SSA_NAME)
	{
	  FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
	    {
	      if (rf1->factor == oe->op)
		{
		  rf1->count += oe->count;
		  break;
		}
	    }

	  /* J ran off the end, so OE->OP was not seen before; record
	     it as a new factor.  */
	  if (j >= repeat_factor_vec.length ())
	    {
	      rfnew.factor = oe->op;
	      rfnew.rank = oe->rank;
	      rfnew.count = oe->count;
	      rfnew.repr = NULL_TREE;
	      repeat_factor_vec.safe_push (rfnew);
	    }
	}
    }

  /* Sort the repeated factor vector by (a) increasing occurrence count,
     and (b) decreasing rank.  */
  repeat_factor_vec.qsort (compare_repeat_factors);

  /* It is generally best to combine as many base factors as possible
     into a product before applying __builtin_powi to the result.
     However, the sort order chosen for the repeated factor vector
     allows us to cache partial results for the product of the base
     factors for subsequent use.  When we already have a cached partial
     result from a previous iteration, it is best to make use of it
     before looking for another __builtin_pow opportunity.

     As an example, consider x * x * y * y * y * z * z * z * z.
     We want to first compose the product x * y * z, raise it to the
     second power, then multiply this by y * z, and finally multiply
     by z.  This can be done in 5 multiplies provided we cache y * z
     for use in both expressions:

	t1 = y * z
	t2 = t1 * x
	t3 = t2 * t2
	t4 = t1 * t3
	result = t4 * z

     If we instead ignored the cached y * z and first multiplied by
     the __builtin_pow opportunity z * z, we would get the inferior:

	t1 = y * z
	t2 = t1 * x
	t3 = t2 * t2
	t4 = z * z
	t5 = t3 * t4
	result = t5 * y  */

  vec_len = repeat_factor_vec.length ();

  /* Repeatedly look for opportunities to create a builtin_powi call.  */
  while (true)
    {
      HOST_WIDE_INT power;

      /* First look for the largest cached product of factors from
	 preceding iterations.  If found, create a builtin_powi for
	 it if the minimum occurrence count for its factors is at
	 least 2, or just use this cached product as our next
	 multiplicand if the minimum occurrence count is 1.  */
      FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
	{
	  if (rf1->repr && rf1->count > 0)
	    break;
	}

      /* J now indexes the cached product found above, if any.  */
      if (j < vec_len)
	{
	  power = rf1->count;

	  if (power == 1)
	    {
	      iter_result = rf1->repr;

	      if (dump_file && (dump_flags & TDF_DETAILS))
		{
		  unsigned elt;
		  repeat_factor *rf;
		  fputs ("Multiplying by cached product ", dump_file);
		  for (elt = j; elt < vec_len; elt++)
		    {
		      rf = &repeat_factor_vec[elt];
		      print_generic_expr (dump_file, rf->factor);
		      if (elt < vec_len - 1)
			fputs (" * ", dump_file);
		    }
		  fputs ("\n", dump_file);
		}
	    }
	  else
	    {
	      iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
	      pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
					    build_int_cst (integer_type_node,
							   power));
	      gimple_call_set_lhs (pow_stmt, iter_result);
	      gimple_set_location (pow_stmt, gimple_location (stmt));
	      gimple_set_uid (pow_stmt, gimple_uid (stmt));
	      gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);

	      if (dump_file && (dump_flags & TDF_DETAILS))
		{
		  unsigned elt;
		  repeat_factor *rf;
		  fputs ("Building __builtin_pow call for cached product (",
			 dump_file);
		  for (elt = j; elt < vec_len; elt++)
		    {
		      rf = &repeat_factor_vec[elt];
		      print_generic_expr (dump_file, rf->factor);
		      if (elt < vec_len - 1)
			fputs (" * ", dump_file);
		    }
		  fprintf (dump_file, ")^" HOST_WIDE_INT_PRINT_DEC"\n",
			   power);
		}
	    }
	}
      else
	{
	  /* Otherwise, find the first factor in the repeated factor
	     vector whose occurrence count is at least 2.  If no such
	     factor exists, there are no builtin_powi opportunities
	     remaining.  */
	  FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
	    {
	      if (rf1->count >= 2)
		break;
	    }

	  if (j >= vec_len)
	    break;

	  power = rf1->count;

	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      unsigned elt;
	      repeat_factor *rf;
	      fputs ("Building __builtin_pow call for (", dump_file);
	      for (elt = j; elt < vec_len; elt++)
		{
		  rf = &repeat_factor_vec[elt];
		  print_generic_expr (dump_file, rf->factor);
		  if (elt < vec_len - 1)
		    fputs (" * ", dump_file);
		}
	      fprintf (dump_file, ")^" HOST_WIDE_INT_PRINT_DEC"\n", power);
	    }

	  reassociate_stats.pows_created++;

	  /* Visit each element of the vector in reverse order (so that
	     high-occurrence elements are visited first, and within the
	     same occurrence count, lower-ranked elements are visited
	     first).  Form a linear product of all elements in this order
	     whose occurrence count is at least that of element J.
	     Record the SSA name representing the product of each element
	     with all subsequent elements in the vector.  */
	  if (j == vec_len - 1)
	    rf1->repr = rf1->factor;
	  else
	    {
	      for (ii = vec_len - 2; ii >= (int)j; ii--)
		{
		  tree op1, op2;

		  rf1 = &repeat_factor_vec[ii];
		  rf2 = &repeat_factor_vec[ii + 1];

		  /* Init the last factor's representative to be itself.  */
		  if (!rf2->repr)
		    rf2->repr = rf2->factor;

		  op1 = rf1->factor;
		  op2 = rf2->repr;

		  target_ssa = make_temp_ssa_name (type, NULL, "reassocpow");
		  mul_stmt = gimple_build_assign (target_ssa, MULT_EXPR,
						  op1, op2);
		  gimple_set_location (mul_stmt, gimple_location (stmt));
		  gimple_set_uid (mul_stmt, gimple_uid (stmt));
		  gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
		  rf1->repr = target_ssa;

		  /* Don't reprocess the multiply we just introduced.  */
		  gimple_set_visited (mul_stmt, true);
		}
	    }

	  /* Form a call to __builtin_powi for the maximum product
	     just formed, raised to the power obtained earlier.  */
	  rf1 = &repeat_factor_vec[j];
	  iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
	  pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
					build_int_cst (integer_type_node,
						       power));
	  gimple_call_set_lhs (pow_stmt, iter_result);
	  gimple_set_location (pow_stmt, gimple_location (stmt));
	  gimple_set_uid (pow_stmt, gimple_uid (stmt));
	  gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
	}

      /* If we previously formed at least one other builtin_powi call,
	 form the product of this one and those others.  */
      if (result)
	{
	  tree new_result = make_temp_ssa_name (type, NULL, "reassocpow");
	  mul_stmt = gimple_build_assign (new_result, MULT_EXPR,
					  result, iter_result);
	  gimple_set_location (mul_stmt, gimple_location (stmt));
	  gimple_set_uid (mul_stmt, gimple_uid (stmt));
	  gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
	  gimple_set_visited (mul_stmt, true);
	  result = new_result;
	}
      else
	result = iter_result;

      /* Decrement the occurrence count of each element in the product
	 by the count found above, and remove this many copies of each
	 factor from OPS.  */
      for (i = j; i < vec_len; i++)
	{
	  unsigned k = power;
	  unsigned n;

	  rf1 = &repeat_factor_vec[i];
	  rf1->count -= power;

	  /* Walk OPS backwards so ordered_remove doesn't disturb the
	     entries still to be visited.  */
	  FOR_EACH_VEC_ELT_REVERSE (*ops, n, oe)
	    {
	      if (oe->op == rf1->factor)
		{
		  if (oe->count <= k)
		    {
		      ops->ordered_remove (n);
		      k -= oe->count;

		      if (k == 0)
			break;
		    }
		  else
		    {
		      oe->count -= k;
		      break;
		    }
		}
	    }
	}
    }

  /* At this point all elements in the repeated factor vector have a
     remaining occurrence count of 0 or 1, and those with a count of 1
     don't have cached representatives.  Re-sort the ops vector and
     clean up.  */
  ops->qsort (sort_by_operand_rank);
  repeat_factor_vec.release ();

  /* Return the final product computed herein.  Note that there may
     still be some elements with single occurrence count left in OPS;
     those will be handled by the normal reassociation logic.  */
  return result;
}
5669
5670 /* Attempt to optimize
5671 CST1 * copysign (CST2, y) -> copysign (CST1 * CST2, y) if CST1 > 0, or
5672 CST1 * copysign (CST2, y) -> -copysign (CST1 * CST2, y) if CST1 < 0. */
5673
5674 static void
attempt_builtin_copysign(vec<operand_entry * > * ops)5675 attempt_builtin_copysign (vec<operand_entry *> *ops)
5676 {
5677 operand_entry *oe;
5678 unsigned int i;
5679 unsigned int length = ops->length ();
5680 tree cst = ops->last ()->op;
5681
5682 if (length == 1 || TREE_CODE (cst) != REAL_CST)
5683 return;
5684
5685 FOR_EACH_VEC_ELT (*ops, i, oe)
5686 {
5687 if (TREE_CODE (oe->op) == SSA_NAME
5688 && has_single_use (oe->op))
5689 {
5690 gimple *def_stmt = SSA_NAME_DEF_STMT (oe->op);
5691 if (gcall *old_call = dyn_cast <gcall *> (def_stmt))
5692 {
5693 tree arg0, arg1;
5694 switch (gimple_call_combined_fn (old_call))
5695 {
5696 CASE_CFN_COPYSIGN:
5697 CASE_CFN_COPYSIGN_FN:
5698 arg0 = gimple_call_arg (old_call, 0);
5699 arg1 = gimple_call_arg (old_call, 1);
5700 /* The first argument of copysign must be a constant,
5701 otherwise there's nothing to do. */
5702 if (TREE_CODE (arg0) == REAL_CST)
5703 {
5704 tree type = TREE_TYPE (arg0);
5705 tree mul = const_binop (MULT_EXPR, type, cst, arg0);
5706 /* If we couldn't fold to a single constant, skip it.
5707 That happens e.g. for inexact multiplication when
5708 -frounding-math. */
5709 if (mul == NULL_TREE)
5710 break;
5711 /* Instead of adjusting OLD_CALL, let's build a new
5712 call to not leak the LHS and prevent keeping bogus
5713 debug statements. DCE will clean up the old call. */
5714 gcall *new_call;
5715 if (gimple_call_internal_p (old_call))
5716 new_call = gimple_build_call_internal
5717 (IFN_COPYSIGN, 2, mul, arg1);
5718 else
5719 new_call = gimple_build_call
5720 (gimple_call_fndecl (old_call), 2, mul, arg1);
5721 tree lhs = make_ssa_name (type);
5722 gimple_call_set_lhs (new_call, lhs);
5723 gimple_set_location (new_call,
5724 gimple_location (old_call));
5725 insert_stmt_after (new_call, old_call);
5726 /* We've used the constant, get rid of it. */
5727 ops->pop ();
5728 bool cst1_neg = real_isneg (TREE_REAL_CST_PTR (cst));
5729 /* Handle the CST1 < 0 case by negating the result. */
5730 if (cst1_neg)
5731 {
5732 tree negrhs = make_ssa_name (TREE_TYPE (lhs));
5733 gimple *negate_stmt
5734 = gimple_build_assign (negrhs, NEGATE_EXPR, lhs);
5735 insert_stmt_after (negate_stmt, new_call);
5736 oe->op = negrhs;
5737 }
5738 else
5739 oe->op = lhs;
5740 if (dump_file && (dump_flags & TDF_DETAILS))
5741 {
5742 fprintf (dump_file, "Optimizing copysign: ");
5743 print_generic_expr (dump_file, cst);
5744 fprintf (dump_file, " * COPYSIGN (");
5745 print_generic_expr (dump_file, arg0);
5746 fprintf (dump_file, ", ");
5747 print_generic_expr (dump_file, arg1);
5748 fprintf (dump_file, ") into %sCOPYSIGN (",
5749 cst1_neg ? "-" : "");
5750 print_generic_expr (dump_file, mul);
5751 fprintf (dump_file, ", ");
5752 print_generic_expr (dump_file, arg1);
5753 fprintf (dump_file, "\n");
5754 }
5755 return;
5756 }
5757 break;
5758 default:
5759 break;
5760 }
5761 }
5762 }
5763 }
5764 }
5765
5766 /* Transform STMT at *GSI into a copy by replacing its rhs with NEW_RHS. */
5767
5768 static void
transform_stmt_to_copy(gimple_stmt_iterator * gsi,gimple * stmt,tree new_rhs)5769 transform_stmt_to_copy (gimple_stmt_iterator *gsi, gimple *stmt, tree new_rhs)
5770 {
5771 tree rhs1;
5772
5773 if (dump_file && (dump_flags & TDF_DETAILS))
5774 {
5775 fprintf (dump_file, "Transforming ");
5776 print_gimple_stmt (dump_file, stmt, 0);
5777 }
5778
5779 rhs1 = gimple_assign_rhs1 (stmt);
5780 gimple_assign_set_rhs_from_tree (gsi, new_rhs);
5781 update_stmt (stmt);
5782 remove_visited_stmt_chain (rhs1);
5783
5784 if (dump_file && (dump_flags & TDF_DETAILS))
5785 {
5786 fprintf (dump_file, " into ");
5787 print_gimple_stmt (dump_file, stmt, 0);
5788 }
5789 }
5790
5791 /* Transform STMT at *GSI into a multiply of RHS1 and RHS2. */
5792
5793 static void
transform_stmt_to_multiply(gimple_stmt_iterator * gsi,gimple * stmt,tree rhs1,tree rhs2)5794 transform_stmt_to_multiply (gimple_stmt_iterator *gsi, gimple *stmt,
5795 tree rhs1, tree rhs2)
5796 {
5797 if (dump_file && (dump_flags & TDF_DETAILS))
5798 {
5799 fprintf (dump_file, "Transforming ");
5800 print_gimple_stmt (dump_file, stmt, 0);
5801 }
5802
5803 gimple_assign_set_rhs_with_ops (gsi, MULT_EXPR, rhs1, rhs2);
5804 update_stmt (gsi_stmt (*gsi));
5805 remove_visited_stmt_chain (rhs1);
5806
5807 if (dump_file && (dump_flags & TDF_DETAILS))
5808 {
5809 fprintf (dump_file, " into ");
5810 print_gimple_stmt (dump_file, stmt, 0);
5811 }
5812 }
5813
/* Reassociate expressions in basic block BB and its post-dominator as
   children.

   Bubble up return status from maybe_optimize_range_tests: true means
   the caller must clean up the CFG afterwards.  */

static bool
reassociate_bb (basic_block bb)
{
  gimple_stmt_iterator gsi;
  basic_block son;
  gimple *stmt = last_stmt (bb);
  bool cfg_cleanup_needed = false;

  if (stmt && !gimple_visited_p (stmt))
    cfg_cleanup_needed |= maybe_optimize_range_tests (stmt);

  /* Walk the block backwards so that uses are seen before the
     statements that feed them.  */
  for (gsi = gsi_last_bb (bb); !gsi_end_p (gsi); gsi_prev (&gsi))
    {
      stmt = gsi_stmt (gsi);

      if (is_gimple_assign (stmt)
	  && !stmt_could_throw_p (stmt))
	{
	  tree lhs, rhs1, rhs2;
	  enum tree_code rhs_code = gimple_assign_rhs_code (stmt);

	  /* If this is not a gimple binary expression, there is
	     nothing for us to do with it.  */
	  if (get_gimple_rhs_class (rhs_code) != GIMPLE_BINARY_RHS)
	    continue;

	  /* If this was part of an already processed statement,
	     we don't need to touch it again.  */
	  if (gimple_visited_p (stmt))
	    {
	      /* This statement might have become dead because of previous
		 reassociations.  */
	      if (has_zero_uses (gimple_get_lhs (stmt)))
		{
		  reassoc_remove_stmt (&gsi);
		  release_defs (stmt);
		  /* We might end up removing the last stmt above which
		     places the iterator to the end of the sequence.
		     Reset it to the last stmt in this case which might
		     be the end of the sequence as well if we removed
		     the last statement of the sequence.  In which case
		     we need to bail out.  */
		  if (gsi_end_p (gsi))
		    {
		      gsi = gsi_last_bb (bb);
		      if (gsi_end_p (gsi))
			break;
		    }
		}
	      continue;
	    }

	  lhs = gimple_assign_lhs (stmt);
	  rhs1 = gimple_assign_rhs1 (stmt);
	  rhs2 = gimple_assign_rhs2 (stmt);

	  /* For non-bit or min/max operations we can't associate
	     all types.  Verify that here.  */
	  if (rhs_code != BIT_IOR_EXPR
	      && rhs_code != BIT_AND_EXPR
	      && rhs_code != BIT_XOR_EXPR
	      && rhs_code != MIN_EXPR
	      && rhs_code != MAX_EXPR
	      && (!can_reassociate_p (lhs)
		  || !can_reassociate_p (rhs1)
		  || !can_reassociate_p (rhs2)))
	    continue;

	  if (associative_tree_code (rhs_code))
	    {
	      auto_vec<operand_entry *> ops;
	      tree powi_result = NULL_TREE;
	      bool is_vector = VECTOR_TYPE_P (TREE_TYPE (lhs));

	      /* There may be no immediate uses left by the time we
		 get here because we may have eliminated them all.  */
	      if (TREE_CODE (lhs) == SSA_NAME && has_zero_uses (lhs))
		continue;

	      gimple_set_visited (stmt, true);
	      /* Flatten the chain of operations into OPS, sort by
		 rank, and run the operand-list optimizations;
	         undistribution may expose more, so optimize again if
	         it changed anything.  */
	      linearize_expr_tree (&ops, stmt, true, true);
	      ops.qsort (sort_by_operand_rank);
	      int orig_len = ops.length ();
	      optimize_ops_list (rhs_code, &ops);
	      if (undistribute_ops_list (rhs_code, &ops,
					 loop_containing_stmt (stmt)))
		{
		  ops.qsort (sort_by_operand_rank);
		  optimize_ops_list (rhs_code, &ops);
		}

	      if (rhs_code == PLUS_EXPR
		  && transform_add_to_multiply (&ops))
		ops.qsort (sort_by_operand_rank);

	      if (rhs_code == BIT_IOR_EXPR || rhs_code == BIT_AND_EXPR)
		{
		  if (is_vector)
		    optimize_vec_cond_expr (rhs_code, &ops);
		  else
		    optimize_range_tests (rhs_code, &ops, NULL);
		}

	      if (rhs_code == MULT_EXPR && !is_vector)
		{
		  attempt_builtin_copysign (&ops);

		  if (reassoc_insert_powi_p
		      && flag_unsafe_math_optimizations)
		    powi_result = attempt_builtin_powi (stmt, &ops);
		}

	      /* A multiplication by -1 (the lowest-ranked operand) is
		 peeled off here and re-applied as a negation of the
		 final result below.  */
	      operand_entry *last;
	      bool negate_result = false;
	      if (ops.length () > 1
		  && rhs_code == MULT_EXPR)
		{
		  last = ops.last ();
		  if ((integer_minus_onep (last->op)
		       || real_minus_onep (last->op))
		      && !HONOR_SNANS (TREE_TYPE (lhs))
		      && (!HONOR_SIGNED_ZEROS (TREE_TYPE (lhs))
			  || !COMPLEX_FLOAT_TYPE_P (TREE_TYPE (lhs))))
		    {
		      ops.pop ();
		      negate_result = true;
		    }
		}

	      tree new_lhs = lhs;
	      /* If the operand vector is now empty, all operands were
		 consumed by the __builtin_powi optimization.  */
	      if (ops.length () == 0)
		transform_stmt_to_copy (&gsi, stmt, powi_result);
	      else if (ops.length () == 1)
		{
		  tree last_op = ops.last ()->op;

		  /* If the stmt that defines operand has to be inserted, insert it
		     before the use.  */
		  if (ops.last ()->stmt_to_insert)
		    insert_stmt_before_use (stmt, ops.last ()->stmt_to_insert);
		  if (powi_result)
		    transform_stmt_to_multiply (&gsi, stmt, last_op,
						powi_result);
		  else
		    transform_stmt_to_copy (&gsi, stmt, last_op);
		}
	      else
		{
		  machine_mode mode = TYPE_MODE (TREE_TYPE (lhs));
		  int ops_num = ops.length ();
		  int width = get_reassociation_width (ops_num, rhs_code, mode);

		  if (dump_file && (dump_flags & TDF_DETAILS))
		    fprintf (dump_file,
			     "Width = %d was chosen for reassociation\n", width);


		  /* For binary bit operations, if there are at least 3
		     operands and the last operand in OPS is a constant,
		     move it to the front.  This helps ensure that we generate
		     (X & Y) & C rather than (X & C) & Y.  The former will
		     often match a canonical bit test when we get to RTL.  */
		  if (ops.length () > 2
		      && (rhs_code == BIT_AND_EXPR
			  || rhs_code == BIT_IOR_EXPR
			  || rhs_code == BIT_XOR_EXPR)
		      && TREE_CODE (ops.last ()->op) == INTEGER_CST)
		    std::swap (*ops[0], *ops[ops_num - 1]);

		  if (width > 1
		      && ops.length () > 3)
		    rewrite_expr_tree_parallel (as_a <gassign *> (stmt),
						width, ops);
		  else
		    {
		      /* When there are three operands left, we want
			 to make sure the ones that get the double
			 binary op are chosen wisely.  */
		      int len = ops.length ();
		      if (len >= 3)
			swap_ops_for_binary_stmt (ops, len - 3, stmt);

		      new_lhs = rewrite_expr_tree (stmt, 0, ops,
						   powi_result != NULL
						   || negate_result,
						   len != orig_len);
		    }

		  /* If we combined some repeated factors into a
		     __builtin_powi call, multiply that result by the
		     reassociated operands.  */
		  if (powi_result)
		    {
		      gimple *mul_stmt, *lhs_stmt = SSA_NAME_DEF_STMT (lhs);
		      tree type = TREE_TYPE (lhs);
		      tree target_ssa = make_temp_ssa_name (type, NULL,
							    "reassocpow");
		      gimple_set_lhs (lhs_stmt, target_ssa);
		      update_stmt (lhs_stmt);
		      if (lhs != new_lhs)
			{
			  target_ssa = new_lhs;
			  new_lhs = lhs;
			}
		      mul_stmt = gimple_build_assign (lhs, MULT_EXPR,
						      powi_result, target_ssa);
		      gimple_set_location (mul_stmt, gimple_location (stmt));
		      gimple_set_uid (mul_stmt, gimple_uid (stmt));
		      gsi_insert_after (&gsi, mul_stmt, GSI_NEW_STMT);
		    }
		}

	      /* Re-apply the multiplication by -1 peeled off above:
		 redirect the computed value into a temporary and
		 define LHS as its negation.  */
	      if (negate_result)
		{
		  stmt = SSA_NAME_DEF_STMT (lhs);
		  tree tmp = make_ssa_name (TREE_TYPE (lhs));
		  gimple_set_lhs (stmt, tmp);
		  if (lhs != new_lhs)
		    tmp = new_lhs;
		  gassign *neg_stmt = gimple_build_assign (lhs, NEGATE_EXPR,
							   tmp);
		  gimple_set_uid (neg_stmt, gimple_uid (stmt));
		  gsi_insert_after (&gsi, neg_stmt, GSI_NEW_STMT);
		  update_stmt (stmt);
		}
	    }
	}
    }
  for (son = first_dom_son (CDI_POST_DOMINATORS, bb);
       son;
       son = next_dom_son (CDI_POST_DOMINATORS, son))
    cfg_cleanup_needed |= reassociate_bb (son);

  return cfg_cleanup_needed;
}
6056
/* Add jumps around shifts for range tests turned into bit tests.
   For each SSA_NAME VAR we have code like:
   VAR = ...;  // final stmt of range comparison
   // bit test here...;
   OTHERVAR = ...;  // final stmt of the bit test sequence
   RES = VAR | OTHERVAR;
   Turn the above into:
   VAR = ...;
   if (VAR != 0)
     goto <l3>;
   else
     goto <l2>;
   <l2>:
   // bit test here...;
   OTHERVAR = ...;
   <l3>:
   # RES = PHI<1(l1), OTHERVAR(l2)>;  */

static void
branch_fixup (void)
{
  tree var;
  unsigned int i;

  FOR_EACH_VEC_ELT (reassoc_branch_fixups, i, var)
    {
      gimple *def_stmt = SSA_NAME_DEF_STMT (var);
      gimple *use_stmt;
      use_operand_p use;
      bool ok = single_imm_use (var, &use, &use_stmt);
      /* The recorded VAR must feed exactly one BIT_IOR_EXPR in the
	 same block -- that is the shape built by the range-test
	 optimization that queued it.  */
      gcc_assert (ok
		  && is_gimple_assign (use_stmt)
		  && gimple_assign_rhs_code (use_stmt) == BIT_IOR_EXPR
		  && gimple_bb (def_stmt) == gimple_bb (use_stmt));

      /* Split twice: COND_BB ends after DEF_STMT, THEN_BB ends after
	 USE_STMT, MERGE_BB holds what follows.  */
      basic_block cond_bb = gimple_bb (def_stmt);
      basic_block then_bb = split_block (cond_bb, def_stmt)->dest;
      basic_block merge_bb = split_block (then_bb, use_stmt)->dest;

      /* End COND_BB with "if (var != 0)".  */
      gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
      gimple *g = gimple_build_cond (NE_EXPR, var,
				     build_zero_cst (TREE_TYPE (var)),
				     NULL_TREE, NULL_TREE);
      location_t loc = gimple_location (use_stmt);
      gimple_set_location (g, loc);
      gsi_insert_after (&gsi, g, GSI_NEW_STMT);

      /* The true edge skips the bit test; give both edges even
	 probability and keep the counts consistent.  */
      edge etrue = make_edge (cond_bb, merge_bb, EDGE_TRUE_VALUE);
      etrue->probability = profile_probability::even ();
      edge efalse = find_edge (cond_bb, then_bb);
      efalse->flags = EDGE_FALSE_VALUE;
      efalse->probability -= etrue->probability;
      then_bb->count -= etrue->count ();

      /* Find the non-VAR operand of the BIT_IOR_EXPR.  */
      tree othervar = NULL_TREE;
      if (gimple_assign_rhs1 (use_stmt) == var)
	othervar = gimple_assign_rhs2 (use_stmt);
      else if (gimple_assign_rhs2 (use_stmt) == var)
	othervar = gimple_assign_rhs1 (use_stmt);
      else
	gcc_unreachable ();
      /* Replace the BIT_IOR_EXPR by a PHI merging 1 (VAR was nonzero)
	 with OTHERVAR (the bit test result).  */
      tree lhs = gimple_assign_lhs (use_stmt);
      gphi *phi = create_phi_node (lhs, merge_bb);
      add_phi_arg (phi, build_one_cst (TREE_TYPE (lhs)), etrue, loc);
      add_phi_arg (phi, othervar, single_succ_edge (then_bb), loc);
      gsi = gsi_for_stmt (use_stmt);
      gsi_remove (&gsi, true);

      set_immediate_dominator (CDI_DOMINATORS, merge_bb, cond_bb);
      set_immediate_dominator (CDI_POST_DOMINATORS, cond_bb, merge_bb);
    }
  reassoc_branch_fixups.release ();
}
6130
6131 void dump_ops_vector (FILE *file, vec<operand_entry *> ops);
6132 void debug_ops_vector (vec<operand_entry *> ops);
6133
6134 /* Dump the operand entry vector OPS to FILE. */
6135
6136 void
dump_ops_vector(FILE * file,vec<operand_entry * > ops)6137 dump_ops_vector (FILE *file, vec<operand_entry *> ops)
6138 {
6139 operand_entry *oe;
6140 unsigned int i;
6141
6142 FOR_EACH_VEC_ELT (ops, i, oe)
6143 {
6144 fprintf (file, "Op %d -> rank: %d, tree: ", i, oe->rank);
6145 print_generic_expr (file, oe->op);
6146 fprintf (file, "\n");
6147 }
6148 }
6149
6150 /* Dump the operand entry vector OPS to STDERR. */
6151
/* Debugger entry point: dump OPS to stderr via dump_ops_vector.  */
DEBUG_FUNCTION void
debug_ops_vector (vec<operand_entry *> ops)
{
  dump_ops_vector (stderr, ops);
}
6157
6158 /* Bubble up return status from reassociate_bb. */
6159
6160 static bool
do_reassoc(void)6161 do_reassoc (void)
6162 {
6163 break_up_subtract_bb (ENTRY_BLOCK_PTR_FOR_FN (cfun));
6164 return reassociate_bb (EXIT_BLOCK_PTR_FOR_FN (cfun));
6165 }
6166
6167 /* Initialize the reassociation pass. */
6168
6169 static void
init_reassoc(void)6170 init_reassoc (void)
6171 {
6172 int i;
6173 long rank = 2;
6174 int *bbs = XNEWVEC (int, n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS);
6175
6176 /* Find the loops, so that we can prevent moving calculations in
6177 them. */
6178 loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
6179
6180 memset (&reassociate_stats, 0, sizeof (reassociate_stats));
6181
6182 next_operand_entry_id = 0;
6183
6184 /* Reverse RPO (Reverse Post Order) will give us something where
6185 deeper loops come later. */
6186 pre_and_rev_post_order_compute (NULL, bbs, false);
6187 bb_rank = XCNEWVEC (long, last_basic_block_for_fn (cfun));
6188 operand_rank = new hash_map<tree, long>;
6189
6190 /* Give each default definition a distinct rank. This includes
6191 parameters and the static chain. Walk backwards over all
6192 SSA names so that we get proper rank ordering according
6193 to tree_swap_operands_p. */
6194 for (i = num_ssa_names - 1; i > 0; --i)
6195 {
6196 tree name = ssa_name (i);
6197 if (name && SSA_NAME_IS_DEFAULT_DEF (name))
6198 insert_operand_rank (name, ++rank);
6199 }
6200
6201 /* Set up rank for each BB */
6202 for (i = 0; i < n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS; i++)
6203 bb_rank[bbs[i]] = ++rank << 16;
6204
6205 free (bbs);
6206 calculate_dominance_info (CDI_POST_DOMINATORS);
6207 plus_negates = vNULL;
6208 }
6209
6210 /* Cleanup after the reassociation pass, and print stats if
6211 requested. */
6212
static void
fini_reassoc (void)
{
  /* Report the counters accumulated in reassociate_stats over this
     function (printed when -fdump-statistics is in effect).  */
  statistics_counter_event (cfun, "Linearized",
			    reassociate_stats.linearized);
  statistics_counter_event (cfun, "Constants eliminated",
			    reassociate_stats.constants_eliminated);
  statistics_counter_event (cfun, "Ops eliminated",
			    reassociate_stats.ops_eliminated);
  statistics_counter_event (cfun, "Statements rewritten",
			    reassociate_stats.rewritten);
  statistics_counter_event (cfun, "Built-in pow[i] calls encountered",
			    reassociate_stats.pows_encountered);
  statistics_counter_event (cfun, "Built-in powi calls created",
			    reassociate_stats.pows_created);

  /* Release everything allocated by init_reassoc, in particular the
     rank map, the operand-entry pool and the per-BB rank array.  */
  delete operand_rank;
  operand_entry_pool.release ();
  free (bb_rank);
  plus_negates.release ();
  free_dominance_info (CDI_POST_DOMINATORS);
  loop_optimizer_finalize ();
}
6236
6237 /* Gate and execute functions for Reassociation. If INSERT_POWI_P, enable
6238 insertion of __builtin_powi calls.
6239
6240 Returns TODO_cfg_cleanup if a CFG cleanup pass is desired due to
6241 optimization of a gimple conditional. Otherwise returns zero. */
6242
6243 static unsigned int
execute_reassoc(bool insert_powi_p)6244 execute_reassoc (bool insert_powi_p)
6245 {
6246 reassoc_insert_powi_p = insert_powi_p;
6247
6248 init_reassoc ();
6249
6250 bool cfg_cleanup_needed;
6251 cfg_cleanup_needed = do_reassoc ();
6252 repropagate_negates ();
6253 branch_fixup ();
6254
6255 fini_reassoc ();
6256 return cfg_cleanup_needed ? TODO_cleanup_cfg : 0;
6257 }
6258
namespace {

const pass_data pass_data_reassoc =
{
  GIMPLE_PASS, /* type */
  "reassoc", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_TREE_REASSOC, /* tv_id */
  ( PROP_cfg | PROP_ssa ), /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_update_ssa_only_virtuals, /* todo_flags_finish */
};

/* Pass-manager wrapper around execute_reassoc.  The pass is registered
   more than once; set_pass_param selects whether a given instance may
   insert __builtin_powi calls.  */
class pass_reassoc : public gimple_opt_pass
{
public:
  pass_reassoc (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_reassoc, ctxt), insert_powi_p (false)
  {}

  /* opt_pass methods: */
  opt_pass * clone () { return new pass_reassoc (m_ctxt); }
  /* The only supported parameter (N == 0) toggles insert_powi_p.  */
  void set_pass_param (unsigned int n, bool param)
    {
      gcc_assert (n == 0);
      insert_powi_p = param;
    }
  /* Run only when -ftree-reassoc is enabled.  */
  virtual bool gate (function *) { return flag_tree_reassoc != 0; }
  virtual unsigned int execute (function *)
    { return execute_reassoc (insert_powi_p); }

 private:
  /* Enable insertion of __builtin_powi calls during execute_reassoc.  See
     point 3a in the pass header comment.  */
  bool insert_powi_p;
}; // class pass_reassoc

} // anon namespace
6299
/* Factory: create a new instance of the reassociation pass for the
   pass manager (each registration in passes.def gets its own clone).  */
gimple_opt_pass *
make_pass_reassoc (gcc::context *ctxt)
{
  return new pass_reassoc (ctxt);
}
6305