1 /* Reassociation for trees.
2 Copyright (C) 2005-2018 Free Software Foundation, Inc.
3 Contributed by Daniel Berlin <dan@dberlin.org>
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
11
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
20
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "backend.h"
25 #include "target.h"
26 #include "rtl.h"
27 #include "tree.h"
28 #include "gimple.h"
29 #include "cfghooks.h"
30 #include "alloc-pool.h"
31 #include "tree-pass.h"
32 #include "memmodel.h"
33 #include "tm_p.h"
34 #include "ssa.h"
35 #include "optabs-tree.h"
36 #include "gimple-pretty-print.h"
37 #include "diagnostic-core.h"
38 #include "fold-const.h"
39 #include "stor-layout.h"
40 #include "cfganal.h"
41 #include "gimple-fold.h"
42 #include "tree-eh.h"
43 #include "gimple-iterator.h"
44 #include "gimplify-me.h"
45 #include "tree-cfg.h"
46 #include "tree-ssa-loop.h"
47 #include "flags.h"
48 #include "tree-ssa.h"
49 #include "langhooks.h"
50 #include "cfgloop.h"
51 #include "params.h"
52 #include "builtins.h"
53 #include "gimplify.h"
54 #include "case-cfn-macros.h"
55
56 /* This is a simple global reassociation pass. It is, in part, based
57 on the LLVM pass of the same name (They do some things more/less
58 than we do, in different orders, etc).
59
60 It consists of five steps:
61
62 1. Breaking up subtract operations into addition + negate, where
63 it would promote the reassociation of adds.
64
65 2. Left linearization of the expression trees, so that (A+B)+(C+D)
66 becomes (((A+B)+C)+D), which is easier for us to rewrite later.
67 During linearization, we place the operands of the binary
68 expressions into a vector of operand_entry_*
69
70 3. Optimization of the operand lists, eliminating things like a +
71 -a, a & a, etc.
72
73 3a. Combine repeated factors with the same occurrence counts
74 into a __builtin_powi call that will later be optimized into
75 an optimal number of multiplies.
76
77 4. Rewrite the expression trees we linearized and optimized so
78 they are in proper rank order.
79
80 5. Repropagate negates, as nothing else will clean it up ATM.
81
82 A bit of theory on #4, since nobody seems to write anything down
83 about why it makes sense to do it the way they do it:
84
85 We could do this much nicer theoretically, but don't (for reasons
86 explained after how to do it theoretically nice :P).
87
88 In order to promote the most redundancy elimination, you want
89 binary expressions whose operands are the same rank (or
90 preferably, the same value) exposed to the redundancy eliminator,
91 for possible elimination.
92
93 So the way to do this, if we really cared, is to build the new op
94 tree from the leaves to the roots, merging as you go, and putting the
95 new op on the end of the worklist, until you are left with one
96 thing on the worklist.
97
98 IE if you have to rewrite the following set of operands (listed with
99 rank in parentheses), with opcode PLUS_EXPR:
100
101 a (1), b (1), c (1), d (2), e (2)
102
103
104 We start with our merge worklist empty, and the ops list with all of
105 those on it.
106
107 You want to first merge all leaves of the same rank, as much as
108 possible.
109
110 So first build a binary op of
111
112 mergetmp = a + b, and put "mergetmp" on the merge worklist.
113
114 Because there is no three operand form of PLUS_EXPR, c is not going to
115 be exposed to redundancy elimination as a rank 1 operand.
116
117 So you might as well throw it on the merge worklist (you could also
118 consider it to now be a rank two operand, and merge it with d and e,
119 but in this case, you then have evicted e from a binary op. So at
120 least in this situation, you can't win.)
121
122 Then build a binary op of d + e
123 mergetmp2 = d + e
124
125 and put mergetmp2 on the merge worklist.
126
127 so merge worklist = {mergetmp, c, mergetmp2}
128
129 Continue building binary ops of these operations until you have only
130 one operation left on the worklist.
131
132 So we have
133
134 build binary op
135 mergetmp3 = mergetmp + c
136
137 worklist = {mergetmp2, mergetmp3}
138
139 mergetmp4 = mergetmp2 + mergetmp3
140
141 worklist = {mergetmp4}
142
143 because we have one operation left, we can now just set the original
144 statement equal to the result of that operation.
145
146 This will at least expose a + b and d + e to redundancy elimination
147 as binary operations.
148
149 For extra points, you can reuse the old statements to build the
150 mergetmps, since you shouldn't run out.
151
152 So why don't we do this?
153
154 Because it's expensive, and will rarely help. Most trees we are
155 reassociating have 3 or fewer ops. If they have 2 ops, they already
156 will be written into a nice single binary op. If you have 3 ops, a
157 single simple check suffices to tell you whether the first two are of the
158 same rank. If so, you know to order it
159
160 mergetmp = op1 + op2
161 newstmt = mergetmp + op3
162
163 instead of
164 mergetmp = op2 + op3
165 newstmt = mergetmp + op1
166
167 If all three are of the same rank, you can't expose them all in a
168 single binary operator anyway, so the above is *still* the best you
169 can do.
170
171 Thus, this is what we do. When we have three ops left, we check to see
172 what order to put them in, and call it a day. As a nod to vector sum
173 reduction, we check if any of the ops are really a phi node that is a
174 destructive update for the associating op, and keep the destructive
175 update together for vector sum reduction recognition. */
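
/* A concrete sketch of the steps above (illustrative only, not taken
   from real compiler output): starting from

     x = a - b;
     y = x + b;

   step 1 rewrites the subtract as  t = -b;  x = a + t;  step 2
   linearizes y's chain into the operand list {a, -b, b}; step 3
   cancels b + -b to 0, leaving {a}; and step 4 would rewrite any
   remaining operands back into rank-ordered binary statements.  */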
176
177 /* Enable insertion of __builtin_powi calls during execute_reassoc. See
178 point 3a in the pass header comment. */
179 static bool reassoc_insert_powi_p;
180
181 /* Statistics */
182 static struct
183 {
184 int linearized;
185 int constants_eliminated;
186 int ops_eliminated;
187 int rewritten;
188 int pows_encountered;
189 int pows_created;
190 } reassociate_stats;
191
192 /* Operator, rank pair. */
193 struct operand_entry
194 {
195 unsigned int rank;
196 unsigned int id;
197 tree op;
198 unsigned int count;
199 gimple *stmt_to_insert;
200 };
201
202 static object_allocator<operand_entry> operand_entry_pool
203 ("operand entry pool");
204
205 /* This is used to assign a unique ID to each struct operand_entry
206 so that qsort results are identical on different hosts. */
207 static unsigned int next_operand_entry_id;
208
209 /* Starting rank number for a given basic block, so that we can rank
210 operations using unmovable instructions in that BB based on the bb
211 depth. */
212 static long *bb_rank;
213
214 /* Operand->rank hashtable. */
215 static hash_map<tree, long> *operand_rank;
216
217 /* Vector of SSA_NAMEs for which, after reassociate_bb is done with
218 all basic blocks, the CFG should be adjusted - the basic block is
219 split right after that SSA_NAME's definition statement and before
220 the only use, which must be a bit ior. */
221 static vec<tree> reassoc_branch_fixups;
222
223 /* Forward decls. */
224 static long get_rank (tree);
225 static bool reassoc_stmt_dominates_stmt_p (gimple *, gimple *);
226
227 /* Wrapper around gsi_remove, which adjusts gimple_uid of debug stmts
228 possibly added by gsi_remove. */
229
230 bool
231 reassoc_remove_stmt (gimple_stmt_iterator *gsi)
232 {
233 gimple *stmt = gsi_stmt (*gsi);
234
235 if (!MAY_HAVE_DEBUG_BIND_STMTS || gimple_code (stmt) == GIMPLE_PHI)
236 return gsi_remove (gsi, true);
237
238 gimple_stmt_iterator prev = *gsi;
239 gsi_prev (&prev);
240 unsigned uid = gimple_uid (stmt);
241 basic_block bb = gimple_bb (stmt);
242 bool ret = gsi_remove (gsi, true);
243 if (!gsi_end_p (prev))
244 gsi_next (&prev);
245 else
246 prev = gsi_start_bb (bb);
247 gimple *end_stmt = gsi_stmt (*gsi);
248 while ((stmt = gsi_stmt (prev)) != end_stmt)
249 {
250 gcc_assert (stmt && is_gimple_debug (stmt) && gimple_uid (stmt) == 0);
251 gimple_set_uid (stmt, uid);
252 gsi_next (&prev);
253 }
254 return ret;
255 }
256
257 /* Bias amount for loop-carried phis. We want this to be larger than
258 the depth of any reassociation tree we can see, but not larger than
259 the rank difference between two blocks. */
260 #define PHI_LOOP_BIAS (1 << 15)
261
262 /* Rank assigned to a phi statement. If STMT is a loop-carried phi of
263 an innermost loop, and the phi has only a single use which is inside
264 the loop, then the rank is the block rank of the loop latch plus an
265 extra bias for the loop-carried dependence. This causes expressions
266 calculated into an accumulator variable to be independent for each
267 iteration of the loop. If STMT is some other phi, the rank is the
268 block rank of its containing block. */
269 static long
270 phi_rank (gimple *stmt)
271 {
272 basic_block bb = gimple_bb (stmt);
273 struct loop *father = bb->loop_father;
274 tree res;
275 unsigned i;
276 use_operand_p use;
277 gimple *use_stmt;
278
279 /* We only care about real loops (those with a latch). */
280 if (!father->latch)
281 return bb_rank[bb->index];
282
283 /* Interesting phis must be in headers of innermost loops. */
284 if (bb != father->header
285 || father->inner)
286 return bb_rank[bb->index];
287
288 /* Ignore virtual SSA_NAMEs. */
289 res = gimple_phi_result (stmt);
290 if (virtual_operand_p (res))
291 return bb_rank[bb->index];
292
293 /* The phi definition must have a single use, and that use must be
294 within the loop. Otherwise this isn't an accumulator pattern. */
295 if (!single_imm_use (res, &use, &use_stmt)
296 || gimple_bb (use_stmt)->loop_father != father)
297 return bb_rank[bb->index];
298
299 /* Look for phi arguments from within the loop. If found, bias this phi. */
300 for (i = 0; i < gimple_phi_num_args (stmt); i++)
301 {
302 tree arg = gimple_phi_arg_def (stmt, i);
303 if (TREE_CODE (arg) == SSA_NAME
304 && !SSA_NAME_IS_DEFAULT_DEF (arg))
305 {
306 gimple *def_stmt = SSA_NAME_DEF_STMT (arg);
307 if (gimple_bb (def_stmt)->loop_father == father)
308 return bb_rank[father->latch->index] + PHI_LOOP_BIAS;
309 }
310 }
311
312 /* Must be an uninteresting phi. */
313 return bb_rank[bb->index];
314 }
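
/* Illustration: given x_1 = PHI <x_0 (preheader), x_2 (latch)> in an
   innermost loop, where x_1 has exactly one use and that use is in
   the loop, phi_rank returns bb_rank[latch->index] + PHI_LOOP_BIAS.
   The bias pushes the accumulator to the end of any operand list it
   appears in (see the comment in get_rank below).  */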
315
316 /* If EXP is an SSA_NAME defined by a PHI statement that represents a
317 loop-carried dependence of an innermost loop, return TRUE; else
318 return FALSE. */
319 static bool
320 loop_carried_phi (tree exp)
321 {
322 gimple *phi_stmt;
323 long block_rank;
324
325 if (TREE_CODE (exp) != SSA_NAME
326 || SSA_NAME_IS_DEFAULT_DEF (exp))
327 return false;
328
329 phi_stmt = SSA_NAME_DEF_STMT (exp);
330
331 if (gimple_code (SSA_NAME_DEF_STMT (exp)) != GIMPLE_PHI)
332 return false;
333
334 /* Non-loop-carried phis have block rank. Loop-carried phis have
335 an additional bias added in. If this phi doesn't have block rank,
336 it's biased and should not be propagated. */
337 block_rank = bb_rank[gimple_bb (phi_stmt)->index];
338
339 if (phi_rank (phi_stmt) != block_rank)
340 return true;
341
342 return false;
343 }
344
345 /* Return the maximum of RANK and the rank that should be propagated
346 from expression OP. For most operands, this is just the rank of OP.
347 For loop-carried phis, the value is zero to avoid undoing the bias
348 in favor of the phi. */
349 static long
350 propagate_rank (long rank, tree op)
351 {
352 long op_rank;
353
354 if (loop_carried_phi (op))
355 return rank;
356
357 op_rank = get_rank (op);
358
359 return MAX (rank, op_rank);
360 }
361
362 /* Look up the operand rank structure for expression E. */
363
364 static inline long
365 find_operand_rank (tree e)
366 {
367 long *slot = operand_rank->get (e);
368 return slot ? *slot : -1;
369 }
370
371 /* Insert {E,RANK} into the operand rank hashtable. */
372
373 static inline void
374 insert_operand_rank (tree e, long rank)
375 {
376 gcc_assert (rank > 0);
377 gcc_assert (!operand_rank->put (e, rank));
378 }
379
380 /* Given an expression E, return the rank of the expression. */
381
382 static long
383 get_rank (tree e)
384 {
385 /* SSA_NAME's have the rank of the expression they are the result
386 of.
387 For globals and uninitialized values, the rank is 0.
388 For function arguments, use the pre-setup rank.
389 For PHI nodes, stores, asm statements, etc, we use the rank of
390 the BB.
391 For simple operations, the rank is the maximum rank of any of
392 its operands, or the bb_rank, whichever is less.
393 I make no claims that this is optimal, however, it gives good
394 results. */
395
396 /* We make an exception to the normal ranking system to break
397 dependences of accumulator variables in loops. Suppose we
398 have a simple one-block loop containing:
399
400 x_1 = phi(x_0, x_2)
401 b = a + x_1
402 c = b + d
403 x_2 = c + e
404
405 As shown, each iteration of the calculation into x is fully
406 dependent upon the iteration before it. We would prefer to
407 see this in the form:
408
409 x_1 = phi(x_0, x_2)
410 b = a + d
411 c = b + e
412 x_2 = c + x_1
413
414 If the loop is unrolled, the calculations of b and c from
415 different iterations can be interleaved.
416
417 To obtain this result during reassociation, we bias the rank
418 of the phi definition x_1 upward, when it is recognized as an
419 accumulator pattern. The artificial rank causes it to be
420 added last, providing the desired independence. */
421
422 if (TREE_CODE (e) == SSA_NAME)
423 {
424 ssa_op_iter iter;
425 gimple *stmt;
426 long rank;
427 tree op;
428
429 if (SSA_NAME_IS_DEFAULT_DEF (e))
430 return find_operand_rank (e);
431
432 stmt = SSA_NAME_DEF_STMT (e);
433 if (gimple_code (stmt) == GIMPLE_PHI)
434 return phi_rank (stmt);
435
436 if (!is_gimple_assign (stmt))
437 return bb_rank[gimple_bb (stmt)->index];
438
439 /* If we already have a rank for this expression, use that. */
440 rank = find_operand_rank (e);
441 if (rank != -1)
442 return rank;
443
444 /* Otherwise, find the maximum rank for the operands. As an
445 exception, remove the bias from loop-carried phis when propagating
446 the rank so that dependent operations are not also biased. */
447 /* Simply walk over all SSA uses - this takes advantage of the
448 fact that non-SSA operands are is_gimple_min_invariant and
449 thus have rank 0. */
450 rank = 0;
451 FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
452 rank = propagate_rank (rank, op);
453
454 if (dump_file && (dump_flags & TDF_DETAILS))
455 {
456 fprintf (dump_file, "Rank for ");
457 print_generic_expr (dump_file, e);
458 fprintf (dump_file, " is %ld\n", (rank + 1));
459 }
460
461 /* Note the rank in the hashtable so we don't recompute it. */
462 insert_operand_rank (e, (rank + 1));
463 return (rank + 1);
464 }
465
466 /* Constants, globals, etc., are rank 0 */
467 return 0;
468 }
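
/* A small worked example (hypothetical ranks): assume parameters a_1
   and b_2 were pre-ranked 1 at pass setup.  Then for

     x_3 = a_1 + b_2;   rank = MAX (1, 1) + 1 = 2
     y_4 = x_3 + a_1;   rank = MAX (2, 1) + 1 = 3

   while any constant operand keeps rank 0.  */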
469
470
471 /* We want integer ones to end up last no matter what, since they are
472 the ones we can do the most with. */
473 #define INTEGER_CONST_TYPE 1 << 4
474 #define FLOAT_ONE_CONST_TYPE 1 << 3
475 #define FLOAT_CONST_TYPE 1 << 2
476 #define OTHER_CONST_TYPE 1 << 1
477
478 /* Classify an invariant tree into integer, float, or other, so that
479 we can sort them to be near other constants of the same type. */
480 static inline int
481 constant_type (tree t)
482 {
483 if (INTEGRAL_TYPE_P (TREE_TYPE (t)))
484 return INTEGER_CONST_TYPE;
485 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (t)))
486 {
487 /* Sort -1.0 and 1.0 constants last: while in some cases
488 const_binop can't optimize some inexact operations, multiplication
489 by -1.0 or 1.0 can always be merged with other operands. */
490 if (real_onep (t) || real_minus_onep (t))
491 return FLOAT_ONE_CONST_TYPE;
492 return FLOAT_CONST_TYPE;
493 }
494 else
495 return OTHER_CONST_TYPE;
496 }
497
498 /* qsort comparison function to sort operand entries PA and PB by rank
499 so that the sorted array is ordered by rank in decreasing order. */
500 static int
501 sort_by_operand_rank (const void *pa, const void *pb)
502 {
503 const operand_entry *oea = *(const operand_entry *const *)pa;
504 const operand_entry *oeb = *(const operand_entry *const *)pb;
505
506 if (oeb->rank != oea->rank)
507 return oeb->rank > oea->rank ? 1 : -1;
508
509 /* It's nicer for optimize_expression if constants that are likely
510 to fold when added/multiplied/whatever are put next to each
511 other. Since all constants have rank 0, order them by type. */
512 if (oea->rank == 0)
513 {
514 if (constant_type (oeb->op) != constant_type (oea->op))
515 return constant_type (oea->op) - constant_type (oeb->op);
516 else
517 /* To make sorting result stable, we use unique IDs to determine
518 order. */
519 return oeb->id > oea->id ? 1 : -1;
520 }
521
522 if (TREE_CODE (oea->op) != SSA_NAME)
523 {
524 if (TREE_CODE (oeb->op) != SSA_NAME)
525 return oeb->id > oea->id ? 1 : -1;
526 else
527 return 1;
528 }
529 else if (TREE_CODE (oeb->op) != SSA_NAME)
530 return -1;
531
532 /* Lastly, make sure the versions that are the same go next to each
533 other. */
534 if (SSA_NAME_VERSION (oeb->op) != SSA_NAME_VERSION (oea->op))
535 {
536 /* SSA_NAME_VERSION is assigned pretty randomly, because we reuse
537 versions of removed SSA_NAMEs, so if possible, prefer to sort
538 based on basic block and gimple_uid of the SSA_NAME_DEF_STMT.
539 See PR60418. */
540 gimple *stmta = SSA_NAME_DEF_STMT (oea->op);
541 gimple *stmtb = SSA_NAME_DEF_STMT (oeb->op);
542 basic_block bba = gimple_bb (stmta);
543 basic_block bbb = gimple_bb (stmtb);
544 if (bbb != bba)
545 {
546 /* One of the SSA_NAMEs can be defined in oeN->stmt_to_insert
547 but the other might not be. */
548 if (!bba)
549 return 1;
550 if (!bbb)
551 return -1;
552 /* If neither is, compare bb_rank. */
553 if (bb_rank[bbb->index] != bb_rank[bba->index])
554 return (bb_rank[bbb->index] >> 16) - (bb_rank[bba->index] >> 16);
555 }
556
557 bool da = reassoc_stmt_dominates_stmt_p (stmta, stmtb);
558 bool db = reassoc_stmt_dominates_stmt_p (stmtb, stmta);
559 if (da != db)
560 return da ? 1 : -1;
561
562 return SSA_NAME_VERSION (oeb->op) > SSA_NAME_VERSION (oea->op) ? 1 : -1;
563 }
564
565 return oeb->id > oea->id ? 1 : -1;
566 }
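
/* E.g. (illustrative): the operand list {1 (int const), x_5 (rank 2),
   1.0 (float const), y_6 (rank 3)} sorts to {y_6, x_5, 1.0, 1} -
   decreasing rank first, and among the rank-0 constants the integer
   one last, where it is most likely to combine with others.  */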
567
568 /* Add an operand entry to *OPS for the tree operand OP. */
569
570 static void
571 add_to_ops_vec (vec<operand_entry *> *ops, tree op, gimple *stmt_to_insert = NULL)
572 {
573 operand_entry *oe = operand_entry_pool.allocate ();
574
575 oe->op = op;
576 oe->rank = get_rank (op);
577 oe->id = next_operand_entry_id++;
578 oe->count = 1;
579 oe->stmt_to_insert = stmt_to_insert;
580 ops->safe_push (oe);
581 }
582
583 /* Add an operand entry to *OPS for the tree operand OP with repeat
584 count REPEAT. */
585
586 static void
587 add_repeat_to_ops_vec (vec<operand_entry *> *ops,
588 HOST_WIDE_INT repeat)
589 {
590 operand_entry *oe = operand_entry_pool.allocate ();
591
592 oe->op = op;
593 oe->rank = get_rank (op);
594 oe->id = next_operand_entry_id++;
595 oe->count = repeat;
596 oe->stmt_to_insert = NULL;
597 ops->safe_push (oe);
598
599 reassociate_stats.pows_encountered++;
600 }
601
602 /* Return true if STMT is a reassociable operation containing a binary
603 operation with tree code CODE, and is inside LOOP. */
604
605 static bool
606 is_reassociable_op (gimple *stmt, enum tree_code code, struct loop *loop)
607 {
608 basic_block bb = gimple_bb (stmt);
609
610 if (gimple_bb (stmt) == NULL)
611 return false;
612
613 if (!flow_bb_inside_loop_p (loop, bb))
614 return false;
615
616 if (is_gimple_assign (stmt)
617 && gimple_assign_rhs_code (stmt) == code
618 && has_single_use (gimple_assign_lhs (stmt)))
619 {
620 tree rhs1 = gimple_assign_rhs1 (stmt);
621 tree rhs2 = gimple_assign_rhs2 (stmt);
622 if (TREE_CODE (rhs1) == SSA_NAME
623 && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (rhs1))
624 return false;
625 if (rhs2
626 && TREE_CODE (rhs2) == SSA_NAME
627 && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (rhs2))
628 return false;
629 return true;
630 }
631
632 return false;
633 }
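
/* For example (sketch): inside LOOP, "t_3 = a_1 + b_2" where t_3 has
   exactly one use is reassociable for CODE == PLUS_EXPR; the same
   statement with two uses of t_3 is not, since rewriting it could
   duplicate the computation.  */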
634
635
636 /* Return true if STMT is a nop-conversion. */
637
638 static bool
639 gimple_nop_conversion_p (gimple *stmt)
640 {
641 if (gassign *ass = dyn_cast <gassign *> (stmt))
642 {
643 if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (ass))
644 && tree_nop_conversion_p (TREE_TYPE (gimple_assign_lhs (ass)),
645 TREE_TYPE (gimple_assign_rhs1 (ass))))
646 return true;
647 }
648 return false;
649 }
650
651 /* Given NAME, if NAME is defined by a unary operation OPCODE, return the
652 operand of that operation. Otherwise, return NULL. */
653
654 static tree
655 get_unary_op (tree name, enum tree_code opcode)
656 {
657 gimple *stmt = SSA_NAME_DEF_STMT (name);
658
659 /* Look through nop conversions (sign changes). */
660 if (gimple_nop_conversion_p (stmt)
661 && TREE_CODE (gimple_assign_rhs1 (stmt)) == SSA_NAME)
662 stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
663
664 if (!is_gimple_assign (stmt))
665 return NULL_TREE;
666
667 if (gimple_assign_rhs_code (stmt) == opcode)
668 return gimple_assign_rhs1 (stmt);
669 return NULL_TREE;
670 }
671
672 /* Return true if OP1 and OP2 have the same value if cast to either type. */
673
674 static bool
675 ops_equal_values_p (tree op1, tree op2)
676 {
677 if (op1 == op2)
678 return true;
679
680 tree orig_op1 = op1;
681 if (TREE_CODE (op1) == SSA_NAME)
682 {
683 gimple *stmt = SSA_NAME_DEF_STMT (op1);
684 if (gimple_nop_conversion_p (stmt))
685 {
686 op1 = gimple_assign_rhs1 (stmt);
687 if (op1 == op2)
688 return true;
689 }
690 }
691
692 if (TREE_CODE (op2) == SSA_NAME)
693 {
694 gimple *stmt = SSA_NAME_DEF_STMT (op2);
695 if (gimple_nop_conversion_p (stmt))
696 {
697 op2 = gimple_assign_rhs1 (stmt);
698 if (op1 == op2
699 || orig_op1 == op2)
700 return true;
701 }
702 }
703
704 return false;
705 }
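
/* Illustration: if b_2 = (unsigned int) a_1 is a nop conversion
   between types of the same precision, then
   ops_equal_values_p (a_1, b_2) is true, allowing e.g. a_1 and a
   negate of b_2 to cancel in eliminate_plus_minus_pair below.  */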
706
707
708 /* If CURR and LAST are a pair of ops that OPCODE allows us to
709 eliminate through equivalences, do so, remove them from OPS, and
710 return true. Otherwise, return false. */
711
712 static bool
713 eliminate_duplicate_pair (enum tree_code opcode,
714 vec<operand_entry *> *ops,
715 bool *all_done,
716 unsigned int i,
717 operand_entry *curr,
718 operand_entry *last)
719 {
720
721 /* If we have two of the same op, and the opcode is &, |, min, or max,
722 we can eliminate one of them.
723 If we have two of the same op, and the opcode is ^, we can
724 eliminate both of them. */
725
726 if (last && last->op == curr->op)
727 {
728 switch (opcode)
729 {
730 case MAX_EXPR:
731 case MIN_EXPR:
732 case BIT_IOR_EXPR:
733 case BIT_AND_EXPR:
734 if (dump_file && (dump_flags & TDF_DETAILS))
735 {
736 fprintf (dump_file, "Equivalence: ");
737 print_generic_expr (dump_file, curr->op);
738 fprintf (dump_file, " [&|minmax] ");
739 print_generic_expr (dump_file, last->op);
740 fprintf (dump_file, " -> ");
741 print_generic_stmt (dump_file, last->op);
742 }
743
744 ops->ordered_remove (i);
745 reassociate_stats.ops_eliminated ++;
746
747 return true;
748
749 case BIT_XOR_EXPR:
750 if (dump_file && (dump_flags & TDF_DETAILS))
751 {
752 fprintf (dump_file, "Equivalence: ");
753 print_generic_expr (dump_file, curr->op);
754 fprintf (dump_file, " ^ ");
755 print_generic_expr (dump_file, last->op);
756 fprintf (dump_file, " -> nothing\n");
757 }
758
759 reassociate_stats.ops_eliminated += 2;
760
761 if (ops->length () == 2)
762 {
763 ops->truncate (0);
764 add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (last->op)));
765 *all_done = true;
766 }
767 else
768 {
769 ops->ordered_remove (i-1);
770 ops->ordered_remove (i-1);
771 }
772
773 return true;
774
775 default:
776 break;
777 }
778 }
779 return false;
780 }
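
/* Sketch of the effect on the operand list (illustrative):

     {a, a}     opcode BIT_AND_EXPR  ->  {a}   since a & a == a
     {a, a, b}  opcode BIT_XOR_EXPR  ->  {b}   since a ^ a == 0
     {a, a}     opcode BIT_XOR_EXPR  ->  {0}   the whole list folds  */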
781
782 static vec<tree> plus_negates;
783
784 /* If OPCODE is PLUS_EXPR, CURR->OP is a negate expression or a bitwise not
785 expression, look in OPS for a corresponding positive operation to cancel
786 it out. If we find one, remove the other from OPS, replace
787 OPS[CURRINDEX] with 0 or -1, respectively, and return true. Otherwise,
788 return false. */
789
790 static bool
791 eliminate_plus_minus_pair (enum tree_code opcode,
792 vec<operand_entry *> *ops,
793 unsigned int currindex,
794 operand_entry *curr)
795 {
796 tree negateop;
797 tree notop;
798 unsigned int i;
799 operand_entry *oe;
800
801 if (opcode != PLUS_EXPR || TREE_CODE (curr->op) != SSA_NAME)
802 return false;
803
804 negateop = get_unary_op (curr->op, NEGATE_EXPR);
805 notop = get_unary_op (curr->op, BIT_NOT_EXPR);
806 if (negateop == NULL_TREE && notop == NULL_TREE)
807 return false;
808
809 /* Any non-negated version will have a rank that is one less than
810 the current rank. So once we hit those ranks, if we don't find
811 one, we can stop. */
812
813 for (i = currindex + 1;
814 ops->iterate (i, &oe)
815 && oe->rank >= curr->rank - 1 ;
816 i++)
817 {
818 if (negateop
819 && ops_equal_values_p (oe->op, negateop))
820 {
821 if (dump_file && (dump_flags & TDF_DETAILS))
822 {
823 fprintf (dump_file, "Equivalence: ");
824 print_generic_expr (dump_file, negateop);
825 fprintf (dump_file, " + -");
826 print_generic_expr (dump_file, oe->op);
827 fprintf (dump_file, " -> 0\n");
828 }
829
830 ops->ordered_remove (i);
831 add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (oe->op)));
832 ops->ordered_remove (currindex);
833 reassociate_stats.ops_eliminated ++;
834
835 return true;
836 }
837 else if (notop
838 && ops_equal_values_p (oe->op, notop))
839 {
840 tree op_type = TREE_TYPE (oe->op);
841
842 if (dump_file && (dump_flags & TDF_DETAILS))
843 {
844 fprintf (dump_file, "Equivalence: ");
845 print_generic_expr (dump_file, notop);
846 fprintf (dump_file, " + ~");
847 print_generic_expr (dump_file, oe->op);
848 fprintf (dump_file, " -> -1\n");
849 }
850
851 ops->ordered_remove (i);
852 add_to_ops_vec (ops, build_all_ones_cst (op_type));
853 ops->ordered_remove (currindex);
854 reassociate_stats.ops_eliminated ++;
855
856 return true;
857 }
858 }
859
860 /* If CURR->OP is a negate expr without nop conversion in a plus expr:
861 save it for later inspection in repropagate_negates(). */
862 if (negateop != NULL_TREE
863 && gimple_assign_rhs_code (SSA_NAME_DEF_STMT (curr->op)) == NEGATE_EXPR)
864 plus_negates.safe_push (curr->op);
865
866 return false;
867 }
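
/* Illustration: for PLUS_EXPR over {x_1, t_2} where t_2 = -x_1, the
   pair becomes a single 0 operand; over {x_1, u_3} where u_3 = ~x_1,
   it becomes -1, since x + ~x is all ones in two's complement.  */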
868
869 /* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR, and CURR->OP is really a
870 bitwise not expression, look in OPS for a corresponding operand to
871 cancel it out. If we find one, remove the other from OPS, replace
872 OPS[CURRINDEX] with 0, and return true. Otherwise, return
873 false. */
874
875 static bool
876 eliminate_not_pairs (enum tree_code opcode,
877 vec<operand_entry *> *ops,
878 unsigned int currindex,
879 operand_entry *curr)
880 {
881 tree notop;
882 unsigned int i;
883 operand_entry *oe;
884
885 if ((opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
886 || TREE_CODE (curr->op) != SSA_NAME)
887 return false;
888
889 notop = get_unary_op (curr->op, BIT_NOT_EXPR);
890 if (notop == NULL_TREE)
891 return false;
892
893 /* Any non-not version will have a rank that is one less than
894 the current rank. So once we hit those ranks, if we don't find
895 one, we can stop. */
896
897 for (i = currindex + 1;
898 ops->iterate (i, &oe)
899 && oe->rank >= curr->rank - 1;
900 i++)
901 {
902 if (oe->op == notop)
903 {
904 if (dump_file && (dump_flags & TDF_DETAILS))
905 {
906 fprintf (dump_file, "Equivalence: ");
907 print_generic_expr (dump_file, notop);
908 if (opcode == BIT_AND_EXPR)
909 fprintf (dump_file, " & ~");
910 else if (opcode == BIT_IOR_EXPR)
911 fprintf (dump_file, " | ~");
912 print_generic_expr (dump_file, oe->op);
913 if (opcode == BIT_AND_EXPR)
914 fprintf (dump_file, " -> 0\n");
915 else if (opcode == BIT_IOR_EXPR)
916 fprintf (dump_file, " -> -1\n");
917 }
918
919 if (opcode == BIT_AND_EXPR)
920 oe->op = build_zero_cst (TREE_TYPE (oe->op));
921 else if (opcode == BIT_IOR_EXPR)
922 oe->op = build_all_ones_cst (TREE_TYPE (oe->op));
923
924 reassociate_stats.ops_eliminated += ops->length () - 1;
925 ops->truncate (0);
926 ops->quick_push (oe);
927 return true;
928 }
929 }
930
931 return false;
932 }
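
/* Illustration: for {x_1, t_2, y_3} with t_2 = ~x_1, BIT_AND_EXPR
   collapses the whole list to {0} (x & ~x == 0) and BIT_IOR_EXPR to
   {-1} (x | ~x == -1); unlike the plus/minus case the remaining
   operands are dead too, so the list is truncated to one entry.  */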
933
934 /* Use constant value that may be present in OPS to try to eliminate
935 operands. Note that this function is only really used when we've
936 eliminated ops for other reasons, or merged constants. Across
937 single statements, fold already does all of this, plus more. There
938 is little point in duplicating logic, so I've only included the
939 identities that I could ever construct testcases to trigger. */
940
941 static void
942 eliminate_using_constants (enum tree_code opcode,
943 vec<operand_entry *> *ops)
944 {
945 operand_entry *oelast = ops->last ();
946 tree type = TREE_TYPE (oelast->op);
947
948 if (oelast->rank == 0
949 && (ANY_INTEGRAL_TYPE_P (type) || FLOAT_TYPE_P (type)))
950 {
951 switch (opcode)
952 {
953 case BIT_AND_EXPR:
954 if (integer_zerop (oelast->op))
955 {
956 if (ops->length () != 1)
957 {
958 if (dump_file && (dump_flags & TDF_DETAILS))
959 fprintf (dump_file, "Found & 0, removing all other ops\n");
960
961 reassociate_stats.ops_eliminated += ops->length () - 1;
962
963 ops->truncate (0);
964 ops->quick_push (oelast);
965 return;
966 }
967 }
968 else if (integer_all_onesp (oelast->op))
969 {
970 if (ops->length () != 1)
971 {
972 if (dump_file && (dump_flags & TDF_DETAILS))
973 fprintf (dump_file, "Found & -1, removing\n");
974 ops->pop ();
975 reassociate_stats.ops_eliminated++;
976 }
977 }
978 break;
979 case BIT_IOR_EXPR:
980 if (integer_all_onesp (oelast->op))
981 {
982 if (ops->length () != 1)
983 {
984 if (dump_file && (dump_flags & TDF_DETAILS))
985 fprintf (dump_file, "Found | -1, removing all other ops\n");
986
987 reassociate_stats.ops_eliminated += ops->length () - 1;
988
989 ops->truncate (0);
990 ops->quick_push (oelast);
991 return;
992 }
993 }
994 else if (integer_zerop (oelast->op))
995 {
996 if (ops->length () != 1)
997 {
998 if (dump_file && (dump_flags & TDF_DETAILS))
999 fprintf (dump_file, "Found | 0, removing\n");
1000 ops->pop ();
1001 reassociate_stats.ops_eliminated++;
1002 }
1003 }
1004 break;
1005 case MULT_EXPR:
1006 if (integer_zerop (oelast->op)
1007 || (FLOAT_TYPE_P (type)
1008 && !HONOR_NANS (type)
1009 && !HONOR_SIGNED_ZEROS (type)
1010 && real_zerop (oelast->op)))
1011 {
1012 if (ops->length () != 1)
1013 {
1014 if (dump_file && (dump_flags & TDF_DETAILS))
1015 fprintf (dump_file, "Found * 0, removing all other ops\n");
1016
1017 reassociate_stats.ops_eliminated += ops->length () - 1;
1018 ops->truncate (0);
1019 ops->quick_push (oelast);
1020 return;
1021 }
1022 }
1023 else if (integer_onep (oelast->op)
1024 || (FLOAT_TYPE_P (type)
1025 && !HONOR_SNANS (type)
1026 && real_onep (oelast->op)))
1027 {
1028 if (ops->length () != 1)
1029 {
1030 if (dump_file && (dump_flags & TDF_DETAILS))
1031 fprintf (dump_file, "Found * 1, removing\n");
1032 ops->pop ();
1033 reassociate_stats.ops_eliminated++;
1034 return;
1035 }
1036 }
1037 break;
1038 case BIT_XOR_EXPR:
1039 case PLUS_EXPR:
1040 case MINUS_EXPR:
1041 if (integer_zerop (oelast->op)
1042 || (FLOAT_TYPE_P (type)
1043 && (opcode == PLUS_EXPR || opcode == MINUS_EXPR)
1044 && fold_real_zero_addition_p (type, oelast->op,
1045 opcode == MINUS_EXPR)))
1046 {
1047 if (ops->length () != 1)
1048 {
1049 if (dump_file && (dump_flags & TDF_DETAILS))
1050 fprintf (dump_file, "Found [|^+] 0, removing\n");
1051 ops->pop ();
1052 reassociate_stats.ops_eliminated++;
1053 return;
1054 }
1055 }
1056 break;
1057 default:
1058 break;
1059 }
1060 }
1061 }
1062
1063
1064 static void linearize_expr_tree (vec<operand_entry *> *, gimple *,
1065 bool, bool);
1066
1067 /* Structure for tracking and counting operands. */
1068 struct oecount {
1069 unsigned int cnt;
1070 unsigned int id;
1071 enum tree_code oecode;
1072 tree op;
1073 };
1074
1075
1076 /* The heap for the oecount hashtable and the sorted list of operands. */
1077 static vec<oecount> cvec;
1078
1079
1080 /* Oecount hashtable helpers. */
1081
1082 struct oecount_hasher : int_hash <int, 0, 1>
1083 {
1084 static inline hashval_t hash (int);
1085 static inline bool equal (int, int);
1086 };
1087
1088 /* Hash function for oecount. */
1089
1090 inline hashval_t
1091 oecount_hasher::hash (int p)
1092 {
1093 const oecount *c = &cvec[p - 42];
1094 return htab_hash_pointer (c->op) ^ (hashval_t)c->oecode;
1095 }
1096
1097 /* Comparison function for oecount. */
1098
1099 inline bool
1100 oecount_hasher::equal (int p1, int p2)
1101 {
1102 const oecount *c1 = &cvec[p1 - 42];
1103 const oecount *c2 = &cvec[p2 - 42];
1104 return c1->oecode == c2->oecode && c1->op == c2->op;
1105 }
1106
1107 /* Comparison function for qsort sorting oecount elements by count. */
1108
1109 static int
1110 oecount_cmp (const void *p1, const void *p2)
1111 {
1112 const oecount *c1 = (const oecount *)p1;
1113 const oecount *c2 = (const oecount *)p2;
1114 if (c1->cnt != c2->cnt)
1115 return c1->cnt > c2->cnt ? 1 : -1;
1116 else
1117 /* If counts are identical, use unique IDs to stabilize qsort. */
1118 return c1->id > c2->id ? 1 : -1;
1119 }
1120
1121 /* Return TRUE iff STMT represents a builtin call that raises OP
1122 to some exponent. */
1123
1124 static bool
1125 stmt_is_power_of_op (gimple *stmt, tree op)
1126 {
1127 if (!is_gimple_call (stmt))
1128 return false;
1129
1130 switch (gimple_call_combined_fn (stmt))
1131 {
1132 CASE_CFN_POW:
1133 CASE_CFN_POWI:
1134 return (operand_equal_p (gimple_call_arg (stmt, 0), op, 0));
1135
1136 default:
1137 return false;
1138 }
1139 }
1140
1141 /* Given STMT which is a __builtin_pow* call, decrement its exponent
1142 in place and return the result. Assumes that stmt_is_power_of_op
1143 was previously called for STMT and returned TRUE. */
1144
1145 static HOST_WIDE_INT
1146 decrement_power (gimple *stmt)
1147 {
1148 REAL_VALUE_TYPE c, cint;
1149 HOST_WIDE_INT power;
1150 tree arg1;
1151
1152 switch (gimple_call_combined_fn (stmt))
1153 {
1154 CASE_CFN_POW:
1155 arg1 = gimple_call_arg (stmt, 1);
1156 c = TREE_REAL_CST (arg1);
1157 power = real_to_integer (&c) - 1;
1158 real_from_integer (&cint, VOIDmode, power, SIGNED);
1159 gimple_call_set_arg (stmt, 1, build_real (TREE_TYPE (arg1), cint));
1160 return power;
1161
1162 CASE_CFN_POWI:
1163 arg1 = gimple_call_arg (stmt, 1);
1164 power = TREE_INT_CST_LOW (arg1) - 1;
1165 gimple_call_set_arg (stmt, 1, build_int_cst (TREE_TYPE (arg1), power));
1166 return power;
1167
1168 default:
1169 gcc_unreachable ();
1170 }
1171 }
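
/* E.g.: for "__builtin_pow (x, 3.0)" decrement_power rewrites the
   call to "__builtin_pow (x, 2.0)" in place and returns 2; for
   "__builtin_powi (x, 2)" it rewrites to "__builtin_powi (x, 1)" and
   returns 1, in which case the caller forwards x directly instead.  */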
1172
1173 /* Replace the SSA name defined by STMT with a new one, replacing
1174 all its uses with the new SSA name. Return the new SSA name. */
1175
1176 static tree
1177 make_new_ssa_for_def (gimple *stmt, enum tree_code opcode, tree op)
1178 {
1179 gimple *use_stmt;
1180 use_operand_p use;
1181 imm_use_iterator iter;
1182 tree new_lhs, new_debug_lhs = NULL_TREE;
1183 tree lhs = gimple_get_lhs (stmt);
1184
1185 new_lhs = make_ssa_name (TREE_TYPE (lhs));
1186 gimple_set_lhs (stmt, new_lhs);
1187
1188 /* Also need to update GIMPLE_DEBUGs. */
1189 FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs)
1190 {
1191 tree repl = new_lhs;
1192 if (is_gimple_debug (use_stmt))
1193 {
1194 if (new_debug_lhs == NULL_TREE)
1195 {
1196 new_debug_lhs = make_node (DEBUG_EXPR_DECL);
1197 gdebug *def_temp
1198 = gimple_build_debug_bind (new_debug_lhs,
1199 build2 (opcode, TREE_TYPE (lhs),
1200 new_lhs, op),
1201 stmt);
1202 DECL_ARTIFICIAL (new_debug_lhs) = 1;
1203 TREE_TYPE (new_debug_lhs) = TREE_TYPE (lhs);
1204 SET_DECL_MODE (new_debug_lhs, TYPE_MODE (TREE_TYPE (lhs)));
1205 gimple_set_uid (def_temp, gimple_uid (stmt));
1206 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
1207 gsi_insert_after (&gsi, def_temp, GSI_SAME_STMT);
1208 }
1209 repl = new_debug_lhs;
1210 }
1211 FOR_EACH_IMM_USE_ON_STMT (use, iter)
1212 SET_USE (use, repl);
1213 update_stmt (use_stmt);
1214 }
1215 return new_lhs;
1216 }
1217
1218 /* Replace all SSA names defined in STMTS_TO_FIX and replace their
1219 uses with new SSA names. Also do this for the stmt that defines DEF
1220 if *DEF is not OP. */
1221
1222 static void
1223 make_new_ssa_for_all_defs (tree *def, enum tree_code opcode, tree op,
1224 vec<gimple *> &stmts_to_fix)
1225 {
1226 unsigned i;
1227 gimple *stmt;
1228
1229 if (*def != op
1230 && TREE_CODE (*def) == SSA_NAME
1231 && (stmt = SSA_NAME_DEF_STMT (*def))
1232 && gimple_code (stmt) != GIMPLE_NOP)
1233 *def = make_new_ssa_for_def (stmt, opcode, op);
1234
1235 FOR_EACH_VEC_ELT (stmts_to_fix, i, stmt)
1236 make_new_ssa_for_def (stmt, opcode, op);
1237 }
1238
1239 /* Find the single immediate use of STMT's LHS, and replace it
1240 with OP. Remove STMT. If STMT's LHS is the same as *DEF,
1241 replace *DEF with OP as well. */
1242
1243 static void
1244 propagate_op_to_single_use (tree op, gimple *stmt, tree *def)
1245 {
1246 tree lhs;
1247 gimple *use_stmt;
1248 use_operand_p use;
1249 gimple_stmt_iterator gsi;
1250
1251 if (is_gimple_call (stmt))
1252 lhs = gimple_call_lhs (stmt);
1253 else
1254 lhs = gimple_assign_lhs (stmt);
1255
1256 gcc_assert (has_single_use (lhs));
1257 single_imm_use (lhs, &use, &use_stmt);
1258 if (lhs == *def)
1259 *def = op;
1260 SET_USE (use, op);
1261 if (TREE_CODE (op) != SSA_NAME)
1262 update_stmt (use_stmt);
1263 gsi = gsi_for_stmt (stmt);
1264 unlink_stmt_vdef (stmt);
1265 reassoc_remove_stmt (&gsi);
1266 release_defs (stmt);
1267 }
1268
1269 /* Walks the linear chain with result *DEF searching for an operation
1270 with operand OP and code OPCODE removing that from the chain. *DEF
1271 is updated if there is only one operand but no operation left. */
1272
1273 static void
1274 zero_one_operation (tree *def, enum tree_code opcode, tree op)
1275 {
1276 tree orig_def = *def;
1277 gimple *stmt = SSA_NAME_DEF_STMT (*def);
1278 /* PR72835 - Record the stmt chain that has to be updated such that
1279 we don't use the same LHS when the values computed are different. */
1280 auto_vec<gimple *, 64> stmts_to_fix;
1281
1282 do
1283 {
1284 tree name;
1285
1286 if (opcode == MULT_EXPR)
1287 {
1288 if (stmt_is_power_of_op (stmt, op))
1289 {
1290 if (decrement_power (stmt) == 1)
1291 {
1292 if (stmts_to_fix.length () > 0)
1293 stmts_to_fix.pop ();
1294 propagate_op_to_single_use (op, stmt, def);
1295 }
1296 break;
1297 }
1298 else if (gimple_assign_rhs_code (stmt) == NEGATE_EXPR)
1299 {
1300 if (gimple_assign_rhs1 (stmt) == op)
1301 {
1302 tree cst = build_minus_one_cst (TREE_TYPE (op));
1303 if (stmts_to_fix.length () > 0)
1304 stmts_to_fix.pop ();
1305 propagate_op_to_single_use (cst, stmt, def);
1306 break;
1307 }
1308 else if (integer_minus_onep (op)
1309 || real_minus_onep (op))
1310 {
1311 gimple_assign_set_rhs_code
1312 (stmt, TREE_CODE (gimple_assign_rhs1 (stmt)));
1313 break;
1314 }
1315 }
1316 }
1317
1318 name = gimple_assign_rhs1 (stmt);
1319
1320 /* If this is the operation we look for and one of the operands
1321 is ours, simply propagate the other operand into the stmt's
1322 single use. */
1323 if (gimple_assign_rhs_code (stmt) == opcode
1324 && (name == op
1325 || gimple_assign_rhs2 (stmt) == op))
1326 {
1327 if (name == op)
1328 name = gimple_assign_rhs2 (stmt);
1329 if (stmts_to_fix.length () > 0)
1330 stmts_to_fix.pop ();
1331 propagate_op_to_single_use (name, stmt, def);
1332 break;
1333 }
1334
1335 /* We might have a multiply of two __builtin_pow* calls, and
1336 the operand might be hiding in the rightmost one. Likewise
1337 this can happen for a negate. */
1338 if (opcode == MULT_EXPR
1339 && gimple_assign_rhs_code (stmt) == opcode
1340 && TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME
1341 && has_single_use (gimple_assign_rhs2 (stmt)))
1342 {
1343 gimple *stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
1344 if (stmt_is_power_of_op (stmt2, op))
1345 {
1346 if (decrement_power (stmt2) == 1)
1347 propagate_op_to_single_use (op, stmt2, def);
1348 else
1349 stmts_to_fix.safe_push (stmt2);
1350 break;
1351 }
1352 else if (is_gimple_assign (stmt2)
1353 && gimple_assign_rhs_code (stmt2) == NEGATE_EXPR)
1354 {
1355 if (gimple_assign_rhs1 (stmt2) == op)
1356 {
1357 tree cst = build_minus_one_cst (TREE_TYPE (op));
1358 propagate_op_to_single_use (cst, stmt2, def);
1359 break;
1360 }
1361 else if (integer_minus_onep (op)
1362 || real_minus_onep (op))
1363 {
1364 stmts_to_fix.safe_push (stmt2);
1365 gimple_assign_set_rhs_code
1366 (stmt2, TREE_CODE (gimple_assign_rhs1 (stmt2)));
1367 break;
1368 }
1369 }
1370 }
1371
1372 /* Continue walking the chain. */
1373 gcc_assert (name != op
1374 && TREE_CODE (name) == SSA_NAME);
1375 stmt = SSA_NAME_DEF_STMT (name);
1376 stmts_to_fix.safe_push (stmt);
1377 }
1378 while (1);
1379
1380 if (stmts_to_fix.length () > 0 || *def == orig_def)
1381 make_new_ssa_for_all_defs (def, opcode, op, stmts_to_fix);
1382 }
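
/* Illustration (hypothetical SSA names): for *DEF == d_5 with

     t_3 = a_1 * x_2;
     d_5 = t_3 * y_4;

   zero_one_operation (&d_5, MULT_EXPR, x_2) propagates a_1 into
   t_3's single use, removes t_3's statement, and gives the remaining
   statement a fresh LHS computing a_1 * y_4 (see PR72835).  */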
1383
1384 /* Returns true if statement S1 dominates statement S2. Like
1385 stmt_dominates_stmt_p, but uses stmt UIDs to optimize. */
1386
1387 static bool
1388 reassoc_stmt_dominates_stmt_p (gimple *s1, gimple *s2)
1389 {
1390 basic_block bb1 = gimple_bb (s1), bb2 = gimple_bb (s2);
1391
1392 /* If bb1 is NULL, it should be a GIMPLE_NOP def stmt of an (D)
1393 SSA_NAME. Assume it lives at the beginning of the function and
1394 thus dominates everything. */
1395 if (!bb1 || s1 == s2)
1396 return true;
1397
1398 /* If bb2 is NULL, it doesn't dominate any stmt with a bb. */
1399 if (!bb2)
1400 return false;
1401
1402 if (bb1 == bb2)
1403 {
1404 /* PHIs in the same basic block are assumed to be
1405 executed all in parallel; if only one stmt is a PHI,
1406 it dominates the other stmt in the same basic block. */
1407 if (gimple_code (s1) == GIMPLE_PHI)
1408 return true;
1409
1410 if (gimple_code (s2) == GIMPLE_PHI)
1411 return false;
1412
1413 gcc_assert (gimple_uid (s1) && gimple_uid (s2));
1414
1415 if (gimple_uid (s1) < gimple_uid (s2))
1416 return true;
1417
1418 if (gimple_uid (s1) > gimple_uid (s2))
1419 return false;
1420
1421 gimple_stmt_iterator gsi = gsi_for_stmt (s1);
1422 unsigned int uid = gimple_uid (s1);
1423 for (gsi_next (&gsi); !gsi_end_p (gsi); gsi_next (&gsi))
1424 {
1425 gimple *s = gsi_stmt (gsi);
1426 if (gimple_uid (s) != uid)
1427 break;
1428 if (s == s2)
1429 return true;
1430 }
1431
1432 return false;
1433 }
1434
1435 return dominated_by_p (CDI_DOMINATORS, bb2, bb1);
1436 }
1437
1438 /* Insert STMT after INSERT_POINT. */
1439
1440 static void
1441 insert_stmt_after (gimple *stmt, gimple *insert_point)
1442 {
1443 gimple_stmt_iterator gsi;
1444 basic_block bb;
1445
1446 if (gimple_code (insert_point) == GIMPLE_PHI)
1447 bb = gimple_bb (insert_point);
1448 else if (!stmt_ends_bb_p (insert_point))
1449 {
1450 gsi = gsi_for_stmt (insert_point);
1451 gimple_set_uid (stmt, gimple_uid (insert_point));
1452 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
1453 return;
1454 }
1455 else
1456 /* We assume INSERT_POINT is a SSA_NAME_DEF_STMT of some SSA_NAME,
1457 thus if it must end a basic block, it should be a call that can
1458 throw, or some assignment that can throw. If it throws, the LHS
1459 of it will not be initialized though, so only valid places using
1460 the SSA_NAME should be dominated by the fallthru edge. */
1461 bb = find_fallthru_edge (gimple_bb (insert_point)->succs)->dest;
1462 gsi = gsi_after_labels (bb);
1463 if (gsi_end_p (gsi))
1464 {
1465 gimple_stmt_iterator gsi2 = gsi_last_bb (bb);
1466 gimple_set_uid (stmt,
1467 gsi_end_p (gsi2) ? 1 : gimple_uid (gsi_stmt (gsi2)));
1468 }
1469 else
1470 gimple_set_uid (stmt, gimple_uid (gsi_stmt (gsi)));
1471 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
1472 }
1473
1474 /* Builds one statement performing OP1 OPCODE OP2 using TMPVAR for
1475 the result. Places the statement after the definition of either
1476 OP1 or OP2. Returns the new statement. */
1477
1478 static gimple *
1479 build_and_add_sum (tree type, tree op1, tree op2, enum tree_code opcode)
1480 {
1481 gimple *op1def = NULL, *op2def = NULL;
1482 gimple_stmt_iterator gsi;
1483 tree op;
1484 gassign *sum;
1485
1486 /* Create the addition statement. */
1487 op = make_ssa_name (type);
1488 sum = gimple_build_assign (op, opcode, op1, op2);
1489
1490 /* Find an insertion place and insert. */
1491 if (TREE_CODE (op1) == SSA_NAME)
1492 op1def = SSA_NAME_DEF_STMT (op1);
1493 if (TREE_CODE (op2) == SSA_NAME)
1494 op2def = SSA_NAME_DEF_STMT (op2);
1495 if ((!op1def || gimple_nop_p (op1def))
1496 && (!op2def || gimple_nop_p (op2def)))
1497 {
1498 gsi = gsi_after_labels (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
1499 if (gsi_end_p (gsi))
1500 {
1501 gimple_stmt_iterator gsi2
1502 = gsi_last_bb (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
1503 gimple_set_uid (sum,
1504 gsi_end_p (gsi2) ? 1 : gimple_uid (gsi_stmt (gsi2)));
1505 }
1506 else
1507 gimple_set_uid (sum, gimple_uid (gsi_stmt (gsi)));
1508 gsi_insert_before (&gsi, sum, GSI_NEW_STMT);
1509 }
1510 else
1511 {
1512 gimple *insert_point;
1513 if ((!op1def || gimple_nop_p (op1def))
1514 || (op2def && !gimple_nop_p (op2def)
1515 && reassoc_stmt_dominates_stmt_p (op1def, op2def)))
1516 insert_point = op2def;
1517 else
1518 insert_point = op1def;
1519 insert_stmt_after (sum, insert_point);
1520 }
1521 update_stmt (sum);
1522
1523 return sum;
1524 }
1525
1526 /* Perform un-distribution of divisions and multiplications.
1527 A * X + B * X is transformed into (A + B) * X and A / X + B / X
1528 to (A + B) / X for real X.
1529
1530 The algorithm is organized as follows.
1531
1532 - First we walk the addition chain *OPS looking for summands that
1533 are defined by a multiplication or a real division. This results
1534 in the candidates bitmap with relevant indices into *OPS.
1535
1536 - Second we build the chains of multiplications or divisions for
1537 these candidates, counting the number of occurrences of (operand, code)
1538 pairs in all of the candidates chains.
1539
1540 - Third we sort the (operand, code) pairs by number of occurrence and
1541 process them starting with the pair with the most uses.
1542
1543 * For each such pair we walk the candidates again to build a
1544 second candidate bitmap noting all multiplication/division chains
1545 that have at least one occurrence of (operand, code).
1546
1547 * We build an alternate addition chain only covering these
1548 candidates with one (operand, code) operation removed from their
1549 multiplication/division chain.
1550
1551 * The first candidate gets replaced by the alternate addition chain
1552 multiplied/divided by the operand.
1553
1554 * All candidate chains get disabled for further processing and
1555 processing of (operand, code) pairs continues.
1556
1557 The alternate addition chains built are re-processed by the main
1558 reassociation algorithm which allows optimizing a * x * y + b * y * x
1559 to (a + b) * x * y in one invocation of the reassociation pass. */
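
/* For example (sketch): for the chain a_1 * x_3 + b_2 * x_3,
   undistribution zeroes one x_3 factor out of each candidate chain,
   builds the alternate addition a_1 + b_2, and applies the common
   factor once, yielding (a_1 + b_2) * x_3.  */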
1560
1561 static bool
1562 undistribute_ops_list (enum tree_code opcode,
1563 vec<operand_entry *> *ops, struct loop *loop)
1564 {
1565 unsigned int length = ops->length ();
1566 operand_entry *oe1;
1567 unsigned i, j;
1568 unsigned nr_candidates, nr_candidates2;
1569 sbitmap_iterator sbi0;
1570 vec<operand_entry *> *subops;
1571 bool changed = false;
1572 unsigned int next_oecount_id = 0;
1573
1574 if (length <= 1
1575 || opcode != PLUS_EXPR)
1576 return false;
1577
1578 /* Build a list of candidates to process. */
1579 auto_sbitmap candidates (length);
1580 bitmap_clear (candidates);
1581 nr_candidates = 0;
1582 FOR_EACH_VEC_ELT (*ops, i, oe1)
1583 {
1584 enum tree_code dcode;
1585 gimple *oe1def;
1586
1587 if (TREE_CODE (oe1->op) != SSA_NAME)
1588 continue;
1589 oe1def = SSA_NAME_DEF_STMT (oe1->op);
1590 if (!is_gimple_assign (oe1def))
1591 continue;
1592 dcode = gimple_assign_rhs_code (oe1def);
1593 if ((dcode != MULT_EXPR
1594 && dcode != RDIV_EXPR)
1595 || !is_reassociable_op (oe1def, dcode, loop))
1596 continue;
1597
1598 bitmap_set_bit (candidates, i);
1599 nr_candidates++;
1600 }
1601
1602 if (nr_candidates < 2)
1603 return false;
1604
1605 if (dump_file && (dump_flags & TDF_DETAILS))
1606 {
1607 fprintf (dump_file, "searching for un-distribute opportunities ");
1608 print_generic_expr (dump_file,
1609 (*ops)[bitmap_first_set_bit (candidates)]->op, 0);
1610 fprintf (dump_file, " %d\n", nr_candidates);
1611 }
1612
1613 /* Build linearized sub-operand lists and the counting table. */
1614 cvec.create (0);
1615
1616 hash_table<oecount_hasher> ctable (15);
1617
1618 /* ??? Macro arguments cannot have multi-argument template types in
1619 them. This typedef is needed to workaround that limitation. */
1620 typedef vec<operand_entry *> vec_operand_entry_t_heap;
1621 subops = XCNEWVEC (vec_operand_entry_t_heap, ops->length ());
1622 EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
1623 {
1624 gimple *oedef;
1625 enum tree_code oecode;
1626 unsigned j;
1627
1628 oedef = SSA_NAME_DEF_STMT ((*ops)[i]->op);
1629 oecode = gimple_assign_rhs_code (oedef);
1630 linearize_expr_tree (&subops[i], oedef,
1631 associative_tree_code (oecode), false);
1632
1633 FOR_EACH_VEC_ELT (subops[i], j, oe1)
1634 {
1635 oecount c;
1636 int *slot;
1637 int idx;
1638 c.oecode = oecode;
1639 c.cnt = 1;
1640 c.id = next_oecount_id++;
1641 c.op = oe1->op;
1642 cvec.safe_push (c);
1643 idx = cvec.length () + 41;
1644 slot = ctable.find_slot (idx, INSERT);
1645 if (!*slot)
1646 {
1647 *slot = idx;
1648 }
1649 else
1650 {
1651 cvec.pop ();
1652 cvec[*slot - 42].cnt++;
1653 }
1654 }
1655 }
1656
1657 /* Sort the counting table. */
1658 cvec.qsort (oecount_cmp);
1659
1660 if (dump_file && (dump_flags & TDF_DETAILS))
1661 {
1662 oecount *c;
1663 fprintf (dump_file, "Candidates:\n");
1664 FOR_EACH_VEC_ELT (cvec, j, c)
1665 {
1666 fprintf (dump_file, " %u %s: ", c->cnt,
1667 c->oecode == MULT_EXPR
1668 ? "*" : c->oecode == RDIV_EXPR ? "/" : "?");
1669 print_generic_expr (dump_file, c->op);
1670 fprintf (dump_file, "\n");
1671 }
1672 }
1673
1674 /* Process the (operand, code) pairs in order of most occurrence. */
1675 auto_sbitmap candidates2 (length);
1676 while (!cvec.is_empty ())
1677 {
1678 oecount *c = &cvec.last ();
1679 if (c->cnt < 2)
1680 break;
1681
1682 /* Now collect the operands in the outer chain that contain
1683 the common operand in their inner chain. */
1684 bitmap_clear (candidates2);
1685 nr_candidates2 = 0;
1686 EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
1687 {
1688 gimple *oedef;
1689 enum tree_code oecode;
1690 unsigned j;
1691 tree op = (*ops)[i]->op;
1692
1693 /* If we already undistributed in this chain, this may be
1694 a constant. */
1695 if (TREE_CODE (op) != SSA_NAME)
1696 continue;
1697
1698 oedef = SSA_NAME_DEF_STMT (op);
1699 oecode = gimple_assign_rhs_code (oedef);
1700 if (oecode != c->oecode)
1701 continue;
1702
1703 FOR_EACH_VEC_ELT (subops[i], j, oe1)
1704 {
1705 if (oe1->op == c->op)
1706 {
1707 bitmap_set_bit (candidates2, i);
1708 ++nr_candidates2;
1709 break;
1710 }
1711 }
1712 }
1713
1714 if (nr_candidates2 >= 2)
1715 {
1716 operand_entry *oe1, *oe2;
1717 gimple *prod;
1718 int first = bitmap_first_set_bit (candidates2);
1719
1720 /* Build the new addition chain. */
1721 oe1 = (*ops)[first];
1722 if (dump_file && (dump_flags & TDF_DETAILS))
1723 {
1724 fprintf (dump_file, "Building (");
1725 print_generic_expr (dump_file, oe1->op);
1726 }
1727 zero_one_operation (&oe1->op, c->oecode, c->op);
1728 EXECUTE_IF_SET_IN_BITMAP (candidates2, first+1, i, sbi0)
1729 {
1730 gimple *sum;
1731 oe2 = (*ops)[i];
1732 if (dump_file && (dump_flags & TDF_DETAILS))
1733 {
1734 fprintf (dump_file, " + ");
1735 print_generic_expr (dump_file, oe2->op);
1736 }
1737 zero_one_operation (&oe2->op, c->oecode, c->op);
1738 sum = build_and_add_sum (TREE_TYPE (oe1->op),
1739 oe1->op, oe2->op, opcode);
1740 oe2->op = build_zero_cst (TREE_TYPE (oe2->op));
1741 oe2->rank = 0;
1742 oe1->op = gimple_get_lhs (sum);
1743 }
1744
1745 /* Apply the multiplication/division. */
1746 prod = build_and_add_sum (TREE_TYPE (oe1->op),
1747 oe1->op, c->op, c->oecode);
1748 if (dump_file && (dump_flags & TDF_DETAILS))
1749 {
1750 fprintf (dump_file, ") %s ", c->oecode == MULT_EXPR ? "*" : "/");
1751 print_generic_expr (dump_file, c->op);
1752 fprintf (dump_file, "\n");
1753 }
1754
1755 /* Record it in the addition chain and disable further
1756 undistribution with this op. */
1757 oe1->op = gimple_assign_lhs (prod);
1758 oe1->rank = get_rank (oe1->op);
1759 subops[first].release ();
1760
1761 changed = true;
1762 }
1763
1764 cvec.pop ();
1765 }
1766
1767 for (i = 0; i < ops->length (); ++i)
1768 subops[i].release ();
1769 free (subops);
1770 cvec.release ();
1771
1772 return changed;
1773 }
1774
1775 /* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR and CURR is a comparison
1776 expression, examine the other OPS to see if any of them are comparisons
1777 of the same values, which we may be able to combine or eliminate.
1778 For example, we can rewrite (a < b) | (a == b) as (a <= b). */
1779
1780 static bool
1781 eliminate_redundant_comparison (enum tree_code opcode,
1782 vec<operand_entry *> *ops,
1783 unsigned int currindex,
1784 operand_entry *curr)
1785 {
1786 tree op1, op2;
1787 enum tree_code lcode, rcode;
1788 gimple *def1, *def2;
1789 int i;
1790 operand_entry *oe;
1791
1792 if (opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
1793 return false;
1794
1795 /* Check that CURR is a comparison. */
1796 if (TREE_CODE (curr->op) != SSA_NAME)
1797 return false;
1798 def1 = SSA_NAME_DEF_STMT (curr->op);
1799 if (!is_gimple_assign (def1))
1800 return false;
1801 lcode = gimple_assign_rhs_code (def1);
1802 if (TREE_CODE_CLASS (lcode) != tcc_comparison)
1803 return false;
1804 op1 = gimple_assign_rhs1 (def1);
1805 op2 = gimple_assign_rhs2 (def1);
1806
1807 /* Now look for a similar comparison in the remaining OPS. */
1808 for (i = currindex + 1; ops->iterate (i, &oe); i++)
1809 {
1810 tree t;
1811
1812 if (TREE_CODE (oe->op) != SSA_NAME)
1813 continue;
1814 def2 = SSA_NAME_DEF_STMT (oe->op);
1815 if (!is_gimple_assign (def2))
1816 continue;
1817 rcode = gimple_assign_rhs_code (def2);
1818 if (TREE_CODE_CLASS (rcode) != tcc_comparison)
1819 continue;
1820
1821 /* If we got here, we have a match. See if we can combine the
1822 two comparisons. */
1823 if (opcode == BIT_IOR_EXPR)
1824 t = maybe_fold_or_comparisons (lcode, op1, op2,
1825 rcode, gimple_assign_rhs1 (def2),
1826 gimple_assign_rhs2 (def2));
1827 else
1828 t = maybe_fold_and_comparisons (lcode, op1, op2,
1829 rcode, gimple_assign_rhs1 (def2),
1830 gimple_assign_rhs2 (def2));
1831 if (!t)
1832 continue;
1833
1834 /* maybe_fold_and_comparisons and maybe_fold_or_comparisons
1835 always give us a boolean_type_node value back. If the original
1836 BIT_AND_EXPR or BIT_IOR_EXPR was of a wider integer type,
1837 we need to convert. */
1838 if (!useless_type_conversion_p (TREE_TYPE (curr->op), TREE_TYPE (t)))
1839 t = fold_convert (TREE_TYPE (curr->op), t);
1840
1841 if (TREE_CODE (t) != INTEGER_CST
1842 && !operand_equal_p (t, curr->op, 0))
1843 {
1844 enum tree_code subcode;
1845 tree newop1, newop2;
1846 if (!COMPARISON_CLASS_P (t))
1847 continue;
1848 extract_ops_from_tree (t, &subcode, &newop1, &newop2);
1849 STRIP_USELESS_TYPE_CONVERSION (newop1);
1850 STRIP_USELESS_TYPE_CONVERSION (newop2);
1851 if (!is_gimple_val (newop1) || !is_gimple_val (newop2))
1852 continue;
1853 }
1854
1855 if (dump_file && (dump_flags & TDF_DETAILS))
1856 {
1857 fprintf (dump_file, "Equivalence: ");
1858 print_generic_expr (dump_file, curr->op);
1859 fprintf (dump_file, " %s ", op_symbol_code (opcode));
1860 print_generic_expr (dump_file, oe->op);
1861 fprintf (dump_file, " -> ");
1862 print_generic_expr (dump_file, t);
1863 fprintf (dump_file, "\n");
1864 }
1865
1866 /* Now we can delete oe, as it has been subsumed by the new combined
1867 expression t. */
1868 ops->ordered_remove (i);
1869 reassociate_stats.ops_eliminated ++;
1870
1871 /* If t is the same as curr->op, we're done. Otherwise we must
1872 replace curr->op with t. As a special case, if we got a constant
1873 back, add it to the end of the list instead of replacing the
1874 current entry. */
1875 if (TREE_CODE (t) == INTEGER_CST)
1876 {
1877 ops->ordered_remove (currindex);
1878 add_to_ops_vec (ops, t);
1879 }
1880 else if (!operand_equal_p (t, curr->op, 0))
1881 {
1882 gimple *sum;
1883 enum tree_code subcode;
1884 tree newop1;
1885 tree newop2;
1886 gcc_assert (COMPARISON_CLASS_P (t));
1887 extract_ops_from_tree (t, &subcode, &newop1, &newop2);
1888 STRIP_USELESS_TYPE_CONVERSION (newop1);
1889 STRIP_USELESS_TYPE_CONVERSION (newop2);
1890 gcc_checking_assert (is_gimple_val (newop1)
1891 && is_gimple_val (newop2));
1892 sum = build_and_add_sum (TREE_TYPE (t), newop1, newop2, subcode);
1893 curr->op = gimple_get_lhs (sum);
1894 }
1895 return true;
1896 }
1897
1898 return false;
1899 }
1900
1901
1902 /* Transform repeated addition of same values into multiply with
1903 constant. */
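/* For example (an illustrative case): the operand list of
   x + x + x + y carries three entries for x, which are replaced
   by a single new entry for x * 3, leaving x * 3 + y.  */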
1904 static bool
1905 transform_add_to_multiply (vec<operand_entry *> *ops)
1906 {
1907 operand_entry *oe;
1908 tree op = NULL_TREE;
1909 int j;
1910 int i, start = -1, end = 0, count = 0;
1911 auto_vec<std::pair <int, int> > indxs;
1912 bool changed = false;
1913
1914 if (!INTEGRAL_TYPE_P (TREE_TYPE ((*ops)[0]->op))
1915 && (!SCALAR_FLOAT_TYPE_P (TREE_TYPE ((*ops)[0]->op))
1916 || !flag_unsafe_math_optimizations))
1917 return false;
1918
1919 /* Look for repeated operands. */
1920 FOR_EACH_VEC_ELT (*ops, i, oe)
1921 {
1922 if (start == -1)
1923 {
1924 count = 1;
1925 op = oe->op;
1926 start = i;
1927 }
1928 else if (operand_equal_p (oe->op, op, 0))
1929 {
1930 count++;
1931 end = i;
1932 }
1933 else
1934 {
1935 if (count > 1)
1936 indxs.safe_push (std::make_pair (start, end));
1937 count = 1;
1938 op = oe->op;
1939 start = i;
1940 }
1941 }
1942
1943 if (count > 1)
1944 indxs.safe_push (std::make_pair (start, end));
1945
1946 for (j = indxs.length () - 1; j >= 0; --j)
1947 {
1948 /* Convert repeated operand addition to multiplication. */
1949 start = indxs[j].first;
1950 end = indxs[j].second;
1951 op = (*ops)[start]->op;
1952 count = end - start + 1;
1953 for (i = end; i >= start; --i)
1954 ops->unordered_remove (i);
1955 tree tmp = make_ssa_name (TREE_TYPE (op));
1956 tree cst = build_int_cst (integer_type_node, count);
1957 gassign *mul_stmt
1958 = gimple_build_assign (tmp, MULT_EXPR,
1959 op, fold_convert (TREE_TYPE (op), cst));
1960 gimple_set_visited (mul_stmt, true);
1961 add_to_ops_vec (ops, tmp, mul_stmt);
1962 changed = true;
1963 }
1964
1965 return changed;
1966 }
1967
1968
1969 /* Perform various identities and other optimizations on the list of
1970 operand entries, stored in OPS. The tree code for the binary
1971 operation between all the operands is OPCODE. */
1972
1973 static void
1974 optimize_ops_list (enum tree_code opcode,
1975 vec<operand_entry *> *ops)
1976 {
1977 unsigned int length = ops->length ();
1978 unsigned int i;
1979 operand_entry *oe;
1980 operand_entry *oelast = NULL;
1981 bool iterate = false;
1982
1983 if (length == 1)
1984 return;
1985
1986 oelast = ops->last ();
1987
1988 /* If the last two are constants, pop the constants off, merge them
1989 and try the next two. */
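/* E.g. for PLUS_EXPR, an operand list ending in ..., 5, 9 folds the
   two constants into 14; the recursive call below then retries in
   case the merged constant can combine further.  */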
1990 if (oelast->rank == 0 && is_gimple_min_invariant (oelast->op))
1991 {
1992 operand_entry *oelm1 = (*ops)[length - 2];
1993
1994 if (oelm1->rank == 0
1995 && is_gimple_min_invariant (oelm1->op)
1996 && useless_type_conversion_p (TREE_TYPE (oelm1->op),
1997 TREE_TYPE (oelast->op)))
1998 {
1999 tree folded = fold_binary (opcode, TREE_TYPE (oelm1->op),
2000 oelm1->op, oelast->op);
2001
2002 if (folded && is_gimple_min_invariant (folded))
2003 {
2004 if (dump_file && (dump_flags & TDF_DETAILS))
2005 fprintf (dump_file, "Merging constants\n");
2006
2007 ops->pop ();
2008 ops->pop ();
2009
2010 add_to_ops_vec (ops, folded);
2011 reassociate_stats.constants_eliminated++;
2012
2013 optimize_ops_list (opcode, ops);
2014 return;
2015 }
2016 }
2017 }
2018
2019 eliminate_using_constants (opcode, ops);
2020 oelast = NULL;
2021
2022 for (i = 0; ops->iterate (i, &oe);)
2023 {
2024 bool done = false;
2025
2026 if (eliminate_not_pairs (opcode, ops, i, oe))
2027 return;
2028 if (eliminate_duplicate_pair (opcode, ops, &done, i, oe, oelast)
2029 || (!done && eliminate_plus_minus_pair (opcode, ops, i, oe))
2030 || (!done && eliminate_redundant_comparison (opcode, ops, i, oe)))
2031 {
2032 if (done)
2033 return;
2034 iterate = true;
2035 oelast = NULL;
2036 continue;
2037 }
2038 oelast = oe;
2039 i++;
2040 }
2041
2042 length = ops->length ();
2043 oelast = ops->last ();
2044
2045 if (iterate)
2046 optimize_ops_list (opcode, ops);
2047 }
2048
2049 /* The following functions are subroutines of optimize_range_tests and allow
2050 it to try to change a logical combination of comparisons into a range
2051 test.
2052
2053 For example, both
2054 X == 2 || X == 5 || X == 3 || X == 4
2055 and
2056 X >= 2 && X <= 5
2057 are converted to
2058 (unsigned) (X - 2) <= 3
2059
2060 For more information see the comments above fold_range_test in
2061 fold-const.c; this implementation is for GIMPLE. */
2062
2063 struct range_entry
2064 {
2065 tree exp;
2066 tree low;
2067 tree high;
2068 bool in_p;
2069 bool strict_overflow_p;
2070 unsigned int idx, next;
2071 };
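/* In the notation used below (and by the dump code), +[LOW, HIGH]
   means EXP is inside the bounds (IN_P set) and -[LOW, HIGH] means
   it is outside them; a NULL LOW or HIGH stands for the minimum or
   maximum of the type, so e.g. -[0, 0] is EXP != 0.  */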
2072
2073 /* This is similar to make_range in fold-const.c, but on top of
2074 GIMPLE instead of trees. If EXP is non-NULL, it should be
2075 an SSA_NAME and the STMT argument is ignored; otherwise STMT
2076 should be a GIMPLE_COND. */
2077
2078 static void
2079 init_range_entry (struct range_entry *r, tree exp, gimple *stmt)
2080 {
2081 int in_p;
2082 tree low, high;
2083 bool is_bool, strict_overflow_p;
2084
2085 r->exp = NULL_TREE;
2086 r->in_p = false;
2087 r->strict_overflow_p = false;
2088 r->low = NULL_TREE;
2089 r->high = NULL_TREE;
2090 if (exp != NULL_TREE
2091 && (TREE_CODE (exp) != SSA_NAME || !INTEGRAL_TYPE_P (TREE_TYPE (exp))))
2092 return;
2093
2094 /* Start with simply saying "EXP != 0" and then look at the code of EXP
2095 and see if we can refine the range. Some of the cases below may not
2096 happen, but it doesn't seem worth worrying about this. We "continue"
2097 the outer loop when we've changed something; otherwise we "break"
2098 the switch, which will "break" the while. */
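/* For instance, if EXP is defined as x != 5, the loop below refines
   the initial -[0, 0] range ("EXP != 0") into -[5, 5] on x, i.e.
   x is anything but 5.  */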
2099 low = exp ? build_int_cst (TREE_TYPE (exp), 0) : boolean_false_node;
2100 high = low;
2101 in_p = 0;
2102 strict_overflow_p = false;
2103 is_bool = false;
2104 if (exp == NULL_TREE)
2105 is_bool = true;
2106 else if (TYPE_PRECISION (TREE_TYPE (exp)) == 1)
2107 {
2108 if (TYPE_UNSIGNED (TREE_TYPE (exp)))
2109 is_bool = true;
2110 else
2111 return;
2112 }
2113 else if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE)
2114 is_bool = true;
2115
2116 while (1)
2117 {
2118 enum tree_code code;
2119 tree arg0, arg1, exp_type;
2120 tree nexp;
2121 location_t loc;
2122
2123 if (exp != NULL_TREE)
2124 {
2125 if (TREE_CODE (exp) != SSA_NAME
2126 || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp))
2127 break;
2128
2129 stmt = SSA_NAME_DEF_STMT (exp);
2130 if (!is_gimple_assign (stmt))
2131 break;
2132
2133 code = gimple_assign_rhs_code (stmt);
2134 arg0 = gimple_assign_rhs1 (stmt);
2135 arg1 = gimple_assign_rhs2 (stmt);
2136 exp_type = TREE_TYPE (exp);
2137 }
2138 else
2139 {
2140 code = gimple_cond_code (stmt);
2141 arg0 = gimple_cond_lhs (stmt);
2142 arg1 = gimple_cond_rhs (stmt);
2143 exp_type = boolean_type_node;
2144 }
2145
2146 if (TREE_CODE (arg0) != SSA_NAME
2147 || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (arg0))
2148 break;
2149 loc = gimple_location (stmt);
2150 switch (code)
2151 {
2152 case BIT_NOT_EXPR:
2153 if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE
2154 /* Ensure the range is either +[-,0], +[0,0],
2155 -[-,0], -[0,0] or +[1,-], +[1,1], -[1,-] or
2156 -[1,1]. If it is e.g. +[-,-] or -[-,-]
2157 or similar expression of unconditional true or
2158 false, it should not be negated. */
2159 && ((high && integer_zerop (high))
2160 || (low && integer_onep (low))))
2161 {
2162 in_p = !in_p;
2163 exp = arg0;
2164 continue;
2165 }
2166 break;
2167 case SSA_NAME:
2168 exp = arg0;
2169 continue;
2170 CASE_CONVERT:
2171 if (is_bool)
2172 {
2173 if ((TYPE_PRECISION (exp_type) == 1
2174 || TREE_CODE (exp_type) == BOOLEAN_TYPE)
2175 && TYPE_PRECISION (TREE_TYPE (arg0)) > 1)
2176 return;
2177 }
2178 else if (TYPE_PRECISION (TREE_TYPE (arg0)) == 1)
2179 {
2180 if (TYPE_UNSIGNED (TREE_TYPE (arg0)))
2181 is_bool = true;
2182 else
2183 return;
2184 }
2185 else if (TREE_CODE (TREE_TYPE (arg0)) == BOOLEAN_TYPE)
2186 is_bool = true;
2187 goto do_default;
2188 case EQ_EXPR:
2189 case NE_EXPR:
2190 case LT_EXPR:
2191 case LE_EXPR:
2192 case GE_EXPR:
2193 case GT_EXPR:
2194 is_bool = true;
2195 /* FALLTHRU */
2196 default:
2197 if (!is_bool)
2198 return;
2199 do_default:
2200 nexp = make_range_step (loc, code, arg0, arg1, exp_type,
2201 &low, &high, &in_p,
2202 &strict_overflow_p);
2203 if (nexp != NULL_TREE)
2204 {
2205 exp = nexp;
2206 gcc_assert (TREE_CODE (exp) == SSA_NAME);
2207 continue;
2208 }
2209 break;
2210 }
2211 break;
2212 }
2213 if (is_bool)
2214 {
2215 r->exp = exp;
2216 r->in_p = in_p;
2217 r->low = low;
2218 r->high = high;
2219 r->strict_overflow_p = strict_overflow_p;
2220 }
2221 }
2222
2223 /* Comparison function for qsort. Sort entries
2224 without SSA_NAME exp first, then with SSA_NAMEs sorted
2225 by increasing SSA_NAME_VERSION, and for the same SSA_NAMEs
2226 by increasing ->low and if ->low is the same, by increasing
2227 ->high. ->low == NULL_TREE means minimum, ->high == NULL_TREE
2228 maximum. */
2229
2230 static int
2231 range_entry_cmp (const void *a, const void *b)
2232 {
2233 const struct range_entry *p = (const struct range_entry *) a;
2234 const struct range_entry *q = (const struct range_entry *) b;
2235
2236 if (p->exp != NULL_TREE && TREE_CODE (p->exp) == SSA_NAME)
2237 {
2238 if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
2239 {
2240 /* Group range_entries for the same SSA_NAME together. */
2241 if (SSA_NAME_VERSION (p->exp) < SSA_NAME_VERSION (q->exp))
2242 return -1;
2243 else if (SSA_NAME_VERSION (p->exp) > SSA_NAME_VERSION (q->exp))
2244 return 1;
2245 /* If ->low is different, NULL low goes first, then by
2246 ascending low. */
2247 if (p->low != NULL_TREE)
2248 {
2249 if (q->low != NULL_TREE)
2250 {
2251 tree tem = fold_binary (LT_EXPR, boolean_type_node,
2252 p->low, q->low);
2253 if (tem && integer_onep (tem))
2254 return -1;
2255 tem = fold_binary (GT_EXPR, boolean_type_node,
2256 p->low, q->low);
2257 if (tem && integer_onep (tem))
2258 return 1;
2259 }
2260 else
2261 return 1;
2262 }
2263 else if (q->low != NULL_TREE)
2264 return -1;
2265 /* If ->high is different, NULL high goes last, before that by
2266 ascending high. */
2267 if (p->high != NULL_TREE)
2268 {
2269 if (q->high != NULL_TREE)
2270 {
2271 tree tem = fold_binary (LT_EXPR, boolean_type_node,
2272 p->high, q->high);
2273 if (tem && integer_onep (tem))
2274 return -1;
2275 tem = fold_binary (GT_EXPR, boolean_type_node,
2276 p->high, q->high);
2277 if (tem && integer_onep (tem))
2278 return 1;
2279 }
2280 else
2281 return -1;
2282 }
2283 else if (q->high != NULL_TREE)
2284 return 1;
2285 /* If both ranges are the same, sort below by ascending idx. */
2286 }
2287 else
2288 return 1;
2289 }
2290 else if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
2291 return -1;
2292
2293 if (p->idx < q->idx)
2294 return -1;
2295 else
2296 {
2297 gcc_checking_assert (p->idx > q->idx);
2298 return 1;
2299 }
2300 }
2301
2302 /* Helper function for update_range_test. Force EXPR into an SSA_NAME,
2303 inserting the needed statements before or after GSI as per BEFORE. */
2304
2305 static tree
2306 force_into_ssa_name (gimple_stmt_iterator *gsi, tree expr, bool before)
2307 {
2308 enum gsi_iterator_update m = before ? GSI_SAME_STMT : GSI_CONTINUE_LINKING;
2309 tree ret = force_gimple_operand_gsi (gsi, expr, true, NULL_TREE, before, m);
2310 if (TREE_CODE (ret) != SSA_NAME)
2311 {
2312 gimple *g = gimple_build_assign (make_ssa_name (TREE_TYPE (ret)), ret);
2313 if (before)
2314 gsi_insert_before (gsi, g, GSI_SAME_STMT);
2315 else
2316 gsi_insert_after (gsi, g, GSI_CONTINUE_LINKING);
2317 ret = gimple_assign_lhs (g);
2318 }
2319 return ret;
2320 }
2321
2322 /* Helper routine of optimize_range_tests.
2323 [EXP, IN_P, LOW, HIGH, STRICT_OVERFLOW_P] is a merged range for
2324 RANGE and the OTHERRANGE through OTHERRANGE + COUNT - 1 ranges;
2325 OPCODE and OPS are arguments of optimize_range_tests. If OTHERRANGE
2326 is NULL, OTHERRANGEP must be non-NULL and point to an array
2327 of COUNT pointers to other ranges. Return
2328 true if the range merge has been successful.
2329 If OPCODE is ERROR_MARK, this is called from within
2330 maybe_optimize_range_tests and is performing inter-bb range optimization.
2331 In that case, whether an op is BIT_AND_EXPR or BIT_IOR_EXPR is found in
2332 oe->rank. */
2333
2334 static bool
2335 update_range_test (struct range_entry *range, struct range_entry *otherrange,
2336 struct range_entry **otherrangep,
2337 unsigned int count, enum tree_code opcode,
2338 vec<operand_entry *> *ops, tree exp, gimple_seq seq,
2339 bool in_p, tree low, tree high, bool strict_overflow_p)
2340 {
2341 operand_entry *oe = (*ops)[range->idx];
2342 tree op = oe->op;
2343 gimple *stmt = op ? SSA_NAME_DEF_STMT (op)
2344 : last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id));
2345 location_t loc = gimple_location (stmt);
2346 tree optype = op ? TREE_TYPE (op) : boolean_type_node;
2347 tree tem = build_range_check (loc, optype, unshare_expr (exp),
2348 in_p, low, high);
2349 enum warn_strict_overflow_code wc = WARN_STRICT_OVERFLOW_COMPARISON;
2350 gimple_stmt_iterator gsi;
2351 unsigned int i, uid;
2352
2353 if (tem == NULL_TREE)
2354 return false;
2355
2356 /* If op is default def SSA_NAME, there is no place to insert the
2357 new comparison. Give up, unless we can use OP itself as the
2358 range test. */
2359 if (op && SSA_NAME_IS_DEFAULT_DEF (op))
2360 {
2361 if (op == range->exp
2362 && ((TYPE_PRECISION (optype) == 1 && TYPE_UNSIGNED (optype))
2363 || TREE_CODE (optype) == BOOLEAN_TYPE)
2364 && (op == tem
2365 || (TREE_CODE (tem) == EQ_EXPR
2366 && TREE_OPERAND (tem, 0) == op
2367 && integer_onep (TREE_OPERAND (tem, 1))))
2368 && opcode != BIT_IOR_EXPR
2369 && (opcode != ERROR_MARK || oe->rank != BIT_IOR_EXPR))
2370 {
2371 stmt = NULL;
2372 tem = op;
2373 }
2374 else
2375 return false;
2376 }
2377
2378 if (strict_overflow_p && issue_strict_overflow_warning (wc))
2379 warning_at (loc, OPT_Wstrict_overflow,
2380 "assuming signed overflow does not occur "
2381 "when simplifying range test");
2382
2383 if (dump_file && (dump_flags & TDF_DETAILS))
2384 {
2385 struct range_entry *r;
2386 fprintf (dump_file, "Optimizing range tests ");
2387 print_generic_expr (dump_file, range->exp);
2388 fprintf (dump_file, " %c[", range->in_p ? '+' : '-');
2389 print_generic_expr (dump_file, range->low);
2390 fprintf (dump_file, ", ");
2391 print_generic_expr (dump_file, range->high);
2392 fprintf (dump_file, "]");
2393 for (i = 0; i < count; i++)
2394 {
2395 if (otherrange)
2396 r = otherrange + i;
2397 else
2398 r = otherrangep[i];
2399 if (r->exp
2400 && r->exp != range->exp
2401 && TREE_CODE (r->exp) == SSA_NAME)
2402 {
2403 fprintf (dump_file, " and ");
2404 print_generic_expr (dump_file, r->exp);
2405 }
2406 else
2407 fprintf (dump_file, " and");
2408 fprintf (dump_file, " %c[", r->in_p ? '+' : '-');
2409 print_generic_expr (dump_file, r->low);
2410 fprintf (dump_file, ", ");
2411 print_generic_expr (dump_file, r->high);
2412 fprintf (dump_file, "]");
2413 }
2414 fprintf (dump_file, "\n into ");
2415 print_generic_expr (dump_file, tem);
2416 fprintf (dump_file, "\n");
2417 }
2418
2419 if (opcode == BIT_IOR_EXPR
2420 || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
2421 tem = invert_truthvalue_loc (loc, tem);
2422
2423 tem = fold_convert_loc (loc, optype, tem);
2424 if (stmt)
2425 {
2426 gsi = gsi_for_stmt (stmt);
2427 uid = gimple_uid (stmt);
2428 }
2429 else
2430 {
2431 gsi = gsi_none ();
2432 uid = 0;
2433 }
2434 if (stmt == NULL)
2435 gcc_checking_assert (tem == op);
2436 /* In rare cases range->exp can be equal to lhs of stmt.
2437 In that case we have to insert after the stmt rather than before
2438 it. If stmt is a PHI, insert it at the start of the basic block. */
2439 else if (op != range->exp)
2440 {
2441 gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT);
2442 tem = force_into_ssa_name (&gsi, tem, true);
2443 gsi_prev (&gsi);
2444 }
2445 else if (gimple_code (stmt) != GIMPLE_PHI)
2446 {
2447 gsi_insert_seq_after (&gsi, seq, GSI_CONTINUE_LINKING);
2448 tem = force_into_ssa_name (&gsi, tem, false);
2449 }
2450 else
2451 {
2452 gsi = gsi_after_labels (gimple_bb (stmt));
2453 if (!gsi_end_p (gsi))
2454 uid = gimple_uid (gsi_stmt (gsi));
2455 else
2456 {
2457 gsi = gsi_start_bb (gimple_bb (stmt));
2458 uid = 1;
2459 while (!gsi_end_p (gsi))
2460 {
2461 uid = gimple_uid (gsi_stmt (gsi));
2462 gsi_next (&gsi);
2463 }
2464 }
2465 gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT);
2466 tem = force_into_ssa_name (&gsi, tem, true);
2467 if (gsi_end_p (gsi))
2468 gsi = gsi_last_bb (gimple_bb (stmt));
2469 else
2470 gsi_prev (&gsi);
2471 }
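/* The statements emitted for the range test still have uid 0; walk
   back over them and give them the uid computed above, stopping at
   the first statement that already has one, so that the uid-based
   dominance checks in this pass keep working.  */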
2472 for (; !gsi_end_p (gsi); gsi_prev (&gsi))
2473 if (gimple_uid (gsi_stmt (gsi)))
2474 break;
2475 else
2476 gimple_set_uid (gsi_stmt (gsi), uid);
2477
2478 oe->op = tem;
2479 range->exp = exp;
2480 range->low = low;
2481 range->high = high;
2482 range->in_p = in_p;
2483 range->strict_overflow_p = false;
2484
2485 for (i = 0; i < count; i++)
2486 {
2487 if (otherrange)
2488 range = otherrange + i;
2489 else
2490 range = otherrangep[i];
2491 oe = (*ops)[range->idx];
2492 /* Now change all the other range test immediate uses, so that
2493 those tests will be optimized away. */
2494 if (opcode == ERROR_MARK)
2495 {
2496 if (oe->op)
2497 oe->op = build_int_cst (TREE_TYPE (oe->op),
2498 oe->rank == BIT_IOR_EXPR ? 0 : 1);
2499 else
2500 oe->op = (oe->rank == BIT_IOR_EXPR
2501 ? boolean_false_node : boolean_true_node);
2502 }
2503 else
2504 oe->op = error_mark_node;
2505 range->exp = NULL_TREE;
2506 range->low = NULL_TREE;
2507 range->high = NULL_TREE;
2508 }
2509 return true;
2510 }
2511
2512 /* Optimize X == CST1 || X == CST2
2513 if popcount (CST1 ^ CST2) == 1 into
2514 (X & ~(CST1 ^ CST2)) == (CST1 & ~(CST1 ^ CST2)).
2515 Similarly for ranges. E.g.
2516 X != 2 && X != 3 && X != 10 && X != 11
2517 will be transformed by the previous optimization into
2518 !((X - 2U) <= 1U || (X - 10U) <= 1U)
2519 and this loop can transform that into
2520 !(((X & ~8) - 2U) <= 1U). */
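/* As a small concrete instance of the first form: for
   X == 4 || X == 6 we have lowi == highi == 4 and lowj == highj == 6;
   4 ^ 6 == 2 has popcount 1, so the pair becomes (X & ~2) == 4.  */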
2521
2522 static bool
2523 optimize_range_tests_xor (enum tree_code opcode, tree type,
2524 tree lowi, tree lowj, tree highi, tree highj,
2525 vec<operand_entry *> *ops,
2526 struct range_entry *rangei,
2527 struct range_entry *rangej)
2528 {
2529 tree lowxor, highxor, tem, exp;
2530 /* Check lowi ^ lowj == highi ^ highj and
2531 popcount (lowi ^ lowj) == 1. */
2532 lowxor = fold_binary (BIT_XOR_EXPR, type, lowi, lowj);
2533 if (lowxor == NULL_TREE || TREE_CODE (lowxor) != INTEGER_CST)
2534 return false;
2535 if (!integer_pow2p (lowxor))
2536 return false;
2537 highxor = fold_binary (BIT_XOR_EXPR, type, highi, highj);
2538 if (!tree_int_cst_equal (lowxor, highxor))
2539 return false;
2540
2541 tem = fold_build1 (BIT_NOT_EXPR, type, lowxor);
2542 exp = fold_build2 (BIT_AND_EXPR, type, rangei->exp, tem);
2543 lowj = fold_build2 (BIT_AND_EXPR, type, lowi, tem);
2544 highj = fold_build2 (BIT_AND_EXPR, type, highi, tem);
2545 if (update_range_test (rangei, rangej, NULL, 1, opcode, ops, exp,
2546 NULL, rangei->in_p, lowj, highj,
2547 rangei->strict_overflow_p
2548 || rangej->strict_overflow_p))
2549 return true;
2550 return false;
2551 }
2552
2553 /* Optimize X == CST1 || X == CST2
2554 if popcount (CST2 - CST1) == 1 into
2555 ((X - CST1) & ~(CST2 - CST1)) == 0.
2556 Similarly for ranges. E.g.
2557 X == 43 || X == 76 || X == 44 || X == 78 || X == 77 || X == 46
2558 || X == 75 || X == 45
2559 will be transformed by the previous optimization into
2560 (X - 43U) <= 3U || (X - 75U) <= 3U
2561 and this loop can transform that into
2562 ((X - 43U) & ~(75U - 43U)) <= 3U. */
2563 static bool
2564 optimize_range_tests_diff (enum tree_code opcode, tree type,
2565 tree lowi, tree lowj, tree highi, tree highj,
2566 vec<operand_entry *> *ops,
2567 struct range_entry *rangei,
2568 struct range_entry *rangej)
2569 {
2570 tree tem1, tem2, mask;
2571 /* Check highi - lowi == highj - lowj. */
2572 tem1 = fold_binary (MINUS_EXPR, type, highi, lowi);
2573 if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST)
2574 return false;
2575 tem2 = fold_binary (MINUS_EXPR, type, highj, lowj);
2576 if (!tree_int_cst_equal (tem1, tem2))
2577 return false;
2578 /* Check popcount (lowj - lowi) == 1. */
2579 tem1 = fold_binary (MINUS_EXPR, type, lowj, lowi);
2580 if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST)
2581 return false;
2582 if (!integer_pow2p (tem1))
2583 return false;
2584
2585 type = unsigned_type_for (type);
2586 tem1 = fold_convert (type, tem1);
2587 tem2 = fold_convert (type, tem2);
2588 lowi = fold_convert (type, lowi);
2589 mask = fold_build1 (BIT_NOT_EXPR, type, tem1);
2590 tem1 = fold_build2 (MINUS_EXPR, type,
2591 fold_convert (type, rangei->exp), lowi);
2592 tem1 = fold_build2 (BIT_AND_EXPR, type, tem1, mask);
2593 lowj = build_int_cst (type, 0);
2594 if (update_range_test (rangei, rangej, NULL, 1, opcode, ops, tem1,
2595 NULL, rangei->in_p, lowj, tem2,
2596 rangei->strict_overflow_p
2597 || rangej->strict_overflow_p))
2598 return true;
2599 return false;
2600 }
2601
2602 /* Perform the checks common to optimize_range_tests_xor and
2603 optimize_range_tests_diff.
2604 If OPTIMIZE_XOR is TRUE, call optimize_range_tests_xor.
2605 Else call optimize_range_tests_diff. */
2606
2607 static bool
2608 optimize_range_tests_1 (enum tree_code opcode, int first, int length,
2609 bool optimize_xor, vec<operand_entry *> *ops,
2610 struct range_entry *ranges)
2611 {
2612 int i, j;
2613 bool any_changes = false;
2614 for (i = first; i < length; i++)
2615 {
2616 tree lowi, highi, lowj, highj, type, tem;
2617
2618 if (ranges[i].exp == NULL_TREE || ranges[i].in_p)
2619 continue;
2620 type = TREE_TYPE (ranges[i].exp);
2621 if (!INTEGRAL_TYPE_P (type))
2622 continue;
2623 lowi = ranges[i].low;
2624 if (lowi == NULL_TREE)
2625 lowi = TYPE_MIN_VALUE (type);
2626 highi = ranges[i].high;
2627 if (highi == NULL_TREE)
2628 continue;
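/* Only scan the following 63 entries for a partner; like the
   update_fail_count guard in optimize_range_tests below, this cap
   keeps the pairing loop from going quadratic.  */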
2629 for (j = i + 1; j < length && j < i + 64; j++)
2630 {
2631 bool changes;
2632 if (ranges[i].exp != ranges[j].exp || ranges[j].in_p)
2633 continue;
2634 lowj = ranges[j].low;
2635 if (lowj == NULL_TREE)
2636 continue;
2637 highj = ranges[j].high;
2638 if (highj == NULL_TREE)
2639 highj = TYPE_MAX_VALUE (type);
2640 /* Check lowj > highi. */
2641 tem = fold_binary (GT_EXPR, boolean_type_node,
2642 lowj, highi);
2643 if (tem == NULL_TREE || !integer_onep (tem))
2644 continue;
2645 if (optimize_xor)
2646 changes = optimize_range_tests_xor (opcode, type, lowi, lowj,
2647 highi, highj, ops,
2648 ranges + i, ranges + j);
2649 else
2650 changes = optimize_range_tests_diff (opcode, type, lowi, lowj,
2651 highi, highj, ops,
2652 ranges + i, ranges + j);
2653 if (changes)
2654 {
2655 any_changes = true;
2656 break;
2657 }
2658 }
2659 }
2660 return any_changes;
2661 }
2662
2663 /* Helper function of optimize_range_tests_to_bit_test. Handle a single
2664 range [LOW, HIGH] for EXP, compute the bit mask of bits to test, and
2665 return EXP on success, NULL otherwise. */
2666
2667 static tree
2668 extract_bit_test_mask (tree exp, int prec, tree totallow, tree low, tree high,
2669 wide_int *mask, tree *totallowp)
2670 {
2671 tree tem = int_const_binop (MINUS_EXPR, high, low);
2672 if (tem == NULL_TREE
2673 || TREE_CODE (tem) != INTEGER_CST
2674 || TREE_OVERFLOW (tem)
2675 || tree_int_cst_sgn (tem) == -1
2676 || compare_tree_int (tem, prec) != -1)
2677 return NULL_TREE;
2678
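/* MAX is the number of values in [LOW, HIGH]; start with a mask that
   has one (low) bit set for each of those values.  */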
2679 unsigned HOST_WIDE_INT max = tree_to_uhwi (tem) + 1;
2680 *mask = wi::shifted_mask (0, max, false, prec);
2681 if (TREE_CODE (exp) == BIT_AND_EXPR
2682 && TREE_CODE (TREE_OPERAND (exp, 1)) == INTEGER_CST)
2683 {
2684 widest_int msk = wi::to_widest (TREE_OPERAND (exp, 1));
2685 msk = wi::zext (~msk, TYPE_PRECISION (TREE_TYPE (exp)));
2686 if (wi::popcount (msk) == 1
2687 && wi::ltu_p (msk, prec - max))
2688 {
2689 *mask |= wi::shifted_mask (msk.to_uhwi (), max, false, prec);
2690 max += msk.to_uhwi ();
2691 exp = TREE_OPERAND (exp, 0);
2692 if (integer_zerop (low)
2693 && TREE_CODE (exp) == PLUS_EXPR
2694 && TREE_CODE (TREE_OPERAND (exp, 1)) == INTEGER_CST)
2695 {
2696 tree ret = TREE_OPERAND (exp, 0);
2697 STRIP_NOPS (ret);
2698 widest_int bias
2699 = wi::neg (wi::sext (wi::to_widest (TREE_OPERAND (exp, 1)),
2700 TYPE_PRECISION (TREE_TYPE (low))));
2701 tree tbias = wide_int_to_tree (TREE_TYPE (ret), bias);
2702 if (totallowp)
2703 {
2704 *totallowp = tbias;
2705 return ret;
2706 }
2707 else if (!tree_int_cst_lt (totallow, tbias))
2708 return NULL_TREE;
2709 bias = wi::to_widest (tbias);
2710 bias -= wi::to_widest (totallow);
2711 if (bias >= 0 && bias < prec - max)
2712 {
2713 *mask = wi::lshift (*mask, bias);
2714 return ret;
2715 }
2716 }
2717 }
2718 }
2719 if (totallowp)
2720 return exp;
2721 if (!tree_int_cst_lt (totallow, low))
2722 return exp;
2723 tem = int_const_binop (MINUS_EXPR, low, totallow);
2724 if (tem == NULL_TREE
2725 || TREE_CODE (tem) != INTEGER_CST
2726 || TREE_OVERFLOW (tem)
2727 || compare_tree_int (tem, prec - max) == 1)
2728 return NULL_TREE;
2729
2730 *mask = wi::lshift (*mask, wi::to_widest (tem));
2731 return exp;
2732 }
2733
2734 /* Attempt to optimize small range tests using a bit test.
2735 E.g.
2736 X != 43 && X != 76 && X != 44 && X != 78 && X != 49
2737 && X != 77 && X != 46 && X != 75 && X != 45 && X != 82
2738 has been optimized by the earlier optimizations into:
2739 ((X - 43U) & ~32U) > 3U && X != 49 && X != 82
2740 As the whole 43 through 82 range spans fewer than 64 values,
2741 for 64-bit word targets optimize that into:
2742 (X - 43U) > 40U && ((1 << (X - 43U)) & 0x8F0000004FULL) == 0 */
2743
2744 static bool
2745 optimize_range_tests_to_bit_test (enum tree_code opcode, int first, int length,
2746 vec<operand_entry *> *ops,
2747 struct range_entry *ranges)
2748 {
2749 int i, j;
2750 bool any_changes = false;
2751 int prec = GET_MODE_BITSIZE (word_mode);
2752 auto_vec<struct range_entry *, 64> candidates;
2753
2754 for (i = first; i < length - 2; i++)
2755 {
2756 tree lowi, highi, lowj, highj, type;
2757
2758 if (ranges[i].exp == NULL_TREE || ranges[i].in_p)
2759 continue;
2760 type = TREE_TYPE (ranges[i].exp);
2761 if (!INTEGRAL_TYPE_P (type))
2762 continue;
2763 lowi = ranges[i].low;
2764 if (lowi == NULL_TREE)
2765 lowi = TYPE_MIN_VALUE (type);
2766 highi = ranges[i].high;
2767 if (highi == NULL_TREE)
2768 continue;
2769 wide_int mask;
2770 tree exp = extract_bit_test_mask (ranges[i].exp, prec, lowi, lowi,
2771 highi, &mask, &lowi);
2772 if (exp == NULL_TREE)
2773 continue;
2774 bool strict_overflow_p = ranges[i].strict_overflow_p;
2775 candidates.truncate (0);
2776 int end = MIN (i + 64, length);
2777 for (j = i + 1; j < end; j++)
2778 {
2779 tree exp2;
2780 if (ranges[j].exp == NULL_TREE || ranges[j].in_p)
2781 continue;
2782 if (ranges[j].exp == exp)
2783 ;
2784 else if (TREE_CODE (ranges[j].exp) == BIT_AND_EXPR)
2785 {
2786 exp2 = TREE_OPERAND (ranges[j].exp, 0);
2787 if (exp2 == exp)
2788 ;
2789 else if (TREE_CODE (exp2) == PLUS_EXPR)
2790 {
2791 exp2 = TREE_OPERAND (exp2, 0);
2792 STRIP_NOPS (exp2);
2793 if (exp2 != exp)
2794 continue;
2795 }
2796 else
2797 continue;
2798 }
2799 else
2800 continue;
2801 lowj = ranges[j].low;
2802 if (lowj == NULL_TREE)
2803 continue;
2804 highj = ranges[j].high;
2805 if (highj == NULL_TREE)
2806 highj = TYPE_MAX_VALUE (type);
2807 wide_int mask2;
2808 exp2 = extract_bit_test_mask (ranges[j].exp, prec, lowi, lowj,
2809 highj, &mask2, NULL);
2810 if (exp2 != exp)
2811 continue;
2812 mask |= mask2;
2813 strict_overflow_p |= ranges[j].strict_overflow_p;
2814 candidates.safe_push (&ranges[j]);
2815 }
2816
2817 /* If we would otherwise need 3 or more comparisons, use a bit test. */
2818 if (candidates.length () >= 2)
2819 {
2820 tree high = wide_int_to_tree (TREE_TYPE (lowi),
2821 wi::to_widest (lowi)
2822 + prec - 1 - wi::clz (mask));
2823 operand_entry *oe = (*ops)[ranges[i].idx];
2824 tree op = oe->op;
2825 gimple *stmt = op ? SSA_NAME_DEF_STMT (op)
2826 : last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id));
2827 location_t loc = gimple_location (stmt);
2828 tree optype = op ? TREE_TYPE (op) : boolean_type_node;
2829
2830 /* See if it isn't cheaper to pretend the minimum value of the
2831 range is 0, if the maximum value is small enough.
2832 We can then avoid the subtraction of the minimum value, but the
2833 mask constant could perhaps be more expensive. */
2834 if (compare_tree_int (lowi, 0) > 0
2835 && compare_tree_int (high, prec) < 0)
2836 {
2837 int cost_diff;
2838 HOST_WIDE_INT m = tree_to_uhwi (lowi);
2839 rtx reg = gen_raw_REG (word_mode, 10000);
2840 bool speed_p = optimize_bb_for_speed_p (gimple_bb (stmt));
2841 cost_diff = set_rtx_cost (gen_rtx_PLUS (word_mode, reg,
2842 GEN_INT (-m)), speed_p);
2843 rtx r = immed_wide_int_const (mask, word_mode);
2844 cost_diff += set_src_cost (gen_rtx_AND (word_mode, reg, r),
2845 word_mode, speed_p);
2846 r = immed_wide_int_const (wi::lshift (mask, m), word_mode);
2847 cost_diff -= set_src_cost (gen_rtx_AND (word_mode, reg, r),
2848 word_mode, speed_p);
2849 if (cost_diff > 0)
2850 {
2851 mask = wi::lshift (mask, m);
2852 lowi = build_zero_cst (TREE_TYPE (lowi));
2853 }
2854 }
2855
2856 tree tem = build_range_check (loc, optype, unshare_expr (exp),
2857 false, lowi, high);
2858 if (tem == NULL_TREE || is_gimple_val (tem))
2859 continue;
2860 tree etype = unsigned_type_for (TREE_TYPE (exp));
2861 exp = fold_build2_loc (loc, MINUS_EXPR, etype,
2862 fold_convert_loc (loc, etype, exp),
2863 fold_convert_loc (loc, etype, lowi));
2864 exp = fold_convert_loc (loc, integer_type_node, exp);
2865 tree word_type = lang_hooks.types.type_for_mode (word_mode, 1);
2866 exp = fold_build2_loc (loc, LSHIFT_EXPR, word_type,
2867 build_int_cst (word_type, 1), exp);
2868 exp = fold_build2_loc (loc, BIT_AND_EXPR, word_type, exp,
2869 wide_int_to_tree (word_type, mask));
2870 exp = fold_build2_loc (loc, EQ_EXPR, optype, exp,
2871 build_zero_cst (word_type));
2872 if (is_gimple_val (exp))
2873 continue;
2874
2875 /* The shift might have undefined behavior if TEM is true,
2876 but reassociate_bb isn't prepared to have basic blocks
2877 split when it is running. So, temporarily emit the code
2878 with BIT_IOR_EXPR instead of &&, and fix it up in
2879 branch_fixup. */
2880 gimple_seq seq;
2881 tem = force_gimple_operand (tem, &seq, true, NULL_TREE);
2882 gcc_assert (TREE_CODE (tem) == SSA_NAME);
2883 gimple_set_visited (SSA_NAME_DEF_STMT (tem), true);
2884 gimple_seq seq2;
2885 exp = force_gimple_operand (exp, &seq2, true, NULL_TREE);
2886 gimple_seq_add_seq_without_update (&seq, seq2);
2887 gcc_assert (TREE_CODE (exp) == SSA_NAME);
2888 gimple_set_visited (SSA_NAME_DEF_STMT (exp), true);
2889 gimple *g = gimple_build_assign (make_ssa_name (optype),
2890 BIT_IOR_EXPR, tem, exp);
2891 gimple_set_location (g, loc);
2892 gimple_seq_add_stmt_without_update (&seq, g);
2893 exp = gimple_assign_lhs (g);
2894 tree val = build_zero_cst (optype);
2895 if (update_range_test (&ranges[i], NULL, candidates.address (),
2896 candidates.length (), opcode, ops, exp,
2897 seq, false, val, val, strict_overflow_p))
2898 {
2899 any_changes = true;
2900 reassoc_branch_fixups.safe_push (tem);
2901 }
2902 else
2903 gimple_seq_discard (seq);
2904 }
2905 }
2906 return any_changes;
2907 }
2908
2909 /* Optimize x != 0 || y != 0 || z != 0 into (x | y | z) != 0
2910 and similarly x != -1 || y != -1 || z != -1 into (x & y & z) != -1. */
2911
2912 static bool
2913 optimize_range_tests_cmp_bitwise (enum tree_code opcode, int first, int length,
2914 vec<operand_entry *> *ops,
2915 struct range_entry *ranges)
2916 {
2917 int i;
2918 unsigned int b;
2919 bool any_changes = false;
2920 auto_vec<int, 128> buckets;
2921 auto_vec<int, 32> chains;
2922 auto_vec<struct range_entry *, 32> candidates;
2923
2924 for (i = first; i < length; i++)
2925 {
2926 if (ranges[i].exp == NULL_TREE
2927 || TREE_CODE (ranges[i].exp) != SSA_NAME
2928 || !ranges[i].in_p
2929 || TYPE_PRECISION (TREE_TYPE (ranges[i].exp)) <= 1
2930 || TREE_CODE (TREE_TYPE (ranges[i].exp)) == BOOLEAN_TYPE
2931 || ranges[i].low == NULL_TREE
2932 || ranges[i].low != ranges[i].high)
2933 continue;
2934
2935 bool zero_p = integer_zerop (ranges[i].low);
2936 if (!zero_p && !integer_all_onesp (ranges[i].low))
2937 continue;
2938
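/* Bucket the tests by precision and by whether they compare against
   zero or against all-ones: an even key means a comparison with 0
   (merged with BIT_IOR_EXPR below), an odd key a comparison with -1
   (merged with BIT_AND_EXPR).  CHAINS links, through 1-based
   indexes, the entries that fell into the same bucket.  */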
2939 b = TYPE_PRECISION (TREE_TYPE (ranges[i].exp)) * 2 + !zero_p;
2940 if (buckets.length () <= b)
2941 buckets.safe_grow_cleared (b + 1);
2942 if (chains.length () <= (unsigned) i)
2943 chains.safe_grow (i + 1);
2944 chains[i] = buckets[b];
2945 buckets[b] = i + 1;
2946 }
2947
2948 FOR_EACH_VEC_ELT (buckets, b, i)
2949 if (i && chains[i - 1])
2950 {
2951 int j, k = i;
2952 for (j = chains[i - 1]; j; j = chains[j - 1])
2953 {
2954 gimple *gk = SSA_NAME_DEF_STMT (ranges[k - 1].exp);
2955 gimple *gj = SSA_NAME_DEF_STMT (ranges[j - 1].exp);
2956 if (reassoc_stmt_dominates_stmt_p (gk, gj))
2957 k = j;
2958 }
2959 tree type1 = TREE_TYPE (ranges[k - 1].exp);
2960 tree type2 = NULL_TREE;
2961 bool strict_overflow_p = false;
2962 candidates.truncate (0);
2963 for (j = i; j; j = chains[j - 1])
2964 {
2965 tree type = TREE_TYPE (ranges[j - 1].exp);
2966 strict_overflow_p |= ranges[j - 1].strict_overflow_p;
2967 if (j == k
2968 || useless_type_conversion_p (type1, type))
2969 ;
2970 else if (type2 == NULL_TREE
2971 || useless_type_conversion_p (type2, type))
2972 {
2973 if (type2 == NULL_TREE)
2974 type2 = type;
2975 candidates.safe_push (&ranges[j - 1]);
2976 }
2977 }
2978 unsigned l = candidates.length ();
2979 for (j = i; j; j = chains[j - 1])
2980 {
2981 tree type = TREE_TYPE (ranges[j - 1].exp);
2982 if (j == k)
2983 continue;
2984 if (useless_type_conversion_p (type1, type))
2985 ;
2986 else if (type2 == NULL_TREE
2987 || useless_type_conversion_p (type2, type))
2988 continue;
2989 candidates.safe_push (&ranges[j - 1]);
2990 }
2991 gimple_seq seq = NULL;
2992 tree op = NULL_TREE;
2993 unsigned int id;
2994 struct range_entry *r;
2995 candidates.safe_push (&ranges[k - 1]);
2996 FOR_EACH_VEC_ELT (candidates, id, r)
2997 {
2998 gimple *g;
2999 if (id == 0)
3000 {
3001 op = r->exp;
3002 continue;
3003 }
3004 if (id == l)
3005 {
3006 g = gimple_build_assign (make_ssa_name (type1), NOP_EXPR, op);
3007 gimple_seq_add_stmt_without_update (&seq, g);
3008 op = gimple_assign_lhs (g);
3009 }
3010 tree type = TREE_TYPE (r->exp);
3011 tree exp = r->exp;
3012 if (id >= l && !useless_type_conversion_p (type1, type))
3013 {
3014 g = gimple_build_assign (make_ssa_name (type1), NOP_EXPR, exp);
3015 gimple_seq_add_stmt_without_update (&seq, g);
3016 exp = gimple_assign_lhs (g);
3017 }
3018 g = gimple_build_assign (make_ssa_name (id >= l ? type1 : type2),
3019 (b & 1) ? BIT_AND_EXPR : BIT_IOR_EXPR,
3020 op, exp);
3021 gimple_seq_add_stmt_without_update (&seq, g);
3022 op = gimple_assign_lhs (g);
3023 }
3024 candidates.pop ();
3025 if (update_range_test (&ranges[k - 1], NULL, candidates.address (),
3026 candidates.length (), opcode, ops, op,
3027 seq, true, ranges[k - 1].low,
3028 ranges[k - 1].low, strict_overflow_p))
3029 any_changes = true;
3030 else
3031 gimple_seq_discard (seq);
3032 }
3033
3034 return any_changes;
3035 }
3036
3037 /* Attempt to optimize for signed a and b where b is known to be >= 0:
3038 a >= 0 && a < b into (unsigned) a < (unsigned) b
3039 a >= 0 && a <= b into (unsigned) a <= (unsigned) b */
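/* The rewrite is valid because b >= 0 keeps (unsigned) b below the
   sign bit, while a negative a wraps above it; the unsigned compare
   is thus true exactly when a >= 0 && a < b (resp. a <= b).  */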
3040
3041 static bool
3042 optimize_range_tests_var_bound (enum tree_code opcode, int first, int length,
3043 vec<operand_entry *> *ops,
3044 struct range_entry *ranges,
3045 basic_block first_bb)
3046 {
3047 int i;
3048 bool any_changes = false;
3049 hash_map<tree, int> *map = NULL;
3050
3051 for (i = first; i < length; i++)
3052 {
3053 if (ranges[i].exp == NULL_TREE
3054 || TREE_CODE (ranges[i].exp) != SSA_NAME
3055 || !ranges[i].in_p)
3056 continue;
3057
3058 tree type = TREE_TYPE (ranges[i].exp);
3059 if (!INTEGRAL_TYPE_P (type)
3060 || TYPE_UNSIGNED (type)
3061 || ranges[i].low == NULL_TREE
3062 || !integer_zerop (ranges[i].low)
3063 || ranges[i].high != NULL_TREE)
3064 continue;
3065 /* EXP >= 0 here. */
3066 if (map == NULL)
3067 map = new hash_map <tree, int>;
3068 map->put (ranges[i].exp, i);
3069 }
3070
3071 if (map == NULL)
3072 return false;
3073
3074 for (i = 0; i < length; i++)
3075 {
3076 bool in_p = ranges[i].in_p;
3077 if (ranges[i].low == NULL_TREE
3078 || ranges[i].high == NULL_TREE)
3079 continue;
3080 if (!integer_zerop (ranges[i].low)
3081 || !integer_zerop (ranges[i].high))
3082 {
3083 if (ranges[i].exp
3084 && TYPE_PRECISION (TREE_TYPE (ranges[i].exp)) == 1
3085 && TYPE_UNSIGNED (TREE_TYPE (ranges[i].exp))
3086 && integer_onep (ranges[i].low)
3087 && integer_onep (ranges[i].high))
3088 in_p = !in_p;
3089 else
3090 continue;
3091 }
3092
3093 gimple *stmt;
3094 tree_code ccode;
3095 tree rhs1, rhs2;
3096 if (ranges[i].exp)
3097 {
3098 if (TREE_CODE (ranges[i].exp) != SSA_NAME)
3099 continue;
3100 stmt = SSA_NAME_DEF_STMT (ranges[i].exp);
3101 if (!is_gimple_assign (stmt))
3102 continue;
3103 ccode = gimple_assign_rhs_code (stmt);
3104 rhs1 = gimple_assign_rhs1 (stmt);
3105 rhs2 = gimple_assign_rhs2 (stmt);
3106 }
3107 else
3108 {
3109 operand_entry *oe = (*ops)[ranges[i].idx];
3110 stmt = last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id));
3111 if (gimple_code (stmt) != GIMPLE_COND)
3112 continue;
3113 ccode = gimple_cond_code (stmt);
3114 rhs1 = gimple_cond_lhs (stmt);
3115 rhs2 = gimple_cond_rhs (stmt);
3116 }
3117
3118 if (TREE_CODE (rhs1) != SSA_NAME
3119 || rhs2 == NULL_TREE
3120 || TREE_CODE (rhs2) != SSA_NAME)
3121 continue;
3122
3123 switch (ccode)
3124 {
3125 case GT_EXPR:
3126 case GE_EXPR:
3127 case LT_EXPR:
3128 case LE_EXPR:
3129 break;
3130 default:
3131 continue;
3132 }
3133 if (in_p)
3134 ccode = invert_tree_comparison (ccode, false);
3135 switch (ccode)
3136 {
3137 case GT_EXPR:
3138 case GE_EXPR:
3139 std::swap (rhs1, rhs2);
3140 ccode = swap_tree_comparison (ccode);
3141 break;
3142 case LT_EXPR:
3143 case LE_EXPR:
3144 break;
3145 default:
3146 gcc_unreachable ();
3147 }
3148
3149 int *idx = map->get (rhs1);
3150 if (idx == NULL)
3151 continue;
3152
3153 /* maybe_optimize_range_tests allows statements without side-effects
3154 in the basic blocks as long as they are consumed in the same bb.
3155 Make sure rhs2's def stmt is not among them, otherwise we can't
3156 safely use get_nonzero_bits on it. E.g. in:
3157 # RANGE [-83, 1] NONZERO 173
3158 # k_32 = PHI <k_47(13), k_12(9)>
3159 ...
3160 if (k_32 >= 0)
3161 goto <bb 5>; [26.46%]
3162 else
3163 goto <bb 9>; [73.54%]
3164
3165 <bb 5> [local count: 140323371]:
3166 # RANGE [0, 1] NONZERO 1
3167 _5 = (int) k_32;
3168 # RANGE [0, 4] NONZERO 4
3169 _21 = _5 << 2;
3170 # RANGE [0, 4] NONZERO 4
3171 iftmp.0_44 = (char) _21;
3172 if (k_32 < iftmp.0_44)
3173 goto <bb 6>; [84.48%]
3174 else
3175 goto <bb 9>; [15.52%]
3176 the ranges on _5/_21/iftmp.0_44 are flow sensitive and assume that
3177 k_32 >= 0. If we optimized k_32 >= 0 to true and k_32 < iftmp.0_44
3178 to (unsigned) k_32 < (unsigned) iftmp.0_44, then we would execute
3179 those stmts even for negative k_32, the value ranges would no
3180 longer be guaranteed, and so the optimization would be invalid. */
3181 if (opcode == ERROR_MARK)
3182 {
3183 gimple *g = SSA_NAME_DEF_STMT (rhs2);
3184 basic_block bb2 = gimple_bb (g);
3185 if (bb2
3186 && bb2 != first_bb
3187 && dominated_by_p (CDI_DOMINATORS, bb2, first_bb))
3188 {
3189 /* As an exception, handle a few common cases. */
3190 if (gimple_assign_cast_p (g)
3191 && INTEGRAL_TYPE_P (TREE_TYPE (gimple_assign_rhs1 (g)))
3192 && TYPE_UNSIGNED (TREE_TYPE (gimple_assign_rhs1 (g)))
3193 && (TYPE_PRECISION (TREE_TYPE (rhs2))
3194 > TYPE_PRECISION (TREE_TYPE (gimple_assign_rhs1 (g)))))
3195 /* Zero-extension is always ok. */ ;
3196 else if (is_gimple_assign (g)
3197 && gimple_assign_rhs_code (g) == BIT_AND_EXPR
3198 && TREE_CODE (gimple_assign_rhs2 (g)) == INTEGER_CST
3199 && !wi::neg_p (wi::to_wide (gimple_assign_rhs2 (g))))
3200 /* Masking with INTEGER_CST with MSB clear is always ok
3201 too. */ ;
3202 else
3203 continue;
3204 }
3205 }
3206
3207 wide_int nz = get_nonzero_bits (rhs2);
3208 if (wi::neg_p (nz))
3209 continue;
3210
3211 /* We have EXP < RHS2 or EXP <= RHS2 where EXP >= 0
3212 and RHS2 is known to be >= 0. */
3213 tree utype = unsigned_type_for (TREE_TYPE (rhs1));
3214
3215 enum warn_strict_overflow_code wc = WARN_STRICT_OVERFLOW_COMPARISON;
3216 if ((ranges[*idx].strict_overflow_p
3217 || ranges[i].strict_overflow_p)
3218 && issue_strict_overflow_warning (wc))
3219 warning_at (gimple_location (stmt), OPT_Wstrict_overflow,
3220 "assuming signed overflow does not occur "
3221 "when simplifying range test");
3222
3223 if (dump_file && (dump_flags & TDF_DETAILS))
3224 {
3225 struct range_entry *r = &ranges[*idx];
3226 fprintf (dump_file, "Optimizing range test ");
3227 print_generic_expr (dump_file, r->exp);
3228 fprintf (dump_file, " +[");
3229 print_generic_expr (dump_file, r->low);
3230 fprintf (dump_file, ", ");
3231 print_generic_expr (dump_file, r->high);
3232 fprintf (dump_file, "] and comparison ");
3233 print_generic_expr (dump_file, rhs1);
3234 fprintf (dump_file, " %s ", op_symbol_code (ccode));
3235 print_generic_expr (dump_file, rhs2);
3236 fprintf (dump_file, "\n into (");
3237 print_generic_expr (dump_file, utype);
3238 fprintf (dump_file, ") ");
3239 print_generic_expr (dump_file, rhs1);
3240 fprintf (dump_file, " %s (", op_symbol_code (ccode));
3241 print_generic_expr (dump_file, utype);
3242 fprintf (dump_file, ") ");
3243 print_generic_expr (dump_file, rhs2);
3244 fprintf (dump_file, "\n");
3245 }
3246
3247 operand_entry *oe = (*ops)[ranges[i].idx];
3248 ranges[i].in_p = 0;
3249 if (opcode == BIT_IOR_EXPR
3250 || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
3251 {
3252 ranges[i].in_p = 1;
3253 ccode = invert_tree_comparison (ccode, false);
3254 }
3255
3256 unsigned int uid = gimple_uid (stmt);
3257 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
3258 gimple *g = gimple_build_assign (make_ssa_name (utype), NOP_EXPR, rhs1);
3259 gimple_set_uid (g, uid);
3260 rhs1 = gimple_assign_lhs (g);
3261 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3262 g = gimple_build_assign (make_ssa_name (utype), NOP_EXPR, rhs2);
3263 gimple_set_uid (g, uid);
3264 rhs2 = gimple_assign_lhs (g);
3265 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3266 if (tree_swap_operands_p (rhs1, rhs2))
3267 {
3268 std::swap (rhs1, rhs2);
3269 ccode = swap_tree_comparison (ccode);
3270 }
3271 if (gimple_code (stmt) == GIMPLE_COND)
3272 {
3273 gcond *c = as_a <gcond *> (stmt);
3274 gimple_cond_set_code (c, ccode);
3275 gimple_cond_set_lhs (c, rhs1);
3276 gimple_cond_set_rhs (c, rhs2);
3277 update_stmt (stmt);
3278 }
3279 else
3280 {
3281 tree ctype = oe->op ? TREE_TYPE (oe->op) : boolean_type_node;
3282 if (!INTEGRAL_TYPE_P (ctype)
3283 || (TREE_CODE (ctype) != BOOLEAN_TYPE
3284 && TYPE_PRECISION (ctype) != 1))
3285 ctype = boolean_type_node;
3286 g = gimple_build_assign (make_ssa_name (ctype), ccode, rhs1, rhs2);
3287 gimple_set_uid (g, uid);
3288 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3289 if (oe->op && ctype != TREE_TYPE (oe->op))
3290 {
3291 g = gimple_build_assign (make_ssa_name (TREE_TYPE (oe->op)),
3292 NOP_EXPR, gimple_assign_lhs (g));
3293 gimple_set_uid (g, uid);
3294 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3295 }
3296 ranges[i].exp = gimple_assign_lhs (g);
3297 oe->op = ranges[i].exp;
3298 ranges[i].low = build_zero_cst (TREE_TYPE (ranges[i].exp));
3299 ranges[i].high = ranges[i].low;
3300 }
3301 ranges[i].strict_overflow_p = false;
3302 oe = (*ops)[ranges[*idx].idx];
3303 /* Now change all the other range test immediate uses, so that
3304 those tests will be optimized away. */
3305 if (opcode == ERROR_MARK)
3306 {
3307 if (oe->op)
3308 oe->op = build_int_cst (TREE_TYPE (oe->op),
3309 oe->rank == BIT_IOR_EXPR ? 0 : 1);
3310 else
3311 oe->op = (oe->rank == BIT_IOR_EXPR
3312 ? boolean_false_node : boolean_true_node);
3313 }
3314 else
3315 oe->op = error_mark_node;
3316 ranges[*idx].exp = NULL_TREE;
3317 ranges[*idx].low = NULL_TREE;
3318 ranges[*idx].high = NULL_TREE;
3319 any_changes = true;
3320 }
3321
3322 delete map;
3323 return any_changes;
3324 }
3325
3326 /* Optimize range tests, similarly to how fold_range_test optimizes
3327 them on trees. The tree code for the binary
3328 operation between all the operands is OPCODE.
3329 If OPCODE is ERROR_MARK, optimize_range_tests is called from within
3330 maybe_optimize_range_tests for inter-bb range optimization.
3331 In that case, if oe->op is NULL, oe->id is the index of the bb whose
3332 GIMPLE_COND is && or ||ed into the test, and oe->rank says
3333 the actual opcode.
3334 FIRST_BB is the first basic block if OPCODE is ERROR_MARK. */
3335
3336 static bool
3337 optimize_range_tests (enum tree_code opcode,
3338 vec<operand_entry *> *ops, basic_block first_bb)
3339 {
3340 unsigned int length = ops->length (), i, j, first;
3341 operand_entry *oe;
3342 struct range_entry *ranges;
3343 bool any_changes = false;
3344
3345 if (length == 1)
3346 return false;
3347
3348 ranges = XNEWVEC (struct range_entry, length);
3349 for (i = 0; i < length; i++)
3350 {
3351 oe = (*ops)[i];
3352 ranges[i].idx = i;
3353 init_range_entry (ranges + i, oe->op,
3354 oe->op
3355 ? NULL
3356 : last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id)));
3357 /* For |, invert it now; we will invert it again before emitting
3358 the optimized expression. */
3359 if (opcode == BIT_IOR_EXPR
3360 || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
3361 ranges[i].in_p = !ranges[i].in_p;
3362 }
3363
3364 qsort (ranges, length, sizeof (*ranges), range_entry_cmp);
3365 for (i = 0; i < length; i++)
3366 if (ranges[i].exp != NULL_TREE && TREE_CODE (ranges[i].exp) == SSA_NAME)
3367 break;
3368
3369 /* Try to merge ranges. */
3370 for (first = i; i < length; i++)
3371 {
3372 tree low = ranges[i].low;
3373 tree high = ranges[i].high;
3374 int in_p = ranges[i].in_p;
3375 bool strict_overflow_p = ranges[i].strict_overflow_p;
3376 int update_fail_count = 0;
3377
3378 for (j = i + 1; j < length; j++)
3379 {
3380 if (ranges[i].exp != ranges[j].exp)
3381 break;
3382 if (!merge_ranges (&in_p, &low, &high, in_p, low, high,
3383 ranges[j].in_p, ranges[j].low, ranges[j].high))
3384 break;
3385 strict_overflow_p |= ranges[j].strict_overflow_p;
3386 }
3387
3388 if (j == i + 1)
3389 continue;
3390
3391 if (update_range_test (ranges + i, ranges + i + 1, NULL, j - i - 1,
3392 opcode, ops, ranges[i].exp, NULL, in_p,
3393 low, high, strict_overflow_p))
3394 {
3395 i = j - 1;
3396 any_changes = true;
3397 }
3398 /* Avoid quadratic complexity if all merge_ranges calls would succeed,
3399 while update_range_test would fail. */
3400 else if (update_fail_count == 64)
3401 i = j - 1;
3402 else
3403 ++update_fail_count;
3404 }
3405
3406 any_changes |= optimize_range_tests_1 (opcode, first, length, true,
3407 ops, ranges);
3408
3409 if (BRANCH_COST (optimize_function_for_speed_p (cfun), false) >= 2)
3410 any_changes |= optimize_range_tests_1 (opcode, first, length, false,
3411 ops, ranges);
3412 if (lshift_cheap_p (optimize_function_for_speed_p (cfun)))
3413 any_changes |= optimize_range_tests_to_bit_test (opcode, first, length,
3414 ops, ranges);
3415 any_changes |= optimize_range_tests_cmp_bitwise (opcode, first, length,
3416 ops, ranges);
3417 any_changes |= optimize_range_tests_var_bound (opcode, first, length, ops,
3418 ranges, first_bb);
3419
3420 if (any_changes && opcode != ERROR_MARK)
3421 {
3422 j = 0;
3423 FOR_EACH_VEC_ELT (*ops, i, oe)
3424 {
3425 if (oe->op == error_mark_node)
3426 continue;
3427 else if (i != j)
3428 (*ops)[j] = oe;
3429 j++;
3430 }
3431 ops->truncate (j);
3432 }
3433
3434 XDELETEVEC (ranges);
3435 return any_changes;
3436 }
3437
3438 /* A subroutine of optimize_vec_cond_expr to extract and canonicalize
3439 the operands of the VEC_COND_EXPR. Returns ERROR_MARK on failure,
3440 otherwise the comparison code. */
3441
3442 static tree_code
3443 ovce_extract_ops (tree var, gassign **rets, bool *reti)
3444 {
3445 if (TREE_CODE (var) != SSA_NAME)
3446 return ERROR_MARK;
3447
3448 gassign *stmt = dyn_cast <gassign *> (SSA_NAME_DEF_STMT (var));
3449 if (stmt == NULL)
3450 return ERROR_MARK;
3451
3452 /* ??? If we start creating more COND_EXPR, we could perform
3453 this same optimization with them. For now, simplify. */
3454 if (gimple_assign_rhs_code (stmt) != VEC_COND_EXPR)
3455 return ERROR_MARK;
3456
3457 tree cond = gimple_assign_rhs1 (stmt);
3458 tree_code cmp = TREE_CODE (cond);
3459 if (TREE_CODE_CLASS (cmp) != tcc_comparison)
3460 return ERROR_MARK;
3461
3462 /* ??? For now, allow only canonical true and false result vectors.
3463 We could expand this to other constants should the need arise,
3464 but at the moment we don't create them. */
3465 tree t = gimple_assign_rhs2 (stmt);
3466 tree f = gimple_assign_rhs3 (stmt);
3467 bool inv;
3468 if (integer_all_onesp (t))
3469 inv = false;
3470 else if (integer_all_onesp (f))
3471 {
3472 cmp = invert_tree_comparison (cmp, false);
3473 inv = true;
/* Swap so that the zero check below tests the original T; without
   this the inverted case could never pass that check.  */
std::swap (t, f);
3474 }
3475 else
3476 return ERROR_MARK;
3477 if (!integer_zerop (f))
3478 return ERROR_MARK;
3479
3480 /* Success! */
3481 if (rets)
3482 *rets = stmt;
3483 if (reti)
3484 *reti = inv;
3485 return cmp;
3486 }
3487
3488 /* Optimize the condition of VEC_COND_EXPRs which have been combined
3489 with OPCODE (either BIT_AND_EXPR or BIT_IOR_EXPR). */
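/* E.g. for OPCODE == BIT_IOR_EXPR, the conditions of
   (x < y ? -1 : 0) | (x == y ? -1 : 0) are combined by
   maybe_fold_or_comparisons into the single test x <= y.  */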
3490
3491 static bool
3492 optimize_vec_cond_expr (tree_code opcode, vec<operand_entry *> *ops)
3493 {
3494 unsigned int length = ops->length (), i, j;
3495 bool any_changes = false;
3496
3497 if (length == 1)
3498 return false;
3499
3500 for (i = 0; i < length; ++i)
3501 {
3502 tree elt0 = (*ops)[i]->op;
3503
3504 gassign *stmt0;
3505 bool invert;
3506 tree_code cmp0 = ovce_extract_ops (elt0, &stmt0, &invert);
3507 if (cmp0 == ERROR_MARK)
3508 continue;
3509
3510 for (j = i + 1; j < length; ++j)
3511 {
3512 tree &elt1 = (*ops)[j]->op;
3513
3514 gassign *stmt1;
3515 tree_code cmp1 = ovce_extract_ops (elt1, &stmt1, NULL);
3516 if (cmp1 == ERROR_MARK)
3517 continue;
3518
3519 tree cond0 = gimple_assign_rhs1 (stmt0);
3520 tree x0 = TREE_OPERAND (cond0, 0);
3521 tree y0 = TREE_OPERAND (cond0, 1);
3522
3523 tree cond1 = gimple_assign_rhs1 (stmt1);
3524 tree x1 = TREE_OPERAND (cond1, 0);
3525 tree y1 = TREE_OPERAND (cond1, 1);
3526
3527 tree comb;
3528 if (opcode == BIT_AND_EXPR)
3529 comb = maybe_fold_and_comparisons (cmp0, x0, y0, cmp1, x1, y1);
3530 else if (opcode == BIT_IOR_EXPR)
3531 comb = maybe_fold_or_comparisons (cmp0, x0, y0, cmp1, x1, y1);
3532 else
3533 gcc_unreachable ();
3534 if (comb == NULL)
3535 continue;
3536
3537 /* Success! */
3538 if (dump_file && (dump_flags & TDF_DETAILS))
3539 {
3540 fprintf (dump_file, "Transforming ");
3541 print_generic_expr (dump_file, cond0);
3542 fprintf (dump_file, " %c ", opcode == BIT_AND_EXPR ? '&' : '|');
3543 print_generic_expr (dump_file, cond1);
3544 fprintf (dump_file, " into ");
3545 print_generic_expr (dump_file, comb);
3546 fputc ('\n', dump_file);
3547 }
3548
3549 gimple_assign_set_rhs1 (stmt0, comb);
3550 if (invert)
3551 std::swap (*gimple_assign_rhs2_ptr (stmt0),
3552 *gimple_assign_rhs3_ptr (stmt0));
3553 update_stmt (stmt0);
3554
3555 elt1 = error_mark_node;
3556 any_changes = true;
3557 }
3558 }
3559
3560 if (any_changes)
3561 {
3562 operand_entry *oe;
3563 j = 0;
3564 FOR_EACH_VEC_ELT (*ops, i, oe)
3565 {
3566 if (oe->op == error_mark_node)
3567 continue;
3568 else if (i != j)
3569 (*ops)[j] = oe;
3570 j++;
3571 }
3572 ops->truncate (j);
3573 }
3574
3575 return any_changes;
3576 }
3577
3578 /* Return true if STMT is a cast like:
3579 <bb N>:
3580 ...
3581 _123 = (int) _234;
3582
3583 <bb M>:
3584 # _345 = PHI <_123(N), 1(...), 1(...)>
3585 where _234 has bool type, _123 has a single use and
3586 bb N has a single successor M. This is commonly used in
3587 the last block of a range test.
3588
3589 Also return true if STMT is a tcc_comparison like:
3590 <bb N>:
3591 ...
3592 _234 = a_2(D) == 2;
3593
3594 <bb M>:
3595 # _345 = PHI <_234(N), 1(...), 1(...)>
3596 _346 = (int) _345;
3597 where _234 has bool type, a single use and
3598 bb N has a single successor M. This is commonly used in
3599 the last block of a range test. */
3600
3601 static bool
3602 final_range_test_p (gimple *stmt)
3603 {
3604 basic_block bb, rhs_bb, lhs_bb;
3605 edge e;
3606 tree lhs, rhs;
3607 use_operand_p use_p;
3608 gimple *use_stmt;
3609
3610 if (!gimple_assign_cast_p (stmt)
3611 && (!is_gimple_assign (stmt)
3612 || (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt))
3613 != tcc_comparison)))
3614 return false;
3615 bb = gimple_bb (stmt);
3616 if (!single_succ_p (bb))
3617 return false;
3618 e = single_succ_edge (bb);
3619 if (e->flags & EDGE_COMPLEX)
3620 return false;
3621
3622 lhs = gimple_assign_lhs (stmt);
3623 rhs = gimple_assign_rhs1 (stmt);
3624 if (gimple_assign_cast_p (stmt)
3625 && (!INTEGRAL_TYPE_P (TREE_TYPE (lhs))
3626 || TREE_CODE (rhs) != SSA_NAME
3627 || TREE_CODE (TREE_TYPE (rhs)) != BOOLEAN_TYPE))
3628 return false;
3629
3630 if (!gimple_assign_cast_p (stmt)
3631 && (TREE_CODE (TREE_TYPE (lhs)) != BOOLEAN_TYPE))
3632 return false;
3633
3634 /* Test whether lhs is consumed only by a PHI in the only successor bb. */
3635 if (!single_imm_use (lhs, &use_p, &use_stmt))
3636 return false;
3637
3638 if (gimple_code (use_stmt) != GIMPLE_PHI
3639 || gimple_bb (use_stmt) != e->dest)
3640 return false;
3641
3642 /* And that the rhs is defined in the same loop. */
3643 if (gimple_assign_cast_p (stmt))
3644 {
3645 if (TREE_CODE (rhs) != SSA_NAME
3646 || !(rhs_bb = gimple_bb (SSA_NAME_DEF_STMT (rhs)))
3647 || !flow_bb_inside_loop_p (loop_containing_stmt (stmt), rhs_bb))
3648 return false;
3649 }
3650 else
3651 {
3652 if (TREE_CODE (lhs) != SSA_NAME
3653 || !(lhs_bb = gimple_bb (SSA_NAME_DEF_STMT (lhs)))
3654 || !flow_bb_inside_loop_p (loop_containing_stmt (stmt), lhs_bb))
3655 return false;
3656 }
3657
3658 return true;
3659 }
3660
3661 /* Return true if BB is a suitable basic block for inter-bb range test
3662    optimization. If BACKWARD is true, BB should be the only predecessor
3663    of TEST_BB, and *OTHER_BB is either NULL and filled in by the routine,
3664    or compared against to find a common basic block to which all
3665    conditions branch if true resp. false. If BACKWARD is false, TEST_BB
3666    should be the only predecessor of BB. */
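/* As an illustrative sketch (hypothetical blocks, not from any
   testcase), with BACKWARD true the shape being matched is:

       BB:       if (cond1) goto OTHER_BB; else goto TEST_BB;
       TEST_BB:  if (cond2) goto OTHER_BB; else goto ...;

   i.e. BB is the single predecessor of TEST_BB and both conditions
   branch to the common OTHER_BB on one of their edges. */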
3667
3668 static bool
3669 suitable_cond_bb (basic_block bb, basic_block test_bb, basic_block *other_bb,
3670 bool backward)
3671 {
3672 edge_iterator ei, ei2;
3673 edge e, e2;
3674 gimple *stmt;
3675 gphi_iterator gsi;
3676 bool other_edge_seen = false;
3677 bool is_cond;
3678
3679 if (test_bb == bb)
3680 return false;
3681 /* Check last stmt first. */
3682 stmt = last_stmt (bb);
3683 if (stmt == NULL
3684 || (gimple_code (stmt) != GIMPLE_COND
3685 && (backward || !final_range_test_p (stmt)))
3686 || gimple_visited_p (stmt)
3687 || stmt_could_throw_p (stmt)
3688 || *other_bb == bb)
3689 return false;
3690 is_cond = gimple_code (stmt) == GIMPLE_COND;
3691 if (is_cond)
3692 {
3693 /* If last stmt is GIMPLE_COND, verify that one of the succ edges
3694 goes to the next bb (if BACKWARD, it is TEST_BB), and the other
3695 to *OTHER_BB (if not set yet, try to find it out). */
3696 if (EDGE_COUNT (bb->succs) != 2)
3697 return false;
3698 FOR_EACH_EDGE (e, ei, bb->succs)
3699 {
3700 if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
3701 return false;
3702 if (e->dest == test_bb)
3703 {
3704 if (backward)
3705 continue;
3706 else
3707 return false;
3708 }
3709 if (e->dest == bb)
3710 return false;
3711 if (*other_bb == NULL)
3712 {
3713 FOR_EACH_EDGE (e2, ei2, test_bb->succs)
3714 if (!(e2->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
3715 return false;
3716 else if (e->dest == e2->dest)
3717 *other_bb = e->dest;
3718 if (*other_bb == NULL)
3719 return false;
3720 }
3721 if (e->dest == *other_bb)
3722 other_edge_seen = true;
3723 else if (backward)
3724 return false;
3725 }
3726 if (*other_bb == NULL || !other_edge_seen)
3727 return false;
3728 }
3729 else if (single_succ (bb) != *other_bb)
3730 return false;
3731
3732 /* Now check all PHIs of *OTHER_BB. */
3733 e = find_edge (bb, *other_bb);
3734 e2 = find_edge (test_bb, *other_bb);
3735 for (gsi = gsi_start_phis (e->dest); !gsi_end_p (gsi); gsi_next (&gsi))
3736 {
3737 gphi *phi = gsi.phi ();
3738 /* If both BB and TEST_BB end with GIMPLE_COND, all PHI arguments
3739 corresponding to BB and TEST_BB predecessor must be the same. */
3740 if (!operand_equal_p (gimple_phi_arg_def (phi, e->dest_idx),
3741 gimple_phi_arg_def (phi, e2->dest_idx), 0))
3742 {
3743 /* Otherwise, if one of the blocks doesn't end with GIMPLE_COND,
3744 one of the PHIs should have the lhs of the last stmt in
3745 that block as PHI arg and that PHI should have 0 or 1
3746 corresponding to it in all other range test basic blocks
3747 considered. */
3748 if (!is_cond)
3749 {
3750 if (gimple_phi_arg_def (phi, e->dest_idx)
3751 == gimple_assign_lhs (stmt)
3752 && (integer_zerop (gimple_phi_arg_def (phi, e2->dest_idx))
3753 || integer_onep (gimple_phi_arg_def (phi,
3754 e2->dest_idx))))
3755 continue;
3756 }
3757 else
3758 {
3759 gimple *test_last = last_stmt (test_bb);
3760 if (gimple_code (test_last) != GIMPLE_COND
3761 && gimple_phi_arg_def (phi, e2->dest_idx)
3762 == gimple_assign_lhs (test_last)
3763 && (integer_zerop (gimple_phi_arg_def (phi, e->dest_idx))
3764 || integer_onep (gimple_phi_arg_def (phi, e->dest_idx))))
3765 continue;
3766 }
3767
3768 return false;
3769 }
3770 }
3771 return true;
3772 }
3773
3774 /* Return true if BB doesn't have side-effects that would disallow
3775 range test optimization, all SSA_NAMEs set in the bb are consumed
3776 in the bb and there are no PHIs. */
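/* For instance, a bb containing only (hypothetical names)
       _1 = a_2(D) + 3;
       _3 = _1 * 2;
       if (_3 > 5) ...
   qualifies, because _1 and _3 are consumed within the bb; a bb with
   PHIs, a call with side-effects, or an SSA_NAME used in another bb
   would not. */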
3777
3778 static bool
3779 no_side_effect_bb (basic_block bb)
3780 {
3781 gimple_stmt_iterator gsi;
3782 gimple *last;
3783
3784 if (!gimple_seq_empty_p (phi_nodes (bb)))
3785 return false;
3786 last = last_stmt (bb);
3787 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
3788 {
3789 gimple *stmt = gsi_stmt (gsi);
3790 tree lhs;
3791 imm_use_iterator imm_iter;
3792 use_operand_p use_p;
3793
3794 if (is_gimple_debug (stmt))
3795 continue;
3796 if (gimple_has_side_effects (stmt))
3797 return false;
3798 if (stmt == last)
3799 return true;
3800 if (!is_gimple_assign (stmt))
3801 return false;
3802 lhs = gimple_assign_lhs (stmt);
3803 if (TREE_CODE (lhs) != SSA_NAME)
3804 return false;
3805 if (gimple_assign_rhs_could_trap_p (stmt))
3806 return false;
3807 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
3808 {
3809 gimple *use_stmt = USE_STMT (use_p);
3810 if (is_gimple_debug (use_stmt))
3811 continue;
3812 if (gimple_bb (use_stmt) != bb)
3813 return false;
3814 }
3815 }
3816 return false;
3817 }
3818
3819 /* If VAR is set by CODE (BIT_{AND,IOR}_EXPR) which is reassociable,
3820 return true and fill in *OPS recursively. */
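/* E.g. (made-up SSA names) for VAR = _5 with CODE == BIT_IOR_EXPR and
       _5 = _3 | _4;
       _3 = _1 | _2;
   the recursion pushes the leaves _1, _2 and _4 into *OPS, provided
   each has a single use. */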
3821
3822 static bool
3823 get_ops (tree var, enum tree_code code, vec<operand_entry *> *ops,
3824 struct loop *loop)
3825 {
3826 gimple *stmt = SSA_NAME_DEF_STMT (var);
3827 tree rhs[2];
3828 int i;
3829
3830 if (!is_reassociable_op (stmt, code, loop))
3831 return false;
3832
3833 rhs[0] = gimple_assign_rhs1 (stmt);
3834 rhs[1] = gimple_assign_rhs2 (stmt);
3835 gimple_set_visited (stmt, true);
3836 for (i = 0; i < 2; i++)
3837 if (TREE_CODE (rhs[i]) == SSA_NAME
3838 && !get_ops (rhs[i], code, ops, loop)
3839 && has_single_use (rhs[i]))
3840 {
3841 operand_entry *oe = operand_entry_pool.allocate ();
3842
3843 oe->op = rhs[i];
3844 oe->rank = code;
3845 oe->id = 0;
3846 oe->count = 1;
3847 oe->stmt_to_insert = NULL;
3848 ops->safe_push (oe);
3849 }
3850 return true;
3851 }
3852
3853 /* Find the ops that were added by get_ops starting from VAR, see if
3854    they were changed during update_range_test and if so, create new
3855    stmts. */
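/* A sketch with hypothetical names: if get_ops collected _1, _2 and _4
   from
       _5 = _3 | _4;
       _3 = _1 | _2;
   and optimize_range_tests replaced the entry for _1 with a merged
   test _7, update_ops rebuilds the chain as
       _8 = _7 | _2;
       _9 = _8 | _4;
   and returns _9. */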
3856
3857 static tree
3858 update_ops (tree var, enum tree_code code, vec<operand_entry *> ops,
3859 unsigned int *pidx, struct loop *loop)
3860 {
3861 gimple *stmt = SSA_NAME_DEF_STMT (var);
3862 tree rhs[4];
3863 int i;
3864
3865 if (!is_reassociable_op (stmt, code, loop))
3866 return NULL;
3867
3868 rhs[0] = gimple_assign_rhs1 (stmt);
3869 rhs[1] = gimple_assign_rhs2 (stmt);
3870 rhs[2] = rhs[0];
3871 rhs[3] = rhs[1];
3872 for (i = 0; i < 2; i++)
3873 if (TREE_CODE (rhs[i]) == SSA_NAME)
3874 {
3875 rhs[2 + i] = update_ops (rhs[i], code, ops, pidx, loop);
3876 if (rhs[2 + i] == NULL_TREE)
3877 {
3878 if (has_single_use (rhs[i]))
3879 rhs[2 + i] = ops[(*pidx)++]->op;
3880 else
3881 rhs[2 + i] = rhs[i];
3882 }
3883 }
3884 if ((rhs[2] != rhs[0] || rhs[3] != rhs[1])
3885 && (rhs[2] != rhs[1] || rhs[3] != rhs[0]))
3886 {
3887 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
3888 var = make_ssa_name (TREE_TYPE (var));
3889 gassign *g = gimple_build_assign (var, gimple_assign_rhs_code (stmt),
3890 rhs[2], rhs[3]);
3891 gimple_set_uid (g, gimple_uid (stmt));
3892 gimple_set_visited (g, true);
3893 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3894 }
3895 return var;
3896 }
3897
3898 /* Structure to track the initial value passed to get_ops and
3899 the range in the ops vector for each basic block. */
3900
3901 struct inter_bb_range_test_entry
3902 {
3903 tree op;
3904 unsigned int first_idx, last_idx;
3905 };
3906
3907 /* Inter-bb range test optimization.
3908
3909    Returns TRUE if a gimple conditional is optimized to a constant
3910    true/false, otherwise returns FALSE.
3911
3912 This indicates to the caller that it should run a CFG cleanup pass
3913 once reassociation is completed. */
3914
3915 static bool
3916 maybe_optimize_range_tests (gimple *stmt)
3917 {
3918 basic_block first_bb = gimple_bb (stmt);
3919 basic_block last_bb = first_bb;
3920 basic_block other_bb = NULL;
3921 basic_block bb;
3922 edge_iterator ei;
3923 edge e;
3924 auto_vec<operand_entry *> ops;
3925 auto_vec<inter_bb_range_test_entry> bbinfo;
3926 bool any_changes = false;
3927 bool cfg_cleanup_needed = false;
3928
3929 /* Consider only basic blocks that end with GIMPLE_COND or
3930 a cast statement satisfying final_range_test_p. All
3931 but the last bb in the first_bb .. last_bb range
3932 should end with GIMPLE_COND. */
3933 if (gimple_code (stmt) == GIMPLE_COND)
3934 {
3935 if (EDGE_COUNT (first_bb->succs) != 2)
3936 return cfg_cleanup_needed;
3937 }
3938 else if (final_range_test_p (stmt))
3939 other_bb = single_succ (first_bb);
3940 else
3941 return cfg_cleanup_needed;
3942
3943 if (stmt_could_throw_p (stmt))
3944 return cfg_cleanup_needed;
3945
3946 /* As relative ordering of post-dominator sons isn't fixed,
3947 maybe_optimize_range_tests can be called first on any
3948 bb in the range we want to optimize. So, start searching
3949 backwards, if first_bb can be set to a predecessor. */
3950 while (single_pred_p (first_bb))
3951 {
3952 basic_block pred_bb = single_pred (first_bb);
3953 if (!suitable_cond_bb (pred_bb, first_bb, &other_bb, true))
3954 break;
3955 if (!no_side_effect_bb (first_bb))
3956 break;
3957 first_bb = pred_bb;
3958 }
3959 /* If first_bb is last_bb, other_bb hasn't been computed yet.
3960 Before starting forward search in last_bb successors, find
3961 out the other_bb. */
3962 if (first_bb == last_bb)
3963 {
3964 other_bb = NULL;
3965 /* As non-GIMPLE_COND last stmt always terminates the range,
3966 if forward search didn't discover anything, just give up. */
3967 if (gimple_code (stmt) != GIMPLE_COND)
3968 return cfg_cleanup_needed;
3969 /* Look at both successors. Either it ends with a GIMPLE_COND
3970 and satisfies suitable_cond_bb, or ends with a cast and
3971 other_bb is that cast's successor. */
3972 FOR_EACH_EDGE (e, ei, first_bb->succs)
3973 if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE))
3974 || e->dest == first_bb)
3975 return cfg_cleanup_needed;
3976 else if (single_pred_p (e->dest))
3977 {
3978 stmt = last_stmt (e->dest);
3979 if (stmt
3980 && gimple_code (stmt) == GIMPLE_COND
3981 && EDGE_COUNT (e->dest->succs) == 2)
3982 {
3983 if (suitable_cond_bb (first_bb, e->dest, &other_bb, true))
3984 break;
3985 else
3986 other_bb = NULL;
3987 }
3988 else if (stmt
3989 && final_range_test_p (stmt)
3990 && find_edge (first_bb, single_succ (e->dest)))
3991 {
3992 other_bb = single_succ (e->dest);
3993 if (other_bb == first_bb)
3994 other_bb = NULL;
3995 }
3996 }
3997 if (other_bb == NULL)
3998 return cfg_cleanup_needed;
3999 }
4000 /* Now do the forward search, moving last_bb to successor bbs
4001 that aren't other_bb. */
4002 while (EDGE_COUNT (last_bb->succs) == 2)
4003 {
4004 FOR_EACH_EDGE (e, ei, last_bb->succs)
4005 if (e->dest != other_bb)
4006 break;
4007 if (e == NULL)
4008 break;
4009 if (!single_pred_p (e->dest))
4010 break;
4011 if (!suitable_cond_bb (e->dest, last_bb, &other_bb, false))
4012 break;
4013 if (!no_side_effect_bb (e->dest))
4014 break;
4015 last_bb = e->dest;
4016 }
4017 if (first_bb == last_bb)
4018 return cfg_cleanup_needed;
4019 /* Here basic blocks first_bb through last_bb's predecessor
4020 end with GIMPLE_COND, all of them have one of the edges to
4021 other_bb and another to another block in the range,
4022 all blocks except first_bb don't have side-effects and
4023 last_bb ends with either GIMPLE_COND, or cast satisfying
4024 final_range_test_p. */
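  /* A hypothetical instance of such a range: for
         <first_bb>: if (a_1 == 2) goto other_bb; else goto bb3;
         <bb3>:      if (a_1 == 4) goto other_bb; else goto bb4;
         <bb4>:      _2 = a_1 == 6;   (last_bb, final_range_test_p)
     the loop below walks from last_bb back to first_bb, recording one
     inter_bb_range_test_entry per block. */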
4025 for (bb = last_bb; ; bb = single_pred (bb))
4026 {
4027 enum tree_code code;
4028 tree lhs, rhs;
4029 inter_bb_range_test_entry bb_ent;
4030
4031 bb_ent.op = NULL_TREE;
4032 bb_ent.first_idx = ops.length ();
4033 bb_ent.last_idx = bb_ent.first_idx;
4034 e = find_edge (bb, other_bb);
4035 stmt = last_stmt (bb);
4036 gimple_set_visited (stmt, true);
4037 if (gimple_code (stmt) != GIMPLE_COND)
4038 {
4039 use_operand_p use_p;
4040 gimple *phi;
4041 edge e2;
4042 unsigned int d;
4043
4044 lhs = gimple_assign_lhs (stmt);
4045 rhs = gimple_assign_rhs1 (stmt);
4046 gcc_assert (bb == last_bb);
4047
4048 /* stmt is
4049 _123 = (int) _234;
4050 OR
4051 _234 = a_2(D) == 2;
4052
4053 followed by:
4054 <bb M>:
4055 # _345 = PHI <_123(N), 1(...), 1(...)>
4056
4057 	     or 0 instead of 1. If it is 0, the _234
4058 	     range test is anded together with all the
4059 	     other range tests; if it is 1, it is ored with
4060 	     them. */
4061 single_imm_use (lhs, &use_p, &phi);
4062 gcc_assert (gimple_code (phi) == GIMPLE_PHI);
4063 e2 = find_edge (first_bb, other_bb);
4064 d = e2->dest_idx;
4065 gcc_assert (gimple_phi_arg_def (phi, e->dest_idx) == lhs);
4066 if (integer_zerop (gimple_phi_arg_def (phi, d)))
4067 code = BIT_AND_EXPR;
4068 else
4069 {
4070 gcc_checking_assert (integer_onep (gimple_phi_arg_def (phi, d)));
4071 code = BIT_IOR_EXPR;
4072 }
4073
4074       /* If _234 SSA_NAME_DEF_STMT is
4075 	 _234 = _567 | _789;
4076 	 (or &, corresponding to 1/0 in the phi arguments),
4077 	 push into ops the individual range test arguments
4078 	 of the bitwise or resp. and, recursively. */
4079 if (TREE_CODE (rhs) == SSA_NAME
4080 && (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt))
4081 != tcc_comparison)
4082 && !get_ops (rhs, code, &ops,
4083 loop_containing_stmt (stmt))
4084 && has_single_use (rhs))
4085 {
4086 /* Otherwise, push the _234 range test itself. */
4087 operand_entry *oe = operand_entry_pool.allocate ();
4088
4089 oe->op = rhs;
4090 oe->rank = code;
4091 oe->id = 0;
4092 oe->count = 1;
4093 oe->stmt_to_insert = NULL;
4094 ops.safe_push (oe);
4095 bb_ent.last_idx++;
4096 bb_ent.op = rhs;
4097 }
4098 else if (is_gimple_assign (stmt)
4099 && (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt))
4100 == tcc_comparison)
4101 && !get_ops (lhs, code, &ops,
4102 loop_containing_stmt (stmt))
4103 && has_single_use (lhs))
4104 {
4105 operand_entry *oe = operand_entry_pool.allocate ();
4106 oe->op = lhs;
4107 oe->rank = code;
4108 oe->id = 0;
4109 oe->count = 1;
4110 ops.safe_push (oe);
4111 bb_ent.last_idx++;
4112 bb_ent.op = lhs;
4113 }
4114 else
4115 {
4116 bb_ent.last_idx = ops.length ();
4117 bb_ent.op = rhs;
4118 }
4119 bbinfo.safe_push (bb_ent);
4120 continue;
4121 }
4122 /* Otherwise stmt is GIMPLE_COND. */
4123 code = gimple_cond_code (stmt);
4124 lhs = gimple_cond_lhs (stmt);
4125 rhs = gimple_cond_rhs (stmt);
4126 if (TREE_CODE (lhs) == SSA_NAME
4127 && INTEGRAL_TYPE_P (TREE_TYPE (lhs))
4128 && ((code != EQ_EXPR && code != NE_EXPR)
4129 || rhs != boolean_false_node
4130 /* Either push into ops the individual bitwise
4131 or resp. and operands, depending on which
4132 edge is other_bb. */
4133 || !get_ops (lhs, (((e->flags & EDGE_TRUE_VALUE) == 0)
4134 ^ (code == EQ_EXPR))
4135 ? BIT_AND_EXPR : BIT_IOR_EXPR, &ops,
4136 loop_containing_stmt (stmt))))
4137 {
4138 /* Or push the GIMPLE_COND stmt itself. */
4139 operand_entry *oe = operand_entry_pool.allocate ();
4140
4141 oe->op = NULL;
4142 oe->rank = (e->flags & EDGE_TRUE_VALUE)
4143 ? BIT_IOR_EXPR : BIT_AND_EXPR;
4144 	  /* oe->op = NULL signals that there is no SSA_NAME
4145 	     for the range test, and oe->id instead is the
4146 	     number of the basic block at whose end the
4147 	     GIMPLE_COND is. */
4148 oe->id = bb->index;
4149 oe->count = 1;
4150 oe->stmt_to_insert = NULL;
4151 ops.safe_push (oe);
4152 bb_ent.op = NULL;
4153 bb_ent.last_idx++;
4154 }
4155 else if (ops.length () > bb_ent.first_idx)
4156 {
4157 bb_ent.op = lhs;
4158 bb_ent.last_idx = ops.length ();
4159 }
4160 bbinfo.safe_push (bb_ent);
4161 if (bb == first_bb)
4162 break;
4163 }
4164 if (ops.length () > 1)
4165 any_changes = optimize_range_tests (ERROR_MARK, &ops, first_bb);
4166 if (any_changes)
4167 {
4168 unsigned int idx, max_idx = 0;
4169       /* update_ops relies on the has_single_use predicates returning the
4170 	 same values as they did during get_ops earlier. Additionally it
4171 	 never removes statements, only adds new ones, and it should walk
4172 	 from the single imm use and check the predicate before making
4173 	 those changes.
4174 	 On the other hand, the direct handling of GIMPLE_COND can turn
4175 	 previously multiply used SSA_NAMEs into single use SSA_NAMEs, so
4176 	 it needs to be done in a separate loop afterwards. */
4177 for (bb = last_bb, idx = 0; ; bb = single_pred (bb), idx++)
4178 {
4179 if (bbinfo[idx].first_idx < bbinfo[idx].last_idx
4180 && bbinfo[idx].op != NULL_TREE)
4181 {
4182 tree new_op;
4183
4184 max_idx = idx;
4185 stmt = last_stmt (bb);
4186 new_op = update_ops (bbinfo[idx].op,
4187 (enum tree_code)
4188 ops[bbinfo[idx].first_idx]->rank,
4189 ops, &bbinfo[idx].first_idx,
4190 loop_containing_stmt (stmt));
4191 if (new_op == NULL_TREE)
4192 {
4193 gcc_assert (bb == last_bb);
4194 new_op = ops[bbinfo[idx].first_idx++]->op;
4195 }
4196 if (bbinfo[idx].op != new_op)
4197 {
4198 imm_use_iterator iter;
4199 use_operand_p use_p;
4200 gimple *use_stmt, *cast_or_tcc_cmp_stmt = NULL;
4201
4202 FOR_EACH_IMM_USE_STMT (use_stmt, iter, bbinfo[idx].op)
4203 if (is_gimple_debug (use_stmt))
4204 continue;
4205 else if (gimple_code (use_stmt) == GIMPLE_COND
4206 || gimple_code (use_stmt) == GIMPLE_PHI)
4207 FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
4208 SET_USE (use_p, new_op);
4209 else if ((is_gimple_assign (use_stmt)
4210 && (TREE_CODE_CLASS
4211 (gimple_assign_rhs_code (use_stmt))
4212 == tcc_comparison)))
4213 cast_or_tcc_cmp_stmt = use_stmt;
4214 else if (gimple_assign_cast_p (use_stmt))
4215 cast_or_tcc_cmp_stmt = use_stmt;
4216 else
4217 gcc_unreachable ();
4218
4219 if (cast_or_tcc_cmp_stmt)
4220 {
4221 gcc_assert (bb == last_bb);
4222 tree lhs = gimple_assign_lhs (cast_or_tcc_cmp_stmt);
4223 tree new_lhs = make_ssa_name (TREE_TYPE (lhs));
4224 enum tree_code rhs_code
4225 = gimple_assign_cast_p (cast_or_tcc_cmp_stmt)
4226 ? gimple_assign_rhs_code (cast_or_tcc_cmp_stmt)
4227 : CONVERT_EXPR;
4228 gassign *g;
4229 if (is_gimple_min_invariant (new_op))
4230 {
4231 new_op = fold_convert (TREE_TYPE (lhs), new_op);
4232 g = gimple_build_assign (new_lhs, new_op);
4233 }
4234 else
4235 g = gimple_build_assign (new_lhs, rhs_code, new_op);
4236 gimple_stmt_iterator gsi
4237 = gsi_for_stmt (cast_or_tcc_cmp_stmt);
4238 gimple_set_uid (g, gimple_uid (cast_or_tcc_cmp_stmt));
4239 gimple_set_visited (g, true);
4240 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
4241 FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs)
4242 if (is_gimple_debug (use_stmt))
4243 continue;
4244 else if (gimple_code (use_stmt) == GIMPLE_COND
4245 || gimple_code (use_stmt) == GIMPLE_PHI)
4246 FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
4247 SET_USE (use_p, new_lhs);
4248 else
4249 gcc_unreachable ();
4250 }
4251 }
4252 }
4253 if (bb == first_bb)
4254 break;
4255 }
4256 for (bb = last_bb, idx = 0; ; bb = single_pred (bb), idx++)
4257 {
4258 if (bbinfo[idx].first_idx < bbinfo[idx].last_idx
4259 && bbinfo[idx].op == NULL_TREE
4260 && ops[bbinfo[idx].first_idx]->op != NULL_TREE)
4261 {
4262 gcond *cond_stmt = as_a <gcond *> (last_stmt (bb));
4263
4264 if (idx > max_idx)
4265 max_idx = idx;
4266
4267 /* If we collapse the conditional to a true/false
4268 condition, then bubble that knowledge up to our caller. */
4269 if (integer_zerop (ops[bbinfo[idx].first_idx]->op))
4270 {
4271 gimple_cond_make_false (cond_stmt);
4272 cfg_cleanup_needed = true;
4273 }
4274 else if (integer_onep (ops[bbinfo[idx].first_idx]->op))
4275 {
4276 gimple_cond_make_true (cond_stmt);
4277 cfg_cleanup_needed = true;
4278 }
4279 else
4280 {
4281 gimple_cond_set_code (cond_stmt, NE_EXPR);
4282 gimple_cond_set_lhs (cond_stmt,
4283 ops[bbinfo[idx].first_idx]->op);
4284 gimple_cond_set_rhs (cond_stmt, boolean_false_node);
4285 }
4286 update_stmt (cond_stmt);
4287 }
4288 if (bb == first_bb)
4289 break;
4290 }
4291
4292 /* The above changes could result in basic blocks after the first
4293 modified one, up to and including last_bb, to be executed even if
4294 they would not be in the original program. If the value ranges of
4295 assignment lhs' in those bbs were dependent on the conditions
4296 guarding those basic blocks which now can change, the VRs might
4297 be incorrect. As no_side_effect_bb should ensure those SSA_NAMEs
4298 	 are only used within the same bb, it should not be a big deal if
4299 we just reset all the VRs in those bbs. See PR68671. */
4300 for (bb = last_bb, idx = 0; idx < max_idx; bb = single_pred (bb), idx++)
4301 reset_flow_sensitive_info_in_bb (bb);
4302 }
4303 return cfg_cleanup_needed;
4304 }
4305
4306 /* Return true if OPERAND is defined by a PHI node which uses the LHS
4307    of STMT in its operands. This is also known as a "destructive
4308 update" operation. */
4309
4310 static bool
4311 is_phi_for_stmt (gimple *stmt, tree operand)
4312 {
4313 gimple *def_stmt;
4314 gphi *def_phi;
4315 tree lhs;
4316 use_operand_p arg_p;
4317 ssa_op_iter i;
4318
4319 if (TREE_CODE (operand) != SSA_NAME)
4320 return false;
4321
4322 lhs = gimple_assign_lhs (stmt);
4323
4324 def_stmt = SSA_NAME_DEF_STMT (operand);
4325 def_phi = dyn_cast <gphi *> (def_stmt);
4326 if (!def_phi)
4327 return false;
4328
4329 FOR_EACH_PHI_ARG (arg_p, def_phi, i, SSA_OP_USE)
4330 if (lhs == USE_FROM_PTR (arg_p))
4331 return true;
4332 return false;
4333 }
4334
4335 /* Remove def stmt of VAR if VAR has zero uses and recurse
4336 on rhs1 operand if so. */
4337
4338 static void
4339 remove_visited_stmt_chain (tree var)
4340 {
4341 gimple *stmt;
4342 gimple_stmt_iterator gsi;
4343
4344 while (1)
4345 {
4346 if (TREE_CODE (var) != SSA_NAME || !has_zero_uses (var))
4347 return;
4348 stmt = SSA_NAME_DEF_STMT (var);
4349 if (is_gimple_assign (stmt) && gimple_visited_p (stmt))
4350 {
4351 var = gimple_assign_rhs1 (stmt);
4352 gsi = gsi_for_stmt (stmt);
4353 reassoc_remove_stmt (&gsi);
4354 release_defs (stmt);
4355 }
4356 else
4357 return;
4358 }
4359 }
4360
4361 /* This function checks three consecutive operands in
4362    the passed operand vector OPS starting from OPINDEX and
4363    swaps two operands if it is profitable for the binary operation
4364    consuming the OPINDEX + 1 and OPINDEX + 2 operands.
4365
4366 We pair ops with the same rank if possible.
4367
4368 The alternative we try is to see if STMT is a destructive
4369 update style statement, which is like:
4370 b = phi (a, ...)
4371 a = c + b;
4372 In that case, we want to use the destructive update form to
4373 expose the possible vectorizer sum reduction opportunity.
4374 In that case, the third operand will be the phi node. This
4375 check is not performed if STMT is null.
4376
4377 We could, of course, try to be better as noted above, and do a
4378 lot of work to try to find these opportunities in >3 operand
4379 cases, but it is unlikely to be worth it. */
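/* For instance, with hypothetical ranks (3, 1, 3) for oe1, oe2 and oe3,
   oe1 and oe2 are swapped so that the equal-rank pair ends up in the
   oe2/oe3 slots consumed by one binary operation, giving the redundancy
   eliminator a better chance downstream. */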
4380
4381 static void
4382 swap_ops_for_binary_stmt (vec<operand_entry *> ops,
4383 unsigned int opindex, gimple *stmt)
4384 {
4385 operand_entry *oe1, *oe2, *oe3;
4386
4387 oe1 = ops[opindex];
4388 oe2 = ops[opindex + 1];
4389 oe3 = ops[opindex + 2];
4390
4391 if ((oe1->rank == oe2->rank
4392 && oe2->rank != oe3->rank)
4393 || (stmt && is_phi_for_stmt (stmt, oe3->op)
4394 && !is_phi_for_stmt (stmt, oe1->op)
4395 && !is_phi_for_stmt (stmt, oe2->op)))
4396 std::swap (*oe1, *oe3);
4397 else if ((oe1->rank == oe3->rank
4398 && oe2->rank != oe3->rank)
4399 || (stmt && is_phi_for_stmt (stmt, oe2->op)
4400 && !is_phi_for_stmt (stmt, oe1->op)
4401 && !is_phi_for_stmt (stmt, oe3->op)))
4402 std::swap (*oe1, *oe2);
4403 }
4404
4405 /* If definition of RHS1 or RHS2 dominates STMT, return the later of those
4406 two definitions, otherwise return STMT. */
4407
4408 static inline gimple *
4409 find_insert_point (gimple *stmt, tree rhs1, tree rhs2)
4410 {
4411 if (TREE_CODE (rhs1) == SSA_NAME
4412 && reassoc_stmt_dominates_stmt_p (stmt, SSA_NAME_DEF_STMT (rhs1)))
4413 stmt = SSA_NAME_DEF_STMT (rhs1);
4414 if (TREE_CODE (rhs2) == SSA_NAME
4415 && reassoc_stmt_dominates_stmt_p (stmt, SSA_NAME_DEF_STMT (rhs2)))
4416 stmt = SSA_NAME_DEF_STMT (rhs2);
4417 return stmt;
4418 }
4419
4420 /* If the stmt that defines operand has to be inserted, insert it
4421 before the use. */
4422 static void
4423 insert_stmt_before_use (gimple *stmt, gimple *stmt_to_insert)
4424 {
4425 gcc_assert (is_gimple_assign (stmt_to_insert));
4426 tree rhs1 = gimple_assign_rhs1 (stmt_to_insert);
4427 tree rhs2 = gimple_assign_rhs2 (stmt_to_insert);
4428 gimple *insert_point = find_insert_point (stmt, rhs1, rhs2);
4429 gimple_stmt_iterator gsi = gsi_for_stmt (insert_point);
4430 gimple_set_uid (stmt_to_insert, gimple_uid (insert_point));
4431
4432 /* If the insert point is not stmt, then insert_point would be
4433 the point where operand rhs1 or rhs2 is defined. In this case,
4434 stmt_to_insert has to be inserted afterwards. This would
4435 only happen when the stmt insertion point is flexible. */
4436 if (stmt == insert_point)
4437 gsi_insert_before (&gsi, stmt_to_insert, GSI_NEW_STMT);
4438 else
4439 insert_stmt_after (stmt_to_insert, insert_point);
4440 }
4441
4442
4443 /* Recursively rewrite our linearized statements so that the operators
4444 match those in OPS[OPINDEX], putting the computation in rank
4445 order. Return new lhs.
4446 CHANGED is true if we shouldn't reuse the lhs SSA_NAME both in
4447 the current stmt and during recursive invocations.
4448 NEXT_CHANGED is true if we shouldn't reuse the lhs SSA_NAME in
4449 recursive invocations. */
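/* A rough sketch (hypothetical chain): for the linearized statements
       t1 = a + b;
       t2 = t1 + c;   <-- STMT, opindex 0
   with OPS holding [c', b', a'], the recursion descends to t1,
   rewrites it as t1' = b' + a' from the last two ops, and then
   rewrites STMT itself as t2' = t1' + c'. */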
4450
4451 static tree
4452 rewrite_expr_tree (gimple *stmt, unsigned int opindex,
4453 vec<operand_entry *> ops, bool changed, bool next_changed)
4454 {
4455 tree rhs1 = gimple_assign_rhs1 (stmt);
4456 tree rhs2 = gimple_assign_rhs2 (stmt);
4457 tree lhs = gimple_assign_lhs (stmt);
4458 operand_entry *oe;
4459
4460 /* The final recursion case for this function is that you have
4461 exactly two operations left.
4462 If we had exactly one op in the entire list to start with, we
4463 would have never called this function, and the tail recursion
4464 rewrites them one at a time. */
4465 if (opindex + 2 == ops.length ())
4466 {
4467 operand_entry *oe1, *oe2;
4468
4469 oe1 = ops[opindex];
4470 oe2 = ops[opindex + 1];
4471
4472 if (rhs1 != oe1->op || rhs2 != oe2->op)
4473 {
4474 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
4475 unsigned int uid = gimple_uid (stmt);
4476
4477 if (dump_file && (dump_flags & TDF_DETAILS))
4478 {
4479 fprintf (dump_file, "Transforming ");
4480 print_gimple_stmt (dump_file, stmt, 0);
4481 }
4482
4483 /* If the stmt that defines operand has to be inserted, insert it
4484 before the use. */
4485 if (oe1->stmt_to_insert)
4486 insert_stmt_before_use (stmt, oe1->stmt_to_insert);
4487 if (oe2->stmt_to_insert)
4488 insert_stmt_before_use (stmt, oe2->stmt_to_insert);
4489 /* Even when changed is false, reassociation could have e.g. removed
4490 some redundant operations, so unless we are just swapping the
4491 arguments or unless there is no change at all (then we just
4492 return lhs), force creation of a new SSA_NAME. */
4493 if (changed || ((rhs1 != oe2->op || rhs2 != oe1->op) && opindex))
4494 {
4495 gimple *insert_point
4496 = find_insert_point (stmt, oe1->op, oe2->op);
4497 lhs = make_ssa_name (TREE_TYPE (lhs));
4498 stmt
4499 = gimple_build_assign (lhs, gimple_assign_rhs_code (stmt),
4500 oe1->op, oe2->op);
4501 gimple_set_uid (stmt, uid);
4502 gimple_set_visited (stmt, true);
4503 if (insert_point == gsi_stmt (gsi))
4504 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
4505 else
4506 insert_stmt_after (stmt, insert_point);
4507 }
4508 else
4509 {
4510 gcc_checking_assert (find_insert_point (stmt, oe1->op, oe2->op)
4511 == stmt);
4512 gimple_assign_set_rhs1 (stmt, oe1->op);
4513 gimple_assign_set_rhs2 (stmt, oe2->op);
4514 update_stmt (stmt);
4515 }
4516
4517 if (rhs1 != oe1->op && rhs1 != oe2->op)
4518 remove_visited_stmt_chain (rhs1);
4519
4520 if (dump_file && (dump_flags & TDF_DETAILS))
4521 {
4522 fprintf (dump_file, " into ");
4523 print_gimple_stmt (dump_file, stmt, 0);
4524 }
4525 }
4526 return lhs;
4527 }
4528
4529 /* If we hit here, we should have 3 or more ops left. */
4530 gcc_assert (opindex + 2 < ops.length ());
4531
4532 /* Rewrite the next operator. */
4533 oe = ops[opindex];
4534
4535 /* If the stmt that defines operand has to be inserted, insert it
4536 before the use. */
4537 if (oe->stmt_to_insert)
4538 insert_stmt_before_use (stmt, oe->stmt_to_insert);
4539
4540 /* Recurse on the LHS of the binary operator, which is guaranteed to
4541 be the non-leaf side. */
4542 tree new_rhs1
4543 = rewrite_expr_tree (SSA_NAME_DEF_STMT (rhs1), opindex + 1, ops,
4544 changed || oe->op != rhs2 || next_changed,
4545 false);
4546
4547 if (oe->op != rhs2 || new_rhs1 != rhs1)
4548 {
4549 if (dump_file && (dump_flags & TDF_DETAILS))
4550 {
4551 fprintf (dump_file, "Transforming ");
4552 print_gimple_stmt (dump_file, stmt, 0);
4553 }
4554
4555 /* If changed is false, this is either opindex == 0
4556 or all outer rhs2's were equal to corresponding oe->op,
4557 and powi_result is NULL.
4558 That means lhs is equivalent before and after reassociation.
4559 Otherwise ensure the old lhs SSA_NAME is not reused and
4560 create a new stmt as well, so that any debug stmts will be
4561 properly adjusted. */
4562 if (changed)
4563 {
4564 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
4565 unsigned int uid = gimple_uid (stmt);
4566 gimple *insert_point = find_insert_point (stmt, new_rhs1, oe->op);
4567
4568 lhs = make_ssa_name (TREE_TYPE (lhs));
4569 stmt = gimple_build_assign (lhs, gimple_assign_rhs_code (stmt),
4570 new_rhs1, oe->op);
4571 gimple_set_uid (stmt, uid);
4572 gimple_set_visited (stmt, true);
4573 if (insert_point == gsi_stmt (gsi))
4574 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
4575 else
4576 insert_stmt_after (stmt, insert_point);
4577 }
4578 else
4579 {
4580 gcc_checking_assert (find_insert_point (stmt, new_rhs1, oe->op)
4581 == stmt);
4582 gimple_assign_set_rhs1 (stmt, new_rhs1);
4583 gimple_assign_set_rhs2 (stmt, oe->op);
4584 update_stmt (stmt);
4585 }
4586
4587 if (dump_file && (dump_flags & TDF_DETAILS))
4588 {
4589 fprintf (dump_file, " into ");
4590 print_gimple_stmt (dump_file, stmt, 0);
4591 }
4592 }
4593 return lhs;
4594 }
4595
4596 /* Find out how many cycles we need to compute a statement chain.
4597    OPS_NUM holds the number of statements in the chain. CPU_WIDTH is the
4598    maximum number of independent statements we may execute per cycle. */
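/* E.g. (illustrative numbers) get_required_cycles (10, 2): two cycles
   reduce the operand count by cpu_width each (res = 10 / 4 = 2),
   leaving rest = 10 - 2 * 2 = 6 operands, which take another
   floor_log2 (6) + 1 = 3 halving cycles, for 5 cycles total. */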
4599
4600 static int
4601 get_required_cycles (int ops_num, int cpu_width)
4602 {
4603 int res;
4604 int elog;
4605 unsigned int rest;
4606
4607   /* While we have more than 2 * cpu_width operands
4608      we may reduce the number of operands by cpu_width
4609      per cycle. */
4610 res = ops_num / (2 * cpu_width);
4611
4612   /* The remaining operand count may be halved each cycle
4613      until we have only one operand left. */
4614 rest = (unsigned)(ops_num - res * cpu_width);
4615 elog = exact_log2 (rest);
4616 if (elog >= 0)
4617 res += elog;
4618 else
4619 res += floor_log2 (rest) + 1;
4620
4621 return res;
4622 }
4623
4624 /* Returns an optimal number of registers to use for computation of
4625    the given statements. */
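/* For example (made-up numbers), with ops_num = 8 and a target-provided
   width of 4, get_required_cycles (8, 4) is 3, while widths 2 and 3
   both need 4 cycles, so the binary search below keeps width 4. */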
4626
4627 static int
4628 get_reassociation_width (int ops_num, enum tree_code opc,
4629 machine_mode mode)
4630 {
4631 int param_width = PARAM_VALUE (PARAM_TREE_REASSOC_WIDTH);
4632 int width;
4633 int width_min;
4634 int cycles_best;
4635
4636 if (param_width > 0)
4637 width = param_width;
4638 else
4639 width = targetm.sched.reassociation_width (opc, mode);
4640
4641 if (width == 1)
4642 return width;
4643
4644 /* Get the minimal time required for sequence computation. */
4645 cycles_best = get_required_cycles (ops_num, width);
4646
4647   /* Check if we may use less width and still compute the sequence in
4648      the same time. This allows us to reduce register usage.
4649      get_required_cycles is monotonically increasing with lower width
4650      so we can perform a binary search for the minimal width that still
4651      results in the optimal cycle count. */
4652 width_min = 1;
4653 while (width > width_min)
4654 {
4655 int width_mid = (width + width_min) / 2;
4656
4657 if (get_required_cycles (ops_num, width_mid) == cycles_best)
4658 width = width_mid;
4659 else if (width_min < width_mid)
4660 width_min = width_mid;
4661 else
4662 break;
4663 }
4664
4665 return width;
4666 }
4667
4668 /* Recursively rewrite our linearized statements so that the operators
4669 match those in OPS[OPINDEX], putting the computation in rank
4670 order and trying to allow operations to be executed in
4671 parallel. */
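/* E.g. (a sketch) with width 2 and ops a, b, c, d the serial chain
       ((a + b) + c) + d
   is instead emitted as
       t1 = a + b;  t2 = c + d;  t3 = t1 + t2;
   so that t1 and t2 can execute in the same cycle. */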
4672
4673 static void
4674 rewrite_expr_tree_parallel (gassign *stmt, int width,
4675 vec<operand_entry *> ops)
4676 {
4677 enum tree_code opcode = gimple_assign_rhs_code (stmt);
4678 int op_num = ops.length ();
4679 gcc_assert (op_num > 0);
4680 int stmt_num = op_num - 1;
4681 gimple **stmts = XALLOCAVEC (gimple *, stmt_num);
4682 int op_index = op_num - 1;
4683 int stmt_index = 0;
4684 int ready_stmts_end = 0;
4685 int i = 0;
4686 gimple *stmt1 = NULL, *stmt2 = NULL;
4687 tree last_rhs1 = gimple_assign_rhs1 (stmt);
4688
4689 /* We start expression rewriting from the top statements.
4690 So, in this loop we create a full list of statements
4691 we will work with. */
4692 stmts[stmt_num - 1] = stmt;
4693 for (i = stmt_num - 2; i >= 0; i--)
4694 stmts[i] = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmts[i+1]));
4695
4696 for (i = 0; i < stmt_num; i++)
4697 {
4698 tree op1, op2;
4699
4700 /* Determine whether we should use results of
4701 already handled statements or not. */
4702 if (ready_stmts_end == 0
4703 && (i - stmt_index >= width || op_index < 1))
4704 ready_stmts_end = i;
4705
4706       /* Now we choose operands for the next statement. A nonzero
4707 	 value in ready_stmts_end here means that we should use
4708 	 the results of already generated statements as new operands. */
4709 if (ready_stmts_end > 0)
4710 {
4711 op1 = gimple_assign_lhs (stmts[stmt_index++]);
4712 if (ready_stmts_end > stmt_index)
4713 op2 = gimple_assign_lhs (stmts[stmt_index++]);
4714 else if (op_index >= 0)
4715 {
4716 operand_entry *oe = ops[op_index--];
4717 stmt2 = oe->stmt_to_insert;
4718 op2 = oe->op;
4719 }
4720 else
4721 {
4722 gcc_assert (stmt_index < i);
4723 op2 = gimple_assign_lhs (stmts[stmt_index++]);
4724 }
4725
4726 if (stmt_index >= ready_stmts_end)
4727 ready_stmts_end = 0;
4728 }
4729 else
4730 {
4731 if (op_index > 1)
4732 swap_ops_for_binary_stmt (ops, op_index - 2, NULL);
4733 operand_entry *oe2 = ops[op_index--];
4734 operand_entry *oe1 = ops[op_index--];
4735 op2 = oe2->op;
4736 stmt2 = oe2->stmt_to_insert;
4737 op1 = oe1->op;
4738 stmt1 = oe1->stmt_to_insert;
4739 }
4740
4741 /* If we emit the last statement then we should put
4742 operands into the last statement. It will also
4743 break the loop. */
4744 if (op_index < 0 && stmt_index == i)
4745 i = stmt_num - 1;
4746
4747 if (dump_file && (dump_flags & TDF_DETAILS))
4748 {
4749 fprintf (dump_file, "Transforming ");
4750 print_gimple_stmt (dump_file, stmts[i], 0);
4751 }
4752
4753 /* If the stmt that defines operand has to be inserted, insert it
4754 before the use. */
4755 if (stmt1)
4756 insert_stmt_before_use (stmts[i], stmt1);
4757 if (stmt2)
4758 insert_stmt_before_use (stmts[i], stmt2);
4759 stmt1 = stmt2 = NULL;
4760
4761 /* We keep original statement only for the last one. All
4762 others are recreated. */
4763 if (i == stmt_num - 1)
4764 {
4765 gimple_assign_set_rhs1 (stmts[i], op1);
4766 gimple_assign_set_rhs2 (stmts[i], op2);
4767 update_stmt (stmts[i]);
4768 }
4769 else
4770 {
4771 stmts[i] = build_and_add_sum (TREE_TYPE (last_rhs1), op1, op2, opcode);
4772 gimple_set_visited (stmts[i], true);
4773 }
4774 if (dump_file && (dump_flags & TDF_DETAILS))
4775 {
4776 fprintf (dump_file, " into ");
4777 print_gimple_stmt (dump_file, stmts[i], 0);
4778 }
4779 }
4780
4781 remove_visited_stmt_chain (last_rhs1);
4782 }
4783
4784 /* Transform STMT, which is really (A + B) + (C + D), into the left
4785 linear form, ((A+B)+C)+D.
4786 Recurse on D if necessary. */
4787
4788 static void
4789 linearize_expr (gimple *stmt)
4790 {
4791 gimple_stmt_iterator gsi;
4792 gimple *binlhs = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
4793 gimple *binrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
4794 gimple *oldbinrhs = binrhs;
4795 enum tree_code rhscode = gimple_assign_rhs_code (stmt);
4796 gimple *newbinrhs = NULL;
4797 struct loop *loop = loop_containing_stmt (stmt);
4798 tree lhs = gimple_assign_lhs (stmt);
4799
4800 gcc_assert (is_reassociable_op (binlhs, rhscode, loop)
4801 && is_reassociable_op (binrhs, rhscode, loop));
4802
4803 gsi = gsi_for_stmt (stmt);
4804
4805 gimple_assign_set_rhs2 (stmt, gimple_assign_rhs1 (binrhs));
4806 binrhs = gimple_build_assign (make_ssa_name (TREE_TYPE (lhs)),
4807 gimple_assign_rhs_code (binrhs),
4808 gimple_assign_lhs (binlhs),
4809 gimple_assign_rhs2 (binrhs));
4810 gimple_assign_set_rhs1 (stmt, gimple_assign_lhs (binrhs));
4811 gsi_insert_before (&gsi, binrhs, GSI_SAME_STMT);
4812 gimple_set_uid (binrhs, gimple_uid (stmt));
4813
4814 if (TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME)
4815 newbinrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
4816
4817 if (dump_file && (dump_flags & TDF_DETAILS))
4818 {
4819 fprintf (dump_file, "Linearized: ");
4820 print_gimple_stmt (dump_file, stmt, 0);
4821 }
4822
4823 reassociate_stats.linearized++;
4824 update_stmt (stmt);
4825
4826 gsi = gsi_for_stmt (oldbinrhs);
4827 reassoc_remove_stmt (&gsi);
4828 release_defs (oldbinrhs);
4829
4830 gimple_set_visited (stmt, true);
4831 gimple_set_visited (binlhs, true);
4832 gimple_set_visited (binrhs, true);
4833
4834 /* Tail recurse on the new rhs if it still needs reassociation. */
4835 if (newbinrhs && is_reassociable_op (newbinrhs, rhscode, loop))
4836 /* ??? This should probably be linearize_expr (newbinrhs) but I don't
4837 want to change the algorithm while converting to tuples. */
4838 linearize_expr (stmt);
4839 }
4840
4841 /* If LHS has a single immediate use that is a GIMPLE_ASSIGN statement, return
4842 it. Otherwise, return NULL. */
4843
4844 static gimple *
4845 get_single_immediate_use (tree lhs)
4846 {
4847 use_operand_p immuse;
4848 gimple *immusestmt;
4849
4850 if (TREE_CODE (lhs) == SSA_NAME
4851 && single_imm_use (lhs, &immuse, &immusestmt)
4852 && is_gimple_assign (immusestmt))
4853 return immusestmt;
4854
4855 return NULL;
4856 }
4857
4858 /* Recursively negate the value of TONEGATE, and return the SSA_NAME
4859 representing the negated value. Insertions of any necessary
4860 instructions go before GSI.
4861 This function is recursive in that, if you hand it "a_5" as the
4862 value to negate, and a_5 is defined by "a_5 = b_3 + b_4", it will
4863 transform b_3 + b_4 into a_5 = -b_3 + -b_4. */
4864
4865 static tree
4866 negate_value (tree tonegate, gimple_stmt_iterator *gsip)
4867 {
4868 gimple *negatedefstmt = NULL;
4869 tree resultofnegate;
4870 gimple_stmt_iterator gsi;
4871 unsigned int uid;
4872
4873 /* If we are trying to negate a name, defined by an add, negate the
4874 add operands instead. */
4875 if (TREE_CODE (tonegate) == SSA_NAME)
4876 negatedefstmt = SSA_NAME_DEF_STMT (tonegate);
4877 if (TREE_CODE (tonegate) == SSA_NAME
4878 && is_gimple_assign (negatedefstmt)
4879 && TREE_CODE (gimple_assign_lhs (negatedefstmt)) == SSA_NAME
4880 && has_single_use (gimple_assign_lhs (negatedefstmt))
4881 && gimple_assign_rhs_code (negatedefstmt) == PLUS_EXPR)
4882 {
4883 tree rhs1 = gimple_assign_rhs1 (negatedefstmt);
4884 tree rhs2 = gimple_assign_rhs2 (negatedefstmt);
4885 tree lhs = gimple_assign_lhs (negatedefstmt);
4886 gimple *g;
4887
4888 gsi = gsi_for_stmt (negatedefstmt);
4889 rhs1 = negate_value (rhs1, &gsi);
4890
4891 gsi = gsi_for_stmt (negatedefstmt);
4892 rhs2 = negate_value (rhs2, &gsi);
4893
4894 gsi = gsi_for_stmt (negatedefstmt);
4895 lhs = make_ssa_name (TREE_TYPE (lhs));
4896 gimple_set_visited (negatedefstmt, true);
4897 g = gimple_build_assign (lhs, PLUS_EXPR, rhs1, rhs2);
4898 gimple_set_uid (g, gimple_uid (negatedefstmt));
4899 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
4900 return lhs;
4901 }
4902
4903 tonegate = fold_build1 (NEGATE_EXPR, TREE_TYPE (tonegate), tonegate);
4904 resultofnegate = force_gimple_operand_gsi (gsip, tonegate, true,
4905 NULL_TREE, true, GSI_SAME_STMT);
4906 gsi = *gsip;
4907 uid = gimple_uid (gsi_stmt (gsi));
4908 for (gsi_prev (&gsi); !gsi_end_p (gsi); gsi_prev (&gsi))
4909 {
4910 gimple *stmt = gsi_stmt (gsi);
4911 if (gimple_uid (stmt) != 0)
4912 break;
4913 gimple_set_uid (stmt, uid);
4914 }
4915 return resultofnegate;
4916 }
4917
4918 /* Return true if we should break up the subtract in STMT into an add
4919    with negate. This is true when the subtract operands are really
4920 adds, or the subtract itself is used in an add expression. In
4921 either case, breaking up the subtract into an add with negate
4922 exposes the adds to reassociation. */
4923
4924 static bool
4925 should_break_up_subtract (gimple *stmt)
4926 {
4927 tree lhs = gimple_assign_lhs (stmt);
4928 tree binlhs = gimple_assign_rhs1 (stmt);
4929 tree binrhs = gimple_assign_rhs2 (stmt);
4930 gimple *immusestmt;
4931 struct loop *loop = loop_containing_stmt (stmt);
4932
4933 if (TREE_CODE (binlhs) == SSA_NAME
4934 && is_reassociable_op (SSA_NAME_DEF_STMT (binlhs), PLUS_EXPR, loop))
4935 return true;
4936
4937 if (TREE_CODE (binrhs) == SSA_NAME
4938 && is_reassociable_op (SSA_NAME_DEF_STMT (binrhs), PLUS_EXPR, loop))
4939 return true;
4940
4941 if (TREE_CODE (lhs) == SSA_NAME
4942 && (immusestmt = get_single_immediate_use (lhs))
4943 && is_gimple_assign (immusestmt)
4944 && (gimple_assign_rhs_code (immusestmt) == PLUS_EXPR
4945 || (gimple_assign_rhs_code (immusestmt) == MINUS_EXPR
4946 && gimple_assign_rhs1 (immusestmt) == lhs)
4947 || gimple_assign_rhs_code (immusestmt) == MULT_EXPR))
4948 return true;
4949 return false;
4950 }
4951
4952 /* Transform STMT from A - B into A + -B. */
4953
4954 static void
4955 break_up_subtract (gimple *stmt, gimple_stmt_iterator *gsip)
4956 {
4957 tree rhs1 = gimple_assign_rhs1 (stmt);
4958 tree rhs2 = gimple_assign_rhs2 (stmt);
4959
4960 if (dump_file && (dump_flags & TDF_DETAILS))
4961 {
4962 fprintf (dump_file, "Breaking up subtract ");
4963 print_gimple_stmt (dump_file, stmt, 0);
4964 }
4965
4966 rhs2 = negate_value (rhs2, gsip);
4967 gimple_assign_set_rhs_with_ops (gsip, PLUS_EXPR, rhs1, rhs2);
4968 update_stmt (stmt);
4969 }
4970
4971 /* Determine whether STMT is a builtin call that raises an SSA name
4972 to an integer power and has only one use. If so, and this is early
4973 reassociation and unsafe math optimizations are permitted, place
4974 the SSA name in *BASE and the exponent in *EXPONENT, and return TRUE.
4975 If any of these conditions does not hold, return FALSE. */
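/* E.g. (hypothetical call) for  t_5 = pow (x_1, 4.0)  this returns
   true with *BASE = x_1 and *EXPONENT = 4, whereas pow (x_1, 0.5) or
   a negative or non-constant exponent is rejected. */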
4976
4977 static bool
4978 acceptable_pow_call (gcall *stmt, tree *base, HOST_WIDE_INT *exponent)
4979 {
4980 tree arg1;
4981 REAL_VALUE_TYPE c, cint;
4982
4983 switch (gimple_call_combined_fn (stmt))
4984 {
4985 CASE_CFN_POW:
4986 if (flag_errno_math)
4987 return false;
4988
4989 *base = gimple_call_arg (stmt, 0);
4990 arg1 = gimple_call_arg (stmt, 1);
4991
4992 if (TREE_CODE (arg1) != REAL_CST)
4993 return false;
4994
4995 c = TREE_REAL_CST (arg1);
4996
4997 if (REAL_EXP (&c) > HOST_BITS_PER_WIDE_INT)
4998 return false;
4999
5000 *exponent = real_to_integer (&c);
5001 real_from_integer (&cint, VOIDmode, *exponent, SIGNED);
5002 if (!real_identical (&c, &cint))
5003 return false;
5004
5005 break;
5006
5007 CASE_CFN_POWI:
5008 *base = gimple_call_arg (stmt, 0);
5009 arg1 = gimple_call_arg (stmt, 1);
5010
5011 if (!tree_fits_shwi_p (arg1))
5012 return false;
5013
5014 *exponent = tree_to_shwi (arg1);
5015 break;
5016
5017 default:
5018 return false;
5019 }
5020
5021 /* Expanding negative exponents is generally unproductive, so we don't
5022 complicate matters with those. Exponents of zero and one should
5023 have been handled by expression folding. */
5024 if (*exponent < 2 || TREE_CODE (*base) != SSA_NAME)
5025 return false;
5026
5027 return true;
5028 }
5029
5030 /* Try to derive and add an operand entry for OP to *OPS. Return false
5031    if unsuccessful. */
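/* E.g. (made-up names) for a MULT_EXPR chain containing OP = _3 where
       _3 = -_2;
   the single entry for _3 is replaced by the two operands _2 and -1,
   letting the negation reassociate with the rest of the product. */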
5032
5033 static bool
5034 try_special_add_to_ops (vec<operand_entry *> *ops,
5035 enum tree_code code,
5036 tree op, gimple* def_stmt)
5037 {
5038 tree base = NULL_TREE;
5039 HOST_WIDE_INT exponent = 0;
5040
5041 if (TREE_CODE (op) != SSA_NAME
5042 || ! has_single_use (op))
5043 return false;
5044
5045 if (code == MULT_EXPR
5046 && reassoc_insert_powi_p
5047 && flag_unsafe_math_optimizations
5048 && is_gimple_call (def_stmt)
5049 && acceptable_pow_call (as_a <gcall *> (def_stmt), &base, &exponent))
5050 {
5051 add_repeat_to_ops_vec (ops, base, exponent);
5052 gimple_set_visited (def_stmt, true);
5053 return true;
5054 }
5055 else if (code == MULT_EXPR
5056 && is_gimple_assign (def_stmt)
5057 && gimple_assign_rhs_code (def_stmt) == NEGATE_EXPR
5058 && !HONOR_SNANS (TREE_TYPE (op))
5059 && (!HONOR_SIGNED_ZEROS (TREE_TYPE (op))
5060 || !COMPLEX_FLOAT_TYPE_P (TREE_TYPE (op))))
5061 {
5062 tree rhs1 = gimple_assign_rhs1 (def_stmt);
5063 tree cst = build_minus_one_cst (TREE_TYPE (op));
5064 add_to_ops_vec (ops, rhs1);
5065 add_to_ops_vec (ops, cst);
5066 gimple_set_visited (def_stmt, true);
5067 return true;
5068 }
5069
5070 return false;
5071 }
5072
5073 /* Recursively linearize a binary expression that is the RHS of STMT.
5074 Place the operands of the expression tree in the vector named OPS. */
5075
5076 static void
5077 linearize_expr_tree (vec<operand_entry *> *ops, gimple *stmt,
5078 bool is_associative, bool set_visited)
5079 {
5080 tree binlhs = gimple_assign_rhs1 (stmt);
5081 tree binrhs = gimple_assign_rhs2 (stmt);
5082 gimple *binlhsdef = NULL, *binrhsdef = NULL;
5083 bool binlhsisreassoc = false;
5084 bool binrhsisreassoc = false;
5085 enum tree_code rhscode = gimple_assign_rhs_code (stmt);
5086 struct loop *loop = loop_containing_stmt (stmt);
5087
5088 if (set_visited)
5089 gimple_set_visited (stmt, true);
5090
5091 if (TREE_CODE (binlhs) == SSA_NAME)
5092 {
5093 binlhsdef = SSA_NAME_DEF_STMT (binlhs);
5094 binlhsisreassoc = (is_reassociable_op (binlhsdef, rhscode, loop)
5095 && !stmt_could_throw_p (binlhsdef));
5096 }
5097
5098 if (TREE_CODE (binrhs) == SSA_NAME)
5099 {
5100 binrhsdef = SSA_NAME_DEF_STMT (binrhs);
5101 binrhsisreassoc = (is_reassociable_op (binrhsdef, rhscode, loop)
5102 && !stmt_could_throw_p (binrhsdef));
5103 }
5104
5105 /* If the LHS is not reassociable, but the RHS is, we need to swap
5106 them. If neither is reassociable, there is nothing we can do, so
5107 just put them in the ops vector. If the LHS is reassociable,
5108 linearize it. If both are reassociable, then linearize the RHS
5109 and the LHS. */
5110
5111 if (!binlhsisreassoc)
5112 {
5113       /* If this is not an associative operation, such as division, give up. */
5114 if (!is_associative)
5115 {
5116 add_to_ops_vec (ops, binrhs);
5117 return;
5118 }
5119
5120 if (!binrhsisreassoc)
5121 {
5122 if (!try_special_add_to_ops (ops, rhscode, binrhs, binrhsdef))
5123 add_to_ops_vec (ops, binrhs);
5124
5125 if (!try_special_add_to_ops (ops, rhscode, binlhs, binlhsdef))
5126 add_to_ops_vec (ops, binlhs);
5127
5128 return;
5129 }
5130
5131 if (dump_file && (dump_flags & TDF_DETAILS))
5132 {
5133 fprintf (dump_file, "swapping operands of ");
5134 print_gimple_stmt (dump_file, stmt, 0);
5135 }
5136
5137 swap_ssa_operands (stmt,
5138 gimple_assign_rhs1_ptr (stmt),
5139 gimple_assign_rhs2_ptr (stmt));
5140 update_stmt (stmt);
5141
5142 if (dump_file && (dump_flags & TDF_DETAILS))
5143 {
5144 fprintf (dump_file, " is now ");
5145 print_gimple_stmt (dump_file, stmt, 0);
5146 }
5147
5148 /* We want to make it so the lhs is always the reassociative op,
5149 so swap. */
5150 std::swap (binlhs, binrhs);
5151 }
5152 else if (binrhsisreassoc)
5153 {
5154 linearize_expr (stmt);
5155 binlhs = gimple_assign_rhs1 (stmt);
5156 binrhs = gimple_assign_rhs2 (stmt);
5157 }
5158
5159 gcc_assert (TREE_CODE (binrhs) != SSA_NAME
5160 || !is_reassociable_op (SSA_NAME_DEF_STMT (binrhs),
5161 rhscode, loop));
5162 linearize_expr_tree (ops, SSA_NAME_DEF_STMT (binlhs),
5163 is_associative, set_visited);
5164
5165 if (!try_special_add_to_ops (ops, rhscode, binrhs, binrhsdef))
5166 add_to_ops_vec (ops, binrhs);
5167 }
5168
5169 /* Repropagate the negates back into subtracts, since no other pass
5170 currently does it. */
5171
5172 static void
5173 repropagate_negates (void)
5174 {
5175 unsigned int i = 0;
5176 tree negate;
5177
5178 FOR_EACH_VEC_ELT (plus_negates, i, negate)
5179 {
5180 gimple *user = get_single_immediate_use (negate);
5181
5182 if (!user || !is_gimple_assign (user))
5183 continue;
5184
5185 /* The negate operand can be either operand of a PLUS_EXPR
5186 (it can be the LHS if the RHS is a constant for example).
5187
5188 Force the negate operand to the RHS of the PLUS_EXPR, then
5189 transform the PLUS_EXPR into a MINUS_EXPR. */
5190 if (gimple_assign_rhs_code (user) == PLUS_EXPR)
5191 {
5192 /* If the negated operand appears on the LHS of the
5193 PLUS_EXPR, exchange the operands of the PLUS_EXPR
5194 to force the negated operand to the RHS of the PLUS_EXPR. */
5195 if (gimple_assign_rhs1 (user) == negate)
5196 {
5197 swap_ssa_operands (user,
5198 gimple_assign_rhs1_ptr (user),
5199 gimple_assign_rhs2_ptr (user));
5200 }
5201
5202 /* Now transform the PLUS_EXPR into a MINUS_EXPR and replace
5203 the RHS of the PLUS_EXPR with the operand of the NEGATE_EXPR. */
5204 if (gimple_assign_rhs2 (user) == negate)
5205 {
5206 tree rhs1 = gimple_assign_rhs1 (user);
5207 tree rhs2 = gimple_assign_rhs1 (SSA_NAME_DEF_STMT (negate));
5208 gimple_stmt_iterator gsi = gsi_for_stmt (user);
5209 gimple_assign_set_rhs_with_ops (&gsi, MINUS_EXPR, rhs1, rhs2);
5210 update_stmt (user);
5211 }
5212 }
5213 else if (gimple_assign_rhs_code (user) == MINUS_EXPR)
5214 {
5215 if (gimple_assign_rhs1 (user) == negate)
5216 {
5217 /* We have
5218 x = -a
5219 y = x - b
5220 which we transform into
5221 x = a + b
5222 y = -x .
5223 This pushes down the negate which we possibly can merge
5224 into some other operation, hence insert it into the
5225 plus_negates vector. */
5226 gimple *feed = SSA_NAME_DEF_STMT (negate);
5227 tree a = gimple_assign_rhs1 (feed);
5228 tree b = gimple_assign_rhs2 (user);
5229 gimple_stmt_iterator gsi = gsi_for_stmt (feed);
5230 gimple_stmt_iterator gsi2 = gsi_for_stmt (user);
5231 tree x = make_ssa_name (TREE_TYPE (gimple_assign_lhs (feed)));
5232 gimple *g = gimple_build_assign (x, PLUS_EXPR, a, b);
5233 gsi_insert_before (&gsi2, g, GSI_SAME_STMT);
5234 gimple_assign_set_rhs_with_ops (&gsi2, NEGATE_EXPR, x);
5235 user = gsi_stmt (gsi2);
5236 update_stmt (user);
5237 reassoc_remove_stmt (&gsi);
5238 release_defs (feed);
5239 plus_negates.safe_push (gimple_assign_lhs (user));
5240 }
5241 else
5242 {
5243 /* Transform "x = -a; y = b - x" into "y = b + a", getting
5244 rid of one operation. */
5245 gimple *feed = SSA_NAME_DEF_STMT (negate);
5246 tree a = gimple_assign_rhs1 (feed);
5247 tree rhs1 = gimple_assign_rhs1 (user);
5248 gimple_stmt_iterator gsi = gsi_for_stmt (user);
5249 gimple_assign_set_rhs_with_ops (&gsi, PLUS_EXPR, rhs1, a);
5250 update_stmt (gsi_stmt (gsi));
5251 }
5252 }
5253 }
5254 }
5255
5256 /* Returns true if OP is of a type for which we can do reassociation.
5257    That is for integral or non-saturating fixed-point types, and for
5258    floating-point types when associative math is enabled. */
5259
5260 static bool
5261 can_reassociate_p (tree op)
5262 {
5263 tree type = TREE_TYPE (op);
5264 if (TREE_CODE (op) == SSA_NAME && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (op))
5265 return false;
5266 if ((ANY_INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_WRAPS (type))
5267 || NON_SAT_FIXED_POINT_TYPE_P (type)
5268 || (flag_associative_math && FLOAT_TYPE_P (type)))
5269 return true;
5270 return false;
5271 }
5272
5273 /* Break up subtract operations in block BB.
5274
5275 We do this top down because we don't know whether the subtract is
5276 part of a possible chain of reassociation except at the top.
5277
5278 IE given
5279 d = f + g
5280 c = a + e
5281 b = c - d
5282 q = b - r
5283 k = t - q
5284
5285 we want to break up k = t - q, but we won't until we've transformed q
5286 = b - r, which won't be broken up until we transform b = c - d.
5287
5288 En passant, clear the GIMPLE visited flag on every statement
5289 and set UIDs within each basic block. */
5290
5291 static void
5292 break_up_subtract_bb (basic_block bb)
5293 {
5294 gimple_stmt_iterator gsi;
5295 basic_block son;
5296 unsigned int uid = 1;
5297
5298 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
5299 {
5300 gimple *stmt = gsi_stmt (gsi);
5301 gimple_set_visited (stmt, false);
5302 gimple_set_uid (stmt, uid++);
5303
5304 if (!is_gimple_assign (stmt)
5305 || !can_reassociate_p (gimple_assign_lhs (stmt)))
5306 continue;
5307
5308 /* Look for simple gimple subtract operations. */
5309 if (gimple_assign_rhs_code (stmt) == MINUS_EXPR)
5310 {
5311 if (!can_reassociate_p (gimple_assign_rhs1 (stmt))
5312 || !can_reassociate_p (gimple_assign_rhs2 (stmt)))
5313 continue;
5314
5315 /* Check for a subtract used only in an addition. If this
5316 is the case, transform it into add of a negate for better
5317 reassociation. IE transform C = A-B into C = A + -B if C
5318 is only used in an addition. */
5319 if (should_break_up_subtract (stmt))
5320 break_up_subtract (stmt, &gsi);
5321 }
5322 else if (gimple_assign_rhs_code (stmt) == NEGATE_EXPR
5323 && can_reassociate_p (gimple_assign_rhs1 (stmt)))
5324 plus_negates.safe_push (gimple_assign_lhs (stmt));
5325 }
5326 for (son = first_dom_son (CDI_DOMINATORS, bb);
5327 son;
5328 son = next_dom_son (CDI_DOMINATORS, son))
5329 break_up_subtract_bb (son);
5330 }
5331
5332 /* Used for repeated factor analysis. */
5333 struct repeat_factor
5334 {
5335 /* An SSA name that occurs in a multiply chain. */
5336 tree factor;
5337
5338 /* Cached rank of the factor. */
5339 unsigned rank;
5340
5341 /* Number of occurrences of the factor in the chain. */
5342 HOST_WIDE_INT count;
5343
5344 /* An SSA name representing the product of this factor and
5345 all factors appearing later in the repeated factor vector. */
5346 tree repr;
5347 };
5348
5349
5350 static vec<repeat_factor> repeat_factor_vec;
5351
5352 /* Used for sorting the repeat factor vector. Sort primarily by
5353 ascending occurrence count, secondarily by descending rank. */
5354
5355 static int
5356 compare_repeat_factors (const void *x1, const void *x2)
5357 {
5358 const repeat_factor *rf1 = (const repeat_factor *) x1;
5359 const repeat_factor *rf2 = (const repeat_factor *) x2;
5360
5361 if (rf1->count != rf2->count)
5362 return rf1->count - rf2->count;
5363
5364 return rf2->rank - rf1->rank;
5365 }
5366
5367 /* Look for repeated operands in OPS in the multiply tree rooted at
5368 STMT. Replace them with an optimal sequence of multiplies and powi
5369 builtin calls, and remove the used operands from OPS. Return an
5370 SSA name representing the value of the replacement sequence. */
5371
5372 static tree
5373 attempt_builtin_powi (gimple *stmt, vec<operand_entry *> *ops)
5374 {
5375 unsigned i, j, vec_len;
5376 int ii;
5377 operand_entry *oe;
5378 repeat_factor *rf1, *rf2;
5379 repeat_factor rfnew;
5380 tree result = NULL_TREE;
5381 tree target_ssa, iter_result;
5382 tree type = TREE_TYPE (gimple_get_lhs (stmt));
5383 tree powi_fndecl = mathfn_built_in (type, BUILT_IN_POWI);
5384 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
5385 gimple *mul_stmt, *pow_stmt;
5386
5387 /* Nothing to do if BUILT_IN_POWI doesn't exist for this type and
5388 target. */
5389 if (!powi_fndecl)
5390 return NULL_TREE;
5391
5392 /* Allocate the repeated factor vector. */
5393 repeat_factor_vec.create (10);
5394
5395 /* Scan the OPS vector for all SSA names in the product and build
5396 up a vector of occurrence counts for each factor. */
5397 FOR_EACH_VEC_ELT (*ops, i, oe)
5398 {
5399 if (TREE_CODE (oe->op) == SSA_NAME)
5400 {
5401 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
5402 {
5403 if (rf1->factor == oe->op)
5404 {
5405 rf1->count += oe->count;
5406 break;
5407 }
5408 }
5409
5410 if (j >= repeat_factor_vec.length ())
5411 {
5412 rfnew.factor = oe->op;
5413 rfnew.rank = oe->rank;
5414 rfnew.count = oe->count;
5415 rfnew.repr = NULL_TREE;
5416 repeat_factor_vec.safe_push (rfnew);
5417 }
5418 }
5419 }
5420
5421 /* Sort the repeated factor vector by (a) increasing occurrence count,
5422 and (b) decreasing rank. */
5423 repeat_factor_vec.qsort (compare_repeat_factors);
5424
  /* It is generally best to combine as many base factors as possible
     into a product before applying __builtin_powi to the result.
     However, the sort order chosen for the repeated factor vector
     allows us to cache partial results for the product of the base
     factors for subsequent use.  When we already have a cached partial
     result from a previous iteration, it is best to make use of it
     before looking for another __builtin_powi opportunity.

     As an example, consider x * x * y * y * y * z * z * z * z.
     We want to first compose the product x * y * z, raise it to the
     second power, then multiply this by y * z, and finally multiply
     by z.  This can be done in 5 multiplies provided we cache y * z
     for use in both expressions:

       t1 = y * z
       t2 = t1 * x
       t3 = t2 * t2
       t4 = t1 * t3
       result = t4 * z

     If we instead ignored the cached y * z and first multiplied by
     the __builtin_powi opportunity z * z, we would get the inferior:

       t1 = y * z
       t2 = t1 * x
       t3 = t2 * t2
       t4 = z * z
       t5 = t3 * t4
       result = t5 * y  */
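
  /* A sketch of how the loop below realizes the 5-multiply sequence
     above (added for exposition): the first pass finds no cached
     product, so the else-branch forms t1 = y * z and t2 = t1 * x,
     emits t3 = __builtin_powi (t2, 2), caches t1 and t2 in the REPR
     fields, and decrements every occurrence count by 2 (x:0, y:1,
     z:2).  The second pass finds cached t1 with count 1 and simply
     multiplies it in (t4 = t3 * t1); the third does the same for z
     (result = t4 * z), after which no opportunities remain.  */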

  vec_len = repeat_factor_vec.length ();

  /* Repeatedly look for opportunities to create a builtin_powi call.  */
  while (true)
    {
      HOST_WIDE_INT power;

      /* First look for the largest cached product of factors from
         preceding iterations.  If found, create a builtin_powi for
         it if the minimum occurrence count for its factors is at
         least 2, or just use this cached product as our next
         multiplicand if the minimum occurrence count is 1.  */
      FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
        {
          if (rf1->repr && rf1->count > 0)
            break;
        }

      if (j < vec_len)
        {
          power = rf1->count;

          if (power == 1)
            {
              iter_result = rf1->repr;

              if (dump_file && (dump_flags & TDF_DETAILS))
                {
                  unsigned elt;
                  repeat_factor *rf;
                  fputs ("Multiplying by cached product ", dump_file);
                  for (elt = j; elt < vec_len; elt++)
                    {
                      rf = &repeat_factor_vec[elt];
                      print_generic_expr (dump_file, rf->factor);
                      if (elt < vec_len - 1)
                        fputs (" * ", dump_file);
                    }
                  fputs ("\n", dump_file);
                }
            }
          else
            {
              iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
              pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
                                            build_int_cst (integer_type_node,
                                                           power));
              gimple_call_set_lhs (pow_stmt, iter_result);
              gimple_set_location (pow_stmt, gimple_location (stmt));
              gimple_set_uid (pow_stmt, gimple_uid (stmt));
              gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);

              if (dump_file && (dump_flags & TDF_DETAILS))
                {
                  unsigned elt;
                  repeat_factor *rf;
                  fputs ("Building __builtin_pow call for cached product (",
                         dump_file);
                  for (elt = j; elt < vec_len; elt++)
                    {
                      rf = &repeat_factor_vec[elt];
                      print_generic_expr (dump_file, rf->factor);
                      if (elt < vec_len - 1)
                        fputs (" * ", dump_file);
                    }
                  fprintf (dump_file, ")^" HOST_WIDE_INT_PRINT_DEC "\n",
                           power);
                }
            }
        }
      else
        {
          /* Otherwise, find the first factor in the repeated factor
             vector whose occurrence count is at least 2.  If no such
             factor exists, there are no builtin_powi opportunities
             remaining.  */
          FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
            {
              if (rf1->count >= 2)
                break;
            }

          if (j >= vec_len)
            break;

          power = rf1->count;

          if (dump_file && (dump_flags & TDF_DETAILS))
            {
              unsigned elt;
              repeat_factor *rf;
              fputs ("Building __builtin_pow call for (", dump_file);
              for (elt = j; elt < vec_len; elt++)
                {
                  rf = &repeat_factor_vec[elt];
                  print_generic_expr (dump_file, rf->factor);
                  if (elt < vec_len - 1)
                    fputs (" * ", dump_file);
                }
              fprintf (dump_file, ")^" HOST_WIDE_INT_PRINT_DEC "\n", power);
            }

          reassociate_stats.pows_created++;
          /* Visit each element of the vector in reverse order (so that
             high-occurrence elements are visited first, and within the
             same occurrence count, lower-ranked elements are visited
             first).  Form a linear product of all elements in this order
             whose occurrence count is at least that of element J.
             Record the SSA name representing the product of each element
             with all subsequent elements in the vector.  */
          if (j == vec_len - 1)
            rf1->repr = rf1->factor;
          else
            {
              for (ii = vec_len - 2; ii >= (int) j; ii--)
                {
                  tree op1, op2;

                  rf1 = &repeat_factor_vec[ii];
                  rf2 = &repeat_factor_vec[ii + 1];

                  /* Init the last factor's representative to be itself.  */
                  if (!rf2->repr)
                    rf2->repr = rf2->factor;

                  op1 = rf1->factor;
                  op2 = rf2->repr;

                  target_ssa = make_temp_ssa_name (type, NULL, "reassocpow");
                  mul_stmt = gimple_build_assign (target_ssa, MULT_EXPR,
                                                  op1, op2);
                  gimple_set_location (mul_stmt, gimple_location (stmt));
                  gimple_set_uid (mul_stmt, gimple_uid (stmt));
                  gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
                  rf1->repr = target_ssa;

                  /* Don't reprocess the multiply we just introduced.  */
                  gimple_set_visited (mul_stmt, true);
                }
            }

          /* Form a call to __builtin_powi for the maximum product
             just formed, raised to the power obtained earlier.  */
          rf1 = &repeat_factor_vec[j];
          iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
          pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
                                        build_int_cst (integer_type_node,
                                                       power));
          gimple_call_set_lhs (pow_stmt, iter_result);
          gimple_set_location (pow_stmt, gimple_location (stmt));
          gimple_set_uid (pow_stmt, gimple_uid (stmt));
          gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
        }

      /* If we previously formed at least one other builtin_powi call,
         form the product of this one and those others.  */
      if (result)
        {
          tree new_result = make_temp_ssa_name (type, NULL, "reassocpow");
          mul_stmt = gimple_build_assign (new_result, MULT_EXPR,
                                          result, iter_result);
          gimple_set_location (mul_stmt, gimple_location (stmt));
          gimple_set_uid (mul_stmt, gimple_uid (stmt));
          gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
          gimple_set_visited (mul_stmt, true);
          result = new_result;
        }
      else
        result = iter_result;

      /* Decrement the occurrence count of each element in the product
         by the count found above, and remove this many copies of each
         factor from OPS.  */
      for (i = j; i < vec_len; i++)
        {
          unsigned k = power;
          unsigned n;

          rf1 = &repeat_factor_vec[i];
          rf1->count -= power;

          FOR_EACH_VEC_ELT_REVERSE (*ops, n, oe)
            {
              if (oe->op == rf1->factor)
                {
                  if (oe->count <= k)
                    {
                      ops->ordered_remove (n);
                      k -= oe->count;

                      if (k == 0)
                        break;
                    }
                  else
                    {
                      oe->count -= k;
                      break;
                    }
                }
            }
        }
    }

  /* At this point all elements in the repeated factor vector have a
     remaining occurrence count of 0 or 1, and those with a count of 1
     don't have cached representatives.  Re-sort the ops vector and
     clean up.  */
  ops->qsort (sort_by_operand_rank);
  repeat_factor_vec.release ();

  /* Return the final product computed herein.  Note that there may
     still be some elements with single occurrence count left in OPS;
     those will be handled by the normal reassociation logic.  */
  return result;
}

/* Attempt to optimize
   CST1 * copysign (CST2, y) -> copysign (CST1 * CST2, y) if CST1 > 0, or
   CST1 * copysign (CST2, y) -> -copysign (CST1 * CST2, y) if CST1 < 0.  */

static void
attempt_builtin_copysign (vec<operand_entry *> *ops)
{
  operand_entry *oe;
  unsigned int i;
  unsigned int length = ops->length ();
  tree cst = ops->last ()->op;

  if (length == 1 || TREE_CODE (cst) != REAL_CST)
    return;

  FOR_EACH_VEC_ELT (*ops, i, oe)
    {
      if (TREE_CODE (oe->op) == SSA_NAME
          && has_single_use (oe->op))
        {
          gimple *def_stmt = SSA_NAME_DEF_STMT (oe->op);
          if (gcall *old_call = dyn_cast <gcall *> (def_stmt))
            {
              tree arg0, arg1;
              switch (gimple_call_combined_fn (old_call))
                {
                CASE_CFN_COPYSIGN:
                CASE_CFN_COPYSIGN_FN:
                  arg0 = gimple_call_arg (old_call, 0);
                  arg1 = gimple_call_arg (old_call, 1);
                  /* The first argument of copysign must be a constant,
                     otherwise there's nothing to do.  */
                  if (TREE_CODE (arg0) == REAL_CST)
                    {
                      tree type = TREE_TYPE (arg0);
                      tree mul = const_binop (MULT_EXPR, type, cst, arg0);
                      /* If we couldn't fold to a single constant, skip it.
                         That happens e.g. for inexact multiplication when
                         -frounding-math.  */
                      if (mul == NULL_TREE)
                        break;
                      /* Instead of adjusting OLD_CALL, let's build a new
                         call to not leak the LHS and prevent keeping bogus
                         debug statements.  DCE will clean up the old call.  */
                      gcall *new_call;
                      if (gimple_call_internal_p (old_call))
                        new_call = gimple_build_call_internal
                          (IFN_COPYSIGN, 2, mul, arg1);
                      else
                        new_call = gimple_build_call
                          (gimple_call_fndecl (old_call), 2, mul, arg1);
                      tree lhs = make_ssa_name (type);
                      gimple_call_set_lhs (new_call, lhs);
                      gimple_set_location (new_call,
                                           gimple_location (old_call));
                      insert_stmt_after (new_call, old_call);
                      /* We've used the constant, get rid of it.  */
                      ops->pop ();
                      bool cst1_neg = real_isneg (TREE_REAL_CST_PTR (cst));
                      /* Handle the CST1 < 0 case by negating the result.  */
                      if (cst1_neg)
                        {
                          tree negrhs = make_ssa_name (TREE_TYPE (lhs));
                          gimple *negate_stmt
                            = gimple_build_assign (negrhs, NEGATE_EXPR, lhs);
                          insert_stmt_after (negate_stmt, new_call);
                          oe->op = negrhs;
                        }
                      else
                        oe->op = lhs;
                      if (dump_file && (dump_flags & TDF_DETAILS))
                        {
                          fprintf (dump_file, "Optimizing copysign: ");
                          print_generic_expr (dump_file, cst);
                          fprintf (dump_file, " * COPYSIGN (");
                          print_generic_expr (dump_file, arg0);
                          fprintf (dump_file, ", ");
                          print_generic_expr (dump_file, arg1);
                          fprintf (dump_file, ") into %sCOPYSIGN (",
                                   cst1_neg ? "-" : "");
                          print_generic_expr (dump_file, mul);
                          fprintf (dump_file, ", ");
                          print_generic_expr (dump_file, arg1);
                          fprintf (dump_file, ")\n");
                        }
                      return;
                    }
                  break;
                default:
                  break;
                }
            }
        }
    }
}
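
/* A concrete instance (illustration only): with OPS ending in the
   constant -2.0 and containing t_1 = copysign (3.0, y_2), the code
   above folds CST1 * CST2 to -6.0 and emits

     t_3 = copysign (-6.0, y_2);
     t_4 = -t_3;

   replacing t_1 by t_4 in OPS and popping the constant.  Since
   copysign ignores the sign of its first argument, this equals
   -copysign (6.0, y_2), matching the CST1 < 0 rule in the function
   comment.  */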

/* Transform STMT at *GSI into a copy by replacing its rhs with NEW_RHS.  */

static void
transform_stmt_to_copy (gimple_stmt_iterator *gsi, gimple *stmt, tree new_rhs)
{
  tree rhs1;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Transforming ");
      print_gimple_stmt (dump_file, stmt, 0);
    }

  rhs1 = gimple_assign_rhs1 (stmt);
  gimple_assign_set_rhs_from_tree (gsi, new_rhs);
  update_stmt (stmt);
  remove_visited_stmt_chain (rhs1);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, " into ");
      print_gimple_stmt (dump_file, stmt, 0);
    }
}

/* Transform STMT at *GSI into a multiply of RHS1 and RHS2.  */

static void
transform_stmt_to_multiply (gimple_stmt_iterator *gsi, gimple *stmt,
                            tree rhs1, tree rhs2)
{
  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Transforming ");
      print_gimple_stmt (dump_file, stmt, 0);
    }

  gimple_assign_set_rhs_with_ops (gsi, MULT_EXPR, rhs1, rhs2);
  update_stmt (gsi_stmt (*gsi));
  remove_visited_stmt_chain (rhs1);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, " into ");
      print_gimple_stmt (dump_file, stmt, 0);
    }
}

/* Reassociate expressions in basic block BB and its children in the
   post-dominator tree.

   Bubble up return status from maybe_optimize_range_tests.  */

static bool
reassociate_bb (basic_block bb)
{
  gimple_stmt_iterator gsi;
  basic_block son;
  gimple *stmt = last_stmt (bb);
  bool cfg_cleanup_needed = false;

  if (stmt && !gimple_visited_p (stmt))
    cfg_cleanup_needed |= maybe_optimize_range_tests (stmt);

  for (gsi = gsi_last_bb (bb); !gsi_end_p (gsi); gsi_prev (&gsi))
    {
      stmt = gsi_stmt (gsi);

      if (is_gimple_assign (stmt)
          && !stmt_could_throw_p (stmt))
        {
          tree lhs, rhs1, rhs2;
          enum tree_code rhs_code = gimple_assign_rhs_code (stmt);

          /* If this is not a gimple binary expression, there is
             nothing for us to do with it.  */
          if (get_gimple_rhs_class (rhs_code) != GIMPLE_BINARY_RHS)
            continue;

          /* If this was part of an already processed statement,
             we don't need to touch it again.  */
          if (gimple_visited_p (stmt))
            {
              /* This statement might have become dead because of previous
                 reassociations.  */
              if (has_zero_uses (gimple_get_lhs (stmt)))
                {
                  reassoc_remove_stmt (&gsi);
                  release_defs (stmt);
                  /* We might end up removing the last stmt above which
                     places the iterator to the end of the sequence.
                     Reset it to the last stmt in this case which might
                     be the end of the sequence as well if we removed
                     the last statement of the sequence.  In which case
                     we need to bail out.  */
                  if (gsi_end_p (gsi))
                    {
                      gsi = gsi_last_bb (bb);
                      if (gsi_end_p (gsi))
                        break;
                    }
                }
              continue;
            }

          lhs = gimple_assign_lhs (stmt);
          rhs1 = gimple_assign_rhs1 (stmt);
          rhs2 = gimple_assign_rhs2 (stmt);

          /* For non-bit or min/max operations we can't associate
             all types.  Verify that here.  */
          if (rhs_code != BIT_IOR_EXPR
              && rhs_code != BIT_AND_EXPR
              && rhs_code != BIT_XOR_EXPR
              && rhs_code != MIN_EXPR
              && rhs_code != MAX_EXPR
              && (!can_reassociate_p (lhs)
                  || !can_reassociate_p (rhs1)
                  || !can_reassociate_p (rhs2)))
            continue;

          if (associative_tree_code (rhs_code))
            {
              auto_vec<operand_entry *> ops;
              tree powi_result = NULL_TREE;
              bool is_vector = VECTOR_TYPE_P (TREE_TYPE (lhs));

              /* There may be no immediate uses left by the time we
                 get here because we may have eliminated them all.  */
              if (TREE_CODE (lhs) == SSA_NAME && has_zero_uses (lhs))
                continue;

              gimple_set_visited (stmt, true);
              linearize_expr_tree (&ops, stmt, true, true);
              ops.qsort (sort_by_operand_rank);
              int orig_len = ops.length ();
              optimize_ops_list (rhs_code, &ops);
              if (undistribute_ops_list (rhs_code, &ops,
                                         loop_containing_stmt (stmt)))
                {
                  ops.qsort (sort_by_operand_rank);
                  optimize_ops_list (rhs_code, &ops);
                }

              if (rhs_code == PLUS_EXPR
                  && transform_add_to_multiply (&ops))
                ops.qsort (sort_by_operand_rank);

              if (rhs_code == BIT_IOR_EXPR || rhs_code == BIT_AND_EXPR)
                {
                  if (is_vector)
                    optimize_vec_cond_expr (rhs_code, &ops);
                  else
                    optimize_range_tests (rhs_code, &ops, NULL);
                }

              if (rhs_code == MULT_EXPR && !is_vector)
                {
                  attempt_builtin_copysign (&ops);

                  if (reassoc_insert_powi_p
                      && flag_unsafe_math_optimizations)
                    powi_result = attempt_builtin_powi (stmt, &ops);
                }

              operand_entry *last;
              bool negate_result = false;
              if (ops.length () > 1
                  && rhs_code == MULT_EXPR)
                {
                  last = ops.last ();
                  if ((integer_minus_onep (last->op)
                       || real_minus_onep (last->op))
                      && !HONOR_SNANS (TREE_TYPE (lhs))
                      && (!HONOR_SIGNED_ZEROS (TREE_TYPE (lhs))
                          || !COMPLEX_FLOAT_TYPE_P (TREE_TYPE (lhs))))
                    {
                      ops.pop ();
                      negate_result = true;
                    }
                }

              tree new_lhs = lhs;
              /* If the operand vector is now empty, all operands were
                 consumed by the __builtin_powi optimization.  */
              if (ops.length () == 0)
                transform_stmt_to_copy (&gsi, stmt, powi_result);
              else if (ops.length () == 1)
                {
                  tree last_op = ops.last ()->op;

                  /* If the stmt that defines the operand has to be
                     inserted, insert it before the use.  */
                  if (ops.last ()->stmt_to_insert)
                    insert_stmt_before_use (stmt, ops.last ()->stmt_to_insert);
                  if (powi_result)
                    transform_stmt_to_multiply (&gsi, stmt, last_op,
                                                powi_result);
                  else
                    transform_stmt_to_copy (&gsi, stmt, last_op);
                }
              else
                {
                  machine_mode mode = TYPE_MODE (TREE_TYPE (lhs));
                  int ops_num = ops.length ();
                  int width = get_reassociation_width (ops_num, rhs_code, mode);

                  if (dump_file && (dump_flags & TDF_DETAILS))
                    fprintf (dump_file,
                             "Width = %d was chosen for reassociation\n", width);

                  /* For binary bit operations, if there are at least 3
                     operands and the last operand in OPS is a constant,
                     move it to the front.  This helps ensure that we generate
                     (X & Y) & C rather than (X & C) & Y.  The former will
                     often match a canonical bit test when we get to RTL.  */
                  if (ops.length () > 2
                      && (rhs_code == BIT_AND_EXPR
                          || rhs_code == BIT_IOR_EXPR
                          || rhs_code == BIT_XOR_EXPR)
                      && TREE_CODE (ops.last ()->op) == INTEGER_CST)
                    std::swap (*ops[0], *ops[ops_num - 1]);

                  if (width > 1
                      && ops.length () > 3)
                    rewrite_expr_tree_parallel (as_a <gassign *> (stmt),
                                                width, ops);
                  else
                    {
                      /* When there are three operands left, we want
                         to make sure the ones that get the double
                         binary op are chosen wisely.  */
                      int len = ops.length ();
                      if (len >= 3)
                        swap_ops_for_binary_stmt (ops, len - 3, stmt);

                      new_lhs = rewrite_expr_tree (stmt, 0, ops,
                                                   powi_result != NULL
                                                   || negate_result,
                                                   len != orig_len);
                    }

                  /* If we combined some repeated factors into a
                     __builtin_powi call, multiply that result by the
                     reassociated operands.  */
                  if (powi_result)
                    {
                      gimple *mul_stmt, *lhs_stmt = SSA_NAME_DEF_STMT (lhs);
                      tree type = TREE_TYPE (lhs);
                      tree target_ssa = make_temp_ssa_name (type, NULL,
                                                            "reassocpow");
                      gimple_set_lhs (lhs_stmt, target_ssa);
                      update_stmt (lhs_stmt);
                      if (lhs != new_lhs)
                        {
                          target_ssa = new_lhs;
                          new_lhs = lhs;
                        }
                      mul_stmt = gimple_build_assign (lhs, MULT_EXPR,
                                                      powi_result, target_ssa);
                      gimple_set_location (mul_stmt, gimple_location (stmt));
                      gimple_set_uid (mul_stmt, gimple_uid (stmt));
                      gsi_insert_after (&gsi, mul_stmt, GSI_NEW_STMT);
                    }
                }

              if (negate_result)
                {
                  stmt = SSA_NAME_DEF_STMT (lhs);
                  tree tmp = make_ssa_name (TREE_TYPE (lhs));
                  gimple_set_lhs (stmt, tmp);
                  if (lhs != new_lhs)
                    tmp = new_lhs;
                  gassign *neg_stmt = gimple_build_assign (lhs, NEGATE_EXPR,
                                                           tmp);
                  gimple_set_uid (neg_stmt, gimple_uid (stmt));
                  gsi_insert_after (&gsi, neg_stmt, GSI_NEW_STMT);
                  update_stmt (stmt);
                }
            }
        }
    }
  for (son = first_dom_son (CDI_POST_DOMINATORS, bb);
       son;
       son = next_dom_son (CDI_POST_DOMINATORS, son))
    cfg_cleanup_needed |= reassociate_bb (son);

  return cfg_cleanup_needed;
}
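
/* For illustration (not from the original sources): in the MULT_EXPR
   case above, a trailing -1 in the operand list is peeled off and
   reapplied as a single negate, e.g.

     t_1 = a_2 * b_3;
     t_4 = t_1 * -1;

   is rewritten so the product goes into a fresh temporary and the
   original LHS becomes its negation:

     t_5 = a_2 * b_3;
     t_4 = -t_5;  */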

/* Add jumps around shifts for range tests turned into bit tests.
   For each SSA_NAME VAR we have code like:
   VAR = ...; // final stmt of range comparison
   // bit test here...;
   OTHERVAR = ...; // final stmt of the bit test sequence
   RES = VAR | OTHERVAR;
   Turn the above into:
   <l1>:
   VAR = ...;
   if (VAR != 0)
     goto <l3>;
   else
     goto <l2>;
   <l2>:
   // bit test here...;
   OTHERVAR = ...;
   <l3>:
   # RES = PHI<1(l1), OTHERVAR(l2)>;  */

static void
branch_fixup (void)
{
  tree var;
  unsigned int i;

  FOR_EACH_VEC_ELT (reassoc_branch_fixups, i, var)
    {
      gimple *def_stmt = SSA_NAME_DEF_STMT (var);
      gimple *use_stmt;
      use_operand_p use;
      bool ok = single_imm_use (var, &use, &use_stmt);
      gcc_assert (ok
                  && is_gimple_assign (use_stmt)
                  && gimple_assign_rhs_code (use_stmt) == BIT_IOR_EXPR
                  && gimple_bb (def_stmt) == gimple_bb (use_stmt));

      basic_block cond_bb = gimple_bb (def_stmt);
      basic_block then_bb = split_block (cond_bb, def_stmt)->dest;
      basic_block merge_bb = split_block (then_bb, use_stmt)->dest;

      gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
      gimple *g = gimple_build_cond (NE_EXPR, var,
                                     build_zero_cst (TREE_TYPE (var)),
                                     NULL_TREE, NULL_TREE);
      location_t loc = gimple_location (use_stmt);
      gimple_set_location (g, loc);
      gsi_insert_after (&gsi, g, GSI_NEW_STMT);

      edge etrue = make_edge (cond_bb, merge_bb, EDGE_TRUE_VALUE);
      etrue->probability = profile_probability::even ();
      edge efalse = find_edge (cond_bb, then_bb);
      efalse->flags = EDGE_FALSE_VALUE;
      efalse->probability -= etrue->probability;
      then_bb->count -= etrue->count ();

      tree othervar = NULL_TREE;
      if (gimple_assign_rhs1 (use_stmt) == var)
        othervar = gimple_assign_rhs2 (use_stmt);
      else if (gimple_assign_rhs2 (use_stmt) == var)
        othervar = gimple_assign_rhs1 (use_stmt);
      else
        gcc_unreachable ();
      tree lhs = gimple_assign_lhs (use_stmt);
      gphi *phi = create_phi_node (lhs, merge_bb);
      add_phi_arg (phi, build_one_cst (TREE_TYPE (lhs)), etrue, loc);
      add_phi_arg (phi, othervar, single_succ_edge (then_bb), loc);
      gsi = gsi_for_stmt (use_stmt);
      gsi_remove (&gsi, true);

      set_immediate_dominator (CDI_DOMINATORS, merge_bb, cond_bb);
      set_immediate_dominator (CDI_POST_DOMINATORS, cond_bb, merge_bb);
    }
  reassoc_branch_fixups.release ();
}

void dump_ops_vector (FILE *file, vec<operand_entry *> ops);
void debug_ops_vector (vec<operand_entry *> ops);

/* Dump the operand entry vector OPS to FILE.  */

void
dump_ops_vector (FILE *file, vec<operand_entry *> ops)
{
  operand_entry *oe;
  unsigned int i;

  FOR_EACH_VEC_ELT (ops, i, oe)
    {
      fprintf (file, "Op %d -> rank: %d, tree: ", i, oe->rank);
      print_generic_expr (file, oe->op);
      fprintf (file, "\n");
    }
}

/* Dump the operand entry vector OPS to STDERR.  */

DEBUG_FUNCTION void
debug_ops_vector (vec<operand_entry *> ops)
{
  dump_ops_vector (stderr, ops);
}

/* Bubble up return status from reassociate_bb.  */

static bool
do_reassoc (void)
{
  break_up_subtract_bb (ENTRY_BLOCK_PTR_FOR_FN (cfun));
  return reassociate_bb (EXIT_BLOCK_PTR_FOR_FN (cfun));
}

/* Initialize the reassociation pass.  */

static void
init_reassoc (void)
{
  int i;
  long rank = 2;
  int *bbs = XNEWVEC (int, n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS);

  /* Find the loops, so that we can prevent moving calculations in
     them.  */
  loop_optimizer_init (AVOID_CFG_MODIFICATIONS);

  memset (&reassociate_stats, 0, sizeof (reassociate_stats));

  next_operand_entry_id = 0;

  /* A reverse post-order (RPO) walk gives us an ordering where
     deeper loops come later.  */
  pre_and_rev_post_order_compute (NULL, bbs, false);
  bb_rank = XCNEWVEC (long, last_basic_block_for_fn (cfun));
  operand_rank = new hash_map<tree, long>;

  /* Give each default definition a distinct rank.  This includes
     parameters and the static chain.  Walk backwards over all
     SSA names so that we get proper rank ordering according
     to tree_swap_operands_p.  */
  for (i = num_ssa_names - 1; i > 0; --i)
    {
      tree name = ssa_name (i);
      if (name && SSA_NAME_IS_DEFAULT_DEF (name))
        insert_operand_rank (name, ++rank);
    }

  /* Set up rank for each BB.  */
  for (i = 0; i < n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS; i++)
    bb_rank[bbs[i]] = ++rank << 16;
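
  /* Note added for exposition (not in the original sources): the
     16-bit shift presumably leaves a band of rank values below each
     block's base, so ranks derived for names computed within one
     block stay below the base rank of any later block in RPO.  */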

  free (bbs);
  calculate_dominance_info (CDI_POST_DOMINATORS);
  plus_negates = vNULL;
}

/* Cleanup after the reassociation pass, and print stats if
   requested.  */

static void
fini_reassoc (void)
{
  statistics_counter_event (cfun, "Linearized",
                            reassociate_stats.linearized);
  statistics_counter_event (cfun, "Constants eliminated",
                            reassociate_stats.constants_eliminated);
  statistics_counter_event (cfun, "Ops eliminated",
                            reassociate_stats.ops_eliminated);
  statistics_counter_event (cfun, "Statements rewritten",
                            reassociate_stats.rewritten);
  statistics_counter_event (cfun, "Built-in pow[i] calls encountered",
                            reassociate_stats.pows_encountered);
  statistics_counter_event (cfun, "Built-in powi calls created",
                            reassociate_stats.pows_created);

  delete operand_rank;
  operand_entry_pool.release ();
  free (bb_rank);
  plus_negates.release ();
  free_dominance_info (CDI_POST_DOMINATORS);
  loop_optimizer_finalize ();
}

/* Gate and execute functions for Reassociation.  If INSERT_POWI_P, enable
   insertion of __builtin_powi calls.

   Returns TODO_cfg_cleanup if a CFG cleanup pass is desired due to
   optimization of a gimple conditional.  Otherwise returns zero.  */

static unsigned int
execute_reassoc (bool insert_powi_p)
{
  reassoc_insert_powi_p = insert_powi_p;

  init_reassoc ();

  bool cfg_cleanup_needed;
  cfg_cleanup_needed = do_reassoc ();
  repropagate_negates ();
  branch_fixup ();

  fini_reassoc ();
  return cfg_cleanup_needed ? TODO_cleanup_cfg : 0;
}

namespace {

const pass_data pass_data_reassoc =
{
  GIMPLE_PASS, /* type */
  "reassoc", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_TREE_REASSOC, /* tv_id */
  ( PROP_cfg | PROP_ssa ), /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_update_ssa_only_virtuals, /* todo_flags_finish */
};

class pass_reassoc : public gimple_opt_pass
{
public:
  pass_reassoc (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_reassoc, ctxt), insert_powi_p (false)
  {}

  /* opt_pass methods: */
  opt_pass * clone () { return new pass_reassoc (m_ctxt); }
  void set_pass_param (unsigned int n, bool param)
    {
      gcc_assert (n == 0);
      insert_powi_p = param;
    }
  virtual bool gate (function *) { return flag_tree_reassoc != 0; }
  virtual unsigned int execute (function *)
    { return execute_reassoc (insert_powi_p); }

private:
  /* Enable insertion of __builtin_powi calls during execute_reassoc.  See
     point 3a in the pass header comment.  */
  bool insert_powi_p;
}; // class pass_reassoc

} // anon namespace

gimple_opt_pass *
make_pass_reassoc (gcc::context *ctxt)
{
  return new pass_reassoc (ctxt);
}