/* Reassociation for trees.
   Copyright (C) 2005-2021 Free Software Foundation, Inc.
   Contributed by Daniel Berlin <dan@dberlin.org>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "target.h"
#include "rtl.h"
#include "tree.h"
#include "gimple.h"
#include "cfghooks.h"
#include "alloc-pool.h"
#include "tree-pass.h"
#include "memmodel.h"
#include "tm_p.h"
#include "ssa.h"
#include "optabs-tree.h"
#include "gimple-pretty-print.h"
#include "diagnostic-core.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "cfganal.h"
#include "gimple-fold.h"
#include "tree-eh.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "tree-cfg.h"
#include "tree-ssa-loop.h"
#include "flags.h"
#include "tree-ssa.h"
#include "langhooks.h"
#include "cfgloop.h"
#include "builtins.h"
#include "gimplify.h"
#include "case-cfn-macros.h"
#include "tree-ssa-reassoc.h"
#include "tree-ssa-math-opts.h"

/* This is a simple global reassociation pass.  It is, in part, based
   on the LLVM pass of the same name (they do some things more/less
   than we do, in different orders, etc).

   It consists of five steps:

   1. Breaking up subtract operations into addition + negate, where
   it would promote the reassociation of adds.

   2. Left linearization of the expression trees, so that (A+B)+(C+D)
   becomes (((A+B)+C)+D), which is easier for us to rewrite later.
   During linearization, we place the operands of the binary
   expressions into a vector of operand_entry_*.

   3. Optimization of the operand lists, eliminating things like a +
   -a, a & a, etc.

   3a. Combine repeated factors with the same occurrence counts
   into a __builtin_powi call that will later be optimized into
   an optimal number of multiplies.

   4. Rewrite the expression trees we linearized and optimized so
   they are in proper rank order.

   5. Repropagate negates, as nothing else will clean it up ATM.

   A bit of theory on #4, since nobody seems to write anything down
   about why it makes sense to do it the way they do it:

   We could do this much nicer theoretically, but don't (for reasons
   explained after how to do it theoretically nice :P).

   In order to promote the most redundancy elimination, you want
   binary expressions whose operands are the same rank (or
   preferably, the same value) exposed to the redundancy eliminator,
   for possible elimination.

   So the way to do this if we really cared, is to build the new op
   tree from the leaves to the roots, merging as you go, and putting the
   new op on the end of the worklist, until you are left with one
   thing on the worklist.

   IE if you have to rewrite the following set of operands (listed with
   rank in parentheses), with opcode PLUS_EXPR:

   a (1), b (1), c (1), d (2), e (2)


   We start with our merge worklist empty, and the ops list with all of
   those on it.

   You want to first merge all leaves of the same rank, as much as
   possible.

   So first build a binary op of

   mergetmp = a + b, and put "mergetmp" on the merge worklist.

   Because there is no three operand form of PLUS_EXPR, c is not going to
   be exposed to redundancy elimination as a rank 1 operand.

   So you might as well throw it on the merge worklist (you could also
   consider it to now be a rank two operand, and merge it with d and e,
   but in this case, you then have evicted e from a binary op.  So at
   least in this situation, you can't win.)

   Then build a binary op of d + e
   mergetmp2 = d + e

   and put mergetmp2 on the merge worklist.

   so merge worklist = {mergetmp, c, mergetmp2}

   Continue building binary ops of these operations until you have only
   one operation left on the worklist.

   So we have

   build binary op
   mergetmp3 = mergetmp + c

   worklist = {mergetmp2, mergetmp3}

   mergetmp4 = mergetmp2 + mergetmp3

   worklist = {mergetmp4}

   because we have one operation left, we can now just set the original
   statement equal to the result of that operation.

   This will at least expose a + b and d + e to redundancy elimination
   as binary operations.

   For extra points, you can reuse the old statements to build the
   mergetmps, since you shouldn't run out.

   So why don't we do this?

   Because it's expensive, and rarely will help.  Most trees we are
   reassociating have 3 or fewer ops.  If they have 2 ops, they already
   will be written into a nice single binary op.  If you have 3 ops, a
   single simple check suffices to tell you whether the first two are of the
   same rank.  If so, you know to order it

   mergetmp = op1 + op2
   newstmt = mergetmp + op3

   instead of
   mergetmp = op2 + op3
   newstmt = mergetmp + op1

   If all three are of the same rank, you can't expose them all in a
   single binary operator anyway, so the above is *still* the best you
   can do.

   Thus, this is what we do.  When we have three ops left, we check to see
   what order to put them in, and call it a day.  As a nod to vector sum
   reduction, we check if any of the ops are really a phi node that is a
   destructive update for the associating op, and keep the destructive
   update together for vector sum reduction recognition.  */
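
/* As a hand-written sketch of the five steps (not a compiler dump),
   consider x = (a - b) + (c + a):

     step 1 breaks up the subtract:   x = (a + (-b)) + (c + a)
     step 2 linearizes the tree into the operand vector {a, -b, c, a}
     step 3 eliminates nothing here, but would cancel a + -a to 0 if
       the negate were of a instead of b
     step 4 re-emits the operands as a left-linear chain in rank order
     step 5 turns the remaining t2 = t1 + (-b) back into t2 = t1 - b.  */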

/* Enable insertion of __builtin_powi calls during execute_reassoc.  See
   point 3a in the pass header comment.  */
static bool reassoc_insert_powi_p;

/* Statistics */
static struct
{
  int linearized;
  int constants_eliminated;
  int ops_eliminated;
  int rewritten;
  int pows_encountered;
  int pows_created;
} reassociate_stats;


static object_allocator<operand_entry> operand_entry_pool
  ("operand entry pool");

/* This is used to assign a unique ID to each struct operand_entry
   so that qsort results are identical on different hosts.  */
static unsigned int next_operand_entry_id;

/* Starting rank number for a given basic block, so that we can rank
   operations using unmovable instructions in that BB based on the bb
   depth.  */
static int64_t *bb_rank;

/* Operand->rank hashtable.  */
static hash_map<tree, int64_t> *operand_rank;

/* Vector of SSA_NAMEs on which after reassociate_bb is done with
   all basic blocks the CFG should be adjusted - basic blocks
   split right after that SSA_NAME's definition statement and before
   the only use, which must be a bit ior.  */
static vec<tree> reassoc_branch_fixups;

/* Forward decls.  */
static int64_t get_rank (tree);
static bool reassoc_stmt_dominates_stmt_p (gimple *, gimple *);

/* Wrapper around gsi_remove, which adjusts gimple_uid of debug stmts
   possibly added by gsi_remove.  */

static bool
reassoc_remove_stmt (gimple_stmt_iterator *gsi)
{
  gimple *stmt = gsi_stmt (*gsi);

  if (!MAY_HAVE_DEBUG_BIND_STMTS || gimple_code (stmt) == GIMPLE_PHI)
    return gsi_remove (gsi, true);

  gimple_stmt_iterator prev = *gsi;
  gsi_prev (&prev);
  unsigned uid = gimple_uid (stmt);
  basic_block bb = gimple_bb (stmt);
  bool ret = gsi_remove (gsi, true);
  if (!gsi_end_p (prev))
    gsi_next (&prev);
  else
    prev = gsi_start_bb (bb);
  gimple *end_stmt = gsi_stmt (*gsi);
  while ((stmt = gsi_stmt (prev)) != end_stmt)
    {
      gcc_assert (stmt && is_gimple_debug (stmt) && gimple_uid (stmt) == 0);
      gimple_set_uid (stmt, uid);
      gsi_next (&prev);
    }
  return ret;
}

/* Bias amount for loop-carried phis.  We want this to be larger than
   the depth of any reassociation tree we can see, but not larger than
   the rank difference between two blocks.  */
#define PHI_LOOP_BIAS (1 << 15)

/* Rank assigned to a phi statement.  If STMT is a loop-carried phi of
   an innermost loop, and the phi has only a single use which is inside
   the loop, then the rank is the block rank of the loop latch plus an
   extra bias for the loop-carried dependence.  This causes expressions
   calculated into an accumulator variable to be independent for each
   iteration of the loop.  If STMT is some other phi, the rank is the
   block rank of its containing block.  */
static int64_t
phi_rank (gimple *stmt)
{
  basic_block bb = gimple_bb (stmt);
  class loop *father = bb->loop_father;
  tree res;
  unsigned i;
  use_operand_p use;
  gimple *use_stmt;

  /* We only care about real loops (those with a latch).  */
  if (!father->latch)
    return bb_rank[bb->index];

  /* Interesting phis must be in headers of innermost loops.  */
  if (bb != father->header
      || father->inner)
    return bb_rank[bb->index];

  /* Ignore virtual SSA_NAMEs.  */
  res = gimple_phi_result (stmt);
  if (virtual_operand_p (res))
    return bb_rank[bb->index];

  /* The phi definition must have a single use, and that use must be
     within the loop.  Otherwise this isn't an accumulator pattern.  */
  if (!single_imm_use (res, &use, &use_stmt)
      || gimple_bb (use_stmt)->loop_father != father)
    return bb_rank[bb->index];

  /* Look for phi arguments from within the loop.  If found, bias this phi.  */
  for (i = 0; i < gimple_phi_num_args (stmt); i++)
    {
      tree arg = gimple_phi_arg_def (stmt, i);
      if (TREE_CODE (arg) == SSA_NAME
	  && !SSA_NAME_IS_DEFAULT_DEF (arg))
	{
	  gimple *def_stmt = SSA_NAME_DEF_STMT (arg);
	  if (gimple_bb (def_stmt)->loop_father == father)
	    return bb_rank[father->latch->index] + PHI_LOOP_BIAS;
	}
    }

  /* Must be an uninteresting phi.  */
  return bb_rank[bb->index];
}

/* If EXP is an SSA_NAME defined by a PHI statement that represents a
   loop-carried dependence of an innermost loop, return TRUE; else
   return FALSE.  */
static bool
loop_carried_phi (tree exp)
{
  gimple *phi_stmt;
  int64_t block_rank;

  if (TREE_CODE (exp) != SSA_NAME
      || SSA_NAME_IS_DEFAULT_DEF (exp))
    return false;

  phi_stmt = SSA_NAME_DEF_STMT (exp);

  if (gimple_code (SSA_NAME_DEF_STMT (exp)) != GIMPLE_PHI)
    return false;

  /* Non-loop-carried phis have block rank.  Loop-carried phis have
     an additional bias added in.  If this phi doesn't have block rank,
     it's biased and should not be propagated.  */
  block_rank = bb_rank[gimple_bb (phi_stmt)->index];

  if (phi_rank (phi_stmt) != block_rank)
    return true;

  return false;
}

/* Return the maximum of RANK and the rank that should be propagated
   from expression OP.  For most operands, this is just the rank of OP.
   For loop-carried phis, the value is zero to avoid undoing the bias
   in favor of the phi.  */
static int64_t
propagate_rank (int64_t rank, tree op)
{
  int64_t op_rank;

  if (loop_carried_phi (op))
    return rank;

  op_rank = get_rank (op);

  return MAX (rank, op_rank);
}

/* Look up the operand rank structure for expression E.  */

static inline int64_t
find_operand_rank (tree e)
{
  int64_t *slot = operand_rank->get (e);
  return slot ? *slot : -1;
}

/* Insert {E,RANK} into the operand rank hashtable.  */

static inline void
insert_operand_rank (tree e, int64_t rank)
{
  gcc_assert (rank > 0);
  gcc_assert (!operand_rank->put (e, rank));
}

/* Given an expression E, return the rank of the expression.  */

static int64_t
get_rank (tree e)
{
  /* SSA_NAME's have the rank of the expression they are the result
     of.
     For globals and uninitialized values, the rank is 0.
     For function arguments, use the pre-setup rank.
     For PHI nodes, stores, asm statements, etc, we use the rank of
     the BB.
     For simple operations, the rank is the maximum rank of any of
     its operands, or the bb_rank, whichever is less.
     I make no claims that this is optimal, however, it gives good
     results.  */

  /* We make an exception to the normal ranking system to break
     dependences of accumulator variables in loops.  Suppose we
     have a simple one-block loop containing:

       x_1 = phi(x_0, x_2)
       b = a + x_1
       c = b + d
       x_2 = c + e

     As shown, each iteration of the calculation into x is fully
     dependent upon the iteration before it.  We would prefer to
     see this in the form:

       x_1 = phi(x_0, x_2)
       b = a + d
       c = b + e
       x_2 = c + x_1

     If the loop is unrolled, the calculations of b and c from
     different iterations can be interleaved.

     To obtain this result during reassociation, we bias the rank
     of the phi definition x_1 upward, when it is recognized as an
     accumulator pattern.  The artificial rank causes it to be
     added last, providing the desired independence.  */

  if (TREE_CODE (e) == SSA_NAME)
    {
      ssa_op_iter iter;
      gimple *stmt;
      int64_t rank;
      tree op;

      /* If we already have a rank for this expression, use that.  */
      rank = find_operand_rank (e);
      if (rank != -1)
	return rank;

      stmt = SSA_NAME_DEF_STMT (e);
      if (gimple_code (stmt) == GIMPLE_PHI)
	rank = phi_rank (stmt);

      else if (!is_gimple_assign (stmt))
	rank = bb_rank[gimple_bb (stmt)->index];

      else
	{
	  /* Otherwise, find the maximum rank for the operands.  As an
	     exception, remove the bias from loop-carried phis when propagating
	     the rank so that dependent operations are not also biased.  */
	  /* Simply walk over all SSA uses - this takes advantage of the
	     fact that non-SSA operands satisfy is_gimple_min_invariant and
	     thus have rank 0.  */
	  rank = 0;
	  FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
	    rank = propagate_rank (rank, op);

	  rank += 1;
	}

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "Rank for ");
	  print_generic_expr (dump_file, e);
	  fprintf (dump_file, " is %" PRId64 "\n", rank);
	}

      /* Note the rank in the hashtable so we don't recompute it.  */
      insert_operand_rank (e, rank);
      return rank;
    }

  /* Constants, globals, etc., are rank 0 */
  return 0;
}


/* We want integer ones to end up last no matter what, since they are
   the ones we can do the most with.  */
#define INTEGER_CONST_TYPE (1 << 4)
#define FLOAT_ONE_CONST_TYPE (1 << 3)
#define FLOAT_CONST_TYPE (1 << 2)
#define OTHER_CONST_TYPE (1 << 1)

/* Classify an invariant tree into integer, float, or other, so that
   we can sort them to be near other constants of the same type.  */
static inline int
constant_type (tree t)
{
  if (INTEGRAL_TYPE_P (TREE_TYPE (t)))
    return INTEGER_CONST_TYPE;
  else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (t)))
    {
      /* Sort -1.0 and 1.0 constants last: while const_binop can't
	 optimize some inexact operations, multiplication by -1.0 or
	 1.0 can always be merged with others.  */
      if (real_onep (t) || real_minus_onep (t))
	return FLOAT_ONE_CONST_TYPE;
      return FLOAT_CONST_TYPE;
    }
  else
    return OTHER_CONST_TYPE;
}
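
/* For example (hand-written): when sorting a double-typed operand list
   whose constants are 2.5 and 1.0, the classification above makes 2.5
   (FLOAT_CONST_TYPE) sort before 1.0 (FLOAT_ONE_CONST_TYPE), keeping the
   always-mergeable *1.0 at the end even though const_binop may refuse to
   fold an inexact operation with 2.5.  */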

/* qsort comparison function to sort operand entries PA and PB by rank
   so that the sorted array is ordered by rank in decreasing order.  */
static int
sort_by_operand_rank (const void *pa, const void *pb)
{
  const operand_entry *oea = *(const operand_entry *const *)pa;
  const operand_entry *oeb = *(const operand_entry *const *)pb;

  if (oeb->rank != oea->rank)
    return oeb->rank > oea->rank ? 1 : -1;

  /* It's nicer for optimize_expression if constants that are likely
     to fold when added/multiplied/whatever are put next to each
     other.  Since all constants have rank 0, order them by type.  */
  if (oea->rank == 0)
    {
      if (constant_type (oeb->op) != constant_type (oea->op))
	return constant_type (oea->op) - constant_type (oeb->op);
      else
	/* To make sorting result stable, we use unique IDs to determine
	   order.  */
	return oeb->id > oea->id ? 1 : -1;
    }

  if (TREE_CODE (oea->op) != SSA_NAME)
    {
      if (TREE_CODE (oeb->op) != SSA_NAME)
	return oeb->id > oea->id ? 1 : -1;
      else
	return 1;
    }
  else if (TREE_CODE (oeb->op) != SSA_NAME)
    return -1;

  /* Lastly, make sure the versions that are the same go next to each
     other.  */
  if (SSA_NAME_VERSION (oeb->op) != SSA_NAME_VERSION (oea->op))
    {
      /* SSA_NAME_VERSION is assigned pretty randomly, since we reuse
	 versions of removed SSA_NAMEs, so if possible prefer to sort
	 based on basic block and gimple_uid of the SSA_NAME_DEF_STMT.
	 See PR60418.  */
      gimple *stmta = SSA_NAME_DEF_STMT (oea->op);
      gimple *stmtb = SSA_NAME_DEF_STMT (oeb->op);
      basic_block bba = gimple_bb (stmta);
      basic_block bbb = gimple_bb (stmtb);
      if (bbb != bba)
	{
	  /* One of the SSA_NAMEs can be defined in oeN->stmt_to_insert
	     but the other might not be.  */
	  if (!bba)
	    return 1;
	  if (!bbb)
	    return -1;
	  /* If neither is, compare bb_rank.  */
	  if (bb_rank[bbb->index] != bb_rank[bba->index])
	    return (bb_rank[bbb->index] >> 16) - (bb_rank[bba->index] >> 16);
	}

      bool da = reassoc_stmt_dominates_stmt_p (stmta, stmtb);
      bool db = reassoc_stmt_dominates_stmt_p (stmtb, stmta);
      if (da != db)
	return da ? 1 : -1;

      return SSA_NAME_VERSION (oeb->op) > SSA_NAME_VERSION (oea->op) ? 1 : -1;
    }

  return oeb->id > oea->id ? 1 : -1;
}
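
/* A hand-written illustration: for PLUS_EXPR operands a_3 (rank 4),
   b_5 (rank 2) and the constant 7 (rank 0), the comparator above yields
   the order {a_3, b_5, 7}: decreasing rank with constants last, and any
   remaining ties broken by the host-independent operand-entry ids.  */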

/* Add an operand entry to *OPS for the tree operand OP.  */

static void
add_to_ops_vec (vec<operand_entry *> *ops, tree op,
		gimple *stmt_to_insert = NULL)
{
  operand_entry *oe = operand_entry_pool.allocate ();

  oe->op = op;
  oe->rank = get_rank (op);
  oe->id = next_operand_entry_id++;
  oe->count = 1;
  oe->stmt_to_insert = stmt_to_insert;
  ops->safe_push (oe);
}

/* Add an operand entry to *OPS for the tree operand OP with repeat
   count REPEAT.  */

static void
add_repeat_to_ops_vec (vec<operand_entry *> *ops, tree op,
		       HOST_WIDE_INT repeat)
{
  operand_entry *oe = operand_entry_pool.allocate ();

  oe->op = op;
  oe->rank = get_rank (op);
  oe->id = next_operand_entry_id++;
  oe->count = repeat;
  oe->stmt_to_insert = NULL;
  ops->safe_push (oe);

  reassociate_stats.pows_encountered++;
}

/* Returns true if we can associate the SSA def OP.  */

static bool
can_reassociate_op_p (tree op)
{
  if (TREE_CODE (op) == SSA_NAME && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (op))
    return false;
  /* Make sure asm goto outputs do not participate in reassociation since
     we have no way to find an insertion place after asm goto.  */
  if (TREE_CODE (op) == SSA_NAME
      && gimple_code (SSA_NAME_DEF_STMT (op)) == GIMPLE_ASM
      && gimple_asm_nlabels (as_a <gasm *> (SSA_NAME_DEF_STMT (op))) != 0)
    return false;
  return true;
}

/* Returns true if we can reassociate operations of TYPE.
   That is for integral or non-saturating fixed-point types, and for
   floating point type when associative-math is enabled.  */

static bool
can_reassociate_type_p (tree type)
{
  if ((ANY_INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_WRAPS (type))
      || NON_SAT_FIXED_POINT_TYPE_P (type)
      || (flag_associative_math && FLOAT_TYPE_P (type)))
    return true;
  return false;
}

/* Return true if STMT is a reassociable operation containing a binary
   operation with tree code CODE, and is inside LOOP.  */

static bool
is_reassociable_op (gimple *stmt, enum tree_code code, class loop *loop)
{
  basic_block bb = gimple_bb (stmt);

  if (bb == NULL)
    return false;

  if (!flow_bb_inside_loop_p (loop, bb))
    return false;

  if (is_gimple_assign (stmt)
      && gimple_assign_rhs_code (stmt) == code
      && has_single_use (gimple_assign_lhs (stmt)))
    {
      tree rhs1 = gimple_assign_rhs1 (stmt);
      tree rhs2 = gimple_assign_rhs2 (stmt);
      if (!can_reassociate_op_p (rhs1)
	  || (rhs2 && !can_reassociate_op_p (rhs2)))
	return false;
      return true;
    }

  return false;
}


/* Return true if STMT is a nop-conversion.  */

static bool
gimple_nop_conversion_p (gimple *stmt)
{
  if (gassign *ass = dyn_cast <gassign *> (stmt))
    {
      if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (ass))
	  && tree_nop_conversion_p (TREE_TYPE (gimple_assign_lhs (ass)),
				    TREE_TYPE (gimple_assign_rhs1 (ass))))
	return true;
    }
  return false;
}

/* Given NAME, if NAME is defined by a unary operation OPCODE, return the
   operand of that operation.  Otherwise, return NULL.  */

static tree
get_unary_op (tree name, enum tree_code opcode)
{
  gimple *stmt = SSA_NAME_DEF_STMT (name);

  /* Look through nop conversions (sign changes).  */
  if (gimple_nop_conversion_p (stmt)
      && TREE_CODE (gimple_assign_rhs1 (stmt)) == SSA_NAME)
    stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));

  if (!is_gimple_assign (stmt))
    return NULL_TREE;

  if (gimple_assign_rhs_code (stmt) == opcode)
    return gimple_assign_rhs1 (stmt);
  return NULL_TREE;
}

/* Return true if OP1 and OP2 have the same value if cast to either type.  */

static bool
ops_equal_values_p (tree op1, tree op2)
{
  if (op1 == op2)
    return true;

  tree orig_op1 = op1;
  if (TREE_CODE (op1) == SSA_NAME)
    {
      gimple *stmt = SSA_NAME_DEF_STMT (op1);
      if (gimple_nop_conversion_p (stmt))
	{
	  op1 = gimple_assign_rhs1 (stmt);
	  if (op1 == op2)
	    return true;
	}
    }

  if (TREE_CODE (op2) == SSA_NAME)
    {
      gimple *stmt = SSA_NAME_DEF_STMT (op2);
      if (gimple_nop_conversion_p (stmt))
	{
	  op2 = gimple_assign_rhs1 (stmt);
	  if (op1 == op2
	      || orig_op1 == op2)
	    return true;
	}
    }

  return false;
}
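
/* Illustration (hand-written): given int i and unsigned u defined by
   u = (unsigned) i, ops_equal_values_p (u, i) returns true because the
   cast is a nop conversion (same precision), which is what later lets
   u + (-i) cancel to 0 in eliminate_plus_minus_pair.  */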


/* If CURR and LAST are a pair of ops that OPCODE allows us to
   eliminate through equivalences, do so, remove them from OPS, and
   return true.  Otherwise, return false.  */

static bool
eliminate_duplicate_pair (enum tree_code opcode,
			  vec<operand_entry *> *ops,
			  bool *all_done,
			  unsigned int i,
			  operand_entry *curr,
			  operand_entry *last)
{

  /* If we have two of the same op, and the opcode is & |, min, or max,
     we can eliminate one of them.
     If we have two of the same op, and the opcode is ^, we can
     eliminate both of them.  */

  if (last && last->op == curr->op)
    {
      switch (opcode)
	{
	case MAX_EXPR:
	case MIN_EXPR:
	case BIT_IOR_EXPR:
	case BIT_AND_EXPR:
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "Equivalence: ");
	      print_generic_expr (dump_file, curr->op);
	      fprintf (dump_file, " [&|minmax] ");
	      print_generic_expr (dump_file, last->op);
	      fprintf (dump_file, " -> ");
	      print_generic_stmt (dump_file, last->op);
	    }

	  ops->ordered_remove (i);
	  reassociate_stats.ops_eliminated ++;

	  return true;

	case BIT_XOR_EXPR:
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "Equivalence: ");
	      print_generic_expr (dump_file, curr->op);
	      fprintf (dump_file, " ^ ");
	      print_generic_expr (dump_file, last->op);
	      fprintf (dump_file, " -> nothing\n");
	    }

	  reassociate_stats.ops_eliminated += 2;

	  if (ops->length () == 2)
	    {
	      ops->truncate (0);
	      add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (last->op)));
	      *all_done = true;
	    }
	  else
	    {
	      ops->ordered_remove (i-1);
	      ops->ordered_remove (i-1);
	    }

	  return true;

	default:
	  break;
	}
    }
  return false;
}

static vec<tree> plus_negates;

/* If OPCODE is PLUS_EXPR and CURR->OP is a negate expression or a bitwise
   not expression, look in OPS for a corresponding positive operation to
   cancel it out.  If we find one, remove the other from OPS, replace
   OPS[CURRINDEX] with 0 or -1, respectively, and return true.  Otherwise,
   return false.  */

static bool
eliminate_plus_minus_pair (enum tree_code opcode,
			   vec<operand_entry *> *ops,
			   unsigned int currindex,
			   operand_entry *curr)
{
  tree negateop;
  tree notop;
  unsigned int i;
  operand_entry *oe;

  if (opcode != PLUS_EXPR || TREE_CODE (curr->op) != SSA_NAME)
    return false;

  negateop = get_unary_op (curr->op, NEGATE_EXPR);
  notop = get_unary_op (curr->op, BIT_NOT_EXPR);
  if (negateop == NULL_TREE && notop == NULL_TREE)
    return false;

  /* Any non-negated version will have a rank that is one less than
     the current rank.  So once we hit those ranks, if we don't find
     one, we can stop.  */

  for (i = currindex + 1;
       ops->iterate (i, &oe)
       && oe->rank >= curr->rank - 1;
       i++)
    {
      if (negateop
	  && ops_equal_values_p (oe->op, negateop))
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "Equivalence: ");
	      print_generic_expr (dump_file, negateop);
	      fprintf (dump_file, " + -");
	      print_generic_expr (dump_file, oe->op);
	      fprintf (dump_file, " -> 0\n");
	    }

	  ops->ordered_remove (i);
	  add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (oe->op)));
	  ops->ordered_remove (currindex);
	  reassociate_stats.ops_eliminated ++;

	  return true;
	}
      else if (notop
	       && ops_equal_values_p (oe->op, notop))
	{
	  tree op_type = TREE_TYPE (oe->op);

	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "Equivalence: ");
	      print_generic_expr (dump_file, notop);
	      fprintf (dump_file, " + ~");
	      print_generic_expr (dump_file, oe->op);
	      fprintf (dump_file, " -> -1\n");
	    }

	  ops->ordered_remove (i);
	  add_to_ops_vec (ops, build_all_ones_cst (op_type));
	  ops->ordered_remove (currindex);
	  reassociate_stats.ops_eliminated ++;

	  return true;
	}
    }

  /* If CURR->OP is a negate expr without nop conversion in a plus expr:
     save it for later inspection in repropagate_negates ().  */
  if (negateop != NULL_TREE
      && gimple_assign_rhs_code (SSA_NAME_DEF_STMT (curr->op)) == NEGATE_EXPR)
    plus_negates.safe_push (curr->op);

  return false;
}

/* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR, and CURR->OP is really a
   bitwise not expression, look in OPS for a corresponding operand to
   cancel it out.  If we find one, collapse OPS to a single 0 (for AND)
   or -1 (for IOR) entry and return true.  Otherwise, return false.  */

static bool
eliminate_not_pairs (enum tree_code opcode,
		     vec<operand_entry *> *ops,
		     unsigned int currindex,
		     operand_entry *curr)
{
  tree notop;
  unsigned int i;
  operand_entry *oe;

  if ((opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
      || TREE_CODE (curr->op) != SSA_NAME)
    return false;

  notop = get_unary_op (curr->op, BIT_NOT_EXPR);
  if (notop == NULL_TREE)
    return false;

  /* Any non-not version will have a rank that is one less than
     the current rank.  So once we hit those ranks, if we don't find
     one, we can stop.  */

  for (i = currindex + 1;
       ops->iterate (i, &oe)
       && oe->rank >= curr->rank - 1;
       i++)
    {
      if (oe->op == notop)
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "Equivalence: ");
	      print_generic_expr (dump_file, notop);
	      if (opcode == BIT_AND_EXPR)
		fprintf (dump_file, " & ~");
	      else if (opcode == BIT_IOR_EXPR)
		fprintf (dump_file, " | ~");
	      print_generic_expr (dump_file, oe->op);
	      if (opcode == BIT_AND_EXPR)
		fprintf (dump_file, " -> 0\n");
	      else if (opcode == BIT_IOR_EXPR)
		fprintf (dump_file, " -> -1\n");
	    }

	  if (opcode == BIT_AND_EXPR)
	    oe->op = build_zero_cst (TREE_TYPE (oe->op));
	  else if (opcode == BIT_IOR_EXPR)
	    oe->op = build_all_ones_cst (TREE_TYPE (oe->op));

	  reassociate_stats.ops_eliminated += ops->length () - 1;
	  ops->truncate (0);
	  ops->quick_push (oe);
	  return true;
	}
    }

  return false;
}

/* Use constant value that may be present in OPS to try to eliminate
   operands.  Note that this function is only really used when we've
   eliminated ops for other reasons, or merged constants.  Across
   single statements, fold already does all of this, plus more.  There
   is little point in duplicating logic, so I've only included the
   identities that I could ever construct testcases to trigger.  */

static void
eliminate_using_constants (enum tree_code opcode,
			   vec<operand_entry *> *ops)
{
  operand_entry *oelast = ops->last ();
  tree type = TREE_TYPE (oelast->op);

  if (oelast->rank == 0
      && (ANY_INTEGRAL_TYPE_P (type) || FLOAT_TYPE_P (type)))
    {
      switch (opcode)
	{
	case BIT_AND_EXPR:
	  if (integer_zerop (oelast->op))
	    {
	      if (ops->length () != 1)
		{
		  if (dump_file && (dump_flags & TDF_DETAILS))
		    fprintf (dump_file, "Found & 0, removing all other ops\n");

		  reassociate_stats.ops_eliminated += ops->length () - 1;

		  ops->truncate (0);
		  ops->quick_push (oelast);
		  return;
		}
	    }
	  else if (integer_all_onesp (oelast->op))
	    {
	      if (ops->length () != 1)
		{
		  if (dump_file && (dump_flags & TDF_DETAILS))
		    fprintf (dump_file, "Found & -1, removing\n");
		  ops->pop ();
		  reassociate_stats.ops_eliminated++;
		}
	    }
	  break;
	case BIT_IOR_EXPR:
	  if (integer_all_onesp (oelast->op))
	    {
	      if (ops->length () != 1)
		{
		  if (dump_file && (dump_flags & TDF_DETAILS))
		    fprintf (dump_file, "Found | -1, removing all other ops\n");

		  reassociate_stats.ops_eliminated += ops->length () - 1;

		  ops->truncate (0);
		  ops->quick_push (oelast);
		  return;
		}
	    }
	  else if (integer_zerop (oelast->op))
	    {
	      if (ops->length () != 1)
		{
		  if (dump_file && (dump_flags & TDF_DETAILS))
		    fprintf (dump_file, "Found | 0, removing\n");
		  ops->pop ();
		  reassociate_stats.ops_eliminated++;
		}
	    }
	  break;
	case MULT_EXPR:
	  if (integer_zerop (oelast->op)
	      || (FLOAT_TYPE_P (type)
		  && !HONOR_NANS (type)
		  && !HONOR_SIGNED_ZEROS (type)
		  && real_zerop (oelast->op)))
	    {
	      if (ops->length () != 1)
		{
		  if (dump_file && (dump_flags & TDF_DETAILS))
		    fprintf (dump_file, "Found * 0, removing all other ops\n");

		  reassociate_stats.ops_eliminated += ops->length () - 1;
		  ops->truncate (0);
		  ops->quick_push (oelast);
		  return;
		}
	    }
	  else if (integer_onep (oelast->op)
		   || (FLOAT_TYPE_P (type)
		       && !HONOR_SNANS (type)
		       && real_onep (oelast->op)))
	    {
	      if (ops->length () != 1)
		{
		  if (dump_file && (dump_flags & TDF_DETAILS))
		    fprintf (dump_file, "Found * 1, removing\n");
		  ops->pop ();
		  reassociate_stats.ops_eliminated++;
		  return;
		}
	    }
	  break;
	case BIT_XOR_EXPR:
	case PLUS_EXPR:
	case MINUS_EXPR:
	  if (integer_zerop (oelast->op)
	      || (FLOAT_TYPE_P (type)
		  && (opcode == PLUS_EXPR || opcode == MINUS_EXPR)
		  && fold_real_zero_addition_p (type, oelast->op,
						opcode == MINUS_EXPR)))
	    {
	      if (ops->length () != 1)
		{
		  if (dump_file && (dump_flags & TDF_DETAILS))
		    fprintf (dump_file, "Found [|^+] 0, removing\n");
		  ops->pop ();
		  reassociate_stats.ops_eliminated++;
		  return;
		}
	    }
	  break;
	default:
	  break;
	}
    }
}


static void linearize_expr_tree (vec<operand_entry *> *, gimple *,
				 bool, bool);

/* Structure for tracking and counting operands.  */
struct oecount {
  unsigned int cnt;
  unsigned int id;
  enum tree_code oecode;
  tree op;
};


/* The heap for the oecount hashtable and the sorted list of operands.  */
static vec<oecount> cvec;


/* Oecount hashtable helpers.  */

struct oecount_hasher : int_hash <int, 0, 1>
{
  static inline hashval_t hash (int);
  static inline bool equal (int, int);
};

/* Hash function for oecount.  */

inline hashval_t
oecount_hasher::hash (int p)
{
  const oecount *c = &cvec[p - 42];
  return htab_hash_pointer (c->op) ^ (hashval_t)c->oecode;
}

/* Comparison function for oecount.  */

inline bool
oecount_hasher::equal (int p1, int p2)
{
  const oecount *c1 = &cvec[p1 - 42];
  const oecount *c2 = &cvec[p2 - 42];
  return c1->oecode == c2->oecode && c1->op == c2->op;
}

/* Comparison function for qsort sorting oecount elements by count.  */

static int
oecount_cmp (const void *p1, const void *p2)
{
  const oecount *c1 = (const oecount *)p1;
  const oecount *c2 = (const oecount *)p2;
  if (c1->cnt != c2->cnt)
    return c1->cnt > c2->cnt ? 1 : -1;
  else
    /* If counts are identical, use unique IDs to stabilize qsort.  */
    return c1->id > c2->id ? 1 : -1;
}

/* Return TRUE iff STMT represents a builtin call that raises OP
   to some exponent.  */

static bool
stmt_is_power_of_op (gimple *stmt, tree op)
{
  if (!is_gimple_call (stmt))
    return false;

  switch (gimple_call_combined_fn (stmt))
    {
    CASE_CFN_POW:
    CASE_CFN_POWI:
      return (operand_equal_p (gimple_call_arg (stmt, 0), op, 0));

    default:
      return false;
    }
}

/* Given STMT which is a __builtin_pow* call, decrement its exponent
   in place and return the result.  Assumes that stmt_is_power_of_op
   was previously called for STMT and returned TRUE.  */

static HOST_WIDE_INT
decrement_power (gimple *stmt)
{
  REAL_VALUE_TYPE c, cint;
  HOST_WIDE_INT power;
  tree arg1;

  switch (gimple_call_combined_fn (stmt))
    {
    CASE_CFN_POW:
      arg1 = gimple_call_arg (stmt, 1);
      c = TREE_REAL_CST (arg1);
      power = real_to_integer (&c) - 1;
      real_from_integer (&cint, VOIDmode, power, SIGNED);
      gimple_call_set_arg (stmt, 1, build_real (TREE_TYPE (arg1), cint));
      return power;

    CASE_CFN_POWI:
      arg1 = gimple_call_arg (stmt, 1);
      power = TREE_INT_CST_LOW (arg1) - 1;
      gimple_call_set_arg (stmt, 1, build_int_cst (TREE_TYPE (arg1), power));
      return power;

    default:
      gcc_unreachable ();
    }
}
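
/* Illustrative example: for t = __builtin_pow (x, 5.0), decrement_power
   rewrites the call to __builtin_pow (x, 4.0) in place and returns 4;
   for __builtin_powi (x, 2) it rewrites to __builtin_powi (x, 1) and
   returns 1, which tells callers such as zero_one_operation that the
   call now computes just x and can be propagated away.  */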

/* Replace the SSA name defined by STMT with a new SSA name, replace
   all uses of the old name, and return the new name.  */

static tree
make_new_ssa_for_def (gimple *stmt, enum tree_code opcode, tree op)
{
  gimple *use_stmt;
  use_operand_p use;
  imm_use_iterator iter;
  tree new_lhs, new_debug_lhs = NULL_TREE;
  tree lhs = gimple_get_lhs (stmt);

  new_lhs = make_ssa_name (TREE_TYPE (lhs));
  gimple_set_lhs (stmt, new_lhs);

  /* Also need to update GIMPLE_DEBUGs.  */
  FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs)
    {
      tree repl = new_lhs;
      if (is_gimple_debug (use_stmt))
	{
	  if (new_debug_lhs == NULL_TREE)
	    {
	      new_debug_lhs = make_node (DEBUG_EXPR_DECL);
	      gdebug *def_temp
		= gimple_build_debug_bind (new_debug_lhs,
					   build2 (opcode, TREE_TYPE (lhs),
						   new_lhs, op),
					   stmt);
	      DECL_ARTIFICIAL (new_debug_lhs) = 1;
	      TREE_TYPE (new_debug_lhs) = TREE_TYPE (lhs);
	      SET_DECL_MODE (new_debug_lhs, TYPE_MODE (TREE_TYPE (lhs)));
	      gimple_set_uid (def_temp, gimple_uid (stmt));
	      gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
	      gsi_insert_after (&gsi, def_temp, GSI_SAME_STMT);
	    }
	  repl = new_debug_lhs;
	}
      FOR_EACH_IMM_USE_ON_STMT (use, iter)
	SET_USE (use, repl);
      update_stmt (use_stmt);
    }
  return new_lhs;
}

/* Replace all SSA names defined in STMTS_TO_FIX and replace their
   uses with new SSA names.  Also do this for the stmt that defines
   *DEF if *DEF is not OP.  */

static void
make_new_ssa_for_all_defs (tree *def, enum tree_code opcode, tree op,
			   vec<gimple *> &stmts_to_fix)
{
  unsigned i;
  gimple *stmt;

  if (*def != op
      && TREE_CODE (*def) == SSA_NAME
      && (stmt = SSA_NAME_DEF_STMT (*def))
      && gimple_code (stmt) != GIMPLE_NOP)
    *def = make_new_ssa_for_def (stmt, opcode, op);

  FOR_EACH_VEC_ELT (stmts_to_fix, i, stmt)
    make_new_ssa_for_def (stmt, opcode, op);
}

/* Find the single immediate use of STMT's LHS, and replace it
   with OP.  Remove STMT.  If STMT's LHS is the same as *DEF,
   replace *DEF with OP as well.  */

static void
propagate_op_to_single_use (tree op, gimple *stmt, tree *def)
{
  tree lhs;
  gimple *use_stmt;
  use_operand_p use;
  gimple_stmt_iterator gsi;

  if (is_gimple_call (stmt))
    lhs = gimple_call_lhs (stmt);
  else
    lhs = gimple_assign_lhs (stmt);

  gcc_assert (has_single_use (lhs));
  single_imm_use (lhs, &use, &use_stmt);
  if (lhs == *def)
    *def = op;
  SET_USE (use, op);
  if (TREE_CODE (op) != SSA_NAME)
    update_stmt (use_stmt);
  gsi = gsi_for_stmt (stmt);
  unlink_stmt_vdef (stmt);
  reassoc_remove_stmt (&gsi);
  release_defs (stmt);
}

/* Walks the linear chain with result *DEF searching for an operation
   with operand OP and code OPCODE removing that from the chain.  *DEF
   is updated if there is only one operand but no operation left.  */

static void
zero_one_operation (tree *def, enum tree_code opcode, tree op)
{
  tree orig_def = *def;
  gimple *stmt = SSA_NAME_DEF_STMT (*def);
  /* PR72835 - Record the stmt chain that has to be updated such that
     we don't use the same LHS when the values computed are different.  */
  auto_vec<gimple *, 64> stmts_to_fix;

  do
    {
      tree name;

      if (opcode == MULT_EXPR)
	{
	  if (stmt_is_power_of_op (stmt, op))
	    {
	      if (decrement_power (stmt) == 1)
		{
		  if (stmts_to_fix.length () > 0)
		    stmts_to_fix.pop ();
		  propagate_op_to_single_use (op, stmt, def);
		}
	      break;
	    }
	  else if (gimple_assign_rhs_code (stmt) == NEGATE_EXPR)
	    {
	      if (gimple_assign_rhs1 (stmt) == op)
		{
		  tree cst = build_minus_one_cst (TREE_TYPE (op));
		  if (stmts_to_fix.length () > 0)
		    stmts_to_fix.pop ();
		  propagate_op_to_single_use (cst, stmt, def);
		  break;
		}
	      else if (integer_minus_onep (op)
		       || real_minus_onep (op))
		{
		  gimple_assign_set_rhs_code
		    (stmt, TREE_CODE (gimple_assign_rhs1 (stmt)));
		  break;
		}
	    }
	}

      name = gimple_assign_rhs1 (stmt);

      /* If this is the operation we look for and one of the operands
	 is ours, simply propagate the other operand into the stmt's
	 single use.  */
      if (gimple_assign_rhs_code (stmt) == opcode
	  && (name == op
	      || gimple_assign_rhs2 (stmt) == op))
	{
	  if (name == op)
	    name = gimple_assign_rhs2 (stmt);
	  if (stmts_to_fix.length () > 0)
	    stmts_to_fix.pop ();
	  propagate_op_to_single_use (name, stmt, def);
	  break;
	}

      /* We might have a multiply of two __builtin_pow* calls, and
	 the operand might be hiding in the rightmost one.  Likewise
	 this can happen for a negate.  */
      if (opcode == MULT_EXPR
	  && gimple_assign_rhs_code (stmt) == opcode
	  && TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME
	  && has_single_use (gimple_assign_rhs2 (stmt)))
	{
	  gimple *stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
	  if (stmt_is_power_of_op (stmt2, op))
	    {
	      if (decrement_power (stmt2) == 1)
		propagate_op_to_single_use (op, stmt2, def);
	      else
		stmts_to_fix.safe_push (stmt2);
	      break;
	    }
	  else if (is_gimple_assign (stmt2)
		   && gimple_assign_rhs_code (stmt2) == NEGATE_EXPR)
	    {
	      if (gimple_assign_rhs1 (stmt2) == op)
		{
		  tree cst = build_minus_one_cst (TREE_TYPE (op));
		  propagate_op_to_single_use (cst, stmt2, def);
		  break;
		}
	      else if (integer_minus_onep (op)
		       || real_minus_onep (op))
		{
		  stmts_to_fix.safe_push (stmt2);
		  gimple_assign_set_rhs_code
		    (stmt2, TREE_CODE (gimple_assign_rhs1 (stmt2)));
		  break;
		}
	    }
	}

      /* Continue walking the chain.  */
      gcc_assert (name != op
		  && TREE_CODE (name) == SSA_NAME);
      stmt = SSA_NAME_DEF_STMT (name);
      stmts_to_fix.safe_push (stmt);
    }
  while (1);

  if (stmts_to_fix.length () > 0 || *def == orig_def)
    make_new_ssa_for_all_defs (def, opcode, op, stmts_to_fix);
}
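
/* Hand-written example of the chain walk above: with *def == t_3 in

     t_1 = x * y;   t_2 = t_1 * x;   t_3 = t_2 * z;

   zero_one_operation (&def, MULT_EXPR, x) finds the multiply by x in
   t_2's statement, propagates t_1 into t_2's single use so the chain
   computes x * y * z instead of x * y * x * z, and finally re-SSA-names
   the statements whose computed values changed (see PR72835).  */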

/* Returns true if statement S1 dominates statement S2.  Like
   stmt_dominates_stmt_p, but uses stmt UIDs to optimize.  */

static bool
reassoc_stmt_dominates_stmt_p (gimple *s1, gimple *s2)
{
  basic_block bb1 = gimple_bb (s1), bb2 = gimple_bb (s2);

  /* If bb1 is NULL, it should be a GIMPLE_NOP def stmt of an (D)
     SSA_NAME.  Assume it lives at the beginning of function and
     thus dominates everything.  */
  if (!bb1 || s1 == s2)
    return true;

  /* If bb2 is NULL, it doesn't dominate any stmt with a bb.  */
  if (!bb2)
    return false;

  if (bb1 == bb2)
    {
      /* PHIs in the same basic block are assumed to be
	 executed all in parallel, if only one stmt is a PHI,
	 it dominates the other stmt in the same basic block.  */
      if (gimple_code (s1) == GIMPLE_PHI)
	return true;

      if (gimple_code (s2) == GIMPLE_PHI)
	return false;

      gcc_assert (gimple_uid (s1) && gimple_uid (s2));

      if (gimple_uid (s1) < gimple_uid (s2))
	return true;

      if (gimple_uid (s1) > gimple_uid (s2))
	return false;

      gimple_stmt_iterator gsi = gsi_for_stmt (s1);
      unsigned int uid = gimple_uid (s1);
      for (gsi_next (&gsi); !gsi_end_p (gsi); gsi_next (&gsi))
	{
	  gimple *s = gsi_stmt (gsi);
	  if (gimple_uid (s) != uid)
	    break;
	  if (s == s2)
	    return true;
	}

      return false;
    }

  return dominated_by_p (CDI_DOMINATORS, bb2, bb1);
}

/* Insert STMT after INSERT_POINT.  */

static void
insert_stmt_after (gimple *stmt, gimple *insert_point)
{
  gimple_stmt_iterator gsi;
  basic_block bb;

  if (gimple_code (insert_point) == GIMPLE_PHI)
    bb = gimple_bb (insert_point);
  else if (!stmt_ends_bb_p (insert_point))
    {
      gsi = gsi_for_stmt (insert_point);
      gimple_set_uid (stmt, gimple_uid (insert_point));
      gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
      return;
    }
  else if (gimple_code (insert_point) == GIMPLE_ASM)
    /* We have no idea where to insert - it depends on where the
       uses will be placed.  */
    gcc_unreachable ();
  else
    /* We assume INSERT_POINT is a SSA_NAME_DEF_STMT of some SSA_NAME,
       thus if it must end a basic block, it should be a call that can
       throw, or some assignment that can throw.  If it throws, the LHS
       of it will not be initialized though, so only valid places using
       the SSA_NAME should be dominated by the fallthru edge.  */
    bb = find_fallthru_edge (gimple_bb (insert_point)->succs)->dest;
  gsi = gsi_after_labels (bb);
  if (gsi_end_p (gsi))
    {
      gimple_stmt_iterator gsi2 = gsi_last_bb (bb);
      gimple_set_uid (stmt,
		      gsi_end_p (gsi2) ? 1 : gimple_uid (gsi_stmt (gsi2)));
    }
  else
    gimple_set_uid (stmt, gimple_uid (gsi_stmt (gsi)));
  gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
}

/* Builds one statement performing OP1 OPCODE OP2 using a new SSA name
   for the result.  Places the statement after the definition of either
   OP1 or OP2.  Returns the new statement.  */

static gimple *
build_and_add_sum (tree type, tree op1, tree op2, enum tree_code opcode)
{
  gimple *op1def = NULL, *op2def = NULL;
  gimple_stmt_iterator gsi;
  tree op;
  gassign *sum;

  /* Create the addition statement.  */
  op = make_ssa_name (type);
  sum = gimple_build_assign (op, opcode, op1, op2);

  /* Find an insertion place and insert.  */
  if (TREE_CODE (op1) == SSA_NAME)
    op1def = SSA_NAME_DEF_STMT (op1);
  if (TREE_CODE (op2) == SSA_NAME)
    op2def = SSA_NAME_DEF_STMT (op2);
  if ((!op1def || gimple_nop_p (op1def))
      && (!op2def || gimple_nop_p (op2def)))
    {
      gsi = gsi_after_labels (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
      if (gsi_end_p (gsi))
	{
	  gimple_stmt_iterator gsi2
	    = gsi_last_bb (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
	  gimple_set_uid (sum,
			  gsi_end_p (gsi2) ? 1 : gimple_uid (gsi_stmt (gsi2)));
	}
      else
	gimple_set_uid (sum, gimple_uid (gsi_stmt (gsi)));
      gsi_insert_before (&gsi, sum, GSI_NEW_STMT);
    }
  else
    {
      gimple *insert_point;
      if ((!op1def || gimple_nop_p (op1def))
	  || (op2def && !gimple_nop_p (op2def)
	      && reassoc_stmt_dominates_stmt_p (op1def, op2def)))
	insert_point = op2def;
      else
	insert_point = op1def;
      insert_stmt_after (sum, insert_point);
    }
  update_stmt (sum);

  return sum;
}

/* Perform un-distribution of divisions and multiplications.
   A * X + B * X is transformed into (A + B) * X and A / X + B / X
   to (A + B) / X for real X.

   The algorithm is organized as follows.

    - First we walk the addition chain *OPS looking for summands that
      are defined by a multiplication or a real division.  This results
      in the candidates bitmap with relevant indices into *OPS.

    - Second we build the chains of multiplications or divisions for
      these candidates, counting the number of occurrences of (operand, code)
      pairs in all of the candidates chains.

    - Third we sort the (operand, code) pairs by number of occurrence and
      process them starting with the pair with the most uses.

      * For each such pair we walk the candidates again to build a
	second candidate bitmap noting all multiplication/division chains
	that have at least one occurrence of (operand, code).

      * We build an alternate addition chain only covering these
	candidates with one (operand, code) operation removed from their
	multiplication/division chain.

      * The first candidate gets replaced by the alternate addition chain
	multiplied/divided by the operand.

      * All candidate chains get disabled for further processing and
	processing of (operand, code) pairs continues.

   The alternate addition chains built are re-processed by the main
   reassociation algorithm which allows optimizing a * x * y + b * y * x
   to (a + b) * x * y in one invocation of the reassociation pass.  */
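
/* For instance (hand-written): for ops = {a * x, b * x, c} with opcode
   PLUS_EXPR, the pair (x, MULT_EXPR) occurs twice, so the code below
   strips one x from each candidate via zero_one_operation, builds
   a + b, multiplies it back by x, and leaves {(a + b) * x, 0, c} for
   the normal rewriting to finish.  */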
1584
1585 static bool
undistribute_ops_list(enum tree_code opcode,vec<operand_entry * > * ops,class loop * loop)1586 undistribute_ops_list (enum tree_code opcode,
1587 vec<operand_entry *> *ops, class loop *loop)
1588 {
1589 unsigned int length = ops->length ();
1590 operand_entry *oe1;
1591 unsigned i, j;
1592 unsigned nr_candidates, nr_candidates2;
1593 sbitmap_iterator sbi0;
1594 vec<operand_entry *> *subops;
1595 bool changed = false;
1596 unsigned int next_oecount_id = 0;
1597
1598 if (length <= 1
1599 || opcode != PLUS_EXPR)
1600 return false;
1601
1602 /* Build a list of candidates to process. */
1603 auto_sbitmap candidates (length);
1604 bitmap_clear (candidates);
1605 nr_candidates = 0;
1606 FOR_EACH_VEC_ELT (*ops, i, oe1)
1607 {
1608 enum tree_code dcode;
1609 gimple *oe1def;
1610
1611 if (TREE_CODE (oe1->op) != SSA_NAME)
1612 continue;
1613 oe1def = SSA_NAME_DEF_STMT (oe1->op);
1614 if (!is_gimple_assign (oe1def))
1615 continue;
1616 dcode = gimple_assign_rhs_code (oe1def);
1617 if ((dcode != MULT_EXPR
1618 && dcode != RDIV_EXPR)
1619 || !is_reassociable_op (oe1def, dcode, loop))
1620 continue;
1621
1622 bitmap_set_bit (candidates, i);
1623 nr_candidates++;
1624 }
1625
1626 if (nr_candidates < 2)
1627 return false;
1628
1629 if (dump_file && (dump_flags & TDF_DETAILS))
1630 {
1631 fprintf (dump_file, "searching for un-distribute opportunities ");
1632 print_generic_expr (dump_file,
1633 (*ops)[bitmap_first_set_bit (candidates)]->op, TDF_NONE);
1634 fprintf (dump_file, " %d\n", nr_candidates);
1635 }
1636
1637 /* Build linearized sub-operand lists and the counting table. */
1638 cvec.create (0);
1639
1640 hash_table<oecount_hasher> ctable (15);
1641
1642 /* ??? Macro arguments cannot have multi-argument template types in
1643 them. This typedef is needed to workaround that limitation. */
1644 typedef vec<operand_entry *> vec_operand_entry_t_heap;
1645 subops = XCNEWVEC (vec_operand_entry_t_heap, ops->length ());
1646 EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
1647 {
1648 gimple *oedef;
1649 enum tree_code oecode;
1650 unsigned j;
1651
1652 oedef = SSA_NAME_DEF_STMT ((*ops)[i]->op);
1653 oecode = gimple_assign_rhs_code (oedef);
1654 linearize_expr_tree (&subops[i], oedef,
1655 associative_tree_code (oecode), false);
1656
1657 FOR_EACH_VEC_ELT (subops[i], j, oe1)
1658 {
1659 oecount c;
1660 int *slot;
1661 int idx;
1662 c.oecode = oecode;
1663 c.cnt = 1;
1664 c.id = next_oecount_id++;
1665 c.op = oe1->op;
1666 cvec.safe_push (c);
1667 idx = cvec.length () + 41;
1668 slot = ctable.find_slot (idx, INSERT);
1669 if (!*slot)
1670 {
1671 *slot = idx;
1672 }
1673 else
1674 {
1675 cvec.pop ();
1676 cvec[*slot - 42].cnt++;
1677 }
1678 }
1679 }
1680
1681 /* Sort the counting table. */
1682 cvec.qsort (oecount_cmp);
1683
1684 if (dump_file && (dump_flags & TDF_DETAILS))
1685 {
1686 oecount *c;
1687 fprintf (dump_file, "Candidates:\n");
1688 FOR_EACH_VEC_ELT (cvec, j, c)
1689 {
1690 fprintf (dump_file, " %u %s: ", c->cnt,
1691 c->oecode == MULT_EXPR
1692 ? "*" : c->oecode == RDIV_EXPR ? "/" : "?");
1693 print_generic_expr (dump_file, c->op);
1694 fprintf (dump_file, "\n");
1695 }
1696 }
1697
1698 /* Process the (operand, code) pairs in order of most occurrence. */
1699 auto_sbitmap candidates2 (length);
1700 while (!cvec.is_empty ())
1701 {
1702 oecount *c = &cvec.last ();
1703 if (c->cnt < 2)
1704 break;
1705
1706 /* Now collect the operands in the outer chain that contain
1707 the common operand in their inner chain. */
1708 bitmap_clear (candidates2);
1709 nr_candidates2 = 0;
1710 EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
1711 {
1712 gimple *oedef;
1713 enum tree_code oecode;
1714 unsigned j;
1715 tree op = (*ops)[i]->op;
1716
1717 /* If we undistributed in this chain already this may be
1718 a constant. */
1719 if (TREE_CODE (op) != SSA_NAME)
1720 continue;
1721
1722 oedef = SSA_NAME_DEF_STMT (op);
1723 oecode = gimple_assign_rhs_code (oedef);
1724 if (oecode != c->oecode)
1725 continue;
1726
1727 FOR_EACH_VEC_ELT (subops[i], j, oe1)
1728 {
1729 if (oe1->op == c->op)
1730 {
1731 bitmap_set_bit (candidates2, i);
1732 ++nr_candidates2;
1733 break;
1734 }
1735 }
1736 }
1737
1738 if (nr_candidates2 >= 2)
1739 {
1740 operand_entry *oe1, *oe2;
1741 gimple *prod;
1742 int first = bitmap_first_set_bit (candidates2);
1743
1744 /* Build the new addition chain. */
1745 oe1 = (*ops)[first];
1746 if (dump_file && (dump_flags & TDF_DETAILS))
1747 {
1748 fprintf (dump_file, "Building (");
1749 print_generic_expr (dump_file, oe1->op);
1750 }
1751 zero_one_operation (&oe1->op, c->oecode, c->op);
1752 EXECUTE_IF_SET_IN_BITMAP (candidates2, first+1, i, sbi0)
1753 {
1754 gimple *sum;
1755 oe2 = (*ops)[i];
1756 if (dump_file && (dump_flags & TDF_DETAILS))
1757 {
1758 fprintf (dump_file, " + ");
1759 print_generic_expr (dump_file, oe2->op);
1760 }
1761 zero_one_operation (&oe2->op, c->oecode, c->op);
1762 sum = build_and_add_sum (TREE_TYPE (oe1->op),
1763 oe1->op, oe2->op, opcode);
1764 oe2->op = build_zero_cst (TREE_TYPE (oe2->op));
1765 oe2->rank = 0;
1766 oe1->op = gimple_get_lhs (sum);
1767 }
1768
1769 /* Apply the multiplication/division. */
1770 prod = build_and_add_sum (TREE_TYPE (oe1->op),
1771 oe1->op, c->op, c->oecode);
1772 if (dump_file && (dump_flags & TDF_DETAILS))
1773 {
1774 fprintf (dump_file, ") %s ", c->oecode == MULT_EXPR ? "*" : "/");
1775 print_generic_expr (dump_file, c->op);
1776 fprintf (dump_file, "\n");
1777 }
1778
1779 /* Record it in the addition chain and disable further
1780 undistribution with this op. */
1781 oe1->op = gimple_assign_lhs (prod);
1782 oe1->rank = get_rank (oe1->op);
1783 subops[first].release ();
1784
1785 changed = true;
1786 }
1787
1788 cvec.pop ();
1789 }
1790
1791 for (i = 0; i < ops->length (); ++i)
1792 subops[i].release ();
1793 free (subops);
1794 cvec.release ();
1795
1796 return changed;
1797 }
1798
1799 /* Pair to hold the information of one specific VECTOR_TYPE SSA_NAME:
1800 first: element index for each relevant BIT_FIELD_REF.
1801 second: the index of vec ops* for each relevant BIT_FIELD_REF. */
1802 typedef std::pair<unsigned, unsigned> v_info_elem;
1803 struct v_info {
1804 tree vec_type;
1805 auto_vec<v_info_elem, 32> vec;
1806 };
1807 typedef v_info *v_info_ptr;
1808
1809 /* Comparison function for qsort on VECTOR SSA_NAME trees by machine mode. */
1810 static int
sort_by_mach_mode(const void * p_i,const void * p_j)1811 sort_by_mach_mode (const void *p_i, const void *p_j)
1812 {
1813 const tree tr1 = *((const tree *) p_i);
1814 const tree tr2 = *((const tree *) p_j);
1815 unsigned int mode1 = TYPE_MODE (TREE_TYPE (tr1));
1816 unsigned int mode2 = TYPE_MODE (TREE_TYPE (tr2));
1817 if (mode1 > mode2)
1818 return 1;
1819 else if (mode1 < mode2)
1820 return -1;
1821 if (SSA_NAME_VERSION (tr1) < SSA_NAME_VERSION (tr2))
1822 return -1;
1823 else if (SSA_NAME_VERSION (tr1) > SSA_NAME_VERSION (tr2))
1824 return 1;
1825 return 0;
1826 }
1827
1828 /* Cleanup hash map for VECTOR information. */
1829 static void
cleanup_vinfo_map(hash_map<tree,v_info_ptr> & info_map)1830 cleanup_vinfo_map (hash_map<tree, v_info_ptr> &info_map)
1831 {
1832 for (hash_map<tree, v_info_ptr>::iterator it = info_map.begin ();
1833 it != info_map.end (); ++it)
1834 {
1835 v_info_ptr info = (*it).second;
1836 delete info;
1837 (*it).second = NULL;
1838 }
1839 }
1840
1841 /* Perform un-distribution of BIT_FIELD_REF on VECTOR_TYPE.
1842 V1[0] + V1[1] + ... + V1[k] + V2[0] + V2[1] + ... + V2[k] + ... Vn[k]
1843 is transformed to
1844 Vs = (V1 + V2 + ... + Vn)
1845 Vs[0] + Vs[1] + ... + Vs[k]
1846
1847 The basic steps are listed below:
1848
1849 1) Check the addition chain *OPS, looking for summands that come from
1850 a BIT_FIELD_REF on a VECTOR type.  Put the information into
1851 v_info_map for each qualifying summand, using the VECTOR SSA_NAME as key.
1852
1853 2) For each key (VECTOR SSA_NAME), validate that all its BIT_FIELD_REFs are
1854 contiguous and cover the whole VECTOR perfectly, without any holes.
1855 Obtain one VECTOR list which contains the candidates to be transformed.
1856
1857 3) Sort the VECTOR list by machine mode of the VECTOR type; for each
1858 group of candidates with the same mode, build the addition statements
1859 for them and generate BIT_FIELD_REFs accordingly.
1860
1861 TODO:
1862 The current implementation requires that the whole VECTOR be fully
1863 covered, but it could be extended to support partial coverage
1864 (adjacent elements that do not fill the whole VECTOR); that would
1865 need some cost model to decide when the transformation is worthwhile.
1866 */
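/* As an illustrative sketch (the SSA names and the V4SI type below are
   made up, not taken from any particular testcase), for two V4SI
   vectors v1_1 and v2_2 a chain like

     _3 = BIT_FIELD_REF <v1_1, 32, 0>;
     _4 = BIT_FIELD_REF <v1_1, 32, 32>;
     ...
     _9 = BIT_FIELD_REF <v2_2, 32, 96>;
     sum = _3 + _4 + ... + _9;

   would be rewritten to

     vs_10 = v1_1 + v2_2;
     _11 = BIT_FIELD_REF <vs_10, 32, 0>;
     ...
     _14 = BIT_FIELD_REF <vs_10, 32, 96>;
     sum = _11 + _12 + _13 + _14;  */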
1867 static bool
1868 undistribute_bitref_for_vector (enum tree_code opcode,
1869 vec<operand_entry *> *ops, struct loop *loop)
1870 {
1871 if (ops->length () <= 1)
1872 return false;
1873
1874 if (opcode != PLUS_EXPR
1875 && opcode != MULT_EXPR
1876 && opcode != BIT_XOR_EXPR
1877 && opcode != BIT_IOR_EXPR
1878 && opcode != BIT_AND_EXPR)
1879 return false;
1880
1881 hash_map<tree, v_info_ptr> v_info_map;
1882 operand_entry *oe1;
1883 unsigned i;
1884
1885 /* Find the summands coming from VECTOR BIT_FIELD_REFs in the addition
1886 chain and put their information into the map. */
1887 FOR_EACH_VEC_ELT (*ops, i, oe1)
1888 {
1889 enum tree_code dcode;
1890 gimple *oe1def;
1891
1892 if (TREE_CODE (oe1->op) != SSA_NAME)
1893 continue;
1894 oe1def = SSA_NAME_DEF_STMT (oe1->op);
1895 if (!is_gimple_assign (oe1def))
1896 continue;
1897 dcode = gimple_assign_rhs_code (oe1def);
1898 if (dcode != BIT_FIELD_REF || !is_reassociable_op (oe1def, dcode, loop))
1899 continue;
1900
1901 tree rhs = gimple_assign_rhs1 (oe1def);
1902 tree vec = TREE_OPERAND (rhs, 0);
1903 tree vec_type = TREE_TYPE (vec);
1904
1905 if (TREE_CODE (vec) != SSA_NAME || !VECTOR_TYPE_P (vec_type))
1906 continue;
1907
1908 /* Ignore it if target machine can't support this VECTOR type. */
1909 if (!VECTOR_MODE_P (TYPE_MODE (vec_type)))
1910 continue;
1911
1912 /* Check const vector type, constrain BIT_FIELD_REF offset and size. */
1913 if (!TYPE_VECTOR_SUBPARTS (vec_type).is_constant ())
1914 continue;
1915
1916 if (VECTOR_TYPE_P (TREE_TYPE (rhs))
1917 || !is_a <scalar_mode> (TYPE_MODE (TREE_TYPE (rhs))))
1918 continue;
1919
1920 /* The type of BIT_FIELD_REF might not be equal to the element type of
1921 the vector. We want to use a vector type with element type the
1922 same as the BIT_FIELD_REF and size the same as TREE_TYPE (vec). */
1923 if (!useless_type_conversion_p (TREE_TYPE (rhs), TREE_TYPE (vec_type)))
1924 {
1925 machine_mode simd_mode;
1926 unsigned HOST_WIDE_INT size, nunits;
1927 unsigned HOST_WIDE_INT elem_size
1928 = tree_to_uhwi (TYPE_SIZE (TREE_TYPE (rhs)));
1929 if (!GET_MODE_BITSIZE (TYPE_MODE (vec_type)).is_constant (&size))
1930 continue;
1931 if (size <= elem_size || (size % elem_size) != 0)
1932 continue;
1933 nunits = size / elem_size;
1934 if (!mode_for_vector (SCALAR_TYPE_MODE (TREE_TYPE (rhs)),
1935 nunits).exists (&simd_mode))
1936 continue;
1937 vec_type = build_vector_type_for_mode (TREE_TYPE (rhs), simd_mode);
1938
1939 /* Ignore it if target machine can't support this VECTOR type. */
1940 if (!VECTOR_MODE_P (TYPE_MODE (vec_type)))
1941 continue;
1942
1943 /* Check const vector type, constrain BIT_FIELD_REF offset and
1944 size. */
1945 if (!TYPE_VECTOR_SUBPARTS (vec_type).is_constant ())
1946 continue;
1947
1948 if (maybe_ne (GET_MODE_SIZE (TYPE_MODE (vec_type)),
1949 GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (vec)))))
1950 continue;
1951 }
1952
1953 tree elem_type = TREE_TYPE (vec_type);
1954 unsigned HOST_WIDE_INT elem_size = tree_to_uhwi (TYPE_SIZE (elem_type));
1955 if (maybe_ne (bit_field_size (rhs), elem_size))
1956 continue;
1957
1958 unsigned idx;
1959 if (!constant_multiple_p (bit_field_offset (rhs), elem_size, &idx))
1960 continue;
1961
1962 /* Ignore it if target machine can't support this type of VECTOR
1963 operation. */
1964 optab op_tab = optab_for_tree_code (opcode, vec_type, optab_vector);
1965 if (optab_handler (op_tab, TYPE_MODE (vec_type)) == CODE_FOR_nothing)
1966 continue;
1967
1968 bool existed;
1969 v_info_ptr &info = v_info_map.get_or_insert (vec, &existed);
1970 if (!existed)
1971 {
1972 info = new v_info;
1973 info->vec_type = vec_type;
1974 }
1975 else if (!types_compatible_p (vec_type, info->vec_type))
1976 continue;
1977 info->vec.safe_push (std::make_pair (idx, i));
1978 }
1979
1980 /* We need at least two VECTORs to combine. */
1981 if (v_info_map.elements () <= 1)
1982 {
1983 cleanup_vinfo_map (v_info_map);
1984 return false;
1985 }
1986
1987 /* Verify all VECTOR candidates by checking two conditions:
1988 1) sorted offsets are adjacent, no holes.
1989 2) can fill the whole VECTOR perfectly.
1990 And add the valid candidates to a vector for further handling. */
1991 auto_vec<tree> valid_vecs (v_info_map.elements ());
1992 for (hash_map<tree, v_info_ptr>::iterator it = v_info_map.begin ();
1993 it != v_info_map.end (); ++it)
1994 {
1995 tree cand_vec = (*it).first;
1996 v_info_ptr cand_info = (*it).second;
1997 unsigned int num_elems
1998 = TYPE_VECTOR_SUBPARTS (cand_info->vec_type).to_constant ();
1999 if (cand_info->vec.length () != num_elems)
2000 continue;
2001 sbitmap holes = sbitmap_alloc (num_elems);
2002 bitmap_ones (holes);
2003 bool valid = true;
2004 v_info_elem *curr;
2005 FOR_EACH_VEC_ELT (cand_info->vec, i, curr)
2006 {
2007 if (!bitmap_bit_p (holes, curr->first))
2008 {
2009 valid = false;
2010 break;
2011 }
2012 else
2013 bitmap_clear_bit (holes, curr->first);
2014 }
2015 if (valid && bitmap_empty_p (holes))
2016 valid_vecs.quick_push (cand_vec);
2017 sbitmap_free (holes);
2018 }
2019
2020 /* We need at least two VECTORs to combine. */
2021 if (valid_vecs.length () <= 1)
2022 {
2023 cleanup_vinfo_map (v_info_map);
2024 return false;
2025 }
2026
2027 valid_vecs.qsort (sort_by_mach_mode);
2028 /* Go through all candidates in machine mode order and skip any mode
2029 that has only a single candidate. */
2030 for (unsigned i = 0; i < valid_vecs.length () - 1; ++i)
2031 {
2032 tree tvec = valid_vecs[i];
2033 enum machine_mode mode = TYPE_MODE (TREE_TYPE (tvec));
2034
2035 /* Skip modes with only a single candidate. */
2036 if (TYPE_MODE (TREE_TYPE (valid_vecs[i + 1])) != mode)
2037 continue;
2038
2039 unsigned int idx, j;
2040 gimple *sum = NULL;
2041 tree sum_vec = tvec;
2042 v_info_ptr info_ptr = *(v_info_map.get (tvec));
2043 v_info_elem *elem;
2044 tree vec_type = info_ptr->vec_type;
2045
2046 /* Build the sum for all candidates with the same mode. */
2047 do
2048 {
2049 sum = build_and_add_sum (vec_type, sum_vec,
2050 valid_vecs[i + 1], opcode);
2051 if (!useless_type_conversion_p (vec_type,
2052 TREE_TYPE (valid_vecs[i + 1])))
2053 {
2054 /* Update the operands only after build_and_add_sum,
2055 so that we don't have to repeat the placement algorithm
2056 of build_and_add_sum. */
2057 gimple_stmt_iterator gsi = gsi_for_stmt (sum);
2058 tree vce = build1 (VIEW_CONVERT_EXPR, vec_type,
2059 valid_vecs[i + 1]);
2060 tree lhs = make_ssa_name (vec_type);
2061 gimple *g = gimple_build_assign (lhs, VIEW_CONVERT_EXPR, vce);
2062 gimple_set_uid (g, gimple_uid (sum));
2063 gsi_insert_before (&gsi, g, GSI_NEW_STMT);
2064 gimple_assign_set_rhs2 (sum, lhs);
2065 if (sum_vec == tvec)
2066 {
2067 vce = build1 (VIEW_CONVERT_EXPR, vec_type, sum_vec);
2068 lhs = make_ssa_name (vec_type);
2069 g = gimple_build_assign (lhs, VIEW_CONVERT_EXPR, vce);
2070 gimple_set_uid (g, gimple_uid (sum));
2071 gsi_insert_before (&gsi, g, GSI_NEW_STMT);
2072 gimple_assign_set_rhs1 (sum, lhs);
2073 }
2074 update_stmt (sum);
2075 }
2076 sum_vec = gimple_get_lhs (sum);
2077 info_ptr = *(v_info_map.get (valid_vecs[i + 1]));
2078 gcc_assert (types_compatible_p (vec_type, info_ptr->vec_type));
2079 /* Update those related ops of current candidate VECTOR. */
2080 FOR_EACH_VEC_ELT (info_ptr->vec, j, elem)
2081 {
2082 idx = elem->second;
2083 gimple *def = SSA_NAME_DEF_STMT ((*ops)[idx]->op);
2084 /* Set this so that the op definition will get DCEd later. */
2085 gimple_set_visited (def, true);
2086 if (opcode == PLUS_EXPR
2087 || opcode == BIT_XOR_EXPR
2088 || opcode == BIT_IOR_EXPR)
2089 (*ops)[idx]->op = build_zero_cst (TREE_TYPE ((*ops)[idx]->op));
2090 else if (opcode == MULT_EXPR)
2091 (*ops)[idx]->op = build_one_cst (TREE_TYPE ((*ops)[idx]->op));
2092 else
2093 {
2094 gcc_assert (opcode == BIT_AND_EXPR);
2095 (*ops)[idx]->op
2096 = build_all_ones_cst (TREE_TYPE ((*ops)[idx]->op));
2097 }
2098 (*ops)[idx]->rank = 0;
2099 }
2100 if (dump_file && (dump_flags & TDF_DETAILS))
2101 {
2102 fprintf (dump_file, "Generating addition -> ");
2103 print_gimple_stmt (dump_file, sum, 0);
2104 }
2105 i++;
2106 }
2107 while ((i < valid_vecs.length () - 1)
2108 && TYPE_MODE (TREE_TYPE (valid_vecs[i + 1])) == mode);
2109
2110 /* Referring to the first valid VECTOR with this mode, generate the
2111 BIT_FIELD_REF statements accordingly. */
2112 info_ptr = *(v_info_map.get (tvec));
2113 gcc_assert (sum);
2114 tree elem_type = TREE_TYPE (vec_type);
2115 FOR_EACH_VEC_ELT (info_ptr->vec, j, elem)
2116 {
2117 idx = elem->second;
2118 tree dst = make_ssa_name (elem_type);
2119 tree pos = bitsize_int (elem->first
2120 * tree_to_uhwi (TYPE_SIZE (elem_type)));
2121 tree bfr = build3 (BIT_FIELD_REF, elem_type, sum_vec,
2122 TYPE_SIZE (elem_type), pos);
2123 gimple *gs = gimple_build_assign (dst, BIT_FIELD_REF, bfr);
2124 insert_stmt_after (gs, sum);
2125 gimple *def = SSA_NAME_DEF_STMT ((*ops)[idx]->op);
2126 /* Set this so that the op definition will get DCEd later. */
2127 gimple_set_visited (def, true);
2128 (*ops)[idx]->op = gimple_assign_lhs (gs);
2129 (*ops)[idx]->rank = get_rank ((*ops)[idx]->op);
2130 if (dump_file && (dump_flags & TDF_DETAILS))
2131 {
2132 fprintf (dump_file, "Generating bit_field_ref -> ");
2133 print_gimple_stmt (dump_file, gs, 0);
2134 }
2135 }
2136 }
2137
2138 if (dump_file && (dump_flags & TDF_DETAILS))
2139 fprintf (dump_file, "undistributing bit_field_ref for vector done.\n");
2140
2141 cleanup_vinfo_map (v_info_map);
2142
2143 return true;
2144 }
2145
2146 /* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR and CURR is a comparison
2147 expression, examine the other OPS to see if any of them are comparisons
2148 of the same values, which we may be able to combine or eliminate.
2149 For example, we can rewrite (a < b) | (a == b) as (a <= b). */
2150
2151 static bool
2152 eliminate_redundant_comparison (enum tree_code opcode,
2153 vec<operand_entry *> *ops,
2154 unsigned int currindex,
2155 operand_entry *curr)
2156 {
2157 tree op1, op2;
2158 enum tree_code lcode, rcode;
2159 gimple *def1, *def2;
2160 int i;
2161 operand_entry *oe;
2162
2163 if (opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
2164 return false;
2165
2166 /* Check that CURR is a comparison. */
2167 if (TREE_CODE (curr->op) != SSA_NAME)
2168 return false;
2169 def1 = SSA_NAME_DEF_STMT (curr->op);
2170 if (!is_gimple_assign (def1))
2171 return false;
2172 lcode = gimple_assign_rhs_code (def1);
2173 if (TREE_CODE_CLASS (lcode) != tcc_comparison)
2174 return false;
2175 op1 = gimple_assign_rhs1 (def1);
2176 op2 = gimple_assign_rhs2 (def1);
2177
2178 /* Now look for a similar comparison in the remaining OPS. */
2179 for (i = currindex + 1; ops->iterate (i, &oe); i++)
2180 {
2181 tree t;
2182
2183 if (TREE_CODE (oe->op) != SSA_NAME)
2184 continue;
2185 def2 = SSA_NAME_DEF_STMT (oe->op);
2186 if (!is_gimple_assign (def2))
2187 continue;
2188 rcode = gimple_assign_rhs_code (def2);
2189 if (TREE_CODE_CLASS (rcode) != tcc_comparison)
2190 continue;
2191
2192 /* If we got here, we have a match. See if we can combine the
2193 two comparisons. */
2194 tree type = TREE_TYPE (gimple_assign_lhs (def1));
2195 if (opcode == BIT_IOR_EXPR)
2196 t = maybe_fold_or_comparisons (type,
2197 lcode, op1, op2,
2198 rcode, gimple_assign_rhs1 (def2),
2199 gimple_assign_rhs2 (def2));
2200 else
2201 t = maybe_fold_and_comparisons (type,
2202 lcode, op1, op2,
2203 rcode, gimple_assign_rhs1 (def2),
2204 gimple_assign_rhs2 (def2));
2205 if (!t)
2206 continue;
2207
2208 /* maybe_fold_and_comparisons and maybe_fold_or_comparisons
2209 always give us a boolean_type_node value back. If the original
2210 BIT_AND_EXPR or BIT_IOR_EXPR was of a wider integer type,
2211 we need to convert. */
2212 if (!useless_type_conversion_p (TREE_TYPE (curr->op), TREE_TYPE (t)))
2213 t = fold_convert (TREE_TYPE (curr->op), t);
2214
2215 if (TREE_CODE (t) != INTEGER_CST
2216 && !operand_equal_p (t, curr->op, 0))
2217 {
2218 enum tree_code subcode;
2219 tree newop1, newop2;
2220 if (!COMPARISON_CLASS_P (t))
2221 continue;
2222 extract_ops_from_tree (t, &subcode, &newop1, &newop2);
2223 STRIP_USELESS_TYPE_CONVERSION (newop1);
2224 STRIP_USELESS_TYPE_CONVERSION (newop2);
2225 if (!is_gimple_val (newop1) || !is_gimple_val (newop2))
2226 continue;
2227 }
2228
2229 if (dump_file && (dump_flags & TDF_DETAILS))
2230 {
2231 fprintf (dump_file, "Equivalence: ");
2232 print_generic_expr (dump_file, curr->op);
2233 fprintf (dump_file, " %s ", op_symbol_code (opcode));
2234 print_generic_expr (dump_file, oe->op);
2235 fprintf (dump_file, " -> ");
2236 print_generic_expr (dump_file, t);
2237 fprintf (dump_file, "\n");
2238 }
2239
2240 /* Now we can delete oe, as it has been subsumed by the new combined
2241 expression t. */
2242 ops->ordered_remove (i);
2243 reassociate_stats.ops_eliminated ++;
2244
2245 /* If t is the same as curr->op, we're done. Otherwise we must
2246 replace curr->op with t. Special case is if we got a constant
2247 back, in which case we add it to the end instead of in place of
2248 the current entry. */
2249 if (TREE_CODE (t) == INTEGER_CST)
2250 {
2251 ops->ordered_remove (currindex);
2252 add_to_ops_vec (ops, t);
2253 }
2254 else if (!operand_equal_p (t, curr->op, 0))
2255 {
2256 gimple *sum;
2257 enum tree_code subcode;
2258 tree newop1;
2259 tree newop2;
2260 gcc_assert (COMPARISON_CLASS_P (t));
2261 extract_ops_from_tree (t, &subcode, &newop1, &newop2);
2262 STRIP_USELESS_TYPE_CONVERSION (newop1);
2263 STRIP_USELESS_TYPE_CONVERSION (newop2);
2264 gcc_checking_assert (is_gimple_val (newop1)
2265 && is_gimple_val (newop2));
2266 sum = build_and_add_sum (TREE_TYPE (t), newop1, newop2, subcode);
2267 curr->op = gimple_get_lhs (sum);
2268 }
2269 return true;
2270 }
2271
2272 return false;
2273 }
2274
2275
2276 /* Transform repeated addition of the same value into a multiply by a
2277 constant. */
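/* E.g. the operand list {a, a, a, b} for a PLUS_EXPR chain is rewritten
   to {a * 3, b}; for floating-point types this is done only with
   -funsafe-math-optimizations.  */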
2278 static bool
2279 transform_add_to_multiply (vec<operand_entry *> *ops)
2280 {
2281 operand_entry *oe;
2282 tree op = NULL_TREE;
2283 int j;
2284 int i, start = -1, end = 0, count = 0;
2285 auto_vec<std::pair <int, int> > indxs;
2286 bool changed = false;
2287
2288 if (!INTEGRAL_TYPE_P (TREE_TYPE ((*ops)[0]->op))
2289 && (!SCALAR_FLOAT_TYPE_P (TREE_TYPE ((*ops)[0]->op))
2290 || !flag_unsafe_math_optimizations))
2291 return false;
2292
2293 /* Look for repeated operands. */
2294 FOR_EACH_VEC_ELT (*ops, i, oe)
2295 {
2296 if (start == -1)
2297 {
2298 count = 1;
2299 op = oe->op;
2300 start = i;
2301 }
2302 else if (operand_equal_p (oe->op, op, 0))
2303 {
2304 count++;
2305 end = i;
2306 }
2307 else
2308 {
2309 if (count > 1)
2310 indxs.safe_push (std::make_pair (start, end));
2311 count = 1;
2312 op = oe->op;
2313 start = i;
2314 }
2315 }
2316
2317 if (count > 1)
2318 indxs.safe_push (std::make_pair (start, end));
2319
2320 for (j = indxs.length () - 1; j >= 0; --j)
2321 {
2322 /* Convert repeated operand addition to multiplication. */
2323 start = indxs[j].first;
2324 end = indxs[j].second;
2325 op = (*ops)[start]->op;
2326 count = end - start + 1;
2327 for (i = end; i >= start; --i)
2328 ops->unordered_remove (i);
2329 tree tmp = make_ssa_name (TREE_TYPE (op));
2330 tree cst = build_int_cst (integer_type_node, count);
2331 gassign *mul_stmt
2332 = gimple_build_assign (tmp, MULT_EXPR,
2333 op, fold_convert (TREE_TYPE (op), cst));
2334 gimple_set_visited (mul_stmt, true);
2335 add_to_ops_vec (ops, tmp, mul_stmt);
2336 changed = true;
2337 }
2338
2339 return changed;
2340 }
2341
2342
2343 /* Perform various identities and other optimizations on the list of
2344 operand entries, stored in OPS. The tree code for the binary
2345 operation between all the operands is OPCODE. */
2346
2347 static void
2348 optimize_ops_list (enum tree_code opcode,
2349 vec<operand_entry *> *ops)
2350 {
2351 unsigned int length = ops->length ();
2352 unsigned int i;
2353 operand_entry *oe;
2354 operand_entry *oelast = NULL;
2355 bool iterate = false;
2356
2357 if (length == 1)
2358 return;
2359
2360 oelast = ops->last ();
2361
2362 /* If the last two are constants, pop the constants off, merge them
2363 and try the next two. */
2364 if (oelast->rank == 0 && is_gimple_min_invariant (oelast->op))
2365 {
2366 operand_entry *oelm1 = (*ops)[length - 2];
2367
2368 if (oelm1->rank == 0
2369 && is_gimple_min_invariant (oelm1->op)
2370 && useless_type_conversion_p (TREE_TYPE (oelm1->op),
2371 TREE_TYPE (oelast->op)))
2372 {
2373 tree folded = fold_binary (opcode, TREE_TYPE (oelm1->op),
2374 oelm1->op, oelast->op);
2375
2376 if (folded && is_gimple_min_invariant (folded))
2377 {
2378 if (dump_file && (dump_flags & TDF_DETAILS))
2379 fprintf (dump_file, "Merging constants\n");
2380
2381 ops->pop ();
2382 ops->pop ();
2383
2384 add_to_ops_vec (ops, folded);
2385 reassociate_stats.constants_eliminated++;
2386
2387 optimize_ops_list (opcode, ops);
2388 return;
2389 }
2390 }
2391 }
2392
2393 eliminate_using_constants (opcode, ops);
2394 oelast = NULL;
2395
2396 for (i = 0; ops->iterate (i, &oe);)
2397 {
2398 bool done = false;
2399
2400 if (eliminate_not_pairs (opcode, ops, i, oe))
2401 return;
2402 if (eliminate_duplicate_pair (opcode, ops, &done, i, oe, oelast)
2403 || (!done && eliminate_plus_minus_pair (opcode, ops, i, oe))
2404 || (!done && eliminate_redundant_comparison (opcode, ops, i, oe)))
2405 {
2406 if (done)
2407 return;
2408 iterate = true;
2409 oelast = NULL;
2410 continue;
2411 }
2412 oelast = oe;
2413 i++;
2414 }
2415
2416 if (iterate)
2417 optimize_ops_list (opcode, ops);
2418 }
2419
2420 /* The following functions are subroutines to optimize_range_tests and allow
2421 it to try to change a logical combination of comparisons into a range
2422 test.
2423
2424 For example, both
2425 X == 2 || X == 5 || X == 3 || X == 4
2426 and
2427 X >= 2 && X <= 5
2428 are converted to
2429 (unsigned) (X - 2) <= 3
2430
2431 For more information see comments above fold_test_range in fold-const.c,
2432 this implementation is for GIMPLE. */
2433
2434
2435
2436 /* Dump the range entry R to FILE, skipping its expression if SKIP_EXP. */
2437
2438 void
2439 dump_range_entry (FILE *file, struct range_entry *r, bool skip_exp)
2440 {
2441 if (!skip_exp)
2442 print_generic_expr (file, r->exp);
2443 fprintf (file, " %c[", r->in_p ? '+' : '-');
2444 print_generic_expr (file, r->low);
2445 fputs (", ", file);
2446 print_generic_expr (file, r->high);
2447 fputc (']', file);
2448 }
2449
2450 /* Dump the range entry R to STDERR. */
2451
2452 DEBUG_FUNCTION void
2453 debug_range_entry (struct range_entry *r)
2454 {
2455 dump_range_entry (stderr, r, false);
2456 fputc ('\n', stderr);
2457 }
2458
2459 /* This is similar to make_range in fold-const.c, but on top of
2460 GIMPLE instead of trees. If EXP is non-NULL, it should be
2461 an SSA_NAME and STMT argument is ignored, otherwise STMT
2462 argument should be a GIMPLE_COND. */
2463
2464 void
2465 init_range_entry (struct range_entry *r, tree exp, gimple *stmt)
2466 {
2467 int in_p;
2468 tree low, high;
2469 bool is_bool, strict_overflow_p;
2470
2471 r->exp = NULL_TREE;
2472 r->in_p = false;
2473 r->strict_overflow_p = false;
2474 r->low = NULL_TREE;
2475 r->high = NULL_TREE;
2476 if (exp != NULL_TREE
2477 && (TREE_CODE (exp) != SSA_NAME || !INTEGRAL_TYPE_P (TREE_TYPE (exp))))
2478 return;
2479
2480 /* Start with simply saying "EXP != 0" and then look at the code of EXP
2481 and see if we can refine the range. Some of the cases below may not
2482 happen, but it doesn't seem worth worrying about this. We "continue"
2483 the outer loop when we've changed something; otherwise we "break"
2484 the switch, which will "break" the while. */
2485 low = exp ? build_int_cst (TREE_TYPE (exp), 0) : boolean_false_node;
2486 high = low;
2487 in_p = 0;
2488 strict_overflow_p = false;
2489 is_bool = false;
2490 if (exp == NULL_TREE)
2491 is_bool = true;
2492 else if (TYPE_PRECISION (TREE_TYPE (exp)) == 1)
2493 {
2494 if (TYPE_UNSIGNED (TREE_TYPE (exp)))
2495 is_bool = true;
2496 else
2497 return;
2498 }
2499 else if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE)
2500 is_bool = true;
2501
2502 while (1)
2503 {
2504 enum tree_code code;
2505 tree arg0, arg1, exp_type;
2506 tree nexp;
2507 location_t loc;
2508
2509 if (exp != NULL_TREE)
2510 {
2511 if (TREE_CODE (exp) != SSA_NAME
2512 || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp))
2513 break;
2514
2515 stmt = SSA_NAME_DEF_STMT (exp);
2516 if (!is_gimple_assign (stmt))
2517 break;
2518
2519 code = gimple_assign_rhs_code (stmt);
2520 arg0 = gimple_assign_rhs1 (stmt);
2521 arg1 = gimple_assign_rhs2 (stmt);
2522 exp_type = TREE_TYPE (exp);
2523 }
2524 else
2525 {
2526 code = gimple_cond_code (stmt);
2527 arg0 = gimple_cond_lhs (stmt);
2528 arg1 = gimple_cond_rhs (stmt);
2529 exp_type = boolean_type_node;
2530 }
2531
2532 if (TREE_CODE (arg0) != SSA_NAME
2533 || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (arg0))
2534 break;
2535 loc = gimple_location (stmt);
2536 switch (code)
2537 {
2538 case BIT_NOT_EXPR:
2539 if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE
2540 /* Ensure the range is either +[-,0], +[0,0],
2541 -[-,0], -[0,0] or +[1,-], +[1,1], -[1,-] or
2542 -[1,1]. If it is e.g. +[-,-] or -[-,-]
2543 or similar expression of unconditional true or
2544 false, it should not be negated. */
2545 && ((high && integer_zerop (high))
2546 || (low && integer_onep (low))))
2547 {
2548 in_p = !in_p;
2549 exp = arg0;
2550 continue;
2551 }
2552 break;
2553 case SSA_NAME:
2554 exp = arg0;
2555 continue;
2556 CASE_CONVERT:
2557 if (is_bool)
2558 {
2559 if ((TYPE_PRECISION (exp_type) == 1
2560 || TREE_CODE (exp_type) == BOOLEAN_TYPE)
2561 && TYPE_PRECISION (TREE_TYPE (arg0)) > 1)
2562 return;
2563 }
2564 else if (TYPE_PRECISION (TREE_TYPE (arg0)) == 1)
2565 {
2566 if (TYPE_UNSIGNED (TREE_TYPE (arg0)))
2567 is_bool = true;
2568 else
2569 return;
2570 }
2571 else if (TREE_CODE (TREE_TYPE (arg0)) == BOOLEAN_TYPE)
2572 is_bool = true;
2573 goto do_default;
2574 case EQ_EXPR:
2575 case NE_EXPR:
2576 case LT_EXPR:
2577 case LE_EXPR:
2578 case GE_EXPR:
2579 case GT_EXPR:
2580 is_bool = true;
2581 /* FALLTHRU */
2582 default:
2583 if (!is_bool)
2584 return;
2585 do_default:
2586 nexp = make_range_step (loc, code, arg0, arg1, exp_type,
2587 &low, &high, &in_p,
2588 &strict_overflow_p);
2589 if (nexp != NULL_TREE)
2590 {
2591 exp = nexp;
2592 gcc_assert (TREE_CODE (exp) == SSA_NAME);
2593 continue;
2594 }
2595 break;
2596 }
2597 break;
2598 }
2599 if (is_bool)
2600 {
2601 r->exp = exp;
2602 r->in_p = in_p;
2603 r->low = low;
2604 r->high = high;
2605 r->strict_overflow_p = strict_overflow_p;
2606 }
2607 }
2608
2609 /* Comparison function for qsort. Sort entries
2610 without SSA_NAME exp first, then with SSA_NAMEs sorted
2611 by increasing SSA_NAME_VERSION, and for the same SSA_NAMEs
2612 by increasing ->low and if ->low is the same, by increasing
2613 ->high. ->low == NULL_TREE means minimum, ->high == NULL_TREE
2614 maximum. */
2615
2616 static int
2617 range_entry_cmp (const void *a, const void *b)
2618 {
2619 const struct range_entry *p = (const struct range_entry *) a;
2620 const struct range_entry *q = (const struct range_entry *) b;
2621
2622 if (p->exp != NULL_TREE && TREE_CODE (p->exp) == SSA_NAME)
2623 {
2624 if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
2625 {
2626 /* Group range_entries for the same SSA_NAME together. */
2627 if (SSA_NAME_VERSION (p->exp) < SSA_NAME_VERSION (q->exp))
2628 return -1;
2629 else if (SSA_NAME_VERSION (p->exp) > SSA_NAME_VERSION (q->exp))
2630 return 1;
2631 /* If ->low is different, NULL low goes first, then by
2632 ascending low. */
2633 if (p->low != NULL_TREE)
2634 {
2635 if (q->low != NULL_TREE)
2636 {
2637 tree tem = fold_binary (LT_EXPR, boolean_type_node,
2638 p->low, q->low);
2639 if (tem && integer_onep (tem))
2640 return -1;
2641 tem = fold_binary (GT_EXPR, boolean_type_node,
2642 p->low, q->low);
2643 if (tem && integer_onep (tem))
2644 return 1;
2645 }
2646 else
2647 return 1;
2648 }
2649 else if (q->low != NULL_TREE)
2650 return -1;
2651 /* If ->high is different, NULL high goes last, before that by
2652 ascending high. */
2653 if (p->high != NULL_TREE)
2654 {
2655 if (q->high != NULL_TREE)
2656 {
2657 tree tem = fold_binary (LT_EXPR, boolean_type_node,
2658 p->high, q->high);
2659 if (tem && integer_onep (tem))
2660 return -1;
2661 tem = fold_binary (GT_EXPR, boolean_type_node,
2662 p->high, q->high);
2663 if (tem && integer_onep (tem))
2664 return 1;
2665 }
2666 else
2667 return -1;
2668 }
2669 else if (q->high != NULL_TREE)
2670 return 1;
2671 /* If both ranges are the same, sort below by ascending idx. */
2672 }
2673 else
2674 return 1;
2675 }
2676 else if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
2677 return -1;
2678
2679 if (p->idx < q->idx)
2680 return -1;
2681 else
2682 {
2683 gcc_checking_assert (p->idx > q->idx);
2684 return 1;
2685 }
2686 }
2687
2688 /* Helper function for update_range_test. Force EXPR into an SSA_NAME,
2689 insert needed statements before or after GSI, as indicated by BEFORE. */
2690
2691 static tree
2692 force_into_ssa_name (gimple_stmt_iterator *gsi, tree expr, bool before)
2693 {
2694 enum gsi_iterator_update m = before ? GSI_SAME_STMT : GSI_CONTINUE_LINKING;
2695 tree ret = force_gimple_operand_gsi (gsi, expr, true, NULL_TREE, before, m);
2696 if (TREE_CODE (ret) != SSA_NAME)
2697 {
2698 gimple *g = gimple_build_assign (make_ssa_name (TREE_TYPE (ret)), ret);
2699 if (before)
2700 gsi_insert_before (gsi, g, GSI_SAME_STMT);
2701 else
2702 gsi_insert_after (gsi, g, GSI_CONTINUE_LINKING);
2703 ret = gimple_assign_lhs (g);
2704 }
2705 return ret;
2706 }
2707
2708 /* Helper routine of optimize_range_test.
2709 [EXP, IN_P, LOW, HIGH, STRICT_OVERFLOW_P] is a merged range for
2710 RANGE and OTHERRANGE through OTHERRANGE + COUNT - 1 ranges,
2711 OPCODE and OPS are arguments of optimize_range_tests. If OTHERRANGE
2712 is NULL, OTHERRANGEP should not be NULL, and then OTHERRANGEP points to
2713 an array of COUNT pointers to other ranges. Return
2714 true if the range merge has been successful.
2715 If OPCODE is ERROR_MARK, this is called from within
2716 maybe_optimize_range_tests and is performing inter-bb range optimization.
2717 In that case, whether an op is BIT_AND_EXPR or BIT_IOR_EXPR is found in
2718 oe->rank. */
2719
2720 static bool
2721 update_range_test (struct range_entry *range, struct range_entry *otherrange,
2722 struct range_entry **otherrangep,
2723 unsigned int count, enum tree_code opcode,
2724 vec<operand_entry *> *ops, tree exp, gimple_seq seq,
2725 bool in_p, tree low, tree high, bool strict_overflow_p)
2726 {
2727 operand_entry *oe = (*ops)[range->idx];
2728 tree op = oe->op;
2729 gimple *stmt = op ? SSA_NAME_DEF_STMT (op)
2730 : last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id));
2731 location_t loc = gimple_location (stmt);
2732 tree optype = op ? TREE_TYPE (op) : boolean_type_node;
2733 tree tem = build_range_check (loc, optype, unshare_expr (exp),
2734 in_p, low, high);
2735 enum warn_strict_overflow_code wc = WARN_STRICT_OVERFLOW_COMPARISON;
2736 gimple_stmt_iterator gsi;
2737 unsigned int i, uid;
2738
2739 if (tem == NULL_TREE)
2740 return false;
2741
2742 /* If op is default def SSA_NAME, there is no place to insert the
2743 new comparison. Give up, unless we can use OP itself as the
2744 range test. */
2745 if (op && SSA_NAME_IS_DEFAULT_DEF (op))
2746 {
2747 if (op == range->exp
2748 && ((TYPE_PRECISION (optype) == 1 && TYPE_UNSIGNED (optype))
2749 || TREE_CODE (optype) == BOOLEAN_TYPE)
2750 && (op == tem
2751 || (TREE_CODE (tem) == EQ_EXPR
2752 && TREE_OPERAND (tem, 0) == op
2753 && integer_onep (TREE_OPERAND (tem, 1))))
2754 && opcode != BIT_IOR_EXPR
2755 && (opcode != ERROR_MARK || oe->rank != BIT_IOR_EXPR))
2756 {
2757 stmt = NULL;
2758 tem = op;
2759 }
2760 else
2761 return false;
2762 }
2763
2764 if (strict_overflow_p && issue_strict_overflow_warning (wc))
2765 warning_at (loc, OPT_Wstrict_overflow,
2766 "assuming signed overflow does not occur "
2767 "when simplifying range test");
2768
2769 if (dump_file && (dump_flags & TDF_DETAILS))
2770 {
2771 struct range_entry *r;
2772 fprintf (dump_file, "Optimizing range tests ");
2773 dump_range_entry (dump_file, range, false);
2774 for (i = 0; i < count; i++)
2775 {
2776 if (otherrange)
2777 r = otherrange + i;
2778 else
2779 r = otherrangep[i];
2780 if (r->exp
2781 && r->exp != range->exp
2782 && TREE_CODE (r->exp) == SSA_NAME)
2783 {
2784 fprintf (dump_file, " and ");
2785 dump_range_entry (dump_file, r, false);
2786 }
2787 else
2788 {
2789 fprintf (dump_file, " and");
2790 dump_range_entry (dump_file, r, true);
2791 }
2792 }
2793 fprintf (dump_file, "\n into ");
2794 print_generic_expr (dump_file, tem);
2795 fprintf (dump_file, "\n");
2796 }
2797
2798 if (opcode == BIT_IOR_EXPR
2799 || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
2800 tem = invert_truthvalue_loc (loc, tem);
2801
2802 tem = fold_convert_loc (loc, optype, tem);
2803 if (stmt)
2804 {
2805 gsi = gsi_for_stmt (stmt);
2806 uid = gimple_uid (stmt);
2807 }
2808 else
2809 {
2810 gsi = gsi_none ();
2811 uid = 0;
2812 }
2813 if (stmt == NULL)
2814 gcc_checking_assert (tem == op);
2815 /* In rare cases range->exp can be equal to lhs of stmt.
2816 In that case we have to insert after the stmt rather than before
2817 it. If stmt is a PHI, insert it at the start of the basic block. */
2818 else if (op != range->exp)
2819 {
2820 gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT);
2821 tem = force_into_ssa_name (&gsi, tem, true);
2822 gsi_prev (&gsi);
2823 }
2824 else if (gimple_code (stmt) != GIMPLE_PHI)
2825 {
2826 gsi_insert_seq_after (&gsi, seq, GSI_CONTINUE_LINKING);
2827 tem = force_into_ssa_name (&gsi, tem, false);
2828 }
2829 else
2830 {
2831 gsi = gsi_after_labels (gimple_bb (stmt));
2832 if (!gsi_end_p (gsi))
2833 uid = gimple_uid (gsi_stmt (gsi));
2834 else
2835 {
2836 gsi = gsi_start_bb (gimple_bb (stmt));
2837 uid = 1;
2838 while (!gsi_end_p (gsi))
2839 {
2840 uid = gimple_uid (gsi_stmt (gsi));
2841 gsi_next (&gsi);
2842 }
2843 }
2844 gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT);
2845 tem = force_into_ssa_name (&gsi, tem, true);
2846 if (gsi_end_p (gsi))
2847 gsi = gsi_last_bb (gimple_bb (stmt));
2848 else
2849 gsi_prev (&gsi);
2850 }
2851 for (; !gsi_end_p (gsi); gsi_prev (&gsi))
2852 if (gimple_uid (gsi_stmt (gsi)))
2853 break;
2854 else
2855 gimple_set_uid (gsi_stmt (gsi), uid);
2856
2857 oe->op = tem;
2858 range->exp = exp;
2859 range->low = low;
2860 range->high = high;
2861 range->in_p = in_p;
2862 range->strict_overflow_p = false;
2863
2864 for (i = 0; i < count; i++)
2865 {
2866 if (otherrange)
2867 range = otherrange + i;
2868 else
2869 range = otherrangep[i];
2870 oe = (*ops)[range->idx];
2871 /* Now change all the other range test immediate uses, so that
2872 those tests will be optimized away. */
2873 if (opcode == ERROR_MARK)
2874 {
2875 if (oe->op)
2876 oe->op = build_int_cst (TREE_TYPE (oe->op),
2877 oe->rank == BIT_IOR_EXPR ? 0 : 1);
2878 else
2879 oe->op = (oe->rank == BIT_IOR_EXPR
2880 ? boolean_false_node : boolean_true_node);
2881 }
2882 else
2883 oe->op = error_mark_node;
2884 range->exp = NULL_TREE;
2885 range->low = NULL_TREE;
2886 range->high = NULL_TREE;
2887 }
2888 return true;
2889 }
2890
2891 /* Optimize X == CST1 || X == CST2
2892 if popcount (CST1 ^ CST2) == 1 into
2893 (X & ~(CST1 ^ CST2)) == (CST1 & ~(CST1 ^ CST2)).
2894 Similarly for ranges. E.g.
2895 X != 2 && X != 3 && X != 10 && X != 11
2896 will be transformed by the previous optimization into
2897 !((X - 2U) <= 1U || (X - 10U) <= 1U)
2898 and this loop can transform that into
2899 !(((X & ~8) - 2U) <= 1U). */
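/* For instance (a made-up instantiation of the pattern above),
   X == 2 || X == 3 has CST1 ^ CST2 == 1, so it becomes (X & ~1) == 2.  */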
2900
2901 static bool
2902 optimize_range_tests_xor (enum tree_code opcode, tree type,
2903 tree lowi, tree lowj, tree highi, tree highj,
2904 vec<operand_entry *> *ops,
2905 struct range_entry *rangei,
2906 struct range_entry *rangej)
2907 {
2908 tree lowxor, highxor, tem, exp;
2909 /* Check lowi ^ lowj == highi ^ highj and
2910 popcount (lowi ^ lowj) == 1. */
2911 lowxor = fold_binary (BIT_XOR_EXPR, type, lowi, lowj);
2912 if (lowxor == NULL_TREE || TREE_CODE (lowxor) != INTEGER_CST)
2913 return false;
2914 if (!integer_pow2p (lowxor))
2915 return false;
2916 highxor = fold_binary (BIT_XOR_EXPR, type, highi, highj);
2917 if (!tree_int_cst_equal (lowxor, highxor))
2918 return false;
2919
2920 exp = rangei->exp;
2921 scalar_int_mode mode = as_a <scalar_int_mode> (TYPE_MODE (type));
2922 int prec = GET_MODE_PRECISION (mode);
2923 if (TYPE_PRECISION (type) < prec
2924 || (wi::to_wide (TYPE_MIN_VALUE (type))
2925 != wi::min_value (prec, TYPE_SIGN (type)))
2926 || (wi::to_wide (TYPE_MAX_VALUE (type))
2927 != wi::max_value (prec, TYPE_SIGN (type))))
2928 {
2929 type = build_nonstandard_integer_type (prec, TYPE_UNSIGNED (type));
2930 exp = fold_convert (type, exp);
2931 lowxor = fold_convert (type, lowxor);
2932 lowi = fold_convert (type, lowi);
2933 highi = fold_convert (type, highi);
2934 }
2935 tem = fold_build1 (BIT_NOT_EXPR, type, lowxor);
2936 exp = fold_build2 (BIT_AND_EXPR, type, exp, tem);
2937 lowj = fold_build2 (BIT_AND_EXPR, type, lowi, tem);
2938 highj = fold_build2 (BIT_AND_EXPR, type, highi, tem);
2939 if (update_range_test (rangei, rangej, NULL, 1, opcode, ops, exp,
2940 NULL, rangei->in_p, lowj, highj,
2941 rangei->strict_overflow_p
2942 || rangej->strict_overflow_p))
2943 return true;
2944 return false;
2945 }
2946
2947 /* Optimize X == CST1 || X == CST2
2948 if popcount (CST2 - CST1) == 1 into
2949 ((X - CST1) & ~(CST2 - CST1)) == 0.
2950 Similarly for ranges. E.g.
2951 X == 43 || X == 76 || X == 44 || X == 78 || X == 77 || X == 46
2952 || X == 75 || X == 45
2953 will be transformed by the previous optimization into
2954 (X - 43U) <= 3U || (X - 75U) <= 3U
2955 and this loop can transform that into
2956 ((X - 43U) & ~(75U - 43U)) <= 3U. */
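/* For instance (a made-up instantiation of the pattern above),
   X == 43 || X == 75 has CST2 - CST1 == 32, a power of two, so it
   becomes ((X - 43U) & ~32U) == 0.  */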
2957 static bool
2958 optimize_range_tests_diff (enum tree_code opcode, tree type,
2959 tree lowi, tree lowj, tree highi, tree highj,
2960 vec<operand_entry *> *ops,
2961 struct range_entry *rangei,
2962 struct range_entry *rangej)
2963 {
2964 tree tem1, tem2, mask;
2965 /* Check highi - lowi == highj - lowj. */
2966 tem1 = fold_binary (MINUS_EXPR, type, highi, lowi);
2967 if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST)
2968 return false;
2969 tem2 = fold_binary (MINUS_EXPR, type, highj, lowj);
2970 if (!tree_int_cst_equal (tem1, tem2))
2971 return false;
2972 /* Check popcount (lowj - lowi) == 1. */
2973 tem1 = fold_binary (MINUS_EXPR, type, lowj, lowi);
2974 if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST)
2975 return false;
2976 if (!integer_pow2p (tem1))
2977 return false;
2978
2979 scalar_int_mode mode = as_a <scalar_int_mode> (TYPE_MODE (type));
2980 int prec = GET_MODE_PRECISION (mode);
2981 if (TYPE_PRECISION (type) < prec
2982 || (wi::to_wide (TYPE_MIN_VALUE (type))
2983 != wi::min_value (prec, TYPE_SIGN (type)))
2984 || (wi::to_wide (TYPE_MAX_VALUE (type))
2985 != wi::max_value (prec, TYPE_SIGN (type))))
2986 type = build_nonstandard_integer_type (prec, 1);
2987 else
2988 type = unsigned_type_for (type);
2989 tem1 = fold_convert (type, tem1);
2990 tem2 = fold_convert (type, tem2);
2991 lowi = fold_convert (type, lowi);
2992 mask = fold_build1 (BIT_NOT_EXPR, type, tem1);
2993 tem1 = fold_build2 (MINUS_EXPR, type,
2994 fold_convert (type, rangei->exp), lowi);
2995 tem1 = fold_build2 (BIT_AND_EXPR, type, tem1, mask);
2996 lowj = build_int_cst (type, 0);
2997 if (update_range_test (rangei, rangej, NULL, 1, opcode, ops, tem1,
2998 NULL, rangei->in_p, lowj, tem2,
2999 rangei->strict_overflow_p
3000 || rangej->strict_overflow_p))
3001 return true;
3002 return false;
3003 }
3004
3005 /* Perform the common checks for optimize_range_tests_xor and
3006 optimize_range_tests_diff.
3007 If OPTIMIZE_XOR is TRUE, call optimize_range_tests_xor,
3008 else call optimize_range_tests_diff. */
3009
3010 static bool
3011 optimize_range_tests_1 (enum tree_code opcode, int first, int length,
3012 bool optimize_xor, vec<operand_entry *> *ops,
3013 struct range_entry *ranges)
3014 {
3015 int i, j;
3016 bool any_changes = false;
3017 for (i = first; i < length; i++)
3018 {
3019 tree lowi, highi, lowj, highj, type, tem;
3020
3021 if (ranges[i].exp == NULL_TREE || ranges[i].in_p)
3022 continue;
3023 type = TREE_TYPE (ranges[i].exp);
3024 if (!INTEGRAL_TYPE_P (type))
3025 continue;
3026 lowi = ranges[i].low;
3027 if (lowi == NULL_TREE)
3028 lowi = TYPE_MIN_VALUE (type);
3029 highi = ranges[i].high;
3030 if (highi == NULL_TREE)
3031 continue;
3032 for (j = i + 1; j < length && j < i + 64; j++)
3033 {
3034 bool changes;
3035 if (ranges[i].exp != ranges[j].exp || ranges[j].in_p)
3036 continue;
3037 lowj = ranges[j].low;
3038 if (lowj == NULL_TREE)
3039 continue;
3040 highj = ranges[j].high;
3041 if (highj == NULL_TREE)
3042 highj = TYPE_MAX_VALUE (type);
3043 /* Check lowj > highi. */
3044 tem = fold_binary (GT_EXPR, boolean_type_node,
3045 lowj, highi);
3046 if (tem == NULL_TREE || !integer_onep (tem))
3047 continue;
3048 if (optimize_xor)
3049 changes = optimize_range_tests_xor (opcode, type, lowi, lowj,
3050 highi, highj, ops,
3051 ranges + i, ranges + j);
3052 else
3053 changes = optimize_range_tests_diff (opcode, type, lowi, lowj,
3054 highi, highj, ops,
3055 ranges + i, ranges + j);
3056 if (changes)
3057 {
3058 any_changes = true;
3059 break;
3060 }
3061 }
3062 }
3063 return any_changes;
3064 }
3065
3066 /* Helper function of optimize_range_tests_to_bit_test. Handle a single
3067 range, EXP, LOW, HIGH, compute bit mask of bits to test and return
3068 EXP on success, NULL otherwise. */
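/* As a worked example (with made-up numbers): for PREC 64, TOTALLOW 43,
   LOW 45 and HIGH 46, HIGH - LOW == 1 gives an initial *MASK of 0x3,
   which is then shifted left by LOW - TOTALLOW == 2, yielding 0xc.  */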
3069
3070 static tree
3071 extract_bit_test_mask (tree exp, int prec, tree totallow, tree low, tree high,
3072 wide_int *mask, tree *totallowp)
3073 {
3074 tree tem = int_const_binop (MINUS_EXPR, high, low);
3075 if (tem == NULL_TREE
3076 || TREE_CODE (tem) != INTEGER_CST
3077 || TREE_OVERFLOW (tem)
3078 || tree_int_cst_sgn (tem) == -1
3079 || compare_tree_int (tem, prec) != -1)
3080 return NULL_TREE;
3081
3082 unsigned HOST_WIDE_INT max = tree_to_uhwi (tem) + 1;
3083 *mask = wi::shifted_mask (0, max, false, prec);
3084 if (TREE_CODE (exp) == BIT_AND_EXPR
3085 && TREE_CODE (TREE_OPERAND (exp, 1)) == INTEGER_CST)
3086 {
3087 widest_int msk = wi::to_widest (TREE_OPERAND (exp, 1));
3088 msk = wi::zext (~msk, TYPE_PRECISION (TREE_TYPE (exp)));
3089 if (wi::popcount (msk) == 1
3090 && wi::ltu_p (msk, prec - max))
3091 {
3092 *mask |= wi::shifted_mask (msk.to_uhwi (), max, false, prec);
3093 max += msk.to_uhwi ();
3094 exp = TREE_OPERAND (exp, 0);
3095 if (integer_zerop (low)
3096 && TREE_CODE (exp) == PLUS_EXPR
3097 && TREE_CODE (TREE_OPERAND (exp, 1)) == INTEGER_CST)
3098 {
3099 tree ret = TREE_OPERAND (exp, 0);
3100 STRIP_NOPS (ret);
3101 widest_int bias
3102 = wi::neg (wi::sext (wi::to_widest (TREE_OPERAND (exp, 1)),
3103 TYPE_PRECISION (TREE_TYPE (low))));
3104 tree tbias = wide_int_to_tree (TREE_TYPE (ret), bias);
3105 if (totallowp)
3106 {
3107 *totallowp = tbias;
3108 return ret;
3109 }
3110 else if (!tree_int_cst_lt (totallow, tbias))
3111 return NULL_TREE;
3112 bias = wi::to_widest (tbias);
3113 bias -= wi::to_widest (totallow);
3114 if (bias >= 0 && bias < prec - max)
3115 {
3116 *mask = wi::lshift (*mask, bias);
3117 return ret;
3118 }
3119 }
3120 }
3121 }
3122 if (totallowp)
3123 return exp;
3124 if (!tree_int_cst_lt (totallow, low))
3125 return exp;
3126 tem = int_const_binop (MINUS_EXPR, low, totallow);
3127 if (tem == NULL_TREE
3128 || TREE_CODE (tem) != INTEGER_CST
3129 || TREE_OVERFLOW (tem)
3130 || compare_tree_int (tem, prec - max) == 1)
3131 return NULL_TREE;
3132
3133 *mask = wi::lshift (*mask, wi::to_widest (tem));
3134 return exp;
3135 }
3136
3137 /* Attempt to optimize small range tests using bit test.
3138 E.g.
3139 X != 43 && X != 76 && X != 44 && X != 78 && X != 49
3140 && X != 77 && X != 46 && X != 75 && X != 45 && X != 82
3141 has been optimized by the earlier transformations into:
3142 ((X - 43U) & ~32U) > 3U && X != 49 && X != 82
3143 As all the 43 through 82 range is less than 64 numbers,
3144 for 64-bit word targets optimize that into:
3145 (X - 43U) > 40U && ((1 << (X - 43U)) & 0x8F0000004FULL) == 0 */
3146
3147 static bool
3148 optimize_range_tests_to_bit_test (enum tree_code opcode, int first, int length,
3149 vec<operand_entry *> *ops,
3150 struct range_entry *ranges)
3151 {
3152 int i, j;
3153 bool any_changes = false;
3154 int prec = GET_MODE_BITSIZE (word_mode);
3155 auto_vec<struct range_entry *, 64> candidates;
3156
3157 for (i = first; i < length - 1; i++)
3158 {
3159 tree lowi, highi, lowj, highj, type;
3160
3161 if (ranges[i].exp == NULL_TREE || ranges[i].in_p)
3162 continue;
3163 type = TREE_TYPE (ranges[i].exp);
3164 if (!INTEGRAL_TYPE_P (type))
3165 continue;
3166 lowi = ranges[i].low;
3167 if (lowi == NULL_TREE)
3168 lowi = TYPE_MIN_VALUE (type);
3169 highi = ranges[i].high;
3170 if (highi == NULL_TREE)
3171 continue;
3172 wide_int mask;
3173 tree exp = extract_bit_test_mask (ranges[i].exp, prec, lowi, lowi,
3174 highi, &mask, &lowi);
3175 if (exp == NULL_TREE)
3176 continue;
3177 bool strict_overflow_p = ranges[i].strict_overflow_p;
3178 candidates.truncate (0);
3179 int end = MIN (i + 64, length);
3180 for (j = i + 1; j < end; j++)
3181 {
3182 tree exp2;
3183 if (ranges[j].exp == NULL_TREE || ranges[j].in_p)
3184 continue;
3185 if (ranges[j].exp == exp)
3186 ;
3187 else if (TREE_CODE (ranges[j].exp) == BIT_AND_EXPR)
3188 {
3189 exp2 = TREE_OPERAND (ranges[j].exp, 0);
3190 if (exp2 == exp)
3191 ;
3192 else if (TREE_CODE (exp2) == PLUS_EXPR)
3193 {
3194 exp2 = TREE_OPERAND (exp2, 0);
3195 STRIP_NOPS (exp2);
3196 if (exp2 != exp)
3197 continue;
3198 }
3199 else
3200 continue;
3201 }
3202 else
3203 continue;
3204 lowj = ranges[j].low;
3205 if (lowj == NULL_TREE)
3206 continue;
3207 highj = ranges[j].high;
3208 if (highj == NULL_TREE)
3209 highj = TYPE_MAX_VALUE (type);
3210 wide_int mask2;
3211 exp2 = extract_bit_test_mask (ranges[j].exp, prec, lowi, lowj,
3212 highj, &mask2, NULL);
3213 if (exp2 != exp)
3214 continue;
3215 mask |= mask2;
3216 strict_overflow_p |= ranges[j].strict_overflow_p;
3217 candidates.safe_push (&ranges[j]);
3218 }
3219
3220 /* If every possible relative value of the expression is a valid shift
3221 amount, then we can merge the entry test into the bit test. In that
3222 case, use the bit test if we would otherwise need 2 or more
3223 comparisons; in the other cases, the threshold is 3 comparisons. */
3224 wide_int min, max;
3225 bool entry_test_needed;
3226 if (TREE_CODE (exp) == SSA_NAME
3227 && get_range_info (exp, &min, &max) == VR_RANGE
3228 && wi::leu_p (max - min, prec - 1))
3229 {
3230 wide_int ilowi = wi::to_wide (lowi);
3231 if (wi::lt_p (min, ilowi, TYPE_SIGN (TREE_TYPE (lowi))))
3232 {
3233 lowi = wide_int_to_tree (TREE_TYPE (lowi), min);
3234 mask = wi::lshift (mask, ilowi - min);
3235 }
3236 else if (wi::gt_p (min, ilowi, TYPE_SIGN (TREE_TYPE (lowi))))
3237 {
3238 lowi = wide_int_to_tree (TREE_TYPE (lowi), min);
3239 mask = wi::lrshift (mask, min - ilowi);
3240 }
3241 entry_test_needed = false;
3242 }
3243 else
3244 entry_test_needed = true;
3245 if (candidates.length () >= (entry_test_needed ? 2 : 1))
3246 {
3247 tree high = wide_int_to_tree (TREE_TYPE (lowi),
3248 wi::to_widest (lowi)
3249 + prec - 1 - wi::clz (mask));
3250 operand_entry *oe = (*ops)[ranges[i].idx];
3251 tree op = oe->op;
3252 gimple *stmt = op ? SSA_NAME_DEF_STMT (op)
3253 : last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id));
3254 location_t loc = gimple_location (stmt);
3255 tree optype = op ? TREE_TYPE (op) : boolean_type_node;
3256
3257 /* See if it isn't cheaper to pretend the minimum value of the
3258 range is 0, if the maximum value is small enough.
3259 We can then avoid the subtraction of the minimum value, but the
3260 mask constant could perhaps be more expensive. */
3261 if (compare_tree_int (lowi, 0) > 0
3262 && compare_tree_int (high, prec) < 0)
3263 {
3264 int cost_diff;
3265 HOST_WIDE_INT m = tree_to_uhwi (lowi);
3266 rtx reg = gen_raw_REG (word_mode, 10000);
3267 bool speed_p = optimize_bb_for_speed_p (gimple_bb (stmt));
3268 cost_diff = set_src_cost (gen_rtx_PLUS (word_mode, reg,
3269 GEN_INT (-m)),
3270 word_mode, speed_p);
3271 rtx r = immed_wide_int_const (mask, word_mode);
3272 cost_diff += set_src_cost (gen_rtx_AND (word_mode, reg, r),
3273 word_mode, speed_p);
3274 r = immed_wide_int_const (wi::lshift (mask, m), word_mode);
3275 cost_diff -= set_src_cost (gen_rtx_AND (word_mode, reg, r),
3276 word_mode, speed_p);
3277 if (cost_diff > 0)
3278 {
3279 mask = wi::lshift (mask, m);
3280 lowi = build_zero_cst (TREE_TYPE (lowi));
3281 }
3282 }
3283
3284 tree tem;
3285 if (entry_test_needed)
3286 {
3287 tem = build_range_check (loc, optype, unshare_expr (exp),
3288 false, lowi, high);
3289 if (tem == NULL_TREE || is_gimple_val (tem))
3290 continue;
3291 }
3292 else
3293 tem = NULL_TREE;
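/* Now build the actual bit test:
   ((word_type) 1 << (EXP - LOWI)) & MASK == 0.  */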
3294 tree etype = unsigned_type_for (TREE_TYPE (exp));
3295 exp = fold_build2_loc (loc, MINUS_EXPR, etype,
3296 fold_convert_loc (loc, etype, exp),
3297 fold_convert_loc (loc, etype, lowi));
3298 exp = fold_convert_loc (loc, integer_type_node, exp);
3299 tree word_type = lang_hooks.types.type_for_mode (word_mode, 1);
3300 exp = fold_build2_loc (loc, LSHIFT_EXPR, word_type,
3301 build_int_cst (word_type, 1), exp);
3302 exp = fold_build2_loc (loc, BIT_AND_EXPR, word_type, exp,
3303 wide_int_to_tree (word_type, mask));
3304 exp = fold_build2_loc (loc, EQ_EXPR, optype, exp,
3305 build_zero_cst (word_type));
3306 if (is_gimple_val (exp))
3307 continue;
3308
3309 /* The shift might have undefined behavior if TEM is true,
3310 but reassociate_bb isn't prepared to have basic blocks
3311 split when it is running. So, temporarily emit code
3312 with BIT_IOR_EXPR instead of &&, and fix it up in
3313 branch_fixup. */
3314 gimple_seq seq = NULL;
3315 if (tem)
3316 {
3317 tem = force_gimple_operand (tem, &seq, true, NULL_TREE);
3318 gcc_assert (TREE_CODE (tem) == SSA_NAME);
3319 gimple_set_visited (SSA_NAME_DEF_STMT (tem), true);
3320 }
3321 gimple_seq seq2;
3322 exp = force_gimple_operand (exp, &seq2, true, NULL_TREE);
3323 gimple_seq_add_seq_without_update (&seq, seq2);
3324 gcc_assert (TREE_CODE (exp) == SSA_NAME);
3325 gimple_set_visited (SSA_NAME_DEF_STMT (exp), true);
3326 if (tem)
3327 {
3328 gimple *g = gimple_build_assign (make_ssa_name (optype),
3329 BIT_IOR_EXPR, tem, exp);
3330 gimple_set_location (g, loc);
3331 gimple_seq_add_stmt_without_update (&seq, g);
3332 exp = gimple_assign_lhs (g);
3333 }
3334 tree val = build_zero_cst (optype);
3335 if (update_range_test (&ranges[i], NULL, candidates.address (),
3336 candidates.length (), opcode, ops, exp,
3337 seq, false, val, val, strict_overflow_p))
3338 {
3339 any_changes = true;
3340 if (tem)
3341 reassoc_branch_fixups.safe_push (tem);
3342 }
3343 else
3344 gimple_seq_discard (seq);
3345 }
3346 }
3347 return any_changes;
3348 }
3349
3350 /* Optimize x != 0 && y != 0 && z != 0 into (x | y | z) != 0
3351 and similarly x != -1 && y != -1 && z != -1 into (x & y & z) != -1.
3352 Also, handle x < C && y < C && z < C where C is power of two as
3353 (x | y | z) < C. And also handle signed x < 0 && y < 0 && z < 0
3354 as (x | y | z) < 0. */
3355
3356 static bool
3357 optimize_range_tests_cmp_bitwise (enum tree_code opcode, int first, int length,
3358 vec<operand_entry *> *ops,
3359 struct range_entry *ranges)
3360 {
3361 int i;
3362 unsigned int b;
3363 bool any_changes = false;
3364 auto_vec<int, 128> buckets;
3365 auto_vec<int, 32> chains;
3366 auto_vec<struct range_entry *, 32> candidates;
3367
3368 for (i = first; i < length; i++)
3369 {
3370 int idx;
3371
3372 if (ranges[i].exp == NULL_TREE
3373 || TREE_CODE (ranges[i].exp) != SSA_NAME
3374 || TYPE_PRECISION (TREE_TYPE (ranges[i].exp)) <= 1
3375 || TREE_CODE (TREE_TYPE (ranges[i].exp)) == BOOLEAN_TYPE)
3376 continue;
3377
3378 if (ranges[i].low != NULL_TREE
3379 && ranges[i].high != NULL_TREE
3380 && ranges[i].in_p
3381 && tree_int_cst_equal (ranges[i].low, ranges[i].high))
3382 {
3383 idx = !integer_zerop (ranges[i].low);
3384 if (idx && !integer_all_onesp (ranges[i].low))
3385 continue;
3386 }
3387 else if (ranges[i].high != NULL_TREE
3388 && TREE_CODE (ranges[i].high) == INTEGER_CST
3389 && ranges[i].in_p)
3390 {
3391 wide_int w = wi::to_wide (ranges[i].high);
3392 int prec = TYPE_PRECISION (TREE_TYPE (ranges[i].exp));
3393 int l = wi::clz (w);
3394 idx = 2;
3395 if (l <= 0
3396 || l >= prec
3397 || w != wi::mask (prec - l, false, prec))
3398 continue;
3399 if (!((TYPE_UNSIGNED (TREE_TYPE (ranges[i].exp))
3400 && ranges[i].low == NULL_TREE)
3401 || (ranges[i].low
3402 && integer_zerop (ranges[i].low))))
3403 continue;
3404 }
3405 else if (ranges[i].high == NULL_TREE
3406 && ranges[i].low != NULL_TREE
3407 /* Perform this optimization only in the last
3408 reassoc pass, as it interferes with the reassociation
3409 itself and could also interfere with VRP etc., which might
3410 not be able to virtually undo the optimization. */
3411 && !reassoc_insert_powi_p
3412 && !TYPE_UNSIGNED (TREE_TYPE (ranges[i].exp))
3413 && integer_zerop (ranges[i].low))
3414 idx = 3;
3415 else
3416 continue;
3417
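/* Thread this entry onto the bucket for its kind and precision:
   idx 0 is x == 0, idx 1 is x == -1, idx 2 is an unsigned x < C with
   C a power of two, and idx 3 is a signed [0, +INF] range of either
   polarity (x < 0 or x >= 0).  BUCKETS holds the list heads and
   CHAINS the next links of the singly-linked lists.  */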
3418 b = TYPE_PRECISION (TREE_TYPE (ranges[i].exp)) * 4 + idx;
3419 if (buckets.length () <= b)
3420 buckets.safe_grow_cleared (b + 1, true);
3421 if (chains.length () <= (unsigned) i)
3422 chains.safe_grow (i + 1, true);
3423 chains[i] = buckets[b];
3424 buckets[b] = i + 1;
3425 }
3426
3427 FOR_EACH_VEC_ELT (buckets, b, i)
3428 if (i && chains[i - 1])
3429 {
3430 int j, k = i;
3431 if ((b % 4) == 2)
3432 {
3433 /* When ranges[X - 1].high + 1 is a power of two,
3434 we need to process the same bucket up to
3435 precision - 1 times, each time splitting the entries
3436 with the same high bound into one chain and the
3437 rest into another one to be processed later. */
3438 int this_prev = i;
3439 int other_prev = 0;
3440 for (j = chains[i - 1]; j; j = chains[j - 1])
3441 {
3442 if (tree_int_cst_equal (ranges[i - 1].high,
3443 ranges[j - 1].high))
3444 {
3445 chains[this_prev - 1] = j;
3446 this_prev = j;
3447 }
3448 else if (other_prev == 0)
3449 {
3450 buckets[b] = j;
3451 other_prev = j;
3452 }
3453 else
3454 {
3455 chains[other_prev - 1] = j;
3456 other_prev = j;
3457 }
3458 }
3459 chains[this_prev - 1] = 0;
3460 if (other_prev)
3461 chains[other_prev - 1] = 0;
3462 if (chains[i - 1] == 0)
3463 {
3464 if (other_prev)
3465 b--;
3466 continue;
3467 }
3468 }
3469 for (j = chains[i - 1]; j; j = chains[j - 1])
3470 {
3471 gimple *gk = SSA_NAME_DEF_STMT (ranges[k - 1].exp);
3472 gimple *gj = SSA_NAME_DEF_STMT (ranges[j - 1].exp);
3473 if (reassoc_stmt_dominates_stmt_p (gk, gj))
3474 k = j;
3475 }
3476 tree type1 = TREE_TYPE (ranges[k - 1].exp);
3477 tree type2 = NULL_TREE;
3478 bool strict_overflow_p = false;
3479 candidates.truncate (0);
3480 for (j = i; j; j = chains[j - 1])
3481 {
3482 tree type = TREE_TYPE (ranges[j - 1].exp);
3483 strict_overflow_p |= ranges[j - 1].strict_overflow_p;
3484 if ((b % 4) == 3)
3485 {
3486 /* For the signed < 0 cases the types should be
3487 really compatible (all signed with the same precision);
3488 ranges that have a different in_p from k are put
3489 first. */
3490 if (!useless_type_conversion_p (type1, type))
3491 continue;
3492 if (ranges[j - 1].in_p != ranges[k - 1].in_p)
3493 candidates.safe_push (&ranges[j - 1]);
3494 type2 = type1;
3495 continue;
3496 }
3497 if (j == k
3498 || useless_type_conversion_p (type1, type))
3499 ;
3500 else if (type2 == NULL_TREE
3501 || useless_type_conversion_p (type2, type))
3502 {
3503 if (type2 == NULL_TREE)
3504 type2 = type;
3505 candidates.safe_push (&ranges[j - 1]);
3506 }
3507 }
3508 unsigned l = candidates.length ();
3509 for (j = i; j; j = chains[j - 1])
3510 {
3511 tree type = TREE_TYPE (ranges[j - 1].exp);
3512 if (j == k)
3513 continue;
3514 if ((b % 4) == 3)
3515 {
3516 if (!useless_type_conversion_p (type1, type))
3517 continue;
3518 if (ranges[j - 1].in_p == ranges[k - 1].in_p)
3519 candidates.safe_push (&ranges[j - 1]);
3520 continue;
3521 }
3522 if (useless_type_conversion_p (type1, type))
3523 ;
3524 else if (type2 == NULL_TREE
3525 || useless_type_conversion_p (type2, type))
3526 continue;
3527 candidates.safe_push (&ranges[j - 1]);
3528 }
3529 gimple_seq seq = NULL;
3530 tree op = NULL_TREE;
3531 unsigned int id;
3532 struct range_entry *r;
3533 candidates.safe_push (&ranges[k - 1]);
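/* Emit the bitwise chain.  Candidates before index L have TYPE2 (or,
   for the signed < 0 case, the opposite in_p from K) and are combined
   first; at index L the partial result is converted (inverted for the
   signed < 0 case) to TYPE1, and the remaining TYPE1 entries, ending
   with the dominating range K itself, follow.  */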
3534 FOR_EACH_VEC_ELT (candidates, id, r)
3535 {
3536 gimple *g;
3537 enum tree_code code;
3538 if (id == 0)
3539 {
3540 op = r->exp;
3541 continue;
3542 }
3543 if (id == l)
3544 {
3545 code = (b % 4) == 3 ? BIT_NOT_EXPR : NOP_EXPR;
3546 g = gimple_build_assign (make_ssa_name (type1), code, op);
3547 gimple_seq_add_stmt_without_update (&seq, g);
3548 op = gimple_assign_lhs (g);
3549 }
3550 tree type = TREE_TYPE (r->exp);
3551 tree exp = r->exp;
3552 if (id >= l && !useless_type_conversion_p (type1, type))
3553 {
3554 g = gimple_build_assign (make_ssa_name (type1), NOP_EXPR, exp);
3555 gimple_seq_add_stmt_without_update (&seq, g);
3556 exp = gimple_assign_lhs (g);
3557 }
3558 if ((b % 4) == 3)
3559 code = r->in_p ? BIT_IOR_EXPR : BIT_AND_EXPR;
3560 else
3561 code = (b % 4) == 1 ? BIT_AND_EXPR : BIT_IOR_EXPR;
3562 g = gimple_build_assign (make_ssa_name (id >= l ? type1 : type2),
3563 code, op, exp);
3564 gimple_seq_add_stmt_without_update (&seq, g);
3565 op = gimple_assign_lhs (g);
3566 }
3567 candidates.pop ();
3568 if (update_range_test (&ranges[k - 1], NULL, candidates.address (),
3569 candidates.length (), opcode, ops, op,
3570 seq, ranges[k - 1].in_p, ranges[k - 1].low,
3571 ranges[k - 1].high, strict_overflow_p))
3572 any_changes = true;
3573 else
3574 gimple_seq_discard (seq);
3575 if ((b % 4) == 2 && buckets[b] != i)
3576 /* There is more work to do for this bucket. */
3577 b--;
3578 }
3579
3580 return any_changes;
3581 }
3582
3583 /* Attempt to optimize for signed a and b where b is known to be >= 0:
3584 a >= 0 && a < b into (unsigned) a < (unsigned) b
3585 a >= 0 && a <= b into (unsigned) a <= (unsigned) b */
3586
3587 static bool
3588 optimize_range_tests_var_bound (enum tree_code opcode, int first, int length,
3589 vec<operand_entry *> *ops,
3590 struct range_entry *ranges,
3591 basic_block first_bb)
3592 {
3593 int i;
3594 bool any_changes = false;
3595 hash_map<tree, int> *map = NULL;
3596
3597 for (i = first; i < length; i++)
3598 {
3599 if (ranges[i].exp == NULL_TREE
3600 || TREE_CODE (ranges[i].exp) != SSA_NAME
3601 || !ranges[i].in_p)
3602 continue;
3603
3604 tree type = TREE_TYPE (ranges[i].exp);
3605 if (!INTEGRAL_TYPE_P (type)
3606 || TYPE_UNSIGNED (type)
3607 || ranges[i].low == NULL_TREE
3608 || !integer_zerop (ranges[i].low)
3609 || ranges[i].high != NULL_TREE)
3610 continue;
3611 /* EXP >= 0 here. */
3612 if (map == NULL)
3613 map = new hash_map <tree, int>;
3614 map->put (ranges[i].exp, i);
3615 }
3616
3617 if (map == NULL)
3618 return false;
3619
3620 for (i = 0; i < length; i++)
3621 {
3622 bool in_p = ranges[i].in_p;
3623 if (ranges[i].low == NULL_TREE
3624 || ranges[i].high == NULL_TREE)
3625 continue;
3626 if (!integer_zerop (ranges[i].low)
3627 || !integer_zerop (ranges[i].high))
3628 {
3629 if (ranges[i].exp
3630 && TYPE_PRECISION (TREE_TYPE (ranges[i].exp)) == 1
3631 && TYPE_UNSIGNED (TREE_TYPE (ranges[i].exp))
3632 && integer_onep (ranges[i].low)
3633 && integer_onep (ranges[i].high))
3634 in_p = !in_p;
3635 else
3636 continue;
3637 }
3638
3639 gimple *stmt;
3640 tree_code ccode;
3641 tree rhs1, rhs2;
3642 if (ranges[i].exp)
3643 {
3644 if (TREE_CODE (ranges[i].exp) != SSA_NAME)
3645 continue;
3646 stmt = SSA_NAME_DEF_STMT (ranges[i].exp);
3647 if (!is_gimple_assign (stmt))
3648 continue;
3649 ccode = gimple_assign_rhs_code (stmt);
3650 rhs1 = gimple_assign_rhs1 (stmt);
3651 rhs2 = gimple_assign_rhs2 (stmt);
3652 }
3653 else
3654 {
3655 operand_entry *oe = (*ops)[ranges[i].idx];
3656 stmt = last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id));
3657 if (gimple_code (stmt) != GIMPLE_COND)
3658 continue;
3659 ccode = gimple_cond_code (stmt);
3660 rhs1 = gimple_cond_lhs (stmt);
3661 rhs2 = gimple_cond_rhs (stmt);
3662 }
3663
3664 if (TREE_CODE (rhs1) != SSA_NAME
3665 || rhs2 == NULL_TREE
3666 || TREE_CODE (rhs2) != SSA_NAME)
3667 continue;
3668
3669 switch (ccode)
3670 {
3671 case GT_EXPR:
3672 case GE_EXPR:
3673 case LT_EXPR:
3674 case LE_EXPR:
3675 break;
3676 default:
3677 continue;
3678 }
3679 if (in_p)
3680 ccode = invert_tree_comparison (ccode, false);
3681 switch (ccode)
3682 {
3683 case GT_EXPR:
3684 case GE_EXPR:
3685 std::swap (rhs1, rhs2);
3686 ccode = swap_tree_comparison (ccode);
3687 break;
3688 case LT_EXPR:
3689 case LE_EXPR:
3690 break;
3691 default:
3692 gcc_unreachable ();
3693 }
3694
3695 int *idx = map->get (rhs1);
3696 if (idx == NULL)
3697 continue;
3698
3699 /* maybe_optimize_range_tests allows statements without side-effects
3700 in the basic blocks as long as they are consumed in the same bb.
3701 Make sure rhs2's def stmt is not among them; otherwise we can't
3702 safely use get_nonzero_bits on it. E.g. in:
3703 # RANGE [-83, 1] NONZERO 173
3704 # k_32 = PHI <k_47(13), k_12(9)>
3705 ...
3706 if (k_32 >= 0)
3707 goto <bb 5>; [26.46%]
3708 else
3709 goto <bb 9>; [73.54%]
3710
3711 <bb 5> [local count: 140323371]:
3712 # RANGE [0, 1] NONZERO 1
3713 _5 = (int) k_32;
3714 # RANGE [0, 4] NONZERO 4
3715 _21 = _5 << 2;
3716 # RANGE [0, 4] NONZERO 4
3717 iftmp.0_44 = (char) _21;
3718 if (k_32 < iftmp.0_44)
3719 goto <bb 6>; [84.48%]
3720 else
3721 goto <bb 9>; [15.52%]
3722 the ranges on _5/_21/iftmp.0_44 are flow sensitive and assume that
3723 k_32 >= 0. If we optimized k_32 >= 0 to true and k_32 < iftmp.0_44
3724 to (unsigned) k_32 < (unsigned) iftmp.0_44, then we would execute
3725 those stmts even for negative k_32, and the value ranges would no
3726 longer be guaranteed, so the optimization would be invalid. */
3727 while (opcode == ERROR_MARK)
3728 {
3729 gimple *g = SSA_NAME_DEF_STMT (rhs2);
3730 basic_block bb2 = gimple_bb (g);
3731 if (bb2
3732 && bb2 != first_bb
3733 && dominated_by_p (CDI_DOMINATORS, bb2, first_bb))
3734 {
3735 /* As an exception, handle a few common cases. */
3736 if (gimple_assign_cast_p (g)
3737 && INTEGRAL_TYPE_P (TREE_TYPE (gimple_assign_rhs1 (g))))
3738 {
3739 tree op0 = gimple_assign_rhs1 (g);
3740 if (TYPE_UNSIGNED (TREE_TYPE (op0))
3741 && (TYPE_PRECISION (TREE_TYPE (rhs2))
3742 > TYPE_PRECISION (TREE_TYPE (op0))))
3743 /* Zero-extension is always ok. */
3744 break;
3745 else if (TYPE_PRECISION (TREE_TYPE (rhs2))
3746 == TYPE_PRECISION (TREE_TYPE (op0))
3747 && TREE_CODE (op0) == SSA_NAME)
3748 {
3749 /* Cast from signed to unsigned or vice versa. Retry
3750 with the op0 as new rhs2. */
3751 rhs2 = op0;
3752 continue;
3753 }
3754 }
3755 else if (is_gimple_assign (g)
3756 && gimple_assign_rhs_code (g) == BIT_AND_EXPR
3757 && TREE_CODE (gimple_assign_rhs2 (g)) == INTEGER_CST
3758 && !wi::neg_p (wi::to_wide (gimple_assign_rhs2 (g))))
3759 /* Masking with INTEGER_CST with MSB clear is always ok
3760 too. */
3761 break;
3762 rhs2 = NULL_TREE;
3763 }
3764 break;
3765 }
3766 if (rhs2 == NULL_TREE)
3767 continue;
3768
3769 wide_int nz = get_nonzero_bits (rhs2);
3770 if (wi::neg_p (nz))
3771 continue;
3772
3773 /* We have EXP < RHS2 or EXP <= RHS2 where EXP >= 0
3774 and RHS2 is known to be RHS2 >= 0. */
3775 tree utype = unsigned_type_for (TREE_TYPE (rhs1));
3776
3777 enum warn_strict_overflow_code wc = WARN_STRICT_OVERFLOW_COMPARISON;
3778 if ((ranges[*idx].strict_overflow_p
3779 || ranges[i].strict_overflow_p)
3780 && issue_strict_overflow_warning (wc))
3781 warning_at (gimple_location (stmt), OPT_Wstrict_overflow,
3782 "assuming signed overflow does not occur "
3783 "when simplifying range test");
3784
3785 if (dump_file && (dump_flags & TDF_DETAILS))
3786 {
3787 struct range_entry *r = &ranges[*idx];
3788 fprintf (dump_file, "Optimizing range test ");
3789 print_generic_expr (dump_file, r->exp);
3790 fprintf (dump_file, " +[");
3791 print_generic_expr (dump_file, r->low);
3792 fprintf (dump_file, ", ");
3793 print_generic_expr (dump_file, r->high);
3794 fprintf (dump_file, "] and comparison ");
3795 print_generic_expr (dump_file, rhs1);
3796 fprintf (dump_file, " %s ", op_symbol_code (ccode));
3797 print_generic_expr (dump_file, rhs2);
3798 fprintf (dump_file, "\n into (");
3799 print_generic_expr (dump_file, utype);
3800 fprintf (dump_file, ") ");
3801 print_generic_expr (dump_file, rhs1);
3802 fprintf (dump_file, " %s (", op_symbol_code (ccode));
3803 print_generic_expr (dump_file, utype);
3804 fprintf (dump_file, ") ");
3805 print_generic_expr (dump_file, rhs2);
3806 fprintf (dump_file, "\n");
3807 }
3808
3809 operand_entry *oe = (*ops)[ranges[i].idx];
3810 ranges[i].in_p = 0;
3811 if (opcode == BIT_IOR_EXPR
3812 || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
3813 {
3814 ranges[i].in_p = 1;
3815 ccode = invert_tree_comparison (ccode, false);
3816 }
3817
3818 unsigned int uid = gimple_uid (stmt);
3819 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
3820 gimple *g = gimple_build_assign (make_ssa_name (utype), NOP_EXPR, rhs1);
3821 gimple_set_uid (g, uid);
3822 rhs1 = gimple_assign_lhs (g);
3823 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3824 if (!useless_type_conversion_p (utype, TREE_TYPE (rhs2)))
3825 {
3826 g = gimple_build_assign (make_ssa_name (utype), NOP_EXPR, rhs2);
3827 gimple_set_uid (g, uid);
3828 rhs2 = gimple_assign_lhs (g);
3829 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3830 }
3831 if (tree_swap_operands_p (rhs1, rhs2))
3832 {
3833 std::swap (rhs1, rhs2);
3834 ccode = swap_tree_comparison (ccode);
3835 }
3836 if (gimple_code (stmt) == GIMPLE_COND)
3837 {
3838 gcond *c = as_a <gcond *> (stmt);
3839 gimple_cond_set_code (c, ccode);
3840 gimple_cond_set_lhs (c, rhs1);
3841 gimple_cond_set_rhs (c, rhs2);
3842 update_stmt (stmt);
3843 }
3844 else
3845 {
3846 tree ctype = oe->op ? TREE_TYPE (oe->op) : boolean_type_node;
3847 if (!INTEGRAL_TYPE_P (ctype)
3848 || (TREE_CODE (ctype) != BOOLEAN_TYPE
3849 && TYPE_PRECISION (ctype) != 1))
3850 ctype = boolean_type_node;
3851 g = gimple_build_assign (make_ssa_name (ctype), ccode, rhs1, rhs2);
3852 gimple_set_uid (g, uid);
3853 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3854 if (oe->op && ctype != TREE_TYPE (oe->op))
3855 {
3856 g = gimple_build_assign (make_ssa_name (TREE_TYPE (oe->op)),
3857 NOP_EXPR, gimple_assign_lhs (g));
3858 gimple_set_uid (g, uid);
3859 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3860 }
3861 ranges[i].exp = gimple_assign_lhs (g);
3862 oe->op = ranges[i].exp;
3863 ranges[i].low = build_zero_cst (TREE_TYPE (ranges[i].exp));
3864 ranges[i].high = ranges[i].low;
3865 }
3866 ranges[i].strict_overflow_p = false;
3867 oe = (*ops)[ranges[*idx].idx];
3868 /* Now change all the other range test immediate uses, so that
3869 those tests will be optimized away. */
3870 if (opcode == ERROR_MARK)
3871 {
3872 if (oe->op)
3873 oe->op = build_int_cst (TREE_TYPE (oe->op),
3874 oe->rank == BIT_IOR_EXPR ? 0 : 1);
3875 else
3876 oe->op = (oe->rank == BIT_IOR_EXPR
3877 ? boolean_false_node : boolean_true_node);
3878 }
3879 else
3880 oe->op = error_mark_node;
3881 ranges[*idx].exp = NULL_TREE;
3882 ranges[*idx].low = NULL_TREE;
3883 ranges[*idx].high = NULL_TREE;
3884 any_changes = true;
3885 }
3886
3887 delete map;
3888 return any_changes;
3889 }
3890
3891 /* Optimize range tests, similarly to how fold_range_test optimizes
3892 them on trees. The tree code for the binary
3893 operation between all the operands is OPCODE.
3894 If OPCODE is ERROR_MARK, optimize_range_tests is called from within
3895 maybe_optimize_range_tests for inter-bb range optimization.
3896 In that case if oe->op is NULL, oe->id is bb->index whose
3897 GIMPLE_COND is && or ||ed into the test, and oe->rank says
3898 the actual opcode.
3899 FIRST_BB is the first basic block if OPCODE is ERROR_MARK. */
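/* As an illustrative example (made-up SSA names), the two operands of
     x_1 == 0 || x_1 == 1
   describe the singleton ranges [0, 0] and [1, 1], which merge_ranges
   can combine into the single range [0, 1]; update_range_test can then
   emit it as one comparison such as (unsigned) x_1 <= 1.  */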
3900
3901 static bool
3902 optimize_range_tests (enum tree_code opcode,
3903 vec<operand_entry *> *ops, basic_block first_bb)
3904 {
3905 unsigned int length = ops->length (), i, j, first;
3906 operand_entry *oe;
3907 struct range_entry *ranges;
3908 bool any_changes = false;
3909
3910 if (length == 1)
3911 return false;
3912
3913 ranges = XNEWVEC (struct range_entry, length);
3914 for (i = 0; i < length; i++)
3915 {
3916 oe = (*ops)[i];
3917 ranges[i].idx = i;
3918 init_range_entry (ranges + i, oe->op,
3919 oe->op
3920 ? NULL
3921 : last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id)));
3922 /* For |, invert it now; we will invert it again before emitting
3923 the optimized expression. */
3924 if (opcode == BIT_IOR_EXPR
3925 || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
3926 ranges[i].in_p = !ranges[i].in_p;
3927 }
3928
3929 qsort (ranges, length, sizeof (*ranges), range_entry_cmp);
3930 for (i = 0; i < length; i++)
3931 if (ranges[i].exp != NULL_TREE && TREE_CODE (ranges[i].exp) == SSA_NAME)
3932 break;
3933
3934 /* Try to merge ranges. */
3935 for (first = i; i < length; i++)
3936 {
3937 tree low = ranges[i].low;
3938 tree high = ranges[i].high;
3939 int in_p = ranges[i].in_p;
3940 bool strict_overflow_p = ranges[i].strict_overflow_p;
3941 int update_fail_count = 0;
3942
3943 for (j = i + 1; j < length; j++)
3944 {
3945 if (ranges[i].exp != ranges[j].exp)
3946 break;
3947 if (!merge_ranges (&in_p, &low, &high, in_p, low, high,
3948 ranges[j].in_p, ranges[j].low, ranges[j].high))
3949 break;
3950 strict_overflow_p |= ranges[j].strict_overflow_p;
3951 }
3952
3953 if (j == i + 1)
3954 continue;
3955
3956 if (update_range_test (ranges + i, ranges + i + 1, NULL, j - i - 1,
3957 opcode, ops, ranges[i].exp, NULL, in_p,
3958 low, high, strict_overflow_p))
3959 {
3960 i = j - 1;
3961 any_changes = true;
3962 }
3963 /* Avoid quadratic complexity if all merge_ranges calls would succeed,
3964 while update_range_test would fail. */
3965 else if (update_fail_count == 64)
3966 i = j - 1;
3967 else
3968 ++update_fail_count;
3969 }
3970
3971 any_changes |= optimize_range_tests_1 (opcode, first, length, true,
3972 ops, ranges);
3973
3974 if (BRANCH_COST (optimize_function_for_speed_p (cfun), false) >= 2)
3975 any_changes |= optimize_range_tests_1 (opcode, first, length, false,
3976 ops, ranges);
3977 if (lshift_cheap_p (optimize_function_for_speed_p (cfun)))
3978 any_changes |= optimize_range_tests_to_bit_test (opcode, first, length,
3979 ops, ranges);
3980 any_changes |= optimize_range_tests_var_bound (opcode, first, length, ops,
3981 ranges, first_bb);
3982 any_changes |= optimize_range_tests_cmp_bitwise (opcode, first, length,
3983 ops, ranges);
3984
3985 if (any_changes && opcode != ERROR_MARK)
3986 {
3987 j = 0;
3988 FOR_EACH_VEC_ELT (*ops, i, oe)
3989 {
3990 if (oe->op == error_mark_node)
3991 continue;
3992 else if (i != j)
3993 (*ops)[j] = oe;
3994 j++;
3995 }
3996 ops->truncate (j);
3997 }
3998
3999 XDELETEVEC (ranges);
4000 return any_changes;
4001 }
4002
4003 /* A subroutine of optimize_vec_cond_expr to extract and canonicalize
4004 the operands of the VEC_COND_EXPR. Returns ERROR_MARK on failure,
4005 otherwise the comparison code. TYPE is a return value that is set
4006 to type of comparison. */
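/* The GIMPLE shape this expects is roughly (illustrative SSA names):
     cond_1 = a_2 < b_3;
     var_4 = VEC_COND_EXPR <cond_1, { -1, ... }, { 0, ... }>;
   for which it returns LT_EXPR with *LHS = a_2, *RHS = b_3 and *TYPE
   the type of cond_1.  */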
4007
4008 static tree_code
4009 ovce_extract_ops (tree var, gassign **rets, bool *reti, tree *type,
4010 tree *lhs, tree *rhs, gassign **vcond)
4011 {
4012 if (TREE_CODE (var) != SSA_NAME)
4013 return ERROR_MARK;
4014
4015 gassign *stmt = dyn_cast <gassign *> (SSA_NAME_DEF_STMT (var));
4016 if (stmt == NULL)
4017 return ERROR_MARK;
4018 if (vcond)
4019 *vcond = stmt;
4020
4021 /* ??? If we start creating more COND_EXPR, we could perform
4022 this same optimization with them. For now, simplify. */
4023 if (gimple_assign_rhs_code (stmt) != VEC_COND_EXPR)
4024 return ERROR_MARK;
4025
4026 tree cond = gimple_assign_rhs1 (stmt);
4027 tree_code cmp = TREE_CODE (cond);
4028 if (cmp != SSA_NAME)
4029 return ERROR_MARK;
4030
4031 gassign *assign = dyn_cast<gassign *> (SSA_NAME_DEF_STMT (cond));
4032 if (assign == NULL
4033 || TREE_CODE_CLASS (gimple_assign_rhs_code (assign)) != tcc_comparison)
4034 return ERROR_MARK;
4035
4036 cmp = gimple_assign_rhs_code (assign);
4037 if (lhs)
4038 *lhs = gimple_assign_rhs1 (assign);
4039 if (rhs)
4040 *rhs = gimple_assign_rhs2 (assign);
4041
4042 /* ??? For now, allow only canonical true and false result vectors.
4043 We could expand this to other constants should the need arise,
4044 but at the moment we don't create them. */
4045 tree t = gimple_assign_rhs2 (stmt);
4046 tree f = gimple_assign_rhs3 (stmt);
4047 bool inv;
4048 if (integer_all_onesp (t))
4049 inv = false;
4050 else if (integer_all_onesp (f))
4051 {
4052 cmp = invert_tree_comparison (cmp, false);
4053 inv = true;
4054 }
4055 else
4056 return ERROR_MARK;
4057 if (!integer_zerop (f))
4058 return ERROR_MARK;
4059
4060 /* Success! */
4061 if (rets)
4062 *rets = assign;
4063 if (reti)
4064 *reti = inv;
4065 if (type)
4066 *type = TREE_TYPE (cond);
4067 return cmp;
4068 }
4069
4070 /* Optimize the condition of VEC_COND_EXPRs which have been combined
4071 with OPCODE (either BIT_AND_EXPR or BIT_IOR_EXPR). */
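/* An illustrative sketch (made-up SSA names):
     v_1 = VEC_COND_EXPR <a_4 < b_5, { -1, ... }, { 0, ... }>;
     v_2 = VEC_COND_EXPR <a_4 < c_6, { -1, ... }, { 0, ... }>;
     r_3 = v_1 & v_2;
   If maybe_fold_and_comparisons can combine the two comparisons into
   one (say a_4 < { 3, ... } and a_4 < { 5, ... } into a_4 < { 3, ... }),
   the first VEC_COND_EXPR is rewritten to use the combined condition
   and the second operand is dropped from the ops vector.  */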
4072
4073 static bool
4074 optimize_vec_cond_expr (tree_code opcode, vec<operand_entry *> *ops)
4075 {
4076 unsigned int length = ops->length (), i, j;
4077 bool any_changes = false;
4078
4079 if (length == 1)
4080 return false;
4081
4082 for (i = 0; i < length; ++i)
4083 {
4084 tree elt0 = (*ops)[i]->op;
4085
4086 gassign *stmt0, *vcond0;
4087 bool invert;
4088 tree type, lhs0, rhs0;
4089 tree_code cmp0 = ovce_extract_ops (elt0, &stmt0, &invert, &type, &lhs0,
4090 &rhs0, &vcond0);
4091 if (cmp0 == ERROR_MARK)
4092 continue;
4093
4094 for (j = i + 1; j < length; ++j)
4095 {
4096 tree &elt1 = (*ops)[j]->op;
4097
4098 gassign *stmt1, *vcond1;
4099 tree lhs1, rhs1;
4100 tree_code cmp1 = ovce_extract_ops (elt1, &stmt1, NULL, NULL, &lhs1,
4101 &rhs1, &vcond1);
4102 if (cmp1 == ERROR_MARK)
4103 continue;
4104
4105 tree comb;
4106 if (opcode == BIT_AND_EXPR)
4107 comb = maybe_fold_and_comparisons (type, cmp0, lhs0, rhs0,
4108 cmp1, lhs1, rhs1);
4109 else if (opcode == BIT_IOR_EXPR)
4110 comb = maybe_fold_or_comparisons (type, cmp0, lhs0, rhs0,
4111 cmp1, lhs1, rhs1);
4112 else
4113 gcc_unreachable ();
4114 if (comb == NULL)
4115 continue;
4116
4117 /* Success! */
4118 if (dump_file && (dump_flags & TDF_DETAILS))
4119 {
4120 fprintf (dump_file, "Transforming ");
4121 print_generic_expr (dump_file, gimple_assign_lhs (stmt0));
4122 fprintf (dump_file, " %c ", opcode == BIT_AND_EXPR ? '&' : '|');
4123 print_generic_expr (dump_file, gimple_assign_lhs (stmt1));
4124 fprintf (dump_file, " into ");
4125 print_generic_expr (dump_file, comb);
4126 fputc ('\n', dump_file);
4127 }
4128
4129 gimple_stmt_iterator gsi = gsi_for_stmt (vcond0);
4130 tree exp = force_gimple_operand_gsi (&gsi, comb, true, NULL_TREE,
4131 true, GSI_SAME_STMT);
4132 if (invert)
4133 swap_ssa_operands (vcond0, gimple_assign_rhs2_ptr (vcond0),
4134 gimple_assign_rhs3_ptr (vcond0));
4135 gimple_assign_set_rhs1 (vcond0, exp);
4136 update_stmt (vcond0);
4137
4138 elt1 = error_mark_node;
4139 any_changes = true;
4140 }
4141 }
4142
4143 if (any_changes)
4144 {
4145 operand_entry *oe;
4146 j = 0;
4147 FOR_EACH_VEC_ELT (*ops, i, oe)
4148 {
4149 if (oe->op == error_mark_node)
4150 continue;
4151 else if (i != j)
4152 (*ops)[j] = oe;
4153 j++;
4154 }
4155 ops->truncate (j);
4156 }
4157
4158 return any_changes;
4159 }
4160
4161 /* Return true if STMT is a cast like:
4162 <bb N>:
4163 ...
4164 _123 = (int) _234;
4165
4166 <bb M>:
4167 # _345 = PHI <_123(N), 1(...), 1(...)>
4168 where _234 has bool type, _123 has a single use and
4169 bb N has a single successor M. This is commonly used in
4170 the last block of a range test.
4171
4172 Also return true if STMT is a tcc_comparison like:
4173 <bb N>:
4174 ...
4175 _234 = a_2(D) == 2;
4176
4177 <bb M>:
4178 # _345 = PHI <_234(N), 1(...), 1(...)>
4179 _346 = (int) _345;
4180 where _234 has boolean type, a single use and
4181 bb N has a single successor M. This is commonly used in
4182 the last block of a range test. */
4183
4184 static bool
4185 final_range_test_p (gimple *stmt)
4186 {
4187 basic_block bb, rhs_bb, lhs_bb;
4188 edge e;
4189 tree lhs, rhs;
4190 use_operand_p use_p;
4191 gimple *use_stmt;
4192
4193 if (!gimple_assign_cast_p (stmt)
4194 && (!is_gimple_assign (stmt)
4195 || (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt))
4196 != tcc_comparison)))
4197 return false;
4198 bb = gimple_bb (stmt);
4199 if (!single_succ_p (bb))
4200 return false;
4201 e = single_succ_edge (bb);
4202 if (e->flags & EDGE_COMPLEX)
4203 return false;
4204
4205 lhs = gimple_assign_lhs (stmt);
4206 rhs = gimple_assign_rhs1 (stmt);
4207 if (gimple_assign_cast_p (stmt)
4208 && (!INTEGRAL_TYPE_P (TREE_TYPE (lhs))
4209 || TREE_CODE (rhs) != SSA_NAME
4210 || TREE_CODE (TREE_TYPE (rhs)) != BOOLEAN_TYPE))
4211 return false;
4212
4213 if (!gimple_assign_cast_p (stmt)
4214 && (TREE_CODE (TREE_TYPE (lhs)) != BOOLEAN_TYPE))
4215 return false;
4216
4217 /* Test whether lhs is consumed only by a PHI in the only successor bb. */
4218 if (!single_imm_use (lhs, &use_p, &use_stmt))
4219 return false;
4220
4221 if (gimple_code (use_stmt) != GIMPLE_PHI
4222 || gimple_bb (use_stmt) != e->dest)
4223 return false;
4224
4225 /* And that the rhs is defined in the same loop. */
4226 if (gimple_assign_cast_p (stmt))
4227 {
4228 if (TREE_CODE (rhs) != SSA_NAME
4229 || !(rhs_bb = gimple_bb (SSA_NAME_DEF_STMT (rhs)))
4230 || !flow_bb_inside_loop_p (loop_containing_stmt (stmt), rhs_bb))
4231 return false;
4232 }
4233 else
4234 {
4235 if (TREE_CODE (lhs) != SSA_NAME
4236 || !(lhs_bb = gimple_bb (SSA_NAME_DEF_STMT (lhs)))
4237 || !flow_bb_inside_loop_p (loop_containing_stmt (stmt), lhs_bb))
4238 return false;
4239 }
4240
4241 return true;
4242 }
4243
4244 /* Return true if BB is a suitable basic block for inter-bb range test
4245 optimization. If BACKWARD is true, BB should be the only predecessor
4246 of TEST_BB, and *OTHER_BB is either NULL and filled in by the routine,
4247 or compared against to find a common basic block to which all the
4248 conditions branch if true resp. false. If BACKWARD is false, TEST_BB
4249 should be the only predecessor of BB. *TEST_SWAPPED_P is set to true
4250 if TEST_BB is a bb ending in a condition where the edge to the
4251 non-*OTHER_BB block points to an empty block that falls through into
4252 *OTHER_BB and the phi args match that path. */
4253
4254 static bool
4255 suitable_cond_bb (basic_block bb, basic_block test_bb, basic_block *other_bb,
4256 bool *test_swapped_p, bool backward)
4257 {
4258 edge_iterator ei, ei2;
4259 edge e, e2;
4260 gimple *stmt;
4261 gphi_iterator gsi;
4262 bool other_edge_seen = false;
4263 bool is_cond;
4264
4265 if (test_bb == bb)
4266 return false;
4267 /* Check last stmt first. */
4268 stmt = last_stmt (bb);
4269 if (stmt == NULL
4270 || (gimple_code (stmt) != GIMPLE_COND
4271 && (backward || !final_range_test_p (stmt)))
4272 || gimple_visited_p (stmt)
4273 || stmt_could_throw_p (cfun, stmt)
4274 || *other_bb == bb)
4275 return false;
4276 is_cond = gimple_code (stmt) == GIMPLE_COND;
4277 if (is_cond)
4278 {
4279 /* If last stmt is GIMPLE_COND, verify that one of the succ edges
4280 goes to the next bb (if BACKWARD, it is TEST_BB), and the other
4281 to *OTHER_BB (if not set yet, try to find it out). */
4282 if (EDGE_COUNT (bb->succs) != 2)
4283 return false;
4284 FOR_EACH_EDGE (e, ei, bb->succs)
4285 {
4286 if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
4287 return false;
4288 if (e->dest == test_bb)
4289 {
4290 if (backward)
4291 continue;
4292 else
4293 return false;
4294 }
4295 if (e->dest == bb)
4296 return false;
4297 if (*other_bb == NULL)
4298 {
4299 FOR_EACH_EDGE (e2, ei2, test_bb->succs)
4300 if (!(e2->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
4301 return false;
4302 else if (e->dest == e2->dest)
4303 *other_bb = e->dest;
4304 if (*other_bb == NULL)
4305 return false;
4306 }
4307 if (e->dest == *other_bb)
4308 other_edge_seen = true;
4309 else if (backward)
4310 return false;
4311 }
4312 if (*other_bb == NULL || !other_edge_seen)
4313 return false;
4314 }
4315 else if (single_succ (bb) != *other_bb)
4316 return false;
4317
4318 /* Now check all PHIs of *OTHER_BB. */
4319 e = find_edge (bb, *other_bb);
4320 e2 = find_edge (test_bb, *other_bb);
4321 retry:;
4322 for (gsi = gsi_start_phis (e->dest); !gsi_end_p (gsi); gsi_next (&gsi))
4323 {
4324 gphi *phi = gsi.phi ();
4325 /* If both BB and TEST_BB end with GIMPLE_COND, all PHI arguments
4326 corresponding to BB and TEST_BB predecessor must be the same. */
4327 if (!operand_equal_p (gimple_phi_arg_def (phi, e->dest_idx),
4328 gimple_phi_arg_def (phi, e2->dest_idx), 0))
4329 {
4330 /* Otherwise, if one of the blocks doesn't end with GIMPLE_COND,
4331 one of the PHIs should have the lhs of the last stmt in
4332 that block as PHI arg and that PHI should have 0 or 1
4333 corresponding to it in all other range test basic blocks
4334 considered. */
4335 if (!is_cond)
4336 {
4337 if (gimple_phi_arg_def (phi, e->dest_idx)
4338 == gimple_assign_lhs (stmt)
4339 && (integer_zerop (gimple_phi_arg_def (phi, e2->dest_idx))
4340 || integer_onep (gimple_phi_arg_def (phi,
4341 e2->dest_idx))))
4342 continue;
4343 }
4344 else
4345 {
4346 gimple *test_last = last_stmt (test_bb);
4347 if (gimple_code (test_last) == GIMPLE_COND)
4348 {
4349 if (backward ? e2->src != test_bb : e->src != bb)
4350 return false;
4351
4352 /* For last_bb, handle also:
4353 if (x_3(D) == 3)
4354 goto <bb 6>; [34.00%]
4355 else
4356 goto <bb 7>; [66.00%]
4357
4358 <bb 6> [local count: 79512730]:
4359
4360 <bb 7> [local count: 1073741824]:
4361 # prephitmp_7 = PHI <1(3), 1(4), 0(5), 1(2), 1(6)>
4362 where bb 7 is *OTHER_BB, but the PHI values from the
4363 earlier bbs match the path through the empty bb
4364 in between. */
4365 edge e3;
4366 if (backward)
4367 e3 = EDGE_SUCC (test_bb,
4368 e2 == EDGE_SUCC (test_bb, 0) ? 1 : 0);
4369 else
4370 e3 = EDGE_SUCC (bb,
4371 e == EDGE_SUCC (bb, 0) ? 1 : 0);
4372 if (empty_block_p (e3->dest)
4373 && single_succ_p (e3->dest)
4374 && single_succ (e3->dest) == *other_bb
4375 && single_pred_p (e3->dest)
4376 && single_succ_edge (e3->dest)->flags == EDGE_FALLTHRU)
4377 {
4378 if (backward)
4379 e2 = single_succ_edge (e3->dest);
4380 else
4381 e = single_succ_edge (e3->dest);
4382 if (test_swapped_p)
4383 *test_swapped_p = true;
4384 goto retry;
4385 }
4386 }
4387 else if (gimple_phi_arg_def (phi, e2->dest_idx)
4388 == gimple_assign_lhs (test_last)
4389 && (integer_zerop (gimple_phi_arg_def (phi,
4390 e->dest_idx))
4391 || integer_onep (gimple_phi_arg_def (phi,
4392 e->dest_idx))))
4393 continue;
4394 }
4395
4396 return false;
4397 }
4398 }
4399 return true;
4400 }
4401
4402 /* Return true if BB doesn't have side-effects that would disallow
4403 range test optimization, all SSA_NAMEs set in the bb are consumed
4404 in the bb and there are no PHIs. */
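/* For example (an illustrative bb, made-up SSA names):
     <bb 3>:
     _1 = x_2(D) & 7;
     if (_1 != 0)
       goto <bb 4>;
     else
       goto <bb 5>;
   qualifies: there are no PHIs, the BIT_AND_EXPR cannot trap and its
   lhs _1 is only consumed by the final GIMPLE_COND in the same bb.  */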
4405
4406 bool
4407 no_side_effect_bb (basic_block bb)
4408 {
4409 gimple_stmt_iterator gsi;
4410 gimple *last;
4411
4412 if (!gimple_seq_empty_p (phi_nodes (bb)))
4413 return false;
4414 last = last_stmt (bb);
4415 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
4416 {
4417 gimple *stmt = gsi_stmt (gsi);
4418 tree lhs;
4419 imm_use_iterator imm_iter;
4420 use_operand_p use_p;
4421
4422 if (is_gimple_debug (stmt))
4423 continue;
4424 if (gimple_has_side_effects (stmt))
4425 return false;
4426 if (stmt == last)
4427 return true;
4428 if (!is_gimple_assign (stmt))
4429 return false;
4430 lhs = gimple_assign_lhs (stmt);
4431 if (TREE_CODE (lhs) != SSA_NAME)
4432 return false;
4433 if (gimple_assign_rhs_could_trap_p (stmt))
4434 return false;
4435 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
4436 {
4437 gimple *use_stmt = USE_STMT (use_p);
4438 if (is_gimple_debug (use_stmt))
4439 continue;
4440 if (gimple_bb (use_stmt) != bb)
4441 return false;
4442 }
4443 }
4444 return false;
4445 }
4446
4447 /* If VAR is set by CODE (BIT_{AND,IOR}_EXPR) which is reassociable,
4448 return true and fill in *OPS recursively. */
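/* E.g. (illustrative SSA names) for VAR defined as
     _1 = a_2 | b_3;
     var_4 = _1 | c_5;
   a call with CODE == BIT_IOR_EXPR pushes operand entries for a_2,
   b_3 and c_5, flattening the whole single-use | chain into *OPS.  */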
4449
4450 static bool
4451 get_ops (tree var, enum tree_code code, vec<operand_entry *> *ops,
4452 class loop *loop)
4453 {
4454 gimple *stmt = SSA_NAME_DEF_STMT (var);
4455 tree rhs[2];
4456 int i;
4457
4458 if (!is_reassociable_op (stmt, code, loop))
4459 return false;
4460
4461 rhs[0] = gimple_assign_rhs1 (stmt);
4462 rhs[1] = gimple_assign_rhs2 (stmt);
4463 gimple_set_visited (stmt, true);
4464 for (i = 0; i < 2; i++)
4465 if (TREE_CODE (rhs[i]) == SSA_NAME
4466 && !get_ops (rhs[i], code, ops, loop)
4467 && has_single_use (rhs[i]))
4468 {
4469 operand_entry *oe = operand_entry_pool.allocate ();
4470
4471 oe->op = rhs[i];
4472 oe->rank = code;
4473 oe->id = 0;
4474 oe->count = 1;
4475 oe->stmt_to_insert = NULL;
4476 ops->safe_push (oe);
4477 }
4478 return true;
4479 }
4480
4481 /* Find the ops that were added by get_ops starting from VAR, see if
4482 they were changed during update_range_test and, if so, create new
4483 stmts. */
4484
4485 static tree
4486 update_ops (tree var, enum tree_code code, vec<operand_entry *> ops,
4487 unsigned int *pidx, class loop *loop)
4488 {
4489 gimple *stmt = SSA_NAME_DEF_STMT (var);
4490 tree rhs[4];
4491 int i;
4492
4493 if (!is_reassociable_op (stmt, code, loop))
4494 return NULL;
4495
4496 rhs[0] = gimple_assign_rhs1 (stmt);
4497 rhs[1] = gimple_assign_rhs2 (stmt);
4498 rhs[2] = rhs[0];
4499 rhs[3] = rhs[1];
4500 for (i = 0; i < 2; i++)
4501 if (TREE_CODE (rhs[i]) == SSA_NAME)
4502 {
4503 rhs[2 + i] = update_ops (rhs[i], code, ops, pidx, loop);
4504 if (rhs[2 + i] == NULL_TREE)
4505 {
4506 if (has_single_use (rhs[i]))
4507 rhs[2 + i] = ops[(*pidx)++]->op;
4508 else
4509 rhs[2 + i] = rhs[i];
4510 }
4511 }
4512 if ((rhs[2] != rhs[0] || rhs[3] != rhs[1])
4513 && (rhs[2] != rhs[1] || rhs[3] != rhs[0]))
4514 {
4515 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
4516 var = make_ssa_name (TREE_TYPE (var));
4517 gassign *g = gimple_build_assign (var, gimple_assign_rhs_code (stmt),
4518 rhs[2], rhs[3]);
4519 gimple_set_uid (g, gimple_uid (stmt));
4520 gimple_set_visited (g, true);
4521 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
4522 }
4523 return var;
4524 }
4525
4526 /* Structure to track the initial value passed to get_ops and
4527 the range in the ops vector for each basic block. */
4528
4529 struct inter_bb_range_test_entry
4530 {
4531 tree op;
4532 unsigned int first_idx, last_idx;
4533 };
4534
4535 /* Inter-bb range test optimization.
4536
4537 Returns TRUE if a gimple conditional is optimized to a true/false,
4538 otherwise return FALSE.
4539
4540 This indicates to the caller that it should run a CFG cleanup pass
4541 once reassociation is completed. */
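/* The canonical shape handled here looks like (an illustrative CFG,
   made-up SSA names):

     <bb 2>:
     if (x_1 == 3) goto <bb 4>; else goto <bb 3>;

     <bb 3>:
     if (x_1 == 4) goto <bb 4>; else goto <bb 5>;

   where <bb 4> is other_bb.  The two GIMPLE_CONDs are collected as
   operands and optimize_range_tests can merge them (here into a single
   range test for x_1 in [3, 4]); the now-redundant condition is folded
   to a constant, which is why the caller may need to run a CFG cleanup
   afterwards.  */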
4542
4543 static bool
4544 maybe_optimize_range_tests (gimple *stmt)
4545 {
4546 basic_block first_bb = gimple_bb (stmt);
4547 basic_block last_bb = first_bb;
4548 basic_block other_bb = NULL;
4549 basic_block bb;
4550 edge_iterator ei;
4551 edge e;
4552 auto_vec<operand_entry *> ops;
4553 auto_vec<inter_bb_range_test_entry> bbinfo;
4554 bool any_changes = false;
4555 bool cfg_cleanup_needed = false;
4556
4557 /* Consider only basic blocks that end with GIMPLE_COND or
4558 a cast statement satisfying final_range_test_p. All
4559 but the last bb in the first_bb .. last_bb range
4560 should end with GIMPLE_COND. */
4561 if (gimple_code (stmt) == GIMPLE_COND)
4562 {
4563 if (EDGE_COUNT (first_bb->succs) != 2)
4564 return cfg_cleanup_needed;
4565 }
4566 else if (final_range_test_p (stmt))
4567 other_bb = single_succ (first_bb);
4568 else
4569 return cfg_cleanup_needed;
4570
4571 if (stmt_could_throw_p (cfun, stmt))
4572 return cfg_cleanup_needed;
4573
4574 /* As relative ordering of post-dominator sons isn't fixed,
4575 maybe_optimize_range_tests can be called first on any
4576 bb in the range we want to optimize. So, start searching
4577 backwards, if first_bb can be set to a predecessor. */
4578 while (single_pred_p (first_bb))
4579 {
4580 basic_block pred_bb = single_pred (first_bb);
4581 if (!suitable_cond_bb (pred_bb, first_bb, &other_bb, NULL, true))
4582 break;
4583 if (!no_side_effect_bb (first_bb))
4584 break;
4585 first_bb = pred_bb;
4586 }
4587 /* If first_bb is last_bb, other_bb hasn't been computed yet.
4588 Before starting forward search in last_bb successors, find
4589 out the other_bb. */
4590 if (first_bb == last_bb)
4591 {
4592 other_bb = NULL;
4593 /* As a non-GIMPLE_COND last stmt always terminates the range,
4594 if the backward search didn't discover anything, just give up. */
4595 if (gimple_code (stmt) != GIMPLE_COND)
4596 return cfg_cleanup_needed;
4597 /* Look at both successors. Either it ends with a GIMPLE_COND
4598 and satisfies suitable_cond_bb, or ends with a cast and
4599 other_bb is that cast's successor. */
4600 FOR_EACH_EDGE (e, ei, first_bb->succs)
4601 if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE))
4602 || e->dest == first_bb)
4603 return cfg_cleanup_needed;
4604 else if (single_pred_p (e->dest))
4605 {
4606 stmt = last_stmt (e->dest);
4607 if (stmt
4608 && gimple_code (stmt) == GIMPLE_COND
4609 && EDGE_COUNT (e->dest->succs) == 2)
4610 {
4611 if (suitable_cond_bb (first_bb, e->dest, &other_bb,
4612 NULL, true))
4613 break;
4614 else
4615 other_bb = NULL;
4616 }
4617 else if (stmt
4618 && final_range_test_p (stmt)
4619 && find_edge (first_bb, single_succ (e->dest)))
4620 {
4621 other_bb = single_succ (e->dest);
4622 if (other_bb == first_bb)
4623 other_bb = NULL;
4624 }
4625 }
4626 if (other_bb == NULL)
4627 return cfg_cleanup_needed;
4628 }
4629 /* Now do the forward search, moving last_bb to successor bbs
4630 that aren't other_bb. */
4631 while (EDGE_COUNT (last_bb->succs) == 2)
4632 {
4633 FOR_EACH_EDGE (e, ei, last_bb->succs)
4634 if (e->dest != other_bb)
4635 break;
4636 if (e == NULL)
4637 break;
4638 if (!single_pred_p (e->dest))
4639 break;
4640 if (!suitable_cond_bb (e->dest, last_bb, &other_bb, NULL, false))
4641 break;
4642 if (!no_side_effect_bb (e->dest))
4643 break;
4644 last_bb = e->dest;
4645 }
4646 if (first_bb == last_bb)
4647 return cfg_cleanup_needed;
4648 /* Here basic blocks first_bb through last_bb's predecessor
4649 end with GIMPLE_COND, all of them have one of the edges to
4650 other_bb and another to another block in the range,
4651 all blocks except first_bb don't have side-effects and
4652 last_bb ends with either GIMPLE_COND, or cast satisfying
4653 final_range_test_p. */
4654 for (bb = last_bb; ; bb = single_pred (bb))
4655 {
4656 enum tree_code code;
4657 tree lhs, rhs;
4658 inter_bb_range_test_entry bb_ent;
4659
4660 bb_ent.op = NULL_TREE;
4661 bb_ent.first_idx = ops.length ();
4662 bb_ent.last_idx = bb_ent.first_idx;
4663 e = find_edge (bb, other_bb);
4664 stmt = last_stmt (bb);
4665 gimple_set_visited (stmt, true);
4666 if (gimple_code (stmt) != GIMPLE_COND)
4667 {
4668 use_operand_p use_p;
4669 gimple *phi;
4670 edge e2;
4671 unsigned int d;
4672
4673 lhs = gimple_assign_lhs (stmt);
4674 rhs = gimple_assign_rhs1 (stmt);
4675 gcc_assert (bb == last_bb);
4676
4677 /* stmt is
4678 _123 = (int) _234;
4679 OR
4680 _234 = a_2(D) == 2;
4681
4682 followed by:
4683 <bb M>:
4684 # _345 = PHI <_123(N), 1(...), 1(...)>
4685
4686 or 0 instead of 1. If it is 0, the _234
4687 range test is anded together with all the
4688 other range tests, if it is 1, it is ored with
4689 them. */
4690 single_imm_use (lhs, &use_p, &phi);
4691 gcc_assert (gimple_code (phi) == GIMPLE_PHI);
4692 e2 = find_edge (first_bb, other_bb);
4693 d = e2->dest_idx;
4694 gcc_assert (gimple_phi_arg_def (phi, e->dest_idx) == lhs);
4695 if (integer_zerop (gimple_phi_arg_def (phi, d)))
4696 code = BIT_AND_EXPR;
4697 else
4698 {
4699 gcc_checking_assert (integer_onep (gimple_phi_arg_def (phi, d)));
4700 code = BIT_IOR_EXPR;
4701 }
4702
4703 /* If _234 SSA_NAME_DEF_STMT is
4704 _234 = _567 | _789;
4705 (or &, corresponding to 1/0 in the phi arguments),
4706 push into ops the individual range test arguments
4707 of the bitwise or resp. and, recursively. */
4708 if (TREE_CODE (rhs) == SSA_NAME
4709 && (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt))
4710 != tcc_comparison)
4711 && !get_ops (rhs, code, &ops,
4712 loop_containing_stmt (stmt))
4713 && has_single_use (rhs))
4714 {
4715 /* Otherwise, push the _234 range test itself. */
4716 operand_entry *oe = operand_entry_pool.allocate ();
4717
4718 oe->op = rhs;
4719 oe->rank = code;
4720 oe->id = 0;
4721 oe->count = 1;
4722 oe->stmt_to_insert = NULL;
4723 ops.safe_push (oe);
4724 bb_ent.last_idx++;
4725 bb_ent.op = rhs;
4726 }
4727 else if (is_gimple_assign (stmt)
4728 && (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt))
4729 == tcc_comparison)
4730 && !get_ops (lhs, code, &ops,
4731 loop_containing_stmt (stmt))
4732 && has_single_use (lhs))
4733 {
4734 operand_entry *oe = operand_entry_pool.allocate ();
4735 oe->op = lhs;
4736 oe->rank = code;
4737 oe->id = 0;
4738 oe->count = 1;
4739 ops.safe_push (oe);
4740 bb_ent.last_idx++;
4741 bb_ent.op = lhs;
4742 }
4743 else
4744 {
4745 bb_ent.last_idx = ops.length ();
4746 bb_ent.op = rhs;
4747 }
4748 bbinfo.safe_push (bb_ent);
4749 continue;
4750 }
4751 else if (bb == last_bb)
4752 {
4753 /* For last_bb, handle also:
4754 if (x_3(D) == 3)
4755 goto <bb 6>; [34.00%]
4756 else
4757 goto <bb 7>; [66.00%]
4758
4759 <bb 6> [local count: 79512730]:
4760
4761 <bb 7> [local count: 1073741824]:
4762 # prephitmp_7 = PHI <1(3), 1(4), 0(5), 1(2), 1(6)>
4763 where bb 7 is OTHER_BB, but the PHI values from the
4764 earlier bbs match the path through the empty bb
4765 in between. */
4766 bool test_swapped_p = false;
4767 bool ok = suitable_cond_bb (single_pred (last_bb), last_bb,
4768 &other_bb, &test_swapped_p, true);
4769 gcc_assert (ok);
4770 if (test_swapped_p)
4771 e = EDGE_SUCC (bb, e == EDGE_SUCC (bb, 0) ? 1 : 0);
4772 }
4773 /* Otherwise stmt is GIMPLE_COND. */
4774 code = gimple_cond_code (stmt);
4775 lhs = gimple_cond_lhs (stmt);
4776 rhs = gimple_cond_rhs (stmt);
4777 if (TREE_CODE (lhs) == SSA_NAME
4778 && INTEGRAL_TYPE_P (TREE_TYPE (lhs))
4779 && ((code != EQ_EXPR && code != NE_EXPR)
4780 || rhs != boolean_false_node
4781 /* Either push into ops the individual bitwise
4782 or resp. and operands, depending on which
4783 edge is other_bb. */
4784 || !get_ops (lhs, (((e->flags & EDGE_TRUE_VALUE) == 0)
4785 ^ (code == EQ_EXPR))
4786 ? BIT_AND_EXPR : BIT_IOR_EXPR, &ops,
4787 loop_containing_stmt (stmt))))
4788 {
4789 /* Or push the GIMPLE_COND stmt itself. */
4790 operand_entry *oe = operand_entry_pool.allocate ();
4791
4792 oe->op = NULL;
4793 oe->rank = (e->flags & EDGE_TRUE_VALUE)
4794 ? BIT_IOR_EXPR : BIT_AND_EXPR;
4795 /* oe->op = NULL signals that there is no SSA_NAME
4796 for the range test, and oe->id instead is the
4797 number of the basic block at whose end the
4798 GIMPLE_COND is. */
4799 oe->id = bb->index;
4800 oe->count = 1;
4801 oe->stmt_to_insert = NULL;
4802 ops.safe_push (oe);
4803 bb_ent.op = NULL;
4804 bb_ent.last_idx++;
4805 }
4806 else if (ops.length () > bb_ent.first_idx)
4807 {
4808 bb_ent.op = lhs;
4809 bb_ent.last_idx = ops.length ();
4810 }
4811 bbinfo.safe_push (bb_ent);
4812 if (bb == first_bb)
4813 break;
4814 }
4815 if (ops.length () > 1)
4816 any_changes = optimize_range_tests (ERROR_MARK, &ops, first_bb);
4817 if (any_changes)
4818 {
4819 unsigned int idx, max_idx = 0;
4820 /* update_ops relies on has_single_use predicates returning the
4821 same values as it did during get_ops earlier. Additionally it
4822 never removes statements, only adds new ones and it should walk
4823 from the single imm use and check the predicate already before
4824 making those changes.
4825 On the other hand, the handling of GIMPLE_COND directly can turn
4826 previously multiply used SSA_NAMEs into single use SSA_NAMEs, so
4827 it needs to be done in a separate loop afterwards. */
4828 for (bb = last_bb, idx = 0; ; bb = single_pred (bb), idx++)
4829 {
4830 if (bbinfo[idx].first_idx < bbinfo[idx].last_idx
4831 && bbinfo[idx].op != NULL_TREE)
4832 {
4833 tree new_op;
4834
4835 max_idx = idx;
4836 stmt = last_stmt (bb);
4837 new_op = update_ops (bbinfo[idx].op,
4838 (enum tree_code)
4839 ops[bbinfo[idx].first_idx]->rank,
4840 ops, &bbinfo[idx].first_idx,
4841 loop_containing_stmt (stmt));
4842 if (new_op == NULL_TREE)
4843 {
4844 gcc_assert (bb == last_bb);
4845 new_op = ops[bbinfo[idx].first_idx++]->op;
4846 }
4847 if (bbinfo[idx].op != new_op)
4848 {
4849 imm_use_iterator iter;
4850 use_operand_p use_p;
4851 gimple *use_stmt, *cast_or_tcc_cmp_stmt = NULL;
4852
4853 FOR_EACH_IMM_USE_STMT (use_stmt, iter, bbinfo[idx].op)
4854 if (is_gimple_debug (use_stmt))
4855 continue;
4856 else if (gimple_code (use_stmt) == GIMPLE_COND
4857 || gimple_code (use_stmt) == GIMPLE_PHI)
4858 FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
4859 SET_USE (use_p, new_op);
4860 else if ((is_gimple_assign (use_stmt)
4861 && (TREE_CODE_CLASS
4862 (gimple_assign_rhs_code (use_stmt))
4863 == tcc_comparison)))
4864 cast_or_tcc_cmp_stmt = use_stmt;
4865 else if (gimple_assign_cast_p (use_stmt))
4866 cast_or_tcc_cmp_stmt = use_stmt;
4867 else
4868 gcc_unreachable ();
4869
4870 if (cast_or_tcc_cmp_stmt)
4871 {
4872 gcc_assert (bb == last_bb);
4873 tree lhs = gimple_assign_lhs (cast_or_tcc_cmp_stmt);
4874 tree new_lhs = make_ssa_name (TREE_TYPE (lhs));
4875 enum tree_code rhs_code
4876 = gimple_assign_cast_p (cast_or_tcc_cmp_stmt)
4877 ? gimple_assign_rhs_code (cast_or_tcc_cmp_stmt)
4878 : CONVERT_EXPR;
4879 gassign *g;
4880 if (is_gimple_min_invariant (new_op))
4881 {
4882 new_op = fold_convert (TREE_TYPE (lhs), new_op);
4883 g = gimple_build_assign (new_lhs, new_op);
4884 }
4885 else
4886 g = gimple_build_assign (new_lhs, rhs_code, new_op);
4887 gimple_stmt_iterator gsi
4888 = gsi_for_stmt (cast_or_tcc_cmp_stmt);
4889 gimple_set_uid (g, gimple_uid (cast_or_tcc_cmp_stmt));
4890 gimple_set_visited (g, true);
4891 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
4892 FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs)
4893 if (is_gimple_debug (use_stmt))
4894 continue;
4895 else if (gimple_code (use_stmt) == GIMPLE_COND
4896 || gimple_code (use_stmt) == GIMPLE_PHI)
4897 FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
4898 SET_USE (use_p, new_lhs);
4899 else
4900 gcc_unreachable ();
4901 }
4902 }
4903 }
4904 if (bb == first_bb)
4905 break;
4906 }
4907 for (bb = last_bb, idx = 0; ; bb = single_pred (bb), idx++)
4908 {
4909 if (bbinfo[idx].first_idx < bbinfo[idx].last_idx
4910 && bbinfo[idx].op == NULL_TREE
4911 && ops[bbinfo[idx].first_idx]->op != NULL_TREE)
4912 {
4913 gcond *cond_stmt = as_a <gcond *> (last_stmt (bb));
4914
4915 if (idx > max_idx)
4916 max_idx = idx;
4917
4918 /* If we collapse the conditional to a true/false
4919 condition, then bubble that knowledge up to our caller. */
4920 if (integer_zerop (ops[bbinfo[idx].first_idx]->op))
4921 {
4922 gimple_cond_make_false (cond_stmt);
4923 cfg_cleanup_needed = true;
4924 }
4925 else if (integer_onep (ops[bbinfo[idx].first_idx]->op))
4926 {
4927 gimple_cond_make_true (cond_stmt);
4928 cfg_cleanup_needed = true;
4929 }
4930 else
4931 {
4932 gimple_cond_set_code (cond_stmt, NE_EXPR);
4933 gimple_cond_set_lhs (cond_stmt,
4934 ops[bbinfo[idx].first_idx]->op);
4935 gimple_cond_set_rhs (cond_stmt, boolean_false_node);
4936 }
4937 update_stmt (cond_stmt);
4938 }
4939 if (bb == first_bb)
4940 break;
4941 }
4942
4943 /* The above changes could result in basic blocks after the first
4944 modified one, up to and including last_bb, to be executed even if
4945 they would not be in the original program. If the value ranges of
4946 assignment lhs' in those bbs were dependent on the conditions
4947 guarding those basic blocks which now can change, the VRs might
4948 be incorrect. As no_side_effect_bb should ensure those SSA_NAMEs
4949 are only used within the same bb, it should not be a big deal if
4950 we just reset all the VRs in those bbs. See PR68671. */
4951 for (bb = last_bb, idx = 0; idx < max_idx; bb = single_pred (bb), idx++)
4952 reset_flow_sensitive_info_in_bb (bb);
4953 }
4954 return cfg_cleanup_needed;
4955 }
4956
4957 /* Return true if OPERAND is defined by a PHI node which uses the LHS
4958 of STMT in its operands. This is also known as a "destructive
4959 update" operation. */
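/* E.g. (illustrative SSA names), for a loop accumulation like
     # sum_1 = PHI <sum_2(latch), 0(preheader)>
     sum_2 = x_3 + sum_1;
   calling this with the addition as STMT and sum_1 as OPERAND returns
   true, because sum_1 is defined by a PHI that uses the addition's
   lhs sum_2.  */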
4960
4961 static bool
4962 is_phi_for_stmt (gimple *stmt, tree operand)
4963 {
4964 gimple *def_stmt;
4965 gphi *def_phi;
4966 tree lhs;
4967 use_operand_p arg_p;
4968 ssa_op_iter i;
4969
4970 if (TREE_CODE (operand) != SSA_NAME)
4971 return false;
4972
4973 lhs = gimple_assign_lhs (stmt);
4974
4975 def_stmt = SSA_NAME_DEF_STMT (operand);
4976 def_phi = dyn_cast <gphi *> (def_stmt);
4977 if (!def_phi)
4978 return false;
4979
4980 FOR_EACH_PHI_ARG (arg_p, def_phi, i, SSA_OP_USE)
4981 if (lhs == USE_FROM_PTR (arg_p))
4982 return true;
4983 return false;
4984 }
4985
4986 /* If VAR has zero uses, remove its def stmt and recurse
4987 on its rhs1 operand. */
4988
4989 static void
4990 remove_visited_stmt_chain (tree var)
4991 {
4992 gimple *stmt;
4993 gimple_stmt_iterator gsi;
4994
4995 while (1)
4996 {
4997 if (TREE_CODE (var) != SSA_NAME || !has_zero_uses (var))
4998 return;
4999 stmt = SSA_NAME_DEF_STMT (var);
5000 if (is_gimple_assign (stmt) && gimple_visited_p (stmt))
5001 {
5002 var = gimple_assign_rhs1 (stmt);
5003 gsi = gsi_for_stmt (stmt);
5004 reassoc_remove_stmt (&gsi);
5005 release_defs (stmt);
5006 }
5007 else
5008 return;
5009 }
5010 }
5011
5012 /* This function checks three consecutive operands in
5013 the passed operands vector OPS starting from OPINDEX and
5014 swaps two operands if it is profitable for the binary operation
5015 consuming the OPINDEX + 1 and OPINDEX + 2 operands.
5016
5017 We pair ops with the same rank if possible.
5018
5019 The alternative we try is to see if STMT is a destructive
5020 update style statement, which is like:
5021 b = phi (a, ...)
5022 a = c + b;
5023 In that case, we want to use the destructive update form to
5024 expose the possible vectorizer sum reduction opportunity.
5025 In that case, the third operand will be the phi node. This
5026 check is not performed if STMT is null.
5027
5028 We could, of course, try to be better as noted above, and do a
5029 lot of work to try to find these opportunities in >3 operand
5030 cases, but it is unlikely to be worth it. */
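/* E.g. (an illustrative sketch) for operand ranks {3, 3, 5} at
   OPINDEX, the first and third entries are swapped to give {5, 3, 3},
   so that the binary operation consuming the OPINDEX + 1 and
   OPINDEX + 2 operands pairs the two rank-3 operands.  */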
5031
5032 static void
5033 swap_ops_for_binary_stmt (vec<operand_entry *> ops,
5034 unsigned int opindex, gimple *stmt)
5035 {
5036 operand_entry *oe1, *oe2, *oe3;
5037
5038 oe1 = ops[opindex];
5039 oe2 = ops[opindex + 1];
5040 oe3 = ops[opindex + 2];
5041
5042 if ((oe1->rank == oe2->rank
5043 && oe2->rank != oe3->rank)
5044 || (stmt && is_phi_for_stmt (stmt, oe3->op)
5045 && !is_phi_for_stmt (stmt, oe1->op)
5046 && !is_phi_for_stmt (stmt, oe2->op)))
5047 std::swap (*oe1, *oe3);
5048 else if ((oe1->rank == oe3->rank
5049 && oe2->rank != oe3->rank)
5050 || (stmt && is_phi_for_stmt (stmt, oe2->op)
5051 && !is_phi_for_stmt (stmt, oe1->op)
5052 && !is_phi_for_stmt (stmt, oe3->op)))
5053 std::swap (*oe1, *oe2);
5054 }
5055
5056 /* If the definition of RHS1 or RHS2 occurs after STMT, return the
5057 later of those two definitions; otherwise return STMT. */
5058
5059 static inline gimple *
5060 find_insert_point (gimple *stmt, tree rhs1, tree rhs2)
5061 {
5062 if (TREE_CODE (rhs1) == SSA_NAME
5063 && reassoc_stmt_dominates_stmt_p (stmt, SSA_NAME_DEF_STMT (rhs1)))
5064 stmt = SSA_NAME_DEF_STMT (rhs1);
5065 if (TREE_CODE (rhs2) == SSA_NAME
5066 && reassoc_stmt_dominates_stmt_p (stmt, SSA_NAME_DEF_STMT (rhs2)))
5067 stmt = SSA_NAME_DEF_STMT (rhs2);
5068 return stmt;
5069 }
5070
5071 /* If the stmt that defines operand has to be inserted, insert it
5072 before the use. */
5073 static void
5074 insert_stmt_before_use (gimple *stmt, gimple *stmt_to_insert)
5075 {
5076 gcc_assert (is_gimple_assign (stmt_to_insert));
5077 tree rhs1 = gimple_assign_rhs1 (stmt_to_insert);
5078 tree rhs2 = gimple_assign_rhs2 (stmt_to_insert);
5079 gimple *insert_point = find_insert_point (stmt, rhs1, rhs2);
5080 gimple_stmt_iterator gsi = gsi_for_stmt (insert_point);
5081 gimple_set_uid (stmt_to_insert, gimple_uid (insert_point));
5082
5083 /* If the insert point is not stmt, then insert_point would be
5084 the point where operand rhs1 or rhs2 is defined. In this case,
5085 stmt_to_insert has to be inserted afterwards. This would
5086 only happen when the stmt insertion point is flexible. */
5087 if (stmt == insert_point)
5088 gsi_insert_before (&gsi, stmt_to_insert, GSI_NEW_STMT);
5089 else
5090 insert_stmt_after (stmt_to_insert, insert_point);
5091 }
5092
5093
5094 /* Recursively rewrite our linearized statements so that the operators
5095 match those in OPS[OPINDEX], putting the computation in rank
5096 order. Return new lhs.
5097 CHANGED is true if we shouldn't reuse the lhs SSA_NAME both in
5098 the current stmt and during recursive invocations.
5099 NEXT_CHANGED is true if we shouldn't reuse the lhs SSA_NAME in
5100 recursive invocations. */
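/* An illustrative sketch (made-up names) for OPS == {o0, o1, o2, o3}:
   the linearized chain is rewritten as
     _1 = o2 + o3;
     _2 = _1 + o1;
     lhs = _2 + o0;
   i.e. each recursion level consumes one operand as rhs2 and the
   final level consumes the last two.  */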
5101
5102 static tree
5103 rewrite_expr_tree (gimple *stmt, enum tree_code rhs_code, unsigned int opindex,
5104 vec<operand_entry *> ops, bool changed, bool next_changed)
5105 {
5106 tree rhs1 = gimple_assign_rhs1 (stmt);
5107 tree rhs2 = gimple_assign_rhs2 (stmt);
5108 tree lhs = gimple_assign_lhs (stmt);
5109 operand_entry *oe;
5110
5111 /* The final recursion case for this function is that you have
5112 exactly two operations left.
5113 If we had exactly one op in the entire list to start with, we
5114 would have never called this function, and the tail recursion
5115 rewrites them one at a time. */
5116 if (opindex + 2 == ops.length ())
5117 {
5118 operand_entry *oe1, *oe2;
5119
5120 oe1 = ops[opindex];
5121 oe2 = ops[opindex + 1];
5122
5123 if (rhs1 != oe1->op || rhs2 != oe2->op)
5124 {
5125 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
5126 unsigned int uid = gimple_uid (stmt);
5127
5128 if (dump_file && (dump_flags & TDF_DETAILS))
5129 {
5130 fprintf (dump_file, "Transforming ");
5131 print_gimple_stmt (dump_file, stmt, 0);
5132 }
5133
5134 /* If the stmt that defines operand has to be inserted, insert it
5135 before the use. */
5136 if (oe1->stmt_to_insert)
5137 insert_stmt_before_use (stmt, oe1->stmt_to_insert);
5138 if (oe2->stmt_to_insert)
5139 insert_stmt_before_use (stmt, oe2->stmt_to_insert);
5140 /* Even when changed is false, reassociation could have e.g. removed
5141 some redundant operations, so unless we are just swapping the
5142 arguments or unless there is no change at all (then we just
5143 return lhs), force creation of a new SSA_NAME. */
5144 if (changed || ((rhs1 != oe2->op || rhs2 != oe1->op) && opindex))
5145 {
5146 gimple *insert_point
5147 = find_insert_point (stmt, oe1->op, oe2->op);
5148 lhs = make_ssa_name (TREE_TYPE (lhs));
5149 stmt
5150 = gimple_build_assign (lhs, rhs_code,
5151 oe1->op, oe2->op);
5152 gimple_set_uid (stmt, uid);
5153 gimple_set_visited (stmt, true);
5154 if (insert_point == gsi_stmt (gsi))
5155 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
5156 else
5157 insert_stmt_after (stmt, insert_point);
5158 }
5159 else
5160 {
5161 gcc_checking_assert (find_insert_point (stmt, oe1->op, oe2->op)
5162 == stmt);
5163 gimple_assign_set_rhs1 (stmt, oe1->op);
5164 gimple_assign_set_rhs2 (stmt, oe2->op);
5165 update_stmt (stmt);
5166 }
5167
5168 if (rhs1 != oe1->op && rhs1 != oe2->op)
5169 remove_visited_stmt_chain (rhs1);
5170
5171 if (dump_file && (dump_flags & TDF_DETAILS))
5172 {
5173 fprintf (dump_file, " into ");
5174 print_gimple_stmt (dump_file, stmt, 0);
5175 }
5176 }
5177 return lhs;
5178 }
5179
5180 /* If we hit here, we should have 3 or more ops left. */
5181 gcc_assert (opindex + 2 < ops.length ());
5182
5183 /* Rewrite the next operator. */
5184 oe = ops[opindex];
5185
5186 /* If the stmt that defines operand has to be inserted, insert it
5187 before the use. */
5188 if (oe->stmt_to_insert)
5189 insert_stmt_before_use (stmt, oe->stmt_to_insert);
5190
5191 /* Recurse on the LHS of the binary operator, which is guaranteed to
5192 be the non-leaf side. */
5193 tree new_rhs1
5194 = rewrite_expr_tree (SSA_NAME_DEF_STMT (rhs1), rhs_code, opindex + 1, ops,
5195 changed || oe->op != rhs2 || next_changed,
5196 false);
5197
5198 if (oe->op != rhs2 || new_rhs1 != rhs1)
5199 {
5200 if (dump_file && (dump_flags & TDF_DETAILS))
5201 {
5202 fprintf (dump_file, "Transforming ");
5203 print_gimple_stmt (dump_file, stmt, 0);
5204 }
5205
5206 /* If changed is false, this is either opindex == 0
5207 or all outer rhs2's were equal to corresponding oe->op,
5208 and powi_result is NULL.
5209 That means lhs is equivalent before and after reassociation.
5210 Otherwise ensure the old lhs SSA_NAME is not reused and
5211 create a new stmt as well, so that any debug stmts will be
5212 properly adjusted. */
5213 if (changed)
5214 {
5215 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
5216 unsigned int uid = gimple_uid (stmt);
5217 gimple *insert_point = find_insert_point (stmt, new_rhs1, oe->op);
5218
5219 lhs = make_ssa_name (TREE_TYPE (lhs));
5220 stmt = gimple_build_assign (lhs, rhs_code,
5221 new_rhs1, oe->op);
5222 gimple_set_uid (stmt, uid);
5223 gimple_set_visited (stmt, true);
5224 if (insert_point == gsi_stmt (gsi))
5225 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
5226 else
5227 insert_stmt_after (stmt, insert_point);
5228 }
5229 else
5230 {
5231 gcc_checking_assert (find_insert_point (stmt, new_rhs1, oe->op)
5232 == stmt);
5233 gimple_assign_set_rhs1 (stmt, new_rhs1);
5234 gimple_assign_set_rhs2 (stmt, oe->op);
5235 update_stmt (stmt);
5236 }
5237
5238 if (dump_file && (dump_flags & TDF_DETAILS))
5239 {
5240 fprintf (dump_file, " into ");
5241 print_gimple_stmt (dump_file, stmt, 0);
5242 }
5243 }
5244 return lhs;
5245 }
5246
5247 /* Find out how many cycles we need to compute a statement chain.
5248 OPS_NUM holds the number of statements in the chain. CPU_WIDTH is the
5249 maximum number of independent statements we may execute per cycle. */
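/* A worked example (hypothetical 2-wide machine): for ops_num == 8 and
   cpu_width == 2, 8 / (2 * 2) gives 2 width-limited cycles; the
   remaining 8 - 2 * 2 == 4 operands then halve over exact_log2 (4) == 2
   further cycles, for 4 cycles total.  */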
5250
5251 static int
5252 get_required_cycles (int ops_num, int cpu_width)
5253 {
5254 int res;
5255 int elog;
5256 unsigned int rest;
5257
5258 /* While we have more than 2 * cpu_width operands
5259 we may reduce the number of operands by cpu_width
5260 per cycle. */
5261 res = ops_num / (2 * cpu_width);
5262
5263 /* The remaining operand count may be halved each cycle
5264 until we have only one operand. */
5265 rest = (unsigned)(ops_num - res * cpu_width);
5266 elog = exact_log2 (rest);
5267 if (elog >= 0)
5268 res += elog;
5269 else
5270 res += floor_log2 (rest) + 1;
5271
5272 return res;
5273 }
5274
5275 /* Returns an optimal number of registers to use for computation of
5276 given statements. */
5277
5278 static int
5279 get_reassociation_width (int ops_num, enum tree_code opc,
5280 machine_mode mode)
5281 {
5282 int param_width = param_tree_reassoc_width;
5283 int width;
5284 int width_min;
5285 int cycles_best;
5286
5287 if (param_width > 0)
5288 width = param_width;
5289 else
5290 width = targetm.sched.reassociation_width (opc, mode);
5291
5292 if (width == 1)
5293 return width;
5294
5295 /* Get the minimal time required for sequence computation. */
5296 cycles_best = get_required_cycles (ops_num, width);
5297
5298 /* Check if we may use less width and still compute the sequence in
5299 the same time. This allows us to reduce register usage.
5300 get_required_cycles is monotonically non-decreasing as the width
5301 decreases, so we can perform a binary search for the minimal width
5302 that still results in the optimal cycle count. */
5303 width_min = 1;
5304 while (width > width_min)
5305 {
5306 int width_mid = (width + width_min) / 2;
5307
5308 if (get_required_cycles (ops_num, width_mid) == cycles_best)
5309 width = width_mid;
5310 else if (width_min < width_mid)
5311 width_min = width_mid;
5312 else
5313 break;
5314 }
5315
5316 return width;
5317 }
5318
5319 /* Recursively rewrite our linearized statements so that the operators
5320 match those in OPS[OPINDEX], putting the computation in rank
5321 order and trying to allow operations to be executed in
5322 parallel. */
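/* E.g. with WIDTH == 2 (an illustrative sketch), the serial chain
     ((a + b) + c) + d
   is rebuilt as
     (a + b) + (c + d)
   so the two inner additions carry no data dependence on each other
   and can issue in the same cycle on a 2-wide machine.  */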
5323
5324 static void
5325 rewrite_expr_tree_parallel (gassign *stmt, int width,
5326 vec<operand_entry *> ops)
5327 {
5328 enum tree_code opcode = gimple_assign_rhs_code (stmt);
5329 int op_num = ops.length ();
5330 gcc_assert (op_num > 0);
5331 int stmt_num = op_num - 1;
5332 gimple **stmts = XALLOCAVEC (gimple *, stmt_num);
5333 int op_index = op_num - 1;
5334 int stmt_index = 0;
5335 int ready_stmts_end = 0;
5336 int i = 0;
5337 gimple *stmt1 = NULL, *stmt2 = NULL;
5338 tree last_rhs1 = gimple_assign_rhs1 (stmt);
5339
5340 /* We start expression rewriting from the top statements.
5341 So, in this loop we create a full list of statements
5342 we will work with. */
5343 stmts[stmt_num - 1] = stmt;
5344 for (i = stmt_num - 2; i >= 0; i--)
5345 stmts[i] = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmts[i+1]));
5346
5347 for (i = 0; i < stmt_num; i++)
5348 {
5349 tree op1, op2;
5350
5351 /* Determine whether we should use results of
5352 already handled statements or not. */
5353 if (ready_stmts_end == 0
5354 && (i - stmt_index >= width || op_index < 1))
5355 ready_stmts_end = i;
5356
5357 /* Now we choose operands for the next statement. A nonzero
5358 value in ready_stmts_end here means that we should use
5359 the results of already generated statements as new operands. */
5360 if (ready_stmts_end > 0)
5361 {
5362 op1 = gimple_assign_lhs (stmts[stmt_index++]);
5363 if (ready_stmts_end > stmt_index)
5364 op2 = gimple_assign_lhs (stmts[stmt_index++]);
5365 else if (op_index >= 0)
5366 {
5367 operand_entry *oe = ops[op_index--];
5368 stmt2 = oe->stmt_to_insert;
5369 op2 = oe->op;
5370 }
5371 else
5372 {
5373 gcc_assert (stmt_index < i);
5374 op2 = gimple_assign_lhs (stmts[stmt_index++]);
5375 }
5376
5377 if (stmt_index >= ready_stmts_end)
5378 ready_stmts_end = 0;
5379 }
5380 else
5381 {
5382 if (op_index > 1)
5383 swap_ops_for_binary_stmt (ops, op_index - 2, NULL);
5384 operand_entry *oe2 = ops[op_index--];
5385 operand_entry *oe1 = ops[op_index--];
5386 op2 = oe2->op;
5387 stmt2 = oe2->stmt_to_insert;
5388 op1 = oe1->op;
5389 stmt1 = oe1->stmt_to_insert;
5390 }
5391
5392 /* If we emit the last statement then we should put
5393 operands into the last statement. It will also
5394 break the loop. */
5395 if (op_index < 0 && stmt_index == i)
5396 i = stmt_num - 1;
5397
5398 if (dump_file && (dump_flags & TDF_DETAILS))
5399 {
5400 fprintf (dump_file, "Transforming ");
5401 print_gimple_stmt (dump_file, stmts[i], 0);
5402 }
5403
5404 /* If the stmt that defines operand has to be inserted, insert it
5405 before the use. */
5406 if (stmt1)
5407 insert_stmt_before_use (stmts[i], stmt1);
5408 if (stmt2)
5409 insert_stmt_before_use (stmts[i], stmt2);
5410 stmt1 = stmt2 = NULL;
5411
5412 /* We keep original statement only for the last one. All
5413 others are recreated. */
5414 if (i == stmt_num - 1)
5415 {
5416 gimple_assign_set_rhs1 (stmts[i], op1);
5417 gimple_assign_set_rhs2 (stmts[i], op2);
5418 update_stmt (stmts[i]);
5419 }
5420 else
5421 {
5422 stmts[i] = build_and_add_sum (TREE_TYPE (last_rhs1), op1, op2, opcode);
5423 gimple_set_visited (stmts[i], true);
5424 }
5425 if (dump_file && (dump_flags & TDF_DETAILS))
5426 {
5427 fprintf (dump_file, " into ");
5428 print_gimple_stmt (dump_file, stmts[i], 0);
5429 }
5430 }
5431
5432 remove_visited_stmt_chain (last_rhs1);
5433 }
5434
5435 /* Transform STMT, which is really (A + B) + (C + D), into the left
5436 linear form, ((A + B) + C) + D.
5437 Recurse on D if necessary.  */
5438
5439 static void
5440 linearize_expr (gimple *stmt)
5441 {
5442 gimple_stmt_iterator gsi;
5443 gimple *binlhs = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
5444 gimple *binrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
5445 gimple *oldbinrhs = binrhs;
5446 enum tree_code rhscode = gimple_assign_rhs_code (stmt);
5447 gimple *newbinrhs = NULL;
5448 class loop *loop = loop_containing_stmt (stmt);
5449 tree lhs = gimple_assign_lhs (stmt);
5450
5451 gcc_assert (is_reassociable_op (binlhs, rhscode, loop)
5452 && is_reassociable_op (binrhs, rhscode, loop));
5453
5454 gsi = gsi_for_stmt (stmt);
5455
5456 gimple_assign_set_rhs2 (stmt, gimple_assign_rhs1 (binrhs));
5457 binrhs = gimple_build_assign (make_ssa_name (TREE_TYPE (lhs)),
5458 gimple_assign_rhs_code (binrhs),
5459 gimple_assign_lhs (binlhs),
5460 gimple_assign_rhs2 (binrhs));
5461 gimple_assign_set_rhs1 (stmt, gimple_assign_lhs (binrhs));
5462 gsi_insert_before (&gsi, binrhs, GSI_SAME_STMT);
5463 gimple_set_uid (binrhs, gimple_uid (stmt));
5464
5465 if (TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME)
5466 newbinrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
5467
5468 if (dump_file && (dump_flags & TDF_DETAILS))
5469 {
5470 fprintf (dump_file, "Linearized: ");
5471 print_gimple_stmt (dump_file, stmt, 0);
5472 }
5473
5474 reassociate_stats.linearized++;
5475 update_stmt (stmt);
5476
5477 gsi = gsi_for_stmt (oldbinrhs);
5478 reassoc_remove_stmt (&gsi);
5479 release_defs (oldbinrhs);
5480
5481 gimple_set_visited (stmt, true);
5482 gimple_set_visited (binlhs, true);
5483 gimple_set_visited (binrhs, true);
5484
5485 /* Tail recurse on the new rhs if it still needs reassociation. */
5486 if (newbinrhs && is_reassociable_op (newbinrhs, rhscode, loop))
5487 /* ??? This should probably be linearize_expr (newbinrhs) but I don't
5488 want to change the algorithm while converting to tuples. */
5489 linearize_expr (stmt);
5490 }
5491
5492 /* If LHS has a single immediate use that is a GIMPLE_ASSIGN statement, return
5493 it. Otherwise, return NULL. */
5494
5495 static gimple *
5496 get_single_immediate_use (tree lhs)
5497 {
5498 use_operand_p immuse;
5499 gimple *immusestmt;
5500
5501 if (TREE_CODE (lhs) == SSA_NAME
5502 && single_imm_use (lhs, &immuse, &immusestmt)
5503 && is_gimple_assign (immusestmt))
5504 return immusestmt;
5505
5506 return NULL;
5507 }
5508
5509 /* Recursively negate the value of TONEGATE, and return the SSA_NAME
5510 representing the negated value. Insertions of any necessary
5511 instructions go before GSI.
5512 This function is recursive in that, if you hand it "a_5" as the
5513 value to negate, and a_5 is defined by "a_5 = b_3 + b_4", it will
5514 transform b_3 + b_4 into a_5 = -b_3 + -b_4. */
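/* Sketch with hypothetical SSA names: negating a_5 where
   a_5 = b_3 + b_4 first negates both operands, emitting
   t_6 = -b_3 and t_7 = -b_4, builds a_8 = t_6 + t_7, and returns
   a_8; an operand not defined by a single-use add simply has one
   NEGATE_EXPR forced into the IL.  */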
5515
5516 static tree
5517 negate_value (tree tonegate, gimple_stmt_iterator *gsip)
5518 {
5519 gimple *negatedefstmt = NULL;
5520 tree resultofnegate;
5521 gimple_stmt_iterator gsi;
5522 unsigned int uid;
5523
5524 /* If we are trying to negate a name, defined by an add, negate the
5525 add operands instead. */
5526 if (TREE_CODE (tonegate) == SSA_NAME)
5527 negatedefstmt = SSA_NAME_DEF_STMT (tonegate);
5528 if (TREE_CODE (tonegate) == SSA_NAME
5529 && is_gimple_assign (negatedefstmt)
5530 && TREE_CODE (gimple_assign_lhs (negatedefstmt)) == SSA_NAME
5531 && has_single_use (gimple_assign_lhs (negatedefstmt))
5532 && gimple_assign_rhs_code (negatedefstmt) == PLUS_EXPR)
5533 {
5534 tree rhs1 = gimple_assign_rhs1 (negatedefstmt);
5535 tree rhs2 = gimple_assign_rhs2 (negatedefstmt);
5536 tree lhs = gimple_assign_lhs (negatedefstmt);
5537 gimple *g;
5538
5539 gsi = gsi_for_stmt (negatedefstmt);
5540 rhs1 = negate_value (rhs1, &gsi);
5541
5542 gsi = gsi_for_stmt (negatedefstmt);
5543 rhs2 = negate_value (rhs2, &gsi);
5544
5545 gsi = gsi_for_stmt (negatedefstmt);
5546 lhs = make_ssa_name (TREE_TYPE (lhs));
5547 gimple_set_visited (negatedefstmt, true);
5548 g = gimple_build_assign (lhs, PLUS_EXPR, rhs1, rhs2);
5549 gimple_set_uid (g, gimple_uid (negatedefstmt));
5550 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5551 return lhs;
5552 }
5553
5554 tonegate = fold_build1 (NEGATE_EXPR, TREE_TYPE (tonegate), tonegate);
5555 resultofnegate = force_gimple_operand_gsi (gsip, tonegate, true,
5556 NULL_TREE, true, GSI_SAME_STMT);
5557 gsi = *gsip;
5558 uid = gimple_uid (gsi_stmt (gsi));
5559 for (gsi_prev (&gsi); !gsi_end_p (gsi); gsi_prev (&gsi))
5560 {
5561 gimple *stmt = gsi_stmt (gsi);
5562 if (gimple_uid (stmt) != 0)
5563 break;
5564 gimple_set_uid (stmt, uid);
5565 }
5566 return resultofnegate;
5567 }
5568
5569 /* Return true if we should break up the subtract in STMT into an add
5570 with negate.  This is true when the subtract operands are really
5571 adds, or the subtract itself is used in an add expression.  In
5572 either case, breaking up the subtract into an add with negate
5573 exposes the adds to reassociation.  */
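/* For instance, for  b = c - d  where  c = a + e, we return true so
   that the caller can rewrite  b  as  c + -d  and expose c's addends
   to reassociation (names here are purely illustrative).  */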
5574
5575 static bool
5576 should_break_up_subtract (gimple *stmt)
5577 {
5578 tree lhs = gimple_assign_lhs (stmt);
5579 tree binlhs = gimple_assign_rhs1 (stmt);
5580 tree binrhs = gimple_assign_rhs2 (stmt);
5581 gimple *immusestmt;
5582 class loop *loop = loop_containing_stmt (stmt);
5583
5584 if (TREE_CODE (binlhs) == SSA_NAME
5585 && is_reassociable_op (SSA_NAME_DEF_STMT (binlhs), PLUS_EXPR, loop))
5586 return true;
5587
5588 if (TREE_CODE (binrhs) == SSA_NAME
5589 && is_reassociable_op (SSA_NAME_DEF_STMT (binrhs), PLUS_EXPR, loop))
5590 return true;
5591
5592 if (TREE_CODE (lhs) == SSA_NAME
5593 && (immusestmt = get_single_immediate_use (lhs))
5594 && is_gimple_assign (immusestmt)
5595 && (gimple_assign_rhs_code (immusestmt) == PLUS_EXPR
5596 || (gimple_assign_rhs_code (immusestmt) == MINUS_EXPR
5597 && gimple_assign_rhs1 (immusestmt) == lhs)
5598 || gimple_assign_rhs_code (immusestmt) == MULT_EXPR))
5599 return true;
5600 return false;
5601 }
5602
5603 /* Transform STMT from A - B into A + -B. */
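/* Sketch (hypothetical SSA names): for  x_3 = a_1 - b_2  the
   negate_value call below inserts  t_4 = -b_2  and the statement
   becomes  x_3 = a_1 + t_4.  */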
5604
5605 static void
5606 break_up_subtract (gimple *stmt, gimple_stmt_iterator *gsip)
5607 {
5608 tree rhs1 = gimple_assign_rhs1 (stmt);
5609 tree rhs2 = gimple_assign_rhs2 (stmt);
5610
5611 if (dump_file && (dump_flags & TDF_DETAILS))
5612 {
5613 fprintf (dump_file, "Breaking up subtract ");
5614 print_gimple_stmt (dump_file, stmt, 0);
5615 }
5616
5617 rhs2 = negate_value (rhs2, gsip);
5618 gimple_assign_set_rhs_with_ops (gsip, PLUS_EXPR, rhs1, rhs2);
5619 update_stmt (stmt);
5620 }
5621
5622 /* Determine whether STMT is a builtin call that raises an SSA name
5623 to an integer power and has only one use. If so, and this is early
5624 reassociation and unsafe math optimizations are permitted, place
5625 the SSA name in *BASE and the exponent in *EXPONENT, and return TRUE.
5626 If any of these conditions does not hold, return FALSE. */
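/* For example, a single-use call  t = pow (x, 4.0)  yields *BASE = x
   and *EXPONENT = 4, whereas  pow (x, 0.5)  is rejected because the
   exponent is not an exact integer, and  pow (x, 1.0)  is rejected
   below as not worth expanding.  */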
5627
5628 static bool
5629 acceptable_pow_call (gcall *stmt, tree *base, HOST_WIDE_INT *exponent)
5630 {
5631 tree arg1;
5632 REAL_VALUE_TYPE c, cint;
5633
5634 switch (gimple_call_combined_fn (stmt))
5635 {
5636 CASE_CFN_POW:
5637 if (flag_errno_math)
5638 return false;
5639
5640 *base = gimple_call_arg (stmt, 0);
5641 arg1 = gimple_call_arg (stmt, 1);
5642
5643 if (TREE_CODE (arg1) != REAL_CST)
5644 return false;
5645
5646 c = TREE_REAL_CST (arg1);
5647
5648 if (REAL_EXP (&c) > HOST_BITS_PER_WIDE_INT)
5649 return false;
5650
5651 *exponent = real_to_integer (&c);
5652 real_from_integer (&cint, VOIDmode, *exponent, SIGNED);
5653 if (!real_identical (&c, &cint))
5654 return false;
5655
5656 break;
5657
5658 CASE_CFN_POWI:
5659 *base = gimple_call_arg (stmt, 0);
5660 arg1 = gimple_call_arg (stmt, 1);
5661
5662 if (!tree_fits_shwi_p (arg1))
5663 return false;
5664
5665 *exponent = tree_to_shwi (arg1);
5666 break;
5667
5668 default:
5669 return false;
5670 }
5671
5672 /* Expanding negative exponents is generally unproductive, so we don't
5673 complicate matters with those. Exponents of zero and one should
5674 have been handled by expression folding. */
5675 if (*exponent < 2 || TREE_CODE (*base) != SSA_NAME)
5676 return false;
5677
5678 return true;
5679 }
5680
5681 /* Try to derive and add an operand entry for OP to *OPS.  Return false if
5682 unsuccessful. */
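/* Two illustrative cases for a MULT_EXPR chain: a single-use operand
   T defined by  T = -Y  is entered as the pair of operands Y and -1,
   and a single-use  T = __builtin_powi (X, 5)  (with unsafe math
   optimizations enabled) is recorded as the repeated factor (X, 5).  */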
5683
5684 static bool
5685 try_special_add_to_ops (vec<operand_entry *> *ops,
5686 enum tree_code code,
5687 tree op, gimple* def_stmt)
5688 {
5689 tree base = NULL_TREE;
5690 HOST_WIDE_INT exponent = 0;
5691
5692 if (TREE_CODE (op) != SSA_NAME
5693 || ! has_single_use (op))
5694 return false;
5695
5696 if (code == MULT_EXPR
5697 && reassoc_insert_powi_p
5698 && flag_unsafe_math_optimizations
5699 && is_gimple_call (def_stmt)
5700 && acceptable_pow_call (as_a <gcall *> (def_stmt), &base, &exponent))
5701 {
5702 add_repeat_to_ops_vec (ops, base, exponent);
5703 gimple_set_visited (def_stmt, true);
5704 return true;
5705 }
5706 else if (code == MULT_EXPR
5707 && is_gimple_assign (def_stmt)
5708 && gimple_assign_rhs_code (def_stmt) == NEGATE_EXPR
5709 && !HONOR_SNANS (TREE_TYPE (op))
5710 && (!HONOR_SIGNED_ZEROS (TREE_TYPE (op))
5711 || !COMPLEX_FLOAT_TYPE_P (TREE_TYPE (op))))
5712 {
5713 tree rhs1 = gimple_assign_rhs1 (def_stmt);
5714 tree cst = build_minus_one_cst (TREE_TYPE (op));
5715 add_to_ops_vec (ops, rhs1);
5716 add_to_ops_vec (ops, cst);
5717 gimple_set_visited (def_stmt, true);
5718 return true;
5719 }
5720
5721 return false;
5722 }
5723
5724 /* Recursively linearize a binary expression that is the RHS of STMT.
5725 Place the operands of the expression tree in the vector named OPS. */
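/* Rough sketch: for  d = a + b  where  a = x + y  is reassociable and
   b is not, we recurse into a's definition and end up with
   OPS = { x, y, b }; when only the RHS operand is reassociable, the
   operands are swapped first so the reassociable one sits on the
   LHS.  */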
5726
5727 static void
5728 linearize_expr_tree (vec<operand_entry *> *ops, gimple *stmt,
5729 bool is_associative, bool set_visited)
5730 {
5731 tree binlhs = gimple_assign_rhs1 (stmt);
5732 tree binrhs = gimple_assign_rhs2 (stmt);
5733 gimple *binlhsdef = NULL, *binrhsdef = NULL;
5734 bool binlhsisreassoc = false;
5735 bool binrhsisreassoc = false;
5736 enum tree_code rhscode = gimple_assign_rhs_code (stmt);
5737 class loop *loop = loop_containing_stmt (stmt);
5738
5739 if (set_visited)
5740 gimple_set_visited (stmt, true);
5741
5742 if (TREE_CODE (binlhs) == SSA_NAME)
5743 {
5744 binlhsdef = SSA_NAME_DEF_STMT (binlhs);
5745 binlhsisreassoc = (is_reassociable_op (binlhsdef, rhscode, loop)
5746 && !stmt_could_throw_p (cfun, binlhsdef));
5747 }
5748
5749 if (TREE_CODE (binrhs) == SSA_NAME)
5750 {
5751 binrhsdef = SSA_NAME_DEF_STMT (binrhs);
5752 binrhsisreassoc = (is_reassociable_op (binrhsdef, rhscode, loop)
5753 && !stmt_could_throw_p (cfun, binrhsdef));
5754 }
5755
5756 /* If the LHS is not reassociable, but the RHS is, we need to swap
5757 them. If neither is reassociable, there is nothing we can do, so
5758 just put them in the ops vector. If the LHS is reassociable,
5759 linearize it. If both are reassociable, then linearize the RHS
5760 and the LHS. */
5761
5762 if (!binlhsisreassoc)
5763 {
5764 /* If this is not an associative operation, like division, give up.  */
5765 if (!is_associative)
5766 {
5767 add_to_ops_vec (ops, binrhs);
5768 return;
5769 }
5770
5771 if (!binrhsisreassoc)
5772 {
5773 bool swap = false;
5774 if (try_special_add_to_ops (ops, rhscode, binrhs, binrhsdef))
5775 /* If we add ops for the rhs we expect to be able to recurse
5776 to it via the lhs during expression rewrite so swap
5777 operands. */
5778 swap = true;
5779 else
5780 add_to_ops_vec (ops, binrhs);
5781
5782 if (!try_special_add_to_ops (ops, rhscode, binlhs, binlhsdef))
5783 add_to_ops_vec (ops, binlhs);
5784
5785 if (!swap)
5786 return;
5787 }
5788
5789 if (dump_file && (dump_flags & TDF_DETAILS))
5790 {
5791 fprintf (dump_file, "swapping operands of ");
5792 print_gimple_stmt (dump_file, stmt, 0);
5793 }
5794
5795 swap_ssa_operands (stmt,
5796 gimple_assign_rhs1_ptr (stmt),
5797 gimple_assign_rhs2_ptr (stmt));
5798 update_stmt (stmt);
5799
5800 if (dump_file && (dump_flags & TDF_DETAILS))
5801 {
5802 fprintf (dump_file, " is now ");
5803 print_gimple_stmt (dump_file, stmt, 0);
5804 }
5805 if (!binrhsisreassoc)
5806 return;
5807
5808 /* We want to make it so the lhs is always the reassociative op,
5809 so swap. */
5810 std::swap (binlhs, binrhs);
5811 }
5812 else if (binrhsisreassoc)
5813 {
5814 linearize_expr (stmt);
5815 binlhs = gimple_assign_rhs1 (stmt);
5816 binrhs = gimple_assign_rhs2 (stmt);
5817 }
5818
5819 gcc_assert (TREE_CODE (binrhs) != SSA_NAME
5820 || !is_reassociable_op (SSA_NAME_DEF_STMT (binrhs),
5821 rhscode, loop));
5822 linearize_expr_tree (ops, SSA_NAME_DEF_STMT (binlhs),
5823 is_associative, set_visited);
5824
5825 if (!try_special_add_to_ops (ops, rhscode, binrhs, binrhsdef))
5826 add_to_ops_vec (ops, binrhs);
5827 }
5828
5829 /* Repropagate the negates back into subtracts, since no other pass
5830 currently does it. */
5831
5832 static void
5833 repropagate_negates (void)
5834 {
5835 unsigned int i = 0;
5836 tree negate;
5837
5838 FOR_EACH_VEC_ELT (plus_negates, i, negate)
5839 {
5840 gimple *user = get_single_immediate_use (negate);
5841
5842 if (!user || !is_gimple_assign (user))
5843 continue;
5844
5845 /* The negate operand can be either operand of a PLUS_EXPR
5846 (it can be the LHS if the RHS is a constant for example).
5847
5848 Force the negate operand to the RHS of the PLUS_EXPR, then
5849 transform the PLUS_EXPR into a MINUS_EXPR. */
5850 if (gimple_assign_rhs_code (user) == PLUS_EXPR)
5851 {
5852 /* If the negated operand appears on the LHS of the
5853 PLUS_EXPR, exchange the operands of the PLUS_EXPR
5854 to force the negated operand to the RHS of the PLUS_EXPR. */
5855 if (gimple_assign_rhs1 (user) == negate)
5856 {
5857 swap_ssa_operands (user,
5858 gimple_assign_rhs1_ptr (user),
5859 gimple_assign_rhs2_ptr (user));
5860 }
5861
5862 /* Now transform the PLUS_EXPR into a MINUS_EXPR and replace
5863 the RHS of the PLUS_EXPR with the operand of the NEGATE_EXPR. */
5864 if (gimple_assign_rhs2 (user) == negate)
5865 {
5866 tree rhs1 = gimple_assign_rhs1 (user);
5867 tree rhs2 = gimple_assign_rhs1 (SSA_NAME_DEF_STMT (negate));
5868 gimple_stmt_iterator gsi = gsi_for_stmt (user);
5869 gimple_assign_set_rhs_with_ops (&gsi, MINUS_EXPR, rhs1, rhs2);
5870 update_stmt (user);
5871 }
5872 }
5873 else if (gimple_assign_rhs_code (user) == MINUS_EXPR)
5874 {
5875 if (gimple_assign_rhs1 (user) == negate)
5876 {
5877 /* We have
5878 x = -a
5879 y = x - b
5880 which we transform into
5881 x = a + b
5882 y = -x .
5883 This pushes down the negate which we possibly can merge
5884 into some other operation, hence insert it into the
5885 plus_negates vector. */
5886 gimple *feed = SSA_NAME_DEF_STMT (negate);
5887 tree a = gimple_assign_rhs1 (feed);
5888 tree b = gimple_assign_rhs2 (user);
5889 gimple_stmt_iterator gsi = gsi_for_stmt (feed);
5890 gimple_stmt_iterator gsi2 = gsi_for_stmt (user);
5891 tree x = make_ssa_name (TREE_TYPE (gimple_assign_lhs (feed)));
5892 gimple *g = gimple_build_assign (x, PLUS_EXPR, a, b);
5893 gsi_insert_before (&gsi2, g, GSI_SAME_STMT);
5894 gimple_assign_set_rhs_with_ops (&gsi2, NEGATE_EXPR, x);
5895 user = gsi_stmt (gsi2);
5896 update_stmt (user);
5897 reassoc_remove_stmt (&gsi);
5898 release_defs (feed);
5899 plus_negates.safe_push (gimple_assign_lhs (user));
5900 }
5901 else
5902 {
5903 /* Transform "x = -a; y = b - x" into "y = b + a", getting
5904 rid of one operation. */
5905 gimple *feed = SSA_NAME_DEF_STMT (negate);
5906 tree a = gimple_assign_rhs1 (feed);
5907 tree rhs1 = gimple_assign_rhs1 (user);
5908 gimple_stmt_iterator gsi = gsi_for_stmt (user);
5909 gimple_assign_set_rhs_with_ops (&gsi, PLUS_EXPR, rhs1, a);
5910 update_stmt (gsi_stmt (gsi));
5911 }
5912 }
5913 }
5914 }
5915
5916 /* Break up subtract operations in block BB.
5917
5918 We do this top down because we don't know whether the subtract is
5919 part of a possible chain of reassociation except at the top.
5920
5921 IE given
5922 d = f + g
5923 c = a + e
5924 b = c - d
5925 q = b - r
5926 k = t - q
5927
5928 we want to break up k = t - q, but we won't until we've transformed q
5929 = b - r, which won't be broken up until we transform b = c - d.
5930
5931 En passant, clear the GIMPLE visited flag on every statement
5932 and set UIDs within each basic block. */
5933
5934 static void
5935 break_up_subtract_bb (basic_block bb)
5936 {
5937 gimple_stmt_iterator gsi;
5938 basic_block son;
5939 unsigned int uid = 1;
5940
5941 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
5942 {
5943 gimple *stmt = gsi_stmt (gsi);
5944 gimple_set_visited (stmt, false);
5945 gimple_set_uid (stmt, uid++);
5946
5947 if (!is_gimple_assign (stmt)
5948 || !can_reassociate_type_p (TREE_TYPE (gimple_assign_lhs (stmt)))
5949 || !can_reassociate_op_p (gimple_assign_lhs (stmt)))
5950 continue;
5951
5952 /* Look for simple gimple subtract operations. */
5953 if (gimple_assign_rhs_code (stmt) == MINUS_EXPR)
5954 {
5955 if (!can_reassociate_op_p (gimple_assign_rhs1 (stmt))
5956 || !can_reassociate_op_p (gimple_assign_rhs2 (stmt)))
5957 continue;
5958
5959 /* Check for a subtract used only in an addition. If this
5960 is the case, transform it into add of a negate for better
5961 reassociation. IE transform C = A-B into C = A + -B if C
5962 is only used in an addition. */
5963 if (should_break_up_subtract (stmt))
5964 break_up_subtract (stmt, &gsi);
5965 }
5966 else if (gimple_assign_rhs_code (stmt) == NEGATE_EXPR
5967 && can_reassociate_op_p (gimple_assign_rhs1 (stmt)))
5968 plus_negates.safe_push (gimple_assign_lhs (stmt));
5969 }
5970 for (son = first_dom_son (CDI_DOMINATORS, bb);
5971 son;
5972 son = next_dom_son (CDI_DOMINATORS, son))
5973 break_up_subtract_bb (son);
5974 }
5975
5976 /* Used for repeated factor analysis. */
5977 struct repeat_factor
5978 {
5979 /* An SSA name that occurs in a multiply chain. */
5980 tree factor;
5981
5982 /* Cached rank of the factor. */
5983 unsigned rank;
5984
5985 /* Number of occurrences of the factor in the chain. */
5986 HOST_WIDE_INT count;
5987
5988 /* An SSA name representing the product of this factor and
5989 all factors appearing later in the repeated factor vector. */
5990 tree repr;
5991 };
5992
5993
5994 static vec<repeat_factor> repeat_factor_vec;
5995
5996 /* Used for sorting the repeat factor vector. Sort primarily by
5997 ascending occurrence count, secondarily by descending rank. */
5998
5999 static int
6000 compare_repeat_factors (const void *x1, const void *x2)
6001 {
6002 const repeat_factor *rf1 = (const repeat_factor *) x1;
6003 const repeat_factor *rf2 = (const repeat_factor *) x2;
6004
6005 if (rf1->count != rf2->count)
6006 return rf1->count - rf2->count;
6007
6008 return rf2->rank - rf1->rank;
6009 }
6010
6011 /* Look for repeated operands in OPS in the multiply tree rooted at
6012 STMT. Replace them with an optimal sequence of multiplies and powi
6013 builtin calls, and remove the used operands from OPS. Return an
6014 SSA name representing the value of the replacement sequence. */
6015
6016 static tree
6017 attempt_builtin_powi (gimple *stmt, vec<operand_entry *> *ops)
6018 {
6019 unsigned i, j, vec_len;
6020 int ii;
6021 operand_entry *oe;
6022 repeat_factor *rf1, *rf2;
6023 repeat_factor rfnew;
6024 tree result = NULL_TREE;
6025 tree target_ssa, iter_result;
6026 tree type = TREE_TYPE (gimple_get_lhs (stmt));
6027 tree powi_fndecl = mathfn_built_in (type, BUILT_IN_POWI);
6028 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
6029 gimple *mul_stmt, *pow_stmt;
6030
6031 /* Nothing to do if BUILT_IN_POWI doesn't exist for this type and
6032 target, unless type is integral. */
6033 if (!powi_fndecl && !INTEGRAL_TYPE_P (type))
6034 return NULL_TREE;
6035
6036 /* Allocate the repeated factor vector. */
6037 repeat_factor_vec.create (10);
6038
6039 /* Scan the OPS vector for all SSA names in the product and build
6040 up a vector of occurrence counts for each factor. */
6041 FOR_EACH_VEC_ELT (*ops, i, oe)
6042 {
6043 if (TREE_CODE (oe->op) == SSA_NAME)
6044 {
6045 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
6046 {
6047 if (rf1->factor == oe->op)
6048 {
6049 rf1->count += oe->count;
6050 break;
6051 }
6052 }
6053
6054 if (j >= repeat_factor_vec.length ())
6055 {
6056 rfnew.factor = oe->op;
6057 rfnew.rank = oe->rank;
6058 rfnew.count = oe->count;
6059 rfnew.repr = NULL_TREE;
6060 repeat_factor_vec.safe_push (rfnew);
6061 }
6062 }
6063 }
6064
6065 /* Sort the repeated factor vector by (a) increasing occurrence count,
6066 and (b) decreasing rank. */
6067 repeat_factor_vec.qsort (compare_repeat_factors);
6068
6069 /* It is generally best to combine as many base factors as possible
6070 into a product before applying __builtin_powi to the result.
6071 However, the sort order chosen for the repeated factor vector
6072 allows us to cache partial results for the product of the base
6073 factors for subsequent use. When we already have a cached partial
6074 result from a previous iteration, it is best to make use of it
6075 before looking for another __builtin_powi opportunity.
6076
6077 As an example, consider x * x * y * y * y * z * z * z * z.
6078 We want to first compose the product x * y * z, raise it to the
6079 second power, then multiply this by y * z, and finally multiply
6080 by z. This can be done in 5 multiplies provided we cache y * z
6081 for use in both expressions:
6082
6083 t1 = y * z
6084 t2 = t1 * x
6085 t3 = t2 * t2
6086 t4 = t1 * t3
6087 result = t4 * z
6088
6089 If we instead ignored the cached y * z and first multiplied by
6090 the __builtin_powi opportunity z * z, we would get the inferior:
6091
6092 t1 = y * z
6093 t2 = t1 * x
6094 t3 = t2 * t2
6095 t4 = z * z
6096 t5 = t3 * t4
6097 result = t5 * y */
6098
6099 vec_len = repeat_factor_vec.length ();
6100
6101 /* Repeatedly look for opportunities to create a builtin_powi call. */
6102 while (true)
6103 {
6104 HOST_WIDE_INT power;
6105
6106 /* First look for the largest cached product of factors from
6107 preceding iterations. If found, create a builtin_powi for
6108 it if the minimum occurrence count for its factors is at
6109 least 2, or just use this cached product as our next
6110 multiplicand if the minimum occurrence count is 1. */
6111 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
6112 {
6113 if (rf1->repr && rf1->count > 0)
6114 break;
6115 }
6116
6117 if (j < vec_len)
6118 {
6119 power = rf1->count;
6120
6121 if (power == 1)
6122 {
6123 iter_result = rf1->repr;
6124
6125 if (dump_file && (dump_flags & TDF_DETAILS))
6126 {
6127 unsigned elt;
6128 repeat_factor *rf;
6129 fputs ("Multiplying by cached product ", dump_file);
6130 for (elt = j; elt < vec_len; elt++)
6131 {
6132 rf = &repeat_factor_vec[elt];
6133 print_generic_expr (dump_file, rf->factor);
6134 if (elt < vec_len - 1)
6135 fputs (" * ", dump_file);
6136 }
6137 fputs ("\n", dump_file);
6138 }
6139 }
6140 else
6141 {
6142 if (INTEGRAL_TYPE_P (type))
6143 {
6144 gcc_assert (power > 1);
6145 gimple_stmt_iterator gsip = gsi;
6146 gsi_prev (&gsip);
6147 iter_result = powi_as_mults (&gsi, gimple_location (stmt),
6148 rf1->repr, power);
6149 gimple_stmt_iterator gsic = gsi;
6150 while (gsi_stmt (gsic) != gsi_stmt (gsip))
6151 {
6152 gimple_set_uid (gsi_stmt (gsic), gimple_uid (stmt));
6153 gimple_set_visited (gsi_stmt (gsic), true);
6154 gsi_prev (&gsic);
6155 }
6156 }
6157 else
6158 {
6159 iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
6160 pow_stmt
6161 = gimple_build_call (powi_fndecl, 2, rf1->repr,
6162 build_int_cst (integer_type_node,
6163 power));
6164 gimple_call_set_lhs (pow_stmt, iter_result);
6165 gimple_set_location (pow_stmt, gimple_location (stmt));
6166 gimple_set_uid (pow_stmt, gimple_uid (stmt));
6167 gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
6168 }
6169
6170 if (dump_file && (dump_flags & TDF_DETAILS))
6171 {
6172 unsigned elt;
6173 repeat_factor *rf;
6174 fputs ("Building __builtin_pow call for cached product (",
6175 dump_file);
6176 for (elt = j; elt < vec_len; elt++)
6177 {
6178 rf = &repeat_factor_vec[elt];
6179 print_generic_expr (dump_file, rf->factor);
6180 if (elt < vec_len - 1)
6181 fputs (" * ", dump_file);
6182 }
6183 fprintf (dump_file, ")^" HOST_WIDE_INT_PRINT_DEC"\n",
6184 power);
6185 }
6186 }
6187 }
6188 else
6189 {
6190 /* Otherwise, find the first factor in the repeated factor
6191 vector whose occurrence count is at least 2. If no such
6192 factor exists, there are no builtin_powi opportunities
6193 remaining. */
6194 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
6195 {
6196 if (rf1->count >= 2)
6197 break;
6198 }
6199
6200 if (j >= vec_len)
6201 break;
6202
6203 power = rf1->count;
6204
6205 if (dump_file && (dump_flags & TDF_DETAILS))
6206 {
6207 unsigned elt;
6208 repeat_factor *rf;
6209 fputs ("Building __builtin_pow call for (", dump_file);
6210 for (elt = j; elt < vec_len; elt++)
6211 {
6212 rf = &repeat_factor_vec[elt];
6213 print_generic_expr (dump_file, rf->factor);
6214 if (elt < vec_len - 1)
6215 fputs (" * ", dump_file);
6216 }
6217 fprintf (dump_file, ")^" HOST_WIDE_INT_PRINT_DEC"\n", power);
6218 }
6219
6220 reassociate_stats.pows_created++;
6221
6222 /* Visit each element of the vector in reverse order (so that
6223 high-occurrence elements are visited first, and within the
6224 same occurrence count, lower-ranked elements are visited
6225 first). Form a linear product of all elements in this order
6226 whose occurrence count is at least that of element J.
6227 Record the SSA name representing the product of each element
6228 with all subsequent elements in the vector. */
6229 if (j == vec_len - 1)
6230 rf1->repr = rf1->factor;
6231 else
6232 {
6233 for (ii = vec_len - 2; ii >= (int)j; ii--)
6234 {
6235 tree op1, op2;
6236
6237 rf1 = &repeat_factor_vec[ii];
6238 rf2 = &repeat_factor_vec[ii + 1];
6239
6240 /* Init the last factor's representative to be itself. */
6241 if (!rf2->repr)
6242 rf2->repr = rf2->factor;
6243
6244 op1 = rf1->factor;
6245 op2 = rf2->repr;
6246
6247 target_ssa = make_temp_ssa_name (type, NULL, "reassocpow");
6248 mul_stmt = gimple_build_assign (target_ssa, MULT_EXPR,
6249 op1, op2);
6250 gimple_set_location (mul_stmt, gimple_location (stmt));
6251 gimple_set_uid (mul_stmt, gimple_uid (stmt));
6252 gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
6253 rf1->repr = target_ssa;
6254
6255 /* Don't reprocess the multiply we just introduced. */
6256 gimple_set_visited (mul_stmt, true);
6257 }
6258 }
6259
6260 /* Form a call to __builtin_powi for the maximum product
6261 just formed, raised to the power obtained earlier. */
6262 rf1 = &repeat_factor_vec[j];
6263 if (INTEGRAL_TYPE_P (type))
6264 {
6265 gcc_assert (power > 1);
6266 gimple_stmt_iterator gsip = gsi;
6267 gsi_prev (&gsip);
6268 iter_result = powi_as_mults (&gsi, gimple_location (stmt),
6269 rf1->repr, power);
6270 gimple_stmt_iterator gsic = gsi;
6271 while (gsi_stmt (gsic) != gsi_stmt (gsip))
6272 {
6273 gimple_set_uid (gsi_stmt (gsic), gimple_uid (stmt));
6274 gimple_set_visited (gsi_stmt (gsic), true);
6275 gsi_prev (&gsic);
6276 }
6277 }
6278 else
6279 {
6280 iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
6281 pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
6282 build_int_cst (integer_type_node,
6283 power));
6284 gimple_call_set_lhs (pow_stmt, iter_result);
6285 gimple_set_location (pow_stmt, gimple_location (stmt));
6286 gimple_set_uid (pow_stmt, gimple_uid (stmt));
6287 gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
6288 }
6289 }
6290
6291 /* If we previously formed at least one other builtin_powi call,
6292 form the product of this one and those others. */
6293 if (result)
6294 {
6295 tree new_result = make_temp_ssa_name (type, NULL, "reassocpow");
6296 mul_stmt = gimple_build_assign (new_result, MULT_EXPR,
6297 result, iter_result);
6298 gimple_set_location (mul_stmt, gimple_location (stmt));
6299 gimple_set_uid (mul_stmt, gimple_uid (stmt));
6300 gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
6301 gimple_set_visited (mul_stmt, true);
6302 result = new_result;
6303 }
6304 else
6305 result = iter_result;
6306
6307 /* Decrement the occurrence count of each element in the product
6308 by the count found above, and remove this many copies of each
6309 factor from OPS. */
6310 for (i = j; i < vec_len; i++)
6311 {
6312 unsigned k = power;
6313 unsigned n;
6314
6315 rf1 = &repeat_factor_vec[i];
6316 rf1->count -= power;
6317
6318 FOR_EACH_VEC_ELT_REVERSE (*ops, n, oe)
6319 {
6320 if (oe->op == rf1->factor)
6321 {
6322 if (oe->count <= k)
6323 {
6324 ops->ordered_remove (n);
6325 k -= oe->count;
6326
6327 if (k == 0)
6328 break;
6329 }
6330 else
6331 {
6332 oe->count -= k;
6333 break;
6334 }
6335 }
6336 }
6337 }
6338 }
6339
6340 /* At this point all elements in the repeated factor vector have a
6341 remaining occurrence count of 0 or 1, and those with a count of 1
6342 don't have cached representatives. Re-sort the ops vector and
6343 clean up. */
6344 ops->qsort (sort_by_operand_rank);
6345 repeat_factor_vec.release ();
6346
6347 /* Return the final product computed herein. Note that there may
6348 still be some elements with single occurrence count left in OPS;
6349 those will be handled by the normal reassociation logic. */
6350 return result;
6351 }
6352
6353 /* Attempt to optimize
6354 CST1 * copysign (CST2, y) -> copysign (CST1 * CST2, y) if CST1 > 0, or
6355 CST1 * copysign (CST2, y) -> -copysign (CST1 * CST2, y) if CST1 < 0. */
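/* For example (sketch), with OPS ending in the constant -2.0 and
   containing a single-use  t = copysign (3.0, y), the constant is
   folded into the call:  t' = copysign (-6.0, y)  is built, and the
   result is then negated because -2.0 is negative.  */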
6356
6357 static void
6358 attempt_builtin_copysign (vec<operand_entry *> *ops)
6359 {
6360 operand_entry *oe;
6361 unsigned int i;
6362 unsigned int length = ops->length ();
6363 tree cst = ops->last ()->op;
6364
6365 if (length == 1 || TREE_CODE (cst) != REAL_CST)
6366 return;
6367
6368 FOR_EACH_VEC_ELT (*ops, i, oe)
6369 {
6370 if (TREE_CODE (oe->op) == SSA_NAME
6371 && has_single_use (oe->op))
6372 {
6373 gimple *def_stmt = SSA_NAME_DEF_STMT (oe->op);
6374 if (gcall *old_call = dyn_cast <gcall *> (def_stmt))
6375 {
6376 tree arg0, arg1;
6377 switch (gimple_call_combined_fn (old_call))
6378 {
6379 CASE_CFN_COPYSIGN:
6380 CASE_CFN_COPYSIGN_FN:
6381 arg0 = gimple_call_arg (old_call, 0);
6382 arg1 = gimple_call_arg (old_call, 1);
6383 /* The first argument of copysign must be a constant,
6384 otherwise there's nothing to do. */
6385 if (TREE_CODE (arg0) == REAL_CST)
6386 {
6387 tree type = TREE_TYPE (arg0);
6388 tree mul = const_binop (MULT_EXPR, type, cst, arg0);
6389 /* If we couldn't fold to a single constant, skip it.
6390 That happens e.g. for inexact multiplication when
6391 -frounding-math. */
6392 if (mul == NULL_TREE)
6393 break;
6394 /* Instead of adjusting OLD_CALL, let's build a new
6395 call to not leak the LHS and prevent keeping bogus
6396 debug statements. DCE will clean up the old call. */
6397 gcall *new_call;
6398 if (gimple_call_internal_p (old_call))
6399 new_call = gimple_build_call_internal
6400 (IFN_COPYSIGN, 2, mul, arg1);
6401 else
6402 new_call = gimple_build_call
6403 (gimple_call_fndecl (old_call), 2, mul, arg1);
6404 tree lhs = make_ssa_name (type);
6405 gimple_call_set_lhs (new_call, lhs);
6406 gimple_set_location (new_call,
6407 gimple_location (old_call));
6408 insert_stmt_after (new_call, old_call);
6409 /* We've used the constant, get rid of it. */
6410 ops->pop ();
6411 bool cst1_neg = real_isneg (TREE_REAL_CST_PTR (cst));
6412 /* Handle the CST1 < 0 case by negating the result. */
6413 if (cst1_neg)
6414 {
6415 tree negrhs = make_ssa_name (TREE_TYPE (lhs));
6416 gimple *negate_stmt
6417 = gimple_build_assign (negrhs, NEGATE_EXPR, lhs);
6418 insert_stmt_after (negate_stmt, new_call);
6419 oe->op = negrhs;
6420 }
6421 else
6422 oe->op = lhs;
6423 if (dump_file && (dump_flags & TDF_DETAILS))
6424 {
6425 fprintf (dump_file, "Optimizing copysign: ");
6426 print_generic_expr (dump_file, cst);
6427 fprintf (dump_file, " * COPYSIGN (");
6428 print_generic_expr (dump_file, arg0);
6429 fprintf (dump_file, ", ");
6430 print_generic_expr (dump_file, arg1);
6431 fprintf (dump_file, ") into %sCOPYSIGN (",
6432 cst1_neg ? "-" : "");
6433 print_generic_expr (dump_file, mul);
6434 fprintf (dump_file, ", ");
6435 print_generic_expr (dump_file, arg1);
6436 fprintf (dump_file, ")\n");
6437 }
6438 return;
6439 }
6440 break;
6441 default:
6442 break;
6443 }
6444 }
6445 }
6446 }
6447 }
6448
6449 /* Transform STMT at *GSI into a copy by replacing its rhs with NEW_RHS. */
6450
6451 static void
6452 transform_stmt_to_copy (gimple_stmt_iterator *gsi, gimple *stmt, tree new_rhs)
6453 {
6454 tree rhs1;
6455
6456 if (dump_file && (dump_flags & TDF_DETAILS))
6457 {
6458 fprintf (dump_file, "Transforming ");
6459 print_gimple_stmt (dump_file, stmt, 0);
6460 }
6461
6462 rhs1 = gimple_assign_rhs1 (stmt);
6463 gimple_assign_set_rhs_from_tree (gsi, new_rhs);
6464 update_stmt (stmt);
6465 remove_visited_stmt_chain (rhs1);
6466
6467 if (dump_file && (dump_flags & TDF_DETAILS))
6468 {
6469 fprintf (dump_file, " into ");
6470 print_gimple_stmt (dump_file, stmt, 0);
6471 }
6472 }
6473
6474 /* Transform STMT at *GSI into a multiply of RHS1 and RHS2. */
6475
6476 static void
6477 transform_stmt_to_multiply (gimple_stmt_iterator *gsi, gimple *stmt,
6478 tree rhs1, tree rhs2)
6479 {
6480 if (dump_file && (dump_flags & TDF_DETAILS))
6481 {
6482 fprintf (dump_file, "Transforming ");
6483 print_gimple_stmt (dump_file, stmt, 0);
6484 }
6485
6486 gimple_assign_set_rhs_with_ops (gsi, MULT_EXPR, rhs1, rhs2);
6487 update_stmt (gsi_stmt (*gsi));
6488 remove_visited_stmt_chain (rhs1);
6489
6490 if (dump_file && (dump_flags & TDF_DETAILS))
6491 {
6492 fprintf (dump_file, " into ");
6493 print_gimple_stmt (dump_file, stmt, 0);
6494 }
6495 }
6496
6497 /* Reassociate expressions in basic block BB and its post-dominator
6498 children.
6499
6500 Bubble up return status from maybe_optimize_range_tests. */
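/* Overview sketch of the per-statement work below: linearize the
   expression tree into an operand vector, sort it by rank, optimize
   the operand list, attempt undistribution, range-test, copysign and
   powi transformations, and finally rewrite the tree either serially
   or in parallel form.  */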
6501
6502 static bool
6503 reassociate_bb (basic_block bb)
6504 {
6505 gimple_stmt_iterator gsi;
6506 basic_block son;
6507 gimple *stmt = last_stmt (bb);
6508 bool cfg_cleanup_needed = false;
6509
6510 if (stmt && !gimple_visited_p (stmt))
6511 cfg_cleanup_needed |= maybe_optimize_range_tests (stmt);
6512
6513 bool do_prev = false;
6514 for (gsi = gsi_last_bb (bb);
6515 !gsi_end_p (gsi); do_prev ? gsi_prev (&gsi) : (void) 0)
6516 {
6517 do_prev = true;
6518 stmt = gsi_stmt (gsi);
6519
6520 if (is_gimple_assign (stmt)
6521 && !stmt_could_throw_p (cfun, stmt))
6522 {
6523 tree lhs, rhs1, rhs2;
6524 enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
6525
6526 /* If this was part of an already processed statement,
6527 we don't need to touch it again. */
6528 if (gimple_visited_p (stmt))
6529 {
6530 /* This statement might have become dead because of previous
6531 reassociations. */
6532 if (has_zero_uses (gimple_get_lhs (stmt)))
6533 {
6534 reassoc_remove_stmt (&gsi);
6535 release_defs (stmt);
6536 /* We might end up removing the last stmt above which
6537 places the iterator to the end of the sequence.
6538 Reset it to the last stmt in this case and make sure
6539 we don't do gsi_prev in that case. */
6540 if (gsi_end_p (gsi))
6541 {
6542 gsi = gsi_last_bb (bb);
6543 do_prev = false;
6544 }
6545 }
6546 continue;
6547 }
6548
6549 /* If this is not a gimple binary expression, there is
6550 nothing for us to do with it. */
6551 if (get_gimple_rhs_class (rhs_code) != GIMPLE_BINARY_RHS)
6552 continue;
6553
6554 lhs = gimple_assign_lhs (stmt);
6555 rhs1 = gimple_assign_rhs1 (stmt);
6556 rhs2 = gimple_assign_rhs2 (stmt);
6557
6558 /* For non-bit or min/max operations we can't associate
6559 all types. Verify that here. */
6560 if ((rhs_code != BIT_IOR_EXPR
6561 && rhs_code != BIT_AND_EXPR
6562 && rhs_code != BIT_XOR_EXPR
6563 && rhs_code != MIN_EXPR
6564 && rhs_code != MAX_EXPR
6565 && !can_reassociate_type_p (TREE_TYPE (lhs)))
6566 || !can_reassociate_op_p (rhs1)
6567 || !can_reassociate_op_p (rhs2))
6568 continue;
6569
6570 if (associative_tree_code (rhs_code))
6571 {
6572 auto_vec<operand_entry *> ops;
6573 tree powi_result = NULL_TREE;
6574 bool is_vector = VECTOR_TYPE_P (TREE_TYPE (lhs));
6575
6576 /* There may be no immediate uses left by the time we
6577 get here because we may have eliminated them all. */
6578 if (TREE_CODE (lhs) == SSA_NAME && has_zero_uses (lhs))
6579 continue;
6580
6581 gimple_set_visited (stmt, true);
6582 linearize_expr_tree (&ops, stmt, true, true);
6583 ops.qsort (sort_by_operand_rank);
6584 int orig_len = ops.length ();
6585 optimize_ops_list (rhs_code, &ops);
6586 if (undistribute_ops_list (rhs_code, &ops,
6587 loop_containing_stmt (stmt)))
6588 {
6589 ops.qsort (sort_by_operand_rank);
6590 optimize_ops_list (rhs_code, &ops);
6591 }
6592 if (undistribute_bitref_for_vector (rhs_code, &ops,
6593 loop_containing_stmt (stmt)))
6594 {
6595 ops.qsort (sort_by_operand_rank);
6596 optimize_ops_list (rhs_code, &ops);
6597 }
6598 if (rhs_code == PLUS_EXPR
6599 && transform_add_to_multiply (&ops))
6600 ops.qsort (sort_by_operand_rank);
6601
6602 if (rhs_code == BIT_IOR_EXPR || rhs_code == BIT_AND_EXPR)
6603 {
6604 if (is_vector)
6605 optimize_vec_cond_expr (rhs_code, &ops);
6606 else
6607 optimize_range_tests (rhs_code, &ops, NULL);
6608 }
6609
6610 if (rhs_code == MULT_EXPR && !is_vector)
6611 {
6612 attempt_builtin_copysign (&ops);
6613
6614 if (reassoc_insert_powi_p
6615 && (flag_unsafe_math_optimizations
6616 || (INTEGRAL_TYPE_P (TREE_TYPE (lhs)))))
6617 powi_result = attempt_builtin_powi (stmt, &ops);
6618 }
6619
6620 operand_entry *last;
6621 bool negate_result = false;
6622 if (ops.length () > 1
6623 && rhs_code == MULT_EXPR)
6624 {
6625 last = ops.last ();
6626 if ((integer_minus_onep (last->op)
6627 || real_minus_onep (last->op))
6628 && !HONOR_SNANS (TREE_TYPE (lhs))
6629 && (!HONOR_SIGNED_ZEROS (TREE_TYPE (lhs))
6630 || !COMPLEX_FLOAT_TYPE_P (TREE_TYPE (lhs))))
6631 {
6632 ops.pop ();
6633 negate_result = true;
6634 }
6635 }
6636
6637 tree new_lhs = lhs;
6638 /* If the operand vector is now empty, all operands were
6639 consumed by the __builtin_powi optimization. */
6640 if (ops.length () == 0)
6641 transform_stmt_to_copy (&gsi, stmt, powi_result);
6642 else if (ops.length () == 1)
6643 {
6644 tree last_op = ops.last ()->op;
6645
6646 /* If the stmt that defines operand has to be inserted, insert it
6647 before the use. */
6648 if (ops.last ()->stmt_to_insert)
6649 insert_stmt_before_use (stmt, ops.last ()->stmt_to_insert);
6650 if (powi_result)
6651 transform_stmt_to_multiply (&gsi, stmt, last_op,
6652 powi_result);
6653 else
6654 transform_stmt_to_copy (&gsi, stmt, last_op);
6655 }
6656 else
6657 {
6658 machine_mode mode = TYPE_MODE (TREE_TYPE (lhs));
6659 int ops_num = ops.length ();
6660 int width;
6661
6662 /* For binary bit operations, if there are at least 3
6663 operands and the last operand in OPS is a constant,
6664 move it to the front. This helps ensure that we generate
6665 (X & Y) & C rather than (X & C) & Y. The former will
6666 often match a canonical bit test when we get to RTL. */
6667 if (ops.length () > 2
6668 && (rhs_code == BIT_AND_EXPR
6669 || rhs_code == BIT_IOR_EXPR
6670 || rhs_code == BIT_XOR_EXPR)
6671 && TREE_CODE (ops.last ()->op) == INTEGER_CST)
6672 std::swap (*ops[0], *ops[ops_num - 1]);
6673
6674 /* Only rewrite the expression tree to parallel in the
6675 last reassoc pass to avoid useless work back-and-forth
6676 with initial linearization. */
6677 if (!reassoc_insert_powi_p
6678 && ops.length () > 3
6679 && (width = get_reassociation_width (ops_num, rhs_code,
6680 mode)) > 1)
6681 {
6682 if (dump_file && (dump_flags & TDF_DETAILS))
6683 fprintf (dump_file,
6684 "Width = %d was chosen for reassociation\n",
6685 width);
6686 rewrite_expr_tree_parallel (as_a <gassign *> (stmt),
6687 width, ops);
6688 }
6689 else
6690 {
6691 /* When there are three operands left, we want
6692 to make sure the ones that get the double
6693 binary op are chosen wisely. */
6694 int len = ops.length ();
6695 if (len >= 3)
6696 swap_ops_for_binary_stmt (ops, len - 3, stmt);
6697
6698 new_lhs = rewrite_expr_tree (stmt, rhs_code, 0, ops,
6699 powi_result != NULL
6700 || negate_result,
6701 len != orig_len);
6702 }
6703
6704 /* If we combined some repeated factors into a
6705 __builtin_powi call, multiply that result by the
6706 reassociated operands. */
6707 if (powi_result)
6708 {
6709 gimple *mul_stmt, *lhs_stmt = SSA_NAME_DEF_STMT (lhs);
6710 tree type = TREE_TYPE (lhs);
6711 tree target_ssa = make_temp_ssa_name (type, NULL,
6712 "reassocpow");
6713 gimple_set_lhs (lhs_stmt, target_ssa);
6714 update_stmt (lhs_stmt);
6715 if (lhs != new_lhs)
6716 {
6717 target_ssa = new_lhs;
6718 new_lhs = lhs;
6719 }
6720 mul_stmt = gimple_build_assign (lhs, MULT_EXPR,
6721 powi_result, target_ssa);
6722 gimple_set_location (mul_stmt, gimple_location (stmt));
6723 gimple_set_uid (mul_stmt, gimple_uid (stmt));
6724 gsi_insert_after (&gsi, mul_stmt, GSI_NEW_STMT);
6725 }
6726 }
6727
6728 if (negate_result)
6729 {
6730 stmt = SSA_NAME_DEF_STMT (lhs);
6731 tree tmp = make_ssa_name (TREE_TYPE (lhs));
6732 gimple_set_lhs (stmt, tmp);
6733 if (lhs != new_lhs)
6734 tmp = new_lhs;
6735 gassign *neg_stmt = gimple_build_assign (lhs, NEGATE_EXPR,
6736 tmp);
6737 gimple_set_uid (neg_stmt, gimple_uid (stmt));
6738 gsi_insert_after (&gsi, neg_stmt, GSI_NEW_STMT);
6739 update_stmt (stmt);
6740 }
6741 }
6742 }
6743 }
6744 for (son = first_dom_son (CDI_POST_DOMINATORS, bb);
6745 son;
6746 son = next_dom_son (CDI_POST_DOMINATORS, son))
6747 cfg_cleanup_needed |= reassociate_bb (son);
6748
6749 return cfg_cleanup_needed;
6750 }
6751
6752 /* Add jumps around shifts for range tests turned into bit tests.
6753 For each SSA_NAME VAR we have code like:
6754 VAR = ...; // final stmt of range comparison
6755 // bit test here...;
6756 OTHERVAR = ...; // final stmt of the bit test sequence
6757 RES = VAR | OTHERVAR;
6758 Turn the above into:
6759 VAR = ...;
6760 if (VAR != 0)
6761 goto <l3>;
6762 else
6763 goto <l2>;
6764 <l2>:
6765 // bit test here...;
6766 OTHERVAR = ...;
6767 <l3>:
6768 # RES = PHI<1(l1), OTHERVAR(l2)>; */
6769
6770 static void
6771 branch_fixup (void)
6772 {
6773 tree var;
6774 unsigned int i;
6775
6776 FOR_EACH_VEC_ELT (reassoc_branch_fixups, i, var)
6777 {
6778 gimple *def_stmt = SSA_NAME_DEF_STMT (var);
6779 gimple *use_stmt;
6780 use_operand_p use;
6781 bool ok = single_imm_use (var, &use, &use_stmt);
6782 gcc_assert (ok
6783 && is_gimple_assign (use_stmt)
6784 && gimple_assign_rhs_code (use_stmt) == BIT_IOR_EXPR
6785 && gimple_bb (def_stmt) == gimple_bb (use_stmt));
6786
6787 basic_block cond_bb = gimple_bb (def_stmt);
6788 basic_block then_bb = split_block (cond_bb, def_stmt)->dest;
6789 basic_block merge_bb = split_block (then_bb, use_stmt)->dest;
6790
6791 gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
6792 gimple *g = gimple_build_cond (NE_EXPR, var,
6793 build_zero_cst (TREE_TYPE (var)),
6794 NULL_TREE, NULL_TREE);
6795 location_t loc = gimple_location (use_stmt);
6796 gimple_set_location (g, loc);
6797 gsi_insert_after (&gsi, g, GSI_NEW_STMT);
6798
6799 edge etrue = make_edge (cond_bb, merge_bb, EDGE_TRUE_VALUE);
6800 etrue->probability = profile_probability::even ();
6801 edge efalse = find_edge (cond_bb, then_bb);
6802 efalse->flags = EDGE_FALSE_VALUE;
6803 efalse->probability -= etrue->probability;
6804 then_bb->count -= etrue->count ();
6805
6806 tree othervar = NULL_TREE;
6807 if (gimple_assign_rhs1 (use_stmt) == var)
6808 othervar = gimple_assign_rhs2 (use_stmt);
6809 else if (gimple_assign_rhs2 (use_stmt) == var)
6810 othervar = gimple_assign_rhs1 (use_stmt);
6811 else
6812 gcc_unreachable ();
6813 tree lhs = gimple_assign_lhs (use_stmt);
6814 gphi *phi = create_phi_node (lhs, merge_bb);
6815 add_phi_arg (phi, build_one_cst (TREE_TYPE (lhs)), etrue, loc);
6816 add_phi_arg (phi, othervar, single_succ_edge (then_bb), loc);
6817 gsi = gsi_for_stmt (use_stmt);
6818 gsi_remove (&gsi, true);
6819
6820 set_immediate_dominator (CDI_DOMINATORS, merge_bb, cond_bb);
6821 set_immediate_dominator (CDI_POST_DOMINATORS, cond_bb, merge_bb);
6822 }
6823 reassoc_branch_fixups.release ();
6824 }
6825
6826 void dump_ops_vector (FILE *file, vec<operand_entry *> ops);
6827 void debug_ops_vector (vec<operand_entry *> ops);
6828
6829 /* Dump the operand entry vector OPS to FILE. */
6830
6831 void
6832 dump_ops_vector (FILE *file, vec<operand_entry *> ops)
6833 {
6834 operand_entry *oe;
6835 unsigned int i;
6836
6837 FOR_EACH_VEC_ELT (ops, i, oe)
6838 {
6839 fprintf (file, "Op %d -> rank: %d, tree: ", i, oe->rank);
6840 print_generic_expr (file, oe->op);
6841 fprintf (file, "\n");
6842 }
6843 }
6844
6845 /* Dump the operand entry vector OPS to STDERR. */
6846
6847 DEBUG_FUNCTION void
6848 debug_ops_vector (vec<operand_entry *> ops)
6849 {
6850 dump_ops_vector (stderr, ops);
6851 }
6852
6853 /* Bubble up return status from reassociate_bb. */
6854
6855 static bool
6856 do_reassoc (void)
6857 {
6858 break_up_subtract_bb (ENTRY_BLOCK_PTR_FOR_FN (cfun));
6859 return reassociate_bb (EXIT_BLOCK_PTR_FOR_FN (cfun));
6860 }
6861
6862 /* Initialize the reassociation pass. */
6863
6864 static void
6865 init_reassoc (void)
6866 {
6867 int i;
6868 int64_t rank = 2;
6869 int *bbs = XNEWVEC (int, n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS);
6870
6871 /* Find the loops, so that we can prevent moving calculations in
6872 them. */
6873 loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
6874
6875 memset (&reassociate_stats, 0, sizeof (reassociate_stats));
6876
6877 next_operand_entry_id = 0;
6878
6879 /* Reverse RPO (Reverse Post Order) will give us something where
6880 deeper loops come later. */
6881 pre_and_rev_post_order_compute (NULL, bbs, false);
6882 bb_rank = XCNEWVEC (int64_t, last_basic_block_for_fn (cfun));
6883 operand_rank = new hash_map<tree, int64_t>;
6884
6885 /* Give each default definition a distinct rank. This includes
6886 parameters and the static chain. Walk backwards over all
6887 SSA names so that we get proper rank ordering according
6888 to tree_swap_operands_p. */
6889 for (i = num_ssa_names - 1; i > 0; --i)
6890 {
6891 tree name = ssa_name (i);
6892 if (name && SSA_NAME_IS_DEFAULT_DEF (name))
6893 insert_operand_rank (name, ++rank);
6894 }
6895
6896 /* Set up rank for each BB */
6897 for (i = 0; i < n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS; i++)
6898 bb_rank[bbs[i]] = ++rank << 16;
6899
6900 free (bbs);
6901 calculate_dominance_info (CDI_POST_DOMINATORS);
6902 plus_negates = vNULL;
6903 }
6904
6905 /* Cleanup after the reassociation pass, and print stats if
6906 requested. */
6907
6908 static void
6909 fini_reassoc (void)
6910 {
6911 statistics_counter_event (cfun, "Linearized",
6912 reassociate_stats.linearized);
6913 statistics_counter_event (cfun, "Constants eliminated",
6914 reassociate_stats.constants_eliminated);
6915 statistics_counter_event (cfun, "Ops eliminated",
6916 reassociate_stats.ops_eliminated);
6917 statistics_counter_event (cfun, "Statements rewritten",
6918 reassociate_stats.rewritten);
6919 statistics_counter_event (cfun, "Built-in pow[i] calls encountered",
6920 reassociate_stats.pows_encountered);
6921 statistics_counter_event (cfun, "Built-in powi calls created",
6922 reassociate_stats.pows_created);
6923
6924 delete operand_rank;
6925 operand_entry_pool.release ();
6926 free (bb_rank);
6927 plus_negates.release ();
6928 free_dominance_info (CDI_POST_DOMINATORS);
6929 loop_optimizer_finalize ();
6930 }
6931
6932 /* Gate and execute functions for Reassociation. If INSERT_POWI_P, enable
6933 insertion of __builtin_powi calls.
6934
6935 Returns TODO_cfg_cleanup if a CFG cleanup pass is desired due to
6936 optimization of a gimple conditional. Otherwise returns zero. */
6937
6938 static unsigned int
6939 execute_reassoc (bool insert_powi_p)
6940 {
6941 reassoc_insert_powi_p = insert_powi_p;
6942
6943 init_reassoc ();
6944
6945 bool cfg_cleanup_needed;
6946 cfg_cleanup_needed = do_reassoc ();
6947 repropagate_negates ();
6948 branch_fixup ();
6949
6950 fini_reassoc ();
6951 return cfg_cleanup_needed ? TODO_cleanup_cfg : 0;
6952 }
6953
6954 namespace {
6955
6956 const pass_data pass_data_reassoc =
6957 {
6958 GIMPLE_PASS, /* type */
6959 "reassoc", /* name */
6960 OPTGROUP_NONE, /* optinfo_flags */
6961 TV_TREE_REASSOC, /* tv_id */
6962 ( PROP_cfg | PROP_ssa ), /* properties_required */
6963 0, /* properties_provided */
6964 0, /* properties_destroyed */
6965 0, /* todo_flags_start */
6966 TODO_update_ssa_only_virtuals, /* todo_flags_finish */
6967 };
6968
6969 class pass_reassoc : public gimple_opt_pass
6970 {
6971 public:
6972 pass_reassoc (gcc::context *ctxt)
6973 : gimple_opt_pass (pass_data_reassoc, ctxt), insert_powi_p (false)
6974 {}
6975
6976 /* opt_pass methods: */
6977 opt_pass * clone () { return new pass_reassoc (m_ctxt); }
6978 void set_pass_param (unsigned int n, bool param)
6979 {
6980 gcc_assert (n == 0);
6981 insert_powi_p = param;
6982 }
6983 virtual bool gate (function *) { return flag_tree_reassoc != 0; }
6984 virtual unsigned int execute (function *)
6985 { return execute_reassoc (insert_powi_p); }
6986
6987 private:
6988 /* Enable insertion of __builtin_powi calls during execute_reassoc. See
6989 point 3a in the pass header comment. */
6990 bool insert_powi_p;
6991 }; // class pass_reassoc
6992
6993 } // anon namespace
6994
6995 gimple_opt_pass *
6996 make_pass_reassoc (gcc::context *ctxt)
6997 {
6998 return new pass_reassoc (ctxt);
6999 }
7000