1 /* Reassociation for trees.
2    Copyright (C) 2005-2021 Free Software Foundation, Inc.
3    Contributed by Daniel Berlin <dan@dberlin.org>
4 
5 This file is part of GCC.
6 
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
11 
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 GNU General Public License for more details.
16 
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3.  If not see
19 <http://www.gnu.org/licenses/>.  */
20 
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "backend.h"
25 #include "target.h"
26 #include "rtl.h"
27 #include "tree.h"
28 #include "gimple.h"
29 #include "cfghooks.h"
30 #include "alloc-pool.h"
31 #include "tree-pass.h"
32 #include "memmodel.h"
33 #include "tm_p.h"
34 #include "ssa.h"
35 #include "optabs-tree.h"
36 #include "gimple-pretty-print.h"
37 #include "diagnostic-core.h"
38 #include "fold-const.h"
39 #include "stor-layout.h"
40 #include "cfganal.h"
41 #include "gimple-fold.h"
42 #include "tree-eh.h"
43 #include "gimple-iterator.h"
44 #include "gimplify-me.h"
45 #include "tree-cfg.h"
46 #include "tree-ssa-loop.h"
47 #include "flags.h"
48 #include "tree-ssa.h"
49 #include "langhooks.h"
50 #include "cfgloop.h"
51 #include "builtins.h"
52 #include "gimplify.h"
53 #include "case-cfn-macros.h"
54 #include "tree-ssa-reassoc.h"
55 #include "tree-ssa-math-opts.h"
56 
57 /*  This is a simple global reassociation pass.  It is, in part, based
58     on the LLVM pass of the same name (they do some things more or less
59     than we do, in different orders, etc.).
60 
61     It consists of five steps:
62 
63     1. Breaking up subtract operations into addition + negate, where
64     it would promote the reassociation of adds.
65 
66     2. Left linearization of the expression trees, so that (A+B)+(C+D)
67     becomes (((A+B)+C)+D), which is easier for us to rewrite later.
68     During linearization, we place the operands of the binary
69     expressions into a vector of operand_entry_*
70 
71     3. Optimization of the operand lists, eliminating things like a +
72     -a, a & a, etc.
73 
74     3a. Combine repeated factors with the same occurrence counts
75     into a __builtin_powi call that will later be optimized into
76     an optimal number of multiplies.
77 
78     4. Rewrite the expression trees we linearized and optimized so
79     they are in proper rank order.
80 
81     5. Repropagate negates, as nothing else will clean them up ATM.
82 
83     A bit of theory on #4, since nobody seems to write anything down
84     about why it makes sense to do it the way they do it:
85 
86     We could do this much nicer theoretically, but don't (for reasons
87     explained after how to do it theoretically nice :P).
88 
89     In order to promote the most redundancy elimination, you want
90     binary expressions whose operands are the same rank (or
91     preferably, the same value) exposed to the redundancy eliminator,
92     for possible elimination.
93 
94     So the way to do this, if we really cared, is to build the new op
95     tree from the leaves to the roots, merging as you go, and putting the
96     new op on the end of the worklist, until you are left with one
97     thing on the worklist.
98 
99     IE if you have to rewrite the following set of operands (listed with
100     rank in parentheses), with opcode PLUS_EXPR:
101 
102     a (1),  b (1),  c (1),  d (2), e (2)
103 
104 
105     We start with our merge worklist empty, and the ops list with all of
106     those on it.
107 
108     You want to first merge all leaves of the same rank, as much as
109     possible.
110 
111     So first build a binary op of
112 
113     mergetmp = a + b, and put "mergetmp" on the merge worklist.
114 
115     Because there is no three-operand form of PLUS_EXPR, c is not going to
116     be exposed to redundancy elimination as a rank 1 operand.
117 
118     So you might as well throw it on the merge worklist (you could also
119     consider it to now be a rank two operand, and merge it with d and e,
120     but in this case, you then have evicted e from a binary op. So at
121     least in this situation, you can't win.)
122 
123     Then build a binary op of d + e
124     mergetmp2 = d + e
125 
126     and put mergetmp2 on the merge worklist.
127 
128     so merge worklist = {mergetmp, c, mergetmp2}
129 
130     Continue building binary ops of these operations until you have only
131     one operation left on the worklist.
132 
133     So we have
134 
135     build binary op
136     mergetmp3 = mergetmp + c
137 
138     worklist = {mergetmp2, mergetmp3}
139 
140     mergetmp4 = mergetmp2 + mergetmp3
141 
142     worklist = {mergetmp4}
143 
144     Because we have one operation left, we can now just set the original
145     statement equal to the result of that operation.
146 
147     This will at least expose a + b  and d + e to redundancy elimination
148     as binary operations.
149 
150     For extra points, you can reuse the old statements to build the
151     mergetmps, since you shouldn't run out.
152 
153     So why don't we do this?
154 
155     Because it's expensive, and it rarely helps.  Most trees we are
156     reassociating have 3 or fewer ops.  If they have 2 ops, they will
157     already be written into a nice single binary op.  If you have 3 ops, a
158     single simple check suffices to tell you whether the first two are of the
159     same rank.  If so, you know to order it
160 
161     mergetmp = op1 + op2
162     newstmt = mergetmp + op3
163 
164     instead of
165     mergetmp = op2 + op3
166     newstmt = mergetmp + op1
167 
168     If all three are of the same rank, you can't expose them all in a
169     single binary operator anyway, so the above is *still* the best you
170     can do.
171 
172     Thus, this is what we do.  When we have three ops left, we check to see
173     what order to put them in, and call it a day.  As a nod to vector sum
174     reduction, we check if any of the ops are really a phi node that is a
175     destructive update for the associating op, and keep the destructive
176     update together for vector sum reduction recognition.  */
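
/* A tiny end-to-end sketch of the steps above (the names t1 and t2 are
   purely illustrative, not taken from a real dump):

     x = a - b + c      step 1 breaks the subtract into   t1 = -b
                                                          x  = a + t1 + c

   which step 2 linearizes into the operand list {a, t1, c}; steps 3
   and 4 optimize and re-rank that list, and step 5 can repropagate the
   negate so the final statement is again written as a subtraction,
   e.g.  t2 = a + c;  x = t2 - b.  */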
177 
178 /* Enable insertion of __builtin_powi calls during execute_reassoc.  See
179    point 3a in the pass header comment.  */
180 static bool reassoc_insert_powi_p;
181 
182 /* Statistics */
183 static struct
184 {
185   int linearized;
186   int constants_eliminated;
187   int ops_eliminated;
188   int rewritten;
189   int pows_encountered;
190   int pows_created;
191 } reassociate_stats;
192 
193 
194 static object_allocator<operand_entry> operand_entry_pool
195   ("operand entry pool");
196 
197 /* This is used to assign a unique ID to each struct operand_entry
198    so that qsort results are identical on different hosts.  */
199 static unsigned int next_operand_entry_id;
200 
201 /* Starting rank number for a given basic block, so that we can rank
202    operations using unmovable instructions in that BB based on the bb
203    depth.  */
204 static int64_t *bb_rank;
205 
206 /* Operand->rank hashtable.  */
207 static hash_map<tree, int64_t> *operand_rank;
208 
209 /* Vector of SSA_NAMEs for which, after reassociate_bb is done with
210    all basic blocks, the CFG should be adjusted - the basic block is
211    split right after that SSA_NAME's definition statement and before
212    its only use, which must be a bit ior.  */
213 static vec<tree> reassoc_branch_fixups;
214 
215 /* Forward decls.  */
216 static int64_t get_rank (tree);
217 static bool reassoc_stmt_dominates_stmt_p (gimple *, gimple *);
218 
219 /* Wrapper around gsi_remove, which adjusts gimple_uid of debug stmts
220    possibly added by gsi_remove.  */
221 
222 static bool
223 reassoc_remove_stmt (gimple_stmt_iterator *gsi)
224 {
225   gimple *stmt = gsi_stmt (*gsi);
226 
227   if (!MAY_HAVE_DEBUG_BIND_STMTS || gimple_code (stmt) == GIMPLE_PHI)
228     return gsi_remove (gsi, true);
229 
230   gimple_stmt_iterator prev = *gsi;
231   gsi_prev (&prev);
232   unsigned uid = gimple_uid (stmt);
233   basic_block bb = gimple_bb (stmt);
234   bool ret = gsi_remove (gsi, true);
235   if (!gsi_end_p (prev))
236     gsi_next (&prev);
237   else
238     prev = gsi_start_bb (bb);
239   gimple *end_stmt = gsi_stmt (*gsi);
240   while ((stmt = gsi_stmt (prev)) != end_stmt)
241     {
242       gcc_assert (stmt && is_gimple_debug (stmt) && gimple_uid (stmt) == 0);
243       gimple_set_uid (stmt, uid);
244       gsi_next (&prev);
245     }
246   return ret;
247 }
248 
249 /* Bias amount for loop-carried phis.  We want this to be larger than
250    the depth of any reassociation tree we can see, but not larger than
251    the rank difference between two blocks.  */
252 #define PHI_LOOP_BIAS (1 << 15)
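
/* For intuition: block ranks are handed out with a stride of 1 << 16
   (see init_reassoc later in this file and the bb_rank[...] >> 16
   comparison in sort_by_operand_rank), so a bias of 1 << 15 comfortably
   exceeds the depth of any realistic reassociation tree while staying
   below the rank gap between two adjacent blocks.  */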
253 
254 /* Rank assigned to a phi statement.  If STMT is a loop-carried phi of
255    an innermost loop, and the phi has only a single use which is inside
256    the loop, then the rank is the block rank of the loop latch plus an
257    extra bias for the loop-carried dependence.  This causes expressions
258    calculated into an accumulator variable to be independent for each
259    iteration of the loop.  If STMT is some other phi, the rank is the
260    block rank of its containing block.  */
261 static int64_t
262 phi_rank (gimple *stmt)
263 {
264   basic_block bb = gimple_bb (stmt);
265   class loop *father = bb->loop_father;
266   tree res;
267   unsigned i;
268   use_operand_p use;
269   gimple *use_stmt;
270 
271   /* We only care about real loops (those with a latch).  */
272   if (!father->latch)
273     return bb_rank[bb->index];
274 
275   /* Interesting phis must be in headers of innermost loops.  */
276   if (bb != father->header
277       || father->inner)
278     return bb_rank[bb->index];
279 
280   /* Ignore virtual SSA_NAMEs.  */
281   res = gimple_phi_result (stmt);
282   if (virtual_operand_p (res))
283     return bb_rank[bb->index];
284 
285   /* The phi definition must have a single use, and that use must be
286      within the loop.  Otherwise this isn't an accumulator pattern.  */
287   if (!single_imm_use (res, &use, &use_stmt)
288       || gimple_bb (use_stmt)->loop_father != father)
289     return bb_rank[bb->index];
290 
291   /* Look for phi arguments from within the loop.  If found, bias this phi.  */
292   for (i = 0; i < gimple_phi_num_args (stmt); i++)
293     {
294       tree arg = gimple_phi_arg_def (stmt, i);
295       if (TREE_CODE (arg) == SSA_NAME
296 	  && !SSA_NAME_IS_DEFAULT_DEF (arg))
297 	{
298 	  gimple *def_stmt = SSA_NAME_DEF_STMT (arg);
299 	  if (gimple_bb (def_stmt)->loop_father == father)
300 	    return bb_rank[father->latch->index] + PHI_LOOP_BIAS;
301 	}
302     }
303 
304   /* Must be an uninteresting phi.  */
305   return bb_rank[bb->index];
306 }
307 
308 /* If EXP is an SSA_NAME defined by a PHI statement that represents a
309    loop-carried dependence of an innermost loop, return TRUE; else
310    return FALSE.  */
311 static bool
312 loop_carried_phi (tree exp)
313 {
314   gimple *phi_stmt;
315   int64_t block_rank;
316 
317   if (TREE_CODE (exp) != SSA_NAME
318       || SSA_NAME_IS_DEFAULT_DEF (exp))
319     return false;
320 
321   phi_stmt = SSA_NAME_DEF_STMT (exp);
322 
323   if (gimple_code (SSA_NAME_DEF_STMT (exp)) != GIMPLE_PHI)
324     return false;
325 
326   /* Non-loop-carried phis have block rank.  Loop-carried phis have
327      an additional bias added in.  If this phi doesn't have block rank,
328      it's biased and should not be propagated.  */
329   block_rank = bb_rank[gimple_bb (phi_stmt)->index];
330 
331   if (phi_rank (phi_stmt) != block_rank)
332     return true;
333 
334   return false;
335 }
336 
337 /* Return the maximum of RANK and the rank that should be propagated
338    from expression OP.  For most operands, this is just the rank of OP.
339    For loop-carried phis, the value is zero to avoid undoing the bias
340    in favor of the phi.  */
341 static int64_t
342 propagate_rank (int64_t rank, tree op)
343 {
344   int64_t op_rank;
345 
346   if (loop_carried_phi (op))
347     return rank;
348 
349   op_rank = get_rank (op);
350 
351   return MAX (rank, op_rank);
352 }
353 
354 /* Look up the recorded rank for expression E, or -1 if there is none.  */
355 
356 static inline int64_t
357 find_operand_rank (tree e)
358 {
359   int64_t *slot = operand_rank->get (e);
360   return slot ? *slot : -1;
361 }
362 
363 /* Insert {E,RANK} into the operand rank hashtable.  */
364 
365 static inline void
366 insert_operand_rank (tree e, int64_t rank)
367 {
368   gcc_assert (rank > 0);
369   gcc_assert (!operand_rank->put (e, rank));
370 }
371 
372 /* Given an expression E, return the rank of the expression.  */
373 
374 static int64_t
375 get_rank (tree e)
376 {
377   /* SSA_NAMEs have the rank of the expression they are the result
378      of.
379      For globals and uninitialized values, the rank is 0.
380      For function arguments, use the pre-setup rank.
381      For PHI nodes, stores, asm statements, etc, we use the rank of
382      the BB.
383      For simple operations, the rank is the maximum rank of any of
384      its operands, or the bb_rank, whichever is less.
385      I make no claims that this is optimal, however, it gives good
386      results.  */
387 
388   /* We make an exception to the normal ranking system to break
389      dependences of accumulator variables in loops.  Suppose we
390      have a simple one-block loop containing:
391 
392        x_1 = phi(x_0, x_2)
393        b = a + x_1
394        c = b + d
395        x_2 = c + e
396 
397      As shown, each iteration of the calculation into x is fully
398      dependent upon the iteration before it.  We would prefer to
399      see this in the form:
400 
401        x_1 = phi(x_0, x_2)
402        b = a + d
403        c = b + e
404        x_2 = c + x_1
405 
406      If the loop is unrolled, the calculations of b and c from
407      different iterations can be interleaved.
408 
409      To obtain this result during reassociation, we bias the rank
410      of the phi definition x_1 upward, when it is recognized as an
411      accumulator pattern.  The artificial rank causes it to be
412      added last, providing the desired independence.  */
413 
414   if (TREE_CODE (e) == SSA_NAME)
415     {
416       ssa_op_iter iter;
417       gimple *stmt;
418       int64_t rank;
419       tree op;
420 
421       /* If we already have a rank for this expression, use that.  */
422       rank = find_operand_rank (e);
423       if (rank != -1)
424 	return rank;
425 
426       stmt = SSA_NAME_DEF_STMT (e);
427       if (gimple_code (stmt) == GIMPLE_PHI)
428 	rank = phi_rank (stmt);
429 
430       else if (!is_gimple_assign (stmt))
431 	rank = bb_rank[gimple_bb (stmt)->index];
432 
433       else
434 	{
435 	  /* Otherwise, find the maximum rank for the operands.  As an
436 	     exception, remove the bias from loop-carried phis when propagating
437 	     the rank so that dependent operations are not also biased.  */
438 	  /* Simply walk over all SSA uses - this takes advantage of the
439 	     fact that non-SSA operands satisfy is_gimple_min_invariant and
440 	     thus have rank 0.  */
441 	  rank = 0;
442 	  FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
443 	    rank = propagate_rank (rank, op);
444 
445 	  rank += 1;
446 	}
447 
448       if (dump_file && (dump_flags & TDF_DETAILS))
449 	{
450 	  fprintf (dump_file, "Rank for ");
451 	  print_generic_expr (dump_file, e);
452 	  fprintf (dump_file, " is %" PRId64 "\n", rank);
453 	}
454 
455       /* Note the rank in the hashtable so we don't recompute it.  */
456       insert_operand_rank (e, rank);
457       return rank;
458     }
459 
460   /* Constants, globals, etc., are rank 0.  */
461   return 0;
462 }
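
/* Illustrative example (made up, not from a real dump) of the ranking
   that get_rank produces inside a single basic block:

     t1 = a + b       rank (t1) = MAX (rank (a), rank (b)) + 1
     t2 = t1 * c      rank (t2) = MAX (rank (t1), rank (c)) + 1
     t3 = t2 - 5      rank (t3) = rank (t2) + 1   (constants are rank 0)

   so operations defined later in a dependence chain get strictly larger
   ranks, which sort_by_operand_rank below uses to order the linearized
   operand lists.  */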
463 
464 
465 /* We want integer ones to end up last no matter what, since they are
466    the ones we can do the most with.  */
467 #define INTEGER_CONST_TYPE 1 << 4
468 #define FLOAT_ONE_CONST_TYPE 1 << 3
469 #define FLOAT_CONST_TYPE 1 << 2
470 #define OTHER_CONST_TYPE 1 << 1
471 
472 /* Classify an invariant tree into integer, float, or other, so that
473    we can sort them to be near other constants of the same type.  */
474 static inline int
475 constant_type (tree t)
476 {
477   if (INTEGRAL_TYPE_P (TREE_TYPE (t)))
478     return INTEGER_CONST_TYPE;
479   else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (t)))
480     {
481       /* Sort -1.0 and 1.0 constants last: while in some cases
482 	 const_binop can't optimize some inexact operations, multiplication
483 	 by -1.0 or 1.0 can always be merged with others.  */
484       if (real_onep (t) || real_minus_onep (t))
485 	return FLOAT_ONE_CONST_TYPE;
486       return FLOAT_CONST_TYPE;
487     }
488   else
489     return OTHER_CONST_TYPE;
490 }
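
/* For example (hypothetical operand list): in a MULT_EXPR chain whose
   operands are {x, 3, y, 5}, the two integer constants get the same
   constant_type, end up adjacent after sort_by_operand_rank, and can
   later be folded into a single 15.  */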
491 
492 /* qsort comparison function to sort operand entries PA and PB by rank
493    so that the sorted array is ordered by rank in decreasing order.  */
494 static int
495 sort_by_operand_rank (const void *pa, const void *pb)
496 {
497   const operand_entry *oea = *(const operand_entry *const *)pa;
498   const operand_entry *oeb = *(const operand_entry *const *)pb;
499 
500   if (oeb->rank != oea->rank)
501     return oeb->rank > oea->rank ? 1 : -1;
502 
503   /* It's nicer for optimize_expression if constants that are likely
504      to fold when added/multiplied/whatever are put next to each
505      other.  Since all constants have rank 0, order them by type.  */
506   if (oea->rank == 0)
507     {
508       if (constant_type (oeb->op) != constant_type (oea->op))
509 	return constant_type (oea->op) - constant_type (oeb->op);
510       else
511 	/* To make sorting result stable, we use unique IDs to determine
512 	   order.  */
513 	return oeb->id > oea->id ? 1 : -1;
514     }
515 
516   if (TREE_CODE (oea->op) != SSA_NAME)
517     {
518       if (TREE_CODE (oeb->op) != SSA_NAME)
519 	return oeb->id > oea->id ? 1 : -1;
520       else
521 	return 1;
522     }
523   else if (TREE_CODE (oeb->op) != SSA_NAME)
524     return -1;
525 
526   /* Lastly, make sure the versions that are the same go next to each
527      other.  */
528   if (SSA_NAME_VERSION (oeb->op) != SSA_NAME_VERSION (oea->op))
529     {
530       /* SSA_NAME_VERSION is assigned pretty randomly, because we reuse
531 	 versions of removed SSA_NAMEs, so if possible prefer to sort
532 	 based on the basic block and gimple_uid of the SSA_NAME_DEF_STMT.
533 	 See PR60418.  */
534       gimple *stmta = SSA_NAME_DEF_STMT (oea->op);
535       gimple *stmtb = SSA_NAME_DEF_STMT (oeb->op);
536       basic_block bba = gimple_bb (stmta);
537       basic_block bbb = gimple_bb (stmtb);
538       if (bbb != bba)
539 	{
540 	  /* One of the SSA_NAMEs can be defined in oeN->stmt_to_insert
541 	     but the other might not be.  */
542 	  if (!bba)
543 	    return 1;
544 	  if (!bbb)
545 	    return -1;
546 	  /* If neither is, compare bb_rank.  */
547 	  if (bb_rank[bbb->index] != bb_rank[bba->index])
548 	    return (bb_rank[bbb->index] >> 16) - (bb_rank[bba->index] >> 16);
549 	}
550 
551       bool da = reassoc_stmt_dominates_stmt_p (stmta, stmtb);
552       bool db = reassoc_stmt_dominates_stmt_p (stmtb, stmta);
553       if (da != db)
554 	return da ? 1 : -1;
555 
556       return SSA_NAME_VERSION (oeb->op) > SSA_NAME_VERSION (oea->op) ? 1 : -1;
557     }
558 
559   return oeb->id > oea->id ? 1 : -1;
560 }
561 
562 /* Add an operand entry to *OPS for the tree operand OP.  */
563 
564 static void
565 add_to_ops_vec (vec<operand_entry *> *ops, tree op, gimple *stmt_to_insert = NULL)
566 {
567   operand_entry *oe = operand_entry_pool.allocate ();
568 
569   oe->op = op;
570   oe->rank = get_rank (op);
571   oe->id = next_operand_entry_id++;
572   oe->count = 1;
573   oe->stmt_to_insert = stmt_to_insert;
574   ops->safe_push (oe);
575 }
576 
577 /* Add an operand entry to *OPS for the tree operand OP with repeat
578    count REPEAT.  */
579 
580 static void
581 add_repeat_to_ops_vec (vec<operand_entry *> *ops, tree op,
582 		       HOST_WIDE_INT repeat)
583 {
584   operand_entry *oe = operand_entry_pool.allocate ();
585 
586   oe->op = op;
587   oe->rank = get_rank (op);
588   oe->id = next_operand_entry_id++;
589   oe->count = repeat;
590   oe->stmt_to_insert = NULL;
591   ops->safe_push (oe);
592 
593   reassociate_stats.pows_encountered++;
594 }
595 
596 /* Returns true if we can reassociate the SSA def OP.  */
597 
598 static bool
599 can_reassociate_op_p (tree op)
600 {
601   if (TREE_CODE (op) == SSA_NAME && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (op))
602     return false;
603   /* Make sure asm goto outputs do not participate in reassociation since
604      we have no way to find an insertion place after asm goto.  */
605   if (TREE_CODE (op) == SSA_NAME
606       && gimple_code (SSA_NAME_DEF_STMT (op)) == GIMPLE_ASM
607       && gimple_asm_nlabels (as_a <gasm *> (SSA_NAME_DEF_STMT (op))) != 0)
608     return false;
609   return true;
610 }
611 
612 /* Returns true if we can reassociate operations of TYPE.
613    That is, for integral or non-saturating fixed-point types, and for
614    floating-point types when associative-math is enabled.  */
615 
616 static bool
617 can_reassociate_type_p (tree type)
618 {
619   if ((ANY_INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_WRAPS (type))
620       || NON_SAT_FIXED_POINT_TYPE_P (type)
621       || (flag_associative_math && FLOAT_TYPE_P (type)))
622     return true;
623   return false;
624 }
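
/* Concretely (a few examples, not an exhaustive list):
   can_reassociate_type_p accepts unsigned integer types (unsigned
   arithmetic wraps), accepts signed integer types only when signed
   overflow is defined to wrap (e.g. with -fwrapv), and accepts double
   only when -fassociative-math (typically via -ffast-math) is in
   effect.  */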
625 
626 /* Return true if STMT is a reassociable operation containing a binary
627    operation with tree code CODE, and is inside LOOP.  */
628 
629 static bool
630 is_reassociable_op (gimple *stmt, enum tree_code code, class loop *loop)
631 {
632   basic_block bb = gimple_bb (stmt);
633 
634   if (gimple_bb (stmt) == NULL)
635     return false;
636 
637   if (!flow_bb_inside_loop_p (loop, bb))
638     return false;
639 
640   if (is_gimple_assign (stmt)
641       && gimple_assign_rhs_code (stmt) == code
642       && has_single_use (gimple_assign_lhs (stmt)))
643     {
644       tree rhs1 = gimple_assign_rhs1 (stmt);
645       tree rhs2 = gimple_assign_rhs2 (stmt);
646       if (!can_reassociate_op_p (rhs1)
647 	  || (rhs2 && !can_reassociate_op_p (rhs2)))
648 	return false;
649       return true;
650     }
651 
652   return false;
653 }
654 
655 
656 /* Return true if STMT is a nop-conversion.  */
657 
658 static bool
659 gimple_nop_conversion_p (gimple *stmt)
660 {
661   if (gassign *ass = dyn_cast <gassign *> (stmt))
662     {
663       if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (ass))
664 	  && tree_nop_conversion_p (TREE_TYPE (gimple_assign_lhs (ass)),
665 				    TREE_TYPE (gimple_assign_rhs1 (ass))))
666 	return true;
667     }
668   return false;
669 }
670 
671 /* Given NAME, if NAME is defined by a unary operation OPCODE, return
672    that operation's operand.  Otherwise, return NULL_TREE.  */
673 
674 static tree
675 get_unary_op (tree name, enum tree_code opcode)
676 {
677   gimple *stmt = SSA_NAME_DEF_STMT (name);
678 
679   /* Look through nop conversions (sign changes).  */
680   if (gimple_nop_conversion_p (stmt)
681       && TREE_CODE (gimple_assign_rhs1 (stmt)) == SSA_NAME)
682     stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
683 
684   if (!is_gimple_assign (stmt))
685     return NULL_TREE;
686 
687   if (gimple_assign_rhs_code (stmt) == opcode)
688     return gimple_assign_rhs1 (stmt);
689   return NULL_TREE;
690 }
691 
692 /* Return true if OP1 and OP2 have the same value if cast to either type.  */
693 
694 static bool
695 ops_equal_values_p (tree op1, tree op2)
696 {
697   if (op1 == op2)
698     return true;
699 
700   tree orig_op1 = op1;
701   if (TREE_CODE (op1) == SSA_NAME)
702     {
703       gimple *stmt = SSA_NAME_DEF_STMT (op1);
704       if (gimple_nop_conversion_p (stmt))
705 	{
706 	  op1 = gimple_assign_rhs1 (stmt);
707 	  if (op1 == op2)
708 	    return true;
709 	}
710     }
711 
712   if (TREE_CODE (op2) == SSA_NAME)
713     {
714       gimple *stmt = SSA_NAME_DEF_STMT (op2);
715       if (gimple_nop_conversion_p (stmt))
716 	{
717 	  op2 = gimple_assign_rhs1 (stmt);
718 	  if (op1 == op2
719 	      || orig_op1 == op2)
720 	    return true;
721 	}
722     }
723 
724   return false;
725 }
726 
727 
728 /* If CURR and LAST are a pair of ops that OPCODE allows us to
729    eliminate through equivalences, do so, remove them from OPS, and
730    return true.  Otherwise, return false.  */
731 
732 static bool
733 eliminate_duplicate_pair (enum tree_code opcode,
734 			  vec<operand_entry *> *ops,
735 			  bool *all_done,
736 			  unsigned int i,
737 			  operand_entry *curr,
738 			  operand_entry *last)
739 {
740 
741   /* If we have two of the same op, and the opcode is & |, min, or max,
742      we can eliminate one of them.
743      If we have two of the same op, and the opcode is ^, we can
744      eliminate both of them.  */
745 
746   if (last && last->op == curr->op)
747     {
748       switch (opcode)
749 	{
750 	case MAX_EXPR:
751 	case MIN_EXPR:
752 	case BIT_IOR_EXPR:
753 	case BIT_AND_EXPR:
754 	  if (dump_file && (dump_flags & TDF_DETAILS))
755 	    {
756 	      fprintf (dump_file, "Equivalence: ");
757 	      print_generic_expr (dump_file, curr->op);
758 	      fprintf (dump_file, " [&|minmax] ");
759 	      print_generic_expr (dump_file, last->op);
760 	      fprintf (dump_file, " -> ");
761 	      print_generic_stmt (dump_file, last->op);
762 	    }
763 
764 	  ops->ordered_remove (i);
765 	  reassociate_stats.ops_eliminated ++;
766 
767 	  return true;
768 
769 	case BIT_XOR_EXPR:
770 	  if (dump_file && (dump_flags & TDF_DETAILS))
771 	    {
772 	      fprintf (dump_file, "Equivalence: ");
773 	      print_generic_expr (dump_file, curr->op);
774 	      fprintf (dump_file, " ^ ");
775 	      print_generic_expr (dump_file, last->op);
776 	      fprintf (dump_file, " -> nothing\n");
777 	    }
778 
779 	  reassociate_stats.ops_eliminated += 2;
780 
781 	  if (ops->length () == 2)
782 	    {
783 	      ops->truncate (0);
784 	      add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (last->op)));
785 	      *all_done = true;
786 	    }
787 	  else
788 	    {
789 	      ops->ordered_remove (i-1);
790 	      ops->ordered_remove (i-1);
791 	    }
792 
793 	  return true;
794 
795 	default:
796 	  break;
797 	}
798     }
799   return false;
800 }
801 
802 static vec<tree> plus_negates;
803 
804 /* If OPCODE is PLUS_EXPR and CURR->OP is a negate expression or a bitwise
805    not expression, look in OPS for a corresponding positive operand to cancel
806    it out.  If we find one, remove the other from OPS, replace
807    OPS[CURRINDEX] with 0 or -1, respectively, and return true.  Otherwise,
808    return false. */
809 
810 static bool
811 eliminate_plus_minus_pair (enum tree_code opcode,
812 			   vec<operand_entry *> *ops,
813 			   unsigned int currindex,
814 			   operand_entry *curr)
815 {
816   tree negateop;
817   tree notop;
818   unsigned int i;
819   operand_entry *oe;
820 
821   if (opcode != PLUS_EXPR || TREE_CODE (curr->op) != SSA_NAME)
822     return false;
823 
824   negateop = get_unary_op (curr->op, NEGATE_EXPR);
825   notop = get_unary_op (curr->op, BIT_NOT_EXPR);
826   if (negateop == NULL_TREE && notop == NULL_TREE)
827     return false;
828 
829   /* Any non-negated version will have a rank that is one less than
830      the current rank.  So once we hit those ranks, if we don't find
831      one, we can stop.  */
832 
833   for (i = currindex + 1;
834        ops->iterate (i, &oe)
835        && oe->rank >= curr->rank - 1 ;
836        i++)
837     {
838       if (negateop
839 	  && ops_equal_values_p (oe->op, negateop))
840 	{
841 	  if (dump_file && (dump_flags & TDF_DETAILS))
842 	    {
843 	      fprintf (dump_file, "Equivalence: ");
844 	      print_generic_expr (dump_file, negateop);
845 	      fprintf (dump_file, " + -");
846 	      print_generic_expr (dump_file, oe->op);
847 	      fprintf (dump_file, " -> 0\n");
848 	    }
849 
850 	  ops->ordered_remove (i);
851 	  add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (oe->op)));
852 	  ops->ordered_remove (currindex);
853 	  reassociate_stats.ops_eliminated ++;
854 
855 	  return true;
856 	}
857       else if (notop
858 	       && ops_equal_values_p (oe->op, notop))
859 	{
860 	  tree op_type = TREE_TYPE (oe->op);
861 
862 	  if (dump_file && (dump_flags & TDF_DETAILS))
863 	    {
864 	      fprintf (dump_file, "Equivalence: ");
865 	      print_generic_expr (dump_file, notop);
866 	      fprintf (dump_file, " + ~");
867 	      print_generic_expr (dump_file, oe->op);
868 	      fprintf (dump_file, " -> -1\n");
869 	    }
870 
871 	  ops->ordered_remove (i);
872 	  add_to_ops_vec (ops, build_all_ones_cst (op_type));
873 	  ops->ordered_remove (currindex);
874 	  reassociate_stats.ops_eliminated ++;
875 
876 	  return true;
877 	}
878     }
879 
880   /* If CURR->OP is a negate expr without nop conversion in a plus expr:
881      save it for later inspection in repropagate_negates().  */
882   if (negateop != NULL_TREE
883       && gimple_assign_rhs_code (SSA_NAME_DEF_STMT (curr->op)) == NEGATE_EXPR)
884     plus_negates.safe_push (curr->op);
885 
886   return false;
887 }
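
/* Illustrative effect of the above (made-up operand list): for the
   PLUS_EXPR chain {b, -a, a} the -a/a pair is dropped and a 0 constant
   is appended, leaving {b, 0}; likewise {b, ~a, a} becomes {b, -1}.  */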
888 
889 /* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR and CURR->OP is really a
890    bitwise not expression, look in OPS for a corresponding operand to
891    cancel it out.  If we find one, collapse the whole operand list to a
892    single 0 (for BIT_AND_EXPR) or -1 (for BIT_IOR_EXPR) and return true.
893    Otherwise, return false.  */
894 
895 static bool
896 eliminate_not_pairs (enum tree_code opcode,
897 		     vec<operand_entry *> *ops,
898 		     unsigned int currindex,
899 		     operand_entry *curr)
900 {
901   tree notop;
902   unsigned int i;
903   operand_entry *oe;
904 
905   if ((opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
906       || TREE_CODE (curr->op) != SSA_NAME)
907     return false;
908 
909   notop = get_unary_op (curr->op, BIT_NOT_EXPR);
910   if (notop == NULL_TREE)
911     return false;
912 
913   /* Any non-not version will have a rank that is one less than
914      the current rank.  So once we hit those ranks, if we don't find
915      one, we can stop.  */
916 
917   for (i = currindex + 1;
918        ops->iterate (i, &oe)
919        && oe->rank >= curr->rank - 1;
920        i++)
921     {
922       if (oe->op == notop)
923 	{
924 	  if (dump_file && (dump_flags & TDF_DETAILS))
925 	    {
926 	      fprintf (dump_file, "Equivalence: ");
927 	      print_generic_expr (dump_file, notop);
928 	      if (opcode == BIT_AND_EXPR)
929 		fprintf (dump_file, " & ~");
930 	      else if (opcode == BIT_IOR_EXPR)
931 		fprintf (dump_file, " | ~");
932 	      print_generic_expr (dump_file, oe->op);
933 	      if (opcode == BIT_AND_EXPR)
934 		fprintf (dump_file, " -> 0\n");
935 	      else if (opcode == BIT_IOR_EXPR)
936 		fprintf (dump_file, " -> -1\n");
937 	    }
938 
939 	  if (opcode == BIT_AND_EXPR)
940 	    oe->op = build_zero_cst (TREE_TYPE (oe->op));
941 	  else if (opcode == BIT_IOR_EXPR)
942 	    oe->op = build_all_ones_cst (TREE_TYPE (oe->op));
943 
944 	  reassociate_stats.ops_eliminated += ops->length () - 1;
945 	  ops->truncate (0);
946 	  ops->quick_push (oe);
947 	  return true;
948 	}
949     }
950 
951   return false;
952 }
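
/* Illustrative effect of the above (made-up operand list): for the
   BIT_AND_EXPR chain {b, ~a, a} the whole list collapses to {0}, and for
   a BIT_IOR_EXPR chain it collapses to {-1}, since the remaining
   operands can no longer change the result.  */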
953 
954 /* Use constant value that may be present in OPS to try to eliminate
955    operands.  Note that this function is only really used when we've
956    eliminated ops for other reasons, or merged constants.  Across
957    single statements, fold already does all of this, plus more.  There
958    is little point in duplicating logic, so I've only included the
959    identities that I could ever construct testcases to trigger.  */
960 
961 static void
962 eliminate_using_constants (enum tree_code opcode,
963 			   vec<operand_entry *> *ops)
964 {
965   operand_entry *oelast = ops->last ();
966   tree type = TREE_TYPE (oelast->op);
967 
968   if (oelast->rank == 0
969       && (ANY_INTEGRAL_TYPE_P (type) || FLOAT_TYPE_P (type)))
970     {
971       switch (opcode)
972 	{
973 	case BIT_AND_EXPR:
974 	  if (integer_zerop (oelast->op))
975 	    {
976 	      if (ops->length () != 1)
977 		{
978 		  if (dump_file && (dump_flags & TDF_DETAILS))
979 		    fprintf (dump_file, "Found & 0, removing all other ops\n");
980 
981 		  reassociate_stats.ops_eliminated += ops->length () - 1;
982 
983 		  ops->truncate (0);
984 		  ops->quick_push (oelast);
985 		  return;
986 		}
987 	    }
988 	  else if (integer_all_onesp (oelast->op))
989 	    {
990 	      if (ops->length () != 1)
991 		{
992 		  if (dump_file && (dump_flags & TDF_DETAILS))
993 		    fprintf (dump_file, "Found & -1, removing\n");
994 		  ops->pop ();
995 		  reassociate_stats.ops_eliminated++;
996 		}
997 	    }
998 	  break;
999 	case BIT_IOR_EXPR:
1000 	  if (integer_all_onesp (oelast->op))
1001 	    {
1002 	      if (ops->length () != 1)
1003 		{
1004 		  if (dump_file && (dump_flags & TDF_DETAILS))
1005 		    fprintf (dump_file, "Found | -1, removing all other ops\n");
1006 
1007 		  reassociate_stats.ops_eliminated += ops->length () - 1;
1008 
1009 		  ops->truncate (0);
1010 		  ops->quick_push (oelast);
1011 		  return;
1012 		}
1013 	    }
1014 	  else if (integer_zerop (oelast->op))
1015 	    {
1016 	      if (ops->length () != 1)
1017 		{
1018 		  if (dump_file && (dump_flags & TDF_DETAILS))
1019 		    fprintf (dump_file, "Found | 0, removing\n");
1020 		  ops->pop ();
1021 		  reassociate_stats.ops_eliminated++;
1022 		}
1023 	    }
1024 	  break;
1025 	case MULT_EXPR:
1026 	  if (integer_zerop (oelast->op)
1027 	      || (FLOAT_TYPE_P (type)
1028 		  && !HONOR_NANS (type)
1029 		  && !HONOR_SIGNED_ZEROS (type)
1030 		  && real_zerop (oelast->op)))
1031 	    {
1032 	      if (ops->length () != 1)
1033 		{
1034 		  if (dump_file && (dump_flags & TDF_DETAILS))
1035 		    fprintf (dump_file, "Found * 0, removing all other ops\n");
1036 
1037 		  reassociate_stats.ops_eliminated += ops->length () - 1;
1038 		  ops->truncate (0);
1039 		  ops->quick_push (oelast);
1040 		  return;
1041 		}
1042 	    }
1043 	  else if (integer_onep (oelast->op)
1044 		   || (FLOAT_TYPE_P (type)
1045 		       && !HONOR_SNANS (type)
1046 		       && real_onep (oelast->op)))
1047 	    {
1048 	      if (ops->length () != 1)
1049 		{
1050 		  if (dump_file && (dump_flags & TDF_DETAILS))
1051 		    fprintf (dump_file, "Found * 1, removing\n");
1052 		  ops->pop ();
1053 		  reassociate_stats.ops_eliminated++;
1054 		  return;
1055 		}
1056 	    }
1057 	  break;
1058 	case BIT_XOR_EXPR:
1059 	case PLUS_EXPR:
1060 	case MINUS_EXPR:
1061 	  if (integer_zerop (oelast->op)
1062 	      || (FLOAT_TYPE_P (type)
1063 		  && (opcode == PLUS_EXPR || opcode == MINUS_EXPR)
1064 		  && fold_real_zero_addition_p (type, oelast->op,
1065 						opcode == MINUS_EXPR)))
1066 	    {
1067 	      if (ops->length () != 1)
1068 		{
1069 		  if (dump_file && (dump_flags & TDF_DETAILS))
1070 		    fprintf (dump_file, "Found [|^+] 0, removing\n");
1071 		  ops->pop ();
1072 		  reassociate_stats.ops_eliminated++;
1073 		  return;
1074 		}
1075 	    }
1076 	  break;
1077 	default:
1078 	  break;
1079 	}
1080     }
1081 }
1082 
1083 
1084 static void linearize_expr_tree (vec<operand_entry *> *, gimple *,
1085 				 bool, bool);
1086 
1087 /* Structure for tracking and counting operands.  */
1088 struct oecount {
1089   unsigned int cnt;
1090   unsigned int id;
1091   enum tree_code oecode;
1092   tree op;
1093 };
1094 
1095 
1096 /* The heap for the oecount hashtable and the sorted list of operands.  */
1097 static vec<oecount> cvec;
1098 
1099 
1100 /* Oecount hashtable helpers.  */
1101 
1102 struct oecount_hasher : int_hash <int, 0, 1>
1103 {
1104   static inline hashval_t hash (int);
1105   static inline bool equal (int, int);
1106 };
1107 
1108 /* Hash function for oecount.  */
1109 
1110 inline hashval_t
1111 oecount_hasher::hash (int p)
1112 {
1113   const oecount *c = &cvec[p - 42];
1114   return htab_hash_pointer (c->op) ^ (hashval_t)c->oecode;
1115 }
1116 
1117 /* Comparison function for oecount.  */
1118 
1119 inline bool
1120 oecount_hasher::equal (int p1, int p2)
1121 {
1122   const oecount *c1 = &cvec[p1 - 42];
1123   const oecount *c2 = &cvec[p2 - 42];
1124   return c1->oecode == c2->oecode && c1->op == c2->op;
1125 }
1126 
1127 /* Comparison function for qsort sorting oecount elements by count.  */
1128 
1129 static int
1130 oecount_cmp (const void *p1, const void *p2)
1131 {
1132   const oecount *c1 = (const oecount *)p1;
1133   const oecount *c2 = (const oecount *)p2;
1134   if (c1->cnt != c2->cnt)
1135     return c1->cnt > c2->cnt ? 1 : -1;
1136   else
1137     /* If counts are identical, use unique IDs to stabilize qsort.  */
1138     return c1->id > c2->id ? 1 : -1;
1139 }
1140 
1141 /* Return TRUE iff STMT represents a builtin call that raises OP
1142    to some exponent.  */
1143 
1144 static bool
1145 stmt_is_power_of_op (gimple *stmt, tree op)
1146 {
1147   if (!is_gimple_call (stmt))
1148     return false;
1149 
1150   switch (gimple_call_combined_fn (stmt))
1151     {
1152     CASE_CFN_POW:
1153     CASE_CFN_POWI:
1154       return (operand_equal_p (gimple_call_arg (stmt, 0), op, 0));
1155 
1156     default:
1157       return false;
1158     }
1159 }
1160 
1161 /* Given STMT which is a __builtin_pow* call, decrement its exponent
1162    in place and return the result.  Assumes that stmt_is_power_of_op
1163    was previously called for STMT and returned TRUE.  */
1164 
1165 static HOST_WIDE_INT
1166 decrement_power (gimple *stmt)
1167 {
1168   REAL_VALUE_TYPE c, cint;
1169   HOST_WIDE_INT power;
1170   tree arg1;
1171 
1172   switch (gimple_call_combined_fn (stmt))
1173     {
1174     CASE_CFN_POW:
1175       arg1 = gimple_call_arg (stmt, 1);
1176       c = TREE_REAL_CST (arg1);
1177       power = real_to_integer (&c) - 1;
1178       real_from_integer (&cint, VOIDmode, power, SIGNED);
1179       gimple_call_set_arg (stmt, 1, build_real (TREE_TYPE (arg1), cint));
1180       return power;
1181 
1182     CASE_CFN_POWI:
1183       arg1 = gimple_call_arg (stmt, 1);
1184       power = TREE_INT_CST_LOW (arg1) - 1;
1185       gimple_call_set_arg (stmt, 1, build_int_cst (TREE_TYPE (arg1), power));
1186       return power;
1187 
1188     default:
1189       gcc_unreachable ();
1190     }
1191 }
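
/* For instance (hypothetical call): for  t = __builtin_pow (x, 5.0)
   decrement_power rewrites the call to  __builtin_pow (x, 4.0)  and
   returns 4, the exponent that the call still applies.  */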
1192 
1193 /* Give the SSA name defined by STMT a fresh name and replace all uses
1194    of the old name with the new one.  Return the new SSA name.  */
1195 
1196 static tree
1197 make_new_ssa_for_def (gimple *stmt, enum tree_code opcode, tree op)
1198 {
1199   gimple *use_stmt;
1200   use_operand_p use;
1201   imm_use_iterator iter;
1202   tree new_lhs, new_debug_lhs = NULL_TREE;
1203   tree lhs = gimple_get_lhs (stmt);
1204 
1205   new_lhs = make_ssa_name (TREE_TYPE (lhs));
1206   gimple_set_lhs (stmt, new_lhs);
1207 
1208   /* Also need to update GIMPLE_DEBUGs.  */
1209   FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs)
1210     {
1211       tree repl = new_lhs;
1212       if (is_gimple_debug (use_stmt))
1213 	{
1214 	  if (new_debug_lhs == NULL_TREE)
1215 	    {
1216 	      new_debug_lhs = make_node (DEBUG_EXPR_DECL);
1217 	      gdebug *def_temp
1218 		= gimple_build_debug_bind (new_debug_lhs,
1219 					   build2 (opcode, TREE_TYPE (lhs),
1220 						   new_lhs, op),
1221 					   stmt);
1222 	      DECL_ARTIFICIAL (new_debug_lhs) = 1;
1223 	      TREE_TYPE (new_debug_lhs) = TREE_TYPE (lhs);
1224 	      SET_DECL_MODE (new_debug_lhs, TYPE_MODE (TREE_TYPE (lhs)));
1225 	      gimple_set_uid (def_temp, gimple_uid (stmt));
1226 	      gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
1227 	      gsi_insert_after (&gsi, def_temp, GSI_SAME_STMT);
1228 	    }
1229 	  repl = new_debug_lhs;
1230 	}
1231       FOR_EACH_IMM_USE_ON_STMT (use, iter)
1232 	SET_USE (use, repl);
1233       update_stmt (use_stmt);
1234     }
1235   return new_lhs;
1236 }
1237 
1238 /* Replace all SSA names defined in STMTS_TO_FIX and replace their
1239    uses with new SSA names.  Also do this for the stmt that defines DEF
1240    if *DEF is not OP.  */
1241 
1242 static void
1243 make_new_ssa_for_all_defs (tree *def, enum tree_code opcode, tree op,
1244 			   vec<gimple *> &stmts_to_fix)
1245 {
1246   unsigned i;
1247   gimple *stmt;
1248 
1249   if (*def != op
1250       && TREE_CODE (*def) == SSA_NAME
1251       && (stmt = SSA_NAME_DEF_STMT (*def))
1252       && gimple_code (stmt) != GIMPLE_NOP)
1253     *def = make_new_ssa_for_def (stmt, opcode, op);
1254 
1255   FOR_EACH_VEC_ELT (stmts_to_fix, i, stmt)
1256     make_new_ssa_for_def (stmt, opcode, op);
1257 }
1258 
1259 /* Find the single immediate use of STMT's LHS, and replace it
1260    with OP.  Remove STMT.  If STMT's LHS is the same as *DEF,
1261    replace *DEF with OP as well.  */
1262 
1263 static void
1264 propagate_op_to_single_use (tree op, gimple *stmt, tree *def)
1265 {
1266   tree lhs;
1267   gimple *use_stmt;
1268   use_operand_p use;
1269   gimple_stmt_iterator gsi;
1270 
1271   if (is_gimple_call (stmt))
1272     lhs = gimple_call_lhs (stmt);
1273   else
1274     lhs = gimple_assign_lhs (stmt);
1275 
1276   gcc_assert (has_single_use (lhs));
1277   single_imm_use (lhs, &use, &use_stmt);
1278   if (lhs == *def)
1279     *def = op;
1280   SET_USE (use, op);
1281   if (TREE_CODE (op) != SSA_NAME)
1282     update_stmt (use_stmt);
1283   gsi = gsi_for_stmt (stmt);
1284   unlink_stmt_vdef (stmt);
1285   reassoc_remove_stmt (&gsi);
1286   release_defs (stmt);
1287 }
1288 
1289 /* Walks the linear chain with result *DEF searching for an operation
1290    with operand OP and code OPCODE, removing that from the chain.  *DEF
1291    is updated if there is only one operand but no operation left.  */
1292 
1293 static void
1294 zero_one_operation (tree *def, enum tree_code opcode, tree op)
1295 {
1296   tree orig_def = *def;
1297   gimple *stmt = SSA_NAME_DEF_STMT (*def);
1298   /* PR72835 - Record the stmt chain that has to be updated such that
1299      we don't use the same LHS when the values computed are different.  */
1300   auto_vec<gimple *, 64> stmts_to_fix;
1301 
1302   do
1303     {
1304       tree name;
1305 
1306       if (opcode == MULT_EXPR)
1307 	{
1308 	  if (stmt_is_power_of_op (stmt, op))
1309 	    {
1310 	      if (decrement_power (stmt) == 1)
1311 		{
1312 		  if (stmts_to_fix.length () > 0)
1313 		    stmts_to_fix.pop ();
1314 		  propagate_op_to_single_use (op, stmt, def);
1315 		}
1316 	      break;
1317 	    }
1318 	  else if (gimple_assign_rhs_code (stmt) == NEGATE_EXPR)
1319 	    {
1320 	      if (gimple_assign_rhs1 (stmt) == op)
1321 		{
1322 		  tree cst = build_minus_one_cst (TREE_TYPE (op));
1323 		  if (stmts_to_fix.length () > 0)
1324 		    stmts_to_fix.pop ();
1325 		  propagate_op_to_single_use (cst, stmt, def);
1326 		  break;
1327 		}
1328 	      else if (integer_minus_onep (op)
1329 		       || real_minus_onep (op))
1330 		{
1331 		  gimple_assign_set_rhs_code
1332 		    (stmt, TREE_CODE (gimple_assign_rhs1 (stmt)));
1333 		  break;
1334 		}
1335 	    }
1336 	}
1337 
1338       name = gimple_assign_rhs1 (stmt);
1339 
1340       /* If this is the operation we look for and one of the operands
1341          is ours, simply propagate the other operand into the stmt's
1342 	 single use.  */
1343       if (gimple_assign_rhs_code (stmt) == opcode
1344 	  && (name == op
1345 	      || gimple_assign_rhs2 (stmt) == op))
1346 	{
1347 	  if (name == op)
1348 	    name = gimple_assign_rhs2 (stmt);
1349 	  if (stmts_to_fix.length () > 0)
1350 	    stmts_to_fix.pop ();
1351 	  propagate_op_to_single_use (name, stmt, def);
1352 	  break;
1353 	}
1354 
1355       /* We might have a multiply of two __builtin_pow* calls, and
1356 	 the operand might be hiding in the rightmost one.  Likewise
1357 	 this can happen for a negate.  */
1358       if (opcode == MULT_EXPR
1359 	  && gimple_assign_rhs_code (stmt) == opcode
1360 	  && TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME
1361 	  && has_single_use (gimple_assign_rhs2 (stmt)))
1362 	{
1363 	  gimple *stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
1364 	  if (stmt_is_power_of_op (stmt2, op))
1365 	    {
1366 	      if (decrement_power (stmt2) == 1)
1367 		propagate_op_to_single_use (op, stmt2, def);
1368 	      else
1369 		stmts_to_fix.safe_push (stmt2);
1370 	      break;
1371 	    }
1372 	  else if (is_gimple_assign (stmt2)
1373 		   && gimple_assign_rhs_code (stmt2) == NEGATE_EXPR)
1374 	    {
1375 	      if (gimple_assign_rhs1 (stmt2) == op)
1376 		{
1377 		  tree cst = build_minus_one_cst (TREE_TYPE (op));
1378 		  propagate_op_to_single_use (cst, stmt2, def);
1379 		  break;
1380 		}
1381 	      else if (integer_minus_onep (op)
1382 		       || real_minus_onep (op))
1383 		{
1384 		  stmts_to_fix.safe_push (stmt2);
1385 		  gimple_assign_set_rhs_code
1386 		    (stmt2, TREE_CODE (gimple_assign_rhs1 (stmt2)));
1387 		  break;
1388 		}
1389 	    }
1390 	}
1391 
1392       /* Continue walking the chain.  */
1393       gcc_assert (name != op
1394 		  && TREE_CODE (name) == SSA_NAME);
1395       stmt = SSA_NAME_DEF_STMT (name);
1396       stmts_to_fix.safe_push (stmt);
1397     }
1398   while (1);
1399 
1400   if (stmts_to_fix.length () > 0 || *def == orig_def)
1401     make_new_ssa_for_all_defs (def, opcode, op, stmts_to_fix);
1402 }
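
/* Small worked example (illustrative only): with *DEF defined by the
   chain

     t1 = x * y
     t2 = t1 * z          (*DEF == t2)

   zero_one_operation (&def, MULT_EXPR, y) finds the y factor,
   propagates x into t1's single use, and gives the remaining statement
   a fresh LHS (see PR72835), so *DEF ends up naming a value that
   computes x * z.  */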
1403 
1404 /* Returns true if statement S1 dominates statement S2.  Like
1405    stmt_dominates_stmt_p, but uses stmt UIDs to optimize.  */
1406 
1407 static bool
1408 reassoc_stmt_dominates_stmt_p (gimple *s1, gimple *s2)
1409 {
1410   basic_block bb1 = gimple_bb (s1), bb2 = gimple_bb (s2);
1411 
1412   /* If bb1 is NULL, s1 should be the GIMPLE_NOP def stmt of a
1413      default-definition (D) SSA_NAME.  Assume it lives at the beginning
1414      of the function and thus dominates everything.  */
1415   if (!bb1 || s1 == s2)
1416     return true;
1417 
1418   /* If bb2 is NULL, it doesn't dominate any stmt with a bb.  */
1419   if (!bb2)
1420     return false;
1421 
1422   if (bb1 == bb2)
1423     {
1424       /* PHIs in the same basic block are assumed to be
1425 	 executed all in parallel; if only one stmt is a PHI,
1426 	 it dominates the other stmt in the same basic block.  */
1427       if (gimple_code (s1) == GIMPLE_PHI)
1428 	return true;
1429 
1430       if (gimple_code (s2) == GIMPLE_PHI)
1431 	return false;
1432 
1433       gcc_assert (gimple_uid (s1) && gimple_uid (s2));
1434 
1435       if (gimple_uid (s1) < gimple_uid (s2))
1436 	return true;
1437 
1438       if (gimple_uid (s1) > gimple_uid (s2))
1439 	return false;
1440 
1441       gimple_stmt_iterator gsi = gsi_for_stmt (s1);
1442       unsigned int uid = gimple_uid (s1);
1443       for (gsi_next (&gsi); !gsi_end_p (gsi); gsi_next (&gsi))
1444 	{
1445 	  gimple *s = gsi_stmt (gsi);
1446 	  if (gimple_uid (s) != uid)
1447 	    break;
1448 	  if (s == s2)
1449 	    return true;
1450 	}
1451 
1452       return false;
1453     }
1454 
1455   return dominated_by_p (CDI_DOMINATORS, bb2, bb1);
1456 }
1457 
1458 /* Insert STMT after INSERT_POINT.  */
1459 
1460 static void
1461 insert_stmt_after (gimple *stmt, gimple *insert_point)
1462 {
1463   gimple_stmt_iterator gsi;
1464   basic_block bb;
1465 
1466   if (gimple_code (insert_point) == GIMPLE_PHI)
1467     bb = gimple_bb (insert_point);
1468   else if (!stmt_ends_bb_p (insert_point))
1469     {
1470       gsi = gsi_for_stmt (insert_point);
1471       gimple_set_uid (stmt, gimple_uid (insert_point));
1472       gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
1473       return;
1474     }
1475   else if (gimple_code (insert_point) == GIMPLE_ASM)
1476     /* We have no idea where to insert - it depends on where the
1477        uses will be placed.  */
1478     gcc_unreachable ();
1479   else
1480     /* We assume INSERT_POINT is the SSA_NAME_DEF_STMT of some SSA_NAME,
1481        thus if it must end a basic block, it should be a call that can
1482        throw, or some assignment that can throw.  If it throws, its LHS
1483        will not be initialized, so the only valid places that use
1484        the SSA_NAME are dominated by the fallthru edge.  */
1485     bb = find_fallthru_edge (gimple_bb (insert_point)->succs)->dest;
1486   gsi = gsi_after_labels (bb);
1487   if (gsi_end_p (gsi))
1488     {
1489       gimple_stmt_iterator gsi2 = gsi_last_bb (bb);
1490       gimple_set_uid (stmt,
1491 		      gsi_end_p (gsi2) ? 1 : gimple_uid (gsi_stmt (gsi2)));
1492     }
1493   else
1494     gimple_set_uid (stmt, gimple_uid (gsi_stmt (gsi)));
1495   gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
1496 }
1497 
1498 /* Builds one statement performing OP1 OPCODE OP2 with a new SSA name
1499    of TYPE as the result.  Places the statement after the definition of
1500    either OP1 or OP2.  Returns the new statement.  */
1501 
1502 static gimple *
1503 build_and_add_sum (tree type, tree op1, tree op2, enum tree_code opcode)
1504 {
1505   gimple *op1def = NULL, *op2def = NULL;
1506   gimple_stmt_iterator gsi;
1507   tree op;
1508   gassign *sum;
1509 
1510   /* Create the addition statement.  */
1511   op = make_ssa_name (type);
1512   sum = gimple_build_assign (op, opcode, op1, op2);
1513 
1514   /* Find an insertion place and insert.  */
1515   if (TREE_CODE (op1) == SSA_NAME)
1516     op1def = SSA_NAME_DEF_STMT (op1);
1517   if (TREE_CODE (op2) == SSA_NAME)
1518     op2def = SSA_NAME_DEF_STMT (op2);
1519   if ((!op1def || gimple_nop_p (op1def))
1520       && (!op2def || gimple_nop_p (op2def)))
1521     {
1522       gsi = gsi_after_labels (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
1523       if (gsi_end_p (gsi))
1524 	{
1525 	  gimple_stmt_iterator gsi2
1526 	    = gsi_last_bb (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
1527 	  gimple_set_uid (sum,
1528 			  gsi_end_p (gsi2) ? 1 : gimple_uid (gsi_stmt (gsi2)));
1529 	}
1530       else
1531 	gimple_set_uid (sum, gimple_uid (gsi_stmt (gsi)));
1532       gsi_insert_before (&gsi, sum, GSI_NEW_STMT);
1533     }
1534   else
1535     {
1536       gimple *insert_point;
1537       if ((!op1def || gimple_nop_p (op1def))
1538 	   || (op2def && !gimple_nop_p (op2def)
1539 	       && reassoc_stmt_dominates_stmt_p (op1def, op2def)))
1540 	insert_point = op2def;
1541       else
1542 	insert_point = op1def;
1543       insert_stmt_after (sum, insert_point);
1544     }
1545   update_stmt (sum);
1546 
1547   return sum;
1548 }
1549 
1550 /* Perform un-distribution of divisions and multiplications.
1551    A * X + B * X is transformed into (A + B) * X and A / X + B / X
1552    to (A + B) / X for real X.
1553 
1554    The algorithm is organized as follows.
1555 
1556     - First we walk the addition chain *OPS looking for summands that
1557       are defined by a multiplication or a real division.  This results
1558       in the candidates bitmap with relevant indices into *OPS.
1559 
1560     - Second we build the chains of multiplications or divisions for
1561       these candidates, counting the number of occurrences of (operand, code)
1562       pairs in all of the candidates chains.
1563 
1564     - Third we sort the (operand, code) pairs by number of occurrence and
1565       process them starting with the pair with the most uses.
1566 
1567       * For each such pair we walk the candidates again to build a
1568         second candidate bitmap noting all multiplication/division chains
1569 	that have at least one occurrence of (operand, code).
1570 
1571       * We build an alternate addition chain only covering these
1572         candidates with one (operand, code) operation removed from their
1573 	multiplication/division chain.
1574 
1575       * The first candidate gets replaced by the alternate addition chain
1576         multiplied/divided by the operand.
1577 
1578       * All candidate chains get disabled for further processing and
1579         processing of (operand, code) pairs continues.
1580 
1581   The alternate addition chains built are re-processed by the main
1582   reassociation algorithm which allows optimizing a * x * y + b * y * x
1583   to (a + b) * x * y in one invocation of the reassociation pass.  */
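
/* As a small worked example (made up, not from a real dump): for the
   addition chain  a*x + b*x + c  both multiplications become candidates,
   the pair (x, MULT_EXPR) is counted twice, so x is stripped from both
   factor chains, a new sum  a + b  is built, the first candidate is
   replaced by  (a + b) * x  and the second by 0, leaving the chain
   (a + b)*x + 0 + c  for the main reassociation machinery to finish.  */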
1584 
1585 static bool
1586 undistribute_ops_list (enum tree_code opcode,
1587 		       vec<operand_entry *> *ops, class loop *loop)
1588 {
1589   unsigned int length = ops->length ();
1590   operand_entry *oe1;
1591   unsigned i, j;
1592   unsigned nr_candidates, nr_candidates2;
1593   sbitmap_iterator sbi0;
1594   vec<operand_entry *> *subops;
1595   bool changed = false;
1596   unsigned int next_oecount_id = 0;
1597 
1598   if (length <= 1
1599       || opcode != PLUS_EXPR)
1600     return false;
1601 
1602   /* Build a list of candidates to process.  */
1603   auto_sbitmap candidates (length);
1604   bitmap_clear (candidates);
1605   nr_candidates = 0;
1606   FOR_EACH_VEC_ELT (*ops, i, oe1)
1607     {
1608       enum tree_code dcode;
1609       gimple *oe1def;
1610 
1611       if (TREE_CODE (oe1->op) != SSA_NAME)
1612 	continue;
1613       oe1def = SSA_NAME_DEF_STMT (oe1->op);
1614       if (!is_gimple_assign (oe1def))
1615 	continue;
1616       dcode = gimple_assign_rhs_code (oe1def);
1617       if ((dcode != MULT_EXPR
1618 	   && dcode != RDIV_EXPR)
1619 	  || !is_reassociable_op (oe1def, dcode, loop))
1620 	continue;
1621 
1622       bitmap_set_bit (candidates, i);
1623       nr_candidates++;
1624     }
1625 
1626   if (nr_candidates < 2)
1627     return false;
1628 
1629   if (dump_file && (dump_flags & TDF_DETAILS))
1630     {
1631       fprintf (dump_file, "searching for un-distribute opportunities ");
1632       print_generic_expr (dump_file,
1633 	(*ops)[bitmap_first_set_bit (candidates)]->op, TDF_NONE);
1634       fprintf (dump_file, " %d\n", nr_candidates);
1635     }
1636 
1637   /* Build linearized sub-operand lists and the counting table.  */
1638   cvec.create (0);
1639 
1640   hash_table<oecount_hasher> ctable (15);
1641 
1642   /* ??? Macro arguments cannot have multi-argument template types in
1643      them.  This typedef is needed to work around that limitation.  */
1644   typedef vec<operand_entry *> vec_operand_entry_t_heap;
1645   subops = XCNEWVEC (vec_operand_entry_t_heap, ops->length ());
1646   EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
1647     {
1648       gimple *oedef;
1649       enum tree_code oecode;
1650       unsigned j;
1651 
1652       oedef = SSA_NAME_DEF_STMT ((*ops)[i]->op);
1653       oecode = gimple_assign_rhs_code (oedef);
1654       linearize_expr_tree (&subops[i], oedef,
1655 			   associative_tree_code (oecode), false);
1656 
1657       FOR_EACH_VEC_ELT (subops[i], j, oe1)
1658 	{
1659 	  oecount c;
1660 	  int *slot;
1661 	  int idx;
1662 	  c.oecode = oecode;
1663 	  c.cnt = 1;
1664 	  c.id = next_oecount_id++;
1665 	  c.op = oe1->op;
1666 	  cvec.safe_push (c);
1667 	  idx = cvec.length () + 41;
1668 	  slot = ctable.find_slot (idx, INSERT);
1669 	  if (!*slot)
1670 	    {
1671 	      *slot = idx;
1672 	    }
1673 	  else
1674 	    {
1675 	      cvec.pop ();
1676 	      cvec[*slot - 42].cnt++;
1677 	    }
1678 	}
1679     }
1680 
1681   /* Sort the counting table.  */
1682   cvec.qsort (oecount_cmp);
1683 
1684   if (dump_file && (dump_flags & TDF_DETAILS))
1685     {
1686       oecount *c;
1687       fprintf (dump_file, "Candidates:\n");
1688       FOR_EACH_VEC_ELT (cvec, j, c)
1689 	{
1690 	  fprintf (dump_file, "  %u %s: ", c->cnt,
1691 		   c->oecode == MULT_EXPR
1692 		   ? "*" : c->oecode == RDIV_EXPR ? "/" : "?");
1693 	  print_generic_expr (dump_file, c->op);
1694 	  fprintf (dump_file, "\n");
1695 	}
1696     }
1697 
1698   /* Process the (operand, code) pairs in order of most occurrence.  */
1699   auto_sbitmap candidates2 (length);
1700   while (!cvec.is_empty ())
1701     {
1702       oecount *c = &cvec.last ();
1703       if (c->cnt < 2)
1704 	break;
1705 
1706       /* Now collect the operands in the outer chain that contain
1707          the common operand in their inner chain.  */
1708       bitmap_clear (candidates2);
1709       nr_candidates2 = 0;
1710       EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
1711 	{
1712 	  gimple *oedef;
1713 	  enum tree_code oecode;
1714 	  unsigned j;
1715 	  tree op = (*ops)[i]->op;
1716 
1717 	  /* If we undistributed in this chain already, this may be
1718 	     a constant.  */
1719 	  if (TREE_CODE (op) != SSA_NAME)
1720 	    continue;
1721 
1722 	  oedef = SSA_NAME_DEF_STMT (op);
1723 	  oecode = gimple_assign_rhs_code (oedef);
1724 	  if (oecode != c->oecode)
1725 	    continue;
1726 
1727 	  FOR_EACH_VEC_ELT (subops[i], j, oe1)
1728 	    {
1729 	      if (oe1->op == c->op)
1730 		{
1731 		  bitmap_set_bit (candidates2, i);
1732 		  ++nr_candidates2;
1733 		  break;
1734 		}
1735 	    }
1736 	}
1737 
1738       if (nr_candidates2 >= 2)
1739 	{
1740 	  operand_entry *oe1, *oe2;
1741 	  gimple *prod;
1742 	  int first = bitmap_first_set_bit (candidates2);
1743 
1744 	  /* Build the new addition chain.  */
1745 	  oe1 = (*ops)[first];
1746 	  if (dump_file && (dump_flags & TDF_DETAILS))
1747 	    {
1748 	      fprintf (dump_file, "Building (");
1749 	      print_generic_expr (dump_file, oe1->op);
1750 	    }
1751 	  zero_one_operation (&oe1->op, c->oecode, c->op);
1752 	  EXECUTE_IF_SET_IN_BITMAP (candidates2, first+1, i, sbi0)
1753 	    {
1754 	      gimple *sum;
1755 	      oe2 = (*ops)[i];
1756 	      if (dump_file && (dump_flags & TDF_DETAILS))
1757 		{
1758 		  fprintf (dump_file, " + ");
1759 		  print_generic_expr (dump_file, oe2->op);
1760 		}
1761 	      zero_one_operation (&oe2->op, c->oecode, c->op);
1762 	      sum = build_and_add_sum (TREE_TYPE (oe1->op),
1763 				       oe1->op, oe2->op, opcode);
1764 	      oe2->op = build_zero_cst (TREE_TYPE (oe2->op));
1765 	      oe2->rank = 0;
1766 	      oe1->op = gimple_get_lhs (sum);
1767 	    }
1768 
1769 	  /* Apply the multiplication/division.  */
1770 	  prod = build_and_add_sum (TREE_TYPE (oe1->op),
1771 				    oe1->op, c->op, c->oecode);
1772 	  if (dump_file && (dump_flags & TDF_DETAILS))
1773 	    {
1774 	      fprintf (dump_file, ") %s ", c->oecode == MULT_EXPR ? "*" : "/");
1775 	      print_generic_expr (dump_file, c->op);
1776 	      fprintf (dump_file, "\n");
1777 	    }
1778 
1779 	  /* Record it in the addition chain and disable further
1780 	     undistribution with this op.  */
1781 	  oe1->op = gimple_assign_lhs (prod);
1782 	  oe1->rank = get_rank (oe1->op);
1783 	  subops[first].release ();
1784 
1785 	  changed = true;
1786 	}
1787 
1788       cvec.pop ();
1789     }
1790 
1791   for (i = 0; i < ops->length (); ++i)
1792     subops[i].release ();
1793   free (subops);
1794   cvec.release ();
1795 
1796   return changed;
1797 }
1798 
1799 /* Pair to hold the information of one specific VECTOR_TYPE SSA_NAME:
1800    first: element index for each relevant BIT_FIELD_REF.
1801    second: the index of vec ops* for each relevant BIT_FIELD_REF.  */
1802 typedef std::pair<unsigned, unsigned> v_info_elem;
1803 struct v_info {
1804   tree vec_type;
1805   auto_vec<v_info_elem, 32> vec;
1806 };
1807 typedef v_info *v_info_ptr;
1808 
1809 /* Comparison function for qsort on VECTOR SSA_NAME trees by machine mode.  */
1810 static int
1811 sort_by_mach_mode (const void *p_i, const void *p_j)
1812 {
1813   const tree tr1 = *((const tree *) p_i);
1814   const tree tr2 = *((const tree *) p_j);
1815   unsigned int mode1 = TYPE_MODE (TREE_TYPE (tr1));
1816   unsigned int mode2 = TYPE_MODE (TREE_TYPE (tr2));
1817   if (mode1 > mode2)
1818     return 1;
1819   else if (mode1 < mode2)
1820     return -1;
1821   if (SSA_NAME_VERSION (tr1) < SSA_NAME_VERSION (tr2))
1822     return -1;
1823   else if (SSA_NAME_VERSION (tr1) > SSA_NAME_VERSION (tr2))
1824     return 1;
1825   return 0;
1826 }
1827 
1828 /* Cleanup hash map for VECTOR information.  */
1829 static void
1830 cleanup_vinfo_map (hash_map<tree, v_info_ptr> &info_map)
1831 {
1832   for (hash_map<tree, v_info_ptr>::iterator it = info_map.begin ();
1833        it != info_map.end (); ++it)
1834     {
1835       v_info_ptr info = (*it).second;
1836       delete info;
1837       (*it).second = NULL;
1838     }
1839 }
1840 
1841 /* Perform un-distribution of BIT_FIELD_REF on VECTOR_TYPE.
1842      V1[0] + V1[1] + ... + V1[k] + V2[0] + V2[1] + ... + V2[k] + ... Vn[k]
1843    is transformed to
1844      Vs = (V1 + V2 + ... + Vn)
1845      Vs[0] + Vs[1] + ... + Vs[k]
1846 
1847    The basic steps are listed below:
1848 
1849     1) Check the addition chain *OPS by looking for summands that come from
1850        a BIT_FIELD_REF on a VECTOR type.  Put the information into
1851        v_info_map for each qualifying summand, using the VECTOR SSA_NAME as key.
1852 
1853     2) For each key (VECTOR SSA_NAME), validate that all its BIT_FIELD_REFs are
1854        contiguous and cover the whole VECTOR without any holes.  Obtain one
1855        VECTOR list that contains the candidates to be transformed.
1856 
1857     3) Sort the VECTOR list by the machine mode of the VECTOR type.  For each
1858        group of candidates with the same mode, build the addition statements
1859        for them and generate BIT_FIELD_REFs accordingly.
1860 
1861    TODO:
1862        The current implementation requires that each VECTOR be fully covered.
1863        It could be extended to support partial coverage (adjacent elements
1864        that do not fill the whole VECTOR); that would likely need a cost
1865        model to decide when the transformation is profitable.
1866 */
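
/* A rough source-level sketch of the transformation, written with GCC's
   generic vector extension; the type and variable names are illustrative
   only:

     typedef int v4si __attribute__ ((vector_size (16)));

     int f (v4si a, v4si b)
     {
       return a[0] + a[1] + a[2] + a[3]
	      + b[0] + b[1] + b[2] + b[3];
     }

   is handled as if it had been written

     v4si t = a + b;
     return t[0] + t[1] + t[2] + t[3];

   i.e. one vector addition followed by a single chain of element sums.  */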
1867 static bool
1868 undistribute_bitref_for_vector (enum tree_code opcode,
1869 				vec<operand_entry *> *ops, struct loop *loop)
1870 {
1871   if (ops->length () <= 1)
1872     return false;
1873 
1874   if (opcode != PLUS_EXPR
1875       && opcode != MULT_EXPR
1876       && opcode != BIT_XOR_EXPR
1877       && opcode != BIT_IOR_EXPR
1878       && opcode != BIT_AND_EXPR)
1879     return false;
1880 
1881   hash_map<tree, v_info_ptr> v_info_map;
1882   operand_entry *oe1;
1883   unsigned i;
1884 
1885   /* Find the summands in the addition chain that come from a VECTOR
1886      BIT_FIELD_REF and record their information in the map.  */
1887   FOR_EACH_VEC_ELT (*ops, i, oe1)
1888     {
1889       enum tree_code dcode;
1890       gimple *oe1def;
1891 
1892       if (TREE_CODE (oe1->op) != SSA_NAME)
1893 	continue;
1894       oe1def = SSA_NAME_DEF_STMT (oe1->op);
1895       if (!is_gimple_assign (oe1def))
1896 	continue;
1897       dcode = gimple_assign_rhs_code (oe1def);
1898       if (dcode != BIT_FIELD_REF || !is_reassociable_op (oe1def, dcode, loop))
1899 	continue;
1900 
1901       tree rhs = gimple_assign_rhs1 (oe1def);
1902       tree vec = TREE_OPERAND (rhs, 0);
1903       tree vec_type = TREE_TYPE (vec);
1904 
1905       if (TREE_CODE (vec) != SSA_NAME || !VECTOR_TYPE_P (vec_type))
1906 	continue;
1907 
1908       /* Ignore it if target machine can't support this VECTOR type.  */
1909       if (!VECTOR_MODE_P (TYPE_MODE (vec_type)))
1910 	continue;
1911 
1912       /* Check const vector type, constrain BIT_FIELD_REF offset and size.  */
1913       if (!TYPE_VECTOR_SUBPARTS (vec_type).is_constant ())
1914 	continue;
1915 
1916       if (VECTOR_TYPE_P (TREE_TYPE (rhs))
1917 	  || !is_a <scalar_mode> (TYPE_MODE (TREE_TYPE (rhs))))
1918 	continue;
1919 
1920       /* The type of BIT_FIELD_REF might not be equal to the element type of
1921 	 the vector.  We want to use a vector type with element type the
1922 	 same as the BIT_FIELD_REF and size the same as TREE_TYPE (vec).  */
1923       if (!useless_type_conversion_p (TREE_TYPE (rhs), TREE_TYPE (vec_type)))
1924 	{
1925 	  machine_mode simd_mode;
1926 	  unsigned HOST_WIDE_INT size, nunits;
1927 	  unsigned HOST_WIDE_INT elem_size
1928 	    = tree_to_uhwi (TYPE_SIZE (TREE_TYPE (rhs)));
1929 	  if (!GET_MODE_BITSIZE (TYPE_MODE (vec_type)).is_constant (&size))
1930 	    continue;
1931 	  if (size <= elem_size || (size % elem_size) != 0)
1932 	    continue;
1933 	  nunits = size / elem_size;
1934 	  if (!mode_for_vector (SCALAR_TYPE_MODE (TREE_TYPE (rhs)),
1935 				nunits).exists (&simd_mode))
1936 	    continue;
1937 	  vec_type = build_vector_type_for_mode (TREE_TYPE (rhs), simd_mode);
1938 
1939 	  /* Ignore it if target machine can't support this VECTOR type.  */
1940 	  if (!VECTOR_MODE_P (TYPE_MODE (vec_type)))
1941 	    continue;
1942 
1943 	  /* Check const vector type, constrain BIT_FIELD_REF offset and
1944 	     size.  */
1945 	  if (!TYPE_VECTOR_SUBPARTS (vec_type).is_constant ())
1946 	    continue;
1947 
1948 	  if (maybe_ne (GET_MODE_SIZE (TYPE_MODE (vec_type)),
1949 			GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (vec)))))
1950 	    continue;
1951 	}
1952 
1953       tree elem_type = TREE_TYPE (vec_type);
1954       unsigned HOST_WIDE_INT elem_size = tree_to_uhwi (TYPE_SIZE (elem_type));
1955       if (maybe_ne (bit_field_size (rhs), elem_size))
1956 	continue;
1957 
1958       unsigned idx;
1959       if (!constant_multiple_p (bit_field_offset (rhs), elem_size, &idx))
1960 	continue;
1961 
1962       /* Ignore it if target machine can't support this type of VECTOR
1963          operation.  */
1964       optab op_tab = optab_for_tree_code (opcode, vec_type, optab_vector);
1965       if (optab_handler (op_tab, TYPE_MODE (vec_type)) == CODE_FOR_nothing)
1966 	continue;
1967 
1968       bool existed;
1969       v_info_ptr &info = v_info_map.get_or_insert (vec, &existed);
1970       if (!existed)
1971 	{
1972 	  info = new v_info;
1973 	  info->vec_type = vec_type;
1974 	}
1975       else if (!types_compatible_p (vec_type, info->vec_type))
1976 	continue;
1977       info->vec.safe_push (std::make_pair (idx, i));
1978     }
1979 
1980   /* We need at least two VECTORs to combine.  */
1981   if (v_info_map.elements () <= 1)
1982     {
1983       cleanup_vinfo_map (v_info_map);
1984       return false;
1985     }
1986 
1987   /* Verify all VECTOR candidates by checking two conditions:
1988        1) sorted offsets are adjacent, no holes.
1989        2) can fill the whole VECTOR perfectly.
1990      And add the valid candidates to a vector for further handling.  */
1991   auto_vec<tree> valid_vecs (v_info_map.elements ());
1992   for (hash_map<tree, v_info_ptr>::iterator it = v_info_map.begin ();
1993        it != v_info_map.end (); ++it)
1994     {
1995       tree cand_vec = (*it).first;
1996       v_info_ptr cand_info = (*it).second;
1997       unsigned int num_elems
1998 	= TYPE_VECTOR_SUBPARTS (cand_info->vec_type).to_constant ();
1999       if (cand_info->vec.length () != num_elems)
2000 	continue;
2001       sbitmap holes = sbitmap_alloc (num_elems);
2002       bitmap_ones (holes);
2003       bool valid = true;
2004       v_info_elem *curr;
2005       FOR_EACH_VEC_ELT (cand_info->vec, i, curr)
2006 	{
2007 	  if (!bitmap_bit_p (holes, curr->first))
2008 	    {
2009 	      valid = false;
2010 	      break;
2011 	    }
2012 	  else
2013 	    bitmap_clear_bit (holes, curr->first);
2014 	}
2015       if (valid && bitmap_empty_p (holes))
2016 	valid_vecs.quick_push (cand_vec);
2017       sbitmap_free (holes);
2018     }
2019 
2020   /* We need at least two VECTORs to combine.  */
2021   if (valid_vecs.length () <= 1)
2022     {
2023       cleanup_vinfo_map (v_info_map);
2024       return false;
2025     }
2026 
2027   valid_vecs.qsort (sort_by_mach_mode);
2028   /* Go through all candidates in machine mode order and combine those
2029      sharing a mode, skipping any mode that has only a single candidate.  */
2030   for (unsigned i = 0; i < valid_vecs.length () - 1; ++i)
2031     {
2032       tree tvec = valid_vecs[i];
2033       enum machine_mode mode = TYPE_MODE (TREE_TYPE (tvec));
2034 
2035       /* Skip modes with only a single candidate.  */
2036       if (TYPE_MODE (TREE_TYPE (valid_vecs[i + 1])) != mode)
2037 	continue;
2038 
2039       unsigned int idx, j;
2040       gimple *sum = NULL;
2041       tree sum_vec = tvec;
2042       v_info_ptr info_ptr = *(v_info_map.get (tvec));
2043       v_info_elem *elem;
2044       tree vec_type = info_ptr->vec_type;
2045 
2046       /* Build the sum for all candidates with same mode.  */
2047       do
2048 	{
2049 	  sum = build_and_add_sum (vec_type, sum_vec,
2050 				   valid_vecs[i + 1], opcode);
2051 	  if (!useless_type_conversion_p (vec_type,
2052 					  TREE_TYPE (valid_vecs[i + 1])))
2053 	    {
2054 	      /* Update the operands only after build_and_add_sum,
2055 		 so that we don't have to repeat the placement algorithm
2056 		 of build_and_add_sum.  */
2057 	      gimple_stmt_iterator gsi = gsi_for_stmt (sum);
2058 	      tree vce = build1 (VIEW_CONVERT_EXPR, vec_type,
2059 				 valid_vecs[i + 1]);
2060 	      tree lhs = make_ssa_name (vec_type);
2061 	      gimple *g = gimple_build_assign (lhs, VIEW_CONVERT_EXPR, vce);
2062 	      gimple_set_uid (g, gimple_uid (sum));
2063 	      gsi_insert_before (&gsi, g, GSI_NEW_STMT);
2064 	      gimple_assign_set_rhs2 (sum, lhs);
2065 	      if (sum_vec == tvec)
2066 		{
2067 		  vce = build1 (VIEW_CONVERT_EXPR, vec_type, sum_vec);
2068 		  lhs = make_ssa_name (vec_type);
2069 		  g = gimple_build_assign (lhs, VIEW_CONVERT_EXPR, vce);
2070 		  gimple_set_uid (g, gimple_uid (sum));
2071 		  gsi_insert_before (&gsi, g, GSI_NEW_STMT);
2072 		  gimple_assign_set_rhs1 (sum, lhs);
2073 		}
2074 	      update_stmt (sum);
2075 	    }
2076 	  sum_vec = gimple_get_lhs (sum);
2077 	  info_ptr = *(v_info_map.get (valid_vecs[i + 1]));
2078 	  gcc_assert (types_compatible_p (vec_type, info_ptr->vec_type));
2079 	  /* Update those related ops of current candidate VECTOR.  */
2080 	  FOR_EACH_VEC_ELT (info_ptr->vec, j, elem)
2081 	    {
2082 	      idx = elem->second;
2083 	      gimple *def = SSA_NAME_DEF_STMT ((*ops)[idx]->op);
2084 	      /* Set this so that the op definition will get DCEd later.  */
2085 	      gimple_set_visited (def, true);
2086 	      if (opcode == PLUS_EXPR
2087 		  || opcode == BIT_XOR_EXPR
2088 		  || opcode == BIT_IOR_EXPR)
2089 		(*ops)[idx]->op = build_zero_cst (TREE_TYPE ((*ops)[idx]->op));
2090 	      else if (opcode == MULT_EXPR)
2091 		(*ops)[idx]->op = build_one_cst (TREE_TYPE ((*ops)[idx]->op));
2092 	      else
2093 		{
2094 		  gcc_assert (opcode == BIT_AND_EXPR);
2095 		  (*ops)[idx]->op
2096 		    = build_all_ones_cst (TREE_TYPE ((*ops)[idx]->op));
2097 		}
2098 	      (*ops)[idx]->rank = 0;
2099 	    }
2100 	  if (dump_file && (dump_flags & TDF_DETAILS))
2101 	    {
2102 	      fprintf (dump_file, "Generating addition -> ");
2103 	      print_gimple_stmt (dump_file, sum, 0);
2104 	    }
2105 	  i++;
2106 	}
2107       while ((i < valid_vecs.length () - 1)
2108 	     && TYPE_MODE (TREE_TYPE (valid_vecs[i + 1])) == mode);
2109 
2110       /* Referring to the first valid VECTOR with this mode, generate the
2111          BIT_FIELD_REF statements accordingly.  */
2112       info_ptr = *(v_info_map.get (tvec));
2113       gcc_assert (sum);
2114       tree elem_type = TREE_TYPE (vec_type);
2115       FOR_EACH_VEC_ELT (info_ptr->vec, j, elem)
2116 	{
2117 	  idx = elem->second;
2118 	  tree dst = make_ssa_name (elem_type);
2119 	  tree pos = bitsize_int (elem->first
2120 				  * tree_to_uhwi (TYPE_SIZE (elem_type)));
2121 	  tree bfr = build3 (BIT_FIELD_REF, elem_type, sum_vec,
2122 			     TYPE_SIZE (elem_type), pos);
2123 	  gimple *gs = gimple_build_assign (dst, BIT_FIELD_REF, bfr);
2124 	  insert_stmt_after (gs, sum);
2125 	  gimple *def = SSA_NAME_DEF_STMT ((*ops)[idx]->op);
2126 	  /* Set this so that the op definition will get DCEd later.  */
2127 	  gimple_set_visited (def, true);
2128 	  (*ops)[idx]->op = gimple_assign_lhs (gs);
2129 	  (*ops)[idx]->rank = get_rank ((*ops)[idx]->op);
2130 	  if (dump_file && (dump_flags & TDF_DETAILS))
2131 	    {
2132 	      fprintf (dump_file, "Generating bit_field_ref -> ");
2133 	      print_gimple_stmt (dump_file, gs, 0);
2134 	    }
2135 	}
2136     }
2137 
2138   if (dump_file && (dump_flags & TDF_DETAILS))
2139     fprintf (dump_file, "undistributing bit_field_ref for vector done.\n");
2140 
2141   cleanup_vinfo_map (v_info_map);
2142 
2143   return true;
2144 }
2145 
2146 /* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR and CURR is a comparison
2147    expression, examine the other OPS to see if any of them are comparisons
2148    of the same values, which we may be able to combine or eliminate.
2149    For example, we can rewrite (a < b) | (a == b) as (a <= b).  */
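
/* An additional illustrative case for BIT_AND_EXPR with integral operands:
   (a <= b) & (a >= b) can be folded by maybe_fold_and_comparisons into
   (a == b), and (a < b) & (a > b) folds to the constant 0; in the latter
   case the constant is appended to the operand list instead of replacing
   the current entry.  */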
2150 
2151 static bool
2152 eliminate_redundant_comparison (enum tree_code opcode,
2153 				vec<operand_entry *> *ops,
2154 				unsigned int currindex,
2155 				operand_entry *curr)
2156 {
2157   tree op1, op2;
2158   enum tree_code lcode, rcode;
2159   gimple *def1, *def2;
2160   int i;
2161   operand_entry *oe;
2162 
2163   if (opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
2164     return false;
2165 
2166   /* Check that CURR is a comparison.  */
2167   if (TREE_CODE (curr->op) != SSA_NAME)
2168     return false;
2169   def1 = SSA_NAME_DEF_STMT (curr->op);
2170   if (!is_gimple_assign (def1))
2171     return false;
2172   lcode = gimple_assign_rhs_code (def1);
2173   if (TREE_CODE_CLASS (lcode) != tcc_comparison)
2174     return false;
2175   op1 = gimple_assign_rhs1 (def1);
2176   op2 = gimple_assign_rhs2 (def1);
2177 
2178   /* Now look for a similar comparison in the remaining OPS.  */
2179   for (i = currindex + 1; ops->iterate (i, &oe); i++)
2180     {
2181       tree t;
2182 
2183       if (TREE_CODE (oe->op) != SSA_NAME)
2184 	continue;
2185       def2 = SSA_NAME_DEF_STMT (oe->op);
2186       if (!is_gimple_assign (def2))
2187 	continue;
2188       rcode = gimple_assign_rhs_code (def2);
2189       if (TREE_CODE_CLASS (rcode) != tcc_comparison)
2190 	continue;
2191 
2192       /* If we got here, we have a match.  See if we can combine the
2193 	 two comparisons.  */
2194       tree type = TREE_TYPE (gimple_assign_lhs (def1));
2195       if (opcode == BIT_IOR_EXPR)
2196 	t = maybe_fold_or_comparisons (type,
2197 				       lcode, op1, op2,
2198 				       rcode, gimple_assign_rhs1 (def2),
2199 				       gimple_assign_rhs2 (def2));
2200       else
2201 	t = maybe_fold_and_comparisons (type,
2202 					lcode, op1, op2,
2203 					rcode, gimple_assign_rhs1 (def2),
2204 					gimple_assign_rhs2 (def2));
2205       if (!t)
2206 	continue;
2207 
2208       /* maybe_fold_and_comparisons and maybe_fold_or_comparisons
2209 	 always give us a boolean_type_node value back.  If the original
2210 	 BIT_AND_EXPR or BIT_IOR_EXPR was of a wider integer type,
2211 	 we need to convert.  */
2212       if (!useless_type_conversion_p (TREE_TYPE (curr->op), TREE_TYPE (t)))
2213 	t = fold_convert (TREE_TYPE (curr->op), t);
2214 
2215       if (TREE_CODE (t) != INTEGER_CST
2216 	  && !operand_equal_p (t, curr->op, 0))
2217 	{
2218 	  enum tree_code subcode;
2219 	  tree newop1, newop2;
2220 	  if (!COMPARISON_CLASS_P (t))
2221 	    continue;
2222 	  extract_ops_from_tree (t, &subcode, &newop1, &newop2);
2223 	  STRIP_USELESS_TYPE_CONVERSION (newop1);
2224 	  STRIP_USELESS_TYPE_CONVERSION (newop2);
2225 	  if (!is_gimple_val (newop1) || !is_gimple_val (newop2))
2226 	    continue;
2227 	}
2228 
2229       if (dump_file && (dump_flags & TDF_DETAILS))
2230 	{
2231 	  fprintf (dump_file, "Equivalence: ");
2232 	  print_generic_expr (dump_file, curr->op);
2233 	  fprintf (dump_file, " %s ", op_symbol_code (opcode));
2234 	  print_generic_expr (dump_file, oe->op);
2235 	  fprintf (dump_file, " -> ");
2236 	  print_generic_expr (dump_file, t);
2237 	  fprintf (dump_file, "\n");
2238 	}
2239 
2240       /* Now we can delete oe, as it has been subsumed by the new combined
2241          expression t.  */
2242       ops->ordered_remove (i);
2243       reassociate_stats.ops_eliminated ++;
2244 
2245       /* If t is the same as curr->op, we're done.  Otherwise we must
2246 	 replace curr->op with t.  Special case is if we got a constant
2247 	 back, in which case we add it to the end instead of in place of
2248 	 the current entry.  */
2249       if (TREE_CODE (t) == INTEGER_CST)
2250 	{
2251 	  ops->ordered_remove (currindex);
2252 	  add_to_ops_vec (ops, t);
2253 	}
2254       else if (!operand_equal_p (t, curr->op, 0))
2255 	{
2256 	  gimple *sum;
2257 	  enum tree_code subcode;
2258 	  tree newop1;
2259 	  tree newop2;
2260 	  gcc_assert (COMPARISON_CLASS_P (t));
2261 	  extract_ops_from_tree (t, &subcode, &newop1, &newop2);
2262 	  STRIP_USELESS_TYPE_CONVERSION (newop1);
2263 	  STRIP_USELESS_TYPE_CONVERSION (newop2);
2264 	  gcc_checking_assert (is_gimple_val (newop1)
2265 			       && is_gimple_val (newop2));
2266 	  sum = build_and_add_sum (TREE_TYPE (t), newop1, newop2, subcode);
2267 	  curr->op = gimple_get_lhs (sum);
2268 	}
2269       return true;
2270     }
2271 
2272   return false;
2273 }
2274 
2275 
2276 /* Transform repeated addition of the same value into a multiplication by
2277    a constant.  */
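
/* For instance (an illustrative sketch), the operand list of
   x + x + x + y is rewritten so that the three occurrences of x are
   removed and replaced by a single new entry t with t = x * 3, leaving
   the list as t + y.  For floating-point types this is only done under
   -funsafe-math-optimizations, as checked below.  */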
2278 static bool
2279 transform_add_to_multiply (vec<operand_entry *> *ops)
2280 {
2281   operand_entry *oe;
2282   tree op = NULL_TREE;
2283   int j;
2284   int i, start = -1, end = 0, count = 0;
2285   auto_vec<std::pair <int, int> > indxs;
2286   bool changed = false;
2287 
2288   if (!INTEGRAL_TYPE_P (TREE_TYPE ((*ops)[0]->op))
2289       && (!SCALAR_FLOAT_TYPE_P (TREE_TYPE ((*ops)[0]->op))
2290 	  || !flag_unsafe_math_optimizations))
2291     return false;
2292 
2293   /* Look for repeated operands.  */
2294   FOR_EACH_VEC_ELT (*ops, i, oe)
2295     {
2296       if (start == -1)
2297 	{
2298 	  count = 1;
2299 	  op = oe->op;
2300 	  start = i;
2301 	}
2302       else if (operand_equal_p (oe->op, op, 0))
2303 	{
2304 	  count++;
2305 	  end = i;
2306 	}
2307       else
2308 	{
2309 	  if (count > 1)
2310 	    indxs.safe_push (std::make_pair (start, end));
2311 	  count = 1;
2312 	  op = oe->op;
2313 	  start = i;
2314 	}
2315     }
2316 
2317   if (count > 1)
2318     indxs.safe_push (std::make_pair (start, end));
2319 
2320   for (j = indxs.length () - 1; j >= 0; --j)
2321     {
2322       /* Convert repeated operand addition to multiplication.  */
2323       start = indxs[j].first;
2324       end = indxs[j].second;
2325       op = (*ops)[start]->op;
2326       count = end - start + 1;
2327       for (i = end; i >= start; --i)
2328 	ops->unordered_remove (i);
2329       tree tmp = make_ssa_name (TREE_TYPE (op));
2330       tree cst = build_int_cst (integer_type_node, count);
2331       gassign *mul_stmt
2332 	= gimple_build_assign (tmp, MULT_EXPR,
2333 			       op, fold_convert (TREE_TYPE (op), cst));
2334       gimple_set_visited (mul_stmt, true);
2335       add_to_ops_vec (ops, tmp, mul_stmt);
2336       changed = true;
2337     }
2338 
2339   return changed;
2340 }
2341 
2342 
2343 /* Perform various identities and other optimizations on the list of
2344    operand entries, stored in OPS.  The tree code for the binary
2345    operation between all the operands is OPCODE.  */
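
/* A small illustrative example: with OPCODE == PLUS_EXPR and an operand
   list whose last two entries are the constants 3 and 5, the two constants
   are popped, folded into 8 by fold_binary, the single constant 8 is pushed
   back, and the list is then processed again.  */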
2346 
2347 static void
2348 optimize_ops_list (enum tree_code opcode,
2349 		   vec<operand_entry *> *ops)
2350 {
2351   unsigned int length = ops->length ();
2352   unsigned int i;
2353   operand_entry *oe;
2354   operand_entry *oelast = NULL;
2355   bool iterate = false;
2356 
2357   if (length == 1)
2358     return;
2359 
2360   oelast = ops->last ();
2361 
2362   /* If the last two are constants, pop the constants off, merge them
2363      and try the next two.  */
2364   if (oelast->rank == 0 && is_gimple_min_invariant (oelast->op))
2365     {
2366       operand_entry *oelm1 = (*ops)[length - 2];
2367 
2368       if (oelm1->rank == 0
2369 	  && is_gimple_min_invariant (oelm1->op)
2370 	  && useless_type_conversion_p (TREE_TYPE (oelm1->op),
2371 				       TREE_TYPE (oelast->op)))
2372 	{
2373 	  tree folded = fold_binary (opcode, TREE_TYPE (oelm1->op),
2374 				     oelm1->op, oelast->op);
2375 
2376 	  if (folded && is_gimple_min_invariant (folded))
2377 	    {
2378 	      if (dump_file && (dump_flags & TDF_DETAILS))
2379 		fprintf (dump_file, "Merging constants\n");
2380 
2381 	      ops->pop ();
2382 	      ops->pop ();
2383 
2384 	      add_to_ops_vec (ops, folded);
2385 	      reassociate_stats.constants_eliminated++;
2386 
2387 	      optimize_ops_list (opcode, ops);
2388 	      return;
2389 	    }
2390 	}
2391     }
2392 
2393   eliminate_using_constants (opcode, ops);
2394   oelast = NULL;
2395 
2396   for (i = 0; ops->iterate (i, &oe);)
2397     {
2398       bool done = false;
2399 
2400       if (eliminate_not_pairs (opcode, ops, i, oe))
2401 	return;
2402       if (eliminate_duplicate_pair (opcode, ops, &done, i, oe, oelast)
2403 	  || (!done && eliminate_plus_minus_pair (opcode, ops, i, oe))
2404 	  || (!done && eliminate_redundant_comparison (opcode, ops, i, oe)))
2405 	{
2406 	  if (done)
2407 	    return;
2408 	  iterate = true;
2409 	  oelast = NULL;
2410 	  continue;
2411 	}
2412       oelast = oe;
2413       i++;
2414     }
2415 
2416   if (iterate)
2417     optimize_ops_list (opcode, ops);
2418 }
2419 
2420 /* The following functions are subroutines to optimize_range_tests and allow
2421    it to try to change a logical combination of comparisons into a range
2422    test.
2423 
2424    For example, both
2425 	X == 2 || X == 5 || X == 3 || X == 4
2426    and
2427 	X >= 2 && X <= 5
2428    are converted to
2429 	(unsigned) (X - 2) <= 3
2430 
2431    For more information see comments above fold_range_test in fold-const.c,
2432    this implementation is for GIMPLE.  */
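
/* To see why the unsigned rewrite above is valid: for 2 <= X && X <= 5 the
   subtraction X - 2 maps the in-range values onto 0 .. 3, while any X < 2
   wraps around to a very large unsigned value, so the single comparison
   (unsigned) (X - 2) <= 3 checks both bounds at once.  */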
2433 
2434 
2435 
2436 /* Dump the range entry R to FILE, skipping its expression if SKIP_EXP.  */
2437 
2438 void
2439 dump_range_entry (FILE *file, struct range_entry *r, bool skip_exp)
2440 {
2441   if (!skip_exp)
2442     print_generic_expr (file, r->exp);
2443   fprintf (file, " %c[", r->in_p ? '+' : '-');
2444   print_generic_expr (file, r->low);
2445   fputs (", ", file);
2446   print_generic_expr (file, r->high);
2447   fputc (']', file);
2448 }
2449 
2450 /* Dump the range entry R to STDERR.  */
2451 
2452 DEBUG_FUNCTION void
2453 debug_range_entry (struct range_entry *r)
2454 {
2455   dump_range_entry (stderr, r, false);
2456   fputc ('\n', stderr);
2457 }
2458 
2459 /* This is similar to make_range in fold-const.c, but on top of
2460    GIMPLE instead of trees.  If EXP is non-NULL, it should be
2461    an SSA_NAME and STMT argument is ignored, otherwise STMT
2462    argument should be a GIMPLE_COND.  */
2463 
2464 void
2465 init_range_entry (struct range_entry *r, tree exp, gimple *stmt)
2466 {
2467   int in_p;
2468   tree low, high;
2469   bool is_bool, strict_overflow_p;
2470 
2471   r->exp = NULL_TREE;
2472   r->in_p = false;
2473   r->strict_overflow_p = false;
2474   r->low = NULL_TREE;
2475   r->high = NULL_TREE;
2476   if (exp != NULL_TREE
2477       && (TREE_CODE (exp) != SSA_NAME || !INTEGRAL_TYPE_P (TREE_TYPE (exp))))
2478     return;
2479 
2480   /* Start with simply saying "EXP != 0" and then look at the code of EXP
2481      and see if we can refine the range.  Some of the cases below may not
2482      happen, but it doesn't seem worth worrying about this.  We "continue"
2483      the outer loop when we've changed something; otherwise we "break"
2484      the switch, which will "break" the while.  */
2485   low = exp ? build_int_cst (TREE_TYPE (exp), 0) : boolean_false_node;
2486   high = low;
2487   in_p = 0;
2488   strict_overflow_p = false;
2489   is_bool = false;
2490   if (exp == NULL_TREE)
2491     is_bool = true;
2492   else if (TYPE_PRECISION (TREE_TYPE (exp)) == 1)
2493     {
2494       if (TYPE_UNSIGNED (TREE_TYPE (exp)))
2495 	is_bool = true;
2496       else
2497 	return;
2498     }
2499   else if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE)
2500     is_bool = true;
2501 
2502   while (1)
2503     {
2504       enum tree_code code;
2505       tree arg0, arg1, exp_type;
2506       tree nexp;
2507       location_t loc;
2508 
2509       if (exp != NULL_TREE)
2510 	{
2511 	  if (TREE_CODE (exp) != SSA_NAME
2512 	      || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp))
2513 	    break;
2514 
2515 	  stmt = SSA_NAME_DEF_STMT (exp);
2516 	  if (!is_gimple_assign (stmt))
2517 	    break;
2518 
2519 	  code = gimple_assign_rhs_code (stmt);
2520 	  arg0 = gimple_assign_rhs1 (stmt);
2521 	  arg1 = gimple_assign_rhs2 (stmt);
2522 	  exp_type = TREE_TYPE (exp);
2523 	}
2524       else
2525 	{
2526 	  code = gimple_cond_code (stmt);
2527 	  arg0 = gimple_cond_lhs (stmt);
2528 	  arg1 = gimple_cond_rhs (stmt);
2529 	  exp_type = boolean_type_node;
2530 	}
2531 
2532       if (TREE_CODE (arg0) != SSA_NAME
2533 	  || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (arg0))
2534 	break;
2535       loc = gimple_location (stmt);
2536       switch (code)
2537 	{
2538 	case BIT_NOT_EXPR:
2539 	  if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE
2540 	      /* Ensure the range is either +[-,0], +[0,0],
2541 		 -[-,0], -[0,0] or +[1,-], +[1,1], -[1,-] or
2542 		 -[1,1].  If it is e.g. +[-,-] or -[-,-]
2543 		 or similar expression of unconditional true or
2544 		 false, it should not be negated.  */
2545 	      && ((high && integer_zerop (high))
2546 		  || (low && integer_onep (low))))
2547 	    {
2548 	      in_p = !in_p;
2549 	      exp = arg0;
2550 	      continue;
2551 	    }
2552 	  break;
2553 	case SSA_NAME:
2554 	  exp = arg0;
2555 	  continue;
2556 	CASE_CONVERT:
2557 	  if (is_bool)
2558 	    {
2559 	      if ((TYPE_PRECISION (exp_type) == 1
2560 		   || TREE_CODE (exp_type) == BOOLEAN_TYPE)
2561 		  && TYPE_PRECISION (TREE_TYPE (arg0)) > 1)
2562 		return;
2563 	    }
2564 	  else if (TYPE_PRECISION (TREE_TYPE (arg0)) == 1)
2565 	    {
2566 	      if (TYPE_UNSIGNED (TREE_TYPE (arg0)))
2567 		is_bool = true;
2568 	      else
2569 		return;
2570 	    }
2571 	  else if (TREE_CODE (TREE_TYPE (arg0)) == BOOLEAN_TYPE)
2572 	    is_bool = true;
2573 	  goto do_default;
2574 	case EQ_EXPR:
2575 	case NE_EXPR:
2576 	case LT_EXPR:
2577 	case LE_EXPR:
2578 	case GE_EXPR:
2579 	case GT_EXPR:
2580 	  is_bool = true;
2581 	  /* FALLTHRU */
2582 	default:
2583 	  if (!is_bool)
2584 	    return;
2585 	do_default:
2586 	  nexp = make_range_step (loc, code, arg0, arg1, exp_type,
2587 				  &low, &high, &in_p,
2588 				  &strict_overflow_p);
2589 	  if (nexp != NULL_TREE)
2590 	    {
2591 	      exp = nexp;
2592 	      gcc_assert (TREE_CODE (exp) == SSA_NAME);
2593 	      continue;
2594 	    }
2595 	  break;
2596 	}
2597       break;
2598     }
2599   if (is_bool)
2600     {
2601       r->exp = exp;
2602       r->in_p = in_p;
2603       r->low = low;
2604       r->high = high;
2605       r->strict_overflow_p = strict_overflow_p;
2606     }
2607 }
2608 
2609 /* Comparison function for qsort.  Sort entries
2610    without SSA_NAME exp first, then with SSA_NAMEs sorted
2611    by increasing SSA_NAME_VERSION, and for the same SSA_NAMEs
2612    by increasing ->low and if ->low is the same, by increasing
2613    ->high.  ->low == NULL_TREE means minimum, ->high == NULL_TREE
2614    maximum.  */
2615 
2616 static int
2617 range_entry_cmp (const void *a, const void *b)
2618 {
2619   const struct range_entry *p = (const struct range_entry *) a;
2620   const struct range_entry *q = (const struct range_entry *) b;
2621 
2622   if (p->exp != NULL_TREE && TREE_CODE (p->exp) == SSA_NAME)
2623     {
2624       if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
2625 	{
2626 	  /* Group range_entries for the same SSA_NAME together.  */
2627 	  if (SSA_NAME_VERSION (p->exp) < SSA_NAME_VERSION (q->exp))
2628 	    return -1;
2629 	  else if (SSA_NAME_VERSION (p->exp) > SSA_NAME_VERSION (q->exp))
2630 	    return 1;
2631 	  /* If ->low is different, NULL low goes first, then by
2632 	     ascending low.  */
2633 	  if (p->low != NULL_TREE)
2634 	    {
2635 	      if (q->low != NULL_TREE)
2636 		{
2637 		  tree tem = fold_binary (LT_EXPR, boolean_type_node,
2638 					  p->low, q->low);
2639 		  if (tem && integer_onep (tem))
2640 		    return -1;
2641 		  tem = fold_binary (GT_EXPR, boolean_type_node,
2642 				     p->low, q->low);
2643 		  if (tem && integer_onep (tem))
2644 		    return 1;
2645 		}
2646 	      else
2647 		return 1;
2648 	    }
2649 	  else if (q->low != NULL_TREE)
2650 	    return -1;
2651 	  /* If ->high is different, NULL high goes last, before that by
2652 	     ascending high.  */
2653 	  if (p->high != NULL_TREE)
2654 	    {
2655 	      if (q->high != NULL_TREE)
2656 		{
2657 		  tree tem = fold_binary (LT_EXPR, boolean_type_node,
2658 					  p->high, q->high);
2659 		  if (tem && integer_onep (tem))
2660 		    return -1;
2661 		  tem = fold_binary (GT_EXPR, boolean_type_node,
2662 				     p->high, q->high);
2663 		  if (tem && integer_onep (tem))
2664 		    return 1;
2665 		}
2666 	      else
2667 		return -1;
2668 	    }
2669 	  else if (q->high != NULL_TREE)
2670 	    return 1;
2671 	  /* If both ranges are the same, sort below by ascending idx.  */
2672 	}
2673       else
2674 	return 1;
2675     }
2676   else if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
2677     return -1;
2678 
2679   if (p->idx < q->idx)
2680     return -1;
2681   else
2682     {
2683       gcc_checking_assert (p->idx > q->idx);
2684       return 1;
2685     }
2686 }
2687 
2688 /* Helper function for update_range_test.  Force EXPR into an SSA_NAME,
2689    insert needed statements BEFORE or after GSI.  */
2690 
2691 static tree
2692 force_into_ssa_name (gimple_stmt_iterator *gsi, tree expr, bool before)
2693 {
2694   enum gsi_iterator_update m = before ? GSI_SAME_STMT : GSI_CONTINUE_LINKING;
2695   tree ret = force_gimple_operand_gsi (gsi, expr, true, NULL_TREE, before, m);
2696   if (TREE_CODE (ret) != SSA_NAME)
2697     {
2698       gimple *g = gimple_build_assign (make_ssa_name (TREE_TYPE (ret)), ret);
2699       if (before)
2700 	gsi_insert_before (gsi, g, GSI_SAME_STMT);
2701       else
2702 	gsi_insert_after (gsi, g, GSI_CONTINUE_LINKING);
2703       ret = gimple_assign_lhs (g);
2704     }
2705   return ret;
2706 }
2707 
2708 /* Helper routine of optimize_range_tests.
2709    [EXP, IN_P, LOW, HIGH, STRICT_OVERFLOW_P] is a merged range for
2710    RANGE and OTHERRANGE through OTHERRANGE + COUNT - 1 ranges,
2711    OPCODE and OPS are arguments of optimize_range_tests.  If OTHERRANGE
2712    is NULL, OTHERRANGEP must not be NULL; it then points to
2713    an array of COUNT pointers to other ranges.  Return
2714    true if the range merge has been successful.
2715    If OPCODE is ERROR_MARK, this is called from within
2716    maybe_optimize_range_tests and is performing inter-bb range optimization.
2717    In that case, whether an op is BIT_AND_EXPR or BIT_IOR_EXPR is found in
2718    oe->rank.  */
2719 
2720 static bool
2721 update_range_test (struct range_entry *range, struct range_entry *otherrange,
2722 		   struct range_entry **otherrangep,
2723 		   unsigned int count, enum tree_code opcode,
2724 		   vec<operand_entry *> *ops, tree exp, gimple_seq seq,
2725 		   bool in_p, tree low, tree high, bool strict_overflow_p)
2726 {
2727   operand_entry *oe = (*ops)[range->idx];
2728   tree op = oe->op;
2729   gimple *stmt = op ? SSA_NAME_DEF_STMT (op)
2730 		    : last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id));
2731   location_t loc = gimple_location (stmt);
2732   tree optype = op ? TREE_TYPE (op) : boolean_type_node;
2733   tree tem = build_range_check (loc, optype, unshare_expr (exp),
2734 				in_p, low, high);
2735   enum warn_strict_overflow_code wc = WARN_STRICT_OVERFLOW_COMPARISON;
2736   gimple_stmt_iterator gsi;
2737   unsigned int i, uid;
2738 
2739   if (tem == NULL_TREE)
2740     return false;
2741 
2742   /* If op is default def SSA_NAME, there is no place to insert the
2743      new comparison.  Give up, unless we can use OP itself as the
2744      range test.  */
2745   if (op && SSA_NAME_IS_DEFAULT_DEF (op))
2746     {
2747       if (op == range->exp
2748 	  && ((TYPE_PRECISION (optype) == 1 && TYPE_UNSIGNED (optype))
2749 	      || TREE_CODE (optype) == BOOLEAN_TYPE)
2750 	  && (op == tem
2751 	      || (TREE_CODE (tem) == EQ_EXPR
2752 		  && TREE_OPERAND (tem, 0) == op
2753 		  && integer_onep (TREE_OPERAND (tem, 1))))
2754 	  && opcode != BIT_IOR_EXPR
2755 	  && (opcode != ERROR_MARK || oe->rank != BIT_IOR_EXPR))
2756 	{
2757 	  stmt = NULL;
2758 	  tem = op;
2759 	}
2760       else
2761 	return false;
2762     }
2763 
2764   if (strict_overflow_p && issue_strict_overflow_warning (wc))
2765     warning_at (loc, OPT_Wstrict_overflow,
2766 		"assuming signed overflow does not occur "
2767 		"when simplifying range test");
2768 
2769   if (dump_file && (dump_flags & TDF_DETAILS))
2770     {
2771       struct range_entry *r;
2772       fprintf (dump_file, "Optimizing range tests ");
2773       dump_range_entry (dump_file, range, false);
2774       for (i = 0; i < count; i++)
2775 	{
2776 	  if (otherrange)
2777 	    r = otherrange + i;
2778 	  else
2779 	    r = otherrangep[i];
2780 	  if (r->exp
2781 	      && r->exp != range->exp
2782 	      && TREE_CODE (r->exp) == SSA_NAME)
2783 	    {
2784 	      fprintf (dump_file, " and ");
2785 	      dump_range_entry (dump_file, r, false);
2786 	    }
2787 	  else
2788 	    {
2789 	      fprintf (dump_file, " and");
2790 	      dump_range_entry (dump_file, r, true);
2791 	    }
2792 	}
2793       fprintf (dump_file, "\n into ");
2794       print_generic_expr (dump_file, tem);
2795       fprintf (dump_file, "\n");
2796     }
2797 
2798   if (opcode == BIT_IOR_EXPR
2799       || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
2800     tem = invert_truthvalue_loc (loc, tem);
2801 
2802   tem = fold_convert_loc (loc, optype, tem);
2803   if (stmt)
2804     {
2805       gsi = gsi_for_stmt (stmt);
2806       uid = gimple_uid (stmt);
2807     }
2808   else
2809     {
2810       gsi = gsi_none ();
2811       uid = 0;
2812     }
2813   if (stmt == NULL)
2814     gcc_checking_assert (tem == op);
2815   /* In rare cases range->exp can be equal to lhs of stmt.
2816      In that case we have to insert after the stmt rather than before
2817      it.  If stmt is a PHI, insert it at the start of the basic block.  */
2818   else if (op != range->exp)
2819     {
2820       gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT);
2821       tem = force_into_ssa_name (&gsi, tem, true);
2822       gsi_prev (&gsi);
2823     }
2824   else if (gimple_code (stmt) != GIMPLE_PHI)
2825     {
2826       gsi_insert_seq_after (&gsi, seq, GSI_CONTINUE_LINKING);
2827       tem = force_into_ssa_name (&gsi, tem, false);
2828     }
2829   else
2830     {
2831       gsi = gsi_after_labels (gimple_bb (stmt));
2832       if (!gsi_end_p (gsi))
2833 	uid = gimple_uid (gsi_stmt (gsi));
2834       else
2835 	{
2836 	  gsi = gsi_start_bb (gimple_bb (stmt));
2837 	  uid = 1;
2838 	  while (!gsi_end_p (gsi))
2839 	    {
2840 	      uid = gimple_uid (gsi_stmt (gsi));
2841 	      gsi_next (&gsi);
2842 	    }
2843 	}
2844       gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT);
2845       tem = force_into_ssa_name (&gsi, tem, true);
2846       if (gsi_end_p (gsi))
2847 	gsi = gsi_last_bb (gimple_bb (stmt));
2848       else
2849 	gsi_prev (&gsi);
2850     }
2851   for (; !gsi_end_p (gsi); gsi_prev (&gsi))
2852     if (gimple_uid (gsi_stmt (gsi)))
2853       break;
2854     else
2855       gimple_set_uid (gsi_stmt (gsi), uid);
2856 
2857   oe->op = tem;
2858   range->exp = exp;
2859   range->low = low;
2860   range->high = high;
2861   range->in_p = in_p;
2862   range->strict_overflow_p = false;
2863 
2864   for (i = 0; i < count; i++)
2865     {
2866       if (otherrange)
2867 	range = otherrange + i;
2868       else
2869 	range = otherrangep[i];
2870       oe = (*ops)[range->idx];
2871       /* Now change all the other range test immediate uses, so that
2872 	 those tests will be optimized away.  */
2873       if (opcode == ERROR_MARK)
2874 	{
2875 	  if (oe->op)
2876 	    oe->op = build_int_cst (TREE_TYPE (oe->op),
2877 				    oe->rank == BIT_IOR_EXPR ? 0 : 1);
2878 	  else
2879 	    oe->op = (oe->rank == BIT_IOR_EXPR
2880 		      ? boolean_false_node : boolean_true_node);
2881 	}
2882       else
2883 	oe->op = error_mark_node;
2884       range->exp = NULL_TREE;
2885       range->low = NULL_TREE;
2886       range->high = NULL_TREE;
2887     }
2888   return true;
2889 }
2890 
2891 /* Optimize X == CST1 || X == CST2
2892    if popcount (CST1 ^ CST2) == 1 into
2893    (X & ~(CST1 ^ CST2)) == (CST1 & ~(CST1 ^ CST2)).
2894    Similarly for ranges.  E.g.
2895    X != 2 && X != 3 && X != 10 && X != 11
2896    will be transformed by the previous optimization into
2897    !((X - 2U) <= 1U || (X - 10U) <= 1U)
2898    and this loop can transform that into
2899    !(((X & ~8) - 2U) <= 1U).  */
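
/* An illustrative walk-through of the example above in terms of the code
   below: lowi = 2, lowj = 10, so lowi ^ lowj = 8 has popcount 1 and equals
   highi ^ highj = 3 ^ 11.  Masking that bit off with X & ~8 maps the second
   range 10 .. 11 onto the first range 2 .. 3, so one range test covers
   both.  */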
2900 
2901 static bool
2902 optimize_range_tests_xor (enum tree_code opcode, tree type,
2903 			  tree lowi, tree lowj, tree highi, tree highj,
2904 			  vec<operand_entry *> *ops,
2905 			  struct range_entry *rangei,
2906 			  struct range_entry *rangej)
2907 {
2908   tree lowxor, highxor, tem, exp;
2909   /* Check lowi ^ lowj == highi ^ highj and
2910      popcount (lowi ^ lowj) == 1.  */
2911   lowxor = fold_binary (BIT_XOR_EXPR, type, lowi, lowj);
2912   if (lowxor == NULL_TREE || TREE_CODE (lowxor) != INTEGER_CST)
2913     return false;
2914   if (!integer_pow2p (lowxor))
2915     return false;
2916   highxor = fold_binary (BIT_XOR_EXPR, type, highi, highj);
2917   if (!tree_int_cst_equal (lowxor, highxor))
2918     return false;
2919 
2920   exp = rangei->exp;
2921   scalar_int_mode mode = as_a <scalar_int_mode> (TYPE_MODE (type));
2922   int prec = GET_MODE_PRECISION (mode);
2923   if (TYPE_PRECISION (type) < prec
2924       || (wi::to_wide (TYPE_MIN_VALUE (type))
2925 	  != wi::min_value (prec, TYPE_SIGN (type)))
2926       || (wi::to_wide (TYPE_MAX_VALUE (type))
2927 	  != wi::max_value (prec, TYPE_SIGN (type))))
2928     {
2929       type = build_nonstandard_integer_type (prec, TYPE_UNSIGNED (type));
2930       exp = fold_convert (type, exp);
2931       lowxor = fold_convert (type, lowxor);
2932       lowi = fold_convert (type, lowi);
2933       highi = fold_convert (type, highi);
2934     }
2935   tem = fold_build1 (BIT_NOT_EXPR, type, lowxor);
2936   exp = fold_build2 (BIT_AND_EXPR, type, exp, tem);
2937   lowj = fold_build2 (BIT_AND_EXPR, type, lowi, tem);
2938   highj = fold_build2 (BIT_AND_EXPR, type, highi, tem);
2939   if (update_range_test (rangei, rangej, NULL, 1, opcode, ops, exp,
2940 			 NULL, rangei->in_p, lowj, highj,
2941 			 rangei->strict_overflow_p
2942 			 || rangej->strict_overflow_p))
2943     return true;
2944   return false;
2945 }
2946 
2947 /* Optimize X == CST1 || X == CST2
2948    if popcount (CST2 - CST1) == 1 into
2949    ((X - CST1) & ~(CST2 - CST1)) == 0.
2950    Similarly for ranges.  E.g.
2951    X == 43 || X == 76 || X == 44 || X == 78 || X == 77 || X == 46
2952    || X == 75 || X == 45
2953    will be transformed by the previous optimization into
2954    (X - 43U) <= 3U || (X - 75U) <= 3U
2955    and this loop can transform that into
2956    ((X - 43U) & ~(75U - 43U)) <= 3U.  */
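
/* An illustrative walk-through of the example above: both ranges have the
   same width (highi - lowi == highj - lowj == 3) and lowj - lowi
   == 75 - 43 == 32 is a power of two, so X - 43U differs between the two
   ranges only in bit 5; clearing that bit with & ~32U merges them into the
   single test ((X - 43U) & ~(75U - 43U)) <= 3U.  */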
2957 static bool
2958 optimize_range_tests_diff (enum tree_code opcode, tree type,
2959 			   tree lowi, tree lowj, tree highi, tree highj,
2960 			   vec<operand_entry *> *ops,
2961 			   struct range_entry *rangei,
2962 			   struct range_entry *rangej)
2963 {
2964   tree tem1, tem2, mask;
2965   /* Check highi - lowi == highj - lowj.  */
2966   tem1 = fold_binary (MINUS_EXPR, type, highi, lowi);
2967   if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST)
2968     return false;
2969   tem2 = fold_binary (MINUS_EXPR, type, highj, lowj);
2970   if (!tree_int_cst_equal (tem1, tem2))
2971     return false;
2972   /* Check popcount (lowj - lowi) == 1.  */
2973   tem1 = fold_binary (MINUS_EXPR, type, lowj, lowi);
2974   if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST)
2975     return false;
2976   if (!integer_pow2p (tem1))
2977     return false;
2978 
2979   scalar_int_mode mode = as_a <scalar_int_mode> (TYPE_MODE (type));
2980   int prec = GET_MODE_PRECISION (mode);
2981   if (TYPE_PRECISION (type) < prec
2982       || (wi::to_wide (TYPE_MIN_VALUE (type))
2983 	  != wi::min_value (prec, TYPE_SIGN (type)))
2984       || (wi::to_wide (TYPE_MAX_VALUE (type))
2985 	  != wi::max_value (prec, TYPE_SIGN (type))))
2986     type = build_nonstandard_integer_type (prec, 1);
2987   else
2988     type = unsigned_type_for (type);
2989   tem1 = fold_convert (type, tem1);
2990   tem2 = fold_convert (type, tem2);
2991   lowi = fold_convert (type, lowi);
2992   mask = fold_build1 (BIT_NOT_EXPR, type, tem1);
2993   tem1 = fold_build2 (MINUS_EXPR, type,
2994 		      fold_convert (type, rangei->exp), lowi);
2995   tem1 = fold_build2 (BIT_AND_EXPR, type, tem1, mask);
2996   lowj = build_int_cst (type, 0);
2997   if (update_range_test (rangei, rangej, NULL, 1, opcode, ops, tem1,
2998 			 NULL, rangei->in_p, lowj, tem2,
2999 			 rangei->strict_overflow_p
3000 			 || rangej->strict_overflow_p))
3001     return true;
3002   return false;
3003 }
3004 
3005 /* Perform the common checks shared by optimize_range_tests_xor and
3006    optimize_range_tests_diff on each pair of ranges.
3007    If OPTIMIZE_XOR is TRUE, call optimize_range_tests_xor.
3008    Otherwise call optimize_range_tests_diff.  */
3009 
3010 static bool
3011 optimize_range_tests_1 (enum tree_code opcode, int first, int length,
3012 			bool optimize_xor, vec<operand_entry *> *ops,
3013 			struct range_entry *ranges)
3014 {
3015   int i, j;
3016   bool any_changes = false;
3017   for (i = first; i < length; i++)
3018     {
3019       tree lowi, highi, lowj, highj, type, tem;
3020 
3021       if (ranges[i].exp == NULL_TREE || ranges[i].in_p)
3022 	continue;
3023       type = TREE_TYPE (ranges[i].exp);
3024       if (!INTEGRAL_TYPE_P (type))
3025 	continue;
3026       lowi = ranges[i].low;
3027       if (lowi == NULL_TREE)
3028 	lowi = TYPE_MIN_VALUE (type);
3029       highi = ranges[i].high;
3030       if (highi == NULL_TREE)
3031 	continue;
3032       for (j = i + 1; j < length && j < i + 64; j++)
3033 	{
3034 	  bool changes;
3035 	  if (ranges[i].exp != ranges[j].exp || ranges[j].in_p)
3036 	    continue;
3037 	  lowj = ranges[j].low;
3038 	  if (lowj == NULL_TREE)
3039 	    continue;
3040 	  highj = ranges[j].high;
3041 	  if (highj == NULL_TREE)
3042 	    highj = TYPE_MAX_VALUE (type);
3043 	  /* Check lowj > highi.  */
3044 	  tem = fold_binary (GT_EXPR, boolean_type_node,
3045 			     lowj, highi);
3046 	  if (tem == NULL_TREE || !integer_onep (tem))
3047 	    continue;
3048 	  if (optimize_xor)
3049 	    changes = optimize_range_tests_xor (opcode, type, lowi, lowj,
3050 						highi, highj, ops,
3051 						ranges + i, ranges + j);
3052 	  else
3053 	    changes = optimize_range_tests_diff (opcode, type, lowi, lowj,
3054 						 highi, highj, ops,
3055 						 ranges + i, ranges + j);
3056 	  if (changes)
3057 	    {
3058 	      any_changes = true;
3059 	      break;
3060 	    }
3061 	}
3062     }
3063   return any_changes;
3064 }
3065 
3066 /* Helper function of optimize_range_tests_to_bit_test.  Handle a single
3067    range, EXP, LOW, HIGH, compute bit mask of bits to test and return
3068    EXP on success, NULL otherwise.  */
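
/* A numeric illustration (values chosen only for this example): with
   prec == 64, TOTALLOW == 43, LOW == 75 and HIGH == 78, the range spans
   HIGH - LOW + 1 == 4 values, so *MASK starts as 0xF and is then shifted
   left by LOW - TOTALLOW == 32, giving bits 32 .. 35 of the word, and EXP
   is returned unchanged.  */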
3069 
3070 static tree
3071 extract_bit_test_mask (tree exp, int prec, tree totallow, tree low, tree high,
3072 		       wide_int *mask, tree *totallowp)
3073 {
3074   tree tem = int_const_binop (MINUS_EXPR, high, low);
3075   if (tem == NULL_TREE
3076       || TREE_CODE (tem) != INTEGER_CST
3077       || TREE_OVERFLOW (tem)
3078       || tree_int_cst_sgn (tem) == -1
3079       || compare_tree_int (tem, prec) != -1)
3080     return NULL_TREE;
3081 
3082   unsigned HOST_WIDE_INT max = tree_to_uhwi (tem) + 1;
3083   *mask = wi::shifted_mask (0, max, false, prec);
3084   if (TREE_CODE (exp) == BIT_AND_EXPR
3085       && TREE_CODE (TREE_OPERAND (exp, 1)) == INTEGER_CST)
3086     {
3087       widest_int msk = wi::to_widest (TREE_OPERAND (exp, 1));
3088       msk = wi::zext (~msk, TYPE_PRECISION (TREE_TYPE (exp)));
3089       if (wi::popcount (msk) == 1
3090 	  && wi::ltu_p (msk, prec - max))
3091 	{
3092 	  *mask |= wi::shifted_mask (msk.to_uhwi (), max, false, prec);
3093 	  max += msk.to_uhwi ();
3094 	  exp = TREE_OPERAND (exp, 0);
3095 	  if (integer_zerop (low)
3096 	      && TREE_CODE (exp) == PLUS_EXPR
3097 	      && TREE_CODE (TREE_OPERAND (exp, 1)) == INTEGER_CST)
3098 	    {
3099 	      tree ret = TREE_OPERAND (exp, 0);
3100 	      STRIP_NOPS (ret);
3101 	      widest_int bias
3102 	        = wi::neg (wi::sext (wi::to_widest (TREE_OPERAND (exp, 1)),
3103 				     TYPE_PRECISION (TREE_TYPE (low))));
3104 	      tree tbias = wide_int_to_tree (TREE_TYPE (ret), bias);
3105 	      if (totallowp)
3106 		{
3107 		  *totallowp = tbias;
3108 		  return ret;
3109 		}
3110 	      else if (!tree_int_cst_lt (totallow, tbias))
3111 		return NULL_TREE;
3112 	      bias = wi::to_widest (tbias);
3113 	      bias -= wi::to_widest (totallow);
3114 	      if (bias >= 0 && bias < prec - max)
3115 		{
3116 		  *mask = wi::lshift (*mask, bias);
3117 		  return ret;
3118 		}
3119 	    }
3120 	}
3121     }
3122   if (totallowp)
3123     return exp;
3124   if (!tree_int_cst_lt (totallow, low))
3125     return exp;
3126   tem = int_const_binop (MINUS_EXPR, low, totallow);
3127   if (tem == NULL_TREE
3128       || TREE_CODE (tem) != INTEGER_CST
3129       || TREE_OVERFLOW (tem)
3130       || compare_tree_int (tem, prec - max) == 1)
3131     return NULL_TREE;
3132 
3133   *mask = wi::lshift (*mask, wi::to_widest (tem));
3134   return exp;
3135 }
3136 
3137 /* Attempt to optimize small range tests using a bit test.
3138    E.g.
3139    X != 43 && X != 76 && X != 44 && X != 78 && X != 49
3140    && X != 77 && X != 46 && X != 75 && X != 45 && X != 82
3141    has been optimized by the earlier optimizations into:
3142    ((X - 43U) & ~32U) > 3U && X != 49 && X != 82
3143    As the whole 43 through 82 range spans fewer than 64 values,
3144    for 64-bit word targets optimize that into:
3145    (X - 43U) > 40U && ((1 << (X - 43U)) & 0x8F0000004FULL) == 0  */
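
/* Decoding the 0x8F0000004FULL constant above as an illustration: relative
   to the base 43, the tested values are 43 .. 46 -> bits 0 .. 3 (0xF),
   49 -> bit 6 (0x40), 75 .. 78 -> bits 32 .. 35 and 82 -> bit 39, which
   together form the word-sized mask used by the single bit test.  */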
3146 
3147 static bool
3148 optimize_range_tests_to_bit_test (enum tree_code opcode, int first, int length,
3149 				  vec<operand_entry *> *ops,
3150 				  struct range_entry *ranges)
3151 {
3152   int i, j;
3153   bool any_changes = false;
3154   int prec = GET_MODE_BITSIZE (word_mode);
3155   auto_vec<struct range_entry *, 64> candidates;
3156 
3157   for (i = first; i < length - 1; i++)
3158     {
3159       tree lowi, highi, lowj, highj, type;
3160 
3161       if (ranges[i].exp == NULL_TREE || ranges[i].in_p)
3162 	continue;
3163       type = TREE_TYPE (ranges[i].exp);
3164       if (!INTEGRAL_TYPE_P (type))
3165 	continue;
3166       lowi = ranges[i].low;
3167       if (lowi == NULL_TREE)
3168 	lowi = TYPE_MIN_VALUE (type);
3169       highi = ranges[i].high;
3170       if (highi == NULL_TREE)
3171 	continue;
3172       wide_int mask;
3173       tree exp = extract_bit_test_mask (ranges[i].exp, prec, lowi, lowi,
3174 					highi, &mask, &lowi);
3175       if (exp == NULL_TREE)
3176 	continue;
3177       bool strict_overflow_p = ranges[i].strict_overflow_p;
3178       candidates.truncate (0);
3179       int end = MIN (i + 64, length);
3180       for (j = i + 1; j < end; j++)
3181 	{
3182 	  tree exp2;
3183 	  if (ranges[j].exp == NULL_TREE || ranges[j].in_p)
3184 	    continue;
3185 	  if (ranges[j].exp == exp)
3186 	    ;
3187 	  else if (TREE_CODE (ranges[j].exp) == BIT_AND_EXPR)
3188 	    {
3189 	      exp2 = TREE_OPERAND (ranges[j].exp, 0);
3190 	      if (exp2 == exp)
3191 		;
3192 	      else if (TREE_CODE (exp2) == PLUS_EXPR)
3193 		{
3194 		  exp2 = TREE_OPERAND (exp2, 0);
3195 		  STRIP_NOPS (exp2);
3196 		  if (exp2 != exp)
3197 		    continue;
3198 		}
3199 	      else
3200 		continue;
3201 	    }
3202 	  else
3203 	    continue;
3204 	  lowj = ranges[j].low;
3205 	  if (lowj == NULL_TREE)
3206 	    continue;
3207 	  highj = ranges[j].high;
3208 	  if (highj == NULL_TREE)
3209 	    highj = TYPE_MAX_VALUE (type);
3210 	  wide_int mask2;
3211 	  exp2 = extract_bit_test_mask (ranges[j].exp, prec, lowi, lowj,
3212 					highj, &mask2, NULL);
3213 	  if (exp2 != exp)
3214 	    continue;
3215 	  mask |= mask2;
3216 	  strict_overflow_p |= ranges[j].strict_overflow_p;
3217 	  candidates.safe_push (&ranges[j]);
3218 	}
3219 
3220       /* If every possible relative value of the expression is a valid shift
3221 	 amount, then we can merge the entry test into the bit test.  In this
3222 	 case, if we would otherwise need 2 or more comparisons, then use
3223 	 the bit test; in the other cases, the threshold is 3 comparisons.  */
3224       wide_int min, max;
3225       bool entry_test_needed;
3226       if (TREE_CODE (exp) == SSA_NAME
3227 	  && get_range_info (exp, &min, &max) == VR_RANGE
3228 	  && wi::leu_p (max - min, prec - 1))
3229 	{
3230 	  wide_int ilowi = wi::to_wide (lowi);
3231 	  if (wi::lt_p (min, ilowi, TYPE_SIGN (TREE_TYPE (lowi))))
3232 	    {
3233 	      lowi = wide_int_to_tree (TREE_TYPE (lowi), min);
3234 	      mask = wi::lshift (mask, ilowi - min);
3235 	    }
3236 	  else if (wi::gt_p (min, ilowi, TYPE_SIGN (TREE_TYPE (lowi))))
3237 	    {
3238 	      lowi = wide_int_to_tree (TREE_TYPE (lowi), min);
3239 	      mask = wi::lrshift (mask, min - ilowi);
3240 	    }
3241 	  entry_test_needed = false;
3242 	}
3243       else
3244 	entry_test_needed = true;
3245       if (candidates.length () >= (entry_test_needed ? 2 : 1))
3246 	{
3247 	  tree high = wide_int_to_tree (TREE_TYPE (lowi),
3248 					wi::to_widest (lowi)
3249 					+ prec - 1 - wi::clz (mask));
3250 	  operand_entry *oe = (*ops)[ranges[i].idx];
3251 	  tree op = oe->op;
3252 	  gimple *stmt = op ? SSA_NAME_DEF_STMT (op)
3253 			    : last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id));
3254 	  location_t loc = gimple_location (stmt);
3255 	  tree optype = op ? TREE_TYPE (op) : boolean_type_node;
3256 
3257 	  /* See if it isn't cheaper to pretend the minimum value of the
3258 	     range is 0, if the maximum value is small enough.
3259 	     We can then avoid the subtraction of the minimum value, but the
3260 	     mask constant could perhaps be more expensive.  */
3261 	  if (compare_tree_int (lowi, 0) > 0
3262 	      && compare_tree_int (high, prec) < 0)
3263 	    {
3264 	      int cost_diff;
3265 	      HOST_WIDE_INT m = tree_to_uhwi (lowi);
3266 	      rtx reg = gen_raw_REG (word_mode, 10000);
3267 	      bool speed_p = optimize_bb_for_speed_p (gimple_bb (stmt));
3268 	      cost_diff = set_src_cost (gen_rtx_PLUS (word_mode, reg,
3269 						      GEN_INT (-m)),
3270 					word_mode, speed_p);
3271 	      rtx r = immed_wide_int_const (mask, word_mode);
3272 	      cost_diff += set_src_cost (gen_rtx_AND (word_mode, reg, r),
3273 					 word_mode, speed_p);
3274 	      r = immed_wide_int_const (wi::lshift (mask, m), word_mode);
3275 	      cost_diff -= set_src_cost (gen_rtx_AND (word_mode, reg, r),
3276 					 word_mode, speed_p);
3277 	      if (cost_diff > 0)
3278 		{
3279 		  mask = wi::lshift (mask, m);
3280 		  lowi = build_zero_cst (TREE_TYPE (lowi));
3281 		}
3282 	    }
3283 
3284 	  tree tem;
3285 	  if (entry_test_needed)
3286 	    {
3287 	      tem = build_range_check (loc, optype, unshare_expr (exp),
3288 				       false, lowi, high);
3289 	      if (tem == NULL_TREE || is_gimple_val (tem))
3290 		continue;
3291 	    }
3292 	  else
3293 	    tem = NULL_TREE;
3294 	  tree etype = unsigned_type_for (TREE_TYPE (exp));
3295 	  exp = fold_build2_loc (loc, MINUS_EXPR, etype,
3296 				 fold_convert_loc (loc, etype, exp),
3297 				 fold_convert_loc (loc, etype, lowi));
3298 	  exp = fold_convert_loc (loc, integer_type_node, exp);
3299 	  tree word_type = lang_hooks.types.type_for_mode (word_mode, 1);
3300 	  exp = fold_build2_loc (loc, LSHIFT_EXPR, word_type,
3301 				 build_int_cst (word_type, 1), exp);
3302 	  exp = fold_build2_loc (loc, BIT_AND_EXPR, word_type, exp,
3303 				 wide_int_to_tree (word_type, mask));
3304 	  exp = fold_build2_loc (loc, EQ_EXPR, optype, exp,
3305 				 build_zero_cst (word_type));
3306 	  if (is_gimple_val (exp))
3307 	    continue;
3308 
3309 	  /* The shift might have undefined behavior if TEM is true,
3310 	     but reassociate_bb isn't prepared to have basic blocks
3311 	     split when it is running.  So, temporarily emit code
3312 	     with BIT_IOR_EXPR instead of &&, and fix it up in
3313 	     branch_fixup.  */
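	  /* E.g. (illustrative only): instead of emitting a short-circuited
	     form that would require a new basic block here, the sequence
	     below produces something like
	       _t = <entry test>;
	       _b = <bit test>;
	       _r = _t | _b;
	     and branch_fixup later turns the | back into a branch on _t
	     that skips the evaluation of the bit test, restoring the
	     short-circuit semantics.  */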
3314 	  gimple_seq seq = NULL;
3315 	  if (tem)
3316 	    {
3317 	      tem = force_gimple_operand (tem, &seq, true, NULL_TREE);
3318 	      gcc_assert (TREE_CODE (tem) == SSA_NAME);
3319 	      gimple_set_visited (SSA_NAME_DEF_STMT (tem), true);
3320 	    }
3321 	  gimple_seq seq2;
3322 	  exp = force_gimple_operand (exp, &seq2, true, NULL_TREE);
3323 	  gimple_seq_add_seq_without_update (&seq, seq2);
3324 	  gcc_assert (TREE_CODE (exp) == SSA_NAME);
3325 	  gimple_set_visited (SSA_NAME_DEF_STMT (exp), true);
3326 	  if (tem)
3327 	    {
3328 	      gimple *g = gimple_build_assign (make_ssa_name (optype),
3329 					       BIT_IOR_EXPR, tem, exp);
3330 	      gimple_set_location (g, loc);
3331 	      gimple_seq_add_stmt_without_update (&seq, g);
3332 	      exp = gimple_assign_lhs (g);
3333 	    }
3334 	  tree val = build_zero_cst (optype);
3335 	  if (update_range_test (&ranges[i], NULL, candidates.address (),
3336 				 candidates.length (), opcode, ops, exp,
3337 				 seq, false, val, val, strict_overflow_p))
3338 	    {
3339 	      any_changes = true;
3340 	      if (tem)
3341 		reassoc_branch_fixups.safe_push (tem);
3342 	    }
3343 	  else
3344 	    gimple_seq_discard (seq);
3345 	}
3346     }
3347   return any_changes;
3348 }
3349 
3350 /* Optimize x != 0 && y != 0 && z != 0 into (x | y | z) != 0
3351    and similarly x != -1 && y != -1 && z != -1 into (x & y & z) != -1.
3352    Also, handle x < C && y < C && z < C where C is power of two as
3353    (x | y | z) < C.  And also handle signed x < 0 && y < 0 && z < 0
3354    as (x | y | z) < 0.  */
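/* For instance (an illustrative sketch with hypothetical operands):
     int x, y, z:        x != 0 && y != 0 && z != 0
       is rewritten as   (x | y | z) != 0
     unsigned x, y, z:   x < 32 && y < 32 && z < 32
       is rewritten as   (x | y | z) < 32
   the latter because or-ing the values cannot set a bit at or above the
   power of two unless one of the operands already had such a bit set.  */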
3355 
3356 static bool
3357 optimize_range_tests_cmp_bitwise (enum tree_code opcode, int first, int length,
3358 				  vec<operand_entry *> *ops,
3359 				  struct range_entry *ranges)
3360 {
3361   int i;
3362   unsigned int b;
3363   bool any_changes = false;
3364   auto_vec<int, 128> buckets;
3365   auto_vec<int, 32> chains;
3366   auto_vec<struct range_entry *, 32> candidates;
3367 
3368   for (i = first; i < length; i++)
3369     {
3370       int idx;
3371 
3372       if (ranges[i].exp == NULL_TREE
3373 	  || TREE_CODE (ranges[i].exp) != SSA_NAME
3374 	  || TYPE_PRECISION (TREE_TYPE (ranges[i].exp)) <= 1
3375 	  || TREE_CODE (TREE_TYPE (ranges[i].exp)) == BOOLEAN_TYPE)
3376 	continue;
3377 
3378       if (ranges[i].low != NULL_TREE
3379 	  && ranges[i].high != NULL_TREE
3380 	  && ranges[i].in_p
3381 	  && tree_int_cst_equal (ranges[i].low, ranges[i].high))
3382 	{
3383 	  idx = !integer_zerop (ranges[i].low);
3384 	  if (idx && !integer_all_onesp (ranges[i].low))
3385 	    continue;
3386 	}
3387       else if (ranges[i].high != NULL_TREE
3388 	       && TREE_CODE (ranges[i].high) == INTEGER_CST
3389 	       && ranges[i].in_p)
3390 	{
3391 	  wide_int w = wi::to_wide (ranges[i].high);
3392 	  int prec = TYPE_PRECISION (TREE_TYPE (ranges[i].exp));
3393 	  int l = wi::clz (w);
3394 	  idx = 2;
3395 	  if (l <= 0
3396 	      || l >= prec
3397 	      || w != wi::mask (prec - l, false, prec))
3398 	    continue;
3399 	  if (!((TYPE_UNSIGNED (TREE_TYPE (ranges[i].exp))
3400 		 && ranges[i].low == NULL_TREE)
3401 		|| (ranges[i].low
3402 		    && integer_zerop (ranges[i].low))))
3403 	    continue;
3404 	}
3405       else if (ranges[i].high == NULL_TREE
3406 	       && ranges[i].low != NULL_TREE
3407 	       /* Perform this optimization only in the last
3408 		  reassoc pass, as it interferes with the reassociation
3409 		  itself or could also interfere with VRP etc., which might not
3410 		  be able to virtually undo the optimization.  */
3411 	       && !reassoc_insert_powi_p
3412 	       && !TYPE_UNSIGNED (TREE_TYPE (ranges[i].exp))
3413 	       && integer_zerop (ranges[i].low))
3414 	idx = 3;
3415       else
3416 	continue;
3417 
3418       b = TYPE_PRECISION (TREE_TYPE (ranges[i].exp)) * 4 + idx;
3419       if (buckets.length () <= b)
3420 	buckets.safe_grow_cleared (b + 1, true);
3421       if (chains.length () <= (unsigned) i)
3422 	chains.safe_grow (i + 1, true);
3423       chains[i] = buckets[b];
3424       buckets[b] = i + 1;
3425     }
3426 
3427   FOR_EACH_VEC_ELT (buckets, b, i)
3428     if (i && chains[i - 1])
3429       {
3430 	int j, k = i;
3431 	if ((b % 4) == 2)
3432 	  {
3433 	    /* When ranges[X - 1].high + 1 is a power of two,
3434 	       we need to process the same bucket up to
3435 	       precision - 1 times, each time splitting the entries
3436 	       with the same high bound into one chain and the
3437 	       rest into another one to be processed later.  */
3438 	    int this_prev = i;
3439 	    int other_prev = 0;
3440 	    for (j = chains[i - 1]; j; j = chains[j - 1])
3441 	      {
3442 		if (tree_int_cst_equal (ranges[i - 1].high,
3443 					ranges[j - 1].high))
3444 		  {
3445 		    chains[this_prev - 1] = j;
3446 		    this_prev = j;
3447 		  }
3448 		else if (other_prev == 0)
3449 		  {
3450 		    buckets[b] = j;
3451 		    other_prev = j;
3452 		  }
3453 		else
3454 		  {
3455 		    chains[other_prev - 1] = j;
3456 		    other_prev = j;
3457 		  }
3458 	      }
3459 	    chains[this_prev - 1] = 0;
3460 	    if (other_prev)
3461 	      chains[other_prev - 1] = 0;
3462 	    if (chains[i - 1] == 0)
3463 	      {
3464 		if (other_prev)
3465 		  b--;
3466 		continue;
3467 	      }
3468 	  }
3469 	for (j = chains[i - 1]; j; j = chains[j - 1])
3470 	  {
3471 	    gimple *gk = SSA_NAME_DEF_STMT (ranges[k - 1].exp);
3472 	    gimple *gj = SSA_NAME_DEF_STMT (ranges[j - 1].exp);
3473 	    if (reassoc_stmt_dominates_stmt_p (gk, gj))
3474 	      k = j;
3475 	  }
3476 	tree type1 = TREE_TYPE (ranges[k - 1].exp);
3477 	tree type2 = NULL_TREE;
3478 	bool strict_overflow_p = false;
3479 	candidates.truncate (0);
3480 	for (j = i; j; j = chains[j - 1])
3481 	  {
3482 	    tree type = TREE_TYPE (ranges[j - 1].exp);
3483 	    strict_overflow_p |= ranges[j - 1].strict_overflow_p;
3484 	    if ((b % 4) == 3)
3485 	      {
3486 		/* For the signed < 0 cases, the types should be
3487 		   really compatible (all signed with the same precision);
3488 		   instead, put ranges that have a different in_p from
3489 		   k first.  */
3490 		if (!useless_type_conversion_p (type1, type))
3491 		  continue;
3492 		if (ranges[j - 1].in_p != ranges[k - 1].in_p)
3493 		  candidates.safe_push (&ranges[j - 1]);
3494 		type2 = type1;
3495 		continue;
3496 	      }
3497 	    if (j == k
3498 		|| useless_type_conversion_p (type1, type))
3499 	      ;
3500 	    else if (type2 == NULL_TREE
3501 		     || useless_type_conversion_p (type2, type))
3502 	      {
3503 		if (type2 == NULL_TREE)
3504 		  type2 = type;
3505 		candidates.safe_push (&ranges[j - 1]);
3506 	      }
3507 	  }
3508 	unsigned l = candidates.length ();
3509 	for (j = i; j; j = chains[j - 1])
3510 	  {
3511 	    tree type = TREE_TYPE (ranges[j - 1].exp);
3512 	    if (j == k)
3513 	      continue;
3514 	    if ((b % 4) == 3)
3515 	      {
3516 		if (!useless_type_conversion_p (type1, type))
3517 		  continue;
3518 		if (ranges[j - 1].in_p == ranges[k - 1].in_p)
3519 		  candidates.safe_push (&ranges[j - 1]);
3520 		continue;
3521 	      }
3522 	    if (useless_type_conversion_p (type1, type))
3523 	      ;
3524 	    else if (type2 == NULL_TREE
3525 		     || useless_type_conversion_p (type2, type))
3526 	      continue;
3527 	    candidates.safe_push (&ranges[j - 1]);
3528 	  }
3529 	gimple_seq seq = NULL;
3530 	tree op = NULL_TREE;
3531 	unsigned int id;
3532 	struct range_entry *r;
3533 	candidates.safe_push (&ranges[k - 1]);
3534 	FOR_EACH_VEC_ELT (candidates, id, r)
3535 	  {
3536 	    gimple *g;
3537 	    enum tree_code code;
3538 	    if (id == 0)
3539 	      {
3540 		op = r->exp;
3541 		continue;
3542 	      }
3543 	    if (id == l)
3544 	      {
3545 		code = (b % 4) == 3 ? BIT_NOT_EXPR : NOP_EXPR;
3546 		g = gimple_build_assign (make_ssa_name (type1), code, op);
3547 		gimple_seq_add_stmt_without_update (&seq, g);
3548 		op = gimple_assign_lhs (g);
3549 	      }
3550 	    tree type = TREE_TYPE (r->exp);
3551 	    tree exp = r->exp;
3552 	    if (id >= l && !useless_type_conversion_p (type1, type))
3553 	      {
3554 		g = gimple_build_assign (make_ssa_name (type1), NOP_EXPR, exp);
3555 		gimple_seq_add_stmt_without_update (&seq, g);
3556 		exp = gimple_assign_lhs (g);
3557 	      }
3558 	    if ((b % 4) == 3)
3559 	      code = r->in_p ? BIT_IOR_EXPR : BIT_AND_EXPR;
3560 	    else
3561 	      code = (b % 4) == 1 ? BIT_AND_EXPR : BIT_IOR_EXPR;
3562 	    g = gimple_build_assign (make_ssa_name (id >= l ? type1 : type2),
3563 				     code, op, exp);
3564 	    gimple_seq_add_stmt_without_update (&seq, g);
3565 	    op = gimple_assign_lhs (g);
3566 	  }
3567 	candidates.pop ();
3568 	if (update_range_test (&ranges[k - 1], NULL, candidates.address (),
3569 			       candidates.length (), opcode, ops, op,
3570 			       seq, ranges[k - 1].in_p, ranges[k - 1].low,
3571 			       ranges[k - 1].high, strict_overflow_p))
3572 	  any_changes = true;
3573 	else
3574 	  gimple_seq_discard (seq);
3575 	if ((b % 4) == 2 && buckets[b] != i)
3576 	  /* There is more work to do for this bucket.  */
3577 	  b--;
3578       }
3579 
3580   return any_changes;
3581 }
3582 
3583 /* Attempt to optimize for signed a and b where b is known to be >= 0:
3584    a >= 0 && a < b into (unsigned) a < (unsigned) b
3585    a >= 0 && a <= b into (unsigned) a <= (unsigned) b  */
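/* E.g. (illustrative), for signed int a and signed int b with b known to be
   non-negative,
     if (a >= 0 && a < b)
   can be rewritten as
     if ((unsigned int) a < (unsigned int) b)
   because a negative a converts to a large unsigned value (sign bit set)
   that cannot be below the unsigned image of the non-negative b.  */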
3586 
3587 static bool
3588 optimize_range_tests_var_bound (enum tree_code opcode, int first, int length,
3589 				vec<operand_entry *> *ops,
3590 				struct range_entry *ranges,
3591 				basic_block first_bb)
3592 {
3593   int i;
3594   bool any_changes = false;
3595   hash_map<tree, int> *map = NULL;
3596 
3597   for (i = first; i < length; i++)
3598     {
3599       if (ranges[i].exp == NULL_TREE
3600 	  || TREE_CODE (ranges[i].exp) != SSA_NAME
3601 	  || !ranges[i].in_p)
3602 	continue;
3603 
3604       tree type = TREE_TYPE (ranges[i].exp);
3605       if (!INTEGRAL_TYPE_P (type)
3606 	  || TYPE_UNSIGNED (type)
3607 	  || ranges[i].low == NULL_TREE
3608 	  || !integer_zerop (ranges[i].low)
3609 	  || ranges[i].high != NULL_TREE)
3610 	continue;
3611       /* EXP >= 0 here.  */
3612       if (map == NULL)
3613 	map = new hash_map <tree, int>;
3614       map->put (ranges[i].exp, i);
3615     }
3616 
3617   if (map == NULL)
3618     return false;
3619 
3620   for (i = 0; i < length; i++)
3621     {
3622       bool in_p = ranges[i].in_p;
3623       if (ranges[i].low == NULL_TREE
3624 	  || ranges[i].high == NULL_TREE)
3625 	continue;
3626       if (!integer_zerop (ranges[i].low)
3627 	  || !integer_zerop (ranges[i].high))
3628 	{
3629 	  if (ranges[i].exp
3630 	      && TYPE_PRECISION (TREE_TYPE (ranges[i].exp)) == 1
3631 	      && TYPE_UNSIGNED (TREE_TYPE (ranges[i].exp))
3632 	      && integer_onep (ranges[i].low)
3633 	      && integer_onep (ranges[i].high))
3634 	    in_p = !in_p;
3635 	  else
3636 	    continue;
3637 	}
3638 
3639       gimple *stmt;
3640       tree_code ccode;
3641       tree rhs1, rhs2;
3642       if (ranges[i].exp)
3643 	{
3644 	  if (TREE_CODE (ranges[i].exp) != SSA_NAME)
3645 	    continue;
3646 	  stmt = SSA_NAME_DEF_STMT (ranges[i].exp);
3647 	  if (!is_gimple_assign (stmt))
3648 	    continue;
3649 	  ccode = gimple_assign_rhs_code (stmt);
3650 	  rhs1 = gimple_assign_rhs1 (stmt);
3651 	  rhs2 = gimple_assign_rhs2 (stmt);
3652 	}
3653       else
3654 	{
3655 	  operand_entry *oe = (*ops)[ranges[i].idx];
3656 	  stmt = last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id));
3657 	  if (gimple_code (stmt) != GIMPLE_COND)
3658 	    continue;
3659 	  ccode = gimple_cond_code (stmt);
3660 	  rhs1 = gimple_cond_lhs (stmt);
3661 	  rhs2 = gimple_cond_rhs (stmt);
3662 	}
3663 
3664       if (TREE_CODE (rhs1) != SSA_NAME
3665 	  || rhs2 == NULL_TREE
3666 	  || TREE_CODE (rhs2) != SSA_NAME)
3667 	continue;
3668 
3669       switch (ccode)
3670 	{
3671 	case GT_EXPR:
3672 	case GE_EXPR:
3673 	case LT_EXPR:
3674 	case LE_EXPR:
3675 	  break;
3676 	default:
3677 	  continue;
3678 	}
3679       if (in_p)
3680 	ccode = invert_tree_comparison (ccode, false);
3681       switch (ccode)
3682 	{
3683 	case GT_EXPR:
3684 	case GE_EXPR:
3685 	  std::swap (rhs1, rhs2);
3686 	  ccode = swap_tree_comparison (ccode);
3687 	  break;
3688 	case LT_EXPR:
3689 	case LE_EXPR:
3690 	  break;
3691 	default:
3692 	  gcc_unreachable ();
3693 	}
3694 
3695       int *idx = map->get (rhs1);
3696       if (idx == NULL)
3697 	continue;
3698 
3699       /* maybe_optimize_range_tests allows statements without side-effects
3700 	 in the basic blocks as long as they are consumed in the same bb.
3701 	 Make sure rhs2's def stmt is not among them, otherwise we can't
3702 	 safely use get_nonzero_bits on it.  E.g. in:
3703 	  # RANGE [-83, 1] NONZERO 173
3704 	  # k_32 = PHI <k_47(13), k_12(9)>
3705 	 ...
3706 	  if (k_32 >= 0)
3707 	    goto <bb 5>; [26.46%]
3708 	  else
3709 	    goto <bb 9>; [73.54%]
3710 
3711 	  <bb 5> [local count: 140323371]:
3712 	  # RANGE [0, 1] NONZERO 1
3713 	  _5 = (int) k_32;
3714 	  # RANGE [0, 4] NONZERO 4
3715 	  _21 = _5 << 2;
3716 	  # RANGE [0, 4] NONZERO 4
3717 	  iftmp.0_44 = (char) _21;
3718 	  if (k_32 < iftmp.0_44)
3719 	    goto <bb 6>; [84.48%]
3720 	  else
3721 	    goto <bb 9>; [15.52%]
3722 	 the ranges on _5/_21/iftmp.0_44 are flow sensitive and assume that
3723 	 k_32 >= 0.  If we optimized k_32 >= 0 to true and k_32 < iftmp.0_44
3724 	 to (unsigned) k_32 < (unsigned) iftmp.0_44, then we would execute
3725 	 those stmts even for negative k_32, the value ranges would no
3726 	 longer be guaranteed, and so the optimization would be invalid.  */
3727       while (opcode == ERROR_MARK)
3728 	{
3729 	  gimple *g = SSA_NAME_DEF_STMT (rhs2);
3730 	  basic_block bb2 = gimple_bb (g);
3731 	  if (bb2
3732 	      && bb2 != first_bb
3733 	      && dominated_by_p (CDI_DOMINATORS, bb2, first_bb))
3734 	    {
3735 	      /* As an exception, handle a few common cases.  */
3736 	      if (gimple_assign_cast_p (g)
3737 		  && INTEGRAL_TYPE_P (TREE_TYPE (gimple_assign_rhs1 (g))))
3738 		{
3739 		  tree op0 = gimple_assign_rhs1 (g);
3740 		  if (TYPE_UNSIGNED (TREE_TYPE (op0))
3741 		      && (TYPE_PRECISION (TREE_TYPE (rhs2))
3742 			  > TYPE_PRECISION (TREE_TYPE (op0))))
3743 		    /* Zero-extension is always ok.  */
3744 		    break;
3745 		  else if (TYPE_PRECISION (TREE_TYPE (rhs2))
3746 			   == TYPE_PRECISION (TREE_TYPE (op0))
3747 			   && TREE_CODE (op0) == SSA_NAME)
3748 		    {
3749 		      /* Cast from signed to unsigned or vice versa.  Retry
3750 			 with the op0 as new rhs2.  */
3751 		      rhs2 = op0;
3752 		      continue;
3753 		    }
3754 		}
3755 	      else if (is_gimple_assign (g)
3756 		       && gimple_assign_rhs_code (g) == BIT_AND_EXPR
3757 		       && TREE_CODE (gimple_assign_rhs2 (g)) == INTEGER_CST
3758 		       && !wi::neg_p (wi::to_wide (gimple_assign_rhs2 (g))))
3759 		/* Masking with INTEGER_CST with MSB clear is always ok
3760 		   too.  */
3761 		break;
3762 	      rhs2 = NULL_TREE;
3763 	    }
3764 	  break;
3765 	}
3766       if (rhs2 == NULL_TREE)
3767 	continue;
3768 
3769       wide_int nz = get_nonzero_bits (rhs2);
3770       if (wi::neg_p (nz))
3771 	continue;
3772 
3773       /* We have EXP < RHS2 or EXP <= RHS2 where EXP >= 0
3774 	 and RHS2 is known to be RHS2 >= 0.  */
3775       tree utype = unsigned_type_for (TREE_TYPE (rhs1));
3776 
3777       enum warn_strict_overflow_code wc = WARN_STRICT_OVERFLOW_COMPARISON;
3778       if ((ranges[*idx].strict_overflow_p
3779 	   || ranges[i].strict_overflow_p)
3780 	  && issue_strict_overflow_warning (wc))
3781 	warning_at (gimple_location (stmt), OPT_Wstrict_overflow,
3782 		    "assuming signed overflow does not occur "
3783 		    "when simplifying range test");
3784 
3785       if (dump_file && (dump_flags & TDF_DETAILS))
3786 	{
3787 	  struct range_entry *r = &ranges[*idx];
3788 	  fprintf (dump_file, "Optimizing range test ");
3789 	  print_generic_expr (dump_file, r->exp);
3790 	  fprintf (dump_file, " +[");
3791 	  print_generic_expr (dump_file, r->low);
3792 	  fprintf (dump_file, ", ");
3793 	  print_generic_expr (dump_file, r->high);
3794 	  fprintf (dump_file, "] and comparison ");
3795 	  print_generic_expr (dump_file, rhs1);
3796 	  fprintf (dump_file, " %s ", op_symbol_code (ccode));
3797 	  print_generic_expr (dump_file, rhs2);
3798 	  fprintf (dump_file, "\n into (");
3799 	  print_generic_expr (dump_file, utype);
3800 	  fprintf (dump_file, ") ");
3801 	  print_generic_expr (dump_file, rhs1);
3802 	  fprintf (dump_file, " %s (", op_symbol_code (ccode));
3803 	  print_generic_expr (dump_file, utype);
3804 	  fprintf (dump_file, ") ");
3805 	  print_generic_expr (dump_file, rhs2);
3806 	  fprintf (dump_file, "\n");
3807 	}
3808 
3809       operand_entry *oe = (*ops)[ranges[i].idx];
3810       ranges[i].in_p = 0;
3811       if (opcode == BIT_IOR_EXPR
3812 	  || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
3813 	{
3814 	  ranges[i].in_p = 1;
3815 	  ccode = invert_tree_comparison (ccode, false);
3816 	}
3817 
3818       unsigned int uid = gimple_uid (stmt);
3819       gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
3820       gimple *g = gimple_build_assign (make_ssa_name (utype), NOP_EXPR, rhs1);
3821       gimple_set_uid (g, uid);
3822       rhs1 = gimple_assign_lhs (g);
3823       gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3824       if (!useless_type_conversion_p (utype, TREE_TYPE (rhs2)))
3825 	{
3826 	  g = gimple_build_assign (make_ssa_name (utype), NOP_EXPR, rhs2);
3827 	  gimple_set_uid (g, uid);
3828 	  rhs2 = gimple_assign_lhs (g);
3829 	  gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3830 	}
3831       if (tree_swap_operands_p (rhs1, rhs2))
3832 	{
3833 	  std::swap (rhs1, rhs2);
3834 	  ccode = swap_tree_comparison (ccode);
3835 	}
3836       if (gimple_code (stmt) == GIMPLE_COND)
3837 	{
3838 	  gcond *c = as_a <gcond *> (stmt);
3839 	  gimple_cond_set_code (c, ccode);
3840 	  gimple_cond_set_lhs (c, rhs1);
3841 	  gimple_cond_set_rhs (c, rhs2);
3842 	  update_stmt (stmt);
3843 	}
3844       else
3845 	{
3846 	  tree ctype = oe->op ? TREE_TYPE (oe->op) : boolean_type_node;
3847 	  if (!INTEGRAL_TYPE_P (ctype)
3848 	      || (TREE_CODE (ctype) != BOOLEAN_TYPE
3849 		  && TYPE_PRECISION (ctype) != 1))
3850 	    ctype = boolean_type_node;
3851 	  g = gimple_build_assign (make_ssa_name (ctype), ccode, rhs1, rhs2);
3852 	  gimple_set_uid (g, uid);
3853 	  gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3854 	  if (oe->op && ctype != TREE_TYPE (oe->op))
3855 	    {
3856 	      g = gimple_build_assign (make_ssa_name (TREE_TYPE (oe->op)),
3857 				       NOP_EXPR, gimple_assign_lhs (g));
3858 	      gimple_set_uid (g, uid);
3859 	      gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3860 	    }
3861 	  ranges[i].exp = gimple_assign_lhs (g);
3862 	  oe->op = ranges[i].exp;
3863 	  ranges[i].low = build_zero_cst (TREE_TYPE (ranges[i].exp));
3864 	  ranges[i].high = ranges[i].low;
3865 	}
3866       ranges[i].strict_overflow_p = false;
3867       oe = (*ops)[ranges[*idx].idx];
3868       /* Now change all the other range test immediate uses, so that
3869 	 those tests will be optimized away.  */
3870       if (opcode == ERROR_MARK)
3871 	{
3872 	  if (oe->op)
3873 	    oe->op = build_int_cst (TREE_TYPE (oe->op),
3874 				    oe->rank == BIT_IOR_EXPR ? 0 : 1);
3875 	  else
3876 	    oe->op = (oe->rank == BIT_IOR_EXPR
3877 		      ? boolean_false_node : boolean_true_node);
3878 	}
3879       else
3880 	oe->op = error_mark_node;
3881       ranges[*idx].exp = NULL_TREE;
3882       ranges[*idx].low = NULL_TREE;
3883       ranges[*idx].high = NULL_TREE;
3884       any_changes = true;
3885     }
3886 
3887   delete map;
3888   return any_changes;
3889 }
3890 
3891 /* Optimize range tests, similarly to how fold_range_test optimizes
3892    them on trees.  The tree code for the binary
3893    operation between all the operands is OPCODE.
3894    If OPCODE is ERROR_MARK, optimize_range_tests is called from within
3895    maybe_optimize_range_tests for inter-bb range optimization.
3896    In that case, if oe->op is NULL, oe->id is the index of the bb whose
3897    GIMPLE_COND is &&ed or ||ed into the test, and oe->rank says
3898    the actual opcode.
3899    FIRST_BB is the first basic block if OPCODE is ERROR_MARK.  */
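/* A small illustrative example (hypothetical): for
     a_1 != 3 && a_1 != 4
   the two range entries on a_1 are merged by merge_ranges into a single
   "not in [3, 4]" range, which update_range_test then typically emits as
   something like (unsigned) (a_1 - 3) > 1.  */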
3900 
3901 static bool
3902 optimize_range_tests (enum tree_code opcode,
3903 		      vec<operand_entry *> *ops, basic_block first_bb)
3904 {
3905   unsigned int length = ops->length (), i, j, first;
3906   operand_entry *oe;
3907   struct range_entry *ranges;
3908   bool any_changes = false;
3909 
3910   if (length == 1)
3911     return false;
3912 
3913   ranges = XNEWVEC (struct range_entry, length);
3914   for (i = 0; i < length; i++)
3915     {
3916       oe = (*ops)[i];
3917       ranges[i].idx = i;
3918       init_range_entry (ranges + i, oe->op,
3919 			oe->op
3920 			? NULL
3921 			: last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id)));
3922       /* For | invert it now, we will invert it again before emitting
3923 	 the optimized expression.  */
3924       if (opcode == BIT_IOR_EXPR
3925 	  || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
3926 	ranges[i].in_p = !ranges[i].in_p;
3927     }
3928 
3929   qsort (ranges, length, sizeof (*ranges), range_entry_cmp);
3930   for (i = 0; i < length; i++)
3931     if (ranges[i].exp != NULL_TREE && TREE_CODE (ranges[i].exp) == SSA_NAME)
3932       break;
3933 
3934   /* Try to merge ranges.  */
3935   for (first = i; i < length; i++)
3936     {
3937       tree low = ranges[i].low;
3938       tree high = ranges[i].high;
3939       int in_p = ranges[i].in_p;
3940       bool strict_overflow_p = ranges[i].strict_overflow_p;
3941       int update_fail_count = 0;
3942 
3943       for (j = i + 1; j < length; j++)
3944 	{
3945 	  if (ranges[i].exp != ranges[j].exp)
3946 	    break;
3947 	  if (!merge_ranges (&in_p, &low, &high, in_p, low, high,
3948 			     ranges[j].in_p, ranges[j].low, ranges[j].high))
3949 	    break;
3950 	  strict_overflow_p |= ranges[j].strict_overflow_p;
3951 	}
3952 
3953       if (j == i + 1)
3954 	continue;
3955 
3956       if (update_range_test (ranges + i, ranges + i + 1, NULL, j - i - 1,
3957 			     opcode, ops, ranges[i].exp, NULL, in_p,
3958 			     low, high, strict_overflow_p))
3959 	{
3960 	  i = j - 1;
3961 	  any_changes = true;
3962 	}
3963       /* Avoid quadratic complexity if all merge_ranges calls would succeed,
3964 	 while update_range_test would fail.  */
3965       else if (update_fail_count == 64)
3966 	i = j - 1;
3967       else
3968 	++update_fail_count;
3969     }
3970 
3971   any_changes |= optimize_range_tests_1 (opcode, first, length, true,
3972 					 ops, ranges);
3973 
3974   if (BRANCH_COST (optimize_function_for_speed_p (cfun), false) >= 2)
3975     any_changes |= optimize_range_tests_1 (opcode, first, length, false,
3976 					   ops, ranges);
3977   if (lshift_cheap_p (optimize_function_for_speed_p (cfun)))
3978     any_changes |= optimize_range_tests_to_bit_test (opcode, first, length,
3979 						     ops, ranges);
3980   any_changes |= optimize_range_tests_var_bound (opcode, first, length, ops,
3981 						 ranges, first_bb);
3982   any_changes |= optimize_range_tests_cmp_bitwise (opcode, first, length,
3983 						   ops, ranges);
3984 
3985   if (any_changes && opcode != ERROR_MARK)
3986     {
3987       j = 0;
3988       FOR_EACH_VEC_ELT (*ops, i, oe)
3989 	{
3990 	  if (oe->op == error_mark_node)
3991 	    continue;
3992 	  else if (i != j)
3993 	    (*ops)[j] = oe;
3994 	  j++;
3995 	}
3996       ops->truncate (j);
3997     }
3998 
3999   XDELETEVEC (ranges);
4000   return any_changes;
4001 }
4002 
4003 /* A subroutine of optimize_vec_cond_expr to extract and canonicalize
4004    the operands of the VEC_COND_EXPR.  Returns ERROR_MARK on failure,
4005    otherwise the comparison code.  TYPE is a return value that is set
4006    to the type of the comparison.  */
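/* E.g. (an illustrative sketch) for
     mask_7 = a_1 < b_2;
     x_3 = VEC_COND_EXPR <mask_7, { -1, ... }, { 0, ... }>;
   calling this on x_3 would return LT_EXPR, set *LHS to a_1, *RHS to b_2
   and *RETI to false, because the true and false vectors are the canonical
   all-ones and all-zeros constants.  */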
4007 
4008 static tree_code
4009 ovce_extract_ops (tree var, gassign **rets, bool *reti, tree *type,
4010 		  tree *lhs, tree *rhs, gassign **vcond)
4011 {
4012   if (TREE_CODE (var) != SSA_NAME)
4013     return ERROR_MARK;
4014 
4015   gassign *stmt = dyn_cast <gassign *> (SSA_NAME_DEF_STMT (var));
4016   if (stmt == NULL)
4017     return ERROR_MARK;
4018   if (vcond)
4019     *vcond = stmt;
4020 
4021   /* ??? If we start creating more COND_EXPR, we could perform
4022      this same optimization with them.	For now, simplify.  */
4023   if (gimple_assign_rhs_code (stmt) != VEC_COND_EXPR)
4024     return ERROR_MARK;
4025 
4026   tree cond = gimple_assign_rhs1 (stmt);
4027   tree_code cmp = TREE_CODE (cond);
4028   if (cmp != SSA_NAME)
4029     return ERROR_MARK;
4030 
4031   gassign *assign = dyn_cast<gassign *> (SSA_NAME_DEF_STMT (cond));
4032   if (assign == NULL
4033       || TREE_CODE_CLASS (gimple_assign_rhs_code (assign)) != tcc_comparison)
4034     return ERROR_MARK;
4035 
4036   cmp = gimple_assign_rhs_code (assign);
4037   if (lhs)
4038     *lhs = gimple_assign_rhs1 (assign);
4039   if (rhs)
4040     *rhs = gimple_assign_rhs2 (assign);
4041 
4042   /* ??? For now, allow only canonical true and false result vectors.
4043      We could expand this to other constants should the need arise,
4044      but at the moment we don't create them.  */
4045   tree t = gimple_assign_rhs2 (stmt);
4046   tree f = gimple_assign_rhs3 (stmt);
4047   bool inv;
4048   if (integer_all_onesp (t))
4049     inv = false;
4050   else if (integer_all_onesp (f))
4051     {
4052       cmp = invert_tree_comparison (cmp, false);
4053       inv = true;
4054     }
4055   else
4056     return ERROR_MARK;
4057   if (!integer_zerop (f))
4058     return ERROR_MARK;
4059 
4060   /* Success!  */
4061   if (rets)
4062     *rets = assign;
4063   if (reti)
4064     *reti = inv;
4065   if (type)
4066     *type = TREE_TYPE (cond);
4067   return cmp;
4068 }
4069 
4070 /* Optimize the condition of VEC_COND_EXPRs which have been combined
4071    with OPCODE (either BIT_AND_EXPR or BIT_IOR_EXPR).  */
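/* E.g. (illustrative): two such VEC_COND_EXPRs whose conditions are
   a_1 < b_2 and a_1 <= b_2, combined with BIT_AND_EXPR, can typically be
   folded by maybe_fold_and_comparisons into a single a_1 < b_2 condition,
   so one of the VEC_COND_EXPRs and the & disappear.  */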
4072 
4073 static bool
4074 optimize_vec_cond_expr (tree_code opcode, vec<operand_entry *> *ops)
4075 {
4076   unsigned int length = ops->length (), i, j;
4077   bool any_changes = false;
4078 
4079   if (length == 1)
4080     return false;
4081 
4082   for (i = 0; i < length; ++i)
4083     {
4084       tree elt0 = (*ops)[i]->op;
4085 
4086       gassign *stmt0, *vcond0;
4087       bool invert;
4088       tree type, lhs0, rhs0;
4089       tree_code cmp0 = ovce_extract_ops (elt0, &stmt0, &invert, &type, &lhs0,
4090 					 &rhs0, &vcond0);
4091       if (cmp0 == ERROR_MARK)
4092 	continue;
4093 
4094       for (j = i + 1; j < length; ++j)
4095 	{
4096 	  tree &elt1 = (*ops)[j]->op;
4097 
4098 	  gassign *stmt1, *vcond1;
4099 	  tree lhs1, rhs1;
4100 	  tree_code cmp1 = ovce_extract_ops (elt1, &stmt1, NULL, NULL, &lhs1,
4101 					     &rhs1, &vcond1);
4102 	  if (cmp1 == ERROR_MARK)
4103 	    continue;
4104 
4105 	  tree comb;
4106 	  if (opcode == BIT_AND_EXPR)
4107 	    comb = maybe_fold_and_comparisons (type, cmp0, lhs0, rhs0,
4108 					       cmp1, lhs1, rhs1);
4109 	  else if (opcode == BIT_IOR_EXPR)
4110 	    comb = maybe_fold_or_comparisons (type, cmp0, lhs0, rhs0,
4111 					      cmp1, lhs1, rhs1);
4112 	  else
4113 	    gcc_unreachable ();
4114 	  if (comb == NULL)
4115 	    continue;
4116 
4117 	  /* Success! */
4118 	  if (dump_file && (dump_flags & TDF_DETAILS))
4119 	    {
4120 	      fprintf (dump_file, "Transforming ");
4121 	      print_generic_expr (dump_file, gimple_assign_lhs (stmt0));
4122 	      fprintf (dump_file, " %c ", opcode == BIT_AND_EXPR ? '&' : '|');
4123 	      print_generic_expr (dump_file, gimple_assign_lhs (stmt1));
4124 	      fprintf (dump_file, " into ");
4125 	      print_generic_expr (dump_file, comb);
4126 	      fputc ('\n', dump_file);
4127 	    }
4128 
4129 	  gimple_stmt_iterator gsi = gsi_for_stmt (vcond0);
4130 	  tree exp = force_gimple_operand_gsi (&gsi, comb, true, NULL_TREE,
4131 					       true, GSI_SAME_STMT);
4132 	  if (invert)
4133 	    swap_ssa_operands (vcond0, gimple_assign_rhs2_ptr (vcond0),
4134 			       gimple_assign_rhs3_ptr (vcond0));
4135 	  gimple_assign_set_rhs1 (vcond0, exp);
4136 	  update_stmt (vcond0);
4137 
4138 	  elt1 = error_mark_node;
4139 	  any_changes = true;
4140 	}
4141     }
4142 
4143   if (any_changes)
4144     {
4145       operand_entry *oe;
4146       j = 0;
4147       FOR_EACH_VEC_ELT (*ops, i, oe)
4148 	{
4149 	  if (oe->op == error_mark_node)
4150 	    continue;
4151 	  else if (i != j)
4152 	    (*ops)[j] = oe;
4153 	  j++;
4154 	}
4155       ops->truncate (j);
4156     }
4157 
4158   return any_changes;
4159 }
4160 
4161 /* Return true if STMT is a cast like:
4162    <bb N>:
4163    ...
4164    _123 = (int) _234;
4165 
4166    <bb M>:
4167    # _345 = PHI <_123(N), 1(...), 1(...)>
4168    where _234 has boolean type, _123 has a single use and
4169    bb N has a single successor M.  This is commonly used in
4170    the last block of a range test.
4171 
4172    Also return true if STMT is a tcc_comparison like:
4173    <bb N>:
4174    ...
4175    _234 = a_2(D) == 2;
4176 
4177    <bb M>:
4178    # _345 = PHI <_234(N), 1(...), 1(...)>
4179    _346 = (int) _345;
4180    where _234 has boolean type, a single use and
4181    bb N has a single successor M.  This is commonly used in
4182    the last block of a range test.  */
4183 
4184 static bool
4185 final_range_test_p (gimple *stmt)
4186 {
4187   basic_block bb, rhs_bb, lhs_bb;
4188   edge e;
4189   tree lhs, rhs;
4190   use_operand_p use_p;
4191   gimple *use_stmt;
4192 
4193   if (!gimple_assign_cast_p (stmt)
4194       && (!is_gimple_assign (stmt)
4195 	  || (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt))
4196 	      != tcc_comparison)))
4197     return false;
4198   bb = gimple_bb (stmt);
4199   if (!single_succ_p (bb))
4200     return false;
4201   e = single_succ_edge (bb);
4202   if (e->flags & EDGE_COMPLEX)
4203     return false;
4204 
4205   lhs = gimple_assign_lhs (stmt);
4206   rhs = gimple_assign_rhs1 (stmt);
4207   if (gimple_assign_cast_p (stmt)
4208       && (!INTEGRAL_TYPE_P (TREE_TYPE (lhs))
4209 	  || TREE_CODE (rhs) != SSA_NAME
4210 	  || TREE_CODE (TREE_TYPE (rhs)) != BOOLEAN_TYPE))
4211     return false;
4212 
4213   if (!gimple_assign_cast_p (stmt)
4214       && (TREE_CODE (TREE_TYPE (lhs)) != BOOLEAN_TYPE))
4215       return false;
4216 
4217   /* Test whether lhs is consumed only by a PHI in the only successor bb.  */
4218   if (!single_imm_use (lhs, &use_p, &use_stmt))
4219     return false;
4220 
4221   if (gimple_code (use_stmt) != GIMPLE_PHI
4222       || gimple_bb (use_stmt) != e->dest)
4223     return false;
4224 
4225   /* And that the rhs is defined in the same loop.  */
4226   if (gimple_assign_cast_p (stmt))
4227     {
4228       if (TREE_CODE (rhs) != SSA_NAME
4229 	  || !(rhs_bb = gimple_bb (SSA_NAME_DEF_STMT (rhs)))
4230 	  || !flow_bb_inside_loop_p (loop_containing_stmt (stmt), rhs_bb))
4231 	return false;
4232     }
4233   else
4234     {
4235       if (TREE_CODE (lhs) != SSA_NAME
4236 	  || !(lhs_bb = gimple_bb (SSA_NAME_DEF_STMT (lhs)))
4237 	  || !flow_bb_inside_loop_p (loop_containing_stmt (stmt), lhs_bb))
4238 	return false;
4239     }
4240 
4241   return true;
4242 }
4243 
4244 /* Return true if BB is a suitable basic block for inter-bb range test
4245    optimization.  If BACKWARD is true, BB should be the only predecessor
4246    of TEST_BB, and *OTHER_BB is either NULL and filled in by the routine,
4247    or compared against to find a common basic block to which all conditions
4248    branch if true resp. false.  If BACKWARD is false, TEST_BB should
4249    be the only predecessor of BB.  *TEST_SWAPPED_P is set to true if
4250    TEST_BB is a bb ending in condition where the edge to non-*OTHER_BB
4251    block points to an empty block that falls through into *OTHER_BB and
4252    the phi args match that path.  */
4253 
4254 static bool
4255 suitable_cond_bb (basic_block bb, basic_block test_bb, basic_block *other_bb,
4256 		  bool *test_swapped_p, bool backward)
4257 {
4258   edge_iterator ei, ei2;
4259   edge e, e2;
4260   gimple *stmt;
4261   gphi_iterator gsi;
4262   bool other_edge_seen = false;
4263   bool is_cond;
4264 
4265   if (test_bb == bb)
4266     return false;
4267   /* Check last stmt first.  */
4268   stmt = last_stmt (bb);
4269   if (stmt == NULL
4270       || (gimple_code (stmt) != GIMPLE_COND
4271 	  && (backward || !final_range_test_p (stmt)))
4272       || gimple_visited_p (stmt)
4273       || stmt_could_throw_p (cfun, stmt)
4274       || *other_bb == bb)
4275     return false;
4276   is_cond = gimple_code (stmt) == GIMPLE_COND;
4277   if (is_cond)
4278     {
4279       /* If last stmt is GIMPLE_COND, verify that one of the succ edges
4280 	 goes to the next bb (if BACKWARD, it is TEST_BB), and the other
4281 	 to *OTHER_BB (if not set yet, try to find it out).  */
4282       if (EDGE_COUNT (bb->succs) != 2)
4283 	return false;
4284       FOR_EACH_EDGE (e, ei, bb->succs)
4285 	{
4286 	  if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
4287 	    return false;
4288 	  if (e->dest == test_bb)
4289 	    {
4290 	      if (backward)
4291 		continue;
4292 	      else
4293 		return false;
4294 	    }
4295 	  if (e->dest == bb)
4296 	    return false;
4297 	  if (*other_bb == NULL)
4298 	    {
4299 	      FOR_EACH_EDGE (e2, ei2, test_bb->succs)
4300 		if (!(e2->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
4301 		  return false;
4302 		else if (e->dest == e2->dest)
4303 		  *other_bb = e->dest;
4304 	      if (*other_bb == NULL)
4305 		return false;
4306 	    }
4307 	  if (e->dest == *other_bb)
4308 	    other_edge_seen = true;
4309 	  else if (backward)
4310 	    return false;
4311 	}
4312       if (*other_bb == NULL || !other_edge_seen)
4313 	return false;
4314     }
4315   else if (single_succ (bb) != *other_bb)
4316     return false;
4317 
4318   /* Now check all PHIs of *OTHER_BB.  */
4319   e = find_edge (bb, *other_bb);
4320   e2 = find_edge (test_bb, *other_bb);
4321  retry:;
4322   for (gsi = gsi_start_phis (e->dest); !gsi_end_p (gsi); gsi_next (&gsi))
4323     {
4324       gphi *phi = gsi.phi ();
4325       /* If both BB and TEST_BB end with GIMPLE_COND, all PHI arguments
4326 	 corresponding to BB and TEST_BB predecessor must be the same.  */
4327       if (!operand_equal_p (gimple_phi_arg_def (phi, e->dest_idx),
4328 			    gimple_phi_arg_def (phi, e2->dest_idx), 0))
4329 	{
4330 	  /* Otherwise, if one of the blocks doesn't end with GIMPLE_COND,
4331 	     one of the PHIs should have the lhs of the last stmt in
4332 	     that block as PHI arg and that PHI should have 0 or 1
4333 	     corresponding to it in all other range test basic blocks
4334 	     considered.  */
4335 	  if (!is_cond)
4336 	    {
4337 	      if (gimple_phi_arg_def (phi, e->dest_idx)
4338 		  == gimple_assign_lhs (stmt)
4339 		  && (integer_zerop (gimple_phi_arg_def (phi, e2->dest_idx))
4340 		      || integer_onep (gimple_phi_arg_def (phi,
4341 							   e2->dest_idx))))
4342 		continue;
4343 	    }
4344 	  else
4345 	    {
4346 	      gimple *test_last = last_stmt (test_bb);
4347 	      if (gimple_code (test_last) == GIMPLE_COND)
4348 		{
4349 		  if (backward ? e2->src != test_bb : e->src != bb)
4350 		    return false;
4351 
4352 		  /* For last_bb, handle also:
4353 		     if (x_3(D) == 3)
4354 		       goto <bb 6>; [34.00%]
4355 		     else
4356 		       goto <bb 7>; [66.00%]
4357 
4358 		     <bb 6> [local count: 79512730]:
4359 
4360 		     <bb 7> [local count: 1073741824]:
4361 		     # prephitmp_7 = PHI <1(3), 1(4), 0(5), 1(2), 1(6)>
4362 		     where bb 7 is *OTHER_BB, but the PHI values from the
4363 		     earlier bbs match the path through the empty bb
4364 		     in between.  */
4365 		  edge e3;
4366 		  if (backward)
4367 		    e3 = EDGE_SUCC (test_bb,
4368 				    e2 == EDGE_SUCC (test_bb, 0) ? 1 : 0);
4369 		  else
4370 		    e3 = EDGE_SUCC (bb,
4371 				    e == EDGE_SUCC (bb, 0) ? 1 : 0);
4372 		  if (empty_block_p (e3->dest)
4373 		      && single_succ_p (e3->dest)
4374 		      && single_succ (e3->dest) == *other_bb
4375 		      && single_pred_p (e3->dest)
4376 		      && single_succ_edge (e3->dest)->flags == EDGE_FALLTHRU)
4377 		    {
4378 		      if (backward)
4379 			e2 = single_succ_edge (e3->dest);
4380 		      else
4381 			e = single_succ_edge (e3->dest);
4382 		      if (test_swapped_p)
4383 			*test_swapped_p = true;
4384 		      goto retry;
4385 		    }
4386 		}
4387 	      else if (gimple_phi_arg_def (phi, e2->dest_idx)
4388 		       == gimple_assign_lhs (test_last)
4389 		       && (integer_zerop (gimple_phi_arg_def (phi,
4390 							      e->dest_idx))
4391 			   || integer_onep (gimple_phi_arg_def (phi,
4392 								e->dest_idx))))
4393 		continue;
4394 	    }
4395 
4396 	  return false;
4397 	}
4398     }
4399   return true;
4400 }
4401 
4402 /* Return true if BB doesn't have side-effects that would disallow
4403    range test optimization, all SSA_NAMEs set in the bb are consumed
4404    in the bb and there are no PHIs.  */
4405 
4406 bool
4407 no_side_effect_bb (basic_block bb)
4408 {
4409   gimple_stmt_iterator gsi;
4410   gimple *last;
4411 
4412   if (!gimple_seq_empty_p (phi_nodes (bb)))
4413     return false;
4414   last = last_stmt (bb);
4415   for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
4416     {
4417       gimple *stmt = gsi_stmt (gsi);
4418       tree lhs;
4419       imm_use_iterator imm_iter;
4420       use_operand_p use_p;
4421 
4422       if (is_gimple_debug (stmt))
4423 	continue;
4424       if (gimple_has_side_effects (stmt))
4425 	return false;
4426       if (stmt == last)
4427 	return true;
4428       if (!is_gimple_assign (stmt))
4429 	return false;
4430       lhs = gimple_assign_lhs (stmt);
4431       if (TREE_CODE (lhs) != SSA_NAME)
4432 	return false;
4433       if (gimple_assign_rhs_could_trap_p (stmt))
4434 	return false;
4435       FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
4436 	{
4437 	  gimple *use_stmt = USE_STMT (use_p);
4438 	  if (is_gimple_debug (use_stmt))
4439 	    continue;
4440 	  if (gimple_bb (use_stmt) != bb)
4441 	    return false;
4442 	}
4443     }
4444   return false;
4445 }
4446 
4447 /* If VAR is set by CODE (BIT_{AND,IOR}_EXPR) which is reassociable,
4448    return true and fill in *OPS recursively.  */
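/* E.g. (illustrative), for
     _5 = a_1 | b_2;
     _6 = _5 | c_3;
   get_ops (_6, BIT_IOR_EXPR, ...) pushes a_1, b_2 and c_3 into *OPS,
   assuming both | statements are reassociable in LOOP and each of the
   leaf operands has a single use.  */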
4449 
4450 static bool
4451 get_ops (tree var, enum tree_code code, vec<operand_entry *> *ops,
4452 	 class loop *loop)
4453 {
4454   gimple *stmt = SSA_NAME_DEF_STMT (var);
4455   tree rhs[2];
4456   int i;
4457 
4458   if (!is_reassociable_op (stmt, code, loop))
4459     return false;
4460 
4461   rhs[0] = gimple_assign_rhs1 (stmt);
4462   rhs[1] = gimple_assign_rhs2 (stmt);
4463   gimple_set_visited (stmt, true);
4464   for (i = 0; i < 2; i++)
4465     if (TREE_CODE (rhs[i]) == SSA_NAME
4466 	&& !get_ops (rhs[i], code, ops, loop)
4467 	&& has_single_use (rhs[i]))
4468       {
4469 	operand_entry *oe = operand_entry_pool.allocate ();
4470 
4471 	oe->op = rhs[i];
4472 	oe->rank = code;
4473 	oe->id = 0;
4474 	oe->count = 1;
4475 	oe->stmt_to_insert = NULL;
4476 	ops->safe_push (oe);
4477       }
4478   return true;
4479 }
4480 
4481 /* Find the ops that were added by get_ops starting from VAR, see if
4482    they were changed during update_range_test and if so, create new
4483    stmts.  */
4484 
4485 static tree
4486 update_ops (tree var, enum tree_code code, vec<operand_entry *> ops,
4487 	    unsigned int *pidx, class loop *loop)
4488 {
4489   gimple *stmt = SSA_NAME_DEF_STMT (var);
4490   tree rhs[4];
4491   int i;
4492 
4493   if (!is_reassociable_op (stmt, code, loop))
4494     return NULL;
4495 
4496   rhs[0] = gimple_assign_rhs1 (stmt);
4497   rhs[1] = gimple_assign_rhs2 (stmt);
4498   rhs[2] = rhs[0];
4499   rhs[3] = rhs[1];
4500   for (i = 0; i < 2; i++)
4501     if (TREE_CODE (rhs[i]) == SSA_NAME)
4502       {
4503 	rhs[2 + i] = update_ops (rhs[i], code, ops, pidx, loop);
4504 	if (rhs[2 + i] == NULL_TREE)
4505 	  {
4506 	    if (has_single_use (rhs[i]))
4507 	      rhs[2 + i] = ops[(*pidx)++]->op;
4508 	    else
4509 	      rhs[2 + i] = rhs[i];
4510 	  }
4511       }
4512   if ((rhs[2] != rhs[0] || rhs[3] != rhs[1])
4513       && (rhs[2] != rhs[1] || rhs[3] != rhs[0]))
4514     {
4515       gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
4516       var = make_ssa_name (TREE_TYPE (var));
4517       gassign *g = gimple_build_assign (var, gimple_assign_rhs_code (stmt),
4518 					rhs[2], rhs[3]);
4519       gimple_set_uid (g, gimple_uid (stmt));
4520       gimple_set_visited (g, true);
4521       gsi_insert_before (&gsi, g, GSI_SAME_STMT);
4522     }
4523   return var;
4524 }
4525 
4526 /* Structure to track the initial value passed to get_ops and
4527    the range in the ops vector for each basic block.  */
4528 
4529 struct inter_bb_range_test_entry
4530 {
4531   tree op;
4532   unsigned int first_idx, last_idx;
4533 };
4534 
4535 /* Inter-bb range test optimization.
4536 
4537    Returns TRUE if a gimple conditional is optimized to a constant
4538    true/false, otherwise returns FALSE.
4539 
4540    This indicates to the caller that it should run a CFG cleanup pass
4541    once reassociation is completed.  */
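/* A sketch of the shape this routine looks for (illustrative GIMPLE):

     <bb 2>: if (a_1 == 3) goto <bb 5>; else goto <bb 3>;
     <bb 3>: if (a_1 == 5) goto <bb 5>; else goto <bb 4>;
     <bb 4>: _7 = a_1 == 7;
     <bb 5>: # ans_2 = PHI <1(2), 1(3), _7(4)>

   i.e. a_1 == 3 || a_1 == 5 || a_1 == 7 spread over several basic blocks;
   the individual tests are collected into an ops vector and handed to
   optimize_range_tests just like a single-bb || chain would be.  */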
4542 
4543 static bool
4544 maybe_optimize_range_tests (gimple *stmt)
4545 {
4546   basic_block first_bb = gimple_bb (stmt);
4547   basic_block last_bb = first_bb;
4548   basic_block other_bb = NULL;
4549   basic_block bb;
4550   edge_iterator ei;
4551   edge e;
4552   auto_vec<operand_entry *> ops;
4553   auto_vec<inter_bb_range_test_entry> bbinfo;
4554   bool any_changes = false;
4555   bool cfg_cleanup_needed = false;
4556 
4557   /* Consider only basic blocks that end with GIMPLE_COND or
4558      a cast statement satisfying final_range_test_p.  All
4559      but the last bb in the first_bb .. last_bb range
4560      should end with GIMPLE_COND.  */
4561   if (gimple_code (stmt) == GIMPLE_COND)
4562     {
4563       if (EDGE_COUNT (first_bb->succs) != 2)
4564 	return cfg_cleanup_needed;
4565     }
4566   else if (final_range_test_p (stmt))
4567     other_bb = single_succ (first_bb);
4568   else
4569     return cfg_cleanup_needed;
4570 
4571   if (stmt_could_throw_p (cfun, stmt))
4572     return cfg_cleanup_needed;
4573 
4574   /* As relative ordering of post-dominator sons isn't fixed,
4575      maybe_optimize_range_tests can be called first on any
4576      bb in the range we want to optimize.  So, start searching
4577      backwards, if first_bb can be set to a predecessor.  */
4578   while (single_pred_p (first_bb))
4579     {
4580       basic_block pred_bb = single_pred (first_bb);
4581       if (!suitable_cond_bb (pred_bb, first_bb, &other_bb, NULL, true))
4582 	break;
4583       if (!no_side_effect_bb (first_bb))
4584 	break;
4585       first_bb = pred_bb;
4586     }
4587   /* If first_bb is last_bb, other_bb hasn't been computed yet.
4588      Before starting forward search in last_bb successors, find
4589      out the other_bb.  */
4590   if (first_bb == last_bb)
4591     {
4592       other_bb = NULL;
4593       /* As non-GIMPLE_COND last stmt always terminates the range,
4594 	 if forward search didn't discover anything, just give up.  */
4595       if (gimple_code (stmt) != GIMPLE_COND)
4596 	return cfg_cleanup_needed;
4597       /* Look at both successors.  Either it ends with a GIMPLE_COND
4598 	 and satisfies suitable_cond_bb, or ends with a cast and
4599 	 other_bb is that cast's successor.  */
4600       FOR_EACH_EDGE (e, ei, first_bb->succs)
4601 	if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE))
4602 	    || e->dest == first_bb)
4603 	  return cfg_cleanup_needed;
4604 	else if (single_pred_p (e->dest))
4605 	  {
4606 	    stmt = last_stmt (e->dest);
4607 	    if (stmt
4608 		&& gimple_code (stmt) == GIMPLE_COND
4609 		&& EDGE_COUNT (e->dest->succs) == 2)
4610 	      {
4611 		if (suitable_cond_bb (first_bb, e->dest, &other_bb,
4612 				      NULL, true))
4613 		  break;
4614 		else
4615 		  other_bb = NULL;
4616 	      }
4617 	    else if (stmt
4618 		     && final_range_test_p (stmt)
4619 		     && find_edge (first_bb, single_succ (e->dest)))
4620 	      {
4621 		other_bb = single_succ (e->dest);
4622 		if (other_bb == first_bb)
4623 		  other_bb = NULL;
4624 	      }
4625 	  }
4626       if (other_bb == NULL)
4627 	return cfg_cleanup_needed;
4628     }
4629   /* Now do the forward search, moving last_bb to successor bbs
4630      that aren't other_bb.  */
4631   while (EDGE_COUNT (last_bb->succs) == 2)
4632     {
4633       FOR_EACH_EDGE (e, ei, last_bb->succs)
4634 	if (e->dest != other_bb)
4635 	  break;
4636       if (e == NULL)
4637 	break;
4638       if (!single_pred_p (e->dest))
4639 	break;
4640       if (!suitable_cond_bb (e->dest, last_bb, &other_bb, NULL, false))
4641 	break;
4642       if (!no_side_effect_bb (e->dest))
4643 	break;
4644       last_bb = e->dest;
4645     }
4646   if (first_bb == last_bb)
4647     return cfg_cleanup_needed;
4648   /* Here basic blocks first_bb through last_bb's predecessor
4649      end with GIMPLE_COND, all of them have one of the edges to
4650      other_bb and another to another block in the range,
4651      all blocks except first_bb don't have side-effects and
4652      last_bb ends with either GIMPLE_COND, or cast satisfying
4653      final_range_test_p.  */
4654   for (bb = last_bb; ; bb = single_pred (bb))
4655     {
4656       enum tree_code code;
4657       tree lhs, rhs;
4658       inter_bb_range_test_entry bb_ent;
4659 
4660       bb_ent.op = NULL_TREE;
4661       bb_ent.first_idx = ops.length ();
4662       bb_ent.last_idx = bb_ent.first_idx;
4663       e = find_edge (bb, other_bb);
4664       stmt = last_stmt (bb);
4665       gimple_set_visited (stmt, true);
4666       if (gimple_code (stmt) != GIMPLE_COND)
4667 	{
4668 	  use_operand_p use_p;
4669 	  gimple *phi;
4670 	  edge e2;
4671 	  unsigned int d;
4672 
4673 	  lhs = gimple_assign_lhs (stmt);
4674 	  rhs = gimple_assign_rhs1 (stmt);
4675 	  gcc_assert (bb == last_bb);
4676 
4677 	  /* stmt is
4678 	     _123 = (int) _234;
4679 	     OR
4680 	     _234 = a_2(D) == 2;
4681 
4682 	     followed by:
4683 	     <bb M>:
4684 	     # _345 = PHI <_123(N), 1(...), 1(...)>
4685 
4686 	     or 0 instead of 1.  If it is 0, the _234
4687 	     range test is anded together with all the
4688 	     other range tests; if it is 1, it is ored with
4689 	     them.  */
4690 	  single_imm_use (lhs, &use_p, &phi);
4691 	  gcc_assert (gimple_code (phi) == GIMPLE_PHI);
4692 	  e2 = find_edge (first_bb, other_bb);
4693 	  d = e2->dest_idx;
4694 	  gcc_assert (gimple_phi_arg_def (phi, e->dest_idx) == lhs);
4695 	  if (integer_zerop (gimple_phi_arg_def (phi, d)))
4696 	    code = BIT_AND_EXPR;
4697 	  else
4698 	    {
4699 	      gcc_checking_assert (integer_onep (gimple_phi_arg_def (phi, d)));
4700 	      code = BIT_IOR_EXPR;
4701 	    }
4702 
4703 	  /* If _234 SSA_NAME_DEF_STMT is
4704 	     _234 = _567 | _789;
4705 	     (or &, corresponding to 1/0 in the phi arguments),
4706 	     push into ops the individual range test arguments
4707 	     of the bitwise or resp. and, recursively.  */
4708 	  if (TREE_CODE (rhs) == SSA_NAME
4709 	      && (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt))
4710 		  != tcc_comparison)
4711 	      && !get_ops (rhs, code, &ops,
4712 			loop_containing_stmt (stmt))
4713 	      && has_single_use (rhs))
4714 	    {
4715 	      /* Otherwise, push the _234 range test itself.  */
4716 	      operand_entry *oe = operand_entry_pool.allocate ();
4717 
4718 	      oe->op = rhs;
4719 	      oe->rank = code;
4720 	      oe->id = 0;
4721 	      oe->count = 1;
4722 	      oe->stmt_to_insert = NULL;
4723 	      ops.safe_push (oe);
4724 	      bb_ent.last_idx++;
4725 	      bb_ent.op = rhs;
4726 	    }
4727 	  else if (is_gimple_assign (stmt)
4728 		   && (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt))
4729 		       == tcc_comparison)
4730 		   && !get_ops (lhs, code, &ops,
4731 				loop_containing_stmt (stmt))
4732 		   && has_single_use (lhs))
4733 	    {
4734 	      operand_entry *oe = operand_entry_pool.allocate ();
4735 	      oe->op = lhs;
4736 	      oe->rank = code;
4737 	      oe->id = 0;
4738 	      oe->count = 1;
4739 	      ops.safe_push (oe);
4740 	      bb_ent.last_idx++;
4741 	      bb_ent.op = lhs;
4742 	    }
4743 	  else
4744 	    {
4745 	      bb_ent.last_idx = ops.length ();
4746 	      bb_ent.op = rhs;
4747 	    }
4748 	  bbinfo.safe_push (bb_ent);
4749 	  continue;
4750 	}
4751       else if (bb == last_bb)
4752 	{
4753 	  /* For last_bb, handle also:
4754 	     if (x_3(D) == 3)
4755 	       goto <bb 6>; [34.00%]
4756 	     else
4757 	       goto <bb 7>; [66.00%]
4758 
4759 	     <bb 6> [local count: 79512730]:
4760 
4761 	     <bb 7> [local count: 1073741824]:
4762 	     # prephitmp_7 = PHI <1(3), 1(4), 0(5), 1(2), 1(6)>
4763 	     where bb 7 is OTHER_BB, but the PHI values from the
4764 	     earlier bbs match the path through the empty bb
4765 	     in between.  */
4766 	  bool test_swapped_p = false;
4767 	  bool ok = suitable_cond_bb (single_pred (last_bb), last_bb,
4768 				      &other_bb, &test_swapped_p, true);
4769 	  gcc_assert (ok);
4770 	  if (test_swapped_p)
4771 	    e = EDGE_SUCC (bb, e == EDGE_SUCC (bb, 0) ? 1 : 0);
4772 	}
4773       /* Otherwise stmt is GIMPLE_COND.  */
4774       code = gimple_cond_code (stmt);
4775       lhs = gimple_cond_lhs (stmt);
4776       rhs = gimple_cond_rhs (stmt);
4777       if (TREE_CODE (lhs) == SSA_NAME
4778 	  && INTEGRAL_TYPE_P (TREE_TYPE (lhs))
4779 	  && ((code != EQ_EXPR && code != NE_EXPR)
4780 	      || rhs != boolean_false_node
4781 		 /* Either push into ops the individual bitwise
4782 		    or resp. and operands, depending on which
4783 		    edge is other_bb.  */
4784 	      || !get_ops (lhs, (((e->flags & EDGE_TRUE_VALUE) == 0)
4785 				 ^ (code == EQ_EXPR))
4786 				? BIT_AND_EXPR : BIT_IOR_EXPR, &ops,
4787 			   loop_containing_stmt (stmt))))
4788 	{
4789 	  /* Or push the GIMPLE_COND stmt itself.  */
4790 	  operand_entry *oe = operand_entry_pool.allocate ();
4791 
4792 	  oe->op = NULL;
4793 	  oe->rank = (e->flags & EDGE_TRUE_VALUE)
4794 		     ? BIT_IOR_EXPR : BIT_AND_EXPR;
4795 	  /* oe->op = NULL signals that there is no SSA_NAME
4796 	     for the range test, and oe->id instead is the
4797 	     number of the basic block at whose end the GIMPLE_COND
4798 	     is.  */
4799 	  oe->id = bb->index;
4800 	  oe->count = 1;
4801 	  oe->stmt_to_insert = NULL;
4802 	  ops.safe_push (oe);
4803 	  bb_ent.op = NULL;
4804 	  bb_ent.last_idx++;
4805 	}
4806       else if (ops.length () > bb_ent.first_idx)
4807 	{
4808 	  bb_ent.op = lhs;
4809 	  bb_ent.last_idx = ops.length ();
4810 	}
4811       bbinfo.safe_push (bb_ent);
4812       if (bb == first_bb)
4813 	break;
4814     }
4815   if (ops.length () > 1)
4816     any_changes = optimize_range_tests (ERROR_MARK, &ops, first_bb);
4817   if (any_changes)
4818     {
4819       unsigned int idx, max_idx = 0;
4820       /* update_ops relies on has_single_use predicates returning the
4821 	 same values as they did during get_ops earlier.  Additionally it
4822 	 never removes statements, only adds new ones and it should walk
4823 	 from the single imm use and check the predicate already before
4824 	 making those changes.
4825 	 On the other hand, the handling of GIMPLE_COND directly can turn
4826 	 previously multiply used SSA_NAMEs into single use SSA_NAMEs, so
4827 	 it needs to be done in a separate loop afterwards.  */
4828       for (bb = last_bb, idx = 0; ; bb = single_pred (bb), idx++)
4829 	{
4830 	  if (bbinfo[idx].first_idx < bbinfo[idx].last_idx
4831 	      && bbinfo[idx].op != NULL_TREE)
4832 	    {
4833 	      tree new_op;
4834 
4835 	      max_idx = idx;
4836 	      stmt = last_stmt (bb);
4837 	      new_op = update_ops (bbinfo[idx].op,
4838 				   (enum tree_code)
4839 				   ops[bbinfo[idx].first_idx]->rank,
4840 				   ops, &bbinfo[idx].first_idx,
4841 				   loop_containing_stmt (stmt));
4842 	      if (new_op == NULL_TREE)
4843 		{
4844 		  gcc_assert (bb == last_bb);
4845 		  new_op = ops[bbinfo[idx].first_idx++]->op;
4846 		}
4847 	      if (bbinfo[idx].op != new_op)
4848 		{
4849 		  imm_use_iterator iter;
4850 		  use_operand_p use_p;
4851 		  gimple *use_stmt, *cast_or_tcc_cmp_stmt = NULL;
4852 
4853 		  FOR_EACH_IMM_USE_STMT (use_stmt, iter, bbinfo[idx].op)
4854 		    if (is_gimple_debug (use_stmt))
4855 		      continue;
4856 		    else if (gimple_code (use_stmt) == GIMPLE_COND
4857 			     || gimple_code (use_stmt) == GIMPLE_PHI)
4858 		      FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
4859 			SET_USE (use_p, new_op);
4860 		    else if ((is_gimple_assign (use_stmt)
4861 			      && (TREE_CODE_CLASS
4862 				  (gimple_assign_rhs_code (use_stmt))
4863 				  == tcc_comparison)))
4864 		      cast_or_tcc_cmp_stmt = use_stmt;
4865 		    else if (gimple_assign_cast_p (use_stmt))
4866 		      cast_or_tcc_cmp_stmt = use_stmt;
4867 		    else
4868 		      gcc_unreachable ();
4869 
4870 		  if (cast_or_tcc_cmp_stmt)
4871 		    {
4872 		      gcc_assert (bb == last_bb);
4873 		      tree lhs = gimple_assign_lhs (cast_or_tcc_cmp_stmt);
4874 		      tree new_lhs = make_ssa_name (TREE_TYPE (lhs));
4875 		      enum tree_code rhs_code
4876 			= gimple_assign_cast_p (cast_or_tcc_cmp_stmt)
4877 			? gimple_assign_rhs_code (cast_or_tcc_cmp_stmt)
4878 			: CONVERT_EXPR;
4879 		      gassign *g;
4880 		      if (is_gimple_min_invariant (new_op))
4881 			{
4882 			  new_op = fold_convert (TREE_TYPE (lhs), new_op);
4883 			  g = gimple_build_assign (new_lhs, new_op);
4884 			}
4885 		      else
4886 			g = gimple_build_assign (new_lhs, rhs_code, new_op);
4887 		      gimple_stmt_iterator gsi
4888 			= gsi_for_stmt (cast_or_tcc_cmp_stmt);
4889 		      gimple_set_uid (g, gimple_uid (cast_or_tcc_cmp_stmt));
4890 		      gimple_set_visited (g, true);
4891 		      gsi_insert_before (&gsi, g, GSI_SAME_STMT);
4892 		      FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs)
4893 			if (is_gimple_debug (use_stmt))
4894 			  continue;
4895 			else if (gimple_code (use_stmt) == GIMPLE_COND
4896 				 || gimple_code (use_stmt) == GIMPLE_PHI)
4897 			  FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
4898 			    SET_USE (use_p, new_lhs);
4899 			else
4900 			  gcc_unreachable ();
4901 		    }
4902 		}
4903 	    }
4904 	  if (bb == first_bb)
4905 	    break;
4906 	}
4907       for (bb = last_bb, idx = 0; ; bb = single_pred (bb), idx++)
4908 	{
4909 	  if (bbinfo[idx].first_idx < bbinfo[idx].last_idx
4910 	      && bbinfo[idx].op == NULL_TREE
4911 	      && ops[bbinfo[idx].first_idx]->op != NULL_TREE)
4912 	    {
4913 	      gcond *cond_stmt = as_a <gcond *> (last_stmt (bb));
4914 
4915 	      if (idx > max_idx)
4916 		max_idx = idx;
4917 
4918 	      /* If we collapse the conditional to a true/false
4919 		 condition, then bubble that knowledge up to our caller.  */
4920 	      if (integer_zerop (ops[bbinfo[idx].first_idx]->op))
4921 		{
4922 		  gimple_cond_make_false (cond_stmt);
4923 		  cfg_cleanup_needed = true;
4924 		}
4925 	      else if (integer_onep (ops[bbinfo[idx].first_idx]->op))
4926 		{
4927 		  gimple_cond_make_true (cond_stmt);
4928 		  cfg_cleanup_needed = true;
4929 		}
4930 	      else
4931 		{
4932 		  gimple_cond_set_code (cond_stmt, NE_EXPR);
4933 		  gimple_cond_set_lhs (cond_stmt,
4934 				       ops[bbinfo[idx].first_idx]->op);
4935 		  gimple_cond_set_rhs (cond_stmt, boolean_false_node);
4936 		}
4937 	      update_stmt (cond_stmt);
4938 	    }
4939 	  if (bb == first_bb)
4940 	    break;
4941 	}
4942 
4943       /* The above changes could result in basic blocks after the first
4944 	 modified one, up to and including last_bb, being executed even if
4945 	 they would not be in the original program.  If the value ranges of
4946 	 assignment lhs' in those bbs were dependent on the conditions
4947 	 guarding those basic blocks, which now can change, the VRs might
4948 	 be incorrect.  As no_side_effect_bb should ensure those SSA_NAMEs
4949 	 are only used within the same bb, it should not be a big deal if
4950 	 we just reset all the VRs in those bbs.  See PR68671.  */
4951       for (bb = last_bb, idx = 0; idx < max_idx; bb = single_pred (bb), idx++)
4952 	reset_flow_sensitive_info_in_bb (bb);
4953     }
4954   return cfg_cleanup_needed;
4955 }
4956 
4957 /* Return true if OPERAND is defined by a PHI node which uses the LHS
4958    of STMT in its operands.  This is also known as a "destructive
4959    update" operation.  */
4960 
4961 static bool
4962 is_phi_for_stmt (gimple *stmt, tree operand)
4963 {
4964   gimple *def_stmt;
4965   gphi *def_phi;
4966   tree lhs;
4967   use_operand_p arg_p;
4968   ssa_op_iter i;
4969 
4970   if (TREE_CODE (operand) != SSA_NAME)
4971     return false;
4972 
4973   lhs = gimple_assign_lhs (stmt);
4974 
4975   def_stmt = SSA_NAME_DEF_STMT (operand);
4976   def_phi = dyn_cast <gphi *> (def_stmt);
4977   if (!def_phi)
4978     return false;
4979 
4980   FOR_EACH_PHI_ARG (arg_p, def_phi, i, SSA_OP_USE)
4981     if (lhs == USE_FROM_PTR (arg_p))
4982       return true;
4983   return false;
4984 }
4985 
4986 /* Remove def stmt of VAR if VAR has zero uses and recurse
4987    on rhs1 operand if so.  */
4988 
4989 static void
4990 remove_visited_stmt_chain (tree var)
4991 {
4992   gimple *stmt;
4993   gimple_stmt_iterator gsi;
4994 
4995   while (1)
4996     {
4997       if (TREE_CODE (var) != SSA_NAME || !has_zero_uses (var))
4998 	return;
4999       stmt = SSA_NAME_DEF_STMT (var);
5000       if (is_gimple_assign (stmt) && gimple_visited_p (stmt))
5001 	{
5002 	  var = gimple_assign_rhs1 (stmt);
5003 	  gsi = gsi_for_stmt (stmt);
5004 	  reassoc_remove_stmt (&gsi);
5005 	  release_defs (stmt);
5006 	}
5007       else
5008 	return;
5009     }
5010 }
5011 
5012 /* This function checks three consecutive operands in
5013    the passed operands vector OPS, starting from OPINDEX, and
5014    swaps two operands if it is profitable for the binary operation
5015    consuming the OPINDEX + 1 and OPINDEX + 2 operands.
5016 
5017    We pair ops with the same rank if possible.
5018 
5019    The alternative we try is to see if STMT is a destructive
5020    update style statement, which is like:
5021    b = phi (a, ...)
5022    a = c + b;
5023    In that case, we want to use the destructive update form to
5024    expose the possible vectorizer sum reduction opportunity.
5025    In that case, the third operand will be the phi node. This
5026    check is not performed if STMT is null.
5027 
5028    We could, of course, try to be better as noted above, and do a
5029    lot of work to try to find these opportunities in >3 operand
5030    cases, but it is unlikely to be worth it.  */
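
/* A small example of the rank pairing above (hypothetical ranks, not taken
   from the sources): with ops[OPINDEX..OPINDEX+2] = {a (rank 3), b (rank 3),
   c (rank 5)}, oe1 and oe3 are swapped so the equal-rank pair b, a is left
   in the OPINDEX + 1 / OPINDEX + 2 slots consumed by the next binary
   operation.  */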
5031 
5032 static void
5033 swap_ops_for_binary_stmt (vec<operand_entry *> ops,
5034 			  unsigned int opindex, gimple *stmt)
5035 {
5036   operand_entry *oe1, *oe2, *oe3;
5037 
5038   oe1 = ops[opindex];
5039   oe2 = ops[opindex + 1];
5040   oe3 = ops[opindex + 2];
5041 
5042   if ((oe1->rank == oe2->rank
5043        && oe2->rank != oe3->rank)
5044       || (stmt && is_phi_for_stmt (stmt, oe3->op)
5045 	  && !is_phi_for_stmt (stmt, oe1->op)
5046 	  && !is_phi_for_stmt (stmt, oe2->op)))
5047     std::swap (*oe1, *oe3);
5048   else if ((oe1->rank == oe3->rank
5049 	    && oe2->rank != oe3->rank)
5050 	   || (stmt && is_phi_for_stmt (stmt, oe2->op)
5051 	       && !is_phi_for_stmt (stmt, oe1->op)
5052 	       && !is_phi_for_stmt (stmt, oe3->op)))
5053     std::swap (*oe1, *oe2);
5054 }
5055 
5056 /* If definition of RHS1 or RHS2 dominates STMT, return the later of those
5057    two definitions, otherwise return STMT.  */
5058 
5059 static inline gimple *
5060 find_insert_point (gimple *stmt, tree rhs1, tree rhs2)
5061 {
5062   if (TREE_CODE (rhs1) == SSA_NAME
5063       && reassoc_stmt_dominates_stmt_p (stmt, SSA_NAME_DEF_STMT (rhs1)))
5064     stmt = SSA_NAME_DEF_STMT (rhs1);
5065   if (TREE_CODE (rhs2) == SSA_NAME
5066       && reassoc_stmt_dominates_stmt_p (stmt, SSA_NAME_DEF_STMT (rhs2)))
5067     stmt = SSA_NAME_DEF_STMT (rhs2);
5068   return stmt;
5069 }
5070 
5071 /* If the stmt that defines operand has to be inserted, insert it
5072    before the use.  */
5073 static void
5074 insert_stmt_before_use (gimple *stmt, gimple *stmt_to_insert)
5075 {
5076   gcc_assert (is_gimple_assign (stmt_to_insert));
5077   tree rhs1 = gimple_assign_rhs1 (stmt_to_insert);
5078   tree rhs2 = gimple_assign_rhs2 (stmt_to_insert);
5079   gimple *insert_point = find_insert_point (stmt, rhs1, rhs2);
5080   gimple_stmt_iterator gsi = gsi_for_stmt (insert_point);
5081   gimple_set_uid (stmt_to_insert, gimple_uid (insert_point));
5082 
5083   /* If the insert point is not stmt, then insert_point would be
5084      the point where operand rhs1 or rhs2 is defined. In this case,
5085      stmt_to_insert has to be inserted afterwards. This would
5086      only happen when the stmt insertion point is flexible. */
5087   if (stmt == insert_point)
5088     gsi_insert_before (&gsi, stmt_to_insert, GSI_NEW_STMT);
5089   else
5090     insert_stmt_after (stmt_to_insert, insert_point);
5091 }
5092 
5093 
5094 /* Recursively rewrite our linearized statements so that the operators
5095    match those in OPS[OPINDEX], putting the computation in rank
5096    order.  Return new lhs.
5097    CHANGED is true if we shouldn't reuse the lhs SSA_NAME both in
5098    the current stmt and during recursive invocations.
5099    NEXT_CHANGED is true if we shouldn't reuse the lhs SSA_NAME in
5100    recursive invocations.  */
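
/* Schematic example (illustrative only): for a linearized chain

     _1 = x + y;
     lhs = _1 + z;

   and OPS = {o1, o2, o3}, the recursion rewrites the inner statement to
   combine o2 and o3, and the outer statement to combine the new inner
   lhs with o1.  */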
5101 
5102 static tree
5103 rewrite_expr_tree (gimple *stmt, enum tree_code rhs_code, unsigned int opindex,
5104 		   vec<operand_entry *> ops, bool changed, bool next_changed)
5105 {
5106   tree rhs1 = gimple_assign_rhs1 (stmt);
5107   tree rhs2 = gimple_assign_rhs2 (stmt);
5108   tree lhs = gimple_assign_lhs (stmt);
5109   operand_entry *oe;
5110 
5111   /* The final recursion case for this function is that you have
5112      exactly two operations left.
5113      If we had exactly one op in the entire list to start with, we
5114      would have never called this function, and the tail recursion
5115      rewrites them one at a time.  */
5116   if (opindex + 2 == ops.length ())
5117     {
5118       operand_entry *oe1, *oe2;
5119 
5120       oe1 = ops[opindex];
5121       oe2 = ops[opindex + 1];
5122 
5123       if (rhs1 != oe1->op || rhs2 != oe2->op)
5124 	{
5125 	  gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
5126 	  unsigned int uid = gimple_uid (stmt);
5127 
5128 	  if (dump_file && (dump_flags & TDF_DETAILS))
5129 	    {
5130 	      fprintf (dump_file, "Transforming ");
5131 	      print_gimple_stmt (dump_file, stmt, 0);
5132 	    }
5133 
5134 	  /* If the stmt that defines operand has to be inserted, insert it
5135 	     before the use.  */
5136 	  if (oe1->stmt_to_insert)
5137 	    insert_stmt_before_use (stmt, oe1->stmt_to_insert);
5138 	  if (oe2->stmt_to_insert)
5139 	    insert_stmt_before_use (stmt, oe2->stmt_to_insert);
5140 	  /* Even when changed is false, reassociation could have e.g. removed
5141 	     some redundant operations, so unless we are just swapping the
5142 	     arguments or unless there is no change at all (then we just
5143 	     return lhs), force creation of a new SSA_NAME.  */
5144 	  if (changed || ((rhs1 != oe2->op || rhs2 != oe1->op) && opindex))
5145 	    {
5146 	      gimple *insert_point
5147 		= find_insert_point (stmt, oe1->op, oe2->op);
5148 	      lhs = make_ssa_name (TREE_TYPE (lhs));
5149 	      stmt
5150 		= gimple_build_assign (lhs, rhs_code,
5151 				       oe1->op, oe2->op);
5152 	      gimple_set_uid (stmt, uid);
5153 	      gimple_set_visited (stmt, true);
5154 	      if (insert_point == gsi_stmt (gsi))
5155 		gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
5156 	      else
5157 		insert_stmt_after (stmt, insert_point);
5158 	    }
5159 	  else
5160 	    {
5161 	      gcc_checking_assert (find_insert_point (stmt, oe1->op, oe2->op)
5162 				   == stmt);
5163 	      gimple_assign_set_rhs1 (stmt, oe1->op);
5164 	      gimple_assign_set_rhs2 (stmt, oe2->op);
5165 	      update_stmt (stmt);
5166 	    }
5167 
5168 	  if (rhs1 != oe1->op && rhs1 != oe2->op)
5169 	    remove_visited_stmt_chain (rhs1);
5170 
5171 	  if (dump_file && (dump_flags & TDF_DETAILS))
5172 	    {
5173 	      fprintf (dump_file, " into ");
5174 	      print_gimple_stmt (dump_file, stmt, 0);
5175 	    }
5176 	}
5177       return lhs;
5178     }
5179 
5180   /* If we hit here, we should have 3 or more ops left.  */
5181   gcc_assert (opindex + 2 < ops.length ());
5182 
5183   /* Rewrite the next operator.  */
5184   oe = ops[opindex];
5185 
5186   /* If the stmt that defines operand has to be inserted, insert it
5187      before the use.  */
5188   if (oe->stmt_to_insert)
5189     insert_stmt_before_use (stmt, oe->stmt_to_insert);
5190 
5191   /* Recurse on the LHS of the binary operator, which is guaranteed to
5192      be the non-leaf side.  */
5193   tree new_rhs1
5194     = rewrite_expr_tree (SSA_NAME_DEF_STMT (rhs1), rhs_code, opindex + 1, ops,
5195 			 changed || oe->op != rhs2 || next_changed,
5196 			 false);
5197 
5198   if (oe->op != rhs2 || new_rhs1 != rhs1)
5199     {
5200       if (dump_file && (dump_flags & TDF_DETAILS))
5201 	{
5202 	  fprintf (dump_file, "Transforming ");
5203 	  print_gimple_stmt (dump_file, stmt, 0);
5204 	}
5205 
5206       /* If changed is false, this is either opindex == 0
5207 	 or all outer rhs2's were equal to corresponding oe->op,
5208 	 and powi_result is NULL.
5209 	 That means lhs is equivalent before and after reassociation.
5210 	 Otherwise ensure the old lhs SSA_NAME is not reused and
5211 	 create a new stmt as well, so that any debug stmts will be
5212 	 properly adjusted.  */
5213       if (changed)
5214 	{
5215 	  gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
5216 	  unsigned int uid = gimple_uid (stmt);
5217 	  gimple *insert_point = find_insert_point (stmt, new_rhs1, oe->op);
5218 
5219 	  lhs = make_ssa_name (TREE_TYPE (lhs));
5220 	  stmt = gimple_build_assign (lhs, rhs_code,
5221 				      new_rhs1, oe->op);
5222 	  gimple_set_uid (stmt, uid);
5223 	  gimple_set_visited (stmt, true);
5224 	  if (insert_point == gsi_stmt (gsi))
5225 	    gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
5226 	  else
5227 	    insert_stmt_after (stmt, insert_point);
5228 	}
5229       else
5230 	{
5231 	  gcc_checking_assert (find_insert_point (stmt, new_rhs1, oe->op)
5232 			       == stmt);
5233 	  gimple_assign_set_rhs1 (stmt, new_rhs1);
5234 	  gimple_assign_set_rhs2 (stmt, oe->op);
5235 	  update_stmt (stmt);
5236 	}
5237 
5238       if (dump_file && (dump_flags & TDF_DETAILS))
5239 	{
5240 	  fprintf (dump_file, " into ");
5241 	  print_gimple_stmt (dump_file, stmt, 0);
5242 	}
5243     }
5244   return lhs;
5245 }
5246 
5247 /* Find out how many cycles we need to compute a chain of statements.
5248    OPS_NUM holds the number of statements in a chain.  CPU_WIDTH is the
5249    maximum number of independent statements we may execute per cycle.  */
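
/* Worked example (illustrative): with OPS_NUM = 12 and CPU_WIDTH = 2,
   res starts at 12 / 4 = 3, and the remaining 12 - 3 * 2 = 6 operands need
   floor_log2 (6) + 1 = 3 more halving cycles, so 6 cycles in total.  */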
5250 
5251 static int
5252 get_required_cycles (int ops_num, int cpu_width)
5253 {
5254   int res;
5255   int elog;
5256   unsigned int rest;
5257 
5258   /* While we have more than 2 * cpu_width operands
5259      we may reduce the number of operands by cpu_width
5260      per cycle.  */
5261   res = ops_num / (2 * cpu_width);
5262 
5263   /* The remaining operand count may be halved each cycle
5264      until we have only one operand.  */
5265   rest = (unsigned)(ops_num - res * cpu_width);
5266   elog = exact_log2 (rest);
5267   if (elog >= 0)
5268     res += elog;
5269   else
5270     res += floor_log2 (rest) + 1;
5271 
5272   return res;
5273 }
5274 
5275 /* Returns an optimal number of registers to use for computation of
5276    the given statements.  */
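
/* For instance (illustrative): with 6 operands, widths 2, 3 and 4 all
   need 3 cycles according to get_required_cycles, so if the target
   reports width 4, the binary search below settles on 2, the narrowest
   width that still achieves the optimal cycle count.  */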
5277 
5278 static int
5279 get_reassociation_width (int ops_num, enum tree_code opc,
5280 			 machine_mode mode)
5281 {
5282   int param_width = param_tree_reassoc_width;
5283   int width;
5284   int width_min;
5285   int cycles_best;
5286 
5287   if (param_width > 0)
5288     width = param_width;
5289   else
5290     width = targetm.sched.reassociation_width (opc, mode);
5291 
5292   if (width == 1)
5293     return width;
5294 
5295   /* Get the minimal time required for sequence computation.  */
5296   cycles_best = get_required_cycles (ops_num, width);
5297 
5298   /* Check if we may use a smaller width and still compute the sequence
5299      in the same number of cycles.  That allows us to reduce register
5300      usage.  get_required_cycles does not decrease as the width shrinks,
5301      so we can perform a binary search for the minimal width that still
5302      results in the optimal cycle count.  */
5303   width_min = 1;
5304   while (width > width_min)
5305     {
5306       int width_mid = (width + width_min) / 2;
5307 
5308       if (get_required_cycles (ops_num, width_mid) == cycles_best)
5309 	width = width_mid;
5310       else if (width_min < width_mid)
5311 	width_min = width_mid;
5312       else
5313 	break;
5314     }
5315 
5316   return width;
5317 }
5318 
5319 /* Recursively rewrite our linearized statements so that the operators
5320    match those in OPS[OPINDEX], putting the computation in rank
5321    order and trying to allow operations to be executed in
5322    parallel.  */
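
/* Schematically (illustrative, ignoring any profitability swaps): with
   WIDTH == 2 and ops {a, b, c, d}, the chain is rebuilt as

     s_1 = c + d;
     s_2 = a + b;
     lhs = s_1 + s_2;

   so the two inner additions no longer depend on each other.  */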
5323 
5324 static void
5325 rewrite_expr_tree_parallel (gassign *stmt, int width,
5326 			    vec<operand_entry *> ops)
5327 {
5328   enum tree_code opcode = gimple_assign_rhs_code (stmt);
5329   int op_num = ops.length ();
5330   gcc_assert (op_num > 0);
5331   int stmt_num = op_num - 1;
5332   gimple **stmts = XALLOCAVEC (gimple *, stmt_num);
5333   int op_index = op_num - 1;
5334   int stmt_index = 0;
5335   int ready_stmts_end = 0;
5336   int i = 0;
5337   gimple *stmt1 = NULL, *stmt2 = NULL;
5338   tree last_rhs1 = gimple_assign_rhs1 (stmt);
5339 
5340   /* We start expression rewriting from the top statements.
5341      So, in this loop we create a full list of statements
5342      we will work with.  */
5343   stmts[stmt_num - 1] = stmt;
5344   for (i = stmt_num - 2; i >= 0; i--)
5345     stmts[i] = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmts[i+1]));
5346 
5347   for (i = 0; i < stmt_num; i++)
5348     {
5349       tree op1, op2;
5350 
5351       /* Determine whether we should use results of
5352 	 already handled statements or not.  */
5353       if (ready_stmts_end == 0
5354 	  && (i - stmt_index >= width || op_index < 1))
5355 	ready_stmts_end = i;
5356 
5357       /* Now we choose operands for the next statement.  A nonzero
5358 	 value in ready_stmts_end means that we should use
5359 	 the results of already generated statements as new operands.  */
5360       if (ready_stmts_end > 0)
5361 	{
5362 	  op1 = gimple_assign_lhs (stmts[stmt_index++]);
5363 	  if (ready_stmts_end > stmt_index)
5364 	    op2 = gimple_assign_lhs (stmts[stmt_index++]);
5365 	  else if (op_index >= 0)
5366 	    {
5367 	      operand_entry *oe = ops[op_index--];
5368 	      stmt2 = oe->stmt_to_insert;
5369 	      op2 = oe->op;
5370 	    }
5371 	  else
5372 	    {
5373 	      gcc_assert (stmt_index < i);
5374 	      op2 = gimple_assign_lhs (stmts[stmt_index++]);
5375 	    }
5376 
5377 	  if (stmt_index >= ready_stmts_end)
5378 	    ready_stmts_end = 0;
5379 	}
5380       else
5381 	{
5382 	  if (op_index > 1)
5383 	    swap_ops_for_binary_stmt (ops, op_index - 2, NULL);
5384 	  operand_entry *oe2 = ops[op_index--];
5385 	  operand_entry *oe1 = ops[op_index--];
5386 	  op2 = oe2->op;
5387 	  stmt2 = oe2->stmt_to_insert;
5388 	  op1 = oe1->op;
5389 	  stmt1 = oe1->stmt_to_insert;
5390 	}
5391 
5392       /* If we emit the last statement then we should put
5393 	 operands into the last statement.  It will also
5394 	 break the loop.  */
5395       if (op_index < 0 && stmt_index == i)
5396 	i = stmt_num - 1;
5397 
5398       if (dump_file && (dump_flags & TDF_DETAILS))
5399 	{
5400 	  fprintf (dump_file, "Transforming ");
5401 	  print_gimple_stmt (dump_file, stmts[i], 0);
5402 	}
5403 
5404       /* If the stmt that defines operand has to be inserted, insert it
5405 	 before the use.  */
5406       if (stmt1)
5407 	insert_stmt_before_use (stmts[i], stmt1);
5408       if (stmt2)
5409 	insert_stmt_before_use (stmts[i], stmt2);
5410       stmt1 = stmt2 = NULL;
5411 
5412       /* We keep original statement only for the last one.  All
5413 	 others are recreated.  */
5414       if (i == stmt_num - 1)
5415 	{
5416 	  gimple_assign_set_rhs1 (stmts[i], op1);
5417 	  gimple_assign_set_rhs2 (stmts[i], op2);
5418 	  update_stmt (stmts[i]);
5419 	}
5420       else
5421 	{
5422 	  stmts[i] = build_and_add_sum (TREE_TYPE (last_rhs1), op1, op2, opcode);
5423 	  gimple_set_visited (stmts[i], true);
5424 	}
5425       if (dump_file && (dump_flags & TDF_DETAILS))
5426 	{
5427 	  fprintf (dump_file, " into ");
5428 	  print_gimple_stmt (dump_file, stmts[i], 0);
5429 	}
5430     }
5431 
5432   remove_visited_stmt_chain (last_rhs1);
5433 }
5434 
5435 /* Transform STMT, which is really (A + B) + (C + D) into the left
5436    linear form, ((A+B)+C)+D.
5437    Recurse on D if necessary.  */
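
/* At the GIMPLE level this is, schematically (illustrative):

     _1 = A + B;             _1 = A + B;
     _2 = C + D;      ==>    _3 = _1 + D;
     x_4 = _1 + _2;          x_4 = _3 + C;

   which is the left-linear form modulo commutativity; _2 becomes dead
   and is removed.  */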
5438 
5439 static void
5440 linearize_expr (gimple *stmt)
5441 {
5442   gimple_stmt_iterator gsi;
5443   gimple *binlhs = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
5444   gimple *binrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
5445   gimple *oldbinrhs = binrhs;
5446   enum tree_code rhscode = gimple_assign_rhs_code (stmt);
5447   gimple *newbinrhs = NULL;
5448   class loop *loop = loop_containing_stmt (stmt);
5449   tree lhs = gimple_assign_lhs (stmt);
5450 
5451   gcc_assert (is_reassociable_op (binlhs, rhscode, loop)
5452 	      && is_reassociable_op (binrhs, rhscode, loop));
5453 
5454   gsi = gsi_for_stmt (stmt);
5455 
5456   gimple_assign_set_rhs2 (stmt, gimple_assign_rhs1 (binrhs));
5457   binrhs = gimple_build_assign (make_ssa_name (TREE_TYPE (lhs)),
5458 				gimple_assign_rhs_code (binrhs),
5459 				gimple_assign_lhs (binlhs),
5460 				gimple_assign_rhs2 (binrhs));
5461   gimple_assign_set_rhs1 (stmt, gimple_assign_lhs (binrhs));
5462   gsi_insert_before (&gsi, binrhs, GSI_SAME_STMT);
5463   gimple_set_uid (binrhs, gimple_uid (stmt));
5464 
5465   if (TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME)
5466     newbinrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
5467 
5468   if (dump_file && (dump_flags & TDF_DETAILS))
5469     {
5470       fprintf (dump_file, "Linearized: ");
5471       print_gimple_stmt (dump_file, stmt, 0);
5472     }
5473 
5474   reassociate_stats.linearized++;
5475   update_stmt (stmt);
5476 
5477   gsi = gsi_for_stmt (oldbinrhs);
5478   reassoc_remove_stmt (&gsi);
5479   release_defs (oldbinrhs);
5480 
5481   gimple_set_visited (stmt, true);
5482   gimple_set_visited (binlhs, true);
5483   gimple_set_visited (binrhs, true);
5484 
5485   /* Tail recurse on the new rhs if it still needs reassociation.  */
5486   if (newbinrhs && is_reassociable_op (newbinrhs, rhscode, loop))
5487     /* ??? This should probably be linearize_expr (newbinrhs) but I don't
5488 	   want to change the algorithm while converting to tuples.  */
5489     linearize_expr (stmt);
5490 }
5491 
5492 /* If LHS has a single immediate use that is a GIMPLE_ASSIGN statement, return
5493    it.  Otherwise, return NULL.  */
5494 
5495 static gimple *
5496 get_single_immediate_use (tree lhs)
5497 {
5498   use_operand_p immuse;
5499   gimple *immusestmt;
5500 
5501   if (TREE_CODE (lhs) == SSA_NAME
5502       && single_imm_use (lhs, &immuse, &immusestmt)
5503       && is_gimple_assign (immusestmt))
5504     return immusestmt;
5505 
5506   return NULL;
5507 }
5508 
5509 /* Recursively negate the value of TONEGATE, and return the SSA_NAME
5510    representing the negated value.  Insertions of any necessary
5511    instructions go before GSI.
5512    This function is recursive in that, if you hand it "a_5" as the
5513    value to negate, and a_5 is defined by "a_5 = b_3 + b_4", it will
5514    transform b_3 + b_4 into a_5 = -b_3 + -b_4.  */
5515 
5516 static tree
5517 negate_value (tree tonegate, gimple_stmt_iterator *gsip)
5518 {
5519   gimple *negatedefstmt = NULL;
5520   tree resultofnegate;
5521   gimple_stmt_iterator gsi;
5522   unsigned int uid;
5523 
5524   /* If we are trying to negate a name, defined by an add, negate the
5525      add operands instead.  */
5526   if (TREE_CODE (tonegate) == SSA_NAME)
5527     negatedefstmt = SSA_NAME_DEF_STMT (tonegate);
5528   if (TREE_CODE (tonegate) == SSA_NAME
5529       && is_gimple_assign (negatedefstmt)
5530       && TREE_CODE (gimple_assign_lhs (negatedefstmt)) == SSA_NAME
5531       && has_single_use (gimple_assign_lhs (negatedefstmt))
5532       && gimple_assign_rhs_code (negatedefstmt) == PLUS_EXPR)
5533     {
5534       tree rhs1 = gimple_assign_rhs1 (negatedefstmt);
5535       tree rhs2 = gimple_assign_rhs2 (negatedefstmt);
5536       tree lhs = gimple_assign_lhs (negatedefstmt);
5537       gimple *g;
5538 
5539       gsi = gsi_for_stmt (negatedefstmt);
5540       rhs1 = negate_value (rhs1, &gsi);
5541 
5542       gsi = gsi_for_stmt (negatedefstmt);
5543       rhs2 = negate_value (rhs2, &gsi);
5544 
5545       gsi = gsi_for_stmt (negatedefstmt);
5546       lhs = make_ssa_name (TREE_TYPE (lhs));
5547       gimple_set_visited (negatedefstmt, true);
5548       g = gimple_build_assign (lhs, PLUS_EXPR, rhs1, rhs2);
5549       gimple_set_uid (g, gimple_uid (negatedefstmt));
5550       gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5551       return lhs;
5552     }
5553 
5554   tonegate = fold_build1 (NEGATE_EXPR, TREE_TYPE (tonegate), tonegate);
5555   resultofnegate = force_gimple_operand_gsi (gsip, tonegate, true,
5556 					     NULL_TREE, true, GSI_SAME_STMT);
5557   gsi = *gsip;
5558   uid = gimple_uid (gsi_stmt (gsi));
5559   for (gsi_prev (&gsi); !gsi_end_p (gsi); gsi_prev (&gsi))
5560     {
5561       gimple *stmt = gsi_stmt (gsi);
5562       if (gimple_uid (stmt) != 0)
5563 	break;
5564       gimple_set_uid (stmt, uid);
5565     }
5566   return resultofnegate;
5567 }
5568 
5569 /* Return true if we should break up the subtract in STMT into an add
5570    with negate.  This is true when the subtract's operands are really
5571    adds, or the subtract itself is used in an add expression.  In
5572    either case, breaking up the subtract into an add with negate
5573    exposes the adds to reassociation.  */
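
/* For instance (illustrative): with

     x_1 = a_2 - b_3;
     y_4 = x_1 + c_5;

   the subtract feeds an addition, so it is broken up into
   x_1 = a_2 + -b_3, exposing both additions to reassociation.  */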
5574 
5575 static bool
5576 should_break_up_subtract (gimple *stmt)
5577 {
5578   tree lhs = gimple_assign_lhs (stmt);
5579   tree binlhs = gimple_assign_rhs1 (stmt);
5580   tree binrhs = gimple_assign_rhs2 (stmt);
5581   gimple *immusestmt;
5582   class loop *loop = loop_containing_stmt (stmt);
5583 
5584   if (TREE_CODE (binlhs) == SSA_NAME
5585       && is_reassociable_op (SSA_NAME_DEF_STMT (binlhs), PLUS_EXPR, loop))
5586     return true;
5587 
5588   if (TREE_CODE (binrhs) == SSA_NAME
5589       && is_reassociable_op (SSA_NAME_DEF_STMT (binrhs), PLUS_EXPR, loop))
5590     return true;
5591 
5592   if (TREE_CODE (lhs) == SSA_NAME
5593       && (immusestmt = get_single_immediate_use (lhs))
5594       && is_gimple_assign (immusestmt)
5595       && (gimple_assign_rhs_code (immusestmt) == PLUS_EXPR
5596 	  || (gimple_assign_rhs_code (immusestmt) == MINUS_EXPR
5597 	      && gimple_assign_rhs1 (immusestmt) == lhs)
5598 	  || gimple_assign_rhs_code (immusestmt) == MULT_EXPR))
5599     return true;
5600   return false;
5601 }
5602 
5603 /* Transform STMT from A - B into A + -B.  */
5604 
5605 static void
5606 break_up_subtract (gimple *stmt, gimple_stmt_iterator *gsip)
5607 {
5608   tree rhs1 = gimple_assign_rhs1 (stmt);
5609   tree rhs2 = gimple_assign_rhs2 (stmt);
5610 
5611   if (dump_file && (dump_flags & TDF_DETAILS))
5612     {
5613       fprintf (dump_file, "Breaking up subtract ");
5614       print_gimple_stmt (dump_file, stmt, 0);
5615     }
5616 
5617   rhs2 = negate_value (rhs2, gsip);
5618   gimple_assign_set_rhs_with_ops (gsip, PLUS_EXPR, rhs1, rhs2);
5619   update_stmt (stmt);
5620 }
5621 
5622 /* Determine whether STMT is a builtin call that raises an SSA name
5623    to an integer power and has only one use.  If so, and this is early
5624    reassociation and unsafe math optimizations are permitted, place
5625    the SSA name in *BASE and the exponent in *EXPONENT, and return TRUE.
5626    If any of these conditions does not hold, return FALSE.  */
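
/* For example (illustrative): pow (x_1, 4.0) and powi (x_1, 4) are
   accepted with *BASE = x_1 and *EXPONENT = 4, while pow (x_1, 2.5)
   (non-integral exponent) and pow (x_1, 1.0) (exponent below 2) are
   rejected.  */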
5627 
5628 static bool
5629 acceptable_pow_call (gcall *stmt, tree *base, HOST_WIDE_INT *exponent)
5630 {
5631   tree arg1;
5632   REAL_VALUE_TYPE c, cint;
5633 
5634   switch (gimple_call_combined_fn (stmt))
5635     {
5636     CASE_CFN_POW:
5637       if (flag_errno_math)
5638 	return false;
5639 
5640       *base = gimple_call_arg (stmt, 0);
5641       arg1 = gimple_call_arg (stmt, 1);
5642 
5643       if (TREE_CODE (arg1) != REAL_CST)
5644 	return false;
5645 
5646       c = TREE_REAL_CST (arg1);
5647 
5648       if (REAL_EXP (&c) > HOST_BITS_PER_WIDE_INT)
5649 	return false;
5650 
5651       *exponent = real_to_integer (&c);
5652       real_from_integer (&cint, VOIDmode, *exponent, SIGNED);
5653       if (!real_identical (&c, &cint))
5654 	return false;
5655 
5656       break;
5657 
5658     CASE_CFN_POWI:
5659       *base = gimple_call_arg (stmt, 0);
5660       arg1 = gimple_call_arg (stmt, 1);
5661 
5662       if (!tree_fits_shwi_p (arg1))
5663 	return false;
5664 
5665       *exponent = tree_to_shwi (arg1);
5666       break;
5667 
5668     default:
5669       return false;
5670     }
5671 
5672   /* Expanding negative exponents is generally unproductive, so we don't
5673      complicate matters with those.  Exponents of zero and one should
5674      have been handled by expression folding.  */
5675   if (*exponent < 2 || TREE_CODE (*base) != SSA_NAME)
5676     return false;
5677 
5678   return true;
5679 }
5680 
5681 /* Try to derive and add operand entry for OP to *OPS.  Return false if
5682    unsuccessful.  */
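
/* E.g. (illustrative): for CODE == MULT_EXPR and OP defined by

     op_1 = -y_2;

   the entries y_2 and -1 are pushed instead of OP, exposing the constant
   factor to the rest of the reassociation machinery.  */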
5683 
5684 static bool
5685 try_special_add_to_ops (vec<operand_entry *> *ops,
5686 			enum tree_code code,
5687 			tree op, gimple* def_stmt)
5688 {
5689   tree base = NULL_TREE;
5690   HOST_WIDE_INT exponent = 0;
5691 
5692   if (TREE_CODE (op) != SSA_NAME
5693       || ! has_single_use (op))
5694     return false;
5695 
5696   if (code == MULT_EXPR
5697       && reassoc_insert_powi_p
5698       && flag_unsafe_math_optimizations
5699       && is_gimple_call (def_stmt)
5700       && acceptable_pow_call (as_a <gcall *> (def_stmt), &base, &exponent))
5701     {
5702       add_repeat_to_ops_vec (ops, base, exponent);
5703       gimple_set_visited (def_stmt, true);
5704       return true;
5705     }
5706   else if (code == MULT_EXPR
5707 	   && is_gimple_assign (def_stmt)
5708 	   && gimple_assign_rhs_code (def_stmt) == NEGATE_EXPR
5709 	   && !HONOR_SNANS (TREE_TYPE (op))
5710 	   && (!HONOR_SIGNED_ZEROS (TREE_TYPE (op))
5711 	       || !COMPLEX_FLOAT_TYPE_P (TREE_TYPE (op))))
5712     {
5713       tree rhs1 = gimple_assign_rhs1 (def_stmt);
5714       tree cst = build_minus_one_cst (TREE_TYPE (op));
5715       add_to_ops_vec (ops, rhs1);
5716       add_to_ops_vec (ops, cst);
5717       gimple_set_visited (def_stmt, true);
5718       return true;
5719     }
5720 
5721   return false;
5722 }
5723 
5724 /* Recursively linearize a binary expression that is the RHS of STMT.
5725    Place the operands of the expression tree in the vector named OPS.  */
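
/* Minimal example (illustrative): for

     _1 = a_2 + b_3;
     x_4 = _1 + c_5;

   called on the second statement, OPS ends up holding a_2, b_3 and c_5
   (in no particular order here; they are sorted by rank afterwards).  */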
5726 
5727 static void
5728 linearize_expr_tree (vec<operand_entry *> *ops, gimple *stmt,
5729 		     bool is_associative, bool set_visited)
5730 {
5731   tree binlhs = gimple_assign_rhs1 (stmt);
5732   tree binrhs = gimple_assign_rhs2 (stmt);
5733   gimple *binlhsdef = NULL, *binrhsdef = NULL;
5734   bool binlhsisreassoc = false;
5735   bool binrhsisreassoc = false;
5736   enum tree_code rhscode = gimple_assign_rhs_code (stmt);
5737   class loop *loop = loop_containing_stmt (stmt);
5738 
5739   if (set_visited)
5740     gimple_set_visited (stmt, true);
5741 
5742   if (TREE_CODE (binlhs) == SSA_NAME)
5743     {
5744       binlhsdef = SSA_NAME_DEF_STMT (binlhs);
5745       binlhsisreassoc = (is_reassociable_op (binlhsdef, rhscode, loop)
5746 			 && !stmt_could_throw_p (cfun, binlhsdef));
5747     }
5748 
5749   if (TREE_CODE (binrhs) == SSA_NAME)
5750     {
5751       binrhsdef = SSA_NAME_DEF_STMT (binrhs);
5752       binrhsisreassoc = (is_reassociable_op (binrhsdef, rhscode, loop)
5753 			 && !stmt_could_throw_p (cfun, binrhsdef));
5754     }
5755 
5756   /* If the LHS is not reassociable, but the RHS is, we need to swap
5757      them.  If neither is reassociable, there is nothing we can do, so
5758      just put them in the ops vector.  If the LHS is reassociable,
5759      linearize it.  If both are reassociable, then linearize the RHS
5760      and the LHS.  */
5761 
5762   if (!binlhsisreassoc)
5763     {
5764       /* If this is not an associative operation, like division, give up.  */
5765       if (!is_associative)
5766 	{
5767 	  add_to_ops_vec (ops, binrhs);
5768 	  return;
5769 	}
5770 
5771       if (!binrhsisreassoc)
5772 	{
5773 	  bool swap = false;
5774 	  if (try_special_add_to_ops (ops, rhscode, binrhs, binrhsdef))
5775 	    /* If we add ops for the rhs we expect to be able to recurse
5776 	       to it via the lhs during expression rewrite so swap
5777 	       operands.  */
5778 	    swap = true;
5779 	  else
5780 	    add_to_ops_vec (ops, binrhs);
5781 
5782 	  if (!try_special_add_to_ops (ops, rhscode, binlhs, binlhsdef))
5783 	    add_to_ops_vec (ops, binlhs);
5784 
5785 	  if (!swap)
5786 	    return;
5787 	}
5788 
5789       if (dump_file && (dump_flags & TDF_DETAILS))
5790 	{
5791 	  fprintf (dump_file, "swapping operands of ");
5792 	  print_gimple_stmt (dump_file, stmt, 0);
5793 	}
5794 
5795       swap_ssa_operands (stmt,
5796 			 gimple_assign_rhs1_ptr (stmt),
5797 			 gimple_assign_rhs2_ptr (stmt));
5798       update_stmt (stmt);
5799 
5800       if (dump_file && (dump_flags & TDF_DETAILS))
5801 	{
5802 	  fprintf (dump_file, " is now ");
5803 	  print_gimple_stmt (dump_file, stmt, 0);
5804 	}
5805       if (!binrhsisreassoc)
5806 	return;
5807 
5808       /* We want to make it so the lhs is always the reassociative op,
5809 	 so swap.  */
5810       std::swap (binlhs, binrhs);
5811     }
5812   else if (binrhsisreassoc)
5813     {
5814       linearize_expr (stmt);
5815       binlhs = gimple_assign_rhs1 (stmt);
5816       binrhs = gimple_assign_rhs2 (stmt);
5817     }
5818 
5819   gcc_assert (TREE_CODE (binrhs) != SSA_NAME
5820 	      || !is_reassociable_op (SSA_NAME_DEF_STMT (binrhs),
5821 				      rhscode, loop));
5822   linearize_expr_tree (ops, SSA_NAME_DEF_STMT (binlhs),
5823 		       is_associative, set_visited);
5824 
5825   if (!try_special_add_to_ops (ops, rhscode, binrhs, binrhsdef))
5826     add_to_ops_vec (ops, binrhs);
5827 }
5828 
5829 /* Repropagate the negates back into subtracts, since no other pass
5830    currently does it.  */
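
/* Typical case (illustrative):

     x_1 = -a_2;
     y_3 = b_4 + x_1;

   becomes

     y_3 = b_4 - a_2;

   and x_1 becomes dead if it has no other uses.  */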
5831 
5832 static void
5833 repropagate_negates (void)
5834 {
5835   unsigned int i = 0;
5836   tree negate;
5837 
5838   FOR_EACH_VEC_ELT (plus_negates, i, negate)
5839     {
5840       gimple *user = get_single_immediate_use (negate);
5841 
5842       if (!user || !is_gimple_assign (user))
5843 	continue;
5844 
5845       /* The negate operand can be either operand of a PLUS_EXPR
5846 	 (it can be the LHS if the RHS is a constant for example).
5847 
5848 	 Force the negate operand to the RHS of the PLUS_EXPR, then
5849 	 transform the PLUS_EXPR into a MINUS_EXPR.  */
5850       if (gimple_assign_rhs_code (user) == PLUS_EXPR)
5851 	{
5852 	  /* If the negated operand appears on the LHS of the
5853 	     PLUS_EXPR, exchange the operands of the PLUS_EXPR
5854 	     to force the negated operand to the RHS of the PLUS_EXPR.  */
5855 	  if (gimple_assign_rhs1 (user) == negate)
5856 	    {
5857 	      swap_ssa_operands (user,
5858 				 gimple_assign_rhs1_ptr (user),
5859 				 gimple_assign_rhs2_ptr (user));
5860 	    }
5861 
5862 	  /* Now transform the PLUS_EXPR into a MINUS_EXPR and replace
5863 	     the RHS of the PLUS_EXPR with the operand of the NEGATE_EXPR.  */
5864 	  if (gimple_assign_rhs2 (user) == negate)
5865 	    {
5866 	      tree rhs1 = gimple_assign_rhs1 (user);
5867 	      tree rhs2 = gimple_assign_rhs1 (SSA_NAME_DEF_STMT (negate));
5868 	      gimple_stmt_iterator gsi = gsi_for_stmt (user);
5869 	      gimple_assign_set_rhs_with_ops (&gsi, MINUS_EXPR, rhs1, rhs2);
5870 	      update_stmt (user);
5871 	    }
5872 	}
5873       else if (gimple_assign_rhs_code (user) == MINUS_EXPR)
5874 	{
5875 	  if (gimple_assign_rhs1 (user) == negate)
5876 	    {
5877 	      /* We have
5878 	           x = -a
5879 		   y = x - b
5880 		 which we transform into
5881 		   x = a + b
5882 		   y = -x .
5883 		 This pushes down the negate which we possibly can merge
5884 		 into some other operation, hence insert it into the
5885 		 plus_negates vector.  */
5886 	      gimple *feed = SSA_NAME_DEF_STMT (negate);
5887 	      tree a = gimple_assign_rhs1 (feed);
5888 	      tree b = gimple_assign_rhs2 (user);
5889 	      gimple_stmt_iterator gsi = gsi_for_stmt (feed);
5890 	      gimple_stmt_iterator gsi2 = gsi_for_stmt (user);
5891 	      tree x = make_ssa_name (TREE_TYPE (gimple_assign_lhs (feed)));
5892 	      gimple *g = gimple_build_assign (x, PLUS_EXPR, a, b);
5893 	      gsi_insert_before (&gsi2, g, GSI_SAME_STMT);
5894 	      gimple_assign_set_rhs_with_ops (&gsi2, NEGATE_EXPR, x);
5895 	      user = gsi_stmt (gsi2);
5896 	      update_stmt (user);
5897 	      reassoc_remove_stmt (&gsi);
5898 	      release_defs (feed);
5899 	      plus_negates.safe_push (gimple_assign_lhs (user));
5900 	    }
5901 	  else
5902 	    {
5903 	      /* Transform "x = -a; y = b - x" into "y = b + a", getting
5904 	         rid of one operation.  */
5905 	      gimple *feed = SSA_NAME_DEF_STMT (negate);
5906 	      tree a = gimple_assign_rhs1 (feed);
5907 	      tree rhs1 = gimple_assign_rhs1 (user);
5908 	      gimple_stmt_iterator gsi = gsi_for_stmt (user);
5909 	      gimple_assign_set_rhs_with_ops (&gsi, PLUS_EXPR, rhs1, a);
5910 	      update_stmt (gsi_stmt (gsi));
5911 	    }
5912 	}
5913     }
5914 }
5915 
5916 /* Break up subtract operations in block BB.
5917 
5918    We do this top down because we don't know whether the subtract is
5919    part of a possible chain of reassociation except at the top.
5920 
5921    IE given
5922    d = f + g
5923    c = a + e
5924    b = c - d
5925    q = b - r
5926    k = t - q
5927 
5928    we want to break up k = t - q, but we won't until we've transformed q
5929    = b - r, which won't be broken up until we transform b = c - d.
5930 
5931    En passant, clear the GIMPLE visited flag on every statement
5932    and set UIDs within each basic block.  */
5933 
5934 static void
5935 break_up_subtract_bb (basic_block bb)
5936 {
5937   gimple_stmt_iterator gsi;
5938   basic_block son;
5939   unsigned int uid = 1;
5940 
5941   for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
5942     {
5943       gimple *stmt = gsi_stmt (gsi);
5944       gimple_set_visited (stmt, false);
5945       gimple_set_uid (stmt, uid++);
5946 
5947       if (!is_gimple_assign (stmt)
5948 	  || !can_reassociate_type_p (TREE_TYPE (gimple_assign_lhs (stmt)))
5949 	  || !can_reassociate_op_p (gimple_assign_lhs (stmt)))
5950 	continue;
5951 
5952       /* Look for simple gimple subtract operations.  */
5953       if (gimple_assign_rhs_code (stmt) == MINUS_EXPR)
5954 	{
5955 	  if (!can_reassociate_op_p (gimple_assign_rhs1 (stmt))
5956 	      || !can_reassociate_op_p (gimple_assign_rhs2 (stmt)))
5957 	    continue;
5958 
5959 	  /* Check for a subtract used only in an addition.  If this
5960 	     is the case, transform it into add of a negate for better
5961 	     reassociation.  IE transform C = A-B into C = A + -B if C
5962 	     is only used in an addition.  */
5963 	  if (should_break_up_subtract (stmt))
5964 	    break_up_subtract (stmt, &gsi);
5965 	}
5966       else if (gimple_assign_rhs_code (stmt) == NEGATE_EXPR
5967 	       && can_reassociate_op_p (gimple_assign_rhs1 (stmt)))
5968 	plus_negates.safe_push (gimple_assign_lhs (stmt));
5969     }
5970   for (son = first_dom_son (CDI_DOMINATORS, bb);
5971        son;
5972        son = next_dom_son (CDI_DOMINATORS, son))
5973     break_up_subtract_bb (son);
5974 }
5975 
5976 /* Used for repeated factor analysis.  */
5977 struct repeat_factor
5978 {
5979   /* An SSA name that occurs in a multiply chain.  */
5980   tree factor;
5981 
5982   /* Cached rank of the factor.  */
5983   unsigned rank;
5984 
5985   /* Number of occurrences of the factor in the chain.  */
5986   HOST_WIDE_INT count;
5987 
5988   /* An SSA name representing the product of this factor and
5989      all factors appearing later in the repeated factor vector.  */
5990   tree repr;
5991 };
5992 
5993 
5994 static vec<repeat_factor> repeat_factor_vec;
5995 
5996 /* Used for sorting the repeat factor vector.  Sort primarily by
5997    ascending occurrence count, secondarily by descending rank.  */
5998 
5999 static int
6000 compare_repeat_factors (const void *x1, const void *x2)
6001 {
6002   const repeat_factor *rf1 = (const repeat_factor *) x1;
6003   const repeat_factor *rf2 = (const repeat_factor *) x2;
6004 
6005   if (rf1->count != rf2->count)
6006     return rf1->count - rf2->count;
6007 
6008   return rf2->rank - rf1->rank;
6009 }
6010 
6011 /* Look for repeated operands in OPS in the multiply tree rooted at
6012    STMT.  Replace them with an optimal sequence of multiplies and powi
6013    builtin calls, and remove the used operands from OPS.  Return an
6014    SSA name representing the value of the replacement sequence.  */
6015 
6016 static tree
6017 attempt_builtin_powi (gimple *stmt, vec<operand_entry *> *ops)
6018 {
6019   unsigned i, j, vec_len;
6020   int ii;
6021   operand_entry *oe;
6022   repeat_factor *rf1, *rf2;
6023   repeat_factor rfnew;
6024   tree result = NULL_TREE;
6025   tree target_ssa, iter_result;
6026   tree type = TREE_TYPE (gimple_get_lhs (stmt));
6027   tree powi_fndecl = mathfn_built_in (type, BUILT_IN_POWI);
6028   gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
6029   gimple *mul_stmt, *pow_stmt;
6030 
6031   /* Nothing to do if BUILT_IN_POWI doesn't exist for this type and
6032      target, unless type is integral.  */
6033   if (!powi_fndecl && !INTEGRAL_TYPE_P (type))
6034     return NULL_TREE;
6035 
6036   /* Allocate the repeated factor vector.  */
6037   repeat_factor_vec.create (10);
6038 
6039   /* Scan the OPS vector for all SSA names in the product and build
6040      up a vector of occurrence counts for each factor.  */
6041   FOR_EACH_VEC_ELT (*ops, i, oe)
6042     {
6043       if (TREE_CODE (oe->op) == SSA_NAME)
6044 	{
6045 	  FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
6046 	    {
6047 	      if (rf1->factor == oe->op)
6048 		{
6049 		  rf1->count += oe->count;
6050 		  break;
6051 		}
6052 	    }
6053 
6054 	  if (j >= repeat_factor_vec.length ())
6055 	    {
6056 	      rfnew.factor = oe->op;
6057 	      rfnew.rank = oe->rank;
6058 	      rfnew.count = oe->count;
6059 	      rfnew.repr = NULL_TREE;
6060 	      repeat_factor_vec.safe_push (rfnew);
6061 	    }
6062 	}
6063     }
6064 
6065   /* Sort the repeated factor vector by (a) increasing occurrence count,
6066      and (b) decreasing rank.  */
6067   repeat_factor_vec.qsort (compare_repeat_factors);
6068 
6069   /* It is generally best to combine as many base factors as possible
6070      into a product before applying __builtin_powi to the result.
6071      However, the sort order chosen for the repeated factor vector
6072      allows us to cache partial results for the product of the base
6073      factors for subsequent use.  When we already have a cached partial
6074      result from a previous iteration, it is best to make use of it
6075      before looking for another __builtin_pow opportunity.
6076 
6077      As an example, consider x * x * y * y * y * z * z * z * z.
6078      We want to first compose the product x * y * z, raise it to the
6079      second power, then multiply this by y * z, and finally multiply
6080      by z.  This can be done in 5 multiplies provided we cache y * z
6081      for use in both expressions:
6082 
6083         t1 = y * z
6084 	t2 = t1 * x
6085 	t3 = t2 * t2
6086 	t4 = t1 * t3
6087 	result = t4 * z
6088 
6089      If we instead ignored the cached y * z and first multiplied by
6090      the __builtin_pow opportunity z * z, we would get the inferior:
6091 
6092         t1 = y * z
6093 	t2 = t1 * x
6094 	t3 = t2 * t2
6095 	t4 = z * z
6096 	t5 = t3 * t4
6097         result = t5 * y  */
6098 
6099   vec_len = repeat_factor_vec.length ();
6100 
6101   /* Repeatedly look for opportunities to create a builtin_powi call.  */
6102   while (true)
6103     {
6104       HOST_WIDE_INT power;
6105 
6106       /* First look for the largest cached product of factors from
6107 	 preceding iterations.  If found, create a builtin_powi for
6108 	 it if the minimum occurrence count for its factors is at
6109 	 least 2, or just use this cached product as our next
6110 	 multiplicand if the minimum occurrence count is 1.  */
6111       FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
6112 	{
6113 	  if (rf1->repr && rf1->count > 0)
6114 	    break;
6115 	}
6116 
6117       if (j < vec_len)
6118 	{
6119 	  power = rf1->count;
6120 
6121 	  if (power == 1)
6122 	    {
6123 	      iter_result = rf1->repr;
6124 
6125 	      if (dump_file && (dump_flags & TDF_DETAILS))
6126 		{
6127 		  unsigned elt;
6128 		  repeat_factor *rf;
6129 		  fputs ("Multiplying by cached product ", dump_file);
6130 		  for (elt = j; elt < vec_len; elt++)
6131 		    {
6132 		      rf = &repeat_factor_vec[elt];
6133 		      print_generic_expr (dump_file, rf->factor);
6134 		      if (elt < vec_len - 1)
6135 			fputs (" * ", dump_file);
6136 		    }
6137 		  fputs ("\n", dump_file);
6138 		}
6139 	    }
6140 	  else
6141 	    {
6142 	      if (INTEGRAL_TYPE_P (type))
6143 		{
6144 		  gcc_assert (power > 1);
6145 		  gimple_stmt_iterator gsip = gsi;
6146 		  gsi_prev (&gsip);
6147 		  iter_result = powi_as_mults (&gsi, gimple_location (stmt),
6148 					       rf1->repr, power);
6149 		  gimple_stmt_iterator gsic = gsi;
6150 		  while (gsi_stmt (gsic) != gsi_stmt (gsip))
6151 		    {
6152 		      gimple_set_uid (gsi_stmt (gsic), gimple_uid (stmt));
6153 		      gimple_set_visited (gsi_stmt (gsic), true);
6154 		      gsi_prev (&gsic);
6155 		    }
6156 		}
6157 	      else
6158 		{
6159 		  iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
6160 		  pow_stmt
6161 		    = gimple_build_call (powi_fndecl, 2, rf1->repr,
6162 					 build_int_cst (integer_type_node,
6163 							power));
6164 		  gimple_call_set_lhs (pow_stmt, iter_result);
6165 		  gimple_set_location (pow_stmt, gimple_location (stmt));
6166 		  gimple_set_uid (pow_stmt, gimple_uid (stmt));
6167 		  gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
6168 		}
6169 
6170 	      if (dump_file && (dump_flags & TDF_DETAILS))
6171 		{
6172 		  unsigned elt;
6173 		  repeat_factor *rf;
6174 		  fputs ("Building __builtin_pow call for cached product (",
6175 			 dump_file);
6176 		  for (elt = j; elt < vec_len; elt++)
6177 		    {
6178 		      rf = &repeat_factor_vec[elt];
6179 		      print_generic_expr (dump_file, rf->factor);
6180 		      if (elt < vec_len - 1)
6181 			fputs (" * ", dump_file);
6182 		    }
6183 		  fprintf (dump_file, ")^" HOST_WIDE_INT_PRINT_DEC"\n",
6184 			   power);
6185 		}
6186 	    }
6187 	}
6188       else
6189 	{
6190 	  /* Otherwise, find the first factor in the repeated factor
6191 	     vector whose occurrence count is at least 2.  If no such
6192 	     factor exists, there are no builtin_powi opportunities
6193 	     remaining.  */
6194 	  FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
6195 	    {
6196 	      if (rf1->count >= 2)
6197 		break;
6198 	    }
6199 
6200 	  if (j >= vec_len)
6201 	    break;
6202 
6203 	  power = rf1->count;
6204 
6205 	  if (dump_file && (dump_flags & TDF_DETAILS))
6206 	    {
6207 	      unsigned elt;
6208 	      repeat_factor *rf;
6209 	      fputs ("Building __builtin_pow call for (", dump_file);
6210 	      for (elt = j; elt < vec_len; elt++)
6211 		{
6212 		  rf = &repeat_factor_vec[elt];
6213 		  print_generic_expr (dump_file, rf->factor);
6214 		  if (elt < vec_len - 1)
6215 		    fputs (" * ", dump_file);
6216 		}
6217 	      fprintf (dump_file, ")^" HOST_WIDE_INT_PRINT_DEC"\n", power);
6218 	    }
6219 
6220 	  reassociate_stats.pows_created++;
6221 
6222 	  /* Visit each element of the vector in reverse order (so that
6223 	     high-occurrence elements are visited first, and within the
6224 	     same occurrence count, lower-ranked elements are visited
6225 	     first).  Form a linear product of all elements in this order
6226 	     whose occurrence count is at least that of element J.
6227 	     Record the SSA name representing the product of each element
6228 	     with all subsequent elements in the vector.  */
6229 	  if (j == vec_len - 1)
6230 	    rf1->repr = rf1->factor;
6231 	  else
6232 	    {
6233 	      for (ii = vec_len - 2; ii >= (int)j; ii--)
6234 		{
6235 		  tree op1, op2;
6236 
6237 		  rf1 = &repeat_factor_vec[ii];
6238 		  rf2 = &repeat_factor_vec[ii + 1];
6239 
6240 		  /* Init the last factor's representative to be itself.  */
6241 		  if (!rf2->repr)
6242 		    rf2->repr = rf2->factor;
6243 
6244 		  op1 = rf1->factor;
6245 		  op2 = rf2->repr;
6246 
6247 		  target_ssa = make_temp_ssa_name (type, NULL, "reassocpow");
6248 		  mul_stmt = gimple_build_assign (target_ssa, MULT_EXPR,
6249 						  op1, op2);
6250 		  gimple_set_location (mul_stmt, gimple_location (stmt));
6251 		  gimple_set_uid (mul_stmt, gimple_uid (stmt));
6252 		  gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
6253 		  rf1->repr = target_ssa;
6254 
6255 		  /* Don't reprocess the multiply we just introduced.  */
6256 		  gimple_set_visited (mul_stmt, true);
6257 		}
6258 	    }
6259 
6260 	  /* Form a call to __builtin_powi for the maximum product
6261 	     just formed, raised to the power obtained earlier.  */
6262 	  rf1 = &repeat_factor_vec[j];
6263 	  if (INTEGRAL_TYPE_P (type))
6264 	    {
6265 	      gcc_assert (power > 1);
6266 	      gimple_stmt_iterator gsip = gsi;
6267 	      gsi_prev (&gsip);
6268 	      iter_result = powi_as_mults (&gsi, gimple_location (stmt),
6269 					   rf1->repr, power);
6270 	      gimple_stmt_iterator gsic = gsi;
6271 	      while (gsi_stmt (gsic) != gsi_stmt (gsip))
6272 		{
6273 		  gimple_set_uid (gsi_stmt (gsic), gimple_uid (stmt));
6274 		  gimple_set_visited (gsi_stmt (gsic), true);
6275 		  gsi_prev (&gsic);
6276 		}
6277 	    }
6278 	  else
6279 	    {
6280 	      iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
6281 	      pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
6282 					    build_int_cst (integer_type_node,
6283 							   power));
6284 	      gimple_call_set_lhs (pow_stmt, iter_result);
6285 	      gimple_set_location (pow_stmt, gimple_location (stmt));
6286 	      gimple_set_uid (pow_stmt, gimple_uid (stmt));
6287 	      gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
6288 	    }
6289 	}
6290 
6291       /* If we previously formed at least one other builtin_powi call,
6292 	 form the product of this one and those others.  */
6293       if (result)
6294 	{
6295 	  tree new_result = make_temp_ssa_name (type, NULL, "reassocpow");
6296 	  mul_stmt = gimple_build_assign (new_result, MULT_EXPR,
6297 					  result, iter_result);
6298 	  gimple_set_location (mul_stmt, gimple_location (stmt));
6299 	  gimple_set_uid (mul_stmt, gimple_uid (stmt));
6300 	  gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
6301 	  gimple_set_visited (mul_stmt, true);
6302 	  result = new_result;
6303 	}
6304       else
6305 	result = iter_result;
6306 
6307       /* Decrement the occurrence count of each element in the product
6308 	 by the count found above, and remove this many copies of each
6309 	 factor from OPS.  */
6310       for (i = j; i < vec_len; i++)
6311 	{
6312 	  unsigned k = power;
6313 	  unsigned n;
6314 
6315 	  rf1 = &repeat_factor_vec[i];
6316 	  rf1->count -= power;
6317 
6318 	  FOR_EACH_VEC_ELT_REVERSE (*ops, n, oe)
6319 	    {
6320 	      if (oe->op == rf1->factor)
6321 		{
6322 		  if (oe->count <= k)
6323 		    {
6324 		      ops->ordered_remove (n);
6325 		      k -= oe->count;
6326 
6327 		      if (k == 0)
6328 			break;
6329 		    }
6330 		  else
6331 		    {
6332 		      oe->count -= k;
6333 		      break;
6334 		    }
6335 		}
6336 	    }
6337 	}
6338     }
6339 
6340   /* At this point all elements in the repeated factor vector have a
6341      remaining occurrence count of 0 or 1, and those with a count of 1
6342      don't have cached representatives.  Re-sort the ops vector and
6343      clean up.  */
6344   ops->qsort (sort_by_operand_rank);
6345   repeat_factor_vec.release ();
6346 
6347   /* Return the final product computed herein.  Note that there may
6348      still be some elements with single occurrence count left in OPS;
6349      those will be handled by the normal reassociation logic.  */
6350   return result;
6351 }
6352 
6353 /* Attempt to optimize
6354    CST1 * copysign (CST2, y) -> copysign (CST1 * CST2, y) if CST1 > 0, or
6355    CST1 * copysign (CST2, y) -> -copysign (CST1 * CST2, y) if CST1 < 0.  */
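
/* E.g. (illustrative): 2.0 * copysign (3.0, y_1) is rewritten to
   copysign (6.0, y_1), and -2.0 * copysign (3.0, y_1) to
   -copysign (-6.0, y_1).  */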
6356 
6357 static void
6358 attempt_builtin_copysign (vec<operand_entry *> *ops)
6359 {
6360   operand_entry *oe;
6361   unsigned int i;
6362   unsigned int length = ops->length ();
6363   tree cst = ops->last ()->op;
6364 
6365   if (length == 1 || TREE_CODE (cst) != REAL_CST)
6366     return;
6367 
6368   FOR_EACH_VEC_ELT (*ops, i, oe)
6369     {
6370       if (TREE_CODE (oe->op) == SSA_NAME
6371 	  && has_single_use (oe->op))
6372 	{
6373 	  gimple *def_stmt = SSA_NAME_DEF_STMT (oe->op);
6374 	  if (gcall *old_call = dyn_cast <gcall *> (def_stmt))
6375 	    {
6376 	      tree arg0, arg1;
6377 	      switch (gimple_call_combined_fn (old_call))
6378 		{
6379 		CASE_CFN_COPYSIGN:
6380 		CASE_CFN_COPYSIGN_FN:
6381 		  arg0 = gimple_call_arg (old_call, 0);
6382 		  arg1 = gimple_call_arg (old_call, 1);
6383 		  /* The first argument of copysign must be a constant,
6384 		     otherwise there's nothing to do.  */
6385 		  if (TREE_CODE (arg0) == REAL_CST)
6386 		    {
6387 		      tree type = TREE_TYPE (arg0);
6388 		      tree mul = const_binop (MULT_EXPR, type, cst, arg0);
6389 		      /* If we couldn't fold to a single constant, skip it.
6390 			 That happens e.g. for inexact multiplication when
6391 			 -frounding-math.  */
6392 		      if (mul == NULL_TREE)
6393 			break;
6394 		      /* Instead of adjusting OLD_CALL, let's build a new
6395 			 call to not leak the LHS and prevent keeping bogus
6396 			 debug statements.  DCE will clean up the old call.  */
6397 		      gcall *new_call;
6398 		      if (gimple_call_internal_p (old_call))
6399 			new_call = gimple_build_call_internal
6400 			  (IFN_COPYSIGN, 2, mul, arg1);
6401 		      else
6402 			new_call = gimple_build_call
6403 			  (gimple_call_fndecl (old_call), 2, mul, arg1);
6404 		      tree lhs = make_ssa_name (type);
6405 		      gimple_call_set_lhs (new_call, lhs);
6406 		      gimple_set_location (new_call,
6407 					   gimple_location (old_call));
6408 		      insert_stmt_after (new_call, old_call);
6409 		      /* We've used the constant, get rid of it.  */
6410 		      ops->pop ();
6411 		      bool cst1_neg = real_isneg (TREE_REAL_CST_PTR (cst));
6412 		      /* Handle the CST1 < 0 case by negating the result.  */
6413 		      if (cst1_neg)
6414 			{
6415 			  tree negrhs = make_ssa_name (TREE_TYPE (lhs));
6416 			  gimple *negate_stmt
6417 			    = gimple_build_assign (negrhs, NEGATE_EXPR, lhs);
6418 			  insert_stmt_after (negate_stmt, new_call);
6419 			  oe->op = negrhs;
6420 			}
6421 		      else
6422 			oe->op = lhs;
6423 		      if (dump_file && (dump_flags & TDF_DETAILS))
6424 			{
6425 			  fprintf (dump_file, "Optimizing copysign: ");
6426 			  print_generic_expr (dump_file, cst);
6427 			  fprintf (dump_file, " * COPYSIGN (");
6428 			  print_generic_expr (dump_file, arg0);
6429 			  fprintf (dump_file, ", ");
6430 			  print_generic_expr (dump_file, arg1);
6431 			  fprintf (dump_file, ") into %sCOPYSIGN (",
6432 				   cst1_neg ? "-" : "");
6433 			  print_generic_expr (dump_file, mul);
6434 			  fprintf (dump_file, ", ");
6435 			  print_generic_expr (dump_file, arg1);
6436 			  fprintf (dump_file, "\n");
6437 			}
6438 		      return;
6439 		    }
6440 		  break;
6441 		default:
6442 		  break;
6443 		}
6444 	    }
6445 	}
6446     }
6447 }
6448 
6449 /* Transform STMT at *GSI into a copy by replacing its rhs with NEW_RHS.  */
6450 
6451 static void
6452 transform_stmt_to_copy (gimple_stmt_iterator *gsi, gimple *stmt, tree new_rhs)
6453 {
6454   tree rhs1;
6455 
6456   if (dump_file && (dump_flags & TDF_DETAILS))
6457     {
6458       fprintf (dump_file, "Transforming ");
6459       print_gimple_stmt (dump_file, stmt, 0);
6460     }
6461 
6462   rhs1 = gimple_assign_rhs1 (stmt);
6463   gimple_assign_set_rhs_from_tree (gsi, new_rhs);
6464   update_stmt (stmt);
6465   remove_visited_stmt_chain (rhs1);
6466 
6467   if (dump_file && (dump_flags & TDF_DETAILS))
6468     {
6469       fprintf (dump_file, " into ");
6470       print_gimple_stmt (dump_file, stmt, 0);
6471     }
6472 }
6473 
6474 /* Transform STMT at *GSI into a multiply of RHS1 and RHS2.  */
6475 
6476 static void
6477 transform_stmt_to_multiply (gimple_stmt_iterator *gsi, gimple *stmt,
6478 			    tree rhs1, tree rhs2)
6479 {
6480   if (dump_file && (dump_flags & TDF_DETAILS))
6481     {
6482       fprintf (dump_file, "Transforming ");
6483       print_gimple_stmt (dump_file, stmt, 0);
6484     }
6485 
6486   gimple_assign_set_rhs_with_ops (gsi, MULT_EXPR, rhs1, rhs2);
6487   update_stmt (gsi_stmt (*gsi));
6488   remove_visited_stmt_chain (rhs1);
6489 
6490   if (dump_file && (dump_flags & TDF_DETAILS))
6491     {
6492       fprintf (dump_file, " into ");
6493       print_gimple_stmt (dump_file, stmt, 0);
6494     }
6495 }
6496 
6497 /* Reassociate expressions in basic block BB and its children in the
6498    post-dominator tree.
6499 
6500    Bubble up return status from maybe_optimize_range_tests.  */
6501 
6502 static bool
6503 reassociate_bb (basic_block bb)
6504 {
6505   gimple_stmt_iterator gsi;
6506   basic_block son;
6507   gimple *stmt = last_stmt (bb);
6508   bool cfg_cleanup_needed = false;
6509 
6510   if (stmt && !gimple_visited_p (stmt))
6511     cfg_cleanup_needed |= maybe_optimize_range_tests (stmt);
6512 
6513   bool do_prev = false;
6514   for (gsi = gsi_last_bb (bb);
6515        !gsi_end_p (gsi); do_prev ? gsi_prev (&gsi) : (void) 0)
6516     {
6517       do_prev = true;
6518       stmt = gsi_stmt (gsi);
6519 
6520       if (is_gimple_assign (stmt)
6521 	  && !stmt_could_throw_p (cfun, stmt))
6522 	{
6523 	  tree lhs, rhs1, rhs2;
6524 	  enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
6525 
6526 	  /* If this was part of an already processed statement,
6527 	     we don't need to touch it again. */
6528 	  if (gimple_visited_p (stmt))
6529 	    {
6530 	      /* This statement might have become dead because of previous
6531 		 reassociations.  */
6532 	      if (has_zero_uses (gimple_get_lhs (stmt)))
6533 		{
6534 		  reassoc_remove_stmt (&gsi);
6535 		  release_defs (stmt);
6536 		  /* We might end up removing the last stmt above which
6537 		     places the iterator to the end of the sequence.
6538 		     Reset it to the last stmt in this case and make sure
6539 		     we don't do gsi_prev in that case.  */
6540 		  if (gsi_end_p (gsi))
6541 		    {
6542 		      gsi = gsi_last_bb (bb);
6543 		      do_prev = false;
6544 		    }
6545 		}
6546 	      continue;
6547 	    }
6548 
6549 	  /* If this is not a gimple binary expression, there is
6550 	     nothing for us to do with it.  */
6551 	  if (get_gimple_rhs_class (rhs_code) != GIMPLE_BINARY_RHS)
6552 	    continue;
6553 
6554 	  lhs = gimple_assign_lhs (stmt);
6555 	  rhs1 = gimple_assign_rhs1 (stmt);
6556 	  rhs2 = gimple_assign_rhs2 (stmt);
6557 
6558 	  /* For operations other than bitwise and min/max ones we can't
6559 	     associate all types.  Verify that here.  */
6560 	  if ((rhs_code != BIT_IOR_EXPR
6561 	       && rhs_code != BIT_AND_EXPR
6562 	       && rhs_code != BIT_XOR_EXPR
6563 	       && rhs_code != MIN_EXPR
6564 	       && rhs_code != MAX_EXPR
6565 	       && !can_reassociate_type_p (TREE_TYPE (lhs)))
6566 	      || !can_reassociate_op_p (rhs1)
6567 	      || !can_reassociate_op_p (rhs2))
6568 	    continue;
6569 
6570 	  if (associative_tree_code (rhs_code))
6571 	    {
6572 	      auto_vec<operand_entry *> ops;
6573 	      tree powi_result = NULL_TREE;
6574 	      bool is_vector = VECTOR_TYPE_P (TREE_TYPE (lhs));
6575 
6576 	      /* There may be no immediate uses left by the time we
6577 		 get here because we may have eliminated them all.  */
6578 	      if (TREE_CODE (lhs) == SSA_NAME && has_zero_uses (lhs))
6579 		continue;
6580 
6581 	      gimple_set_visited (stmt, true);
6582 	      linearize_expr_tree (&ops, stmt, true, true);
6583 	      ops.qsort (sort_by_operand_rank);
6584 	      int orig_len = ops.length ();
6585 	      optimize_ops_list (rhs_code, &ops);
6586 	      if (undistribute_ops_list (rhs_code, &ops,
6587 					 loop_containing_stmt (stmt)))
6588 		{
6589 		  ops.qsort (sort_by_operand_rank);
6590 		  optimize_ops_list (rhs_code, &ops);
6591 		}
6592 	      if (undistribute_bitref_for_vector (rhs_code, &ops,
6593 						  loop_containing_stmt (stmt)))
6594 		{
6595 		  ops.qsort (sort_by_operand_rank);
6596 		  optimize_ops_list (rhs_code, &ops);
6597 		}
6598 	      if (rhs_code == PLUS_EXPR
6599 		  && transform_add_to_multiply (&ops))
6600 		ops.qsort (sort_by_operand_rank);
6601 
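	      /* For | and & chains of comparison results, try to merge
		 them into range tests, e.g. a_1 == 3 | a_1 == 4 can be
		 rewritten as (unsigned) a_1 - 3 <= 1.  */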
6602 	      if (rhs_code == BIT_IOR_EXPR || rhs_code == BIT_AND_EXPR)
6603 		{
6604 		  if (is_vector)
6605 		    optimize_vec_cond_expr (rhs_code, &ops);
6606 		  else
6607 		    optimize_range_tests (rhs_code, &ops, NULL);
6608 	        }
6609 
6610 	      if (rhs_code == MULT_EXPR && !is_vector)
6611 	        {
6612 		  attempt_builtin_copysign (&ops);
6613 
6614 		  if (reassoc_insert_powi_p
6615 		      && (flag_unsafe_math_optimizations
6616 			  || (INTEGRAL_TYPE_P (TREE_TYPE (lhs)))))
6617 		    powi_result = attempt_builtin_powi (stmt, &ops);
6618 		}
6619 
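	      /* If a multiplication chain ends in the constant -1 (constants
		 sort to the end of OPS), drop it and negate the final result
		 instead, e.g. a * b * -1 -> -(a * b), when the type's
		 semantics allow it.  */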
6620 	      operand_entry *last;
6621 	      bool negate_result = false;
6622 	      if (ops.length () > 1
6623 		  && rhs_code == MULT_EXPR)
6624 		{
6625 		  last = ops.last ();
6626 		  if ((integer_minus_onep (last->op)
6627 		       || real_minus_onep (last->op))
6628 		      && !HONOR_SNANS (TREE_TYPE (lhs))
6629 		      && (!HONOR_SIGNED_ZEROS (TREE_TYPE (lhs))
6630 			  || !COMPLEX_FLOAT_TYPE_P (TREE_TYPE (lhs))))
6631 		    {
6632 		      ops.pop ();
6633 		      negate_result = true;
6634 		    }
6635 		}
6636 
6637 	      tree new_lhs = lhs;
6638 	      /* If the operand vector is now empty, all operands were
6639 		 consumed by the __builtin_powi optimization.  */
6640 	      if (ops.length () == 0)
6641 		transform_stmt_to_copy (&gsi, stmt, powi_result);
6642 	      else if (ops.length () == 1)
6643 		{
6644 		  tree last_op = ops.last ()->op;
6645 
6646 		  /* If the stmt that defines operand has to be inserted, insert it
6647 		     before the use.  */
6648 		  if (ops.last ()->stmt_to_insert)
6649 		    insert_stmt_before_use (stmt, ops.last ()->stmt_to_insert);
6650 		  if (powi_result)
6651 		    transform_stmt_to_multiply (&gsi, stmt, last_op,
6652 						powi_result);
6653 		  else
6654 		    transform_stmt_to_copy (&gsi, stmt, last_op);
6655 		}
6656 	      else
6657 		{
6658 		  machine_mode mode = TYPE_MODE (TREE_TYPE (lhs));
6659 		  int ops_num = ops.length ();
6660 		  int width;
6661 
6662 		  /* For binary bit operations, if there are at least 3
6663 		     operands and the last operand in OPS is a constant,
6664 		     move it to the front.  This helps ensure that we generate
6665 		     (X & Y) & C rather than (X & C) & Y.  The former will
6666 		     often match a canonical bit test when we get to RTL.  */
6667 		  if (ops.length () > 2
6668 		      && (rhs_code == BIT_AND_EXPR
6669 		          || rhs_code == BIT_IOR_EXPR
6670 		          || rhs_code == BIT_XOR_EXPR)
6671 		      && TREE_CODE (ops.last ()->op) == INTEGER_CST)
6672 		    std::swap (*ops[0], *ops[ops_num - 1]);
6673 
6674 		  /* Only rewrite the expression tree into a parallel form in
6675 		     the last reassoc pass, to avoid useless back-and-forth
6676 		     work with the initial linearization.  */
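		  /* For example, with a width of 2 the serial chain
		     x = (((a + b) + c) + d) can be emitted as
		     t1 = a + b; t2 = c + d; x = t1 + t2, exposing two
		     independent additions.  */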
6677 		  if (!reassoc_insert_powi_p
6678 		      && ops.length () > 3
6679 		      && (width = get_reassociation_width (ops_num, rhs_code,
6680 							   mode)) > 1)
6681 		    {
6682 		      if (dump_file && (dump_flags & TDF_DETAILS))
6683 			fprintf (dump_file,
6684 				 "Width = %d was chosen for reassociation\n",
6685 				 width);
6686 		      rewrite_expr_tree_parallel (as_a <gassign *> (stmt),
6687 						  width, ops);
6688 		    }
6689 		  else
6690                     {
6691                       /* When there are three operands left, we want
6692                          to make sure the ones that get the double
6693                          binary op are chosen wisely.  */
6694                       int len = ops.length ();
6695                       if (len >= 3)
6696                         swap_ops_for_binary_stmt (ops, len - 3, stmt);
6697 
6698 		      new_lhs = rewrite_expr_tree (stmt, rhs_code, 0, ops,
6699 						   powi_result != NULL
6700 						   || negate_result,
6701 						   len != orig_len);
6702                     }
6703 
6704 		  /* If we combined some repeated factors into a
6705 		     __builtin_powi call, multiply that result by the
6706 		     reassociated operands.  */
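		  /* E.g. for x = a * a * b * c this leaves OPS with b and c;
		     the rewritten statement computes their product into a
		     fresh "reassocpow" temporary and the multiply emitted
		     here yields the final x = powi_result * temporary.  */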
6707 		  if (powi_result)
6708 		    {
6709 		      gimple *mul_stmt, *lhs_stmt = SSA_NAME_DEF_STMT (lhs);
6710 		      tree type = TREE_TYPE (lhs);
6711 		      tree target_ssa = make_temp_ssa_name (type, NULL,
6712 							    "reassocpow");
6713 		      gimple_set_lhs (lhs_stmt, target_ssa);
6714 		      update_stmt (lhs_stmt);
6715 		      if (lhs != new_lhs)
6716 			{
6717 			  target_ssa = new_lhs;
6718 			  new_lhs = lhs;
6719 			}
6720 		      mul_stmt = gimple_build_assign (lhs, MULT_EXPR,
6721 						      powi_result, target_ssa);
6722 		      gimple_set_location (mul_stmt, gimple_location (stmt));
6723 		      gimple_set_uid (mul_stmt, gimple_uid (stmt));
6724 		      gsi_insert_after (&gsi, mul_stmt, GSI_NEW_STMT);
6725 		    }
6726 		}
6727 
6728 	      if (negate_result)
6729 		{
6730 		  stmt = SSA_NAME_DEF_STMT (lhs);
6731 		  tree tmp = make_ssa_name (TREE_TYPE (lhs));
6732 		  gimple_set_lhs (stmt, tmp);
6733 		  if (lhs != new_lhs)
6734 		    tmp = new_lhs;
6735 		  gassign *neg_stmt = gimple_build_assign (lhs, NEGATE_EXPR,
6736 							   tmp);
6737 		  gimple_set_uid (neg_stmt, gimple_uid (stmt));
6738 		  gsi_insert_after (&gsi, neg_stmt, GSI_NEW_STMT);
6739 		  update_stmt (stmt);
6740 		}
6741 	    }
6742 	}
6743     }
6744   for (son = first_dom_son (CDI_POST_DOMINATORS, bb);
6745        son;
6746        son = next_dom_son (CDI_POST_DOMINATORS, son))
6747     cfg_cleanup_needed |= reassociate_bb (son);
6748 
6749   return cfg_cleanup_needed;
6750 }
6751 
6752 /* Add jumps around shifts for range tests turned into bit tests.
6753    For each SSA_NAME VAR we have code like:
6754    VAR = ...; // final stmt of range comparison
6755    // bit test here...;
6756    OTHERVAR = ...; // final stmt of the bit test sequence
6757    RES = VAR | OTHERVAR;
6758    Turn the above into:
6759    VAR = ...;
6760    if (VAR != 0)
6761      goto <l3>;
6762    else
6763      goto <l2>;
6764    <l2>:
6765    // bit test here...;
6766    OTHERVAR = ...;
6767    <l3>:
6768    # RES = PHI<1(l1), OTHERVAR(l2)>;  */
6769 
6770 static void
6771 branch_fixup (void)
6772 {
6773   tree var;
6774   unsigned int i;
6775 
6776   FOR_EACH_VEC_ELT (reassoc_branch_fixups, i, var)
6777     {
6778       gimple *def_stmt = SSA_NAME_DEF_STMT (var);
6779       gimple *use_stmt;
6780       use_operand_p use;
6781       bool ok = single_imm_use (var, &use, &use_stmt);
6782       gcc_assert (ok
6783 		  && is_gimple_assign (use_stmt)
6784 		  && gimple_assign_rhs_code (use_stmt) == BIT_IOR_EXPR
6785 		  && gimple_bb (def_stmt) == gimple_bb (use_stmt));
6786 
6787       basic_block cond_bb = gimple_bb (def_stmt);
6788       basic_block then_bb = split_block (cond_bb, def_stmt)->dest;
6789       basic_block merge_bb = split_block (then_bb, use_stmt)->dest;
6790 
6791       gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
6792       gimple *g = gimple_build_cond (NE_EXPR, var,
6793 				     build_zero_cst (TREE_TYPE (var)),
6794 				     NULL_TREE, NULL_TREE);
6795       location_t loc = gimple_location (use_stmt);
6796       gimple_set_location (g, loc);
6797       gsi_insert_after (&gsi, g, GSI_NEW_STMT);
6798 
6799       edge etrue = make_edge (cond_bb, merge_bb, EDGE_TRUE_VALUE);
6800       etrue->probability = profile_probability::even ();
6801       edge efalse = find_edge (cond_bb, then_bb);
6802       efalse->flags = EDGE_FALSE_VALUE;
6803       efalse->probability -= etrue->probability;
6804       then_bb->count -= etrue->count ();
6805 
6806       tree othervar = NULL_TREE;
6807       if (gimple_assign_rhs1 (use_stmt) == var)
6808 	othervar = gimple_assign_rhs2 (use_stmt);
6809       else if (gimple_assign_rhs2 (use_stmt) == var)
6810 	othervar = gimple_assign_rhs1 (use_stmt);
6811       else
6812 	gcc_unreachable ();
6813       tree lhs = gimple_assign_lhs (use_stmt);
6814       gphi *phi = create_phi_node (lhs, merge_bb);
6815       add_phi_arg (phi, build_one_cst (TREE_TYPE (lhs)), etrue, loc);
6816       add_phi_arg (phi, othervar, single_succ_edge (then_bb), loc);
6817       gsi = gsi_for_stmt (use_stmt);
6818       gsi_remove (&gsi, true);
6819 
6820       set_immediate_dominator (CDI_DOMINATORS, merge_bb, cond_bb);
6821       set_immediate_dominator (CDI_POST_DOMINATORS, cond_bb, merge_bb);
6822     }
6823   reassoc_branch_fixups.release ();
6824 }
6825 
6826 void dump_ops_vector (FILE *file, vec<operand_entry *> ops);
6827 void debug_ops_vector (vec<operand_entry *> ops);
6828 
6829 /* Dump the operand entry vector OPS to FILE.  */
6830 
6831 void
6832 dump_ops_vector (FILE *file, vec<operand_entry *> ops)
6833 {
6834   operand_entry *oe;
6835   unsigned int i;
6836 
6837   FOR_EACH_VEC_ELT (ops, i, oe)
6838     {
6839       fprintf (file, "Op %d -> rank: %d, tree: ", i, oe->rank);
6840       print_generic_expr (file, oe->op);
6841       fprintf (file, "\n");
6842     }
6843 }
6844 
6845 /* Dump the operand entry vector OPS to STDERR.  */
6846 
6847 DEBUG_FUNCTION void
6848 debug_ops_vector (vec<operand_entry *> ops)
6849 {
6850   dump_ops_vector (stderr, ops);
6851 }
6852 
6853 /* Bubble up return status from reassociate_bb.  */
6854 
6855 static bool
6856 do_reassoc (void)
6857 {
6858   break_up_subtract_bb (ENTRY_BLOCK_PTR_FOR_FN (cfun));
6859   return reassociate_bb (EXIT_BLOCK_PTR_FOR_FN (cfun));
6860 }
6861 
6862 /* Initialize the reassociation pass.  */
6863 
6864 static void
6865 init_reassoc (void)
6866 {
6867   int i;
6868   int64_t rank = 2;
6869   int *bbs = XNEWVEC (int, n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS);
6870 
6871   /* Find the loops, so that we can prevent moving calculations in
6872      them.  */
6873   loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
6874 
6875   memset (&reassociate_stats, 0, sizeof (reassociate_stats));
6876 
6877   next_operand_entry_id = 0;
6878 
6879   /* Reverse post order (RPO) will give us an ordering in which
6880      deeper loops come later.  */
6881   pre_and_rev_post_order_compute (NULL, bbs, false);
6882   bb_rank = XCNEWVEC (int64_t, last_basic_block_for_fn (cfun));
6883   operand_rank = new hash_map<tree, int64_t>;
6884 
6885   /* Give each default definition a distinct rank.  This includes
6886      parameters and the static chain.  Walk backwards over all
6887      SSA names so that we get proper rank ordering according
6888      to tree_swap_operands_p.  */
6889   for (i = num_ssa_names - 1; i > 0; --i)
6890     {
6891       tree name = ssa_name (i);
6892       if (name && SSA_NAME_IS_DEFAULT_DEF (name))
6893 	insert_operand_rank (name, ++rank);
6894     }
6895 
6896   /* Set up rank for each BB  */
6897   for (i = 0; i < n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS; i++)
6898     bb_rank[bbs[i]] = ++rank << 16;
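  /* The shift by 16 leaves the low bits free, so ranks computed for
     individual statements within a block normally stay below the rank
     assigned to the following block.  */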
6899 
6900   free (bbs);
6901   calculate_dominance_info (CDI_POST_DOMINATORS);
6902   plus_negates = vNULL;
6903 }
6904 
6905 /* Cleanup after the reassociation pass, and print stats if
6906    requested.  */
6907 
6908 static void
6909 fini_reassoc (void)
6910 {
6911   statistics_counter_event (cfun, "Linearized",
6912 			    reassociate_stats.linearized);
6913   statistics_counter_event (cfun, "Constants eliminated",
6914 			    reassociate_stats.constants_eliminated);
6915   statistics_counter_event (cfun, "Ops eliminated",
6916 			    reassociate_stats.ops_eliminated);
6917   statistics_counter_event (cfun, "Statements rewritten",
6918 			    reassociate_stats.rewritten);
6919   statistics_counter_event (cfun, "Built-in pow[i] calls encountered",
6920 			    reassociate_stats.pows_encountered);
6921   statistics_counter_event (cfun, "Built-in powi calls created",
6922 			    reassociate_stats.pows_created);
6923 
6924   delete operand_rank;
6925   operand_entry_pool.release ();
6926   free (bb_rank);
6927   plus_negates.release ();
6928   free_dominance_info (CDI_POST_DOMINATORS);
6929   loop_optimizer_finalize ();
6930 }
6931 
6932 /* Gate and execute functions for Reassociation.  If INSERT_POWI_P, enable
6933    insertion of __builtin_powi calls.
6934 
6935    Returns TODO_cfg_cleanup if a CFG cleanup pass is desired due to
6936    optimization of a gimple conditional.  Otherwise returns zero.  */
6937 
6938 static unsigned int
6939 execute_reassoc (bool insert_powi_p)
6940 {
6941   reassoc_insert_powi_p = insert_powi_p;
6942 
6943   init_reassoc ();
6944 
6945   bool cfg_cleanup_needed;
6946   cfg_cleanup_needed = do_reassoc ();
6947   repropagate_negates ();
6948   branch_fixup ();
6949 
6950   fini_reassoc ();
6951   return cfg_cleanup_needed ? TODO_cleanup_cfg : 0;
6952 }
6953 
6954 namespace {
6955 
6956 const pass_data pass_data_reassoc =
6957 {
6958   GIMPLE_PASS, /* type */
6959   "reassoc", /* name */
6960   OPTGROUP_NONE, /* optinfo_flags */
6961   TV_TREE_REASSOC, /* tv_id */
6962   ( PROP_cfg | PROP_ssa ), /* properties_required */
6963   0, /* properties_provided */
6964   0, /* properties_destroyed */
6965   0, /* todo_flags_start */
6966   TODO_update_ssa_only_virtuals, /* todo_flags_finish */
6967 };
6968 
6969 class pass_reassoc : public gimple_opt_pass
6970 {
6971 public:
6972   pass_reassoc (gcc::context *ctxt)
6973     : gimple_opt_pass (pass_data_reassoc, ctxt), insert_powi_p (false)
6974   {}
6975 
6976   /* opt_pass methods: */
6977   opt_pass * clone () { return new pass_reassoc (m_ctxt); }
6978   void set_pass_param (unsigned int n, bool param)
6979     {
6980       gcc_assert (n == 0);
6981       insert_powi_p = param;
6982     }
6983   virtual bool gate (function *) { return flag_tree_reassoc != 0; }
6984   virtual unsigned int execute (function *)
6985     { return execute_reassoc (insert_powi_p); }
6986 
6987  private:
6988   /* Enable insertion of __builtin_powi calls during execute_reassoc.  See
6989      point 3a in the pass header comment.  */
6990   bool insert_powi_p;
6991 }; // class pass_reassoc
6992 
6993 } // anon namespace
6994 
6995 gimple_opt_pass *
6996 make_pass_reassoc (gcc::context *ctxt)
6997 {
6998   return new pass_reassoc (ctxt);
6999 }
7000