1 /* Reassociation for trees.
2    Copyright (C) 2005-2014 Free Software Foundation, Inc.
3    Contributed by Daniel Berlin <dan@dberlin.org>
4 
5 This file is part of GCC.
6 
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
11 
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 GNU General Public License for more details.
16 
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3.  If not see
19 <http://www.gnu.org/licenses/>.  */
20 
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "hash-table.h"
25 #include "tm.h"
26 #include "rtl.h"
27 #include "tm_p.h"
28 #include "tree.h"
29 #include "stor-layout.h"
30 #include "basic-block.h"
31 #include "gimple-pretty-print.h"
32 #include "tree-inline.h"
33 #include "pointer-set.h"
34 #include "tree-ssa-alias.h"
35 #include "internal-fn.h"
36 #include "gimple-fold.h"
37 #include "tree-eh.h"
38 #include "gimple-expr.h"
39 #include "is-a.h"
40 #include "gimple.h"
41 #include "gimple-iterator.h"
42 #include "gimplify-me.h"
43 #include "gimple-ssa.h"
44 #include "tree-cfg.h"
45 #include "tree-phinodes.h"
46 #include "ssa-iterators.h"
47 #include "stringpool.h"
48 #include "tree-ssanames.h"
49 #include "tree-ssa-loop-niter.h"
50 #include "tree-ssa-loop.h"
51 #include "expr.h"
52 #include "tree-dfa.h"
53 #include "tree-ssa.h"
54 #include "tree-iterator.h"
55 #include "tree-pass.h"
56 #include "alloc-pool.h"
57 #include "langhooks.h"
58 #include "cfgloop.h"
59 #include "flags.h"
60 #include "target.h"
61 #include "params.h"
62 #include "diagnostic-core.h"
63 
64 /*  This is a simple global reassociation pass.  It is, in part, based
65     on the LLVM pass of the same name (they do some things more/less
66     than we do, in different orders, etc.).
67 
68     It consists of five steps:
69 
70     1. Breaking up subtract operations into addition + negate, where
71     it would promote the reassociation of adds.
72 
73     2. Left linearization of the expression trees, so that (A+B)+(C+D)
74     becomes (((A+B)+C)+D), which is easier for us to rewrite later.
75     During linearization, we place the operands of the binary
76     expressions into a vector of operand_entry_t.
77 
78     3. Optimization of the operand lists, eliminating things like a +
79     -a, a & a, etc.
80 
81     3a. Combine repeated factors with the same occurrence counts
82     into a __builtin_powi call that will later be optimized into
83     an optimal number of multiplies.
84 
85     4. Rewrite the expression trees we linearized and optimized so
86     they are in proper rank order.
87 
88     5. Repropagate negates, as nothing else will clean it up ATM.
89 
90     A bit of theory on #4, since nobody seems to write anything down
91     about why it makes sense to do it the way they do it:
92 
93     We could do this much nicer theoretically, but don't (for reasons
94     explained after how to do it theoretically nice :P).
95 
96     In order to promote the most redundancy elimination, you want
97     binary expressions whose operands are the same rank (or
98     preferably, the same value) exposed to the redundancy eliminator,
99     for possible elimination.
100 
101     So the way to do this if we really cared, is to build the new op
102     tree from the leaves to the roots, merging as you go, and putting the
103     new op on the end of the worklist, until you are left with one
104     thing on the worklist.
105 
106     IE if you have to rewrite the following set of operands (listed with
107     rank in parentheses), with opcode PLUS_EXPR:
108 
109     a (1),  b (1),  c (1),  d (2), e (2)
110 
111 
112     We start with our merge worklist empty, and the ops list with all of
113     those on it.
114 
115     You want to first merge all leaves of the same rank, as much as
116     possible.
117 
118     So first build a binary op of
119 
120     mergetmp = a + b, and put "mergetmp" on the merge worklist.
121 
122     Because there is no three operand form of PLUS_EXPR, c is not going to
123     be exposed to redundancy elimination as a rank 1 operand.
124 
125     So you might as well throw it on the merge worklist (you could also
126     consider it to now be a rank two operand, and merge it with d and e,
127     but in this case, you then have evicted e from a binary op. So at
128     least in this situation, you can't win.)
129 
130     Then build a binary op of d + e
131     mergetmp2 = d + e
132 
133     and put mergetmp2 on the merge worklist.
134 
135     so merge worklist = {mergetmp, c, mergetmp2}
136 
137     Continue building binary ops of these operations until you have only
138     one operation left on the worklist.
139 
140     So we have
141 
142     build binary op
143     mergetmp3 = mergetmp + c
144 
145     worklist = {mergetmp2, mergetmp3}
146 
147     mergetmp4 = mergetmp2 + mergetmp3
148 
149     worklist = {mergetmp4}
150 
151     because we have one operation left, we can now just set the original
152     statement equal to the result of that operation.
153 
154     This will at least expose a + b  and d + e to redundancy elimination
155     as binary operations.
156 
157     For extra points, you can reuse the old statements to build the
158     mergetmps, since you shouldn't run out.
159 
160     So why don't we do this?
161 
162     Because it's expensive, and rarely will help.  Most trees we are
163     reassociating have 3 or fewer ops.  If they have 2 ops, they already
164     will be written into a nice single binary op.  If you have 3 ops, a
165     single simple check suffices to tell you whether the first two are of the
166     same rank.  If so, you know to order it
167 
168     mergetmp = op1 + op2
169     newstmt = mergetmp + op3
170 
171     instead of
172     mergetmp = op2 + op3
173     newstmt = mergetmp + op1
174 
175     If all three are of the same rank, you can't expose them all in a
176     single binary operator anyway, so the above is *still* the best you
177     can do.
178 
179     Thus, this is what we do.  When we have three ops left, we check to see
180     what order to put them in, and call it a day.  As a nod to vector sum
181     reduction, we check if any of the ops are really a phi node that is a
182     destructive update for the associating op, and keep the destructive
183     update together for vector sum reduction recognition.  */
184 
185 
186 /* Statistics */
187 static struct
188 {
189   int linearized;
190   int constants_eliminated;
191   int ops_eliminated;
192   int rewritten;
193   int pows_encountered;
194   int pows_created;
195 } reassociate_stats;
196 
197 /* Operator, rank pair.  */
198 typedef struct operand_entry
199 {
200   unsigned int rank;
201   int id;
202   tree op;
203   unsigned int count;
204 } *operand_entry_t;
205 
206 static alloc_pool operand_entry_pool;
207 
208 /* This is used to assign a unique ID to each struct operand_entry
209    so that qsort results are identical on different hosts.  */
210 static int next_operand_entry_id;
211 
212 /* Starting rank number for a given basic block, so that we can rank
213    operations using unmovable instructions in that BB based on the bb
214    depth.  */
215 static long *bb_rank;
216 
217 /* Operand->rank hashtable.  */
218 static struct pointer_map_t *operand_rank;
219 
220 /* Forward decls.  */
221 static long get_rank (tree);
222 static bool reassoc_stmt_dominates_stmt_p (gimple, gimple);
223 
224 /* Wrapper around gsi_remove, which adjusts gimple_uid of debug stmts
225    possibly added by gsi_remove.  */
226 
227 bool
228 reassoc_remove_stmt (gimple_stmt_iterator *gsi)
229 {
230   gimple stmt = gsi_stmt (*gsi);
231 
232   if (!MAY_HAVE_DEBUG_STMTS || gimple_code (stmt) == GIMPLE_PHI)
233     return gsi_remove (gsi, true);
234 
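  /* gsi_remove below may synthesize debug temp stmts (carrying gimple_uid
     0) at the removal point.  Remember the position just before STMT so
     that we can walk any such newly added stmts afterwards and give them
     STMT's uid, keeping the uid annotations usable for uid-based checks
     such as reassoc_stmt_dominates_stmt_p.  */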
235   gimple_stmt_iterator prev = *gsi;
236   gsi_prev (&prev);
237   unsigned uid = gimple_uid (stmt);
238   basic_block bb = gimple_bb (stmt);
239   bool ret = gsi_remove (gsi, true);
240   if (!gsi_end_p (prev))
241     gsi_next (&prev);
242   else
243     prev = gsi_start_bb (bb);
244   gimple end_stmt = gsi_stmt (*gsi);
245   while ((stmt = gsi_stmt (prev)) != end_stmt)
246     {
247       gcc_assert (stmt && is_gimple_debug (stmt) && gimple_uid (stmt) == 0);
248       gimple_set_uid (stmt, uid);
249       gsi_next (&prev);
250     }
251   return ret;
252 }
253 
254 /* Bias amount for loop-carried phis.  We want this to be larger than
255    the depth of any reassociation tree we can see, but not larger than
256    the rank difference between two blocks.  */
257 #define PHI_LOOP_BIAS (1 << 15)
258 
259 /* Rank assigned to a phi statement.  If STMT is a loop-carried phi of
260    an innermost loop, and the phi has only a single use which is inside
261    the loop, then the rank is the block rank of the loop latch plus an
262    extra bias for the loop-carried dependence.  This causes expressions
263    calculated into an accumulator variable to be independent for each
264    iteration of the loop.  If STMT is some other phi, the rank is the
265    block rank of its containing block.  */
266 static long
267 phi_rank (gimple stmt)
268 {
269   basic_block bb = gimple_bb (stmt);
270   struct loop *father = bb->loop_father;
271   tree res;
272   unsigned i;
273   use_operand_p use;
274   gimple use_stmt;
275 
276   /* We only care about real loops (those with a latch).  */
277   if (!father->latch)
278     return bb_rank[bb->index];
279 
280   /* Interesting phis must be in headers of innermost loops.  */
281   if (bb != father->header
282       || father->inner)
283     return bb_rank[bb->index];
284 
285   /* Ignore virtual SSA_NAMEs.  */
286   res = gimple_phi_result (stmt);
287   if (virtual_operand_p (res))
288     return bb_rank[bb->index];
289 
290   /* The phi definition must have a single use, and that use must be
291      within the loop.  Otherwise this isn't an accumulator pattern.  */
292   if (!single_imm_use (res, &use, &use_stmt)
293       || gimple_bb (use_stmt)->loop_father != father)
294     return bb_rank[bb->index];
295 
296   /* Look for phi arguments from within the loop.  If found, bias this phi.  */
297   for (i = 0; i < gimple_phi_num_args (stmt); i++)
298     {
299       tree arg = gimple_phi_arg_def (stmt, i);
300       if (TREE_CODE (arg) == SSA_NAME
301 	  && !SSA_NAME_IS_DEFAULT_DEF (arg))
302 	{
303 	  gimple def_stmt = SSA_NAME_DEF_STMT (arg);
304 	  if (gimple_bb (def_stmt)->loop_father == father)
305 	    return bb_rank[father->latch->index] + PHI_LOOP_BIAS;
306 	}
307     }
308 
309   /* Must be an uninteresting phi.  */
310   return bb_rank[bb->index];
311 }
312 
313 /* If EXP is an SSA_NAME defined by a PHI statement that represents a
314    loop-carried dependence of an innermost loop, return TRUE; else
315    return FALSE.  */
316 static bool
317 loop_carried_phi (tree exp)
318 {
319   gimple phi_stmt;
320   long block_rank;
321 
322   if (TREE_CODE (exp) != SSA_NAME
323       || SSA_NAME_IS_DEFAULT_DEF (exp))
324     return false;
325 
326   phi_stmt = SSA_NAME_DEF_STMT (exp);
327 
328   if (gimple_code (SSA_NAME_DEF_STMT (exp)) != GIMPLE_PHI)
329     return false;
330 
331   /* Non-loop-carried phis have block rank.  Loop-carried phis have
332      an additional bias added in.  If this phi doesn't have block rank,
333      it's biased and should not be propagated.  */
334   block_rank = bb_rank[gimple_bb (phi_stmt)->index];
335 
336   if (phi_rank (phi_stmt) != block_rank)
337     return true;
338 
339   return false;
340 }
341 
342 /* Return the maximum of RANK and the rank that should be propagated
343    from expression OP.  For most operands, this is just the rank of OP.
344    For loop-carried phis, the value is zero to avoid undoing the bias
345    in favor of the phi.  */
346 static long
347 propagate_rank (long rank, tree op)
348 {
349   long op_rank;
350 
351   if (loop_carried_phi (op))
352     return rank;
353 
354   op_rank = get_rank (op);
355 
356   return MAX (rank, op_rank);
357 }
358 
359 /* Look up the operand rank structure for expression E.  */
360 
361 static inline long
362 find_operand_rank (tree e)
363 {
364   void **slot = pointer_map_contains (operand_rank, e);
365   return slot ? (long) (intptr_t) *slot : -1;
366 }
367 
368 /* Insert {E,RANK} into the operand rank hashtable.  */
369 
370 static inline void
371 insert_operand_rank (tree e, long rank)
372 {
373   void **slot;
374   gcc_assert (rank > 0);
375   slot = pointer_map_insert (operand_rank, e);
376   gcc_assert (!*slot);
377   *slot = (void *) (intptr_t) rank;
378 }
379 
380 /* Given an expression E, return the rank of the expression.  */
381 
382 static long
383 get_rank (tree e)
384 {
385   /* Constants have rank 0.  */
386   if (is_gimple_min_invariant (e))
387     return 0;
388 
389   /* SSA_NAME's have the rank of the expression they are the result
390      of.
391      For globals and uninitialized values, the rank is 0.
392      For function arguments, use the pre-setup rank.
393      For PHI nodes, stores, asm statements, etc, we use the rank of
394      the BB.
395      For simple operations, the rank is the maximum rank of any of
396      its operands, or the bb_rank, whichever is less.
397      I make no claims that this is optimal; however, it gives good
398      results.  */
399 
400   /* We make an exception to the normal ranking system to break
401      dependences of accumulator variables in loops.  Suppose we
402      have a simple one-block loop containing:
403 
404        x_1 = phi(x_0, x_2)
405        b = a + x_1
406        c = b + d
407        x_2 = c + e
408 
409      As shown, each iteration of the calculation into x is fully
410      dependent upon the iteration before it.  We would prefer to
411      see this in the form:
412 
413        x_1 = phi(x_0, x_2)
414        b = a + d
415        c = b + e
416        x_2 = c + x_1
417 
418      If the loop is unrolled, the calculations of b and c from
419      different iterations can be interleaved.
420 
421      To obtain this result during reassociation, we bias the rank
422      of the phi definition x_1 upward, when it is recognized as an
423      accumulator pattern.  The artificial rank causes it to be
424      added last, providing the desired independence.  */
425 
426   if (TREE_CODE (e) == SSA_NAME)
427     {
428       gimple stmt;
429       long rank;
430       int i, n;
431       tree op;
432 
433       if (SSA_NAME_IS_DEFAULT_DEF (e))
434 	return find_operand_rank (e);
435 
436       stmt = SSA_NAME_DEF_STMT (e);
437       if (gimple_code (stmt) == GIMPLE_PHI)
438 	return phi_rank (stmt);
439 
440       if (!is_gimple_assign (stmt)
441 	  || gimple_vdef (stmt))
442 	return bb_rank[gimple_bb (stmt)->index];
443 
444       /* If we already have a rank for this expression, use that.  */
445       rank = find_operand_rank (e);
446       if (rank != -1)
447 	return rank;
448 
449       /* Otherwise, find the maximum rank for the operands.  As an
450 	 exception, remove the bias from loop-carried phis when propagating
451 	 the rank so that dependent operations are not also biased.  */
452       rank = 0;
453       if (gimple_assign_single_p (stmt))
454 	{
455 	  tree rhs = gimple_assign_rhs1 (stmt);
456 	  n = TREE_OPERAND_LENGTH (rhs);
457 	  if (n == 0)
458 	    rank = propagate_rank (rank, rhs);
459 	  else
460 	    {
461 	      for (i = 0; i < n; i++)
462 		{
463 		  op = TREE_OPERAND (rhs, i);
464 
465 		  if (op != NULL_TREE)
466 		    rank = propagate_rank (rank, op);
467 		}
468 	    }
469 	}
470       else
471 	{
472 	  n = gimple_num_ops (stmt);
473 	  for (i = 1; i < n; i++)
474 	    {
475 	      op = gimple_op (stmt, i);
476 	      gcc_assert (op);
477 	      rank = propagate_rank (rank, op);
478 	    }
479 	}
480 
481       if (dump_file && (dump_flags & TDF_DETAILS))
482 	{
483 	  fprintf (dump_file, "Rank for ");
484 	  print_generic_expr (dump_file, e, 0);
485 	  fprintf (dump_file, " is %ld\n", (rank + 1));
486 	}
487 
488       /* Note the rank in the hashtable so we don't recompute it.  */
489       insert_operand_rank (e, (rank + 1));
490       return (rank + 1);
491     }
492 
493   /* Globals, etc,  are rank 0 */
494   return 0;
495 }
496 
497 
498 /* We want integer ones to end up last no matter what, since they are
499    the ones we can do the most with.  */
500 #define INTEGER_CONST_TYPE (1 << 3)
501 #define FLOAT_CONST_TYPE (1 << 2)
502 #define OTHER_CONST_TYPE (1 << 1)
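/* These values are compared only for relative order in constant_type and
   sort_by_operand_rank below; they are not combined as bit flags.  */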
503 
504 /* Classify an invariant tree into integer, float, or other, so that
505    we can sort them to be near other constants of the same type.  */
506 static inline int
507 constant_type (tree t)
508 {
509   if (INTEGRAL_TYPE_P (TREE_TYPE (t)))
510     return INTEGER_CONST_TYPE;
511   else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (t)))
512     return FLOAT_CONST_TYPE;
513   else
514     return OTHER_CONST_TYPE;
515 }
516 
517 /* qsort comparison function to sort operand entries PA and PB by rank
518    so that the sorted array is ordered by rank in decreasing order.  */
519 static int
520 sort_by_operand_rank (const void *pa, const void *pb)
521 {
522   const operand_entry_t oea = *(const operand_entry_t *)pa;
523   const operand_entry_t oeb = *(const operand_entry_t *)pb;
524 
525   /* It's nicer for optimize_expression if constants that are likely
526      to fold when added/multiplied/whatever are put next to each
527      other.  Since all constants have rank 0, order them by type.  */
528   if (oeb->rank == 0 && oea->rank == 0)
529     {
530       if (constant_type (oeb->op) != constant_type (oea->op))
531 	return constant_type (oeb->op) - constant_type (oea->op);
532       else
533 	/* To make sorting result stable, we use unique IDs to determine
534 	   order.  */
535         return oeb->id - oea->id;
536     }
537 
538   /* Lastly, make sure the versions that are the same go next to each
539      other.  */
540   if ((oeb->rank - oea->rank == 0)
541       && TREE_CODE (oea->op) == SSA_NAME
542       && TREE_CODE (oeb->op) == SSA_NAME)
543     {
544 	      /* SSA_NAME_VERSION is assigned pretty randomly because we reuse
545 	 versions of removed SSA_NAMEs, so if possible, prefer to sort
546 	 based on basic block and gimple_uid of the SSA_NAME_DEF_STMT.
547 	 See PR60418.  */
548       if (!SSA_NAME_IS_DEFAULT_DEF (oea->op)
549 	  && !SSA_NAME_IS_DEFAULT_DEF (oeb->op)
550 	  && SSA_NAME_VERSION (oeb->op) != SSA_NAME_VERSION (oea->op))
551 	{
552 	  gimple stmta = SSA_NAME_DEF_STMT (oea->op);
553 	  gimple stmtb = SSA_NAME_DEF_STMT (oeb->op);
554 	  basic_block bba = gimple_bb (stmta);
555 	  basic_block bbb = gimple_bb (stmtb);
556 	  if (bbb != bba)
557 	    {
558 	      if (bb_rank[bbb->index] != bb_rank[bba->index])
559 		return bb_rank[bbb->index] - bb_rank[bba->index];
560 	    }
561 	  else
562 	    {
563 	      bool da = reassoc_stmt_dominates_stmt_p (stmta, stmtb);
564 	      bool db = reassoc_stmt_dominates_stmt_p (stmtb, stmta);
565 	      if (da != db)
566 		return da ? 1 : -1;
567 	    }
568 	}
569 
570       if (SSA_NAME_VERSION (oeb->op) != SSA_NAME_VERSION (oea->op))
571 	return SSA_NAME_VERSION (oeb->op) - SSA_NAME_VERSION (oea->op);
572       else
573 	return oeb->id - oea->id;
574     }
575 
576   if (oeb->rank != oea->rank)
577     return oeb->rank - oea->rank;
578   else
579     return oeb->id - oea->id;
580 }
581 
582 /* Add an operand entry to *OPS for the tree operand OP.  */
583 
584 static void
585 add_to_ops_vec (vec<operand_entry_t> *ops, tree op)
586 {
587   operand_entry_t oe = (operand_entry_t) pool_alloc (operand_entry_pool);
588 
589   oe->op = op;
590   oe->rank = get_rank (op);
591   oe->id = next_operand_entry_id++;
592   oe->count = 1;
593   ops->safe_push (oe);
594 }
595 
596 /* Add an operand entry to *OPS for the tree operand OP with repeat
597    count REPEAT.  */
598 
599 static void
600 add_repeat_to_ops_vec (vec<operand_entry_t> *ops, tree op,
601 		       HOST_WIDE_INT repeat)
602 {
603   operand_entry_t oe = (operand_entry_t) pool_alloc (operand_entry_pool);
604 
605   oe->op = op;
606   oe->rank = get_rank (op);
607   oe->id = next_operand_entry_id++;
608   oe->count = repeat;
609   ops->safe_push (oe);
610 
611   reassociate_stats.pows_encountered++;
612 }
613 
614 /* Return true if STMT is a reassociable operation containing a binary
615    operation with tree code CODE, and is inside LOOP.  */
616 
617 static bool
618 is_reassociable_op (gimple stmt, enum tree_code code, struct loop *loop)
619 {
620   basic_block bb = gimple_bb (stmt);
621 
622   if (gimple_bb (stmt) == NULL)
623     return false;
624 
625   if (!flow_bb_inside_loop_p (loop, bb))
626     return false;
627 
628   if (is_gimple_assign (stmt)
629       && gimple_assign_rhs_code (stmt) == code
630       && has_single_use (gimple_assign_lhs (stmt)))
631     return true;
632 
633   return false;
634 }
635 
636 
637 /* Given NAME, if NAME is defined by a unary operation OPCODE, return the
638    operand of that operation.  Otherwise, return NULL_TREE.  */
639 
640 static tree
641 get_unary_op (tree name, enum tree_code opcode)
642 {
643   gimple stmt = SSA_NAME_DEF_STMT (name);
644 
645   if (!is_gimple_assign (stmt))
646     return NULL_TREE;
647 
648   if (gimple_assign_rhs_code (stmt) == opcode)
649     return gimple_assign_rhs1 (stmt);
650   return NULL_TREE;
651 }
652 
653 /* If CURR and LAST are a pair of ops that OPCODE allows us to
654    eliminate through equivalences, do so, remove them from OPS, and
655    return true.  Otherwise, return false.  */
656 
657 static bool
658 eliminate_duplicate_pair (enum tree_code opcode,
659 			  vec<operand_entry_t> *ops,
660 			  bool *all_done,
661 			  unsigned int i,
662 			  operand_entry_t curr,
663 			  operand_entry_t last)
664 {
665 
666   /* If we have two of the same op, and the opcode is & |, min, or max,
667      we can eliminate one of them.
668      If we have two of the same op, and the opcode is ^, we can
669      eliminate both of them.  */
670 
671   if (last && last->op == curr->op)
672     {
673       switch (opcode)
674 	{
675 	case MAX_EXPR:
676 	case MIN_EXPR:
677 	case BIT_IOR_EXPR:
678 	case BIT_AND_EXPR:
679 	  if (dump_file && (dump_flags & TDF_DETAILS))
680 	    {
681 	      fprintf (dump_file, "Equivalence: ");
682 	      print_generic_expr (dump_file, curr->op, 0);
683 	      fprintf (dump_file, " [&|minmax] ");
684 	      print_generic_expr (dump_file, last->op, 0);
685 	      fprintf (dump_file, " -> ");
686 	      print_generic_stmt (dump_file, last->op, 0);
687 	    }
688 
689 	  ops->ordered_remove (i);
690 	  reassociate_stats.ops_eliminated ++;
691 
692 	  return true;
693 
694 	case BIT_XOR_EXPR:
695 	  if (dump_file && (dump_flags & TDF_DETAILS))
696 	    {
697 	      fprintf (dump_file, "Equivalence: ");
698 	      print_generic_expr (dump_file, curr->op, 0);
699 	      fprintf (dump_file, " ^ ");
700 	      print_generic_expr (dump_file, last->op, 0);
701 	      fprintf (dump_file, " -> nothing\n");
702 	    }
703 
704 	  reassociate_stats.ops_eliminated += 2;
705 
706 	  if (ops->length () == 2)
707 	    {
708 	      ops->create (0);
709 	      add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (last->op)));
710 	      *all_done = true;
711 	    }
712 	  else
713 	    {
714 	      ops->ordered_remove (i-1);
715 	      ops->ordered_remove (i-1);
716 	    }
717 
718 	  return true;
719 
720 	default:
721 	  break;
722 	}
723     }
724   return false;
725 }
726 
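/* SSA names defined by a NEGATE_EXPR that appear as operands of PLUS_EXPR
   chains; eliminate_plus_minus_pair records them here when it cannot cancel
   them, for later inspection by repropagate_negates.  */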
727 static vec<tree> plus_negates;
728 
729 /* If OPCODE is PLUS_EXPR and CURR->OP is a negate expression or a bitwise
730    not expression, look in OPS for a corresponding positive operation to cancel
731    it out.  If we find one, remove the other from OPS, replace
732    OPS[CURRINDEX] with 0 or -1, respectively, and return true.  Otherwise,
733    return false. */
734 
735 static bool
736 eliminate_plus_minus_pair (enum tree_code opcode,
737 			   vec<operand_entry_t> *ops,
738 			   unsigned int currindex,
739 			   operand_entry_t curr)
740 {
741   tree negateop;
742   tree notop;
743   unsigned int i;
744   operand_entry_t oe;
745 
746   if (opcode != PLUS_EXPR || TREE_CODE (curr->op) != SSA_NAME)
747     return false;
748 
749   negateop = get_unary_op (curr->op, NEGATE_EXPR);
750   notop = get_unary_op (curr->op, BIT_NOT_EXPR);
751   if (negateop == NULL_TREE && notop == NULL_TREE)
752     return false;
753 
754   /* Any non-negated version will have a rank that is one less than
755      the current rank.  So once we hit those ranks, if we don't find
756      one, we can stop.  */
757 
758   for (i = currindex + 1;
759        ops->iterate (i, &oe)
760        && oe->rank >= curr->rank - 1 ;
761        i++)
762     {
763       if (oe->op == negateop)
764 	{
765 
766 	  if (dump_file && (dump_flags & TDF_DETAILS))
767 	    {
768 	      fprintf (dump_file, "Equivalence: ");
769 	      print_generic_expr (dump_file, negateop, 0);
770 	      fprintf (dump_file, " + -");
771 	      print_generic_expr (dump_file, oe->op, 0);
772 	      fprintf (dump_file, " -> 0\n");
773 	    }
774 
775 	  ops->ordered_remove (i);
776 	  add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (oe->op)));
777 	  ops->ordered_remove (currindex);
778 	  reassociate_stats.ops_eliminated ++;
779 
780 	  return true;
781 	}
782       else if (oe->op == notop)
783 	{
784 	  tree op_type = TREE_TYPE (oe->op);
785 
786 	  if (dump_file && (dump_flags & TDF_DETAILS))
787 	    {
788 	      fprintf (dump_file, "Equivalence: ");
789 	      print_generic_expr (dump_file, notop, 0);
790 	      fprintf (dump_file, " + ~");
791 	      print_generic_expr (dump_file, oe->op, 0);
792 	      fprintf (dump_file, " -> -1\n");
793 	    }
794 
795 	  ops->ordered_remove (i);
796 	  add_to_ops_vec (ops, build_int_cst_type (op_type, -1));
797 	  ops->ordered_remove (currindex);
798 	  reassociate_stats.ops_eliminated ++;
799 
800 	  return true;
801 	}
802     }
803 
804   /* CURR->OP is a negate expr in a plus expr: save it for later
805      inspection in repropagate_negates().  */
806   if (negateop != NULL_TREE)
807     plus_negates.safe_push (curr->op);
808 
809   return false;
810 }
811 
812 /* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR, and CURR->OP is really a
813    bitwise not expression, look in OPS for a corresponding operand to
814    cancel it out.  If we find one, the whole expression collapses, so
815    replace OPS with the single constant 0 (for AND) or -1 (for IOR) and
816    return true.  Otherwise, return false. */
817 
818 static bool
819 eliminate_not_pairs (enum tree_code opcode,
820 		     vec<operand_entry_t> *ops,
821 		     unsigned int currindex,
822 		     operand_entry_t curr)
823 {
824   tree notop;
825   unsigned int i;
826   operand_entry_t oe;
827 
828   if ((opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
829       || TREE_CODE (curr->op) != SSA_NAME)
830     return false;
831 
832   notop = get_unary_op (curr->op, BIT_NOT_EXPR);
833   if (notop == NULL_TREE)
834     return false;
835 
836   /* Any non-not version will have a rank that is one less than
837      the current rank.  So once we hit those ranks, if we don't find
838      one, we can stop.  */
839 
840   for (i = currindex + 1;
841        ops->iterate (i, &oe)
842        && oe->rank >= curr->rank - 1;
843        i++)
844     {
845       if (oe->op == notop)
846 	{
847 	  if (dump_file && (dump_flags & TDF_DETAILS))
848 	    {
849 	      fprintf (dump_file, "Equivalence: ");
850 	      print_generic_expr (dump_file, notop, 0);
851 	      if (opcode == BIT_AND_EXPR)
852 		fprintf (dump_file, " & ~");
853 	      else if (opcode == BIT_IOR_EXPR)
854 		fprintf (dump_file, " | ~");
855 	      print_generic_expr (dump_file, oe->op, 0);
856 	      if (opcode == BIT_AND_EXPR)
857 		fprintf (dump_file, " -> 0\n");
858 	      else if (opcode == BIT_IOR_EXPR)
859 		fprintf (dump_file, " -> -1\n");
860 	    }
861 
862 	  if (opcode == BIT_AND_EXPR)
863 	    oe->op = build_zero_cst (TREE_TYPE (oe->op));
864 	  else if (opcode == BIT_IOR_EXPR)
865 	    oe->op = build_all_ones_cst (TREE_TYPE (oe->op));
866 
867 	  reassociate_stats.ops_eliminated += ops->length () - 1;
868 	  ops->truncate (0);
869 	  ops->quick_push (oe);
870 	  return true;
871 	}
872     }
873 
874   return false;
875 }
876 
877 /* Use constant value that may be present in OPS to try to eliminate
878    operands.  Note that this function is only really used when we've
879    eliminated ops for other reasons, or merged constants.  Across
880    single statements, fold already does all of this, plus more.  There
881    is little point in duplicating logic, so I've only included the
882    identities that I could ever construct testcases to trigger.  */
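/* Specifically, the cases below handle x & 0, x & -1, x | -1, x | 0,
   x * 0 and x * 1 (with the usual caveats for floating point), and
   x [+-^] 0.  */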
883 
884 static void
885 eliminate_using_constants (enum tree_code opcode,
886 			   vec<operand_entry_t> *ops)
887 {
888   operand_entry_t oelast = ops->last ();
889   tree type = TREE_TYPE (oelast->op);
890 
891   if (oelast->rank == 0
892       && (INTEGRAL_TYPE_P (type) || FLOAT_TYPE_P (type)))
893     {
894       switch (opcode)
895 	{
896 	case BIT_AND_EXPR:
897 	  if (integer_zerop (oelast->op))
898 	    {
899 	      if (ops->length () != 1)
900 		{
901 		  if (dump_file && (dump_flags & TDF_DETAILS))
902 		    fprintf (dump_file, "Found & 0, removing all other ops\n");
903 
904 		  reassociate_stats.ops_eliminated += ops->length () - 1;
905 
906 		  ops->truncate (0);
907 		  ops->quick_push (oelast);
908 		  return;
909 		}
910 	    }
911 	  else if (integer_all_onesp (oelast->op))
912 	    {
913 	      if (ops->length () != 1)
914 		{
915 		  if (dump_file && (dump_flags & TDF_DETAILS))
916 		    fprintf (dump_file, "Found & -1, removing\n");
917 		  ops->pop ();
918 		  reassociate_stats.ops_eliminated++;
919 		}
920 	    }
921 	  break;
922 	case BIT_IOR_EXPR:
923 	  if (integer_all_onesp (oelast->op))
924 	    {
925 	      if (ops->length () != 1)
926 		{
927 		  if (dump_file && (dump_flags & TDF_DETAILS))
928 		    fprintf (dump_file, "Found | -1, removing all other ops\n");
929 
930 		  reassociate_stats.ops_eliminated += ops->length () - 1;
931 
932 		  ops->truncate (0);
933 		  ops->quick_push (oelast);
934 		  return;
935 		}
936 	    }
937 	  else if (integer_zerop (oelast->op))
938 	    {
939 	      if (ops->length () != 1)
940 		{
941 		  if (dump_file && (dump_flags & TDF_DETAILS))
942 		    fprintf (dump_file, "Found | 0, removing\n");
943 		  ops->pop ();
944 		  reassociate_stats.ops_eliminated++;
945 		}
946 	    }
947 	  break;
948 	case MULT_EXPR:
949 	  if (integer_zerop (oelast->op)
950 	      || (FLOAT_TYPE_P (type)
951 		  && !HONOR_NANS (TYPE_MODE (type))
952 		  && !HONOR_SIGNED_ZEROS (TYPE_MODE (type))
953 		  && real_zerop (oelast->op)))
954 	    {
955 	      if (ops->length () != 1)
956 		{
957 		  if (dump_file && (dump_flags & TDF_DETAILS))
958 		    fprintf (dump_file, "Found * 0, removing all other ops\n");
959 
960 		  reassociate_stats.ops_eliminated += ops->length () - 1;
961 		  ops->truncate (1);
962 		  ops->quick_push (oelast);
963 		  return;
964 		}
965 	    }
966 	  else if (integer_onep (oelast->op)
967 		   || (FLOAT_TYPE_P (type)
968 		       && !HONOR_SNANS (TYPE_MODE (type))
969 		       && real_onep (oelast->op)))
970 	    {
971 	      if (ops->length () != 1)
972 		{
973 		  if (dump_file && (dump_flags & TDF_DETAILS))
974 		    fprintf (dump_file, "Found * 1, removing\n");
975 		  ops->pop ();
976 		  reassociate_stats.ops_eliminated++;
977 		  return;
978 		}
979 	    }
980 	  break;
981 	case BIT_XOR_EXPR:
982 	case PLUS_EXPR:
983 	case MINUS_EXPR:
984 	  if (integer_zerop (oelast->op)
985 	      || (FLOAT_TYPE_P (type)
986 		  && (opcode == PLUS_EXPR || opcode == MINUS_EXPR)
987 		  && fold_real_zero_addition_p (type, oelast->op,
988 						opcode == MINUS_EXPR)))
989 	    {
990 	      if (ops->length () != 1)
991 		{
992 		  if (dump_file && (dump_flags & TDF_DETAILS))
993 		    fprintf (dump_file, "Found [|^+] 0, removing\n");
994 		  ops->pop ();
995 		  reassociate_stats.ops_eliminated++;
996 		  return;
997 		}
998 	    }
999 	  break;
1000 	default:
1001 	  break;
1002 	}
1003     }
1004 }
1005 
1006 
1007 static void linearize_expr_tree (vec<operand_entry_t> *, gimple,
1008 				 bool, bool);
1009 
1010 /* Structure for tracking and counting operands.  */
1011 typedef struct oecount_s {
1012   int cnt;
1013   int id;
1014   enum tree_code oecode;
1015   tree op;
1016 } oecount;
1017 
1018 
1019 /* The heap for the oecount hashtable and the sorted list of operands.  */
1020 static vec<oecount> cvec;
1021 
1022 
1023 /* Oecount hashtable helpers.  */
1024 
1025 struct oecount_hasher : typed_noop_remove <void>
1026 {
1027   /* Note that this hash table stores integers, not pointers.
1028      So, observe the casting in the member functions.  */
1029   typedef void value_type;
1030   typedef void compare_type;
1031   static inline hashval_t hash (const value_type *);
1032   static inline bool equal (const value_type *, const compare_type *);
1033 };
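/* The "values" stored in this table are not pointers but cvec positions
   biased by 42: undistribute_ops_list inserts cvec.length () + 41
   (i.e. index + 42), and hash and equal below subtract 42 to recover the
   zero-based cvec index.  The bias presumably keeps the stored values
   clear of the table's special empty/deleted slot markers.  */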
1034 
1035 /* Hash function for oecount.  */
1036 
1037 inline hashval_t
1038 oecount_hasher::hash (const value_type *p)
1039 {
1040   const oecount *c = &cvec[(size_t)p - 42];
1041   return htab_hash_pointer (c->op) ^ (hashval_t)c->oecode;
1042 }
1043 
1044 /* Comparison function for oecount.  */
1045 
1046 inline bool
1047 oecount_hasher::equal (const value_type *p1, const compare_type *p2)
1048 {
1049   const oecount *c1 = &cvec[(size_t)p1 - 42];
1050   const oecount *c2 = &cvec[(size_t)p2 - 42];
1051   return (c1->oecode == c2->oecode
1052 	  && c1->op == c2->op);
1053 }
1054 
1055 /* Comparison function for qsort sorting oecount elements by count.  */
1056 
1057 static int
1058 oecount_cmp (const void *p1, const void *p2)
1059 {
1060   const oecount *c1 = (const oecount *)p1;
1061   const oecount *c2 = (const oecount *)p2;
1062   if (c1->cnt != c2->cnt)
1063     return c1->cnt - c2->cnt;
1064   else
1065     /* If counts are identical, use unique IDs to stabilize qsort.  */
1066     return c1->id - c2->id;
1067 }
1068 
1069 /* Return TRUE iff STMT represents a builtin call that raises OP
1070    to some exponent.  */
1071 
1072 static bool
1073 stmt_is_power_of_op (gimple stmt, tree op)
1074 {
1075   tree fndecl;
1076 
1077   if (!is_gimple_call (stmt))
1078     return false;
1079 
1080   fndecl = gimple_call_fndecl (stmt);
1081 
1082   if (!fndecl
1083       || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
1084     return false;
1085 
1086   switch (DECL_FUNCTION_CODE (gimple_call_fndecl (stmt)))
1087     {
1088     CASE_FLT_FN (BUILT_IN_POW):
1089     CASE_FLT_FN (BUILT_IN_POWI):
1090       return (operand_equal_p (gimple_call_arg (stmt, 0), op, 0));
1091 
1092     default:
1093       return false;
1094     }
1095 }
1096 
1097 /* Given STMT which is a __builtin_pow* call, decrement its exponent
1098    in place and return the result.  Assumes that stmt_is_power_of_op
1099    was previously called for STMT and returned TRUE.  */
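/* For example, a call __builtin_powi (x, 5) is rewritten in place to
   __builtin_powi (x, 4), and 4 is returned.  */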
1100 
1101 static HOST_WIDE_INT
1102 decrement_power (gimple stmt)
1103 {
1104   REAL_VALUE_TYPE c, cint;
1105   HOST_WIDE_INT power;
1106   tree arg1;
1107 
1108   switch (DECL_FUNCTION_CODE (gimple_call_fndecl (stmt)))
1109     {
1110     CASE_FLT_FN (BUILT_IN_POW):
1111       arg1 = gimple_call_arg (stmt, 1);
1112       c = TREE_REAL_CST (arg1);
1113       power = real_to_integer (&c) - 1;
1114       real_from_integer (&cint, VOIDmode, power, 0, 0);
1115       gimple_call_set_arg (stmt, 1, build_real (TREE_TYPE (arg1), cint));
1116       return power;
1117 
1118     CASE_FLT_FN (BUILT_IN_POWI):
1119       arg1 = gimple_call_arg (stmt, 1);
1120       power = TREE_INT_CST_LOW (arg1) - 1;
1121       gimple_call_set_arg (stmt, 1, build_int_cst (TREE_TYPE (arg1), power));
1122       return power;
1123 
1124     default:
1125       gcc_unreachable ();
1126     }
1127 }
1128 
1129 /* Find the single immediate use of STMT's LHS, and replace it
1130    with OP.  Remove STMT.  If STMT's LHS is the same as *DEF,
1131    replace *DEF with OP as well.  */
1132 
1133 static void
1134 propagate_op_to_single_use (tree op, gimple stmt, tree *def)
1135 {
1136   tree lhs;
1137   gimple use_stmt;
1138   use_operand_p use;
1139   gimple_stmt_iterator gsi;
1140 
1141   if (is_gimple_call (stmt))
1142     lhs = gimple_call_lhs (stmt);
1143   else
1144     lhs = gimple_assign_lhs (stmt);
1145 
1146   gcc_assert (has_single_use (lhs));
1147   single_imm_use (lhs, &use, &use_stmt);
1148   if (lhs == *def)
1149     *def = op;
1150   SET_USE (use, op);
1151   if (TREE_CODE (op) != SSA_NAME)
1152     update_stmt (use_stmt);
1153   gsi = gsi_for_stmt (stmt);
1154   unlink_stmt_vdef (stmt);
1155   reassoc_remove_stmt (&gsi);
1156   release_defs (stmt);
1157 }
1158 
1159 /* Walks the linear chain with result *DEF searching for an operation
1160    with operand OP and code OPCODE, removing that from the chain.  *DEF
1161    is updated if there is only one operand but no operation left.  */
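/* As an illustrative example (names hypothetical), given the chain

     t1 = a * b;
     t2 = t1 * c;

   with *DEF == t2, OPCODE == MULT_EXPR and OP == b, the statement defining
   t1 is removed and a is propagated into its single use, leaving
   t2 = a * c.  */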
1162 
1163 static void
1164 zero_one_operation (tree *def, enum tree_code opcode, tree op)
1165 {
1166   gimple stmt = SSA_NAME_DEF_STMT (*def);
1167 
1168   do
1169     {
1170       tree name;
1171 
1172       if (opcode == MULT_EXPR
1173 	  && stmt_is_power_of_op (stmt, op))
1174 	{
1175 	  if (decrement_power (stmt) == 1)
1176 	    propagate_op_to_single_use (op, stmt, def);
1177 	  return;
1178 	}
1179 
1180       name = gimple_assign_rhs1 (stmt);
1181 
1182       /* If this is the operation we look for and one of the operands
1183          is ours, simply propagate the other operand into the stmt's
1184 	 single use.  */
1185       if (gimple_assign_rhs_code (stmt) == opcode
1186 	  && (name == op
1187 	      || gimple_assign_rhs2 (stmt) == op))
1188 	{
1189 	  if (name == op)
1190 	    name = gimple_assign_rhs2 (stmt);
1191 	  propagate_op_to_single_use (name, stmt, def);
1192 	  return;
1193 	}
1194 
1195       /* We might have a multiply of two __builtin_pow* calls, and
1196 	 the operand might be hiding in the rightmost one.  */
1197       if (opcode == MULT_EXPR
1198 	  && gimple_assign_rhs_code (stmt) == opcode
1199 	  && TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME
1200 	  && has_single_use (gimple_assign_rhs2 (stmt)))
1201 	{
1202 	  gimple stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
1203 	  if (stmt_is_power_of_op (stmt2, op))
1204 	    {
1205 	      if (decrement_power (stmt2) == 1)
1206 		propagate_op_to_single_use (op, stmt2, def);
1207 	      return;
1208 	    }
1209 	}
1210 
1211       /* Continue walking the chain.  */
1212       gcc_assert (name != op
1213 		  && TREE_CODE (name) == SSA_NAME);
1214       stmt = SSA_NAME_DEF_STMT (name);
1215     }
1216   while (1);
1217 }
1218 
1219 /* Returns true if statement S1 dominates statement S2.  Like
1220    stmt_dominates_stmt_p, but uses stmt UIDs to optimize.  */
1221 
1222 static bool
1223 reassoc_stmt_dominates_stmt_p (gimple s1, gimple s2)
1224 {
1225   basic_block bb1 = gimple_bb (s1), bb2 = gimple_bb (s2);
1226 
1227   /* If bb1 is NULL, it should be the GIMPLE_NOP def stmt of a default
1228      definition (D) SSA_NAME.  Assume it lives at the beginning of the
1229      function and thus dominates everything.  */
1230   if (!bb1 || s1 == s2)
1231     return true;
1232 
1233   /* If bb2 is NULL, it doesn't dominate any stmt with a bb.  */
1234   if (!bb2)
1235     return false;
1236 
1237   if (bb1 == bb2)
1238     {
1239       /* PHIs in the same basic block are assumed to be
1240 	 executed all in parallel, so if only one stmt is a PHI,
1241 	 it dominates the other stmt in the same basic block.  */
1242       if (gimple_code (s1) == GIMPLE_PHI)
1243 	return true;
1244 
1245       if (gimple_code (s2) == GIMPLE_PHI)
1246 	return false;
1247 
1248       gcc_assert (gimple_uid (s1) && gimple_uid (s2));
1249 
1250       if (gimple_uid (s1) < gimple_uid (s2))
1251 	return true;
1252 
1253       if (gimple_uid (s1) > gimple_uid (s2))
1254 	return false;
1255 
1256       gimple_stmt_iterator gsi = gsi_for_stmt (s1);
1257       unsigned int uid = gimple_uid (s1);
1258       for (gsi_next (&gsi); !gsi_end_p (gsi); gsi_next (&gsi))
1259 	{
1260 	  gimple s = gsi_stmt (gsi);
1261 	  if (gimple_uid (s) != uid)
1262 	    break;
1263 	  if (s == s2)
1264 	    return true;
1265 	}
1266 
1267       return false;
1268     }
1269 
1270   return dominated_by_p (CDI_DOMINATORS, bb2, bb1);
1271 }
1272 
1273 /* Insert STMT after INSERT_POINT.  */
1274 
1275 static void
1276 insert_stmt_after (gimple stmt, gimple insert_point)
1277 {
1278   gimple_stmt_iterator gsi;
1279   basic_block bb;
1280 
1281   if (gimple_code (insert_point) == GIMPLE_PHI)
1282     bb = gimple_bb (insert_point);
1283   else if (!stmt_ends_bb_p (insert_point))
1284     {
1285       gsi = gsi_for_stmt (insert_point);
1286       gimple_set_uid (stmt, gimple_uid (insert_point));
1287       gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
1288       return;
1289     }
1290   else
1291     /* We assume INSERT_POINT is a SSA_NAME_DEF_STMT of some SSA_NAME,
1292        thus if it must end a basic block, it should be a call that can
1293        throw, or some assignment that can throw.  If it throws, the LHS
1294        of it will not be initialized though, so only valid places using
1295        the SSA_NAME should be dominated by the fallthru edge.  */
1296     bb = find_fallthru_edge (gimple_bb (insert_point)->succs)->dest;
1297   gsi = gsi_after_labels (bb);
1298   if (gsi_end_p (gsi))
1299     {
1300       gimple_stmt_iterator gsi2 = gsi_last_bb (bb);
1301       gimple_set_uid (stmt,
1302 		      gsi_end_p (gsi2) ? 1 : gimple_uid (gsi_stmt (gsi2)));
1303     }
1304   else
1305     gimple_set_uid (stmt, gimple_uid (gsi_stmt (gsi)));
1306   gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
1307 }
1308 
1309 /* Builds one statement performing OP1 OPCODE OP2 using TMPVAR for
1310    the result.  Places the statement after the definition of either
1311    OP1 or OP2.  Returns the new statement.  */
1312 
1313 static gimple
1314 build_and_add_sum (tree type, tree op1, tree op2, enum tree_code opcode)
1315 {
1316   gimple op1def = NULL, op2def = NULL;
1317   gimple_stmt_iterator gsi;
1318   tree op;
1319   gimple sum;
1320 
1321   /* Create the addition statement.  */
1322   op = make_ssa_name (type, NULL);
1323   sum = gimple_build_assign_with_ops (opcode, op, op1, op2);
1324 
1325   /* Find an insertion place and insert.  */
1326   if (TREE_CODE (op1) == SSA_NAME)
1327     op1def = SSA_NAME_DEF_STMT (op1);
1328   if (TREE_CODE (op2) == SSA_NAME)
1329     op2def = SSA_NAME_DEF_STMT (op2);
1330   if ((!op1def || gimple_nop_p (op1def))
1331       && (!op2def || gimple_nop_p (op2def)))
1332     {
1333       gsi = gsi_after_labels (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
1334       if (gsi_end_p (gsi))
1335 	{
1336 	  gimple_stmt_iterator gsi2
1337 	    = gsi_last_bb (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
1338 	  gimple_set_uid (sum,
1339 			  gsi_end_p (gsi2) ? 1 : gimple_uid (gsi_stmt (gsi2)));
1340 	}
1341       else
1342 	gimple_set_uid (sum, gimple_uid (gsi_stmt (gsi)));
1343       gsi_insert_before (&gsi, sum, GSI_NEW_STMT);
1344     }
1345   else
1346     {
1347       gimple insert_point;
1348       if ((!op1def || gimple_nop_p (op1def))
1349 	   || (op2def && !gimple_nop_p (op2def)
1350 	       && reassoc_stmt_dominates_stmt_p (op1def, op2def)))
1351 	insert_point = op2def;
1352       else
1353 	insert_point = op1def;
1354       insert_stmt_after (sum, insert_point);
1355     }
1356   update_stmt (sum);
1357 
1358   return sum;
1359 }
1360 
1361 /* Perform un-distribution of divisions and multiplications.
1362    A * X + B * X is transformed into (A + B) * X and A / X + B / X
1363    to (A + B) / X for real X.
1364 
1365    The algorithm is organized as follows.
1366 
1367     - First we walk the addition chain *OPS looking for summands that
1368       are defined by a multiplication or a real division.  This results
1369       in the candidates bitmap with relevant indices into *OPS.
1370 
1371     - Second we build the chains of multiplications or divisions for
1372       these candidates, counting the number of occurrences of (operand, code)
1373       pairs in all of the candidates chains.
1374 
1375     - Third we sort the (operand, code) pairs by number of occurrence and
1376       process them starting with the pair with the most uses.
1377 
1378       * For each such pair we walk the candidates again to build a
1379         second candidate bitmap noting all multiplication/division chains
1380 	that have at least one occurrence of (operand, code).
1381 
1382       * We build an alternate addition chain only covering these
1383         candidates with one (operand, code) operation removed from their
1384 	multiplication/division chain.
1385 
1386       * The first candidate gets replaced by the alternate addition chain
1387         multiplied/divided by the operand.
1388 
1389       * All candidate chains get disabled for further processing and
1390         processing of (operand, code) pairs continues.
1391 
1392   The alternate addition chains built are re-processed by the main
1393   reassociation algorithm which allows optimizing a * x * y + b * y * x
1394   to (a + b ) * x * y in one invocation of the reassociation pass.  */
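/* As a small illustration (names hypothetical), for the addition chain
   OPS = { a*x, b*x, c } the candidates are a*x and b*x, the counting
   table ends up with (*, x):2, (*, a):1 and (*, b):1, and processing the
   most frequent pair (*, x) rewrites the chain to { (a + b) * x, 0, c },
   the 0 being the placeholder left in the second candidate's slot.  */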
1395 
1396 static bool
1397 undistribute_ops_list (enum tree_code opcode,
1398 		       vec<operand_entry_t> *ops, struct loop *loop)
1399 {
1400   unsigned int length = ops->length ();
1401   operand_entry_t oe1;
1402   unsigned i, j;
1403   sbitmap candidates, candidates2;
1404   unsigned nr_candidates, nr_candidates2;
1405   sbitmap_iterator sbi0;
1406   vec<operand_entry_t> *subops;
1407   hash_table <oecount_hasher> ctable;
1408   bool changed = false;
1409   int next_oecount_id = 0;
1410 
1411   if (length <= 1
1412       || opcode != PLUS_EXPR)
1413     return false;
1414 
1415   /* Build a list of candidates to process.  */
1416   candidates = sbitmap_alloc (length);
1417   bitmap_clear (candidates);
1418   nr_candidates = 0;
1419   FOR_EACH_VEC_ELT (*ops, i, oe1)
1420     {
1421       enum tree_code dcode;
1422       gimple oe1def;
1423 
1424       if (TREE_CODE (oe1->op) != SSA_NAME)
1425 	continue;
1426       oe1def = SSA_NAME_DEF_STMT (oe1->op);
1427       if (!is_gimple_assign (oe1def))
1428 	continue;
1429       dcode = gimple_assign_rhs_code (oe1def);
1430       if ((dcode != MULT_EXPR
1431 	   && dcode != RDIV_EXPR)
1432 	  || !is_reassociable_op (oe1def, dcode, loop))
1433 	continue;
1434 
1435       bitmap_set_bit (candidates, i);
1436       nr_candidates++;
1437     }
1438 
1439   if (nr_candidates < 2)
1440     {
1441       sbitmap_free (candidates);
1442       return false;
1443     }
1444 
1445   if (dump_file && (dump_flags & TDF_DETAILS))
1446     {
1447       fprintf (dump_file, "searching for un-distribute opportunities ");
1448       print_generic_expr (dump_file,
1449 	(*ops)[bitmap_first_set_bit (candidates)]->op, 0);
1450       fprintf (dump_file, " %d\n", nr_candidates);
1451     }
1452 
1453   /* Build linearized sub-operand lists and the counting table.  */
1454   cvec.create (0);
1455   ctable.create (15);
1456   /* ??? Macro arguments cannot have multi-argument template types in
1457      them.  This typedef is needed to work around that limitation.  */
1458   typedef vec<operand_entry_t> vec_operand_entry_t_heap;
1459   subops = XCNEWVEC (vec_operand_entry_t_heap, ops->length ());
1460   EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
1461     {
1462       gimple oedef;
1463       enum tree_code oecode;
1464       unsigned j;
1465 
1466       oedef = SSA_NAME_DEF_STMT ((*ops)[i]->op);
1467       oecode = gimple_assign_rhs_code (oedef);
1468       linearize_expr_tree (&subops[i], oedef,
1469 			   associative_tree_code (oecode), false);
1470 
1471       FOR_EACH_VEC_ELT (subops[i], j, oe1)
1472 	{
1473 	  oecount c;
1474 	  void **slot;
1475 	  size_t idx;
1476 	  c.oecode = oecode;
1477 	  c.cnt = 1;
1478 	  c.id = next_oecount_id++;
1479 	  c.op = oe1->op;
1480 	  cvec.safe_push (c);
1481 	  idx = cvec.length () + 41;
1482 	  slot = ctable.find_slot ((void *)idx, INSERT);
1483 	  if (!*slot)
1484 	    {
1485 	      *slot = (void *)idx;
1486 	    }
1487 	  else
1488 	    {
1489 	      cvec.pop ();
1490 	      cvec[(size_t)*slot - 42].cnt++;
1491 	    }
1492 	}
1493     }
1494   ctable.dispose ();
1495 
1496   /* Sort the counting table.  */
1497   cvec.qsort (oecount_cmp);
1498 
1499   if (dump_file && (dump_flags & TDF_DETAILS))
1500     {
1501       oecount *c;
1502       fprintf (dump_file, "Candidates:\n");
1503       FOR_EACH_VEC_ELT (cvec, j, c)
1504 	{
1505 	  fprintf (dump_file, "  %u %s: ", c->cnt,
1506 		   c->oecode == MULT_EXPR
1507 		   ? "*" : c->oecode == RDIV_EXPR ? "/" : "?");
1508 	  print_generic_expr (dump_file, c->op, 0);
1509 	  fprintf (dump_file, "\n");
1510 	}
1511     }
1512 
1513   /* Process the (operand, code) pairs in order of most occurrence.  */
1514   candidates2 = sbitmap_alloc (length);
1515   while (!cvec.is_empty ())
1516     {
1517       oecount *c = &cvec.last ();
1518       if (c->cnt < 2)
1519 	break;
1520 
1521       /* Now collect the operands in the outer chain that contain
1522          the common operand in their inner chain.  */
1523       bitmap_clear (candidates2);
1524       nr_candidates2 = 0;
1525       EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
1526 	{
1527 	  gimple oedef;
1528 	  enum tree_code oecode;
1529 	  unsigned j;
1530 	  tree op = (*ops)[i]->op;
1531 
1532 	  /* If we already undistributed in this chain, this may be
1533 	     a constant.  */
1534 	  if (TREE_CODE (op) != SSA_NAME)
1535 	    continue;
1536 
1537 	  oedef = SSA_NAME_DEF_STMT (op);
1538 	  oecode = gimple_assign_rhs_code (oedef);
1539 	  if (oecode != c->oecode)
1540 	    continue;
1541 
1542 	  FOR_EACH_VEC_ELT (subops[i], j, oe1)
1543 	    {
1544 	      if (oe1->op == c->op)
1545 		{
1546 		  bitmap_set_bit (candidates2, i);
1547 		  ++nr_candidates2;
1548 		  break;
1549 		}
1550 	    }
1551 	}
1552 
1553       if (nr_candidates2 >= 2)
1554 	{
1555 	  operand_entry_t oe1, oe2;
1556 	  gimple prod;
1557 	  int first = bitmap_first_set_bit (candidates2);
1558 
1559 	  /* Build the new addition chain.  */
1560 	  oe1 = (*ops)[first];
1561 	  if (dump_file && (dump_flags & TDF_DETAILS))
1562 	    {
1563 	      fprintf (dump_file, "Building (");
1564 	      print_generic_expr (dump_file, oe1->op, 0);
1565 	    }
1566 	  zero_one_operation (&oe1->op, c->oecode, c->op);
1567 	  EXECUTE_IF_SET_IN_BITMAP (candidates2, first+1, i, sbi0)
1568 	    {
1569 	      gimple sum;
1570 	      oe2 = (*ops)[i];
1571 	      if (dump_file && (dump_flags & TDF_DETAILS))
1572 		{
1573 		  fprintf (dump_file, " + ");
1574 		  print_generic_expr (dump_file, oe2->op, 0);
1575 		}
1576 	      zero_one_operation (&oe2->op, c->oecode, c->op);
1577 	      sum = build_and_add_sum (TREE_TYPE (oe1->op),
1578 				       oe1->op, oe2->op, opcode);
1579 	      oe2->op = build_zero_cst (TREE_TYPE (oe2->op));
1580 	      oe2->rank = 0;
1581 	      oe1->op = gimple_get_lhs (sum);
1582 	    }
1583 
1584 	  /* Apply the multiplication/division.  */
1585 	  prod = build_and_add_sum (TREE_TYPE (oe1->op),
1586 				    oe1->op, c->op, c->oecode);
1587 	  if (dump_file && (dump_flags & TDF_DETAILS))
1588 	    {
1589 	      fprintf (dump_file, ") %s ", c->oecode == MULT_EXPR ? "*" : "/");
1590 	      print_generic_expr (dump_file, c->op, 0);
1591 	      fprintf (dump_file, "\n");
1592 	    }
1593 
1594 	  /* Record it in the addition chain and disable further
1595 	     undistribution with this op.  */
1596 	  oe1->op = gimple_assign_lhs (prod);
1597 	  oe1->rank = get_rank (oe1->op);
1598 	  subops[first].release ();
1599 
1600 	  changed = true;
1601 	}
1602 
1603       cvec.pop ();
1604     }
1605 
1606   for (i = 0; i < ops->length (); ++i)
1607     subops[i].release ();
1608   free (subops);
1609   cvec.release ();
1610   sbitmap_free (candidates);
1611   sbitmap_free (candidates2);
1612 
1613   return changed;
1614 }
1615 
1616 /* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR and CURR is a comparison
1617    expression, examine the other OPS to see if any of them are comparisons
1618    of the same values, which we may be able to combine or eliminate.
1619    For example, we can rewrite (a < b) | (a == b) as (a <= b).  */
1620 
1621 static bool
1622 eliminate_redundant_comparison (enum tree_code opcode,
1623 				vec<operand_entry_t> *ops,
1624 				unsigned int currindex,
1625 				operand_entry_t curr)
1626 {
1627   tree op1, op2;
1628   enum tree_code lcode, rcode;
1629   gimple def1, def2;
1630   int i;
1631   operand_entry_t oe;
1632 
1633   if (opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
1634     return false;
1635 
1636   /* Check that CURR is a comparison.  */
1637   if (TREE_CODE (curr->op) != SSA_NAME)
1638     return false;
1639   def1 = SSA_NAME_DEF_STMT (curr->op);
1640   if (!is_gimple_assign (def1))
1641     return false;
1642   lcode = gimple_assign_rhs_code (def1);
1643   if (TREE_CODE_CLASS (lcode) != tcc_comparison)
1644     return false;
1645   op1 = gimple_assign_rhs1 (def1);
1646   op2 = gimple_assign_rhs2 (def1);
1647 
1648   /* Now look for a similar comparison in the remaining OPS.  */
1649   for (i = currindex + 1; ops->iterate (i, &oe); i++)
1650     {
1651       tree t;
1652 
1653       if (TREE_CODE (oe->op) != SSA_NAME)
1654 	continue;
1655       def2 = SSA_NAME_DEF_STMT (oe->op);
1656       if (!is_gimple_assign (def2))
1657 	continue;
1658       rcode = gimple_assign_rhs_code (def2);
1659       if (TREE_CODE_CLASS (rcode) != tcc_comparison)
1660 	continue;
1661 
1662       /* If we got here, we have a match.  See if we can combine the
1663 	 two comparisons.  */
1664       if (opcode == BIT_IOR_EXPR)
1665 	t = maybe_fold_or_comparisons (lcode, op1, op2,
1666 				       rcode, gimple_assign_rhs1 (def2),
1667 				       gimple_assign_rhs2 (def2));
1668       else
1669 	t = maybe_fold_and_comparisons (lcode, op1, op2,
1670 					rcode, gimple_assign_rhs1 (def2),
1671 					gimple_assign_rhs2 (def2));
1672       if (!t)
1673 	continue;
1674 
1675       /* maybe_fold_and_comparisons and maybe_fold_or_comparisons
1676 	 always give us a boolean_type_node value back.  If the original
1677 	 BIT_AND_EXPR or BIT_IOR_EXPR was of a wider integer type,
1678 	 we need to convert.  */
1679       if (!useless_type_conversion_p (TREE_TYPE (curr->op), TREE_TYPE (t)))
1680 	t = fold_convert (TREE_TYPE (curr->op), t);
1681 
1682       if (TREE_CODE (t) != INTEGER_CST
1683 	  && !operand_equal_p (t, curr->op, 0))
1684 	{
1685 	  enum tree_code subcode;
1686 	  tree newop1, newop2;
1687 	  if (!COMPARISON_CLASS_P (t))
1688 	    continue;
1689 	  extract_ops_from_tree (t, &subcode, &newop1, &newop2);
1690 	  STRIP_USELESS_TYPE_CONVERSION (newop1);
1691 	  STRIP_USELESS_TYPE_CONVERSION (newop2);
1692 	  if (!is_gimple_val (newop1) || !is_gimple_val (newop2))
1693 	    continue;
1694 	}
1695 
1696       if (dump_file && (dump_flags & TDF_DETAILS))
1697 	{
1698 	  fprintf (dump_file, "Equivalence: ");
1699 	  print_generic_expr (dump_file, curr->op, 0);
1700 	  fprintf (dump_file, " %s ", op_symbol_code (opcode));
1701 	  print_generic_expr (dump_file, oe->op, 0);
1702 	  fprintf (dump_file, " -> ");
1703 	  print_generic_expr (dump_file, t, 0);
1704 	  fprintf (dump_file, "\n");
1705 	}
1706 
1707       /* Now we can delete oe, as it has been subsumed by the new combined
1708          expression t.  */
1709       ops->ordered_remove (i);
1710       reassociate_stats.ops_eliminated ++;
1711 
1712       /* If t is the same as curr->op, we're done.  Otherwise we must
1713 	 replace curr->op with t.  Special case is if we got a constant
1714 	 back, in which case we add it to the end instead of in place of
1715 	 the current entry.  */
1716       if (TREE_CODE (t) == INTEGER_CST)
1717 	{
1718 	  ops->ordered_remove (currindex);
1719 	  add_to_ops_vec (ops, t);
1720 	}
1721       else if (!operand_equal_p (t, curr->op, 0))
1722 	{
1723 	  gimple sum;
1724 	  enum tree_code subcode;
1725 	  tree newop1;
1726 	  tree newop2;
1727 	  gcc_assert (COMPARISON_CLASS_P (t));
1728 	  extract_ops_from_tree (t, &subcode, &newop1, &newop2);
1729 	  STRIP_USELESS_TYPE_CONVERSION (newop1);
1730 	  STRIP_USELESS_TYPE_CONVERSION (newop2);
1731 	  gcc_checking_assert (is_gimple_val (newop1)
1732 			       && is_gimple_val (newop2));
1733 	  sum = build_and_add_sum (TREE_TYPE (t), newop1, newop2, subcode);
1734 	  curr->op = gimple_get_lhs (sum);
1735 	}
1736       return true;
1737     }
1738 
1739   return false;
1740 }
1741 
1742 /* Perform various identities and other optimizations on the list of
1743    operand entries, stored in OPS.  The tree code for the binary
1744    operation between all the operands is OPCODE.  */
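
/* A small illustration (hypothetical operands): for PLUS_EXPR and
   OPS = { a_1, 3, 5 }, the two trailing constants are folded into 8
   below, the vector becomes { a_1, 8 }, and the list is then
   re-optimized recursively.  */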
1745 
1746 static void
1747 optimize_ops_list (enum tree_code opcode,
1748 		   vec<operand_entry_t> *ops)
1749 {
1750   unsigned int length = ops->length ();
1751   unsigned int i;
1752   operand_entry_t oe;
1753   operand_entry_t oelast = NULL;
1754   bool iterate = false;
1755 
1756   if (length == 1)
1757     return;
1758 
1759   oelast = ops->last ();
1760 
1761   /* If the last two are constants, pop the constants off, merge them
1762      and try the next two.  */
1763   if (oelast->rank == 0 && is_gimple_min_invariant (oelast->op))
1764     {
1765       operand_entry_t oelm1 = (*ops)[length - 2];
1766 
1767       if (oelm1->rank == 0
1768 	  && is_gimple_min_invariant (oelm1->op)
1769 	  && useless_type_conversion_p (TREE_TYPE (oelm1->op),
1770 				       TREE_TYPE (oelast->op)))
1771 	{
1772 	  tree folded = fold_binary (opcode, TREE_TYPE (oelm1->op),
1773 				     oelm1->op, oelast->op);
1774 
1775 	  if (folded && is_gimple_min_invariant (folded))
1776 	    {
1777 	      if (dump_file && (dump_flags & TDF_DETAILS))
1778 		fprintf (dump_file, "Merging constants\n");
1779 
1780 	      ops->pop ();
1781 	      ops->pop ();
1782 
1783 	      add_to_ops_vec (ops, folded);
1784 	      reassociate_stats.constants_eliminated++;
1785 
1786 	      optimize_ops_list (opcode, ops);
1787 	      return;
1788 	    }
1789 	}
1790     }
1791 
1792   eliminate_using_constants (opcode, ops);
1793   oelast = NULL;
1794 
1795   for (i = 0; ops->iterate (i, &oe);)
1796     {
1797       bool done = false;
1798 
1799       if (eliminate_not_pairs (opcode, ops, i, oe))
1800 	return;
1801       if (eliminate_duplicate_pair (opcode, ops, &done, i, oe, oelast)
1802 	  || (!done && eliminate_plus_minus_pair (opcode, ops, i, oe))
1803 	  || (!done && eliminate_redundant_comparison (opcode, ops, i, oe)))
1804 	{
1805 	  if (done)
1806 	    return;
1807 	  iterate = true;
1808 	  oelast = NULL;
1809 	  continue;
1810 	}
1811       oelast = oe;
1812       i++;
1813     }
1814 
1815   length = ops->length ();
1816   oelast = ops->last ();
1817 
1818   if (iterate)
1819     optimize_ops_list (opcode, ops);
1820 }
1821 
1822 /* The following functions are subroutines to optimize_range_tests and allow
1823    it to try to change a logical combination of comparisons into a range
1824    test.
1825 
1826    For example, both
1827 	X == 2 || X == 5 || X == 3 || X == 4
1828    and
1829 	X >= 2 && X <= 5
1830    are converted to
1831 	(unsigned) (X - 2) <= 3
1832 
1833    For more information see comments above fold_range_test in fold-const.c,
1834    this implementation is for GIMPLE.  */
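
/* Sketch of the representation used below (assuming a signed int X):
   the test X >= 2 && X <= 5 is described by a range_entry with
   exp == X, in_p == true, low == 2 and high == 5, which
   build_range_check later turns into (unsigned) (X - 2) <= 3.  */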
1835 
1836 struct range_entry
1837 {
1838   tree exp;
1839   tree low;
1840   tree high;
1841   bool in_p;
1842   bool strict_overflow_p;
1843   unsigned int idx, next;
1844 };
1845 
1846 /* This is similar to make_range in fold-const.c, but on top of
1847    GIMPLE instead of trees.  If EXP is non-NULL, it should be
1848    an SSA_NAME and STMT argument is ignored, otherwise STMT
1849    argument should be a GIMPLE_COND.  */
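
/* For example (illustrative SSA names): if EXP is _7 defined by
   _7 = x_2 == 5, the loop below records r->exp = x_2, r->in_p = true
   and r->low = r->high = 5, i.e. the range +[5, 5].  */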
1850 
1851 static void
1852 init_range_entry (struct range_entry *r, tree exp, gimple stmt)
1853 {
1854   int in_p;
1855   tree low, high;
1856   bool is_bool, strict_overflow_p;
1857 
1858   r->exp = NULL_TREE;
1859   r->in_p = false;
1860   r->strict_overflow_p = false;
1861   r->low = NULL_TREE;
1862   r->high = NULL_TREE;
1863   if (exp != NULL_TREE
1864       && (TREE_CODE (exp) != SSA_NAME || !INTEGRAL_TYPE_P (TREE_TYPE (exp))))
1865     return;
1866 
1867   /* Start with simply saying "EXP != 0" and then look at the code of EXP
1868      and see if we can refine the range.  Some of the cases below may not
1869      happen, but it doesn't seem worth worrying about this.  We "continue"
1870      the outer loop when we've changed something; otherwise we "break"
1871      the switch, which will "break" the while.  */
1872   low = exp ? build_int_cst (TREE_TYPE (exp), 0) : boolean_false_node;
1873   high = low;
1874   in_p = 0;
1875   strict_overflow_p = false;
1876   is_bool = false;
1877   if (exp == NULL_TREE)
1878     is_bool = true;
1879   else if (TYPE_PRECISION (TREE_TYPE (exp)) == 1)
1880     {
1881       if (TYPE_UNSIGNED (TREE_TYPE (exp)))
1882 	is_bool = true;
1883       else
1884 	return;
1885     }
1886   else if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE)
1887     is_bool = true;
1888 
1889   while (1)
1890     {
1891       enum tree_code code;
1892       tree arg0, arg1, exp_type;
1893       tree nexp;
1894       location_t loc;
1895 
1896       if (exp != NULL_TREE)
1897 	{
1898 	  if (TREE_CODE (exp) != SSA_NAME
1899 	      || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp))
1900 	    break;
1901 
1902 	  stmt = SSA_NAME_DEF_STMT (exp);
1903 	  if (!is_gimple_assign (stmt))
1904 	    break;
1905 
1906 	  code = gimple_assign_rhs_code (stmt);
1907 	  arg0 = gimple_assign_rhs1 (stmt);
1908 	  arg1 = gimple_assign_rhs2 (stmt);
1909 	  exp_type = TREE_TYPE (exp);
1910 	}
1911       else
1912 	{
1913 	  code = gimple_cond_code (stmt);
1914 	  arg0 = gimple_cond_lhs (stmt);
1915 	  arg1 = gimple_cond_rhs (stmt);
1916 	  exp_type = boolean_type_node;
1917 	}
1918 
1919       if (TREE_CODE (arg0) != SSA_NAME)
1920 	break;
1921       loc = gimple_location (stmt);
1922       switch (code)
1923 	{
1924 	case BIT_NOT_EXPR:
1925 	  if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE
1926 	      /* Ensure the range is either +[-,0], +[0,0],
1927 		 -[-,0], -[0,0] or +[1,-], +[1,1], -[1,-] or
1928 		 -[1,1].  If it is e.g. +[-,-] or -[-,-]
1929 		 or similar expression of unconditional true or
1930 		 false, it should not be negated.  */
1931 	      && ((high && integer_zerop (high))
1932 		  || (low && integer_onep (low))))
1933 	    {
1934 	      in_p = !in_p;
1935 	      exp = arg0;
1936 	      continue;
1937 	    }
1938 	  break;
1939 	case SSA_NAME:
1940 	  exp = arg0;
1941 	  continue;
1942 	CASE_CONVERT:
1943 	  if (is_bool)
1944 	    goto do_default;
1945 	  if (TYPE_PRECISION (TREE_TYPE (arg0)) == 1)
1946 	    {
1947 	      if (TYPE_UNSIGNED (TREE_TYPE (arg0)))
1948 		is_bool = true;
1949 	      else
1950 		return;
1951 	    }
1952 	  else if (TREE_CODE (TREE_TYPE (arg0)) == BOOLEAN_TYPE)
1953 	    is_bool = true;
1954 	  goto do_default;
1955 	case EQ_EXPR:
1956 	case NE_EXPR:
1957 	case LT_EXPR:
1958 	case LE_EXPR:
1959 	case GE_EXPR:
1960 	case GT_EXPR:
1961 	  is_bool = true;
1962 	  /* FALLTHRU */
1963 	default:
1964 	  if (!is_bool)
1965 	    return;
1966 	do_default:
1967 	  nexp = make_range_step (loc, code, arg0, arg1, exp_type,
1968 				  &low, &high, &in_p,
1969 				  &strict_overflow_p);
1970 	  if (nexp != NULL_TREE)
1971 	    {
1972 	      exp = nexp;
1973 	      gcc_assert (TREE_CODE (exp) == SSA_NAME);
1974 	      continue;
1975 	    }
1976 	  break;
1977 	}
1978       break;
1979     }
1980   if (is_bool)
1981     {
1982       r->exp = exp;
1983       r->in_p = in_p;
1984       r->low = low;
1985       r->high = high;
1986       r->strict_overflow_p = strict_overflow_p;
1987     }
1988 }
1989 
1990 /* Comparison function for qsort.  Sort entries
1991    without SSA_NAME exp first, then with SSA_NAMEs sorted
1992    by increasing SSA_NAME_VERSION, and for the same SSA_NAMEs
1993    by increasing ->low and if ->low is the same, by increasing
1994    ->high.  ->low == NULL_TREE means minimum, ->high == NULL_TREE
1995    maximum.  */
1996 
1997 static int
1998 range_entry_cmp (const void *a, const void *b)
1999 {
2000   const struct range_entry *p = (const struct range_entry *) a;
2001   const struct range_entry *q = (const struct range_entry *) b;
2002 
2003   if (p->exp != NULL_TREE && TREE_CODE (p->exp) == SSA_NAME)
2004     {
2005       if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
2006 	{
2007 	  /* Group range_entries for the same SSA_NAME together.  */
2008 	  if (SSA_NAME_VERSION (p->exp) < SSA_NAME_VERSION (q->exp))
2009 	    return -1;
2010 	  else if (SSA_NAME_VERSION (p->exp) > SSA_NAME_VERSION (q->exp))
2011 	    return 1;
2012 	  /* If ->low is different, NULL low goes first, then by
2013 	     ascending low.  */
2014 	  if (p->low != NULL_TREE)
2015 	    {
2016 	      if (q->low != NULL_TREE)
2017 		{
2018 		  tree tem = fold_binary (LT_EXPR, boolean_type_node,
2019 					  p->low, q->low);
2020 		  if (tem && integer_onep (tem))
2021 		    return -1;
2022 		  tem = fold_binary (GT_EXPR, boolean_type_node,
2023 				     p->low, q->low);
2024 		  if (tem && integer_onep (tem))
2025 		    return 1;
2026 		}
2027 	      else
2028 		return 1;
2029 	    }
2030 	  else if (q->low != NULL_TREE)
2031 	    return -1;
2032 	  /* If ->high is different, NULL high goes last, before that by
2033 	     ascending high.  */
2034 	  if (p->high != NULL_TREE)
2035 	    {
2036 	      if (q->high != NULL_TREE)
2037 		{
2038 		  tree tem = fold_binary (LT_EXPR, boolean_type_node,
2039 					  p->high, q->high);
2040 		  if (tem && integer_onep (tem))
2041 		    return -1;
2042 		  tem = fold_binary (GT_EXPR, boolean_type_node,
2043 				     p->high, q->high);
2044 		  if (tem && integer_onep (tem))
2045 		    return 1;
2046 		}
2047 	      else
2048 		return -1;
2049 	    }
2050 	  else if (p->high != NULL_TREE)
2051 	    return 1;
2052 	  /* If both ranges are the same, sort below by ascending idx.  */
2053 	}
2054       else
2055 	return 1;
2056     }
2057   else if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
2058     return -1;
2059 
2060   if (p->idx < q->idx)
2061     return -1;
2062   else
2063     {
2064       gcc_checking_assert (p->idx > q->idx);
2065       return 1;
2066     }
2067 }
2068 
2069 /* Helper routine of optimize_range_tests.
2070    [EXP, IN_P, LOW, HIGH, STRICT_OVERFLOW_P] is a merged range for
2071    RANGE and OTHERRANGE through OTHERRANGE + COUNT - 1 ranges,
2072    OPCODE and OPS are arguments of optimize_range_tests.  Return
2073    true if the range merge has been successful.
2074    If OPCODE is ERROR_MARK, this is called from within
2075    maybe_optimize_range_tests and is performing inter-bb range optimization.
2076    In that case, whether an op is BIT_AND_EXPR or BIT_IOR_EXPR is found in
2077    oe->rank.  */
2078 
2079 static bool
2080 update_range_test (struct range_entry *range, struct range_entry *otherrange,
2081 		   unsigned int count, enum tree_code opcode,
2082 		   vec<operand_entry_t> *ops, tree exp, bool in_p,
2083 		   tree low, tree high, bool strict_overflow_p)
2084 {
2085   operand_entry_t oe = (*ops)[range->idx];
2086   tree op = oe->op;
2087   gimple stmt = op ? SSA_NAME_DEF_STMT (op) :
2088     last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id));
2089   location_t loc = gimple_location (stmt);
2090   tree optype = op ? TREE_TYPE (op) : boolean_type_node;
2091   tree tem = build_range_check (loc, optype, exp, in_p, low, high);
2092   enum warn_strict_overflow_code wc = WARN_STRICT_OVERFLOW_COMPARISON;
2093   gimple_stmt_iterator gsi;
2094 
2095   if (tem == NULL_TREE)
2096     return false;
2097 
2098   if (strict_overflow_p && issue_strict_overflow_warning (wc))
2099     warning_at (loc, OPT_Wstrict_overflow,
2100 		"assuming signed overflow does not occur "
2101 		"when simplifying range test");
2102 
2103   if (dump_file && (dump_flags & TDF_DETAILS))
2104     {
2105       struct range_entry *r;
2106       fprintf (dump_file, "Optimizing range tests ");
2107       print_generic_expr (dump_file, range->exp, 0);
2108       fprintf (dump_file, " %c[", range->in_p ? '+' : '-');
2109       print_generic_expr (dump_file, range->low, 0);
2110       fprintf (dump_file, ", ");
2111       print_generic_expr (dump_file, range->high, 0);
2112       fprintf (dump_file, "]");
2113       for (r = otherrange; r < otherrange + count; r++)
2114 	{
2115 	  fprintf (dump_file, " and %c[", r->in_p ? '+' : '-');
2116 	  print_generic_expr (dump_file, r->low, 0);
2117 	  fprintf (dump_file, ", ");
2118 	  print_generic_expr (dump_file, r->high, 0);
2119 	  fprintf (dump_file, "]");
2120 	}
2121       fprintf (dump_file, "\n into ");
2122       print_generic_expr (dump_file, tem, 0);
2123       fprintf (dump_file, "\n");
2124     }
2125 
2126   if (opcode == BIT_IOR_EXPR
2127       || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
2128     tem = invert_truthvalue_loc (loc, tem);
2129 
2130   tem = fold_convert_loc (loc, optype, tem);
2131   gsi = gsi_for_stmt (stmt);
2132   /* In rare cases range->exp can be equal to lhs of stmt.
2133      In that case we have to insert after the stmt rather than before
2134      it.  */
2135   if (op == range->exp)
2136     tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE, false,
2137 				    GSI_CONTINUE_LINKING);
2138   else
2139     {
2140       tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE, true,
2141 				      GSI_SAME_STMT);
2142       gsi_prev (&gsi);
2143     }
2144   for (; !gsi_end_p (gsi); gsi_prev (&gsi))
2145     if (gimple_uid (gsi_stmt (gsi)))
2146       break;
2147     else
2148       gimple_set_uid (gsi_stmt (gsi), gimple_uid (stmt));
2149 
2150   oe->op = tem;
2151   range->exp = exp;
2152   range->low = low;
2153   range->high = high;
2154   range->in_p = in_p;
2155   range->strict_overflow_p = false;
2156 
2157   for (range = otherrange; range < otherrange + count; range++)
2158     {
2159       oe = (*ops)[range->idx];
2160       /* Now change all the other range test immediate uses, so that
2161 	 those tests will be optimized away.  */
2162       if (opcode == ERROR_MARK)
2163 	{
2164 	  if (oe->op)
2165 	    oe->op = build_int_cst (TREE_TYPE (oe->op),
2166 				    oe->rank == BIT_IOR_EXPR ? 0 : 1);
2167 	  else
2168 	    oe->op = (oe->rank == BIT_IOR_EXPR
2169 		      ? boolean_false_node : boolean_true_node);
2170 	}
2171       else
2172 	oe->op = error_mark_node;
2173       range->exp = NULL_TREE;
2174     }
2175   return true;
2176 }
2177 
2178 /* Optimize X == CST1 || X == CST2
2179    if popcount (CST1 ^ CST2) == 1 into
2180    (X & ~(CST1 ^ CST2)) == (CST1 & ~(CST1 ^ CST2)).
2181    Similarly for ranges.  E.g.
2182    X != 2 && X != 3 && X != 10 && X != 11
2183    will be transformed by the previous optimization into
2184    !((X - 2U) <= 1U || (X - 10U) <= 1U)
2185    and this loop can transform that into
2186    !(((X & ~8) - 2U) <= 1U).  */
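
/* Worked instance of the comment above (illustrative): for
   (X - 2U) <= 1U || (X - 10U) <= 1U we get lowi = 2, highi = 3,
   lowj = 10, highj = 11; lowi ^ lowj == highi ^ highj == 8, a power
   of two, so EXP becomes X & ~8 and the new bounds are 2 and 3,
   giving ((X & ~8) - 2U) <= 1U.  */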
2187 
2188 static bool
2189 optimize_range_tests_xor (enum tree_code opcode, tree type,
2190 			  tree lowi, tree lowj, tree highi, tree highj,
2191 			  vec<operand_entry_t> *ops,
2192 			  struct range_entry *rangei,
2193 			  struct range_entry *rangej)
2194 {
2195   tree lowxor, highxor, tem, exp;
2196   /* Check highi ^ lowi == highj ^ lowj and
2197      popcount (highi ^ lowi) == 1.  */
2198   lowxor = fold_binary (BIT_XOR_EXPR, type, lowi, lowj);
2199   if (lowxor == NULL_TREE || TREE_CODE (lowxor) != INTEGER_CST)
2200     return false;
2201   if (!integer_pow2p (lowxor))
2202     return false;
2203   highxor = fold_binary (BIT_XOR_EXPR, type, highi, highj);
2204   if (!tree_int_cst_equal (lowxor, highxor))
2205     return false;
2206 
2207   tem = fold_build1 (BIT_NOT_EXPR, type, lowxor);
2208   exp = fold_build2 (BIT_AND_EXPR, type, rangei->exp, tem);
2209   lowj = fold_build2 (BIT_AND_EXPR, type, lowi, tem);
2210   highj = fold_build2 (BIT_AND_EXPR, type, highi, tem);
2211   if (update_range_test (rangei, rangej, 1, opcode, ops, exp,
2212 			 rangei->in_p, lowj, highj,
2213 			 rangei->strict_overflow_p
2214 			 || rangej->strict_overflow_p))
2215     return true;
2216   return false;
2217 }
2218 
2219 /* Optimize X == CST1 || X == CST2
2220    if popcount (CST2 - CST1) == 1 into
2221    ((X - CST1) & ~(CST2 - CST1)) == 0.
2222    Similarly for ranges.  E.g.
2223    X == 43 || X == 76 || X == 44 || X == 78 || X == 77 || X == 46
2224    || X == 75 || X == 45
2225    will be transformed by the previous optimization into
2226    (X - 43U) <= 3U || (X - 75U) <= 3U
2227    and this loop can transform that into
2228    ((X - 43U) & ~(75U - 43U)) <= 3U.  */
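
/* Worked instance of the comment above (illustrative): for
   (X - 43U) <= 3U || (X - 75U) <= 3U we get lowi = 43, highi = 46,
   lowj = 75, highj = 78; highi - lowi == highj - lowj == 3 and
   lowj - lowi == 32 is a power of two, so the test becomes
   ((X - 43U) & ~32U) <= 3U.  */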
2229 static bool
2230 optimize_range_tests_diff (enum tree_code opcode, tree type,
2231 			    tree lowi, tree lowj, tree highi, tree highj,
2232 			    vec<operand_entry_t> *ops,
2233 			    struct range_entry *rangei,
2234 			    struct range_entry *rangej)
2235 {
2236   tree tem1, tem2, mask;
2237   /* Check highi - lowi == highj - lowj.  */
2238   tem1 = fold_binary (MINUS_EXPR, type, highi, lowi);
2239   if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST)
2240     return false;
2241   tem2 = fold_binary (MINUS_EXPR, type, highj, lowj);
2242   if (!tree_int_cst_equal (tem1, tem2))
2243     return false;
2244   /* Check popcount (lowj - lowi) == 1.  */
2245   tem1 = fold_binary (MINUS_EXPR, type, lowj, lowi);
2246   if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST)
2247     return false;
2248   if (!integer_pow2p (tem1))
2249     return false;
2250 
2251   mask = fold_build1 (BIT_NOT_EXPR, type, tem1);
2252   tem1 = fold_binary (MINUS_EXPR, type, rangei->exp, lowi);
2253   tem1 = fold_build2 (BIT_AND_EXPR, type, tem1, mask);
2254   lowj = build_int_cst (type, 0);
2255   if (update_range_test (rangei, rangej, 1, opcode, ops, tem1,
2256 			 rangei->in_p, lowj, tem2,
2257 			 rangei->strict_overflow_p
2258 			 || rangej->strict_overflow_p))
2259     return true;
2260   return false;
2261 }
2262 
2263 /* Perform the checks common to optimize_range_tests_xor and
2264    optimize_range_tests_diff.
2265    If OPTIMIZE_XOR is TRUE, call optimize_range_tests_xor.
2266    Else call optimize_range_tests_diff.  */
2267 
2268 static bool
2269 optimize_range_tests_1 (enum tree_code opcode, int first, int length,
2270 			bool optimize_xor, vec<operand_entry_t> *ops,
2271 			struct range_entry *ranges)
2272 {
2273   int i, j;
2274   bool any_changes = false;
2275   for (i = first; i < length; i++)
2276     {
2277       tree lowi, highi, lowj, highj, type, tem;
2278 
2279       if (ranges[i].exp == NULL_TREE || ranges[i].in_p)
2280 	continue;
2281       type = TREE_TYPE (ranges[i].exp);
2282       if (!INTEGRAL_TYPE_P (type))
2283 	continue;
2284       lowi = ranges[i].low;
2285       if (lowi == NULL_TREE)
2286 	lowi = TYPE_MIN_VALUE (type);
2287       highi = ranges[i].high;
2288       if (highi == NULL_TREE)
2289 	continue;
2290       for (j = i + 1; j < length && j < i + 64; j++)
2291 	{
2292 	  bool changes;
2293 	  if (ranges[i].exp != ranges[j].exp || ranges[j].in_p)
2294 	    continue;
2295 	  lowj = ranges[j].low;
2296 	  if (lowj == NULL_TREE)
2297 	    continue;
2298 	  highj = ranges[j].high;
2299 	  if (highj == NULL_TREE)
2300 	    highj = TYPE_MAX_VALUE (type);
2301 	  /* Check lowj > highi.  */
2302 	  tem = fold_binary (GT_EXPR, boolean_type_node,
2303 			     lowj, highi);
2304 	  if (tem == NULL_TREE || !integer_onep (tem))
2305 	    continue;
2306 	  if (optimize_xor)
2307 	    changes = optimize_range_tests_xor (opcode, type, lowi, lowj,
2308 						highi, highj, ops,
2309 						ranges + i, ranges + j);
2310 	  else
2311 	    changes = optimize_range_tests_diff (opcode, type, lowi, lowj,
2312 						 highi, highj, ops,
2313 						 ranges + i, ranges + j);
2314 	  if (changes)
2315 	    {
2316 	      any_changes = true;
2317 	      break;
2318 	    }
2319 	}
2320     }
2321   return any_changes;
2322 }
2323 
2324 /* Optimize range tests, similarly to how fold_range_test optimizes
2325    them on trees.  The tree code for the binary
2326    operation between all the operands is OPCODE.
2327    If OPCODE is ERROR_MARK, optimize_range_tests is called from within
2328    maybe_optimize_range_tests for inter-bb range optimization.
2329    In that case if oe->op is NULL, oe->id is bb->index whose
2330    GIMPLE_COND is && or ||ed into the test, and oe->rank says
2331    the actual opcode.  */
2332 
2333 static bool
2334 optimize_range_tests (enum tree_code opcode,
2335 		      vec<operand_entry_t> *ops)
2336 {
2337   unsigned int length = ops->length (), i, j, first;
2338   operand_entry_t oe;
2339   struct range_entry *ranges;
2340   bool any_changes = false;
2341 
2342   if (length == 1)
2343     return false;
2344 
2345   ranges = XNEWVEC (struct range_entry, length);
2346   for (i = 0; i < length; i++)
2347     {
2348       oe = (*ops)[i];
2349       ranges[i].idx = i;
2350       init_range_entry (ranges + i, oe->op,
2351 			oe->op ? NULL :
2352 			  last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id)));
2353       /* For | invert it now, we will invert it again before emitting
2354 	 the optimized expression.  */
2355       if (opcode == BIT_IOR_EXPR
2356 	  || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
2357 	ranges[i].in_p = !ranges[i].in_p;
2358     }
2359 
2360   qsort (ranges, length, sizeof (*ranges), range_entry_cmp);
2361   for (i = 0; i < length; i++)
2362     if (ranges[i].exp != NULL_TREE && TREE_CODE (ranges[i].exp) == SSA_NAME)
2363       break;
2364 
2365   /* Try to merge ranges.  */
2366   for (first = i; i < length; i++)
2367     {
2368       tree low = ranges[i].low;
2369       tree high = ranges[i].high;
2370       int in_p = ranges[i].in_p;
2371       bool strict_overflow_p = ranges[i].strict_overflow_p;
2372       int update_fail_count = 0;
2373 
2374       for (j = i + 1; j < length; j++)
2375 	{
2376 	  if (ranges[i].exp != ranges[j].exp)
2377 	    break;
2378 	  if (!merge_ranges (&in_p, &low, &high, in_p, low, high,
2379 			     ranges[j].in_p, ranges[j].low, ranges[j].high))
2380 	    break;
2381 	  strict_overflow_p |= ranges[j].strict_overflow_p;
2382 	}
2383 
2384       if (j == i + 1)
2385 	continue;
2386 
2387       if (update_range_test (ranges + i, ranges + i + 1, j - i - 1, opcode,
2388 			     ops, ranges[i].exp, in_p, low, high,
2389 			     strict_overflow_p))
2390 	{
2391 	  i = j - 1;
2392 	  any_changes = true;
2393 	}
2394       /* Avoid quadratic complexity if all merge_ranges calls would succeed,
2395 	 while update_range_test would fail.  */
2396       else if (update_fail_count == 64)
2397 	i = j - 1;
2398       else
2399 	++update_fail_count;
2400     }
2401 
2402   any_changes |= optimize_range_tests_1 (opcode, first, length, true,
2403 					 ops, ranges);
2404 
2405   if (BRANCH_COST (optimize_function_for_speed_p (cfun), false) >= 2)
2406     any_changes |= optimize_range_tests_1 (opcode, first, length, false,
2407 					   ops, ranges);
2408 
2409   if (any_changes && opcode != ERROR_MARK)
2410     {
2411       j = 0;
2412       FOR_EACH_VEC_ELT (*ops, i, oe)
2413 	{
2414 	  if (oe->op == error_mark_node)
2415 	    continue;
2416 	  else if (i != j)
2417 	    (*ops)[j] = oe;
2418 	  j++;
2419 	}
2420       ops->truncate (j);
2421     }
2422 
2423   XDELETEVEC (ranges);
2424   return any_changes;
2425 }
2426 
2427 /* Return true if STMT is a cast like:
2428    <bb N>:
2429    ...
2430    _123 = (int) _234;
2431 
2432    <bb M>:
2433    # _345 = PHI <_123(N), 1(...), 1(...)>
2434    where _234 has bool type, _123 has single use and
2435    bb N has a single successor M.  This is commonly used in
2436    the last block of a range test.  */
2437 
2438 static bool
2439 final_range_test_p (gimple stmt)
2440 {
2441   basic_block bb, rhs_bb;
2442   edge e;
2443   tree lhs, rhs;
2444   use_operand_p use_p;
2445   gimple use_stmt;
2446 
2447   if (!gimple_assign_cast_p (stmt))
2448     return false;
2449   bb = gimple_bb (stmt);
2450   if (!single_succ_p (bb))
2451     return false;
2452   e = single_succ_edge (bb);
2453   if (e->flags & EDGE_COMPLEX)
2454     return false;
2455 
2456   lhs = gimple_assign_lhs (stmt);
2457   rhs = gimple_assign_rhs1 (stmt);
2458   if (!INTEGRAL_TYPE_P (TREE_TYPE (lhs))
2459       || TREE_CODE (rhs) != SSA_NAME
2460       || TREE_CODE (TREE_TYPE (rhs)) != BOOLEAN_TYPE)
2461     return false;
2462 
2463   /* Test whether lhs is consumed only by a PHI in the only successor bb.  */
2464   if (!single_imm_use (lhs, &use_p, &use_stmt))
2465     return false;
2466 
2467   if (gimple_code (use_stmt) != GIMPLE_PHI
2468       || gimple_bb (use_stmt) != e->dest)
2469     return false;
2470 
2471   /* And that the rhs is defined in the same loop.  */
2472   rhs_bb = gimple_bb (SSA_NAME_DEF_STMT (rhs));
2473   if (rhs_bb == NULL
2474       || !flow_bb_inside_loop_p (loop_containing_stmt (stmt), rhs_bb))
2475     return false;
2476 
2477   return true;
2478 }
2479 
2480 /* Return true if BB is suitable basic block for inter-bb range test
2481    optimization.  If BACKWARD is true, BB should be the only predecessor
2482    of TEST_BB, and *OTHER_BB is either NULL and filled by the routine,
2483    or compared against to find a common basic block to which all conditions
2484    branch if true resp. false.  If BACKWARD is false, TEST_BB should
2485    be the only predecessor of BB.  */
2486 
2487 static bool
2488 suitable_cond_bb (basic_block bb, basic_block test_bb, basic_block *other_bb,
2489 		  bool backward)
2490 {
2491   edge_iterator ei, ei2;
2492   edge e, e2;
2493   gimple stmt;
2494   gimple_stmt_iterator gsi;
2495   bool other_edge_seen = false;
2496   bool is_cond;
2497 
2498   if (test_bb == bb)
2499     return false;
2500   /* Check last stmt first.  */
2501   stmt = last_stmt (bb);
2502   if (stmt == NULL
2503       || (gimple_code (stmt) != GIMPLE_COND
2504 	  && (backward || !final_range_test_p (stmt)))
2505       || gimple_visited_p (stmt)
2506       || stmt_could_throw_p (stmt)
2507       || *other_bb == bb)
2508     return false;
2509   is_cond = gimple_code (stmt) == GIMPLE_COND;
2510   if (is_cond)
2511     {
2512       /* If last stmt is GIMPLE_COND, verify that one of the succ edges
2513 	 goes to the next bb (if BACKWARD, it is TEST_BB), and the other
2514 	 to *OTHER_BB (if not set yet, try to find it out).  */
2515       if (EDGE_COUNT (bb->succs) != 2)
2516 	return false;
2517       FOR_EACH_EDGE (e, ei, bb->succs)
2518 	{
2519 	  if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
2520 	    return false;
2521 	  if (e->dest == test_bb)
2522 	    {
2523 	      if (backward)
2524 		continue;
2525 	      else
2526 		return false;
2527 	    }
2528 	  if (e->dest == bb)
2529 	    return false;
2530 	  if (*other_bb == NULL)
2531 	    {
2532 	      FOR_EACH_EDGE (e2, ei2, test_bb->succs)
2533 		if (!(e2->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
2534 		  return false;
2535 		else if (e->dest == e2->dest)
2536 		  *other_bb = e->dest;
2537 	      if (*other_bb == NULL)
2538 		return false;
2539 	    }
2540 	  if (e->dest == *other_bb)
2541 	    other_edge_seen = true;
2542 	  else if (backward)
2543 	    return false;
2544 	}
2545       if (*other_bb == NULL || !other_edge_seen)
2546 	return false;
2547     }
2548   else if (single_succ (bb) != *other_bb)
2549     return false;
2550 
2551   /* Now check all PHIs of *OTHER_BB.  */
2552   e = find_edge (bb, *other_bb);
2553   e2 = find_edge (test_bb, *other_bb);
2554   for (gsi = gsi_start_phis (e->dest); !gsi_end_p (gsi); gsi_next (&gsi))
2555     {
2556       gimple phi = gsi_stmt (gsi);
2557       /* If both BB and TEST_BB end with GIMPLE_COND, all PHI arguments
2558 	 corresponding to BB and TEST_BB predecessor must be the same.  */
2559       if (!operand_equal_p (gimple_phi_arg_def (phi, e->dest_idx),
2560 			    gimple_phi_arg_def (phi, e2->dest_idx), 0))
2561 	{
2562 	  /* Otherwise, if one of the blocks doesn't end with GIMPLE_COND,
2563 	     one of the PHIs should have the lhs of the last stmt in
2564 	     that block as PHI arg and that PHI should have 0 or 1
2565 	     corresponding to it in all other range test basic blocks
2566 	     considered.  */
2567 	  if (!is_cond)
2568 	    {
2569 	      if (gimple_phi_arg_def (phi, e->dest_idx)
2570 		  == gimple_assign_lhs (stmt)
2571 		  && (integer_zerop (gimple_phi_arg_def (phi, e2->dest_idx))
2572 		      || integer_onep (gimple_phi_arg_def (phi,
2573 							   e2->dest_idx))))
2574 		continue;
2575 	    }
2576 	  else
2577 	    {
2578 	      gimple test_last = last_stmt (test_bb);
2579 	      if (gimple_code (test_last) != GIMPLE_COND
2580 		  && gimple_phi_arg_def (phi, e2->dest_idx)
2581 		     == gimple_assign_lhs (test_last)
2582 		  && (integer_zerop (gimple_phi_arg_def (phi, e->dest_idx))
2583 		      || integer_onep (gimple_phi_arg_def (phi, e->dest_idx))))
2584 		continue;
2585 	    }
2586 
2587 	  return false;
2588 	}
2589     }
2590   return true;
2591 }
2592 
2593 /* Return true if BB doesn't have side-effects that would disallow
2594    range test optimization, all SSA_NAMEs set in the bb are consumed
2595    in the bb and there are no PHIs.  */
2596 
2597 static bool
2598 no_side_effect_bb (basic_block bb)
2599 {
2600   gimple_stmt_iterator gsi;
2601   gimple last;
2602 
2603   if (!gimple_seq_empty_p (phi_nodes (bb)))
2604     return false;
2605   last = last_stmt (bb);
2606   for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2607     {
2608       gimple stmt = gsi_stmt (gsi);
2609       tree lhs;
2610       imm_use_iterator imm_iter;
2611       use_operand_p use_p;
2612 
2613       if (is_gimple_debug (stmt))
2614 	continue;
2615       if (gimple_has_side_effects (stmt))
2616 	return false;
2617       if (stmt == last)
2618 	return true;
2619       if (!is_gimple_assign (stmt))
2620 	return false;
2621       lhs = gimple_assign_lhs (stmt);
2622       if (TREE_CODE (lhs) != SSA_NAME)
2623 	return false;
2624       if (gimple_assign_rhs_could_trap_p (stmt))
2625 	return false;
2626       FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
2627 	{
2628 	  gimple use_stmt = USE_STMT (use_p);
2629 	  if (is_gimple_debug (use_stmt))
2630 	    continue;
2631 	  if (gimple_bb (use_stmt) != bb)
2632 	    return false;
2633 	}
2634     }
2635   return false;
2636 }
2637 
2638 /* If VAR is set by CODE (BIT_{AND,IOR}_EXPR) which is reassociable,
2639    return true and fill in *OPS recursively.  */
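
/* For example (illustrative SSA names): if VAR is _9 defined as
   _9 = _5 | _7 and _5 is in turn _5 = _1 | _3, with each name used
   only once, the recursion pushes entries for _1, _3 and _7 into
   *OPS.  */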
2640 
2641 static bool
2642 get_ops (tree var, enum tree_code code, vec<operand_entry_t> *ops,
2643 	 struct loop *loop)
2644 {
2645   gimple stmt = SSA_NAME_DEF_STMT (var);
2646   tree rhs[2];
2647   int i;
2648 
2649   if (!is_reassociable_op (stmt, code, loop))
2650     return false;
2651 
2652   rhs[0] = gimple_assign_rhs1 (stmt);
2653   rhs[1] = gimple_assign_rhs2 (stmt);
2654   gimple_set_visited (stmt, true);
2655   for (i = 0; i < 2; i++)
2656     if (TREE_CODE (rhs[i]) == SSA_NAME
2657 	&& !get_ops (rhs[i], code, ops, loop)
2658 	&& has_single_use (rhs[i]))
2659       {
2660 	operand_entry_t oe = (operand_entry_t) pool_alloc (operand_entry_pool);
2661 
2662 	oe->op = rhs[i];
2663 	oe->rank = code;
2664 	oe->id = 0;
2665 	oe->count = 1;
2666 	ops->safe_push (oe);
2667       }
2668   return true;
2669 }
2670 
2671 /* Find the ops that were added by get_ops starting from VAR, see if
2672    they were changed during update_range_test and if yes, create new
2673    stmts.  */
2674 
2675 static tree
2676 update_ops (tree var, enum tree_code code, vec<operand_entry_t> ops,
2677 	    unsigned int *pidx, struct loop *loop)
2678 {
2679   gimple stmt = SSA_NAME_DEF_STMT (var);
2680   tree rhs[4];
2681   int i;
2682 
2683   if (!is_reassociable_op (stmt, code, loop))
2684     return NULL;
2685 
2686   rhs[0] = gimple_assign_rhs1 (stmt);
2687   rhs[1] = gimple_assign_rhs2 (stmt);
2688   rhs[2] = rhs[0];
2689   rhs[3] = rhs[1];
2690   for (i = 0; i < 2; i++)
2691     if (TREE_CODE (rhs[i]) == SSA_NAME)
2692       {
2693 	rhs[2 + i] = update_ops (rhs[i], code, ops, pidx, loop);
2694 	if (rhs[2 + i] == NULL_TREE)
2695 	  {
2696 	    if (has_single_use (rhs[i]))
2697 	      rhs[2 + i] = ops[(*pidx)++]->op;
2698 	    else
2699 	      rhs[2 + i] = rhs[i];
2700 	  }
2701       }
2702   if ((rhs[2] != rhs[0] || rhs[3] != rhs[1])
2703       && (rhs[2] != rhs[1] || rhs[3] != rhs[0]))
2704     {
2705       gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
2706       var = make_ssa_name (TREE_TYPE (var), NULL);
2707       gimple g = gimple_build_assign_with_ops (gimple_assign_rhs_code (stmt),
2708 					       var, rhs[2], rhs[3]);
2709       gimple_set_uid (g, gimple_uid (stmt));
2710       gimple_set_visited (g, true);
2711       gsi_insert_before (&gsi, g, GSI_SAME_STMT);
2712     }
2713   return var;
2714 }
2715 
2716 /* Structure to track the initial value passed to get_ops and
2717    the range in the ops vector for each basic block.  */
2718 
2719 struct inter_bb_range_test_entry
2720 {
2721   tree op;
2722   unsigned int first_idx, last_idx;
2723 };
2724 
2725 /* Inter-bb range test optimization.  */
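
/* As a rough illustration (hypothetical CFG): for a chain of blocks

     <bb 2>: if (x_1 == 2) goto <bb 4>; else goto <bb 3>;
     <bb 3>: if (x_1 == 3) goto <bb 4>; else goto <bb 5>;

   both conditions branch to the common bb 4, so the two tests can be
   merged into a single range check such as (unsigned) (x_1 - 2) <= 1
   in bb 2, while the other condition is folded to a constant.  */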
2726 
2727 static void
2728 maybe_optimize_range_tests (gimple stmt)
2729 {
2730   basic_block first_bb = gimple_bb (stmt);
2731   basic_block last_bb = first_bb;
2732   basic_block other_bb = NULL;
2733   basic_block bb;
2734   edge_iterator ei;
2735   edge e;
2736   auto_vec<operand_entry_t> ops;
2737   auto_vec<inter_bb_range_test_entry> bbinfo;
2738   bool any_changes = false;
2739 
2740   /* Consider only basic blocks that end with GIMPLE_COND or
2741      a cast statement satisfying final_range_test_p.  All
2742      but the last bb in the first_bb .. last_bb range
2743      should end with GIMPLE_COND.  */
2744   if (gimple_code (stmt) == GIMPLE_COND)
2745     {
2746       if (EDGE_COUNT (first_bb->succs) != 2)
2747 	return;
2748     }
2749   else if (final_range_test_p (stmt))
2750     other_bb = single_succ (first_bb);
2751   else
2752     return;
2753 
2754   if (stmt_could_throw_p (stmt))
2755     return;
2756 
2757   /* As relative ordering of post-dominator sons isn't fixed,
2758      maybe_optimize_range_tests can be called first on any
2759      bb in the range we want to optimize.  So, start searching
2760      backwards to see if first_bb can be set to a predecessor.  */
2761   while (single_pred_p (first_bb))
2762     {
2763       basic_block pred_bb = single_pred (first_bb);
2764       if (!suitable_cond_bb (pred_bb, first_bb, &other_bb, true))
2765 	break;
2766       if (!no_side_effect_bb (first_bb))
2767 	break;
2768       first_bb = pred_bb;
2769     }
2770   /* If first_bb is last_bb, other_bb hasn't been computed yet.
2771      Before starting forward search in last_bb successors, find
2772      out the other_bb.  */
2773   if (first_bb == last_bb)
2774     {
2775       other_bb = NULL;
2776       /* As non-GIMPLE_COND last stmt always terminates the range,
2777 	 if forward search didn't discover anything, just give up.  */
2778       if (gimple_code (stmt) != GIMPLE_COND)
2779 	return;
2780       /* Look at both successors.  Either it ends with a GIMPLE_COND
2781 	 and satisfies suitable_cond_bb, or ends with a cast and
2782 	 other_bb is that cast's successor.  */
2783       FOR_EACH_EDGE (e, ei, first_bb->succs)
2784 	if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE))
2785 	    || e->dest == first_bb)
2786 	  return;
2787 	else if (single_pred_p (e->dest))
2788 	  {
2789 	    stmt = last_stmt (e->dest);
2790 	    if (stmt
2791 		&& gimple_code (stmt) == GIMPLE_COND
2792 		&& EDGE_COUNT (e->dest->succs) == 2)
2793 	      {
2794 		if (suitable_cond_bb (first_bb, e->dest, &other_bb, true))
2795 		  break;
2796 		else
2797 		  other_bb = NULL;
2798 	      }
2799 	    else if (stmt
2800 		     && final_range_test_p (stmt)
2801 		     && find_edge (first_bb, single_succ (e->dest)))
2802 	      {
2803 		other_bb = single_succ (e->dest);
2804 		if (other_bb == first_bb)
2805 		  other_bb = NULL;
2806 	      }
2807 	  }
2808       if (other_bb == NULL)
2809 	return;
2810     }
2811   /* Now do the forward search, moving last_bb to successor bbs
2812      that aren't other_bb.  */
2813   while (EDGE_COUNT (last_bb->succs) == 2)
2814     {
2815       FOR_EACH_EDGE (e, ei, last_bb->succs)
2816 	if (e->dest != other_bb)
2817 	  break;
2818       if (e == NULL)
2819 	break;
2820       if (!single_pred_p (e->dest))
2821 	break;
2822       if (!suitable_cond_bb (e->dest, last_bb, &other_bb, false))
2823 	break;
2824       if (!no_side_effect_bb (e->dest))
2825 	break;
2826       last_bb = e->dest;
2827     }
2828   if (first_bb == last_bb)
2829     return;
2830   /* Here basic blocks first_bb through last_bb's predecessor
2831      end with GIMPLE_COND, all of them have one of the edges to
2832      other_bb and another to another block in the range,
2833      all blocks except first_bb don't have side-effects and
2834      last_bb ends with either GIMPLE_COND, or cast satisfying
2835      final_range_test_p.  */
2836   for (bb = last_bb; ; bb = single_pred (bb))
2837     {
2838       enum tree_code code;
2839       tree lhs, rhs;
2840       inter_bb_range_test_entry bb_ent;
2841 
2842       bb_ent.op = NULL_TREE;
2843       bb_ent.first_idx = ops.length ();
2844       bb_ent.last_idx = bb_ent.first_idx;
2845       e = find_edge (bb, other_bb);
2846       stmt = last_stmt (bb);
2847       gimple_set_visited (stmt, true);
2848       if (gimple_code (stmt) != GIMPLE_COND)
2849 	{
2850 	  use_operand_p use_p;
2851 	  gimple phi;
2852 	  edge e2;
2853 	  unsigned int d;
2854 
2855 	  lhs = gimple_assign_lhs (stmt);
2856 	  rhs = gimple_assign_rhs1 (stmt);
2857 	  gcc_assert (bb == last_bb);
2858 
2859 	  /* stmt is
2860 	     _123 = (int) _234;
2861 
2862 	     followed by:
2863 	     <bb M>:
2864 	     # _345 = PHI <_123(N), 1(...), 1(...)>
2865 
2866 	     or 0 instead of 1.  If it is 0, the _234
2867 	     range test is anded together with all the
2868 	     other range tests, if it is 1, it is ored with
2869 	     them.  */
2870 	  single_imm_use (lhs, &use_p, &phi);
2871 	  gcc_assert (gimple_code (phi) == GIMPLE_PHI);
2872 	  e2 = find_edge (first_bb, other_bb);
2873 	  d = e2->dest_idx;
2874 	  gcc_assert (gimple_phi_arg_def (phi, e->dest_idx) == lhs);
2875 	  if (integer_zerop (gimple_phi_arg_def (phi, d)))
2876 	    code = BIT_AND_EXPR;
2877 	  else
2878 	    {
2879 	      gcc_checking_assert (integer_onep (gimple_phi_arg_def (phi, d)));
2880 	      code = BIT_IOR_EXPR;
2881 	    }
2882 
2883 	  /* If _234 SSA_NAME_DEF_STMT is
2884 	     _234 = _567 | _789;
2885 	     (or &, corresponding to 1/0 in the phi arguments),
2886 	     push into ops the individual range test arguments
2887 	     of the bitwise or resp. and, recursively.  */
2888 	  if (!get_ops (rhs, code, &ops,
2889 			loop_containing_stmt (stmt))
2890 	      && has_single_use (rhs))
2891 	    {
2892 	      /* Otherwise, push the _234 range test itself.  */
2893 	      operand_entry_t oe
2894 		= (operand_entry_t) pool_alloc (operand_entry_pool);
2895 
2896 	      oe->op = rhs;
2897 	      oe->rank = code;
2898 	      oe->id = 0;
2899 	      oe->count = 1;
2900 	      ops.safe_push (oe);
2901 	      bb_ent.last_idx++;
2902 	    }
2903 	  else
2904 	    bb_ent.last_idx = ops.length ();
2905 	  bb_ent.op = rhs;
2906 	  bbinfo.safe_push (bb_ent);
2907 	  continue;
2908 	}
2909       /* Otherwise stmt is GIMPLE_COND.  */
2910       code = gimple_cond_code (stmt);
2911       lhs = gimple_cond_lhs (stmt);
2912       rhs = gimple_cond_rhs (stmt);
2913       if (TREE_CODE (lhs) == SSA_NAME
2914 	  && INTEGRAL_TYPE_P (TREE_TYPE (lhs))
2915 	  && ((code != EQ_EXPR && code != NE_EXPR)
2916 	      || rhs != boolean_false_node
2917 		 /* Either push into ops the individual bitwise
2918 		    or resp. and operands, depending on which
2919 		    edge is other_bb.  */
2920 	      || !get_ops (lhs, (((e->flags & EDGE_TRUE_VALUE) == 0)
2921 				 ^ (code == EQ_EXPR))
2922 				? BIT_AND_EXPR : BIT_IOR_EXPR, &ops,
2923 			   loop_containing_stmt (stmt))))
2924 	{
2925 	  /* Or push the GIMPLE_COND stmt itself.  */
2926 	  operand_entry_t oe
2927 	    = (operand_entry_t) pool_alloc (operand_entry_pool);
2928 
2929 	  oe->op = NULL;
2930 	  oe->rank = (e->flags & EDGE_TRUE_VALUE)
2931 		     ? BIT_IOR_EXPR : BIT_AND_EXPR;
2932 	  /* oe->op = NULL signals that there is no SSA_NAME
2933 	     for the range test; instead, oe->id is the number
2934 	     of the basic block whose last stmt is the
2935 	     GIMPLE_COND.  */
2936 	  oe->id = bb->index;
2937 	  oe->count = 1;
2938 	  ops.safe_push (oe);
2939 	  bb_ent.op = NULL;
2940 	  bb_ent.last_idx++;
2941 	}
2942       else if (ops.length () > bb_ent.first_idx)
2943 	{
2944 	  bb_ent.op = lhs;
2945 	  bb_ent.last_idx = ops.length ();
2946 	}
2947       bbinfo.safe_push (bb_ent);
2948       if (bb == first_bb)
2949 	break;
2950     }
2951   if (ops.length () > 1)
2952     any_changes = optimize_range_tests (ERROR_MARK, &ops);
2953   if (any_changes)
2954     {
2955       unsigned int idx;
2956       /* update_ops relies on has_single_use predicates returning the
2957 	 same values as they did during get_ops earlier.  Additionally it
2958 	 never removes statements, only adds new ones and it should walk
2959 	 from the single imm use and check the predicate already before
2960 	 making those changes.
2961 	 On the other side, the handling of GIMPLE_COND directly can turn
2962 	 previously multiply used SSA_NAMEs into single use SSA_NAMEs, so
2963 	 it needs to be done in a separate loop afterwards.  */
2964       for (bb = last_bb, idx = 0; ; bb = single_pred (bb), idx++)
2965 	{
2966 	  if (bbinfo[idx].first_idx < bbinfo[idx].last_idx
2967 	      && bbinfo[idx].op != NULL_TREE)
2968 	    {
2969 	      tree new_op;
2970 
2971 	      stmt = last_stmt (bb);
2972 	      new_op = update_ops (bbinfo[idx].op,
2973 				   (enum tree_code)
2974 				   ops[bbinfo[idx].first_idx]->rank,
2975 				   ops, &bbinfo[idx].first_idx,
2976 				   loop_containing_stmt (stmt));
2977 	      if (new_op == NULL_TREE)
2978 		{
2979 		  gcc_assert (bb == last_bb);
2980 		  new_op = ops[bbinfo[idx].first_idx++]->op;
2981 		}
2982 	      if (bbinfo[idx].op != new_op)
2983 		{
2984 		  imm_use_iterator iter;
2985 		  use_operand_p use_p;
2986 		  gimple use_stmt, cast_stmt = NULL;
2987 
2988 		  FOR_EACH_IMM_USE_STMT (use_stmt, iter, bbinfo[idx].op)
2989 		    if (is_gimple_debug (use_stmt))
2990 		      continue;
2991 		    else if (gimple_code (use_stmt) == GIMPLE_COND
2992 			     || gimple_code (use_stmt) == GIMPLE_PHI)
2993 		      FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
2994 			SET_USE (use_p, new_op);
2995 		    else if (gimple_assign_cast_p (use_stmt))
2996 		      cast_stmt = use_stmt;
2997 		    else
2998 		      gcc_unreachable ();
2999 		  if (cast_stmt)
3000 		    {
3001 		      gcc_assert (bb == last_bb);
3002 		      tree lhs = gimple_assign_lhs (cast_stmt);
3003 		      tree new_lhs = make_ssa_name (TREE_TYPE (lhs), NULL);
3004 		      enum tree_code rhs_code
3005 			= gimple_assign_rhs_code (cast_stmt);
3006 		      gimple g;
3007 		      if (is_gimple_min_invariant (new_op))
3008 			{
3009 			  new_op = fold_convert (TREE_TYPE (lhs), new_op);
3010 			  g = gimple_build_assign (new_lhs, new_op);
3011 			}
3012 		      else
3013 			g = gimple_build_assign_with_ops (rhs_code, new_lhs,
3014 							  new_op, NULL_TREE);
3015 		      gimple_stmt_iterator gsi = gsi_for_stmt (cast_stmt);
3016 		      gimple_set_uid (g, gimple_uid (cast_stmt));
3017 		      gimple_set_visited (g, true);
3018 		      gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3019 		      FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs)
3020 			if (is_gimple_debug (use_stmt))
3021 			  continue;
3022 			else if (gimple_code (use_stmt) == GIMPLE_COND
3023 				 || gimple_code (use_stmt) == GIMPLE_PHI)
3024 			  FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
3025 			    SET_USE (use_p, new_lhs);
3026 			else
3027 			  gcc_unreachable ();
3028 		    }
3029 		}
3030 	    }
3031 	  if (bb == first_bb)
3032 	    break;
3033 	}
3034       for (bb = last_bb, idx = 0; ; bb = single_pred (bb), idx++)
3035 	{
3036 	  if (bbinfo[idx].first_idx < bbinfo[idx].last_idx
3037 	      && bbinfo[idx].op == NULL_TREE
3038 	      && ops[bbinfo[idx].first_idx]->op != NULL_TREE)
3039 	    {
3040 	      stmt = last_stmt (bb);
3041 	      if (integer_zerop (ops[bbinfo[idx].first_idx]->op))
3042 		gimple_cond_make_false (stmt);
3043 	      else if (integer_onep (ops[bbinfo[idx].first_idx]->op))
3044 		gimple_cond_make_true (stmt);
3045 	      else
3046 		{
3047 		  gimple_cond_set_code (stmt, NE_EXPR);
3048 		  gimple_cond_set_lhs (stmt, ops[bbinfo[idx].first_idx]->op);
3049 		  gimple_cond_set_rhs (stmt, boolean_false_node);
3050 		}
3051 	      update_stmt (stmt);
3052 	    }
3053 	  if (bb == first_bb)
3054 	    break;
3055 	}
3056     }
3057 }
3058 
3059 /* Return true if OPERAND is defined by a PHI node which uses the LHS
3060    of STMT in its operands.  This is also known as a "destructive
3061    update" operation.  */
3062 
3063 static bool
3064 is_phi_for_stmt (gimple stmt, tree operand)
3065 {
3066   gimple def_stmt;
3067   tree lhs;
3068   use_operand_p arg_p;
3069   ssa_op_iter i;
3070 
3071   if (TREE_CODE (operand) != SSA_NAME)
3072     return false;
3073 
3074   lhs = gimple_assign_lhs (stmt);
3075 
3076   def_stmt = SSA_NAME_DEF_STMT (operand);
3077   if (gimple_code (def_stmt) != GIMPLE_PHI)
3078     return false;
3079 
3080   FOR_EACH_PHI_ARG (arg_p, def_stmt, i, SSA_OP_USE)
3081     if (lhs == USE_FROM_PTR (arg_p))
3082       return true;
3083   return false;
3084 }
3085 
3086 /* Remove def stmt of VAR if VAR has zero uses and recurse
3087    on rhs1 operand if so.  */
3088 
3089 static void
3090 remove_visited_stmt_chain (tree var)
3091 {
3092   gimple stmt;
3093   gimple_stmt_iterator gsi;
3094 
3095   while (1)
3096     {
3097       if (TREE_CODE (var) != SSA_NAME || !has_zero_uses (var))
3098 	return;
3099       stmt = SSA_NAME_DEF_STMT (var);
3100       if (is_gimple_assign (stmt) && gimple_visited_p (stmt))
3101 	{
3102 	  var = gimple_assign_rhs1 (stmt);
3103 	  gsi = gsi_for_stmt (stmt);
3104 	  reassoc_remove_stmt (&gsi);
3105 	  release_defs (stmt);
3106 	}
3107       else
3108 	return;
3109     }
3110 }
3111 
3112 /* This function checks three consecutive operands in
3113    the passed operands vector OPS starting from OPINDEX and
3114    swaps two operands if it is profitable for the binary operation
3115    consuming the OPINDEX + 1 and OPINDEX + 2 operands.
3116 
3117    We pair ops with the same rank if possible.
3118 
3119    The alternative we try is to see if STMT is a destructive
3120    update style statement, which is like:
3121    b = phi (a, ...)
3122    a = c + b;
3123    In that case, we want to use the destructive update form to
3124    expose the possible vectorizer sum reduction opportunity.
3125    In that case, the third operand will be the phi node. This
3126    check is not performed if STMT is null.
3127 
3128    We could, of course, try to be better as noted above, and do a
3129    lot of work to try to find these opportunities in >3 operand
3130    cases, but it is unlikely to be worth it.  */
3131 
3132 static void
3133 swap_ops_for_binary_stmt (vec<operand_entry_t> ops,
3134 			  unsigned int opindex, gimple stmt)
3135 {
3136   operand_entry_t oe1, oe2, oe3;
3137 
3138   oe1 = ops[opindex];
3139   oe2 = ops[opindex + 1];
3140   oe3 = ops[opindex + 2];
3141 
3142   if ((oe1->rank == oe2->rank
3143        && oe2->rank != oe3->rank)
3144       || (stmt && is_phi_for_stmt (stmt, oe3->op)
3145 	  && !is_phi_for_stmt (stmt, oe1->op)
3146 	  && !is_phi_for_stmt (stmt, oe2->op)))
3147     {
3148       struct operand_entry temp = *oe3;
3149       oe3->op = oe1->op;
3150       oe3->rank = oe1->rank;
3151       oe1->op = temp.op;
3152 	      oe1->rank = temp.rank;
3153     }
3154   else if ((oe1->rank == oe3->rank
3155 	    && oe2->rank != oe3->rank)
3156 	   || (stmt && is_phi_for_stmt (stmt, oe2->op)
3157 	       && !is_phi_for_stmt (stmt, oe1->op)
3158 	       && !is_phi_for_stmt (stmt, oe3->op)))
3159     {
3160       struct operand_entry temp = *oe2;
3161       oe2->op = oe1->op;
3162       oe2->rank = oe1->rank;
3163       oe1->op = temp.op;
3164       oe1->rank = temp.rank;
3165     }
3166 }
3167 
3168 /* If definition of RHS1 or RHS2 dominates STMT, return the later of those
3169    two definitions, otherwise return STMT.  */
3170 
3171 static inline gimple
3172 find_insert_point (gimple stmt, tree rhs1, tree rhs2)
3173 {
3174   if (TREE_CODE (rhs1) == SSA_NAME
3175       && reassoc_stmt_dominates_stmt_p (stmt, SSA_NAME_DEF_STMT (rhs1)))
3176     stmt = SSA_NAME_DEF_STMT (rhs1);
3177   if (TREE_CODE (rhs2) == SSA_NAME
3178       && reassoc_stmt_dominates_stmt_p (stmt, SSA_NAME_DEF_STMT (rhs2)))
3179     stmt = SSA_NAME_DEF_STMT (rhs2);
3180   return stmt;
3181 }
3182 
3183 /* Recursively rewrite our linearized statements so that the operators
3184    match those in OPS[OPINDEX], putting the computation in rank
3185    order.  Return new lhs.  */
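
/* In other words (sketch): for a linearized chain

     s1 = oA + oB;  s2 = s1 + oC;  s3 = s2 + oD;

   the walk starts at the top statement s3, replaces its rhs2 with
   OPS[OPINDEX], recurses along rhs1 with OPINDEX + 1, and the last
   two OPS entries end up in the innermost statement.  */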
3186 
3187 static tree
3188 rewrite_expr_tree (gimple stmt, unsigned int opindex,
3189 		   vec<operand_entry_t> ops, bool changed)
3190 {
3191   tree rhs1 = gimple_assign_rhs1 (stmt);
3192   tree rhs2 = gimple_assign_rhs2 (stmt);
3193   tree lhs = gimple_assign_lhs (stmt);
3194   operand_entry_t oe;
3195 
3196   /* The final recursion case for this function is that you have
3197      exactly two operations left.
3198      If we had exactly one op in the entire list to start with, we
3199      would have never called this function, and the tail recursion
3200      rewrites them one at a time.  */
3201   if (opindex + 2 == ops.length ())
3202     {
3203       operand_entry_t oe1, oe2;
3204 
3205       oe1 = ops[opindex];
3206       oe2 = ops[opindex + 1];
3207 
3208       if (rhs1 != oe1->op || rhs2 != oe2->op)
3209 	{
3210 	  gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
3211 	  unsigned int uid = gimple_uid (stmt);
3212 
3213 	  if (dump_file && (dump_flags & TDF_DETAILS))
3214 	    {
3215 	      fprintf (dump_file, "Transforming ");
3216 	      print_gimple_stmt (dump_file, stmt, 0, 0);
3217 	    }
3218 
3219 	  if (changed)
3220 	    {
3221 	      gimple insert_point = find_insert_point (stmt, oe1->op, oe2->op);
3222 	      lhs = make_ssa_name (TREE_TYPE (lhs), NULL);
3223 	      stmt
3224 		= gimple_build_assign_with_ops (gimple_assign_rhs_code (stmt),
3225 						lhs, oe1->op, oe2->op);
3226 	      gimple_set_uid (stmt, uid);
3227 	      gimple_set_visited (stmt, true);
3228 	      if (insert_point == gsi_stmt (gsi))
3229 		gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
3230 	      else
3231 		insert_stmt_after (stmt, insert_point);
3232 	    }
3233 	  else
3234 	    {
3235 	      gcc_checking_assert (find_insert_point (stmt, oe1->op, oe2->op)
3236 				   == stmt);
3237 	      gimple_assign_set_rhs1 (stmt, oe1->op);
3238 	      gimple_assign_set_rhs2 (stmt, oe2->op);
3239 	      update_stmt (stmt);
3240 	    }
3241 
3242 	  if (rhs1 != oe1->op && rhs1 != oe2->op)
3243 	    remove_visited_stmt_chain (rhs1);
3244 
3245 	  if (dump_file && (dump_flags & TDF_DETAILS))
3246 	    {
3247 	      fprintf (dump_file, " into ");
3248 	      print_gimple_stmt (dump_file, stmt, 0, 0);
3249 	    }
3250 	}
3251       return lhs;
3252     }
3253 
3254   /* If we hit here, we should have 3 or more ops left.  */
3255   gcc_assert (opindex + 2 < ops.length ());
3256 
3257   /* Rewrite the next operator.  */
3258   oe = ops[opindex];
3259 
3260   /* Recurse on the LHS of the binary operator, which is guaranteed to
3261      be the non-leaf side.  */
3262   tree new_rhs1
3263     = rewrite_expr_tree (SSA_NAME_DEF_STMT (rhs1), opindex + 1, ops,
3264 			 changed || oe->op != rhs2);
3265 
3266   if (oe->op != rhs2 || new_rhs1 != rhs1)
3267     {
3268       if (dump_file && (dump_flags & TDF_DETAILS))
3269 	{
3270 	  fprintf (dump_file, "Transforming ");
3271 	  print_gimple_stmt (dump_file, stmt, 0, 0);
3272 	}
3273 
3274       /* If changed is false, this is either opindex == 0
3275 	 or all outer rhs2's were equal to corresponding oe->op,
3276 	 and powi_result is NULL.
3277 	 That means lhs is equivalent before and after reassociation.
3278 	 Otherwise ensure the old lhs SSA_NAME is not reused and
3279 	 create a new stmt as well, so that any debug stmts will be
3280 	 properly adjusted.  */
3281       if (changed)
3282 	{
3283 	  gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
3284 	  unsigned int uid = gimple_uid (stmt);
3285 	  gimple insert_point = find_insert_point (stmt, new_rhs1, oe->op);
3286 
3287 	  lhs = make_ssa_name (TREE_TYPE (lhs), NULL);
3288 	  stmt = gimple_build_assign_with_ops (gimple_assign_rhs_code (stmt),
3289 					       lhs, new_rhs1, oe->op);
3290 	  gimple_set_uid (stmt, uid);
3291 	  gimple_set_visited (stmt, true);
3292 	  if (insert_point == gsi_stmt (gsi))
3293 	    gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
3294 	  else
3295 	    insert_stmt_after (stmt, insert_point);
3296 	}
3297       else
3298 	{
3299 	  gcc_checking_assert (find_insert_point (stmt, new_rhs1, oe->op)
3300 			       == stmt);
3301 	  gimple_assign_set_rhs1 (stmt, new_rhs1);
3302 	  gimple_assign_set_rhs2 (stmt, oe->op);
3303 	  update_stmt (stmt);
3304 	}
3305 
3306       if (dump_file && (dump_flags & TDF_DETAILS))
3307 	{
3308 	  fprintf (dump_file, " into ");
3309 	  print_gimple_stmt (dump_file, stmt, 0, 0);
3310 	}
3311     }
3312   return lhs;
3313 }
3314 
3315 /* Find out how many cycles we need to compute the statements chain.
3316    OPS_NUM holds the number of statements in a chain.  CPU_WIDTH is the
3317    maximum number of independent statements we may execute per cycle.  */
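
/* A worked example of the computation below (assuming cpu_width == 2
   and ops_num == 10): 10 / (2 * 2) = 2 cycles reduce the count by
   cpu_width each, leaving 10 - 2 * 2 = 6 operands, which need
   floor_log2 (6) + 1 = 3 more cycles, i.e. 5 cycles in total.  */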
3318 
3319 static int
3320 get_required_cycles (int ops_num, int cpu_width)
3321 {
3322   int res;
3323   int elog;
3324   unsigned int rest;
3325 
3326   /* While we have more than 2 * cpu_width operands
3327      we may reduce the number of operands by cpu_width
3328      per cycle.  */
3329   res = ops_num / (2 * cpu_width);
3330 
3331   /* The remaining operand count can be halved each cycle
3332      until only one operand is left.  */
3333   rest = (unsigned)(ops_num - res * cpu_width);
3334   elog = exact_log2 (rest);
3335   if (elog >= 0)
3336     res += elog;
3337   else
3338     res += floor_log2 (rest) + 1;
3339 
3340   return res;
3341 }
3342 
3343 /* Returns an optimal number of registers to use for computation of
3344    given statements.  */
3345 
3346 static int
3347 get_reassociation_width (int ops_num, enum tree_code opc,
3348 			 enum machine_mode mode)
3349 {
3350   int param_width = PARAM_VALUE (PARAM_TREE_REASSOC_WIDTH);
3351   int width;
3352   int width_min;
3353   int cycles_best;
3354 
3355   if (param_width > 0)
3356     width = param_width;
3357   else
3358     width = targetm.sched.reassociation_width (opc, mode);
3359 
3360   if (width == 1)
3361     return width;
3362 
3363   /* Get the minimal time required for sequence computation.  */
3364   cycles_best = get_required_cycles (ops_num, width);
3365 
3366   /* Check if we may use a smaller width and still compute the sequence
3367      in the same number of cycles.  That lets us reduce register usage.
3368      get_required_cycles is monotonically increasing as the width
3369      decreases, so we can binary search for the minimal width that still
3370      results in the optimal cycle count.  */
3371   width_min = 1;
3372   while (width > width_min)
3373     {
3374       int width_mid = (width + width_min) / 2;
3375 
3376       if (get_required_cycles (ops_num, width_mid) == cycles_best)
3377 	width = width_mid;
3378       else if (width_min < width_mid)
3379 	width_min = width_mid;
3380       else
3381 	break;
3382     }
3383 
3384   return width;
3385 }
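/* Illustrative trace of get_reassociation_width (assuming the target
   reports a reassociation width of 4 for this opcode and mode): for
   ops_num == 16, cycles_best = get_required_cycles (16, 4) == 5.
   Narrowing to width 3 or 2 would take 6 or 7 cycles respectively, so
   the binary search above keeps width == 4.  */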
3386 
3387 /* Recursively rewrite our linearized statements so that the operators
3388    match those in OPS[OPINDEX], putting the computation in rank
3389    order and trying to allow operations to be executed in
3390    parallel.  */
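/* For example (a sketch only; the exact pairing and statement order the
   code below produces may differ): given ops {a, b, c, d, e} and
   width 2, instead of the serial chain ((((a + b) + c) + d) + e) we aim
   to emit something like
     t1 = a + b;  t2 = c + d;  t3 = t1 + t2;  result = t3 + e;
   so that independent additions can execute in the same cycle.  */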
3391 
3392 static void
3393 rewrite_expr_tree_parallel (gimple stmt, int width,
3394 			    vec<operand_entry_t> ops)
3395 {
3396   enum tree_code opcode = gimple_assign_rhs_code (stmt);
3397   int op_num = ops.length ();
3398   int stmt_num = op_num - 1;
3399   gimple *stmts = XALLOCAVEC (gimple, stmt_num);
3400   int op_index = op_num - 1;
3401   int stmt_index = 0;
3402   int ready_stmts_end = 0;
3403   int i = 0;
3404   tree last_rhs1 = gimple_assign_rhs1 (stmt);
3405 
3406   /* We start expression rewriting from the top statements.
3407      So, in this loop we create a full list of statements
3408      we will work with.  */
3409   stmts[stmt_num - 1] = stmt;
3410   for (i = stmt_num - 2; i >= 0; i--)
3411     stmts[i] = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmts[i+1]));
3412 
3413   for (i = 0; i < stmt_num; i++)
3414     {
3415       tree op1, op2;
3416 
3417       /* Determine whether we should use results of
3418 	 already handled statements or not.  */
3419       if (ready_stmts_end == 0
3420 	  && (i - stmt_index >= width || op_index < 1))
3421 	ready_stmts_end = i;
3422 
3423       /* Now we choose operands for the next statement.  A nonzero
3424 	 value in ready_stmts_end means that we should use the results
3425 	 of already generated statements as new operands.  */
3426       if (ready_stmts_end > 0)
3427 	{
3428 	  op1 = gimple_assign_lhs (stmts[stmt_index++]);
3429 	  if (ready_stmts_end > stmt_index)
3430 	    op2 = gimple_assign_lhs (stmts[stmt_index++]);
3431 	  else if (op_index >= 0)
3432 	    op2 = ops[op_index--]->op;
3433 	  else
3434 	    {
3435 	      gcc_assert (stmt_index < i);
3436 	      op2 = gimple_assign_lhs (stmts[stmt_index++]);
3437 	    }
3438 
3439 	  if (stmt_index >= ready_stmts_end)
3440 	    ready_stmts_end = 0;
3441 	}
3442       else
3443 	{
3444 	  if (op_index > 1)
3445 	    swap_ops_for_binary_stmt (ops, op_index - 2, NULL);
3446 	  op2 = ops[op_index--]->op;
3447 	  op1 = ops[op_index--]->op;
3448 	}
3449 
3450       /* If we are emitting the last statement, put the operands
3451 	 into the original statement.  This also terminates
3452 	 the loop.  */
3453       if (op_index < 0 && stmt_index == i)
3454 	i = stmt_num - 1;
3455 
3456       if (dump_file && (dump_flags & TDF_DETAILS))
3457 	{
3458 	  fprintf (dump_file, "Transforming ");
3459 	  print_gimple_stmt (dump_file, stmts[i], 0, 0);
3460 	}
3461 
3462       /* Only the last statement reuses the original statement; all
3463 	 the others are newly created.  */
3464       if (i == stmt_num - 1)
3465 	{
3466 	  gimple_assign_set_rhs1 (stmts[i], op1);
3467 	  gimple_assign_set_rhs2 (stmts[i], op2);
3468 	  update_stmt (stmts[i]);
3469 	}
3470       else
3471 	stmts[i] = build_and_add_sum (TREE_TYPE (last_rhs1), op1, op2, opcode);
3472 
3473       if (dump_file && (dump_flags & TDF_DETAILS))
3474 	{
3475 	  fprintf (dump_file, " into ");
3476 	  print_gimple_stmt (dump_file, stmts[i], 0, 0);
3477 	}
3478     }
3479 
3480   remove_visited_stmt_chain (last_rhs1);
3481 }
3482 
3483 /* Transform STMT, which is really (A + B) + (C + D), into the left
3484    linear form ((A + B) + C) + D.
3485    Recurse on D if necessary.  */
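/* In gimple terms, a sketch of the effect (SSA names are illustrative
   and the operand order may differ):
     x = a + b;  y = c + d;  s = x + y;
   becomes roughly
     x = a + b;  t = x + d;  s = t + c;
   with the old definition of y removed once it is dead.  */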
3486 
3487 static void
3488 linearize_expr (gimple stmt)
3489 {
3490   gimple_stmt_iterator gsi;
3491   gimple binlhs = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
3492   gimple binrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
3493   gimple oldbinrhs = binrhs;
3494   enum tree_code rhscode = gimple_assign_rhs_code (stmt);
3495   gimple newbinrhs = NULL;
3496   struct loop *loop = loop_containing_stmt (stmt);
3497   tree lhs = gimple_assign_lhs (stmt);
3498 
3499   gcc_assert (is_reassociable_op (binlhs, rhscode, loop)
3500 	      && is_reassociable_op (binrhs, rhscode, loop));
3501 
3502   gsi = gsi_for_stmt (stmt);
3503 
3504   gimple_assign_set_rhs2 (stmt, gimple_assign_rhs1 (binrhs));
3505   binrhs = gimple_build_assign_with_ops (gimple_assign_rhs_code (binrhs),
3506 					 make_ssa_name (TREE_TYPE (lhs), NULL),
3507 					 gimple_assign_lhs (binlhs),
3508 					 gimple_assign_rhs2 (binrhs));
3509   gimple_assign_set_rhs1 (stmt, gimple_assign_lhs (binrhs));
3510   gsi_insert_before (&gsi, binrhs, GSI_SAME_STMT);
3511   gimple_set_uid (binrhs, gimple_uid (stmt));
3512 
3513   if (TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME)
3514     newbinrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
3515 
3516   if (dump_file && (dump_flags & TDF_DETAILS))
3517     {
3518       fprintf (dump_file, "Linearized: ");
3519       print_gimple_stmt (dump_file, stmt, 0, 0);
3520     }
3521 
3522   reassociate_stats.linearized++;
3523   update_stmt (stmt);
3524 
3525   gsi = gsi_for_stmt (oldbinrhs);
3526   reassoc_remove_stmt (&gsi);
3527   release_defs (oldbinrhs);
3528 
3529   gimple_set_visited (stmt, true);
3530   gimple_set_visited (binlhs, true);
3531   gimple_set_visited (binrhs, true);
3532 
3533   /* Tail recurse on the new rhs if it still needs reassociation.  */
3534   if (newbinrhs && is_reassociable_op (newbinrhs, rhscode, loop))
3535     /* ??? This should probably be linearize_expr (newbinrhs) but I don't
3536 	   want to change the algorithm while converting to tuples.  */
3537     linearize_expr (stmt);
3538 }
3539 
3540 /* If LHS has a single immediate use that is a GIMPLE_ASSIGN statement, return
3541    it.  Otherwise, return NULL.  */
3542 
3543 static gimple
3544 get_single_immediate_use (tree lhs)
3545 {
3546   use_operand_p immuse;
3547   gimple immusestmt;
3548 
3549   if (TREE_CODE (lhs) == SSA_NAME
3550       && single_imm_use (lhs, &immuse, &immusestmt)
3551       && is_gimple_assign (immusestmt))
3552     return immusestmt;
3553 
3554   return NULL;
3555 }
3556 
3557 /* Recursively negate the value of TONEGATE, and return the SSA_NAME
3558    representing the negated value.  Insertions of any necessary
3559    instructions go before GSI.
3560    This function is recursive in that, if you hand it "a_5" as the
3561    value to negate, and a_5 is defined by "a_5 = b_3 + b_4", it will
3562    transform b_3 + b_4 into a_5 = -b_3 + -b_4.  */
3563 
3564 static tree
3565 negate_value (tree tonegate, gimple_stmt_iterator *gsip)
3566 {
3567   gimple negatedefstmt = NULL;
3568   tree resultofnegate;
3569   gimple_stmt_iterator gsi;
3570   unsigned int uid;
3571 
3572   /* If we are trying to negate a name, defined by an add, negate the
3573      add operands instead.  */
3574   if (TREE_CODE (tonegate) == SSA_NAME)
3575     negatedefstmt = SSA_NAME_DEF_STMT (tonegate);
3576   if (TREE_CODE (tonegate) == SSA_NAME
3577       && is_gimple_assign (negatedefstmt)
3578       && TREE_CODE (gimple_assign_lhs (negatedefstmt)) == SSA_NAME
3579       && has_single_use (gimple_assign_lhs (negatedefstmt))
3580       && gimple_assign_rhs_code (negatedefstmt) == PLUS_EXPR)
3581     {
3582       tree rhs1 = gimple_assign_rhs1 (negatedefstmt);
3583       tree rhs2 = gimple_assign_rhs2 (negatedefstmt);
3584       tree lhs = gimple_assign_lhs (negatedefstmt);
3585       gimple g;
3586 
3587       gsi = gsi_for_stmt (negatedefstmt);
3588       rhs1 = negate_value (rhs1, &gsi);
3589 
3590       gsi = gsi_for_stmt (negatedefstmt);
3591       rhs2 = negate_value (rhs2, &gsi);
3592 
3593       gsi = gsi_for_stmt (negatedefstmt);
3594       lhs = make_ssa_name (TREE_TYPE (lhs), NULL);
3595       gimple_set_visited (negatedefstmt, true);
3596       g = gimple_build_assign_with_ops (PLUS_EXPR, lhs, rhs1, rhs2);
3597       gimple_set_uid (g, gimple_uid (negatedefstmt));
3598       gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3599       return lhs;
3600     }
3601 
3602   tonegate = fold_build1 (NEGATE_EXPR, TREE_TYPE (tonegate), tonegate);
3603   resultofnegate = force_gimple_operand_gsi (gsip, tonegate, true,
3604 					     NULL_TREE, true, GSI_SAME_STMT);
3605   gsi = *gsip;
3606   uid = gimple_uid (gsi_stmt (gsi));
3607   for (gsi_prev (&gsi); !gsi_end_p (gsi); gsi_prev (&gsi))
3608     {
3609       gimple stmt = gsi_stmt (gsi);
3610       if (gimple_uid (stmt) != 0)
3611 	break;
3612       gimple_set_uid (stmt, uid);
3613     }
3614   return resultofnegate;
3615 }
3616 
3617 /* Return true if we should break up the subtract in STMT into an add
3618    with negate.  This is true when the subtract operands are really
3619    adds, or the subtract itself is used in an add expression.  In
3620    either case, breaking up the subtract into an add with negate
3621    exposes the adds to reassociation.  */
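/* For instance (illustrative names), with
     c = a + e;
     b = c - d;
   the subtract feeds from an add, so rewriting it as b = c + -d lets
   the whole chain be reassociated.  */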
3622 
3623 static bool
3624 should_break_up_subtract (gimple stmt)
3625 {
3626   tree lhs = gimple_assign_lhs (stmt);
3627   tree binlhs = gimple_assign_rhs1 (stmt);
3628   tree binrhs = gimple_assign_rhs2 (stmt);
3629   gimple immusestmt;
3630   struct loop *loop = loop_containing_stmt (stmt);
3631 
3632   if (TREE_CODE (binlhs) == SSA_NAME
3633       && is_reassociable_op (SSA_NAME_DEF_STMT (binlhs), PLUS_EXPR, loop))
3634     return true;
3635 
3636   if (TREE_CODE (binrhs) == SSA_NAME
3637       && is_reassociable_op (SSA_NAME_DEF_STMT (binrhs), PLUS_EXPR, loop))
3638     return true;
3639 
3640   if (TREE_CODE (lhs) == SSA_NAME
3641       && (immusestmt = get_single_immediate_use (lhs))
3642       && is_gimple_assign (immusestmt)
3643       && (gimple_assign_rhs_code (immusestmt) == PLUS_EXPR
3644 	  ||  gimple_assign_rhs_code (immusestmt) == MULT_EXPR))
3645     return true;
3646   return false;
3647 }
3648 
3649 /* Transform STMT from A - B into A + -B.  */
3650 
3651 static void
3652 break_up_subtract (gimple stmt, gimple_stmt_iterator *gsip)
3653 {
3654   tree rhs1 = gimple_assign_rhs1 (stmt);
3655   tree rhs2 = gimple_assign_rhs2 (stmt);
3656 
3657   if (dump_file && (dump_flags & TDF_DETAILS))
3658     {
3659       fprintf (dump_file, "Breaking up subtract ");
3660       print_gimple_stmt (dump_file, stmt, 0, 0);
3661     }
3662 
3663   rhs2 = negate_value (rhs2, gsip);
3664   gimple_assign_set_rhs_with_ops (gsip, PLUS_EXPR, rhs1, rhs2);
3665   update_stmt (stmt);
3666 }
3667 
3668 /* Determine whether STMT is a builtin call that raises an SSA name
3669    to an integer power and has only one use.  If so, and this is early
3670    reassociation and unsafe math optimizations are permitted, place
3671    the SSA name in *BASE and the exponent in *EXPONENT, and return TRUE.
3672    If any of these conditions does not hold, return FALSE.  */
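/* Illustrative cases (assuming unsafe math optimizations and a single
   use of the call's result): pow (x, 3.0) yields *BASE == x and
   *EXPONENT == 3; powi (x, 4) yields exponent 4; pow (x, 2.5) is
   rejected because the exponent is not an exact integer, and exponents
   below 2 are rejected as not worth expanding.  */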
3673 
3674 static bool
3675 acceptable_pow_call (gimple stmt, tree *base, HOST_WIDE_INT *exponent)
3676 {
3677   tree fndecl, arg1;
3678   REAL_VALUE_TYPE c, cint;
3679 
3680   if (!first_pass_instance
3681       || !flag_unsafe_math_optimizations
3682       || !is_gimple_call (stmt)
3683       || !has_single_use (gimple_call_lhs (stmt)))
3684     return false;
3685 
3686   fndecl = gimple_call_fndecl (stmt);
3687 
3688   if (!fndecl
3689       || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
3690     return false;
3691 
3692   switch (DECL_FUNCTION_CODE (fndecl))
3693     {
3694     CASE_FLT_FN (BUILT_IN_POW):
3695       *base = gimple_call_arg (stmt, 0);
3696       arg1 = gimple_call_arg (stmt, 1);
3697 
3698       if (TREE_CODE (arg1) != REAL_CST)
3699 	return false;
3700 
3701       c = TREE_REAL_CST (arg1);
3702 
3703       if (REAL_EXP (&c) > HOST_BITS_PER_WIDE_INT)
3704 	return false;
3705 
3706       *exponent = real_to_integer (&c);
3707       real_from_integer (&cint, VOIDmode, *exponent,
3708 			 *exponent < 0 ? -1 : 0, 0);
3709       if (!real_identical (&c, &cint))
3710 	return false;
3711 
3712       break;
3713 
3714     CASE_FLT_FN (BUILT_IN_POWI):
3715       *base = gimple_call_arg (stmt, 0);
3716       arg1 = gimple_call_arg (stmt, 1);
3717 
3718       if (!tree_fits_shwi_p (arg1))
3719 	return false;
3720 
3721       *exponent = tree_to_shwi (arg1);
3722       break;
3723 
3724     default:
3725       return false;
3726     }
3727 
3728   /* Expanding negative exponents is generally unproductive, so we don't
3729      complicate matters with those.  Exponents of zero and one should
3730      have been handled by expression folding.  */
3731   if (*exponent < 2 || TREE_CODE (*base) != SSA_NAME)
3732     return false;
3733 
3734   return true;
3735 }
3736 
3737 /* Recursively linearize a binary expression that is the RHS of STMT.
3738    Place the operands of the expression tree in the vector named OPS.  */
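/* For example (a sketch; the final vector order depends on operand
   ranks): starting from the statement computing ((a + b) + c) + d, the
   OPS vector ends up holding the leaf operands {a, b, c, d}; for
   MULT_EXPR chains, a repeated factor such as pow (x, 3) may instead be
   recorded as a single repeat entry (x, count 3).  */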
3739 
3740 static void
3741 linearize_expr_tree (vec<operand_entry_t> *ops, gimple stmt,
3742 		     bool is_associative, bool set_visited)
3743 {
3744   tree binlhs = gimple_assign_rhs1 (stmt);
3745   tree binrhs = gimple_assign_rhs2 (stmt);
3746   gimple binlhsdef = NULL, binrhsdef = NULL;
3747   bool binlhsisreassoc = false;
3748   bool binrhsisreassoc = false;
3749   enum tree_code rhscode = gimple_assign_rhs_code (stmt);
3750   struct loop *loop = loop_containing_stmt (stmt);
3751   tree base = NULL_TREE;
3752   HOST_WIDE_INT exponent = 0;
3753 
3754   if (set_visited)
3755     gimple_set_visited (stmt, true);
3756 
3757   if (TREE_CODE (binlhs) == SSA_NAME)
3758     {
3759       binlhsdef = SSA_NAME_DEF_STMT (binlhs);
3760       binlhsisreassoc = (is_reassociable_op (binlhsdef, rhscode, loop)
3761 			 && !stmt_could_throw_p (binlhsdef));
3762     }
3763 
3764   if (TREE_CODE (binrhs) == SSA_NAME)
3765     {
3766       binrhsdef = SSA_NAME_DEF_STMT (binrhs);
3767       binrhsisreassoc = (is_reassociable_op (binrhsdef, rhscode, loop)
3768 			 && !stmt_could_throw_p (binrhsdef));
3769     }
3770 
3771   /* If the LHS is not reassociable, but the RHS is, we need to swap
3772      them.  If neither is reassociable, there is nothing we can do, so
3773      just put them in the ops vector.  If the LHS is reassociable,
3774      linearize it.  If both are reassociable, then linearize the RHS
3775      and the LHS.  */
3776 
3777   if (!binlhsisreassoc)
3778     {
3779       tree temp;
3780 
3781       /* If this operation is not associative (division, for example), give up.  */
3782       if (!is_associative)
3783 	{
3784 	  add_to_ops_vec (ops, binrhs);
3785 	  return;
3786 	}
3787 
3788       if (!binrhsisreassoc)
3789 	{
3790 	  if (rhscode == MULT_EXPR
3791 	      && TREE_CODE (binrhs) == SSA_NAME
3792 	      && acceptable_pow_call (binrhsdef, &base, &exponent))
3793 	    {
3794 	      add_repeat_to_ops_vec (ops, base, exponent);
3795 	      gimple_set_visited (binrhsdef, true);
3796 	    }
3797 	  else
3798 	    add_to_ops_vec (ops, binrhs);
3799 
3800 	  if (rhscode == MULT_EXPR
3801 	      && TREE_CODE (binlhs) == SSA_NAME
3802 	      && acceptable_pow_call (binlhsdef, &base, &exponent))
3803 	    {
3804 	      add_repeat_to_ops_vec (ops, base, exponent);
3805 	      gimple_set_visited (binlhsdef, true);
3806 	    }
3807 	  else
3808 	    add_to_ops_vec (ops, binlhs);
3809 
3810 	  return;
3811 	}
3812 
3813       if (dump_file && (dump_flags & TDF_DETAILS))
3814 	{
3815 	  fprintf (dump_file, "swapping operands of ");
3816 	  print_gimple_stmt (dump_file, stmt, 0, 0);
3817 	}
3818 
3819       swap_ssa_operands (stmt,
3820 			 gimple_assign_rhs1_ptr (stmt),
3821 			 gimple_assign_rhs2_ptr (stmt));
3822       update_stmt (stmt);
3823 
3824       if (dump_file && (dump_flags & TDF_DETAILS))
3825 	{
3826 	  fprintf (dump_file, " is now ");
3827 	  print_gimple_stmt (dump_file, stmt, 0, 0);
3828 	}
3829 
3830       /* We want to make it so the lhs is always the reassociative op,
3831 	 so swap.  */
3832       temp = binlhs;
3833       binlhs = binrhs;
3834       binrhs = temp;
3835     }
3836   else if (binrhsisreassoc)
3837     {
3838       linearize_expr (stmt);
3839       binlhs = gimple_assign_rhs1 (stmt);
3840       binrhs = gimple_assign_rhs2 (stmt);
3841     }
3842 
3843   gcc_assert (TREE_CODE (binrhs) != SSA_NAME
3844 	      || !is_reassociable_op (SSA_NAME_DEF_STMT (binrhs),
3845 				      rhscode, loop));
3846   linearize_expr_tree (ops, SSA_NAME_DEF_STMT (binlhs),
3847 		       is_associative, set_visited);
3848 
3849   if (rhscode == MULT_EXPR
3850       && TREE_CODE (binrhs) == SSA_NAME
3851       && acceptable_pow_call (SSA_NAME_DEF_STMT (binrhs), &base, &exponent))
3852     {
3853       add_repeat_to_ops_vec (ops, base, exponent);
3854       gimple_set_visited (SSA_NAME_DEF_STMT (binrhs), true);
3855     }
3856   else
3857     add_to_ops_vec (ops, binrhs);
3858 }
3859 
3860 /* Repropagate the negates back into subtracts, since no other pass
3861    currently does it.  */
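/* Typical rewrites performed below (names are illustrative):
     x = -a;  y = b + x;   becomes   y = b - a;
     x = -a;  y = x - b;   becomes   x2 = a + b;  y = -x2;
     x = -a;  y = b - x;   becomes   y = b + a;
   so most negates end up folded back into subtracts.  */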
3862 
3863 static void
3864 repropagate_negates (void)
3865 {
3866   unsigned int i = 0;
3867   tree negate;
3868 
3869   FOR_EACH_VEC_ELT (plus_negates, i, negate)
3870     {
3871       gimple user = get_single_immediate_use (negate);
3872 
3873       if (!user || !is_gimple_assign (user))
3874 	continue;
3875 
3876       /* The negate operand can be either operand of a PLUS_EXPR
3877 	 (it can be the LHS if the RHS is a constant for example).
3878 
3879 	 Force the negate operand to the RHS of the PLUS_EXPR, then
3880 	 transform the PLUS_EXPR into a MINUS_EXPR.  */
3881       if (gimple_assign_rhs_code (user) == PLUS_EXPR)
3882 	{
3883 	  /* If the negated operand appears on the LHS of the
3884 	     PLUS_EXPR, exchange the operands of the PLUS_EXPR
3885 	     to force the negated operand to the RHS of the PLUS_EXPR.  */
3886 	  if (gimple_assign_rhs1 (user) == negate)
3887 	    {
3888 	      swap_ssa_operands (user,
3889 				 gimple_assign_rhs1_ptr (user),
3890 				 gimple_assign_rhs2_ptr (user));
3891 	    }
3892 
3893 	  /* Now transform the PLUS_EXPR into a MINUS_EXPR and replace
3894 	     the RHS of the PLUS_EXPR with the operand of the NEGATE_EXPR.  */
3895 	  if (gimple_assign_rhs2 (user) == negate)
3896 	    {
3897 	      tree rhs1 = gimple_assign_rhs1 (user);
3898 	      tree rhs2 = get_unary_op (negate, NEGATE_EXPR);
3899 	      gimple_stmt_iterator gsi = gsi_for_stmt (user);
3900 	      gimple_assign_set_rhs_with_ops (&gsi, MINUS_EXPR, rhs1, rhs2);
3901 	      update_stmt (user);
3902 	    }
3903 	}
3904       else if (gimple_assign_rhs_code (user) == MINUS_EXPR)
3905 	{
3906 	  if (gimple_assign_rhs1 (user) == negate)
3907 	    {
3908 	      /* We have
3909 	           x = -a
3910 		   y = x - b
3911 		 which we transform into
3912 		   x = a + b
3913 		   y = -x .
3914 		 This pushes down the negate which we possibly can merge
3915 		 into some other operation, hence insert it into the
3916 		 plus_negates vector.  */
3917 	      gimple feed = SSA_NAME_DEF_STMT (negate);
3918 	      tree a = gimple_assign_rhs1 (feed);
3919 	      tree b = gimple_assign_rhs2 (user);
3920 	      gimple_stmt_iterator gsi = gsi_for_stmt (feed);
3921 	      gimple_stmt_iterator gsi2 = gsi_for_stmt (user);
3922 	      tree x = make_ssa_name (TREE_TYPE (gimple_assign_lhs (feed)), NULL);
3923 	      gimple g = gimple_build_assign_with_ops (PLUS_EXPR, x, a, b);
3924 	      gsi_insert_before (&gsi2, g, GSI_SAME_STMT);
3925 	      gimple_assign_set_rhs_with_ops (&gsi2, NEGATE_EXPR, x, NULL);
3926 	      user = gsi_stmt (gsi2);
3927 	      update_stmt (user);
3928 	      reassoc_remove_stmt (&gsi);
3929 	      release_defs (feed);
3930 	      plus_negates.safe_push (gimple_assign_lhs (user));
3931 	    }
3932 	  else
3933 	    {
3934 	      /* Transform "x = -a; y = b - x" into "y = b + a", getting
3935 	         rid of one operation.  */
3936 	      gimple feed = SSA_NAME_DEF_STMT (negate);
3937 	      tree a = gimple_assign_rhs1 (feed);
3938 	      tree rhs1 = gimple_assign_rhs1 (user);
3939 	      gimple_stmt_iterator gsi = gsi_for_stmt (user);
3940 	      gimple_assign_set_rhs_with_ops (&gsi, PLUS_EXPR, rhs1, a);
3941 	      update_stmt (gsi_stmt (gsi));
3942 	    }
3943 	}
3944     }
3945 }
3946 
3947 /* Returns true if OP is of a type for which we can do reassociation.
3948    That is for integral or non-saturating fixed-point types, and for
3949    floating-point types when associative-math is enabled.  */
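/* For instance (assuming default flags): an unsigned int operand
   qualifies because unsigned overflow wraps; a plain signed int does
   not unless -fwrapv makes its overflow wrap; and a double operand
   qualifies only when -fassociative-math is in effect.  */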
3950 
3951 static bool
3952 can_reassociate_p (tree op)
3953 {
3954   tree type = TREE_TYPE (op);
3955   if ((INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_WRAPS (type))
3956       || NON_SAT_FIXED_POINT_TYPE_P (type)
3957       || (flag_associative_math && FLOAT_TYPE_P (type)))
3958     return true;
3959   return false;
3960 }
3961 
3962 /* Break up subtract operations in block BB.
3963 
3964    We do this top down because we don't know whether the subtract is
3965    part of a possible chain of reassociation except at the top.
3966 
3967    IE given
3968    d = f + g
3969    c = a + e
3970    b = c - d
3971    q = b - r
3972    k = t - q
3973 
3974    we want to break up k = t - q, but we won't until we've transformed q
3975    = b - r, which won't be broken up until we transform b = c - d.
3976 
3977    En passant, clear the GIMPLE visited flag on every statement
3978    and set UIDs within each basic block.  */
3979 
3980 static void
3981 break_up_subtract_bb (basic_block bb)
3982 {
3983   gimple_stmt_iterator gsi;
3984   basic_block son;
3985   unsigned int uid = 1;
3986 
3987   for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
3988     {
3989       gimple stmt = gsi_stmt (gsi);
3990       gimple_set_visited (stmt, false);
3991       gimple_set_uid (stmt, uid++);
3992 
3993       if (!is_gimple_assign (stmt)
3994 	  || !can_reassociate_p (gimple_assign_lhs (stmt)))
3995 	continue;
3996 
3997       /* Look for simple gimple subtract operations.  */
3998       if (gimple_assign_rhs_code (stmt) == MINUS_EXPR)
3999 	{
4000 	  if (!can_reassociate_p (gimple_assign_rhs1 (stmt))
4001 	      || !can_reassociate_p (gimple_assign_rhs2 (stmt)))
4002 	    continue;
4003 
4004 	  /* Check for a subtract used only in an addition.  If this
4005 	     is the case, transform it into add of a negate for better
4006 	     reassociation.  IE transform C = A-B into C = A + -B if C
4007 	     is only used in an addition.  */
4008 	  if (should_break_up_subtract (stmt))
4009 	    break_up_subtract (stmt, &gsi);
4010 	}
4011       else if (gimple_assign_rhs_code (stmt) == NEGATE_EXPR
4012 	       && can_reassociate_p (gimple_assign_rhs1 (stmt)))
4013 	plus_negates.safe_push (gimple_assign_lhs (stmt));
4014     }
4015   for (son = first_dom_son (CDI_DOMINATORS, bb);
4016        son;
4017        son = next_dom_son (CDI_DOMINATORS, son))
4018     break_up_subtract_bb (son);
4019 }
4020 
4021 /* Used for repeated factor analysis.  */
4022 struct repeat_factor_d
4023 {
4024   /* An SSA name that occurs in a multiply chain.  */
4025   tree factor;
4026 
4027   /* Cached rank of the factor.  */
4028   unsigned rank;
4029 
4030   /* Number of occurrences of the factor in the chain.  */
4031   HOST_WIDE_INT count;
4032 
4033   /* An SSA name representing the product of this factor and
4034      all factors appearing later in the repeated factor vector.  */
4035   tree repr;
4036 };
4037 
4038 typedef struct repeat_factor_d repeat_factor, *repeat_factor_t;
4039 typedef const struct repeat_factor_d *const_repeat_factor_t;
4040 
4041 
4042 static vec<repeat_factor> repeat_factor_vec;
4043 
4044 /* Used for sorting the repeat factor vector.  Sort primarily by
4045    ascending occurrence count, secondarily by descending rank.  */
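/* E.g. (illustrative counts and ranks): entries (x, count 2, rank 9),
   (y, count 3, rank 7) and (z, count 3, rank 5) sort as x first (lowest
   count), then y (same count as z but higher rank), then z.  */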
4046 
4047 static int
4048 compare_repeat_factors (const void *x1, const void *x2)
4049 {
4050   const_repeat_factor_t rf1 = (const_repeat_factor_t) x1;
4051   const_repeat_factor_t rf2 = (const_repeat_factor_t) x2;
4052 
4053   if (rf1->count != rf2->count)
4054     return rf1->count - rf2->count;
4055 
4056   return rf2->rank - rf1->rank;
4057 }
4058 
4059 /* Look for repeated operands in OPS in the multiply tree rooted at
4060    STMT.  Replace them with an optimal sequence of multiplies and powi
4061    builtin calls, and remove the used operands from OPS.  Return an
4062    SSA name representing the value of the replacement sequence.  */
4063 
4064 static tree
4065 attempt_builtin_powi (gimple stmt, vec<operand_entry_t> *ops)
4066 {
4067   unsigned i, j, vec_len;
4068   int ii;
4069   operand_entry_t oe;
4070   repeat_factor_t rf1, rf2;
4071   repeat_factor rfnew;
4072   tree result = NULL_TREE;
4073   tree target_ssa, iter_result;
4074   tree type = TREE_TYPE (gimple_get_lhs (stmt));
4075   tree powi_fndecl = mathfn_built_in (type, BUILT_IN_POWI);
4076   gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
4077   gimple mul_stmt, pow_stmt;
4078 
4079   /* Nothing to do if BUILT_IN_POWI doesn't exist for this type and
4080      target.  */
4081   if (!powi_fndecl)
4082     return NULL_TREE;
4083 
4084   /* Allocate the repeated factor vector.  */
4085   repeat_factor_vec.create (10);
4086 
4087   /* Scan the OPS vector for all SSA names in the product and build
4088      up a vector of occurrence counts for each factor.  */
4089   FOR_EACH_VEC_ELT (*ops, i, oe)
4090     {
4091       if (TREE_CODE (oe->op) == SSA_NAME)
4092 	{
4093 	  FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
4094 	    {
4095 	      if (rf1->factor == oe->op)
4096 		{
4097 		  rf1->count += oe->count;
4098 		  break;
4099 		}
4100 	    }
4101 
4102 	  if (j >= repeat_factor_vec.length ())
4103 	    {
4104 	      rfnew.factor = oe->op;
4105 	      rfnew.rank = oe->rank;
4106 	      rfnew.count = oe->count;
4107 	      rfnew.repr = NULL_TREE;
4108 	      repeat_factor_vec.safe_push (rfnew);
4109 	    }
4110 	}
4111     }
4112 
4113   /* Sort the repeated factor vector by (a) increasing occurrence count,
4114      and (b) decreasing rank.  */
4115   repeat_factor_vec.qsort (compare_repeat_factors);
4116 
4117   /* It is generally best to combine as many base factors as possible
4118      into a product before applying __builtin_powi to the result.
4119      However, the sort order chosen for the repeated factor vector
4120      allows us to cache partial results for the product of the base
4121      factors for subsequent use.  When we already have a cached partial
4122      result from a previous iteration, it is best to make use of it
4123      before looking for another __builtin_pow opportunity.
4124 
4125      As an example, consider x * x * y * y * y * z * z * z * z.
4126      We want to first compose the product x * y * z, raise it to the
4127      second power, then multiply this by y * z, and finally multiply
4128      by z.  This can be done in 5 multiplies provided we cache y * z
4129      for use in both expressions:
4130 
4131         t1 = y * z
4132 	t2 = t1 * x
4133 	t3 = t2 * t2
4134 	t4 = t1 * t3
4135 	result = t4 * z
4136 
4137      If we instead ignored the cached y * z and first multiplied by
4138      the __builtin_pow opportunity z * z, we would get the inferior:
4139 
4140         t1 = y * z
4141 	t2 = t1 * x
4142 	t3 = t2 * t2
4143 	t4 = z * z
4144 	t5 = t3 * t4
4145         result = t5 * y  */
4146 
4147   vec_len = repeat_factor_vec.length ();
4148 
4149   /* Repeatedly look for opportunities to create a builtin_powi call.  */
4150   while (true)
4151     {
4152       HOST_WIDE_INT power;
4153 
4154       /* First look for the largest cached product of factors from
4155 	 preceding iterations.  If found, create a builtin_powi for
4156 	 it if the minimum occurrence count for its factors is at
4157 	 least 2, or just use this cached product as our next
4158 	 multiplicand if the minimum occurrence count is 1.  */
4159       FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
4160 	{
4161 	  if (rf1->repr && rf1->count > 0)
4162 	    break;
4163 	}
4164 
4165       if (j < vec_len)
4166 	{
4167 	  power = rf1->count;
4168 
4169 	  if (power == 1)
4170 	    {
4171 	      iter_result = rf1->repr;
4172 
4173 	      if (dump_file && (dump_flags & TDF_DETAILS))
4174 		{
4175 		  unsigned elt;
4176 		  repeat_factor_t rf;
4177 		  fputs ("Multiplying by cached product ", dump_file);
4178 		  for (elt = j; elt < vec_len; elt++)
4179 		    {
4180 		      rf = &repeat_factor_vec[elt];
4181 		      print_generic_expr (dump_file, rf->factor, 0);
4182 		      if (elt < vec_len - 1)
4183 			fputs (" * ", dump_file);
4184 		    }
4185 		  fputs ("\n", dump_file);
4186 		}
4187 	    }
4188 	  else
4189 	    {
4190 	      iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
4191 	      pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
4192 					    build_int_cst (integer_type_node,
4193 							   power));
4194 	      gimple_call_set_lhs (pow_stmt, iter_result);
4195 	      gimple_set_location (pow_stmt, gimple_location (stmt));
4196 	      gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
4197 
4198 	      if (dump_file && (dump_flags & TDF_DETAILS))
4199 		{
4200 		  unsigned elt;
4201 		  repeat_factor_t rf;
4202 		  fputs ("Building __builtin_pow call for cached product (",
4203 			 dump_file);
4204 		  for (elt = j; elt < vec_len; elt++)
4205 		    {
4206 		      rf = &repeat_factor_vec[elt];
4207 		      print_generic_expr (dump_file, rf->factor, 0);
4208 		      if (elt < vec_len - 1)
4209 			fputs (" * ", dump_file);
4210 		    }
4211 		  fprintf (dump_file, ")^" HOST_WIDE_INT_PRINT_DEC "\n",
4212 			   power);
4213 		}
4214 	    }
4215 	}
4216       else
4217 	{
4218 	  /* Otherwise, find the first factor in the repeated factor
4219 	     vector whose occurrence count is at least 2.  If no such
4220 	     factor exists, there are no builtin_powi opportunities
4221 	     remaining.  */
4222 	  FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
4223 	    {
4224 	      if (rf1->count >= 2)
4225 		break;
4226 	    }
4227 
4228 	  if (j >= vec_len)
4229 	    break;
4230 
4231 	  power = rf1->count;
4232 
4233 	  if (dump_file && (dump_flags & TDF_DETAILS))
4234 	    {
4235 	      unsigned elt;
4236 	      repeat_factor_t rf;
4237 	      fputs ("Building __builtin_pow call for (", dump_file);
4238 	      for (elt = j; elt < vec_len; elt++)
4239 		{
4240 		  rf = &repeat_factor_vec[elt];
4241 		  print_generic_expr (dump_file, rf->factor, 0);
4242 		  if (elt < vec_len - 1)
4243 		    fputs (" * ", dump_file);
4244 		}
4245 	      fprintf (dump_file, ")^" HOST_WIDE_INT_PRINT_DEC "\n", power);
4246 	    }
4247 
4248 	  reassociate_stats.pows_created++;
4249 
4250 	  /* Visit each element of the vector in reverse order (so that
4251 	     high-occurrence elements are visited first, and within the
4252 	     same occurrence count, lower-ranked elements are visited
4253 	     first).  Form a linear product of all elements in this order
4254 	     whose occurrence count is at least that of element J.
4255 	     Record the SSA name representing the product of each element
4256 	     with all subsequent elements in the vector.  */
4257 	  if (j == vec_len - 1)
4258 	    rf1->repr = rf1->factor;
4259 	  else
4260 	    {
4261 	      for (ii = vec_len - 2; ii >= (int)j; ii--)
4262 		{
4263 		  tree op1, op2;
4264 
4265 		  rf1 = &repeat_factor_vec[ii];
4266 		  rf2 = &repeat_factor_vec[ii + 1];
4267 
4268 		  /* Init the last factor's representative to be itself.  */
4269 		  if (!rf2->repr)
4270 		    rf2->repr = rf2->factor;
4271 
4272 		  op1 = rf1->factor;
4273 		  op2 = rf2->repr;
4274 
4275 		  target_ssa = make_temp_ssa_name (type, NULL, "reassocpow");
4276 		  mul_stmt = gimple_build_assign_with_ops (MULT_EXPR,
4277 							   target_ssa,
4278 							   op1, op2);
4279 		  gimple_set_location (mul_stmt, gimple_location (stmt));
4280 		  gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
4281 		  rf1->repr = target_ssa;
4282 
4283 		  /* Don't reprocess the multiply we just introduced.  */
4284 		  gimple_set_visited (mul_stmt, true);
4285 		}
4286 	    }
4287 
4288 	  /* Form a call to __builtin_powi for the maximum product
4289 	     just formed, raised to the power obtained earlier.  */
4290 	  rf1 = &repeat_factor_vec[j];
4291 	  iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
4292 	  pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
4293 					build_int_cst (integer_type_node,
4294 						       power));
4295 	  gimple_call_set_lhs (pow_stmt, iter_result);
4296 	  gimple_set_location (pow_stmt, gimple_location (stmt));
4297 	  gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
4298 	}
4299 
4300       /* If we previously formed at least one other builtin_powi call,
4301 	 form the product of this one and those others.  */
4302       if (result)
4303 	{
4304 	  tree new_result = make_temp_ssa_name (type, NULL, "reassocpow");
4305 	  mul_stmt = gimple_build_assign_with_ops (MULT_EXPR, new_result,
4306 						   result, iter_result);
4307 	  gimple_set_location (mul_stmt, gimple_location (stmt));
4308 	  gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
4309 	  gimple_set_visited (mul_stmt, true);
4310 	  result = new_result;
4311 	}
4312       else
4313 	result = iter_result;
4314 
4315       /* Decrement the occurrence count of each element in the product
4316 	 by the count found above, and remove this many copies of each
4317 	 factor from OPS.  */
4318       for (i = j; i < vec_len; i++)
4319 	{
4320 	  unsigned k = power;
4321 	  unsigned n;
4322 
4323 	  rf1 = &repeat_factor_vec[i];
4324 	  rf1->count -= power;
4325 
4326 	  FOR_EACH_VEC_ELT_REVERSE (*ops, n, oe)
4327 	    {
4328 	      if (oe->op == rf1->factor)
4329 		{
4330 		  if (oe->count <= k)
4331 		    {
4332 		      ops->ordered_remove (n);
4333 		      k -= oe->count;
4334 
4335 		      if (k == 0)
4336 			break;
4337 		    }
4338 		  else
4339 		    {
4340 		      oe->count -= k;
4341 		      break;
4342 		    }
4343 		}
4344 	    }
4345 	}
4346     }
4347 
4348   /* At this point all elements in the repeated factor vector have a
4349      remaining occurrence count of 0 or 1, and those with a count of 1
4350      don't have cached representatives.  Re-sort the ops vector and
4351      clean up.  */
4352   ops->qsort (sort_by_operand_rank);
4353   repeat_factor_vec.release ();
4354 
4355   /* Return the final product computed herein.  Note that there may
4356      still be some elements with single occurrence count left in OPS;
4357      those will be handled by the normal reassociation logic.  */
4358   return result;
4359 }
4360 
4361 /* Transform STMT at *GSI into a copy by replacing its rhs with NEW_RHS.  */
4362 
4363 static void
4364 transform_stmt_to_copy (gimple_stmt_iterator *gsi, gimple stmt, tree new_rhs)
4365 {
4366   tree rhs1;
4367 
4368   if (dump_file && (dump_flags & TDF_DETAILS))
4369     {
4370       fprintf (dump_file, "Transforming ");
4371       print_gimple_stmt (dump_file, stmt, 0, 0);
4372     }
4373 
4374   rhs1 = gimple_assign_rhs1 (stmt);
4375   gimple_assign_set_rhs_from_tree (gsi, new_rhs);
4376   update_stmt (stmt);
4377   remove_visited_stmt_chain (rhs1);
4378 
4379   if (dump_file && (dump_flags & TDF_DETAILS))
4380     {
4381       fprintf (dump_file, " into ");
4382       print_gimple_stmt (dump_file, stmt, 0, 0);
4383     }
4384 }
4385 
4386 /* Transform STMT at *GSI into a multiply of RHS1 and RHS2.  */
4387 
4388 static void
4389 transform_stmt_to_multiply (gimple_stmt_iterator *gsi, gimple stmt,
4390 			    tree rhs1, tree rhs2)
4391 {
4392   if (dump_file && (dump_flags & TDF_DETAILS))
4393     {
4394       fprintf (dump_file, "Transforming ");
4395       print_gimple_stmt (dump_file, stmt, 0, 0);
4396     }
4397 
4398   gimple_assign_set_rhs_with_ops (gsi, MULT_EXPR, rhs1, rhs2);
4399   update_stmt (gsi_stmt (*gsi));
4400   remove_visited_stmt_chain (rhs1);
4401 
4402   if (dump_file && (dump_flags & TDF_DETAILS))
4403     {
4404       fprintf (dump_file, " into ");
4405       print_gimple_stmt (dump_file, stmt, 0, 0);
4406     }
4407 }
4408 
4409 /* Reassociate expressions in basic block BB and its post-dominator
4410    children.  */
4411 
4412 static void
4413 reassociate_bb (basic_block bb)
4414 {
4415   gimple_stmt_iterator gsi;
4416   basic_block son;
4417   gimple stmt = last_stmt (bb);
4418 
4419   if (stmt && !gimple_visited_p (stmt))
4420     maybe_optimize_range_tests (stmt);
4421 
4422   for (gsi = gsi_last_bb (bb); !gsi_end_p (gsi); gsi_prev (&gsi))
4423     {
4424       stmt = gsi_stmt (gsi);
4425 
4426       if (is_gimple_assign (stmt)
4427 	  && !stmt_could_throw_p (stmt))
4428 	{
4429 	  tree lhs, rhs1, rhs2;
4430 	  enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
4431 
4432 	  /* If this is not a gimple binary expression, there is
4433 	     nothing for us to do with it.  */
4434 	  if (get_gimple_rhs_class (rhs_code) != GIMPLE_BINARY_RHS)
4435 	    continue;
4436 
4437 	  /* If this was part of an already processed statement,
4438 	     we don't need to touch it again. */
4439 	  if (gimple_visited_p (stmt))
4440 	    {
4441 	      /* This statement might have become dead because of previous
4442 		 reassociations.  */
4443 	      if (has_zero_uses (gimple_get_lhs (stmt)))
4444 		{
4445 		  reassoc_remove_stmt (&gsi);
4446 		  release_defs (stmt);
4447 		  /* We might end up removing the last stmt above which
4448 		     places the iterator to the end of the sequence.
4449 		     Reset it to the last stmt in this case which might
4450 		     be the end of the sequence as well if we removed
4451 		     the last statement of the sequence.  In which case
4452 		     we need to bail out.  */
4453 		  if (gsi_end_p (gsi))
4454 		    {
4455 		      gsi = gsi_last_bb (bb);
4456 		      if (gsi_end_p (gsi))
4457 			break;
4458 		    }
4459 		}
4460 	      continue;
4461 	    }
4462 
4463 	  lhs = gimple_assign_lhs (stmt);
4464 	  rhs1 = gimple_assign_rhs1 (stmt);
4465 	  rhs2 = gimple_assign_rhs2 (stmt);
4466 
4467 	  /* For non-bit or min/max operations we can't associate
4468 	     all types.  Verify that here.  */
4469 	  if (rhs_code != BIT_IOR_EXPR
4470 	      && rhs_code != BIT_AND_EXPR
4471 	      && rhs_code != BIT_XOR_EXPR
4472 	      && rhs_code != MIN_EXPR
4473 	      && rhs_code != MAX_EXPR
4474 	      && (!can_reassociate_p (lhs)
4475 		  || !can_reassociate_p (rhs1)
4476 		  || !can_reassociate_p (rhs2)))
4477 	    continue;
4478 
4479 	  if (associative_tree_code (rhs_code))
4480 	    {
4481 	      auto_vec<operand_entry_t> ops;
4482 	      tree powi_result = NULL_TREE;
4483 
4484 	      /* There may be no immediate uses left by the time we
4485 		 get here because we may have eliminated them all.  */
4486 	      if (TREE_CODE (lhs) == SSA_NAME && has_zero_uses (lhs))
4487 		continue;
4488 
4489 	      gimple_set_visited (stmt, true);
4490 	      linearize_expr_tree (&ops, stmt, true, true);
4491 	      ops.qsort (sort_by_operand_rank);
4492 	      optimize_ops_list (rhs_code, &ops);
4493 	      if (undistribute_ops_list (rhs_code, &ops,
4494 					 loop_containing_stmt (stmt)))
4495 		{
4496 		  ops.qsort (sort_by_operand_rank);
4497 		  optimize_ops_list (rhs_code, &ops);
4498 		}
4499 
4500 	      if (rhs_code == BIT_IOR_EXPR || rhs_code == BIT_AND_EXPR)
4501 		optimize_range_tests (rhs_code, &ops);
4502 
4503 	      if (first_pass_instance
4504 		  && rhs_code == MULT_EXPR
4505 		  && flag_unsafe_math_optimizations)
4506 		powi_result = attempt_builtin_powi (stmt, &ops);
4507 
4508 	      /* If the operand vector is now empty, all operands were
4509 		 consumed by the __builtin_powi optimization.  */
4510 	      if (ops.length () == 0)
4511 		transform_stmt_to_copy (&gsi, stmt, powi_result);
4512 	      else if (ops.length () == 1)
4513 		{
4514 		  tree last_op = ops.last ()->op;
4515 
4516 		  if (powi_result)
4517 		    transform_stmt_to_multiply (&gsi, stmt, last_op,
4518 						powi_result);
4519 		  else
4520 		    transform_stmt_to_copy (&gsi, stmt, last_op);
4521 		}
4522 	      else
4523 		{
4524 		  enum machine_mode mode = TYPE_MODE (TREE_TYPE (lhs));
4525 		  int ops_num = ops.length ();
4526 		  int width = get_reassociation_width (ops_num, rhs_code, mode);
4527 		  tree new_lhs = lhs;
4528 
4529 		  if (dump_file && (dump_flags & TDF_DETAILS))
4530 		    fprintf (dump_file,
4531 			     "Width = %d was chosen for reassociation\n", width);
4532 
4533 		  if (width > 1
4534 		      && ops.length () > 3)
4535 		    rewrite_expr_tree_parallel (stmt, width, ops);
4536 		  else
4537                     {
4538                       /* When there are three operands left, we want
4539                          to make sure the ones that get the double
4540                          binary op are chosen wisely.  */
4541                       int len = ops.length ();
4542                       if (len >= 3)
4543                         swap_ops_for_binary_stmt (ops, len - 3, stmt);
4544 
4545 		      new_lhs = rewrite_expr_tree (stmt, 0, ops,
4546 						   powi_result != NULL);
4547                     }
4548 
4549 		  /* If we combined some repeated factors into a
4550 		     __builtin_powi call, multiply that result by the
4551 		     reassociated operands.  */
4552 		  if (powi_result)
4553 		    {
4554 		      gimple mul_stmt, lhs_stmt = SSA_NAME_DEF_STMT (lhs);
4555 		      tree type = TREE_TYPE (lhs);
4556 		      tree target_ssa = make_temp_ssa_name (type, NULL,
4557 							    "reassocpow");
4558 		      gimple_set_lhs (lhs_stmt, target_ssa);
4559 		      update_stmt (lhs_stmt);
4560 		      if (lhs != new_lhs)
4561 			target_ssa = new_lhs;
4562 		      mul_stmt = gimple_build_assign_with_ops (MULT_EXPR, lhs,
4563 							       powi_result,
4564 							       target_ssa);
4565 		      gimple_set_location (mul_stmt, gimple_location (stmt));
4566 		      gsi_insert_after (&gsi, mul_stmt, GSI_NEW_STMT);
4567 		    }
4568 		}
4569 	    }
4570 	}
4571     }
4572   for (son = first_dom_son (CDI_POST_DOMINATORS, bb);
4573        son;
4574        son = next_dom_son (CDI_POST_DOMINATORS, son))
4575     reassociate_bb (son);
4576 }
4577 
4578 void dump_ops_vector (FILE *file, vec<operand_entry_t> ops);
4579 void debug_ops_vector (vec<operand_entry_t> ops);
4580 
4581 /* Dump the operand entry vector OPS to FILE.  */
4582 
4583 void
4584 dump_ops_vector (FILE *file, vec<operand_entry_t> ops)
4585 {
4586   operand_entry_t oe;
4587   unsigned int i;
4588 
4589   FOR_EACH_VEC_ELT (ops, i, oe)
4590     {
4591       fprintf (file, "Op %d -> rank: %d, tree: ", i, oe->rank);
4592       print_generic_expr (file, oe->op, 0);
4593     }
4594 }
4595 
4596 /* Dump the operand entry vector OPS to STDERR.  */
4597 
4598 DEBUG_FUNCTION void
4599 debug_ops_vector (vec<operand_entry_t> ops)
4600 {
4601   dump_ops_vector (stderr, ops);
4602 }
4603 
4604 static void
4605 do_reassoc (void)
4606 {
4607   break_up_subtract_bb (ENTRY_BLOCK_PTR_FOR_FN (cfun));
4608   reassociate_bb (EXIT_BLOCK_PTR_FOR_FN (cfun));
4609 }
4610 
4611 /* Initialize the reassociation pass.  */
4612 
4613 static void
4614 init_reassoc (void)
4615 {
4616   int i;
4617   long rank = 2;
4618   int *bbs = XNEWVEC (int, n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS);
4619 
4620   /* Find the loops, so that we can prevent moving calculations in
4621      them.  */
4622   loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
4623 
4624   memset (&reassociate_stats, 0, sizeof (reassociate_stats));
4625 
4626   operand_entry_pool = create_alloc_pool ("operand entry pool",
4627 					  sizeof (struct operand_entry), 30);
4628   next_operand_entry_id = 0;
4629 
4630   /* Reverse RPO (Reverse Post Order) will give us something where
4631      deeper loops come later.  */
4632   pre_and_rev_post_order_compute (NULL, bbs, false);
4633   bb_rank = XCNEWVEC (long, last_basic_block_for_fn (cfun));
4634   operand_rank = pointer_map_create ();
4635 
4636   /* Give each default definition a distinct rank.  This includes
4637      parameters and the static chain.  Walk backwards over all
4638      SSA names so that we get proper rank ordering according
4639      to tree_swap_operands_p.  */
4640   for (i = num_ssa_names - 1; i > 0; --i)
4641     {
4642       tree name = ssa_name (i);
4643       if (name && SSA_NAME_IS_DEFAULT_DEF (name))
4644 	insert_operand_rank (name, ++rank);
4645     }
4646 
4647   /* Set up rank for each BB  */
4648   for (i = 0; i < n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS; i++)
4649     bb_rank[bbs[i]] = ++rank  << 16;
4650 
4651   free (bbs);
4652   calculate_dominance_info (CDI_POST_DOMINATORS);
4653   plus_negates = vNULL;
4654 }
4655 
4656 /* Cleanup after the reassociation pass, and print stats if
4657    requested.  */
4658 
4659 static void
4660 fini_reassoc (void)
4661 {
4662   statistics_counter_event (cfun, "Linearized",
4663 			    reassociate_stats.linearized);
4664   statistics_counter_event (cfun, "Constants eliminated",
4665 			    reassociate_stats.constants_eliminated);
4666   statistics_counter_event (cfun, "Ops eliminated",
4667 			    reassociate_stats.ops_eliminated);
4668   statistics_counter_event (cfun, "Statements rewritten",
4669 			    reassociate_stats.rewritten);
4670   statistics_counter_event (cfun, "Built-in pow[i] calls encountered",
4671 			    reassociate_stats.pows_encountered);
4672   statistics_counter_event (cfun, "Built-in powi calls created",
4673 			    reassociate_stats.pows_created);
4674 
4675   pointer_map_destroy (operand_rank);
4676   free_alloc_pool (operand_entry_pool);
4677   free (bb_rank);
4678   plus_negates.release ();
4679   free_dominance_info (CDI_POST_DOMINATORS);
4680   loop_optimizer_finalize ();
4681 }
4682 
4683 /* Gate and execute functions for Reassociation.  */
4684 
4685 static unsigned int
4686 execute_reassoc (void)
4687 {
4688   init_reassoc ();
4689 
4690   do_reassoc ();
4691   repropagate_negates ();
4692 
4693   fini_reassoc ();
4694   return 0;
4695 }
4696 
4697 static bool
4698 gate_tree_ssa_reassoc (void)
4699 {
4700   return flag_tree_reassoc != 0;
4701 }
4702 
4703 namespace {
4704 
4705 const pass_data pass_data_reassoc =
4706 {
4707   GIMPLE_PASS, /* type */
4708   "reassoc", /* name */
4709   OPTGROUP_NONE, /* optinfo_flags */
4710   true, /* has_gate */
4711   true, /* has_execute */
4712   TV_TREE_REASSOC, /* tv_id */
4713   ( PROP_cfg | PROP_ssa ), /* properties_required */
4714   0, /* properties_provided */
4715   0, /* properties_destroyed */
4716   0, /* todo_flags_start */
4717   ( TODO_verify_ssa
4718     | TODO_update_ssa_only_virtuals
4719     | TODO_verify_flow ), /* todo_flags_finish */
4720 };
4721 
4722 class pass_reassoc : public gimple_opt_pass
4723 {
4724 public:
4725   pass_reassoc (gcc::context *ctxt)
4726     : gimple_opt_pass (pass_data_reassoc, ctxt)
4727   {}
4728 
4729   /* opt_pass methods: */
4730   opt_pass * clone () { return new pass_reassoc (m_ctxt); }
4731   bool gate () { return gate_tree_ssa_reassoc (); }
4732   unsigned int execute () { return execute_reassoc (); }
4733 
4734 }; // class pass_reassoc
4735 
4736 } // anon namespace
4737 
4738 gimple_opt_pass *
4739 make_pass_reassoc (gcc::context *ctxt)
4740 {
4741   return new pass_reassoc (ctxt);
4742 }
4743