1 /* Reassociation for trees.
2    Copyright (C) 2005-2013 Free Software Foundation, Inc.
3    Contributed by Daniel Berlin <dan@dberlin.org>
4 
5 This file is part of GCC.
6 
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
11 
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 GNU General Public License for more details.
16 
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3.  If not see
19 <http://www.gnu.org/licenses/>.  */
20 
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "tm.h"
25 #include "tree.h"
26 #include "basic-block.h"
27 #include "gimple-pretty-print.h"
28 #include "tree-inline.h"
29 #include "tree-flow.h"
30 #include "gimple.h"
31 #include "tree-iterator.h"
32 #include "tree-pass.h"
33 #include "alloc-pool.h"
34 #include "vec.h"
35 #include "langhooks.h"
36 #include "pointer-set.h"
37 #include "cfgloop.h"
38 #include "flags.h"
39 #include "target.h"
40 #include "params.h"
41 #include "diagnostic-core.h"
42 
43 /*  This is a simple global reassociation pass.  It is, in part, based
44     on the LLVM pass of the same name (They do some things more/less
45     than we do, in different orders, etc).
46 
47     It consists of five steps:
48 
49     1. Breaking up subtract operations into addition + negate, where
50     it would promote the reassociation of adds.
51 
52     2. Left linearization of the expression trees, so that (A+B)+(C+D)
53     becomes (((A+B)+C)+D), which is easier for us to rewrite later.
54     During linearization, we place the operands of the binary
55     expressions into a vector of operand_entry_t
56 
57     3. Optimization of the operand lists, eliminating things like a +
58     -a, a & a, etc.
59 
60     3a. Combine repeated factors with the same occurrence counts
61     into a __builtin_powi call that will later be optimized into
62     an optimal number of multiplies.
63 
64     4. Rewrite the expression trees we linearized and optimized so
65     they are in proper rank order.
66 
67     5. Repropagate negates, as nothing else will clean it up ATM.
68 
69     A bit of theory on #4, since nobody seems to write anything down
70     about why it makes sense to do it the way they do it:
71 
72     We could do this much nicer theoretically, but don't (for reasons
73     explained after how to do it theoretically nice :P).
74 
75     In order to promote the most redundancy elimination, you want
76     binary expressions whose operands are the same rank (or
77     preferably, the same value) exposed to the redundancy eliminator,
78     for possible elimination.
79 
80     So the way to do this if we really cared, is to build the new op
81     tree from the leaves to the roots, merging as you go, and putting the
82     new op on the end of the worklist, until you are left with one
83     thing on the worklist.
84 
85     IE if you have to rewrite the following set of operands (listed with
86     rank in parentheses), with opcode PLUS_EXPR:
87 
88     a (1),  b (1),  c (1),  d (2), e (2)
89 
90 
91     We start with our merge worklist empty, and the ops list with all of
92     those on it.
93 
94     You want to first merge all leaves of the same rank, as much as
95     possible.
96 
97     So first build a binary op of
98 
99     mergetmp = a + b, and put "mergetmp" on the merge worklist.
100 
101     Because there is no three operand form of PLUS_EXPR, c is not going to
102     be exposed to redundancy elimination as a rank 1 operand.
103 
104     So you might as well throw it on the merge worklist (you could also
105     consider it to now be a rank two operand, and merge it with d and e,
106     but in this case, you then have evicted e from a binary op. So at
107     least in this situation, you can't win.)
108 
109     Then build a binary op of d + e
110     mergetmp2 = d + e
111 
112     and put mergetmp2 on the merge worklist.
113 
114     so merge worklist = {mergetmp, c, mergetmp2}
115 
116     Continue building binary ops of these operations until you have only
117     one operation left on the worklist.
118 
119     So we have
120 
121     build binary op
122     mergetmp3 = mergetmp + c
123 
124     worklist = {mergetmp2, mergetmp3}
125 
126     mergetmp4 = mergetmp2 + mergetmp3
127 
128     worklist = {mergetmp4}
129 
130     because we have one operation left, we can now just set the original
131     statement equal to the result of that operation.
132 
133     This will at least expose a + b  and d + e to redundancy elimination
134     as binary operations.
135 
136     For extra points, you can reuse the old statements to build the
137     mergetmps, since you shouldn't run out.
138 
139     So why don't we do this?
140 
141     Because it's expensive, and rarely will help.  Most trees we are
142     reassociating have 3 or fewer ops.  If they have 2 ops, they already
143     will be written into a nice single binary op.  If you have 3 ops, a
144     single simple check suffices to tell you whether the first two are of the
145     same rank.  If so, you know to order it
146 
147     mergetmp = op1 + op2
148     newstmt = mergetmp + op3
149 
150     instead of
151     mergetmp = op2 + op3
152     newstmt = mergetmp + op1
153 
154     If all three are of the same rank, you can't expose them all in a
155     single binary operator anyway, so the above is *still* the best you
156     can do.
157 
158     Thus, this is what we do.  When we have three ops left, we check to see
159     what order to put them in, and call it a day.  As a nod to vector sum
160     reduction, we check if any of the ops are really a phi node that is a
161     destructive update for the associating op, and keep the destructive
162     update together for vector sum reduction recognition.  */
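
/* A small illustration of steps 1 and 5 (a sketch only, not taken from
   an actual dump): for source such as

     int f (int a, int b, int c)
     {
       return a - b - c;
     }

   step 1 may rewrite the subtracts as  a + (-b) + (-c)  so that the
   additions can be freely reordered and merged with any other additions
   feeding the same expression; after the rank-ordered rewrite, step 5
   turns any remaining  x + (-y)  back into  x - y  where possible.  */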
163 
164 
165 /* Statistics */
166 static struct
167 {
168   int linearized;
169   int constants_eliminated;
170   int ops_eliminated;
171   int rewritten;
172   int pows_encountered;
173   int pows_created;
174 } reassociate_stats;
175 
176 /* Operator, rank pair.  */
177 typedef struct operand_entry
178 {
179   unsigned int rank;
180   int id;
181   tree op;
182   unsigned int count;
183 } *operand_entry_t;
184 
185 static alloc_pool operand_entry_pool;
186 
187 /* This is used to assign a unique ID to each struct operand_entry
188    so that qsort results are identical on different hosts.  */
189 static int next_operand_entry_id;
190 
191 /* Starting rank number for a given basic block, so that we can rank
192    operations using unmovable instructions in that BB based on the bb
193    depth.  */
194 static long *bb_rank;
195 
196 /* Operand->rank hashtable.  */
197 static struct pointer_map_t *operand_rank;
198 
199 /* Forward decls.  */
200 static long get_rank (tree);
201 
202 
203 /* Bias amount for loop-carried phis.  We want this to be larger than
204    the depth of any reassociation tree we can see, but not larger than
205    the rank difference between two blocks.  */
206 #define PHI_LOOP_BIAS (1 << 15)
207 
208 /* Rank assigned to a phi statement.  If STMT is a loop-carried phi of
209    an innermost loop, and the phi has only a single use which is inside
210    the loop, then the rank is the block rank of the loop latch plus an
211    extra bias for the loop-carried dependence.  This causes expressions
212    calculated into an accumulator variable to be independent for each
213    iteration of the loop.  If STMT is some other phi, the rank is the
214    block rank of its containing block.  */
215 static long
216 phi_rank (gimple stmt)
217 {
218   basic_block bb = gimple_bb (stmt);
219   struct loop *father = bb->loop_father;
220   tree res;
221   unsigned i;
222   use_operand_p use;
223   gimple use_stmt;
224 
225   /* We only care about real loops (those with a latch).  */
226   if (!father->latch)
227     return bb_rank[bb->index];
228 
229   /* Interesting phis must be in headers of innermost loops.  */
230   if (bb != father->header
231       || father->inner)
232     return bb_rank[bb->index];
233 
234   /* Ignore virtual SSA_NAMEs.  */
235   res = gimple_phi_result (stmt);
236   if (virtual_operand_p (res))
237     return bb_rank[bb->index];
238 
239   /* The phi definition must have a single use, and that use must be
240      within the loop.  Otherwise this isn't an accumulator pattern.  */
241   if (!single_imm_use (res, &use, &use_stmt)
242       || gimple_bb (use_stmt)->loop_father != father)
243     return bb_rank[bb->index];
244 
245   /* Look for phi arguments from within the loop.  If found, bias this phi.  */
246   for (i = 0; i < gimple_phi_num_args (stmt); i++)
247     {
248       tree arg = gimple_phi_arg_def (stmt, i);
249       if (TREE_CODE (arg) == SSA_NAME
250 	  && !SSA_NAME_IS_DEFAULT_DEF (arg))
251 	{
252 	  gimple def_stmt = SSA_NAME_DEF_STMT (arg);
253 	  if (gimple_bb (def_stmt)->loop_father == father)
254 	    return bb_rank[father->latch->index] + PHI_LOOP_BIAS;
255 	}
256     }
257 
258   /* Must be an uninteresting phi.  */
259   return bb_rank[bb->index];
260 }
261 
262 /* If EXP is an SSA_NAME defined by a PHI statement that represents a
263    loop-carried dependence of an innermost loop, return TRUE; else
264    return FALSE.  */
265 static bool
266 loop_carried_phi (tree exp)
267 {
268   gimple phi_stmt;
269   long block_rank;
270 
271   if (TREE_CODE (exp) != SSA_NAME
272       || SSA_NAME_IS_DEFAULT_DEF (exp))
273     return false;
274 
275   phi_stmt = SSA_NAME_DEF_STMT (exp);
276 
277   if (gimple_code (SSA_NAME_DEF_STMT (exp)) != GIMPLE_PHI)
278     return false;
279 
280   /* Non-loop-carried phis have block rank.  Loop-carried phis have
281      an additional bias added in.  If this phi doesn't have block rank,
282      it's biased and should not be propagated.  */
283   block_rank = bb_rank[gimple_bb (phi_stmt)->index];
284 
285   if (phi_rank (phi_stmt) != block_rank)
286     return true;
287 
288   return false;
289 }
290 
291 /* Return the maximum of RANK and the rank that should be propagated
292    from expression OP.  For most operands, this is just the rank of OP.
293    For loop-carried phis, the value is zero to avoid undoing the bias
294    in favor of the phi.  */
295 static long
296 propagate_rank (long rank, tree op)
297 {
298   long op_rank;
299 
300   if (loop_carried_phi (op))
301     return rank;
302 
303   op_rank = get_rank (op);
304 
305   return MAX (rank, op_rank);
306 }
307 
308 /* Look up the operand rank structure for expression E.  */
309 
310 static inline long
311 find_operand_rank (tree e)
312 {
313   void **slot = pointer_map_contains (operand_rank, e);
314   return slot ? (long) (intptr_t) *slot : -1;
315 }
316 
317 /* Insert {E,RANK} into the operand rank hashtable.  */
318 
319 static inline void
320 insert_operand_rank (tree e, long rank)
321 {
322   void **slot;
323   gcc_assert (rank > 0);
324   slot = pointer_map_insert (operand_rank, e);
325   gcc_assert (!*slot);
326   *slot = (void *) (intptr_t) rank;
327 }
328 
329 /* Given an expression E, return the rank of the expression.  */
330 
331 static long
332 get_rank (tree e)
333 {
334   /* Constants have rank 0.  */
335   if (is_gimple_min_invariant (e))
336     return 0;
337 
338   /* SSA_NAME's have the rank of the expression they are the result
339      of.
340      For globals and uninitialized values, the rank is 0.
341      For function arguments, use the pre-setup rank.
342      For PHI nodes, stores, asm statements, etc, we use the rank of
343      the BB.
344      For simple operations, the rank is the maximum rank of any of
345      its operands, or the bb_rank, whichever is less.
346      I make no claims that this is optimal, however, it gives good
347      results.  */
348 
349   /* We make an exception to the normal ranking system to break
350      dependences of accumulator variables in loops.  Suppose we
351      have a simple one-block loop containing:
352 
353        x_1 = phi(x_0, x_2)
354        b = a + x_1
355        c = b + d
356        x_2 = c + e
357 
358      As shown, each iteration of the calculation into x is fully
359      dependent upon the iteration before it.  We would prefer to
360      see this in the form:
361 
362        x_1 = phi(x_0, x_2)
363        b = a + d
364        c = b + e
365        x_2 = c + x_1
366 
367      If the loop is unrolled, the calculations of b and c from
368      different iterations can be interleaved.
369 
370      To obtain this result during reassociation, we bias the rank
371      of the phi definition x_1 upward, when it is recognized as an
372      accumulator pattern.  The artificial rank causes it to be
373      added last, providing the desired independence.  */
374 
375   if (TREE_CODE (e) == SSA_NAME)
376     {
377       gimple stmt;
378       long rank;
379       int i, n;
380       tree op;
381 
382       if (SSA_NAME_IS_DEFAULT_DEF (e))
383 	return find_operand_rank (e);
384 
385       stmt = SSA_NAME_DEF_STMT (e);
386       if (gimple_code (stmt) == GIMPLE_PHI)
387 	return phi_rank (stmt);
388 
389       if (!is_gimple_assign (stmt)
390 	  || gimple_vdef (stmt))
391 	return bb_rank[gimple_bb (stmt)->index];
392 
393       /* If we already have a rank for this expression, use that.  */
394       rank = find_operand_rank (e);
395       if (rank != -1)
396 	return rank;
397 
398       /* Otherwise, find the maximum rank for the operands.  As an
399 	 exception, remove the bias from loop-carried phis when propagating
400 	 the rank so that dependent operations are not also biased.  */
401       rank = 0;
402       if (gimple_assign_single_p (stmt))
403 	{
404 	  tree rhs = gimple_assign_rhs1 (stmt);
405 	  n = TREE_OPERAND_LENGTH (rhs);
406 	  if (n == 0)
407 	    rank = propagate_rank (rank, rhs);
408 	  else
409 	    {
410 	      for (i = 0; i < n; i++)
411 		{
412 		  op = TREE_OPERAND (rhs, i);
413 
414 		  if (op != NULL_TREE)
415 		    rank = propagate_rank (rank, op);
416 		}
417 	    }
418 	}
419       else
420 	{
421 	  n = gimple_num_ops (stmt);
422 	  for (i = 1; i < n; i++)
423 	    {
424 	      op = gimple_op (stmt, i);
425 	      gcc_assert (op);
426 	      rank = propagate_rank (rank, op);
427 	    }
428 	}
429 
430       if (dump_file && (dump_flags & TDF_DETAILS))
431 	{
432 	  fprintf (dump_file, "Rank for ");
433 	  print_generic_expr (dump_file, e, 0);
434 	  fprintf (dump_file, " is %ld\n", (rank + 1));
435 	}
436 
437       /* Note the rank in the hashtable so we don't recompute it.  */
438       insert_operand_rank (e, (rank + 1));
439       return (rank + 1);
440     }
441 
442   /* Globals, etc., are rank 0.  */
443   return 0;
444 }
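
/* A hypothetical illustration of the ranking (assuming the enclosing
   block has bb_rank 1 and the parameters were pre-assigned rank 1):

     a_1 = p_1 + p_2;     rank 2  (max (1, 1) + 1)
     b_2 = a_1 + p_3;     rank 3  (max (2, 1) + 1)
     c_3 = a_1 + b_2;     rank 4  (max (3, 2) + 1)

   Operands of equal rank are the ones sort_by_operand_rank will place
   next to each other, exposing them to redundancy elimination.  */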
445 
446 
447 /* We want integer ones to end up last no matter what, since they are
448    the ones we can do the most with.  */
449 #define INTEGER_CONST_TYPE 1 << 3
450 #define FLOAT_CONST_TYPE 1 << 2
451 #define OTHER_CONST_TYPE 1 << 1
452 
453 /* Classify an invariant tree into integer, float, or other, so that
454    we can sort them to be near other constants of the same type.  */
455 static inline int
456 constant_type (tree t)
457 {
458   if (INTEGRAL_TYPE_P (TREE_TYPE (t)))
459     return INTEGER_CONST_TYPE;
460   else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (t)))
461     return FLOAT_CONST_TYPE;
462   else
463     return OTHER_CONST_TYPE;
464 }
465 
466 /* qsort comparison function to sort operand entries PA and PB by rank
467    so that the sorted array is ordered by rank in decreasing order.  */
468 static int
469 sort_by_operand_rank (const void *pa, const void *pb)
470 {
471   const operand_entry_t oea = *(const operand_entry_t *)pa;
472   const operand_entry_t oeb = *(const operand_entry_t *)pb;
473 
474   /* It's nicer for optimize_expression if constants that are likely
475      to fold when added/multiplied/whatever are put next to each
476      other.  Since all constants have rank 0, order them by type.  */
477   if (oeb->rank == 0 && oea->rank == 0)
478     {
479       if (constant_type (oeb->op) != constant_type (oea->op))
480 	return constant_type (oeb->op) - constant_type (oea->op);
481       else
482 	/* To make sorting result stable, we use unique IDs to determine
483 	   order.  */
484         return oeb->id - oea->id;
485     }
486 
487   /* Lastly, make sure the versions that are the same go next to each
488      other.  We use SSA_NAME_VERSION because it's stable.  */
489   if ((oeb->rank - oea->rank == 0)
490       && TREE_CODE (oea->op) == SSA_NAME
491       && TREE_CODE (oeb->op) == SSA_NAME)
492     {
493       if (SSA_NAME_VERSION (oeb->op) != SSA_NAME_VERSION (oea->op))
494 	return SSA_NAME_VERSION (oeb->op) - SSA_NAME_VERSION (oea->op);
495       else
496 	return oeb->id - oea->id;
497     }
498 
499   if (oeb->rank != oea->rank)
500     return oeb->rank - oea->rank;
501   else
502     return oeb->id - oea->id;
503 }
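
/* For example (hypothetical ranks), the operand list

     { 5 (rank 0), b_3 (rank 2), a_2 (rank 3) }

   sorts to

     { a_2, b_3, 5 }

   i.e. decreasing rank, with the rank-0 constants grouped at the end,
   which is where optimize_ops_list looks when merging constants.  */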
504 
505 /* Add an operand entry to *OPS for the tree operand OP.  */
506 
507 static void
508 add_to_ops_vec (vec<operand_entry_t> *ops, tree op)
509 {
510   operand_entry_t oe = (operand_entry_t) pool_alloc (operand_entry_pool);
511 
512   oe->op = op;
513   oe->rank = get_rank (op);
514   oe->id = next_operand_entry_id++;
515   oe->count = 1;
516   ops->safe_push (oe);
517 }
518 
519 /* Add an operand entry to *OPS for the tree operand OP with repeat
520    count REPEAT.  */
521 
522 static void
523 add_repeat_to_ops_vec (vec<operand_entry_t> *ops, tree op,
524 		       HOST_WIDE_INT repeat)
525 {
526   operand_entry_t oe = (operand_entry_t) pool_alloc (operand_entry_pool);
527 
528   oe->op = op;
529   oe->rank = get_rank (op);
530   oe->id = next_operand_entry_id++;
531   oe->count = repeat;
532   ops->safe_push (oe);
533 
534   reassociate_stats.pows_encountered++;
535 }
536 
537 /* Return true if STMT is a reassociable operation containing a binary
538    operation with tree code CODE, and is inside LOOP.  */
539 
540 static bool
541 is_reassociable_op (gimple stmt, enum tree_code code, struct loop *loop)
542 {
543   basic_block bb = gimple_bb (stmt);
544 
545   if (gimple_bb (stmt) == NULL)
546     return false;
547 
548   if (!flow_bb_inside_loop_p (loop, bb))
549     return false;
550 
551   if (is_gimple_assign (stmt)
552       && gimple_assign_rhs_code (stmt) == code
553       && has_single_use (gimple_assign_lhs (stmt)))
554     return true;
555 
556   return false;
557 }
558 
559 
560 /* Given NAME, if NAME is defined by a unary operation OPCODE, return the
561    operand of that operation.  Otherwise, return NULL_TREE.  */
562 
563 static tree
564 get_unary_op (tree name, enum tree_code opcode)
565 {
566   gimple stmt = SSA_NAME_DEF_STMT (name);
567 
568   if (!is_gimple_assign (stmt))
569     return NULL_TREE;
570 
571   if (gimple_assign_rhs_code (stmt) == opcode)
572     return gimple_assign_rhs1 (stmt);
573   return NULL_TREE;
574 }
575 
576 /* If CURR and LAST are a pair of ops that OPCODE allows us to
577    eliminate through equivalences, do so, remove them from OPS, and
578    return true.  Otherwise, return false.  */
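
/* As a sketch of what the function below eliminates: with OPCODE
   BIT_AND_EXPR, the operand list for  x = a & b & a  contains A twice
   and one copy is dropped,

     { a_1, a_1, b_2 }  ->  { a_1, b_2 }

   while with BIT_XOR_EXPR both copies cancel,

     { a_1, a_1, b_2 }  ->  { b_2 }

   (a zero is substituted only when nothing else is left).  */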
579 
580 static bool
581 eliminate_duplicate_pair (enum tree_code opcode,
582 			  vec<operand_entry_t> *ops,
583 			  bool *all_done,
584 			  unsigned int i,
585 			  operand_entry_t curr,
586 			  operand_entry_t last)
587 {
588 
589   /* If we have two of the same op, and the opcode is & |, min, or max,
590      we can eliminate one of them.
591      If we have two of the same op, and the opcode is ^, we can
592      eliminate both of them.  */
593 
594   if (last && last->op == curr->op)
595     {
596       switch (opcode)
597 	{
598 	case MAX_EXPR:
599 	case MIN_EXPR:
600 	case BIT_IOR_EXPR:
601 	case BIT_AND_EXPR:
602 	  if (dump_file && (dump_flags & TDF_DETAILS))
603 	    {
604 	      fprintf (dump_file, "Equivalence: ");
605 	      print_generic_expr (dump_file, curr->op, 0);
606 	      fprintf (dump_file, " [&|minmax] ");
607 	      print_generic_expr (dump_file, last->op, 0);
608 	      fprintf (dump_file, " -> ");
609 	      print_generic_stmt (dump_file, last->op, 0);
610 	    }
611 
612 	  ops->ordered_remove (i);
613 	  reassociate_stats.ops_eliminated ++;
614 
615 	  return true;
616 
617 	case BIT_XOR_EXPR:
618 	  if (dump_file && (dump_flags & TDF_DETAILS))
619 	    {
620 	      fprintf (dump_file, "Equivalence: ");
621 	      print_generic_expr (dump_file, curr->op, 0);
622 	      fprintf (dump_file, " ^ ");
623 	      print_generic_expr (dump_file, last->op, 0);
624 	      fprintf (dump_file, " -> nothing\n");
625 	    }
626 
627 	  reassociate_stats.ops_eliminated += 2;
628 
629 	  if (ops->length () == 2)
630 	    {
631 	      ops->create (0);
632 	      add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (last->op)));
633 	      *all_done = true;
634 	    }
635 	  else
636 	    {
637 	      ops->ordered_remove (i-1);
638 	      ops->ordered_remove (i-1);
639 	    }
640 
641 	  return true;
642 
643 	default:
644 	  break;
645 	}
646     }
647   return false;
648 }
649 
650 static vec<tree> plus_negates;
651 
652 /* If OPCODE is PLUS_EXPR and CURR->OP is a negate expression or a bitwise not
653    expression, look in OPS for a corresponding positive operation to cancel
654    it out.  If we find one, remove the other from OPS, replace
655    OPS[CURRINDEX] with 0 or -1, respectively, and return true.  Otherwise,
656    return false. */
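
/* For example (sketch only): in  x = a + b + t  where  t = -a,  the
   negate cancels against A and a zero takes its place,

     { t_3 (= -a_1), a_1, b_2 }  ->  { b_2, 0 }

   and if instead  t = ~a,  the pair folds to -1.  */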
657 
658 static bool
659 eliminate_plus_minus_pair (enum tree_code opcode,
660 			   vec<operand_entry_t> *ops,
661 			   unsigned int currindex,
662 			   operand_entry_t curr)
663 {
664   tree negateop;
665   tree notop;
666   unsigned int i;
667   operand_entry_t oe;
668 
669   if (opcode != PLUS_EXPR || TREE_CODE (curr->op) != SSA_NAME)
670     return false;
671 
672   negateop = get_unary_op (curr->op, NEGATE_EXPR);
673   notop = get_unary_op (curr->op, BIT_NOT_EXPR);
674   if (negateop == NULL_TREE && notop == NULL_TREE)
675     return false;
676 
677   /* Any non-negated version will have a rank that is one less than
678      the current rank.  So once we hit those ranks, if we don't find
679      one, we can stop.  */
680 
681   for (i = currindex + 1;
682        ops->iterate (i, &oe)
683        && oe->rank >= curr->rank - 1 ;
684        i++)
685     {
686       if (oe->op == negateop)
687 	{
688 
689 	  if (dump_file && (dump_flags & TDF_DETAILS))
690 	    {
691 	      fprintf (dump_file, "Equivalence: ");
692 	      print_generic_expr (dump_file, negateop, 0);
693 	      fprintf (dump_file, " + -");
694 	      print_generic_expr (dump_file, oe->op, 0);
695 	      fprintf (dump_file, " -> 0\n");
696 	    }
697 
698 	  ops->ordered_remove (i);
699 	  add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (oe->op)));
700 	  ops->ordered_remove (currindex);
701 	  reassociate_stats.ops_eliminated ++;
702 
703 	  return true;
704 	}
705       else if (oe->op == notop)
706 	{
707 	  tree op_type = TREE_TYPE (oe->op);
708 
709 	  if (dump_file && (dump_flags & TDF_DETAILS))
710 	    {
711 	      fprintf (dump_file, "Equivalence: ");
712 	      print_generic_expr (dump_file, notop, 0);
713 	      fprintf (dump_file, " + ~");
714 	      print_generic_expr (dump_file, oe->op, 0);
715 	      fprintf (dump_file, " -> -1\n");
716 	    }
717 
718 	  ops->ordered_remove (i);
719 	  add_to_ops_vec (ops, build_int_cst_type (op_type, -1));
720 	  ops->ordered_remove (currindex);
721 	  reassociate_stats.ops_eliminated ++;
722 
723 	  return true;
724 	}
725     }
726 
727   /* CURR->OP is a negate expr in a plus expr: save it for later
728      inspection in repropagate_negates().  */
729   if (negateop != NULL_TREE)
730     plus_negates.safe_push (curr->op);
731 
732   return false;
733 }
734 
735 /* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR, and CURR->OP is really a
736    bitwise not expression, look in OPS for a corresponding operand to
737    cancel it out.  If we find one, collapse OPS to a single entry that
738    is 0 for BIT_AND_EXPR or all-ones for BIT_IOR_EXPR, and return true.
739    Otherwise, return false. */
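
/* For example (sketch only):  a & b & ~a  collapses the whole operand
   list to 0, and  a | b | ~a  collapses it to all-ones, no matter what
   else is in OPS.  */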
740 
741 static bool
742 eliminate_not_pairs (enum tree_code opcode,
743 		     vec<operand_entry_t> *ops,
744 		     unsigned int currindex,
745 		     operand_entry_t curr)
746 {
747   tree notop;
748   unsigned int i;
749   operand_entry_t oe;
750 
751   if ((opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
752       || TREE_CODE (curr->op) != SSA_NAME)
753     return false;
754 
755   notop = get_unary_op (curr->op, BIT_NOT_EXPR);
756   if (notop == NULL_TREE)
757     return false;
758 
759   /* Any non-not version will have a rank that is one less than
760      the current rank.  So once we hit those ranks, if we don't find
761      one, we can stop.  */
762 
763   for (i = currindex + 1;
764        ops->iterate (i, &oe)
765        && oe->rank >= curr->rank - 1;
766        i++)
767     {
768       if (oe->op == notop)
769 	{
770 	  if (dump_file && (dump_flags & TDF_DETAILS))
771 	    {
772 	      fprintf (dump_file, "Equivalence: ");
773 	      print_generic_expr (dump_file, notop, 0);
774 	      if (opcode == BIT_AND_EXPR)
775 		fprintf (dump_file, " & ~");
776 	      else if (opcode == BIT_IOR_EXPR)
777 		fprintf (dump_file, " | ~");
778 	      print_generic_expr (dump_file, oe->op, 0);
779 	      if (opcode == BIT_AND_EXPR)
780 		fprintf (dump_file, " -> 0\n");
781 	      else if (opcode == BIT_IOR_EXPR)
782 		fprintf (dump_file, " -> -1\n");
783 	    }
784 
785 	  if (opcode == BIT_AND_EXPR)
786 	    oe->op = build_zero_cst (TREE_TYPE (oe->op));
787 	  else if (opcode == BIT_IOR_EXPR)
788 	    oe->op = build_low_bits_mask (TREE_TYPE (oe->op),
789 					  TYPE_PRECISION (TREE_TYPE (oe->op)));
790 
791 	  reassociate_stats.ops_eliminated += ops->length () - 1;
792 	  ops->truncate (0);
793 	  ops->quick_push (oe);
794 	  return true;
795 	}
796     }
797 
798   return false;
799 }
800 
801 /* Use constant value that may be present in OPS to try to eliminate
802    operands.  Note that this function is only really used when we've
803    eliminated ops for other reasons, or merged constants.  Across
804    single statements, fold already does all of this, plus more.  There
805    is little point in duplicating logic, so I've only included the
806    identities that I could ever construct testcases to trigger.  */
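
/* The identities handled by the switch below include, for example:

     a & 0  -> 0        a & -1 -> a
     a | -1 -> -1       a | 0  -> a
     a * 0  -> 0        a * 1  -> a
     a + 0  -> a        a ^ 0  -> a

   with the floating-point cases guarded by the usual NaN and
   signed-zero checks.  */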
807 
808 static void
809 eliminate_using_constants (enum tree_code opcode,
810 			   vec<operand_entry_t> *ops)
811 {
812   operand_entry_t oelast = ops->last ();
813   tree type = TREE_TYPE (oelast->op);
814 
815   if (oelast->rank == 0
816       && (INTEGRAL_TYPE_P (type) || FLOAT_TYPE_P (type)))
817     {
818       switch (opcode)
819 	{
820 	case BIT_AND_EXPR:
821 	  if (integer_zerop (oelast->op))
822 	    {
823 	      if (ops->length () != 1)
824 		{
825 		  if (dump_file && (dump_flags & TDF_DETAILS))
826 		    fprintf (dump_file, "Found & 0, removing all other ops\n");
827 
828 		  reassociate_stats.ops_eliminated += ops->length () - 1;
829 
830 		  ops->truncate (0);
831 		  ops->quick_push (oelast);
832 		  return;
833 		}
834 	    }
835 	  else if (integer_all_onesp (oelast->op))
836 	    {
837 	      if (ops->length () != 1)
838 		{
839 		  if (dump_file && (dump_flags & TDF_DETAILS))
840 		    fprintf (dump_file, "Found & -1, removing\n");
841 		  ops->pop ();
842 		  reassociate_stats.ops_eliminated++;
843 		}
844 	    }
845 	  break;
846 	case BIT_IOR_EXPR:
847 	  if (integer_all_onesp (oelast->op))
848 	    {
849 	      if (ops->length () != 1)
850 		{
851 		  if (dump_file && (dump_flags & TDF_DETAILS))
852 		    fprintf (dump_file, "Found | -1, removing all other ops\n");
853 
854 		  reassociate_stats.ops_eliminated += ops->length () - 1;
855 
856 		  ops->truncate (0);
857 		  ops->quick_push (oelast);
858 		  return;
859 		}
860 	    }
861 	  else if (integer_zerop (oelast->op))
862 	    {
863 	      if (ops->length () != 1)
864 		{
865 		  if (dump_file && (dump_flags & TDF_DETAILS))
866 		    fprintf (dump_file, "Found | 0, removing\n");
867 		  ops->pop ();
868 		  reassociate_stats.ops_eliminated++;
869 		}
870 	    }
871 	  break;
872 	case MULT_EXPR:
873 	  if (integer_zerop (oelast->op)
874 	      || (FLOAT_TYPE_P (type)
875 		  && !HONOR_NANS (TYPE_MODE (type))
876 		  && !HONOR_SIGNED_ZEROS (TYPE_MODE (type))
877 		  && real_zerop (oelast->op)))
878 	    {
879 	      if (ops->length () != 1)
880 		{
881 		  if (dump_file && (dump_flags & TDF_DETAILS))
882 		    fprintf (dump_file, "Found * 0, removing all other ops\n");
883 
884 		  reassociate_stats.ops_eliminated += ops->length () - 1;
885 		  ops->truncate (1);
886 		  ops->quick_push (oelast);
887 		  return;
888 		}
889 	    }
890 	  else if (integer_onep (oelast->op)
891 		   || (FLOAT_TYPE_P (type)
892 		       && !HONOR_SNANS (TYPE_MODE (type))
893 		       && real_onep (oelast->op)))
894 	    {
895 	      if (ops->length () != 1)
896 		{
897 		  if (dump_file && (dump_flags & TDF_DETAILS))
898 		    fprintf (dump_file, "Found * 1, removing\n");
899 		  ops->pop ();
900 		  reassociate_stats.ops_eliminated++;
901 		  return;
902 		}
903 	    }
904 	  break;
905 	case BIT_XOR_EXPR:
906 	case PLUS_EXPR:
907 	case MINUS_EXPR:
908 	  if (integer_zerop (oelast->op)
909 	      || (FLOAT_TYPE_P (type)
910 		  && (opcode == PLUS_EXPR || opcode == MINUS_EXPR)
911 		  && fold_real_zero_addition_p (type, oelast->op,
912 						opcode == MINUS_EXPR)))
913 	    {
914 	      if (ops->length () != 1)
915 		{
916 		  if (dump_file && (dump_flags & TDF_DETAILS))
917 		    fprintf (dump_file, "Found [|^+] 0, removing\n");
918 		  ops->pop ();
919 		  reassociate_stats.ops_eliminated++;
920 		  return;
921 		}
922 	    }
923 	  break;
924 	default:
925 	  break;
926 	}
927     }
928 }
929 
930 
931 static void linearize_expr_tree (vec<operand_entry_t> *, gimple,
932 				 bool, bool);
933 
934 /* Structure for tracking and counting operands.  */
935 typedef struct oecount_s {
936   int cnt;
937   int id;
938   enum tree_code oecode;
939   tree op;
940 } oecount;
941 
942 
943 /* The heap for the oecount hashtable and the sorted list of operands.  */
944 static vec<oecount> cvec;
945 
946 /* Hash function for oecount.  */
947 
948 static hashval_t
949 oecount_hash (const void *p)
950 {
951   const oecount *c = &cvec[(size_t)p - 42];
952   return htab_hash_pointer (c->op) ^ (hashval_t)c->oecode;
953 }
954 
955 /* Comparison function for oecount.  */
956 
957 static int
958 oecount_eq (const void *p1, const void *p2)
959 {
960   const oecount *c1 = &cvec[(size_t)p1 - 42];
961   const oecount *c2 = &cvec[(size_t)p2 - 42];
962   return (c1->oecode == c2->oecode
963 	  && c1->op == c2->op);
964 }
965 
966 /* Comparison function for qsort sorting oecount elements by count.  */
967 
968 static int
969 oecount_cmp (const void *p1, const void *p2)
970 {
971   const oecount *c1 = (const oecount *)p1;
972   const oecount *c2 = (const oecount *)p2;
973   if (c1->cnt != c2->cnt)
974     return c1->cnt - c2->cnt;
975   else
976     /* If counts are identical, use unique IDs to stabilize qsort.  */
977     return c1->id - c2->id;
978 }
979 
980 /* Return TRUE iff STMT represents a builtin call that raises OP
981    to some exponent.  */
982 
983 static bool
984 stmt_is_power_of_op (gimple stmt, tree op)
985 {
986   tree fndecl;
987 
988   if (!is_gimple_call (stmt))
989     return false;
990 
991   fndecl = gimple_call_fndecl (stmt);
992 
993   if (!fndecl
994       || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
995     return false;
996 
997   switch (DECL_FUNCTION_CODE (gimple_call_fndecl (stmt)))
998     {
999     CASE_FLT_FN (BUILT_IN_POW):
1000     CASE_FLT_FN (BUILT_IN_POWI):
1001       return (operand_equal_p (gimple_call_arg (stmt, 0), op, 0));
1002 
1003     default:
1004       return false;
1005     }
1006 }
1007 
1008 /* Given STMT which is a __builtin_pow* call, decrement its exponent
1009    in place and return the result.  Assumes that stmt_is_power_of_op
1010    was previously called for STMT and returned TRUE.  */
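
/* For example (sketch):  t = __builtin_powi (x, 5)  is rewritten in
   place to  t = __builtin_powi (x, 4)  and 4 is returned, telling the
   caller that one factor of X has been peeled off.  */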
1011 
1012 static HOST_WIDE_INT
1013 decrement_power (gimple stmt)
1014 {
1015   REAL_VALUE_TYPE c, cint;
1016   HOST_WIDE_INT power;
1017   tree arg1;
1018 
1019   switch (DECL_FUNCTION_CODE (gimple_call_fndecl (stmt)))
1020     {
1021     CASE_FLT_FN (BUILT_IN_POW):
1022       arg1 = gimple_call_arg (stmt, 1);
1023       c = TREE_REAL_CST (arg1);
1024       power = real_to_integer (&c) - 1;
1025       real_from_integer (&cint, VOIDmode, power, 0, 0);
1026       gimple_call_set_arg (stmt, 1, build_real (TREE_TYPE (arg1), cint));
1027       return power;
1028 
1029     CASE_FLT_FN (BUILT_IN_POWI):
1030       arg1 = gimple_call_arg (stmt, 1);
1031       power = TREE_INT_CST_LOW (arg1) - 1;
1032       gimple_call_set_arg (stmt, 1, build_int_cst (TREE_TYPE (arg1), power));
1033       return power;
1034 
1035     default:
1036       gcc_unreachable ();
1037     }
1038 }
1039 
1040 /* Find the single immediate use of STMT's LHS, and replace it
1041    with OP.  Remove STMT.  If STMT's LHS is the same as *DEF,
1042    replace *DEF with OP as well.  */
1043 
1044 static void
1045 propagate_op_to_single_use (tree op, gimple stmt, tree *def)
1046 {
1047   tree lhs;
1048   gimple use_stmt;
1049   use_operand_p use;
1050   gimple_stmt_iterator gsi;
1051 
1052   if (is_gimple_call (stmt))
1053     lhs = gimple_call_lhs (stmt);
1054   else
1055     lhs = gimple_assign_lhs (stmt);
1056 
1057   gcc_assert (has_single_use (lhs));
1058   single_imm_use (lhs, &use, &use_stmt);
1059   if (lhs == *def)
1060     *def = op;
1061   SET_USE (use, op);
1062   if (TREE_CODE (op) != SSA_NAME)
1063     update_stmt (use_stmt);
1064   gsi = gsi_for_stmt (stmt);
1065   unlink_stmt_vdef (stmt);
1066   gsi_remove (&gsi, true);
1067   release_defs (stmt);
1068 }
1069 
1070 /* Walks the linear chain with result *DEF searching for an operation
1071    with operand OP and code OPCODE removing that from the chain.  *DEF
1072    is updated if there is only one operand but no operation left.  */
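
/* For example (sketch): removing one factor B from the chain that
   defines *DEF in

     t_1 = a_2 * b_3;
     d_4 = t_1 * c_5;     <-- *DEF

   walks from d_4 to t_1, finds B there, and propagates A into t_1's
   single use, leaving  d_4 = a_2 * c_5.  */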
1073 
1074 static void
1075 zero_one_operation (tree *def, enum tree_code opcode, tree op)
1076 {
1077   gimple stmt = SSA_NAME_DEF_STMT (*def);
1078 
1079   do
1080     {
1081       tree name;
1082 
1083       if (opcode == MULT_EXPR
1084 	  && stmt_is_power_of_op (stmt, op))
1085 	{
1086 	  if (decrement_power (stmt) == 1)
1087 	    propagate_op_to_single_use (op, stmt, def);
1088 	  return;
1089 	}
1090 
1091       name = gimple_assign_rhs1 (stmt);
1092 
1093       /* If this is the operation we look for and one of the operands
1094          is ours, simply propagate the other operand into the stmt's
1095 	 single use.  */
1096       if (gimple_assign_rhs_code (stmt) == opcode
1097 	  && (name == op
1098 	      || gimple_assign_rhs2 (stmt) == op))
1099 	{
1100 	  if (name == op)
1101 	    name = gimple_assign_rhs2 (stmt);
1102 	  propagate_op_to_single_use (name, stmt, def);
1103 	  return;
1104 	}
1105 
1106       /* We might have a multiply of two __builtin_pow* calls, and
1107 	 the operand might be hiding in the rightmost one.  */
1108       if (opcode == MULT_EXPR
1109 	  && gimple_assign_rhs_code (stmt) == opcode
1110 	  && TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME
1111 	  && has_single_use (gimple_assign_rhs2 (stmt)))
1112 	{
1113 	  gimple stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
1114 	  if (stmt_is_power_of_op (stmt2, op))
1115 	    {
1116 	      if (decrement_power (stmt2) == 1)
1117 		propagate_op_to_single_use (op, stmt2, def);
1118 	      return;
1119 	    }
1120 	}
1121 
1122       /* Continue walking the chain.  */
1123       gcc_assert (name != op
1124 		  && TREE_CODE (name) == SSA_NAME);
1125       stmt = SSA_NAME_DEF_STMT (name);
1126     }
1127   while (1);
1128 }
1129 
1130 /* Builds one statement performing OP1 OPCODE OP2 using a new SSA name
1131    of type TYPE for the result.  Places the statement after the definition
1132    of either OP1 or OP2.  Returns the new statement.  */
1133 
1134 static gimple
1135 build_and_add_sum (tree type, tree op1, tree op2, enum tree_code opcode)
1136 {
1137   gimple op1def = NULL, op2def = NULL;
1138   gimple_stmt_iterator gsi;
1139   tree op;
1140   gimple sum;
1141 
1142   /* Create the addition statement.  */
1143   op = make_ssa_name (type, NULL);
1144   sum = gimple_build_assign_with_ops (opcode, op, op1, op2);
1145 
1146   /* Find an insertion place and insert.  */
1147   if (TREE_CODE (op1) == SSA_NAME)
1148     op1def = SSA_NAME_DEF_STMT (op1);
1149   if (TREE_CODE (op2) == SSA_NAME)
1150     op2def = SSA_NAME_DEF_STMT (op2);
1151   if ((!op1def || gimple_nop_p (op1def))
1152       && (!op2def || gimple_nop_p (op2def)))
1153     {
1154       gsi = gsi_after_labels (single_succ (ENTRY_BLOCK_PTR));
1155       gsi_insert_before (&gsi, sum, GSI_NEW_STMT);
1156     }
1157   else if ((!op1def || gimple_nop_p (op1def))
1158 	   || (op2def && !gimple_nop_p (op2def)
1159 	       && stmt_dominates_stmt_p (op1def, op2def)))
1160     {
1161       if (gimple_code (op2def) == GIMPLE_PHI)
1162 	{
1163 	  gsi = gsi_after_labels (gimple_bb (op2def));
1164 	  gsi_insert_before (&gsi, sum, GSI_NEW_STMT);
1165 	}
1166       else
1167 	{
1168 	  if (!stmt_ends_bb_p (op2def))
1169 	    {
1170 	      gsi = gsi_for_stmt (op2def);
1171 	      gsi_insert_after (&gsi, sum, GSI_NEW_STMT);
1172 	    }
1173 	  else
1174 	    {
1175 	      edge e;
1176 	      edge_iterator ei;
1177 
1178 	      FOR_EACH_EDGE (e, ei, gimple_bb (op2def)->succs)
1179 		if (e->flags & EDGE_FALLTHRU)
1180 		  gsi_insert_on_edge_immediate (e, sum);
1181 	    }
1182 	}
1183     }
1184   else
1185     {
1186       if (gimple_code (op1def) == GIMPLE_PHI)
1187 	{
1188 	  gsi = gsi_after_labels (gimple_bb (op1def));
1189 	  gsi_insert_before (&gsi, sum, GSI_NEW_STMT);
1190 	}
1191       else
1192 	{
1193 	  if (!stmt_ends_bb_p (op1def))
1194 	    {
1195 	      gsi = gsi_for_stmt (op1def);
1196 	      gsi_insert_after (&gsi, sum, GSI_NEW_STMT);
1197 	    }
1198 	  else
1199 	    {
1200 	      edge e;
1201 	      edge_iterator ei;
1202 
1203 	      FOR_EACH_EDGE (e, ei, gimple_bb (op1def)->succs)
1204 		if (e->flags & EDGE_FALLTHRU)
1205 		  gsi_insert_on_edge_immediate (e, sum);
1206 	    }
1207 	}
1208     }
1209   update_stmt (sum);
1210 
1211   return sum;
1212 }
1213 
1214 /* Perform un-distribution of divisions and multiplications.
1215    A * X + B * X is transformed into (A + B) * X and A / X + B / X
1216    to (A + B) / X for real X.
1217 
1218    The algorithm is organized as follows.
1219 
1220     - First we walk the addition chain *OPS looking for summands that
1221       are defined by a multiplication or a real division.  This results
1222       in the candidates bitmap with relevant indices into *OPS.
1223 
1224     - Second we build the chains of multiplications or divisions for
1225       these candidates, counting the number of occurrences of (operand, code)
1226       pairs in all of the candidates chains.
1227 
1228     - Third we sort the (operand, code) pairs by number of occurrence and
1229       process them starting with the pair with the most uses.
1230 
1231       * For each such pair we walk the candidates again to build a
1232         second candidate bitmap noting all multiplication/division chains
1233 	that have at least one occurrence of (operand, code).
1234 
1235       * We build an alternate addition chain only covering these
1236         candidates with one (operand, code) operation removed from their
1237 	multiplication/division chain.
1238 
1239       * The first candidate gets replaced by the alternate addition chain
1240         multiplied/divided by the operand.
1241 
1242       * All candidate chains get disabled for further processing and
1243         processing of (operand, code) pairs continues.
1244 
1245   The alternate addition chains built are re-processed by the main
1246   reassociation algorithm which allows optimizing a * x * y + b * y * x
1247   to (a + b ) * x * y in one invocation of the reassociation pass.  */
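
/* As a concrete sketch: in the addition chain  a*x + b*x + c  the
   (operand, code) pair (x, MULT_EXPR) occurs twice, so the X factor is
   stripped from both candidates, their remainders are added, and the
   sum is multiplied by X once, giving  (a + b)*x + c,  with the second
   candidate's slot in OPS replaced by 0.  */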
1248 
1249 static bool
1250 undistribute_ops_list (enum tree_code opcode,
1251 		       vec<operand_entry_t> *ops, struct loop *loop)
1252 {
1253   unsigned int length = ops->length ();
1254   operand_entry_t oe1;
1255   unsigned i, j;
1256   sbitmap candidates, candidates2;
1257   unsigned nr_candidates, nr_candidates2;
1258   sbitmap_iterator sbi0;
1259   vec<operand_entry_t> *subops;
1260   htab_t ctable;
1261   bool changed = false;
1262   int next_oecount_id = 0;
1263 
1264   if (length <= 1
1265       || opcode != PLUS_EXPR)
1266     return false;
1267 
1268   /* Build a list of candidates to process.  */
1269   candidates = sbitmap_alloc (length);
1270   bitmap_clear (candidates);
1271   nr_candidates = 0;
1272   FOR_EACH_VEC_ELT (*ops, i, oe1)
1273     {
1274       enum tree_code dcode;
1275       gimple oe1def;
1276 
1277       if (TREE_CODE (oe1->op) != SSA_NAME)
1278 	continue;
1279       oe1def = SSA_NAME_DEF_STMT (oe1->op);
1280       if (!is_gimple_assign (oe1def))
1281 	continue;
1282       dcode = gimple_assign_rhs_code (oe1def);
1283       if ((dcode != MULT_EXPR
1284 	   && dcode != RDIV_EXPR)
1285 	  || !is_reassociable_op (oe1def, dcode, loop))
1286 	continue;
1287 
1288       bitmap_set_bit (candidates, i);
1289       nr_candidates++;
1290     }
1291 
1292   if (nr_candidates < 2)
1293     {
1294       sbitmap_free (candidates);
1295       return false;
1296     }
1297 
1298   if (dump_file && (dump_flags & TDF_DETAILS))
1299     {
1300       fprintf (dump_file, "searching for un-distribute opportunities ");
1301       print_generic_expr (dump_file,
1302 	(*ops)[bitmap_first_set_bit (candidates)]->op, 0);
1303       fprintf (dump_file, " %d\n", nr_candidates);
1304     }
1305 
1306   /* Build linearized sub-operand lists and the counting table.  */
1307   cvec.create (0);
1308   ctable = htab_create (15, oecount_hash, oecount_eq, NULL);
1309   /* ??? Macro arguments cannot have multi-argument template types in
1310      them.  This typedef is needed to workaround that limitation.  */
1311   typedef vec<operand_entry_t> vec_operand_entry_t_heap;
1312   subops = XCNEWVEC (vec_operand_entry_t_heap, ops->length ());
1313   EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
1314     {
1315       gimple oedef;
1316       enum tree_code oecode;
1317       unsigned j;
1318 
1319       oedef = SSA_NAME_DEF_STMT ((*ops)[i]->op);
1320       oecode = gimple_assign_rhs_code (oedef);
1321       linearize_expr_tree (&subops[i], oedef,
1322 			   associative_tree_code (oecode), false);
1323 
1324       FOR_EACH_VEC_ELT (subops[i], j, oe1)
1325 	{
1326 	  oecount c;
1327 	  void **slot;
1328 	  size_t idx;
1329 	  c.oecode = oecode;
1330 	  c.cnt = 1;
1331 	  c.id = next_oecount_id++;
1332 	  c.op = oe1->op;
1333 	  cvec.safe_push (c);
1334 	  idx = cvec.length () + 41;
1335 	  slot = htab_find_slot (ctable, (void *)idx, INSERT);
1336 	  if (!*slot)
1337 	    {
1338 	      *slot = (void *)idx;
1339 	    }
1340 	  else
1341 	    {
1342 	      cvec.pop ();
1343 	      cvec[(size_t)*slot - 42].cnt++;
1344 	    }
1345 	}
1346     }
1347   htab_delete (ctable);
1348 
1349   /* Sort the counting table.  */
1350   cvec.qsort (oecount_cmp);
1351 
1352   if (dump_file && (dump_flags & TDF_DETAILS))
1353     {
1354       oecount *c;
1355       fprintf (dump_file, "Candidates:\n");
1356       FOR_EACH_VEC_ELT (cvec, j, c)
1357 	{
1358 	  fprintf (dump_file, "  %u %s: ", c->cnt,
1359 		   c->oecode == MULT_EXPR
1360 		   ? "*" : c->oecode == RDIV_EXPR ? "/" : "?");
1361 	  print_generic_expr (dump_file, c->op, 0);
1362 	  fprintf (dump_file, "\n");
1363 	}
1364     }
1365 
1366   /* Process the (operand, code) pairs in order of most occurrences.  */
1367   candidates2 = sbitmap_alloc (length);
1368   while (!cvec.is_empty ())
1369     {
1370       oecount *c = &cvec.last ();
1371       if (c->cnt < 2)
1372 	break;
1373 
1374       /* Now collect the operands in the outer chain that contain
1375          the common operand in their inner chain.  */
1376       bitmap_clear (candidates2);
1377       nr_candidates2 = 0;
1378       EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
1379 	{
1380 	  gimple oedef;
1381 	  enum tree_code oecode;
1382 	  unsigned j;
1383 	  tree op = (*ops)[i]->op;
1384 
1385 	  /* If we undistributed in this chain already this may be
1386 	     a constant.  */
1387 	  if (TREE_CODE (op) != SSA_NAME)
1388 	    continue;
1389 
1390 	  oedef = SSA_NAME_DEF_STMT (op);
1391 	  oecode = gimple_assign_rhs_code (oedef);
1392 	  if (oecode != c->oecode)
1393 	    continue;
1394 
1395 	  FOR_EACH_VEC_ELT (subops[i], j, oe1)
1396 	    {
1397 	      if (oe1->op == c->op)
1398 		{
1399 		  bitmap_set_bit (candidates2, i);
1400 		  ++nr_candidates2;
1401 		  break;
1402 		}
1403 	    }
1404 	}
1405 
1406       if (nr_candidates2 >= 2)
1407 	{
1408 	  operand_entry_t oe1, oe2;
1409 	  gimple prod;
1410 	  int first = bitmap_first_set_bit (candidates2);
1411 
1412 	  /* Build the new addition chain.  */
1413 	  oe1 = (*ops)[first];
1414 	  if (dump_file && (dump_flags & TDF_DETAILS))
1415 	    {
1416 	      fprintf (dump_file, "Building (");
1417 	      print_generic_expr (dump_file, oe1->op, 0);
1418 	    }
1419 	  zero_one_operation (&oe1->op, c->oecode, c->op);
1420 	  EXECUTE_IF_SET_IN_BITMAP (candidates2, first+1, i, sbi0)
1421 	    {
1422 	      gimple sum;
1423 	      oe2 = (*ops)[i];
1424 	      if (dump_file && (dump_flags & TDF_DETAILS))
1425 		{
1426 		  fprintf (dump_file, " + ");
1427 		  print_generic_expr (dump_file, oe2->op, 0);
1428 		}
1429 	      zero_one_operation (&oe2->op, c->oecode, c->op);
1430 	      sum = build_and_add_sum (TREE_TYPE (oe1->op),
1431 				       oe1->op, oe2->op, opcode);
1432 	      oe2->op = build_zero_cst (TREE_TYPE (oe2->op));
1433 	      oe2->rank = 0;
1434 	      oe1->op = gimple_get_lhs (sum);
1435 	    }
1436 
1437 	  /* Apply the multiplication/division.  */
1438 	  prod = build_and_add_sum (TREE_TYPE (oe1->op),
1439 				    oe1->op, c->op, c->oecode);
1440 	  if (dump_file && (dump_flags & TDF_DETAILS))
1441 	    {
1442 	      fprintf (dump_file, ") %s ", c->oecode == MULT_EXPR ? "*" : "/");
1443 	      print_generic_expr (dump_file, c->op, 0);
1444 	      fprintf (dump_file, "\n");
1445 	    }
1446 
1447 	  /* Record it in the addition chain and disable further
1448 	     undistribution with this op.  */
1449 	  oe1->op = gimple_assign_lhs (prod);
1450 	  oe1->rank = get_rank (oe1->op);
1451 	  subops[first].release ();
1452 
1453 	  changed = true;
1454 	}
1455 
1456       cvec.pop ();
1457     }
1458 
1459   for (i = 0; i < ops->length (); ++i)
1460     subops[i].release ();
1461   free (subops);
1462   cvec.release ();
1463   sbitmap_free (candidates);
1464   sbitmap_free (candidates2);
1465 
1466   return changed;
1467 }
1468 
1469 /* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR and CURR is a comparison
1470    expression, examine the other OPS to see if any of them are comparisons
1471    of the same values, which we may be able to combine or eliminate.
1472    For example, we can rewrite (a < b) | (a == b) as (a <= b).  */
1473 
1474 static bool
1475 eliminate_redundant_comparison (enum tree_code opcode,
1476 				vec<operand_entry_t> *ops,
1477 				unsigned int currindex,
1478 				operand_entry_t curr)
1479 {
1480   tree op1, op2;
1481   enum tree_code lcode, rcode;
1482   gimple def1, def2;
1483   int i;
1484   operand_entry_t oe;
1485 
1486   if (opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
1487     return false;
1488 
1489   /* Check that CURR is a comparison.  */
1490   if (TREE_CODE (curr->op) != SSA_NAME)
1491     return false;
1492   def1 = SSA_NAME_DEF_STMT (curr->op);
1493   if (!is_gimple_assign (def1))
1494     return false;
1495   lcode = gimple_assign_rhs_code (def1);
1496   if (TREE_CODE_CLASS (lcode) != tcc_comparison)
1497     return false;
1498   op1 = gimple_assign_rhs1 (def1);
1499   op2 = gimple_assign_rhs2 (def1);
1500 
1501   /* Now look for a similar comparison in the remaining OPS.  */
1502   for (i = currindex + 1; ops->iterate (i, &oe); i++)
1503     {
1504       tree t;
1505 
1506       if (TREE_CODE (oe->op) != SSA_NAME)
1507 	continue;
1508       def2 = SSA_NAME_DEF_STMT (oe->op);
1509       if (!is_gimple_assign (def2))
1510 	continue;
1511       rcode = gimple_assign_rhs_code (def2);
1512       if (TREE_CODE_CLASS (rcode) != tcc_comparison)
1513 	continue;
1514 
1515       /* If we got here, we have a match.  See if we can combine the
1516 	 two comparisons.  */
1517       if (opcode == BIT_IOR_EXPR)
1518 	t = maybe_fold_or_comparisons (lcode, op1, op2,
1519 				       rcode, gimple_assign_rhs1 (def2),
1520 				       gimple_assign_rhs2 (def2));
1521       else
1522 	t = maybe_fold_and_comparisons (lcode, op1, op2,
1523 					rcode, gimple_assign_rhs1 (def2),
1524 					gimple_assign_rhs2 (def2));
1525       if (!t)
1526 	continue;
1527 
1528       /* maybe_fold_and_comparisons and maybe_fold_or_comparisons
1529 	 always give us a boolean_type_node value back.  If the original
1530 	 BIT_AND_EXPR or BIT_IOR_EXPR was of a wider integer type,
1531 	 we need to convert.  */
1532       if (!useless_type_conversion_p (TREE_TYPE (curr->op), TREE_TYPE (t)))
1533 	t = fold_convert (TREE_TYPE (curr->op), t);
1534 
1535       if (TREE_CODE (t) != INTEGER_CST
1536 	  && !operand_equal_p (t, curr->op, 0))
1537 	{
1538 	  enum tree_code subcode;
1539 	  tree newop1, newop2;
1540 	  if (!COMPARISON_CLASS_P (t))
1541 	    continue;
1542 	  extract_ops_from_tree (t, &subcode, &newop1, &newop2);
1543 	  STRIP_USELESS_TYPE_CONVERSION (newop1);
1544 	  STRIP_USELESS_TYPE_CONVERSION (newop2);
1545 	  if (!is_gimple_val (newop1) || !is_gimple_val (newop2))
1546 	    continue;
1547 	}
1548 
1549       if (dump_file && (dump_flags & TDF_DETAILS))
1550 	{
1551 	  fprintf (dump_file, "Equivalence: ");
1552 	  print_generic_expr (dump_file, curr->op, 0);
1553 	  fprintf (dump_file, " %s ", op_symbol_code (opcode));
1554 	  print_generic_expr (dump_file, oe->op, 0);
1555 	  fprintf (dump_file, " -> ");
1556 	  print_generic_expr (dump_file, t, 0);
1557 	  fprintf (dump_file, "\n");
1558 	}
1559 
1560       /* Now we can delete oe, as it has been subsumed by the new combined
1561          expression t.  */
1562       ops->ordered_remove (i);
1563       reassociate_stats.ops_eliminated ++;
1564 
1565       /* If t is the same as curr->op, we're done.  Otherwise we must
1566 	 replace curr->op with t.  Special case is if we got a constant
1567 	 back, in which case we add it to the end instead of in place of
1568 	 the current entry.  */
1569       if (TREE_CODE (t) == INTEGER_CST)
1570 	{
1571 	  ops->ordered_remove (currindex);
1572 	  add_to_ops_vec (ops, t);
1573 	}
1574       else if (!operand_equal_p (t, curr->op, 0))
1575 	{
1576 	  gimple sum;
1577 	  enum tree_code subcode;
1578 	  tree newop1;
1579 	  tree newop2;
1580 	  gcc_assert (COMPARISON_CLASS_P (t));
1581 	  extract_ops_from_tree (t, &subcode, &newop1, &newop2);
1582 	  STRIP_USELESS_TYPE_CONVERSION (newop1);
1583 	  STRIP_USELESS_TYPE_CONVERSION (newop2);
1584 	  gcc_checking_assert (is_gimple_val (newop1)
1585 			       && is_gimple_val (newop2));
1586 	  sum = build_and_add_sum (TREE_TYPE (t), newop1, newop2, subcode);
1587 	  curr->op = gimple_get_lhs (sum);
1588 	}
1589       return true;
1590     }
1591 
1592   return false;
1593 }
1594 
1595 /* Perform various identities and other optimizations on the list of
1596    operand entries, stored in OPS.  The tree code for the binary
1597    operation between all the operands is OPCODE.  */
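
/* For example (sketch): given a PLUS_EXPR operand list  { a_1, b_2, 3, 5 }
   the two trailing constants are folded to 8 first, and the pairwise
   eliminations above are then applied repeatedly until nothing more
   changes.  */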
1598 
1599 static void
1600 optimize_ops_list (enum tree_code opcode,
1601 		   vec<operand_entry_t> *ops)
1602 {
1603   unsigned int length = ops->length ();
1604   unsigned int i;
1605   operand_entry_t oe;
1606   operand_entry_t oelast = NULL;
1607   bool iterate = false;
1608 
1609   if (length == 1)
1610     return;
1611 
1612   oelast = ops->last ();
1613 
1614   /* If the last two are constants, pop the constants off, merge them
1615      and try the next two.  */
1616   if (oelast->rank == 0 && is_gimple_min_invariant (oelast->op))
1617     {
1618       operand_entry_t oelm1 = (*ops)[length - 2];
1619 
1620       if (oelm1->rank == 0
1621 	  && is_gimple_min_invariant (oelm1->op)
1622 	  && useless_type_conversion_p (TREE_TYPE (oelm1->op),
1623 				       TREE_TYPE (oelast->op)))
1624 	{
1625 	  tree folded = fold_binary (opcode, TREE_TYPE (oelm1->op),
1626 				     oelm1->op, oelast->op);
1627 
1628 	  if (folded && is_gimple_min_invariant (folded))
1629 	    {
1630 	      if (dump_file && (dump_flags & TDF_DETAILS))
1631 		fprintf (dump_file, "Merging constants\n");
1632 
1633 	      ops->pop ();
1634 	      ops->pop ();
1635 
1636 	      add_to_ops_vec (ops, folded);
1637 	      reassociate_stats.constants_eliminated++;
1638 
1639 	      optimize_ops_list (opcode, ops);
1640 	      return;
1641 	    }
1642 	}
1643     }
1644 
1645   eliminate_using_constants (opcode, ops);
1646   oelast = NULL;
1647 
1648   for (i = 0; ops->iterate (i, &oe);)
1649     {
1650       bool done = false;
1651 
1652       if (eliminate_not_pairs (opcode, ops, i, oe))
1653 	return;
1654       if (eliminate_duplicate_pair (opcode, ops, &done, i, oe, oelast)
1655 	  || (!done && eliminate_plus_minus_pair (opcode, ops, i, oe))
1656 	  || (!done && eliminate_redundant_comparison (opcode, ops, i, oe)))
1657 	{
1658 	  if (done)
1659 	    return;
1660 	  iterate = true;
1661 	  oelast = NULL;
1662 	  continue;
1663 	}
1664       oelast = oe;
1665       i++;
1666     }
1667 
1668   length = ops->length ();
1669   oelast = ops->last ();
1670 
1671   if (iterate)
1672     optimize_ops_list (opcode, ops);
1673 }
1674 
1675 /* The following functions are subroutines to optimize_range_tests and allow
1676    it to try to change a logical combination of comparisons into a range
1677    test.
1678 
1679    For example, both
1680 	X == 2 || X == 5 || X == 3 || X == 4
1681    and
1682 	X >= 2 && X <= 5
1683    are converted to
1684 	(unsigned) (X - 2) <= 3
1685 
1686    For more information see comments above fold_range_test in fold-const.c,
1687    this implementation is for GIMPLE.  */
1688 
1689 struct range_entry
1690 {
1691   tree exp;
1692   tree low;
1693   tree high;
1694   bool in_p;
1695   bool strict_overflow_p;
1696   unsigned int idx, next;
1697 };
1698 
1699 /* This is similar to make_range in fold-const.c, but on top of
1700    GIMPLE instead of trees.  If EXP is non-NULL, it should be
1701    an SSA_NAME and STMT argument is ignored, otherwise STMT
1702    argument should be a GIMPLE_COND.  */
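
/* For example (sketch): for a boolean EXP whose definition cannot be
   looked through, the recorded range is simply "EXP != 0", i.e.
   exp = EXP, low = high = 0 and in_p = false (EXP is not in [0, 0]).  */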
1703 
1704 static void
1705 init_range_entry (struct range_entry *r, tree exp, gimple stmt)
1706 {
1707   int in_p;
1708   tree low, high;
1709   bool is_bool, strict_overflow_p;
1710 
1711   r->exp = NULL_TREE;
1712   r->in_p = false;
1713   r->strict_overflow_p = false;
1714   r->low = NULL_TREE;
1715   r->high = NULL_TREE;
1716   if (exp != NULL_TREE
1717       && (TREE_CODE (exp) != SSA_NAME || !INTEGRAL_TYPE_P (TREE_TYPE (exp))))
1718     return;
1719 
1720   /* Start with simply saying "EXP != 0" and then look at the code of EXP
1721      and see if we can refine the range.  Some of the cases below may not
1722      happen, but it doesn't seem worth worrying about this.  We "continue"
1723      the outer loop when we've changed something; otherwise we "break"
1724      the switch, which will "break" the while.  */
1725   low = exp ? build_int_cst (TREE_TYPE (exp), 0) : boolean_false_node;
1726   high = low;
1727   in_p = 0;
1728   strict_overflow_p = false;
1729   is_bool = false;
1730   if (exp == NULL_TREE)
1731     is_bool = true;
1732   else if (TYPE_PRECISION (TREE_TYPE (exp)) == 1)
1733     {
1734       if (TYPE_UNSIGNED (TREE_TYPE (exp)))
1735 	is_bool = true;
1736       else
1737 	return;
1738     }
1739   else if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE)
1740     is_bool = true;
1741 
1742   while (1)
1743     {
1744       enum tree_code code;
1745       tree arg0, arg1, exp_type;
1746       tree nexp;
1747       location_t loc;
1748 
1749       if (exp != NULL_TREE)
1750 	{
1751 	  if (TREE_CODE (exp) != SSA_NAME)
1752 	    break;
1753 
1754 	  stmt = SSA_NAME_DEF_STMT (exp);
1755 	  if (!is_gimple_assign (stmt))
1756 	    break;
1757 
1758 	  code = gimple_assign_rhs_code (stmt);
1759 	  arg0 = gimple_assign_rhs1 (stmt);
1760 	  arg1 = gimple_assign_rhs2 (stmt);
1761 	  exp_type = TREE_TYPE (exp);
1762 	}
1763       else
1764 	{
1765 	  code = gimple_cond_code (stmt);
1766 	  arg0 = gimple_cond_lhs (stmt);
1767 	  arg1 = gimple_cond_rhs (stmt);
1768 	  exp_type = boolean_type_node;
1769 	}
1770 
1771       if (TREE_CODE (arg0) != SSA_NAME)
1772 	break;
1773       loc = gimple_location (stmt);
1774       switch (code)
1775 	{
1776 	case BIT_NOT_EXPR:
1777 	  if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE)
1778 	    {
1779 	      in_p = !in_p;
1780 	      exp = arg0;
1781 	      continue;
1782 	    }
1783 	  break;
1784 	case SSA_NAME:
1785 	  exp = arg0;
1786 	  continue;
1787 	CASE_CONVERT:
1788 	  if (is_bool)
1789 	    goto do_default;
1790 	  if (TYPE_PRECISION (TREE_TYPE (arg0)) == 1)
1791 	    {
1792 	      if (TYPE_UNSIGNED (TREE_TYPE (arg0)))
1793 		is_bool = true;
1794 	      else
1795 		return;
1796 	    }
1797 	  else if (TREE_CODE (TREE_TYPE (arg0)) == BOOLEAN_TYPE)
1798 	    is_bool = true;
1799 	  goto do_default;
1800 	case EQ_EXPR:
1801 	case NE_EXPR:
1802 	case LT_EXPR:
1803 	case LE_EXPR:
1804 	case GE_EXPR:
1805 	case GT_EXPR:
1806 	  is_bool = true;
1807 	  /* FALLTHRU */
1808 	default:
1809 	  if (!is_bool)
1810 	    return;
1811 	do_default:
1812 	  nexp = make_range_step (loc, code, arg0, arg1, exp_type,
1813 				  &low, &high, &in_p,
1814 				  &strict_overflow_p);
1815 	  if (nexp != NULL_TREE)
1816 	    {
1817 	      exp = nexp;
1818 	      gcc_assert (TREE_CODE (exp) == SSA_NAME);
1819 	      continue;
1820 	    }
1821 	  break;
1822 	}
1823       break;
1824     }
1825   if (is_bool)
1826     {
1827       r->exp = exp;
1828       r->in_p = in_p;
1829       r->low = low;
1830       r->high = high;
1831       r->strict_overflow_p = strict_overflow_p;
1832     }
1833 }
1834 
1835 /* Comparison function for qsort.  Sort entries
1836    without SSA_NAME exp first, then with SSA_NAMEs sorted
1837    by increasing SSA_NAME_VERSION, and for the same SSA_NAMEs
1838    by increasing ->low and if ->low is the same, by increasing
1839    ->high.  ->low == NULL_TREE means minimum, ->high == NULL_TREE
1840    maximum.  */
1841 
1842 static int
1843 range_entry_cmp (const void *a, const void *b)
1844 {
1845   const struct range_entry *p = (const struct range_entry *) a;
1846   const struct range_entry *q = (const struct range_entry *) b;
1847 
1848   if (p->exp != NULL_TREE && TREE_CODE (p->exp) == SSA_NAME)
1849     {
1850       if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
1851 	{
1852 	  /* Group range_entries for the same SSA_NAME together.  */
1853 	  if (SSA_NAME_VERSION (p->exp) < SSA_NAME_VERSION (q->exp))
1854 	    return -1;
1855 	  else if (SSA_NAME_VERSION (p->exp) > SSA_NAME_VERSION (q->exp))
1856 	    return 1;
1857 	  /* If ->low is different, NULL low goes first, then by
1858 	     ascending low.  */
1859 	  if (p->low != NULL_TREE)
1860 	    {
1861 	      if (q->low != NULL_TREE)
1862 		{
1863 		  tree tem = fold_binary (LT_EXPR, boolean_type_node,
1864 					  p->low, q->low);
1865 		  if (tem && integer_onep (tem))
1866 		    return -1;
1867 		  tem = fold_binary (GT_EXPR, boolean_type_node,
1868 				     p->low, q->low);
1869 		  if (tem && integer_onep (tem))
1870 		    return 1;
1871 		}
1872 	      else
1873 		return 1;
1874 	    }
1875 	  else if (q->low != NULL_TREE)
1876 	    return -1;
1877 	  /* If ->high is different, NULL high goes last, before that by
1878 	     ascending high.  */
1879 	  if (p->high != NULL_TREE)
1880 	    {
1881 	      if (q->high != NULL_TREE)
1882 		{
1883 		  tree tem = fold_binary (LT_EXPR, boolean_type_node,
1884 					  p->high, q->high);
1885 		  if (tem && integer_onep (tem))
1886 		    return -1;
1887 		  tem = fold_binary (GT_EXPR, boolean_type_node,
1888 				     p->high, q->high);
1889 		  if (tem && integer_onep (tem))
1890 		    return 1;
1891 		}
1892 	      else
1893 		return -1;
1894 	    }
1895 	  else if (p->high != NULL_TREE)
1896 	    return 1;
1897 	  /* If both ranges are the same, sort below by ascending idx.  */
1898 	}
1899       else
1900 	return 1;
1901     }
1902   else if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
1903     return -1;
1904 
1905   if (p->idx < q->idx)
1906     return -1;
1907   else
1908     {
1909       gcc_checking_assert (p->idx > q->idx);
1910       return 1;
1911     }
1912 }
1913 
1914 /* Helper routine of optimize_range_tests.
1915    [EXP, IN_P, LOW, HIGH, STRICT_OVERFLOW_P] is a merged range for
1916    RANGE and OTHERRANGE through OTHERRANGE + COUNT - 1 ranges,
1917    OPCODE and OPS are arguments of optimize_range_tests.  Return
1918    true if the range merge has been successful.
1919    If OPCODE is ERROR_MARK, this is called from within
1920    maybe_optimize_range_tests and is performing inter-bb range optimization.
1921    Changes should then be performed right away, and whether an op is
1922    BIT_AND_EXPR or BIT_IOR_EXPR is found in oe->rank.  */
1923 
1924 static bool
1925 update_range_test (struct range_entry *range, struct range_entry *otherrange,
1926 		   unsigned int count, enum tree_code opcode,
1927 		   vec<operand_entry_t> *ops, tree exp, bool in_p,
1928 		   tree low, tree high, bool strict_overflow_p)
1929 {
1930   operand_entry_t oe = (*ops)[range->idx];
1931   tree op = oe->op;
1932   gimple stmt = op ? SSA_NAME_DEF_STMT (op) : last_stmt (BASIC_BLOCK (oe->id));
1933   location_t loc = gimple_location (stmt);
1934   tree optype = op ? TREE_TYPE (op) : boolean_type_node;
1935   tree tem = build_range_check (loc, optype, exp, in_p, low, high);
1936   enum warn_strict_overflow_code wc = WARN_STRICT_OVERFLOW_COMPARISON;
1937   gimple_stmt_iterator gsi;
1938 
1939   if (tem == NULL_TREE)
1940     return false;
1941 
1942   if (strict_overflow_p && issue_strict_overflow_warning (wc))
1943     warning_at (loc, OPT_Wstrict_overflow,
1944 		"assuming signed overflow does not occur "
1945 		"when simplifying range test");
1946 
1947   if (dump_file && (dump_flags & TDF_DETAILS))
1948     {
1949       struct range_entry *r;
1950       fprintf (dump_file, "Optimizing range tests ");
1951       print_generic_expr (dump_file, range->exp, 0);
1952       fprintf (dump_file, " %c[", range->in_p ? '+' : '-');
1953       print_generic_expr (dump_file, range->low, 0);
1954       fprintf (dump_file, ", ");
1955       print_generic_expr (dump_file, range->high, 0);
1956       fprintf (dump_file, "]");
1957       for (r = otherrange; r < otherrange + count; r++)
1958 	{
1959 	  fprintf (dump_file, " and %c[", r->in_p ? '+' : '-');
1960 	  print_generic_expr (dump_file, r->low, 0);
1961 	  fprintf (dump_file, ", ");
1962 	  print_generic_expr (dump_file, r->high, 0);
1963 	  fprintf (dump_file, "]");
1964 	}
1965       fprintf (dump_file, "\n into ");
1966       print_generic_expr (dump_file, tem, 0);
1967       fprintf (dump_file, "\n");
1968     }
1969 
1970   if (opcode == BIT_IOR_EXPR
1971       || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
1972     tem = invert_truthvalue_loc (loc, tem);
1973 
1974   tem = fold_convert_loc (loc, optype, tem);
1975   gsi = gsi_for_stmt (stmt);
1976   tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE, true,
1977 				  GSI_SAME_STMT);
1978 
1979   /* If doing inter-bb range test optimization, update the
1980      stmts immediately.  Start with changing the first range test
1981      immediate use to the new value (TEM), or, if the first range
1982      test is a GIMPLE_COND stmt, change that condition.  */
1983   if (opcode == ERROR_MARK)
1984     {
1985       if (op)
1986 	{
1987 	  imm_use_iterator iter;
1988 	  use_operand_p use_p;
1989 	  gimple use_stmt;
1990 
1991 	  FOR_EACH_IMM_USE_STMT (use_stmt, iter, op)
1992 	    {
1993 	      if (is_gimple_debug (use_stmt))
1994 		continue;
1995 	      FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
1996 		SET_USE (use_p, tem);
1997 	      update_stmt (use_stmt);
1998 	    }
1999 	}
2000       else
2001 	{
2002 	  gimple_cond_set_code (stmt, NE_EXPR);
2003 	  gimple_cond_set_lhs (stmt, tem);
2004 	  gimple_cond_set_rhs (stmt, boolean_false_node);
2005 	  update_stmt (stmt);
2006 	}
2007     }
2008   oe->op = tem;
2009   range->exp = exp;
2010   range->low = low;
2011   range->high = high;
2012   range->in_p = in_p;
2013   range->strict_overflow_p = false;
2014 
2015   for (range = otherrange; range < otherrange + count; range++)
2016     {
2017       oe = (*ops)[range->idx];
2018       /* Now change all the other range test immediate uses, so that
2019 	 those tests will be optimized away.  */
2020       if (opcode == ERROR_MARK)
2021 	{
2022 	  if (oe->op)
2023 	    {
2024 	      imm_use_iterator iter;
2025 	      use_operand_p use_p;
2026 	      gimple use_stmt;
2027 
2028 	      FOR_EACH_IMM_USE_STMT (use_stmt, iter, oe->op)
2029 		{
2030 		  if (is_gimple_debug (use_stmt))
2031 		    continue;
2032 		  /* If imm use of _8 is a statement like _7 = _8 | _9;,
2033 		     adjust it into _7 = _9;.  */
2034 		  if (is_gimple_assign (use_stmt)
2035 		      && gimple_assign_rhs_code (use_stmt) == oe->rank)
2036 		    {
2037 		      tree expr = NULL_TREE;
2038 		      if (oe->op == gimple_assign_rhs1 (use_stmt))
2039 			expr = gimple_assign_rhs2 (use_stmt);
2040 		      else if (oe->op == gimple_assign_rhs2 (use_stmt))
2041 			expr = gimple_assign_rhs1 (use_stmt);
2042 		      if (expr
2043 			  && expr != oe->op
2044 			  && TREE_CODE (expr) == SSA_NAME)
2045 			{
2046 			  gimple_stmt_iterator gsi2 = gsi_for_stmt (use_stmt);
2047 			  gimple_assign_set_rhs_with_ops (&gsi2, SSA_NAME,
2048 							  expr, NULL_TREE);
2049 			  update_stmt (use_stmt);
2050 			  continue;
2051 			}
2052 		    }
2053 		  /* If imm use of _8 is a statement like _7 = (int) _8;,
2054 		     adjust it into _7 = 0; or _7 = 1;.  */
2055 		  if (gimple_assign_cast_p (use_stmt)
2056 		      && oe->op == gimple_assign_rhs1 (use_stmt))
2057 		    {
2058 		      tree lhs = gimple_assign_lhs (use_stmt);
2059 		      if (INTEGRAL_TYPE_P (TREE_TYPE (lhs)))
2060 			{
2061 			  gimple_stmt_iterator gsi2
2062 			    = gsi_for_stmt (use_stmt);
2063 			  tree expr = build_int_cst (TREE_TYPE (lhs),
2064 						     oe->rank == BIT_IOR_EXPR
2065 						     ? 0 : 1);
2066 			  gimple_assign_set_rhs_with_ops (&gsi2,
2067 							  INTEGER_CST,
2068 							  expr, NULL_TREE);
2069 			  update_stmt (use_stmt);
2070 			  continue;
2071 			}
2072 		    }
2073 		  /* Otherwise replace the use with 0 or 1.  */
2074 		  FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
2075 		    SET_USE (use_p,
2076 			     build_int_cst (TREE_TYPE (oe->op),
2077 					    oe->rank == BIT_IOR_EXPR
2078 					    ? 0 : 1));
2079 		  update_stmt (use_stmt);
2080 		}
2081 	    }
2082 	  else
2083 	    {
2084 	      /* If range test was a GIMPLE_COND, simply change it
2085 		 into an always false or always true condition.  */
2086 	      stmt = last_stmt (BASIC_BLOCK (oe->id));
2087 	      if (oe->rank == BIT_IOR_EXPR)
2088 		gimple_cond_make_false (stmt);
2089 	      else
2090 		gimple_cond_make_true (stmt);
2091 	      update_stmt (stmt);
2092 	    }
2093 	}
2094       oe->op = error_mark_node;
2095       range->exp = NULL_TREE;
2096     }
2097   return true;
2098 }
2099 
2100 /* Optimize range tests, similarly to how fold_range_test optimizes
2101    them on trees.  The tree code for the binary
2102    operation between all the operands is OPCODE.
2103    If OPCODE is ERROR_MARK, optimize_range_tests is called from within
2104    maybe_optimize_range_tests for inter-bb range optimization.
2105    In that case if oe->op is NULL, oe->id is bb->index whose
2106    GIMPLE_COND is && or ||ed into the test, and oe->rank says
2107    the actual opcode.  */
2108 
2109 static void
2110 optimize_range_tests (enum tree_code opcode,
2111 		      vec<operand_entry_t> *ops)
2112 {
2113   unsigned int length = ops->length (), i, j, first;
2114   operand_entry_t oe;
2115   struct range_entry *ranges;
2116   bool any_changes = false;
2117 
2118   if (length == 1)
2119     return;
2120 
2121   ranges = XNEWVEC (struct range_entry, length);
2122   for (i = 0; i < length; i++)
2123     {
2124       oe = (*ops)[i];
2125       ranges[i].idx = i;
2126       init_range_entry (ranges + i, oe->op,
2127 			oe->op ? NULL : last_stmt (BASIC_BLOCK (oe->id)));
2128       /* For |, invert it now; we will invert it again before emitting
2129 	 the optimized expression.  */
2130       if (opcode == BIT_IOR_EXPR
2131 	  || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
2132 	ranges[i].in_p = !ranges[i].in_p;
2133     }
2134 
2135   qsort (ranges, length, sizeof (*ranges), range_entry_cmp);
2136   for (i = 0; i < length; i++)
2137     if (ranges[i].exp != NULL_TREE && TREE_CODE (ranges[i].exp) == SSA_NAME)
2138       break;
2139 
2140   /* Try to merge ranges.  */
2141   for (first = i; i < length; i++)
2142     {
2143       tree low = ranges[i].low;
2144       tree high = ranges[i].high;
2145       int in_p = ranges[i].in_p;
2146       bool strict_overflow_p = ranges[i].strict_overflow_p;
2147       int update_fail_count = 0;
2148 
2149       for (j = i + 1; j < length; j++)
2150 	{
2151 	  if (ranges[i].exp != ranges[j].exp)
2152 	    break;
2153 	  if (!merge_ranges (&in_p, &low, &high, in_p, low, high,
2154 			     ranges[j].in_p, ranges[j].low, ranges[j].high))
2155 	    break;
2156 	  strict_overflow_p |= ranges[j].strict_overflow_p;
2157 	}
2158 
2159       if (j == i + 1)
2160 	continue;
2161 
2162       if (update_range_test (ranges + i, ranges + i + 1, j - i - 1, opcode,
2163 			     ops, ranges[i].exp, in_p, low, high,
2164 			     strict_overflow_p))
2165 	{
2166 	  i = j - 1;
2167 	  any_changes = true;
2168 	}
2169       /* Avoid quadratic complexity if all merge_ranges calls would succeed,
2170 	 while update_range_test would fail.  */
2171       else if (update_fail_count == 64)
2172 	i = j - 1;
2173       else
2174 	++update_fail_count;
2175     }
2176 
2177   /* Optimize X == CST1 || X == CST2
2178      if popcount (CST1 ^ CST2) == 1 into
2179      (X & ~(CST1 ^ CST2)) == (CST1 & ~(CST1 ^ CST2)).
2180      Similarly for ranges.  E.g.
2181      X != 2 && X != 3 && X != 10 && X != 11
2182      will be transformed by the above loop into
2183      (X - 2U) <= 1U && (X - 10U) <= 1U
2184      and this loop can transform that into
2185      ((X & ~8) - 2U) <= 1U.  */
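  /* For that example, the intervals [2, 3] and [10, 11] differ only in
     bit 3 (their low bounds xor to 8, as do their high bounds), so
     clearing that bit maps 10 and 11 onto 2 and 3 and a single range
     check on X & ~8 covers all four values.  */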
2186   for (i = first; i < length; i++)
2187     {
2188       tree lowi, highi, lowj, highj, type, lowxor, highxor, tem, exp;
2189 
2190       if (ranges[i].exp == NULL_TREE || ranges[i].in_p)
2191 	continue;
2192       type = TREE_TYPE (ranges[i].exp);
2193       if (!INTEGRAL_TYPE_P (type))
2194 	continue;
2195       lowi = ranges[i].low;
2196       if (lowi == NULL_TREE)
2197 	lowi = TYPE_MIN_VALUE (type);
2198       highi = ranges[i].high;
2199       if (highi == NULL_TREE)
2200 	continue;
2201       for (j = i + 1; j < length && j < i + 64; j++)
2202 	{
2203 	  if (ranges[j].exp == NULL_TREE)
2204 	    continue;
2205 	  if (ranges[i].exp != ranges[j].exp)
2206 	    break;
2207 	  if (ranges[j].in_p)
2208 	    continue;
2209 	  lowj = ranges[j].low;
2210 	  if (lowj == NULL_TREE)
2211 	    continue;
2212 	  highj = ranges[j].high;
2213 	  if (highj == NULL_TREE)
2214 	    highj = TYPE_MAX_VALUE (type);
2215 	  tem = fold_binary (GT_EXPR, boolean_type_node,
2216 			     lowj, highi);
2217 	  if (tem == NULL_TREE || !integer_onep (tem))
2218 	    continue;
2219 	  lowxor = fold_binary (BIT_XOR_EXPR, type, lowi, lowj);
2220 	  if (lowxor == NULL_TREE || TREE_CODE (lowxor) != INTEGER_CST)
2221 	    continue;
2222 	  gcc_checking_assert (!integer_zerop (lowxor));
2223 	  tem = fold_binary (MINUS_EXPR, type, lowxor,
2224 			     build_int_cst (type, 1));
2225 	  if (tem == NULL_TREE)
2226 	    continue;
2227 	  tem = fold_binary (BIT_AND_EXPR, type, lowxor, tem);
2228 	  if (tem == NULL_TREE || !integer_zerop (tem))
2229 	    continue;
2230 	  highxor = fold_binary (BIT_XOR_EXPR, type, highi, highj);
2231 	  if (!tree_int_cst_equal (lowxor, highxor))
2232 	    continue;
2233 	  tem = fold_build1 (BIT_NOT_EXPR, type, lowxor);
2234 	  exp = fold_build2 (BIT_AND_EXPR, type, ranges[i].exp, tem);
2235 	  lowj = fold_build2 (BIT_AND_EXPR, type, lowi, tem);
2236 	  highj = fold_build2 (BIT_AND_EXPR, type, highi, tem);
2237 	  if (update_range_test (ranges + i, ranges + j, 1, opcode, ops, exp,
2238 				 ranges[i].in_p, lowj, highj,
2239 				 ranges[i].strict_overflow_p
2240 				 || ranges[j].strict_overflow_p))
2241 	    {
2242 	      any_changes = true;
2243 	      break;
2244 	    }
2245 	}
2246     }
2247 
2248   if (any_changes && opcode != ERROR_MARK)
2249     {
2250       j = 0;
2251       FOR_EACH_VEC_ELT (*ops, i, oe)
2252 	{
2253 	  if (oe->op == error_mark_node)
2254 	    continue;
2255 	  else if (i != j)
2256 	    (*ops)[j] = oe;
2257 	  j++;
2258 	}
2259       ops->truncate (j);
2260     }
2261 
2262   XDELETEVEC (ranges);
2263 }
2264 
2265 /* Return true if STMT is a cast like:
2266    <bb N>:
2267    ...
2268    _123 = (int) _234;
2269 
2270    <bb M>:
2271    # _345 = PHI <_123(N), 1(...), 1(...)>
2272    where _234 has bool type, _123 has a single use and
2273    bb N has a single successor M.  This is commonly used in
2274    the last block of a range test.  */
2275 
2276 static bool
2277 final_range_test_p (gimple stmt)
2278 {
2279   basic_block bb, rhs_bb;
2280   edge e;
2281   tree lhs, rhs;
2282   use_operand_p use_p;
2283   gimple use_stmt;
2284 
2285   if (!gimple_assign_cast_p (stmt))
2286     return false;
2287   bb = gimple_bb (stmt);
2288   if (!single_succ_p (bb))
2289     return false;
2290   e = single_succ_edge (bb);
2291   if (e->flags & EDGE_COMPLEX)
2292     return false;
2293 
2294   lhs = gimple_assign_lhs (stmt);
2295   rhs = gimple_assign_rhs1 (stmt);
2296   if (!INTEGRAL_TYPE_P (TREE_TYPE (lhs))
2297       || TREE_CODE (rhs) != SSA_NAME
2298       || TREE_CODE (TREE_TYPE (rhs)) != BOOLEAN_TYPE)
2299     return false;
2300 
2301   /* Test whether lhs is consumed only by a PHI in the only successor bb.  */
2302   if (!single_imm_use (lhs, &use_p, &use_stmt))
2303     return false;
2304 
2305   if (gimple_code (use_stmt) != GIMPLE_PHI
2306       || gimple_bb (use_stmt) != e->dest)
2307     return false;
2308 
2309   /* And that the rhs is defined in the same loop.  */
2310   rhs_bb = gimple_bb (SSA_NAME_DEF_STMT (rhs));
2311   if (rhs_bb == NULL
2312       || !flow_bb_inside_loop_p (loop_containing_stmt (stmt), rhs_bb))
2313     return false;
2314 
2315   return true;
2316 }
2317 
2318 /* Return true if BB is a suitable basic block for inter-bb range test
2319    optimization.  If BACKWARD is true, BB should be the only predecessor
2320    of TEST_BB, and *OTHER_BB is either NULL and filled in by the routine,
2321    or is compared against to find the common basic block to which all the
2322    conditions branch if true resp. false.  If BACKWARD is false, TEST_BB
2323    should be the only predecessor of BB.  */
2324 
2325 static bool
2326 suitable_cond_bb (basic_block bb, basic_block test_bb, basic_block *other_bb,
2327 		  bool backward)
2328 {
2329   edge_iterator ei, ei2;
2330   edge e, e2;
2331   gimple stmt;
2332   gimple_stmt_iterator gsi;
2333   bool other_edge_seen = false;
2334   bool is_cond;
2335 
2336   if (test_bb == bb)
2337     return false;
2338   /* Check last stmt first.  */
2339   stmt = last_stmt (bb);
2340   if (stmt == NULL
2341       || (gimple_code (stmt) != GIMPLE_COND
2342 	  && (backward || !final_range_test_p (stmt)))
2343       || gimple_visited_p (stmt)
2344       || stmt_could_throw_p (stmt)
2345       || *other_bb == bb)
2346     return false;
2347   is_cond = gimple_code (stmt) == GIMPLE_COND;
2348   if (is_cond)
2349     {
2350       /* If last stmt is GIMPLE_COND, verify that one of the succ edges
2351 	 goes to the next bb (if BACKWARD, it is TEST_BB), and the other
2352 	 to *OTHER_BB (if not set yet, try to find it out).  */
2353       if (EDGE_COUNT (bb->succs) != 2)
2354 	return false;
2355       FOR_EACH_EDGE (e, ei, bb->succs)
2356 	{
2357 	  if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
2358 	    return false;
2359 	  if (e->dest == test_bb)
2360 	    {
2361 	      if (backward)
2362 		continue;
2363 	      else
2364 		return false;
2365 	    }
2366 	  if (e->dest == bb)
2367 	    return false;
2368 	  if (*other_bb == NULL)
2369 	    {
2370 	      FOR_EACH_EDGE (e2, ei2, test_bb->succs)
2371 		if (!(e2->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
2372 		  return false;
2373 		else if (e->dest == e2->dest)
2374 		  *other_bb = e->dest;
2375 	      if (*other_bb == NULL)
2376 		return false;
2377 	    }
2378 	  if (e->dest == *other_bb)
2379 	    other_edge_seen = true;
2380 	  else if (backward)
2381 	    return false;
2382 	}
2383       if (*other_bb == NULL || !other_edge_seen)
2384 	return false;
2385     }
2386   else if (single_succ (bb) != *other_bb)
2387     return false;
2388 
2389   /* Now check all PHIs of *OTHER_BB.  */
2390   e = find_edge (bb, *other_bb);
2391   e2 = find_edge (test_bb, *other_bb);
2392   for (gsi = gsi_start_phis (e->dest); !gsi_end_p (gsi); gsi_next (&gsi))
2393     {
2394       gimple phi = gsi_stmt (gsi);
2395       /* If both BB and TEST_BB end with GIMPLE_COND, all PHI arguments
2396 	 corresponding to BB and TEST_BB predecessor must be the same.  */
2397       if (!operand_equal_p (gimple_phi_arg_def (phi, e->dest_idx),
2398 			    gimple_phi_arg_def (phi, e2->dest_idx), 0))
2399 	{
2400 	  /* Otherwise, if one of the blocks doesn't end with GIMPLE_COND,
2401 	     one of the PHIs should have the lhs of the last stmt in
2402 	     that block as PHI arg and that PHI should have 0 or 1
2403 	     corresponding to it in all other range test basic blocks
2404 	     considered.  */
2405 	  if (!is_cond)
2406 	    {
2407 	      if (gimple_phi_arg_def (phi, e->dest_idx)
2408 		  == gimple_assign_lhs (stmt)
2409 		  && (integer_zerop (gimple_phi_arg_def (phi, e2->dest_idx))
2410 		      || integer_onep (gimple_phi_arg_def (phi,
2411 							   e2->dest_idx))))
2412 		continue;
2413 	    }
2414 	  else
2415 	    {
2416 	      gimple test_last = last_stmt (test_bb);
2417 	      if (gimple_code (test_last) != GIMPLE_COND
2418 		  && gimple_phi_arg_def (phi, e2->dest_idx)
2419 		     == gimple_assign_lhs (test_last)
2420 		  && (integer_zerop (gimple_phi_arg_def (phi, e->dest_idx))
2421 		      || integer_onep (gimple_phi_arg_def (phi, e->dest_idx))))
2422 		continue;
2423 	    }
2424 
2425 	  return false;
2426 	}
2427     }
2428   return true;
2429 }
2430 
2431 /* Return true if BB doesn't have side-effects that would disallow
2432    range test optimization, all SSA_NAMEs set in the bb are consumed
2433    in the bb and there are no PHIs.  */
2434 
2435 static bool
2436 no_side_effect_bb (basic_block bb)
2437 {
2438   gimple_stmt_iterator gsi;
2439   gimple last;
2440 
2441   if (!gimple_seq_empty_p (phi_nodes (bb)))
2442     return false;
2443   last = last_stmt (bb);
2444   for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2445     {
2446       gimple stmt = gsi_stmt (gsi);
2447       tree lhs;
2448       imm_use_iterator imm_iter;
2449       use_operand_p use_p;
2450 
2451       if (is_gimple_debug (stmt))
2452 	continue;
2453       if (gimple_has_side_effects (stmt))
2454 	return false;
2455       if (stmt == last)
2456 	return true;
2457       if (!is_gimple_assign (stmt))
2458 	return false;
2459       lhs = gimple_assign_lhs (stmt);
2460       if (TREE_CODE (lhs) != SSA_NAME)
2461 	return false;
2462       if (gimple_assign_rhs_could_trap_p (stmt))
2463 	return false;
2464       FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
2465 	{
2466 	  gimple use_stmt = USE_STMT (use_p);
2467 	  if (is_gimple_debug (use_stmt))
2468 	    continue;
2469 	  if (gimple_bb (use_stmt) != bb)
2470 	    return false;
2471 	}
2472     }
2473   return false;
2474 }
2475 
2476 /* If VAR is set by CODE (BIT_{AND,IOR}_EXPR) which is reassociable,
2477    return true and fill in *OPS recursively.  */
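
/* A sketch of the recursion (hypothetical SSA names): for VAR being _9 with
   _9 = _5 | _7 and _5 = _1 | _3, all with CODE BIT_IOR_EXPR and single uses,
   the leaves _1, _3 and _7 end up in *OPS while the inner statements are
   merely marked as visited.  */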
2478 
2479 static bool
2480 get_ops (tree var, enum tree_code code, vec<operand_entry_t> *ops,
2481 	 struct loop *loop)
2482 {
2483   gimple stmt = SSA_NAME_DEF_STMT (var);
2484   tree rhs[2];
2485   int i;
2486 
2487   if (!is_reassociable_op (stmt, code, loop))
2488     return false;
2489 
2490   rhs[0] = gimple_assign_rhs1 (stmt);
2491   rhs[1] = gimple_assign_rhs2 (stmt);
2492   gimple_set_visited (stmt, true);
2493   for (i = 0; i < 2; i++)
2494     if (TREE_CODE (rhs[i]) == SSA_NAME
2495 	&& !get_ops (rhs[i], code, ops, loop)
2496 	&& has_single_use (rhs[i]))
2497       {
2498 	operand_entry_t oe = (operand_entry_t) pool_alloc (operand_entry_pool);
2499 
2500 	oe->op = rhs[i];
2501 	oe->rank = code;
2502 	oe->id = 0;
2503 	oe->count = 1;
2504 	ops->safe_push (oe);
2505       }
2506   return true;
2507 }
2508 
2509 /* Inter-bb range test optimization.  */
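
/* A sketch of the situation this handles: several consecutive basic blocks,
   each ending in a GIMPLE_COND (e.g. short-circuit code expanded from
   "if (a_1 == 3 || a_1 == 5 || ...)"), all branching to one common block.
   The conditions are collected into a single operand list, as if they were
   operands of one BIT_IOR_EXPR or BIT_AND_EXPR, and handed to
   optimize_range_tests with ERROR_MARK as the opcode.  */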
2510 
2511 static void
2512 maybe_optimize_range_tests (gimple stmt)
2513 {
2514   basic_block first_bb = gimple_bb (stmt);
2515   basic_block last_bb = first_bb;
2516   basic_block other_bb = NULL;
2517   basic_block bb;
2518   edge_iterator ei;
2519   edge e;
2520   vec<operand_entry_t> ops = vNULL;
2521 
2522   /* Consider only basic blocks that end with GIMPLE_COND or
2523      a cast statement satisfying final_range_test_p.  All
2524      but the last bb in the first_bb .. last_bb range
2525      should end with GIMPLE_COND.  */
2526   if (gimple_code (stmt) == GIMPLE_COND)
2527     {
2528       if (EDGE_COUNT (first_bb->succs) != 2)
2529 	return;
2530     }
2531   else if (final_range_test_p (stmt))
2532     other_bb = single_succ (first_bb);
2533   else
2534     return;
2535 
2536   if (stmt_could_throw_p (stmt))
2537     return;
2538 
2539   /* As relative ordering of post-dominator sons isn't fixed,
2540      maybe_optimize_range_tests can be called first on any
2541      bb in the range we want to optimize.  So, start searching
2542      backwards, if first_bb can be set to a predecessor.  */
2543   while (single_pred_p (first_bb))
2544     {
2545       basic_block pred_bb = single_pred (first_bb);
2546       if (!suitable_cond_bb (pred_bb, first_bb, &other_bb, true))
2547 	break;
2548       if (!no_side_effect_bb (first_bb))
2549 	break;
2550       first_bb = pred_bb;
2551     }
2552   /* If first_bb is last_bb, other_bb hasn't been computed yet.
2553      Before starting forward search in last_bb successors, find
2554      out the other_bb.  */
2555   if (first_bb == last_bb)
2556     {
2557       other_bb = NULL;
2558       /* As non-GIMPLE_COND last stmt always terminates the range,
2559 	 if forward search didn't discover anything, just give up.  */
2560       if (gimple_code (stmt) != GIMPLE_COND)
2561 	return;
2562       /* Look at both successors.  Either it ends with a GIMPLE_COND
2563 	 and satisfies suitable_cond_bb, or ends with a cast and
2564 	 other_bb is that cast's successor.  */
2565       FOR_EACH_EDGE (e, ei, first_bb->succs)
2566 	if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE))
2567 	    || e->dest == first_bb)
2568 	  return;
2569 	else if (single_pred_p (e->dest))
2570 	  {
2571 	    stmt = last_stmt (e->dest);
2572 	    if (stmt
2573 		&& gimple_code (stmt) == GIMPLE_COND
2574 		&& EDGE_COUNT (e->dest->succs) == 2)
2575 	      {
2576 		if (suitable_cond_bb (first_bb, e->dest, &other_bb, true))
2577 		  break;
2578 		else
2579 		  other_bb = NULL;
2580 	      }
2581 	    else if (stmt
2582 		     && final_range_test_p (stmt)
2583 		     && find_edge (first_bb, single_succ (e->dest)))
2584 	      {
2585 		other_bb = single_succ (e->dest);
2586 		if (other_bb == first_bb)
2587 		  other_bb = NULL;
2588 	      }
2589 	  }
2590       if (other_bb == NULL)
2591 	return;
2592     }
2593   /* Now do the forward search, moving last_bb to successor bbs
2594      that aren't other_bb.  */
2595   while (EDGE_COUNT (last_bb->succs) == 2)
2596     {
2597       FOR_EACH_EDGE (e, ei, last_bb->succs)
2598 	if (e->dest != other_bb)
2599 	  break;
2600       if (e == NULL)
2601 	break;
2602       if (!single_pred_p (e->dest))
2603 	break;
2604       if (!suitable_cond_bb (e->dest, last_bb, &other_bb, false))
2605 	break;
2606       if (!no_side_effect_bb (e->dest))
2607 	break;
2608       last_bb = e->dest;
2609     }
2610   if (first_bb == last_bb)
2611     return;
2612   /* Here basic blocks first_bb through last_bb's predecessor
2613      end with GIMPLE_COND, all of them have one of the edges to
2614      other_bb and another to another block in the range,
2615      all blocks except first_bb don't have side-effects and
2616      last_bb ends with either GIMPLE_COND, or cast satisfying
2617      final_range_test_p.  */
2618   for (bb = last_bb; ; bb = single_pred (bb))
2619     {
2620       enum tree_code code;
2621       tree lhs, rhs;
2622 
2623       e = find_edge (bb, other_bb);
2624       stmt = last_stmt (bb);
2625       gimple_set_visited (stmt, true);
2626       if (gimple_code (stmt) != GIMPLE_COND)
2627 	{
2628 	  use_operand_p use_p;
2629 	  gimple phi;
2630 	  edge e2;
2631 	  unsigned int d;
2632 
2633 	  lhs = gimple_assign_lhs (stmt);
2634 	  rhs = gimple_assign_rhs1 (stmt);
2635 	  gcc_assert (bb == last_bb);
2636 
2637 	  /* stmt is
2638 	     _123 = (int) _234;
2639 
2640 	     followed by:
2641 	     <bb M>:
2642 	     # _345 = PHI <_123(N), 1(...), 1(...)>
2643 
2644 	     or 0 instead of 1.  If it is 0, the _234
2645 	     range test is anded together with all the
2646 	     other range tests, if it is 1, it is ored with
2647 	     them.  */
2648 	  single_imm_use (lhs, &use_p, &phi);
2649 	  gcc_assert (gimple_code (phi) == GIMPLE_PHI);
2650 	  e2 = find_edge (first_bb, other_bb);
2651 	  d = e2->dest_idx;
2652 	  gcc_assert (gimple_phi_arg_def (phi, e->dest_idx) == lhs);
2653 	  if (integer_zerop (gimple_phi_arg_def (phi, d)))
2654 	    code = BIT_AND_EXPR;
2655 	  else
2656 	    {
2657 	      gcc_checking_assert (integer_onep (gimple_phi_arg_def (phi, d)));
2658 	      code = BIT_IOR_EXPR;
2659 	    }
2660 
2661 	  /* If _234 SSA_NAME_DEF_STMT is
2662 	     _234 = _567 | _789;
2663 	     (or &, corresponding to 1/0 in the phi arguments),
2664 	     push into ops the individual range test arguments
2665 	     of the bitwise or resp. and, recursively.  */
2666 	  if (!get_ops (rhs, code, &ops,
2667 			loop_containing_stmt (stmt))
2668 	      && has_single_use (rhs))
2669 	    {
2670 	      /* Otherwise, push the _234 range test itself.  */
2671 	      operand_entry_t oe
2672 		= (operand_entry_t) pool_alloc (operand_entry_pool);
2673 
2674 	      oe->op = rhs;
2675 	      oe->rank = code;
2676 	      oe->id = 0;
2677 	      oe->count = 1;
2678 	      ops.safe_push (oe);
2679 	    }
2680 	  continue;
2681 	}
2682       /* Otherwise stmt is GIMPLE_COND.  */
2683       code = gimple_cond_code (stmt);
2684       lhs = gimple_cond_lhs (stmt);
2685       rhs = gimple_cond_rhs (stmt);
2686       if (TREE_CODE (lhs) == SSA_NAME
2687 	  && INTEGRAL_TYPE_P (TREE_TYPE (lhs))
2688 	  && ((code != EQ_EXPR && code != NE_EXPR)
2689 	      || rhs != boolean_false_node
2690 		 /* Either push into ops the individual bitwise
2691 		    or resp. and operands, depending on which
2692 		    edge is other_bb.  */
2693 	      || !get_ops (lhs, (((e->flags & EDGE_TRUE_VALUE) == 0)
2694 				 ^ (code == EQ_EXPR))
2695 				? BIT_AND_EXPR : BIT_IOR_EXPR, &ops,
2696 			   loop_containing_stmt (stmt))))
2697 	{
2698 	  /* Or push the GIMPLE_COND stmt itself.  */
2699 	  operand_entry_t oe
2700 	    = (operand_entry_t) pool_alloc (operand_entry_pool);
2701 
2702 	  oe->op = NULL;
2703 	  oe->rank = (e->flags & EDGE_TRUE_VALUE)
2704 		     ? BIT_IOR_EXPR : BIT_AND_EXPR;
2705 	  /* oe->op = NULL signals that there is no SSA_NAME
2706 	     for the range test, and oe->id instead is the
2707 	     number of the basic block at whose end the
2708 	     GIMPLE_COND is found.  */
2709 	  oe->id = bb->index;
2710 	  oe->count = 1;
2711 	  ops.safe_push (oe);
2712 	}
2713       if (bb == first_bb)
2714 	break;
2715     }
2716   if (ops.length () > 1)
2717     optimize_range_tests (ERROR_MARK, &ops);
2718   ops.release ();
2719 }
2720 
2721 /* Return true if OPERAND is defined by a PHI node which uses the LHS
2722    of STMT in its operands.  This is also known as a "destructive
2723    update" operation.  */
2724 
2725 static bool
2726 is_phi_for_stmt (gimple stmt, tree operand)
2727 {
2728   gimple def_stmt;
2729   tree lhs;
2730   use_operand_p arg_p;
2731   ssa_op_iter i;
2732 
2733   if (TREE_CODE (operand) != SSA_NAME)
2734     return false;
2735 
2736   lhs = gimple_assign_lhs (stmt);
2737 
2738   def_stmt = SSA_NAME_DEF_STMT (operand);
2739   if (gimple_code (def_stmt) != GIMPLE_PHI)
2740     return false;
2741 
2742   FOR_EACH_PHI_ARG (arg_p, def_stmt, i, SSA_OP_USE)
2743     if (lhs == USE_FROM_PTR (arg_p))
2744       return true;
2745   return false;
2746 }
2747 
2748 /* Remove def stmt of VAR if VAR has zero uses and recurse
2749    on rhs1 operand if so.  */
2750 
2751 static void
2752 remove_visited_stmt_chain (tree var)
2753 {
2754   gimple stmt;
2755   gimple_stmt_iterator gsi;
2756 
2757   while (1)
2758     {
2759       if (TREE_CODE (var) != SSA_NAME || !has_zero_uses (var))
2760 	return;
2761       stmt = SSA_NAME_DEF_STMT (var);
2762       if (is_gimple_assign (stmt) && gimple_visited_p (stmt))
2763 	{
2764 	  var = gimple_assign_rhs1 (stmt);
2765 	  gsi = gsi_for_stmt (stmt);
2766 	  gsi_remove (&gsi, true);
2767 	  release_defs (stmt);
2768 	}
2769       else
2770 	return;
2771     }
2772 }
2773 
2774 /* This function checks three consecutive operands in
2775    the passed operand vector OPS starting from OPINDEX and
2776    swaps two operands if it is profitable for the binary operation
2777    consuming the OPINDEX + 1 and OPINDEX + 2 operands.
2778 
2779    We pair ops with the same rank if possible.
2780 
2781    The alternative we try is to see if STMT is a destructive
2782    update style statement, which is like:
2783    b = phi (a, ...)
2784    a = c + b;
2785    In that case, we want to use the destructive update form to
2786    expose the possible vectorizer sum reduction opportunity.
2787    In that case, the third operand will be the phi node. This
2788    check is not performed if STMT is null.
2789 
2790    We could, of course, try to be better as noted above, and do a
2791    lot of work to try to find these opportunities in >3 operand
2792    cases, but it is unlikely to be worth it.  */
2793 
2794 static void
2795 swap_ops_for_binary_stmt (vec<operand_entry_t> ops,
2796 			  unsigned int opindex, gimple stmt)
2797 {
2798   operand_entry_t oe1, oe2, oe3;
2799 
2800   oe1 = ops[opindex];
2801   oe2 = ops[opindex + 1];
2802   oe3 = ops[opindex + 2];
2803 
2804   if ((oe1->rank == oe2->rank
2805        && oe2->rank != oe3->rank)
2806       || (stmt && is_phi_for_stmt (stmt, oe3->op)
2807 	  && !is_phi_for_stmt (stmt, oe1->op)
2808 	  && !is_phi_for_stmt (stmt, oe2->op)))
2809     {
2810       struct operand_entry temp = *oe3;
2811       oe3->op = oe1->op;
2812       oe3->rank = oe1->rank;
2813       oe1->op = temp.op;
2814       oe1->rank = temp.rank;
2815     }
2816   else if ((oe1->rank == oe3->rank
2817 	    && oe2->rank != oe3->rank)
2818 	   || (stmt && is_phi_for_stmt (stmt, oe2->op)
2819 	       && !is_phi_for_stmt (stmt, oe1->op)
2820 	       && !is_phi_for_stmt (stmt, oe3->op)))
2821     {
2822       struct operand_entry temp = *oe2;
2823       oe2->op = oe1->op;
2824       oe2->rank = oe1->rank;
2825       oe1->op = temp.op;
2826       oe1->rank = temp.rank;
2827     }
2828 }
2829 
2830 /* Recursively rewrite our linearized statements so that the operators
2831    match those in OPS[OPINDEX], putting the computation in rank
2832    order.  */
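
/* To sketch the effect (ignoring the operand swapping heuristic): for a
   linearized chain _t1 = x op y; _t2 = _t1 op z; lhs = _t2 op w and OPS
   containing { o0, o1, o2, o3 }, the rewrite produces _t1 = o2 op o3;
   _t2 = _t1 op o1; lhs = _t2 op o0, i.e. the trailing entries of OPS end
   up deepest in the chain.  */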
2833 
2834 static void
2835 rewrite_expr_tree (gimple stmt, unsigned int opindex,
2836 		   vec<operand_entry_t> ops, bool moved)
2837 {
2838   tree rhs1 = gimple_assign_rhs1 (stmt);
2839   tree rhs2 = gimple_assign_rhs2 (stmt);
2840   operand_entry_t oe;
2841 
2842   /* If we have three operands left, then we want to make sure the ones
2843      that get the double binary op are chosen wisely.  */
2844   if (opindex + 3 == ops.length ())
2845     swap_ops_for_binary_stmt (ops, opindex, stmt);
2846 
2847   /* The final recursion case for this function is that you have
2848      exactly two operations left.
2849      If we had exactly one op in the entire list to start with, we
2850      would have never called this function, and the tail recursion
2851      rewrites them one at a time.  */
2852   if (opindex + 2 == ops.length ())
2853     {
2854       operand_entry_t oe1, oe2;
2855 
2856       oe1 = ops[opindex];
2857       oe2 = ops[opindex + 1];
2858 
2859       if (rhs1 != oe1->op || rhs2 != oe2->op)
2860 	{
2861 	  if (dump_file && (dump_flags & TDF_DETAILS))
2862 	    {
2863 	      fprintf (dump_file, "Transforming ");
2864 	      print_gimple_stmt (dump_file, stmt, 0, 0);
2865 	    }
2866 
2867 	  gimple_assign_set_rhs1 (stmt, oe1->op);
2868 	  gimple_assign_set_rhs2 (stmt, oe2->op);
2869 	  update_stmt (stmt);
2870 	  if (rhs1 != oe1->op && rhs1 != oe2->op)
2871 	    remove_visited_stmt_chain (rhs1);
2872 
2873 	  if (dump_file && (dump_flags & TDF_DETAILS))
2874 	    {
2875 	      fprintf (dump_file, " into ");
2876 	      print_gimple_stmt (dump_file, stmt, 0, 0);
2877 	    }
2878 	}
2879       return;
2880     }
2881 
2882   /* If we hit here, we should have 3 or more ops left.  */
2883   gcc_assert (opindex + 2 < ops.length ());
2884 
2885   /* Rewrite the next operator.  */
2886   oe = ops[opindex];
2887 
2888   if (oe->op != rhs2)
2889     {
2890       if (!moved)
2891 	{
2892 	  gimple_stmt_iterator gsinow, gsirhs1;
2893 	  gimple stmt1 = stmt, stmt2;
2894 	  unsigned int count;
2895 
2896 	  gsinow = gsi_for_stmt (stmt);
2897 	  count = ops.length () - opindex - 2;
2898 	  while (count-- != 0)
2899 	    {
2900 	      stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt1));
2901 	      gsirhs1 = gsi_for_stmt (stmt2);
2902 	      gsi_move_before (&gsirhs1, &gsinow);
2903 	      gsi_prev (&gsinow);
2904 	      stmt1 = stmt2;
2905 	    }
2906 	  moved = true;
2907 	}
2908 
2909       if (dump_file && (dump_flags & TDF_DETAILS))
2910 	{
2911 	  fprintf (dump_file, "Transforming ");
2912 	  print_gimple_stmt (dump_file, stmt, 0, 0);
2913 	}
2914 
2915       gimple_assign_set_rhs2 (stmt, oe->op);
2916       update_stmt (stmt);
2917 
2918       if (dump_file && (dump_flags & TDF_DETAILS))
2919 	{
2920 	  fprintf (dump_file, " into ");
2921 	  print_gimple_stmt (dump_file, stmt, 0, 0);
2922 	}
2923     }
2924   /* Recurse on the LHS of the binary operator, which is guaranteed to
2925      be the non-leaf side.  */
2926   rewrite_expr_tree (SSA_NAME_DEF_STMT (rhs1), opindex + 1, ops, moved);
2927 }
2928 
2929 /* Find out how many cycles we need to compute the statement chain.
2930    OPS_NUM holds the number of statements in the chain.  CPU_WIDTH is
2931    the maximum number of independent statements we may execute per cycle.  */
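
/* For example, get_required_cycles (12, 2) is 6: res = 12 / (2 * 2) = 3,
   then rest = 12 - 3 * 2 = 6, which is not a power of two, so another
   floor_log2 (6) + 1 = 3 cycles are added.  */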
2932 
2933 static int
2934 get_required_cycles (int ops_num, int cpu_width)
2935 {
2936   int res;
2937   int elog;
2938   unsigned int rest;
2939 
2940   /* While we have more than 2 * cpu_width operands
2941      we may reduce the number of operands by cpu_width
2942      per cycle.  */
2943   res = ops_num / (2 * cpu_width);
2944 
2945   /* The remaining operand count can then be halved each cycle
2946      until we have only one operand.  */
2947   rest = (unsigned)(ops_num - res * cpu_width);
2948   elog = exact_log2 (rest);
2949   if (elog >= 0)
2950     res += elog;
2951   else
2952     res += floor_log2 (rest) + 1;
2953 
2954   return res;
2955 }
2956 
2957 /* Returns an optimal number of registers to use for computation of
2958    given statements.  */
2959 
2960 static int
2961 get_reassociation_width (int ops_num, enum tree_code opc,
2962 			 enum machine_mode mode)
2963 {
2964   int param_width = PARAM_VALUE (PARAM_TREE_REASSOC_WIDTH);
2965   int width;
2966   int width_min;
2967   int cycles_best;
2968 
2969   if (param_width > 0)
2970     width = param_width;
2971   else
2972     width = targetm.sched.reassociation_width (opc, mode);
2973 
2974   if (width == 1)
2975     return width;
2976 
2977   /* Get the minimal time required for sequence computation.  */
2978   cycles_best = get_required_cycles (ops_num, width);
2979 
2980   /* Check if we can use a smaller width and still compute the sequence
2981      in the same number of cycles.  This allows us to reduce register
2982      usage.  get_required_cycles is monotonically increasing with lower
2983      width, so we can perform a binary search for the minimal width that
2984      still results in the optimal cycle count.  */
2985   width_min = 1;
2986   while (width > width_min)
2987     {
2988       int width_mid = (width + width_min) / 2;
2989 
2990       if (get_required_cycles (ops_num, width_mid) == cycles_best)
2991 	width = width_mid;
2992       else if (width_min < width_mid)
2993 	width_min = width_mid;
2994       else
2995 	break;
2996     }
2997 
2998   return width;
2999 }
3000 
3001 /* Recursively rewrite our linearized statements so that the operators
3002    match those in OPS[OPINDEX], putting the computation in rank
3003    order and trying to allow operations to be executed in
3004    parallel.  */
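
/* As a small example (assuming a reassociation width of 2, four operands
   o0 .. o3 and ignoring the operand swapping heuristic): the loop below
   first emits the two independent statements _s0 = o2 op o3 and
   _s1 = o0 op o1, and then rewrites the original statement to combine
   their results, so _s0 and _s1 can execute in the same cycle.  */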
3005 
3006 static void
3007 rewrite_expr_tree_parallel (gimple stmt, int width,
3008 			    vec<operand_entry_t>  ops)
3009 {
3010   enum tree_code opcode = gimple_assign_rhs_code (stmt);
3011   int op_num = ops.length ();
3012   int stmt_num = op_num - 1;
3013   gimple *stmts = XALLOCAVEC (gimple, stmt_num);
3014   int op_index = op_num - 1;
3015   int stmt_index = 0;
3016   int ready_stmts_end = 0;
3017   int i = 0;
3018   tree last_rhs1 = gimple_assign_rhs1 (stmt);
3019 
3020   /* We start expression rewriting from the top statements.
3021      So, in this loop we create a full list of statements
3022      we will work with.  */
3023   stmts[stmt_num - 1] = stmt;
3024   for (i = stmt_num - 2; i >= 0; i--)
3025     stmts[i] = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmts[i+1]));
3026 
3027   for (i = 0; i < stmt_num; i++)
3028     {
3029       tree op1, op2;
3030 
3031       /* Determine whether we should use results of
3032 	 already handled statements or not.  */
3033       if (ready_stmts_end == 0
3034 	  && (i - stmt_index >= width || op_index < 1))
3035 	ready_stmts_end = i;
3036 
3037       /* Now we choose operands for the next statement.  A nonzero
3038 	 value in ready_stmts_end means that we should use
3039 	 the result of already generated statements as a new operand.  */
3040       if (ready_stmts_end > 0)
3041 	{
3042 	  op1 = gimple_assign_lhs (stmts[stmt_index++]);
3043 	  if (ready_stmts_end > stmt_index)
3044 	    op2 = gimple_assign_lhs (stmts[stmt_index++]);
3045 	  else if (op_index >= 0)
3046 	    op2 = ops[op_index--]->op;
3047 	  else
3048 	    {
3049 	      gcc_assert (stmt_index < i);
3050 	      op2 = gimple_assign_lhs (stmts[stmt_index++]);
3051 	    }
3052 
3053 	  if (stmt_index >= ready_stmts_end)
3054 	    ready_stmts_end = 0;
3055 	}
3056       else
3057 	{
3058 	  if (op_index > 1)
3059 	    swap_ops_for_binary_stmt (ops, op_index - 2, NULL);
3060 	  op2 = ops[op_index--]->op;
3061 	  op1 = ops[op_index--]->op;
3062 	}
3063 
3064       /* If we emit the last statement then we should put
3065 	 operands into the last statement.  It will also
3066 	 break the loop.  */
3067       if (op_index < 0 && stmt_index == i)
3068 	i = stmt_num - 1;
3069 
3070       if (dump_file && (dump_flags & TDF_DETAILS))
3071 	{
3072 	  fprintf (dump_file, "Transforming ");
3073 	  print_gimple_stmt (dump_file, stmts[i], 0, 0);
3074 	}
3075 
3076       /* We keep the original statement only for the last one.  All
3077 	 others are recreated.  */
3078       if (i == stmt_num - 1)
3079 	{
3080 	  gimple_assign_set_rhs1 (stmts[i], op1);
3081 	  gimple_assign_set_rhs2 (stmts[i], op2);
3082 	  update_stmt (stmts[i]);
3083 	}
3084       else
3085 	stmts[i] = build_and_add_sum (TREE_TYPE (last_rhs1), op1, op2, opcode);
3086 
3087       if (dump_file && (dump_flags & TDF_DETAILS))
3088 	{
3089 	  fprintf (dump_file, " into ");
3090 	  print_gimple_stmt (dump_file, stmts[i], 0, 0);
3091 	}
3092     }
3093 
3094   remove_visited_stmt_chain (last_rhs1);
3095 }
3096 
3097 /* Transform STMT, which is really (A + B) + (C + D), into the left
3098    linear form, ((A + B) + C) + D.
3099    Recurse on D if necessary.  */
3100 
3101 static void
3102 linearize_expr (gimple stmt)
3103 {
3104   gimple_stmt_iterator gsinow, gsirhs;
3105   gimple binlhs = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
3106   gimple binrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
3107   enum tree_code rhscode = gimple_assign_rhs_code (stmt);
3108   gimple newbinrhs = NULL;
3109   struct loop *loop = loop_containing_stmt (stmt);
3110 
3111   gcc_assert (is_reassociable_op (binlhs, rhscode, loop)
3112 	      && is_reassociable_op (binrhs, rhscode, loop));
3113 
3114   gsinow = gsi_for_stmt (stmt);
3115   gsirhs = gsi_for_stmt (binrhs);
3116   gsi_move_before (&gsirhs, &gsinow);
3117 
3118   gimple_assign_set_rhs2 (stmt, gimple_assign_rhs1 (binrhs));
3119   gimple_assign_set_rhs1 (binrhs, gimple_assign_lhs (binlhs));
3120   gimple_assign_set_rhs1 (stmt, gimple_assign_lhs (binrhs));
3121 
3122   if (TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME)
3123     newbinrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
3124 
3125   if (dump_file && (dump_flags & TDF_DETAILS))
3126     {
3127       fprintf (dump_file, "Linearized: ");
3128       print_gimple_stmt (dump_file, stmt, 0, 0);
3129     }
3130 
3131   reassociate_stats.linearized++;
3132   update_stmt (binrhs);
3133   update_stmt (binlhs);
3134   update_stmt (stmt);
3135 
3136   gimple_set_visited (stmt, true);
3137   gimple_set_visited (binlhs, true);
3138   gimple_set_visited (binrhs, true);
3139 
3140   /* Tail recurse on the new rhs if it still needs reassociation.  */
3141   if (newbinrhs && is_reassociable_op (newbinrhs, rhscode, loop))
3142     /* ??? This should probably be linearize_expr (newbinrhs) but I don't
3143 	   want to change the algorithm while converting to tuples.  */
3144     linearize_expr (stmt);
3145 }
3146 
3147 /* If LHS has a single immediate use that is a GIMPLE_ASSIGN statement, return
3148    it.  Otherwise, return NULL.  */
3149 
3150 static gimple
3151 get_single_immediate_use (tree lhs)
3152 {
3153   use_operand_p immuse;
3154   gimple immusestmt;
3155 
3156   if (TREE_CODE (lhs) == SSA_NAME
3157       && single_imm_use (lhs, &immuse, &immusestmt)
3158       && is_gimple_assign (immusestmt))
3159     return immusestmt;
3160 
3161   return NULL;
3162 }
3163 
3164 /* Recursively negate the value of TONEGATE, and return the SSA_NAME
3165    representing the negated value.  Insertions of any necessary
3166    instructions go before GSI.
3167    This function is recursive in that, if you hand it "a_5" as the
3168    value to negate, and a_5 is defined by "a_5 = b_3 + b_4", it will
3169    transform b_3 + b_4 into a_5 = -b_3 + -b_4.  */
3170 
3171 static tree
3172 negate_value (tree tonegate, gimple_stmt_iterator *gsi)
3173 {
3174   gimple negatedefstmt = NULL;
3175   tree resultofnegate;
3176 
3177   /* If we are trying to negate a name, defined by an add, negate the
3178      add operands instead.  */
3179   if (TREE_CODE (tonegate) == SSA_NAME)
3180     negatedefstmt = SSA_NAME_DEF_STMT (tonegate);
3181   if (TREE_CODE (tonegate) == SSA_NAME
3182       && is_gimple_assign (negatedefstmt)
3183       && TREE_CODE (gimple_assign_lhs (negatedefstmt)) == SSA_NAME
3184       && has_single_use (gimple_assign_lhs (negatedefstmt))
3185       && gimple_assign_rhs_code (negatedefstmt) == PLUS_EXPR)
3186     {
3187       gimple_stmt_iterator gsi;
3188       tree rhs1 = gimple_assign_rhs1 (negatedefstmt);
3189       tree rhs2 = gimple_assign_rhs2 (negatedefstmt);
3190 
3191       gsi = gsi_for_stmt (negatedefstmt);
3192       rhs1 = negate_value (rhs1, &gsi);
3193       gimple_assign_set_rhs1 (negatedefstmt, rhs1);
3194 
3195       gsi = gsi_for_stmt (negatedefstmt);
3196       rhs2 = negate_value (rhs2, &gsi);
3197       gimple_assign_set_rhs2 (negatedefstmt, rhs2);
3198 
3199       update_stmt (negatedefstmt);
3200       return gimple_assign_lhs (negatedefstmt);
3201     }
3202 
3203   tonegate = fold_build1 (NEGATE_EXPR, TREE_TYPE (tonegate), tonegate);
3204   resultofnegate = force_gimple_operand_gsi (gsi, tonegate, true,
3205 					     NULL_TREE, true, GSI_SAME_STMT);
3206   return resultofnegate;
3207 }
3208 
3209 /* Return true if we should break up the subtract in STMT into an add
3210    with negate.  This is true when the subtract operands are really
3211    adds, or the subtract itself is used in an add expression.  In
3212    either case, breaking up the subtract into an add with negate
3213    exposes the adds to reassociation.  */
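
/* E.g. for t_1 = a_2 - b_3 used in x_4 = t_1 + c_5, breaking the subtract
   up into t_1 = a_2 + (-b_3) exposes the single chain a_2 + (-b_3) + c_5
   to the rest of the pass.  */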
3214 
3215 static bool
3216 should_break_up_subtract (gimple stmt)
3217 {
3218   tree lhs = gimple_assign_lhs (stmt);
3219   tree binlhs = gimple_assign_rhs1 (stmt);
3220   tree binrhs = gimple_assign_rhs2 (stmt);
3221   gimple immusestmt;
3222   struct loop *loop = loop_containing_stmt (stmt);
3223 
3224   if (TREE_CODE (binlhs) == SSA_NAME
3225       && is_reassociable_op (SSA_NAME_DEF_STMT (binlhs), PLUS_EXPR, loop))
3226     return true;
3227 
3228   if (TREE_CODE (binrhs) == SSA_NAME
3229       && is_reassociable_op (SSA_NAME_DEF_STMT (binrhs), PLUS_EXPR, loop))
3230     return true;
3231 
3232   if (TREE_CODE (lhs) == SSA_NAME
3233       && (immusestmt = get_single_immediate_use (lhs))
3234       && is_gimple_assign (immusestmt)
3235       && (gimple_assign_rhs_code (immusestmt) == PLUS_EXPR
3236 	  ||  gimple_assign_rhs_code (immusestmt) == MULT_EXPR))
3237     return true;
3238   return false;
3239 }
3240 
3241 /* Transform STMT from A - B into A + -B.  */
3242 
3243 static void
3244 break_up_subtract (gimple stmt, gimple_stmt_iterator *gsip)
3245 {
3246   tree rhs1 = gimple_assign_rhs1 (stmt);
3247   tree rhs2 = gimple_assign_rhs2 (stmt);
3248 
3249   if (dump_file && (dump_flags & TDF_DETAILS))
3250     {
3251       fprintf (dump_file, "Breaking up subtract ");
3252       print_gimple_stmt (dump_file, stmt, 0, 0);
3253     }
3254 
3255   rhs2 = negate_value (rhs2, gsip);
3256   gimple_assign_set_rhs_with_ops (gsip, PLUS_EXPR, rhs1, rhs2);
3257   update_stmt (stmt);
3258 }
3259 
3260 /* Determine whether STMT is a builtin call that raises an SSA name
3261    to an integer power and has only one use.  If so, and this is early
3262    reassociation and unsafe math optimizations are permitted, place
3263    the SSA name in *BASE and the exponent in *EXPONENT, and return TRUE.
3264    If any of these conditions does not hold, return FALSE.  */
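
/* For instance (when the conditions above hold), a call
   lhs_1 = __builtin_pow (x_2, 4.0), or equivalently
   __builtin_powi (x_2, 4), yields *BASE = x_2 and *EXPONENT = 4, while a
   fractional, negative, zero or unit exponent is rejected.  */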
3265 
3266 static bool
3267 acceptable_pow_call (gimple stmt, tree *base, HOST_WIDE_INT *exponent)
3268 {
3269   tree fndecl, arg1;
3270   REAL_VALUE_TYPE c, cint;
3271 
3272   if (!first_pass_instance
3273       || !flag_unsafe_math_optimizations
3274       || !is_gimple_call (stmt)
3275       || !has_single_use (gimple_call_lhs (stmt)))
3276     return false;
3277 
3278   fndecl = gimple_call_fndecl (stmt);
3279 
3280   if (!fndecl
3281       || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
3282     return false;
3283 
3284   switch (DECL_FUNCTION_CODE (fndecl))
3285     {
3286     CASE_FLT_FN (BUILT_IN_POW):
3287       *base = gimple_call_arg (stmt, 0);
3288       arg1 = gimple_call_arg (stmt, 1);
3289 
3290       if (TREE_CODE (arg1) != REAL_CST)
3291 	return false;
3292 
3293       c = TREE_REAL_CST (arg1);
3294 
3295       if (REAL_EXP (&c) > HOST_BITS_PER_WIDE_INT)
3296 	return false;
3297 
3298       *exponent = real_to_integer (&c);
3299       real_from_integer (&cint, VOIDmode, *exponent,
3300 			 *exponent < 0 ? -1 : 0, 0);
3301       if (!real_identical (&c, &cint))
3302 	return false;
3303 
3304       break;
3305 
3306     CASE_FLT_FN (BUILT_IN_POWI):
3307       *base = gimple_call_arg (stmt, 0);
3308       arg1 = gimple_call_arg (stmt, 1);
3309 
3310       if (!host_integerp (arg1, 0))
3311 	return false;
3312 
3313       *exponent = TREE_INT_CST_LOW (arg1);
3314       break;
3315 
3316     default:
3317       return false;
3318     }
3319 
3320   /* Expanding negative exponents is generally unproductive, so we don't
3321      complicate matters with those.  Exponents of zero and one should
3322      have been handled by expression folding.  */
3323   if (*exponent < 2 || TREE_CODE (*base) != SSA_NAME)
3324     return false;
3325 
3326   return true;
3327 }
3328 
3329 /* Recursively linearize a binary expression that is the RHS of STMT.
3330    Place the operands of the expression tree in the vector named OPS.  */
3331 
3332 static void
3333 linearize_expr_tree (vec<operand_entry_t> *ops, gimple stmt,
3334 		     bool is_associative, bool set_visited)
3335 {
3336   tree binlhs = gimple_assign_rhs1 (stmt);
3337   tree binrhs = gimple_assign_rhs2 (stmt);
3338   gimple binlhsdef = NULL, binrhsdef = NULL;
3339   bool binlhsisreassoc = false;
3340   bool binrhsisreassoc = false;
3341   enum tree_code rhscode = gimple_assign_rhs_code (stmt);
3342   struct loop *loop = loop_containing_stmt (stmt);
3343   tree base = NULL_TREE;
3344   HOST_WIDE_INT exponent = 0;
3345 
3346   if (set_visited)
3347     gimple_set_visited (stmt, true);
3348 
3349   if (TREE_CODE (binlhs) == SSA_NAME)
3350     {
3351       binlhsdef = SSA_NAME_DEF_STMT (binlhs);
3352       binlhsisreassoc = (is_reassociable_op (binlhsdef, rhscode, loop)
3353 			 && !stmt_could_throw_p (binlhsdef));
3354     }
3355 
3356   if (TREE_CODE (binrhs) == SSA_NAME)
3357     {
3358       binrhsdef = SSA_NAME_DEF_STMT (binrhs);
3359       binrhsisreassoc = (is_reassociable_op (binrhsdef, rhscode, loop)
3360 			 && !stmt_could_throw_p (binrhsdef));
3361     }
3362 
3363   /* If the LHS is not reassociable, but the RHS is, we need to swap
3364      them.  If neither is reassociable, there is nothing we can do, so
3365      just put them in the ops vector.  If the LHS is reassociable,
3366      linearize it.  If both are reassociable, then linearize the RHS
3367      and the LHS.  */
3368 
3369   if (!binlhsisreassoc)
3370     {
3371       tree temp;
3372 
3373       /* If this is not a associative operation like division, give up.  */
3374       if (!is_associative)
3375 	{
3376 	  add_to_ops_vec (ops, binrhs);
3377 	  return;
3378 	}
3379 
3380       if (!binrhsisreassoc)
3381 	{
3382 	  if (rhscode == MULT_EXPR
3383 	      && TREE_CODE (binrhs) == SSA_NAME
3384 	      && acceptable_pow_call (binrhsdef, &base, &exponent))
3385 	    {
3386 	      add_repeat_to_ops_vec (ops, base, exponent);
3387 	      gimple_set_visited (binrhsdef, true);
3388 	    }
3389 	  else
3390 	    add_to_ops_vec (ops, binrhs);
3391 
3392 	  if (rhscode == MULT_EXPR
3393 	      && TREE_CODE (binlhs) == SSA_NAME
3394 	      && acceptable_pow_call (binlhsdef, &base, &exponent))
3395 	    {
3396 	      add_repeat_to_ops_vec (ops, base, exponent);
3397 	      gimple_set_visited (binlhsdef, true);
3398 	    }
3399 	  else
3400 	    add_to_ops_vec (ops, binlhs);
3401 
3402 	  return;
3403 	}
3404 
3405       if (dump_file && (dump_flags & TDF_DETAILS))
3406 	{
3407 	  fprintf (dump_file, "swapping operands of ");
3408 	  print_gimple_stmt (dump_file, stmt, 0, 0);
3409 	}
3410 
3411       swap_tree_operands (stmt,
3412 			  gimple_assign_rhs1_ptr (stmt),
3413 			  gimple_assign_rhs2_ptr (stmt));
3414       update_stmt (stmt);
3415 
3416       if (dump_file && (dump_flags & TDF_DETAILS))
3417 	{
3418 	  fprintf (dump_file, " is now ");
3419 	  print_gimple_stmt (dump_file, stmt, 0, 0);
3420 	}
3421 
3422       /* We want to make it so the lhs is always the reassociative op,
3423 	 so swap.  */
3424       temp = binlhs;
3425       binlhs = binrhs;
3426       binrhs = temp;
3427     }
3428   else if (binrhsisreassoc)
3429     {
3430       linearize_expr (stmt);
3431       binlhs = gimple_assign_rhs1 (stmt);
3432       binrhs = gimple_assign_rhs2 (stmt);
3433     }
3434 
3435   gcc_assert (TREE_CODE (binrhs) != SSA_NAME
3436 	      || !is_reassociable_op (SSA_NAME_DEF_STMT (binrhs),
3437 				      rhscode, loop));
3438   linearize_expr_tree (ops, SSA_NAME_DEF_STMT (binlhs),
3439 		       is_associative, set_visited);
3440 
3441   if (rhscode == MULT_EXPR
3442       && TREE_CODE (binrhs) == SSA_NAME
3443       && acceptable_pow_call (SSA_NAME_DEF_STMT (binrhs), &base, &exponent))
3444     {
3445       add_repeat_to_ops_vec (ops, base, exponent);
3446       gimple_set_visited (SSA_NAME_DEF_STMT (binrhs), true);
3447     }
3448   else
3449     add_to_ops_vec (ops, binrhs);
3450 }

/* Repropagate the negates back into subtracts, since no other pass
   currently does it.  */

static void
repropagate_negates (void)
{
  unsigned int i = 0;
  tree negate;

  FOR_EACH_VEC_ELT (plus_negates, i, negate)
    {
      gimple user = get_single_immediate_use (negate);

      if (!user || !is_gimple_assign (user))
	continue;

      /* The negate operand can be either operand of a PLUS_EXPR
	 (it can be the LHS if the RHS is a constant for example).

	 Force the negate operand to the RHS of the PLUS_EXPR, then
	 transform the PLUS_EXPR into a MINUS_EXPR.  */
      if (gimple_assign_rhs_code (user) == PLUS_EXPR)
	{
	  /* If the negated operand appears on the LHS of the
	     PLUS_EXPR, exchange the operands of the PLUS_EXPR
	     to force the negated operand to the RHS of the PLUS_EXPR.  */
	  if (gimple_assign_rhs1 (user) == negate)
	    {
	      swap_tree_operands (user,
				  gimple_assign_rhs1_ptr (user),
				  gimple_assign_rhs2_ptr (user));
	    }

	  /* Now transform the PLUS_EXPR into a MINUS_EXPR and replace
	     the RHS of the PLUS_EXPR with the operand of the NEGATE_EXPR.  */
	  if (gimple_assign_rhs2 (user) == negate)
	    {
	      tree rhs1 = gimple_assign_rhs1 (user);
	      tree rhs2 = get_unary_op (negate, NEGATE_EXPR);
	      gimple_stmt_iterator gsi = gsi_for_stmt (user);
	      gimple_assign_set_rhs_with_ops (&gsi, MINUS_EXPR, rhs1, rhs2);
	      update_stmt (user);
	    }
	}
      else if (gimple_assign_rhs_code (user) == MINUS_EXPR)
	{
	  if (gimple_assign_rhs1 (user) == negate)
	    {
	      /* We have
	           x = -a
		   y = x - b
		 which we transform into
		   x = a + b
		   y = -x .
		 This pushes down the negate which we possibly can merge
		 into some other operation, hence insert it into the
		 plus_negates vector.  */
	      gimple feed = SSA_NAME_DEF_STMT (negate);
	      tree a = gimple_assign_rhs1 (feed);
	      tree rhs2 = gimple_assign_rhs2 (user);
	      gimple_stmt_iterator gsi = gsi_for_stmt (feed), gsi2;
	      gimple_replace_lhs (feed, negate);
	      gimple_assign_set_rhs_with_ops (&gsi, PLUS_EXPR, a, rhs2);
	      update_stmt (gsi_stmt (gsi));
	      gsi2 = gsi_for_stmt (user);
	      gimple_assign_set_rhs_with_ops (&gsi2, NEGATE_EXPR, negate, NULL);
	      update_stmt (gsi_stmt (gsi2));
	      gsi_move_before (&gsi, &gsi2);
	      plus_negates.safe_push (gimple_assign_lhs (gsi_stmt (gsi2)));
	    }
	  else
	    {
	      /* Transform "x = -a; y = b - x" into "y = b + a", getting
	         rid of one operation.  */
	      gimple feed = SSA_NAME_DEF_STMT (negate);
	      tree a = gimple_assign_rhs1 (feed);
	      tree rhs1 = gimple_assign_rhs1 (user);
	      gimple_stmt_iterator gsi = gsi_for_stmt (user);
	      gimple_assign_set_rhs_with_ops (&gsi, PLUS_EXPR, rhs1, a);
	      update_stmt (gsi_stmt (gsi));
	    }
	}
    }
}

/* Returns true if OP is of a type for which we can do reassociation.
   That is for integral or non-saturating fixed-point types, and for
   floating-point types when associative-math is enabled.  */

static bool
can_reassociate_p (tree op)
{
  tree type = TREE_TYPE (op);
  if ((INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_WRAPS (type))
      || NON_SAT_FIXED_POINT_TYPE_P (type)
      || (flag_associative_math && FLOAT_TYPE_P (type)))
    return true;
  return false;
}
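
/* For example (editorial note, not an exhaustive list): an unsigned int
   operand qualifies because unsigned arithmetic wraps; a plain signed int
   qualifies only when TYPE_OVERFLOW_WRAPS holds (e.g. with -fwrapv); and
   a double operand qualifies only under -fassociative-math (enabled by
   -ffast-math).  */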

/* Break up subtract operations in block BB.

   We do this top down because we don't know whether the subtract is
   part of a possible chain of reassociation except at the top.

   IE given
   d = f + g
   c = a + e
   b = c - d
   q = b - r
   k = t - q

   we want to break up k = t - q, but we won't until we've transformed q
   = b - r, which won't be broken up until we transform b = c - d.

   En passant, clear the GIMPLE visited flag on every statement.  */

static void
break_up_subtract_bb (basic_block bb)
{
  gimple_stmt_iterator gsi;
  basic_block son;

  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gimple stmt = gsi_stmt (gsi);
      gimple_set_visited (stmt, false);

      if (!is_gimple_assign (stmt)
	  || !can_reassociate_p (gimple_assign_lhs (stmt)))
	continue;

      /* Look for simple gimple subtract operations.  */
      if (gimple_assign_rhs_code (stmt) == MINUS_EXPR)
	{
	  if (!can_reassociate_p (gimple_assign_rhs1 (stmt))
	      || !can_reassociate_p (gimple_assign_rhs2 (stmt)))
	    continue;

	  /* Check for a subtract used only in an addition.  If this
	     is the case, transform it into add of a negate for better
	     reassociation.  IE transform C = A-B into C = A + -B if C
	     is only used in an addition.  */
	  if (should_break_up_subtract (stmt))
	    break_up_subtract (stmt, &gsi);
	}
      else if (gimple_assign_rhs_code (stmt) == NEGATE_EXPR
	       && can_reassociate_p (gimple_assign_rhs1 (stmt)))
	plus_negates.safe_push (gimple_assign_lhs (stmt));
    }
  for (son = first_dom_son (CDI_DOMINATORS, bb);
       son;
       son = next_dom_son (CDI_DOMINATORS, son))
    break_up_subtract_bb (son);
}

/* Used for repeated factor analysis.  */
struct repeat_factor_d
{
  /* An SSA name that occurs in a multiply chain.  */
  tree factor;

  /* Cached rank of the factor.  */
  unsigned rank;

  /* Number of occurrences of the factor in the chain.  */
  HOST_WIDE_INT count;

  /* An SSA name representing the product of this factor and
     all factors appearing later in the repeated factor vector.  */
  tree repr;
};

typedef struct repeat_factor_d repeat_factor, *repeat_factor_t;
typedef const struct repeat_factor_d *const_repeat_factor_t;


static vec<repeat_factor> repeat_factor_vec;

/* Used for sorting the repeat factor vector.  Sort primarily by
   ascending occurrence count, secondarily by descending rank.  */

static int
compare_repeat_factors (const void *x1, const void *x2)
{
  const_repeat_factor_t rf1 = (const_repeat_factor_t) x1;
  const_repeat_factor_t rf2 = (const_repeat_factor_t) x2;

  if (rf1->count != rf2->count)
    return rf1->count - rf2->count;

  return rf2->rank - rf1->rank;
}
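
/* Sketch of the resulting order (editorial example): the factors
   { a: count 1, rank 7; b: count 3, rank 5; c: count 3, rank 2 } sort
   as a, b, c -- 'a' first because of its smaller count, then 'b' before
   'c' because ties in count are broken by descending rank.  */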

/* Look for repeated operands in OPS in the multiply tree rooted at
   STMT.  Replace them with an optimal sequence of multiplies and powi
   builtin calls, and remove the used operands from OPS.  Return an
   SSA name representing the value of the replacement sequence.  */

static tree
attempt_builtin_powi (gimple stmt, vec<operand_entry_t> *ops)
{
  unsigned i, j, vec_len;
  int ii;
  operand_entry_t oe;
  repeat_factor_t rf1, rf2;
  repeat_factor rfnew;
  tree result = NULL_TREE;
  tree target_ssa, iter_result;
  tree type = TREE_TYPE (gimple_get_lhs (stmt));
  tree powi_fndecl = mathfn_built_in (type, BUILT_IN_POWI);
  gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
  gimple mul_stmt, pow_stmt;

  /* Nothing to do if BUILT_IN_POWI doesn't exist for this type and
     target.  */
  if (!powi_fndecl)
    return NULL_TREE;

  /* Allocate the repeated factor vector.  */
  repeat_factor_vec.create (10);

  /* Scan the OPS vector for all SSA names in the product and build
     up a vector of occurrence counts for each factor.  */
  FOR_EACH_VEC_ELT (*ops, i, oe)
    {
      if (TREE_CODE (oe->op) == SSA_NAME)
	{
	  FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
	    {
	      if (rf1->factor == oe->op)
		{
		  rf1->count += oe->count;
		  break;
		}
	    }

	  if (j >= repeat_factor_vec.length ())
	    {
	      rfnew.factor = oe->op;
	      rfnew.rank = oe->rank;
	      rfnew.count = oe->count;
	      rfnew.repr = NULL_TREE;
	      repeat_factor_vec.safe_push (rfnew);
	    }
	}
    }

  /* Sort the repeated factor vector by (a) increasing occurrence count,
     and (b) decreasing rank.  */
  repeat_factor_vec.qsort (compare_repeat_factors);

  /* It is generally best to combine as many base factors as possible
     into a product before applying __builtin_powi to the result.
     However, the sort order chosen for the repeated factor vector
     allows us to cache partial results for the product of the base
     factors for subsequent use.  When we already have a cached partial
     result from a previous iteration, it is best to make use of it
     before looking for another __builtin_powi opportunity.

     As an example, consider x * x * y * y * y * z * z * z * z.
     We want to first compose the product x * y * z, raise it to the
     second power, then multiply this by y * z, and finally multiply
     by z.  This can be done in 5 multiplies provided we cache y * z
     for use in both expressions:

        t1 = y * z
	t2 = t1 * x
	t3 = t2 * t2
	t4 = t1 * t3
	result = t4 * z

     If we instead ignored the cached y * z and first multiplied by
     the __builtin_powi opportunity z * z, we would get the inferior:

        t1 = y * z
	t2 = t1 * x
	t3 = t2 * t2
	t4 = z * z
	t5 = t3 * t4
        result = t5 * y  */

  vec_len = repeat_factor_vec.length ();

  /* Repeatedly look for opportunities to create a builtin_powi call.  */
  while (true)
    {
      HOST_WIDE_INT power;

      /* First look for the largest cached product of factors from
	 preceding iterations.  If found, create a builtin_powi for
	 it if the minimum occurrence count for its factors is at
	 least 2, or just use this cached product as our next
	 multiplicand if the minimum occurrence count is 1.  */
      FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
	{
	  if (rf1->repr && rf1->count > 0)
	    break;
	}

      if (j < vec_len)
	{
	  power = rf1->count;

	  if (power == 1)
	    {
	      iter_result = rf1->repr;

	      if (dump_file && (dump_flags & TDF_DETAILS))
		{
		  unsigned elt;
		  repeat_factor_t rf;
		  fputs ("Multiplying by cached product ", dump_file);
		  for (elt = j; elt < vec_len; elt++)
		    {
		      rf = &repeat_factor_vec[elt];
		      print_generic_expr (dump_file, rf->factor, 0);
		      if (elt < vec_len - 1)
			fputs (" * ", dump_file);
		    }
		  fputs ("\n", dump_file);
		}
	    }
	  else
	    {
	      iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
	      pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
					    build_int_cst (integer_type_node,
							   power));
	      gimple_call_set_lhs (pow_stmt, iter_result);
	      gimple_set_location (pow_stmt, gimple_location (stmt));
	      gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);

	      if (dump_file && (dump_flags & TDF_DETAILS))
		{
		  unsigned elt;
		  repeat_factor_t rf;
		  fputs ("Building __builtin_pow call for cached product (",
			 dump_file);
		  for (elt = j; elt < vec_len; elt++)
		    {
		      rf = &repeat_factor_vec[elt];
		      print_generic_expr (dump_file, rf->factor, 0);
		      if (elt < vec_len - 1)
			fputs (" * ", dump_file);
		    }
		  fprintf (dump_file, ")^" HOST_WIDE_INT_PRINT_DEC "\n",
			   power);
		}
	    }
	}
      else
	{
	  /* Otherwise, find the first factor in the repeated factor
	     vector whose occurrence count is at least 2.  If no such
	     factor exists, there are no builtin_powi opportunities
	     remaining.  */
	  FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
	    {
	      if (rf1->count >= 2)
		break;
	    }

	  if (j >= vec_len)
	    break;

	  power = rf1->count;

	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      unsigned elt;
	      repeat_factor_t rf;
	      fputs ("Building __builtin_pow call for (", dump_file);
	      for (elt = j; elt < vec_len; elt++)
		{
		  rf = &repeat_factor_vec[elt];
		  print_generic_expr (dump_file, rf->factor, 0);
		  if (elt < vec_len - 1)
		    fputs (" * ", dump_file);
		}
	      fprintf (dump_file, ")^" HOST_WIDE_INT_PRINT_DEC "\n", power);
	    }

	  reassociate_stats.pows_created++;

	  /* Visit each element of the vector in reverse order (so that
	     high-occurrence elements are visited first, and within the
	     same occurrence count, lower-ranked elements are visited
	     first).  Form a linear product of all elements in this order
	     whose occurrence count is at least that of element J.
	     Record the SSA name representing the product of each element
	     with all subsequent elements in the vector.  */
	  if (j == vec_len - 1)
	    rf1->repr = rf1->factor;
	  else
	    {
	      for (ii = vec_len - 2; ii >= (int)j; ii--)
		{
		  tree op1, op2;

		  rf1 = &repeat_factor_vec[ii];
		  rf2 = &repeat_factor_vec[ii + 1];

		  /* Init the last factor's representative to be itself.  */
		  if (!rf2->repr)
		    rf2->repr = rf2->factor;

		  op1 = rf1->factor;
		  op2 = rf2->repr;

		  target_ssa = make_temp_ssa_name (type, NULL, "reassocpow");
		  mul_stmt = gimple_build_assign_with_ops (MULT_EXPR,
							   target_ssa,
							   op1, op2);
		  gimple_set_location (mul_stmt, gimple_location (stmt));
		  gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
		  rf1->repr = target_ssa;

		  /* Don't reprocess the multiply we just introduced.  */
		  gimple_set_visited (mul_stmt, true);
		}
	    }

	  /* Form a call to __builtin_powi for the maximum product
	     just formed, raised to the power obtained earlier.  */
	  rf1 = &repeat_factor_vec[j];
	  iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
	  pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
					build_int_cst (integer_type_node,
						       power));
	  gimple_call_set_lhs (pow_stmt, iter_result);
	  gimple_set_location (pow_stmt, gimple_location (stmt));
	  gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
	}

      /* If we previously formed at least one other builtin_powi call,
	 form the product of this one and those others.  */
      if (result)
	{
	  tree new_result = make_temp_ssa_name (type, NULL, "reassocpow");
	  mul_stmt = gimple_build_assign_with_ops (MULT_EXPR, new_result,
						   result, iter_result);
	  gimple_set_location (mul_stmt, gimple_location (stmt));
	  gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
	  gimple_set_visited (mul_stmt, true);
	  result = new_result;
	}
      else
	result = iter_result;

      /* Decrement the occurrence count of each element in the product
	 by the count found above, and remove this many copies of each
	 factor from OPS.  */
      for (i = j; i < vec_len; i++)
	{
	  unsigned k = power;
	  unsigned n;

	  rf1 = &repeat_factor_vec[i];
	  rf1->count -= power;

	  FOR_EACH_VEC_ELT_REVERSE (*ops, n, oe)
	    {
	      if (oe->op == rf1->factor)
		{
		  if (oe->count <= k)
		    {
		      ops->ordered_remove (n);
		      k -= oe->count;

		      if (k == 0)
			break;
		    }
		  else
		    {
		      oe->count -= k;
		      break;
		    }
		}
	    }
	}
    }

  /* At this point all elements in the repeated factor vector have a
     remaining occurrence count of 0 or 1, and those with a count of 1
     don't have cached representatives.  Re-sort the ops vector and
     clean up.  */
  ops->qsort (sort_by_operand_rank);
  repeat_factor_vec.release ();

  /* Return the final product computed herein.  Note that there may
     still be some elements with single occurrence count left in OPS;
     those will be handled by the normal reassociation logic.  */
  return result;
}

/* Transform STMT at *GSI into a copy by replacing its rhs with NEW_RHS.  */

static void
transform_stmt_to_copy (gimple_stmt_iterator *gsi, gimple stmt, tree new_rhs)
{
  tree rhs1;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Transforming ");
      print_gimple_stmt (dump_file, stmt, 0, 0);
    }

  rhs1 = gimple_assign_rhs1 (stmt);
  gimple_assign_set_rhs_from_tree (gsi, new_rhs);
  update_stmt (stmt);
  remove_visited_stmt_chain (rhs1);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, " into ");
      print_gimple_stmt (dump_file, stmt, 0, 0);
    }
}

/* Transform STMT at *GSI into a multiply of RHS1 and RHS2.  */

static void
transform_stmt_to_multiply (gimple_stmt_iterator *gsi, gimple stmt,
			    tree rhs1, tree rhs2)
{
  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Transforming ");
      print_gimple_stmt (dump_file, stmt, 0, 0);
    }

  gimple_assign_set_rhs_with_ops (gsi, MULT_EXPR, rhs1, rhs2);
  update_stmt (gsi_stmt (*gsi));
  remove_visited_stmt_chain (rhs1);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, " into ");
      print_gimple_stmt (dump_file, stmt, 0, 0);
    }
}

/* Reassociate expressions in basic block BB and its post-dominator
   children.  */

static void
reassociate_bb (basic_block bb)
{
  gimple_stmt_iterator gsi;
  basic_block son;
  gimple stmt = last_stmt (bb);

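  /* If the statement ending this block has not been processed yet, see
     whether comparison chains feeding it can be merged into range tests
     (see maybe_optimize_range_tests earlier in this file).  */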
  if (stmt && !gimple_visited_p (stmt))
    maybe_optimize_range_tests (stmt);

  for (gsi = gsi_last_bb (bb); !gsi_end_p (gsi); gsi_prev (&gsi))
    {
      stmt = gsi_stmt (gsi);

      if (is_gimple_assign (stmt)
	  && !stmt_could_throw_p (stmt))
	{
	  tree lhs, rhs1, rhs2;
	  enum tree_code rhs_code = gimple_assign_rhs_code (stmt);

	  /* If this is not a gimple binary expression, there is
	     nothing for us to do with it.  */
	  if (get_gimple_rhs_class (rhs_code) != GIMPLE_BINARY_RHS)
	    continue;

	  /* If this was part of an already processed statement,
	     we don't need to touch it again.  */
	  if (gimple_visited_p (stmt))
	    {
	      /* This statement might have become dead because of previous
		 reassociations.  */
	      if (has_zero_uses (gimple_get_lhs (stmt)))
		{
		  gsi_remove (&gsi, true);
		  release_defs (stmt);
		  /* If we removed the last stmt above, the iterator now
		     points past the end of the sequence.  Reset it to the
		     last stmt of the block; if the block is now empty,
		     bail out.  */
		  if (gsi_end_p (gsi))
		    {
		      gsi = gsi_last_bb (bb);
		      if (gsi_end_p (gsi))
			break;
		    }
		}
	      continue;
	    }

	  lhs = gimple_assign_lhs (stmt);
	  rhs1 = gimple_assign_rhs1 (stmt);
	  rhs2 = gimple_assign_rhs2 (stmt);

	  /* For non-bit or min/max operations we can't associate
	     all types.  Verify that here.  */
	  if (rhs_code != BIT_IOR_EXPR
	      && rhs_code != BIT_AND_EXPR
	      && rhs_code != BIT_XOR_EXPR
	      && rhs_code != MIN_EXPR
	      && rhs_code != MAX_EXPR
	      && (!can_reassociate_p (lhs)
		  || !can_reassociate_p (rhs1)
		  || !can_reassociate_p (rhs2)))
	    continue;

	  if (associative_tree_code (rhs_code))
	    {
	      vec<operand_entry_t> ops = vNULL;
	      tree powi_result = NULL_TREE;

	      /* There may be no immediate uses left by the time we
		 get here because we may have eliminated them all.  */
	      if (TREE_CODE (lhs) == SSA_NAME && has_zero_uses (lhs))
		continue;

	      gimple_set_visited (stmt, true);
	      linearize_expr_tree (&ops, stmt, true, true);
	      ops.qsort (sort_by_operand_rank);
	      optimize_ops_list (rhs_code, &ops);
	      if (undistribute_ops_list (rhs_code, &ops,
					 loop_containing_stmt (stmt)))
		{
		  ops.qsort (sort_by_operand_rank);
		  optimize_ops_list (rhs_code, &ops);
		}

	      if (rhs_code == BIT_IOR_EXPR || rhs_code == BIT_AND_EXPR)
		optimize_range_tests (rhs_code, &ops);

	      if (first_pass_instance
		  && rhs_code == MULT_EXPR
		  && flag_unsafe_math_optimizations)
		powi_result = attempt_builtin_powi (stmt, &ops);

	      /* If the operand vector is now empty, all operands were
		 consumed by the __builtin_powi optimization.  */
	      if (ops.length () == 0)
		transform_stmt_to_copy (&gsi, stmt, powi_result);
	      else if (ops.length () == 1)
		{
		  tree last_op = ops.last ()->op;

		  if (powi_result)
		    transform_stmt_to_multiply (&gsi, stmt, last_op,
						powi_result);
		  else
		    transform_stmt_to_copy (&gsi, stmt, last_op);
		}
	      else
		{
		  enum machine_mode mode = TYPE_MODE (TREE_TYPE (lhs));
		  int ops_num = ops.length ();
		  int width = get_reassociation_width (ops_num, rhs_code, mode);

		  if (dump_file && (dump_flags & TDF_DETAILS))
		    fprintf (dump_file,
			     "Width = %d was chosen for reassociation\n", width);

		  if (width > 1
		      && ops.length () > 3)
		    rewrite_expr_tree_parallel (stmt, width, ops);
		  else
		    rewrite_expr_tree (stmt, 0, ops, false);

		  /* If we combined some repeated factors into a
		     __builtin_powi call, multiply that result by the
		     reassociated operands.  */
		  if (powi_result)
		    {
		      gimple mul_stmt;
		      tree type = TREE_TYPE (gimple_get_lhs (stmt));
		      tree target_ssa = make_temp_ssa_name (type, NULL,
							    "reassocpow");
		      gimple_set_lhs (stmt, target_ssa);
		      update_stmt (stmt);
		      mul_stmt = gimple_build_assign_with_ops (MULT_EXPR, lhs,
							       powi_result,
							       target_ssa);
		      gimple_set_location (mul_stmt, gimple_location (stmt));
		      gsi_insert_after (&gsi, mul_stmt, GSI_NEW_STMT);
		    }
		}

	      ops.release ();
	    }
	}
    }
  for (son = first_dom_son (CDI_POST_DOMINATORS, bb);
       son;
       son = next_dom_son (CDI_POST_DOMINATORS, son))
    reassociate_bb (son);
}

void dump_ops_vector (FILE *file, vec<operand_entry_t> ops);
void debug_ops_vector (vec<operand_entry_t> ops);

/* Dump the operand entry vector OPS to FILE.  */

void
dump_ops_vector (FILE *file, vec<operand_entry_t> ops)
{
  operand_entry_t oe;
  unsigned int i;

  FOR_EACH_VEC_ELT (ops, i, oe)
    {
      fprintf (file, "Op %d -> rank: %d, tree: ", i, oe->rank);
      print_generic_expr (file, oe->op, 0);
      fprintf (file, "\n");
    }
}

/* Dump the operand entry vector OPS to STDERR.  */

DEBUG_FUNCTION void
debug_ops_vector (vec<operand_entry_t> ops)
{
  dump_ops_vector (stderr, ops);
}

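/* Run both phases over the whole function: break up subtracts while
   walking the dominator tree forward from the entry block, then
   linearize, optimize, and rewrite expressions while walking the
   post-dominator tree from the exit block.  */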
static void
do_reassoc (void)
{
  break_up_subtract_bb (ENTRY_BLOCK_PTR);
  reassociate_bb (EXIT_BLOCK_PTR);
}

/* Initialize the reassociation pass.  */

static void
init_reassoc (void)
{
  int i;
  long rank = 2;
  int *bbs = XNEWVEC (int, n_basic_blocks - NUM_FIXED_BLOCKS);

  /* Find the loops, so that we can prevent moving calculations in
     them.  */
  loop_optimizer_init (AVOID_CFG_MODIFICATIONS);

  memset (&reassociate_stats, 0, sizeof (reassociate_stats));

  operand_entry_pool = create_alloc_pool ("operand entry pool",
					  sizeof (struct operand_entry), 30);
  next_operand_entry_id = 0;

  /* Reverse RPO (Reverse Post Order) will give us something where
     deeper loops come later.  */
  pre_and_rev_post_order_compute (NULL, bbs, false);
  bb_rank = XCNEWVEC (long, last_basic_block);
  operand_rank = pointer_map_create ();

  /* Give each default definition a distinct rank.  This includes
     parameters and the static chain.  Walk backwards over all
     SSA names so that we get proper rank ordering according
     to tree_swap_operands_p.  */
  for (i = num_ssa_names - 1; i > 0; --i)
    {
      tree name = ssa_name (i);
      if (name && SSA_NAME_IS_DEFAULT_DEF (name))
	insert_operand_rank (name, ++rank);
    }

  /* Set up rank for each BB.  */
  for (i = 0; i < n_basic_blocks - NUM_FIXED_BLOCKS; i++)
    bb_rank[bbs[i]] = ++rank << 16;

  free (bbs);
  calculate_dominance_info (CDI_POST_DOMINATORS);
  plus_negates = vNULL;
}

/* Cleanup after the reassociation pass, and print stats if
   requested.  */

static void
fini_reassoc (void)
{
  statistics_counter_event (cfun, "Linearized",
			    reassociate_stats.linearized);
  statistics_counter_event (cfun, "Constants eliminated",
			    reassociate_stats.constants_eliminated);
  statistics_counter_event (cfun, "Ops eliminated",
			    reassociate_stats.ops_eliminated);
  statistics_counter_event (cfun, "Statements rewritten",
			    reassociate_stats.rewritten);
  statistics_counter_event (cfun, "Built-in pow[i] calls encountered",
			    reassociate_stats.pows_encountered);
  statistics_counter_event (cfun, "Built-in powi calls created",
			    reassociate_stats.pows_created);

  pointer_map_destroy (operand_rank);
  free_alloc_pool (operand_entry_pool);
  free (bb_rank);
  plus_negates.release ();
  free_dominance_info (CDI_POST_DOMINATORS);
  loop_optimizer_finalize ();
}

/* Gate and execute functions for Reassociation.  */

static unsigned int
execute_reassoc (void)
{
  init_reassoc ();

  do_reassoc ();
  repropagate_negates ();

  fini_reassoc ();
  return 0;
}

static bool
gate_tree_ssa_reassoc (void)
{
  return flag_tree_reassoc != 0;
}

struct gimple_opt_pass pass_reassoc =
{
 {
  GIMPLE_PASS,
  "reassoc",				/* name */
  OPTGROUP_NONE,                        /* optinfo_flags */
  gate_tree_ssa_reassoc,		/* gate */
  execute_reassoc,			/* execute */
  NULL,					/* sub */
  NULL,					/* next */
  0,					/* static_pass_number */
  TV_TREE_REASSOC,			/* tv_id */
  PROP_cfg | PROP_ssa,			/* properties_required */
  0,					/* properties_provided */
  0,					/* properties_destroyed */
  0,					/* todo_flags_start */
  TODO_verify_ssa
    | TODO_update_ssa_only_virtuals
    | TODO_verify_flow
    | TODO_ggc_collect			/* todo_flags_finish */
 }
};