1 /* Reassociation for trees.
2    Copyright (C) 2005, 2007, 2008, 2009, 2010, 2011
3    Free Software Foundation, Inc.
4    Contributed by Daniel Berlin <dan@dberlin.org>
5 
6 This file is part of GCC.
7 
8 GCC is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 3, or (at your option)
11 any later version.
12 
13 GCC is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16 GNU General Public License for more details.
17 
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3.  If not see
20 <http://www.gnu.org/licenses/>.  */
21 
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "tm.h"
26 #include "tree.h"
27 #include "basic-block.h"
28 #include "tree-pretty-print.h"
29 #include "gimple-pretty-print.h"
30 #include "tree-inline.h"
31 #include "tree-flow.h"
32 #include "gimple.h"
33 #include "tree-dump.h"
34 #include "timevar.h"
35 #include "tree-iterator.h"
36 #include "tree-pass.h"
37 #include "alloc-pool.h"
38 #include "vec.h"
39 #include "langhooks.h"
40 #include "pointer-set.h"
41 #include "cfgloop.h"
42 #include "flags.h"
43 #include "target.h"
44 #include "params.h"
45 #include "diagnostic-core.h"
46 
47 /*  This is a simple global reassociation pass.  It is, in part, based
48     on the LLVM pass of the same name (They do some things more/less
49     than we do, in different orders, etc).
50 
51     It consists of five steps:
52 
53     1. Breaking up subtract operations into addition + negate, where
54     it would promote the reassociation of adds.
55 
56     2. Left linearization of the expression trees, so that (A+B)+(C+D)
57     becomes (((A+B)+C)+D), which is easier for us to rewrite later.
58     During linearization, we place the operands of the binary
59     expressions into a vector of operand_entry_t.
60 
61     3. Optimization of the operand lists, eliminating things like a +
62     -a, a & a, etc.
63 
64     4. Rewrite the expression trees we linearized and optimized so
65     they are in proper rank order.
66 
67     5. Repropagate negates, as nothing else will clean it up ATM.
68 
69     A bit of theory on #4, since nobody seems to write anything down
70     about why it makes sense to do it the way they do it:
71 
72     We could do this much nicer theoretically, but don't (for reasons
73     explained after how to do it theoretically nice :P).
74 
75     In order to promote the most redundancy elimination, you want
76     binary expressions whose operands are the same rank (or
77     preferably, the same value) exposed to the redundancy eliminator,
78     for possible elimination.
79 
80     So the way to do this if we really cared, is to build the new op
81     tree from the leaves to the roots, merging as you go, and putting the
82     new op on the end of the worklist, until you are left with one
83     thing on the worklist.
84 
85     IE if you have to rewrite the following set of operands (listed with
86     rank in parentheses), with opcode PLUS_EXPR:
87 
88     a (1),  b (1),  c (1),  d (2), e (2)
89 
90 
91     We start with our merge worklist empty, and the ops list with all of
92     those on it.
93 
94     You want to first merge all leaves of the same rank, as much as
95     possible.
96 
97     So first build a binary op of
98 
99     mergetmp = a + b, and put "mergetmp" on the merge worklist.
100 
101     Because there is no three operand form of PLUS_EXPR, c is not going to
102     be exposed to redundancy elimination as a rank 1 operand.
103 
104     So you might as well throw it on the merge worklist (you could also
105     consider it to now be a rank two operand, and merge it with d and e,
106     but in this case, you then have evicted e from a binary op. So at
107     least in this situation, you can't win.)
108 
109     Then build a binary op of d + e
110     mergetmp2 = d + e
111 
112     and put mergetmp2 on the merge worklist.
113 
114     so merge worklist = {mergetmp, c, mergetmp2}
115 
116     Continue building binary ops of these operations until you have only
117     one operation left on the worklist.
118 
119     So we have
120 
121     build binary op
122     mergetmp3 = mergetmp + c
123 
124     worklist = {mergetmp2, mergetmp3}
125 
126     mergetmp4 = mergetmp2 + mergetmp3
127 
128     worklist = {mergetmp4}
129 
130     because we have one operation left, we can now just set the original
131     statement equal to the result of that operation.
132 
133     This will at least expose a + b  and d + e to redundancy elimination
134     as binary operations.
135 
136     For extra points, you can reuse the old statements to build the
137     mergetmps, since you shouldn't run out.
138 
139     So why don't we do this?
140 
141     Because it's expensive, and rarely will help.  Most trees we are
142     reassociating have 3 or fewer ops.  If they have 2 ops, they already
143     will be written into a nice single binary op.  If you have 3 ops, a
144     single simple check suffices to tell you whether the first two are of the
145     same rank.  If so, you know to order it
146 
147     mergetmp = op1 + op2
148     newstmt = mergetmp + op3
149 
150     instead of
151     mergetmp = op2 + op3
152     newstmt = mergetmp + op1
153 
154     If all three are of the same rank, you can't expose them all in a
155     single binary operator anyway, so the above is *still* the best you
156     can do.
157 
158     Thus, this is what we do.  When we have three ops left, we check to see
159     what order to put them in, and call it a day.  As a nod to vector sum
160     reduction, we check if any of the ops are really a phi node that is a
161     destructive update for the associating op, and keep the destructive
162     update together for vector sum reduction recognition.  */
163 
164 
165 /* Statistics */
166 static struct
167 {
168   int linearized;
169   int constants_eliminated;
170   int ops_eliminated;
171   int rewritten;
172 } reassociate_stats;
173 
174 /* Operand, rank pair.  */
175 typedef struct operand_entry
176 {
177   unsigned int rank;
178   int id;
179   tree op;
180 } *operand_entry_t;
181 
182 static alloc_pool operand_entry_pool;
183 
184 /* This is used to assign a unique ID to each struct operand_entry
185    so that qsort results are identical on different hosts.  */
186 static int next_operand_entry_id;
187 
188 /* Starting rank number for a given basic block, so that we can rank
189    operations using unmovable instructions in that BB based on the bb
190    depth.  */
191 static long *bb_rank;
192 
193 /* Operand->rank hashtable.  */
194 static struct pointer_map_t *operand_rank;
195 
196 /* Forward decls.  */
197 static long get_rank (tree);
198 
199 
200 /* Bias amount for loop-carried phis.  We want this to be larger than
201    the depth of any reassociation tree we can see, but not larger than
202    the rank difference between two blocks.  */
203 #define PHI_LOOP_BIAS (1 << 15)
204 
205 /* Rank assigned to a phi statement.  If STMT is a loop-carried phi of
206    an innermost loop, and the phi has only a single use which is inside
207    the loop, then the rank is the block rank of the loop latch plus an
208    extra bias for the loop-carried dependence.  This causes expressions
209    calculated into an accumulator variable to be independent for each
210    iteration of the loop.  If STMT is some other phi, the rank is the
211    block rank of its containing block.  */
212 static long
213 phi_rank (gimple stmt)
214 {
215   basic_block bb = gimple_bb (stmt);
216   struct loop *father = bb->loop_father;
217   tree res;
218   unsigned i;
219   use_operand_p use;
220   gimple use_stmt;
221 
222   /* We only care about real loops (those with a latch).  */
223   if (!father->latch)
224     return bb_rank[bb->index];
225 
226   /* Interesting phis must be in headers of innermost loops.  */
227   if (bb != father->header
228       || father->inner)
229     return bb_rank[bb->index];
230 
231   /* Ignore virtual SSA_NAMEs.  */
232   res = gimple_phi_result (stmt);
233   if (!is_gimple_reg (SSA_NAME_VAR (res)))
234     return bb_rank[bb->index];
235 
236   /* The phi definition must have a single use, and that use must be
237      within the loop.  Otherwise this isn't an accumulator pattern.  */
238   if (!single_imm_use (res, &use, &use_stmt)
239       || gimple_bb (use_stmt)->loop_father != father)
240     return bb_rank[bb->index];
241 
242   /* Look for phi arguments from within the loop.  If found, bias this phi.  */
243   for (i = 0; i < gimple_phi_num_args (stmt); i++)
244     {
245       tree arg = gimple_phi_arg_def (stmt, i);
246       if (TREE_CODE (arg) == SSA_NAME
247 	  && !SSA_NAME_IS_DEFAULT_DEF (arg))
248 	{
249 	  gimple def_stmt = SSA_NAME_DEF_STMT (arg);
250 	  if (gimple_bb (def_stmt)->loop_father == father)
251 	    return bb_rank[father->latch->index] + PHI_LOOP_BIAS;
252 	}
253     }
254 
255   /* Must be an uninteresting phi.  */
256   return bb_rank[bb->index];
257 }
258 
259 /* If EXP is an SSA_NAME defined by a PHI statement that represents a
260    loop-carried dependence of an innermost loop, return TRUE; else
261    return FALSE.  */
262 static bool
263 loop_carried_phi (tree exp)
264 {
265   gimple phi_stmt;
266   long block_rank;
267 
268   if (TREE_CODE (exp) != SSA_NAME
269       || SSA_NAME_IS_DEFAULT_DEF (exp))
270     return false;
271 
272   phi_stmt = SSA_NAME_DEF_STMT (exp);
273 
274   if (gimple_code (phi_stmt) != GIMPLE_PHI)
275     return false;
276 
277   /* Non-loop-carried phis have block rank.  Loop-carried phis have
278      an additional bias added in.  If this phi doesn't have block rank,
279      it's biased and should not be propagated.  */
280   block_rank = bb_rank[gimple_bb (phi_stmt)->index];
281 
282   if (phi_rank (phi_stmt) != block_rank)
283     return true;
284 
285   return false;
286 }
287 
288 /* Return the maximum of RANK and the rank that should be propagated
289    from expression OP.  For most operands, this is just the rank of OP.
290    For loop-carried phis, the value is zero to avoid undoing the bias
291    in favor of the phi.  */
292 static long
293 propagate_rank (long rank, tree op)
294 {
295   long op_rank;
296 
297   if (loop_carried_phi (op))
298     return rank;
299 
300   op_rank = get_rank (op);
301 
302   return MAX (rank, op_rank);
303 }
304 
305 /* Look up the rank recorded for expression E, or -1 if none exists.  */
306 
307 static inline long
308 find_operand_rank (tree e)
309 {
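  /* Ranks are stored directly in the pointer-map slots, as integers cast
     to pointers; a missing entry means no rank has been computed yet.  */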
310   void **slot = pointer_map_contains (operand_rank, e);
311   return slot ? (long) (intptr_t) *slot : -1;
312 }
313 
314 /* Insert {E,RANK} into the operand rank hashtable.  */
315 
316 static inline void
317 insert_operand_rank (tree e, long rank)
318 {
319   void **slot;
320   gcc_assert (rank > 0);
321   slot = pointer_map_insert (operand_rank, e);
322   gcc_assert (!*slot);
323   *slot = (void *) (intptr_t) rank;
324 }
325 
326 /* Given an expression E, return the rank of the expression.  */
327 
328 static long
329 get_rank (tree e)
330 {
331   /* Constants have rank 0.  */
332   if (is_gimple_min_invariant (e))
333     return 0;
334 
335   /* SSA_NAMEs have the rank of the expression they are the result
336      of.
337      For globals and uninitialized values, the rank is 0.
338      For function arguments, use the pre-setup rank.
339      For PHI nodes, stores, asm statements, etc., we use the rank of
340      the BB.
341      For simple operations, the rank is the maximum rank of any of
342      its operands, or the bb_rank, whichever is less.
343      I make no claims that this is optimal; however, it gives good
344      results.  */
345 
346   /* We make an exception to the normal ranking system to break
347      dependences of accumulator variables in loops.  Suppose we
348      have a simple one-block loop containing:
349 
350        x_1 = phi(x_0, x_2)
351        b = a + x_1
352        c = b + d
353        x_2 = c + e
354 
355      As shown, each iteration of the calculation into x is fully
356      dependent upon the iteration before it.  We would prefer to
357      see this in the form:
358 
359        x_1 = phi(x_0, x_2)
360        b = a + d
361        c = b + e
362        x_2 = c + x_1
363 
364      If the loop is unrolled, the calculations of b and c from
365      different iterations can be interleaved.
366 
367      To obtain this result during reassociation, we bias the rank
368      of the phi definition x_1 upward, when it is recognized as an
369      accumulator pattern.  The artificial rank causes it to be
370      added last, providing the desired independence.  */
371 
372   if (TREE_CODE (e) == SSA_NAME)
373     {
374       gimple stmt;
375       long rank;
376       int i, n;
377       tree op;
378 
379       if (TREE_CODE (SSA_NAME_VAR (e)) == PARM_DECL
380 	  && SSA_NAME_IS_DEFAULT_DEF (e))
381 	return find_operand_rank (e);
382 
383       stmt = SSA_NAME_DEF_STMT (e);
384       if (gimple_bb (stmt) == NULL)
385 	return 0;
386 
387       if (gimple_code (stmt) == GIMPLE_PHI)
388 	return phi_rank (stmt);
389 
390       if (!is_gimple_assign (stmt)
391 	  || gimple_vdef (stmt))
392 	return bb_rank[gimple_bb (stmt)->index];
393 
394       /* If we already have a rank for this expression, use that.  */
395       rank = find_operand_rank (e);
396       if (rank != -1)
397 	return rank;
398 
399       /* Otherwise, find the maximum rank for the operands.  As an
400 	 exception, remove the bias from loop-carried phis when propagating
401 	 the rank so that dependent operations are not also biased.  */
402       rank = 0;
403       if (gimple_assign_single_p (stmt))
404 	{
405 	  tree rhs = gimple_assign_rhs1 (stmt);
406 	  n = TREE_OPERAND_LENGTH (rhs);
407 	  if (n == 0)
408 	    rank = propagate_rank (rank, rhs);
409 	  else
410 	    {
411 	      for (i = 0; i < n; i++)
412 		{
413 		  op = TREE_OPERAND (rhs, i);
414 
415 		  if (op != NULL_TREE)
416 		    rank = propagate_rank (rank, op);
417 		}
418 	    }
419 	}
420       else
421 	{
422 	  n = gimple_num_ops (stmt);
423 	  for (i = 1; i < n; i++)
424 	    {
425 	      op = gimple_op (stmt, i);
426 	      gcc_assert (op);
427 	      rank = propagate_rank (rank, op);
428 	    }
429 	}
430 
431       if (dump_file && (dump_flags & TDF_DETAILS))
432 	{
433 	  fprintf (dump_file, "Rank for ");
434 	  print_generic_expr (dump_file, e, 0);
435 	  fprintf (dump_file, " is %ld\n", (rank + 1));
436 	}
437 
438       /* Note the rank in the hashtable so we don't recompute it.  */
439       insert_operand_rank (e, (rank + 1));
440       return (rank + 1);
441     }
442 
443   /* Globals, etc., are rank 0.  */
444   return 0;
445 }
446 
447 DEF_VEC_P(operand_entry_t);
448 DEF_VEC_ALLOC_P(operand_entry_t, heap);
449 
450 /* We want integer ones to end up last no matter what, since they are
451    the ones we can do the most with.  */
452 #define INTEGER_CONST_TYPE (1 << 3)
453 #define FLOAT_CONST_TYPE (1 << 2)
454 #define OTHER_CONST_TYPE (1 << 1)
455 
456 /* Classify an invariant tree into integer, float, or other, so that
457    we can sort them to be near other constants of the same type.  */
458 static inline int
459 constant_type (tree t)
460 {
461   if (INTEGRAL_TYPE_P (TREE_TYPE (t)))
462     return INTEGER_CONST_TYPE;
463   else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (t)))
464     return FLOAT_CONST_TYPE;
465   else
466     return OTHER_CONST_TYPE;
467 }
468 
469 /* qsort comparison function to sort operand entries PA and PB by rank
470    so that the sorted array is ordered by rank in decreasing order.  */
471 static int
472 sort_by_operand_rank (const void *pa, const void *pb)
473 {
474   const operand_entry_t oea = *(const operand_entry_t *)pa;
475   const operand_entry_t oeb = *(const operand_entry_t *)pb;
476 
477   /* It's nicer for optimize_expression if constants that are likely
478      to fold when added/multiplied/whatever are put next to each
479      other.  Since all constants have rank 0, order them by type.  */
480   if (oeb->rank == 0 &&  oea->rank == 0)
481     {
482       if (constant_type (oeb->op) != constant_type (oea->op))
483 	return constant_type (oeb->op) - constant_type (oea->op);
484       else
485 	/* To make sorting result stable, we use unique IDs to determine
486 	   order.  */
487         return oeb->id - oea->id;
488     }
489 
490   /* Lastly, make sure the versions that are the same go next to each
491      other.  We use SSA_NAME_VERSION because it's stable.  */
492   if ((oeb->rank - oea->rank == 0)
493       && TREE_CODE (oea->op) == SSA_NAME
494       && TREE_CODE (oeb->op) == SSA_NAME)
495     {
496       if (SSA_NAME_VERSION (oeb->op) != SSA_NAME_VERSION (oea->op))
497 	return SSA_NAME_VERSION (oeb->op) - SSA_NAME_VERSION (oea->op);
498       else
499 	return oeb->id - oea->id;
500     }
501 
502   if (oeb->rank != oea->rank)
503     return oeb->rank - oea->rank;
504   else
505     return oeb->id - oea->id;
506 }
507 
508 /* Add an operand entry to *OPS for the tree operand OP.  */
509 
510 static void
511 add_to_ops_vec (VEC(operand_entry_t, heap) **ops, tree op)
512 {
513   operand_entry_t oe = (operand_entry_t) pool_alloc (operand_entry_pool);
514 
515   oe->op = op;
516   oe->rank = get_rank (op);
517   oe->id = next_operand_entry_id++;
518   VEC_safe_push (operand_entry_t, heap, *ops, oe);
519 }
520 
521 /* Return true if STMT is a reassociable operation containing a binary
522    operation with tree code CODE, and is inside LOOP.  */
523 
524 static bool
525 is_reassociable_op (gimple stmt, enum tree_code code, struct loop *loop)
526 {
527   basic_block bb = gimple_bb (stmt);
528 
529   if (bb == NULL)
530     return false;
531 
532   if (!flow_bb_inside_loop_p (loop, bb))
533     return false;
534 
535   if (is_gimple_assign (stmt)
536       && gimple_assign_rhs_code (stmt) == code
537       && has_single_use (gimple_assign_lhs (stmt)))
538     return true;
539 
540   return false;
541 }
542 
543 
544 /* Given NAME, if NAME is defined by a unary operation OPCODE, return the
545    operand of that operation.  Otherwise, return NULL_TREE.  */
546 
547 static tree
548 get_unary_op (tree name, enum tree_code opcode)
549 {
550   gimple stmt = SSA_NAME_DEF_STMT (name);
551 
552   if (!is_gimple_assign (stmt))
553     return NULL_TREE;
554 
555   if (gimple_assign_rhs_code (stmt) == opcode)
556     return gimple_assign_rhs1 (stmt);
557   return NULL_TREE;
558 }
559 
560 /* If CURR and LAST are a pair of ops that OPCODE allows us to
561    eliminate through equivalences, do so, remove them from OPS, and
562    return true.  Otherwise, return false.  */
563 
564 static bool
565 eliminate_duplicate_pair (enum tree_code opcode,
566 			  VEC (operand_entry_t, heap) **ops,
567 			  bool *all_done,
568 			  unsigned int i,
569 			  operand_entry_t curr,
570 			  operand_entry_t last)
571 {
572 
573   /* If we have two of the same op, and the opcode is &, |, min, or max,
574      we can eliminate one of them.
575      If we have two of the same op, and the opcode is ^, we can
576      eliminate both of them.  */
577 
578   if (last && last->op == curr->op)
579     {
580       switch (opcode)
581 	{
582 	case MAX_EXPR:
583 	case MIN_EXPR:
584 	case BIT_IOR_EXPR:
585 	case BIT_AND_EXPR:
586 	  if (dump_file && (dump_flags & TDF_DETAILS))
587 	    {
588 	      fprintf (dump_file, "Equivalence: ");
589 	      print_generic_expr (dump_file, curr->op, 0);
590 	      fprintf (dump_file, " [&|minmax] ");
591 	      print_generic_expr (dump_file, last->op, 0);
592 	      fprintf (dump_file, " -> ");
593 	      print_generic_stmt (dump_file, last->op, 0);
594 	    }
595 
596 	  VEC_ordered_remove (operand_entry_t, *ops, i);
597 	  reassociate_stats.ops_eliminated ++;
598 
599 	  return true;
600 
601 	case BIT_XOR_EXPR:
602 	  if (dump_file && (dump_flags & TDF_DETAILS))
603 	    {
604 	      fprintf (dump_file, "Equivalence: ");
605 	      print_generic_expr (dump_file, curr->op, 0);
606 	      fprintf (dump_file, " ^ ");
607 	      print_generic_expr (dump_file, last->op, 0);
608 	      fprintf (dump_file, " -> nothing\n");
609 	    }
610 
611 	  reassociate_stats.ops_eliminated += 2;
612 
613 	  if (VEC_length (operand_entry_t, *ops) == 2)
614 	    {
615 	      VEC_free (operand_entry_t, heap, *ops);
616 	      *ops = NULL;
617 	      add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (last->op)));
618 	      *all_done = true;
619 	    }
620 	  else
621 	    {
622 	      VEC_ordered_remove (operand_entry_t, *ops, i-1);
623 	      VEC_ordered_remove (operand_entry_t, *ops, i-1);
624 	    }
625 
626 	  return true;
627 
628 	default:
629 	  break;
630 	}
631     }
632   return false;
633 }
634 
635 static VEC(tree, heap) *plus_negates;
636 
637 /* If OPCODE is PLUS_EXPR and CURR->OP is a negate expression or a bitwise not
638    expression, look in OPS for a corresponding positive operation to cancel
639    it out.  If we find one, remove the other from OPS, replace
640    OPS[CURRINDEX] with 0 or -1, respectively, and return true.  Otherwise,
641    return false. */
642 
643 static bool
644 eliminate_plus_minus_pair (enum tree_code opcode,
645 			   VEC (operand_entry_t, heap) **ops,
646 			   unsigned int currindex,
647 			   operand_entry_t curr)
648 {
649   tree negateop;
650   tree notop;
651   unsigned int i;
652   operand_entry_t oe;
653 
654   if (opcode != PLUS_EXPR || TREE_CODE (curr->op) != SSA_NAME)
655     return false;
656 
657   negateop = get_unary_op (curr->op, NEGATE_EXPR);
658   notop = get_unary_op (curr->op, BIT_NOT_EXPR);
659   if (negateop == NULL_TREE && notop == NULL_TREE)
660     return false;
661 
662   /* Any non-negated version will have a rank that is one less than
663      the current rank.  So once we hit those ranks, if we don't find
664      one, we can stop.  */
665 
666   for (i = currindex + 1;
667        VEC_iterate (operand_entry_t, *ops, i, oe)
668        && oe->rank >= curr->rank - 1 ;
669        i++)
670     {
671       if (oe->op == negateop)
672 	{
673 
674 	  if (dump_file && (dump_flags & TDF_DETAILS))
675 	    {
676 	      fprintf (dump_file, "Equivalence: ");
677 	      print_generic_expr (dump_file, negateop, 0);
678 	      fprintf (dump_file, " + -");
679 	      print_generic_expr (dump_file, oe->op, 0);
680 	      fprintf (dump_file, " -> 0\n");
681 	    }
682 
683 	  VEC_ordered_remove (operand_entry_t, *ops, i);
684 	  add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (oe->op)));
685 	  VEC_ordered_remove (operand_entry_t, *ops, currindex);
686 	  reassociate_stats.ops_eliminated ++;
687 
688 	  return true;
689 	}
690       else if (oe->op == notop)
691 	{
692 	  tree op_type = TREE_TYPE (oe->op);
693 
694 	  if (dump_file && (dump_flags & TDF_DETAILS))
695 	    {
696 	      fprintf (dump_file, "Equivalence: ");
697 	      print_generic_expr (dump_file, notop, 0);
698 	      fprintf (dump_file, " + ~");
699 	      print_generic_expr (dump_file, oe->op, 0);
700 	      fprintf (dump_file, " -> -1\n");
701 	    }
702 
703 	  VEC_ordered_remove (operand_entry_t, *ops, i);
704 	  add_to_ops_vec (ops, build_int_cst_type (op_type, -1));
705 	  VEC_ordered_remove (operand_entry_t, *ops, currindex);
706 	  reassociate_stats.ops_eliminated ++;
707 
708 	  return true;
709 	}
710     }
711 
712   /* CURR->OP is a negate expr in a plus expr: save it for later
713      inspection in repropagate_negates().  */
714   if (negateop != NULL_TREE)
715     VEC_safe_push (tree, heap, plus_negates, curr->op);
716 
717   return false;
718 }
719 
720 /* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR, and CURR->OP is really a
721    bitwise not expression, look in OPS for a corresponding operand to
722    cancel it out.  If we find one, collapse OPS down to the single
723    constant that results (0 for BIT_AND_EXPR, all ones for BIT_IOR_EXPR)
724    and return true.  Otherwise, return false.  */
725 
726 static bool
727 eliminate_not_pairs (enum tree_code opcode,
728 		     VEC (operand_entry_t, heap) **ops,
729 		     unsigned int currindex,
730 		     operand_entry_t curr)
731 {
732   tree notop;
733   unsigned int i;
734   operand_entry_t oe;
735 
736   if ((opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
737       || TREE_CODE (curr->op) != SSA_NAME)
738     return false;
739 
740   notop = get_unary_op (curr->op, BIT_NOT_EXPR);
741   if (notop == NULL_TREE)
742     return false;
743 
744   /* Any non-not version will have a rank that is one less than
745      the current rank.  So once we hit those ranks, if we don't find
746      one, we can stop.  */
747 
748   for (i = currindex + 1;
749        VEC_iterate (operand_entry_t, *ops, i, oe)
750        && oe->rank >= curr->rank - 1;
751        i++)
752     {
753       if (oe->op == notop)
754 	{
755 	  if (dump_file && (dump_flags & TDF_DETAILS))
756 	    {
757 	      fprintf (dump_file, "Equivalence: ");
758 	      print_generic_expr (dump_file, notop, 0);
759 	      if (opcode == BIT_AND_EXPR)
760 		fprintf (dump_file, " & ~");
761 	      else if (opcode == BIT_IOR_EXPR)
762 		fprintf (dump_file, " | ~");
763 	      print_generic_expr (dump_file, oe->op, 0);
764 	      if (opcode == BIT_AND_EXPR)
765 		fprintf (dump_file, " -> 0\n");
766 	      else if (opcode == BIT_IOR_EXPR)
767 		fprintf (dump_file, " -> -1\n");
768 	    }
769 
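	  /* x & ~x is 0 and x | ~x is all ones; either constant absorbs
	     every other operand of the chain, so the whole operand list
	     collapses to that single value.  */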
770 	  if (opcode == BIT_AND_EXPR)
771 	    oe->op = build_zero_cst (TREE_TYPE (oe->op));
772 	  else if (opcode == BIT_IOR_EXPR)
773 	    oe->op = build_low_bits_mask (TREE_TYPE (oe->op),
774 					  TYPE_PRECISION (TREE_TYPE (oe->op)));
775 
776 	  reassociate_stats.ops_eliminated
777 	    += VEC_length (operand_entry_t, *ops) - 1;
778 	  VEC_free (operand_entry_t, heap, *ops);
779 	  *ops = NULL;
780 	  VEC_safe_push (operand_entry_t, heap, *ops, oe);
781 	  return true;
782 	}
783     }
784 
785   return false;
786 }
787 
788 /* Use a constant value that may be present in OPS to try to eliminate
789    operands.  Note that this function is only really used when we've
790    eliminated ops for other reasons, or merged constants.  Across
791    single statements, fold already does all of this, plus more.  There
792    is little point in duplicating logic, so I've only included the
793    identities that I could ever construct testcases to trigger.  */
794 
795 static void
796 eliminate_using_constants (enum tree_code opcode,
797 			   VEC(operand_entry_t, heap) **ops)
798 {
799   operand_entry_t oelast = VEC_last (operand_entry_t, *ops);
800   tree type = TREE_TYPE (oelast->op);
801 
802   if (oelast->rank == 0
803       && (INTEGRAL_TYPE_P (type) || FLOAT_TYPE_P (type)))
804     {
805       switch (opcode)
806 	{
807 	case BIT_AND_EXPR:
808 	  if (integer_zerop (oelast->op))
809 	    {
810 	      if (VEC_length (operand_entry_t, *ops) != 1)
811 		{
812 		  if (dump_file && (dump_flags & TDF_DETAILS))
813 		    fprintf (dump_file, "Found & 0, removing all other ops\n");
814 
815 		  reassociate_stats.ops_eliminated
816 		    += VEC_length (operand_entry_t, *ops) - 1;
817 
818 		  VEC_free (operand_entry_t, heap, *ops);
819 		  *ops = NULL;
820 		  VEC_safe_push (operand_entry_t, heap, *ops, oelast);
821 		  return;
822 		}
823 	    }
824 	  else if (integer_all_onesp (oelast->op))
825 	    {
826 	      if (VEC_length (operand_entry_t, *ops) != 1)
827 		{
828 		  if (dump_file && (dump_flags & TDF_DETAILS))
829 		    fprintf (dump_file, "Found & -1, removing\n");
830 		  VEC_pop (operand_entry_t, *ops);
831 		  reassociate_stats.ops_eliminated++;
832 		}
833 	    }
834 	  break;
835 	case BIT_IOR_EXPR:
836 	  if (integer_all_onesp (oelast->op))
837 	    {
838 	      if (VEC_length (operand_entry_t, *ops) != 1)
839 		{
840 		  if (dump_file && (dump_flags & TDF_DETAILS))
841 		    fprintf (dump_file, "Found | -1, removing all other ops\n");
842 
843 		  reassociate_stats.ops_eliminated
844 		    += VEC_length (operand_entry_t, *ops) - 1;
845 
846 		  VEC_free (operand_entry_t, heap, *ops);
847 		  *ops = NULL;
848 		  VEC_safe_push (operand_entry_t, heap, *ops, oelast);
849 		  return;
850 		}
851 	    }
852 	  else if (integer_zerop (oelast->op))
853 	    {
854 	      if (VEC_length (operand_entry_t, *ops) != 1)
855 		{
856 		  if (dump_file && (dump_flags & TDF_DETAILS))
857 		    fprintf (dump_file, "Found | 0, removing\n");
858 		  VEC_pop (operand_entry_t, *ops);
859 		  reassociate_stats.ops_eliminated++;
860 		}
861 	    }
862 	  break;
863 	case MULT_EXPR:
864 	  if (integer_zerop (oelast->op)
865 	      || (FLOAT_TYPE_P (type)
866 		  && !HONOR_NANS (TYPE_MODE (type))
867 		  && !HONOR_SIGNED_ZEROS (TYPE_MODE (type))
868 		  && real_zerop (oelast->op)))
869 	    {
870 	      if (VEC_length (operand_entry_t, *ops) != 1)
871 		{
872 		  if (dump_file && (dump_flags & TDF_DETAILS))
873 		    fprintf (dump_file, "Found * 0, removing all other ops\n");
874 
875 		  reassociate_stats.ops_eliminated
876 		    += VEC_length (operand_entry_t, *ops) - 1;
877 		  VEC_free (operand_entry_t, heap, *ops);
878 		  *ops = NULL;
879 		  VEC_safe_push (operand_entry_t, heap, *ops, oelast);
880 		  return;
881 		}
882 	    }
883 	  else if (integer_onep (oelast->op)
884 		   || (FLOAT_TYPE_P (type)
885 		       && !HONOR_SNANS (TYPE_MODE (type))
886 		       && real_onep (oelast->op)))
887 	    {
888 	      if (VEC_length (operand_entry_t, *ops) != 1)
889 		{
890 		  if (dump_file && (dump_flags & TDF_DETAILS))
891 		    fprintf (dump_file, "Found * 1, removing\n");
892 		  VEC_pop (operand_entry_t, *ops);
893 		  reassociate_stats.ops_eliminated++;
894 		  return;
895 		}
896 	    }
897 	  break;
898 	case BIT_XOR_EXPR:
899 	case PLUS_EXPR:
900 	case MINUS_EXPR:
901 	  if (integer_zerop (oelast->op)
902 	      || (FLOAT_TYPE_P (type)
903 		  && (opcode == PLUS_EXPR || opcode == MINUS_EXPR)
904 		  && fold_real_zero_addition_p (type, oelast->op,
905 						opcode == MINUS_EXPR)))
906 	    {
907 	      if (VEC_length (operand_entry_t, *ops) != 1)
908 		{
909 		  if (dump_file && (dump_flags & TDF_DETAILS))
910 		    fprintf (dump_file, "Found [|^+] 0, removing\n");
911 		  VEC_pop (operand_entry_t, *ops);
912 		  reassociate_stats.ops_eliminated++;
913 		  return;
914 		}
915 	    }
916 	  break;
917 	default:
918 	  break;
919 	}
920     }
921 }
922 
923 
924 static void linearize_expr_tree (VEC(operand_entry_t, heap) **, gimple,
925 				 bool, bool);
926 
927 /* Structure for tracking and counting operands.  */
928 typedef struct oecount_s {
929   int cnt;
930   int id;
931   enum tree_code oecode;
932   tree op;
933 } oecount;
934 
935 DEF_VEC_O(oecount);
936 DEF_VEC_ALLOC_O(oecount,heap);
937 
938 /* The heap for the oecount hashtable and the sorted list of operands.  */
939 static VEC (oecount, heap) *cvec;
940 
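/* Entries in the counting hashtable are not oecount structures themselves.
   Each slot instead holds the index of an element in CVEC, biased by 42 so
   that the encoded value can never collide with the empty or deleted slot
   markers used by the hashtable implementation; oecount_hash and oecount_eq
   subtract the bias again before dereferencing.  */
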
941 /* Hash function for oecount.  */
942 
943 static hashval_t
944 oecount_hash (const void *p)
945 {
946   const oecount *c = VEC_index (oecount, cvec, (size_t)p - 42);
947   return htab_hash_pointer (c->op) ^ (hashval_t)c->oecode;
948 }
949 
950 /* Comparison function for oecount.  */
951 
952 static int
953 oecount_eq (const void *p1, const void *p2)
954 {
955   const oecount *c1 = VEC_index (oecount, cvec, (size_t)p1 - 42);
956   const oecount *c2 = VEC_index (oecount, cvec, (size_t)p2 - 42);
957   return (c1->oecode == c2->oecode
958 	  && c1->op == c2->op);
959 }
960 
961 /* Comparison function for qsort sorting oecount elements by count.  */
962 
963 static int
964 oecount_cmp (const void *p1, const void *p2)
965 {
966   const oecount *c1 = (const oecount *)p1;
967   const oecount *c2 = (const oecount *)p2;
968   if (c1->cnt != c2->cnt)
969     return c1->cnt - c2->cnt;
970   else
971     /* If counts are identical, use unique IDs to stabilize qsort.  */
972     return c1->id - c2->id;
973 }
974 
975 /* Walk the linear chain with result *DEF, searching for an operation
976    with operand OP and code OPCODE, and remove it from the chain.  *DEF
977    is updated if there is only one operand but no operation left.  */
978 
979 static void
980 zero_one_operation (tree *def, enum tree_code opcode, tree op)
981 {
982   gimple stmt = SSA_NAME_DEF_STMT (*def);
983 
984   do
985     {
986       tree name = gimple_assign_rhs1 (stmt);
987 
988       /* If this is the operation we are looking for and one of the operands
989 	 is ours, simply propagate the other operand into the stmt's
990 	 single use.  */
991       if (gimple_assign_rhs_code (stmt) == opcode
992 	  && (name == op
993 	      || gimple_assign_rhs2 (stmt) == op))
994 	{
995 	  gimple use_stmt;
996 	  use_operand_p use;
997 	  gimple_stmt_iterator gsi;
998 	  if (name == op)
999 	    name = gimple_assign_rhs2 (stmt);
1000 	  gcc_assert (has_single_use (gimple_assign_lhs (stmt)));
1001 	  single_imm_use (gimple_assign_lhs (stmt), &use, &use_stmt);
1002 	  if (gimple_assign_lhs (stmt) == *def)
1003 	    *def = name;
1004 	  SET_USE (use, name);
1005 	  if (TREE_CODE (name) != SSA_NAME)
1006 	    update_stmt (use_stmt);
1007 	  gsi = gsi_for_stmt (stmt);
1008 	  gsi_remove (&gsi, true);
1009 	  release_defs (stmt);
1010 	  return;
1011 	}
1012 
1013       /* Continue walking the chain.  */
1014       gcc_assert (name != op
1015 		  && TREE_CODE (name) == SSA_NAME);
1016       stmt = SSA_NAME_DEF_STMT (name);
1017     }
1018   while (1);
1019 }
1020 
1021 /* Builds one statement performing OP1 OPCODE OP2 using TMPVAR for
1022    the result.  Places the statement after the definition of either
1023    OP1 or OP2.  Returns the new statement.  */
1024 
1025 static gimple
1026 build_and_add_sum (tree tmpvar, tree op1, tree op2, enum tree_code opcode)
1027 {
1028   gimple op1def = NULL, op2def = NULL;
1029   gimple_stmt_iterator gsi;
1030   tree op;
1031   gimple sum;
1032 
1033   /* Create the addition statement.  */
1034   sum = gimple_build_assign_with_ops (opcode, tmpvar, op1, op2);
1035   op = make_ssa_name (tmpvar, sum);
1036   gimple_assign_set_lhs (sum, op);
1037 
1038   /* Find an insertion place and insert.  */
1039   if (TREE_CODE (op1) == SSA_NAME)
1040     op1def = SSA_NAME_DEF_STMT (op1);
1041   if (TREE_CODE (op2) == SSA_NAME)
1042     op2def = SSA_NAME_DEF_STMT (op2);
1043   if ((!op1def || gimple_nop_p (op1def))
1044       && (!op2def || gimple_nop_p (op2def)))
1045     {
1046       gsi = gsi_after_labels (single_succ (ENTRY_BLOCK_PTR));
1047       gsi_insert_before (&gsi, sum, GSI_NEW_STMT);
1048     }
1049   else if ((!op1def || gimple_nop_p (op1def))
1050 	   || (op2def && !gimple_nop_p (op2def)
1051 	       && stmt_dominates_stmt_p (op1def, op2def)))
1052     {
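      /* OP1 either has no real defining statement or its definition
	 dominates OP2's, so the new statement has to be placed next to
	 OP2's definition.  */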
1053       if (gimple_code (op2def) == GIMPLE_PHI)
1054 	{
1055 	  gsi = gsi_after_labels (gimple_bb (op2def));
1056 	  gsi_insert_before (&gsi, sum, GSI_NEW_STMT);
1057 	}
1058       else
1059 	{
1060 	  if (!stmt_ends_bb_p (op2def))
1061 	    {
1062 	      gsi = gsi_for_stmt (op2def);
1063 	      gsi_insert_after (&gsi, sum, GSI_NEW_STMT);
1064 	    }
1065 	  else
1066 	    {
1067 	      edge e;
1068 	      edge_iterator ei;
1069 
1070 	      FOR_EACH_EDGE (e, ei, gimple_bb (op2def)->succs)
1071 		if (e->flags & EDGE_FALLTHRU)
1072 		  gsi_insert_on_edge_immediate (e, sum);
1073 	    }
1074 	}
1075     }
1076   else
1077     {
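      /* Otherwise OP1 has a real defining statement that is not known to
	 dominate OP2's, so place the new statement next to OP1's
	 definition.  */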
1078       if (gimple_code (op1def) == GIMPLE_PHI)
1079 	{
1080 	  gsi = gsi_after_labels (gimple_bb (op1def));
1081 	  gsi_insert_before (&gsi, sum, GSI_NEW_STMT);
1082 	}
1083       else
1084 	{
1085 	  if (!stmt_ends_bb_p (op1def))
1086 	    {
1087 	      gsi = gsi_for_stmt (op1def);
1088 	      gsi_insert_after (&gsi, sum, GSI_NEW_STMT);
1089 	    }
1090 	  else
1091 	    {
1092 	      edge e;
1093 	      edge_iterator ei;
1094 
1095 	      FOR_EACH_EDGE (e, ei, gimple_bb (op1def)->succs)
1096 		if (e->flags & EDGE_FALLTHRU)
1097 		  gsi_insert_on_edge_immediate (e, sum);
1098 	    }
1099 	}
1100     }
1101   update_stmt (sum);
1102 
1103   return sum;
1104 }
1105 
1106 /* Perform un-distribution of divisions and multiplications.
1107    A * X + B * X is transformed into (A + B) * X and A / X + B / X
1108    to (A + B) / X for real X.
1109 
1110    The algorithm is organized as follows.
1111 
1112     - First we walk the addition chain *OPS looking for summands that
1113       are defined by a multiplication or a real division.  This results
1114       in the candidates bitmap with relevant indices into *OPS.
1115 
1116     - Second we build the chains of multiplications or divisions for
1117       these candidates, counting the number of occurrences of (operand, code)
1118       pairs in all of the candidates chains.
1119 
1120     - Third we sort the (operand, code) pairs by number of occurrence and
1121       process them starting with the pair with the most uses.
1122 
1123       * For each such pair we walk the candidates again to build a
1124         second candidate bitmap noting all multiplication/division chains
1125 	that have at least one occurrence of (operand, code).
1126 
1127       * We build an alternate addition chain only covering these
1128         candidates with one (operand, code) operation removed from their
1129 	multiplication/division chain.
1130 
1131       * The first candidate gets replaced by the alternate addition chain
1132         multiplied/divided by the operand.
1133 
1134       * All candidate chains get disabled for further processing and
1135         processing of (operand, code) pairs continues.
1136 
1137   The alternate addition chains built are re-processed by the main
1138   reassociation algorithm which allows optimizing a * x * y + b * y * x
1139   to (a + b) * x * y in one invocation of the reassociation pass.  */
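
/* For example, given *OPS = { a*x, b*x, c } and OPCODE PLUS_EXPR, the
   pair (x, MULT_EXPR) occurs twice.  a*x and b*x are stripped down to a
   and b, an addition chain t = a + b is built, the first candidate is
   replaced by t * x and the second by 0, leaving { t*x, 0, c } for the
   caller to reassociate further.  */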
1140 
1141 static bool
1142 undistribute_ops_list (enum tree_code opcode,
1143 		       VEC (operand_entry_t, heap) **ops, struct loop *loop)
1144 {
1145   unsigned int length = VEC_length (operand_entry_t, *ops);
1146   operand_entry_t oe1;
1147   unsigned i, j;
1148   sbitmap candidates, candidates2;
1149   unsigned nr_candidates, nr_candidates2;
1150   sbitmap_iterator sbi0;
1151   VEC (operand_entry_t, heap) **subops;
1152   htab_t ctable;
1153   bool changed = false;
1154   int next_oecount_id = 0;
1155 
1156   if (length <= 1
1157       || opcode != PLUS_EXPR)
1158     return false;
1159 
1160   /* Build a list of candidates to process.  */
1161   candidates = sbitmap_alloc (length);
1162   sbitmap_zero (candidates);
1163   nr_candidates = 0;
1164   FOR_EACH_VEC_ELT (operand_entry_t, *ops, i, oe1)
1165     {
1166       enum tree_code dcode;
1167       gimple oe1def;
1168 
1169       if (TREE_CODE (oe1->op) != SSA_NAME)
1170 	continue;
1171       oe1def = SSA_NAME_DEF_STMT (oe1->op);
1172       if (!is_gimple_assign (oe1def))
1173 	continue;
1174       dcode = gimple_assign_rhs_code (oe1def);
1175       if ((dcode != MULT_EXPR
1176 	   && dcode != RDIV_EXPR)
1177 	  || !is_reassociable_op (oe1def, dcode, loop))
1178 	continue;
1179 
1180       SET_BIT (candidates, i);
1181       nr_candidates++;
1182     }
1183 
1184   if (nr_candidates < 2)
1185     {
1186       sbitmap_free (candidates);
1187       return false;
1188     }
1189 
1190   if (dump_file && (dump_flags & TDF_DETAILS))
1191     {
1192       fprintf (dump_file, "searching for un-distribute opportunities ");
1193       print_generic_expr (dump_file,
1194 	VEC_index (operand_entry_t, *ops,
1195 		   sbitmap_first_set_bit (candidates))->op, 0);
1196       fprintf (dump_file, " %d\n", nr_candidates);
1197     }
1198 
1199   /* Build linearized sub-operand lists and the counting table.  */
1200   cvec = NULL;
1201   ctable = htab_create (15, oecount_hash, oecount_eq, NULL);
1202   subops = XCNEWVEC (VEC (operand_entry_t, heap) *,
1203 		     VEC_length (operand_entry_t, *ops));
1204   EXECUTE_IF_SET_IN_SBITMAP (candidates, 0, i, sbi0)
1205     {
1206       gimple oedef;
1207       enum tree_code oecode;
1208       unsigned j;
1209 
1210       oedef = SSA_NAME_DEF_STMT (VEC_index (operand_entry_t, *ops, i)->op);
1211       oecode = gimple_assign_rhs_code (oedef);
1212       linearize_expr_tree (&subops[i], oedef,
1213 			   associative_tree_code (oecode), false);
1214 
1215       FOR_EACH_VEC_ELT (operand_entry_t, subops[i], j, oe1)
1216 	{
1217 	  oecount c;
1218 	  void **slot;
1219 	  size_t idx;
1220 	  c.oecode = oecode;
1221 	  c.cnt = 1;
1222 	  c.id = next_oecount_id++;
1223 	  c.op = oe1->op;
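	  /* Tentatively append the counter to CVEC and probe the hashtable
	     with its biased index (see oecount_hash).  If a counter for this
	     (operand, code) pair already exists, drop the new entry again
	     and bump the existing count instead.  */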
1224 	  VEC_safe_push (oecount, heap, cvec, &c);
1225 	  idx = VEC_length (oecount, cvec) + 41;
1226 	  slot = htab_find_slot (ctable, (void *)idx, INSERT);
1227 	  if (!*slot)
1228 	    {
1229 	      *slot = (void *)idx;
1230 	    }
1231 	  else
1232 	    {
1233 	      VEC_pop (oecount, cvec);
1234 	      VEC_index (oecount, cvec, (size_t)*slot - 42)->cnt++;
1235 	    }
1236 	}
1237     }
1238   htab_delete (ctable);
1239 
1240   /* Sort the counting table.  */
1241   VEC_qsort (oecount, cvec, oecount_cmp);
1242 
1243   if (dump_file && (dump_flags & TDF_DETAILS))
1244     {
1245       oecount *c;
1246       fprintf (dump_file, "Candidates:\n");
1247       FOR_EACH_VEC_ELT (oecount, cvec, j, c)
1248 	{
1249 	  fprintf (dump_file, "  %u %s: ", c->cnt,
1250 		   c->oecode == MULT_EXPR
1251 		   ? "*" : c->oecode == RDIV_EXPR ? "/" : "?");
1252 	  print_generic_expr (dump_file, c->op, 0);
1253 	  fprintf (dump_file, "\n");
1254 	}
1255     }
1256 
1257   /* Process the (operand, code) pairs in order of most occurrences.  */
1258   candidates2 = sbitmap_alloc (length);
1259   while (!VEC_empty (oecount, cvec))
1260     {
1261       oecount *c = VEC_last (oecount, cvec);
1262       if (c->cnt < 2)
1263 	break;
1264 
1265       /* Now collect the operands in the outer chain that contain
1266          the common operand in their inner chain.  */
1267       sbitmap_zero (candidates2);
1268       nr_candidates2 = 0;
1269       EXECUTE_IF_SET_IN_SBITMAP (candidates, 0, i, sbi0)
1270 	{
1271 	  gimple oedef;
1272 	  enum tree_code oecode;
1273 	  unsigned j;
1274 	  tree op = VEC_index (operand_entry_t, *ops, i)->op;
1275 
1276 	  /* If we undistributed in this chain already this may be
1277 	     a constant.  */
1278 	  if (TREE_CODE (op) != SSA_NAME)
1279 	    continue;
1280 
1281 	  oedef = SSA_NAME_DEF_STMT (op);
1282 	  oecode = gimple_assign_rhs_code (oedef);
1283 	  if (oecode != c->oecode)
1284 	    continue;
1285 
1286 	  FOR_EACH_VEC_ELT (operand_entry_t, subops[i], j, oe1)
1287 	    {
1288 	      if (oe1->op == c->op)
1289 		{
1290 		  SET_BIT (candidates2, i);
1291 		  ++nr_candidates2;
1292 		  break;
1293 		}
1294 	    }
1295 	}
1296 
1297       if (nr_candidates2 >= 2)
1298 	{
1299 	  operand_entry_t oe1, oe2;
1300 	  tree tmpvar;
1301 	  gimple prod;
1302 	  int first = sbitmap_first_set_bit (candidates2);
1303 
1304 	  /* Build the new addition chain.  */
1305 	  oe1 = VEC_index (operand_entry_t, *ops, first);
1306 	  if (dump_file && (dump_flags & TDF_DETAILS))
1307 	    {
1308 	      fprintf (dump_file, "Building (");
1309 	      print_generic_expr (dump_file, oe1->op, 0);
1310 	    }
1311 	  tmpvar = create_tmp_reg (TREE_TYPE (oe1->op), NULL);
1312 	  add_referenced_var (tmpvar);
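	  /* TMPVAR is the base variable for every statement created below;
	     build_and_add_sum makes a fresh SSA name from it for each new
	     statement.  */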
1313 	  zero_one_operation (&oe1->op, c->oecode, c->op);
1314 	  EXECUTE_IF_SET_IN_SBITMAP (candidates2, first+1, i, sbi0)
1315 	    {
1316 	      gimple sum;
1317 	      oe2 = VEC_index (operand_entry_t, *ops, i);
1318 	      if (dump_file && (dump_flags & TDF_DETAILS))
1319 		{
1320 		  fprintf (dump_file, " + ");
1321 		  print_generic_expr (dump_file, oe2->op, 0);
1322 		}
1323 	      zero_one_operation (&oe2->op, c->oecode, c->op);
1324 	      sum = build_and_add_sum (tmpvar, oe1->op, oe2->op, opcode);
1325 	      oe2->op = build_zero_cst (TREE_TYPE (oe2->op));
1326 	      oe2->rank = 0;
1327 	      oe1->op = gimple_get_lhs (sum);
1328 	    }
1329 
1330 	  /* Apply the multiplication/division.  */
1331 	  prod = build_and_add_sum (tmpvar, oe1->op, c->op, c->oecode);
1332 	  if (dump_file && (dump_flags & TDF_DETAILS))
1333 	    {
1334 	      fprintf (dump_file, ") %s ", c->oecode == MULT_EXPR ? "*" : "/");
1335 	      print_generic_expr (dump_file, c->op, 0);
1336 	      fprintf (dump_file, "\n");
1337 	    }
1338 
1339 	  /* Record it in the addition chain and disable further
1340 	     undistribution with this op.  */
1341 	  oe1->op = gimple_assign_lhs (prod);
1342 	  oe1->rank = get_rank (oe1->op);
1343 	  VEC_free (operand_entry_t, heap, subops[first]);
1344 
1345 	  changed = true;
1346 	}
1347 
1348       VEC_pop (oecount, cvec);
1349     }
1350 
1351   for (i = 0; i < VEC_length (operand_entry_t, *ops); ++i)
1352     VEC_free (operand_entry_t, heap, subops[i]);
1353   free (subops);
1354   VEC_free (oecount, heap, cvec);
1355   sbitmap_free (candidates);
1356   sbitmap_free (candidates2);
1357 
1358   return changed;
1359 }
1360 
1361 /* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR and CURR is a comparison
1362    expression, examine the other OPS to see if any of them are comparisons
1363    of the same values, which we may be able to combine or eliminate.
1364    For example, we can rewrite (a < b) | (a == b) as (a <= b).  */
1365 
1366 static bool
1367 eliminate_redundant_comparison (enum tree_code opcode,
1368 				VEC (operand_entry_t, heap) **ops,
1369 				unsigned int currindex,
1370 				operand_entry_t curr)
1371 {
1372   tree op1, op2;
1373   enum tree_code lcode, rcode;
1374   gimple def1, def2;
1375   int i;
1376   operand_entry_t oe;
1377 
1378   if (opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
1379     return false;
1380 
1381   /* Check that CURR is a comparison.  */
1382   if (TREE_CODE (curr->op) != SSA_NAME)
1383     return false;
1384   def1 = SSA_NAME_DEF_STMT (curr->op);
1385   if (!is_gimple_assign (def1))
1386     return false;
1387   lcode = gimple_assign_rhs_code (def1);
1388   if (TREE_CODE_CLASS (lcode) != tcc_comparison)
1389     return false;
1390   op1 = gimple_assign_rhs1 (def1);
1391   op2 = gimple_assign_rhs2 (def1);
1392 
1393   /* Now look for a similar comparison in the remaining OPS.  */
1394   for (i = currindex + 1;
1395        VEC_iterate (operand_entry_t, *ops, i, oe);
1396        i++)
1397     {
1398       tree t;
1399 
1400       if (TREE_CODE (oe->op) != SSA_NAME)
1401 	continue;
1402       def2 = SSA_NAME_DEF_STMT (oe->op);
1403       if (!is_gimple_assign (def2))
1404 	continue;
1405       rcode = gimple_assign_rhs_code (def2);
1406       if (TREE_CODE_CLASS (rcode) != tcc_comparison)
1407 	continue;
1408 
1409       /* If we got here, we have a match.  See if we can combine the
1410 	 two comparisons.  */
1411       if (opcode == BIT_IOR_EXPR)
1412 	t = maybe_fold_or_comparisons (lcode, op1, op2,
1413 				       rcode, gimple_assign_rhs1 (def2),
1414 				       gimple_assign_rhs2 (def2));
1415       else
1416 	t = maybe_fold_and_comparisons (lcode, op1, op2,
1417 					rcode, gimple_assign_rhs1 (def2),
1418 					gimple_assign_rhs2 (def2));
1419       if (!t)
1420 	continue;
1421 
1422       /* maybe_fold_and_comparisons and maybe_fold_or_comparisons
1423 	 always give us a boolean_type_node value back.  If the original
1424 	 BIT_AND_EXPR or BIT_IOR_EXPR was of a wider integer type,
1425 	 we need to convert.  */
1426       if (!useless_type_conversion_p (TREE_TYPE (curr->op), TREE_TYPE (t)))
1427 	t = fold_convert (TREE_TYPE (curr->op), t);
1428 
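      /* Unless the folded result is a constant or identical to CURR->OP,
	 check that it can be re-expressed as a comparison of two gimple
	 values; otherwise we cannot emit it and must skip this pair.  */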
1429       if (TREE_CODE (t) != INTEGER_CST
1430 	  && !operand_equal_p (t, curr->op, 0))
1431 	{
1432 	  enum tree_code subcode;
1433 	  tree newop1, newop2;
1434 	  if (!COMPARISON_CLASS_P (t))
1435 	    continue;
1436 	  extract_ops_from_tree (t, &subcode, &newop1, &newop2);
1437 	  STRIP_USELESS_TYPE_CONVERSION (newop1);
1438 	  STRIP_USELESS_TYPE_CONVERSION (newop2);
1439 	  if (!is_gimple_val (newop1) || !is_gimple_val (newop2))
1440 	    continue;
1441 	}
1442 
1443       if (dump_file && (dump_flags & TDF_DETAILS))
1444 	{
1445 	  fprintf (dump_file, "Equivalence: ");
1446 	  print_generic_expr (dump_file, curr->op, 0);
1447 	  fprintf (dump_file, " %s ", op_symbol_code (opcode));
1448 	  print_generic_expr (dump_file, oe->op, 0);
1449 	  fprintf (dump_file, " -> ");
1450 	  print_generic_expr (dump_file, t, 0);
1451 	  fprintf (dump_file, "\n");
1452 	}
1453 
1454       /* Now we can delete oe, as it has been subsumed by the new combined
1455          expression t.  */
1456       VEC_ordered_remove (operand_entry_t, *ops, i);
1457       reassociate_stats.ops_eliminated ++;
1458 
1459       /* If t is the same as curr->op, we're done.  Otherwise we must
1460 	 replace curr->op with t.  Special case is if we got a constant
1461 	 back, in which case we add it to the end instead of in place of
1462 	 the current entry.  */
1463       if (TREE_CODE (t) == INTEGER_CST)
1464 	{
1465 	  VEC_ordered_remove (operand_entry_t, *ops, currindex);
1466 	  add_to_ops_vec (ops, t);
1467 	}
1468       else if (!operand_equal_p (t, curr->op, 0))
1469 	{
1470 	  tree tmpvar;
1471 	  gimple sum;
1472 	  enum tree_code subcode;
1473 	  tree newop1;
1474 	  tree newop2;
1475 	  gcc_assert (COMPARISON_CLASS_P (t));
1476 	  tmpvar = create_tmp_var (TREE_TYPE (t), NULL);
1477 	  add_referenced_var (tmpvar);
1478 	  extract_ops_from_tree (t, &subcode, &newop1, &newop2);
1479 	  STRIP_USELESS_TYPE_CONVERSION (newop1);
1480 	  STRIP_USELESS_TYPE_CONVERSION (newop2);
1481 	  gcc_checking_assert (is_gimple_val (newop1)
1482 			       && is_gimple_val (newop2));
1483 	  sum = build_and_add_sum (tmpvar, newop1, newop2, subcode);
1484 	  curr->op = gimple_get_lhs (sum);
1485 	}
1486       return true;
1487     }
1488 
1489   return false;
1490 }
1491 
1492 /* Perform various identities and other optimizations on the list of
1493    operand entries, stored in OPS.  The tree code for the binary
1494    operation between all the operands is OPCODE.  */
1495 
1496 static void
1497 optimize_ops_list (enum tree_code opcode,
1498 		   VEC (operand_entry_t, heap) **ops)
1499 {
1500   unsigned int length = VEC_length (operand_entry_t, *ops);
1501   unsigned int i;
1502   operand_entry_t oe;
1503   operand_entry_t oelast = NULL;
1504   bool iterate = false;
1505 
1506   if (length == 1)
1507     return;
1508 
1509   oelast = VEC_last (operand_entry_t, *ops);
1510 
1511   /* If the last two are constants, pop the constants off, merge them
1512      and try the next two.  */
1513   if (oelast->rank == 0 && is_gimple_min_invariant (oelast->op))
1514     {
1515       operand_entry_t oelm1 = VEC_index (operand_entry_t, *ops, length - 2);
1516 
1517       if (oelm1->rank == 0
1518 	  && is_gimple_min_invariant (oelm1->op)
1519 	  && useless_type_conversion_p (TREE_TYPE (oelm1->op),
1520 				       TREE_TYPE (oelast->op)))
1521 	{
1522 	  tree folded = fold_binary (opcode, TREE_TYPE (oelm1->op),
1523 				     oelm1->op, oelast->op);
1524 
1525 	  if (folded && is_gimple_min_invariant (folded))
1526 	    {
1527 	      if (dump_file && (dump_flags & TDF_DETAILS))
1528 		fprintf (dump_file, "Merging constants\n");
1529 
1530 	      VEC_pop (operand_entry_t, *ops);
1531 	      VEC_pop (operand_entry_t, *ops);
1532 
1533 	      add_to_ops_vec (ops, folded);
1534 	      reassociate_stats.constants_eliminated++;
1535 
1536 	      optimize_ops_list (opcode, ops);
1537 	      return;
1538 	    }
1539 	}
1540     }
1541 
1542   eliminate_using_constants (opcode, ops);
1543   oelast = NULL;
1544 
1545   for (i = 0; VEC_iterate (operand_entry_t, *ops, i, oe);)
1546     {
1547       bool done = false;
1548 
1549       if (eliminate_not_pairs (opcode, ops, i, oe))
1550 	return;
1551       if (eliminate_duplicate_pair (opcode, ops, &done, i, oe, oelast)
1552 	  || (!done && eliminate_plus_minus_pair (opcode, ops, i, oe))
1553 	  || (!done && eliminate_redundant_comparison (opcode, ops, i, oe)))
1554 	{
1555 	  if (done)
1556 	    return;
1557 	  iterate = true;
1558 	  oelast = NULL;
1559 	  continue;
1560 	}
1561       oelast = oe;
1562       i++;
1563     }
1564 
1565   length  = VEC_length (operand_entry_t, *ops);
1566   oelast = VEC_last (operand_entry_t, *ops);
1567 
1568   if (iterate)
1569     optimize_ops_list (opcode, ops);
1570 }
1571 
1572 /* The following functions are subroutines to optimize_range_tests and allow
1573    it to try to change a logical combination of comparisons into a range
1574    test.
1575 
1576    For example, both
1577 	X == 2 || X == 5 || X == 3 || X == 4
1578    and
1579 	X >= 2 && X <= 5
1580    are converted to
1581 	(unsigned) (X - 2) <= 3
1582 
1583    For more information see comments above fold_range_test in fold-const.c;
1584    this implementation is for GIMPLE.  */
1585 
1586 struct range_entry
1587 {
1588   tree exp;
1589   tree low;
1590   tree high;
1591   bool in_p;
1592   bool strict_overflow_p;
1593   unsigned int idx, next;
1594 };
1595 
1596 /* This is similar to make_range in fold-const.c, but on top of
1597    GIMPLE instead of trees.  */
1598 
1599 static void
1600 init_range_entry (struct range_entry *r, tree exp)
1601 {
1602   int in_p;
1603   tree low, high;
1604   bool is_bool, strict_overflow_p;
1605 
1606   r->exp = NULL_TREE;
1607   r->in_p = false;
1608   r->strict_overflow_p = false;
1609   r->low = NULL_TREE;
1610   r->high = NULL_TREE;
1611   if (TREE_CODE (exp) != SSA_NAME || !INTEGRAL_TYPE_P (TREE_TYPE (exp)))
1612     return;
1613 
1614   /* Start with simply saying "EXP != 0" and then look at the code of EXP
1615      and see if we can refine the range.  Some of the cases below may not
1616      happen, but it doesn't seem worth worrying about this.  We "continue"
1617      the outer loop when we've changed something; otherwise we "break"
1618      the switch, which will "break" the while.  */
1619   low = build_int_cst (TREE_TYPE (exp), 0);
1620   high = low;
1621   in_p = 0;
1622   strict_overflow_p = false;
1623   is_bool = false;
1624   if (TYPE_PRECISION (TREE_TYPE (exp)) == 1)
1625     {
1626       if (TYPE_UNSIGNED (TREE_TYPE (exp)))
1627 	is_bool = true;
1628       else
1629 	return;
1630     }
1631   else if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE)
1632     is_bool = true;
1633 
1634   while (1)
1635     {
1636       gimple stmt;
1637       enum tree_code code;
1638       tree arg0, arg1, exp_type;
1639       tree nexp;
1640       location_t loc;
1641 
1642       if (TREE_CODE (exp) != SSA_NAME)
1643 	break;
1644 
1645       stmt = SSA_NAME_DEF_STMT (exp);
1646       if (!is_gimple_assign (stmt))
1647 	break;
1648 
1649       code = gimple_assign_rhs_code (stmt);
1650       arg0 = gimple_assign_rhs1 (stmt);
1651       if (TREE_CODE (arg0) != SSA_NAME)
1652 	break;
1653       arg1 = gimple_assign_rhs2 (stmt);
1654       exp_type = TREE_TYPE (exp);
1655       loc = gimple_location (stmt);
1656       switch (code)
1657 	{
1658 	case BIT_NOT_EXPR:
1659 	  if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE)
1660 	    {
1661 	      in_p = !in_p;
1662 	      exp = arg0;
1663 	      continue;
1664 	    }
1665 	  break;
1666 	case SSA_NAME:
1667 	  exp = arg0;
1668 	  continue;
1669 	CASE_CONVERT:
1670 	  if (is_bool)
1671 	    goto do_default;
1672 	  if (TYPE_PRECISION (TREE_TYPE (arg0)) == 1)
1673 	    {
1674 	      if (TYPE_UNSIGNED (TREE_TYPE (arg0)))
1675 		is_bool = true;
1676 	      else
1677 		return;
1678 	    }
1679 	  else if (TREE_CODE (TREE_TYPE (arg0)) == BOOLEAN_TYPE)
1680 	    is_bool = true;
1681 	  goto do_default;
1682 	case EQ_EXPR:
1683 	case NE_EXPR:
1684 	case LT_EXPR:
1685 	case LE_EXPR:
1686 	case GE_EXPR:
1687 	case GT_EXPR:
1688 	  is_bool = true;
1689 	  /* FALLTHRU */
1690 	default:
1691 	  if (!is_bool)
1692 	    return;
1693 	do_default:
1694 	  nexp = make_range_step (loc, code, arg0, arg1, exp_type,
1695 				  &low, &high, &in_p,
1696 				  &strict_overflow_p);
1697 	  if (nexp != NULL_TREE)
1698 	    {
1699 	      exp = nexp;
1700 	      gcc_assert (TREE_CODE (exp) == SSA_NAME);
1701 	      continue;
1702 	    }
1703 	  break;
1704 	}
1705       break;
1706     }
1707   if (is_bool)
1708     {
1709       r->exp = exp;
1710       r->in_p = in_p;
1711       r->low = low;
1712       r->high = high;
1713       r->strict_overflow_p = strict_overflow_p;
1714     }
1715 }
1716 
1717 /* Comparison function for qsort.  Sort entries
1718    without SSA_NAME exp first, then with SSA_NAMEs sorted
1719    by increasing SSA_NAME_VERSION, and for the same SSA_NAMEs
1720    by increasing ->low and if ->low is the same, by increasing
1721    ->high.  ->low == NULL_TREE means minimum, ->high == NULL_TREE
1722    maximum.  */
1723 
1724 static int
1725 range_entry_cmp (const void *a, const void *b)
1726 {
1727   const struct range_entry *p = (const struct range_entry *) a;
1728   const struct range_entry *q = (const struct range_entry *) b;
1729 
1730   if (p->exp != NULL_TREE && TREE_CODE (p->exp) == SSA_NAME)
1731     {
1732       if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
1733 	{
1734 	  /* Group range_entries for the same SSA_NAME together.  */
1735 	  if (SSA_NAME_VERSION (p->exp) < SSA_NAME_VERSION (q->exp))
1736 	    return -1;
1737 	  else if (SSA_NAME_VERSION (p->exp) > SSA_NAME_VERSION (q->exp))
1738 	    return 1;
1739 	  /* If ->low is different, NULL low goes first, then by
1740 	     ascending low.  */
1741 	  if (p->low != NULL_TREE)
1742 	    {
1743 	      if (q->low != NULL_TREE)
1744 		{
1745 		  tree tem = fold_binary (LT_EXPR, boolean_type_node,
1746 					  p->low, q->low);
1747 		  if (tem && integer_onep (tem))
1748 		    return -1;
1749 		  tem = fold_binary (GT_EXPR, boolean_type_node,
1750 				     p->low, q->low);
1751 		  if (tem && integer_onep (tem))
1752 		    return 1;
1753 		}
1754 	      else
1755 		return 1;
1756 	    }
1757 	  else if (q->low != NULL_TREE)
1758 	    return -1;
1759 	  /* If ->high is different, NULL high goes last, before that by
1760 	     ascending high.  */
1761 	  if (p->high != NULL_TREE)
1762 	    {
1763 	      if (q->high != NULL_TREE)
1764 		{
1765 		  tree tem = fold_binary (LT_EXPR, boolean_type_node,
1766 					  p->high, q->high);
1767 		  if (tem && integer_onep (tem))
1768 		    return -1;
1769 		  tem = fold_binary (GT_EXPR, boolean_type_node,
1770 				     p->high, q->high);
1771 		  if (tem && integer_onep (tem))
1772 		    return 1;
1773 		}
1774 	      else
1775 		return -1;
1776 	    }
1777 	  else if (p->high != NULL_TREE)
1778 	    return 1;
1779 	  /* If both ranges are the same, sort below by ascending idx.  */
1780 	}
1781       else
1782 	return 1;
1783     }
1784   else if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
1785     return -1;
1786 
1787   if (p->idx < q->idx)
1788     return -1;
1789   else
1790     {
1791       gcc_checking_assert (p->idx > q->idx);
1792       return 1;
1793     }
1794 }
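
/* As an illustrative sketch of that ordering: entries whose exp is not
   an SSA_NAME sort first, and e.g. ranges on x_5 and y_7 (SSA_NAME
   versions 5 and 7) would end up as

     x_5 [NULL, 3]   x_5 [0, 3]   x_5 [0, 7]   y_7 [2, 2]

   i.e. grouped by SSA_NAME_VERSION, NULL low first, then ascending low,
   then ascending high, with ties broken by ascending idx.  */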
1795 
1796 /* Helper routine of optimize_range_test.
1797    [EXP, IN_P, LOW, HIGH, STRICT_OVERFLOW_P] is a merged range for
1798    RANGE and OTHERRANGE through OTHERRANGE + COUNT - 1 ranges,
1799    OPCODE and OPS are arguments of optimize_range_tests.  Return
1800    true if the range merge has been successful.  */
1801 
1802 static bool
1803 update_range_test (struct range_entry *range, struct range_entry *otherrange,
1804 		   unsigned int count, enum tree_code opcode,
1805 		   VEC (operand_entry_t, heap) **ops, tree exp, bool in_p,
1806 		   tree low, tree high, bool strict_overflow_p)
1807 {
1808   tree op = VEC_index (operand_entry_t, *ops, range->idx)->op;
1809   location_t loc = gimple_location (SSA_NAME_DEF_STMT (op));
1810   tree tem = build_range_check (loc, TREE_TYPE (op), exp, in_p, low, high);
1811   enum warn_strict_overflow_code wc = WARN_STRICT_OVERFLOW_COMPARISON;
1812   gimple_stmt_iterator gsi;
1813 
1814   if (tem == NULL_TREE)
1815     return false;
1816 
1817   if (strict_overflow_p && issue_strict_overflow_warning (wc))
1818     warning_at (loc, OPT_Wstrict_overflow,
1819 		"assuming signed overflow does not occur "
1820 		"when simplifying range test");
1821 
1822   if (dump_file && (dump_flags & TDF_DETAILS))
1823     {
1824       struct range_entry *r;
1825       fprintf (dump_file, "Optimizing range tests ");
1826       print_generic_expr (dump_file, range->exp, 0);
1827       fprintf (dump_file, " %c[", range->in_p ? '+' : '-');
1828       print_generic_expr (dump_file, range->low, 0);
1829       fprintf (dump_file, ", ");
1830       print_generic_expr (dump_file, range->high, 0);
1831       fprintf (dump_file, "]");
1832       for (r = otherrange; r < otherrange + count; r++)
1833 	{
1834 	  fprintf (dump_file, " and %c[", r->in_p ? '+' : '-');
1835 	  print_generic_expr (dump_file, r->low, 0);
1836 	  fprintf (dump_file, ", ");
1837 	  print_generic_expr (dump_file, r->high, 0);
1838 	  fprintf (dump_file, "]");
1839 	}
1840       fprintf (dump_file, "\n into ");
1841       print_generic_expr (dump_file, tem, 0);
1842       fprintf (dump_file, "\n");
1843     }
1844 
1845   if (opcode == BIT_IOR_EXPR)
1846     tem = invert_truthvalue_loc (loc, tem);
1847 
1848   tem = fold_convert_loc (loc, TREE_TYPE (op), tem);
1849   gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (op));
1850   tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE, true,
1851 				  GSI_SAME_STMT);
1852 
1853   VEC_index (operand_entry_t, *ops, range->idx)->op = tem;
1854   range->exp = exp;
1855   range->low = low;
1856   range->high = high;
1857   range->in_p = in_p;
1858   range->strict_overflow_p = false;
1859 
1860   for (range = otherrange; range < otherrange + count; range++)
1861     {
1862       VEC_index (operand_entry_t, *ops, range->idx)->op = error_mark_node;
1863       range->exp = NULL_TREE;
1864     }
1865   return true;
1866 }
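
/* For illustration (a sketch; the exact statements depend on
   build_range_check and force_gimple_operand_gsi): if OPCODE is
   BIT_AND_EXPR and the merged range for x_1 is +[2, 3], the code above
   emits roughly the equivalent of

     (unsigned) (x_1 - 2) <= 1

   as new statements before the defining statement of the operand,
   stores the resulting SSA name back into *OPS at range->idx, and marks
   the merged-away entries with error_mark_node so optimize_range_tests
   can filter them out afterwards.  */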
1867 
1868 /* Optimize range tests, similarly to how fold_range_test optimizes
1869    them on trees.  The tree code for the binary
1870    operation between all the operands is OPCODE.  */
1871 
1872 static void
1873 optimize_range_tests (enum tree_code opcode,
1874 		      VEC (operand_entry_t, heap) **ops)
1875 {
1876   unsigned int length = VEC_length (operand_entry_t, *ops), i, j, first;
1877   operand_entry_t oe;
1878   struct range_entry *ranges;
1879   bool any_changes = false;
1880 
1881   if (length == 1)
1882     return;
1883 
1884   ranges = XNEWVEC (struct range_entry, length);
1885   for (i = 0; i < length; i++)
1886     {
1887       ranges[i].idx = i;
1888       init_range_entry (ranges + i, VEC_index (operand_entry_t, *ops, i)->op);
1889       /* For | invert it now, we will invert it again before emitting
1890 	 the optimized expression.  */
1891       if (opcode == BIT_IOR_EXPR)
1892 	ranges[i].in_p = !ranges[i].in_p;
1893     }
1894 
1895   qsort (ranges, length, sizeof (*ranges), range_entry_cmp);
1896   for (i = 0; i < length; i++)
1897     if (ranges[i].exp != NULL_TREE && TREE_CODE (ranges[i].exp) == SSA_NAME)
1898       break;
1899 
1900   /* Try to merge ranges.  */
1901   for (first = i; i < length; i++)
1902     {
1903       tree low = ranges[i].low;
1904       tree high = ranges[i].high;
1905       int in_p = ranges[i].in_p;
1906       bool strict_overflow_p = ranges[i].strict_overflow_p;
1907       int update_fail_count = 0;
1908 
1909       for (j = i + 1; j < length; j++)
1910 	{
1911 	  if (ranges[i].exp != ranges[j].exp)
1912 	    break;
1913 	  if (!merge_ranges (&in_p, &low, &high, in_p, low, high,
1914 			     ranges[j].in_p, ranges[j].low, ranges[j].high))
1915 	    break;
1916 	  strict_overflow_p |= ranges[j].strict_overflow_p;
1917 	}
1918 
1919       if (j == i + 1)
1920 	continue;
1921 
1922       if (update_range_test (ranges + i, ranges + i + 1, j - i - 1, opcode,
1923 			     ops, ranges[i].exp, in_p, low, high,
1924 			     strict_overflow_p))
1925 	{
1926 	  i = j - 1;
1927 	  any_changes = true;
1928 	}
1929       /* Avoid quadratic complexity if all merge_ranges calls would succeed,
1930 	 while update_range_test would fail.  */
1931       else if (update_fail_count == 64)
1932 	i = j - 1;
1933       else
1934 	++update_fail_count;
1935     }
1936 
1937   /* Optimize X == CST1 || X == CST2
1938      if popcount (CST1 ^ CST2) == 1 into
1939      (X & ~(CST1 ^ CST2)) == (CST1 & ~(CST1 ^ CST2)).
1940      Similarly for ranges.  E.g.
1941      X != 2 && X != 3 && X != 10 && X != 11
1942      will be transformed by the above loop into
1943      (X - 2U) <= 1U && (X - 10U) <= 1U
1944      and this loop can transform that into
1945      ((X & ~8) - 2U) <= 1U.  */
1946   for (i = first; i < length; i++)
1947     {
1948       tree lowi, highi, lowj, highj, type, lowxor, highxor, tem, exp;
1949 
1950       if (ranges[i].exp == NULL_TREE || ranges[i].in_p)
1951 	continue;
1952       type = TREE_TYPE (ranges[i].exp);
1953       if (!INTEGRAL_TYPE_P (type))
1954 	continue;
1955       lowi = ranges[i].low;
1956       if (lowi == NULL_TREE)
1957 	lowi = TYPE_MIN_VALUE (type);
1958       highi = ranges[i].high;
1959       if (highi == NULL_TREE)
1960 	continue;
1961       for (j = i + 1; j < length && j < i + 64; j++)
1962 	{
1963 	  if (ranges[j].exp == NULL_TREE)
1964 	    continue;
1965 	  if (ranges[i].exp != ranges[j].exp)
1966 	    break;
1967 	  if (ranges[j].in_p)
1968 	    continue;
1969 	  lowj = ranges[j].low;
1970 	  if (lowj == NULL_TREE)
1971 	    continue;
1972 	  highj = ranges[j].high;
1973 	  if (highj == NULL_TREE)
1974 	    highj = TYPE_MAX_VALUE (type);
1975 	  tem = fold_binary (GT_EXPR, boolean_type_node,
1976 			     lowj, highi);
1977 	  if (tem == NULL_TREE || !integer_onep (tem))
1978 	    continue;
1979 	  lowxor = fold_binary (BIT_XOR_EXPR, type, lowi, lowj);
1980 	  if (lowxor == NULL_TREE || TREE_CODE (lowxor) != INTEGER_CST)
1981 	    continue;
1982 	  gcc_checking_assert (!integer_zerop (lowxor));
1983 	  tem = fold_binary (MINUS_EXPR, type, lowxor,
1984 			     build_int_cst (type, 1));
1985 	  if (tem == NULL_TREE)
1986 	    continue;
1987 	  tem = fold_binary (BIT_AND_EXPR, type, lowxor, tem);
1988 	  if (tem == NULL_TREE || !integer_zerop (tem))
1989 	    continue;
1990 	  highxor = fold_binary (BIT_XOR_EXPR, type, highi, highj);
1991 	  if (!tree_int_cst_equal (lowxor, highxor))
1992 	    continue;
1993 	  tem = fold_build1 (BIT_NOT_EXPR, type, lowxor);
1994 	  exp = fold_build2 (BIT_AND_EXPR, type, ranges[i].exp, tem);
1995 	  lowj = fold_build2 (BIT_AND_EXPR, type, lowi, tem);
1996 	  highj = fold_build2 (BIT_AND_EXPR, type, highi, tem);
1997 	  if (update_range_test (ranges + i, ranges + j, 1, opcode, ops, exp,
1998 				 ranges[i].in_p, lowj, highj,
1999 				 ranges[i].strict_overflow_p
2000 				 || ranges[j].strict_overflow_p))
2001 	    {
2002 	      any_changes = true;
2003 	      break;
2004 	    }
2005 	}
2006     }
2007 
2008   if (any_changes)
2009     {
2010       j = 0;
2011       FOR_EACH_VEC_ELT (operand_entry_t, *ops, i, oe)
2012 	{
2013 	  if (oe->op == error_mark_node)
2014 	    continue;
2015 	  else if (i != j)
2016 	    VEC_replace (operand_entry_t, *ops, j, oe);
2017 	  j++;
2018 	}
2019       VEC_truncate (operand_entry_t, *ops, j);
2020     }
2021 
2022   XDELETEVEC (ranges);
2023 }
2024 
2025 /* Return true if OPERAND is defined by a PHI node which uses the LHS
2026    of STMT in its operands.  This is also known as a "destructive
2027    update" operation.  */
2028 
2029 static bool
2030 is_phi_for_stmt (gimple stmt, tree operand)
2031 {
2032   gimple def_stmt;
2033   tree lhs;
2034   use_operand_p arg_p;
2035   ssa_op_iter i;
2036 
2037   if (TREE_CODE (operand) != SSA_NAME)
2038     return false;
2039 
2040   lhs = gimple_assign_lhs (stmt);
2041 
2042   def_stmt = SSA_NAME_DEF_STMT (operand);
2043   if (gimple_code (def_stmt) != GIMPLE_PHI)
2044     return false;
2045 
2046   FOR_EACH_PHI_ARG (arg_p, def_stmt, i, SSA_OP_USE)
2047     if (lhs == USE_FROM_PTR (arg_p))
2048       return true;
2049   return false;
2050 }
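
/* For example, with a reduction loop body like

     # sum_1 = PHI <0, sum_3>
     sum_3 = sum_1 + x_2;

   is_phi_for_stmt (<the sum_3 statement>, sum_1) returns true, because
   sum_1 is defined by a PHI node that uses sum_3: the classic
   destructive-update shape we try to keep intact for the vectorizer.  */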
2051 
2052 /* Remove def stmt of VAR if VAR has zero uses and recurse
2053    on rhs1 operand if so.  */
2054 
2055 static void
2056 remove_visited_stmt_chain (tree var)
2057 {
2058   gimple stmt;
2059   gimple_stmt_iterator gsi;
2060 
2061   while (1)
2062     {
2063       if (TREE_CODE (var) != SSA_NAME || !has_zero_uses (var))
2064 	return;
2065       stmt = SSA_NAME_DEF_STMT (var);
2066       if (!is_gimple_assign (stmt)
2067 	  || !gimple_visited_p (stmt))
2068 	return;
2069       var = gimple_assign_rhs1 (stmt);
2070       gsi = gsi_for_stmt (stmt);
2071       gsi_remove (&gsi, true);
2072       release_defs (stmt);
2073     }
2074 }
2075 
2076 /* This function checks three consecutive operands in
2077    the passed operand vector OPS starting from OPINDEX and
2078    swaps two operands if it is profitable for the binary operation
2079    consuming the OPINDEX + 1 and OPINDEX + 2 operands.
2080 
2081    We pair ops with the same rank if possible.
2082 
2083    The alternative we try is to see if STMT is a destructive
2084    update style statement, which is like:
2085    b = phi (a, ...)
2086    a = c + b;
2087    In that case, we want to use the destructive update form to
2088    expose the possible vectorizer sum reduction opportunity.
2089    In that case, the third operand will be the phi node. This
2090    check is not performed if STMT is null.
2091 
2092    We could, of course, try to be better as noted above, and do a
2093    lot of work to try to find these opportunities in >3 operand
2094    cases, but it is unlikely to be worth it.  */
2095 
2096 static void
2097 swap_ops_for_binary_stmt (VEC(operand_entry_t, heap) * ops,
2098 			  unsigned int opindex, gimple stmt)
2099 {
2100   operand_entry_t oe1, oe2, oe3;
2101 
2102   oe1 = VEC_index (operand_entry_t, ops, opindex);
2103   oe2 = VEC_index (operand_entry_t, ops, opindex + 1);
2104   oe3 = VEC_index (operand_entry_t, ops, opindex + 2);
2105 
2106   if ((oe1->rank == oe2->rank
2107        && oe2->rank != oe3->rank)
2108       || (stmt && is_phi_for_stmt (stmt, oe3->op)
2109 	  && !is_phi_for_stmt (stmt, oe1->op)
2110 	  && !is_phi_for_stmt (stmt, oe2->op)))
2111     {
2112       struct operand_entry temp = *oe3;
2113       oe3->op = oe1->op;
2114       oe3->rank = oe1->rank;
2115       oe1->op = temp.op;
2116       oe1->rank= temp.rank;
2117     }
2118   else if ((oe1->rank == oe3->rank
2119 	    && oe2->rank != oe3->rank)
2120 	   || (stmt && is_phi_for_stmt (stmt, oe2->op)
2121 	       && !is_phi_for_stmt (stmt, oe1->op)
2122 	       && !is_phi_for_stmt (stmt, oe3->op)))
2123     {
2124       struct operand_entry temp = *oe2;
2125       oe2->op = oe1->op;
2126       oe2->rank = oe1->rank;
2127       oe1->op = temp.op;
2128       oe1->rank= temp.rank;
2129     }
2130 }
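
/* For example (a sketch of the rank-pairing case above): if the three
   operands starting at OPINDEX are

     oe1 = a_1 (rank 1), oe2 = b_2 (rank 1), oe3 = c_3 (rank 2)

   then oe1 and oe3 are swapped, so the statement consuming the
   OPINDEX + 1 and OPINDEX + 2 operands pairs the two rank-1 values
   a_1 and b_2, which is more likely to expose a redundancy.  */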
2131 
2132 /* Recursively rewrite our linearized statements so that the operators
2133    match those in OPS[OPINDEX], putting the computation in rank
2134    order.  */
2135 
2136 static void
2137 rewrite_expr_tree (gimple stmt, unsigned int opindex,
2138 		   VEC(operand_entry_t, heap) * ops, bool moved)
2139 {
2140   tree rhs1 = gimple_assign_rhs1 (stmt);
2141   tree rhs2 = gimple_assign_rhs2 (stmt);
2142   operand_entry_t oe;
2143 
2144   /* If we have three operands left, then we want to make sure the ones
2145      that get the double binary op are chosen wisely.  */
2146   if (opindex + 3 == VEC_length (operand_entry_t, ops))
2147     swap_ops_for_binary_stmt (ops, opindex, stmt);
2148 
2149   /* The final recursion case for this function is that you have
2150      exactly two operations left.
2151      If we had exactly one op in the entire list to start with, we
2152      would have never called this function, and the tail recursion
2153      rewrites them one at a time.  */
2154   if (opindex + 2 == VEC_length (operand_entry_t, ops))
2155     {
2156       operand_entry_t oe1, oe2;
2157 
2158       oe1 = VEC_index (operand_entry_t, ops, opindex);
2159       oe2 = VEC_index (operand_entry_t, ops, opindex + 1);
2160 
2161       if (rhs1 != oe1->op || rhs2 != oe2->op)
2162 	{
2163 	  if (dump_file && (dump_flags & TDF_DETAILS))
2164 	    {
2165 	      fprintf (dump_file, "Transforming ");
2166 	      print_gimple_stmt (dump_file, stmt, 0, 0);
2167 	    }
2168 
2169 	  gimple_assign_set_rhs1 (stmt, oe1->op);
2170 	  gimple_assign_set_rhs2 (stmt, oe2->op);
2171 	  update_stmt (stmt);
2172 	  if (rhs1 != oe1->op && rhs1 != oe2->op)
2173 	    remove_visited_stmt_chain (rhs1);
2174 
2175 	  if (dump_file && (dump_flags & TDF_DETAILS))
2176 	    {
2177 	      fprintf (dump_file, " into ");
2178 	      print_gimple_stmt (dump_file, stmt, 0, 0);
2179 	    }
2180 
2181 	}
2182       return;
2183     }
2184 
2185   /* If we hit here, we should have 3 or more ops left.  */
2186   gcc_assert (opindex + 2 < VEC_length (operand_entry_t, ops));
2187 
2188   /* Rewrite the next operator.  */
2189   oe = VEC_index (operand_entry_t, ops, opindex);
2190 
2191   if (oe->op != rhs2)
2192     {
2193       if (!moved)
2194 	{
2195 	  gimple_stmt_iterator gsinow, gsirhs1;
2196 	  gimple stmt1 = stmt, stmt2;
2197 	  unsigned int count;
2198 
2199 	  gsinow = gsi_for_stmt (stmt);
2200 	  count = VEC_length (operand_entry_t, ops) - opindex - 2;
2201 	  while (count-- != 0)
2202 	    {
2203 	      stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt1));
2204 	      gsirhs1 = gsi_for_stmt (stmt2);
2205 	      gsi_move_before (&gsirhs1, &gsinow);
2206 	      gsi_prev (&gsinow);
2207 	      stmt1 = stmt2;
2208 	    }
2209 	  moved = true;
2210 	}
2211 
2212       if (dump_file && (dump_flags & TDF_DETAILS))
2213 	{
2214 	  fprintf (dump_file, "Transforming ");
2215 	  print_gimple_stmt (dump_file, stmt, 0, 0);
2216 	}
2217 
2218       gimple_assign_set_rhs2 (stmt, oe->op);
2219       update_stmt (stmt);
2220 
2221       if (dump_file && (dump_flags & TDF_DETAILS))
2222 	{
2223 	  fprintf (dump_file, " into ");
2224 	  print_gimple_stmt (dump_file, stmt, 0, 0);
2225 	}
2226     }
2227   /* Recurse on the LHS of the binary operator, which is guaranteed to
2228      be the non-leaf side.  */
2229   rewrite_expr_tree (SSA_NAME_DEF_STMT (rhs1), opindex + 1, ops, moved);
2230 }
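
/* As a sketch of the recursion above: OPS[OPINDEX] becomes the rhs2 of
   the current statement, the recursion then descends into the defining
   statement of rhs1 with OPINDEX + 1, and the last two entries of OPS
   become both operands of the innermost statement.  E.g. for the chain

     t1_5 = ... + ...;
     t2_6 = t1_5 + ...;
     t3_7 = t2_6 + ...;

   rewriting with OPS = { x, y, z, w } yields t3_7 = t2_6 + x,
   t2_6 = t1_5 + y and t1_5 = z + w (modulo the operand swap heuristic
   applied when exactly three operands remain).  */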
2231 
2232 /* Find out how many cycles we need to compute the chain of statements.
2233    OPS_NUM holds the number of statements in the chain.  CPU_WIDTH is the
2234    maximum number of independent statements we may execute per cycle.  */
2235 
2236 static int
2237 get_required_cycles (int ops_num, int cpu_width)
2238 {
2239   int res;
2240   int elog;
2241   unsigned int rest;
2242 
2243   /* While we have more than 2 * cpu_width operands
2244      we may reduce number of operands by cpu_width
2245      per cycle.  */
2246   res = ops_num / (2 * cpu_width);
2247 
2248   /* The remaining operand count can be halved each cycle
2249      until we are left with a single operand.  */
2250   rest = (unsigned)(ops_num - res * cpu_width);
2251   elog = exact_log2 (rest);
2252   if (elog >= 0)
2253     res += elog;
2254   else
2255     res += floor_log2 (rest) + 1;
2256 
2257   return res;
2258 }
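
/* A worked example of the estimate above (illustrative only): for
   ops_num = 10 and cpu_width = 2,

     res  = 10 / (2 * 2) = 2    cycles, each reducing the count by cpu_width
     rest = 10 - 2 * 2   = 6    operands remaining
     floor_log2 (6) + 1  = 3    cycles to halve 6 down to a single operand

   for a total estimate of 5 cycles.  */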
2259 
2260 /* Returns an optimal number of registers to use for computation of
2261    given statements.  */
2262 
2263 static int
2264 get_reassociation_width (int ops_num, enum tree_code opc,
2265 			 enum machine_mode mode)
2266 {
2267   int param_width = PARAM_VALUE (PARAM_TREE_REASSOC_WIDTH);
2268   int width;
2269   int width_min;
2270   int cycles_best;
2271 
2272   if (param_width > 0)
2273     width = param_width;
2274   else
2275     width = targetm.sched.reassociation_width (opc, mode);
2276 
2277   if (width == 1)
2278     return width;
2279 
2280   /* Get the minimal time required for sequence computation.  */
2281   cycles_best = get_required_cycles (ops_num, width);
2282 
2283   /* Check whether a smaller width still computes the sequence in the
2284      same number of cycles; if so, prefer it to reduce register pressure.
2285      get_required_cycles increases monotonically as the width decreases,
2286      so we can perform a binary search for the minimal width that still
2287      results in the optimal cycle count.  */
2288   width_min = 1;
2289   while (width > width_min)
2290     {
2291       int width_mid = (width + width_min) / 2;
2292 
2293       if (get_required_cycles (ops_num, width_mid) == cycles_best)
2294 	width = width_mid;
2295       else if (width_min < width_mid)
2296 	width_min = width_mid;
2297       else
2298 	break;
2299     }
2300 
2301   return width;
2302 }
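
/* As a sketch of the search above: for ops_num = 4 and a target width
   of 4, get_required_cycles (4, 4) and get_required_cycles (4, 2) both
   evaluate to 2, so the binary search settles on width 2; two parallel
   chains already reach the minimal cycle count while tying up fewer
   registers.  */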
2303 
2304 /* Recursively rewrite our linearized statements so that the operators
2305    match those in OPS[OPINDEX], putting the computation in rank
2306    order and trying to allow operations to be executed in
2307    parallel.  */
2308 
2309 static void
2310 rewrite_expr_tree_parallel (gimple stmt, int width,
2311 			    VEC(operand_entry_t, heap) * ops)
2312 {
2313   enum tree_code opcode = gimple_assign_rhs_code (stmt);
2314   int op_num = VEC_length (operand_entry_t, ops);
2315   int stmt_num = op_num - 1;
2316   gimple *stmts = XALLOCAVEC (gimple, stmt_num);
2317   int op_index = op_num - 1;
2318   int stmt_index = 0;
2319   int ready_stmts_end = 0;
2320   int i = 0;
2321   tree last_rhs1 = gimple_assign_rhs1 (stmt);
2322   tree lhs_var;
2323   gcc_assert (op_num > 0);
2324 
2325   /* We start expression rewriting from the top statements.
2326      So, in this loop we create a full list of statements
2327      we will work with.  */
2328   stmts[stmt_num - 1] = stmt;
2329   for (i = stmt_num - 2; i >= 0; i--)
2330     stmts[i] = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmts[i+1]));
2331 
2332   lhs_var = create_tmp_reg (TREE_TYPE (last_rhs1), NULL);
2333   add_referenced_var (lhs_var);
2334 
2335   for (i = 0; i < stmt_num; i++)
2336     {
2337       tree op1, op2;
2338 
2339       /* Determine whether we should use results of
2340 	 already handled statements or not.  */
2341       if (ready_stmts_end == 0
2342 	  && (i - stmt_index >= width || op_index < 1))
2343 	ready_stmts_end = i;
2344 
2345       /* Now we choose operands for the next statement.  A nonzero
2346 	 value in ready_stmts_end means that we should use
2347 	 the results of already generated statements as new operands.  */
2348       if (ready_stmts_end > 0)
2349 	{
2350 	  op1 = gimple_assign_lhs (stmts[stmt_index++]);
2351 	  if (ready_stmts_end > stmt_index)
2352 	    op2 = gimple_assign_lhs (stmts[stmt_index++]);
2353 	  else if (op_index >= 0)
2354 	    op2 = VEC_index (operand_entry_t, ops, op_index--)->op;
2355 	  else
2356 	    {
2357 	      gcc_assert (stmt_index < i);
2358 	      op2 = gimple_assign_lhs (stmts[stmt_index++]);
2359 	    }
2360 
2361 	  if (stmt_index >= ready_stmts_end)
2362 	    ready_stmts_end = 0;
2363 	}
2364       else
2365 	{
2366 	  if (op_index > 1)
2367 	    swap_ops_for_binary_stmt (ops, op_index - 2, NULL);
2368 	  op2 = VEC_index (operand_entry_t, ops, op_index--)->op;
2369 	  op1 = VEC_index (operand_entry_t, ops, op_index--)->op;
2370 	}
2371 
2372       /* If we emit the last statement, then we should put the
2373 	 operands into the last statement.  This will also
2374 	 break the loop.  */
2375       if (op_index < 0 && stmt_index == i)
2376 	i = stmt_num - 1;
2377 
2378       if (dump_file && (dump_flags & TDF_DETAILS))
2379 	{
2380 	  fprintf (dump_file, "Transforming ");
2381 	  print_gimple_stmt (dump_file, stmts[i], 0, 0);
2382 	}
2383 
2384       /* We keep the original statement only for the last one.  All
2385 	 others are recreated.  */
2386       if (i == stmt_num - 1)
2387 	{
2388 	  gimple_assign_set_rhs1 (stmts[i], op1);
2389 	  gimple_assign_set_rhs2 (stmts[i], op2);
2390 	  update_stmt (stmts[i]);
2391 	}
2392       else
2393 	stmts[i] = build_and_add_sum (lhs_var, op1, op2, opcode);
2394 
2395       if (dump_file && (dump_flags & TDF_DETAILS))
2396 	{
2397 	  fprintf (dump_file, " into ");
2398 	  print_gimple_stmt (dump_file, stmts[i], 0, 0);
2399 	}
2400     }
2401 
2402   remove_visited_stmt_chain (last_rhs1);
2403 }
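
/* For illustration (a sketch; the actual operand placement also depends
   on ranks and the PHI heuristic): rewriting a_1 + b_2 + c_3 + d_4 with
   width 2 aims for a shape like

     tmp1_5 = a_1 + b_2;
     tmp2_6 = c_3 + d_4;
     lhs_7  = tmp1_5 + tmp2_6;

   i.e. two independent additions that can issue in the same cycle,
   instead of one serial chain of three dependent additions.  */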
2404 
2405 /* Transform STMT, which is really (A + B) + (C + D), into the left
2406    linear form, ((A+B)+C)+D.
2407    Recurse on D if necessary.  */
2408 
2409 static void
2410 linearize_expr (gimple stmt)
2411 {
2412   gimple_stmt_iterator gsinow, gsirhs;
2413   gimple binlhs = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
2414   gimple binrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
2415   enum tree_code rhscode = gimple_assign_rhs_code (stmt);
2416   gimple newbinrhs = NULL;
2417   struct loop *loop = loop_containing_stmt (stmt);
2418 
2419   gcc_assert (is_reassociable_op (binlhs, rhscode, loop)
2420 	      && is_reassociable_op (binrhs, rhscode, loop));
2421 
2422   gsinow = gsi_for_stmt (stmt);
2423   gsirhs = gsi_for_stmt (binrhs);
2424   gsi_move_before (&gsirhs, &gsinow);
2425 
2426   gimple_assign_set_rhs2 (stmt, gimple_assign_rhs1 (binrhs));
2427   gimple_assign_set_rhs1 (binrhs, gimple_assign_lhs (binlhs));
2428   gimple_assign_set_rhs1 (stmt, gimple_assign_lhs (binrhs));
2429 
2430   if (TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME)
2431     newbinrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
2432 
2433   if (dump_file && (dump_flags & TDF_DETAILS))
2434     {
2435       fprintf (dump_file, "Linearized: ");
2436       print_gimple_stmt (dump_file, stmt, 0, 0);
2437     }
2438 
2439   reassociate_stats.linearized++;
2440   update_stmt (binrhs);
2441   update_stmt (binlhs);
2442   update_stmt (stmt);
2443 
2444   gimple_set_visited (stmt, true);
2445   gimple_set_visited (binlhs, true);
2446   gimple_set_visited (binrhs, true);
2447 
2448   /* Tail recurse on the new rhs if it still needs reassociation.  */
2449   if (newbinrhs && is_reassociable_op (newbinrhs, rhscode, loop))
2450     /* ??? This should probably be linearize_expr (newbinrhs) but I don't
2451 	   want to change the algorithm while converting to tuples.  */
2452     linearize_expr (stmt);
2453 }
2454 
2455 /* If LHS has a single immediate use that is a GIMPLE_ASSIGN statement, return
2456    it.  Otherwise, return NULL.  */
2457 
2458 static gimple
2459 get_single_immediate_use (tree lhs)
2460 {
2461   use_operand_p immuse;
2462   gimple immusestmt;
2463 
2464   if (TREE_CODE (lhs) == SSA_NAME
2465       && single_imm_use (lhs, &immuse, &immusestmt)
2466       && is_gimple_assign (immusestmt))
2467     return immusestmt;
2468 
2469   return NULL;
2470 }
2471 
2472 /* Recursively negate the value of TONEGATE, and return the SSA_NAME
2473    representing the negated value.  Insertions of any necessary
2474    instructions go before GSI.
2475    This function is recursive in that, if you hand it "a_5" as the
2476    value to negate, and a_5 is defined by "a_5 = b_3 + b_4", it will
2477    transform b_3 + b_4 into a_5 = -b_3 + -b_4.  */
2478 
2479 static tree
2480 negate_value (tree tonegate, gimple_stmt_iterator *gsi)
2481 {
2482   gimple negatedefstmt = NULL;
2483   tree resultofnegate;
2484 
2485   /* If we are trying to negate a name, defined by an add, negate the
2486      add operands instead.  */
2487   if (TREE_CODE (tonegate) == SSA_NAME)
2488     negatedefstmt = SSA_NAME_DEF_STMT (tonegate);
2489   if (TREE_CODE (tonegate) == SSA_NAME
2490       && is_gimple_assign (negatedefstmt)
2491       && TREE_CODE (gimple_assign_lhs (negatedefstmt)) == SSA_NAME
2492       && has_single_use (gimple_assign_lhs (negatedefstmt))
2493       && gimple_assign_rhs_code (negatedefstmt) == PLUS_EXPR)
2494     {
2495       gimple_stmt_iterator gsi;
2496       tree rhs1 = gimple_assign_rhs1 (negatedefstmt);
2497       tree rhs2 = gimple_assign_rhs2 (negatedefstmt);
2498 
2499       gsi = gsi_for_stmt (negatedefstmt);
2500       rhs1 = negate_value (rhs1, &gsi);
2501       gimple_assign_set_rhs1 (negatedefstmt, rhs1);
2502 
2503       gsi = gsi_for_stmt (negatedefstmt);
2504       rhs2 = negate_value (rhs2, &gsi);
2505       gimple_assign_set_rhs2 (negatedefstmt, rhs2);
2506 
2507       update_stmt (negatedefstmt);
2508       return gimple_assign_lhs (negatedefstmt);
2509     }
2510 
2511   tonegate = fold_build1 (NEGATE_EXPR, TREE_TYPE (tonegate), tonegate);
2512   resultofnegate = force_gimple_operand_gsi (gsi, tonegate, true,
2513 					     NULL_TREE, true, GSI_SAME_STMT);
2514   return resultofnegate;
2515 }
2516 
2517 /* Return true if we should break up the subtract in STMT into an add
2518    with negate.  This is true when the subtract operands are really
2519    adds, or the subtract itself is used in an add expression.  In
2520    either case, breaking up the subtract into an add with negate
2521    exposes the adds to reassociation.  */
2522 
2523 static bool
2524 should_break_up_subtract (gimple stmt)
2525 {
2526   tree lhs = gimple_assign_lhs (stmt);
2527   tree binlhs = gimple_assign_rhs1 (stmt);
2528   tree binrhs = gimple_assign_rhs2 (stmt);
2529   gimple immusestmt;
2530   struct loop *loop = loop_containing_stmt (stmt);
2531 
2532   if (TREE_CODE (binlhs) == SSA_NAME
2533       && is_reassociable_op (SSA_NAME_DEF_STMT (binlhs), PLUS_EXPR, loop))
2534     return true;
2535 
2536   if (TREE_CODE (binrhs) == SSA_NAME
2537       && is_reassociable_op (SSA_NAME_DEF_STMT (binrhs), PLUS_EXPR, loop))
2538     return true;
2539 
2540   if (TREE_CODE (lhs) == SSA_NAME
2541       && (immusestmt = get_single_immediate_use (lhs))
2542       && is_gimple_assign (immusestmt)
2543       && (gimple_assign_rhs_code (immusestmt) == PLUS_EXPR
2544 	  || gimple_assign_rhs_code (immusestmt) == MULT_EXPR))
2545     return true;
2546   return false;
2547 }
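
/* For example, given

     x_3 = a_1 - b_2;
     y_5 = x_3 + c_4;

   the subtract's only use is an addition, so should_break_up_subtract
   returns true and break_up_subtract will rewrite it as
   x_3 = a_1 + -b_2, exposing all three addends to reassociation.  */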
2548 
2549 /* Transform STMT from A - B into A + -B.  */
2550 
2551 static void
2552 break_up_subtract (gimple stmt, gimple_stmt_iterator *gsip)
2553 {
2554   tree rhs1 = gimple_assign_rhs1 (stmt);
2555   tree rhs2 = gimple_assign_rhs2 (stmt);
2556 
2557   if (dump_file && (dump_flags & TDF_DETAILS))
2558     {
2559       fprintf (dump_file, "Breaking up subtract ");
2560       print_gimple_stmt (dump_file, stmt, 0, 0);
2561     }
2562 
2563   rhs2 = negate_value (rhs2, gsip);
2564   gimple_assign_set_rhs_with_ops (gsip, PLUS_EXPR, rhs1, rhs2);
2565   update_stmt (stmt);
2566 }
2567 
2568 /* Recursively linearize a binary expression that is the RHS of STMT.
2569    Place the operands of the expression tree in the vector named OPS.  */
2570 
2571 static void
2572 linearize_expr_tree (VEC(operand_entry_t, heap) **ops, gimple stmt,
2573 		     bool is_associative, bool set_visited)
2574 {
2575   tree binlhs = gimple_assign_rhs1 (stmt);
2576   tree binrhs = gimple_assign_rhs2 (stmt);
2577   gimple binlhsdef, binrhsdef;
2578   bool binlhsisreassoc = false;
2579   bool binrhsisreassoc = false;
2580   enum tree_code rhscode = gimple_assign_rhs_code (stmt);
2581   struct loop *loop = loop_containing_stmt (stmt);
2582 
2583   if (set_visited)
2584     gimple_set_visited (stmt, true);
2585 
2586   if (TREE_CODE (binlhs) == SSA_NAME)
2587     {
2588       binlhsdef = SSA_NAME_DEF_STMT (binlhs);
2589       binlhsisreassoc = (is_reassociable_op (binlhsdef, rhscode, loop)
2590 			 && !stmt_could_throw_p (binlhsdef));
2591     }
2592 
2593   if (TREE_CODE (binrhs) == SSA_NAME)
2594     {
2595       binrhsdef = SSA_NAME_DEF_STMT (binrhs);
2596       binrhsisreassoc = (is_reassociable_op (binrhsdef, rhscode, loop)
2597 			 && !stmt_could_throw_p (binrhsdef));
2598     }
2599 
2600   /* If the LHS is not reassociable, but the RHS is, we need to swap
2601      them.  If neither is reassociable, there is nothing we can do, so
2602      just put them in the ops vector.  If the LHS is reassociable,
2603      linearize it.  If both are reassociable, then linearize the RHS
2604      and the LHS.  */
2605 
2606   if (!binlhsisreassoc)
2607     {
2608       tree temp;
2609 
2610       /* If this is not an associative operation (e.g. division), give up.  */
2611       if (!is_associative)
2612 	{
2613 	  add_to_ops_vec (ops, binrhs);
2614 	  return;
2615 	}
2616 
2617       if (!binrhsisreassoc)
2618 	{
2619 	  add_to_ops_vec (ops, binrhs);
2620 	  add_to_ops_vec (ops, binlhs);
2621 	  return;
2622 	}
2623 
2624       if (dump_file && (dump_flags & TDF_DETAILS))
2625 	{
2626 	  fprintf (dump_file, "swapping operands of ");
2627 	  print_gimple_stmt (dump_file, stmt, 0, 0);
2628 	}
2629 
2630       swap_tree_operands (stmt,
2631 			  gimple_assign_rhs1_ptr (stmt),
2632 			  gimple_assign_rhs2_ptr (stmt));
2633       update_stmt (stmt);
2634 
2635       if (dump_file && (dump_flags & TDF_DETAILS))
2636 	{
2637 	  fprintf (dump_file, " is now ");
2638 	  print_gimple_stmt (dump_file, stmt, 0, 0);
2639 	}
2640 
2641       /* We want to make it so the lhs is always the reassociative op,
2642 	 so swap.  */
2643       temp = binlhs;
2644       binlhs = binrhs;
2645       binrhs = temp;
2646     }
2647   else if (binrhsisreassoc)
2648     {
2649       linearize_expr (stmt);
2650       binlhs = gimple_assign_rhs1 (stmt);
2651       binrhs = gimple_assign_rhs2 (stmt);
2652     }
2653 
2654   gcc_assert (TREE_CODE (binrhs) != SSA_NAME
2655 	      || !is_reassociable_op (SSA_NAME_DEF_STMT (binrhs),
2656 				      rhscode, loop));
2657   linearize_expr_tree (ops, SSA_NAME_DEF_STMT (binlhs),
2658 		       is_associative, set_visited);
2659   add_to_ops_vec (ops, binrhs);
2660 }
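
/* As a rough example: for the already left-linearized chain

     t1_5 = a_1 + b_2;
     t2_6 = t1_5 + c_3;
     t3_7 = t2_6 + d_4;

   calling linearize_expr_tree on the t3_7 statement leaves OPS holding
   roughly { b_2, a_1, c_3, d_4 } (leaves pushed as the recursion
   unwinds); the vector is then sorted by rank before the operand-list
   optimizations run.  */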
2661 
2662 /* Repropagate the negates back into subtracts, since no other pass
2663    currently does it.  */
2664 
2665 static void
2666 repropagate_negates (void)
2667 {
2668   unsigned int i = 0;
2669   tree negate;
2670 
2671   FOR_EACH_VEC_ELT (tree, plus_negates, i, negate)
2672     {
2673       gimple user = get_single_immediate_use (negate);
2674 
2675       if (!user || !is_gimple_assign (user))
2676 	continue;
2677 
2678       /* The negate operand can be either operand of a PLUS_EXPR
2679 	 (it can be the LHS if the RHS is a constant for example).
2680 
2681 	 Force the negate operand to the RHS of the PLUS_EXPR, then
2682 	 transform the PLUS_EXPR into a MINUS_EXPR.  */
2683       if (gimple_assign_rhs_code (user) == PLUS_EXPR)
2684 	{
2685 	  /* If the negated operand appears on the LHS of the
2686 	     PLUS_EXPR, exchange the operands of the PLUS_EXPR
2687 	     to force the negated operand to the RHS of the PLUS_EXPR.  */
2688 	  if (gimple_assign_rhs1 (user) == negate)
2689 	    {
2690 	      swap_tree_operands (user,
2691 				  gimple_assign_rhs1_ptr (user),
2692 				  gimple_assign_rhs2_ptr (user));
2693 	    }
2694 
2695 	  /* Now transform the PLUS_EXPR into a MINUS_EXPR and replace
2696 	     the RHS of the PLUS_EXPR with the operand of the NEGATE_EXPR.  */
2697 	  if (gimple_assign_rhs2 (user) == negate)
2698 	    {
2699 	      tree rhs1 = gimple_assign_rhs1 (user);
2700 	      tree rhs2 = get_unary_op (negate, NEGATE_EXPR);
2701 	      gimple_stmt_iterator gsi = gsi_for_stmt (user);
2702 	      gimple_assign_set_rhs_with_ops (&gsi, MINUS_EXPR, rhs1, rhs2);
2703 	      update_stmt (user);
2704 	    }
2705 	}
2706       else if (gimple_assign_rhs_code (user) == MINUS_EXPR)
2707 	{
2708 	  if (gimple_assign_rhs1 (user) == negate)
2709 	    {
2710 	      /* We have
2711 	           x = -a
2712 		   y = x - b
2713 		 which we transform into
2714 		   x = a + b
2715 		   y = -x .
2716 		 This pushes down the negate which we possibly can merge
2717 		 into some other operation, hence insert it into the
2718 		 plus_negates vector.  */
2719 	      gimple feed = SSA_NAME_DEF_STMT (negate);
2720 	      tree a = gimple_assign_rhs1 (feed);
2721 	      tree rhs2 = gimple_assign_rhs2 (user);
2722 	      gimple_stmt_iterator gsi = gsi_for_stmt (feed), gsi2;
2723 	      gimple_replace_lhs (feed, negate);
2724 	      gimple_assign_set_rhs_with_ops (&gsi, PLUS_EXPR, a, rhs2);
2725 	      update_stmt (gsi_stmt (gsi));
2726 	      gsi2 = gsi_for_stmt (user);
2727 	      gimple_assign_set_rhs_with_ops (&gsi2, NEGATE_EXPR, negate, NULL);
2728 	      update_stmt (gsi_stmt (gsi2));
2729 	      gsi_move_before (&gsi, &gsi2);
2730 	      VEC_safe_push (tree, heap, plus_negates,
2731 			     gimple_assign_lhs (gsi_stmt (gsi2)));
2732 	    }
2733 	  else
2734 	    {
2735 	      /* Transform "x = -a; y = b - x" into "y = b + a", getting
2736 	         rid of one operation.  */
2737 	      gimple feed = SSA_NAME_DEF_STMT (negate);
2738 	      tree a = gimple_assign_rhs1 (feed);
2739 	      tree rhs1 = gimple_assign_rhs1 (user);
2740 	      gimple_stmt_iterator gsi = gsi_for_stmt (user);
2741 	      gimple_assign_set_rhs_with_ops (&gsi, PLUS_EXPR, rhs1, a);
2742 	      update_stmt (gsi_stmt (gsi));
2743 	    }
2744 	}
2745     }
2746 }
2747 
2748 /* Returns true if OP is of a type for which we can do reassociation.
2749    That is for integral or non-saturating fixed-point types, and for
2750    floating point type when associative-math is enabled.  */
2751 
2752 static bool
2753 can_reassociate_p (tree op)
2754 {
2755   tree type = TREE_TYPE (op);
2756   if ((INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_WRAPS (type))
2757       || NON_SAT_FIXED_POINT_TYPE_P (type)
2758       || (flag_associative_math && FLOAT_TYPE_P (type)))
2759     return true;
2760   return false;
2761 }
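
/* For example, under the usual C semantics: an "unsigned int" operand
   qualifies because unsigned overflow wraps; a plain "int" operand does
   not unless -fwrapv is in effect, since reassociation could introduce
   signed overflow; a "double" operand qualifies only when
   -fassociative-math (e.g. via -ffast-math) is enabled.  */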
2762 
2763 /* Break up subtract operations in block BB.
2764 
2765    We do this top down because we don't know whether the subtract is
2766    part of a possible chain of reassociation except at the top.
2767 
2768    IE given
2769    d = f + g
2770    c = a + e
2771    b = c - d
2772    q = b - r
2773    k = t - q
2774 
2775    we want to break up k = t - q, but we won't until we've transformed q
2776    = b - r, which won't be broken up until we transform b = c - d.
2777 
2778    En passant, clear the GIMPLE visited flag on every statement.  */
2779 
2780 static void
2781 break_up_subtract_bb (basic_block bb)
2782 {
2783   gimple_stmt_iterator gsi;
2784   basic_block son;
2785 
2786   for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2787     {
2788       gimple stmt = gsi_stmt (gsi);
2789       gimple_set_visited (stmt, false);
2790 
2791       if (!is_gimple_assign (stmt)
2792 	  || !can_reassociate_p (gimple_assign_lhs (stmt)))
2793 	continue;
2794 
2795       /* Look for simple gimple subtract operations.  */
2796       if (gimple_assign_rhs_code (stmt) == MINUS_EXPR)
2797 	{
2798 	  if (!can_reassociate_p (gimple_assign_rhs1 (stmt))
2799 	      || !can_reassociate_p (gimple_assign_rhs2 (stmt)))
2800 	    continue;
2801 
2802 	  /* Check for a subtract used only in an addition.  If this
2803 	     is the case, transform it into add of a negate for better
2804 	     reassociation.  IE transform C = A-B into C = A + -B if C
2805 	     is only used in an addition.  */
2806 	  if (should_break_up_subtract (stmt))
2807 	    break_up_subtract (stmt, &gsi);
2808 	}
2809       else if (gimple_assign_rhs_code (stmt) == NEGATE_EXPR
2810 	       && can_reassociate_p (gimple_assign_rhs1 (stmt)))
2811 	VEC_safe_push (tree, heap, plus_negates, gimple_assign_lhs (stmt));
2812     }
2813   for (son = first_dom_son (CDI_DOMINATORS, bb);
2814        son;
2815        son = next_dom_son (CDI_DOMINATORS, son))
2816     break_up_subtract_bb (son);
2817 }
2818 
2819 /* Reassociate expressions in basic block BB and its children in the
2820    post-dominator tree.  */
2821 
2822 static void
2823 reassociate_bb (basic_block bb)
2824 {
2825   gimple_stmt_iterator gsi;
2826   basic_block son;
2827 
2828   for (gsi = gsi_last_bb (bb); !gsi_end_p (gsi); gsi_prev (&gsi))
2829     {
2830       gimple stmt = gsi_stmt (gsi);
2831 
2832       if (is_gimple_assign (stmt)
2833 	  && !stmt_could_throw_p (stmt))
2834 	{
2835 	  tree lhs, rhs1, rhs2;
2836 	  enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
2837 
2838 	  /* If this is not a gimple binary expression, there is
2839 	     nothing for us to do with it.  */
2840 	  if (get_gimple_rhs_class (rhs_code) != GIMPLE_BINARY_RHS)
2841 	    continue;
2842 
2843 	  /* If this was part of an already processed statement,
2844 	     we don't need to touch it again. */
2845 	  if (gimple_visited_p (stmt))
2846 	    {
2847 	      /* This statement might have become dead because of previous
2848 		 reassociations.  */
2849 	      if (has_zero_uses (gimple_get_lhs (stmt)))
2850 		{
2851 		  gsi_remove (&gsi, true);
2852 		  release_defs (stmt);
2853 		  /* We might end up removing the last stmt above which
2854 		     places the iterator to the end of the sequence.
2855 		     Reset it to the last stmt in this case which might
2856 		     be the end of the sequence as well if we removed
2857 		     the last statement of the sequence.  In which case
2858 		     we need to bail out.  */
2859 		  if (gsi_end_p (gsi))
2860 		    {
2861 		      gsi = gsi_last_bb (bb);
2862 		      if (gsi_end_p (gsi))
2863 			break;
2864 		    }
2865 		}
2866 	      continue;
2867 	    }
2868 
2869 	  lhs = gimple_assign_lhs (stmt);
2870 	  rhs1 = gimple_assign_rhs1 (stmt);
2871 	  rhs2 = gimple_assign_rhs2 (stmt);
2872 
2873 	  /* For non-bit or min/max operations we can't associate
2874 	     all types.  Verify that here.  */
2875 	  if (rhs_code != BIT_IOR_EXPR
2876 	      && rhs_code != BIT_AND_EXPR
2877 	      && rhs_code != BIT_XOR_EXPR
2878 	      && rhs_code != MIN_EXPR
2879 	      && rhs_code != MAX_EXPR
2880 	      && (!can_reassociate_p (lhs)
2881 		  || !can_reassociate_p (rhs1)
2882 		  || !can_reassociate_p (rhs2)))
2883 	    continue;
2884 
2885 	  if (associative_tree_code (rhs_code))
2886 	    {
2887 	      VEC(operand_entry_t, heap) *ops = NULL;
2888 
2889 	      /* There may be no immediate uses left by the time we
2890 		 get here because we may have eliminated them all.  */
2891 	      if (TREE_CODE (lhs) == SSA_NAME && has_zero_uses (lhs))
2892 		continue;
2893 
2894 	      gimple_set_visited (stmt, true);
2895 	      linearize_expr_tree (&ops, stmt, true, true);
2896 	      VEC_qsort (operand_entry_t, ops, sort_by_operand_rank);
2897 	      optimize_ops_list (rhs_code, &ops);
2898 	      if (undistribute_ops_list (rhs_code, &ops,
2899 					 loop_containing_stmt (stmt)))
2900 		{
2901 		  VEC_qsort (operand_entry_t, ops, sort_by_operand_rank);
2902 		  optimize_ops_list (rhs_code, &ops);
2903 		}
2904 
2905 	      if (rhs_code == BIT_IOR_EXPR || rhs_code == BIT_AND_EXPR)
2906 		optimize_range_tests (rhs_code, &ops);
2907 
2908 	      if (VEC_length (operand_entry_t, ops) == 1)
2909 		{
2910 		  if (dump_file && (dump_flags & TDF_DETAILS))
2911 		    {
2912 		      fprintf (dump_file, "Transforming ");
2913 		      print_gimple_stmt (dump_file, stmt, 0, 0);
2914 		    }
2915 
2916 		  rhs1 = gimple_assign_rhs1 (stmt);
2917 		  gimple_assign_set_rhs_from_tree (&gsi,
2918 						   VEC_last (operand_entry_t,
2919 							     ops)->op);
2920 		  update_stmt (stmt);
2921 		  remove_visited_stmt_chain (rhs1);
2922 
2923 		  if (dump_file && (dump_flags & TDF_DETAILS))
2924 		    {
2925 		      fprintf (dump_file, " into ");
2926 		      print_gimple_stmt (dump_file, stmt, 0, 0);
2927 		    }
2928 		}
2929 	      else
2930 		{
2931 		  enum machine_mode mode = TYPE_MODE (TREE_TYPE (lhs));
2932 		  int ops_num = VEC_length (operand_entry_t, ops);
2933 		  int width = get_reassociation_width (ops_num, rhs_code, mode);
2934 
2935 		  if (dump_file && (dump_flags & TDF_DETAILS))
2936 		    fprintf (dump_file,
2937 			     "Width = %d was chosen for reassociation\n", width);
2938 
2939 		  if (width > 1
2940 		      && VEC_length (operand_entry_t, ops) > 3)
2941 		    rewrite_expr_tree_parallel (stmt, width, ops);
2942 		  else
2943 		    rewrite_expr_tree (stmt, 0, ops, false);
2944 		}
2945 
2946 	      VEC_free (operand_entry_t, heap, ops);
2947 	    }
2948 	}
2949     }
2950   for (son = first_dom_son (CDI_POST_DOMINATORS, bb);
2951        son;
2952        son = next_dom_son (CDI_POST_DOMINATORS, son))
2953     reassociate_bb (son);
2954 }
2955 
2956 void dump_ops_vector (FILE *file, VEC (operand_entry_t, heap) *ops);
2957 void debug_ops_vector (VEC (operand_entry_t, heap) *ops);
2958 
2959 /* Dump the operand entry vector OPS to FILE.  */
2960 
2961 void
2962 dump_ops_vector (FILE *file, VEC (operand_entry_t, heap) *ops)
2963 {
2964   operand_entry_t oe;
2965   unsigned int i;
2966 
2967   FOR_EACH_VEC_ELT (operand_entry_t, ops, i, oe)
2968     {
2969       fprintf (file, "Op %d -> rank: %d, tree: ", i, oe->rank);
2970       print_generic_expr (file, oe->op, 0);
2971     }
2972 }
2973 
2974 /* Dump the operand entry vector OPS to STDERR.  */
2975 
2976 DEBUG_FUNCTION void
2977 debug_ops_vector (VEC (operand_entry_t, heap) *ops)
2978 {
2979   dump_ops_vector (stderr, ops);
2980 }
2981 
2982 static void
2983 do_reassoc (void)
2984 {
2985   break_up_subtract_bb (ENTRY_BLOCK_PTR);
2986   reassociate_bb (EXIT_BLOCK_PTR);
2987 }
2988 
2989 /* Initialize the reassociation pass.  */
2990 
2991 static void
2992 init_reassoc (void)
2993 {
2994   int i;
2995   long rank = 2;
2996   tree param;
2997   int *bbs = XNEWVEC (int, last_basic_block + 1);
2998 
2999   /* Find the loops, so that we can prevent moving calculations in
3000      them.  */
3001   loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
3002 
3003   memset (&reassociate_stats, 0, sizeof (reassociate_stats));
3004 
3005   operand_entry_pool = create_alloc_pool ("operand entry pool",
3006 					  sizeof (struct operand_entry), 30);
3007   next_operand_entry_id = 0;
3008 
3009   /* Reverse RPO (Reverse Post Order) will give us something where
3010      deeper loops come later.  */
3011   pre_and_rev_post_order_compute (NULL, bbs, false);
3012   bb_rank = XCNEWVEC (long, last_basic_block + 1);
3013   operand_rank = pointer_map_create ();
3014 
3015   /* Give each argument a distinct rank.   */
3016   for (param = DECL_ARGUMENTS (current_function_decl);
3017        param;
3018        param = DECL_CHAIN (param))
3019     {
3020       if (gimple_default_def (cfun, param) != NULL)
3021 	{
3022 	  tree def = gimple_default_def (cfun, param);
3023 	  insert_operand_rank (def, ++rank);
3024 	}
3025     }
3026 
3027   /* Give the chain decl a distinct rank. */
3028   if (cfun->static_chain_decl != NULL)
3029     {
3030       tree def = gimple_default_def (cfun, cfun->static_chain_decl);
3031       if (def != NULL)
3032 	insert_operand_rank (def, ++rank);
3033     }
3034 
3035   /* Set up rank for each BB.  */
3036   for (i = 0; i < n_basic_blocks - NUM_FIXED_BLOCKS; i++)
3037     bb_rank[bbs[i]] = ++rank << 16;
3038 
3039   free (bbs);
3040   calculate_dominance_info (CDI_POST_DOMINATORS);
3041   plus_negates = NULL;
3042 }
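
/* A sketch of the resulting rank numbering (illustrative): with two
   parameters and three real basic blocks, the parameters' default defs
   receive ranks 3 and 4, and the blocks receive base ranks 5 << 16,
   6 << 16 and 7 << 16 in the order computed above, so blocks that come
   later in that order (deeper loops) get the highest base ranks.  */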
3043 
3044 /* Cleanup after the reassociation pass, and print stats if
3045    requested.  */
3046 
3047 static void
3048 fini_reassoc (void)
3049 {
3050   statistics_counter_event (cfun, "Linearized",
3051 			    reassociate_stats.linearized);
3052   statistics_counter_event (cfun, "Constants eliminated",
3053 			    reassociate_stats.constants_eliminated);
3054   statistics_counter_event (cfun, "Ops eliminated",
3055 			    reassociate_stats.ops_eliminated);
3056   statistics_counter_event (cfun, "Statements rewritten",
3057 			    reassociate_stats.rewritten);
3058 
3059   pointer_map_destroy (operand_rank);
3060   free_alloc_pool (operand_entry_pool);
3061   free (bb_rank);
3062   VEC_free (tree, heap, plus_negates);
3063   free_dominance_info (CDI_POST_DOMINATORS);
3064   loop_optimizer_finalize ();
3065 }
3066 
3067 /* Gate and execute functions for Reassociation.  */
3068 
3069 static unsigned int
3070 execute_reassoc (void)
3071 {
3072   init_reassoc ();
3073 
3074   do_reassoc ();
3075   repropagate_negates ();
3076 
3077   fini_reassoc ();
3078   return 0;
3079 }
3080 
3081 static bool
3082 gate_tree_ssa_reassoc (void)
3083 {
3084   return flag_tree_reassoc != 0;
3085 }
3086 
3087 struct gimple_opt_pass pass_reassoc =
3088 {
3089  {
3090   GIMPLE_PASS,
3091   "reassoc",				/* name */
3092   gate_tree_ssa_reassoc,		/* gate */
3093   execute_reassoc,			/* execute */
3094   NULL,					/* sub */
3095   NULL,					/* next */
3096   0,					/* static_pass_number */
3097   TV_TREE_REASSOC,			/* tv_id */
3098   PROP_cfg | PROP_ssa,			/* properties_required */
3099   0,					/* properties_provided */
3100   0,					/* properties_destroyed */
3101   0,					/* todo_flags_start */
3102   TODO_verify_ssa
3103     | TODO_verify_flow
3104     | TODO_ggc_collect			/* todo_flags_finish */
3105  }
3106 };
3107