1 /* Reassociation for trees.
2    Copyright (C) 2005, 2007, 2008, 2009, 2010, 2011
3    Free Software Foundation, Inc.
4    Contributed by Daniel Berlin <dan@dberlin.org>
5 
6 This file is part of GCC.
7 
8 GCC is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 3, or (at your option)
11 any later version.
12 
13 GCC is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16 GNU General Public License for more details.
17 
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3.  If not see
20 <http://www.gnu.org/licenses/>.  */
21 
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "tm.h"
26 #include "tree.h"
27 #include "basic-block.h"
28 #include "tree-pretty-print.h"
29 #include "gimple-pretty-print.h"
30 #include "tree-inline.h"
31 #include "tree-flow.h"
32 #include "gimple.h"
33 #include "tree-dump.h"
34 #include "timevar.h"
35 #include "tree-iterator.h"
36 #include "tree-pass.h"
37 #include "alloc-pool.h"
38 #include "vec.h"
39 #include "langhooks.h"
40 #include "pointer-set.h"
41 #include "cfgloop.h"
42 #include "flags.h"
43 #include "target.h"
44 #include "params.h"
45 #include "diagnostic-core.h"
46 
47 /*  This is a simple global reassociation pass.  It is, in part, based
48     on the LLVM pass of the same name (they do some things more or less
49     than we do, and in different orders, etc.).
50 
51     It consists of five steps:
52 
53     1. Breaking up subtract operations into addition + negate, where
54     doing so would promote the reassociation of adds.
55 
56     2. Left linearization of the expression trees, so that (A+B)+(C+D)
57     becomes (((A+B)+C)+D), which is easier for us to rewrite later.
58     During linearization, we place the operands of the binary
59     expressions into a vector of operand_entry_t.
60 
61     3. Optimization of the operand lists, eliminating things like a +
62     -a, a & a, etc.
63 
64     4. Rewrite the expression trees we linearized and optimized so
65     they are in proper rank order.
66 
67     5. Repropagate negates, as nothing else will clean them up ATM.
68 
69     A bit of theory on #4, since nobody seems to write anything down
70     about why it makes sense to do it the way they do it:
71 
72     We could do this in a much nicer way theoretically, but don't (for
73     reasons explained after the description of the theoretically nice way :P).
74 
75     In order to promote the most redundancy elimination, you want
76     binary expressions whose operands are the same rank (or
77     preferably, the same value) exposed to the redundancy eliminator,
78     for possible elimination.
79 
80     So the way to do this if we really cared, is to build the new op
81     tree from the leaves to the roots, merging as you go, and putting the
82     new op on the end of the worklist, until you are left with one
83     thing on the worklist.
84 
85     I.e. if you have to rewrite the following set of operands (listed with
86     rank in parentheses), with opcode PLUS_EXPR:
87 
88     a (1),  b (1),  c (1),  d (2), e (2)
89 
90 
91     We start with our merge worklist empty, and the ops list with all of
92     those on it.
93 
94     You want to first merge all leaves of the same rank, as much as
95     possible.
96 
97     So first build a binary op of
98 
99     mergetmp = a + b, and put "mergetmp" on the merge worklist.
100 
101     Because there is no three operand form of PLUS_EXPR, c is not going to
102     be exposed to redundancy elimination as a rank 1 operand.
103 
104     So you might as well throw it on the merge worklist (you could also
105     consider it to now be a rank two operand, and merge it with d and e,
106     but in this case, you then have evicted e from a binary op. So at
107     least in this situation, you can't win.)
108 
109     Then build a binary op of d + e
110     mergetmp2 = d + e
111 
112     and put mergetmp2 on the merge worklist.
113 
114     so merge worklist = {mergetmp, c, mergetmp2}
115 
116     Continue building binary ops of these operations until you have only
117     one operation left on the worklist.
118 
119     So we have
120 
121     build binary op
122     mergetmp3 = mergetmp + c
123 
124     worklist = {mergetmp2, mergetmp3}
125 
126     mergetmp4 = mergetmp2 + mergetmp3
127 
128     worklist = {mergetmp4}
129 
130     Because we have only one operation left, we can now just set the original
131     statement equal to the result of that operation.
132 
133     This will at least expose a + b  and d + e to redundancy elimination
134     as binary operations.
135 
136     For extra points, you can reuse the old statements to build the
137     mergetmps, since you shouldn't run out.
138 
139     So why don't we do this?
140 
141     Because it's expensive, and will rarely help.  Most trees we are
142     reassociating have 3 or fewer ops.  If they have 2 ops, they will
143     already be written into a nice single binary op.  If you have 3 ops, a
144     single simple check suffices to tell you whether the first two are of the
145     same rank.  If so, you know to order it
146 
147     mergetmp = op1 + op2
148     newstmt = mergetmp + op3
149 
150     instead of
151     mergetmp = op2 + op3
152     newstmt = mergetmp + op1
153 
154     If all three are of the same rank, you can't expose them all in a
155     single binary operator anyway, so the above is *still* the best you
156     can do.
157 
158     Thus, this is what we do.  When we have three ops left, we check to see
159     what order to put them in, and call it a day.  As a nod to vector sum
160     reduction, we check if any of the ops are really a phi node that is a
161     destructive update for the associating op, and keep the destructive
162     update together for vector sum reduction recognition.  */
163 
164 
165 /* Statistics */
166 static struct
167 {
168   int linearized;
169   int constants_eliminated;
170   int ops_eliminated;
171   int rewritten;
172 } reassociate_stats;
173 
174 /* Operand, rank pair.  */
175 typedef struct operand_entry
176 {
177   unsigned int rank;
178   int id;
179   tree op;
180 } *operand_entry_t;
181 
182 static alloc_pool operand_entry_pool;
183 
184 /* This is used to assign a unique ID to each struct operand_entry
185    so that qsort results are identical on different hosts.  */
186 static int next_operand_entry_id;
187 
188 /* Starting rank number for a given basic block, so that we can rank
189    operations using unmovable instructions in that BB based on the bb
190    depth.  */
191 static long *bb_rank;
192 
193 /* Operand->rank hashtable.  */
194 static struct pointer_map_t *operand_rank;
195 
196 /* Forward decls.  */
197 static long get_rank (tree);
198 
199 
200 /* Bias amount for loop-carried phis.  We want this to be larger than
201    the depth of any reassociation tree we can see, but not larger than
202    the rank difference between two blocks.  */
203 #define PHI_LOOP_BIAS (1 << 15)
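/* A rough numeric sketch of why 1 << 15 fits (assuming the block-rank
   spacing set up by init_reassoc, which is outside this excerpt and,
   as far as this sketch assumes, assigns block ranks in widely
   separated steps of 1 << 16): ranking a statement only adds +1 to the
   maximum operand rank, so any realistic reassociation tree stays far
   below 1 << 15, while the bias itself stays below the gap between two
   consecutive block ranks and therefore cannot push a phi past operands
   computed in a later block.  */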
204 
205 /* Rank assigned to a phi statement.  If STMT is a loop-carried phi of
206    an innermost loop, and the phi has only a single use which is inside
207    the loop, then the rank is the block rank of the loop latch plus an
208    extra bias for the loop-carried dependence.  This causes expressions
209    calculated into an accumulator variable to be independent for each
210    iteration of the loop.  If STMT is some other phi, the rank is the
211    block rank of its containing block.  */
212 static long
213 phi_rank (gimple stmt)
214 {
215   basic_block bb = gimple_bb (stmt);
216   struct loop *father = bb->loop_father;
217   tree res;
218   unsigned i;
219   use_operand_p use;
220   gimple use_stmt;
221 
222   /* We only care about real loops (those with a latch).  */
223   if (!father->latch)
224     return bb_rank[bb->index];
225 
226   /* Interesting phis must be in headers of innermost loops.  */
227   if (bb != father->header
228       || father->inner)
229     return bb_rank[bb->index];
230 
231   /* Ignore virtual SSA_NAMEs.  */
232   res = gimple_phi_result (stmt);
233   if (!is_gimple_reg (SSA_NAME_VAR (res)))
234     return bb_rank[bb->index];
235 
236   /* The phi definition must have a single use, and that use must be
237      within the loop.  Otherwise this isn't an accumulator pattern.  */
238   if (!single_imm_use (res, &use, &use_stmt)
239       || gimple_bb (use_stmt)->loop_father != father)
240     return bb_rank[bb->index];
241 
242   /* Look for phi arguments from within the loop.  If found, bias this phi.  */
243   for (i = 0; i < gimple_phi_num_args (stmt); i++)
244     {
245       tree arg = gimple_phi_arg_def (stmt, i);
246       if (TREE_CODE (arg) == SSA_NAME
247 	  && !SSA_NAME_IS_DEFAULT_DEF (arg))
248 	{
249 	  gimple def_stmt = SSA_NAME_DEF_STMT (arg);
250 	  if (gimple_bb (def_stmt)->loop_father == father)
251 	    return bb_rank[father->latch->index] + PHI_LOOP_BIAS;
252 	}
253     }
254 
255   /* Must be an uninteresting phi.  */
256   return bb_rank[bb->index];
257 }
258 
259 /* If EXP is an SSA_NAME defined by a PHI statement that represents a
260    loop-carried dependence of an innermost loop, return TRUE; else
261    return FALSE.  */
262 static bool
263 loop_carried_phi (tree exp)
264 {
265   gimple phi_stmt;
266   long block_rank;
267 
268   if (TREE_CODE (exp) != SSA_NAME
269       || SSA_NAME_IS_DEFAULT_DEF (exp))
270     return false;
271 
272   phi_stmt = SSA_NAME_DEF_STMT (exp);
273 
274   if (gimple_code (SSA_NAME_DEF_STMT (exp)) != GIMPLE_PHI)
275     return false;
276 
277   /* Non-loop-carried phis have block rank.  Loop-carried phis have
278      an additional bias added in.  If this phi doesn't have block rank,
279      it's biased and should not be propagated.  */
280   block_rank = bb_rank[gimple_bb (phi_stmt)->index];
281 
282   if (phi_rank (phi_stmt) != block_rank)
283     return true;
284 
285   return false;
286 }
287 
288 /* Return the maximum of RANK and the rank that should be propagated
289    from expression OP.  For most operands, this is just the rank of OP.
290    For loop-carried phis, the value is zero to avoid undoing the bias
291    in favor of the phi.  */
292 static long
293 propagate_rank (long rank, tree op)
294 {
295   long op_rank;
296 
297   if (loop_carried_phi (op))
298     return rank;
299 
300   op_rank = get_rank (op);
301 
302   return MAX (rank, op_rank);
303 }
304 
305 /* Look up the operand rank structure for expression E.  */
306 
307 static inline long
308 find_operand_rank (tree e)
309 {
310   void **slot = pointer_map_contains (operand_rank, e);
311   return slot ? (long) (intptr_t) *slot : -1;
312 }
313 
314 /* Insert {E,RANK} into the operand rank hashtable.  */
315 
316 static inline void
317 insert_operand_rank (tree e, long rank)
318 {
319   void **slot;
320   gcc_assert (rank > 0);
321   slot = pointer_map_insert (operand_rank, e);
322   gcc_assert (!*slot);
323   *slot = (void *) (intptr_t) rank;
324 }
325 
326 /* Given an expression E, return the rank of the expression.  */
327 
328 static long
329 get_rank (tree e)
330 {
331   /* Constants have rank 0.  */
332   if (is_gimple_min_invariant (e))
333     return 0;
334 
335   /* SSA_NAMEs have the rank of the expression they are the result
336      of.
337      For globals and uninitialized values, the rank is 0.
338      For function arguments, use the pre-setup rank.
339      For PHI nodes, stores, asm statements, etc, we use the rank of
340      the BB.
341      For simple operations, the rank is the maximum rank of any of
342      its operands, or the bb_rank, whichever is less.
343      I make no claims that this is optimal, however, it gives good
344      results.  */
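  /* A small worked example of those rules (rank numbers made up purely
     for illustration; the real pre-setup ranks come from init_reassoc):

       p_1 = first PARM_DECL default def    rank 1  (pre-setup)
       q_2 = second PARM_DECL default def   rank 2  (pre-setup)
       t_3 = p_1 + 5;                       rank = MAX (1, 0) + 1 = 2
       t_4 = t_3 * q_2;                     rank = MAX (2, 2) + 1 = 3
       t_5 = foo ();                        rank = bb_rank (not a simple
                                                            assignment)

     Constants such as 5 always have rank 0.  */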
345 
346   /* We make an exception to the normal ranking system to break
347      dependences of accumulator variables in loops.  Suppose we
348      have a simple one-block loop containing:
349 
350        x_1 = phi(x_0, x_2)
351        b = a + x_1
352        c = b + d
353        x_2 = c + e
354 
355      As shown, each iteration of the calculation into x is fully
356      dependent upon the iteration before it.  We would prefer to
357      see this in the form:
358 
359        x_1 = phi(x_0, x_2)
360        b = a + d
361        c = b + e
362        x_2 = c + x_1
363 
364      If the loop is unrolled, the calculations of b and c from
365      different iterations can be interleaved.
366 
367      To obtain this result during reassociation, we bias the rank
368      of the phi definition x_1 upward, when it is recognized as an
369      accumulator pattern.  The artificial rank causes it to be
370      added last, providing the desired independence.  */
371 
372   if (TREE_CODE (e) == SSA_NAME)
373     {
374       gimple stmt;
375       long rank;
376       int i, n;
377       tree op;
378 
379       if (TREE_CODE (SSA_NAME_VAR (e)) == PARM_DECL
380 	  && SSA_NAME_IS_DEFAULT_DEF (e))
381 	return find_operand_rank (e);
382 
383       stmt = SSA_NAME_DEF_STMT (e);
384       if (gimple_bb (stmt) == NULL)
385 	return 0;
386 
387       if (gimple_code (stmt) == GIMPLE_PHI)
388 	return phi_rank (stmt);
389 
390       if (!is_gimple_assign (stmt)
391 	  || gimple_vdef (stmt))
392 	return bb_rank[gimple_bb (stmt)->index];
393 
394       /* If we already have a rank for this expression, use that.  */
395       rank = find_operand_rank (e);
396       if (rank != -1)
397 	return rank;
398 
399       /* Otherwise, find the maximum rank for the operands.  As an
400 	 exception, remove the bias from loop-carried phis when propagating
401 	 the rank so that dependent operations are not also biased.  */
402       rank = 0;
403       if (gimple_assign_single_p (stmt))
404 	{
405 	  tree rhs = gimple_assign_rhs1 (stmt);
406 	  n = TREE_OPERAND_LENGTH (rhs);
407 	  if (n == 0)
408 	    rank = propagate_rank (rank, rhs);
409 	  else
410 	    {
411 	      for (i = 0; i < n; i++)
412 		{
413 		  op = TREE_OPERAND (rhs, i);
414 
415 		  if (op != NULL_TREE)
416 		    rank = propagate_rank (rank, op);
417 		}
418 	    }
419 	}
420       else
421 	{
422 	  n = gimple_num_ops (stmt);
423 	  for (i = 1; i < n; i++)
424 	    {
425 	      op = gimple_op (stmt, i);
426 	      gcc_assert (op);
427 	      rank = propagate_rank (rank, op);
428 	    }
429 	}
430 
431       if (dump_file && (dump_flags & TDF_DETAILS))
432 	{
433 	  fprintf (dump_file, "Rank for ");
434 	  print_generic_expr (dump_file, e, 0);
435 	  fprintf (dump_file, " is %ld\n", (rank + 1));
436 	}
437 
438       /* Note the rank in the hashtable so we don't recompute it.  */
439       insert_operand_rank (e, (rank + 1));
440       return (rank + 1);
441     }
442 
443   /* Globals, etc., are rank 0.  */
444   return 0;
445 }
446 
447 DEF_VEC_P(operand_entry_t);
448 DEF_VEC_ALLOC_P(operand_entry_t, heap);
449 
450 /* We want integer ones to end up last no matter what, since they are
451    the ones we can do the most with.  */
452 #define INTEGER_CONST_TYPE (1 << 3)
453 #define FLOAT_CONST_TYPE (1 << 2)
454 #define OTHER_CONST_TYPE (1 << 1)
455 
456 /* Classify an invariant tree into integer, float, or other, so that
457    we can sort them to be near other constants of the same type.  */
458 static inline int
459 constant_type (tree t)
460 {
461   if (INTEGRAL_TYPE_P (TREE_TYPE (t)))
462     return INTEGER_CONST_TYPE;
463   else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (t)))
464     return FLOAT_CONST_TYPE;
465   else
466     return OTHER_CONST_TYPE;
467 }
468 
469 /* qsort comparison function to sort operand entries PA and PB by rank
470    so that the sorted array is ordered by rank in decreasing order.  */
471 static int
472 sort_by_operand_rank (const void *pa, const void *pb)
473 {
474   const operand_entry_t oea = *(const operand_entry_t *)pa;
475   const operand_entry_t oeb = *(const operand_entry_t *)pb;
476 
477   /* It's nicer for optimize_expression if constants that are likely
478      to fold when added/multiplied/whatever are put next to each
479      other.  Since all constants have rank 0, order them by type.  */
480   if (oeb->rank == 0 &&  oea->rank == 0)
481     {
482       if (constant_type (oeb->op) != constant_type (oea->op))
483 	return constant_type (oeb->op) - constant_type (oea->op);
484       else
485 	/* To make sorting result stable, we use unique IDs to determine
486 	   order.  */
487         return oeb->id - oea->id;
488     }
489 
490   /* Lastly, make sure the versions that are the same go next to each
491      other.  We use SSA_NAME_VERSION because it's stable.  */
492   if ((oeb->rank - oea->rank == 0)
493       && TREE_CODE (oea->op) == SSA_NAME
494       && TREE_CODE (oeb->op) == SSA_NAME)
495     {
496       if (SSA_NAME_VERSION (oeb->op) != SSA_NAME_VERSION (oea->op))
497 	return SSA_NAME_VERSION (oeb->op) - SSA_NAME_VERSION (oea->op);
498       else
499 	return oeb->id - oea->id;
500     }
501 
502   if (oeb->rank != oea->rank)
503     return oeb->rank - oea->rank;
504   else
505     return oeb->id - oea->id;
506 }
507 
508 /* Add an operand entry to *OPS for the tree operand OP.  */
509 
510 static void
511 add_to_ops_vec (VEC(operand_entry_t, heap) **ops, tree op)
512 {
513   operand_entry_t oe = (operand_entry_t) pool_alloc (operand_entry_pool);
514 
515   oe->op = op;
516   oe->rank = get_rank (op);
517   oe->id = next_operand_entry_id++;
518   VEC_safe_push (operand_entry_t, heap, *ops, oe);
519 }
520 
521 /* Return true if STMT is a reassociable operation containing a binary
522    operation with tree code CODE, and is inside LOOP.  */
523 
524 static bool
525 is_reassociable_op (gimple stmt, enum tree_code code, struct loop *loop)
526 {
527   basic_block bb = gimple_bb (stmt);
528 
529   if (gimple_bb (stmt) == NULL)
530     return false;
531 
532   if (!flow_bb_inside_loop_p (loop, bb))
533     return false;
534 
535   if (is_gimple_assign (stmt)
536       && gimple_assign_rhs_code (stmt) == code
537       && has_single_use (gimple_assign_lhs (stmt)))
538     return true;
539 
540   return false;
541 }
542 
543 
544 /* Given NAME, if NAME is defined by a unary operation OPCODE, return the
545    operand of that operation.  Otherwise, return NULL_TREE.  */
546 
547 static tree
548 get_unary_op (tree name, enum tree_code opcode)
549 {
550   gimple stmt = SSA_NAME_DEF_STMT (name);
551 
552   if (!is_gimple_assign (stmt))
553     return NULL_TREE;
554 
555   if (gimple_assign_rhs_code (stmt) == opcode)
556     return gimple_assign_rhs1 (stmt);
557   return NULL_TREE;
558 }
559 
560 /* If CURR and LAST are a pair of ops that OPCODE allows us to
561    eliminate through equivalences, do so, remove them from OPS, and
562    return true.  Otherwise, return false.  */
563 
564 static bool
565 eliminate_duplicate_pair (enum tree_code opcode,
566 			  VEC (operand_entry_t, heap) **ops,
567 			  bool *all_done,
568 			  unsigned int i,
569 			  operand_entry_t curr,
570 			  operand_entry_t last)
571 {
572 
573   /* If we have two of the same op, and the opcode is &, |, min, or max,
574      we can eliminate one of them.
575      If we have two of the same op, and the opcode is ^, we can
576      eliminate both of them.  */
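  /* For instance (purely illustrative): with BIT_AND_EXPR and
     OPS = { a_1, a_1, b_2 } one copy is dropped, giving { a_1, b_2 };
     with BIT_XOR_EXPR both copies cancel outright, so { a_1, a_1, b_2 }
     becomes { b_2 } and { a_1, a_1 } becomes { 0 }.  */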
577 
578   if (last && last->op == curr->op)
579     {
580       switch (opcode)
581 	{
582 	case MAX_EXPR:
583 	case MIN_EXPR:
584 	case BIT_IOR_EXPR:
585 	case BIT_AND_EXPR:
586 	  if (dump_file && (dump_flags & TDF_DETAILS))
587 	    {
588 	      fprintf (dump_file, "Equivalence: ");
589 	      print_generic_expr (dump_file, curr->op, 0);
590 	      fprintf (dump_file, " [&|minmax] ");
591 	      print_generic_expr (dump_file, last->op, 0);
592 	      fprintf (dump_file, " -> ");
593 	      print_generic_stmt (dump_file, last->op, 0);
594 	    }
595 
596 	  VEC_ordered_remove (operand_entry_t, *ops, i);
597 	  reassociate_stats.ops_eliminated ++;
598 
599 	  return true;
600 
601 	case BIT_XOR_EXPR:
602 	  if (dump_file && (dump_flags & TDF_DETAILS))
603 	    {
604 	      fprintf (dump_file, "Equivalence: ");
605 	      print_generic_expr (dump_file, curr->op, 0);
606 	      fprintf (dump_file, " ^ ");
607 	      print_generic_expr (dump_file, last->op, 0);
608 	      fprintf (dump_file, " -> nothing\n");
609 	    }
610 
611 	  reassociate_stats.ops_eliminated += 2;
612 
613 	  if (VEC_length (operand_entry_t, *ops) == 2)
614 	    {
615 	      VEC_free (operand_entry_t, heap, *ops);
616 	      *ops = NULL;
617 	      add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (last->op)));
618 	      *all_done = true;
619 	    }
620 	  else
621 	    {
622 	      VEC_ordered_remove (operand_entry_t, *ops, i-1);
623 	      VEC_ordered_remove (operand_entry_t, *ops, i-1);
624 	    }
625 
626 	  return true;
627 
628 	default:
629 	  break;
630 	}
631     }
632   return false;
633 }
634 
635 static VEC(tree, heap) *plus_negates;
636 
637 /* If OPCODE is PLUS_EXPR and CURR->OP is a negate expression or a bitwise
638    not expression, look in OPS for a corresponding operand to cancel
639    it out.  If we find one, remove the other from OPS, replace
640    OPS[CURRINDEX] with 0 or -1, respectively, and return true.  Otherwise,
641    return false. */
642 
643 static bool
644 eliminate_plus_minus_pair (enum tree_code opcode,
645 			   VEC (operand_entry_t, heap) **ops,
646 			   unsigned int currindex,
647 			   operand_entry_t curr)
648 {
649   tree negateop;
650   tree notop;
651   unsigned int i;
652   operand_entry_t oe;
653 
654   if (opcode != PLUS_EXPR || TREE_CODE (curr->op) != SSA_NAME)
655     return false;
656 
657   negateop = get_unary_op (curr->op, NEGATE_EXPR);
658   notop = get_unary_op (curr->op, BIT_NOT_EXPR);
659   if (negateop == NULL_TREE && notop == NULL_TREE)
660     return false;
661 
662   /* Any non-negated version will have a rank that is one less than
663      the current rank.  So once the scan drops below that rank without
664      finding one, we can stop.  */
665 
666   for (i = currindex + 1;
667        VEC_iterate (operand_entry_t, *ops, i, oe)
668        && oe->rank >= curr->rank - 1 ;
669        i++)
670     {
671       if (oe->op == negateop)
672 	{
673 
674 	  if (dump_file && (dump_flags & TDF_DETAILS))
675 	    {
676 	      fprintf (dump_file, "Equivalence: ");
677 	      print_generic_expr (dump_file, negateop, 0);
678 	      fprintf (dump_file, " + -");
679 	      print_generic_expr (dump_file, oe->op, 0);
680 	      fprintf (dump_file, " -> 0\n");
681 	    }
682 
683 	  VEC_ordered_remove (operand_entry_t, *ops, i);
684 	  add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (oe->op)));
685 	  VEC_ordered_remove (operand_entry_t, *ops, currindex);
686 	  reassociate_stats.ops_eliminated ++;
687 
688 	  return true;
689 	}
690       else if (oe->op == notop)
691 	{
692 	  tree op_type = TREE_TYPE (oe->op);
693 
694 	  if (dump_file && (dump_flags & TDF_DETAILS))
695 	    {
696 	      fprintf (dump_file, "Equivalence: ");
697 	      print_generic_expr (dump_file, notop, 0);
698 	      fprintf (dump_file, " + ~");
699 	      print_generic_expr (dump_file, oe->op, 0);
700 	      fprintf (dump_file, " -> -1\n");
701 	    }
702 
703 	  VEC_ordered_remove (operand_entry_t, *ops, i);
704 	  add_to_ops_vec (ops, build_int_cst_type (op_type, -1));
705 	  VEC_ordered_remove (operand_entry_t, *ops, currindex);
706 	  reassociate_stats.ops_eliminated ++;
707 
708 	  return true;
709 	}
710     }
711 
712   /* CURR->OP is a negate expr in a plus expr: save it for later
713      inspection in repropagate_negates().  */
714   if (negateop != NULL_TREE)
715     VEC_safe_push (tree, heap, plus_negates, curr->op);
716 
717   return false;
718 }
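/* For illustration (not part of the pass logic): with OPCODE PLUS_EXPR,
   an operand list { t_5, a_1, b_2 } where t_5 = -a_1 collapses to
   { b_2, 0 }, and { t_6, a_1, b_2 } where t_6 = ~a_1 collapses to
   { b_2, -1 }; the resulting constant is folded further by the callers.  */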
719 
720 /* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR and CURR->OP is really a
721    bitwise not expression, look in OPS for a corresponding operand to
722    cancel it out.  If we find one, collapse the whole operand list to
723    the all-ones constant or 0, respectively, and return true.  Otherwise,
724    return false.  */
725 
726 static bool
727 eliminate_not_pairs (enum tree_code opcode,
728 		     VEC (operand_entry_t, heap) **ops,
729 		     unsigned int currindex,
730 		     operand_entry_t curr)
731 {
732   tree notop;
733   unsigned int i;
734   operand_entry_t oe;
735 
736   if ((opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
737       || TREE_CODE (curr->op) != SSA_NAME)
738     return false;
739 
740   notop = get_unary_op (curr->op, BIT_NOT_EXPR);
741   if (notop == NULL_TREE)
742     return false;
743 
744   /* Any non-not version will have a rank that is one less than
745      the current rank.  So once the scan drops below that rank without
746      finding one, we can stop.  */
747 
748   for (i = currindex + 1;
749        VEC_iterate (operand_entry_t, *ops, i, oe)
750        && oe->rank >= curr->rank - 1;
751        i++)
752     {
753       if (oe->op == notop)
754 	{
755 	  if (dump_file && (dump_flags & TDF_DETAILS))
756 	    {
757 	      fprintf (dump_file, "Equivalence: ");
758 	      print_generic_expr (dump_file, notop, 0);
759 	      if (opcode == BIT_AND_EXPR)
760 		fprintf (dump_file, " & ~");
761 	      else if (opcode == BIT_IOR_EXPR)
762 		fprintf (dump_file, " | ~");
763 	      print_generic_expr (dump_file, oe->op, 0);
764 	      if (opcode == BIT_AND_EXPR)
765 		fprintf (dump_file, " -> 0\n");
766 	      else if (opcode == BIT_IOR_EXPR)
767 		fprintf (dump_file, " -> -1\n");
768 	    }
769 
770 	  if (opcode == BIT_AND_EXPR)
771 	    oe->op = build_zero_cst (TREE_TYPE (oe->op));
772 	  else if (opcode == BIT_IOR_EXPR)
773 	    oe->op = build_low_bits_mask (TREE_TYPE (oe->op),
774 					  TYPE_PRECISION (TREE_TYPE (oe->op)));
775 
776 	  reassociate_stats.ops_eliminated
777 	    += VEC_length (operand_entry_t, *ops) - 1;
778 	  VEC_free (operand_entry_t, heap, *ops);
779 	  *ops = NULL;
780 	  VEC_safe_push (operand_entry_t, heap, *ops, oe);
781 	  return true;
782 	}
783     }
784 
785   return false;
786 }
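/* Illustrative example: for BIT_AND_EXPR, once x_1 and ~x_1 both appear
   the whole conjunction is 0, so { t_7, x_1, b_2, c_3 } with t_7 = ~x_1
   is collapsed to the single operand { 0 }; for BIT_IOR_EXPR the result
   is the all-ones constant instead.  */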
787 
788 /* Use constant value that may be present in OPS to try to eliminate
789    operands.  Note that this function is only really used when we've
790    eliminated ops for other reasons, or merged constants.  Across
791    single statements, fold already does all of this, plus more.  There
792    is little point in duplicating logic, so I've only included the
793    identities that I could ever construct testcases to trigger.  */
794 
795 static void
796 eliminate_using_constants (enum tree_code opcode,
797 			   VEC(operand_entry_t, heap) **ops)
798 {
799   operand_entry_t oelast = VEC_last (operand_entry_t, *ops);
800   tree type = TREE_TYPE (oelast->op);
801 
802   if (oelast->rank == 0
803       && (INTEGRAL_TYPE_P (type) || FLOAT_TYPE_P (type)))
804     {
805       switch (opcode)
806 	{
807 	case BIT_AND_EXPR:
808 	  if (integer_zerop (oelast->op))
809 	    {
810 	      if (VEC_length (operand_entry_t, *ops) != 1)
811 		{
812 		  if (dump_file && (dump_flags & TDF_DETAILS))
813 		    fprintf (dump_file, "Found & 0, removing all other ops\n");
814 
815 		  reassociate_stats.ops_eliminated
816 		    += VEC_length (operand_entry_t, *ops) - 1;
817 
818 		  VEC_free (operand_entry_t, heap, *ops);
819 		  *ops = NULL;
820 		  VEC_safe_push (operand_entry_t, heap, *ops, oelast);
821 		  return;
822 		}
823 	    }
824 	  else if (integer_all_onesp (oelast->op))
825 	    {
826 	      if (VEC_length (operand_entry_t, *ops) != 1)
827 		{
828 		  if (dump_file && (dump_flags & TDF_DETAILS))
829 		    fprintf (dump_file, "Found & -1, removing\n");
830 		  VEC_pop (operand_entry_t, *ops);
831 		  reassociate_stats.ops_eliminated++;
832 		}
833 	    }
834 	  break;
835 	case BIT_IOR_EXPR:
836 	  if (integer_all_onesp (oelast->op))
837 	    {
838 	      if (VEC_length (operand_entry_t, *ops) != 1)
839 		{
840 		  if (dump_file && (dump_flags & TDF_DETAILS))
841 		    fprintf (dump_file, "Found | -1, removing all other ops\n");
842 
843 		  reassociate_stats.ops_eliminated
844 		    += VEC_length (operand_entry_t, *ops) - 1;
845 
846 		  VEC_free (operand_entry_t, heap, *ops);
847 		  *ops = NULL;
848 		  VEC_safe_push (operand_entry_t, heap, *ops, oelast);
849 		  return;
850 		}
851 	    }
852 	  else if (integer_zerop (oelast->op))
853 	    {
854 	      if (VEC_length (operand_entry_t, *ops) != 1)
855 		{
856 		  if (dump_file && (dump_flags & TDF_DETAILS))
857 		    fprintf (dump_file, "Found | 0, removing\n");
858 		  VEC_pop (operand_entry_t, *ops);
859 		  reassociate_stats.ops_eliminated++;
860 		}
861 	    }
862 	  break;
863 	case MULT_EXPR:
864 	  if (integer_zerop (oelast->op)
865 	      || (FLOAT_TYPE_P (type)
866 		  && !HONOR_NANS (TYPE_MODE (type))
867 		  && !HONOR_SIGNED_ZEROS (TYPE_MODE (type))
868 		  && real_zerop (oelast->op)))
869 	    {
870 	      if (VEC_length (operand_entry_t, *ops) != 1)
871 		{
872 		  if (dump_file && (dump_flags & TDF_DETAILS))
873 		    fprintf (dump_file, "Found * 0, removing all other ops\n");
874 
875 		  reassociate_stats.ops_eliminated
876 		    += VEC_length (operand_entry_t, *ops) - 1;
877 		  VEC_free (operand_entry_t, heap, *ops);
878 		  *ops = NULL;
879 		  VEC_safe_push (operand_entry_t, heap, *ops, oelast);
880 		  return;
881 		}
882 	    }
883 	  else if (integer_onep (oelast->op)
884 		   || (FLOAT_TYPE_P (type)
885 		       && !HONOR_SNANS (TYPE_MODE (type))
886 		       && real_onep (oelast->op)))
887 	    {
888 	      if (VEC_length (operand_entry_t, *ops) != 1)
889 		{
890 		  if (dump_file && (dump_flags & TDF_DETAILS))
891 		    fprintf (dump_file, "Found * 1, removing\n");
892 		  VEC_pop (operand_entry_t, *ops);
893 		  reassociate_stats.ops_eliminated++;
894 		  return;
895 		}
896 	    }
897 	  break;
898 	case BIT_XOR_EXPR:
899 	case PLUS_EXPR:
900 	case MINUS_EXPR:
901 	  if (integer_zerop (oelast->op)
902 	      || (FLOAT_TYPE_P (type)
903 		  && (opcode == PLUS_EXPR || opcode == MINUS_EXPR)
904 		  && fold_real_zero_addition_p (type, oelast->op,
905 						opcode == MINUS_EXPR)))
906 	    {
907 	      if (VEC_length (operand_entry_t, *ops) != 1)
908 		{
909 		  if (dump_file && (dump_flags & TDF_DETAILS))
910 		    fprintf (dump_file, "Found [|^+] 0, removing\n");
911 		  VEC_pop (operand_entry_t, *ops);
912 		  reassociate_stats.ops_eliminated++;
913 		  return;
914 		}
915 	    }
916 	  break;
917 	default:
918 	  break;
919 	}
920     }
921 }
922 
923 
924 static void linearize_expr_tree (VEC(operand_entry_t, heap) **, gimple,
925 				 bool, bool);
926 
927 /* Structure for tracking and counting operands.  */
928 typedef struct oecount_s {
929   int cnt;
930   int id;
931   enum tree_code oecode;
932   tree op;
933 } oecount;
934 
935 DEF_VEC_O(oecount);
936 DEF_VEC_ALLOC_O(oecount,heap);
937 
938 /* The heap for the oecount hashtable and the sorted list of operands.  */
939 static VEC (oecount, heap) *cvec;
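/* Note on the encoding used below: the counting hashtable does not store
   oecount pointers; each slot instead holds a 0-based index into CVEC
   offset by 42 and cast to a pointer.  The offset is presumably chosen so
   that stored values can never collide with the empty (0) and deleted (1)
   slot markers used by libiberty hashtables; oecount_hash and oecount_eq
   undo it with "- 42".  */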
940 
941 /* Hash function for oecount.  */
942 
943 static hashval_t
944 oecount_hash (const void *p)
945 {
946   const oecount *c = VEC_index (oecount, cvec, (size_t)p - 42);
947   return htab_hash_pointer (c->op) ^ (hashval_t)c->oecode;
948 }
949 
950 /* Comparison function for oecount.  */
951 
952 static int
953 oecount_eq (const void *p1, const void *p2)
954 {
955   const oecount *c1 = VEC_index (oecount, cvec, (size_t)p1 - 42);
956   const oecount *c2 = VEC_index (oecount, cvec, (size_t)p2 - 42);
957   return (c1->oecode == c2->oecode
958 	  && c1->op == c2->op);
959 }
960 
961 /* Comparison function for qsort sorting oecount elements by count.  */
962 
963 static int
964 oecount_cmp (const void *p1, const void *p2)
965 {
966   const oecount *c1 = (const oecount *)p1;
967   const oecount *c2 = (const oecount *)p2;
968   if (c1->cnt != c2->cnt)
969     return c1->cnt - c2->cnt;
970   else
971     /* If counts are identical, use unique IDs to stabilize qsort.  */
972     return c1->id - c2->id;
973 }
974 
975 /* Walk the linear chain with result *DEF, searching for an operation
976    with operand OP and code OPCODE, and remove it from the chain.  *DEF
977    is updated if only one operand but no operation is left.  */
978 
979 static void
980 zero_one_operation (tree *def, enum tree_code opcode, tree op)
981 {
982   gimple stmt = SSA_NAME_DEF_STMT (*def);
983 
984   do
985     {
986       tree name = gimple_assign_rhs1 (stmt);
987 
988       /* If this is the operation we look for and one of the operands
989          is ours, simply propagate the other operand into the stmt's
990 	 single use.  */
991       if (gimple_assign_rhs_code (stmt) == opcode
992 	  && (name == op
993 	      || gimple_assign_rhs2 (stmt) == op))
994 	{
995 	  gimple use_stmt;
996 	  use_operand_p use;
997 	  gimple_stmt_iterator gsi;
998 	  if (name == op)
999 	    name = gimple_assign_rhs2 (stmt);
1000 	  gcc_assert (has_single_use (gimple_assign_lhs (stmt)));
1001 	  single_imm_use (gimple_assign_lhs (stmt), &use, &use_stmt);
1002 	  if (gimple_assign_lhs (stmt) == *def)
1003 	    *def = name;
1004 	  SET_USE (use, name);
1005 	  if (TREE_CODE (name) != SSA_NAME)
1006 	    update_stmt (use_stmt);
1007 	  gsi = gsi_for_stmt (stmt);
1008 	  gsi_remove (&gsi, true);
1009 	  release_defs (stmt);
1010 	  return;
1011 	}
1012 
1013       /* Continue walking the chain.  */
1014       gcc_assert (name != op
1015 		  && TREE_CODE (name) == SSA_NAME);
1016       stmt = SSA_NAME_DEF_STMT (name);
1017     }
1018   while (1);
1019 }
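/* For example (illustrative only), given the multiplication chain

     t_1 = a_4 * b_5;
     t_2 = t_1 * c_6;
     t_3 = t_2 * d_7;

   calling zero_one_operation (&def, MULT_EXPR, c_6) with *DEF == t_3
   finds the t_2 statement, replaces its single use (in the t_3
   statement) by t_1, and removes "t_2 = t_1 * c_6", leaving a chain
   that computes a_4 * b_5 * d_7.  */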
1020 
1021 /* Builds one statement performing OP1 OPCODE OP2 using TMPVAR for
1022    the result.  Places the statement after the definition of either
1023    OP1 or OP2.  Returns the new statement.  */
1024 
1025 static gimple
1026 build_and_add_sum (tree tmpvar, tree op1, tree op2, enum tree_code opcode)
1027 {
1028   gimple op1def = NULL, op2def = NULL;
1029   gimple_stmt_iterator gsi;
1030   tree op;
1031   gimple sum;
1032 
1033   /* Create the addition statement.  */
1034   sum = gimple_build_assign_with_ops (opcode, tmpvar, op1, op2);
1035   op = make_ssa_name (tmpvar, sum);
1036   gimple_assign_set_lhs (sum, op);
1037 
1038   /* Find an insertion place and insert.  */
1039   if (TREE_CODE (op1) == SSA_NAME)
1040     op1def = SSA_NAME_DEF_STMT (op1);
1041   if (TREE_CODE (op2) == SSA_NAME)
1042     op2def = SSA_NAME_DEF_STMT (op2);
1043   if ((!op1def || gimple_nop_p (op1def))
1044       && (!op2def || gimple_nop_p (op2def)))
1045     {
1046       gsi = gsi_after_labels (single_succ (ENTRY_BLOCK_PTR));
1047       gsi_insert_before (&gsi, sum, GSI_NEW_STMT);
1048     }
1049   else if ((!op1def || gimple_nop_p (op1def))
1050 	   || (op2def && !gimple_nop_p (op2def)
1051 	       && stmt_dominates_stmt_p (op1def, op2def)))
1052     {
1053       if (gimple_code (op2def) == GIMPLE_PHI)
1054 	{
1055 	  gsi = gsi_after_labels (gimple_bb (op2def));
1056 	  gsi_insert_before (&gsi, sum, GSI_NEW_STMT);
1057 	}
1058       else
1059 	{
1060 	  if (!stmt_ends_bb_p (op2def))
1061 	    {
1062 	      gsi = gsi_for_stmt (op2def);
1063 	      gsi_insert_after (&gsi, sum, GSI_NEW_STMT);
1064 	    }
1065 	  else
1066 	    {
1067 	      edge e;
1068 	      edge_iterator ei;
1069 
1070 	      FOR_EACH_EDGE (e, ei, gimple_bb (op2def)->succs)
1071 		if (e->flags & EDGE_FALLTHRU)
1072 		  gsi_insert_on_edge_immediate (e, sum);
1073 	    }
1074 	}
1075     }
1076   else
1077     {
1078       if (gimple_code (op1def) == GIMPLE_PHI)
1079 	{
1080 	  gsi = gsi_after_labels (gimple_bb (op1def));
1081 	  gsi_insert_before (&gsi, sum, GSI_NEW_STMT);
1082 	}
1083       else
1084 	{
1085 	  if (!stmt_ends_bb_p (op1def))
1086 	    {
1087 	      gsi = gsi_for_stmt (op1def);
1088 	      gsi_insert_after (&gsi, sum, GSI_NEW_STMT);
1089 	    }
1090 	  else
1091 	    {
1092 	      edge e;
1093 	      edge_iterator ei;
1094 
1095 	      FOR_EACH_EDGE (e, ei, gimple_bb (op1def)->succs)
1096 		if (e->flags & EDGE_FALLTHRU)
1097 		  gsi_insert_on_edge_immediate (e, sum);
1098 	    }
1099 	}
1100     }
1101   update_stmt (sum);
1102 
1103   return sum;
1104 }
1105 
1106 /* Perform un-distribution of divisions and multiplications.
1107    A * X + B * X is transformed into (A + B) * X and A / X + B / X
1108    to (A + B) / X for real X.
1109 
1110    The algorithm is organized as follows.
1111 
1112     - First we walk the addition chain *OPS looking for summands that
1113       are defined by a multiplication or a real division.  This results
1114       in the candidates bitmap with relevant indices into *OPS.
1115 
1116     - Second we build the chains of multiplications or divisions for
1117       these candidates, counting the number of occurrences of (operand, code)
1118       pairs in all of the candidates chains.
1119 
1120     - Third we sort the (operand, code) pairs by number of occurrences and
1121       process them starting with the pair with the most uses.
1122 
1123       * For each such pair we walk the candidates again to build a
1124         second candidate bitmap noting all multiplication/division chains
1125 	that have at least one occurrence of (operand, code).
1126 
1127       * We build an alternate addition chain only covering these
1128         candidates with one (operand, code) operation removed from their
1129 	multiplication/division chain.
1130 
1131       * The first candidate gets replaced by the alternate addition chain
1132         multiplied/divided by the operand.
1133 
1134       * All candidate chains get disabled for further processing and
1135         processing of (operand, code) pairs continues.
1136 
1137   The alternate addition chains built are re-processed by the main
1138   reassociation algorithm which allows optimizing a * x * y + b * y * x
1139   to (a + b) * x * y in one invocation of the reassociation pass.  */
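/* A worked example of the steps above (purely illustrative):

     ops = { t_1, t_2, t_3 } for the sum t_1 + t_2 + t_3, where
       t_1 = a_4 * x_7
       t_2 = b_5 * x_7
       t_3 = c_6 * y_8

   All three summands are MULT_EXPR chains, so all become candidates.
   Counting (operand, code) pairs finds x_7 twice and a_4, b_5, c_6 and
   y_8 once each.  Processing (x_7, MULT_EXPR), the chains of t_1 and
   t_2 contain it, so those two candidates are rewritten into

       s_9  = a_4 + b_5;
       p_10 = s_9 * x_7;

   and ops becomes { p_10, 0, t_3 }, which the caller re-optimizes.  */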
1140 
1141 static bool
1142 undistribute_ops_list (enum tree_code opcode,
1143 		       VEC (operand_entry_t, heap) **ops, struct loop *loop)
1144 {
1145   unsigned int length = VEC_length (operand_entry_t, *ops);
1146   operand_entry_t oe1;
1147   unsigned i, j;
1148   sbitmap candidates, candidates2;
1149   unsigned nr_candidates, nr_candidates2;
1150   sbitmap_iterator sbi0;
1151   VEC (operand_entry_t, heap) **subops;
1152   htab_t ctable;
1153   bool changed = false;
1154   int next_oecount_id = 0;
1155 
1156   if (length <= 1
1157       || opcode != PLUS_EXPR)
1158     return false;
1159 
1160   /* Build a list of candidates to process.  */
1161   candidates = sbitmap_alloc (length);
1162   sbitmap_zero (candidates);
1163   nr_candidates = 0;
1164   FOR_EACH_VEC_ELT (operand_entry_t, *ops, i, oe1)
1165     {
1166       enum tree_code dcode;
1167       gimple oe1def;
1168 
1169       if (TREE_CODE (oe1->op) != SSA_NAME)
1170 	continue;
1171       oe1def = SSA_NAME_DEF_STMT (oe1->op);
1172       if (!is_gimple_assign (oe1def))
1173 	continue;
1174       dcode = gimple_assign_rhs_code (oe1def);
1175       if ((dcode != MULT_EXPR
1176 	   && dcode != RDIV_EXPR)
1177 	  || !is_reassociable_op (oe1def, dcode, loop))
1178 	continue;
1179 
1180       SET_BIT (candidates, i);
1181       nr_candidates++;
1182     }
1183 
1184   if (nr_candidates < 2)
1185     {
1186       sbitmap_free (candidates);
1187       return false;
1188     }
1189 
1190   if (dump_file && (dump_flags & TDF_DETAILS))
1191     {
1192       fprintf (dump_file, "searching for un-distribute opportunities ");
1193       print_generic_expr (dump_file,
1194 	VEC_index (operand_entry_t, *ops,
1195 		   sbitmap_first_set_bit (candidates))->op, 0);
1196       fprintf (dump_file, " %d\n", nr_candidates);
1197     }
1198 
1199   /* Build linearized sub-operand lists and the counting table.  */
1200   cvec = NULL;
1201   ctable = htab_create (15, oecount_hash, oecount_eq, NULL);
1202   subops = XCNEWVEC (VEC (operand_entry_t, heap) *,
1203 		     VEC_length (operand_entry_t, *ops));
1204   EXECUTE_IF_SET_IN_SBITMAP (candidates, 0, i, sbi0)
1205     {
1206       gimple oedef;
1207       enum tree_code oecode;
1208       unsigned j;
1209 
1210       oedef = SSA_NAME_DEF_STMT (VEC_index (operand_entry_t, *ops, i)->op);
1211       oecode = gimple_assign_rhs_code (oedef);
1212       linearize_expr_tree (&subops[i], oedef,
1213 			   associative_tree_code (oecode), false);
1214 
1215       FOR_EACH_VEC_ELT (operand_entry_t, subops[i], j, oe1)
1216 	{
1217 	  oecount c;
1218 	  void **slot;
1219 	  size_t idx;
1220 	  c.oecode = oecode;
1221 	  c.cnt = 1;
1222 	  c.id = next_oecount_id++;
1223 	  c.op = oe1->op;
1224 	  VEC_safe_push (oecount, heap, cvec, &c);
1225 	  idx = VEC_length (oecount, cvec) + 41;
1226 	  slot = htab_find_slot (ctable, (void *)idx, INSERT);
1227 	  if (!*slot)
1228 	    {
1229 	      *slot = (void *)idx;
1230 	    }
1231 	  else
1232 	    {
1233 	      VEC_pop (oecount, cvec);
1234 	      VEC_index (oecount, cvec, (size_t)*slot - 42)->cnt++;
1235 	    }
1236 	}
1237     }
1238   htab_delete (ctable);
1239 
1240   /* Sort the counting table.  */
1241   VEC_qsort (oecount, cvec, oecount_cmp);
1242 
1243   if (dump_file && (dump_flags & TDF_DETAILS))
1244     {
1245       oecount *c;
1246       fprintf (dump_file, "Candidates:\n");
1247       FOR_EACH_VEC_ELT (oecount, cvec, j, c)
1248 	{
1249 	  fprintf (dump_file, "  %u %s: ", c->cnt,
1250 		   c->oecode == MULT_EXPR
1251 		   ? "*" : c->oecode == RDIV_EXPR ? "/" : "?");
1252 	  print_generic_expr (dump_file, c->op, 0);
1253 	  fprintf (dump_file, "\n");
1254 	}
1255     }
1256 
1257   /* Process the (operand, code) pairs in order of most frequent occurrence.  */
1258   candidates2 = sbitmap_alloc (length);
1259   while (!VEC_empty (oecount, cvec))
1260     {
1261       oecount *c = VEC_last (oecount, cvec);
1262       if (c->cnt < 2)
1263 	break;
1264 
1265       /* Now collect the operands in the outer chain that contain
1266          the common operand in their inner chain.  */
1267       sbitmap_zero (candidates2);
1268       nr_candidates2 = 0;
1269       EXECUTE_IF_SET_IN_SBITMAP (candidates, 0, i, sbi0)
1270 	{
1271 	  gimple oedef;
1272 	  enum tree_code oecode;
1273 	  unsigned j;
1274 	  tree op = VEC_index (operand_entry_t, *ops, i)->op;
1275 
1276 	  /* If we undistributed in this chain already this may be
1277 	     a constant.  */
1278 	  if (TREE_CODE (op) != SSA_NAME)
1279 	    continue;
1280 
1281 	  oedef = SSA_NAME_DEF_STMT (op);
1282 	  oecode = gimple_assign_rhs_code (oedef);
1283 	  if (oecode != c->oecode)
1284 	    continue;
1285 
1286 	  FOR_EACH_VEC_ELT (operand_entry_t, subops[i], j, oe1)
1287 	    {
1288 	      if (oe1->op == c->op)
1289 		{
1290 		  SET_BIT (candidates2, i);
1291 		  ++nr_candidates2;
1292 		  break;
1293 		}
1294 	    }
1295 	}
1296 
1297       if (nr_candidates2 >= 2)
1298 	{
1299 	  operand_entry_t oe1, oe2;
1300 	  tree tmpvar;
1301 	  gimple prod;
1302 	  int first = sbitmap_first_set_bit (candidates2);
1303 
1304 	  /* Build the new addition chain.  */
1305 	  oe1 = VEC_index (operand_entry_t, *ops, first);
1306 	  if (dump_file && (dump_flags & TDF_DETAILS))
1307 	    {
1308 	      fprintf (dump_file, "Building (");
1309 	      print_generic_expr (dump_file, oe1->op, 0);
1310 	    }
1311 	  tmpvar = create_tmp_reg (TREE_TYPE (oe1->op), NULL);
1312 	  add_referenced_var (tmpvar);
1313 	  zero_one_operation (&oe1->op, c->oecode, c->op);
1314 	  EXECUTE_IF_SET_IN_SBITMAP (candidates2, first+1, i, sbi0)
1315 	    {
1316 	      gimple sum;
1317 	      oe2 = VEC_index (operand_entry_t, *ops, i);
1318 	      if (dump_file && (dump_flags & TDF_DETAILS))
1319 		{
1320 		  fprintf (dump_file, " + ");
1321 		  print_generic_expr (dump_file, oe2->op, 0);
1322 		}
1323 	      zero_one_operation (&oe2->op, c->oecode, c->op);
1324 	      sum = build_and_add_sum (tmpvar, oe1->op, oe2->op, opcode);
1325 	      oe2->op = build_zero_cst (TREE_TYPE (oe2->op));
1326 	      oe2->rank = 0;
1327 	      oe1->op = gimple_get_lhs (sum);
1328 	    }
1329 
1330 	  /* Apply the multiplication/division.  */
1331 	  prod = build_and_add_sum (tmpvar, oe1->op, c->op, c->oecode);
1332 	  if (dump_file && (dump_flags & TDF_DETAILS))
1333 	    {
1334 	      fprintf (dump_file, ") %s ", c->oecode == MULT_EXPR ? "*" : "/");
1335 	      print_generic_expr (dump_file, c->op, 0);
1336 	      fprintf (dump_file, "\n");
1337 	    }
1338 
1339 	  /* Record it in the addition chain and disable further
1340 	     undistribution with this op.  */
1341 	  oe1->op = gimple_assign_lhs (prod);
1342 	  oe1->rank = get_rank (oe1->op);
1343 	  VEC_free (operand_entry_t, heap, subops[first]);
1344 
1345 	  changed = true;
1346 	}
1347 
1348       VEC_pop (oecount, cvec);
1349     }
1350 
1351   for (i = 0; i < VEC_length (operand_entry_t, *ops); ++i)
1352     VEC_free (operand_entry_t, heap, subops[i]);
1353   free (subops);
1354   VEC_free (oecount, heap, cvec);
1355   sbitmap_free (candidates);
1356   sbitmap_free (candidates2);
1357 
1358   return changed;
1359 }
1360 
1361 /* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR and CURR is a comparison
1362    expression, examine the other OPS to see if any of them are comparisons
1363    of the same values, which we may be able to combine or eliminate.
1364    For example, we can rewrite (a < b) | (a == b) as (a <= b).  */
1365 
1366 static bool
1367 eliminate_redundant_comparison (enum tree_code opcode,
1368 				VEC (operand_entry_t, heap) **ops,
1369 				unsigned int currindex,
1370 				operand_entry_t curr)
1371 {
1372   tree op1, op2;
1373   enum tree_code lcode, rcode;
1374   gimple def1, def2;
1375   int i;
1376   operand_entry_t oe;
1377 
1378   if (opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
1379     return false;
1380 
1381   /* Check that CURR is a comparison.  */
1382   if (TREE_CODE (curr->op) != SSA_NAME)
1383     return false;
1384   def1 = SSA_NAME_DEF_STMT (curr->op);
1385   if (!is_gimple_assign (def1))
1386     return false;
1387   lcode = gimple_assign_rhs_code (def1);
1388   if (TREE_CODE_CLASS (lcode) != tcc_comparison)
1389     return false;
1390   op1 = gimple_assign_rhs1 (def1);
1391   op2 = gimple_assign_rhs2 (def1);
1392 
1393   /* Now look for a similar comparison in the remaining OPS.  */
1394   for (i = currindex + 1;
1395        VEC_iterate (operand_entry_t, *ops, i, oe);
1396        i++)
1397     {
1398       tree t;
1399 
1400       if (TREE_CODE (oe->op) != SSA_NAME)
1401 	continue;
1402       def2 = SSA_NAME_DEF_STMT (oe->op);
1403       if (!is_gimple_assign (def2))
1404 	continue;
1405       rcode = gimple_assign_rhs_code (def2);
1406       if (TREE_CODE_CLASS (rcode) != tcc_comparison)
1407 	continue;
1408 
1409       /* If we got here, we have a match.  See if we can combine the
1410 	 two comparisons.  */
1411       if (opcode == BIT_IOR_EXPR)
1412 	t = maybe_fold_or_comparisons (lcode, op1, op2,
1413 				       rcode, gimple_assign_rhs1 (def2),
1414 				       gimple_assign_rhs2 (def2));
1415       else
1416 	t = maybe_fold_and_comparisons (lcode, op1, op2,
1417 					rcode, gimple_assign_rhs1 (def2),
1418 					gimple_assign_rhs2 (def2));
1419       if (!t)
1420 	continue;
1421 
1422       /* maybe_fold_and_comparisons and maybe_fold_or_comparisons
1423 	 always give us a boolean_type_node value back.  If the original
1424 	 BIT_AND_EXPR or BIT_IOR_EXPR was of a wider integer type,
1425 	 we need to convert.  */
1426       if (!useless_type_conversion_p (TREE_TYPE (curr->op), TREE_TYPE (t)))
1427 	t = fold_convert (TREE_TYPE (curr->op), t);
1428 
1429       if (TREE_CODE (t) != INTEGER_CST
1430 	  && !operand_equal_p (t, curr->op, 0))
1431 	{
1432 	  enum tree_code subcode;
1433 	  tree newop1, newop2;
1434 	  if (!COMPARISON_CLASS_P (t))
1435 	    continue;
1436 	  extract_ops_from_tree (t, &subcode, &newop1, &newop2);
1437 	  STRIP_USELESS_TYPE_CONVERSION (newop1);
1438 	  STRIP_USELESS_TYPE_CONVERSION (newop2);
1439 	  if (!is_gimple_val (newop1) || !is_gimple_val (newop2))
1440 	    continue;
1441 	}
1442 
1443       if (dump_file && (dump_flags & TDF_DETAILS))
1444 	{
1445 	  fprintf (dump_file, "Equivalence: ");
1446 	  print_generic_expr (dump_file, curr->op, 0);
1447 	  fprintf (dump_file, " %s ", op_symbol_code (opcode));
1448 	  print_generic_expr (dump_file, oe->op, 0);
1449 	  fprintf (dump_file, " -> ");
1450 	  print_generic_expr (dump_file, t, 0);
1451 	  fprintf (dump_file, "\n");
1452 	}
1453 
1454       /* Now we can delete oe, as it has been subsumed by the new combined
1455          expression t.  */
1456       VEC_ordered_remove (operand_entry_t, *ops, i);
1457       reassociate_stats.ops_eliminated ++;
1458 
1459       /* If t is the same as curr->op, we're done.  Otherwise we must
1460 	 replace curr->op with t.  Special case is if we got a constant
1461 	 back, in which case we add it to the end instead of in place of
1462 	 the current entry.  */
1463       if (TREE_CODE (t) == INTEGER_CST)
1464 	{
1465 	  VEC_ordered_remove (operand_entry_t, *ops, currindex);
1466 	  add_to_ops_vec (ops, t);
1467 	}
1468       else if (!operand_equal_p (t, curr->op, 0))
1469 	{
1470 	  tree tmpvar;
1471 	  gimple sum;
1472 	  enum tree_code subcode;
1473 	  tree newop1;
1474 	  tree newop2;
1475 	  gcc_assert (COMPARISON_CLASS_P (t));
1476 	  tmpvar = create_tmp_var (TREE_TYPE (t), NULL);
1477 	  add_referenced_var (tmpvar);
1478 	  extract_ops_from_tree (t, &subcode, &newop1, &newop2);
1479 	  STRIP_USELESS_TYPE_CONVERSION (newop1);
1480 	  STRIP_USELESS_TYPE_CONVERSION (newop2);
1481 	  gcc_checking_assert (is_gimple_val (newop1)
1482 			       && is_gimple_val (newop2));
1483 	  sum = build_and_add_sum (tmpvar, newop1, newop2, subcode);
1484 	  curr->op = gimple_get_lhs (sum);
1485 	}
1486       return true;
1487     }
1488 
1489   return false;
1490 }
1491 
1492 /* Perform various identities and other optimizations on the list of
1493    operand entries, stored in OPS.  The tree code for the binary
1494    operation between all the operands is OPCODE.  */
1495 
1496 static void
1497 optimize_ops_list (enum tree_code opcode,
1498 		   VEC (operand_entry_t, heap) **ops)
1499 {
1500   unsigned int length = VEC_length (operand_entry_t, *ops);
1501   unsigned int i;
1502   operand_entry_t oe;
1503   operand_entry_t oelast = NULL;
1504   bool iterate = false;
1505 
1506   if (length == 1)
1507     return;
1508 
1509   oelast = VEC_last (operand_entry_t, *ops);
1510 
1511   /* If the last two are constants, pop the constants off, merge them
1512      and try the next two.  */
1513   if (oelast->rank == 0 && is_gimple_min_invariant (oelast->op))
1514     {
1515       operand_entry_t oelm1 = VEC_index (operand_entry_t, *ops, length - 2);
1516 
1517       if (oelm1->rank == 0
1518 	  && is_gimple_min_invariant (oelm1->op)
1519 	  && useless_type_conversion_p (TREE_TYPE (oelm1->op),
1520 				       TREE_TYPE (oelast->op)))
1521 	{
1522 	  tree folded = fold_binary (opcode, TREE_TYPE (oelm1->op),
1523 				     oelm1->op, oelast->op);
1524 
1525 	  if (folded && is_gimple_min_invariant (folded))
1526 	    {
1527 	      if (dump_file && (dump_flags & TDF_DETAILS))
1528 		fprintf (dump_file, "Merging constants\n");
1529 
1530 	      VEC_pop (operand_entry_t, *ops);
1531 	      VEC_pop (operand_entry_t, *ops);
1532 
1533 	      add_to_ops_vec (ops, folded);
1534 	      reassociate_stats.constants_eliminated++;
1535 
1536 	      optimize_ops_list (opcode, ops);
1537 	      return;
1538 	    }
1539 	}
1540     }
1541 
1542   eliminate_using_constants (opcode, ops);
1543   oelast = NULL;
1544 
1545   for (i = 0; VEC_iterate (operand_entry_t, *ops, i, oe);)
1546     {
1547       bool done = false;
1548 
1549       if (eliminate_not_pairs (opcode, ops, i, oe))
1550 	return;
1551       if (eliminate_duplicate_pair (opcode, ops, &done, i, oe, oelast)
1552 	  || (!done && eliminate_plus_minus_pair (opcode, ops, i, oe))
1553 	  || (!done && eliminate_redundant_comparison (opcode, ops, i, oe)))
1554 	{
1555 	  if (done)
1556 	    return;
1557 	  iterate = true;
1558 	  oelast = NULL;
1559 	  continue;
1560 	}
1561       oelast = oe;
1562       i++;
1563     }
1564 
1565   length  = VEC_length (operand_entry_t, *ops);
1566   oelast = VEC_last (operand_entry_t, *ops);
1567 
1568   if (iterate)
1569     optimize_ops_list (opcode, ops);
1570 }
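/* As a small illustration of the constant-merging step above: for
   PLUS_EXPR with OPS = { a_1, 5, 3 } the two trailing constants fold
   into 8, giving { a_1, 8 }, and the list is then re-scanned for
   further opportunities.  */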
1571 
1572 /* The following functions are subroutines to optimize_range_tests and allow
1573    it to try to change a logical combination of comparisons into a range
1574    test.
1575 
1576    For example, both
1577 	X == 2 || X == 5 || X == 3 || X == 4
1578    and
1579 	X >= 2 && X <= 5
1580    are converted to
1581 	(unsigned) (X - 2) <= 3
1582 
1583    For more information see the comments above fold_range_test in
1584    fold-const.c; this implementation is for GIMPLE.  */
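/* For instance (illustrative), in the first example above the four
   comparisons produce the range entries +[2,2], +[5,5], +[3,3] and
   +[4,4]; after sorting they merge into the single range +[2,5], which
   build_range_check emits as the unsigned test (unsigned) (X - 2) <= 3.  */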
1585 
1586 struct range_entry
1587 {
1588   tree exp;
1589   tree low;
1590   tree high;
1591   bool in_p;
1592   bool strict_overflow_p;
1593   unsigned int idx, next;
1594 };
1595 
1596 /* This is similar to make_range in fold-const.c, but on top of
1597    GIMPLE instead of trees.  */
1598 
1599 static void
1600 init_range_entry (struct range_entry *r, tree exp)
1601 {
1602   int in_p;
1603   tree low, high;
1604   bool is_bool, strict_overflow_p;
1605 
1606   r->exp = NULL_TREE;
1607   r->in_p = false;
1608   r->strict_overflow_p = false;
1609   r->low = NULL_TREE;
1610   r->high = NULL_TREE;
1611   if (TREE_CODE (exp) != SSA_NAME || !INTEGRAL_TYPE_P (TREE_TYPE (exp)))
1612     return;
1613 
1614   /* Start with simply saying "EXP != 0" and then look at the code of EXP
1615      and see if we can refine the range.  Some of the cases below may not
1616      happen, but it doesn't seem worth worrying about this.  We "continue"
1617      the outer loop when we've changed something; otherwise we "break"
1618      the switch, which will "break" the while.  */
1619   low = build_int_cst (TREE_TYPE (exp), 0);
1620   high = low;
1621   in_p = 0;
1622   strict_overflow_p = false;
1623   is_bool = false;
1624   if (TYPE_PRECISION (TREE_TYPE (exp)) == 1)
1625     {
1626       if (TYPE_UNSIGNED (TREE_TYPE (exp)))
1627 	is_bool = true;
1628       else
1629 	return;
1630     }
1631   else if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE)
1632     is_bool = true;
1633 
1634   while (1)
1635     {
1636       gimple stmt;
1637       enum tree_code code;
1638       tree arg0, arg1, exp_type;
1639       tree nexp;
1640       location_t loc;
1641 
1642       if (TREE_CODE (exp) != SSA_NAME)
1643 	break;
1644 
1645       stmt = SSA_NAME_DEF_STMT (exp);
1646       if (!is_gimple_assign (stmt))
1647 	break;
1648 
1649       code = gimple_assign_rhs_code (stmt);
1650       arg0 = gimple_assign_rhs1 (stmt);
1651       if (TREE_CODE (arg0) != SSA_NAME)
1652 	break;
1653       arg1 = gimple_assign_rhs2 (stmt);
1654       exp_type = TREE_TYPE (exp);
1655       loc = gimple_location (stmt);
1656       switch (code)
1657 	{
1658 	case BIT_NOT_EXPR:
1659 	  if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE)
1660 	    {
1661 	      in_p = !in_p;
1662 	      exp = arg0;
1663 	      continue;
1664 	    }
1665 	  break;
1666 	case SSA_NAME:
1667 	  exp = arg0;
1668 	  continue;
1669 	CASE_CONVERT:
1670 	  if (is_bool)
1671 	    goto do_default;
1672 	  if (TYPE_PRECISION (TREE_TYPE (arg0)) == 1)
1673 	    {
1674 	      if (TYPE_UNSIGNED (TREE_TYPE (arg0)))
1675 		is_bool = true;
1676 	      else
1677 		return;
1678 	    }
1679 	  else if (TREE_CODE (TREE_TYPE (arg0)) == BOOLEAN_TYPE)
1680 	    is_bool = true;
1681 	  goto do_default;
1682 	case EQ_EXPR:
1683 	case NE_EXPR:
1684 	case LT_EXPR:
1685 	case LE_EXPR:
1686 	case GE_EXPR:
1687 	case GT_EXPR:
1688 	  is_bool = true;
1689 	  /* FALLTHRU */
1690 	default:
1691 	  if (!is_bool)
1692 	    return;
1693 	do_default:
1694 	  nexp = make_range_step (loc, code, arg0, arg1, exp_type,
1695 				  &low, &high, &in_p,
1696 				  &strict_overflow_p);
1697 	  if (nexp != NULL_TREE)
1698 	    {
1699 	      exp = nexp;
1700 	      gcc_assert (TREE_CODE (exp) == SSA_NAME);
1701 	      continue;
1702 	    }
1703 	  break;
1704 	}
1705       break;
1706     }
1707   if (is_bool)
1708     {
1709       r->exp = exp;
1710       r->in_p = in_p;
1711       r->low = low;
1712       r->high = high;
1713       r->strict_overflow_p = strict_overflow_p;
1714     }
1715 }
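/* A small illustration: if EXP is the boolean SSA name defined by

     _1 = x_2(D) > 5;

   with x_2 a signed integer, the loop above refines the initial
   "EXP != 0" range into r->exp = x_2(D), r->in_p = true, r->low = 6 and
   r->high = NULL_TREE, i.e. "x_2 in [6, +INF]".  */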
1716 
1717 /* Comparison function for qsort.  Sort entries
1718    without SSA_NAME exp first, then with SSA_NAMEs sorted
1719    by increasing SSA_NAME_VERSION, and for the same SSA_NAMEs
1720    by increasing ->low and if ->low is the same, by increasing
1721    ->high.  ->low == NULL_TREE means minimum, ->high == NULL_TREE
1722    maximum.  */
1723 
1724 static int
1725 range_entry_cmp (const void *a, const void *b)
1726 {
1727   const struct range_entry *p = (const struct range_entry *) a;
1728   const struct range_entry *q = (const struct range_entry *) b;
1729 
1730   if (p->exp != NULL_TREE && TREE_CODE (p->exp) == SSA_NAME)
1731     {
1732       if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
1733 	{
1734 	  /* Group range_entries for the same SSA_NAME together.  */
1735 	  if (SSA_NAME_VERSION (p->exp) < SSA_NAME_VERSION (q->exp))
1736 	    return -1;
1737 	  else if (SSA_NAME_VERSION (p->exp) > SSA_NAME_VERSION (q->exp))
1738 	    return 1;
1739 	  /* If ->low is different, NULL low goes first, then by
1740 	     ascending low.  */
1741 	  if (p->low != NULL_TREE)
1742 	    {
1743 	      if (q->low != NULL_TREE)
1744 		{
1745 		  tree tem = fold_binary (LT_EXPR, boolean_type_node,
1746 					  p->low, q->low);
1747 		  if (tem && integer_onep (tem))
1748 		    return -1;
1749 		  tem = fold_binary (GT_EXPR, boolean_type_node,
1750 				     p->low, q->low);
1751 		  if (tem && integer_onep (tem))
1752 		    return 1;
1753 		}
1754 	      else
1755 		return 1;
1756 	    }
1757 	  else if (q->low != NULL_TREE)
1758 	    return -1;
1759 	  /* If ->high is different, NULL high goes last, before that by
1760 	     ascending high.  */
1761 	  if (p->high != NULL_TREE)
1762 	    {
1763 	      if (q->high != NULL_TREE)
1764 		{
1765 		  tree tem = fold_binary (LT_EXPR, boolean_type_node,
1766 					  p->high, q->high);
1767 		  if (tem && integer_onep (tem))
1768 		    return -1;
1769 		  tem = fold_binary (GT_EXPR, boolean_type_node,
1770 				     p->high, q->high);
1771 		  if (tem && integer_onep (tem))
1772 		    return 1;
1773 		}
1774 	      else
1775 		return -1;
1776 	    }
1777 	  else if (q->high != NULL_TREE)
1778 	    return 1;
1779 	  /* If both ranges are the same, sort below by ascending idx.  */
1780 	}
1781       else
1782 	return 1;
1783     }
1784   else if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
1785     return -1;
1786 
1787   if (p->idx < q->idx)
1788     return -1;
1789   else
1790     {
1791       gcc_checking_assert (p->idx > q->idx);
1792       return 1;
1793     }
1794 }
1795 
1796 /* Helper routine of optimize_range_test.
1797    [EXP, IN_P, LOW, HIGH, STRICT_OVERFLOW_P] is a merged range for
1798    RANGE and OTHERRANGE through OTHERRANGE + COUNT - 1 ranges,
1799    OPCODE and OPS are arguments of optimize_range_tests.  Return
1800    true if the range merge has been successful.  */
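
/* An illustrative sketch of the effect (hypothetical values): with
   OPCODE == BIT_AND_EXPR and a merged anti-range -[2, 3] for an
   unsigned X, the operand at RANGE->idx in OPS is replaced by the
   gimplified equivalent of (X - 2) > 1, and the operands of the COUNT
   merged-away ranges are set to error_mark_node so that
   optimize_range_tests can drop them afterwards.  */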
1801 
1802 static bool
1803 update_range_test (struct range_entry *range, struct range_entry *otherrange,
1804 		   unsigned int count, enum tree_code opcode,
1805 		   VEC (operand_entry_t, heap) **ops, tree exp, bool in_p,
1806 		   tree low, tree high, bool strict_overflow_p)
1807 {
1808   tree op = VEC_index (operand_entry_t, *ops, range->idx)->op;
1809   location_t loc = gimple_location (SSA_NAME_DEF_STMT (op));
1810   tree tem = build_range_check (loc, TREE_TYPE (op), exp, in_p, low, high);
1811   enum warn_strict_overflow_code wc = WARN_STRICT_OVERFLOW_COMPARISON;
1812   gimple_stmt_iterator gsi;
1813 
1814   if (tem == NULL_TREE)
1815     return false;
1816 
1817   if (strict_overflow_p && issue_strict_overflow_warning (wc))
1818     warning_at (loc, OPT_Wstrict_overflow,
1819 		"assuming signed overflow does not occur "
1820 		"when simplifying range test");
1821 
1822   if (dump_file && (dump_flags & TDF_DETAILS))
1823     {
1824       struct range_entry *r;
1825       fprintf (dump_file, "Optimizing range tests ");
1826       print_generic_expr (dump_file, range->exp, 0);
1827       fprintf (dump_file, " %c[", range->in_p ? '+' : '-');
1828       print_generic_expr (dump_file, range->low, 0);
1829       fprintf (dump_file, ", ");
1830       print_generic_expr (dump_file, range->high, 0);
1831       fprintf (dump_file, "]");
1832       for (r = otherrange; r < otherrange + count; r++)
1833 	{
1834 	  fprintf (dump_file, " and %c[", r->in_p ? '+' : '-');
1835 	  print_generic_expr (dump_file, r->low, 0);
1836 	  fprintf (dump_file, ", ");
1837 	  print_generic_expr (dump_file, r->high, 0);
1838 	  fprintf (dump_file, "]");
1839 	}
1840       fprintf (dump_file, "\n into ");
1841       print_generic_expr (dump_file, tem, 0);
1842       fprintf (dump_file, "\n");
1843     }
1844 
1845   if (opcode == BIT_IOR_EXPR)
1846     tem = invert_truthvalue_loc (loc, tem);
1847 
1848   tem = fold_convert_loc (loc, TREE_TYPE (op), tem);
1849   gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (op));
1850   tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE, true,
1851 				  GSI_SAME_STMT);
1852 
1853   VEC_index (operand_entry_t, *ops, range->idx)->op = tem;
1854   range->exp = exp;
1855   range->low = low;
1856   range->high = high;
1857   range->in_p = in_p;
1858   range->strict_overflow_p = false;
1859 
1860   for (range = otherrange; range < otherrange + count; range++)
1861     {
1862       VEC_index (operand_entry_t, *ops, range->idx)->op = error_mark_node;
1863       range->exp = NULL_TREE;
1864     }
1865   return true;
1866 }
1867 
1868 /* Optimize range tests, similarly to how fold_range_test optimizes
1869    them on trees.  The tree code for the binary
1870    operation between all the operands is OPCODE.  */
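
/* As an illustrative example: for OPCODE == BIT_AND_EXPR, operands
   such as
     _1 = X != 2;  _2 = X != 3;  _3 = _1 & _2;
   yield the anti-ranges -[2, 2] and -[3, 3] for X, which merge_ranges
   combines into -[2, 3]; update_range_test then replaces one operand
   with the equivalent of (X - 2U) > 1U and marks the other one for
   removal.  For OPCODE == BIT_IOR_EXPR the ranges are inverted first
   and the emitted test is inverted back at the end.  */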
1871 
1872 static void
1873 optimize_range_tests (enum tree_code opcode,
1874 		      VEC (operand_entry_t, heap) **ops)
1875 {
1876   unsigned int length = VEC_length (operand_entry_t, *ops), i, j, first;
1877   operand_entry_t oe;
1878   struct range_entry *ranges;
1879   bool any_changes = false;
1880 
1881   if (length == 1)
1882     return;
1883 
1884   ranges = XNEWVEC (struct range_entry, length);
1885   for (i = 0; i < length; i++)
1886     {
1887       ranges[i].idx = i;
1888       init_range_entry (ranges + i, VEC_index (operand_entry_t, *ops, i)->op);
1889       /* For |, invert it now; we will invert it again before emitting
1890 	 the optimized expression.  */
1891       if (opcode == BIT_IOR_EXPR)
1892 	ranges[i].in_p = !ranges[i].in_p;
1893     }
1894 
1895   qsort (ranges, length, sizeof (*ranges), range_entry_cmp);
1896   for (i = 0; i < length; i++)
1897     if (ranges[i].exp != NULL_TREE && TREE_CODE (ranges[i].exp) == SSA_NAME)
1898       break;
1899 
1900   /* Try to merge ranges.  */
1901   for (first = i; i < length; i++)
1902     {
1903       tree low = ranges[i].low;
1904       tree high = ranges[i].high;
1905       int in_p = ranges[i].in_p;
1906       bool strict_overflow_p = ranges[i].strict_overflow_p;
1907       int update_fail_count = 0;
1908 
1909       for (j = i + 1; j < length; j++)
1910 	{
1911 	  if (ranges[i].exp != ranges[j].exp)
1912 	    break;
1913 	  if (!merge_ranges (&in_p, &low, &high, in_p, low, high,
1914 			     ranges[j].in_p, ranges[j].low, ranges[j].high))
1915 	    break;
1916 	  strict_overflow_p |= ranges[j].strict_overflow_p;
1917 	}
1918 
1919       if (j == i + 1)
1920 	continue;
1921 
1922       if (update_range_test (ranges + i, ranges + i + 1, j - i - 1, opcode,
1923 			     ops, ranges[i].exp, in_p, low, high,
1924 			     strict_overflow_p))
1925 	{
1926 	  i = j - 1;
1927 	  any_changes = true;
1928 	}
1929       /* Avoid quadratic complexity in the case where all the merge_ranges
1930 	 calls would succeed while update_range_test keeps failing.  */
1931       else if (update_fail_count == 64)
1932 	i = j - 1;
1933       else
1934 	++update_fail_count;
1935     }
1936 
1937   /* Optimize X == CST1 || X == CST2
1938      if popcount (CST1 ^ CST2) == 1 into
1939      (X & ~(CST1 ^ CST2)) == (CST1 & ~(CST1 ^ CST2)).
1940      Similarly for ranges.  E.g.
1941      X != 2 && X != 3 && X != 10 && X != 11
1942      will be transformed by the above loop into
1943      (X - 2U) > 1U && (X - 10U) > 1U
1944      and this loop can transform that into
1945      ((X & ~8) - 2U) > 1U.  */
1946   for (i = first; i < length; i++)
1947     {
1948       tree lowi, highi, lowj, highj, type, lowxor, highxor, tem, exp;
1949 
1950       if (ranges[i].exp == NULL_TREE || ranges[i].in_p)
1951 	continue;
1952       type = TREE_TYPE (ranges[i].exp);
1953       if (!INTEGRAL_TYPE_P (type))
1954 	continue;
1955       lowi = ranges[i].low;
1956       if (lowi == NULL_TREE)
1957 	lowi = TYPE_MIN_VALUE (type);
1958       highi = ranges[i].high;
1959       if (highi == NULL_TREE)
1960 	continue;
1961       for (j = i + 1; j < length && j < i + 64; j++)
1962 	{
1963 	  if (ranges[j].exp == NULL_TREE)
1964 	    continue;
1965 	  if (ranges[i].exp != ranges[j].exp)
1966 	    break;
1967 	  if (ranges[j].in_p)
1968 	    continue;
1969 	  lowj = ranges[j].low;
1970 	  if (lowj == NULL_TREE)
1971 	    continue;
1972 	  highj = ranges[j].high;
1973 	  if (highj == NULL_TREE)
1974 	    highj = TYPE_MAX_VALUE (type);
1975 	  tem = fold_binary (GT_EXPR, boolean_type_node,
1976 			     lowj, highi);
1977 	  if (tem == NULL_TREE || !integer_onep (tem))
1978 	    continue;
1979 	  lowxor = fold_binary (BIT_XOR_EXPR, type, lowi, lowj);
1980 	  if (lowxor == NULL_TREE || TREE_CODE (lowxor) != INTEGER_CST)
1981 	    continue;
1982 	  gcc_checking_assert (!integer_zerop (lowxor));
1983 	  tem = fold_binary (MINUS_EXPR, type, lowxor,
1984 			     build_int_cst (type, 1));
1985 	  if (tem == NULL_TREE)
1986 	    continue;
1987 	  tem = fold_binary (BIT_AND_EXPR, type, lowxor, tem);
1988 	  if (tem == NULL_TREE || !integer_zerop (tem))
1989 	    continue;
1990 	  highxor = fold_binary (BIT_XOR_EXPR, type, highi, highj);
1991 	  if (!tree_int_cst_equal (lowxor, highxor))
1992 	    continue;
1993 	  tem = fold_build1 (BIT_NOT_EXPR, type, lowxor);
1994 	  exp = fold_build2 (BIT_AND_EXPR, type, ranges[i].exp, tem);
1995 	  lowj = fold_build2 (BIT_AND_EXPR, type, lowi, tem);
1996 	  highj = fold_build2 (BIT_AND_EXPR, type, highi, tem);
1997 	  if (update_range_test (ranges + i, ranges + j, 1, opcode, ops, exp,
1998 				 ranges[i].in_p, lowj, highj,
1999 				 ranges[i].strict_overflow_p
2000 				 || ranges[j].strict_overflow_p))
2001 	    {
2002 	      any_changes = true;
2003 	      break;
2004 	    }
2005 	}
2006     }
2007 
2008   if (any_changes)
2009     {
2010       j = 0;
2011       FOR_EACH_VEC_ELT (operand_entry_t, *ops, i, oe)
2012 	{
2013 	  if (oe->op == error_mark_node)
2014 	    continue;
2015 	  else if (i != j)
2016 	    VEC_replace (operand_entry_t, *ops, j, oe);
2017 	  j++;
2018 	}
2019       VEC_truncate (operand_entry_t, *ops, j);
2020     }
2021 
2022   XDELETEVEC (ranges);
2023 }
2024 
2025 /* Return true if OPERAND is defined by a PHI node which uses the LHS
2026    of STMT in its operands.  This is also known as a "destructive
2027    update" operation.  */
2028 
2029 static bool
2030 is_phi_for_stmt (gimple stmt, tree operand)
2031 {
2032   gimple def_stmt;
2033   tree lhs;
2034   use_operand_p arg_p;
2035   ssa_op_iter i;
2036 
2037   if (TREE_CODE (operand) != SSA_NAME)
2038     return false;
2039 
2040   lhs = gimple_assign_lhs (stmt);
2041 
2042   def_stmt = SSA_NAME_DEF_STMT (operand);
2043   if (gimple_code (def_stmt) != GIMPLE_PHI)
2044     return false;
2045 
2046   FOR_EACH_PHI_ARG (arg_p, def_stmt, i, SSA_OP_USE)
2047     if (lhs == USE_FROM_PTR (arg_p))
2048       return true;
2049   return false;
2050 }
2051 
2052 /* Remove the def stmt of VAR if VAR has zero uses, and repeat
2053    on its rhs1 operand.  */
2054 
2055 static void
2056 remove_visited_stmt_chain (tree var)
2057 {
2058   gimple stmt;
2059   gimple_stmt_iterator gsi;
2060 
2061   while (1)
2062     {
2063       if (TREE_CODE (var) != SSA_NAME || !has_zero_uses (var))
2064 	return;
2065       stmt = SSA_NAME_DEF_STMT (var);
2066       if (!is_gimple_assign (stmt)
2067 	  || !gimple_visited_p (stmt))
2068 	return;
2069       var = gimple_assign_rhs1 (stmt);
2070       gsi = gsi_for_stmt (stmt);
2071       gsi_remove (&gsi, true);
2072       release_defs (stmt);
2073     }
2074 }
2075 
2076 /* This function checks three consecutive operands in
2077    the passed operand vector OPS starting from OPINDEX and
2078    swaps two operands if it is profitable for the binary operation
2079    consuming the OPINDEX + 1 and OPINDEX + 2 operands.
2080 
2081    We pair ops with the same rank if possible.
2082 
2083    The alternative we try is to see if STMT is a destructive
2084    update style statement, which is like:
2085    b = phi (a, ...)
2086    a = c + b;
2087    In that case, we want to use the destructive update form to
2088    expose the possible vectorizer sum reduction opportunity.
2089    In that case, the third operand will be the phi node. This
2090    check is not performed if STMT is null.
2091 
2092    We could, of course, try to be better as noted above, and do a
2093    lot of work to try to find these opportunities in >3 operand
2094    cases, but it is unlikely to be worth it.  */
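
/* For illustration (hypothetical ranks): with the three operands
   { a (rank 3), b (rank 3), c (rank 1) } at OPINDEX, the first test
   below swaps a and c, yielding { c, b, a }, so that the two rank-3
   operands end up paired in the innermost binary statement, where the
   redundancy eliminator has the best chance of exploiting them.  */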
2095 
2096 static void
2097 swap_ops_for_binary_stmt (VEC(operand_entry_t, heap) * ops,
2098 			  unsigned int opindex, gimple stmt)
2099 {
2100   operand_entry_t oe1, oe2, oe3;
2101 
2102   oe1 = VEC_index (operand_entry_t, ops, opindex);
2103   oe2 = VEC_index (operand_entry_t, ops, opindex + 1);
2104   oe3 = VEC_index (operand_entry_t, ops, opindex + 2);
2105 
2106   if ((oe1->rank == oe2->rank
2107        && oe2->rank != oe3->rank)
2108       || (stmt && is_phi_for_stmt (stmt, oe3->op)
2109 	  && !is_phi_for_stmt (stmt, oe1->op)
2110 	  && !is_phi_for_stmt (stmt, oe2->op)))
2111     {
2112       struct operand_entry temp = *oe3;
2113       oe3->op = oe1->op;
2114       oe3->rank = oe1->rank;
2115       oe1->op = temp.op;
2116       oe1->rank= temp.rank;
2117     }
2118   else if ((oe1->rank == oe3->rank
2119 	    && oe2->rank != oe3->rank)
2120 	   || (stmt && is_phi_for_stmt (stmt, oe2->op)
2121 	       && !is_phi_for_stmt (stmt, oe1->op)
2122 	       && !is_phi_for_stmt (stmt, oe3->op)))
2123     {
2124       struct operand_entry temp = *oe2;
2125       oe2->op = oe1->op;
2126       oe2->rank = oe1->rank;
2127       oe1->op = temp.op;
2128       oe1->rank= temp.rank;
2129     }
2130 }
2131 
2132 /* Recursively rewrite our linearized statements so that the operators
2133    match those in OPS[OPINDEX], putting the computation in rank
2134    order.  */
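
/* For illustration (hypothetical SSA names, chain already linearized
   and ignoring any profitability swaps): given the chain
     _1 = u + v;  _2 = _1 + w;  _3 = _2 + x;
   and OPS = { o0, o1, o2, o3 } after sorting, STMT (the definition of
   _3) receives o0 as its rhs2, the recursion gives the definition of
   _2 the operand o1, and the innermost statement receives o2 and
   o3.  */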
2135 
2136 static void
2137 rewrite_expr_tree (gimple stmt, unsigned int opindex,
2138 		   VEC(operand_entry_t, heap) * ops, bool moved)
2139 {
2140   tree rhs1 = gimple_assign_rhs1 (stmt);
2141   tree rhs2 = gimple_assign_rhs2 (stmt);
2142   operand_entry_t oe;
2143 
2144   /* If we have three operands left, then we want to make sure the ones
2145      that get the double binary op are chosen wisely.  */
2146   if (opindex + 3 == VEC_length (operand_entry_t, ops))
2147     swap_ops_for_binary_stmt (ops, opindex, stmt);
2148 
2149   /* The final recursion case for this function is that you have
2150      exactly two operations left.
2151      If we had exactly one op in the entire list to start with, we
2152      would have never called this function, and the tail recursion
2153      rewrites them one at a time.  */
2154   if (opindex + 2 == VEC_length (operand_entry_t, ops))
2155     {
2156       operand_entry_t oe1, oe2;
2157 
2158       oe1 = VEC_index (operand_entry_t, ops, opindex);
2159       oe2 = VEC_index (operand_entry_t, ops, opindex + 1);
2160 
2161       if (rhs1 != oe1->op || rhs2 != oe2->op)
2162 	{
2163 	  if (dump_file && (dump_flags & TDF_DETAILS))
2164 	    {
2165 	      fprintf (dump_file, "Transforming ");
2166 	      print_gimple_stmt (dump_file, stmt, 0, 0);
2167 	    }
2168 
2169 	  gimple_assign_set_rhs1 (stmt, oe1->op);
2170 	  gimple_assign_set_rhs2 (stmt, oe2->op);
2171 	  update_stmt (stmt);
2172 	  if (rhs1 != oe1->op && rhs1 != oe2->op)
2173 	    remove_visited_stmt_chain (rhs1);
2174 
2175 	  if (dump_file && (dump_flags & TDF_DETAILS))
2176 	    {
2177 	      fprintf (dump_file, " into ");
2178 	      print_gimple_stmt (dump_file, stmt, 0, 0);
2179 	    }
2180 
2181 	}
2182       return;
2183     }
2184 
2185   /* If we hit here, we should have 3 or more ops left.  */
2186   gcc_assert (opindex + 2 < VEC_length (operand_entry_t, ops));
2187 
2188   /* Rewrite the next operator.  */
2189   oe = VEC_index (operand_entry_t, ops, opindex);
2190 
2191   if (oe->op != rhs2)
2192     {
2193       if (!moved)
2194 	{
2195 	  gimple_stmt_iterator gsinow, gsirhs1;
2196 	  gimple stmt1 = stmt, stmt2;
2197 	  unsigned int count;
2198 
2199 	  gsinow = gsi_for_stmt (stmt);
2200 	  count = VEC_length (operand_entry_t, ops) - opindex - 2;
2201 	  while (count-- != 0)
2202 	    {
2203 	      stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt1));
2204 	      gsirhs1 = gsi_for_stmt (stmt2);
2205 	      gsi_move_before (&gsirhs1, &gsinow);
2206 	      gsi_prev (&gsinow);
2207 	      stmt1 = stmt2;
2208 	    }
2209 	  moved = true;
2210 	}
2211 
2212       if (dump_file && (dump_flags & TDF_DETAILS))
2213 	{
2214 	  fprintf (dump_file, "Transforming ");
2215 	  print_gimple_stmt (dump_file, stmt, 0, 0);
2216 	}
2217 
2218       gimple_assign_set_rhs2 (stmt, oe->op);
2219       update_stmt (stmt);
2220 
2221       if (dump_file && (dump_flags & TDF_DETAILS))
2222 	{
2223 	  fprintf (dump_file, " into ");
2224 	  print_gimple_stmt (dump_file, stmt, 0, 0);
2225 	}
2226     }
2227   /* Recurse on the LHS of the binary operator, which is guaranteed to
2228      be the non-leaf side.  */
2229   rewrite_expr_tree (SSA_NAME_DEF_STMT (rhs1), opindex + 1, ops, moved);
2230 }
2231 
2232 /* Find out how many cycles we need to compute the statement chain.
2233    OPS_NUM holds the number of statements in the chain.  CPU_WIDTH is the
2234    maximum number of independent statements we may execute per cycle.  */
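
/* A worked example (purely illustrative): for OPS_NUM = 10 and
   CPU_WIDTH = 2 the first phase takes 10 / (2 * 2) = 2 cycles and
   leaves 10 - 2 * 2 = 6 operands; halving those needs
   floor_log2 (6) + 1 = 3 more cycles, so the result is 5.  */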
2235 
2236 static int
2237 get_required_cycles (int ops_num, int cpu_width)
2238 {
2239   int res;
2240   int elog;
2241   unsigned int rest;
2242 
2243   /* While we have more than 2 * cpu_width operands
2244      we may reduce the number of operands by cpu_width
2245      per cycle.  */
2246   res = ops_num / (2 * cpu_width);
2247 
2248   /* The remaining operand count may be halved each cycle
2249      until we have only one operand.  */
2250   rest = (unsigned)(ops_num - res * cpu_width);
2251   elog = exact_log2 (rest);
2252   if (elog >= 0)
2253     res += elog;
2254   else
2255     res += floor_log2 (rest) + 1;
2256 
2257   return res;
2258 }
2259 
2260 /* Returns an optimal number of registers to use for computation of
2261    given statements.  */
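
/* For example (illustrative numbers): if the target reports width 4
   for a chain of 4 operands, get_required_cycles (4, 4) and
   get_required_cycles (4, 2) are both 2, so the binary search below
   settles on width 2, saving registers at no cost in cycles.  */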
2262 
2263 static int
2264 get_reassociation_width (int ops_num, enum tree_code opc,
2265 			 enum machine_mode mode)
2266 {
2267   int param_width = PARAM_VALUE (PARAM_TREE_REASSOC_WIDTH);
2268   int width;
2269   int width_min;
2270   int cycles_best;
2271 
2272   if (param_width > 0)
2273     width = param_width;
2274   else
2275     width = targetm.sched.reassociation_width (opc, mode);
2276 
2277   if (width == 1)
2278     return width;
2279 
2280   /* Get the minimal time required for sequence computation.  */
2281   cycles_best = get_required_cycles (ops_num, width);
2282 
2283   /* Check whether we can use a smaller width and still compute the
2284      sequence in the same number of cycles; that lets us reduce register
2285      usage.  get_required_cycles is monotonically increasing as the width
2286      decreases, so we can perform a binary search for the minimal width
2287      that still results in the optimal cycle count.  */
2288   width_min = 1;
2289   while (width > width_min)
2290     {
2291       int width_mid = (width + width_min) / 2;
2292 
2293       if (get_required_cycles (ops_num, width_mid) == cycles_best)
2294 	width = width_mid;
2295       else if (width_min < width_mid)
2296 	width_min = width_mid;
2297       else
2298 	break;
2299     }
2300 
2301   return width;
2302 }
2303 
2304 /* Recursively rewrite our linearized statements so that the operators
2305    match those in OPS[OPINDEX], putting the computation in rank
2306    order and trying to allow operations to be executed in
2307    parallel.  */
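
/* For illustration (hypothetical operands): with WIDTH == 2 and
   OPS = { a, b, c, d } the chain is emitted roughly as
     _1 = c + d;  _2 = a + b;  lhs = _1 + _2;
   i.e. a balanced shape with two independent additions, instead of
   the serial ((a + b) + c) + d produced by rewrite_expr_tree.  */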
2308 
2309 static void
2310 rewrite_expr_tree_parallel (gimple stmt, int width,
2311 			    VEC(operand_entry_t, heap) * ops)
2312 {
2313   enum tree_code opcode = gimple_assign_rhs_code (stmt);
2314   int op_num = VEC_length (operand_entry_t, ops);
2315   int stmt_num = op_num - 1;
2316   gimple *stmts = XALLOCAVEC (gimple, stmt_num);
2317   int op_index = op_num - 1;
2318   int stmt_index = 0;
2319   int ready_stmts_end = 0;
2320   int i = 0;
2321   tree last_rhs1 = gimple_assign_rhs1 (stmt);
2322   tree lhs_var;
2323 
2324   /* We start expression rewriting from the top statements.
2325      So, in this loop we create a full list of statements
2326      we will work with.  */
2327   stmts[stmt_num - 1] = stmt;
2328   for (i = stmt_num - 2; i >= 0; i--)
2329     stmts[i] = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmts[i+1]));
2330 
2331   lhs_var = create_tmp_reg (TREE_TYPE (last_rhs1), NULL);
2332   add_referenced_var (lhs_var);
2333 
2334   for (i = 0; i < stmt_num; i++)
2335     {
2336       tree op1, op2;
2337 
2338       /* Determine whether we should use results of
2339 	 already handled statements or not.  */
2340       if (ready_stmts_end == 0
2341 	  && (i - stmt_index >= width || op_index < 1))
2342 	ready_stmts_end = i;
2343 
2344       /* Now we choose operands for the next statement.  A nonzero
2345 	 value in ready_stmts_end means that we should use
2346 	 the results of already generated statements as new operands.  */
2347       if (ready_stmts_end > 0)
2348 	{
2349 	  op1 = gimple_assign_lhs (stmts[stmt_index++]);
2350 	  if (ready_stmts_end > stmt_index)
2351 	    op2 = gimple_assign_lhs (stmts[stmt_index++]);
2352 	  else if (op_index >= 0)
2353 	    op2 = VEC_index (operand_entry_t, ops, op_index--)->op;
2354 	  else
2355 	    {
2356 	      gcc_assert (stmt_index < i);
2357 	      op2 = gimple_assign_lhs (stmts[stmt_index++]);
2358 	    }
2359 
2360 	  if (stmt_index >= ready_stmts_end)
2361 	    ready_stmts_end = 0;
2362 	}
2363       else
2364 	{
2365 	  if (op_index > 1)
2366 	    swap_ops_for_binary_stmt (ops, op_index - 2, NULL);
2367 	  op2 = VEC_index (operand_entry_t, ops, op_index--)->op;
2368 	  op1 = VEC_index (operand_entry_t, ops, op_index--)->op;
2369 	}
2370 
2371       /* If we are emitting the last statement, then put the
2372 	 operands into it.  This will also
2373 	 terminate the loop.  */
2374       if (op_index < 0 && stmt_index == i)
2375 	i = stmt_num - 1;
2376 
2377       if (dump_file && (dump_flags & TDF_DETAILS))
2378 	{
2379 	  fprintf (dump_file, "Transforming ");
2380 	  print_gimple_stmt (dump_file, stmts[i], 0, 0);
2381 	}
2382 
2383       /* We keep the original statement only for the last one.  All
2384 	 others are recreated.  */
2385       if (i == stmt_num - 1)
2386 	{
2387 	  gimple_assign_set_rhs1 (stmts[i], op1);
2388 	  gimple_assign_set_rhs2 (stmts[i], op2);
2389 	  update_stmt (stmts[i]);
2390 	}
2391       else
2392 	stmts[i] = build_and_add_sum (lhs_var, op1, op2, opcode);
2393 
2394       if (dump_file && (dump_flags & TDF_DETAILS))
2395 	{
2396 	  fprintf (dump_file, " into ");
2397 	  print_gimple_stmt (dump_file, stmts[i], 0, 0);
2398 	}
2399     }
2400 
2401   remove_visited_stmt_chain (last_rhs1);
2402 }
2403 
2404 /* Transform STMT, which is really (A + B) + (C + D), into the left
2405    linear form, ((A + B) + C) + D.
2406    Recurse on D if necessary.  */
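
/* For illustration (hypothetical SSA names): given
     x = a + b;  y = c + d;  z = x + y;
   the statement defining y is moved before z and the statements are
   rewritten as
     x = a + b;  y = x + d;  z = y + c;
   i.e. a left-linear chain (the exact operand order is irrelevant for
   a commutative operation).  */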
2407 
2408 static void
2409 linearize_expr (gimple stmt)
2410 {
2411   gimple_stmt_iterator gsinow, gsirhs;
2412   gimple binlhs = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
2413   gimple binrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
2414   enum tree_code rhscode = gimple_assign_rhs_code (stmt);
2415   gimple newbinrhs = NULL;
2416   struct loop *loop = loop_containing_stmt (stmt);
2417 
2418   gcc_assert (is_reassociable_op (binlhs, rhscode, loop)
2419 	      && is_reassociable_op (binrhs, rhscode, loop));
2420 
2421   gsinow = gsi_for_stmt (stmt);
2422   gsirhs = gsi_for_stmt (binrhs);
2423   gsi_move_before (&gsirhs, &gsinow);
2424 
2425   gimple_assign_set_rhs2 (stmt, gimple_assign_rhs1 (binrhs));
2426   gimple_assign_set_rhs1 (binrhs, gimple_assign_lhs (binlhs));
2427   gimple_assign_set_rhs1 (stmt, gimple_assign_lhs (binrhs));
2428 
2429   if (TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME)
2430     newbinrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
2431 
2432   if (dump_file && (dump_flags & TDF_DETAILS))
2433     {
2434       fprintf (dump_file, "Linearized: ");
2435       print_gimple_stmt (dump_file, stmt, 0, 0);
2436     }
2437 
2438   reassociate_stats.linearized++;
2439   update_stmt (binrhs);
2440   update_stmt (binlhs);
2441   update_stmt (stmt);
2442 
2443   gimple_set_visited (stmt, true);
2444   gimple_set_visited (binlhs, true);
2445   gimple_set_visited (binrhs, true);
2446 
2447   /* Tail recurse on the new rhs if it still needs reassociation.  */
2448   if (newbinrhs && is_reassociable_op (newbinrhs, rhscode, loop))
2449     /* ??? This should probably be linearize_expr (newbinrhs) but I don't
2450 	   want to change the algorithm while converting to tuples.  */
2451     linearize_expr (stmt);
2452 }
2453 
2454 /* If LHS has a single immediate use that is a GIMPLE_ASSIGN statement, return
2455    it.  Otherwise, return NULL.  */
2456 
2457 static gimple
2458 get_single_immediate_use (tree lhs)
2459 {
2460   use_operand_p immuse;
2461   gimple immusestmt;
2462 
2463   if (TREE_CODE (lhs) == SSA_NAME
2464       && single_imm_use (lhs, &immuse, &immusestmt)
2465       && is_gimple_assign (immusestmt))
2466     return immusestmt;
2467 
2468   return NULL;
2469 }
2470 
2471 /* Recursively negate the value of TONEGATE, and return the SSA_NAME
2472    representing the negated value.  Insertions of any necessary
2473    instructions go before GSI.
2474    This function is recursive in that, if you hand it "a_5" as the
2475    value to negate, and a_5 is defined by "a_5 = b_3 + b_4", it will
2476    transform b_3 + b_4 into a_5 = -b_3 + -b_4.  */
2477 
2478 static tree
2479 negate_value (tree tonegate, gimple_stmt_iterator *gsi)
2480 {
2481   gimple negatedefstmt= NULL;
2482   tree resultofnegate;
2483 
2484   /* If we are trying to negate a name, defined by an add, negate the
2485      add operands instead.  */
2486   if (TREE_CODE (tonegate) == SSA_NAME)
2487     negatedefstmt = SSA_NAME_DEF_STMT (tonegate);
2488   if (TREE_CODE (tonegate) == SSA_NAME
2489       && is_gimple_assign (negatedefstmt)
2490       && TREE_CODE (gimple_assign_lhs (negatedefstmt)) == SSA_NAME
2491       && has_single_use (gimple_assign_lhs (negatedefstmt))
2492       && gimple_assign_rhs_code (negatedefstmt) == PLUS_EXPR)
2493     {
2494       gimple_stmt_iterator gsi;
2495       tree rhs1 = gimple_assign_rhs1 (negatedefstmt);
2496       tree rhs2 = gimple_assign_rhs2 (negatedefstmt);
2497 
2498       gsi = gsi_for_stmt (negatedefstmt);
2499       rhs1 = negate_value (rhs1, &gsi);
2500       gimple_assign_set_rhs1 (negatedefstmt, rhs1);
2501 
2502       gsi = gsi_for_stmt (negatedefstmt);
2503       rhs2 = negate_value (rhs2, &gsi);
2504       gimple_assign_set_rhs2 (negatedefstmt, rhs2);
2505 
2506       update_stmt (negatedefstmt);
2507       return gimple_assign_lhs (negatedefstmt);
2508     }
2509 
2510   tonegate = fold_build1 (NEGATE_EXPR, TREE_TYPE (tonegate), tonegate);
2511   resultofnegate = force_gimple_operand_gsi (gsi, tonegate, true,
2512 					     NULL_TREE, true, GSI_SAME_STMT);
2513   return resultofnegate;
2514 }
2515 
2516 /* Return true if we should break up the subtract in STMT into an add
2517    with negate.  This is true when the subtract operands are really
2518    adds, or the subtract itself is used in an add expression.  In
2519    either case, breaking up the subtract into an add with negate
2520    exposes the adds to reassociation.  */
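
/* E.g. (illustrative): for
     c = a - b;  d = c + e;
   the subtract is broken up because its result feeds an addition,
   whereas a subtract whose operands are not adds and whose result is
   not used in an add or multiply is left alone.  */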
2521 
2522 static bool
2523 should_break_up_subtract (gimple stmt)
2524 {
2525   tree lhs = gimple_assign_lhs (stmt);
2526   tree binlhs = gimple_assign_rhs1 (stmt);
2527   tree binrhs = gimple_assign_rhs2 (stmt);
2528   gimple immusestmt;
2529   struct loop *loop = loop_containing_stmt (stmt);
2530 
2531   if (TREE_CODE (binlhs) == SSA_NAME
2532       && is_reassociable_op (SSA_NAME_DEF_STMT (binlhs), PLUS_EXPR, loop))
2533     return true;
2534 
2535   if (TREE_CODE (binrhs) == SSA_NAME
2536       && is_reassociable_op (SSA_NAME_DEF_STMT (binrhs), PLUS_EXPR, loop))
2537     return true;
2538 
2539   if (TREE_CODE (lhs) == SSA_NAME
2540       && (immusestmt = get_single_immediate_use (lhs))
2541       && is_gimple_assign (immusestmt)
2542       && (gimple_assign_rhs_code (immusestmt) == PLUS_EXPR
2543 	  ||  gimple_assign_rhs_code (immusestmt) == MULT_EXPR))
2544     return true;
2545   return false;
2546 }
2547 
2548 /* Transform STMT from A - B into A + -B.  */
2549 
2550 static void
2551 break_up_subtract (gimple stmt, gimple_stmt_iterator *gsip)
2552 {
2553   tree rhs1 = gimple_assign_rhs1 (stmt);
2554   tree rhs2 = gimple_assign_rhs2 (stmt);
2555 
2556   if (dump_file && (dump_flags & TDF_DETAILS))
2557     {
2558       fprintf (dump_file, "Breaking up subtract ");
2559       print_gimple_stmt (dump_file, stmt, 0, 0);
2560     }
2561 
2562   rhs2 = negate_value (rhs2, gsip);
2563   gimple_assign_set_rhs_with_ops (gsip, PLUS_EXPR, rhs1, rhs2);
2564   update_stmt (stmt);
2565 }
2566 
2567 /* Recursively linearize a binary expression that is the RHS of STMT.
2568    Place the operands of the expression tree in the vector named OPS.  */
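
/* For illustration (hypothetical SSA names): linearizing
     t1 = a + b;  t2 = t1 + c;  w = t2 + d;
   starting from the statement defining w fills OPS with the leaves,
   roughly { b, a, c, d }; the interior SSA names t1 and t2 are not
   added.  */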
2569 
2570 static void
2571 linearize_expr_tree (VEC(operand_entry_t, heap) **ops, gimple stmt,
2572 		     bool is_associative, bool set_visited)
2573 {
2574   tree binlhs = gimple_assign_rhs1 (stmt);
2575   tree binrhs = gimple_assign_rhs2 (stmt);
2576   gimple binlhsdef, binrhsdef;
2577   bool binlhsisreassoc = false;
2578   bool binrhsisreassoc = false;
2579   enum tree_code rhscode = gimple_assign_rhs_code (stmt);
2580   struct loop *loop = loop_containing_stmt (stmt);
2581 
2582   if (set_visited)
2583     gimple_set_visited (stmt, true);
2584 
2585   if (TREE_CODE (binlhs) == SSA_NAME)
2586     {
2587       binlhsdef = SSA_NAME_DEF_STMT (binlhs);
2588       binlhsisreassoc = (is_reassociable_op (binlhsdef, rhscode, loop)
2589 			 && !stmt_could_throw_p (binlhsdef));
2590     }
2591 
2592   if (TREE_CODE (binrhs) == SSA_NAME)
2593     {
2594       binrhsdef = SSA_NAME_DEF_STMT (binrhs);
2595       binrhsisreassoc = (is_reassociable_op (binrhsdef, rhscode, loop)
2596 			 && !stmt_could_throw_p (binrhsdef));
2597     }
2598 
2599   /* If the LHS is not reassociable, but the RHS is, we need to swap
2600      them.  If neither is reassociable, there is nothing we can do, so
2601      just put them in the ops vector.  If the LHS is reassociable,
2602      linearize it.  If both are reassociable, then linearize the RHS
2603      and the LHS.  */
2604 
2605   if (!binlhsisreassoc)
2606     {
2607       tree temp;
2608 
2609       /* If this is not an associative operation like division, give up.  */
2610       if (!is_associative)
2611 	{
2612 	  add_to_ops_vec (ops, binrhs);
2613 	  return;
2614 	}
2615 
2616       if (!binrhsisreassoc)
2617 	{
2618 	  add_to_ops_vec (ops, binrhs);
2619 	  add_to_ops_vec (ops, binlhs);
2620 	  return;
2621 	}
2622 
2623       if (dump_file && (dump_flags & TDF_DETAILS))
2624 	{
2625 	  fprintf (dump_file, "swapping operands of ");
2626 	  print_gimple_stmt (dump_file, stmt, 0, 0);
2627 	}
2628 
2629       swap_tree_operands (stmt,
2630 			  gimple_assign_rhs1_ptr (stmt),
2631 			  gimple_assign_rhs2_ptr (stmt));
2632       update_stmt (stmt);
2633 
2634       if (dump_file && (dump_flags & TDF_DETAILS))
2635 	{
2636 	  fprintf (dump_file, " is now ");
2637 	  print_gimple_stmt (dump_file, stmt, 0, 0);
2638 	}
2639 
2640       /* We want to make it so the lhs is always the reassociative op,
2641 	 so swap.  */
2642       temp = binlhs;
2643       binlhs = binrhs;
2644       binrhs = temp;
2645     }
2646   else if (binrhsisreassoc)
2647     {
2648       linearize_expr (stmt);
2649       binlhs = gimple_assign_rhs1 (stmt);
2650       binrhs = gimple_assign_rhs2 (stmt);
2651     }
2652 
2653   gcc_assert (TREE_CODE (binrhs) != SSA_NAME
2654 	      || !is_reassociable_op (SSA_NAME_DEF_STMT (binrhs),
2655 				      rhscode, loop));
2656   linearize_expr_tree (ops, SSA_NAME_DEF_STMT (binlhs),
2657 		       is_associative, set_visited);
2658   add_to_ops_vec (ops, binrhs);
2659 }
2660 
2661 /* Repropagate the negates back into subtracts, since no other pass
2662    currently does it.  */
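
/* E.g. (illustrative): for
     t = -a;  x = b + t;
   the addition is rewritten as x = b - a; the MINUS_EXPR cases below
   similarly fold the negate into a subtraction or push it further
   down.  */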
2663 
2664 static void
2665 repropagate_negates (void)
2666 {
2667   unsigned int i = 0;
2668   tree negate;
2669 
2670   FOR_EACH_VEC_ELT (tree, plus_negates, i, negate)
2671     {
2672       gimple user = get_single_immediate_use (negate);
2673 
2674       if (!user || !is_gimple_assign (user))
2675 	continue;
2676 
2677       /* The negate operand can be either operand of a PLUS_EXPR
2678 	 (it can be the LHS if the RHS is a constant for example).
2679 
2680 	 Force the negate operand to the RHS of the PLUS_EXPR, then
2681 	 transform the PLUS_EXPR into a MINUS_EXPR.  */
2682       if (gimple_assign_rhs_code (user) == PLUS_EXPR)
2683 	{
2684 	  /* If the negated operand appears on the LHS of the
2685 	     PLUS_EXPR, exchange the operands of the PLUS_EXPR
2686 	     to force the negated operand to the RHS of the PLUS_EXPR.  */
2687 	  if (gimple_assign_rhs1 (user) == negate)
2688 	    {
2689 	      swap_tree_operands (user,
2690 				  gimple_assign_rhs1_ptr (user),
2691 				  gimple_assign_rhs2_ptr (user));
2692 	    }
2693 
2694 	  /* Now transform the PLUS_EXPR into a MINUS_EXPR and replace
2695 	     the RHS of the PLUS_EXPR with the operand of the NEGATE_EXPR.  */
2696 	  if (gimple_assign_rhs2 (user) == negate)
2697 	    {
2698 	      tree rhs1 = gimple_assign_rhs1 (user);
2699 	      tree rhs2 = get_unary_op (negate, NEGATE_EXPR);
2700 	      gimple_stmt_iterator gsi = gsi_for_stmt (user);
2701 	      gimple_assign_set_rhs_with_ops (&gsi, MINUS_EXPR, rhs1, rhs2);
2702 	      update_stmt (user);
2703 	    }
2704 	}
2705       else if (gimple_assign_rhs_code (user) == MINUS_EXPR)
2706 	{
2707 	  if (gimple_assign_rhs1 (user) == negate)
2708 	    {
2709 	      /* We have
2710 	           x = -a
2711 		   y = x - b
2712 		 which we transform into
2713 		   x = a + b
2714 		   y = -x .
2715 		 This pushes down the negate which we possibly can merge
2716 		 into some other operation, hence insert it into the
2717 		 plus_negates vector.  */
2718 	      gimple feed = SSA_NAME_DEF_STMT (negate);
2719 	      tree a = gimple_assign_rhs1 (feed);
2720 	      tree rhs2 = gimple_assign_rhs2 (user);
2721 	      gimple_stmt_iterator gsi = gsi_for_stmt (feed), gsi2;
2722 	      gimple_replace_lhs (feed, negate);
2723 	      gimple_assign_set_rhs_with_ops (&gsi, PLUS_EXPR, a, rhs2);
2724 	      update_stmt (gsi_stmt (gsi));
2725 	      gsi2 = gsi_for_stmt (user);
2726 	      gimple_assign_set_rhs_with_ops (&gsi2, NEGATE_EXPR, negate, NULL);
2727 	      update_stmt (gsi_stmt (gsi2));
2728 	      gsi_move_before (&gsi, &gsi2);
2729 	      VEC_safe_push (tree, heap, plus_negates,
2730 			     gimple_assign_lhs (gsi_stmt (gsi2)));
2731 	    }
2732 	  else
2733 	    {
2734 	      /* Transform "x = -a; y = b - x" into "y = b + a", getting
2735 	         rid of one operation.  */
2736 	      gimple feed = SSA_NAME_DEF_STMT (negate);
2737 	      tree a = gimple_assign_rhs1 (feed);
2738 	      tree rhs1 = gimple_assign_rhs1 (user);
2739 	      gimple_stmt_iterator gsi = gsi_for_stmt (user);
2740 	      gimple_assign_set_rhs_with_ops (&gsi, PLUS_EXPR, rhs1, a);
2741 	      update_stmt (gsi_stmt (gsi));
2742 	    }
2743 	}
2744     }
2745 }
2746 
2747 /* Returns true if OP is of a type for which we can do reassociation.
2748    That is for wrapping integral or non-saturating fixed-point types, and
2749    for floating point types when associative-math is enabled.  */
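
/* E.g. (illustrative): an unsigned int operand always qualifies, a
   signed int operand qualifies only when signed overflow wraps
   (-fwrapv), and a double operand only under -fassociative-math.  */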
2750 
2751 static bool
2752 can_reassociate_p (tree op)
2753 {
2754   tree type = TREE_TYPE (op);
2755   if ((INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_WRAPS (type))
2756       || NON_SAT_FIXED_POINT_TYPE_P (type)
2757       || (flag_associative_math && FLOAT_TYPE_P (type)))
2758     return true;
2759   return false;
2760 }
2761 
2762 /* Break up subtract operations in block BB.
2763 
2764    We do this top down because we don't know whether the subtract is
2765    part of a possible chain of reassociation except at the top.
2766 
2767    IE given
2768    d = f + g
2769    c = a + e
2770    b = c - d
2771    q = b - r
2772    k = t - q
2773 
2774    we want to break up k = t - q, but we won't until we've transformed q
2775    = b - r, which won't be broken up until we transform b = c - d.
2776 
2777    En passant, clear the GIMPLE visited flag on every statement.  */
2778 
2779 static void
2780 break_up_subtract_bb (basic_block bb)
2781 {
2782   gimple_stmt_iterator gsi;
2783   basic_block son;
2784 
2785   for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2786     {
2787       gimple stmt = gsi_stmt (gsi);
2788       gimple_set_visited (stmt, false);
2789 
2790       if (!is_gimple_assign (stmt)
2791 	  || !can_reassociate_p (gimple_assign_lhs (stmt)))
2792 	continue;
2793 
2794       /* Look for simple gimple subtract operations.  */
2795       if (gimple_assign_rhs_code (stmt) == MINUS_EXPR)
2796 	{
2797 	  if (!can_reassociate_p (gimple_assign_rhs1 (stmt))
2798 	      || !can_reassociate_p (gimple_assign_rhs2 (stmt)))
2799 	    continue;
2800 
2801 	  /* Check for a subtract used only in an addition.  If this
2802 	     is the case, transform it into add of a negate for better
2803 	     reassociation.  IE transform C = A-B into C = A + -B if C
2804 	     is only used in an addition.  */
2805 	  if (should_break_up_subtract (stmt))
2806 	    break_up_subtract (stmt, &gsi);
2807 	}
2808       else if (gimple_assign_rhs_code (stmt) == NEGATE_EXPR
2809 	       && can_reassociate_p (gimple_assign_rhs1 (stmt)))
2810 	VEC_safe_push (tree, heap, plus_negates, gimple_assign_lhs (stmt));
2811     }
2812   for (son = first_dom_son (CDI_DOMINATORS, bb);
2813        son;
2814        son = next_dom_son (CDI_DOMINATORS, son))
2815     break_up_subtract_bb (son);
2816 }
2817 
2818 /* Reassociate expressions in basic block BB and its post-dominator
2819    children.  */
2820 
2821 static void
2822 reassociate_bb (basic_block bb)
2823 {
2824   gimple_stmt_iterator gsi;
2825   basic_block son;
2826 
2827   for (gsi = gsi_last_bb (bb); !gsi_end_p (gsi); gsi_prev (&gsi))
2828     {
2829       gimple stmt = gsi_stmt (gsi);
2830 
2831       if (is_gimple_assign (stmt)
2832 	  && !stmt_could_throw_p (stmt))
2833 	{
2834 	  tree lhs, rhs1, rhs2;
2835 	  enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
2836 
2837 	  /* If this is not a gimple binary expression, there is
2838 	     nothing for us to do with it.  */
2839 	  if (get_gimple_rhs_class (rhs_code) != GIMPLE_BINARY_RHS)
2840 	    continue;
2841 
2842 	  /* If this was part of an already processed statement,
2843 	     we don't need to touch it again. */
2844 	  if (gimple_visited_p (stmt))
2845 	    {
2846 	      /* This statement might have become dead because of previous
2847 		 reassociations.  */
2848 	      if (has_zero_uses (gimple_get_lhs (stmt)))
2849 		{
2850 		  gsi_remove (&gsi, true);
2851 		  release_defs (stmt);
2852 		  /* We might have removed the last stmt above, which
2853 		     leaves the iterator at the end of the sequence.
2854 		     Reset it to the last stmt in this case, which might
2855 		     be the end of the sequence as well if we removed
2856 		     the last statement of the sequence, in which case
2857 		     we need to bail out.  */
2858 		  if (gsi_end_p (gsi))
2859 		    {
2860 		      gsi = gsi_last_bb (bb);
2861 		      if (gsi_end_p (gsi))
2862 			break;
2863 		    }
2864 		}
2865 	      continue;
2866 	    }
2867 
2868 	  lhs = gimple_assign_lhs (stmt);
2869 	  rhs1 = gimple_assign_rhs1 (stmt);
2870 	  rhs2 = gimple_assign_rhs2 (stmt);
2871 
2872 	  /* For non-bit or min/max operations we can't associate
2873 	     all types.  Verify that here.  */
2874 	  if (rhs_code != BIT_IOR_EXPR
2875 	      && rhs_code != BIT_AND_EXPR
2876 	      && rhs_code != BIT_XOR_EXPR
2877 	      && rhs_code != MIN_EXPR
2878 	      && rhs_code != MAX_EXPR
2879 	      && (!can_reassociate_p (lhs)
2880 		  || !can_reassociate_p (rhs1)
2881 		  || !can_reassociate_p (rhs2)))
2882 	    continue;
2883 
2884 	  if (associative_tree_code (rhs_code))
2885 	    {
2886 	      VEC(operand_entry_t, heap) *ops = NULL;
2887 
2888 	      /* There may be no immediate uses left by the time we
2889 		 get here because we may have eliminated them all.  */
2890 	      if (TREE_CODE (lhs) == SSA_NAME && has_zero_uses (lhs))
2891 		continue;
2892 
2893 	      gimple_set_visited (stmt, true);
2894 	      linearize_expr_tree (&ops, stmt, true, true);
2895 	      VEC_qsort (operand_entry_t, ops, sort_by_operand_rank);
2896 	      optimize_ops_list (rhs_code, &ops);
2897 	      if (undistribute_ops_list (rhs_code, &ops,
2898 					 loop_containing_stmt (stmt)))
2899 		{
2900 		  VEC_qsort (operand_entry_t, ops, sort_by_operand_rank);
2901 		  optimize_ops_list (rhs_code, &ops);
2902 		}
2903 
2904 	      if (rhs_code == BIT_IOR_EXPR || rhs_code == BIT_AND_EXPR)
2905 		optimize_range_tests (rhs_code, &ops);
2906 
2907 	      if (VEC_length (operand_entry_t, ops) == 1)
2908 		{
2909 		  if (dump_file && (dump_flags & TDF_DETAILS))
2910 		    {
2911 		      fprintf (dump_file, "Transforming ");
2912 		      print_gimple_stmt (dump_file, stmt, 0, 0);
2913 		    }
2914 
2915 		  rhs1 = gimple_assign_rhs1 (stmt);
2916 		  gimple_assign_set_rhs_from_tree (&gsi,
2917 						   VEC_last (operand_entry_t,
2918 							     ops)->op);
2919 		  update_stmt (stmt);
2920 		  remove_visited_stmt_chain (rhs1);
2921 
2922 		  if (dump_file && (dump_flags & TDF_DETAILS))
2923 		    {
2924 		      fprintf (dump_file, " into ");
2925 		      print_gimple_stmt (dump_file, stmt, 0, 0);
2926 		    }
2927 		}
2928 	      else
2929 		{
2930 		  enum machine_mode mode = TYPE_MODE (TREE_TYPE (lhs));
2931 		  int ops_num = VEC_length (operand_entry_t, ops);
2932 		  int width = get_reassociation_width (ops_num, rhs_code, mode);
2933 
2934 		  if (dump_file && (dump_flags & TDF_DETAILS))
2935 		    fprintf (dump_file,
2936 			     "Width = %d was chosen for reassociation\n", width);
2937 
2938 		  if (width > 1
2939 		      && VEC_length (operand_entry_t, ops) > 3)
2940 		    rewrite_expr_tree_parallel (stmt, width, ops);
2941 		  else
2942 		    rewrite_expr_tree (stmt, 0, ops, false);
2943 		}
2944 
2945 	      VEC_free (operand_entry_t, heap, ops);
2946 	    }
2947 	}
2948     }
2949   for (son = first_dom_son (CDI_POST_DOMINATORS, bb);
2950        son;
2951        son = next_dom_son (CDI_POST_DOMINATORS, son))
2952     reassociate_bb (son);
2953 }
2954 
2955 void dump_ops_vector (FILE *file, VEC (operand_entry_t, heap) *ops);
2956 void debug_ops_vector (VEC (operand_entry_t, heap) *ops);
2957 
2958 /* Dump the operand entry vector OPS to FILE.  */
2959 
2960 void
2961 dump_ops_vector (FILE *file, VEC (operand_entry_t, heap) *ops)
2962 {
2963   operand_entry_t oe;
2964   unsigned int i;
2965 
2966   FOR_EACH_VEC_ELT (operand_entry_t, ops, i, oe)
2967     {
2968       fprintf (file, "Op %d -> rank: %d, tree: ", i, oe->rank);
2969       print_generic_expr (file, oe->op, 0);
2970     }
2971 }
2972 
2973 /* Dump the operand entry vector OPS to STDERR.  */
2974 
2975 DEBUG_FUNCTION void
2976 debug_ops_vector (VEC (operand_entry_t, heap) *ops)
2977 {
2978   dump_ops_vector (stderr, ops);
2979 }
2980 
2981 static void
2982 do_reassoc (void)
2983 {
2984   break_up_subtract_bb (ENTRY_BLOCK_PTR);
2985   reassociate_bb (EXIT_BLOCK_PTR);
2986 }
2987 
2988 /* Initialize the reassociation pass.  */
2989 
2990 static void
2991 init_reassoc (void)
2992 {
2993   int i;
2994   long rank = 2;
2995   tree param;
2996   int *bbs = XNEWVEC (int, last_basic_block + 1);
2997 
2998   /* Find the loops, so that we can prevent moving calculations in
2999      them.  */
3000   loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
3001 
3002   memset (&reassociate_stats, 0, sizeof (reassociate_stats));
3003 
3004   operand_entry_pool = create_alloc_pool ("operand entry pool",
3005 					  sizeof (struct operand_entry), 30);
3006   next_operand_entry_id = 0;
3007 
3008   /* Reverse RPO (Reverse Post Order) will give us something where
3009      deeper loops come later.  */
3010   pre_and_rev_post_order_compute (NULL, bbs, false);
3011   bb_rank = XCNEWVEC (long, last_basic_block + 1);
3012   operand_rank = pointer_map_create ();
3013 
3014   /* Give each argument a distinct rank.   */
3015   for (param = DECL_ARGUMENTS (current_function_decl);
3016        param;
3017        param = DECL_CHAIN (param))
3018     {
3019       if (gimple_default_def (cfun, param) != NULL)
3020 	{
3021 	  tree def = gimple_default_def (cfun, param);
3022 	  insert_operand_rank (def, ++rank);
3023 	}
3024     }
3025 
3026   /* Give the chain decl a distinct rank. */
3027   if (cfun->static_chain_decl != NULL)
3028     {
3029       tree def = gimple_default_def (cfun, cfun->static_chain_decl);
3030       if (def != NULL)
3031 	insert_operand_rank (def, ++rank);
3032     }
3033 
3034   /* Set up rank for each BB.  */
3035   for (i = 0; i < n_basic_blocks - NUM_FIXED_BLOCKS; i++)
3036     bb_rank[bbs[i]] = ++rank << 16;
3037 
3038   free (bbs);
3039   calculate_dominance_info (CDI_POST_DOMINATORS);
3040   plus_negates = NULL;
3041 }
3042 
3043 /* Cleanup after the reassociation pass, and print stats if
3044    requested.  */
3045 
3046 static void
3047 fini_reassoc (void)
3048 {
3049   statistics_counter_event (cfun, "Linearized",
3050 			    reassociate_stats.linearized);
3051   statistics_counter_event (cfun, "Constants eliminated",
3052 			    reassociate_stats.constants_eliminated);
3053   statistics_counter_event (cfun, "Ops eliminated",
3054 			    reassociate_stats.ops_eliminated);
3055   statistics_counter_event (cfun, "Statements rewritten",
3056 			    reassociate_stats.rewritten);
3057 
3058   pointer_map_destroy (operand_rank);
3059   free_alloc_pool (operand_entry_pool);
3060   free (bb_rank);
3061   VEC_free (tree, heap, plus_negates);
3062   free_dominance_info (CDI_POST_DOMINATORS);
3063   loop_optimizer_finalize ();
3064 }
3065 
3066 /* Gate and execute functions for Reassociation.  */
3067 
3068 static unsigned int
3069 execute_reassoc (void)
3070 {
3071   init_reassoc ();
3072 
3073   do_reassoc ();
3074   repropagate_negates ();
3075 
3076   fini_reassoc ();
3077   return 0;
3078 }
3079 
3080 static bool
3081 gate_tree_ssa_reassoc (void)
3082 {
3083   return flag_tree_reassoc != 0;
3084 }
3085 
3086 struct gimple_opt_pass pass_reassoc =
3087 {
3088  {
3089   GIMPLE_PASS,
3090   "reassoc",				/* name */
3091   gate_tree_ssa_reassoc,		/* gate */
3092   execute_reassoc,			/* execute */
3093   NULL,					/* sub */
3094   NULL,					/* next */
3095   0,					/* static_pass_number */
3096   TV_TREE_REASSOC,			/* tv_id */
3097   PROP_cfg | PROP_ssa,			/* properties_required */
3098   0,					/* properties_provided */
3099   0,					/* properties_destroyed */
3100   0,					/* todo_flags_start */
3101   TODO_verify_ssa
3102     | TODO_verify_flow
3103     | TODO_ggc_collect			/* todo_flags_finish */
3104  }
3105 };
3106