1 /* Reassociation for trees.
2    Copyright (C) 2005-2018 Free Software Foundation, Inc.
3    Contributed by Daniel Berlin <dan@dberlin.org>
4 
5 This file is part of GCC.
6 
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
11 
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 GNU General Public License for more details.
16 
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3.  If not see
19 <http://www.gnu.org/licenses/>.  */
20 
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "backend.h"
25 #include "target.h"
26 #include "rtl.h"
27 #include "tree.h"
28 #include "gimple.h"
29 #include "cfghooks.h"
30 #include "alloc-pool.h"
31 #include "tree-pass.h"
32 #include "memmodel.h"
33 #include "tm_p.h"
34 #include "ssa.h"
35 #include "optabs-tree.h"
36 #include "gimple-pretty-print.h"
37 #include "diagnostic-core.h"
38 #include "fold-const.h"
39 #include "stor-layout.h"
40 #include "cfganal.h"
41 #include "gimple-fold.h"
42 #include "tree-eh.h"
43 #include "gimple-iterator.h"
44 #include "gimplify-me.h"
45 #include "tree-cfg.h"
46 #include "tree-ssa-loop.h"
47 #include "flags.h"
48 #include "tree-ssa.h"
49 #include "langhooks.h"
50 #include "cfgloop.h"
51 #include "params.h"
52 #include "builtins.h"
53 #include "gimplify.h"
54 #include "case-cfn-macros.h"
55 
56 /*  This is a simple global reassociation pass.  It is, in part, based
57     on the LLVM pass of the same name (they do some things more or less
58     than we do, in different orders, etc.).
59 
60     It consists of five steps:
61 
62     1. Breaking up subtract operations into addition + negate, where
63     it would promote the reassociation of adds.
64 
65     2. Left linearization of the expression trees, so that (A+B)+(C+D)
66     becomes (((A+B)+C)+D), which is easier for us to rewrite later.
67     During linearization, we place the operands of the binary
68     expressions into a vector of operand_entry_*
69 
70     3. Optimization of the operand lists, eliminating things like a +
71     -a, a & a, etc.
72 
73     3a. Combine repeated factors with the same occurrence counts
74     into a __builtin_powi call that will later be optimized into
75     an optimal number of multiplies.
76 
77     4. Rewrite the expression trees we linearized and optimized so
78     they are in proper rank order.
79 
80     5. Repropagate negates, as nothing else will clean it up ATM.
81 
82     A bit of theory on #4, since nobody seems to write anything down
83     about why it makes sense to do it the way they do it:
84 
85     We could do this much nicer theoretically, but don't (for reasons
86     explained after how to do it theoretically nice :P).
87 
88     In order to promote the most redundancy elimination, you want
89     binary expressions whose operands are the same rank (or
90     preferably, the same value) exposed to the redundancy eliminator,
91     for possible elimination.
92 
93     So the way to do this, if we really cared, is to build the new op
94     tree from the leaves to the roots, merging as you go, and putting the
95     new op on the end of the worklist, until you are left with one
96     thing on the worklist.
97 
98     IE if you have to rewrite the following set of operands (listed with
99     rank in parentheses), with opcode PLUS_EXPR:
100 
101     a (1),  b (1),  c (1),  d (2), e (2)
102 
103 
104     We start with our merge worklist empty, and the ops list with all of
105     those on it.
106 
107     You want to first merge all leaves of the same rank, as much as
108     possible.
109 
110     So first build a binary op of
111 
112     mergetmp = a + b, and put "mergetmp" on the merge worklist.
113 
114     Because there is no three operand form of PLUS_EXPR, c is not going to
115     be exposed to redundancy elimination as a rank 1 operand.
116 
117     So you might as well throw it on the merge worklist (you could also
118     consider it to now be a rank two operand, and merge it with d and e,
119     but in this case, you then have evicted e from a binary op. So at
120     least in this situation, you can't win.)
121 
122     Then build a binary op of d + e
123     mergetmp2 = d + e
124 
125     and put mergetmp2 on the merge worklist.
126 
127     so merge worklist = {mergetmp, c, mergetmp2}
128 
129     Continue building binary ops of these operations until you have only
130     one operation left on the worklist.
131 
132     So we have
133 
134     build binary op
135     mergetmp3 = mergetmp + c
136 
137     worklist = {mergetmp2, mergetmp3}
138 
139     mergetmp4 = mergetmp2 + mergetmp3
140 
141     worklist = {mergetmp4}
142 
143     because we have one operation left, we can now just set the original
144     statement equal to the result of that operation.
145 
146     This will at least expose a + b  and d + e to redundancy elimination
147     as binary operations.
148 
149     For extra points, you can reuse the old statements to build the
150     mergetmps, since you shouldn't run out.
151 
152     So why don't we do this?
153 
154     Because it's expensive, and will rarely help.  Most trees we are
155     reassociating have 3 or fewer ops.  If they have 2 ops, they already
156     will be written into a nice single binary op.  If you have 3 ops, a
157     single simple check suffices to tell you whether the first two are of the
158     same rank.  If so, you know to order it
159 
160     mergetmp = op1 + op2
161     newstmt = mergetmp + op3
162 
163     instead of
164     mergetmp = op2 + op3
165     newstmt = mergetmp + op1
166 
167     If all three are of the same rank, you can't expose them all in a
168     single binary operator anyway, so the above is *still* the best you
169     can do.
170 
171     Thus, this is what we do.  When we have three ops left, we check to see
172     what order to put them in, and call it a day.  As a nod to vector sum
173     reduction, we check if any of the ops are really a phi node that is a
174     destructive update for the associating op, and keep the destructive
175     update together for vector sum reduction recognition.  */
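
/* As a small illustration of steps 1 and 2 above (a sketch, not taken from
   an actual dump): a statement computing t = (a - b) + (c + d) is first
   rewritten using a negate, t = (a + (-b)) + (c + d), and then linearized
   so that the operand vector for the final PLUS_EXPR chain holds
   { a, -b, c, d }, which steps 3 and 4 are then free to reorder.  */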
176 
177 /* Enable insertion of __builtin_powi calls during execute_reassoc.  See
178    point 3a in the pass header comment.  */
179 static bool reassoc_insert_powi_p;
180 
181 /* Statistics */
182 static struct
183 {
184   int linearized;
185   int constants_eliminated;
186   int ops_eliminated;
187   int rewritten;
188   int pows_encountered;
189   int pows_created;
190 } reassociate_stats;
191 
192 /* Operator, rank pair.  */
193 struct operand_entry
194 {
195   unsigned int rank;
196   unsigned int id;
197   tree op;
198   unsigned int count;
199   gimple *stmt_to_insert;
200 };
201 
202 static object_allocator<operand_entry> operand_entry_pool
203   ("operand entry pool");
204 
205 /* This is used to assign a unique ID to each struct operand_entry
206    so that qsort results are identical on different hosts.  */
207 static unsigned int next_operand_entry_id;
208 
209 /* Starting rank number for a given basic block, so that we can rank
210    operations using unmovable instructions in that BB based on the bb
211    depth.  */
212 static long *bb_rank;
213 
214 /* Operand->rank hashtable.  */
215 static hash_map<tree, long> *operand_rank;
216 
217 /* Vector of SSA_NAMEs for which, after reassociate_bb is done with
218    all basic blocks, the CFG should be adjusted - the basic block is
219    split right after that SSA_NAME's definition statement and before
220    the only use, which must be a bit ior.  */
221 static vec<tree> reassoc_branch_fixups;
222 
223 /* Forward decls.  */
224 static long get_rank (tree);
225 static bool reassoc_stmt_dominates_stmt_p (gimple *, gimple *);
226 
227 /* Wrapper around gsi_remove, which adjusts gimple_uid of debug stmts
228    possibly added by gsi_remove.  */
229 
230 bool
231 reassoc_remove_stmt (gimple_stmt_iterator *gsi)
232 {
233   gimple *stmt = gsi_stmt (*gsi);
234 
235   if (!MAY_HAVE_DEBUG_BIND_STMTS || gimple_code (stmt) == GIMPLE_PHI)
236     return gsi_remove (gsi, true);
237 
238   gimple_stmt_iterator prev = *gsi;
239   gsi_prev (&prev);
240   unsigned uid = gimple_uid (stmt);
241   basic_block bb = gimple_bb (stmt);
242   bool ret = gsi_remove (gsi, true);
243   if (!gsi_end_p (prev))
244     gsi_next (&prev);
245   else
246     prev = gsi_start_bb (bb);
247   gimple *end_stmt = gsi_stmt (*gsi);
248   while ((stmt = gsi_stmt (prev)) != end_stmt)
249     {
250       gcc_assert (stmt && is_gimple_debug (stmt) && gimple_uid (stmt) == 0);
251       gimple_set_uid (stmt, uid);
252       gsi_next (&prev);
253     }
254   return ret;
255 }
256 
257 /* Bias amount for loop-carried phis.  We want this to be larger than
258    the depth of any reassociation tree we can see, but not larger than
259    the rank difference between two blocks.  */
260 #define PHI_LOOP_BIAS (1 << 15)
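
/* Block ranks themselves are assigned in steps of 1 << 16 when bb_rank is
   set up (see init_reassoc and the >> 16 in sort_by_operand_rank), so the
   1 << 15 bias above is large compared to any realistic expression depth
   yet still smaller than the rank gap between two blocks, as required by
   the comment above.  */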
261 
262 /* Rank assigned to a phi statement.  If STMT is a loop-carried phi of
263    an innermost loop, and the phi has only a single use which is inside
264    the loop, then the rank is the block rank of the loop latch plus an
265    extra bias for the loop-carried dependence.  This causes expressions
266    calculated into an accumulator variable to be independent for each
267    iteration of the loop.  If STMT is some other phi, the rank is the
268    block rank of its containing block.  */
269 static long
270 phi_rank (gimple *stmt)
271 {
272   basic_block bb = gimple_bb (stmt);
273   struct loop *father = bb->loop_father;
274   tree res;
275   unsigned i;
276   use_operand_p use;
277   gimple *use_stmt;
278 
279   /* We only care about real loops (those with a latch).  */
280   if (!father->latch)
281     return bb_rank[bb->index];
282 
283   /* Interesting phis must be in headers of innermost loops.  */
284   if (bb != father->header
285       || father->inner)
286     return bb_rank[bb->index];
287 
288   /* Ignore virtual SSA_NAMEs.  */
289   res = gimple_phi_result (stmt);
290   if (virtual_operand_p (res))
291     return bb_rank[bb->index];
292 
293   /* The phi definition must have a single use, and that use must be
294      within the loop.  Otherwise this isn't an accumulator pattern.  */
295   if (!single_imm_use (res, &use, &use_stmt)
296       || gimple_bb (use_stmt)->loop_father != father)
297     return bb_rank[bb->index];
298 
299   /* Look for phi arguments from within the loop.  If found, bias this phi.  */
300   for (i = 0; i < gimple_phi_num_args (stmt); i++)
301     {
302       tree arg = gimple_phi_arg_def (stmt, i);
303       if (TREE_CODE (arg) == SSA_NAME
304 	  && !SSA_NAME_IS_DEFAULT_DEF (arg))
305 	{
306 	  gimple *def_stmt = SSA_NAME_DEF_STMT (arg);
307 	  if (gimple_bb (def_stmt)->loop_father == father)
308 	    return bb_rank[father->latch->index] + PHI_LOOP_BIAS;
309 	}
310     }
311 
312   /* Must be an uninteresting phi.  */
313   return bb_rank[bb->index];
314 }
315 
316 /* If EXP is an SSA_NAME defined by a PHI statement that represents a
317    loop-carried dependence of an innermost loop, return TRUE; else
318    return FALSE.  */
319 static bool
320 loop_carried_phi (tree exp)
321 {
322   gimple *phi_stmt;
323   long block_rank;
324 
325   if (TREE_CODE (exp) != SSA_NAME
326       || SSA_NAME_IS_DEFAULT_DEF (exp))
327     return false;
328 
329   phi_stmt = SSA_NAME_DEF_STMT (exp);
330 
331   if (gimple_code (phi_stmt) != GIMPLE_PHI)
332     return false;
333 
334   /* Non-loop-carried phis have block rank.  Loop-carried phis have
335      an additional bias added in.  If this phi doesn't have block rank,
336      it's biased and should not be propagated.  */
337   block_rank = bb_rank[gimple_bb (phi_stmt)->index];
338 
339   if (phi_rank (phi_stmt) != block_rank)
340     return true;
341 
342   return false;
343 }
344 
345 /* Return the maximum of RANK and the rank that should be propagated
346    from expression OP.  For most operands, this is just the rank of OP.
347    For loop-carried phis, the value is zero to avoid undoing the bias
348    in favor of the phi.  */
349 static long
350 propagate_rank (long rank, tree op)
351 {
352   long op_rank;
353 
354   if (loop_carried_phi (op))
355     return rank;
356 
357   op_rank = get_rank (op);
358 
359   return MAX (rank, op_rank);
360 }
361 
362 /* Look up the operand rank structure for expression E.  */
363 
364 static inline long
365 find_operand_rank (tree e)
366 {
367   long *slot = operand_rank->get (e);
368   return slot ? *slot : -1;
369 }
370 
371 /* Insert {E,RANK} into the operand rank hashtable.  */
372 
373 static inline void
374 insert_operand_rank (tree e, long rank)
375 {
376   gcc_assert (rank > 0);
377   gcc_assert (!operand_rank->put (e, rank));
378 }
379 
380 /* Given an expression E, return the rank of the expression.  */
381 
382 static long
383 get_rank (tree e)
384 {
385   /* SSA_NAMEs have the rank of the expression they are the result
386      of.
387      For globals and uninitialized values, the rank is 0.
388      For function arguments, use the pre-setup rank.
389      For PHI nodes, stores, asm statements, etc, we use the rank of
390      the BB.
391      For simple operations, the rank is the maximum rank of any of
392      its operands, or the bb_rank, whichever is less.
393      I make no claims that this is optimal; however, it gives good
394      results.  */
395 
396   /* We make an exception to the normal ranking system to break
397      dependences of accumulator variables in loops.  Suppose we
398      have a simple one-block loop containing:
399 
400        x_1 = phi(x_0, x_2)
401        b = a + x_1
402        c = b + d
403        x_2 = c + e
404 
405      As shown, each iteration of the calculation into x is fully
406      dependent upon the iteration before it.  We would prefer to
407      see this in the form:
408 
409        x_1 = phi(x_0, x_2)
410        b = a + d
411        c = b + e
412        x_2 = c + x_1
413 
414      If the loop is unrolled, the calculations of b and c from
415      different iterations can be interleaved.
416 
417      To obtain this result during reassociation, we bias the rank
418      of the phi definition x_1 upward, when it is recognized as an
419      accumulator pattern.  The artificial rank causes it to be
420      added last, providing the desired independence.  */
421 
422   if (TREE_CODE (e) == SSA_NAME)
423     {
424       ssa_op_iter iter;
425       gimple *stmt;
426       long rank;
427       tree op;
428 
429       if (SSA_NAME_IS_DEFAULT_DEF (e))
430 	return find_operand_rank (e);
431 
432       stmt = SSA_NAME_DEF_STMT (e);
433       if (gimple_code (stmt) == GIMPLE_PHI)
434 	return phi_rank (stmt);
435 
436       if (!is_gimple_assign (stmt))
437 	return bb_rank[gimple_bb (stmt)->index];
438 
439       /* If we already have a rank for this expression, use that.  */
440       rank = find_operand_rank (e);
441       if (rank != -1)
442 	return rank;
443 
444       /* Otherwise, find the maximum rank for the operands.  As an
445 	 exception, remove the bias from loop-carried phis when propagating
446 	 the rank so that dependent operations are not also biased.  */
447       /* Simply walk over all SSA uses - this takes advantage of the
448          fact that non-SSA operands satisfy is_gimple_min_invariant and
449 	 thus have rank 0.  */
450       rank = 0;
451       FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
452 	rank = propagate_rank (rank, op);
453 
454       if (dump_file && (dump_flags & TDF_DETAILS))
455 	{
456 	  fprintf (dump_file, "Rank for ");
457 	  print_generic_expr (dump_file, e);
458 	  fprintf (dump_file, " is %ld\n", (rank + 1));
459 	}
460 
461       /* Note the rank in the hashtable so we don't recompute it.  */
462       insert_operand_rank (e, (rank + 1));
463       return (rank + 1);
464     }
465 
466   /* Constants, globals, etc., are rank 0 */
467   return 0;
468 }
469 
470 
471 /* We want integer ones to end up last no matter what, since they are
472    the ones we can do the most with.  */
473 #define INTEGER_CONST_TYPE 1 << 4
474 #define FLOAT_ONE_CONST_TYPE 1 << 3
475 #define FLOAT_CONST_TYPE 1 << 2
476 #define OTHER_CONST_TYPE 1 << 1
477 
478 /* Classify an invariant tree into integer, float, or other, so that
479    we can sort them to be near other constants of the same type.  */
480 static inline int
481 constant_type (tree t)
482 {
483   if (INTEGRAL_TYPE_P (TREE_TYPE (t)))
484     return INTEGER_CONST_TYPE;
485   else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (t)))
486     {
487       /* Sort -1.0 and 1.0 constants last: while in some cases
488 	 const_binop can't optimize some inexact operations, multiplication
489 	 by -1.0 or 1.0 can always be merged with other operands.  */
490       if (real_onep (t) || real_minus_onep (t))
491 	return FLOAT_ONE_CONST_TYPE;
492       return FLOAT_CONST_TYPE;
493     }
494   else
495     return OTHER_CONST_TYPE;
496 }
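
/* For example, given the rank-0 operands 3 (integer), 2.5 and 1.0 (float),
   sort_by_operand_rank below orders them 2.5, 1.0, 3: plain float constants
   first, then float 1.0/-1.0, with integer constants last, matching the
   increasing constant_type values above.  */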
497 
498 /* qsort comparison function to sort operand entries PA and PB by rank
499    so that the sorted array is ordered by rank in decreasing order.  */
500 static int
501 sort_by_operand_rank (const void *pa, const void *pb)
502 {
503   const operand_entry *oea = *(const operand_entry *const *)pa;
504   const operand_entry *oeb = *(const operand_entry *const *)pb;
505 
506   if (oeb->rank != oea->rank)
507     return oeb->rank > oea->rank ? 1 : -1;
508 
509   /* It's nicer for optimize_expression if constants that are likely
510      to fold when added/multiplied/whatever are put next to each
511      other.  Since all constants have rank 0, order them by type.  */
512   if (oea->rank == 0)
513     {
514       if (constant_type (oeb->op) != constant_type (oea->op))
515 	return constant_type (oea->op) - constant_type (oeb->op);
516       else
517 	/* To make sorting result stable, we use unique IDs to determine
518 	   order.  */
519 	return oeb->id > oea->id ? 1 : -1;
520     }
521 
522   if (TREE_CODE (oea->op) != SSA_NAME)
523     {
524       if (TREE_CODE (oeb->op) != SSA_NAME)
525 	return oeb->id > oea->id ? 1 : -1;
526       else
527 	return 1;
528     }
529   else if (TREE_CODE (oeb->op) != SSA_NAME)
530     return -1;
531 
532   /* Lastly, make sure the versions that are the same go next to each
533      other.  */
534   if (SSA_NAME_VERSION (oeb->op) != SSA_NAME_VERSION (oea->op))
535     {
536       /* SSA_NAME_VERSION is assigned pretty randomly, because we reuse
537 	 versions of removed SSA_NAMEs, so if possible prefer to sort
538 	 based on the basic block and gimple_uid of the SSA_NAME_DEF_STMT.
539 	 See PR60418.  */
540       gimple *stmta = SSA_NAME_DEF_STMT (oea->op);
541       gimple *stmtb = SSA_NAME_DEF_STMT (oeb->op);
542       basic_block bba = gimple_bb (stmta);
543       basic_block bbb = gimple_bb (stmtb);
544       if (bbb != bba)
545 	{
546 	  /* One of the SSA_NAMEs can be defined in oeN->stmt_to_insert
547 	     but the other might not be.  */
548 	  if (!bba)
549 	    return 1;
550 	  if (!bbb)
551 	    return -1;
552 	  /* If neither is, compare bb_rank.  */
553 	  if (bb_rank[bbb->index] != bb_rank[bba->index])
554 	    return (bb_rank[bbb->index] >> 16) - (bb_rank[bba->index] >> 16);
555 	}
556 
557       bool da = reassoc_stmt_dominates_stmt_p (stmta, stmtb);
558       bool db = reassoc_stmt_dominates_stmt_p (stmtb, stmta);
559       if (da != db)
560 	return da ? 1 : -1;
561 
562       return SSA_NAME_VERSION (oeb->op) > SSA_NAME_VERSION (oea->op) ? 1 : -1;
563     }
564 
565   return oeb->id > oea->id ? 1 : -1;
566 }
567 
568 /* Add an operand entry to *OPS for the tree operand OP.  */
569 
570 static void
571 add_to_ops_vec (vec<operand_entry *> *ops, tree op, gimple *stmt_to_insert = NULL)
572 {
573   operand_entry *oe = operand_entry_pool.allocate ();
574 
575   oe->op = op;
576   oe->rank = get_rank (op);
577   oe->id = next_operand_entry_id++;
578   oe->count = 1;
579   oe->stmt_to_insert = stmt_to_insert;
580   ops->safe_push (oe);
581 }
582 
583 /* Add an operand entry to *OPS for the tree operand OP with repeat
584    count REPEAT.  */
585 
586 static void
587 add_repeat_to_ops_vec (vec<operand_entry *> *ops, tree op,
588 		       HOST_WIDE_INT repeat)
589 {
590   operand_entry *oe = operand_entry_pool.allocate ();
591 
592   oe->op = op;
593   oe->rank = get_rank (op);
594   oe->id = next_operand_entry_id++;
595   oe->count = repeat;
596   oe->stmt_to_insert = NULL;
597   ops->safe_push (oe);
598 
599   reassociate_stats.pows_encountered++;
600 }
601 
602 /* Return true if STMT is a reassociable operation containing a binary
603    operation with tree code CODE, and is inside LOOP.  */
604 
605 static bool
606 is_reassociable_op (gimple *stmt, enum tree_code code, struct loop *loop)
607 {
608   basic_block bb = gimple_bb (stmt);
609 
610   if (gimple_bb (stmt) == NULL)
611     return false;
612 
613   if (!flow_bb_inside_loop_p (loop, bb))
614     return false;
615 
616   if (is_gimple_assign (stmt)
617       && gimple_assign_rhs_code (stmt) == code
618       && has_single_use (gimple_assign_lhs (stmt)))
619     {
620       tree rhs1 = gimple_assign_rhs1 (stmt);
621       tree rhs2 = gimple_assign_rhs2 (stmt);
622       if (TREE_CODE (rhs1) == SSA_NAME
623 	  && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (rhs1))
624 	return false;
625       if (rhs2
626 	  && TREE_CODE (rhs2) == SSA_NAME
627 	  && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (rhs2))
628 	return false;
629       return true;
630     }
631 
632   return false;
633 }
634 
635 
636 /* Return true if STMT is a nop-conversion.  */
637 
638 static bool
639 gimple_nop_conversion_p (gimple *stmt)
640 {
641   if (gassign *ass = dyn_cast <gassign *> (stmt))
642     {
643       if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (ass))
644 	  && tree_nop_conversion_p (TREE_TYPE (gimple_assign_lhs (ass)),
645 				    TREE_TYPE (gimple_assign_rhs1 (ass))))
646 	return true;
647     }
648   return false;
649 }
650 
651 /* Given NAME, if NAME is defined by a unary operation OPCODE, return the
652    operand of that unary operation.  Otherwise, return NULL.  */
653 
654 static tree
655 get_unary_op (tree name, enum tree_code opcode)
656 {
657   gimple *stmt = SSA_NAME_DEF_STMT (name);
658 
659   /* Look through nop conversions (sign changes).  */
660   if (gimple_nop_conversion_p (stmt)
661       && TREE_CODE (gimple_assign_rhs1 (stmt)) == SSA_NAME)
662     stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
663 
664   if (!is_gimple_assign (stmt))
665     return NULL_TREE;
666 
667   if (gimple_assign_rhs_code (stmt) == opcode)
668     return gimple_assign_rhs1 (stmt);
669   return NULL_TREE;
670 }
671 
672 /* Return true if OP1 and OP2 have the same value when cast to either type.  */
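
/* For instance (an illustrative case): if OP1 is defined by op1 = (int) x
   and that conversion satisfies tree_nop_conversion_p, then OP1 and x are
   treated as having the same value.  */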
673 
674 static bool
675 ops_equal_values_p (tree op1, tree op2)
676 {
677   if (op1 == op2)
678     return true;
679 
680   tree orig_op1 = op1;
681   if (TREE_CODE (op1) == SSA_NAME)
682     {
683       gimple *stmt = SSA_NAME_DEF_STMT (op1);
684       if (gimple_nop_conversion_p (stmt))
685 	{
686 	  op1 = gimple_assign_rhs1 (stmt);
687 	  if (op1 == op2)
688 	    return true;
689 	}
690     }
691 
692   if (TREE_CODE (op2) == SSA_NAME)
693     {
694       gimple *stmt = SSA_NAME_DEF_STMT (op2);
695       if (gimple_nop_conversion_p (stmt))
696 	{
697 	  op2 = gimple_assign_rhs1 (stmt);
698 	  if (op1 == op2
699 	      || orig_op1 == op2)
700 	    return true;
701 	}
702     }
703 
704   return false;
705 }
706 
707 
708 /* If CURR and LAST are a pair of ops that OPCODE allows us to
709    eliminate through equivalences, do so, remove them from OPS, and
710    return true.  Otherwise, return false.  */
711 
712 static bool
713 eliminate_duplicate_pair (enum tree_code opcode,
714 			  vec<operand_entry *> *ops,
715 			  bool *all_done,
716 			  unsigned int i,
717 			  operand_entry *curr,
718 			  operand_entry *last)
719 {
720 
721   /* If we have two of the same op, and the opcode is & |, min, or max,
722      we can eliminate one of them.
723      If we have two of the same op, and the opcode is ^, we can
724      eliminate both of them.  */
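
  /* For example (an illustrative sketch, not from an actual dump): with
     opcode BIT_AND_EXPR and ops {a, a, b} the duplicate pair collapses to
     {a, b}, while with opcode BIT_XOR_EXPR and ops {a, a} both entries are
     removed and the list becomes {0}.  */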
725 
726   if (last && last->op == curr->op)
727     {
728       switch (opcode)
729 	{
730 	case MAX_EXPR:
731 	case MIN_EXPR:
732 	case BIT_IOR_EXPR:
733 	case BIT_AND_EXPR:
734 	  if (dump_file && (dump_flags & TDF_DETAILS))
735 	    {
736 	      fprintf (dump_file, "Equivalence: ");
737 	      print_generic_expr (dump_file, curr->op);
738 	      fprintf (dump_file, " [&|minmax] ");
739 	      print_generic_expr (dump_file, last->op);
740 	      fprintf (dump_file, " -> ");
741 	      print_generic_stmt (dump_file, last->op);
742 	    }
743 
744 	  ops->ordered_remove (i);
745 	  reassociate_stats.ops_eliminated ++;
746 
747 	  return true;
748 
749 	case BIT_XOR_EXPR:
750 	  if (dump_file && (dump_flags & TDF_DETAILS))
751 	    {
752 	      fprintf (dump_file, "Equivalence: ");
753 	      print_generic_expr (dump_file, curr->op);
754 	      fprintf (dump_file, " ^ ");
755 	      print_generic_expr (dump_file, last->op);
756 	      fprintf (dump_file, " -> nothing\n");
757 	    }
758 
759 	  reassociate_stats.ops_eliminated += 2;
760 
761 	  if (ops->length () == 2)
762 	    {
763 	      ops->truncate (0);
764 	      add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (last->op)));
765 	      *all_done = true;
766 	    }
767 	  else
768 	    {
769 	      ops->ordered_remove (i-1);
770 	      ops->ordered_remove (i-1);
771 	    }
772 
773 	  return true;
774 
775 	default:
776 	  break;
777 	}
778     }
779   return false;
780 }
781 
782 static vec<tree> plus_negates;
783 
784 /* If OPCODE is PLUS_EXPR and CURR->OP is a negate expression or a bitwise
785    not expression, look in OPS for a corresponding positive operation to
786    cancel it out.  If we find one, remove the other from OPS, replace
787    OPS[CURRINDEX] with 0 or -1, respectively, and return true.  Otherwise,
788    return false. */
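
/* For example (illustrative): in a PLUS_EXPR chain whose operands include
   both t and x, where t = -x, the pair cancels and is replaced by 0; if
   instead t = ~x, the pair x + ~x is replaced by -1.  */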
789 
790 static bool
791 eliminate_plus_minus_pair (enum tree_code opcode,
792 			   vec<operand_entry *> *ops,
793 			   unsigned int currindex,
794 			   operand_entry *curr)
795 {
796   tree negateop;
797   tree notop;
798   unsigned int i;
799   operand_entry *oe;
800 
801   if (opcode != PLUS_EXPR || TREE_CODE (curr->op) != SSA_NAME)
802     return false;
803 
804   negateop = get_unary_op (curr->op, NEGATE_EXPR);
805   notop = get_unary_op (curr->op, BIT_NOT_EXPR);
806   if (negateop == NULL_TREE && notop == NULL_TREE)
807     return false;
808 
809   /* Any non-negated version will have a rank that is one less than
810      the current rank.  So once we hit those ranks, if we don't find
811      one, we can stop.  */
812 
813   for (i = currindex + 1;
814        ops->iterate (i, &oe)
815        && oe->rank >= curr->rank - 1 ;
816        i++)
817     {
818       if (negateop
819 	  && ops_equal_values_p (oe->op, negateop))
820 	{
821 	  if (dump_file && (dump_flags & TDF_DETAILS))
822 	    {
823 	      fprintf (dump_file, "Equivalence: ");
824 	      print_generic_expr (dump_file, negateop);
825 	      fprintf (dump_file, " + -");
826 	      print_generic_expr (dump_file, oe->op);
827 	      fprintf (dump_file, " -> 0\n");
828 	    }
829 
830 	  ops->ordered_remove (i);
831 	  add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (oe->op)));
832 	  ops->ordered_remove (currindex);
833 	  reassociate_stats.ops_eliminated ++;
834 
835 	  return true;
836 	}
837       else if (notop
838 	       && ops_equal_values_p (oe->op, notop))
839 	{
840 	  tree op_type = TREE_TYPE (oe->op);
841 
842 	  if (dump_file && (dump_flags & TDF_DETAILS))
843 	    {
844 	      fprintf (dump_file, "Equivalence: ");
845 	      print_generic_expr (dump_file, notop);
846 	      fprintf (dump_file, " + ~");
847 	      print_generic_expr (dump_file, oe->op);
848 	      fprintf (dump_file, " -> -1\n");
849 	    }
850 
851 	  ops->ordered_remove (i);
852 	  add_to_ops_vec (ops, build_all_ones_cst (op_type));
853 	  ops->ordered_remove (currindex);
854 	  reassociate_stats.ops_eliminated ++;
855 
856 	  return true;
857 	}
858     }
859 
860   /* If CURR->OP is a negate expr without nop conversion in a plus expr:
861      save it for later inspection in repropagate_negates().  */
862   if (negateop != NULL_TREE
863       && gimple_assign_rhs_code (SSA_NAME_DEF_STMT (curr->op)) == NEGATE_EXPR)
864     plus_negates.safe_push (curr->op);
865 
866   return false;
867 }
868 
869 /* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR and CURR->OP is really a
870    bitwise not expression, look in OPS for a corresponding operand to
871    cancel it out.  If we find one, collapse the whole operand list to a
872    single -1 or 0, respectively, and return true.  Otherwise, return
873    false.  */
874 
875 static bool
876 eliminate_not_pairs (enum tree_code opcode,
877 		     vec<operand_entry *> *ops,
878 		     unsigned int currindex,
879 		     operand_entry *curr)
880 {
881   tree notop;
882   unsigned int i;
883   operand_entry *oe;
884 
885   if ((opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
886       || TREE_CODE (curr->op) != SSA_NAME)
887     return false;
888 
889   notop = get_unary_op (curr->op, BIT_NOT_EXPR);
890   if (notop == NULL_TREE)
891     return false;
892 
893   /* Any non-not version will have a rank that is one less than
894      the current rank.  So once we hit those ranks, if we don't find
895      one, we can stop.  */
896 
897   for (i = currindex + 1;
898        ops->iterate (i, &oe)
899        && oe->rank >= curr->rank - 1;
900        i++)
901     {
902       if (oe->op == notop)
903 	{
904 	  if (dump_file && (dump_flags & TDF_DETAILS))
905 	    {
906 	      fprintf (dump_file, "Equivalence: ");
907 	      print_generic_expr (dump_file, notop);
908 	      if (opcode == BIT_AND_EXPR)
909 		fprintf (dump_file, " & ~");
910 	      else if (opcode == BIT_IOR_EXPR)
911 		fprintf (dump_file, " | ~");
912 	      print_generic_expr (dump_file, oe->op);
913 	      if (opcode == BIT_AND_EXPR)
914 		fprintf (dump_file, " -> 0\n");
915 	      else if (opcode == BIT_IOR_EXPR)
916 		fprintf (dump_file, " -> -1\n");
917 	    }
918 
919 	  if (opcode == BIT_AND_EXPR)
920 	    oe->op = build_zero_cst (TREE_TYPE (oe->op));
921 	  else if (opcode == BIT_IOR_EXPR)
922 	    oe->op = build_all_ones_cst (TREE_TYPE (oe->op));
923 
924 	  reassociate_stats.ops_eliminated += ops->length () - 1;
925 	  ops->truncate (0);
926 	  ops->quick_push (oe);
927 	  return true;
928 	}
929     }
930 
931   return false;
932 }
933 
934 /* Use constant value that may be present in OPS to try to eliminate
935    operands.  Note that this function is only really used when we've
936    eliminated ops for other reasons, or merged constants.  Across
937    single statements, fold already does all of this, plus more.  There
938    is little point in duplicating logic, so I've only included the
939    identities that I could ever construct testcases to trigger.  */
940 
941 static void
942 eliminate_using_constants (enum tree_code opcode,
943 			   vec<operand_entry *> *ops)
944 {
945   operand_entry *oelast = ops->last ();
946   tree type = TREE_TYPE (oelast->op);
947 
948   if (oelast->rank == 0
949       && (ANY_INTEGRAL_TYPE_P (type) || FLOAT_TYPE_P (type)))
950     {
951       switch (opcode)
952 	{
953 	case BIT_AND_EXPR:
954 	  if (integer_zerop (oelast->op))
955 	    {
956 	      if (ops->length () != 1)
957 		{
958 		  if (dump_file && (dump_flags & TDF_DETAILS))
959 		    fprintf (dump_file, "Found & 0, removing all other ops\n");
960 
961 		  reassociate_stats.ops_eliminated += ops->length () - 1;
962 
963 		  ops->truncate (0);
964 		  ops->quick_push (oelast);
965 		  return;
966 		}
967 	    }
968 	  else if (integer_all_onesp (oelast->op))
969 	    {
970 	      if (ops->length () != 1)
971 		{
972 		  if (dump_file && (dump_flags & TDF_DETAILS))
973 		    fprintf (dump_file, "Found & -1, removing\n");
974 		  ops->pop ();
975 		  reassociate_stats.ops_eliminated++;
976 		}
977 	    }
978 	  break;
979 	case BIT_IOR_EXPR:
980 	  if (integer_all_onesp (oelast->op))
981 	    {
982 	      if (ops->length () != 1)
983 		{
984 		  if (dump_file && (dump_flags & TDF_DETAILS))
985 		    fprintf (dump_file, "Found | -1, removing all other ops\n");
986 
987 		  reassociate_stats.ops_eliminated += ops->length () - 1;
988 
989 		  ops->truncate (0);
990 		  ops->quick_push (oelast);
991 		  return;
992 		}
993 	    }
994 	  else if (integer_zerop (oelast->op))
995 	    {
996 	      if (ops->length () != 1)
997 		{
998 		  if (dump_file && (dump_flags & TDF_DETAILS))
999 		    fprintf (dump_file, "Found | 0, removing\n");
1000 		  ops->pop ();
1001 		  reassociate_stats.ops_eliminated++;
1002 		}
1003 	    }
1004 	  break;
1005 	case MULT_EXPR:
1006 	  if (integer_zerop (oelast->op)
1007 	      || (FLOAT_TYPE_P (type)
1008 		  && !HONOR_NANS (type)
1009 		  && !HONOR_SIGNED_ZEROS (type)
1010 		  && real_zerop (oelast->op)))
1011 	    {
1012 	      if (ops->length () != 1)
1013 		{
1014 		  if (dump_file && (dump_flags & TDF_DETAILS))
1015 		    fprintf (dump_file, "Found * 0, removing all other ops\n");
1016 
1017 		  reassociate_stats.ops_eliminated += ops->length () - 1;
1018 		  ops->truncate (0);
1019 		  ops->quick_push (oelast);
1020 		  return;
1021 		}
1022 	    }
1023 	  else if (integer_onep (oelast->op)
1024 		   || (FLOAT_TYPE_P (type)
1025 		       && !HONOR_SNANS (type)
1026 		       && real_onep (oelast->op)))
1027 	    {
1028 	      if (ops->length () != 1)
1029 		{
1030 		  if (dump_file && (dump_flags & TDF_DETAILS))
1031 		    fprintf (dump_file, "Found * 1, removing\n");
1032 		  ops->pop ();
1033 		  reassociate_stats.ops_eliminated++;
1034 		  return;
1035 		}
1036 	    }
1037 	  break;
1038 	case BIT_XOR_EXPR:
1039 	case PLUS_EXPR:
1040 	case MINUS_EXPR:
1041 	  if (integer_zerop (oelast->op)
1042 	      || (FLOAT_TYPE_P (type)
1043 		  && (opcode == PLUS_EXPR || opcode == MINUS_EXPR)
1044 		  && fold_real_zero_addition_p (type, oelast->op,
1045 						opcode == MINUS_EXPR)))
1046 	    {
1047 	      if (ops->length () != 1)
1048 		{
1049 		  if (dump_file && (dump_flags & TDF_DETAILS))
1050 		    fprintf (dump_file, "Found [|^+] 0, removing\n");
1051 		  ops->pop ();
1052 		  reassociate_stats.ops_eliminated++;
1053 		  return;
1054 		}
1055 	    }
1056 	  break;
1057 	default:
1058 	  break;
1059 	}
1060     }
1061 }
1062 
1063 
1064 static void linearize_expr_tree (vec<operand_entry *> *, gimple *,
1065 				 bool, bool);
1066 
1067 /* Structure for tracking and counting operands.  */
1068 struct oecount {
1069   unsigned int cnt;
1070   unsigned int id;
1071   enum tree_code oecode;
1072   tree op;
1073 };
1074 
1075 
1076 /* The heap for the oecount hashtable and the sorted list of operands.  */
1077 static vec<oecount> cvec;
1078 
1079 
1080 /* Oecount hashtable helpers.  */
1081 
1082 struct oecount_hasher : int_hash <int, 0, 1>
1083 {
1084   static inline hashval_t hash (int);
1085   static inline bool equal (int, int);
1086 };
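
/* The values stored in the hash table are indexes into cvec biased by 42
   (see the "idx = cvec.length () + 41" and "cvec[p - 42]" uses below).  The
   bias keeps the stored values away from 0 and 1, which int_hash reserves
   for empty and deleted slots.  */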
1087 
1088 /* Hash function for oecount.  */
1089 
1090 inline hashval_t
1091 oecount_hasher::hash (int p)
1092 {
1093   const oecount *c = &cvec[p - 42];
1094   return htab_hash_pointer (c->op) ^ (hashval_t)c->oecode;
1095 }
1096 
1097 /* Comparison function for oecount.  */
1098 
1099 inline bool
1100 oecount_hasher::equal (int p1, int p2)
1101 {
1102   const oecount *c1 = &cvec[p1 - 42];
1103   const oecount *c2 = &cvec[p2 - 42];
1104   return c1->oecode == c2->oecode && c1->op == c2->op;
1105 }
1106 
1107 /* Comparison function for qsort sorting oecount elements by count.  */
1108 
1109 static int
1110 oecount_cmp (const void *p1, const void *p2)
1111 {
1112   const oecount *c1 = (const oecount *)p1;
1113   const oecount *c2 = (const oecount *)p2;
1114   if (c1->cnt != c2->cnt)
1115     return c1->cnt > c2->cnt ? 1 : -1;
1116   else
1117     /* If counts are identical, use unique IDs to stabilize qsort.  */
1118     return c1->id > c2->id ? 1 : -1;
1119 }
1120 
1121 /* Return TRUE iff STMT represents a builtin call that raises OP
1122    to some exponent.  */
1123 
1124 static bool
1125 stmt_is_power_of_op (gimple *stmt, tree op)
1126 {
1127   if (!is_gimple_call (stmt))
1128     return false;
1129 
1130   switch (gimple_call_combined_fn (stmt))
1131     {
1132     CASE_CFN_POW:
1133     CASE_CFN_POWI:
1134       return (operand_equal_p (gimple_call_arg (stmt, 0), op, 0));
1135 
1136     default:
1137       return false;
1138     }
1139 }
1140 
1141 /* Given STMT which is a __builtin_pow* call, decrement its exponent
1142    in place and return the result.  Assumes that stmt_is_power_of_op
1143    was previously called for STMT and returned TRUE.  */
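
/* For example, a call __builtin_pow (x, 5.0) is rewritten in place to
   __builtin_pow (x, 4.0) and 4 is returned; __builtin_powi (x, 3) becomes
   __builtin_powi (x, 2) and 2 is returned.  */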
1144 
1145 static HOST_WIDE_INT
1146 decrement_power (gimple *stmt)
1147 {
1148   REAL_VALUE_TYPE c, cint;
1149   HOST_WIDE_INT power;
1150   tree arg1;
1151 
1152   switch (gimple_call_combined_fn (stmt))
1153     {
1154     CASE_CFN_POW:
1155       arg1 = gimple_call_arg (stmt, 1);
1156       c = TREE_REAL_CST (arg1);
1157       power = real_to_integer (&c) - 1;
1158       real_from_integer (&cint, VOIDmode, power, SIGNED);
1159       gimple_call_set_arg (stmt, 1, build_real (TREE_TYPE (arg1), cint));
1160       return power;
1161 
1162     CASE_CFN_POWI:
1163       arg1 = gimple_call_arg (stmt, 1);
1164       power = TREE_INT_CST_LOW (arg1) - 1;
1165       gimple_call_set_arg (stmt, 1, build_int_cst (TREE_TYPE (arg1), power));
1166       return power;
1167 
1168     default:
1169       gcc_unreachable ();
1170     }
1171 }
1172 
1173 /* Give STMT a new SSA name as its LHS and replace all uses of the old
1174    SSA name with the new one.  Return the new SSA name.  */
1175 
1176 static tree
1177 make_new_ssa_for_def (gimple *stmt, enum tree_code opcode, tree op)
1178 {
1179   gimple *use_stmt;
1180   use_operand_p use;
1181   imm_use_iterator iter;
1182   tree new_lhs, new_debug_lhs = NULL_TREE;
1183   tree lhs = gimple_get_lhs (stmt);
1184 
1185   new_lhs = make_ssa_name (TREE_TYPE (lhs));
1186   gimple_set_lhs (stmt, new_lhs);
1187 
1188   /* Also need to update GIMPLE_DEBUGs.  */
1189   FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs)
1190     {
1191       tree repl = new_lhs;
1192       if (is_gimple_debug (use_stmt))
1193 	{
1194 	  if (new_debug_lhs == NULL_TREE)
1195 	    {
1196 	      new_debug_lhs = make_node (DEBUG_EXPR_DECL);
1197 	      gdebug *def_temp
1198 		= gimple_build_debug_bind (new_debug_lhs,
1199 					   build2 (opcode, TREE_TYPE (lhs),
1200 						   new_lhs, op),
1201 					   stmt);
1202 	      DECL_ARTIFICIAL (new_debug_lhs) = 1;
1203 	      TREE_TYPE (new_debug_lhs) = TREE_TYPE (lhs);
1204 	      SET_DECL_MODE (new_debug_lhs, TYPE_MODE (TREE_TYPE (lhs)));
1205 	      gimple_set_uid (def_temp, gimple_uid (stmt));
1206 	      gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
1207 	      gsi_insert_after (&gsi, def_temp, GSI_SAME_STMT);
1208 	    }
1209 	  repl = new_debug_lhs;
1210 	}
1211       FOR_EACH_IMM_USE_ON_STMT (use, iter)
1212 	SET_USE (use, repl);
1213       update_stmt (use_stmt);
1214     }
1215   return new_lhs;
1216 }
1217 
1218 /* Give each statement in STMTS_TO_FIX a new SSA name as its LHS and
1219    replace all uses of the old names with the new ones.  Also do this
1220    for the stmt that defines *DEF if *DEF is not OP.  */
1221 
1222 static void
1223 make_new_ssa_for_all_defs (tree *def, enum tree_code opcode, tree op,
1224 			   vec<gimple *> &stmts_to_fix)
1225 {
1226   unsigned i;
1227   gimple *stmt;
1228 
1229   if (*def != op
1230       && TREE_CODE (*def) == SSA_NAME
1231       && (stmt = SSA_NAME_DEF_STMT (*def))
1232       && gimple_code (stmt) != GIMPLE_NOP)
1233     *def = make_new_ssa_for_def (stmt, opcode, op);
1234 
1235   FOR_EACH_VEC_ELT (stmts_to_fix, i, stmt)
1236     make_new_ssa_for_def (stmt, opcode, op);
1237 }
1238 
1239 /* Find the single immediate use of STMT's LHS, and replace it
1240    with OP.  Remove STMT.  If STMT's LHS is the same as *DEF,
1241    replace *DEF with OP as well.  */
1242 
1243 static void
1244 propagate_op_to_single_use (tree op, gimple *stmt, tree *def)
1245 {
1246   tree lhs;
1247   gimple *use_stmt;
1248   use_operand_p use;
1249   gimple_stmt_iterator gsi;
1250 
1251   if (is_gimple_call (stmt))
1252     lhs = gimple_call_lhs (stmt);
1253   else
1254     lhs = gimple_assign_lhs (stmt);
1255 
1256   gcc_assert (has_single_use (lhs));
1257   single_imm_use (lhs, &use, &use_stmt);
1258   if (lhs == *def)
1259     *def = op;
1260   SET_USE (use, op);
1261   if (TREE_CODE (op) != SSA_NAME)
1262     update_stmt (use_stmt);
1263   gsi = gsi_for_stmt (stmt);
1264   unlink_stmt_vdef (stmt);
1265   reassoc_remove_stmt (&gsi);
1266   release_defs (stmt);
1267 }
1268 
1269 /* Walks the linear chain with result *DEF searching for an operation
1270    with operand OP and code OPCODE removing that from the chain.  *DEF
1271    is updated if there is only one operand but no operation left.  */
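
/* For example (a sketch): if *DEF is t3, defined by the chain
     t1 = a * b;  t2 = t1 * c;  t3 = t2 * d;
   then zero_one_operation (&def, MULT_EXPR, c) removes the multiplication
   by c, leaving a chain that computes a * b * d, and (per the PR72835 note
   below) any statement whose computed value changed gets a fresh LHS.  */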
1272 
1273 static void
1274 zero_one_operation (tree *def, enum tree_code opcode, tree op)
1275 {
1276   tree orig_def = *def;
1277   gimple *stmt = SSA_NAME_DEF_STMT (*def);
1278   /* PR72835 - Record the stmt chain that has to be updated such that
1279      we don't use the same LHS when the values computed are different.  */
1280   auto_vec<gimple *, 64> stmts_to_fix;
1281 
1282   do
1283     {
1284       tree name;
1285 
1286       if (opcode == MULT_EXPR)
1287 	{
1288 	  if (stmt_is_power_of_op (stmt, op))
1289 	    {
1290 	      if (decrement_power (stmt) == 1)
1291 		{
1292 		  if (stmts_to_fix.length () > 0)
1293 		    stmts_to_fix.pop ();
1294 		  propagate_op_to_single_use (op, stmt, def);
1295 		}
1296 	      break;
1297 	    }
1298 	  else if (gimple_assign_rhs_code (stmt) == NEGATE_EXPR)
1299 	    {
1300 	      if (gimple_assign_rhs1 (stmt) == op)
1301 		{
1302 		  tree cst = build_minus_one_cst (TREE_TYPE (op));
1303 		  if (stmts_to_fix.length () > 0)
1304 		    stmts_to_fix.pop ();
1305 		  propagate_op_to_single_use (cst, stmt, def);
1306 		  break;
1307 		}
1308 	      else if (integer_minus_onep (op)
1309 		       || real_minus_onep (op))
1310 		{
1311 		  gimple_assign_set_rhs_code
1312 		    (stmt, TREE_CODE (gimple_assign_rhs1 (stmt)));
1313 		  break;
1314 		}
1315 	    }
1316 	}
1317 
1318       name = gimple_assign_rhs1 (stmt);
1319 
1320       /* If this is the operation we look for and one of the operands
1321          is ours, simply propagate the other operand into the stmt's
1322 	 single use.  */
1323       if (gimple_assign_rhs_code (stmt) == opcode
1324 	  && (name == op
1325 	      || gimple_assign_rhs2 (stmt) == op))
1326 	{
1327 	  if (name == op)
1328 	    name = gimple_assign_rhs2 (stmt);
1329 	  if (stmts_to_fix.length () > 0)
1330 	    stmts_to_fix.pop ();
1331 	  propagate_op_to_single_use (name, stmt, def);
1332 	  break;
1333 	}
1334 
1335       /* We might have a multiply of two __builtin_pow* calls, and
1336 	 the operand might be hiding in the rightmost one.  Likewise
1337 	 this can happen for a negate.  */
1338       if (opcode == MULT_EXPR
1339 	  && gimple_assign_rhs_code (stmt) == opcode
1340 	  && TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME
1341 	  && has_single_use (gimple_assign_rhs2 (stmt)))
1342 	{
1343 	  gimple *stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
1344 	  if (stmt_is_power_of_op (stmt2, op))
1345 	    {
1346 	      if (decrement_power (stmt2) == 1)
1347 		propagate_op_to_single_use (op, stmt2, def);
1348 	      else
1349 		stmts_to_fix.safe_push (stmt2);
1350 	      break;
1351 	    }
1352 	  else if (is_gimple_assign (stmt2)
1353 		   && gimple_assign_rhs_code (stmt2) == NEGATE_EXPR)
1354 	    {
1355 	      if (gimple_assign_rhs1 (stmt2) == op)
1356 		{
1357 		  tree cst = build_minus_one_cst (TREE_TYPE (op));
1358 		  propagate_op_to_single_use (cst, stmt2, def);
1359 		  break;
1360 		}
1361 	      else if (integer_minus_onep (op)
1362 		       || real_minus_onep (op))
1363 		{
1364 		  stmts_to_fix.safe_push (stmt2);
1365 		  gimple_assign_set_rhs_code
1366 		    (stmt2, TREE_CODE (gimple_assign_rhs1 (stmt2)));
1367 		  break;
1368 		}
1369 	    }
1370 	}
1371 
1372       /* Continue walking the chain.  */
1373       gcc_assert (name != op
1374 		  && TREE_CODE (name) == SSA_NAME);
1375       stmt = SSA_NAME_DEF_STMT (name);
1376       stmts_to_fix.safe_push (stmt);
1377     }
1378   while (1);
1379 
1380   if (stmts_to_fix.length () > 0 || *def == orig_def)
1381     make_new_ssa_for_all_defs (def, opcode, op, stmts_to_fix);
1382 }
1383 
1384 /* Returns true if statement S1 dominates statement S2.  Like
1385    stmt_dominates_stmt_p, but uses stmt UIDs to optimize.  */
1386 
1387 static bool
1388 reassoc_stmt_dominates_stmt_p (gimple *s1, gimple *s2)
1389 {
1390   basic_block bb1 = gimple_bb (s1), bb2 = gimple_bb (s2);
1391 
1392   /* If bb1 is NULL, it should be the GIMPLE_NOP def stmt of a default
1393      definition (D) SSA_NAME.  Assume it lives at the beginning of the
1394      function and thus dominates everything.  */
1395   if (!bb1 || s1 == s2)
1396     return true;
1397 
1398   /* If bb2 is NULL, it doesn't dominate any stmt with a bb.  */
1399   if (!bb2)
1400     return false;
1401 
1402   if (bb1 == bb2)
1403     {
1404       /* PHIs in the same basic block are assumed to be
1405 	 executed all in parallel, so if only one stmt is a PHI,
1406 	 it dominates the other stmt in the same basic block.  */
1407       if (gimple_code (s1) == GIMPLE_PHI)
1408 	return true;
1409 
1410       if (gimple_code (s2) == GIMPLE_PHI)
1411 	return false;
1412 
1413       gcc_assert (gimple_uid (s1) && gimple_uid (s2));
1414 
1415       if (gimple_uid (s1) < gimple_uid (s2))
1416 	return true;
1417 
1418       if (gimple_uid (s1) > gimple_uid (s2))
1419 	return false;
1420 
1421       gimple_stmt_iterator gsi = gsi_for_stmt (s1);
1422       unsigned int uid = gimple_uid (s1);
1423       for (gsi_next (&gsi); !gsi_end_p (gsi); gsi_next (&gsi))
1424 	{
1425 	  gimple *s = gsi_stmt (gsi);
1426 	  if (gimple_uid (s) != uid)
1427 	    break;
1428 	  if (s == s2)
1429 	    return true;
1430 	}
1431 
1432       return false;
1433     }
1434 
1435   return dominated_by_p (CDI_DOMINATORS, bb2, bb1);
1436 }
1437 
1438 /* Insert STMT after INSERT_POINT.  */
1439 
1440 static void
1441 insert_stmt_after (gimple *stmt, gimple *insert_point)
1442 {
1443   gimple_stmt_iterator gsi;
1444   basic_block bb;
1445 
1446   if (gimple_code (insert_point) == GIMPLE_PHI)
1447     bb = gimple_bb (insert_point);
1448   else if (!stmt_ends_bb_p (insert_point))
1449     {
1450       gsi = gsi_for_stmt (insert_point);
1451       gimple_set_uid (stmt, gimple_uid (insert_point));
1452       gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
1453       return;
1454     }
1455   else
1456     /* We assume INSERT_POINT is a SSA_NAME_DEF_STMT of some SSA_NAME,
1457        thus if it must end a basic block, it should be a call that can
1458        throw, or some assignment that can throw.  If it throws, the LHS
1459        of it will not be initialized though, so only valid places using
1460        the SSA_NAME should be dominated by the fallthru edge.  */
1461     bb = find_fallthru_edge (gimple_bb (insert_point)->succs)->dest;
1462   gsi = gsi_after_labels (bb);
1463   if (gsi_end_p (gsi))
1464     {
1465       gimple_stmt_iterator gsi2 = gsi_last_bb (bb);
1466       gimple_set_uid (stmt,
1467 		      gsi_end_p (gsi2) ? 1 : gimple_uid (gsi_stmt (gsi2)));
1468     }
1469   else
1470     gimple_set_uid (stmt, gimple_uid (gsi_stmt (gsi)));
1471   gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
1472 }
1473 
1474 /* Builds one statement performing OP1 OPCODE OP2, using a new SSA name
1475    of type TYPE for the result.  Places the statement after the definition
1476    of either OP1 or OP2.  Returns the new statement.  */
1477 
1478 static gimple *
1479 build_and_add_sum (tree type, tree op1, tree op2, enum tree_code opcode)
1480 {
1481   gimple *op1def = NULL, *op2def = NULL;
1482   gimple_stmt_iterator gsi;
1483   tree op;
1484   gassign *sum;
1485 
1486   /* Create the addition statement.  */
1487   op = make_ssa_name (type);
1488   sum = gimple_build_assign (op, opcode, op1, op2);
1489 
1490   /* Find an insertion place and insert.  */
1491   if (TREE_CODE (op1) == SSA_NAME)
1492     op1def = SSA_NAME_DEF_STMT (op1);
1493   if (TREE_CODE (op2) == SSA_NAME)
1494     op2def = SSA_NAME_DEF_STMT (op2);
1495   if ((!op1def || gimple_nop_p (op1def))
1496       && (!op2def || gimple_nop_p (op2def)))
1497     {
1498       gsi = gsi_after_labels (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
1499       if (gsi_end_p (gsi))
1500 	{
1501 	  gimple_stmt_iterator gsi2
1502 	    = gsi_last_bb (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
1503 	  gimple_set_uid (sum,
1504 			  gsi_end_p (gsi2) ? 1 : gimple_uid (gsi_stmt (gsi2)));
1505 	}
1506       else
1507 	gimple_set_uid (sum, gimple_uid (gsi_stmt (gsi)));
1508       gsi_insert_before (&gsi, sum, GSI_NEW_STMT);
1509     }
1510   else
1511     {
1512       gimple *insert_point;
1513       if ((!op1def || gimple_nop_p (op1def))
1514 	   || (op2def && !gimple_nop_p (op2def)
1515 	       && reassoc_stmt_dominates_stmt_p (op1def, op2def)))
1516 	insert_point = op2def;
1517       else
1518 	insert_point = op1def;
1519       insert_stmt_after (sum, insert_point);
1520     }
1521   update_stmt (sum);
1522 
1523   return sum;
1524 }
1525 
1526 /* Perform un-distribution of divisions and multiplications.
1527    A * X + B * X is transformed into (A + B) * X and A / X + B / X
1528    to (A + B) / X for real X.
1529 
1530    The algorithm is organized as follows.
1531 
1532     - First we walk the addition chain *OPS looking for summands that
1533       are defined by a multiplication or a real division.  This results
1534       in the candidates bitmap with relevant indices into *OPS.
1535 
1536     - Second we build the chains of multiplications or divisions for
1537       these candidates, counting the number of occurrences of (operand, code)
1538       pairs in all of the candidates chains.
1539 
1540     - Third we sort the (operand, code) pairs by number of occurrence and
1541       process them starting with the pair with the most uses.
1542 
1543       * For each such pair we walk the candidates again to build a
1544         second candidate bitmap noting all multiplication/division chains
1545 	that have at least one occurrence of (operand, code).
1546 
1547       * We build an alternate addition chain only covering these
1548         candidates with one (operand, code) operation removed from their
1549 	multiplication/division chain.
1550 
1551       * The first candidate gets replaced by the alternate addition chain
1552         multiplied/divided by the operand.
1553 
1554       * All candidate chains get disabled for further processing and
1555         processing of (operand, code) pairs continues.
1556 
1557   The alternate addition chains built are re-processed by the main
1558   reassociation algorithm which allows optimizing a * x * y + b * y * x
1559   to (a + b ) * x * y in one invocation of the reassociation pass.  */
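
/* A minimal example of the transform (illustrative GIMPLE, not from an
   actual dump): for the addition chain t1 + t2 with
     t1 = a * x;   t2 = b * x;
   the pair (x, MULT_EXPR) occurs twice, so we build
     s = a + b;    p = s * x;
   and the operand list becomes { p, 0 }, which the main reassociation
   algorithm then cleans up.  */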
1560 
1561 static bool
1562 undistribute_ops_list (enum tree_code opcode,
1563 		       vec<operand_entry *> *ops, struct loop *loop)
1564 {
1565   unsigned int length = ops->length ();
1566   operand_entry *oe1;
1567   unsigned i, j;
1568   unsigned nr_candidates, nr_candidates2;
1569   sbitmap_iterator sbi0;
1570   vec<operand_entry *> *subops;
1571   bool changed = false;
1572   unsigned int next_oecount_id = 0;
1573 
1574   if (length <= 1
1575       || opcode != PLUS_EXPR)
1576     return false;
1577 
1578   /* Build a list of candidates to process.  */
1579   auto_sbitmap candidates (length);
1580   bitmap_clear (candidates);
1581   nr_candidates = 0;
1582   FOR_EACH_VEC_ELT (*ops, i, oe1)
1583     {
1584       enum tree_code dcode;
1585       gimple *oe1def;
1586 
1587       if (TREE_CODE (oe1->op) != SSA_NAME)
1588 	continue;
1589       oe1def = SSA_NAME_DEF_STMT (oe1->op);
1590       if (!is_gimple_assign (oe1def))
1591 	continue;
1592       dcode = gimple_assign_rhs_code (oe1def);
1593       if ((dcode != MULT_EXPR
1594 	   && dcode != RDIV_EXPR)
1595 	  || !is_reassociable_op (oe1def, dcode, loop))
1596 	continue;
1597 
1598       bitmap_set_bit (candidates, i);
1599       nr_candidates++;
1600     }
1601 
1602   if (nr_candidates < 2)
1603     return false;
1604 
1605   if (dump_file && (dump_flags & TDF_DETAILS))
1606     {
1607       fprintf (dump_file, "searching for un-distribute opportunities ");
1608       print_generic_expr (dump_file,
1609 	(*ops)[bitmap_first_set_bit (candidates)]->op, 0);
1610       fprintf (dump_file, " %d\n", nr_candidates);
1611     }
1612 
1613   /* Build linearized sub-operand lists and the counting table.  */
1614   cvec.create (0);
1615 
1616   hash_table<oecount_hasher> ctable (15);
1617 
1618   /* ??? Macro arguments cannot have multi-argument template types in
1619      them.  This typedef is needed to work around that limitation.  */
1620   typedef vec<operand_entry *> vec_operand_entry_t_heap;
1621   subops = XCNEWVEC (vec_operand_entry_t_heap, ops->length ());
1622   EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
1623     {
1624       gimple *oedef;
1625       enum tree_code oecode;
1626       unsigned j;
1627 
1628       oedef = SSA_NAME_DEF_STMT ((*ops)[i]->op);
1629       oecode = gimple_assign_rhs_code (oedef);
1630       linearize_expr_tree (&subops[i], oedef,
1631 			   associative_tree_code (oecode), false);
1632 
1633       FOR_EACH_VEC_ELT (subops[i], j, oe1)
1634 	{
1635 	  oecount c;
1636 	  int *slot;
1637 	  int idx;
1638 	  c.oecode = oecode;
1639 	  c.cnt = 1;
1640 	  c.id = next_oecount_id++;
1641 	  c.op = oe1->op;
1642 	  cvec.safe_push (c);
1643 	  idx = cvec.length () + 41;
1644 	  slot = ctable.find_slot (idx, INSERT);
1645 	  if (!*slot)
1646 	    {
1647 	      *slot = idx;
1648 	    }
1649 	  else
1650 	    {
1651 	      cvec.pop ();
1652 	      cvec[*slot - 42].cnt++;
1653 	    }
1654 	}
1655     }
1656 
1657   /* Sort the counting table.  */
1658   cvec.qsort (oecount_cmp);
1659 
1660   if (dump_file && (dump_flags & TDF_DETAILS))
1661     {
1662       oecount *c;
1663       fprintf (dump_file, "Candidates:\n");
1664       FOR_EACH_VEC_ELT (cvec, j, c)
1665 	{
1666 	  fprintf (dump_file, "  %u %s: ", c->cnt,
1667 		   c->oecode == MULT_EXPR
1668 		   ? "*" : c->oecode == RDIV_EXPR ? "/" : "?");
1669 	  print_generic_expr (dump_file, c->op);
1670 	  fprintf (dump_file, "\n");
1671 	}
1672     }
1673 
1674   /* Process the (operand, code) pairs in order of most occurrence.  */
1675   auto_sbitmap candidates2 (length);
1676   while (!cvec.is_empty ())
1677     {
1678       oecount *c = &cvec.last ();
1679       if (c->cnt < 2)
1680 	break;
1681 
1682       /* Now collect the operands in the outer chain that contain
1683          the common operand in their inner chain.  */
1684       bitmap_clear (candidates2);
1685       nr_candidates2 = 0;
1686       EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
1687 	{
1688 	  gimple *oedef;
1689 	  enum tree_code oecode;
1690 	  unsigned j;
1691 	  tree op = (*ops)[i]->op;
1692 
1693 	  /* If we already undistributed in this chain, this may be
1694 	     a constant.  */
1695 	  if (TREE_CODE (op) != SSA_NAME)
1696 	    continue;
1697 
1698 	  oedef = SSA_NAME_DEF_STMT (op);
1699 	  oecode = gimple_assign_rhs_code (oedef);
1700 	  if (oecode != c->oecode)
1701 	    continue;
1702 
1703 	  FOR_EACH_VEC_ELT (subops[i], j, oe1)
1704 	    {
1705 	      if (oe1->op == c->op)
1706 		{
1707 		  bitmap_set_bit (candidates2, i);
1708 		  ++nr_candidates2;
1709 		  break;
1710 		}
1711 	    }
1712 	}
1713 
1714       if (nr_candidates2 >= 2)
1715 	{
1716 	  operand_entry *oe1, *oe2;
1717 	  gimple *prod;
1718 	  int first = bitmap_first_set_bit (candidates2);
1719 
1720 	  /* Build the new addition chain.  */
1721 	  oe1 = (*ops)[first];
1722 	  if (dump_file && (dump_flags & TDF_DETAILS))
1723 	    {
1724 	      fprintf (dump_file, "Building (");
1725 	      print_generic_expr (dump_file, oe1->op);
1726 	    }
1727 	  zero_one_operation (&oe1->op, c->oecode, c->op);
1728 	  EXECUTE_IF_SET_IN_BITMAP (candidates2, first+1, i, sbi0)
1729 	    {
1730 	      gimple *sum;
1731 	      oe2 = (*ops)[i];
1732 	      if (dump_file && (dump_flags & TDF_DETAILS))
1733 		{
1734 		  fprintf (dump_file, " + ");
1735 		  print_generic_expr (dump_file, oe2->op);
1736 		}
1737 	      zero_one_operation (&oe2->op, c->oecode, c->op);
1738 	      sum = build_and_add_sum (TREE_TYPE (oe1->op),
1739 				       oe1->op, oe2->op, opcode);
1740 	      oe2->op = build_zero_cst (TREE_TYPE (oe2->op));
1741 	      oe2->rank = 0;
1742 	      oe1->op = gimple_get_lhs (sum);
1743 	    }
1744 
1745 	  /* Apply the multiplication/division.  */
1746 	  prod = build_and_add_sum (TREE_TYPE (oe1->op),
1747 				    oe1->op, c->op, c->oecode);
1748 	  if (dump_file && (dump_flags & TDF_DETAILS))
1749 	    {
1750 	      fprintf (dump_file, ") %s ", c->oecode == MULT_EXPR ? "*" : "/");
1751 	      print_generic_expr (dump_file, c->op);
1752 	      fprintf (dump_file, "\n");
1753 	    }
1754 
1755 	  /* Record it in the addition chain and disable further
1756 	     undistribution with this op.  */
1757 	  oe1->op = gimple_assign_lhs (prod);
1758 	  oe1->rank = get_rank (oe1->op);
1759 	  subops[first].release ();
1760 
1761 	  changed = true;
1762 	}
1763 
1764       cvec.pop ();
1765     }
1766 
1767   for (i = 0; i < ops->length (); ++i)
1768     subops[i].release ();
1769   free (subops);
1770   cvec.release ();
1771 
1772   return changed;
1773 }
1774 
1775 /* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR and CURR is a comparison
1776    expression, examine the other OPS to see if any of them are comparisons
1777    of the same values, which we may be able to combine or eliminate.
1778    For example, we can rewrite (a < b) | (a == b) as (a <= b).  */
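   As an illustrative sketch (not taken from an actual testcase), a
   BIT_AND_EXPR chain of comparisons on integral operands can collapse the
   same way:
     (a <= b) & (a >= b)  ->  (a == b)
     (a <  b) & (a >  b)  ->  false
   The exact folds performed are whatever maybe_fold_and_comparisons or
   maybe_fold_or_comparisons return.  */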
1779 
1780 static bool
1781 eliminate_redundant_comparison (enum tree_code opcode,
1782 				vec<operand_entry *> *ops,
1783 				unsigned int currindex,
1784 				operand_entry *curr)
1785 {
1786   tree op1, op2;
1787   enum tree_code lcode, rcode;
1788   gimple *def1, *def2;
1789   int i;
1790   operand_entry *oe;
1791 
1792   if (opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
1793     return false;
1794 
1795   /* Check that CURR is a comparison.  */
1796   if (TREE_CODE (curr->op) != SSA_NAME)
1797     return false;
1798   def1 = SSA_NAME_DEF_STMT (curr->op);
1799   if (!is_gimple_assign (def1))
1800     return false;
1801   lcode = gimple_assign_rhs_code (def1);
1802   if (TREE_CODE_CLASS (lcode) != tcc_comparison)
1803     return false;
1804   op1 = gimple_assign_rhs1 (def1);
1805   op2 = gimple_assign_rhs2 (def1);
1806 
1807   /* Now look for a similar comparison in the remaining OPS.  */
1808   for (i = currindex + 1; ops->iterate (i, &oe); i++)
1809     {
1810       tree t;
1811 
1812       if (TREE_CODE (oe->op) != SSA_NAME)
1813 	continue;
1814       def2 = SSA_NAME_DEF_STMT (oe->op);
1815       if (!is_gimple_assign (def2))
1816 	continue;
1817       rcode = gimple_assign_rhs_code (def2);
1818       if (TREE_CODE_CLASS (rcode) != tcc_comparison)
1819 	continue;
1820 
1821       /* If we got here, we have a match.  See if we can combine the
1822 	 two comparisons.  */
1823       if (opcode == BIT_IOR_EXPR)
1824 	t = maybe_fold_or_comparisons (lcode, op1, op2,
1825 				       rcode, gimple_assign_rhs1 (def2),
1826 				       gimple_assign_rhs2 (def2));
1827       else
1828 	t = maybe_fold_and_comparisons (lcode, op1, op2,
1829 					rcode, gimple_assign_rhs1 (def2),
1830 					gimple_assign_rhs2 (def2));
1831       if (!t)
1832 	continue;
1833 
1834       /* maybe_fold_and_comparisons and maybe_fold_or_comparisons
1835 	 always give us a boolean_type_node value back.  If the original
1836 	 BIT_AND_EXPR or BIT_IOR_EXPR was of a wider integer type,
1837 	 we need to convert.  */
1838       if (!useless_type_conversion_p (TREE_TYPE (curr->op), TREE_TYPE (t)))
1839 	t = fold_convert (TREE_TYPE (curr->op), t);
1840 
1841       if (TREE_CODE (t) != INTEGER_CST
1842 	  && !operand_equal_p (t, curr->op, 0))
1843 	{
1844 	  enum tree_code subcode;
1845 	  tree newop1, newop2;
1846 	  if (!COMPARISON_CLASS_P (t))
1847 	    continue;
1848 	  extract_ops_from_tree (t, &subcode, &newop1, &newop2);
1849 	  STRIP_USELESS_TYPE_CONVERSION (newop1);
1850 	  STRIP_USELESS_TYPE_CONVERSION (newop2);
1851 	  if (!is_gimple_val (newop1) || !is_gimple_val (newop2))
1852 	    continue;
1853 	}
1854 
1855       if (dump_file && (dump_flags & TDF_DETAILS))
1856 	{
1857 	  fprintf (dump_file, "Equivalence: ");
1858 	  print_generic_expr (dump_file, curr->op);
1859 	  fprintf (dump_file, " %s ", op_symbol_code (opcode));
1860 	  print_generic_expr (dump_file, oe->op);
1861 	  fprintf (dump_file, " -> ");
1862 	  print_generic_expr (dump_file, t);
1863 	  fprintf (dump_file, "\n");
1864 	}
1865 
1866       /* Now we can delete oe, as it has been subsumed by the new combined
1867          expression t.  */
1868       ops->ordered_remove (i);
1869       reassociate_stats.ops_eliminated ++;
1870 
1871       /* If t is the same as curr->op, we're done.  Otherwise we must
1872 	 replace curr->op with t.  A special case is when we get a constant
1873 	 back, in which case we add it to the end instead of in place of
1874 	 the current entry.  */
1875       if (TREE_CODE (t) == INTEGER_CST)
1876 	{
1877 	  ops->ordered_remove (currindex);
1878 	  add_to_ops_vec (ops, t);
1879 	}
1880       else if (!operand_equal_p (t, curr->op, 0))
1881 	{
1882 	  gimple *sum;
1883 	  enum tree_code subcode;
1884 	  tree newop1;
1885 	  tree newop2;
1886 	  gcc_assert (COMPARISON_CLASS_P (t));
1887 	  extract_ops_from_tree (t, &subcode, &newop1, &newop2);
1888 	  STRIP_USELESS_TYPE_CONVERSION (newop1);
1889 	  STRIP_USELESS_TYPE_CONVERSION (newop2);
1890 	  gcc_checking_assert (is_gimple_val (newop1)
1891 			       && is_gimple_val (newop2));
1892 	  sum = build_and_add_sum (TREE_TYPE (t), newop1, newop2, subcode);
1893 	  curr->op = gimple_get_lhs (sum);
1894 	}
1895       return true;
1896     }
1897 
1898   return false;
1899 }
1900 
1901 
1902 /* Transform repeated addition of the same value into a multiplication
1903    by a constant.  */
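/* For example (a hypothetical input, shown for illustration only), the
   operand list of  x + x + x + y  contains x three times; the three
   copies are removed and replaced by a single  x * 3  entry, so the
   whole expression is rewritten as  x * 3 + y.  */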
1904 static bool
1905 transform_add_to_multiply (vec<operand_entry *> *ops)
1906 {
1907   operand_entry *oe;
1908   tree op = NULL_TREE;
1909   int j;
1910   int i, start = -1, end = 0, count = 0;
1911   auto_vec<std::pair <int, int> > indxs;
1912   bool changed = false;
1913 
1914   if (!INTEGRAL_TYPE_P (TREE_TYPE ((*ops)[0]->op))
1915       && (!SCALAR_FLOAT_TYPE_P (TREE_TYPE ((*ops)[0]->op))
1916 	  || !flag_unsafe_math_optimizations))
1917     return false;
1918 
1919   /* Look for repeated operands.  */
1920   FOR_EACH_VEC_ELT (*ops, i, oe)
1921     {
1922       if (start == -1)
1923 	{
1924 	  count = 1;
1925 	  op = oe->op;
1926 	  start = i;
1927 	}
1928       else if (operand_equal_p (oe->op, op, 0))
1929 	{
1930 	  count++;
1931 	  end = i;
1932 	}
1933       else
1934 	{
1935 	  if (count > 1)
1936 	    indxs.safe_push (std::make_pair (start, end));
1937 	  count = 1;
1938 	  op = oe->op;
1939 	  start = i;
1940 	}
1941     }
1942 
1943   if (count > 1)
1944     indxs.safe_push (std::make_pair (start, end));
1945 
1946   for (j = indxs.length () - 1; j >= 0; --j)
1947     {
1948       /* Convert repeated operand addition to multiplication.  */
1949       start = indxs[j].first;
1950       end = indxs[j].second;
1951       op = (*ops)[start]->op;
1952       count = end - start + 1;
1953       for (i = end; i >= start; --i)
1954 	ops->unordered_remove (i);
1955       tree tmp = make_ssa_name (TREE_TYPE (op));
1956       tree cst = build_int_cst (integer_type_node, count);
1957       gassign *mul_stmt
1958 	= gimple_build_assign (tmp, MULT_EXPR,
1959 			       op, fold_convert (TREE_TYPE (op), cst));
1960       gimple_set_visited (mul_stmt, true);
1961       add_to_ops_vec (ops, tmp, mul_stmt);
1962       changed = true;
1963     }
1964 
1965   return changed;
1966 }
1967 
1968 
1969 /* Perform various identities and other optimizations on the list of
1970    operand entries, stored in OPS.  The tree code for the binary
1971    operation between all the operands is OPCODE.  */
1972 
1973 static void
1974 optimize_ops_list (enum tree_code opcode,
1975 		   vec<operand_entry *> *ops)
1976 {
1977   unsigned int length = ops->length ();
1978   unsigned int i;
1979   operand_entry *oe;
1980   operand_entry *oelast = NULL;
1981   bool iterate = false;
1982 
1983   if (length == 1)
1984     return;
1985 
1986   oelast = ops->last ();
1987 
1988   /* If the last two are constants, pop the constants off, merge them
1989      and try the next two.  */
1990   if (oelast->rank == 0 && is_gimple_min_invariant (oelast->op))
1991     {
1992       operand_entry *oelm1 = (*ops)[length - 2];
1993 
1994       if (oelm1->rank == 0
1995 	  && is_gimple_min_invariant (oelm1->op)
1996 	  && useless_type_conversion_p (TREE_TYPE (oelm1->op),
1997 				       TREE_TYPE (oelast->op)))
1998 	{
1999 	  tree folded = fold_binary (opcode, TREE_TYPE (oelm1->op),
2000 				     oelm1->op, oelast->op);
2001 
2002 	  if (folded && is_gimple_min_invariant (folded))
2003 	    {
2004 	      if (dump_file && (dump_flags & TDF_DETAILS))
2005 		fprintf (dump_file, "Merging constants\n");
2006 
2007 	      ops->pop ();
2008 	      ops->pop ();
2009 
2010 	      add_to_ops_vec (ops, folded);
2011 	      reassociate_stats.constants_eliminated++;
2012 
2013 	      optimize_ops_list (opcode, ops);
2014 	      return;
2015 	    }
2016 	}
2017     }
2018 
2019   eliminate_using_constants (opcode, ops);
2020   oelast = NULL;
2021 
2022   for (i = 0; ops->iterate (i, &oe);)
2023     {
2024       bool done = false;
2025 
2026       if (eliminate_not_pairs (opcode, ops, i, oe))
2027 	return;
2028       if (eliminate_duplicate_pair (opcode, ops, &done, i, oe, oelast)
2029 	  || (!done && eliminate_plus_minus_pair (opcode, ops, i, oe))
2030 	  || (!done && eliminate_redundant_comparison (opcode, ops, i, oe)))
2031 	{
2032 	  if (done)
2033 	    return;
2034 	  iterate = true;
2035 	  oelast = NULL;
2036 	  continue;
2037 	}
2038       oelast = oe;
2039       i++;
2040     }
2041 
2042   length = ops->length ();
2043   oelast = ops->last ();
2044 
2045   if (iterate)
2046     optimize_ops_list (opcode, ops);
2047 }
2048 
2049 /* The following functions are subroutines to optimize_range_tests and allow
2050    it to try to change a logical combination of comparisons into a range
2051    test.
2052 
2053    For example, both
2054 	X == 2 || X == 5 || X == 3 || X == 4
2055    and
2056 	X >= 2 && X <= 5
2057    are converted to
2058 	(unsigned) (X - 2) <= 3
2059 
2060    For more information see the comments above fold_range_test in
2061    fold-const.c; this implementation is for GIMPLE.  */
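/* The single unsigned comparison works because for X < 2 the subtraction
   X - 2 wraps around to a value near the unsigned maximum, which is
   necessarily greater than 3; so (unsigned) (X - 2) <= 3 is true exactly
   when X is within [2, 5].  */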
2062 
2063 struct range_entry
2064 {
2065   tree exp;
2066   tree low;
2067   tree high;
2068   bool in_p;
2069   bool strict_overflow_p;
2070   unsigned int idx, next;
2071 };
2072 
2073 /* This is similar to make_range in fold-const.c, but on top of
2074    GIMPLE instead of trees.  If EXP is non-NULL, it should be
2075    an SSA_NAME and the STMT argument is ignored; otherwise the STMT
2076    argument should be a GIMPLE_COND.  */
2077 
2078 static void
2079 init_range_entry (struct range_entry *r, tree exp, gimple *stmt)
2080 {
2081   int in_p;
2082   tree low, high;
2083   bool is_bool, strict_overflow_p;
2084 
2085   r->exp = NULL_TREE;
2086   r->in_p = false;
2087   r->strict_overflow_p = false;
2088   r->low = NULL_TREE;
2089   r->high = NULL_TREE;
2090   if (exp != NULL_TREE
2091       && (TREE_CODE (exp) != SSA_NAME || !INTEGRAL_TYPE_P (TREE_TYPE (exp))))
2092     return;
2093 
2094   /* Start with simply saying "EXP != 0" and then look at the code of EXP
2095      and see if we can refine the range.  Some of the cases below may not
2096      happen, but it doesn't seem worth worrying about this.  We "continue"
2097      the outer loop when we've changed something; otherwise we "break"
2098      the switch, which will "break" the while.  */
2099   low = exp ? build_int_cst (TREE_TYPE (exp), 0) : boolean_false_node;
2100   high = low;
2101   in_p = 0;
2102   strict_overflow_p = false;
2103   is_bool = false;
2104   if (exp == NULL_TREE)
2105     is_bool = true;
2106   else if (TYPE_PRECISION (TREE_TYPE (exp)) == 1)
2107     {
2108       if (TYPE_UNSIGNED (TREE_TYPE (exp)))
2109 	is_bool = true;
2110       else
2111 	return;
2112     }
2113   else if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE)
2114     is_bool = true;
2115 
2116   while (1)
2117     {
2118       enum tree_code code;
2119       tree arg0, arg1, exp_type;
2120       tree nexp;
2121       location_t loc;
2122 
2123       if (exp != NULL_TREE)
2124 	{
2125 	  if (TREE_CODE (exp) != SSA_NAME
2126 	      || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp))
2127 	    break;
2128 
2129 	  stmt = SSA_NAME_DEF_STMT (exp);
2130 	  if (!is_gimple_assign (stmt))
2131 	    break;
2132 
2133 	  code = gimple_assign_rhs_code (stmt);
2134 	  arg0 = gimple_assign_rhs1 (stmt);
2135 	  arg1 = gimple_assign_rhs2 (stmt);
2136 	  exp_type = TREE_TYPE (exp);
2137 	}
2138       else
2139 	{
2140 	  code = gimple_cond_code (stmt);
2141 	  arg0 = gimple_cond_lhs (stmt);
2142 	  arg1 = gimple_cond_rhs (stmt);
2143 	  exp_type = boolean_type_node;
2144 	}
2145 
2146       if (TREE_CODE (arg0) != SSA_NAME)
2147 	break;
2148       loc = gimple_location (stmt);
2149       switch (code)
2150 	{
2151 	case BIT_NOT_EXPR:
2152 	  if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE
2153 	      /* Ensure the range is either +[-,0], +[0,0],
2154 		 -[-,0], -[0,0] or +[1,-], +[1,1], -[1,-] or
2155 		 -[1,1].  If it is e.g. +[-,-] or -[-,-]
2156 		 or similar expression of unconditional true or
2157 		 false, it should not be negated.  */
2158 	      && ((high && integer_zerop (high))
2159 		  || (low && integer_onep (low))))
2160 	    {
2161 	      in_p = !in_p;
2162 	      exp = arg0;
2163 	      continue;
2164 	    }
2165 	  break;
2166 	case SSA_NAME:
2167 	  exp = arg0;
2168 	  continue;
2169 	CASE_CONVERT:
2170 	  if (is_bool)
2171 	    {
2172 	      if ((TYPE_PRECISION (exp_type) == 1
2173 		   || TREE_CODE (exp_type) == BOOLEAN_TYPE)
2174 		  && TYPE_PRECISION (TREE_TYPE (arg0)) > 1)
2175 		return;
2176 	    }
2177 	  else if (TYPE_PRECISION (TREE_TYPE (arg0)) == 1)
2178 	    {
2179 	      if (TYPE_UNSIGNED (TREE_TYPE (arg0)))
2180 		is_bool = true;
2181 	      else
2182 		return;
2183 	    }
2184 	  else if (TREE_CODE (TREE_TYPE (arg0)) == BOOLEAN_TYPE)
2185 	    is_bool = true;
2186 	  goto do_default;
2187 	case EQ_EXPR:
2188 	case NE_EXPR:
2189 	case LT_EXPR:
2190 	case LE_EXPR:
2191 	case GE_EXPR:
2192 	case GT_EXPR:
2193 	  is_bool = true;
2194 	  /* FALLTHRU */
2195 	default:
2196 	  if (!is_bool)
2197 	    return;
2198 	do_default:
2199 	  nexp = make_range_step (loc, code, arg0, arg1, exp_type,
2200 				  &low, &high, &in_p,
2201 				  &strict_overflow_p);
2202 	  if (nexp != NULL_TREE)
2203 	    {
2204 	      exp = nexp;
2205 	      gcc_assert (TREE_CODE (exp) == SSA_NAME);
2206 	      continue;
2207 	    }
2208 	  break;
2209 	}
2210       break;
2211     }
2212   if (is_bool)
2213     {
2214       r->exp = exp;
2215       r->in_p = in_p;
2216       r->low = low;
2217       r->high = high;
2218       r->strict_overflow_p = strict_overflow_p;
2219     }
2220 }
2221 
2222 /* Comparison function for qsort.  Sort entries
2223    without SSA_NAME exp first, then with SSA_NAMEs sorted
2224    by increasing SSA_NAME_VERSION, and for the same SSA_NAMEs
2225    by increasing ->low and if ->low is the same, by increasing
2226    ->high.  ->low == NULL_TREE means minimum, ->high == NULL_TREE
2227    maximum.  */
2228 
2229 static int
2230 range_entry_cmp (const void *a, const void *b)
2231 {
2232   const struct range_entry *p = (const struct range_entry *) a;
2233   const struct range_entry *q = (const struct range_entry *) b;
2234 
2235   if (p->exp != NULL_TREE && TREE_CODE (p->exp) == SSA_NAME)
2236     {
2237       if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
2238 	{
2239 	  /* Group range_entries for the same SSA_NAME together.  */
2240 	  if (SSA_NAME_VERSION (p->exp) < SSA_NAME_VERSION (q->exp))
2241 	    return -1;
2242 	  else if (SSA_NAME_VERSION (p->exp) > SSA_NAME_VERSION (q->exp))
2243 	    return 1;
2244 	  /* If ->low is different, NULL low goes first, then by
2245 	     ascending low.  */
2246 	  if (p->low != NULL_TREE)
2247 	    {
2248 	      if (q->low != NULL_TREE)
2249 		{
2250 		  tree tem = fold_binary (LT_EXPR, boolean_type_node,
2251 					  p->low, q->low);
2252 		  if (tem && integer_onep (tem))
2253 		    return -1;
2254 		  tem = fold_binary (GT_EXPR, boolean_type_node,
2255 				     p->low, q->low);
2256 		  if (tem && integer_onep (tem))
2257 		    return 1;
2258 		}
2259 	      else
2260 		return 1;
2261 	    }
2262 	  else if (q->low != NULL_TREE)
2263 	    return -1;
2264 	  /* If ->high is different, NULL high goes last, before that by
2265 	     ascending high.  */
2266 	  if (p->high != NULL_TREE)
2267 	    {
2268 	      if (q->high != NULL_TREE)
2269 		{
2270 		  tree tem = fold_binary (LT_EXPR, boolean_type_node,
2271 					  p->high, q->high);
2272 		  if (tem && integer_onep (tem))
2273 		    return -1;
2274 		  tem = fold_binary (GT_EXPR, boolean_type_node,
2275 				     p->high, q->high);
2276 		  if (tem && integer_onep (tem))
2277 		    return 1;
2278 		}
2279 	      else
2280 		return -1;
2281 	    }
2282 	  else if (q->high != NULL_TREE)
2283 	    return 1;
2284 	  /* If both ranges are the same, sort below by ascending idx.  */
2285 	}
2286       else
2287 	return 1;
2288     }
2289   else if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
2290     return -1;
2291 
2292   if (p->idx < q->idx)
2293     return -1;
2294   else
2295     {
2296       gcc_checking_assert (p->idx > q->idx);
2297       return 1;
2298     }
2299 }
2300 
2301 /* Helper function for update_range_test.  Force EXPR into an SSA_NAME,
2302    inserting any needed statements before or after GSI, as selected by BEFORE.  */
2303 
2304 static tree
2305 force_into_ssa_name (gimple_stmt_iterator *gsi, tree expr, bool before)
2306 {
2307   enum gsi_iterator_update m = before ? GSI_SAME_STMT : GSI_CONTINUE_LINKING;
2308   tree ret = force_gimple_operand_gsi (gsi, expr, true, NULL_TREE, before, m);
2309   if (TREE_CODE (ret) != SSA_NAME)
2310     {
2311       gimple *g = gimple_build_assign (make_ssa_name (TREE_TYPE (ret)), ret);
2312       if (before)
2313 	gsi_insert_before (gsi, g, GSI_SAME_STMT);
2314       else
2315 	gsi_insert_after (gsi, g, GSI_CONTINUE_LINKING);
2316       ret = gimple_assign_lhs (g);
2317     }
2318   return ret;
2319 }
2320 
2321 /* Helper routine of optimize_range_tests.
2322    [EXP, IN_P, LOW, HIGH, STRICT_OVERFLOW_P] is a merged range for
2323    RANGE and OTHERRANGE through OTHERRANGE + COUNT - 1 ranges,
2324    OPCODE and OPS are arguments of optimize_range_tests.  If OTHERRANGE
2325    is NULL, OTHERRANGEP should not be and then OTHERRANGEP points to
2326    an array of COUNT pointers to other ranges.  Return
2327    true if the range merge has been successful.
2328    If OPCODE is ERROR_MARK, this is called from within
2329    maybe_optimize_range_tests and is performing inter-bb range optimization.
2330    In that case, whether an op is BIT_AND_EXPR or BIT_IOR_EXPR is found in
2331    oe->rank.  */
2332 
2333 static bool
2334 update_range_test (struct range_entry *range, struct range_entry *otherrange,
2335 		   struct range_entry **otherrangep,
2336 		   unsigned int count, enum tree_code opcode,
2337 		   vec<operand_entry *> *ops, tree exp, gimple_seq seq,
2338 		   bool in_p, tree low, tree high, bool strict_overflow_p)
2339 {
2340   operand_entry *oe = (*ops)[range->idx];
2341   tree op = oe->op;
2342   gimple *stmt = op ? SSA_NAME_DEF_STMT (op)
2343 		    : last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id));
2344   location_t loc = gimple_location (stmt);
2345   tree optype = op ? TREE_TYPE (op) : boolean_type_node;
2346   tree tem = build_range_check (loc, optype, unshare_expr (exp),
2347 				in_p, low, high);
2348   enum warn_strict_overflow_code wc = WARN_STRICT_OVERFLOW_COMPARISON;
2349   gimple_stmt_iterator gsi;
2350   unsigned int i, uid;
2351 
2352   if (tem == NULL_TREE)
2353     return false;
2354 
2355   /* If op is default def SSA_NAME, there is no place to insert the
2356      new comparison.  Give up, unless we can use OP itself as the
2357      range test.  */
2358   if (op && SSA_NAME_IS_DEFAULT_DEF (op))
2359     {
2360       if (op == range->exp
2361 	  && ((TYPE_PRECISION (optype) == 1 && TYPE_UNSIGNED (optype))
2362 	      || TREE_CODE (optype) == BOOLEAN_TYPE)
2363 	  && (op == tem
2364 	      || (TREE_CODE (tem) == EQ_EXPR
2365 		  && TREE_OPERAND (tem, 0) == op
2366 		  && integer_onep (TREE_OPERAND (tem, 1))))
2367 	  && opcode != BIT_IOR_EXPR
2368 	  && (opcode != ERROR_MARK || oe->rank != BIT_IOR_EXPR))
2369 	{
2370 	  stmt = NULL;
2371 	  tem = op;
2372 	}
2373       else
2374 	return false;
2375     }
2376 
2377   if (strict_overflow_p && issue_strict_overflow_warning (wc))
2378     warning_at (loc, OPT_Wstrict_overflow,
2379 		"assuming signed overflow does not occur "
2380 		"when simplifying range test");
2381 
2382   if (dump_file && (dump_flags & TDF_DETAILS))
2383     {
2384       struct range_entry *r;
2385       fprintf (dump_file, "Optimizing range tests ");
2386       print_generic_expr (dump_file, range->exp);
2387       fprintf (dump_file, " %c[", range->in_p ? '+' : '-');
2388       print_generic_expr (dump_file, range->low);
2389       fprintf (dump_file, ", ");
2390       print_generic_expr (dump_file, range->high);
2391       fprintf (dump_file, "]");
2392       for (i = 0; i < count; i++)
2393 	{
2394 	  if (otherrange)
2395 	    r = otherrange + i;
2396 	  else
2397 	    r = otherrangep[i];
2398 	  if (r->exp
2399 	      && r->exp != range->exp
2400 	      && TREE_CODE (r->exp) == SSA_NAME)
2401 	    {
2402 	      fprintf (dump_file, " and ");
2403 	      print_generic_expr (dump_file, r->exp);
2404 	    }
2405 	  else
2406 	    fprintf (dump_file, " and");
2407 	  fprintf (dump_file, " %c[", r->in_p ? '+' : '-');
2408 	  print_generic_expr (dump_file, r->low);
2409 	  fprintf (dump_file, ", ");
2410 	  print_generic_expr (dump_file, r->high);
2411 	  fprintf (dump_file, "]");
2412 	}
2413       fprintf (dump_file, "\n into ");
2414       print_generic_expr (dump_file, tem);
2415       fprintf (dump_file, "\n");
2416     }
2417 
2418   if (opcode == BIT_IOR_EXPR
2419       || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
2420     tem = invert_truthvalue_loc (loc, tem);
2421 
2422   tem = fold_convert_loc (loc, optype, tem);
2423   if (stmt)
2424     {
2425       gsi = gsi_for_stmt (stmt);
2426       uid = gimple_uid (stmt);
2427     }
2428   else
2429     {
2430       gsi = gsi_none ();
2431       uid = 0;
2432     }
2433   if (stmt == NULL)
2434     gcc_checking_assert (tem == op);
2435   /* In rare cases range->exp can be equal to the lhs of stmt.
2436      In that case we have to insert after the stmt rather than before
2437      it.  If stmt is a PHI, insert it at the start of the basic block.  */
2438   else if (op != range->exp)
2439     {
2440       gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT);
2441       tem = force_into_ssa_name (&gsi, tem, true);
2442       gsi_prev (&gsi);
2443     }
2444   else if (gimple_code (stmt) != GIMPLE_PHI)
2445     {
2446       gsi_insert_seq_after (&gsi, seq, GSI_CONTINUE_LINKING);
2447       tem = force_into_ssa_name (&gsi, tem, false);
2448     }
2449   else
2450     {
2451       gsi = gsi_after_labels (gimple_bb (stmt));
2452       if (!gsi_end_p (gsi))
2453 	uid = gimple_uid (gsi_stmt (gsi));
2454       else
2455 	{
2456 	  gsi = gsi_start_bb (gimple_bb (stmt));
2457 	  uid = 1;
2458 	  while (!gsi_end_p (gsi))
2459 	    {
2460 	      uid = gimple_uid (gsi_stmt (gsi));
2461 	      gsi_next (&gsi);
2462 	    }
2463 	}
2464       gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT);
2465       tem = force_into_ssa_name (&gsi, tem, true);
2466       if (gsi_end_p (gsi))
2467 	gsi = gsi_last_bb (gimple_bb (stmt));
2468       else
2469 	gsi_prev (&gsi);
2470     }
2471   for (; !gsi_end_p (gsi); gsi_prev (&gsi))
2472     if (gimple_uid (gsi_stmt (gsi)))
2473       break;
2474     else
2475       gimple_set_uid (gsi_stmt (gsi), uid);
2476 
2477   oe->op = tem;
2478   range->exp = exp;
2479   range->low = low;
2480   range->high = high;
2481   range->in_p = in_p;
2482   range->strict_overflow_p = false;
2483 
2484   for (i = 0; i < count; i++)
2485     {
2486       if (otherrange)
2487 	range = otherrange + i;
2488       else
2489 	range = otherrangep[i];
2490       oe = (*ops)[range->idx];
2491       /* Now change all the other range test immediate uses, so that
2492 	 those tests will be optimized away.  */
2493       if (opcode == ERROR_MARK)
2494 	{
2495 	  if (oe->op)
2496 	    oe->op = build_int_cst (TREE_TYPE (oe->op),
2497 				    oe->rank == BIT_IOR_EXPR ? 0 : 1);
2498 	  else
2499 	    oe->op = (oe->rank == BIT_IOR_EXPR
2500 		      ? boolean_false_node : boolean_true_node);
2501 	}
2502       else
2503 	oe->op = error_mark_node;
2504       range->exp = NULL_TREE;
2505       range->low = NULL_TREE;
2506       range->high = NULL_TREE;
2507     }
2508   return true;
2509 }
2510 
2511 /* Optimize X == CST1 || X == CST2
2512    if popcount (CST1 ^ CST2) == 1 into
2513    (X & ~(CST1 ^ CST2)) == (CST1 & ~(CST1 ^ CST2)).
2514    Similarly for ranges.  E.g.
2515    X != 2 && X != 3 && X != 10 && X != 11
2516    will be transformed by the previous optimization into
2517    !((X - 2U) <= 1U || (X - 10U) <= 1U)
2518    and this loop can transform that into
2519    !(((X & ~8) - 2U) <= 1U).  */
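/* Worked through on the example above (for illustration): the two ranges
   are [2, 3] and [10, 11], so lowi ^ lowj == 2 ^ 10 == 8 and
   highi ^ highj == 3 ^ 11 == 8, a single bit; masking that bit off
   folds the second range into the first, giving ((X & ~8) - 2U) <= 1U.  */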
2520 
2521 static bool
2522 optimize_range_tests_xor (enum tree_code opcode, tree type,
2523 			  tree lowi, tree lowj, tree highi, tree highj,
2524 			  vec<operand_entry *> *ops,
2525 			  struct range_entry *rangei,
2526 			  struct range_entry *rangej)
2527 {
2528   tree lowxor, highxor, tem, exp;
2529   /* Check lowi ^ lowj == highi ^ highj and
2530      popcount (lowi ^ lowj) == 1.  */
2531   lowxor = fold_binary (BIT_XOR_EXPR, type, lowi, lowj);
2532   if (lowxor == NULL_TREE || TREE_CODE (lowxor) != INTEGER_CST)
2533     return false;
2534   if (!integer_pow2p (lowxor))
2535     return false;
2536   highxor = fold_binary (BIT_XOR_EXPR, type, highi, highj);
2537   if (!tree_int_cst_equal (lowxor, highxor))
2538     return false;
2539 
2540   tem = fold_build1 (BIT_NOT_EXPR, type, lowxor);
2541   exp = fold_build2 (BIT_AND_EXPR, type, rangei->exp, tem);
2542   lowj = fold_build2 (BIT_AND_EXPR, type, lowi, tem);
2543   highj = fold_build2 (BIT_AND_EXPR, type, highi, tem);
2544   if (update_range_test (rangei, rangej, NULL, 1, opcode, ops, exp,
2545 			 NULL, rangei->in_p, lowj, highj,
2546 			 rangei->strict_overflow_p
2547 			 || rangej->strict_overflow_p))
2548     return true;
2549   return false;
2550 }
2551 
2552 /* Optimize X == CST1 || X == CST2
2553    if popcount (CST2 - CST1) == 1 into
2554    ((X - CST1) & ~(CST2 - CST1)) == 0.
2555    Similarly for ranges.  E.g.
2556    X == 43 || X == 76 || X == 44 || X == 78 || X == 77 || X == 46
2557    || X == 75 || X == 45
2558    will be transformed by the previous optimization into
2559    (X - 43U) <= 3U || (X - 75U) <= 3U
2560    and this loop can transform that into
2561    ((X - 43U) & ~(75U - 43U)) <= 3U.  */
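/* Worked through on the example above (for illustration): the two ranges
   are [43, 46] and [75, 78], so highi - lowi == highj - lowj == 3 and
   lowj - lowi == 75 - 43 == 32, a power of two; masking that bit off of
   X - 43U folds both ranges into ((X - 43U) & ~32U) <= 3U.  */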
2562 static bool
2563 optimize_range_tests_diff (enum tree_code opcode, tree type,
2564 			   tree lowi, tree lowj, tree highi, tree highj,
2565 			   vec<operand_entry *> *ops,
2566 			   struct range_entry *rangei,
2567 			   struct range_entry *rangej)
2568 {
2569   tree tem1, tem2, mask;
2570   /* Check highi - lowi == highj - lowj.  */
2571   tem1 = fold_binary (MINUS_EXPR, type, highi, lowi);
2572   if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST)
2573     return false;
2574   tem2 = fold_binary (MINUS_EXPR, type, highj, lowj);
2575   if (!tree_int_cst_equal (tem1, tem2))
2576     return false;
2577   /* Check popcount (lowj - lowi) == 1.  */
2578   tem1 = fold_binary (MINUS_EXPR, type, lowj, lowi);
2579   if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST)
2580     return false;
2581   if (!integer_pow2p (tem1))
2582     return false;
2583 
2584   type = unsigned_type_for (type);
2585   tem1 = fold_convert (type, tem1);
2586   tem2 = fold_convert (type, tem2);
2587   lowi = fold_convert (type, lowi);
2588   mask = fold_build1 (BIT_NOT_EXPR, type, tem1);
2589   tem1 = fold_build2 (MINUS_EXPR, type,
2590 		      fold_convert (type, rangei->exp), lowi);
2591   tem1 = fold_build2 (BIT_AND_EXPR, type, tem1, mask);
2592   lowj = build_int_cst (type, 0);
2593   if (update_range_test (rangei, rangej, NULL, 1, opcode, ops, tem1,
2594 			 NULL, rangei->in_p, lowj, tem2,
2595 			 rangei->strict_overflow_p
2596 			 || rangej->strict_overflow_p))
2597     return true;
2598   return false;
2599 }
2600 
2601 /* Perform the checks common to optimize_range_tests_xor and
2602    optimize_range_tests_diff.
2603    If OPTIMIZE_XOR is TRUE, call optimize_range_tests_xor;
2604    otherwise call optimize_range_tests_diff.  */
2605 
2606 static bool
2607 optimize_range_tests_1 (enum tree_code opcode, int first, int length,
2608 			bool optimize_xor, vec<operand_entry *> *ops,
2609 			struct range_entry *ranges)
2610 {
2611   int i, j;
2612   bool any_changes = false;
2613   for (i = first; i < length; i++)
2614     {
2615       tree lowi, highi, lowj, highj, type, tem;
2616 
2617       if (ranges[i].exp == NULL_TREE || ranges[i].in_p)
2618 	continue;
2619       type = TREE_TYPE (ranges[i].exp);
2620       if (!INTEGRAL_TYPE_P (type))
2621 	continue;
2622       lowi = ranges[i].low;
2623       if (lowi == NULL_TREE)
2624 	lowi = TYPE_MIN_VALUE (type);
2625       highi = ranges[i].high;
2626       if (highi == NULL_TREE)
2627 	continue;
2628       for (j = i + 1; j < length && j < i + 64; j++)
2629 	{
2630 	  bool changes;
2631 	  if (ranges[i].exp != ranges[j].exp || ranges[j].in_p)
2632 	    continue;
2633 	  lowj = ranges[j].low;
2634 	  if (lowj == NULL_TREE)
2635 	    continue;
2636 	  highj = ranges[j].high;
2637 	  if (highj == NULL_TREE)
2638 	    highj = TYPE_MAX_VALUE (type);
2639 	  /* Check lowj > highi.  */
2640 	  tem = fold_binary (GT_EXPR, boolean_type_node,
2641 			     lowj, highi);
2642 	  if (tem == NULL_TREE || !integer_onep (tem))
2643 	    continue;
2644 	  if (optimize_xor)
2645 	    changes = optimize_range_tests_xor (opcode, type, lowi, lowj,
2646 						highi, highj, ops,
2647 						ranges + i, ranges + j);
2648 	  else
2649 	    changes = optimize_range_tests_diff (opcode, type, lowi, lowj,
2650 						 highi, highj, ops,
2651 						 ranges + i, ranges + j);
2652 	  if (changes)
2653 	    {
2654 	      any_changes = true;
2655 	      break;
2656 	    }
2657 	}
2658     }
2659   return any_changes;
2660 }
2661 
2662 /* Helper function of optimize_range_tests_to_bit_test.  Handle a single
2663    range [LOW, HIGH] tested on EXP: compute the bit mask of the bits to
2664    test and return EXP on success, NULL otherwise.  */
2665 
2666 static tree
2667 extract_bit_test_mask (tree exp, int prec, tree totallow, tree low, tree high,
2668 		       wide_int *mask, tree *totallowp)
2669 {
2670   tree tem = int_const_binop (MINUS_EXPR, high, low);
2671   if (tem == NULL_TREE
2672       || TREE_CODE (tem) != INTEGER_CST
2673       || TREE_OVERFLOW (tem)
2674       || tree_int_cst_sgn (tem) == -1
2675       || compare_tree_int (tem, prec) != -1)
2676     return NULL_TREE;
2677 
2678   unsigned HOST_WIDE_INT max = tree_to_uhwi (tem) + 1;
2679   *mask = wi::shifted_mask (0, max, false, prec);
2680   if (TREE_CODE (exp) == BIT_AND_EXPR
2681       && TREE_CODE (TREE_OPERAND (exp, 1)) == INTEGER_CST)
2682     {
2683       widest_int msk = wi::to_widest (TREE_OPERAND (exp, 1));
2684       msk = wi::zext (~msk, TYPE_PRECISION (TREE_TYPE (exp)));
2685       if (wi::popcount (msk) == 1
2686 	  && wi::ltu_p (msk, prec - max))
2687 	{
2688 	  *mask |= wi::shifted_mask (msk.to_uhwi (), max, false, prec);
2689 	  max += msk.to_uhwi ();
2690 	  exp = TREE_OPERAND (exp, 0);
2691 	  if (integer_zerop (low)
2692 	      && TREE_CODE (exp) == PLUS_EXPR
2693 	      && TREE_CODE (TREE_OPERAND (exp, 1)) == INTEGER_CST)
2694 	    {
2695 	      tree ret = TREE_OPERAND (exp, 0);
2696 	      STRIP_NOPS (ret);
2697 	      widest_int bias
2698 	        = wi::neg (wi::sext (wi::to_widest (TREE_OPERAND (exp, 1)),
2699 				     TYPE_PRECISION (TREE_TYPE (low))));
2700 	      tree tbias = wide_int_to_tree (TREE_TYPE (ret), bias);
2701 	      if (totallowp)
2702 		{
2703 		  *totallowp = tbias;
2704 		  return ret;
2705 		}
2706 	      else if (!tree_int_cst_lt (totallow, tbias))
2707 		return NULL_TREE;
2708 	      bias = wi::to_widest (tbias);
2709 	      bias -= wi::to_widest (totallow);
2710 	      if (bias >= 0 && bias < prec - max)
2711 		{
2712 		  *mask = wi::lshift (*mask, bias);
2713 		  return ret;
2714 		}
2715 	    }
2716 	}
2717     }
2718   if (totallowp)
2719     return exp;
2720   if (!tree_int_cst_lt (totallow, low))
2721     return exp;
2722   tem = int_const_binop (MINUS_EXPR, low, totallow);
2723   if (tem == NULL_TREE
2724       || TREE_CODE (tem) != INTEGER_CST
2725       || TREE_OVERFLOW (tem)
2726       || compare_tree_int (tem, prec - max) == 1)
2727     return NULL_TREE;
2728 
2729   *mask = wi::lshift (*mask, wi::to_widest (tem));
2730   return exp;
2731 }
2732 
2733 /* Attempt to optimize small range tests using bit test.
2734    E.g.
2735    X != 43 && X != 76 && X != 44 && X != 78 && X != 49
2736    && X != 77 && X != 46 && X != 75 && X != 45 && X != 82
2737    has been transformed by the earlier optimizations into:
2738    ((X - 43U) & ~32U) > 3U && X != 49 && X != 82
2739    As all the 43 through 82 range is less than 64 numbers,
2740    for 64-bit word targets optimize that into:
2741    (X - 43U) > 40U && ((1 << (X - 43U)) & 0x8F0000004FULL) == 0  */
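/* For illustration, the mask above is built from the bit positions
   X - 43 of the remaining values: 43..46 and 49 give bits 0-3 and 6
   (0x4F), 75..78 give bits 32-35 (0xF00000000), and 82 gives bit 39
   (0x8000000000), which together form 0x8F0000004FULL.  */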
2742 
2743 static bool
2744 optimize_range_tests_to_bit_test (enum tree_code opcode, int first, int length,
2745 				  vec<operand_entry *> *ops,
2746 				  struct range_entry *ranges)
2747 {
2748   int i, j;
2749   bool any_changes = false;
2750   int prec = GET_MODE_BITSIZE (word_mode);
2751   auto_vec<struct range_entry *, 64> candidates;
2752 
2753   for (i = first; i < length - 2; i++)
2754     {
2755       tree lowi, highi, lowj, highj, type;
2756 
2757       if (ranges[i].exp == NULL_TREE || ranges[i].in_p)
2758 	continue;
2759       type = TREE_TYPE (ranges[i].exp);
2760       if (!INTEGRAL_TYPE_P (type))
2761 	continue;
2762       lowi = ranges[i].low;
2763       if (lowi == NULL_TREE)
2764 	lowi = TYPE_MIN_VALUE (type);
2765       highi = ranges[i].high;
2766       if (highi == NULL_TREE)
2767 	continue;
2768       wide_int mask;
2769       tree exp = extract_bit_test_mask (ranges[i].exp, prec, lowi, lowi,
2770 					highi, &mask, &lowi);
2771       if (exp == NULL_TREE)
2772 	continue;
2773       bool strict_overflow_p = ranges[i].strict_overflow_p;
2774       candidates.truncate (0);
2775       int end = MIN (i + 64, length);
2776       for (j = i + 1; j < end; j++)
2777 	{
2778 	  tree exp2;
2779 	  if (ranges[j].exp == NULL_TREE || ranges[j].in_p)
2780 	    continue;
2781 	  if (ranges[j].exp == exp)
2782 	    ;
2783 	  else if (TREE_CODE (ranges[j].exp) == BIT_AND_EXPR)
2784 	    {
2785 	      exp2 = TREE_OPERAND (ranges[j].exp, 0);
2786 	      if (exp2 == exp)
2787 		;
2788 	      else if (TREE_CODE (exp2) == PLUS_EXPR)
2789 		{
2790 		  exp2 = TREE_OPERAND (exp2, 0);
2791 		  STRIP_NOPS (exp2);
2792 		  if (exp2 != exp)
2793 		    continue;
2794 		}
2795 	      else
2796 		continue;
2797 	    }
2798 	  else
2799 	    continue;
2800 	  lowj = ranges[j].low;
2801 	  if (lowj == NULL_TREE)
2802 	    continue;
2803 	  highj = ranges[j].high;
2804 	  if (highj == NULL_TREE)
2805 	    highj = TYPE_MAX_VALUE (type);
2806 	  wide_int mask2;
2807 	  exp2 = extract_bit_test_mask (ranges[j].exp, prec, lowi, lowj,
2808 					highj, &mask2, NULL);
2809 	  if (exp2 != exp)
2810 	    continue;
2811 	  mask |= mask2;
2812 	  strict_overflow_p |= ranges[j].strict_overflow_p;
2813 	  candidates.safe_push (&ranges[j]);
2814 	}
2815 
2816       /* If we would otherwise need 3 or more comparisons, use a bit test.  */
2817       if (candidates.length () >= 2)
2818 	{
2819 	  tree high = wide_int_to_tree (TREE_TYPE (lowi),
2820 					wi::to_widest (lowi)
2821 					+ prec - 1 - wi::clz (mask));
2822 	  operand_entry *oe = (*ops)[ranges[i].idx];
2823 	  tree op = oe->op;
2824 	  gimple *stmt = op ? SSA_NAME_DEF_STMT (op)
2825 			    : last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id));
2826 	  location_t loc = gimple_location (stmt);
2827 	  tree optype = op ? TREE_TYPE (op) : boolean_type_node;
2828 
2829 	  /* See if it isn't cheaper to pretend the minimum value of the
2830 	     range is 0, if the maximum value is small enough.
2831 	     We can then avoid the subtraction of the minimum value, but the
2832 	     mask constant might be more expensive.  */
2833 	  if (compare_tree_int (lowi, 0) > 0
2834 	      && compare_tree_int (high, prec) < 0)
2835 	    {
2836 	      int cost_diff;
2837 	      HOST_WIDE_INT m = tree_to_uhwi (lowi);
2838 	      rtx reg = gen_raw_REG (word_mode, 10000);
2839 	      bool speed_p = optimize_bb_for_speed_p (gimple_bb (stmt));
2840 	      cost_diff = set_rtx_cost (gen_rtx_PLUS (word_mode, reg,
2841 						      GEN_INT (-m)), speed_p);
2842 	      rtx r = immed_wide_int_const (mask, word_mode);
2843 	      cost_diff += set_src_cost (gen_rtx_AND (word_mode, reg, r),
2844 					 word_mode, speed_p);
2845 	      r = immed_wide_int_const (wi::lshift (mask, m), word_mode);
2846 	      cost_diff -= set_src_cost (gen_rtx_AND (word_mode, reg, r),
2847 					 word_mode, speed_p);
2848 	      if (cost_diff > 0)
2849 		{
2850 		  mask = wi::lshift (mask, m);
2851 		  lowi = build_zero_cst (TREE_TYPE (lowi));
2852 		}
2853 	    }
2854 
2855 	  tree tem = build_range_check (loc, optype, unshare_expr (exp),
2856 					false, lowi, high);
2857 	  if (tem == NULL_TREE || is_gimple_val (tem))
2858 	    continue;
2859 	  tree etype = unsigned_type_for (TREE_TYPE (exp));
2860 	  exp = fold_build2_loc (loc, MINUS_EXPR, etype,
2861 				 fold_convert_loc (loc, etype, exp),
2862 				 fold_convert_loc (loc, etype, lowi));
2863 	  exp = fold_convert_loc (loc, integer_type_node, exp);
2864 	  tree word_type = lang_hooks.types.type_for_mode (word_mode, 1);
2865 	  exp = fold_build2_loc (loc, LSHIFT_EXPR, word_type,
2866 				 build_int_cst (word_type, 1), exp);
2867 	  exp = fold_build2_loc (loc, BIT_AND_EXPR, word_type, exp,
2868 				 wide_int_to_tree (word_type, mask));
2869 	  exp = fold_build2_loc (loc, EQ_EXPR, optype, exp,
2870 				 build_zero_cst (word_type));
2871 	  if (is_gimple_val (exp))
2872 	    continue;
2873 
2874 	  /* The shift might have undefined behavior if TEM is true,
2875 	     but reassociate_bb isn't prepared to have basic blocks
2876 	     split when it is running.  So, temporarily emit code
2877 	     with BIT_IOR_EXPR instead of &&, and fix it up in
2878 	     branch_fixup.  */
2879 	  gimple_seq seq;
2880 	  tem = force_gimple_operand (tem, &seq, true, NULL_TREE);
2881 	  gcc_assert (TREE_CODE (tem) == SSA_NAME);
2882 	  gimple_set_visited (SSA_NAME_DEF_STMT (tem), true);
2883 	  gimple_seq seq2;
2884 	  exp = force_gimple_operand (exp, &seq2, true, NULL_TREE);
2885 	  gimple_seq_add_seq_without_update (&seq, seq2);
2886 	  gcc_assert (TREE_CODE (exp) == SSA_NAME);
2887 	  gimple_set_visited (SSA_NAME_DEF_STMT (exp), true);
2888 	  gimple *g = gimple_build_assign (make_ssa_name (optype),
2889 					   BIT_IOR_EXPR, tem, exp);
2890 	  gimple_set_location (g, loc);
2891 	  gimple_seq_add_stmt_without_update (&seq, g);
2892 	  exp = gimple_assign_lhs (g);
2893 	  tree val = build_zero_cst (optype);
2894 	  if (update_range_test (&ranges[i], NULL, candidates.address (),
2895 				 candidates.length (), opcode, ops, exp,
2896 				 seq, false, val, val, strict_overflow_p))
2897 	    {
2898 	      any_changes = true;
2899 	      reassoc_branch_fixups.safe_push (tem);
2900 	    }
2901 	  else
2902 	    gimple_seq_discard (seq);
2903 	}
2904     }
2905   return any_changes;
2906 }
2907 
2908 /* Optimize x != 0 || y != 0 || z != 0 into (x | y | z) != 0 and
2909    similarly x == -1 && y == -1 && z == -1 into (x & y & z) == -1.  */
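/* This works because a bitwise IOR is nonzero iff at least one operand is
   nonzero, and a bitwise AND is all-ones iff every operand is all-ones,
   so a single test of the combined value replaces the whole chain.  */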
2910 
2911 static bool
2912 optimize_range_tests_cmp_bitwise (enum tree_code opcode, int first, int length,
2913 				  vec<operand_entry *> *ops,
2914 				  struct range_entry *ranges)
2915 {
2916   int i;
2917   unsigned int b;
2918   bool any_changes = false;
2919   auto_vec<int, 128> buckets;
2920   auto_vec<int, 32> chains;
2921   auto_vec<struct range_entry *, 32> candidates;
2922 
2923   for (i = first; i < length; i++)
2924     {
2925       if (ranges[i].exp == NULL_TREE
2926 	  || TREE_CODE (ranges[i].exp) != SSA_NAME
2927 	  || !ranges[i].in_p
2928 	  || TYPE_PRECISION (TREE_TYPE (ranges[i].exp)) <= 1
2929 	  || TREE_CODE (TREE_TYPE (ranges[i].exp)) == BOOLEAN_TYPE
2930 	  || ranges[i].low == NULL_TREE
2931 	  || ranges[i].low != ranges[i].high)
2932 	continue;
2933 
2934       bool zero_p = integer_zerop (ranges[i].low);
2935       if (!zero_p && !integer_all_onesp (ranges[i].low))
2936 	continue;
2937 
2938       b = TYPE_PRECISION (TREE_TYPE (ranges[i].exp)) * 2 + !zero_p;
2939       if (buckets.length () <= b)
2940 	buckets.safe_grow_cleared (b + 1);
2941       if (chains.length () <= (unsigned) i)
2942 	chains.safe_grow (i + 1);
2943       chains[i] = buckets[b];
2944       buckets[b] = i + 1;
2945     }
2946 
2947   FOR_EACH_VEC_ELT (buckets, b, i)
2948     if (i && chains[i - 1])
2949       {
2950 	int j, k = i;
2951 	for (j = chains[i - 1]; j; j = chains[j - 1])
2952 	  {
2953 	    gimple *gk = SSA_NAME_DEF_STMT (ranges[k - 1].exp);
2954 	    gimple *gj = SSA_NAME_DEF_STMT (ranges[j - 1].exp);
2955 	    if (reassoc_stmt_dominates_stmt_p (gk, gj))
2956 	      k = j;
2957 	  }
2958 	tree type1 = TREE_TYPE (ranges[k - 1].exp);
2959 	tree type2 = NULL_TREE;
2960 	bool strict_overflow_p = false;
2961 	candidates.truncate (0);
2962 	for (j = i; j; j = chains[j - 1])
2963 	  {
2964 	    tree type = TREE_TYPE (ranges[j - 1].exp);
2965 	    strict_overflow_p |= ranges[j - 1].strict_overflow_p;
2966 	    if (j == k
2967 		|| useless_type_conversion_p (type1, type))
2968 	      ;
2969 	    else if (type2 == NULL_TREE
2970 		     || useless_type_conversion_p (type2, type))
2971 	      {
2972 		if (type2 == NULL_TREE)
2973 		  type2 = type;
2974 		candidates.safe_push (&ranges[j - 1]);
2975 	      }
2976 	  }
2977 	unsigned l = candidates.length ();
2978 	for (j = i; j; j = chains[j - 1])
2979 	  {
2980 	    tree type = TREE_TYPE (ranges[j - 1].exp);
2981 	    if (j == k)
2982 	      continue;
2983 	    if (useless_type_conversion_p (type1, type))
2984 	      ;
2985 	    else if (type2 == NULL_TREE
2986 		     || useless_type_conversion_p (type2, type))
2987 	      continue;
2988 	    candidates.safe_push (&ranges[j - 1]);
2989 	  }
2990 	gimple_seq seq = NULL;
2991 	tree op = NULL_TREE;
2992 	unsigned int id;
2993 	struct range_entry *r;
2994 	candidates.safe_push (&ranges[k - 1]);
2995 	FOR_EACH_VEC_ELT (candidates, id, r)
2996 	  {
2997 	    gimple *g;
2998 	    if (id == 0)
2999 	      {
3000 		op = r->exp;
3001 		continue;
3002 	      }
3003 	    if (id == l)
3004 	      {
3005 		g = gimple_build_assign (make_ssa_name (type1), NOP_EXPR, op);
3006 		gimple_seq_add_stmt_without_update (&seq, g);
3007 		op = gimple_assign_lhs (g);
3008 	      }
3009 	    tree type = TREE_TYPE (r->exp);
3010 	    tree exp = r->exp;
3011 	    if (id >= l && !useless_type_conversion_p (type1, type))
3012 	      {
3013 		g = gimple_build_assign (make_ssa_name (type1), NOP_EXPR, exp);
3014 		gimple_seq_add_stmt_without_update (&seq, g);
3015 		exp = gimple_assign_lhs (g);
3016 	      }
3017 	    g = gimple_build_assign (make_ssa_name (id >= l ? type1 : type2),
3018 				     (b & 1) ? BIT_AND_EXPR : BIT_IOR_EXPR,
3019 				     op, exp);
3020 	    gimple_seq_add_stmt_without_update (&seq, g);
3021 	    op = gimple_assign_lhs (g);
3022 	  }
3023 	candidates.pop ();
3024 	if (update_range_test (&ranges[k - 1], NULL, candidates.address (),
3025 			       candidates.length (), opcode, ops, op,
3026 			       seq, true, ranges[k - 1].low,
3027 			       ranges[k - 1].low, strict_overflow_p))
3028 	  any_changes = true;
3029 	else
3030 	  gimple_seq_discard (seq);
3031       }
3032 
3033   return any_changes;
3034 }
3035 
3036 /* Attempt to optimize for signed a and b where b is known to be >= 0:
3037    a >= 0 && a < b into (unsigned) a < (unsigned) b
3038    a >= 0 && a <= b into (unsigned) a <= (unsigned) b  */
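/* A sketch of why the casts are safe: because b >= 0, (unsigned) b stays
   below the sign-bit boundary, while a negative a maps above it, so the
   unsigned comparison is false exactly when a < 0; for a >= 0 both casts
   preserve the values and the comparison is unchanged.  */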
3039 
3040 static bool
3041 optimize_range_tests_var_bound (enum tree_code opcode, int first, int length,
3042 				vec<operand_entry *> *ops,
3043 				struct range_entry *ranges,
3044 				basic_block first_bb)
3045 {
3046   int i;
3047   bool any_changes = false;
3048   hash_map<tree, int> *map = NULL;
3049 
3050   for (i = first; i < length; i++)
3051     {
3052       if (ranges[i].exp == NULL_TREE
3053 	  || TREE_CODE (ranges[i].exp) != SSA_NAME
3054 	  || !ranges[i].in_p)
3055 	continue;
3056 
3057       tree type = TREE_TYPE (ranges[i].exp);
3058       if (!INTEGRAL_TYPE_P (type)
3059 	  || TYPE_UNSIGNED (type)
3060 	  || ranges[i].low == NULL_TREE
3061 	  || !integer_zerop (ranges[i].low)
3062 	  || ranges[i].high != NULL_TREE)
3063 	continue;
3064       /* EXP >= 0 here.  */
3065       if (map == NULL)
3066 	map = new hash_map <tree, int>;
3067       map->put (ranges[i].exp, i);
3068     }
3069 
3070   if (map == NULL)
3071     return false;
3072 
3073   for (i = 0; i < length; i++)
3074     {
3075       bool in_p = ranges[i].in_p;
3076       if (ranges[i].low == NULL_TREE
3077 	  || ranges[i].high == NULL_TREE)
3078 	continue;
3079       if (!integer_zerop (ranges[i].low)
3080 	  || !integer_zerop (ranges[i].high))
3081 	{
3082 	  if (ranges[i].exp
3083 	      && TYPE_PRECISION (TREE_TYPE (ranges[i].exp)) == 1
3084 	      && TYPE_UNSIGNED (TREE_TYPE (ranges[i].exp))
3085 	      && integer_onep (ranges[i].low)
3086 	      && integer_onep (ranges[i].high))
3087 	    in_p = !in_p;
3088 	  else
3089 	    continue;
3090 	}
3091 
3092       gimple *stmt;
3093       tree_code ccode;
3094       tree rhs1, rhs2;
3095       if (ranges[i].exp)
3096 	{
3097 	  if (TREE_CODE (ranges[i].exp) != SSA_NAME)
3098 	    continue;
3099 	  stmt = SSA_NAME_DEF_STMT (ranges[i].exp);
3100 	  if (!is_gimple_assign (stmt))
3101 	    continue;
3102 	  ccode = gimple_assign_rhs_code (stmt);
3103 	  rhs1 = gimple_assign_rhs1 (stmt);
3104 	  rhs2 = gimple_assign_rhs2 (stmt);
3105 	}
3106       else
3107 	{
3108 	  operand_entry *oe = (*ops)[ranges[i].idx];
3109 	  stmt = last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id));
3110 	  if (gimple_code (stmt) != GIMPLE_COND)
3111 	    continue;
3112 	  ccode = gimple_cond_code (stmt);
3113 	  rhs1 = gimple_cond_lhs (stmt);
3114 	  rhs2 = gimple_cond_rhs (stmt);
3115 	}
3116 
3117       if (TREE_CODE (rhs1) != SSA_NAME
3118 	  || rhs2 == NULL_TREE
3119 	  || TREE_CODE (rhs2) != SSA_NAME)
3120 	continue;
3121 
3122       switch (ccode)
3123 	{
3124 	case GT_EXPR:
3125 	case GE_EXPR:
3126 	case LT_EXPR:
3127 	case LE_EXPR:
3128 	  break;
3129 	default:
3130 	  continue;
3131 	}
3132       if (in_p)
3133 	ccode = invert_tree_comparison (ccode, false);
3134       switch (ccode)
3135 	{
3136 	case GT_EXPR:
3137 	case GE_EXPR:
3138 	  std::swap (rhs1, rhs2);
3139 	  ccode = swap_tree_comparison (ccode);
3140 	  break;
3141 	case LT_EXPR:
3142 	case LE_EXPR:
3143 	  break;
3144 	default:
3145 	  gcc_unreachable ();
3146 	}
3147 
3148       int *idx = map->get (rhs1);
3149       if (idx == NULL)
3150 	continue;
3151 
3152       /* maybe_optimize_range_tests allows statements without side-effects
3153 	 in the basic blocks as long as they are consumed in the same bb.
3154 	 Make sure rhs2's def stmt is not among them, otherwise we can't
3155 	 safely use get_nonzero_bits on it.  E.g. in:
3156 	  # RANGE [-83, 1] NONZERO 173
3157 	  # k_32 = PHI <k_47(13), k_12(9)>
3158 	 ...
3159 	  if (k_32 >= 0)
3160 	    goto <bb 5>; [26.46%]
3161 	  else
3162 	    goto <bb 9>; [73.54%]
3163 
3164 	  <bb 5> [local count: 140323371]:
3165 	  # RANGE [0, 1] NONZERO 1
3166 	  _5 = (int) k_32;
3167 	  # RANGE [0, 4] NONZERO 4
3168 	  _21 = _5 << 2;
3169 	  # RANGE [0, 4] NONZERO 4
3170 	  iftmp.0_44 = (char) _21;
3171 	  if (k_32 < iftmp.0_44)
3172 	    goto <bb 6>; [84.48%]
3173 	  else
3174 	    goto <bb 9>; [15.52%]
3175 	 the ranges on _5/_21/iftmp.0_44 are flow sensitive; they assume that
3176 	 k_32 >= 0.  If we'd optimize k_32 >= 0 to true and k_32 < iftmp.0_44
3177 	 to (unsigned) k_32 < (unsigned) iftmp.0_44, then we would execute
3178 	 those stmts even for negative k_32 and the value ranges would be no
3179 	 longer guaranteed and so the optimization would be invalid.  */
3180       if (opcode == ERROR_MARK)
3181 	{
3182 	  gimple *g = SSA_NAME_DEF_STMT (rhs2);
3183 	  basic_block bb2 = gimple_bb (g);
3184 	  if (bb2
3185 	      && bb2 != first_bb
3186 	      && dominated_by_p (CDI_DOMINATORS, bb2, first_bb))
3187 	    {
3188 	      /* As an exception, handle a few common cases.  */
3189 	      if (gimple_assign_cast_p (g)
3190 		  && INTEGRAL_TYPE_P (TREE_TYPE (gimple_assign_rhs1 (g)))
3191 		  && TYPE_UNSIGNED (TREE_TYPE (gimple_assign_rhs1 (g)))
3192 		  && (TYPE_PRECISION (TREE_TYPE (rhs2))
3193 		      > TYPE_PRECISION (TREE_TYPE (gimple_assign_rhs1 (g)))))
3194 		/* Zero-extension is always ok.  */ ;
3195 	      else if (is_gimple_assign (g)
3196 		       && gimple_assign_rhs_code (g) == BIT_AND_EXPR
3197 		       && TREE_CODE (gimple_assign_rhs2 (g)) == INTEGER_CST
3198 		       && !wi::neg_p (wi::to_wide (gimple_assign_rhs2 (g))))
3199 		/* Masking with INTEGER_CST with MSB clear is always ok
3200 		   too.  */ ;
3201 	      else
3202 		continue;
3203 	    }
3204 	}
3205 
3206       wide_int nz = get_nonzero_bits (rhs2);
3207       if (wi::neg_p (nz))
3208 	continue;
3209 
3210       /* We have EXP < RHS2 or EXP <= RHS2 where EXP >= 0
3211 	 and RHS2 is known to be >= 0.  */
3212       tree utype = unsigned_type_for (TREE_TYPE (rhs1));
3213 
3214       enum warn_strict_overflow_code wc = WARN_STRICT_OVERFLOW_COMPARISON;
3215       if ((ranges[*idx].strict_overflow_p
3216 	   || ranges[i].strict_overflow_p)
3217 	  && issue_strict_overflow_warning (wc))
3218 	warning_at (gimple_location (stmt), OPT_Wstrict_overflow,
3219 		    "assuming signed overflow does not occur "
3220 		    "when simplifying range test");
3221 
3222       if (dump_file && (dump_flags & TDF_DETAILS))
3223 	{
3224 	  struct range_entry *r = &ranges[*idx];
3225 	  fprintf (dump_file, "Optimizing range test ");
3226 	  print_generic_expr (dump_file, r->exp);
3227 	  fprintf (dump_file, " +[");
3228 	  print_generic_expr (dump_file, r->low);
3229 	  fprintf (dump_file, ", ");
3230 	  print_generic_expr (dump_file, r->high);
3231 	  fprintf (dump_file, "] and comparison ");
3232 	  print_generic_expr (dump_file, rhs1);
3233 	  fprintf (dump_file, " %s ", op_symbol_code (ccode));
3234 	  print_generic_expr (dump_file, rhs2);
3235 	  fprintf (dump_file, "\n into (");
3236 	  print_generic_expr (dump_file, utype);
3237 	  fprintf (dump_file, ") ");
3238 	  print_generic_expr (dump_file, rhs1);
3239 	  fprintf (dump_file, " %s (", op_symbol_code (ccode));
3240 	  print_generic_expr (dump_file, utype);
3241 	  fprintf (dump_file, ") ");
3242 	  print_generic_expr (dump_file, rhs2);
3243 	  fprintf (dump_file, "\n");
3244 	}
3245 
3246       operand_entry *oe = (*ops)[ranges[i].idx];
3247       ranges[i].in_p = 0;
3248       if (opcode == BIT_IOR_EXPR
3249 	  || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
3250 	{
3251 	  ranges[i].in_p = 1;
3252 	  ccode = invert_tree_comparison (ccode, false);
3253 	}
3254 
3255       unsigned int uid = gimple_uid (stmt);
3256       gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
3257       gimple *g = gimple_build_assign (make_ssa_name (utype), NOP_EXPR, rhs1);
3258       gimple_set_uid (g, uid);
3259       rhs1 = gimple_assign_lhs (g);
3260       gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3261       g = gimple_build_assign (make_ssa_name (utype), NOP_EXPR, rhs2);
3262       gimple_set_uid (g, uid);
3263       rhs2 = gimple_assign_lhs (g);
3264       gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3265       if (tree_swap_operands_p (rhs1, rhs2))
3266 	{
3267 	  std::swap (rhs1, rhs2);
3268 	  ccode = swap_tree_comparison (ccode);
3269 	}
3270       if (gimple_code (stmt) == GIMPLE_COND)
3271 	{
3272 	  gcond *c = as_a <gcond *> (stmt);
3273 	  gimple_cond_set_code (c, ccode);
3274 	  gimple_cond_set_lhs (c, rhs1);
3275 	  gimple_cond_set_rhs (c, rhs2);
3276 	  update_stmt (stmt);
3277 	}
3278       else
3279 	{
3280 	  tree ctype = oe->op ? TREE_TYPE (oe->op) : boolean_type_node;
3281 	  if (!INTEGRAL_TYPE_P (ctype)
3282 	      || (TREE_CODE (ctype) != BOOLEAN_TYPE
3283 		  && TYPE_PRECISION (ctype) != 1))
3284 	    ctype = boolean_type_node;
3285 	  g = gimple_build_assign (make_ssa_name (ctype), ccode, rhs1, rhs2);
3286 	  gimple_set_uid (g, uid);
3287 	  gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3288 	  if (oe->op && ctype != TREE_TYPE (oe->op))
3289 	    {
3290 	      g = gimple_build_assign (make_ssa_name (TREE_TYPE (oe->op)),
3291 				       NOP_EXPR, gimple_assign_lhs (g));
3292 	      gimple_set_uid (g, uid);
3293 	      gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3294 	    }
3295 	  ranges[i].exp = gimple_assign_lhs (g);
3296 	  oe->op = ranges[i].exp;
3297 	  ranges[i].low = build_zero_cst (TREE_TYPE (ranges[i].exp));
3298 	  ranges[i].high = ranges[i].low;
3299 	}
3300       ranges[i].strict_overflow_p = false;
3301       oe = (*ops)[ranges[*idx].idx];
3302       /* Now change all the other range test immediate uses, so that
3303 	 those tests will be optimized away.  */
3304       if (opcode == ERROR_MARK)
3305 	{
3306 	  if (oe->op)
3307 	    oe->op = build_int_cst (TREE_TYPE (oe->op),
3308 				    oe->rank == BIT_IOR_EXPR ? 0 : 1);
3309 	  else
3310 	    oe->op = (oe->rank == BIT_IOR_EXPR
3311 		      ? boolean_false_node : boolean_true_node);
3312 	}
3313       else
3314 	oe->op = error_mark_node;
3315       ranges[*idx].exp = NULL_TREE;
3316       ranges[*idx].low = NULL_TREE;
3317       ranges[*idx].high = NULL_TREE;
3318       any_changes = true;
3319     }
3320 
3321   delete map;
3322   return any_changes;
3323 }
3324 
3325 /* Optimize range tests, similarly to how fold_range_test optimizes
3326    them on trees.  The tree code for the binary
3327    operation between all the operands is OPCODE.
3328    If OPCODE is ERROR_MARK, optimize_range_tests is called from within
3329    maybe_optimize_range_tests for inter-bb range optimization.
3330    In that case if oe->op is NULL, oe->id is bb->index whose
3331    GIMPLE_COND is && or ||ed into the test, and oe->rank says
3332    the actual opcode.
3333    FIRST_BB is the first basic block if OPCODE is ERROR_MARK.  */
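/* As an illustrative example (not taken from a testcase): for a signed
   int A, the chain A >= 0 && A < 16 is represented as two range entries
   which merge_ranges combines into the single range [0, 15];
   update_range_test then replaces both tests with a single check roughly
   of the form (unsigned) A <= 15, when the types permit that rewrite.  */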
3334 
3335 static bool
3336 optimize_range_tests (enum tree_code opcode,
3337 		      vec<operand_entry *> *ops, basic_block first_bb)
3338 {
3339   unsigned int length = ops->length (), i, j, first;
3340   operand_entry *oe;
3341   struct range_entry *ranges;
3342   bool any_changes = false;
3343 
3344   if (length == 1)
3345     return false;
3346 
3347   ranges = XNEWVEC (struct range_entry, length);
3348   for (i = 0; i < length; i++)
3349     {
3350       oe = (*ops)[i];
3351       ranges[i].idx = i;
3352       init_range_entry (ranges + i, oe->op,
3353 			oe->op
3354 			? NULL
3355 			: last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id)));
3356       /* For |, invert it now; we will invert it again before emitting
3357 	 the optimized expression.  */
3358       if (opcode == BIT_IOR_EXPR
3359 	  || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
3360 	ranges[i].in_p = !ranges[i].in_p;
3361     }
3362 
3363   qsort (ranges, length, sizeof (*ranges), range_entry_cmp);
3364   for (i = 0; i < length; i++)
3365     if (ranges[i].exp != NULL_TREE && TREE_CODE (ranges[i].exp) == SSA_NAME)
3366       break;
3367 
3368   /* Try to merge ranges.  */
3369   for (first = i; i < length; i++)
3370     {
3371       tree low = ranges[i].low;
3372       tree high = ranges[i].high;
3373       int in_p = ranges[i].in_p;
3374       bool strict_overflow_p = ranges[i].strict_overflow_p;
3375       int update_fail_count = 0;
3376 
3377       for (j = i + 1; j < length; j++)
3378 	{
3379 	  if (ranges[i].exp != ranges[j].exp)
3380 	    break;
3381 	  if (!merge_ranges (&in_p, &low, &high, in_p, low, high,
3382 			     ranges[j].in_p, ranges[j].low, ranges[j].high))
3383 	    break;
3384 	  strict_overflow_p |= ranges[j].strict_overflow_p;
3385 	}
3386 
3387       if (j == i + 1)
3388 	continue;
3389 
3390       if (update_range_test (ranges + i, ranges + i + 1, NULL, j - i - 1,
3391 			     opcode, ops, ranges[i].exp, NULL, in_p,
3392 			     low, high, strict_overflow_p))
3393 	{
3394 	  i = j - 1;
3395 	  any_changes = true;
3396 	}
3397       /* Avoid quadratic complexity if all merge_ranges calls would succeed,
3398 	 while update_range_test would fail.  */
3399       else if (update_fail_count == 64)
3400 	i = j - 1;
3401       else
3402 	++update_fail_count;
3403     }
3404 
3405   any_changes |= optimize_range_tests_1 (opcode, first, length, true,
3406 					 ops, ranges);
3407 
3408   if (BRANCH_COST (optimize_function_for_speed_p (cfun), false) >= 2)
3409     any_changes |= optimize_range_tests_1 (opcode, first, length, false,
3410 					   ops, ranges);
3411   if (lshift_cheap_p (optimize_function_for_speed_p (cfun)))
3412     any_changes |= optimize_range_tests_to_bit_test (opcode, first, length,
3413 						     ops, ranges);
3414   any_changes |= optimize_range_tests_cmp_bitwise (opcode, first, length,
3415 						   ops, ranges);
3416   any_changes |= optimize_range_tests_var_bound (opcode, first, length, ops,
3417 						 ranges, first_bb);
3418 
3419   if (any_changes && opcode != ERROR_MARK)
3420     {
3421       j = 0;
3422       FOR_EACH_VEC_ELT (*ops, i, oe)
3423 	{
3424 	  if (oe->op == error_mark_node)
3425 	    continue;
3426 	  else if (i != j)
3427 	    (*ops)[j] = oe;
3428 	  j++;
3429 	}
3430       ops->truncate (j);
3431     }
3432 
3433   XDELETEVEC (ranges);
3434   return any_changes;
3435 }
3436 
3437 /* A subroutine of optimize_vec_cond_expr to extract and canonicalize
3438    the operands of the VEC_COND_EXPR.  Returns ERROR_MARK on failure,
3439    otherwise the comparison code.  */
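/* Sketch of the expected shape (SSA names invented for illustration):
     var_1 = VEC_COND_EXPR <x_2 < y_3, { -1, ... }, { 0, ... }>;
   for which this routine returns LT_EXPR, stores the assignment in *RETS
   and sets *RETI to false; if the all-ones vector were the third operand
   instead, the comparison would be inverted and *RETI set to true.  */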
3440 
3441 static tree_code
3442 ovce_extract_ops (tree var, gassign **rets, bool *reti)
3443 {
3444   if (TREE_CODE (var) != SSA_NAME)
3445     return ERROR_MARK;
3446 
3447   gassign *stmt = dyn_cast <gassign *> (SSA_NAME_DEF_STMT (var));
3448   if (stmt == NULL)
3449     return ERROR_MARK;
3450 
3451   /* ??? If we start creating more COND_EXPR, we could perform
3452      this same optimization with them.	For now, simplify.  */
3453   if (gimple_assign_rhs_code (stmt) != VEC_COND_EXPR)
3454     return ERROR_MARK;
3455 
3456   tree cond = gimple_assign_rhs1 (stmt);
3457   tree_code cmp = TREE_CODE (cond);
3458   if (TREE_CODE_CLASS (cmp) != tcc_comparison)
3459     return ERROR_MARK;
3460 
3461   /* ??? For now, allow only canonical true and false result vectors.
3462      We could expand this to other constants should the need arise,
3463      but at the moment we don't create them.  */
3464   tree t = gimple_assign_rhs2 (stmt);
3465   tree f = gimple_assign_rhs3 (stmt);
3466   bool inv;
3467   if (integer_all_onesp (t))
3468     inv = false;
3469   else if (integer_all_onesp (f))
3470     {
3471       cmp = invert_tree_comparison (cmp, false);
3472       inv = true;
3473     }
3474   else
3475     return ERROR_MARK;
3476   if (!integer_zerop (f))
3477     return ERROR_MARK;
3478 
3479   /* Success!  */
3480   if (rets)
3481     *rets = stmt;
3482   if (reti)
3483     *reti = inv;
3484   return cmp;
3485 }
3486 
3487 /* Optimize the condition of VEC_COND_EXPRs which have been combined
3488    with OPCODE (either BIT_AND_EXPR or BIT_IOR_EXPR).  */
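/* Illustration with invented SSA names: given the two entries
     c_1 = VEC_COND_EXPR <x_2 < y_3, { -1, ... }, { 0, ... }>;
     c_4 = VEC_COND_EXPR <x_2 == y_3, { -1, ... }, { 0, ... }>;
   combined with BIT_IOR_EXPR, maybe_fold_or_comparisons can fold the two
   conditions into x_2 <= y_3; the first VEC_COND_EXPR is then rewritten
   to use that condition and the second entry is dropped from OPS.  */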
3489 
3490 static bool
3491 optimize_vec_cond_expr (tree_code opcode, vec<operand_entry *> *ops)
3492 {
3493   unsigned int length = ops->length (), i, j;
3494   bool any_changes = false;
3495 
3496   if (length == 1)
3497     return false;
3498 
3499   for (i = 0; i < length; ++i)
3500     {
3501       tree elt0 = (*ops)[i]->op;
3502 
3503       gassign *stmt0;
3504       bool invert;
3505       tree_code cmp0 = ovce_extract_ops (elt0, &stmt0, &invert);
3506       if (cmp0 == ERROR_MARK)
3507 	continue;
3508 
3509       for (j = i + 1; j < length; ++j)
3510 	{
3511 	  tree &elt1 = (*ops)[j]->op;
3512 
3513 	  gassign *stmt1;
3514 	  tree_code cmp1 = ovce_extract_ops (elt1, &stmt1, NULL);
3515 	  if (cmp1 == ERROR_MARK)
3516 	    continue;
3517 
3518 	  tree cond0 = gimple_assign_rhs1 (stmt0);
3519 	  tree x0 = TREE_OPERAND (cond0, 0);
3520 	  tree y0 = TREE_OPERAND (cond0, 1);
3521 
3522 	  tree cond1 = gimple_assign_rhs1 (stmt1);
3523 	  tree x1 = TREE_OPERAND (cond1, 0);
3524 	  tree y1 = TREE_OPERAND (cond1, 1);
3525 
3526 	  tree comb;
3527 	  if (opcode == BIT_AND_EXPR)
3528 	    comb = maybe_fold_and_comparisons (cmp0, x0, y0, cmp1, x1, y1);
3529 	  else if (opcode == BIT_IOR_EXPR)
3530 	    comb = maybe_fold_or_comparisons (cmp0, x0, y0, cmp1, x1, y1);
3531 	  else
3532 	    gcc_unreachable ();
3533 	  if (comb == NULL)
3534 	    continue;
3535 
3536 	  /* Success! */
3537 	  if (dump_file && (dump_flags & TDF_DETAILS))
3538 	    {
3539 	      fprintf (dump_file, "Transforming ");
3540 	      print_generic_expr (dump_file, cond0);
3541 	      fprintf (dump_file, " %c ", opcode == BIT_AND_EXPR ? '&' : '|');
3542 	      print_generic_expr (dump_file, cond1);
3543 	      fprintf (dump_file, " into ");
3544 	      print_generic_expr (dump_file, comb);
3545 	      fputc ('\n', dump_file);
3546 	    }
3547 
3548 	  gimple_assign_set_rhs1 (stmt0, comb);
3549 	  if (invert)
3550 	    std::swap (*gimple_assign_rhs2_ptr (stmt0),
3551 		       *gimple_assign_rhs3_ptr (stmt0));
3552 	  update_stmt (stmt0);
3553 
3554 	  elt1 = error_mark_node;
3555 	  any_changes = true;
3556 	}
3557     }
3558 
3559   if (any_changes)
3560     {
3561       operand_entry *oe;
3562       j = 0;
3563       FOR_EACH_VEC_ELT (*ops, i, oe)
3564 	{
3565 	  if (oe->op == error_mark_node)
3566 	    continue;
3567 	  else if (i != j)
3568 	    (*ops)[j] = oe;
3569 	  j++;
3570 	}
3571       ops->truncate (j);
3572     }
3573 
3574   return any_changes;
3575 }
3576 
3577 /* Return true if STMT is a cast like:
3578    <bb N>:
3579    ...
3580    _123 = (int) _234;
3581 
3582    <bb M>:
3583    # _345 = PHI <_123(N), 1(...), 1(...)>
3584    where _234 has bool type, _123 has a single use and
3585    bb N has a single successor M.  This is commonly used in
3586    the last block of a range test.
3587 
3588    Also return true if STMT is a tcc_comparison like:
3589    <bb N>:
3590    ...
3591    _234 = a_2(D) == 2;
3592 
3593    <bb M>:
3594    # _345 = PHI <_234(N), 1(...), 1(...)>
3595    _346 = (int) _345;
3596    where _234 has bool type, a single use and
3597    bb N has a single successor M.  This is commonly used in
3598    the last block of a range test.  */
3599 
3600 static bool
3601 final_range_test_p (gimple *stmt)
3602 {
3603   basic_block bb, rhs_bb, lhs_bb;
3604   edge e;
3605   tree lhs, rhs;
3606   use_operand_p use_p;
3607   gimple *use_stmt;
3608 
3609   if (!gimple_assign_cast_p (stmt)
3610       && (!is_gimple_assign (stmt)
3611 	  || (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt))
3612 	      != tcc_comparison)))
3613     return false;
3614   bb = gimple_bb (stmt);
3615   if (!single_succ_p (bb))
3616     return false;
3617   e = single_succ_edge (bb);
3618   if (e->flags & EDGE_COMPLEX)
3619     return false;
3620 
3621   lhs = gimple_assign_lhs (stmt);
3622   rhs = gimple_assign_rhs1 (stmt);
3623   if (gimple_assign_cast_p (stmt)
3624       && (!INTEGRAL_TYPE_P (TREE_TYPE (lhs))
3625 	  || TREE_CODE (rhs) != SSA_NAME
3626 	  || TREE_CODE (TREE_TYPE (rhs)) != BOOLEAN_TYPE))
3627     return false;
3628 
3629   if (!gimple_assign_cast_p (stmt)
3630       && (TREE_CODE (TREE_TYPE (lhs)) != BOOLEAN_TYPE))
3631     return false;
3632 
3633   /* Test whether lhs is consumed only by a PHI in the only successor bb.  */
3634   if (!single_imm_use (lhs, &use_p, &use_stmt))
3635     return false;
3636 
3637   if (gimple_code (use_stmt) != GIMPLE_PHI
3638       || gimple_bb (use_stmt) != e->dest)
3639     return false;
3640 
3641   /* And that the rhs is defined in the same loop.  */
3642   if (gimple_assign_cast_p (stmt))
3643     {
3644       if (TREE_CODE (rhs) != SSA_NAME
3645 	  || !(rhs_bb = gimple_bb (SSA_NAME_DEF_STMT (rhs)))
3646 	  || !flow_bb_inside_loop_p (loop_containing_stmt (stmt), rhs_bb))
3647 	return false;
3648     }
3649   else
3650     {
3651       if (TREE_CODE (lhs) != SSA_NAME
3652 	  || !(lhs_bb = gimple_bb (SSA_NAME_DEF_STMT (lhs)))
3653 	  || !flow_bb_inside_loop_p (loop_containing_stmt (stmt), lhs_bb))
3654 	return false;
3655     }
3656 
3657   return true;
3658 }
3659 
3660 /* Return true if BB is suitable basic block for inter-bb range test
3661    optimization.  If BACKWARD is true, BB should be the only predecessor
3662    of TEST_BB, and *OTHER_BB is either NULL and filled in by the routine,
3663    or compared against to find a common basic block to which all
3664    conditions branch when true resp. false.  If BACKWARD is false,
3665    TEST_BB should be the only predecessor of BB.  */
3666 
3667 static bool
3668 suitable_cond_bb (basic_block bb, basic_block test_bb, basic_block *other_bb,
3669 		  bool backward)
3670 {
3671   edge_iterator ei, ei2;
3672   edge e, e2;
3673   gimple *stmt;
3674   gphi_iterator gsi;
3675   bool other_edge_seen = false;
3676   bool is_cond;
3677 
3678   if (test_bb == bb)
3679     return false;
3680   /* Check last stmt first.  */
3681   stmt = last_stmt (bb);
3682   if (stmt == NULL
3683       || (gimple_code (stmt) != GIMPLE_COND
3684 	  && (backward || !final_range_test_p (stmt)))
3685       || gimple_visited_p (stmt)
3686       || stmt_could_throw_p (stmt)
3687       || *other_bb == bb)
3688     return false;
3689   is_cond = gimple_code (stmt) == GIMPLE_COND;
3690   if (is_cond)
3691     {
3692       /* If last stmt is GIMPLE_COND, verify that one of the succ edges
3693 	 goes to the next bb (if BACKWARD, it is TEST_BB), and the other
3694 	 to *OTHER_BB (if not set yet, try to find it out).  */
3695       if (EDGE_COUNT (bb->succs) != 2)
3696 	return false;
3697       FOR_EACH_EDGE (e, ei, bb->succs)
3698 	{
3699 	  if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
3700 	    return false;
3701 	  if (e->dest == test_bb)
3702 	    {
3703 	      if (backward)
3704 		continue;
3705 	      else
3706 		return false;
3707 	    }
3708 	  if (e->dest == bb)
3709 	    return false;
3710 	  if (*other_bb == NULL)
3711 	    {
3712 	      FOR_EACH_EDGE (e2, ei2, test_bb->succs)
3713 		if (!(e2->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
3714 		  return false;
3715 		else if (e->dest == e2->dest)
3716 		  *other_bb = e->dest;
3717 	      if (*other_bb == NULL)
3718 		return false;
3719 	    }
3720 	  if (e->dest == *other_bb)
3721 	    other_edge_seen = true;
3722 	  else if (backward)
3723 	    return false;
3724 	}
3725       if (*other_bb == NULL || !other_edge_seen)
3726 	return false;
3727     }
3728   else if (single_succ (bb) != *other_bb)
3729     return false;
3730 
3731   /* Now check all PHIs of *OTHER_BB.  */
3732   e = find_edge (bb, *other_bb);
3733   e2 = find_edge (test_bb, *other_bb);
3734   for (gsi = gsi_start_phis (e->dest); !gsi_end_p (gsi); gsi_next (&gsi))
3735     {
3736       gphi *phi = gsi.phi ();
3737       /* If both BB and TEST_BB end with GIMPLE_COND, all PHI arguments
3738 	 corresponding to BB and TEST_BB predecessor must be the same.  */
3739       if (!operand_equal_p (gimple_phi_arg_def (phi, e->dest_idx),
3740 			    gimple_phi_arg_def (phi, e2->dest_idx), 0))
3741 	{
3742 	  /* Otherwise, if one of the blocks doesn't end with GIMPLE_COND,
3743 	     one of the PHIs should have the lhs of the last stmt in
3744 	     that block as PHI arg and that PHI should have 0 or 1
3745 	     corresponding to it in all other range test basic blocks
3746 	     considered.  */
3747 	  if (!is_cond)
3748 	    {
3749 	      if (gimple_phi_arg_def (phi, e->dest_idx)
3750 		  == gimple_assign_lhs (stmt)
3751 		  && (integer_zerop (gimple_phi_arg_def (phi, e2->dest_idx))
3752 		      || integer_onep (gimple_phi_arg_def (phi,
3753 							   e2->dest_idx))))
3754 		continue;
3755 	    }
3756 	  else
3757 	    {
3758 	      gimple *test_last = last_stmt (test_bb);
3759 	      if (gimple_code (test_last) != GIMPLE_COND
3760 		  && gimple_phi_arg_def (phi, e2->dest_idx)
3761 		     == gimple_assign_lhs (test_last)
3762 		  && (integer_zerop (gimple_phi_arg_def (phi, e->dest_idx))
3763 		      || integer_onep (gimple_phi_arg_def (phi, e->dest_idx))))
3764 		continue;
3765 	    }
3766 
3767 	  return false;
3768 	}
3769     }
3770   return true;
3771 }
3772 
3773 /* Return true if BB doesn't have side-effects that would disallow
3774    range test optimization, all SSA_NAMEs set in the bb are consumed
3775    in the bb and there are no PHIs.  */
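/* A block that qualifies looks roughly like (hypothetical GIMPLE):
     _1 = a_2 + 1;
     _3 = _1 * 2;
     if (_3 > 5) goto ...; else goto ...;
   there are no PHIs, nothing can trap or have side-effects, and every
   SSA_NAME defined before the final statement is used only inside
   this block.  */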
3776 
3777 static bool
3778 no_side_effect_bb (basic_block bb)
3779 {
3780   gimple_stmt_iterator gsi;
3781   gimple *last;
3782 
3783   if (!gimple_seq_empty_p (phi_nodes (bb)))
3784     return false;
3785   last = last_stmt (bb);
3786   for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
3787     {
3788       gimple *stmt = gsi_stmt (gsi);
3789       tree lhs;
3790       imm_use_iterator imm_iter;
3791       use_operand_p use_p;
3792 
3793       if (is_gimple_debug (stmt))
3794 	continue;
3795       if (gimple_has_side_effects (stmt))
3796 	return false;
3797       if (stmt == last)
3798 	return true;
3799       if (!is_gimple_assign (stmt))
3800 	return false;
3801       lhs = gimple_assign_lhs (stmt);
3802       if (TREE_CODE (lhs) != SSA_NAME)
3803 	return false;
3804       if (gimple_assign_rhs_could_trap_p (stmt))
3805 	return false;
3806       FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
3807 	{
3808 	  gimple *use_stmt = USE_STMT (use_p);
3809 	  if (is_gimple_debug (use_stmt))
3810 	    continue;
3811 	  if (gimple_bb (use_stmt) != bb)
3812 	    return false;
3813 	}
3814     }
3815   return false;
3816 }
3817 
3818 /* If VAR is set by CODE (BIT_{AND,IOR}_EXPR) which is reassociable,
3819    return true and fill in *OPS recursively.  */
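/* For example (illustrative), with CODE == BIT_IOR_EXPR and
     _5 = _1 | _2;
     _7 = _5 | _3;
   calling get_ops on _7 recurses through _5 and pushes _1, _2 and _3
   into *OPS, provided each has a single use and is not itself defined
   by another reassociable |.  */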
3820 
3821 static bool
3822 get_ops (tree var, enum tree_code code, vec<operand_entry *> *ops,
3823 	 struct loop *loop)
3824 {
3825   gimple *stmt = SSA_NAME_DEF_STMT (var);
3826   tree rhs[2];
3827   int i;
3828 
3829   if (!is_reassociable_op (stmt, code, loop))
3830     return false;
3831 
3832   rhs[0] = gimple_assign_rhs1 (stmt);
3833   rhs[1] = gimple_assign_rhs2 (stmt);
3834   gimple_set_visited (stmt, true);
3835   for (i = 0; i < 2; i++)
3836     if (TREE_CODE (rhs[i]) == SSA_NAME
3837 	&& !get_ops (rhs[i], code, ops, loop)
3838 	&& has_single_use (rhs[i]))
3839       {
3840 	operand_entry *oe = operand_entry_pool.allocate ();
3841 
3842 	oe->op = rhs[i];
3843 	oe->rank = code;
3844 	oe->id = 0;
3845 	oe->count = 1;
3846 	oe->stmt_to_insert = NULL;
3847 	ops->safe_push (oe);
3848       }
3849   return true;
3850 }
3851 
3852 /* Find the ops that were added by get_ops starting from VAR, see if
3853    they were changed during update_range_test and if so, create new
3854    stmts.  */
3855 
3856 static tree
3857 update_ops (tree var, enum tree_code code, vec<operand_entry *> ops,
3858 	    unsigned int *pidx, struct loop *loop)
3859 {
3860   gimple *stmt = SSA_NAME_DEF_STMT (var);
3861   tree rhs[4];
3862   int i;
3863 
3864   if (!is_reassociable_op (stmt, code, loop))
3865     return NULL;
3866 
3867   rhs[0] = gimple_assign_rhs1 (stmt);
3868   rhs[1] = gimple_assign_rhs2 (stmt);
3869   rhs[2] = rhs[0];
3870   rhs[3] = rhs[1];
3871   for (i = 0; i < 2; i++)
3872     if (TREE_CODE (rhs[i]) == SSA_NAME)
3873       {
3874 	rhs[2 + i] = update_ops (rhs[i], code, ops, pidx, loop);
3875 	if (rhs[2 + i] == NULL_TREE)
3876 	  {
3877 	    if (has_single_use (rhs[i]))
3878 	      rhs[2 + i] = ops[(*pidx)++]->op;
3879 	    else
3880 	      rhs[2 + i] = rhs[i];
3881 	  }
3882       }
3883   if ((rhs[2] != rhs[0] || rhs[3] != rhs[1])
3884       && (rhs[2] != rhs[1] || rhs[3] != rhs[0]))
3885     {
3886       gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
3887       var = make_ssa_name (TREE_TYPE (var));
3888       gassign *g = gimple_build_assign (var, gimple_assign_rhs_code (stmt),
3889 					rhs[2], rhs[3]);
3890       gimple_set_uid (g, gimple_uid (stmt));
3891       gimple_set_visited (g, true);
3892       gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3893     }
3894   return var;
3895 }
3896 
3897 /* Structure to track the initial value passed to get_ops and
3898    the range in the ops vector for each basic block.  */
3899 
3900 struct inter_bb_range_test_entry
3901 {
3902   tree op;
3903   unsigned int first_idx, last_idx;
3904 };
3905 
3906 /* Inter-bb range test optimization.
3907 
3908    Returns TRUE if a gimple conditional is optimized to a true/false,
3909    otherwise return FALSE.
3910 
3911    This indicates to the caller that it should run a CFG cleanup pass
3912    once reassociation is completed.  */
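/* Rough shape of the CFG this looks for (hypothetical example):
     <bb 2>: if (a_1 < 10) goto <bb 4>; else goto <bb 3>;
     <bb 3>: if (a_1 > 20) goto <bb 4>; else goto <bb 5>;
   with bb 4 playing the role of other_bb.  The two conditions form one
   ||-combined range test chain that is handed to optimize_range_tests
   with opcode ERROR_MARK.  */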
3913 
3914 static bool
3915 maybe_optimize_range_tests (gimple *stmt)
3916 {
3917   basic_block first_bb = gimple_bb (stmt);
3918   basic_block last_bb = first_bb;
3919   basic_block other_bb = NULL;
3920   basic_block bb;
3921   edge_iterator ei;
3922   edge e;
3923   auto_vec<operand_entry *> ops;
3924   auto_vec<inter_bb_range_test_entry> bbinfo;
3925   bool any_changes = false;
3926   bool cfg_cleanup_needed = false;
3927 
3928   /* Consider only basic blocks that end with GIMPLE_COND or
3929      a cast statement satisfying final_range_test_p.  All
3930      but the last bb in the first_bb .. last_bb range
3931      should end with GIMPLE_COND.  */
3932   if (gimple_code (stmt) == GIMPLE_COND)
3933     {
3934       if (EDGE_COUNT (first_bb->succs) != 2)
3935 	return cfg_cleanup_needed;
3936     }
3937   else if (final_range_test_p (stmt))
3938     other_bb = single_succ (first_bb);
3939   else
3940     return cfg_cleanup_needed;
3941 
3942   if (stmt_could_throw_p (stmt))
3943     return cfg_cleanup_needed;
3944 
3945   /* As relative ordering of post-dominator sons isn't fixed,
3946      maybe_optimize_range_tests can be called first on any
3947      bb in the range we want to optimize.  So, start searching
3948      backwards, if first_bb can be set to a predecessor.  */
3949   while (single_pred_p (first_bb))
3950     {
3951       basic_block pred_bb = single_pred (first_bb);
3952       if (!suitable_cond_bb (pred_bb, first_bb, &other_bb, true))
3953 	break;
3954       if (!no_side_effect_bb (first_bb))
3955 	break;
3956       first_bb = pred_bb;
3957     }
3958   /* If first_bb is last_bb, other_bb hasn't been computed yet.
3959      Before starting forward search in last_bb successors, find
3960      out the other_bb.  */
3961   if (first_bb == last_bb)
3962     {
3963       other_bb = NULL;
3964       /* As a non-GIMPLE_COND last stmt always terminates the range,
3965 	 forward search won't discover anything, so just give up.  */
3966       if (gimple_code (stmt) != GIMPLE_COND)
3967 	return cfg_cleanup_needed;
3968       /* Look at both successors.  Either it ends with a GIMPLE_COND
3969 	 and satisfies suitable_cond_bb, or ends with a cast and
3970 	 other_bb is that cast's successor.  */
3971       FOR_EACH_EDGE (e, ei, first_bb->succs)
3972 	if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE))
3973 	    || e->dest == first_bb)
3974 	  return cfg_cleanup_needed;
3975 	else if (single_pred_p (e->dest))
3976 	  {
3977 	    stmt = last_stmt (e->dest);
3978 	    if (stmt
3979 		&& gimple_code (stmt) == GIMPLE_COND
3980 		&& EDGE_COUNT (e->dest->succs) == 2)
3981 	      {
3982 		if (suitable_cond_bb (first_bb, e->dest, &other_bb, true))
3983 		  break;
3984 		else
3985 		  other_bb = NULL;
3986 	      }
3987 	    else if (stmt
3988 		     && final_range_test_p (stmt)
3989 		     && find_edge (first_bb, single_succ (e->dest)))
3990 	      {
3991 		other_bb = single_succ (e->dest);
3992 		if (other_bb == first_bb)
3993 		  other_bb = NULL;
3994 	      }
3995 	  }
3996       if (other_bb == NULL)
3997 	return cfg_cleanup_needed;
3998     }
3999   /* Now do the forward search, moving last_bb to successor bbs
4000      that aren't other_bb.  */
4001   while (EDGE_COUNT (last_bb->succs) == 2)
4002     {
4003       FOR_EACH_EDGE (e, ei, last_bb->succs)
4004 	if (e->dest != other_bb)
4005 	  break;
4006       if (e == NULL)
4007 	break;
4008       if (!single_pred_p (e->dest))
4009 	break;
4010       if (!suitable_cond_bb (e->dest, last_bb, &other_bb, false))
4011 	break;
4012       if (!no_side_effect_bb (e->dest))
4013 	break;
4014       last_bb = e->dest;
4015     }
4016   if (first_bb == last_bb)
4017     return cfg_cleanup_needed;
4018   /* Here basic blocks first_bb through last_bb's predecessor
4019      end with GIMPLE_COND, all of them have one of the edges to
4020      other_bb and another to another block in the range,
4021      all blocks except first_bb don't have side-effects and
4022      last_bb ends with either GIMPLE_COND, or cast satisfying
4023      final_range_test_p.  */
4024   for (bb = last_bb; ; bb = single_pred (bb))
4025     {
4026       enum tree_code code;
4027       tree lhs, rhs;
4028       inter_bb_range_test_entry bb_ent;
4029 
4030       bb_ent.op = NULL_TREE;
4031       bb_ent.first_idx = ops.length ();
4032       bb_ent.last_idx = bb_ent.first_idx;
4033       e = find_edge (bb, other_bb);
4034       stmt = last_stmt (bb);
4035       gimple_set_visited (stmt, true);
4036       if (gimple_code (stmt) != GIMPLE_COND)
4037 	{
4038 	  use_operand_p use_p;
4039 	  gimple *phi;
4040 	  edge e2;
4041 	  unsigned int d;
4042 
4043 	  lhs = gimple_assign_lhs (stmt);
4044 	  rhs = gimple_assign_rhs1 (stmt);
4045 	  gcc_assert (bb == last_bb);
4046 
4047 	  /* stmt is
4048 	     _123 = (int) _234;
4049 	     OR
4050 	     _234 = a_2(D) == 2;
4051 
4052 	     followed by:
4053 	     <bb M>:
4054 	     # _345 = PHI <_123(N), 1(...), 1(...)>
4055 
4056 	     or 0 instead of 1.  If it is 0, the _234
4057 	     range test is anded together with all the
4058 	     other range tests, if it is 1, it is ored with
4059 	     them.  */
4060 	  single_imm_use (lhs, &use_p, &phi);
4061 	  gcc_assert (gimple_code (phi) == GIMPLE_PHI);
4062 	  e2 = find_edge (first_bb, other_bb);
4063 	  d = e2->dest_idx;
4064 	  gcc_assert (gimple_phi_arg_def (phi, e->dest_idx) == lhs);
4065 	  if (integer_zerop (gimple_phi_arg_def (phi, d)))
4066 	    code = BIT_AND_EXPR;
4067 	  else
4068 	    {
4069 	      gcc_checking_assert (integer_onep (gimple_phi_arg_def (phi, d)));
4070 	      code = BIT_IOR_EXPR;
4071 	    }
4072 
4073 	  /* If _234 SSA_NAME_DEF_STMT is
4074 	     _234 = _567 | _789;
4075 	     (or &, corresponding to 1/0 in the phi arguments),
4076 	     push into ops the individual range test arguments
4077 	     of the bitwise or resp. and, recursively.  */
4078 	  if (TREE_CODE (rhs) == SSA_NAME
4079 	      && (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt))
4080 		  != tcc_comparison)
4081 	      && !get_ops (rhs, code, &ops,
4082 			loop_containing_stmt (stmt))
4083 	      && has_single_use (rhs))
4084 	    {
4085 	      /* Otherwise, push the _234 range test itself.  */
4086 	      operand_entry *oe = operand_entry_pool.allocate ();
4087 
4088 	      oe->op = rhs;
4089 	      oe->rank = code;
4090 	      oe->id = 0;
4091 	      oe->count = 1;
4092 	      oe->stmt_to_insert = NULL;
4093 	      ops.safe_push (oe);
4094 	      bb_ent.last_idx++;
4095 	      bb_ent.op = rhs;
4096 	    }
4097 	  else if (is_gimple_assign (stmt)
4098 		   && (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt))
4099 		       == tcc_comparison)
4100 		   && !get_ops (lhs, code, &ops,
4101 				loop_containing_stmt (stmt))
4102 		   && has_single_use (lhs))
4103 	    {
4104 	      operand_entry *oe = operand_entry_pool.allocate ();
4105 	      oe->op = lhs;
4106 	      oe->rank = code;
4107 	      oe->id = 0;
4108 	      oe->count = 1;
4109 	      ops.safe_push (oe);
4110 	      bb_ent.last_idx++;
4111 	      bb_ent.op = lhs;
4112 	    }
4113 	  else
4114 	    {
4115 	      bb_ent.last_idx = ops.length ();
4116 	      bb_ent.op = rhs;
4117 	    }
4118 	  bbinfo.safe_push (bb_ent);
4119 	  continue;
4120 	}
4121       /* Otherwise stmt is GIMPLE_COND.  */
4122       code = gimple_cond_code (stmt);
4123       lhs = gimple_cond_lhs (stmt);
4124       rhs = gimple_cond_rhs (stmt);
4125       if (TREE_CODE (lhs) == SSA_NAME
4126 	  && INTEGRAL_TYPE_P (TREE_TYPE (lhs))
4127 	  && ((code != EQ_EXPR && code != NE_EXPR)
4128 	      || rhs != boolean_false_node
4129 		 /* Either push into ops the individual bitwise
4130 		    or resp. and operands, depending on which
4131 		    edge is other_bb.  */
4132 	      || !get_ops (lhs, (((e->flags & EDGE_TRUE_VALUE) == 0)
4133 				 ^ (code == EQ_EXPR))
4134 				? BIT_AND_EXPR : BIT_IOR_EXPR, &ops,
4135 			   loop_containing_stmt (stmt))))
4136 	{
4137 	  /* Or push the GIMPLE_COND stmt itself.  */
4138 	  operand_entry *oe = operand_entry_pool.allocate ();
4139 
4140 	  oe->op = NULL;
4141 	  oe->rank = (e->flags & EDGE_TRUE_VALUE)
4142 		     ? BIT_IOR_EXPR : BIT_AND_EXPR;
4143 	  /* oe->op = NULL signals that there is no SSA_NAME
4144 	     for the range test, and oe->id instead is the
4145 	     basic block number at whose end the GIMPLE_COND
4146 	     is found.  */
4147 	  oe->id = bb->index;
4148 	  oe->count = 1;
4149 	  oe->stmt_to_insert = NULL;
4150 	  ops.safe_push (oe);
4151 	  bb_ent.op = NULL;
4152 	  bb_ent.last_idx++;
4153 	}
4154       else if (ops.length () > bb_ent.first_idx)
4155 	{
4156 	  bb_ent.op = lhs;
4157 	  bb_ent.last_idx = ops.length ();
4158 	}
4159       bbinfo.safe_push (bb_ent);
4160       if (bb == first_bb)
4161 	break;
4162     }
4163   if (ops.length () > 1)
4164     any_changes = optimize_range_tests (ERROR_MARK, &ops, first_bb);
4165   if (any_changes)
4166     {
4167       unsigned int idx, max_idx = 0;
4168       /* update_ops relies on has_single_use predicates returning the
4169 	 same values as it did during get_ops earlier.  Additionally it
4170 	 never removes statements, only adds new ones and it should walk
4171 	 from the single imm use and check the predicate already before
4172 	 making those changes.
4173 	 On the other side, the handling of GIMPLE_COND directly can turn
4174 	 previously multiply used SSA_NAMEs into single use SSA_NAMEs, so
4175 	 it needs to be done in a separate loop afterwards.  */
4176       for (bb = last_bb, idx = 0; ; bb = single_pred (bb), idx++)
4177 	{
4178 	  if (bbinfo[idx].first_idx < bbinfo[idx].last_idx
4179 	      && bbinfo[idx].op != NULL_TREE)
4180 	    {
4181 	      tree new_op;
4182 
4183 	      max_idx = idx;
4184 	      stmt = last_stmt (bb);
4185 	      new_op = update_ops (bbinfo[idx].op,
4186 				   (enum tree_code)
4187 				   ops[bbinfo[idx].first_idx]->rank,
4188 				   ops, &bbinfo[idx].first_idx,
4189 				   loop_containing_stmt (stmt));
4190 	      if (new_op == NULL_TREE)
4191 		{
4192 		  gcc_assert (bb == last_bb);
4193 		  new_op = ops[bbinfo[idx].first_idx++]->op;
4194 		}
4195 	      if (bbinfo[idx].op != new_op)
4196 		{
4197 		  imm_use_iterator iter;
4198 		  use_operand_p use_p;
4199 		  gimple *use_stmt, *cast_or_tcc_cmp_stmt = NULL;
4200 
4201 		  FOR_EACH_IMM_USE_STMT (use_stmt, iter, bbinfo[idx].op)
4202 		    if (is_gimple_debug (use_stmt))
4203 		      continue;
4204 		    else if (gimple_code (use_stmt) == GIMPLE_COND
4205 			     || gimple_code (use_stmt) == GIMPLE_PHI)
4206 		      FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
4207 			SET_USE (use_p, new_op);
4208 		    else if ((is_gimple_assign (use_stmt)
4209 			      && (TREE_CODE_CLASS
4210 				  (gimple_assign_rhs_code (use_stmt))
4211 				  == tcc_comparison)))
4212 		      cast_or_tcc_cmp_stmt = use_stmt;
4213 		    else if (gimple_assign_cast_p (use_stmt))
4214 		      cast_or_tcc_cmp_stmt = use_stmt;
4215 		    else
4216 		      gcc_unreachable ();
4217 
4218 		  if (cast_or_tcc_cmp_stmt)
4219 		    {
4220 		      gcc_assert (bb == last_bb);
4221 		      tree lhs = gimple_assign_lhs (cast_or_tcc_cmp_stmt);
4222 		      tree new_lhs = make_ssa_name (TREE_TYPE (lhs));
4223 		      enum tree_code rhs_code
4224 			= gimple_assign_cast_p (cast_or_tcc_cmp_stmt)
4225 			? gimple_assign_rhs_code (cast_or_tcc_cmp_stmt)
4226 			: CONVERT_EXPR;
4227 		      gassign *g;
4228 		      if (is_gimple_min_invariant (new_op))
4229 			{
4230 			  new_op = fold_convert (TREE_TYPE (lhs), new_op);
4231 			  g = gimple_build_assign (new_lhs, new_op);
4232 			}
4233 		      else
4234 			g = gimple_build_assign (new_lhs, rhs_code, new_op);
4235 		      gimple_stmt_iterator gsi
4236 			= gsi_for_stmt (cast_or_tcc_cmp_stmt);
4237 		      gimple_set_uid (g, gimple_uid (cast_or_tcc_cmp_stmt));
4238 		      gimple_set_visited (g, true);
4239 		      gsi_insert_before (&gsi, g, GSI_SAME_STMT);
4240 		      FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs)
4241 			if (is_gimple_debug (use_stmt))
4242 			  continue;
4243 			else if (gimple_code (use_stmt) == GIMPLE_COND
4244 				 || gimple_code (use_stmt) == GIMPLE_PHI)
4245 			  FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
4246 			    SET_USE (use_p, new_lhs);
4247 			else
4248 			  gcc_unreachable ();
4249 		    }
4250 		}
4251 	    }
4252 	  if (bb == first_bb)
4253 	    break;
4254 	}
4255       for (bb = last_bb, idx = 0; ; bb = single_pred (bb), idx++)
4256 	{
4257 	  if (bbinfo[idx].first_idx < bbinfo[idx].last_idx
4258 	      && bbinfo[idx].op == NULL_TREE
4259 	      && ops[bbinfo[idx].first_idx]->op != NULL_TREE)
4260 	    {
4261 	      gcond *cond_stmt = as_a <gcond *> (last_stmt (bb));
4262 
4263 	      if (idx > max_idx)
4264 		max_idx = idx;
4265 
4266 	      /* If we collapse the conditional to a true/false
4267 		 condition, then bubble that knowledge up to our caller.  */
4268 	      if (integer_zerop (ops[bbinfo[idx].first_idx]->op))
4269 		{
4270 		  gimple_cond_make_false (cond_stmt);
4271 		  cfg_cleanup_needed = true;
4272 		}
4273 	      else if (integer_onep (ops[bbinfo[idx].first_idx]->op))
4274 		{
4275 		  gimple_cond_make_true (cond_stmt);
4276 		  cfg_cleanup_needed = true;
4277 		}
4278 	      else
4279 		{
4280 		  gimple_cond_set_code (cond_stmt, NE_EXPR);
4281 		  gimple_cond_set_lhs (cond_stmt,
4282 				       ops[bbinfo[idx].first_idx]->op);
4283 		  gimple_cond_set_rhs (cond_stmt, boolean_false_node);
4284 		}
4285 	      update_stmt (cond_stmt);
4286 	    }
4287 	  if (bb == first_bb)
4288 	    break;
4289 	}
4290 
4291       /* The above changes could result in basic blocks after the first
4292 	 modified one, up to and including last_bb, to be executed even if
4293 	 they would not be in the original program.  If the value ranges of
4294 	 assignment lhs' in those bbs were dependent on the conditions
4295 	 guarding those basic blocks which now can change, the VRs might
4296 	 be incorrect.  As no_side_effect_bb should ensure those SSA_NAMEs
4297 	 are only used within the same bb, it should be not a big deal if
4298 	 we just reset all the VRs in those bbs.  See PR68671.  */
4299       for (bb = last_bb, idx = 0; idx < max_idx; bb = single_pred (bb), idx++)
4300 	reset_flow_sensitive_info_in_bb (bb);
4301     }
4302   return cfg_cleanup_needed;
4303 }
4304 
4305 /* Return true if OPERAND is defined by a PHI node which uses the LHS
4306    of STMT in its operands.  This is also known as a "destructive
4307    update" operation.  */
4308 
4309 static bool
4310 is_phi_for_stmt (gimple *stmt, tree operand)
4311 {
4312   gimple *def_stmt;
4313   gphi *def_phi;
4314   tree lhs;
4315   use_operand_p arg_p;
4316   ssa_op_iter i;
4317 
4318   if (TREE_CODE (operand) != SSA_NAME)
4319     return false;
4320 
4321   lhs = gimple_assign_lhs (stmt);
4322 
4323   def_stmt = SSA_NAME_DEF_STMT (operand);
4324   def_phi = dyn_cast <gphi *> (def_stmt);
4325   if (!def_phi)
4326     return false;
4327 
4328   FOR_EACH_PHI_ARG (arg_p, def_phi, i, SSA_OP_USE)
4329     if (lhs == USE_FROM_PTR (arg_p))
4330       return true;
4331   return false;
4332 }
4333 
4334 /* Remove def stmt of VAR if VAR has zero uses and recurse
4335    on rhs1 operand if so.  */
4336 
4337 static void
4338 remove_visited_stmt_chain (tree var)
4339 {
4340   gimple *stmt;
4341   gimple_stmt_iterator gsi;
4342 
4343   while (1)
4344     {
4345       if (TREE_CODE (var) != SSA_NAME || !has_zero_uses (var))
4346 	return;
4347       stmt = SSA_NAME_DEF_STMT (var);
4348       if (is_gimple_assign (stmt) && gimple_visited_p (stmt))
4349 	{
4350 	  var = gimple_assign_rhs1 (stmt);
4351 	  gsi = gsi_for_stmt (stmt);
4352 	  reassoc_remove_stmt (&gsi);
4353 	  release_defs (stmt);
4354 	}
4355       else
4356 	return;
4357     }
4358 }
4359 
4360 /* This function checks three consecutive operands in
4361    the passed operands vector OPS starting from OPINDEX and
4362    swaps two operands if it is profitable for the binary operation
4363    consuming the OPINDEX + 1 and OPINDEX + 2 operands.
4364 
4365    We pair ops with the same rank if possible.
4366 
4367    The alternative we try is to see if STMT is a destructive
4368    update style statement, which is like:
4369    b = phi (a, ...)
4370    a = c + b;
4371    In that case, we want to use the destructive update form to
4372    expose the possible vectorizer sum reduction opportunity.
4373    In that case, the third operand will be the phi node. This
4374    check is not performed if STMT is null.
4375 
4376    We could, of course, try to be better as noted above, and do a
4377    lot of work to try to find these opportunities in >3 operand
4378    cases, but it is unlikely to be worth it.  */
4379 
4380 static void
4381 swap_ops_for_binary_stmt (vec<operand_entry *> ops,
4382 			  unsigned int opindex, gimple *stmt)
4383 {
4384   operand_entry *oe1, *oe2, *oe3;
4385 
4386   oe1 = ops[opindex];
4387   oe2 = ops[opindex + 1];
4388   oe3 = ops[opindex + 2];
4389 
4390   if ((oe1->rank == oe2->rank
4391        && oe2->rank != oe3->rank)
4392       || (stmt && is_phi_for_stmt (stmt, oe3->op)
4393 	  && !is_phi_for_stmt (stmt, oe1->op)
4394 	  && !is_phi_for_stmt (stmt, oe2->op)))
4395     std::swap (*oe1, *oe3);
4396   else if ((oe1->rank == oe3->rank
4397 	    && oe2->rank != oe3->rank)
4398 	   || (stmt && is_phi_for_stmt (stmt, oe2->op)
4399 	       && !is_phi_for_stmt (stmt, oe1->op)
4400 	       && !is_phi_for_stmt (stmt, oe3->op)))
4401     std::swap (*oe1, *oe2);
4402 }
4403 
4404 /* If definition of RHS1 or RHS2 dominates STMT, return the later of those
4405    two definitions, otherwise return STMT.  */
4406 
4407 static inline gimple *
4408 find_insert_point (gimple *stmt, tree rhs1, tree rhs2)
4409 {
4410   if (TREE_CODE (rhs1) == SSA_NAME
4411       && reassoc_stmt_dominates_stmt_p (stmt, SSA_NAME_DEF_STMT (rhs1)))
4412     stmt = SSA_NAME_DEF_STMT (rhs1);
4413   if (TREE_CODE (rhs2) == SSA_NAME
4414       && reassoc_stmt_dominates_stmt_p (stmt, SSA_NAME_DEF_STMT (rhs2)))
4415     stmt = SSA_NAME_DEF_STMT (rhs2);
4416   return stmt;
4417 }
4418 
4419 /* If the stmt that defines operand has to be inserted, insert it
4420    before the use.  */
4421 static void
4422 insert_stmt_before_use (gimple *stmt, gimple *stmt_to_insert)
4423 {
4424   gcc_assert (is_gimple_assign (stmt_to_insert));
4425   tree rhs1 = gimple_assign_rhs1 (stmt_to_insert);
4426   tree rhs2 = gimple_assign_rhs2 (stmt_to_insert);
4427   gimple *insert_point = find_insert_point (stmt, rhs1, rhs2);
4428   gimple_stmt_iterator gsi = gsi_for_stmt (insert_point);
4429   gimple_set_uid (stmt_to_insert, gimple_uid (insert_point));
4430 
4431   /* If the insert point is not stmt, then insert_point would be
4432      the point where operand rhs1 or rhs2 is defined. In this case,
4433      stmt_to_insert has to be inserted afterwards. This would
4434      only happen when the stmt insertion point is flexible. */
4435   if (stmt == insert_point)
4436     gsi_insert_before (&gsi, stmt_to_insert, GSI_NEW_STMT);
4437   else
4438     insert_stmt_after (stmt_to_insert, insert_point);
4439 }
4440 
4441 
4442 /* Recursively rewrite our linearized statements so that the operators
4443    match those in OPS[OPINDEX], putting the computation in rank
4444    order.  Return new lhs.
4445    CHANGED is true if we shouldn't reuse the lhs SSA_NAME both in
4446    the current stmt and during recursive invocations.
4447    NEXT_CHANGED is true if we shouldn't reuse the lhs SSA_NAME in
4448    recursive invocations.  */
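/* Small example with invented names: for a linearized chain
     _5 = a_1 + b_2;  _6 = _5 + c_3;  _7 = _6 + d_4;
   the rewrite starts at the root statement defining _7 and walks down
   the rhs1 chain, re-filling each statement's operands from OPS so the
   chain computes the same sum with operands in rank order, creating
   fresh SSA names whenever an operand actually changes.  */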
4449 
4450 static tree
4451 rewrite_expr_tree (gimple *stmt, unsigned int opindex,
4452 		   vec<operand_entry *> ops, bool changed, bool next_changed)
4453 {
4454   tree rhs1 = gimple_assign_rhs1 (stmt);
4455   tree rhs2 = gimple_assign_rhs2 (stmt);
4456   tree lhs = gimple_assign_lhs (stmt);
4457   operand_entry *oe;
4458 
4459   /* The final recursion case for this function is that you have
4460      exactly two operations left.
4461      If we had exactly one op in the entire list to start with, we
4462      would have never called this function, and the tail recursion
4463      rewrites them one at a time.  */
4464   if (opindex + 2 == ops.length ())
4465     {
4466       operand_entry *oe1, *oe2;
4467 
4468       oe1 = ops[opindex];
4469       oe2 = ops[opindex + 1];
4470 
4471       if (rhs1 != oe1->op || rhs2 != oe2->op)
4472 	{
4473 	  gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
4474 	  unsigned int uid = gimple_uid (stmt);
4475 
4476 	  if (dump_file && (dump_flags & TDF_DETAILS))
4477 	    {
4478 	      fprintf (dump_file, "Transforming ");
4479 	      print_gimple_stmt (dump_file, stmt, 0);
4480 	    }
4481 
4482 	  /* If the stmt that defines operand has to be inserted, insert it
4483 	     before the use.  */
4484 	  if (oe1->stmt_to_insert)
4485 	    insert_stmt_before_use (stmt, oe1->stmt_to_insert);
4486 	  if (oe2->stmt_to_insert)
4487 	    insert_stmt_before_use (stmt, oe2->stmt_to_insert);
4488 	  /* Even when changed is false, reassociation could have e.g. removed
4489 	     some redundant operations, so unless we are just swapping the
4490 	     arguments or unless there is no change at all (then we just
4491 	     return lhs), force creation of a new SSA_NAME.  */
4492 	  if (changed || ((rhs1 != oe2->op || rhs2 != oe1->op) && opindex))
4493 	    {
4494 	      gimple *insert_point
4495 		= find_insert_point (stmt, oe1->op, oe2->op);
4496 	      lhs = make_ssa_name (TREE_TYPE (lhs));
4497 	      stmt
4498 		= gimple_build_assign (lhs, gimple_assign_rhs_code (stmt),
4499 				       oe1->op, oe2->op);
4500 	      gimple_set_uid (stmt, uid);
4501 	      gimple_set_visited (stmt, true);
4502 	      if (insert_point == gsi_stmt (gsi))
4503 		gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
4504 	      else
4505 		insert_stmt_after (stmt, insert_point);
4506 	    }
4507 	  else
4508 	    {
4509 	      gcc_checking_assert (find_insert_point (stmt, oe1->op, oe2->op)
4510 				   == stmt);
4511 	      gimple_assign_set_rhs1 (stmt, oe1->op);
4512 	      gimple_assign_set_rhs2 (stmt, oe2->op);
4513 	      update_stmt (stmt);
4514 	    }
4515 
4516 	  if (rhs1 != oe1->op && rhs1 != oe2->op)
4517 	    remove_visited_stmt_chain (rhs1);
4518 
4519 	  if (dump_file && (dump_flags & TDF_DETAILS))
4520 	    {
4521 	      fprintf (dump_file, " into ");
4522 	      print_gimple_stmt (dump_file, stmt, 0);
4523 	    }
4524 	}
4525       return lhs;
4526     }
4527 
4528   /* If we hit here, we should have 3 or more ops left.  */
4529   gcc_assert (opindex + 2 < ops.length ());
4530 
4531   /* Rewrite the next operator.  */
4532   oe = ops[opindex];
4533 
4534   /* If the stmt that defines operand has to be inserted, insert it
4535      before the use.  */
4536   if (oe->stmt_to_insert)
4537     insert_stmt_before_use (stmt, oe->stmt_to_insert);
4538 
4539   /* Recurse on the LHS of the binary operator, which is guaranteed to
4540      be the non-leaf side.  */
4541   tree new_rhs1
4542     = rewrite_expr_tree (SSA_NAME_DEF_STMT (rhs1), opindex + 1, ops,
4543 			 changed || oe->op != rhs2 || next_changed,
4544 			 false);
4545 
4546   if (oe->op != rhs2 || new_rhs1 != rhs1)
4547     {
4548       if (dump_file && (dump_flags & TDF_DETAILS))
4549 	{
4550 	  fprintf (dump_file, "Transforming ");
4551 	  print_gimple_stmt (dump_file, stmt, 0);
4552 	}
4553 
4554       /* If changed is false, this is either opindex == 0
4555 	 or all outer rhs2's were equal to corresponding oe->op,
4556 	 and powi_result is NULL.
4557 	 That means lhs is equivalent before and after reassociation.
4558 	 Otherwise ensure the old lhs SSA_NAME is not reused and
4559 	 create a new stmt as well, so that any debug stmts will be
4560 	 properly adjusted.  */
4561       if (changed)
4562 	{
4563 	  gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
4564 	  unsigned int uid = gimple_uid (stmt);
4565 	  gimple *insert_point = find_insert_point (stmt, new_rhs1, oe->op);
4566 
4567 	  lhs = make_ssa_name (TREE_TYPE (lhs));
4568 	  stmt = gimple_build_assign (lhs, gimple_assign_rhs_code (stmt),
4569 				      new_rhs1, oe->op);
4570 	  gimple_set_uid (stmt, uid);
4571 	  gimple_set_visited (stmt, true);
4572 	  if (insert_point == gsi_stmt (gsi))
4573 	    gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
4574 	  else
4575 	    insert_stmt_after (stmt, insert_point);
4576 	}
4577       else
4578 	{
4579 	  gcc_checking_assert (find_insert_point (stmt, new_rhs1, oe->op)
4580 			       == stmt);
4581 	  gimple_assign_set_rhs1 (stmt, new_rhs1);
4582 	  gimple_assign_set_rhs2 (stmt, oe->op);
4583 	  update_stmt (stmt);
4584 	}
4585 
4586       if (dump_file && (dump_flags & TDF_DETAILS))
4587 	{
4588 	  fprintf (dump_file, " into ");
4589 	  print_gimple_stmt (dump_file, stmt, 0);
4590 	}
4591     }
4592   return lhs;
4593 }
4594 
4595 /* Find out how many cycles we need to compute a chain of statements.
4596    OPS_NUM holds the number of statements in the chain.  CPU_WIDTH is the
4597    maximum number of independent statements we may execute per cycle.  */
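/* Worked example of the estimate below (not a measured number): for
   ops_num == 10 and cpu_width == 2 we get res = 10 / 4 = 2 full-width
   cycles, rest = 10 - 2 * 2 = 6 remaining operands, and since 6 is not
   a power of two the tail needs floor_log2 (6) + 1 = 3 further cycles,
   for 5 cycles in total.  */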
4598 
4599 static int
4600 get_required_cycles (int ops_num, int cpu_width)
4601 {
4602   int res;
4603   int elog;
4604   unsigned int rest;
4605 
4606   /* While we have more than 2 * cpu_width operands
4607      we may reduce the number of operands by cpu_width
4608      per cycle.  */
4609   res = ops_num / (2 * cpu_width);
4610 
4611   /* The remaining operand count can then be halved each cycle
4612      until we have only one operand.  */
4613   rest = (unsigned)(ops_num - res * cpu_width);
4614   elog = exact_log2 (rest);
4615   if (elog >= 0)
4616     res += elog;
4617   else
4618     res += floor_log2 (rest) + 1;
4619 
4620   return res;
4621 }
4622 
4623 /* Returns an optimal number of registers to use for computation of
4624    given statements.  */
4625 
4626 static int
4627 get_reassociation_width (int ops_num, enum tree_code opc,
4628 			 machine_mode mode)
4629 {
4630   int param_width = PARAM_VALUE (PARAM_TREE_REASSOC_WIDTH);
4631   int width;
4632   int width_min;
4633   int cycles_best;
4634 
4635   if (param_width > 0)
4636     width = param_width;
4637   else
4638     width = targetm.sched.reassociation_width (opc, mode);
4639 
4640   if (width == 1)
4641     return width;
4642 
4643   /* Get the minimal time required for sequence computation.  */
4644   cycles_best = get_required_cycles (ops_num, width);
4645 
4646   /* Check if we may use a smaller width and still compute the sequence
4647      in the same number of cycles.  That allows us to reduce register
4648      pressure.  get_required_cycles is monotonically increasing as width
4649      decreases, so we can perform a binary search for the minimal width
4650      that still results in the optimal cycle count.  */
4651   width_min = 1;
4652   while (width > width_min)
4653     {
4654       int width_mid = (width + width_min) / 2;
4655 
4656       if (get_required_cycles (ops_num, width_mid) == cycles_best)
4657 	width = width_mid;
4658       else if (width_min < width_mid)
4659 	width_min = width_mid;
4660       else
4661 	break;
4662     }
4663 
4664   return width;
4665 }
4666 
4667 /* Recursively rewrite our linearized statements so that the operators
4668    match those in OPS[OPINDEX], putting the computation in rank
4669    order and trying to allow operations to be executed in
4670    parallel.  */
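/* Sketch with invented SSA names: with WIDTH == 2 and OPS holding
   { a, b, c, d }, instead of the serial chain ((a + b) + c) + d the
   statements can be emitted roughly as
     _1 = a + b;  _2 = c + d;  _3 = _1 + _2;
   so the first two additions are independent and may issue in the
   same cycle.  */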
4671 
4672 static void
4673 rewrite_expr_tree_parallel (gassign *stmt, int width,
4674 			    vec<operand_entry *> ops)
4675 {
4676   enum tree_code opcode = gimple_assign_rhs_code (stmt);
4677   int op_num = ops.length ();
4678   gcc_assert (op_num > 0);
4679   int stmt_num = op_num - 1;
4680   gimple **stmts = XALLOCAVEC (gimple *, stmt_num);
4681   int op_index = op_num - 1;
4682   int stmt_index = 0;
4683   int ready_stmts_end = 0;
4684   int i = 0;
4685   gimple *stmt1 = NULL, *stmt2 = NULL;
4686   tree last_rhs1 = gimple_assign_rhs1 (stmt);
4687 
4688   /* We start expression rewriting from the top statements.
4689      So, in this loop we create a full list of statements
4690      we will work with.  */
4691   stmts[stmt_num - 1] = stmt;
4692   for (i = stmt_num - 2; i >= 0; i--)
4693     stmts[i] = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmts[i+1]));
4694 
4695   for (i = 0; i < stmt_num; i++)
4696     {
4697       tree op1, op2;
4698 
4699       /* Determine whether we should use results of
4700 	 already handled statements or not.  */
4701       if (ready_stmts_end == 0
4702 	  && (i - stmt_index >= width || op_index < 1))
4703 	ready_stmts_end = i;
4704 
4705       /* Now we choose operands for the next statement.  A nonzero
4706 	 value in ready_stmts_end means that we should use
4707 	 the result of already generated statements as the new operand.  */
4708       if (ready_stmts_end > 0)
4709 	{
4710 	  op1 = gimple_assign_lhs (stmts[stmt_index++]);
4711 	  if (ready_stmts_end > stmt_index)
4712 	    op2 = gimple_assign_lhs (stmts[stmt_index++]);
4713 	  else if (op_index >= 0)
4714 	    {
4715 	      operand_entry *oe = ops[op_index--];
4716 	      stmt2 = oe->stmt_to_insert;
4717 	      op2 = oe->op;
4718 	    }
4719 	  else
4720 	    {
4721 	      gcc_assert (stmt_index < i);
4722 	      op2 = gimple_assign_lhs (stmts[stmt_index++]);
4723 	    }
4724 
4725 	  if (stmt_index >= ready_stmts_end)
4726 	    ready_stmts_end = 0;
4727 	}
4728       else
4729 	{
4730 	  if (op_index > 1)
4731 	    swap_ops_for_binary_stmt (ops, op_index - 2, NULL);
4732 	  operand_entry *oe2 = ops[op_index--];
4733 	  operand_entry *oe1 = ops[op_index--];
4734 	  op2 = oe2->op;
4735 	  stmt2 = oe2->stmt_to_insert;
4736 	  op1 = oe1->op;
4737 	  stmt1 = oe1->stmt_to_insert;
4738 	}
4739 
4740       /* If we emit the last statement then we should put
4741 	 operands into the last statement.  It will also
4742 	 break the loop.  */
4743       if (op_index < 0 && stmt_index == i)
4744 	i = stmt_num - 1;
4745 
4746       if (dump_file && (dump_flags & TDF_DETAILS))
4747 	{
4748 	  fprintf (dump_file, "Transforming ");
4749 	  print_gimple_stmt (dump_file, stmts[i], 0);
4750 	}
4751 
4752       /* If the stmt that defines operand has to be inserted, insert it
4753 	 before the use.  */
4754       if (stmt1)
4755 	insert_stmt_before_use (stmts[i], stmt1);
4756       if (stmt2)
4757 	insert_stmt_before_use (stmts[i], stmt2);
4758       stmt1 = stmt2 = NULL;
4759 
4760       /* We keep original statement only for the last one.  All
4761 	 others are recreated.  */
4762       if (i == stmt_num - 1)
4763 	{
4764 	  gimple_assign_set_rhs1 (stmts[i], op1);
4765 	  gimple_assign_set_rhs2 (stmts[i], op2);
4766 	  update_stmt (stmts[i]);
4767 	}
4768       else
4769 	{
4770 	  stmts[i] = build_and_add_sum (TREE_TYPE (last_rhs1), op1, op2, opcode);
4771 	}
4772       if (dump_file && (dump_flags & TDF_DETAILS))
4773 	{
4774 	  fprintf (dump_file, " into ");
4775 	  print_gimple_stmt (dump_file, stmts[i], 0);
4776 	}
4777     }
4778 
4779   remove_visited_stmt_chain (last_rhs1);
4780 }
4781 
4782 /* Transform STMT, which is really (A + B) + (C + D), into the left
4783    linear form, ((A+B)+C)+D.
4784    Recurse on D if necessary.  */
4785 
4786 static void
4787 linearize_expr (gimple *stmt)
4788 {
4789   gimple_stmt_iterator gsi;
4790   gimple *binlhs = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
4791   gimple *binrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
4792   gimple *oldbinrhs = binrhs;
4793   enum tree_code rhscode = gimple_assign_rhs_code (stmt);
4794   gimple *newbinrhs = NULL;
4795   struct loop *loop = loop_containing_stmt (stmt);
4796   tree lhs = gimple_assign_lhs (stmt);
4797 
4798   gcc_assert (is_reassociable_op (binlhs, rhscode, loop)
4799 	      && is_reassociable_op (binrhs, rhscode, loop));
4800 
4801   gsi = gsi_for_stmt (stmt);
4802 
4803   gimple_assign_set_rhs2 (stmt, gimple_assign_rhs1 (binrhs));
4804   binrhs = gimple_build_assign (make_ssa_name (TREE_TYPE (lhs)),
4805 				gimple_assign_rhs_code (binrhs),
4806 				gimple_assign_lhs (binlhs),
4807 				gimple_assign_rhs2 (binrhs));
4808   gimple_assign_set_rhs1 (stmt, gimple_assign_lhs (binrhs));
4809   gsi_insert_before (&gsi, binrhs, GSI_SAME_STMT);
4810   gimple_set_uid (binrhs, gimple_uid (stmt));
4811 
4812   if (TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME)
4813     newbinrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
4814 
4815   if (dump_file && (dump_flags & TDF_DETAILS))
4816     {
4817       fprintf (dump_file, "Linearized: ");
4818       print_gimple_stmt (dump_file, stmt, 0);
4819     }
4820 
4821   reassociate_stats.linearized++;
4822   update_stmt (stmt);
4823 
4824   gsi = gsi_for_stmt (oldbinrhs);
4825   reassoc_remove_stmt (&gsi);
4826   release_defs (oldbinrhs);
4827 
4828   gimple_set_visited (stmt, true);
4829   gimple_set_visited (binlhs, true);
4830   gimple_set_visited (binrhs, true);
4831 
4832   /* Tail recurse on the new rhs if it still needs reassociation.  */
4833   if (newbinrhs && is_reassociable_op (newbinrhs, rhscode, loop))
4834     /* ??? This should probably be linearize_expr (newbinrhs) but I don't
4835 	   want to change the algorithm while converting to tuples.  */
4836     linearize_expr (stmt);
4837 }
4838 
4839 /* If LHS has a single immediate use that is a GIMPLE_ASSIGN statement, return
4840    it.  Otherwise, return NULL.  */
4841 
4842 static gimple *
4843 get_single_immediate_use (tree lhs)
4844 {
4845   use_operand_p immuse;
4846   gimple *immusestmt;
4847 
4848   if (TREE_CODE (lhs) == SSA_NAME
4849       && single_imm_use (lhs, &immuse, &immusestmt)
4850       && is_gimple_assign (immusestmt))
4851     return immusestmt;
4852 
4853   return NULL;
4854 }
4855 
4856 /* Recursively negate the value of TONEGATE, and return the SSA_NAME
4857    representing the negated value.  Insertions of any necessary
4858    instructions go before GSI.
4859    This function is recursive in that, if you hand it "a_5" as the
4860    value to negate, and a_5 is defined by "a_5 = b_3 + b_4", it will
4861    transform b_3 + b_4 into a_5 = -b_3 + -b_4.  */
4862 
4863 static tree
4864 negate_value (tree tonegate, gimple_stmt_iterator *gsip)
4865 {
4866   gimple *negatedefstmt = NULL;
4867   tree resultofnegate;
4868   gimple_stmt_iterator gsi;
4869   unsigned int uid;
4870 
4871   /* If we are trying to negate a name, defined by an add, negate the
4872      add operands instead.  */
4873   if (TREE_CODE (tonegate) == SSA_NAME)
4874     negatedefstmt = SSA_NAME_DEF_STMT (tonegate);
4875   if (TREE_CODE (tonegate) == SSA_NAME
4876       && is_gimple_assign (negatedefstmt)
4877       && TREE_CODE (gimple_assign_lhs (negatedefstmt)) == SSA_NAME
4878       && has_single_use (gimple_assign_lhs (negatedefstmt))
4879       && gimple_assign_rhs_code (negatedefstmt) == PLUS_EXPR)
4880     {
4881       tree rhs1 = gimple_assign_rhs1 (negatedefstmt);
4882       tree rhs2 = gimple_assign_rhs2 (negatedefstmt);
4883       tree lhs = gimple_assign_lhs (negatedefstmt);
4884       gimple *g;
4885 
4886       gsi = gsi_for_stmt (negatedefstmt);
4887       rhs1 = negate_value (rhs1, &gsi);
4888 
4889       gsi = gsi_for_stmt (negatedefstmt);
4890       rhs2 = negate_value (rhs2, &gsi);
4891 
4892       gsi = gsi_for_stmt (negatedefstmt);
4893       lhs = make_ssa_name (TREE_TYPE (lhs));
4894       gimple_set_visited (negatedefstmt, true);
4895       g = gimple_build_assign (lhs, PLUS_EXPR, rhs1, rhs2);
4896       gimple_set_uid (g, gimple_uid (negatedefstmt));
4897       gsi_insert_before (&gsi, g, GSI_SAME_STMT);
4898       return lhs;
4899     }
4900 
4901   tonegate = fold_build1 (NEGATE_EXPR, TREE_TYPE (tonegate), tonegate);
4902   resultofnegate = force_gimple_operand_gsi (gsip, tonegate, true,
4903 					     NULL_TREE, true, GSI_SAME_STMT);
4904   gsi = *gsip;
4905   uid = gimple_uid (gsi_stmt (gsi));
4906   for (gsi_prev (&gsi); !gsi_end_p (gsi); gsi_prev (&gsi))
4907     {
4908       gimple *stmt = gsi_stmt (gsi);
4909       if (gimple_uid (stmt) != 0)
4910 	break;
4911       gimple_set_uid (stmt, uid);
4912     }
4913   return resultofnegate;
4914 }
4915 
4916 /* Return true if we should break up the subtract in STMT into an add
4917    with negate.  This is true when the subtract's operands are really
4918    adds, or the subtract itself is used in an add expression.  In
4919    either case, breaking up the subtract into an add with negate
4920    exposes the adds to reassociation.  */
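/* For illustration (hypothetical GIMPLE, not taken from an actual dump):
   given "t_1 = a_2 - b_3" whose only use is "u_4 = t_1 + c_5", the
   subtract feeds an addition, so it is rewritten as "t_1 = a_2 + -b_3",
   exposing a single PLUS chain over a_2, -b_3 and c_5.  */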
4921 
4922 static bool
4923 should_break_up_subtract (gimple *stmt)
4924 {
4925   tree lhs = gimple_assign_lhs (stmt);
4926   tree binlhs = gimple_assign_rhs1 (stmt);
4927   tree binrhs = gimple_assign_rhs2 (stmt);
4928   gimple *immusestmt;
4929   struct loop *loop = loop_containing_stmt (stmt);
4930 
4931   if (TREE_CODE (binlhs) == SSA_NAME
4932       && is_reassociable_op (SSA_NAME_DEF_STMT (binlhs), PLUS_EXPR, loop))
4933     return true;
4934 
4935   if (TREE_CODE (binrhs) == SSA_NAME
4936       && is_reassociable_op (SSA_NAME_DEF_STMT (binrhs), PLUS_EXPR, loop))
4937     return true;
4938 
4939   if (TREE_CODE (lhs) == SSA_NAME
4940       && (immusestmt = get_single_immediate_use (lhs))
4941       && is_gimple_assign (immusestmt)
4942       && (gimple_assign_rhs_code (immusestmt) == PLUS_EXPR
4943 	  || (gimple_assign_rhs_code (immusestmt) == MINUS_EXPR
4944 	      && gimple_assign_rhs1 (immusestmt) == lhs)
4945 	  || gimple_assign_rhs_code (immusestmt) == MULT_EXPR))
4946     return true;
4947   return false;
4948 }
4949 
4950 /* Transform STMT from A - B into A + -B.  */
4951 
4952 static void
4953 break_up_subtract (gimple *stmt, gimple_stmt_iterator *gsip)
4954 {
4955   tree rhs1 = gimple_assign_rhs1 (stmt);
4956   tree rhs2 = gimple_assign_rhs2 (stmt);
4957 
4958   if (dump_file && (dump_flags & TDF_DETAILS))
4959     {
4960       fprintf (dump_file, "Breaking up subtract ");
4961       print_gimple_stmt (dump_file, stmt, 0);
4962     }
4963 
4964   rhs2 = negate_value (rhs2, gsip);
4965   gimple_assign_set_rhs_with_ops (gsip, PLUS_EXPR, rhs1, rhs2);
4966   update_stmt (stmt);
4967 }
4968 
4969 /* Determine whether STMT is a builtin call that raises an SSA name
4970    to an integer power and has only one use.  If so, and this is early
4971    reassociation and unsafe math optimizations are permitted, place
4972    the SSA name in *BASE and the exponent in *EXPONENT, and return TRUE.
4973    If any of these conditions does not hold, return FALSE.  */
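/* For illustration (hypothetical operands): "t_1 = __builtin_pow (x_2, 3.0)"
   with t_1 used only once yields *BASE = x_2 and *EXPONENT = 3, whereas
   "__builtin_pow (x_2, 0.5)" is rejected because the exponent is not an
   exact integer, and exponents below 2 are never accepted.  */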
4974 
4975 static bool
4976 acceptable_pow_call (gcall *stmt, tree *base, HOST_WIDE_INT *exponent)
4977 {
4978   tree arg1;
4979   REAL_VALUE_TYPE c, cint;
4980 
4981   switch (gimple_call_combined_fn (stmt))
4982     {
4983     CASE_CFN_POW:
4984       if (flag_errno_math)
4985 	return false;
4986 
4987       *base = gimple_call_arg (stmt, 0);
4988       arg1 = gimple_call_arg (stmt, 1);
4989 
4990       if (TREE_CODE (arg1) != REAL_CST)
4991 	return false;
4992 
4993       c = TREE_REAL_CST (arg1);
4994 
4995       if (REAL_EXP (&c) > HOST_BITS_PER_WIDE_INT)
4996 	return false;
4997 
4998       *exponent = real_to_integer (&c);
4999       real_from_integer (&cint, VOIDmode, *exponent, SIGNED);
5000       if (!real_identical (&c, &cint))
5001 	return false;
5002 
5003       break;
5004 
5005     CASE_CFN_POWI:
5006       *base = gimple_call_arg (stmt, 0);
5007       arg1 = gimple_call_arg (stmt, 1);
5008 
5009       if (!tree_fits_shwi_p (arg1))
5010 	return false;
5011 
5012       *exponent = tree_to_shwi (arg1);
5013       break;
5014 
5015     default:
5016       return false;
5017     }
5018 
5019   /* Expanding negative exponents is generally unproductive, so we don't
5020      complicate matters with those.  Exponents of zero and one should
5021      have been handled by expression folding.  */
5022   if (*exponent < 2 || TREE_CODE (*base) != SSA_NAME)
5023     return false;
5024 
5025   return true;
5026 }
5027 
5028 /* Try to derive and add an operand entry for OP to *OPS.  Return false if
5029    unsuccessful.  */
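/* For illustration (hypothetical operands): in a MULT_EXPR chain an operand
   defined by "t_1 = -x_2" is entered as the two factors x_2 and the
   constant -1 of its type, and one defined by "t_1 = __builtin_powi (x_2, 3)"
   is recorded as the repeated factor x_2 with count 3, provided the usual
   single-use and unsafe-math conditions hold.  */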
5030 
5031 static bool
5032 try_special_add_to_ops (vec<operand_entry *> *ops,
5033 			enum tree_code code,
5034 			tree op, gimple* def_stmt)
5035 {
5036   tree base = NULL_TREE;
5037   HOST_WIDE_INT exponent = 0;
5038 
5039   if (TREE_CODE (op) != SSA_NAME
5040       || ! has_single_use (op))
5041     return false;
5042 
5043   if (code == MULT_EXPR
5044       && reassoc_insert_powi_p
5045       && flag_unsafe_math_optimizations
5046       && is_gimple_call (def_stmt)
5047       && acceptable_pow_call (as_a <gcall *> (def_stmt), &base, &exponent))
5048     {
5049       add_repeat_to_ops_vec (ops, base, exponent);
5050       gimple_set_visited (def_stmt, true);
5051       return true;
5052     }
5053   else if (code == MULT_EXPR
5054 	   && is_gimple_assign (def_stmt)
5055 	   && gimple_assign_rhs_code (def_stmt) == NEGATE_EXPR
5056 	   && !HONOR_SNANS (TREE_TYPE (op))
5057 	   && (!HONOR_SIGNED_ZEROS (TREE_TYPE (op))
5058 	       || !COMPLEX_FLOAT_TYPE_P (TREE_TYPE (op))))
5059     {
5060       tree rhs1 = gimple_assign_rhs1 (def_stmt);
5061       tree cst = build_minus_one_cst (TREE_TYPE (op));
5062       add_to_ops_vec (ops, rhs1);
5063       add_to_ops_vec (ops, cst);
5064       gimple_set_visited (def_stmt, true);
5065       return true;
5066     }
5067 
5068   return false;
5069 }
5070 
5071 /* Recursively linearize a binary expression that is the RHS of STMT.
5072    Place the operands of the expression tree in the vector named OPS.  */
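/* For illustration (hypothetical SSA names): if STMT is "t_2 = t_1 * x_3"
   and t_1 is defined by "t_1 = x_3 * y_4" in the same loop, OPS ends up
   with one entry for y_4 and two entries for x_3, one per leaf factor of
   the multiply chain.  */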
5073 
5074 static void
5075 linearize_expr_tree (vec<operand_entry *> *ops, gimple *stmt,
5076 		     bool is_associative, bool set_visited)
5077 {
5078   tree binlhs = gimple_assign_rhs1 (stmt);
5079   tree binrhs = gimple_assign_rhs2 (stmt);
5080   gimple *binlhsdef = NULL, *binrhsdef = NULL;
5081   bool binlhsisreassoc = false;
5082   bool binrhsisreassoc = false;
5083   enum tree_code rhscode = gimple_assign_rhs_code (stmt);
5084   struct loop *loop = loop_containing_stmt (stmt);
5085 
5086   if (set_visited)
5087     gimple_set_visited (stmt, true);
5088 
5089   if (TREE_CODE (binlhs) == SSA_NAME)
5090     {
5091       binlhsdef = SSA_NAME_DEF_STMT (binlhs);
5092       binlhsisreassoc = (is_reassociable_op (binlhsdef, rhscode, loop)
5093 			 && !stmt_could_throw_p (binlhsdef));
5094     }
5095 
5096   if (TREE_CODE (binrhs) == SSA_NAME)
5097     {
5098       binrhsdef = SSA_NAME_DEF_STMT (binrhs);
5099       binrhsisreassoc = (is_reassociable_op (binrhsdef, rhscode, loop)
5100 			 && !stmt_could_throw_p (binrhsdef));
5101     }
5102 
5103   /* If the LHS is not reassociable, but the RHS is, we need to swap
5104      them.  If neither is reassociable, there is nothing we can do, so
5105      just put them in the ops vector.  If the LHS is reassociable,
5106      linearize it.  If both are reassociable, then linearize the RHS
5107      and the LHS.  */
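  /* For illustration (hypothetical): for "w_5 = c_1 + t_2" where only t_2
     is defined by a reassociable PLUS, the operands are swapped so that
     the reassociable definition becomes rhs1 before we recurse on it;
     c_1 is then simply pushed onto OPS.  */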
5108 
5109   if (!binlhsisreassoc)
5110     {
5111       /* If this is not an associative operation (e.g. division), give up.  */
5112       if (!is_associative)
5113 	{
5114 	  add_to_ops_vec (ops, binrhs);
5115 	  return;
5116 	}
5117 
5118       if (!binrhsisreassoc)
5119 	{
5120 	  if (!try_special_add_to_ops (ops, rhscode, binrhs, binrhsdef))
5121 	    add_to_ops_vec (ops, binrhs);
5122 
5123 	  if (!try_special_add_to_ops (ops, rhscode, binlhs, binlhsdef))
5124 	    add_to_ops_vec (ops, binlhs);
5125 
5126 	  return;
5127 	}
5128 
5129       if (dump_file && (dump_flags & TDF_DETAILS))
5130 	{
5131 	  fprintf (dump_file, "swapping operands of ");
5132 	  print_gimple_stmt (dump_file, stmt, 0);
5133 	}
5134 
5135       swap_ssa_operands (stmt,
5136 			 gimple_assign_rhs1_ptr (stmt),
5137 			 gimple_assign_rhs2_ptr (stmt));
5138       update_stmt (stmt);
5139 
5140       if (dump_file && (dump_flags & TDF_DETAILS))
5141 	{
5142 	  fprintf (dump_file, " is now ");
5143 	  print_gimple_stmt (dump_file, stmt, 0);
5144 	}
5145 
5146       /* We want to make it so the lhs is always the reassociative op,
5147 	 so swap.  */
5148       std::swap (binlhs, binrhs);
5149     }
5150   else if (binrhsisreassoc)
5151     {
5152       linearize_expr (stmt);
5153       binlhs = gimple_assign_rhs1 (stmt);
5154       binrhs = gimple_assign_rhs2 (stmt);
5155     }
5156 
5157   gcc_assert (TREE_CODE (binrhs) != SSA_NAME
5158 	      || !is_reassociable_op (SSA_NAME_DEF_STMT (binrhs),
5159 				      rhscode, loop));
5160   linearize_expr_tree (ops, SSA_NAME_DEF_STMT (binlhs),
5161 		       is_associative, set_visited);
5162 
5163   if (!try_special_add_to_ops (ops, rhscode, binrhs, binrhsdef))
5164     add_to_ops_vec (ops, binrhs);
5165 }
5166 
5167 /* Repropagate the negates back into subtracts, since no other pass
5168    currently does it.  */
5169 
5170 static void
5171 repropagate_negates (void)
5172 {
5173   unsigned int i = 0;
5174   tree negate;
5175 
5176   FOR_EACH_VEC_ELT (plus_negates, i, negate)
5177     {
5178       gimple *user = get_single_immediate_use (negate);
5179 
5180       if (!user || !is_gimple_assign (user))
5181 	continue;
5182 
5183       /* The negate operand can be either operand of a PLUS_EXPR
5184 	 (it can be the LHS if the RHS is a constant for example).
5185 
5186 	 Force the negate operand to the RHS of the PLUS_EXPR, then
5187 	 transform the PLUS_EXPR into a MINUS_EXPR.  */
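      /* For illustration (hypothetical): "x_1 = -a_2; y_3 = x_1 + b_4"
	 becomes "y_3 = b_4 - a_2" after the swap, and
	 "x_1 = -a_2; y_3 = b_4 + x_1" becomes "y_3 = b_4 - a_2" directly,
	 since the negate is already on the RHS.  */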
5188       if (gimple_assign_rhs_code (user) == PLUS_EXPR)
5189 	{
5190 	  /* If the negated operand appears on the LHS of the
5191 	     PLUS_EXPR, exchange the operands of the PLUS_EXPR
5192 	     to force the negated operand to the RHS of the PLUS_EXPR.  */
5193 	  if (gimple_assign_rhs1 (user) == negate)
5194 	    {
5195 	      swap_ssa_operands (user,
5196 				 gimple_assign_rhs1_ptr (user),
5197 				 gimple_assign_rhs2_ptr (user));
5198 	    }
5199 
5200 	  /* Now transform the PLUS_EXPR into a MINUS_EXPR and replace
5201 	     the RHS of the PLUS_EXPR with the operand of the NEGATE_EXPR.  */
5202 	  if (gimple_assign_rhs2 (user) == negate)
5203 	    {
5204 	      tree rhs1 = gimple_assign_rhs1 (user);
5205 	      tree rhs2 = gimple_assign_rhs1 (SSA_NAME_DEF_STMT (negate));
5206 	      gimple_stmt_iterator gsi = gsi_for_stmt (user);
5207 	      gimple_assign_set_rhs_with_ops (&gsi, MINUS_EXPR, rhs1, rhs2);
5208 	      update_stmt (user);
5209 	    }
5210 	}
5211       else if (gimple_assign_rhs_code (user) == MINUS_EXPR)
5212 	{
5213 	  if (gimple_assign_rhs1 (user) == negate)
5214 	    {
5215 	      /* We have
5216 	           x = -a
5217 		   y = x - b
5218 		 which we transform into
5219 		   x = a + b
5220 		   y = -x .
5221 		 This pushes down the negate which we possibly can merge
5222 		 into some other operation, hence insert it into the
5223 		 plus_negates vector.  */
5224 	      gimple *feed = SSA_NAME_DEF_STMT (negate);
5225 	      tree a = gimple_assign_rhs1 (feed);
5226 	      tree b = gimple_assign_rhs2 (user);
5227 	      gimple_stmt_iterator gsi = gsi_for_stmt (feed);
5228 	      gimple_stmt_iterator gsi2 = gsi_for_stmt (user);
5229 	      tree x = make_ssa_name (TREE_TYPE (gimple_assign_lhs (feed)));
5230 	      gimple *g = gimple_build_assign (x, PLUS_EXPR, a, b);
5231 	      gsi_insert_before (&gsi2, g, GSI_SAME_STMT);
5232 	      gimple_assign_set_rhs_with_ops (&gsi2, NEGATE_EXPR, x);
5233 	      user = gsi_stmt (gsi2);
5234 	      update_stmt (user);
5235 	      reassoc_remove_stmt (&gsi);
5236 	      release_defs (feed);
5237 	      plus_negates.safe_push (gimple_assign_lhs (user));
5238 	    }
5239 	  else
5240 	    {
5241 	      /* Transform "x = -a; y = b - x" into "y = b + a", getting
5242 	         rid of one operation.  */
5243 	      gimple *feed = SSA_NAME_DEF_STMT (negate);
5244 	      tree a = gimple_assign_rhs1 (feed);
5245 	      tree rhs1 = gimple_assign_rhs1 (user);
5246 	      gimple_stmt_iterator gsi = gsi_for_stmt (user);
5247 	      gimple_assign_set_rhs_with_ops (&gsi, PLUS_EXPR, rhs1, a);
5248 	      update_stmt (gsi_stmt (gsi));
5249 	    }
5250 	}
5251     }
5252 }
5253 
5254 /* Returns true if OP is of a type for which we can do reassociation.
5255    That is for integral or non-saturating fixed-point types, and for
5256    floating-point types when associative-math is enabled.  */
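/* For illustration (hypothetical operands): an "unsigned int" operand
   qualifies because its overflow wraps, a plain "int" operand does not
   unless wrapping overflow is in effect (e.g. -fwrapv), and a "double"
   operand qualifies only when -fassociative-math is enabled.  */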
5257 
5258 static bool
5259 can_reassociate_p (tree op)
5260 {
5261   tree type = TREE_TYPE (op);
5262   if (TREE_CODE (op) == SSA_NAME && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (op))
5263     return false;
5264   if ((ANY_INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_WRAPS (type))
5265       || NON_SAT_FIXED_POINT_TYPE_P (type)
5266       || (flag_associative_math && FLOAT_TYPE_P (type)))
5267     return true;
5268   return false;
5269 }
5270 
5271 /* Break up subtract operations in block BB.
5272 
5273    We do this top down because we don't know whether the subtract is
5274    part of a possible chain of reassociation except at the top.
5275 
5276    IE given
5277    d = f + g
5278    c = a + e
5279    b = c - d
5280    q = b - r
5281    k = t - q
5282 
5283    we want to break up k = t - q, but we won't until we've transformed q
5284    = b - r, which won't be broken up until we transform b = c - d.
5285 
5286    En passant, clear the GIMPLE visited flag on every statement
5287    and set UIDs within each basic block.  */
5288 
5289 static void
5290 break_up_subtract_bb (basic_block bb)
5291 {
5292   gimple_stmt_iterator gsi;
5293   basic_block son;
5294   unsigned int uid = 1;
5295 
5296   for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
5297     {
5298       gimple *stmt = gsi_stmt (gsi);
5299       gimple_set_visited (stmt, false);
5300       gimple_set_uid (stmt, uid++);
5301 
5302       if (!is_gimple_assign (stmt)
5303 	  || !can_reassociate_p (gimple_assign_lhs (stmt)))
5304 	continue;
5305 
5306       /* Look for simple gimple subtract operations.  */
5307       if (gimple_assign_rhs_code (stmt) == MINUS_EXPR)
5308 	{
5309 	  if (!can_reassociate_p (gimple_assign_rhs1 (stmt))
5310 	      || !can_reassociate_p (gimple_assign_rhs2 (stmt)))
5311 	    continue;
5312 
5313 	  /* Check for a subtract used only in an addition.  If this
5314 	     is the case, transform it into add of a negate for better
5315 	     reassociation.  IE transform C = A-B into C = A + -B if C
5316 	     is only used in an addition.  */
5317 	  if (should_break_up_subtract (stmt))
5318 	    break_up_subtract (stmt, &gsi);
5319 	}
5320       else if (gimple_assign_rhs_code (stmt) == NEGATE_EXPR
5321 	       && can_reassociate_p (gimple_assign_rhs1 (stmt)))
5322 	plus_negates.safe_push (gimple_assign_lhs (stmt));
5323     }
5324   for (son = first_dom_son (CDI_DOMINATORS, bb);
5325        son;
5326        son = next_dom_son (CDI_DOMINATORS, son))
5327     break_up_subtract_bb (son);
5328 }
5329 
5330 /* Used for repeated factor analysis.  */
5331 struct repeat_factor
5332 {
5333   /* An SSA name that occurs in a multiply chain.  */
5334   tree factor;
5335 
5336   /* Cached rank of the factor.  */
5337   unsigned rank;
5338 
5339   /* Number of occurrences of the factor in the chain.  */
5340   HOST_WIDE_INT count;
5341 
5342   /* An SSA name representing the product of this factor and
5343      all factors appearing later in the repeated factor vector.  */
5344   tree repr;
5345 };
5346 
5347 
5348 static vec<repeat_factor> repeat_factor_vec;
5349 
5350 /* Used for sorting the repeat factor vector.  Sort primarily by
5351    ascending occurrence count, secondarily by descending rank.  */
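/* For illustration (hypothetical counts): with occurrence counts x = 3,
   y = 2 and z = 2, the vector sorts to { y, z, x } when y's rank is
   higher than z's.  */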
5352 
5353 static int
5354 compare_repeat_factors (const void *x1, const void *x2)
5355 {
5356   const repeat_factor *rf1 = (const repeat_factor *) x1;
5357   const repeat_factor *rf2 = (const repeat_factor *) x2;
5358 
5359   if (rf1->count != rf2->count)
5360     return rf1->count - rf2->count;
5361 
5362   return rf2->rank - rf1->rank;
5363 }
5364 
5365 /* Look for repeated operands in OPS in the multiply tree rooted at
5366    STMT.  Replace them with an optimal sequence of multiplies and powi
5367    builtin calls, and remove the used operands from OPS.  Return an
5368    SSA name representing the value of the replacement sequence.  */
5369 
5370 static tree
5371 attempt_builtin_powi (gimple *stmt, vec<operand_entry *> *ops)
5372 {
5373   unsigned i, j, vec_len;
5374   int ii;
5375   operand_entry *oe;
5376   repeat_factor *rf1, *rf2;
5377   repeat_factor rfnew;
5378   tree result = NULL_TREE;
5379   tree target_ssa, iter_result;
5380   tree type = TREE_TYPE (gimple_get_lhs (stmt));
5381   tree powi_fndecl = mathfn_built_in (type, BUILT_IN_POWI);
5382   gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
5383   gimple *mul_stmt, *pow_stmt;
5384 
5385   /* Nothing to do if BUILT_IN_POWI doesn't exist for this type and
5386      target.  */
5387   if (!powi_fndecl)
5388     return NULL_TREE;
5389 
5390   /* Allocate the repeated factor vector.  */
5391   repeat_factor_vec.create (10);
5392 
5393   /* Scan the OPS vector for all SSA names in the product and build
5394      up a vector of occurrence counts for each factor.  */
5395   FOR_EACH_VEC_ELT (*ops, i, oe)
5396     {
5397       if (TREE_CODE (oe->op) == SSA_NAME)
5398 	{
5399 	  FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
5400 	    {
5401 	      if (rf1->factor == oe->op)
5402 		{
5403 		  rf1->count += oe->count;
5404 		  break;
5405 		}
5406 	    }
5407 
5408 	  if (j >= repeat_factor_vec.length ())
5409 	    {
5410 	      rfnew.factor = oe->op;
5411 	      rfnew.rank = oe->rank;
5412 	      rfnew.count = oe->count;
5413 	      rfnew.repr = NULL_TREE;
5414 	      repeat_factor_vec.safe_push (rfnew);
5415 	    }
5416 	}
5417     }
5418 
5419   /* Sort the repeated factor vector by (a) increasing occurrence count,
5420      and (b) decreasing rank.  */
5421   repeat_factor_vec.qsort (compare_repeat_factors);
5422 
5423   /* It is generally best to combine as many base factors as possible
5424      into a product before applying __builtin_powi to the result.
5425      However, the sort order chosen for the repeated factor vector
5426      allows us to cache partial results for the product of the base
5427      factors for subsequent use.  When we already have a cached partial
5428      result from a previous iteration, it is best to make use of it
5429      before looking for another __builtin_powi opportunity.
5430 
5431      As an example, consider x * x * y * y * y * z * z * z * z.
5432      We want to first compose the product x * y * z, raise it to the
5433      second power, then multiply this by y * z, and finally multiply
5434      by z.  This can be done in 5 multiplies provided we cache y * z
5435      for use in both expressions:
5436 
5437         t1 = y * z
5438 	t2 = t1 * x
5439 	t3 = t2 * t2
5440 	t4 = t1 * t3
5441 	result = t4 * z
5442 
5443      If we instead ignored the cached y * z and first multiplied by
5444      the __builtin_powi opportunity z * z, we would get the inferior:
5445 
5446         t1 = y * z
5447 	t2 = t1 * x
5448 	t3 = t2 * t2
5449 	t4 = z * z
5450 	t5 = t3 * t4
5451         result = t5 * y  */
5452 
5453   vec_len = repeat_factor_vec.length ();
5454 
5455   /* Repeatedly look for opportunities to create a builtin_powi call.  */
5456   while (true)
5457     {
5458       HOST_WIDE_INT power;
5459 
5460       /* First look for the largest cached product of factors from
5461 	 preceding iterations.  If found, create a builtin_powi for
5462 	 it if the minimum occurrence count for its factors is at
5463 	 least 2, or just use this cached product as our next
5464 	 multiplicand if the minimum occurrence count is 1.  */
5465       FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
5466 	{
5467 	  if (rf1->repr && rf1->count > 0)
5468 	    break;
5469 	}
5470 
5471       if (j < vec_len)
5472 	{
5473 	  power = rf1->count;
5474 
5475 	  if (power == 1)
5476 	    {
5477 	      iter_result = rf1->repr;
5478 
5479 	      if (dump_file && (dump_flags & TDF_DETAILS))
5480 		{
5481 		  unsigned elt;
5482 		  repeat_factor *rf;
5483 		  fputs ("Multiplying by cached product ", dump_file);
5484 		  for (elt = j; elt < vec_len; elt++)
5485 		    {
5486 		      rf = &repeat_factor_vec[elt];
5487 		      print_generic_expr (dump_file, rf->factor);
5488 		      if (elt < vec_len - 1)
5489 			fputs (" * ", dump_file);
5490 		    }
5491 		  fputs ("\n", dump_file);
5492 		}
5493 	    }
5494 	  else
5495 	    {
5496 	      iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
5497 	      pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
5498 					    build_int_cst (integer_type_node,
5499 							   power));
5500 	      gimple_call_set_lhs (pow_stmt, iter_result);
5501 	      gimple_set_location (pow_stmt, gimple_location (stmt));
5502 	      gimple_set_uid (pow_stmt, gimple_uid (stmt));
5503 	      gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
5504 
5505 	      if (dump_file && (dump_flags & TDF_DETAILS))
5506 		{
5507 		  unsigned elt;
5508 		  repeat_factor *rf;
5509 		  fputs ("Building __builtin_pow call for cached product (",
5510 			 dump_file);
5511 		  for (elt = j; elt < vec_len; elt++)
5512 		    {
5513 		      rf = &repeat_factor_vec[elt];
5514 		      print_generic_expr (dump_file, rf->factor);
5515 		      if (elt < vec_len - 1)
5516 			fputs (" * ", dump_file);
5517 		    }
5518 		  fprintf (dump_file, ")^" HOST_WIDE_INT_PRINT_DEC"\n",
5519 			   power);
5520 		}
5521 	    }
5522 	}
5523       else
5524 	{
5525 	  /* Otherwise, find the first factor in the repeated factor
5526 	     vector whose occurrence count is at least 2.  If no such
5527 	     factor exists, there are no builtin_powi opportunities
5528 	     remaining.  */
5529 	  FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
5530 	    {
5531 	      if (rf1->count >= 2)
5532 		break;
5533 	    }
5534 
5535 	  if (j >= vec_len)
5536 	    break;
5537 
5538 	  power = rf1->count;
5539 
5540 	  if (dump_file && (dump_flags & TDF_DETAILS))
5541 	    {
5542 	      unsigned elt;
5543 	      repeat_factor *rf;
5544 	      fputs ("Building __builtin_pow call for (", dump_file);
5545 	      for (elt = j; elt < vec_len; elt++)
5546 		{
5547 		  rf = &repeat_factor_vec[elt];
5548 		  print_generic_expr (dump_file, rf->factor);
5549 		  if (elt < vec_len - 1)
5550 		    fputs (" * ", dump_file);
5551 		}
5552 	      fprintf (dump_file, ")^" HOST_WIDE_INT_PRINT_DEC"\n", power);
5553 	    }
5554 
5555 	  reassociate_stats.pows_created++;
5556 
5557 	  /* Visit each element of the vector in reverse order (so that
5558 	     high-occurrence elements are visited first, and within the
5559 	     same occurrence count, lower-ranked elements are visited
5560 	     first).  Form a linear product of all elements in this order
5561 	     whose occurrence count is at least that of element J.
5562 	     Record the SSA name representing the product of each element
5563 	     with all subsequent elements in the vector.  */
5564 	  if (j == vec_len - 1)
5565 	    rf1->repr = rf1->factor;
5566 	  else
5567 	    {
5568 	      for (ii = vec_len - 2; ii >= (int)j; ii--)
5569 		{
5570 		  tree op1, op2;
5571 
5572 		  rf1 = &repeat_factor_vec[ii];
5573 		  rf2 = &repeat_factor_vec[ii + 1];
5574 
5575 		  /* Init the last factor's representative to be itself.  */
5576 		  if (!rf2->repr)
5577 		    rf2->repr = rf2->factor;
5578 
5579 		  op1 = rf1->factor;
5580 		  op2 = rf2->repr;
5581 
5582 		  target_ssa = make_temp_ssa_name (type, NULL, "reassocpow");
5583 		  mul_stmt = gimple_build_assign (target_ssa, MULT_EXPR,
5584 						  op1, op2);
5585 		  gimple_set_location (mul_stmt, gimple_location (stmt));
5586 		  gimple_set_uid (mul_stmt, gimple_uid (stmt));
5587 		  gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
5588 		  rf1->repr = target_ssa;
5589 
5590 		  /* Don't reprocess the multiply we just introduced.  */
5591 		  gimple_set_visited (mul_stmt, true);
5592 		}
5593 	    }
5594 
5595 	  /* Form a call to __builtin_powi for the maximum product
5596 	     just formed, raised to the power obtained earlier.  */
5597 	  rf1 = &repeat_factor_vec[j];
5598 	  iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
5599 	  pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
5600 					build_int_cst (integer_type_node,
5601 						       power));
5602 	  gimple_call_set_lhs (pow_stmt, iter_result);
5603 	  gimple_set_location (pow_stmt, gimple_location (stmt));
5604 	  gimple_set_uid (pow_stmt, gimple_uid (stmt));
5605 	  gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
5606 	}
5607 
5608       /* If we previously formed at least one other builtin_powi call,
5609 	 form the product of this one and those others.  */
5610       if (result)
5611 	{
5612 	  tree new_result = make_temp_ssa_name (type, NULL, "reassocpow");
5613 	  mul_stmt = gimple_build_assign (new_result, MULT_EXPR,
5614 					  result, iter_result);
5615 	  gimple_set_location (mul_stmt, gimple_location (stmt));
5616 	  gimple_set_uid (mul_stmt, gimple_uid (stmt));
5617 	  gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
5618 	  gimple_set_visited (mul_stmt, true);
5619 	  result = new_result;
5620 	}
5621       else
5622 	result = iter_result;
5623 
5624       /* Decrement the occurrence count of each element in the product
5625 	 by the count found above, and remove this many copies of each
5626 	 factor from OPS.  */
5627       for (i = j; i < vec_len; i++)
5628 	{
5629 	  unsigned k = power;
5630 	  unsigned n;
5631 
5632 	  rf1 = &repeat_factor_vec[i];
5633 	  rf1->count -= power;
5634 
5635 	  FOR_EACH_VEC_ELT_REVERSE (*ops, n, oe)
5636 	    {
5637 	      if (oe->op == rf1->factor)
5638 		{
5639 		  if (oe->count <= k)
5640 		    {
5641 		      ops->ordered_remove (n);
5642 		      k -= oe->count;
5643 
5644 		      if (k == 0)
5645 			break;
5646 		    }
5647 		  else
5648 		    {
5649 		      oe->count -= k;
5650 		      break;
5651 		    }
5652 		}
5653 	    }
5654 	}
5655     }
5656 
5657   /* At this point all elements in the repeated factor vector have a
5658      remaining occurrence count of 0 or 1, and those with a count of 1
5659      don't have cached representatives.  Re-sort the ops vector and
5660      clean up.  */
5661   ops->qsort (sort_by_operand_rank);
5662   repeat_factor_vec.release ();
5663 
5664   /* Return the final product computed herein.  Note that there may
5665      still be some elements with single occurrence count left in OPS;
5666      those will be handled by the normal reassociation logic.  */
5667   return result;
5668 }
5669 
5670 /* Attempt to optimize
5671    CST1 * copysign (CST2, y) -> copysign (CST1 * CST2, y) if CST1 > 0, or
5672    CST1 * copysign (CST2, y) -> -copysign (CST1 * CST2, y) if CST1 < 0.  */
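/* For illustration (hypothetical constants): 2.0 * copysign (3.0, y)
   becomes copysign (6.0, y); with a negative constant such as -2.0 the
   rewritten copysign result is additionally negated.  */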
5673 
5674 static void
5675 attempt_builtin_copysign (vec<operand_entry *> *ops)
5676 {
5677   operand_entry *oe;
5678   unsigned int i;
5679   unsigned int length = ops->length ();
5680   tree cst = ops->last ()->op;
5681 
5682   if (length == 1 || TREE_CODE (cst) != REAL_CST)
5683     return;
5684 
5685   FOR_EACH_VEC_ELT (*ops, i, oe)
5686     {
5687       if (TREE_CODE (oe->op) == SSA_NAME
5688 	  && has_single_use (oe->op))
5689 	{
5690 	  gimple *def_stmt = SSA_NAME_DEF_STMT (oe->op);
5691 	  if (gcall *old_call = dyn_cast <gcall *> (def_stmt))
5692 	    {
5693 	      tree arg0, arg1;
5694 	      switch (gimple_call_combined_fn (old_call))
5695 		{
5696 		CASE_CFN_COPYSIGN:
5697 		CASE_CFN_COPYSIGN_FN:
5698 		  arg0 = gimple_call_arg (old_call, 0);
5699 		  arg1 = gimple_call_arg (old_call, 1);
5700 		  /* The first argument of copysign must be a constant,
5701 		     otherwise there's nothing to do.  */
5702 		  if (TREE_CODE (arg0) == REAL_CST)
5703 		    {
5704 		      tree type = TREE_TYPE (arg0);
5705 		      tree mul = const_binop (MULT_EXPR, type, cst, arg0);
5706 		      /* If we couldn't fold to a single constant, skip it.
5707 			 That happens e.g. for inexact multiplication when
5708 			 -frounding-math.  */
5709 		      if (mul == NULL_TREE)
5710 			break;
5711 		      /* Instead of adjusting OLD_CALL, let's build a new
5712 			 call to not leak the LHS and prevent keeping bogus
5713 			 debug statements.  DCE will clean up the old call.  */
5714 		      gcall *new_call;
5715 		      if (gimple_call_internal_p (old_call))
5716 			new_call = gimple_build_call_internal
5717 			  (IFN_COPYSIGN, 2, mul, arg1);
5718 		      else
5719 			new_call = gimple_build_call
5720 			  (gimple_call_fndecl (old_call), 2, mul, arg1);
5721 		      tree lhs = make_ssa_name (type);
5722 		      gimple_call_set_lhs (new_call, lhs);
5723 		      gimple_set_location (new_call,
5724 					   gimple_location (old_call));
5725 		      insert_stmt_after (new_call, old_call);
5726 		      /* We've used the constant, get rid of it.  */
5727 		      ops->pop ();
5728 		      bool cst1_neg = real_isneg (TREE_REAL_CST_PTR (cst));
5729 		      /* Handle the CST1 < 0 case by negating the result.  */
5730 		      if (cst1_neg)
5731 			{
5732 			  tree negrhs = make_ssa_name (TREE_TYPE (lhs));
5733 			  gimple *negate_stmt
5734 			    = gimple_build_assign (negrhs, NEGATE_EXPR, lhs);
5735 			  insert_stmt_after (negate_stmt, new_call);
5736 			  oe->op = negrhs;
5737 			}
5738 		      else
5739 			oe->op = lhs;
5740 		      if (dump_file && (dump_flags & TDF_DETAILS))
5741 			{
5742 			  fprintf (dump_file, "Optimizing copysign: ");
5743 			  print_generic_expr (dump_file, cst);
5744 			  fprintf (dump_file, " * COPYSIGN (");
5745 			  print_generic_expr (dump_file, arg0);
5746 			  fprintf (dump_file, ", ");
5747 			  print_generic_expr (dump_file, arg1);
5748 			  fprintf (dump_file, ") into %sCOPYSIGN (",
5749 				   cst1_neg ? "-" : "");
5750 			  print_generic_expr (dump_file, mul);
5751 			  fprintf (dump_file, ", ");
5752 			  print_generic_expr (dump_file, arg1);
5753 			  fprintf (dump_file, "\n");
5754 			}
5755 		      return;
5756 		    }
5757 		  break;
5758 		default:
5759 		  break;
5760 		}
5761 	    }
5762 	}
5763     }
5764 }
5765 
5766 /* Transform STMT at *GSI into a copy by replacing its rhs with NEW_RHS.  */
5767 
5768 static void
5769 transform_stmt_to_copy (gimple_stmt_iterator *gsi, gimple *stmt, tree new_rhs)
5770 {
5771   tree rhs1;
5772 
5773   if (dump_file && (dump_flags & TDF_DETAILS))
5774     {
5775       fprintf (dump_file, "Transforming ");
5776       print_gimple_stmt (dump_file, stmt, 0);
5777     }
5778 
5779   rhs1 = gimple_assign_rhs1 (stmt);
5780   gimple_assign_set_rhs_from_tree (gsi, new_rhs);
5781   update_stmt (stmt);
5782   remove_visited_stmt_chain (rhs1);
5783 
5784   if (dump_file && (dump_flags & TDF_DETAILS))
5785     {
5786       fprintf (dump_file, " into ");
5787       print_gimple_stmt (dump_file, stmt, 0);
5788     }
5789 }
5790 
5791 /* Transform STMT at *GSI into a multiply of RHS1 and RHS2.  */
5792 
5793 static void
5794 transform_stmt_to_multiply (gimple_stmt_iterator *gsi, gimple *stmt,
5795 			    tree rhs1, tree rhs2)
5796 {
5797   if (dump_file && (dump_flags & TDF_DETAILS))
5798     {
5799       fprintf (dump_file, "Transforming ");
5800       print_gimple_stmt (dump_file, stmt, 0);
5801     }
5802 
5803   gimple_assign_set_rhs_with_ops (gsi, MULT_EXPR, rhs1, rhs2);
5804   update_stmt (gsi_stmt (*gsi));
5805   remove_visited_stmt_chain (rhs1);
5806 
5807   if (dump_file && (dump_flags & TDF_DETAILS))
5808     {
5809       fprintf (dump_file, " into ");
5810       print_gimple_stmt (dump_file, stmt, 0);
5811     }
5812 }
5813 
5814 /* Reassociate expressions in basic block BB and, recursively, in its
5815    children in the post-dominator tree.
5816 
5817    Bubble up return status from maybe_optimize_range_tests.  */
5818 
5819 static bool
5820 reassociate_bb (basic_block bb)
5821 {
5822   gimple_stmt_iterator gsi;
5823   basic_block son;
5824   gimple *stmt = last_stmt (bb);
5825   bool cfg_cleanup_needed = false;
5826 
5827   if (stmt && !gimple_visited_p (stmt))
5828     cfg_cleanup_needed |= maybe_optimize_range_tests (stmt);
5829 
5830   for (gsi = gsi_last_bb (bb); !gsi_end_p (gsi); gsi_prev (&gsi))
5831     {
5832       stmt = gsi_stmt (gsi);
5833 
5834       if (is_gimple_assign (stmt)
5835 	  && !stmt_could_throw_p (stmt))
5836 	{
5837 	  tree lhs, rhs1, rhs2;
5838 	  enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
5839 
5840 	  /* If this is not a gimple binary expression, there is
5841 	     nothing for us to do with it.  */
5842 	  if (get_gimple_rhs_class (rhs_code) != GIMPLE_BINARY_RHS)
5843 	    continue;
5844 
5845 	  /* If this was part of an already processed statement,
5846 	     we don't need to touch it again. */
5847 	  if (gimple_visited_p (stmt))
5848 	    {
5849 	      /* This statement might have become dead because of previous
5850 		 reassociations.  */
5851 	      if (has_zero_uses (gimple_get_lhs (stmt)))
5852 		{
5853 		  reassoc_remove_stmt (&gsi);
5854 		  release_defs (stmt);
5855 		  /* We might end up removing the last stmt above which
5856 		     places the iterator to the end of the sequence.
5857 		     Reset it to the last stmt in this case which might
5858 		     be the end of the sequence as well if we removed
5859 		     the last statement of the sequence.  In which case
5860 		     we need to bail out.  */
5861 		  if (gsi_end_p (gsi))
5862 		    {
5863 		      gsi = gsi_last_bb (bb);
5864 		      if (gsi_end_p (gsi))
5865 			break;
5866 		    }
5867 		}
5868 	      continue;
5869 	    }
5870 
5871 	  lhs = gimple_assign_lhs (stmt);
5872 	  rhs1 = gimple_assign_rhs1 (stmt);
5873 	  rhs2 = gimple_assign_rhs2 (stmt);
5874 
5875 	  /* For non-bit or min/max operations we can't associate
5876 	     all types.  Verify that here.  */
5877 	  if (rhs_code != BIT_IOR_EXPR
5878 	      && rhs_code != BIT_AND_EXPR
5879 	      && rhs_code != BIT_XOR_EXPR
5880 	      && rhs_code != MIN_EXPR
5881 	      && rhs_code != MAX_EXPR
5882 	      && (!can_reassociate_p (lhs)
5883 		  || !can_reassociate_p (rhs1)
5884 		  || !can_reassociate_p (rhs2)))
5885 	    continue;
5886 
5887 	  if (associative_tree_code (rhs_code))
5888 	    {
5889 	      auto_vec<operand_entry *> ops;
5890 	      tree powi_result = NULL_TREE;
5891 	      bool is_vector = VECTOR_TYPE_P (TREE_TYPE (lhs));
5892 
5893 	      /* There may be no immediate uses left by the time we
5894 		 get here because we may have eliminated them all.  */
5895 	      if (TREE_CODE (lhs) == SSA_NAME && has_zero_uses (lhs))
5896 		continue;
5897 
5898 	      gimple_set_visited (stmt, true);
5899 	      linearize_expr_tree (&ops, stmt, true, true);
5900 	      ops.qsort (sort_by_operand_rank);
5901 	      int orig_len = ops.length ();
5902 	      optimize_ops_list (rhs_code, &ops);
5903 	      if (undistribute_ops_list (rhs_code, &ops,
5904 					 loop_containing_stmt (stmt)))
5905 		{
5906 		  ops.qsort (sort_by_operand_rank);
5907 		  optimize_ops_list (rhs_code, &ops);
5908 		}
5909 
5910 	      if (rhs_code == PLUS_EXPR
5911 		  && transform_add_to_multiply (&ops))
5912 		ops.qsort (sort_by_operand_rank);
5913 
5914 	      if (rhs_code == BIT_IOR_EXPR || rhs_code == BIT_AND_EXPR)
5915 		{
5916 		  if (is_vector)
5917 		    optimize_vec_cond_expr (rhs_code, &ops);
5918 		  else
5919 		    optimize_range_tests (rhs_code, &ops, NULL);
5920 	        }
5921 
5922 	      if (rhs_code == MULT_EXPR && !is_vector)
5923 	        {
5924 		  attempt_builtin_copysign (&ops);
5925 
5926 		  if (reassoc_insert_powi_p
5927 		      && flag_unsafe_math_optimizations)
5928 		    powi_result = attempt_builtin_powi (stmt, &ops);
5929 		}
5930 
5931 	      operand_entry *last;
5932 	      bool negate_result = false;
5933 	      if (ops.length () > 1
5934 		  && rhs_code == MULT_EXPR)
5935 		{
5936 		  last = ops.last ();
5937 		  if ((integer_minus_onep (last->op)
5938 		       || real_minus_onep (last->op))
5939 		      && !HONOR_SNANS (TREE_TYPE (lhs))
5940 		      && (!HONOR_SIGNED_ZEROS (TREE_TYPE (lhs))
5941 			  || !COMPLEX_FLOAT_TYPE_P (TREE_TYPE (lhs))))
5942 		    {
5943 		      ops.pop ();
5944 		      negate_result = true;
5945 		    }
5946 		}
5947 
5948 	      tree new_lhs = lhs;
5949 	      /* If the operand vector is now empty, all operands were
5950 		 consumed by the __builtin_powi optimization.  */
5951 	      if (ops.length () == 0)
5952 		transform_stmt_to_copy (&gsi, stmt, powi_result);
5953 	      else if (ops.length () == 1)
5954 		{
5955 		  tree last_op = ops.last ()->op;
5956 
5957 		  /* If the stmt that defines operand has to be inserted, insert it
5958 		     before the use.  */
5959 		  if (ops.last ()->stmt_to_insert)
5960 		    insert_stmt_before_use (stmt, ops.last ()->stmt_to_insert);
5961 		  if (powi_result)
5962 		    transform_stmt_to_multiply (&gsi, stmt, last_op,
5963 						powi_result);
5964 		  else
5965 		    transform_stmt_to_copy (&gsi, stmt, last_op);
5966 		}
5967 	      else
5968 		{
5969 		  machine_mode mode = TYPE_MODE (TREE_TYPE (lhs));
5970 		  int ops_num = ops.length ();
5971 		  int width = get_reassociation_width (ops_num, rhs_code, mode);
5972 
5973 		  if (dump_file && (dump_flags & TDF_DETAILS))
5974 		    fprintf (dump_file,
5975 			     "Width = %d was chosen for reassociation\n", width);
5976 
5977 
5978 		  /* For binary bit operations, if there are at least 3
5979 		     operands and the last operand in OPS is a constant,
5980 		     move it to the front.  This helps ensure that we generate
5981 		     (X & Y) & C rather than (X & C) & Y.  The former will
5982 		     often match a canonical bit test when we get to RTL.  */
5983 		  if (ops.length () > 2
5984 		      && (rhs_code == BIT_AND_EXPR
5985 		          || rhs_code == BIT_IOR_EXPR
5986 		          || rhs_code == BIT_XOR_EXPR)
5987 		      && TREE_CODE (ops.last ()->op) == INTEGER_CST)
5988 		    std::swap (*ops[0], *ops[ops_num - 1]);
5989 
5990 		  if (width > 1
5991 		      && ops.length () > 3)
5992 		    rewrite_expr_tree_parallel (as_a <gassign *> (stmt),
5993 						width, ops);
5994 		  else
5995                     {
5996                       /* When there are three operands left, we want
5997                          to make sure the ones that get the double
5998                          binary op are chosen wisely.  */
5999                       int len = ops.length ();
6000                       if (len >= 3)
6001                         swap_ops_for_binary_stmt (ops, len - 3, stmt);
6002 
6003 		      new_lhs = rewrite_expr_tree (stmt, 0, ops,
6004 						   powi_result != NULL
6005 						   || negate_result,
6006 						   len != orig_len);
6007                     }
6008 
6009 		  /* If we combined some repeated factors into a
6010 		     __builtin_powi call, multiply that result by the
6011 		     reassociated operands.  */
6012 		  if (powi_result)
6013 		    {
6014 		      gimple *mul_stmt, *lhs_stmt = SSA_NAME_DEF_STMT (lhs);
6015 		      tree type = TREE_TYPE (lhs);
6016 		      tree target_ssa = make_temp_ssa_name (type, NULL,
6017 							    "reassocpow");
6018 		      gimple_set_lhs (lhs_stmt, target_ssa);
6019 		      update_stmt (lhs_stmt);
6020 		      if (lhs != new_lhs)
6021 			{
6022 			  target_ssa = new_lhs;
6023 			  new_lhs = lhs;
6024 			}
6025 		      mul_stmt = gimple_build_assign (lhs, MULT_EXPR,
6026 						      powi_result, target_ssa);
6027 		      gimple_set_location (mul_stmt, gimple_location (stmt));
6028 		      gimple_set_uid (mul_stmt, gimple_uid (stmt));
6029 		      gsi_insert_after (&gsi, mul_stmt, GSI_NEW_STMT);
6030 		    }
6031 		}
6032 
6033 	      if (negate_result)
6034 		{
6035 		  stmt = SSA_NAME_DEF_STMT (lhs);
6036 		  tree tmp = make_ssa_name (TREE_TYPE (lhs));
6037 		  gimple_set_lhs (stmt, tmp);
6038 		  if (lhs != new_lhs)
6039 		    tmp = new_lhs;
6040 		  gassign *neg_stmt = gimple_build_assign (lhs, NEGATE_EXPR,
6041 							   tmp);
6042 		  gimple_set_uid (neg_stmt, gimple_uid (stmt));
6043 		  gsi_insert_after (&gsi, neg_stmt, GSI_NEW_STMT);
6044 		  update_stmt (stmt);
6045 		}
6046 	    }
6047 	}
6048     }
6049   for (son = first_dom_son (CDI_POST_DOMINATORS, bb);
6050        son;
6051        son = next_dom_son (CDI_POST_DOMINATORS, son))
6052     cfg_cleanup_needed |= reassociate_bb (son);
6053 
6054   return cfg_cleanup_needed;
6055 }
6056 
6057 /* Add jumps around shifts for range tests turned into bit tests.
6058    For each SSA_NAME VAR we have code like:
6059    VAR = ...; // final stmt of range comparison
6060    // bit test here...;
6061    OTHERVAR = ...; // final stmt of the bit test sequence
6062    RES = VAR | OTHERVAR;
6063    Turn the above into:
6064    VAR = ...;
6065    if (VAR != 0)
6066      goto <l3>;
6067    else
6068      goto <l2>;
6069    <l2>:
6070    // bit test here...;
6071    OTHERVAR = ...;
6072    <l3>:
6073    # RES = PHI<1(l1), OTHERVAR(l2)>;  */
6074 
6075 static void
6076 branch_fixup (void)
6077 {
6078   tree var;
6079   unsigned int i;
6080 
6081   FOR_EACH_VEC_ELT (reassoc_branch_fixups, i, var)
6082     {
6083       gimple *def_stmt = SSA_NAME_DEF_STMT (var);
6084       gimple *use_stmt;
6085       use_operand_p use;
6086       bool ok = single_imm_use (var, &use, &use_stmt);
6087       gcc_assert (ok
6088 		  && is_gimple_assign (use_stmt)
6089 		  && gimple_assign_rhs_code (use_stmt) == BIT_IOR_EXPR
6090 		  && gimple_bb (def_stmt) == gimple_bb (use_stmt));
6091 
6092       basic_block cond_bb = gimple_bb (def_stmt);
6093       basic_block then_bb = split_block (cond_bb, def_stmt)->dest;
6094       basic_block merge_bb = split_block (then_bb, use_stmt)->dest;
6095 
6096       gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
6097       gimple *g = gimple_build_cond (NE_EXPR, var,
6098 				     build_zero_cst (TREE_TYPE (var)),
6099 				     NULL_TREE, NULL_TREE);
6100       location_t loc = gimple_location (use_stmt);
6101       gimple_set_location (g, loc);
6102       gsi_insert_after (&gsi, g, GSI_NEW_STMT);
6103 
6104       edge etrue = make_edge (cond_bb, merge_bb, EDGE_TRUE_VALUE);
6105       etrue->probability = profile_probability::even ();
6106       edge efalse = find_edge (cond_bb, then_bb);
6107       efalse->flags = EDGE_FALSE_VALUE;
6108       efalse->probability -= etrue->probability;
6109       then_bb->count -= etrue->count ();
6110 
6111       tree othervar = NULL_TREE;
6112       if (gimple_assign_rhs1 (use_stmt) == var)
6113 	othervar = gimple_assign_rhs2 (use_stmt);
6114       else if (gimple_assign_rhs2 (use_stmt) == var)
6115 	othervar = gimple_assign_rhs1 (use_stmt);
6116       else
6117 	gcc_unreachable ();
6118       tree lhs = gimple_assign_lhs (use_stmt);
6119       gphi *phi = create_phi_node (lhs, merge_bb);
6120       add_phi_arg (phi, build_one_cst (TREE_TYPE (lhs)), etrue, loc);
6121       add_phi_arg (phi, othervar, single_succ_edge (then_bb), loc);
6122       gsi = gsi_for_stmt (use_stmt);
6123       gsi_remove (&gsi, true);
6124 
6125       set_immediate_dominator (CDI_DOMINATORS, merge_bb, cond_bb);
6126       set_immediate_dominator (CDI_POST_DOMINATORS, cond_bb, merge_bb);
6127     }
6128   reassoc_branch_fixups.release ();
6129 }
6130 
6131 void dump_ops_vector (FILE *file, vec<operand_entry *> ops);
6132 void debug_ops_vector (vec<operand_entry *> ops);
6133 
6134 /* Dump the operand entry vector OPS to FILE.  */
6135 
6136 void
6137 dump_ops_vector (FILE *file, vec<operand_entry *> ops)
6138 {
6139   operand_entry *oe;
6140   unsigned int i;
6141 
6142   FOR_EACH_VEC_ELT (ops, i, oe)
6143     {
6144       fprintf (file, "Op %d -> rank: %d, tree: ", i, oe->rank);
6145       print_generic_expr (file, oe->op);
6146       fprintf (file, "\n");
6147     }
6148 }
6149 
6150 /* Dump the operand entry vector OPS to STDERR.  */
6151 
6152 DEBUG_FUNCTION void
6153 debug_ops_vector (vec<operand_entry *> ops)
6154 {
6155   dump_ops_vector (stderr, ops);
6156 }
6157 
6158 /* Bubble up return status from reassociate_bb.  */
6159 
6160 static bool
6161 do_reassoc (void)
6162 {
6163   break_up_subtract_bb (ENTRY_BLOCK_PTR_FOR_FN (cfun));
6164   return reassociate_bb (EXIT_BLOCK_PTR_FOR_FN (cfun));
6165 }
6166 
6167 /* Initialize the reassociation pass.  */
6168 
6169 static void
6170 init_reassoc (void)
6171 {
6172   int i;
6173   long rank = 2;
6174   int *bbs = XNEWVEC (int, n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS);
6175 
6176   /* Find the loops, so that we can prevent moving calculations in
6177      them.  */
6178   loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
6179 
6180   memset (&reassociate_stats, 0, sizeof (reassociate_stats));
6181 
6182   next_operand_entry_id = 0;
6183 
6184   /* Reverse RPO (Reverse Post Order) will give us something where
6185      deeper loops come later.  */
6186   pre_and_rev_post_order_compute (NULL, bbs, false);
6187   bb_rank = XCNEWVEC (long, last_basic_block_for_fn (cfun));
6188   operand_rank = new hash_map<tree, long>;
6189 
6190   /* Give each default definition a distinct rank.  This includes
6191      parameters and the static chain.  Walk backwards over all
6192      SSA names so that we get proper rank ordering according
6193      to tree_swap_operands_p.  */
6194   for (i = num_ssa_names - 1; i > 0; --i)
6195     {
6196       tree name = ssa_name (i);
6197       if (name && SSA_NAME_IS_DEFAULT_DEF (name))
6198 	insert_operand_rank (name, ++rank);
6199     }
6200 
6201   /* Set up rank for each BB  */
6202   for (i = 0; i < n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS; i++)
6203     bb_rank[bbs[i]] = ++rank << 16;
6204 
6205   free (bbs);
6206   calculate_dominance_info (CDI_POST_DOMINATORS);
6207   plus_negates = vNULL;
6208 }
6209 
6210 /* Cleanup after the reassociation pass, and print stats if
6211    requested.  */
6212 
6213 static void
6214 fini_reassoc (void)
6215 {
6216   statistics_counter_event (cfun, "Linearized",
6217 			    reassociate_stats.linearized);
6218   statistics_counter_event (cfun, "Constants eliminated",
6219 			    reassociate_stats.constants_eliminated);
6220   statistics_counter_event (cfun, "Ops eliminated",
6221 			    reassociate_stats.ops_eliminated);
6222   statistics_counter_event (cfun, "Statements rewritten",
6223 			    reassociate_stats.rewritten);
6224   statistics_counter_event (cfun, "Built-in pow[i] calls encountered",
6225 			    reassociate_stats.pows_encountered);
6226   statistics_counter_event (cfun, "Built-in powi calls created",
6227 			    reassociate_stats.pows_created);
6228 
6229   delete operand_rank;
6230   operand_entry_pool.release ();
6231   free (bb_rank);
6232   plus_negates.release ();
6233   free_dominance_info (CDI_POST_DOMINATORS);
6234   loop_optimizer_finalize ();
6235 }
6236 
6237 /* Gate and execute functions for Reassociation.  If INSERT_POWI_P, enable
6238    insertion of __builtin_powi calls.
6239 
6240    Returns TODO_cfg_cleanup if a CFG cleanup pass is desired due to
6241    optimization of a gimple conditional.  Otherwise returns zero.  */
6242 
6243 static unsigned int
6244 execute_reassoc (bool insert_powi_p)
6245 {
6246   reassoc_insert_powi_p = insert_powi_p;
6247 
6248   init_reassoc ();
6249 
6250   bool cfg_cleanup_needed;
6251   cfg_cleanup_needed = do_reassoc ();
6252   repropagate_negates ();
6253   branch_fixup ();
6254 
6255   fini_reassoc ();
6256   return cfg_cleanup_needed ? TODO_cleanup_cfg : 0;
6257 }
6258 
6259 namespace {
6260 
6261 const pass_data pass_data_reassoc =
6262 {
6263   GIMPLE_PASS, /* type */
6264   "reassoc", /* name */
6265   OPTGROUP_NONE, /* optinfo_flags */
6266   TV_TREE_REASSOC, /* tv_id */
6267   ( PROP_cfg | PROP_ssa ), /* properties_required */
6268   0, /* properties_provided */
6269   0, /* properties_destroyed */
6270   0, /* todo_flags_start */
6271   TODO_update_ssa_only_virtuals, /* todo_flags_finish */
6272 };
6273 
6274 class pass_reassoc : public gimple_opt_pass
6275 {
6276 public:
6277   pass_reassoc (gcc::context *ctxt)
6278     : gimple_opt_pass (pass_data_reassoc, ctxt), insert_powi_p (false)
6279   {}
6280 
6281   /* opt_pass methods: */
6282   opt_pass * clone () { return new pass_reassoc (m_ctxt); }
6283   void set_pass_param (unsigned int n, bool param)
6284     {
6285       gcc_assert (n == 0);
6286       insert_powi_p = param;
6287     }
6288   virtual bool gate (function *) { return flag_tree_reassoc != 0; }
6289   virtual unsigned int execute (function *)
6290     { return execute_reassoc (insert_powi_p); }
6291 
6292  private:
6293   /* Enable insertion of __builtin_powi calls during execute_reassoc.  See
6294      point 3a in the pass header comment.  */
6295   bool insert_powi_p;
6296 }; // class pass_reassoc
6297 
6298 } // anon namespace
6299 
6300 gimple_opt_pass *
6301 make_pass_reassoc (gcc::context *ctxt)
6302 {
6303   return new pass_reassoc (ctxt);
6304 }
6305