1 /* Reassociation for trees.
2    Copyright (C) 2005-2018 Free Software Foundation, Inc.
3    Contributed by Daniel Berlin <dan@dberlin.org>
4 
5 This file is part of GCC.
6 
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
11 
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 GNU General Public License for more details.
16 
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3.  If not see
19 <http://www.gnu.org/licenses/>.  */
20 
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "backend.h"
25 #include "target.h"
26 #include "rtl.h"
27 #include "tree.h"
28 #include "gimple.h"
29 #include "cfghooks.h"
30 #include "alloc-pool.h"
31 #include "tree-pass.h"
32 #include "memmodel.h"
33 #include "tm_p.h"
34 #include "ssa.h"
35 #include "optabs-tree.h"
36 #include "gimple-pretty-print.h"
37 #include "diagnostic-core.h"
38 #include "fold-const.h"
39 #include "stor-layout.h"
40 #include "cfganal.h"
41 #include "gimple-fold.h"
42 #include "tree-eh.h"
43 #include "gimple-iterator.h"
44 #include "gimplify-me.h"
45 #include "tree-cfg.h"
46 #include "tree-ssa-loop.h"
47 #include "flags.h"
48 #include "tree-ssa.h"
49 #include "langhooks.h"
50 #include "cfgloop.h"
51 #include "params.h"
52 #include "builtins.h"
53 #include "gimplify.h"
54 #include "case-cfn-macros.h"
55 
56 /*  This is a simple global reassociation pass.  It is, in part, based
57     on the LLVM pass of the same name (They do some things more/less
58     than we do, in different orders, etc).
59 
60     It consists of five steps:
61 
62     1. Breaking up subtract operations into addition + negate, where
63     it would promote the reassociation of adds.
64 
65     2. Left linearization of the expression trees, so that (A+B)+(C+D)
66     becomes (((A+B)+C)+D), which is easier for us to rewrite later.
67     During linearization, we place the operands of the binary
68     expressions into a vector of operand_entry_*
69 
70     3. Optimization of the operand lists, eliminating things like a +
71     -a, a & a, etc.
72 
73     3a. Combine repeated factors with the same occurrence counts
74     into a __builtin_powi call that will later be optimized into
75     an optimal number of multiplies.
76 
77     4. Rewrite the expression trees we linearized and optimized so
78     they are in proper rank order.
79 
80     5. Repropagate negates, as nothing else will clean it up ATM.
81 
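    As a small illustration of steps 1-4 (not taken from any particular
    testcase): a statement computing w = (x - y) - z is first broken up
    into

    t1 = -y
    t2 = -z
    w = (x + t1) + t2

    the additions are linearized and their operands collected into the
    vector {x, t1, t2}, the optimization step finds nothing to eliminate
    here, and the rewrite step re-emits the additions with the operands
    in rank order.
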
82     A bit of theory on #4, since nobody seems to write anything down
83     about why it makes sense to do it the way they do it:
84 
85     We could do this much nicer theoretically, but don't (for reasons
86     explained after how to do it theoretically nice :P).
87 
88     In order to promote the most redundancy elimination, you want
89     binary expressions whose operands are the same rank (or
90     preferably, the same value) exposed to the redundancy eliminator,
91     for possible elimination.
92 
93     So the way to do this if we really cared, is to build the new op
94     tree from the leaves to the roots, merging as you go, and putting the
95     new op on the end of the worklist, until you are left with one
96     thing on the worklist.
97 
98     IE if you have to rewrite the following set of operands (listed with
99     rank in parentheses), with opcode PLUS_EXPR:
100 
101     a (1),  b (1),  c (1),  d (2), e (2)
102 
103 
104     We start with our merge worklist empty, and the ops list with all of
105     those on it.
106 
107     You want to first merge all leaves of the same rank, as much as
108     possible.
109 
110     So first build a binary op of
111 
112     mergetmp = a + b, and put "mergetmp" on the merge worklist.
113 
114     Because there is no three operand form of PLUS_EXPR, c is not going to
115     be exposed to redundancy elimination as a rank 1 operand.
116 
117     So you might as well throw it on the merge worklist (you could also
118     consider it to now be a rank two operand, and merge it with d and e,
119     but in this case, you then have evicted e from a binary op. So at
120     least in this situation, you can't win.)
121 
122     Then build a binary op of d + e
123     mergetmp2 = d + e
124 
125     and put mergetmp2 on the merge worklist.
126 
127     so merge worklist = {mergetmp, c, mergetmp2}
128 
129     Continue building binary ops of these operations until you have only
130     one operation left on the worklist.
131 
132     So we have
133 
134     build binary op
135     mergetmp3 = mergetmp + c
136 
137     worklist = {mergetmp2, mergetmp3}
138 
139     mergetmp4 = mergetmp2 + mergetmp3
140 
141     worklist = {mergetmp4}
142 
143     because we have one operation left, we can now just set the original
144     statement equal to the result of that operation.
145 
146     This will at least expose a + b  and d + e to redundancy elimination
147     as binary operations.
148 
149     For extra points, you can reuse the old statements to build the
150     mergetmps, since you shouldn't run out.
151 
152     So why don't we do this?
153 
154     Because it's expensive, and rarely will help.  Most trees we are
155     reassociating have 3 or less ops.  If they have 2 ops, they already
156     will be written into a nice single binary op.  If you have 3 ops, a
157     single simple check suffices to tell you whether the first two are of the
158     same rank.  If so, you know to order it
159 
160     mergetmp = op1 + op2
161     newstmt = mergetmp + op3
162 
163     instead of
164     mergetmp = op2 + op3
165     newstmt = mergetmp + op1
166 
167     If all three are of the same rank, you can't expose them all in a
168     single binary operator anyway, so the above is *still* the best you
169     can do.
170 
171     Thus, this is what we do.  When we have three ops left, we check to see
172     what order to put them in, and call it a day.  As a nod to vector sum
173     reduction, we check if any of the ops are really a phi node that is a
174     destructive update for the associating op, and keep the destructive
175     update together for vector sum reduction recognition.  */
176 
177 /* Enable insertion of __builtin_powi calls during execute_reassoc.  See
178    point 3a in the pass header comment.  */
179 static bool reassoc_insert_powi_p;
180 
181 /* Statistics */
182 static struct
183 {
184   int linearized;
185   int constants_eliminated;
186   int ops_eliminated;
187   int rewritten;
188   int pows_encountered;
189   int pows_created;
190 } reassociate_stats;
191 
192 /* Operator, rank pair.  */
struct operand_entry
{
  /* Rank of the operand, as computed by get_rank.  */
  unsigned int rank;
  /* Unique ID, used to keep the qsort of entries stable.  */
  unsigned int id;
  /* The operand itself.  */
  tree op;
  /* Occurrence count; > 1 only for repeated factors (see
     add_repeat_to_ops_vec).  */
  unsigned int count;
  /* Statement still to be inserted before this operand's use, if any.  */
  gimple *stmt_to_insert;
};
201 
202 static object_allocator<operand_entry> operand_entry_pool
203   ("operand entry pool");
204 
205 /* This is used to assign a unique ID to each struct operand_entry
206    so that qsort results are identical on different hosts.  */
207 static unsigned int next_operand_entry_id;
208 
209 /* Starting rank number for a given basic block, so that we can rank
210    operations using unmovable instructions in that BB based on the bb
211    depth.  */
212 static long *bb_rank;
213 
214 /* Operand->rank hashtable.  */
215 static hash_map<tree, long> *operand_rank;
216 
/* Vector of SSA_NAMEs on which, after reassociate_bb is done with
   all basic blocks, the CFG should be adjusted - basic blocks
   split right after that SSA_NAME's definition statement and before
   the only use, which must be a bit ior.  */
221 static vec<tree> reassoc_branch_fixups;
222 
223 /* Forward decls.  */
224 static long get_rank (tree);
225 static bool reassoc_stmt_dominates_stmt_p (gimple *, gimple *);
226 
227 /* Wrapper around gsi_remove, which adjusts gimple_uid of debug stmts
228    possibly added by gsi_remove.  */
229 
230 bool
reassoc_remove_stmt (gimple_stmt_iterator *gsi)
232 {
233   gimple *stmt = gsi_stmt (*gsi);
234 
235   if (!MAY_HAVE_DEBUG_BIND_STMTS || gimple_code (stmt) == GIMPLE_PHI)
236     return gsi_remove (gsi, true);
237 
238   gimple_stmt_iterator prev = *gsi;
239   gsi_prev (&prev);
240   unsigned uid = gimple_uid (stmt);
241   basic_block bb = gimple_bb (stmt);
242   bool ret = gsi_remove (gsi, true);
243   if (!gsi_end_p (prev))
244     gsi_next (&prev);
245   else
246     prev = gsi_start_bb (bb);
247   gimple *end_stmt = gsi_stmt (*gsi);
248   while ((stmt = gsi_stmt (prev)) != end_stmt)
249     {
250       gcc_assert (stmt && is_gimple_debug (stmt) && gimple_uid (stmt) == 0);
251       gimple_set_uid (stmt, uid);
252       gsi_next (&prev);
253     }
254   return ret;
255 }
256 
257 /* Bias amount for loop-carried phis.  We want this to be larger than
258    the depth of any reassociation tree we can see, but not larger than
259    the rank difference between two blocks.  */
260 #define PHI_LOOP_BIAS (1 << 15)
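
/* (Block ranks in this pass are spaced 1 << 16 apart - compare the
   ">> 16" block-rank comparison in sort_by_operand_rank - so a bias of
   1 << 15 stays below the rank difference between two blocks while
   still exceeding any reassociation tree depth we expect to see.)  */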
261 
262 /* Rank assigned to a phi statement.  If STMT is a loop-carried phi of
263    an innermost loop, and the phi has only a single use which is inside
264    the loop, then the rank is the block rank of the loop latch plus an
265    extra bias for the loop-carried dependence.  This causes expressions
266    calculated into an accumulator variable to be independent for each
267    iteration of the loop.  If STMT is some other phi, the rank is the
268    block rank of its containing block.  */
269 static long
phi_rank (gimple *stmt)
271 {
272   basic_block bb = gimple_bb (stmt);
273   struct loop *father = bb->loop_father;
274   tree res;
275   unsigned i;
276   use_operand_p use;
277   gimple *use_stmt;
278 
279   /* We only care about real loops (those with a latch).  */
280   if (!father->latch)
281     return bb_rank[bb->index];
282 
283   /* Interesting phis must be in headers of innermost loops.  */
284   if (bb != father->header
285       || father->inner)
286     return bb_rank[bb->index];
287 
288   /* Ignore virtual SSA_NAMEs.  */
289   res = gimple_phi_result (stmt);
290   if (virtual_operand_p (res))
291     return bb_rank[bb->index];
292 
293   /* The phi definition must have a single use, and that use must be
294      within the loop.  Otherwise this isn't an accumulator pattern.  */
295   if (!single_imm_use (res, &use, &use_stmt)
296       || gimple_bb (use_stmt)->loop_father != father)
297     return bb_rank[bb->index];
298 
299   /* Look for phi arguments from within the loop.  If found, bias this phi.  */
300   for (i = 0; i < gimple_phi_num_args (stmt); i++)
301     {
302       tree arg = gimple_phi_arg_def (stmt, i);
303       if (TREE_CODE (arg) == SSA_NAME
304 	  && !SSA_NAME_IS_DEFAULT_DEF (arg))
305 	{
306 	  gimple *def_stmt = SSA_NAME_DEF_STMT (arg);
307 	  if (gimple_bb (def_stmt)->loop_father == father)
308 	    return bb_rank[father->latch->index] + PHI_LOOP_BIAS;
309 	}
310     }
311 
312   /* Must be an uninteresting phi.  */
313   return bb_rank[bb->index];
314 }
315 
316 /* If EXP is an SSA_NAME defined by a PHI statement that represents a
317    loop-carried dependence of an innermost loop, return TRUE; else
318    return FALSE.  */
319 static bool
loop_carried_phi (tree exp)
321 {
322   gimple *phi_stmt;
323   long block_rank;
324 
325   if (TREE_CODE (exp) != SSA_NAME
326       || SSA_NAME_IS_DEFAULT_DEF (exp))
327     return false;
328 
329   phi_stmt = SSA_NAME_DEF_STMT (exp);
330 
331   if (gimple_code (SSA_NAME_DEF_STMT (exp)) != GIMPLE_PHI)
332     return false;
333 
334   /* Non-loop-carried phis have block rank.  Loop-carried phis have
335      an additional bias added in.  If this phi doesn't have block rank,
336      it's biased and should not be propagated.  */
337   block_rank = bb_rank[gimple_bb (phi_stmt)->index];
338 
339   if (phi_rank (phi_stmt) != block_rank)
340     return true;
341 
342   return false;
343 }
344 
345 /* Return the maximum of RANK and the rank that should be propagated
346    from expression OP.  For most operands, this is just the rank of OP.
347    For loop-carried phis, the value is zero to avoid undoing the bias
348    in favor of the phi.  */
349 static long
propagate_rank (long rank, tree op)
351 {
352   long op_rank;
353 
354   if (loop_carried_phi (op))
355     return rank;
356 
357   op_rank = get_rank (op);
358 
359   return MAX (rank, op_rank);
360 }
361 
362 /* Look up the operand rank structure for expression E.  */
363 
364 static inline long
find_operand_rank (tree e)
366 {
367   long *slot = operand_rank->get (e);
368   return slot ? *slot : -1;
369 }
370 
371 /* Insert {E,RANK} into the operand rank hashtable.  */
372 
373 static inline void
insert_operand_rank (tree e, long rank)
375 {
376   gcc_assert (rank > 0);
377   gcc_assert (!operand_rank->put (e, rank));
378 }
379 
380 /* Given an expression E, return the rank of the expression.  */
381 
382 static long
get_rank (tree e)
384 {
385   /* SSA_NAME's have the rank of the expression they are the result
386      of.
387      For globals and uninitialized values, the rank is 0.
388      For function arguments, use the pre-setup rank.
389      For PHI nodes, stores, asm statements, etc, we use the rank of
390      the BB.
391      For simple operations, the rank is the maximum rank of any of
392      its operands, or the bb_rank, whichever is less.
393      I make no claims that this is optimal, however, it gives good
394      results.  */
395 
396   /* We make an exception to the normal ranking system to break
397      dependences of accumulator variables in loops.  Suppose we
398      have a simple one-block loop containing:
399 
400        x_1 = phi(x_0, x_2)
401        b = a + x_1
402        c = b + d
403        x_2 = c + e
404 
405      As shown, each iteration of the calculation into x is fully
406      dependent upon the iteration before it.  We would prefer to
407      see this in the form:
408 
409        x_1 = phi(x_0, x_2)
410        b = a + d
411        c = b + e
412        x_2 = c + x_1
413 
414      If the loop is unrolled, the calculations of b and c from
415      different iterations can be interleaved.
416 
417      To obtain this result during reassociation, we bias the rank
418      of the phi definition x_1 upward, when it is recognized as an
419      accumulator pattern.  The artificial rank causes it to be
420      added last, providing the desired independence.  */
421 
422   if (TREE_CODE (e) == SSA_NAME)
423     {
424       ssa_op_iter iter;
425       gimple *stmt;
426       long rank;
427       tree op;
428 
429       if (SSA_NAME_IS_DEFAULT_DEF (e))
430 	return find_operand_rank (e);
431 
432       stmt = SSA_NAME_DEF_STMT (e);
433       if (gimple_code (stmt) == GIMPLE_PHI)
434 	return phi_rank (stmt);
435 
436       if (!is_gimple_assign (stmt))
437 	return bb_rank[gimple_bb (stmt)->index];
438 
439       /* If we already have a rank for this expression, use that.  */
440       rank = find_operand_rank (e);
441       if (rank != -1)
442 	return rank;
443 
444       /* Otherwise, find the maximum rank for the operands.  As an
445 	 exception, remove the bias from loop-carried phis when propagating
446 	 the rank so that dependent operations are not also biased.  */
      /* Simply walk over all SSA uses - this takes advantage of the
	 fact that non-SSA operands are is_gimple_min_invariant and
	 thus have rank 0.  */
450       rank = 0;
451       FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
452 	rank = propagate_rank (rank, op);
453 
454       if (dump_file && (dump_flags & TDF_DETAILS))
455 	{
456 	  fprintf (dump_file, "Rank for ");
457 	  print_generic_expr (dump_file, e);
458 	  fprintf (dump_file, " is %ld\n", (rank + 1));
459 	}
460 
461       /* Note the rank in the hashtable so we don't recompute it.  */
462       insert_operand_rank (e, (rank + 1));
463       return (rank + 1);
464     }
465 
466   /* Constants, globals, etc., are rank 0 */
467   return 0;
468 }
469 
470 
471 /* We want integer ones to end up last no matter what, since they are
472    the ones we can do the most with.  */
473 #define INTEGER_CONST_TYPE 1 << 4
474 #define FLOAT_ONE_CONST_TYPE 1 << 3
475 #define FLOAT_CONST_TYPE 1 << 2
476 #define OTHER_CONST_TYPE 1 << 1
477 
478 /* Classify an invariant tree into integer, float, or other, so that
479    we can sort them to be near other constants of the same type.  */
480 static inline int
constant_type (tree t)
482 {
483   if (INTEGRAL_TYPE_P (TREE_TYPE (t)))
484     return INTEGER_CONST_TYPE;
485   else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (t)))
486     {
      /* Sort -1.0 and 1.0 constants last: while in some cases const_binop
	 can't optimize inexact operations, multiplication by -1.0 or 1.0
	 can always be merged with others.  */
490       if (real_onep (t) || real_minus_onep (t))
491 	return FLOAT_ONE_CONST_TYPE;
492       return FLOAT_CONST_TYPE;
493     }
494   else
495     return OTHER_CONST_TYPE;
496 }
497 
498 /* qsort comparison function to sort operand entries PA and PB by rank
499    so that the sorted array is ordered by rank in decreasing order.  */
500 static int
sort_by_operand_rank (const void *pa, const void *pb)
502 {
503   const operand_entry *oea = *(const operand_entry *const *)pa;
504   const operand_entry *oeb = *(const operand_entry *const *)pb;
505 
506   if (oeb->rank != oea->rank)
507     return oeb->rank > oea->rank ? 1 : -1;
508 
509   /* It's nicer for optimize_expression if constants that are likely
510      to fold when added/multiplied/whatever are put next to each
511      other.  Since all constants have rank 0, order them by type.  */
512   if (oea->rank == 0)
513     {
514       if (constant_type (oeb->op) != constant_type (oea->op))
515 	return constant_type (oea->op) - constant_type (oeb->op);
516       else
517 	/* To make sorting result stable, we use unique IDs to determine
518 	   order.  */
519 	return oeb->id > oea->id ? 1 : -1;
520     }
521 
522   if (TREE_CODE (oea->op) != SSA_NAME)
523     {
524       if (TREE_CODE (oeb->op) != SSA_NAME)
525 	return oeb->id > oea->id ? 1 : -1;
526       else
527 	return 1;
528     }
529   else if (TREE_CODE (oeb->op) != SSA_NAME)
530     return -1;
531 
532   /* Lastly, make sure the versions that are the same go next to each
533      other.  */
534   if (SSA_NAME_VERSION (oeb->op) != SSA_NAME_VERSION (oea->op))
535     {
      /* SSA_NAME_VERSION is assigned pretty randomly, because we reuse
	 versions of removed SSA_NAMEs, so if possible prefer to sort
	 based on basic block and gimple_uid of the SSA_NAME_DEF_STMT.
	 See PR60418.  */
540       gimple *stmta = SSA_NAME_DEF_STMT (oea->op);
541       gimple *stmtb = SSA_NAME_DEF_STMT (oeb->op);
542       basic_block bba = gimple_bb (stmta);
543       basic_block bbb = gimple_bb (stmtb);
544       if (bbb != bba)
545 	{
546 	  /* One of the SSA_NAMEs can be defined in oeN->stmt_to_insert
547 	     but the other might not.  */
548 	  if (!bba)
549 	    return 1;
550 	  if (!bbb)
551 	    return -1;
552 	  /* If neither is, compare bb_rank.  */
553 	  if (bb_rank[bbb->index] != bb_rank[bba->index])
554 	    return (bb_rank[bbb->index] >> 16) - (bb_rank[bba->index] >> 16);
555 	}
556 
557       bool da = reassoc_stmt_dominates_stmt_p (stmta, stmtb);
558       bool db = reassoc_stmt_dominates_stmt_p (stmtb, stmta);
559       if (da != db)
560 	return da ? 1 : -1;
561 
562       return SSA_NAME_VERSION (oeb->op) > SSA_NAME_VERSION (oea->op) ? 1 : -1;
563     }
564 
565   return oeb->id > oea->id ? 1 : -1;
566 }
567 
568 /* Add an operand entry to *OPS for the tree operand OP.  */
569 
570 static void
571 add_to_ops_vec (vec<operand_entry *> *ops, tree op, gimple *stmt_to_insert = NULL)
572 {
573   operand_entry *oe = operand_entry_pool.allocate ();
574 
575   oe->op = op;
576   oe->rank = get_rank (op);
577   oe->id = next_operand_entry_id++;
578   oe->count = 1;
579   oe->stmt_to_insert = stmt_to_insert;
580   ops->safe_push (oe);
581 }
582 
583 /* Add an operand entry to *OPS for the tree operand OP with repeat
584    count REPEAT.  */
585 
586 static void
add_repeat_to_ops_vec (vec<operand_entry *> *ops, tree op,
		       HOST_WIDE_INT repeat)
589 {
590   operand_entry *oe = operand_entry_pool.allocate ();
591 
592   oe->op = op;
593   oe->rank = get_rank (op);
594   oe->id = next_operand_entry_id++;
595   oe->count = repeat;
596   oe->stmt_to_insert = NULL;
597   ops->safe_push (oe);
598 
599   reassociate_stats.pows_encountered++;
600 }
601 
/* Return true if STMT is a reassociable operation containing a binary
   operation with tree code CODE, and is inside LOOP.  */
604 
605 static bool
is_reassociable_op (gimple *stmt, enum tree_code code, struct loop *loop)
607 {
608   basic_block bb = gimple_bb (stmt);
609 
610   if (gimple_bb (stmt) == NULL)
611     return false;
612 
613   if (!flow_bb_inside_loop_p (loop, bb))
614     return false;
615 
616   if (is_gimple_assign (stmt)
617       && gimple_assign_rhs_code (stmt) == code
618       && has_single_use (gimple_assign_lhs (stmt)))
619     {
620       tree rhs1 = gimple_assign_rhs1 (stmt);
      tree rhs2 = gimple_assign_rhs2 (stmt);
622       if (TREE_CODE (rhs1) == SSA_NAME
623 	  && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (rhs1))
624 	return false;
625       if (rhs2
626 	  && TREE_CODE (rhs2) == SSA_NAME
627 	  && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (rhs2))
628 	return false;
629       return true;
630     }
631 
632   return false;
633 }
634 
635 
636 /* Return true if STMT is a nop-conversion.  */
637 
638 static bool
gimple_nop_conversion_p (gimple *stmt)
640 {
641   if (gassign *ass = dyn_cast <gassign *> (stmt))
642     {
643       if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (ass))
644 	  && tree_nop_conversion_p (TREE_TYPE (gimple_assign_lhs (ass)),
645 				    TREE_TYPE (gimple_assign_rhs1 (ass))))
646 	return true;
647     }
648   return false;
649 }
650 
/* Given NAME, if NAME is defined by a unary operation OPCODE, return the
   operand of that operation.  Otherwise, return NULL_TREE.  */
653 
654 static tree
get_unary_op (tree name, enum tree_code opcode)
656 {
657   gimple *stmt = SSA_NAME_DEF_STMT (name);
658 
659   /* Look through nop conversions (sign changes).  */
660   if (gimple_nop_conversion_p (stmt)
661       && TREE_CODE (gimple_assign_rhs1 (stmt)) == SSA_NAME)
662     stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
663 
664   if (!is_gimple_assign (stmt))
665     return NULL_TREE;
666 
667   if (gimple_assign_rhs_code (stmt) == opcode)
668     return gimple_assign_rhs1 (stmt);
669   return NULL_TREE;
670 }
671 
672 /* Return true if OP1 and OP2 have the same value if casted to either type.  */
673 
674 static bool
ops_equal_values_p (tree op1, tree op2)
676 {
677   if (op1 == op2)
678     return true;
679 
680   tree orig_op1 = op1;
681   if (TREE_CODE (op1) == SSA_NAME)
682     {
683       gimple *stmt = SSA_NAME_DEF_STMT (op1);
684       if (gimple_nop_conversion_p (stmt))
685 	{
686 	  op1 = gimple_assign_rhs1 (stmt);
687 	  if (op1 == op2)
688 	    return true;
689 	}
690     }
691 
692   if (TREE_CODE (op2) == SSA_NAME)
693     {
694       gimple *stmt = SSA_NAME_DEF_STMT (op2);
695       if (gimple_nop_conversion_p (stmt))
696 	{
697 	  op2 = gimple_assign_rhs1 (stmt);
698 	  if (op1 == op2
699 	      || orig_op1 == op2)
700 	    return true;
701 	}
702     }
703 
704   return false;
705 }
706 
707 
708 /* If CURR and LAST are a pair of ops that OPCODE allows us to
709    eliminate through equivalences, do so, remove them from OPS, and
710    return true.  Otherwise, return false.  */
711 
712 static bool
eliminate_duplicate_pair (enum tree_code opcode,
			  vec<operand_entry *> *ops,
			  bool *all_done,
			  unsigned int i,
			  operand_entry *curr,
			  operand_entry *last)
719 {
720 
721   /* If we have two of the same op, and the opcode is & |, min, or max,
722      we can eliminate one of them.
723      If we have two of the same op, and the opcode is ^, we can
724      eliminate both of them.  */
725 
726   if (last && last->op == curr->op)
727     {
728       switch (opcode)
729 	{
730 	case MAX_EXPR:
731 	case MIN_EXPR:
732 	case BIT_IOR_EXPR:
733 	case BIT_AND_EXPR:
734 	  if (dump_file && (dump_flags & TDF_DETAILS))
735 	    {
736 	      fprintf (dump_file, "Equivalence: ");
737 	      print_generic_expr (dump_file, curr->op);
738 	      fprintf (dump_file, " [&|minmax] ");
739 	      print_generic_expr (dump_file, last->op);
740 	      fprintf (dump_file, " -> ");
741 	      print_generic_stmt (dump_file, last->op);
742 	    }
743 
744 	  ops->ordered_remove (i);
745 	  reassociate_stats.ops_eliminated ++;
746 
747 	  return true;
748 
749 	case BIT_XOR_EXPR:
750 	  if (dump_file && (dump_flags & TDF_DETAILS))
751 	    {
752 	      fprintf (dump_file, "Equivalence: ");
753 	      print_generic_expr (dump_file, curr->op);
754 	      fprintf (dump_file, " ^ ");
755 	      print_generic_expr (dump_file, last->op);
756 	      fprintf (dump_file, " -> nothing\n");
757 	    }
758 
759 	  reassociate_stats.ops_eliminated += 2;
760 
761 	  if (ops->length () == 2)
762 	    {
763 	      ops->truncate (0);
764 	      add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (last->op)));
765 	      *all_done = true;
766 	    }
767 	  else
768 	    {
769 	      ops->ordered_remove (i-1);
770 	      ops->ordered_remove (i-1);
771 	    }
772 
773 	  return true;
774 
775 	default:
776 	  break;
777 	}
778     }
779   return false;
780 }
781 
782 static vec<tree> plus_negates;
783 
/* If OPCODE is PLUS_EXPR and CURR->OP is a negate expression or a bitwise
   not expression, look in OPS for a corresponding positive operation to
   cancel it out.  If we find one, remove the other from OPS, replace
   OPS[CURRINDEX] with 0 or -1, respectively, and return true.  Otherwise,
   return false.  */
789 
790 static bool
eliminate_plus_minus_pair (enum tree_code opcode,
			   vec<operand_entry *> *ops,
			   unsigned int currindex,
			   operand_entry *curr)
795 {
796   tree negateop;
797   tree notop;
798   unsigned int i;
799   operand_entry *oe;
800 
801   if (opcode != PLUS_EXPR || TREE_CODE (curr->op) != SSA_NAME)
802     return false;
803 
804   negateop = get_unary_op (curr->op, NEGATE_EXPR);
805   notop = get_unary_op (curr->op, BIT_NOT_EXPR);
806   if (negateop == NULL_TREE && notop == NULL_TREE)
807     return false;
808 
809   /* Any non-negated version will have a rank that is one less than
810      the current rank.  So once we hit those ranks, if we don't find
811      one, we can stop.  */
812 
813   for (i = currindex + 1;
814        ops->iterate (i, &oe)
815        && oe->rank >= curr->rank - 1 ;
816        i++)
817     {
818       if (negateop
819 	  && ops_equal_values_p (oe->op, negateop))
820 	{
821 	  if (dump_file && (dump_flags & TDF_DETAILS))
822 	    {
823 	      fprintf (dump_file, "Equivalence: ");
824 	      print_generic_expr (dump_file, negateop);
825 	      fprintf (dump_file, " + -");
826 	      print_generic_expr (dump_file, oe->op);
827 	      fprintf (dump_file, " -> 0\n");
828 	    }
829 
830 	  ops->ordered_remove (i);
831 	  add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (oe->op)));
832 	  ops->ordered_remove (currindex);
833 	  reassociate_stats.ops_eliminated ++;
834 
835 	  return true;
836 	}
837       else if (notop
838 	       && ops_equal_values_p (oe->op, notop))
839 	{
840 	  tree op_type = TREE_TYPE (oe->op);
841 
842 	  if (dump_file && (dump_flags & TDF_DETAILS))
843 	    {
844 	      fprintf (dump_file, "Equivalence: ");
845 	      print_generic_expr (dump_file, notop);
846 	      fprintf (dump_file, " + ~");
847 	      print_generic_expr (dump_file, oe->op);
848 	      fprintf (dump_file, " -> -1\n");
849 	    }
850 
851 	  ops->ordered_remove (i);
852 	  add_to_ops_vec (ops, build_all_ones_cst (op_type));
853 	  ops->ordered_remove (currindex);
854 	  reassociate_stats.ops_eliminated ++;
855 
856 	  return true;
857 	}
858     }
859 
860   /* If CURR->OP is a negate expr without nop conversion in a plus expr:
861      save it for later inspection in repropagate_negates().  */
862   if (negateop != NULL_TREE
863       && gimple_assign_rhs_code (SSA_NAME_DEF_STMT (curr->op)) == NEGATE_EXPR)
864     plus_negates.safe_push (curr->op);
865 
866   return false;
867 }
868 
/* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR, and CURR->OP is really a
   bitwise not expression, look in OPS for a corresponding operand to
   cancel it out.  If we find one, replace the remaining OPS with a
   single -1 or 0, respectively, and return true.  Otherwise, return
   false.  */
874 
875 static bool
eliminate_not_pairs (enum tree_code opcode,
		     vec<operand_entry *> *ops,
		     unsigned int currindex,
		     operand_entry *curr)
880 {
881   tree notop;
882   unsigned int i;
883   operand_entry *oe;
884 
885   if ((opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
886       || TREE_CODE (curr->op) != SSA_NAME)
887     return false;
888 
889   notop = get_unary_op (curr->op, BIT_NOT_EXPR);
890   if (notop == NULL_TREE)
891     return false;
892 
893   /* Any non-not version will have a rank that is one less than
894      the current rank.  So once we hit those ranks, if we don't find
895      one, we can stop.  */
896 
897   for (i = currindex + 1;
898        ops->iterate (i, &oe)
899        && oe->rank >= curr->rank - 1;
900        i++)
901     {
902       if (oe->op == notop)
903 	{
904 	  if (dump_file && (dump_flags & TDF_DETAILS))
905 	    {
906 	      fprintf (dump_file, "Equivalence: ");
907 	      print_generic_expr (dump_file, notop);
908 	      if (opcode == BIT_AND_EXPR)
909 		fprintf (dump_file, " & ~");
910 	      else if (opcode == BIT_IOR_EXPR)
911 		fprintf (dump_file, " | ~");
912 	      print_generic_expr (dump_file, oe->op);
913 	      if (opcode == BIT_AND_EXPR)
914 		fprintf (dump_file, " -> 0\n");
915 	      else if (opcode == BIT_IOR_EXPR)
916 		fprintf (dump_file, " -> -1\n");
917 	    }
918 
919 	  if (opcode == BIT_AND_EXPR)
920 	    oe->op = build_zero_cst (TREE_TYPE (oe->op));
921 	  else if (opcode == BIT_IOR_EXPR)
922 	    oe->op = build_all_ones_cst (TREE_TYPE (oe->op));
923 
924 	  reassociate_stats.ops_eliminated += ops->length () - 1;
925 	  ops->truncate (0);
926 	  ops->quick_push (oe);
927 	  return true;
928 	}
929     }
930 
931   return false;
932 }
933 
934 /* Use constant value that may be present in OPS to try to eliminate
935    operands.  Note that this function is only really used when we've
936    eliminated ops for other reasons, or merged constants.  Across
937    single statements, fold already does all of this, plus more.  There
938    is little point in duplicating logic, so I've only included the
939    identities that I could ever construct testcases to trigger.  */
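
/* For example, the cases handled below include x & 0 -> 0, x & -1 -> x,
   x | -1 -> -1, x | 0 -> x, x * 0 -> 0 and x * 1 -> x (when that is safe
   for floating point), and x [+^-] 0 -> x.  */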
940 
941 static void
eliminate_using_constants (enum tree_code opcode,
			   vec<operand_entry *> *ops)
944 {
945   operand_entry *oelast = ops->last ();
946   tree type = TREE_TYPE (oelast->op);
947 
948   if (oelast->rank == 0
949       && (ANY_INTEGRAL_TYPE_P (type) || FLOAT_TYPE_P (type)))
950     {
951       switch (opcode)
952 	{
953 	case BIT_AND_EXPR:
954 	  if (integer_zerop (oelast->op))
955 	    {
956 	      if (ops->length () != 1)
957 		{
958 		  if (dump_file && (dump_flags & TDF_DETAILS))
959 		    fprintf (dump_file, "Found & 0, removing all other ops\n");
960 
961 		  reassociate_stats.ops_eliminated += ops->length () - 1;
962 
963 		  ops->truncate (0);
964 		  ops->quick_push (oelast);
965 		  return;
966 		}
967 	    }
968 	  else if (integer_all_onesp (oelast->op))
969 	    {
970 	      if (ops->length () != 1)
971 		{
972 		  if (dump_file && (dump_flags & TDF_DETAILS))
973 		    fprintf (dump_file, "Found & -1, removing\n");
974 		  ops->pop ();
975 		  reassociate_stats.ops_eliminated++;
976 		}
977 	    }
978 	  break;
979 	case BIT_IOR_EXPR:
980 	  if (integer_all_onesp (oelast->op))
981 	    {
982 	      if (ops->length () != 1)
983 		{
984 		  if (dump_file && (dump_flags & TDF_DETAILS))
985 		    fprintf (dump_file, "Found | -1, removing all other ops\n");
986 
987 		  reassociate_stats.ops_eliminated += ops->length () - 1;
988 
989 		  ops->truncate (0);
990 		  ops->quick_push (oelast);
991 		  return;
992 		}
993 	    }
994 	  else if (integer_zerop (oelast->op))
995 	    {
996 	      if (ops->length () != 1)
997 		{
998 		  if (dump_file && (dump_flags & TDF_DETAILS))
999 		    fprintf (dump_file, "Found | 0, removing\n");
1000 		  ops->pop ();
1001 		  reassociate_stats.ops_eliminated++;
1002 		}
1003 	    }
1004 	  break;
1005 	case MULT_EXPR:
1006 	  if (integer_zerop (oelast->op)
1007 	      || (FLOAT_TYPE_P (type)
1008 		  && !HONOR_NANS (type)
1009 		  && !HONOR_SIGNED_ZEROS (type)
1010 		  && real_zerop (oelast->op)))
1011 	    {
1012 	      if (ops->length () != 1)
1013 		{
1014 		  if (dump_file && (dump_flags & TDF_DETAILS))
1015 		    fprintf (dump_file, "Found * 0, removing all other ops\n");
1016 
1017 		  reassociate_stats.ops_eliminated += ops->length () - 1;
1018 		  ops->truncate (0);
1019 		  ops->quick_push (oelast);
1020 		  return;
1021 		}
1022 	    }
1023 	  else if (integer_onep (oelast->op)
1024 		   || (FLOAT_TYPE_P (type)
1025 		       && !HONOR_SNANS (type)
1026 		       && real_onep (oelast->op)))
1027 	    {
1028 	      if (ops->length () != 1)
1029 		{
1030 		  if (dump_file && (dump_flags & TDF_DETAILS))
1031 		    fprintf (dump_file, "Found * 1, removing\n");
1032 		  ops->pop ();
1033 		  reassociate_stats.ops_eliminated++;
1034 		  return;
1035 		}
1036 	    }
1037 	  break;
1038 	case BIT_XOR_EXPR:
1039 	case PLUS_EXPR:
1040 	case MINUS_EXPR:
1041 	  if (integer_zerop (oelast->op)
1042 	      || (FLOAT_TYPE_P (type)
1043 		  && (opcode == PLUS_EXPR || opcode == MINUS_EXPR)
1044 		  && fold_real_zero_addition_p (type, oelast->op,
1045 						opcode == MINUS_EXPR)))
1046 	    {
1047 	      if (ops->length () != 1)
1048 		{
1049 		  if (dump_file && (dump_flags & TDF_DETAILS))
1050 		    fprintf (dump_file, "Found [|^+] 0, removing\n");
1051 		  ops->pop ();
1052 		  reassociate_stats.ops_eliminated++;
1053 		  return;
1054 		}
1055 	    }
1056 	  break;
1057 	default:
1058 	  break;
1059 	}
1060     }
1061 }
1062 
1063 
1064 static void linearize_expr_tree (vec<operand_entry *> *, gimple *,
1065 				 bool, bool);
1066 
1067 /* Structure for tracking and counting operands.  */
1068 struct oecount {
1069   unsigned int cnt;
1070   unsigned int id;
1071   enum tree_code oecode;
1072   tree op;
1073 };
1074 
1075 
1076 /* The heap for the oecount hashtable and the sorted list of operands.  */
1077 static vec<oecount> cvec;
1078 
1079 
1080 /* Oecount hashtable helpers.  */
1081 
1082 struct oecount_hasher : int_hash <int, 0, 1>
1083 {
1084   static inline hashval_t hash (int);
1085   static inline bool equal (int, int);
1086 };
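
/* Note: the values stored in this table are indices into CVEC biased
   by 42 (see the "+ 41" in undistribute_ops_list and the "- 42" below);
   the bias keeps them clear of the 0 and 1 values that int_hash
   reserves for empty and deleted slots.  */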
1087 
1088 /* Hash function for oecount.  */
1089 
1090 inline hashval_t
oecount_hasher::hash (int p)
1092 {
1093   const oecount *c = &cvec[p - 42];
1094   return htab_hash_pointer (c->op) ^ (hashval_t)c->oecode;
1095 }
1096 
1097 /* Comparison function for oecount.  */
1098 
1099 inline bool
oecount_hasher::equal (int p1, int p2)
1101 {
1102   const oecount *c1 = &cvec[p1 - 42];
1103   const oecount *c2 = &cvec[p2 - 42];
1104   return c1->oecode == c2->oecode && c1->op == c2->op;
1105 }
1106 
1107 /* Comparison function for qsort sorting oecount elements by count.  */
1108 
1109 static int
oecount_cmp (const void *p1, const void *p2)
1111 {
1112   const oecount *c1 = (const oecount *)p1;
1113   const oecount *c2 = (const oecount *)p2;
1114   if (c1->cnt != c2->cnt)
1115     return c1->cnt > c2->cnt ? 1 : -1;
1116   else
1117     /* If counts are identical, use unique IDs to stabilize qsort.  */
1118     return c1->id > c2->id ? 1 : -1;
1119 }
1120 
1121 /* Return TRUE iff STMT represents a builtin call that raises OP
1122    to some exponent.  */
1123 
1124 static bool
stmt_is_power_of_op (gimple *stmt, tree op)
1126 {
1127   if (!is_gimple_call (stmt))
1128     return false;
1129 
1130   switch (gimple_call_combined_fn (stmt))
1131     {
1132     CASE_CFN_POW:
1133     CASE_CFN_POWI:
1134       return (operand_equal_p (gimple_call_arg (stmt, 0), op, 0));
1135 
1136     default:
1137       return false;
1138     }
1139 }
1140 
1141 /* Given STMT which is a __builtin_pow* call, decrement its exponent
1142    in place and return the result.  Assumes that stmt_is_power_of_op
1143    was previously called for STMT and returned TRUE.  */
1144 
1145 static HOST_WIDE_INT
decrement_power (gimple *stmt)
1147 {
1148   REAL_VALUE_TYPE c, cint;
1149   HOST_WIDE_INT power;
1150   tree arg1;
1151 
1152   switch (gimple_call_combined_fn (stmt))
1153     {
1154     CASE_CFN_POW:
1155       arg1 = gimple_call_arg (stmt, 1);
1156       c = TREE_REAL_CST (arg1);
1157       power = real_to_integer (&c) - 1;
1158       real_from_integer (&cint, VOIDmode, power, SIGNED);
1159       gimple_call_set_arg (stmt, 1, build_real (TREE_TYPE (arg1), cint));
1160       return power;
1161 
1162     CASE_CFN_POWI:
1163       arg1 = gimple_call_arg (stmt, 1);
1164       power = TREE_INT_CST_LOW (arg1) - 1;
1165       gimple_call_set_arg (stmt, 1, build_int_cst (TREE_TYPE (arg1), power));
1166       return power;
1167 
1168     default:
1169       gcc_unreachable ();
1170     }
1171 }
1172 
/* Give the SSA name defined by STMT a new name, redirect all uses of
   the old name to it (debug uses are rebound via a debug temporary),
   and return the new SSA name.  */
1175 
1176 static tree
make_new_ssa_for_def (gimple *stmt, enum tree_code opcode, tree op)
1178 {
1179   gimple *use_stmt;
1180   use_operand_p use;
1181   imm_use_iterator iter;
1182   tree new_lhs, new_debug_lhs = NULL_TREE;
1183   tree lhs = gimple_get_lhs (stmt);
1184 
1185   new_lhs = make_ssa_name (TREE_TYPE (lhs));
1186   gimple_set_lhs (stmt, new_lhs);
1187 
1188   /* Also need to update GIMPLE_DEBUGs.  */
1189   FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs)
1190     {
1191       tree repl = new_lhs;
1192       if (is_gimple_debug (use_stmt))
1193 	{
1194 	  if (new_debug_lhs == NULL_TREE)
1195 	    {
1196 	      new_debug_lhs = make_node (DEBUG_EXPR_DECL);
1197 	      gdebug *def_temp
1198 		= gimple_build_debug_bind (new_debug_lhs,
1199 					   build2 (opcode, TREE_TYPE (lhs),
1200 						   new_lhs, op),
1201 					   stmt);
1202 	      DECL_ARTIFICIAL (new_debug_lhs) = 1;
1203 	      TREE_TYPE (new_debug_lhs) = TREE_TYPE (lhs);
1204 	      SET_DECL_MODE (new_debug_lhs, TYPE_MODE (TREE_TYPE (lhs)));
1205 	      gimple_set_uid (def_temp, gimple_uid (stmt));
1206 	      gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
1207 	      gsi_insert_after (&gsi, def_temp, GSI_SAME_STMT);
1208 	    }
1209 	  repl = new_debug_lhs;
1210 	}
1211       FOR_EACH_IMM_USE_ON_STMT (use, iter)
1212 	SET_USE (use, repl);
1213       update_stmt (use_stmt);
1214     }
1215   return new_lhs;
1216 }
1217 
/* Replace the SSA names defined by the statements in STMTS_TO_FIX with
   new SSA names and redirect their uses accordingly.  Also do this for
   the stmt that defines DEF if *DEF is not OP.  */
1221 
1222 static void
make_new_ssa_for_all_defs (tree *def, enum tree_code opcode, tree op,
			   vec<gimple *> &stmts_to_fix)
1225 {
1226   unsigned i;
1227   gimple *stmt;
1228 
1229   if (*def != op
1230       && TREE_CODE (*def) == SSA_NAME
1231       && (stmt = SSA_NAME_DEF_STMT (*def))
1232       && gimple_code (stmt) != GIMPLE_NOP)
1233     *def = make_new_ssa_for_def (stmt, opcode, op);
1234 
1235   FOR_EACH_VEC_ELT (stmts_to_fix, i, stmt)
1236     make_new_ssa_for_def (stmt, opcode, op);
1237 }
1238 
1239 /* Find the single immediate use of STMT's LHS, and replace it
1240    with OP.  Remove STMT.  If STMT's LHS is the same as *DEF,
1241    replace *DEF with OP as well.  */
1242 
1243 static void
propagate_op_to_single_use (tree op, gimple *stmt, tree *def)
1245 {
1246   tree lhs;
1247   gimple *use_stmt;
1248   use_operand_p use;
1249   gimple_stmt_iterator gsi;
1250 
1251   if (is_gimple_call (stmt))
1252     lhs = gimple_call_lhs (stmt);
1253   else
1254     lhs = gimple_assign_lhs (stmt);
1255 
1256   gcc_assert (has_single_use (lhs));
1257   single_imm_use (lhs, &use, &use_stmt);
1258   if (lhs == *def)
1259     *def = op;
1260   SET_USE (use, op);
1261   if (TREE_CODE (op) != SSA_NAME)
1262     update_stmt (use_stmt);
1263   gsi = gsi_for_stmt (stmt);
1264   unlink_stmt_vdef (stmt);
1265   reassoc_remove_stmt (&gsi);
1266   release_defs (stmt);
1267 }
1268 
/* Walk the linear chain with result *DEF searching for an operation
   with operand OP and code OPCODE, and remove that operation from the
   chain.  *DEF is updated if there is only one operand but no operation
   left.  */
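
/* For illustration (the names are made up): if *DEF is t2 in the chain

     t1 = a * b;
     t2 = t1 * c;

   then a call with OPCODE == MULT_EXPR and OP == b removes the
   multiplication by b, leaving a chain that computes a * c; the
   statements involved get fresh LHS SSA names (see the PR72835 note
   below) so that an old name never ends up holding a different value.  */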
1272 
1273 static void
zero_one_operation (tree *def, enum tree_code opcode, tree op)
1275 {
1276   tree orig_def = *def;
1277   gimple *stmt = SSA_NAME_DEF_STMT (*def);
  /* PR72835 - Record the stmt chain that has to be updated such that
     we don't use the same LHS when the values computed are different.  */
1280   auto_vec<gimple *, 64> stmts_to_fix;
1281 
1282   do
1283     {
1284       tree name;
1285 
1286       if (opcode == MULT_EXPR)
1287 	{
1288 	  if (stmt_is_power_of_op (stmt, op))
1289 	    {
1290 	      if (decrement_power (stmt) == 1)
1291 		{
1292 		  if (stmts_to_fix.length () > 0)
1293 		    stmts_to_fix.pop ();
1294 		  propagate_op_to_single_use (op, stmt, def);
1295 		}
1296 	      break;
1297 	    }
1298 	  else if (gimple_assign_rhs_code (stmt) == NEGATE_EXPR)
1299 	    {
1300 	      if (gimple_assign_rhs1 (stmt) == op)
1301 		{
1302 		  tree cst = build_minus_one_cst (TREE_TYPE (op));
1303 		  if (stmts_to_fix.length () > 0)
1304 		    stmts_to_fix.pop ();
1305 		  propagate_op_to_single_use (cst, stmt, def);
1306 		  break;
1307 		}
1308 	      else if (integer_minus_onep (op)
1309 		       || real_minus_onep (op))
1310 		{
1311 		  gimple_assign_set_rhs_code
1312 		    (stmt, TREE_CODE (gimple_assign_rhs1 (stmt)));
1313 		  break;
1314 		}
1315 	    }
1316 	}
1317 
1318       name = gimple_assign_rhs1 (stmt);
1319 
      /* If this is the operation we look for and one of the operands
	 is ours, simply propagate the other operand into the stmt's
	 single use.  */
1323       if (gimple_assign_rhs_code (stmt) == opcode
1324 	  && (name == op
1325 	      || gimple_assign_rhs2 (stmt) == op))
1326 	{
1327 	  if (name == op)
1328 	    name = gimple_assign_rhs2 (stmt);
1329 	  if (stmts_to_fix.length () > 0)
1330 	    stmts_to_fix.pop ();
1331 	  propagate_op_to_single_use (name, stmt, def);
1332 	  break;
1333 	}
1334 
1335       /* We might have a multiply of two __builtin_pow* calls, and
1336 	 the operand might be hiding in the rightmost one.  Likewise
1337 	 this can happen for a negate.  */
1338       if (opcode == MULT_EXPR
1339 	  && gimple_assign_rhs_code (stmt) == opcode
1340 	  && TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME
1341 	  && has_single_use (gimple_assign_rhs2 (stmt)))
1342 	{
1343 	  gimple *stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
1344 	  if (stmt_is_power_of_op (stmt2, op))
1345 	    {
1346 	      if (decrement_power (stmt2) == 1)
1347 		propagate_op_to_single_use (op, stmt2, def);
1348 	      else
1349 		stmts_to_fix.safe_push (stmt2);
1350 	      break;
1351 	    }
1352 	  else if (is_gimple_assign (stmt2)
1353 		   && gimple_assign_rhs_code (stmt2) == NEGATE_EXPR)
1354 	    {
1355 	      if (gimple_assign_rhs1 (stmt2) == op)
1356 		{
1357 		  tree cst = build_minus_one_cst (TREE_TYPE (op));
1358 		  propagate_op_to_single_use (cst, stmt2, def);
1359 		  break;
1360 		}
1361 	      else if (integer_minus_onep (op)
1362 		       || real_minus_onep (op))
1363 		{
1364 		  stmts_to_fix.safe_push (stmt2);
1365 		  gimple_assign_set_rhs_code
1366 		    (stmt2, TREE_CODE (gimple_assign_rhs1 (stmt2)));
1367 		  break;
1368 		}
1369 	    }
1370 	}
1371 
1372       /* Continue walking the chain.  */
1373       gcc_assert (name != op
1374 		  && TREE_CODE (name) == SSA_NAME);
1375       stmt = SSA_NAME_DEF_STMT (name);
1376       stmts_to_fix.safe_push (stmt);
1377     }
1378   while (1);
1379 
1380   if (stmts_to_fix.length () > 0 || *def == orig_def)
1381     make_new_ssa_for_all_defs (def, opcode, op, stmts_to_fix);
1382 }
1383 
1384 /* Returns true if statement S1 dominates statement S2.  Like
1385    stmt_dominates_stmt_p, but uses stmt UIDs to optimize.  */
1386 
1387 static bool
reassoc_stmt_dominates_stmt_p (gimple *s1, gimple *s2)
1389 {
1390   basic_block bb1 = gimple_bb (s1), bb2 = gimple_bb (s2);
1391 
1392   /* If bb1 is NULL, it should be a GIMPLE_NOP def stmt of an (D)
1393      SSA_NAME.  Assume it lives at the beginning of function and
1394      thus dominates everything.  */
1395   if (!bb1 || s1 == s2)
1396     return true;
1397 
1398   /* If bb2 is NULL, it doesn't dominate any stmt with a bb.  */
1399   if (!bb2)
1400     return false;
1401 
1402   if (bb1 == bb2)
1403     {
      /* PHIs in the same basic block are assumed to be
	 executed all in parallel, so if only one stmt is a PHI,
	 it dominates the other stmt in the same basic block.  */
1407       if (gimple_code (s1) == GIMPLE_PHI)
1408 	return true;
1409 
1410       if (gimple_code (s2) == GIMPLE_PHI)
1411 	return false;
1412 
1413       gcc_assert (gimple_uid (s1) && gimple_uid (s2));
1414 
1415       if (gimple_uid (s1) < gimple_uid (s2))
1416 	return true;
1417 
1418       if (gimple_uid (s1) > gimple_uid (s2))
1419 	return false;
1420 
1421       gimple_stmt_iterator gsi = gsi_for_stmt (s1);
1422       unsigned int uid = gimple_uid (s1);
1423       for (gsi_next (&gsi); !gsi_end_p (gsi); gsi_next (&gsi))
1424 	{
1425 	  gimple *s = gsi_stmt (gsi);
1426 	  if (gimple_uid (s) != uid)
1427 	    break;
1428 	  if (s == s2)
1429 	    return true;
1430 	}
1431 
1432       return false;
1433     }
1434 
1435   return dominated_by_p (CDI_DOMINATORS, bb2, bb1);
1436 }
1437 
1438 /* Insert STMT after INSERT_POINT.  */
1439 
1440 static void
insert_stmt_after (gimple *stmt, gimple *insert_point)
1442 {
1443   gimple_stmt_iterator gsi;
1444   basic_block bb;
1445 
1446   if (gimple_code (insert_point) == GIMPLE_PHI)
1447     bb = gimple_bb (insert_point);
1448   else if (!stmt_ends_bb_p (insert_point))
1449     {
1450       gsi = gsi_for_stmt (insert_point);
1451       gimple_set_uid (stmt, gimple_uid (insert_point));
1452       gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
1453       return;
1454     }
1455   else
1456     /* We assume INSERT_POINT is a SSA_NAME_DEF_STMT of some SSA_NAME,
1457        thus if it must end a basic block, it should be a call that can
1458        throw, or some assignment that can throw.  If it throws, the LHS
1459        of it will not be initialized though, so only valid places using
1460        the SSA_NAME should be dominated by the fallthru edge.  */
1461     bb = find_fallthru_edge (gimple_bb (insert_point)->succs)->dest;
1462   gsi = gsi_after_labels (bb);
1463   if (gsi_end_p (gsi))
1464     {
1465       gimple_stmt_iterator gsi2 = gsi_last_bb (bb);
1466       gimple_set_uid (stmt,
1467 		      gsi_end_p (gsi2) ? 1 : gimple_uid (gsi_stmt (gsi2)));
1468     }
1469   else
1470     gimple_set_uid (stmt, gimple_uid (gsi_stmt (gsi)));
1471   gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
1472 }
1473 
/* Build one statement performing OP1 OPCODE OP2 using a new SSA name of
   TYPE for the result.  Place the statement after the definition of
   either OP1 or OP2.  Return the new statement.  */
1477 
1478 static gimple *
build_and_add_sum (tree type, tree op1, tree op2, enum tree_code opcode)
1480 {
1481   gimple *op1def = NULL, *op2def = NULL;
1482   gimple_stmt_iterator gsi;
1483   tree op;
1484   gassign *sum;
1485 
1486   /* Create the addition statement.  */
1487   op = make_ssa_name (type);
1488   sum = gimple_build_assign (op, opcode, op1, op2);
1489 
1490   /* Find an insertion place and insert.  */
1491   if (TREE_CODE (op1) == SSA_NAME)
1492     op1def = SSA_NAME_DEF_STMT (op1);
1493   if (TREE_CODE (op2) == SSA_NAME)
1494     op2def = SSA_NAME_DEF_STMT (op2);
1495   if ((!op1def || gimple_nop_p (op1def))
1496       && (!op2def || gimple_nop_p (op2def)))
1497     {
1498       gsi = gsi_after_labels (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
1499       if (gsi_end_p (gsi))
1500 	{
1501 	  gimple_stmt_iterator gsi2
1502 	    = gsi_last_bb (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
1503 	  gimple_set_uid (sum,
1504 			  gsi_end_p (gsi2) ? 1 : gimple_uid (gsi_stmt (gsi2)));
1505 	}
1506       else
1507 	gimple_set_uid (sum, gimple_uid (gsi_stmt (gsi)));
1508       gsi_insert_before (&gsi, sum, GSI_NEW_STMT);
1509     }
1510   else
1511     {
1512       gimple *insert_point;
1513       if ((!op1def || gimple_nop_p (op1def))
1514 	   || (op2def && !gimple_nop_p (op2def)
1515 	       && reassoc_stmt_dominates_stmt_p (op1def, op2def)))
1516 	insert_point = op2def;
1517       else
1518 	insert_point = op1def;
1519       insert_stmt_after (sum, insert_point);
1520     }
1521   update_stmt (sum);
1522 
1523   return sum;
1524 }
1525 
1526 /* Perform un-distribution of divisions and multiplications.
1527    A * X + B * X is transformed into (A + B) * X and A / X + B / X
1528    to (A + B) / X for real X.
1529 
1530    The algorithm is organized as follows.
1531 
1532     - First we walk the addition chain *OPS looking for summands that
1533       are defined by a multiplication or a real division.  This results
1534       in the candidates bitmap with relevant indices into *OPS.
1535 
1536     - Second we build the chains of multiplications or divisions for
1537       these candidates, counting the number of occurrences of (operand, code)
1538       pairs in all of the candidates chains.
1539 
1540     - Third we sort the (operand, code) pairs by number of occurrence and
1541       process them starting with the pair with the most uses.
1542 
1543       * For each such pair we walk the candidates again to build a
1544         second candidate bitmap noting all multiplication/division chains
1545 	that have at least one occurrence of (operand, code).
1546 
1547       * We build an alternate addition chain only covering these
1548         candidates with one (operand, code) operation removed from their
1549 	multiplication/division chain.
1550 
1551       * The first candidate gets replaced by the alternate addition chain
1552         multiplied/divided by the operand.
1553 
1554       * All candidate chains get disabled for further processing and
1555         processing of (operand, code) pairs continues.
1556 
1557   The alternate addition chains built are re-processed by the main
1558   reassociation algorithm which allows optimizing a * x * y + b * y * x
1559   to (a + b ) * x * y in one invocation of the reassociation pass.  */
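
/* As a small worked example (operand names are illustrative only):
   for the addition chain a*x + b*x + c*y the counting table contains
   (x, *) with count two and (a, *), (b, *), (c, *), (y, *) with count
   one each; the (x, *) pair is processed first and the chain is
   rewritten into (a + b)*x + c*y, with the freed slot replaced by a
   zero constant that is eliminated later.  */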
1560 
1561 static bool
undistribute_ops_list (enum tree_code opcode,
		       vec<operand_entry *> *ops, struct loop *loop)
1564 {
1565   unsigned int length = ops->length ();
1566   operand_entry *oe1;
1567   unsigned i, j;
1568   unsigned nr_candidates, nr_candidates2;
1569   sbitmap_iterator sbi0;
1570   vec<operand_entry *> *subops;
1571   bool changed = false;
1572   unsigned int next_oecount_id = 0;
1573 
1574   if (length <= 1
1575       || opcode != PLUS_EXPR)
1576     return false;
1577 
1578   /* Build a list of candidates to process.  */
1579   auto_sbitmap candidates (length);
1580   bitmap_clear (candidates);
1581   nr_candidates = 0;
1582   FOR_EACH_VEC_ELT (*ops, i, oe1)
1583     {
1584       enum tree_code dcode;
1585       gimple *oe1def;
1586 
1587       if (TREE_CODE (oe1->op) != SSA_NAME)
1588 	continue;
1589       oe1def = SSA_NAME_DEF_STMT (oe1->op);
1590       if (!is_gimple_assign (oe1def))
1591 	continue;
1592       dcode = gimple_assign_rhs_code (oe1def);
1593       if ((dcode != MULT_EXPR
1594 	   && dcode != RDIV_EXPR)
1595 	  || !is_reassociable_op (oe1def, dcode, loop))
1596 	continue;
1597 
1598       bitmap_set_bit (candidates, i);
1599       nr_candidates++;
1600     }
1601 
1602   if (nr_candidates < 2)
1603     return false;
1604 
1605   if (dump_file && (dump_flags & TDF_DETAILS))
1606     {
1607       fprintf (dump_file, "searching for un-distribute opportunities ");
1608       print_generic_expr (dump_file,
1609 	(*ops)[bitmap_first_set_bit (candidates)]->op, 0);
1610       fprintf (dump_file, " %d\n", nr_candidates);
1611     }
1612 
1613   /* Build linearized sub-operand lists and the counting table.  */
1614   cvec.create (0);
1615 
1616   hash_table<oecount_hasher> ctable (15);
1617 
  /* ??? Macro arguments cannot have multi-argument template types in
     them.  This typedef is needed to work around that limitation.  */
1620   typedef vec<operand_entry *> vec_operand_entry_t_heap;
1621   subops = XCNEWVEC (vec_operand_entry_t_heap, ops->length ());
1622   EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
1623     {
1624       gimple *oedef;
1625       enum tree_code oecode;
1626       unsigned j;
1627 
1628       oedef = SSA_NAME_DEF_STMT ((*ops)[i]->op);
1629       oecode = gimple_assign_rhs_code (oedef);
1630       linearize_expr_tree (&subops[i], oedef,
1631 			   associative_tree_code (oecode), false);
1632 
1633       FOR_EACH_VEC_ELT (subops[i], j, oe1)
1634 	{
1635 	  oecount c;
1636 	  int *slot;
1637 	  int idx;
1638 	  c.oecode = oecode;
1639 	  c.cnt = 1;
1640 	  c.id = next_oecount_id++;
1641 	  c.op = oe1->op;
1642 	  cvec.safe_push (c);
1643 	  idx = cvec.length () + 41;
1644 	  slot = ctable.find_slot (idx, INSERT);
1645 	  if (!*slot)
1646 	    {
1647 	      *slot = idx;
1648 	    }
1649 	  else
1650 	    {
1651 	      cvec.pop ();
1652 	      cvec[*slot - 42].cnt++;
1653 	    }
1654 	}
1655     }
1656 
1657   /* Sort the counting table.  */
1658   cvec.qsort (oecount_cmp);
1659 
1660   if (dump_file && (dump_flags & TDF_DETAILS))
1661     {
1662       oecount *c;
1663       fprintf (dump_file, "Candidates:\n");
1664       FOR_EACH_VEC_ELT (cvec, j, c)
1665 	{
1666 	  fprintf (dump_file, "  %u %s: ", c->cnt,
1667 		   c->oecode == MULT_EXPR
1668 		   ? "*" : c->oecode == RDIV_EXPR ? "/" : "?");
1669 	  print_generic_expr (dump_file, c->op);
1670 	  fprintf (dump_file, "\n");
1671 	}
1672     }
1673 
1674   /* Process the (operand, code) pairs in order of most occurrence.  */
1675   auto_sbitmap candidates2 (length);
1676   while (!cvec.is_empty ())
1677     {
1678       oecount *c = &cvec.last ();
1679       if (c->cnt < 2)
1680 	break;
1681 
1682       /* Now collect the operands in the outer chain that contain
1683          the common operand in their inner chain.  */
1684       bitmap_clear (candidates2);
1685       nr_candidates2 = 0;
1686       EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
1687 	{
1688 	  gimple *oedef;
1689 	  enum tree_code oecode;
1690 	  unsigned j;
1691 	  tree op = (*ops)[i]->op;
1692 
1693 	  /* If we undistributed in this chain already this may be
1694 	     a constant.  */
1695 	  if (TREE_CODE (op) != SSA_NAME)
1696 	    continue;
1697 
1698 	  oedef = SSA_NAME_DEF_STMT (op);
1699 	  oecode = gimple_assign_rhs_code (oedef);
1700 	  if (oecode != c->oecode)
1701 	    continue;
1702 
1703 	  FOR_EACH_VEC_ELT (subops[i], j, oe1)
1704 	    {
1705 	      if (oe1->op == c->op)
1706 		{
1707 		  bitmap_set_bit (candidates2, i);
1708 		  ++nr_candidates2;
1709 		  break;
1710 		}
1711 	    }
1712 	}
1713 
1714       if (nr_candidates2 >= 2)
1715 	{
1716 	  operand_entry *oe1, *oe2;
1717 	  gimple *prod;
1718 	  int first = bitmap_first_set_bit (candidates2);
1719 
1720 	  /* Build the new addition chain.  */
1721 	  oe1 = (*ops)[first];
1722 	  if (dump_file && (dump_flags & TDF_DETAILS))
1723 	    {
1724 	      fprintf (dump_file, "Building (");
1725 	      print_generic_expr (dump_file, oe1->op);
1726 	    }
1727 	  zero_one_operation (&oe1->op, c->oecode, c->op);
1728 	  EXECUTE_IF_SET_IN_BITMAP (candidates2, first+1, i, sbi0)
1729 	    {
1730 	      gimple *sum;
1731 	      oe2 = (*ops)[i];
1732 	      if (dump_file && (dump_flags & TDF_DETAILS))
1733 		{
1734 		  fprintf (dump_file, " + ");
1735 		  print_generic_expr (dump_file, oe2->op);
1736 		}
1737 	      zero_one_operation (&oe2->op, c->oecode, c->op);
1738 	      sum = build_and_add_sum (TREE_TYPE (oe1->op),
1739 				       oe1->op, oe2->op, opcode);
1740 	      oe2->op = build_zero_cst (TREE_TYPE (oe2->op));
1741 	      oe2->rank = 0;
1742 	      oe1->op = gimple_get_lhs (sum);
1743 	    }
1744 
1745 	  /* Apply the multiplication/division.  */
1746 	  prod = build_and_add_sum (TREE_TYPE (oe1->op),
1747 				    oe1->op, c->op, c->oecode);
1748 	  if (dump_file && (dump_flags & TDF_DETAILS))
1749 	    {
1750 	      fprintf (dump_file, ") %s ", c->oecode == MULT_EXPR ? "*" : "/");
1751 	      print_generic_expr (dump_file, c->op);
1752 	      fprintf (dump_file, "\n");
1753 	    }
1754 
1755 	  /* Record it in the addition chain and disable further
1756 	     undistribution with this op.  */
1757 	  oe1->op = gimple_assign_lhs (prod);
1758 	  oe1->rank = get_rank (oe1->op);
1759 	  subops[first].release ();
1760 
1761 	  changed = true;
1762 	}
1763 
1764       cvec.pop ();
1765     }
1766 
1767   for (i = 0; i < ops->length (); ++i)
1768     subops[i].release ();
1769   free (subops);
1770   cvec.release ();
1771 
1772   return changed;
1773 }
1774 
1775 /* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR and CURR is a comparison
1776    expression, examine the other OPS to see if any of them are comparisons
1777    of the same values, which we may be able to combine or eliminate.
1778    For example, we can rewrite (a < b) | (a == b) as (a <= b).  */
1779 
1780 static bool
1781 eliminate_redundant_comparison (enum tree_code opcode,
1782 				vec<operand_entry *> *ops,
1783 				unsigned int currindex,
1784 				operand_entry *curr)
1785 {
1786   tree op1, op2;
1787   enum tree_code lcode, rcode;
1788   gimple *def1, *def2;
1789   int i;
1790   operand_entry *oe;
1791 
1792   if (opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
1793     return false;
1794 
1795   /* Check that CURR is a comparison.  */
1796   if (TREE_CODE (curr->op) != SSA_NAME)
1797     return false;
1798   def1 = SSA_NAME_DEF_STMT (curr->op);
1799   if (!is_gimple_assign (def1))
1800     return false;
1801   lcode = gimple_assign_rhs_code (def1);
1802   if (TREE_CODE_CLASS (lcode) != tcc_comparison)
1803     return false;
1804   op1 = gimple_assign_rhs1 (def1);
1805   op2 = gimple_assign_rhs2 (def1);
1806 
1807   /* Now look for a similar comparison in the remaining OPS.  */
1808   for (i = currindex + 1; ops->iterate (i, &oe); i++)
1809     {
1810       tree t;
1811 
1812       if (TREE_CODE (oe->op) != SSA_NAME)
1813 	continue;
1814       def2 = SSA_NAME_DEF_STMT (oe->op);
1815       if (!is_gimple_assign (def2))
1816 	continue;
1817       rcode = gimple_assign_rhs_code (def2);
1818       if (TREE_CODE_CLASS (rcode) != tcc_comparison)
1819 	continue;
1820 
1821       /* If we got here, we have a match.  See if we can combine the
1822 	 two comparisons.  */
1823       if (opcode == BIT_IOR_EXPR)
1824 	t = maybe_fold_or_comparisons (lcode, op1, op2,
1825 				       rcode, gimple_assign_rhs1 (def2),
1826 				       gimple_assign_rhs2 (def2));
1827       else
1828 	t = maybe_fold_and_comparisons (lcode, op1, op2,
1829 					rcode, gimple_assign_rhs1 (def2),
1830 					gimple_assign_rhs2 (def2));
1831       if (!t)
1832 	continue;
1833 
1834       /* maybe_fold_and_comparisons and maybe_fold_or_comparisons
1835 	 always give us a boolean_type_node value back.  If the original
1836 	 BIT_AND_EXPR or BIT_IOR_EXPR was of a wider integer type,
1837 	 we need to convert.  */
1838       if (!useless_type_conversion_p (TREE_TYPE (curr->op), TREE_TYPE (t)))
1839 	t = fold_convert (TREE_TYPE (curr->op), t);
1840 
1841       if (TREE_CODE (t) != INTEGER_CST
1842 	  && !operand_equal_p (t, curr->op, 0))
1843 	{
1844 	  enum tree_code subcode;
1845 	  tree newop1, newop2;
1846 	  if (!COMPARISON_CLASS_P (t))
1847 	    continue;
1848 	  extract_ops_from_tree (t, &subcode, &newop1, &newop2);
1849 	  STRIP_USELESS_TYPE_CONVERSION (newop1);
1850 	  STRIP_USELESS_TYPE_CONVERSION (newop2);
1851 	  if (!is_gimple_val (newop1) || !is_gimple_val (newop2))
1852 	    continue;
1853 	}
1854 
1855       if (dump_file && (dump_flags & TDF_DETAILS))
1856 	{
1857 	  fprintf (dump_file, "Equivalence: ");
1858 	  print_generic_expr (dump_file, curr->op);
1859 	  fprintf (dump_file, " %s ", op_symbol_code (opcode));
1860 	  print_generic_expr (dump_file, oe->op);
1861 	  fprintf (dump_file, " -> ");
1862 	  print_generic_expr (dump_file, t);
1863 	  fprintf (dump_file, "\n");
1864 	}
1865 
1866       /* Now we can delete oe, as it has been subsumed by the new combined
1867          expression t.  */
1868       ops->ordered_remove (i);
1869       reassociate_stats.ops_eliminated ++;
1870 
1871       /* If t is the same as curr->op, we're done.  Otherwise we must
1872 	 replace curr->op with t.  Special case is if we got a constant
1873 	 back, in which case we add it to the end instead of in place of
1874 	 the current entry.  */
1875       if (TREE_CODE (t) == INTEGER_CST)
1876 	{
1877 	  ops->ordered_remove (currindex);
1878 	  add_to_ops_vec (ops, t);
1879 	}
1880       else if (!operand_equal_p (t, curr->op, 0))
1881 	{
1882 	  gimple *sum;
1883 	  enum tree_code subcode;
1884 	  tree newop1;
1885 	  tree newop2;
1886 	  gcc_assert (COMPARISON_CLASS_P (t));
1887 	  extract_ops_from_tree (t, &subcode, &newop1, &newop2);
1888 	  STRIP_USELESS_TYPE_CONVERSION (newop1);
1889 	  STRIP_USELESS_TYPE_CONVERSION (newop2);
1890 	  gcc_checking_assert (is_gimple_val (newop1)
1891 			       && is_gimple_val (newop2));
1892 	  sum = build_and_add_sum (TREE_TYPE (t), newop1, newop2, subcode);
1893 	  curr->op = gimple_get_lhs (sum);
1894 	}
1895       return true;
1896     }
1897 
1898   return false;
1899 }
1900 
1901 
1902 /* Transform repeated addition of the same value into a multiplication
1903    by a constant.  */
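/* An illustrative example: if the (sorted) operand list contains adjacent
   copies of the same value, e.g. a + a + a + b, the three copies of A are
   removed and a single 3*a entry is pushed instead, giving 3*a + b.  */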
1904 static bool
1905 transform_add_to_multiply (vec<operand_entry *> *ops)
1906 {
1907   operand_entry *oe;
1908   tree op = NULL_TREE;
1909   int j;
1910   int i, start = -1, end = 0, count = 0;
1911   auto_vec<std::pair <int, int> > indxs;
1912   bool changed = false;
1913 
1914   if (!INTEGRAL_TYPE_P (TREE_TYPE ((*ops)[0]->op))
1915       && (!SCALAR_FLOAT_TYPE_P (TREE_TYPE ((*ops)[0]->op))
1916 	  || !flag_unsafe_math_optimizations))
1917     return false;
1918 
1919   /* Look for repeated operands.  */
1920   FOR_EACH_VEC_ELT (*ops, i, oe)
1921     {
1922       if (start == -1)
1923 	{
1924 	  count = 1;
1925 	  op = oe->op;
1926 	  start = i;
1927 	}
1928       else if (operand_equal_p (oe->op, op, 0))
1929 	{
1930 	  count++;
1931 	  end = i;
1932 	}
1933       else
1934 	{
1935 	  if (count > 1)
1936 	    indxs.safe_push (std::make_pair (start, end));
1937 	  count = 1;
1938 	  op = oe->op;
1939 	  start = i;
1940 	}
1941     }
1942 
1943   if (count > 1)
1944     indxs.safe_push (std::make_pair (start, end));
1945 
1946   for (j = indxs.length () - 1; j >= 0; --j)
1947     {
1948       /* Convert repeated operand addition to multiplication.  */
1949       start = indxs[j].first;
1950       end = indxs[j].second;
1951       op = (*ops)[start]->op;
1952       count = end - start + 1;
1953       for (i = end; i >= start; --i)
1954 	ops->unordered_remove (i);
1955       tree tmp = make_ssa_name (TREE_TYPE (op));
1956       tree cst = build_int_cst (integer_type_node, count);
1957       gassign *mul_stmt
1958 	= gimple_build_assign (tmp, MULT_EXPR,
1959 			       op, fold_convert (TREE_TYPE (op), cst));
1960       gimple_set_visited (mul_stmt, true);
1961       add_to_ops_vec (ops, tmp, mul_stmt);
1962       changed = true;
1963     }
1964 
1965   return changed;
1966 }
1967 
1968 
1969 /* Perform various identities and other optimizations on the list of
1970    operand entries, stored in OPS.  The tree code for the binary
1971    operation between all the operands is OPCODE.  */
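/* As a small example of the constant-merging step below: if the sorted
   operand list for a PLUS_EXPR chain ends in the constants 3 and 5, the two
   are popped and replaced by the single constant 8 before the pairwise
   eliminations run.  */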
1972 
1973 static void
1974 optimize_ops_list (enum tree_code opcode,
1975 		   vec<operand_entry *> *ops)
1976 {
1977   unsigned int length = ops->length ();
1978   unsigned int i;
1979   operand_entry *oe;
1980   operand_entry *oelast = NULL;
1981   bool iterate = false;
1982 
1983   if (length == 1)
1984     return;
1985 
1986   oelast = ops->last ();
1987 
1988   /* If the last two are constants, pop the constants off, merge them
1989      and try the next two.  */
1990   if (oelast->rank == 0 && is_gimple_min_invariant (oelast->op))
1991     {
1992       operand_entry *oelm1 = (*ops)[length - 2];
1993 
1994       if (oelm1->rank == 0
1995 	  && is_gimple_min_invariant (oelm1->op)
1996 	  && useless_type_conversion_p (TREE_TYPE (oelm1->op),
1997 				       TREE_TYPE (oelast->op)))
1998 	{
1999 	  tree folded = fold_binary (opcode, TREE_TYPE (oelm1->op),
2000 				     oelm1->op, oelast->op);
2001 
2002 	  if (folded && is_gimple_min_invariant (folded))
2003 	    {
2004 	      if (dump_file && (dump_flags & TDF_DETAILS))
2005 		fprintf (dump_file, "Merging constants\n");
2006 
2007 	      ops->pop ();
2008 	      ops->pop ();
2009 
2010 	      add_to_ops_vec (ops, folded);
2011 	      reassociate_stats.constants_eliminated++;
2012 
2013 	      optimize_ops_list (opcode, ops);
2014 	      return;
2015 	    }
2016 	}
2017     }
2018 
2019   eliminate_using_constants (opcode, ops);
2020   oelast = NULL;
2021 
2022   for (i = 0; ops->iterate (i, &oe);)
2023     {
2024       bool done = false;
2025 
2026       if (eliminate_not_pairs (opcode, ops, i, oe))
2027 	return;
2028       if (eliminate_duplicate_pair (opcode, ops, &done, i, oe, oelast)
2029 	  || (!done && eliminate_plus_minus_pair (opcode, ops, i, oe))
2030 	  || (!done && eliminate_redundant_comparison (opcode, ops, i, oe)))
2031 	{
2032 	  if (done)
2033 	    return;
2034 	  iterate = true;
2035 	  oelast = NULL;
2036 	  continue;
2037 	}
2038       oelast = oe;
2039       i++;
2040     }
2041 
2042   length = ops->length ();
2043   oelast = ops->last ();
2044 
2045   if (iterate)
2046     optimize_ops_list (opcode, ops);
2047 }
2048 
2049 /* The following functions are subroutines to optimize_range_tests and allow
2050    it to try to change a logical combination of comparisons into a range
2051    test.
2052 
2053    For example, both
2054 	X == 2 || X == 5 || X == 3 || X == 4
2055    and
2056 	X >= 2 && X <= 5
2057    are converted to
2058 	(unsigned) (X - 2) <= 3
2059 
2060    For more information see comments above fold_range_test in fold-const.c,
2061    this implementation is for GIMPLE.  */
2062 
2063 struct range_entry
2064 {
2065   tree exp;
2066   tree low;
2067   tree high;
2068   bool in_p;
2069   bool strict_overflow_p;
2070   unsigned int idx, next;
2071 };
2072 
2073 /* This is similar to make_range in fold-const.c, but on top of
2074    GIMPLE instead of trees.  If EXP is non-NULL, it should be
2075    an SSA_NAME and STMT argument is ignored, otherwise STMT
2076    argument should be a GIMPLE_COND.  */
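/* A rough example: for
     _1 = x_2 > 5;
   and EXP == _1 this records r->exp = x_2, r->in_p = true, r->low = 6 and
   r->high = NULL_TREE, i.e. the range +[6, -] ("x_2 >= 6").  */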
2077 
2078 static void
2079 init_range_entry (struct range_entry *r, tree exp, gimple *stmt)
2080 {
2081   int in_p;
2082   tree low, high;
2083   bool is_bool, strict_overflow_p;
2084 
2085   r->exp = NULL_TREE;
2086   r->in_p = false;
2087   r->strict_overflow_p = false;
2088   r->low = NULL_TREE;
2089   r->high = NULL_TREE;
2090   if (exp != NULL_TREE
2091       && (TREE_CODE (exp) != SSA_NAME || !INTEGRAL_TYPE_P (TREE_TYPE (exp))))
2092     return;
2093 
2094   /* Start with simply saying "EXP != 0" and then look at the code of EXP
2095      and see if we can refine the range.  Some of the cases below may not
2096      happen, but it doesn't seem worth worrying about this.  We "continue"
2097      the outer loop when we've changed something; otherwise we "break"
2098      the switch, which will "break" the while.  */
2099   low = exp ? build_int_cst (TREE_TYPE (exp), 0) : boolean_false_node;
2100   high = low;
2101   in_p = 0;
2102   strict_overflow_p = false;
2103   is_bool = false;
2104   if (exp == NULL_TREE)
2105     is_bool = true;
2106   else if (TYPE_PRECISION (TREE_TYPE (exp)) == 1)
2107     {
2108       if (TYPE_UNSIGNED (TREE_TYPE (exp)))
2109 	is_bool = true;
2110       else
2111 	return;
2112     }
2113   else if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE)
2114     is_bool = true;
2115 
2116   while (1)
2117     {
2118       enum tree_code code;
2119       tree arg0, arg1, exp_type;
2120       tree nexp;
2121       location_t loc;
2122 
2123       if (exp != NULL_TREE)
2124 	{
2125 	  if (TREE_CODE (exp) != SSA_NAME
2126 	      || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp))
2127 	    break;
2128 
2129 	  stmt = SSA_NAME_DEF_STMT (exp);
2130 	  if (!is_gimple_assign (stmt))
2131 	    break;
2132 
2133 	  code = gimple_assign_rhs_code (stmt);
2134 	  arg0 = gimple_assign_rhs1 (stmt);
2135 	  arg1 = gimple_assign_rhs2 (stmt);
2136 	  exp_type = TREE_TYPE (exp);
2137 	}
2138       else
2139 	{
2140 	  code = gimple_cond_code (stmt);
2141 	  arg0 = gimple_cond_lhs (stmt);
2142 	  arg1 = gimple_cond_rhs (stmt);
2143 	  exp_type = boolean_type_node;
2144 	}
2145 
2146       if (TREE_CODE (arg0) != SSA_NAME
2147 	  || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (arg0))
2148 	break;
2149       loc = gimple_location (stmt);
2150       switch (code)
2151 	{
2152 	case BIT_NOT_EXPR:
2153 	  if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE
2154 	      /* Ensure the range is either +[-,0], +[0,0],
2155 		 -[-,0], -[0,0] or +[1,-], +[1,1], -[1,-] or
2156 		 -[1,1].  If it is e.g. +[-,-] or -[-,-]
2157 		 or similar expression of unconditional true or
2158 		 false, it should not be negated.  */
2159 	      && ((high && integer_zerop (high))
2160 		  || (low && integer_onep (low))))
2161 	    {
2162 	      in_p = !in_p;
2163 	      exp = arg0;
2164 	      continue;
2165 	    }
2166 	  break;
2167 	case SSA_NAME:
2168 	  exp = arg0;
2169 	  continue;
2170 	CASE_CONVERT:
2171 	  if (is_bool)
2172 	    {
2173 	      if ((TYPE_PRECISION (exp_type) == 1
2174 		   || TREE_CODE (exp_type) == BOOLEAN_TYPE)
2175 		  && TYPE_PRECISION (TREE_TYPE (arg0)) > 1)
2176 		return;
2177 	    }
2178 	  else if (TYPE_PRECISION (TREE_TYPE (arg0)) == 1)
2179 	    {
2180 	      if (TYPE_UNSIGNED (TREE_TYPE (arg0)))
2181 		is_bool = true;
2182 	      else
2183 		return;
2184 	    }
2185 	  else if (TREE_CODE (TREE_TYPE (arg0)) == BOOLEAN_TYPE)
2186 	    is_bool = true;
2187 	  goto do_default;
2188 	case EQ_EXPR:
2189 	case NE_EXPR:
2190 	case LT_EXPR:
2191 	case LE_EXPR:
2192 	case GE_EXPR:
2193 	case GT_EXPR:
2194 	  is_bool = true;
2195 	  /* FALLTHRU */
2196 	default:
2197 	  if (!is_bool)
2198 	    return;
2199 	do_default:
2200 	  nexp = make_range_step (loc, code, arg0, arg1, exp_type,
2201 				  &low, &high, &in_p,
2202 				  &strict_overflow_p);
2203 	  if (nexp != NULL_TREE)
2204 	    {
2205 	      exp = nexp;
2206 	      gcc_assert (TREE_CODE (exp) == SSA_NAME);
2207 	      continue;
2208 	    }
2209 	  break;
2210 	}
2211       break;
2212     }
2213   if (is_bool)
2214     {
2215       r->exp = exp;
2216       r->in_p = in_p;
2217       r->low = low;
2218       r->high = high;
2219       r->strict_overflow_p = strict_overflow_p;
2220     }
2221 }
2222 
2223 /* Comparison function for qsort.  Sort entries
2224    without SSA_NAME exp first, then with SSA_NAMEs sorted
2225    by increasing SSA_NAME_VERSION, and for the same SSA_NAMEs
2226    by increasing ->low and if ->low is the same, by increasing
2227    ->high.  ->low == NULL_TREE means minimum, ->high == NULL_TREE
2228    maximum.  */
2229 
2230 static int
2231 range_entry_cmp (const void *a, const void *b)
2232 {
2233   const struct range_entry *p = (const struct range_entry *) a;
2234   const struct range_entry *q = (const struct range_entry *) b;
2235 
2236   if (p->exp != NULL_TREE && TREE_CODE (p->exp) == SSA_NAME)
2237     {
2238       if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
2239 	{
2240 	  /* Group range_entries for the same SSA_NAME together.  */
2241 	  if (SSA_NAME_VERSION (p->exp) < SSA_NAME_VERSION (q->exp))
2242 	    return -1;
2243 	  else if (SSA_NAME_VERSION (p->exp) > SSA_NAME_VERSION (q->exp))
2244 	    return 1;
2245 	  /* If ->low is different, NULL low goes first, then by
2246 	     ascending low.  */
2247 	  if (p->low != NULL_TREE)
2248 	    {
2249 	      if (q->low != NULL_TREE)
2250 		{
2251 		  tree tem = fold_binary (LT_EXPR, boolean_type_node,
2252 					  p->low, q->low);
2253 		  if (tem && integer_onep (tem))
2254 		    return -1;
2255 		  tem = fold_binary (GT_EXPR, boolean_type_node,
2256 				     p->low, q->low);
2257 		  if (tem && integer_onep (tem))
2258 		    return 1;
2259 		}
2260 	      else
2261 		return 1;
2262 	    }
2263 	  else if (q->low != NULL_TREE)
2264 	    return -1;
2265 	  /* If ->high is different, NULL high goes last, before that by
2266 	     ascending high.  */
2267 	  if (p->high != NULL_TREE)
2268 	    {
2269 	      if (q->high != NULL_TREE)
2270 		{
2271 		  tree tem = fold_binary (LT_EXPR, boolean_type_node,
2272 					  p->high, q->high);
2273 		  if (tem && integer_onep (tem))
2274 		    return -1;
2275 		  tem = fold_binary (GT_EXPR, boolean_type_node,
2276 				     p->high, q->high);
2277 		  if (tem && integer_onep (tem))
2278 		    return 1;
2279 		}
2280 	      else
2281 		return -1;
2282 	    }
2283 	  else if (q->high != NULL_TREE)
2284 	    return 1;
2285 	  /* If both ranges are the same, sort below by ascending idx.  */
2286 	}
2287       else
2288 	return 1;
2289     }
2290   else if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
2291     return -1;
2292 
2293   if (p->idx < q->idx)
2294     return -1;
2295   else
2296     {
2297       gcc_checking_assert (p->idx > q->idx);
2298       return 1;
2299     }
2300 }
2301 
2302 /* Helper function for update_range_test.  Force EXPR into an SSA_NAME,
2303    insert needed statements BEFORE or after GSI.  */
2304 
2305 static tree
2306 force_into_ssa_name (gimple_stmt_iterator *gsi, tree expr, bool before)
2307 {
2308   enum gsi_iterator_update m = before ? GSI_SAME_STMT : GSI_CONTINUE_LINKING;
2309   tree ret = force_gimple_operand_gsi (gsi, expr, true, NULL_TREE, before, m);
2310   if (TREE_CODE (ret) != SSA_NAME)
2311     {
2312       gimple *g = gimple_build_assign (make_ssa_name (TREE_TYPE (ret)), ret);
2313       if (before)
2314 	gsi_insert_before (gsi, g, GSI_SAME_STMT);
2315       else
2316 	gsi_insert_after (gsi, g, GSI_CONTINUE_LINKING);
2317       ret = gimple_assign_lhs (g);
2318     }
2319   return ret;
2320 }
2321 
2322 /* Helper routine of optimize_range_tests.
2323    [EXP, IN_P, LOW, HIGH, STRICT_OVERFLOW_P] is a merged range for
2324    RANGE and OTHERRANGE through OTHERRANGE + COUNT - 1 ranges,
2325    OPCODE and OPS are arguments of optimize_range_tests.  If OTHERRANGE
2326    is NULL, OTHERRANGEP should not be and then OTHERRANGEP points to
2327    an array of COUNT pointers to other ranges.  Return
2328    true if the range merge has been successful.
2329    If OPCODE is ERROR_MARK, this is called from within
2330    maybe_optimize_range_tests and is performing inter-bb range optimization.
2331    In that case, whether an op is BIT_AND_EXPR or BIT_IOR_EXPR is found in
2332    oe->rank.  */
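/* For example, once X == 2 || X == 5 || X == 3 || X == 4 has been merged
   into the single range +[2, 5], the operand entry at RANGE->idx is replaced
   by the new range check and the other COUNT entries are overwritten so that
   the tests they came from are optimized away later.  */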
2333 
2334 static bool
2335 update_range_test (struct range_entry *range, struct range_entry *otherrange,
2336 		   struct range_entry **otherrangep,
2337 		   unsigned int count, enum tree_code opcode,
2338 		   vec<operand_entry *> *ops, tree exp, gimple_seq seq,
2339 		   bool in_p, tree low, tree high, bool strict_overflow_p)
2340 {
2341   operand_entry *oe = (*ops)[range->idx];
2342   tree op = oe->op;
2343   gimple *stmt = op ? SSA_NAME_DEF_STMT (op)
2344 		    : last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id));
2345   location_t loc = gimple_location (stmt);
2346   tree optype = op ? TREE_TYPE (op) : boolean_type_node;
2347   tree tem = build_range_check (loc, optype, unshare_expr (exp),
2348 				in_p, low, high);
2349   enum warn_strict_overflow_code wc = WARN_STRICT_OVERFLOW_COMPARISON;
2350   gimple_stmt_iterator gsi;
2351   unsigned int i, uid;
2352 
2353   if (tem == NULL_TREE)
2354     return false;
2355 
2356   /* If op is default def SSA_NAME, there is no place to insert the
2357      new comparison.  Give up, unless we can use OP itself as the
2358      range test.  */
2359   if (op && SSA_NAME_IS_DEFAULT_DEF (op))
2360     {
2361       if (op == range->exp
2362 	  && ((TYPE_PRECISION (optype) == 1 && TYPE_UNSIGNED (optype))
2363 	      || TREE_CODE (optype) == BOOLEAN_TYPE)
2364 	  && (op == tem
2365 	      || (TREE_CODE (tem) == EQ_EXPR
2366 		  && TREE_OPERAND (tem, 0) == op
2367 		  && integer_onep (TREE_OPERAND (tem, 1))))
2368 	  && opcode != BIT_IOR_EXPR
2369 	  && (opcode != ERROR_MARK || oe->rank != BIT_IOR_EXPR))
2370 	{
2371 	  stmt = NULL;
2372 	  tem = op;
2373 	}
2374       else
2375 	return false;
2376     }
2377 
2378   if (strict_overflow_p && issue_strict_overflow_warning (wc))
2379     warning_at (loc, OPT_Wstrict_overflow,
2380 		"assuming signed overflow does not occur "
2381 		"when simplifying range test");
2382 
2383   if (dump_file && (dump_flags & TDF_DETAILS))
2384     {
2385       struct range_entry *r;
2386       fprintf (dump_file, "Optimizing range tests ");
2387       print_generic_expr (dump_file, range->exp);
2388       fprintf (dump_file, " %c[", range->in_p ? '+' : '-');
2389       print_generic_expr (dump_file, range->low);
2390       fprintf (dump_file, ", ");
2391       print_generic_expr (dump_file, range->high);
2392       fprintf (dump_file, "]");
2393       for (i = 0; i < count; i++)
2394 	{
2395 	  if (otherrange)
2396 	    r = otherrange + i;
2397 	  else
2398 	    r = otherrangep[i];
2399 	  if (r->exp
2400 	      && r->exp != range->exp
2401 	      && TREE_CODE (r->exp) == SSA_NAME)
2402 	    {
2403 	      fprintf (dump_file, " and ");
2404 	      print_generic_expr (dump_file, r->exp);
2405 	    }
2406 	  else
2407 	    fprintf (dump_file, " and");
2408 	  fprintf (dump_file, " %c[", r->in_p ? '+' : '-');
2409 	  print_generic_expr (dump_file, r->low);
2410 	  fprintf (dump_file, ", ");
2411 	  print_generic_expr (dump_file, r->high);
2412 	  fprintf (dump_file, "]");
2413 	}
2414       fprintf (dump_file, "\n into ");
2415       print_generic_expr (dump_file, tem);
2416       fprintf (dump_file, "\n");
2417     }
2418 
2419   if (opcode == BIT_IOR_EXPR
2420       || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
2421     tem = invert_truthvalue_loc (loc, tem);
2422 
2423   tem = fold_convert_loc (loc, optype, tem);
2424   if (stmt)
2425     {
2426       gsi = gsi_for_stmt (stmt);
2427       uid = gimple_uid (stmt);
2428     }
2429   else
2430     {
2431       gsi = gsi_none ();
2432       uid = 0;
2433     }
2434   if (stmt == NULL)
2435     gcc_checking_assert (tem == op);
2436   /* In rare cases range->exp can be equal to lhs of stmt.
2437      In that case we have to insert after the stmt rather than before
2438      it.  If stmt is a PHI, insert it at the start of the basic block.  */
2439   else if (op != range->exp)
2440     {
2441       gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT);
2442       tem = force_into_ssa_name (&gsi, tem, true);
2443       gsi_prev (&gsi);
2444     }
2445   else if (gimple_code (stmt) != GIMPLE_PHI)
2446     {
2447       gsi_insert_seq_after (&gsi, seq, GSI_CONTINUE_LINKING);
2448       tem = force_into_ssa_name (&gsi, tem, false);
2449     }
2450   else
2451     {
2452       gsi = gsi_after_labels (gimple_bb (stmt));
2453       if (!gsi_end_p (gsi))
2454 	uid = gimple_uid (gsi_stmt (gsi));
2455       else
2456 	{
2457 	  gsi = gsi_start_bb (gimple_bb (stmt));
2458 	  uid = 1;
2459 	  while (!gsi_end_p (gsi))
2460 	    {
2461 	      uid = gimple_uid (gsi_stmt (gsi));
2462 	      gsi_next (&gsi);
2463 	    }
2464 	}
2465       gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT);
2466       tem = force_into_ssa_name (&gsi, tem, true);
2467       if (gsi_end_p (gsi))
2468 	gsi = gsi_last_bb (gimple_bb (stmt));
2469       else
2470 	gsi_prev (&gsi);
2471     }
2472   for (; !gsi_end_p (gsi); gsi_prev (&gsi))
2473     if (gimple_uid (gsi_stmt (gsi)))
2474       break;
2475     else
2476       gimple_set_uid (gsi_stmt (gsi), uid);
2477 
2478   oe->op = tem;
2479   range->exp = exp;
2480   range->low = low;
2481   range->high = high;
2482   range->in_p = in_p;
2483   range->strict_overflow_p = false;
2484 
2485   for (i = 0; i < count; i++)
2486     {
2487       if (otherrange)
2488 	range = otherrange + i;
2489       else
2490 	range = otherrangep[i];
2491       oe = (*ops)[range->idx];
2492       /* Now change all the other range test immediate uses, so that
2493 	 those tests will be optimized away.  */
2494       if (opcode == ERROR_MARK)
2495 	{
2496 	  if (oe->op)
2497 	    oe->op = build_int_cst (TREE_TYPE (oe->op),
2498 				    oe->rank == BIT_IOR_EXPR ? 0 : 1);
2499 	  else
2500 	    oe->op = (oe->rank == BIT_IOR_EXPR
2501 		      ? boolean_false_node : boolean_true_node);
2502 	}
2503       else
2504 	oe->op = error_mark_node;
2505       range->exp = NULL_TREE;
2506       range->low = NULL_TREE;
2507       range->high = NULL_TREE;
2508     }
2509   return true;
2510 }
2511 
2512 /* Optimize X == CST1 || X == CST2
2513    if popcount (CST1 ^ CST2) == 1 into
2514    (X & ~(CST1 ^ CST2)) == (CST1 & ~(CST1 ^ CST2)).
2515    Similarly for ranges.  E.g.
2516    X != 2 && X != 3 && X != 10 && X != 11
2517    will be transformed by the previous optimization into
2518    !((X - 2U) <= 1U || (X - 10U) <= 1U)
2519    and this loop can transform that into
2520    !(((X & ~8) - 2U) <= 1U).  */
2521 
2522 static bool
2523 optimize_range_tests_xor (enum tree_code opcode, tree type,
2524 			  tree lowi, tree lowj, tree highi, tree highj,
2525 			  vec<operand_entry *> *ops,
2526 			  struct range_entry *rangei,
2527 			  struct range_entry *rangej)
2528 {
2529   tree lowxor, highxor, tem, exp;
2530   /* Check lowi ^ lowj == highi ^ highj and
2531      popcount (lowi ^ lowj) == 1.  */
2532   lowxor = fold_binary (BIT_XOR_EXPR, type, lowi, lowj);
2533   if (lowxor == NULL_TREE || TREE_CODE (lowxor) != INTEGER_CST)
2534     return false;
2535   if (!integer_pow2p (lowxor))
2536     return false;
2537   highxor = fold_binary (BIT_XOR_EXPR, type, highi, highj);
2538   if (!tree_int_cst_equal (lowxor, highxor))
2539     return false;
2540 
2541   tem = fold_build1 (BIT_NOT_EXPR, type, lowxor);
2542   exp = fold_build2 (BIT_AND_EXPR, type, rangei->exp, tem);
2543   lowj = fold_build2 (BIT_AND_EXPR, type, lowi, tem);
2544   highj = fold_build2 (BIT_AND_EXPR, type, highi, tem);
2545   if (update_range_test (rangei, rangej, NULL, 1, opcode, ops, exp,
2546 			 NULL, rangei->in_p, lowj, highj,
2547 			 rangei->strict_overflow_p
2548 			 || rangej->strict_overflow_p))
2549     return true;
2550   return false;
2551 }
2552 
2553 /* Optimize X == CST1 || X == CST2
2554    if popcount (CST2 - CST1) == 1 into
2555    ((X - CST1) & ~(CST2 - CST1)) == 0.
2556    Similarly for ranges.  E.g.
2557    X == 43 || X == 76 || X == 44 || X == 78 || X == 77 || X == 46
2558    || X == 75 || X == 45
2559    will be transformed by the previous optimization into
2560    (X - 43U) <= 3U || (X - 75U) <= 3U
2561    and this loop can transform that into
2562    ((X - 43U) & ~(75U - 43U)) <= 3U.  */
2563 static bool
2564 optimize_range_tests_diff (enum tree_code opcode, tree type,
2565 			   tree lowi, tree lowj, tree highi, tree highj,
2566 			   vec<operand_entry *> *ops,
2567 			   struct range_entry *rangei,
2568 			   struct range_entry *rangej)
2569 {
2570   tree tem1, tem2, mask;
2571   /* Check highi - lowi == highj - lowj.  */
2572   tem1 = fold_binary (MINUS_EXPR, type, highi, lowi);
2573   if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST)
2574     return false;
2575   tem2 = fold_binary (MINUS_EXPR, type, highj, lowj);
2576   if (!tree_int_cst_equal (tem1, tem2))
2577     return false;
2578   /* Check popcount (lowj - lowi) == 1.  */
2579   tem1 = fold_binary (MINUS_EXPR, type, lowj, lowi);
2580   if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST)
2581     return false;
2582   if (!integer_pow2p (tem1))
2583     return false;
2584 
2585   type = unsigned_type_for (type);
2586   tem1 = fold_convert (type, tem1);
2587   tem2 = fold_convert (type, tem2);
2588   lowi = fold_convert (type, lowi);
2589   mask = fold_build1 (BIT_NOT_EXPR, type, tem1);
2590   tem1 = fold_build2 (MINUS_EXPR, type,
2591 		      fold_convert (type, rangei->exp), lowi);
2592   tem1 = fold_build2 (BIT_AND_EXPR, type, tem1, mask);
2593   lowj = build_int_cst (type, 0);
2594   if (update_range_test (rangei, rangej, NULL, 1, opcode, ops, tem1,
2595 			 NULL, rangei->in_p, lowj, tem2,
2596 			 rangei->strict_overflow_p
2597 			 || rangej->strict_overflow_p))
2598     return true;
2599   return false;
2600 }
2601 
2602 /* Perform the checks common to optimize_range_tests_xor and
2603    optimize_range_tests_diff.
2604    If OPTIMIZE_XOR is TRUE, call optimize_range_tests_xor,
2605    otherwise call optimize_range_tests_diff.  */
2606 
2607 static bool
2608 optimize_range_tests_1 (enum tree_code opcode, int first, int length,
2609 			bool optimize_xor, vec<operand_entry *> *ops,
2610 			struct range_entry *ranges)
2611 {
2612   int i, j;
2613   bool any_changes = false;
2614   for (i = first; i < length; i++)
2615     {
2616       tree lowi, highi, lowj, highj, type, tem;
2617 
2618       if (ranges[i].exp == NULL_TREE || ranges[i].in_p)
2619 	continue;
2620       type = TREE_TYPE (ranges[i].exp);
2621       if (!INTEGRAL_TYPE_P (type))
2622 	continue;
2623       lowi = ranges[i].low;
2624       if (lowi == NULL_TREE)
2625 	lowi = TYPE_MIN_VALUE (type);
2626       highi = ranges[i].high;
2627       if (highi == NULL_TREE)
2628 	continue;
2629       for (j = i + 1; j < length && j < i + 64; j++)
2630 	{
2631 	  bool changes;
2632 	  if (ranges[i].exp != ranges[j].exp || ranges[j].in_p)
2633 	    continue;
2634 	  lowj = ranges[j].low;
2635 	  if (lowj == NULL_TREE)
2636 	    continue;
2637 	  highj = ranges[j].high;
2638 	  if (highj == NULL_TREE)
2639 	    highj = TYPE_MAX_VALUE (type);
2640 	  /* Check lowj > highi.  */
2641 	  tem = fold_binary (GT_EXPR, boolean_type_node,
2642 			     lowj, highi);
2643 	  if (tem == NULL_TREE || !integer_onep (tem))
2644 	    continue;
2645 	  if (optimize_xor)
2646 	    changes = optimize_range_tests_xor (opcode, type, lowi, lowj,
2647 						highi, highj, ops,
2648 						ranges + i, ranges + j);
2649 	  else
2650 	    changes = optimize_range_tests_diff (opcode, type, lowi, lowj,
2651 						 highi, highj, ops,
2652 						 ranges + i, ranges + j);
2653 	  if (changes)
2654 	    {
2655 	      any_changes = true;
2656 	      break;
2657 	    }
2658 	}
2659     }
2660   return any_changes;
2661 }
2662 
2663 /* Helper function of optimize_range_tests_to_bit_test.  Handle a single
2664    range, EXP, LOW, HIGH, compute bit mask of bits to test and return
2665    EXP on success, NULL otherwise.  */
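/* A worked example (assuming a 64-bit word): with PREC 64, TOTALLOW 43,
   LOW 75 and HIGH 78 the range covers 4 values, so *MASK starts as 0xf and
   is then shifted left by LOW - TOTALLOW == 32, giving 0xf00000000, and EXP
   is returned unchanged.  */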
2666 
2667 static tree
2668 extract_bit_test_mask (tree exp, int prec, tree totallow, tree low, tree high,
2669 		       wide_int *mask, tree *totallowp)
2670 {
2671   tree tem = int_const_binop (MINUS_EXPR, high, low);
2672   if (tem == NULL_TREE
2673       || TREE_CODE (tem) != INTEGER_CST
2674       || TREE_OVERFLOW (tem)
2675       || tree_int_cst_sgn (tem) == -1
2676       || compare_tree_int (tem, prec) != -1)
2677     return NULL_TREE;
2678 
2679   unsigned HOST_WIDE_INT max = tree_to_uhwi (tem) + 1;
2680   *mask = wi::shifted_mask (0, max, false, prec);
2681   if (TREE_CODE (exp) == BIT_AND_EXPR
2682       && TREE_CODE (TREE_OPERAND (exp, 1)) == INTEGER_CST)
2683     {
2684       widest_int msk = wi::to_widest (TREE_OPERAND (exp, 1));
2685       msk = wi::zext (~msk, TYPE_PRECISION (TREE_TYPE (exp)));
2686       if (wi::popcount (msk) == 1
2687 	  && wi::ltu_p (msk, prec - max))
2688 	{
2689 	  *mask |= wi::shifted_mask (msk.to_uhwi (), max, false, prec);
2690 	  max += msk.to_uhwi ();
2691 	  exp = TREE_OPERAND (exp, 0);
2692 	  if (integer_zerop (low)
2693 	      && TREE_CODE (exp) == PLUS_EXPR
2694 	      && TREE_CODE (TREE_OPERAND (exp, 1)) == INTEGER_CST)
2695 	    {
2696 	      tree ret = TREE_OPERAND (exp, 0);
2697 	      STRIP_NOPS (ret);
2698 	      widest_int bias
2699 	        = wi::neg (wi::sext (wi::to_widest (TREE_OPERAND (exp, 1)),
2700 				     TYPE_PRECISION (TREE_TYPE (low))));
2701 	      tree tbias = wide_int_to_tree (TREE_TYPE (ret), bias);
2702 	      if (totallowp)
2703 		{
2704 		  *totallowp = tbias;
2705 		  return ret;
2706 		}
2707 	      else if (!tree_int_cst_lt (totallow, tbias))
2708 		return NULL_TREE;
2709 	      bias = wi::to_widest (tbias);
2710 	      bias -= wi::to_widest (totallow);
2711 	      if (bias >= 0 && bias < prec - max)
2712 		{
2713 		  *mask = wi::lshift (*mask, bias);
2714 		  return ret;
2715 		}
2716 	    }
2717 	}
2718     }
2719   if (totallowp)
2720     return exp;
2721   if (!tree_int_cst_lt (totallow, low))
2722     return exp;
2723   tem = int_const_binop (MINUS_EXPR, low, totallow);
2724   if (tem == NULL_TREE
2725       || TREE_CODE (tem) != INTEGER_CST
2726       || TREE_OVERFLOW (tem)
2727       || compare_tree_int (tem, prec - max) == 1)
2728     return NULL_TREE;
2729 
2730   *mask = wi::lshift (*mask, wi::to_widest (tem));
2731   return exp;
2732 }
2733 
2734 /* Attempt to optimize small range tests using a bit test.
2735    E.g.
2736    X != 43 && X != 76 && X != 44 && X != 78 && X != 49
2737    && X != 77 && X != 46 && X != 75 && X != 45 && X != 82
2738    has been optimized by earlier passes into:
2739    ((X - 43U) & ~32U) > 3U && X != 49 && X != 82
2740    As the whole 43 through 82 range spans fewer than 64 values,
2741    for 64-bit word targets optimize that into:
2742    (X - 43U) > 40U && ((1 << (X - 43U)) & 0x8F0000004FULL) == 0  */
2743 
2744 static bool
2745 optimize_range_tests_to_bit_test (enum tree_code opcode, int first, int length,
2746 				  vec<operand_entry *> *ops,
2747 				  struct range_entry *ranges)
2748 {
2749   int i, j;
2750   bool any_changes = false;
2751   int prec = GET_MODE_BITSIZE (word_mode);
2752   auto_vec<struct range_entry *, 64> candidates;
2753 
2754   for (i = first; i < length - 2; i++)
2755     {
2756       tree lowi, highi, lowj, highj, type;
2757 
2758       if (ranges[i].exp == NULL_TREE || ranges[i].in_p)
2759 	continue;
2760       type = TREE_TYPE (ranges[i].exp);
2761       if (!INTEGRAL_TYPE_P (type))
2762 	continue;
2763       lowi = ranges[i].low;
2764       if (lowi == NULL_TREE)
2765 	lowi = TYPE_MIN_VALUE (type);
2766       highi = ranges[i].high;
2767       if (highi == NULL_TREE)
2768 	continue;
2769       wide_int mask;
2770       tree exp = extract_bit_test_mask (ranges[i].exp, prec, lowi, lowi,
2771 					highi, &mask, &lowi);
2772       if (exp == NULL_TREE)
2773 	continue;
2774       bool strict_overflow_p = ranges[i].strict_overflow_p;
2775       candidates.truncate (0);
2776       int end = MIN (i + 64, length);
2777       for (j = i + 1; j < end; j++)
2778 	{
2779 	  tree exp2;
2780 	  if (ranges[j].exp == NULL_TREE || ranges[j].in_p)
2781 	    continue;
2782 	  if (ranges[j].exp == exp)
2783 	    ;
2784 	  else if (TREE_CODE (ranges[j].exp) == BIT_AND_EXPR)
2785 	    {
2786 	      exp2 = TREE_OPERAND (ranges[j].exp, 0);
2787 	      if (exp2 == exp)
2788 		;
2789 	      else if (TREE_CODE (exp2) == PLUS_EXPR)
2790 		{
2791 		  exp2 = TREE_OPERAND (exp2, 0);
2792 		  STRIP_NOPS (exp2);
2793 		  if (exp2 != exp)
2794 		    continue;
2795 		}
2796 	      else
2797 		continue;
2798 	    }
2799 	  else
2800 	    continue;
2801 	  lowj = ranges[j].low;
2802 	  if (lowj == NULL_TREE)
2803 	    continue;
2804 	  highj = ranges[j].high;
2805 	  if (highj == NULL_TREE)
2806 	    highj = TYPE_MAX_VALUE (type);
2807 	  wide_int mask2;
2808 	  exp2 = extract_bit_test_mask (ranges[j].exp, prec, lowi, lowj,
2809 					highj, &mask2, NULL);
2810 	  if (exp2 != exp)
2811 	    continue;
2812 	  mask |= mask2;
2813 	  strict_overflow_p |= ranges[j].strict_overflow_p;
2814 	  candidates.safe_push (&ranges[j]);
2815 	}
2816 
2817       /* If we would otherwise need 3 or more comparisons, use a bit test.  */
2818       if (candidates.length () >= 2)
2819 	{
2820 	  tree high = wide_int_to_tree (TREE_TYPE (lowi),
2821 					wi::to_widest (lowi)
2822 					+ prec - 1 - wi::clz (mask));
2823 	  operand_entry *oe = (*ops)[ranges[i].idx];
2824 	  tree op = oe->op;
2825 	  gimple *stmt = op ? SSA_NAME_DEF_STMT (op)
2826 			    : last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id));
2827 	  location_t loc = gimple_location (stmt);
2828 	  tree optype = op ? TREE_TYPE (op) : boolean_type_node;
2829 
2830 	  /* See if it isn't cheaper to pretend the minimum value of the
2831 	     range is 0, if the maximum value is small enough.
2832 	     We can then avoid the subtraction of the minimum value, but the
2833 	     mask constant might be more expensive.  */
2834 	  if (compare_tree_int (lowi, 0) > 0
2835 	      && compare_tree_int (high, prec) < 0)
2836 	    {
2837 	      int cost_diff;
2838 	      HOST_WIDE_INT m = tree_to_uhwi (lowi);
2839 	      rtx reg = gen_raw_REG (word_mode, 10000);
2840 	      bool speed_p = optimize_bb_for_speed_p (gimple_bb (stmt));
2841 	      cost_diff = set_rtx_cost (gen_rtx_PLUS (word_mode, reg,
2842 						      GEN_INT (-m)), speed_p);
2843 	      rtx r = immed_wide_int_const (mask, word_mode);
2844 	      cost_diff += set_src_cost (gen_rtx_AND (word_mode, reg, r),
2845 					 word_mode, speed_p);
2846 	      r = immed_wide_int_const (wi::lshift (mask, m), word_mode);
2847 	      cost_diff -= set_src_cost (gen_rtx_AND (word_mode, reg, r),
2848 					 word_mode, speed_p);
2849 	      if (cost_diff > 0)
2850 		{
2851 		  mask = wi::lshift (mask, m);
2852 		  lowi = build_zero_cst (TREE_TYPE (lowi));
2853 		}
2854 	    }
2855 
2856 	  tree tem = build_range_check (loc, optype, unshare_expr (exp),
2857 					false, lowi, high);
2858 	  if (tem == NULL_TREE || is_gimple_val (tem))
2859 	    continue;
2860 	  tree etype = unsigned_type_for (TREE_TYPE (exp));
2861 	  exp = fold_build2_loc (loc, MINUS_EXPR, etype,
2862 				 fold_convert_loc (loc, etype, exp),
2863 				 fold_convert_loc (loc, etype, lowi));
2864 	  exp = fold_convert_loc (loc, integer_type_node, exp);
2865 	  tree word_type = lang_hooks.types.type_for_mode (word_mode, 1);
2866 	  exp = fold_build2_loc (loc, LSHIFT_EXPR, word_type,
2867 				 build_int_cst (word_type, 1), exp);
2868 	  exp = fold_build2_loc (loc, BIT_AND_EXPR, word_type, exp,
2869 				 wide_int_to_tree (word_type, mask));
2870 	  exp = fold_build2_loc (loc, EQ_EXPR, optype, exp,
2871 				 build_zero_cst (word_type));
2872 	  if (is_gimple_val (exp))
2873 	    continue;
2874 
2875 	  /* The shift might have undefined behavior if TEM is true,
2876 	     but reassociate_bb isn't prepared to have basic blocks
2877 	     split when it is running.  So, temporarily emit code
2878 	     with BIT_IOR_EXPR instead of &&, and fix it up in
2879 	     branch_fixup.  */
2880 	  gimple_seq seq;
2881 	  tem = force_gimple_operand (tem, &seq, true, NULL_TREE);
2882 	  gcc_assert (TREE_CODE (tem) == SSA_NAME);
2883 	  gimple_set_visited (SSA_NAME_DEF_STMT (tem), true);
2884 	  gimple_seq seq2;
2885 	  exp = force_gimple_operand (exp, &seq2, true, NULL_TREE);
2886 	  gimple_seq_add_seq_without_update (&seq, seq2);
2887 	  gcc_assert (TREE_CODE (exp) == SSA_NAME);
2888 	  gimple_set_visited (SSA_NAME_DEF_STMT (exp), true);
2889 	  gimple *g = gimple_build_assign (make_ssa_name (optype),
2890 					   BIT_IOR_EXPR, tem, exp);
2891 	  gimple_set_location (g, loc);
2892 	  gimple_seq_add_stmt_without_update (&seq, g);
2893 	  exp = gimple_assign_lhs (g);
2894 	  tree val = build_zero_cst (optype);
2895 	  if (update_range_test (&ranges[i], NULL, candidates.address (),
2896 				 candidates.length (), opcode, ops, exp,
2897 				 seq, false, val, val, strict_overflow_p))
2898 	    {
2899 	      any_changes = true;
2900 	      reassoc_branch_fixups.safe_push (tem);
2901 	    }
2902 	  else
2903 	    gimple_seq_discard (seq);
2904 	}
2905     }
2906   return any_changes;
2907 }
2908 
2909 /* Optimize x != 0 && y != 0 && z != 0 into (x | y | z) != 0
2910    and similarly x != -1 && y != -1 && z != -1 into (x & y & z) != -1.  */
2911 
2912 static bool
2913 optimize_range_tests_cmp_bitwise (enum tree_code opcode, int first, int length,
2914 				  vec<operand_entry *> *ops,
2915 				  struct range_entry *ranges)
2916 {
2917   int i;
2918   unsigned int b;
2919   bool any_changes = false;
2920   auto_vec<int, 128> buckets;
2921   auto_vec<int, 32> chains;
2922   auto_vec<struct range_entry *, 32> candidates;
2923 
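  /* Bucket the x == 0 / x == -1 style tests: B encodes the operand's type
     precision and which of the two constants is tested, BUCKETS[B] holds the
     1-based index of the last such range seen and CHAINS[] links the earlier
     ones, so each bucket can later be combined into one | or & test.  */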
2924   for (i = first; i < length; i++)
2925     {
2926       if (ranges[i].exp == NULL_TREE
2927 	  || TREE_CODE (ranges[i].exp) != SSA_NAME
2928 	  || !ranges[i].in_p
2929 	  || TYPE_PRECISION (TREE_TYPE (ranges[i].exp)) <= 1
2930 	  || TREE_CODE (TREE_TYPE (ranges[i].exp)) == BOOLEAN_TYPE
2931 	  || ranges[i].low == NULL_TREE
2932 	  || ranges[i].low != ranges[i].high)
2933 	continue;
2934 
2935       bool zero_p = integer_zerop (ranges[i].low);
2936       if (!zero_p && !integer_all_onesp (ranges[i].low))
2937 	continue;
2938 
2939       b = TYPE_PRECISION (TREE_TYPE (ranges[i].exp)) * 2 + !zero_p;
2940       if (buckets.length () <= b)
2941 	buckets.safe_grow_cleared (b + 1);
2942       if (chains.length () <= (unsigned) i)
2943 	chains.safe_grow (i + 1);
2944       chains[i] = buckets[b];
2945       buckets[b] = i + 1;
2946     }
2947 
2948   FOR_EACH_VEC_ELT (buckets, b, i)
2949     if (i && chains[i - 1])
2950       {
2951 	int j, k = i;
2952 	for (j = chains[i - 1]; j; j = chains[j - 1])
2953 	  {
2954 	    gimple *gk = SSA_NAME_DEF_STMT (ranges[k - 1].exp);
2955 	    gimple *gj = SSA_NAME_DEF_STMT (ranges[j - 1].exp);
2956 	    if (reassoc_stmt_dominates_stmt_p (gk, gj))
2957 	      k = j;
2958 	  }
2959 	tree type1 = TREE_TYPE (ranges[k - 1].exp);
2960 	tree type2 = NULL_TREE;
2961 	bool strict_overflow_p = false;
2962 	candidates.truncate (0);
2963 	for (j = i; j; j = chains[j - 1])
2964 	  {
2965 	    tree type = TREE_TYPE (ranges[j - 1].exp);
2966 	    strict_overflow_p |= ranges[j - 1].strict_overflow_p;
2967 	    if (j == k
2968 		|| useless_type_conversion_p (type1, type))
2969 	      ;
2970 	    else if (type2 == NULL_TREE
2971 		     || useless_type_conversion_p (type2, type))
2972 	      {
2973 		if (type2 == NULL_TREE)
2974 		  type2 = type;
2975 		candidates.safe_push (&ranges[j - 1]);
2976 	      }
2977 	  }
2978 	unsigned l = candidates.length ();
2979 	for (j = i; j; j = chains[j - 1])
2980 	  {
2981 	    tree type = TREE_TYPE (ranges[j - 1].exp);
2982 	    if (j == k)
2983 	      continue;
2984 	    if (useless_type_conversion_p (type1, type))
2985 	      ;
2986 	    else if (type2 == NULL_TREE
2987 		     || useless_type_conversion_p (type2, type))
2988 	      continue;
2989 	    candidates.safe_push (&ranges[j - 1]);
2990 	  }
2991 	gimple_seq seq = NULL;
2992 	tree op = NULL_TREE;
2993 	unsigned int id;
2994 	struct range_entry *r;
2995 	candidates.safe_push (&ranges[k - 1]);
2996 	FOR_EACH_VEC_ELT (candidates, id, r)
2997 	  {
2998 	    gimple *g;
2999 	    if (id == 0)
3000 	      {
3001 		op = r->exp;
3002 		continue;
3003 	      }
3004 	    if (id == l)
3005 	      {
3006 		g = gimple_build_assign (make_ssa_name (type1), NOP_EXPR, op);
3007 		gimple_seq_add_stmt_without_update (&seq, g);
3008 		op = gimple_assign_lhs (g);
3009 	      }
3010 	    tree type = TREE_TYPE (r->exp);
3011 	    tree exp = r->exp;
3012 	    if (id >= l && !useless_type_conversion_p (type1, type))
3013 	      {
3014 		g = gimple_build_assign (make_ssa_name (type1), NOP_EXPR, exp);
3015 		gimple_seq_add_stmt_without_update (&seq, g);
3016 		exp = gimple_assign_lhs (g);
3017 	      }
3018 	    g = gimple_build_assign (make_ssa_name (id >= l ? type1 : type2),
3019 				     (b & 1) ? BIT_AND_EXPR : BIT_IOR_EXPR,
3020 				     op, exp);
3021 	    gimple_seq_add_stmt_without_update (&seq, g);
3022 	    op = gimple_assign_lhs (g);
3023 	  }
3024 	candidates.pop ();
3025 	if (update_range_test (&ranges[k - 1], NULL, candidates.address (),
3026 			       candidates.length (), opcode, ops, op,
3027 			       seq, true, ranges[k - 1].low,
3028 			       ranges[k - 1].low, strict_overflow_p))
3029 	  any_changes = true;
3030 	else
3031 	  gimple_seq_discard (seq);
3032       }
3033 
3034   return any_changes;
3035 }
3036 
3037 /* Attempt to optimize for signed a and b where b is known to be >= 0:
3038    a >= 0 && a < b into (unsigned) a < (unsigned) b
3039    a >= 0 && a <= b into (unsigned) a <= (unsigned) b  */
3040 
3041 static bool
3042 optimize_range_tests_var_bound (enum tree_code opcode, int first, int length,
3043 				vec<operand_entry *> *ops,
3044 				struct range_entry *ranges,
3045 				basic_block first_bb)
3046 {
3047   int i;
3048   bool any_changes = false;
3049   hash_map<tree, int> *map = NULL;
3050 
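  /* First pass: record in MAP each SSA_NAME that some in_p range proves to
     be >= 0 (low 0, high unbounded), keyed to the index of that range.  */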
3051   for (i = first; i < length; i++)
3052     {
3053       if (ranges[i].exp == NULL_TREE
3054 	  || TREE_CODE (ranges[i].exp) != SSA_NAME
3055 	  || !ranges[i].in_p)
3056 	continue;
3057 
3058       tree type = TREE_TYPE (ranges[i].exp);
3059       if (!INTEGRAL_TYPE_P (type)
3060 	  || TYPE_UNSIGNED (type)
3061 	  || ranges[i].low == NULL_TREE
3062 	  || !integer_zerop (ranges[i].low)
3063 	  || ranges[i].high != NULL_TREE)
3064 	continue;
3065       /* EXP >= 0 here.  */
3066       if (map == NULL)
3067 	map = new hash_map <tree, int>;
3068       map->put (ranges[i].exp, i);
3069     }
3070 
3071   if (map == NULL)
3072     return false;
3073 
3074   for (i = 0; i < length; i++)
3075     {
3076       bool in_p = ranges[i].in_p;
3077       if (ranges[i].low == NULL_TREE
3078 	  || ranges[i].high == NULL_TREE)
3079 	continue;
3080       if (!integer_zerop (ranges[i].low)
3081 	  || !integer_zerop (ranges[i].high))
3082 	{
3083 	  if (ranges[i].exp
3084 	      && TYPE_PRECISION (TREE_TYPE (ranges[i].exp)) == 1
3085 	      && TYPE_UNSIGNED (TREE_TYPE (ranges[i].exp))
3086 	      && integer_onep (ranges[i].low)
3087 	      && integer_onep (ranges[i].high))
3088 	    in_p = !in_p;
3089 	  else
3090 	    continue;
3091 	}
3092 
3093       gimple *stmt;
3094       tree_code ccode;
3095       tree rhs1, rhs2;
3096       if (ranges[i].exp)
3097 	{
3098 	  if (TREE_CODE (ranges[i].exp) != SSA_NAME)
3099 	    continue;
3100 	  stmt = SSA_NAME_DEF_STMT (ranges[i].exp);
3101 	  if (!is_gimple_assign (stmt))
3102 	    continue;
3103 	  ccode = gimple_assign_rhs_code (stmt);
3104 	  rhs1 = gimple_assign_rhs1 (stmt);
3105 	  rhs2 = gimple_assign_rhs2 (stmt);
3106 	}
3107       else
3108 	{
3109 	  operand_entry *oe = (*ops)[ranges[i].idx];
3110 	  stmt = last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id));
3111 	  if (gimple_code (stmt) != GIMPLE_COND)
3112 	    continue;
3113 	  ccode = gimple_cond_code (stmt);
3114 	  rhs1 = gimple_cond_lhs (stmt);
3115 	  rhs2 = gimple_cond_rhs (stmt);
3116 	}
3117 
3118       if (TREE_CODE (rhs1) != SSA_NAME
3119 	  || rhs2 == NULL_TREE
3120 	  || TREE_CODE (rhs2) != SSA_NAME)
3121 	continue;
3122 
3123       switch (ccode)
3124 	{
3125 	case GT_EXPR:
3126 	case GE_EXPR:
3127 	case LT_EXPR:
3128 	case LE_EXPR:
3129 	  break;
3130 	default:
3131 	  continue;
3132 	}
3133       if (in_p)
3134 	ccode = invert_tree_comparison (ccode, false);
3135       switch (ccode)
3136 	{
3137 	case GT_EXPR:
3138 	case GE_EXPR:
3139 	  std::swap (rhs1, rhs2);
3140 	  ccode = swap_tree_comparison (ccode);
3141 	  break;
3142 	case LT_EXPR:
3143 	case LE_EXPR:
3144 	  break;
3145 	default:
3146 	  gcc_unreachable ();
3147 	}
3148 
3149       int *idx = map->get (rhs1);
3150       if (idx == NULL)
3151 	continue;
3152 
3153       /* maybe_optimize_range_tests allows statements without side-effects
3154 	 in the basic blocks as long as they are consumed in the same bb.
3155 	 Make sure rhs2's def stmt is not among them, otherwise we can't
3156 	 safely use get_nonzero_bits on it.  E.g. in:
3157 	  # RANGE [-83, 1] NONZERO 173
3158 	  # k_32 = PHI <k_47(13), k_12(9)>
3159 	 ...
3160 	  if (k_32 >= 0)
3161 	    goto <bb 5>; [26.46%]
3162 	  else
3163 	    goto <bb 9>; [73.54%]
3164 
3165 	  <bb 5> [local count: 140323371]:
3166 	  # RANGE [0, 1] NONZERO 1
3167 	  _5 = (int) k_32;
3168 	  # RANGE [0, 4] NONZERO 4
3169 	  _21 = _5 << 2;
3170 	  # RANGE [0, 4] NONZERO 4
3171 	  iftmp.0_44 = (char) _21;
3172 	  if (k_32 < iftmp.0_44)
3173 	    goto <bb 6>; [84.48%]
3174 	  else
3175 	    goto <bb 9>; [15.52%]
3176 	 the ranges on _5/_21/iftmp.0_44 are flow sensitive; they assume that
3177 	 k_32 >= 0.  If we'd optimize k_32 >= 0 to true and k_32 < iftmp.0_44
3178 	 to (unsigned) k_32 < (unsigned) iftmp.0_44, then we would execute
3179 	 those stmts even for negative k_32 and the value ranges would be no
3180 	 longer guaranteed and so the optimization would be invalid.  */
3181       if (opcode == ERROR_MARK)
3182 	{
3183 	  gimple *g = SSA_NAME_DEF_STMT (rhs2);
3184 	  basic_block bb2 = gimple_bb (g);
3185 	  if (bb2
3186 	      && bb2 != first_bb
3187 	      && dominated_by_p (CDI_DOMINATORS, bb2, first_bb))
3188 	    {
3189 	      /* As an exception, handle a few common cases.  */
3190 	      if (gimple_assign_cast_p (g)
3191 		  && INTEGRAL_TYPE_P (TREE_TYPE (gimple_assign_rhs1 (g)))
3192 		  && TYPE_UNSIGNED (TREE_TYPE (gimple_assign_rhs1 (g)))
3193 		  && (TYPE_PRECISION (TREE_TYPE (rhs2))
3194 		      > TYPE_PRECISION (TREE_TYPE (gimple_assign_rhs1 (g)))))
3195 		/* Zero-extension is always ok.  */ ;
3196 	      else if (is_gimple_assign (g)
3197 		       && gimple_assign_rhs_code (g) == BIT_AND_EXPR
3198 		       && TREE_CODE (gimple_assign_rhs2 (g)) == INTEGER_CST
3199 		       && !wi::neg_p (wi::to_wide (gimple_assign_rhs2 (g))))
3200 		/* Masking with INTEGER_CST with MSB clear is always ok
3201 		   too.  */ ;
3202 	      else
3203 		continue;
3204 	    }
3205 	}
3206 
3207       wide_int nz = get_nonzero_bits (rhs2);
3208       if (wi::neg_p (nz))
3209 	continue;
3210 
3211       /* We have EXP < RHS2 or EXP <= RHS2 where EXP >= 0
3212 	 and RHS2 is known to be RHS2 >= 0.  */
3213       tree utype = unsigned_type_for (TREE_TYPE (rhs1));
3214 
3215       enum warn_strict_overflow_code wc = WARN_STRICT_OVERFLOW_COMPARISON;
3216       if ((ranges[*idx].strict_overflow_p
3217 	   || ranges[i].strict_overflow_p)
3218 	  && issue_strict_overflow_warning (wc))
3219 	warning_at (gimple_location (stmt), OPT_Wstrict_overflow,
3220 		    "assuming signed overflow does not occur "
3221 		    "when simplifying range test");
3222 
3223       if (dump_file && (dump_flags & TDF_DETAILS))
3224 	{
3225 	  struct range_entry *r = &ranges[*idx];
3226 	  fprintf (dump_file, "Optimizing range test ");
3227 	  print_generic_expr (dump_file, r->exp);
3228 	  fprintf (dump_file, " +[");
3229 	  print_generic_expr (dump_file, r->low);
3230 	  fprintf (dump_file, ", ");
3231 	  print_generic_expr (dump_file, r->high);
3232 	  fprintf (dump_file, "] and comparison ");
3233 	  print_generic_expr (dump_file, rhs1);
3234 	  fprintf (dump_file, " %s ", op_symbol_code (ccode));
3235 	  print_generic_expr (dump_file, rhs2);
3236 	  fprintf (dump_file, "\n into (");
3237 	  print_generic_expr (dump_file, utype);
3238 	  fprintf (dump_file, ") ");
3239 	  print_generic_expr (dump_file, rhs1);
3240 	  fprintf (dump_file, " %s (", op_symbol_code (ccode));
3241 	  print_generic_expr (dump_file, utype);
3242 	  fprintf (dump_file, ") ");
3243 	  print_generic_expr (dump_file, rhs2);
3244 	  fprintf (dump_file, "\n");
3245 	}
3246 
3247       operand_entry *oe = (*ops)[ranges[i].idx];
3248       ranges[i].in_p = 0;
3249       if (opcode == BIT_IOR_EXPR
3250 	  || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
3251 	{
3252 	  ranges[i].in_p = 1;
3253 	  ccode = invert_tree_comparison (ccode, false);
3254 	}
3255 
3256       unsigned int uid = gimple_uid (stmt);
3257       gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
3258       gimple *g = gimple_build_assign (make_ssa_name (utype), NOP_EXPR, rhs1);
3259       gimple_set_uid (g, uid);
3260       rhs1 = gimple_assign_lhs (g);
3261       gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3262       g = gimple_build_assign (make_ssa_name (utype), NOP_EXPR, rhs2);
3263       gimple_set_uid (g, uid);
3264       rhs2 = gimple_assign_lhs (g);
3265       gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3266       if (tree_swap_operands_p (rhs1, rhs2))
3267 	{
3268 	  std::swap (rhs1, rhs2);
3269 	  ccode = swap_tree_comparison (ccode);
3270 	}
3271       if (gimple_code (stmt) == GIMPLE_COND)
3272 	{
3273 	  gcond *c = as_a <gcond *> (stmt);
3274 	  gimple_cond_set_code (c, ccode);
3275 	  gimple_cond_set_lhs (c, rhs1);
3276 	  gimple_cond_set_rhs (c, rhs2);
3277 	  update_stmt (stmt);
3278 	}
3279       else
3280 	{
3281 	  tree ctype = oe->op ? TREE_TYPE (oe->op) : boolean_type_node;
3282 	  if (!INTEGRAL_TYPE_P (ctype)
3283 	      || (TREE_CODE (ctype) != BOOLEAN_TYPE
3284 		  && TYPE_PRECISION (ctype) != 1))
3285 	    ctype = boolean_type_node;
3286 	  g = gimple_build_assign (make_ssa_name (ctype), ccode, rhs1, rhs2);
3287 	  gimple_set_uid (g, uid);
3288 	  gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3289 	  if (oe->op && ctype != TREE_TYPE (oe->op))
3290 	    {
3291 	      g = gimple_build_assign (make_ssa_name (TREE_TYPE (oe->op)),
3292 				       NOP_EXPR, gimple_assign_lhs (g));
3293 	      gimple_set_uid (g, uid);
3294 	      gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3295 	    }
3296 	  ranges[i].exp = gimple_assign_lhs (g);
3297 	  oe->op = ranges[i].exp;
3298 	  ranges[i].low = build_zero_cst (TREE_TYPE (ranges[i].exp));
3299 	  ranges[i].high = ranges[i].low;
3300 	}
3301       ranges[i].strict_overflow_p = false;
3302       oe = (*ops)[ranges[*idx].idx];
3303       /* Now change all the other range test immediate uses, so that
3304 	 those tests will be optimized away.  */
3305       if (opcode == ERROR_MARK)
3306 	{
3307 	  if (oe->op)
3308 	    oe->op = build_int_cst (TREE_TYPE (oe->op),
3309 				    oe->rank == BIT_IOR_EXPR ? 0 : 1);
3310 	  else
3311 	    oe->op = (oe->rank == BIT_IOR_EXPR
3312 		      ? boolean_false_node : boolean_true_node);
3313 	}
3314       else
3315 	oe->op = error_mark_node;
3316       ranges[*idx].exp = NULL_TREE;
3317       ranges[*idx].low = NULL_TREE;
3318       ranges[*idx].high = NULL_TREE;
3319       any_changes = true;
3320     }
3321 
3322   delete map;
3323   return any_changes;
3324 }
3325 
3326 /* Optimize range tests, similarly to how fold_range_test optimizes
3327    them on trees.  The tree code for the binary
3328    operation between all the operands is OPCODE.
3329    If OPCODE is ERROR_MARK, optimize_range_tests is called from within
3330    maybe_optimize_range_tests for inter-bb range optimization.
3331    In that case if oe->op is NULL, oe->id is bb->index whose
3332    GIMPLE_COND is && or ||ed into the test, and oe->rank says
3333    the actual opcode.
3334    FIRST_BB is the first basic block if OPCODE is ERROR_MARK.  */
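
/* Illustrative sketch (added commentary, SSA names made up): given a
   chain like

     _1 = a_5 == 3;
     _2 = a_5 == 4;
     _3 = _1 | _2;

   the ranges [3, 3] and [4, 4] on a_5 are merged into [3, 4], which
   update_range_test can then emit as a single test, roughly
   (unsigned) a_5 - 3 <= 1, and the other operand entry is dropped
   from *OPS.  */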
3335 
3336 static bool
3337 optimize_range_tests (enum tree_code opcode,
3338 		      vec<operand_entry *> *ops, basic_block first_bb)
3339 {
3340   unsigned int length = ops->length (), i, j, first;
3341   operand_entry *oe;
3342   struct range_entry *ranges;
3343   bool any_changes = false;
3344 
3345   if (length == 1)
3346     return false;
3347 
3348   ranges = XNEWVEC (struct range_entry, length);
3349   for (i = 0; i < length; i++)
3350     {
3351       oe = (*ops)[i];
3352       ranges[i].idx = i;
3353       init_range_entry (ranges + i, oe->op,
3354 			oe->op
3355 			? NULL
3356 			: last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id)));
3357       /* For | invert it now, we will invert it again before emitting
3358 	 the optimized expression.  */
3359       if (opcode == BIT_IOR_EXPR
3360 	  || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
3361 	ranges[i].in_p = !ranges[i].in_p;
3362     }
3363 
3364   qsort (ranges, length, sizeof (*ranges), range_entry_cmp);
3365   for (i = 0; i < length; i++)
3366     if (ranges[i].exp != NULL_TREE && TREE_CODE (ranges[i].exp) == SSA_NAME)
3367       break;
3368 
3369   /* Try to merge ranges.  */
3370   for (first = i; i < length; i++)
3371     {
3372       tree low = ranges[i].low;
3373       tree high = ranges[i].high;
3374       int in_p = ranges[i].in_p;
3375       bool strict_overflow_p = ranges[i].strict_overflow_p;
3376       int update_fail_count = 0;
3377 
3378       for (j = i + 1; j < length; j++)
3379 	{
3380 	  if (ranges[i].exp != ranges[j].exp)
3381 	    break;
3382 	  if (!merge_ranges (&in_p, &low, &high, in_p, low, high,
3383 			     ranges[j].in_p, ranges[j].low, ranges[j].high))
3384 	    break;
3385 	  strict_overflow_p |= ranges[j].strict_overflow_p;
3386 	}
3387 
3388       if (j == i + 1)
3389 	continue;
3390 
3391       if (update_range_test (ranges + i, ranges + i + 1, NULL, j - i - 1,
3392 			     opcode, ops, ranges[i].exp, NULL, in_p,
3393 			     low, high, strict_overflow_p))
3394 	{
3395 	  i = j - 1;
3396 	  any_changes = true;
3397 	}
3398       /* Avoid quadratic complexity if all merge_ranges calls would succeed,
3399 	 while update_range_test would fail.  */
3400       else if (update_fail_count == 64)
3401 	i = j - 1;
3402       else
3403 	++update_fail_count;
3404     }
3405 
3406   any_changes |= optimize_range_tests_1 (opcode, first, length, true,
3407 					 ops, ranges);
3408 
3409   if (BRANCH_COST (optimize_function_for_speed_p (cfun), false) >= 2)
3410     any_changes |= optimize_range_tests_1 (opcode, first, length, false,
3411 					   ops, ranges);
3412   if (lshift_cheap_p (optimize_function_for_speed_p (cfun)))
3413     any_changes |= optimize_range_tests_to_bit_test (opcode, first, length,
3414 						     ops, ranges);
3415   any_changes |= optimize_range_tests_cmp_bitwise (opcode, first, length,
3416 						   ops, ranges);
3417   any_changes |= optimize_range_tests_var_bound (opcode, first, length, ops,
3418 						 ranges, first_bb);
3419 
3420   if (any_changes && opcode != ERROR_MARK)
3421     {
3422       j = 0;
3423       FOR_EACH_VEC_ELT (*ops, i, oe)
3424 	{
3425 	  if (oe->op == error_mark_node)
3426 	    continue;
3427 	  else if (i != j)
3428 	    (*ops)[j] = oe;
3429 	  j++;
3430 	}
3431       ops->truncate (j);
3432     }
3433 
3434   XDELETEVEC (ranges);
3435   return any_changes;
3436 }
3437 
3438 /* A subroutine of optimize_vec_cond_expr to extract and canonicalize
3439    the operands of the VEC_COND_EXPR.  Returns ERROR_MARK on failure,
3440    otherwise the comparison code.  */
3441 
3442 static tree_code
3443 ovce_extract_ops (tree var, gassign **rets, bool *reti)
3444 {
3445   if (TREE_CODE (var) != SSA_NAME)
3446     return ERROR_MARK;
3447 
3448   gassign *stmt = dyn_cast <gassign *> (SSA_NAME_DEF_STMT (var));
3449   if (stmt == NULL)
3450     return ERROR_MARK;
3451 
3452   /* ??? If we start creating more COND_EXPR, we could perform
3453      this same optimization with them.	For now, simplify.  */
3454   if (gimple_assign_rhs_code (stmt) != VEC_COND_EXPR)
3455     return ERROR_MARK;
3456 
3457   tree cond = gimple_assign_rhs1 (stmt);
3458   tree_code cmp = TREE_CODE (cond);
3459   if (TREE_CODE_CLASS (cmp) != tcc_comparison)
3460     return ERROR_MARK;
3461 
3462   /* ??? For now, allow only canonical true and false result vectors.
3463      We could expand this to other constants should the need arise,
3464      but at the moment we don't create them.  */
3465   tree t = gimple_assign_rhs2 (stmt);
3466   tree f = gimple_assign_rhs3 (stmt);
3467   bool inv;
3468   if (integer_all_onesp (t))
3469     inv = false;
3470   else if (integer_all_onesp (f))
3471     {
3472       cmp = invert_tree_comparison (cmp, false);
3473       inv = true;
3474     }
3475   else
3476     return ERROR_MARK;
3477   if (!integer_zerop (f))
3478     return ERROR_MARK;
3479 
3480   /* Success!  */
3481   if (rets)
3482     *rets = stmt;
3483   if (reti)
3484     *reti = inv;
3485   return cmp;
3486 }
3487 
3488 /* Optimize the condition of VEC_COND_EXPRs which have been combined
3489    with OPCODE (either BIT_AND_EXPR or BIT_IOR_EXPR).  */
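
/* Illustrative sketch (added commentary, SSA names made up): given

     x_1 = VEC_COND_EXPR <v_2 < v_3, { -1, ... }, { 0, ... }>;
     y_4 = VEC_COND_EXPR <v_2 <= v_3, { -1, ... }, { 0, ... }>;
     z_5 = x_1 & y_4;

   maybe_fold_and_comparisons can combine the two conditions into
   v_2 < v_3; the first VEC_COND_EXPR is then reused with the combined
   condition and y_4's operand entry is dropped from *OPS.  */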
3490 
3491 static bool
3492 optimize_vec_cond_expr (tree_code opcode, vec<operand_entry *> *ops)
3493 {
3494   unsigned int length = ops->length (), i, j;
3495   bool any_changes = false;
3496 
3497   if (length == 1)
3498     return false;
3499 
3500   for (i = 0; i < length; ++i)
3501     {
3502       tree elt0 = (*ops)[i]->op;
3503 
3504       gassign *stmt0;
3505       bool invert;
3506       tree_code cmp0 = ovce_extract_ops (elt0, &stmt0, &invert);
3507       if (cmp0 == ERROR_MARK)
3508 	continue;
3509 
3510       for (j = i + 1; j < length; ++j)
3511 	{
3512 	  tree &elt1 = (*ops)[j]->op;
3513 
3514 	  gassign *stmt1;
3515 	  tree_code cmp1 = ovce_extract_ops (elt1, &stmt1, NULL);
3516 	  if (cmp1 == ERROR_MARK)
3517 	    continue;
3518 
3519 	  tree cond0 = gimple_assign_rhs1 (stmt0);
3520 	  tree x0 = TREE_OPERAND (cond0, 0);
3521 	  tree y0 = TREE_OPERAND (cond0, 1);
3522 
3523 	  tree cond1 = gimple_assign_rhs1 (stmt1);
3524 	  tree x1 = TREE_OPERAND (cond1, 0);
3525 	  tree y1 = TREE_OPERAND (cond1, 1);
3526 
3527 	  tree comb;
3528 	  if (opcode == BIT_AND_EXPR)
3529 	    comb = maybe_fold_and_comparisons (cmp0, x0, y0, cmp1, x1, y1);
3530 	  else if (opcode == BIT_IOR_EXPR)
3531 	    comb = maybe_fold_or_comparisons (cmp0, x0, y0, cmp1, x1, y1);
3532 	  else
3533 	    gcc_unreachable ();
3534 	  if (comb == NULL)
3535 	    continue;
3536 
3537 	  /* Success! */
3538 	  if (dump_file && (dump_flags & TDF_DETAILS))
3539 	    {
3540 	      fprintf (dump_file, "Transforming ");
3541 	      print_generic_expr (dump_file, cond0);
3542 	      fprintf (dump_file, " %c ", opcode == BIT_AND_EXPR ? '&' : '|');
3543 	      print_generic_expr (dump_file, cond1);
3544 	      fprintf (dump_file, " into ");
3545 	      print_generic_expr (dump_file, comb);
3546 	      fputc ('\n', dump_file);
3547 	    }
3548 
3549 	  gimple_assign_set_rhs1 (stmt0, comb);
3550 	  if (invert)
3551 	    std::swap (*gimple_assign_rhs2_ptr (stmt0),
3552 		       *gimple_assign_rhs3_ptr (stmt0));
3553 	  update_stmt (stmt0);
3554 
3555 	  elt1 = error_mark_node;
3556 	  any_changes = true;
3557 	}
3558     }
3559 
3560   if (any_changes)
3561     {
3562       operand_entry *oe;
3563       j = 0;
3564       FOR_EACH_VEC_ELT (*ops, i, oe)
3565 	{
3566 	  if (oe->op == error_mark_node)
3567 	    continue;
3568 	  else if (i != j)
3569 	    (*ops)[j] = oe;
3570 	  j++;
3571 	}
3572       ops->truncate (j);
3573     }
3574 
3575   return any_changes;
3576 }
3577 
3578 /* Return true if STMT is a cast like:
3579    <bb N>:
3580    ...
3581    _123 = (int) _234;
3582 
3583    <bb M>:
3584    # _345 = PHI <_123(N), 1(...), 1(...)>
3585    where _234 has bool type, _123 has single use and
3586    bb N has a single successor M.  This is commonly used in
3587    the last block of a range test.
3588 
3589    Also return true if STMT is a tcc_comparison, like:
3590    <bb N>:
3591    ...
3592    _234 = a_2(D) == 2;
3593 
3594    <bb M>:
3595    # _345 = PHI <_234(N), 1(...), 1(...)>
3596    _346 = (int) _345;
3597    where _234 has bool type, a single use and
3598    bb N has a single successor M.  This is commonly used in
3599    the last block of a range test.  */
3600 
3601 static bool
3602 final_range_test_p (gimple *stmt)
3603 {
3604   basic_block bb, rhs_bb, lhs_bb;
3605   edge e;
3606   tree lhs, rhs;
3607   use_operand_p use_p;
3608   gimple *use_stmt;
3609 
3610   if (!gimple_assign_cast_p (stmt)
3611       && (!is_gimple_assign (stmt)
3612 	  || (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt))
3613 	      != tcc_comparison)))
3614     return false;
3615   bb = gimple_bb (stmt);
3616   if (!single_succ_p (bb))
3617     return false;
3618   e = single_succ_edge (bb);
3619   if (e->flags & EDGE_COMPLEX)
3620     return false;
3621 
3622   lhs = gimple_assign_lhs (stmt);
3623   rhs = gimple_assign_rhs1 (stmt);
3624   if (gimple_assign_cast_p (stmt)
3625       && (!INTEGRAL_TYPE_P (TREE_TYPE (lhs))
3626 	  || TREE_CODE (rhs) != SSA_NAME
3627 	  || TREE_CODE (TREE_TYPE (rhs)) != BOOLEAN_TYPE))
3628     return false;
3629 
3630   if (!gimple_assign_cast_p (stmt)
3631       && (TREE_CODE (TREE_TYPE (lhs)) != BOOLEAN_TYPE))
3632       return false;
3633 
3634   /* Test whether lhs is consumed only by a PHI in the only successor bb.  */
3635   if (!single_imm_use (lhs, &use_p, &use_stmt))
3636     return false;
3637 
3638   if (gimple_code (use_stmt) != GIMPLE_PHI
3639       || gimple_bb (use_stmt) != e->dest)
3640     return false;
3641 
3642   /* And that the rhs is defined in the same loop.  */
3643   if (gimple_assign_cast_p (stmt))
3644     {
3645       if (TREE_CODE (rhs) != SSA_NAME
3646 	  || !(rhs_bb = gimple_bb (SSA_NAME_DEF_STMT (rhs)))
3647 	  || !flow_bb_inside_loop_p (loop_containing_stmt (stmt), rhs_bb))
3648 	return false;
3649     }
3650   else
3651     {
3652       if (TREE_CODE (lhs) != SSA_NAME
3653 	  || !(lhs_bb = gimple_bb (SSA_NAME_DEF_STMT (lhs)))
3654 	  || !flow_bb_inside_loop_p (loop_containing_stmt (stmt), lhs_bb))
3655 	return false;
3656     }
3657 
3658   return true;
3659 }
3660 
3661 /* Return true if BB is suitable basic block for inter-bb range test
3662    optimization.  If BACKWARD is true, BB should be the only predecessor
3663    of TEST_BB, and *OTHER_BB is either NULL and filled in by the routine,
3664    or is compared against to find a common basic block to which all
3665    conditions branch if true resp. false.  If BACKWARD is false, TEST_BB should
3666    be the only predecessor of BB.  */
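
/* Illustrative shape (added commentary) for the BACKWARD case, where BB
   is the single predecessor of TEST_BB:

     <BB>:       if (a_1 > 16) goto <OTHER_BB>; else goto <TEST_BB>;
     <TEST_BB>:  if (a_1 == 5) goto <OTHER_BB>; else goto <...>;

   BB qualifies if one successor edge reaches TEST_BB, the other reaches
   *OTHER_BB, and every PHI in *OTHER_BB has matching arguments on the
   BB and TEST_BB edges.  */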
3667 
3668 static bool
3669 suitable_cond_bb (basic_block bb, basic_block test_bb, basic_block *other_bb,
3670 		  bool backward)
3671 {
3672   edge_iterator ei, ei2;
3673   edge e, e2;
3674   gimple *stmt;
3675   gphi_iterator gsi;
3676   bool other_edge_seen = false;
3677   bool is_cond;
3678 
3679   if (test_bb == bb)
3680     return false;
3681   /* Check last stmt first.  */
3682   stmt = last_stmt (bb);
3683   if (stmt == NULL
3684       || (gimple_code (stmt) != GIMPLE_COND
3685 	  && (backward || !final_range_test_p (stmt)))
3686       || gimple_visited_p (stmt)
3687       || stmt_could_throw_p (stmt)
3688       || *other_bb == bb)
3689     return false;
3690   is_cond = gimple_code (stmt) == GIMPLE_COND;
3691   if (is_cond)
3692     {
3693       /* If last stmt is GIMPLE_COND, verify that one of the succ edges
3694 	 goes to the next bb (if BACKWARD, it is TEST_BB), and the other
3695 	 to *OTHER_BB (if not set yet, try to find it out).  */
3696       if (EDGE_COUNT (bb->succs) != 2)
3697 	return false;
3698       FOR_EACH_EDGE (e, ei, bb->succs)
3699 	{
3700 	  if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
3701 	    return false;
3702 	  if (e->dest == test_bb)
3703 	    {
3704 	      if (backward)
3705 		continue;
3706 	      else
3707 		return false;
3708 	    }
3709 	  if (e->dest == bb)
3710 	    return false;
3711 	  if (*other_bb == NULL)
3712 	    {
3713 	      FOR_EACH_EDGE (e2, ei2, test_bb->succs)
3714 		if (!(e2->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
3715 		  return false;
3716 		else if (e->dest == e2->dest)
3717 		  *other_bb = e->dest;
3718 	      if (*other_bb == NULL)
3719 		return false;
3720 	    }
3721 	  if (e->dest == *other_bb)
3722 	    other_edge_seen = true;
3723 	  else if (backward)
3724 	    return false;
3725 	}
3726       if (*other_bb == NULL || !other_edge_seen)
3727 	return false;
3728     }
3729   else if (single_succ (bb) != *other_bb)
3730     return false;
3731 
3732   /* Now check all PHIs of *OTHER_BB.  */
3733   e = find_edge (bb, *other_bb);
3734   e2 = find_edge (test_bb, *other_bb);
3735   for (gsi = gsi_start_phis (e->dest); !gsi_end_p (gsi); gsi_next (&gsi))
3736     {
3737       gphi *phi = gsi.phi ();
3738       /* If both BB and TEST_BB end with GIMPLE_COND, all PHI arguments
3739 	 corresponding to BB and TEST_BB predecessor must be the same.  */
3740       if (!operand_equal_p (gimple_phi_arg_def (phi, e->dest_idx),
3741 			    gimple_phi_arg_def (phi, e2->dest_idx), 0))
3742 	{
3743 	  /* Otherwise, if one of the blocks doesn't end with GIMPLE_COND,
3744 	     one of the PHIs should have the lhs of the last stmt in
3745 	     that block as PHI arg and that PHI should have 0 or 1
3746 	     corresponding to it in all other range test basic blocks
3747 	     considered.  */
3748 	  if (!is_cond)
3749 	    {
3750 	      if (gimple_phi_arg_def (phi, e->dest_idx)
3751 		  == gimple_assign_lhs (stmt)
3752 		  && (integer_zerop (gimple_phi_arg_def (phi, e2->dest_idx))
3753 		      || integer_onep (gimple_phi_arg_def (phi,
3754 							   e2->dest_idx))))
3755 		continue;
3756 	    }
3757 	  else
3758 	    {
3759 	      gimple *test_last = last_stmt (test_bb);
3760 	      if (gimple_code (test_last) != GIMPLE_COND
3761 		  && gimple_phi_arg_def (phi, e2->dest_idx)
3762 		     == gimple_assign_lhs (test_last)
3763 		  && (integer_zerop (gimple_phi_arg_def (phi, e->dest_idx))
3764 		      || integer_onep (gimple_phi_arg_def (phi, e->dest_idx))))
3765 		continue;
3766 	    }
3767 
3768 	  return false;
3769 	}
3770     }
3771   return true;
3772 }
3773 
3774 /* Return true if BB doesn't have side-effects that would disallow
3775    range test optimization, all SSA_NAMEs set in the bb are consumed
3776    in the bb and there are no PHIs.  */
3777 
3778 static bool
3779 no_side_effect_bb (basic_block bb)
3780 {
3781   gimple_stmt_iterator gsi;
3782   gimple *last;
3783 
3784   if (!gimple_seq_empty_p (phi_nodes (bb)))
3785     return false;
3786   last = last_stmt (bb);
3787   for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
3788     {
3789       gimple *stmt = gsi_stmt (gsi);
3790       tree lhs;
3791       imm_use_iterator imm_iter;
3792       use_operand_p use_p;
3793 
3794       if (is_gimple_debug (stmt))
3795 	continue;
3796       if (gimple_has_side_effects (stmt))
3797 	return false;
3798       if (stmt == last)
3799 	return true;
3800       if (!is_gimple_assign (stmt))
3801 	return false;
3802       lhs = gimple_assign_lhs (stmt);
3803       if (TREE_CODE (lhs) != SSA_NAME)
3804 	return false;
3805       if (gimple_assign_rhs_could_trap_p (stmt))
3806 	return false;
3807       FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
3808 	{
3809 	  gimple *use_stmt = USE_STMT (use_p);
3810 	  if (is_gimple_debug (use_stmt))
3811 	    continue;
3812 	  if (gimple_bb (use_stmt) != bb)
3813 	    return false;
3814 	}
3815     }
3816   return false;
3817 }
3818 
3819 /* If VAR is set by CODE (BIT_{AND,IOR}_EXPR) which is reassociable,
3820    return true and fill in *OPS recursively.  */
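
/* Illustrative sketch (added commentary, SSA names made up): with
   CODE == BIT_IOR_EXPR and

     _3 = _1 | _2;
     _7 = _3 | _4;

   calling get_ops on _7 recurses through _3 and pushes the single-use
   leaves _1, _2 and _4 into *OPS, returning true.  */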
3821 
3822 static bool
3823 get_ops (tree var, enum tree_code code, vec<operand_entry *> *ops,
3824 	 struct loop *loop)
3825 {
3826   gimple *stmt = SSA_NAME_DEF_STMT (var);
3827   tree rhs[2];
3828   int i;
3829 
3830   if (!is_reassociable_op (stmt, code, loop))
3831     return false;
3832 
3833   rhs[0] = gimple_assign_rhs1 (stmt);
3834   rhs[1] = gimple_assign_rhs2 (stmt);
3835   gimple_set_visited (stmt, true);
3836   for (i = 0; i < 2; i++)
3837     if (TREE_CODE (rhs[i]) == SSA_NAME
3838 	&& !get_ops (rhs[i], code, ops, loop)
3839 	&& has_single_use (rhs[i]))
3840       {
3841 	operand_entry *oe = operand_entry_pool.allocate ();
3842 
3843 	oe->op = rhs[i];
3844 	oe->rank = code;
3845 	oe->id = 0;
3846 	oe->count = 1;
3847 	oe->stmt_to_insert = NULL;
3848 	ops->safe_push (oe);
3849       }
3850   return true;
3851 }
3852 
3853 /* Find the ops that were added by get_ops starting from VAR, see if
3854    they were changed during update_range_test and if yes, create new
3855    stmts.  */
3856 
3857 static tree
3858 update_ops (tree var, enum tree_code code, vec<operand_entry *> ops,
3859 	    unsigned int *pidx, struct loop *loop)
3860 {
3861   gimple *stmt = SSA_NAME_DEF_STMT (var);
3862   tree rhs[4];
3863   int i;
3864 
3865   if (!is_reassociable_op (stmt, code, loop))
3866     return NULL;
3867 
3868   rhs[0] = gimple_assign_rhs1 (stmt);
3869   rhs[1] = gimple_assign_rhs2 (stmt);
3870   rhs[2] = rhs[0];
3871   rhs[3] = rhs[1];
3872   for (i = 0; i < 2; i++)
3873     if (TREE_CODE (rhs[i]) == SSA_NAME)
3874       {
3875 	rhs[2 + i] = update_ops (rhs[i], code, ops, pidx, loop);
3876 	if (rhs[2 + i] == NULL_TREE)
3877 	  {
3878 	    if (has_single_use (rhs[i]))
3879 	      rhs[2 + i] = ops[(*pidx)++]->op;
3880 	    else
3881 	      rhs[2 + i] = rhs[i];
3882 	  }
3883       }
3884   if ((rhs[2] != rhs[0] || rhs[3] != rhs[1])
3885       && (rhs[2] != rhs[1] || rhs[3] != rhs[0]))
3886     {
3887       gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
3888       var = make_ssa_name (TREE_TYPE (var));
3889       gassign *g = gimple_build_assign (var, gimple_assign_rhs_code (stmt),
3890 					rhs[2], rhs[3]);
3891       gimple_set_uid (g, gimple_uid (stmt));
3892       gimple_set_visited (g, true);
3893       gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3894     }
3895   return var;
3896 }
3897 
3898 /* Structure to track the initial value passed to get_ops and
3899    the range in the ops vector for each basic block.  */
3900 
3901 struct inter_bb_range_test_entry
3902 {
3903   tree op;
3904   unsigned int first_idx, last_idx;
3905 };
3906 
3907 /* Inter-bb range test optimization.
3908 
3909    Returns TRUE if a gimple conditional is optimized to a constant
3910    true/false condition, otherwise returns FALSE.
3911 
3912    This indicates to the caller that it should run a CFG cleanup pass
3913    once reassociation is completed.  */
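
/* Illustrative sketch (added commentary): for a CFG like

     <bb 2>:  if (a_1 == 3) goto <bb 4>; else goto <bb 3>;
     <bb 3>:  if (a_1 == 4) goto <bb 4>; else goto <bb 5>;

   the two GIMPLE_CONDs are collected as ||ed range tests,
   optimize_range_tests can merge them into a single test emitted at one
   of the conditions, and the other GIMPLE_COND is then folded to a
   constant true/false condition, which is why the caller is told to run
   a CFG cleanup.  */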
3914 
3915 static bool
3916 maybe_optimize_range_tests (gimple *stmt)
3917 {
3918   basic_block first_bb = gimple_bb (stmt);
3919   basic_block last_bb = first_bb;
3920   basic_block other_bb = NULL;
3921   basic_block bb;
3922   edge_iterator ei;
3923   edge e;
3924   auto_vec<operand_entry *> ops;
3925   auto_vec<inter_bb_range_test_entry> bbinfo;
3926   bool any_changes = false;
3927   bool cfg_cleanup_needed = false;
3928 
3929   /* Consider only basic blocks that end with GIMPLE_COND or
3930      a cast statement satisfying final_range_test_p.  All
3931      but the last bb in the first_bb .. last_bb range
3932      should end with GIMPLE_COND.  */
3933   if (gimple_code (stmt) == GIMPLE_COND)
3934     {
3935       if (EDGE_COUNT (first_bb->succs) != 2)
3936 	return cfg_cleanup_needed;
3937     }
3938   else if (final_range_test_p (stmt))
3939     other_bb = single_succ (first_bb);
3940   else
3941     return cfg_cleanup_needed;
3942 
3943   if (stmt_could_throw_p (stmt))
3944     return cfg_cleanup_needed;
3945 
3946   /* As relative ordering of post-dominator sons isn't fixed,
3947      maybe_optimize_range_tests can be called first on any
3948      bb in the range we want to optimize.  So, start searching
3949      backwards, if first_bb can be set to a predecessor.  */
3950   while (single_pred_p (first_bb))
3951     {
3952       basic_block pred_bb = single_pred (first_bb);
3953       if (!suitable_cond_bb (pred_bb, first_bb, &other_bb, true))
3954 	break;
3955       if (!no_side_effect_bb (first_bb))
3956 	break;
3957       first_bb = pred_bb;
3958     }
3959   /* If first_bb is last_bb, other_bb hasn't been computed yet.
3960      Before starting forward search in last_bb successors, find
3961      out the other_bb.  */
3962   if (first_bb == last_bb)
3963     {
3964       other_bb = NULL;
3965       /* As non-GIMPLE_COND last stmt always terminates the range,
3966 	 if forward search didn't discover anything, just give up.  */
3967       if (gimple_code (stmt) != GIMPLE_COND)
3968 	return cfg_cleanup_needed;
3969       /* Look at both successors.  Either it ends with a GIMPLE_COND
3970 	 and satisfies suitable_cond_bb, or ends with a cast and
3971 	 other_bb is that cast's successor.  */
3972       FOR_EACH_EDGE (e, ei, first_bb->succs)
3973 	if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE))
3974 	    || e->dest == first_bb)
3975 	  return cfg_cleanup_needed;
3976 	else if (single_pred_p (e->dest))
3977 	  {
3978 	    stmt = last_stmt (e->dest);
3979 	    if (stmt
3980 		&& gimple_code (stmt) == GIMPLE_COND
3981 		&& EDGE_COUNT (e->dest->succs) == 2)
3982 	      {
3983 		if (suitable_cond_bb (first_bb, e->dest, &other_bb, true))
3984 		  break;
3985 		else
3986 		  other_bb = NULL;
3987 	      }
3988 	    else if (stmt
3989 		     && final_range_test_p (stmt)
3990 		     && find_edge (first_bb, single_succ (e->dest)))
3991 	      {
3992 		other_bb = single_succ (e->dest);
3993 		if (other_bb == first_bb)
3994 		  other_bb = NULL;
3995 	      }
3996 	  }
3997       if (other_bb == NULL)
3998 	return cfg_cleanup_needed;
3999     }
4000   /* Now do the forward search, moving last_bb to successor bbs
4001      that aren't other_bb.  */
4002   while (EDGE_COUNT (last_bb->succs) == 2)
4003     {
4004       FOR_EACH_EDGE (e, ei, last_bb->succs)
4005 	if (e->dest != other_bb)
4006 	  break;
4007       if (e == NULL)
4008 	break;
4009       if (!single_pred_p (e->dest))
4010 	break;
4011       if (!suitable_cond_bb (e->dest, last_bb, &other_bb, false))
4012 	break;
4013       if (!no_side_effect_bb (e->dest))
4014 	break;
4015       last_bb = e->dest;
4016     }
4017   if (first_bb == last_bb)
4018     return cfg_cleanup_needed;
4019   /* Here basic blocks first_bb through last_bb's predecessor
4020      end with GIMPLE_COND, all of them have one of the edges to
4021      other_bb and another to another block in the range,
4022      all blocks except first_bb don't have side-effects and
4023      last_bb ends with either GIMPLE_COND, or cast satisfying
4024      final_range_test_p.  */
4025   for (bb = last_bb; ; bb = single_pred (bb))
4026     {
4027       enum tree_code code;
4028       tree lhs, rhs;
4029       inter_bb_range_test_entry bb_ent;
4030 
4031       bb_ent.op = NULL_TREE;
4032       bb_ent.first_idx = ops.length ();
4033       bb_ent.last_idx = bb_ent.first_idx;
4034       e = find_edge (bb, other_bb);
4035       stmt = last_stmt (bb);
4036       gimple_set_visited (stmt, true);
4037       if (gimple_code (stmt) != GIMPLE_COND)
4038 	{
4039 	  use_operand_p use_p;
4040 	  gimple *phi;
4041 	  edge e2;
4042 	  unsigned int d;
4043 
4044 	  lhs = gimple_assign_lhs (stmt);
4045 	  rhs = gimple_assign_rhs1 (stmt);
4046 	  gcc_assert (bb == last_bb);
4047 
4048 	  /* stmt is
4049 	     _123 = (int) _234;
4050 	     OR
4051 	     _234 = a_2(D) == 2;
4052 
4053 	     followed by:
4054 	     <bb M>:
4055 	     # _345 = PHI <_123(N), 1(...), 1(...)>
4056 
4057 	     or 0 instead of 1.  If it is 0, the _234
4058 	     range test is anded together with all the
4059 	     other range tests, if it is 1, it is ored with
4060 	     them.  */
4061 	  single_imm_use (lhs, &use_p, &phi);
4062 	  gcc_assert (gimple_code (phi) == GIMPLE_PHI);
4063 	  e2 = find_edge (first_bb, other_bb);
4064 	  d = e2->dest_idx;
4065 	  gcc_assert (gimple_phi_arg_def (phi, e->dest_idx) == lhs);
4066 	  if (integer_zerop (gimple_phi_arg_def (phi, d)))
4067 	    code = BIT_AND_EXPR;
4068 	  else
4069 	    {
4070 	      gcc_checking_assert (integer_onep (gimple_phi_arg_def (phi, d)));
4071 	      code = BIT_IOR_EXPR;
4072 	    }
4073 
4074 	  /* If _234 SSA_NAME_DEF_STMT is
4075 	     _234 = _567 | _789;
4076 	     (or &, corresponding to 1/0 in the phi arguments),
4077 	     push into ops the individual range test arguments
4078 	     of the bitwise or resp. and, recursively.  */
4079 	  if (TREE_CODE (rhs) == SSA_NAME
4080 	      && (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt))
4081 		  != tcc_comparison)
4082 	      && !get_ops (rhs, code, &ops,
4083 			loop_containing_stmt (stmt))
4084 	      && has_single_use (rhs))
4085 	    {
4086 	      /* Otherwise, push the _234 range test itself.  */
4087 	      operand_entry *oe = operand_entry_pool.allocate ();
4088 
4089 	      oe->op = rhs;
4090 	      oe->rank = code;
4091 	      oe->id = 0;
4092 	      oe->count = 1;
4093 	      oe->stmt_to_insert = NULL;
4094 	      ops.safe_push (oe);
4095 	      bb_ent.last_idx++;
4096 	      bb_ent.op = rhs;
4097 	    }
4098 	  else if (is_gimple_assign (stmt)
4099 		   && (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt))
4100 		       == tcc_comparison)
4101 		   && !get_ops (lhs, code, &ops,
4102 				loop_containing_stmt (stmt))
4103 		   && has_single_use (lhs))
4104 	    {
4105 	      operand_entry *oe = operand_entry_pool.allocate ();
4106 	      oe->op = lhs;
4107 	      oe->rank = code;
4108 	      oe->id = 0;
4109 	      oe->count = 1;
4110 	      ops.safe_push (oe);
4111 	      bb_ent.last_idx++;
4112 	      bb_ent.op = lhs;
4113 	    }
4114 	  else
4115 	    {
4116 	      bb_ent.last_idx = ops.length ();
4117 	      bb_ent.op = rhs;
4118 	    }
4119 	  bbinfo.safe_push (bb_ent);
4120 	  continue;
4121 	}
4122       /* Otherwise stmt is GIMPLE_COND.  */
4123       code = gimple_cond_code (stmt);
4124       lhs = gimple_cond_lhs (stmt);
4125       rhs = gimple_cond_rhs (stmt);
4126       if (TREE_CODE (lhs) == SSA_NAME
4127 	  && INTEGRAL_TYPE_P (TREE_TYPE (lhs))
4128 	  && ((code != EQ_EXPR && code != NE_EXPR)
4129 	      || rhs != boolean_false_node
4130 		 /* Either push into ops the individual bitwise
4131 		    or resp. and operands, depending on which
4132 		    edge is other_bb.  */
4133 	      || !get_ops (lhs, (((e->flags & EDGE_TRUE_VALUE) == 0)
4134 				 ^ (code == EQ_EXPR))
4135 				? BIT_AND_EXPR : BIT_IOR_EXPR, &ops,
4136 			   loop_containing_stmt (stmt))))
4137 	{
4138 	  /* Or push the GIMPLE_COND stmt itself.  */
4139 	  operand_entry *oe = operand_entry_pool.allocate ();
4140 
4141 	  oe->op = NULL;
4142 	  oe->rank = (e->flags & EDGE_TRUE_VALUE)
4143 		     ? BIT_IOR_EXPR : BIT_AND_EXPR;
4144 	  /* oe->op = NULL signals that there is no SSA_NAME
4145 	     for the range test, and oe->id instead is the
4146 	     number of the basic block at whose end the
4147 	     GIMPLE_COND is.  */
4148 	  oe->id = bb->index;
4149 	  oe->count = 1;
4150 	  oe->stmt_to_insert = NULL;
4151 	  ops.safe_push (oe);
4152 	  bb_ent.op = NULL;
4153 	  bb_ent.last_idx++;
4154 	}
4155       else if (ops.length () > bb_ent.first_idx)
4156 	{
4157 	  bb_ent.op = lhs;
4158 	  bb_ent.last_idx = ops.length ();
4159 	}
4160       bbinfo.safe_push (bb_ent);
4161       if (bb == first_bb)
4162 	break;
4163     }
4164   if (ops.length () > 1)
4165     any_changes = optimize_range_tests (ERROR_MARK, &ops, first_bb);
4166   if (any_changes)
4167     {
4168       unsigned int idx, max_idx = 0;
4169       /* update_ops relies on has_single_use predicates returning the
4170 	 same values as it did during get_ops earlier.  Additionally it
4171 	 never removes statements, only adds new ones and it should walk
4172 	 from the single imm use and check the predicate already before
4173 	 making those changes.
4174 	 On the other side, the handling of GIMPLE_COND directly can turn
4175 	 previously multiply used SSA_NAMEs into single use SSA_NAMEs, so
4176 	 it needs to be done in a separate loop afterwards.  */
4177       for (bb = last_bb, idx = 0; ; bb = single_pred (bb), idx++)
4178 	{
4179 	  if (bbinfo[idx].first_idx < bbinfo[idx].last_idx
4180 	      && bbinfo[idx].op != NULL_TREE)
4181 	    {
4182 	      tree new_op;
4183 
4184 	      max_idx = idx;
4185 	      stmt = last_stmt (bb);
4186 	      new_op = update_ops (bbinfo[idx].op,
4187 				   (enum tree_code)
4188 				   ops[bbinfo[idx].first_idx]->rank,
4189 				   ops, &bbinfo[idx].first_idx,
4190 				   loop_containing_stmt (stmt));
4191 	      if (new_op == NULL_TREE)
4192 		{
4193 		  gcc_assert (bb == last_bb);
4194 		  new_op = ops[bbinfo[idx].first_idx++]->op;
4195 		}
4196 	      if (bbinfo[idx].op != new_op)
4197 		{
4198 		  imm_use_iterator iter;
4199 		  use_operand_p use_p;
4200 		  gimple *use_stmt, *cast_or_tcc_cmp_stmt = NULL;
4201 
4202 		  FOR_EACH_IMM_USE_STMT (use_stmt, iter, bbinfo[idx].op)
4203 		    if (is_gimple_debug (use_stmt))
4204 		      continue;
4205 		    else if (gimple_code (use_stmt) == GIMPLE_COND
4206 			     || gimple_code (use_stmt) == GIMPLE_PHI)
4207 		      FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
4208 			SET_USE (use_p, new_op);
4209 		    else if ((is_gimple_assign (use_stmt)
4210 			      && (TREE_CODE_CLASS
4211 				  (gimple_assign_rhs_code (use_stmt))
4212 				  == tcc_comparison)))
4213 		      cast_or_tcc_cmp_stmt = use_stmt;
4214 		    else if (gimple_assign_cast_p (use_stmt))
4215 		      cast_or_tcc_cmp_stmt = use_stmt;
4216 		    else
4217 		      gcc_unreachable ();
4218 
4219 		  if (cast_or_tcc_cmp_stmt)
4220 		    {
4221 		      gcc_assert (bb == last_bb);
4222 		      tree lhs = gimple_assign_lhs (cast_or_tcc_cmp_stmt);
4223 		      tree new_lhs = make_ssa_name (TREE_TYPE (lhs));
4224 		      enum tree_code rhs_code
4225 			= gimple_assign_cast_p (cast_or_tcc_cmp_stmt)
4226 			? gimple_assign_rhs_code (cast_or_tcc_cmp_stmt)
4227 			: CONVERT_EXPR;
4228 		      gassign *g;
4229 		      if (is_gimple_min_invariant (new_op))
4230 			{
4231 			  new_op = fold_convert (TREE_TYPE (lhs), new_op);
4232 			  g = gimple_build_assign (new_lhs, new_op);
4233 			}
4234 		      else
4235 			g = gimple_build_assign (new_lhs, rhs_code, new_op);
4236 		      gimple_stmt_iterator gsi
4237 			= gsi_for_stmt (cast_or_tcc_cmp_stmt);
4238 		      gimple_set_uid (g, gimple_uid (cast_or_tcc_cmp_stmt));
4239 		      gimple_set_visited (g, true);
4240 		      gsi_insert_before (&gsi, g, GSI_SAME_STMT);
4241 		      FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs)
4242 			if (is_gimple_debug (use_stmt))
4243 			  continue;
4244 			else if (gimple_code (use_stmt) == GIMPLE_COND
4245 				 || gimple_code (use_stmt) == GIMPLE_PHI)
4246 			  FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
4247 			    SET_USE (use_p, new_lhs);
4248 			else
4249 			  gcc_unreachable ();
4250 		    }
4251 		}
4252 	    }
4253 	  if (bb == first_bb)
4254 	    break;
4255 	}
4256       for (bb = last_bb, idx = 0; ; bb = single_pred (bb), idx++)
4257 	{
4258 	  if (bbinfo[idx].first_idx < bbinfo[idx].last_idx
4259 	      && bbinfo[idx].op == NULL_TREE
4260 	      && ops[bbinfo[idx].first_idx]->op != NULL_TREE)
4261 	    {
4262 	      gcond *cond_stmt = as_a <gcond *> (last_stmt (bb));
4263 
4264 	      if (idx > max_idx)
4265 		max_idx = idx;
4266 
4267 	      /* If we collapse the conditional to a true/false
4268 		 condition, then bubble that knowledge up to our caller.  */
4269 	      if (integer_zerop (ops[bbinfo[idx].first_idx]->op))
4270 		{
4271 		  gimple_cond_make_false (cond_stmt);
4272 		  cfg_cleanup_needed = true;
4273 		}
4274 	      else if (integer_onep (ops[bbinfo[idx].first_idx]->op))
4275 		{
4276 		  gimple_cond_make_true (cond_stmt);
4277 		  cfg_cleanup_needed = true;
4278 		}
4279 	      else
4280 		{
4281 		  gimple_cond_set_code (cond_stmt, NE_EXPR);
4282 		  gimple_cond_set_lhs (cond_stmt,
4283 				       ops[bbinfo[idx].first_idx]->op);
4284 		  gimple_cond_set_rhs (cond_stmt, boolean_false_node);
4285 		}
4286 	      update_stmt (cond_stmt);
4287 	    }
4288 	  if (bb == first_bb)
4289 	    break;
4290 	}
4291 
4292       /* The above changes could result in basic blocks after the first
4293 	 modified one, up to and including last_bb, to be executed even if
4294 	 they would not be in the original program.  If the value ranges of
4295 	 assignment lhs' in those bbs were dependent on the conditions
4296 	 guarding those basic blocks which now can change, the VRs might
4297 	 be incorrect.  As no_side_effect_bb should ensure those SSA_NAMEs
4298 	 are only used within the same bb, it should be not a big deal if
4299 	 we just reset all the VRs in those bbs.  See PR68671.  */
4300       for (bb = last_bb, idx = 0; idx < max_idx; bb = single_pred (bb), idx++)
4301 	reset_flow_sensitive_info_in_bb (bb);
4302     }
4303   return cfg_cleanup_needed;
4304 }
4305 
4306 /* Return true if OPERAND is defined by a PHI node which uses the LHS
4307    of STMT in its operands.  This is also known as a "destructive
4308    update" operation.  */
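
/* Illustrative sketch (added commentary, SSA names made up): for a
   reduction such as

     # sum_1 = PHI <0.0(preheader), sum_2(latch)>
     sum_2 = sum_1 + x_3;

   is_phi_for_stmt (the addition, sum_1) returns true, because sum_1 is
   defined by a PHI node that uses the addition's lhs sum_2.  */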
4309 
4310 static bool
4311 is_phi_for_stmt (gimple *stmt, tree operand)
4312 {
4313   gimple *def_stmt;
4314   gphi *def_phi;
4315   tree lhs;
4316   use_operand_p arg_p;
4317   ssa_op_iter i;
4318 
4319   if (TREE_CODE (operand) != SSA_NAME)
4320     return false;
4321 
4322   lhs = gimple_assign_lhs (stmt);
4323 
4324   def_stmt = SSA_NAME_DEF_STMT (operand);
4325   def_phi = dyn_cast <gphi *> (def_stmt);
4326   if (!def_phi)
4327     return false;
4328 
4329   FOR_EACH_PHI_ARG (arg_p, def_phi, i, SSA_OP_USE)
4330     if (lhs == USE_FROM_PTR (arg_p))
4331       return true;
4332   return false;
4333 }
4334 
4335 /* Remove def stmt of VAR if VAR has zero uses and recurse
4336    on rhs1 operand if so.  */
4337 
4338 static void
4339 remove_visited_stmt_chain (tree var)
4340 {
4341   gimple *stmt;
4342   gimple_stmt_iterator gsi;
4343 
4344   while (1)
4345     {
4346       if (TREE_CODE (var) != SSA_NAME || !has_zero_uses (var))
4347 	return;
4348       stmt = SSA_NAME_DEF_STMT (var);
4349       if (is_gimple_assign (stmt) && gimple_visited_p (stmt))
4350 	{
4351 	  var = gimple_assign_rhs1 (stmt);
4352 	  gsi = gsi_for_stmt (stmt);
4353 	  reassoc_remove_stmt (&gsi);
4354 	  release_defs (stmt);
4355 	}
4356       else
4357 	return;
4358     }
4359 }
4360 
4361 /* This function checks three consecutive operands in
4362    the passed operands vector OPS starting from OPINDEX and
4363    swaps two operands if it is profitable for the binary operation
4364    consuming the OPINDEX + 1 and OPINDEX + 2 operands.
4365 
4366    We pair ops with the same rank if possible.
4367 
4368    The alternative we try is to see if STMT is a destructive
4369    update style statement, which is like:
4370    b = phi (a, ...)
4371    a = c + b;
4372    In that case, we want to use the destructive update form to
4373    expose the possible vectorizer sum reduction opportunity.
4374    In that case, the third operand will be the phi node. This
4375    check is not performed if STMT is null.
4376 
4377    We could, of course, try to be better as noted above, and do a
4378    lot of work to try to find these opportunities in >3 operand
4379    cases, but it is unlikely to be worth it.  */
4380 
4381 static void
4382 swap_ops_for_binary_stmt (vec<operand_entry *> ops,
4383 			  unsigned int opindex, gimple *stmt)
4384 {
4385   operand_entry *oe1, *oe2, *oe3;
4386 
4387   oe1 = ops[opindex];
4388   oe2 = ops[opindex + 1];
4389   oe3 = ops[opindex + 2];
4390 
4391   if ((oe1->rank == oe2->rank
4392        && oe2->rank != oe3->rank)
4393       || (stmt && is_phi_for_stmt (stmt, oe3->op)
4394 	  && !is_phi_for_stmt (stmt, oe1->op)
4395 	  && !is_phi_for_stmt (stmt, oe2->op)))
4396     std::swap (*oe1, *oe3);
4397   else if ((oe1->rank == oe3->rank
4398 	    && oe2->rank != oe3->rank)
4399 	   || (stmt && is_phi_for_stmt (stmt, oe2->op)
4400 	       && !is_phi_for_stmt (stmt, oe1->op)
4401 	       && !is_phi_for_stmt (stmt, oe3->op)))
4402     std::swap (*oe1, *oe2);
4403 }
4404 
4405 /* If definition of RHS1 or RHS2 dominates STMT, return the later of those
4406    two definitions, otherwise return STMT.  */
4407 
4408 static inline gimple *
4409 find_insert_point (gimple *stmt, tree rhs1, tree rhs2)
4410 {
4411   if (TREE_CODE (rhs1) == SSA_NAME
4412       && reassoc_stmt_dominates_stmt_p (stmt, SSA_NAME_DEF_STMT (rhs1)))
4413     stmt = SSA_NAME_DEF_STMT (rhs1);
4414   if (TREE_CODE (rhs2) == SSA_NAME
4415       && reassoc_stmt_dominates_stmt_p (stmt, SSA_NAME_DEF_STMT (rhs2)))
4416     stmt = SSA_NAME_DEF_STMT (rhs2);
4417   return stmt;
4418 }
4419 
4420 /* If the stmt that defines operand has to be inserted, insert it
4421    before the use.  */
4422 static void
4423 insert_stmt_before_use (gimple *stmt, gimple *stmt_to_insert)
4424 {
4425   gcc_assert (is_gimple_assign (stmt_to_insert));
4426   tree rhs1 = gimple_assign_rhs1 (stmt_to_insert);
4427   tree rhs2 = gimple_assign_rhs2 (stmt_to_insert);
4428   gimple *insert_point = find_insert_point (stmt, rhs1, rhs2);
4429   gimple_stmt_iterator gsi = gsi_for_stmt (insert_point);
4430   gimple_set_uid (stmt_to_insert, gimple_uid (insert_point));
4431 
4432   /* If the insert point is not stmt, then insert_point would be
4433      the point where operand rhs1 or rhs2 is defined. In this case,
4434      stmt_to_insert has to be inserted afterwards. This would
4435      only happen when the stmt insertion point is flexible. */
4436   if (stmt == insert_point)
4437     gsi_insert_before (&gsi, stmt_to_insert, GSI_NEW_STMT);
4438   else
4439     insert_stmt_after (stmt_to_insert, insert_point);
4440 }
4441 
4442 
4443 /* Recursively rewrite our linearized statements so that the operators
4444    match those in OPS[OPINDEX], putting the computation in rank
4445    order.  Return new lhs.
4446    CHANGED is true if we shouldn't reuse the lhs SSA_NAME both in
4447    the current stmt and during recursive invocations.
4448    NEXT_CHANGED is true if we shouldn't reuse the lhs SSA_NAME in
4449    recursive invocations.  */
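
/* Illustrative sketch (added commentary): for a linearized chain

     t1_1 = x_2 + y_3;
     t2_4 = t1_1 + z_5;
     w_6 = t2_4 + v_7;

   the root statement receives OPS[OPINDEX] as its second operand, the
   recursion handles its rhs1 definition with OPINDEX + 1, and the
   deepest statement receives the last two entries of OPS, so the
   rewritten chain uses the operands in the order of the sorted OPS
   vector.  */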
4450 
4451 static tree
4452 rewrite_expr_tree (gimple *stmt, unsigned int opindex,
4453 		   vec<operand_entry *> ops, bool changed, bool next_changed)
4454 {
4455   tree rhs1 = gimple_assign_rhs1 (stmt);
4456   tree rhs2 = gimple_assign_rhs2 (stmt);
4457   tree lhs = gimple_assign_lhs (stmt);
4458   operand_entry *oe;
4459 
4460   /* The final recursion case for this function is that you have
4461      exactly two operations left.
4462      If we had exactly one op in the entire list to start with, we
4463      would have never called this function, and the tail recursion
4464      rewrites them one at a time.  */
4465   if (opindex + 2 == ops.length ())
4466     {
4467       operand_entry *oe1, *oe2;
4468 
4469       oe1 = ops[opindex];
4470       oe2 = ops[opindex + 1];
4471 
4472       if (rhs1 != oe1->op || rhs2 != oe2->op)
4473 	{
4474 	  gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
4475 	  unsigned int uid = gimple_uid (stmt);
4476 
4477 	  if (dump_file && (dump_flags & TDF_DETAILS))
4478 	    {
4479 	      fprintf (dump_file, "Transforming ");
4480 	      print_gimple_stmt (dump_file, stmt, 0);
4481 	    }
4482 
4483 	  /* If the stmt that defines operand has to be inserted, insert it
4484 	     before the use.  */
4485 	  if (oe1->stmt_to_insert)
4486 	    insert_stmt_before_use (stmt, oe1->stmt_to_insert);
4487 	  if (oe2->stmt_to_insert)
4488 	    insert_stmt_before_use (stmt, oe2->stmt_to_insert);
4489 	  /* Even when changed is false, reassociation could have e.g. removed
4490 	     some redundant operations, so unless we are just swapping the
4491 	     arguments or unless there is no change at all (then we just
4492 	     return lhs), force creation of a new SSA_NAME.  */
4493 	  if (changed || ((rhs1 != oe2->op || rhs2 != oe1->op) && opindex))
4494 	    {
4495 	      gimple *insert_point
4496 		= find_insert_point (stmt, oe1->op, oe2->op);
4497 	      lhs = make_ssa_name (TREE_TYPE (lhs));
4498 	      stmt
4499 		= gimple_build_assign (lhs, gimple_assign_rhs_code (stmt),
4500 				       oe1->op, oe2->op);
4501 	      gimple_set_uid (stmt, uid);
4502 	      gimple_set_visited (stmt, true);
4503 	      if (insert_point == gsi_stmt (gsi))
4504 		gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
4505 	      else
4506 		insert_stmt_after (stmt, insert_point);
4507 	    }
4508 	  else
4509 	    {
4510 	      gcc_checking_assert (find_insert_point (stmt, oe1->op, oe2->op)
4511 				   == stmt);
4512 	      gimple_assign_set_rhs1 (stmt, oe1->op);
4513 	      gimple_assign_set_rhs2 (stmt, oe2->op);
4514 	      update_stmt (stmt);
4515 	    }
4516 
4517 	  if (rhs1 != oe1->op && rhs1 != oe2->op)
4518 	    remove_visited_stmt_chain (rhs1);
4519 
4520 	  if (dump_file && (dump_flags & TDF_DETAILS))
4521 	    {
4522 	      fprintf (dump_file, " into ");
4523 	      print_gimple_stmt (dump_file, stmt, 0);
4524 	    }
4525 	}
4526       return lhs;
4527     }
4528 
4529   /* If we hit here, we should have 3 or more ops left.  */
4530   gcc_assert (opindex + 2 < ops.length ());
4531 
4532   /* Rewrite the next operator.  */
4533   oe = ops[opindex];
4534 
4535   /* If the stmt that defines operand has to be inserted, insert it
4536      before the use.  */
4537   if (oe->stmt_to_insert)
4538     insert_stmt_before_use (stmt, oe->stmt_to_insert);
4539 
4540   /* Recurse on the LHS of the binary operator, which is guaranteed to
4541      be the non-leaf side.  */
4542   tree new_rhs1
4543     = rewrite_expr_tree (SSA_NAME_DEF_STMT (rhs1), opindex + 1, ops,
4544 			 changed || oe->op != rhs2 || next_changed,
4545 			 false);
4546 
4547   if (oe->op != rhs2 || new_rhs1 != rhs1)
4548     {
4549       if (dump_file && (dump_flags & TDF_DETAILS))
4550 	{
4551 	  fprintf (dump_file, "Transforming ");
4552 	  print_gimple_stmt (dump_file, stmt, 0);
4553 	}
4554 
4555       /* If changed is false, this is either opindex == 0
4556 	 or all outer rhs2's were equal to corresponding oe->op,
4557 	 and powi_result is NULL.
4558 	 That means lhs is equivalent before and after reassociation.
4559 	 Otherwise ensure the old lhs SSA_NAME is not reused and
4560 	 create a new stmt as well, so that any debug stmts will be
4561 	 properly adjusted.  */
4562       if (changed)
4563 	{
4564 	  gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
4565 	  unsigned int uid = gimple_uid (stmt);
4566 	  gimple *insert_point = find_insert_point (stmt, new_rhs1, oe->op);
4567 
4568 	  lhs = make_ssa_name (TREE_TYPE (lhs));
4569 	  stmt = gimple_build_assign (lhs, gimple_assign_rhs_code (stmt),
4570 				      new_rhs1, oe->op);
4571 	  gimple_set_uid (stmt, uid);
4572 	  gimple_set_visited (stmt, true);
4573 	  if (insert_point == gsi_stmt (gsi))
4574 	    gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
4575 	  else
4576 	    insert_stmt_after (stmt, insert_point);
4577 	}
4578       else
4579 	{
4580 	  gcc_checking_assert (find_insert_point (stmt, new_rhs1, oe->op)
4581 			       == stmt);
4582 	  gimple_assign_set_rhs1 (stmt, new_rhs1);
4583 	  gimple_assign_set_rhs2 (stmt, oe->op);
4584 	  update_stmt (stmt);
4585 	}
4586 
4587       if (dump_file && (dump_flags & TDF_DETAILS))
4588 	{
4589 	  fprintf (dump_file, " into ");
4590 	  print_gimple_stmt (dump_file, stmt, 0);
4591 	}
4592     }
4593   return lhs;
4594 }
4595 
4596 /* Find out how many cycles we need to compute a chain of statements.
4597    OPS_NUM holds the number of statements in a chain.  CPU_WIDTH is the
4598    maximum number of independent statements we may execute per cycle.  */
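
/* Worked example (added commentary, arithmetic only): with OPS_NUM == 10
   and CPU_WIDTH == 2 this computes res = 10 / 4 = 2, rest = 10 - 2 * 2
   = 6, which is not a power of two, so floor_log2 (6) + 1 = 3 is added,
   giving 5 cycles; with OPS_NUM == 4 and CPU_WIDTH == 2 the result is
   1 + exact_log2 (2) = 2, matching the parallel form (a + b) + (c + d).  */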
4599 
4600 static int
4601 get_required_cycles (int ops_num, int cpu_width)
4602 {
4603   int res;
4604   int elog;
4605   unsigned int rest;
4606 
4607   /* While we have more than 2 * cpu_width operands
4608      we may reduce number of operands by cpu_width
4609      per cycle.  */
4610   res = ops_num / (2 * cpu_width);
4611 
4612   /* The remaining operand count may be halved each cycle
4613      until we have only one operand.  */
4614   rest = (unsigned)(ops_num - res * cpu_width);
4615   elog = exact_log2 (rest);
4616   if (elog >= 0)
4617     res += elog;
4618   else
4619     res += floor_log2 (rest) + 1;
4620 
4621   return res;
4622 }
4623 
4624 /* Returns an optimal number of registers to use for computation of
4625    given statements.  */
4626 
4627 static int
4628 get_reassociation_width (int ops_num, enum tree_code opc,
4629 			 machine_mode mode)
4630 {
4631   int param_width = PARAM_VALUE (PARAM_TREE_REASSOC_WIDTH);
4632   int width;
4633   int width_min;
4634   int cycles_best;
4635 
4636   if (param_width > 0)
4637     width = param_width;
4638   else
4639     width = targetm.sched.reassociation_width (opc, mode);
4640 
4641   if (width == 1)
4642     return width;
4643 
4644   /* Get the minimal time required for sequence computation.  */
4645   cycles_best = get_required_cycles (ops_num, width);
4646 
4647   /* Check if we may use less width and still compute sequence for
4648      the same time.  It will allow us to reduce registers usage.
4649      get_required_cycles is monotonically increasing with lower width
4650      so we can perform a binary search for the minimal width that still
4651      results in the optimal cycle count.  */
4652   width_min = 1;
4653   while (width > width_min)
4654     {
4655       int width_mid = (width + width_min) / 2;
4656 
4657       if (get_required_cycles (ops_num, width_mid) == cycles_best)
4658 	width = width_mid;
4659       else if (width_min < width_mid)
4660 	width_min = width_mid;
4661       else
4662 	break;
4663     }
4664 
4665   return width;
4666 }
4667 
4668 /* Recursively rewrite our linearized statements so that the operators
4669    match those in OPS[OPINDEX], putting the computation in rank
4670    order and trying to allow operations to be executed in
4671    parallel.  */
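
/* Illustrative sketch (added commentary): with WIDTH == 2 and five
   operands a, b, c, d, e the rewritten statement list has roughly the
   shape

     t1 = d + e;
     t2 = b + c;
     t3 = t1 + t2;
     lhs = t3 + a;

   so the first two additions are independent and can issue in the same
   cycle.  */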
4672 
4673 static void
4674 rewrite_expr_tree_parallel (gassign *stmt, int width,
4675 			    vec<operand_entry *> ops)
4676 {
4677   enum tree_code opcode = gimple_assign_rhs_code (stmt);
4678   int op_num = ops.length ();
4679   gcc_assert (op_num > 0);
4680   int stmt_num = op_num - 1;
4681   gimple **stmts = XALLOCAVEC (gimple *, stmt_num);
4682   int op_index = op_num - 1;
4683   int stmt_index = 0;
4684   int ready_stmts_end = 0;
4685   int i = 0;
4686   gimple *stmt1 = NULL, *stmt2 = NULL;
4687   tree last_rhs1 = gimple_assign_rhs1 (stmt);
4688 
4689   /* We start expression rewriting from the top statements.
4690      So, in this loop we create a full list of statements
4691      we will work with.  */
4692   stmts[stmt_num - 1] = stmt;
4693   for (i = stmt_num - 2; i >= 0; i--)
4694     stmts[i] = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmts[i+1]));
4695 
4696   for (i = 0; i < stmt_num; i++)
4697     {
4698       tree op1, op2;
4699 
4700       /* Determine whether we should use results of
4701 	 already handled statements or not.  */
4702       if (ready_stmts_end == 0
4703 	  && (i - stmt_index >= width || op_index < 1))
4704 	ready_stmts_end = i;
4705 
4706       /* Now we choose operands for the next statement.  A nonzero
4707 	 value in ready_stmts_end here means that we should use
4708 	 the results of already generated statements as new operands.  */
4709       if (ready_stmts_end > 0)
4710 	{
4711 	  op1 = gimple_assign_lhs (stmts[stmt_index++]);
4712 	  if (ready_stmts_end > stmt_index)
4713 	    op2 = gimple_assign_lhs (stmts[stmt_index++]);
4714 	  else if (op_index >= 0)
4715 	    {
4716 	      operand_entry *oe = ops[op_index--];
4717 	      stmt2 = oe->stmt_to_insert;
4718 	      op2 = oe->op;
4719 	    }
4720 	  else
4721 	    {
4722 	      gcc_assert (stmt_index < i);
4723 	      op2 = gimple_assign_lhs (stmts[stmt_index++]);
4724 	    }
4725 
4726 	  if (stmt_index >= ready_stmts_end)
4727 	    ready_stmts_end = 0;
4728 	}
4729       else
4730 	{
4731 	  if (op_index > 1)
4732 	    swap_ops_for_binary_stmt (ops, op_index - 2, NULL);
4733 	  operand_entry *oe2 = ops[op_index--];
4734 	  operand_entry *oe1 = ops[op_index--];
4735 	  op2 = oe2->op;
4736 	  stmt2 = oe2->stmt_to_insert;
4737 	  op1 = oe1->op;
4738 	  stmt1 = oe1->stmt_to_insert;
4739 	}
4740 
4741       /* If we emit the last statement then we should put
4742 	 operands into the last statement.  It will also
4743 	 break the loop.  */
4744       if (op_index < 0 && stmt_index == i)
4745 	i = stmt_num - 1;
4746 
4747       if (dump_file && (dump_flags & TDF_DETAILS))
4748 	{
4749 	  fprintf (dump_file, "Transforming ");
4750 	  print_gimple_stmt (dump_file, stmts[i], 0);
4751 	}
4752 
4753       /* If the stmt that defines operand has to be inserted, insert it
4754 	 before the use.  */
4755       if (stmt1)
4756 	insert_stmt_before_use (stmts[i], stmt1);
4757       if (stmt2)
4758 	insert_stmt_before_use (stmts[i], stmt2);
4759       stmt1 = stmt2 = NULL;
4760 
4761       /* We keep original statement only for the last one.  All
4762 	 others are recreated.  */
4763       if (i == stmt_num - 1)
4764 	{
4765 	  gimple_assign_set_rhs1 (stmts[i], op1);
4766 	  gimple_assign_set_rhs2 (stmts[i], op2);
4767 	  update_stmt (stmts[i]);
4768 	}
4769       else
4770 	{
4771 	  stmts[i] = build_and_add_sum (TREE_TYPE (last_rhs1), op1, op2, opcode);
4772 	  gimple_set_visited (stmts[i], true);
4773 	}
4774       if (dump_file && (dump_flags & TDF_DETAILS))
4775 	{
4776 	  fprintf (dump_file, " into ");
4777 	  print_gimple_stmt (dump_file, stmts[i], 0);
4778 	}
4779     }
4780 
4781   remove_visited_stmt_chain (last_rhs1);
4782 }
4783 
4784 /* Transform STMT, which is really (A + B) + (C + D), into the left
4785    linear form, ((A+B)+C)+D.
4786    Recurse on D if necessary.  */
4787 
4788 static void
4789 linearize_expr (gimple *stmt)
4790 {
4791   gimple_stmt_iterator gsi;
4792   gimple *binlhs = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
4793   gimple *binrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
4794   gimple *oldbinrhs = binrhs;
4795   enum tree_code rhscode = gimple_assign_rhs_code (stmt);
4796   gimple *newbinrhs = NULL;
4797   struct loop *loop = loop_containing_stmt (stmt);
4798   tree lhs = gimple_assign_lhs (stmt);
4799 
4800   gcc_assert (is_reassociable_op (binlhs, rhscode, loop)
4801 	      && is_reassociable_op (binrhs, rhscode, loop));
4802 
4803   gsi = gsi_for_stmt (stmt);
4804 
4805   gimple_assign_set_rhs2 (stmt, gimple_assign_rhs1 (binrhs));
4806   binrhs = gimple_build_assign (make_ssa_name (TREE_TYPE (lhs)),
4807 				gimple_assign_rhs_code (binrhs),
4808 				gimple_assign_lhs (binlhs),
4809 				gimple_assign_rhs2 (binrhs));
4810   gimple_assign_set_rhs1 (stmt, gimple_assign_lhs (binrhs));
4811   gsi_insert_before (&gsi, binrhs, GSI_SAME_STMT);
4812   gimple_set_uid (binrhs, gimple_uid (stmt));
4813 
4814   if (TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME)
4815     newbinrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
4816 
4817   if (dump_file && (dump_flags & TDF_DETAILS))
4818     {
4819       fprintf (dump_file, "Linearized: ");
4820       print_gimple_stmt (dump_file, stmt, 0);
4821     }
4822 
4823   reassociate_stats.linearized++;
4824   update_stmt (stmt);
4825 
4826   gsi = gsi_for_stmt (oldbinrhs);
4827   reassoc_remove_stmt (&gsi);
4828   release_defs (oldbinrhs);
4829 
4830   gimple_set_visited (stmt, true);
4831   gimple_set_visited (binlhs, true);
4832   gimple_set_visited (binrhs, true);
4833 
4834   /* Tail recurse on the new rhs if it still needs reassociation.  */
4835   if (newbinrhs && is_reassociable_op (newbinrhs, rhscode, loop))
4836     /* ??? This should probably be linearize_expr (newbinrhs) but I don't
4837 	   want to change the algorithm while converting to tuples.  */
4838     linearize_expr (stmt);
4839 }
4840 
4841 /* If LHS has a single immediate use that is a GIMPLE_ASSIGN statement, return
4842    it.  Otherwise, return NULL.  */
4843 
4844 static gimple *
4845 get_single_immediate_use (tree lhs)
4846 {
4847   use_operand_p immuse;
4848   gimple *immusestmt;
4849 
4850   if (TREE_CODE (lhs) == SSA_NAME
4851       && single_imm_use (lhs, &immuse, &immusestmt)
4852       && is_gimple_assign (immusestmt))
4853     return immusestmt;
4854 
4855   return NULL;
4856 }
4857 
4858 /* Recursively negate the value of TONEGATE, and return the SSA_NAME
4859    representing the negated value.  Insertions of any necessary
4860    instructions go before GSI.
4861    This function is recursive in that, if you hand it "a_5" as the
4862    value to negate, and a_5 is defined by "a_5 = b_3 + b_4", it will
4863    transform b_3 + b_4 into a_5 = -b_3 + -b_4.  */
4864 
4865 static tree
4866 negate_value (tree tonegate, gimple_stmt_iterator *gsip)
4867 {
4868   gimple *negatedefstmt = NULL;
4869   tree resultofnegate;
4870   gimple_stmt_iterator gsi;
4871   unsigned int uid;
4872 
4873   /* If we are trying to negate a name, defined by an add, negate the
4874      add operands instead.  */
4875   if (TREE_CODE (tonegate) == SSA_NAME)
4876     negatedefstmt = SSA_NAME_DEF_STMT (tonegate);
4877   if (TREE_CODE (tonegate) == SSA_NAME
4878       && is_gimple_assign (negatedefstmt)
4879       && TREE_CODE (gimple_assign_lhs (negatedefstmt)) == SSA_NAME
4880       && has_single_use (gimple_assign_lhs (negatedefstmt))
4881       && gimple_assign_rhs_code (negatedefstmt) == PLUS_EXPR)
4882     {
4883       tree rhs1 = gimple_assign_rhs1 (negatedefstmt);
4884       tree rhs2 = gimple_assign_rhs2 (negatedefstmt);
4885       tree lhs = gimple_assign_lhs (negatedefstmt);
4886       gimple *g;
4887 
4888       gsi = gsi_for_stmt (negatedefstmt);
4889       rhs1 = negate_value (rhs1, &gsi);
4890 
4891       gsi = gsi_for_stmt (negatedefstmt);
4892       rhs2 = negate_value (rhs2, &gsi);
4893 
4894       gsi = gsi_for_stmt (negatedefstmt);
4895       lhs = make_ssa_name (TREE_TYPE (lhs));
4896       gimple_set_visited (negatedefstmt, true);
4897       g = gimple_build_assign (lhs, PLUS_EXPR, rhs1, rhs2);
4898       gimple_set_uid (g, gimple_uid (negatedefstmt));
4899       gsi_insert_before (&gsi, g, GSI_SAME_STMT);
4900       return lhs;
4901     }
4902 
4903   tonegate = fold_build1 (NEGATE_EXPR, TREE_TYPE (tonegate), tonegate);
4904   resultofnegate = force_gimple_operand_gsi (gsip, tonegate, true,
4905 					     NULL_TREE, true, GSI_SAME_STMT);
4906   gsi = *gsip;
4907   uid = gimple_uid (gsi_stmt (gsi));
4908   for (gsi_prev (&gsi); !gsi_end_p (gsi); gsi_prev (&gsi))
4909     {
4910       gimple *stmt = gsi_stmt (gsi);
4911       if (gimple_uid (stmt) != 0)
4912 	break;
4913       gimple_set_uid (stmt, uid);
4914     }
4915   return resultofnegate;
4916 }
4917 
4918 /* Return true if we should break up the subtract in STMT into an add
4919    with negate.  This is true when the subtract operands are really
4920    adds, or the subtract itself is used in an add expression.  In
4921    either case, breaking up the subtract into an add with negate
4922    exposes the adds to reassociation.  */
4923 
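/* Illustrative sketch (names made up): both of the following subtracts
   would be broken up, because an add is reachable from them:
     c_3 = a_1 + b_2;   d_4 = c_3 - x_9;     <- an operand is an add
     d_4 = a_1 - b_2;   e_5 = d_4 + x_9;     <- the single use is an add
   whereas an isolated d_4 = a_1 - b_2 whose result feeds nothing
   reassociable is left alone.  */
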
4924 static bool
4925 should_break_up_subtract (gimple *stmt)
4926 {
4927   tree lhs = gimple_assign_lhs (stmt);
4928   tree binlhs = gimple_assign_rhs1 (stmt);
4929   tree binrhs = gimple_assign_rhs2 (stmt);
4930   gimple *immusestmt;
4931   struct loop *loop = loop_containing_stmt (stmt);
4932 
4933   if (TREE_CODE (binlhs) == SSA_NAME
4934       && is_reassociable_op (SSA_NAME_DEF_STMT (binlhs), PLUS_EXPR, loop))
4935     return true;
4936 
4937   if (TREE_CODE (binrhs) == SSA_NAME
4938       && is_reassociable_op (SSA_NAME_DEF_STMT (binrhs), PLUS_EXPR, loop))
4939     return true;
4940 
4941   if (TREE_CODE (lhs) == SSA_NAME
4942       && (immusestmt = get_single_immediate_use (lhs))
4943       && is_gimple_assign (immusestmt)
4944       && (gimple_assign_rhs_code (immusestmt) == PLUS_EXPR
4945 	  || (gimple_assign_rhs_code (immusestmt) == MINUS_EXPR
4946 	      && gimple_assign_rhs1 (immusestmt) == lhs)
4947 	  || gimple_assign_rhs_code (immusestmt) == MULT_EXPR))
4948     return true;
4949   return false;
4950 }
4951 
4952 /* Transform STMT from A - B into A + -B.  */
4953 
4954 static void
4955 break_up_subtract (gimple *stmt, gimple_stmt_iterator *gsip)
4956 {
4957   tree rhs1 = gimple_assign_rhs1 (stmt);
4958   tree rhs2 = gimple_assign_rhs2 (stmt);
4959 
4960   if (dump_file && (dump_flags & TDF_DETAILS))
4961     {
4962       fprintf (dump_file, "Breaking up subtract ");
4963       print_gimple_stmt (dump_file, stmt, 0);
4964     }
4965 
4966   rhs2 = negate_value (rhs2, gsip);
4967   gimple_assign_set_rhs_with_ops (gsip, PLUS_EXPR, rhs1, rhs2);
4968   update_stmt (stmt);
4969 }
4970 
4971 /* Determine whether STMT is a builtin call that raises an SSA name
4972    to an integer power and has only one use.  If so, and this is early
4973    reassociation and unsafe math optimizations are permitted, place
4974    the SSA name in *BASE and the exponent in *EXPONENT, and return TRUE.
4975    If any of these conditions does not hold, return FALSE.  */
4976 
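/* Illustrative sketch (assuming the caller has already checked
   -funsafe-math-optimizations): for a call such as
     t_5 = __builtin_pow (x_2, 4.0);
   *BASE is set to x_2 and *EXPONENT to 4.  A fractional or negative
   constant exponent, a non-constant exponent, or a non-SSA base makes
   the function return false.  */
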
4977 static bool
4978 acceptable_pow_call (gcall *stmt, tree *base, HOST_WIDE_INT *exponent)
4979 {
4980   tree arg1;
4981   REAL_VALUE_TYPE c, cint;
4982 
4983   switch (gimple_call_combined_fn (stmt))
4984     {
4985     CASE_CFN_POW:
4986       if (flag_errno_math)
4987 	return false;
4988 
4989       *base = gimple_call_arg (stmt, 0);
4990       arg1 = gimple_call_arg (stmt, 1);
4991 
4992       if (TREE_CODE (arg1) != REAL_CST)
4993 	return false;
4994 
4995       c = TREE_REAL_CST (arg1);
4996 
4997       if (REAL_EXP (&c) > HOST_BITS_PER_WIDE_INT)
4998 	return false;
4999 
5000       *exponent = real_to_integer (&c);
5001       real_from_integer (&cint, VOIDmode, *exponent, SIGNED);
5002       if (!real_identical (&c, &cint))
5003 	return false;
5004 
5005       break;
5006 
5007     CASE_CFN_POWI:
5008       *base = gimple_call_arg (stmt, 0);
5009       arg1 = gimple_call_arg (stmt, 1);
5010 
5011       if (!tree_fits_shwi_p (arg1))
5012 	return false;
5013 
5014       *exponent = tree_to_shwi (arg1);
5015       break;
5016 
5017     default:
5018       return false;
5019     }
5020 
5021   /* Expanding negative exponents is generally unproductive, so we don't
5022      complicate matters with those.  Exponents of zero and one should
5023      have been handled by expression folding.  */
5024   if (*exponent < 2 || TREE_CODE (*base) != SSA_NAME)
5025     return false;
5026 
5027   return true;
5028 }
5029 
5030 /* Try to derive and add operand entry for OP to *OPS.  Return false if
5031    unsuccessful.  */
5032 
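/* Illustrative sketch (names made up): within a MULT_EXPR chain, a
   single-use operand defined by
     t_4 = -x_2;
   is entered into *OPS as the two operands x_2 and -1 instead of t_4,
   and a single-use
     t_4 = __builtin_powi (x_2, 3);
   is recorded as the repeated factor x_2 with an occurrence count of 3
   (when powi insertion and unsafe math optimizations are enabled).  */
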
5033 static bool
5034 try_special_add_to_ops (vec<operand_entry *> *ops,
5035 			enum tree_code code,
5036 			tree op, gimple* def_stmt)
5037 {
5038   tree base = NULL_TREE;
5039   HOST_WIDE_INT exponent = 0;
5040 
5041   if (TREE_CODE (op) != SSA_NAME
5042       || ! has_single_use (op))
5043     return false;
5044 
5045   if (code == MULT_EXPR
5046       && reassoc_insert_powi_p
5047       && flag_unsafe_math_optimizations
5048       && is_gimple_call (def_stmt)
5049       && acceptable_pow_call (as_a <gcall *> (def_stmt), &base, &exponent))
5050     {
5051       add_repeat_to_ops_vec (ops, base, exponent);
5052       gimple_set_visited (def_stmt, true);
5053       return true;
5054     }
5055   else if (code == MULT_EXPR
5056 	   && is_gimple_assign (def_stmt)
5057 	   && gimple_assign_rhs_code (def_stmt) == NEGATE_EXPR
5058 	   && !HONOR_SNANS (TREE_TYPE (op))
5059 	   && (!HONOR_SIGNED_ZEROS (TREE_TYPE (op))
5060 	       || !COMPLEX_FLOAT_TYPE_P (TREE_TYPE (op))))
5061     {
5062       tree rhs1 = gimple_assign_rhs1 (def_stmt);
5063       tree cst = build_minus_one_cst (TREE_TYPE (op));
5064       add_to_ops_vec (ops, rhs1);
5065       add_to_ops_vec (ops, cst);
5066       gimple_set_visited (def_stmt, true);
5067       return true;
5068     }
5069 
5070   return false;
5071 }
5072 
5073 /* Recursively linearize a binary expression that is the RHS of STMT.
5074    Place the operands of the expression tree in the vector named OPS.  */
5075 
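/* Illustrative sketch (names made up): for the tree
     t_3 = a_1 + b_2;
     t_6 = c_4 + d_5;
     t_7 = t_3 + t_6;
   calling linearize_expr_tree on the statement defining t_7 first
   rewrites the RHS into a left-linear chain and then fills OPS with
   roughly { a_1, b_2, c_4, d_5 }.  */
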
5076 static void
5077 linearize_expr_tree (vec<operand_entry *> *ops, gimple *stmt,
5078 		     bool is_associative, bool set_visited)
5079 {
5080   tree binlhs = gimple_assign_rhs1 (stmt);
5081   tree binrhs = gimple_assign_rhs2 (stmt);
5082   gimple *binlhsdef = NULL, *binrhsdef = NULL;
5083   bool binlhsisreassoc = false;
5084   bool binrhsisreassoc = false;
5085   enum tree_code rhscode = gimple_assign_rhs_code (stmt);
5086   struct loop *loop = loop_containing_stmt (stmt);
5087 
5088   if (set_visited)
5089     gimple_set_visited (stmt, true);
5090 
5091   if (TREE_CODE (binlhs) == SSA_NAME)
5092     {
5093       binlhsdef = SSA_NAME_DEF_STMT (binlhs);
5094       binlhsisreassoc = (is_reassociable_op (binlhsdef, rhscode, loop)
5095 			 && !stmt_could_throw_p (binlhsdef));
5096     }
5097 
5098   if (TREE_CODE (binrhs) == SSA_NAME)
5099     {
5100       binrhsdef = SSA_NAME_DEF_STMT (binrhs);
5101       binrhsisreassoc = (is_reassociable_op (binrhsdef, rhscode, loop)
5102 			 && !stmt_could_throw_p (binrhsdef));
5103     }
5104 
5105   /* If the LHS is not reassociable, but the RHS is, we need to swap
5106      them.  If neither is reassociable, there is nothing we can do, so
5107      just put them in the ops vector.  If the LHS is reassociable,
5108      linearize it.  If both are reassociable, then linearize the RHS
5109      and the LHS.  */
5110 
5111   if (!binlhsisreassoc)
5112     {
5113       /* If this is not an associative operation like division, give up.  */
5114       if (!is_associative)
5115 	{
5116 	  add_to_ops_vec (ops, binrhs);
5117 	  return;
5118 	}
5119 
5120       if (!binrhsisreassoc)
5121 	{
5122 	  if (!try_special_add_to_ops (ops, rhscode, binrhs, binrhsdef))
5123 	    add_to_ops_vec (ops, binrhs);
5124 
5125 	  if (!try_special_add_to_ops (ops, rhscode, binlhs, binlhsdef))
5126 	    add_to_ops_vec (ops, binlhs);
5127 
5128 	  return;
5129 	}
5130 
5131       if (dump_file && (dump_flags & TDF_DETAILS))
5132 	{
5133 	  fprintf (dump_file, "swapping operands of ");
5134 	  print_gimple_stmt (dump_file, stmt, 0);
5135 	}
5136 
5137       swap_ssa_operands (stmt,
5138 			 gimple_assign_rhs1_ptr (stmt),
5139 			 gimple_assign_rhs2_ptr (stmt));
5140       update_stmt (stmt);
5141 
5142       if (dump_file && (dump_flags & TDF_DETAILS))
5143 	{
5144 	  fprintf (dump_file, " is now ");
5145 	  print_gimple_stmt (dump_file, stmt, 0);
5146 	}
5147 
5148       /* We want to make it so the lhs is always the reassociative op,
5149 	 so swap.  */
5150       std::swap (binlhs, binrhs);
5151     }
5152   else if (binrhsisreassoc)
5153     {
5154       linearize_expr (stmt);
5155       binlhs = gimple_assign_rhs1 (stmt);
5156       binrhs = gimple_assign_rhs2 (stmt);
5157     }
5158 
5159   gcc_assert (TREE_CODE (binrhs) != SSA_NAME
5160 	      || !is_reassociable_op (SSA_NAME_DEF_STMT (binrhs),
5161 				      rhscode, loop));
5162   linearize_expr_tree (ops, SSA_NAME_DEF_STMT (binlhs),
5163 		       is_associative, set_visited);
5164 
5165   if (!try_special_add_to_ops (ops, rhscode, binrhs, binrhsdef))
5166     add_to_ops_vec (ops, binrhs);
5167 }
5168 
5169 /* Repropagate the negates back into subtracts, since no other pass
5170    currently does it.  */
5171 
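/* Illustrative sketch (names made up): a leftover negate feeding an add,
     t_3 = -a_1;
     x_5 = b_2 + t_3;
   is turned back into
     x_5 = b_2 - a_1;
   the MINUS_EXPR cases below push the negate down or cancel it in a
   similar way.  */
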
5172 static void
5173 repropagate_negates (void)
5174 {
5175   unsigned int i = 0;
5176   tree negate;
5177 
5178   FOR_EACH_VEC_ELT (plus_negates, i, negate)
5179     {
5180       gimple *user = get_single_immediate_use (negate);
5181 
5182       if (!user || !is_gimple_assign (user))
5183 	continue;
5184 
5185       /* The negate operand can be either operand of a PLUS_EXPR
5186 	 (it can be the LHS if the RHS is a constant for example).
5187 
5188 	 Force the negate operand to the RHS of the PLUS_EXPR, then
5189 	 transform the PLUS_EXPR into a MINUS_EXPR.  */
5190       if (gimple_assign_rhs_code (user) == PLUS_EXPR)
5191 	{
5192 	  /* If the negated operand appears on the LHS of the
5193 	     PLUS_EXPR, exchange the operands of the PLUS_EXPR
5194 	     to force the negated operand to the RHS of the PLUS_EXPR.  */
5195 	  if (gimple_assign_rhs1 (user) == negate)
5196 	    {
5197 	      swap_ssa_operands (user,
5198 				 gimple_assign_rhs1_ptr (user),
5199 				 gimple_assign_rhs2_ptr (user));
5200 	    }
5201 
5202 	  /* Now transform the PLUS_EXPR into a MINUS_EXPR and replace
5203 	     the RHS of the PLUS_EXPR with the operand of the NEGATE_EXPR.  */
5204 	  if (gimple_assign_rhs2 (user) == negate)
5205 	    {
5206 	      tree rhs1 = gimple_assign_rhs1 (user);
5207 	      tree rhs2 = gimple_assign_rhs1 (SSA_NAME_DEF_STMT (negate));
5208 	      gimple_stmt_iterator gsi = gsi_for_stmt (user);
5209 	      gimple_assign_set_rhs_with_ops (&gsi, MINUS_EXPR, rhs1, rhs2);
5210 	      update_stmt (user);
5211 	    }
5212 	}
5213       else if (gimple_assign_rhs_code (user) == MINUS_EXPR)
5214 	{
5215 	  if (gimple_assign_rhs1 (user) == negate)
5216 	    {
5217 	      /* We have
5218 	           x = -a
5219 		   y = x - b
5220 		 which we transform into
5221 		   x = a + b
5222 		   y = -x .
5223 		 This pushes down the negate which we possibly can merge
5224 		 into some other operation, hence insert it into the
5225 		 plus_negates vector.  */
5226 	      gimple *feed = SSA_NAME_DEF_STMT (negate);
5227 	      tree a = gimple_assign_rhs1 (feed);
5228 	      tree b = gimple_assign_rhs2 (user);
5229 	      gimple_stmt_iterator gsi = gsi_for_stmt (feed);
5230 	      gimple_stmt_iterator gsi2 = gsi_for_stmt (user);
5231 	      tree x = make_ssa_name (TREE_TYPE (gimple_assign_lhs (feed)));
5232 	      gimple *g = gimple_build_assign (x, PLUS_EXPR, a, b);
5233 	      gsi_insert_before (&gsi2, g, GSI_SAME_STMT);
5234 	      gimple_assign_set_rhs_with_ops (&gsi2, NEGATE_EXPR, x);
5235 	      user = gsi_stmt (gsi2);
5236 	      update_stmt (user);
5237 	      reassoc_remove_stmt (&gsi);
5238 	      release_defs (feed);
5239 	      plus_negates.safe_push (gimple_assign_lhs (user));
5240 	    }
5241 	  else
5242 	    {
5243 	      /* Transform "x = -a; y = b - x" into "y = b + a", getting
5244 	         rid of one operation.  */
5245 	      gimple *feed = SSA_NAME_DEF_STMT (negate);
5246 	      tree a = gimple_assign_rhs1 (feed);
5247 	      tree rhs1 = gimple_assign_rhs1 (user);
5248 	      gimple_stmt_iterator gsi = gsi_for_stmt (user);
5249 	      gimple_assign_set_rhs_with_ops (&gsi, PLUS_EXPR, rhs1, a);
5250 	      update_stmt (gsi_stmt (gsi));
5251 	    }
5252 	}
5253     }
5254 }
5255 
5256 /* Returns true if OP is of a type for which we can do reassociation.
5257    That is for integral or non-saturating fixed-point types, and for
5258    floating-point types when associative-math is enabled.  */
5259 
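/* For example, an unsigned int operand qualifies (its overflow wraps),
   a plain signed int does not unless -fwrapv is in effect, and a double
   qualifies only under -fassociative-math; SSA names occurring in
   abnormal PHIs are always rejected.  */
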
5260 static bool
5261 can_reassociate_p (tree op)
5262 {
5263   tree type = TREE_TYPE (op);
5264   if (TREE_CODE (op) == SSA_NAME && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (op))
5265     return false;
5266   if ((ANY_INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_WRAPS (type))
5267       || NON_SAT_FIXED_POINT_TYPE_P (type)
5268       || (flag_associative_math && FLOAT_TYPE_P (type)))
5269     return true;
5270   return false;
5271 }
5272 
5273 /* Break up subtract operations in block BB.
5274 
5275    We do this top down because we don't know whether the subtract is
5276    part of a possible chain of reassociation except at the top.
5277 
5278    IE given
5279    d = f + g
5280    c = a + e
5281    b = c - d
5282    q = b - r
5283    k = t - q
5284 
5285    we want to break up k = t - q, but we won't until we've transformed q
5286    = b - r, which won't be broken up until we transform b = c - d.
5287 
5288    En passant, clear the GIMPLE visited flag on every statement
5289    and set UIDs within each basic block.  */
5290 
5291 static void
5292 break_up_subtract_bb (basic_block bb)
5293 {
5294   gimple_stmt_iterator gsi;
5295   basic_block son;
5296   unsigned int uid = 1;
5297 
5298   for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
5299     {
5300       gimple *stmt = gsi_stmt (gsi);
5301       gimple_set_visited (stmt, false);
5302       gimple_set_uid (stmt, uid++);
5303 
5304       if (!is_gimple_assign (stmt)
5305 	  || !can_reassociate_p (gimple_assign_lhs (stmt)))
5306 	continue;
5307 
5308       /* Look for simple gimple subtract operations.  */
5309       if (gimple_assign_rhs_code (stmt) == MINUS_EXPR)
5310 	{
5311 	  if (!can_reassociate_p (gimple_assign_rhs1 (stmt))
5312 	      || !can_reassociate_p (gimple_assign_rhs2 (stmt)))
5313 	    continue;
5314 
5315 	  /* Check for a subtract used only in an addition.  If this
5316 	     is the case, transform it into add of a negate for better
5317 	     reassociation.  IE transform C = A-B into C = A + -B if C
5318 	     is only used in an addition.  */
5319 	  if (should_break_up_subtract (stmt))
5320 	    break_up_subtract (stmt, &gsi);
5321 	}
5322       else if (gimple_assign_rhs_code (stmt) == NEGATE_EXPR
5323 	       && can_reassociate_p (gimple_assign_rhs1 (stmt)))
5324 	plus_negates.safe_push (gimple_assign_lhs (stmt));
5325     }
5326   for (son = first_dom_son (CDI_DOMINATORS, bb);
5327        son;
5328        son = next_dom_son (CDI_DOMINATORS, son))
5329     break_up_subtract_bb (son);
5330 }
5331 
5332 /* Used for repeated factor analysis.  */
5333 struct repeat_factor
5334 {
5335   /* An SSA name that occurs in a multiply chain.  */
5336   tree factor;
5337 
5338   /* Cached rank of the factor.  */
5339   unsigned rank;
5340 
5341   /* Number of occurrences of the factor in the chain.  */
5342   HOST_WIDE_INT count;
5343 
5344   /* An SSA name representing the product of this factor and
5345      all factors appearing later in the repeated factor vector.  */
5346   tree repr;
5347 };
5348 
5349 
5350 static vec<repeat_factor> repeat_factor_vec;
5351 
5352 /* Used for sorting the repeat factor vector.  Sort primarily by
5353    ascending occurrence count, secondarily by descending rank.  */
5354 
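/* For example (made-up entries), the factors
     { x: count 3, rank 5 }  { y: count 2, rank 7 }  { z: count 2, rank 4 }
   sort as y, z, x: the count-2 entries come first, and among equal
   counts the higher-ranked entry comes first.  */
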
5355 static int
5356 compare_repeat_factors (const void *x1, const void *x2)
5357 {
5358   const repeat_factor *rf1 = (const repeat_factor *) x1;
5359   const repeat_factor *rf2 = (const repeat_factor *) x2;
5360 
5361   if (rf1->count != rf2->count)
5362     return rf1->count - rf2->count;
5363 
5364   return rf2->rank - rf1->rank;
5365 }
5366 
5367 /* Look for repeated operands in OPS in the multiply tree rooted at
5368    STMT.  Replace them with an optimal sequence of multiplies and powi
5369    builtin calls, and remove the used operands from OPS.  Return an
5370    SSA name representing the value of the replacement sequence.  */
5371 
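/* A minimal sketch of the overall effect (names made up, assuming
   __builtin_powi exists for the type): when OPS represents
   x * x * x * y, the three copies of x are removed from OPS and
     reassocpow_1 = __builtin_powi (x, 3);
   is emitted before STMT; reassocpow_1 is returned so the caller can
   multiply it with the remaining operand y.  */
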
5372 static tree
5373 attempt_builtin_powi (gimple *stmt, vec<operand_entry *> *ops)
5374 {
5375   unsigned i, j, vec_len;
5376   int ii;
5377   operand_entry *oe;
5378   repeat_factor *rf1, *rf2;
5379   repeat_factor rfnew;
5380   tree result = NULL_TREE;
5381   tree target_ssa, iter_result;
5382   tree type = TREE_TYPE (gimple_get_lhs (stmt));
5383   tree powi_fndecl = mathfn_built_in (type, BUILT_IN_POWI);
5384   gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
5385   gimple *mul_stmt, *pow_stmt;
5386 
5387   /* Nothing to do if BUILT_IN_POWI doesn't exist for this type and
5388      target.  */
5389   if (!powi_fndecl)
5390     return NULL_TREE;
5391 
5392   /* Allocate the repeated factor vector.  */
5393   repeat_factor_vec.create (10);
5394 
5395   /* Scan the OPS vector for all SSA names in the product and build
5396      up a vector of occurrence counts for each factor.  */
5397   FOR_EACH_VEC_ELT (*ops, i, oe)
5398     {
5399       if (TREE_CODE (oe->op) == SSA_NAME)
5400 	{
5401 	  FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
5402 	    {
5403 	      if (rf1->factor == oe->op)
5404 		{
5405 		  rf1->count += oe->count;
5406 		  break;
5407 		}
5408 	    }
5409 
5410 	  if (j >= repeat_factor_vec.length ())
5411 	    {
5412 	      rfnew.factor = oe->op;
5413 	      rfnew.rank = oe->rank;
5414 	      rfnew.count = oe->count;
5415 	      rfnew.repr = NULL_TREE;
5416 	      repeat_factor_vec.safe_push (rfnew);
5417 	    }
5418 	}
5419     }
5420 
5421   /* Sort the repeated factor vector by (a) increasing occurrence count,
5422      and (b) decreasing rank.  */
5423   repeat_factor_vec.qsort (compare_repeat_factors);
5424 
5425   /* It is generally best to combine as many base factors as possible
5426      into a product before applying __builtin_powi to the result.
5427      However, the sort order chosen for the repeated factor vector
5428      allows us to cache partial results for the product of the base
5429      factors for subsequent use.  When we already have a cached partial
5430      result from a previous iteration, it is best to make use of it
5431      before looking for another __builtin_pow opportunity.
5432 
5433      As an example, consider x * x * y * y * y * z * z * z * z.
5434      We want to first compose the product x * y * z, raise it to the
5435      second power, then multiply this by y * z, and finally multiply
5436      by z.  This can be done in 5 multiplies provided we cache y * z
5437      for use in both expressions:
5438 
5439         t1 = y * z
5440 	t2 = t1 * x
5441 	t3 = t2 * t2
5442 	t4 = t1 * t3
5443 	result = t4 * z
5444 
5445      If we instead ignored the cached y * z and first multiplied by
5446      the __builtin_pow opportunity z * z, we would get the inferior:
5447 
5448         t1 = y * z
5449 	t2 = t1 * x
5450 	t3 = t2 * t2
5451 	t4 = z * z
5452 	t5 = t3 * t4
5453         result = t5 * y  */
5454 
5455   vec_len = repeat_factor_vec.length ();
5456 
5457   /* Repeatedly look for opportunities to create a builtin_powi call.  */
5458   while (true)
5459     {
5460       HOST_WIDE_INT power;
5461 
5462       /* First look for the largest cached product of factors from
5463 	 preceding iterations.  If found, create a builtin_powi for
5464 	 it if the minimum occurrence count for its factors is at
5465 	 least 2, or just use this cached product as our next
5466 	 multiplicand if the minimum occurrence count is 1.  */
5467       FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
5468 	{
5469 	  if (rf1->repr && rf1->count > 0)
5470 	    break;
5471 	}
5472 
5473       if (j < vec_len)
5474 	{
5475 	  power = rf1->count;
5476 
5477 	  if (power == 1)
5478 	    {
5479 	      iter_result = rf1->repr;
5480 
5481 	      if (dump_file && (dump_flags & TDF_DETAILS))
5482 		{
5483 		  unsigned elt;
5484 		  repeat_factor *rf;
5485 		  fputs ("Multiplying by cached product ", dump_file);
5486 		  for (elt = j; elt < vec_len; elt++)
5487 		    {
5488 		      rf = &repeat_factor_vec[elt];
5489 		      print_generic_expr (dump_file, rf->factor);
5490 		      if (elt < vec_len - 1)
5491 			fputs (" * ", dump_file);
5492 		    }
5493 		  fputs ("\n", dump_file);
5494 		}
5495 	    }
5496 	  else
5497 	    {
5498 	      iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
5499 	      pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
5500 					    build_int_cst (integer_type_node,
5501 							   power));
5502 	      gimple_call_set_lhs (pow_stmt, iter_result);
5503 	      gimple_set_location (pow_stmt, gimple_location (stmt));
5504 	      gimple_set_uid (pow_stmt, gimple_uid (stmt));
5505 	      gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
5506 
5507 	      if (dump_file && (dump_flags & TDF_DETAILS))
5508 		{
5509 		  unsigned elt;
5510 		  repeat_factor *rf;
5511 		  fputs ("Building __builtin_pow call for cached product (",
5512 			 dump_file);
5513 		  for (elt = j; elt < vec_len; elt++)
5514 		    {
5515 		      rf = &repeat_factor_vec[elt];
5516 		      print_generic_expr (dump_file, rf->factor);
5517 		      if (elt < vec_len - 1)
5518 			fputs (" * ", dump_file);
5519 		    }
5520 		  fprintf (dump_file, ")^" HOST_WIDE_INT_PRINT_DEC"\n",
5521 			   power);
5522 		}
5523 	    }
5524 	}
5525       else
5526 	{
5527 	  /* Otherwise, find the first factor in the repeated factor
5528 	     vector whose occurrence count is at least 2.  If no such
5529 	     factor exists, there are no builtin_powi opportunities
5530 	     remaining.  */
5531 	  FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
5532 	    {
5533 	      if (rf1->count >= 2)
5534 		break;
5535 	    }
5536 
5537 	  if (j >= vec_len)
5538 	    break;
5539 
5540 	  power = rf1->count;
5541 
5542 	  if (dump_file && (dump_flags & TDF_DETAILS))
5543 	    {
5544 	      unsigned elt;
5545 	      repeat_factor *rf;
5546 	      fputs ("Building __builtin_pow call for (", dump_file);
5547 	      for (elt = j; elt < vec_len; elt++)
5548 		{
5549 		  rf = &repeat_factor_vec[elt];
5550 		  print_generic_expr (dump_file, rf->factor);
5551 		  if (elt < vec_len - 1)
5552 		    fputs (" * ", dump_file);
5553 		}
5554 	      fprintf (dump_file, ")^" HOST_WIDE_INT_PRINT_DEC"\n", power);
5555 	    }
5556 
5557 	  reassociate_stats.pows_created++;
5558 
5559 	  /* Visit each element of the vector in reverse order (so that
5560 	     high-occurrence elements are visited first, and within the
5561 	     same occurrence count, lower-ranked elements are visited
5562 	     first).  Form a linear product of all elements in this order
5563 	     whose occurrence count is at least that of element J.
5564 	     Record the SSA name representing the product of each element
5565 	     with all subsequent elements in the vector.  */
5566 	  if (j == vec_len - 1)
5567 	    rf1->repr = rf1->factor;
5568 	  else
5569 	    {
5570 	      for (ii = vec_len - 2; ii >= (int)j; ii--)
5571 		{
5572 		  tree op1, op2;
5573 
5574 		  rf1 = &repeat_factor_vec[ii];
5575 		  rf2 = &repeat_factor_vec[ii + 1];
5576 
5577 		  /* Init the last factor's representative to be itself.  */
5578 		  if (!rf2->repr)
5579 		    rf2->repr = rf2->factor;
5580 
5581 		  op1 = rf1->factor;
5582 		  op2 = rf2->repr;
5583 
5584 		  target_ssa = make_temp_ssa_name (type, NULL, "reassocpow");
5585 		  mul_stmt = gimple_build_assign (target_ssa, MULT_EXPR,
5586 						  op1, op2);
5587 		  gimple_set_location (mul_stmt, gimple_location (stmt));
5588 		  gimple_set_uid (mul_stmt, gimple_uid (stmt));
5589 		  gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
5590 		  rf1->repr = target_ssa;
5591 
5592 		  /* Don't reprocess the multiply we just introduced.  */
5593 		  gimple_set_visited (mul_stmt, true);
5594 		}
5595 	    }
5596 
5597 	  /* Form a call to __builtin_powi for the maximum product
5598 	     just formed, raised to the power obtained earlier.  */
5599 	  rf1 = &repeat_factor_vec[j];
5600 	  iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
5601 	  pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
5602 					build_int_cst (integer_type_node,
5603 						       power));
5604 	  gimple_call_set_lhs (pow_stmt, iter_result);
5605 	  gimple_set_location (pow_stmt, gimple_location (stmt));
5606 	  gimple_set_uid (pow_stmt, gimple_uid (stmt));
5607 	  gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
5608 	}
5609 
5610       /* If we previously formed at least one other builtin_powi call,
5611 	 form the product of this one and those others.  */
5612       if (result)
5613 	{
5614 	  tree new_result = make_temp_ssa_name (type, NULL, "reassocpow");
5615 	  mul_stmt = gimple_build_assign (new_result, MULT_EXPR,
5616 					  result, iter_result);
5617 	  gimple_set_location (mul_stmt, gimple_location (stmt));
5618 	  gimple_set_uid (mul_stmt, gimple_uid (stmt));
5619 	  gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
5620 	  gimple_set_visited (mul_stmt, true);
5621 	  result = new_result;
5622 	}
5623       else
5624 	result = iter_result;
5625 
5626       /* Decrement the occurrence count of each element in the product
5627 	 by the count found above, and remove this many copies of each
5628 	 factor from OPS.  */
5629       for (i = j; i < vec_len; i++)
5630 	{
5631 	  unsigned k = power;
5632 	  unsigned n;
5633 
5634 	  rf1 = &repeat_factor_vec[i];
5635 	  rf1->count -= power;
5636 
5637 	  FOR_EACH_VEC_ELT_REVERSE (*ops, n, oe)
5638 	    {
5639 	      if (oe->op == rf1->factor)
5640 		{
5641 		  if (oe->count <= k)
5642 		    {
5643 		      ops->ordered_remove (n);
5644 		      k -= oe->count;
5645 
5646 		      if (k == 0)
5647 			break;
5648 		    }
5649 		  else
5650 		    {
5651 		      oe->count -= k;
5652 		      break;
5653 		    }
5654 		}
5655 	    }
5656 	}
5657     }
5658 
5659   /* At this point all elements in the repeated factor vector have a
5660      remaining occurrence count of 0 or 1, and those with a count of 1
5661      don't have cached representatives.  Re-sort the ops vector and
5662      clean up.  */
5663   ops->qsort (sort_by_operand_rank);
5664   repeat_factor_vec.release ();
5665 
5666   /* Return the final product computed herein.  Note that there may
5667      still be some elements with single occurrence count left in OPS;
5668      those will be handled by the normal reassociation logic.  */
5669   return result;
5670 }
5671 
5672 /* Attempt to optimize
5673    CST1 * copysign (CST2, y) -> copysign (CST1 * CST2, y) if CST1 > 0, or
5674    CST1 * copysign (CST2, y) -> -copysign (CST1 * CST2, y) if CST1 < 0.  */
5675 
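/* Illustrative sketch (constants made up): with OPS ending in the
   constant -2.0 and containing a single-use
     t_3 = __builtin_copysign (3.0, y_1);
   the constant is consumed and the operand entry is replaced by roughly
     t_7 = __builtin_copysign (-6.0, y_1);
     t_8 = -t_7;
   i.e. -copysign (-2.0 * 3.0, y_1), matching the header comment.  */
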
5676 static void
5677 attempt_builtin_copysign (vec<operand_entry *> *ops)
5678 {
5679   operand_entry *oe;
5680   unsigned int i;
5681   unsigned int length = ops->length ();
5682   tree cst = ops->last ()->op;
5683 
5684   if (length == 1 || TREE_CODE (cst) != REAL_CST)
5685     return;
5686 
5687   FOR_EACH_VEC_ELT (*ops, i, oe)
5688     {
5689       if (TREE_CODE (oe->op) == SSA_NAME
5690 	  && has_single_use (oe->op))
5691 	{
5692 	  gimple *def_stmt = SSA_NAME_DEF_STMT (oe->op);
5693 	  if (gcall *old_call = dyn_cast <gcall *> (def_stmt))
5694 	    {
5695 	      tree arg0, arg1;
5696 	      switch (gimple_call_combined_fn (old_call))
5697 		{
5698 		CASE_CFN_COPYSIGN:
5699 		CASE_CFN_COPYSIGN_FN:
5700 		  arg0 = gimple_call_arg (old_call, 0);
5701 		  arg1 = gimple_call_arg (old_call, 1);
5702 		  /* The first argument of copysign must be a constant,
5703 		     otherwise there's nothing to do.  */
5704 		  if (TREE_CODE (arg0) == REAL_CST)
5705 		    {
5706 		      tree type = TREE_TYPE (arg0);
5707 		      tree mul = const_binop (MULT_EXPR, type, cst, arg0);
5708 		      /* If we couldn't fold to a single constant, skip it.
5709 			 That happens e.g. for inexact multiplication when
5710 			 -frounding-math.  */
5711 		      if (mul == NULL_TREE)
5712 			break;
5713 		      /* Instead of adjusting OLD_CALL, let's build a new
5714 			 call to not leak the LHS and prevent keeping bogus
5715 			 debug statements.  DCE will clean up the old call.  */
5716 		      gcall *new_call;
5717 		      if (gimple_call_internal_p (old_call))
5718 			new_call = gimple_build_call_internal
5719 			  (IFN_COPYSIGN, 2, mul, arg1);
5720 		      else
5721 			new_call = gimple_build_call
5722 			  (gimple_call_fndecl (old_call), 2, mul, arg1);
5723 		      tree lhs = make_ssa_name (type);
5724 		      gimple_call_set_lhs (new_call, lhs);
5725 		      gimple_set_location (new_call,
5726 					   gimple_location (old_call));
5727 		      insert_stmt_after (new_call, old_call);
5728 		      /* We've used the constant, get rid of it.  */
5729 		      ops->pop ();
5730 		      bool cst1_neg = real_isneg (TREE_REAL_CST_PTR (cst));
5731 		      /* Handle the CST1 < 0 case by negating the result.  */
5732 		      if (cst1_neg)
5733 			{
5734 			  tree negrhs = make_ssa_name (TREE_TYPE (lhs));
5735 			  gimple *negate_stmt
5736 			    = gimple_build_assign (negrhs, NEGATE_EXPR, lhs);
5737 			  insert_stmt_after (negate_stmt, new_call);
5738 			  oe->op = negrhs;
5739 			}
5740 		      else
5741 			oe->op = lhs;
5742 		      if (dump_file && (dump_flags & TDF_DETAILS))
5743 			{
5744 			  fprintf (dump_file, "Optimizing copysign: ");
5745 			  print_generic_expr (dump_file, cst);
5746 			  fprintf (dump_file, " * COPYSIGN (");
5747 			  print_generic_expr (dump_file, arg0);
5748 			  fprintf (dump_file, ", ");
5749 			  print_generic_expr (dump_file, arg1);
5750 			  fprintf (dump_file, ") into %sCOPYSIGN (",
5751 				   cst1_neg ? "-" : "");
5752 			  print_generic_expr (dump_file, mul);
5753 			  fprintf (dump_file, ", ");
5754 			  print_generic_expr (dump_file, arg1);
5755 			  fprintf (dump_file, ")\n");
5756 			}
5757 		      return;
5758 		    }
5759 		  break;
5760 		default:
5761 		  break;
5762 		}
5763 	    }
5764 	}
5765     }
5766 }
5767 
5768 /* Transform STMT at *GSI into a copy by replacing its rhs with NEW_RHS.  */
5769 
5770 static void
5771 transform_stmt_to_copy (gimple_stmt_iterator *gsi, gimple *stmt, tree new_rhs)
5772 {
5773   tree rhs1;
5774 
5775   if (dump_file && (dump_flags & TDF_DETAILS))
5776     {
5777       fprintf (dump_file, "Transforming ");
5778       print_gimple_stmt (dump_file, stmt, 0);
5779     }
5780 
5781   rhs1 = gimple_assign_rhs1 (stmt);
5782   gimple_assign_set_rhs_from_tree (gsi, new_rhs);
5783   update_stmt (stmt);
5784   remove_visited_stmt_chain (rhs1);
5785 
5786   if (dump_file && (dump_flags & TDF_DETAILS))
5787     {
5788       fprintf (dump_file, " into ");
5789       print_gimple_stmt (dump_file, stmt, 0);
5790     }
5791 }
5792 
5793 /* Transform STMT at *GSI into a multiply of RHS1 and RHS2.  */
5794 
5795 static void
5796 transform_stmt_to_multiply (gimple_stmt_iterator *gsi, gimple *stmt,
5797 			    tree rhs1, tree rhs2)
5798 {
5799   if (dump_file && (dump_flags & TDF_DETAILS))
5800     {
5801       fprintf (dump_file, "Transforming ");
5802       print_gimple_stmt (dump_file, stmt, 0);
5803     }
5804 
5805   gimple_assign_set_rhs_with_ops (gsi, MULT_EXPR, rhs1, rhs2);
5806   update_stmt (gsi_stmt (*gsi));
5807   remove_visited_stmt_chain (rhs1);
5808 
5809   if (dump_file && (dump_flags & TDF_DETAILS))
5810     {
5811       fprintf (dump_file, " into ");
5812       print_gimple_stmt (dump_file, stmt, 0);
5813     }
5814 }
5815 
5816 /* Reassociate expressions in basic block BB and its children in the
5817    post-dominator tree.
5818 
5819    Bubble up return status from maybe_optimize_range_tests.  */
5820 
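/* A rough sketch of what happens to one associative chain (names made
   up): the statements
     t_4 = a_1 + b_2;
     t_5 = c_3 + t_4;
     x_7 = t_5 + d_6;
   are linearized into OPS = { a_1, b_2, c_3, d_6 }, the operand list is
   optimized and re-sorted by rank, and the chain is rewritten back into
   binary statements in rank order, using the parallel form when the
   target's reassociation width allows it.  */
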
5821 static bool
5822 reassociate_bb (basic_block bb)
5823 {
5824   gimple_stmt_iterator gsi;
5825   basic_block son;
5826   gimple *stmt = last_stmt (bb);
5827   bool cfg_cleanup_needed = false;
5828 
5829   if (stmt && !gimple_visited_p (stmt))
5830     cfg_cleanup_needed |= maybe_optimize_range_tests (stmt);
5831 
5832   for (gsi = gsi_last_bb (bb); !gsi_end_p (gsi); gsi_prev (&gsi))
5833     {
5834       stmt = gsi_stmt (gsi);
5835 
5836       if (is_gimple_assign (stmt)
5837 	  && !stmt_could_throw_p (stmt))
5838 	{
5839 	  tree lhs, rhs1, rhs2;
5840 	  enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
5841 
5842 	  /* If this is not a gimple binary expression, there is
5843 	     nothing for us to do with it.  */
5844 	  if (get_gimple_rhs_class (rhs_code) != GIMPLE_BINARY_RHS)
5845 	    continue;
5846 
5847 	  /* If this was part of an already processed statement,
5848 	     we don't need to touch it again. */
5849 	  if (gimple_visited_p (stmt))
5850 	    {
5851 	      /* This statement might have become dead because of previous
5852 		 reassociations.  */
5853 	      if (has_zero_uses (gimple_get_lhs (stmt)))
5854 		{
5855 		  reassoc_remove_stmt (&gsi);
5856 		  release_defs (stmt);
5857 		  /* We might end up removing the last stmt above which
5858 		     places the iterator to the end of the sequence.
5859 		     Reset it to the last stmt in this case which might
5860 		     be the end of the sequence as well if we removed
5861 		     the last statement of the sequence.  In which case
5862 		     we need to bail out.  */
5863 		  if (gsi_end_p (gsi))
5864 		    {
5865 		      gsi = gsi_last_bb (bb);
5866 		      if (gsi_end_p (gsi))
5867 			break;
5868 		    }
5869 		}
5870 	      continue;
5871 	    }
5872 
5873 	  lhs = gimple_assign_lhs (stmt);
5874 	  rhs1 = gimple_assign_rhs1 (stmt);
5875 	  rhs2 = gimple_assign_rhs2 (stmt);
5876 
5877 	  /* For non-bit or min/max operations we can't associate
5878 	     all types.  Verify that here.  */
5879 	  if (rhs_code != BIT_IOR_EXPR
5880 	      && rhs_code != BIT_AND_EXPR
5881 	      && rhs_code != BIT_XOR_EXPR
5882 	      && rhs_code != MIN_EXPR
5883 	      && rhs_code != MAX_EXPR
5884 	      && (!can_reassociate_p (lhs)
5885 		  || !can_reassociate_p (rhs1)
5886 		  || !can_reassociate_p (rhs2)))
5887 	    continue;
5888 
5889 	  if (associative_tree_code (rhs_code))
5890 	    {
5891 	      auto_vec<operand_entry *> ops;
5892 	      tree powi_result = NULL_TREE;
5893 	      bool is_vector = VECTOR_TYPE_P (TREE_TYPE (lhs));
5894 
5895 	      /* There may be no immediate uses left by the time we
5896 		 get here because we may have eliminated them all.  */
5897 	      if (TREE_CODE (lhs) == SSA_NAME && has_zero_uses (lhs))
5898 		continue;
5899 
5900 	      gimple_set_visited (stmt, true);
5901 	      linearize_expr_tree (&ops, stmt, true, true);
5902 	      ops.qsort (sort_by_operand_rank);
5903 	      int orig_len = ops.length ();
5904 	      optimize_ops_list (rhs_code, &ops);
5905 	      if (undistribute_ops_list (rhs_code, &ops,
5906 					 loop_containing_stmt (stmt)))
5907 		{
5908 		  ops.qsort (sort_by_operand_rank);
5909 		  optimize_ops_list (rhs_code, &ops);
5910 		}
5911 
5912 	      if (rhs_code == PLUS_EXPR
5913 		  && transform_add_to_multiply (&ops))
5914 		ops.qsort (sort_by_operand_rank);
5915 
5916 	      if (rhs_code == BIT_IOR_EXPR || rhs_code == BIT_AND_EXPR)
5917 		{
5918 		  if (is_vector)
5919 		    optimize_vec_cond_expr (rhs_code, &ops);
5920 		  else
5921 		    optimize_range_tests (rhs_code, &ops, NULL);
5922 	        }
5923 
5924 	      if (rhs_code == MULT_EXPR && !is_vector)
5925 	        {
5926 		  attempt_builtin_copysign (&ops);
5927 
5928 		  if (reassoc_insert_powi_p
5929 		      && flag_unsafe_math_optimizations)
5930 		    powi_result = attempt_builtin_powi (stmt, &ops);
5931 		}
5932 
5933 	      operand_entry *last;
5934 	      bool negate_result = false;
5935 	      if (ops.length () > 1
5936 		  && rhs_code == MULT_EXPR)
5937 		{
5938 		  last = ops.last ();
5939 		  if ((integer_minus_onep (last->op)
5940 		       || real_minus_onep (last->op))
5941 		      && !HONOR_SNANS (TREE_TYPE (lhs))
5942 		      && (!HONOR_SIGNED_ZEROS (TREE_TYPE (lhs))
5943 			  || !COMPLEX_FLOAT_TYPE_P (TREE_TYPE (lhs))))
5944 		    {
5945 		      ops.pop ();
5946 		      negate_result = true;
5947 		    }
5948 		}
5949 
5950 	      tree new_lhs = lhs;
5951 	      /* If the operand vector is now empty, all operands were
5952 		 consumed by the __builtin_powi optimization.  */
5953 	      if (ops.length () == 0)
5954 		transform_stmt_to_copy (&gsi, stmt, powi_result);
5955 	      else if (ops.length () == 1)
5956 		{
5957 		  tree last_op = ops.last ()->op;
5958 
5959 		  /* If the stmt that defines operand has to be inserted, insert it
5960 		     before the use.  */
5961 		  if (ops.last ()->stmt_to_insert)
5962 		    insert_stmt_before_use (stmt, ops.last ()->stmt_to_insert);
5963 		  if (powi_result)
5964 		    transform_stmt_to_multiply (&gsi, stmt, last_op,
5965 						powi_result);
5966 		  else
5967 		    transform_stmt_to_copy (&gsi, stmt, last_op);
5968 		}
5969 	      else
5970 		{
5971 		  machine_mode mode = TYPE_MODE (TREE_TYPE (lhs));
5972 		  int ops_num = ops.length ();
5973 		  int width = get_reassociation_width (ops_num, rhs_code, mode);
5974 
5975 		  if (dump_file && (dump_flags & TDF_DETAILS))
5976 		    fprintf (dump_file,
5977 			     "Width = %d was chosen for reassociation\n", width);
5978 
5979 
5980 		  /* For binary bit operations, if there are at least 3
5981 		     operands and the last operand in OPS is a constant,
5982 		     move it to the front.  This helps ensure that we generate
5983 		     (X & Y) & C rather than (X & C) & Y.  The former will
5984 		     often match a canonical bit test when we get to RTL.  */
5985 		  if (ops.length () > 2
5986 		      && (rhs_code == BIT_AND_EXPR
5987 		          || rhs_code == BIT_IOR_EXPR
5988 		          || rhs_code == BIT_XOR_EXPR)
5989 		      && TREE_CODE (ops.last ()->op) == INTEGER_CST)
5990 		    std::swap (*ops[0], *ops[ops_num - 1]);
5991 
5992 		  if (width > 1
5993 		      && ops.length () > 3)
5994 		    rewrite_expr_tree_parallel (as_a <gassign *> (stmt),
5995 						width, ops);
5996 		  else
5997                     {
5998                       /* When there are three operands left, we want
5999                          to make sure the ones that get the double
6000                          binary op are chosen wisely.  */
6001                       int len = ops.length ();
6002                       if (len >= 3)
6003                         swap_ops_for_binary_stmt (ops, len - 3, stmt);
6004 
6005 		      new_lhs = rewrite_expr_tree (stmt, 0, ops,
6006 						   powi_result != NULL
6007 						   || negate_result,
6008 						   len != orig_len);
6009                     }
6010 
6011 		  /* If we combined some repeated factors into a
6012 		     __builtin_powi call, multiply that result by the
6013 		     reassociated operands.  */
6014 		  if (powi_result)
6015 		    {
6016 		      gimple *mul_stmt, *lhs_stmt = SSA_NAME_DEF_STMT (lhs);
6017 		      tree type = TREE_TYPE (lhs);
6018 		      tree target_ssa = make_temp_ssa_name (type, NULL,
6019 							    "reassocpow");
6020 		      gimple_set_lhs (lhs_stmt, target_ssa);
6021 		      update_stmt (lhs_stmt);
6022 		      if (lhs != new_lhs)
6023 			{
6024 			  target_ssa = new_lhs;
6025 			  new_lhs = lhs;
6026 			}
6027 		      mul_stmt = gimple_build_assign (lhs, MULT_EXPR,
6028 						      powi_result, target_ssa);
6029 		      gimple_set_location (mul_stmt, gimple_location (stmt));
6030 		      gimple_set_uid (mul_stmt, gimple_uid (stmt));
6031 		      gsi_insert_after (&gsi, mul_stmt, GSI_NEW_STMT);
6032 		    }
6033 		}
6034 
6035 	      if (negate_result)
6036 		{
6037 		  stmt = SSA_NAME_DEF_STMT (lhs);
6038 		  tree tmp = make_ssa_name (TREE_TYPE (lhs));
6039 		  gimple_set_lhs (stmt, tmp);
6040 		  if (lhs != new_lhs)
6041 		    tmp = new_lhs;
6042 		  gassign *neg_stmt = gimple_build_assign (lhs, NEGATE_EXPR,
6043 							   tmp);
6044 		  gimple_set_uid (neg_stmt, gimple_uid (stmt));
6045 		  gsi_insert_after (&gsi, neg_stmt, GSI_NEW_STMT);
6046 		  update_stmt (stmt);
6047 		}
6048 	    }
6049 	}
6050     }
6051   for (son = first_dom_son (CDI_POST_DOMINATORS, bb);
6052        son;
6053        son = next_dom_son (CDI_POST_DOMINATORS, son))
6054     cfg_cleanup_needed |= reassociate_bb (son);
6055 
6056   return cfg_cleanup_needed;
6057 }
6058 
6059 /* Add jumps around shifts for range tests turned into bit tests.
6060    For each SSA_NAME VAR we have code like:
6061    VAR = ...; // final stmt of range comparison
6062    // bit test here...;
6063    OTHERVAR = ...; // final stmt of the bit test sequence
6064    RES = VAR | OTHERVAR;
6065    Turn the above into:
6066    VAR = ...;
6067    if (VAR != 0)
6068      goto <l3>;
6069    else
6070      goto <l2>;
6071    <l2>:
6072    // bit test here...;
6073    OTHERVAR = ...;
6074    <l3>:
6075    # RES = PHI<1(l1), OTHERVAR(l2)>;  */
6076 
6077 static void
6078 branch_fixup (void)
6079 {
6080   tree var;
6081   unsigned int i;
6082 
6083   FOR_EACH_VEC_ELT (reassoc_branch_fixups, i, var)
6084     {
6085       gimple *def_stmt = SSA_NAME_DEF_STMT (var);
6086       gimple *use_stmt;
6087       use_operand_p use;
6088       bool ok = single_imm_use (var, &use, &use_stmt);
6089       gcc_assert (ok
6090 		  && is_gimple_assign (use_stmt)
6091 		  && gimple_assign_rhs_code (use_stmt) == BIT_IOR_EXPR
6092 		  && gimple_bb (def_stmt) == gimple_bb (use_stmt));
6093 
6094       basic_block cond_bb = gimple_bb (def_stmt);
6095       basic_block then_bb = split_block (cond_bb, def_stmt)->dest;
6096       basic_block merge_bb = split_block (then_bb, use_stmt)->dest;
6097 
6098       gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
6099       gimple *g = gimple_build_cond (NE_EXPR, var,
6100 				     build_zero_cst (TREE_TYPE (var)),
6101 				     NULL_TREE, NULL_TREE);
6102       location_t loc = gimple_location (use_stmt);
6103       gimple_set_location (g, loc);
6104       gsi_insert_after (&gsi, g, GSI_NEW_STMT);
6105 
6106       edge etrue = make_edge (cond_bb, merge_bb, EDGE_TRUE_VALUE);
6107       etrue->probability = profile_probability::even ();
6108       edge efalse = find_edge (cond_bb, then_bb);
6109       efalse->flags = EDGE_FALSE_VALUE;
6110       efalse->probability -= etrue->probability;
6111       then_bb->count -= etrue->count ();
6112 
6113       tree othervar = NULL_TREE;
6114       if (gimple_assign_rhs1 (use_stmt) == var)
6115 	othervar = gimple_assign_rhs2 (use_stmt);
6116       else if (gimple_assign_rhs2 (use_stmt) == var)
6117 	othervar = gimple_assign_rhs1 (use_stmt);
6118       else
6119 	gcc_unreachable ();
6120       tree lhs = gimple_assign_lhs (use_stmt);
6121       gphi *phi = create_phi_node (lhs, merge_bb);
6122       add_phi_arg (phi, build_one_cst (TREE_TYPE (lhs)), etrue, loc);
6123       add_phi_arg (phi, othervar, single_succ_edge (then_bb), loc);
6124       gsi = gsi_for_stmt (use_stmt);
6125       gsi_remove (&gsi, true);
6126 
6127       set_immediate_dominator (CDI_DOMINATORS, merge_bb, cond_bb);
6128       set_immediate_dominator (CDI_POST_DOMINATORS, cond_bb, merge_bb);
6129     }
6130   reassoc_branch_fixups.release ();
6131 }
6132 
6133 void dump_ops_vector (FILE *file, vec<operand_entry *> ops);
6134 void debug_ops_vector (vec<operand_entry *> ops);
6135 
6136 /* Dump the operand entry vector OPS to FILE.  */
6137 
6138 void
6139 dump_ops_vector (FILE *file, vec<operand_entry *> ops)
6140 {
6141   operand_entry *oe;
6142   unsigned int i;
6143 
6144   FOR_EACH_VEC_ELT (ops, i, oe)
6145     {
6146       fprintf (file, "Op %d -> rank: %d, tree: ", i, oe->rank);
6147       print_generic_expr (file, oe->op);
6148       fprintf (file, "\n");
6149     }
6150 }
6151 
6152 /* Dump the operand entry vector OPS to STDERR.  */
6153 
6154 DEBUG_FUNCTION void
6155 debug_ops_vector (vec<operand_entry *> ops)
6156 {
6157   dump_ops_vector (stderr, ops);
6158 }
6159 
6160 /* Bubble up return status from reassociate_bb.  */
6161 
6162 static bool
6163 do_reassoc (void)
6164 {
6165   break_up_subtract_bb (ENTRY_BLOCK_PTR_FOR_FN (cfun));
6166   return reassociate_bb (EXIT_BLOCK_PTR_FOR_FN (cfun));
6167 }
6168 
6169 /* Initialize the reassociation pass.  */
6170 
6171 static void
6172 init_reassoc (void)
6173 {
6174   int i;
6175   long rank = 2;
6176   int *bbs = XNEWVEC (int, n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS);
6177 
6178   /* Find the loops, so that we can prevent moving calculations in
6179      them.  */
6180   loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
6181 
6182   memset (&reassociate_stats, 0, sizeof (reassociate_stats));
6183 
6184   next_operand_entry_id = 0;
6185 
6186   /* Reverse RPO (Reverse Post Order) will give us something where
6187      deeper loops come later.  */
6188   pre_and_rev_post_order_compute (NULL, bbs, false);
6189   bb_rank = XCNEWVEC (long, last_basic_block_for_fn (cfun));
6190   operand_rank = new hash_map<tree, long>;
6191 
6192   /* Give each default definition a distinct rank.  This includes
6193      parameters and the static chain.  Walk backwards over all
6194      SSA names so that we get proper rank ordering according
6195      to tree_swap_operands_p.  */
6196   for (i = num_ssa_names - 1; i > 0; --i)
6197     {
6198       tree name = ssa_name (i);
6199       if (name && SSA_NAME_IS_DEFAULT_DEF (name))
6200 	insert_operand_rank (name, ++rank);
6201     }
6202 
6203   /* Set up rank for each BB  */
6204   for (i = 0; i < n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS; i++)
6205     bb_rank[bbs[i]] = ++rank << 16;
6206 
6207   free (bbs);
6208   calculate_dominance_info (CDI_POST_DOMINATORS);
6209   plus_negates = vNULL;
6210 }
6211 
6212 /* Cleanup after the reassociation pass, and print stats if
6213    requested.  */
6214 
6215 static void
6216 fini_reassoc (void)
6217 {
6218   statistics_counter_event (cfun, "Linearized",
6219 			    reassociate_stats.linearized);
6220   statistics_counter_event (cfun, "Constants eliminated",
6221 			    reassociate_stats.constants_eliminated);
6222   statistics_counter_event (cfun, "Ops eliminated",
6223 			    reassociate_stats.ops_eliminated);
6224   statistics_counter_event (cfun, "Statements rewritten",
6225 			    reassociate_stats.rewritten);
6226   statistics_counter_event (cfun, "Built-in pow[i] calls encountered",
6227 			    reassociate_stats.pows_encountered);
6228   statistics_counter_event (cfun, "Built-in powi calls created",
6229 			    reassociate_stats.pows_created);
6230 
6231   delete operand_rank;
6232   operand_entry_pool.release ();
6233   free (bb_rank);
6234   plus_negates.release ();
6235   free_dominance_info (CDI_POST_DOMINATORS);
6236   loop_optimizer_finalize ();
6237 }
6238 
6239 /* Gate and execute functions for Reassociation.  If INSERT_POWI_P, enable
6240    insertion of __builtin_powi calls.
6241 
6242    Returns TODO_cfg_cleanup if a CFG cleanup pass is desired due to
6243    optimization of a gimple conditional.  Otherwise returns zero.  */
6244 
6245 static unsigned int
6246 execute_reassoc (bool insert_powi_p)
6247 {
6248   reassoc_insert_powi_p = insert_powi_p;
6249 
6250   init_reassoc ();
6251 
6252   bool cfg_cleanup_needed;
6253   cfg_cleanup_needed = do_reassoc ();
6254   repropagate_negates ();
6255   branch_fixup ();
6256 
6257   fini_reassoc ();
6258   return cfg_cleanup_needed ? TODO_cleanup_cfg : 0;
6259 }
6260 
6261 namespace {
6262 
6263 const pass_data pass_data_reassoc =
6264 {
6265   GIMPLE_PASS, /* type */
6266   "reassoc", /* name */
6267   OPTGROUP_NONE, /* optinfo_flags */
6268   TV_TREE_REASSOC, /* tv_id */
6269   ( PROP_cfg | PROP_ssa ), /* properties_required */
6270   0, /* properties_provided */
6271   0, /* properties_destroyed */
6272   0, /* todo_flags_start */
6273   TODO_update_ssa_only_virtuals, /* todo_flags_finish */
6274 };
6275 
6276 class pass_reassoc : public gimple_opt_pass
6277 {
6278 public:
6279   pass_reassoc (gcc::context *ctxt)
6280     : gimple_opt_pass (pass_data_reassoc, ctxt), insert_powi_p (false)
6281   {}
6282 
6283   /* opt_pass methods: */
6284   opt_pass * clone () { return new pass_reassoc (m_ctxt); }
6285   void set_pass_param (unsigned int n, bool param)
6286     {
6287       gcc_assert (n == 0);
6288       insert_powi_p = param;
6289     }
6290   virtual bool gate (function *) { return flag_tree_reassoc != 0; }
6291   virtual unsigned int execute (function *)
6292     { return execute_reassoc (insert_powi_p); }
6293 
6294  private:
6295   /* Enable insertion of __builtin_powi calls during execute_reassoc.  See
6296      point 3a in the pass header comment.  */
6297   bool insert_powi_p;
6298 }; // class pass_reassoc
6299 
6300 } // anon namespace
6301 
6302 gimple_opt_pass *
6303 make_pass_reassoc (gcc::context *ctxt)
6304 {
6305   return new pass_reassoc (ctxt);
6306 }
6307