1 /* Reassociation for trees.
2    Copyright (C) 2005-2019 Free Software Foundation, Inc.
3    Contributed by Daniel Berlin <dan@dberlin.org>
4 
5 This file is part of GCC.
6 
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
11 
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 GNU General Public License for more details.
16 
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3.  If not see
19 <http://www.gnu.org/licenses/>.  */
20 
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "backend.h"
25 #include "target.h"
26 #include "rtl.h"
27 #include "tree.h"
28 #include "gimple.h"
29 #include "cfghooks.h"
30 #include "alloc-pool.h"
31 #include "tree-pass.h"
32 #include "memmodel.h"
33 #include "tm_p.h"
34 #include "ssa.h"
35 #include "optabs-tree.h"
36 #include "gimple-pretty-print.h"
37 #include "diagnostic-core.h"
38 #include "fold-const.h"
39 #include "stor-layout.h"
40 #include "cfganal.h"
41 #include "gimple-fold.h"
42 #include "tree-eh.h"
43 #include "gimple-iterator.h"
44 #include "gimplify-me.h"
45 #include "tree-cfg.h"
46 #include "tree-ssa-loop.h"
47 #include "flags.h"
48 #include "tree-ssa.h"
49 #include "langhooks.h"
50 #include "cfgloop.h"
51 #include "params.h"
52 #include "builtins.h"
53 #include "gimplify.h"
54 #include "case-cfn-macros.h"
55 
56 /*  This is a simple global reassociation pass.  It is, in part, based
57     on the LLVM pass of the same name (They do some things more/less
58     than we do, in different orders, etc).
59 
60     It consists of five steps:
61 
62     1. Breaking up subtract operations into addition + negate, where
63     it would promote the reassociation of adds.
64 
65     2. Left linearization of the expression trees, so that (A+B)+(C+D)
66     becomes (((A+B)+C)+D), which is easier for us to rewrite later.
67     During linearization, we place the operands of the binary
68     expressions into a vector of operand_entry_*
69 
70     3. Optimization of the operand lists, eliminating things like a +
71     -a, a & a, etc.
72 
73     3a. Combine repeated factors with the same occurrence counts
74     into a __builtin_powi call that will later be optimized into
75     an optimal number of multiplies.
76 
77     4. Rewrite the expression trees we linearized and optimized so
78     they are in proper rank order.
79 
80     5. Repropagate negates, as nothing else will clean them up ATM.
81 
82     A bit of theory on #4, since nobody seems to write anything down
83     about why it makes sense to do it the way they do it:
84 
85     We could do this much nicer theoretically, but don't (for reasons
86     explained after how to do it theoretically nice :P).
87 
88     In order to promote the most redundancy elimination, you want
89     binary expressions whose operands are the same rank (or
90     preferably, the same value) exposed to the redundancy eliminator,
91     for possible elimination.
92 
93     So the way to do this if we really cared, is to build the new op
94     tree from the leaves to the roots, merging as you go, and putting the
95     new op on the end of the worklist, until you are left with one
96     thing on the worklist.
97 
98     IE if you have to rewrite the following set of operands (listed with
99     rank in parentheses), with opcode PLUS_EXPR:
100 
101     a (1),  b (1),  c (1),  d (2), e (2)
102 
103 
104     We start with our merge worklist empty, and the ops list with all of
105     those on it.
106 
107     You want to first merge all leaves of the same rank, as much as
108     possible.
109 
110     So first build a binary op of
111 
112     mergetmp = a + b, and put "mergetmp" on the merge worklist.
113 
114     Because there is no three operand form of PLUS_EXPR, c is not going to
115     be exposed to redundancy elimination as a rank 1 operand.
116 
117     So you might as well throw it on the merge worklist (you could also
118     consider it to now be a rank two operand, and merge it with d and e,
119     but in this case, you then have evicted e from a binary op. So at
120     least in this situation, you can't win.)
121 
122     Then build a binary op of d + e
123     mergetmp2 = d + e
124 
125     and put mergetmp2 on the merge worklist.
126 
127     so merge worklist = {mergetmp, c, mergetmp2}
128 
129     Continue building binary ops of these operations until you have only
130     one operation left on the worklist.
131 
132     So we have
133 
134     build binary op
135     mergetmp3 = mergetmp + c
136 
137     worklist = {mergetmp2, mergetmp3}
138 
139     mergetmp4 = mergetmp2 + mergetmp3
140 
141     worklist = {mergetmp4}
142 
143     because we have one operation left, we can now just set the original
144     statement equal to the result of that operation.
145 
146     This will at least expose a + b  and d + e to redundancy elimination
147     as binary operations.
148 
149     For extra points, you can reuse the old statements to build the
150     mergetmps, since you shouldn't run out.
151 
152     So why don't we do this?
153 
154     Because it's expensive, and rarely will help.  Most trees we are
155     reassociating have 3 or fewer ops.  If they have 2 ops, they will
156     already be written into a nice single binary op.  If you have 3 ops, a
157     single simple check suffices to tell you whether the first two are of the
158     same rank.  If so, you know to order it
159 
160     mergetmp = op1 + op2
161     newstmt = mergetmp + op3
162 
163     instead of
164     mergetmp = op2 + op3
165     newstmt = mergetmp + op1
166 
167     If all three are of the same rank, you can't expose them all in a
168     single binary operator anyway, so the above is *still* the best you
169     can do.
170 
171     Thus, this is what we do.  When we have three ops left, we check to see
172     what order to put them in, and call it a day.  As a nod to vector sum
173     reduction, we check if any of the ops are really a phi node that is a
174     destructive update for the associating op, and keep the destructive
175     update together for vector sum reduction recognition.  */
176 
177 /* Enable insertion of __builtin_powi calls during execute_reassoc.  See
178    point 3a in the pass header comment.  */
179 static bool reassoc_insert_powi_p;
180 
181 /* Statistics */
182 static struct
183 {
184   int linearized;
185   int constants_eliminated;
186   int ops_eliminated;
187   int rewritten;
188   int pows_encountered;
189   int pows_created;
190 } reassociate_stats;
191 
192 /* Operator, rank pair.  */
193 struct operand_entry
194 {
195   unsigned int rank;
196   unsigned int id;
197   tree op;
198   unsigned int count;
199   gimple *stmt_to_insert;
200 };
201 
202 static object_allocator<operand_entry> operand_entry_pool
203   ("operand entry pool");
204 
205 /* This is used to assign a unique ID to each struct operand_entry
206    so that qsort results are identical on different hosts.  */
207 static unsigned int next_operand_entry_id;
208 
209 /* Starting rank number for a given basic block, so that we can rank
210    operations using unmovable instructions in that BB based on the bb
211    depth.  */
212 static long *bb_rank;
213 
214 /* Operand->rank hashtable.  */
215 static hash_map<tree, long> *operand_rank;
216 
217 /* Vector of SSA_NAMEs for which, after reassociate_bb is done with
218    all basic blocks, the CFG should be adjusted: the containing basic
219    block is split right after that SSA_NAME's definition statement and
220    before its only use, which must be a bit ior.  */
221 static vec<tree> reassoc_branch_fixups;
222 
223 /* Forward decls.  */
224 static long get_rank (tree);
225 static bool reassoc_stmt_dominates_stmt_p (gimple *, gimple *);
226 
227 /* Wrapper around gsi_remove, which adjusts gimple_uid of debug stmts
228    possibly added by gsi_remove.  */
229 
230 bool
231 reassoc_remove_stmt (gimple_stmt_iterator *gsi)
232 {
233   gimple *stmt = gsi_stmt (*gsi);
234 
235   if (!MAY_HAVE_DEBUG_BIND_STMTS || gimple_code (stmt) == GIMPLE_PHI)
236     return gsi_remove (gsi, true);
237 
238   gimple_stmt_iterator prev = *gsi;
239   gsi_prev (&prev);
240   unsigned uid = gimple_uid (stmt);
241   basic_block bb = gimple_bb (stmt);
242   bool ret = gsi_remove (gsi, true);
243   if (!gsi_end_p (prev))
244     gsi_next (&prev);
245   else
246     prev = gsi_start_bb (bb);
247   gimple *end_stmt = gsi_stmt (*gsi);
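  /* Debug stmts inserted by the removal start with gimple_uid 0; walk
     from PREV up to the removal point and give them the uid of the
     removed stmt so uid-based ordering checks stay valid.  */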
248   while ((stmt = gsi_stmt (prev)) != end_stmt)
249     {
250       gcc_assert (stmt && is_gimple_debug (stmt) && gimple_uid (stmt) == 0);
251       gimple_set_uid (stmt, uid);
252       gsi_next (&prev);
253     }
254   return ret;
255 }
256 
257 /* Bias amount for loop-carried phis.  We want this to be larger than
258    the depth of any reassociation tree we can see, but not larger than
259    the rank difference between two blocks.  */
260 #define PHI_LOOP_BIAS (1 << 15)
261 
262 /* Rank assigned to a phi statement.  If STMT is a loop-carried phi of
263    an innermost loop, and the phi has only a single use which is inside
264    the loop, then the rank is the block rank of the loop latch plus an
265    extra bias for the loop-carried dependence.  This causes expressions
266    calculated into an accumulator variable to be independent for each
267    iteration of the loop.  If STMT is some other phi, the rank is the
268    block rank of its containing block.  */
269 static long
270 phi_rank (gimple *stmt)
271 {
272   basic_block bb = gimple_bb (stmt);
273   struct loop *father = bb->loop_father;
274   tree res;
275   unsigned i;
276   use_operand_p use;
277   gimple *use_stmt;
278 
279   /* We only care about real loops (those with a latch).  */
280   if (!father->latch)
281     return bb_rank[bb->index];
282 
283   /* Interesting phis must be in headers of innermost loops.  */
284   if (bb != father->header
285       || father->inner)
286     return bb_rank[bb->index];
287 
288   /* Ignore virtual SSA_NAMEs.  */
289   res = gimple_phi_result (stmt);
290   if (virtual_operand_p (res))
291     return bb_rank[bb->index];
292 
293   /* The phi definition must have a single use, and that use must be
294      within the loop.  Otherwise this isn't an accumulator pattern.  */
295   if (!single_imm_use (res, &use, &use_stmt)
296       || gimple_bb (use_stmt)->loop_father != father)
297     return bb_rank[bb->index];
298 
299   /* Look for phi arguments from within the loop.  If found, bias this phi.  */
300   for (i = 0; i < gimple_phi_num_args (stmt); i++)
301     {
302       tree arg = gimple_phi_arg_def (stmt, i);
303       if (TREE_CODE (arg) == SSA_NAME
304 	  && !SSA_NAME_IS_DEFAULT_DEF (arg))
305 	{
306 	  gimple *def_stmt = SSA_NAME_DEF_STMT (arg);
307 	  if (gimple_bb (def_stmt)->loop_father == father)
308 	    return bb_rank[father->latch->index] + PHI_LOOP_BIAS;
309 	}
310     }
311 
312   /* Must be an uninteresting phi.  */
313   return bb_rank[bb->index];
314 }
315 
316 /* If EXP is an SSA_NAME defined by a PHI statement that represents a
317    loop-carried dependence of an innermost loop, return TRUE; else
318    return FALSE.  */
319 static bool
320 loop_carried_phi (tree exp)
321 {
322   gimple *phi_stmt;
323   long block_rank;
324 
325   if (TREE_CODE (exp) != SSA_NAME
326       || SSA_NAME_IS_DEFAULT_DEF (exp))
327     return false;
328 
329   phi_stmt = SSA_NAME_DEF_STMT (exp);
330 
331   if (gimple_code (SSA_NAME_DEF_STMT (exp)) != GIMPLE_PHI)
332     return false;
333 
334   /* Non-loop-carried phis have block rank.  Loop-carried phis have
335      an additional bias added in.  If this phi doesn't have block rank,
336      it's biased and should not be propagated.  */
337   block_rank = bb_rank[gimple_bb (phi_stmt)->index];
338 
339   if (phi_rank (phi_stmt) != block_rank)
340     return true;
341 
342   return false;
343 }
344 
345 /* Return the maximum of RANK and the rank that should be propagated
346    from expression OP.  For most operands, this is just the rank of OP.
347    For loop-carried phis, the value is zero to avoid undoing the bias
348    in favor of the phi.  */
349 static long
350 propagate_rank (long rank, tree op)
351 {
352   long op_rank;
353 
354   if (loop_carried_phi (op))
355     return rank;
356 
357   op_rank = get_rank (op);
358 
359   return MAX (rank, op_rank);
360 }
361 
362 /* Look up the operand rank structure for expression E.  */
363 
364 static inline long
365 find_operand_rank (tree e)
366 {
367   long *slot = operand_rank->get (e);
368   return slot ? *slot : -1;
369 }
370 
371 /* Insert {E,RANK} into the operand rank hashtable.  */
372 
373 static inline void
374 insert_operand_rank (tree e, long rank)
375 {
376   gcc_assert (rank > 0);
377   gcc_assert (!operand_rank->put (e, rank));
378 }
379 
380 /* Given an expression E, return the rank of the expression.  */
381 
382 static long
383 get_rank (tree e)
384 {
385   /* SSA_NAMEs have the rank of the expression they are the result
386      of.
387      For globals and uninitialized values, the rank is 0.
388      For function arguments, use the pre-setup rank.
389      For PHI nodes, stores, asm statements, etc, we use the rank of
390      the BB.
391      For simple operations, the rank is the maximum rank of any of
392      its operands, or the bb_rank, whichever is less.
393      I make no claims that this is optimal, however, it gives good
394      results.  */
395 
396   /* We make an exception to the normal ranking system to break
397      dependences of accumulator variables in loops.  Suppose we
398      have a simple one-block loop containing:
399 
400        x_1 = phi(x_0, x_2)
401        b = a + x_1
402        c = b + d
403        x_2 = c + e
404 
405      As shown, each iteration of the calculation into x is fully
406      dependent upon the iteration before it.  We would prefer to
407      see this in the form:
408 
409        x_1 = phi(x_0, x_2)
410        b = a + d
411        c = b + e
412        x_2 = c + x_1
413 
414      If the loop is unrolled, the calculations of b and c from
415      different iterations can be interleaved.
416 
417      To obtain this result during reassociation, we bias the rank
418      of the phi definition x_1 upward, when it is recognized as an
419      accumulator pattern.  The artificial rank causes it to be
420      added last, providing the desired independence.  */
421 
422   if (TREE_CODE (e) == SSA_NAME)
423     {
424       ssa_op_iter iter;
425       gimple *stmt;
426       long rank;
427       tree op;
428 
429       if (SSA_NAME_IS_DEFAULT_DEF (e))
430 	return find_operand_rank (e);
431 
432       stmt = SSA_NAME_DEF_STMT (e);
433       if (gimple_code (stmt) == GIMPLE_PHI)
434 	return phi_rank (stmt);
435 
436       if (!is_gimple_assign (stmt))
437 	return bb_rank[gimple_bb (stmt)->index];
438 
439       /* If we already have a rank for this expression, use that.  */
440       rank = find_operand_rank (e);
441       if (rank != -1)
442 	return rank;
443 
444       /* Otherwise, find the maximum rank for the operands.  As an
445 	 exception, remove the bias from loop-carried phis when propagating
446 	 the rank so that dependent operations are not also biased.  */
447       /* Simply walk over all SSA uses - this takes advantage of the
448          fact that non-SSA operands are is_gimple_min_invariant and
449 	 thus have rank 0.  */
450       rank = 0;
451       FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
452 	rank = propagate_rank (rank, op);
453 
454       if (dump_file && (dump_flags & TDF_DETAILS))
455 	{
456 	  fprintf (dump_file, "Rank for ");
457 	  print_generic_expr (dump_file, e);
458 	  fprintf (dump_file, " is %ld\n", (rank + 1));
459 	}
460 
461       /* Note the rank in the hashtable so we don't recompute it.  */
462       insert_operand_rank (e, (rank + 1));
463       return (rank + 1);
464     }
465 
466   /* Constants, globals, etc., are rank 0 */
467   return 0;
468 }
469 
470 
471 /* We want integer ones to end up last no matter what, since they are
472    the ones we can do the most with.  */
473 #define INTEGER_CONST_TYPE 1 << 4
474 #define FLOAT_ONE_CONST_TYPE 1 << 3
475 #define FLOAT_CONST_TYPE 1 << 2
476 #define OTHER_CONST_TYPE 1 << 1
477 
478 /* Classify an invariant tree into integer, float, or other, so that
479    we can sort them to be near other constants of the same type.  */
480 static inline int
481 constant_type (tree t)
482 {
483   if (INTEGRAL_TYPE_P (TREE_TYPE (t)))
484     return INTEGER_CONST_TYPE;
485   else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (t)))
486     {
487       /* Sort -1.0 and 1.0 constants last: while in some cases
488 	 const_binop can't optimize some inexact operations, multiplication
489 	 by -1.0 or 1.0 can always be merged with others.  */
490       if (real_onep (t) || real_minus_onep (t))
491 	return FLOAT_ONE_CONST_TYPE;
492       return FLOAT_CONST_TYPE;
493     }
494   else
495     return OTHER_CONST_TYPE;
496 }
497 
498 /* qsort comparison function to sort operand entries PA and PB by rank
499    so that the sorted array is ordered by rank in decreasing order.  */
500 static int
501 sort_by_operand_rank (const void *pa, const void *pb)
502 {
503   const operand_entry *oea = *(const operand_entry *const *)pa;
504   const operand_entry *oeb = *(const operand_entry *const *)pb;
505 
506   if (oeb->rank != oea->rank)
507     return oeb->rank > oea->rank ? 1 : -1;
508 
509   /* It's nicer for optimize_expression if constants that are likely
510      to fold when added/multiplied/whatever are put next to each
511      other.  Since all constants have rank 0, order them by type.  */
512   if (oea->rank == 0)
513     {
514       if (constant_type (oeb->op) != constant_type (oea->op))
515 	return constant_type (oea->op) - constant_type (oeb->op);
516       else
517 	/* To make sorting result stable, we use unique IDs to determine
518 	   order.  */
519 	return oeb->id > oea->id ? 1 : -1;
520     }
521 
522   if (TREE_CODE (oea->op) != SSA_NAME)
523     {
524       if (TREE_CODE (oeb->op) != SSA_NAME)
525 	return oeb->id > oea->id ? 1 : -1;
526       else
527 	return 1;
528     }
529   else if (TREE_CODE (oeb->op) != SSA_NAME)
530     return -1;
531 
532   /* Lastly, make sure the versions that are the same go next to each
533      other.  */
534   if (SSA_NAME_VERSION (oeb->op) != SSA_NAME_VERSION (oea->op))
535     {
536       /* SSA_NAME_VERSION is assigned pretty randomly because we reuse
537 	 versions of removed SSA_NAMEs, so if possible, prefer to sort
538 	 based on basic block and gimple_uid of the SSA_NAME_DEF_STMT.
539 	 See PR60418.  */
540       gimple *stmta = SSA_NAME_DEF_STMT (oea->op);
541       gimple *stmtb = SSA_NAME_DEF_STMT (oeb->op);
542       basic_block bba = gimple_bb (stmta);
543       basic_block bbb = gimple_bb (stmtb);
544       if (bbb != bba)
545 	{
546 	  /* One of the SSA_NAMEs can be defined in oeN->stmt_to_insert
547 	     but the other might not be.  */
548 	  if (!bba)
549 	    return 1;
550 	  if (!bbb)
551 	    return -1;
552 	  /* If neither is, compare bb_rank.  */
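	  /* bb_rank values are basic block ranks scaled by 1 << 16 (see
	     init_reassoc), so compare the scaled-down values to keep the
	     difference within the int return range.  */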
553 	  if (bb_rank[bbb->index] != bb_rank[bba->index])
554 	    return (bb_rank[bbb->index] >> 16) - (bb_rank[bba->index] >> 16);
555 	}
556 
557       bool da = reassoc_stmt_dominates_stmt_p (stmta, stmtb);
558       bool db = reassoc_stmt_dominates_stmt_p (stmtb, stmta);
559       if (da != db)
560 	return da ? 1 : -1;
561 
562       return SSA_NAME_VERSION (oeb->op) > SSA_NAME_VERSION (oea->op) ? 1 : -1;
563     }
564 
565   return oeb->id > oea->id ? 1 : -1;
566 }
567 
568 /* Add an operand entry to *OPS for the tree operand OP.  */
569 
570 static void
571 add_to_ops_vec (vec<operand_entry *> *ops, tree op, gimple *stmt_to_insert = NULL)
572 {
573   operand_entry *oe = operand_entry_pool.allocate ();
574 
575   oe->op = op;
576   oe->rank = get_rank (op);
577   oe->id = next_operand_entry_id++;
578   oe->count = 1;
579   oe->stmt_to_insert = stmt_to_insert;
580   ops->safe_push (oe);
581 }
582 
583 /* Add an operand entry to *OPS for the tree operand OP with repeat
584    count REPEAT.  */
585 
586 static void
587 add_repeat_to_ops_vec (vec<operand_entry *> *ops, tree op,
588 		       HOST_WIDE_INT repeat)
589 {
590   operand_entry *oe = operand_entry_pool.allocate ();
591 
592   oe->op = op;
593   oe->rank = get_rank (op);
594   oe->id = next_operand_entry_id++;
595   oe->count = repeat;
596   oe->stmt_to_insert = NULL;
597   ops->safe_push (oe);
598 
599   reassociate_stats.pows_encountered++;
600 }
601 
602 /* Return true if STMT is a reassociable operation containing a binary
603    operation with tree code CODE, and is inside LOOP.  */
604 
605 static bool
606 is_reassociable_op (gimple *stmt, enum tree_code code, struct loop *loop)
607 {
608   basic_block bb = gimple_bb (stmt);
609 
610   if (gimple_bb (stmt) == NULL)
611     return false;
612 
613   if (!flow_bb_inside_loop_p (loop, bb))
614     return false;
615 
616   if (is_gimple_assign (stmt)
617       && gimple_assign_rhs_code (stmt) == code
618       && has_single_use (gimple_assign_lhs (stmt)))
619     {
620       tree rhs1 = gimple_assign_rhs1 (stmt);
621       tree rhs2 = gimple_assign_rhs2 (stmt);
622       if (TREE_CODE (rhs1) == SSA_NAME
623 	  && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (rhs1))
624 	return false;
625       if (rhs2
626 	  && TREE_CODE (rhs2) == SSA_NAME
627 	  && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (rhs2))
628 	return false;
629       return true;
630     }
631 
632   return false;
633 }
634 
635 
636 /* Return true if STMT is a nop-conversion.  */
637 
638 static bool
639 gimple_nop_conversion_p (gimple *stmt)
640 {
641   if (gassign *ass = dyn_cast <gassign *> (stmt))
642     {
643       if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (ass))
644 	  && tree_nop_conversion_p (TREE_TYPE (gimple_assign_lhs (ass)),
645 				    TREE_TYPE (gimple_assign_rhs1 (ass))))
646 	return true;
647     }
648   return false;
649 }
650 
651 /* Given NAME, if NAME is defined by a unary operation OPCODE, return the
652    operand of that operation.  Otherwise, return NULL_TREE.  */
653 
654 static tree
655 get_unary_op (tree name, enum tree_code opcode)
656 {
657   gimple *stmt = SSA_NAME_DEF_STMT (name);
658 
659   /* Look through nop conversions (sign changes).  */
660   if (gimple_nop_conversion_p (stmt)
661       && TREE_CODE (gimple_assign_rhs1 (stmt)) == SSA_NAME)
662     stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
663 
664   if (!is_gimple_assign (stmt))
665     return NULL_TREE;
666 
667   if (gimple_assign_rhs_code (stmt) == opcode)
668     return gimple_assign_rhs1 (stmt);
669   return NULL_TREE;
670 }
671 
672 /* Return true if OP1 and OP2 have the same value when cast to either type.  */
673 
674 static bool
675 ops_equal_values_p (tree op1, tree op2)
676 {
677   if (op1 == op2)
678     return true;
679 
680   tree orig_op1 = op1;
681   if (TREE_CODE (op1) == SSA_NAME)
682     {
683       gimple *stmt = SSA_NAME_DEF_STMT (op1);
684       if (gimple_nop_conversion_p (stmt))
685 	{
686 	  op1 = gimple_assign_rhs1 (stmt);
687 	  if (op1 == op2)
688 	    return true;
689 	}
690     }
691 
692   if (TREE_CODE (op2) == SSA_NAME)
693     {
694       gimple *stmt = SSA_NAME_DEF_STMT (op2);
695       if (gimple_nop_conversion_p (stmt))
696 	{
697 	  op2 = gimple_assign_rhs1 (stmt);
698 	  if (op1 == op2
699 	      || orig_op1 == op2)
700 	    return true;
701 	}
702     }
703 
704   return false;
705 }
706 
707 
708 /* If CURR and LAST are a pair of ops that OPCODE allows us to
709    eliminate through equivalences, do so, remove them from OPS, and
710    return true.  Otherwise, return false.  */
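/* For example, in "a & a & b" one copy of "a" can be dropped, while in
   "a ^ a ^ b" both copies of "a" cancel, leaving just "b" (or a zero
   constant when nothing else remains).  */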
711 
712 static bool
713 eliminate_duplicate_pair (enum tree_code opcode,
714 			  vec<operand_entry *> *ops,
715 			  bool *all_done,
716 			  unsigned int i,
717 			  operand_entry *curr,
718 			  operand_entry *last)
719 {
720 
721   /* If we have two of the same op, and the opcode is & |, min, or max,
722      we can eliminate one of them.
723      If we have two of the same op, and the opcode is ^, we can
724      eliminate both of them.  */
725 
726   if (last && last->op == curr->op)
727     {
728       switch (opcode)
729 	{
730 	case MAX_EXPR:
731 	case MIN_EXPR:
732 	case BIT_IOR_EXPR:
733 	case BIT_AND_EXPR:
734 	  if (dump_file && (dump_flags & TDF_DETAILS))
735 	    {
736 	      fprintf (dump_file, "Equivalence: ");
737 	      print_generic_expr (dump_file, curr->op);
738 	      fprintf (dump_file, " [&|minmax] ");
739 	      print_generic_expr (dump_file, last->op);
740 	      fprintf (dump_file, " -> ");
741 	      print_generic_stmt (dump_file, last->op);
742 	    }
743 
744 	  ops->ordered_remove (i);
745 	  reassociate_stats.ops_eliminated ++;
746 
747 	  return true;
748 
749 	case BIT_XOR_EXPR:
750 	  if (dump_file && (dump_flags & TDF_DETAILS))
751 	    {
752 	      fprintf (dump_file, "Equivalence: ");
753 	      print_generic_expr (dump_file, curr->op);
754 	      fprintf (dump_file, " ^ ");
755 	      print_generic_expr (dump_file, last->op);
756 	      fprintf (dump_file, " -> nothing\n");
757 	    }
758 
759 	  reassociate_stats.ops_eliminated += 2;
760 
761 	  if (ops->length () == 2)
762 	    {
763 	      ops->truncate (0);
764 	      add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (last->op)));
765 	      *all_done = true;
766 	    }
767 	  else
768 	    {
769 	      ops->ordered_remove (i-1);
770 	      ops->ordered_remove (i-1);
771 	    }
772 
773 	  return true;
774 
775 	default:
776 	  break;
777 	}
778     }
779   return false;
780 }
781 
782 static vec<tree> plus_negates;
783 
784 /* If OPCODE is PLUS_EXPR and CURR->OP is a negate expression or a bitwise
785    not expression, look in OPS for a corresponding positive operation to
786    cancel it out.  If we find one, remove the other from OPS, replace
787    OPS[CURRINDEX] with 0 or -1, respectively, and return true.  Otherwise,
788    return false.  */
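/* For example, in "(-b) + a + b" the pair cancels to a 0 operand, and in
   "(~b) + a + b" it combines to a -1 operand.  */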
789 
790 static bool
791 eliminate_plus_minus_pair (enum tree_code opcode,
792 			   vec<operand_entry *> *ops,
793 			   unsigned int currindex,
794 			   operand_entry *curr)
795 {
796   tree negateop;
797   tree notop;
798   unsigned int i;
799   operand_entry *oe;
800 
801   if (opcode != PLUS_EXPR || TREE_CODE (curr->op) != SSA_NAME)
802     return false;
803 
804   negateop = get_unary_op (curr->op, NEGATE_EXPR);
805   notop = get_unary_op (curr->op, BIT_NOT_EXPR);
806   if (negateop == NULL_TREE && notop == NULL_TREE)
807     return false;
808 
809   /* Any non-negated version will have a rank that is one less than
810      the current rank.  So once we hit those ranks, if we don't find
811      one, we can stop.  */
812 
813   for (i = currindex + 1;
814        ops->iterate (i, &oe)
815        && oe->rank >= curr->rank - 1 ;
816        i++)
817     {
818       if (negateop
819 	  && ops_equal_values_p (oe->op, negateop))
820 	{
821 	  if (dump_file && (dump_flags & TDF_DETAILS))
822 	    {
823 	      fprintf (dump_file, "Equivalence: ");
824 	      print_generic_expr (dump_file, negateop);
825 	      fprintf (dump_file, " + -");
826 	      print_generic_expr (dump_file, oe->op);
827 	      fprintf (dump_file, " -> 0\n");
828 	    }
829 
830 	  ops->ordered_remove (i);
831 	  add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (oe->op)));
832 	  ops->ordered_remove (currindex);
833 	  reassociate_stats.ops_eliminated ++;
834 
835 	  return true;
836 	}
837       else if (notop
838 	       && ops_equal_values_p (oe->op, notop))
839 	{
840 	  tree op_type = TREE_TYPE (oe->op);
841 
842 	  if (dump_file && (dump_flags & TDF_DETAILS))
843 	    {
844 	      fprintf (dump_file, "Equivalence: ");
845 	      print_generic_expr (dump_file, notop);
846 	      fprintf (dump_file, " + ~");
847 	      print_generic_expr (dump_file, oe->op);
848 	      fprintf (dump_file, " -> -1\n");
849 	    }
850 
851 	  ops->ordered_remove (i);
852 	  add_to_ops_vec (ops, build_all_ones_cst (op_type));
853 	  ops->ordered_remove (currindex);
854 	  reassociate_stats.ops_eliminated ++;
855 
856 	  return true;
857 	}
858     }
859 
860   /* If CURR->OP is a negate expr without nop conversion in a plus expr:
861      save it for later inspection in repropagate_negates().  */
862   if (negateop != NULL_TREE
863       && gimple_assign_rhs_code (SSA_NAME_DEF_STMT (curr->op)) == NEGATE_EXPR)
864     plus_negates.safe_push (curr->op);
865 
866   return false;
867 }
868 
869 /* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR and CURR->OP is really a
870    bitwise not expression, look in OPS for a corresponding operand to
871    cancel it out.  If we find one, reduce the whole operand list to a
872    single 0 (for AND) or -1 (for IOR) constant and return true.
873    Otherwise, return false.  */
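/* For example, "a & ~a & b" reduces to 0 and "a | ~a | b" reduces to -1,
   regardless of the other operands.  */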
874 
875 static bool
876 eliminate_not_pairs (enum tree_code opcode,
877 		     vec<operand_entry *> *ops,
878 		     unsigned int currindex,
879 		     operand_entry *curr)
880 {
881   tree notop;
882   unsigned int i;
883   operand_entry *oe;
884 
885   if ((opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
886       || TREE_CODE (curr->op) != SSA_NAME)
887     return false;
888 
889   notop = get_unary_op (curr->op, BIT_NOT_EXPR);
890   if (notop == NULL_TREE)
891     return false;
892 
893   /* Any non-not version will have a rank that is one less than
894      the current rank.  So once we hit those ranks, if we don't find
895      one, we can stop.  */
896 
897   for (i = currindex + 1;
898        ops->iterate (i, &oe)
899        && oe->rank >= curr->rank - 1;
900        i++)
901     {
902       if (oe->op == notop)
903 	{
904 	  if (dump_file && (dump_flags & TDF_DETAILS))
905 	    {
906 	      fprintf (dump_file, "Equivalence: ");
907 	      print_generic_expr (dump_file, notop);
908 	      if (opcode == BIT_AND_EXPR)
909 		fprintf (dump_file, " & ~");
910 	      else if (opcode == BIT_IOR_EXPR)
911 		fprintf (dump_file, " | ~");
912 	      print_generic_expr (dump_file, oe->op);
913 	      if (opcode == BIT_AND_EXPR)
914 		fprintf (dump_file, " -> 0\n");
915 	      else if (opcode == BIT_IOR_EXPR)
916 		fprintf (dump_file, " -> -1\n");
917 	    }
918 
919 	  if (opcode == BIT_AND_EXPR)
920 	    oe->op = build_zero_cst (TREE_TYPE (oe->op));
921 	  else if (opcode == BIT_IOR_EXPR)
922 	    oe->op = build_all_ones_cst (TREE_TYPE (oe->op));
923 
924 	  reassociate_stats.ops_eliminated += ops->length () - 1;
925 	  ops->truncate (0);
926 	  ops->quick_push (oe);
927 	  return true;
928 	}
929     }
930 
931   return false;
932 }
933 
934 /* Use constant value that may be present in OPS to try to eliminate
935    operands.  Note that this function is only really used when we've
936    eliminated ops for other reasons, or merged constants.  Across
937    single statements, fold already does all of this, plus more.  There
938    is little point in duplicating logic, so I've only included the
939    identities that I could ever construct testcases to trigger.  */
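/* For example, "x & 0" collapses to 0, "x * 1" drops the 1, and "x + 0"
   drops the 0, mirroring what fold would do for a single statement.  */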
940 
941 static void
942 eliminate_using_constants (enum tree_code opcode,
943 			   vec<operand_entry *> *ops)
944 {
945   operand_entry *oelast = ops->last ();
946   tree type = TREE_TYPE (oelast->op);
947 
948   if (oelast->rank == 0
949       && (ANY_INTEGRAL_TYPE_P (type) || FLOAT_TYPE_P (type)))
950     {
951       switch (opcode)
952 	{
953 	case BIT_AND_EXPR:
954 	  if (integer_zerop (oelast->op))
955 	    {
956 	      if (ops->length () != 1)
957 		{
958 		  if (dump_file && (dump_flags & TDF_DETAILS))
959 		    fprintf (dump_file, "Found & 0, removing all other ops\n");
960 
961 		  reassociate_stats.ops_eliminated += ops->length () - 1;
962 
963 		  ops->truncate (0);
964 		  ops->quick_push (oelast);
965 		  return;
966 		}
967 	    }
968 	  else if (integer_all_onesp (oelast->op))
969 	    {
970 	      if (ops->length () != 1)
971 		{
972 		  if (dump_file && (dump_flags & TDF_DETAILS))
973 		    fprintf (dump_file, "Found & -1, removing\n");
974 		  ops->pop ();
975 		  reassociate_stats.ops_eliminated++;
976 		}
977 	    }
978 	  break;
979 	case BIT_IOR_EXPR:
980 	  if (integer_all_onesp (oelast->op))
981 	    {
982 	      if (ops->length () != 1)
983 		{
984 		  if (dump_file && (dump_flags & TDF_DETAILS))
985 		    fprintf (dump_file, "Found | -1, removing all other ops\n");
986 
987 		  reassociate_stats.ops_eliminated += ops->length () - 1;
988 
989 		  ops->truncate (0);
990 		  ops->quick_push (oelast);
991 		  return;
992 		}
993 	    }
994 	  else if (integer_zerop (oelast->op))
995 	    {
996 	      if (ops->length () != 1)
997 		{
998 		  if (dump_file && (dump_flags & TDF_DETAILS))
999 		    fprintf (dump_file, "Found | 0, removing\n");
1000 		  ops->pop ();
1001 		  reassociate_stats.ops_eliminated++;
1002 		}
1003 	    }
1004 	  break;
1005 	case MULT_EXPR:
1006 	  if (integer_zerop (oelast->op)
1007 	      || (FLOAT_TYPE_P (type)
1008 		  && !HONOR_NANS (type)
1009 		  && !HONOR_SIGNED_ZEROS (type)
1010 		  && real_zerop (oelast->op)))
1011 	    {
1012 	      if (ops->length () != 1)
1013 		{
1014 		  if (dump_file && (dump_flags & TDF_DETAILS))
1015 		    fprintf (dump_file, "Found * 0, removing all other ops\n");
1016 
1017 		  reassociate_stats.ops_eliminated += ops->length () - 1;
1018 		  ops->truncate (0);
1019 		  ops->quick_push (oelast);
1020 		  return;
1021 		}
1022 	    }
1023 	  else if (integer_onep (oelast->op)
1024 		   || (FLOAT_TYPE_P (type)
1025 		       && !HONOR_SNANS (type)
1026 		       && real_onep (oelast->op)))
1027 	    {
1028 	      if (ops->length () != 1)
1029 		{
1030 		  if (dump_file && (dump_flags & TDF_DETAILS))
1031 		    fprintf (dump_file, "Found * 1, removing\n");
1032 		  ops->pop ();
1033 		  reassociate_stats.ops_eliminated++;
1034 		  return;
1035 		}
1036 	    }
1037 	  break;
1038 	case BIT_XOR_EXPR:
1039 	case PLUS_EXPR:
1040 	case MINUS_EXPR:
1041 	  if (integer_zerop (oelast->op)
1042 	      || (FLOAT_TYPE_P (type)
1043 		  && (opcode == PLUS_EXPR || opcode == MINUS_EXPR)
1044 		  && fold_real_zero_addition_p (type, oelast->op,
1045 						opcode == MINUS_EXPR)))
1046 	    {
1047 	      if (ops->length () != 1)
1048 		{
1049 		  if (dump_file && (dump_flags & TDF_DETAILS))
1050 		    fprintf (dump_file, "Found [|^+] 0, removing\n");
1051 		  ops->pop ();
1052 		  reassociate_stats.ops_eliminated++;
1053 		  return;
1054 		}
1055 	    }
1056 	  break;
1057 	default:
1058 	  break;
1059 	}
1060     }
1061 }
1062 
1063 
1064 static void linearize_expr_tree (vec<operand_entry *> *, gimple *,
1065 				 bool, bool);
1066 
1067 /* Structure for tracking and counting operands.  */
1068 struct oecount {
1069   unsigned int cnt;
1070   unsigned int id;
1071   enum tree_code oecode;
1072   tree op;
1073 };
1074 
1075 
1076 /* The heap for the oecount hashtable and the sorted list of operands.  */
1077 static vec<oecount> cvec;
1078 
1079 
1080 /* Oecount hashtable helpers.  */
1081 
1082 struct oecount_hasher : int_hash <int, 0, 1>
1083 {
1084   static inline hashval_t hash (int);
1085   static inline bool equal (int, int);
1086 };
1087 
1088 /* Hash function for oecount.  */
1089 
1090 inline hashval_t
1091 oecount_hasher::hash (int p)
1092 {
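  /* Slot values are indices into CVEC biased by 42, so that they never
     collide with the 0 (empty) and 1 (deleted) values reserved by
     int_hash <int, 0, 1>.  */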
1093   const oecount *c = &cvec[p - 42];
1094   return htab_hash_pointer (c->op) ^ (hashval_t)c->oecode;
1095 }
1096 
1097 /* Comparison function for oecount.  */
1098 
1099 inline bool
1100 oecount_hasher::equal (int p1, int p2)
1101 {
1102   const oecount *c1 = &cvec[p1 - 42];
1103   const oecount *c2 = &cvec[p2 - 42];
1104   return c1->oecode == c2->oecode && c1->op == c2->op;
1105 }
1106 
1107 /* Comparison function for qsort sorting oecount elements by count.  */
1108 
1109 static int
1110 oecount_cmp (const void *p1, const void *p2)
1111 {
1112   const oecount *c1 = (const oecount *)p1;
1113   const oecount *c2 = (const oecount *)p2;
1114   if (c1->cnt != c2->cnt)
1115     return c1->cnt > c2->cnt ? 1 : -1;
1116   else
1117     /* If counts are identical, use unique IDs to stabilize qsort.  */
1118     return c1->id > c2->id ? 1 : -1;
1119 }
1120 
1121 /* Return TRUE iff STMT represents a builtin call that raises OP
1122    to some exponent.  */
1123 
1124 static bool
1125 stmt_is_power_of_op (gimple *stmt, tree op)
1126 {
1127   if (!is_gimple_call (stmt))
1128     return false;
1129 
1130   switch (gimple_call_combined_fn (stmt))
1131     {
1132     CASE_CFN_POW:
1133     CASE_CFN_POWI:
1134       return (operand_equal_p (gimple_call_arg (stmt, 0), op, 0));
1135 
1136     default:
1137       return false;
1138     }
1139 }
1140 
1141 /* Given STMT which is a __builtin_pow* call, decrement its exponent
1142    in place and return the result.  Assumes that stmt_is_power_of_op
1143    was previously called for STMT and returned TRUE.  */
1144 
1145 static HOST_WIDE_INT
1146 decrement_power (gimple *stmt)
1147 {
1148   REAL_VALUE_TYPE c, cint;
1149   HOST_WIDE_INT power;
1150   tree arg1;
1151 
1152   switch (gimple_call_combined_fn (stmt))
1153     {
1154     CASE_CFN_POW:
1155       arg1 = gimple_call_arg (stmt, 1);
1156       c = TREE_REAL_CST (arg1);
1157       power = real_to_integer (&c) - 1;
1158       real_from_integer (&cint, VOIDmode, power, SIGNED);
1159       gimple_call_set_arg (stmt, 1, build_real (TREE_TYPE (arg1), cint));
1160       return power;
1161 
1162     CASE_CFN_POWI:
1163       arg1 = gimple_call_arg (stmt, 1);
1164       power = TREE_INT_CST_LOW (arg1) - 1;
1165       gimple_call_set_arg (stmt, 1, build_int_cst (TREE_TYPE (arg1), power));
1166       return power;
1167 
1168     default:
1169       gcc_unreachable ();
1170     }
1171 }
1172 
1173 /* Replace the SSA name defined by STMT with a new SSA name, update all
1174    uses of the old name, and return the new SSA name.  */
1175 
1176 static tree
1177 make_new_ssa_for_def (gimple *stmt, enum tree_code opcode, tree op)
1178 {
1179   gimple *use_stmt;
1180   use_operand_p use;
1181   imm_use_iterator iter;
1182   tree new_lhs, new_debug_lhs = NULL_TREE;
1183   tree lhs = gimple_get_lhs (stmt);
1184 
1185   new_lhs = make_ssa_name (TREE_TYPE (lhs));
1186   gimple_set_lhs (stmt, new_lhs);
1187 
1188   /* Also need to update GIMPLE_DEBUGs.  */
1189   FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs)
1190     {
1191       tree repl = new_lhs;
1192       if (is_gimple_debug (use_stmt))
1193 	{
1194 	  if (new_debug_lhs == NULL_TREE)
1195 	    {
1196 	      new_debug_lhs = make_node (DEBUG_EXPR_DECL);
1197 	      gdebug *def_temp
1198 		= gimple_build_debug_bind (new_debug_lhs,
1199 					   build2 (opcode, TREE_TYPE (lhs),
1200 						   new_lhs, op),
1201 					   stmt);
1202 	      DECL_ARTIFICIAL (new_debug_lhs) = 1;
1203 	      TREE_TYPE (new_debug_lhs) = TREE_TYPE (lhs);
1204 	      SET_DECL_MODE (new_debug_lhs, TYPE_MODE (TREE_TYPE (lhs)));
1205 	      gimple_set_uid (def_temp, gimple_uid (stmt));
1206 	      gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
1207 	      gsi_insert_after (&gsi, def_temp, GSI_SAME_STMT);
1208 	    }
1209 	  repl = new_debug_lhs;
1210 	}
1211       FOR_EACH_IMM_USE_ON_STMT (use, iter)
1212 	SET_USE (use, repl);
1213       update_stmt (use_stmt);
1214     }
1215   return new_lhs;
1216 }
1217 
1218 /* Replace the SSA names defined by the stmts in STMTS_TO_FIX with new
1219    SSA names and update their uses.  Also do this for the stmt that
1220    defines *DEF if *DEF is not OP.  */
1221 
1222 static void
1223 make_new_ssa_for_all_defs (tree *def, enum tree_code opcode, tree op,
1224 			   vec<gimple *> &stmts_to_fix)
1225 {
1226   unsigned i;
1227   gimple *stmt;
1228 
1229   if (*def != op
1230       && TREE_CODE (*def) == SSA_NAME
1231       && (stmt = SSA_NAME_DEF_STMT (*def))
1232       && gimple_code (stmt) != GIMPLE_NOP)
1233     *def = make_new_ssa_for_def (stmt, opcode, op);
1234 
1235   FOR_EACH_VEC_ELT (stmts_to_fix, i, stmt)
1236     make_new_ssa_for_def (stmt, opcode, op);
1237 }
1238 
1239 /* Find the single immediate use of STMT's LHS, and replace it
1240    with OP.  Remove STMT.  If STMT's LHS is the same as *DEF,
1241    replace *DEF with OP as well.  */
1242 
1243 static void
1244 propagate_op_to_single_use (tree op, gimple *stmt, tree *def)
1245 {
1246   tree lhs;
1247   gimple *use_stmt;
1248   use_operand_p use;
1249   gimple_stmt_iterator gsi;
1250 
1251   if (is_gimple_call (stmt))
1252     lhs = gimple_call_lhs (stmt);
1253   else
1254     lhs = gimple_assign_lhs (stmt);
1255 
1256   gcc_assert (has_single_use (lhs));
1257   single_imm_use (lhs, &use, &use_stmt);
1258   if (lhs == *def)
1259     *def = op;
1260   SET_USE (use, op);
1261   if (TREE_CODE (op) != SSA_NAME)
1262     update_stmt (use_stmt);
1263   gsi = gsi_for_stmt (stmt);
1264   unlink_stmt_vdef (stmt);
1265   reassoc_remove_stmt (&gsi);
1266   release_defs (stmt);
1267 }
1268 
1269 /* Walks the linear chain with result *DEF searching for an operation
1270    with operand OP and code OPCODE, removing it from the chain.  *DEF
1271    is updated if there is only one operand but no operation left.  */
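/* For example, if *DEF is computed as t1 = a * b; t2 = t1 * c; then
   zero_one_operation (&def, MULT_EXPR, c) removes the multiplication by c
   and leaves *DEF computing a * b.  */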
1272 
1273 static void
1274 zero_one_operation (tree *def, enum tree_code opcode, tree op)
1275 {
1276   tree orig_def = *def;
1277   gimple *stmt = SSA_NAME_DEF_STMT (*def);
1278   /* PR72835 - Record the stmt chain that has to be updated such that
1279      we don't use the same LHS when the values computed are different.  */
1280   auto_vec<gimple *, 64> stmts_to_fix;
1281 
1282   do
1283     {
1284       tree name;
1285 
1286       if (opcode == MULT_EXPR)
1287 	{
1288 	  if (stmt_is_power_of_op (stmt, op))
1289 	    {
1290 	      if (decrement_power (stmt) == 1)
1291 		{
1292 		  if (stmts_to_fix.length () > 0)
1293 		    stmts_to_fix.pop ();
1294 		  propagate_op_to_single_use (op, stmt, def);
1295 		}
1296 	      break;
1297 	    }
1298 	  else if (gimple_assign_rhs_code (stmt) == NEGATE_EXPR)
1299 	    {
1300 	      if (gimple_assign_rhs1 (stmt) == op)
1301 		{
1302 		  tree cst = build_minus_one_cst (TREE_TYPE (op));
1303 		  if (stmts_to_fix.length () > 0)
1304 		    stmts_to_fix.pop ();
1305 		  propagate_op_to_single_use (cst, stmt, def);
1306 		  break;
1307 		}
1308 	      else if (integer_minus_onep (op)
1309 		       || real_minus_onep (op))
1310 		{
1311 		  gimple_assign_set_rhs_code
1312 		    (stmt, TREE_CODE (gimple_assign_rhs1 (stmt)));
1313 		  break;
1314 		}
1315 	    }
1316 	}
1317 
1318       name = gimple_assign_rhs1 (stmt);
1319 
1320       /* If this is the operation we look for and one of the operands
1321          is ours simply propagate the other operand into the stmts
1322 	 single use.  */
1323       if (gimple_assign_rhs_code (stmt) == opcode
1324 	  && (name == op
1325 	      || gimple_assign_rhs2 (stmt) == op))
1326 	{
1327 	  if (name == op)
1328 	    name = gimple_assign_rhs2 (stmt);
1329 	  if (stmts_to_fix.length () > 0)
1330 	    stmts_to_fix.pop ();
1331 	  propagate_op_to_single_use (name, stmt, def);
1332 	  break;
1333 	}
1334 
1335       /* We might have a multiply of two __builtin_pow* calls, and
1336 	 the operand might be hiding in the rightmost one.  Likewise
1337 	 this can happen for a negate.  */
1338       if (opcode == MULT_EXPR
1339 	  && gimple_assign_rhs_code (stmt) == opcode
1340 	  && TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME
1341 	  && has_single_use (gimple_assign_rhs2 (stmt)))
1342 	{
1343 	  gimple *stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
1344 	  if (stmt_is_power_of_op (stmt2, op))
1345 	    {
1346 	      if (decrement_power (stmt2) == 1)
1347 		propagate_op_to_single_use (op, stmt2, def);
1348 	      else
1349 		stmts_to_fix.safe_push (stmt2);
1350 	      break;
1351 	    }
1352 	  else if (is_gimple_assign (stmt2)
1353 		   && gimple_assign_rhs_code (stmt2) == NEGATE_EXPR)
1354 	    {
1355 	      if (gimple_assign_rhs1 (stmt2) == op)
1356 		{
1357 		  tree cst = build_minus_one_cst (TREE_TYPE (op));
1358 		  propagate_op_to_single_use (cst, stmt2, def);
1359 		  break;
1360 		}
1361 	      else if (integer_minus_onep (op)
1362 		       || real_minus_onep (op))
1363 		{
1364 		  stmts_to_fix.safe_push (stmt2);
1365 		  gimple_assign_set_rhs_code
1366 		    (stmt2, TREE_CODE (gimple_assign_rhs1 (stmt2)));
1367 		  break;
1368 		}
1369 	    }
1370 	}
1371 
1372       /* Continue walking the chain.  */
1373       gcc_assert (name != op
1374 		  && TREE_CODE (name) == SSA_NAME);
1375       stmt = SSA_NAME_DEF_STMT (name);
1376       stmts_to_fix.safe_push (stmt);
1377     }
1378   while (1);
1379 
1380   if (stmts_to_fix.length () > 0 || *def == orig_def)
1381     make_new_ssa_for_all_defs (def, opcode, op, stmts_to_fix);
1382 }
1383 
1384 /* Returns true if statement S1 dominates statement S2.  Like
1385    stmt_dominates_stmt_p, but uses stmt UIDs to optimize.  */
1386 
1387 static bool
1388 reassoc_stmt_dominates_stmt_p (gimple *s1, gimple *s2)
1389 {
1390   basic_block bb1 = gimple_bb (s1), bb2 = gimple_bb (s2);
1391 
1392   /* If bb1 is NULL, it should be the GIMPLE_NOP def stmt of a default
1393      definition (D) SSA_NAME.  Assume it lives at the beginning of the
1394      function and thus dominates everything.  */
1395   if (!bb1 || s1 == s2)
1396     return true;
1397 
1398   /* If bb2 is NULL, it doesn't dominate any stmt with a bb.  */
1399   if (!bb2)
1400     return false;
1401 
1402   if (bb1 == bb2)
1403     {
1404       /* PHIs in the same basic block are assumed to be
1405 	 executed all in parallel, if only one stmt is a PHI,
1406 	 it dominates the other stmt in the same basic block.  */
1407       if (gimple_code (s1) == GIMPLE_PHI)
1408 	return true;
1409 
1410       if (gimple_code (s2) == GIMPLE_PHI)
1411 	return false;
1412 
1413       gcc_assert (gimple_uid (s1) && gimple_uid (s2));
1414 
1415       if (gimple_uid (s1) < gimple_uid (s2))
1416 	return true;
1417 
1418       if (gimple_uid (s1) > gimple_uid (s2))
1419 	return false;
1420 
1421       gimple_stmt_iterator gsi = gsi_for_stmt (s1);
1422       unsigned int uid = gimple_uid (s1);
1423       for (gsi_next (&gsi); !gsi_end_p (gsi); gsi_next (&gsi))
1424 	{
1425 	  gimple *s = gsi_stmt (gsi);
1426 	  if (gimple_uid (s) != uid)
1427 	    break;
1428 	  if (s == s2)
1429 	    return true;
1430 	}
1431 
1432       return false;
1433     }
1434 
1435   return dominated_by_p (CDI_DOMINATORS, bb2, bb1);
1436 }
1437 
1438 /* Insert STMT after INSERT_POINT.  */
1439 
1440 static void
1441 insert_stmt_after (gimple *stmt, gimple *insert_point)
1442 {
1443   gimple_stmt_iterator gsi;
1444   basic_block bb;
1445 
1446   if (gimple_code (insert_point) == GIMPLE_PHI)
1447     bb = gimple_bb (insert_point);
1448   else if (!stmt_ends_bb_p (insert_point))
1449     {
1450       gsi = gsi_for_stmt (insert_point);
1451       gimple_set_uid (stmt, gimple_uid (insert_point));
1452       gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
1453       return;
1454     }
1455   else
1456     /* We assume INSERT_POINT is a SSA_NAME_DEF_STMT of some SSA_NAME,
1457        thus if it must end a basic block, it should be a call that can
1458        throw, or some assignment that can throw.  If it throws, the LHS
1459        of it will not be initialized though, so only valid places using
1460        the SSA_NAME should be dominated by the fallthru edge.  */
1461     bb = find_fallthru_edge (gimple_bb (insert_point)->succs)->dest;
1462   gsi = gsi_after_labels (bb);
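  /* Pick a uid for STMT from the insertion point; if the block has no
     stmts after its labels, reuse the uid of its last stmt if there is
     one, otherwise use 1.  */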
1463   if (gsi_end_p (gsi))
1464     {
1465       gimple_stmt_iterator gsi2 = gsi_last_bb (bb);
1466       gimple_set_uid (stmt,
1467 		      gsi_end_p (gsi2) ? 1 : gimple_uid (gsi_stmt (gsi2)));
1468     }
1469   else
1470     gimple_set_uid (stmt, gimple_uid (gsi_stmt (gsi)));
1471   gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
1472 }
1473 
1474 /* Builds one statement performing OP1 OPCODE OP2, creating a new SSA
1475    name of type TYPE for the result.  Places the statement after the
1476    definition of either OP1 or OP2.  Returns the new statement.  */
1477 
1478 static gimple *
1479 build_and_add_sum (tree type, tree op1, tree op2, enum tree_code opcode)
1480 {
1481   gimple *op1def = NULL, *op2def = NULL;
1482   gimple_stmt_iterator gsi;
1483   tree op;
1484   gassign *sum;
1485 
1486   /* Create the addition statement.  */
1487   op = make_ssa_name (type);
1488   sum = gimple_build_assign (op, opcode, op1, op2);
1489 
1490   /* Find an insertion place and insert.  */
1491   if (TREE_CODE (op1) == SSA_NAME)
1492     op1def = SSA_NAME_DEF_STMT (op1);
1493   if (TREE_CODE (op2) == SSA_NAME)
1494     op2def = SSA_NAME_DEF_STMT (op2);
1495   if ((!op1def || gimple_nop_p (op1def))
1496       && (!op2def || gimple_nop_p (op2def)))
1497     {
1498       gsi = gsi_after_labels (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
1499       if (gsi_end_p (gsi))
1500 	{
1501 	  gimple_stmt_iterator gsi2
1502 	    = gsi_last_bb (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
1503 	  gimple_set_uid (sum,
1504 			  gsi_end_p (gsi2) ? 1 : gimple_uid (gsi_stmt (gsi2)));
1505 	}
1506       else
1507 	gimple_set_uid (sum, gimple_uid (gsi_stmt (gsi)));
1508       gsi_insert_before (&gsi, sum, GSI_NEW_STMT);
1509     }
1510   else
1511     {
1512       gimple *insert_point;
1513       if ((!op1def || gimple_nop_p (op1def))
1514 	   || (op2def && !gimple_nop_p (op2def)
1515 	       && reassoc_stmt_dominates_stmt_p (op1def, op2def)))
1516 	insert_point = op2def;
1517       else
1518 	insert_point = op1def;
1519       insert_stmt_after (sum, insert_point);
1520     }
1521   update_stmt (sum);
1522 
1523   return sum;
1524 }
1525 
1526 /* Perform un-distribution of divisions and multiplications.
1527    A * X + B * X is transformed into (A + B) * X and A / X + B / X
1528    to (A + B) / X for real X.
1529 
1530    The algorithm is organized as follows.
1531 
1532     - First we walk the addition chain *OPS looking for summands that
1533       are defined by a multiplication or a real division.  This results
1534       in the candidates bitmap with relevant indices into *OPS.
1535 
1536     - Second we build the chains of multiplications or divisions for
1537       these candidates, counting the number of occurrences of (operand, code)
1538       pairs in all of the candidates chains.
1539 
1540     - Third we sort the (operand, code) pairs by number of occurrence and
1541       process them starting with the pair with the most uses.
1542 
1543       * For each such pair we walk the candidates again to build a
1544         second candidate bitmap noting all multiplication/division chains
1545 	that have at least one occurrence of (operand, code).
1546 
1547       * We build an alternate addition chain only covering these
1548         candidates with one (operand, code) operation removed from their
1549 	multiplication/division chain.
1550 
1551       * The first candidate gets replaced by the alternate addition chain
1552         multiplied/divided by the operand.
1553 
1554       * All candidate chains get disabled for further processing and
1555         processing of (operand, code) pairs continues.
1556 
1557   The alternate addition chains built are re-processed by the main
1558   reassociation algorithm which allows optimizing a * x * y + b * y * x
1559   to (a + b ) * x * y in one invocation of the reassociation pass.  */
1560 
1561 static bool
1562 undistribute_ops_list (enum tree_code opcode,
1563 		       vec<operand_entry *> *ops, struct loop *loop)
1564 {
1565   unsigned int length = ops->length ();
1566   operand_entry *oe1;
1567   unsigned i, j;
1568   unsigned nr_candidates, nr_candidates2;
1569   sbitmap_iterator sbi0;
1570   vec<operand_entry *> *subops;
1571   bool changed = false;
1572   unsigned int next_oecount_id = 0;
1573 
1574   if (length <= 1
1575       || opcode != PLUS_EXPR)
1576     return false;
1577 
1578   /* Build a list of candidates to process.  */
1579   auto_sbitmap candidates (length);
1580   bitmap_clear (candidates);
1581   nr_candidates = 0;
1582   FOR_EACH_VEC_ELT (*ops, i, oe1)
1583     {
1584       enum tree_code dcode;
1585       gimple *oe1def;
1586 
1587       if (TREE_CODE (oe1->op) != SSA_NAME)
1588 	continue;
1589       oe1def = SSA_NAME_DEF_STMT (oe1->op);
1590       if (!is_gimple_assign (oe1def))
1591 	continue;
1592       dcode = gimple_assign_rhs_code (oe1def);
1593       if ((dcode != MULT_EXPR
1594 	   && dcode != RDIV_EXPR)
1595 	  || !is_reassociable_op (oe1def, dcode, loop))
1596 	continue;
1597 
1598       bitmap_set_bit (candidates, i);
1599       nr_candidates++;
1600     }
1601 
1602   if (nr_candidates < 2)
1603     return false;
1604 
1605   if (dump_file && (dump_flags & TDF_DETAILS))
1606     {
1607       fprintf (dump_file, "searching for un-distribute opportunities ");
1608       print_generic_expr (dump_file,
1609 	(*ops)[bitmap_first_set_bit (candidates)]->op, TDF_NONE);
1610       fprintf (dump_file, " %d\n", nr_candidates);
1611     }
1612 
1613   /* Build linearized sub-operand lists and the counting table.  */
1614   cvec.create (0);
1615 
1616   hash_table<oecount_hasher> ctable (15);
1617 
1618   /* ??? Macro arguments cannot have multi-argument template types in
1619      them.  This typedef is needed to workaround that limitation.  */
1620   typedef vec<operand_entry *> vec_operand_entry_t_heap;
1621   subops = XCNEWVEC (vec_operand_entry_t_heap, ops->length ());
1622   EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
1623     {
1624       gimple *oedef;
1625       enum tree_code oecode;
1626       unsigned j;
1627 
1628       oedef = SSA_NAME_DEF_STMT ((*ops)[i]->op);
1629       oecode = gimple_assign_rhs_code (oedef);
1630       linearize_expr_tree (&subops[i], oedef,
1631 			   associative_tree_code (oecode), false);
1632 
1633       FOR_EACH_VEC_ELT (subops[i], j, oe1)
1634 	{
1635 	  oecount c;
1636 	  int *slot;
1637 	  int idx;
1638 	  c.oecode = oecode;
1639 	  c.cnt = 1;
1640 	  c.id = next_oecount_id++;
1641 	  c.op = oe1->op;
1642 	  cvec.safe_push (c);
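	  /* Hash table slot values encode the element's CVEC index plus 42;
	     see oecount_hasher for why the bias is needed.  */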
1643 	  idx = cvec.length () + 41;
1644 	  slot = ctable.find_slot (idx, INSERT);
1645 	  if (!*slot)
1646 	    {
1647 	      *slot = idx;
1648 	    }
1649 	  else
1650 	    {
1651 	      cvec.pop ();
1652 	      cvec[*slot - 42].cnt++;
1653 	    }
1654 	}
1655     }
1656 
1657   /* Sort the counting table.  */
1658   cvec.qsort (oecount_cmp);
1659 
1660   if (dump_file && (dump_flags & TDF_DETAILS))
1661     {
1662       oecount *c;
1663       fprintf (dump_file, "Candidates:\n");
1664       FOR_EACH_VEC_ELT (cvec, j, c)
1665 	{
1666 	  fprintf (dump_file, "  %u %s: ", c->cnt,
1667 		   c->oecode == MULT_EXPR
1668 		   ? "*" : c->oecode == RDIV_EXPR ? "/" : "?");
1669 	  print_generic_expr (dump_file, c->op);
1670 	  fprintf (dump_file, "\n");
1671 	}
1672     }
1673 
1674   /* Process the (operand, code) pairs in order of most occurrence.  */
1675   auto_sbitmap candidates2 (length);
1676   while (!cvec.is_empty ())
1677     {
1678       oecount *c = &cvec.last ();
1679       if (c->cnt < 2)
1680 	break;
1681 
1682       /* Now collect the operands in the outer chain that contain
1683          the common operand in their inner chain.  */
1684       bitmap_clear (candidates2);
1685       nr_candidates2 = 0;
1686       EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
1687 	{
1688 	  gimple *oedef;
1689 	  enum tree_code oecode;
1690 	  unsigned j;
1691 	  tree op = (*ops)[i]->op;
1692 
1693 	  /* If we undistributed in this chain already, this may be
1694 	     a constant.  */
1695 	  if (TREE_CODE (op) != SSA_NAME)
1696 	    continue;
1697 
1698 	  oedef = SSA_NAME_DEF_STMT (op);
1699 	  oecode = gimple_assign_rhs_code (oedef);
1700 	  if (oecode != c->oecode)
1701 	    continue;
1702 
1703 	  FOR_EACH_VEC_ELT (subops[i], j, oe1)
1704 	    {
1705 	      if (oe1->op == c->op)
1706 		{
1707 		  bitmap_set_bit (candidates2, i);
1708 		  ++nr_candidates2;
1709 		  break;
1710 		}
1711 	    }
1712 	}
1713 
1714       if (nr_candidates2 >= 2)
1715 	{
1716 	  operand_entry *oe1, *oe2;
1717 	  gimple *prod;
1718 	  int first = bitmap_first_set_bit (candidates2);
1719 
1720 	  /* Build the new addition chain.  */
1721 	  oe1 = (*ops)[first];
1722 	  if (dump_file && (dump_flags & TDF_DETAILS))
1723 	    {
1724 	      fprintf (dump_file, "Building (");
1725 	      print_generic_expr (dump_file, oe1->op);
1726 	    }
1727 	  zero_one_operation (&oe1->op, c->oecode, c->op);
1728 	  EXECUTE_IF_SET_IN_BITMAP (candidates2, first+1, i, sbi0)
1729 	    {
1730 	      gimple *sum;
1731 	      oe2 = (*ops)[i];
1732 	      if (dump_file && (dump_flags & TDF_DETAILS))
1733 		{
1734 		  fprintf (dump_file, " + ");
1735 		  print_generic_expr (dump_file, oe2->op);
1736 		}
1737 	      zero_one_operation (&oe2->op, c->oecode, c->op);
1738 	      sum = build_and_add_sum (TREE_TYPE (oe1->op),
1739 				       oe1->op, oe2->op, opcode);
1740 	      oe2->op = build_zero_cst (TREE_TYPE (oe2->op));
1741 	      oe2->rank = 0;
1742 	      oe1->op = gimple_get_lhs (sum);
1743 	    }
1744 
1745 	  /* Apply the multiplication/division.  */
1746 	  prod = build_and_add_sum (TREE_TYPE (oe1->op),
1747 				    oe1->op, c->op, c->oecode);
1748 	  if (dump_file && (dump_flags & TDF_DETAILS))
1749 	    {
1750 	      fprintf (dump_file, ") %s ", c->oecode == MULT_EXPR ? "*" : "/");
1751 	      print_generic_expr (dump_file, c->op);
1752 	      fprintf (dump_file, "\n");
1753 	    }
1754 
1755 	  /* Record it in the addition chain and disable further
1756 	     undistribution with this op.  */
1757 	  oe1->op = gimple_assign_lhs (prod);
1758 	  oe1->rank = get_rank (oe1->op);
1759 	  subops[first].release ();
1760 
1761 	  changed = true;
1762 	}
1763 
1764       cvec.pop ();
1765     }
1766 
1767   for (i = 0; i < ops->length (); ++i)
1768     subops[i].release ();
1769   free (subops);
1770   cvec.release ();
1771 
1772   return changed;
1773 }
1774 
1775 /* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR and CURR is a comparison
1776    expression, examine the other OPS to see if any of them are comparisons
1777    of the same values, which we may be able to combine or eliminate.
1778    For example, we can rewrite (a < b) | (a == b) as (a <= b).  */
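
/* For instance, for BIT_AND_EXPR, (a < b) & (a >= b) typically folds to
   the constant 0; such constant results are handled by removing both
   comparisons and appending the constant to the end of OPS.  */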
1779 
1780 static bool
1781 eliminate_redundant_comparison (enum tree_code opcode,
1782 				vec<operand_entry *> *ops,
1783 				unsigned int currindex,
1784 				operand_entry *curr)
1785 {
1786   tree op1, op2;
1787   enum tree_code lcode, rcode;
1788   gimple *def1, *def2;
1789   int i;
1790   operand_entry *oe;
1791 
1792   if (opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
1793     return false;
1794 
1795   /* Check that CURR is a comparison.  */
1796   if (TREE_CODE (curr->op) != SSA_NAME)
1797     return false;
1798   def1 = SSA_NAME_DEF_STMT (curr->op);
1799   if (!is_gimple_assign (def1))
1800     return false;
1801   lcode = gimple_assign_rhs_code (def1);
1802   if (TREE_CODE_CLASS (lcode) != tcc_comparison)
1803     return false;
1804   op1 = gimple_assign_rhs1 (def1);
1805   op2 = gimple_assign_rhs2 (def1);
1806 
1807   /* Now look for a similar comparison in the remaining OPS.  */
1808   for (i = currindex + 1; ops->iterate (i, &oe); i++)
1809     {
1810       tree t;
1811 
1812       if (TREE_CODE (oe->op) != SSA_NAME)
1813 	continue;
1814       def2 = SSA_NAME_DEF_STMT (oe->op);
1815       if (!is_gimple_assign (def2))
1816 	continue;
1817       rcode = gimple_assign_rhs_code (def2);
1818       if (TREE_CODE_CLASS (rcode) != tcc_comparison)
1819 	continue;
1820 
1821       /* If we got here, we have a match.  See if we can combine the
1822 	 two comparisons.  */
1823       if (opcode == BIT_IOR_EXPR)
1824 	t = maybe_fold_or_comparisons (lcode, op1, op2,
1825 				       rcode, gimple_assign_rhs1 (def2),
1826 				       gimple_assign_rhs2 (def2));
1827       else
1828 	t = maybe_fold_and_comparisons (lcode, op1, op2,
1829 					rcode, gimple_assign_rhs1 (def2),
1830 					gimple_assign_rhs2 (def2));
1831       if (!t)
1832 	continue;
1833 
1834       /* maybe_fold_and_comparisons and maybe_fold_or_comparisons
1835 	 always give us a boolean_type_node value back.  If the original
1836 	 BIT_AND_EXPR or BIT_IOR_EXPR was of a wider integer type,
1837 	 we need to convert.  */
1838       if (!useless_type_conversion_p (TREE_TYPE (curr->op), TREE_TYPE (t)))
1839 	t = fold_convert (TREE_TYPE (curr->op), t);
1840 
1841       if (TREE_CODE (t) != INTEGER_CST
1842 	  && !operand_equal_p (t, curr->op, 0))
1843 	{
1844 	  enum tree_code subcode;
1845 	  tree newop1, newop2;
1846 	  if (!COMPARISON_CLASS_P (t))
1847 	    continue;
1848 	  extract_ops_from_tree (t, &subcode, &newop1, &newop2);
1849 	  STRIP_USELESS_TYPE_CONVERSION (newop1);
1850 	  STRIP_USELESS_TYPE_CONVERSION (newop2);
1851 	  if (!is_gimple_val (newop1) || !is_gimple_val (newop2))
1852 	    continue;
1853 	}
1854 
1855       if (dump_file && (dump_flags & TDF_DETAILS))
1856 	{
1857 	  fprintf (dump_file, "Equivalence: ");
1858 	  print_generic_expr (dump_file, curr->op);
1859 	  fprintf (dump_file, " %s ", op_symbol_code (opcode));
1860 	  print_generic_expr (dump_file, oe->op);
1861 	  fprintf (dump_file, " -> ");
1862 	  print_generic_expr (dump_file, t);
1863 	  fprintf (dump_file, "\n");
1864 	}
1865 
1866       /* Now we can delete oe, as it has been subsumed by the new combined
1867          expression t.  */
1868       ops->ordered_remove (i);
1869       reassociate_stats.ops_eliminated++;
1870 
1871       /* If t is the same as curr->op, we're done.  Otherwise we must
1872 	 replace curr->op with t.  The special case is when we get a
1873 	 constant back, in which case we add it to the end instead of in
1874 	 place of the current entry.  */
1875       if (TREE_CODE (t) == INTEGER_CST)
1876 	{
1877 	  ops->ordered_remove (currindex);
1878 	  add_to_ops_vec (ops, t);
1879 	}
1880       else if (!operand_equal_p (t, curr->op, 0))
1881 	{
1882 	  gimple *sum;
1883 	  enum tree_code subcode;
1884 	  tree newop1;
1885 	  tree newop2;
1886 	  gcc_assert (COMPARISON_CLASS_P (t));
1887 	  extract_ops_from_tree (t, &subcode, &newop1, &newop2);
1888 	  STRIP_USELESS_TYPE_CONVERSION (newop1);
1889 	  STRIP_USELESS_TYPE_CONVERSION (newop2);
1890 	  gcc_checking_assert (is_gimple_val (newop1)
1891 			       && is_gimple_val (newop2));
1892 	  sum = build_and_add_sum (TREE_TYPE (t), newop1, newop2, subcode);
1893 	  curr->op = gimple_get_lhs (sum);
1894 	}
1895       return true;
1896     }
1897 
1898   return false;
1899 }
1900 
1901 
1902 /* Transform repeated addition of the same value into a multiply by a
1903    constant.  */
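
/* For example, an operand list x + x + x + y (equal operands are expected
   to be adjacent because the caller has sorted the list) becomes y + 3*x:
   the three copies of x are removed and an entry for the new multiply
   statement is appended to OPS.  */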
1904 static bool
1905 transform_add_to_multiply (vec<operand_entry *> *ops)
1906 {
1907   operand_entry *oe;
1908   tree op = NULL_TREE;
1909   int j;
1910   int i, start = -1, end = 0, count = 0;
1911   auto_vec<std::pair <int, int> > indxs;
1912   bool changed = false;
1913 
1914   if (!INTEGRAL_TYPE_P (TREE_TYPE ((*ops)[0]->op))
1915       && (!SCALAR_FLOAT_TYPE_P (TREE_TYPE ((*ops)[0]->op))
1916 	  || !flag_unsafe_math_optimizations))
1917     return false;
1918 
1919   /* Look for repeated operands.  */
1920   FOR_EACH_VEC_ELT (*ops, i, oe)
1921     {
1922       if (start == -1)
1923 	{
1924 	  count = 1;
1925 	  op = oe->op;
1926 	  start = i;
1927 	}
1928       else if (operand_equal_p (oe->op, op, 0))
1929 	{
1930 	  count++;
1931 	  end = i;
1932 	}
1933       else
1934 	{
1935 	  if (count > 1)
1936 	    indxs.safe_push (std::make_pair (start, end));
1937 	  count = 1;
1938 	  op = oe->op;
1939 	  start = i;
1940 	}
1941     }
1942 
1943   if (count > 1)
1944     indxs.safe_push (std::make_pair (start, end));
1945 
1946   for (j = indxs.length () - 1; j >= 0; --j)
1947     {
1948       /* Convert repeated operand addition to multiplication.  */
1949       start = indxs[j].first;
1950       end = indxs[j].second;
1951       op = (*ops)[start]->op;
1952       count = end - start + 1;
1953       for (i = end; i >= start; --i)
1954 	ops->unordered_remove (i);
1955       tree tmp = make_ssa_name (TREE_TYPE (op));
1956       tree cst = build_int_cst (integer_type_node, count);
1957       gassign *mul_stmt
1958 	= gimple_build_assign (tmp, MULT_EXPR,
1959 			       op, fold_convert (TREE_TYPE (op), cst));
1960       gimple_set_visited (mul_stmt, true);
1961       add_to_ops_vec (ops, tmp, mul_stmt);
1962       changed = true;
1963     }
1964 
1965   return changed;
1966 }
1967 
1968 
1969 /* Perform various identities and other optimizations on the list of
1970    operand entries, stored in OPS.  The tree code for the binary
1971    operation between all the operands is OPCODE.  */
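
/* For example, with OPCODE == PLUS_EXPR and an operand list ending in the
   constants 5 and 3 (constants sort last), the two constants are folded
   into 8 and the list is re-optimized; pairwise clean-ups such as a + -a,
   duplicated operands and redundant comparisons are then attempted on the
   remaining entries.  */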
1972 
1973 static void
1974 optimize_ops_list (enum tree_code opcode,
1975 		   vec<operand_entry *> *ops)
1976 {
1977   unsigned int length = ops->length ();
1978   unsigned int i;
1979   operand_entry *oe;
1980   operand_entry *oelast = NULL;
1981   bool iterate = false;
1982 
1983   if (length == 1)
1984     return;
1985 
1986   oelast = ops->last ();
1987 
1988   /* If the last two are constants, pop the constants off, merge them
1989      and try the next two.  */
1990   if (oelast->rank == 0 && is_gimple_min_invariant (oelast->op))
1991     {
1992       operand_entry *oelm1 = (*ops)[length - 2];
1993 
1994       if (oelm1->rank == 0
1995 	  && is_gimple_min_invariant (oelm1->op)
1996 	  && useless_type_conversion_p (TREE_TYPE (oelm1->op),
1997 				       TREE_TYPE (oelast->op)))
1998 	{
1999 	  tree folded = fold_binary (opcode, TREE_TYPE (oelm1->op),
2000 				     oelm1->op, oelast->op);
2001 
2002 	  if (folded && is_gimple_min_invariant (folded))
2003 	    {
2004 	      if (dump_file && (dump_flags & TDF_DETAILS))
2005 		fprintf (dump_file, "Merging constants\n");
2006 
2007 	      ops->pop ();
2008 	      ops->pop ();
2009 
2010 	      add_to_ops_vec (ops, folded);
2011 	      reassociate_stats.constants_eliminated++;
2012 
2013 	      optimize_ops_list (opcode, ops);
2014 	      return;
2015 	    }
2016 	}
2017     }
2018 
2019   eliminate_using_constants (opcode, ops);
2020   oelast = NULL;
2021 
2022   for (i = 0; ops->iterate (i, &oe);)
2023     {
2024       bool done = false;
2025 
2026       if (eliminate_not_pairs (opcode, ops, i, oe))
2027 	return;
2028       if (eliminate_duplicate_pair (opcode, ops, &done, i, oe, oelast)
2029 	  || (!done && eliminate_plus_minus_pair (opcode, ops, i, oe))
2030 	  || (!done && eliminate_redundant_comparison (opcode, ops, i, oe)))
2031 	{
2032 	  if (done)
2033 	    return;
2034 	  iterate = true;
2035 	  oelast = NULL;
2036 	  continue;
2037 	}
2038       oelast = oe;
2039       i++;
2040     }
2041 
2042   length = ops->length ();
2043   oelast = ops->last ();
2044 
2045   if (iterate)
2046     optimize_ops_list (opcode, ops);
2047 }
2048 
2049 /* The following functions are subroutines to optimize_range_tests and allow
2050    it to try to change a logical combination of comparisons into a range
2051    test.
2052 
2053    For example, both
2054 	X == 2 || X == 5 || X == 3 || X == 4
2055    and
2056 	X >= 2 && X <= 5
2057    are converted to
2058 	(unsigned) (X - 2) <= 3
2059 
2060    For more information see the comments above fold_range_test in
2061    fold-const.c; this implementation is for GIMPLE.  */
2062 
2063 struct range_entry
2064 {
2065   tree exp;
2066   tree low;
2067   tree high;
2068   bool in_p;
2069   bool strict_overflow_p;
2070   unsigned int idx, next;
2071 };
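
/* For instance, the test X >= 2 && X <= 5 above is described by a single
   range_entry with exp = X, in_p = true, low = 2 and high = 5; idx records
   the entry's position in the operand vector, and, roughly, next is used
   later to chain entries for the same SSA_NAME while they are merged.  */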
2072 
2073 /* This is similar to make_range in fold-const.c, but on top of
2074    GIMPLE instead of trees.  If EXP is non-NULL, it should be
2075    an SSA_NAME and the STMT argument is ignored; otherwise the STMT
2076    argument should be a GIMPLE_COND.  */
2077 
2078 static void
2079 init_range_entry (struct range_entry *r, tree exp, gimple *stmt)
2080 {
2081   int in_p;
2082   tree low, high;
2083   bool is_bool, strict_overflow_p;
2084 
2085   r->exp = NULL_TREE;
2086   r->in_p = false;
2087   r->strict_overflow_p = false;
2088   r->low = NULL_TREE;
2089   r->high = NULL_TREE;
2090   if (exp != NULL_TREE
2091       && (TREE_CODE (exp) != SSA_NAME || !INTEGRAL_TYPE_P (TREE_TYPE (exp))))
2092     return;
2093 
2094   /* Start with simply saying "EXP != 0" and then look at the code of EXP
2095      and see if we can refine the range.  Some of the cases below may not
2096      happen, but it doesn't seem worth worrying about this.  We "continue"
2097      the outer loop when we've changed something; otherwise we "break"
2098      the switch, which will "break" the while.  */
2099   low = exp ? build_int_cst (TREE_TYPE (exp), 0) : boolean_false_node;
2100   high = low;
2101   in_p = 0;
2102   strict_overflow_p = false;
2103   is_bool = false;
2104   if (exp == NULL_TREE)
2105     is_bool = true;
2106   else if (TYPE_PRECISION (TREE_TYPE (exp)) == 1)
2107     {
2108       if (TYPE_UNSIGNED (TREE_TYPE (exp)))
2109 	is_bool = true;
2110       else
2111 	return;
2112     }
2113   else if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE)
2114     is_bool = true;
2115 
2116   while (1)
2117     {
2118       enum tree_code code;
2119       tree arg0, arg1, exp_type;
2120       tree nexp;
2121       location_t loc;
2122 
2123       if (exp != NULL_TREE)
2124 	{
2125 	  if (TREE_CODE (exp) != SSA_NAME
2126 	      || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp))
2127 	    break;
2128 
2129 	  stmt = SSA_NAME_DEF_STMT (exp);
2130 	  if (!is_gimple_assign (stmt))
2131 	    break;
2132 
2133 	  code = gimple_assign_rhs_code (stmt);
2134 	  arg0 = gimple_assign_rhs1 (stmt);
2135 	  arg1 = gimple_assign_rhs2 (stmt);
2136 	  exp_type = TREE_TYPE (exp);
2137 	}
2138       else
2139 	{
2140 	  code = gimple_cond_code (stmt);
2141 	  arg0 = gimple_cond_lhs (stmt);
2142 	  arg1 = gimple_cond_rhs (stmt);
2143 	  exp_type = boolean_type_node;
2144 	}
2145 
2146       if (TREE_CODE (arg0) != SSA_NAME
2147 	  || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (arg0))
2148 	break;
2149       loc = gimple_location (stmt);
2150       switch (code)
2151 	{
2152 	case BIT_NOT_EXPR:
2153 	  if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE
2154 	      /* Ensure the range is either +[-,0], +[0,0],
2155 		 -[-,0], -[0,0] or +[1,-], +[1,1], -[1,-] or
2156 		 -[1,1].  If it is e.g. +[-,-] or -[-,-]
2157 		 or similar expression of unconditional true or
2158 		 false, it should not be negated.  */
2159 	      && ((high && integer_zerop (high))
2160 		  || (low && integer_onep (low))))
2161 	    {
2162 	      in_p = !in_p;
2163 	      exp = arg0;
2164 	      continue;
2165 	    }
2166 	  break;
2167 	case SSA_NAME:
2168 	  exp = arg0;
2169 	  continue;
2170 	CASE_CONVERT:
2171 	  if (is_bool)
2172 	    {
2173 	      if ((TYPE_PRECISION (exp_type) == 1
2174 		   || TREE_CODE (exp_type) == BOOLEAN_TYPE)
2175 		  && TYPE_PRECISION (TREE_TYPE (arg0)) > 1)
2176 		return;
2177 	    }
2178 	  else if (TYPE_PRECISION (TREE_TYPE (arg0)) == 1)
2179 	    {
2180 	      if (TYPE_UNSIGNED (TREE_TYPE (arg0)))
2181 		is_bool = true;
2182 	      else
2183 		return;
2184 	    }
2185 	  else if (TREE_CODE (TREE_TYPE (arg0)) == BOOLEAN_TYPE)
2186 	    is_bool = true;
2187 	  goto do_default;
2188 	case EQ_EXPR:
2189 	case NE_EXPR:
2190 	case LT_EXPR:
2191 	case LE_EXPR:
2192 	case GE_EXPR:
2193 	case GT_EXPR:
2194 	  is_bool = true;
2195 	  /* FALLTHRU */
2196 	default:
2197 	  if (!is_bool)
2198 	    return;
2199 	do_default:
2200 	  nexp = make_range_step (loc, code, arg0, arg1, exp_type,
2201 				  &low, &high, &in_p,
2202 				  &strict_overflow_p);
2203 	  if (nexp != NULL_TREE)
2204 	    {
2205 	      exp = nexp;
2206 	      gcc_assert (TREE_CODE (exp) == SSA_NAME);
2207 	      continue;
2208 	    }
2209 	  break;
2210 	}
2211       break;
2212     }
2213   if (is_bool)
2214     {
2215       r->exp = exp;
2216       r->in_p = in_p;
2217       r->low = low;
2218       r->high = high;
2219       r->strict_overflow_p = strict_overflow_p;
2220     }
2221 }
2222 
2223 /* Comparison function for qsort.  Sort entries
2224    without SSA_NAME exp first, then with SSA_NAMEs sorted
2225    by increasing SSA_NAME_VERSION, and for the same SSA_NAMEs
2226    by increasing ->low and if ->low is the same, by increasing
2227    ->high.  ->low == NULL_TREE means minimum, ->high == NULL_TREE means
2228    maximum.  */
2229 
2230 static int
2231 range_entry_cmp (const void *a, const void *b)
2232 {
2233   const struct range_entry *p = (const struct range_entry *) a;
2234   const struct range_entry *q = (const struct range_entry *) b;
2235 
2236   if (p->exp != NULL_TREE && TREE_CODE (p->exp) == SSA_NAME)
2237     {
2238       if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
2239 	{
2240 	  /* Group range_entries for the same SSA_NAME together.  */
2241 	  if (SSA_NAME_VERSION (p->exp) < SSA_NAME_VERSION (q->exp))
2242 	    return -1;
2243 	  else if (SSA_NAME_VERSION (p->exp) > SSA_NAME_VERSION (q->exp))
2244 	    return 1;
2245 	  /* If ->low is different, NULL low goes first, then by
2246 	     ascending low.  */
2247 	  if (p->low != NULL_TREE)
2248 	    {
2249 	      if (q->low != NULL_TREE)
2250 		{
2251 		  tree tem = fold_binary (LT_EXPR, boolean_type_node,
2252 					  p->low, q->low);
2253 		  if (tem && integer_onep (tem))
2254 		    return -1;
2255 		  tem = fold_binary (GT_EXPR, boolean_type_node,
2256 				     p->low, q->low);
2257 		  if (tem && integer_onep (tem))
2258 		    return 1;
2259 		}
2260 	      else
2261 		return 1;
2262 	    }
2263 	  else if (q->low != NULL_TREE)
2264 	    return -1;
2265 	  /* If ->high is different, NULL high goes last, before that by
2266 	     ascending high.  */
2267 	  if (p->high != NULL_TREE)
2268 	    {
2269 	      if (q->high != NULL_TREE)
2270 		{
2271 		  tree tem = fold_binary (LT_EXPR, boolean_type_node,
2272 					  p->high, q->high);
2273 		  if (tem && integer_onep (tem))
2274 		    return -1;
2275 		  tem = fold_binary (GT_EXPR, boolean_type_node,
2276 				     p->high, q->high);
2277 		  if (tem && integer_onep (tem))
2278 		    return 1;
2279 		}
2280 	      else
2281 		return -1;
2282 	    }
2283 	  else if (q->high != NULL_TREE)
2284 	    return 1;
2285 	  /* If both ranges are the same, sort below by ascending idx.  */
2286 	}
2287       else
2288 	return 1;
2289     }
2290   else if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
2291     return -1;
2292 
2293   if (p->idx < q->idx)
2294     return -1;
2295   else
2296     {
2297       gcc_checking_assert (p->idx > q->idx);
2298       return 1;
2299     }
2300 }
2301 
2302 /* Helper function for update_range_test.  Force EXPR into an SSA_NAME,
2303    inserting any needed statements before or after GSI, per BEFORE.  */
2304 
2305 static tree
2306 force_into_ssa_name (gimple_stmt_iterator *gsi, tree expr, bool before)
2307 {
2308   enum gsi_iterator_update m = before ? GSI_SAME_STMT : GSI_CONTINUE_LINKING;
2309   tree ret = force_gimple_operand_gsi (gsi, expr, true, NULL_TREE, before, m);
2310   if (TREE_CODE (ret) != SSA_NAME)
2311     {
2312       gimple *g = gimple_build_assign (make_ssa_name (TREE_TYPE (ret)), ret);
2313       if (before)
2314 	gsi_insert_before (gsi, g, GSI_SAME_STMT);
2315       else
2316 	gsi_insert_after (gsi, g, GSI_CONTINUE_LINKING);
2317       ret = gimple_assign_lhs (g);
2318     }
2319   return ret;
2320 }
2321 
2322 /* Helper routine of optimize_range_tests.
2323    [EXP, IN_P, LOW, HIGH, STRICT_OVERFLOW_P] is a merged range for
2324    RANGE and OTHERRANGE through OTHERRANGE + COUNT - 1 ranges,
2325    OPCODE and OPS are arguments of optimize_range_tests.  If OTHERRANGE
2326    is NULL, OTHERRANGEP should not be and then OTHERRANGEP points to
2327    an array of COUNT pointers to other ranges.  Return
2328    true if the range merge has been successful.
2329    If OPCODE is ERROR_MARK, this is called from within
2330    maybe_optimize_range_tests and is performing inter-bb range optimization.
2331    In that case, whether an op is BIT_AND_EXPR or BIT_IOR_EXPR is found in
2332    oe->rank.  */
2333 
2334 static bool
2335 update_range_test (struct range_entry *range, struct range_entry *otherrange,
2336 		   struct range_entry **otherrangep,
2337 		   unsigned int count, enum tree_code opcode,
2338 		   vec<operand_entry *> *ops, tree exp, gimple_seq seq,
2339 		   bool in_p, tree low, tree high, bool strict_overflow_p)
2340 {
2341   operand_entry *oe = (*ops)[range->idx];
2342   tree op = oe->op;
2343   gimple *stmt = op ? SSA_NAME_DEF_STMT (op)
2344 		    : last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id));
2345   location_t loc = gimple_location (stmt);
2346   tree optype = op ? TREE_TYPE (op) : boolean_type_node;
2347   tree tem = build_range_check (loc, optype, unshare_expr (exp),
2348 				in_p, low, high);
2349   enum warn_strict_overflow_code wc = WARN_STRICT_OVERFLOW_COMPARISON;
2350   gimple_stmt_iterator gsi;
2351   unsigned int i, uid;
2352 
2353   if (tem == NULL_TREE)
2354     return false;
2355 
2356   /* If op is a default-def SSA_NAME, there is no place to insert the
2357      new comparison.  Give up, unless we can use OP itself as the
2358      range test.  */
2359   if (op && SSA_NAME_IS_DEFAULT_DEF (op))
2360     {
2361       if (op == range->exp
2362 	  && ((TYPE_PRECISION (optype) == 1 && TYPE_UNSIGNED (optype))
2363 	      || TREE_CODE (optype) == BOOLEAN_TYPE)
2364 	  && (op == tem
2365 	      || (TREE_CODE (tem) == EQ_EXPR
2366 		  && TREE_OPERAND (tem, 0) == op
2367 		  && integer_onep (TREE_OPERAND (tem, 1))))
2368 	  && opcode != BIT_IOR_EXPR
2369 	  && (opcode != ERROR_MARK || oe->rank != BIT_IOR_EXPR))
2370 	{
2371 	  stmt = NULL;
2372 	  tem = op;
2373 	}
2374       else
2375 	return false;
2376     }
2377 
2378   if (strict_overflow_p && issue_strict_overflow_warning (wc))
2379     warning_at (loc, OPT_Wstrict_overflow,
2380 		"assuming signed overflow does not occur "
2381 		"when simplifying range test");
2382 
2383   if (dump_file && (dump_flags & TDF_DETAILS))
2384     {
2385       struct range_entry *r;
2386       fprintf (dump_file, "Optimizing range tests ");
2387       print_generic_expr (dump_file, range->exp);
2388       fprintf (dump_file, " %c[", range->in_p ? '+' : '-');
2389       print_generic_expr (dump_file, range->low);
2390       fprintf (dump_file, ", ");
2391       print_generic_expr (dump_file, range->high);
2392       fprintf (dump_file, "]");
2393       for (i = 0; i < count; i++)
2394 	{
2395 	  if (otherrange)
2396 	    r = otherrange + i;
2397 	  else
2398 	    r = otherrangep[i];
2399 	  if (r->exp
2400 	      && r->exp != range->exp
2401 	      && TREE_CODE (r->exp) == SSA_NAME)
2402 	    {
2403 	      fprintf (dump_file, " and ");
2404 	      print_generic_expr (dump_file, r->exp);
2405 	    }
2406 	  else
2407 	    fprintf (dump_file, " and");
2408 	  fprintf (dump_file, " %c[", r->in_p ? '+' : '-');
2409 	  print_generic_expr (dump_file, r->low);
2410 	  fprintf (dump_file, ", ");
2411 	  print_generic_expr (dump_file, r->high);
2412 	  fprintf (dump_file, "]");
2413 	}
2414       fprintf (dump_file, "\n into ");
2415       print_generic_expr (dump_file, tem);
2416       fprintf (dump_file, "\n");
2417     }
2418 
2419   if (opcode == BIT_IOR_EXPR
2420       || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
2421     tem = invert_truthvalue_loc (loc, tem);
2422 
2423   tem = fold_convert_loc (loc, optype, tem);
2424   if (stmt)
2425     {
2426       gsi = gsi_for_stmt (stmt);
2427       uid = gimple_uid (stmt);
2428     }
2429   else
2430     {
2431       gsi = gsi_none ();
2432       uid = 0;
2433     }
2434   if (stmt == NULL)
2435     gcc_checking_assert (tem == op);
2436   /* In rare cases range->exp can be equal to the lhs of stmt.
2437      In that case we have to insert after the stmt rather than before
2438      it.  If stmt is a PHI, insert it at the start of the basic block.  */
2439   else if (op != range->exp)
2440     {
2441       gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT);
2442       tem = force_into_ssa_name (&gsi, tem, true);
2443       gsi_prev (&gsi);
2444     }
2445   else if (gimple_code (stmt) != GIMPLE_PHI)
2446     {
2447       gsi_insert_seq_after (&gsi, seq, GSI_CONTINUE_LINKING);
2448       tem = force_into_ssa_name (&gsi, tem, false);
2449     }
2450   else
2451     {
2452       gsi = gsi_after_labels (gimple_bb (stmt));
2453       if (!gsi_end_p (gsi))
2454 	uid = gimple_uid (gsi_stmt (gsi));
2455       else
2456 	{
2457 	  gsi = gsi_start_bb (gimple_bb (stmt));
2458 	  uid = 1;
2459 	  while (!gsi_end_p (gsi))
2460 	    {
2461 	      uid = gimple_uid (gsi_stmt (gsi));
2462 	      gsi_next (&gsi);
2463 	    }
2464 	}
2465       gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT);
2466       tem = force_into_ssa_name (&gsi, tem, true);
2467       if (gsi_end_p (gsi))
2468 	gsi = gsi_last_bb (gimple_bb (stmt));
2469       else
2470 	gsi_prev (&gsi);
2471     }
2472   for (; !gsi_end_p (gsi); gsi_prev (&gsi))
2473     if (gimple_uid (gsi_stmt (gsi)))
2474       break;
2475     else
2476       gimple_set_uid (gsi_stmt (gsi), uid);
2477 
2478   oe->op = tem;
2479   range->exp = exp;
2480   range->low = low;
2481   range->high = high;
2482   range->in_p = in_p;
2483   range->strict_overflow_p = false;
2484 
2485   for (i = 0; i < count; i++)
2486     {
2487       if (otherrange)
2488 	range = otherrange + i;
2489       else
2490 	range = otherrangep[i];
2491       oe = (*ops)[range->idx];
2492       /* Now change all the other range test immediate uses, so that
2493 	 those tests will be optimized away.  */
2494       if (opcode == ERROR_MARK)
2495 	{
2496 	  if (oe->op)
2497 	    oe->op = build_int_cst (TREE_TYPE (oe->op),
2498 				    oe->rank == BIT_IOR_EXPR ? 0 : 1);
2499 	  else
2500 	    oe->op = (oe->rank == BIT_IOR_EXPR
2501 		      ? boolean_false_node : boolean_true_node);
2502 	}
2503       else
2504 	oe->op = error_mark_node;
2505       range->exp = NULL_TREE;
2506       range->low = NULL_TREE;
2507       range->high = NULL_TREE;
2508     }
2509   return true;
2510 }
2511 
2512 /* Optimize X == CST1 || X == CST2
2513    if popcount (CST1 ^ CST2) == 1 into
2514    (X & ~(CST1 ^ CST2)) == (CST1 & ~(CST1 ^ CST2)).
2515    Similarly for ranges.  E.g.
2516    X != 2 && X != 3 && X != 10 && X != 11
2517    will be transformed by the previous optimization into
2518    !((X - 2U) <= 1U || (X - 10U) <= 1U)
2519    and this loop can transform that into
2520    !(((X & ~8) - 2U) <= 1U).  */
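
/* In the example above lowi = 2 and lowj = 10, so lowxor = highxor = 8,
   a power of two; masking the tested value with ~8 maps both [2, 3] and
   [10, 11] onto [2, 3], which is what lets the two tests collapse into
   the single range check shown.  */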
2521 
2522 static bool
2523 optimize_range_tests_xor (enum tree_code opcode, tree type,
2524 			  tree lowi, tree lowj, tree highi, tree highj,
2525 			  vec<operand_entry *> *ops,
2526 			  struct range_entry *rangei,
2527 			  struct range_entry *rangej)
2528 {
2529   tree lowxor, highxor, tem, exp;
2530   /* Check lowi ^ lowj == highi ^ highj and
2531      popcount (lowi ^ lowj) == 1.  */
2532   lowxor = fold_binary (BIT_XOR_EXPR, type, lowi, lowj);
2533   if (lowxor == NULL_TREE || TREE_CODE (lowxor) != INTEGER_CST)
2534     return false;
2535   if (!integer_pow2p (lowxor))
2536     return false;
2537   highxor = fold_binary (BIT_XOR_EXPR, type, highi, highj);
2538   if (!tree_int_cst_equal (lowxor, highxor))
2539     return false;
2540 
2541   exp = rangei->exp;
2542   scalar_int_mode mode = as_a <scalar_int_mode> (TYPE_MODE (type));
2543   int prec = GET_MODE_PRECISION (mode);
2544   if (TYPE_PRECISION (type) < prec
2545       || (wi::to_wide (TYPE_MIN_VALUE (type))
2546 	  != wi::min_value (prec, TYPE_SIGN (type)))
2547       || (wi::to_wide (TYPE_MAX_VALUE (type))
2548 	  != wi::max_value (prec, TYPE_SIGN (type))))
2549     {
2550       type = build_nonstandard_integer_type (prec, TYPE_UNSIGNED (type));
2551       exp = fold_convert (type, exp);
2552       lowxor = fold_convert (type, lowxor);
2553       lowi = fold_convert (type, lowi);
2554       highi = fold_convert (type, highi);
2555     }
2556   tem = fold_build1 (BIT_NOT_EXPR, type, lowxor);
2557   exp = fold_build2 (BIT_AND_EXPR, type, exp, tem);
2558   lowj = fold_build2 (BIT_AND_EXPR, type, lowi, tem);
2559   highj = fold_build2 (BIT_AND_EXPR, type, highi, tem);
2560   if (update_range_test (rangei, rangej, NULL, 1, opcode, ops, exp,
2561 			 NULL, rangei->in_p, lowj, highj,
2562 			 rangei->strict_overflow_p
2563 			 || rangej->strict_overflow_p))
2564     return true;
2565   return false;
2566 }
2567 
2568 /* Optimize X == CST1 || X == CST2
2569    if popcount (CST2 - CST1) == 1 into
2570    ((X - CST1) & ~(CST2 - CST1)) == 0.
2571    Similarly for ranges.  E.g.
2572    X == 43 || X == 76 || X == 44 || X == 78 || X == 77 || X == 46
2573    || X == 75 || X == 45
2574    will be transformed by the previous optimization into
2575    (X - 43U) <= 3U || (X - 75U) <= 3U
2576    and this loop can transform that into
2577    ((X - 43U) & ~(75U - 43U)) <= 3U.  */
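
/* In that example lowi = 43 and lowj = 75: both ranges span
   highi - lowi == highj - lowj == 3 values and lowj - lowi == 32 is a
   power of two, so the mask is ~32U and the two checks collapse into
   ((X - 43U) & ~32U) <= 3U.  */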
2578 static bool
2579 optimize_range_tests_diff (enum tree_code opcode, tree type,
2580 			   tree lowi, tree lowj, tree highi, tree highj,
2581 			   vec<operand_entry *> *ops,
2582 			   struct range_entry *rangei,
2583 			   struct range_entry *rangej)
2584 {
2585   tree tem1, tem2, mask;
2586   /* Check highi - lowi == highj - lowj.  */
2587   tem1 = fold_binary (MINUS_EXPR, type, highi, lowi);
2588   if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST)
2589     return false;
2590   tem2 = fold_binary (MINUS_EXPR, type, highj, lowj);
2591   if (!tree_int_cst_equal (tem1, tem2))
2592     return false;
2593   /* Check popcount (lowj - lowi) == 1.  */
2594   tem1 = fold_binary (MINUS_EXPR, type, lowj, lowi);
2595   if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST)
2596     return false;
2597   if (!integer_pow2p (tem1))
2598     return false;
2599 
2600   scalar_int_mode mode = as_a <scalar_int_mode> (TYPE_MODE (type));
2601   int prec = GET_MODE_PRECISION (mode);
2602   if (TYPE_PRECISION (type) < prec
2603       || (wi::to_wide (TYPE_MIN_VALUE (type))
2604 	  != wi::min_value (prec, TYPE_SIGN (type)))
2605       || (wi::to_wide (TYPE_MAX_VALUE (type))
2606 	  != wi::max_value (prec, TYPE_SIGN (type))))
2607     type = build_nonstandard_integer_type (prec, 1);
2608   else
2609     type = unsigned_type_for (type);
2610   tem1 = fold_convert (type, tem1);
2611   tem2 = fold_convert (type, tem2);
2612   lowi = fold_convert (type, lowi);
2613   mask = fold_build1 (BIT_NOT_EXPR, type, tem1);
2614   tem1 = fold_build2 (MINUS_EXPR, type,
2615 		      fold_convert (type, rangei->exp), lowi);
2616   tem1 = fold_build2 (BIT_AND_EXPR, type, tem1, mask);
2617   lowj = build_int_cst (type, 0);
2618   if (update_range_test (rangei, rangej, NULL, 1, opcode, ops, tem1,
2619 			 NULL, rangei->in_p, lowj, tem2,
2620 			 rangei->strict_overflow_p
2621 			 || rangej->strict_overflow_p))
2622     return true;
2623   return false;
2624 }
2625 
2626 /* Perform the checks common to optimize_range_tests_xor and
2627    optimize_range_tests_diff.
2628    If OPTIMIZE_XOR is TRUE, call optimize_range_tests_xor,
2629    otherwise call optimize_range_tests_diff.  */
2630 
2631 static bool
2632 optimize_range_tests_1 (enum tree_code opcode, int first, int length,
2633 			bool optimize_xor, vec<operand_entry *> *ops,
2634 			struct range_entry *ranges)
2635 {
2636   int i, j;
2637   bool any_changes = false;
2638   for (i = first; i < length; i++)
2639     {
2640       tree lowi, highi, lowj, highj, type, tem;
2641 
2642       if (ranges[i].exp == NULL_TREE || ranges[i].in_p)
2643 	continue;
2644       type = TREE_TYPE (ranges[i].exp);
2645       if (!INTEGRAL_TYPE_P (type))
2646 	continue;
2647       lowi = ranges[i].low;
2648       if (lowi == NULL_TREE)
2649 	lowi = TYPE_MIN_VALUE (type);
2650       highi = ranges[i].high;
2651       if (highi == NULL_TREE)
2652 	continue;
2653       for (j = i + 1; j < length && j < i + 64; j++)
2654 	{
2655 	  bool changes;
2656 	  if (ranges[i].exp != ranges[j].exp || ranges[j].in_p)
2657 	    continue;
2658 	  lowj = ranges[j].low;
2659 	  if (lowj == NULL_TREE)
2660 	    continue;
2661 	  highj = ranges[j].high;
2662 	  if (highj == NULL_TREE)
2663 	    highj = TYPE_MAX_VALUE (type);
2664 	  /* Check lowj > highi.  */
2665 	  tem = fold_binary (GT_EXPR, boolean_type_node,
2666 			     lowj, highi);
2667 	  if (tem == NULL_TREE || !integer_onep (tem))
2668 	    continue;
2669 	  if (optimize_xor)
2670 	    changes = optimize_range_tests_xor (opcode, type, lowi, lowj,
2671 						highi, highj, ops,
2672 						ranges + i, ranges + j);
2673 	  else
2674 	    changes = optimize_range_tests_diff (opcode, type, lowi, lowj,
2675 						 highi, highj, ops,
2676 						 ranges + i, ranges + j);
2677 	  if (changes)
2678 	    {
2679 	      any_changes = true;
2680 	      break;
2681 	    }
2682 	}
2683     }
2684   return any_changes;
2685 }
2686 
2687 /* Helper function of optimize_range_tests_to_bit_test.  Handle a single
2688    range (EXP, LOW, HIGH), compute the bit mask of bits to test and return
2689    EXP on success, NULL otherwise.  */
2690 
2691 static tree
2692 extract_bit_test_mask (tree exp, int prec, tree totallow, tree low, tree high,
2693 		       wide_int *mask, tree *totallowp)
2694 {
2695   tree tem = int_const_binop (MINUS_EXPR, high, low);
2696   if (tem == NULL_TREE
2697       || TREE_CODE (tem) != INTEGER_CST
2698       || TREE_OVERFLOW (tem)
2699       || tree_int_cst_sgn (tem) == -1
2700       || compare_tree_int (tem, prec) != -1)
2701     return NULL_TREE;
2702 
2703   unsigned HOST_WIDE_INT max = tree_to_uhwi (tem) + 1;
2704   *mask = wi::shifted_mask (0, max, false, prec);
2705   if (TREE_CODE (exp) == BIT_AND_EXPR
2706       && TREE_CODE (TREE_OPERAND (exp, 1)) == INTEGER_CST)
2707     {
2708       widest_int msk = wi::to_widest (TREE_OPERAND (exp, 1));
2709       msk = wi::zext (~msk, TYPE_PRECISION (TREE_TYPE (exp)));
2710       if (wi::popcount (msk) == 1
2711 	  && wi::ltu_p (msk, prec - max))
2712 	{
2713 	  *mask |= wi::shifted_mask (msk.to_uhwi (), max, false, prec);
2714 	  max += msk.to_uhwi ();
2715 	  exp = TREE_OPERAND (exp, 0);
2716 	  if (integer_zerop (low)
2717 	      && TREE_CODE (exp) == PLUS_EXPR
2718 	      && TREE_CODE (TREE_OPERAND (exp, 1)) == INTEGER_CST)
2719 	    {
2720 	      tree ret = TREE_OPERAND (exp, 0);
2721 	      STRIP_NOPS (ret);
2722 	      widest_int bias
2723 	        = wi::neg (wi::sext (wi::to_widest (TREE_OPERAND (exp, 1)),
2724 				     TYPE_PRECISION (TREE_TYPE (low))));
2725 	      tree tbias = wide_int_to_tree (TREE_TYPE (ret), bias);
2726 	      if (totallowp)
2727 		{
2728 		  *totallowp = tbias;
2729 		  return ret;
2730 		}
2731 	      else if (!tree_int_cst_lt (totallow, tbias))
2732 		return NULL_TREE;
2733 	      bias = wi::to_widest (tbias);
2734 	      bias -= wi::to_widest (totallow);
2735 	      if (bias >= 0 && bias < prec - max)
2736 		{
2737 		  *mask = wi::lshift (*mask, bias);
2738 		  return ret;
2739 		}
2740 	    }
2741 	}
2742     }
2743   if (totallowp)
2744     return exp;
2745   if (!tree_int_cst_lt (totallow, low))
2746     return exp;
2747   tem = int_const_binop (MINUS_EXPR, low, totallow);
2748   if (tem == NULL_TREE
2749       || TREE_CODE (tem) != INTEGER_CST
2750       || TREE_OVERFLOW (tem)
2751       || compare_tree_int (tem, prec - max) == 1)
2752     return NULL_TREE;
2753 
2754   *mask = wi::lshift (*mask, wi::to_widest (tem));
2755   return exp;
2756 }
2757 
2758 /* Attempt to optimize small range tests using bit test.
2759    E.g.
2760    X != 43 && X != 76 && X != 44 && X != 78 && X != 49
2761    && X != 77 && X != 46 && X != 75 && X != 45 && X != 82
2762    has been optimized by earlier optimizations into:
2763    ((X - 43U) & ~32U) > 3U && X != 49 && X != 82
2764    As the whole 43 through 82 range spans fewer than 64 values,
2765    for 64-bit word targets optimize that into:
2766    (X - 43U) > 40U || ((1 << (X - 43U)) & 0x8F0000004FULL) == 0  */
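
/* In that example the surviving values 43-46, 49, 75-78 and 82 sit at bit
   positions 0-3, 6, 32-35 and 39 relative to 43, which is exactly the
   mask constant 0x8F0000004FULL used above.  */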
2767 
2768 static bool
2769 optimize_range_tests_to_bit_test (enum tree_code opcode, int first, int length,
2770 				  vec<operand_entry *> *ops,
2771 				  struct range_entry *ranges)
2772 {
2773   int i, j;
2774   bool any_changes = false;
2775   int prec = GET_MODE_BITSIZE (word_mode);
2776   auto_vec<struct range_entry *, 64> candidates;
2777 
2778   for (i = first; i < length - 2; i++)
2779     {
2780       tree lowi, highi, lowj, highj, type;
2781 
2782       if (ranges[i].exp == NULL_TREE || ranges[i].in_p)
2783 	continue;
2784       type = TREE_TYPE (ranges[i].exp);
2785       if (!INTEGRAL_TYPE_P (type))
2786 	continue;
2787       lowi = ranges[i].low;
2788       if (lowi == NULL_TREE)
2789 	lowi = TYPE_MIN_VALUE (type);
2790       highi = ranges[i].high;
2791       if (highi == NULL_TREE)
2792 	continue;
2793       wide_int mask;
2794       tree exp = extract_bit_test_mask (ranges[i].exp, prec, lowi, lowi,
2795 					highi, &mask, &lowi);
2796       if (exp == NULL_TREE)
2797 	continue;
2798       bool strict_overflow_p = ranges[i].strict_overflow_p;
2799       candidates.truncate (0);
2800       int end = MIN (i + 64, length);
2801       for (j = i + 1; j < end; j++)
2802 	{
2803 	  tree exp2;
2804 	  if (ranges[j].exp == NULL_TREE || ranges[j].in_p)
2805 	    continue;
2806 	  if (ranges[j].exp == exp)
2807 	    ;
2808 	  else if (TREE_CODE (ranges[j].exp) == BIT_AND_EXPR)
2809 	    {
2810 	      exp2 = TREE_OPERAND (ranges[j].exp, 0);
2811 	      if (exp2 == exp)
2812 		;
2813 	      else if (TREE_CODE (exp2) == PLUS_EXPR)
2814 		{
2815 		  exp2 = TREE_OPERAND (exp2, 0);
2816 		  STRIP_NOPS (exp2);
2817 		  if (exp2 != exp)
2818 		    continue;
2819 		}
2820 	      else
2821 		continue;
2822 	    }
2823 	  else
2824 	    continue;
2825 	  lowj = ranges[j].low;
2826 	  if (lowj == NULL_TREE)
2827 	    continue;
2828 	  highj = ranges[j].high;
2829 	  if (highj == NULL_TREE)
2830 	    highj = TYPE_MAX_VALUE (type);
2831 	  wide_int mask2;
2832 	  exp2 = extract_bit_test_mask (ranges[j].exp, prec, lowi, lowj,
2833 					highj, &mask2, NULL);
2834 	  if (exp2 != exp)
2835 	    continue;
2836 	  mask |= mask2;
2837 	  strict_overflow_p |= ranges[j].strict_overflow_p;
2838 	  candidates.safe_push (&ranges[j]);
2839 	}
2840 
2841 	  /* If we would otherwise need 3 or more comparisons, use a bit test.  */
2842       if (candidates.length () >= 2)
2843 	{
2844 	  tree high = wide_int_to_tree (TREE_TYPE (lowi),
2845 					wi::to_widest (lowi)
2846 					+ prec - 1 - wi::clz (mask));
2847 	  operand_entry *oe = (*ops)[ranges[i].idx];
2848 	  tree op = oe->op;
2849 	  gimple *stmt = op ? SSA_NAME_DEF_STMT (op)
2850 			    : last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id));
2851 	  location_t loc = gimple_location (stmt);
2852 	  tree optype = op ? TREE_TYPE (op) : boolean_type_node;
2853 
2854 	  /* See if it isn't cheaper to pretend the minimum value of the
2855 	     range is 0, if the maximum value is small enough.
2856 	     We can then avoid the subtraction of the minimum value, but the
2857 	     mask constant could perhaps be more expensive.  */
2858 	  if (compare_tree_int (lowi, 0) > 0
2859 	      && compare_tree_int (high, prec) < 0)
2860 	    {
2861 	      int cost_diff;
2862 	      HOST_WIDE_INT m = tree_to_uhwi (lowi);
2863 	      rtx reg = gen_raw_REG (word_mode, 10000);
2864 	      bool speed_p = optimize_bb_for_speed_p (gimple_bb (stmt));
2865 	      cost_diff = set_rtx_cost (gen_rtx_PLUS (word_mode, reg,
2866 						      GEN_INT (-m)), speed_p);
2867 	      rtx r = immed_wide_int_const (mask, word_mode);
2868 	      cost_diff += set_src_cost (gen_rtx_AND (word_mode, reg, r),
2869 					 word_mode, speed_p);
2870 	      r = immed_wide_int_const (wi::lshift (mask, m), word_mode);
2871 	      cost_diff -= set_src_cost (gen_rtx_AND (word_mode, reg, r),
2872 					 word_mode, speed_p);
2873 	      if (cost_diff > 0)
2874 		{
2875 		  mask = wi::lshift (mask, m);
2876 		  lowi = build_zero_cst (TREE_TYPE (lowi));
2877 		}
2878 	    }
2879 
2880 	  tree tem = build_range_check (loc, optype, unshare_expr (exp),
2881 					false, lowi, high);
2882 	  if (tem == NULL_TREE || is_gimple_val (tem))
2883 	    continue;
2884 	  tree etype = unsigned_type_for (TREE_TYPE (exp));
2885 	  exp = fold_build2_loc (loc, MINUS_EXPR, etype,
2886 				 fold_convert_loc (loc, etype, exp),
2887 				 fold_convert_loc (loc, etype, lowi));
2888 	  exp = fold_convert_loc (loc, integer_type_node, exp);
2889 	  tree word_type = lang_hooks.types.type_for_mode (word_mode, 1);
2890 	  exp = fold_build2_loc (loc, LSHIFT_EXPR, word_type,
2891 				 build_int_cst (word_type, 1), exp);
2892 	  exp = fold_build2_loc (loc, BIT_AND_EXPR, word_type, exp,
2893 				 wide_int_to_tree (word_type, mask));
2894 	  exp = fold_build2_loc (loc, EQ_EXPR, optype, exp,
2895 				 build_zero_cst (word_type));
2896 	  if (is_gimple_val (exp))
2897 	    continue;
2898 
2899 	  /* The shift might have undefined behavior if TEM is true,
2900 	     but reassociate_bb isn't prepared to have basic blocks
2901 	     split when it is running.  So, temporarily emit code
2902 	     with BIT_IOR_EXPR instead of &&, and fix it up in
2903 	     branch_fixup.  */
2904 	  gimple_seq seq;
2905 	  tem = force_gimple_operand (tem, &seq, true, NULL_TREE);
2906 	  gcc_assert (TREE_CODE (tem) == SSA_NAME);
2907 	  gimple_set_visited (SSA_NAME_DEF_STMT (tem), true);
2908 	  gimple_seq seq2;
2909 	  exp = force_gimple_operand (exp, &seq2, true, NULL_TREE);
2910 	  gimple_seq_add_seq_without_update (&seq, seq2);
2911 	  gcc_assert (TREE_CODE (exp) == SSA_NAME);
2912 	  gimple_set_visited (SSA_NAME_DEF_STMT (exp), true);
2913 	  gimple *g = gimple_build_assign (make_ssa_name (optype),
2914 					   BIT_IOR_EXPR, tem, exp);
2915 	  gimple_set_location (g, loc);
2916 	  gimple_seq_add_stmt_without_update (&seq, g);
2917 	  exp = gimple_assign_lhs (g);
2918 	  tree val = build_zero_cst (optype);
2919 	  if (update_range_test (&ranges[i], NULL, candidates.address (),
2920 				 candidates.length (), opcode, ops, exp,
2921 				 seq, false, val, val, strict_overflow_p))
2922 	    {
2923 	      any_changes = true;
2924 	      reassoc_branch_fixups.safe_push (tem);
2925 	    }
2926 	  else
2927 	    gimple_seq_discard (seq);
2928 	}
2929     }
2930   return any_changes;
2931 }
2932 
2933 /* Optimize x != 0 && y != 0 && z != 0 into (x | y | z) != 0
2934    and similarly x != -1 && y != -1 && z != -1 into (x & y & z) != -1.  */
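
/* Candidates are grouped into buckets by the precision of their type,
   with separate buckets for the compare-against-0 and compare-against--1
   forms (the 2 * precision + !zero_p index below).  Within a bucket,
   roughly, the entry whose defining statement is dominated by the others
   is used as the insertion point, and the remaining operands are combined
   into it with BIT_IOR_EXPR for the 0 form and BIT_AND_EXPR for the -1
   form, matching the comment above.  */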
2935 
2936 static bool
2937 optimize_range_tests_cmp_bitwise (enum tree_code opcode, int first, int length,
2938 				  vec<operand_entry *> *ops,
2939 				  struct range_entry *ranges)
2940 {
2941   int i;
2942   unsigned int b;
2943   bool any_changes = false;
2944   auto_vec<int, 128> buckets;
2945   auto_vec<int, 32> chains;
2946   auto_vec<struct range_entry *, 32> candidates;
2947 
2948   for (i = first; i < length; i++)
2949     {
2950       if (ranges[i].exp == NULL_TREE
2951 	  || TREE_CODE (ranges[i].exp) != SSA_NAME
2952 	  || !ranges[i].in_p
2953 	  || TYPE_PRECISION (TREE_TYPE (ranges[i].exp)) <= 1
2954 	  || TREE_CODE (TREE_TYPE (ranges[i].exp)) == BOOLEAN_TYPE
2955 	  || ranges[i].low == NULL_TREE
2956 	  || ranges[i].low != ranges[i].high)
2957 	continue;
2958 
2959       bool zero_p = integer_zerop (ranges[i].low);
2960       if (!zero_p && !integer_all_onesp (ranges[i].low))
2961 	continue;
2962 
2963       b = TYPE_PRECISION (TREE_TYPE (ranges[i].exp)) * 2 + !zero_p;
2964       if (buckets.length () <= b)
2965 	buckets.safe_grow_cleared (b + 1);
2966       if (chains.length () <= (unsigned) i)
2967 	chains.safe_grow (i + 1);
2968       chains[i] = buckets[b];
2969       buckets[b] = i + 1;
2970     }
2971 
2972   FOR_EACH_VEC_ELT (buckets, b, i)
2973     if (i && chains[i - 1])
2974       {
2975 	int j, k = i;
2976 	for (j = chains[i - 1]; j; j = chains[j - 1])
2977 	  {
2978 	    gimple *gk = SSA_NAME_DEF_STMT (ranges[k - 1].exp);
2979 	    gimple *gj = SSA_NAME_DEF_STMT (ranges[j - 1].exp);
2980 	    if (reassoc_stmt_dominates_stmt_p (gk, gj))
2981 	      k = j;
2982 	  }
2983 	tree type1 = TREE_TYPE (ranges[k - 1].exp);
2984 	tree type2 = NULL_TREE;
2985 	bool strict_overflow_p = false;
2986 	candidates.truncate (0);
2987 	for (j = i; j; j = chains[j - 1])
2988 	  {
2989 	    tree type = TREE_TYPE (ranges[j - 1].exp);
2990 	    strict_overflow_p |= ranges[j - 1].strict_overflow_p;
2991 	    if (j == k
2992 		|| useless_type_conversion_p (type1, type))
2993 	      ;
2994 	    else if (type2 == NULL_TREE
2995 		     || useless_type_conversion_p (type2, type))
2996 	      {
2997 		if (type2 == NULL_TREE)
2998 		  type2 = type;
2999 		candidates.safe_push (&ranges[j - 1]);
3000 	      }
3001 	  }
3002 	unsigned l = candidates.length ();
3003 	for (j = i; j; j = chains[j - 1])
3004 	  {
3005 	    tree type = TREE_TYPE (ranges[j - 1].exp);
3006 	    if (j == k)
3007 	      continue;
3008 	    if (useless_type_conversion_p (type1, type))
3009 	      ;
3010 	    else if (type2 == NULL_TREE
3011 		     || useless_type_conversion_p (type2, type))
3012 	      continue;
3013 	    candidates.safe_push (&ranges[j - 1]);
3014 	  }
3015 	gimple_seq seq = NULL;
3016 	tree op = NULL_TREE;
3017 	unsigned int id;
3018 	struct range_entry *r;
3019 	candidates.safe_push (&ranges[k - 1]);
3020 	FOR_EACH_VEC_ELT (candidates, id, r)
3021 	  {
3022 	    gimple *g;
3023 	    if (id == 0)
3024 	      {
3025 		op = r->exp;
3026 		continue;
3027 	      }
3028 	    if (id == l)
3029 	      {
3030 		g = gimple_build_assign (make_ssa_name (type1), NOP_EXPR, op);
3031 		gimple_seq_add_stmt_without_update (&seq, g);
3032 		op = gimple_assign_lhs (g);
3033 	      }
3034 	    tree type = TREE_TYPE (r->exp);
3035 	    tree exp = r->exp;
3036 	    if (id >= l && !useless_type_conversion_p (type1, type))
3037 	      {
3038 		g = gimple_build_assign (make_ssa_name (type1), NOP_EXPR, exp);
3039 		gimple_seq_add_stmt_without_update (&seq, g);
3040 		exp = gimple_assign_lhs (g);
3041 	      }
3042 	    g = gimple_build_assign (make_ssa_name (id >= l ? type1 : type2),
3043 				     (b & 1) ? BIT_AND_EXPR : BIT_IOR_EXPR,
3044 				     op, exp);
3045 	    gimple_seq_add_stmt_without_update (&seq, g);
3046 	    op = gimple_assign_lhs (g);
3047 	  }
3048 	candidates.pop ();
3049 	if (update_range_test (&ranges[k - 1], NULL, candidates.address (),
3050 			       candidates.length (), opcode, ops, op,
3051 			       seq, true, ranges[k - 1].low,
3052 			       ranges[k - 1].low, strict_overflow_p))
3053 	  any_changes = true;
3054 	else
3055 	  gimple_seq_discard (seq);
3056       }
3057 
3058   return any_changes;
3059 }
3060 
3061 /* Attempt to optimize for signed a and b where b is known to be >= 0:
3062    a >= 0 && a < b into (unsigned) a < (unsigned) b
3063    a >= 0 && a <= b into (unsigned) a <= (unsigned) b  */
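
/* The rewrite is valid because when both a and b are known to be
   non-negative, signed and unsigned comparisons of a and b agree.  The
   a >= 0 tests are recorded in the map built below, and b >= 0 is
   established from get_nonzero_bits on the comparison's other operand.  */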
3064 
3065 static bool
3066 optimize_range_tests_var_bound (enum tree_code opcode, int first, int length,
3067 				vec<operand_entry *> *ops,
3068 				struct range_entry *ranges,
3069 				basic_block first_bb)
3070 {
3071   int i;
3072   bool any_changes = false;
3073   hash_map<tree, int> *map = NULL;
3074 
3075   for (i = first; i < length; i++)
3076     {
3077       if (ranges[i].exp == NULL_TREE
3078 	  || TREE_CODE (ranges[i].exp) != SSA_NAME
3079 	  || !ranges[i].in_p)
3080 	continue;
3081 
3082       tree type = TREE_TYPE (ranges[i].exp);
3083       if (!INTEGRAL_TYPE_P (type)
3084 	  || TYPE_UNSIGNED (type)
3085 	  || ranges[i].low == NULL_TREE
3086 	  || !integer_zerop (ranges[i].low)
3087 	  || ranges[i].high != NULL_TREE)
3088 	continue;
3089       /* EXP >= 0 here.  */
3090       if (map == NULL)
3091 	map = new hash_map <tree, int>;
3092       map->put (ranges[i].exp, i);
3093     }
3094 
3095   if (map == NULL)
3096     return false;
3097 
3098   for (i = 0; i < length; i++)
3099     {
3100       bool in_p = ranges[i].in_p;
3101       if (ranges[i].low == NULL_TREE
3102 	  || ranges[i].high == NULL_TREE)
3103 	continue;
3104       if (!integer_zerop (ranges[i].low)
3105 	  || !integer_zerop (ranges[i].high))
3106 	{
3107 	  if (ranges[i].exp
3108 	      && TYPE_PRECISION (TREE_TYPE (ranges[i].exp)) == 1
3109 	      && TYPE_UNSIGNED (TREE_TYPE (ranges[i].exp))
3110 	      && integer_onep (ranges[i].low)
3111 	      && integer_onep (ranges[i].high))
3112 	    in_p = !in_p;
3113 	  else
3114 	    continue;
3115 	}
3116 
3117       gimple *stmt;
3118       tree_code ccode;
3119       tree rhs1, rhs2;
3120       if (ranges[i].exp)
3121 	{
3122 	  if (TREE_CODE (ranges[i].exp) != SSA_NAME)
3123 	    continue;
3124 	  stmt = SSA_NAME_DEF_STMT (ranges[i].exp);
3125 	  if (!is_gimple_assign (stmt))
3126 	    continue;
3127 	  ccode = gimple_assign_rhs_code (stmt);
3128 	  rhs1 = gimple_assign_rhs1 (stmt);
3129 	  rhs2 = gimple_assign_rhs2 (stmt);
3130 	}
3131       else
3132 	{
3133 	  operand_entry *oe = (*ops)[ranges[i].idx];
3134 	  stmt = last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id));
3135 	  if (gimple_code (stmt) != GIMPLE_COND)
3136 	    continue;
3137 	  ccode = gimple_cond_code (stmt);
3138 	  rhs1 = gimple_cond_lhs (stmt);
3139 	  rhs2 = gimple_cond_rhs (stmt);
3140 	}
3141 
3142       if (TREE_CODE (rhs1) != SSA_NAME
3143 	  || rhs2 == NULL_TREE
3144 	  || TREE_CODE (rhs2) != SSA_NAME)
3145 	continue;
3146 
3147       switch (ccode)
3148 	{
3149 	case GT_EXPR:
3150 	case GE_EXPR:
3151 	case LT_EXPR:
3152 	case LE_EXPR:
3153 	  break;
3154 	default:
3155 	  continue;
3156 	}
3157       if (in_p)
3158 	ccode = invert_tree_comparison (ccode, false);
3159       switch (ccode)
3160 	{
3161 	case GT_EXPR:
3162 	case GE_EXPR:
3163 	  std::swap (rhs1, rhs2);
3164 	  ccode = swap_tree_comparison (ccode);
3165 	  break;
3166 	case LT_EXPR:
3167 	case LE_EXPR:
3168 	  break;
3169 	default:
3170 	  gcc_unreachable ();
3171 	}
3172 
3173       int *idx = map->get (rhs1);
3174       if (idx == NULL)
3175 	continue;
3176 
3177       /* maybe_optimize_range_tests allows statements without side-effects
3178 	 in the basic blocks as long as they are consumed in the same bb.
3179 	 Make sure rhs2's def stmt is not among them, otherwise we can't
3180 	 safely use get_nonzero_bits on it.  E.g. in:
3181 	  # RANGE [-83, 1] NONZERO 173
3182 	  # k_32 = PHI <k_47(13), k_12(9)>
3183 	 ...
3184 	  if (k_32 >= 0)
3185 	    goto <bb 5>; [26.46%]
3186 	  else
3187 	    goto <bb 9>; [73.54%]
3188 
3189 	  <bb 5> [local count: 140323371]:
3190 	  # RANGE [0, 1] NONZERO 1
3191 	  _5 = (int) k_32;
3192 	  # RANGE [0, 4] NONZERO 4
3193 	  _21 = _5 << 2;
3194 	  # RANGE [0, 4] NONZERO 4
3195 	  iftmp.0_44 = (char) _21;
3196 	  if (k_32 < iftmp.0_44)
3197 	    goto <bb 6>; [84.48%]
3198 	  else
3199 	    goto <bb 9>; [15.52%]
3200 	 the ranges on _5/_21/iftmp.0_44 are flow sensitive and assume that
3201 	 k_32 >= 0.  If we'd optimize k_32 >= 0 to true and k_32 < iftmp.0_44
3202 	 to (unsigned) k_32 < (unsigned) iftmp.0_44, then we would execute
3203 	 those stmts even for negative k_32 and the value ranges would no
3204 	 longer be guaranteed and so the optimization would be invalid.  */
3205       while (opcode == ERROR_MARK)
3206 	{
3207 	  gimple *g = SSA_NAME_DEF_STMT (rhs2);
3208 	  basic_block bb2 = gimple_bb (g);
3209 	  if (bb2
3210 	      && bb2 != first_bb
3211 	      && dominated_by_p (CDI_DOMINATORS, bb2, first_bb))
3212 	    {
3213 	      /* As an exception, handle a few common cases.  */
3214 	      if (gimple_assign_cast_p (g)
3215 		  && INTEGRAL_TYPE_P (TREE_TYPE (gimple_assign_rhs1 (g))))
3216 		{
3217 		  tree op0 = gimple_assign_rhs1 (g);
3218 		  if (TYPE_UNSIGNED (TREE_TYPE (op0))
3219 		      && (TYPE_PRECISION (TREE_TYPE (rhs2))
3220 			  > TYPE_PRECISION (TREE_TYPE (op0))))
3221 		    /* Zero-extension is always ok.  */
3222 		    break;
3223 		  else if (TYPE_PRECISION (TREE_TYPE (rhs2))
3224 			   == TYPE_PRECISION (TREE_TYPE (op0))
3225 			   && TREE_CODE (op0) == SSA_NAME)
3226 		    {
3227 		      /* Cast from signed to unsigned or vice versa.  Retry
3228 			 with op0 as the new rhs2.  */
3229 		      rhs2 = op0;
3230 		      continue;
3231 		    }
3232 		}
3233 	      else if (is_gimple_assign (g)
3234 		       && gimple_assign_rhs_code (g) == BIT_AND_EXPR
3235 		       && TREE_CODE (gimple_assign_rhs2 (g)) == INTEGER_CST
3236 		       && !wi::neg_p (wi::to_wide (gimple_assign_rhs2 (g))))
3237 		/* Masking with INTEGER_CST with MSB clear is always ok
3238 		   too.  */
3239 		break;
3240 	      rhs2 = NULL_TREE;
3241 	    }
3242 	  break;
3243 	}
3244       if (rhs2 == NULL_TREE)
3245 	continue;
3246 
3247       wide_int nz = get_nonzero_bits (rhs2);
3248       if (wi::neg_p (nz))
3249 	continue;
3250 
3251       /* We have EXP < RHS2 or EXP <= RHS2 where EXP >= 0
3252 	 and RHS2 is known to be >= 0.  */
3253       tree utype = unsigned_type_for (TREE_TYPE (rhs1));
3254 
3255       enum warn_strict_overflow_code wc = WARN_STRICT_OVERFLOW_COMPARISON;
3256       if ((ranges[*idx].strict_overflow_p
3257 	   || ranges[i].strict_overflow_p)
3258 	  && issue_strict_overflow_warning (wc))
3259 	warning_at (gimple_location (stmt), OPT_Wstrict_overflow,
3260 		    "assuming signed overflow does not occur "
3261 		    "when simplifying range test");
3262 
3263       if (dump_file && (dump_flags & TDF_DETAILS))
3264 	{
3265 	  struct range_entry *r = &ranges[*idx];
3266 	  fprintf (dump_file, "Optimizing range test ");
3267 	  print_generic_expr (dump_file, r->exp);
3268 	  fprintf (dump_file, " +[");
3269 	  print_generic_expr (dump_file, r->low);
3270 	  fprintf (dump_file, ", ");
3271 	  print_generic_expr (dump_file, r->high);
3272 	  fprintf (dump_file, "] and comparison ");
3273 	  print_generic_expr (dump_file, rhs1);
3274 	  fprintf (dump_file, " %s ", op_symbol_code (ccode));
3275 	  print_generic_expr (dump_file, rhs2);
3276 	  fprintf (dump_file, "\n into (");
3277 	  print_generic_expr (dump_file, utype);
3278 	  fprintf (dump_file, ") ");
3279 	  print_generic_expr (dump_file, rhs1);
3280 	  fprintf (dump_file, " %s (", op_symbol_code (ccode));
3281 	  print_generic_expr (dump_file, utype);
3282 	  fprintf (dump_file, ") ");
3283 	  print_generic_expr (dump_file, rhs2);
3284 	  fprintf (dump_file, "\n");
3285 	}
3286 
3287       operand_entry *oe = (*ops)[ranges[i].idx];
3288       ranges[i].in_p = 0;
3289       if (opcode == BIT_IOR_EXPR
3290 	  || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
3291 	{
3292 	  ranges[i].in_p = 1;
3293 	  ccode = invert_tree_comparison (ccode, false);
3294 	}
3295 
3296       unsigned int uid = gimple_uid (stmt);
3297       gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
3298       gimple *g = gimple_build_assign (make_ssa_name (utype), NOP_EXPR, rhs1);
3299       gimple_set_uid (g, uid);
3300       rhs1 = gimple_assign_lhs (g);
3301       gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3302       if (!useless_type_conversion_p (utype, TREE_TYPE (rhs2)))
3303 	{
3304 	  g = gimple_build_assign (make_ssa_name (utype), NOP_EXPR, rhs2);
3305 	  gimple_set_uid (g, uid);
3306 	  rhs2 = gimple_assign_lhs (g);
3307 	  gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3308 	}
3309       if (tree_swap_operands_p (rhs1, rhs2))
3310 	{
3311 	  std::swap (rhs1, rhs2);
3312 	  ccode = swap_tree_comparison (ccode);
3313 	}
3314       if (gimple_code (stmt) == GIMPLE_COND)
3315 	{
3316 	  gcond *c = as_a <gcond *> (stmt);
3317 	  gimple_cond_set_code (c, ccode);
3318 	  gimple_cond_set_lhs (c, rhs1);
3319 	  gimple_cond_set_rhs (c, rhs2);
3320 	  update_stmt (stmt);
3321 	}
3322       else
3323 	{
3324 	  tree ctype = oe->op ? TREE_TYPE (oe->op) : boolean_type_node;
3325 	  if (!INTEGRAL_TYPE_P (ctype)
3326 	      || (TREE_CODE (ctype) != BOOLEAN_TYPE
3327 		  && TYPE_PRECISION (ctype) != 1))
3328 	    ctype = boolean_type_node;
3329 	  g = gimple_build_assign (make_ssa_name (ctype), ccode, rhs1, rhs2);
3330 	  gimple_set_uid (g, uid);
3331 	  gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3332 	  if (oe->op && ctype != TREE_TYPE (oe->op))
3333 	    {
3334 	      g = gimple_build_assign (make_ssa_name (TREE_TYPE (oe->op)),
3335 				       NOP_EXPR, gimple_assign_lhs (g));
3336 	      gimple_set_uid (g, uid);
3337 	      gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3338 	    }
3339 	  ranges[i].exp = gimple_assign_lhs (g);
3340 	  oe->op = ranges[i].exp;
3341 	  ranges[i].low = build_zero_cst (TREE_TYPE (ranges[i].exp));
3342 	  ranges[i].high = ranges[i].low;
3343 	}
3344       ranges[i].strict_overflow_p = false;
3345       oe = (*ops)[ranges[*idx].idx];
3346       /* Now change all the other range test immediate uses, so that
3347 	 those tests will be optimized away.  */
3348       if (opcode == ERROR_MARK)
3349 	{
3350 	  if (oe->op)
3351 	    oe->op = build_int_cst (TREE_TYPE (oe->op),
3352 				    oe->rank == BIT_IOR_EXPR ? 0 : 1);
3353 	  else
3354 	    oe->op = (oe->rank == BIT_IOR_EXPR
3355 		      ? boolean_false_node : boolean_true_node);
3356 	}
3357       else
3358 	oe->op = error_mark_node;
3359       ranges[*idx].exp = NULL_TREE;
3360       ranges[*idx].low = NULL_TREE;
3361       ranges[*idx].high = NULL_TREE;
3362       any_changes = true;
3363     }
3364 
3365   delete map;
3366   return any_changes;
3367 }
3368 
3369 /* Optimize range tests, similarly to how fold_range_test optimizes
3370    them on trees.  The tree code for the binary
3371    operation between all the operands is OPCODE.
3372    If OPCODE is ERROR_MARK, optimize_range_tests is called from within
3373    maybe_optimize_range_tests for inter-bb range optimization.
3374    In that case if oe->op is NULL, oe->id is bb->index whose
3375    GIMPLE_COND is && or ||ed into the test, and oe->rank says
3376    the actual opcode.
3377    FIRST_BB is the first basic block if OPCODE is ERROR_MARK.  */
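/* As an illustrative sketch (hypothetical GIMPLE, not from an actual
   dump): for a chain like
     a_1 == 3 || a_1 == 4 || a_1 == 6
   the ranges [3, 3], [4, 4] and [6, 6] are collected, merge_ranges
   combines the adjacent [3, 3] and [4, 4] into [3, 4], and
   update_range_test replaces those two tests with roughly
     (unsigned) (a_1 - 3) <= 1
   while the [6, 6] test is left in OPS and still ||ed with it.  */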
3378 
3379 static bool
3380 optimize_range_tests (enum tree_code opcode,
3381 		      vec<operand_entry *> *ops, basic_block first_bb)
3382 {
3383   unsigned int length = ops->length (), i, j, first;
3384   operand_entry *oe;
3385   struct range_entry *ranges;
3386   bool any_changes = false;
3387 
3388   if (length == 1)
3389     return false;
3390 
3391   ranges = XNEWVEC (struct range_entry, length);
3392   for (i = 0; i < length; i++)
3393     {
3394       oe = (*ops)[i];
3395       ranges[i].idx = i;
3396       init_range_entry (ranges + i, oe->op,
3397 			oe->op
3398 			? NULL
3399 			: last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id)));
3400       /* For | invert it now, we will invert it again before emitting
3401 	 the optimized expression.  */
3402       if (opcode == BIT_IOR_EXPR
3403 	  || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
3404 	ranges[i].in_p = !ranges[i].in_p;
3405     }
3406 
3407   qsort (ranges, length, sizeof (*ranges), range_entry_cmp);
3408   for (i = 0; i < length; i++)
3409     if (ranges[i].exp != NULL_TREE && TREE_CODE (ranges[i].exp) == SSA_NAME)
3410       break;
3411 
3412   /* Try to merge ranges.  */
3413   for (first = i; i < length; i++)
3414     {
3415       tree low = ranges[i].low;
3416       tree high = ranges[i].high;
3417       int in_p = ranges[i].in_p;
3418       bool strict_overflow_p = ranges[i].strict_overflow_p;
3419       int update_fail_count = 0;
3420 
3421       for (j = i + 1; j < length; j++)
3422 	{
3423 	  if (ranges[i].exp != ranges[j].exp)
3424 	    break;
3425 	  if (!merge_ranges (&in_p, &low, &high, in_p, low, high,
3426 			     ranges[j].in_p, ranges[j].low, ranges[j].high))
3427 	    break;
3428 	  strict_overflow_p |= ranges[j].strict_overflow_p;
3429 	}
3430 
3431       if (j == i + 1)
3432 	continue;
3433 
3434       if (update_range_test (ranges + i, ranges + i + 1, NULL, j - i - 1,
3435 			     opcode, ops, ranges[i].exp, NULL, in_p,
3436 			     low, high, strict_overflow_p))
3437 	{
3438 	  i = j - 1;
3439 	  any_changes = true;
3440 	}
3441       /* Avoid quadratic complexity if all merge_ranges calls would succeed,
3442 	 while update_range_test would fail.  */
3443       else if (update_fail_count == 64)
3444 	i = j - 1;
3445       else
3446 	++update_fail_count;
3447     }
3448 
3449   any_changes |= optimize_range_tests_1 (opcode, first, length, true,
3450 					 ops, ranges);
3451 
3452   if (BRANCH_COST (optimize_function_for_speed_p (cfun), false) >= 2)
3453     any_changes |= optimize_range_tests_1 (opcode, first, length, false,
3454 					   ops, ranges);
3455   if (lshift_cheap_p (optimize_function_for_speed_p (cfun)))
3456     any_changes |= optimize_range_tests_to_bit_test (opcode, first, length,
3457 						     ops, ranges);
3458   any_changes |= optimize_range_tests_cmp_bitwise (opcode, first, length,
3459 						   ops, ranges);
3460   any_changes |= optimize_range_tests_var_bound (opcode, first, length, ops,
3461 						 ranges, first_bb);
3462 
3463   if (any_changes && opcode != ERROR_MARK)
3464     {
3465       j = 0;
3466       FOR_EACH_VEC_ELT (*ops, i, oe)
3467 	{
3468 	  if (oe->op == error_mark_node)
3469 	    continue;
3470 	  else if (i != j)
3471 	    (*ops)[j] = oe;
3472 	  j++;
3473 	}
3474       ops->truncate (j);
3475     }
3476 
3477   XDELETEVEC (ranges);
3478   return any_changes;
3479 }
3480 
3481 /* A subroutine of optimize_vec_cond_expr to extract and canonicalize
3482    the operands of the VEC_COND_EXPR.  Returns ERROR_MARK on failure,
3483    otherwise the comparison code.  */
3484 
3485 static tree_code
3486 ovce_extract_ops (tree var, gassign **rets, bool *reti)
3487 {
3488   if (TREE_CODE (var) != SSA_NAME)
3489     return ERROR_MARK;
3490 
3491   gassign *stmt = dyn_cast <gassign *> (SSA_NAME_DEF_STMT (var));
3492   if (stmt == NULL)
3493     return ERROR_MARK;
3494 
3495   /* ??? If we start creating more COND_EXPR, we could perform
3496      this same optimization with them.	For now, simplify.  */
3497   if (gimple_assign_rhs_code (stmt) != VEC_COND_EXPR)
3498     return ERROR_MARK;
3499 
3500   tree cond = gimple_assign_rhs1 (stmt);
3501   tree_code cmp = TREE_CODE (cond);
3502   if (TREE_CODE_CLASS (cmp) != tcc_comparison)
3503     return ERROR_MARK;
3504 
3505   /* ??? For now, allow only canonical true and false result vectors.
3506      We could expand this to other constants should the need arise,
3507      but at the moment we don't create them.  */
3508   tree t = gimple_assign_rhs2 (stmt);
3509   tree f = gimple_assign_rhs3 (stmt);
3510   bool inv;
3511   if (integer_all_onesp (t))
3512     inv = false;
3513   else if (integer_all_onesp (f))
3514     {
3515       cmp = invert_tree_comparison (cmp, false);
3516       inv = true;
3517     }
3518   else
3519     return ERROR_MARK;
3520   if (!integer_zerop (f))
3521     return ERROR_MARK;
3522 
3523   /* Success!  */
3524   if (rets)
3525     *rets = stmt;
3526   if (reti)
3527     *reti = inv;
3528   return cmp;
3529 }
3530 
3531 /* Optimize the condition of VEC_COND_EXPRs which have been combined
3532    with OPCODE (either BIT_AND_EXPR or BIT_IOR_EXPR).  */
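/* Illustrative sketch (hypothetical GIMPLE): with OPCODE == BIT_AND_EXPR
   and two operands defined as
     _3 = VEC_COND_EXPR <v_1 < w_1, { -1, ... }, { 0, ... }>;
     _4 = VEC_COND_EXPR <v_1 <= w_1, { -1, ... }, { 0, ... }>;
   maybe_fold_and_comparisons can combine the two conditions into
   v_1 < w_1, so the first VEC_COND_EXPR is rewritten to use the
   combined condition and the second entry is dropped from OPS.  */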
3533 
3534 static bool
3535 optimize_vec_cond_expr (tree_code opcode, vec<operand_entry *> *ops)
3536 {
3537   unsigned int length = ops->length (), i, j;
3538   bool any_changes = false;
3539 
3540   if (length == 1)
3541     return false;
3542 
3543   for (i = 0; i < length; ++i)
3544     {
3545       tree elt0 = (*ops)[i]->op;
3546 
3547       gassign *stmt0;
3548       bool invert;
3549       tree_code cmp0 = ovce_extract_ops (elt0, &stmt0, &invert);
3550       if (cmp0 == ERROR_MARK)
3551 	continue;
3552 
3553       for (j = i + 1; j < length; ++j)
3554 	{
3555 	  tree &elt1 = (*ops)[j]->op;
3556 
3557 	  gassign *stmt1;
3558 	  tree_code cmp1 = ovce_extract_ops (elt1, &stmt1, NULL);
3559 	  if (cmp1 == ERROR_MARK)
3560 	    continue;
3561 
3562 	  tree cond0 = gimple_assign_rhs1 (stmt0);
3563 	  tree x0 = TREE_OPERAND (cond0, 0);
3564 	  tree y0 = TREE_OPERAND (cond0, 1);
3565 
3566 	  tree cond1 = gimple_assign_rhs1 (stmt1);
3567 	  tree x1 = TREE_OPERAND (cond1, 0);
3568 	  tree y1 = TREE_OPERAND (cond1, 1);
3569 
3570 	  tree comb;
3571 	  if (opcode == BIT_AND_EXPR)
3572 	    comb = maybe_fold_and_comparisons (cmp0, x0, y0, cmp1, x1, y1);
3573 	  else if (opcode == BIT_IOR_EXPR)
3574 	    comb = maybe_fold_or_comparisons (cmp0, x0, y0, cmp1, x1, y1);
3575 	  else
3576 	    gcc_unreachable ();
3577 	  if (comb == NULL)
3578 	    continue;
3579 
3580 	  /* Success! */
3581 	  if (dump_file && (dump_flags & TDF_DETAILS))
3582 	    {
3583 	      fprintf (dump_file, "Transforming ");
3584 	      print_generic_expr (dump_file, cond0);
3585 	      fprintf (dump_file, " %c ", opcode == BIT_AND_EXPR ? '&' : '|');
3586 	      print_generic_expr (dump_file, cond1);
3587 	      fprintf (dump_file, " into ");
3588 	      print_generic_expr (dump_file, comb);
3589 	      fputc ('\n', dump_file);
3590 	    }
3591 
3592 	  gimple_assign_set_rhs1 (stmt0, comb);
3593 	  if (invert)
3594 	    std::swap (*gimple_assign_rhs2_ptr (stmt0),
3595 		       *gimple_assign_rhs3_ptr (stmt0));
3596 	  update_stmt (stmt0);
3597 
3598 	  elt1 = error_mark_node;
3599 	  any_changes = true;
3600 	}
3601     }
3602 
3603   if (any_changes)
3604     {
3605       operand_entry *oe;
3606       j = 0;
3607       FOR_EACH_VEC_ELT (*ops, i, oe)
3608 	{
3609 	  if (oe->op == error_mark_node)
3610 	    continue;
3611 	  else if (i != j)
3612 	    (*ops)[j] = oe;
3613 	  j++;
3614 	}
3615       ops->truncate (j);
3616     }
3617 
3618   return any_changes;
3619 }
3620 
3621 /* Return true if STMT is a cast like:
3622    <bb N>:
3623    ...
3624    _123 = (int) _234;
3625 
3626    <bb M>:
3627    # _345 = PHI <_123(N), 1(...), 1(...)>
3628    where _234 has bool type, _123 has single use and
3629    bb N has a single successor M.  This is commonly used in
3630    the last block of a range test.
3631 
3632    Also return true if STMT is a tcc_comparison statement like:
3633    <bb N>:
3634    ...
3635    _234 = a_2(D) == 2;
3636 
3637    <bb M>:
3638    # _345 = PHI <_234(N), 1(...), 1(...)>
3639    _346 = (int) _345;
3640    where _234 has bool type, a single use and
3641    bb N has a single successor M.  This is commonly used in
3642    the last block of a range test.  */
3643 
3644 static bool
3645 final_range_test_p (gimple *stmt)
3646 {
3647   basic_block bb, rhs_bb, lhs_bb;
3648   edge e;
3649   tree lhs, rhs;
3650   use_operand_p use_p;
3651   gimple *use_stmt;
3652 
3653   if (!gimple_assign_cast_p (stmt)
3654       && (!is_gimple_assign (stmt)
3655 	  || (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt))
3656 	      != tcc_comparison)))
3657     return false;
3658   bb = gimple_bb (stmt);
3659   if (!single_succ_p (bb))
3660     return false;
3661   e = single_succ_edge (bb);
3662   if (e->flags & EDGE_COMPLEX)
3663     return false;
3664 
3665   lhs = gimple_assign_lhs (stmt);
3666   rhs = gimple_assign_rhs1 (stmt);
3667   if (gimple_assign_cast_p (stmt)
3668       && (!INTEGRAL_TYPE_P (TREE_TYPE (lhs))
3669 	  || TREE_CODE (rhs) != SSA_NAME
3670 	  || TREE_CODE (TREE_TYPE (rhs)) != BOOLEAN_TYPE))
3671     return false;
3672 
3673   if (!gimple_assign_cast_p (stmt)
3674       && (TREE_CODE (TREE_TYPE (lhs)) != BOOLEAN_TYPE))
3675       return false;
3676 
3677   /* Test whether lhs is consumed only by a PHI in the only successor bb.  */
3678   if (!single_imm_use (lhs, &use_p, &use_stmt))
3679     return false;
3680 
3681   if (gimple_code (use_stmt) != GIMPLE_PHI
3682       || gimple_bb (use_stmt) != e->dest)
3683     return false;
3684 
3685   /* And that the rhs is defined in the same loop.  */
3686   if (gimple_assign_cast_p (stmt))
3687     {
3688       if (TREE_CODE (rhs) != SSA_NAME
3689 	  || !(rhs_bb = gimple_bb (SSA_NAME_DEF_STMT (rhs)))
3690 	  || !flow_bb_inside_loop_p (loop_containing_stmt (stmt), rhs_bb))
3691 	return false;
3692     }
3693   else
3694     {
3695       if (TREE_CODE (lhs) != SSA_NAME
3696 	  || !(lhs_bb = gimple_bb (SSA_NAME_DEF_STMT (lhs)))
3697 	  || !flow_bb_inside_loop_p (loop_containing_stmt (stmt), lhs_bb))
3698 	return false;
3699     }
3700 
3701   return true;
3702 }
3703 
3704 /* Return true if BB is suitable basic block for inter-bb range test
3705    optimization.  If BACKWARD is true, BB should be the only predecessor
3706    of TEST_BB, and *OTHER_BB is either NULL and filled by the routine,
3707    or compared with to find a common basic block to which all conditions
3708    branch if true resp. false.  If BACKWARD is false, TEST_BB should
3709    be the only predecessor of BB.  */
3710 
3711 static bool
3712 suitable_cond_bb (basic_block bb, basic_block test_bb, basic_block *other_bb,
3713 		  bool backward)
3714 {
3715   edge_iterator ei, ei2;
3716   edge e, e2;
3717   gimple *stmt;
3718   gphi_iterator gsi;
3719   bool other_edge_seen = false;
3720   bool is_cond;
3721 
3722   if (test_bb == bb)
3723     return false;
3724   /* Check last stmt first.  */
3725   stmt = last_stmt (bb);
3726   if (stmt == NULL
3727       || (gimple_code (stmt) != GIMPLE_COND
3728 	  && (backward || !final_range_test_p (stmt)))
3729       || gimple_visited_p (stmt)
3730       || stmt_could_throw_p (cfun, stmt)
3731       || *other_bb == bb)
3732     return false;
3733   is_cond = gimple_code (stmt) == GIMPLE_COND;
3734   if (is_cond)
3735     {
3736       /* If last stmt is GIMPLE_COND, verify that one of the succ edges
3737 	 goes to the next bb (if BACKWARD, it is TEST_BB), and the other
3738 	 to *OTHER_BB (if not set yet, try to find it out).  */
3739       if (EDGE_COUNT (bb->succs) != 2)
3740 	return false;
3741       FOR_EACH_EDGE (e, ei, bb->succs)
3742 	{
3743 	  if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
3744 	    return false;
3745 	  if (e->dest == test_bb)
3746 	    {
3747 	      if (backward)
3748 		continue;
3749 	      else
3750 		return false;
3751 	    }
3752 	  if (e->dest == bb)
3753 	    return false;
3754 	  if (*other_bb == NULL)
3755 	    {
3756 	      FOR_EACH_EDGE (e2, ei2, test_bb->succs)
3757 		if (!(e2->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
3758 		  return false;
3759 		else if (e->dest == e2->dest)
3760 		  *other_bb = e->dest;
3761 	      if (*other_bb == NULL)
3762 		return false;
3763 	    }
3764 	  if (e->dest == *other_bb)
3765 	    other_edge_seen = true;
3766 	  else if (backward)
3767 	    return false;
3768 	}
3769       if (*other_bb == NULL || !other_edge_seen)
3770 	return false;
3771     }
3772   else if (single_succ (bb) != *other_bb)
3773     return false;
3774 
3775   /* Now check all PHIs of *OTHER_BB.  */
3776   e = find_edge (bb, *other_bb);
3777   e2 = find_edge (test_bb, *other_bb);
3778   for (gsi = gsi_start_phis (e->dest); !gsi_end_p (gsi); gsi_next (&gsi))
3779     {
3780       gphi *phi = gsi.phi ();
3781       /* If both BB and TEST_BB end with GIMPLE_COND, all PHI arguments
3782 	 corresponding to BB and TEST_BB predecessor must be the same.  */
3783       if (!operand_equal_p (gimple_phi_arg_def (phi, e->dest_idx),
3784 			    gimple_phi_arg_def (phi, e2->dest_idx), 0))
3785 	{
3786 	  /* Otherwise, if one of the blocks doesn't end with GIMPLE_COND,
3787 	     one of the PHIs should have the lhs of the last stmt in
3788 	     that block as PHI arg and that PHI should have 0 or 1
3789 	     corresponding to it in all other range test basic blocks
3790 	     considered.  */
3791 	  if (!is_cond)
3792 	    {
3793 	      if (gimple_phi_arg_def (phi, e->dest_idx)
3794 		  == gimple_assign_lhs (stmt)
3795 		  && (integer_zerop (gimple_phi_arg_def (phi, e2->dest_idx))
3796 		      || integer_onep (gimple_phi_arg_def (phi,
3797 							   e2->dest_idx))))
3798 		continue;
3799 	    }
3800 	  else
3801 	    {
3802 	      gimple *test_last = last_stmt (test_bb);
3803 	      if (gimple_code (test_last) != GIMPLE_COND
3804 		  && gimple_phi_arg_def (phi, e2->dest_idx)
3805 		     == gimple_assign_lhs (test_last)
3806 		  && (integer_zerop (gimple_phi_arg_def (phi, e->dest_idx))
3807 		      || integer_onep (gimple_phi_arg_def (phi, e->dest_idx))))
3808 		continue;
3809 	    }
3810 
3811 	  return false;
3812 	}
3813     }
3814   return true;
3815 }
3816 
3817 /* Return true if BB doesn't have side-effects that would disallow
3818    range test optimization, all SSA_NAMEs set in the bb are consumed
3819    in the bb and there are no PHIs.  */
3820 
3821 static bool
3822 no_side_effect_bb (basic_block bb)
3823 {
3824   gimple_stmt_iterator gsi;
3825   gimple *last;
3826 
3827   if (!gimple_seq_empty_p (phi_nodes (bb)))
3828     return false;
3829   last = last_stmt (bb);
3830   for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
3831     {
3832       gimple *stmt = gsi_stmt (gsi);
3833       tree lhs;
3834       imm_use_iterator imm_iter;
3835       use_operand_p use_p;
3836 
3837       if (is_gimple_debug (stmt))
3838 	continue;
3839       if (gimple_has_side_effects (stmt))
3840 	return false;
3841       if (stmt == last)
3842 	return true;
3843       if (!is_gimple_assign (stmt))
3844 	return false;
3845       lhs = gimple_assign_lhs (stmt);
3846       if (TREE_CODE (lhs) != SSA_NAME)
3847 	return false;
3848       if (gimple_assign_rhs_could_trap_p (stmt))
3849 	return false;
3850       FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
3851 	{
3852 	  gimple *use_stmt = USE_STMT (use_p);
3853 	  if (is_gimple_debug (use_stmt))
3854 	    continue;
3855 	  if (gimple_bb (use_stmt) != bb)
3856 	    return false;
3857 	}
3858     }
3859   return false;
3860 }
3861 
3862 /* If VAR is set by CODE (BIT_{AND,IOR}_EXPR) which is reassociable,
3863    return true and fill in *OPS recursively.  */
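/* A small illustrative example (hypothetical SSA names): for
     _6 = _3 | _4;
     _7 = _1 | _6;
   where every SSA name involved is single-use and _1, _3 and _4 are
   not themselves defined by reassociable BIT_IOR_EXPRs,
   get_ops (_7, BIT_IOR_EXPR, ...) marks both statements visited and
   pushes _1, _3 and _4 as separate operand entries.  */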
3864 
3865 static bool
3866 get_ops (tree var, enum tree_code code, vec<operand_entry *> *ops,
3867 	 struct loop *loop)
3868 {
3869   gimple *stmt = SSA_NAME_DEF_STMT (var);
3870   tree rhs[2];
3871   int i;
3872 
3873   if (!is_reassociable_op (stmt, code, loop))
3874     return false;
3875 
3876   rhs[0] = gimple_assign_rhs1 (stmt);
3877   rhs[1] = gimple_assign_rhs2 (stmt);
3878   gimple_set_visited (stmt, true);
3879   for (i = 0; i < 2; i++)
3880     if (TREE_CODE (rhs[i]) == SSA_NAME
3881 	&& !get_ops (rhs[i], code, ops, loop)
3882 	&& has_single_use (rhs[i]))
3883       {
3884 	operand_entry *oe = operand_entry_pool.allocate ();
3885 
3886 	oe->op = rhs[i];
3887 	oe->rank = code;
3888 	oe->id = 0;
3889 	oe->count = 1;
3890 	oe->stmt_to_insert = NULL;
3891 	ops->safe_push (oe);
3892       }
3893   return true;
3894 }
3895 
3896 /* Find the ops that were added by get_ops starting from VAR, see if
3897    they were changed during update_range_test and if yes, create new
3898    stmts.  */
3899 
3900 static tree
3901 update_ops (tree var, enum tree_code code, vec<operand_entry *> ops,
3902 	    unsigned int *pidx, struct loop *loop)
3903 {
3904   gimple *stmt = SSA_NAME_DEF_STMT (var);
3905   tree rhs[4];
3906   int i;
3907 
3908   if (!is_reassociable_op (stmt, code, loop))
3909     return NULL;
3910 
3911   rhs[0] = gimple_assign_rhs1 (stmt);
3912   rhs[1] = gimple_assign_rhs2 (stmt);
3913   rhs[2] = rhs[0];
3914   rhs[3] = rhs[1];
3915   for (i = 0; i < 2; i++)
3916     if (TREE_CODE (rhs[i]) == SSA_NAME)
3917       {
3918 	rhs[2 + i] = update_ops (rhs[i], code, ops, pidx, loop);
3919 	if (rhs[2 + i] == NULL_TREE)
3920 	  {
3921 	    if (has_single_use (rhs[i]))
3922 	      rhs[2 + i] = ops[(*pidx)++]->op;
3923 	    else
3924 	      rhs[2 + i] = rhs[i];
3925 	  }
3926       }
3927   if ((rhs[2] != rhs[0] || rhs[3] != rhs[1])
3928       && (rhs[2] != rhs[1] || rhs[3] != rhs[0]))
3929     {
3930       gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
3931       var = make_ssa_name (TREE_TYPE (var));
3932       gassign *g = gimple_build_assign (var, gimple_assign_rhs_code (stmt),
3933 					rhs[2], rhs[3]);
3934       gimple_set_uid (g, gimple_uid (stmt));
3935       gimple_set_visited (g, true);
3936       gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3937     }
3938   return var;
3939 }
3940 
3941 /* Structure to track the initial value passed to get_ops and
3942    the range in the ops vector for each basic block.  */
3943 
3944 struct inter_bb_range_test_entry
3945 {
3946   tree op;
3947   unsigned int first_idx, last_idx;
3948 };
3949 
3950 /* Inter-bb range test optimization.
3951 
3952    Returns TRUE if a gimple conditional is optimized to true/false,
3953    otherwise returns FALSE.
3954 
3955    This indicates to the caller that it should run a CFG cleanup pass
3956    once reassociation is completed.  */
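/* Illustrative sketch of the inter-bb case (hypothetical CFG):
     <bb 2>: if (a_1 == 3) goto <bb 4>; else goto <bb 3>;
     <bb 3>: if (a_1 == 4) goto <bb 4>; else goto <bb 5>;
   Both GIMPLE_CONDs branch to the common <bb 4>, so the two tests are
   gathered into OPS and handed to optimize_range_tests, which may
   rewrite one condition into the merged range test and collapse the
   other one to a constant true/false, requesting a CFG cleanup.  */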
3957 
3958 static bool
3959 maybe_optimize_range_tests (gimple *stmt)
3960 {
3961   basic_block first_bb = gimple_bb (stmt);
3962   basic_block last_bb = first_bb;
3963   basic_block other_bb = NULL;
3964   basic_block bb;
3965   edge_iterator ei;
3966   edge e;
3967   auto_vec<operand_entry *> ops;
3968   auto_vec<inter_bb_range_test_entry> bbinfo;
3969   bool any_changes = false;
3970   bool cfg_cleanup_needed = false;
3971 
3972   /* Consider only basic blocks that end with GIMPLE_COND or
3973      a cast statement satisfying final_range_test_p.  All
3974      but the last bb in the first_bb .. last_bb range
3975      should end with GIMPLE_COND.  */
3976   if (gimple_code (stmt) == GIMPLE_COND)
3977     {
3978       if (EDGE_COUNT (first_bb->succs) != 2)
3979 	return cfg_cleanup_needed;
3980     }
3981   else if (final_range_test_p (stmt))
3982     other_bb = single_succ (first_bb);
3983   else
3984     return cfg_cleanup_needed;
3985 
3986   if (stmt_could_throw_p (cfun, stmt))
3987     return cfg_cleanup_needed;
3988 
3989   /* As relative ordering of post-dominator sons isn't fixed,
3990      maybe_optimize_range_tests can be called first on any
3991      bb in the range we want to optimize.  So, start searching
3992      backwards, if first_bb can be set to a predecessor.  */
3993   while (single_pred_p (first_bb))
3994     {
3995       basic_block pred_bb = single_pred (first_bb);
3996       if (!suitable_cond_bb (pred_bb, first_bb, &other_bb, true))
3997 	break;
3998       if (!no_side_effect_bb (first_bb))
3999 	break;
4000       first_bb = pred_bb;
4001     }
4002   /* If first_bb is last_bb, other_bb hasn't been computed yet.
4003      Before starting forward search in last_bb successors, find
4004      out the other_bb.  */
4005   if (first_bb == last_bb)
4006     {
4007       other_bb = NULL;
4008       /* As non-GIMPLE_COND last stmt always terminates the range,
4009 	 if forward search didn't discover anything, just give up.  */
4010       if (gimple_code (stmt) != GIMPLE_COND)
4011 	return cfg_cleanup_needed;
4012       /* Look at both successors.  Either it ends with a GIMPLE_COND
4013 	 and satisfies suitable_cond_bb, or ends with a cast and
4014 	 other_bb is that cast's successor.  */
4015       FOR_EACH_EDGE (e, ei, first_bb->succs)
4016 	if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE))
4017 	    || e->dest == first_bb)
4018 	  return cfg_cleanup_needed;
4019 	else if (single_pred_p (e->dest))
4020 	  {
4021 	    stmt = last_stmt (e->dest);
4022 	    if (stmt
4023 		&& gimple_code (stmt) == GIMPLE_COND
4024 		&& EDGE_COUNT (e->dest->succs) == 2)
4025 	      {
4026 		if (suitable_cond_bb (first_bb, e->dest, &other_bb, true))
4027 		  break;
4028 		else
4029 		  other_bb = NULL;
4030 	      }
4031 	    else if (stmt
4032 		     && final_range_test_p (stmt)
4033 		     && find_edge (first_bb, single_succ (e->dest)))
4034 	      {
4035 		other_bb = single_succ (e->dest);
4036 		if (other_bb == first_bb)
4037 		  other_bb = NULL;
4038 	      }
4039 	  }
4040       if (other_bb == NULL)
4041 	return cfg_cleanup_needed;
4042     }
4043   /* Now do the forward search, moving last_bb to successor bbs
4044      that aren't other_bb.  */
4045   while (EDGE_COUNT (last_bb->succs) == 2)
4046     {
4047       FOR_EACH_EDGE (e, ei, last_bb->succs)
4048 	if (e->dest != other_bb)
4049 	  break;
4050       if (e == NULL)
4051 	break;
4052       if (!single_pred_p (e->dest))
4053 	break;
4054       if (!suitable_cond_bb (e->dest, last_bb, &other_bb, false))
4055 	break;
4056       if (!no_side_effect_bb (e->dest))
4057 	break;
4058       last_bb = e->dest;
4059     }
4060   if (first_bb == last_bb)
4061     return cfg_cleanup_needed;
4062   /* Here basic blocks first_bb through last_bb's predecessor
4063      end with GIMPLE_COND, all of them have one of the edges to
4064      other_bb and another to another block in the range,
4065      all blocks except first_bb don't have side-effects and
4066      last_bb ends with either GIMPLE_COND, or cast satisfying
4067      final_range_test_p.  */
4068   for (bb = last_bb; ; bb = single_pred (bb))
4069     {
4070       enum tree_code code;
4071       tree lhs, rhs;
4072       inter_bb_range_test_entry bb_ent;
4073 
4074       bb_ent.op = NULL_TREE;
4075       bb_ent.first_idx = ops.length ();
4076       bb_ent.last_idx = bb_ent.first_idx;
4077       e = find_edge (bb, other_bb);
4078       stmt = last_stmt (bb);
4079       gimple_set_visited (stmt, true);
4080       if (gimple_code (stmt) != GIMPLE_COND)
4081 	{
4082 	  use_operand_p use_p;
4083 	  gimple *phi;
4084 	  edge e2;
4085 	  unsigned int d;
4086 
4087 	  lhs = gimple_assign_lhs (stmt);
4088 	  rhs = gimple_assign_rhs1 (stmt);
4089 	  gcc_assert (bb == last_bb);
4090 
4091 	  /* stmt is
4092 	     _123 = (int) _234;
4093 	     OR
4094 	     _234 = a_2(D) == 2;
4095 
4096 	     followed by:
4097 	     <bb M>:
4098 	     # _345 = PHI <_123(N), 1(...), 1(...)>
4099 
4100 	     or 0 instead of 1.  If it is 0, the _234
4101 	     range test is anded together with all the
4102 	     other range tests, if it is 1, it is ored with
4103 	     them.  */
4104 	  single_imm_use (lhs, &use_p, &phi);
4105 	  gcc_assert (gimple_code (phi) == GIMPLE_PHI);
4106 	  e2 = find_edge (first_bb, other_bb);
4107 	  d = e2->dest_idx;
4108 	  gcc_assert (gimple_phi_arg_def (phi, e->dest_idx) == lhs);
4109 	  if (integer_zerop (gimple_phi_arg_def (phi, d)))
4110 	    code = BIT_AND_EXPR;
4111 	  else
4112 	    {
4113 	      gcc_checking_assert (integer_onep (gimple_phi_arg_def (phi, d)));
4114 	      code = BIT_IOR_EXPR;
4115 	    }
4116 
4117 	  /* If _234 SSA_NAME_DEF_STMT is
4118 	     _234 = _567 | _789;
4119 	     (or &, corresponding to 1/0 in the phi arguments),
4120 	     push into ops the individual range test arguments
4121 	     of the bitwise or resp. and, recursively.  */
4122 	  if (TREE_CODE (rhs) == SSA_NAME
4123 	      && (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt))
4124 		  != tcc_comparison)
4125 	      && !get_ops (rhs, code, &ops,
4126 			loop_containing_stmt (stmt))
4127 	      && has_single_use (rhs))
4128 	    {
4129 	      /* Otherwise, push the _234 range test itself.  */
4130 	      operand_entry *oe = operand_entry_pool.allocate ();
4131 
4132 	      oe->op = rhs;
4133 	      oe->rank = code;
4134 	      oe->id = 0;
4135 	      oe->count = 1;
4136 	      oe->stmt_to_insert = NULL;
4137 	      ops.safe_push (oe);
4138 	      bb_ent.last_idx++;
4139 	      bb_ent.op = rhs;
4140 	    }
4141 	  else if (is_gimple_assign (stmt)
4142 		   && (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt))
4143 		       == tcc_comparison)
4144 		   && !get_ops (lhs, code, &ops,
4145 				loop_containing_stmt (stmt))
4146 		   && has_single_use (lhs))
4147 	    {
4148 	      operand_entry *oe = operand_entry_pool.allocate ();
4149 	      oe->op = lhs;
4150 	      oe->rank = code;
4151 	      oe->id = 0;
4152 	      oe->count = 1;
4153 	      ops.safe_push (oe);
4154 	      bb_ent.last_idx++;
4155 	      bb_ent.op = lhs;
4156 	    }
4157 	  else
4158 	    {
4159 	      bb_ent.last_idx = ops.length ();
4160 	      bb_ent.op = rhs;
4161 	    }
4162 	  bbinfo.safe_push (bb_ent);
4163 	  continue;
4164 	}
4165       /* Otherwise stmt is GIMPLE_COND.  */
4166       code = gimple_cond_code (stmt);
4167       lhs = gimple_cond_lhs (stmt);
4168       rhs = gimple_cond_rhs (stmt);
4169       if (TREE_CODE (lhs) == SSA_NAME
4170 	  && INTEGRAL_TYPE_P (TREE_TYPE (lhs))
4171 	  && ((code != EQ_EXPR && code != NE_EXPR)
4172 	      || rhs != boolean_false_node
4173 		 /* Either push into ops the individual bitwise
4174 		    or resp. and operands, depending on which
4175 		    edge is other_bb.  */
4176 	      || !get_ops (lhs, (((e->flags & EDGE_TRUE_VALUE) == 0)
4177 				 ^ (code == EQ_EXPR))
4178 				? BIT_AND_EXPR : BIT_IOR_EXPR, &ops,
4179 			   loop_containing_stmt (stmt))))
4180 	{
4181 	  /* Or push the GIMPLE_COND stmt itself.  */
4182 	  operand_entry *oe = operand_entry_pool.allocate ();
4183 
4184 	  oe->op = NULL;
4185 	  oe->rank = (e->flags & EDGE_TRUE_VALUE)
4186 		     ? BIT_IOR_EXPR : BIT_AND_EXPR;
4187 	  /* oe->op = NULL signals that there is no SSA_NAME
4188 	     for the range test, and oe->id instead is the
4189 	     basic block number, at whose end the GIMPLE_COND
4190 	     is.  */
4191 	  oe->id = bb->index;
4192 	  oe->count = 1;
4193 	  oe->stmt_to_insert = NULL;
4194 	  ops.safe_push (oe);
4195 	  bb_ent.op = NULL;
4196 	  bb_ent.last_idx++;
4197 	}
4198       else if (ops.length () > bb_ent.first_idx)
4199 	{
4200 	  bb_ent.op = lhs;
4201 	  bb_ent.last_idx = ops.length ();
4202 	}
4203       bbinfo.safe_push (bb_ent);
4204       if (bb == first_bb)
4205 	break;
4206     }
4207   if (ops.length () > 1)
4208     any_changes = optimize_range_tests (ERROR_MARK, &ops, first_bb);
4209   if (any_changes)
4210     {
4211       unsigned int idx, max_idx = 0;
4212       /* update_ops relies on has_single_use predicates returning the
4213 	 same values as they did during get_ops earlier.  Additionally it
4214 	 never removes statements, only adds new ones and it should walk
4215 	 from the single imm use and check the predicate already before
4216 	 making those changes.
4217 	 On the other hand, the handling of GIMPLE_COND directly can turn
4218 	 previously multiply used SSA_NAMEs into single use SSA_NAMEs, so
4219 	 it needs to be done in a separate loop afterwards.  */
4220       for (bb = last_bb, idx = 0; ; bb = single_pred (bb), idx++)
4221 	{
4222 	  if (bbinfo[idx].first_idx < bbinfo[idx].last_idx
4223 	      && bbinfo[idx].op != NULL_TREE)
4224 	    {
4225 	      tree new_op;
4226 
4227 	      max_idx = idx;
4228 	      stmt = last_stmt (bb);
4229 	      new_op = update_ops (bbinfo[idx].op,
4230 				   (enum tree_code)
4231 				   ops[bbinfo[idx].first_idx]->rank,
4232 				   ops, &bbinfo[idx].first_idx,
4233 				   loop_containing_stmt (stmt));
4234 	      if (new_op == NULL_TREE)
4235 		{
4236 		  gcc_assert (bb == last_bb);
4237 		  new_op = ops[bbinfo[idx].first_idx++]->op;
4238 		}
4239 	      if (bbinfo[idx].op != new_op)
4240 		{
4241 		  imm_use_iterator iter;
4242 		  use_operand_p use_p;
4243 		  gimple *use_stmt, *cast_or_tcc_cmp_stmt = NULL;
4244 
4245 		  FOR_EACH_IMM_USE_STMT (use_stmt, iter, bbinfo[idx].op)
4246 		    if (is_gimple_debug (use_stmt))
4247 		      continue;
4248 		    else if (gimple_code (use_stmt) == GIMPLE_COND
4249 			     || gimple_code (use_stmt) == GIMPLE_PHI)
4250 		      FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
4251 			SET_USE (use_p, new_op);
4252 		    else if ((is_gimple_assign (use_stmt)
4253 			      && (TREE_CODE_CLASS
4254 				  (gimple_assign_rhs_code (use_stmt))
4255 				  == tcc_comparison)))
4256 		      cast_or_tcc_cmp_stmt = use_stmt;
4257 		    else if (gimple_assign_cast_p (use_stmt))
4258 		      cast_or_tcc_cmp_stmt = use_stmt;
4259 		    else
4260 		      gcc_unreachable ();
4261 
4262 		  if (cast_or_tcc_cmp_stmt)
4263 		    {
4264 		      gcc_assert (bb == last_bb);
4265 		      tree lhs = gimple_assign_lhs (cast_or_tcc_cmp_stmt);
4266 		      tree new_lhs = make_ssa_name (TREE_TYPE (lhs));
4267 		      enum tree_code rhs_code
4268 			= gimple_assign_cast_p (cast_or_tcc_cmp_stmt)
4269 			? gimple_assign_rhs_code (cast_or_tcc_cmp_stmt)
4270 			: CONVERT_EXPR;
4271 		      gassign *g;
4272 		      if (is_gimple_min_invariant (new_op))
4273 			{
4274 			  new_op = fold_convert (TREE_TYPE (lhs), new_op);
4275 			  g = gimple_build_assign (new_lhs, new_op);
4276 			}
4277 		      else
4278 			g = gimple_build_assign (new_lhs, rhs_code, new_op);
4279 		      gimple_stmt_iterator gsi
4280 			= gsi_for_stmt (cast_or_tcc_cmp_stmt);
4281 		      gimple_set_uid (g, gimple_uid (cast_or_tcc_cmp_stmt));
4282 		      gimple_set_visited (g, true);
4283 		      gsi_insert_before (&gsi, g, GSI_SAME_STMT);
4284 		      FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs)
4285 			if (is_gimple_debug (use_stmt))
4286 			  continue;
4287 			else if (gimple_code (use_stmt) == GIMPLE_COND
4288 				 || gimple_code (use_stmt) == GIMPLE_PHI)
4289 			  FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
4290 			    SET_USE (use_p, new_lhs);
4291 			else
4292 			  gcc_unreachable ();
4293 		    }
4294 		}
4295 	    }
4296 	  if (bb == first_bb)
4297 	    break;
4298 	}
4299       for (bb = last_bb, idx = 0; ; bb = single_pred (bb), idx++)
4300 	{
4301 	  if (bbinfo[idx].first_idx < bbinfo[idx].last_idx
4302 	      && bbinfo[idx].op == NULL_TREE
4303 	      && ops[bbinfo[idx].first_idx]->op != NULL_TREE)
4304 	    {
4305 	      gcond *cond_stmt = as_a <gcond *> (last_stmt (bb));
4306 
4307 	      if (idx > max_idx)
4308 		max_idx = idx;
4309 
4310 	      /* If we collapse the conditional to a true/false
4311 		 condition, then bubble that knowledge up to our caller.  */
4312 	      if (integer_zerop (ops[bbinfo[idx].first_idx]->op))
4313 		{
4314 		  gimple_cond_make_false (cond_stmt);
4315 		  cfg_cleanup_needed = true;
4316 		}
4317 	      else if (integer_onep (ops[bbinfo[idx].first_idx]->op))
4318 		{
4319 		  gimple_cond_make_true (cond_stmt);
4320 		  cfg_cleanup_needed = true;
4321 		}
4322 	      else
4323 		{
4324 		  gimple_cond_set_code (cond_stmt, NE_EXPR);
4325 		  gimple_cond_set_lhs (cond_stmt,
4326 				       ops[bbinfo[idx].first_idx]->op);
4327 		  gimple_cond_set_rhs (cond_stmt, boolean_false_node);
4328 		}
4329 	      update_stmt (cond_stmt);
4330 	    }
4331 	  if (bb == first_bb)
4332 	    break;
4333 	}
4334 
4335       /* The above changes could result in basic blocks after the first
4336 	 modified one, up to and including last_bb, to be executed even if
4337 	 they would not be in the original program.  If the value ranges of
4338 	 assignment lhs' in those bbs were dependent on the conditions
4339 	 guarding those basic blocks which now can change, the VRs might
4340 	 be incorrect.  As no_side_effect_bb should ensure those SSA_NAMEs
4341 	 are only used within the same bb, it should not be a big deal if
4342 	 we just reset all the VRs in those bbs.  See PR68671.  */
4343       for (bb = last_bb, idx = 0; idx < max_idx; bb = single_pred (bb), idx++)
4344 	reset_flow_sensitive_info_in_bb (bb);
4345     }
4346   return cfg_cleanup_needed;
4347 }
4348 
4349 /* Return true if OPERAND is defined by a PHI node which uses the LHS
4350    of STMT in its operands.  This is also known as a "destructive
4351    update" operation.  */
4352 
4353 static bool
4354 is_phi_for_stmt (gimple *stmt, tree operand)
4355 {
4356   gimple *def_stmt;
4357   gphi *def_phi;
4358   tree lhs;
4359   use_operand_p arg_p;
4360   ssa_op_iter i;
4361 
4362   if (TREE_CODE (operand) != SSA_NAME)
4363     return false;
4364 
4365   lhs = gimple_assign_lhs (stmt);
4366 
4367   def_stmt = SSA_NAME_DEF_STMT (operand);
4368   def_phi = dyn_cast <gphi *> (def_stmt);
4369   if (!def_phi)
4370     return false;
4371 
4372   FOR_EACH_PHI_ARG (arg_p, def_phi, i, SSA_OP_USE)
4373     if (lhs == USE_FROM_PTR (arg_p))
4374       return true;
4375   return false;
4376 }
4377 
4378 /* Remove def stmt of VAR if VAR has zero uses and recurse
4379    on rhs1 operand if so.  */
4380 
4381 static void
4382 remove_visited_stmt_chain (tree var)
4383 {
4384   gimple *stmt;
4385   gimple_stmt_iterator gsi;
4386 
4387   while (1)
4388     {
4389       if (TREE_CODE (var) != SSA_NAME || !has_zero_uses (var))
4390 	return;
4391       stmt = SSA_NAME_DEF_STMT (var);
4392       if (is_gimple_assign (stmt) && gimple_visited_p (stmt))
4393 	{
4394 	  var = gimple_assign_rhs1 (stmt);
4395 	  gsi = gsi_for_stmt (stmt);
4396 	  reassoc_remove_stmt (&gsi);
4397 	  release_defs (stmt);
4398 	}
4399       else
4400 	return;
4401     }
4402 }
4403 
4404 /* This function checks three consecutive operands in
4405    the passed operand vector OPS starting from OPINDEX and
4406    swaps two operands if it is profitable for the binary operation
4407    consuming the OPINDEX + 1 and OPINDEX + 2 operands.
4408 
4409    We pair ops with the same rank if possible.
4410 
4411    The alternative we try is to see if STMT is a destructive
4412    update style statement, which is like:
4413    b = phi (a, ...)
4414    a = c + b;
4415    In that case, we want to use the destructive update form to
4416    expose the possible vectorizer sum reduction opportunity.
4417    In that case, the third operand will be the phi node. This
4418    check is not performed if STMT is null.
4419 
4420    We could, of course, try to be better as noted above, and do a
4421    lot of work to try to find these opportunities in >3 operand
4422    cases, but it is unlikely to be worth it.  */
4423 
4424 static void
4425 swap_ops_for_binary_stmt (vec<operand_entry *> ops,
4426 			  unsigned int opindex, gimple *stmt)
4427 {
4428   operand_entry *oe1, *oe2, *oe3;
4429 
4430   oe1 = ops[opindex];
4431   oe2 = ops[opindex + 1];
4432   oe3 = ops[opindex + 2];
4433 
4434   if ((oe1->rank == oe2->rank
4435        && oe2->rank != oe3->rank)
4436       || (stmt && is_phi_for_stmt (stmt, oe3->op)
4437 	  && !is_phi_for_stmt (stmt, oe1->op)
4438 	  && !is_phi_for_stmt (stmt, oe2->op)))
4439     std::swap (*oe1, *oe3);
4440   else if ((oe1->rank == oe3->rank
4441 	    && oe2->rank != oe3->rank)
4442 	   || (stmt && is_phi_for_stmt (stmt, oe2->op)
4443 	       && !is_phi_for_stmt (stmt, oe1->op)
4444 	       && !is_phi_for_stmt (stmt, oe3->op)))
4445     std::swap (*oe1, *oe2);
4446 }
4447 
4448 /* If definition of RHS1 or RHS2 dominates STMT, return the later of those
4449    two definitions, otherwise return STMT.  */
4450 
4451 static inline gimple *
4452 find_insert_point (gimple *stmt, tree rhs1, tree rhs2)
4453 {
4454   if (TREE_CODE (rhs1) == SSA_NAME
4455       && reassoc_stmt_dominates_stmt_p (stmt, SSA_NAME_DEF_STMT (rhs1)))
4456     stmt = SSA_NAME_DEF_STMT (rhs1);
4457   if (TREE_CODE (rhs2) == SSA_NAME
4458       && reassoc_stmt_dominates_stmt_p (stmt, SSA_NAME_DEF_STMT (rhs2)))
4459     stmt = SSA_NAME_DEF_STMT (rhs2);
4460   return stmt;
4461 }
4462 
4463 /* If the stmt that defines operand has to be inserted, insert it
4464    before the use.  */
4465 static void
4466 insert_stmt_before_use (gimple *stmt, gimple *stmt_to_insert)
4467 {
4468   gcc_assert (is_gimple_assign (stmt_to_insert));
4469   tree rhs1 = gimple_assign_rhs1 (stmt_to_insert);
4470   tree rhs2 = gimple_assign_rhs2 (stmt_to_insert);
4471   gimple *insert_point = find_insert_point (stmt, rhs1, rhs2);
4472   gimple_stmt_iterator gsi = gsi_for_stmt (insert_point);
4473   gimple_set_uid (stmt_to_insert, gimple_uid (insert_point));
4474 
4475   /* If the insert point is not stmt, then insert_point would be
4476      the point where operand rhs1 or rhs2 is defined. In this case,
4477      stmt_to_insert has to be inserted afterwards. This would
4478      only happen when the stmt insertion point is flexible. */
4479   if (stmt == insert_point)
4480     gsi_insert_before (&gsi, stmt_to_insert, GSI_NEW_STMT);
4481   else
4482     insert_stmt_after (stmt_to_insert, insert_point);
4483 }
4484 
4485 
4486 /* Recursively rewrite our linearized statements so that the operators
4487    match those in OPS[OPINDEX], putting the computation in rank
4488    order.  Return new lhs.
4489    CHANGED is true if we shouldn't reuse the lhs SSA_NAME both in
4490    the current stmt and during recursive invocations.
4491    NEXT_CHANGED is true if we shouldn't reuse the lhs SSA_NAME in
4492    recursive invocations.  */
4493 
4494 static tree
4495 rewrite_expr_tree (gimple *stmt, unsigned int opindex,
4496 		   vec<operand_entry *> ops, bool changed, bool next_changed)
4497 {
4498   tree rhs1 = gimple_assign_rhs1 (stmt);
4499   tree rhs2 = gimple_assign_rhs2 (stmt);
4500   tree lhs = gimple_assign_lhs (stmt);
4501   operand_entry *oe;
4502 
4503   /* The final recursion case for this function is that you have
4504      exactly two operations left.
4505      If we had exactly one op in the entire list to start with, we
4506      would have never called this function, and the tail recursion
4507      rewrites them one at a time.  */
4508   if (opindex + 2 == ops.length ())
4509     {
4510       operand_entry *oe1, *oe2;
4511 
4512       oe1 = ops[opindex];
4513       oe2 = ops[opindex + 1];
4514 
4515       if (rhs1 != oe1->op || rhs2 != oe2->op)
4516 	{
4517 	  gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
4518 	  unsigned int uid = gimple_uid (stmt);
4519 
4520 	  if (dump_file && (dump_flags & TDF_DETAILS))
4521 	    {
4522 	      fprintf (dump_file, "Transforming ");
4523 	      print_gimple_stmt (dump_file, stmt, 0);
4524 	    }
4525 
4526 	  /* If the stmt that defines operand has to be inserted, insert it
4527 	     before the use.  */
4528 	  if (oe1->stmt_to_insert)
4529 	    insert_stmt_before_use (stmt, oe1->stmt_to_insert);
4530 	  if (oe2->stmt_to_insert)
4531 	    insert_stmt_before_use (stmt, oe2->stmt_to_insert);
4532 	  /* Even when changed is false, reassociation could have e.g. removed
4533 	     some redundant operations, so unless we are just swapping the
4534 	     arguments or unless there is no change at all (then we just
4535 	     return lhs), force creation of a new SSA_NAME.  */
4536 	  if (changed || ((rhs1 != oe2->op || rhs2 != oe1->op) && opindex))
4537 	    {
4538 	      gimple *insert_point
4539 		= find_insert_point (stmt, oe1->op, oe2->op);
4540 	      lhs = make_ssa_name (TREE_TYPE (lhs));
4541 	      stmt
4542 		= gimple_build_assign (lhs, gimple_assign_rhs_code (stmt),
4543 				       oe1->op, oe2->op);
4544 	      gimple_set_uid (stmt, uid);
4545 	      gimple_set_visited (stmt, true);
4546 	      if (insert_point == gsi_stmt (gsi))
4547 		gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
4548 	      else
4549 		insert_stmt_after (stmt, insert_point);
4550 	    }
4551 	  else
4552 	    {
4553 	      gcc_checking_assert (find_insert_point (stmt, oe1->op, oe2->op)
4554 				   == stmt);
4555 	      gimple_assign_set_rhs1 (stmt, oe1->op);
4556 	      gimple_assign_set_rhs2 (stmt, oe2->op);
4557 	      update_stmt (stmt);
4558 	    }
4559 
4560 	  if (rhs1 != oe1->op && rhs1 != oe2->op)
4561 	    remove_visited_stmt_chain (rhs1);
4562 
4563 	  if (dump_file && (dump_flags & TDF_DETAILS))
4564 	    {
4565 	      fprintf (dump_file, " into ");
4566 	      print_gimple_stmt (dump_file, stmt, 0);
4567 	    }
4568 	}
4569       return lhs;
4570     }
4571 
4572   /* If we hit here, we should have 3 or more ops left.  */
4573   gcc_assert (opindex + 2 < ops.length ());
4574 
4575   /* Rewrite the next operator.  */
4576   oe = ops[opindex];
4577 
4578   /* If the stmt that defines operand has to be inserted, insert it
4579      before the use.  */
4580   if (oe->stmt_to_insert)
4581     insert_stmt_before_use (stmt, oe->stmt_to_insert);
4582 
4583   /* Recurse on the LHS of the binary operator, which is guaranteed to
4584      be the non-leaf side.  */
4585   tree new_rhs1
4586     = rewrite_expr_tree (SSA_NAME_DEF_STMT (rhs1), opindex + 1, ops,
4587 			 changed || oe->op != rhs2 || next_changed,
4588 			 false);
4589 
4590   if (oe->op != rhs2 || new_rhs1 != rhs1)
4591     {
4592       if (dump_file && (dump_flags & TDF_DETAILS))
4593 	{
4594 	  fprintf (dump_file, "Transforming ");
4595 	  print_gimple_stmt (dump_file, stmt, 0);
4596 	}
4597 
4598       /* If changed is false, this is either opindex == 0
4599 	 or all outer rhs2's were equal to corresponding oe->op,
4600 	 and powi_result is NULL.
4601 	 That means lhs is equivalent before and after reassociation.
4602 	 Otherwise ensure the old lhs SSA_NAME is not reused and
4603 	 create a new stmt as well, so that any debug stmts will be
4604 	 properly adjusted.  */
4605       if (changed)
4606 	{
4607 	  gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
4608 	  unsigned int uid = gimple_uid (stmt);
4609 	  gimple *insert_point = find_insert_point (stmt, new_rhs1, oe->op);
4610 
4611 	  lhs = make_ssa_name (TREE_TYPE (lhs));
4612 	  stmt = gimple_build_assign (lhs, gimple_assign_rhs_code (stmt),
4613 				      new_rhs1, oe->op);
4614 	  gimple_set_uid (stmt, uid);
4615 	  gimple_set_visited (stmt, true);
4616 	  if (insert_point == gsi_stmt (gsi))
4617 	    gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
4618 	  else
4619 	    insert_stmt_after (stmt, insert_point);
4620 	}
4621       else
4622 	{
4623 	  gcc_checking_assert (find_insert_point (stmt, new_rhs1, oe->op)
4624 			       == stmt);
4625 	  gimple_assign_set_rhs1 (stmt, new_rhs1);
4626 	  gimple_assign_set_rhs2 (stmt, oe->op);
4627 	  update_stmt (stmt);
4628 	}
4629 
4630       if (dump_file && (dump_flags & TDF_DETAILS))
4631 	{
4632 	  fprintf (dump_file, " into ");
4633 	  print_gimple_stmt (dump_file, stmt, 0);
4634 	}
4635     }
4636   return lhs;
4637 }
4638 
4639 /* Find out how many cycles we need to compute the statement chain.
4640    OPS_NUM holds the number of statements in the chain.  CPU_WIDTH is the
4641    maximum number of independent statements we may execute per cycle.  */
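/* A worked example (illustrative only): with OPS_NUM == 16 and
   CPU_WIDTH == 4 the code below computes res = 16 / 8 = 2 cycles,
   leaving rest = 16 - 2 * 4 = 8, which needs exact_log2 (8) = 3
   further halving cycles, for 5 cycles in total.  */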
4642 
4643 static int
4644 get_required_cycles (int ops_num, int cpu_width)
4645 {
4646   int res;
4647   int elog;
4648   unsigned int rest;
4649 
4650   /* While we have more than 2 * cpu_width operands
4651      we may reduce the number of operands by cpu_width
4652      per cycle.  */
4653   res = ops_num / (2 * cpu_width);
4654 
4655   /* The remaining operand count may be halved each cycle
4656      until we have only one operand.  */
4657   rest = (unsigned)(ops_num - res * cpu_width);
4658   elog = exact_log2 (rest);
4659   if (elog >= 0)
4660     res += elog;
4661   else
4662     res += floor_log2 (rest) + 1;
4663 
4664   return res;
4665 }
4666 
4667 /* Returns an optimal number of registers to use for computation of
4668    given statements.  */
4669 
4670 static int
4671 get_reassociation_width (int ops_num, enum tree_code opc,
4672 			 machine_mode mode)
4673 {
4674   int param_width = PARAM_VALUE (PARAM_TREE_REASSOC_WIDTH);
4675   int width;
4676   int width_min;
4677   int cycles_best;
4678 
4679   if (param_width > 0)
4680     width = param_width;
4681   else
4682     width = targetm.sched.reassociation_width (opc, mode);
4683 
4684   if (width == 1)
4685     return width;
4686 
4687   /* Get the minimal time required for sequence computation.  */
4688   cycles_best = get_required_cycles (ops_num, width);
4689 
4690   /* Check if we may use less width and still compute the sequence in
4691      the same time.  It will allow us to reduce register usage.
4692      get_required_cycles is monotonically increasing with lower width
4693      so we can perform a binary search for the minimal width that still
4694      results in the optimal cycle count.  */
4695   width_min = 1;
4696   while (width > width_min)
4697     {
4698       int width_mid = (width + width_min) / 2;
4699 
4700       if (get_required_cycles (ops_num, width_mid) == cycles_best)
4701 	width = width_mid;
4702       else if (width_min < width_mid)
4703 	width_min = width_mid;
4704       else
4705 	break;
4706     }
4707 
4708   return width;
4709 }
4710 
4711 /* Recursively rewrite our linearized statements so that the operators
4712    match those in OPS[OPINDEX], putting the computation in rank
4713    order and trying to allow operations to be executed in
4714    parallel.  */
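/* Illustrative sketch (hypothetical SSA names): for a linearized chain
   computing a + b + c + d with WIDTH == 2, instead of the serial form
     _1 = a + b;  _2 = _1 + c;  _3 = _2 + d;
   the rewrite can produce
     _1 = a + b;  _2 = c + d;  _3 = _1 + _2;
   so that _1 and _2 have no dependence and may execute in parallel.  */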
4715 
4716 static void
4717 rewrite_expr_tree_parallel (gassign *stmt, int width,
4718 			    vec<operand_entry *> ops)
4719 {
4720   enum tree_code opcode = gimple_assign_rhs_code (stmt);
4721   int op_num = ops.length ();
4722   gcc_assert (op_num > 0);
4723   int stmt_num = op_num - 1;
4724   gimple **stmts = XALLOCAVEC (gimple *, stmt_num);
4725   int op_index = op_num - 1;
4726   int stmt_index = 0;
4727   int ready_stmts_end = 0;
4728   int i = 0;
4729   gimple *stmt1 = NULL, *stmt2 = NULL;
4730   tree last_rhs1 = gimple_assign_rhs1 (stmt);
4731 
4732   /* We start expression rewriting from the top statements.
4733      So, in this loop we create a full list of statements
4734      we will work with.  */
4735   stmts[stmt_num - 1] = stmt;
4736   for (i = stmt_num - 2; i >= 0; i--)
4737     stmts[i] = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmts[i+1]));
4738 
4739   for (i = 0; i < stmt_num; i++)
4740     {
4741       tree op1, op2;
4742 
4743       /* Determine whether we should use results of
4744 	 already handled statements or not.  */
4745       if (ready_stmts_end == 0
4746 	  && (i - stmt_index >= width || op_index < 1))
4747 	ready_stmts_end = i;
4748 
4749       /* Now we choose operands for the next statement.  A nonzero
4750 	 value in ready_stmts_end means that we should use
4751 	 the results of already generated statements as new operands.  */
4752       if (ready_stmts_end > 0)
4753 	{
4754 	  op1 = gimple_assign_lhs (stmts[stmt_index++]);
4755 	  if (ready_stmts_end > stmt_index)
4756 	    op2 = gimple_assign_lhs (stmts[stmt_index++]);
4757 	  else if (op_index >= 0)
4758 	    {
4759 	      operand_entry *oe = ops[op_index--];
4760 	      stmt2 = oe->stmt_to_insert;
4761 	      op2 = oe->op;
4762 	    }
4763 	  else
4764 	    {
4765 	      gcc_assert (stmt_index < i);
4766 	      op2 = gimple_assign_lhs (stmts[stmt_index++]);
4767 	    }
4768 
4769 	  if (stmt_index >= ready_stmts_end)
4770 	    ready_stmts_end = 0;
4771 	}
4772       else
4773 	{
4774 	  if (op_index > 1)
4775 	    swap_ops_for_binary_stmt (ops, op_index - 2, NULL);
4776 	  operand_entry *oe2 = ops[op_index--];
4777 	  operand_entry *oe1 = ops[op_index--];
4778 	  op2 = oe2->op;
4779 	  stmt2 = oe2->stmt_to_insert;
4780 	  op1 = oe1->op;
4781 	  stmt1 = oe1->stmt_to_insert;
4782 	}
4783 
4784       /* If we emit the last statement then we should put
4785 	 operands into the last statement.  It will also
4786 	 break the loop.  */
4787       if (op_index < 0 && stmt_index == i)
4788 	i = stmt_num - 1;
4789 
4790       if (dump_file && (dump_flags & TDF_DETAILS))
4791 	{
4792 	  fprintf (dump_file, "Transforming ");
4793 	  print_gimple_stmt (dump_file, stmts[i], 0);
4794 	}
4795 
4796       /* If the stmt that defines operand has to be inserted, insert it
4797 	 before the use.  */
4798       if (stmt1)
4799 	insert_stmt_before_use (stmts[i], stmt1);
4800       if (stmt2)
4801 	insert_stmt_before_use (stmts[i], stmt2);
4802       stmt1 = stmt2 = NULL;
4803 
4804       /* We keep the original statement only for the last one.  All
4805 	 others are recreated.  */
4806       if (i == stmt_num - 1)
4807 	{
4808 	  gimple_assign_set_rhs1 (stmts[i], op1);
4809 	  gimple_assign_set_rhs2 (stmts[i], op2);
4810 	  update_stmt (stmts[i]);
4811 	}
4812       else
4813 	{
4814 	  stmts[i] = build_and_add_sum (TREE_TYPE (last_rhs1), op1, op2, opcode);
4815 	}
4816       if (dump_file && (dump_flags & TDF_DETAILS))
4817 	{
4818 	  fprintf (dump_file, " into ");
4819 	  print_gimple_stmt (dump_file, stmts[i], 0);
4820 	}
4821     }
4822 
4823   remove_visited_stmt_chain (last_rhs1);
4824 }
4825 
4826 /* Transform STMT, which is really (A + B) + (C + D), into the left
4827    linear form, ((A+B)+C)+D.
4828    Recurse on D if necessary.  */
4829 
4830 static void
4831 linearize_expr (gimple *stmt)
4832 {
4833   gimple_stmt_iterator gsi;
4834   gimple *binlhs = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
4835   gimple *binrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
4836   gimple *oldbinrhs = binrhs;
4837   enum tree_code rhscode = gimple_assign_rhs_code (stmt);
4838   gimple *newbinrhs = NULL;
4839   struct loop *loop = loop_containing_stmt (stmt);
4840   tree lhs = gimple_assign_lhs (stmt);
4841 
4842   gcc_assert (is_reassociable_op (binlhs, rhscode, loop)
4843 	      && is_reassociable_op (binrhs, rhscode, loop));
4844 
4845   gsi = gsi_for_stmt (stmt);
4846 
4847   gimple_assign_set_rhs2 (stmt, gimple_assign_rhs1 (binrhs));
4848   binrhs = gimple_build_assign (make_ssa_name (TREE_TYPE (lhs)),
4849 				gimple_assign_rhs_code (binrhs),
4850 				gimple_assign_lhs (binlhs),
4851 				gimple_assign_rhs2 (binrhs));
4852   gimple_assign_set_rhs1 (stmt, gimple_assign_lhs (binrhs));
4853   gsi_insert_before (&gsi, binrhs, GSI_SAME_STMT);
4854   gimple_set_uid (binrhs, gimple_uid (stmt));
4855 
4856   if (TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME)
4857     newbinrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
4858 
4859   if (dump_file && (dump_flags & TDF_DETAILS))
4860     {
4861       fprintf (dump_file, "Linearized: ");
4862       print_gimple_stmt (dump_file, stmt, 0);
4863     }
4864 
4865   reassociate_stats.linearized++;
4866   update_stmt (stmt);
4867 
4868   gsi = gsi_for_stmt (oldbinrhs);
4869   reassoc_remove_stmt (&gsi);
4870   release_defs (oldbinrhs);
4871 
4872   gimple_set_visited (stmt, true);
4873   gimple_set_visited (binlhs, true);
4874   gimple_set_visited (binrhs, true);
4875 
4876   /* Tail recurse on the new rhs if it still needs reassociation.  */
4877   if (newbinrhs && is_reassociable_op (newbinrhs, rhscode, loop))
4878     /* ??? This should probably be linearize_expr (newbinrhs) but I don't
4879 	   want to change the algorithm while converting to tuples.  */
4880     linearize_expr (stmt);
4881 }
4882 
4883 /* If LHS has a single immediate use that is a GIMPLE_ASSIGN statement, return
4884    it.  Otherwise, return NULL.  */
4885 
4886 static gimple *
4887 get_single_immediate_use (tree lhs)
4888 {
4889   use_operand_p immuse;
4890   gimple *immusestmt;
4891 
4892   if (TREE_CODE (lhs) == SSA_NAME
4893       && single_imm_use (lhs, &immuse, &immusestmt)
4894       && is_gimple_assign (immusestmt))
4895     return immusestmt;
4896 
4897   return NULL;
4898 }
4899 
4900 /* Recursively negate the value of TONEGATE, and return the SSA_NAME
4901    representing the negated value.  Insertions of any necessary
4902    instructions go before GSI.
4903    This function is recursive in that, if you hand it "a_5" as the
4904    value to negate, and a_5 is defined by "a_5 = b_3 + b_4", it will
4905    transform b_3 + b_4 into a_5 = -b_3 + -b_4.  */
4906 
4907 static tree
4908 negate_value (tree tonegate, gimple_stmt_iterator *gsip)
4909 {
4910   gimple *negatedefstmt = NULL;
4911   tree resultofnegate;
4912   gimple_stmt_iterator gsi;
4913   unsigned int uid;
4914 
4915   /* If we are trying to negate a name, defined by an add, negate the
4916      add operands instead.  */
4917   if (TREE_CODE (tonegate) == SSA_NAME)
4918     negatedefstmt = SSA_NAME_DEF_STMT (tonegate);
4919   if (TREE_CODE (tonegate) == SSA_NAME
4920       && is_gimple_assign (negatedefstmt)
4921       && TREE_CODE (gimple_assign_lhs (negatedefstmt)) == SSA_NAME
4922       && has_single_use (gimple_assign_lhs (negatedefstmt))
4923       && gimple_assign_rhs_code (negatedefstmt) == PLUS_EXPR)
4924     {
4925       tree rhs1 = gimple_assign_rhs1 (negatedefstmt);
4926       tree rhs2 = gimple_assign_rhs2 (negatedefstmt);
4927       tree lhs = gimple_assign_lhs (negatedefstmt);
4928       gimple *g;
4929 
4930       gsi = gsi_for_stmt (negatedefstmt);
4931       rhs1 = negate_value (rhs1, &gsi);
4932 
4933       gsi = gsi_for_stmt (negatedefstmt);
4934       rhs2 = negate_value (rhs2, &gsi);
4935 
4936       gsi = gsi_for_stmt (negatedefstmt);
4937       lhs = make_ssa_name (TREE_TYPE (lhs));
4938       gimple_set_visited (negatedefstmt, true);
4939       g = gimple_build_assign (lhs, PLUS_EXPR, rhs1, rhs2);
4940       gimple_set_uid (g, gimple_uid (negatedefstmt));
4941       gsi_insert_before (&gsi, g, GSI_SAME_STMT);
4942       return lhs;
4943     }
4944 
4945   tonegate = fold_build1 (NEGATE_EXPR, TREE_TYPE (tonegate), tonegate);
4946   resultofnegate = force_gimple_operand_gsi (gsip, tonegate, true,
4947 					     NULL_TREE, true, GSI_SAME_STMT);
4948   gsi = *gsip;
4949   uid = gimple_uid (gsi_stmt (gsi));
4950   for (gsi_prev (&gsi); !gsi_end_p (gsi); gsi_prev (&gsi))
4951     {
4952       gimple *stmt = gsi_stmt (gsi);
4953       if (gimple_uid (stmt) != 0)
4954 	break;
4955       gimple_set_uid (stmt, uid);
4956     }
4957   return resultofnegate;
4958 }
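
/* For illustration (hypothetical SSA names), asking negate_value for
   a_5 where

     a_5 = b_3 + b_4;

   and a_5 has a single use, recursively negates the operands and
   returns a fresh name

     a_6 = -b_3 + -b_4;

   inserted before the original add, which becomes dead once its single
   use is rewritten to a_6.  */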
4959 
4960 /* Return true if we should break up the subtract in STMT into an add
4961    with negate.  This is true when the subtract operands are really
4962    adds, or the subtract itself is used in an add expression.  In
4963    either case, breaking up the subtract into an add with negate
4964    exposes the adds to reassociation.  */
4965 
4966 static bool
4967 should_break_up_subtract (gimple *stmt)
4968 {
4969   tree lhs = gimple_assign_lhs (stmt);
4970   tree binlhs = gimple_assign_rhs1 (stmt);
4971   tree binrhs = gimple_assign_rhs2 (stmt);
4972   gimple *immusestmt;
4973   struct loop *loop = loop_containing_stmt (stmt);
4974 
4975   if (TREE_CODE (binlhs) == SSA_NAME
4976       && is_reassociable_op (SSA_NAME_DEF_STMT (binlhs), PLUS_EXPR, loop))
4977     return true;
4978 
4979   if (TREE_CODE (binrhs) == SSA_NAME
4980       && is_reassociable_op (SSA_NAME_DEF_STMT (binrhs), PLUS_EXPR, loop))
4981     return true;
4982 
4983   if (TREE_CODE (lhs) == SSA_NAME
4984       && (immusestmt = get_single_immediate_use (lhs))
4985       && is_gimple_assign (immusestmt)
4986       && (gimple_assign_rhs_code (immusestmt) == PLUS_EXPR
4987 	  || (gimple_assign_rhs_code (immusestmt) == MINUS_EXPR
4988 	      && gimple_assign_rhs1 (immusestmt) == lhs)
4989 	  || gimple_assign_rhs_code (immusestmt) == MULT_EXPR))
4990     return true;
4991   return false;
4992 }
4993 
4994 /* Transform STMT from A - B into A + -B.  */
4995 
4996 static void
4997 break_up_subtract (gimple *stmt, gimple_stmt_iterator *gsip)
4998 {
4999   tree rhs1 = gimple_assign_rhs1 (stmt);
5000   tree rhs2 = gimple_assign_rhs2 (stmt);
5001 
5002   if (dump_file && (dump_flags & TDF_DETAILS))
5003     {
5004       fprintf (dump_file, "Breaking up subtract ");
5005       print_gimple_stmt (dump_file, stmt, 0);
5006     }
5007 
5008   rhs2 = negate_value (rhs2, gsip);
5009   gimple_assign_set_rhs_with_ops (gsip, PLUS_EXPR, rhs1, rhs2);
5010   update_stmt (stmt);
5011 }
5012 
5013 /* Determine whether STMT is a builtin call that raises an SSA name
5014    to an integer power and has only one use.  If so, and this is early
5015    reassociation and unsafe math optimizations are permitted, place
5016    the SSA name in *BASE and the exponent in *EXPONENT, and return TRUE.
5017    If any of these conditions does not hold, return FALSE.  */
5018 
5019 static bool
5020 acceptable_pow_call (gcall *stmt, tree *base, HOST_WIDE_INT *exponent)
5021 {
5022   tree arg1;
5023   REAL_VALUE_TYPE c, cint;
5024 
5025   switch (gimple_call_combined_fn (stmt))
5026     {
5027     CASE_CFN_POW:
5028       if (flag_errno_math)
5029 	return false;
5030 
5031       *base = gimple_call_arg (stmt, 0);
5032       arg1 = gimple_call_arg (stmt, 1);
5033 
5034       if (TREE_CODE (arg1) != REAL_CST)
5035 	return false;
5036 
5037       c = TREE_REAL_CST (arg1);
5038 
5039       if (REAL_EXP (&c) > HOST_BITS_PER_WIDE_INT)
5040 	return false;
5041 
5042       *exponent = real_to_integer (&c);
5043       real_from_integer (&cint, VOIDmode, *exponent, SIGNED);
5044       if (!real_identical (&c, &cint))
5045 	return false;
5046 
5047       break;
5048 
5049     CASE_CFN_POWI:
5050       *base = gimple_call_arg (stmt, 0);
5051       arg1 = gimple_call_arg (stmt, 1);
5052 
5053       if (!tree_fits_shwi_p (arg1))
5054 	return false;
5055 
5056       *exponent = tree_to_shwi (arg1);
5057       break;
5058 
5059     default:
5060       return false;
5061     }
5062 
5063   /* Expanding negative exponents is generally unproductive, so we don't
5064      complicate matters with those.  Exponents of zero and one should
5065      have been handled by expression folding.  */
5066   if (*exponent < 2 || TREE_CODE (*base) != SSA_NAME)
5067     return false;
5068 
5069   return true;
5070 }
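
/* For example (hypothetical calls, assuming -fno-math-errno so the
   errno check above does not reject pow):

     t = __builtin_pow (x, 4.0);    accepted: *base = x, *exponent = 4
     t = __builtin_powi (x, 3);     accepted: *base = x, *exponent = 3
     t = __builtin_pow (x, 0.5);    rejected: exponent is not integral
     t = __builtin_pow (x, -2.0);   rejected: negative exponent
     t = __builtin_pow (2.0, 4.0);  rejected: base is not an SSA name  */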
5071 
5072 /* Try to derive and add an operand entry for OP to *OPS.  Return false if
5073    unsuccessful.  */
5074 
5075 static bool
5076 try_special_add_to_ops (vec<operand_entry *> *ops,
5077 			enum tree_code code,
5078 			tree op, gimple* def_stmt)
5079 {
5080   tree base = NULL_TREE;
5081   HOST_WIDE_INT exponent = 0;
5082 
5083   if (TREE_CODE (op) != SSA_NAME
5084       || ! has_single_use (op))
5085     return false;
5086 
5087   if (code == MULT_EXPR
5088       && reassoc_insert_powi_p
5089       && flag_unsafe_math_optimizations
5090       && is_gimple_call (def_stmt)
5091       && acceptable_pow_call (as_a <gcall *> (def_stmt), &base, &exponent))
5092     {
5093       add_repeat_to_ops_vec (ops, base, exponent);
5094       gimple_set_visited (def_stmt, true);
5095       return true;
5096     }
5097   else if (code == MULT_EXPR
5098 	   && is_gimple_assign (def_stmt)
5099 	   && gimple_assign_rhs_code (def_stmt) == NEGATE_EXPR
5100 	   && !HONOR_SNANS (TREE_TYPE (op))
5101 	   && (!HONOR_SIGNED_ZEROS (TREE_TYPE (op))
5102 	       || !COMPLEX_FLOAT_TYPE_P (TREE_TYPE (op))))
5103     {
5104       tree rhs1 = gimple_assign_rhs1 (def_stmt);
5105       tree cst = build_minus_one_cst (TREE_TYPE (op));
5106       add_to_ops_vec (ops, rhs1);
5107       add_to_ops_vec (ops, cst);
5108       gimple_set_visited (def_stmt, true);
5109       return true;
5110     }
5111 
5112   return false;
5113 }
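
/* For illustration (hypothetical SSA names), when linearizing the
   multiply chain

     t_1 = -x_2;
     y_3 = t_1 * z_4;

   the NEGATE_EXPR case above (for types where the transform is allowed)
   replaces the single-use operand t_1 with the pair { x_2, -1 }, so the
   chain is handled as x_2 * -1 * z_4 and the -1 can later be folded
   away or repropagated as a negate.  */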
5114 
5115 /* Recursively linearize a binary expression that is the RHS of STMT.
5116    Place the operands of the expression tree in the vector named OPS.  */
5117 
5118 static void
5119 linearize_expr_tree (vec<operand_entry *> *ops, gimple *stmt,
5120 		     bool is_associative, bool set_visited)
5121 {
5122   tree binlhs = gimple_assign_rhs1 (stmt);
5123   tree binrhs = gimple_assign_rhs2 (stmt);
5124   gimple *binlhsdef = NULL, *binrhsdef = NULL;
5125   bool binlhsisreassoc = false;
5126   bool binrhsisreassoc = false;
5127   enum tree_code rhscode = gimple_assign_rhs_code (stmt);
5128   struct loop *loop = loop_containing_stmt (stmt);
5129 
5130   if (set_visited)
5131     gimple_set_visited (stmt, true);
5132 
5133   if (TREE_CODE (binlhs) == SSA_NAME)
5134     {
5135       binlhsdef = SSA_NAME_DEF_STMT (binlhs);
5136       binlhsisreassoc = (is_reassociable_op (binlhsdef, rhscode, loop)
5137 			 && !stmt_could_throw_p (cfun, binlhsdef));
5138     }
5139 
5140   if (TREE_CODE (binrhs) == SSA_NAME)
5141     {
5142       binrhsdef = SSA_NAME_DEF_STMT (binrhs);
5143       binrhsisreassoc = (is_reassociable_op (binrhsdef, rhscode, loop)
5144 			 && !stmt_could_throw_p (cfun, binrhsdef));
5145     }
5146 
5147   /* If the LHS is not reassociable, but the RHS is, we need to swap
5148      them.  If neither is reassociable, there is nothing we can do, so
5149      just put them in the ops vector.  If the LHS is reassociable,
5150      linearize it.  If both are reassociable, then linearize the RHS
5151      and the LHS.  */
5152 
5153   if (!binlhsisreassoc)
5154     {
5155       /* If this is not an associative operation, like division, give up.  */
5156       if (!is_associative)
5157 	{
5158 	  add_to_ops_vec (ops, binrhs);
5159 	  return;
5160 	}
5161 
5162       if (!binrhsisreassoc)
5163 	{
5164 	  if (!try_special_add_to_ops (ops, rhscode, binrhs, binrhsdef))
5165 	    add_to_ops_vec (ops, binrhs);
5166 
5167 	  if (!try_special_add_to_ops (ops, rhscode, binlhs, binlhsdef))
5168 	    add_to_ops_vec (ops, binlhs);
5169 
5170 	  return;
5171 	}
5172 
5173       if (dump_file && (dump_flags & TDF_DETAILS))
5174 	{
5175 	  fprintf (dump_file, "swapping operands of ");
5176 	  print_gimple_stmt (dump_file, stmt, 0);
5177 	}
5178 
5179       swap_ssa_operands (stmt,
5180 			 gimple_assign_rhs1_ptr (stmt),
5181 			 gimple_assign_rhs2_ptr (stmt));
5182       update_stmt (stmt);
5183 
5184       if (dump_file && (dump_flags & TDF_DETAILS))
5185 	{
5186 	  fprintf (dump_file, " is now ");
5187 	  print_gimple_stmt (dump_file, stmt, 0);
5188 	}
5189 
5190       /* We want to make it so the lhs is always the reassociative op,
5191 	 so swap.  */
5192       std::swap (binlhs, binrhs);
5193     }
5194   else if (binrhsisreassoc)
5195     {
5196       linearize_expr (stmt);
5197       binlhs = gimple_assign_rhs1 (stmt);
5198       binrhs = gimple_assign_rhs2 (stmt);
5199     }
5200 
5201   gcc_assert (TREE_CODE (binrhs) != SSA_NAME
5202 	      || !is_reassociable_op (SSA_NAME_DEF_STMT (binrhs),
5203 				      rhscode, loop));
5204   linearize_expr_tree (ops, SSA_NAME_DEF_STMT (binlhs),
5205 		       is_associative, set_visited);
5206 
5207   if (!try_special_add_to_ops (ops, rhscode, binrhs, binrhsdef))
5208     add_to_ops_vec (ops, binrhs);
5209 }
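
/* For illustration (hypothetical SSA names), on the already left-linear
   chain

     t1_1 = a_2 + b_3;
     t2_4 = t1_1 + c_5;
     x_6 = t2_4 + d_7;

   calling linearize_expr_tree on the last statement collects
   { a_2, b_3, c_5, d_7 } into OPS (in no particular order; the caller
   sorts the vector by rank afterwards).  */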
5210 
5211 /* Repropagate the negates back into subtracts, since no other pass
5212    currently does it.  */
5213 
5214 static void
5215 repropagate_negates (void)
5216 {
5217   unsigned int i = 0;
5218   tree negate;
5219 
5220   FOR_EACH_VEC_ELT (plus_negates, i, negate)
5221     {
5222       gimple *user = get_single_immediate_use (negate);
5223 
5224       if (!user || !is_gimple_assign (user))
5225 	continue;
5226 
5227       /* The negate operand can be either operand of a PLUS_EXPR
5228 	 (it can be the LHS if the RHS is a constant for example).
5229 
5230 	 Force the negate operand to the RHS of the PLUS_EXPR, then
5231 	 transform the PLUS_EXPR into a MINUS_EXPR.  */
5232       if (gimple_assign_rhs_code (user) == PLUS_EXPR)
5233 	{
5234 	  /* If the negated operand appears on the LHS of the
5235 	     PLUS_EXPR, exchange the operands of the PLUS_EXPR
5236 	     to force the negated operand to the RHS of the PLUS_EXPR.  */
5237 	  if (gimple_assign_rhs1 (user) == negate)
5238 	    {
5239 	      swap_ssa_operands (user,
5240 				 gimple_assign_rhs1_ptr (user),
5241 				 gimple_assign_rhs2_ptr (user));
5242 	    }
5243 
5244 	  /* Now transform the PLUS_EXPR into a MINUS_EXPR and replace
5245 	     the RHS of the PLUS_EXPR with the operand of the NEGATE_EXPR.  */
5246 	  if (gimple_assign_rhs2 (user) == negate)
5247 	    {
5248 	      tree rhs1 = gimple_assign_rhs1 (user);
5249 	      tree rhs2 = gimple_assign_rhs1 (SSA_NAME_DEF_STMT (negate));
5250 	      gimple_stmt_iterator gsi = gsi_for_stmt (user);
5251 	      gimple_assign_set_rhs_with_ops (&gsi, MINUS_EXPR, rhs1, rhs2);
5252 	      update_stmt (user);
5253 	    }
5254 	}
5255       else if (gimple_assign_rhs_code (user) == MINUS_EXPR)
5256 	{
5257 	  if (gimple_assign_rhs1 (user) == negate)
5258 	    {
5259 	      /* We have
5260 	           x = -a
5261 		   y = x - b
5262 		 which we transform into
5263 		   x = a + b
5264 		   y = -x .
5265 		 This pushes down the negate which we possibly can merge
5266 		 into some other operation, hence insert it into the
5267 		 plus_negates vector.  */
5268 	      gimple *feed = SSA_NAME_DEF_STMT (negate);
5269 	      tree a = gimple_assign_rhs1 (feed);
5270 	      tree b = gimple_assign_rhs2 (user);
5271 	      gimple_stmt_iterator gsi = gsi_for_stmt (feed);
5272 	      gimple_stmt_iterator gsi2 = gsi_for_stmt (user);
5273 	      tree x = make_ssa_name (TREE_TYPE (gimple_assign_lhs (feed)));
5274 	      gimple *g = gimple_build_assign (x, PLUS_EXPR, a, b);
5275 	      gsi_insert_before (&gsi2, g, GSI_SAME_STMT);
5276 	      gimple_assign_set_rhs_with_ops (&gsi2, NEGATE_EXPR, x);
5277 	      user = gsi_stmt (gsi2);
5278 	      update_stmt (user);
5279 	      reassoc_remove_stmt (&gsi);
5280 	      release_defs (feed);
5281 	      plus_negates.safe_push (gimple_assign_lhs (user));
5282 	    }
5283 	  else
5284 	    {
5285 	      /* Transform "x = -a; y = b - x" into "y = b + a", getting
5286 	         rid of one operation.  */
5287 	      gimple *feed = SSA_NAME_DEF_STMT (negate);
5288 	      tree a = gimple_assign_rhs1 (feed);
5289 	      tree rhs1 = gimple_assign_rhs1 (user);
5290 	      gimple_stmt_iterator gsi = gsi_for_stmt (user);
5291 	      gimple_assign_set_rhs_with_ops (&gsi, PLUS_EXPR, rhs1, a);
5292 	      update_stmt (gsi_stmt (gsi));
5293 	    }
5294 	}
5295     }
5296 }
5297 
5298 /* Returns true if OP is of a type for which we can do reassociation.
5299    That is, for integral types whose overflow wraps, non-saturating
5300    fixed-point types, and floating-point types under -fassociative-math.  */
5301 
5302 static bool
5303 can_reassociate_p (tree op)
5304 {
5305   tree type = TREE_TYPE (op);
5306   if (TREE_CODE (op) == SSA_NAME && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (op))
5307     return false;
5308   if ((ANY_INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_WRAPS (type))
5309       || NON_SAT_FIXED_POINT_TYPE_P (type)
5310       || (flag_associative_math && FLOAT_TYPE_P (type)))
5311     return true;
5312   return false;
5313 }
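
/* For example, reassociation is allowed for "unsigned int" operands
   (overflow wraps), for "int" only when signed overflow is defined to
   wrap (e.g. -fwrapv), and for "double" only under -fassociative-math;
   a name occurring in an abnormal PHI is never reassociated.  */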
5314 
5315 /* Break up subtract operations in block BB.
5316 
5317    We do this top down because we don't know whether the subtract is
5318    part of a possible chain of reassociation except at the top.
5319 
5320    IE given
5321    d = f + g
5322    c = a + e
5323    b = c - d
5324    q = b - r
5325    k = t - q
5326 
5327    we want to break up k = t - q, but we won't until we've transformed q
5328    = b - r, which won't be broken up until we transform b = c - d.
5329 
5330    En passant, clear the GIMPLE visited flag on every statement
5331    and set UIDs within each basic block.  */
5332 
5333 static void
5334 break_up_subtract_bb (basic_block bb)
5335 {
5336   gimple_stmt_iterator gsi;
5337   basic_block son;
5338   unsigned int uid = 1;
5339 
5340   for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
5341     {
5342       gimple *stmt = gsi_stmt (gsi);
5343       gimple_set_visited (stmt, false);
5344       gimple_set_uid (stmt, uid++);
5345 
5346       if (!is_gimple_assign (stmt)
5347 	  || !can_reassociate_p (gimple_assign_lhs (stmt)))
5348 	continue;
5349 
5350       /* Look for simple gimple subtract operations.  */
5351       if (gimple_assign_rhs_code (stmt) == MINUS_EXPR)
5352 	{
5353 	  if (!can_reassociate_p (gimple_assign_rhs1 (stmt))
5354 	      || !can_reassociate_p (gimple_assign_rhs2 (stmt)))
5355 	    continue;
5356 
5357 	  /* Check for a subtract used only in an addition.  If this
5358 	     is the case, transform it into add of a negate for better
5359 	     reassociation.  IE transform C = A-B into C = A + -B if C
5360 	     is only used in an addition.  */
5361 	  if (should_break_up_subtract (stmt))
5362 	    break_up_subtract (stmt, &gsi);
5363 	}
5364       else if (gimple_assign_rhs_code (stmt) == NEGATE_EXPR
5365 	       && can_reassociate_p (gimple_assign_rhs1 (stmt)))
5366 	plus_negates.safe_push (gimple_assign_lhs (stmt));
5367     }
5368   for (son = first_dom_son (CDI_DOMINATORS, bb);
5369        son;
5370        son = next_dom_son (CDI_DOMINATORS, son))
5371     break_up_subtract_bb (son);
5372 }
5373 
5374 /* Used for repeated factor analysis.  */
5375 struct repeat_factor
5376 {
5377   /* An SSA name that occurs in a multiply chain.  */
5378   tree factor;
5379 
5380   /* Cached rank of the factor.  */
5381   unsigned rank;
5382 
5383   /* Number of occurrences of the factor in the chain.  */
5384   HOST_WIDE_INT count;
5385 
5386   /* An SSA name representing the product of this factor and
5387      all factors appearing later in the repeated factor vector.  */
5388   tree repr;
5389 };
5390 
5391 
5392 static vec<repeat_factor> repeat_factor_vec;
5393 
5394 /* Used for sorting the repeat factor vector.  Sort primarily by
5395    ascending occurrence count, secondarily by descending rank.  */
5396 
5397 static int
5398 compare_repeat_factors (const void *x1, const void *x2)
5399 {
5400   const repeat_factor *rf1 = (const repeat_factor *) x1;
5401   const repeat_factor *rf2 = (const repeat_factor *) x2;
5402 
5403   if (rf1->count != rf2->count)
5404     return rf1->count - rf2->count;
5405 
5406   return rf2->rank - rf1->rank;
5407 }
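
/* For example (hypothetical factors), { x: count 2 rank 8,
   y: count 3 rank 5, z: count 3 rank 9 } sorts as x, z, y: x comes
   first on its lower count, and z precedes y because equal counts are
   ordered by descending rank.  */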
5408 
5409 /* Look for repeated operands in OPS in the multiply tree rooted at
5410    STMT.  Replace them with an optimal sequence of multiplies and powi
5411    builtin calls, and remove the used operands from OPS.  Return an
5412    SSA name representing the value of the replacement sequence.  */
5413 
5414 static tree
5415 attempt_builtin_powi (gimple *stmt, vec<operand_entry *> *ops)
5416 {
5417   unsigned i, j, vec_len;
5418   int ii;
5419   operand_entry *oe;
5420   repeat_factor *rf1, *rf2;
5421   repeat_factor rfnew;
5422   tree result = NULL_TREE;
5423   tree target_ssa, iter_result;
5424   tree type = TREE_TYPE (gimple_get_lhs (stmt));
5425   tree powi_fndecl = mathfn_built_in (type, BUILT_IN_POWI);
5426   gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
5427   gimple *mul_stmt, *pow_stmt;
5428 
5429   /* Nothing to do if BUILT_IN_POWI doesn't exist for this type and
5430      target.  */
5431   if (!powi_fndecl)
5432     return NULL_TREE;
5433 
5434   /* Allocate the repeated factor vector.  */
5435   repeat_factor_vec.create (10);
5436 
5437   /* Scan the OPS vector for all SSA names in the product and build
5438      up a vector of occurrence counts for each factor.  */
5439   FOR_EACH_VEC_ELT (*ops, i, oe)
5440     {
5441       if (TREE_CODE (oe->op) == SSA_NAME)
5442 	{
5443 	  FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
5444 	    {
5445 	      if (rf1->factor == oe->op)
5446 		{
5447 		  rf1->count += oe->count;
5448 		  break;
5449 		}
5450 	    }
5451 
5452 	  if (j >= repeat_factor_vec.length ())
5453 	    {
5454 	      rfnew.factor = oe->op;
5455 	      rfnew.rank = oe->rank;
5456 	      rfnew.count = oe->count;
5457 	      rfnew.repr = NULL_TREE;
5458 	      repeat_factor_vec.safe_push (rfnew);
5459 	    }
5460 	}
5461     }
5462 
5463   /* Sort the repeated factor vector by (a) increasing occurrence count,
5464      and (b) decreasing rank.  */
5465   repeat_factor_vec.qsort (compare_repeat_factors);
5466 
5467   /* It is generally best to combine as many base factors as possible
5468      into a product before applying __builtin_powi to the result.
5469      However, the sort order chosen for the repeated factor vector
5470      allows us to cache partial results for the product of the base
5471      factors for subsequent use.  When we already have a cached partial
5472      result from a previous iteration, it is best to make use of it
5473      before looking for another __builtin_pow opportunity.
5474 
5475      As an example, consider x * x * y * y * y * z * z * z * z.
5476      We want to first compose the product x * y * z, raise it to the
5477      second power, then multiply this by y * z, and finally multiply
5478      by z.  This can be done in 5 multiplies provided we cache y * z
5479      for use in both expressions:
5480 
5481         t1 = y * z
5482 	t2 = t1 * x
5483 	t3 = t2 * t2
5484 	t4 = t1 * t3
5485 	result = t4 * z
5486 
5487      If we instead ignored the cached y * z and first multiplied by
5488      the __builtin_pow opportunity z * z, we would get the inferior:
5489 
5490         t1 = y * z
5491 	t2 = t1 * x
5492 	t3 = t2 * t2
5493 	t4 = z * z
5494 	t5 = t3 * t4
5495         result = t5 * y  */
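
  /* A rough trace of the loop below for the example above: the sorted
     repeat factor vector is { x: 2, y: 3, z: 4 }.  The first iteration
     picks x (count 2), builds the cached chain z, y * z, x * y * z,
     emits __builtin_powi (x * y * z, 2), and subtracts 2 from every
     count, leaving { x: 0, y: 1, z: 2 }.  The second iteration finds
     the cached product y * z with count 1 and multiplies it in
     directly; the third does the same for the remaining z.  */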
5496 
5497   vec_len = repeat_factor_vec.length ();
5498 
5499   /* Repeatedly look for opportunities to create a builtin_powi call.  */
5500   while (true)
5501     {
5502       HOST_WIDE_INT power;
5503 
5504       /* First look for the largest cached product of factors from
5505 	 preceding iterations.  If found, create a builtin_powi for
5506 	 it if the minimum occurrence count for its factors is at
5507 	 least 2, or just use this cached product as our next
5508 	 multiplicand if the minimum occurrence count is 1.  */
5509       FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
5510 	{
5511 	  if (rf1->repr && rf1->count > 0)
5512 	    break;
5513 	}
5514 
5515       if (j < vec_len)
5516 	{
5517 	  power = rf1->count;
5518 
5519 	  if (power == 1)
5520 	    {
5521 	      iter_result = rf1->repr;
5522 
5523 	      if (dump_file && (dump_flags & TDF_DETAILS))
5524 		{
5525 		  unsigned elt;
5526 		  repeat_factor *rf;
5527 		  fputs ("Multiplying by cached product ", dump_file);
5528 		  for (elt = j; elt < vec_len; elt++)
5529 		    {
5530 		      rf = &repeat_factor_vec[elt];
5531 		      print_generic_expr (dump_file, rf->factor);
5532 		      if (elt < vec_len - 1)
5533 			fputs (" * ", dump_file);
5534 		    }
5535 		  fputs ("\n", dump_file);
5536 		}
5537 	    }
5538 	  else
5539 	    {
5540 	      iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
5541 	      pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
5542 					    build_int_cst (integer_type_node,
5543 							   power));
5544 	      gimple_call_set_lhs (pow_stmt, iter_result);
5545 	      gimple_set_location (pow_stmt, gimple_location (stmt));
5546 	      gimple_set_uid (pow_stmt, gimple_uid (stmt));
5547 	      gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
5548 
5549 	      if (dump_file && (dump_flags & TDF_DETAILS))
5550 		{
5551 		  unsigned elt;
5552 		  repeat_factor *rf;
5553 		  fputs ("Building __builtin_pow call for cached product (",
5554 			 dump_file);
5555 		  for (elt = j; elt < vec_len; elt++)
5556 		    {
5557 		      rf = &repeat_factor_vec[elt];
5558 		      print_generic_expr (dump_file, rf->factor);
5559 		      if (elt < vec_len - 1)
5560 			fputs (" * ", dump_file);
5561 		    }
5562 		  fprintf (dump_file, ")^" HOST_WIDE_INT_PRINT_DEC"\n",
5563 			   power);
5564 		}
5565 	    }
5566 	}
5567       else
5568 	{
5569 	  /* Otherwise, find the first factor in the repeated factor
5570 	     vector whose occurrence count is at least 2.  If no such
5571 	     factor exists, there are no builtin_powi opportunities
5572 	     remaining.  */
5573 	  FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
5574 	    {
5575 	      if (rf1->count >= 2)
5576 		break;
5577 	    }
5578 
5579 	  if (j >= vec_len)
5580 	    break;
5581 
5582 	  power = rf1->count;
5583 
5584 	  if (dump_file && (dump_flags & TDF_DETAILS))
5585 	    {
5586 	      unsigned elt;
5587 	      repeat_factor *rf;
5588 	      fputs ("Building __builtin_pow call for (", dump_file);
5589 	      for (elt = j; elt < vec_len; elt++)
5590 		{
5591 		  rf = &repeat_factor_vec[elt];
5592 		  print_generic_expr (dump_file, rf->factor);
5593 		  if (elt < vec_len - 1)
5594 		    fputs (" * ", dump_file);
5595 		}
5596 	      fprintf (dump_file, ")^" HOST_WIDE_INT_PRINT_DEC"\n", power);
5597 	    }
5598 
5599 	  reassociate_stats.pows_created++;
5600 
5601 	  /* Visit each element of the vector in reverse order (so that
5602 	     high-occurrence elements are visited first, and within the
5603 	     same occurrence count, lower-ranked elements are visited
5604 	     first).  Form a linear product of all elements in this order
5605 	     whose occurrence count is at least that of element J.
5606 	     Record the SSA name representing the product of each element
5607 	     with all subsequent elements in the vector.  */
5608 	  if (j == vec_len - 1)
5609 	    rf1->repr = rf1->factor;
5610 	  else
5611 	    {
5612 	      for (ii = vec_len - 2; ii >= (int)j; ii--)
5613 		{
5614 		  tree op1, op2;
5615 
5616 		  rf1 = &repeat_factor_vec[ii];
5617 		  rf2 = &repeat_factor_vec[ii + 1];
5618 
5619 		  /* Init the last factor's representative to be itself.  */
5620 		  if (!rf2->repr)
5621 		    rf2->repr = rf2->factor;
5622 
5623 		  op1 = rf1->factor;
5624 		  op2 = rf2->repr;
5625 
5626 		  target_ssa = make_temp_ssa_name (type, NULL, "reassocpow");
5627 		  mul_stmt = gimple_build_assign (target_ssa, MULT_EXPR,
5628 						  op1, op2);
5629 		  gimple_set_location (mul_stmt, gimple_location (stmt));
5630 		  gimple_set_uid (mul_stmt, gimple_uid (stmt));
5631 		  gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
5632 		  rf1->repr = target_ssa;
5633 
5634 		  /* Don't reprocess the multiply we just introduced.  */
5635 		  gimple_set_visited (mul_stmt, true);
5636 		}
5637 	    }
5638 
5639 	  /* Form a call to __builtin_powi for the maximum product
5640 	     just formed, raised to the power obtained earlier.  */
5641 	  rf1 = &repeat_factor_vec[j];
5642 	  iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
5643 	  pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
5644 					build_int_cst (integer_type_node,
5645 						       power));
5646 	  gimple_call_set_lhs (pow_stmt, iter_result);
5647 	  gimple_set_location (pow_stmt, gimple_location (stmt));
5648 	  gimple_set_uid (pow_stmt, gimple_uid (stmt));
5649 	  gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
5650 	}
5651 
5652       /* If we previously formed at least one other builtin_powi call,
5653 	 form the product of this one and those others.  */
5654       if (result)
5655 	{
5656 	  tree new_result = make_temp_ssa_name (type, NULL, "reassocpow");
5657 	  mul_stmt = gimple_build_assign (new_result, MULT_EXPR,
5658 					  result, iter_result);
5659 	  gimple_set_location (mul_stmt, gimple_location (stmt));
5660 	  gimple_set_uid (mul_stmt, gimple_uid (stmt));
5661 	  gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
5662 	  gimple_set_visited (mul_stmt, true);
5663 	  result = new_result;
5664 	}
5665       else
5666 	result = iter_result;
5667 
5668       /* Decrement the occurrence count of each element in the product
5669 	 by the count found above, and remove this many copies of each
5670 	 factor from OPS.  */
5671       for (i = j; i < vec_len; i++)
5672 	{
5673 	  unsigned k = power;
5674 	  unsigned n;
5675 
5676 	  rf1 = &repeat_factor_vec[i];
5677 	  rf1->count -= power;
5678 
5679 	  FOR_EACH_VEC_ELT_REVERSE (*ops, n, oe)
5680 	    {
5681 	      if (oe->op == rf1->factor)
5682 		{
5683 		  if (oe->count <= k)
5684 		    {
5685 		      ops->ordered_remove (n);
5686 		      k -= oe->count;
5687 
5688 		      if (k == 0)
5689 			break;
5690 		    }
5691 		  else
5692 		    {
5693 		      oe->count -= k;
5694 		      break;
5695 		    }
5696 		}
5697 	    }
5698 	}
5699     }
5700 
5701   /* At this point all elements in the repeated factor vector have a
5702      remaining occurrence count of 0 or 1, and those with a count of 1
5703      don't have cached representatives.  Re-sort the ops vector and
5704      clean up.  */
5705   ops->qsort (sort_by_operand_rank);
5706   repeat_factor_vec.release ();
5707 
5708   /* Return the final product computed herein.  Note that there may
5709      still be some elements with single occurrence count left in OPS;
5710      those will be handled by the normal reassociation logic.  */
5711   return result;
5712 }
5713 
5714 /* Attempt to optimize
5715    CST1 * copysign (CST2, y) -> copysign (CST1 * CST2, y) if CST1 > 0, or
5716    CST1 * copysign (CST2, y) -> -copysign (CST1 * CST2, y) if CST1 < 0.  */
5717 
5718 static void
5719 attempt_builtin_copysign (vec<operand_entry *> *ops)
5720 {
5721   operand_entry *oe;
5722   unsigned int i;
5723   unsigned int length = ops->length ();
5724   tree cst = ops->last ()->op;
5725 
5726   if (length == 1 || TREE_CODE (cst) != REAL_CST)
5727     return;
5728 
5729   FOR_EACH_VEC_ELT (*ops, i, oe)
5730     {
5731       if (TREE_CODE (oe->op) == SSA_NAME
5732 	  && has_single_use (oe->op))
5733 	{
5734 	  gimple *def_stmt = SSA_NAME_DEF_STMT (oe->op);
5735 	  if (gcall *old_call = dyn_cast <gcall *> (def_stmt))
5736 	    {
5737 	      tree arg0, arg1;
5738 	      switch (gimple_call_combined_fn (old_call))
5739 		{
5740 		CASE_CFN_COPYSIGN:
5741 		CASE_CFN_COPYSIGN_FN:
5742 		  arg0 = gimple_call_arg (old_call, 0);
5743 		  arg1 = gimple_call_arg (old_call, 1);
5744 		  /* The first argument of copysign must be a constant,
5745 		     otherwise there's nothing to do.  */
5746 		  if (TREE_CODE (arg0) == REAL_CST)
5747 		    {
5748 		      tree type = TREE_TYPE (arg0);
5749 		      tree mul = const_binop (MULT_EXPR, type, cst, arg0);
5750 		      /* If we couldn't fold to a single constant, skip it.
5751 			 That happens e.g. for inexact multiplication when
5752 			 -frounding-math.  */
5753 		      if (mul == NULL_TREE)
5754 			break;
5755 		      /* Instead of adjusting OLD_CALL, let's build a new
5756 			 call to not leak the LHS and prevent keeping bogus
5757 			 debug statements.  DCE will clean up the old call.  */
5758 		      gcall *new_call;
5759 		      if (gimple_call_internal_p (old_call))
5760 			new_call = gimple_build_call_internal
5761 			  (IFN_COPYSIGN, 2, mul, arg1);
5762 		      else
5763 			new_call = gimple_build_call
5764 			  (gimple_call_fndecl (old_call), 2, mul, arg1);
5765 		      tree lhs = make_ssa_name (type);
5766 		      gimple_call_set_lhs (new_call, lhs);
5767 		      gimple_set_location (new_call,
5768 					   gimple_location (old_call));
5769 		      insert_stmt_after (new_call, old_call);
5770 		      /* We've used the constant, get rid of it.  */
5771 		      ops->pop ();
5772 		      bool cst1_neg = real_isneg (TREE_REAL_CST_PTR (cst));
5773 		      /* Handle the CST1 < 0 case by negating the result.  */
5774 		      if (cst1_neg)
5775 			{
5776 			  tree negrhs = make_ssa_name (TREE_TYPE (lhs));
5777 			  gimple *negate_stmt
5778 			    = gimple_build_assign (negrhs, NEGATE_EXPR, lhs);
5779 			  insert_stmt_after (negate_stmt, new_call);
5780 			  oe->op = negrhs;
5781 			}
5782 		      else
5783 			oe->op = lhs;
5784 		      if (dump_file && (dump_flags & TDF_DETAILS))
5785 			{
5786 			  fprintf (dump_file, "Optimizing copysign: ");
5787 			  print_generic_expr (dump_file, cst);
5788 			  fprintf (dump_file, " * COPYSIGN (");
5789 			  print_generic_expr (dump_file, arg0);
5790 			  fprintf (dump_file, ", ");
5791 			  print_generic_expr (dump_file, arg1);
5792 			  fprintf (dump_file, ") into %sCOPYSIGN (",
5793 				   cst1_neg ? "-" : "");
5794 			  print_generic_expr (dump_file, mul);
5795 			  fprintf (dump_file, ", ");
5796 			  print_generic_expr (dump_file, arg1);
5797 			  fprintf (dump_file, "\n");
5798 			}
5799 		      return;
5800 		    }
5801 		  break;
5802 		default:
5803 		  break;
5804 		}
5805 	    }
5806 	}
5807     }
5808 }
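
/* For illustration (hypothetical SSA names), given the product

     t_1 = __builtin_copysign (3.0, y_2);
     r_3 = t_1 * -2.5;

   the constant -2.5 is popped from OPS, a new call computing
   copysign (-2.5 * 3.0, y_2) is emitted, and because the constant is
   negative its result is negated, so the operand entry now contributes
   -copysign (7.5, y_2) to the remaining product.  */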
5809 
5810 /* Transform STMT at *GSI into a copy by replacing its rhs with NEW_RHS.  */
5811 
5812 static void
5813 transform_stmt_to_copy (gimple_stmt_iterator *gsi, gimple *stmt, tree new_rhs)
5814 {
5815   tree rhs1;
5816 
5817   if (dump_file && (dump_flags & TDF_DETAILS))
5818     {
5819       fprintf (dump_file, "Transforming ");
5820       print_gimple_stmt (dump_file, stmt, 0);
5821     }
5822 
5823   rhs1 = gimple_assign_rhs1 (stmt);
5824   gimple_assign_set_rhs_from_tree (gsi, new_rhs);
5825   update_stmt (stmt);
5826   remove_visited_stmt_chain (rhs1);
5827 
5828   if (dump_file && (dump_flags & TDF_DETAILS))
5829     {
5830       fprintf (dump_file, " into ");
5831       print_gimple_stmt (dump_file, stmt, 0);
5832     }
5833 }
5834 
5835 /* Transform STMT at *GSI into a multiply of RHS1 and RHS2.  */
5836 
5837 static void
5838 transform_stmt_to_multiply (gimple_stmt_iterator *gsi, gimple *stmt,
5839 			    tree rhs1, tree rhs2)
5840 {
5841   if (dump_file && (dump_flags & TDF_DETAILS))
5842     {
5843       fprintf (dump_file, "Transforming ");
5844       print_gimple_stmt (dump_file, stmt, 0);
5845     }
5846 
5847   gimple_assign_set_rhs_with_ops (gsi, MULT_EXPR, rhs1, rhs2);
5848   update_stmt (gsi_stmt (*gsi));
5849   remove_visited_stmt_chain (rhs1);
5850 
5851   if (dump_file && (dump_flags & TDF_DETAILS))
5852     {
5853       fprintf (dump_file, " into ");
5854       print_gimple_stmt (dump_file, stmt, 0);
5855     }
5856 }
5857 
5858 /* Reassociate expressions in basic block BB and its children in the
5859    post-dominator tree.
5860 
5861    Bubble up return status from maybe_optimize_range_tests.  */
5862 
5863 static bool
5864 reassociate_bb (basic_block bb)
5865 {
5866   gimple_stmt_iterator gsi;
5867   basic_block son;
5868   gimple *stmt = last_stmt (bb);
5869   bool cfg_cleanup_needed = false;
5870 
5871   if (stmt && !gimple_visited_p (stmt))
5872     cfg_cleanup_needed |= maybe_optimize_range_tests (stmt);
5873 
5874   for (gsi = gsi_last_bb (bb); !gsi_end_p (gsi); gsi_prev (&gsi))
5875     {
5876       stmt = gsi_stmt (gsi);
5877 
5878       if (is_gimple_assign (stmt)
5879 	  && !stmt_could_throw_p (cfun, stmt))
5880 	{
5881 	  tree lhs, rhs1, rhs2;
5882 	  enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
5883 
5884 	  /* If this is not a gimple binary expression, there is
5885 	     nothing for us to do with it.  */
5886 	  if (get_gimple_rhs_class (rhs_code) != GIMPLE_BINARY_RHS)
5887 	    continue;
5888 
5889 	  /* If this was part of an already processed statement,
5890 	     we don't need to touch it again. */
5891 	  if (gimple_visited_p (stmt))
5892 	    {
5893 	      /* This statement might have become dead because of previous
5894 		 reassociations.  */
5895 	      if (has_zero_uses (gimple_get_lhs (stmt)))
5896 		{
5897 		  reassoc_remove_stmt (&gsi);
5898 		  release_defs (stmt);
5899 		  /* We might end up removing the last stmt above which
5900 		     places the iterator to the end of the sequence.
5901 		     Reset it to the last stmt in this case which might
5902 		     be the end of the sequence as well if we removed
5903 		     the last statement of the sequence.  In which case
5904 		     we need to bail out.  */
5905 		  if (gsi_end_p (gsi))
5906 		    {
5907 		      gsi = gsi_last_bb (bb);
5908 		      if (gsi_end_p (gsi))
5909 			break;
5910 		    }
5911 		}
5912 	      continue;
5913 	    }
5914 
5915 	  lhs = gimple_assign_lhs (stmt);
5916 	  rhs1 = gimple_assign_rhs1 (stmt);
5917 	  rhs2 = gimple_assign_rhs2 (stmt);
5918 
5919 	  /* For non-bit or min/max operations we can't associate
5920 	     all types.  Verify that here.  */
5921 	  if (rhs_code != BIT_IOR_EXPR
5922 	      && rhs_code != BIT_AND_EXPR
5923 	      && rhs_code != BIT_XOR_EXPR
5924 	      && rhs_code != MIN_EXPR
5925 	      && rhs_code != MAX_EXPR
5926 	      && (!can_reassociate_p (lhs)
5927 		  || !can_reassociate_p (rhs1)
5928 		  || !can_reassociate_p (rhs2)))
5929 	    continue;
5930 
5931 	  if (associative_tree_code (rhs_code))
5932 	    {
5933 	      auto_vec<operand_entry *> ops;
5934 	      tree powi_result = NULL_TREE;
5935 	      bool is_vector = VECTOR_TYPE_P (TREE_TYPE (lhs));
5936 
5937 	      /* There may be no immediate uses left by the time we
5938 		 get here because we may have eliminated them all.  */
5939 	      if (TREE_CODE (lhs) == SSA_NAME && has_zero_uses (lhs))
5940 		continue;
5941 
5942 	      gimple_set_visited (stmt, true);
5943 	      linearize_expr_tree (&ops, stmt, true, true);
5944 	      ops.qsort (sort_by_operand_rank);
5945 	      int orig_len = ops.length ();
5946 	      optimize_ops_list (rhs_code, &ops);
5947 	      if (undistribute_ops_list (rhs_code, &ops,
5948 					 loop_containing_stmt (stmt)))
5949 		{
5950 		  ops.qsort (sort_by_operand_rank);
5951 		  optimize_ops_list (rhs_code, &ops);
5952 		}
5953 
5954 	      if (rhs_code == PLUS_EXPR
5955 		  && transform_add_to_multiply (&ops))
5956 		ops.qsort (sort_by_operand_rank);
5957 
5958 	      if (rhs_code == BIT_IOR_EXPR || rhs_code == BIT_AND_EXPR)
5959 		{
5960 		  if (is_vector)
5961 		    optimize_vec_cond_expr (rhs_code, &ops);
5962 		  else
5963 		    optimize_range_tests (rhs_code, &ops, NULL);
5964 	        }
5965 
5966 	      if (rhs_code == MULT_EXPR && !is_vector)
5967 	        {
5968 		  attempt_builtin_copysign (&ops);
5969 
5970 		  if (reassoc_insert_powi_p
5971 		      && flag_unsafe_math_optimizations)
5972 		    powi_result = attempt_builtin_powi (stmt, &ops);
5973 		}
5974 
5975 	      operand_entry *last;
5976 	      bool negate_result = false;
5977 	      if (ops.length () > 1
5978 		  && rhs_code == MULT_EXPR)
5979 		{
5980 		  last = ops.last ();
5981 		  if ((integer_minus_onep (last->op)
5982 		       || real_minus_onep (last->op))
5983 		      && !HONOR_SNANS (TREE_TYPE (lhs))
5984 		      && (!HONOR_SIGNED_ZEROS (TREE_TYPE (lhs))
5985 			  || !COMPLEX_FLOAT_TYPE_P (TREE_TYPE (lhs))))
5986 		    {
5987 		      ops.pop ();
5988 		      negate_result = true;
5989 		    }
5990 		}
5991 
5992 	      tree new_lhs = lhs;
5993 	      /* If the operand vector is now empty, all operands were
5994 		 consumed by the __builtin_powi optimization.  */
5995 	      if (ops.length () == 0)
5996 		transform_stmt_to_copy (&gsi, stmt, powi_result);
5997 	      else if (ops.length () == 1)
5998 		{
5999 		  tree last_op = ops.last ()->op;
6000 
6001 		  /* If the stmt that defines the operand has to be inserted, insert it
6002 		     before the use.  */
6003 		  if (ops.last ()->stmt_to_insert)
6004 		    insert_stmt_before_use (stmt, ops.last ()->stmt_to_insert);
6005 		  if (powi_result)
6006 		    transform_stmt_to_multiply (&gsi, stmt, last_op,
6007 						powi_result);
6008 		  else
6009 		    transform_stmt_to_copy (&gsi, stmt, last_op);
6010 		}
6011 	      else
6012 		{
6013 		  machine_mode mode = TYPE_MODE (TREE_TYPE (lhs));
6014 		  int ops_num = ops.length ();
6015 		  int width = get_reassociation_width (ops_num, rhs_code, mode);
6016 
6017 		  if (dump_file && (dump_flags & TDF_DETAILS))
6018 		    fprintf (dump_file,
6019 			     "Width = %d was chosen for reassociation\n", width);
6020 
6021 
6022 		  /* For binary bit operations, if there are at least 3
6023 		     operands and the last operand in OPS is a constant,
6024 		     move it to the front.  This helps ensure that we generate
6025 		     (X & Y) & C rather than (X & C) & Y.  The former will
6026 		     often match a canonical bit test when we get to RTL.  */
6027 		  if (ops.length () > 2
6028 		      && (rhs_code == BIT_AND_EXPR
6029 		          || rhs_code == BIT_IOR_EXPR
6030 		          || rhs_code == BIT_XOR_EXPR)
6031 		      && TREE_CODE (ops.last ()->op) == INTEGER_CST)
6032 		    std::swap (*ops[0], *ops[ops_num - 1]);
6033 
6034 		  if (width > 1
6035 		      && ops.length () > 3)
6036 		    rewrite_expr_tree_parallel (as_a <gassign *> (stmt),
6037 						width, ops);
6038 		  else
6039                     {
6040                       /* When there are three operands left, we want
6041                          to make sure the ones that get the double
6042                          binary op are chosen wisely.  */
6043                       int len = ops.length ();
6044                       if (len >= 3)
6045                         swap_ops_for_binary_stmt (ops, len - 3, stmt);
6046 
6047 		      new_lhs = rewrite_expr_tree (stmt, 0, ops,
6048 						   powi_result != NULL
6049 						   || negate_result,
6050 						   len != orig_len);
6051                     }
6052 
6053 		  /* If we combined some repeated factors into a
6054 		     __builtin_powi call, multiply that result by the
6055 		     reassociated operands.  */
6056 		  if (powi_result)
6057 		    {
6058 		      gimple *mul_stmt, *lhs_stmt = SSA_NAME_DEF_STMT (lhs);
6059 		      tree type = TREE_TYPE (lhs);
6060 		      tree target_ssa = make_temp_ssa_name (type, NULL,
6061 							    "reassocpow");
6062 		      gimple_set_lhs (lhs_stmt, target_ssa);
6063 		      update_stmt (lhs_stmt);
6064 		      if (lhs != new_lhs)
6065 			{
6066 			  target_ssa = new_lhs;
6067 			  new_lhs = lhs;
6068 			}
6069 		      mul_stmt = gimple_build_assign (lhs, MULT_EXPR,
6070 						      powi_result, target_ssa);
6071 		      gimple_set_location (mul_stmt, gimple_location (stmt));
6072 		      gimple_set_uid (mul_stmt, gimple_uid (stmt));
6073 		      gsi_insert_after (&gsi, mul_stmt, GSI_NEW_STMT);
6074 		    }
6075 		}
6076 
6077 	      if (negate_result)
6078 		{
6079 		  stmt = SSA_NAME_DEF_STMT (lhs);
6080 		  tree tmp = make_ssa_name (TREE_TYPE (lhs));
6081 		  gimple_set_lhs (stmt, tmp);
6082 		  if (lhs != new_lhs)
6083 		    tmp = new_lhs;
6084 		  gassign *neg_stmt = gimple_build_assign (lhs, NEGATE_EXPR,
6085 							   tmp);
6086 		  gimple_set_uid (neg_stmt, gimple_uid (stmt));
6087 		  gsi_insert_after (&gsi, neg_stmt, GSI_NEW_STMT);
6088 		  update_stmt (stmt);
6089 		}
6090 	    }
6091 	}
6092     }
6093   for (son = first_dom_son (CDI_POST_DOMINATORS, bb);
6094        son;
6095        son = next_dom_son (CDI_POST_DOMINATORS, son))
6096     cfg_cleanup_needed |= reassociate_bb (son);
6097 
6098   return cfg_cleanup_needed;
6099 }
6100 
6101 /* Add jumps around shifts for range tests turned into bit tests.
6102    For each SSA_NAME VAR we have code like:
6103    VAR = ...; // final stmt of range comparison
6104    // bit test here...;
6105    OTHERVAR = ...; // final stmt of the bit test sequence
6106    RES = VAR | OTHERVAR;
6107    Turn the above into:
6108    VAR = ...;
6109    if (VAR != 0)
6110      goto <l3>;
6111    else
6112      goto <l2>;
6113    <l2>:
6114    // bit test here...;
6115    OTHERVAR = ...;
6116    <l3>:
6117    # RES = PHI<1(l1), OTHERVAR(l2)>;  */
6118 
6119 static void
6120 branch_fixup (void)
6121 {
6122   tree var;
6123   unsigned int i;
6124 
6125   FOR_EACH_VEC_ELT (reassoc_branch_fixups, i, var)
6126     {
6127       gimple *def_stmt = SSA_NAME_DEF_STMT (var);
6128       gimple *use_stmt;
6129       use_operand_p use;
6130       bool ok = single_imm_use (var, &use, &use_stmt);
6131       gcc_assert (ok
6132 		  && is_gimple_assign (use_stmt)
6133 		  && gimple_assign_rhs_code (use_stmt) == BIT_IOR_EXPR
6134 		  && gimple_bb (def_stmt) == gimple_bb (use_stmt));
6135 
6136       basic_block cond_bb = gimple_bb (def_stmt);
6137       basic_block then_bb = split_block (cond_bb, def_stmt)->dest;
6138       basic_block merge_bb = split_block (then_bb, use_stmt)->dest;
6139 
6140       gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
6141       gimple *g = gimple_build_cond (NE_EXPR, var,
6142 				     build_zero_cst (TREE_TYPE (var)),
6143 				     NULL_TREE, NULL_TREE);
6144       location_t loc = gimple_location (use_stmt);
6145       gimple_set_location (g, loc);
6146       gsi_insert_after (&gsi, g, GSI_NEW_STMT);
6147 
6148       edge etrue = make_edge (cond_bb, merge_bb, EDGE_TRUE_VALUE);
6149       etrue->probability = profile_probability::even ();
6150       edge efalse = find_edge (cond_bb, then_bb);
6151       efalse->flags = EDGE_FALSE_VALUE;
6152       efalse->probability -= etrue->probability;
6153       then_bb->count -= etrue->count ();
6154 
6155       tree othervar = NULL_TREE;
6156       if (gimple_assign_rhs1 (use_stmt) == var)
6157 	othervar = gimple_assign_rhs2 (use_stmt);
6158       else if (gimple_assign_rhs2 (use_stmt) == var)
6159 	othervar = gimple_assign_rhs1 (use_stmt);
6160       else
6161 	gcc_unreachable ();
6162       tree lhs = gimple_assign_lhs (use_stmt);
6163       gphi *phi = create_phi_node (lhs, merge_bb);
6164       add_phi_arg (phi, build_one_cst (TREE_TYPE (lhs)), etrue, loc);
6165       add_phi_arg (phi, othervar, single_succ_edge (then_bb), loc);
6166       gsi = gsi_for_stmt (use_stmt);
6167       gsi_remove (&gsi, true);
6168 
6169       set_immediate_dominator (CDI_DOMINATORS, merge_bb, cond_bb);
6170       set_immediate_dominator (CDI_POST_DOMINATORS, cond_bb, merge_bb);
6171     }
6172   reassoc_branch_fixups.release ();
6173 }
6174 
6175 void dump_ops_vector (FILE *file, vec<operand_entry *> ops);
6176 void debug_ops_vector (vec<operand_entry *> ops);
6177 
6178 /* Dump the operand entry vector OPS to FILE.  */
6179 
6180 void
6181 dump_ops_vector (FILE *file, vec<operand_entry *> ops)
6182 {
6183   operand_entry *oe;
6184   unsigned int i;
6185 
6186   FOR_EACH_VEC_ELT (ops, i, oe)
6187     {
6188       fprintf (file, "Op %d -> rank: %d, tree: ", i, oe->rank);
6189       print_generic_expr (file, oe->op);
6190       fprintf (file, "\n");
6191     }
6192 }
6193 
6194 /* Dump the operand entry vector OPS to STDERR.  */
6195 
6196 DEBUG_FUNCTION void
6197 debug_ops_vector (vec<operand_entry *> ops)
6198 {
6199   dump_ops_vector (stderr, ops);
6200 }
6201 
6202 /* Bubble up return status from reassociate_bb.  */
6203 
6204 static bool
6205 do_reassoc (void)
6206 {
6207   break_up_subtract_bb (ENTRY_BLOCK_PTR_FOR_FN (cfun));
6208   return reassociate_bb (EXIT_BLOCK_PTR_FOR_FN (cfun));
6209 }
6210 
6211 /* Initialize the reassociation pass.  */
6212 
6213 static void
6214 init_reassoc (void)
6215 {
6216   int i;
6217   long rank = 2;
6218   int *bbs = XNEWVEC (int, n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS);
6219 
6220   /* Find the loops, so that we can prevent moving calculations in
6221      them.  */
6222   loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
6223 
6224   memset (&reassociate_stats, 0, sizeof (reassociate_stats));
6225 
6226   next_operand_entry_id = 0;
6227 
6228   /* Reverse RPO (Reverse Post Order) will give us something where
6229      deeper loops come later.  */
6230   pre_and_rev_post_order_compute (NULL, bbs, false);
6231   bb_rank = XCNEWVEC (long, last_basic_block_for_fn (cfun));
6232   operand_rank = new hash_map<tree, long>;
6233 
6234   /* Give each default definition a distinct rank.  This includes
6235      parameters and the static chain.  Walk backwards over all
6236      SSA names so that we get proper rank ordering according
6237      to tree_swap_operands_p.  */
6238   for (i = num_ssa_names - 1; i > 0; --i)
6239     {
6240       tree name = ssa_name (i);
6241       if (name && SSA_NAME_IS_DEFAULT_DEF (name))
6242 	insert_operand_rank (name, ++rank);
6243     }
6244 
6245   /* Set up rank for each BB.  */
6246   for (i = 0; i < n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS; i++)
6247     bb_rank[bbs[i]] = ++rank << 16;
6248 
6249   free (bbs);
6250   calculate_dominance_info (CDI_POST_DOMINATORS);
6251   plus_negates = vNULL;
6252 }
6253 
6254 /* Cleanup after the reassociation pass, and print stats if
6255    requested.  */
6256 
6257 static void
6258 fini_reassoc (void)
6259 {
6260   statistics_counter_event (cfun, "Linearized",
6261 			    reassociate_stats.linearized);
6262   statistics_counter_event (cfun, "Constants eliminated",
6263 			    reassociate_stats.constants_eliminated);
6264   statistics_counter_event (cfun, "Ops eliminated",
6265 			    reassociate_stats.ops_eliminated);
6266   statistics_counter_event (cfun, "Statements rewritten",
6267 			    reassociate_stats.rewritten);
6268   statistics_counter_event (cfun, "Built-in pow[i] calls encountered",
6269 			    reassociate_stats.pows_encountered);
6270   statistics_counter_event (cfun, "Built-in powi calls created",
6271 			    reassociate_stats.pows_created);
6272 
6273   delete operand_rank;
6274   operand_entry_pool.release ();
6275   free (bb_rank);
6276   plus_negates.release ();
6277   free_dominance_info (CDI_POST_DOMINATORS);
6278   loop_optimizer_finalize ();
6279 }
6280 
6281 /* Gate and execute functions for Reassociation.  If INSERT_POWI_P, enable
6282    insertion of __builtin_powi calls.
6283 
6284    Returns TODO_cfg_cleanup if a CFG cleanup pass is desired due to
6285    optimization of a gimple conditional.  Otherwise returns zero.  */
6286 
6287 static unsigned int
6288 execute_reassoc (bool insert_powi_p)
6289 {
6290   reassoc_insert_powi_p = insert_powi_p;
6291 
6292   init_reassoc ();
6293 
6294   bool cfg_cleanup_needed;
6295   cfg_cleanup_needed = do_reassoc ();
6296   repropagate_negates ();
6297   branch_fixup ();
6298 
6299   fini_reassoc ();
6300   return cfg_cleanup_needed ? TODO_cleanup_cfg : 0;
6301 }
6302 
6303 namespace {
6304 
6305 const pass_data pass_data_reassoc =
6306 {
6307   GIMPLE_PASS, /* type */
6308   "reassoc", /* name */
6309   OPTGROUP_NONE, /* optinfo_flags */
6310   TV_TREE_REASSOC, /* tv_id */
6311   ( PROP_cfg | PROP_ssa ), /* properties_required */
6312   0, /* properties_provided */
6313   0, /* properties_destroyed */
6314   0, /* todo_flags_start */
6315   TODO_update_ssa_only_virtuals, /* todo_flags_finish */
6316 };
6317 
6318 class pass_reassoc : public gimple_opt_pass
6319 {
6320 public:
6321   pass_reassoc (gcc::context *ctxt)
6322     : gimple_opt_pass (pass_data_reassoc, ctxt), insert_powi_p (false)
6323   {}
6324 
6325   /* opt_pass methods: */
6326   opt_pass * clone () { return new pass_reassoc (m_ctxt); }
6327   void set_pass_param (unsigned int n, bool param)
6328     {
6329       gcc_assert (n == 0);
6330       insert_powi_p = param;
6331     }
6332   virtual bool gate (function *) { return flag_tree_reassoc != 0; }
6333   virtual unsigned int execute (function *)
6334     { return execute_reassoc (insert_powi_p); }
6335 
6336  private:
6337   /* Enable insertion of __builtin_powi calls during execute_reassoc.  See
6338      point 3a in the pass header comment.  */
6339   bool insert_powi_p;
6340 }; // class pass_reassoc
6341 
6342 } // anon namespace
6343 
6344 gimple_opt_pass *
6345 make_pass_reassoc (gcc::context *ctxt)
6346 {
6347   return new pass_reassoc (ctxt);
6348 }
6349