1 /* Reassociation for trees.
2    Copyright (C) 2005-2020 Free Software Foundation, Inc.
3    Contributed by Daniel Berlin <dan@dberlin.org>
4 
5 This file is part of GCC.
6 
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
11 
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 GNU General Public License for more details.
16 
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3.  If not see
19 <http://www.gnu.org/licenses/>.  */
20 
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "backend.h"
25 #include "target.h"
26 #include "rtl.h"
27 #include "tree.h"
28 #include "gimple.h"
29 #include "cfghooks.h"
30 #include "alloc-pool.h"
31 #include "tree-pass.h"
32 #include "memmodel.h"
33 #include "tm_p.h"
34 #include "ssa.h"
35 #include "optabs-tree.h"
36 #include "gimple-pretty-print.h"
37 #include "diagnostic-core.h"
38 #include "fold-const.h"
39 #include "stor-layout.h"
40 #include "cfganal.h"
41 #include "gimple-fold.h"
42 #include "tree-eh.h"
43 #include "gimple-iterator.h"
44 #include "gimplify-me.h"
45 #include "tree-cfg.h"
46 #include "tree-ssa-loop.h"
47 #include "flags.h"
48 #include "tree-ssa.h"
49 #include "langhooks.h"
50 #include "cfgloop.h"
51 #include "builtins.h"
52 #include "gimplify.h"
53 #include "case-cfn-macros.h"
54 
55 /*  This is a simple global reassociation pass.  It is, in part, based
56     on the LLVM pass of the same name (they do some things more or less
57     than we do, in different orders, etc.).
58 
59     It consists of five steps:
60 
61     1. Breaking up subtract operations into addition + negate, where
62     it would promote the reassociation of adds.
63 
64     2. Left linearization of the expression trees, so that (A+B)+(C+D)
65     becomes (((A+B)+C)+D), which is easier for us to rewrite later.
66     During linearization, we place the operands of the binary
67     expressions into a vector of operand_entry_*
68 
69     3. Optimization of the operand lists, eliminating things like a +
70     -a, a & a, etc.
71 
72     3a. Combine repeated factors with the same occurrence counts
73     into a __builtin_powi call that will later be optimized into
74     an optimal number of multiplies.
75 
76     4. Rewrite the expression trees we linearized and optimized so
77     they are in proper rank order.
78 
79     5. Repropagate negates, as nothing else will clean them up ATM.
80 
81     A bit of theory on #4, since nobody seems to write anything down
82     about why it makes sense to do it the way they do it:
83 
84     We could do this in a much nicer way theoretically, but don't (for
85     reasons explained after the description of the theoretically nice way :P).
86 
87     In order to promote the most redundancy elimination, you want
88     binary expressions whose operands are the same rank (or
89     preferably, the same value) exposed to the redundancy eliminator,
90     for possible elimination.
91 
92     So the way to do this if we really cared, is to build the new op
93     tree from the leaves to the roots, merging as you go, and putting the
94     new op on the end of the worklist, until you are left with one
95     thing on the worklist.
96 
97     I.e., if you have to rewrite the following set of operands (listed with
98     rank in parentheses), with opcode PLUS_EXPR:
99 
100     a (1),  b (1),  c (1),  d (2), e (2)
101 
102 
103     We start with our merge worklist empty, and the ops list with all of
104     those on it.
105 
106     You want to first merge all leaves of the same rank, as much as
107     possible.
108 
109     So first build a binary op of
110 
111     mergetmp = a + b, and put "mergetmp" on the merge worklist.
112 
113     Because there is no three operand form of PLUS_EXPR, c is not going to
114     be exposed to redundancy elimination as a rank 1 operand.
115 
116     So you might as well throw it on the merge worklist (you could also
117     consider it to now be a rank two operand, and merge it with d and e,
118     but in this case, you then have evicted e from a binary op. So at
119     least in this situation, you can't win.)
120 
121     Then build a binary op of d + e
122     mergetmp2 = d + e
123 
124     and put mergetmp2 on the merge worklist.
125 
126     so merge worklist = {mergetmp, c, mergetmp2}
127 
128     Continue building binary ops of these operations until you have only
129     one operation left on the worklist.
130 
131     So we have
132 
133     build binary op
134     mergetmp3 = mergetmp + c
135 
136     worklist = {mergetmp2, mergetmp3}
137 
138     mergetmp4 = mergetmp2 + mergetmp3
139 
140     worklist = {mergetmp4}
141 
142     Because we have one operation left, we can now just set the original
143     statement equal to the result of that operation.
144 
145     This will at least expose a + b  and d + e to redundancy elimination
146     as binary operations.
147 
148     For extra points, you can reuse the old statements to build the
149     mergetmps, since you shouldn't run out.
150 
151     So why don't we do this?
152 
153     Because it's expensive, and rarely will help.  Most trees we are
154     reassociating have 3 or fewer ops.  If they have 2 ops, they already
155     will be written into a nice single binary op.  If you have 3 ops, a
156     single simple check suffices to tell you whether the first two are of the
157     same rank.  If so, you know to order it
158 
159     mergetmp = op1 + op2
160     newstmt = mergetmp + op3
161 
162     instead of
163     mergetmp = op2 + op3
164     newstmt = mergetmp + op1
165 
166     If all three are of the same rank, you can't expose them all in a
167     single binary operator anyway, so the above is *still* the best you
168     can do.
169 
170     Thus, this is what we do.  When we have three ops left, we check to see
171     what order to put them in, and call it a day.  As a nod to vector sum
172     reduction, we check if any of the ops are really a phi node that is a
173     destructive update for the associating op, and keep the destructive
174     update together for vector sum reduction recognition.  */
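
/* A small illustration of the steps above (a sketch only, not part of the
   pass itself): given

       x = a - b;
       y = x - c;

   step 1 rewrites the subtracts as x = a + (-b); y = x + (-c); steps 2-4
   linearize, optimize and rewrite the operand list {a, -b, -c} in rank
   order; and step 5 turns any remaining additions of negates back into
   subtracts.  */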
175 
176 /* Enable insertion of __builtin_powi calls during execute_reassoc.  See
177    point 3a in the pass header comment.  */
178 static bool reassoc_insert_powi_p;
179 
180 /* Statistics */
181 static struct
182 {
183   int linearized;
184   int constants_eliminated;
185   int ops_eliminated;
186   int rewritten;
187   int pows_encountered;
188   int pows_created;
189 } reassociate_stats;
190 
191 /* Operator, rank pair.  */
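/* Field notes: RANK is the operand's rank from get_rank, ID is a unique
   number used only to keep qsort stable, COUNT is the repeat count used by
   the repeated-factor/__builtin_powi handling, and STMT_TO_INSERT, if
   non-NULL, is a statement that still has to be inserted before this
   operand is used.  */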
192 struct operand_entry
193 {
194   unsigned int rank;
195   unsigned int id;
196   tree op;
197   unsigned int count;
198   gimple *stmt_to_insert;
199 };
200 
201 static object_allocator<operand_entry> operand_entry_pool
202   ("operand entry pool");
203 
204 /* This is used to assign a unique ID to each struct operand_entry
205    so that qsort results are identical on different hosts.  */
206 static unsigned int next_operand_entry_id;
207 
208 /* Starting rank number for a given basic block, so that we can rank
209    operations using unmovable instructions in that BB based on the bb
210    depth.  */
211 static int64_t *bb_rank;
212 
213 /* Operand->rank hashtable.  */
214 static hash_map<tree, int64_t> *operand_rank;
215 
216 /* Vector of SSA_NAMEs on which, after reassociate_bb is done with
217    all basic blocks, the CFG should be adjusted - the containing basic
218    block is split right after that SSA_NAME's definition statement and
219    before its only use, which must be a bit ior.  */
220 static vec<tree> reassoc_branch_fixups;
221 
222 /* Forward decls.  */
223 static int64_t get_rank (tree);
224 static bool reassoc_stmt_dominates_stmt_p (gimple *, gimple *);
225 
226 /* Wrapper around gsi_remove, which adjusts gimple_uid of debug stmts
227    possibly added by gsi_remove.  */
228 
229 bool
230 reassoc_remove_stmt (gimple_stmt_iterator *gsi)
231 {
232   gimple *stmt = gsi_stmt (*gsi);
233 
234   if (!MAY_HAVE_DEBUG_BIND_STMTS || gimple_code (stmt) == GIMPLE_PHI)
235     return gsi_remove (gsi, true);
236 
237   gimple_stmt_iterator prev = *gsi;
238   gsi_prev (&prev);
239   unsigned uid = gimple_uid (stmt);
240   basic_block bb = gimple_bb (stmt);
241   bool ret = gsi_remove (gsi, true);
242   if (!gsi_end_p (prev))
243     gsi_next (&prev);
244   else
245     prev = gsi_start_bb (bb);
246   gimple *end_stmt = gsi_stmt (*gsi);
247   while ((stmt = gsi_stmt (prev)) != end_stmt)
248     {
249       gcc_assert (stmt && is_gimple_debug (stmt) && gimple_uid (stmt) == 0);
250       gimple_set_uid (stmt, uid);
251       gsi_next (&prev);
252     }
253   return ret;
254 }
255 
256 /* Bias amount for loop-carried phis.  We want this to be larger than
257    the depth of any reassociation tree we can see, but not larger than
258    the rank difference between two blocks.  */
259 #define PHI_LOOP_BIAS (1 << 15)
260 
261 /* Rank assigned to a phi statement.  If STMT is a loop-carried phi of
262    an innermost loop, and the phi has only a single use which is inside
263    the loop, then the rank is the block rank of the loop latch plus an
264    extra bias for the loop-carried dependence.  This causes expressions
265    calculated into an accumulator variable to be independent for each
266    iteration of the loop.  If STMT is some other phi, the rank is the
267    block rank of its containing block.  */
268 static int64_t
269 phi_rank (gimple *stmt)
270 {
271   basic_block bb = gimple_bb (stmt);
272   class loop *father = bb->loop_father;
273   tree res;
274   unsigned i;
275   use_operand_p use;
276   gimple *use_stmt;
277 
278   /* We only care about real loops (those with a latch).  */
279   if (!father->latch)
280     return bb_rank[bb->index];
281 
282   /* Interesting phis must be in headers of innermost loops.  */
283   if (bb != father->header
284       || father->inner)
285     return bb_rank[bb->index];
286 
287   /* Ignore virtual SSA_NAMEs.  */
288   res = gimple_phi_result (stmt);
289   if (virtual_operand_p (res))
290     return bb_rank[bb->index];
291 
292   /* The phi definition must have a single use, and that use must be
293      within the loop.  Otherwise this isn't an accumulator pattern.  */
294   if (!single_imm_use (res, &use, &use_stmt)
295       || gimple_bb (use_stmt)->loop_father != father)
296     return bb_rank[bb->index];
297 
298   /* Look for phi arguments from within the loop.  If found, bias this phi.  */
299   for (i = 0; i < gimple_phi_num_args (stmt); i++)
300     {
301       tree arg = gimple_phi_arg_def (stmt, i);
302       if (TREE_CODE (arg) == SSA_NAME
303 	  && !SSA_NAME_IS_DEFAULT_DEF (arg))
304 	{
305 	  gimple *def_stmt = SSA_NAME_DEF_STMT (arg);
306 	  if (gimple_bb (def_stmt)->loop_father == father)
307 	    return bb_rank[father->latch->index] + PHI_LOOP_BIAS;
308 	}
309     }
310 
311   /* Must be an uninteresting phi.  */
312   return bb_rank[bb->index];
313 }
314 
315 /* If EXP is an SSA_NAME defined by a PHI statement that represents a
316    loop-carried dependence of an innermost loop, return TRUE; else
317    return FALSE.  */
318 static bool
319 loop_carried_phi (tree exp)
320 {
321   gimple *phi_stmt;
322   int64_t block_rank;
323 
324   if (TREE_CODE (exp) != SSA_NAME
325       || SSA_NAME_IS_DEFAULT_DEF (exp))
326     return false;
327 
328   phi_stmt = SSA_NAME_DEF_STMT (exp);
329 
330   if (gimple_code (SSA_NAME_DEF_STMT (exp)) != GIMPLE_PHI)
331     return false;
332 
333   /* Non-loop-carried phis have block rank.  Loop-carried phis have
334      an additional bias added in.  If this phi doesn't have block rank,
335      it's biased and should not be propagated.  */
336   block_rank = bb_rank[gimple_bb (phi_stmt)->index];
337 
338   if (phi_rank (phi_stmt) != block_rank)
339     return true;
340 
341   return false;
342 }
343 
344 /* Return the maximum of RANK and the rank that should be propagated
345    from expression OP.  For most operands, this is just the rank of OP.
346    For loop-carried phis, the value is zero to avoid undoing the bias
347    in favor of the phi.  */
348 static int64_t
349 propagate_rank (int64_t rank, tree op)
350 {
351   int64_t op_rank;
352 
353   if (loop_carried_phi (op))
354     return rank;
355 
356   op_rank = get_rank (op);
357 
358   return MAX (rank, op_rank);
359 }
360 
361 /* Look up the operand rank structure for expression E.  */
362 
363 static inline int64_t
364 find_operand_rank (tree e)
365 {
366   int64_t *slot = operand_rank->get (e);
367   return slot ? *slot : -1;
368 }
369 
370 /* Insert {E,RANK} into the operand rank hashtable.  */
371 
372 static inline void
373 insert_operand_rank (tree e, int64_t rank)
374 {
375   gcc_assert (rank > 0);
376   gcc_assert (!operand_rank->put (e, rank));
377 }
378 
379 /* Given an expression E, return the rank of the expression.  */
380 
381 static int64_t
382 get_rank (tree e)
383 {
384   /* SSA_NAMEs have the rank of the expression they are the result
385      of.
386      For globals and uninitialized values, the rank is 0.
387      For function arguments, use the pre-setup rank.
388      For PHI nodes, stores, asm statements, etc, we use the rank of
389      the BB.
390      For simple operations, the rank is the maximum rank of any of
391      its operands, or the bb_rank, whichever is less.
392      I make no claims that this is optimal; however, it gives good
393      results.  */
394 
395   /* We make an exception to the normal ranking system to break
396      dependences of accumulator variables in loops.  Suppose we
397      have a simple one-block loop containing:
398 
399        x_1 = phi(x_0, x_2)
400        b = a + x_1
401        c = b + d
402        x_2 = c + e
403 
404      As shown, each iteration of the calculation into x is fully
405      dependent upon the iteration before it.  We would prefer to
406      see this in the form:
407 
408        x_1 = phi(x_0, x_2)
409        b = a + d
410        c = b + e
411        x_2 = c + x_1
412 
413      If the loop is unrolled, the calculations of b and c from
414      different iterations can be interleaved.
415 
416      To obtain this result during reassociation, we bias the rank
417      of the phi definition x_1 upward, when it is recognized as an
418      accumulator pattern.  The artificial rank causes it to be
419      added last, providing the desired independence.  */
420 
421   if (TREE_CODE (e) == SSA_NAME)
422     {
423       ssa_op_iter iter;
424       gimple *stmt;
425       int64_t rank;
426       tree op;
427 
428       if (SSA_NAME_IS_DEFAULT_DEF (e))
429 	return find_operand_rank (e);
430 
431       stmt = SSA_NAME_DEF_STMT (e);
432       if (gimple_code (stmt) == GIMPLE_PHI)
433 	return phi_rank (stmt);
434 
435       if (!is_gimple_assign (stmt))
436 	return bb_rank[gimple_bb (stmt)->index];
437 
438       /* If we already have a rank for this expression, use that.  */
439       rank = find_operand_rank (e);
440       if (rank != -1)
441 	return rank;
442 
443       /* Otherwise, find the maximum rank for the operands.  As an
444 	 exception, remove the bias from loop-carried phis when propagating
445 	 the rank so that dependent operations are not also biased.  */
446       /* Simply walk over all SSA uses - this takes advantage of the
447          fact that non-SSA operands are is_gimple_min_invariant and
448 	 thus have rank 0.  */
449       rank = 0;
450       FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
451 	rank = propagate_rank (rank, op);
452 
453       if (dump_file && (dump_flags & TDF_DETAILS))
454 	{
455 	  fprintf (dump_file, "Rank for ");
456 	  print_generic_expr (dump_file, e);
457 	  fprintf (dump_file, " is %" PRId64 "\n", (rank + 1));
458 	}
459 
460       /* Note the rank in the hashtable so we don't recompute it.  */
461       insert_operand_rank (e, (rank + 1));
462       return (rank + 1);
463     }
464 
465   /* Constants, globals, etc., are rank 0 */
466   return 0;
467 }
468 
469 
470 /* We want integer ones to end up last no matter what, since they are
471    the ones we can do the most with.  */
472 #define INTEGER_CONST_TYPE 1 << 4
473 #define FLOAT_ONE_CONST_TYPE 1 << 3
474 #define FLOAT_CONST_TYPE 1 << 2
475 #define OTHER_CONST_TYPE 1 << 1
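
/* Note: sort_by_operand_rank subtracts these classifications for rank-0
   operands, so constants with a larger value here sort later in the
   operand list; integer constants therefore end up last.  */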
476 
477 /* Classify an invariant tree into integer, float, or other, so that
478    we can sort them to be near other constants of the same type.  */
479 static inline int
480 constant_type (tree t)
481 {
482   if (INTEGRAL_TYPE_P (TREE_TYPE (t)))
483     return INTEGER_CONST_TYPE;
484   else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (t)))
485     {
486       /* Sort -1.0 and 1.0 constants last: while in some cases
487 	 const_binop can't optimize some inexact operations, multiplication
488 	 by -1.0 or 1.0 can always be merged with others.  */
489       if (real_onep (t) || real_minus_onep (t))
490 	return FLOAT_ONE_CONST_TYPE;
491       return FLOAT_CONST_TYPE;
492     }
493   else
494     return OTHER_CONST_TYPE;
495 }
496 
497 /* qsort comparison function to sort operand entries PA and PB by rank
498    so that the sorted array is ordered by rank in decreasing order.  */
499 static int
500 sort_by_operand_rank (const void *pa, const void *pb)
501 {
502   const operand_entry *oea = *(const operand_entry *const *)pa;
503   const operand_entry *oeb = *(const operand_entry *const *)pb;
504 
505   if (oeb->rank != oea->rank)
506     return oeb->rank > oea->rank ? 1 : -1;
507 
508   /* It's nicer for optimize_expression if constants that are likely
509      to fold when added/multiplied/whatever are put next to each
510      other.  Since all constants have rank 0, order them by type.  */
511   if (oea->rank == 0)
512     {
513       if (constant_type (oeb->op) != constant_type (oea->op))
514 	return constant_type (oea->op) - constant_type (oeb->op);
515       else
516 	/* To make sorting result stable, we use unique IDs to determine
517 	   order.  */
518 	return oeb->id > oea->id ? 1 : -1;
519     }
520 
521   if (TREE_CODE (oea->op) != SSA_NAME)
522     {
523       if (TREE_CODE (oeb->op) != SSA_NAME)
524 	return oeb->id > oea->id ? 1 : -1;
525       else
526 	return 1;
527     }
528   else if (TREE_CODE (oeb->op) != SSA_NAME)
529     return -1;
530 
531   /* Lastly, make sure the versions that are the same go next to each
532      other.  */
533   if (SSA_NAME_VERSION (oeb->op) != SSA_NAME_VERSION (oea->op))
534     {
535       /* SSA_NAME_VERSION is assigned pretty randomly, because we reuse
536 	 versions of removed SSA_NAMEs, so if possible, prefer to sort
537 	 based on basic block and gimple_uid of the SSA_NAME_DEF_STMT.
538 	 See PR60418.  */
539       gimple *stmta = SSA_NAME_DEF_STMT (oea->op);
540       gimple *stmtb = SSA_NAME_DEF_STMT (oeb->op);
541       basic_block bba = gimple_bb (stmta);
542       basic_block bbb = gimple_bb (stmtb);
543       if (bbb != bba)
544 	{
545 	  /* One of the SSA_NAMEs can be defined in oeN->stmt_to_insert
546 	     but the other might not.  */
547 	  if (!bba)
548 	    return 1;
549 	  if (!bbb)
550 	    return -1;
551 	  /* If neither is, compare bb_rank.  */
552 	  if (bb_rank[bbb->index] != bb_rank[bba->index])
553 	    return (bb_rank[bbb->index] >> 16) - (bb_rank[bba->index] >> 16);
554 	}
555 
556       bool da = reassoc_stmt_dominates_stmt_p (stmta, stmtb);
557       bool db = reassoc_stmt_dominates_stmt_p (stmtb, stmta);
558       if (da != db)
559 	return da ? 1 : -1;
560 
561       return SSA_NAME_VERSION (oeb->op) > SSA_NAME_VERSION (oea->op) ? 1 : -1;
562     }
563 
564   return oeb->id > oea->id ? 1 : -1;
565 }
566 
567 /* Add an operand entry to *OPS for the tree operand OP.  */
568 
569 static void
570 add_to_ops_vec (vec<operand_entry *> *ops, tree op, gimple *stmt_to_insert = NULL)
571 {
572   operand_entry *oe = operand_entry_pool.allocate ();
573 
574   oe->op = op;
575   oe->rank = get_rank (op);
576   oe->id = next_operand_entry_id++;
577   oe->count = 1;
578   oe->stmt_to_insert = stmt_to_insert;
579   ops->safe_push (oe);
580 }
581 
582 /* Add an operand entry to *OPS for the tree operand OP with repeat
583    count REPEAT.  */
584 
585 static void
586 add_repeat_to_ops_vec (vec<operand_entry *> *ops, tree op,
587 		       HOST_WIDE_INT repeat)
588 {
589   operand_entry *oe = operand_entry_pool.allocate ();
590 
591   oe->op = op;
592   oe->rank = get_rank (op);
593   oe->id = next_operand_entry_id++;
594   oe->count = repeat;
595   oe->stmt_to_insert = NULL;
596   ops->safe_push (oe);
597 
598   reassociate_stats.pows_encountered++;
599 }
600 
601 /* Return true if STMT is a reassociable operation containing a binary
602    operation with tree code CODE, and is inside LOOP.  */
603 
604 static bool
605 is_reassociable_op (gimple *stmt, enum tree_code code, class loop *loop)
606 {
607   basic_block bb = gimple_bb (stmt);
608 
609   if (gimple_bb (stmt) == NULL)
610     return false;
611 
612   if (!flow_bb_inside_loop_p (loop, bb))
613     return false;
614 
615   if (is_gimple_assign (stmt)
616       && gimple_assign_rhs_code (stmt) == code
617       && has_single_use (gimple_assign_lhs (stmt)))
618     {
619       tree rhs1 = gimple_assign_rhs1 (stmt);
620       tree rhs2 = gimple_assign_rhs2 (stmt);
621       if (TREE_CODE (rhs1) == SSA_NAME
622 	  && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (rhs1))
623 	return false;
624       if (rhs2
625 	  && TREE_CODE (rhs2) == SSA_NAME
626 	  && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (rhs2))
627 	return false;
628       return true;
629     }
630 
631   return false;
632 }
633 
634 
635 /* Return true if STMT is a nop-conversion.  */
636 
637 static bool
638 gimple_nop_conversion_p (gimple *stmt)
639 {
640   if (gassign *ass = dyn_cast <gassign *> (stmt))
641     {
642       if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (ass))
643 	  && tree_nop_conversion_p (TREE_TYPE (gimple_assign_lhs (ass)),
644 				    TREE_TYPE (gimple_assign_rhs1 (ass))))
645 	return true;
646     }
647   return false;
648 }
649 
650 /* Given NAME, if NAME is defined by a unary operation OPCODE, return the
651    operand of that operation.  Otherwise, return NULL.  */
652 
653 static tree
654 get_unary_op (tree name, enum tree_code opcode)
655 {
656   gimple *stmt = SSA_NAME_DEF_STMT (name);
657 
658   /* Look through nop conversions (sign changes).  */
659   if (gimple_nop_conversion_p (stmt)
660       && TREE_CODE (gimple_assign_rhs1 (stmt)) == SSA_NAME)
661     stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
662 
663   if (!is_gimple_assign (stmt))
664     return NULL_TREE;
665 
666   if (gimple_assign_rhs_code (stmt) == opcode)
667     return gimple_assign_rhs1 (stmt);
668   return NULL_TREE;
669 }
670 
671 /* Return true if OP1 and OP2 have the same value when cast to either type.  */
672 
673 static bool
674 ops_equal_values_p (tree op1, tree op2)
675 {
676   if (op1 == op2)
677     return true;
678 
679   tree orig_op1 = op1;
680   if (TREE_CODE (op1) == SSA_NAME)
681     {
682       gimple *stmt = SSA_NAME_DEF_STMT (op1);
683       if (gimple_nop_conversion_p (stmt))
684 	{
685 	  op1 = gimple_assign_rhs1 (stmt);
686 	  if (op1 == op2)
687 	    return true;
688 	}
689     }
690 
691   if (TREE_CODE (op2) == SSA_NAME)
692     {
693       gimple *stmt = SSA_NAME_DEF_STMT (op2);
694       if (gimple_nop_conversion_p (stmt))
695 	{
696 	  op2 = gimple_assign_rhs1 (stmt);
697 	  if (op1 == op2
698 	      || orig_op1 == op2)
699 	    return true;
700 	}
701     }
702 
703   return false;
704 }
705 
706 
707 /* If CURR and LAST are a pair of ops that OPCODE allows us to
708    eliminate through equivalences, do so, remove them from OPS, and
709    return true.  Otherwise, return false.  */
710 
711 static bool
712 eliminate_duplicate_pair (enum tree_code opcode,
713 			  vec<operand_entry *> *ops,
714 			  bool *all_done,
715 			  unsigned int i,
716 			  operand_entry *curr,
717 			  operand_entry *last)
718 {
719 
720   /* If we have two of the same op, and the opcode is & |, min, or max,
721      we can eliminate one of them.
722      If we have two of the same op, and the opcode is ^, we can
723      eliminate both of them.  */
724 
725   if (last && last->op == curr->op)
726     {
727       switch (opcode)
728 	{
729 	case MAX_EXPR:
730 	case MIN_EXPR:
731 	case BIT_IOR_EXPR:
732 	case BIT_AND_EXPR:
733 	  if (dump_file && (dump_flags & TDF_DETAILS))
734 	    {
735 	      fprintf (dump_file, "Equivalence: ");
736 	      print_generic_expr (dump_file, curr->op);
737 	      fprintf (dump_file, " [&|minmax] ");
738 	      print_generic_expr (dump_file, last->op);
739 	      fprintf (dump_file, " -> ");
740 	      print_generic_stmt (dump_file, last->op);
741 	    }
742 
743 	  ops->ordered_remove (i);
744 	  reassociate_stats.ops_eliminated ++;
745 
746 	  return true;
747 
748 	case BIT_XOR_EXPR:
749 	  if (dump_file && (dump_flags & TDF_DETAILS))
750 	    {
751 	      fprintf (dump_file, "Equivalence: ");
752 	      print_generic_expr (dump_file, curr->op);
753 	      fprintf (dump_file, " ^ ");
754 	      print_generic_expr (dump_file, last->op);
755 	      fprintf (dump_file, " -> nothing\n");
756 	    }
757 
758 	  reassociate_stats.ops_eliminated += 2;
759 
760 	  if (ops->length () == 2)
761 	    {
762 	      ops->truncate (0);
763 	      add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (last->op)));
764 	      *all_done = true;
765 	    }
766 	  else
767 	    {
768 	      ops->ordered_remove (i-1);
769 	      ops->ordered_remove (i-1);
770 	    }
771 
772 	  return true;
773 
774 	default:
775 	  break;
776 	}
777     }
778   return false;
779 }
780 
781 static vec<tree> plus_negates;
782 
783 /* If OPCODE is PLUS_EXPR and CURR->OP is a negate expression or a bitwise
784    not expression, look in OPS for a corresponding positive operation to
785    cancel it out.  If we find one, remove the other from OPS, replace
786    OPS[CURRINDEX] with 0 or -1, respectively, and return true.  Otherwise,
787    return false. */
788 
789 static bool
790 eliminate_plus_minus_pair (enum tree_code opcode,
791 			   vec<operand_entry *> *ops,
792 			   unsigned int currindex,
793 			   operand_entry *curr)
794 {
795   tree negateop;
796   tree notop;
797   unsigned int i;
798   operand_entry *oe;
799 
800   if (opcode != PLUS_EXPR || TREE_CODE (curr->op) != SSA_NAME)
801     return false;
802 
803   negateop = get_unary_op (curr->op, NEGATE_EXPR);
804   notop = get_unary_op (curr->op, BIT_NOT_EXPR);
805   if (negateop == NULL_TREE && notop == NULL_TREE)
806     return false;
807 
808   /* Any non-negated version will have a rank that is one less than
809      the current rank.  So once we hit those ranks, if we don't find
810      one, we can stop.  */
811 
812   for (i = currindex + 1;
813        ops->iterate (i, &oe)
814        && oe->rank >= curr->rank - 1 ;
815        i++)
816     {
817       if (negateop
818 	  && ops_equal_values_p (oe->op, negateop))
819 	{
820 	  if (dump_file && (dump_flags & TDF_DETAILS))
821 	    {
822 	      fprintf (dump_file, "Equivalence: ");
823 	      print_generic_expr (dump_file, negateop);
824 	      fprintf (dump_file, " + -");
825 	      print_generic_expr (dump_file, oe->op);
826 	      fprintf (dump_file, " -> 0\n");
827 	    }
828 
829 	  ops->ordered_remove (i);
830 	  add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (oe->op)));
831 	  ops->ordered_remove (currindex);
832 	  reassociate_stats.ops_eliminated ++;
833 
834 	  return true;
835 	}
836       else if (notop
837 	       && ops_equal_values_p (oe->op, notop))
838 	{
839 	  tree op_type = TREE_TYPE (oe->op);
840 
841 	  if (dump_file && (dump_flags & TDF_DETAILS))
842 	    {
843 	      fprintf (dump_file, "Equivalence: ");
844 	      print_generic_expr (dump_file, notop);
845 	      fprintf (dump_file, " + ~");
846 	      print_generic_expr (dump_file, oe->op);
847 	      fprintf (dump_file, " -> -1\n");
848 	    }
849 
850 	  ops->ordered_remove (i);
851 	  add_to_ops_vec (ops, build_all_ones_cst (op_type));
852 	  ops->ordered_remove (currindex);
853 	  reassociate_stats.ops_eliminated ++;
854 
855 	  return true;
856 	}
857     }
858 
859   /* If CURR->OP is a negate expr without nop conversion in a plus expr:
860      save it for later inspection in repropagate_negates().  */
861   if (negateop != NULL_TREE
862       && gimple_assign_rhs_code (SSA_NAME_DEF_STMT (curr->op)) == NEGATE_EXPR)
863     plus_negates.safe_push (curr->op);
864 
865   return false;
866 }
867 
868 /* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR, and CURR->OP is really a
869    bitwise not expression, look in OPS for a corresponding operand to
870    cancel it out.  If we find one, remove the other from OPS, replace
871    OPS[CURRINDEX] with -1 or 0, respectively, and return true.  Otherwise,
872    return false. */
873 
874 static bool
875 eliminate_not_pairs (enum tree_code opcode,
876 		     vec<operand_entry *> *ops,
877 		     unsigned int currindex,
878 		     operand_entry *curr)
879 {
880   tree notop;
881   unsigned int i;
882   operand_entry *oe;
883 
884   if ((opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
885       || TREE_CODE (curr->op) != SSA_NAME)
886     return false;
887 
888   notop = get_unary_op (curr->op, BIT_NOT_EXPR);
889   if (notop == NULL_TREE)
890     return false;
891 
892   /* Any non-not version will have a rank that is one less than
893      the current rank.  So once we hit those ranks, if we don't find
894      one, we can stop.  */
895 
896   for (i = currindex + 1;
897        ops->iterate (i, &oe)
898        && oe->rank >= curr->rank - 1;
899        i++)
900     {
901       if (oe->op == notop)
902 	{
903 	  if (dump_file && (dump_flags & TDF_DETAILS))
904 	    {
905 	      fprintf (dump_file, "Equivalence: ");
906 	      print_generic_expr (dump_file, notop);
907 	      if (opcode == BIT_AND_EXPR)
908 		fprintf (dump_file, " & ~");
909 	      else if (opcode == BIT_IOR_EXPR)
910 		fprintf (dump_file, " | ~");
911 	      print_generic_expr (dump_file, oe->op);
912 	      if (opcode == BIT_AND_EXPR)
913 		fprintf (dump_file, " -> 0\n");
914 	      else if (opcode == BIT_IOR_EXPR)
915 		fprintf (dump_file, " -> -1\n");
916 	    }
917 
918 	  if (opcode == BIT_AND_EXPR)
919 	    oe->op = build_zero_cst (TREE_TYPE (oe->op));
920 	  else if (opcode == BIT_IOR_EXPR)
921 	    oe->op = build_all_ones_cst (TREE_TYPE (oe->op));
922 
923 	  reassociate_stats.ops_eliminated += ops->length () - 1;
924 	  ops->truncate (0);
925 	  ops->quick_push (oe);
926 	  return true;
927 	}
928     }
929 
930   return false;
931 }
932 
933 /* Use constant value that may be present in OPS to try to eliminate
934    operands.  Note that this function is only really used when we've
935    eliminated ops for other reasons, or merged constants.  Across
936    single statements, fold already does all of this, plus more.  There
937    is little point in duplicating logic, so I've only included the
938    identities that I could ever construct testcases to trigger.  */
939 
940 static void
941 eliminate_using_constants (enum tree_code opcode,
942 			   vec<operand_entry *> *ops)
943 {
944   operand_entry *oelast = ops->last ();
945   tree type = TREE_TYPE (oelast->op);
946 
947   if (oelast->rank == 0
948       && (ANY_INTEGRAL_TYPE_P (type) || FLOAT_TYPE_P (type)))
949     {
950       switch (opcode)
951 	{
952 	case BIT_AND_EXPR:
953 	  if (integer_zerop (oelast->op))
954 	    {
955 	      if (ops->length () != 1)
956 		{
957 		  if (dump_file && (dump_flags & TDF_DETAILS))
958 		    fprintf (dump_file, "Found & 0, removing all other ops\n");
959 
960 		  reassociate_stats.ops_eliminated += ops->length () - 1;
961 
962 		  ops->truncate (0);
963 		  ops->quick_push (oelast);
964 		  return;
965 		}
966 	    }
967 	  else if (integer_all_onesp (oelast->op))
968 	    {
969 	      if (ops->length () != 1)
970 		{
971 		  if (dump_file && (dump_flags & TDF_DETAILS))
972 		    fprintf (dump_file, "Found & -1, removing\n");
973 		  ops->pop ();
974 		  reassociate_stats.ops_eliminated++;
975 		}
976 	    }
977 	  break;
978 	case BIT_IOR_EXPR:
979 	  if (integer_all_onesp (oelast->op))
980 	    {
981 	      if (ops->length () != 1)
982 		{
983 		  if (dump_file && (dump_flags & TDF_DETAILS))
984 		    fprintf (dump_file, "Found | -1, removing all other ops\n");
985 
986 		  reassociate_stats.ops_eliminated += ops->length () - 1;
987 
988 		  ops->truncate (0);
989 		  ops->quick_push (oelast);
990 		  return;
991 		}
992 	    }
993 	  else if (integer_zerop (oelast->op))
994 	    {
995 	      if (ops->length () != 1)
996 		{
997 		  if (dump_file && (dump_flags & TDF_DETAILS))
998 		    fprintf (dump_file, "Found | 0, removing\n");
999 		  ops->pop ();
1000 		  reassociate_stats.ops_eliminated++;
1001 		}
1002 	    }
1003 	  break;
1004 	case MULT_EXPR:
1005 	  if (integer_zerop (oelast->op)
1006 	      || (FLOAT_TYPE_P (type)
1007 		  && !HONOR_NANS (type)
1008 		  && !HONOR_SIGNED_ZEROS (type)
1009 		  && real_zerop (oelast->op)))
1010 	    {
1011 	      if (ops->length () != 1)
1012 		{
1013 		  if (dump_file && (dump_flags & TDF_DETAILS))
1014 		    fprintf (dump_file, "Found * 0, removing all other ops\n");
1015 
1016 		  reassociate_stats.ops_eliminated += ops->length () - 1;
1017 		  ops->truncate (0);
1018 		  ops->quick_push (oelast);
1019 		  return;
1020 		}
1021 	    }
1022 	  else if (integer_onep (oelast->op)
1023 		   || (FLOAT_TYPE_P (type)
1024 		       && !HONOR_SNANS (type)
1025 		       && real_onep (oelast->op)))
1026 	    {
1027 	      if (ops->length () != 1)
1028 		{
1029 		  if (dump_file && (dump_flags & TDF_DETAILS))
1030 		    fprintf (dump_file, "Found * 1, removing\n");
1031 		  ops->pop ();
1032 		  reassociate_stats.ops_eliminated++;
1033 		  return;
1034 		}
1035 	    }
1036 	  break;
1037 	case BIT_XOR_EXPR:
1038 	case PLUS_EXPR:
1039 	case MINUS_EXPR:
1040 	  if (integer_zerop (oelast->op)
1041 	      || (FLOAT_TYPE_P (type)
1042 		  && (opcode == PLUS_EXPR || opcode == MINUS_EXPR)
1043 		  && fold_real_zero_addition_p (type, oelast->op,
1044 						opcode == MINUS_EXPR)))
1045 	    {
1046 	      if (ops->length () != 1)
1047 		{
1048 		  if (dump_file && (dump_flags & TDF_DETAILS))
1049 		    fprintf (dump_file, "Found [|^+] 0, removing\n");
1050 		  ops->pop ();
1051 		  reassociate_stats.ops_eliminated++;
1052 		  return;
1053 		}
1054 	    }
1055 	  break;
1056 	default:
1057 	  break;
1058 	}
1059     }
1060 }
1061 
1062 
1063 static void linearize_expr_tree (vec<operand_entry *> *, gimple *,
1064 				 bool, bool);
1065 
1066 /* Structure for tracking and counting operands.  */
1067 struct oecount {
1068   unsigned int cnt;
1069   unsigned int id;
1070   enum tree_code oecode;
1071   tree op;
1072 };
1073 
1074 
1075 /* The heap for the oecount hashtable and the sorted list of operands.  */
1076 static vec<oecount> cvec;
1077 
1078 
1079 /* Oecount hashtable helpers.  */
1080 
1081 struct oecount_hasher : int_hash <int, 0, 1>
1082 {
1083   static inline hashval_t hash (int);
1084   static inline bool equal (int, int);
1085 };
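
/* Entries stored in the hash table are indices into CVEC biased by 42
   (see undistribute_ops_list), which keeps them clear of the empty (0)
   and deleted (1) markers of int_hash.  */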
1086 
1087 /* Hash function for oecount.  */
1088 
1089 inline hashval_t
1090 oecount_hasher::hash (int p)
1091 {
1092   const oecount *c = &cvec[p - 42];
1093   return htab_hash_pointer (c->op) ^ (hashval_t)c->oecode;
1094 }
1095 
1096 /* Comparison function for oecount.  */
1097 
1098 inline bool
1099 oecount_hasher::equal (int p1, int p2)
1100 {
1101   const oecount *c1 = &cvec[p1 - 42];
1102   const oecount *c2 = &cvec[p2 - 42];
1103   return c1->oecode == c2->oecode && c1->op == c2->op;
1104 }
1105 
1106 /* Comparison function for qsort sorting oecount elements by count.  */
1107 
1108 static int
1109 oecount_cmp (const void *p1, const void *p2)
1110 {
1111   const oecount *c1 = (const oecount *)p1;
1112   const oecount *c2 = (const oecount *)p2;
1113   if (c1->cnt != c2->cnt)
1114     return c1->cnt > c2->cnt ? 1 : -1;
1115   else
1116     /* If counts are identical, use unique IDs to stabilize qsort.  */
1117     return c1->id > c2->id ? 1 : -1;
1118 }
1119 
1120 /* Return TRUE iff STMT represents a builtin call that raises OP
1121    to some exponent.  */
1122 
1123 static bool
1124 stmt_is_power_of_op (gimple *stmt, tree op)
1125 {
1126   if (!is_gimple_call (stmt))
1127     return false;
1128 
1129   switch (gimple_call_combined_fn (stmt))
1130     {
1131     CASE_CFN_POW:
1132     CASE_CFN_POWI:
1133       return (operand_equal_p (gimple_call_arg (stmt, 0), op, 0));
1134 
1135     default:
1136       return false;
1137     }
1138 }
1139 
1140 /* Given STMT which is a __builtin_pow* call, decrement its exponent
1141    in place and return the result.  Assumes that stmt_is_power_of_op
1142    was previously called for STMT and returned TRUE.  */
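/* For example (illustration only): for __builtin_pow (x, 5.0) the call is
   rewritten to __builtin_pow (x, 4.0) and 4 is returned.  */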
1143 
1144 static HOST_WIDE_INT
1145 decrement_power (gimple *stmt)
1146 {
1147   REAL_VALUE_TYPE c, cint;
1148   HOST_WIDE_INT power;
1149   tree arg1;
1150 
1151   switch (gimple_call_combined_fn (stmt))
1152     {
1153     CASE_CFN_POW:
1154       arg1 = gimple_call_arg (stmt, 1);
1155       c = TREE_REAL_CST (arg1);
1156       power = real_to_integer (&c) - 1;
1157       real_from_integer (&cint, VOIDmode, power, SIGNED);
1158       gimple_call_set_arg (stmt, 1, build_real (TREE_TYPE (arg1), cint));
1159       return power;
1160 
1161     CASE_CFN_POWI:
1162       arg1 = gimple_call_arg (stmt, 1);
1163       power = TREE_INT_CST_LOW (arg1) - 1;
1164       gimple_call_set_arg (stmt, 1, build_int_cst (TREE_TYPE (arg1), power));
1165       return power;
1166 
1167     default:
1168       gcc_unreachable ();
1169     }
1170 }
1171 
1172 /* Give STMT a new SSA_NAME to define, replace all uses of the old
1173    SSA_NAME with the new one, and return the new SSA_NAME.  */
1174 
1175 static tree
1176 make_new_ssa_for_def (gimple *stmt, enum tree_code opcode, tree op)
1177 {
1178   gimple *use_stmt;
1179   use_operand_p use;
1180   imm_use_iterator iter;
1181   tree new_lhs, new_debug_lhs = NULL_TREE;
1182   tree lhs = gimple_get_lhs (stmt);
1183 
1184   new_lhs = make_ssa_name (TREE_TYPE (lhs));
1185   gimple_set_lhs (stmt, new_lhs);
1186 
1187   /* Also need to update GIMPLE_DEBUGs.  */
1188   FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs)
1189     {
1190       tree repl = new_lhs;
1191       if (is_gimple_debug (use_stmt))
1192 	{
1193 	  if (new_debug_lhs == NULL_TREE)
1194 	    {
1195 	      new_debug_lhs = make_node (DEBUG_EXPR_DECL);
1196 	      gdebug *def_temp
1197 		= gimple_build_debug_bind (new_debug_lhs,
1198 					   build2 (opcode, TREE_TYPE (lhs),
1199 						   new_lhs, op),
1200 					   stmt);
1201 	      DECL_ARTIFICIAL (new_debug_lhs) = 1;
1202 	      TREE_TYPE (new_debug_lhs) = TREE_TYPE (lhs);
1203 	      SET_DECL_MODE (new_debug_lhs, TYPE_MODE (TREE_TYPE (lhs)));
1204 	      gimple_set_uid (def_temp, gimple_uid (stmt));
1205 	      gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
1206 	      gsi_insert_after (&gsi, def_temp, GSI_SAME_STMT);
1207 	    }
1208 	  repl = new_debug_lhs;
1209 	}
1210       FOR_EACH_IMM_USE_ON_STMT (use, iter)
1211 	SET_USE (use, repl);
1212       update_stmt (use_stmt);
1213     }
1214   return new_lhs;
1215 }
1216 
1217 /* Give each statement in STMTS_TO_FIX a new SSA_NAME to define and
1218    replace the uses of the old names with the new ones.  Also do this
1219    for the stmt that defines *DEF if *DEF is not OP.  */
1220 
1221 static void
1222 make_new_ssa_for_all_defs (tree *def, enum tree_code opcode, tree op,
1223 			   vec<gimple *> &stmts_to_fix)
1224 {
1225   unsigned i;
1226   gimple *stmt;
1227 
1228   if (*def != op
1229       && TREE_CODE (*def) == SSA_NAME
1230       && (stmt = SSA_NAME_DEF_STMT (*def))
1231       && gimple_code (stmt) != GIMPLE_NOP)
1232     *def = make_new_ssa_for_def (stmt, opcode, op);
1233 
1234   FOR_EACH_VEC_ELT (stmts_to_fix, i, stmt)
1235     make_new_ssa_for_def (stmt, opcode, op);
1236 }
1237 
1238 /* Find the single immediate use of STMT's LHS, and replace it
1239    with OP.  Remove STMT.  If STMT's LHS is the same as *DEF,
1240    replace *DEF with OP as well.  */
1241 
1242 static void
1243 propagate_op_to_single_use (tree op, gimple *stmt, tree *def)
1244 {
1245   tree lhs;
1246   gimple *use_stmt;
1247   use_operand_p use;
1248   gimple_stmt_iterator gsi;
1249 
1250   if (is_gimple_call (stmt))
1251     lhs = gimple_call_lhs (stmt);
1252   else
1253     lhs = gimple_assign_lhs (stmt);
1254 
1255   gcc_assert (has_single_use (lhs));
1256   single_imm_use (lhs, &use, &use_stmt);
1257   if (lhs == *def)
1258     *def = op;
1259   SET_USE (use, op);
1260   if (TREE_CODE (op) != SSA_NAME)
1261     update_stmt (use_stmt);
1262   gsi = gsi_for_stmt (stmt);
1263   unlink_stmt_vdef (stmt);
1264   reassoc_remove_stmt (&gsi);
1265   release_defs (stmt);
1266 }
1267 
1268 /* Walks the linear chain with result *DEF searching for an operation
1269    with operand OP and code OPCODE removing that from the chain.  *DEF
1270    is updated if there is only one operand but no operation left.  */
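/* For illustration, with a hypothetical chain

       t1 = a * b;
       t2 = t1 * c;

   and *DEF == t2, zero_one_operation (&def, MULT_EXPR, b) removes the
   multiplication by b so the chain computes a * c, rewriting the remaining
   statements onto fresh SSA names (see the PR72835 note below).  */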
1271 
1272 static void
1273 zero_one_operation (tree *def, enum tree_code opcode, tree op)
1274 {
1275   tree orig_def = *def;
1276   gimple *stmt = SSA_NAME_DEF_STMT (*def);
1277   /* PR72835 - Record the stmt chain that has to be updated such that
1278      we don't use the same LHS when the values computed are different.  */
1279   auto_vec<gimple *, 64> stmts_to_fix;
1280 
1281   do
1282     {
1283       tree name;
1284 
1285       if (opcode == MULT_EXPR)
1286 	{
1287 	  if (stmt_is_power_of_op (stmt, op))
1288 	    {
1289 	      if (decrement_power (stmt) == 1)
1290 		{
1291 		  if (stmts_to_fix.length () > 0)
1292 		    stmts_to_fix.pop ();
1293 		  propagate_op_to_single_use (op, stmt, def);
1294 		}
1295 	      break;
1296 	    }
1297 	  else if (gimple_assign_rhs_code (stmt) == NEGATE_EXPR)
1298 	    {
1299 	      if (gimple_assign_rhs1 (stmt) == op)
1300 		{
1301 		  tree cst = build_minus_one_cst (TREE_TYPE (op));
1302 		  if (stmts_to_fix.length () > 0)
1303 		    stmts_to_fix.pop ();
1304 		  propagate_op_to_single_use (cst, stmt, def);
1305 		  break;
1306 		}
1307 	      else if (integer_minus_onep (op)
1308 		       || real_minus_onep (op))
1309 		{
1310 		  gimple_assign_set_rhs_code
1311 		    (stmt, TREE_CODE (gimple_assign_rhs1 (stmt)));
1312 		  break;
1313 		}
1314 	    }
1315 	}
1316 
1317       name = gimple_assign_rhs1 (stmt);
1318 
1319       /* If this is the operation we look for and one of the operands
1320          is ours simply propagate the other operand into the stmts
1321 	 single use.  */
1322       if (gimple_assign_rhs_code (stmt) == opcode
1323 	  && (name == op
1324 	      || gimple_assign_rhs2 (stmt) == op))
1325 	{
1326 	  if (name == op)
1327 	    name = gimple_assign_rhs2 (stmt);
1328 	  if (stmts_to_fix.length () > 0)
1329 	    stmts_to_fix.pop ();
1330 	  propagate_op_to_single_use (name, stmt, def);
1331 	  break;
1332 	}
1333 
1334       /* We might have a multiply of two __builtin_pow* calls, and
1335 	 the operand might be hiding in the rightmost one.  Likewise
1336 	 this can happen for a negate.  */
1337       if (opcode == MULT_EXPR
1338 	  && gimple_assign_rhs_code (stmt) == opcode
1339 	  && TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME
1340 	  && has_single_use (gimple_assign_rhs2 (stmt)))
1341 	{
1342 	  gimple *stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
1343 	  if (stmt_is_power_of_op (stmt2, op))
1344 	    {
1345 	      if (decrement_power (stmt2) == 1)
1346 		propagate_op_to_single_use (op, stmt2, def);
1347 	      else
1348 		stmts_to_fix.safe_push (stmt2);
1349 	      break;
1350 	    }
1351 	  else if (is_gimple_assign (stmt2)
1352 		   && gimple_assign_rhs_code (stmt2) == NEGATE_EXPR)
1353 	    {
1354 	      if (gimple_assign_rhs1 (stmt2) == op)
1355 		{
1356 		  tree cst = build_minus_one_cst (TREE_TYPE (op));
1357 		  propagate_op_to_single_use (cst, stmt2, def);
1358 		  break;
1359 		}
1360 	      else if (integer_minus_onep (op)
1361 		       || real_minus_onep (op))
1362 		{
1363 		  stmts_to_fix.safe_push (stmt2);
1364 		  gimple_assign_set_rhs_code
1365 		    (stmt2, TREE_CODE (gimple_assign_rhs1 (stmt2)));
1366 		  break;
1367 		}
1368 	    }
1369 	}
1370 
1371       /* Continue walking the chain.  */
1372       gcc_assert (name != op
1373 		  && TREE_CODE (name) == SSA_NAME);
1374       stmt = SSA_NAME_DEF_STMT (name);
1375       stmts_to_fix.safe_push (stmt);
1376     }
1377   while (1);
1378 
1379   if (stmts_to_fix.length () > 0 || *def == orig_def)
1380     make_new_ssa_for_all_defs (def, opcode, op, stmts_to_fix);
1381 }
1382 
1383 /* Returns true if statement S1 dominates statement S2.  Like
1384    stmt_dominates_stmt_p, but uses stmt UIDs to optimize.  */
1385 
1386 static bool
1387 reassoc_stmt_dominates_stmt_p (gimple *s1, gimple *s2)
1388 {
1389   basic_block bb1 = gimple_bb (s1), bb2 = gimple_bb (s2);
1390 
1391   /* If bb1 is NULL, it should be a GIMPLE_NOP def stmt of an (D)
1392      SSA_NAME.  Assume it lives at the beginning of the function and
1393      thus dominates everything.  */
1394   if (!bb1 || s1 == s2)
1395     return true;
1396 
1397   /* If bb2 is NULL, it doesn't dominate any stmt with a bb.  */
1398   if (!bb2)
1399     return false;
1400 
1401   if (bb1 == bb2)
1402     {
1403       /* PHIs in the same basic block are assumed to be
1404 	 executed all in parallel; if only one stmt is a PHI,
1405 	 it dominates the other stmt in the same basic block.  */
1406       if (gimple_code (s1) == GIMPLE_PHI)
1407 	return true;
1408 
1409       if (gimple_code (s2) == GIMPLE_PHI)
1410 	return false;
1411 
1412       gcc_assert (gimple_uid (s1) && gimple_uid (s2));
1413 
1414       if (gimple_uid (s1) < gimple_uid (s2))
1415 	return true;
1416 
1417       if (gimple_uid (s1) > gimple_uid (s2))
1418 	return false;
1419 
1420       gimple_stmt_iterator gsi = gsi_for_stmt (s1);
1421       unsigned int uid = gimple_uid (s1);
1422       for (gsi_next (&gsi); !gsi_end_p (gsi); gsi_next (&gsi))
1423 	{
1424 	  gimple *s = gsi_stmt (gsi);
1425 	  if (gimple_uid (s) != uid)
1426 	    break;
1427 	  if (s == s2)
1428 	    return true;
1429 	}
1430 
1431       return false;
1432     }
1433 
1434   return dominated_by_p (CDI_DOMINATORS, bb2, bb1);
1435 }
1436 
1437 /* Insert STMT after INSERT_POINT.  */
1438 
1439 static void
1440 insert_stmt_after (gimple *stmt, gimple *insert_point)
1441 {
1442   gimple_stmt_iterator gsi;
1443   basic_block bb;
1444 
1445   if (gimple_code (insert_point) == GIMPLE_PHI)
1446     bb = gimple_bb (insert_point);
1447   else if (!stmt_ends_bb_p (insert_point))
1448     {
1449       gsi = gsi_for_stmt (insert_point);
1450       gimple_set_uid (stmt, gimple_uid (insert_point));
1451       gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
1452       return;
1453     }
1454   else
1455     /* We assume INSERT_POINT is a SSA_NAME_DEF_STMT of some SSA_NAME,
1456        thus if it must end a basic block, it should be a call that can
1457        throw, or some assignment that can throw.  If it throws, the LHS
1458        of it will not be initialized though, so only valid places using
1459        the SSA_NAME should be dominated by the fallthru edge.  */
1460     bb = find_fallthru_edge (gimple_bb (insert_point)->succs)->dest;
1461   gsi = gsi_after_labels (bb);
1462   if (gsi_end_p (gsi))
1463     {
1464       gimple_stmt_iterator gsi2 = gsi_last_bb (bb);
1465       gimple_set_uid (stmt,
1466 		      gsi_end_p (gsi2) ? 1 : gimple_uid (gsi_stmt (gsi2)));
1467     }
1468   else
1469     gimple_set_uid (stmt, gimple_uid (gsi_stmt (gsi)));
1470   gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
1471 }
1472 
1473 /* Builds one statement performing OP1 OPCODE OP2 using TMPVAR for
1474    the result.  Places the statement after the definition of either
1475    OP1 or OP2.  Returns the new statement.  */
1476 
1477 static gimple *
1478 build_and_add_sum (tree type, tree op1, tree op2, enum tree_code opcode)
1479 {
1480   gimple *op1def = NULL, *op2def = NULL;
1481   gimple_stmt_iterator gsi;
1482   tree op;
1483   gassign *sum;
1484 
1485   /* Create the addition statement.  */
1486   op = make_ssa_name (type);
1487   sum = gimple_build_assign (op, opcode, op1, op2);
1488 
1489   /* Find an insertion place and insert.  */
1490   if (TREE_CODE (op1) == SSA_NAME)
1491     op1def = SSA_NAME_DEF_STMT (op1);
1492   if (TREE_CODE (op2) == SSA_NAME)
1493     op2def = SSA_NAME_DEF_STMT (op2);
1494   if ((!op1def || gimple_nop_p (op1def))
1495       && (!op2def || gimple_nop_p (op2def)))
1496     {
1497       gsi = gsi_after_labels (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
1498       if (gsi_end_p (gsi))
1499 	{
1500 	  gimple_stmt_iterator gsi2
1501 	    = gsi_last_bb (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
1502 	  gimple_set_uid (sum,
1503 			  gsi_end_p (gsi2) ? 1 : gimple_uid (gsi_stmt (gsi2)));
1504 	}
1505       else
1506 	gimple_set_uid (sum, gimple_uid (gsi_stmt (gsi)));
1507       gsi_insert_before (&gsi, sum, GSI_NEW_STMT);
1508     }
1509   else
1510     {
1511       gimple *insert_point;
1512       if ((!op1def || gimple_nop_p (op1def))
1513 	   || (op2def && !gimple_nop_p (op2def)
1514 	       && reassoc_stmt_dominates_stmt_p (op1def, op2def)))
1515 	insert_point = op2def;
1516       else
1517 	insert_point = op1def;
1518       insert_stmt_after (sum, insert_point);
1519     }
1520   update_stmt (sum);
1521 
1522   return sum;
1523 }
1524 
1525 /* Perform un-distribution of divisions and multiplications.
1526    A * X + B * X is transformed into (A + B) * X and A / X + B / X
1527    to (A + B) / X for real X.
1528 
1529    The algorithm is organized as follows.
1530 
1531     - First we walk the addition chain *OPS looking for summands that
1532       are defined by a multiplication or a real division.  This results
1533       in the candidates bitmap with relevant indices into *OPS.
1534 
1535     - Second we build the chains of multiplications or divisions for
1536       these candidates, counting the number of occurrences of (operand, code)
1537       pairs in all of the candidates chains.
1538 
1539     - Third we sort the (operand, code) pairs by number of occurrence and
1540       process them starting with the pair with the most uses.
1541 
1542       * For each such pair we walk the candidates again to build a
1543         second candidate bitmap noting all multiplication/division chains
1544 	that have at least one occurrence of (operand, code).
1545 
1546       * We build an alternate addition chain only covering these
1547         candidates with one (operand, code) operation removed from their
1548 	multiplication/division chain.
1549 
1550       * The first candidate gets replaced by the alternate addition chain
1551         multiplied/divided by the operand.
1552 
1553       * All candidate chains get disabled for further processing and
1554         processing of (operand, code) pairs continues.
1555 
1556   The alternate addition chains built are re-processed by the main
1557   reassociation algorithm which allows optimizing a * x * y + b * y * x
1558   to (a + b ) * x * y in one invocation of the reassociation pass.  */
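/* A hypothetical example: for the addition chain {a * x, b * x, c} the pair
   (x, MULT_EXPR) occurs twice, so the chain is rewritten to
   {(a + b) * x, 0, c}; the zero entry is cleaned up by the normal operand
   optimizations later.  */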
1559 
1560 static bool
1561 undistribute_ops_list (enum tree_code opcode,
1562 		       vec<operand_entry *> *ops, class loop *loop)
1563 {
1564   unsigned int length = ops->length ();
1565   operand_entry *oe1;
1566   unsigned i, j;
1567   unsigned nr_candidates, nr_candidates2;
1568   sbitmap_iterator sbi0;
1569   vec<operand_entry *> *subops;
1570   bool changed = false;
1571   unsigned int next_oecount_id = 0;
1572 
1573   if (length <= 1
1574       || opcode != PLUS_EXPR)
1575     return false;
1576 
1577   /* Build a list of candidates to process.  */
1578   auto_sbitmap candidates (length);
1579   bitmap_clear (candidates);
1580   nr_candidates = 0;
1581   FOR_EACH_VEC_ELT (*ops, i, oe1)
1582     {
1583       enum tree_code dcode;
1584       gimple *oe1def;
1585 
1586       if (TREE_CODE (oe1->op) != SSA_NAME)
1587 	continue;
1588       oe1def = SSA_NAME_DEF_STMT (oe1->op);
1589       if (!is_gimple_assign (oe1def))
1590 	continue;
1591       dcode = gimple_assign_rhs_code (oe1def);
1592       if ((dcode != MULT_EXPR
1593 	   && dcode != RDIV_EXPR)
1594 	  || !is_reassociable_op (oe1def, dcode, loop))
1595 	continue;
1596 
1597       bitmap_set_bit (candidates, i);
1598       nr_candidates++;
1599     }
1600 
1601   if (nr_candidates < 2)
1602     return false;
1603 
1604   if (dump_file && (dump_flags & TDF_DETAILS))
1605     {
1606       fprintf (dump_file, "searching for un-distribute opportunities ");
1607       print_generic_expr (dump_file,
1608 	(*ops)[bitmap_first_set_bit (candidates)]->op, TDF_NONE);
1609       fprintf (dump_file, " %d\n", nr_candidates);
1610     }
1611 
1612   /* Build linearized sub-operand lists and the counting table.  */
1613   cvec.create (0);
1614 
1615   hash_table<oecount_hasher> ctable (15);
1616 
1617   /* ??? Macro arguments cannot have multi-argument template types in
1618      them.  This typedef is needed to work around that limitation.  */
1619   typedef vec<operand_entry *> vec_operand_entry_t_heap;
1620   subops = XCNEWVEC (vec_operand_entry_t_heap, ops->length ());
1621   EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
1622     {
1623       gimple *oedef;
1624       enum tree_code oecode;
1625       unsigned j;
1626 
1627       oedef = SSA_NAME_DEF_STMT ((*ops)[i]->op);
1628       oecode = gimple_assign_rhs_code (oedef);
1629       linearize_expr_tree (&subops[i], oedef,
1630 			   associative_tree_code (oecode), false);
1631 
1632       FOR_EACH_VEC_ELT (subops[i], j, oe1)
1633 	{
1634 	  oecount c;
1635 	  int *slot;
1636 	  int idx;
1637 	  c.oecode = oecode;
1638 	  c.cnt = 1;
1639 	  c.id = next_oecount_id++;
1640 	  c.op = oe1->op;
1641 	  cvec.safe_push (c);
1642 	  idx = cvec.length () + 41;
1643 	  slot = ctable.find_slot (idx, INSERT);
1644 	  if (!*slot)
1645 	    {
1646 	      *slot = idx;
1647 	    }
1648 	  else
1649 	    {
1650 	      cvec.pop ();
1651 	      cvec[*slot - 42].cnt++;
1652 	    }
1653 	}
1654     }
1655 
1656   /* Sort the counting table.  */
1657   cvec.qsort (oecount_cmp);
1658 
1659   if (dump_file && (dump_flags & TDF_DETAILS))
1660     {
1661       oecount *c;
1662       fprintf (dump_file, "Candidates:\n");
1663       FOR_EACH_VEC_ELT (cvec, j, c)
1664 	{
1665 	  fprintf (dump_file, "  %u %s: ", c->cnt,
1666 		   c->oecode == MULT_EXPR
1667 		   ? "*" : c->oecode == RDIV_EXPR ? "/" : "?");
1668 	  print_generic_expr (dump_file, c->op);
1669 	  fprintf (dump_file, "\n");
1670 	}
1671     }
1672 
1673   /* Process the (operand, code) pairs in order of most occurrence.  */
1674   auto_sbitmap candidates2 (length);
1675   while (!cvec.is_empty ())
1676     {
1677       oecount *c = &cvec.last ();
1678       if (c->cnt < 2)
1679 	break;
1680 
1681       /* Now collect the operands in the outer chain that contain
1682          the common operand in their inner chain.  */
1683       bitmap_clear (candidates2);
1684       nr_candidates2 = 0;
1685       EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
1686 	{
1687 	  gimple *oedef;
1688 	  enum tree_code oecode;
1689 	  unsigned j;
1690 	  tree op = (*ops)[i]->op;
1691 
1692 	  /* If we undistributed in this chain already this may be
1693 	     a constant.  */
1694 	  if (TREE_CODE (op) != SSA_NAME)
1695 	    continue;
1696 
1697 	  oedef = SSA_NAME_DEF_STMT (op);
1698 	  oecode = gimple_assign_rhs_code (oedef);
1699 	  if (oecode != c->oecode)
1700 	    continue;
1701 
1702 	  FOR_EACH_VEC_ELT (subops[i], j, oe1)
1703 	    {
1704 	      if (oe1->op == c->op)
1705 		{
1706 		  bitmap_set_bit (candidates2, i);
1707 		  ++nr_candidates2;
1708 		  break;
1709 		}
1710 	    }
1711 	}
1712 
1713       if (nr_candidates2 >= 2)
1714 	{
1715 	  operand_entry *oe1, *oe2;
1716 	  gimple *prod;
1717 	  int first = bitmap_first_set_bit (candidates2);
1718 
1719 	  /* Build the new addition chain.  */
1720 	  oe1 = (*ops)[first];
1721 	  if (dump_file && (dump_flags & TDF_DETAILS))
1722 	    {
1723 	      fprintf (dump_file, "Building (");
1724 	      print_generic_expr (dump_file, oe1->op);
1725 	    }
1726 	  zero_one_operation (&oe1->op, c->oecode, c->op);
1727 	  EXECUTE_IF_SET_IN_BITMAP (candidates2, first+1, i, sbi0)
1728 	    {
1729 	      gimple *sum;
1730 	      oe2 = (*ops)[i];
1731 	      if (dump_file && (dump_flags & TDF_DETAILS))
1732 		{
1733 		  fprintf (dump_file, " + ");
1734 		  print_generic_expr (dump_file, oe2->op);
1735 		}
1736 	      zero_one_operation (&oe2->op, c->oecode, c->op);
1737 	      sum = build_and_add_sum (TREE_TYPE (oe1->op),
1738 				       oe1->op, oe2->op, opcode);
1739 	      oe2->op = build_zero_cst (TREE_TYPE (oe2->op));
1740 	      oe2->rank = 0;
1741 	      oe1->op = gimple_get_lhs (sum);
1742 	    }
1743 
1744 	  /* Apply the multiplication/division.  */
1745 	  prod = build_and_add_sum (TREE_TYPE (oe1->op),
1746 				    oe1->op, c->op, c->oecode);
1747 	  if (dump_file && (dump_flags & TDF_DETAILS))
1748 	    {
1749 	      fprintf (dump_file, ") %s ", c->oecode == MULT_EXPR ? "*" : "/");
1750 	      print_generic_expr (dump_file, c->op);
1751 	      fprintf (dump_file, "\n");
1752 	    }
1753 
1754 	  /* Record it in the addition chain and disable further
1755 	     undistribution with this op.  */
1756 	  oe1->op = gimple_assign_lhs (prod);
1757 	  oe1->rank = get_rank (oe1->op);
1758 	  subops[first].release ();
1759 
1760 	  changed = true;
1761 	}
1762 
1763       cvec.pop ();
1764     }
1765 
1766   for (i = 0; i < ops->length (); ++i)
1767     subops[i].release ();
1768   free (subops);
1769   cvec.release ();
1770 
1771   return changed;
1772 }
1773 
1774 /* Pair to hold the information of one specific VECTOR_TYPE SSA_NAME:
1775    first: element index for each relevant BIT_FIELD_REF.
1776    second: the index into the *OPS vector for each relevant BIT_FIELD_REF.  */
1777 typedef std::pair<unsigned, unsigned> v_info_elem;
1778 struct v_info {
1779   tree vec_type;
1780   auto_vec<v_info_elem, 32> vec;
1781 };
1782 typedef v_info *v_info_ptr;
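/* For example (purely illustrative indices): if the summands at *OPS
   indices 4, 7, 2 and 9 are BIT_FIELD_REFs of elements 0, 1, 2 and 3 of
   the same vector V, the v_info recorded for V holds the pairs
   (0, 4), (1, 7), (2, 2) and (3, 9).  */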
1783 
1784 /* Comparison function for qsort on VECTOR SSA_NAME trees by machine mode.  */
1785 static int
1786 sort_by_mach_mode (const void *p_i, const void *p_j)
1787 {
1788   const tree tr1 = *((const tree *) p_i);
1789   const tree tr2 = *((const tree *) p_j);
1790   unsigned int mode1 = TYPE_MODE (TREE_TYPE (tr1));
1791   unsigned int mode2 = TYPE_MODE (TREE_TYPE (tr2));
1792   if (mode1 > mode2)
1793     return 1;
1794   else if (mode1 < mode2)
1795     return -1;
1796   if (SSA_NAME_VERSION (tr1) < SSA_NAME_VERSION (tr2))
1797     return -1;
1798   else if (SSA_NAME_VERSION (tr1) > SSA_NAME_VERSION (tr2))
1799     return 1;
1800   return 0;
1801 }
1802 
1803 /* Cleanup hash map for VECTOR information.  */
1804 static void
1805 cleanup_vinfo_map (hash_map<tree, v_info_ptr> &info_map)
1806 {
1807   for (hash_map<tree, v_info_ptr>::iterator it = info_map.begin ();
1808        it != info_map.end (); ++it)
1809     {
1810       v_info_ptr info = (*it).second;
1811       delete info;
1812       (*it).second = NULL;
1813     }
1814 }
1815 
1816 /* Perform un-distribution of BIT_FIELD_REF on VECTOR_TYPE.
1817      V1[0] + V1[1] + ... + V1[k] + V2[0] + V2[1] + ... + V2[k] + ... Vn[k]
1818    is transformed to
1819      Vs = (V1 + V2 + ... + Vn)
1820      Vs[0] + Vs[1] + ... + Vs[k]
1821 
1822    The basic steps are listed below:
1823 
1824     1) Check the addition chain *OPS, looking for summands that come from
1825        a BIT_FIELD_REF on a VECTOR type.  Put the information into
1826        v_info_map for each satisfying summand, using the VECTOR SSA_NAME as key.
1827 
1828     2) For each key (VECTOR SSA_NAME), validate that all its BIT_FIELD_REFs
1829        are contiguous and cover the whole VECTOR perfectly without any holes.
1830        Obtain one VECTOR list which contains the candidates to be transformed.
1831 
1832     3) Sort the VECTOR list by the machine mode of the VECTOR type.  For each
1833        group of candidates with the same mode, build the addition statements
1834        for them and generate BIT_FIELD_REFs accordingly.
1835 
1836    TODO:
1837        The current implementation requires that each whole VECTOR be fully
1838        covered, but it could be extended to support partial coverage
1839        (adjacent elements that do not fill the whole VECTOR); that would
1840        likely need a cost model to decide when the transformation pays off.
1841 */
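/* A minimal sketch of the transformation on hypothetical GIMPLE, assuming
   two V2SI vectors v1 and v2 whose elements are all summed up:

     _1 = BIT_FIELD_REF <v1_2(D), 32, 0>;
     _3 = BIT_FIELD_REF <v1_2(D), 32, 32>;
     _4 = BIT_FIELD_REF <v2_5(D), 32, 0>;
     _6 = BIT_FIELD_REF <v2_5(D), 32, 32>;
     sum_7 = _1 + _3 + _4 + _6;

   becomes

     vs_8 = v1_2(D) + v2_5(D);
     _9 = BIT_FIELD_REF <vs_8, 32, 0>;
     _10 = BIT_FIELD_REF <vs_8, 32, 32>;
     sum_7 = _9 + _10;  */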
1842 static bool
1843 undistribute_bitref_for_vector (enum tree_code opcode,
1844 				vec<operand_entry *> *ops, struct loop *loop)
1845 {
1846   if (ops->length () <= 1)
1847     return false;
1848 
1849   if (opcode != PLUS_EXPR
1850       && opcode != MULT_EXPR
1851       && opcode != BIT_XOR_EXPR
1852       && opcode != BIT_IOR_EXPR
1853       && opcode != BIT_AND_EXPR)
1854     return false;
1855 
1856   hash_map<tree, v_info_ptr> v_info_map;
1857   operand_entry *oe1;
1858   unsigned i;
1859 
1860   /* Find the summands in the addition chain that come from a VECTOR
1861      BIT_FIELD_REF and record their information in the map.  */
1862   FOR_EACH_VEC_ELT (*ops, i, oe1)
1863     {
1864       enum tree_code dcode;
1865       gimple *oe1def;
1866 
1867       if (TREE_CODE (oe1->op) != SSA_NAME)
1868 	continue;
1869       oe1def = SSA_NAME_DEF_STMT (oe1->op);
1870       if (!is_gimple_assign (oe1def))
1871 	continue;
1872       dcode = gimple_assign_rhs_code (oe1def);
1873       if (dcode != BIT_FIELD_REF || !is_reassociable_op (oe1def, dcode, loop))
1874 	continue;
1875 
1876       tree rhs = gimple_assign_rhs1 (oe1def);
1877       tree vec = TREE_OPERAND (rhs, 0);
1878       tree vec_type = TREE_TYPE (vec);
1879 
1880       if (TREE_CODE (vec) != SSA_NAME || !VECTOR_TYPE_P (vec_type))
1881 	continue;
1882 
1883       /* Ignore it if target machine can't support this VECTOR type.  */
1884       if (!VECTOR_MODE_P (TYPE_MODE (vec_type)))
1885 	continue;
1886 
1887       /* Check const vector type, constrain BIT_FIELD_REF offset and size.  */
1888       if (!TYPE_VECTOR_SUBPARTS (vec_type).is_constant ())
1889 	continue;
1890 
1891       if (VECTOR_TYPE_P (TREE_TYPE (rhs))
1892 	  || !is_a <scalar_mode> (TYPE_MODE (TREE_TYPE (rhs))))
1893 	continue;
1894 
1895       /* The type of BIT_FIELD_REF might not be equal to the element type of
1896 	 the vector.  We want to use a vector type with element type the
1897 	 same as the BIT_FIELD_REF and size the same as TREE_TYPE (vec).  */
1898       if (!useless_type_conversion_p (TREE_TYPE (rhs), TREE_TYPE (vec_type)))
1899 	{
1900 	  machine_mode simd_mode;
1901 	  unsigned HOST_WIDE_INT size, nunits;
1902 	  unsigned HOST_WIDE_INT elem_size
1903 	    = tree_to_uhwi (TYPE_SIZE (TREE_TYPE (rhs)));
1904 	  if (!GET_MODE_BITSIZE (TYPE_MODE (vec_type)).is_constant (&size))
1905 	    continue;
1906 	  if (size <= elem_size || (size % elem_size) != 0)
1907 	    continue;
1908 	  nunits = size / elem_size;
1909 	  if (!mode_for_vector (SCALAR_TYPE_MODE (TREE_TYPE (rhs)),
1910 				nunits).exists (&simd_mode))
1911 	    continue;
1912 	  vec_type = build_vector_type_for_mode (TREE_TYPE (rhs), simd_mode);
1913 
1914 	  /* Ignore it if target machine can't support this VECTOR type.  */
1915 	  if (!VECTOR_MODE_P (TYPE_MODE (vec_type)))
1916 	    continue;
1917 
1918 	  /* Check const vector type, constrain BIT_FIELD_REF offset and
1919 	     size.  */
1920 	  if (!TYPE_VECTOR_SUBPARTS (vec_type).is_constant ())
1921 	    continue;
1922 
1923 	  if (maybe_ne (GET_MODE_SIZE (TYPE_MODE (vec_type)),
1924 			GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (vec)))))
1925 	    continue;
1926 	}
1927 
1928       tree elem_type = TREE_TYPE (vec_type);
1929       unsigned HOST_WIDE_INT elem_size = tree_to_uhwi (TYPE_SIZE (elem_type));
1930       if (maybe_ne (bit_field_size (rhs), elem_size))
1931 	continue;
1932 
1933       unsigned idx;
1934       if (!constant_multiple_p (bit_field_offset (rhs), elem_size, &idx))
1935 	continue;
1936 
1937       /* Ignore it if target machine can't support this type of VECTOR
1938          operation.  */
1939       optab op_tab = optab_for_tree_code (opcode, vec_type, optab_vector);
1940       if (optab_handler (op_tab, TYPE_MODE (vec_type)) == CODE_FOR_nothing)
1941 	continue;
1942 
1943       bool existed;
1944       v_info_ptr &info = v_info_map.get_or_insert (vec, &existed);
1945       if (!existed)
1946 	{
1947 	  info = new v_info;
1948 	  info->vec_type = vec_type;
1949 	}
1950       else if (!types_compatible_p (vec_type, info->vec_type))
1951 	continue;
1952       info->vec.safe_push (std::make_pair (idx, i));
1953     }
1954 
1955   /* We need at least two VECTORs to combine.  */
1956   if (v_info_map.elements () <= 1)
1957     {
1958       cleanup_vinfo_map (v_info_map);
1959       return false;
1960     }
1961 
1962   /* Verify all VECTOR candidates by checking two conditions:
1963        1) sorted offsets are adjacent, no holes.
1964        2) can fill the whole VECTOR perfectly.
1965      And add the valid candidates to a vector for further handling.  */
1966   auto_vec<tree> valid_vecs (v_info_map.elements ());
1967   for (hash_map<tree, v_info_ptr>::iterator it = v_info_map.begin ();
1968        it != v_info_map.end (); ++it)
1969     {
1970       tree cand_vec = (*it).first;
1971       v_info_ptr cand_info = (*it).second;
1972       unsigned int num_elems
1973 	= TYPE_VECTOR_SUBPARTS (cand_info->vec_type).to_constant ();
1974       if (cand_info->vec.length () != num_elems)
1975 	continue;
1976       sbitmap holes = sbitmap_alloc (num_elems);
1977       bitmap_ones (holes);
1978       bool valid = true;
1979       v_info_elem *curr;
1980       FOR_EACH_VEC_ELT (cand_info->vec, i, curr)
1981 	{
1982 	  if (!bitmap_bit_p (holes, curr->first))
1983 	    {
1984 	      valid = false;
1985 	      break;
1986 	    }
1987 	  else
1988 	    bitmap_clear_bit (holes, curr->first);
1989 	}
1990       if (valid && bitmap_empty_p (holes))
1991 	valid_vecs.quick_push (cand_vec);
1992       sbitmap_free (holes);
1993     }
1994 
1995   /* We need at least two VECTORs to combine.  */
1996   if (valid_vecs.length () <= 1)
1997     {
1998       cleanup_vinfo_map (v_info_map);
1999       return false;
2000     }
2001 
2002   valid_vecs.qsort (sort_by_mach_mode);
2003   /* Go through all candidates in machine mode order; modes for which
2004      there is only a single candidate are skipped.  */
2005   for (unsigned i = 0; i < valid_vecs.length () - 1; ++i)
2006     {
2007       tree tvec = valid_vecs[i];
2008       enum machine_mode mode = TYPE_MODE (TREE_TYPE (tvec));
2009 
2010       /* Skip modes with only a single candidate.  */
2011       if (TYPE_MODE (TREE_TYPE (valid_vecs[i + 1])) != mode)
2012 	continue;
2013 
2014       unsigned int idx, j;
2015       gimple *sum = NULL;
2016       tree sum_vec = tvec;
2017       v_info_ptr info_ptr = *(v_info_map.get (tvec));
2018       v_info_elem *elem;
2019       tree vec_type = info_ptr->vec_type;
2020 
2021       /* Build the sum for all candidates with same mode.  */
2022       do
2023 	{
2024 	  sum = build_and_add_sum (vec_type, sum_vec,
2025 				   valid_vecs[i + 1], opcode);
2026 	  if (!useless_type_conversion_p (vec_type,
2027 					  TREE_TYPE (valid_vecs[i + 1])))
2028 	    {
2029 	      /* Update the operands only after build_and_add_sum,
2030 		 so that we don't have to repeat the placement algorithm
2031 		 of build_and_add_sum.  */
2032 	      gimple_stmt_iterator gsi = gsi_for_stmt (sum);
2033 	      tree vce = build1 (VIEW_CONVERT_EXPR, vec_type,
2034 				 valid_vecs[i + 1]);
2035 	      tree lhs = make_ssa_name (vec_type);
2036 	      gimple *g = gimple_build_assign (lhs, VIEW_CONVERT_EXPR, vce);
2037 	      gimple_set_uid (g, gimple_uid (sum));
2038 	      gsi_insert_before (&gsi, g, GSI_NEW_STMT);
2039 	      gimple_assign_set_rhs2 (sum, lhs);
2040 	      if (sum_vec == tvec)
2041 		{
2042 		  vce = build1 (VIEW_CONVERT_EXPR, vec_type, sum_vec);
2043 		  lhs = make_ssa_name (vec_type);
2044 		  g = gimple_build_assign (lhs, VIEW_CONVERT_EXPR, vce);
2045 		  gimple_set_uid (g, gimple_uid (sum));
2046 		  gsi_insert_before (&gsi, g, GSI_NEW_STMT);
2047 		  gimple_assign_set_rhs1 (sum, lhs);
2048 		}
2049 	      update_stmt (sum);
2050 	    }
2051 	  sum_vec = gimple_get_lhs (sum);
2052 	  info_ptr = *(v_info_map.get (valid_vecs[i + 1]));
2053 	  gcc_assert (types_compatible_p (vec_type, info_ptr->vec_type));
2054 	  /* Update those related ops of current candidate VECTOR.  */
2055 	  FOR_EACH_VEC_ELT (info_ptr->vec, j, elem)
2056 	    {
2057 	      idx = elem->second;
2058 	      gimple *def = SSA_NAME_DEF_STMT ((*ops)[idx]->op);
2059 	      /* Set this so that the op definition will get DCEd later.  */
2060 	      gimple_set_visited (def, true);
2061 	      if (opcode == PLUS_EXPR
2062 		  || opcode == BIT_XOR_EXPR
2063 		  || opcode == BIT_IOR_EXPR)
2064 		(*ops)[idx]->op = build_zero_cst (TREE_TYPE ((*ops)[idx]->op));
2065 	      else if (opcode == MULT_EXPR)
2066 		(*ops)[idx]->op = build_one_cst (TREE_TYPE ((*ops)[idx]->op));
2067 	      else
2068 		{
2069 		  gcc_assert (opcode == BIT_AND_EXPR);
2070 		  (*ops)[idx]->op
2071 		    = build_all_ones_cst (TREE_TYPE ((*ops)[idx]->op));
2072 		}
2073 	      (*ops)[idx]->rank = 0;
2074 	    }
2075 	  if (dump_file && (dump_flags & TDF_DETAILS))
2076 	    {
2077 	      fprintf (dump_file, "Generating addition -> ");
2078 	      print_gimple_stmt (dump_file, sum, 0);
2079 	    }
2080 	  i++;
2081 	}
2082       while ((i < valid_vecs.length () - 1)
2083 	     && TYPE_MODE (TREE_TYPE (valid_vecs[i + 1])) == mode);
2084 
2085       /* Referring to first valid VECTOR with this mode, generate the
2086          BIT_FIELD_REF statements accordingly.  */
2087       info_ptr = *(v_info_map.get (tvec));
2088       gcc_assert (sum);
2089       tree elem_type = TREE_TYPE (vec_type);
2090       FOR_EACH_VEC_ELT (info_ptr->vec, j, elem)
2091 	{
2092 	  idx = elem->second;
2093 	  tree dst = make_ssa_name (elem_type);
2094 	  tree pos = bitsize_int (elem->first
2095 				  * tree_to_uhwi (TYPE_SIZE (elem_type)));
2096 	  tree bfr = build3 (BIT_FIELD_REF, elem_type, sum_vec,
2097 			     TYPE_SIZE (elem_type), pos);
2098 	  gimple *gs = gimple_build_assign (dst, BIT_FIELD_REF, bfr);
2099 	  insert_stmt_after (gs, sum);
2100 	  gimple *def = SSA_NAME_DEF_STMT ((*ops)[idx]->op);
2101 	  /* Set this so that the op definition will get DCEd later.  */
2102 	  gimple_set_visited (def, true);
2103 	  (*ops)[idx]->op = gimple_assign_lhs (gs);
2104 	  (*ops)[idx]->rank = get_rank ((*ops)[idx]->op);
2105 	  if (dump_file && (dump_flags & TDF_DETAILS))
2106 	    {
2107 	      fprintf (dump_file, "Generating bit_field_ref -> ");
2108 	      print_gimple_stmt (dump_file, gs, 0);
2109 	    }
2110 	}
2111     }
2112 
2113   if (dump_file && (dump_flags & TDF_DETAILS))
2114     fprintf (dump_file, "undistributing bit_field_ref for vector done.\n");
2115 
2116   cleanup_vinfo_map (v_info_map);
2117 
2118   return true;
2119 }
2120 
2121 /* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR and CURR is a comparison
2122    expression, examine the other OPS to see if any of them are comparisons
2123    of the same values, which we may be able to combine or eliminate.
2124    For example, we can rewrite (a < b) | (a == b) as (a <= b).  */
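/* Likewise, for BIT_AND_EXPR the combined result may fold to a constant:
   e.g. (a < b) & (a > b) folds to 0, in which case the constant is added
   at the end of the operand list (an illustrative case only).  */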
2125 
2126 static bool
2127 eliminate_redundant_comparison (enum tree_code opcode,
2128 				vec<operand_entry *> *ops,
2129 				unsigned int currindex,
2130 				operand_entry *curr)
2131 {
2132   tree op1, op2;
2133   enum tree_code lcode, rcode;
2134   gimple *def1, *def2;
2135   int i;
2136   operand_entry *oe;
2137 
2138   if (opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
2139     return false;
2140 
2141   /* Check that CURR is a comparison.  */
2142   if (TREE_CODE (curr->op) != SSA_NAME)
2143     return false;
2144   def1 = SSA_NAME_DEF_STMT (curr->op);
2145   if (!is_gimple_assign (def1))
2146     return false;
2147   lcode = gimple_assign_rhs_code (def1);
2148   if (TREE_CODE_CLASS (lcode) != tcc_comparison)
2149     return false;
2150   op1 = gimple_assign_rhs1 (def1);
2151   op2 = gimple_assign_rhs2 (def1);
2152 
2153   /* Now look for a similar comparison in the remaining OPS.  */
2154   for (i = currindex + 1; ops->iterate (i, &oe); i++)
2155     {
2156       tree t;
2157 
2158       if (TREE_CODE (oe->op) != SSA_NAME)
2159 	continue;
2160       def2 = SSA_NAME_DEF_STMT (oe->op);
2161       if (!is_gimple_assign (def2))
2162 	continue;
2163       rcode = gimple_assign_rhs_code (def2);
2164       if (TREE_CODE_CLASS (rcode) != tcc_comparison)
2165 	continue;
2166 
2167       /* If we got here, we have a match.  See if we can combine the
2168 	 two comparisons.  */
2169       tree type = TREE_TYPE (gimple_assign_lhs (def1));
2170       if (opcode == BIT_IOR_EXPR)
2171 	t = maybe_fold_or_comparisons (type,
2172 				       lcode, op1, op2,
2173 				       rcode, gimple_assign_rhs1 (def2),
2174 				       gimple_assign_rhs2 (def2));
2175       else
2176 	t = maybe_fold_and_comparisons (type,
2177 					lcode, op1, op2,
2178 					rcode, gimple_assign_rhs1 (def2),
2179 					gimple_assign_rhs2 (def2));
2180       if (!t)
2181 	continue;
2182 
2183       /* maybe_fold_and_comparisons and maybe_fold_or_comparisons
2184 	 always give us a boolean_type_node value back.  If the original
2185 	 BIT_AND_EXPR or BIT_IOR_EXPR was of a wider integer type,
2186 	 we need to convert.  */
2187       if (!useless_type_conversion_p (TREE_TYPE (curr->op), TREE_TYPE (t)))
2188 	t = fold_convert (TREE_TYPE (curr->op), t);
2189 
2190       if (TREE_CODE (t) != INTEGER_CST
2191 	  && !operand_equal_p (t, curr->op, 0))
2192 	{
2193 	  enum tree_code subcode;
2194 	  tree newop1, newop2;
2195 	  if (!COMPARISON_CLASS_P (t))
2196 	    continue;
2197 	  extract_ops_from_tree (t, &subcode, &newop1, &newop2);
2198 	  STRIP_USELESS_TYPE_CONVERSION (newop1);
2199 	  STRIP_USELESS_TYPE_CONVERSION (newop2);
2200 	  if (!is_gimple_val (newop1) || !is_gimple_val (newop2))
2201 	    continue;
2202 	}
2203 
2204       if (dump_file && (dump_flags & TDF_DETAILS))
2205 	{
2206 	  fprintf (dump_file, "Equivalence: ");
2207 	  print_generic_expr (dump_file, curr->op);
2208 	  fprintf (dump_file, " %s ", op_symbol_code (opcode));
2209 	  print_generic_expr (dump_file, oe->op);
2210 	  fprintf (dump_file, " -> ");
2211 	  print_generic_expr (dump_file, t);
2212 	  fprintf (dump_file, "\n");
2213 	}
2214 
2215       /* Now we can delete oe, as it has been subsumed by the new combined
2216          expression t.  */
2217       ops->ordered_remove (i);
2218       reassociate_stats.ops_eliminated ++;
2219 
2220       /* If t is the same as curr->op, we're done.  Otherwise we must
2221 	 replace curr->op with t.  Special case is if we got a constant
2222 	 back, in which case we add it to the end instead of in place of
2223 	 the current entry.  */
2224       if (TREE_CODE (t) == INTEGER_CST)
2225 	{
2226 	  ops->ordered_remove (currindex);
2227 	  add_to_ops_vec (ops, t);
2228 	}
2229       else if (!operand_equal_p (t, curr->op, 0))
2230 	{
2231 	  gimple *sum;
2232 	  enum tree_code subcode;
2233 	  tree newop1;
2234 	  tree newop2;
2235 	  gcc_assert (COMPARISON_CLASS_P (t));
2236 	  extract_ops_from_tree (t, &subcode, &newop1, &newop2);
2237 	  STRIP_USELESS_TYPE_CONVERSION (newop1);
2238 	  STRIP_USELESS_TYPE_CONVERSION (newop2);
2239 	  gcc_checking_assert (is_gimple_val (newop1)
2240 			       && is_gimple_val (newop2));
2241 	  sum = build_and_add_sum (TREE_TYPE (t), newop1, newop2, subcode);
2242 	  curr->op = gimple_get_lhs (sum);
2243 	}
2244       return true;
2245     }
2246 
2247   return false;
2248 }
2249 
2250 
2251 /* Transform repeated addition of the same value into a multiplication
2252    by a constant.  */
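/* For example, in the operand list for x + x + x + y the three copies of
   x are removed and replaced by a single new entry holding x * 3.  */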
2253 static bool
2254 transform_add_to_multiply (vec<operand_entry *> *ops)
2255 {
2256   operand_entry *oe;
2257   tree op = NULL_TREE;
2258   int j;
2259   int i, start = -1, end = 0, count = 0;
2260   auto_vec<std::pair <int, int> > indxs;
2261   bool changed = false;
2262 
2263   if (!INTEGRAL_TYPE_P (TREE_TYPE ((*ops)[0]->op))
2264       && (!SCALAR_FLOAT_TYPE_P (TREE_TYPE ((*ops)[0]->op))
2265 	  || !flag_unsafe_math_optimizations))
2266     return false;
2267 
2268   /* Look for repeated operands.  */
2269   FOR_EACH_VEC_ELT (*ops, i, oe)
2270     {
2271       if (start == -1)
2272 	{
2273 	  count = 1;
2274 	  op = oe->op;
2275 	  start = i;
2276 	}
2277       else if (operand_equal_p (oe->op, op, 0))
2278 	{
2279 	  count++;
2280 	  end = i;
2281 	}
2282       else
2283 	{
2284 	  if (count > 1)
2285 	    indxs.safe_push (std::make_pair (start, end));
2286 	  count = 1;
2287 	  op = oe->op;
2288 	  start = i;
2289 	}
2290     }
2291 
2292   if (count > 1)
2293     indxs.safe_push (std::make_pair (start, end));
2294 
2295   for (j = indxs.length () - 1; j >= 0; --j)
2296     {
2297       /* Convert repeated operand addition to multiplication.  */
2298       start = indxs[j].first;
2299       end = indxs[j].second;
2300       op = (*ops)[start]->op;
2301       count = end - start + 1;
2302       for (i = end; i >= start; --i)
2303 	ops->unordered_remove (i);
2304       tree tmp = make_ssa_name (TREE_TYPE (op));
2305       tree cst = build_int_cst (integer_type_node, count);
2306       gassign *mul_stmt
2307 	= gimple_build_assign (tmp, MULT_EXPR,
2308 			       op, fold_convert (TREE_TYPE (op), cst));
2309       gimple_set_visited (mul_stmt, true);
2310       add_to_ops_vec (ops, tmp, mul_stmt);
2311       changed = true;
2312     }
2313 
2314   return changed;
2315 }
2316 
2317 
2318 /* Perform various identities and other optimizations on the list of
2319    operand entries, stored in OPS.  The tree code for the binary
2320    operation between all the operands is OPCODE.  */
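/* As a small illustration (hypothetical operands): for a PLUS_EXPR chain
   with operands {x, -x, 3, 5} the x / -x pair is eliminated and the two
   constants are merged, so the list effectively reduces to {8}.  */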
2321 
2322 static void
2323 optimize_ops_list (enum tree_code opcode,
2324 		   vec<operand_entry *> *ops)
2325 {
2326   unsigned int length = ops->length ();
2327   unsigned int i;
2328   operand_entry *oe;
2329   operand_entry *oelast = NULL;
2330   bool iterate = false;
2331 
2332   if (length == 1)
2333     return;
2334 
2335   oelast = ops->last ();
2336 
2337   /* If the last two are constants, pop the constants off, merge them
2338      and try the next two.  */
2339   if (oelast->rank == 0 && is_gimple_min_invariant (oelast->op))
2340     {
2341       operand_entry *oelm1 = (*ops)[length - 2];
2342 
2343       if (oelm1->rank == 0
2344 	  && is_gimple_min_invariant (oelm1->op)
2345 	  && useless_type_conversion_p (TREE_TYPE (oelm1->op),
2346 				       TREE_TYPE (oelast->op)))
2347 	{
2348 	  tree folded = fold_binary (opcode, TREE_TYPE (oelm1->op),
2349 				     oelm1->op, oelast->op);
2350 
2351 	  if (folded && is_gimple_min_invariant (folded))
2352 	    {
2353 	      if (dump_file && (dump_flags & TDF_DETAILS))
2354 		fprintf (dump_file, "Merging constants\n");
2355 
2356 	      ops->pop ();
2357 	      ops->pop ();
2358 
2359 	      add_to_ops_vec (ops, folded);
2360 	      reassociate_stats.constants_eliminated++;
2361 
2362 	      optimize_ops_list (opcode, ops);
2363 	      return;
2364 	    }
2365 	}
2366     }
2367 
2368   eliminate_using_constants (opcode, ops);
2369   oelast = NULL;
2370 
2371   for (i = 0; ops->iterate (i, &oe);)
2372     {
2373       bool done = false;
2374 
2375       if (eliminate_not_pairs (opcode, ops, i, oe))
2376 	return;
2377       if (eliminate_duplicate_pair (opcode, ops, &done, i, oe, oelast)
2378 	  || (!done && eliminate_plus_minus_pair (opcode, ops, i, oe))
2379 	  || (!done && eliminate_redundant_comparison (opcode, ops, i, oe)))
2380 	{
2381 	  if (done)
2382 	    return;
2383 	  iterate = true;
2384 	  oelast = NULL;
2385 	  continue;
2386 	}
2387       oelast = oe;
2388       i++;
2389     }
2390 
2391   if (iterate)
2392     optimize_ops_list (opcode, ops);
2393 }
2394 
2395 /* The following functions are subroutines to optimize_range_tests and allow
2396    it to try to change a logical combination of comparisons into a range
2397    test.
2398 
2399    For example, both
2400 	X == 2 || X == 5 || X == 3 || X == 4
2401    and
2402 	X >= 2 && X <= 5
2403    are converted to
2404 	(unsigned) (X - 2) <= 3
2405 
2406    For more information see the comments above fold_range_test in
2407    fold-const.c; this implementation is for GIMPLE.  */
2408 
2409 struct range_entry
2410 {
2411   tree exp;
2412   tree low;
2413   tree high;
2414   bool in_p;
2415   bool strict_overflow_p;
2416   unsigned int idx, next;
2417 };
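
/* For instance, the range test X >= 2 && X <= 5 (i.e. X in [2, 5]) is
   represented by exp = X, low = 2, high = 5 and in_p = true, while
   X != 3 is represented by low = high = 3 and in_p = false.  */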
2418 
2419 /* This is similar to make_range in fold-const.c, but on top of
2420    GIMPLE instead of trees.  If EXP is non-NULL, it should be
2421    an SSA_NAME and STMT argument is ignored, otherwise STMT
2422    argument should be a GIMPLE_COND.  */
2423 
2424 static void
2425 init_range_entry (struct range_entry *r, tree exp, gimple *stmt)
2426 {
2427   int in_p;
2428   tree low, high;
2429   bool is_bool, strict_overflow_p;
2430 
2431   r->exp = NULL_TREE;
2432   r->in_p = false;
2433   r->strict_overflow_p = false;
2434   r->low = NULL_TREE;
2435   r->high = NULL_TREE;
2436   if (exp != NULL_TREE
2437       && (TREE_CODE (exp) != SSA_NAME || !INTEGRAL_TYPE_P (TREE_TYPE (exp))))
2438     return;
2439 
2440   /* Start with simply saying "EXP != 0" and then look at the code of EXP
2441      and see if we can refine the range.  Some of the cases below may not
2442      happen, but it doesn't seem worth worrying about this.  We "continue"
2443      the outer loop when we've changed something; otherwise we "break"
2444      the switch, which will "break" the while.  */
2445   low = exp ? build_int_cst (TREE_TYPE (exp), 0) : boolean_false_node;
2446   high = low;
2447   in_p = 0;
2448   strict_overflow_p = false;
2449   is_bool = false;
2450   if (exp == NULL_TREE)
2451     is_bool = true;
2452   else if (TYPE_PRECISION (TREE_TYPE (exp)) == 1)
2453     {
2454       if (TYPE_UNSIGNED (TREE_TYPE (exp)))
2455 	is_bool = true;
2456       else
2457 	return;
2458     }
2459   else if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE)
2460     is_bool = true;
2461 
2462   while (1)
2463     {
2464       enum tree_code code;
2465       tree arg0, arg1, exp_type;
2466       tree nexp;
2467       location_t loc;
2468 
2469       if (exp != NULL_TREE)
2470 	{
2471 	  if (TREE_CODE (exp) != SSA_NAME
2472 	      || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp))
2473 	    break;
2474 
2475 	  stmt = SSA_NAME_DEF_STMT (exp);
2476 	  if (!is_gimple_assign (stmt))
2477 	    break;
2478 
2479 	  code = gimple_assign_rhs_code (stmt);
2480 	  arg0 = gimple_assign_rhs1 (stmt);
2481 	  arg1 = gimple_assign_rhs2 (stmt);
2482 	  exp_type = TREE_TYPE (exp);
2483 	}
2484       else
2485 	{
2486 	  code = gimple_cond_code (stmt);
2487 	  arg0 = gimple_cond_lhs (stmt);
2488 	  arg1 = gimple_cond_rhs (stmt);
2489 	  exp_type = boolean_type_node;
2490 	}
2491 
2492       if (TREE_CODE (arg0) != SSA_NAME
2493 	  || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (arg0))
2494 	break;
2495       loc = gimple_location (stmt);
2496       switch (code)
2497 	{
2498 	case BIT_NOT_EXPR:
2499 	  if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE
2500 	      /* Ensure the range is either +[-,0], +[0,0],
2501 		 -[-,0], -[0,0] or +[1,-], +[1,1], -[1,-] or
2502 		 -[1,1].  If it is e.g. +[-,-] or -[-,-]
2503 		 or similar expression of unconditional true or
2504 		 false, it should not be negated.  */
2505 	      && ((high && integer_zerop (high))
2506 		  || (low && integer_onep (low))))
2507 	    {
2508 	      in_p = !in_p;
2509 	      exp = arg0;
2510 	      continue;
2511 	    }
2512 	  break;
2513 	case SSA_NAME:
2514 	  exp = arg0;
2515 	  continue;
2516 	CASE_CONVERT:
2517 	  if (is_bool)
2518 	    {
2519 	      if ((TYPE_PRECISION (exp_type) == 1
2520 		   || TREE_CODE (exp_type) == BOOLEAN_TYPE)
2521 		  && TYPE_PRECISION (TREE_TYPE (arg0)) > 1)
2522 		return;
2523 	    }
2524 	  else if (TYPE_PRECISION (TREE_TYPE (arg0)) == 1)
2525 	    {
2526 	      if (TYPE_UNSIGNED (TREE_TYPE (arg0)))
2527 		is_bool = true;
2528 	      else
2529 		return;
2530 	    }
2531 	  else if (TREE_CODE (TREE_TYPE (arg0)) == BOOLEAN_TYPE)
2532 	    is_bool = true;
2533 	  goto do_default;
2534 	case EQ_EXPR:
2535 	case NE_EXPR:
2536 	case LT_EXPR:
2537 	case LE_EXPR:
2538 	case GE_EXPR:
2539 	case GT_EXPR:
2540 	  is_bool = true;
2541 	  /* FALLTHRU */
2542 	default:
2543 	  if (!is_bool)
2544 	    return;
2545 	do_default:
2546 	  nexp = make_range_step (loc, code, arg0, arg1, exp_type,
2547 				  &low, &high, &in_p,
2548 				  &strict_overflow_p);
2549 	  if (nexp != NULL_TREE)
2550 	    {
2551 	      exp = nexp;
2552 	      gcc_assert (TREE_CODE (exp) == SSA_NAME);
2553 	      continue;
2554 	    }
2555 	  break;
2556 	}
2557       break;
2558     }
2559   if (is_bool)
2560     {
2561       r->exp = exp;
2562       r->in_p = in_p;
2563       r->low = low;
2564       r->high = high;
2565       r->strict_overflow_p = strict_overflow_p;
2566     }
2567 }
2568 
2569 /* Comparison function for qsort.  Sort entries
2570    without SSA_NAME exp first, then with SSA_NAMEs sorted
2571    by increasing SSA_NAME_VERSION, and for the same SSA_NAMEs
2572    by increasing ->low and if ->low is the same, by increasing
2573    ->high.  ->low == NULL_TREE means minimum, ->high == NULL_TREE
2574    maximum.  */
2575 
2576 static int
2577 range_entry_cmp (const void *a, const void *b)
2578 {
2579   const struct range_entry *p = (const struct range_entry *) a;
2580   const struct range_entry *q = (const struct range_entry *) b;
2581 
2582   if (p->exp != NULL_TREE && TREE_CODE (p->exp) == SSA_NAME)
2583     {
2584       if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
2585 	{
2586 	  /* Group range_entries for the same SSA_NAME together.  */
2587 	  if (SSA_NAME_VERSION (p->exp) < SSA_NAME_VERSION (q->exp))
2588 	    return -1;
2589 	  else if (SSA_NAME_VERSION (p->exp) > SSA_NAME_VERSION (q->exp))
2590 	    return 1;
2591 	  /* If ->low is different, NULL low goes first, then by
2592 	     ascending low.  */
2593 	  if (p->low != NULL_TREE)
2594 	    {
2595 	      if (q->low != NULL_TREE)
2596 		{
2597 		  tree tem = fold_binary (LT_EXPR, boolean_type_node,
2598 					  p->low, q->low);
2599 		  if (tem && integer_onep (tem))
2600 		    return -1;
2601 		  tem = fold_binary (GT_EXPR, boolean_type_node,
2602 				     p->low, q->low);
2603 		  if (tem && integer_onep (tem))
2604 		    return 1;
2605 		}
2606 	      else
2607 		return 1;
2608 	    }
2609 	  else if (q->low != NULL_TREE)
2610 	    return -1;
2611 	  /* If ->high is different, NULL high goes last, before that by
2612 	     ascending high.  */
2613 	  if (p->high != NULL_TREE)
2614 	    {
2615 	      if (q->high != NULL_TREE)
2616 		{
2617 		  tree tem = fold_binary (LT_EXPR, boolean_type_node,
2618 					  p->high, q->high);
2619 		  if (tem && integer_onep (tem))
2620 		    return -1;
2621 		  tem = fold_binary (GT_EXPR, boolean_type_node,
2622 				     p->high, q->high);
2623 		  if (tem && integer_onep (tem))
2624 		    return 1;
2625 		}
2626 	      else
2627 		return -1;
2628 	    }
2629 	  else if (q->high != NULL_TREE)
2630 	    return 1;
2631 	  /* If both ranges are the same, sort below by ascending idx.  */
2632 	}
2633       else
2634 	return 1;
2635     }
2636   else if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
2637     return -1;
2638 
2639   if (p->idx < q->idx)
2640     return -1;
2641   else
2642     {
2643       gcc_checking_assert (p->idx > q->idx);
2644       return 1;
2645     }
2646 }
2647 
2648 /* Helper function for update_range_test.  Force EXPR into an SSA_NAME,
2649    insert needed statements BEFORE or after GSI.  */
2650 
2651 static tree
2652 force_into_ssa_name (gimple_stmt_iterator *gsi, tree expr, bool before)
2653 {
2654   enum gsi_iterator_update m = before ? GSI_SAME_STMT : GSI_CONTINUE_LINKING;
2655   tree ret = force_gimple_operand_gsi (gsi, expr, true, NULL_TREE, before, m);
2656   if (TREE_CODE (ret) != SSA_NAME)
2657     {
2658       gimple *g = gimple_build_assign (make_ssa_name (TREE_TYPE (ret)), ret);
2659       if (before)
2660 	gsi_insert_before (gsi, g, GSI_SAME_STMT);
2661       else
2662 	gsi_insert_after (gsi, g, GSI_CONTINUE_LINKING);
2663       ret = gimple_assign_lhs (g);
2664     }
2665   return ret;
2666 }
2667 
2668 /* Helper routine of optimize_range_test.
2669    [EXP, IN_P, LOW, HIGH, STRICT_OVERFLOW_P] is a merged range for
2670    RANGE and OTHERRANGE through OTHERRANGE + COUNT - 1 ranges,
2671    OPCODE and OPS are arguments of optimize_range_tests.  If OTHERRANGE
2672    is NULL, OTHERRANGEP should not be and then OTHERRANGEP points to
2673    an array of COUNT pointers to other ranges.  Return
2674    true if the range merge has been successful.
2675    If OPCODE is ERROR_MARK, this is called from within
2676    maybe_optimize_range_tests and is performing inter-bb range optimization.
2677    In that case, whether an op is BIT_AND_EXPR or BIT_IOR_EXPR is found in
2678    oe->rank.  */
2679 
2680 static bool
2681 update_range_test (struct range_entry *range, struct range_entry *otherrange,
2682 		   struct range_entry **otherrangep,
2683 		   unsigned int count, enum tree_code opcode,
2684 		   vec<operand_entry *> *ops, tree exp, gimple_seq seq,
2685 		   bool in_p, tree low, tree high, bool strict_overflow_p)
2686 {
2687   operand_entry *oe = (*ops)[range->idx];
2688   tree op = oe->op;
2689   gimple *stmt = op ? SSA_NAME_DEF_STMT (op)
2690 		    : last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id));
2691   location_t loc = gimple_location (stmt);
2692   tree optype = op ? TREE_TYPE (op) : boolean_type_node;
2693   tree tem = build_range_check (loc, optype, unshare_expr (exp),
2694 				in_p, low, high);
2695   enum warn_strict_overflow_code wc = WARN_STRICT_OVERFLOW_COMPARISON;
2696   gimple_stmt_iterator gsi;
2697   unsigned int i, uid;
2698 
2699   if (tem == NULL_TREE)
2700     return false;
2701 
2702   /* If op is default def SSA_NAME, there is no place to insert the
2703      new comparison.  Give up, unless we can use OP itself as the
2704      range test.  */
2705   if (op && SSA_NAME_IS_DEFAULT_DEF (op))
2706     {
2707       if (op == range->exp
2708 	  && ((TYPE_PRECISION (optype) == 1 && TYPE_UNSIGNED (optype))
2709 	      || TREE_CODE (optype) == BOOLEAN_TYPE)
2710 	  && (op == tem
2711 	      || (TREE_CODE (tem) == EQ_EXPR
2712 		  && TREE_OPERAND (tem, 0) == op
2713 		  && integer_onep (TREE_OPERAND (tem, 1))))
2714 	  && opcode != BIT_IOR_EXPR
2715 	  && (opcode != ERROR_MARK || oe->rank != BIT_IOR_EXPR))
2716 	{
2717 	  stmt = NULL;
2718 	  tem = op;
2719 	}
2720       else
2721 	return false;
2722     }
2723 
2724   if (strict_overflow_p && issue_strict_overflow_warning (wc))
2725     warning_at (loc, OPT_Wstrict_overflow,
2726 		"assuming signed overflow does not occur "
2727 		"when simplifying range test");
2728 
2729   if (dump_file && (dump_flags & TDF_DETAILS))
2730     {
2731       struct range_entry *r;
2732       fprintf (dump_file, "Optimizing range tests ");
2733       print_generic_expr (dump_file, range->exp);
2734       fprintf (dump_file, " %c[", range->in_p ? '+' : '-');
2735       print_generic_expr (dump_file, range->low);
2736       fprintf (dump_file, ", ");
2737       print_generic_expr (dump_file, range->high);
2738       fprintf (dump_file, "]");
2739       for (i = 0; i < count; i++)
2740 	{
2741 	  if (otherrange)
2742 	    r = otherrange + i;
2743 	  else
2744 	    r = otherrangep[i];
2745 	  if (r->exp
2746 	      && r->exp != range->exp
2747 	      && TREE_CODE (r->exp) == SSA_NAME)
2748 	    {
2749 	      fprintf (dump_file, " and ");
2750 	      print_generic_expr (dump_file, r->exp);
2751 	    }
2752 	  else
2753 	    fprintf (dump_file, " and");
2754 	  fprintf (dump_file, " %c[", r->in_p ? '+' : '-');
2755 	  print_generic_expr (dump_file, r->low);
2756 	  fprintf (dump_file, ", ");
2757 	  print_generic_expr (dump_file, r->high);
2758 	  fprintf (dump_file, "]");
2759 	}
2760       fprintf (dump_file, "\n into ");
2761       print_generic_expr (dump_file, tem);
2762       fprintf (dump_file, "\n");
2763     }
2764 
2765   if (opcode == BIT_IOR_EXPR
2766       || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
2767     tem = invert_truthvalue_loc (loc, tem);
2768 
2769   tem = fold_convert_loc (loc, optype, tem);
2770   if (stmt)
2771     {
2772       gsi = gsi_for_stmt (stmt);
2773       uid = gimple_uid (stmt);
2774     }
2775   else
2776     {
2777       gsi = gsi_none ();
2778       uid = 0;
2779     }
2780   if (stmt == NULL)
2781     gcc_checking_assert (tem == op);
2782   /* In rare cases range->exp can be equal to lhs of stmt.
2783      In that case we have to insert after the stmt rather than before
2784      it.  If stmt is a PHI, insert it at the start of the basic block.  */
2785   else if (op != range->exp)
2786     {
2787       gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT);
2788       tem = force_into_ssa_name (&gsi, tem, true);
2789       gsi_prev (&gsi);
2790     }
2791   else if (gimple_code (stmt) != GIMPLE_PHI)
2792     {
2793       gsi_insert_seq_after (&gsi, seq, GSI_CONTINUE_LINKING);
2794       tem = force_into_ssa_name (&gsi, tem, false);
2795     }
2796   else
2797     {
2798       gsi = gsi_after_labels (gimple_bb (stmt));
2799       if (!gsi_end_p (gsi))
2800 	uid = gimple_uid (gsi_stmt (gsi));
2801       else
2802 	{
2803 	  gsi = gsi_start_bb (gimple_bb (stmt));
2804 	  uid = 1;
2805 	  while (!gsi_end_p (gsi))
2806 	    {
2807 	      uid = gimple_uid (gsi_stmt (gsi));
2808 	      gsi_next (&gsi);
2809 	    }
2810 	}
2811       gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT);
2812       tem = force_into_ssa_name (&gsi, tem, true);
2813       if (gsi_end_p (gsi))
2814 	gsi = gsi_last_bb (gimple_bb (stmt));
2815       else
2816 	gsi_prev (&gsi);
2817     }
2818   for (; !gsi_end_p (gsi); gsi_prev (&gsi))
2819     if (gimple_uid (gsi_stmt (gsi)))
2820       break;
2821     else
2822       gimple_set_uid (gsi_stmt (gsi), uid);
2823 
2824   oe->op = tem;
2825   range->exp = exp;
2826   range->low = low;
2827   range->high = high;
2828   range->in_p = in_p;
2829   range->strict_overflow_p = false;
2830 
2831   for (i = 0; i < count; i++)
2832     {
2833       if (otherrange)
2834 	range = otherrange + i;
2835       else
2836 	range = otherrangep[i];
2837       oe = (*ops)[range->idx];
2838       /* Now change all the other range test immediate uses, so that
2839 	 those tests will be optimized away.  */
2840       if (opcode == ERROR_MARK)
2841 	{
2842 	  if (oe->op)
2843 	    oe->op = build_int_cst (TREE_TYPE (oe->op),
2844 				    oe->rank == BIT_IOR_EXPR ? 0 : 1);
2845 	  else
2846 	    oe->op = (oe->rank == BIT_IOR_EXPR
2847 		      ? boolean_false_node : boolean_true_node);
2848 	}
2849       else
2850 	oe->op = error_mark_node;
2851       range->exp = NULL_TREE;
2852       range->low = NULL_TREE;
2853       range->high = NULL_TREE;
2854     }
2855   return true;
2856 }
2857 
2858 /* Optimize X == CST1 || X == CST2
2859    if popcount (CST1 ^ CST2) == 1 into
2860    (X & ~(CST1 ^ CST2)) == (CST1 & ~(CST1 ^ CST2)).
2861    Similarly for ranges.  E.g.
2862    X != 2 && X != 3 && X != 10 && X != 11
2863    will be transformed by the previous optimization into
2864    !((X - 2U) <= 1U || (X - 10U) <= 1U)
2865    and this loop can transform that into
2866    !(((X & ~8) - 2U) <= 1U).  */
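
/* A concrete instance (illustrative numbers): for X == 12 || X == 14 we
   have lowi ^ lowj == 2, a power of two, so the pair of tests becomes
   (X & ~2) == 12.  */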
2867 
2868 static bool
2869 optimize_range_tests_xor (enum tree_code opcode, tree type,
2870 			  tree lowi, tree lowj, tree highi, tree highj,
2871 			  vec<operand_entry *> *ops,
2872 			  struct range_entry *rangei,
2873 			  struct range_entry *rangej)
2874 {
2875   tree lowxor, highxor, tem, exp;
2876   /* Check lowi ^ lowj == highi ^ highj and
2877      popcount (lowi ^ lowj) == 1.  */
2878   lowxor = fold_binary (BIT_XOR_EXPR, type, lowi, lowj);
2879   if (lowxor == NULL_TREE || TREE_CODE (lowxor) != INTEGER_CST)
2880     return false;
2881   if (!integer_pow2p (lowxor))
2882     return false;
2883   highxor = fold_binary (BIT_XOR_EXPR, type, highi, highj);
2884   if (!tree_int_cst_equal (lowxor, highxor))
2885     return false;
2886 
2887   exp = rangei->exp;
2888   scalar_int_mode mode = as_a <scalar_int_mode> (TYPE_MODE (type));
2889   int prec = GET_MODE_PRECISION (mode);
2890   if (TYPE_PRECISION (type) < prec
2891       || (wi::to_wide (TYPE_MIN_VALUE (type))
2892 	  != wi::min_value (prec, TYPE_SIGN (type)))
2893       || (wi::to_wide (TYPE_MAX_VALUE (type))
2894 	  != wi::max_value (prec, TYPE_SIGN (type))))
2895     {
2896       type = build_nonstandard_integer_type (prec, TYPE_UNSIGNED (type));
2897       exp = fold_convert (type, exp);
2898       lowxor = fold_convert (type, lowxor);
2899       lowi = fold_convert (type, lowi);
2900       highi = fold_convert (type, highi);
2901     }
2902   tem = fold_build1 (BIT_NOT_EXPR, type, lowxor);
2903   exp = fold_build2 (BIT_AND_EXPR, type, exp, tem);
2904   lowj = fold_build2 (BIT_AND_EXPR, type, lowi, tem);
2905   highj = fold_build2 (BIT_AND_EXPR, type, highi, tem);
2906   if (update_range_test (rangei, rangej, NULL, 1, opcode, ops, exp,
2907 			 NULL, rangei->in_p, lowj, highj,
2908 			 rangei->strict_overflow_p
2909 			 || rangej->strict_overflow_p))
2910     return true;
2911   return false;
2912 }
2913 
2914 /* Optimize X == CST1 || X == CST2
2915    if popcount (CST2 - CST1) == 1 into
2916    ((X - CST1) & ~(CST2 - CST1)) == 0.
2917    Similarly for ranges.  E.g.
2918    X == 43 || X == 76 || X == 44 || X == 78 || X == 77 || X == 46
2919    || X == 75 || X == 45
2920    will be transformed by the previous optimization into
2921    (X - 43U) <= 3U || (X - 75U) <= 3U
2922    and this loop can transform that into
2923    ((X - 43U) & ~(75U - 43U)) <= 3U.  */
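/* A concrete instance (illustrative numbers): for X == 43 || X == 75 we
   have CST2 - CST1 == 32, a power of two, so the pair of tests becomes
   ((X - 43U) & ~32U) == 0.  */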
2924 static bool
2925 optimize_range_tests_diff (enum tree_code opcode, tree type,
2926 			   tree lowi, tree lowj, tree highi, tree highj,
2927 			   vec<operand_entry *> *ops,
2928 			   struct range_entry *rangei,
2929 			   struct range_entry *rangej)
2930 {
2931   tree tem1, tem2, mask;
2932   /* Check highi - lowi == highj - lowj.  */
2933   tem1 = fold_binary (MINUS_EXPR, type, highi, lowi);
2934   if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST)
2935     return false;
2936   tem2 = fold_binary (MINUS_EXPR, type, highj, lowj);
2937   if (!tree_int_cst_equal (tem1, tem2))
2938     return false;
2939   /* Check popcount (lowj - lowi) == 1.  */
2940   tem1 = fold_binary (MINUS_EXPR, type, lowj, lowi);
2941   if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST)
2942     return false;
2943   if (!integer_pow2p (tem1))
2944     return false;
2945 
2946   scalar_int_mode mode = as_a <scalar_int_mode> (TYPE_MODE (type));
2947   int prec = GET_MODE_PRECISION (mode);
2948   if (TYPE_PRECISION (type) < prec
2949       || (wi::to_wide (TYPE_MIN_VALUE (type))
2950 	  != wi::min_value (prec, TYPE_SIGN (type)))
2951       || (wi::to_wide (TYPE_MAX_VALUE (type))
2952 	  != wi::max_value (prec, TYPE_SIGN (type))))
2953     type = build_nonstandard_integer_type (prec, 1);
2954   else
2955     type = unsigned_type_for (type);
2956   tem1 = fold_convert (type, tem1);
2957   tem2 = fold_convert (type, tem2);
2958   lowi = fold_convert (type, lowi);
2959   mask = fold_build1 (BIT_NOT_EXPR, type, tem1);
2960   tem1 = fold_build2 (MINUS_EXPR, type,
2961 		      fold_convert (type, rangei->exp), lowi);
2962   tem1 = fold_build2 (BIT_AND_EXPR, type, tem1, mask);
2963   lowj = build_int_cst (type, 0);
2964   if (update_range_test (rangei, rangej, NULL, 1, opcode, ops, tem1,
2965 			 NULL, rangei->in_p, lowj, tem2,
2966 			 rangei->strict_overflow_p
2967 			 || rangej->strict_overflow_p))
2968     return true;
2969   return false;
2970 }
2971 
2972 /* Perform the checks common to optimize_range_tests_xor and
2973    optimize_range_tests_diff.
2974    If OPTIMIZE_XOR is TRUE, call optimize_range_tests_xor.
2975    Else call optimize_range_tests_diff.  */
2976 
2977 static bool
2978 optimize_range_tests_1 (enum tree_code opcode, int first, int length,
2979 			bool optimize_xor, vec<operand_entry *> *ops,
2980 			struct range_entry *ranges)
2981 {
2982   int i, j;
2983   bool any_changes = false;
2984   for (i = first; i < length; i++)
2985     {
2986       tree lowi, highi, lowj, highj, type, tem;
2987 
2988       if (ranges[i].exp == NULL_TREE || ranges[i].in_p)
2989 	continue;
2990       type = TREE_TYPE (ranges[i].exp);
2991       if (!INTEGRAL_TYPE_P (type))
2992 	continue;
2993       lowi = ranges[i].low;
2994       if (lowi == NULL_TREE)
2995 	lowi = TYPE_MIN_VALUE (type);
2996       highi = ranges[i].high;
2997       if (highi == NULL_TREE)
2998 	continue;
2999       for (j = i + 1; j < length && j < i + 64; j++)
3000 	{
3001 	  bool changes;
3002 	  if (ranges[i].exp != ranges[j].exp || ranges[j].in_p)
3003 	    continue;
3004 	  lowj = ranges[j].low;
3005 	  if (lowj == NULL_TREE)
3006 	    continue;
3007 	  highj = ranges[j].high;
3008 	  if (highj == NULL_TREE)
3009 	    highj = TYPE_MAX_VALUE (type);
3010 	  /* Check lowj > highi.  */
3011 	  tem = fold_binary (GT_EXPR, boolean_type_node,
3012 			     lowj, highi);
3013 	  if (tem == NULL_TREE || !integer_onep (tem))
3014 	    continue;
3015 	  if (optimize_xor)
3016 	    changes = optimize_range_tests_xor (opcode, type, lowi, lowj,
3017 						highi, highj, ops,
3018 						ranges + i, ranges + j);
3019 	  else
3020 	    changes = optimize_range_tests_diff (opcode, type, lowi, lowj,
3021 						 highi, highj, ops,
3022 						 ranges + i, ranges + j);
3023 	  if (changes)
3024 	    {
3025 	      any_changes = true;
3026 	      break;
3027 	    }
3028 	}
3029     }
3030   return any_changes;
3031 }
3032 
3033 /* Helper function of optimize_range_tests_to_bit_test.  Handle a single
3034    range, EXP, LOW, HIGH, compute bit mask of bits to test and return
3035    EXP on success, NULL otherwise.  */
3036 
3037 static tree
3038 extract_bit_test_mask (tree exp, int prec, tree totallow, tree low, tree high,
3039 		       wide_int *mask, tree *totallowp)
3040 {
3041   tree tem = int_const_binop (MINUS_EXPR, high, low);
3042   if (tem == NULL_TREE
3043       || TREE_CODE (tem) != INTEGER_CST
3044       || TREE_OVERFLOW (tem)
3045       || tree_int_cst_sgn (tem) == -1
3046       || compare_tree_int (tem, prec) != -1)
3047     return NULL_TREE;
3048 
3049   unsigned HOST_WIDE_INT max = tree_to_uhwi (tem) + 1;
3050   *mask = wi::shifted_mask (0, max, false, prec);
3051   if (TREE_CODE (exp) == BIT_AND_EXPR
3052       && TREE_CODE (TREE_OPERAND (exp, 1)) == INTEGER_CST)
3053     {
3054       widest_int msk = wi::to_widest (TREE_OPERAND (exp, 1));
3055       msk = wi::zext (~msk, TYPE_PRECISION (TREE_TYPE (exp)));
3056       if (wi::popcount (msk) == 1
3057 	  && wi::ltu_p (msk, prec - max))
3058 	{
3059 	  *mask |= wi::shifted_mask (msk.to_uhwi (), max, false, prec);
3060 	  max += msk.to_uhwi ();
3061 	  exp = TREE_OPERAND (exp, 0);
3062 	  if (integer_zerop (low)
3063 	      && TREE_CODE (exp) == PLUS_EXPR
3064 	      && TREE_CODE (TREE_OPERAND (exp, 1)) == INTEGER_CST)
3065 	    {
3066 	      tree ret = TREE_OPERAND (exp, 0);
3067 	      STRIP_NOPS (ret);
3068 	      widest_int bias
3069 	        = wi::neg (wi::sext (wi::to_widest (TREE_OPERAND (exp, 1)),
3070 				     TYPE_PRECISION (TREE_TYPE (low))));
3071 	      tree tbias = wide_int_to_tree (TREE_TYPE (ret), bias);
3072 	      if (totallowp)
3073 		{
3074 		  *totallowp = tbias;
3075 		  return ret;
3076 		}
3077 	      else if (!tree_int_cst_lt (totallow, tbias))
3078 		return NULL_TREE;
3079 	      bias = wi::to_widest (tbias);
3080 	      bias -= wi::to_widest (totallow);
3081 	      if (bias >= 0 && bias < prec - max)
3082 		{
3083 		  *mask = wi::lshift (*mask, bias);
3084 		  return ret;
3085 		}
3086 	    }
3087 	}
3088     }
3089   if (totallowp)
3090     return exp;
3091   if (!tree_int_cst_lt (totallow, low))
3092     return exp;
3093   tem = int_const_binop (MINUS_EXPR, low, totallow);
3094   if (tem == NULL_TREE
3095       || TREE_CODE (tem) != INTEGER_CST
3096       || TREE_OVERFLOW (tem)
3097       || compare_tree_int (tem, prec - max) == 1)
3098     return NULL_TREE;
3099 
3100   *mask = wi::lshift (*mask, wi::to_widest (tem));
3101   return exp;
3102 }
3103 
3104 /* Attempt to optimize small range tests using bit test.
3105    E.g.
3106    X != 43 && X != 76 && X != 44 && X != 78 && X != 49
3107    && X != 77 && X != 46 && X != 75 && X != 45 && X != 82
3108    has been by earlier optimizations optimized into:
3109    ((X - 43U) & ~32U) > 3U && X != 49 && X != 82
3110    As the whole 43 through 82 range spans fewer than 64 values,
3111    for 64-bit word targets optimize that into:
3112    (X - 43U) > 40U || ((1 << (X - 43U)) & 0x8F0000004FULL) == 0  */
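
/* In the mask above, bit (X - 43U) is set for each tested value: bits 0-3
   and 6 (the low byte 0x4F) correspond to 43-46 and 49, and bits 32-35
   and 39 (the 0x8F byte) correspond to 75-78 and 82.  */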
3113 
3114 static bool
3115 optimize_range_tests_to_bit_test (enum tree_code opcode, int first, int length,
3116 				  vec<operand_entry *> *ops,
3117 				  struct range_entry *ranges)
3118 {
3119   int i, j;
3120   bool any_changes = false;
3121   int prec = GET_MODE_BITSIZE (word_mode);
3122   auto_vec<struct range_entry *, 64> candidates;
3123 
3124   for (i = first; i < length - 2; i++)
3125     {
3126       tree lowi, highi, lowj, highj, type;
3127 
3128       if (ranges[i].exp == NULL_TREE || ranges[i].in_p)
3129 	continue;
3130       type = TREE_TYPE (ranges[i].exp);
3131       if (!INTEGRAL_TYPE_P (type))
3132 	continue;
3133       lowi = ranges[i].low;
3134       if (lowi == NULL_TREE)
3135 	lowi = TYPE_MIN_VALUE (type);
3136       highi = ranges[i].high;
3137       if (highi == NULL_TREE)
3138 	continue;
3139       wide_int mask;
3140       tree exp = extract_bit_test_mask (ranges[i].exp, prec, lowi, lowi,
3141 					highi, &mask, &lowi);
3142       if (exp == NULL_TREE)
3143 	continue;
3144       bool strict_overflow_p = ranges[i].strict_overflow_p;
3145       candidates.truncate (0);
3146       int end = MIN (i + 64, length);
3147       for (j = i + 1; j < end; j++)
3148 	{
3149 	  tree exp2;
3150 	  if (ranges[j].exp == NULL_TREE || ranges[j].in_p)
3151 	    continue;
3152 	  if (ranges[j].exp == exp)
3153 	    ;
3154 	  else if (TREE_CODE (ranges[j].exp) == BIT_AND_EXPR)
3155 	    {
3156 	      exp2 = TREE_OPERAND (ranges[j].exp, 0);
3157 	      if (exp2 == exp)
3158 		;
3159 	      else if (TREE_CODE (exp2) == PLUS_EXPR)
3160 		{
3161 		  exp2 = TREE_OPERAND (exp2, 0);
3162 		  STRIP_NOPS (exp2);
3163 		  if (exp2 != exp)
3164 		    continue;
3165 		}
3166 	      else
3167 		continue;
3168 	    }
3169 	  else
3170 	    continue;
3171 	  lowj = ranges[j].low;
3172 	  if (lowj == NULL_TREE)
3173 	    continue;
3174 	  highj = ranges[j].high;
3175 	  if (highj == NULL_TREE)
3176 	    highj = TYPE_MAX_VALUE (type);
3177 	  wide_int mask2;
3178 	  exp2 = extract_bit_test_mask (ranges[j].exp, prec, lowi, lowj,
3179 					highj, &mask2, NULL);
3180 	  if (exp2 != exp)
3181 	    continue;
3182 	  mask |= mask2;
3183 	  strict_overflow_p |= ranges[j].strict_overflow_p;
3184 	  candidates.safe_push (&ranges[j]);
3185 	}
3186 
3187       /* If we would otherwise need 3 or more comparisons, use a bit test.  */
3188       if (candidates.length () >= 2)
3189 	{
3190 	  tree high = wide_int_to_tree (TREE_TYPE (lowi),
3191 					wi::to_widest (lowi)
3192 					+ prec - 1 - wi::clz (mask));
3193 	  operand_entry *oe = (*ops)[ranges[i].idx];
3194 	  tree op = oe->op;
3195 	  gimple *stmt = op ? SSA_NAME_DEF_STMT (op)
3196 			    : last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id));
3197 	  location_t loc = gimple_location (stmt);
3198 	  tree optype = op ? TREE_TYPE (op) : boolean_type_node;
3199 
3200 	  /* See if it isn't cheaper to pretend the minimum value of the
3201 	     range is 0, if the maximum value is small enough.
3202 	     We can then avoid the subtraction of the minimum value, but the
3203 	     mask constant could perhaps be more expensive.  */
3204 	  if (compare_tree_int (lowi, 0) > 0
3205 	      && compare_tree_int (high, prec) < 0)
3206 	    {
3207 	      int cost_diff;
3208 	      HOST_WIDE_INT m = tree_to_uhwi (lowi);
3209 	      rtx reg = gen_raw_REG (word_mode, 10000);
3210 	      bool speed_p = optimize_bb_for_speed_p (gimple_bb (stmt));
3211 	      cost_diff = set_rtx_cost (gen_rtx_PLUS (word_mode, reg,
3212 						      GEN_INT (-m)), speed_p);
3213 	      rtx r = immed_wide_int_const (mask, word_mode);
3214 	      cost_diff += set_src_cost (gen_rtx_AND (word_mode, reg, r),
3215 					 word_mode, speed_p);
3216 	      r = immed_wide_int_const (wi::lshift (mask, m), word_mode);
3217 	      cost_diff -= set_src_cost (gen_rtx_AND (word_mode, reg, r),
3218 					 word_mode, speed_p);
3219 	      if (cost_diff > 0)
3220 		{
3221 		  mask = wi::lshift (mask, m);
3222 		  lowi = build_zero_cst (TREE_TYPE (lowi));
3223 		}
3224 	    }
3225 
3226 	  tree tem = build_range_check (loc, optype, unshare_expr (exp),
3227 					false, lowi, high);
3228 	  if (tem == NULL_TREE || is_gimple_val (tem))
3229 	    continue;
3230 	  tree etype = unsigned_type_for (TREE_TYPE (exp));
3231 	  exp = fold_build2_loc (loc, MINUS_EXPR, etype,
3232 				 fold_convert_loc (loc, etype, exp),
3233 				 fold_convert_loc (loc, etype, lowi));
3234 	  exp = fold_convert_loc (loc, integer_type_node, exp);
3235 	  tree word_type = lang_hooks.types.type_for_mode (word_mode, 1);
3236 	  exp = fold_build2_loc (loc, LSHIFT_EXPR, word_type,
3237 				 build_int_cst (word_type, 1), exp);
3238 	  exp = fold_build2_loc (loc, BIT_AND_EXPR, word_type, exp,
3239 				 wide_int_to_tree (word_type, mask));
3240 	  exp = fold_build2_loc (loc, EQ_EXPR, optype, exp,
3241 				 build_zero_cst (word_type));
3242 	  if (is_gimple_val (exp))
3243 	    continue;
3244 
3245 	  /* The shift might have undefined behavior if TEM is true,
3246 	     but reassociate_bb isn't prepared to have basic blocks
3247 	     split when it is running.  So, temporarily emit a code
3248 	     with BIT_IOR_EXPR instead of &&, and fix it up in
3249 	     branch_fixup.  */
3250 	  gimple_seq seq;
3251 	  tem = force_gimple_operand (tem, &seq, true, NULL_TREE);
3252 	  gcc_assert (TREE_CODE (tem) == SSA_NAME);
3253 	  gimple_set_visited (SSA_NAME_DEF_STMT (tem), true);
3254 	  gimple_seq seq2;
3255 	  exp = force_gimple_operand (exp, &seq2, true, NULL_TREE);
3256 	  gimple_seq_add_seq_without_update (&seq, seq2);
3257 	  gcc_assert (TREE_CODE (exp) == SSA_NAME);
3258 	  gimple_set_visited (SSA_NAME_DEF_STMT (exp), true);
3259 	  gimple *g = gimple_build_assign (make_ssa_name (optype),
3260 					   BIT_IOR_EXPR, tem, exp);
3261 	  gimple_set_location (g, loc);
3262 	  gimple_seq_add_stmt_without_update (&seq, g);
3263 	  exp = gimple_assign_lhs (g);
3264 	  tree val = build_zero_cst (optype);
3265 	  if (update_range_test (&ranges[i], NULL, candidates.address (),
3266 				 candidates.length (), opcode, ops, exp,
3267 				 seq, false, val, val, strict_overflow_p))
3268 	    {
3269 	      any_changes = true;
3270 	      reassoc_branch_fixups.safe_push (tem);
3271 	    }
3272 	  else
3273 	    gimple_seq_discard (seq);
3274 	}
3275     }
3276   return any_changes;
3277 }
3278 
3279 /* Optimize x != 0 || y != 0 || z != 0 into (x | y | z) != 0
3280    and similarly x != -1 || y != -1 || z != -1 into (x & y & z) != -1.  */
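/* Illustrative C-level sketch (editor's addition, not from the original
   sources):

     if (x != 0 || y != 0 || z != 0)	/* three separate comparisons */
       f ();

   becomes

     if ((x | y | z) != 0)		/* a single comparison */
       f ();

   and dually x == 0 && y == 0 && z == 0 becomes (x | y | z) == 0.  */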
3281 
3282 static bool
3283 optimize_range_tests_cmp_bitwise (enum tree_code opcode, int first, int length,
3284 				  vec<operand_entry *> *ops,
3285 				  struct range_entry *ranges)
3286 {
3287   int i;
3288   unsigned int b;
3289   bool any_changes = false;
3290   auto_vec<int, 128> buckets;
3291   auto_vec<int, 32> chains;
3292   auto_vec<struct range_entry *, 32> candidates;
3293 
3294   for (i = first; i < length; i++)
3295     {
3296       if (ranges[i].exp == NULL_TREE
3297 	  || TREE_CODE (ranges[i].exp) != SSA_NAME
3298 	  || !ranges[i].in_p
3299 	  || TYPE_PRECISION (TREE_TYPE (ranges[i].exp)) <= 1
3300 	  || TREE_CODE (TREE_TYPE (ranges[i].exp)) == BOOLEAN_TYPE
3301 	  || ranges[i].low == NULL_TREE
3302 	  || ranges[i].low != ranges[i].high)
3303 	continue;
3304 
3305       bool zero_p = integer_zerop (ranges[i].low);
3306       if (!zero_p && !integer_all_onesp (ranges[i].low))
3307 	continue;
3308 
3309       b = TYPE_PRECISION (TREE_TYPE (ranges[i].exp)) * 2 + !zero_p;
3310       if (buckets.length () <= b)
3311 	buckets.safe_grow_cleared (b + 1);
3312       if (chains.length () <= (unsigned) i)
3313 	chains.safe_grow (i + 1);
3314       chains[i] = buckets[b];
3315       buckets[b] = i + 1;
3316     }
3317 
3318   FOR_EACH_VEC_ELT (buckets, b, i)
3319     if (i && chains[i - 1])
3320       {
3321 	int j, k = i;
3322 	for (j = chains[i - 1]; j; j = chains[j - 1])
3323 	  {
3324 	    gimple *gk = SSA_NAME_DEF_STMT (ranges[k - 1].exp);
3325 	    gimple *gj = SSA_NAME_DEF_STMT (ranges[j - 1].exp);
3326 	    if (reassoc_stmt_dominates_stmt_p (gk, gj))
3327 	      k = j;
3328 	  }
3329 	tree type1 = TREE_TYPE (ranges[k - 1].exp);
3330 	tree type2 = NULL_TREE;
3331 	bool strict_overflow_p = false;
3332 	candidates.truncate (0);
3333 	for (j = i; j; j = chains[j - 1])
3334 	  {
3335 	    tree type = TREE_TYPE (ranges[j - 1].exp);
3336 	    strict_overflow_p |= ranges[j - 1].strict_overflow_p;
3337 	    if (j == k
3338 		|| useless_type_conversion_p (type1, type))
3339 	      ;
3340 	    else if (type2 == NULL_TREE
3341 		     || useless_type_conversion_p (type2, type))
3342 	      {
3343 		if (type2 == NULL_TREE)
3344 		  type2 = type;
3345 		candidates.safe_push (&ranges[j - 1]);
3346 	      }
3347 	  }
3348 	unsigned l = candidates.length ();
3349 	for (j = i; j; j = chains[j - 1])
3350 	  {
3351 	    tree type = TREE_TYPE (ranges[j - 1].exp);
3352 	    if (j == k)
3353 	      continue;
3354 	    if (useless_type_conversion_p (type1, type))
3355 	      ;
3356 	    else if (type2 == NULL_TREE
3357 		     || useless_type_conversion_p (type2, type))
3358 	      continue;
3359 	    candidates.safe_push (&ranges[j - 1]);
3360 	  }
3361 	gimple_seq seq = NULL;
3362 	tree op = NULL_TREE;
3363 	unsigned int id;
3364 	struct range_entry *r;
3365 	candidates.safe_push (&ranges[k - 1]);
3366 	FOR_EACH_VEC_ELT (candidates, id, r)
3367 	  {
3368 	    gimple *g;
3369 	    if (id == 0)
3370 	      {
3371 		op = r->exp;
3372 		continue;
3373 	      }
3374 	    if (id == l)
3375 	      {
3376 		g = gimple_build_assign (make_ssa_name (type1), NOP_EXPR, op);
3377 		gimple_seq_add_stmt_without_update (&seq, g);
3378 		op = gimple_assign_lhs (g);
3379 	      }
3380 	    tree type = TREE_TYPE (r->exp);
3381 	    tree exp = r->exp;
3382 	    if (id >= l && !useless_type_conversion_p (type1, type))
3383 	      {
3384 		g = gimple_build_assign (make_ssa_name (type1), NOP_EXPR, exp);
3385 		gimple_seq_add_stmt_without_update (&seq, g);
3386 		exp = gimple_assign_lhs (g);
3387 	      }
3388 	    g = gimple_build_assign (make_ssa_name (id >= l ? type1 : type2),
3389 				     (b & 1) ? BIT_AND_EXPR : BIT_IOR_EXPR,
3390 				     op, exp);
3391 	    gimple_seq_add_stmt_without_update (&seq, g);
3392 	    op = gimple_assign_lhs (g);
3393 	  }
3394 	candidates.pop ();
3395 	if (update_range_test (&ranges[k - 1], NULL, candidates.address (),
3396 			       candidates.length (), opcode, ops, op,
3397 			       seq, true, ranges[k - 1].low,
3398 			       ranges[k - 1].low, strict_overflow_p))
3399 	  any_changes = true;
3400 	else
3401 	  gimple_seq_discard (seq);
3402       }
3403 
3404   return any_changes;
3405 }
3406 
3407 /* Attempt to optimize for signed a and b where b is known to be >= 0:
3408    a >= 0 && a < b into (unsigned) a < (unsigned) b
3409    a >= 0 && a <= b into (unsigned) a <= (unsigned) b  */
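/* Illustrative sketch (editor's addition): for signed int a and n with
   n known to be non-negative, a bounds check such as

     if (a >= 0 && a < n)
       use (buf[a]);

   can be done with a single unsigned comparison

     if ((unsigned) a < (unsigned) n)
       use (buf[a]);

   because a negative a wraps around to a large unsigned value and thus
   fails the test.  (buf and use are hypothetical names used only for
   the example.)  */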
3410 
3411 static bool
3412 optimize_range_tests_var_bound (enum tree_code opcode, int first, int length,
3413 				vec<operand_entry *> *ops,
3414 				struct range_entry *ranges,
3415 				basic_block first_bb)
3416 {
3417   int i;
3418   bool any_changes = false;
3419   hash_map<tree, int> *map = NULL;
3420 
3421   for (i = first; i < length; i++)
3422     {
3423       if (ranges[i].exp == NULL_TREE
3424 	  || TREE_CODE (ranges[i].exp) != SSA_NAME
3425 	  || !ranges[i].in_p)
3426 	continue;
3427 
3428       tree type = TREE_TYPE (ranges[i].exp);
3429       if (!INTEGRAL_TYPE_P (type)
3430 	  || TYPE_UNSIGNED (type)
3431 	  || ranges[i].low == NULL_TREE
3432 	  || !integer_zerop (ranges[i].low)
3433 	  || ranges[i].high != NULL_TREE)
3434 	continue;
3435       /* EXP >= 0 here.  */
3436       if (map == NULL)
3437 	map = new hash_map <tree, int>;
3438       map->put (ranges[i].exp, i);
3439     }
3440 
3441   if (map == NULL)
3442     return false;
3443 
3444   for (i = 0; i < length; i++)
3445     {
3446       bool in_p = ranges[i].in_p;
3447       if (ranges[i].low == NULL_TREE
3448 	  || ranges[i].high == NULL_TREE)
3449 	continue;
3450       if (!integer_zerop (ranges[i].low)
3451 	  || !integer_zerop (ranges[i].high))
3452 	{
3453 	  if (ranges[i].exp
3454 	      && TYPE_PRECISION (TREE_TYPE (ranges[i].exp)) == 1
3455 	      && TYPE_UNSIGNED (TREE_TYPE (ranges[i].exp))
3456 	      && integer_onep (ranges[i].low)
3457 	      && integer_onep (ranges[i].high))
3458 	    in_p = !in_p;
3459 	  else
3460 	    continue;
3461 	}
3462 
3463       gimple *stmt;
3464       tree_code ccode;
3465       tree rhs1, rhs2;
3466       if (ranges[i].exp)
3467 	{
3468 	  if (TREE_CODE (ranges[i].exp) != SSA_NAME)
3469 	    continue;
3470 	  stmt = SSA_NAME_DEF_STMT (ranges[i].exp);
3471 	  if (!is_gimple_assign (stmt))
3472 	    continue;
3473 	  ccode = gimple_assign_rhs_code (stmt);
3474 	  rhs1 = gimple_assign_rhs1 (stmt);
3475 	  rhs2 = gimple_assign_rhs2 (stmt);
3476 	}
3477       else
3478 	{
3479 	  operand_entry *oe = (*ops)[ranges[i].idx];
3480 	  stmt = last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id));
3481 	  if (gimple_code (stmt) != GIMPLE_COND)
3482 	    continue;
3483 	  ccode = gimple_cond_code (stmt);
3484 	  rhs1 = gimple_cond_lhs (stmt);
3485 	  rhs2 = gimple_cond_rhs (stmt);
3486 	}
3487 
3488       if (TREE_CODE (rhs1) != SSA_NAME
3489 	  || rhs2 == NULL_TREE
3490 	  || TREE_CODE (rhs2) != SSA_NAME)
3491 	continue;
3492 
3493       switch (ccode)
3494 	{
3495 	case GT_EXPR:
3496 	case GE_EXPR:
3497 	case LT_EXPR:
3498 	case LE_EXPR:
3499 	  break;
3500 	default:
3501 	  continue;
3502 	}
3503       if (in_p)
3504 	ccode = invert_tree_comparison (ccode, false);
3505       switch (ccode)
3506 	{
3507 	case GT_EXPR:
3508 	case GE_EXPR:
3509 	  std::swap (rhs1, rhs2);
3510 	  ccode = swap_tree_comparison (ccode);
3511 	  break;
3512 	case LT_EXPR:
3513 	case LE_EXPR:
3514 	  break;
3515 	default:
3516 	  gcc_unreachable ();
3517 	}
3518 
3519       int *idx = map->get (rhs1);
3520       if (idx == NULL)
3521 	continue;
3522 
3523       /* maybe_optimize_range_tests allows statements without side-effects
3524 	 in the basic blocks as long as they are consumed in the same bb.
3525 	 Make sure rhs2's def stmt is not among them, otherwise we can't
3526 	 safely use get_nonzero_bits on it.  E.g. in:
3527 	  # RANGE [-83, 1] NONZERO 173
3528 	  # k_32 = PHI <k_47(13), k_12(9)>
3529 	 ...
3530 	  if (k_32 >= 0)
3531 	    goto <bb 5>; [26.46%]
3532 	  else
3533 	    goto <bb 9>; [73.54%]
3534 
3535 	  <bb 5> [local count: 140323371]:
3536 	  # RANGE [0, 1] NONZERO 1
3537 	  _5 = (int) k_32;
3538 	  # RANGE [0, 4] NONZERO 4
3539 	  _21 = _5 << 2;
3540 	  # RANGE [0, 4] NONZERO 4
3541 	  iftmp.0_44 = (char) _21;
3542 	  if (k_32 < iftmp.0_44)
3543 	    goto <bb 6>; [84.48%]
3544 	  else
3545 	    goto <bb 9>; [15.52%]
3546 	 the ranges on _5/_21/iftmp.0_44 are flow sensitive and assume that
3547 	 k_32 >= 0.  If we'd optimize k_32 >= 0 to true and k_32 < iftmp.0_44
3548 	 to (unsigned) k_32 < (unsigned) iftmp.0_44, then we would execute
3549 	 those stmts even for negative k_32 and the value ranges would be no
3550 	 longer guaranteed and so the optimization would be invalid.  */
3551       while (opcode == ERROR_MARK)
3552 	{
3553 	  gimple *g = SSA_NAME_DEF_STMT (rhs2);
3554 	  basic_block bb2 = gimple_bb (g);
3555 	  if (bb2
3556 	      && bb2 != first_bb
3557 	      && dominated_by_p (CDI_DOMINATORS, bb2, first_bb))
3558 	    {
3559 	      /* As an exception, handle a few common cases.  */
3560 	      if (gimple_assign_cast_p (g)
3561 		  && INTEGRAL_TYPE_P (TREE_TYPE (gimple_assign_rhs1 (g))))
3562 		{
3563 		  tree op0 = gimple_assign_rhs1 (g);
3564 		  if (TYPE_UNSIGNED (TREE_TYPE (op0))
3565 		      && (TYPE_PRECISION (TREE_TYPE (rhs2))
3566 			  > TYPE_PRECISION (TREE_TYPE (op0))))
3567 		    /* Zero-extension is always ok.  */
3568 		    break;
3569 		  else if (TYPE_PRECISION (TREE_TYPE (rhs2))
3570 			   == TYPE_PRECISION (TREE_TYPE (op0))
3571 			   && TREE_CODE (op0) == SSA_NAME)
3572 		    {
3573 		      /* Cast from signed to unsigned or vice versa.  Retry
3574 			 with the op0 as new rhs2.  */
3575 		      rhs2 = op0;
3576 		      continue;
3577 		    }
3578 		}
3579 	      else if (is_gimple_assign (g)
3580 		       && gimple_assign_rhs_code (g) == BIT_AND_EXPR
3581 		       && TREE_CODE (gimple_assign_rhs2 (g)) == INTEGER_CST
3582 		       && !wi::neg_p (wi::to_wide (gimple_assign_rhs2 (g))))
3583 		/* Masking with INTEGER_CST with MSB clear is always ok
3584 		   too.  */
3585 		break;
3586 	      rhs2 = NULL_TREE;
3587 	    }
3588 	  break;
3589 	}
3590       if (rhs2 == NULL_TREE)
3591 	continue;
3592 
3593       wide_int nz = get_nonzero_bits (rhs2);
3594       if (wi::neg_p (nz))
3595 	continue;
3596 
3597       /* We have EXP < RHS2 or EXP <= RHS2 where EXP >= 0
3598 	 and RHS2 is known to be >= 0.  */
3599       tree utype = unsigned_type_for (TREE_TYPE (rhs1));
3600 
3601       enum warn_strict_overflow_code wc = WARN_STRICT_OVERFLOW_COMPARISON;
3602       if ((ranges[*idx].strict_overflow_p
3603 	   || ranges[i].strict_overflow_p)
3604 	  && issue_strict_overflow_warning (wc))
3605 	warning_at (gimple_location (stmt), OPT_Wstrict_overflow,
3606 		    "assuming signed overflow does not occur "
3607 		    "when simplifying range test");
3608 
3609       if (dump_file && (dump_flags & TDF_DETAILS))
3610 	{
3611 	  struct range_entry *r = &ranges[*idx];
3612 	  fprintf (dump_file, "Optimizing range test ");
3613 	  print_generic_expr (dump_file, r->exp);
3614 	  fprintf (dump_file, " +[");
3615 	  print_generic_expr (dump_file, r->low);
3616 	  fprintf (dump_file, ", ");
3617 	  print_generic_expr (dump_file, r->high);
3618 	  fprintf (dump_file, "] and comparison ");
3619 	  print_generic_expr (dump_file, rhs1);
3620 	  fprintf (dump_file, " %s ", op_symbol_code (ccode));
3621 	  print_generic_expr (dump_file, rhs2);
3622 	  fprintf (dump_file, "\n into (");
3623 	  print_generic_expr (dump_file, utype);
3624 	  fprintf (dump_file, ") ");
3625 	  print_generic_expr (dump_file, rhs1);
3626 	  fprintf (dump_file, " %s (", op_symbol_code (ccode));
3627 	  print_generic_expr (dump_file, utype);
3628 	  fprintf (dump_file, ") ");
3629 	  print_generic_expr (dump_file, rhs2);
3630 	  fprintf (dump_file, "\n");
3631 	}
3632 
3633       operand_entry *oe = (*ops)[ranges[i].idx];
3634       ranges[i].in_p = 0;
3635       if (opcode == BIT_IOR_EXPR
3636 	  || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
3637 	{
3638 	  ranges[i].in_p = 1;
3639 	  ccode = invert_tree_comparison (ccode, false);
3640 	}
3641 
3642       unsigned int uid = gimple_uid (stmt);
3643       gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
3644       gimple *g = gimple_build_assign (make_ssa_name (utype), NOP_EXPR, rhs1);
3645       gimple_set_uid (g, uid);
3646       rhs1 = gimple_assign_lhs (g);
3647       gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3648       if (!useless_type_conversion_p (utype, TREE_TYPE (rhs2)))
3649 	{
3650 	  g = gimple_build_assign (make_ssa_name (utype), NOP_EXPR, rhs2);
3651 	  gimple_set_uid (g, uid);
3652 	  rhs2 = gimple_assign_lhs (g);
3653 	  gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3654 	}
3655       if (tree_swap_operands_p (rhs1, rhs2))
3656 	{
3657 	  std::swap (rhs1, rhs2);
3658 	  ccode = swap_tree_comparison (ccode);
3659 	}
3660       if (gimple_code (stmt) == GIMPLE_COND)
3661 	{
3662 	  gcond *c = as_a <gcond *> (stmt);
3663 	  gimple_cond_set_code (c, ccode);
3664 	  gimple_cond_set_lhs (c, rhs1);
3665 	  gimple_cond_set_rhs (c, rhs2);
3666 	  update_stmt (stmt);
3667 	}
3668       else
3669 	{
3670 	  tree ctype = oe->op ? TREE_TYPE (oe->op) : boolean_type_node;
3671 	  if (!INTEGRAL_TYPE_P (ctype)
3672 	      || (TREE_CODE (ctype) != BOOLEAN_TYPE
3673 		  && TYPE_PRECISION (ctype) != 1))
3674 	    ctype = boolean_type_node;
3675 	  g = gimple_build_assign (make_ssa_name (ctype), ccode, rhs1, rhs2);
3676 	  gimple_set_uid (g, uid);
3677 	  gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3678 	  if (oe->op && ctype != TREE_TYPE (oe->op))
3679 	    {
3680 	      g = gimple_build_assign (make_ssa_name (TREE_TYPE (oe->op)),
3681 				       NOP_EXPR, gimple_assign_lhs (g));
3682 	      gimple_set_uid (g, uid);
3683 	      gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3684 	    }
3685 	  ranges[i].exp = gimple_assign_lhs (g);
3686 	  oe->op = ranges[i].exp;
3687 	  ranges[i].low = build_zero_cst (TREE_TYPE (ranges[i].exp));
3688 	  ranges[i].high = ranges[i].low;
3689 	}
3690       ranges[i].strict_overflow_p = false;
3691       oe = (*ops)[ranges[*idx].idx];
3692       /* Now change all the other range test immediate uses, so that
3693 	 those tests will be optimized away.  */
3694       if (opcode == ERROR_MARK)
3695 	{
3696 	  if (oe->op)
3697 	    oe->op = build_int_cst (TREE_TYPE (oe->op),
3698 				    oe->rank == BIT_IOR_EXPR ? 0 : 1);
3699 	  else
3700 	    oe->op = (oe->rank == BIT_IOR_EXPR
3701 		      ? boolean_false_node : boolean_true_node);
3702 	}
3703       else
3704 	oe->op = error_mark_node;
3705       ranges[*idx].exp = NULL_TREE;
3706       ranges[*idx].low = NULL_TREE;
3707       ranges[*idx].high = NULL_TREE;
3708       any_changes = true;
3709     }
3710 
3711   delete map;
3712   return any_changes;
3713 }
3714 
3715 /* Optimize range tests, similarly to how fold_range_test optimizes
3716    them on trees.  The tree code for the binary
3717    operation between all the operands is OPCODE.
3718    If OPCODE is ERROR_MARK, optimize_range_tests is called from within
3719    maybe_optimize_range_tests for inter-bb range optimization.
3720    In that case if oe->op is NULL, oe->id is bb->index whose
3721    GIMPLE_COND is && or ||ed into the test, and oe->rank says
3722    the actual opcode.
3723    FIRST_BB is the first basic block if OPCODE is ERROR_MARK.  */
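/* Illustrative sketch (editor's addition): several tests on the same
   operand, e.g.

     if (a == 1 || a == 2 || a == 3)
       f ();

   can be merged into one range check of the form

     if ((unsigned) (a - 1) <= 2)
       f ();

   which is the shape of test that update_range_test/build_range_check
   emit for a merged range.  */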
3724 
3725 static bool
3726 optimize_range_tests (enum tree_code opcode,
3727 		      vec<operand_entry *> *ops, basic_block first_bb)
3728 {
3729   unsigned int length = ops->length (), i, j, first;
3730   operand_entry *oe;
3731   struct range_entry *ranges;
3732   bool any_changes = false;
3733 
3734   if (length == 1)
3735     return false;
3736 
3737   ranges = XNEWVEC (struct range_entry, length);
3738   for (i = 0; i < length; i++)
3739     {
3740       oe = (*ops)[i];
3741       ranges[i].idx = i;
3742       init_range_entry (ranges + i, oe->op,
3743 			oe->op
3744 			? NULL
3745 			: last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id)));
3746       /* For | invert it now; we will invert it again before emitting
3747 	 the optimized expression.  */
3748       if (opcode == BIT_IOR_EXPR
3749 	  || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
3750 	ranges[i].in_p = !ranges[i].in_p;
3751     }
3752 
3753   qsort (ranges, length, sizeof (*ranges), range_entry_cmp);
3754   for (i = 0; i < length; i++)
3755     if (ranges[i].exp != NULL_TREE && TREE_CODE (ranges[i].exp) == SSA_NAME)
3756       break;
3757 
3758   /* Try to merge ranges.  */
3759   for (first = i; i < length; i++)
3760     {
3761       tree low = ranges[i].low;
3762       tree high = ranges[i].high;
3763       int in_p = ranges[i].in_p;
3764       bool strict_overflow_p = ranges[i].strict_overflow_p;
3765       int update_fail_count = 0;
3766 
3767       for (j = i + 1; j < length; j++)
3768 	{
3769 	  if (ranges[i].exp != ranges[j].exp)
3770 	    break;
3771 	  if (!merge_ranges (&in_p, &low, &high, in_p, low, high,
3772 			     ranges[j].in_p, ranges[j].low, ranges[j].high))
3773 	    break;
3774 	  strict_overflow_p |= ranges[j].strict_overflow_p;
3775 	}
3776 
3777       if (j == i + 1)
3778 	continue;
3779 
3780       if (update_range_test (ranges + i, ranges + i + 1, NULL, j - i - 1,
3781 			     opcode, ops, ranges[i].exp, NULL, in_p,
3782 			     low, high, strict_overflow_p))
3783 	{
3784 	  i = j - 1;
3785 	  any_changes = true;
3786 	}
3787       /* Avoid quadratic complexity if all merge_ranges calls would succeed,
3788 	 while update_range_test would fail.  */
3789       else if (update_fail_count == 64)
3790 	i = j - 1;
3791       else
3792 	++update_fail_count;
3793     }
3794 
3795   any_changes |= optimize_range_tests_1 (opcode, first, length, true,
3796 					 ops, ranges);
3797 
3798   if (BRANCH_COST (optimize_function_for_speed_p (cfun), false) >= 2)
3799     any_changes |= optimize_range_tests_1 (opcode, first, length, false,
3800 					   ops, ranges);
3801   if (lshift_cheap_p (optimize_function_for_speed_p (cfun)))
3802     any_changes |= optimize_range_tests_to_bit_test (opcode, first, length,
3803 						     ops, ranges);
3804   any_changes |= optimize_range_tests_cmp_bitwise (opcode, first, length,
3805 						   ops, ranges);
3806   any_changes |= optimize_range_tests_var_bound (opcode, first, length, ops,
3807 						 ranges, first_bb);
3808 
3809   if (any_changes && opcode != ERROR_MARK)
3810     {
3811       j = 0;
3812       FOR_EACH_VEC_ELT (*ops, i, oe)
3813 	{
3814 	  if (oe->op == error_mark_node)
3815 	    continue;
3816 	  else if (i != j)
3817 	    (*ops)[j] = oe;
3818 	  j++;
3819 	}
3820       ops->truncate (j);
3821     }
3822 
3823   XDELETEVEC (ranges);
3824   return any_changes;
3825 }
3826 
3827 /* A subroutine of optimize_vec_cond_expr to extract and canonicalize
3828    the operands of the VEC_COND_EXPR.  Returns ERROR_MARK on failure,
3829    otherwise the comparison code.  TYPE is a return value that is set
3830    to type of comparison.  */
3831 
3832 static tree_code
3833 ovce_extract_ops (tree var, gassign **rets, bool *reti, tree *type)
3834 {
3835   if (TREE_CODE (var) != SSA_NAME)
3836     return ERROR_MARK;
3837 
3838   gassign *stmt = dyn_cast <gassign *> (SSA_NAME_DEF_STMT (var));
3839   if (stmt == NULL)
3840     return ERROR_MARK;
3841 
3842   /* ??? If we start creating more COND_EXPR, we could perform
3843      this same optimization with them.	For now, simplify.  */
3844   if (gimple_assign_rhs_code (stmt) != VEC_COND_EXPR)
3845     return ERROR_MARK;
3846 
3847   tree cond = gimple_assign_rhs1 (stmt);
3848   tree_code cmp = TREE_CODE (cond);
3849   if (TREE_CODE_CLASS (cmp) != tcc_comparison)
3850     return ERROR_MARK;
3851 
3852   /* ??? For now, allow only canonical true and false result vectors.
3853      We could expand this to other constants should the need arise,
3854      but at the moment we don't create them.  */
3855   tree t = gimple_assign_rhs2 (stmt);
3856   tree f = gimple_assign_rhs3 (stmt);
3857   bool inv;
3858   if (integer_all_onesp (t))
3859     inv = false;
3860   else if (integer_all_onesp (f))
3861     {
3862       cmp = invert_tree_comparison (cmp, false);
3863       inv = true;
3864     }
3865   else
3866     return ERROR_MARK;
3867   if (!integer_zerop (f))
3868     return ERROR_MARK;
3869 
3870   /* Success!  */
3871   if (rets)
3872     *rets = stmt;
3873   if (reti)
3874     *reti = inv;
3875   if (type)
3876     *type = TREE_TYPE (cond);
3877   return cmp;
3878 }
3879 
3880 /* Optimize the condition of VEC_COND_EXPRs which have been combined
3881    with OPCODE (either BIT_AND_EXPR or BIT_IOR_EXPR).  */
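/* Illustrative sketch (editor's addition): given two mask vectors built
   from comparisons of the same operands, e.g. elementwise

     m = (a < b ? -1 : 0) & (a <= b ? -1 : 0);

   maybe_fold_and_comparisons can fold the two comparisons into one
   (here simply a < b), so only a single VEC_COND_EXPR condition
   remains.  */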
3882 
3883 static bool
3884 optimize_vec_cond_expr (tree_code opcode, vec<operand_entry *> *ops)
3885 {
3886   unsigned int length = ops->length (), i, j;
3887   bool any_changes = false;
3888 
3889   if (length == 1)
3890     return false;
3891 
3892   for (i = 0; i < length; ++i)
3893     {
3894       tree elt0 = (*ops)[i]->op;
3895 
3896       gassign *stmt0;
3897       bool invert;
3898       tree type;
3899       tree_code cmp0 = ovce_extract_ops (elt0, &stmt0, &invert, &type);
3900       if (cmp0 == ERROR_MARK)
3901 	continue;
3902 
3903       for (j = i + 1; j < length; ++j)
3904 	{
3905 	  tree &elt1 = (*ops)[j]->op;
3906 
3907 	  gassign *stmt1;
3908 	  tree_code cmp1 = ovce_extract_ops (elt1, &stmt1, NULL, NULL);
3909 	  if (cmp1 == ERROR_MARK)
3910 	    continue;
3911 
3912 	  tree cond0 = gimple_assign_rhs1 (stmt0);
3913 	  tree x0 = TREE_OPERAND (cond0, 0);
3914 	  tree y0 = TREE_OPERAND (cond0, 1);
3915 
3916 	  tree cond1 = gimple_assign_rhs1 (stmt1);
3917 	  tree x1 = TREE_OPERAND (cond1, 0);
3918 	  tree y1 = TREE_OPERAND (cond1, 1);
3919 
3920 	  tree comb;
3921 	  if (opcode == BIT_AND_EXPR)
3922 	    comb = maybe_fold_and_comparisons (type, cmp0, x0, y0, cmp1, x1,
3923 					       y1);
3924 	  else if (opcode == BIT_IOR_EXPR)
3925 	    comb = maybe_fold_or_comparisons (type, cmp0, x0, y0, cmp1, x1,
3926 					      y1);
3927 	  else
3928 	    gcc_unreachable ();
3929 	  if (comb == NULL)
3930 	    continue;
3931 
3932 	  /* Success! */
3933 	  if (dump_file && (dump_flags & TDF_DETAILS))
3934 	    {
3935 	      fprintf (dump_file, "Transforming ");
3936 	      print_generic_expr (dump_file, cond0);
3937 	      fprintf (dump_file, " %c ", opcode == BIT_AND_EXPR ? '&' : '|');
3938 	      print_generic_expr (dump_file, cond1);
3939 	      fprintf (dump_file, " into ");
3940 	      print_generic_expr (dump_file, comb);
3941 	      fputc ('\n', dump_file);
3942 	    }
3943 
3944 	  gimple_assign_set_rhs1 (stmt0, comb);
3945 	  if (invert)
3946 	    std::swap (*gimple_assign_rhs2_ptr (stmt0),
3947 		       *gimple_assign_rhs3_ptr (stmt0));
3948 	  update_stmt (stmt0);
3949 
3950 	  elt1 = error_mark_node;
3951 	  any_changes = true;
3952 	}
3953     }
3954 
3955   if (any_changes)
3956     {
3957       operand_entry *oe;
3958       j = 0;
3959       FOR_EACH_VEC_ELT (*ops, i, oe)
3960 	{
3961 	  if (oe->op == error_mark_node)
3962 	    continue;
3963 	  else if (i != j)
3964 	    (*ops)[j] = oe;
3965 	  j++;
3966 	}
3967       ops->truncate (j);
3968     }
3969 
3970   return any_changes;
3971 }
3972 
3973 /* Return true if STMT is a cast like:
3974    <bb N>:
3975    ...
3976    _123 = (int) _234;
3977 
3978    <bb M>:
3979    # _345 = PHI <_123(N), 1(...), 1(...)>
3980    where _234 has bool type, _123 has a single use and
3981    bb N has a single successor M.  This is commonly used in
3982    the last block of a range test.
3983 
3984    Also return true if STMT is a tcc_comparison like:
3985    <bb N>:
3986    ...
3987    _234 = a_2(D) == 2;
3988 
3989    <bb M>:
3990    # _345 = PHI <_234(N), 1(...), 1(...)>
3991    _346 = (int) _345;
3992    where _234 has bool type, a single use and
3993    bb N has a single successor M.  This is commonly used in
3994    the last block of a range test.  */
3995 
3996 static bool
3997 final_range_test_p (gimple *stmt)
3998 {
3999   basic_block bb, rhs_bb, lhs_bb;
4000   edge e;
4001   tree lhs, rhs;
4002   use_operand_p use_p;
4003   gimple *use_stmt;
4004 
4005   if (!gimple_assign_cast_p (stmt)
4006       && (!is_gimple_assign (stmt)
4007 	  || (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt))
4008 	      != tcc_comparison)))
4009     return false;
4010   bb = gimple_bb (stmt);
4011   if (!single_succ_p (bb))
4012     return false;
4013   e = single_succ_edge (bb);
4014   if (e->flags & EDGE_COMPLEX)
4015     return false;
4016 
4017   lhs = gimple_assign_lhs (stmt);
4018   rhs = gimple_assign_rhs1 (stmt);
4019   if (gimple_assign_cast_p (stmt)
4020       && (!INTEGRAL_TYPE_P (TREE_TYPE (lhs))
4021 	  || TREE_CODE (rhs) != SSA_NAME
4022 	  || TREE_CODE (TREE_TYPE (rhs)) != BOOLEAN_TYPE))
4023     return false;
4024 
4025   if (!gimple_assign_cast_p (stmt)
4026       && (TREE_CODE (TREE_TYPE (lhs)) != BOOLEAN_TYPE))
4027     return false;
4028 
4029   /* Test whether lhs is consumed only by a PHI in the only successor bb.  */
4030   if (!single_imm_use (lhs, &use_p, &use_stmt))
4031     return false;
4032 
4033   if (gimple_code (use_stmt) != GIMPLE_PHI
4034       || gimple_bb (use_stmt) != e->dest)
4035     return false;
4036 
4037   /* And that the rhs is defined in the same loop.  */
4038   if (gimple_assign_cast_p (stmt))
4039     {
4040       if (TREE_CODE (rhs) != SSA_NAME
4041 	  || !(rhs_bb = gimple_bb (SSA_NAME_DEF_STMT (rhs)))
4042 	  || !flow_bb_inside_loop_p (loop_containing_stmt (stmt), rhs_bb))
4043 	return false;
4044     }
4045   else
4046     {
4047       if (TREE_CODE (lhs) != SSA_NAME
4048 	  || !(lhs_bb = gimple_bb (SSA_NAME_DEF_STMT (lhs)))
4049 	  || !flow_bb_inside_loop_p (loop_containing_stmt (stmt), lhs_bb))
4050 	return false;
4051     }
4052 
4053   return true;
4054 }
4055 
4056 /* Return true if BB is a suitable basic block for inter-bb range test
4057    optimization.  If BACKWARD is true, BB should be the only predecessor
4058    of TEST_BB, and *OTHER_BB is either NULL and filled in by the routine,
4059    or is compared against to find a common basic block to which all
4060    conditions branch if true resp. false.  If BACKWARD is false, TEST_BB
4061    should be the only predecessor of BB.  */
4062 
4063 static bool
4064 suitable_cond_bb (basic_block bb, basic_block test_bb, basic_block *other_bb,
4065 		  bool backward)
4066 {
4067   edge_iterator ei, ei2;
4068   edge e, e2;
4069   gimple *stmt;
4070   gphi_iterator gsi;
4071   bool other_edge_seen = false;
4072   bool is_cond;
4073 
4074   if (test_bb == bb)
4075     return false;
4076   /* Check last stmt first.  */
4077   stmt = last_stmt (bb);
4078   if (stmt == NULL
4079       || (gimple_code (stmt) != GIMPLE_COND
4080 	  && (backward || !final_range_test_p (stmt)))
4081       || gimple_visited_p (stmt)
4082       || stmt_could_throw_p (cfun, stmt)
4083       || *other_bb == bb)
4084     return false;
4085   is_cond = gimple_code (stmt) == GIMPLE_COND;
4086   if (is_cond)
4087     {
4088       /* If last stmt is GIMPLE_COND, verify that one of the succ edges
4089 	 goes to the next bb (if BACKWARD, it is TEST_BB), and the other
4090 	 to *OTHER_BB (if not set yet, try to find it out).  */
4091       if (EDGE_COUNT (bb->succs) != 2)
4092 	return false;
4093       FOR_EACH_EDGE (e, ei, bb->succs)
4094 	{
4095 	  if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
4096 	    return false;
4097 	  if (e->dest == test_bb)
4098 	    {
4099 	      if (backward)
4100 		continue;
4101 	      else
4102 		return false;
4103 	    }
4104 	  if (e->dest == bb)
4105 	    return false;
4106 	  if (*other_bb == NULL)
4107 	    {
4108 	      FOR_EACH_EDGE (e2, ei2, test_bb->succs)
4109 		if (!(e2->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
4110 		  return false;
4111 		else if (e->dest == e2->dest)
4112 		  *other_bb = e->dest;
4113 	      if (*other_bb == NULL)
4114 		return false;
4115 	    }
4116 	  if (e->dest == *other_bb)
4117 	    other_edge_seen = true;
4118 	  else if (backward)
4119 	    return false;
4120 	}
4121       if (*other_bb == NULL || !other_edge_seen)
4122 	return false;
4123     }
4124   else if (single_succ (bb) != *other_bb)
4125     return false;
4126 
4127   /* Now check all PHIs of *OTHER_BB.  */
4128   e = find_edge (bb, *other_bb);
4129   e2 = find_edge (test_bb, *other_bb);
4130   for (gsi = gsi_start_phis (e->dest); !gsi_end_p (gsi); gsi_next (&gsi))
4131     {
4132       gphi *phi = gsi.phi ();
4133       /* If both BB and TEST_BB end with GIMPLE_COND, all PHI arguments
4134 	 corresponding to BB and TEST_BB predecessor must be the same.  */
4135       if (!operand_equal_p (gimple_phi_arg_def (phi, e->dest_idx),
4136 			    gimple_phi_arg_def (phi, e2->dest_idx), 0))
4137 	{
4138 	  /* Otherwise, if one of the blocks doesn't end with GIMPLE_COND,
4139 	     one of the PHIs should have the lhs of the last stmt in
4140 	     that block as PHI arg and that PHI should have 0 or 1
4141 	     corresponding to it in all other range test basic blocks
4142 	     considered.  */
4143 	  if (!is_cond)
4144 	    {
4145 	      if (gimple_phi_arg_def (phi, e->dest_idx)
4146 		  == gimple_assign_lhs (stmt)
4147 		  && (integer_zerop (gimple_phi_arg_def (phi, e2->dest_idx))
4148 		      || integer_onep (gimple_phi_arg_def (phi,
4149 							   e2->dest_idx))))
4150 		continue;
4151 	    }
4152 	  else
4153 	    {
4154 	      gimple *test_last = last_stmt (test_bb);
4155 	      if (gimple_code (test_last) != GIMPLE_COND
4156 		  && gimple_phi_arg_def (phi, e2->dest_idx)
4157 		     == gimple_assign_lhs (test_last)
4158 		  && (integer_zerop (gimple_phi_arg_def (phi, e->dest_idx))
4159 		      || integer_onep (gimple_phi_arg_def (phi, e->dest_idx))))
4160 		continue;
4161 	    }
4162 
4163 	  return false;
4164 	}
4165     }
4166   return true;
4167 }
4168 
4169 /* Return true if BB doesn't have side-effects that would disallow
4170    range test optimization, all SSA_NAMEs set in the bb are consumed
4171    in the bb and there are no PHIs.  */
4172 
4173 static bool
4174 no_side_effect_bb (basic_block bb)
4175 {
4176   gimple_stmt_iterator gsi;
4177   gimple *last;
4178 
4179   if (!gimple_seq_empty_p (phi_nodes (bb)))
4180     return false;
4181   last = last_stmt (bb);
4182   for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
4183     {
4184       gimple *stmt = gsi_stmt (gsi);
4185       tree lhs;
4186       imm_use_iterator imm_iter;
4187       use_operand_p use_p;
4188 
4189       if (is_gimple_debug (stmt))
4190 	continue;
4191       if (gimple_has_side_effects (stmt))
4192 	return false;
4193       if (stmt == last)
4194 	return true;
4195       if (!is_gimple_assign (stmt))
4196 	return false;
4197       lhs = gimple_assign_lhs (stmt);
4198       if (TREE_CODE (lhs) != SSA_NAME)
4199 	return false;
4200       if (gimple_assign_rhs_could_trap_p (stmt))
4201 	return false;
4202       FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
4203 	{
4204 	  gimple *use_stmt = USE_STMT (use_p);
4205 	  if (is_gimple_debug (use_stmt))
4206 	    continue;
4207 	  if (gimple_bb (use_stmt) != bb)
4208 	    return false;
4209 	}
4210     }
4211   return false;
4212 }
4213 
4214 /* If VAR is set by CODE (BIT_{AND,IOR}_EXPR) which is reassociable,
4215    return true and fill in *OPS recursively.  */
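/* Illustrative sketch (editor's addition): for a chain such as

     _1 = a & b;
     _2 = _1 & c;

   calling get_ops on _2 with CODE == BIT_AND_EXPR pushes operand
   entries for a, b and c (assuming each leaf has a single use) and
   marks the defining statements of _1 and _2 as visited.  */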
4216 
4217 static bool
4218 get_ops (tree var, enum tree_code code, vec<operand_entry *> *ops,
4219 	 class loop *loop)
4220 {
4221   gimple *stmt = SSA_NAME_DEF_STMT (var);
4222   tree rhs[2];
4223   int i;
4224 
4225   if (!is_reassociable_op (stmt, code, loop))
4226     return false;
4227 
4228   rhs[0] = gimple_assign_rhs1 (stmt);
4229   rhs[1] = gimple_assign_rhs2 (stmt);
4230   gimple_set_visited (stmt, true);
4231   for (i = 0; i < 2; i++)
4232     if (TREE_CODE (rhs[i]) == SSA_NAME
4233 	&& !get_ops (rhs[i], code, ops, loop)
4234 	&& has_single_use (rhs[i]))
4235       {
4236 	operand_entry *oe = operand_entry_pool.allocate ();
4237 
4238 	oe->op = rhs[i];
4239 	oe->rank = code;
4240 	oe->id = 0;
4241 	oe->count = 1;
4242 	oe->stmt_to_insert = NULL;
4243 	ops->safe_push (oe);
4244       }
4245   return true;
4246 }
4247 
4248 /* Find the ops that were added by get_ops starting from VAR, see if
4249    they were changed during update_range_test and, if so, create new
4250    stmts.  */
4251 
4252 static tree
4253 update_ops (tree var, enum tree_code code, vec<operand_entry *> ops,
4254 	    unsigned int *pidx, class loop *loop)
4255 {
4256   gimple *stmt = SSA_NAME_DEF_STMT (var);
4257   tree rhs[4];
4258   int i;
4259 
4260   if (!is_reassociable_op (stmt, code, loop))
4261     return NULL;
4262 
4263   rhs[0] = gimple_assign_rhs1 (stmt);
4264   rhs[1] = gimple_assign_rhs2 (stmt);
4265   rhs[2] = rhs[0];
4266   rhs[3] = rhs[1];
4267   for (i = 0; i < 2; i++)
4268     if (TREE_CODE (rhs[i]) == SSA_NAME)
4269       {
4270 	rhs[2 + i] = update_ops (rhs[i], code, ops, pidx, loop);
4271 	if (rhs[2 + i] == NULL_TREE)
4272 	  {
4273 	    if (has_single_use (rhs[i]))
4274 	      rhs[2 + i] = ops[(*pidx)++]->op;
4275 	    else
4276 	      rhs[2 + i] = rhs[i];
4277 	  }
4278       }
4279   if ((rhs[2] != rhs[0] || rhs[3] != rhs[1])
4280       && (rhs[2] != rhs[1] || rhs[3] != rhs[0]))
4281     {
4282       gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
4283       var = make_ssa_name (TREE_TYPE (var));
4284       gassign *g = gimple_build_assign (var, gimple_assign_rhs_code (stmt),
4285 					rhs[2], rhs[3]);
4286       gimple_set_uid (g, gimple_uid (stmt));
4287       gimple_set_visited (g, true);
4288       gsi_insert_before (&gsi, g, GSI_SAME_STMT);
4289     }
4290   return var;
4291 }
4292 
4293 /* Structure to track the initial value passed to get_ops and
4294    the range in the ops vector for each basic block.  */
4295 
4296 struct inter_bb_range_test_entry
4297 {
4298   tree op;
4299   unsigned int first_idx, last_idx;
4300 };
4301 
4302 /* Inter-bb range test optimization.
4303 
4304    Returns TRUE if a gimple conditional is optimized to a constant
4305    true/false condition, otherwise returns FALSE.
4306 
4307    This indicates to the caller that it should run a CFG cleanup pass
4308    once reassociation is completed.  */
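/* Illustrative sketch (editor's addition): the inter-bb case handles
   condition chains spread over several basic blocks, e.g.

     if (a == 3)
       goto found;
     if (a == 5)
       goto found;

   where each GIMPLE_COND sits in its own block.  The range tests are
   collected across the blocks, optimized as if they had been written
   as a single a == 3 || a == 5 expression, and the GIMPLE_CONDs are
   then rewritten, possibly to constant true/false, which is why the
   caller may need to run a CFG cleanup afterwards.  */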
4309 
4310 static bool
4311 maybe_optimize_range_tests (gimple *stmt)
4312 {
4313   basic_block first_bb = gimple_bb (stmt);
4314   basic_block last_bb = first_bb;
4315   basic_block other_bb = NULL;
4316   basic_block bb;
4317   edge_iterator ei;
4318   edge e;
4319   auto_vec<operand_entry *> ops;
4320   auto_vec<inter_bb_range_test_entry> bbinfo;
4321   bool any_changes = false;
4322   bool cfg_cleanup_needed = false;
4323 
4324   /* Consider only basic blocks that end with GIMPLE_COND or
4325      a cast statement satisfying final_range_test_p.  All
4326      but the last bb in the first_bb .. last_bb range
4327      should end with GIMPLE_COND.  */
4328   if (gimple_code (stmt) == GIMPLE_COND)
4329     {
4330       if (EDGE_COUNT (first_bb->succs) != 2)
4331 	return cfg_cleanup_needed;
4332     }
4333   else if (final_range_test_p (stmt))
4334     other_bb = single_succ (first_bb);
4335   else
4336     return cfg_cleanup_needed;
4337 
4338   if (stmt_could_throw_p (cfun, stmt))
4339     return cfg_cleanup_needed;
4340 
4341   /* As relative ordering of post-dominator sons isn't fixed,
4342      maybe_optimize_range_tests can be called first on any
4343      bb in the range we want to optimize.  So, start searching
4344      backwards, if first_bb can be set to a predecessor.  */
4345   while (single_pred_p (first_bb))
4346     {
4347       basic_block pred_bb = single_pred (first_bb);
4348       if (!suitable_cond_bb (pred_bb, first_bb, &other_bb, true))
4349 	break;
4350       if (!no_side_effect_bb (first_bb))
4351 	break;
4352       first_bb = pred_bb;
4353     }
4354   /* If first_bb is last_bb, other_bb hasn't been computed yet.
4355      Before starting forward search in last_bb successors, find
4356      out the other_bb.  */
4357   if (first_bb == last_bb)
4358     {
4359       other_bb = NULL;
4360       /* As a non-GIMPLE_COND last stmt always terminates the range,
4361 	 the forward search won't discover anything, so just give up.  */
4362       if (gimple_code (stmt) != GIMPLE_COND)
4363 	return cfg_cleanup_needed;
4364       /* Look at both successors.  Either it ends with a GIMPLE_COND
4365 	 and satisfies suitable_cond_bb, or ends with a cast and
4366 	 other_bb is that cast's successor.  */
4367       FOR_EACH_EDGE (e, ei, first_bb->succs)
4368 	if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE))
4369 	    || e->dest == first_bb)
4370 	  return cfg_cleanup_needed;
4371 	else if (single_pred_p (e->dest))
4372 	  {
4373 	    stmt = last_stmt (e->dest);
4374 	    if (stmt
4375 		&& gimple_code (stmt) == GIMPLE_COND
4376 		&& EDGE_COUNT (e->dest->succs) == 2)
4377 	      {
4378 		if (suitable_cond_bb (first_bb, e->dest, &other_bb, true))
4379 		  break;
4380 		else
4381 		  other_bb = NULL;
4382 	      }
4383 	    else if (stmt
4384 		     && final_range_test_p (stmt)
4385 		     && find_edge (first_bb, single_succ (e->dest)))
4386 	      {
4387 		other_bb = single_succ (e->dest);
4388 		if (other_bb == first_bb)
4389 		  other_bb = NULL;
4390 	      }
4391 	  }
4392       if (other_bb == NULL)
4393 	return cfg_cleanup_needed;
4394     }
4395   /* Now do the forward search, moving last_bb to successor bbs
4396      that aren't other_bb.  */
4397   while (EDGE_COUNT (last_bb->succs) == 2)
4398     {
4399       FOR_EACH_EDGE (e, ei, last_bb->succs)
4400 	if (e->dest != other_bb)
4401 	  break;
4402       if (e == NULL)
4403 	break;
4404       if (!single_pred_p (e->dest))
4405 	break;
4406       if (!suitable_cond_bb (e->dest, last_bb, &other_bb, false))
4407 	break;
4408       if (!no_side_effect_bb (e->dest))
4409 	break;
4410       last_bb = e->dest;
4411     }
4412   if (first_bb == last_bb)
4413     return cfg_cleanup_needed;
4414   /* Here basic blocks first_bb through last_bb's predecessor
4415      end with GIMPLE_COND, all of them have one edge to
4416      other_bb and another to another block in the range,
4417      all blocks except first_bb have no side-effects, and
4418      last_bb ends with either a GIMPLE_COND or a cast satisfying
4419      final_range_test_p.  */
4420   for (bb = last_bb; ; bb = single_pred (bb))
4421     {
4422       enum tree_code code;
4423       tree lhs, rhs;
4424       inter_bb_range_test_entry bb_ent;
4425 
4426       bb_ent.op = NULL_TREE;
4427       bb_ent.first_idx = ops.length ();
4428       bb_ent.last_idx = bb_ent.first_idx;
4429       e = find_edge (bb, other_bb);
4430       stmt = last_stmt (bb);
4431       gimple_set_visited (stmt, true);
4432       if (gimple_code (stmt) != GIMPLE_COND)
4433 	{
4434 	  use_operand_p use_p;
4435 	  gimple *phi;
4436 	  edge e2;
4437 	  unsigned int d;
4438 
4439 	  lhs = gimple_assign_lhs (stmt);
4440 	  rhs = gimple_assign_rhs1 (stmt);
4441 	  gcc_assert (bb == last_bb);
4442 
4443 	  /* stmt is
4444 	     _123 = (int) _234;
4445 	     OR
4446 	     _234 = a_2(D) == 2;
4447 
4448 	     followed by:
4449 	     <bb M>:
4450 	     # _345 = PHI <_123(N), 1(...), 1(...)>
4451 
4452 	     or 0 instead of 1.  If it is 0, the _234
4453 	     range test is anded together with all the
4454 	     other range tests; if it is 1, it is ored with
4455 	     them.  */
4456 	  single_imm_use (lhs, &use_p, &phi);
4457 	  gcc_assert (gimple_code (phi) == GIMPLE_PHI);
4458 	  e2 = find_edge (first_bb, other_bb);
4459 	  d = e2->dest_idx;
4460 	  gcc_assert (gimple_phi_arg_def (phi, e->dest_idx) == lhs);
4461 	  if (integer_zerop (gimple_phi_arg_def (phi, d)))
4462 	    code = BIT_AND_EXPR;
4463 	  else
4464 	    {
4465 	      gcc_checking_assert (integer_onep (gimple_phi_arg_def (phi, d)));
4466 	      code = BIT_IOR_EXPR;
4467 	    }
4468 
4469 	  /* If _234 SSA_NAME_DEF_STMT is
4470 	     _234 = _567 | _789;
4471 	     (or &, corresponding to 1/0 in the phi arguments),
4472 	     push into ops the individual range test arguments
4473 	     of the bitwise or resp. and, recursively.  */
4474 	  if (TREE_CODE (rhs) == SSA_NAME
4475 	      && (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt))
4476 		  != tcc_comparison)
4477 	      && !get_ops (rhs, code, &ops,
4478 			loop_containing_stmt (stmt))
4479 	      && has_single_use (rhs))
4480 	    {
4481 	      /* Otherwise, push the _234 range test itself.  */
4482 	      operand_entry *oe = operand_entry_pool.allocate ();
4483 
4484 	      oe->op = rhs;
4485 	      oe->rank = code;
4486 	      oe->id = 0;
4487 	      oe->count = 1;
4488 	      oe->stmt_to_insert = NULL;
4489 	      ops.safe_push (oe);
4490 	      bb_ent.last_idx++;
4491 	      bb_ent.op = rhs;
4492 	    }
4493 	  else if (is_gimple_assign (stmt)
4494 		   && (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt))
4495 		       == tcc_comparison)
4496 		   && !get_ops (lhs, code, &ops,
4497 				loop_containing_stmt (stmt))
4498 		   && has_single_use (lhs))
4499 	    {
4500 	      operand_entry *oe = operand_entry_pool.allocate ();
4501 	      oe->op = lhs;
4502 	      oe->rank = code;
4503 	      oe->id = 0;
4504 	      oe->count = 1;
4505 	      ops.safe_push (oe);
4506 	      bb_ent.last_idx++;
4507 	      bb_ent.op = lhs;
4508 	    }
4509 	  else
4510 	    {
4511 	      bb_ent.last_idx = ops.length ();
4512 	      bb_ent.op = rhs;
4513 	    }
4514 	  bbinfo.safe_push (bb_ent);
4515 	  continue;
4516 	}
4517       /* Otherwise stmt is GIMPLE_COND.  */
4518       code = gimple_cond_code (stmt);
4519       lhs = gimple_cond_lhs (stmt);
4520       rhs = gimple_cond_rhs (stmt);
4521       if (TREE_CODE (lhs) == SSA_NAME
4522 	  && INTEGRAL_TYPE_P (TREE_TYPE (lhs))
4523 	  && ((code != EQ_EXPR && code != NE_EXPR)
4524 	      || rhs != boolean_false_node
4525 		 /* Either push into ops the individual bitwise
4526 		    or resp. and operands, depending on which
4527 		    edge is other_bb.  */
4528 	      || !get_ops (lhs, (((e->flags & EDGE_TRUE_VALUE) == 0)
4529 				 ^ (code == EQ_EXPR))
4530 				? BIT_AND_EXPR : BIT_IOR_EXPR, &ops,
4531 			   loop_containing_stmt (stmt))))
4532 	{
4533 	  /* Or push the GIMPLE_COND stmt itself.  */
4534 	  operand_entry *oe = operand_entry_pool.allocate ();
4535 
4536 	  oe->op = NULL;
4537 	  oe->rank = (e->flags & EDGE_TRUE_VALUE)
4538 		     ? BIT_IOR_EXPR : BIT_AND_EXPR;
4539 	  /* oe->op = NULL signals that there is no SSA_NAME
4540 	     for the range test, and oe->id instead is the
4541 	     number of the basic block at whose end the
4542 	     GIMPLE_COND is.  */
4543 	  oe->id = bb->index;
4544 	  oe->count = 1;
4545 	  oe->stmt_to_insert = NULL;
4546 	  ops.safe_push (oe);
4547 	  bb_ent.op = NULL;
4548 	  bb_ent.last_idx++;
4549 	}
4550       else if (ops.length () > bb_ent.first_idx)
4551 	{
4552 	  bb_ent.op = lhs;
4553 	  bb_ent.last_idx = ops.length ();
4554 	}
4555       bbinfo.safe_push (bb_ent);
4556       if (bb == first_bb)
4557 	break;
4558     }
4559   if (ops.length () > 1)
4560     any_changes = optimize_range_tests (ERROR_MARK, &ops, first_bb);
4561   if (any_changes)
4562     {
4563       unsigned int idx, max_idx = 0;
4564       /* update_ops relies on has_single_use predicates returning the
4565 	 same values as they did during get_ops earlier.  Additionally it
4566 	 never removes statements, only adds new ones and it should walk
4567 	 from the single imm use and check the predicate already before
4568 	 making those changes.
4569 	 On the other hand, the handling of GIMPLE_COND directly can turn
4570 	 previously multiply used SSA_NAMEs into single use SSA_NAMEs, so
4571 	 it needs to be done in a separate loop afterwards.  */
4572       for (bb = last_bb, idx = 0; ; bb = single_pred (bb), idx++)
4573 	{
4574 	  if (bbinfo[idx].first_idx < bbinfo[idx].last_idx
4575 	      && bbinfo[idx].op != NULL_TREE)
4576 	    {
4577 	      tree new_op;
4578 
4579 	      max_idx = idx;
4580 	      stmt = last_stmt (bb);
4581 	      new_op = update_ops (bbinfo[idx].op,
4582 				   (enum tree_code)
4583 				   ops[bbinfo[idx].first_idx]->rank,
4584 				   ops, &bbinfo[idx].first_idx,
4585 				   loop_containing_stmt (stmt));
4586 	      if (new_op == NULL_TREE)
4587 		{
4588 		  gcc_assert (bb == last_bb);
4589 		  new_op = ops[bbinfo[idx].first_idx++]->op;
4590 		}
4591 	      if (bbinfo[idx].op != new_op)
4592 		{
4593 		  imm_use_iterator iter;
4594 		  use_operand_p use_p;
4595 		  gimple *use_stmt, *cast_or_tcc_cmp_stmt = NULL;
4596 
4597 		  FOR_EACH_IMM_USE_STMT (use_stmt, iter, bbinfo[idx].op)
4598 		    if (is_gimple_debug (use_stmt))
4599 		      continue;
4600 		    else if (gimple_code (use_stmt) == GIMPLE_COND
4601 			     || gimple_code (use_stmt) == GIMPLE_PHI)
4602 		      FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
4603 			SET_USE (use_p, new_op);
4604 		    else if ((is_gimple_assign (use_stmt)
4605 			      && (TREE_CODE_CLASS
4606 				  (gimple_assign_rhs_code (use_stmt))
4607 				  == tcc_comparison)))
4608 		      cast_or_tcc_cmp_stmt = use_stmt;
4609 		    else if (gimple_assign_cast_p (use_stmt))
4610 		      cast_or_tcc_cmp_stmt = use_stmt;
4611 		    else
4612 		      gcc_unreachable ();
4613 
4614 		  if (cast_or_tcc_cmp_stmt)
4615 		    {
4616 		      gcc_assert (bb == last_bb);
4617 		      tree lhs = gimple_assign_lhs (cast_or_tcc_cmp_stmt);
4618 		      tree new_lhs = make_ssa_name (TREE_TYPE (lhs));
4619 		      enum tree_code rhs_code
4620 			= gimple_assign_cast_p (cast_or_tcc_cmp_stmt)
4621 			? gimple_assign_rhs_code (cast_or_tcc_cmp_stmt)
4622 			: CONVERT_EXPR;
4623 		      gassign *g;
4624 		      if (is_gimple_min_invariant (new_op))
4625 			{
4626 			  new_op = fold_convert (TREE_TYPE (lhs), new_op);
4627 			  g = gimple_build_assign (new_lhs, new_op);
4628 			}
4629 		      else
4630 			g = gimple_build_assign (new_lhs, rhs_code, new_op);
4631 		      gimple_stmt_iterator gsi
4632 			= gsi_for_stmt (cast_or_tcc_cmp_stmt);
4633 		      gimple_set_uid (g, gimple_uid (cast_or_tcc_cmp_stmt));
4634 		      gimple_set_visited (g, true);
4635 		      gsi_insert_before (&gsi, g, GSI_SAME_STMT);
4636 		      FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs)
4637 			if (is_gimple_debug (use_stmt))
4638 			  continue;
4639 			else if (gimple_code (use_stmt) == GIMPLE_COND
4640 				 || gimple_code (use_stmt) == GIMPLE_PHI)
4641 			  FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
4642 			    SET_USE (use_p, new_lhs);
4643 			else
4644 			  gcc_unreachable ();
4645 		    }
4646 		}
4647 	    }
4648 	  if (bb == first_bb)
4649 	    break;
4650 	}
4651       for (bb = last_bb, idx = 0; ; bb = single_pred (bb), idx++)
4652 	{
4653 	  if (bbinfo[idx].first_idx < bbinfo[idx].last_idx
4654 	      && bbinfo[idx].op == NULL_TREE
4655 	      && ops[bbinfo[idx].first_idx]->op != NULL_TREE)
4656 	    {
4657 	      gcond *cond_stmt = as_a <gcond *> (last_stmt (bb));
4658 
4659 	      if (idx > max_idx)
4660 		max_idx = idx;
4661 
4662 	      /* If we collapse the conditional to a true/false
4663 		 condition, then bubble that knowledge up to our caller.  */
4664 	      if (integer_zerop (ops[bbinfo[idx].first_idx]->op))
4665 		{
4666 		  gimple_cond_make_false (cond_stmt);
4667 		  cfg_cleanup_needed = true;
4668 		}
4669 	      else if (integer_onep (ops[bbinfo[idx].first_idx]->op))
4670 		{
4671 		  gimple_cond_make_true (cond_stmt);
4672 		  cfg_cleanup_needed = true;
4673 		}
4674 	      else
4675 		{
4676 		  gimple_cond_set_code (cond_stmt, NE_EXPR);
4677 		  gimple_cond_set_lhs (cond_stmt,
4678 				       ops[bbinfo[idx].first_idx]->op);
4679 		  gimple_cond_set_rhs (cond_stmt, boolean_false_node);
4680 		}
4681 	      update_stmt (cond_stmt);
4682 	    }
4683 	  if (bb == first_bb)
4684 	    break;
4685 	}
4686 
4687       /* The above changes could result in basic blocks after the first
4688 	 modified one, up to and including last_bb, being executed even if
4689 	 they would not have been in the original program.  If the value
4690 	 ranges of assignment lhs' in those bbs were dependent on the
4691 	 conditions guarding those basic blocks, which now can change, the
4692 	 VRs might be incorrect.  As no_side_effect_bb should ensure those
4693 	 SSA_NAMEs are only used within the same bb, it should not be a big
4694 	 deal if we just reset all the VRs in those bbs.  See PR68671.  */
4695       for (bb = last_bb, idx = 0; idx < max_idx; bb = single_pred (bb), idx++)
4696 	reset_flow_sensitive_info_in_bb (bb);
4697     }
4698   return cfg_cleanup_needed;
4699 }
4700 
4701 /* Return true if OPERAND is defined by a PHI node which uses the LHS
4702    of STMT in it's operands.  This is also known as a "destructive
4703    of STMT in its operands.  This is also known as a "destructive
4704 
4705 static bool
4706 is_phi_for_stmt (gimple *stmt, tree operand)
4707 {
4708   gimple *def_stmt;
4709   gphi *def_phi;
4710   tree lhs;
4711   use_operand_p arg_p;
4712   ssa_op_iter i;
4713 
4714   if (TREE_CODE (operand) != SSA_NAME)
4715     return false;
4716 
4717   lhs = gimple_assign_lhs (stmt);
4718 
4719   def_stmt = SSA_NAME_DEF_STMT (operand);
4720   def_phi = dyn_cast <gphi *> (def_stmt);
4721   if (!def_phi)
4722     return false;
4723 
4724   FOR_EACH_PHI_ARG (arg_p, def_phi, i, SSA_OP_USE)
4725     if (lhs == USE_FROM_PTR (arg_p))
4726       return true;
4727   return false;
4728 }
4729 
4730 /* Remove def stmt of VAR if VAR has zero uses and recurse
4731    on rhs1 operand if so.  */
4732 
4733 static void
4734 remove_visited_stmt_chain (tree var)
4735 {
4736   gimple *stmt;
4737   gimple_stmt_iterator gsi;
4738 
4739   while (1)
4740     {
4741       if (TREE_CODE (var) != SSA_NAME || !has_zero_uses (var))
4742 	return;
4743       stmt = SSA_NAME_DEF_STMT (var);
4744       if (is_gimple_assign (stmt) && gimple_visited_p (stmt))
4745 	{
4746 	  var = gimple_assign_rhs1 (stmt);
4747 	  gsi = gsi_for_stmt (stmt);
4748 	  reassoc_remove_stmt (&gsi);
4749 	  release_defs (stmt);
4750 	}
4751       else
4752 	return;
4753     }
4754 }
4755 
4756 /* This function checks three consecutive operands in
4757    the passed operands vector OPS starting from OPINDEX and
4758    swaps two operands if it is profitable for the binary operation
4759    consuming the OPINDEX + 1 and OPINDEX + 2 operands.
4760 
4761    We pair ops with the same rank if possible.
4762 
4763    The alternative we try is to see if STMT is a destructive
4764    update style statement, which is like:
4765    b = phi (a, ...)
4766    a = c + b;
4767    In that case, we want to use the destructive update form to
4768    expose the possible vectorizer sum reduction opportunity.
4769    In that case, the third operand will be the phi node. This
4770    check is not performed if STMT is null.
4771 
4772    We could, of course, try to be better as noted above, and do a
4773    lot of work to try to find these opportunities in >3 operand
4774    cases, but it is unlikely to be worth it.  */
4775 
4776 static void
4777 swap_ops_for_binary_stmt (vec<operand_entry *> ops,
4778 			  unsigned int opindex, gimple *stmt)
4779 {
4780   operand_entry *oe1, *oe2, *oe3;
4781 
4782   oe1 = ops[opindex];
4783   oe2 = ops[opindex + 1];
4784   oe3 = ops[opindex + 2];
4785 
4786   if ((oe1->rank == oe2->rank
4787        && oe2->rank != oe3->rank)
4788       || (stmt && is_phi_for_stmt (stmt, oe3->op)
4789 	  && !is_phi_for_stmt (stmt, oe1->op)
4790 	  && !is_phi_for_stmt (stmt, oe2->op)))
4791     std::swap (*oe1, *oe3);
4792   else if ((oe1->rank == oe3->rank
4793 	    && oe2->rank != oe3->rank)
4794 	   || (stmt && is_phi_for_stmt (stmt, oe2->op)
4795 	       && !is_phi_for_stmt (stmt, oe1->op)
4796 	       && !is_phi_for_stmt (stmt, oe3->op)))
4797     std::swap (*oe1, *oe2);
4798 }
4799 
4800 /* If definition of RHS1 or RHS2 dominates STMT, return the later of those
4801    two definitions, otherwise return STMT.  */
4802 
4803 static inline gimple *
4804 find_insert_point (gimple *stmt, tree rhs1, tree rhs2)
4805 {
4806   if (TREE_CODE (rhs1) == SSA_NAME
4807       && reassoc_stmt_dominates_stmt_p (stmt, SSA_NAME_DEF_STMT (rhs1)))
4808     stmt = SSA_NAME_DEF_STMT (rhs1);
4809   if (TREE_CODE (rhs2) == SSA_NAME
4810       && reassoc_stmt_dominates_stmt_p (stmt, SSA_NAME_DEF_STMT (rhs2)))
4811     stmt = SSA_NAME_DEF_STMT (rhs2);
4812   return stmt;
4813 }
4814 
4815 /* If the stmt that defines operand has to be inserted, insert it
4816    before the use.  */
4817 static void
4818 insert_stmt_before_use (gimple *stmt, gimple *stmt_to_insert)
4819 {
4820   gcc_assert (is_gimple_assign (stmt_to_insert));
4821   tree rhs1 = gimple_assign_rhs1 (stmt_to_insert);
4822   tree rhs2 = gimple_assign_rhs2 (stmt_to_insert);
4823   gimple *insert_point = find_insert_point (stmt, rhs1, rhs2);
4824   gimple_stmt_iterator gsi = gsi_for_stmt (insert_point);
4825   gimple_set_uid (stmt_to_insert, gimple_uid (insert_point));
4826 
4827   /* If the insert point is not stmt, then insert_point would be
4828      the point where operand rhs1 or rhs2 is defined. In this case,
4829      stmt_to_insert has to be inserted afterwards. This would
4830      only happen when the stmt insertion point is flexible. */
4831   if (stmt == insert_point)
4832     gsi_insert_before (&gsi, stmt_to_insert, GSI_NEW_STMT);
4833   else
4834     insert_stmt_after (stmt_to_insert, insert_point);
4835 }
4836 
4837 
4838 /* Recursively rewrite our linearized statements so that the operators
4839    match those in OPS[OPINDEX], putting the computation in rank
4840    order.  Return new lhs.
4841    CHANGED is true if we shouldn't reuse the lhs SSA_NAME both in
4842    the current stmt and during recursive invocations.
4843    NEXT_CHANGED is true if we shouldn't reuse the lhs SSA_NAME in
4844    recursive invocations.  */
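/* Illustrative sketch (editor's addition): with the linearized chain

     _1 = a + b;
     _2 = _1 + c;
     _3 = _2 + d;

   rewrite_expr_tree is called on the statement defining _3 and walks
   down the rhs1 chain, replacing the leaf operands of each statement
   with the entries from OPS in rank order, creating fresh SSA names
   when CHANGED requires it.  */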
4845 
4846 static tree
4847 rewrite_expr_tree (gimple *stmt, enum tree_code rhs_code, unsigned int opindex,
4848 		   vec<operand_entry *> ops, bool changed, bool next_changed)
4849 {
4850   tree rhs1 = gimple_assign_rhs1 (stmt);
4851   tree rhs2 = gimple_assign_rhs2 (stmt);
4852   tree lhs = gimple_assign_lhs (stmt);
4853   operand_entry *oe;
4854 
4855   /* The final recursion case for this function is that you have
4856      exactly two operations left.
4857      If we had exactly one op in the entire list to start with, we
4858      would have never called this function, and the tail recursion
4859      rewrites them one at a time.  */
4860   if (opindex + 2 == ops.length ())
4861     {
4862       operand_entry *oe1, *oe2;
4863 
4864       oe1 = ops[opindex];
4865       oe2 = ops[opindex + 1];
4866 
4867       if (rhs1 != oe1->op || rhs2 != oe2->op)
4868 	{
4869 	  gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
4870 	  unsigned int uid = gimple_uid (stmt);
4871 
4872 	  if (dump_file && (dump_flags & TDF_DETAILS))
4873 	    {
4874 	      fprintf (dump_file, "Transforming ");
4875 	      print_gimple_stmt (dump_file, stmt, 0);
4876 	    }
4877 
4878 	  /* If the stmt that defines operand has to be inserted, insert it
4879 	     before the use.  */
4880 	  if (oe1->stmt_to_insert)
4881 	    insert_stmt_before_use (stmt, oe1->stmt_to_insert);
4882 	  if (oe2->stmt_to_insert)
4883 	    insert_stmt_before_use (stmt, oe2->stmt_to_insert);
4884 	  /* Even when changed is false, reassociation could have e.g. removed
4885 	     some redundant operations, so unless we are just swapping the
4886 	     arguments or unless there is no change at all (then we just
4887 	     return lhs), force creation of a new SSA_NAME.  */
4888 	  if (changed || ((rhs1 != oe2->op || rhs2 != oe1->op) && opindex))
4889 	    {
4890 	      gimple *insert_point
4891 		= find_insert_point (stmt, oe1->op, oe2->op);
4892 	      lhs = make_ssa_name (TREE_TYPE (lhs));
4893 	      stmt
4894 		= gimple_build_assign (lhs, rhs_code,
4895 				       oe1->op, oe2->op);
4896 	      gimple_set_uid (stmt, uid);
4897 	      gimple_set_visited (stmt, true);
4898 	      if (insert_point == gsi_stmt (gsi))
4899 		gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
4900 	      else
4901 		insert_stmt_after (stmt, insert_point);
4902 	    }
4903 	  else
4904 	    {
4905 	      gcc_checking_assert (find_insert_point (stmt, oe1->op, oe2->op)
4906 				   == stmt);
4907 	      gimple_assign_set_rhs1 (stmt, oe1->op);
4908 	      gimple_assign_set_rhs2 (stmt, oe2->op);
4909 	      update_stmt (stmt);
4910 	    }
4911 
4912 	  if (rhs1 != oe1->op && rhs1 != oe2->op)
4913 	    remove_visited_stmt_chain (rhs1);
4914 
4915 	  if (dump_file && (dump_flags & TDF_DETAILS))
4916 	    {
4917 	      fprintf (dump_file, " into ");
4918 	      print_gimple_stmt (dump_file, stmt, 0);
4919 	    }
4920 	}
4921       return lhs;
4922     }
4923 
4924   /* If we hit here, we should have 3 or more ops left.  */
4925   gcc_assert (opindex + 2 < ops.length ());
4926 
4927   /* Rewrite the next operator.  */
4928   oe = ops[opindex];
4929 
4930   /* If the stmt that defines operand has to be inserted, insert it
4931      before the use.  */
4932   if (oe->stmt_to_insert)
4933     insert_stmt_before_use (stmt, oe->stmt_to_insert);
4934 
4935   /* Recurse on the LHS of the binary operator, which is guaranteed to
4936      be the non-leaf side.  */
4937   tree new_rhs1
4938     = rewrite_expr_tree (SSA_NAME_DEF_STMT (rhs1), rhs_code, opindex + 1, ops,
4939 			 changed || oe->op != rhs2 || next_changed,
4940 			 false);
4941 
4942   if (oe->op != rhs2 || new_rhs1 != rhs1)
4943     {
4944       if (dump_file && (dump_flags & TDF_DETAILS))
4945 	{
4946 	  fprintf (dump_file, "Transforming ");
4947 	  print_gimple_stmt (dump_file, stmt, 0);
4948 	}
4949 
4950       /* If changed is false, this is either opindex == 0
4951 	 or all outer rhs2's were equal to corresponding oe->op,
4952 	 and powi_result is NULL.
4953 	 That means lhs is equivalent before and after reassociation.
4954 	 Otherwise ensure the old lhs SSA_NAME is not reused and
4955 	 create a new stmt as well, so that any debug stmts will be
4956 	 properly adjusted.  */
4957       if (changed)
4958 	{
4959 	  gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
4960 	  unsigned int uid = gimple_uid (stmt);
4961 	  gimple *insert_point = find_insert_point (stmt, new_rhs1, oe->op);
4962 
4963 	  lhs = make_ssa_name (TREE_TYPE (lhs));
4964 	  stmt = gimple_build_assign (lhs, rhs_code,
4965 				      new_rhs1, oe->op);
4966 	  gimple_set_uid (stmt, uid);
4967 	  gimple_set_visited (stmt, true);
4968 	  if (insert_point == gsi_stmt (gsi))
4969 	    gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
4970 	  else
4971 	    insert_stmt_after (stmt, insert_point);
4972 	}
4973       else
4974 	{
4975 	  gcc_checking_assert (find_insert_point (stmt, new_rhs1, oe->op)
4976 			       == stmt);
4977 	  gimple_assign_set_rhs1 (stmt, new_rhs1);
4978 	  gimple_assign_set_rhs2 (stmt, oe->op);
4979 	  update_stmt (stmt);
4980 	}
4981 
4982       if (dump_file && (dump_flags & TDF_DETAILS))
4983 	{
4984 	  fprintf (dump_file, " into ");
4985 	  print_gimple_stmt (dump_file, stmt, 0);
4986 	}
4987     }
4988   return lhs;
4989 }
4990 
4991 /* Find out how many cycles we need to compute a statement chain.
4992    OPS_NUM holds the number of statements in the chain.  CPU_WIDTH is the
4993    maximum number of independent statements we may execute per cycle.  */
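/* A rough worked example of the computation below: with OPS_NUM == 12
   and CPU_WIDTH == 2,
     res  = 12 / (2 * 2) = 3
     rest = 12 - 3 * 2   = 6
   and since 6 is not a power of two, floor_log2 (6) + 1 = 3 further
   cycles are added, for a total of 6 cycles.  */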
4994 
4995 static int
4996 get_required_cycles (int ops_num, int cpu_width)
4997 {
4998   int res;
4999   int elog;
5000   unsigned int rest;
5001 
5002   /* While we have more than 2 * cpu_width operands,
5003      we can reduce the number of operands by cpu_width
5004      per cycle.  */
5005   res = ops_num / (2 * cpu_width);
5006 
5007   /* The remaining operand count can then be halved each cycle
5008      until we have only one operand.  */
5009   rest = (unsigned)(ops_num - res * cpu_width);
5010   elog = exact_log2 (rest);
5011   if (elog >= 0)
5012     res += elog;
5013   else
5014     res += floor_log2 (rest) + 1;
5015 
5016   return res;
5017 }
5018 
5019 /* Returns an optimal number of registers to use for computation of
5020    given statements.  */
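/* Illustrative sketch: suppose ops_num is 6 and the target (or the
   tree-reassoc-width --param) yields an initial width of 4.
   get_required_cycles (6, 4) and get_required_cycles (6, 2) are both 3,
   while get_required_cycles (6, 1) is 5, so the binary search below
   settles on 2, the narrowest width that still reaches the optimal
   cycle count.  */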
5021 
5022 static int
5023 get_reassociation_width (int ops_num, enum tree_code opc,
5024 			 machine_mode mode)
5025 {
5026   int param_width = param_tree_reassoc_width;
5027   int width;
5028   int width_min;
5029   int cycles_best;
5030 
5031   if (param_width > 0)
5032     width = param_width;
5033   else
5034     width = targetm.sched.reassociation_width (opc, mode);
5035 
5036   if (width == 1)
5037     return width;
5038 
5039   /* Get the minimal time required for sequence computation.  */
5040   cycles_best = get_required_cycles (ops_num, width);
5041 
5042   /* Check if we may use less width and still compute sequence for
5043      the same time.  It will allow us to reduce registers usage.
5044      get_required_cycles is monotonically increasing with lower width
5045      so we can perform a binary search for the minimal width that still
5046      results in the optimal cycle count.  */
5047   width_min = 1;
5048   while (width > width_min)
5049     {
5050       int width_mid = (width + width_min) / 2;
5051 
5052       if (get_required_cycles (ops_num, width_mid) == cycles_best)
5053 	width = width_mid;
5054       else if (width_min < width_mid)
5055 	width_min = width_mid;
5056       else
5057 	break;
5058     }
5059 
5060   return width;
5061 }
5062 
5063 /* Recursively rewrite our linearized statements so that the operators
5064    match those in OPS[OPINDEX], putting the computation in rank
5065    order and trying to allow operations to be executed in
5066    parallel.  */
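/* A rough illustration with hypothetical SSA names: for five ranked
   operands a, b, c, d, e and WIDTH == 2, the serial chain
   ((((a + b) + c) + d) + e) is reshaped into something like
     t1 = d + e
     t2 = b + c
     t3 = t1 + t2
     result = t3 + a
   so that t1 and t2 can issue in the same cycle.  The exact pairing
   depends on the operand ranks and on WIDTH.  */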
5067 
5068 static void
5069 rewrite_expr_tree_parallel (gassign *stmt, int width,
5070 			    vec<operand_entry *> ops)
5071 {
5072   enum tree_code opcode = gimple_assign_rhs_code (stmt);
5073   int op_num = ops.length ();
5074   gcc_assert (op_num > 0);
5075   int stmt_num = op_num - 1;
5076   gimple **stmts = XALLOCAVEC (gimple *, stmt_num);
5077   int op_index = op_num - 1;
5078   int stmt_index = 0;
5079   int ready_stmts_end = 0;
5080   int i = 0;
5081   gimple *stmt1 = NULL, *stmt2 = NULL;
5082   tree last_rhs1 = gimple_assign_rhs1 (stmt);
5083 
5084   /* We start expression rewriting from the top statements.
5085      So, in this loop we create a full list of statements
5086      we will work with.  */
5087   stmts[stmt_num - 1] = stmt;
5088   for (i = stmt_num - 2; i >= 0; i--)
5089     stmts[i] = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmts[i+1]));
5090 
5091   for (i = 0; i < stmt_num; i++)
5092     {
5093       tree op1, op2;
5094 
5095       /* Determine whether we should use results of
5096 	 already handled statements or not.  */
5097       if (ready_stmts_end == 0
5098 	  && (i - stmt_index >= width || op_index < 1))
5099 	ready_stmts_end = i;
5100 
5101       /* Now we choose operands for the next statement.  A nonzero
5102 	 value in ready_stmts_end means that we should use the results
5103 	 of already generated statements as new operands.  */
5104       if (ready_stmts_end > 0)
5105 	{
5106 	  op1 = gimple_assign_lhs (stmts[stmt_index++]);
5107 	  if (ready_stmts_end > stmt_index)
5108 	    op2 = gimple_assign_lhs (stmts[stmt_index++]);
5109 	  else if (op_index >= 0)
5110 	    {
5111 	      operand_entry *oe = ops[op_index--];
5112 	      stmt2 = oe->stmt_to_insert;
5113 	      op2 = oe->op;
5114 	    }
5115 	  else
5116 	    {
5117 	      gcc_assert (stmt_index < i);
5118 	      op2 = gimple_assign_lhs (stmts[stmt_index++]);
5119 	    }
5120 
5121 	  if (stmt_index >= ready_stmts_end)
5122 	    ready_stmts_end = 0;
5123 	}
5124       else
5125 	{
5126 	  if (op_index > 1)
5127 	    swap_ops_for_binary_stmt (ops, op_index - 2, NULL);
5128 	  operand_entry *oe2 = ops[op_index--];
5129 	  operand_entry *oe1 = ops[op_index--];
5130 	  op2 = oe2->op;
5131 	  stmt2 = oe2->stmt_to_insert;
5132 	  op1 = oe1->op;
5133 	  stmt1 = oe1->stmt_to_insert;
5134 	}
5135 
5136       /* If we are emitting the last statement, put the operands
5137 	 into that original statement.  Setting I to the last index
5138 	 here also terminates the loop.  */
5139       if (op_index < 0 && stmt_index == i)
5140 	i = stmt_num - 1;
5141 
5142       if (dump_file && (dump_flags & TDF_DETAILS))
5143 	{
5144 	  fprintf (dump_file, "Transforming ");
5145 	  print_gimple_stmt (dump_file, stmts[i], 0);
5146 	}
5147 
5148       /* If the stmt that defines operand has to be inserted, insert it
5149 	 before the use.  */
5150       if (stmt1)
5151 	insert_stmt_before_use (stmts[i], stmt1);
5152       if (stmt2)
5153 	insert_stmt_before_use (stmts[i], stmt2);
5154       stmt1 = stmt2 = NULL;
5155 
5156       /* We keep original statement only for the last one.  All
5157 	 others are recreated.  */
5158       if (i == stmt_num - 1)
5159 	{
5160 	  gimple_assign_set_rhs1 (stmts[i], op1);
5161 	  gimple_assign_set_rhs2 (stmts[i], op2);
5162 	  update_stmt (stmts[i]);
5163 	}
5164       else
5165 	{
5166 	  stmts[i] = build_and_add_sum (TREE_TYPE (last_rhs1), op1, op2, opcode);
5167 	  gimple_set_visited (stmts[i], true);
5168 	}
5169       if (dump_file && (dump_flags & TDF_DETAILS))
5170 	{
5171 	  fprintf (dump_file, " into ");
5172 	  print_gimple_stmt (dump_file, stmts[i], 0);
5173 	}
5174     }
5175 
5176   remove_visited_stmt_chain (last_rhs1);
5177 }
5178 
5179 /* Transform STMT, which is really (A + B) + (C + D), into the left
5180    linear form, ((A+B)+C)+D.
5181    Recurse on D if necessary.  */
5182 
5183 static void
5184 linearize_expr (gimple *stmt)
5185 {
5186   gimple_stmt_iterator gsi;
5187   gimple *binlhs = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
5188   gimple *binrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
5189   gimple *oldbinrhs = binrhs;
5190   enum tree_code rhscode = gimple_assign_rhs_code (stmt);
5191   gimple *newbinrhs = NULL;
5192   class loop *loop = loop_containing_stmt (stmt);
5193   tree lhs = gimple_assign_lhs (stmt);
5194 
5195   gcc_assert (is_reassociable_op (binlhs, rhscode, loop)
5196 	      && is_reassociable_op (binrhs, rhscode, loop));
5197 
5198   gsi = gsi_for_stmt (stmt);
5199 
5200   gimple_assign_set_rhs2 (stmt, gimple_assign_rhs1 (binrhs));
5201   binrhs = gimple_build_assign (make_ssa_name (TREE_TYPE (lhs)),
5202 				gimple_assign_rhs_code (binrhs),
5203 				gimple_assign_lhs (binlhs),
5204 				gimple_assign_rhs2 (binrhs));
5205   gimple_assign_set_rhs1 (stmt, gimple_assign_lhs (binrhs));
5206   gsi_insert_before (&gsi, binrhs, GSI_SAME_STMT);
5207   gimple_set_uid (binrhs, gimple_uid (stmt));
5208 
5209   if (TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME)
5210     newbinrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
5211 
5212   if (dump_file && (dump_flags & TDF_DETAILS))
5213     {
5214       fprintf (dump_file, "Linearized: ");
5215       print_gimple_stmt (dump_file, stmt, 0);
5216     }
5217 
5218   reassociate_stats.linearized++;
5219   update_stmt (stmt);
5220 
5221   gsi = gsi_for_stmt (oldbinrhs);
5222   reassoc_remove_stmt (&gsi);
5223   release_defs (oldbinrhs);
5224 
5225   gimple_set_visited (stmt, true);
5226   gimple_set_visited (binlhs, true);
5227   gimple_set_visited (binrhs, true);
5228 
5229   /* Tail recurse on the new rhs if it still needs reassociation.  */
5230   if (newbinrhs && is_reassociable_op (newbinrhs, rhscode, loop))
5231     /* ??? This should probably be linearize_expr (newbinrhs) but I don't
5232 	   want to change the algorithm while converting to tuples.  */
5233     linearize_expr (stmt);
5234 }
5235 
5236 /* If LHS has a single immediate use that is a GIMPLE_ASSIGN statement, return
5237    it.  Otherwise, return NULL.  */
5238 
5239 static gimple *
5240 get_single_immediate_use (tree lhs)
5241 {
5242   use_operand_p immuse;
5243   gimple *immusestmt;
5244 
5245   if (TREE_CODE (lhs) == SSA_NAME
5246       && single_imm_use (lhs, &immuse, &immusestmt)
5247       && is_gimple_assign (immusestmt))
5248     return immusestmt;
5249 
5250   return NULL;
5251 }
5252 
5253 /* Recursively negate the value of TONEGATE, and return the SSA_NAME
5254    representing the negated value.  Insertions of any necessary
5255    instructions go before GSI.
5256    This function is recursive in that, if you hand it "a_5" as the
5257    value to negate, and a_5 is defined by "a_5 = b_3 + b_4", it will
5258    transform b_3 + b_4 into a_5 = -b_3 + -b_4.  */
5259 
5260 static tree
5261 negate_value (tree tonegate, gimple_stmt_iterator *gsip)
5262 {
5263   gimple *negatedefstmt = NULL;
5264   tree resultofnegate;
5265   gimple_stmt_iterator gsi;
5266   unsigned int uid;
5267 
5268   /* If we are trying to negate a name, defined by an add, negate the
5269      add operands instead.  */
5270   if (TREE_CODE (tonegate) == SSA_NAME)
5271     negatedefstmt = SSA_NAME_DEF_STMT (tonegate);
5272   if (TREE_CODE (tonegate) == SSA_NAME
5273       && is_gimple_assign (negatedefstmt)
5274       && TREE_CODE (gimple_assign_lhs (negatedefstmt)) == SSA_NAME
5275       && has_single_use (gimple_assign_lhs (negatedefstmt))
5276       && gimple_assign_rhs_code (negatedefstmt) == PLUS_EXPR)
5277     {
5278       tree rhs1 = gimple_assign_rhs1 (negatedefstmt);
5279       tree rhs2 = gimple_assign_rhs2 (negatedefstmt);
5280       tree lhs = gimple_assign_lhs (negatedefstmt);
5281       gimple *g;
5282 
5283       gsi = gsi_for_stmt (negatedefstmt);
5284       rhs1 = negate_value (rhs1, &gsi);
5285 
5286       gsi = gsi_for_stmt (negatedefstmt);
5287       rhs2 = negate_value (rhs2, &gsi);
5288 
5289       gsi = gsi_for_stmt (negatedefstmt);
5290       lhs = make_ssa_name (TREE_TYPE (lhs));
5291       gimple_set_visited (negatedefstmt, true);
5292       g = gimple_build_assign (lhs, PLUS_EXPR, rhs1, rhs2);
5293       gimple_set_uid (g, gimple_uid (negatedefstmt));
5294       gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5295       return lhs;
5296     }
5297 
5298   tonegate = fold_build1 (NEGATE_EXPR, TREE_TYPE (tonegate), tonegate);
5299   resultofnegate = force_gimple_operand_gsi (gsip, tonegate, true,
5300 					     NULL_TREE, true, GSI_SAME_STMT);
5301   gsi = *gsip;
5302   uid = gimple_uid (gsi_stmt (gsi));
5303   for (gsi_prev (&gsi); !gsi_end_p (gsi); gsi_prev (&gsi))
5304     {
5305       gimple *stmt = gsi_stmt (gsi);
5306       if (gimple_uid (stmt) != 0)
5307 	break;
5308       gimple_set_uid (stmt, uid);
5309     }
5310   return resultofnegate;
5311 }
5312 
5313 /* Return true if we should break up the subtract in STMT into an add
5314    with negate.  This is true when the subtract operands are really
5315    adds, or the subtract itself is used in an add expression.  In
5316    either case, breaking up the subtract into an add with negate
5317    exposes the adds to reassociation.  */
5318 
5319 static bool
5320 should_break_up_subtract (gimple *stmt)
5321 {
5322   tree lhs = gimple_assign_lhs (stmt);
5323   tree binlhs = gimple_assign_rhs1 (stmt);
5324   tree binrhs = gimple_assign_rhs2 (stmt);
5325   gimple *immusestmt;
5326   class loop *loop = loop_containing_stmt (stmt);
5327 
5328   if (TREE_CODE (binlhs) == SSA_NAME
5329       && is_reassociable_op (SSA_NAME_DEF_STMT (binlhs), PLUS_EXPR, loop))
5330     return true;
5331 
5332   if (TREE_CODE (binrhs) == SSA_NAME
5333       && is_reassociable_op (SSA_NAME_DEF_STMT (binrhs), PLUS_EXPR, loop))
5334     return true;
5335 
5336   if (TREE_CODE (lhs) == SSA_NAME
5337       && (immusestmt = get_single_immediate_use (lhs))
5338       && is_gimple_assign (immusestmt)
5339       && (gimple_assign_rhs_code (immusestmt) == PLUS_EXPR
5340 	  || (gimple_assign_rhs_code (immusestmt) == MINUS_EXPR
5341 	      && gimple_assign_rhs1 (immusestmt) == lhs)
5342 	  || gimple_assign_rhs_code (immusestmt) == MULT_EXPR))
5343     return true;
5344   return false;
5345 }
5346 
5347 /* Transform STMT from A - B into A + -B.  */
5348 
5349 static void
5350 break_up_subtract (gimple *stmt, gimple_stmt_iterator *gsip)
5351 {
5352   tree rhs1 = gimple_assign_rhs1 (stmt);
5353   tree rhs2 = gimple_assign_rhs2 (stmt);
5354 
5355   if (dump_file && (dump_flags & TDF_DETAILS))
5356     {
5357       fprintf (dump_file, "Breaking up subtract ");
5358       print_gimple_stmt (dump_file, stmt, 0);
5359     }
5360 
5361   rhs2 = negate_value (rhs2, gsip);
5362   gimple_assign_set_rhs_with_ops (gsip, PLUS_EXPR, rhs1, rhs2);
5363   update_stmt (stmt);
5364 }
5365 
5366 /* Determine whether STMT is a builtin call that raises an SSA name
5367    to an integer power and has only one use.  If so, and this is early
5368    reassociation and unsafe math optimizations are permitted, place
5369    the SSA name in *BASE and the exponent in *EXPONENT, and return TRUE.
5370    If any of these conditions does not hold, return FALSE.  */
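/* Hypothetical examples, assuming the flag checks below pass: a call
   t_1 = __builtin_pow (x_2, 4.0) yields *BASE = x_2, *EXPONENT = 4 and
   a return value of true, whereas pow (x_2, 0.5) or pow (x_2, -2.0)
   make this return false, because the exponent must be an integral
   value of at least 2.  */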
5371 
5372 static bool
5373 acceptable_pow_call (gcall *stmt, tree *base, HOST_WIDE_INT *exponent)
5374 {
5375   tree arg1;
5376   REAL_VALUE_TYPE c, cint;
5377 
5378   switch (gimple_call_combined_fn (stmt))
5379     {
5380     CASE_CFN_POW:
5381       if (flag_errno_math)
5382 	return false;
5383 
5384       *base = gimple_call_arg (stmt, 0);
5385       arg1 = gimple_call_arg (stmt, 1);
5386 
5387       if (TREE_CODE (arg1) != REAL_CST)
5388 	return false;
5389 
5390       c = TREE_REAL_CST (arg1);
5391 
5392       if (REAL_EXP (&c) > HOST_BITS_PER_WIDE_INT)
5393 	return false;
5394 
5395       *exponent = real_to_integer (&c);
5396       real_from_integer (&cint, VOIDmode, *exponent, SIGNED);
5397       if (!real_identical (&c, &cint))
5398 	return false;
5399 
5400       break;
5401 
5402     CASE_CFN_POWI:
5403       *base = gimple_call_arg (stmt, 0);
5404       arg1 = gimple_call_arg (stmt, 1);
5405 
5406       if (!tree_fits_shwi_p (arg1))
5407 	return false;
5408 
5409       *exponent = tree_to_shwi (arg1);
5410       break;
5411 
5412     default:
5413       return false;
5414     }
5415 
5416   /* Expanding negative exponents is generally unproductive, so we don't
5417      complicate matters with those.  Exponents of zero and one should
5418      have been handled by expression folding.  */
5419   if (*exponent < 2 || TREE_CODE (*base) != SSA_NAME)
5420     return false;
5421 
5422   return true;
5423 }
5424 
5425 /* Try to derive and add an operand entry for OP to *OPS.  Return false if
5426    unsuccessful.  */
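/* Two sketches of what this handles for CODE == MULT_EXPR, with
   hypothetical SSA names: if OP is defined by
   op_1 = __builtin_powi (x_2, 3), a repeated-factor entry (x_2
   occurring three times) is recorded via add_repeat_to_ops_vec; if OP
   is defined by op_1 = -x_2, the single operand is replaced by the
   pair { x_2, -1 } so the negation participates in reassociation like
   any other factor.  */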
5427 
5428 static bool
5429 try_special_add_to_ops (vec<operand_entry *> *ops,
5430 			enum tree_code code,
5431 			tree op, gimple* def_stmt)
5432 {
5433   tree base = NULL_TREE;
5434   HOST_WIDE_INT exponent = 0;
5435 
5436   if (TREE_CODE (op) != SSA_NAME
5437       || ! has_single_use (op))
5438     return false;
5439 
5440   if (code == MULT_EXPR
5441       && reassoc_insert_powi_p
5442       && flag_unsafe_math_optimizations
5443       && is_gimple_call (def_stmt)
5444       && acceptable_pow_call (as_a <gcall *> (def_stmt), &base, &exponent))
5445     {
5446       add_repeat_to_ops_vec (ops, base, exponent);
5447       gimple_set_visited (def_stmt, true);
5448       return true;
5449     }
5450   else if (code == MULT_EXPR
5451 	   && is_gimple_assign (def_stmt)
5452 	   && gimple_assign_rhs_code (def_stmt) == NEGATE_EXPR
5453 	   && !HONOR_SNANS (TREE_TYPE (op))
5454 	   && (!HONOR_SIGNED_ZEROS (TREE_TYPE (op))
5455 	       || !COMPLEX_FLOAT_TYPE_P (TREE_TYPE (op))))
5456     {
5457       tree rhs1 = gimple_assign_rhs1 (def_stmt);
5458       tree cst = build_minus_one_cst (TREE_TYPE (op));
5459       add_to_ops_vec (ops, rhs1);
5460       add_to_ops_vec (ops, cst);
5461       gimple_set_visited (def_stmt, true);
5462       return true;
5463     }
5464 
5465   return false;
5466 }
5467 
5468 /* Recursively linearize a binary expression that is the RHS of STMT.
5469    Place the operands of the expression tree in the vector named OPS.  */
5470 
5471 static void
5472 linearize_expr_tree (vec<operand_entry *> *ops, gimple *stmt,
5473 		     bool is_associative, bool set_visited)
5474 {
5475   tree binlhs = gimple_assign_rhs1 (stmt);
5476   tree binrhs = gimple_assign_rhs2 (stmt);
5477   gimple *binlhsdef = NULL, *binrhsdef = NULL;
5478   bool binlhsisreassoc = false;
5479   bool binrhsisreassoc = false;
5480   enum tree_code rhscode = gimple_assign_rhs_code (stmt);
5481   class loop *loop = loop_containing_stmt (stmt);
5482 
5483   if (set_visited)
5484     gimple_set_visited (stmt, true);
5485 
5486   if (TREE_CODE (binlhs) == SSA_NAME)
5487     {
5488       binlhsdef = SSA_NAME_DEF_STMT (binlhs);
5489       binlhsisreassoc = (is_reassociable_op (binlhsdef, rhscode, loop)
5490 			 && !stmt_could_throw_p (cfun, binlhsdef));
5491     }
5492 
5493   if (TREE_CODE (binrhs) == SSA_NAME)
5494     {
5495       binrhsdef = SSA_NAME_DEF_STMT (binrhs);
5496       binrhsisreassoc = (is_reassociable_op (binrhsdef, rhscode, loop)
5497 			 && !stmt_could_throw_p (cfun, binrhsdef));
5498     }
5499 
5500   /* If the LHS is not reassociable, but the RHS is, we need to swap
5501      them.  If neither is reassociable, there is nothing we can do, so
5502      just put them in the ops vector.  If the LHS is reassociable,
5503      linearize it.  If both are reassociable, then linearize the RHS
5504      and the LHS.  */
5505 
5506   if (!binlhsisreassoc)
5507     {
5508       /* If this is not an associative operation, like division, give up.  */
5509       if (!is_associative)
5510 	{
5511 	  add_to_ops_vec (ops, binrhs);
5512 	  return;
5513 	}
5514 
5515       if (!binrhsisreassoc)
5516 	{
5517 	  bool swap = false;
5518 	  if (try_special_add_to_ops (ops, rhscode, binrhs, binrhsdef))
5519 	    /* If we add ops for the rhs we expect to be able to recurse
5520 	       to it via the lhs during expression rewrite so swap
5521 	       operands.  */
5522 	    swap = true;
5523 	  else
5524 	    add_to_ops_vec (ops, binrhs);
5525 
5526 	  if (!try_special_add_to_ops (ops, rhscode, binlhs, binlhsdef))
5527 	    add_to_ops_vec (ops, binlhs);
5528 
5529 	  if (!swap)
5530 	    return;
5531 	}
5532 
5533       if (dump_file && (dump_flags & TDF_DETAILS))
5534 	{
5535 	  fprintf (dump_file, "swapping operands of ");
5536 	  print_gimple_stmt (dump_file, stmt, 0);
5537 	}
5538 
5539       swap_ssa_operands (stmt,
5540 			 gimple_assign_rhs1_ptr (stmt),
5541 			 gimple_assign_rhs2_ptr (stmt));
5542       update_stmt (stmt);
5543 
5544       if (dump_file && (dump_flags & TDF_DETAILS))
5545 	{
5546 	  fprintf (dump_file, " is now ");
5547 	  print_gimple_stmt (dump_file, stmt, 0);
5548 	}
5549       if (!binrhsisreassoc)
5550 	return;
5551 
5552       /* We want to make it so the lhs is always the reassociative op,
5553 	 so swap.  */
5554       std::swap (binlhs, binrhs);
5555     }
5556   else if (binrhsisreassoc)
5557     {
5558       linearize_expr (stmt);
5559       binlhs = gimple_assign_rhs1 (stmt);
5560       binrhs = gimple_assign_rhs2 (stmt);
5561     }
5562 
5563   gcc_assert (TREE_CODE (binrhs) != SSA_NAME
5564 	      || !is_reassociable_op (SSA_NAME_DEF_STMT (binrhs),
5565 				      rhscode, loop));
5566   linearize_expr_tree (ops, SSA_NAME_DEF_STMT (binlhs),
5567 		       is_associative, set_visited);
5568 
5569   if (!try_special_add_to_ops (ops, rhscode, binrhs, binrhsdef))
5570     add_to_ops_vec (ops, binrhs);
5571 }
5572 
5573 /* Repropagate the negates back into subtracts, since no other pass
5574    currently does it.  */
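/* For instance, with hypothetical SSA names: given x_1 = -a_2 and
   y_3 = x_1 + b_4, the PLUS_EXPR case below rewrites the use into the
   subtraction y_3 = b_4 - a_2.  */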
5575 
5576 static void
5577 repropagate_negates (void)
5578 {
5579   unsigned int i = 0;
5580   tree negate;
5581 
5582   FOR_EACH_VEC_ELT (plus_negates, i, negate)
5583     {
5584       gimple *user = get_single_immediate_use (negate);
5585 
5586       if (!user || !is_gimple_assign (user))
5587 	continue;
5588 
5589       /* The negate operand can be either operand of a PLUS_EXPR
5590 	 (it can be the LHS if the RHS is a constant for example).
5591 
5592 	 Force the negate operand to the RHS of the PLUS_EXPR, then
5593 	 transform the PLUS_EXPR into a MINUS_EXPR.  */
5594       if (gimple_assign_rhs_code (user) == PLUS_EXPR)
5595 	{
5596 	  /* If the negated operand appears on the LHS of the
5597 	     PLUS_EXPR, exchange the operands of the PLUS_EXPR
5598 	     to force the negated operand to the RHS of the PLUS_EXPR.  */
5599 	  if (gimple_assign_rhs1 (user) == negate)
5600 	    {
5601 	      swap_ssa_operands (user,
5602 				 gimple_assign_rhs1_ptr (user),
5603 				 gimple_assign_rhs2_ptr (user));
5604 	    }
5605 
5606 	  /* Now transform the PLUS_EXPR into a MINUS_EXPR and replace
5607 	     the RHS of the PLUS_EXPR with the operand of the NEGATE_EXPR.  */
5608 	  if (gimple_assign_rhs2 (user) == negate)
5609 	    {
5610 	      tree rhs1 = gimple_assign_rhs1 (user);
5611 	      tree rhs2 = gimple_assign_rhs1 (SSA_NAME_DEF_STMT (negate));
5612 	      gimple_stmt_iterator gsi = gsi_for_stmt (user);
5613 	      gimple_assign_set_rhs_with_ops (&gsi, MINUS_EXPR, rhs1, rhs2);
5614 	      update_stmt (user);
5615 	    }
5616 	}
5617       else if (gimple_assign_rhs_code (user) == MINUS_EXPR)
5618 	{
5619 	  if (gimple_assign_rhs1 (user) == negate)
5620 	    {
5621 	      /* We have
5622 	           x = -a
5623 		   y = x - b
5624 		 which we transform into
5625 		   x = a + b
5626 		   y = -x .
5627 		 This pushes down the negate which we possibly can merge
5628 		 into some other operation, hence insert it into the
5629 		 plus_negates vector.  */
5630 	      gimple *feed = SSA_NAME_DEF_STMT (negate);
5631 	      tree a = gimple_assign_rhs1 (feed);
5632 	      tree b = gimple_assign_rhs2 (user);
5633 	      gimple_stmt_iterator gsi = gsi_for_stmt (feed);
5634 	      gimple_stmt_iterator gsi2 = gsi_for_stmt (user);
5635 	      tree x = make_ssa_name (TREE_TYPE (gimple_assign_lhs (feed)));
5636 	      gimple *g = gimple_build_assign (x, PLUS_EXPR, a, b);
5637 	      gsi_insert_before (&gsi2, g, GSI_SAME_STMT);
5638 	      gimple_assign_set_rhs_with_ops (&gsi2, NEGATE_EXPR, x);
5639 	      user = gsi_stmt (gsi2);
5640 	      update_stmt (user);
5641 	      reassoc_remove_stmt (&gsi);
5642 	      release_defs (feed);
5643 	      plus_negates.safe_push (gimple_assign_lhs (user));
5644 	    }
5645 	  else
5646 	    {
5647 	      /* Transform "x = -a; y = b - x" into "y = b + a", getting
5648 	         rid of one operation.  */
5649 	      gimple *feed = SSA_NAME_DEF_STMT (negate);
5650 	      tree a = gimple_assign_rhs1 (feed);
5651 	      tree rhs1 = gimple_assign_rhs1 (user);
5652 	      gimple_stmt_iterator gsi = gsi_for_stmt (user);
5653 	      gimple_assign_set_rhs_with_ops (&gsi, PLUS_EXPR, rhs1, a);
5654 	      update_stmt (gsi_stmt (gsi));
5655 	    }
5656 	}
5657     }
5658 }
5659 
5660 /* Returns true if OP is of a type for which we can do reassociation.
5661    That is for wrapping integral types, non-saturating fixed-point types,
5662    and floating-point types when associative-math is enabled.  */
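/* Rough examples: an unsigned int operand qualifies because its
   overflow wraps; a plain signed int operand does not, since its
   overflow is undefined rather than wrapping (unless -fwrapv is in
   effect); a double operand qualifies only under -fassociative-math.  */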
5663 
5664 static bool
5665 can_reassociate_p (tree op)
5666 {
5667   tree type = TREE_TYPE (op);
5668   if (TREE_CODE (op) == SSA_NAME && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (op))
5669     return false;
5670   if ((ANY_INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_WRAPS (type))
5671       || NON_SAT_FIXED_POINT_TYPE_P (type)
5672       || (flag_associative_math && FLOAT_TYPE_P (type)))
5673     return true;
5674   return false;
5675 }
5676 
5677 /* Break up subtract operations in block BB.
5678 
5679    We do this top down because we don't know whether the subtract is
5680    part of a possible chain of reassociation except at the top.
5681 
5682    IE given
5683    d = f + g
5684    c = a + e
5685    b = c - d
5686    q = b - r
5687    k = t - q
5688 
5689    we want to break up k = t - q, but we won't until we've transformed q
5690    = b - r, which won't be broken up until we transform b = c - d.
5691 
5692    En passant, clear the GIMPLE visited flag on every statement
5693    and set UIDs within each basic block.  */
5694 
5695 static void
5696 break_up_subtract_bb (basic_block bb)
5697 {
5698   gimple_stmt_iterator gsi;
5699   basic_block son;
5700   unsigned int uid = 1;
5701 
5702   for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
5703     {
5704       gimple *stmt = gsi_stmt (gsi);
5705       gimple_set_visited (stmt, false);
5706       gimple_set_uid (stmt, uid++);
5707 
5708       if (!is_gimple_assign (stmt)
5709 	  || !can_reassociate_p (gimple_assign_lhs (stmt)))
5710 	continue;
5711 
5712       /* Look for simple gimple subtract operations.  */
5713       if (gimple_assign_rhs_code (stmt) == MINUS_EXPR)
5714 	{
5715 	  if (!can_reassociate_p (gimple_assign_rhs1 (stmt))
5716 	      || !can_reassociate_p (gimple_assign_rhs2 (stmt)))
5717 	    continue;
5718 
5719 	  /* Check for a subtract used only in an addition.  If this
5720 	     is the case, transform it into add of a negate for better
5721 	     reassociation.  IE transform C = A-B into C = A + -B if C
5722 	     is only used in an addition.  */
5723 	  if (should_break_up_subtract (stmt))
5724 	    break_up_subtract (stmt, &gsi);
5725 	}
5726       else if (gimple_assign_rhs_code (stmt) == NEGATE_EXPR
5727 	       && can_reassociate_p (gimple_assign_rhs1 (stmt)))
5728 	plus_negates.safe_push (gimple_assign_lhs (stmt));
5729     }
5730   for (son = first_dom_son (CDI_DOMINATORS, bb);
5731        son;
5732        son = next_dom_son (CDI_DOMINATORS, son))
5733     break_up_subtract_bb (son);
5734 }
5735 
5736 /* Used for repeated factor analysis.  */
5737 struct repeat_factor
5738 {
5739   /* An SSA name that occurs in a multiply chain.  */
5740   tree factor;
5741 
5742   /* Cached rank of the factor.  */
5743   unsigned rank;
5744 
5745   /* Number of occurrences of the factor in the chain.  */
5746   HOST_WIDE_INT count;
5747 
5748   /* An SSA name representing the product of this factor and
5749      all factors appearing later in the repeated factor vector.  */
5750   tree repr;
5751 };
5752 
5753 
5754 static vec<repeat_factor> repeat_factor_vec;
5755 
5756 /* Used for sorting the repeat factor vector.  Sort primarily by
5757    ascending occurrence count, secondarily by descending rank.  */
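/* E.g., a factor occurring twice sorts before one occurring three
   times; among factors with equal counts, the higher-ranked one comes
   first.  */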
5758 
5759 static int
5760 compare_repeat_factors (const void *x1, const void *x2)
5761 {
5762   const repeat_factor *rf1 = (const repeat_factor *) x1;
5763   const repeat_factor *rf2 = (const repeat_factor *) x2;
5764 
5765   if (rf1->count != rf2->count)
5766     return rf1->count - rf2->count;
5767 
5768   return rf2->rank - rf1->rank;
5769 }
5770 
5771 /* Look for repeated operands in OPS in the multiply tree rooted at
5772    STMT.  Replace them with an optimal sequence of multiplies and powi
5773    builtin calls, and remove the used operands from OPS.  Return an
5774    SSA name representing the value of the replacement sequence.  */
5775 
5776 static tree
5777 attempt_builtin_powi (gimple *stmt, vec<operand_entry *> *ops)
5778 {
5779   unsigned i, j, vec_len;
5780   int ii;
5781   operand_entry *oe;
5782   repeat_factor *rf1, *rf2;
5783   repeat_factor rfnew;
5784   tree result = NULL_TREE;
5785   tree target_ssa, iter_result;
5786   tree type = TREE_TYPE (gimple_get_lhs (stmt));
5787   tree powi_fndecl = mathfn_built_in (type, BUILT_IN_POWI);
5788   gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
5789   gimple *mul_stmt, *pow_stmt;
5790 
5791   /* Nothing to do if BUILT_IN_POWI doesn't exist for this type and
5792      target.  */
5793   if (!powi_fndecl)
5794     return NULL_TREE;
5795 
5796   /* Allocate the repeated factor vector.  */
5797   repeat_factor_vec.create (10);
5798 
5799   /* Scan the OPS vector for all SSA names in the product and build
5800      up a vector of occurrence counts for each factor.  */
5801   FOR_EACH_VEC_ELT (*ops, i, oe)
5802     {
5803       if (TREE_CODE (oe->op) == SSA_NAME)
5804 	{
5805 	  FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
5806 	    {
5807 	      if (rf1->factor == oe->op)
5808 		{
5809 		  rf1->count += oe->count;
5810 		  break;
5811 		}
5812 	    }
5813 
5814 	  if (j >= repeat_factor_vec.length ())
5815 	    {
5816 	      rfnew.factor = oe->op;
5817 	      rfnew.rank = oe->rank;
5818 	      rfnew.count = oe->count;
5819 	      rfnew.repr = NULL_TREE;
5820 	      repeat_factor_vec.safe_push (rfnew);
5821 	    }
5822 	}
5823     }
5824 
5825   /* Sort the repeated factor vector by (a) increasing occurrence count,
5826      and (b) decreasing rank.  */
5827   repeat_factor_vec.qsort (compare_repeat_factors);
5828 
5829   /* It is generally best to combine as many base factors as possible
5830      into a product before applying __builtin_powi to the result.
5831      However, the sort order chosen for the repeated factor vector
5832      allows us to cache partial results for the product of the base
5833      factors for subsequent use.  When we already have a cached partial
5834      result from a previous iteration, it is best to make use of it
5835      before looking for another __builtin_pow opportunity.
5836 
5837      As an example, consider x * x * y * y * y * z * z * z * z.
5838      We want to first compose the product x * y * z, raise it to the
5839      second power, then multiply this by y * z, and finally multiply
5840      by z.  This can be done in 5 multiplies provided we cache y * z
5841      for use in both expressions:
5842 
5843         t1 = y * z
5844 	t2 = t1 * x
5845 	t3 = t2 * t2
5846 	t4 = t1 * t3
5847 	result = t4 * z
5848 
5849      If we instead ignored the cached y * z and first multiplied by
5850      the __builtin_pow opportunity z * z, we would get the inferior:
5851 
5852         t1 = y * z
5853 	t2 = t1 * x
5854 	t3 = t2 * t2
5855 	t4 = z * z
5856 	t5 = t3 * t4
5857         result = t5 * y  */
5858 
5859   vec_len = repeat_factor_vec.length ();
5860 
5861   /* Repeatedly look for opportunities to create a builtin_powi call.  */
5862   while (true)
5863     {
5864       HOST_WIDE_INT power;
5865 
5866       /* First look for the largest cached product of factors from
5867 	 preceding iterations.  If found, create a builtin_powi for
5868 	 it if the minimum occurrence count for its factors is at
5869 	 least 2, or just use this cached product as our next
5870 	 multiplicand if the minimum occurrence count is 1.  */
5871       FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
5872 	{
5873 	  if (rf1->repr && rf1->count > 0)
5874 	    break;
5875 	}
5876 
5877       if (j < vec_len)
5878 	{
5879 	  power = rf1->count;
5880 
5881 	  if (power == 1)
5882 	    {
5883 	      iter_result = rf1->repr;
5884 
5885 	      if (dump_file && (dump_flags & TDF_DETAILS))
5886 		{
5887 		  unsigned elt;
5888 		  repeat_factor *rf;
5889 		  fputs ("Multiplying by cached product ", dump_file);
5890 		  for (elt = j; elt < vec_len; elt++)
5891 		    {
5892 		      rf = &repeat_factor_vec[elt];
5893 		      print_generic_expr (dump_file, rf->factor);
5894 		      if (elt < vec_len - 1)
5895 			fputs (" * ", dump_file);
5896 		    }
5897 		  fputs ("\n", dump_file);
5898 		}
5899 	    }
5900 	  else
5901 	    {
5902 	      iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
5903 	      pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
5904 					    build_int_cst (integer_type_node,
5905 							   power));
5906 	      gimple_call_set_lhs (pow_stmt, iter_result);
5907 	      gimple_set_location (pow_stmt, gimple_location (stmt));
5908 	      gimple_set_uid (pow_stmt, gimple_uid (stmt));
5909 	      gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
5910 
5911 	      if (dump_file && (dump_flags & TDF_DETAILS))
5912 		{
5913 		  unsigned elt;
5914 		  repeat_factor *rf;
5915 		  fputs ("Building __builtin_pow call for cached product (",
5916 			 dump_file);
5917 		  for (elt = j; elt < vec_len; elt++)
5918 		    {
5919 		      rf = &repeat_factor_vec[elt];
5920 		      print_generic_expr (dump_file, rf->factor);
5921 		      if (elt < vec_len - 1)
5922 			fputs (" * ", dump_file);
5923 		    }
5924 		  fprintf (dump_file, ")^" HOST_WIDE_INT_PRINT_DEC"\n",
5925 			   power);
5926 		}
5927 	    }
5928 	}
5929       else
5930 	{
5931 	  /* Otherwise, find the first factor in the repeated factor
5932 	     vector whose occurrence count is at least 2.  If no such
5933 	     factor exists, there are no builtin_powi opportunities
5934 	     remaining.  */
5935 	  FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
5936 	    {
5937 	      if (rf1->count >= 2)
5938 		break;
5939 	    }
5940 
5941 	  if (j >= vec_len)
5942 	    break;
5943 
5944 	  power = rf1->count;
5945 
5946 	  if (dump_file && (dump_flags & TDF_DETAILS))
5947 	    {
5948 	      unsigned elt;
5949 	      repeat_factor *rf;
5950 	      fputs ("Building __builtin_pow call for (", dump_file);
5951 	      for (elt = j; elt < vec_len; elt++)
5952 		{
5953 		  rf = &repeat_factor_vec[elt];
5954 		  print_generic_expr (dump_file, rf->factor);
5955 		  if (elt < vec_len - 1)
5956 		    fputs (" * ", dump_file);
5957 		}
5958 	      fprintf (dump_file, ")^" HOST_WIDE_INT_PRINT_DEC"\n", power);
5959 	    }
5960 
5961 	  reassociate_stats.pows_created++;
5962 
5963 	  /* Visit each element of the vector in reverse order (so that
5964 	     high-occurrence elements are visited first, and within the
5965 	     same occurrence count, lower-ranked elements are visited
5966 	     first).  Form a linear product of all elements in this order
5967 	     whose occurrence count is at least that of element J.
5968 	     Record the SSA name representing the product of each element
5969 	     with all subsequent elements in the vector.  */
5970 	  if (j == vec_len - 1)
5971 	    rf1->repr = rf1->factor;
5972 	  else
5973 	    {
5974 	      for (ii = vec_len - 2; ii >= (int)j; ii--)
5975 		{
5976 		  tree op1, op2;
5977 
5978 		  rf1 = &repeat_factor_vec[ii];
5979 		  rf2 = &repeat_factor_vec[ii + 1];
5980 
5981 		  /* Init the last factor's representative to be itself.  */
5982 		  if (!rf2->repr)
5983 		    rf2->repr = rf2->factor;
5984 
5985 		  op1 = rf1->factor;
5986 		  op2 = rf2->repr;
5987 
5988 		  target_ssa = make_temp_ssa_name (type, NULL, "reassocpow");
5989 		  mul_stmt = gimple_build_assign (target_ssa, MULT_EXPR,
5990 						  op1, op2);
5991 		  gimple_set_location (mul_stmt, gimple_location (stmt));
5992 		  gimple_set_uid (mul_stmt, gimple_uid (stmt));
5993 		  gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
5994 		  rf1->repr = target_ssa;
5995 
5996 		  /* Don't reprocess the multiply we just introduced.  */
5997 		  gimple_set_visited (mul_stmt, true);
5998 		}
5999 	    }
6000 
6001 	  /* Form a call to __builtin_powi for the maximum product
6002 	     just formed, raised to the power obtained earlier.  */
6003 	  rf1 = &repeat_factor_vec[j];
6004 	  iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
6005 	  pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
6006 					build_int_cst (integer_type_node,
6007 						       power));
6008 	  gimple_call_set_lhs (pow_stmt, iter_result);
6009 	  gimple_set_location (pow_stmt, gimple_location (stmt));
6010 	  gimple_set_uid (pow_stmt, gimple_uid (stmt));
6011 	  gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
6012 	}
6013 
6014       /* If we previously formed at least one other builtin_powi call,
6015 	 form the product of this one and those others.  */
6016       if (result)
6017 	{
6018 	  tree new_result = make_temp_ssa_name (type, NULL, "reassocpow");
6019 	  mul_stmt = gimple_build_assign (new_result, MULT_EXPR,
6020 					  result, iter_result);
6021 	  gimple_set_location (mul_stmt, gimple_location (stmt));
6022 	  gimple_set_uid (mul_stmt, gimple_uid (stmt));
6023 	  gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
6024 	  gimple_set_visited (mul_stmt, true);
6025 	  result = new_result;
6026 	}
6027       else
6028 	result = iter_result;
6029 
6030       /* Decrement the occurrence count of each element in the product
6031 	 by the count found above, and remove this many copies of each
6032 	 factor from OPS.  */
6033       for (i = j; i < vec_len; i++)
6034 	{
6035 	  unsigned k = power;
6036 	  unsigned n;
6037 
6038 	  rf1 = &repeat_factor_vec[i];
6039 	  rf1->count -= power;
6040 
6041 	  FOR_EACH_VEC_ELT_REVERSE (*ops, n, oe)
6042 	    {
6043 	      if (oe->op == rf1->factor)
6044 		{
6045 		  if (oe->count <= k)
6046 		    {
6047 		      ops->ordered_remove (n);
6048 		      k -= oe->count;
6049 
6050 		      if (k == 0)
6051 			break;
6052 		    }
6053 		  else
6054 		    {
6055 		      oe->count -= k;
6056 		      break;
6057 		    }
6058 		}
6059 	    }
6060 	}
6061     }
6062 
6063   /* At this point all elements in the repeated factor vector have a
6064      remaining occurrence count of 0 or 1, and those with a count of 1
6065      don't have cached representatives.  Re-sort the ops vector and
6066      clean up.  */
6067   ops->qsort (sort_by_operand_rank);
6068   repeat_factor_vec.release ();
6069 
6070   /* Return the final product computed herein.  Note that there may
6071      still be some elements with single occurrence count left in OPS;
6072      those will be handled by the normal reassociation logic.  */
6073   return result;
6074 }
6075 
6076 /* Attempt to optimize
6077    CST1 * copysign (CST2, y) -> copysign (CST1 * CST2, y) if CST1 > 0, or
6078    CST1 * copysign (CST2, y) -> -copysign (CST1 * CST2, y) if CST1 < 0.  */
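/* A worked instance of the folding below: 2.0 * copysign (3.0, y)
   becomes copysign (6.0, y), while -2.0 * copysign (3.0, y) becomes
   -copysign (6.0, y), i.e. the constants are multiplied and a negate
   is emitted when the outer constant is negative.  */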
6079 
6080 static void
6081 attempt_builtin_copysign (vec<operand_entry *> *ops)
6082 {
6083   operand_entry *oe;
6084   unsigned int i;
6085   unsigned int length = ops->length ();
6086   tree cst = ops->last ()->op;
6087 
6088   if (length == 1 || TREE_CODE (cst) != REAL_CST)
6089     return;
6090 
6091   FOR_EACH_VEC_ELT (*ops, i, oe)
6092     {
6093       if (TREE_CODE (oe->op) == SSA_NAME
6094 	  && has_single_use (oe->op))
6095 	{
6096 	  gimple *def_stmt = SSA_NAME_DEF_STMT (oe->op);
6097 	  if (gcall *old_call = dyn_cast <gcall *> (def_stmt))
6098 	    {
6099 	      tree arg0, arg1;
6100 	      switch (gimple_call_combined_fn (old_call))
6101 		{
6102 		CASE_CFN_COPYSIGN:
6103 		CASE_CFN_COPYSIGN_FN:
6104 		  arg0 = gimple_call_arg (old_call, 0);
6105 		  arg1 = gimple_call_arg (old_call, 1);
6106 		  /* The first argument of copysign must be a constant,
6107 		     otherwise there's nothing to do.  */
6108 		  if (TREE_CODE (arg0) == REAL_CST)
6109 		    {
6110 		      tree type = TREE_TYPE (arg0);
6111 		      tree mul = const_binop (MULT_EXPR, type, cst, arg0);
6112 		      /* If we couldn't fold to a single constant, skip it.
6113 			 That happens e.g. for inexact multiplication when
6114 			 -frounding-math.  */
6115 		      if (mul == NULL_TREE)
6116 			break;
6117 		      /* Instead of adjusting OLD_CALL, let's build a new
6118 			 call to not leak the LHS and prevent keeping bogus
6119 			 debug statements.  DCE will clean up the old call.  */
6120 		      gcall *new_call;
6121 		      if (gimple_call_internal_p (old_call))
6122 			new_call = gimple_build_call_internal
6123 			  (IFN_COPYSIGN, 2, mul, arg1);
6124 		      else
6125 			new_call = gimple_build_call
6126 			  (gimple_call_fndecl (old_call), 2, mul, arg1);
6127 		      tree lhs = make_ssa_name (type);
6128 		      gimple_call_set_lhs (new_call, lhs);
6129 		      gimple_set_location (new_call,
6130 					   gimple_location (old_call));
6131 		      insert_stmt_after (new_call, old_call);
6132 		      /* We've used the constant, get rid of it.  */
6133 		      ops->pop ();
6134 		      bool cst1_neg = real_isneg (TREE_REAL_CST_PTR (cst));
6135 		      /* Handle the CST1 < 0 case by negating the result.  */
6136 		      if (cst1_neg)
6137 			{
6138 			  tree negrhs = make_ssa_name (TREE_TYPE (lhs));
6139 			  gimple *negate_stmt
6140 			    = gimple_build_assign (negrhs, NEGATE_EXPR, lhs);
6141 			  insert_stmt_after (negate_stmt, new_call);
6142 			  oe->op = negrhs;
6143 			}
6144 		      else
6145 			oe->op = lhs;
6146 		      if (dump_file && (dump_flags & TDF_DETAILS))
6147 			{
6148 			  fprintf (dump_file, "Optimizing copysign: ");
6149 			  print_generic_expr (dump_file, cst);
6150 			  fprintf (dump_file, " * COPYSIGN (");
6151 			  print_generic_expr (dump_file, arg0);
6152 			  fprintf (dump_file, ", ");
6153 			  print_generic_expr (dump_file, arg1);
6154 			  fprintf (dump_file, ") into %sCOPYSIGN (",
6155 				   cst1_neg ? "-" : "");
6156 			  print_generic_expr (dump_file, mul);
6157 			  fprintf (dump_file, ", ");
6158 			  print_generic_expr (dump_file, arg1);
6159 			  fprintf (dump_file, ")\n");
6160 			}
6161 		      return;
6162 		    }
6163 		  break;
6164 		default:
6165 		  break;
6166 		}
6167 	    }
6168 	}
6169     }
6170 }
6171 
6172 /* Transform STMT at *GSI into a copy by replacing its rhs with NEW_RHS.  */
6173 
6174 static void
6175 transform_stmt_to_copy (gimple_stmt_iterator *gsi, gimple *stmt, tree new_rhs)
6176 {
6177   tree rhs1;
6178 
6179   if (dump_file && (dump_flags & TDF_DETAILS))
6180     {
6181       fprintf (dump_file, "Transforming ");
6182       print_gimple_stmt (dump_file, stmt, 0);
6183     }
6184 
6185   rhs1 = gimple_assign_rhs1 (stmt);
6186   gimple_assign_set_rhs_from_tree (gsi, new_rhs);
6187   update_stmt (stmt);
6188   remove_visited_stmt_chain (rhs1);
6189 
6190   if (dump_file && (dump_flags & TDF_DETAILS))
6191     {
6192       fprintf (dump_file, " into ");
6193       print_gimple_stmt (dump_file, stmt, 0);
6194     }
6195 }
6196 
6197 /* Transform STMT at *GSI into a multiply of RHS1 and RHS2.  */
6198 
6199 static void
6200 transform_stmt_to_multiply (gimple_stmt_iterator *gsi, gimple *stmt,
6201 			    tree rhs1, tree rhs2)
6202 {
6203   if (dump_file && (dump_flags & TDF_DETAILS))
6204     {
6205       fprintf (dump_file, "Transforming ");
6206       print_gimple_stmt (dump_file, stmt, 0);
6207     }
6208 
6209   gimple_assign_set_rhs_with_ops (gsi, MULT_EXPR, rhs1, rhs2);
6210   update_stmt (gsi_stmt (*gsi));
6211   remove_visited_stmt_chain (rhs1);
6212 
6213   if (dump_file && (dump_flags & TDF_DETAILS))
6214     {
6215       fprintf (dump_file, " into ");
6216       print_gimple_stmt (dump_file, stmt, 0);
6217     }
6218 }
6219 
6220 /* Reassociate expressions in basic block BB and its post-dominator as
6221    children.
6222 
6223    Bubble up return status from maybe_optimize_range_tests.  */
6224 
6225 static bool
6226 reassociate_bb (basic_block bb)
6227 {
6228   gimple_stmt_iterator gsi;
6229   basic_block son;
6230   gimple *stmt = last_stmt (bb);
6231   bool cfg_cleanup_needed = false;
6232 
6233   if (stmt && !gimple_visited_p (stmt))
6234     cfg_cleanup_needed |= maybe_optimize_range_tests (stmt);
6235 
6236   bool do_prev = false;
6237   for (gsi = gsi_last_bb (bb);
6238        !gsi_end_p (gsi); do_prev ? gsi_prev (&gsi) : (void) 0)
6239     {
6240       do_prev = true;
6241       stmt = gsi_stmt (gsi);
6242 
6243       if (is_gimple_assign (stmt)
6244 	  && !stmt_could_throw_p (cfun, stmt))
6245 	{
6246 	  tree lhs, rhs1, rhs2;
6247 	  enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
6248 
6249 	  /* If this was part of an already processed statement,
6250 	     we don't need to touch it again. */
6251 	  if (gimple_visited_p (stmt))
6252 	    {
6253 	      /* This statement might have become dead because of previous
6254 		 reassociations.  */
6255 	      if (has_zero_uses (gimple_get_lhs (stmt)))
6256 		{
6257 		  reassoc_remove_stmt (&gsi);
6258 		  release_defs (stmt);
6259 		  /* We might end up removing the last stmt above which
6260 		     places the iterator to the end of the sequence.
6261 		     Reset it to the last stmt in this case and make sure
6262 		     we don't do gsi_prev in that case.  */
6263 		  if (gsi_end_p (gsi))
6264 		    {
6265 		      gsi = gsi_last_bb (bb);
6266 		      do_prev = false;
6267 		    }
6268 		}
6269 	      continue;
6270 	    }
6271 
6272 	  /* If this is not a gimple binary expression, there is
6273 	     nothing for us to do with it.  */
6274 	  if (get_gimple_rhs_class (rhs_code) != GIMPLE_BINARY_RHS)
6275 	    continue;
6276 
6277 	  lhs = gimple_assign_lhs (stmt);
6278 	  rhs1 = gimple_assign_rhs1 (stmt);
6279 	  rhs2 = gimple_assign_rhs2 (stmt);
6280 
6281 	  /* For non-bit or min/max operations we can't associate
6282 	     all types.  Verify that here.  */
6283 	  if (rhs_code != BIT_IOR_EXPR
6284 	      && rhs_code != BIT_AND_EXPR
6285 	      && rhs_code != BIT_XOR_EXPR
6286 	      && rhs_code != MIN_EXPR
6287 	      && rhs_code != MAX_EXPR
6288 	      && (!can_reassociate_p (lhs)
6289 		  || !can_reassociate_p (rhs1)
6290 		  || !can_reassociate_p (rhs2)))
6291 	    continue;
6292 
6293 	  if (associative_tree_code (rhs_code))
6294 	    {
6295 	      auto_vec<operand_entry *> ops;
6296 	      tree powi_result = NULL_TREE;
6297 	      bool is_vector = VECTOR_TYPE_P (TREE_TYPE (lhs));
6298 
6299 	      /* There may be no immediate uses left by the time we
6300 		 get here because we may have eliminated them all.  */
6301 	      if (TREE_CODE (lhs) == SSA_NAME && has_zero_uses (lhs))
6302 		continue;
6303 
6304 	      gimple_set_visited (stmt, true);
6305 	      linearize_expr_tree (&ops, stmt, true, true);
6306 	      ops.qsort (sort_by_operand_rank);
6307 	      int orig_len = ops.length ();
6308 	      optimize_ops_list (rhs_code, &ops);
6309 	      if (undistribute_ops_list (rhs_code, &ops,
6310 					 loop_containing_stmt (stmt)))
6311 		{
6312 		  ops.qsort (sort_by_operand_rank);
6313 		  optimize_ops_list (rhs_code, &ops);
6314 		}
6315 	      if (undistribute_bitref_for_vector (rhs_code, &ops,
6316 						  loop_containing_stmt (stmt)))
6317 		{
6318 		  ops.qsort (sort_by_operand_rank);
6319 		  optimize_ops_list (rhs_code, &ops);
6320 		}
6321 	      if (rhs_code == PLUS_EXPR
6322 		  && transform_add_to_multiply (&ops))
6323 		ops.qsort (sort_by_operand_rank);
6324 
6325 	      if (rhs_code == BIT_IOR_EXPR || rhs_code == BIT_AND_EXPR)
6326 		{
6327 		  if (is_vector)
6328 		    optimize_vec_cond_expr (rhs_code, &ops);
6329 		  else
6330 		    optimize_range_tests (rhs_code, &ops, NULL);
6331 	        }
6332 
6333 	      if (rhs_code == MULT_EXPR && !is_vector)
6334 	        {
6335 		  attempt_builtin_copysign (&ops);
6336 
6337 		  if (reassoc_insert_powi_p
6338 		      && flag_unsafe_math_optimizations)
6339 		    powi_result = attempt_builtin_powi (stmt, &ops);
6340 		}
6341 
6342 	      operand_entry *last;
6343 	      bool negate_result = false;
6344 	      if (ops.length () > 1
6345 		  && rhs_code == MULT_EXPR)
6346 		{
6347 		  last = ops.last ();
6348 		  if ((integer_minus_onep (last->op)
6349 		       || real_minus_onep (last->op))
6350 		      && !HONOR_SNANS (TREE_TYPE (lhs))
6351 		      && (!HONOR_SIGNED_ZEROS (TREE_TYPE (lhs))
6352 			  || !COMPLEX_FLOAT_TYPE_P (TREE_TYPE (lhs))))
6353 		    {
6354 		      ops.pop ();
6355 		      negate_result = true;
6356 		    }
6357 		}
6358 
6359 	      tree new_lhs = lhs;
6360 	      /* If the operand vector is now empty, all operands were
6361 		 consumed by the __builtin_powi optimization.  */
6362 	      if (ops.length () == 0)
6363 		transform_stmt_to_copy (&gsi, stmt, powi_result);
6364 	      else if (ops.length () == 1)
6365 		{
6366 		  tree last_op = ops.last ()->op;
6367 
6368 		  /* If the stmt that defines operand has to be inserted, insert it
6369 		     before the use.  */
6370 		  if (ops.last ()->stmt_to_insert)
6371 		    insert_stmt_before_use (stmt, ops.last ()->stmt_to_insert);
6372 		  if (powi_result)
6373 		    transform_stmt_to_multiply (&gsi, stmt, last_op,
6374 						powi_result);
6375 		  else
6376 		    transform_stmt_to_copy (&gsi, stmt, last_op);
6377 		}
6378 	      else
6379 		{
6380 		  machine_mode mode = TYPE_MODE (TREE_TYPE (lhs));
6381 		  int ops_num = ops.length ();
6382 		  int width;
6383 
6384 		  /* For binary bit operations, if there are at least 3
6385 		     operands and the last operand in OPS is a constant,
6386 		     move it to the front.  This helps ensure that we generate
6387 		     (X & Y) & C rather than (X & C) & Y.  The former will
6388 		     often match a canonical bit test when we get to RTL.  */
6389 		  if (ops.length () > 2
6390 		      && (rhs_code == BIT_AND_EXPR
6391 		          || rhs_code == BIT_IOR_EXPR
6392 		          || rhs_code == BIT_XOR_EXPR)
6393 		      && TREE_CODE (ops.last ()->op) == INTEGER_CST)
6394 		    std::swap (*ops[0], *ops[ops_num - 1]);
6395 
6396 		  /* Only rewrite the expression tree for parallel execution
6397 		     in the last reassoc pass, to avoid useless back-and-forth
6398 		     work with the initial linearization.  */
6399 		  if (!reassoc_insert_powi_p
6400 		      && ops.length () > 3
6401 		      && (width = get_reassociation_width (ops_num, rhs_code,
6402 							   mode)) > 1)
6403 		    {
6404 		      if (dump_file && (dump_flags & TDF_DETAILS))
6405 			fprintf (dump_file,
6406 				 "Width = %d was chosen for reassociation\n",
6407 				 width);
6408 		      rewrite_expr_tree_parallel (as_a <gassign *> (stmt),
6409 						  width, ops);
6410 		    }
6411 		  else
6412                     {
6413                       /* When there are three operands left, we want
6414                          to make sure the ones that get the double
6415                          binary op are chosen wisely.  */
6416                       int len = ops.length ();
6417                       if (len >= 3)
6418                         swap_ops_for_binary_stmt (ops, len - 3, stmt);
6419 
6420 		      new_lhs = rewrite_expr_tree (stmt, rhs_code, 0, ops,
6421 						   powi_result != NULL
6422 						   || negate_result,
6423 						   len != orig_len);
6424                     }
6425 
6426 		  /* If we combined some repeated factors into a
6427 		     __builtin_powi call, multiply that result by the
6428 		     reassociated operands.  */
6429 		  if (powi_result)
6430 		    {
6431 		      gimple *mul_stmt, *lhs_stmt = SSA_NAME_DEF_STMT (lhs);
6432 		      tree type = TREE_TYPE (lhs);
6433 		      tree target_ssa = make_temp_ssa_name (type, NULL,
6434 							    "reassocpow");
6435 		      gimple_set_lhs (lhs_stmt, target_ssa);
6436 		      update_stmt (lhs_stmt);
6437 		      if (lhs != new_lhs)
6438 			{
6439 			  target_ssa = new_lhs;
6440 			  new_lhs = lhs;
6441 			}
6442 		      mul_stmt = gimple_build_assign (lhs, MULT_EXPR,
6443 						      powi_result, target_ssa);
6444 		      gimple_set_location (mul_stmt, gimple_location (stmt));
6445 		      gimple_set_uid (mul_stmt, gimple_uid (stmt));
6446 		      gsi_insert_after (&gsi, mul_stmt, GSI_NEW_STMT);
6447 		    }
6448 		}
6449 
	      if (negate_result)
		{
		  stmt = SSA_NAME_DEF_STMT (lhs);
		  tree tmp = make_ssa_name (TREE_TYPE (lhs));
		  gimple_set_lhs (stmt, tmp);
		  if (lhs != new_lhs)
		    tmp = new_lhs;
		  gassign *neg_stmt = gimple_build_assign (lhs, NEGATE_EXPR,
							   tmp);
		  gimple_set_uid (neg_stmt, gimple_uid (stmt));
		  gsi_insert_after (&gsi, neg_stmt, GSI_NEW_STMT);
		  update_stmt (stmt);
		}
	    }
	}
    }
  for (son = first_dom_son (CDI_POST_DOMINATORS, bb);
       son;
       son = next_dom_son (CDI_POST_DOMINATORS, son))
    cfg_cleanup_needed |= reassociate_bb (son);

  return cfg_cleanup_needed;
}

/* Add jumps around shifts for range tests turned into bit tests.
   For each SSA_NAME VAR we have code like:
   VAR = ...; // final stmt of range comparison
   // bit test here...;
   OTHERVAR = ...; // final stmt of the bit test sequence
   RES = VAR | OTHERVAR;
   Turn the above into:
   <l1>:
   VAR = ...;
   if (VAR != 0)
     goto <l3>;
   else
     goto <l2>;
   <l2>:
   // bit test here...;
   OTHERVAR = ...;
   <l3>:
   # RES = PHI<1(l1), OTHERVAR(l2)>;  */

static void
branch_fixup (void)
{
  tree var;
  unsigned int i;

  FOR_EACH_VEC_ELT (reassoc_branch_fixups, i, var)
    {
      gimple *def_stmt = SSA_NAME_DEF_STMT (var);
      gimple *use_stmt;
      use_operand_p use;
      bool ok = single_imm_use (var, &use, &use_stmt);
      gcc_assert (ok
		  && is_gimple_assign (use_stmt)
		  && gimple_assign_rhs_code (use_stmt) == BIT_IOR_EXPR
		  && gimple_bb (def_stmt) == gimple_bb (use_stmt));

      basic_block cond_bb = gimple_bb (def_stmt);
      basic_block then_bb = split_block (cond_bb, def_stmt)->dest;
      basic_block merge_bb = split_block (then_bb, use_stmt)->dest;

      gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
      gimple *g = gimple_build_cond (NE_EXPR, var,
				     build_zero_cst (TREE_TYPE (var)),
				     NULL_TREE, NULL_TREE);
      location_t loc = gimple_location (use_stmt);
      gimple_set_location (g, loc);
      gsi_insert_after (&gsi, g, GSI_NEW_STMT);

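      /* No profile data is consulted here: assume the range comparison
	 succeeds about half the time, and split the existing probability
	 and count between the new short-circuit edge and the fall-through
	 into the bit-test block accordingly.  */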
      edge etrue = make_edge (cond_bb, merge_bb, EDGE_TRUE_VALUE);
      etrue->probability = profile_probability::even ();
      edge efalse = find_edge (cond_bb, then_bb);
      efalse->flags = EDGE_FALSE_VALUE;
      efalse->probability -= etrue->probability;
      then_bb->count -= etrue->count ();

      tree othervar = NULL_TREE;
      if (gimple_assign_rhs1 (use_stmt) == var)
	othervar = gimple_assign_rhs2 (use_stmt);
      else if (gimple_assign_rhs2 (use_stmt) == var)
	othervar = gimple_assign_rhs1 (use_stmt);
      else
	gcc_unreachable ();
      tree lhs = gimple_assign_lhs (use_stmt);
      gphi *phi = create_phi_node (lhs, merge_bb);
      add_phi_arg (phi, build_one_cst (TREE_TYPE (lhs)), etrue, loc);
      add_phi_arg (phi, othervar, single_succ_edge (then_bb), loc);
      gsi = gsi_for_stmt (use_stmt);
      gsi_remove (&gsi, true);

      set_immediate_dominator (CDI_DOMINATORS, merge_bb, cond_bb);
      set_immediate_dominator (CDI_POST_DOMINATORS, cond_bb, merge_bb);
    }
  reassoc_branch_fixups.release ();
}

void dump_ops_vector (FILE *file, vec<operand_entry *> ops);
void debug_ops_vector (vec<operand_entry *> ops);

/* Dump the operand entry vector OPS to FILE.  */

void
dump_ops_vector (FILE *file, vec<operand_entry *> ops)
{
  operand_entry *oe;
  unsigned int i;

  FOR_EACH_VEC_ELT (ops, i, oe)
    {
      fprintf (file, "Op %d -> rank: %d, tree: ", i, oe->rank);
      print_generic_expr (file, oe->op);
      fprintf (file, "\n");
    }
}
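
/* Example of the dump format above, with hypothetical SSA names and
   ranks:
     Op 0 -> rank: 0, tree: 16
     Op 1 -> rank: 3, tree: y_2
     Op 2 -> rank: 4, tree: x_1  */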

/* Dump the operand entry vector OPS to STDERR.  */

DEBUG_FUNCTION void
debug_ops_vector (vec<operand_entry *> ops)
{
  dump_ops_vector (stderr, ops);
}

/* Bubble up return status from reassociate_bb.  */

static bool
do_reassoc (void)
{
  break_up_subtract_bb (ENTRY_BLOCK_PTR_FOR_FN (cfun));
  return reassociate_bb (EXIT_BLOCK_PTR_FOR_FN (cfun));
}

/* Initialize the reassociation pass.  */

static void
init_reassoc (void)
{
  int i;
  int64_t rank = 2;
  int *bbs = XNEWVEC (int, n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS);

  /* Find the loops, so that we can prevent moving calculations in
     them.  */
  loop_optimizer_init (AVOID_CFG_MODIFICATIONS);

  memset (&reassociate_stats, 0, sizeof (reassociate_stats));

  next_operand_entry_id = 0;

  /* Reverse RPO (Reverse Post Order) will give us something where
     deeper loops come later.  */
  pre_and_rev_post_order_compute (NULL, bbs, false);
  bb_rank = XCNEWVEC (int64_t, last_basic_block_for_fn (cfun));
  operand_rank = new hash_map<tree, int64_t>;

  /* Give each default definition a distinct rank.  This includes
     parameters and the static chain.  Walk backwards over all
     SSA names so that we get proper rank ordering according
     to tree_swap_operands_p.  */
  for (i = num_ssa_names - 1; i > 0; --i)
    {
      tree name = ssa_name (i);
      if (name && SSA_NAME_IS_DEFAULT_DEF (name))
	insert_operand_rank (name, ++rank);
    }

  /* Set up rank for each BB.  */
  for (i = 0; i < n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS; i++)
    bb_rank[bbs[i]] = ++rank << 16;
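  /* For instance (made-up numbers): with two default definitions and
     three blocks in the computed order, the default definitions get
     ranks 3 and 4 and the blocks get base ranks 5 << 16, 6 << 16 and
     7 << 16.  The shift presumably keeps block ordering in the high
     bits while leaving the low bits free for the small per-operand
     rank increments handed out within each block.  */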

  free (bbs);
  calculate_dominance_info (CDI_POST_DOMINATORS);
  plus_negates = vNULL;
}

/* Cleanup after the reassociation pass, and print stats if
   requested.  */

static void
fini_reassoc (void)
{
  statistics_counter_event (cfun, "Linearized",
			    reassociate_stats.linearized);
  statistics_counter_event (cfun, "Constants eliminated",
			    reassociate_stats.constants_eliminated);
  statistics_counter_event (cfun, "Ops eliminated",
			    reassociate_stats.ops_eliminated);
  statistics_counter_event (cfun, "Statements rewritten",
			    reassociate_stats.rewritten);
  statistics_counter_event (cfun, "Built-in pow[i] calls encountered",
			    reassociate_stats.pows_encountered);
  statistics_counter_event (cfun, "Built-in powi calls created",
			    reassociate_stats.pows_created);

  delete operand_rank;
  operand_entry_pool.release ();
  free (bb_rank);
  plus_negates.release ();
  free_dominance_info (CDI_POST_DOMINATORS);
  loop_optimizer_finalize ();
}

/* Gate and execute functions for Reassociation.  If INSERT_POWI_P, enable
   insertion of __builtin_powi calls.

   Returns TODO_cleanup_cfg if a CFG cleanup pass is desired due to
   optimization of a gimple conditional.  Otherwise returns zero.  */

static unsigned int
execute_reassoc (bool insert_powi_p)
{
  reassoc_insert_powi_p = insert_powi_p;

  init_reassoc ();

  bool cfg_cleanup_needed;
  cfg_cleanup_needed = do_reassoc ();
  repropagate_negates ();
  branch_fixup ();

  fini_reassoc ();
  return cfg_cleanup_needed ? TODO_cleanup_cfg : 0;
}

namespace {

const pass_data pass_data_reassoc =
{
  GIMPLE_PASS, /* type */
  "reassoc", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_TREE_REASSOC, /* tv_id */
  ( PROP_cfg | PROP_ssa ), /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_update_ssa_only_virtuals, /* todo_flags_finish */
};

class pass_reassoc : public gimple_opt_pass
{
public:
  pass_reassoc (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_reassoc, ctxt), insert_powi_p (false)
  {}

  /* opt_pass methods: */
  opt_pass * clone () { return new pass_reassoc (m_ctxt); }
  void set_pass_param (unsigned int n, bool param)
    {
      gcc_assert (n == 0);
      insert_powi_p = param;
    }
  virtual bool gate (function *) { return flag_tree_reassoc != 0; }
  virtual unsigned int execute (function *)
    { return execute_reassoc (insert_powi_p); }

 private:
  /* Enable insertion of __builtin_powi calls during execute_reassoc.  See
     point 3a in the pass header comment.  */
  bool insert_powi_p;
}; // class pass_reassoc

} // anon namespace

gimple_opt_pass *
make_pass_reassoc (gcc::context *ctxt)
{
  return new pass_reassoc (ctxt);
}
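
/* The pass is registered more than once in passes.def; each instance is
   produced by clone () and receives its INSERT_POWI_P argument through
   set_pass_param.  Going by the "last reassoc pass" comment in
   reassociate_bb, the final instance is the one run with
   insert_powi_p == false.  */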