/* Analysis Utilities for Loop Vectorization.
   Copyright (C) 2006-2013 Free Software Foundation, Inc.
   Contributed by Dorit Nuzman <dorit@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "ggc.h"
#include "tree.h"
#include "target.h"
#include "basic-block.h"
#include "gimple-pretty-print.h"
#include "tree-flow.h"
#include "cfgloop.h"
#include "expr.h"
#include "optabs.h"
#include "params.h"
#include "tree-data-ref.h"
#include "tree-vectorizer.h"
#include "recog.h"		/* FIXME: for insn_data */
#include "diagnostic-core.h"
#include "dumpfile.h"

/* Pattern recognition functions  */
static gimple vect_recog_widen_sum_pattern (vec<gimple> *, tree *,
					    tree *);
static gimple vect_recog_widen_mult_pattern (vec<gimple> *, tree *,
					     tree *);
static gimple vect_recog_dot_prod_pattern (vec<gimple> *, tree *,
					   tree *);
static gimple vect_recog_pow_pattern (vec<gimple> *, tree *, tree *);
static gimple vect_recog_over_widening_pattern (vec<gimple> *, tree *,
						tree *);
static gimple vect_recog_widen_shift_pattern (vec<gimple> *,
					      tree *, tree *);
static gimple vect_recog_vector_vector_shift_pattern (vec<gimple> *,
						      tree *, tree *);
static gimple vect_recog_divmod_pattern (vec<gimple> *,
					 tree *, tree *);
static gimple vect_recog_mixed_size_cond_pattern (vec<gimple> *,
						  tree *, tree *);
static gimple vect_recog_bool_pattern (vec<gimple> *, tree *, tree *);
static vect_recog_func_ptr vect_vect_recog_func_ptrs[NUM_PATTERNS] = {
	vect_recog_widen_mult_pattern,
	vect_recog_widen_sum_pattern,
	vect_recog_dot_prod_pattern,
	vect_recog_pow_pattern,
	vect_recog_widen_shift_pattern,
	vect_recog_over_widening_pattern,
	vect_recog_vector_vector_shift_pattern,
	vect_recog_divmod_pattern,
	vect_recog_mixed_size_cond_pattern,
	vect_recog_bool_pattern};

static inline void
append_pattern_def_seq (stmt_vec_info stmt_info, gimple stmt)
{
  gimple_seq_add_stmt_without_update (&STMT_VINFO_PATTERN_DEF_SEQ (stmt_info),
				      stmt);
}

static inline void
new_pattern_def_seq (stmt_vec_info stmt_info, gimple stmt)
{
  STMT_VINFO_PATTERN_DEF_SEQ (stmt_info) = NULL;
  append_pattern_def_seq (stmt_info, stmt);
}
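
/* For example (illustrative; this mirrors the use in
   vect_recog_vector_vector_shift_pattern below), a recognizer that needs
   an auxiliary conversion emitted before its pattern statement does:

     def_stmt = gimple_build_assign_with_ops (NOP_EXPR, def, oprnd1,
					      NULL_TREE);
     new_pattern_def_seq (stmt_vinfo, def_stmt);

   further helper statements for the same pattern statement are then
   appended with append_pattern_def_seq.  */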

/* Check whether STMT2 is in the same loop or basic block as STMT1.
   Which of the two applies depends on whether we're currently doing
   loop-based or basic-block-based vectorization, as determined by
   the vinfo_for_stmt for STMT1 (which must be defined).

   If this returns true, vinfo_for_stmt for STMT2 is guaranteed
   to be defined as well.  */

static bool
vect_same_loop_or_bb_p (gimple stmt1, gimple stmt2)
{
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt1);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);

  if (!gimple_bb (stmt2))
    return false;

  if (loop_vinfo)
    {
      struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
      if (!flow_bb_inside_loop_p (loop, gimple_bb (stmt2)))
	return false;
    }
  else
    {
      if (gimple_bb (stmt2) != BB_VINFO_BB (bb_vinfo)
	  || gimple_code (stmt2) == GIMPLE_PHI)
	return false;
    }

  gcc_assert (vinfo_for_stmt (stmt2));
  return true;
}

/* If the LHS of DEF_STMT has a single use, and that statement is
   in the same loop or basic block, return it.  */

static gimple
vect_single_imm_use (gimple def_stmt)
{
  tree lhs = gimple_assign_lhs (def_stmt);
  use_operand_p use_p;
  gimple use_stmt;

  if (!single_imm_use (lhs, &use_p, &use_stmt))
    return NULL;

  if (!vect_same_loop_or_bb_p (def_stmt, use_stmt))
    return NULL;

  return use_stmt;
}

/* Check whether NAME, an ssa-name used in USE_STMT,
   is a result of a type promotion or demotion, such that:
     DEF_STMT: NAME = NOP (name0)
   where the type of name0 (ORIG_TYPE) is smaller/bigger than the type of NAME.
   If CHECK_SIGN is TRUE, check that either both types are signed or both are
   unsigned.  */
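
/* For instance (an illustrative case, assuming the usual 8-bit char and
   32-bit int): given

     char c_0;
     int i_1 = (int) c_0;

   calling type_conversion_p with NAME = i_1 returns true with
   ORIG_TYPE = char and *PROMOTION = true, since the precision of int is at
   least twice that of char.  */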

static bool
type_conversion_p (tree name, gimple use_stmt, bool check_sign,
		   tree *orig_type, gimple *def_stmt, bool *promotion)
{
  tree dummy;
  gimple dummy_gimple;
  loop_vec_info loop_vinfo;
  stmt_vec_info stmt_vinfo;
  tree type = TREE_TYPE (name);
  tree oprnd0;
  enum vect_def_type dt;
  tree def;
  bb_vec_info bb_vinfo;

  stmt_vinfo = vinfo_for_stmt (use_stmt);
  loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
  if (!vect_is_simple_use (name, use_stmt, loop_vinfo, bb_vinfo, def_stmt,
			   &def, &dt))
    return false;

  if (dt != vect_internal_def
      && dt != vect_external_def && dt != vect_constant_def)
    return false;

  if (!*def_stmt)
    return false;

  if (!is_gimple_assign (*def_stmt))
    return false;

  if (!CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (*def_stmt)))
    return false;

  oprnd0 = gimple_assign_rhs1 (*def_stmt);

  *orig_type = TREE_TYPE (oprnd0);
  if (!INTEGRAL_TYPE_P (type) || !INTEGRAL_TYPE_P (*orig_type)
      || ((TYPE_UNSIGNED (type) != TYPE_UNSIGNED (*orig_type)) && check_sign))
    return false;

  if (TYPE_PRECISION (type) >= (TYPE_PRECISION (*orig_type) * 2))
    *promotion = true;
  else if (TYPE_PRECISION (*orig_type) >= (TYPE_PRECISION (type) * 2))
    *promotion = false;
  else
    return false;

  if (!vect_is_simple_use (oprnd0, *def_stmt, loop_vinfo,
			   bb_vinfo, &dummy_gimple, &dummy, &dt))
    return false;

  return true;
}

/* Helper to return a new temporary for pattern of TYPE for STMT.  If STMT
   is NULL, the caller must set SSA_NAME_DEF_STMT for the returned SSA var.  */

static tree
vect_recog_temp_ssa_var (tree type, gimple stmt)
{
  return make_temp_ssa_name (type, stmt, "patt");
}

/* Function vect_recog_dot_prod_pattern

   Try to find the following pattern:

     type x_t, y_t;
     TYPE1 prod;
     TYPE2 sum = init;
   loop:
     sum_0 = phi <init, sum_1>
     S1  x_t = ...
     S2  y_t = ...
     S3  x_T = (TYPE1) x_t;
     S4  y_T = (TYPE1) y_t;
     S5  prod = x_T * y_T;
     [S6  prod = (TYPE2) prod;  #optional]
     S7  sum_1 = prod + sum_0;

   where 'TYPE1' is exactly double the size of type 'type', and 'TYPE2' is
   the same size as 'TYPE1' or bigger.  This is a special case of a reduction
   computation.

   Input:

   * STMTS: Contains a stmt from which the pattern search begins.  In the
   example, when this function is called with S7, the pattern {S3,S4,S5,S6,S7}
   will be detected.

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
        WIDEN_DOT_PRODUCT <x_t, y_t, sum_0>

   Note: The dot-prod idiom is a widening reduction pattern that is
         vectorized without preserving all the intermediate results.  It
         produces only N/2 (widened) results (by summing up pairs of
         intermediate results) rather than all N results.  Therefore, we
         cannot allow this pattern when we want to get all the results and in
         the correct order (as is the case when this computation is in an
         inner-loop nested in an outer-loop that is being vectorized).  */
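
/* A typical source loop matching this pattern (illustrative only):

     short x[N], y[N];
     int sum = 0;
     for (i = 0; i < N; i++)
       sum += x[i] * y[i];

   Here 'type' is short and TYPE1 = TYPE2 = int, so the whole
   multiply-accumulate sequence is replaced by
   DOT_PROD_EXPR <x_t, y_t, sum_0>.  */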

static gimple
vect_recog_dot_prod_pattern (vec<gimple> *stmts, tree *type_in,
			     tree *type_out)
{
  gimple stmt, last_stmt = (*stmts)[0];
  tree oprnd0, oprnd1;
  tree oprnd00, oprnd01;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  tree type, half_type;
  gimple pattern_stmt;
  tree prod_type;
  loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  struct loop *loop;
  tree var;
  bool promotion;

  if (!loop_info)
    return NULL;

  loop = LOOP_VINFO_LOOP (loop_info);

  if (!is_gimple_assign (last_stmt))
    return NULL;

  type = gimple_expr_type (last_stmt);

  /* Look for the following pattern
          DX = (TYPE1) X;
          DY = (TYPE1) Y;
          DPROD = DX * DY;
          DDPROD = (TYPE2) DPROD;
          sum_1 = DDPROD + sum_0;
     In which
     - DX is double the size of X
     - DY is double the size of Y
     - DX, DY, DPROD all have the same type
     - sum is the same size as DPROD or bigger
     - sum has been recognized as a reduction variable.

     This is equivalent to:
       DPROD = X w* Y;          #widen mult
       sum_1 = DPROD w+ sum_0;  #widen summation
     or
       DPROD = X w* Y;          #widen mult
       sum_1 = DPROD + sum_0;   #summation
   */

  /* Starting from LAST_STMT, follow the defs of its uses in search
     of the above pattern.  */

  if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
    return NULL;

  if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
    {
      /* Has been detected as widening-summation?  */

      stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
      type = gimple_expr_type (stmt);
      if (gimple_assign_rhs_code (stmt) != WIDEN_SUM_EXPR)
        return NULL;
      oprnd0 = gimple_assign_rhs1 (stmt);
      oprnd1 = gimple_assign_rhs2 (stmt);
      half_type = TREE_TYPE (oprnd0);
    }
  else
    {
      gimple def_stmt;

      if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def)
        return NULL;
      oprnd0 = gimple_assign_rhs1 (last_stmt);
      oprnd1 = gimple_assign_rhs2 (last_stmt);
      if (!types_compatible_p (TREE_TYPE (oprnd0), type)
	  || !types_compatible_p (TREE_TYPE (oprnd1), type))
        return NULL;
      stmt = last_stmt;

      if (type_conversion_p (oprnd0, stmt, true, &half_type, &def_stmt,
                             &promotion)
         && promotion)
        {
          stmt = def_stmt;
          oprnd0 = gimple_assign_rhs1 (stmt);
        }
      else
        half_type = type;
    }

  /* So far so good.  Since last_stmt was detected as a (summation) reduction,
     we know that oprnd1 is the reduction variable (defined by a loop-header
     phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
     Left to check that oprnd0 is defined by a (widen_)mult_expr.  */
  if (TREE_CODE (oprnd0) != SSA_NAME)
    return NULL;

  prod_type = half_type;
  stmt = SSA_NAME_DEF_STMT (oprnd0);

  /* It could not be the dot_prod pattern if the stmt is outside the loop.  */
  if (!gimple_bb (stmt) || !flow_bb_inside_loop_p (loop, gimple_bb (stmt)))
    return NULL;

  /* FORNOW.  Can continue analyzing the def-use chain when this stmt is a phi
     inside the loop (in case we are analyzing an outer-loop).  */
  if (!is_gimple_assign (stmt))
    return NULL;
  stmt_vinfo = vinfo_for_stmt (stmt);
  gcc_assert (stmt_vinfo);
  if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)
    return NULL;
  if (gimple_assign_rhs_code (stmt) != MULT_EXPR)
    return NULL;
  if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
    {
      /* Has been detected as a widening multiplication?  */

      stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
      if (gimple_assign_rhs_code (stmt) != WIDEN_MULT_EXPR)
        return NULL;
      stmt_vinfo = vinfo_for_stmt (stmt);
      gcc_assert (stmt_vinfo);
      gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_internal_def);
      oprnd00 = gimple_assign_rhs1 (stmt);
      oprnd01 = gimple_assign_rhs2 (stmt);
    }
  else
    {
      tree half_type0, half_type1;
      gimple def_stmt;
      tree oprnd0, oprnd1;

      oprnd0 = gimple_assign_rhs1 (stmt);
      oprnd1 = gimple_assign_rhs2 (stmt);
      if (!types_compatible_p (TREE_TYPE (oprnd0), prod_type)
          || !types_compatible_p (TREE_TYPE (oprnd1), prod_type))
        return NULL;
      if (!type_conversion_p (oprnd0, stmt, true, &half_type0, &def_stmt,
                              &promotion)
          || !promotion)
        return NULL;
      oprnd00 = gimple_assign_rhs1 (def_stmt);
      if (!type_conversion_p (oprnd1, stmt, true, &half_type1, &def_stmt,
                              &promotion)
          || !promotion)
        return NULL;
      oprnd01 = gimple_assign_rhs1 (def_stmt);
      if (!types_compatible_p (half_type0, half_type1))
        return NULL;
      if (TYPE_PRECISION (prod_type) != TYPE_PRECISION (half_type0) * 2)
	return NULL;
    }

  half_type = TREE_TYPE (oprnd00);
  *type_in = half_type;
  *type_out = type;

  /* Pattern detected.  Create a stmt to be used to replace the pattern: */
  var = vect_recog_temp_ssa_var (type, NULL);
  pattern_stmt = gimple_build_assign_with_ops (DOT_PROD_EXPR, var,
					       oprnd00, oprnd01, oprnd1);

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
                       "vect_recog_dot_prod_pattern: detected: ");
      dump_gimple_stmt (MSG_OPTIMIZED_LOCATIONS, TDF_SLIM, pattern_stmt, 0);
    }

  /* We don't allow changing the order of the computation in the inner-loop
     when doing outer-loop vectorization.  */
  gcc_assert (!nested_in_vect_loop_p (loop, last_stmt));

  return pattern_stmt;
}


/* Handle widening operation by a constant.  At the moment we support MULT_EXPR
   and LSHIFT_EXPR.

   For MULT_EXPR we check that CONST_OPRND fits HALF_TYPE, and for LSHIFT_EXPR
   we check that CONST_OPRND is less than or equal to the size of HALF_TYPE.

   Otherwise, if the type of the result (TYPE) is at least 4 times bigger than
   HALF_TYPE, and there is an intermediate type (2 times smaller than TYPE)
   that satisfies the above restrictions, we can perform a widening operation
   from the intermediate type to TYPE and replace a_T = (TYPE) a_t;
   with a_it = (interm_type) a_t;  */
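
/* An illustrative case of the intermediate-type path (assuming 8-bit char
   and 32-bit int): with char a_t and int a_T = (int) a_t, the
   multiplication a_T * 300 cannot use the direct case, which requires TYPE
   to be exactly twice HALF_TYPE; but int is 4 times wider than char and 300
   fits the 16-bit intermediate type, so we emit a_it = (short) a_t and
   widen from short to int instead.  */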

static bool
vect_handle_widen_op_by_const (gimple stmt, enum tree_code code,
			       tree const_oprnd, tree *oprnd,
			       vec<gimple> *stmts, tree type,
			       tree *half_type, gimple def_stmt)
{
  tree new_type, new_oprnd;
  gimple new_stmt;

  if (code != MULT_EXPR && code != LSHIFT_EXPR)
    return false;

  if (((code == MULT_EXPR && int_fits_type_p (const_oprnd, *half_type))
        || (code == LSHIFT_EXPR
            && compare_tree_int (const_oprnd, TYPE_PRECISION (*half_type))
	       != 1))
      && TYPE_PRECISION (type) == (TYPE_PRECISION (*half_type) * 2))
    {
      /* CONST_OPRND is a constant of HALF_TYPE.  */
      *oprnd = gimple_assign_rhs1 (def_stmt);
      return true;
    }

  if (TYPE_PRECISION (type) < (TYPE_PRECISION (*half_type) * 4))
    return false;

  if (!vect_same_loop_or_bb_p (stmt, def_stmt))
    return false;

  /* TYPE is 4 times bigger than HALF_TYPE, try widening operation for
     a type 2 times bigger than HALF_TYPE.  */
  new_type = build_nonstandard_integer_type (TYPE_PRECISION (type) / 2,
                                             TYPE_UNSIGNED (type));
  if ((code == MULT_EXPR && !int_fits_type_p (const_oprnd, new_type))
      || (code == LSHIFT_EXPR
          && compare_tree_int (const_oprnd, TYPE_PRECISION (new_type)) == 1))
    return false;

  /* Use NEW_TYPE for widening operation.  */
  if (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)))
    {
      new_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
      /* Check if the already created pattern stmt is what we need.  */
      if (!is_gimple_assign (new_stmt)
          || gimple_assign_rhs_code (new_stmt) != NOP_EXPR
          || TREE_TYPE (gimple_assign_lhs (new_stmt)) != new_type)
        return false;

      stmts->safe_push (def_stmt);
      *oprnd = gimple_assign_lhs (new_stmt);
    }
  else
    {
      /* Create a_T = (NEW_TYPE) a_t;  */
      *oprnd = gimple_assign_rhs1 (def_stmt);
      new_oprnd = make_ssa_name (new_type, NULL);
      new_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd, *oprnd,
					       NULL_TREE);
      STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)) = new_stmt;
      stmts->safe_push (def_stmt);
      *oprnd = new_oprnd;
    }

  *half_type = new_type;
  return true;
}


/* Function vect_recog_widen_mult_pattern

   Try to find the following pattern:

     type a_t, b_t;
     TYPE a_T, b_T, prod_T;

     S1  a_t = ;
     S2  b_t = ;
     S3  a_T = (TYPE) a_t;
     S4  b_T = (TYPE) b_t;
     S5  prod_T = a_T * b_T;

   where type 'TYPE' is at least double the size of type 'type'.

   Also detect unsigned cases:

     unsigned type a_t, b_t;
     unsigned TYPE u_prod_T;
     TYPE a_T, b_T, prod_T;

     S1  a_t = ;
     S2  b_t = ;
     S3  a_T = (TYPE) a_t;
     S4  b_T = (TYPE) b_t;
     S5  prod_T = a_T * b_T;
     S6  u_prod_T = (unsigned TYPE) prod_T;

   and multiplication by constants:

     type a_t;
     TYPE a_T, prod_T;

     S1  a_t = ;
     S3  a_T = (TYPE) a_t;
     S5  prod_T = a_T * CONST;

   A special case of multiplication by constants is when 'TYPE' is 4 times
   bigger than 'type', but CONST fits an intermediate type 2 times smaller
   than 'TYPE'.  In that case we create an additional pattern stmt for S3
   to create a variable of the intermediate type, and perform widen-mult
   on the intermediate type as well:

     type a_t;
     interm_type a_it;
     TYPE a_T, prod_T, prod_T';

     S1  a_t = ;
     S3  a_T = (TYPE) a_t;
           '--> a_it = (interm_type) a_t;
     S5  prod_T = a_T * CONST;
           '--> prod_T' = a_it w* CONST;

   Input/Output:

   * STMTS: Contains a stmt from which the pattern search begins.  In the
   example, when this function is called with S5, the pattern {S3,S4,S5,(S6)}
   is detected.  In case of unsigned widen-mult, the original stmt (S5) is
   replaced with S6 in STMTS.  In case of multiplication by a constant
   of an intermediate type (the last case above), STMTS also contains S3
   (inserted before S5).

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
        WIDEN_MULT <a_t, b_t>
*/
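
/* An illustrative source loop for the basic case:

     short a[N], b[N];
     int c[N];
     for (i = 0; i < N; i++)
       c[i] = a[i] * b[i];

   The usual arithmetic conversions produce S3/S4/S5 above, and the
   sequence is replaced by WIDEN_MULT <a_t, b_t>.  */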

static gimple
vect_recog_widen_mult_pattern (vec<gimple> *stmts,
                               tree *type_in, tree *type_out)
{
  gimple last_stmt = stmts->pop ();
  gimple def_stmt0, def_stmt1;
  tree oprnd0, oprnd1;
  tree type, half_type0, half_type1;
  gimple pattern_stmt;
  tree vectype, vectype_out = NULL_TREE;
  tree var;
  enum tree_code dummy_code;
  int dummy_int;
  vec<tree> dummy_vec;
  bool op1_ok;
  bool promotion;

  if (!is_gimple_assign (last_stmt))
    return NULL;

  type = gimple_expr_type (last_stmt);

  /* Starting from LAST_STMT, follow the defs of its uses in search
     of the above pattern.  */

  if (gimple_assign_rhs_code (last_stmt) != MULT_EXPR)
    return NULL;

  oprnd0 = gimple_assign_rhs1 (last_stmt);
  oprnd1 = gimple_assign_rhs2 (last_stmt);
  if (!types_compatible_p (TREE_TYPE (oprnd0), type)
      || !types_compatible_p (TREE_TYPE (oprnd1), type))
    return NULL;

  /* Check argument 0.  */
  if (!type_conversion_p (oprnd0, last_stmt, false, &half_type0, &def_stmt0,
                          &promotion)
      || !promotion)
     return NULL;
  /* Check argument 1.  */
  op1_ok = type_conversion_p (oprnd1, last_stmt, false, &half_type1,
                              &def_stmt1, &promotion);

  if (op1_ok && promotion)
    {
      oprnd0 = gimple_assign_rhs1 (def_stmt0);
      oprnd1 = gimple_assign_rhs1 (def_stmt1);
    }
  else
    {
      if (TREE_CODE (oprnd1) == INTEGER_CST
          && TREE_CODE (half_type0) == INTEGER_TYPE
          && vect_handle_widen_op_by_const (last_stmt, MULT_EXPR, oprnd1,
		                            &oprnd0, stmts, type,
					    &half_type0, def_stmt0))
	{
	  half_type1 = half_type0;
	  oprnd1 = fold_convert (half_type1, oprnd1);
	}
      else
        return NULL;
    }

  /* Handle unsigned case.  Look for
     S6  u_prod_T = (unsigned TYPE) prod_T;
     Use unsigned TYPE as the type for WIDEN_MULT_EXPR.  */
  if (TYPE_UNSIGNED (type) != TYPE_UNSIGNED (half_type0))
    {
      gimple use_stmt;
      tree use_lhs;
      tree use_type;

      if (TYPE_UNSIGNED (type) == TYPE_UNSIGNED (half_type1))
        return NULL;

      use_stmt = vect_single_imm_use (last_stmt);
      if (!use_stmt || !is_gimple_assign (use_stmt)
	  || gimple_assign_rhs_code (use_stmt) != NOP_EXPR)
        return NULL;

      use_lhs = gimple_assign_lhs (use_stmt);
      use_type = TREE_TYPE (use_lhs);
      if (!INTEGRAL_TYPE_P (use_type)
          || (TYPE_UNSIGNED (type) == TYPE_UNSIGNED (use_type))
          || (TYPE_PRECISION (type) != TYPE_PRECISION (use_type)))
        return NULL;

      type = use_type;
      last_stmt = use_stmt;
    }

  if (!types_compatible_p (half_type0, half_type1))
    return NULL;

  /* Pattern detected.  */
  if (dump_enabled_p ())
    dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
                     "vect_recog_widen_mult_pattern: detected: ");

  /* Check target support  */
  vectype = get_vectype_for_scalar_type (half_type0);
  vectype_out = get_vectype_for_scalar_type (type);
  if (!vectype
      || !vectype_out
      || !supportable_widening_operation (WIDEN_MULT_EXPR, last_stmt,
					  vectype_out, vectype,
					  &dummy_code, &dummy_code,
					  &dummy_int, &dummy_vec))
    return NULL;

  *type_in = vectype;
  *type_out = vectype_out;

  /* Pattern supported.  Create a stmt to be used to replace the pattern: */
  var = vect_recog_temp_ssa_var (type, NULL);
  pattern_stmt = gimple_build_assign_with_ops (WIDEN_MULT_EXPR, var, oprnd0,
					       oprnd1);

  if (dump_enabled_p ())
    dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);

  stmts->safe_push (last_stmt);
  return pattern_stmt;
}


/* Function vect_recog_pow_pattern

   Try to find the following pattern:

     x = POW (y, N);

   with POW being one of pow, powf, powi, powif and N being
   either 2 or 0.5.

   Input:

   * LAST_STMT: A stmt from which the pattern search begins.

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
        x = x * x
   or
	x = sqrt (x)
*/
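
/* For instance (illustrative), x = powf (y, 2.0f) is rewritten as
   x = y * y, and x = pow (y, 0.5) as x = sqrt (y), the latter only when
   the target provides a vectorizable sqrt for the type of y.  */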

static gimple
vect_recog_pow_pattern (vec<gimple> *stmts, tree *type_in,
			tree *type_out)
{
  gimple last_stmt = (*stmts)[0];
  tree fn, base, exp = NULL;
  gimple stmt;
  tree var;

  if (!is_gimple_call (last_stmt) || gimple_call_lhs (last_stmt) == NULL)
    return NULL;

  fn = gimple_call_fndecl (last_stmt);
  if (fn == NULL_TREE || DECL_BUILT_IN_CLASS (fn) != BUILT_IN_NORMAL)
   return NULL;

  switch (DECL_FUNCTION_CODE (fn))
    {
    case BUILT_IN_POWIF:
    case BUILT_IN_POWI:
    case BUILT_IN_POWF:
    case BUILT_IN_POW:
      base = gimple_call_arg (last_stmt, 0);
      exp = gimple_call_arg (last_stmt, 1);
      if (TREE_CODE (exp) != REAL_CST
	  && TREE_CODE (exp) != INTEGER_CST)
        return NULL;
      break;

    default:
      return NULL;
    }

  /* We now have a pow or powi builtin function call with a constant
     exponent.  */

  *type_out = NULL_TREE;

  /* Catch squaring.  */
  if ((host_integerp (exp, 0)
       && tree_low_cst (exp, 0) == 2)
      || (TREE_CODE (exp) == REAL_CST
          && REAL_VALUES_EQUAL (TREE_REAL_CST (exp), dconst2)))
    {
      *type_in = TREE_TYPE (base);

      var = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
      stmt = gimple_build_assign_with_ops (MULT_EXPR, var, base, base);
      return stmt;
    }

  /* Catch square root.  */
  if (TREE_CODE (exp) == REAL_CST
      && REAL_VALUES_EQUAL (TREE_REAL_CST (exp), dconsthalf))
    {
      tree newfn = mathfn_built_in (TREE_TYPE (base), BUILT_IN_SQRT);
      *type_in = get_vectype_for_scalar_type (TREE_TYPE (base));
      if (*type_in)
	{
	  gimple stmt = gimple_build_call (newfn, 1, base);
	  if (vectorizable_function (stmt, *type_in, *type_in)
	      != NULL_TREE)
	    {
	      var = vect_recog_temp_ssa_var (TREE_TYPE (base), stmt);
	      gimple_call_set_lhs (stmt, var);
	      return stmt;
	    }
	}
    }

  return NULL;
}


/* Function vect_recog_widen_sum_pattern

   Try to find the following pattern:

     type x_t;
     TYPE x_T, sum = init;
   loop:
     sum_0 = phi <init, sum_1>
     S1  x_t = *p;
     S2  x_T = (TYPE) x_t;
     S3  sum_1 = x_T + sum_0;

   where type 'TYPE' is at least double the size of type 'type', i.e. we're
   summing elements of type 'type' into an accumulator of type 'TYPE'.  This
   is a special case of a reduction computation.

   Input:

   * LAST_STMT: A stmt from which the pattern search begins.  In the example,
   when this function is called with S3, the pattern {S2,S3} will be detected.

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
        WIDEN_SUM <x_t, sum_0>

   Note: The widening-sum idiom is a widening reduction pattern that is
	 vectorized without preserving all the intermediate results.  It
         produces only N/2 (widened) results (by summing up pairs of
	 intermediate results) rather than all N results.  Therefore, we
	 cannot allow this pattern when we want to get all the results and in
	 the correct order (as is the case when this computation is in an
	 inner-loop nested in an outer-loop that is being vectorized).  */
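
/* An illustrative source loop:

     unsigned char x[N];
     unsigned int sum = 0;
     for (i = 0; i < N; i++)
       sum += x[i];

   matches with type = unsigned char and TYPE = unsigned int, and S2/S3
   are replaced by WIDEN_SUM <x_t, sum_0>.  */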

static gimple
vect_recog_widen_sum_pattern (vec<gimple> *stmts, tree *type_in,
			      tree *type_out)
{
  gimple stmt, last_stmt = (*stmts)[0];
  tree oprnd0, oprnd1;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  tree type, half_type;
  gimple pattern_stmt;
  loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  struct loop *loop;
  tree var;
  bool promotion;

  if (!loop_info)
    return NULL;

  loop = LOOP_VINFO_LOOP (loop_info);

  if (!is_gimple_assign (last_stmt))
    return NULL;

  type = gimple_expr_type (last_stmt);

  /* Look for the following pattern
          DX = (TYPE) X;
          sum_1 = DX + sum_0;
     In which DX is at least double the size of X, and sum_1 has been
     recognized as a reduction variable.
   */

  /* Starting from LAST_STMT, follow the defs of its uses in search
     of the above pattern.  */

  if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
    return NULL;

  if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def)
    return NULL;

  oprnd0 = gimple_assign_rhs1 (last_stmt);
  oprnd1 = gimple_assign_rhs2 (last_stmt);
  if (!types_compatible_p (TREE_TYPE (oprnd0), type)
      || !types_compatible_p (TREE_TYPE (oprnd1), type))
    return NULL;

  /* So far so good.  Since last_stmt was detected as a (summation) reduction,
     we know that oprnd1 is the reduction variable (defined by a loop-header
     phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
     Left to check that oprnd0 is defined by a cast from type 'type' to type
     'TYPE'.  */

  if (!type_conversion_p (oprnd0, last_stmt, true, &half_type, &stmt,
                          &promotion)
      || !promotion)
     return NULL;

  oprnd0 = gimple_assign_rhs1 (stmt);
  *type_in = half_type;
  *type_out = type;

  /* Pattern detected.  Create a stmt to be used to replace the pattern: */
  var = vect_recog_temp_ssa_var (type, NULL);
  pattern_stmt = gimple_build_assign_with_ops (WIDEN_SUM_EXPR, var,
					       oprnd0, oprnd1);

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
                       "vect_recog_widen_sum_pattern: detected: ");
      dump_gimple_stmt (MSG_OPTIMIZED_LOCATIONS, TDF_SLIM, pattern_stmt, 0);
    }

  /* We don't allow changing the order of the computation in the inner-loop
     when doing outer-loop vectorization.  */
  gcc_assert (!nested_in_vect_loop_p (loop, last_stmt));

  return pattern_stmt;
}

/* Return TRUE if the operation in STMT can be performed on a smaller type.

   Input:
   STMT - a statement to check.
   DEF - we support operations with two operands, one of which is constant.
         The other operand can be defined by a demotion operation, or by a
         previous statement in a sequence of over-promoted operations.  In
         the latter case DEF is used to replace that operand.  (It is defined
         by a pattern statement we created for the previous statement in the
         sequence).

   Input/output:
   NEW_TYPE - Output: a smaller type that we are trying to use.  Input: if not
         NULL, it's the type of DEF.
   STMTS - additional pattern statements.  If a pattern statement (type
         conversion) is created in this function, its original statement is
         added to STMTS.

   Output:
   OP0, OP1 - if the operation fits a smaller type, OP0 and OP1 are the new
         operands to use in the new pattern statement for STMT (will be created
         in vect_recog_over_widening_pattern ()).
   NEW_DEF_STMT - in case DEF has to be promoted, we create two pattern
         statements for STMT: the first one is a type promotion and the second
         one is the operation itself.  We return the type promotion statement
	 in NEW_DEF_STMT and further store it in STMT_VINFO_PATTERN_DEF_SEQ of
         the second pattern statement.  */

static bool
vect_operation_fits_smaller_type (gimple stmt, tree def, tree *new_type,
                                  tree *op0, tree *op1, gimple *new_def_stmt,
                                  vec<gimple> *stmts)
{
  enum tree_code code;
  tree const_oprnd, oprnd;
  tree interm_type = NULL_TREE, half_type, new_oprnd, type;
  gimple def_stmt, new_stmt;
  bool first = false;
  bool promotion;

  *op0 = NULL_TREE;
  *op1 = NULL_TREE;
  *new_def_stmt = NULL;

  if (!is_gimple_assign (stmt))
    return false;

  code = gimple_assign_rhs_code (stmt);
  if (code != LSHIFT_EXPR && code != RSHIFT_EXPR
      && code != BIT_IOR_EXPR && code != BIT_XOR_EXPR && code != BIT_AND_EXPR)
    return false;

  oprnd = gimple_assign_rhs1 (stmt);
  const_oprnd = gimple_assign_rhs2 (stmt);
  type = gimple_expr_type (stmt);

  if (TREE_CODE (oprnd) != SSA_NAME
      || TREE_CODE (const_oprnd) != INTEGER_CST)
    return false;

  /* If oprnd has other uses besides that in stmt we cannot mark it
     as being part of a pattern only.  */
  if (!has_single_use (oprnd))
    return false;

  /* If we are in the middle of a sequence, we use DEF from a previous
     statement.  Otherwise, OPRND has to be a result of type promotion.  */
  if (*new_type)
    {
      half_type = *new_type;
      oprnd = def;
    }
  else
    {
      first = true;
      if (!type_conversion_p (oprnd, stmt, false, &half_type, &def_stmt,
			      &promotion)
	  || !promotion
	  || !vect_same_loop_or_bb_p (stmt, def_stmt))
        return false;
    }

  /* Can we perform the operation on a smaller type?  */
  switch (code)
    {
      case BIT_IOR_EXPR:
      case BIT_XOR_EXPR:
      case BIT_AND_EXPR:
        if (!int_fits_type_p (const_oprnd, half_type))
          {
            /* HALF_TYPE is not enough.  Try a bigger type if possible.  */
            if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
              return false;

            interm_type = build_nonstandard_integer_type (
                        TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));
            if (!int_fits_type_p (const_oprnd, interm_type))
              return false;
          }

        break;

      case LSHIFT_EXPR:
        /* Try intermediate type - HALF_TYPE is not enough for sure.  */
        if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
          return false;

        /* Check that HALF_TYPE size + shift amount <= INTERM_TYPE size.
          (e.g., if the original value was char, the shift amount is at most 8
           if we want to use short).  */
        if (compare_tree_int (const_oprnd, TYPE_PRECISION (half_type)) == 1)
          return false;

        interm_type = build_nonstandard_integer_type (
                        TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));

        if (!vect_supportable_shift (code, interm_type))
          return false;

        break;

      case RSHIFT_EXPR:
        if (vect_supportable_shift (code, half_type))
          break;

        /* Try intermediate type - HALF_TYPE is not supported.  */
        if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
          return false;

        interm_type = build_nonstandard_integer_type (
                        TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));

        if (!vect_supportable_shift (code, interm_type))
          return false;

        break;

      default:
        gcc_unreachable ();
    }

  /* There are four possible cases:
     1. OPRND is defined by a type promotion (in that case FIRST is TRUE, it's
        the first statement in the sequence)
        a. The original, HALF_TYPE, is not enough - we replace the promotion
           from HALF_TYPE to TYPE with a promotion to INTERM_TYPE.
        b. HALF_TYPE is sufficient, OPRND is set as the RHS of the original
           promotion.
     2. OPRND is defined by a pattern statement we created.
        a. Its type is not sufficient for the operation, we create a new stmt:
           a type conversion for OPRND from HALF_TYPE to INTERM_TYPE.  We store
           this statement in NEW_DEF_STMT, and it is later put in
	   STMT_VINFO_PATTERN_DEF_SEQ of the pattern statement for STMT.
        b. OPRND is good to use in the new statement.  */
  if (first)
    {
      if (interm_type)
        {
          /* Replace the original type conversion HALF_TYPE->TYPE with
             HALF_TYPE->INTERM_TYPE.  */
          if (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)))
            {
              new_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
              /* Check if the already created pattern stmt is what we need.  */
              if (!is_gimple_assign (new_stmt)
                  || gimple_assign_rhs_code (new_stmt) != NOP_EXPR
                  || TREE_TYPE (gimple_assign_lhs (new_stmt)) != interm_type)
                return false;

	      stmts->safe_push (def_stmt);
              oprnd = gimple_assign_lhs (new_stmt);
            }
          else
            {
              /* Create NEW_OPRND = (INTERM_TYPE) OPRND.  */
              oprnd = gimple_assign_rhs1 (def_stmt);
              new_oprnd = make_ssa_name (interm_type, NULL);
              new_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd,
                                                       oprnd, NULL_TREE);
              STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)) = new_stmt;
              stmts->safe_push (def_stmt);
              oprnd = new_oprnd;
            }
        }
      else
        {
          /* Retrieve the operand before the type promotion.  */
          oprnd = gimple_assign_rhs1 (def_stmt);
        }
    }
  else
    {
      if (interm_type)
        {
          /* Create a type conversion HALF_TYPE->INTERM_TYPE.  */
          new_oprnd = make_ssa_name (interm_type, NULL);
          new_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd,
                                                   oprnd, NULL_TREE);
          oprnd = new_oprnd;
          *new_def_stmt = new_stmt;
        }

      /* Otherwise, OPRND is already set.  */
    }

  if (interm_type)
    *new_type = interm_type;
  else
    *new_type = half_type;

  *op0 = oprnd;
  *op1 = fold_convert (*new_type, const_oprnd);

  return true;
}

/* Try to find a statement or a sequence of statements that can be performed
   on a smaller type:

     type x_t;
     TYPE x_T, res0_T, res1_T;
   loop:
     S1  x_t = *p;
     S2  x_T = (TYPE) x_t;
     S3  res0_T = op (x_T, C0);
     S4  res1_T = op (res0_T, C1);
     S5  ... = () res1_T;  - type demotion

   where type 'TYPE' is at least double the size of type 'type', and C0 and
   C1 are constants.  Check if S3 and S4 can be done on a smaller type than
   'TYPE'; it can either be 'type' or some intermediate type.  For now, we
   expect S5 to be a type demotion operation.  We also check that S3 and S4
   have only one use.  */
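
/* An illustrative source loop (assuming the target supports the shift on
   the narrow vector type):

     unsigned char a[N], b[N];
     for (i = 0; i < N; i++)
       b[i] = (a[i] & 0xf0) >> 4;

   The C integer promotions perform the AND and the shift in int, but both
   constants fit unsigned char, so the operations can be done in the narrow
   type and the final demotion back to unsigned char becomes free.  */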

static gimple
vect_recog_over_widening_pattern (vec<gimple> *stmts,
                                  tree *type_in, tree *type_out)
{
  gimple stmt = stmts->pop ();
  gimple pattern_stmt = NULL, new_def_stmt, prev_stmt = NULL, use_stmt = NULL;
  tree op0, op1, vectype = NULL_TREE, use_lhs, use_type;
  tree var = NULL_TREE, new_type = NULL_TREE, new_oprnd;
  bool first;
  tree type = NULL;

  first = true;
  while (1)
    {
      if (!vinfo_for_stmt (stmt)
          || STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (stmt)))
        return NULL;

      new_def_stmt = NULL;
      if (!vect_operation_fits_smaller_type (stmt, var, &new_type,
                                             &op0, &op1, &new_def_stmt,
                                             stmts))
        {
          if (first)
            return NULL;
          else
            break;
        }

      /* STMT can be performed on a smaller type.  Check its uses.  */
      use_stmt = vect_single_imm_use (stmt);
      if (!use_stmt || !is_gimple_assign (use_stmt))
        return NULL;

      /* Create pattern statement for STMT.  */
      vectype = get_vectype_for_scalar_type (new_type);
      if (!vectype)
        return NULL;

      /* We want to collect all the statements for which we create pattern
         statements, except for the case when the last statement in the
         sequence doesn't have a corresponding pattern statement.  In such
         a case we associate the last pattern statement with the last statement
         in the sequence.  Therefore, we only add the original statement to
         the list if we know that it is not the last.  */
      if (prev_stmt)
        stmts->safe_push (prev_stmt);

      var = vect_recog_temp_ssa_var (new_type, NULL);
      pattern_stmt
	= gimple_build_assign_with_ops (gimple_assign_rhs_code (stmt), var,
					op0, op1);
      STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt)) = pattern_stmt;
      new_pattern_def_seq (vinfo_for_stmt (stmt), new_def_stmt);

      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
                           "created pattern stmt: ");
          dump_gimple_stmt (MSG_OPTIMIZED_LOCATIONS, TDF_SLIM, pattern_stmt, 0);
        }

      type = gimple_expr_type (stmt);
      prev_stmt = stmt;
      stmt = use_stmt;

      first = false;
    }

  /* We got a sequence.  We expect it to end with a type demotion operation.
     Otherwise, we quit (for now).  There are three possible cases: the
     conversion is to NEW_TYPE (we don't do anything), the conversion is to
     a type bigger than NEW_TYPE and/or the signedness of USE_TYPE and
     NEW_TYPE differs (we create a new conversion statement).  */
  if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt)))
    {
      use_lhs = gimple_assign_lhs (use_stmt);
      use_type = TREE_TYPE (use_lhs);
      /* Support only type demotion or signedness change.  */
      if (!INTEGRAL_TYPE_P (use_type)
	  || TYPE_PRECISION (type) <= TYPE_PRECISION (use_type))
        return NULL;

      /* Check that NEW_TYPE is not bigger than the conversion result.  */
      if (TYPE_PRECISION (new_type) > TYPE_PRECISION (use_type))
	return NULL;

      if (TYPE_UNSIGNED (new_type) != TYPE_UNSIGNED (use_type)
          || TYPE_PRECISION (new_type) != TYPE_PRECISION (use_type))
        {
          /* Create NEW_TYPE->USE_TYPE conversion.  */
          new_oprnd = make_ssa_name (use_type, NULL);
          pattern_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd,
                                                       var, NULL_TREE);
          STMT_VINFO_RELATED_STMT (vinfo_for_stmt (use_stmt)) = pattern_stmt;

          *type_in = get_vectype_for_scalar_type (new_type);
          *type_out = get_vectype_for_scalar_type (use_type);

          /* We created a pattern statement for the last statement in the
             sequence, so we don't need to associate it with the pattern
             statement created for PREV_STMT.  Therefore, we add PREV_STMT
             to the list in order to mark it later in vect_pattern_recog_1.  */
          if (prev_stmt)
            stmts->safe_push (prev_stmt);
        }
      else
        {
          if (prev_stmt)
	    STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (use_stmt))
	       = STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (prev_stmt));

          *type_in = vectype;
          *type_out = NULL_TREE;
        }

      stmts->safe_push (use_stmt);
    }
  else
    /* TODO: support general case, create a conversion to the correct type.  */
    return NULL;

  /* Pattern detected.  */
  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
                       "vect_recog_over_widening_pattern: detected: ");
      dump_gimple_stmt (MSG_OPTIMIZED_LOCATIONS, TDF_SLIM, pattern_stmt, 0);
    }

  return pattern_stmt;
}
1299 
1300 /* Detect widening shift pattern:
1301 
1302    type a_t;
1303    TYPE a_T, res_T;
1304 
1305    S1 a_t = ;
1306    S2 a_T = (TYPE) a_t;
1307    S3 res_T = a_T << CONST;
1308 
1309   where type 'TYPE' is at least double the size of type 'type'.
1310 
1311   Also detect cases where the shift result is immediately converted
1312   to another type 'result_type' that is no larger in size than 'TYPE'.
1313   In those cases we perform a widen-shift that directly results in
1314   'result_type', to avoid a possible over-widening situation:
1315 
1316   type a_t;
1317   TYPE a_T, res_T;
1318   result_type res_result;
1319 
1320   S1 a_t = ;
1321   S2 a_T = (TYPE) a_t;
1322   S3 res_T = a_T << CONST;
1323   S4 res_result = (result_type) res_T;
1324       '--> res_result' = a_t w<< CONST;
1325 
1326   And a case when 'TYPE' is 4 times bigger than 'type'.  In that case we
1327   create an additional pattern stmt for S2 to create a variable of an
1328   intermediate type, and perform widen-shift on the intermediate type:
1329 
1330   type a_t;
1331   interm_type a_it;
1332   TYPE a_T, res_T, res_T';
1333 
1334   S1 a_t = ;
1335   S2 a_T = (TYPE) a_t;
1336       '--> a_it = (interm_type) a_t;
1337   S3 res_T = a_T << CONST;
1338       '--> res_T' = a_it <<* CONST;
1339 
1340   Input/Output:
1341 
1342   * STMTS: Contains a stmt from which the pattern search begins.
1343     In case of unsigned widen-shift, the original stmt (S3) is replaced with S4
1344     in STMTS.  When an intermediate type is used and a pattern statement is
1345     created for S2, we also put S2 here (before S3).
1346 
1347   Output:
1348 
1349   * TYPE_IN: The type of the input arguments to the pattern.
1350 
1351   * TYPE_OUT: The type of the output of this pattern.
1352 
1353   * Return value: A new stmt that will be used to replace the sequence of
1354     stmts that constitute the pattern.  In this case it will be:
1355     WIDEN_LSHIFT_EXPR <a_t, CONST>.  */
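
/* An illustrative source loop:

     unsigned char a[N];
     unsigned short b[N];
     for (i = 0; i < N; i++)
       b[i] = a[i] << 3;

   After the integer promotions, the shift is performed in int and then
   truncated to unsigned short; the pattern turns the sequence into
   WIDEN_LSHIFT_EXPR <a_t, 3>, producing unsigned short directly.  */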

static gimple
vect_recog_widen_shift_pattern (vec<gimple> *stmts,
				tree *type_in, tree *type_out)
{
  gimple last_stmt = stmts->pop ();
  gimple def_stmt0;
  tree oprnd0, oprnd1;
  tree type, half_type0;
  gimple pattern_stmt;
  tree vectype, vectype_out = NULL_TREE;
  tree var;
  enum tree_code dummy_code;
  int dummy_int;
  vec<tree> dummy_vec;
  gimple use_stmt;
  bool promotion;

  if (!is_gimple_assign (last_stmt) || !vinfo_for_stmt (last_stmt))
    return NULL;

  if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (last_stmt)))
    return NULL;

  if (gimple_assign_rhs_code (last_stmt) != LSHIFT_EXPR)
    return NULL;

  oprnd0 = gimple_assign_rhs1 (last_stmt);
  oprnd1 = gimple_assign_rhs2 (last_stmt);
  if (TREE_CODE (oprnd0) != SSA_NAME || TREE_CODE (oprnd1) != INTEGER_CST)
    return NULL;

  /* Check operand 0: it has to be defined by a type promotion.  */
  if (!type_conversion_p (oprnd0, last_stmt, false, &half_type0, &def_stmt0,
                          &promotion)
      || !promotion)
     return NULL;

  /* Check operand 1: has to be positive.  We check that it fits the type
     in vect_handle_widen_op_by_const ().  */
  if (tree_int_cst_compare (oprnd1, size_zero_node) <= 0)
    return NULL;

  oprnd0 = gimple_assign_rhs1 (def_stmt0);
  type = gimple_expr_type (last_stmt);

  /* Check for subsequent conversion to another type.  */
  use_stmt = vect_single_imm_use (last_stmt);
  if (use_stmt && is_gimple_assign (use_stmt)
      && CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt))
      && !STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt)))
    {
      tree use_lhs = gimple_assign_lhs (use_stmt);
      tree use_type = TREE_TYPE (use_lhs);

      if (INTEGRAL_TYPE_P (use_type)
	  && TYPE_PRECISION (use_type) <= TYPE_PRECISION (type))
	{
	  last_stmt = use_stmt;
	  type = use_type;
	}
    }

  /* Check if this is a widening operation.  */
  if (!vect_handle_widen_op_by_const (last_stmt, LSHIFT_EXPR, oprnd1,
				      &oprnd0, stmts,
				      type, &half_type0, def_stmt0))
    return NULL;

  /* Pattern detected.  */
  if (dump_enabled_p ())
    dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
                     "vect_recog_widen_shift_pattern: detected: ");

  /* Check target support.  */
  vectype = get_vectype_for_scalar_type (half_type0);
  vectype_out = get_vectype_for_scalar_type (type);

  if (!vectype
      || !vectype_out
      || !supportable_widening_operation (WIDEN_LSHIFT_EXPR, last_stmt,
					  vectype_out, vectype,
					  &dummy_code, &dummy_code,
					  &dummy_int, &dummy_vec))
    return NULL;

  *type_in = vectype;
  *type_out = vectype_out;

  /* Pattern supported.  Create a stmt to be used to replace the pattern.  */
  var = vect_recog_temp_ssa_var (type, NULL);
  pattern_stmt =
    gimple_build_assign_with_ops (WIDEN_LSHIFT_EXPR, var, oprnd0, oprnd1);

  if (dump_enabled_p ())
    dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);

  stmts->safe_push (last_stmt);
  return pattern_stmt;
}

/* Detect a vector by vector shift pattern that wouldn't be otherwise
   vectorized:

   type a_t;
   TYPE b_T, res_T;

   S1 a_t = ;
   S2 b_T = ;
   S3 res_T = b_T op a_t;

  where type 'TYPE' is a type with different size than 'type',
  and op is <<, >> or rotate.

  Also detect cases:

   type a_t;
   TYPE b_T, c_T, res_T;

   S0 c_T = ;
   S1 a_t = (type) c_T;
   S2 b_T = ;
   S3 res_T = b_T op a_t;

  Input/Output:

  * STMTS: Contains a stmt from which the pattern search begins,
    i.e. the shift/rotate stmt.  The original stmt (S3) is replaced
    with a shift/rotate which has same type on both operands, in the
    second case just b_T op c_T, in the first case with added cast
    from a_t to c_T in STMT_VINFO_PATTERN_DEF_SEQ.

  Output:

  * TYPE_IN: The type of the input arguments to the pattern.

  * TYPE_OUT: The type of the output of this pattern.

  * Return value: A new stmt that will be used to replace the shift/rotate
    S3 stmt.  */
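
/* An illustrative source loop (assuming the shifted value and the shift
   amount have different modes on the target):

     long long b[N], r[N];
     int s[N];
     for (i = 0; i < N; i++)
       r[i] = b[i] << s[i];

   The shift amount s[i] keeps its int type in GIMPLE while b[i] is
   long long, so the operands have different modes; the pattern adds a cast
   of the shift amount to the type of b[i] in STMT_VINFO_PATTERN_DEF_SEQ
   and emits a shift with matching operand types.  */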

static gimple
vect_recog_vector_vector_shift_pattern (vec<gimple> *stmts,
					tree *type_in, tree *type_out)
{
  gimple last_stmt = stmts->pop ();
  tree oprnd0, oprnd1, lhs, var;
  gimple pattern_stmt, def_stmt;
  enum tree_code rhs_code;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
  enum vect_def_type dt;
  tree def;

  if (!is_gimple_assign (last_stmt))
    return NULL;

  rhs_code = gimple_assign_rhs_code (last_stmt);
  switch (rhs_code)
    {
    case LSHIFT_EXPR:
    case RSHIFT_EXPR:
    case LROTATE_EXPR:
    case RROTATE_EXPR:
      break;
    default:
      return NULL;
    }

  if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
    return NULL;

  lhs = gimple_assign_lhs (last_stmt);
  oprnd0 = gimple_assign_rhs1 (last_stmt);
  oprnd1 = gimple_assign_rhs2 (last_stmt);
  if (TREE_CODE (oprnd0) != SSA_NAME
      || TREE_CODE (oprnd1) != SSA_NAME
      || TYPE_MODE (TREE_TYPE (oprnd0)) == TYPE_MODE (TREE_TYPE (oprnd1))
      || TYPE_PRECISION (TREE_TYPE (oprnd1))
	 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (oprnd1)))
      || TYPE_PRECISION (TREE_TYPE (lhs))
	 != TYPE_PRECISION (TREE_TYPE (oprnd0)))
    return NULL;

  if (!vect_is_simple_use (oprnd1, last_stmt, loop_vinfo, bb_vinfo, &def_stmt,
			   &def, &dt))
    return NULL;

  if (dt != vect_internal_def)
    return NULL;

  *type_in = get_vectype_for_scalar_type (TREE_TYPE (oprnd0));
  *type_out = *type_in;
  if (*type_in == NULL_TREE)
    return NULL;

  def = NULL_TREE;
  if (gimple_assign_cast_p (def_stmt))
    {
      tree rhs1 = gimple_assign_rhs1 (def_stmt);
      if (TYPE_MODE (TREE_TYPE (rhs1)) == TYPE_MODE (TREE_TYPE (oprnd0))
	  && TYPE_PRECISION (TREE_TYPE (rhs1))
	     == TYPE_PRECISION (TREE_TYPE (oprnd0)))
	def = rhs1;
    }

  if (def == NULL_TREE)
    {
      def = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
      def_stmt = gimple_build_assign_with_ops (NOP_EXPR, def, oprnd1,
					       NULL_TREE);
      new_pattern_def_seq (stmt_vinfo, def_stmt);
    }

  /* Pattern detected.  */
  if (dump_enabled_p ())
    dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
                     "vect_recog_vector_vector_shift_pattern: detected: ");

  /* Pattern supported.  Create a stmt to be used to replace the pattern.  */
  var = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
  pattern_stmt = gimple_build_assign_with_ops (rhs_code, var, oprnd0, def);

  if (dump_enabled_p ())
    dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);

  stmts->safe_push (last_stmt);
  return pattern_stmt;
}
1586 
1587 /* Detect a division by an integer constant that wouldn't
1588    otherwise be vectorized:
1589 
1590    type a_t, b_t;
1591 
1592    S1 a_t = b_t / N;
1593 
1594   where type 'type' is an integral type and N is a constant.
1595 
1596   Similarly handle modulo by a constant:
1597 
1598    S4 a_t = b_t % N;
1599 
1600   Input/Output:
1601 
1602   * STMTS: Contains a stmt from which the pattern search begins,
1603     i.e. the division stmt.  If N is a power-of-two constant and
1604     type is signed, S1 is replaced by:
1605   S3  y_t = b_t < 0 ? N - 1 : 0;
1606   S2  x_t = b_t + y_t;
1607   S1' a_t = x_t >> log2 (N);
1608 
1609     If N is a power-of-two constant and type is signed, S4 is
1610     replaced by (where the *_T temporaries have unsigned type):
1611   S9  y_T = b_t < 0 ? -1U : 0U;
1612   S8  z_T = y_T >> (sizeof (type_t) * CHAR_BIT - log2 (N));
1613   S7  z_t = (type) z_T;
1614   S6  w_t = b_t + z_t;
1615   S5  x_t = w_t & (N - 1);
1616   S4' a_t = x_t - z_t;
1617 
1618   Output:
1619 
1620   * TYPE_IN: The type of the input arguments to the pattern.
1621 
1622   * TYPE_OUT: The type of the output of this pattern.
1623 
1624   * Return value: A new stmt that will be used to replace the division
1625     S1 or modulo S4 stmt.  */
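/* As a worked instance of the power-of-two case above (an illustrative
   sketch, assuming a 32-bit signed 'type' and N == 4; the S-numbers refer
   to the comment above, >> being an arithmetic shift as in GIMPLE):

   S3  y_t = b_t < 0 ? 3 : 0;
   S2  x_t = b_t + y_t;
   S1' a_t = x_t >> 2;

   e.g. b_t == -5 gives x_t == -2 and a_t == -1 == -5 / 4.  For the modulo
   form, z_t is 3 for negative b_t and 0 otherwise:

   S9  y_T = b_t < 0 ? 0xffffffffU : 0U;
   S8  z_T = y_T >> 30;
   S7  z_t = (int) z_T;
   S6  w_t = b_t + z_t;
   S5  x_t = w_t & 3;
   S4' a_t = x_t - z_t;

   e.g. b_t == -5 gives w_t == -2, x_t == 2 and a_t == -1 == -5 % 4.  */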
1626 
1627 static gimple
1628 vect_recog_divmod_pattern (vec<gimple> *stmts,
1629 			   tree *type_in, tree *type_out)
1630 {
1631   gimple last_stmt = stmts->pop ();
1632   tree oprnd0, oprnd1, vectype, itype, cond;
1633   gimple pattern_stmt, def_stmt;
1634   enum tree_code rhs_code;
1635   stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
1636   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1637   bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
1638   optab optab;
1639   tree q;
1640   int dummy_int, prec;
1641   stmt_vec_info def_stmt_vinfo;
1642 
1643   if (!is_gimple_assign (last_stmt))
1644     return NULL;
1645 
1646   rhs_code = gimple_assign_rhs_code (last_stmt);
1647   switch (rhs_code)
1648     {
1649     case TRUNC_DIV_EXPR:
1650     case TRUNC_MOD_EXPR:
1651       break;
1652     default:
1653       return NULL;
1654     }
1655 
1656   if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
1657     return NULL;
1658 
1659   oprnd0 = gimple_assign_rhs1 (last_stmt);
1660   oprnd1 = gimple_assign_rhs2 (last_stmt);
1661   itype = TREE_TYPE (oprnd0);
1662   if (TREE_CODE (oprnd0) != SSA_NAME
1663       || TREE_CODE (oprnd1) != INTEGER_CST
1664       || TREE_CODE (itype) != INTEGER_TYPE
1665       || TYPE_PRECISION (itype) != GET_MODE_PRECISION (TYPE_MODE (itype)))
1666     return NULL;
1667 
1668   vectype = get_vectype_for_scalar_type (itype);
1669   if (vectype == NULL_TREE)
1670     return NULL;
1671 
1672   /* If the target can handle vectorized division or modulo natively,
1673      don't attempt to optimize this.  */
1674   optab = optab_for_tree_code (rhs_code, vectype, optab_default);
1675   if (optab != unknown_optab)
1676     {
1677       enum machine_mode vec_mode = TYPE_MODE (vectype);
1678       int icode = (int) optab_handler (optab, vec_mode);
1679       if (icode != CODE_FOR_nothing)
1680 	return NULL;
1681     }
1682 
1683   prec = TYPE_PRECISION (itype);
1684   if (integer_pow2p (oprnd1))
1685     {
1686       if (TYPE_UNSIGNED (itype) || tree_int_cst_sgn (oprnd1) != 1)
1687 	return NULL;
1688 
1689       /* Pattern detected.  */
1690       if (dump_enabled_p ())
1691         dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
1692                          "vect_recog_divmod_pattern: detected: ");
1693 
1694       cond = build2 (LT_EXPR, boolean_type_node, oprnd0,
1695 		     build_int_cst (itype, 0));
1696       if (rhs_code == TRUNC_DIV_EXPR)
1697 	{
1698 	  tree var = vect_recog_temp_ssa_var (itype, NULL);
1699 	  tree shift;
1700 	  def_stmt
1701 	    = gimple_build_assign_with_ops (COND_EXPR, var, cond,
1702 					    fold_build2 (MINUS_EXPR, itype,
1703 							 oprnd1,
1704 							 build_int_cst (itype,
1705 									1)),
1706 					    build_int_cst (itype, 0));
1707 	  new_pattern_def_seq (stmt_vinfo, def_stmt);
1708 	  var = vect_recog_temp_ssa_var (itype, NULL);
1709 	  def_stmt
1710 	    = gimple_build_assign_with_ops (PLUS_EXPR, var, oprnd0,
1711 					    gimple_assign_lhs (def_stmt));
1712 	  append_pattern_def_seq (stmt_vinfo, def_stmt);
1713 
1714 	  shift = build_int_cst (itype, tree_log2 (oprnd1));
1715 	  pattern_stmt
1716 	    = gimple_build_assign_with_ops (RSHIFT_EXPR,
1717 					    vect_recog_temp_ssa_var (itype,
1718 								     NULL),
1719 					    var, shift);
1720 	}
1721       else
1722 	{
1723 	  tree signmask;
1724 	  STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
1725 	  if (compare_tree_int (oprnd1, 2) == 0)
1726 	    {
1727 	      signmask = vect_recog_temp_ssa_var (itype, NULL);
1728 	      def_stmt
1729 		= gimple_build_assign_with_ops (COND_EXPR, signmask, cond,
1730 						build_int_cst (itype, 1),
1731 						build_int_cst (itype, 0));
1732 	      append_pattern_def_seq (stmt_vinfo, def_stmt);
1733 	    }
1734 	  else
1735 	    {
1736 	      tree utype
1737 		= build_nonstandard_integer_type (prec, 1);
1738 	      tree vecutype = get_vectype_for_scalar_type (utype);
1739 	      tree shift
1740 		= build_int_cst (utype, GET_MODE_BITSIZE (TYPE_MODE (itype))
1741 					- tree_log2 (oprnd1));
1742 	      tree var = vect_recog_temp_ssa_var (utype, NULL);
1743 
1744 	      def_stmt
1745 		= gimple_build_assign_with_ops (COND_EXPR, var, cond,
1746 						build_int_cst (utype, -1),
1747 						build_int_cst (utype, 0));
1748 	      def_stmt_vinfo
1749 		= new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo);
1750 	      set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
1751 	      STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecutype;
1752 	      append_pattern_def_seq (stmt_vinfo, def_stmt);
1753 	      var = vect_recog_temp_ssa_var (utype, NULL);
1754 	      def_stmt
1755 		= gimple_build_assign_with_ops (RSHIFT_EXPR, var,
1756 						gimple_assign_lhs (def_stmt),
1757 						shift);
1758 	      def_stmt_vinfo
1759 		= new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo);
1760 	      set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
1761 	      STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecutype;
1762 	      append_pattern_def_seq (stmt_vinfo, def_stmt);
1763 	      signmask = vect_recog_temp_ssa_var (itype, NULL);
1764 	      def_stmt
1765 		= gimple_build_assign_with_ops (NOP_EXPR, signmask, var,
1766 						NULL_TREE);
1767 	      append_pattern_def_seq (stmt_vinfo, def_stmt);
1768 	    }
1769 	  def_stmt
1770 	    = gimple_build_assign_with_ops (PLUS_EXPR,
1771 					    vect_recog_temp_ssa_var (itype,
1772 								     NULL),
1773 					    oprnd0, signmask);
1774 	  append_pattern_def_seq (stmt_vinfo, def_stmt);
1775 	  def_stmt
1776 	    = gimple_build_assign_with_ops (BIT_AND_EXPR,
1777 					    vect_recog_temp_ssa_var (itype,
1778 								     NULL),
1779 					    gimple_assign_lhs (def_stmt),
1780 					    fold_build2 (MINUS_EXPR, itype,
1781 							 oprnd1,
1782 							 build_int_cst (itype,
1783 									1)));
1784 	  append_pattern_def_seq (stmt_vinfo, def_stmt);
1785 
1786 	  pattern_stmt
1787 	    = gimple_build_assign_with_ops (MINUS_EXPR,
1788 					    vect_recog_temp_ssa_var (itype,
1789 								     NULL),
1790 					    gimple_assign_lhs (def_stmt),
1791 					    signmask);
1792 	}
1793 
1794       if (dump_enabled_p ())
1795 	dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt,
1796                               0);
1797 
1798       stmts->safe_push (last_stmt);
1799 
1800       *type_in = vectype;
1801       *type_out = vectype;
1802       return pattern_stmt;
1803     }
1804 
1805   if (!host_integerp (oprnd1, TYPE_UNSIGNED (itype))
1806       || integer_zerop (oprnd1)
1807       || prec > HOST_BITS_PER_WIDE_INT)
1808     return NULL;
1809 
1810   if (!can_mult_highpart_p (TYPE_MODE (vectype), TYPE_UNSIGNED (itype)))
1811     return NULL;
1812 
1813   STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
1814 
1815   if (TYPE_UNSIGNED (itype))
1816     {
1817       unsigned HOST_WIDE_INT mh, ml;
1818       int pre_shift, post_shift;
1819       unsigned HOST_WIDE_INT d = tree_low_cst (oprnd1, 1)
1820 				 & GET_MODE_MASK (TYPE_MODE (itype));
1821       tree t1, t2, t3, t4;
1822 
1823       if (d >= ((unsigned HOST_WIDE_INT) 1 << (prec - 1)))
1824 	/* FIXME: Can transform this into oprnd0 >= oprnd1 ? 1 : 0.  */
1825 	return NULL;
1826 
1827       /* Find a suitable multiplier and right shift count
1828 	 instead of multiplying with D.  */
1829       mh = choose_multiplier (d, prec, prec, &ml, &post_shift, &dummy_int);
1830 
1831       /* If the suggested multiplier is more than PREC bits, we can do better
1832 	 for even divisors, using an initial right shift.  */
1833       if (mh != 0 && (d & 1) == 0)
1834 	{
1835 	  pre_shift = floor_log2 (d & -d);
1836 	  mh = choose_multiplier (d >> pre_shift, prec, prec - pre_shift,
1837 				  &ml, &post_shift, &dummy_int);
1838 	  gcc_assert (!mh);
1839 	}
1840       else
1841 	pre_shift = 0;
1842 
1843       if (mh != 0)
1844 	{
1845 	  if (post_shift - 1 >= prec)
1846 	    return NULL;
1847 
1848 	  /* t1 = oprnd0 h* ml;
1849 	     t2 = oprnd0 - t1;
1850 	     t3 = t2 >> 1;
1851 	     t4 = t1 + t3;
1852 	     q = t4 >> (post_shift - 1);  */
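	  /* A worked instance (illustrative, assuming prec == 32): for
	     d == 7, choose_multiplier gives mh != 0, ml == 0x24924925 and
	     post_shift == 3, so e.g. oprnd0 == 100 yields t1 == 14,
	     t2 == 86, t3 == 43, t4 == 57 and q == 57 >> 2 == 14 == 100 / 7.  */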
1853 	  t1 = vect_recog_temp_ssa_var (itype, NULL);
1854 	  def_stmt
1855 	    = gimple_build_assign_with_ops (MULT_HIGHPART_EXPR, t1, oprnd0,
1856 					    build_int_cst (itype, ml));
1857 	  append_pattern_def_seq (stmt_vinfo, def_stmt);
1858 
1859 	  t2 = vect_recog_temp_ssa_var (itype, NULL);
1860 	  def_stmt
1861 	    = gimple_build_assign_with_ops (MINUS_EXPR, t2, oprnd0, t1);
1862 	  append_pattern_def_seq (stmt_vinfo, def_stmt);
1863 
1864 	  t3 = vect_recog_temp_ssa_var (itype, NULL);
1865 	  def_stmt
1866 	    = gimple_build_assign_with_ops (RSHIFT_EXPR, t3, t2,
1867 					    integer_one_node);
1868 	  append_pattern_def_seq (stmt_vinfo, def_stmt);
1869 
1870 	  t4 = vect_recog_temp_ssa_var (itype, NULL);
1871 	  def_stmt
1872 	    = gimple_build_assign_with_ops (PLUS_EXPR, t4, t1, t3);
1873 
1874 	  if (post_shift != 1)
1875 	    {
1876 	      append_pattern_def_seq (stmt_vinfo, def_stmt);
1877 
1878 	      q = vect_recog_temp_ssa_var (itype, NULL);
1879 	      pattern_stmt
1880 		= gimple_build_assign_with_ops (RSHIFT_EXPR, q, t4,
1881 						build_int_cst (itype,
1882 							       post_shift
1883 							       - 1));
1884 	    }
1885 	  else
1886 	    {
1887 	      q = t4;
1888 	      pattern_stmt = def_stmt;
1889 	    }
1890 	}
1891       else
1892 	{
1893 	  if (pre_shift >= prec || post_shift >= prec)
1894 	    return NULL;
1895 
1896 	  /* t1 = oprnd0 >> pre_shift;
1897 	     t2 = t1 h* ml;
1898 	     q = t2 >> post_shift;  */
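	  /* A worked instance (illustrative, assuming prec == 32): for
	     d == 14 the multiplier for d itself would need PREC + 1 bits,
	     but with pre_shift == 1 the reduced divisor 7 gets
	     ml == 0x92492493 and post_shift == 2, so e.g. oprnd0 == 100
	     yields t1 == 50, t2 == 28 and q == 28 >> 2 == 7 == 100 / 14.  */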
1899 	  if (pre_shift)
1900 	    {
1901 	      t1 = vect_recog_temp_ssa_var (itype, NULL);
1902 	      def_stmt
1903 		= gimple_build_assign_with_ops (RSHIFT_EXPR, t1, oprnd0,
1904 						build_int_cst (NULL,
1905 							       pre_shift));
1906 	      append_pattern_def_seq (stmt_vinfo, def_stmt);
1907 	    }
1908 	  else
1909 	    t1 = oprnd0;
1910 
1911 	  t2 = vect_recog_temp_ssa_var (itype, NULL);
1912 	  def_stmt
1913 	    = gimple_build_assign_with_ops (MULT_HIGHPART_EXPR, t2, t1,
1914 					    build_int_cst (itype, ml));
1915 
1916 	  if (post_shift)
1917 	    {
1918 	      append_pattern_def_seq (stmt_vinfo, def_stmt);
1919 
1920 	      q = vect_recog_temp_ssa_var (itype, NULL);
1921 	      def_stmt
1922 		= gimple_build_assign_with_ops (RSHIFT_EXPR, q, t2,
1923 						build_int_cst (itype,
1924 							       post_shift));
1925 	    }
1926 	  else
1927 	    q = t2;
1928 
1929 	  pattern_stmt = def_stmt;
1930 	}
1931     }
1932   else
1933     {
1934       unsigned HOST_WIDE_INT ml;
1935       int post_shift;
1936       HOST_WIDE_INT d = tree_low_cst (oprnd1, 0);
1937       unsigned HOST_WIDE_INT abs_d;
1938       bool add = false;
1939       tree t1, t2, t3, t4;
1940 
1941       /* Give up for -1.  */
1942       if (d == -1)
1943 	return NULL;
1944 
1945       /* Since d might be INT_MIN, we have to cast to
1946 	 unsigned HOST_WIDE_INT before negating to avoid
1947 	 undefined signed overflow.  */
1948       abs_d = (d >= 0
1949 	       ? (unsigned HOST_WIDE_INT) d
1950 	       : - (unsigned HOST_WIDE_INT) d);
1951 
1952       /* n rem d = n rem -d */
1953       if (rhs_code == TRUNC_MOD_EXPR && d < 0)
1954 	{
1955 	  d = abs_d;
1956 	  oprnd1 = build_int_cst (itype, abs_d);
1957 	}
1958       else if (HOST_BITS_PER_WIDE_INT >= prec
1959 	       && abs_d == (unsigned HOST_WIDE_INT) 1 << (prec - 1))
1960 	/* This case is not handled correctly below.  */
1961 	return NULL;
1962 
1963       choose_multiplier (abs_d, prec, prec - 1, &ml, &post_shift, &dummy_int);
1964       if (ml >= (unsigned HOST_WIDE_INT) 1 << (prec - 1))
1965 	{
1966 	  add = true;
1967 	  ml |= (~(unsigned HOST_WIDE_INT) 0) << (prec - 1);
1968 	}
1969       if (post_shift >= prec)
1970 	return NULL;
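      /* A worked instance (illustrative, assuming prec == 32): for d == 7,
	 choose_multiplier gives ml == 0x92492493 with the top bit set, so
	 ADD becomes true and post_shift == 2.  E.g. oprnd0 == -100 yields
	 t1 == 42, t2 == -58, t3 == -15, t4 == -1 and q == -14 == -100 / 7.  */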
1971 
1972       /* t1 = oprnd0 h* ml;  */
1973       t1 = vect_recog_temp_ssa_var (itype, NULL);
1974       def_stmt
1975 	= gimple_build_assign_with_ops (MULT_HIGHPART_EXPR, t1, oprnd0,
1976 					build_int_cst (itype, ml));
1977       append_pattern_def_seq (stmt_vinfo, def_stmt);
1978 
1979       if (add)
1980 	{
1981 	  /* t2 = t1 + oprnd0;  */
1982 	  t2 = vect_recog_temp_ssa_var (itype, NULL);
1983 	  def_stmt
1984 	    = gimple_build_assign_with_ops (PLUS_EXPR, t2, t1, oprnd0);
1985 	  append_pattern_def_seq (stmt_vinfo, def_stmt);
1986 	}
1987       else
1988 	t2 = t1;
1989 
1990       if (post_shift)
1991 	{
1992 	  /* t3 = t2 >> post_shift;  */
1993 	  t3 = vect_recog_temp_ssa_var (itype, NULL);
1994 	  def_stmt
1995 	    = gimple_build_assign_with_ops (RSHIFT_EXPR, t3, t2,
1996 					    build_int_cst (itype, post_shift));
1997 	  append_pattern_def_seq (stmt_vinfo, def_stmt);
1998 	}
1999       else
2000 	t3 = t2;
2001 
2002       /* t4 = oprnd0 >> (prec - 1);  */
2003       t4 = vect_recog_temp_ssa_var (itype, NULL);
2004       def_stmt
2005 	= gimple_build_assign_with_ops (RSHIFT_EXPR, t4, oprnd0,
2006 					build_int_cst (itype, prec - 1));
2007       append_pattern_def_seq (stmt_vinfo, def_stmt);
2008 
2009       /* q = t3 - t4;  or q = t4 - t3;  */
2010       q = vect_recog_temp_ssa_var (itype, NULL);
2011       pattern_stmt
2012 	= gimple_build_assign_with_ops (MINUS_EXPR, q, d < 0 ? t4 : t3,
2013 					d < 0 ? t3 : t4);
2014     }
2015 
2016   if (rhs_code == TRUNC_MOD_EXPR)
2017     {
2018       tree r, t1;
2019 
2020       /* We divided.  Now finish by:
2021 	 t1 = q * oprnd1;
2022 	 r = oprnd0 - t1;  */
2023       append_pattern_def_seq (stmt_vinfo, pattern_stmt);
2024 
2025       t1 = vect_recog_temp_ssa_var (itype, NULL);
2026       def_stmt
2027 	= gimple_build_assign_with_ops (MULT_EXPR, t1, q, oprnd1);
2028       append_pattern_def_seq (stmt_vinfo, def_stmt);
2029 
2030       r = vect_recog_temp_ssa_var (itype, NULL);
2031       pattern_stmt
2032 	= gimple_build_assign_with_ops (MINUS_EXPR, r, oprnd0, t1);
2033     }
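  /* Continuing the illustrative d == 7 instance for TRUNC_MOD_EXPR with
     oprnd0 == -100: q == -14, t1 == -98 and r == -2 == -100 % 7.  */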
2034 
2035   /* Pattern detected.  */
2036   if (dump_enabled_p ())
2037     {
2038       dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
2039                        "vect_recog_divmod_pattern: detected: ");
2040       dump_gimple_stmt (MSG_OPTIMIZED_LOCATIONS, TDF_SLIM, pattern_stmt, 0);
2041     }
2042 
2043   stmts->safe_push (last_stmt);
2044 
2045   *type_in = vectype;
2046   *type_out = vectype;
2047   return pattern_stmt;
2048 }
2049 
2050 /* Function vect_recog_mixed_size_cond_pattern
2051 
2052    Try to find the following pattern:
2053 
2054      type x_t, y_t;
2055      TYPE a_T, b_T, c_T;
2056    loop:
2057      S1  a_T = x_t CMP y_t ? b_T : c_T;
2058 
2059    where 'TYPE' is an integral type which has a different size
2060    from 'type'.  b_T and c_T are either constants (and if 'TYPE' is wider
2061    than 'type', the constants need to fit into an integer type
2062    with the same width as 'type') or results of conversion from 'type'.
2063 
2064    Input:
2065 
2066    * LAST_STMT: A stmt from which the pattern search begins.
2067 
2068    Output:
2069 
2070    * TYPE_IN: The type of the input arguments to the pattern.
2071 
2072    * TYPE_OUT: The type of the output of this pattern.
2073 
2074    * Return value: A new stmt that will be used to replace the pattern.
2075 	Additionally a def_stmt is added.
2076 
2077 	a_it = x_t CMP y_t ? b_it : c_it;
2078 	a_T = (TYPE) a_it;  */
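/* As a concrete instance (an illustrative sketch, not taken from the
   sources): with 'type' == short and 'TYPE' == int,

     short x_t, y_t;
     int a_T = x_t < y_t ? 42 : 66;

   is rewritten as

     short a_it = x_t < y_t ? 42 : 66;   <-- def_stmt, evaluated in the
                                             comparison's width
     int a_T = (int) a_it;               <-- the returned pattern stmt

   so the COND_EXPR is computed in the same vector width as the
   comparison.  */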
2079 
2080 static gimple
2081 vect_recog_mixed_size_cond_pattern (vec<gimple> *stmts, tree *type_in,
2082 				    tree *type_out)
2083 {
2084   gimple last_stmt = (*stmts)[0];
2085   tree cond_expr, then_clause, else_clause;
2086   stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt), def_stmt_info;
2087   tree type, vectype, comp_vectype, itype = NULL_TREE, vecitype;
2088   enum machine_mode cmpmode;
2089   gimple pattern_stmt, def_stmt;
2090   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
2091   bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
2092   tree orig_type0 = NULL_TREE, orig_type1 = NULL_TREE;
2093   gimple def_stmt0 = NULL, def_stmt1 = NULL;
2094   bool promotion;
2095   tree comp_scalar_type;
2096 
2097   if (!is_gimple_assign (last_stmt)
2098       || gimple_assign_rhs_code (last_stmt) != COND_EXPR
2099       || STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)
2100     return NULL;
2101 
2102   cond_expr = gimple_assign_rhs1 (last_stmt);
2103   then_clause = gimple_assign_rhs2 (last_stmt);
2104   else_clause = gimple_assign_rhs3 (last_stmt);
2105 
2106   if (!COMPARISON_CLASS_P (cond_expr))
2107     return NULL;
2108 
2109   comp_scalar_type = TREE_TYPE (TREE_OPERAND (cond_expr, 0));
2110   comp_vectype = get_vectype_for_scalar_type (comp_scalar_type);
2111   if (comp_vectype == NULL_TREE)
2112     return NULL;
2113 
2114   type = gimple_expr_type (last_stmt);
2115   if (types_compatible_p (type, comp_scalar_type)
2116       || ((TREE_CODE (then_clause) != INTEGER_CST
2117 	   || TREE_CODE (else_clause) != INTEGER_CST)
2118 	  && !INTEGRAL_TYPE_P (comp_scalar_type))
2119       || !INTEGRAL_TYPE_P (type))
2120     return NULL;
2121 
2122   if ((TREE_CODE (then_clause) != INTEGER_CST
2123        && !type_conversion_p (then_clause, last_stmt, false, &orig_type0,
2124                               &def_stmt0, &promotion))
2125       || (TREE_CODE (else_clause) != INTEGER_CST
2126           && !type_conversion_p (else_clause, last_stmt, false, &orig_type1,
2127                                  &def_stmt1, &promotion)))
2128     return NULL;
2129 
2130   if (orig_type0 && orig_type1
2131       && !types_compatible_p (orig_type0, orig_type1))
2132     return NULL;
2133 
2134   if (orig_type0)
2135     {
2136       if (!types_compatible_p (orig_type0, comp_scalar_type))
2137 	return NULL;
2138       then_clause = gimple_assign_rhs1 (def_stmt0);
2139       itype = orig_type0;
2140     }
2141 
2142   if (orig_type1)
2143     {
2144       if (!types_compatible_p (orig_type1, comp_scalar_type))
2145 	return NULL;
2146       else_clause = gimple_assign_rhs1 (def_stmt1);
2147       itype = orig_type1;
2148     }
2149 
2150   cmpmode = GET_MODE_INNER (TYPE_MODE (comp_vectype));
2151 
2152   if (GET_MODE_BITSIZE (TYPE_MODE (type)) == GET_MODE_BITSIZE (cmpmode))
2153     return NULL;
2154 
2155   vectype = get_vectype_for_scalar_type (type);
2156   if (vectype == NULL_TREE)
2157     return NULL;
2158 
2159   if (expand_vec_cond_expr_p (vectype, comp_vectype))
2160     return NULL;
2161 
2162   if (itype == NULL_TREE)
2163     itype = build_nonstandard_integer_type (GET_MODE_BITSIZE (cmpmode),
2164   					    TYPE_UNSIGNED (type));
2165 
2166   if (itype == NULL_TREE
2167       || GET_MODE_BITSIZE (TYPE_MODE (itype)) != GET_MODE_BITSIZE (cmpmode))
2168     return NULL;
2169 
2170   vecitype = get_vectype_for_scalar_type (itype);
2171   if (vecitype == NULL_TREE)
2172     return NULL;
2173 
2174   if (!expand_vec_cond_expr_p (vecitype, comp_vectype))
2175     return NULL;
2176 
2177   if (GET_MODE_BITSIZE (TYPE_MODE (type)) > GET_MODE_BITSIZE (cmpmode))
2178     {
2179       if ((TREE_CODE (then_clause) == INTEGER_CST
2180 	   && !int_fits_type_p (then_clause, itype))
2181 	  || (TREE_CODE (else_clause) == INTEGER_CST
2182 	      && !int_fits_type_p (else_clause, itype)))
2183 	return NULL;
2184     }
2185 
2186   def_stmt
2187     = gimple_build_assign_with_ops (COND_EXPR,
2188 				    vect_recog_temp_ssa_var (itype, NULL),
2189 				    unshare_expr (cond_expr),
2190 				    fold_convert (itype, then_clause),
2191 				    fold_convert (itype, else_clause));
2192   pattern_stmt
2193     = gimple_build_assign_with_ops (NOP_EXPR,
2194 				    vect_recog_temp_ssa_var (type, NULL),
2195 				    gimple_assign_lhs (def_stmt), NULL_TREE);
2196 
2197   new_pattern_def_seq (stmt_vinfo, def_stmt);
2198   def_stmt_info = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo);
2199   set_vinfo_for_stmt (def_stmt, def_stmt_info);
2200   STMT_VINFO_VECTYPE (def_stmt_info) = vecitype;
2201   *type_in = vecitype;
2202   *type_out = vectype;
2203 
2204   if (dump_enabled_p ())
2205     dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
2206                      "vect_recog_mixed_size_cond_pattern: detected: ");
2207 
2208   return pattern_stmt;
2209 }
2210 
2211 
2212 /* Helper function of vect_recog_bool_pattern.  Called recursively, return
2213    true if the computation of bool VAR can be rewritten by adjust_bool_pattern.  */
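/* For example (an illustrative sketch), for

     a_b = x < y;
     b_b = m != n;
     var = a_b & b_b;

   it returns true, provided the defining stmts are vectorizable and
   VEC_COND_EXPR is supported on the vector types of the comparison
   operands.  */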
2214 
2215 static bool
2216 check_bool_pattern (tree var, loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
2217 {
2218   gimple def_stmt;
2219   enum vect_def_type dt;
2220   tree def, rhs1;
2221   enum tree_code rhs_code;
2222 
2223   if (!vect_is_simple_use (var, NULL, loop_vinfo, bb_vinfo, &def_stmt, &def,
2224 			   &dt))
2225     return false;
2226 
2227   if (dt != vect_internal_def)
2228     return false;
2229 
2230   if (!is_gimple_assign (def_stmt))
2231     return false;
2232 
2233   if (!has_single_use (def))
2234     return false;
2235 
2236   rhs1 = gimple_assign_rhs1 (def_stmt);
2237   rhs_code = gimple_assign_rhs_code (def_stmt);
2238   switch (rhs_code)
2239     {
2240     case SSA_NAME:
2241       return check_bool_pattern (rhs1, loop_vinfo, bb_vinfo);
2242 
2243     CASE_CONVERT:
2244       if ((TYPE_PRECISION (TREE_TYPE (rhs1)) != 1
2245 	   || !TYPE_UNSIGNED (TREE_TYPE (rhs1)))
2246 	  && TREE_CODE (TREE_TYPE (rhs1)) != BOOLEAN_TYPE)
2247 	return false;
2248       return check_bool_pattern (rhs1, loop_vinfo, bb_vinfo);
2249 
2250     case BIT_NOT_EXPR:
2251       return check_bool_pattern (rhs1, loop_vinfo, bb_vinfo);
2252 
2253     case BIT_AND_EXPR:
2254     case BIT_IOR_EXPR:
2255     case BIT_XOR_EXPR:
2256       if (!check_bool_pattern (rhs1, loop_vinfo, bb_vinfo))
2257 	return false;
2258       return check_bool_pattern (gimple_assign_rhs2 (def_stmt), loop_vinfo,
2259 				 bb_vinfo);
2260 
2261     default:
2262       if (TREE_CODE_CLASS (rhs_code) == tcc_comparison)
2263 	{
2264 	  tree vecitype, comp_vectype;
2265 
2266 	  /* If the comparison can throw, then is_gimple_condexpr will be
2267 	     false and we can't make a COND_EXPR/VEC_COND_EXPR out of it.  */
2268 	  if (stmt_could_throw_p (def_stmt))
2269 	    return false;
2270 
2271 	  comp_vectype = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
2272 	  if (comp_vectype == NULL_TREE)
2273 	    return false;
2274 
2275 	  if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE)
2276 	    {
2277 	      enum machine_mode mode = TYPE_MODE (TREE_TYPE (rhs1));
2278 	      tree itype
2279 		= build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
2280 	      vecitype = get_vectype_for_scalar_type (itype);
2281 	      if (vecitype == NULL_TREE)
2282 		return false;
2283 	    }
2284 	  else
2285 	    vecitype = comp_vectype;
2286 	  return expand_vec_cond_expr_p (vecitype, comp_vectype);
2287 	}
2288       return false;
2289     }
2290 }
2291 
2292 
2293 /* Helper function of adjust_bool_pattern.  Cast the result of the previous
2294    pattern stmt (SSA_NAME_DEF_STMT of VAR) to TYPE, by moving its COND_EXPR
2295    from RELATED_STMT to PATTERN_DEF_SEQ and making the cast the RELATED_STMT.  */
2296 
2297 static tree
2298 adjust_bool_pattern_cast (tree type, tree var)
2299 {
2300   stmt_vec_info stmt_vinfo = vinfo_for_stmt (SSA_NAME_DEF_STMT (var));
2301   gimple cast_stmt, pattern_stmt;
2302 
2303   gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo));
2304   pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
2305   new_pattern_def_seq (stmt_vinfo, pattern_stmt);
2306   cast_stmt
2307     = gimple_build_assign_with_ops (NOP_EXPR,
2308 				    vect_recog_temp_ssa_var (type, NULL),
2309 				    gimple_assign_lhs (pattern_stmt),
2310 				    NULL_TREE);
2311   STMT_VINFO_RELATED_STMT (stmt_vinfo) = cast_stmt;
2312   return gimple_assign_lhs (cast_stmt);
2313 }
2314 
2315 
2316 /* Helper function of vect_recog_bool_pattern.  Do the actual transformations,
2317    recursively.  VAR is an SSA_NAME that should be transformed from bool
2318    to a wider integer type.  OUT_TYPE is the desired final integer type of
2319    the whole pattern.  TRUEVAL should be NULL unless we are optimizing a
2320    BIT_AND_EXPR into a COND_EXPR with one integer from one of the operands
2321    in the then_clause.  STMTS is where stmts with added pattern stmts
2322    should be pushed to.  */
2323 
2324 static tree
2325 adjust_bool_pattern (tree var, tree out_type, tree trueval,
2326 		     vec<gimple> *stmts)
2327 {
2328   gimple stmt = SSA_NAME_DEF_STMT (var);
2329   enum tree_code rhs_code, def_rhs_code;
2330   tree itype, cond_expr, rhs1, rhs2, irhs1, irhs2;
2331   location_t loc;
2332   gimple pattern_stmt, def_stmt;
2333 
2334   rhs1 = gimple_assign_rhs1 (stmt);
2335   rhs2 = gimple_assign_rhs2 (stmt);
2336   rhs_code = gimple_assign_rhs_code (stmt);
2337   loc = gimple_location (stmt);
2338   switch (rhs_code)
2339     {
2340     case SSA_NAME:
2341     CASE_CONVERT:
2342       irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
2343       itype = TREE_TYPE (irhs1);
2344       pattern_stmt
2345 	= gimple_build_assign_with_ops (SSA_NAME,
2346 					vect_recog_temp_ssa_var (itype, NULL),
2347 					irhs1, NULL_TREE);
2348       break;
2349 
2350     case BIT_NOT_EXPR:
2351       irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
2352       itype = TREE_TYPE (irhs1);
2353       pattern_stmt
2354 	= gimple_build_assign_with_ops (BIT_XOR_EXPR,
2355 					vect_recog_temp_ssa_var (itype, NULL),
2356 					irhs1, build_int_cst (itype, 1));
2357       break;
2358 
2359     case BIT_AND_EXPR:
2360       /* Try to optimize x = y & (a < b ? 1 : 0); into
2361 	 x = (a < b ? y : 0);
2362 
2363 	 E.g. for:
2364 	   bool a_b, b_b, c_b;
2365 	   TYPE d_T;
2366 
2367 	   S1  a_b = x1 CMP1 y1;
2368 	   S2  b_b = x2 CMP2 y2;
2369 	   S3  c_b = a_b & b_b;
2370 	   S4  d_T = (TYPE) c_b;
2371 
2372 	 we would normally emit:
2373 
2374 	   S1'  a_T = x1 CMP1 y1 ? 1 : 0;
2375 	   S2'  b_T = x2 CMP2 y2 ? 1 : 0;
2376 	   S3'  c_T = a_T & b_T;
2377 	   S4'  d_T = c_T;
2378 
2379 	 but we can save one stmt by using the
2380 	 result of one of the COND_EXPRs in the other COND_EXPR and leave
2381 	 BIT_AND_EXPR stmt out:
2382 
2383 	   S1'  a_T = x1 CMP1 y1 ? 1 : 0;
2384 	   S3'  c_T = x2 CMP2 y2 ? a_T : 0;
2385 	   S4'  f_T = c_T;
2386 
2387 	 At least when VEC_COND_EXPR is implemented using masks,
2388 	 cond ? 1 : 0 is as expensive as cond ? var : 0; in both cases it
2389 	 computes the comparison mask and ANDs it, in one case with an
2390 	 all-ones vector, in the other case with a vector register.
2391 	 Don't do this for BIT_IOR_EXPR, because cond ? 1 : var is
2392 	 often more expensive.  */
2393       def_stmt = SSA_NAME_DEF_STMT (rhs2);
2394       def_rhs_code = gimple_assign_rhs_code (def_stmt);
2395       if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
2396 	{
2397 	  tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
2398 	  irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
2399 	  if (TYPE_PRECISION (TREE_TYPE (irhs1))
2400 	      == GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (def_rhs1))))
2401 	    {
2402 	      gimple tstmt;
2403 	      stmt_vec_info stmt_def_vinfo = vinfo_for_stmt (def_stmt);
2404 	      irhs2 = adjust_bool_pattern (rhs2, out_type, irhs1, stmts);
2405 	      tstmt = stmts->pop ();
2406 	      gcc_assert (tstmt == def_stmt);
2407 	      stmts->quick_push (stmt);
2408 	      STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt))
2409 		= STMT_VINFO_RELATED_STMT (stmt_def_vinfo);
2410 	      gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_def_vinfo));
2411 	      STMT_VINFO_RELATED_STMT (stmt_def_vinfo) = NULL;
2412 	      return irhs2;
2413 	    }
2414 	  else
2415 	    irhs2 = adjust_bool_pattern (rhs2, out_type, NULL_TREE, stmts);
2416 	  goto and_ior_xor;
2417 	}
2418       def_stmt = SSA_NAME_DEF_STMT (rhs1);
2419       def_rhs_code = gimple_assign_rhs_code (def_stmt);
2420       if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
2421 	{
2422 	  tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
2423 	  irhs2 = adjust_bool_pattern (rhs2, out_type, NULL_TREE, stmts);
2424 	  if (TYPE_PRECISION (TREE_TYPE (irhs2))
2425 	      == GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (def_rhs1))))
2426 	    {
2427 	      gimple tstmt;
2428 	      stmt_vec_info stmt_def_vinfo = vinfo_for_stmt (def_stmt);
2429 	      irhs1 = adjust_bool_pattern (rhs1, out_type, irhs2, stmts);
2430 	      tstmt = stmts->pop ();
2431 	      gcc_assert (tstmt == def_stmt);
2432 	      stmts->quick_push (stmt);
2433 	      STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt))
2434 		= STMT_VINFO_RELATED_STMT (stmt_def_vinfo);
2435 	      gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_def_vinfo));
2436 	      STMT_VINFO_RELATED_STMT (stmt_def_vinfo) = NULL;
2437 	      return irhs1;
2438 	    }
2439 	  else
2440 	    irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
2441 	  goto and_ior_xor;
2442 	}
2443       /* FALLTHRU */
2444     case BIT_IOR_EXPR:
2445     case BIT_XOR_EXPR:
2446       irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
2447       irhs2 = adjust_bool_pattern (rhs2, out_type, NULL_TREE, stmts);
2448     and_ior_xor:
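      /* E.g. (illustrative) with a 32-bit OUT_TYPE, an 8-bit irhs1 and a
	 32-bit irhs2, the operand whose precision is further from out_prec
	 (irhs1 here) is cast to the other operand's type.  */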
2449       if (TYPE_PRECISION (TREE_TYPE (irhs1))
2450 	  != TYPE_PRECISION (TREE_TYPE (irhs2)))
2451 	{
2452 	  int prec1 = TYPE_PRECISION (TREE_TYPE (irhs1));
2453 	  int prec2 = TYPE_PRECISION (TREE_TYPE (irhs2));
2454 	  int out_prec = TYPE_PRECISION (out_type);
2455 	  if (absu_hwi (out_prec - prec1) < absu_hwi (out_prec - prec2))
2456 	    irhs2 = adjust_bool_pattern_cast (TREE_TYPE (irhs1), rhs2);
2457 	  else if (absu_hwi (out_prec - prec1) > absu_hwi (out_prec - prec2))
2458 	    irhs1 = adjust_bool_pattern_cast (TREE_TYPE (irhs2), rhs1);
2459 	  else
2460 	    {
2461 	      irhs1 = adjust_bool_pattern_cast (out_type, rhs1);
2462 	      irhs2 = adjust_bool_pattern_cast (out_type, rhs2);
2463 	    }
2464 	}
2465       itype = TREE_TYPE (irhs1);
2466       pattern_stmt
2467 	= gimple_build_assign_with_ops (rhs_code,
2468 					vect_recog_temp_ssa_var (itype, NULL),
2469 					irhs1, irhs2);
2470       break;
2471 
2472     default:
2473       gcc_assert (TREE_CODE_CLASS (rhs_code) == tcc_comparison);
2474       if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE
2475 	  || !TYPE_UNSIGNED (TREE_TYPE (rhs1))
2476 	  || (TYPE_PRECISION (TREE_TYPE (rhs1))
2477 	      != GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (rhs1)))))
2478 	{
2479 	  enum machine_mode mode = TYPE_MODE (TREE_TYPE (rhs1));
2480 	  itype
2481 	    = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
2482 	}
2483       else
2484 	itype = TREE_TYPE (rhs1);
2485       cond_expr = build2_loc (loc, rhs_code, itype, rhs1, rhs2);
2486       if (trueval == NULL_TREE)
2487 	trueval = build_int_cst (itype, 1);
2488       else
2489 	gcc_checking_assert (useless_type_conversion_p (itype,
2490 							TREE_TYPE (trueval)));
2491       pattern_stmt
2492 	= gimple_build_assign_with_ops (COND_EXPR,
2493 					vect_recog_temp_ssa_var (itype, NULL),
2494 					cond_expr, trueval,
2495 					build_int_cst (itype, 0));
2496       break;
2497     }
2498 
2499   stmts->safe_push (stmt);
2500   gimple_set_location (pattern_stmt, loc);
2501   STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt)) = pattern_stmt;
2502   return gimple_assign_lhs (pattern_stmt);
2503 }
2504 
2505 
2506 /* Function vect_recog_bool_pattern
2507 
2508    Try to find a pattern like the following:
2509 
2510      bool a_b, b_b, c_b, d_b, e_b;
2511      TYPE f_T;
2512    loop:
2513      S1  a_b = x1 CMP1 y1;
2514      S2  b_b = x2 CMP2 y2;
2515      S3  c_b = a_b & b_b;
2516      S4  d_b = x3 CMP3 y3;
2517      S5  e_b = c_b | d_b;
2518      S6  f_T = (TYPE) e_b;
2519 
2520    where type 'TYPE' is an integral type.
2521 
2522    Input:
2523 
2524    * LAST_STMT: The stmt at the end of the pattern, from which the
2525 		search begins, i.e. the cast of a bool to
2526 		an integer type.
2527 
2528    Output:
2529 
2530    * TYPE_IN: The type of the input arguments to the pattern.
2531 
2532    * TYPE_OUT: The type of the output of this pattern.
2533 
2534    * Return value: A new stmt that will be used to replace the pattern.
2535 
2536 	Assuming the size of TYPE is the same as the size of all comparisons
2537 	(otherwise some casts would be added where needed), for the above
2538 	sequence we create the following related pattern stmts:
2539 	S1'  a_T = x1 CMP1 y1 ? 1 : 0;
2540 	S3'  c_T = x2 CMP2 y2 ? a_T : 0;
2541 	S4'  d_T = x3 CMP3 y3 ? 1 : 0;
2542 	S5'  e_T = c_T | d_T;
2543 	S6'  f_T = e_T;
2544 
2545 	Instead of the above S3' we could emit:
2546 	S2'  b_T = x2 CMP2 y2 ? 1 : 0;
2547 	S3'  c_T = a_T & b_T;
2548 	but the above is more efficient.  */
2549 
2550 static gimple
2551 vect_recog_bool_pattern (vec<gimple> *stmts, tree *type_in,
2552 			 tree *type_out)
2553 {
2554   gimple last_stmt = stmts->pop ();
2555   enum tree_code rhs_code;
2556   tree var, lhs, rhs, vectype;
2557   stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
2558   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
2559   bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
2560   gimple pattern_stmt;
2561 
2562   if (!is_gimple_assign (last_stmt))
2563     return NULL;
2564 
2565   var = gimple_assign_rhs1 (last_stmt);
2566   lhs = gimple_assign_lhs (last_stmt);
2567 
2568   if ((TYPE_PRECISION (TREE_TYPE (var)) != 1
2569        || !TYPE_UNSIGNED (TREE_TYPE (var)))
2570       && TREE_CODE (TREE_TYPE (var)) != BOOLEAN_TYPE)
2571     return NULL;
2572 
2573   rhs_code = gimple_assign_rhs_code (last_stmt);
2574   if (CONVERT_EXPR_CODE_P (rhs_code))
2575     {
2576       if (TREE_CODE (TREE_TYPE (lhs)) != INTEGER_TYPE
2577 	  || TYPE_PRECISION (TREE_TYPE (lhs)) == 1)
2578 	return NULL;
2579       vectype = get_vectype_for_scalar_type (TREE_TYPE (lhs));
2580       if (vectype == NULL_TREE)
2581 	return NULL;
2582 
2583       if (!check_bool_pattern (var, loop_vinfo, bb_vinfo))
2584 	return NULL;
2585 
2586       rhs = adjust_bool_pattern (var, TREE_TYPE (lhs), NULL_TREE, stmts);
2587       lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
2588       if (useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
2589 	pattern_stmt
2590 	  = gimple_build_assign_with_ops (SSA_NAME, lhs, rhs, NULL_TREE);
2591       else
2592 	pattern_stmt
2593 	  = gimple_build_assign_with_ops (NOP_EXPR, lhs, rhs, NULL_TREE);
2594       *type_out = vectype;
2595       *type_in = vectype;
2596       stmts->safe_push (last_stmt);
2597       if (dump_enabled_p ())
2598 	dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
2599                          "vect_recog_bool_pattern: detected: ");
2600 
2601       return pattern_stmt;
2602     }
2603   else if (rhs_code == SSA_NAME
2604 	   && STMT_VINFO_DATA_REF (stmt_vinfo))
2605     {
2606       stmt_vec_info pattern_stmt_info;
2607       vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
2608       gcc_assert (vectype != NULL_TREE);
2609       if (!VECTOR_MODE_P (TYPE_MODE (vectype)))
2610 	return NULL;
2611       if (!check_bool_pattern (var, loop_vinfo, bb_vinfo))
2612 	return NULL;
2613 
2614       rhs = adjust_bool_pattern (var, TREE_TYPE (vectype), NULL_TREE, stmts);
2615       lhs = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vectype), lhs);
2616       if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
2617 	{
2618 	  tree rhs2 = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
2619 	  gimple cast_stmt
2620 	    = gimple_build_assign_with_ops (NOP_EXPR, rhs2, rhs, NULL_TREE);
2621 	  new_pattern_def_seq (stmt_vinfo, cast_stmt);
2622 	  rhs = rhs2;
2623 	}
2624       pattern_stmt
2625 	= gimple_build_assign_with_ops (SSA_NAME, lhs, rhs, NULL_TREE);
2626       pattern_stmt_info = new_stmt_vec_info (pattern_stmt, loop_vinfo,
2627 						bb_vinfo);
2628       set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
2629       STMT_VINFO_DATA_REF (pattern_stmt_info)
2630 	= STMT_VINFO_DATA_REF (stmt_vinfo);
2631       STMT_VINFO_DR_BASE_ADDRESS (pattern_stmt_info)
2632 	= STMT_VINFO_DR_BASE_ADDRESS (stmt_vinfo);
2633       STMT_VINFO_DR_INIT (pattern_stmt_info) = STMT_VINFO_DR_INIT (stmt_vinfo);
2634       STMT_VINFO_DR_OFFSET (pattern_stmt_info)
2635 	= STMT_VINFO_DR_OFFSET (stmt_vinfo);
2636       STMT_VINFO_DR_STEP (pattern_stmt_info) = STMT_VINFO_DR_STEP (stmt_vinfo);
2637       STMT_VINFO_DR_ALIGNED_TO (pattern_stmt_info)
2638 	= STMT_VINFO_DR_ALIGNED_TO (stmt_vinfo);
2639       DR_STMT (STMT_VINFO_DATA_REF (stmt_vinfo)) = pattern_stmt;
2640       *type_out = vectype;
2641       *type_in = vectype;
2642       stmts->safe_push (last_stmt);
2643       if (dump_enabled_p ())
2644 	dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
2645                          "vect_recog_bool_pattern: detected: ");
2646       return pattern_stmt;
2647     }
2648   else
2649     return NULL;
2650 }
2651 
2652 
2653 /* Mark statements that are involved in a pattern.  */
2654 
2655 static inline void
2656 vect_mark_pattern_stmts (gimple orig_stmt, gimple pattern_stmt,
2657                          tree pattern_vectype)
2658 {
2659   stmt_vec_info pattern_stmt_info, def_stmt_info;
2660   stmt_vec_info orig_stmt_info = vinfo_for_stmt (orig_stmt);
2661   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (orig_stmt_info);
2662   bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (orig_stmt_info);
2663   gimple def_stmt;
2664 
2665   pattern_stmt_info = vinfo_for_stmt (pattern_stmt);
2666   if (pattern_stmt_info == NULL)
2667     {
2668       pattern_stmt_info = new_stmt_vec_info (pattern_stmt, loop_vinfo,
2669 						bb_vinfo);
2670       set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
2671     }
2672   gimple_set_bb (pattern_stmt, gimple_bb (orig_stmt));
2673 
2674   STMT_VINFO_RELATED_STMT (pattern_stmt_info) = orig_stmt;
2675   STMT_VINFO_DEF_TYPE (pattern_stmt_info)
2676     = STMT_VINFO_DEF_TYPE (orig_stmt_info);
2677   STMT_VINFO_VECTYPE (pattern_stmt_info) = pattern_vectype;
2678   STMT_VINFO_IN_PATTERN_P (orig_stmt_info) = true;
2679   STMT_VINFO_RELATED_STMT (orig_stmt_info) = pattern_stmt;
2680   STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info)
2681     = STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info);
2682   if (STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info))
2683     {
2684       gimple_stmt_iterator si;
2685       for (si = gsi_start (STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info));
2686 	   !gsi_end_p (si); gsi_next (&si))
2687 	{
2688 	  def_stmt = gsi_stmt (si);
2689 	  def_stmt_info = vinfo_for_stmt (def_stmt);
2690 	  if (def_stmt_info == NULL)
2691 	    {
2692 	      def_stmt_info = new_stmt_vec_info (def_stmt, loop_vinfo,
2693 						 bb_vinfo);
2694 	      set_vinfo_for_stmt (def_stmt, def_stmt_info);
2695 	    }
2696 	  gimple_set_bb (def_stmt, gimple_bb (orig_stmt));
2697 	  STMT_VINFO_RELATED_STMT (def_stmt_info) = orig_stmt;
2698 	  STMT_VINFO_DEF_TYPE (def_stmt_info)
2699 	    = STMT_VINFO_DEF_TYPE (orig_stmt_info);
2700 	  if (STMT_VINFO_VECTYPE (def_stmt_info) == NULL_TREE)
2701 	    STMT_VINFO_VECTYPE (def_stmt_info) = pattern_vectype;
2702 	}
2703     }
2704 }
2705 
2706 /* Function vect_pattern_recog_1
2707 
2708    Input:
2709    PATTERN_RECOG_FUNC: A pointer to a function that detects a certain
2710         computation pattern.
2711    STMT: A stmt from which the pattern search should start.
2712 
2713    If PATTERN_RECOG_FUNC successfully detected the pattern, it creates an
2714    expression that computes the same functionality and can be used to
2715    replace the sequence of stmts that are involved in the pattern.
2716 
2717    Output:
2718    This function checks if the expression returned by PATTERN_RECOG_FUNC is
2719    supported in vector form by the target.  We use 'TYPE_IN' to obtain the
2720    relevant vector type.  If 'TYPE_IN' is already a vector type, then this
2721    indicates that target support had already been checked by PATTERN_RECOG_FUNC.
2722    If 'TYPE_OUT' is also returned by PATTERN_RECOG_FUNC, we check that it
2723    matches the available target pattern.
2724 
2725    This function also does some bookkeeping, as explained in the documentation
2726    for vect_pattern_recog.  */
2727 
2728 static void
2729 vect_pattern_recog_1 (vect_recog_func_ptr vect_recog_func,
2730 		      gimple_stmt_iterator si,
2731 		      vec<gimple> *stmts_to_replace)
2732 {
2733   gimple stmt = gsi_stmt (si), pattern_stmt;
2734   stmt_vec_info stmt_info;
2735   loop_vec_info loop_vinfo;
2736   tree pattern_vectype;
2737   tree type_in, type_out;
2738   enum tree_code code;
2739   int i;
2740   gimple next;
2741 
2742   stmts_to_replace->truncate (0);
2743   stmts_to_replace->quick_push (stmt);
2744   pattern_stmt = (* vect_recog_func) (stmts_to_replace, &type_in, &type_out);
2745   if (!pattern_stmt)
2746     return;
2747 
2748   stmt = stmts_to_replace->last ();
2749   stmt_info = vinfo_for_stmt (stmt);
2750   loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2751 
2752   if (VECTOR_MODE_P (TYPE_MODE (type_in)))
2753     {
2754       /* No need to check target support (already checked by the pattern
2755          recognition function).  */
2756       pattern_vectype = type_out ? type_out : type_in;
2757     }
2758   else
2759     {
2760       enum machine_mode vec_mode;
2761       enum insn_code icode;
2762       optab optab;
2763 
2764       /* Check target support  */
2765       type_in = get_vectype_for_scalar_type (type_in);
2766       if (!type_in)
2767 	return;
2768       if (type_out)
2769 	type_out = get_vectype_for_scalar_type (type_out);
2770       else
2771 	type_out = type_in;
2772       if (!type_out)
2773 	return;
2774       pattern_vectype = type_out;
2775 
2776       if (is_gimple_assign (pattern_stmt))
2777 	code = gimple_assign_rhs_code (pattern_stmt);
2778       else
2779         {
2780 	  gcc_assert (is_gimple_call (pattern_stmt));
2781 	  code = CALL_EXPR;
2782 	}
2783 
2784       optab = optab_for_tree_code (code, type_in, optab_default);
2785       vec_mode = TYPE_MODE (type_in);
2786       if (!optab
2787           || (icode = optab_handler (optab, vec_mode)) == CODE_FOR_nothing
2788           || (insn_data[icode].operand[0].mode != TYPE_MODE (type_out)))
2789 	return;
2790     }
2791 
2792   /* Found a vectorizable pattern.  */
2793   if (dump_enabled_p ())
2794     {
2795       dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
2796                        "pattern recognized: ");
2797       dump_gimple_stmt (MSG_OPTIMIZED_LOCATIONS, TDF_SLIM, pattern_stmt, 0);
2798     }
2799 
2800   /* Mark the stmts that are involved in the pattern. */
2801   vect_mark_pattern_stmts (stmt, pattern_stmt, pattern_vectype);
2802 
2803   /* Patterns cannot be vectorized using SLP, because they change the order of
2804      computation.  */
2805   if (loop_vinfo)
2806     FOR_EACH_VEC_ELT (LOOP_VINFO_REDUCTIONS (loop_vinfo), i, next)
2807       if (next == stmt)
2808         LOOP_VINFO_REDUCTIONS (loop_vinfo).ordered_remove (i);
2809 
2810   /* It is possible that additional pattern stmts are created and inserted in
2811      STMTS_TO_REPLACE.  We create a stmt_info for each of them, and mark the
2812      relevant statements.  */
2813   for (i = 0; stmts_to_replace->iterate (i, &stmt)
2814 	      && (unsigned) i < (stmts_to_replace->length () - 1);
2815        i++)
2816     {
2817       stmt_info = vinfo_for_stmt (stmt);
2818       pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
2819       if (dump_enabled_p ())
2820         {
2821           dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
2822                            "additional pattern stmt: ");
2823           dump_gimple_stmt (MSG_OPTIMIZED_LOCATIONS, TDF_SLIM, pattern_stmt, 0);
2824         }
2825 
2826       vect_mark_pattern_stmts (stmt, pattern_stmt, NULL_TREE);
2827     }
2828 }
2829 
2830 
2831 /* Function vect_pattern_recog
2832 
2833    Input:
2834    LOOP_VINFO - a struct_loop_info of a loop in which we want to look for
2835         computation idioms.
2836 
2837    Output - for each computation idiom that is detected we create a new stmt
2838         that provides the same functionality and that can be vectorized.  We
2839         also record some information in the struct_stmt_info of the relevant
2840         stmts, as explained below:
2841 
2842    At the entry to this function we have the following stmts, with the
2843    following initial value in the STMT_VINFO fields:
2844 
2845          stmt                     in_pattern_p  related_stmt    vec_stmt
2846          S1: a_i = ....                 -       -               -
2847          S2: a_2 = ..use(a_i)..         -       -               -
2848          S3: a_1 = ..use(a_2)..         -       -               -
2849          S4: a_0 = ..use(a_1)..         -       -               -
2850          S5: ... = ..use(a_0)..         -       -               -
2851 
2852    Say the sequence {S1,S2,S3,S4} was detected as a pattern that can be
2853    represented by a single stmt.  We then:
2854    - create a new stmt S6 equivalent to the pattern (the stmt is not
2855      inserted into the code)
2856    - fill in the STMT_VINFO fields as follows:
2857 
2858                                   in_pattern_p  related_stmt    vec_stmt
2859          S1: a_i = ....                 -       -               -
2860          S2: a_2 = ..use(a_i)..         -       -               -
2861          S3: a_1 = ..use(a_2)..         -       -               -
2862          S4: a_0 = ..use(a_1)..         true    S6              -
2863           '---> S6: a_new = ....        -       S4              -
2864          S5: ... = ..use(a_0)..         -       -               -
2865 
2866    (the last stmt in the pattern (S4) and the new pattern stmt (S6) point
2867    to each other through the RELATED_STMT field).
2868 
2869    S6 will be marked as relevant in vect_mark_stmts_to_be_vectorized instead
2870    of S4 because it will replace all its uses.  Stmts {S1,S2,S3} will
2871    remain irrelevant unless used by stmts other than S4.
2872 
2873    If vectorization succeeds, vect_transform_stmt will skip over {S1,S2,S3}
2874    (because they are marked as irrelevant).  It will vectorize S6, and record
2875    a pointer to the new vector stmt VS6 from S6 (as usual).
2876    S4 will be skipped, and S5 will be vectorized as usual:
2877 
2878                                   in_pattern_p  related_stmt    vec_stmt
2879          S1: a_i = ....                 -       -               -
2880          S2: a_2 = ..use(a_i)..         -       -               -
2881          S3: a_1 = ..use(a_2)..         -       -               -
2882        > VS6: va_new = ....             -       -               -
2883          S4: a_0 = ..use(a_1)..         true    S6              VS6
2884           '---> S6: a_new = ....        -       S4              VS6
2885        > VS5: ... = ..vuse(va_new)..    -       -               -
2886          S5: ... = ..use(a_0)..         -       -               -
2887 
2888    DCE could then get rid of {S1,S2,S3,S4,S5} (if their defs are not used
2889    elsewhere), and we'll end up with:
2890 
2891         VS6: va_new = ....
2892         VS5: ... = ..vuse(va_new)..
2893 
2894    In case of more than one pattern statement, e.g., widen-mult with an
2895    intermediate type:
2896 
2897      S1  a_t = ;
2898      S2  a_T = (TYPE) a_t;
2899            '--> S3: a_it = (interm_type) a_t;
2900      S4  prod_T = a_T * CONST;
2901            '--> S5: prod_T' = a_it w* CONST;
2902 
2903    there may be other users of a_T outside the pattern.  In that case S2 will
2904    be marked as relevant (as well as S3), and both S2 and S3 will be analyzed
2905    and vectorized.  The vector stmt VS2 will be recorded in S2, and VS3 will
2906    be recorded in S3.  */
2907 
2908 void
2909 vect_pattern_recog (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
2910 {
2911   struct loop *loop;
2912   basic_block *bbs;
2913   unsigned int nbbs;
2914   gimple_stmt_iterator si;
2915   unsigned int i, j;
2916   vect_recog_func_ptr vect_recog_func;
2917   vec<gimple> stmts_to_replace;
2918   stmts_to_replace.create (1);
2919   gimple stmt;
2920 
2921   if (dump_enabled_p ())
2922     dump_printf_loc (MSG_NOTE, vect_location,
2923                      "=== vect_pattern_recog ===");
2924 
2925   if (loop_vinfo)
2926     {
2927       loop = LOOP_VINFO_LOOP (loop_vinfo);
2928       bbs = LOOP_VINFO_BBS (loop_vinfo);
2929       nbbs = loop->num_nodes;
2930     }
2931   else
2932     {
2933       bbs = &BB_VINFO_BB (bb_vinfo);
2934       nbbs = 1;
2935     }
2936 
2937   /* Scan through the loop stmts, applying the pattern recognition
2938      functions starting at each stmt visited:  */
2939   for (i = 0; i < nbbs; i++)
2940     {
2941       basic_block bb = bbs[i];
2942       for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
2943         {
2944 	  if (bb_vinfo && (stmt = gsi_stmt (si))
2945 	      && vinfo_for_stmt (stmt)
2946 	      && !STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt)))
2947 	   continue;
2948 
2949           /* Scan over all generic vect_recog_xxx_pattern functions.  */
2950           for (j = 0; j < NUM_PATTERNS; j++)
2951             {
2952 	      vect_recog_func = vect_vect_recog_func_ptrs[j];
2953 	      vect_pattern_recog_1 (vect_recog_func, si,
2954 				    &stmts_to_replace);
2955             }
2956         }
2957     }
2958 
2959   stmts_to_replace.release ();
2960 }
2961