/* Analysis Utilities for Loop Vectorization.
   Copyright (C) 2006-2016 Free Software Foundation, Inc.
   Contributed by Dorit Nuzman <dorit@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "rtl.h"
#include "tree.h"
#include "gimple.h"
#include "ssa.h"
#include "expmed.h"
#include "optabs-tree.h"
#include "insn-config.h"
#include "recog.h"		/* FIXME: for insn_data */
#include "fold-const.h"
#include "stor-layout.h"
#include "tree-eh.h"
#include "gimplify.h"
#include "gimple-iterator.h"
#include "cfgloop.h"
#include "tree-vectorizer.h"
#include "dumpfile.h"
#include "builtins.h"
#include "internal-fn.h"
#include "case-cfn-macros.h"

/* Pattern recognition functions.  */
static gimple *vect_recog_widen_sum_pattern (vec<gimple *> *, tree *,
					     tree *);
static gimple *vect_recog_widen_mult_pattern (vec<gimple *> *, tree *,
					      tree *);
static gimple *vect_recog_dot_prod_pattern (vec<gimple *> *, tree *,
					    tree *);
static gimple *vect_recog_sad_pattern (vec<gimple *> *, tree *,
				       tree *);
static gimple *vect_recog_pow_pattern (vec<gimple *> *, tree *, tree *);
static gimple *vect_recog_over_widening_pattern (vec<gimple *> *, tree *,
                                                 tree *);
static gimple *vect_recog_widen_shift_pattern (vec<gimple *> *,
	                                       tree *, tree *);
static gimple *vect_recog_rotate_pattern (vec<gimple *> *, tree *, tree *);
static gimple *vect_recog_vector_vector_shift_pattern (vec<gimple *> *,
						       tree *, tree *);
static gimple *vect_recog_divmod_pattern (vec<gimple *> *,
					  tree *, tree *);

static gimple *vect_recog_mult_pattern (vec<gimple *> *,
					tree *, tree *);

static gimple *vect_recog_mixed_size_cond_pattern (vec<gimple *> *,
						   tree *, tree *);
static gimple *vect_recog_bool_pattern (vec<gimple *> *, tree *, tree *);
static gimple *vect_recog_mask_conversion_pattern (vec<gimple *> *, tree *,
						   tree *);

struct vect_recog_func
{
  vect_recog_func_ptr fn;
  const char *name;
};

/* Note that ordering matters - the first pattern matching on a stmt is
   taken, which means the more complex pattern usually needs to precede
   the less complex ones (widen_sum only after dot_prod or sad, for
   example).  */
static vect_recog_func vect_vect_recog_func_ptrs[NUM_PATTERNS] = {
      { vect_recog_widen_mult_pattern, "widen_mult" },
      { vect_recog_dot_prod_pattern, "dot_prod" },
      { vect_recog_sad_pattern, "sad" },
      { vect_recog_widen_sum_pattern, "widen_sum" },
      { vect_recog_pow_pattern, "pow" },
      { vect_recog_widen_shift_pattern, "widen_shift" },
      { vect_recog_over_widening_pattern, "over_widening" },
      { vect_recog_rotate_pattern, "rotate" },
      { vect_recog_vector_vector_shift_pattern, "vector_vector_shift" },
      { vect_recog_divmod_pattern, "divmod" },
      { vect_recog_mult_pattern, "mult" },
      { vect_recog_mixed_size_cond_pattern, "mixed_size_cond" },
      { vect_recog_bool_pattern, "bool" },
      { vect_recog_mask_conversion_pattern, "mask_conversion" }
};

static inline void
append_pattern_def_seq (stmt_vec_info stmt_info, gimple *stmt)
{
  gimple_seq_add_stmt_without_update (&STMT_VINFO_PATTERN_DEF_SEQ (stmt_info),
				      stmt);
}

static inline void
new_pattern_def_seq (stmt_vec_info stmt_info, gimple *stmt)
{
  STMT_VINFO_PATTERN_DEF_SEQ (stmt_info) = NULL;
  append_pattern_def_seq (stmt_info, stmt);
}

/* Check whether STMT2 is in the same loop or basic block as STMT1.
   Which of the two applies depends on whether we're currently doing
   loop-based or basic-block-based vectorization, as determined by
   the vinfo_for_stmt for STMT1 (which must be defined).

   If this returns true, vinfo_for_stmt for STMT2 is guaranteed
   to be defined as well.  */

static bool
vect_same_loop_or_bb_p (gimple *stmt1, gimple *stmt2)
{
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt1);
  return vect_stmt_in_region_p (stmt_vinfo->vinfo, stmt2);
}

/* If the LHS of DEF_STMT has a single use, and that statement is
   in the same loop or basic block, return it.  */

static gimple *
vect_single_imm_use (gimple *def_stmt)
{
  tree lhs = gimple_assign_lhs (def_stmt);
  use_operand_p use_p;
  gimple *use_stmt;

  if (!single_imm_use (lhs, &use_p, &use_stmt))
    return NULL;

  if (!vect_same_loop_or_bb_p (def_stmt, use_stmt))
    return NULL;

  return use_stmt;
}

/* Check whether NAME, an ssa-name used in USE_STMT,
   is a result of a type promotion, such that:
     DEF_STMT: NAME = NOP (name0)
   If CHECK_SIGN is TRUE, check that either both types are signed or both are
   unsigned.  */
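
/* For illustration (a hedged sketch, not taken from the sources): given
   GIMPLE along the lines of

     short s_0 = ...;
     int   i_0 = (int) s_0;	<-- DEF_STMT: NAME = NOP (name0)
     ...   use of i_0 ...	<-- USE_STMT

   calling type_conversion_p on i_0 is expected to return true with
   *ORIG_TYPE == short and *PROMOTION == true, since the precision of
   int is at least twice that of short.  */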

static bool
type_conversion_p (tree name, gimple *use_stmt, bool check_sign,
		   tree *orig_type, gimple **def_stmt, bool *promotion)
{
  gimple *dummy_gimple;
  stmt_vec_info stmt_vinfo;
  tree type = TREE_TYPE (name);
  tree oprnd0;
  enum vect_def_type dt;

  stmt_vinfo = vinfo_for_stmt (use_stmt);
  if (!vect_is_simple_use (name, stmt_vinfo->vinfo, def_stmt, &dt))
    return false;

  if (dt != vect_internal_def
      && dt != vect_external_def && dt != vect_constant_def)
    return false;

  if (!*def_stmt)
    return false;

  if (dt == vect_internal_def)
    {
      stmt_vec_info def_vinfo = vinfo_for_stmt (*def_stmt);
      if (STMT_VINFO_IN_PATTERN_P (def_vinfo))
	return false;
    }

  if (!is_gimple_assign (*def_stmt))
    return false;

  if (!CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (*def_stmt)))
    return false;

  oprnd0 = gimple_assign_rhs1 (*def_stmt);

  *orig_type = TREE_TYPE (oprnd0);
  if (!INTEGRAL_TYPE_P (type) || !INTEGRAL_TYPE_P (*orig_type)
      || ((TYPE_UNSIGNED (type) != TYPE_UNSIGNED (*orig_type)) && check_sign))
    return false;

  if (TYPE_PRECISION (type) >= (TYPE_PRECISION (*orig_type) * 2))
    *promotion = true;
  else
    *promotion = false;

  if (!vect_is_simple_use (oprnd0, stmt_vinfo->vinfo, &dummy_gimple, &dt))
    return false;

  return true;
}

/* Return a new temporary SSA name of TYPE for a pattern stmt for STMT.
   If STMT is NULL, the caller must set SSA_NAME_DEF_STMT for the
   returned SSA var.  */

static tree
vect_recog_temp_ssa_var (tree type, gimple *stmt)
{
  return make_temp_ssa_name (type, stmt, "patt");
}

/* Function vect_recog_dot_prod_pattern

   Try to find the following pattern:

     type x_t, y_t;
     TYPE1 prod;
     TYPE2 sum = init;
   loop:
     sum_0 = phi <init, sum_1>
     S1  x_t = ...
     S2  y_t = ...
     S3  x_T = (TYPE1) x_t;
     S4  y_T = (TYPE1) y_t;
     S5  prod = x_T * y_T;
     [S6  prod = (TYPE2) prod;  #optional]
     S7  sum_1 = prod + sum_0;

   where 'TYPE1' is exactly double the size of type 'type', and 'TYPE2' is
   the same size as 'TYPE1' or bigger.  This is a special case of a reduction
   computation.

   Input:

   * STMTS: Contains a stmt from which the pattern search begins.  In the
   example, when this function is called with S7, the pattern {S3,S4,S5,S6,S7}
   will be detected.

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
        WIDEN_DOT_PRODUCT <x_t, y_t, sum_0>

   Note: The dot-prod idiom is a widening reduction pattern that is
         vectorized without preserving all the intermediate results.  It
         produces only N/2 (widened) results (by summing up pairs of
         intermediate results) rather than all N results.  Therefore, we
         cannot allow this pattern when we want to get all the results and in
         the correct order (as is the case when this computation is in an
         inner-loop nested in an outer-loop that is being vectorized).  */
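
/* A hedged source-level illustration (the function name is hypothetical,
   not from the sources): with 'type' = short and TYPE1 = TYPE2 = int, the
   reduction below contains the idiom and is the kind of loop this
   recognizer should rewrite to DOT_PROD_EXPR:

     int
     example_dot_prod (short *x, short *y, int n)
     {
       int sum = 0;
       for (int i = 0; i < n; i++)
	 sum += x[i] * y[i];	# S3-S5,S7: promote, multiply, accumulate
       return sum;
     }
*/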

static gimple *
vect_recog_dot_prod_pattern (vec<gimple *> *stmts, tree *type_in,
			     tree *type_out)
{
  gimple *stmt, *last_stmt = (*stmts)[0];
  tree oprnd0, oprnd1;
  tree oprnd00, oprnd01;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  tree type, half_type;
  gimple *pattern_stmt;
  tree prod_type;
  loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  struct loop *loop;
  tree var;
  bool promotion;

  if (!loop_info)
    return NULL;

  loop = LOOP_VINFO_LOOP (loop_info);

  /* We don't allow changing the order of the computation in the inner-loop
     when doing outer-loop vectorization.  */
  if (loop && nested_in_vect_loop_p (loop, last_stmt))
    return NULL;

  if (!is_gimple_assign (last_stmt))
    return NULL;

  type = gimple_expr_type (last_stmt);

  /* Look for the following pattern
          DX = (TYPE1) X;
          DY = (TYPE1) Y;
          DPROD = DX * DY;
          DDPROD = (TYPE2) DPROD;
          sum_1 = DDPROD + sum_0;
     In which
     - DX is double the size of X
     - DY is double the size of Y
     - DX, DY, DPROD all have the same type
     - sum is the same size as DPROD or bigger
     - sum has been recognized as a reduction variable.

     This is equivalent to:
       DPROD = X w* Y;          #widen mult
       sum_1 = DPROD w+ sum_0;  #widen summation
     or
       DPROD = X w* Y;          #widen mult
       sum_1 = DPROD + sum_0;   #summation
   */

  /* Starting from LAST_STMT, follow the defs of its uses in search
     of the above pattern.  */

  if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
    return NULL;

  if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
    {
      /* Has been detected as widening-summation?  */

      stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
      type = gimple_expr_type (stmt);
      if (gimple_assign_rhs_code (stmt) != WIDEN_SUM_EXPR)
        return NULL;
      oprnd0 = gimple_assign_rhs1 (stmt);
      oprnd1 = gimple_assign_rhs2 (stmt);
      half_type = TREE_TYPE (oprnd0);
    }
  else
    {
      gimple *def_stmt;

      if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def
	  && ! STMT_VINFO_GROUP_FIRST_ELEMENT (stmt_vinfo))
	return NULL;
      oprnd0 = gimple_assign_rhs1 (last_stmt);
      oprnd1 = gimple_assign_rhs2 (last_stmt);
      if (!types_compatible_p (TREE_TYPE (oprnd0), type)
	  || !types_compatible_p (TREE_TYPE (oprnd1), type))
        return NULL;
      stmt = last_stmt;

      if (type_conversion_p (oprnd0, stmt, true, &half_type, &def_stmt,
			     &promotion)
	  && promotion)
        {
          stmt = def_stmt;
          oprnd0 = gimple_assign_rhs1 (stmt);
        }
      else
        half_type = type;
    }

  /* So far so good.  Since last_stmt was detected as a (summation) reduction,
     we know that oprnd1 is the reduction variable (defined by a loop-header
     phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
     It remains to check that oprnd0 is defined by a (widen_)mult_expr.  */
  if (TREE_CODE (oprnd0) != SSA_NAME)
    return NULL;

  prod_type = half_type;
  stmt = SSA_NAME_DEF_STMT (oprnd0);

  /* It could not be the dot_prod pattern if the stmt is outside the loop.  */
  if (!gimple_bb (stmt) || !flow_bb_inside_loop_p (loop, gimple_bb (stmt)))
    return NULL;

  /* FORNOW.  We can continue analyzing the def-use chain when this stmt is a
     phi inside the loop (in case we are analyzing an outer-loop).  */
  if (!is_gimple_assign (stmt))
    return NULL;
  stmt_vinfo = vinfo_for_stmt (stmt);
  gcc_assert (stmt_vinfo);
  if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)
    return NULL;
  if (gimple_assign_rhs_code (stmt) != MULT_EXPR)
    return NULL;
  if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
    {
      /* Has been detected as a widening multiplication?  */

      stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
      if (gimple_assign_rhs_code (stmt) != WIDEN_MULT_EXPR)
        return NULL;
      stmt_vinfo = vinfo_for_stmt (stmt);
      gcc_assert (stmt_vinfo);
      gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_internal_def);
      oprnd00 = gimple_assign_rhs1 (stmt);
      oprnd01 = gimple_assign_rhs2 (stmt);
      STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (last_stmt))
	  = STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo);
    }
  else
    {
      tree half_type0, half_type1;
      gimple *def_stmt;
      tree oprnd0, oprnd1;

      oprnd0 = gimple_assign_rhs1 (stmt);
      oprnd1 = gimple_assign_rhs2 (stmt);
      if (!types_compatible_p (TREE_TYPE (oprnd0), prod_type)
          || !types_compatible_p (TREE_TYPE (oprnd1), prod_type))
        return NULL;
      if (!type_conversion_p (oprnd0, stmt, true, &half_type0, &def_stmt,
			      &promotion)
	  || !promotion)
        return NULL;
      oprnd00 = gimple_assign_rhs1 (def_stmt);
      if (!type_conversion_p (oprnd1, stmt, true, &half_type1, &def_stmt,
			      &promotion)
	  || !promotion)
        return NULL;
      oprnd01 = gimple_assign_rhs1 (def_stmt);
      if (!types_compatible_p (half_type0, half_type1))
        return NULL;
      if (TYPE_PRECISION (prod_type) != TYPE_PRECISION (half_type0) * 2)
	return NULL;
    }

  half_type = TREE_TYPE (oprnd00);
  *type_in = half_type;
  *type_out = type;

  /* Pattern detected.  Create a stmt to be used to replace the pattern:  */
  var = vect_recog_temp_ssa_var (type, NULL);
  pattern_stmt = gimple_build_assign (var, DOT_PROD_EXPR,
				      oprnd00, oprnd01, oprnd1);

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
                       "vect_recog_dot_prod_pattern: detected: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
      dump_printf (MSG_NOTE, "\n");
    }

  return pattern_stmt;
}


/* Function vect_recog_sad_pattern

   Try to find the following Sum of Absolute Difference (SAD) pattern:

     type x_t, y_t;
     signed TYPE1 diff, abs_diff;
     TYPE2 sum = init;
   loop:
     sum_0 = phi <init, sum_1>
     S1  x_t = ...
     S2  y_t = ...
     S3  x_T = (TYPE1) x_t;
     S4  y_T = (TYPE1) y_t;
     S5  diff = x_T - y_T;
     S6  abs_diff = ABS_EXPR <diff>;
     [S7  abs_diff = (TYPE2) abs_diff;  #optional]
     S8  sum_1 = abs_diff + sum_0;

   where 'TYPE1' is at least double the size of type 'type', and 'TYPE2' is
   the same size as 'TYPE1' or bigger.  This is a special case of a reduction
   computation.

   Input:

   * STMTS: Contains a stmt from which the pattern search begins.  In the
   example, when this function is called with S8, the pattern
   {S3,S4,S5,S6,S7,S8} will be detected.

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
        SAD_EXPR <x_t, y_t, sum_0>
  */
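
/* A hedged source-level illustration (the function name is hypothetical):
   with 'type' = unsigned char and TYPE1 = TYPE2 = int, this is the classic
   SAD kernel the recognizer is meant to rewrite to SAD_EXPR (abs as
   declared in <stdlib.h>):

     int
     example_sad (unsigned char *x, unsigned char *y, int n)
     {
       int sum = 0;
       for (int i = 0; i < n; i++)
	 sum += abs (x[i] - y[i]);	# S5,S6,S8: sub, abs, accumulate
       return sum;
     }
*/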

static gimple *
vect_recog_sad_pattern (vec<gimple *> *stmts, tree *type_in,
			tree *type_out)
{
  gimple *last_stmt = (*stmts)[0];
  tree sad_oprnd0, sad_oprnd1;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  tree half_type;
  loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  struct loop *loop;
  bool promotion;

  if (!loop_info)
    return NULL;

  loop = LOOP_VINFO_LOOP (loop_info);

  /* We don't allow changing the order of the computation in the inner-loop
     when doing outer-loop vectorization.  */
  if (loop && nested_in_vect_loop_p (loop, last_stmt))
    return NULL;

  if (!is_gimple_assign (last_stmt))
    return NULL;

  tree sum_type = gimple_expr_type (last_stmt);

  /* Look for the following pattern
          DX = (TYPE1) X;
          DY = (TYPE1) Y;
          DDIFF = DX - DY;
          DAD = ABS_EXPR <DDIFF>;
          DAD = (TYPE2) DAD;    #optional
          sum_1 = DAD + sum_0;
     In which
     - DX is at least double the size of X
     - DY is at least double the size of Y
     - DX, DY, DDIFF, DAD all have the same type
     - sum is the same size as DAD or bigger
     - sum has been recognized as a reduction variable.

     This is equivalent to:
       DDIFF = X w- Y;          #widen sub
       DAD = ABS_EXPR <DDIFF>;
       sum_1 = DAD w+ sum_0;    #widen summation
     or
       DDIFF = X w- Y;          #widen sub
       DAD = ABS_EXPR <DDIFF>;
       sum_1 = DAD + sum_0;     #summation
   */

  /* Starting from LAST_STMT, follow the defs of its uses in search
     of the above pattern.  */

  if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
    return NULL;

  tree plus_oprnd0, plus_oprnd1;

  if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
    {
      /* Has been detected as widening-summation?  */

      gimple *stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
      sum_type = gimple_expr_type (stmt);
      if (gimple_assign_rhs_code (stmt) != WIDEN_SUM_EXPR)
        return NULL;
      plus_oprnd0 = gimple_assign_rhs1 (stmt);
      plus_oprnd1 = gimple_assign_rhs2 (stmt);
      half_type = TREE_TYPE (plus_oprnd0);
    }
  else
    {
      gimple *def_stmt;

      if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def
	  && ! STMT_VINFO_GROUP_FIRST_ELEMENT (stmt_vinfo))
	return NULL;
      plus_oprnd0 = gimple_assign_rhs1 (last_stmt);
      plus_oprnd1 = gimple_assign_rhs2 (last_stmt);
      if (!types_compatible_p (TREE_TYPE (plus_oprnd0), sum_type)
	  || !types_compatible_p (TREE_TYPE (plus_oprnd1), sum_type))
        return NULL;

      /* The type conversion could be promotion, demotion,
         or just signed -> unsigned.  */
      if (type_conversion_p (plus_oprnd0, last_stmt, false,
                             &half_type, &def_stmt, &promotion))
        plus_oprnd0 = gimple_assign_rhs1 (def_stmt);
      else
        half_type = sum_type;
    }

  /* So far so good.  Since last_stmt was detected as a (summation) reduction,
     we know that plus_oprnd1 is the reduction variable (defined by a
     loop-header phi), and plus_oprnd0 is an ssa-name defined by a stmt in
     the loop body.  Then check that plus_oprnd0 is defined by an abs_expr.  */

  if (TREE_CODE (plus_oprnd0) != SSA_NAME)
    return NULL;

  tree abs_type = half_type;
  gimple *abs_stmt = SSA_NAME_DEF_STMT (plus_oprnd0);

  /* It could not be the sad pattern if the abs_stmt is outside the loop.  */
  if (!gimple_bb (abs_stmt) || !flow_bb_inside_loop_p (loop, gimple_bb (abs_stmt)))
    return NULL;

  /* FORNOW.  We can continue analyzing the def-use chain when this stmt is a
     phi inside the loop (in case we are analyzing an outer-loop).  */
  if (!is_gimple_assign (abs_stmt))
    return NULL;

  stmt_vec_info abs_stmt_vinfo = vinfo_for_stmt (abs_stmt);
  gcc_assert (abs_stmt_vinfo);
  if (STMT_VINFO_DEF_TYPE (abs_stmt_vinfo) != vect_internal_def)
    return NULL;
  if (gimple_assign_rhs_code (abs_stmt) != ABS_EXPR)
    return NULL;

  tree abs_oprnd = gimple_assign_rhs1 (abs_stmt);
  if (!types_compatible_p (TREE_TYPE (abs_oprnd), abs_type))
    return NULL;
  if (TYPE_UNSIGNED (abs_type))
    return NULL;

  /* We then detect if the operand of abs_expr is defined by a minus_expr.  */

  if (TREE_CODE (abs_oprnd) != SSA_NAME)
    return NULL;

  gimple *diff_stmt = SSA_NAME_DEF_STMT (abs_oprnd);

  /* It could not be the sad pattern if the diff_stmt is outside the loop.  */
  if (!gimple_bb (diff_stmt)
      || !flow_bb_inside_loop_p (loop, gimple_bb (diff_stmt)))
    return NULL;

  /* FORNOW.  We can continue analyzing the def-use chain when this stmt is a
     phi inside the loop (in case we are analyzing an outer-loop).  */
  if (!is_gimple_assign (diff_stmt))
    return NULL;

  stmt_vec_info diff_stmt_vinfo = vinfo_for_stmt (diff_stmt);
  gcc_assert (diff_stmt_vinfo);
  if (STMT_VINFO_DEF_TYPE (diff_stmt_vinfo) != vect_internal_def)
    return NULL;
  if (gimple_assign_rhs_code (diff_stmt) != MINUS_EXPR)
    return NULL;

  tree half_type0, half_type1;
  gimple *def_stmt;

  tree minus_oprnd0 = gimple_assign_rhs1 (diff_stmt);
  tree minus_oprnd1 = gimple_assign_rhs2 (diff_stmt);

  if (!types_compatible_p (TREE_TYPE (minus_oprnd0), abs_type)
      || !types_compatible_p (TREE_TYPE (minus_oprnd1), abs_type))
    return NULL;
  if (!type_conversion_p (minus_oprnd0, diff_stmt, false,
                          &half_type0, &def_stmt, &promotion)
      || !promotion)
    return NULL;
  sad_oprnd0 = gimple_assign_rhs1 (def_stmt);

  if (!type_conversion_p (minus_oprnd1, diff_stmt, false,
                          &half_type1, &def_stmt, &promotion)
      || !promotion)
    return NULL;
  sad_oprnd1 = gimple_assign_rhs1 (def_stmt);

  if (!types_compatible_p (half_type0, half_type1))
    return NULL;
  if (TYPE_PRECISION (abs_type) < TYPE_PRECISION (half_type0) * 2
      || TYPE_PRECISION (sum_type) < TYPE_PRECISION (half_type0) * 2)
    return NULL;

  *type_in = TREE_TYPE (sad_oprnd0);
  *type_out = sum_type;

  /* Pattern detected.  Create a stmt to be used to replace the pattern:  */
  tree var = vect_recog_temp_ssa_var (sum_type, NULL);
  gimple *pattern_stmt = gimple_build_assign (var, SAD_EXPR, sad_oprnd0,
					      sad_oprnd1, plus_oprnd1);

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
                       "vect_recog_sad_pattern: detected: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
      dump_printf (MSG_NOTE, "\n");
    }

  return pattern_stmt;
}


/* Handle a widening operation by a constant.  At the moment we support
   MULT_EXPR and LSHIFT_EXPR.

   For MULT_EXPR we check that CONST_OPRND fits HALF_TYPE, and for LSHIFT_EXPR
   we check that CONST_OPRND is less than or equal to the size of HALF_TYPE.

   Otherwise, if the type of the result (TYPE) is at least 4 times bigger than
   HALF_TYPE, and there is an intermediate type (2 times smaller than TYPE)
   that satisfies the above restrictions, we can perform a widening operation
   from the intermediate type to TYPE and replace a_T = (TYPE) a_t;
   with a_it = (interm_type) a_t;  Store such an operation in *WSTMT.  */
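
/* A hedged illustration of the intermediate-type case (names are
   hypothetical): with HALF_TYPE = unsigned char and TYPE = unsigned int
   (4 times bigger), a shift count of 12 does not fit HALF_TYPE's precision
   of 8, but does fit the intermediate type unsigned short:

     unsigned int
     example_shift_by_const (unsigned char a)
     {
       return (unsigned int) a << 12;	# '--> a_it = (unsigned short) a;
					# '--> res = a_it w<< 12;
     }
*/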

static bool
vect_handle_widen_op_by_const (gimple *stmt, enum tree_code code,
		               tree const_oprnd, tree *oprnd,
			       gimple **wstmt, tree type,
			       tree *half_type, gimple *def_stmt)
{
  tree new_type, new_oprnd;

  if (code != MULT_EXPR && code != LSHIFT_EXPR)
    return false;

  if (((code == MULT_EXPR && int_fits_type_p (const_oprnd, *half_type))
        || (code == LSHIFT_EXPR
            && compare_tree_int (const_oprnd, TYPE_PRECISION (*half_type))
	    	!= 1))
      && TYPE_PRECISION (type) == (TYPE_PRECISION (*half_type) * 2))
    {
      /* CONST_OPRND is a constant of HALF_TYPE.  */
      *oprnd = gimple_assign_rhs1 (def_stmt);
      return true;
    }

  if (TYPE_PRECISION (type) < (TYPE_PRECISION (*half_type) * 4))
    return false;

  if (!vect_same_loop_or_bb_p (stmt, def_stmt))
    return false;

  /* TYPE is 4 times bigger than HALF_TYPE, try a widening operation for
     a type 2 times bigger than HALF_TYPE.  */
  new_type = build_nonstandard_integer_type (TYPE_PRECISION (type) / 2,
                                             TYPE_UNSIGNED (type));
  if ((code == MULT_EXPR && !int_fits_type_p (const_oprnd, new_type))
      || (code == LSHIFT_EXPR
          && compare_tree_int (const_oprnd, TYPE_PRECISION (new_type)) == 1))
    return false;

  /* Use NEW_TYPE for the widening operation and create
     a_T = (NEW_TYPE) a_t;  */
  *oprnd = gimple_assign_rhs1 (def_stmt);
  new_oprnd = make_ssa_name (new_type);
  *wstmt = gimple_build_assign (new_oprnd, NOP_EXPR, *oprnd);
  *oprnd = new_oprnd;

  *half_type = new_type;
  return true;
}


/* Function vect_recog_widen_mult_pattern

   Try to find the following pattern:

     type1 a_t;
     type2 b_t;
     TYPE a_T, b_T, prod_T;

     S1  a_t = ;
     S2  b_t = ;
     S3  a_T = (TYPE) a_t;
     S4  b_T = (TYPE) b_t;
     S5  prod_T = a_T * b_T;

   where type 'TYPE' is at least double the size of type 'type1' and 'type2'.

   Also detect unsigned cases:

     unsigned type1 a_t;
     unsigned type2 b_t;
     unsigned TYPE u_prod_T;
     TYPE a_T, b_T, prod_T;

     S1  a_t = ;
     S2  b_t = ;
     S3  a_T = (TYPE) a_t;
     S4  b_T = (TYPE) b_t;
     S5  prod_T = a_T * b_T;
     S6  u_prod_T = (unsigned TYPE) prod_T;

   and multiplication by constants:

     type a_t;
     TYPE a_T, prod_T;

     S1  a_t = ;
     S3  a_T = (TYPE) a_t;
     S5  prod_T = a_T * CONST;

   A special case of multiplication by constants is when 'TYPE' is 4 times
   bigger than 'type', but CONST fits an intermediate type 2 times smaller
   than 'TYPE'.  In that case we create an additional pattern stmt for S3
   to create a variable of the intermediate type, and perform widen-mult
   on the intermediate type as well:

     type a_t;
     interm_type a_it;
     TYPE a_T, prod_T, prod_T';

     S1  a_t = ;
     S3  a_T = (TYPE) a_t;
           '--> a_it = (interm_type) a_t;
     S5  prod_T = a_T * CONST;
           '--> prod_T' = a_it w* CONST;

   Input/Output:

   * STMTS: Contains a stmt from which the pattern search begins.  In the
   example, when this function is called with S5, the pattern {S3,S4,S5,(S6)}
   is detected.  In case of unsigned widen-mult, the original stmt (S5) is
   replaced with S6 in STMTS.  In case of multiplication by a constant
   of an intermediate type (the last case above), STMTS also contains S3
   (inserted before S5).

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
        WIDEN_MULT <a_t, b_t>
   If the result of WIDEN_MULT needs to be converted to a larger type, the
   returned stmt will be this type conversion stmt.
*/
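
/* A hedged source-level illustration (the function name is hypothetical):
   with type1 = type2 = short and TYPE = int, the loop below contains the
   S3/S4/S5 sequence and should be rewritten to WIDEN_MULT_EXPR:

     void
     example_widen_mult (short *a, short *b, int *c, int n)
     {
       for (int i = 0; i < n; i++)
	 c[i] = a[i] * b[i];	# S3-S5: (int) a[i] * (int) b[i]
     }
*/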

static gimple *
vect_recog_widen_mult_pattern (vec<gimple *> *stmts,
                               tree *type_in, tree *type_out)
{
  gimple *last_stmt = stmts->pop ();
  gimple *def_stmt0, *def_stmt1;
  tree oprnd0, oprnd1;
  tree type, half_type0, half_type1;
  gimple *new_stmt = NULL, *pattern_stmt = NULL;
  tree vectype, vecitype;
  tree var;
  enum tree_code dummy_code;
  int dummy_int;
  vec<tree> dummy_vec;
  bool op1_ok;
  bool promotion;

  if (!is_gimple_assign (last_stmt))
    return NULL;

  type = gimple_expr_type (last_stmt);

  /* Starting from LAST_STMT, follow the defs of its uses in search
     of the above pattern.  */

  if (gimple_assign_rhs_code (last_stmt) != MULT_EXPR)
    return NULL;

  oprnd0 = gimple_assign_rhs1 (last_stmt);
  oprnd1 = gimple_assign_rhs2 (last_stmt);
  if (!types_compatible_p (TREE_TYPE (oprnd0), type)
      || !types_compatible_p (TREE_TYPE (oprnd1), type))
    return NULL;

  /* Check argument 0.  */
  if (!type_conversion_p (oprnd0, last_stmt, false, &half_type0, &def_stmt0,
                         &promotion)
      || !promotion)
     return NULL;
  /* Check argument 1.  */
  op1_ok = type_conversion_p (oprnd1, last_stmt, false, &half_type1,
                              &def_stmt1, &promotion);

  if (op1_ok && promotion)
    {
      oprnd0 = gimple_assign_rhs1 (def_stmt0);
      oprnd1 = gimple_assign_rhs1 (def_stmt1);
    }
  else
    {
      if (TREE_CODE (oprnd1) == INTEGER_CST
          && TREE_CODE (half_type0) == INTEGER_TYPE
          && vect_handle_widen_op_by_const (last_stmt, MULT_EXPR, oprnd1,
		                            &oprnd0, &new_stmt, type,
					    &half_type0, def_stmt0))
	{
	  half_type1 = half_type0;
	  oprnd1 = fold_convert (half_type1, oprnd1);
	}
      else
        return NULL;
    }

  /* If the two arguments have different sizes, convert the one with
     the smaller type into the larger type.  */
  if (TYPE_PRECISION (half_type0) != TYPE_PRECISION (half_type1))
    {
      /* If we already used up the single-stmt slot give up.  */
      if (new_stmt)
	return NULL;

      tree* oprnd = NULL;
      gimple *def_stmt = NULL;

      if (TYPE_PRECISION (half_type0) < TYPE_PRECISION (half_type1))
	{
	  def_stmt = def_stmt0;
	  half_type0 = half_type1;
	  oprnd = &oprnd0;
	}
      else
	{
	  def_stmt = def_stmt1;
	  half_type1 = half_type0;
	  oprnd = &oprnd1;
	}

      tree old_oprnd = gimple_assign_rhs1 (def_stmt);
      tree new_oprnd = make_ssa_name (half_type0);
      new_stmt = gimple_build_assign (new_oprnd, NOP_EXPR, old_oprnd);
      *oprnd = new_oprnd;
    }

  /* Handle the unsigned case.  Look for
     S6  u_prod_T = (unsigned TYPE) prod_T;
     Use unsigned TYPE as the type for WIDEN_MULT_EXPR.  */
  if (TYPE_UNSIGNED (type) != TYPE_UNSIGNED (half_type0))
    {
      gimple *use_stmt;
      tree use_lhs;
      tree use_type;

      if (TYPE_UNSIGNED (type) == TYPE_UNSIGNED (half_type1))
        return NULL;

      use_stmt = vect_single_imm_use (last_stmt);
      if (!use_stmt || !is_gimple_assign (use_stmt)
	  || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt)))
        return NULL;

      use_lhs = gimple_assign_lhs (use_stmt);
      use_type = TREE_TYPE (use_lhs);
      if (!INTEGRAL_TYPE_P (use_type)
          || (TYPE_UNSIGNED (type) == TYPE_UNSIGNED (use_type))
          || (TYPE_PRECISION (type) != TYPE_PRECISION (use_type)))
        return NULL;

      type = use_type;
      last_stmt = use_stmt;
    }

  if (!types_compatible_p (half_type0, half_type1))
    return NULL;

  /* If TYPE is more than twice as large as HALF_TYPE, we use WIDEN_MULT
     to get an intermediate result of type ITYPE.  In this case we need
     to build a statement to convert this intermediate result to type TYPE.  */
  tree itype = type;
  if (TYPE_PRECISION (type) > TYPE_PRECISION (half_type0) * 2)
    itype = build_nonstandard_integer_type
              (GET_MODE_BITSIZE (TYPE_MODE (half_type0)) * 2,
               TYPE_UNSIGNED (type));

  /* Pattern detected.  */
  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "vect_recog_widen_mult_pattern: detected:\n");

  /* Check target support.  */
  vectype = get_vectype_for_scalar_type (half_type0);
  vecitype = get_vectype_for_scalar_type (itype);
  if (!vectype
      || !vecitype
      || !supportable_widening_operation (WIDEN_MULT_EXPR, last_stmt,
					  vecitype, vectype,
					  &dummy_code, &dummy_code,
					  &dummy_int, &dummy_vec))
    return NULL;

  *type_in = vectype;
  *type_out = get_vectype_for_scalar_type (type);

  /* Pattern supported.  Create a stmt to be used to replace the pattern:  */
  var = vect_recog_temp_ssa_var (itype, NULL);
  pattern_stmt = gimple_build_assign (var, WIDEN_MULT_EXPR, oprnd0, oprnd1);

  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;

  /* If the original two operands have different sizes, we may need to convert
     the smaller one into the larger type.  If this is the case, at this point
     the new stmt is already built.  */
  if (new_stmt)
    {
      append_pattern_def_seq (stmt_vinfo, new_stmt);
      stmt_vec_info new_stmt_info
        = new_stmt_vec_info (new_stmt, stmt_vinfo->vinfo);
      set_vinfo_for_stmt (new_stmt, new_stmt_info);
      STMT_VINFO_VECTYPE (new_stmt_info) = vectype;
    }

  /* If ITYPE is not TYPE, we need to build a type conversion stmt to convert
     the result of the widen-mult operation into type TYPE.  */
  if (itype != type)
    {
      append_pattern_def_seq (stmt_vinfo, pattern_stmt);
      stmt_vec_info pattern_stmt_info
        = new_stmt_vec_info (pattern_stmt, stmt_vinfo->vinfo);
      set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
      STMT_VINFO_VECTYPE (pattern_stmt_info) = vecitype;
      pattern_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type, NULL),
					  NOP_EXPR,
					  gimple_assign_lhs (pattern_stmt));
    }

  if (dump_enabled_p ())
    dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);

  stmts->safe_push (last_stmt);
  return pattern_stmt;
}


/* Function vect_recog_pow_pattern

   Try to find the following pattern:

     x = POW (y, N);

   with POW being one of pow, powf, powi, powif and N being
   either 2 or 0.5.

   Input:

   * LAST_STMT: A stmt from which the pattern search begins.

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
        x = x * x
   or
	x = sqrt (x)
*/
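
/* Hedged examples of calls this pattern rewrites (assuming pow as declared
   in <math.h>; the function names are hypothetical):

     double square (double y) { return pow (y, 2.0); }	  # --> y * y
     double root (double y)   { return pow (y, 0.5); }	  # --> sqrt (y), if
					# the target supports IFN_SQRT
*/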

static gimple *
vect_recog_pow_pattern (vec<gimple *> *stmts, tree *type_in,
			tree *type_out)
{
  gimple *last_stmt = (*stmts)[0];
  tree base, exp = NULL;
  gimple *stmt;
  tree var;

  if (!is_gimple_call (last_stmt) || gimple_call_lhs (last_stmt) == NULL)
    return NULL;

  switch (gimple_call_combined_fn (last_stmt))
    {
    CASE_CFN_POW:
    CASE_CFN_POWI:
      base = gimple_call_arg (last_stmt, 0);
      exp = gimple_call_arg (last_stmt, 1);
      if (TREE_CODE (exp) != REAL_CST
	  && TREE_CODE (exp) != INTEGER_CST)
        return NULL;
      break;

    default:
      return NULL;
    }

  /* We now have a pow or powi builtin function call with a constant
     exponent.  */

  *type_out = NULL_TREE;

  /* Catch squaring.  */
  if ((tree_fits_shwi_p (exp)
       && tree_to_shwi (exp) == 2)
      || (TREE_CODE (exp) == REAL_CST
          && real_equal (&TREE_REAL_CST (exp), &dconst2)))
    {
      *type_in = TREE_TYPE (base);

      var = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
      stmt = gimple_build_assign (var, MULT_EXPR, base, base);
      return stmt;
    }

  /* Catch square root.  */
  if (TREE_CODE (exp) == REAL_CST
      && real_equal (&TREE_REAL_CST (exp), &dconsthalf))
    {
      *type_in = get_vectype_for_scalar_type (TREE_TYPE (base));
      if (*type_in
	  && direct_internal_fn_supported_p (IFN_SQRT, *type_in,
					     OPTIMIZE_FOR_SPEED))
	{
	  gcall *stmt = gimple_build_call_internal (IFN_SQRT, 1, base);
	  var = vect_recog_temp_ssa_var (TREE_TYPE (base), stmt);
	  gimple_call_set_lhs (stmt, var);
	  return stmt;
	}
    }

  return NULL;
}


/* Function vect_recog_widen_sum_pattern

   Try to find the following pattern:

     type x_t;
     TYPE x_T, sum = init;
   loop:
     sum_0 = phi <init, sum_1>
     S1  x_t = *p;
     S2  x_T = (TYPE) x_t;
     S3  sum_1 = x_T + sum_0;

   where type 'TYPE' is at least double the size of type 'type', i.e., we're
   summing elements of type 'type' into an accumulator of type 'TYPE'.  This
   is a special case of a reduction computation.

   Input:

   * LAST_STMT: A stmt from which the pattern search begins.  In the example,
   when this function is called with S3, the pattern {S2,S3} will be detected.

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
        WIDEN_SUM <x_t, sum_0>

   Note: The widening-sum idiom is a widening reduction pattern that is
	 vectorized without preserving all the intermediate results.  It
         produces only N/2 (widened) results (by summing up pairs of
	 intermediate results) rather than all N results.  Therefore, we
	 cannot allow this pattern when we want to get all the results and in
	 the correct order (as is the case when this computation is in an
	 inner-loop nested in an outer-loop that is being vectorized).  */
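
/* A hedged source-level illustration (the function name is hypothetical):
   with 'type' = unsigned short and 'TYPE' = unsigned int, the reduction
   below contains {S2,S3} and should be rewritten to WIDEN_SUM_EXPR:

     unsigned int
     example_widen_sum (unsigned short *p, int n)
     {
       unsigned int sum = 0;
       for (int i = 0; i < n; i++)
	 sum += p[i];		# S2,S3: promote and accumulate
       return sum;
     }
*/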

static gimple *
vect_recog_widen_sum_pattern (vec<gimple *> *stmts, tree *type_in,
			      tree *type_out)
{
  gimple *stmt, *last_stmt = (*stmts)[0];
  tree oprnd0, oprnd1;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  tree type, half_type;
  gimple *pattern_stmt;
  loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  struct loop *loop;
  tree var;
  bool promotion;

  if (!loop_info)
    return NULL;

  loop = LOOP_VINFO_LOOP (loop_info);

  /* We don't allow changing the order of the computation in the inner-loop
     when doing outer-loop vectorization.  */
  if (loop && nested_in_vect_loop_p (loop, last_stmt))
    return NULL;

  if (!is_gimple_assign (last_stmt))
    return NULL;

  type = gimple_expr_type (last_stmt);

  /* Look for the following pattern
          DX = (TYPE) X;
          sum_1 = DX + sum_0;
     In which DX is at least double the size of X, and sum_1 has been
     recognized as a reduction variable.
   */

  /* Starting from LAST_STMT, follow the defs of its uses in search
     of the above pattern.  */

  if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
    return NULL;

  if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def
      && ! STMT_VINFO_GROUP_FIRST_ELEMENT (stmt_vinfo))
    return NULL;

  oprnd0 = gimple_assign_rhs1 (last_stmt);
  oprnd1 = gimple_assign_rhs2 (last_stmt);
  if (!types_compatible_p (TREE_TYPE (oprnd0), type)
      || !types_compatible_p (TREE_TYPE (oprnd1), type))
    return NULL;

  /* So far so good.  Since last_stmt was detected as a (summation) reduction,
     we know that oprnd1 is the reduction variable (defined by a loop-header
     phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
     It remains to check that oprnd0 is defined by a cast from type 'type'
     to type 'TYPE'.  */

  if (!type_conversion_p (oprnd0, last_stmt, true, &half_type, &stmt,
                          &promotion)
      || !promotion)
     return NULL;

  oprnd0 = gimple_assign_rhs1 (stmt);
  *type_in = half_type;
  *type_out = type;

  /* Pattern detected.  Create a stmt to be used to replace the pattern:  */
  var = vect_recog_temp_ssa_var (type, NULL);
  pattern_stmt = gimple_build_assign (var, WIDEN_SUM_EXPR, oprnd0, oprnd1);

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
                       "vect_recog_widen_sum_pattern: detected: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
      dump_printf (MSG_NOTE, "\n");
    }

  return pattern_stmt;
}


/* Return TRUE if the operation in STMT can be performed on a smaller type.

   Input:
   STMT - a statement to check.
   DEF - we support operations with two operands, one of which is constant.
         The other operand can be defined by a demotion operation, or by a
         previous statement in a sequence of over-promoted operations.  In
         the latter case DEF is used to replace that operand.  (It is defined
         by a pattern statement we created for the previous statement in the
         sequence).

   Input/output:
   NEW_TYPE - Output: a smaller type that we are trying to use.  Input: if not
         NULL, it's the type of DEF.
   STMTS - additional pattern statements.  If a pattern statement (type
         conversion) is created in this function, its original statement is
         added to STMTS.

   Output:
   OP0, OP1 - if the operation fits a smaller type, OP0 and OP1 are the new
         operands to use in the new pattern statement for STMT (will be
         created in vect_recog_over_widening_pattern ()).
   NEW_DEF_STMT - in case DEF has to be promoted, we create two pattern
         statements for STMT: the first one is a type promotion and the
         second one is the operation itself.  We return the type promotion
         statement in NEW_DEF_STMT and further store it in
         STMT_VINFO_PATTERN_DEF_SEQ of the second pattern statement.  */

static bool
vect_operation_fits_smaller_type (gimple *stmt, tree def, tree *new_type,
				  tree *op0, tree *op1, gimple **new_def_stmt,
				  vec<gimple *> *stmts)
{
  enum tree_code code;
  tree const_oprnd, oprnd;
  tree interm_type = NULL_TREE, half_type, new_oprnd, type;
  gimple *def_stmt, *new_stmt;
  bool first = false;
  bool promotion;

  *op0 = NULL_TREE;
  *op1 = NULL_TREE;
  *new_def_stmt = NULL;

  if (!is_gimple_assign (stmt))
    return false;

  code = gimple_assign_rhs_code (stmt);
  if (code != LSHIFT_EXPR && code != RSHIFT_EXPR
      && code != BIT_IOR_EXPR && code != BIT_XOR_EXPR && code != BIT_AND_EXPR)
    return false;

  oprnd = gimple_assign_rhs1 (stmt);
  const_oprnd = gimple_assign_rhs2 (stmt);
  type = gimple_expr_type (stmt);

  if (TREE_CODE (oprnd) != SSA_NAME
      || TREE_CODE (const_oprnd) != INTEGER_CST)
    return false;

  /* If oprnd has other uses besides that in stmt we cannot mark it
     as being part of a pattern only.  */
  if (!has_single_use (oprnd))
    return false;

  /* If we are in the middle of a sequence, we use DEF from a previous
     statement.  Otherwise, OPRND has to be a result of type promotion.  */
  if (*new_type)
    {
      half_type = *new_type;
      oprnd = def;
    }
  else
    {
      first = true;
      if (!type_conversion_p (oprnd, stmt, false, &half_type, &def_stmt,
			      &promotion)
	  || !promotion
	  || !vect_same_loop_or_bb_p (stmt, def_stmt))
        return false;
    }

  /* Can we perform the operation on a smaller type?  */
  switch (code)
    {
      case BIT_IOR_EXPR:
      case BIT_XOR_EXPR:
      case BIT_AND_EXPR:
        if (!int_fits_type_p (const_oprnd, half_type))
          {
            /* HALF_TYPE is not enough.  Try a bigger type if possible.  */
            if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
              return false;

            interm_type = build_nonstandard_integer_type (
                        TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));
            if (!int_fits_type_p (const_oprnd, interm_type))
              return false;
          }

        break;

      case LSHIFT_EXPR:
        /* Try intermediate type - HALF_TYPE is not enough for sure.  */
        if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
          return false;

        /* Check that HALF_TYPE size + shift amount <= INTERM_TYPE size.
          (e.g., if the original value was char, the shift amount is at most 8
           if we want to use short).  */
        if (compare_tree_int (const_oprnd, TYPE_PRECISION (half_type)) == 1)
          return false;

        interm_type = build_nonstandard_integer_type (
                        TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));

        if (!vect_supportable_shift (code, interm_type))
          return false;

        break;

      case RSHIFT_EXPR:
        if (vect_supportable_shift (code, half_type))
          break;

        /* Try intermediate type - HALF_TYPE is not supported.  */
        if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
          return false;

        interm_type = build_nonstandard_integer_type (
                        TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));

        if (!vect_supportable_shift (code, interm_type))
          return false;

        break;

      default:
        gcc_unreachable ();
    }

  /* There are four possible cases:
     1. OPRND is defined by a type promotion (in that case FIRST is TRUE, it's
        the first statement in the sequence)
        a. The original, HALF_TYPE, is not enough - we replace the promotion
           from HALF_TYPE to TYPE with a promotion to INTERM_TYPE.
        b. HALF_TYPE is sufficient, OPRND is set as the RHS of the original
           promotion.
     2. OPRND is defined by a pattern statement we created.
        a. Its type is not sufficient for the operation, we create a new stmt:
           a type conversion for OPRND from HALF_TYPE to INTERM_TYPE.  We store
           this statement in NEW_DEF_STMT, and it is later put in
	   STMT_VINFO_PATTERN_DEF_SEQ of the pattern statement for STMT.
        b. OPRND is good to use in the new statement.  */
  if (first)
    {
      if (interm_type)
        {
          /* Replace the original type conversion HALF_TYPE->TYPE with
             HALF_TYPE->INTERM_TYPE.  */
          if (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)))
            {
              new_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
              /* Check if the already created pattern stmt is what we need.  */
              if (!is_gimple_assign (new_stmt)
                  || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (new_stmt))
                  || TREE_TYPE (gimple_assign_lhs (new_stmt)) != interm_type)
                return false;

	      stmts->safe_push (def_stmt);
              oprnd = gimple_assign_lhs (new_stmt);
            }
          else
            {
              /* Create NEW_OPRND = (INTERM_TYPE) OPRND.  */
              oprnd = gimple_assign_rhs1 (def_stmt);
	      new_oprnd = make_ssa_name (interm_type);
	      new_stmt = gimple_build_assign (new_oprnd, NOP_EXPR, oprnd);
              STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)) = new_stmt;
              stmts->safe_push (def_stmt);
              oprnd = new_oprnd;
            }
        }
      else
        {
          /* Retrieve the operand before the type promotion.  */
          oprnd = gimple_assign_rhs1 (def_stmt);
        }
    }
  else
    {
      if (interm_type)
        {
          /* Create a type conversion HALF_TYPE->INTERM_TYPE.  */
	  new_oprnd = make_ssa_name (interm_type);
	  new_stmt = gimple_build_assign (new_oprnd, NOP_EXPR, oprnd);
          oprnd = new_oprnd;
          *new_def_stmt = new_stmt;
        }

      /* Otherwise, OPRND is already set.  */
    }

  if (interm_type)
    *new_type = interm_type;
  else
    *new_type = half_type;

  *op0 = oprnd;
  *op1 = fold_convert (*new_type, const_oprnd);

  return true;
}


/* Try to find a statement or a sequence of statements that can be performed
   on a smaller type:

     type x_t;
     TYPE x_T, res0_T, res1_T;
   loop:
     S1  x_t = *p;
     S2  x_T = (TYPE) x_t;
     S3  res0_T = op (x_T, C0);
     S4  res1_T = op (res0_T, C1);
     S5  ... = () res1_T;  - type demotion

   where type 'TYPE' is at least double the size of type 'type', and C0
   and C1 are constants.  Check if S3 and S4 can be done on a smaller type
   than 'TYPE', which can either be 'type' or some intermediate type.  For
   now, we expect S5 to be a type demotion operation.  We also check that
   S3 and S4 have only one use.  */
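
/* A hedged source-level illustration (the function name is hypothetical):
   with 'type' = unsigned char and 'TYPE' = int (the C promotion type),
   S3/S4 below are over-widened; the ops can be done in a smaller type
   because the result is immediately demoted back to unsigned char:

     void
     example_over_widening (unsigned char *in, unsigned char *out, int n)
     {
       for (int i = 0; i < n; i++)
	 out[i] = (in[i] << 1) | 1;	# ops in int, result stored as char
     }
*/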
1452 
1453 static gimple *
vect_recog_over_widening_pattern(vec<gimple * > * stmts,tree * type_in,tree * type_out)1454 vect_recog_over_widening_pattern (vec<gimple *> *stmts,
1455                                   tree *type_in, tree *type_out)
1456 {
1457   gimple *stmt = stmts->pop ();
1458   gimple *pattern_stmt = NULL, *new_def_stmt, *prev_stmt = NULL,
1459 	 *use_stmt = NULL;
1460   tree op0, op1, vectype = NULL_TREE, use_lhs, use_type;
1461   tree var = NULL_TREE, new_type = NULL_TREE, new_oprnd;
1462   bool first;
1463   tree type = NULL;
1464 
1465   first = true;
1466   while (1)
1467     {
1468       if (!vinfo_for_stmt (stmt)
1469           || STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (stmt)))
1470         return NULL;
1471 
1472       new_def_stmt = NULL;
1473       if (!vect_operation_fits_smaller_type (stmt, var, &new_type,
1474                                              &op0, &op1, &new_def_stmt,
1475                                              stmts))
1476         {
1477           if (first)
1478             return NULL;
1479           else
1480             break;
1481         }
1482 
1483       /* STMT can be performed on a smaller type.  Check its uses.  */
1484       use_stmt = vect_single_imm_use (stmt);
1485       if (!use_stmt || !is_gimple_assign (use_stmt))
1486         return NULL;
1487 
1488       /* Create pattern statement for STMT.  */
1489       vectype = get_vectype_for_scalar_type (new_type);
1490       if (!vectype)
1491         return NULL;
1492 
1493       /* We want to collect all the statements for which we create pattern
1494          statetments, except for the case when the last statement in the
1495          sequence doesn't have a corresponding pattern statement.  In such
1496          case we associate the last pattern statement with the last statement
1497          in the sequence.  Therefore, we only add the original statement to
1498          the list if we know that it is not the last.  */
1499       if (prev_stmt)
1500         stmts->safe_push (prev_stmt);
1501 
1502       var = vect_recog_temp_ssa_var (new_type, NULL);
1503       pattern_stmt
1504 	= gimple_build_assign (var, gimple_assign_rhs_code (stmt), op0, op1);
1505       STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt)) = pattern_stmt;
1506       new_pattern_def_seq (vinfo_for_stmt (stmt), new_def_stmt);
1507 
1508       if (dump_enabled_p ())
1509         {
1510           dump_printf_loc (MSG_NOTE, vect_location,
1511                            "created pattern stmt: ");
1512           dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
1513           dump_printf (MSG_NOTE, "\n");
1514         }
1515 
1516       type = gimple_expr_type (stmt);
1517       prev_stmt = stmt;
1518       stmt = use_stmt;
1519 
1520       first = false;
1521     }
1522 
1523   /* We got a sequence.  We expect it to end with a type demotion operation.
1524      Otherwise, we quit (for now).  There are three possible cases: the
1525      conversion is to NEW_TYPE (we don't do anything), the conversion is to
1526      a type bigger than NEW_TYPE and/or the signedness of USE_TYPE and
1527      NEW_TYPE differs (we create a new conversion statement).  */
1528   if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt)))
1529     {
1530       use_lhs = gimple_assign_lhs (use_stmt);
1531       use_type = TREE_TYPE (use_lhs);
1532       /* Support only type demotion or signedness change.  */
1533       if (!INTEGRAL_TYPE_P (use_type)
1534 	  || TYPE_PRECISION (type) <= TYPE_PRECISION (use_type))
1535         return NULL;
1536 
1537       /* Check that NEW_TYPE is not bigger than the conversion result.  */
1538       if (TYPE_PRECISION (new_type) > TYPE_PRECISION (use_type))
1539 	return NULL;
1540 
1541       if (TYPE_UNSIGNED (new_type) != TYPE_UNSIGNED (use_type)
1542           || TYPE_PRECISION (new_type) != TYPE_PRECISION (use_type))
1543         {
1544           /* Create NEW_TYPE->USE_TYPE conversion.  */
1545 	  new_oprnd = make_ssa_name (use_type);
1546 	  pattern_stmt = gimple_build_assign (new_oprnd, NOP_EXPR, var);
1547           STMT_VINFO_RELATED_STMT (vinfo_for_stmt (use_stmt)) = pattern_stmt;
1548 
1549           *type_in = get_vectype_for_scalar_type (new_type);
1550           *type_out = get_vectype_for_scalar_type (use_type);
1551 
1552           /* We created a pattern statement for the last statement in the
1553              sequence, so we don't need to associate it with the pattern
1554              statement created for PREV_STMT.  Therefore, we add PREV_STMT
1555              to the list in order to mark it later in vect_pattern_recog_1.  */
1556           if (prev_stmt)
1557             stmts->safe_push (prev_stmt);
1558         }
1559       else
1560         {
1561           if (prev_stmt)
1562 	    STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (use_stmt))
1563 	       = STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (prev_stmt));
1564 
1565           *type_in = vectype;
1566           *type_out = NULL_TREE;
1567         }
1568 
1569       stmts->safe_push (use_stmt);
1570     }
1571   else
1572     /* TODO: support general case, create a conversion to the correct type.  */
1573     return NULL;
1574 
1575   /* Pattern detected.  */
1576   if (dump_enabled_p ())
1577     {
1578       dump_printf_loc (MSG_NOTE, vect_location,
1579                        "vect_recog_over_widening_pattern: detected: ");
1580       dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
1581       dump_printf (MSG_NOTE, "\n");
1582     }
1583 
1584   return pattern_stmt;
1585 }
1586 
1587 /* Detect widening shift pattern:
1588 
1589    type a_t;
1590    TYPE a_T, res_T;
1591 
1592    S1 a_t = ;
1593    S2 a_T = (TYPE) a_t;
1594    S3 res_T = a_T << CONST;
1595 
1596   where type 'TYPE' is at least double the size of type 'type'.
1597 
1598   Also detect cases where the shift result is immediately converted
1599   to another type 'result_type' that is no larger in size than 'TYPE'.
1600   In those cases we perform a widen-shift that directly results in
1601   'result_type', to avoid a possible over-widening situation:
1602 
1603   type a_t;
1604   TYPE a_T, res_T;
1605   result_type res_result;
1606 
1607   S1 a_t = ;
1608   S2 a_T = (TYPE) a_t;
1609   S3 res_T = a_T << CONST;
1610   S4 res_result = (result_type) res_T;
1611       '--> res_result' = a_t w<< CONST;
1612 
1613   And a case when 'TYPE' is 4 times bigger than 'type'.  In that case we
1614   create an additional pattern stmt for S2 to create a variable of an
1615   intermediate type, and perform widen-shift on the intermediate type:
1616 
1617   type a_t;
1618   interm_type a_it;
1619   TYPE a_T, res_T, res_T';
1620 
1621   S1 a_t = ;
1622   S2 a_T = (TYPE) a_t;
1623       '--> a_it = (interm_type) a_t;
1624   S3 res_T = a_T << CONST;
1625       '--> res_T' = a_it <<* CONST;
1626 
1627   Input/Output:
1628 
1629   * STMTS: Contains a stmt from which the pattern search begins.
1630     In case of unsigned widen-shift, the original stmt (S3) is replaced with S4
1631     in STMTS.  When an intermediate type is used and a pattern statement is
1632     created for S2, we also put S2 here (before S3).
1633 
1634   Output:
1635 
1636   * TYPE_IN: The type of the input arguments to the pattern.
1637 
1638   * TYPE_OUT: The type of the output of this pattern.
1639 
1640   * Return value: A new stmt that will be used to replace the sequence of
1641     stmts that constitute the pattern.  In this case it will be:
1642     WIDEN_LSHIFT_EXPR <a_t, CONST>.  */
1643 
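/* A hedged illustration (hypothetical source, not from the testsuite):

     unsigned short in[256];
     unsigned int out[256];

     void
     f (void)
     {
       int i;
       for (i = 0; i < 256; i++)
         out[i] = (unsigned int) in[i] << 7;
     }

   The conversion of in[i] plays the role of S2 and the shift that of S3;
   together they can become a single WIDEN_LSHIFT_EXPR on targets that
   provide a widening shift instruction.  */
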
1644 static gimple *
1645 vect_recog_widen_shift_pattern (vec<gimple *> *stmts,
1646 				tree *type_in, tree *type_out)
1647 {
1648   gimple *last_stmt = stmts->pop ();
1649   gimple *def_stmt0;
1650   tree oprnd0, oprnd1;
1651   tree type, half_type0;
1652   gimple *pattern_stmt;
1653   tree vectype, vectype_out = NULL_TREE;
1654   tree var;
1655   enum tree_code dummy_code;
1656   int dummy_int;
1657   vec<tree>  dummy_vec;
1658   gimple *use_stmt;
1659   bool promotion;
1660 
1661   if (!is_gimple_assign (last_stmt) || !vinfo_for_stmt (last_stmt))
1662     return NULL;
1663 
1664   if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (last_stmt)))
1665     return NULL;
1666 
1667   if (gimple_assign_rhs_code (last_stmt) != LSHIFT_EXPR)
1668     return NULL;
1669 
1670   oprnd0 = gimple_assign_rhs1 (last_stmt);
1671   oprnd1 = gimple_assign_rhs2 (last_stmt);
1672   if (TREE_CODE (oprnd0) != SSA_NAME || TREE_CODE (oprnd1) != INTEGER_CST)
1673     return NULL;
1674 
1675   /* Check operand 0: it has to be defined by a type promotion.  */
1676   if (!type_conversion_p (oprnd0, last_stmt, false, &half_type0, &def_stmt0,
1677 			  &promotion)
1678       || !promotion)
1679      return NULL;
1680 
1681   /* Check operand 1: has to be positive.  We check that it fits the type
1682      in vect_handle_widen_op_by_const ().  */
1683   if (tree_int_cst_compare (oprnd1, size_zero_node) <= 0)
1684     return NULL;
1685 
1686   oprnd0 = gimple_assign_rhs1 (def_stmt0);
1687   type = gimple_expr_type (last_stmt);
1688 
1689   /* Check for subsequent conversion to another type.  */
1690   use_stmt = vect_single_imm_use (last_stmt);
1691   if (use_stmt && is_gimple_assign (use_stmt)
1692       && CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt))
1693       && !STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt)))
1694     {
1695       tree use_lhs = gimple_assign_lhs (use_stmt);
1696       tree use_type = TREE_TYPE (use_lhs);
1697 
1698       if (INTEGRAL_TYPE_P (use_type)
1699 	  && TYPE_PRECISION (use_type) <= TYPE_PRECISION (type))
1700 	{
1701 	  last_stmt = use_stmt;
1702 	  type = use_type;
1703 	}
1704     }
1705 
1706   /* Check if this is a widening operation.  */
1707   gimple *wstmt = NULL;
1708   if (!vect_handle_widen_op_by_const (last_stmt, LSHIFT_EXPR, oprnd1,
1709        				      &oprnd0, &wstmt,
1710 	                              type, &half_type0, def_stmt0))
1711     return NULL;
1712 
1713   /* Pattern detected.  */
1714   if (dump_enabled_p ())
1715     dump_printf_loc (MSG_NOTE, vect_location,
1716                      "vect_recog_widen_shift_pattern: detected:\n");
1717 
1718   /* Check target support.  */
1719   vectype = get_vectype_for_scalar_type (half_type0);
1720   vectype_out = get_vectype_for_scalar_type (type);
1721 
1722   if (!vectype
1723       || !vectype_out
1724       || !supportable_widening_operation (WIDEN_LSHIFT_EXPR, last_stmt,
1725 					  vectype_out, vectype,
1726 					  &dummy_code, &dummy_code,
1727 					  &dummy_int, &dummy_vec))
1728     return NULL;
1729 
1730   *type_in = vectype;
1731   *type_out = vectype_out;
1732 
1733   /* Pattern supported.  Create a stmt to be used to replace the pattern.  */
1734   var = vect_recog_temp_ssa_var (type, NULL);
1735   pattern_stmt =
1736     gimple_build_assign (var, WIDEN_LSHIFT_EXPR, oprnd0, oprnd1);
1737   if (wstmt)
1738     {
1739       stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
1740       new_pattern_def_seq (stmt_vinfo, wstmt);
1741       stmt_vec_info new_stmt_info
1742 	= new_stmt_vec_info (wstmt, stmt_vinfo->vinfo);
1743       set_vinfo_for_stmt (wstmt, new_stmt_info);
1744       STMT_VINFO_VECTYPE (new_stmt_info) = vectype;
1745     }
1746 
1747   if (dump_enabled_p ())
1748     dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);
1749 
1750   stmts->safe_push (last_stmt);
1751   return pattern_stmt;
1752 }
1753 
1754 /* Detect a rotate pattern that wouldn't be otherwise vectorized:
1755 
1756    type a_t, b_t, c_t;
1757 
1758    S0 a_t = b_t r<< c_t;
1759 
1760   Input/Output:
1761 
1762   * STMTS: Contains a stmt from which the pattern search begins,
1763     i.e. the shift/rotate stmt.  The original stmt (S0) is replaced
1764     with a sequence:
1765 
1766    S1 d_t = -c_t;
1767    S2 e_t = d_t & (B - 1);
1768    S3 f_t = b_t << c_t;
1769    S4 g_t = b_t >> e_t;
1770    S0 a_t = f_t | g_t;
1771 
1772     where B is element bitsize of type.
1773 
1774   Output:
1775 
1776   * TYPE_IN: The type of the input arguments to the pattern.
1777 
1778   * TYPE_OUT: The type of the output of this pattern.
1779 
1780   * Return value: A new stmt that will be used to replace the rotate
1781     S0 stmt.  */
1782 
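/* A hedged illustration (hypothetical source): rotates are commonly
   written with the portable shift/or idiom, which the middle end folds
   to LROTATE_EXPR:

     unsigned int x[256];

     void
     f (unsigned int n)
     {
       int i;
       for (i = 0; i < 256; i++)
         x[i] = (x[i] << (n & 31)) | (x[i] >> (-n & 31));
     }

   On a target that has vector shifts but no vector rotate, the rotate is
   re-expanded into the S1-S4 shift/mask sequence above so the loop can
   still be vectorized.  */
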
1783 static gimple *
1784 vect_recog_rotate_pattern (vec<gimple *> *stmts, tree *type_in, tree *type_out)
1785 {
1786   gimple *last_stmt = stmts->pop ();
1787   tree oprnd0, oprnd1, lhs, var, var1, var2, vectype, type, stype, def, def2;
1788   gimple *pattern_stmt, *def_stmt;
1789   enum tree_code rhs_code;
1790   stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
1791   vec_info *vinfo = stmt_vinfo->vinfo;
1792   enum vect_def_type dt;
1793   optab optab1, optab2;
1794   edge ext_def = NULL;
1795 
1796   if (!is_gimple_assign (last_stmt))
1797     return NULL;
1798 
1799   rhs_code = gimple_assign_rhs_code (last_stmt);
1800   switch (rhs_code)
1801     {
1802     case LROTATE_EXPR:
1803     case RROTATE_EXPR:
1804       break;
1805     default:
1806       return NULL;
1807     }
1808 
1809   if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
1810     return NULL;
1811 
1812   lhs = gimple_assign_lhs (last_stmt);
1813   oprnd0 = gimple_assign_rhs1 (last_stmt);
1814   type = TREE_TYPE (oprnd0);
1815   oprnd1 = gimple_assign_rhs2 (last_stmt);
1816   if (TREE_CODE (oprnd0) != SSA_NAME
1817       || TYPE_PRECISION (TREE_TYPE (lhs)) != TYPE_PRECISION (type)
1818       || !INTEGRAL_TYPE_P (type)
1819       || !TYPE_UNSIGNED (type))
1820     return NULL;
1821 
1822   if (!vect_is_simple_use (oprnd1, vinfo, &def_stmt, &dt))
1823     return NULL;
1824 
1825   if (dt != vect_internal_def
1826       && dt != vect_constant_def
1827       && dt != vect_external_def)
1828     return NULL;
1829 
1830   vectype = get_vectype_for_scalar_type (type);
1831   if (vectype == NULL_TREE)
1832     return NULL;
1833 
1834   /* If vector/vector or vector/scalar rotate is supported by the target,
1835      don't do anything here.  */
1836   optab1 = optab_for_tree_code (rhs_code, vectype, optab_vector);
1837   if (optab1
1838       && optab_handler (optab1, TYPE_MODE (vectype)) != CODE_FOR_nothing)
1839     return NULL;
1840 
1841   if (is_a <bb_vec_info> (vinfo) || dt != vect_internal_def)
1842     {
1843       optab2 = optab_for_tree_code (rhs_code, vectype, optab_scalar);
1844       if (optab2
1845 	  && optab_handler (optab2, TYPE_MODE (vectype)) != CODE_FOR_nothing)
1846 	return NULL;
1847     }
1848 
1849   /* If vector/vector or vector/scalar shifts aren't supported by the target,
1850      don't do anything here either.  */
1851   optab1 = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_vector);
1852   optab2 = optab_for_tree_code (RSHIFT_EXPR, vectype, optab_vector);
1853   if (!optab1
1854       || optab_handler (optab1, TYPE_MODE (vectype)) == CODE_FOR_nothing
1855       || !optab2
1856       || optab_handler (optab2, TYPE_MODE (vectype)) == CODE_FOR_nothing)
1857     {
1858       if (! is_a <bb_vec_info> (vinfo) && dt == vect_internal_def)
1859 	return NULL;
1860       optab1 = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_scalar);
1861       optab2 = optab_for_tree_code (RSHIFT_EXPR, vectype, optab_scalar);
1862       if (!optab1
1863 	  || optab_handler (optab1, TYPE_MODE (vectype)) == CODE_FOR_nothing
1864 	  || !optab2
1865 	  || optab_handler (optab2, TYPE_MODE (vectype)) == CODE_FOR_nothing)
1866 	return NULL;
1867     }
1868 
1869   *type_in = vectype;
1870   *type_out = vectype;
1871   if (*type_in == NULL_TREE)
1872     return NULL;
1873 
1874   if (dt == vect_external_def
1875       && TREE_CODE (oprnd1) == SSA_NAME
1876       && is_a <loop_vec_info> (vinfo))
1877     {
1878       struct loop *loop = as_a <loop_vec_info> (vinfo)->loop;
1879       ext_def = loop_preheader_edge (loop);
1880       if (!SSA_NAME_IS_DEFAULT_DEF (oprnd1))
1881 	{
1882 	  basic_block bb = gimple_bb (SSA_NAME_DEF_STMT (oprnd1));
1883 	  if (bb == NULL
1884 	      || !dominated_by_p (CDI_DOMINATORS, ext_def->dest, bb))
1885 	    ext_def = NULL;
1886 	}
1887     }
1888 
1889   def = NULL_TREE;
1890   if (TREE_CODE (oprnd1) == INTEGER_CST
1891       || TYPE_MODE (TREE_TYPE (oprnd1)) == TYPE_MODE (type))
1892     def = oprnd1;
1893   else if (def_stmt && gimple_assign_cast_p (def_stmt))
1894     {
1895       tree rhs1 = gimple_assign_rhs1 (def_stmt);
1896       if (TYPE_MODE (TREE_TYPE (rhs1)) == TYPE_MODE (type)
1897 	  && TYPE_PRECISION (TREE_TYPE (rhs1))
1898 	     == TYPE_PRECISION (type))
1899 	def = rhs1;
1900     }
1901 
1902   STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
1903   if (def == NULL_TREE)
1904     {
1905       def = vect_recog_temp_ssa_var (type, NULL);
1906       def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd1);
1907       if (ext_def)
1908 	{
1909 	  basic_block new_bb
1910 	    = gsi_insert_on_edge_immediate (ext_def, def_stmt);
1911 	  gcc_assert (!new_bb);
1912 	}
1913       else
1914 	append_pattern_def_seq (stmt_vinfo, def_stmt);
1915     }
1916   stype = TREE_TYPE (def);
1917 
1918   if (TREE_CODE (def) == INTEGER_CST)
1919     {
1920       if (!tree_fits_uhwi_p (def)
1921 	  || tree_to_uhwi (def) >= GET_MODE_PRECISION (TYPE_MODE (type))
1922 	  || integer_zerop (def))
1923 	return NULL;
1924       def2 = build_int_cst (stype,
1925 			    GET_MODE_PRECISION (TYPE_MODE (type))
1926 			    - tree_to_uhwi (def));
1927     }
1928   else
1929     {
1930       tree vecstype = get_vectype_for_scalar_type (stype);
1931       stmt_vec_info def_stmt_vinfo;
1932 
1933       if (vecstype == NULL_TREE)
1934 	return NULL;
1935       def2 = vect_recog_temp_ssa_var (stype, NULL);
1936       def_stmt = gimple_build_assign (def2, NEGATE_EXPR, def);
1937       if (ext_def)
1938 	{
1939 	  basic_block new_bb
1940 	    = gsi_insert_on_edge_immediate (ext_def, def_stmt);
1941 	  gcc_assert (!new_bb);
1942 	}
1943       else
1944 	{
1945 	  def_stmt_vinfo = new_stmt_vec_info (def_stmt, vinfo);
1946 	  set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
1947 	  STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecstype;
1948 	  append_pattern_def_seq (stmt_vinfo, def_stmt);
1949 	}
1950 
1951       def2 = vect_recog_temp_ssa_var (stype, NULL);
1952       tree mask
1953 	= build_int_cst (stype, GET_MODE_PRECISION (TYPE_MODE (stype)) - 1);
1954       def_stmt = gimple_build_assign (def2, BIT_AND_EXPR,
1955 				      gimple_assign_lhs (def_stmt), mask);
1956       if (ext_def)
1957 	{
1958 	  basic_block new_bb
1959 	    = gsi_insert_on_edge_immediate (ext_def, def_stmt);
1960 	  gcc_assert (!new_bb);
1961 	}
1962       else
1963 	{
1964 	  def_stmt_vinfo = new_stmt_vec_info (def_stmt, vinfo);
1965 	  set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
1966 	  STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecstype;
1967 	  append_pattern_def_seq (stmt_vinfo, def_stmt);
1968 	}
1969     }
1970 
1971   var1 = vect_recog_temp_ssa_var (type, NULL);
1972   def_stmt = gimple_build_assign (var1, rhs_code == LROTATE_EXPR
1973 					? LSHIFT_EXPR : RSHIFT_EXPR,
1974 				  oprnd0, def);
1975   append_pattern_def_seq (stmt_vinfo, def_stmt);
1976 
1977   var2 = vect_recog_temp_ssa_var (type, NULL);
1978   def_stmt = gimple_build_assign (var2, rhs_code == LROTATE_EXPR
1979 					? RSHIFT_EXPR : LSHIFT_EXPR,
1980 				  oprnd0, def2);
1981   append_pattern_def_seq (stmt_vinfo, def_stmt);
1982 
1983   /* Pattern detected.  */
1984   if (dump_enabled_p ())
1985     dump_printf_loc (MSG_NOTE, vect_location,
1986 		     "vect_recog_rotate_pattern: detected:\n");
1987 
1988   /* Pattern supported.  Create a stmt to be used to replace the pattern.  */
1989   var = vect_recog_temp_ssa_var (type, NULL);
1990   pattern_stmt = gimple_build_assign (var, BIT_IOR_EXPR, var1, var2);
1991 
1992   if (dump_enabled_p ())
1993     dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);
1994 
1995   stmts->safe_push (last_stmt);
1996   return pattern_stmt;
1997 }
1998 
1999 /* Detect a vector by vector shift pattern that wouldn't be otherwise
2000    vectorized:
2001 
2002    type a_t;
2003    TYPE b_T, res_T;
2004 
2005    S1 a_t = ;
2006    S2 b_T = ;
2007    S3 res_T = b_T op a_t;
2008 
2009   where type 'TYPE' is a type whose size differs from that of 'type',
2010   and op is <<, >> or rotate.
2011 
2012   Also detect cases:
2013 
2014    type a_t;
2015    TYPE b_T, c_T, res_T;
2016 
2017    S0 c_T = ;
2018    S1 a_t = (type) c_T;
2019    S2 b_T = ;
2020    S3 res_T = b_T op a_t;
2021 
2022   Input/Output:
2023 
2024   * STMTS: Contains a stmt from which the pattern search begins,
2025     i.e. the shift/rotate stmt.  The original stmt (S3) is replaced
2026     with a shift/rotate which has same type on both operands, in the
2027     second case just b_T op c_T, in the first case with added cast
2028     from a_t to c_T in STMT_VINFO_PATTERN_DEF_SEQ.
2029 
2030   Output:
2031 
2032   * TYPE_IN: The type of the input arguments to the pattern.
2033 
2034   * TYPE_OUT: The type of the output of this pattern.
2035 
2036   * Return value: A new stmt that will be used to replace the shift/rotate
2037     S3 stmt.  */
2038 
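/* A hedged illustration (hypothetical source): the shift amount has a
   narrower type than the shifted value, which GIMPLE permits for scalar
   shifts but vector instructions typically do not:

     long long x[256];
     int amount[256];

     void
     f (void)
     {
       int i;
       for (i = 0; i < 256; i++)
         x[i] = x[i] << amount[i];
     }

   Here 'TYPE' is 'long long' and 'type' is 'int'; the pattern adds a
   cast of the shift amount so both vector operands have the same type.  */
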
2039 static gimple *
2040 vect_recog_vector_vector_shift_pattern (vec<gimple *> *stmts,
2041 					tree *type_in, tree *type_out)
2042 {
2043   gimple *last_stmt = stmts->pop ();
2044   tree oprnd0, oprnd1, lhs, var;
2045   gimple *pattern_stmt, *def_stmt;
2046   enum tree_code rhs_code;
2047   stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
2048   vec_info *vinfo = stmt_vinfo->vinfo;
2049   enum vect_def_type dt;
2050 
2051   if (!is_gimple_assign (last_stmt))
2052     return NULL;
2053 
2054   rhs_code = gimple_assign_rhs_code (last_stmt);
2055   switch (rhs_code)
2056     {
2057     case LSHIFT_EXPR:
2058     case RSHIFT_EXPR:
2059     case LROTATE_EXPR:
2060     case RROTATE_EXPR:
2061       break;
2062     default:
2063       return NULL;
2064     }
2065 
2066   if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
2067     return NULL;
2068 
2069   lhs = gimple_assign_lhs (last_stmt);
2070   oprnd0 = gimple_assign_rhs1 (last_stmt);
2071   oprnd1 = gimple_assign_rhs2 (last_stmt);
2072   if (TREE_CODE (oprnd0) != SSA_NAME
2073       || TREE_CODE (oprnd1) != SSA_NAME
2074       || TYPE_MODE (TREE_TYPE (oprnd0)) == TYPE_MODE (TREE_TYPE (oprnd1))
2075       || TYPE_PRECISION (TREE_TYPE (oprnd1))
2076 	 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (oprnd1)))
2077       || TYPE_PRECISION (TREE_TYPE (lhs))
2078 	 != TYPE_PRECISION (TREE_TYPE (oprnd0)))
2079     return NULL;
2080 
2081   if (!vect_is_simple_use (oprnd1, vinfo, &def_stmt, &dt))
2082     return NULL;
2083 
2084   if (dt != vect_internal_def)
2085     return NULL;
2086 
2087   *type_in = get_vectype_for_scalar_type (TREE_TYPE (oprnd0));
2088   *type_out = *type_in;
2089   if (*type_in == NULL_TREE)
2090     return NULL;
2091 
2092   tree def = NULL_TREE;
2093   stmt_vec_info def_vinfo = vinfo_for_stmt (def_stmt);
2094   if (!STMT_VINFO_IN_PATTERN_P (def_vinfo) && gimple_assign_cast_p (def_stmt))
2095     {
2096       tree rhs1 = gimple_assign_rhs1 (def_stmt);
2097       if (TYPE_MODE (TREE_TYPE (rhs1)) == TYPE_MODE (TREE_TYPE (oprnd0))
2098 	  && TYPE_PRECISION (TREE_TYPE (rhs1))
2099 	     == TYPE_PRECISION (TREE_TYPE (oprnd0)))
2100 	{
2101 	  if (TYPE_PRECISION (TREE_TYPE (oprnd1))
2102 	      >= TYPE_PRECISION (TREE_TYPE (rhs1)))
2103 	    def = rhs1;
2104 	  else
2105 	    {
2106 	      tree mask
2107 		= build_low_bits_mask (TREE_TYPE (rhs1),
2108 				       TYPE_PRECISION (TREE_TYPE (oprnd1)));
2109 	      def = vect_recog_temp_ssa_var (TREE_TYPE (rhs1), NULL);
2110 	      def_stmt = gimple_build_assign (def, BIT_AND_EXPR, rhs1, mask);
2111 	      new_pattern_def_seq (stmt_vinfo, def_stmt);
2112 	    }
2113 	}
2114     }
2115 
2116   if (def == NULL_TREE)
2117     {
2118       def = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
2119       def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd1);
2120       new_pattern_def_seq (stmt_vinfo, def_stmt);
2121     }
2122 
2123   /* Pattern detected.  */
2124   if (dump_enabled_p ())
2125     dump_printf_loc (MSG_NOTE, vect_location,
2126                      "vect_recog_vector_vector_shift_pattern: detected:\n");
2127 
2128   /* Pattern supported.  Create a stmt to be used to replace the pattern.  */
2129   var = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
2130   pattern_stmt = gimple_build_assign (var, rhs_code, oprnd0, def);
2131 
2132   if (dump_enabled_p ())
2133     dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);
2134 
2135   stmts->safe_push (last_stmt);
2136   return pattern_stmt;
2137 }
2138 
2139 /* Detect multiplications by constants that are positive or negative powers
2140    of 2, and convert them to shift patterns.
2141 
2142    Mult with a constant that is a positive power of two.
2143    type a_t;
2144    type b_t;
2145    S1: b_t = a_t * n
2146 
2147    or
2148 
2149    Mult with a constant that is a negative power of two.
2150    S2: b_t = a_t * -n
2151 
2152    Input/Output:
2153 
2154    STMTS: Contains a stmt from which the pattern search begins,
2155    i.e. the mult stmt.  Convert the mult operation to LSHIFT if the
2156    constant operand is a power of 2.
2157    type a_t, b_t
2158    S1': b_t = a_t << log2 (n)
2159 
2160    Convert the mult operation to LSHIFT followed by a NEGATE
2161    if the constant operand is a negative power of 2.
2162    type a_t, b_t, res_T;
2163    S2': b_t = a_t << log2 (n)
2164    S3': res_T  = - (b_t)
2165 
2166  Output:
2167 
2168   * TYPE_IN: The type of the input arguments to the pattern.
2169 
2170   * TYPE_OUT: The type of the output of this pattern.
2171 
2172   * Return value: A new stmt that will be used to replace the multiplication
2173     S1 or S2 stmt.  */
2174 
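/* A hedged illustration (hypothetical source):

     int a[256], b[256];

     void
     f (void)
     {
       int i;
       for (i = 0; i < 256; i++)
         b[i] = a[i] * 8;
     }

   On a target with no vector multiply but with vector shifts, the
   multiplication by 8 is rewritten as a left shift by 3; a factor of -8
   would additionally append a NEGATE_EXPR as described above.  */
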
2175 static gimple *
2176 vect_recog_mult_pattern (vec<gimple *> *stmts,
2177 			 tree *type_in, tree *type_out)
2178 {
2179   gimple *last_stmt = stmts->pop ();
2180   tree oprnd0, oprnd1, vectype, itype;
2181   gimple *pattern_stmt, *def_stmt;
2182   optab optab;
2183   stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
2184   int power2_val, power2_neg_val;
2185   tree shift;
2186 
2187   if (!is_gimple_assign (last_stmt))
2188     return NULL;
2189 
2190   if (gimple_assign_rhs_code (last_stmt) != MULT_EXPR)
2191     return NULL;
2192 
2193   oprnd0 = gimple_assign_rhs1 (last_stmt);
2194   oprnd1 = gimple_assign_rhs2 (last_stmt);
2195   itype = TREE_TYPE (oprnd0);
2196 
2197   if (TREE_CODE (oprnd0) != SSA_NAME
2198       || TREE_CODE (oprnd1) != INTEGER_CST
2199       || !INTEGRAL_TYPE_P (itype)
2200       || TYPE_PRECISION (itype) != GET_MODE_PRECISION (TYPE_MODE (itype)))
2201     return NULL;
2202 
2203   vectype = get_vectype_for_scalar_type (itype);
2204   if (vectype == NULL_TREE)
2205     return NULL;
2206 
2207   /* If the target can handle vectorized multiplication natively,
2208      don't attempt to optimize this.  */
2209   optab = optab_for_tree_code (MULT_EXPR, vectype, optab_default);
2210   if (optab != unknown_optab)
2211     {
2212       machine_mode vec_mode = TYPE_MODE (vectype);
2213       int icode = (int) optab_handler (optab, vec_mode);
2214       if (icode != CODE_FOR_nothing)
2215 	return NULL;
2216     }
2217 
2218   /* If the target cannot handle vector left shifts, we cannot
2219      optimize, so bail out.  */
2220   optab = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_vector);
2221   if (!optab
2222       || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
2223     return NULL;
2224 
2225   power2_val = wi::exact_log2 (oprnd1);
2226   power2_neg_val = wi::exact_log2 (wi::neg (oprnd1));
2227 
2228   /* Handle constant operands that are positive or negative powers of 2.  */
2229   if (power2_val != -1)
2230     {
2231       shift = build_int_cst (itype, power2_val);
2232       pattern_stmt
2233 	= gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2234 			       LSHIFT_EXPR, oprnd0, shift);
2235     }
2236   else if (power2_neg_val != -1)
2237     {
2238       /* If the target cannot handle vector NEGATE then we cannot
2239 	 do the optimization.  */
2240       optab = optab_for_tree_code (NEGATE_EXPR, vectype, optab_vector);
2241       if (!optab
2242 	  || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
2243 	return NULL;
2244 
2245       shift = build_int_cst (itype, power2_neg_val);
2246       def_stmt
2247 	= gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2248 			       LSHIFT_EXPR, oprnd0, shift);
2249       new_pattern_def_seq (stmt_vinfo, def_stmt);
2250       pattern_stmt
2251 	 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2252 				NEGATE_EXPR, gimple_assign_lhs (def_stmt));
2253     }
2254   else
2255     return NULL;
2256 
2257   /* Pattern detected.  */
2258   if (dump_enabled_p ())
2259     dump_printf_loc (MSG_NOTE, vect_location,
2260 		     "vect_recog_mult_pattern: detected:\n");
2261 
2262   if (dump_enabled_p ())
2263     dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM,
2264 			  pattern_stmt,0);
2265 
2266   stmts->safe_push (last_stmt);
2267   *type_in = vectype;
2268   *type_out = vectype;
2269 
2270   return pattern_stmt;
2271 }
2272 
2273 /* Detect a signed division by a constant that wouldn't be
2274    otherwise vectorized:
2275 
2276    type a_t, b_t;
2277 
2278    S1 a_t = b_t / N;
2279 
2280   where type 'type' is an integral type and N is a constant.
2281 
2282   Similarly handle modulo by a constant:
2283 
2284    S4 a_t = b_t % N;
2285 
2286   Input/Output:
2287 
2288   * STMTS: Contains a stmt from which the pattern search begins,
2289     i.e. the division stmt.  If N is a power-of-two constant and the
2290     type is signed, S1 is replaced by:
2291   S3  y_t = b_t < 0 ? N - 1 : 0;
2292   S2  x_t = b_t + y_t;
2293   S1' a_t = x_t >> log2 (N);
2294 
2295     If N is a power-of-two constant and the type is signed,
2296     S4 is replaced by (where *_T temporaries have unsigned type):
2297   S9  y_T = b_t < 0 ? -1U : 0U;
2298   S8  z_T = y_T >> (sizeof (type_t) * CHAR_BIT - log2 (N));
2299   S7  z_t = (type) z_T;
2300   S6  w_t = b_t + z_t;
2301   S5  x_t = w_t & (N - 1);
2302   S4' a_t = x_t - z_t;
2303 
2304   Output:
2305 
2306   * TYPE_IN: The type of the input arguments to the pattern.
2307 
2308   * TYPE_OUT: The type of the output of this pattern.
2309 
2310   * Return value: A new stmt that will be used to replace the division
2311     S1 or modulo S4 stmt.  */
2312 
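/* A hedged illustration (hypothetical source):

     int a[256], b[256], r[256];

     void
     f (void)
     {
       int i;
       for (i = 0; i < 256; i++)
         {
           b[i] = a[i] / 4;
           r[i] = a[i] % 4;
         }
     }

   With the signed power-of-two divisor 4, the division becomes the
   S3/S2/S1' shift sequence and the modulo the S9-S4' mask sequence
   above; for other constants the function falls back to a
   multiply-highpart sequence derived via choose_multiplier.  */
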
2313 static gimple *
2314 vect_recog_divmod_pattern (vec<gimple *> *stmts,
2315 			   tree *type_in, tree *type_out)
2316 {
2317   gimple *last_stmt = stmts->pop ();
2318   tree oprnd0, oprnd1, vectype, itype, cond;
2319   gimple *pattern_stmt, *def_stmt;
2320   enum tree_code rhs_code;
2321   stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
2322   vec_info *vinfo = stmt_vinfo->vinfo;
2323   optab optab;
2324   tree q;
2325   int dummy_int, prec;
2326   stmt_vec_info def_stmt_vinfo;
2327 
2328   if (!is_gimple_assign (last_stmt))
2329     return NULL;
2330 
2331   rhs_code = gimple_assign_rhs_code (last_stmt);
2332   switch (rhs_code)
2333     {
2334     case TRUNC_DIV_EXPR:
2335     case TRUNC_MOD_EXPR:
2336       break;
2337     default:
2338       return NULL;
2339     }
2340 
2341   if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
2342     return NULL;
2343 
2344   oprnd0 = gimple_assign_rhs1 (last_stmt);
2345   oprnd1 = gimple_assign_rhs2 (last_stmt);
2346   itype = TREE_TYPE (oprnd0);
2347   if (TREE_CODE (oprnd0) != SSA_NAME
2348       || TREE_CODE (oprnd1) != INTEGER_CST
2349       || TREE_CODE (itype) != INTEGER_TYPE
2350       || TYPE_PRECISION (itype) != GET_MODE_PRECISION (TYPE_MODE (itype)))
2351     return NULL;
2352 
2353   vectype = get_vectype_for_scalar_type (itype);
2354   if (vectype == NULL_TREE)
2355     return NULL;
2356 
2357   /* If the target can handle vectorized division or modulo natively,
2358      don't attempt to optimize this.  */
2359   optab = optab_for_tree_code (rhs_code, vectype, optab_default);
2360   if (optab != unknown_optab)
2361     {
2362       machine_mode vec_mode = TYPE_MODE (vectype);
2363       int icode = (int) optab_handler (optab, vec_mode);
2364       if (icode != CODE_FOR_nothing)
2365 	return NULL;
2366     }
2367 
2368   prec = TYPE_PRECISION (itype);
2369   if (integer_pow2p (oprnd1))
2370     {
2371       if (TYPE_UNSIGNED (itype) || tree_int_cst_sgn (oprnd1) != 1)
2372 	return NULL;
2373 
2374       /* Pattern detected.  */
2375       if (dump_enabled_p ())
2376         dump_printf_loc (MSG_NOTE, vect_location,
2377                          "vect_recog_divmod_pattern: detected:\n");
2378 
2379       cond = build2 (LT_EXPR, boolean_type_node, oprnd0,
2380 		     build_int_cst (itype, 0));
2381       if (rhs_code == TRUNC_DIV_EXPR)
2382 	{
2383 	  tree var = vect_recog_temp_ssa_var (itype, NULL);
2384 	  tree shift;
2385 	  def_stmt
2386 	    = gimple_build_assign (var, COND_EXPR, cond,
2387 				   fold_build2 (MINUS_EXPR, itype, oprnd1,
2388 						build_int_cst (itype, 1)),
2389 				   build_int_cst (itype, 0));
2390 	  new_pattern_def_seq (stmt_vinfo, def_stmt);
2391 	  var = vect_recog_temp_ssa_var (itype, NULL);
2392 	  def_stmt
2393 	    = gimple_build_assign (var, PLUS_EXPR, oprnd0,
2394 				   gimple_assign_lhs (def_stmt));
2395 	  append_pattern_def_seq (stmt_vinfo, def_stmt);
2396 
2397 	  shift = build_int_cst (itype, tree_log2 (oprnd1));
2398 	  pattern_stmt
2399 	    = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2400 				   RSHIFT_EXPR, var, shift);
2401 	}
2402       else
2403 	{
2404 	  tree signmask;
2405 	  STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
2406 	  if (compare_tree_int (oprnd1, 2) == 0)
2407 	    {
2408 	      signmask = vect_recog_temp_ssa_var (itype, NULL);
2409 	      def_stmt = gimple_build_assign (signmask, COND_EXPR, cond,
2410 					      build_int_cst (itype, 1),
2411 					      build_int_cst (itype, 0));
2412 	      append_pattern_def_seq (stmt_vinfo, def_stmt);
2413 	    }
2414 	  else
2415 	    {
2416 	      tree utype
2417 		= build_nonstandard_integer_type (prec, 1);
2418 	      tree vecutype = get_vectype_for_scalar_type (utype);
2419 	      tree shift
2420 		= build_int_cst (utype, GET_MODE_BITSIZE (TYPE_MODE (itype))
2421 					- tree_log2 (oprnd1));
2422 	      tree var = vect_recog_temp_ssa_var (utype, NULL);
2423 
2424 	      def_stmt = gimple_build_assign (var, COND_EXPR, cond,
2425 					      build_int_cst (utype, -1),
2426 					      build_int_cst (utype, 0));
2427 	      def_stmt_vinfo = new_stmt_vec_info (def_stmt, vinfo);
2428 	      set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
2429 	      STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecutype;
2430 	      append_pattern_def_seq (stmt_vinfo, def_stmt);
2431 	      var = vect_recog_temp_ssa_var (utype, NULL);
2432 	      def_stmt = gimple_build_assign (var, RSHIFT_EXPR,
2433 					      gimple_assign_lhs (def_stmt),
2434 					      shift);
2435 	      def_stmt_vinfo = new_stmt_vec_info (def_stmt, vinfo);
2436 	      set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
2437 	      STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecutype;
2438 	      append_pattern_def_seq (stmt_vinfo, def_stmt);
2439 	      signmask = vect_recog_temp_ssa_var (itype, NULL);
2440 	      def_stmt
2441 		= gimple_build_assign (signmask, NOP_EXPR, var);
2442 	      append_pattern_def_seq (stmt_vinfo, def_stmt);
2443 	    }
2444 	  def_stmt
2445 	    = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2446 				   PLUS_EXPR, oprnd0, signmask);
2447 	  append_pattern_def_seq (stmt_vinfo, def_stmt);
2448 	  def_stmt
2449 	    = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2450 				   BIT_AND_EXPR, gimple_assign_lhs (def_stmt),
2451 				   fold_build2 (MINUS_EXPR, itype, oprnd1,
2452 						build_int_cst (itype, 1)));
2453 	  append_pattern_def_seq (stmt_vinfo, def_stmt);
2454 
2455 	  pattern_stmt
2456 	    = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2457 				   MINUS_EXPR, gimple_assign_lhs (def_stmt),
2458 				   signmask);
2459 	}
2460 
2461       if (dump_enabled_p ())
2462 	dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt,
2463                               0);
2464 
2465       stmts->safe_push (last_stmt);
2466 
2467       *type_in = vectype;
2468       *type_out = vectype;
2469       return pattern_stmt;
2470     }
2471 
2472   if (prec > HOST_BITS_PER_WIDE_INT
2473       || integer_zerop (oprnd1))
2474     return NULL;
2475 
2476   if (!can_mult_highpart_p (TYPE_MODE (vectype), TYPE_UNSIGNED (itype)))
2477     return NULL;
2478 
2479   STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
2480 
2481   if (TYPE_UNSIGNED (itype))
2482     {
2483       unsigned HOST_WIDE_INT mh, ml;
2484       int pre_shift, post_shift;
2485       unsigned HOST_WIDE_INT d = (TREE_INT_CST_LOW (oprnd1)
2486 				  & GET_MODE_MASK (TYPE_MODE (itype)));
2487       tree t1, t2, t3, t4;
2488 
2489       if (d >= ((unsigned HOST_WIDE_INT) 1 << (prec - 1)))
2490 	/* FIXME: Can transform this into oprnd0 >= oprnd1 ? 1 : 0.  */
2491 	return NULL;
2492 
2493       /* Find a suitable multiplier and right shift count
2494 	 instead of multiplying with D.  */
2495       mh = choose_multiplier (d, prec, prec, &ml, &post_shift, &dummy_int);
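      /* A hedged worked example (assuming a 32-bit 'itype'): for D = 10,
	 choose_multiplier yields ML = 0xCCCCCCCD with POST_SHIFT = 3 and
	 MH = 0, so the division becomes q = (n h* 0xCCCCCCCD) >> 3, i.e.
	 the high 32 bits of the 64-bit product, shifted right by 3.  */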
2496 
2497       /* If the suggested multiplier is more than SIZE bits, we can do better
2498 	 for even divisors, using an initial right shift.  */
2499       if (mh != 0 && (d & 1) == 0)
2500 	{
2501 	  pre_shift = floor_log2 (d & -d);
2502 	  mh = choose_multiplier (d >> pre_shift, prec, prec - pre_shift,
2503 				  &ml, &post_shift, &dummy_int);
2504 	  gcc_assert (!mh);
2505 	}
2506       else
2507 	pre_shift = 0;
2508 
2509       if (mh != 0)
2510 	{
2511 	  if (post_shift - 1 >= prec)
2512 	    return NULL;
2513 
2514 	  /* t1 = oprnd0 h* ml;
2515 	     t2 = oprnd0 - t1;
2516 	     t3 = t2 >> 1;
2517 	     t4 = t1 + t3;
2518 	     q = t4 >> (post_shift - 1);  */
2519 	  t1 = vect_recog_temp_ssa_var (itype, NULL);
2520 	  def_stmt = gimple_build_assign (t1, MULT_HIGHPART_EXPR, oprnd0,
2521 					  build_int_cst (itype, ml));
2522 	  append_pattern_def_seq (stmt_vinfo, def_stmt);
2523 
2524 	  t2 = vect_recog_temp_ssa_var (itype, NULL);
2525 	  def_stmt
2526 	    = gimple_build_assign (t2, MINUS_EXPR, oprnd0, t1);
2527 	  append_pattern_def_seq (stmt_vinfo, def_stmt);
2528 
2529 	  t3 = vect_recog_temp_ssa_var (itype, NULL);
2530 	  def_stmt
2531 	    = gimple_build_assign (t3, RSHIFT_EXPR, t2, integer_one_node);
2532 	  append_pattern_def_seq (stmt_vinfo, def_stmt);
2533 
2534 	  t4 = vect_recog_temp_ssa_var (itype, NULL);
2535 	  def_stmt
2536 	    = gimple_build_assign (t4, PLUS_EXPR, t1, t3);
2537 
2538 	  if (post_shift != 1)
2539 	    {
2540 	      append_pattern_def_seq (stmt_vinfo, def_stmt);
2541 
2542 	      q = vect_recog_temp_ssa_var (itype, NULL);
2543 	      pattern_stmt
2544 		= gimple_build_assign (q, RSHIFT_EXPR, t4,
2545 				       build_int_cst (itype, post_shift - 1));
2546 	    }
2547 	  else
2548 	    {
2549 	      q = t4;
2550 	      pattern_stmt = def_stmt;
2551 	    }
2552 	}
2553       else
2554 	{
2555 	  if (pre_shift >= prec || post_shift >= prec)
2556 	    return NULL;
2557 
2558 	  /* t1 = oprnd0 >> pre_shift;
2559 	     t2 = t1 h* ml;
2560 	     q = t2 >> post_shift;  */
2561 	  if (pre_shift)
2562 	    {
2563 	      t1 = vect_recog_temp_ssa_var (itype, NULL);
2564 	      def_stmt
2565 		= gimple_build_assign (t1, RSHIFT_EXPR, oprnd0,
2566 				       build_int_cst (NULL, pre_shift));
2567 	      append_pattern_def_seq (stmt_vinfo, def_stmt);
2568 	    }
2569 	  else
2570 	    t1 = oprnd0;
2571 
2572 	  t2 = vect_recog_temp_ssa_var (itype, NULL);
2573 	  def_stmt = gimple_build_assign (t2, MULT_HIGHPART_EXPR, t1,
2574 					  build_int_cst (itype, ml));
2575 
2576 	  if (post_shift)
2577 	    {
2578 	      append_pattern_def_seq (stmt_vinfo, def_stmt);
2579 
2580 	      q = vect_recog_temp_ssa_var (itype, NULL);
2581 	      def_stmt
2582 		= gimple_build_assign (q, RSHIFT_EXPR, t2,
2583 				       build_int_cst (itype, post_shift));
2584 	    }
2585 	  else
2586 	    q = t2;
2587 
2588 	  pattern_stmt = def_stmt;
2589 	}
2590     }
2591   else
2592     {
2593       unsigned HOST_WIDE_INT ml;
2594       int post_shift;
2595       HOST_WIDE_INT d = TREE_INT_CST_LOW (oprnd1);
2596       unsigned HOST_WIDE_INT abs_d;
2597       bool add = false;
2598       tree t1, t2, t3, t4;
2599 
2600       /* Give up for -1.  */
2601       if (d == -1)
2602 	return NULL;
2603 
2604       /* Since d might be INT_MIN, we have to cast to
2605 	 unsigned HOST_WIDE_INT before negating to avoid
2606 	 undefined signed overflow.  */
2607       abs_d = (d >= 0
2608 	       ? (unsigned HOST_WIDE_INT) d
2609 	       : - (unsigned HOST_WIDE_INT) d);
2610 
2611       /* n rem d = n rem -d */
2612       if (rhs_code == TRUNC_MOD_EXPR && d < 0)
2613 	{
2614 	  d = abs_d;
2615 	  oprnd1 = build_int_cst (itype, abs_d);
2616 	}
2617       else if (HOST_BITS_PER_WIDE_INT >= prec
2618 	       && abs_d == (unsigned HOST_WIDE_INT) 1 << (prec - 1))
2619 	/* This case is not handled correctly below.  */
2620 	return NULL;
2621 
2622       choose_multiplier (abs_d, prec, prec - 1, &ml, &post_shift, &dummy_int);
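      /* A hedged worked example (assuming a 32-bit 'itype'): for D = 3,
	 choose_multiplier yields ML = 0x55555556 with POST_SHIFT = 0, so
	 the sequence built below is t1 = n h* 0x55555556;
	 t4 = n >> 31;  q = t1 - t4.  */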
2623       if (ml >= (unsigned HOST_WIDE_INT) 1 << (prec - 1))
2624 	{
2625 	  add = true;
2626 	  ml |= (~(unsigned HOST_WIDE_INT) 0) << (prec - 1);
2627 	}
2628       if (post_shift >= prec)
2629 	return NULL;
2630 
2631       /* t1 = oprnd0 h* ml;  */
2632       t1 = vect_recog_temp_ssa_var (itype, NULL);
2633       def_stmt = gimple_build_assign (t1, MULT_HIGHPART_EXPR, oprnd0,
2634 				      build_int_cst (itype, ml));
2635 
2636       if (add)
2637 	{
2638 	  /* t2 = t1 + oprnd0;  */
2639 	  append_pattern_def_seq (stmt_vinfo, def_stmt);
2640 	  t2 = vect_recog_temp_ssa_var (itype, NULL);
2641 	  def_stmt = gimple_build_assign (t2, PLUS_EXPR, t1, oprnd0);
2642 	}
2643       else
2644 	t2 = t1;
2645 
2646       if (post_shift)
2647 	{
2648 	  /* t3 = t2 >> post_shift;  */
2649 	  append_pattern_def_seq (stmt_vinfo, def_stmt);
2650 	  t3 = vect_recog_temp_ssa_var (itype, NULL);
2651 	  def_stmt = gimple_build_assign (t3, RSHIFT_EXPR, t2,
2652 					  build_int_cst (itype, post_shift));
2653 	}
2654       else
2655 	t3 = t2;
2656 
2657       wide_int oprnd0_min, oprnd0_max;
2658       int msb = 1;
2659       if (get_range_info (oprnd0, &oprnd0_min, &oprnd0_max) == VR_RANGE)
2660 	{
2661 	  if (!wi::neg_p (oprnd0_min, TYPE_SIGN (itype)))
2662 	    msb = 0;
2663 	  else if (wi::neg_p (oprnd0_max, TYPE_SIGN (itype)))
2664 	    msb = -1;
2665 	}
2666 
2667       if (msb == 0 && d >= 0)
2668 	{
2669 	  /* q = t3;  */
2670 	  q = t3;
2671 	  pattern_stmt = def_stmt;
2672 	}
2673       else
2674 	{
2675 	  /* t4 = oprnd0 >> (prec - 1);
2676 	     or if we know from VRP that oprnd0 >= 0
2677 	     t4 = 0;
2678 	     or if we know from VRP that oprnd0 < 0
2679 	     t4 = -1;  */
2680 	  append_pattern_def_seq (stmt_vinfo, def_stmt);
2681 	  t4 = vect_recog_temp_ssa_var (itype, NULL);
2682 	  if (msb != 1)
2683 	    def_stmt = gimple_build_assign (t4, INTEGER_CST,
2684 					    build_int_cst (itype, msb));
2685 	  else
2686 	    def_stmt = gimple_build_assign (t4, RSHIFT_EXPR, oprnd0,
2687 					    build_int_cst (itype, prec - 1));
2688 	  append_pattern_def_seq (stmt_vinfo, def_stmt);
2689 
2690 	  /* q = t3 - t4;  or q = t4 - t3;  */
2691 	  q = vect_recog_temp_ssa_var (itype, NULL);
2692 	  pattern_stmt = gimple_build_assign (q, MINUS_EXPR, d < 0 ? t4 : t3,
2693 					      d < 0 ? t3 : t4);
2694 	}
2695     }
2696 
2697   if (rhs_code == TRUNC_MOD_EXPR)
2698     {
2699       tree r, t1;
2700 
2701       /* We divided.  Now finish by:
2702 	 t1 = q * oprnd1;
2703 	 r = oprnd0 - t1;  */
2704       append_pattern_def_seq (stmt_vinfo, pattern_stmt);
2705 
2706       t1 = vect_recog_temp_ssa_var (itype, NULL);
2707       def_stmt = gimple_build_assign (t1, MULT_EXPR, q, oprnd1);
2708       append_pattern_def_seq (stmt_vinfo, def_stmt);
2709 
2710       r = vect_recog_temp_ssa_var (itype, NULL);
2711       pattern_stmt = gimple_build_assign (r, MINUS_EXPR, oprnd0, t1);
2712     }
2713 
2714   /* Pattern detected.  */
2715   if (dump_enabled_p ())
2716     {
2717       dump_printf_loc (MSG_NOTE, vect_location,
2718                        "vect_recog_divmod_pattern: detected: ");
2719       dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
2720       dump_printf (MSG_NOTE, "\n");
2721     }
2722 
2723   stmts->safe_push (last_stmt);
2724 
2725   *type_in = vectype;
2726   *type_out = vectype;
2727   return pattern_stmt;
2728 }
2729 
2730 /* Function vect_recog_mixed_size_cond_pattern
2731 
2732    Try to find the following pattern:
2733 
2734      type x_t, y_t;
2735      TYPE a_T, b_T, c_T;
2736    loop:
2737      S1  a_T = x_t CMP y_t ? b_T : c_T;
2738 
2739    where type 'TYPE' is an integral type whose size differs from that of
2740    'type'.  b_T and c_T are either constants (and if 'TYPE' is wider
2741    than 'type', the constants need to fit into an integer type
2742    with the same width as 'type') or results of conversion from 'type'.
2743 
2744    Input:
2745 
2746    * LAST_STMT: A stmt from which the pattern search begins.
2747 
2748    Output:
2749 
2750    * TYPE_IN: The type of the input arguments to the pattern.
2751 
2752    * TYPE_OUT: The type of the output of this pattern.
2753 
2754    * Return value: A new stmt that will be used to replace the pattern.
2755 	Additionally a def_stmt is added.
2756 
2757 	a_it = x_t CMP y_t ? b_it : c_it;
2758 	a_T = (TYPE) a_it;  */
2759 
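/* A hedged illustration (hypothetical source): the comparison is done on
   'short' elements while the selected values are 'int':

     short x[256], y[256];
     int out[256];

     void
     f (void)
     {
       int i;
       for (i = 0; i < 256; i++)
         out[i] = x[i] < y[i] ? 7 : 3;
     }

   The COND_EXPR is first evaluated in a 'short'-sized integer type
   (a_it above) and then widened to 'int', so that the vector comparison
   and the vector selection use matching element sizes.  */
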
2760 static gimple *
2761 vect_recog_mixed_size_cond_pattern (vec<gimple *> *stmts, tree *type_in,
2762 				    tree *type_out)
2763 {
2764   gimple *last_stmt = (*stmts)[0];
2765   tree cond_expr, then_clause, else_clause;
2766   stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt), def_stmt_info;
2767   tree type, vectype, comp_vectype, itype = NULL_TREE, vecitype;
2768   gimple *pattern_stmt, *def_stmt;
2769   vec_info *vinfo = stmt_vinfo->vinfo;
2770   tree orig_type0 = NULL_TREE, orig_type1 = NULL_TREE;
2771   gimple *def_stmt0 = NULL, *def_stmt1 = NULL;
2772   bool promotion;
2773   tree comp_scalar_type;
2774 
2775   if (!is_gimple_assign (last_stmt)
2776       || gimple_assign_rhs_code (last_stmt) != COND_EXPR
2777       || STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)
2778     return NULL;
2779 
2780   cond_expr = gimple_assign_rhs1 (last_stmt);
2781   then_clause = gimple_assign_rhs2 (last_stmt);
2782   else_clause = gimple_assign_rhs3 (last_stmt);
2783 
2784   if (!COMPARISON_CLASS_P (cond_expr))
2785     return NULL;
2786 
2787   comp_scalar_type = TREE_TYPE (TREE_OPERAND (cond_expr, 0));
2788   comp_vectype = get_vectype_for_scalar_type (comp_scalar_type);
2789   if (comp_vectype == NULL_TREE)
2790     return NULL;
2791 
2792   type = gimple_expr_type (last_stmt);
2793   if (types_compatible_p (type, comp_scalar_type)
2794       || ((TREE_CODE (then_clause) != INTEGER_CST
2795 	   || TREE_CODE (else_clause) != INTEGER_CST)
2796 	  && !INTEGRAL_TYPE_P (comp_scalar_type))
2797       || !INTEGRAL_TYPE_P (type))
2798     return NULL;
2799 
2800   if ((TREE_CODE (then_clause) != INTEGER_CST
2801        && !type_conversion_p (then_clause, last_stmt, false, &orig_type0,
2802                               &def_stmt0, &promotion))
2803       || (TREE_CODE (else_clause) != INTEGER_CST
2804           && !type_conversion_p (else_clause, last_stmt, false, &orig_type1,
2805                                  &def_stmt1, &promotion)))
2806     return NULL;
2807 
2808   if (orig_type0 && orig_type1
2809       && !types_compatible_p (orig_type0, orig_type1))
2810     return NULL;
2811 
2812   if (orig_type0)
2813     {
2814       if (!types_compatible_p (orig_type0, comp_scalar_type))
2815 	return NULL;
2816       then_clause = gimple_assign_rhs1 (def_stmt0);
2817       itype = orig_type0;
2818     }
2819 
2820   if (orig_type1)
2821     {
2822       if (!types_compatible_p (orig_type1, comp_scalar_type))
2823 	return NULL;
2824       else_clause = gimple_assign_rhs1 (def_stmt1);
2825       itype = orig_type1;
2826     }
2827 
2828 
2829   HOST_WIDE_INT cmp_mode_size
2830     = GET_MODE_UNIT_BITSIZE (TYPE_MODE (comp_vectype));
2831 
2832   if (GET_MODE_BITSIZE (TYPE_MODE (type)) == cmp_mode_size)
2833     return NULL;
2834 
2835   vectype = get_vectype_for_scalar_type (type);
2836   if (vectype == NULL_TREE)
2837     return NULL;
2838 
2839   if (expand_vec_cond_expr_p (vectype, comp_vectype))
2840     return NULL;
2841 
2842   if (itype == NULL_TREE)
2843     itype = build_nonstandard_integer_type (cmp_mode_size,
2844   					    TYPE_UNSIGNED (type));
2845 
2846   if (itype == NULL_TREE
2847       || GET_MODE_BITSIZE (TYPE_MODE (itype)) != cmp_mode_size)
2848     return NULL;
2849 
2850   vecitype = get_vectype_for_scalar_type (itype);
2851   if (vecitype == NULL_TREE)
2852     return NULL;
2853 
2854   if (!expand_vec_cond_expr_p (vecitype, comp_vectype))
2855     return NULL;
2856 
2857   if (GET_MODE_BITSIZE (TYPE_MODE (type)) > cmp_mode_size)
2858     {
2859       if ((TREE_CODE (then_clause) == INTEGER_CST
2860 	   && !int_fits_type_p (then_clause, itype))
2861 	  || (TREE_CODE (else_clause) == INTEGER_CST
2862 	      && !int_fits_type_p (else_clause, itype)))
2863 	return NULL;
2864     }
2865 
2866   def_stmt = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2867 				  COND_EXPR, unshare_expr (cond_expr),
2868 				  fold_convert (itype, then_clause),
2869 				  fold_convert (itype, else_clause));
2870   pattern_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type, NULL),
2871 				      NOP_EXPR, gimple_assign_lhs (def_stmt));
2872 
2873   new_pattern_def_seq (stmt_vinfo, def_stmt);
2874   def_stmt_info = new_stmt_vec_info (def_stmt, vinfo);
2875   set_vinfo_for_stmt (def_stmt, def_stmt_info);
2876   STMT_VINFO_VECTYPE (def_stmt_info) = vecitype;
2877   *type_in = vecitype;
2878   *type_out = vectype;
2879 
2880   if (dump_enabled_p ())
2881     dump_printf_loc (MSG_NOTE, vect_location,
2882                      "vect_recog_mixed_size_cond_pattern: detected:\n");
2883 
2884   return pattern_stmt;
2885 }
2886 
2887 
2888 /* Helper function of vect_recog_bool_pattern.  Called recursively, return
2889    true if bool VAR can and should be optimized that way.  Assume it shouldn't
2890    in case it's a result of a comparison which can be directly vectorized into
2891    a vector comparison.  */
2892 
2893 static bool
2894 check_bool_pattern (tree var, vec_info *vinfo)
2895 {
2896   gimple *def_stmt;
2897   enum vect_def_type dt;
2898   tree rhs1;
2899   enum tree_code rhs_code;
2900 
2901   if (!vect_is_simple_use (var, vinfo, &def_stmt, &dt))
2902     return false;
2903 
2904   if (dt != vect_internal_def)
2905     return false;
2906 
2907   if (!is_gimple_assign (def_stmt))
2908     return false;
2909 
2910   if (!has_single_use (var))
2911     return false;
2912 
2913   rhs1 = gimple_assign_rhs1 (def_stmt);
2914   rhs_code = gimple_assign_rhs_code (def_stmt);
2915   switch (rhs_code)
2916     {
2917     case SSA_NAME:
2918       return check_bool_pattern (rhs1, vinfo);
2919 
2920     CASE_CONVERT:
2921       if ((TYPE_PRECISION (TREE_TYPE (rhs1)) != 1
2922 	   || !TYPE_UNSIGNED (TREE_TYPE (rhs1)))
2923 	  && TREE_CODE (TREE_TYPE (rhs1)) != BOOLEAN_TYPE)
2924 	return false;
2925       return check_bool_pattern (rhs1, vinfo);
2926 
2927     case BIT_NOT_EXPR:
2928       return check_bool_pattern (rhs1, vinfo);
2929 
2930     case BIT_AND_EXPR:
2931     case BIT_IOR_EXPR:
2932     case BIT_XOR_EXPR:
2933       if (!check_bool_pattern (rhs1, vinfo))
2934 	return false;
2935       return check_bool_pattern (gimple_assign_rhs2 (def_stmt), vinfo);
2936 
2937     default:
2938       if (TREE_CODE_CLASS (rhs_code) == tcc_comparison)
2939 	{
2940 	  tree vecitype, comp_vectype, mask_type;
2941 
2942 	  /* If the comparison can throw, then is_gimple_condexpr will be
2943 	     false and we can't make a COND_EXPR/VEC_COND_EXPR out of it.  */
2944 	  if (stmt_could_throw_p (def_stmt))
2945 	    return false;
2946 
2947 	  comp_vectype = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
2948 	  if (comp_vectype == NULL_TREE)
2949 	    return false;
2950 
2951 	  mask_type = get_mask_type_for_scalar_type (TREE_TYPE (rhs1));
2952 	  if (mask_type
2953 	      && expand_vec_cmp_expr_p (comp_vectype, mask_type))
2954 	    return false;
2955 
2956 	  if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE)
2957 	    {
2958 	      machine_mode mode = TYPE_MODE (TREE_TYPE (rhs1));
2959 	      tree itype
2960 		= build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
2961 	      vecitype = get_vectype_for_scalar_type (itype);
2962 	      if (vecitype == NULL_TREE)
2963 		return false;
2964 	    }
2965 	  else
2966 	    vecitype = comp_vectype;
2967 	  return expand_vec_cond_expr_p (vecitype, comp_vectype);
2968 	}
2969       return false;
2970     }
2971 }
2972 
2973 
2974 /* Helper function of adjust_bool_pattern.  Add a cast to TYPE to a previous
2975    stmt (SSA_NAME_DEF_STMT of VAR) by moving the COND_EXPR from RELATED_STMT
2976    to PATTERN_DEF_SEQ and adding a cast as RELATED_STMT.  */
2977 
2978 static tree
2979 adjust_bool_pattern_cast (tree type, tree var)
2980 {
2981   stmt_vec_info stmt_vinfo = vinfo_for_stmt (SSA_NAME_DEF_STMT (var));
2982   gimple *cast_stmt, *pattern_stmt;
2983 
2984   gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo));
2985   pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
2986   new_pattern_def_seq (stmt_vinfo, pattern_stmt);
2987   cast_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type, NULL),
2988 				   NOP_EXPR, gimple_assign_lhs (pattern_stmt));
2989   STMT_VINFO_RELATED_STMT (stmt_vinfo) = cast_stmt;
2990   return gimple_assign_lhs (cast_stmt);
2991 }
2992 
2993 
2994 /* Helper function of vect_recog_bool_pattern.  Do the actual transformations,
2995    recursively.  VAR is an SSA_NAME that should be transformed from bool
2996    to a wider integer type, OUT_TYPE is the desired final integer type of
2997    the whole pattern, TRUEVAL should be NULL unless optimizing
2998    BIT_AND_EXPR into a COND_EXPR with one integer from one of the operands
2999    in the then_clause, STMTS is where statements with added pattern stmts
3000    should be pushed to.  */
3001 
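/* A hedged illustration (hypothetical source) of the kind of bool
   computation these helpers rewrite:

     int a[256], b[256];
     unsigned char out[256];

     void
     f (void)
     {
       int i;
       for (i = 0; i < 256; i++)
         out[i] = (a[i] < 0) & (b[i] > 0);
     }

   Each comparison becomes a COND_EXPR producing 0 or 1 in an integer
   type of the comparison's width, and the bitwise AND is then performed
   on those integers (possibly merging one comparison into the other's
   COND_EXPR, as the BIT_AND_EXPR case below describes).  */
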
3002 static tree
3003 adjust_bool_pattern (tree var, tree out_type, tree trueval,
3004 		     vec<gimple *> *stmts)
3005 {
3006   gimple *stmt = SSA_NAME_DEF_STMT (var);
3007   enum tree_code rhs_code, def_rhs_code;
3008   tree itype, cond_expr, rhs1, rhs2, irhs1, irhs2;
3009   location_t loc;
3010   gimple *pattern_stmt, *def_stmt;
3011 
3012   rhs1 = gimple_assign_rhs1 (stmt);
3013   rhs2 = gimple_assign_rhs2 (stmt);
3014   rhs_code = gimple_assign_rhs_code (stmt);
3015   loc = gimple_location (stmt);
3016   switch (rhs_code)
3017     {
3018     case SSA_NAME:
3019     CASE_CONVERT:
3020       irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
3021       itype = TREE_TYPE (irhs1);
3022       pattern_stmt
3023 	= gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3024 			       SSA_NAME, irhs1);
3025       break;
3026 
3027     case BIT_NOT_EXPR:
3028       irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
3029       itype = TREE_TYPE (irhs1);
3030       pattern_stmt
3031 	= gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3032 			       BIT_XOR_EXPR, irhs1, build_int_cst (itype, 1));
3033       break;
3034 
3035     case BIT_AND_EXPR:
3036       /* Try to optimize x = y & (a < b ? 1 : 0); into
3037 	 x = (a < b ? y : 0);
3038 
3039 	 E.g. for:
3040 	   bool a_b, b_b, c_b;
3041 	   TYPE d_T;
3042 
3043 	   S1  a_b = x1 CMP1 y1;
3044 	   S2  b_b = x2 CMP2 y2;
3045 	   S3  c_b = a_b & b_b;
3046 	   S4  d_T = (TYPE) c_b;
3047 
3048 	 we would normally emit:
3049 
3050 	   S1'  a_T = x1 CMP1 y1 ? 1 : 0;
3051 	   S2'  b_T = x2 CMP2 y2 ? 1 : 0;
3052 	   S3'  c_T = a_T & b_T;
3053 	   S4'  d_T = c_T;
3054 
3055 	 but we can save one stmt by using the
3056 	 result of one of the COND_EXPRs in the other COND_EXPR and leave
3057 	 BIT_AND_EXPR stmt out:
3058 
3059 	   S1'  a_T = x1 CMP1 y1 ? 1 : 0;
3060 	   S3'  c_T = x2 CMP2 y2 ? a_T : 0;
3061 	   S4'  d_T = c_T;
3062 
3063 	 At least when VEC_COND_EXPR is implemented using masks,
3064 	 cond ? 1 : 0 is as expensive as cond ? var : 0; in both cases it
3065 	 computes the comparison mask and ANDs it, in one case with an
3066 	 all-ones vector, in the other case with a vector register.
3067 	 Don't do this for BIT_IOR_EXPR, because cond ? 1 : var; is
3068 	 often more expensive.  */
3069       def_stmt = SSA_NAME_DEF_STMT (rhs2);
3070       def_rhs_code = gimple_assign_rhs_code (def_stmt);
3071       if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
3072 	{
3073 	  tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
3074 	  irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
3075 	  if (TYPE_PRECISION (TREE_TYPE (irhs1))
3076 	      == GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (def_rhs1))))
3077 	    {
3078 	      gimple *tstmt;
3079 	      stmt_vec_info stmt_def_vinfo = vinfo_for_stmt (def_stmt);
3080 	      irhs2 = adjust_bool_pattern (rhs2, out_type, irhs1, stmts);
3081 	      tstmt = stmts->pop ();
3082 	      gcc_assert (tstmt == def_stmt);
3083 	      stmts->quick_push (stmt);
3084 	      STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt))
3085 		= STMT_VINFO_RELATED_STMT (stmt_def_vinfo);
3086 	      gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_def_vinfo));
3087 	      STMT_VINFO_RELATED_STMT (stmt_def_vinfo) = NULL;
3088 	      return irhs2;
3089 	    }
3090 	  else
3091 	    irhs2 = adjust_bool_pattern (rhs2, out_type, NULL_TREE, stmts);
3092 	  goto and_ior_xor;
3093 	}
3094       def_stmt = SSA_NAME_DEF_STMT (rhs1);
3095       def_rhs_code = gimple_assign_rhs_code (def_stmt);
3096       if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
3097 	{
3098 	  tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
3099 	  irhs2 = adjust_bool_pattern (rhs2, out_type, NULL_TREE, stmts);
3100 	  if (TYPE_PRECISION (TREE_TYPE (irhs2))
3101 	      == GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (def_rhs1))))
3102 	    {
3103 	      gimple *tstmt;
3104 	      stmt_vec_info stmt_def_vinfo = vinfo_for_stmt (def_stmt);
3105 	      irhs1 = adjust_bool_pattern (rhs1, out_type, irhs2, stmts);
3106 	      tstmt = stmts->pop ();
3107 	      gcc_assert (tstmt == def_stmt);
3108 	      stmts->quick_push (stmt);
3109 	      STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt))
3110 		= STMT_VINFO_RELATED_STMT (stmt_def_vinfo);
3111 	      gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_def_vinfo));
3112 	      STMT_VINFO_RELATED_STMT (stmt_def_vinfo) = NULL;
3113 	      return irhs1;
3114 	    }
3115 	  else
3116 	    irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
3117 	  goto and_ior_xor;
3118 	}
3119       /* FALLTHRU */
3120     case BIT_IOR_EXPR:
3121     case BIT_XOR_EXPR:
3122       irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
3123       irhs2 = adjust_bool_pattern (rhs2, out_type, NULL_TREE, stmts);
3124     and_ior_xor:
3125       if (TYPE_PRECISION (TREE_TYPE (irhs1))
3126 	  != TYPE_PRECISION (TREE_TYPE (irhs2)))
3127 	{
3128 	  int prec1 = TYPE_PRECISION (TREE_TYPE (irhs1));
3129 	  int prec2 = TYPE_PRECISION (TREE_TYPE (irhs2));
3130 	  int out_prec = TYPE_PRECISION (out_type);
	  if (absu_hwi (out_prec - prec1) < absu_hwi (out_prec - prec2))
	    irhs2 = adjust_bool_pattern_cast (TREE_TYPE (irhs1), rhs2);
	  else if (absu_hwi (out_prec - prec1) > absu_hwi (out_prec - prec2))
	    irhs1 = adjust_bool_pattern_cast (TREE_TYPE (irhs2), rhs1);
	  else
	    {
	      irhs1 = adjust_bool_pattern_cast (out_type, rhs1);
	      irhs2 = adjust_bool_pattern_cast (out_type, rhs2);
	    }
	}
      itype = TREE_TYPE (irhs1);
      pattern_stmt
	= gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
			       rhs_code, irhs1, irhs2);
      break;

    default:
      gcc_assert (TREE_CODE_CLASS (rhs_code) == tcc_comparison);
      if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE
	  || !TYPE_UNSIGNED (TREE_TYPE (rhs1))
	  || (TYPE_PRECISION (TREE_TYPE (rhs1))
	      != GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (rhs1)))))
	{
	  machine_mode mode = TYPE_MODE (TREE_TYPE (rhs1));
	  itype
	    = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
	}
      else
	itype = TREE_TYPE (rhs1);
      cond_expr = build2_loc (loc, rhs_code, itype, rhs1, rhs2);
      if (trueval == NULL_TREE)
	trueval = build_int_cst (itype, 1);
      else
	gcc_checking_assert (useless_type_conversion_p (itype,
							TREE_TYPE (trueval)));
      pattern_stmt
	= gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
			       COND_EXPR, cond_expr, trueval,
			       build_int_cst (itype, 0));
      break;
    }

  stmts->safe_push (stmt);
  gimple_set_location (pattern_stmt, loc);
  STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt)) = pattern_stmt;
  return gimple_assign_lhs (pattern_stmt);
}


/* Helper for search_type_for_mask.  */

static tree
search_type_for_mask_1 (tree var, vec_info *vinfo,
			hash_map<gimple *, tree> &cache)
{
  gimple *def_stmt;
  enum vect_def_type dt;
  tree rhs1;
  enum tree_code rhs_code;
  tree res = NULL_TREE, res2;

  if (TREE_CODE (var) != SSA_NAME)
    return NULL_TREE;

  if ((TYPE_PRECISION (TREE_TYPE (var)) != 1
       || !TYPE_UNSIGNED (TREE_TYPE (var)))
      && TREE_CODE (TREE_TYPE (var)) != BOOLEAN_TYPE)
    return NULL_TREE;

  if (!vect_is_simple_use (var, vinfo, &def_stmt, &dt))
    return NULL_TREE;

  if (dt != vect_internal_def)
    return NULL_TREE;

  if (!is_gimple_assign (def_stmt))
    return NULL_TREE;

  tree *c = cache.get (def_stmt);
  if (c)
    return *c;

  rhs_code = gimple_assign_rhs_code (def_stmt);
  rhs1 = gimple_assign_rhs1 (def_stmt);

  switch (rhs_code)
    {
    case SSA_NAME:
    case BIT_NOT_EXPR:
    CASE_CONVERT:
      res = search_type_for_mask_1 (rhs1, vinfo, cache);
      break;

    case BIT_AND_EXPR:
    case BIT_IOR_EXPR:
    case BIT_XOR_EXPR:
      res = search_type_for_mask_1 (rhs1, vinfo, cache);
      res2 = search_type_for_mask_1 (gimple_assign_rhs2 (def_stmt), vinfo,
				     cache);
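      /* Use whichever operand yields a type; if both do, prefer the
	 one with the smaller precision.  */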
      if (!res || (res2 && TYPE_PRECISION (res) > TYPE_PRECISION (res2)))
	res = res2;
      break;

    default:
      if (TREE_CODE_CLASS (rhs_code) == tcc_comparison)
	{
	  tree comp_vectype, mask_type;

	  if (TREE_CODE (TREE_TYPE (rhs1)) == BOOLEAN_TYPE)
	    {
	      res = search_type_for_mask_1 (rhs1, vinfo, cache);
	      res2 = search_type_for_mask_1 (gimple_assign_rhs2 (def_stmt),
					     vinfo, cache);
	      if (!res || (res2 && TYPE_PRECISION (res) > TYPE_PRECISION (res2)))
		res = res2;
	      break;
	    }

	  comp_vectype = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
	  if (comp_vectype == NULL_TREE)
	    {
	      res = NULL_TREE;
	      break;
	    }

	  mask_type = get_mask_type_for_scalar_type (TREE_TYPE (rhs1));
	  if (!mask_type
	      || !expand_vec_cmp_expr_p (comp_vectype, mask_type))
	    {
	      res = NULL_TREE;
	      break;
	    }

	  if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE
	      || !TYPE_UNSIGNED (TREE_TYPE (rhs1)))
	    {
	      machine_mode mode = TYPE_MODE (TREE_TYPE (rhs1));
	      res = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
	    }
	  else
	    res = TREE_TYPE (rhs1);
	}
    }

  cache.put (def_stmt, res);
  return res;
}

/* Return the proper type for converting bool VAR into
   an integer value, or NULL_TREE if no such type exists.
   The type is chosen so that the converted value has the
   same number of elements as VAR's vector type.  */

static tree
search_type_for_mask (tree var, vec_info *vinfo)
{
  hash_map<gimple *, tree> cache;
  return search_type_for_mask_1 (var, vinfo, cache);
}

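/* For example (a sketch, not from an actual testcase): given

     mask_1 = i_2 > i_3;     <-- int comparison
     mask_4 = c_5 < c_6;     <-- signed char comparison
     mask_7 = mask_1 & mask_4;

   searching from MASK_7 visits both comparisons and, assuming the
   target supports both vector comparisons, returns the type with the
   smaller precision, here the unsigned 8-bit type built for the char
   comparison.  */
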
/* Function vect_recog_bool_pattern

   Try to find a pattern like the following:

     bool a_b, b_b, c_b, d_b, e_b;
     TYPE f_T;
   loop:
     S1  a_b = x1 CMP1 y1;
     S2  b_b = x2 CMP2 y2;
     S3  c_b = a_b & b_b;
     S4  d_b = x3 CMP3 y3;
     S5  e_b = c_b | d_b;
     S6  f_T = (TYPE) e_b;

   where type 'TYPE' is an integral type.  Or a similar pattern
   ending in

     S6  f_Y = e_b ? r_Y : s_Y;

   as produced by if-conversion of a complex condition.

   Input:

   * LAST_STMT: A stmt at the end from which the pattern
		search begins, i.e. cast of a bool to
		an integer type.

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the pattern.

	Assuming the size of TYPE is the same as the size of all comparisons
	(otherwise some casts would be added where needed), for the above
	sequence we create related pattern stmts:
	S1'  a_T = x1 CMP1 y1 ? 1 : 0;
	S3'  c_T = x2 CMP2 y2 ? a_T : 0;
	S4'  d_T = x3 CMP3 y3 ? 1 : 0;
	S5'  e_T = c_T | d_T;
	S6'  f_T = e_T;

	Instead of the above S3' we could emit:
	S2'  b_T = x2 CMP2 y2 ? 1 : 0;
	S3'  c_T = a_T & b_T;
	but the above is more efficient.  */

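/* A source-level sketch (hypothetical, not a testcase) whose
   if-converted form matches S1-S6 above:

     for (i = 0; i < n; i++)
       f[i] = (x1[i] > y1[i] && x2[i] < y2[i]) || x3[i] != y3[i];

   where f is an array of some integral TYPE.  */
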
static gimple *
vect_recog_bool_pattern (vec<gimple *> *stmts, tree *type_in,
			 tree *type_out)
{
  gimple *last_stmt = stmts->pop ();
  enum tree_code rhs_code;
  tree var, lhs, rhs, vectype;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  stmt_vec_info new_stmt_info;
  vec_info *vinfo = stmt_vinfo->vinfo;
  gimple *pattern_stmt;

  if (!is_gimple_assign (last_stmt))
    return NULL;

  var = gimple_assign_rhs1 (last_stmt);
  lhs = gimple_assign_lhs (last_stmt);

  if ((TYPE_PRECISION (TREE_TYPE (var)) != 1
       || !TYPE_UNSIGNED (TREE_TYPE (var)))
      && TREE_CODE (TREE_TYPE (var)) != BOOLEAN_TYPE)
    return NULL;

  rhs_code = gimple_assign_rhs_code (last_stmt);
  if (CONVERT_EXPR_CODE_P (rhs_code))
    {
      if (TREE_CODE (TREE_TYPE (lhs)) != INTEGER_TYPE
	  || TYPE_PRECISION (TREE_TYPE (lhs)) == 1)
	return NULL;
      vectype = get_vectype_for_scalar_type (TREE_TYPE (lhs));
      if (vectype == NULL_TREE)
	return NULL;

      if (check_bool_pattern (var, vinfo))
	{
	  rhs = adjust_bool_pattern (var, TREE_TYPE (lhs), NULL_TREE, stmts);
	  lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
	  if (useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
	    pattern_stmt = gimple_build_assign (lhs, SSA_NAME, rhs);
	  else
	    pattern_stmt
	      = gimple_build_assign (lhs, NOP_EXPR, rhs);
	}
      else
	{
	  tree type = search_type_for_mask (var, vinfo);
	  tree cst0, cst1, tmp;

	  if (!type)
	    return NULL;

	  /* We may directly use the cond with narrowed type to avoid
	     multiple cond exprs with following result packing and
	     perform a single cond with the packed mask instead.  In
	     case of widening we'd better emit the cond first and then
	     extract the results.  */
	  if (TYPE_MODE (type) == TYPE_MODE (TREE_TYPE (lhs)))
	    type = TREE_TYPE (lhs);

	  cst0 = build_int_cst (type, 0);
	  cst1 = build_int_cst (type, 1);
	  tmp = vect_recog_temp_ssa_var (type, NULL);
	  pattern_stmt = gimple_build_assign (tmp, COND_EXPR, var, cst1, cst0);

	  if (!useless_type_conversion_p (type, TREE_TYPE (lhs)))
	    {
	      tree new_vectype = get_vectype_for_scalar_type (type);
	      new_stmt_info = new_stmt_vec_info (pattern_stmt, vinfo);
	      set_vinfo_for_stmt (pattern_stmt, new_stmt_info);
	      STMT_VINFO_VECTYPE (new_stmt_info) = new_vectype;
	      new_pattern_def_seq (stmt_vinfo, pattern_stmt);

	      lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
	      pattern_stmt = gimple_build_assign (lhs, CONVERT_EXPR, tmp);
	    }
	}

      *type_out = vectype;
      *type_in = vectype;
      stmts->safe_push (last_stmt);
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "vect_recog_bool_pattern: detected:\n");

      return pattern_stmt;
    }
  else if (rhs_code == COND_EXPR
	   && TREE_CODE (var) == SSA_NAME)
    {
      vectype = get_vectype_for_scalar_type (TREE_TYPE (lhs));
      if (vectype == NULL_TREE)
	return NULL;

      /* Build a scalar type for the boolean result that when
	 vectorized matches the vector type of the result in
	 size and number of elements.  */
      unsigned prec
	= wi::udiv_trunc (TYPE_SIZE (vectype),
			  TYPE_VECTOR_SUBPARTS (vectype)).to_uhwi ();
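      /* E.g. for a V4SI result vector this is 128 / 4 = 32, i.e. a
	 32-bit scalar element type.  */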
      tree type
	= build_nonstandard_integer_type (prec,
					  TYPE_UNSIGNED (TREE_TYPE (var)));
      if (get_vectype_for_scalar_type (type) == NULL_TREE)
	return NULL;

      if (!check_bool_pattern (var, vinfo))
	return NULL;

      rhs = adjust_bool_pattern (var, type, NULL_TREE, stmts);

      lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
      pattern_stmt
	  = gimple_build_assign (lhs, COND_EXPR,
				 build2 (NE_EXPR, boolean_type_node,
					 rhs, build_int_cst (type, 0)),
				 gimple_assign_rhs2 (last_stmt),
				 gimple_assign_rhs3 (last_stmt));
      *type_out = vectype;
      *type_in = vectype;
      stmts->safe_push (last_stmt);
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "vect_recog_bool_pattern: detected:\n");

      return pattern_stmt;
    }
  else if (rhs_code == SSA_NAME
	   && STMT_VINFO_DATA_REF (stmt_vinfo))
    {
      stmt_vec_info pattern_stmt_info;
      vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
      gcc_assert (vectype != NULL_TREE);
      if (!VECTOR_MODE_P (TYPE_MODE (vectype)))
	return NULL;

      if (check_bool_pattern (var, vinfo))
	rhs = adjust_bool_pattern (var, TREE_TYPE (vectype),
				   NULL_TREE, stmts);
      else
	{
	  tree type = search_type_for_mask (var, vinfo);
	  tree cst0, cst1, new_vectype;

	  if (!type)
	    return NULL;

	  if (TYPE_MODE (type) == TYPE_MODE (TREE_TYPE (vectype)))
	    type = TREE_TYPE (vectype);

	  cst0 = build_int_cst (type, 0);
	  cst1 = build_int_cst (type, 1);
	  new_vectype = get_vectype_for_scalar_type (type);

	  rhs = vect_recog_temp_ssa_var (type, NULL);
	  pattern_stmt = gimple_build_assign (rhs, COND_EXPR, var, cst1, cst0);

	  pattern_stmt_info = new_stmt_vec_info (pattern_stmt, vinfo);
	  set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
	  STMT_VINFO_VECTYPE (pattern_stmt_info) = new_vectype;
	  append_pattern_def_seq (stmt_vinfo, pattern_stmt);
	}

      lhs = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vectype), lhs);
      if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
	{
	  tree rhs2 = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
	  gimple *cast_stmt = gimple_build_assign (rhs2, NOP_EXPR, rhs);
	  append_pattern_def_seq (stmt_vinfo, cast_stmt);
	  rhs = rhs2;
	}
      pattern_stmt = gimple_build_assign (lhs, SSA_NAME, rhs);
      pattern_stmt_info = new_stmt_vec_info (pattern_stmt, vinfo);
      set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
      STMT_VINFO_DATA_REF (pattern_stmt_info)
	= STMT_VINFO_DATA_REF (stmt_vinfo);
      STMT_VINFO_DR_BASE_ADDRESS (pattern_stmt_info)
	= STMT_VINFO_DR_BASE_ADDRESS (stmt_vinfo);
      STMT_VINFO_DR_INIT (pattern_stmt_info) = STMT_VINFO_DR_INIT (stmt_vinfo);
      STMT_VINFO_DR_OFFSET (pattern_stmt_info)
	= STMT_VINFO_DR_OFFSET (stmt_vinfo);
      STMT_VINFO_DR_STEP (pattern_stmt_info) = STMT_VINFO_DR_STEP (stmt_vinfo);
      STMT_VINFO_DR_ALIGNED_TO (pattern_stmt_info)
	= STMT_VINFO_DR_ALIGNED_TO (stmt_vinfo);
      DR_STMT (STMT_VINFO_DATA_REF (stmt_vinfo)) = pattern_stmt;
      *type_out = vectype;
      *type_in = vectype;
      stmts->safe_push (last_stmt);
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "vect_recog_bool_pattern: detected:\n");
      return pattern_stmt;
    }
  else
    return NULL;
}


/* A helper for vect_recog_mask_conversion_pattern.  Build a
   conversion of MASK to a type suitable for masking VECTYPE.
   The built statement gets the required vectype and is appended
   to the pattern sequence of STMT_VINFO.

   Return the converted mask.  */

static tree
build_mask_conversion (tree mask, tree vectype, stmt_vec_info stmt_vinfo,
		       vec_info *vinfo)
{
  gimple *stmt;
  tree masktype, tmp;
  stmt_vec_info new_stmt_info;

  masktype = build_same_sized_truth_vector_type (vectype);
  tmp = vect_recog_temp_ssa_var (TREE_TYPE (masktype), NULL);
  stmt = gimple_build_assign (tmp, CONVERT_EXPR, mask);
  new_stmt_info = new_stmt_vec_info (stmt, vinfo);
  set_vinfo_for_stmt (stmt, new_stmt_info);
  STMT_VINFO_VECTYPE (new_stmt_info) = masktype;
  append_pattern_def_seq (stmt_vinfo, stmt);

  return tmp;
}

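/* E.g. for S4'' in the example documented below, MASK would be m_3'
   (a mask with 32-bit boolean elements) and VECTYPE the char vector
   type of the result, so the built statement is
   m_3'' = (_Bool[bitsize=8]) m_3' with an 8-bit boolean vector type
   as its vectype (a sketch based on the comment below).  */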

/* Function vect_recog_mask_conversion_pattern

   Try to find statements which require boolean type
   conversion.  Additional conversion statements are
   added to handle such cases.  For example:

   bool m_1, m_2, m_3;
   int i_4, i_5;
   double d_6, d_7;
   char c_1, c_2, c_3;

   S1   m_1 = i_4 > i_5;
   S2   m_2 = d_6 < d_7;
   S3   m_3 = m_1 & m_2;
   S4   c_1 = m_3 ? c_2 : c_3;

   will be transformed into:

   S1   m_1 = i_4 > i_5;
   S2   m_2 = d_6 < d_7;
   S3'' m_2' = (_Bool[bitsize=32])m_2
   S3'  m_3' = m_1 & m_2';
   S4'' m_3'' = (_Bool[bitsize=8])m_3'
   S4'  c_1' = m_3'' ? c_2 : c_3;  */

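/* A source-level sketch (hypothetical) that if-converts to the
   S1-S4 sequence above:

     for (i = 0; i < n; i++)
       c_1[i] = (i_4[i] > i_5[i] && d_6[i] < d_7[i]) ? c_2[i] : c_3[i];

   the int and double comparisons produce masks with different element
   sizes, hence the added conversions S3'' and S4''.  */
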
static gimple *
vect_recog_mask_conversion_pattern (vec<gimple *> *stmts, tree *type_in,
				    tree *type_out)
{
  gimple *last_stmt = stmts->pop ();
  enum tree_code rhs_code;
  tree lhs, rhs1, rhs2, tmp, rhs1_type, rhs2_type, vectype1, vectype2;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  stmt_vec_info pattern_stmt_info;
  vec_info *vinfo = stmt_vinfo->vinfo;
  gimple *pattern_stmt;

  /* Check for MASK_LOAD and MASK_STORE calls requiring mask conversion.  */
  if (is_gimple_call (last_stmt)
      && gimple_call_internal_p (last_stmt)
      && (gimple_call_internal_fn (last_stmt) == IFN_MASK_STORE
	  || gimple_call_internal_fn (last_stmt) == IFN_MASK_LOAD))
    {
      bool load = (gimple_call_internal_fn (last_stmt) == IFN_MASK_LOAD);

      if (load)
	{
	  lhs = gimple_call_lhs (last_stmt);
	  vectype1 = get_vectype_for_scalar_type (TREE_TYPE (lhs));
	}
      else
	{
	  rhs2 = gimple_call_arg (last_stmt, 3);
	  vectype1 = get_vectype_for_scalar_type (TREE_TYPE (rhs2));
	}

      rhs1 = gimple_call_arg (last_stmt, 2);
      rhs1_type = search_type_for_mask (rhs1, vinfo);
      if (!rhs1_type)
	return NULL;
      vectype2 = get_mask_type_for_scalar_type (rhs1_type);

      if (!vectype1 || !vectype2
	  || TYPE_VECTOR_SUBPARTS (vectype1) == TYPE_VECTOR_SUBPARTS (vectype2))
	return NULL;

      tmp = build_mask_conversion (rhs1, vectype1, stmt_vinfo, vinfo);

      if (load)
	{
	  lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
	  pattern_stmt
	    = gimple_build_call_internal (IFN_MASK_LOAD, 3,
					  gimple_call_arg (last_stmt, 0),
					  gimple_call_arg (last_stmt, 1),
					  tmp);
	  gimple_call_set_lhs (pattern_stmt, lhs);
	}
      else
	pattern_stmt
	  = gimple_build_call_internal (IFN_MASK_STORE, 4,
					gimple_call_arg (last_stmt, 0),
					gimple_call_arg (last_stmt, 1),
					tmp,
					gimple_call_arg (last_stmt, 3));

      pattern_stmt_info = new_stmt_vec_info (pattern_stmt, vinfo);
      set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
      STMT_VINFO_DATA_REF (pattern_stmt_info)
	= STMT_VINFO_DATA_REF (stmt_vinfo);
      STMT_VINFO_DR_BASE_ADDRESS (pattern_stmt_info)
	= STMT_VINFO_DR_BASE_ADDRESS (stmt_vinfo);
      STMT_VINFO_DR_INIT (pattern_stmt_info) = STMT_VINFO_DR_INIT (stmt_vinfo);
      STMT_VINFO_DR_OFFSET (pattern_stmt_info)
	= STMT_VINFO_DR_OFFSET (stmt_vinfo);
      STMT_VINFO_DR_STEP (pattern_stmt_info) = STMT_VINFO_DR_STEP (stmt_vinfo);
      STMT_VINFO_DR_ALIGNED_TO (pattern_stmt_info)
	= STMT_VINFO_DR_ALIGNED_TO (stmt_vinfo);
      DR_STMT (STMT_VINFO_DATA_REF (stmt_vinfo)) = pattern_stmt;

      *type_out = vectype1;
      *type_in = vectype1;
      stmts->safe_push (last_stmt);
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "vect_recog_mask_conversion_pattern: detected:\n");

      return pattern_stmt;
    }

  if (!is_gimple_assign (last_stmt))
    return NULL;

  lhs = gimple_assign_lhs (last_stmt);
  rhs1 = gimple_assign_rhs1 (last_stmt);
  rhs_code = gimple_assign_rhs_code (last_stmt);

  /* Check for a cond expression requiring mask conversion.  */
  if (rhs_code == COND_EXPR)
    {
      /* vect_recog_mixed_size_cond_pattern could apply.
	 Do nothing then.  */
      if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
	return NULL;

      vectype1 = get_vectype_for_scalar_type (TREE_TYPE (lhs));

      if (TREE_CODE (rhs1) == SSA_NAME)
	{
	  rhs1_type = search_type_for_mask (rhs1, vinfo);
	  if (!rhs1_type)
	    return NULL;
	}
      else if (COMPARISON_CLASS_P (rhs1))
	rhs1_type = TREE_TYPE (TREE_OPERAND (rhs1, 0));
      else
	return NULL;

      vectype2 = get_mask_type_for_scalar_type (rhs1_type);

      if (!vectype1 || !vectype2
	  || TYPE_VECTOR_SUBPARTS (vectype1) == TYPE_VECTOR_SUBPARTS (vectype2))
	return NULL;

      /* If rhs1 is a comparison we need to move it into a
	 separate statement.  */
      if (TREE_CODE (rhs1) != SSA_NAME)
	{
	  tmp = vect_recog_temp_ssa_var (TREE_TYPE (rhs1), NULL);
	  pattern_stmt = gimple_build_assign (tmp, rhs1);
	  rhs1 = tmp;

	  pattern_stmt_info = new_stmt_vec_info (pattern_stmt, vinfo);
	  set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
	  STMT_VINFO_VECTYPE (pattern_stmt_info) = vectype2;
	  append_pattern_def_seq (stmt_vinfo, pattern_stmt);
	}

      tmp = build_mask_conversion (rhs1, vectype1, stmt_vinfo, vinfo);

      lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
      pattern_stmt = gimple_build_assign (lhs, COND_EXPR, tmp,
					  gimple_assign_rhs2 (last_stmt),
					  gimple_assign_rhs3 (last_stmt));

      *type_out = vectype1;
      *type_in = vectype1;
      stmts->safe_push (last_stmt);
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "vect_recog_mask_conversion_pattern: detected:\n");

      return pattern_stmt;
    }

  /* Now check for binary boolean operations requiring conversion of
     one of the operands.  */
  if (TREE_CODE (TREE_TYPE (lhs)) != BOOLEAN_TYPE)
    return NULL;

  if (rhs_code != BIT_IOR_EXPR
      && rhs_code != BIT_XOR_EXPR
      && rhs_code != BIT_AND_EXPR)
    return NULL;

  rhs2 = gimple_assign_rhs2 (last_stmt);

  rhs1_type = search_type_for_mask (rhs1, vinfo);
  rhs2_type = search_type_for_mask (rhs2, vinfo);

  if (!rhs1_type || !rhs2_type
      || TYPE_PRECISION (rhs1_type) == TYPE_PRECISION (rhs2_type))
    return NULL;

  if (TYPE_PRECISION (rhs1_type) < TYPE_PRECISION (rhs2_type))
    {
      vectype1 = get_mask_type_for_scalar_type (rhs1_type);
      if (!vectype1)
	return NULL;
      rhs2 = build_mask_conversion (rhs2, vectype1, stmt_vinfo, vinfo);
    }
  else
    {
      vectype1 = get_mask_type_for_scalar_type (rhs2_type);
      if (!vectype1)
	return NULL;
      rhs1 = build_mask_conversion (rhs1, vectype1, stmt_vinfo, vinfo);
    }

  lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
  pattern_stmt = gimple_build_assign (lhs, rhs_code, rhs1, rhs2);

  *type_out = vectype1;
  *type_in = vectype1;
  stmts->safe_push (last_stmt);
  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "vect_recog_mask_conversion_pattern: detected:\n");

  return pattern_stmt;
}


/* Mark statements that are involved in a pattern.  */

static inline void
vect_mark_pattern_stmts (gimple *orig_stmt, gimple *pattern_stmt,
			 tree pattern_vectype)
{
  stmt_vec_info pattern_stmt_info, def_stmt_info;
  stmt_vec_info orig_stmt_info = vinfo_for_stmt (orig_stmt);
  vec_info *vinfo = orig_stmt_info->vinfo;
  gimple *def_stmt;

  pattern_stmt_info = vinfo_for_stmt (pattern_stmt);
  if (pattern_stmt_info == NULL)
    {
      pattern_stmt_info = new_stmt_vec_info (pattern_stmt, vinfo);
      set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
    }
  gimple_set_bb (pattern_stmt, gimple_bb (orig_stmt));

  STMT_VINFO_RELATED_STMT (pattern_stmt_info) = orig_stmt;
  STMT_VINFO_DEF_TYPE (pattern_stmt_info)
    = STMT_VINFO_DEF_TYPE (orig_stmt_info);
  STMT_VINFO_VECTYPE (pattern_stmt_info) = pattern_vectype;
  STMT_VINFO_IN_PATTERN_P (orig_stmt_info) = true;
  STMT_VINFO_RELATED_STMT (orig_stmt_info) = pattern_stmt;
  STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info)
    = STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info);
  if (STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info))
    {
      gimple_stmt_iterator si;
      for (si = gsi_start (STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info));
	   !gsi_end_p (si); gsi_next (&si))
	{
	  def_stmt = gsi_stmt (si);
	  def_stmt_info = vinfo_for_stmt (def_stmt);
	  if (def_stmt_info == NULL)
	    {
	      def_stmt_info = new_stmt_vec_info (def_stmt, vinfo);
	      set_vinfo_for_stmt (def_stmt, def_stmt_info);
	    }
	  gimple_set_bb (def_stmt, gimple_bb (orig_stmt));
	  STMT_VINFO_RELATED_STMT (def_stmt_info) = orig_stmt;
	  STMT_VINFO_DEF_TYPE (def_stmt_info) = vect_internal_def;
	  if (STMT_VINFO_VECTYPE (def_stmt_info) == NULL_TREE)
	    STMT_VINFO_VECTYPE (def_stmt_info) = pattern_vectype;
	}
    }
}

/* Function vect_pattern_recog_1

   Input:
   PATTERN_RECOG_FUNC: A pointer to a function that detects a certain
        computation pattern.
   STMT: A stmt from which the pattern search should start.

   If PATTERN_RECOG_FUNC successfully detected the pattern, it creates an
   expression that computes the same functionality and can be used to
   replace the sequence of stmts that are involved in the pattern.

   Output:
   This function checks if the expression returned by PATTERN_RECOG_FUNC is
   supported in vector form by the target.  We use 'TYPE_IN' to obtain the
   relevant vector type.  If 'TYPE_IN' is already a vector type, then this
   indicates that target support has already been checked by
   PATTERN_RECOG_FUNC.  If 'TYPE_OUT' is also returned by PATTERN_RECOG_FUNC,
   we check that it fits the available target pattern.

   This function also does some bookkeeping, as explained in the documentation
   for vect_pattern_recog.  */

static bool
vect_pattern_recog_1 (vect_recog_func *recog_func,
		      gimple_stmt_iterator si,
		      vec<gimple *> *stmts_to_replace)
{
  gimple *stmt = gsi_stmt (si), *pattern_stmt;
  stmt_vec_info stmt_info;
  loop_vec_info loop_vinfo;
  tree pattern_vectype;
  tree type_in, type_out;
  enum tree_code code;
  int i;
  gimple *next;

  stmts_to_replace->truncate (0);
  stmts_to_replace->quick_push (stmt);
  pattern_stmt = recog_func->fn (stmts_to_replace, &type_in, &type_out);
  if (!pattern_stmt)
    return false;

  stmt = stmts_to_replace->last ();
  stmt_info = vinfo_for_stmt (stmt);
  loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);

  if (VECTOR_BOOLEAN_TYPE_P (type_in)
      || VECTOR_MODE_P (TYPE_MODE (type_in)))
    {
      /* No need to check target support (already checked by the pattern
	 recognition function).  */
      pattern_vectype = type_out ? type_out : type_in;
    }
  else
    {
      machine_mode vec_mode;
      enum insn_code icode;
      optab optab;

      /* Check target support.  */
      type_in = get_vectype_for_scalar_type (type_in);
      if (!type_in)
	return false;
      if (type_out)
	type_out = get_vectype_for_scalar_type (type_out);
      else
	type_out = type_in;
      if (!type_out)
	return false;
      pattern_vectype = type_out;

      if (is_gimple_assign (pattern_stmt))
	code = gimple_assign_rhs_code (pattern_stmt);
      else
	{
	  gcc_assert (is_gimple_call (pattern_stmt));
	  code = CALL_EXPR;
	}

      optab = optab_for_tree_code (code, type_in, optab_default);
      vec_mode = TYPE_MODE (type_in);
      if (!optab
	  || (icode = optab_handler (optab, vec_mode)) == CODE_FOR_nothing
	  || (insn_data[icode].operand[0].mode != TYPE_MODE (type_out)))
	return false;
    }

  /* Found a vectorizable pattern.  */
  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
		       "%s pattern recognized: ", recog_func->name);
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
    }

  /* Mark the stmts that are involved in the pattern.  */
  vect_mark_pattern_stmts (stmt, pattern_stmt, pattern_vectype);

  /* Patterns cannot be vectorized using SLP, because they change the order of
     computation.  */
  if (loop_vinfo)
    FOR_EACH_VEC_ELT (LOOP_VINFO_REDUCTIONS (loop_vinfo), i, next)
      if (next == stmt)
	LOOP_VINFO_REDUCTIONS (loop_vinfo).ordered_remove (i);

  /* It is possible that additional pattern stmts are created and inserted in
     STMTS_TO_REPLACE.  We create a stmt_info for each of them, and mark the
     relevant statements.  */
  for (i = 0; stmts_to_replace->iterate (i, &stmt)
	      && (unsigned) i < (stmts_to_replace->length () - 1);
       i++)
    {
      stmt_info = vinfo_for_stmt (stmt);
      pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "additional pattern stmt: ");
	  dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
	}

      vect_mark_pattern_stmts (stmt, pattern_stmt, NULL_TREE);
    }

  return true;
}


/* Function vect_pattern_recog

   Input:
   VINFO - vectorization info (of a loop or a basic-block region) in
        which we want to look for computation idioms.

   Output - for each computation idiom that is detected we create a new stmt
        that provides the same functionality and that can be vectorized.  We
        also record some information in the struct_stmt_info of the relevant
        stmts, as explained below:

   At the entry to this function we have the following stmts, with the
   following initial value in the STMT_VINFO fields:

         stmt                     in_pattern_p  related_stmt    vec_stmt
         S1: a_i = ....                 -       -               -
         S2: a_2 = ..use(a_i)..         -       -               -
         S3: a_1 = ..use(a_2)..         -       -               -
         S4: a_0 = ..use(a_1)..         -       -               -
         S5: ... = ..use(a_0)..         -       -               -

   Say the sequence {S1,S2,S3,S4} was detected as a pattern that can be
   represented by a single stmt.  We then:
   - create a new stmt S6 equivalent to the pattern (the stmt is not
     inserted into the code)
   - fill in the STMT_VINFO fields as follows:

                                  in_pattern_p  related_stmt    vec_stmt
         S1: a_i = ....                 -       -               -
         S2: a_2 = ..use(a_i)..         -       -               -
         S3: a_1 = ..use(a_2)..         -       -               -
         S4: a_0 = ..use(a_1)..         true    S6              -
          '---> S6: a_new = ....        -       S4              -
         S5: ... = ..use(a_0)..         -       -               -

   (the last stmt in the pattern (S4) and the new pattern stmt (S6) point
   to each other through the RELATED_STMT field).

   S6 will be marked as relevant in vect_mark_stmts_to_be_vectorized instead
   of S4 because it will replace all its uses.  Stmts {S1,S2,S3} will
   remain irrelevant unless used by stmts other than S4.

   If vectorization succeeds, vect_transform_stmt will skip over {S1,S2,S3}
   (because they are marked as irrelevant).  It will vectorize S6, and record
   a pointer to the new vector stmt VS6 from S6 (as usual).
   S4 will be skipped, and S5 will be vectorized as usual:

                                  in_pattern_p  related_stmt    vec_stmt
         S1: a_i = ....                 -       -               -
         S2: a_2 = ..use(a_i)..         -       -               -
         S3: a_1 = ..use(a_2)..         -       -               -
       > VS6: va_new = ....             -       -               -
         S4: a_0 = ..use(a_1)..         true    S6              VS6
          '---> S6: a_new = ....        -       S4              VS6
       > VS5: ... = ..vuse(va_new)..    -       -               -
         S5: ... = ..use(a_0)..         -       -               -

   DCE could then get rid of {S1,S2,S3,S4,S5} (if their defs are not used
   elsewhere), and we'll end up with:

        VS6: va_new = ....
        VS5: ... = ..vuse(va_new)..

   In the case of more than one pattern statement, e.g., widen-mult with
   intermediate type:

     S1  a_t = ;
     S2  a_T = (TYPE) a_t;
           '--> S3: a_it = (interm_type) a_t;
     S4  prod_T = a_T * CONST;
           '--> S5: prod_T' = a_it w* CONST;

   there may be other users of a_T outside the pattern.  In that case S2 will
   be marked as relevant (as well as S3), and both S2 and S3 will be analyzed
   and vectorized.  The vector stmt VS2 will be recorded in S2, and VS3 will
   be recorded in S3.  */

void
vect_pattern_recog (vec_info *vinfo)
{
  struct loop *loop;
  basic_block *bbs;
  unsigned int nbbs;
  gimple_stmt_iterator si;
  unsigned int i, j;
  auto_vec<gimple *, 1> stmts_to_replace;
  gimple *stmt;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "=== vect_pattern_recog ===\n");

  if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
    {
      loop = LOOP_VINFO_LOOP (loop_vinfo);
      bbs = LOOP_VINFO_BBS (loop_vinfo);
      nbbs = loop->num_nodes;

      /* Scan through the loop stmts, applying the pattern recognition
	 functions starting at each stmt visited:  */
      for (i = 0; i < nbbs; i++)
	{
	  basic_block bb = bbs[i];
	  for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
	    {
	      /* Scan over all generic vect_recog_xxx_pattern functions.  */
	      for (j = 0; j < NUM_PATTERNS; j++)
		if (vect_pattern_recog_1 (&vect_vect_recog_func_ptrs[j], si,
					  &stmts_to_replace))
		  break;
	    }
	}
    }
  else
    {
      bb_vec_info bb_vinfo = as_a <bb_vec_info> (vinfo);
      for (si = bb_vinfo->region_begin;
	   gsi_stmt (si) != gsi_stmt (bb_vinfo->region_end); gsi_next (&si))
	{
	  if ((stmt = gsi_stmt (si))
	      && vinfo_for_stmt (stmt)
	      && !STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt)))
	    continue;

	  /* Scan over all generic vect_recog_xxx_pattern functions.  */
	  for (j = 0; j < NUM_PATTERNS; j++)
	    if (vect_pattern_recog_1 (&vect_vect_recog_func_ptrs[j], si,
				      &stmts_to_replace))
	      break;
	}
    }
}